From c203826dda2e71206eca1cbcd18ebee8689a7794 Mon Sep 17 00:00:00 2001
From: Werner Kramer <werner.kramer@vortech.nl>
Date: Mon, 2 Jan 2023 08:59:29 +0100
Subject: [PATCH 01/15] Copy cta source to own folder

---
 costa/native/cta/cta_util_timing.f90          |   89 +
 costa/native/cta/include/cta.h                |   57 +
 costa/native/cta/include/cta_array.h          |  317 ++
 costa/native/cta/include/cta_bb_modbuild.h    |  153 +
 .../cta/include/cta_bb_modbuild_utils.h       |  171 +
 costa/native/cta/include/cta_datatypes.h      |  208 +
 costa/native/cta/include/cta_datetime.h       |  122 +
 costa/native/cta/include/cta_defaults.h       |  111 +
 costa/native/cta/include/cta_errors.h         |  108 +
 costa/native/cta/include/cta_f77blas.h        |  167 +
 costa/native/cta/include/cta_f77lapack.h      |  101 +
 costa/native/cta/include/cta_file.h           |  133 +
 costa/native/cta/include/cta_flush.h          |   49 +
 costa/native/cta/include/cta_functions.h      |  152 +
 costa/native/cta/include/cta_handles.h        |  178 +
 costa/native/cta/include/cta_initialise.h     |   46 +
 costa/native/cta/include/cta_interface.h      |  143 +
 costa/native/cta/include/cta_matrix.h         |  297 ++
 costa/native/cta/include/cta_matrix_blas.h    |  294 ++
 costa/native/cta/include/cta_mem.h            |   64 +
 costa/native/cta/include/cta_message.h        |  100 +
 costa/native/cta/include/cta_metainfo.h       |  191 +
 costa/native/cta/include/cta_method.h         |   89 +
 costa/native/cta/include/cta_modbuild_b3b.h   |  140 +
 .../cta/include/cta_modbuild_b3b_utils.h      |  170 +
 costa/native/cta/include/cta_modbuild_par.h   |   56 +
 costa/native/cta/include/cta_modbuild_sp.h    |   78 +
 costa/native/cta/include/cta_model.h          |  587 +++
 costa/native/cta/include/cta_model_factory.h  |   84 +
 .../native/cta/include/cta_model_utilities.h  |   76 +
 costa/native/cta/include/cta_modelcombiner.h  |   45 +
 costa/native/cta/include/cta_obsdescr.h       |  179 +
 .../native/cta/include/cta_obsdescr_sqlite3.h |  158 +
 costa/native/cta/include/cta_obsdescr_table.h |  132 +
 costa/native/cta/include/cta_pack.h           |  149 +
 costa/native/cta/include/cta_par.h            |  138 +
 costa/native/cta/include/cta_reltable.h       |  172 +
 costa/native/cta/include/cta_resultwriter.h   |  187 +
 costa/native/cta/include/cta_sobs.h           |  228 +
 costa/native/cta/include/cta_sobs_combine.h   |   48 +
 costa/native/cta/include/cta_sobs_netcdf.h    |  200 +
 costa/native/cta/include/cta_sobs_sqlite3.h   |  188 +
 costa/native/cta/include/cta_string.h         |  184 +
 costa/native/cta/include/cta_system.h         |   71 +
 costa/native/cta/include/cta_time.h           |  189 +
 costa/native/cta/include/cta_tree.h           |  187 +
 costa/native/cta/include/cta_treevector.h     |  512 ++
 costa/native/cta/include/cta_usr_matrix.h     |  456 ++
 costa/native/cta/include/cta_usr_method.h     |  151 +
 costa/native/cta/include/cta_usr_model.h      |  437 ++
 costa/native/cta/include/cta_usr_obs_desc.h   |  220 +
 .../cta/include/cta_usr_stoch_observer.h      |  294 ++
 costa/native/cta/include/cta_usr_vector.h     |  440 ++
 costa/native/cta/include/cta_util_methods.h   |   90 +
 costa/native/cta/include/cta_util_sort.h      |   53 +
 costa/native/cta/include/cta_util_sqlite3.h   |   91 +
 .../native/cta/include/cta_util_statistics.h  |   62 +
 costa/native/cta/include/cta_vector.h         |  363 ++
 costa/native/cta/include/cta_vector_blas.h    |  322 ++
 costa/native/cta/include/cta_xml.h            |   57 +
 costa/native/cta/include/ctai.h               |   37 +
 costa/native/cta/include/ctai_datatypes.h     |   53 +
 costa/native/cta/include/ctai_handles.h       |   69 +
 costa/native/cta/include/ctai_sobs.h          |   41 +
 costa/native/cta/include/ctai_string.h        |   48 +
 costa/native/cta/include/ctai_vector.h        |  103 +
 costa/native/cta/include/ctai_xml.h           |   74 +
 costa/native/cta/include/f_cta_utils.h        |   61 +
 .../cta/include/modbuild_sp_model_template.h  |  123 +
 costa/native/cta/src/CMakeLists.txt           |   23 +
 costa/native/cta/src/cta_array.c              | 1293 +++++
 costa/native/cta/src/cta_datatypes.c          |  224 +
 costa/native/cta/src/cta_datetime.c           |  130 +
 costa/native/cta/src/cta_defaults.c           |   91 +
 costa/native/cta/src/cta_file.c               |  399 ++
 costa/native/cta/src/cta_flush.c              |   32 +
 costa/native/cta/src/cta_functions.c          |  606 +++
 costa/native/cta/src/cta_handles.c            |  975 ++++
 costa/native/cta/src/cta_initialise.c         |  144 +
 costa/native/cta/src/cta_interface.c          |  228 +
 costa/native/cta/src/cta_matrix.c             |  960 ++++
 costa/native/cta/src/cta_matrix_blas.c        |  805 +++
 costa/native/cta/src/cta_mem.c                |   59 +
 costa/native/cta/src/cta_message.c            |  111 +
 costa/native/cta/src/cta_metainfo.c           | 1147 +++++
 costa/native/cta/src/cta_method.c             |  208 +
 costa/native/cta/src/cta_modbuild_par.c       | 3296 +++++++++++++
 costa/native/cta/src/cta_modbuild_sp.c        | 1351 ++++++
 costa/native/cta/src/cta_model.c              | 2120 ++++++++
 costa/native/cta/src/cta_model_factory.c      |  983 ++++
 costa/native/cta/src/cta_model_utilities.c    |   78 +
 costa/native/cta/src/cta_obsdescr.c           |  576 +++
 costa/native/cta/src/cta_obsdescr_combine.c   |  392 ++
 costa/native/cta/src/cta_obsdescr_maori.c     |   59 +
 costa/native/cta/src/cta_obsdescr_netcdf.c    |  550 +++
 costa/native/cta/src/cta_obsdescr_sqlite3.c   |  564 +++
 costa/native/cta/src/cta_obsdescr_table.c     |  611 +++
 costa/native/cta/src/cta_obsdescr_user.c      |   59 +
 costa/native/cta/src/cta_pack.c               |  381 ++
 costa/native/cta/src/cta_par.c                |  905 ++++
 costa/native/cta/src/cta_reltable.c           |  642 +++
 costa/native/cta/src/cta_resultwriter.c       |  251 +
 costa/native/cta/src/cta_sobs.c               |  814 ++++
 costa/native/cta/src/cta_sobs_combine.c       |  704 +++
 costa/native/cta/src/cta_sobs_factory.c       |  310 ++
 costa/native/cta/src/cta_sobs_maori.c         |   74 +
 costa/native/cta/src/cta_sobs_netcdf.c        | 1021 ++++
 costa/native/cta/src/cta_sobs_sqlite3.c       |  743 +++
 costa/native/cta/src/cta_sobs_user.c          |   75 +
 costa/native/cta/src/cta_string.c             |  454 ++
 costa/native/cta/src/cta_time.c               |  634 +++
 costa/native/cta/src/cta_tree.c               |  771 +++
 costa/native/cta/src/cta_treevector.c         | 4301 +++++++++++++++++
 costa/native/cta/src/cta_util_methods.c       |  453 ++
 costa/native/cta/src/cta_util_sort.c          |   78 +
 costa/native/cta/src/cta_util_sqlite3.c       |  372 ++
 costa/native/cta/src/cta_util_statistics.c    |  151 +
 costa/native/cta/src/cta_vector.c             | 1747 +++++++
 costa/native/cta/src/cta_vector_blas.c        | 1357 ++++++
 costa/native/cta/src/cta_xml.c                |  860 ++++
 costa/native/cta/src/f_cta_defaults.c         |   47 +
 costa/native/cta/src/f_cta_utils.c            |   62 +
 costa/native/cta_f90/cta_f90_parameters.f90   |   25 +
 costa/native/cta_f90/generated/cta_f90.f90    |   87 +
 .../cta_f90/generated/cta_f90_array.f90       |  487 ++
 .../cta_f90/generated/cta_f90_datatypes.f90   |   23 +
 .../cta_f90/generated/cta_f90_datetime.f90    |  150 +
 .../native/cta_f90/generated/cta_f90_file.f90 |  149 +
 .../cta_f90/generated/cta_f90_flush_mod.f90   |   22 +
 .../cta_f90/generated/cta_f90_functions.f90   |  145 +
 .../cta_f90/generated/cta_f90_handles.f90     |  223 +
 .../cta_f90/generated/cta_f90_initialise.f90  |   27 +
 .../cta_f90/generated/cta_f90_interface.f90   |  109 +
 .../cta_f90/generated/cta_f90_matrix.f90      |  373 ++
 .../native/cta_f90/generated/cta_f90_mem.f90  |   19 +
 .../cta_f90/generated/cta_f90_message.f90     |   60 +
 .../cta_f90/generated/cta_f90_metainfo.f90    |   60 +
 .../cta_f90/generated/cta_f90_method.f90      |   71 +
 .../generated/cta_f90_modbuild_par.f90        |   34 +
 .../cta_f90/generated/cta_f90_model.f90       |  618 +++
 .../generated/cta_f90_model_factory.f90       |   43 +
 .../generated/cta_f90_model_utilities.f90     |   42 +
 .../cta_f90/generated/cta_f90_obsdescr.f90    |  191 +
 .../native/cta_f90/generated/cta_f90_pack.f90 |  604 +++
 .../native/cta_f90/generated/cta_f90_par.f90  |   62 +
 .../cta_f90/generated/cta_f90_parameters.f90  |   25 +
 .../cta_f90/generated/cta_f90_reltable.f90    |  157 +
 .../generated/cta_f90_resultwriter.f90        |   91 +
 .../native/cta_f90/generated/cta_f90_sobs.f90 |  271 ++
 .../cta_f90/generated/cta_f90_string.f90      |  229 +
 .../native/cta_f90/generated/cta_f90_time.f90 |  249 +
 .../native/cta_f90/generated/cta_f90_tree.f90 |  242 +
 .../cta_f90/generated/cta_f90_treevector.f90  | 1146 +++++
 .../generated/cta_f90_util_methods.f90        |   85 +
 .../cta_f90/generated/cta_f90_util_sort.f90   |   31 +
 .../generated/cta_f90_util_statistics.f90     |   44 +
 .../cta_f90/generated/cta_f90_vector.f90      | 1013 ++++
 .../native/cta_f90/generated/cta_f90_xml.f90  |   39 +
 costa/native/cta_f90/include/cta_f77.inc      |  247 +
 costa/native/cta_f90/include/cta_f90.inc      |   19 +
 .../cta_f90/include/cta_f90_contains.inc      |   42 +
 costa/native/external/blas/caxpy.f            |   34 +
 costa/native/external/blas/ccopy.f            |   33 +
 costa/native/external/blas/cdotc.f            |   38 +
 costa/native/external/blas/cdotu.f            |   37 +
 costa/native/external/blas/cgbmv.f            |  322 ++
 costa/native/external/blas/cgemm.f            |  414 ++
 costa/native/external/blas/cgemv.f            |  281 ++
 costa/native/external/blas/cgerc.f            |  157 +
 costa/native/external/blas/cgeru.f            |  157 +
 costa/native/external/blas/chbmv.f            |  309 ++
 costa/native/external/blas/chemm.f            |  304 ++
 costa/native/external/blas/chemv.f            |  266 +
 costa/native/external/blas/cher.f             |  212 +
 costa/native/external/blas/cher2.f            |  249 +
 costa/native/external/blas/cher2k.f           |  371 ++
 costa/native/external/blas/cherk.f            |  328 ++
 costa/native/external/blas/chpmv.f            |  270 ++
 costa/native/external/blas/chpr.f             |  217 +
 costa/native/external/blas/chpr2.f            |  251 +
 costa/native/external/blas/crotg.f            |   20 +
 costa/native/external/blas/cscal.f            |   28 +
 costa/native/external/blas/csscal.f           |   29 +
 costa/native/external/blas/cswap.f            |   36 +
 costa/native/external/blas/csymm.f            |  296 ++
 costa/native/external/blas/csyr2k.f           |  324 ++
 costa/native/external/blas/csyrk.f            |  293 ++
 costa/native/external/blas/ctbmv.f            |  377 ++
 costa/native/external/blas/ctbsv.f            |  381 ++
 costa/native/external/blas/ctpmv.f            |  338 ++
 costa/native/external/blas/ctpsv.f            |  341 ++
 costa/native/external/blas/ctrmm.f            |  392 ++
 costa/native/external/blas/ctrmv.f            |  321 ++
 costa/native/external/blas/ctrsm.f            |  414 ++
 costa/native/external/blas/ctrsv.f            |  324 ++
 costa/native/external/blas/dasum.f            |   43 +
 costa/native/external/blas/daxpy.f            |   48 +
 costa/native/external/blas/dcabs1.f           |    8 +
 costa/native/external/blas/dcopy.f            |   50 +
 costa/native/external/blas/ddot.f             |   49 +
 costa/native/external/blas/dgbmv.f            |  300 ++
 costa/native/external/blas/dgemm.f            |  313 ++
 costa/native/external/blas/dgemv.f            |  261 +
 costa/native/external/blas/dger.f             |  157 +
 costa/native/external/blas/dnrm2.f            |   60 +
 costa/native/external/blas/drot.f             |   37 +
 costa/native/external/blas/drotg.f            |   27 +
 costa/native/external/blas/dsbmv.f            |  303 ++
 costa/native/external/blas/dscal.f            |   43 +
 costa/native/external/blas/dspmv.f            |  262 +
 costa/native/external/blas/dspr.f             |  198 +
 costa/native/external/blas/dspr2.f            |  229 +
 costa/native/external/blas/dswap.f            |   56 +
 costa/native/external/blas/dsymm.f            |  294 ++
 costa/native/external/blas/dsymv.f            |  262 +
 costa/native/external/blas/dsyr.f             |  197 +
 costa/native/external/blas/dsyr2.f            |  230 +
 costa/native/external/blas/dsyr2k.f           |  327 ++
 costa/native/external/blas/dsyrk.f            |  294 ++
 costa/native/external/blas/dtbmv.f            |  342 ++
 costa/native/external/blas/dtbsv.f            |  346 ++
 costa/native/external/blas/dtpmv.f            |  299 ++
 costa/native/external/blas/dtpsv.f            |  302 ++
 costa/native/external/blas/dtrmm.f            |  355 ++
 costa/native/external/blas/dtrmv.f            |  286 ++
 costa/native/external/blas/dtrsm.f            |  378 ++
 costa/native/external/blas/dtrsv.f            |  289 ++
 costa/native/external/blas/dzasum.f           |   34 +
 costa/native/external/blas/dznrm2.f           |   67 +
 costa/native/external/blas/icamax.f           |   43 +
 costa/native/external/blas/idamax.f           |   39 +
 costa/native/external/blas/isamax.f           |   39 +
 costa/native/external/blas/izamax.f           |   41 +
 costa/native/external/blas/lsame.f            |   87 +
 costa/native/external/blas/sasum.f            |   44 +
 costa/native/external/blas/saxpy.f            |   48 +
 costa/native/external/blas/scasum.f           |   34 +
 costa/native/external/blas/scnrm2.f           |   67 +
 costa/native/external/blas/scopy.f            |   50 +
 costa/native/external/blas/sdot.f             |   49 +
 costa/native/external/blas/sgbmv.f            |  300 ++
 costa/native/external/blas/sgemm.f            |  313 ++
 costa/native/external/blas/sgemv.f            |  261 +
 costa/native/external/blas/sger.f             |  157 +
 costa/native/external/blas/snrm2.f            |   60 +
 costa/native/external/blas/srot.f             |   37 +
 costa/native/external/blas/srotg.f            |   27 +
 costa/native/external/blas/ssbmv.f            |  303 ++
 costa/native/external/blas/sscal.f            |   43 +
 costa/native/external/blas/sspmv.f            |  262 +
 costa/native/external/blas/sspr.f             |  198 +
 costa/native/external/blas/sspr2.f            |  229 +
 costa/native/external/blas/sswap.f            |   56 +
 costa/native/external/blas/ssymm.f            |  294 ++
 costa/native/external/blas/ssymv.f            |  262 +
 costa/native/external/blas/ssyr.f             |  197 +
 costa/native/external/blas/ssyr2.f            |  230 +
 costa/native/external/blas/ssyr2k.f           |  327 ++
 costa/native/external/blas/ssyrk.f            |  294 ++
 costa/native/external/blas/stbmv.f            |  342 ++
 costa/native/external/blas/stbsv.f            |  346 ++
 costa/native/external/blas/stpmv.f            |  299 ++
 costa/native/external/blas/stpsv.f            |  302 ++
 costa/native/external/blas/strmm.f            |  355 ++
 costa/native/external/blas/strmv.f            |  286 ++
 costa/native/external/blas/strsm.f            |  378 ++
 costa/native/external/blas/strsv.f            |  289 ++
 costa/native/external/blas/xerbla.f           |   43 +
 costa/native/external/blas/zaxpy.f            |   34 +
 costa/native/external/blas/zcopy.f            |   33 +
 costa/native/external/blas/zdotc.f            |   36 +
 costa/native/external/blas/zdotu.f            |   36 +
 costa/native/external/blas/zdscal.f           |   30 +
 costa/native/external/blas/zgbmv.f            |  322 ++
 costa/native/external/blas/zgemm.f            |  415 ++
 costa/native/external/blas/zgemv.f            |  281 ++
 costa/native/external/blas/zgerc.f            |  157 +
 costa/native/external/blas/zgeru.f            |  157 +
 costa/native/external/blas/zhbmv.f            |  309 ++
 costa/native/external/blas/zhemm.f            |  304 ++
 costa/native/external/blas/zhemv.f            |  266 +
 costa/native/external/blas/zher.f             |  212 +
 costa/native/external/blas/zher2.f            |  249 +
 costa/native/external/blas/zher2k.f           |  372 ++
 costa/native/external/blas/zherk.f            |  330 ++
 costa/native/external/blas/zhpmv.f            |  270 ++
 costa/native/external/blas/zhpr.f             |  217 +
 costa/native/external/blas/zhpr2.f            |  251 +
 costa/native/external/blas/zrotg.f            |   21 +
 costa/native/external/blas/zscal.f            |   29 +
 costa/native/external/blas/zswap.f            |   36 +
 costa/native/external/blas/zsymm.f            |  296 ++
 costa/native/external/blas/zsyr2k.f           |  324 ++
 costa/native/external/blas/zsyrk.f            |  293 ++
 costa/native/external/blas/ztbmv.f            |  377 ++
 costa/native/external/blas/ztbsv.f            |  381 ++
 costa/native/external/blas/ztpmv.f            |  338 ++
 costa/native/external/blas/ztpsv.f            |  341 ++
 costa/native/external/blas/ztrmm.f            |  392 ++
 costa/native/external/blas/ztrmv.f            |  321 ++
 costa/native/external/blas/ztrsm.f            |  414 ++
 costa/native/external/blas/ztrsv.f            |  324 ++
 costa/native/external/lapack/cbdsqr.f         |  733 +++
 costa/native/external/lapack/cgbbrd.f         |  466 ++
 costa/native/external/lapack/cgbcon.f         |  230 +
 costa/native/external/lapack/cgbequ.f         |  248 +
 costa/native/external/lapack/cgbrfs.f         |  361 ++
 costa/native/external/lapack/cgbsv.f          |  143 +
 costa/native/external/lapack/cgbsvx.f         |  518 ++
 costa/native/external/lapack/cgbtf2.f         |  203 +
 costa/native/external/lapack/cgbtrf.f         |  443 ++
 costa/native/external/lapack/cgbtrs.f         |  215 +
 costa/native/external/lapack/cgebak.f         |  190 +
 costa/native/external/lapack/cgebal.f         |  331 ++
 costa/native/external/lapack/cgebd2.f         |  249 +
 costa/native/external/lapack/cgebrd.f         |  270 ++
 costa/native/external/lapack/cgecon.f         |  189 +
 costa/native/external/lapack/cgeequ.f         |  234 +
 costa/native/external/lapack/cgees.f          |  322 ++
 costa/native/external/lapack/cgeesx.f         |  371 ++
 costa/native/external/lapack/cgeev.f          |  391 ++
 costa/native/external/lapack/cgeevx.f         |  520 ++
 costa/native/external/lapack/cgegs.f          |  442 ++
 costa/native/external/lapack/cgegv.f          |  591 +++
 costa/native/external/lapack/cgehd2.f         |  149 +
 costa/native/external/lapack/cgehrd.f         |  254 +
 costa/native/external/lapack/cgelq2.f         |  124 +
 costa/native/external/lapack/cgelqf.f         |  196 +
 costa/native/external/lapack/cgels.f          |  405 ++
 costa/native/external/lapack/cgelsd.f         |  547 +++
 costa/native/external/lapack/cgelss.f         |  639 +++
 costa/native/external/lapack/cgelsx.f         |  358 ++
 costa/native/external/lapack/cgelsy.f         |  386 ++
 costa/native/external/lapack/cgeql2.f         |  122 +
 costa/native/external/lapack/cgeqlf.f         |  205 +
 costa/native/external/lapack/cgeqp3.f         |  285 ++
 costa/native/external/lapack/cgeqpf.f         |  225 +
 costa/native/external/lapack/cgeqr2.f         |  122 +
 costa/native/external/lapack/cgeqrf.f         |  197 +
 costa/native/external/lapack/cgerfs.f         |  341 ++
 costa/native/external/lapack/cgerq2.f         |  125 +
 costa/native/external/lapack/cgerqf.f         |  205 +
 costa/native/external/lapack/cgesc2.f         |  134 +
 costa/native/external/lapack/cgesdd.f         | 1950 ++++++++
 costa/native/external/lapack/cgesv.f          |  108 +
 costa/native/external/lapack/cgesvd.f         | 3618 ++++++++++++++
 costa/native/external/lapack/cgesvx.f         |  484 ++
 costa/native/external/lapack/cgetc2.f         |  146 +
 costa/native/external/lapack/cgetf2.f         |  136 +
 costa/native/external/lapack/cgetrf.f         |  160 +
 costa/native/external/lapack/cgetri.f         |  194 +
 costa/native/external/lapack/cgetrs.f         |  150 +
 costa/native/external/lapack/cggbak.f         |  216 +
 costa/native/external/lapack/cggbal.f         |  474 ++
 costa/native/external/lapack/cgges.f          |  476 ++
 costa/native/external/lapack/cggesx.f         |  545 +++
 costa/native/external/lapack/cggev.f          |  449 ++
 costa/native/external/lapack/cggevx.f         |  640 +++
 costa/native/external/lapack/cggglm.f         |  213 +
 costa/native/external/lapack/cgghrd.f         |  256 +
 costa/native/external/lapack/cgglse.f         |  218 +
 costa/native/external/lapack/cggqrf.f         |  212 +
 costa/native/external/lapack/cggrqf.f         |  212 +
 costa/native/external/lapack/cggsvd.f         |  334 ++
 costa/native/external/lapack/cggsvp.f         |  403 ++
 costa/native/external/lapack/cgtcon.f         |  167 +
 costa/native/external/lapack/cgtrfs.f         |  369 ++
 costa/native/external/lapack/cgtsv.f          |  174 +
 costa/native/external/lapack/cgtsvx.f         |  294 ++
 costa/native/external/lapack/cgttrf.f         |  175 +
 costa/native/external/lapack/cgttrs.f         |  143 +
 costa/native/external/lapack/cgtts2.f         |  272 ++
 costa/native/external/lapack/chbev.f          |  209 +
 costa/native/external/lapack/chbevd.f         |  298 ++
 costa/native/external/lapack/chbevx.f         |  417 ++
 costa/native/external/lapack/chbgst.f         | 1377 ++++++
 costa/native/external/lapack/chbgv.f          |  192 +
 costa/native/external/lapack/chbgvd.f         |  295 ++
 costa/native/external/lapack/chbgvx.f         |  374 ++
 costa/native/external/lapack/chbtrd.f         |  589 +++
 costa/native/external/lapack/checon.f         |  159 +
 costa/native/external/lapack/cheev.f          |  220 +
 costa/native/external/lapack/cheevd.f         |  296 ++
 costa/native/external/lapack/cheevr.f         |  522 ++
 costa/native/external/lapack/cheevx.f         |  426 ++
 costa/native/external/lapack/chegs2.f         |  225 +
 costa/native/external/lapack/chegst.f         |  260 +
 costa/native/external/lapack/chegv.f          |  229 +
 costa/native/external/lapack/chegvd.f         |  297 ++
 costa/native/external/lapack/chegvx.f         |  329 ++
 costa/native/external/lapack/cherfs.f         |  339 ++
 costa/native/external/lapack/chesv.f          |  171 +
 costa/native/external/lapack/chesvx.f         |  299 ++
 costa/native/external/lapack/chetd2.f         |  259 +
 costa/native/external/lapack/chetf2.f         |  544 +++
 costa/native/external/lapack/chetrd.f         |  297 ++
 costa/native/external/lapack/chetrf.f         |  282 ++
 costa/native/external/lapack/chetri.f         |  328 ++
 costa/native/external/lapack/chetrs.f         |  394 ++
 costa/native/external/lapack/chgeqz.f         |  734 +++
 costa/native/external/lapack/chpcon.f         |  155 +
 costa/native/external/lapack/chpev.f          |  197 +
 costa/native/external/lapack/chpevd.f         |  280 ++
 costa/native/external/lapack/chpevx.f         |  384 ++
 costa/native/external/lapack/chpgst.f         |  216 +
 costa/native/external/lapack/chpgv.f          |  197 +
 costa/native/external/lapack/chpgvd.f         |  291 ++
 costa/native/external/lapack/chpgvx.f         |  284 ++
 costa/native/external/lapack/chprfs.f         |  337 ++
 costa/native/external/lapack/chpsv.f          |  149 +
 costa/native/external/lapack/chpsvx.f         |  279 ++
 costa/native/external/lapack/chptrd.f         |  238 +
 costa/native/external/lapack/chptrf.f         |  581 +++
 costa/native/external/lapack/chptri.f         |  344 ++
 costa/native/external/lapack/chptrs.f         |  402 ++
 costa/native/external/lapack/chsein.f         |  351 ++
 costa/native/external/lapack/chseqr.f         |  474 ++
 costa/native/external/lapack/clabrd.f         |  329 ++
 costa/native/external/lapack/clacgv.f         |   61 +
 costa/native/external/lapack/clacon.f         |  211 +
 costa/native/external/lapack/clacp2.f         |   92 +
 costa/native/external/lapack/clacpy.f         |   91 +
 costa/native/external/lapack/clacrm.f         |  111 +
 costa/native/external/lapack/clacrt.f         |   91 +
 costa/native/external/lapack/cladiv.f         |   47 +
 costa/native/external/lapack/claed0.f         |  289 ++
 costa/native/external/lapack/claed7.f         |  267 +
 costa/native/external/lapack/claed8.f         |  364 ++
 costa/native/external/lapack/claein.f         |  264 +
 costa/native/external/lapack/claesy.f         |  153 +
 costa/native/external/lapack/claev2.f         |   96 +
 costa/native/external/lapack/clags2.f         |  305 ++
 costa/native/external/lapack/clagtm.f         |  234 +
 costa/native/external/lapack/clahef.f         |  648 +++
 costa/native/external/lapack/clahqr.f         |  383 ++
 costa/native/external/lapack/clahrd.f         |  212 +
 costa/native/external/lapack/claic1.f         |  296 ++
 costa/native/external/lapack/clals0.f         |  434 ++
 costa/native/external/lapack/clalsa.f         |  504 ++
 costa/native/external/lapack/clalsd.f         |  597 +++
 costa/native/external/lapack/clangb.f         |  155 +
 costa/native/external/lapack/clange.f         |  146 +
 costa/native/external/lapack/clangt.f         |  142 +
 costa/native/external/lapack/clanhb.f         |  202 +
 costa/native/external/lapack/clanhe.f         |  188 +
 costa/native/external/lapack/clanhp.f         |  202 +
 costa/native/external/lapack/clanhs.f         |  143 +
 costa/native/external/lapack/clanht.f         |  126 +
 costa/native/external/lapack/clansb.f         |  188 +
 costa/native/external/lapack/clansp.f         |  207 +
 costa/native/external/lapack/clansy.f         |  175 +
 costa/native/external/lapack/clantb.f         |  286 ++
 costa/native/external/lapack/clantp.f         |  287 ++
 costa/native/external/lapack/clantr.f         |  278 ++
 costa/native/external/lapack/clapll.f         |  104 +
 costa/native/external/lapack/clapmt.f         |  135 +
 costa/native/external/lapack/claqgb.f         |  170 +
 costa/native/external/lapack/claqge.f         |  156 +
 costa/native/external/lapack/claqhb.f         |  152 +
 costa/native/external/lapack/claqhe.f         |  148 +
 costa/native/external/lapack/claqhp.f         |  147 +
 costa/native/external/lapack/claqp2.f         |  170 +
 costa/native/external/lapack/claqps.f         |  260 +
 costa/native/external/lapack/claqsb.f         |  150 +
 costa/native/external/lapack/claqsp.f         |  142 +
 costa/native/external/lapack/claqsy.f         |  143 +
 costa/native/external/lapack/clar1v.f         |  328 ++
 costa/native/external/lapack/clar2v.f         |   98 +
 costa/native/external/lapack/clarcm.f         |  111 +
 costa/native/external/lapack/clarf.f          |  121 +
 costa/native/external/lapack/clarfb.f         |  609 +++
 costa/native/external/lapack/clarfg.f         |  146 +
 costa/native/external/lapack/clarft.f         |  225 +
 costa/native/external/lapack/clarfx.f         |  641 +++
 costa/native/external/lapack/clargv.f         |  225 +
 costa/native/external/lapack/clarnv.f         |  131 +
 costa/native/external/lapack/clarrv.f         |  433 ++
 costa/native/external/lapack/clartg.f         |  193 +
 costa/native/external/lapack/clartv.f         |   79 +
 costa/native/external/lapack/clarz.f          |  158 +
 costa/native/external/lapack/clarzb.f         |  235 +
 costa/native/external/lapack/clarzt.f         |  187 +
 costa/native/external/lapack/clascl.f         |  268 +
 costa/native/external/lapack/claset.f         |  115 +
 costa/native/external/lapack/clasr.f          |  325 ++
 costa/native/external/lapack/classq.f         |  102 +
 costa/native/external/lapack/claswp.f         |  120 +
 costa/native/external/lapack/clasyf.f         |  598 +++
 costa/native/external/lapack/clatbs.f         |  909 ++++
 costa/native/external/lapack/clatdf.f         |  242 +
 costa/native/external/lapack/clatps.f         |  895 ++++
 costa/native/external/lapack/clatrd.f         |  280 ++
 costa/native/external/lapack/clatrs.f         |  880 ++++
 costa/native/external/lapack/clatrz.f         |  134 +
 costa/native/external/lapack/clatzm.f         |  147 +
 costa/native/external/lapack/clauu2.f         |  144 +
 costa/native/external/lapack/clauum.f         |  161 +
 costa/native/external/lapack/cpbcon.f         |  194 +
 costa/native/external/lapack/cpbequ.f         |  168 +
 costa/native/external/lapack/cpbrfs.f         |  342 ++
 costa/native/external/lapack/cpbstf.f         |  264 +
 costa/native/external/lapack/cpbsv.f          |  152 +
 costa/native/external/lapack/cpbsvx.f         |  423 ++
 costa/native/external/lapack/cpbtf2.f         |  201 +
 costa/native/external/lapack/cpbtrf.f         |  372 ++
 costa/native/external/lapack/cpbtrs.f         |  146 +
 costa/native/external/lapack/cpocon.f         |  180 +
 costa/native/external/lapack/cpoequ.f         |  138 +
 costa/native/external/lapack/cporfs.f         |  333 ++
 costa/native/external/lapack/cposv.f          |  122 +
 costa/native/external/lapack/cposvx.f         |  378 ++
 costa/native/external/lapack/cpotf2.f         |  175 +
 costa/native/external/lapack/cpotrf.f         |  187 +
 costa/native/external/lapack/cpotri.f         |   97 +
 costa/native/external/lapack/cpotrs.f         |  133 +
 costa/native/external/lapack/cppcon.f         |  179 +
 costa/native/external/lapack/cppequ.f         |  170 +
 costa/native/external/lapack/cpprfs.f         |  331 ++
 costa/native/external/lapack/cppsv.f          |  134 +
 costa/native/external/lapack/cppsvx.f         |  383 ++
 costa/native/external/lapack/cpptrf.f         |  179 +
 costa/native/external/lapack/cpptri.f         |  131 +
 costa/native/external/lapack/cpptrs.f         |  135 +
 costa/native/external/lapack/cptcon.f         |  151 +
 costa/native/external/lapack/cpteqr.f         |  191 +
 costa/native/external/lapack/cptrfs.f         |  367 ++
 costa/native/external/lapack/cptsv.f          |  101 +
 costa/native/external/lapack/cptsvx.f         |  238 +
 costa/native/external/lapack/cpttrf.f         |  169 +
 costa/native/external/lapack/cpttrs.f         |  136 +
 costa/native/external/lapack/cptts2.f         |  177 +
 costa/native/external/lapack/crot.f           |   92 +
 costa/native/external/lapack/cspcon.f         |  155 +
 costa/native/external/lapack/cspmv.f          |  265 +
 costa/native/external/lapack/cspr.f           |  214 +
 costa/native/external/lapack/csprfs.f         |  336 ++
 costa/native/external/lapack/cspsv.f          |  149 +
 costa/native/external/lapack/cspsvx.f         |  279 ++
 costa/native/external/lapack/csptrf.f         |  556 +++
 costa/native/external/lapack/csptri.f         |  338 ++
 costa/native/external/lapack/csptrs.f         |  378 ++
 costa/native/external/lapack/csrot.f          |   53 +
 costa/native/external/lapack/csrscl.f         |  115 +
 costa/native/external/lapack/cstedc.f         |  390 ++
 costa/native/external/lapack/cstegr.f         |  405 ++
 costa/native/external/lapack/cstein.f         |  377 ++
 costa/native/external/lapack/csteqr.f         |  504 ++
 costa/native/external/lapack/csycon.f         |  159 +
 costa/native/external/lapack/csymv.f          |  265 +
 costa/native/external/lapack/csyr.f           |  199 +
 costa/native/external/lapack/csyrfs.f         |  339 ++
 costa/native/external/lapack/csysv.f          |  171 +
 costa/native/external/lapack/csysvx.f         |  299 ++
 costa/native/external/lapack/csytf2.f         |  515 ++
 costa/native/external/lapack/csytrf.f         |  287 ++
 costa/native/external/lapack/csytri.f         |  314 ++
 costa/native/external/lapack/csytrs.f         |  370 ++
 costa/native/external/lapack/ctbcon.f         |  205 +
 costa/native/external/lapack/ctbrfs.f         |  393 ++
 costa/native/external/lapack/ctbtrs.f         |  163 +
 costa/native/external/lapack/ctgevc.f         |  632 +++
 costa/native/external/lapack/ctgex2.f         |  264 +
 costa/native/external/lapack/ctgexc.f         |  207 +
 costa/native/external/lapack/ctgsen.f         |  642 +++
 costa/native/external/lapack/ctgsja.f         |  526 ++
 costa/native/external/lapack/ctgsna.f         |  402 ++
 costa/native/external/lapack/ctgsy2.f         |  357 ++
 costa/native/external/lapack/ctgsyl.f         |  548 +++
 costa/native/external/lapack/ctpcon.f         |  194 +
 costa/native/external/lapack/ctprfs.f         |  387 ++
 costa/native/external/lapack/ctptri.f         |  177 +
 costa/native/external/lapack/ctptrs.f         |  154 +
 costa/native/external/lapack/ctrcon.f         |  200 +
 costa/native/external/lapack/ctrevc.f         |  390 ++
 costa/native/external/lapack/ctrexc.f         |  162 +
 costa/native/external/lapack/ctrrfs.f         |  378 ++
 costa/native/external/lapack/ctrsen.f         |  358 ++
 costa/native/external/lapack/ctrsna.f         |  354 ++
 costa/native/external/lapack/ctrsyl.f         |  368 ++
 costa/native/external/lapack/ctrti2.f         |  147 +
 costa/native/external/lapack/ctrtri.f         |  178 +
 costa/native/external/lapack/ctrtrs.f         |  149 +
 costa/native/external/lapack/ctzrqf.f         |  174 +
 costa/native/external/lapack/ctzrzf.f         |  241 +
 costa/native/external/lapack/cung2l.f         |  129 +
 costa/native/external/lapack/cung2r.f         |  131 +
 costa/native/external/lapack/cungbr.f         |  246 +
 costa/native/external/lapack/cunghr.f         |  166 +
 costa/native/external/lapack/cungl2.f         |  137 +
 costa/native/external/lapack/cunglq.f         |  216 +
 costa/native/external/lapack/cungql.f         |  214 +
 costa/native/external/lapack/cungqr.f         |  217 +
 costa/native/external/lapack/cungr2.f         |  135 +
 costa/native/external/lapack/cungrq.f         |  215 +
 costa/native/external/lapack/cungtr.f         |  185 +
 costa/native/external/lapack/cunm2l.f         |  197 +
 costa/native/external/lapack/cunm2r.f         |  202 +
 costa/native/external/lapack/cunmbr.f         |  282 ++
 costa/native/external/lapack/cunmhr.f         |  203 +
 costa/native/external/lapack/cunml2.f         |  206 +
 costa/native/external/lapack/cunmlq.f         |  269 ++
 costa/native/external/lapack/cunmql.f         |  258 +
 costa/native/external/lapack/cunmqr.f         |  262 +
 costa/native/external/lapack/cunmr2.f         |  199 +
 costa/native/external/lapack/cunmr3.f         |  213 +
 costa/native/external/lapack/cunmrq.f         |  265 +
 costa/native/external/lapack/cunmrz.f         |  294 ++
 costa/native/external/lapack/cunmtr.f         |  224 +
 costa/native/external/lapack/cupgtr.f         |  162 +
 costa/native/external/lapack/cupmtr.f         |  268 +
 costa/native/external/lapack/dbdsdc.f         |  427 ++
 costa/native/external/lapack/dbdsqr.f         |  733 +++
 costa/native/external/lapack/ddisna.f         |  180 +
 costa/native/external/lapack/dgbbrd.f         |  444 ++
 costa/native/external/lapack/dgbcon.f         |  222 +
 costa/native/external/lapack/dgbequ.f         |  240 +
 costa/native/external/lapack/dgbrfs.f         |  351 ++
 costa/native/external/lapack/dgbsv.f          |  143 +
 costa/native/external/lapack/dgbsvx.f         |  517 ++
 costa/native/external/lapack/dgbtf2.f         |  203 +
 costa/native/external/lapack/dgbtrf.f         |  442 ++
 costa/native/external/lapack/dgbtrs.f         |  187 +
 costa/native/external/lapack/dgebak.f         |  189 +
 costa/native/external/lapack/dgebal.f         |  323 ++
 costa/native/external/lapack/dgebd2.f         |  238 +
 costa/native/external/lapack/dgebrd.f         |  269 ++
 costa/native/external/lapack/dgecon.f         |  181 +
 costa/native/external/lapack/dgeequ.f         |  226 +
 costa/native/external/lapack/dgees.f          |  431 ++
 costa/native/external/lapack/dgeesx.f         |  502 ++
 costa/native/external/lapack/dgeev.f          |  409 ++
 costa/native/external/lapack/dgeevx.f         |  544 +++
 costa/native/external/lapack/dgegs.f          |  470 ++
 costa/native/external/lapack/dgegv.f          |  641 +++
 costa/native/external/lapack/dgehd2.f         |  150 +
 costa/native/external/lapack/dgehrd.f         |  255 +
 costa/native/external/lapack/dgelq2.f         |  122 +
 costa/native/external/lapack/dgelqf.f         |  196 +
 costa/native/external/lapack/dgels.f          |  403 ++
 costa/native/external/lapack/dgelsd.f         |  529 ++
 costa/native/external/lapack/dgelss.f         |  613 +++
 costa/native/external/lapack/dgelsx.f         |  350 ++
 costa/native/external/lapack/dgelsy.f         |  379 ++
 costa/native/external/lapack/dgeql2.f         |  123 +
 costa/native/external/lapack/dgeqlf.f         |  205 +
 costa/native/external/lapack/dgeqp3.f         |  279 ++
 costa/native/external/lapack/dgeqpf.f         |  222 +
 costa/native/external/lapack/dgeqr2.f         |  122 +
 costa/native/external/lapack/dgeqrf.f         |  197 +
 costa/native/external/lapack/dgerfs.f         |  332 ++
 costa/native/external/lapack/dgerq2.f         |  123 +
 costa/native/external/lapack/dgerqf.f         |  205 +
 costa/native/external/lapack/dgesc2.f         |  133 +
 costa/native/external/lapack/dgesdd.f         | 1335 +++++
 costa/native/external/lapack/dgesv.f          |  108 +
 costa/native/external/lapack/dgesvd.f         | 3417 +++++++++++++
 costa/native/external/lapack/dgesvx.f         |  482 ++
 costa/native/external/lapack/dgetc2.f         |  147 +
 costa/native/external/lapack/dgetf2.f         |  135 +
 costa/native/external/lapack/dgetrf.f         |  160 +
 costa/native/external/lapack/dgetri.f         |  193 +
 costa/native/external/lapack/dgetrs.f         |  150 +
 costa/native/external/lapack/dggbak.f         |  216 +
 costa/native/external/lapack/dggbal.f         |  461 ++
 costa/native/external/lapack/dgges.f          |  550 +++
 costa/native/external/lapack/dggesx.f         |  640 +++
 costa/native/external/lapack/dggev.f          |  481 ++
 costa/native/external/lapack/dggevx.f         |  698 +++
 costa/native/external/lapack/dggglm.f         |  212 +
 costa/native/external/lapack/dgghrd.f         |  253 +
 costa/native/external/lapack/dgglse.f         |  217 +
 costa/native/external/lapack/dggqrf.f         |  212 +
 costa/native/external/lapack/dggrqf.f         |  212 +
 costa/native/external/lapack/dggsvd.f         |  336 ++
 costa/native/external/lapack/dggsvp.f         |  394 ++
 costa/native/external/lapack/dgtcon.f         |  166 +
 costa/native/external/lapack/dgtrfs.f         |  357 ++
 costa/native/external/lapack/dgtsv.f          |  263 +
 costa/native/external/lapack/dgtsvx.f         |  293 ++
 costa/native/external/lapack/dgttrf.f         |  169 +
 costa/native/external/lapack/dgttrs.f         |  141 +
 costa/native/external/lapack/dgtts2.f         |  197 +
 costa/native/external/lapack/dhgeqz.f         | 1243 +++++
 costa/native/external/lapack/dhsein.f         |  412 ++
 costa/native/external/lapack/dhseqr.f         |  467 ++
 costa/native/external/lapack/dlabad.f         |   56 +
 costa/native/external/lapack/dlabrd.f         |  291 ++
 costa/native/external/lapack/dlacon.f         |  204 +
 costa/native/external/lapack/dlacpy.f         |   88 +
 costa/native/external/lapack/dladiv.f         |   63 +
 costa/native/external/lapack/dlae2.f          |  124 +
 costa/native/external/lapack/dlaebz.f         |  552 +++
 costa/native/external/lapack/dlaed0.f         |  350 ++
 costa/native/external/lapack/dlaed1.f         |  196 +
 costa/native/external/lapack/dlaed2.f         |  435 ++
 costa/native/external/lapack/dlaed3.f         |  265 +
 costa/native/external/lapack/dlaed4.f         |  846 ++++
 costa/native/external/lapack/dlaed5.f         |  125 +
 costa/native/external/lapack/dlaed6.f         |  299 ++
 costa/native/external/lapack/dlaed7.f         |  288 ++
 costa/native/external/lapack/dlaed8.f         |  400 ++
 costa/native/external/lapack/dlaed9.f         |  206 +
 costa/native/external/lapack/dlaeda.f         |  218 +
 costa/native/external/lapack/dlaein.f         |  532 ++
 costa/native/external/lapack/dlaev2.f         |  170 +
 costa/native/external/lapack/dlaexc.f         |  355 ++
 costa/native/external/lapack/dlag2.f          |  301 ++
 costa/native/external/lapack/dlags2.f         |  270 ++
 costa/native/external/lapack/dlagtf.f         |  191 +
 costa/native/external/lapack/dlagtm.f         |  191 +
 costa/native/external/lapack/dlagts.f         |  305 ++
 costa/native/external/lapack/dlagv2.f         |  290 ++
 costa/native/external/lapack/dlahqr.f         |  435 ++
 costa/native/external/lapack/dlahrd.f         |  206 +
 costa/native/external/lapack/dlaic1.f         |  293 ++
 costa/native/external/lapack/dlaln2.f         |  508 ++
 costa/native/external/lapack/dlals0.f         |  375 ++
 costa/native/external/lapack/dlalsa.f         |  363 ++
 costa/native/external/lapack/dlalsd.f         |  433 ++
 costa/native/external/lapack/dlamch.f         |  857 ++++
 costa/native/external/lapack/dlamrg.f         |  104 +
 costa/native/external/lapack/dlangb.f         |  155 +
 costa/native/external/lapack/dlange.f         |  145 +
 costa/native/external/lapack/dlangt.f         |  142 +
 costa/native/external/lapack/dlanhs.f         |  142 +
 costa/native/external/lapack/dlansb.f         |  187 +
 costa/native/external/lapack/dlansp.f         |  197 +
 costa/native/external/lapack/dlanst.f         |  125 +
 costa/native/external/lapack/dlansy.f         |  174 +
 costa/native/external/lapack/dlantb.f         |  285 ++
 costa/native/external/lapack/dlantp.f         |  286 ++
 costa/native/external/lapack/dlantr.f         |  277 ++
 costa/native/external/lapack/dlanv2.f         |  206 +
 costa/native/external/lapack/dlapll.f         |  100 +
 costa/native/external/lapack/dlapmt.f         |  135 +
 costa/native/external/lapack/dlapy2.f         |   54 +
 costa/native/external/lapack/dlapy3.f         |   54 +
 costa/native/external/lapack/dlaqgb.f         |  169 +
 costa/native/external/lapack/dlaqge.f         |  155 +
 costa/native/external/lapack/dlaqp2.f         |  166 +
 costa/native/external/lapack/dlaqps.f         |  245 +
 costa/native/external/lapack/dlaqsb.f         |  149 +
 costa/native/external/lapack/dlaqsp.f         |  141 +
 costa/native/external/lapack/dlaqsy.f         |  142 +
 costa/native/external/lapack/dlaqtr.f         |  666 +++
 costa/native/external/lapack/dlar1v.f         |  323 ++
 costa/native/external/lapack/dlar2v.f         |   87 +
 costa/native/external/lapack/dlarf.f          |  116 +
 costa/native/external/lapack/dlarfb.f         |  588 +++
 costa/native/external/lapack/dlarfg.f         |  138 +
 costa/native/external/lapack/dlarft.f         |  218 +
 costa/native/external/lapack/dlarfx.f         |  639 +++
 costa/native/external/lapack/dlargv.f         |  100 +
 costa/native/external/lapack/dlarnv.f         |  116 +
 costa/native/external/lapack/dlarrb.f         |  282 ++
 costa/native/external/lapack/dlarre.f         |  318 ++
 costa/native/external/lapack/dlarrf.f         |  150 +
 costa/native/external/lapack/dlarrv.f         |  419 ++
 costa/native/external/lapack/dlartg.f         |  143 +
 costa/native/external/lapack/dlartv.f         |   77 +
 costa/native/external/lapack/dlaruv.f         |  368 ++
 costa/native/external/lapack/dlarz.f          |  153 +
 costa/native/external/lapack/dlarzb.f         |  221 +
 costa/native/external/lapack/dlarzt.f         |  185 +
 costa/native/external/lapack/dlas2.f          |  122 +
 costa/native/external/lapack/dlascl.f         |  268 +
 costa/native/external/lapack/dlasd0.f         |  231 +
 costa/native/external/lapack/dlasd1.f         |  233 +
 costa/native/external/lapack/dlasd2.f         |  513 ++
 costa/native/external/lapack/dlasd3.f         |  359 ++
 costa/native/external/lapack/dlasd4.f         |  891 ++++
 costa/native/external/lapack/dlasd5.f         |  164 +
 costa/native/external/lapack/dlasd6.f         |  306 ++
 costa/native/external/lapack/dlasd7.f         |  445 ++
 costa/native/external/lapack/dlasd8.f         |  254 +
 costa/native/external/lapack/dlasd9.f         |  256 +
 costa/native/external/lapack/dlasda.f         |  391 ++
 costa/native/external/lapack/dlasdq.f         |  317 ++
 costa/native/external/lapack/dlasdt.f         |  106 +
 costa/native/external/lapack/dlaset.f         |  115 +
 costa/native/external/lapack/dlasq1.f         |  149 +
 costa/native/external/lapack/dlasq2.f         |  436 ++
 costa/native/external/lapack/dlasq3.f         |  298 ++
 costa/native/external/lapack/dlasq4.f         |  330 ++
 costa/native/external/lapack/dlasq5.f         |  196 +
 costa/native/external/lapack/dlasq6.f         |  176 +
 costa/native/external/lapack/dlasr.f          |  325 ++
 costa/native/external/lapack/dlasrt.f         |  244 +
 costa/native/external/lapack/dlassq.f         |   89 +
 costa/native/external/lapack/dlasv2.f         |  250 +
 costa/native/external/lapack/dlaswp.f         |  120 +
 costa/native/external/lapack/dlasy2.f         |  382 ++
 costa/native/external/lapack/dlasyf.f         |  588 +++
 costa/native/external/lapack/dlatbs.f         |  724 +++
 costa/native/external/lapack/dlatdf.f         |  238 +
 costa/native/external/lapack/dlatps.f         |  713 +++
 costa/native/external/lapack/dlatrd.f         |  259 +
 costa/native/external/lapack/dlatrs.f         |  702 +++
 costa/native/external/lapack/dlatrz.f         |  128 +
 costa/native/external/lapack/dlatzm.f         |  143 +
 costa/native/external/lapack/dlauu2.f         |  136 +
 costa/native/external/lapack/dlauum.f         |  156 +
 costa/native/external/lapack/dopgtr.f         |  161 +
 costa/native/external/lapack/dopmtr.f         |  258 +
 costa/native/external/lapack/dorg2l.f         |  128 +
 costa/native/external/lapack/dorg2r.f         |  130 +
 costa/native/external/lapack/dorgbr.f         |  245 +
 costa/native/external/lapack/dorghr.f         |  165 +
 costa/native/external/lapack/dorgl2.f         |  134 +
 costa/native/external/lapack/dorglq.f         |  216 +
 costa/native/external/lapack/dorgql.f         |  214 +
 costa/native/external/lapack/dorgqr.f         |  217 +
 costa/native/external/lapack/dorgr2.f         |  132 +
 costa/native/external/lapack/dorgrq.f         |  214 +
 costa/native/external/lapack/dorgtr.f         |  184 +
 costa/native/external/lapack/dorm2l.f         |  194 +
 costa/native/external/lapack/dorm2r.f         |  198 +
 costa/native/external/lapack/dormbr.f         |  282 ++
 costa/native/external/lapack/dormhr.f         |  202 +
 costa/native/external/lapack/dorml2.f         |  198 +
 costa/native/external/lapack/dormlq.f         |  268 +
 costa/native/external/lapack/dormql.f         |  257 +
 costa/native/external/lapack/dormqr.f         |  261 +
 costa/native/external/lapack/dormr2.f         |  194 +
 costa/native/external/lapack/dormr3.f         |  207 +
 costa/native/external/lapack/dormrq.f         |  264 +
 costa/native/external/lapack/dormrz.f         |  288 ++
 costa/native/external/lapack/dormtr.f         |  223 +
 costa/native/external/lapack/dpbcon.f         |  188 +
 costa/native/external/lapack/dpbequ.f         |  167 +
 costa/native/external/lapack/dpbrfs.f         |  337 ++
 costa/native/external/lapack/dpbstf.f         |  251 +
 costa/native/external/lapack/dpbsv.f          |  152 +
 costa/native/external/lapack/dpbsvx.f         |  424 ++
 costa/native/external/lapack/dpbtf2.f         |  195 +
 costa/native/external/lapack/dpbtrf.f         |  365 ++
 costa/native/external/lapack/dpbtrs.f         |  146 +
 costa/native/external/lapack/dpocon.f         |  173 +
 costa/native/external/lapack/dpoequ.f         |  137 +
 costa/native/external/lapack/dporfs.f         |  327 ++
 costa/native/external/lapack/dposv.f          |  122 +
 costa/native/external/lapack/dposvx.f         |  379 ++
 costa/native/external/lapack/dpotf2.f         |  168 +
 costa/native/external/lapack/dpotrf.f         |  184 +
 costa/native/external/lapack/dpotri.f         |   97 +
 costa/native/external/lapack/dpotrs.f         |  133 +
 costa/native/external/lapack/dppcon.f         |  172 +
 costa/native/external/lapack/dppequ.f         |  169 +
 costa/native/external/lapack/dpprfs.f         |  324 ++
 costa/native/external/lapack/dppsv.f          |  134 +
 costa/native/external/lapack/dppsvx.f         |  383 ++
 costa/native/external/lapack/dpptrf.f         |  178 +
 costa/native/external/lapack/dpptri.f         |  129 +
 costa/native/external/lapack/dpptrs.f         |  135 +
 costa/native/external/lapack/dptcon.f         |  150 +
 costa/native/external/lapack/dpteqr.f         |  190 +
 costa/native/external/lapack/dptrfs.f         |  302 ++
 costa/native/external/lapack/dptsv.f          |  100 +
 costa/native/external/lapack/dptsvx.f         |  235 +
 costa/native/external/lapack/dpttrf.f         |  153 +
 costa/native/external/lapack/dpttrs.f         |  115 +
 costa/native/external/lapack/dptts2.f         |   94 +
 costa/native/external/lapack/drscl.f          |  115 +
 costa/native/external/lapack/dsbev.f          |  206 +
 costa/native/external/lapack/dsbevd.f         |  265 +
 costa/native/external/lapack/dsbevx.f         |  411 ++
 costa/native/external/lapack/dsbgst.f         | 1346 ++++++
 costa/native/external/lapack/dsbgv.f          |  189 +
 costa/native/external/lapack/dsbgvd.f         |  270 ++
 costa/native/external/lapack/dsbgvx.f         |  371 ++
 costa/native/external/lapack/dsbtrd.f         |  553 +++
 costa/native/external/lapack/dsecnd.f         |   34 +
 costa/native/external/lapack/dspcon.f         |  158 +
 costa/native/external/lapack/dspev.f          |  188 +
 costa/native/external/lapack/dspevd.f         |  249 +
 costa/native/external/lapack/dspevx.f         |  377 ++
 costa/native/external/lapack/dspgst.f         |  209 +
 costa/native/external/lapack/dspgv.f          |  196 +
 costa/native/external/lapack/dspgvd.f         |  281 ++
 costa/native/external/lapack/dspgvx.f         |  286 ++
 costa/native/external/lapack/dsprfs.f         |  331 ++
 costa/native/external/lapack/dspsv.f          |  149 +
 costa/native/external/lapack/dspsvx.f         |  279 ++
 costa/native/external/lapack/dsptrd.f         |  229 +
 costa/native/external/lapack/dsptrf.f         |  548 +++
 costa/native/external/lapack/dsptri.f         |  335 ++
 costa/native/external/lapack/dsptrs.f         |  378 ++
 costa/native/external/lapack/dstebz.f         |  652 +++
 costa/native/external/lapack/dstedc.f         |  397 ++
 costa/native/external/lapack/dstegr.f         |  400 ++
 costa/native/external/lapack/dstein.f         |  362 ++
 costa/native/external/lapack/dsteqr.f         |  501 ++
 costa/native/external/lapack/dsterf.f         |  365 ++
 costa/native/external/lapack/dstev.f          |  165 +
 costa/native/external/lapack/dstevd.f         |  217 +
 costa/native/external/lapack/dstevr.f         |  434 ++
 costa/native/external/lapack/dstevx.f         |  346 ++
 costa/native/external/lapack/dsycon.f         |  161 +
 costa/native/external/lapack/dsyev.f          |  213 +
 costa/native/external/lapack/dsyevd.f         |  265 +
 costa/native/external/lapack/dsyevr.f         |  497 ++
 costa/native/external/lapack/dsyevx.f         |  421 ++
 costa/native/external/lapack/dsygs2.f         |  212 +
 costa/native/external/lapack/dsygst.f         |  250 +
 costa/native/external/lapack/dsygv.f          |  227 +
 costa/native/external/lapack/dsygvd.f         |  277 ++
 costa/native/external/lapack/dsygvx.f         |  326 ++
 costa/native/external/lapack/dsyrfs.f         |  335 ++
 costa/native/external/lapack/dsysv.f          |  171 +
 costa/native/external/lapack/dsysvx.f         |  297 ++
 costa/native/external/lapack/dsytd2.f         |  249 +
 costa/native/external/lapack/dsytf2.f         |  511 ++
 costa/native/external/lapack/dsytrd.f         |  295 ++
 costa/native/external/lapack/dsytrf.f         |  288 ++
 costa/native/external/lapack/dsytri.f         |  313 ++
 costa/native/external/lapack/dsytrs.f         |  370 ++
 costa/native/external/lapack/dtbcon.f         |  198 +
 costa/native/external/lapack/dtbrfs.f         |  381 ++
 costa/native/external/lapack/dtbtrs.f         |  163 +
 costa/native/external/lapack/dtgevc.f         | 1146 +++++
 costa/native/external/lapack/dtgex2.f         |  582 +++
 costa/native/external/lapack/dtgexc.f         |  434 ++
 costa/native/external/lapack/dtgsen.f         |  718 +++
 costa/native/external/lapack/dtgsja.f         |  516 ++
 costa/native/external/lapack/dtgsna.f         |  585 +++
 costa/native/external/lapack/dtgsy2.f         |  950 ++++
 costa/native/external/lapack/dtgsyl.f         |  534 ++
 costa/native/external/lapack/dtpcon.f         |  187 +
 costa/native/external/lapack/dtprfs.f         |  375 ++
 costa/native/external/lapack/dtptri.f         |  176 +
 costa/native/external/lapack/dtptrs.f         |  154 +
 costa/native/external/lapack/dtrcon.f         |  193 +
 costa/native/external/lapack/dtrevc.f         | 1005 ++++
 costa/native/external/lapack/dtrexc.f         |  346 ++
 costa/native/external/lapack/dtrrfs.f         |  371 ++
 costa/native/external/lapack/dtrsen.f         |  457 ++
 costa/native/external/lapack/dtrsna.f         |  493 ++
 costa/native/external/lapack/dtrsyl.f         |  914 ++++
 costa/native/external/lapack/dtrti2.f         |  147 +
 costa/native/external/lapack/dtrtri.f         |  177 +
 costa/native/external/lapack/dtrtrs.f         |  148 +
 costa/native/external/lapack/dtzrqf.f         |  165 +
 costa/native/external/lapack/dtzrzf.f         |  241 +
 costa/native/external/lapack/dzsum1.f         |   82 +
 costa/native/external/lapack/icmax1.f         |   96 +
 costa/native/external/lapack/ieeeck.f         |  148 +
 costa/native/external/lapack/ilaenv.f         |  547 +++
 costa/native/external/lapack/izmax1.f         |   96 +
 costa/native/external/lapack/lsame.f          |   87 +
 costa/native/external/lapack/lsamen.f         |   68 +
 costa/native/external/lapack/sbdsdc.f         |  427 ++
 costa/native/external/lapack/sbdsqr.f         |  733 +++
 costa/native/external/lapack/scsum1.f         |   82 +
 costa/native/external/lapack/sdisna.f         |  180 +
 costa/native/external/lapack/second.f         |   34 +
 costa/native/external/lapack/sgbbrd.f         |  444 ++
 costa/native/external/lapack/sgbcon.f         |  222 +
 costa/native/external/lapack/sgbequ.f         |  240 +
 costa/native/external/lapack/sgbrfs.f         |  351 ++
 costa/native/external/lapack/sgbsv.f          |  143 +
 costa/native/external/lapack/sgbsvx.f         |  517 ++
 costa/native/external/lapack/sgbtf2.f         |  203 +
 costa/native/external/lapack/sgbtrf.f         |  442 ++
 costa/native/external/lapack/sgbtrs.f         |  187 +
 costa/native/external/lapack/sgebak.f         |  189 +
 costa/native/external/lapack/sgebal.f         |  323 ++
 costa/native/external/lapack/sgebd2.f         |  238 +
 costa/native/external/lapack/sgebrd.f         |  269 ++
 costa/native/external/lapack/sgecon.f         |  181 +
 costa/native/external/lapack/sgeequ.f         |  226 +
 costa/native/external/lapack/sgees.f          |  431 ++
 costa/native/external/lapack/sgeesx.f         |  502 ++
 costa/native/external/lapack/sgeev.f          |  410 ++
 costa/native/external/lapack/sgeevx.f         |  543 +++
 costa/native/external/lapack/sgegs.f          |  470 ++
 costa/native/external/lapack/sgegv.f          |  641 +++
 costa/native/external/lapack/sgehd2.f         |  150 +
 costa/native/external/lapack/sgehrd.f         |  255 +
 costa/native/external/lapack/sgelq2.f         |  122 +
 costa/native/external/lapack/sgelqf.f         |  196 +
 costa/native/external/lapack/sgels.f          |  403 ++
 costa/native/external/lapack/sgelsd.f         |  530 ++
 costa/native/external/lapack/sgelss.f         |  613 +++
 costa/native/external/lapack/sgelsx.f         |  350 ++
 costa/native/external/lapack/sgelsy.f         |  379 ++
 costa/native/external/lapack/sgeql2.f         |  123 +
 costa/native/external/lapack/sgeqlf.f         |  205 +
 costa/native/external/lapack/sgeqp3.f         |  279 ++
 costa/native/external/lapack/sgeqpf.f         |  221 +
 costa/native/external/lapack/sgeqr2.f         |  122 +
 costa/native/external/lapack/sgeqrf.f         |  197 +
 costa/native/external/lapack/sgerfs.f         |  332 ++
 costa/native/external/lapack/sgerq2.f         |  123 +
 costa/native/external/lapack/sgerqf.f         |  205 +
 costa/native/external/lapack/sgesc2.f         |  133 +
 costa/native/external/lapack/sgesdd.f         | 1335 +++++
 costa/native/external/lapack/sgesv.f          |  108 +
 costa/native/external/lapack/sgesvd.f         | 3417 +++++++++++++
 costa/native/external/lapack/sgesvx.f         |  482 ++
 costa/native/external/lapack/sgetc2.f         |  147 +
 costa/native/external/lapack/sgetf2.f         |  135 +
 costa/native/external/lapack/sgetrf.f         |  160 +
 costa/native/external/lapack/sgetri.f         |  193 +
 costa/native/external/lapack/sgetrs.f         |  150 +
 costa/native/external/lapack/sggbak.f         |  216 +
 costa/native/external/lapack/sggbal.f         |  461 ++
 costa/native/external/lapack/sgges.f          |  548 +++
 costa/native/external/lapack/sggesx.f         |  640 +++
 costa/native/external/lapack/sggev.f          |  482 ++
 costa/native/external/lapack/sggevx.f         |  698 +++
 costa/native/external/lapack/sggglm.f         |  212 +
 costa/native/external/lapack/sgghrd.f         |  253 +
 costa/native/external/lapack/sgglse.f         |  217 +
 costa/native/external/lapack/sggqrf.f         |  212 +
 costa/native/external/lapack/sggrqf.f         |  212 +
 costa/native/external/lapack/sggsvd.f         |  336 ++
 costa/native/external/lapack/sggsvp.f         |  394 ++
 costa/native/external/lapack/sgtcon.f         |  166 +
 costa/native/external/lapack/sgtrfs.f         |  357 ++
 costa/native/external/lapack/sgtsv.f          |  263 +
 costa/native/external/lapack/sgtsvx.f         |  293 ++
 costa/native/external/lapack/sgttrf.f         |  169 +
 costa/native/external/lapack/sgttrs.f         |  141 +
 costa/native/external/lapack/sgtts2.f         |  197 +
 costa/native/external/lapack/shgeqz.f         | 1243 +++++
 costa/native/external/lapack/shsein.f         |  412 ++
 costa/native/external/lapack/shseqr.f         |  467 ++
 costa/native/external/lapack/slabad.f         |   56 +
 costa/native/external/lapack/slabrd.f         |  291 ++
 costa/native/external/lapack/slacon.f         |  204 +
 costa/native/external/lapack/slacpy.f         |   88 +
 costa/native/external/lapack/sladiv.f         |   63 +
 costa/native/external/lapack/slae2.f          |  124 +
 costa/native/external/lapack/slaebz.f         |  552 +++
 costa/native/external/lapack/slaed0.f         |  350 ++
 costa/native/external/lapack/slaed1.f         |  196 +
 costa/native/external/lapack/slaed2.f         |  435 ++
 costa/native/external/lapack/slaed3.f         |  265 +
 costa/native/external/lapack/slaed4.f         |  846 ++++
 costa/native/external/lapack/slaed5.f         |  125 +
 costa/native/external/lapack/slaed6.f         |  299 ++
 costa/native/external/lapack/slaed7.f         |  288 ++
 costa/native/external/lapack/slaed8.f         |  400 ++
 costa/native/external/lapack/slaed9.f         |  206 +
 costa/native/external/lapack/slaeda.f         |  218 +
 costa/native/external/lapack/slaein.f         |  532 ++
 costa/native/external/lapack/slaev2.f         |  170 +
 costa/native/external/lapack/slaexc.f         |  354 ++
 costa/native/external/lapack/slag2.f          |  301 ++
 costa/native/external/lapack/slags2.f         |  270 ++
 costa/native/external/lapack/slagtf.f         |  191 +
 costa/native/external/lapack/slagtm.f         |  191 +
 costa/native/external/lapack/slagts.f         |  305 ++
 costa/native/external/lapack/slagv2.f         |  290 ++
 costa/native/external/lapack/slahqr.f         |  435 ++
 costa/native/external/lapack/slahrd.f         |  206 +
 costa/native/external/lapack/slaic1.f         |  293 ++
 costa/native/external/lapack/slaln2.f         |  508 ++
 costa/native/external/lapack/slals0.f         |  375 ++
 costa/native/external/lapack/slalsa.f         |  363 ++
 costa/native/external/lapack/slalsd.f         |  433 ++
 costa/native/external/lapack/slamch.f         |  857 ++++
 costa/native/external/lapack/slamrg.f         |  104 +
 costa/native/external/lapack/slangb.f         |  155 +
 costa/native/external/lapack/slange.f         |  145 +
 costa/native/external/lapack/slangt.f         |  142 +
 costa/native/external/lapack/slanhs.f         |  142 +
 costa/native/external/lapack/slansb.f         |  187 +
 costa/native/external/lapack/slansp.f         |  197 +
 costa/native/external/lapack/slanst.f         |  125 +
 costa/native/external/lapack/slansy.f         |  174 +
 costa/native/external/lapack/slantb.f         |  285 ++
 costa/native/external/lapack/slantp.f         |  286 ++
 costa/native/external/lapack/slantr.f         |  277 ++
 costa/native/external/lapack/slanv2.f         |  206 +
 costa/native/external/lapack/slapll.f         |  100 +
 costa/native/external/lapack/slapmt.f         |  135 +
 costa/native/external/lapack/slapy2.f         |   54 +
 costa/native/external/lapack/slapy3.f         |   54 +
 costa/native/external/lapack/slaqgb.f         |  169 +
 costa/native/external/lapack/slaqge.f         |  155 +
 costa/native/external/lapack/slaqp2.f         |  166 +
 costa/native/external/lapack/slaqps.f         |  245 +
 costa/native/external/lapack/slaqsb.f         |  149 +
 costa/native/external/lapack/slaqsp.f         |  141 +
 costa/native/external/lapack/slaqsy.f         |  142 +
 costa/native/external/lapack/slaqtr.f         |  666 +++
 costa/native/external/lapack/slar1v.f         |  323 ++
 costa/native/external/lapack/slar2v.f         |   87 +
 costa/native/external/lapack/slarf.f          |  116 +
 costa/native/external/lapack/slarfb.f         |  588 +++
 costa/native/external/lapack/slarfg.f         |  138 +
 costa/native/external/lapack/slarft.f         |  218 +
 costa/native/external/lapack/slarfx.f         |  638 +++
 costa/native/external/lapack/slargv.f         |  100 +
 costa/native/external/lapack/slarnv.f         |  116 +
 costa/native/external/lapack/slarrb.f         |  282 ++
 costa/native/external/lapack/slarre.f         |  318 ++
 costa/native/external/lapack/slarrf.f         |  150 +
 costa/native/external/lapack/slarrv.f         |  417 ++
 costa/native/external/lapack/slartg.f         |  143 +
 costa/native/external/lapack/slartv.f         |   77 +
 costa/native/external/lapack/slaruv.f         |  368 ++
 costa/native/external/lapack/slarz.f          |  153 +
 costa/native/external/lapack/slarzb.f         |  221 +
 costa/native/external/lapack/slarzt.f         |  185 +
 costa/native/external/lapack/slas2.f          |  122 +
 costa/native/external/lapack/slascl.f         |  268 +
 costa/native/external/lapack/slasd0.f         |  231 +
 costa/native/external/lapack/slasd1.f         |  233 +
 costa/native/external/lapack/slasd2.f         |  513 ++
 costa/native/external/lapack/slasd3.f         |  359 ++
 costa/native/external/lapack/slasd4.f         |  891 ++++
 costa/native/external/lapack/slasd5.f         |  164 +
 costa/native/external/lapack/slasd6.f         |  306 ++
 costa/native/external/lapack/slasd7.f         |  445 ++
 costa/native/external/lapack/slasd8.f         |  254 +
 costa/native/external/lapack/slasd9.f         |  256 +
 costa/native/external/lapack/slasda.f         |  391 ++
 costa/native/external/lapack/slasdq.f         |  317 ++
 costa/native/external/lapack/slasdt.f         |  106 +
 costa/native/external/lapack/slaset.f         |  115 +
 costa/native/external/lapack/slasq1.f         |  149 +
 costa/native/external/lapack/slasq2.f         |  436 ++
 costa/native/external/lapack/slasq3.f         |  298 ++
 costa/native/external/lapack/slasq4.f         |  330 ++
 costa/native/external/lapack/slasq5.f         |  196 +
 costa/native/external/lapack/slasq6.f         |  176 +
 costa/native/external/lapack/slasr.f          |  325 ++
 costa/native/external/lapack/slasrt.f         |  244 +
 costa/native/external/lapack/slassq.f         |   89 +
 costa/native/external/lapack/slasv2.f         |  250 +
 costa/native/external/lapack/slaswp.f         |  120 +
 costa/native/external/lapack/slasy2.f         |  382 ++
 costa/native/external/lapack/slasyf.f         |  588 +++
 costa/native/external/lapack/slatbs.f         |  724 +++
 costa/native/external/lapack/slatdf.f         |  238 +
 costa/native/external/lapack/slatps.f         |  713 +++
 costa/native/external/lapack/slatrd.f         |  259 +
 costa/native/external/lapack/slatrs.f         |  702 +++
 costa/native/external/lapack/slatrz.f         |  128 +
 costa/native/external/lapack/slatzm.f         |  143 +
 costa/native/external/lapack/slauu2.f         |  136 +
 costa/native/external/lapack/slauum.f         |  156 +
 costa/native/external/lapack/sopgtr.f         |  161 +
 costa/native/external/lapack/sopmtr.f         |  258 +
 costa/native/external/lapack/sorg2l.f         |  128 +
 costa/native/external/lapack/sorg2r.f         |  130 +
 costa/native/external/lapack/sorgbr.f         |  245 +
 costa/native/external/lapack/sorghr.f         |  165 +
 costa/native/external/lapack/sorgl2.f         |  134 +
 costa/native/external/lapack/sorglq.f         |  216 +
 costa/native/external/lapack/sorgql.f         |  214 +
 costa/native/external/lapack/sorgqr.f         |  217 +
 costa/native/external/lapack/sorgr2.f         |  132 +
 costa/native/external/lapack/sorgrq.f         |  214 +
 costa/native/external/lapack/sorgtr.f         |  184 +
 costa/native/external/lapack/sorm2l.f         |  194 +
 costa/native/external/lapack/sorm2r.f         |  198 +
 costa/native/external/lapack/sormbr.f         |  283 ++
 costa/native/external/lapack/sormhr.f         |  203 +
 costa/native/external/lapack/sorml2.f         |  198 +
 costa/native/external/lapack/sormlq.f         |  269 ++
 costa/native/external/lapack/sormql.f         |  258 +
 costa/native/external/lapack/sormqr.f         |  262 +
 costa/native/external/lapack/sormr2.f         |  194 +
 costa/native/external/lapack/sormr3.f         |  207 +
 costa/native/external/lapack/sormrq.f         |  265 +
 costa/native/external/lapack/sormrz.f         |  289 ++
 costa/native/external/lapack/sormtr.f         |  224 +
 costa/native/external/lapack/spbcon.f         |  188 +
 costa/native/external/lapack/spbequ.f         |  167 +
 costa/native/external/lapack/spbrfs.f         |  337 ++
 costa/native/external/lapack/spbstf.f         |  251 +
 costa/native/external/lapack/spbsv.f          |  152 +
 costa/native/external/lapack/spbsvx.f         |  424 ++
 costa/native/external/lapack/spbtf2.f         |  195 +
 costa/native/external/lapack/spbtrf.f         |  365 ++
 costa/native/external/lapack/spbtrs.f         |  146 +
 costa/native/external/lapack/spocon.f         |  173 +
 costa/native/external/lapack/spoequ.f         |  137 +
 costa/native/external/lapack/sporfs.f         |  327 ++
 costa/native/external/lapack/sposv.f          |  122 +
 costa/native/external/lapack/sposvx.f         |  379 ++
 costa/native/external/lapack/spotf2.f         |  168 +
 costa/native/external/lapack/spotrf.f         |  184 +
 costa/native/external/lapack/spotri.f         |   97 +
 costa/native/external/lapack/spotrs.f         |  133 +
 costa/native/external/lapack/sppcon.f         |  172 +
 costa/native/external/lapack/sppequ.f         |  169 +
 costa/native/external/lapack/spprfs.f         |  324 ++
 costa/native/external/lapack/sppsv.f          |  134 +
 costa/native/external/lapack/sppsvx.f         |  383 ++
 costa/native/external/lapack/spptrf.f         |  178 +
 costa/native/external/lapack/spptri.f         |  129 +
 costa/native/external/lapack/spptrs.f         |  135 +
 costa/native/external/lapack/sptcon.f         |  150 +
 costa/native/external/lapack/spteqr.f         |  190 +
 costa/native/external/lapack/sptrfs.f         |  302 ++
 costa/native/external/lapack/sptsv.f          |  100 +
 costa/native/external/lapack/sptsvx.f         |  235 +
 costa/native/external/lapack/spttrf.f         |  153 +
 costa/native/external/lapack/spttrs.f         |  115 +
 costa/native/external/lapack/sptts2.f         |   94 +
 costa/native/external/lapack/srscl.f          |  115 +
 costa/native/external/lapack/ssbev.f          |  206 +
 costa/native/external/lapack/ssbevd.f         |  265 +
 costa/native/external/lapack/ssbevx.f         |  411 ++
 costa/native/external/lapack/ssbgst.f         | 1346 ++++++
 costa/native/external/lapack/ssbgv.f          |  189 +
 costa/native/external/lapack/ssbgvd.f         |  270 ++
 costa/native/external/lapack/ssbgvx.f         |  371 ++
 costa/native/external/lapack/ssbtrd.f         |  553 +++
 costa/native/external/lapack/sspcon.f         |  158 +
 costa/native/external/lapack/sspev.f          |  188 +
 costa/native/external/lapack/sspevd.f         |  249 +
 costa/native/external/lapack/sspevx.f         |  377 ++
 costa/native/external/lapack/sspgst.f         |  209 +
 costa/native/external/lapack/sspgv.f          |  196 +
 costa/native/external/lapack/sspgvd.f         |  281 ++
 costa/native/external/lapack/sspgvx.f         |  286 ++
 costa/native/external/lapack/ssprfs.f         |  331 ++
 costa/native/external/lapack/sspsv.f          |  149 +
 costa/native/external/lapack/sspsvx.f         |  279 ++
 costa/native/external/lapack/ssptrd.f         |  228 +
 costa/native/external/lapack/ssptrf.f         |  548 +++
 costa/native/external/lapack/ssptri.f         |  335 ++
 costa/native/external/lapack/ssptrs.f         |  378 ++
 costa/native/external/lapack/sstebz.f         |  651 +++
 costa/native/external/lapack/sstedc.f         |  397 ++
 costa/native/external/lapack/sstegr.f         |  400 ++
 costa/native/external/lapack/sstein.f         |  362 ++
 costa/native/external/lapack/ssteqr.f         |  501 ++
 costa/native/external/lapack/ssterf.f         |  365 ++
 costa/native/external/lapack/sstev.f          |  165 +
 costa/native/external/lapack/sstevd.f         |  217 +
 costa/native/external/lapack/sstevr.f         |  434 ++
 costa/native/external/lapack/sstevx.f         |  346 ++
 costa/native/external/lapack/ssycon.f         |  161 +
 costa/native/external/lapack/ssyev.f          |  213 +
 costa/native/external/lapack/ssyevd.f         |  265 +
 costa/native/external/lapack/ssyevr.f         |  497 ++
 costa/native/external/lapack/ssyevx.f         |  421 ++
 costa/native/external/lapack/ssygs2.f         |  212 +
 costa/native/external/lapack/ssygst.f         |  250 +
 costa/native/external/lapack/ssygv.f          |  227 +
 costa/native/external/lapack/ssygvd.f         |  277 ++
 costa/native/external/lapack/ssygvx.f         |  325 ++
 costa/native/external/lapack/ssyrfs.f         |  335 ++
 costa/native/external/lapack/ssysv.f          |  171 +
 costa/native/external/lapack/ssysvx.f         |  297 ++
 costa/native/external/lapack/ssytd2.f         |  248 +
 costa/native/external/lapack/ssytf2.f         |  511 ++
 costa/native/external/lapack/ssytrd.f         |  295 ++
 costa/native/external/lapack/ssytrf.f         |  288 ++
 costa/native/external/lapack/ssytri.f         |  313 ++
 costa/native/external/lapack/ssytrs.f         |  370 ++
 costa/native/external/lapack/stbcon.f         |  198 +
 costa/native/external/lapack/stbrfs.f         |  381 ++
 costa/native/external/lapack/stbtrs.f         |  163 +
 costa/native/external/lapack/stgevc.f         | 1146 +++++
 costa/native/external/lapack/stgex2.f         |  582 +++
 costa/native/external/lapack/stgexc.f         |  434 ++
 costa/native/external/lapack/stgsen.f         |  718 +++
 costa/native/external/lapack/stgsja.f         |  516 ++
 costa/native/external/lapack/stgsna.f         |  585 +++
 costa/native/external/lapack/stgsy2.f         |  950 ++++
 costa/native/external/lapack/stgsyl.f         |  534 ++
 costa/native/external/lapack/stpcon.f         |  187 +
 costa/native/external/lapack/stprfs.f         |  375 ++
 costa/native/external/lapack/stptri.f         |  176 +
 costa/native/external/lapack/stptrs.f         |  154 +
 costa/native/external/lapack/strcon.f         |  193 +
 costa/native/external/lapack/strevc.f         | 1006 ++++
 costa/native/external/lapack/strexc.f         |  346 ++
 costa/native/external/lapack/strrfs.f         |  371 ++
 costa/native/external/lapack/strsen.f         |  457 ++
 costa/native/external/lapack/strsna.f         |  493 ++
 costa/native/external/lapack/strsyl.f         |  914 ++++
 costa/native/external/lapack/strti2.f         |  147 +
 costa/native/external/lapack/strtri.f         |  177 +
 costa/native/external/lapack/strtrs.f         |  148 +
 costa/native/external/lapack/stzrqf.f         |  165 +
 costa/native/external/lapack/stzrzf.f         |  241 +
 costa/native/external/lapack/xerbla.f         |   46 +
 costa/native/external/lapack/zbdsqr.f         |  733 +++
 costa/native/external/lapack/zdrot.f          |   54 +
 costa/native/external/lapack/zdrscl.f         |  115 +
 costa/native/external/lapack/zgbbrd.f         |  466 ++
 costa/native/external/lapack/zgbcon.f         |  230 +
 costa/native/external/lapack/zgbequ.f         |  248 +
 costa/native/external/lapack/zgbrfs.f         |  361 ++
 costa/native/external/lapack/zgbsv.f          |  143 +
 costa/native/external/lapack/zgbsvx.f         |  518 ++
 costa/native/external/lapack/zgbtf2.f         |  203 +
 costa/native/external/lapack/zgbtrf.f         |  443 ++
 costa/native/external/lapack/zgbtrs.f         |  215 +
 costa/native/external/lapack/zgebak.f         |  190 +
 costa/native/external/lapack/zgebal.f         |  331 ++
 costa/native/external/lapack/zgebd2.f         |  249 +
 costa/native/external/lapack/zgebrd.f         |  269 ++
 costa/native/external/lapack/zgecon.f         |  189 +
 costa/native/external/lapack/zgeequ.f         |  234 +
 costa/native/external/lapack/zgees.f          |  322 ++
 costa/native/external/lapack/zgeesx.f         |  371 ++
 costa/native/external/lapack/zgeev.f          |  391 ++
 costa/native/external/lapack/zgeevx.f         |  521 ++
 costa/native/external/lapack/zgegs.f          |  442 ++
 costa/native/external/lapack/zgegv.f          |  591 +++
 costa/native/external/lapack/zgehd2.f         |  149 +
 costa/native/external/lapack/zgehrd.f         |  254 +
 costa/native/external/lapack/zgelq2.f         |  124 +
 costa/native/external/lapack/zgelqf.f         |  196 +
 costa/native/external/lapack/zgels.f          |  405 ++
 costa/native/external/lapack/zgelsd.f         |  545 +++
 costa/native/external/lapack/zgelss.f         |  639 +++
 costa/native/external/lapack/zgelsx.f         |  358 ++
 costa/native/external/lapack/zgelsy.f         |  386 ++
 costa/native/external/lapack/zgeql2.f         |  122 +
 costa/native/external/lapack/zgeqlf.f         |  205 +
 costa/native/external/lapack/zgeqp3.f         |  285 ++
 costa/native/external/lapack/zgeqpf.f         |  225 +
 costa/native/external/lapack/zgeqr2.f         |  122 +
 costa/native/external/lapack/zgeqrf.f         |  197 +
 costa/native/external/lapack/zgerfs.f         |  341 ++
 costa/native/external/lapack/zgerq2.f         |  124 +
 costa/native/external/lapack/zgerqf.f         |  205 +
 costa/native/external/lapack/zgesc2.f         |  134 +
 costa/native/external/lapack/zgesdd.f         | 1950 ++++++++
 costa/native/external/lapack/zgesv.f          |  108 +
 costa/native/external/lapack/zgesvd.f         | 3618 ++++++++++++++
 costa/native/external/lapack/zgesvx.f         |  484 ++
 costa/native/external/lapack/zgetc2.f         |  146 +
 costa/native/external/lapack/zgetf2.f         |  136 +
 costa/native/external/lapack/zgetrf.f         |  160 +
 costa/native/external/lapack/zgetri.f         |  194 +
 costa/native/external/lapack/zgetrs.f         |  150 +
 costa/native/external/lapack/zggbak.f         |  216 +
 costa/native/external/lapack/zggbal.f         |  474 ++
 costa/native/external/lapack/zgges.f          |  475 ++
 costa/native/external/lapack/zggesx.f         |  544 +++
 costa/native/external/lapack/zggev.f          |  448 ++
 costa/native/external/lapack/zggevx.f         |  640 +++
 costa/native/external/lapack/zggglm.f         |  213 +
 costa/native/external/lapack/zgghrd.f         |  256 +
 costa/native/external/lapack/zgglse.f         |  218 +
 costa/native/external/lapack/zggqrf.f         |  212 +
 costa/native/external/lapack/zggrqf.f         |  212 +
 costa/native/external/lapack/zggsvd.f         |  334 ++
 costa/native/external/lapack/zggsvp.f         |  403 ++
 costa/native/external/lapack/zgtcon.f         |  167 +
 costa/native/external/lapack/zgtrfs.f         |  369 ++
 costa/native/external/lapack/zgtsv.f          |  174 +
 costa/native/external/lapack/zgtsvx.f         |  294 ++
 costa/native/external/lapack/zgttrf.f         |  175 +
 costa/native/external/lapack/zgttrs.f         |  143 +
 costa/native/external/lapack/zgtts2.f         |  272 ++
 costa/native/external/lapack/zhbev.f          |  209 +
 costa/native/external/lapack/zhbevd.f         |  298 ++
 costa/native/external/lapack/zhbevx.f         |  417 ++
 costa/native/external/lapack/zhbgst.f         | 1378 ++++++
 costa/native/external/lapack/zhbgv.f          |  192 +
 costa/native/external/lapack/zhbgvd.f         |  295 ++
 costa/native/external/lapack/zhbgvx.f         |  374 ++
 costa/native/external/lapack/zhbtrd.f         |  589 +++
 costa/native/external/lapack/zhecon.f         |  159 +
 costa/native/external/lapack/zheev.f          |  220 +
 costa/native/external/lapack/zheevd.f         |  296 ++
 costa/native/external/lapack/zheevr.f         |  522 ++
 costa/native/external/lapack/zheevx.f         |  426 ++
 costa/native/external/lapack/zhegs2.f         |  225 +
 costa/native/external/lapack/zhegst.f         |  260 +
 costa/native/external/lapack/zhegv.f          |  229 +
 costa/native/external/lapack/zhegvd.f         |  297 ++
 costa/native/external/lapack/zhegvx.f         |  330 ++
 costa/native/external/lapack/zherfs.f         |  339 ++
 costa/native/external/lapack/zhesv.f          |  171 +
 costa/native/external/lapack/zhesvx.f         |  299 ++
 costa/native/external/lapack/zhetd2.f         |  259 +
 costa/native/external/lapack/zhetf2.f         |  546 +++
 costa/native/external/lapack/zhetrd.f         |  297 ++
 costa/native/external/lapack/zhetrf.f         |  282 ++
 costa/native/external/lapack/zhetri.f         |  328 ++
 costa/native/external/lapack/zhetrs.f         |  394 ++
 costa/native/external/lapack/zhgeqz.f         |  735 +++
 costa/native/external/lapack/zhpcon.f         |  155 +
 costa/native/external/lapack/zhpev.f          |  197 +
 costa/native/external/lapack/zhpevd.f         |  280 ++
 costa/native/external/lapack/zhpevx.f         |  384 ++
 costa/native/external/lapack/zhpgst.f         |  216 +
 costa/native/external/lapack/zhpgv.f          |  197 +
 costa/native/external/lapack/zhpgvd.f         |  291 ++
 costa/native/external/lapack/zhpgvx.f         |  284 ++
 costa/native/external/lapack/zhprfs.f         |  337 ++
 costa/native/external/lapack/zhpsv.f          |  149 +
 costa/native/external/lapack/zhpsvx.f         |  279 ++
 costa/native/external/lapack/zhptrd.f         |  238 +
 costa/native/external/lapack/zhptrf.f         |  582 +++
 costa/native/external/lapack/zhptri.f         |  344 ++
 costa/native/external/lapack/zhptrs.f         |  402 ++
 costa/native/external/lapack/zhsein.f         |  351 ++
 costa/native/external/lapack/zhseqr.f         |  474 ++
 costa/native/external/lapack/zlabrd.f         |  329 ++
 costa/native/external/lapack/zlacgv.f         |   61 +
 costa/native/external/lapack/zlacon.f         |  211 +
 costa/native/external/lapack/zlacp2.f         |   92 +
 costa/native/external/lapack/zlacpy.f         |   91 +
 costa/native/external/lapack/zlacrm.f         |  111 +
 costa/native/external/lapack/zlacrt.f         |   91 +
 costa/native/external/lapack/zladiv.f         |   47 +
 costa/native/external/lapack/zlaed0.f         |  289 ++
 costa/native/external/lapack/zlaed7.f         |  267 +
 costa/native/external/lapack/zlaed8.f         |  364 ++
 costa/native/external/lapack/zlaein.f         |  264 +
 costa/native/external/lapack/zlaesy.f         |  153 +
 costa/native/external/lapack/zlaev2.f         |   96 +
 costa/native/external/lapack/zlags2.f         |  309 ++
 costa/native/external/lapack/zlagtm.f         |  234 +
 costa/native/external/lapack/zlahef.f         |  648 +++
 costa/native/external/lapack/zlahqr.f         |  384 ++
 costa/native/external/lapack/zlahrd.f         |  212 +
 costa/native/external/lapack/zlaic1.f         |  296 ++
 costa/native/external/lapack/zlals0.f         |  434 ++
 costa/native/external/lapack/zlalsa.f         |  504 ++
 costa/native/external/lapack/zlalsd.f         |  599 +++
 costa/native/external/lapack/zlangb.f         |  155 +
 costa/native/external/lapack/zlange.f         |  146 +
 costa/native/external/lapack/zlangt.f         |  142 +
 costa/native/external/lapack/zlanhb.f         |  202 +
 costa/native/external/lapack/zlanhe.f         |  188 +
 costa/native/external/lapack/zlanhp.f         |  202 +
 costa/native/external/lapack/zlanhs.f         |  143 +
 costa/native/external/lapack/zlanht.f         |  126 +
 costa/native/external/lapack/zlansb.f         |  188 +
 costa/native/external/lapack/zlansp.f         |  207 +
 costa/native/external/lapack/zlansy.f         |  175 +
 costa/native/external/lapack/zlantb.f         |  286 ++
 costa/native/external/lapack/zlantp.f         |  287 ++
 costa/native/external/lapack/zlantr.f         |  278 ++
 costa/native/external/lapack/zlapll.f         |  104 +
 costa/native/external/lapack/zlapmt.f         |  135 +
 costa/native/external/lapack/zlaqgb.f         |  170 +
 costa/native/external/lapack/zlaqge.f         |  156 +
 costa/native/external/lapack/zlaqhb.f         |  152 +
 costa/native/external/lapack/zlaqhe.f         |  148 +
 costa/native/external/lapack/zlaqhp.f         |  147 +
 costa/native/external/lapack/zlaqp2.f         |  170 +
 costa/native/external/lapack/zlaqps.f         |  260 +
 costa/native/external/lapack/zlaqsb.f         |  150 +
 costa/native/external/lapack/zlaqsp.f         |  142 +
 costa/native/external/lapack/zlaqsy.f         |  143 +
 costa/native/external/lapack/zlar1v.f         |  328 ++
 costa/native/external/lapack/zlar2v.f         |   98 +
 costa/native/external/lapack/zlarcm.f         |  111 +
 costa/native/external/lapack/zlarf.f          |  121 +
 costa/native/external/lapack/zlarfb.f         |  609 +++
 costa/native/external/lapack/zlarfg.f         |  146 +
 costa/native/external/lapack/zlarft.f         |  225 +
 costa/native/external/lapack/zlarfx.f         |  642 +++
 costa/native/external/lapack/zlargv.f         |  225 +
 costa/native/external/lapack/zlarnv.f         |  131 +
 costa/native/external/lapack/zlarrv.f         |  437 ++
 costa/native/external/lapack/zlartg.f         |  193 +
 costa/native/external/lapack/zlartv.f         |   79 +
 costa/native/external/lapack/zlarz.f          |  158 +
 costa/native/external/lapack/zlarzb.f         |  235 +
 costa/native/external/lapack/zlarzt.f         |  187 +
 costa/native/external/lapack/zlascl.f         |  268 +
 costa/native/external/lapack/zlaset.f         |  115 +
 costa/native/external/lapack/zlasr.f          |  325 ++
 costa/native/external/lapack/zlassq.f         |  102 +
 costa/native/external/lapack/zlaswp.f         |  120 +
 costa/native/external/lapack/zlasyf.f         |  598 +++
 costa/native/external/lapack/zlatbs.f         |  909 ++++
 costa/native/external/lapack/zlatdf.f         |  242 +
 costa/native/external/lapack/zlatps.f         |  895 ++++
 costa/native/external/lapack/zlatrd.f         |  280 ++
 costa/native/external/lapack/zlatrs.f         |  880 ++++
 costa/native/external/lapack/zlatrz.f         |  134 +
 costa/native/external/lapack/zlatzm.f         |  147 +
 costa/native/external/lapack/zlauu2.f         |  144 +
 costa/native/external/lapack/zlauum.f         |  161 +
 costa/native/external/lapack/zpbcon.f         |  194 +
 costa/native/external/lapack/zpbequ.f         |  168 +
 costa/native/external/lapack/zpbrfs.f         |  342 ++
 costa/native/external/lapack/zpbstf.f         |  264 +
 costa/native/external/lapack/zpbsv.f          |  152 +
 costa/native/external/lapack/zpbsvx.f         |  423 ++
 costa/native/external/lapack/zpbtf2.f         |  201 +
 costa/native/external/lapack/zpbtrf.f         |  372 ++
 costa/native/external/lapack/zpbtrs.f         |  146 +
 costa/native/external/lapack/zpocon.f         |  180 +
 costa/native/external/lapack/zpoequ.f         |  138 +
 costa/native/external/lapack/zporfs.f         |  333 ++
 costa/native/external/lapack/zposv.f          |  122 +
 costa/native/external/lapack/zposvx.f         |  378 ++
 costa/native/external/lapack/zpotf2.f         |  175 +
 costa/native/external/lapack/zpotrf.f         |  187 +
 costa/native/external/lapack/zpotri.f         |   97 +
 costa/native/external/lapack/zpotrs.f         |  133 +
 costa/native/external/lapack/zppcon.f         |  179 +
 costa/native/external/lapack/zppequ.f         |  170 +
 costa/native/external/lapack/zpprfs.f         |  331 ++
 costa/native/external/lapack/zppsv.f          |  134 +
 costa/native/external/lapack/zppsvx.f         |  383 ++
 costa/native/external/lapack/zpptrf.f         |  179 +
 costa/native/external/lapack/zpptri.f         |  131 +
 costa/native/external/lapack/zpptrs.f         |  135 +
 costa/native/external/lapack/zptcon.f         |  151 +
 costa/native/external/lapack/zpteqr.f         |  191 +
 costa/native/external/lapack/zptrfs.f         |  367 ++
 costa/native/external/lapack/zptsv.f          |  101 +
 costa/native/external/lapack/zptsvx.f         |  238 +
 costa/native/external/lapack/zpttrf.f         |  169 +
 costa/native/external/lapack/zpttrs.f         |  136 +
 costa/native/external/lapack/zptts2.f         |  177 +
 costa/native/external/lapack/zrot.f           |   92 +
 costa/native/external/lapack/zspcon.f         |  155 +
 costa/native/external/lapack/zspmv.f          |  265 +
 costa/native/external/lapack/zspr.f           |  214 +
 costa/native/external/lapack/zsprfs.f         |  336 ++
 costa/native/external/lapack/zspsv.f          |  149 +
 costa/native/external/lapack/zspsvx.f         |  279 ++
 costa/native/external/lapack/zsptrf.f         |  556 +++
 costa/native/external/lapack/zsptri.f         |  338 ++
 costa/native/external/lapack/zsptrs.f         |  378 ++
 costa/native/external/lapack/zstedc.f         |  390 ++
 costa/native/external/lapack/zstegr.f         |  405 ++
 costa/native/external/lapack/zstein.f         |  377 ++
 costa/native/external/lapack/zsteqr.f         |  504 ++
 costa/native/external/lapack/zsycon.f         |  159 +
 costa/native/external/lapack/zsymv.f          |  265 +
 costa/native/external/lapack/zsyr.f           |  199 +
 costa/native/external/lapack/zsyrfs.f         |  339 ++
 costa/native/external/lapack/zsysv.f          |  171 +
 costa/native/external/lapack/zsysvx.f         |  299 ++
 costa/native/external/lapack/zsytf2.f         |  515 ++
 costa/native/external/lapack/zsytrf.f         |  287 ++
 costa/native/external/lapack/zsytri.f         |  314 ++
 costa/native/external/lapack/zsytrs.f         |  370 ++
 costa/native/external/lapack/ztbcon.f         |  205 +
 costa/native/external/lapack/ztbrfs.f         |  393 ++
 costa/native/external/lapack/ztbtrs.f         |  163 +
 costa/native/external/lapack/ztgevc.f         |  632 +++
 costa/native/external/lapack/ztgex2.f         |  268 +
 costa/native/external/lapack/ztgexc.f         |  207 +
 costa/native/external/lapack/ztgsen.f         |  645 +++
 costa/native/external/lapack/ztgsja.f         |  526 ++
 costa/native/external/lapack/ztgsna.f         |  402 ++
 costa/native/external/lapack/ztgsy2.f         |  357 ++
 costa/native/external/lapack/ztgsyl.f         |  550 +++
 costa/native/external/lapack/ztpcon.f         |  194 +
 costa/native/external/lapack/ztprfs.f         |  387 ++
 costa/native/external/lapack/ztptri.f         |  177 +
 costa/native/external/lapack/ztptrs.f         |  154 +
 costa/native/external/lapack/ztrcon.f         |  200 +
 costa/native/external/lapack/ztrevc.f         |  390 ++
 costa/native/external/lapack/ztrexc.f         |  163 +
 costa/native/external/lapack/ztrrfs.f         |  378 ++
 costa/native/external/lapack/ztrsen.f         |  358 ++
 costa/native/external/lapack/ztrsna.f         |  353 ++
 costa/native/external/lapack/ztrsyl.f         |  368 ++
 costa/native/external/lapack/ztrti2.f         |  147 +
 costa/native/external/lapack/ztrtri.f         |  178 +
 costa/native/external/lapack/ztrtrs.f         |  149 +
 costa/native/external/lapack/ztzrqf.f         |  174 +
 costa/native/external/lapack/ztzrzf.f         |  241 +
 costa/native/external/lapack/zung2l.f         |  129 +
 costa/native/external/lapack/zung2r.f         |  131 +
 costa/native/external/lapack/zungbr.f         |  246 +
 costa/native/external/lapack/zunghr.f         |  166 +
 costa/native/external/lapack/zungl2.f         |  137 +
 costa/native/external/lapack/zunglq.f         |  216 +
 costa/native/external/lapack/zungql.f         |  214 +
 costa/native/external/lapack/zungqr.f         |  217 +
 costa/native/external/lapack/zungr2.f         |  135 +
 costa/native/external/lapack/zungrq.f         |  215 +
 costa/native/external/lapack/zungtr.f         |  185 +
 costa/native/external/lapack/zunm2l.f         |  197 +
 costa/native/external/lapack/zunm2r.f         |  202 +
 costa/native/external/lapack/zunmbr.f         |  281 ++
 costa/native/external/lapack/zunmhr.f         |  202 +
 costa/native/external/lapack/zunml2.f         |  206 +
 costa/native/external/lapack/zunmlq.f         |  268 +
 costa/native/external/lapack/zunmql.f         |  257 +
 costa/native/external/lapack/zunmqr.f         |  261 +
 costa/native/external/lapack/zunmr2.f         |  199 +
 costa/native/external/lapack/zunmr3.f         |  213 +
 costa/native/external/lapack/zunmrq.f         |  264 +
 costa/native/external/lapack/zunmrz.f         |  293 ++
 costa/native/external/lapack/zunmtr.f         |  223 +
 costa/native/external/lapack/zupgtr.f         |  162 +
 costa/native/external/lapack/zupmtr.f         |  268 +
 1593 files changed, 468683 insertions(+)
 create mode 100644 costa/native/cta/cta_util_timing.f90
 create mode 100644 costa/native/cta/include/cta.h
 create mode 100644 costa/native/cta/include/cta_array.h
 create mode 100644 costa/native/cta/include/cta_bb_modbuild.h
 create mode 100644 costa/native/cta/include/cta_bb_modbuild_utils.h
 create mode 100644 costa/native/cta/include/cta_datatypes.h
 create mode 100644 costa/native/cta/include/cta_datetime.h
 create mode 100644 costa/native/cta/include/cta_defaults.h
 create mode 100644 costa/native/cta/include/cta_errors.h
 create mode 100644 costa/native/cta/include/cta_f77blas.h
 create mode 100644 costa/native/cta/include/cta_f77lapack.h
 create mode 100644 costa/native/cta/include/cta_file.h
 create mode 100644 costa/native/cta/include/cta_flush.h
 create mode 100644 costa/native/cta/include/cta_functions.h
 create mode 100644 costa/native/cta/include/cta_handles.h
 create mode 100644 costa/native/cta/include/cta_initialise.h
 create mode 100644 costa/native/cta/include/cta_interface.h
 create mode 100644 costa/native/cta/include/cta_matrix.h
 create mode 100644 costa/native/cta/include/cta_matrix_blas.h
 create mode 100644 costa/native/cta/include/cta_mem.h
 create mode 100644 costa/native/cta/include/cta_message.h
 create mode 100644 costa/native/cta/include/cta_metainfo.h
 create mode 100644 costa/native/cta/include/cta_method.h
 create mode 100644 costa/native/cta/include/cta_modbuild_b3b.h
 create mode 100644 costa/native/cta/include/cta_modbuild_b3b_utils.h
 create mode 100644 costa/native/cta/include/cta_modbuild_par.h
 create mode 100644 costa/native/cta/include/cta_modbuild_sp.h
 create mode 100644 costa/native/cta/include/cta_model.h
 create mode 100644 costa/native/cta/include/cta_model_factory.h
 create mode 100644 costa/native/cta/include/cta_model_utilities.h
 create mode 100644 costa/native/cta/include/cta_modelcombiner.h
 create mode 100644 costa/native/cta/include/cta_obsdescr.h
 create mode 100644 costa/native/cta/include/cta_obsdescr_sqlite3.h
 create mode 100644 costa/native/cta/include/cta_obsdescr_table.h
 create mode 100644 costa/native/cta/include/cta_pack.h
 create mode 100644 costa/native/cta/include/cta_par.h
 create mode 100644 costa/native/cta/include/cta_reltable.h
 create mode 100644 costa/native/cta/include/cta_resultwriter.h
 create mode 100644 costa/native/cta/include/cta_sobs.h
 create mode 100644 costa/native/cta/include/cta_sobs_combine.h
 create mode 100644 costa/native/cta/include/cta_sobs_netcdf.h
 create mode 100644 costa/native/cta/include/cta_sobs_sqlite3.h
 create mode 100644 costa/native/cta/include/cta_string.h
 create mode 100644 costa/native/cta/include/cta_system.h
 create mode 100644 costa/native/cta/include/cta_time.h
 create mode 100644 costa/native/cta/include/cta_tree.h
 create mode 100644 costa/native/cta/include/cta_treevector.h
 create mode 100644 costa/native/cta/include/cta_usr_matrix.h
 create mode 100644 costa/native/cta/include/cta_usr_method.h
 create mode 100644 costa/native/cta/include/cta_usr_model.h
 create mode 100644 costa/native/cta/include/cta_usr_obs_desc.h
 create mode 100644 costa/native/cta/include/cta_usr_stoch_observer.h
 create mode 100644 costa/native/cta/include/cta_usr_vector.h
 create mode 100644 costa/native/cta/include/cta_util_methods.h
 create mode 100644 costa/native/cta/include/cta_util_sort.h
 create mode 100644 costa/native/cta/include/cta_util_sqlite3.h
 create mode 100644 costa/native/cta/include/cta_util_statistics.h
 create mode 100644 costa/native/cta/include/cta_vector.h
 create mode 100644 costa/native/cta/include/cta_vector_blas.h
 create mode 100644 costa/native/cta/include/cta_xml.h
 create mode 100644 costa/native/cta/include/ctai.h
 create mode 100644 costa/native/cta/include/ctai_datatypes.h
 create mode 100644 costa/native/cta/include/ctai_handles.h
 create mode 100644 costa/native/cta/include/ctai_sobs.h
 create mode 100644 costa/native/cta/include/ctai_string.h
 create mode 100644 costa/native/cta/include/ctai_vector.h
 create mode 100644 costa/native/cta/include/ctai_xml.h
 create mode 100644 costa/native/cta/include/f_cta_utils.h
 create mode 100644 costa/native/cta/include/modbuild_sp_model_template.h
 create mode 100644 costa/native/cta/src/CMakeLists.txt
 create mode 100644 costa/native/cta/src/cta_array.c
 create mode 100644 costa/native/cta/src/cta_datatypes.c
 create mode 100644 costa/native/cta/src/cta_datetime.c
 create mode 100644 costa/native/cta/src/cta_defaults.c
 create mode 100644 costa/native/cta/src/cta_file.c
 create mode 100644 costa/native/cta/src/cta_flush.c
 create mode 100644 costa/native/cta/src/cta_functions.c
 create mode 100644 costa/native/cta/src/cta_handles.c
 create mode 100644 costa/native/cta/src/cta_initialise.c
 create mode 100644 costa/native/cta/src/cta_interface.c
 create mode 100644 costa/native/cta/src/cta_matrix.c
 create mode 100644 costa/native/cta/src/cta_matrix_blas.c
 create mode 100644 costa/native/cta/src/cta_mem.c
 create mode 100644 costa/native/cta/src/cta_message.c
 create mode 100644 costa/native/cta/src/cta_metainfo.c
 create mode 100644 costa/native/cta/src/cta_method.c
 create mode 100644 costa/native/cta/src/cta_modbuild_par.c
 create mode 100644 costa/native/cta/src/cta_modbuild_sp.c
 create mode 100644 costa/native/cta/src/cta_model.c
 create mode 100644 costa/native/cta/src/cta_model_factory.c
 create mode 100644 costa/native/cta/src/cta_model_utilities.c
 create mode 100644 costa/native/cta/src/cta_obsdescr.c
 create mode 100644 costa/native/cta/src/cta_obsdescr_combine.c
 create mode 100644 costa/native/cta/src/cta_obsdescr_maori.c
 create mode 100644 costa/native/cta/src/cta_obsdescr_netcdf.c
 create mode 100644 costa/native/cta/src/cta_obsdescr_sqlite3.c
 create mode 100644 costa/native/cta/src/cta_obsdescr_table.c
 create mode 100644 costa/native/cta/src/cta_obsdescr_user.c
 create mode 100644 costa/native/cta/src/cta_pack.c
 create mode 100644 costa/native/cta/src/cta_par.c
 create mode 100644 costa/native/cta/src/cta_reltable.c
 create mode 100644 costa/native/cta/src/cta_resultwriter.c
 create mode 100644 costa/native/cta/src/cta_sobs.c
 create mode 100644 costa/native/cta/src/cta_sobs_combine.c
 create mode 100644 costa/native/cta/src/cta_sobs_factory.c
 create mode 100644 costa/native/cta/src/cta_sobs_maori.c
 create mode 100644 costa/native/cta/src/cta_sobs_netcdf.c
 create mode 100644 costa/native/cta/src/cta_sobs_sqlite3.c
 create mode 100644 costa/native/cta/src/cta_sobs_user.c
 create mode 100644 costa/native/cta/src/cta_string.c
 create mode 100644 costa/native/cta/src/cta_time.c
 create mode 100644 costa/native/cta/src/cta_tree.c
 create mode 100644 costa/native/cta/src/cta_treevector.c
 create mode 100644 costa/native/cta/src/cta_util_methods.c
 create mode 100644 costa/native/cta/src/cta_util_sort.c
 create mode 100644 costa/native/cta/src/cta_util_sqlite3.c
 create mode 100644 costa/native/cta/src/cta_util_statistics.c
 create mode 100644 costa/native/cta/src/cta_vector.c
 create mode 100644 costa/native/cta/src/cta_vector_blas.c
 create mode 100644 costa/native/cta/src/cta_xml.c
 create mode 100644 costa/native/cta/src/f_cta_defaults.c
 create mode 100644 costa/native/cta/src/f_cta_utils.c
 create mode 100644 costa/native/cta_f90/cta_f90_parameters.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_array.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_datatypes.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_datetime.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_file.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_flush_mod.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_functions.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_handles.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_initialise.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_interface.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_matrix.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_mem.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_message.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_metainfo.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_method.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_modbuild_par.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_model.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_model_factory.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_model_utilities.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_obsdescr.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_pack.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_par.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_parameters.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_reltable.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_resultwriter.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_sobs.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_string.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_time.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_tree.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_treevector.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_util_methods.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_util_sort.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_util_statistics.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_vector.f90
 create mode 100644 costa/native/cta_f90/generated/cta_f90_xml.f90
 create mode 100644 costa/native/cta_f90/include/cta_f77.inc
 create mode 100644 costa/native/cta_f90/include/cta_f90.inc
 create mode 100644 costa/native/cta_f90/include/cta_f90_contains.inc
 create mode 100644 costa/native/external/blas/caxpy.f
 create mode 100644 costa/native/external/blas/ccopy.f
 create mode 100644 costa/native/external/blas/cdotc.f
 create mode 100644 costa/native/external/blas/cdotu.f
 create mode 100644 costa/native/external/blas/cgbmv.f
 create mode 100644 costa/native/external/blas/cgemm.f
 create mode 100644 costa/native/external/blas/cgemv.f
 create mode 100644 costa/native/external/blas/cgerc.f
 create mode 100644 costa/native/external/blas/cgeru.f
 create mode 100644 costa/native/external/blas/chbmv.f
 create mode 100644 costa/native/external/blas/chemm.f
 create mode 100644 costa/native/external/blas/chemv.f
 create mode 100644 costa/native/external/blas/cher.f
 create mode 100644 costa/native/external/blas/cher2.f
 create mode 100644 costa/native/external/blas/cher2k.f
 create mode 100644 costa/native/external/blas/cherk.f
 create mode 100644 costa/native/external/blas/chpmv.f
 create mode 100644 costa/native/external/blas/chpr.f
 create mode 100644 costa/native/external/blas/chpr2.f
 create mode 100644 costa/native/external/blas/crotg.f
 create mode 100644 costa/native/external/blas/cscal.f
 create mode 100644 costa/native/external/blas/csscal.f
 create mode 100644 costa/native/external/blas/cswap.f
 create mode 100644 costa/native/external/blas/csymm.f
 create mode 100644 costa/native/external/blas/csyr2k.f
 create mode 100644 costa/native/external/blas/csyrk.f
 create mode 100644 costa/native/external/blas/ctbmv.f
 create mode 100644 costa/native/external/blas/ctbsv.f
 create mode 100644 costa/native/external/blas/ctpmv.f
 create mode 100644 costa/native/external/blas/ctpsv.f
 create mode 100644 costa/native/external/blas/ctrmm.f
 create mode 100644 costa/native/external/blas/ctrmv.f
 create mode 100644 costa/native/external/blas/ctrsm.f
 create mode 100644 costa/native/external/blas/ctrsv.f
 create mode 100644 costa/native/external/blas/dasum.f
 create mode 100644 costa/native/external/blas/daxpy.f
 create mode 100644 costa/native/external/blas/dcabs1.f
 create mode 100644 costa/native/external/blas/dcopy.f
 create mode 100644 costa/native/external/blas/ddot.f
 create mode 100644 costa/native/external/blas/dgbmv.f
 create mode 100644 costa/native/external/blas/dgemm.f
 create mode 100644 costa/native/external/blas/dgemv.f
 create mode 100644 costa/native/external/blas/dger.f
 create mode 100644 costa/native/external/blas/dnrm2.f
 create mode 100644 costa/native/external/blas/drot.f
 create mode 100644 costa/native/external/blas/drotg.f
 create mode 100644 costa/native/external/blas/dsbmv.f
 create mode 100644 costa/native/external/blas/dscal.f
 create mode 100644 costa/native/external/blas/dspmv.f
 create mode 100644 costa/native/external/blas/dspr.f
 create mode 100644 costa/native/external/blas/dspr2.f
 create mode 100644 costa/native/external/blas/dswap.f
 create mode 100644 costa/native/external/blas/dsymm.f
 create mode 100644 costa/native/external/blas/dsymv.f
 create mode 100644 costa/native/external/blas/dsyr.f
 create mode 100644 costa/native/external/blas/dsyr2.f
 create mode 100644 costa/native/external/blas/dsyr2k.f
 create mode 100644 costa/native/external/blas/dsyrk.f
 create mode 100644 costa/native/external/blas/dtbmv.f
 create mode 100644 costa/native/external/blas/dtbsv.f
 create mode 100644 costa/native/external/blas/dtpmv.f
 create mode 100644 costa/native/external/blas/dtpsv.f
 create mode 100644 costa/native/external/blas/dtrmm.f
 create mode 100644 costa/native/external/blas/dtrmv.f
 create mode 100644 costa/native/external/blas/dtrsm.f
 create mode 100644 costa/native/external/blas/dtrsv.f
 create mode 100644 costa/native/external/blas/dzasum.f
 create mode 100644 costa/native/external/blas/dznrm2.f
 create mode 100644 costa/native/external/blas/icamax.f
 create mode 100644 costa/native/external/blas/idamax.f
 create mode 100644 costa/native/external/blas/isamax.f
 create mode 100644 costa/native/external/blas/izamax.f
 create mode 100644 costa/native/external/blas/lsame.f
 create mode 100644 costa/native/external/blas/sasum.f
 create mode 100644 costa/native/external/blas/saxpy.f
 create mode 100644 costa/native/external/blas/scasum.f
 create mode 100644 costa/native/external/blas/scnrm2.f
 create mode 100644 costa/native/external/blas/scopy.f
 create mode 100644 costa/native/external/blas/sdot.f
 create mode 100644 costa/native/external/blas/sgbmv.f
 create mode 100644 costa/native/external/blas/sgemm.f
 create mode 100644 costa/native/external/blas/sgemv.f
 create mode 100644 costa/native/external/blas/sger.f
 create mode 100644 costa/native/external/blas/snrm2.f
 create mode 100644 costa/native/external/blas/srot.f
 create mode 100644 costa/native/external/blas/srotg.f
 create mode 100644 costa/native/external/blas/ssbmv.f
 create mode 100644 costa/native/external/blas/sscal.f
 create mode 100644 costa/native/external/blas/sspmv.f
 create mode 100644 costa/native/external/blas/sspr.f
 create mode 100644 costa/native/external/blas/sspr2.f
 create mode 100644 costa/native/external/blas/sswap.f
 create mode 100644 costa/native/external/blas/ssymm.f
 create mode 100644 costa/native/external/blas/ssymv.f
 create mode 100644 costa/native/external/blas/ssyr.f
 create mode 100644 costa/native/external/blas/ssyr2.f
 create mode 100644 costa/native/external/blas/ssyr2k.f
 create mode 100644 costa/native/external/blas/ssyrk.f
 create mode 100644 costa/native/external/blas/stbmv.f
 create mode 100644 costa/native/external/blas/stbsv.f
 create mode 100644 costa/native/external/blas/stpmv.f
 create mode 100644 costa/native/external/blas/stpsv.f
 create mode 100644 costa/native/external/blas/strmm.f
 create mode 100644 costa/native/external/blas/strmv.f
 create mode 100644 costa/native/external/blas/strsm.f
 create mode 100644 costa/native/external/blas/strsv.f
 create mode 100644 costa/native/external/blas/xerbla.f
 create mode 100644 costa/native/external/blas/zaxpy.f
 create mode 100644 costa/native/external/blas/zcopy.f
 create mode 100644 costa/native/external/blas/zdotc.f
 create mode 100644 costa/native/external/blas/zdotu.f
 create mode 100644 costa/native/external/blas/zdscal.f
 create mode 100644 costa/native/external/blas/zgbmv.f
 create mode 100644 costa/native/external/blas/zgemm.f
 create mode 100644 costa/native/external/blas/zgemv.f
 create mode 100644 costa/native/external/blas/zgerc.f
 create mode 100644 costa/native/external/blas/zgeru.f
 create mode 100644 costa/native/external/blas/zhbmv.f
 create mode 100644 costa/native/external/blas/zhemm.f
 create mode 100644 costa/native/external/blas/zhemv.f
 create mode 100644 costa/native/external/blas/zher.f
 create mode 100644 costa/native/external/blas/zher2.f
 create mode 100644 costa/native/external/blas/zher2k.f
 create mode 100644 costa/native/external/blas/zherk.f
 create mode 100644 costa/native/external/blas/zhpmv.f
 create mode 100644 costa/native/external/blas/zhpr.f
 create mode 100644 costa/native/external/blas/zhpr2.f
 create mode 100644 costa/native/external/blas/zrotg.f
 create mode 100644 costa/native/external/blas/zscal.f
 create mode 100644 costa/native/external/blas/zswap.f
 create mode 100644 costa/native/external/blas/zsymm.f
 create mode 100644 costa/native/external/blas/zsyr2k.f
 create mode 100644 costa/native/external/blas/zsyrk.f
 create mode 100644 costa/native/external/blas/ztbmv.f
 create mode 100644 costa/native/external/blas/ztbsv.f
 create mode 100644 costa/native/external/blas/ztpmv.f
 create mode 100644 costa/native/external/blas/ztpsv.f
 create mode 100644 costa/native/external/blas/ztrmm.f
 create mode 100644 costa/native/external/blas/ztrmv.f
 create mode 100644 costa/native/external/blas/ztrsm.f
 create mode 100644 costa/native/external/blas/ztrsv.f
 create mode 100644 costa/native/external/lapack/cbdsqr.f
 create mode 100644 costa/native/external/lapack/cgbbrd.f
 create mode 100644 costa/native/external/lapack/cgbcon.f
 create mode 100644 costa/native/external/lapack/cgbequ.f
 create mode 100644 costa/native/external/lapack/cgbrfs.f
 create mode 100644 costa/native/external/lapack/cgbsv.f
 create mode 100644 costa/native/external/lapack/cgbsvx.f
 create mode 100644 costa/native/external/lapack/cgbtf2.f
 create mode 100644 costa/native/external/lapack/cgbtrf.f
 create mode 100644 costa/native/external/lapack/cgbtrs.f
 create mode 100644 costa/native/external/lapack/cgebak.f
 create mode 100644 costa/native/external/lapack/cgebal.f
 create mode 100644 costa/native/external/lapack/cgebd2.f
 create mode 100644 costa/native/external/lapack/cgebrd.f
 create mode 100644 costa/native/external/lapack/cgecon.f
 create mode 100644 costa/native/external/lapack/cgeequ.f
 create mode 100644 costa/native/external/lapack/cgees.f
 create mode 100644 costa/native/external/lapack/cgeesx.f
 create mode 100644 costa/native/external/lapack/cgeev.f
 create mode 100644 costa/native/external/lapack/cgeevx.f
 create mode 100644 costa/native/external/lapack/cgegs.f
 create mode 100644 costa/native/external/lapack/cgegv.f
 create mode 100644 costa/native/external/lapack/cgehd2.f
 create mode 100644 costa/native/external/lapack/cgehrd.f
 create mode 100644 costa/native/external/lapack/cgelq2.f
 create mode 100644 costa/native/external/lapack/cgelqf.f
 create mode 100644 costa/native/external/lapack/cgels.f
 create mode 100644 costa/native/external/lapack/cgelsd.f
 create mode 100644 costa/native/external/lapack/cgelss.f
 create mode 100644 costa/native/external/lapack/cgelsx.f
 create mode 100644 costa/native/external/lapack/cgelsy.f
 create mode 100644 costa/native/external/lapack/cgeql2.f
 create mode 100644 costa/native/external/lapack/cgeqlf.f
 create mode 100644 costa/native/external/lapack/cgeqp3.f
 create mode 100644 costa/native/external/lapack/cgeqpf.f
 create mode 100644 costa/native/external/lapack/cgeqr2.f
 create mode 100644 costa/native/external/lapack/cgeqrf.f
 create mode 100644 costa/native/external/lapack/cgerfs.f
 create mode 100644 costa/native/external/lapack/cgerq2.f
 create mode 100644 costa/native/external/lapack/cgerqf.f
 create mode 100644 costa/native/external/lapack/cgesc2.f
 create mode 100644 costa/native/external/lapack/cgesdd.f
 create mode 100644 costa/native/external/lapack/cgesv.f
 create mode 100644 costa/native/external/lapack/cgesvd.f
 create mode 100644 costa/native/external/lapack/cgesvx.f
 create mode 100644 costa/native/external/lapack/cgetc2.f
 create mode 100644 costa/native/external/lapack/cgetf2.f
 create mode 100644 costa/native/external/lapack/cgetrf.f
 create mode 100644 costa/native/external/lapack/cgetri.f
 create mode 100644 costa/native/external/lapack/cgetrs.f
 create mode 100644 costa/native/external/lapack/cggbak.f
 create mode 100644 costa/native/external/lapack/cggbal.f
 create mode 100644 costa/native/external/lapack/cgges.f
 create mode 100644 costa/native/external/lapack/cggesx.f
 create mode 100644 costa/native/external/lapack/cggev.f
 create mode 100644 costa/native/external/lapack/cggevx.f
 create mode 100644 costa/native/external/lapack/cggglm.f
 create mode 100644 costa/native/external/lapack/cgghrd.f
 create mode 100644 costa/native/external/lapack/cgglse.f
 create mode 100644 costa/native/external/lapack/cggqrf.f
 create mode 100644 costa/native/external/lapack/cggrqf.f
 create mode 100644 costa/native/external/lapack/cggsvd.f
 create mode 100644 costa/native/external/lapack/cggsvp.f
 create mode 100644 costa/native/external/lapack/cgtcon.f
 create mode 100644 costa/native/external/lapack/cgtrfs.f
 create mode 100644 costa/native/external/lapack/cgtsv.f
 create mode 100644 costa/native/external/lapack/cgtsvx.f
 create mode 100644 costa/native/external/lapack/cgttrf.f
 create mode 100644 costa/native/external/lapack/cgttrs.f
 create mode 100644 costa/native/external/lapack/cgtts2.f
 create mode 100644 costa/native/external/lapack/chbev.f
 create mode 100644 costa/native/external/lapack/chbevd.f
 create mode 100644 costa/native/external/lapack/chbevx.f
 create mode 100644 costa/native/external/lapack/chbgst.f
 create mode 100644 costa/native/external/lapack/chbgv.f
 create mode 100644 costa/native/external/lapack/chbgvd.f
 create mode 100644 costa/native/external/lapack/chbgvx.f
 create mode 100644 costa/native/external/lapack/chbtrd.f
 create mode 100644 costa/native/external/lapack/checon.f
 create mode 100644 costa/native/external/lapack/cheev.f
 create mode 100644 costa/native/external/lapack/cheevd.f
 create mode 100644 costa/native/external/lapack/cheevr.f
 create mode 100644 costa/native/external/lapack/cheevx.f
 create mode 100644 costa/native/external/lapack/chegs2.f
 create mode 100644 costa/native/external/lapack/chegst.f
 create mode 100644 costa/native/external/lapack/chegv.f
 create mode 100644 costa/native/external/lapack/chegvd.f
 create mode 100644 costa/native/external/lapack/chegvx.f
 create mode 100644 costa/native/external/lapack/cherfs.f
 create mode 100644 costa/native/external/lapack/chesv.f
 create mode 100644 costa/native/external/lapack/chesvx.f
 create mode 100644 costa/native/external/lapack/chetd2.f
 create mode 100644 costa/native/external/lapack/chetf2.f
 create mode 100644 costa/native/external/lapack/chetrd.f
 create mode 100644 costa/native/external/lapack/chetrf.f
 create mode 100644 costa/native/external/lapack/chetri.f
 create mode 100644 costa/native/external/lapack/chetrs.f
 create mode 100644 costa/native/external/lapack/chgeqz.f
 create mode 100644 costa/native/external/lapack/chpcon.f
 create mode 100644 costa/native/external/lapack/chpev.f
 create mode 100644 costa/native/external/lapack/chpevd.f
 create mode 100644 costa/native/external/lapack/chpevx.f
 create mode 100644 costa/native/external/lapack/chpgst.f
 create mode 100644 costa/native/external/lapack/chpgv.f
 create mode 100644 costa/native/external/lapack/chpgvd.f
 create mode 100644 costa/native/external/lapack/chpgvx.f
 create mode 100644 costa/native/external/lapack/chprfs.f
 create mode 100644 costa/native/external/lapack/chpsv.f
 create mode 100644 costa/native/external/lapack/chpsvx.f
 create mode 100644 costa/native/external/lapack/chptrd.f
 create mode 100644 costa/native/external/lapack/chptrf.f
 create mode 100644 costa/native/external/lapack/chptri.f
 create mode 100644 costa/native/external/lapack/chptrs.f
 create mode 100644 costa/native/external/lapack/chsein.f
 create mode 100644 costa/native/external/lapack/chseqr.f
 create mode 100644 costa/native/external/lapack/clabrd.f
 create mode 100644 costa/native/external/lapack/clacgv.f
 create mode 100644 costa/native/external/lapack/clacon.f
 create mode 100644 costa/native/external/lapack/clacp2.f
 create mode 100644 costa/native/external/lapack/clacpy.f
 create mode 100644 costa/native/external/lapack/clacrm.f
 create mode 100644 costa/native/external/lapack/clacrt.f
 create mode 100644 costa/native/external/lapack/cladiv.f
 create mode 100644 costa/native/external/lapack/claed0.f
 create mode 100644 costa/native/external/lapack/claed7.f
 create mode 100644 costa/native/external/lapack/claed8.f
 create mode 100644 costa/native/external/lapack/claein.f
 create mode 100644 costa/native/external/lapack/claesy.f
 create mode 100644 costa/native/external/lapack/claev2.f
 create mode 100644 costa/native/external/lapack/clags2.f
 create mode 100644 costa/native/external/lapack/clagtm.f
 create mode 100644 costa/native/external/lapack/clahef.f
 create mode 100644 costa/native/external/lapack/clahqr.f
 create mode 100644 costa/native/external/lapack/clahrd.f
 create mode 100644 costa/native/external/lapack/claic1.f
 create mode 100644 costa/native/external/lapack/clals0.f
 create mode 100644 costa/native/external/lapack/clalsa.f
 create mode 100644 costa/native/external/lapack/clalsd.f
 create mode 100644 costa/native/external/lapack/clangb.f
 create mode 100644 costa/native/external/lapack/clange.f
 create mode 100644 costa/native/external/lapack/clangt.f
 create mode 100644 costa/native/external/lapack/clanhb.f
 create mode 100644 costa/native/external/lapack/clanhe.f
 create mode 100644 costa/native/external/lapack/clanhp.f
 create mode 100644 costa/native/external/lapack/clanhs.f
 create mode 100644 costa/native/external/lapack/clanht.f
 create mode 100644 costa/native/external/lapack/clansb.f
 create mode 100644 costa/native/external/lapack/clansp.f
 create mode 100644 costa/native/external/lapack/clansy.f
 create mode 100644 costa/native/external/lapack/clantb.f
 create mode 100644 costa/native/external/lapack/clantp.f
 create mode 100644 costa/native/external/lapack/clantr.f
 create mode 100644 costa/native/external/lapack/clapll.f
 create mode 100644 costa/native/external/lapack/clapmt.f
 create mode 100644 costa/native/external/lapack/claqgb.f
 create mode 100644 costa/native/external/lapack/claqge.f
 create mode 100644 costa/native/external/lapack/claqhb.f
 create mode 100644 costa/native/external/lapack/claqhe.f
 create mode 100644 costa/native/external/lapack/claqhp.f
 create mode 100644 costa/native/external/lapack/claqp2.f
 create mode 100644 costa/native/external/lapack/claqps.f
 create mode 100644 costa/native/external/lapack/claqsb.f
 create mode 100644 costa/native/external/lapack/claqsp.f
 create mode 100644 costa/native/external/lapack/claqsy.f
 create mode 100644 costa/native/external/lapack/clar1v.f
 create mode 100644 costa/native/external/lapack/clar2v.f
 create mode 100644 costa/native/external/lapack/clarcm.f
 create mode 100644 costa/native/external/lapack/clarf.f
 create mode 100644 costa/native/external/lapack/clarfb.f
 create mode 100644 costa/native/external/lapack/clarfg.f
 create mode 100644 costa/native/external/lapack/clarft.f
 create mode 100644 costa/native/external/lapack/clarfx.f
 create mode 100644 costa/native/external/lapack/clargv.f
 create mode 100644 costa/native/external/lapack/clarnv.f
 create mode 100644 costa/native/external/lapack/clarrv.f
 create mode 100644 costa/native/external/lapack/clartg.f
 create mode 100644 costa/native/external/lapack/clartv.f
 create mode 100644 costa/native/external/lapack/clarz.f
 create mode 100644 costa/native/external/lapack/clarzb.f
 create mode 100644 costa/native/external/lapack/clarzt.f
 create mode 100644 costa/native/external/lapack/clascl.f
 create mode 100644 costa/native/external/lapack/claset.f
 create mode 100644 costa/native/external/lapack/clasr.f
 create mode 100644 costa/native/external/lapack/classq.f
 create mode 100644 costa/native/external/lapack/claswp.f
 create mode 100644 costa/native/external/lapack/clasyf.f
 create mode 100644 costa/native/external/lapack/clatbs.f
 create mode 100644 costa/native/external/lapack/clatdf.f
 create mode 100644 costa/native/external/lapack/clatps.f
 create mode 100644 costa/native/external/lapack/clatrd.f
 create mode 100644 costa/native/external/lapack/clatrs.f
 create mode 100644 costa/native/external/lapack/clatrz.f
 create mode 100644 costa/native/external/lapack/clatzm.f
 create mode 100644 costa/native/external/lapack/clauu2.f
 create mode 100644 costa/native/external/lapack/clauum.f
 create mode 100644 costa/native/external/lapack/cpbcon.f
 create mode 100644 costa/native/external/lapack/cpbequ.f
 create mode 100644 costa/native/external/lapack/cpbrfs.f
 create mode 100644 costa/native/external/lapack/cpbstf.f
 create mode 100644 costa/native/external/lapack/cpbsv.f
 create mode 100644 costa/native/external/lapack/cpbsvx.f
 create mode 100644 costa/native/external/lapack/cpbtf2.f
 create mode 100644 costa/native/external/lapack/cpbtrf.f
 create mode 100644 costa/native/external/lapack/cpbtrs.f
 create mode 100644 costa/native/external/lapack/cpocon.f
 create mode 100644 costa/native/external/lapack/cpoequ.f
 create mode 100644 costa/native/external/lapack/cporfs.f
 create mode 100644 costa/native/external/lapack/cposv.f
 create mode 100644 costa/native/external/lapack/cposvx.f
 create mode 100644 costa/native/external/lapack/cpotf2.f
 create mode 100644 costa/native/external/lapack/cpotrf.f
 create mode 100644 costa/native/external/lapack/cpotri.f
 create mode 100644 costa/native/external/lapack/cpotrs.f
 create mode 100644 costa/native/external/lapack/cppcon.f
 create mode 100644 costa/native/external/lapack/cppequ.f
 create mode 100644 costa/native/external/lapack/cpprfs.f
 create mode 100644 costa/native/external/lapack/cppsv.f
 create mode 100644 costa/native/external/lapack/cppsvx.f
 create mode 100644 costa/native/external/lapack/cpptrf.f
 create mode 100644 costa/native/external/lapack/cpptri.f
 create mode 100644 costa/native/external/lapack/cpptrs.f
 create mode 100644 costa/native/external/lapack/cptcon.f
 create mode 100644 costa/native/external/lapack/cpteqr.f
 create mode 100644 costa/native/external/lapack/cptrfs.f
 create mode 100644 costa/native/external/lapack/cptsv.f
 create mode 100644 costa/native/external/lapack/cptsvx.f
 create mode 100644 costa/native/external/lapack/cpttrf.f
 create mode 100644 costa/native/external/lapack/cpttrs.f
 create mode 100644 costa/native/external/lapack/cptts2.f
 create mode 100644 costa/native/external/lapack/crot.f
 create mode 100644 costa/native/external/lapack/cspcon.f
 create mode 100644 costa/native/external/lapack/cspmv.f
 create mode 100644 costa/native/external/lapack/cspr.f
 create mode 100644 costa/native/external/lapack/csprfs.f
 create mode 100644 costa/native/external/lapack/cspsv.f
 create mode 100644 costa/native/external/lapack/cspsvx.f
 create mode 100644 costa/native/external/lapack/csptrf.f
 create mode 100644 costa/native/external/lapack/csptri.f
 create mode 100644 costa/native/external/lapack/csptrs.f
 create mode 100644 costa/native/external/lapack/csrot.f
 create mode 100644 costa/native/external/lapack/csrscl.f
 create mode 100644 costa/native/external/lapack/cstedc.f
 create mode 100644 costa/native/external/lapack/cstegr.f
 create mode 100644 costa/native/external/lapack/cstein.f
 create mode 100644 costa/native/external/lapack/csteqr.f
 create mode 100644 costa/native/external/lapack/csycon.f
 create mode 100644 costa/native/external/lapack/csymv.f
 create mode 100644 costa/native/external/lapack/csyr.f
 create mode 100644 costa/native/external/lapack/csyrfs.f
 create mode 100644 costa/native/external/lapack/csysv.f
 create mode 100644 costa/native/external/lapack/csysvx.f
 create mode 100644 costa/native/external/lapack/csytf2.f
 create mode 100644 costa/native/external/lapack/csytrf.f
 create mode 100644 costa/native/external/lapack/csytri.f
 create mode 100644 costa/native/external/lapack/csytrs.f
 create mode 100644 costa/native/external/lapack/ctbcon.f
 create mode 100644 costa/native/external/lapack/ctbrfs.f
 create mode 100644 costa/native/external/lapack/ctbtrs.f
 create mode 100644 costa/native/external/lapack/ctgevc.f
 create mode 100644 costa/native/external/lapack/ctgex2.f
 create mode 100644 costa/native/external/lapack/ctgexc.f
 create mode 100644 costa/native/external/lapack/ctgsen.f
 create mode 100644 costa/native/external/lapack/ctgsja.f
 create mode 100644 costa/native/external/lapack/ctgsna.f
 create mode 100644 costa/native/external/lapack/ctgsy2.f
 create mode 100644 costa/native/external/lapack/ctgsyl.f
 create mode 100644 costa/native/external/lapack/ctpcon.f
 create mode 100644 costa/native/external/lapack/ctprfs.f
 create mode 100644 costa/native/external/lapack/ctptri.f
 create mode 100644 costa/native/external/lapack/ctptrs.f
 create mode 100644 costa/native/external/lapack/ctrcon.f
 create mode 100644 costa/native/external/lapack/ctrevc.f
 create mode 100644 costa/native/external/lapack/ctrexc.f
 create mode 100644 costa/native/external/lapack/ctrrfs.f
 create mode 100644 costa/native/external/lapack/ctrsen.f
 create mode 100644 costa/native/external/lapack/ctrsna.f
 create mode 100644 costa/native/external/lapack/ctrsyl.f
 create mode 100644 costa/native/external/lapack/ctrti2.f
 create mode 100644 costa/native/external/lapack/ctrtri.f
 create mode 100644 costa/native/external/lapack/ctrtrs.f
 create mode 100644 costa/native/external/lapack/ctzrqf.f
 create mode 100644 costa/native/external/lapack/ctzrzf.f
 create mode 100644 costa/native/external/lapack/cung2l.f
 create mode 100644 costa/native/external/lapack/cung2r.f
 create mode 100644 costa/native/external/lapack/cungbr.f
 create mode 100644 costa/native/external/lapack/cunghr.f
 create mode 100644 costa/native/external/lapack/cungl2.f
 create mode 100644 costa/native/external/lapack/cunglq.f
 create mode 100644 costa/native/external/lapack/cungql.f
 create mode 100644 costa/native/external/lapack/cungqr.f
 create mode 100644 costa/native/external/lapack/cungr2.f
 create mode 100644 costa/native/external/lapack/cungrq.f
 create mode 100644 costa/native/external/lapack/cungtr.f
 create mode 100644 costa/native/external/lapack/cunm2l.f
 create mode 100644 costa/native/external/lapack/cunm2r.f
 create mode 100644 costa/native/external/lapack/cunmbr.f
 create mode 100644 costa/native/external/lapack/cunmhr.f
 create mode 100644 costa/native/external/lapack/cunml2.f
 create mode 100644 costa/native/external/lapack/cunmlq.f
 create mode 100644 costa/native/external/lapack/cunmql.f
 create mode 100644 costa/native/external/lapack/cunmqr.f
 create mode 100644 costa/native/external/lapack/cunmr2.f
 create mode 100644 costa/native/external/lapack/cunmr3.f
 create mode 100644 costa/native/external/lapack/cunmrq.f
 create mode 100644 costa/native/external/lapack/cunmrz.f
 create mode 100644 costa/native/external/lapack/cunmtr.f
 create mode 100644 costa/native/external/lapack/cupgtr.f
 create mode 100644 costa/native/external/lapack/cupmtr.f
 create mode 100644 costa/native/external/lapack/dbdsdc.f
 create mode 100644 costa/native/external/lapack/dbdsqr.f
 create mode 100644 costa/native/external/lapack/ddisna.f
 create mode 100644 costa/native/external/lapack/dgbbrd.f
 create mode 100644 costa/native/external/lapack/dgbcon.f
 create mode 100644 costa/native/external/lapack/dgbequ.f
 create mode 100644 costa/native/external/lapack/dgbrfs.f
 create mode 100644 costa/native/external/lapack/dgbsv.f
 create mode 100644 costa/native/external/lapack/dgbsvx.f
 create mode 100644 costa/native/external/lapack/dgbtf2.f
 create mode 100644 costa/native/external/lapack/dgbtrf.f
 create mode 100644 costa/native/external/lapack/dgbtrs.f
 create mode 100644 costa/native/external/lapack/dgebak.f
 create mode 100644 costa/native/external/lapack/dgebal.f
 create mode 100644 costa/native/external/lapack/dgebd2.f
 create mode 100644 costa/native/external/lapack/dgebrd.f
 create mode 100644 costa/native/external/lapack/dgecon.f
 create mode 100644 costa/native/external/lapack/dgeequ.f
 create mode 100644 costa/native/external/lapack/dgees.f
 create mode 100644 costa/native/external/lapack/dgeesx.f
 create mode 100644 costa/native/external/lapack/dgeev.f
 create mode 100644 costa/native/external/lapack/dgeevx.f
 create mode 100644 costa/native/external/lapack/dgegs.f
 create mode 100644 costa/native/external/lapack/dgegv.f
 create mode 100644 costa/native/external/lapack/dgehd2.f
 create mode 100644 costa/native/external/lapack/dgehrd.f
 create mode 100644 costa/native/external/lapack/dgelq2.f
 create mode 100644 costa/native/external/lapack/dgelqf.f
 create mode 100644 costa/native/external/lapack/dgels.f
 create mode 100644 costa/native/external/lapack/dgelsd.f
 create mode 100644 costa/native/external/lapack/dgelss.f
 create mode 100644 costa/native/external/lapack/dgelsx.f
 create mode 100644 costa/native/external/lapack/dgelsy.f
 create mode 100644 costa/native/external/lapack/dgeql2.f
 create mode 100644 costa/native/external/lapack/dgeqlf.f
 create mode 100644 costa/native/external/lapack/dgeqp3.f
 create mode 100644 costa/native/external/lapack/dgeqpf.f
 create mode 100644 costa/native/external/lapack/dgeqr2.f
 create mode 100644 costa/native/external/lapack/dgeqrf.f
 create mode 100644 costa/native/external/lapack/dgerfs.f
 create mode 100644 costa/native/external/lapack/dgerq2.f
 create mode 100644 costa/native/external/lapack/dgerqf.f
 create mode 100644 costa/native/external/lapack/dgesc2.f
 create mode 100644 costa/native/external/lapack/dgesdd.f
 create mode 100644 costa/native/external/lapack/dgesv.f
 create mode 100644 costa/native/external/lapack/dgesvd.f
 create mode 100644 costa/native/external/lapack/dgesvx.f
 create mode 100644 costa/native/external/lapack/dgetc2.f
 create mode 100644 costa/native/external/lapack/dgetf2.f
 create mode 100644 costa/native/external/lapack/dgetrf.f
 create mode 100644 costa/native/external/lapack/dgetri.f
 create mode 100644 costa/native/external/lapack/dgetrs.f
 create mode 100644 costa/native/external/lapack/dggbak.f
 create mode 100644 costa/native/external/lapack/dggbal.f
 create mode 100644 costa/native/external/lapack/dgges.f
 create mode 100644 costa/native/external/lapack/dggesx.f
 create mode 100644 costa/native/external/lapack/dggev.f
 create mode 100644 costa/native/external/lapack/dggevx.f
 create mode 100644 costa/native/external/lapack/dggglm.f
 create mode 100644 costa/native/external/lapack/dgghrd.f
 create mode 100644 costa/native/external/lapack/dgglse.f
 create mode 100644 costa/native/external/lapack/dggqrf.f
 create mode 100644 costa/native/external/lapack/dggrqf.f
 create mode 100644 costa/native/external/lapack/dggsvd.f
 create mode 100644 costa/native/external/lapack/dggsvp.f
 create mode 100644 costa/native/external/lapack/dgtcon.f
 create mode 100644 costa/native/external/lapack/dgtrfs.f
 create mode 100644 costa/native/external/lapack/dgtsv.f
 create mode 100644 costa/native/external/lapack/dgtsvx.f
 create mode 100644 costa/native/external/lapack/dgttrf.f
 create mode 100644 costa/native/external/lapack/dgttrs.f
 create mode 100644 costa/native/external/lapack/dgtts2.f
 create mode 100644 costa/native/external/lapack/dhgeqz.f
 create mode 100644 costa/native/external/lapack/dhsein.f
 create mode 100644 costa/native/external/lapack/dhseqr.f
 create mode 100644 costa/native/external/lapack/dlabad.f
 create mode 100644 costa/native/external/lapack/dlabrd.f
 create mode 100644 costa/native/external/lapack/dlacon.f
 create mode 100644 costa/native/external/lapack/dlacpy.f
 create mode 100644 costa/native/external/lapack/dladiv.f
 create mode 100644 costa/native/external/lapack/dlae2.f
 create mode 100644 costa/native/external/lapack/dlaebz.f
 create mode 100644 costa/native/external/lapack/dlaed0.f
 create mode 100644 costa/native/external/lapack/dlaed1.f
 create mode 100644 costa/native/external/lapack/dlaed2.f
 create mode 100644 costa/native/external/lapack/dlaed3.f
 create mode 100644 costa/native/external/lapack/dlaed4.f
 create mode 100644 costa/native/external/lapack/dlaed5.f
 create mode 100644 costa/native/external/lapack/dlaed6.f
 create mode 100644 costa/native/external/lapack/dlaed7.f
 create mode 100644 costa/native/external/lapack/dlaed8.f
 create mode 100644 costa/native/external/lapack/dlaed9.f
 create mode 100644 costa/native/external/lapack/dlaeda.f
 create mode 100644 costa/native/external/lapack/dlaein.f
 create mode 100644 costa/native/external/lapack/dlaev2.f
 create mode 100644 costa/native/external/lapack/dlaexc.f
 create mode 100644 costa/native/external/lapack/dlag2.f
 create mode 100644 costa/native/external/lapack/dlags2.f
 create mode 100644 costa/native/external/lapack/dlagtf.f
 create mode 100644 costa/native/external/lapack/dlagtm.f
 create mode 100644 costa/native/external/lapack/dlagts.f
 create mode 100644 costa/native/external/lapack/dlagv2.f
 create mode 100644 costa/native/external/lapack/dlahqr.f
 create mode 100644 costa/native/external/lapack/dlahrd.f
 create mode 100644 costa/native/external/lapack/dlaic1.f
 create mode 100644 costa/native/external/lapack/dlaln2.f
 create mode 100644 costa/native/external/lapack/dlals0.f
 create mode 100644 costa/native/external/lapack/dlalsa.f
 create mode 100644 costa/native/external/lapack/dlalsd.f
 create mode 100644 costa/native/external/lapack/dlamch.f
 create mode 100644 costa/native/external/lapack/dlamrg.f
 create mode 100644 costa/native/external/lapack/dlangb.f
 create mode 100644 costa/native/external/lapack/dlange.f
 create mode 100644 costa/native/external/lapack/dlangt.f
 create mode 100644 costa/native/external/lapack/dlanhs.f
 create mode 100644 costa/native/external/lapack/dlansb.f
 create mode 100644 costa/native/external/lapack/dlansp.f
 create mode 100644 costa/native/external/lapack/dlanst.f
 create mode 100644 costa/native/external/lapack/dlansy.f
 create mode 100644 costa/native/external/lapack/dlantb.f
 create mode 100644 costa/native/external/lapack/dlantp.f
 create mode 100644 costa/native/external/lapack/dlantr.f
 create mode 100644 costa/native/external/lapack/dlanv2.f
 create mode 100644 costa/native/external/lapack/dlapll.f
 create mode 100644 costa/native/external/lapack/dlapmt.f
 create mode 100644 costa/native/external/lapack/dlapy2.f
 create mode 100644 costa/native/external/lapack/dlapy3.f
 create mode 100644 costa/native/external/lapack/dlaqgb.f
 create mode 100644 costa/native/external/lapack/dlaqge.f
 create mode 100644 costa/native/external/lapack/dlaqp2.f
 create mode 100644 costa/native/external/lapack/dlaqps.f
 create mode 100644 costa/native/external/lapack/dlaqsb.f
 create mode 100644 costa/native/external/lapack/dlaqsp.f
 create mode 100644 costa/native/external/lapack/dlaqsy.f
 create mode 100644 costa/native/external/lapack/dlaqtr.f
 create mode 100644 costa/native/external/lapack/dlar1v.f
 create mode 100644 costa/native/external/lapack/dlar2v.f
 create mode 100644 costa/native/external/lapack/dlarf.f
 create mode 100644 costa/native/external/lapack/dlarfb.f
 create mode 100644 costa/native/external/lapack/dlarfg.f
 create mode 100644 costa/native/external/lapack/dlarft.f
 create mode 100644 costa/native/external/lapack/dlarfx.f
 create mode 100644 costa/native/external/lapack/dlargv.f
 create mode 100644 costa/native/external/lapack/dlarnv.f
 create mode 100644 costa/native/external/lapack/dlarrb.f
 create mode 100644 costa/native/external/lapack/dlarre.f
 create mode 100644 costa/native/external/lapack/dlarrf.f
 create mode 100644 costa/native/external/lapack/dlarrv.f
 create mode 100644 costa/native/external/lapack/dlartg.f
 create mode 100644 costa/native/external/lapack/dlartv.f
 create mode 100644 costa/native/external/lapack/dlaruv.f
 create mode 100644 costa/native/external/lapack/dlarz.f
 create mode 100644 costa/native/external/lapack/dlarzb.f
 create mode 100644 costa/native/external/lapack/dlarzt.f
 create mode 100644 costa/native/external/lapack/dlas2.f
 create mode 100644 costa/native/external/lapack/dlascl.f
 create mode 100644 costa/native/external/lapack/dlasd0.f
 create mode 100644 costa/native/external/lapack/dlasd1.f
 create mode 100644 costa/native/external/lapack/dlasd2.f
 create mode 100644 costa/native/external/lapack/dlasd3.f
 create mode 100644 costa/native/external/lapack/dlasd4.f
 create mode 100644 costa/native/external/lapack/dlasd5.f
 create mode 100644 costa/native/external/lapack/dlasd6.f
 create mode 100644 costa/native/external/lapack/dlasd7.f
 create mode 100644 costa/native/external/lapack/dlasd8.f
 create mode 100644 costa/native/external/lapack/dlasd9.f
 create mode 100644 costa/native/external/lapack/dlasda.f
 create mode 100644 costa/native/external/lapack/dlasdq.f
 create mode 100644 costa/native/external/lapack/dlasdt.f
 create mode 100644 costa/native/external/lapack/dlaset.f
 create mode 100644 costa/native/external/lapack/dlasq1.f
 create mode 100644 costa/native/external/lapack/dlasq2.f
 create mode 100644 costa/native/external/lapack/dlasq3.f
 create mode 100644 costa/native/external/lapack/dlasq4.f
 create mode 100644 costa/native/external/lapack/dlasq5.f
 create mode 100644 costa/native/external/lapack/dlasq6.f
 create mode 100644 costa/native/external/lapack/dlasr.f
 create mode 100644 costa/native/external/lapack/dlasrt.f
 create mode 100644 costa/native/external/lapack/dlassq.f
 create mode 100644 costa/native/external/lapack/dlasv2.f
 create mode 100644 costa/native/external/lapack/dlaswp.f
 create mode 100644 costa/native/external/lapack/dlasy2.f
 create mode 100644 costa/native/external/lapack/dlasyf.f
 create mode 100644 costa/native/external/lapack/dlatbs.f
 create mode 100644 costa/native/external/lapack/dlatdf.f
 create mode 100644 costa/native/external/lapack/dlatps.f
 create mode 100644 costa/native/external/lapack/dlatrd.f
 create mode 100644 costa/native/external/lapack/dlatrs.f
 create mode 100644 costa/native/external/lapack/dlatrz.f
 create mode 100644 costa/native/external/lapack/dlatzm.f
 create mode 100644 costa/native/external/lapack/dlauu2.f
 create mode 100644 costa/native/external/lapack/dlauum.f
 create mode 100644 costa/native/external/lapack/dopgtr.f
 create mode 100644 costa/native/external/lapack/dopmtr.f
 create mode 100644 costa/native/external/lapack/dorg2l.f
 create mode 100644 costa/native/external/lapack/dorg2r.f
 create mode 100644 costa/native/external/lapack/dorgbr.f
 create mode 100644 costa/native/external/lapack/dorghr.f
 create mode 100644 costa/native/external/lapack/dorgl2.f
 create mode 100644 costa/native/external/lapack/dorglq.f
 create mode 100644 costa/native/external/lapack/dorgql.f
 create mode 100644 costa/native/external/lapack/dorgqr.f
 create mode 100644 costa/native/external/lapack/dorgr2.f
 create mode 100644 costa/native/external/lapack/dorgrq.f
 create mode 100644 costa/native/external/lapack/dorgtr.f
 create mode 100644 costa/native/external/lapack/dorm2l.f
 create mode 100644 costa/native/external/lapack/dorm2r.f
 create mode 100644 costa/native/external/lapack/dormbr.f
 create mode 100644 costa/native/external/lapack/dormhr.f
 create mode 100644 costa/native/external/lapack/dorml2.f
 create mode 100644 costa/native/external/lapack/dormlq.f
 create mode 100644 costa/native/external/lapack/dormql.f
 create mode 100644 costa/native/external/lapack/dormqr.f
 create mode 100644 costa/native/external/lapack/dormr2.f
 create mode 100644 costa/native/external/lapack/dormr3.f
 create mode 100644 costa/native/external/lapack/dormrq.f
 create mode 100644 costa/native/external/lapack/dormrz.f
 create mode 100644 costa/native/external/lapack/dormtr.f
 create mode 100644 costa/native/external/lapack/dpbcon.f
 create mode 100644 costa/native/external/lapack/dpbequ.f
 create mode 100644 costa/native/external/lapack/dpbrfs.f
 create mode 100644 costa/native/external/lapack/dpbstf.f
 create mode 100644 costa/native/external/lapack/dpbsv.f
 create mode 100644 costa/native/external/lapack/dpbsvx.f
 create mode 100644 costa/native/external/lapack/dpbtf2.f
 create mode 100644 costa/native/external/lapack/dpbtrf.f
 create mode 100644 costa/native/external/lapack/dpbtrs.f
 create mode 100644 costa/native/external/lapack/dpocon.f
 create mode 100644 costa/native/external/lapack/dpoequ.f
 create mode 100644 costa/native/external/lapack/dporfs.f
 create mode 100644 costa/native/external/lapack/dposv.f
 create mode 100644 costa/native/external/lapack/dposvx.f
 create mode 100644 costa/native/external/lapack/dpotf2.f
 create mode 100644 costa/native/external/lapack/dpotrf.f
 create mode 100644 costa/native/external/lapack/dpotri.f
 create mode 100644 costa/native/external/lapack/dpotrs.f
 create mode 100644 costa/native/external/lapack/dppcon.f
 create mode 100644 costa/native/external/lapack/dppequ.f
 create mode 100644 costa/native/external/lapack/dpprfs.f
 create mode 100644 costa/native/external/lapack/dppsv.f
 create mode 100644 costa/native/external/lapack/dppsvx.f
 create mode 100644 costa/native/external/lapack/dpptrf.f
 create mode 100644 costa/native/external/lapack/dpptri.f
 create mode 100644 costa/native/external/lapack/dpptrs.f
 create mode 100644 costa/native/external/lapack/dptcon.f
 create mode 100644 costa/native/external/lapack/dpteqr.f
 create mode 100644 costa/native/external/lapack/dptrfs.f
 create mode 100644 costa/native/external/lapack/dptsv.f
 create mode 100644 costa/native/external/lapack/dptsvx.f
 create mode 100644 costa/native/external/lapack/dpttrf.f
 create mode 100644 costa/native/external/lapack/dpttrs.f
 create mode 100644 costa/native/external/lapack/dptts2.f
 create mode 100644 costa/native/external/lapack/drscl.f
 create mode 100644 costa/native/external/lapack/dsbev.f
 create mode 100644 costa/native/external/lapack/dsbevd.f
 create mode 100644 costa/native/external/lapack/dsbevx.f
 create mode 100644 costa/native/external/lapack/dsbgst.f
 create mode 100644 costa/native/external/lapack/dsbgv.f
 create mode 100644 costa/native/external/lapack/dsbgvd.f
 create mode 100644 costa/native/external/lapack/dsbgvx.f
 create mode 100644 costa/native/external/lapack/dsbtrd.f
 create mode 100644 costa/native/external/lapack/dsecnd.f
 create mode 100644 costa/native/external/lapack/dspcon.f
 create mode 100644 costa/native/external/lapack/dspev.f
 create mode 100644 costa/native/external/lapack/dspevd.f
 create mode 100644 costa/native/external/lapack/dspevx.f
 create mode 100644 costa/native/external/lapack/dspgst.f
 create mode 100644 costa/native/external/lapack/dspgv.f
 create mode 100644 costa/native/external/lapack/dspgvd.f
 create mode 100644 costa/native/external/lapack/dspgvx.f
 create mode 100644 costa/native/external/lapack/dsprfs.f
 create mode 100644 costa/native/external/lapack/dspsv.f
 create mode 100644 costa/native/external/lapack/dspsvx.f
 create mode 100644 costa/native/external/lapack/dsptrd.f
 create mode 100644 costa/native/external/lapack/dsptrf.f
 create mode 100644 costa/native/external/lapack/dsptri.f
 create mode 100644 costa/native/external/lapack/dsptrs.f
 create mode 100644 costa/native/external/lapack/dstebz.f
 create mode 100644 costa/native/external/lapack/dstedc.f
 create mode 100644 costa/native/external/lapack/dstegr.f
 create mode 100644 costa/native/external/lapack/dstein.f
 create mode 100644 costa/native/external/lapack/dsteqr.f
 create mode 100644 costa/native/external/lapack/dsterf.f
 create mode 100644 costa/native/external/lapack/dstev.f
 create mode 100644 costa/native/external/lapack/dstevd.f
 create mode 100644 costa/native/external/lapack/dstevr.f
 create mode 100644 costa/native/external/lapack/dstevx.f
 create mode 100644 costa/native/external/lapack/dsycon.f
 create mode 100644 costa/native/external/lapack/dsyev.f
 create mode 100644 costa/native/external/lapack/dsyevd.f
 create mode 100644 costa/native/external/lapack/dsyevr.f
 create mode 100644 costa/native/external/lapack/dsyevx.f
 create mode 100644 costa/native/external/lapack/dsygs2.f
 create mode 100644 costa/native/external/lapack/dsygst.f
 create mode 100644 costa/native/external/lapack/dsygv.f
 create mode 100644 costa/native/external/lapack/dsygvd.f
 create mode 100644 costa/native/external/lapack/dsygvx.f
 create mode 100644 costa/native/external/lapack/dsyrfs.f
 create mode 100644 costa/native/external/lapack/dsysv.f
 create mode 100644 costa/native/external/lapack/dsysvx.f
 create mode 100644 costa/native/external/lapack/dsytd2.f
 create mode 100644 costa/native/external/lapack/dsytf2.f
 create mode 100644 costa/native/external/lapack/dsytrd.f
 create mode 100644 costa/native/external/lapack/dsytrf.f
 create mode 100644 costa/native/external/lapack/dsytri.f
 create mode 100644 costa/native/external/lapack/dsytrs.f
 create mode 100644 costa/native/external/lapack/dtbcon.f
 create mode 100644 costa/native/external/lapack/dtbrfs.f
 create mode 100644 costa/native/external/lapack/dtbtrs.f
 create mode 100644 costa/native/external/lapack/dtgevc.f
 create mode 100644 costa/native/external/lapack/dtgex2.f
 create mode 100644 costa/native/external/lapack/dtgexc.f
 create mode 100644 costa/native/external/lapack/dtgsen.f
 create mode 100644 costa/native/external/lapack/dtgsja.f
 create mode 100644 costa/native/external/lapack/dtgsna.f
 create mode 100644 costa/native/external/lapack/dtgsy2.f
 create mode 100644 costa/native/external/lapack/dtgsyl.f
 create mode 100644 costa/native/external/lapack/dtpcon.f
 create mode 100644 costa/native/external/lapack/dtprfs.f
 create mode 100644 costa/native/external/lapack/dtptri.f
 create mode 100644 costa/native/external/lapack/dtptrs.f
 create mode 100644 costa/native/external/lapack/dtrcon.f
 create mode 100644 costa/native/external/lapack/dtrevc.f
 create mode 100644 costa/native/external/lapack/dtrexc.f
 create mode 100644 costa/native/external/lapack/dtrrfs.f
 create mode 100644 costa/native/external/lapack/dtrsen.f
 create mode 100644 costa/native/external/lapack/dtrsna.f
 create mode 100644 costa/native/external/lapack/dtrsyl.f
 create mode 100644 costa/native/external/lapack/dtrti2.f
 create mode 100644 costa/native/external/lapack/dtrtri.f
 create mode 100644 costa/native/external/lapack/dtrtrs.f
 create mode 100644 costa/native/external/lapack/dtzrqf.f
 create mode 100644 costa/native/external/lapack/dtzrzf.f
 create mode 100644 costa/native/external/lapack/dzsum1.f
 create mode 100644 costa/native/external/lapack/icmax1.f
 create mode 100644 costa/native/external/lapack/ieeeck.f
 create mode 100644 costa/native/external/lapack/ilaenv.f
 create mode 100644 costa/native/external/lapack/izmax1.f
 create mode 100644 costa/native/external/lapack/lsame.f
 create mode 100644 costa/native/external/lapack/lsamen.f
 create mode 100644 costa/native/external/lapack/sbdsdc.f
 create mode 100644 costa/native/external/lapack/sbdsqr.f
 create mode 100644 costa/native/external/lapack/scsum1.f
 create mode 100644 costa/native/external/lapack/sdisna.f
 create mode 100644 costa/native/external/lapack/second.f
 create mode 100644 costa/native/external/lapack/sgbbrd.f
 create mode 100644 costa/native/external/lapack/sgbcon.f
 create mode 100644 costa/native/external/lapack/sgbequ.f
 create mode 100644 costa/native/external/lapack/sgbrfs.f
 create mode 100644 costa/native/external/lapack/sgbsv.f
 create mode 100644 costa/native/external/lapack/sgbsvx.f
 create mode 100644 costa/native/external/lapack/sgbtf2.f
 create mode 100644 costa/native/external/lapack/sgbtrf.f
 create mode 100644 costa/native/external/lapack/sgbtrs.f
 create mode 100644 costa/native/external/lapack/sgebak.f
 create mode 100644 costa/native/external/lapack/sgebal.f
 create mode 100644 costa/native/external/lapack/sgebd2.f
 create mode 100644 costa/native/external/lapack/sgebrd.f
 create mode 100644 costa/native/external/lapack/sgecon.f
 create mode 100644 costa/native/external/lapack/sgeequ.f
 create mode 100644 costa/native/external/lapack/sgees.f
 create mode 100644 costa/native/external/lapack/sgeesx.f
 create mode 100644 costa/native/external/lapack/sgeev.f
 create mode 100644 costa/native/external/lapack/sgeevx.f
 create mode 100644 costa/native/external/lapack/sgegs.f
 create mode 100644 costa/native/external/lapack/sgegv.f
 create mode 100644 costa/native/external/lapack/sgehd2.f
 create mode 100644 costa/native/external/lapack/sgehrd.f
 create mode 100644 costa/native/external/lapack/sgelq2.f
 create mode 100644 costa/native/external/lapack/sgelqf.f
 create mode 100644 costa/native/external/lapack/sgels.f
 create mode 100644 costa/native/external/lapack/sgelsd.f
 create mode 100644 costa/native/external/lapack/sgelss.f
 create mode 100644 costa/native/external/lapack/sgelsx.f
 create mode 100644 costa/native/external/lapack/sgelsy.f
 create mode 100644 costa/native/external/lapack/sgeql2.f
 create mode 100644 costa/native/external/lapack/sgeqlf.f
 create mode 100644 costa/native/external/lapack/sgeqp3.f
 create mode 100644 costa/native/external/lapack/sgeqpf.f
 create mode 100644 costa/native/external/lapack/sgeqr2.f
 create mode 100644 costa/native/external/lapack/sgeqrf.f
 create mode 100644 costa/native/external/lapack/sgerfs.f
 create mode 100644 costa/native/external/lapack/sgerq2.f
 create mode 100644 costa/native/external/lapack/sgerqf.f
 create mode 100644 costa/native/external/lapack/sgesc2.f
 create mode 100644 costa/native/external/lapack/sgesdd.f
 create mode 100644 costa/native/external/lapack/sgesv.f
 create mode 100644 costa/native/external/lapack/sgesvd.f
 create mode 100644 costa/native/external/lapack/sgesvx.f
 create mode 100644 costa/native/external/lapack/sgetc2.f
 create mode 100644 costa/native/external/lapack/sgetf2.f
 create mode 100644 costa/native/external/lapack/sgetrf.f
 create mode 100644 costa/native/external/lapack/sgetri.f
 create mode 100644 costa/native/external/lapack/sgetrs.f
 create mode 100644 costa/native/external/lapack/sggbak.f
 create mode 100644 costa/native/external/lapack/sggbal.f
 create mode 100644 costa/native/external/lapack/sgges.f
 create mode 100644 costa/native/external/lapack/sggesx.f
 create mode 100644 costa/native/external/lapack/sggev.f
 create mode 100644 costa/native/external/lapack/sggevx.f
 create mode 100644 costa/native/external/lapack/sggglm.f
 create mode 100644 costa/native/external/lapack/sgghrd.f
 create mode 100644 costa/native/external/lapack/sgglse.f
 create mode 100644 costa/native/external/lapack/sggqrf.f
 create mode 100644 costa/native/external/lapack/sggrqf.f
 create mode 100644 costa/native/external/lapack/sggsvd.f
 create mode 100644 costa/native/external/lapack/sggsvp.f
 create mode 100644 costa/native/external/lapack/sgtcon.f
 create mode 100644 costa/native/external/lapack/sgtrfs.f
 create mode 100644 costa/native/external/lapack/sgtsv.f
 create mode 100644 costa/native/external/lapack/sgtsvx.f
 create mode 100644 costa/native/external/lapack/sgttrf.f
 create mode 100644 costa/native/external/lapack/sgttrs.f
 create mode 100644 costa/native/external/lapack/sgtts2.f
 create mode 100644 costa/native/external/lapack/shgeqz.f
 create mode 100644 costa/native/external/lapack/shsein.f
 create mode 100644 costa/native/external/lapack/shseqr.f
 create mode 100644 costa/native/external/lapack/slabad.f
 create mode 100644 costa/native/external/lapack/slabrd.f
 create mode 100644 costa/native/external/lapack/slacon.f
 create mode 100644 costa/native/external/lapack/slacpy.f
 create mode 100644 costa/native/external/lapack/sladiv.f
 create mode 100644 costa/native/external/lapack/slae2.f
 create mode 100644 costa/native/external/lapack/slaebz.f
 create mode 100644 costa/native/external/lapack/slaed0.f
 create mode 100644 costa/native/external/lapack/slaed1.f
 create mode 100644 costa/native/external/lapack/slaed2.f
 create mode 100644 costa/native/external/lapack/slaed3.f
 create mode 100644 costa/native/external/lapack/slaed4.f
 create mode 100644 costa/native/external/lapack/slaed5.f
 create mode 100644 costa/native/external/lapack/slaed6.f
 create mode 100644 costa/native/external/lapack/slaed7.f
 create mode 100644 costa/native/external/lapack/slaed8.f
 create mode 100644 costa/native/external/lapack/slaed9.f
 create mode 100644 costa/native/external/lapack/slaeda.f
 create mode 100644 costa/native/external/lapack/slaein.f
 create mode 100644 costa/native/external/lapack/slaev2.f
 create mode 100644 costa/native/external/lapack/slaexc.f
 create mode 100644 costa/native/external/lapack/slag2.f
 create mode 100644 costa/native/external/lapack/slags2.f
 create mode 100644 costa/native/external/lapack/slagtf.f
 create mode 100644 costa/native/external/lapack/slagtm.f
 create mode 100644 costa/native/external/lapack/slagts.f
 create mode 100644 costa/native/external/lapack/slagv2.f
 create mode 100644 costa/native/external/lapack/slahqr.f
 create mode 100644 costa/native/external/lapack/slahrd.f
 create mode 100644 costa/native/external/lapack/slaic1.f
 create mode 100644 costa/native/external/lapack/slaln2.f
 create mode 100644 costa/native/external/lapack/slals0.f
 create mode 100644 costa/native/external/lapack/slalsa.f
 create mode 100644 costa/native/external/lapack/slalsd.f
 create mode 100644 costa/native/external/lapack/slamch.f
 create mode 100644 costa/native/external/lapack/slamrg.f
 create mode 100644 costa/native/external/lapack/slangb.f
 create mode 100644 costa/native/external/lapack/slange.f
 create mode 100644 costa/native/external/lapack/slangt.f
 create mode 100644 costa/native/external/lapack/slanhs.f
 create mode 100644 costa/native/external/lapack/slansb.f
 create mode 100644 costa/native/external/lapack/slansp.f
 create mode 100644 costa/native/external/lapack/slanst.f
 create mode 100644 costa/native/external/lapack/slansy.f
 create mode 100644 costa/native/external/lapack/slantb.f
 create mode 100644 costa/native/external/lapack/slantp.f
 create mode 100644 costa/native/external/lapack/slantr.f
 create mode 100644 costa/native/external/lapack/slanv2.f
 create mode 100644 costa/native/external/lapack/slapll.f
 create mode 100644 costa/native/external/lapack/slapmt.f
 create mode 100644 costa/native/external/lapack/slapy2.f
 create mode 100644 costa/native/external/lapack/slapy3.f
 create mode 100644 costa/native/external/lapack/slaqgb.f
 create mode 100644 costa/native/external/lapack/slaqge.f
 create mode 100644 costa/native/external/lapack/slaqp2.f
 create mode 100644 costa/native/external/lapack/slaqps.f
 create mode 100644 costa/native/external/lapack/slaqsb.f
 create mode 100644 costa/native/external/lapack/slaqsp.f
 create mode 100644 costa/native/external/lapack/slaqsy.f
 create mode 100644 costa/native/external/lapack/slaqtr.f
 create mode 100644 costa/native/external/lapack/slar1v.f
 create mode 100644 costa/native/external/lapack/slar2v.f
 create mode 100644 costa/native/external/lapack/slarf.f
 create mode 100644 costa/native/external/lapack/slarfb.f
 create mode 100644 costa/native/external/lapack/slarfg.f
 create mode 100644 costa/native/external/lapack/slarft.f
 create mode 100644 costa/native/external/lapack/slarfx.f
 create mode 100644 costa/native/external/lapack/slargv.f
 create mode 100644 costa/native/external/lapack/slarnv.f
 create mode 100644 costa/native/external/lapack/slarrb.f
 create mode 100644 costa/native/external/lapack/slarre.f
 create mode 100644 costa/native/external/lapack/slarrf.f
 create mode 100644 costa/native/external/lapack/slarrv.f
 create mode 100644 costa/native/external/lapack/slartg.f
 create mode 100644 costa/native/external/lapack/slartv.f
 create mode 100644 costa/native/external/lapack/slaruv.f
 create mode 100644 costa/native/external/lapack/slarz.f
 create mode 100644 costa/native/external/lapack/slarzb.f
 create mode 100644 costa/native/external/lapack/slarzt.f
 create mode 100644 costa/native/external/lapack/slas2.f
 create mode 100644 costa/native/external/lapack/slascl.f
 create mode 100644 costa/native/external/lapack/slasd0.f
 create mode 100644 costa/native/external/lapack/slasd1.f
 create mode 100644 costa/native/external/lapack/slasd2.f
 create mode 100644 costa/native/external/lapack/slasd3.f
 create mode 100644 costa/native/external/lapack/slasd4.f
 create mode 100644 costa/native/external/lapack/slasd5.f
 create mode 100644 costa/native/external/lapack/slasd6.f
 create mode 100644 costa/native/external/lapack/slasd7.f
 create mode 100644 costa/native/external/lapack/slasd8.f
 create mode 100644 costa/native/external/lapack/slasd9.f
 create mode 100644 costa/native/external/lapack/slasda.f
 create mode 100644 costa/native/external/lapack/slasdq.f
 create mode 100644 costa/native/external/lapack/slasdt.f
 create mode 100644 costa/native/external/lapack/slaset.f
 create mode 100644 costa/native/external/lapack/slasq1.f
 create mode 100644 costa/native/external/lapack/slasq2.f
 create mode 100644 costa/native/external/lapack/slasq3.f
 create mode 100644 costa/native/external/lapack/slasq4.f
 create mode 100644 costa/native/external/lapack/slasq5.f
 create mode 100644 costa/native/external/lapack/slasq6.f
 create mode 100644 costa/native/external/lapack/slasr.f
 create mode 100644 costa/native/external/lapack/slasrt.f
 create mode 100644 costa/native/external/lapack/slassq.f
 create mode 100644 costa/native/external/lapack/slasv2.f
 create mode 100644 costa/native/external/lapack/slaswp.f
 create mode 100644 costa/native/external/lapack/slasy2.f
 create mode 100644 costa/native/external/lapack/slasyf.f
 create mode 100644 costa/native/external/lapack/slatbs.f
 create mode 100644 costa/native/external/lapack/slatdf.f
 create mode 100644 costa/native/external/lapack/slatps.f
 create mode 100644 costa/native/external/lapack/slatrd.f
 create mode 100644 costa/native/external/lapack/slatrs.f
 create mode 100644 costa/native/external/lapack/slatrz.f
 create mode 100644 costa/native/external/lapack/slatzm.f
 create mode 100644 costa/native/external/lapack/slauu2.f
 create mode 100644 costa/native/external/lapack/slauum.f
 create mode 100644 costa/native/external/lapack/sopgtr.f
 create mode 100644 costa/native/external/lapack/sopmtr.f
 create mode 100644 costa/native/external/lapack/sorg2l.f
 create mode 100644 costa/native/external/lapack/sorg2r.f
 create mode 100644 costa/native/external/lapack/sorgbr.f
 create mode 100644 costa/native/external/lapack/sorghr.f
 create mode 100644 costa/native/external/lapack/sorgl2.f
 create mode 100644 costa/native/external/lapack/sorglq.f
 create mode 100644 costa/native/external/lapack/sorgql.f
 create mode 100644 costa/native/external/lapack/sorgqr.f
 create mode 100644 costa/native/external/lapack/sorgr2.f
 create mode 100644 costa/native/external/lapack/sorgrq.f
 create mode 100644 costa/native/external/lapack/sorgtr.f
 create mode 100644 costa/native/external/lapack/sorm2l.f
 create mode 100644 costa/native/external/lapack/sorm2r.f
 create mode 100644 costa/native/external/lapack/sormbr.f
 create mode 100644 costa/native/external/lapack/sormhr.f
 create mode 100644 costa/native/external/lapack/sorml2.f
 create mode 100644 costa/native/external/lapack/sormlq.f
 create mode 100644 costa/native/external/lapack/sormql.f
 create mode 100644 costa/native/external/lapack/sormqr.f
 create mode 100644 costa/native/external/lapack/sormr2.f
 create mode 100644 costa/native/external/lapack/sormr3.f
 create mode 100644 costa/native/external/lapack/sormrq.f
 create mode 100644 costa/native/external/lapack/sormrz.f
 create mode 100644 costa/native/external/lapack/sormtr.f
 create mode 100644 costa/native/external/lapack/spbcon.f
 create mode 100644 costa/native/external/lapack/spbequ.f
 create mode 100644 costa/native/external/lapack/spbrfs.f
 create mode 100644 costa/native/external/lapack/spbstf.f
 create mode 100644 costa/native/external/lapack/spbsv.f
 create mode 100644 costa/native/external/lapack/spbsvx.f
 create mode 100644 costa/native/external/lapack/spbtf2.f
 create mode 100644 costa/native/external/lapack/spbtrf.f
 create mode 100644 costa/native/external/lapack/spbtrs.f
 create mode 100644 costa/native/external/lapack/spocon.f
 create mode 100644 costa/native/external/lapack/spoequ.f
 create mode 100644 costa/native/external/lapack/sporfs.f
 create mode 100644 costa/native/external/lapack/sposv.f
 create mode 100644 costa/native/external/lapack/sposvx.f
 create mode 100644 costa/native/external/lapack/spotf2.f
 create mode 100644 costa/native/external/lapack/spotrf.f
 create mode 100644 costa/native/external/lapack/spotri.f
 create mode 100644 costa/native/external/lapack/spotrs.f
 create mode 100644 costa/native/external/lapack/sppcon.f
 create mode 100644 costa/native/external/lapack/sppequ.f
 create mode 100644 costa/native/external/lapack/spprfs.f
 create mode 100644 costa/native/external/lapack/sppsv.f
 create mode 100644 costa/native/external/lapack/sppsvx.f
 create mode 100644 costa/native/external/lapack/spptrf.f
 create mode 100644 costa/native/external/lapack/spptri.f
 create mode 100644 costa/native/external/lapack/spptrs.f
 create mode 100644 costa/native/external/lapack/sptcon.f
 create mode 100644 costa/native/external/lapack/spteqr.f
 create mode 100644 costa/native/external/lapack/sptrfs.f
 create mode 100644 costa/native/external/lapack/sptsv.f
 create mode 100644 costa/native/external/lapack/sptsvx.f
 create mode 100644 costa/native/external/lapack/spttrf.f
 create mode 100644 costa/native/external/lapack/spttrs.f
 create mode 100644 costa/native/external/lapack/sptts2.f
 create mode 100644 costa/native/external/lapack/srscl.f
 create mode 100644 costa/native/external/lapack/ssbev.f
 create mode 100644 costa/native/external/lapack/ssbevd.f
 create mode 100644 costa/native/external/lapack/ssbevx.f
 create mode 100644 costa/native/external/lapack/ssbgst.f
 create mode 100644 costa/native/external/lapack/ssbgv.f
 create mode 100644 costa/native/external/lapack/ssbgvd.f
 create mode 100644 costa/native/external/lapack/ssbgvx.f
 create mode 100644 costa/native/external/lapack/ssbtrd.f
 create mode 100644 costa/native/external/lapack/sspcon.f
 create mode 100644 costa/native/external/lapack/sspev.f
 create mode 100644 costa/native/external/lapack/sspevd.f
 create mode 100644 costa/native/external/lapack/sspevx.f
 create mode 100644 costa/native/external/lapack/sspgst.f
 create mode 100644 costa/native/external/lapack/sspgv.f
 create mode 100644 costa/native/external/lapack/sspgvd.f
 create mode 100644 costa/native/external/lapack/sspgvx.f
 create mode 100644 costa/native/external/lapack/ssprfs.f
 create mode 100644 costa/native/external/lapack/sspsv.f
 create mode 100644 costa/native/external/lapack/sspsvx.f
 create mode 100644 costa/native/external/lapack/ssptrd.f
 create mode 100644 costa/native/external/lapack/ssptrf.f
 create mode 100644 costa/native/external/lapack/ssptri.f
 create mode 100644 costa/native/external/lapack/ssptrs.f
 create mode 100644 costa/native/external/lapack/sstebz.f
 create mode 100644 costa/native/external/lapack/sstedc.f
 create mode 100644 costa/native/external/lapack/sstegr.f
 create mode 100644 costa/native/external/lapack/sstein.f
 create mode 100644 costa/native/external/lapack/ssteqr.f
 create mode 100644 costa/native/external/lapack/ssterf.f
 create mode 100644 costa/native/external/lapack/sstev.f
 create mode 100644 costa/native/external/lapack/sstevd.f
 create mode 100644 costa/native/external/lapack/sstevr.f
 create mode 100644 costa/native/external/lapack/sstevx.f
 create mode 100644 costa/native/external/lapack/ssycon.f
 create mode 100644 costa/native/external/lapack/ssyev.f
 create mode 100644 costa/native/external/lapack/ssyevd.f
 create mode 100644 costa/native/external/lapack/ssyevr.f
 create mode 100644 costa/native/external/lapack/ssyevx.f
 create mode 100644 costa/native/external/lapack/ssygs2.f
 create mode 100644 costa/native/external/lapack/ssygst.f
 create mode 100644 costa/native/external/lapack/ssygv.f
 create mode 100644 costa/native/external/lapack/ssygvd.f
 create mode 100644 costa/native/external/lapack/ssygvx.f
 create mode 100644 costa/native/external/lapack/ssyrfs.f
 create mode 100644 costa/native/external/lapack/ssysv.f
 create mode 100644 costa/native/external/lapack/ssysvx.f
 create mode 100644 costa/native/external/lapack/ssytd2.f
 create mode 100644 costa/native/external/lapack/ssytf2.f
 create mode 100644 costa/native/external/lapack/ssytrd.f
 create mode 100644 costa/native/external/lapack/ssytrf.f
 create mode 100644 costa/native/external/lapack/ssytri.f
 create mode 100644 costa/native/external/lapack/ssytrs.f
 create mode 100644 costa/native/external/lapack/stbcon.f
 create mode 100644 costa/native/external/lapack/stbrfs.f
 create mode 100644 costa/native/external/lapack/stbtrs.f
 create mode 100644 costa/native/external/lapack/stgevc.f
 create mode 100644 costa/native/external/lapack/stgex2.f
 create mode 100644 costa/native/external/lapack/stgexc.f
 create mode 100644 costa/native/external/lapack/stgsen.f
 create mode 100644 costa/native/external/lapack/stgsja.f
 create mode 100644 costa/native/external/lapack/stgsna.f
 create mode 100644 costa/native/external/lapack/stgsy2.f
 create mode 100644 costa/native/external/lapack/stgsyl.f
 create mode 100644 costa/native/external/lapack/stpcon.f
 create mode 100644 costa/native/external/lapack/stprfs.f
 create mode 100644 costa/native/external/lapack/stptri.f
 create mode 100644 costa/native/external/lapack/stptrs.f
 create mode 100644 costa/native/external/lapack/strcon.f
 create mode 100644 costa/native/external/lapack/strevc.f
 create mode 100644 costa/native/external/lapack/strexc.f
 create mode 100644 costa/native/external/lapack/strrfs.f
 create mode 100644 costa/native/external/lapack/strsen.f
 create mode 100644 costa/native/external/lapack/strsna.f
 create mode 100644 costa/native/external/lapack/strsyl.f
 create mode 100644 costa/native/external/lapack/strti2.f
 create mode 100644 costa/native/external/lapack/strtri.f
 create mode 100644 costa/native/external/lapack/strtrs.f
 create mode 100644 costa/native/external/lapack/stzrqf.f
 create mode 100644 costa/native/external/lapack/stzrzf.f
 create mode 100644 costa/native/external/lapack/xerbla.f
 create mode 100644 costa/native/external/lapack/zbdsqr.f
 create mode 100644 costa/native/external/lapack/zdrot.f
 create mode 100644 costa/native/external/lapack/zdrscl.f
 create mode 100644 costa/native/external/lapack/zgbbrd.f
 create mode 100644 costa/native/external/lapack/zgbcon.f
 create mode 100644 costa/native/external/lapack/zgbequ.f
 create mode 100644 costa/native/external/lapack/zgbrfs.f
 create mode 100644 costa/native/external/lapack/zgbsv.f
 create mode 100644 costa/native/external/lapack/zgbsvx.f
 create mode 100644 costa/native/external/lapack/zgbtf2.f
 create mode 100644 costa/native/external/lapack/zgbtrf.f
 create mode 100644 costa/native/external/lapack/zgbtrs.f
 create mode 100644 costa/native/external/lapack/zgebak.f
 create mode 100644 costa/native/external/lapack/zgebal.f
 create mode 100644 costa/native/external/lapack/zgebd2.f
 create mode 100644 costa/native/external/lapack/zgebrd.f
 create mode 100644 costa/native/external/lapack/zgecon.f
 create mode 100644 costa/native/external/lapack/zgeequ.f
 create mode 100644 costa/native/external/lapack/zgees.f
 create mode 100644 costa/native/external/lapack/zgeesx.f
 create mode 100644 costa/native/external/lapack/zgeev.f
 create mode 100644 costa/native/external/lapack/zgeevx.f
 create mode 100644 costa/native/external/lapack/zgegs.f
 create mode 100644 costa/native/external/lapack/zgegv.f
 create mode 100644 costa/native/external/lapack/zgehd2.f
 create mode 100644 costa/native/external/lapack/zgehrd.f
 create mode 100644 costa/native/external/lapack/zgelq2.f
 create mode 100644 costa/native/external/lapack/zgelqf.f
 create mode 100644 costa/native/external/lapack/zgels.f
 create mode 100644 costa/native/external/lapack/zgelsd.f
 create mode 100644 costa/native/external/lapack/zgelss.f
 create mode 100644 costa/native/external/lapack/zgelsx.f
 create mode 100644 costa/native/external/lapack/zgelsy.f
 create mode 100644 costa/native/external/lapack/zgeql2.f
 create mode 100644 costa/native/external/lapack/zgeqlf.f
 create mode 100644 costa/native/external/lapack/zgeqp3.f
 create mode 100644 costa/native/external/lapack/zgeqpf.f
 create mode 100644 costa/native/external/lapack/zgeqr2.f
 create mode 100644 costa/native/external/lapack/zgeqrf.f
 create mode 100644 costa/native/external/lapack/zgerfs.f
 create mode 100644 costa/native/external/lapack/zgerq2.f
 create mode 100644 costa/native/external/lapack/zgerqf.f
 create mode 100644 costa/native/external/lapack/zgesc2.f
 create mode 100644 costa/native/external/lapack/zgesdd.f
 create mode 100644 costa/native/external/lapack/zgesv.f
 create mode 100644 costa/native/external/lapack/zgesvd.f
 create mode 100644 costa/native/external/lapack/zgesvx.f
 create mode 100644 costa/native/external/lapack/zgetc2.f
 create mode 100644 costa/native/external/lapack/zgetf2.f
 create mode 100644 costa/native/external/lapack/zgetrf.f
 create mode 100644 costa/native/external/lapack/zgetri.f
 create mode 100644 costa/native/external/lapack/zgetrs.f
 create mode 100644 costa/native/external/lapack/zggbak.f
 create mode 100644 costa/native/external/lapack/zggbal.f
 create mode 100644 costa/native/external/lapack/zgges.f
 create mode 100644 costa/native/external/lapack/zggesx.f
 create mode 100644 costa/native/external/lapack/zggev.f
 create mode 100644 costa/native/external/lapack/zggevx.f
 create mode 100644 costa/native/external/lapack/zggglm.f
 create mode 100644 costa/native/external/lapack/zgghrd.f
 create mode 100644 costa/native/external/lapack/zgglse.f
 create mode 100644 costa/native/external/lapack/zggqrf.f
 create mode 100644 costa/native/external/lapack/zggrqf.f
 create mode 100644 costa/native/external/lapack/zggsvd.f
 create mode 100644 costa/native/external/lapack/zggsvp.f
 create mode 100644 costa/native/external/lapack/zgtcon.f
 create mode 100644 costa/native/external/lapack/zgtrfs.f
 create mode 100644 costa/native/external/lapack/zgtsv.f
 create mode 100644 costa/native/external/lapack/zgtsvx.f
 create mode 100644 costa/native/external/lapack/zgttrf.f
 create mode 100644 costa/native/external/lapack/zgttrs.f
 create mode 100644 costa/native/external/lapack/zgtts2.f
 create mode 100644 costa/native/external/lapack/zhbev.f
 create mode 100644 costa/native/external/lapack/zhbevd.f
 create mode 100644 costa/native/external/lapack/zhbevx.f
 create mode 100644 costa/native/external/lapack/zhbgst.f
 create mode 100644 costa/native/external/lapack/zhbgv.f
 create mode 100644 costa/native/external/lapack/zhbgvd.f
 create mode 100644 costa/native/external/lapack/zhbgvx.f
 create mode 100644 costa/native/external/lapack/zhbtrd.f
 create mode 100644 costa/native/external/lapack/zhecon.f
 create mode 100644 costa/native/external/lapack/zheev.f
 create mode 100644 costa/native/external/lapack/zheevd.f
 create mode 100644 costa/native/external/lapack/zheevr.f
 create mode 100644 costa/native/external/lapack/zheevx.f
 create mode 100644 costa/native/external/lapack/zhegs2.f
 create mode 100644 costa/native/external/lapack/zhegst.f
 create mode 100644 costa/native/external/lapack/zhegv.f
 create mode 100644 costa/native/external/lapack/zhegvd.f
 create mode 100644 costa/native/external/lapack/zhegvx.f
 create mode 100644 costa/native/external/lapack/zherfs.f
 create mode 100644 costa/native/external/lapack/zhesv.f
 create mode 100644 costa/native/external/lapack/zhesvx.f
 create mode 100644 costa/native/external/lapack/zhetd2.f
 create mode 100644 costa/native/external/lapack/zhetf2.f
 create mode 100644 costa/native/external/lapack/zhetrd.f
 create mode 100644 costa/native/external/lapack/zhetrf.f
 create mode 100644 costa/native/external/lapack/zhetri.f
 create mode 100644 costa/native/external/lapack/zhetrs.f
 create mode 100644 costa/native/external/lapack/zhgeqz.f
 create mode 100644 costa/native/external/lapack/zhpcon.f
 create mode 100644 costa/native/external/lapack/zhpev.f
 create mode 100644 costa/native/external/lapack/zhpevd.f
 create mode 100644 costa/native/external/lapack/zhpevx.f
 create mode 100644 costa/native/external/lapack/zhpgst.f
 create mode 100644 costa/native/external/lapack/zhpgv.f
 create mode 100644 costa/native/external/lapack/zhpgvd.f
 create mode 100644 costa/native/external/lapack/zhpgvx.f
 create mode 100644 costa/native/external/lapack/zhprfs.f
 create mode 100644 costa/native/external/lapack/zhpsv.f
 create mode 100644 costa/native/external/lapack/zhpsvx.f
 create mode 100644 costa/native/external/lapack/zhptrd.f
 create mode 100644 costa/native/external/lapack/zhptrf.f
 create mode 100644 costa/native/external/lapack/zhptri.f
 create mode 100644 costa/native/external/lapack/zhptrs.f
 create mode 100644 costa/native/external/lapack/zhsein.f
 create mode 100644 costa/native/external/lapack/zhseqr.f
 create mode 100644 costa/native/external/lapack/zlabrd.f
 create mode 100644 costa/native/external/lapack/zlacgv.f
 create mode 100644 costa/native/external/lapack/zlacon.f
 create mode 100644 costa/native/external/lapack/zlacp2.f
 create mode 100644 costa/native/external/lapack/zlacpy.f
 create mode 100644 costa/native/external/lapack/zlacrm.f
 create mode 100644 costa/native/external/lapack/zlacrt.f
 create mode 100644 costa/native/external/lapack/zladiv.f
 create mode 100644 costa/native/external/lapack/zlaed0.f
 create mode 100644 costa/native/external/lapack/zlaed7.f
 create mode 100644 costa/native/external/lapack/zlaed8.f
 create mode 100644 costa/native/external/lapack/zlaein.f
 create mode 100644 costa/native/external/lapack/zlaesy.f
 create mode 100644 costa/native/external/lapack/zlaev2.f
 create mode 100644 costa/native/external/lapack/zlags2.f
 create mode 100644 costa/native/external/lapack/zlagtm.f
 create mode 100644 costa/native/external/lapack/zlahef.f
 create mode 100644 costa/native/external/lapack/zlahqr.f
 create mode 100644 costa/native/external/lapack/zlahrd.f
 create mode 100644 costa/native/external/lapack/zlaic1.f
 create mode 100644 costa/native/external/lapack/zlals0.f
 create mode 100644 costa/native/external/lapack/zlalsa.f
 create mode 100644 costa/native/external/lapack/zlalsd.f
 create mode 100644 costa/native/external/lapack/zlangb.f
 create mode 100644 costa/native/external/lapack/zlange.f
 create mode 100644 costa/native/external/lapack/zlangt.f
 create mode 100644 costa/native/external/lapack/zlanhb.f
 create mode 100644 costa/native/external/lapack/zlanhe.f
 create mode 100644 costa/native/external/lapack/zlanhp.f
 create mode 100644 costa/native/external/lapack/zlanhs.f
 create mode 100644 costa/native/external/lapack/zlanht.f
 create mode 100644 costa/native/external/lapack/zlansb.f
 create mode 100644 costa/native/external/lapack/zlansp.f
 create mode 100644 costa/native/external/lapack/zlansy.f
 create mode 100644 costa/native/external/lapack/zlantb.f
 create mode 100644 costa/native/external/lapack/zlantp.f
 create mode 100644 costa/native/external/lapack/zlantr.f
 create mode 100644 costa/native/external/lapack/zlapll.f
 create mode 100644 costa/native/external/lapack/zlapmt.f
 create mode 100644 costa/native/external/lapack/zlaqgb.f
 create mode 100644 costa/native/external/lapack/zlaqge.f
 create mode 100644 costa/native/external/lapack/zlaqhb.f
 create mode 100644 costa/native/external/lapack/zlaqhe.f
 create mode 100644 costa/native/external/lapack/zlaqhp.f
 create mode 100644 costa/native/external/lapack/zlaqp2.f
 create mode 100644 costa/native/external/lapack/zlaqps.f
 create mode 100644 costa/native/external/lapack/zlaqsb.f
 create mode 100644 costa/native/external/lapack/zlaqsp.f
 create mode 100644 costa/native/external/lapack/zlaqsy.f
 create mode 100644 costa/native/external/lapack/zlar1v.f
 create mode 100644 costa/native/external/lapack/zlar2v.f
 create mode 100644 costa/native/external/lapack/zlarcm.f
 create mode 100644 costa/native/external/lapack/zlarf.f
 create mode 100644 costa/native/external/lapack/zlarfb.f
 create mode 100644 costa/native/external/lapack/zlarfg.f
 create mode 100644 costa/native/external/lapack/zlarft.f
 create mode 100644 costa/native/external/lapack/zlarfx.f
 create mode 100644 costa/native/external/lapack/zlargv.f
 create mode 100644 costa/native/external/lapack/zlarnv.f
 create mode 100644 costa/native/external/lapack/zlarrv.f
 create mode 100644 costa/native/external/lapack/zlartg.f
 create mode 100644 costa/native/external/lapack/zlartv.f
 create mode 100644 costa/native/external/lapack/zlarz.f
 create mode 100644 costa/native/external/lapack/zlarzb.f
 create mode 100644 costa/native/external/lapack/zlarzt.f
 create mode 100644 costa/native/external/lapack/zlascl.f
 create mode 100644 costa/native/external/lapack/zlaset.f
 create mode 100644 costa/native/external/lapack/zlasr.f
 create mode 100644 costa/native/external/lapack/zlassq.f
 create mode 100644 costa/native/external/lapack/zlaswp.f
 create mode 100644 costa/native/external/lapack/zlasyf.f
 create mode 100644 costa/native/external/lapack/zlatbs.f
 create mode 100644 costa/native/external/lapack/zlatdf.f
 create mode 100644 costa/native/external/lapack/zlatps.f
 create mode 100644 costa/native/external/lapack/zlatrd.f
 create mode 100644 costa/native/external/lapack/zlatrs.f
 create mode 100644 costa/native/external/lapack/zlatrz.f
 create mode 100644 costa/native/external/lapack/zlatzm.f
 create mode 100644 costa/native/external/lapack/zlauu2.f
 create mode 100644 costa/native/external/lapack/zlauum.f
 create mode 100644 costa/native/external/lapack/zpbcon.f
 create mode 100644 costa/native/external/lapack/zpbequ.f
 create mode 100644 costa/native/external/lapack/zpbrfs.f
 create mode 100644 costa/native/external/lapack/zpbstf.f
 create mode 100644 costa/native/external/lapack/zpbsv.f
 create mode 100644 costa/native/external/lapack/zpbsvx.f
 create mode 100644 costa/native/external/lapack/zpbtf2.f
 create mode 100644 costa/native/external/lapack/zpbtrf.f
 create mode 100644 costa/native/external/lapack/zpbtrs.f
 create mode 100644 costa/native/external/lapack/zpocon.f
 create mode 100644 costa/native/external/lapack/zpoequ.f
 create mode 100644 costa/native/external/lapack/zporfs.f
 create mode 100644 costa/native/external/lapack/zposv.f
 create mode 100644 costa/native/external/lapack/zposvx.f
 create mode 100644 costa/native/external/lapack/zpotf2.f
 create mode 100644 costa/native/external/lapack/zpotrf.f
 create mode 100644 costa/native/external/lapack/zpotri.f
 create mode 100644 costa/native/external/lapack/zpotrs.f
 create mode 100644 costa/native/external/lapack/zppcon.f
 create mode 100644 costa/native/external/lapack/zppequ.f
 create mode 100644 costa/native/external/lapack/zpprfs.f
 create mode 100644 costa/native/external/lapack/zppsv.f
 create mode 100644 costa/native/external/lapack/zppsvx.f
 create mode 100644 costa/native/external/lapack/zpptrf.f
 create mode 100644 costa/native/external/lapack/zpptri.f
 create mode 100644 costa/native/external/lapack/zpptrs.f
 create mode 100644 costa/native/external/lapack/zptcon.f
 create mode 100644 costa/native/external/lapack/zpteqr.f
 create mode 100644 costa/native/external/lapack/zptrfs.f
 create mode 100644 costa/native/external/lapack/zptsv.f
 create mode 100644 costa/native/external/lapack/zptsvx.f
 create mode 100644 costa/native/external/lapack/zpttrf.f
 create mode 100644 costa/native/external/lapack/zpttrs.f
 create mode 100644 costa/native/external/lapack/zptts2.f
 create mode 100644 costa/native/external/lapack/zrot.f
 create mode 100644 costa/native/external/lapack/zspcon.f
 create mode 100644 costa/native/external/lapack/zspmv.f
 create mode 100644 costa/native/external/lapack/zspr.f
 create mode 100644 costa/native/external/lapack/zsprfs.f
 create mode 100644 costa/native/external/lapack/zspsv.f
 create mode 100644 costa/native/external/lapack/zspsvx.f
 create mode 100644 costa/native/external/lapack/zsptrf.f
 create mode 100644 costa/native/external/lapack/zsptri.f
 create mode 100644 costa/native/external/lapack/zsptrs.f
 create mode 100644 costa/native/external/lapack/zstedc.f
 create mode 100644 costa/native/external/lapack/zstegr.f
 create mode 100644 costa/native/external/lapack/zstein.f
 create mode 100644 costa/native/external/lapack/zsteqr.f
 create mode 100644 costa/native/external/lapack/zsycon.f
 create mode 100644 costa/native/external/lapack/zsymv.f
 create mode 100644 costa/native/external/lapack/zsyr.f
 create mode 100644 costa/native/external/lapack/zsyrfs.f
 create mode 100644 costa/native/external/lapack/zsysv.f
 create mode 100644 costa/native/external/lapack/zsysvx.f
 create mode 100644 costa/native/external/lapack/zsytf2.f
 create mode 100644 costa/native/external/lapack/zsytrf.f
 create mode 100644 costa/native/external/lapack/zsytri.f
 create mode 100644 costa/native/external/lapack/zsytrs.f
 create mode 100644 costa/native/external/lapack/ztbcon.f
 create mode 100644 costa/native/external/lapack/ztbrfs.f
 create mode 100644 costa/native/external/lapack/ztbtrs.f
 create mode 100644 costa/native/external/lapack/ztgevc.f
 create mode 100644 costa/native/external/lapack/ztgex2.f
 create mode 100644 costa/native/external/lapack/ztgexc.f
 create mode 100644 costa/native/external/lapack/ztgsen.f
 create mode 100644 costa/native/external/lapack/ztgsja.f
 create mode 100644 costa/native/external/lapack/ztgsna.f
 create mode 100644 costa/native/external/lapack/ztgsy2.f
 create mode 100644 costa/native/external/lapack/ztgsyl.f
 create mode 100644 costa/native/external/lapack/ztpcon.f
 create mode 100644 costa/native/external/lapack/ztprfs.f
 create mode 100644 costa/native/external/lapack/ztptri.f
 create mode 100644 costa/native/external/lapack/ztptrs.f
 create mode 100644 costa/native/external/lapack/ztrcon.f
 create mode 100644 costa/native/external/lapack/ztrevc.f
 create mode 100644 costa/native/external/lapack/ztrexc.f
 create mode 100644 costa/native/external/lapack/ztrrfs.f
 create mode 100644 costa/native/external/lapack/ztrsen.f
 create mode 100644 costa/native/external/lapack/ztrsna.f
 create mode 100644 costa/native/external/lapack/ztrsyl.f
 create mode 100644 costa/native/external/lapack/ztrti2.f
 create mode 100644 costa/native/external/lapack/ztrtri.f
 create mode 100644 costa/native/external/lapack/ztrtrs.f
 create mode 100644 costa/native/external/lapack/ztzrqf.f
 create mode 100644 costa/native/external/lapack/ztzrzf.f
 create mode 100644 costa/native/external/lapack/zung2l.f
 create mode 100644 costa/native/external/lapack/zung2r.f
 create mode 100644 costa/native/external/lapack/zungbr.f
 create mode 100644 costa/native/external/lapack/zunghr.f
 create mode 100644 costa/native/external/lapack/zungl2.f
 create mode 100644 costa/native/external/lapack/zunglq.f
 create mode 100644 costa/native/external/lapack/zungql.f
 create mode 100644 costa/native/external/lapack/zungqr.f
 create mode 100644 costa/native/external/lapack/zungr2.f
 create mode 100644 costa/native/external/lapack/zungrq.f
 create mode 100644 costa/native/external/lapack/zungtr.f
 create mode 100644 costa/native/external/lapack/zunm2l.f
 create mode 100644 costa/native/external/lapack/zunm2r.f
 create mode 100644 costa/native/external/lapack/zunmbr.f
 create mode 100644 costa/native/external/lapack/zunmhr.f
 create mode 100644 costa/native/external/lapack/zunml2.f
 create mode 100644 costa/native/external/lapack/zunmlq.f
 create mode 100644 costa/native/external/lapack/zunmql.f
 create mode 100644 costa/native/external/lapack/zunmqr.f
 create mode 100644 costa/native/external/lapack/zunmr2.f
 create mode 100644 costa/native/external/lapack/zunmr3.f
 create mode 100644 costa/native/external/lapack/zunmrq.f
 create mode 100644 costa/native/external/lapack/zunmrz.f
 create mode 100644 costa/native/external/lapack/zunmtr.f
 create mode 100644 costa/native/external/lapack/zupgtr.f
 create mode 100644 costa/native/external/lapack/zupmtr.f

diff --git a/costa/native/cta/cta_util_timing.f90 b/costa/native/cta/cta_util_timing.f90
new file mode 100644
index 000000000..8d8727cee
--- /dev/null
+++ b/costa/native/cta/cta_util_timing.f90
@@ -0,0 +1,89 @@
+!
+! $URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_util_timing.f90 $
+! $Revision: 680 $, $Date: 2008-10-10 17:01:17 +0200 (Fri, 10 Oct 2008) $
+! 
+!COSTA: Problem solving environment for data assimilation
+!Copyright (C) 2008  Nils van Velzen
+!
+!This library is free software; you can redistribute it and/or
+!modify it under the terms of the GNU Lesser General Public
+!License as published by the Free Software Foundation; either
+!version 2.1 of the License, or (at your option) any later version.
+!
+!This library is distributed in the hope that it will be useful,
+!but WITHOUT ANY WARRANTY; without even the implied warranty of
+!MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+!Lesser General Public License for more details.
+!
+!You should have received a copy of the GNU Lesser General Public
+!License along with this library; if not, write to the Free Software
+!Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+
+!DEC$ ATTRIBUTES DLLEXPORT::cta_util_timing
+subroutine cta_util_timing(dtimes,itime,start)
+implicit none
+real(kind=8), dimension(2,3,*), intent(inout) ::dtimes
+integer,                        intent(in)    ::itime
+integer,                        intent(in)    ::start
+
+real(kind=8) ::wtime,ctime
+
+call cta_util_timing_walltime(wtime)
+call cpu_time(ctime)
+if (start>0) then
+   dtimes(1,1,itime)=wtime
+   dtimes(2,1,itime)=ctime
+else
+   dtimes(1,2,itime)=wtime
+   dtimes(2,2,itime)=ctime
+   dtimes(1,3,itime)=dtimes(1,3,itime)+(dtimes(1,2,itime)-dtimes(1,1,itime))
+   dtimes(2,3,itime)=dtimes(2,3,itime)+(dtimes(2,2,itime)-dtimes(2,1,itime))
+endif
+
+
+end subroutine cta_util_timing
+
+
+subroutine cta_util_timing_cputime(dtime)
+implicit none
+real(kind=8), intent(out)  ::dtime
+
+call cpu_time(dtime)
+
+end subroutine cta_util_timing_cputime
+
+
+
+
+subroutine cta_util_timing_walltime(dtime)
+implicit none
+real(kind=8), intent(out)  ::dtime
+
+integer, dimension(8) ::times
+integer ::year,month,day,hour,minutes,seconds,milisec,julnum
+integer ::imon1
+
+
+call date_and_time(values=times)
+year   =times(1)
+month  =times(2)
+day    =times(3)
+hour   =times(5)
+minutes=times(6)
+seconds=times(7)
+milisec=times(8)
+
+!calculate julian day number
+imon1 = (month-14)/12;
+julnum = day - 32075 + 1461 * ( year + 4800 + imon1 ) / 4 &
+         + 367 * ( month - 2 - imon1 * 12 ) / 12         &
+         - 3 * ( ( year + 4900 + imon1 ) / 100 ) / 4;
+
+dtime=(((dble(julnum)*24.0d0+dble(hour))*60.0d0+dble(minutes))*60.0d0)+dble(seconds)+dble(milisec)*1.0d-3
+
+
+end subroutine cta_util_timing_walltime
+
+
+
diff --git a/costa/native/cta/include/cta.h b/costa/native/cta/include/cta.h
new file mode 100644
index 000000000..bde7d2814
--- /dev/null
+++ b/costa/native/cta/include/cta.h
@@ -0,0 +1,57 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+   \mainpage
+TODO: fill the main page of the doxygen documentation
+
+**/
+
+#ifndef CTA_H
+#define CTA_H
+#include "cta_initialise.h"
+#include "cta_datatypes.h" 
+#include "cta_interface.h"
+#include "cta_functions.h"
+#include "cta_file.h"
+#include "cta_string.h"
+#include "cta_vector.h"
+#include "cta_vector_blas.h"
+#include "cta_tree.h"
+#include "cta_matrix.h"
+#include "cta_matrix_blas.h"
+#include "cta_sobs.h"
+//#include "cta_sobs_sqlite3.h"
+#include "cta_treevector.h"
+#include "cta_model.h"
+#include "cta_model_factory.h"
+#include "cta_errors.h"
+#include "cta_xml.h"
+#include "cta_defaults.h"
+#include "cta_modbuild_sp.h"
+#include "cta_modbuild_par.h"
+#include "cta_pack.h"
+#include "cta_metainfo.h"
+#include "cta_modelcombiner.h"
+#include "cta_util_sort.h"
+#include "cta_util_statistics.h"
+#include "cta_util_methods.h"
+#include "cta_message.h"
+
+#endif
diff --git a/costa/native/cta/include/cta_array.h b/costa/native/cta/include/cta_array.h
new file mode 100644
index 000000000..5d6bac3b0
--- /dev/null
+++ b/costa/native/cta/include/cta_array.h
@@ -0,0 +1,317 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2012  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_array.h
+\brief Interface description of array component.
+
+   An array in OpenDA is an N-dimensional Matrix of values.
+   In a similar wat as in Matlab and Octave a vector is a special case of a Matrix/Array.
+
+*/
+
+#ifndef CTA_ARRAY_H
+#define CTA_ARRAY_H
+#include "cta_system.h"
+#include "cta_handles.h"
+#include "cta_datatypes.h"
+/* Function Handle */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef CTA_Handle CTA_Array;
+
+/** \brief Free Array instance
+ *
+ * \param h IO handle of Array instance CTA_NULL on return
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Array_Free(CTA_Array *h);
+
+/** \brief Create a new Array instance and fill with given values
+ *
+ * \param values      I values of new Array
+ * \param nDimensions I Number of dimensions of new array
+ * \param dimensions  I Number of elements in each dimension
+ * \param h           O Array handle of new instance
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Array_CreateAsDoubles(double *values, int nDimensions, int *dimensions, CTA_Array *h);
+
+/** \brief Get number of dimensions, eg 1 for a vector and 2 for a matrix
+ * \param h           I Array handle
+ *  \return rank of the array
+ */
+CTAEXPORT int CTA_Array_getNumberOfDimensions(CTA_Array h);
+
+/** \brief Get the sizes of the individual dimensions
+ * \param h           I Array handle
+ * \param nDimensions O  size of array, eg 2 for the array [[3,4,10],[3,4,10]]
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Array_getnDimensions(CTA_Array h, int *nDimensions);
+
+/** \brief Get the size of the array for each dimension. 
+ * The length of the return value equals the number of dimensions.
+ *
+ * \param h          I Array handle
+ * \param dimensions O size of array, eg [3,4,10]
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Array_getDimensions(CTA_Array h, int *dimensions);
+
+/** \brief Total number of elements in an array. 
+ * This is equal to the product of the values returned by getDimension and is thus just for c
+ *
+ * \param h      I Array handle
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Array_length(CTA_Array h);
+
+/** \brief Get all values of an array.
+ * The values are guaranteed be a copy if
+ * copyValues==true or may be a reference if copyValues==false.
+ * Note that the values are never guaranteed to be a reference and can not be used
+ * to change the array. 
+ *
+ * \param h      I Array handle
+ * \param values O Values from the array
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Array_getValuesAsDoubles(CTA_Array h, double *values);
+
+/** \brief Get a part of the values from the array. 
+ * Eg let a=[[1,2,3][4,5,6]] then getValuesAsDoubles(1,2)
+ * returns [2,3]
+ * Note that negative values are allowed to denote counting from the end, is -1 denotes the last value.
+ * \param h          I Array handle
+ * \param firstIndex I firstIndex of selection
+ * \param lastIndex  I lastIndex of selection
+ * \param values     O Values from the array
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Array_getValuesAsDoubles_indexrange(CTA_Array h, int firstIndex, int lastIndex, double *values);
+
+/** \brief Get a value from an array for specific indices. 
+ * Eg let a=[[1,2,3][4,5,6]] then getValueAsDouble(5)
+ * returns 6
+ * Note that negative values are allowed to denote counting from the end, is -1 denotes the last value.
+ * \param h       I Array handle
+ * \param index   I index specifier
+ * \param value   O double value at the specific indices
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Array_getValueAsDoubles_index(CTA_Array h, int index, double *value);
+
+/** \brief Get a value from an array for specific indices. 
+ * Eg let a=[[1,2,3][4,5,6]] then getValueAsDouble([1,2])
+ * returns 6
+ * Note that negative values are allowed to denote counting from the end, is -1 denotes the last value.
+ * \param h        I Array handle
+ * \param nIndices I length of array of indices indices
+ * \param indices  I indices of value
+ * \param value    O value from array
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Array_getValueAsDouble_indices(CTA_Array h, int nIndices, int* indices, double *value);
+
+/** \brief Set the values of this array.
+ * \param h      I Array handle
+ * \param values I the values as an array of doubles
+ * \param length I length of array values
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Array_setValuesAsDoubles(CTA_Array h, double *values, int length);
+
+/** \brief Set a value from an array for specific indices. 
+ * Eg let a=[[1,2,3][4,5,6]] then setValueAsDouble([1,2],60.)
+ * results in a=[[1,2,3][4,5,60.]]
+ * Note that negative values are allowed to denote counting from the end, is -1 denotes the last value.
+ * \param h         I Array handle
+ * \param nIndices  I length of array indices
+ * \param indices   I index specifier
+ * \param value     I values at the specific indices
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Array_setValueAsDouble_indices(CTA_Array h, int nIndices, int *indices, double value);
+
+/** \brief Set part of the values of this array.
+ * Let a=[[1,2,3][4,5,6]] then setValuesAsDoubles(1,2,[20,30]) will result in a=[[1,20,30][4,5,6]]
+ * Note that negative values are allowed to denote counting from the end, is -1 denotes the last value.
+ * \param h          I Array handle
+ * \param firstIndex I specifies the start of the selection
+ * \param lastIndex  I specifies the end of the selection
+ * \param values     I that will replace the selected range of numbers
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Array_setValuesAsDoubles_indexrange(CTA_Array h, int firstIndex, int lastIndex, double *values);
+
+/** \brief Get a value from an array for specific indices. 
+ * Eg let a=[[1,2,3][4,5,6]] then setValueAsDouble(5,60.)
+ * results in a=[[1,2,3][4,5,60.]]
+ * Note that negative values are allowed to denote counting from the end, is -1 denotes the last value.
+ * \param h          I Array handle
+ * \param index      I index specifier
+ * \param value      I value that will replace the selected number
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Array_setValueAsDouble_index(CTA_Array h, int index, double value);
+
+/** \brief Set whole vector equal to a constant value.
+ * Note:  This method can only be used if all elements of the vector
+ * have the same data type.
+ * \param h     I Array handle
+ * \param value I value to set
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Array_setConstant(CTA_Array h, double value);
+
+/** \brief Perform a values += alpha * axpyValues</c> operation on each value in this array.
+ * \param h          I Array handle
+ * \param alpha      I The <c>alpha</c> in <c>state variable += alpha * vector</c>.
+ * \param axpyValues I the values for the axpy-operation on all values in this array.
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Array_axpyOnValues(CTA_Array h, double alpha, double *axpyValues);
+
+/** \brief Multiply each value in this array with the corresponding multiplication factor.
+ * \param h                     I Array handle
+ * \param multiplicationFactors I the multiplication factors for all array values.
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Array_multiplyValues(CTA_Array h, double *multiplicationFactors);
+
+/** \brief Change the dimensions of an array. The new array should have the same length
+ * \param h           I Array handle
+ * \param nDimensions I length of array dimensions
+ * \param dimensions  I new dimensions of array
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Array_reshape(CTA_Array h, int nDimensions, int *dimensions);
+
+
+/** \brief Get part of the array by selection of a subset in one dimension.
+ * Eg. a=[[1,2,3],[4,5,6]] a.getSlice(0,0) returns [1,2,3] 
+ * Note that the number of dimensions IS reduced by one.
+ * \param h           I Array handle
+ * \param dimension   I Dimension to make selection in
+ * \param index       I Element in dimension that is selected
+ * \param h_out       O new Array with selected selection
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Array_getSlice(CTA_Array h, int dimension, int index, CTA_Array *h_out);
+
+/** \brief Get part of the array by selection of a subset in one dimension.
+ * Eg. a=[[1,2,3],[4,5,6],[7,8,9]] a.getSlice(0,0,1) returns [[1,2],[3,4]] 
+ * Note that the number of dimensions is NOT reduced by one.
+ * \param h           I Array handle
+ * \param dimension   I Dimension to make selection in
+ * \param minIndex    I start of range to select
+ * \param maxIndex    I end of range to select
+ * \param h_out       O new Array with selected selection
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Array_getSlice_range(CTA_Array h, int dimension, int minIndex, int maxIndex, CTA_Array *h_out);
+
+/** \brief Get part of the array by selection of a subset in one dimension.
+ * Eg. a=[[1,2,3],[4,5,6],[7,8,9]] a.getSlice(0,0,1) returns [1,2,3,4] 
+ * \param h           I Array handle
+ * \param dimension   I Dimension to make selection in
+ * \param minIndex    I start of range to select
+ * \param maxIndex    I end of range to select
+ * \param values      O selected values
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Array_getSliceAsDoubles_range(CTA_Array h, int dimension, int minIndex, int maxIndex, double *values);
+
+
+/** \brief Set the values of a part of an array. 
+ * Eg. a=[[1,2,3],[4,5,6],[7,8,9]] and a.setSlice([11,12,13],1,1) 
+ * sets the second column a=[[1,11,3],[4,12,6],[7,13,9]]
+ * Note that the dimension of the slice is one smaller than for the array.
+ * \param h         I Array handle
+ * \param slice     I Values to set
+ * \param dimension I Dimension that is replaced
+ * \param index     I Index of selected dimension to replace
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Array_setSliceAsDoubles(CTA_Array h, double *slice, int dimension, int index);
+
+
+/** \brief Set the values of a part of an array. 
+ * Eg. a=[[1,2,3],[4,5,6],[7,8,9]] and a.setSlice([11,12,13],1,1) 
+ * sets the second column a=[[1,11,3],[4,12,6],[7,13,9]]
+ * Note that the dimension of the slice is one smaller than for the array.
+ * \param h         I Array handle
+ * \param slice_h   I Values to set
+ * \param dimension I Dimension that is replaced
+ * \param index     I Index of selected dimension to replace
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Array_setSliceAsArray(CTA_Array h, CTA_Array slice_h, int dimension, int index);
+
+/** \brief Set the values of a part of an array. 
+ * Eg. a=[[1,2,3],[4,5,6],[7,8,9]] and a.setSlice([[11,12,13],[14,15,16]],1,1,2) 
+ * sets the second and third columns a=[[1,11,14],[4,12,15],[7,13,16]]
+ * Note that the dimension of the slice is the same as for the array.
+ * \param h         I Array handle
+ * \param slice_h   I Values to set
+ * \param dimension I Dimension that is replaced
+ * \param minIndex  I Start of range in Dimension to be replaced
+ * \param maxIndex  I End of range in Dimension to be replaced
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Array_setSliceAsArray_range(CTA_Array h, CTA_Array slice_h, int dimension, int minIndex, int maxIndex);
+
+/** \brief Set the values of a part of an array. 
+ * Eg. a=[[1,2,3],[4,5,6],[7,8,9]] and a.setSlice([11,12,13,14,15,16],1,1,2) 
+ * sets the second and third columns a=[[1,11,14],[4,12,15],[7,13,16]]
+ * Note that the dimension of the slice is the same as for the array.
+ * \param h         I Array handle
+ * \param slice     I Values to set
+ * \param dimension I Dimension that is replaced
+ * \param minIndex  I Start of range in Dimension to be replaced
+ * \param maxIndex  I End of range in Dimension to be replaced
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Array_setSliceAsDoubles_range(CTA_Array h, double *slice, int dimension, int minIndex, int maxIndex);
+
+/** \brief Convert indices in multiple dimensions to position in the one-dimensional array as
+ * returned eg by getValuesAsDoubles
+ * Eg. a=[[1,2,3],[4,5,6],[7,8,9]] then valueIndex([1,0]) returns 3 which points to the value 4 here.  
+ * \param h        I Array handle
+ * \param nIndices I length of indices
+ * \param indices  I indices of element
+ * \param index    O position of element
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Array_valueIndex(CTA_Array h, int nIndices, int *indices, int *index);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+
+
+
+#endif
diff --git a/costa/native/cta/include/cta_bb_modbuild.h b/costa/native/cta/include/cta_bb_modbuild.h
new file mode 100644
index 000000000..c0aa446ae
--- /dev/null
+++ b/costa/native/cta/include/cta_bb_modbuild.h
@@ -0,0 +1,153 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_modbuild_b3.h
+\brief Description of the COSTA blackbox component
+*/
+
+#ifndef CTA_MODBUILD_BB_H
+#define CTA_MODBUILD_BB_H
+
+#include <string.h>
+#ifdef WIN32
+#include <direct.h>
+#endif
+#include "cta.h"
+#include "cta_system.h"
+#include "f_cta_utils.h"
+#include "cta_datatypes.h"
+#include "cta_model_utilities.h" 
+#include "cta_handles.h"
+#include "cta_datetime.h"
+
+#define STRING_MAX		1024                 /* maximum length of string */
+
+#define BB_ENCODING  ("utf-8")              /* XML encodiing */
+
+
+typedef struct BB_Variable BB_Variable;
+typedef BB_Variable * BB_VariablePntr;
+
+struct BB_Variable {
+   char    *name;                            /* variable name */
+   double   value;                           /* variable value */
+};
+
+typedef struct BB_Station BB_Station;
+typedef BB_Station * BB_StationPntr;
+
+struct BB_Station {
+   char             *name;                   /* station name */
+   char             *location;               /* station location */
+   char             *filename;               /* station filename */
+   int               nvariables;             /* station number of variables */
+   BB_VariablePntr *variables;              /* station variables */
+};
+
+typedef struct BB_Forcing BB_Forcing;
+typedef BB_Forcing * BB_ForcingPntr;
+
+struct BB_Forcing {
+   char    *name;                            /* forcing name */
+   char    *file;                            /* forcing file */
+   char    *element;                         /* forcing element */
+   char    *property;                        /* forcing property */
+   char    *item;                            /* forcing item */
+   double   value;                           /* forcing value */
+};
+
+typedef struct BB_Parameter BB_Parameter;
+typedef BB_Parameter * BB_ParameterPntr;
+
+struct BB_Parameter {
+   char    *name;                            /* parameter name */
+   char    *file;                            /* parameter file */
+   char    *element;                         /* parameter element */
+   char    *property;                        /* parameter property */
+   char    *item;                            /* parameter item */
+   double   value;                           /* parameter value */
+};
+
+typedef struct BB_StateExchange BB_StateExchange;
+typedef BB_StateExchange * BB_StateExchangePntr;
+
+struct BB_StateExchange {
+   char    *executable;                      /* name of blackbox executable */
+   char    *state2model;                     /* name of input file */
+   char    *model2state;                     /* name of output file */
+   char    *outputSteps;                     /* output steps 'last' or 'all' */
+   char    *state2model_file;                /* filename to write state */
+   char    *model2state_file;                /* filename to read state */
+   char    *initial_state_file;              /* filename containing initial state */
+   CTA_Func state2model_func;                /* Handle to user function for writing state 2 model */
+   CTA_Func model2state_func;                /* Handle to user function for writing model 2 state */
+   CTA_Func initial_state_func;                /* Handle to user function for reading initial state */
+  int      nofquantities;                    /* number of quantities == number of substates */
+  int     quantsize;                          /* length of quantityvector */
+  int nofdimensions;                         /* number of co-ordinate dimensions */
+  CTA_String     *hquantid;                 /* id of each quantity */ 
+  CTA_String     *hdimid;                 /* id of each dimension */ 
+  int *dimlength;                        /* length of each dimension */
+};
+
+typedef struct BB_Model BB_Model;
+typedef BB_Model * BB_ModelPntr;
+
+struct BB_Model{
+   char                   *type;             /* model type */
+   char                   *description;      /* model description */
+   double                  timestep;         /* simulation timestep in seconds */
+   int                     simulationNumber; /* simulation number */
+   char                   *workingdir;       /* working map of simulation */
+   char                   *templatedir;      /* map with simulation template */
+   char                   *simulationMap;    /* map with simulation results */
+   int                     stateLength;      /* length of the state vector */
+   int                     nstations;        /* number of stations */
+   BB_StateExchangePntr   stateexchange;    /* pointer to the state exchange variables */
+   BB_StationPntr        *stations;         /* pointer to the list of stations */
+   int                     nparameters;      /* number of parameters */
+   BB_ParameterPntr      *parameters;       /* pointer to the list of parameters */
+   int                     nforcings;        /* number of forcings */
+   BB_ForcingPntr        *forcings;         /* pointer to the list of forcings */
+};
+
+static int BB_simulationNumber=0;             // Number of the simulation         
+
+#define BB_INDEX_THIS              ( 0) /* Handle of instance               */
+#define BB_INDEX_TIME              ( 1) /* Time instance of model (state)   */
+#define BB_INDEX_STATE             ( 2) /* State vector of model            */
+#define BB_INDEX_FORCINGS          ( 3) /* Tree vector containing the forcings of the model */
+#define BB_INDEX_PARAMETERS        ( 4) /* Tree vector of model parameters */
+#define BB_INDEX_USERDATA          ( 5) /* Userdata */
+
+#define BB_SIZE_DATABLK            ( 6)
+
+
+/** \brief Create the model class of the BB Black-box builder
+ *
+ * \note This is not a user function. It is called at initialization of the
+ *       COSTA environment.
+ *
+ *  \param modelcls  O  receives handle of the BB-modelbuilder class
+ */
+CTANOEXPORT void CTA_Modbuild_b3b_CreateClass(CTA_ModelClass *modelcls);
+
+#endif
+
diff --git a/costa/native/cta/include/cta_bb_modbuild_utils.h b/costa/native/cta/include/cta_bb_modbuild_utils.h
new file mode 100644
index 000000000..ec7493325
--- /dev/null
+++ b/costa/native/cta/include/cta_bb_modbuild_utils.h
@@ -0,0 +1,171 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2006  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_modbuild_b3b_utils_h.h
+\brief Description of the COSTA blackbox component utilisties
+*/
+
+#ifndef CTA_MODBUILD_BB_UTILS_H
+#define CTA_MODBUILD_BB_UTILS_H
+
+#include <libxml/encoding.h>
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#include <libxml/xinclude.h>
+#include <libxml/xmlwriter.h>
+
+#include "cta.h"
+#include "cta_xml.h"
+#include "cta_bb_modbuild.h"
+
+/** \brief Copy a string
+ *
+ * \param name     I  string to be copied
+ *
+ * \return copy of the string
+ */
+CTANOEXPORT char *        BB_CopyText(const char *in);
+
+/** \brief Reallocates a block of memory
+ *
+ * \param memblock  I  Pointer to previously allocated memory block
+ * \param size      I  New size in bytes.
+ *
+ * \return pointer to the allocated space
+ */
+CTANOEXPORT void *        BB_Realloc(void *memblock, size_t size);
+
+/** \brief Allocates a block of memory
+ *
+ * \param nelem     I  Number of elements
+ * \param elsize    I  Element size
+ *
+ * \return pointer to the allocated space
+ */
+CTANOEXPORT void *        BB_Malloc(size_t nelem, size_t elsize);
+   
+/** \brief Frees a block of memory that is currently allocated
+ *
+ * \param pntr     I  starting address of the memory block to be freed
+ *
+ * \return no return value
+ */
+CTANOEXPORT void          BB_Free(void *pntr);
+
+/** \brief Frees a model that is currently allocated
+ *
+ * \param pntr     I  starting address of the model to be freed
+ *
+ * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE
+ */
+CTANOEXPORT int           BB_Free_Model(BB_ModelPntr model);
+
+/** \brief Frees a stateexchange that is currently allocated
+ *
+ * \param pntr     I  starting address of the stateexchange to be freed
+ *
+ * \return no return value
+ */
+CTANOEXPORT void          BB_Free_StateExchange(BB_StateExchangePntr stateexchange);
+
+/** \brief Frees a parameter that is currently allocated
+ *
+ * \param pntr     I  starting address of the parameter to be freed
+ * \return no return value
+ */
+CTANOEXPORT void          BB_Free_Parameter(BB_ParameterPntr parameter);
+
+/** \brief Frees a forcing that is currently allocated
+ *
+ * \param pntr     I  starting address of the forcing to be freed
+ * \return no return value
+ */
+CTANOEXPORT void          BB_Free_Forcing(BB_ForcingPntr forcings);
+
+/** \brief Frees a station that is currently allocated
+ *
+ * \param pntr     I  starting address of the station to be freed
+ * \return no return value
+ */
+CTANOEXPORT void          BB_Free_Station(BB_StationPntr stations);
+
+/** \brief Convert COSTA state vector to BB model
+ *
+ * \param model    I  Pointer naar de model data
+ * \param start    I  Simulatie starttijd in modified julian day
+ * \param stop     I  Simulatie stoptijd in modified julian day
+ * \param sstate   I  Model state to be written
+ * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE
+ */
+CTANOEXPORT int BB_State2Model(BB_ModelPntr model, double start, double stop, CTA_TreeVector sstate);
+
+/** \brief Convert BB model to COSTA state vector
+ *
+ * \param model    I  Pointer naar de model data
+ * \param start    I  Simulatie starttijd in modified julian day
+ * \param stop     I  Simulatie stoptijd in modified julian day
+ * \param sstate   O  Pointer naar state vector
+ * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE
+ */
+CTANOEXPORT int  BB_Model2State(BB_ModelPntr model, double start, double stop, CTA_TreeVector *sstate);
+
+/** \brief Open model2state file 
+ *
+ * \param model     I  Pointer naar de model data
+ * \param htree     O  Pointer naar costa tree
+ * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE
+ */
+CTANOEXPORT int           BB_OpenModel2State(BB_ModelPntr model, CTA_Tree *htree);
+
+/** \brief Get value of BB model 
+ *
+ * \param model     I  Pointer naar de model data
+ * \param htree     I  Pointer naar costa tree
+ * \param station   I  Name of the station
+ * \param variable  I  Name of the variabele
+ * \param time      I  tijdstip in MJD
+ * \param value     O  waarde van de variabele op tijdstip van station
+ * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE
+ */
+CTANOEXPORT int           BB_Model2StateByStationVariableTime(BB_ModelPntr model, CTA_Tree htree, char *station, char *variable, double time, double *value);
+
+/** \brief 
+ *
+ * \param model    I  Pointer naar de model data
+ * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE
+ */
+CTANOEXPORT int           BB_Run_Model(BB_ModelPntr model);
+
+/** \brief 
+ *
+ * \param 
+ * \return 
+ */
+CTANOEXPORT BB_ModelPntr BB_Read_Model(int simulationNumber, CTA_Tree hmodelinput, CTA_Handle *sstate, CTA_Handle *sparam, CTA_Handle *sforc);
+
+/** \brief Check for a COSTA error
+ *
+ * \param ierr     I  COSTA error code
+ * \param msg      I  Error message
+ * \return no return value
+ */
+CTANOEXPORT void          BB_CheckError(int ierr, const char *msg);
+
+#endif
diff --git a/costa/native/cta/include/cta_datatypes.h b/costa/native/cta/include/cta_datatypes.h
new file mode 100644
index 000000000..457d278c4
--- /dev/null
+++ b/costa/native/cta/include/cta_datatypes.h
@@ -0,0 +1,208 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file cta_datatypes.h
+\brief Definitions of constants and datatypes that are publically used in COSTA
+*/
+
+
+#ifndef CTA_DATA_TYPS_H
+#define CTA_DATA_TYPS_H
+#include <stdlib.h>
+#include <math.h>
+#include "cta_system.h"
+
+/** The datatpe of a COSTA Datatype */
+typedef int CTA_Datatype;
+ 
+/** A COSTA-function CTA_Function *function
+    is a void-pointer. If it points to an
+    existing function, it may be used to call
+    that function, simply by
+    retval = function(arguments);
+*/
+typedef void CTA_Function();  
+
+/*! handle to a COSTA object */
+#define CTA_HANDLE      (-1)
+/*! A handle of a COSTA interface object */
+#define CTA_INTERFACE   (-2)
+/*! A handle of a COSTA function object  */
+#define CTA_FUNCTION    (-3)
+/*! A handle of a COSTA vector object    */
+#define CTA_VECTOR      (-4)
+/*! A handle of a COSTA Vector class     */
+#define CTA_VECTORCLASS (-5)
+/*! A handle of a COSTA (sub) treevector      */
+#define CTA_TREEVECTOR       (-6)
+/*! A handle of a COSTA Matrix class     */
+#define CTA_MATRIXCLASS (-7)
+/*! A handle of a COSTA Matrix           */
+#define CTA_MATRIX      (-8)
+/*! A handle of a COSTA Covariance matrix class     */
+#define CTA_COVMATCLASS (-9)
+/*! A handle of a COSTA Covariance matrix    */
+#define CTA_COVMAT      (-10)
+/*! A handle of a COSTA Interpolation matrix */
+#define CTA_INTPOL      (-11)
+/*! A handle of a COSTA observation component */
+#define CTA_OBS         (-12)
+/*! A handle of a COSTA Model class     */
+#define CTA_MODELCLASS  (-13)
+/*! A handle of a COSTA Model    */
+#define CTA_MODEL       (-14)
+/*! A handle of a COSTA Time */
+#define CTA_TIME        (-15)
+/*! A handle of a COSTA StochObserver object    */
+#define CTA_SOBS        (-16)
+/*! A handle of a COSTA StochObserver class     */
+#define CTA_SOBSCLASS   (-17)
+/*! A handle of a COSTA ObserverDescription object */
+#define CTA_OBSDESCR    (-18)
+/*! A handle of a COSTA ObserverDescription class */
+#define CTA_OBSDESCRCLASS (CTA_SOBSCLASS)
+/*! A handle of a COSTA Method object */
+#define CTA_METHODCLASS (-19)
+/*! A handle of a COSTA Method object */
+#define CTA_METHOD      (-20)
+/*! A handle of a COSTA tree object    */
+#define CTA_TREE        (-21)
+/*! A handle of a COSTA Pack-object */
+#define CTA_PACK        (-22)
+/*! A handle to an unknown data object */
+#define CTA_DATABLOCK   (-23)
+/*! A handle of a COSTA meta information object */
+#define CTA_METAINFO   (-24)
+/*! A handle of a COSTA meta information object */
+#define CTA_METAINFOCLASS   (-25)
+/*! A handle of a COSTA relation table object */
+#define CTA_RELTABLE        (-26)
+/*! An explicit subtreevector (for XML-use)  */
+#define CTA_SUBTREEVECTOR        (-27)
+/*! CTA_Datatypes constant: COSTA-string  */
+#define CTA_STRING      (-28)
+/*! CTA_Datatypes constant: COSTA-file  */
+#define CTA_FILE        (-29)
+/*! CTA_Datatypes constant: COSTA-array  */
+#define CTA_ARRAY        (-30)
+
+
+
+// Codes to indicate CTA_Datatypes
+/*! COSTA CTA_Datatypes: arbitrary void */
+#define CTA_VOID        (-100)
+/*! CTA_Datatypes constant: integer (scalar) */
+#define CTA_INTEGER     (-101)
+/*! CTA_Datatypes constant: single precision real (scalar) */
+#define CTA_REAL        (-102)
+/*! CTA_Datatypes constant: double precision real (scalar) */
+#define CTA_DOUBLE      (-103)
+/*! CTA_Datatypes constant: FORTRAN-string */
+#define CTA_FSTRING     (-104)
+/*! CTA_Datatypes constant: C-string */
+#define CTA_CSTRING     (-105)
+
+
+/*! CTA_Datatypes constant: integer array */
+#define CTA_1DINTEGER   (-201)
+/*! CTA_Datatypes constant: single precision real array */
+#define CTA_1DREAL      (-202)
+/*! CTA_Datatypes constant: double precision real array */
+#define CTA_1DDOUBLE    (-203)
+/*! CTA_Datatypes constant: array of FORTRAN-strings */
+#define CTA_1DFSTRING   (-204)
+/*! CTA_Datatypes constant: array of C-strings */
+#define CTA_1DCSTRING   (-205)
+
+// Common Constants
+
+/*! COSTA flag/constant: Default */
+#define CTA_DEFAULT          (0)
+/*! COSTA flag/constant: NULL-Handle */
+#define CTA_NULL             (0)
+/*! COSTA flag/constant: FALSE */
+#define CTA_FALSE            (0)
+/*! COSTA flag/constant: TRUE */
+#define CTA_TRUE             (1)
+
+#ifndef BOOL
+/*! Datatype for boolians */
+#define BOOL int
+/*! Boolean value of TRUE */
+#ifndef TRUE
+#define TRUE (1)
+#endif
+/*! Boolean value of FALSE */
+#ifndef FALSE
+#define FALSE (0)
+#endif
+#endif
+#endif
+
+#ifndef M_PI
+/*! constant pi */
+#define M_PI (3.14159265358979)
+#endif
+
+/*! The machine precision.  More precisely, `eps' is the largest
+     relative spacing between any two adjacent numbers in the machine's
+     floating point system.  This number is obviously system-dependent.
+     On machines that support 64 bit IEEE floating point arithmetic,
+     `eps' is approximately  2.2204e-16.
+     This variable is not set automatically (yet)
+*/
+#ifndef M_EPS
+#define M_EPS (2.22044604925031e-16)
+
+
+#define CTA_STRLEN_TAG   (80)
+#define CTA_STRLEN_NAME  (80)
+
+
+//#define CTA_ASSIMOBS    (1)
+//#define CTA_VALIDATEOBS (2)
+//#define CTA_ALLOBS      (3)
+
+/*! flush stdout and stderr in a number of the COSTA methods. useful for debugging */
+#define CTA_FLUSH_ALWAYS          ( 1)
+
+/*! FORTRAN file unit for standard output */
+#define CTA_F77_STDOUT            ( 6) 
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \brief Get the result of the C-function sizeof for a COSTA datatype
+ *
+ *  \param datatype I COSTA data type
+ *  \param size     O receives result sizeof-function
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_SizeOf(CTA_Datatype datatype, int *size);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif
+
diff --git a/costa/native/cta/include/cta_datetime.h b/costa/native/cta/include/cta_datetime.h
new file mode 100644
index 000000000..63d419ad5
--- /dev/null
+++ b/costa/native/cta/include/cta_datetime.h
@@ -0,0 +1,122 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2007  Johan Ansink
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_datetime.h
+\brief Interface description of the default COSTA datetime component.
+
+A datetime object describes a date and time object
+*/
+
+#ifndef CTA_DATETIME_H
+#define CTA_DATETIME_H
+
+#include <time.h>
+#include "cta_handles.h"
+#include "cta_datatypes.h"
+#include "cta_system.h"
+
+#define MJDREF   2400000.5 
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Julian day number from Gregorian date.
+ *
+ * \param year    I  Year
+ * \param month   I  Month
+ * \param day     I  Day
+ * \param hour    I  Hour
+ * \param minute  I  Minute
+ * \param second  I  Second
+ * \param jd      O  Julian day number
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_DateTime_GregorianToJulian(int year, int month, int day, int hour, int minute, int second, double *jd);
+
+/** \brief Modified Julian day number from Gregorian date.
+ *
+ * \param year    I  Year
+ * \param month   I  Month
+ * \param day     I  Day
+ * \param hour    I  Hour
+ * \param minute  I  Minute
+ * \param second  I  Second
+ * \param mjd     O  Modified Julian day number
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_DateTime_GregorianToModifiedJulian(int year, int month, int day, int hour, int minute, int second, double *mjd);
+
+/** \brief Convert days into hours, minutes, and seconds. 
+ *
+ * \param days    I  Year
+ * \param hour    O  Hour
+ * \param minute  O  Minute
+ * \param second  O  Second
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_DateTime_DaysToHMS (double days, int *hour, int *minute, int *second);
+
+/** \brief Julian day number from Modified Julian day number
+ *
+ * \param mjd     I  Modified Julian day number
+ * \param jd      O  Julian day number
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_DateTime_ModifiedJulianToJulian(double mjd, double *jd);
+
+/** \brief Gregorian calendar date from Julian day number
+ *
+ * \param jd      I  Julian day number
+ * \param year    O  Year
+ * \param month   O  Month
+ * \param day     O  Day
+ * \param hour    O  Hour
+ * \param minute  O  Minute
+ * \param second  O  Second
+ * \param jd      O  Julian day number
+ *
+ * \return error status: CTA_OK if successful
+*/
+CTAEXPORT int CTA_DateTime_JulianToGregorian(double jd, int *year, int *month, int *day, int *hour, int *minute, int *second);
+
+/** \brief Gregorian calendar date from Modified Julian day number
+ *
+ * \param mjd     I  Modified Julian day number
+ * \param year    O  Year
+ * \param month   O  Month
+ * \param day     O  Day
+ * \param hour    O  Hour
+ * \param minute  O  Minute
+ * \param second  O  Second
+ *
+ * \return error status: CTA_OK if successful
+*/
+CTAEXPORT int CTA_DateTime_ModifiedJulianToGregorian(double mjd, int *year, int *month, int *day, int *hour, int *minute, int *second);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/costa/native/cta/include/cta_defaults.h b/costa/native/cta/include/cta_defaults.h
new file mode 100644
index 000000000..ed4cce296
--- /dev/null
+++ b/costa/native/cta/include/cta_defaults.h
@@ -0,0 +1,111 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_defaults.h
+\brief A list of all default class implementations.
+*/
+
+#ifndef CTA_DEFAULTS_H
+#define CTA_DEFAULTS_H
+#include "cta_system.h"
+#include "cta_vector.h"
+#include "cta_matrix.h"
+#include "cta_sobs.h"
+#include "cta_model.h"
+#include "cta_file.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/** default dense vector class implementation */
+CTAEXPORT extern CTA_VecClass  CTA_DEFAULT_VECTOR;
+
+/** default stochastic observer class implementation 
+ *  based on an sqlite3 database                     */
+CTAEXPORT extern CTA_SObsClass CTA_DEFAULT_SOBS;
+
+/** stochastic observer class implementation
+ * that concatenates stochastic observers of arbitrary kind */
+CTAEXPORT extern CTA_SObsClass CTA_COMBINE_SOBS;
+
+/** stochastic observer class implementation
+ * based on NetCDF input files                       */
+CTAEXPORT extern CTA_SObsClass CTA_NETCDF_SOBS;
+
+/** stochastic observer class implementation
+ * based on NetCDF input files                       */
+CTAEXPORT extern CTA_SObsClass CTA_MAORI_SOBS;
+
+/** stochastic observer class implementation
+ * based on user provided library                    */
+CTAEXPORT extern CTA_SObsClass CTA_USER_SOBS;
+
+/** observer description class implementation based on a simple table */
+CTAEXPORT extern CTA_ObsDescrClass CTA_OBSDESCR_TABLE;
+
+/** default dense matrix class implementation */
+CTAEXPORT extern CTA_MatClass CTA_DEFAULT_MATRIX;
+
+/** The SP (Single processor) model builder */
+CTAEXPORT extern CTA_ModelClass CTA_MODBUILD_SP;
+
+/** The PAR (multiple processor parallel) model builder */
+CTAEXPORT extern CTA_ModelClass CTA_MODBUILD_PAR;
+
+
+/** File handle that actually prints to standard out */
+CTAEXPORT extern CTA_File CTA_FILE_STDOUT;
+
+/** The modelcombiner */
+CTAEXPORT extern CTA_ModelClass CTA_MODELCOMBINER;
+
+/** The B3B (Black box) model builder */
+CTAEXPORT extern CTA_ModelClass CTA_MODBUILD_B3B;
+
+/** The 'New' (Black box) model builder */
+CTAEXPORT extern CTA_ModelClass CTA_MODBUILD_BB;
+
+/** Operator for dispaying the scaled RMS of the roots of a treevector to use with method
+ *  CTA_TreeVector_OpOnLeafs */
+CTAEXPORT CTA_Func extern CTA_OP_ROOT_RMS;
+
+/** Operator for finding the index of the maxabs of the roots of a 
+  * treevector to use with method CTA_TreeVector_OpOnLeafs */
+CTAEXPORT CTA_Func extern CTA_OP_ROOT_AMAX;
+
+/** Operator for finding displaying the values at given indices of the
+  * roots of a treevector to use with method CTA_TreeVector_OpOnLeafs */
+CTAEXPORT CTA_Func extern CTA_OP_ROOT_PRINTI;
+
+/** Operator for dispaying the scaled Sum-of-Squares of the roots of a 
+    treevector to use with method CTA_TreeVector_OpOnLeafs */
+CTAEXPORT CTA_Func extern CTA_OP_ROOT_SSQ;
+
+/** Initial random seed */
+CTAEXPORT extern long int CTA_INITIAL_RANDOM_SEED;
+
+/** Name of the user provided dynamic library with user functions */
+CTAEXPORT extern char userDefaultDynamicLibrary[256];
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/costa/native/cta/include/cta_errors.h b/costa/native/cta/include/cta_errors.h
new file mode 100644
index 000000000..5b0bbba86
--- /dev/null
+++ b/costa/native/cta/include/cta_errors.h
@@ -0,0 +1,108 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file cta_errors.h
+\brief Definitions of error return values used by COSTA.
+*/
+
+
+#ifndef CTA_ERRORS_H
+#define CTA_ERRORS_H
+
+// Common errors
+/** No errors occured */
+#define CTA_OK                      ( 0)
+/** The handle of a COSTA component is not associated to any
+    COSTA component nor is it the CTA_NULL handle. */
+#define CTA_ILLEGAL_HANDLE          (-2)
+/** A handle of a COSTA component instance corresponds to a 
+    different component than expected */
+#define CTA_INCOMPATIBLE_HANDLE     (-3)
+/** A handle of a COSTA component cannot be found in administration */
+#define CTA_HANDLE_NOT_FOUND        (-4)
+/** Output array is too short to contain result */
+#define CTA_ARRAY_TOO_SHORT         (10)
+/** The datatype does not exist or is not supported */
+#define CTA_ILLEGAL_DATATYPE        (11)
+/** Dimensions of the component are different from input */
+#define CTA_DIMENSION_ERROR         (12)
+/** The operation is not (yet) supported for given combination of vectors-classes */
+#define CTA_INCOMPATIBLE_VECTORS    (13)
+/** The concatenation operation is not possible */
+#define CTA_CONCAT_NOT_POSSIBLE     (14)
+/** The setval operation is not possible */
+#define CTA_SETVAL_NOT_POSSIBLE     (15)
+/** Requested item not found */
+#define CTA_ITEM_NOT_FOUND          (16)
+/** Trying to access an non-initialised subtreevector */
+#define CTA_UNINITIALISED_SUBTREEVECTOR (17)
+/** Operation between two different treevectors is not possible because build-up/dimension is different */
+#define CTA_TREEVECTORS_NOT_COMPATIBLE   (18)
+/** The operation is not (yet) supported for given combination of matrix-classes */
+#define CTA_INCOMPATIBLE_MATRICES   (19)
+/** There is no implementation available for this method */
+#define CTA_NOT_IMPLEMENTED         (20)
+/** Illegal specification of userdata argument */
+#define CTA_WRONG_USERDATA          (21)
+/** Opening of file failed */
+#define CTA_CANNOT_OPEN_FILE        (22)
+/** Command is not valid */
+#define CTA_INVALID_COMMAND         (23)
+/** Error in external library */
+#define CTA_EXTERNAL_ERROR          (24)
+/** The matrix is singular */
+#define CTA_SINGULAR_MATRIX         (25)
+/** The buffer length is too small */
+#define CTA_BUFFER_TOO_SMALL        (26)
+/** The import/export format is not supported */
+#define CTA_FORMAT_NOT_SUPPORTED    (27)
+/** The metainfo of two treevectors is incompatible */ 
+#define CTA_INCOMPATIBLE_METAINFO   (28)
+/** The combined model is invalid  */ 
+#define CTA_COMBINED_MODEL_ERROR   (29)
+/** The announced observations do not fit into timespan of compute */
+#define CTA_ANNOUNCED_OBS_INTERVAL_ERROR (30)
+
+/** Operation is not (yet) implemented for given object */
+#define CTA_INPUT_OBJECT_NOT_SUPPORTED (31)
+/** Input arguments are of incompatible data type */
+#define CTA_INPUT_OBJECTS_ARE_INCOMPATIBLE (32)
+/** The relation tables cannot be combined */
+#define CTA_RELTABLES_CANNOT_BE_COMBINED (33)
+/** The matrix is not square              */
+#define CTA_MATRIX_IS_NOT_SQUARE (34)
+
+/** The input value is not correct */
+#define CTA_ILLEGAL_INPUT_ARGUMENT (34)
+
+/** Cannot find the process group */
+#define CTA_CANNOT_FIND_PROCESS_GROUP (35)
+
+/** Some JNI-interface error */
+#define CTA_JNI_INTERFACING_ERROR (36)
+#define CTA_RESULTWRITER_ERROR (37)
+
+/** Hmm. you have found an internal error in the COSTA implementation */
+#define CTA_INTERNAL_ERROR          (911)
+/** You tried to do something that should work but has not been implemented yet in COSTA.
+    Please implement and help improving COSTA*/
+#define CTA_NOT_YET_SUPPORTED       (999)
+
+#endif
diff --git a/costa/native/cta/include/cta_f77blas.h b/costa/native/cta/include/cta_f77blas.h
new file mode 100644
index 000000000..114a47b3e
--- /dev/null
+++ b/costa/native/cta/include/cta_f77blas.h
@@ -0,0 +1,167 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_f77blas.h
+\brief Package of internally used functions for C/FORTRAN compatibility.
+*/
+
+#ifndef CTA_F77BLAS_H
+#define CTA_F77BLAS_H
+#include "cta_system.h"
+
+/*------------------*/
+/* Single precision */
+/*------------------*/
+
+#define SSCAL_F77 F77_FUNC(sscal,SSCAL)
+#ifdef __cplusplus
+extern "C" /* prevent C++ name mangling */
+#endif
+CTAEXPORT void SSCAL_F77(int *n, float *alpha, float *x, int *incx);
+
+#define SCOPY_F77 F77_FUNC(scopy,SCOPY)
+#ifdef __cplusplus
+extern "C" /*bla */
+#endif
+CTAEXPORT void SCOPY_F77(int *n, float *x, int *incx, float *y, int *incy);
+
+#define SAXPY_F77 F77_FUNC(saxpy,SAXPY)
+#ifdef __cplusplus
+extern "C" /* prevent C++ name mangling */
+#endif
+CTAEXPORT void SAXPY_F77(int *n, float *alpha, float *x, int *incx, float *y, int *incy);
+
+#define SDOT_F77 F77_FUNC(sdot,SDOT)
+#ifdef __cplusplus
+extern "C" /* prevent C++ name mangling */
+#endif
+CTAEXPORT float SDOT_F77(int *n, float *x, int *incx, float *y, int *incy);
+
+#define SNRM2_F77 F77_FUNC(snrm2,SNRM2)
+#ifdef __cplusplus
+extern "C" /* prevent C++ name mangling */
+#endif
+CTAEXPORT float SNRM2_F77(int *n, float *x, int *incx);
+
+#define ISAMAX_F77 F77_FUNC(isamax,ISAMAX)
+#ifdef __cplusplus
+extern "C" /* prevent C++ name mangling */
+#endif
+CTAEXPORT int ISAMAX_F77(int *n, float *x, int *incx);
+
+#define SGER_F77 F77_FUNC(sger,sger)
+#ifdef __cplusplus
+extern "C" /* prevent C++ name mangling */
+#endif
+CTAEXPORT int SGER_F77(int *m, int *n, float *alpha, float *x, int *incx,
+             float *y, int *incy, float *a, int *lda);
+
+#define SGEMV_F77 F77_FUNC(sgemv,SGEMV)
+#ifdef __cplusplus
+extern "C" /* prevent C++ name mangling */
+#endif
+CTAEXPORT void SGEMV_F77(char *trans, int *m, int *n, float* alpha, float *A, int *lda,
+                float *x, int *incx, float *beta, float *y, int *incy,
+                int lentrans);
+
+#define SGEMM_F77 F77_FUNC(sgemm,SGEMM)
+#ifdef __cplusplus
+extern "C" /* prevent C++ name mangling */
+#endif
+CTAEXPORT void SGEMM_F77(char *transa, char *transb,  int *m, int *n, int *k, 
+               float* alpha, float *A, int *lda,
+               float *B, int *ldb, float *beta, float *C, int *ldc,
+               int lentrans1, int lenstran2);
+
+/*------------------*/
+/* Double precision */
+/*------------------*/
+#define DSCAL_F77 F77_FUNC(dscal,DSCAL)
+#ifdef __cplusplus
+extern "C" /* prevent C++ name mangling */
+#endif
+CTAEXPORT void DSCAL_F77(int *n, double *alpha, double *x, int *incx);
+
+#define DCOPY_F77 F77_FUNC(dcopy,DCOPY)
+#ifdef __cplusplus
+extern "C" /*bla */
+#endif
+CTAEXPORT void DCOPY_F77(int *n, double *x, int *incx, double *y, int *incy);
+
+#define DAXPY_F77 F77_FUNC(daxpy,DAXPY)
+#ifdef __cplusplus
+extern "C" /* prevent C++ name mangling */
+#endif
+CTAEXPORT void DAXPY_F77(int *n, double *alpha, double *x, int *incx, float *y, int *incy);
+
+#define DDOT_F77 F77_FUNC(ddot,DDOT)
+#ifdef __cplusplus
+extern "C" /* prevent C++ name mangling */
+#endif
+CTAEXPORT double DDOT_F77(int *n, double *x, int *incx, double *y, int *incy);
+
+#define DNRM2_F77 F77_FUNC(dnrm2,DNRM2)
+#ifdef __cplusplus
+extern "C" /* prevent C++ name mangling */
+#endif
+CTAEXPORT double DNRM2_F77(int *n, double *x, int *incx);
+
+#define IDAMAX_F77 F77_FUNC(idamax,IDAMAX)
+#ifdef __cplusplus
+extern "C" /* prevent C++ name mangling */
+#endif
+CTAEXPORT int IDAMAX_F77(int *n, double *x, int *incx);
+
+#define DGER_F77 F77_FUNC(dger,dger)
+#ifdef __cplusplus
+extern "C" /* prevent C++ name mangling */
+#endif
+CTAEXPORT int DGER_F77(int *m, int *n, double *alpha, double *x, int *incx,
+             double *y, int *incy, double *a, int *lda);
+
+#define DGEMV_F77 F77_FUNC(dgemv,DGEMV)
+#ifdef __cplusplus
+extern "C" /* prevent C++ name mangling */
+#endif
+CTAEXPORT void DGEMV_F77(char *trans, int *m, int *n, double* alpha, double *A, int *lda,
+                double *x, int *incx, double *beta, double *y, int *incy,
+                int lentrans);
+
+#define DGEMM_F77 F77_FUNC(dgemm,DGEMM)
+#ifdef __cplusplus
+extern "C" /* prevent C++ name mangling */
+#endif
+CTAEXPORT void DGEMM_F77(char *transa, char *transb,  int *m, int *n, int *k, 
+               double* alpha, double *A, int *lda,
+               double *B, int *ldb, double *beta, double *C, int *ldc,
+                int lentrans1, int lenstran2);
+
+
+
+
+
+#endif
+
+
+
+
+
+
+
diff --git a/costa/native/cta/include/cta_f77lapack.h b/costa/native/cta/include/cta_f77lapack.h
new file mode 100644
index 000000000..dd71f2990
--- /dev/null
+++ b/costa/native/cta/include/cta_f77lapack.h
@@ -0,0 +1,101 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_f77lapack.h
+\brief Package of internally used functions for C/FORTRAN compatibility.
+*/
+
+#ifndef CTA_F77LAPACK_H
+#define CTA_F77LAPACK_H
+#include "cta_system.h"
+
+/*------------------*/
+/* Single precision */
+/*------------------*/
+
+#define SGESV_F77 F77_FUNC(sgesv,SGESV)
+#ifdef __cplusplus
+extern "C" /* prevent C++ name mangling */
+#endif
+CTAEXPORT void SGESV_F77(int *n, int *nrhs, float *A, int *lda, int *ipiv,
+               float *B, int *ldb, int *info);
+
+#define SGETRS_F77 F77_FUNC(sgetrs,SGETRS)
+#ifdef __cplusplus
+extern "C" /* prevent C++ name mangling */
+#endif
+CTAEXPORT void SGETRS_F77(char *trans, int *n, int *nrhs, float *A, int *lda, int *ipiv,
+               float *B, int *ldb, int *info, int lentrans);
+
+#define SGEEV_F77 F77_FUNC(sgeev,SGEEV)
+#ifdef __cplusplus
+extern "C" /* prevent C++ name mangling */
+#endif
+CTAEXPORT void SGEEV_F77( char *JOBVL, char *JOBVR, int *N, float *A,
+                          int *LDA, float *WR, float *WI, float *VL,
+                          int *LDVL, float *VR, int *LDVR, 
+                          float *WORK, int *LWORK, int *INFO);
+
+/*------------------*/
+/* Double precision */
+/*------------------*/
+#define DGESV_F77 F77_FUNC(dgesv,DGESV)
+#ifdef __cplusplus
+extern "C" /* prevent C++ name mangling */
+#endif
+CTAEXPORT void DGESV_F77(int *n, int *nrhs, double *A, int *lda, int *ipiv,
+               double *B, int *ldb, int *info);
+
+#define DGETRS_F77 F77_FUNC(dgetrs,DGETRS)
+#ifdef __cplusplus
+extern "C" /* prevent C++ name mangling */
+#endif
+CTAEXPORT void DGETRS_F77(char *trans, int *n, int *nrhs, double *A, int *lda, int *ipiv,
+                double *B, int *ldb, int *info, int lentrans);
+
+#define DPOSV_F77 F77_FUNC(dposv,DPOSV)
+#ifdef __cplusplus
+extern "C" /* prevent C++ name mangling */
+#endif
+CTAEXPORT void DPOSV_F77(char *uplo, int *n, int *nrhs, double *A, int *lda, double *B, 
+               int *ldb, int *info);
+
+#define DLASRT_F77 F77_FUNC(dlasrt,DLASRT)
+#ifdef __cplusplus
+extern "C" /* prevent C++ name mangling */
+#endif
+CTAEXPORT void DLASRT_F77(char *ID, int *n, double *D, int *info, int lenid);
+
+#define DGEEV_F77 F77_FUNC(dgeev,DGEEV)
+#ifdef __cplusplus
+extern "C" /* prevent C++ name mangling */
+#endif
+CTAEXPORT void DGEEV_F77( char *JOBVL, char *JOBVR, int *N, double *A,
+                          int *LDA, double *WR, double *WI, double *VL,
+                          int *LDVL, double *VR, int *LDVR, 
+                          double *WORK, int *LWORK, int *INFO);
+
+#endif
+
+
+
+
+
+
diff --git a/costa/native/cta/include/cta_file.h b/costa/native/cta/include/cta_file.h
new file mode 100644
index 000000000..db0de178b
--- /dev/null
+++ b/costa/native/cta/include/cta_file.h
@@ -0,0 +1,133 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_file.h
+\brief Functions for the implementation of CTA_FILE objects.
+*/
+
+#ifndef CTA_FILE_H
+#define CTA_FILE_H
+
+#include <stdio.h>
+#include "cta_system.h"
+#include "cta_string.h"
+#include "cta_handles.h"
+#include "cta_datatypes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Function Handle */
+typedef CTA_Handle CTA_File;
+
+/* functions */
+
+/** \brief Create a new COSTA file
+ *   for holding a C file descriptor of a FORTRAN file LUN
+ *
+ *  \note This call does not open a file. 
+ *   No FORTRAN support in this version
+ *
+ *  \param hfile  O  receives handle of created file
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_File_Create(CTA_File *hfile);
+
+/** \brief Free a new COSTA file-handle
+ *
+ *  \note The File is not closed (in this version)
+ *
+ *  \param hfile  IO  handle of COSTA file CTA_NULL on return
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_File_Free  (CTA_File *hfile);
+
+/** \brief Get the C-file descriptor of the COSTA file
+ *
+ *  \note
+ *
+ *  \param hfile  I  handle of COSTA file-handle
+ *  \param file   O  receives file descriptor
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_File_Get   (CTA_File hfile, FILE **file);
+
+/** \brief Get the NETCDF file id of the COSTA file
+ *
+ *  \note
+ *
+ *  \param hfile  I  handle of COSTA file-handle
+ *  \param ncid   O  receives NETCDF file id
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_File_GetNetcdf(CTA_File hfile,int *ncid);
+
+
+/** \brief Set the C-file descriptor of the COSTA file
+ *
+ *  \note
+ *
+ *  \param hfile  IO  handle of COSTA file-handle
+ *  \param file   I   file descriptor
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_File_Set   (CTA_File hfile, FILE *file);
+
+/** \brief Open a C-file and set descriptor 
+ *
+ *  \note
+ *
+ *  \param hfile   I  provide a valid handle of COSTA file
+ *  \param sname   I  file path
+ *  \param smode   I  open-mode (see C fopen documentation)
+ *                    if CTA_NULL is provided, the file will be 
+ *                    opened with read/write access (file pointer at begin
+ *                    of file)
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_File_Open  (CTA_File hfile, CTA_String sname, CTA_String smode);
+
+/** \brief check whether file is a NETCDF file 
+ *
+ *  \param hfile    I  provide a valid handle of COSTA file
+ *  \param isnetcdf O  CTA_TRUE if file is NETCDF file CTA_FALSE otherwise
+ *
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_File_IsNetcdf(CTA_File hfile, int *isnetcdf);
+
+
+/** \brief Write a string to file
+ *
+ *  \note
+ *
+ *  \param hfile   I  handle of COSTA file
+ *  \param str     I  string that must be written to file
+ *  \param eol     I  Add end of line, CTA_TRUE for adding end of line or CTA_FALSE otherwise
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_File_WriteStr(CTA_File hfile, char *str, int eol);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
diff --git a/costa/native/cta/include/cta_flush.h b/costa/native/cta/include/cta_flush.h
new file mode 100644
index 000000000..97a4b8fee
--- /dev/null
+++ b/costa/native/cta/include/cta_flush.h
@@ -0,0 +1,49 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_flush.h
+\brief Flush function for compatibility reasons with FORTRAN.
+*/
+
+#ifndef CTA_FLUSH_H
+#define CTA_FLUSH_H
+
+#include "cta_system.h"
+
+/* Function Handle */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* functions */
+/** \brief Flushes STDOUT and STDERR both for C as FORTRAN
+ *
+ *  \note Function is introduced because some FORTRAN compilers use
+ *  different flushing than the C compiler. As a result the order of output
+ *  generated by both languages is "random". This routine makes sure that
+ *  all buffers have been flushed.
+ *
+ */
+CTAEXPORT void CTA_Flush();
+#ifdef __cplusplus
+}
+#endif
+#endif
+
diff --git a/costa/native/cta/include/cta_functions.h b/costa/native/cta/include/cta_functions.h
new file mode 100644
index 000000000..ba3fca5cc
--- /dev/null
+++ b/costa/native/cta/include/cta_functions.h
@@ -0,0 +1,152 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_functions.h
+\brief Functions for working with CTA_Function objects. CTA_Function objects can be used to address functions.
+*/
+
+#ifndef CTA_FUNCTIONS_H
+#define CTA_FUNCTIONS_H
+#include "cta_system.h"
+#include "cta_datatypes.h"
+#include "cta_handles.h"
+#include "cta_interface.h"
+#include "ctai_xml.h"
+
+/* Function Handle */
+typedef CTA_Handle CTA_Func;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \brief Create a new COSTA function.
+ *
+ *  \note Argument name is only used for debugging and output.
+ *
+ *  \param name     I  name of the new function for debugging purposes
+ *  \param function I  pointer to function that has to be associated
+ *                     with new COSTA function
+ *  \param hintf    I  handle of associated interface
+ *  \param hfunc    O  receives handle of created COSTA function
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_Func_Create(const char *name, CTA_Function *function , 
+                    const CTA_Intf hintf, CTA_Func *hfunc);
+
+/** \brief Create a COSTA function loaded from dynamic library
+*
+*
+
+*  \param libraryName  I  Name of the dynamic library excluding the extention (.dll, .so, .dylib) eg "libmyroutines"
+*  \param functionName I  Name of the function to load. 
+*  \param name         I  Name of the function used inside the OpenDA administration
+*  \param id           I  Name (id) of the handle that is created (needed when stored as part of a tree
+*  \return             Handle to function or CTA_NULL in case of an error.
+*
+*  \note When the load of the function fails, some decorated, typical Fortran variations are tried as well
+*
+*/
+CTANOF90 CTAEXPORT CTA_Func CTA_CreateFuncDynamicLib(char *libraryName, char *functionName, char *name, char *id);
+
+/** \brief Duplicates a user defined function
+ *
+ * \param hfunc      I COSTA user function handle
+ * \param hdupl      I duplication of hfunc
+ * \return error status: CTA_OK
+ */
+CTAEXPORT int CTA_Func_Duplicate(CTA_Func hfunc, CTA_Func *hdupl);
+
+
+
+/** \brief Free a new COSTA function.
+ *
+ *  \note 
+ *
+ *  \param hfunc  IO handle of COSTA function, replaced by CTA_NULL on return
+ *  \return CTA_OK if sucessful
+ */
+CTAEXPORT int CTA_Func_Free(CTA_Func *hfunc);
+
+/** \brief Get interface of COSTA function.
+ *
+ *  \note For performance reasons, the interface is not a copy but a handle
+ *        to the actual interface, it should NOT be freed by the calling routine!
+ *
+ *  \param hfunc  I  handle of COSTA function
+ *  \param hintf  O  receives handle of interface of function
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_Func_GetIntf(const CTA_Func hfunc, CTA_Intf *hintf);
+
+/** \brief Get function pointer of function
+ *
+ *  \note There is no FORTRAN verion of this function available
+ *
+ *  \param hfunc     I  handle of COSTA function.
+ *  \param function  O  receives pointer to function
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_Func_GetFunc(const CTA_Func hfunc, CTA_Function **function);
+
+/** \brief Get name of function
+ *
+ *  \note Future versions will return a COSTA string handle.
+ *
+ *  \param hfunc     I  handle of COSTA function.
+ *  \param name      O  handle of string object that is to receive function name, must exist before calling
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_Func_GetName(const CTA_Func hfunc, CTA_String name);
+
+/** \brief Set userdata of function
+ *
+ *  \note Frees existing user data and replaces it with userdata
+ *
+ *  \param hfunc     IO handle of COSTA function.
+ *  \param userdata  I  new userdata handles
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_Func_SetUserdata(const CTA_Func hfunc, const CTA_Handle userdata );
+
+/** \brief Get userdata of function
+ *
+ *  \param hfunc     I  handle of COSTA function.
+ *  \param userdata  O  userdata handle
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_Func_GetUserdata(const CTA_Func hfunc, CTA_Handle userdata );
+
+/** \brief Create a COSTA function from XML
+*          (load from dynamic load library). 
+*
+*  \param cur_node  I  Current XML node 
+*  \return             Handle to create or CTA_NULL in case of an error.
+*/
+CTANOF90 CTAEXPORT CTA_Func CTAI_XML_CreateFunc(xmlNode *cur_node);
+
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif
+
diff --git a/costa/native/cta/include/cta_handles.h b/costa/native/cta/include/cta_handles.h
new file mode 100644
index 000000000..794cbf29a
--- /dev/null
+++ b/costa/native/cta/include/cta_handles.h
@@ -0,0 +1,178 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_handles.h
+\brief Description of functions for working with CTA_Handle objects that are being used to access COSTA objects.
+*/
+
+#ifndef CTA_HANDLES_H
+#define CTA_HANDLES_H
+
+#include "cta_datatypes.h"
+
+// Constants
+
+// Derived types
+CTAEXPORT typedef int CTA_Handle;
+
+/* String instance handle - needed for GetName() */
+CTAEXPORT typedef CTA_Handle CTA_String;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Interfaces
+/** \brief Create a new COSTA handle
+ *
+ * \param name     I  name associated with handle
+ * \param datatype I  data type of handle
+ * \param data     I  block of data associated to handle
+ * \param handle   O  receives COSTA handle
+ * \return error status: CTA_OK
+ */
+CTAEXPORT int CTA_Handle_Create(const char *name, const CTA_Datatype datatype,
+                       void *data, CTA_Handle *handle);
+
+/** \brief Free a COSTA handle
+ *
+ *  \note The data part of the handle is NOT freed.
+ *
+ *  \param handle IO handle that is to be freed, replaced by CTA_NULL on return.
+ *  \return error status: CTA_OK, CTA_ILLEGAL_HANDLE
+ */
+CTAEXPORT int CTA_Handle_Free(CTA_Handle *handle);
+
+/** \brief Free a COSTA handle
+ *
+ *  \note Calls datatype-specific free methods, if available.
+ *
+ *  \param handle IO handle that must be freed, replaced by CTA_NULL on return.
+ *  \return error status: CTA_OK, CTA_ILLEGAL_HANDLE
+ */
+CTAEXPORT int CTA_Handle_Free_All(CTA_Handle *handle);
+
+/** \brief Check whether a handle is valid and checks type
+ *
+ *  \note The handle CTA_NULL is not valid.
+ *
+ * \param handle   I  COSTA handle
+ * \param datatype I  data type to compare handle with
+ * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE 
+ *
+ */
+CTAEXPORT int CTA_Handle_Check(const CTA_Handle handle,const CTA_Datatype datatype);
+
+/** \brief Get pointer to data element of handle
+ *
+ *  \param handle I  COSTA handle
+ *  \param data   O  receives pointer to data element
+ *  \return error status: CTA_OK, CTA_ILLEGAL_HANDLE
+ */
+CTAEXPORT int CTA_Handle_GetData(const CTA_Handle handle, void **data);
+
+/** \brief Get the value the handle points to
+ *
+ *  \param handle 	I  COSTA handle
+ *  \param value   	O  receives pointer to data element
+ *  \param datatype     I  specify the data type of *value, must be the same as data type of handle
+ *  \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE
+ */
+CTAEXPORT int CTA_Handle_GetValue(const CTA_Handle handle, void *value, CTA_Datatype datatype);
+
+/** \brief Get name associated with handle
+ *
+ *  \param handle   I  COSTA handle
+ *  \param hname    O  receives name of data type
+ *  \return error status: CTA_OK, CTA_ILLEGAL_HANDLE
+ */
+CTAEXPORT int CTA_Handle_GetName(const CTA_Handle handle, CTA_String hname);
+
+/** \brief Get data type associated with handle
+ *
+ *  \param handle   I  COSTA handle
+ *  \param datatype O  receives data type of handle
+ *  \return error status: CTA_OK, CTA_ILLEGAL_HANDLE
+ */
+CTAEXPORT int CTA_Handle_GetDatatype(const CTA_Handle handle, CTA_Datatype *datatype);
+
+/** \brief Set name associated with handle (internal use only!)
+ *
+ *  \param handle   IO COSTA handle
+ *  \param name     I  COSTA string
+ *  \return error status: CTA_OK, CTA_ILLEGAL_HANDLE
+ */
+CTAEXPORT int CTAI_Handle_SetName(const CTA_Handle handle, const char *name);
+
+/** \brief Find a handle by name and data type in the COSTA handle administration
+ *
+ *  \param sname    I  name of handle
+ *  \param datatype I  data type of handle
+ *  \param handlenr O  receives the handle (only if return value is CTA_OK)
+   *  \return error status: CTA_OK, CTA_HANDLE_NOT_FOUND
+ */
+CTAEXPORT int CTA_Handle_Find(CTA_String sname, CTA_Datatype datatype, int *handlenr);
+
+/** \brief Print overview of all COSTA handles 
+ *
+   *  \return error status: CTA_OK
+ */
+CTAEXPORT int CTA_Handle_Printall();
+
+/** \brief Counts all handles sorts them by type and prints overview.
+ * This function can be usefull for detecting memory leaks that are 
+ * the result of not freeing instances of COSTA objects costa objects.
+ *
+ *  \param location I String to indicate location of call
+ *  \return error status: CTA_OK
+ */
+CTAEXPORT int CTA_Handle_PrintInfo(const char *location);
+
+/** \brief Get the reference count of the handle
+ *
+ *  \param handle   I COSTA handle
+ *  \param refCount O reference count of handle
+ *  \return error status: CTA_OK
+ *
+ */
+CTAEXPORT int CTA_Handle_GetRefCount(const CTA_Handle handle, int *refCount);
+
+/** \brief Increase the reference count of the handle
+ *
+ *  \param handle   I COSTA handle
+ *  \return error status: CTA_OK
+ *
+ */ 
+CTAEXPORT int CTA_Handle_IncRefCount(const CTA_Handle handle);
+
+/** \brief Decrease the reference count of the handle
+ *
+ *  \param handle   I COSTA handle
+ *  \return error status: CTA_OK
+ *
+ */   
+CTAEXPORT int CTA_Handle_DecrRefCount(const CTA_Handle handle);
+   
+
+#ifdef __cplusplus
+}
+#endif
+#endif
+
diff --git a/costa/native/cta/include/cta_initialise.h b/costa/native/cta/include/cta_initialise.h
new file mode 100644
index 000000000..e85ff4408
--- /dev/null
+++ b/costa/native/cta/include/cta_initialise.h
@@ -0,0 +1,46 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_initialise.h
+\brief CTA_Initialise for COSTA initialisation.
+*/
+
+#ifndef CTA_INITIALISE_H
+#define CTA_INITIALISE_H
+#include "cta_system.h"
+#include "cta_vector.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/** \brief Initialize the COSTA environment
+ *  \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Core_Initialise();
+
+/** \brief Finalise the COSTA environment
+ *  \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Core_Finalise();
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/costa/native/cta/include/cta_interface.h b/costa/native/cta/include/cta_interface.h
new file mode 100644
index 000000000..ffbee4fff
--- /dev/null
+++ b/costa/native/cta/include/cta_interface.h
@@ -0,0 +1,143 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_interface.h
+\brief Description of the COSTA interface componennt used for
+communication with user implementation classes.
+
+To create an object of your user implementation go through the following steps:
+
+1: Create a new class (=implementation) of a COSTA component by calling the
+appropriate DefineClass() function providing the name of the class, a matrix with
+CTA_Function objects describing the user functions.
+
+example:
+
+<code>void Usr_Object_initialise(CTA_MatClass *hmatcl){
+   CTA_Intf hintf=0;\n
+   CTA_Func h_func[CTA_MATRIX_NUMFUNC];\n
+//   CTA_Func h_create_size;\n
+//   CTA_Func h_Create_Init;\n
+   int ierr;\n
+
+   ierr=CTA_Func_Create(" ",&CTAI_Matrix_Create_Size, hintf,&h_func[CTA_MATRIX_CREATE_SIZE]);\n
+   ierr=CTA_Func_Create(" ",&CTAI_Matrix_Create_Init, hintf,&h_func[CTA_MATRIX_CREATE_INIT]);\n
+   ierr=CTA_Func_Create(" ",&CTAI_Matrix_getvals,     hintf,&h_func[CTA_MATRIX_GETVALS]    );\n
+   ierr=CTA_Func_Create(" ",&CTAI_Matrix_getval,      hintf,&h_func[CTA_MATRIX_GETVAL]     );\n
+   ierr=CTA_Func_Create(" ",&CTAI_Matrix_setcol,     hintf,&h_func[CTA_MATRIX_SETCOL]    );\n
+   ierr=CTA_Func_Create(" ",&CTAI_Matrix_setvals,     hintf,&h_func[CTA_MATRIX_SETVALS]    );\n
+   ierr=CTA_Func_Create(" ",&CTAI_Matrix_setval,      hintf,&h_func[CTA_MATRIX_SETVAL]     );\n
+   ierr=CTA_Func_Create(" ",&CTAI_Matrix_setconst,    hintf,&h_func[CTA_MATRIX_SETCONST]   );\n
+   ierr=CTA_Func_Create(" ",&CTAI_Matrix_Export,      hintf,&h_func[CTA_MATRIX_EXPORT]     );\n
+   ierr=CTA_Func_Create(" ",&CTAI_Matrix_Ger,         hintf,&h_func[CTA_MATRIX_GER]        );\n
+   ierr=CTA_Func_Create(" ",&CTAI_Matrix_Inv,         hintf,&h_func[CTA_MATRIX_INV]        );\n
+   ierr=CTA_Func_Create(" ",&CTAI_Matrix_Gemv,        hintf,&h_func[CTA_MATRIX_GEMV]        );\n
+   ierr=CTA_Func_Create(" ",&CTAI_Matrix_Gemm,        hintf,&h_func[CTA_MATRIX_GEMM]        );\n
+   ierr=CTA_Func_Create(" ",&CTAI_Matrix_Axpy,        hintf,&h_func[CTA_MATRIX_AXPY]        );\n
+   ierr=CTA_Func_Create(" ",&CTAI_Matrix_free,        hintf,&h_func[CTA_MATRIX_FREE]       );\n
+
+   ierr=CTA_Object_DefineClass("cta_matrix_blas",h_func,hmatcl);\n
+}</code>
+
+2: use the resulting class object for creating the user object in following way:
+
+<code>int CTA_Matrix_Create(classobject, ..., *userdata, Usr_Object *usrObject);</code>
+
+*/
+
+#ifndef CTA_INTERFACE_H
+#define CTA_INTERFACE_H
+#include "cta_system.h"
+#include "cta_datatypes.h"
+#include "cta_handles.h"
+
+CTAEXPORT typedef int CTA_Intf;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Create a new interface
+ *
+ * \param name     I  name of the new interface
+ * \param argtyp   I  list with the data types of arguments
+ * \param narg     I  number of arguments of interface
+ * \param hintf    O  receives the new COSTA interface handle
+ * \return error status: CTA_OK
+ */
+CTAEXPORT int CTA_Intf_Create(const char *name, const CTA_Datatype *argtyp,
+                    const int narg,CTA_Intf *hintf);
+
+/** \brief Free an interface
+ *
+ *  \note Freeing CTA_NULL is allowed.
+ *
+ *  \param hintf  IO  handle of interface, replaced by CTA_NULL on return
+ *  \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE 
+ */
+CTAEXPORT int CTA_Intf_Free(CTA_Intf *hintf);
+
+/** \brief Match two interfaces for compatibility argumentlist-argumentlist
+ *
+ *  \note Two interfaces are compatible if all arguments have the same datatype,
+ *        CTA_VOID is compatible with all other arguments except for CTA_FSTRING
+ *
+ *  \param argtyp1  I  list with the data types of arguments of first interface
+ *  \param narg1    I  number of arguments in first interface
+ *  \param argtyp2  I  list with the data types of arguments of second interface
+ *  \param narg2    I  number of arguments in second interface
+ *  \param flag     O  receives TRUE if interfaces are compatible FALSE ortherwise
+ *  \return error status: CTA_OK
+ */
+CTAEXPORT int CTA_Intf_Match_aa(const CTA_Datatype *argtyp1, const int narg1, 
+                      const CTA_Datatype *argtyp2, const int narg2,
+                      BOOL *flag);
+
+/** \brief Match two interfaces for compatibility handle-argumentlist
+ *
+ *  \note Two interfaces are compatible if all arguments have the same datatype,
+ *        CTA_VOID is compatible with all other arguments except for CTA_FSTRING
+ *
+ *  \param hintf1   I  handle of first interface
+ *  \param argtyp2  I  list with the data types of arguments of second interface
+ *  \param narg2    I  number of arguments in second interface
+ *  \param flag     O  receives TRUE if interfaces are compatible FALSE ortherwise
+ *  \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE
+ */
+CTAEXPORT int CTA_Intf_Match_ha(const CTA_Intf hintf1,
+                      const CTA_Datatype *argtyp2, const int narg2, BOOL *flag);
+
+/** \brief Match two interfaces for compatibility handle-handle
+ *
+ *  \note two interfaces are compatible if all arguments have the same datatype,
+ *        CTA_VOID is compatible with all other arguments except for CTA_FSTRING
+ *
+ *  \param hintf1   I  handle of first interface
+ *  \param hintf2   I  handle of second interface
+ *  \param flag     O  receives TRUE if interfaces are compatible FALSE ortherwise
+ *  \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE 
+ */
+CTAEXPORT int CTA_Intf_Match_hh(const CTA_Intf hintf1, const CTA_Intf hintf2, BOOL *flag);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
+
diff --git a/costa/native/cta/include/cta_matrix.h b/costa/native/cta/include/cta_matrix.h
new file mode 100644
index 000000000..f08426460
--- /dev/null
+++ b/costa/native/cta/include/cta_matrix.h
@@ -0,0 +1,297 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_matrix.h
+\brief The interface description of the COSTA matrix component. For user implementation see cta_usr_matrix.h.
+
+CTA_Matrix is the default class implementation for matrices.
+*/
+
+#ifndef CTA_MATRIX_H
+#define CTA_MATRIX_H
+#include "cta_system.h"
+#include "cta_handles.h"
+#include "cta_datatypes.h"
+#include "cta_functions.h"
+#include "cta_vector.h"
+
+/* Function Handle */
+typedef CTA_Handle CTA_Matrix;
+typedef CTA_Handle CTA_MatClass;
+
+/* parameters for different functions */
+#define CTA_MATRIX_CREATE_SIZE ( 1)
+#define CTA_MATRIX_CREATE_INIT ( 2)
+#define I_CTA_MATRIX_GETVALS     ( 4) 
+#define I_CTA_MATRIX_GETVAL      ( 5) 
+#define I_CTA_MATRIX_SETCOL      ( 6)
+#define I_CTA_MATRIX_SETVALS     ( 7)
+#define I_CTA_MATRIX_SETVAL      ( 8)
+#define I_CTA_MATRIX_SETCONST    ( 9) 
+#define I_CTA_MATRIX_FREE        (10)
+#define I_CTA_MATRIX_EXPORT      (11)
+#define I_CTA_MATRIX_GER         (12)
+#define I_CTA_MATRIX_INV         (13)
+#define I_CTA_MATRIX_GEMV        (14)
+#define I_CTA_MATRIX_GEMM        (15)
+#define I_CTA_MATRIX_AXPY        (16)
+#define CTA_MATRIX_NUMFUNC     (17)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Create a new class (=implementation) of a COSTA matrix component.
+ *
+ * \param name     I  name of the new matrix class
+ * \param h_func   I  COSTA function handles for functions that implement class,
+ *                    missing functions must have value CTA_NULL
+ * \param hmatcl   O  receives handle of new matrix class
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Matrix_DefineClass(const char *name,
+                           const CTA_Func h_func[CTA_MATRIX_NUMFUNC],
+                           CTA_MatClass *hmatcl);
+
+/** \brief Duplicate a matrix instance.
+ *
+ * \note Only size, data type and class type are duplicated, the values are not
+ *       copied.
+ *
+ * \param hmat1 I  handle of matrix to be duplicated
+ * \param hmat2 O  receives handle of duplicate matrix, empty handle before calling
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Matrix_Duplicate(CTA_Matrix hmat1, CTA_Matrix *hmat2);
+
+/** \brief Create a new matrix.
+ *
+ * \note
+ *
+ * \param hmatcl   I  matrix class of new matrix
+ * \param m        I  number of rows
+ * \param n        I  number of columns
+ * \param datatype I  datatype of elements in matrix
+ * \param userdata IO userdata for creation (depends on class)
+ * \param hmat     O  receives handle of new matrix
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Matrix_Create(CTA_MatClass hmatcl, const int m, const int n,
+                      CTA_Datatype datatype, CTA_Handle userdata, CTA_Matrix *hmat);
+
+/* *\brief Get size of matrix.
+ *
+ * \note
+ *
+ * \param hmat  I  handle of matrix
+ * \param m     O  receives number of rows
+ * \param n     O  receives number of columns
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Matrix_GetSize(CTA_Matrix hmat, int *m, int *n);
+
+/** \brief Get datatype of matrix
+ *
+ * \note 
+ *
+ * \param hmat     I  handle of matrix
+ * \param datatype O  receives data type of matrix
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Matrix_GetDatatype(CTA_Matrix hmat, CTA_Datatype *datatype);
+
+/** \brief Get copy of all values in matrix.
+ *
+ * \note The elements in the matrix are returned column-wise
+ *       (FORTRAN matrix representation)
+ *
+ * \param hmat     I  handle of matrix
+ * \param vals     O  copy of values in matrix
+ * \param m        I  number of rows of vals (must be the same as for the matrix)
+ * \param n        I  number of columns of vals (must be the same as for the matrix)
+ * \param datatype I  data type of *vals, must be the same as data type of matrix elements
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Matrix_GetVals(CTA_Matrix hmat, void *vals, int  m, int  n,
+                       CTA_Datatype datatype);
+
+/** \brief Get copy of single value in the matrix.
+ *
+ * \note Counting of the indices starts from 1.
+ * \param hmat     I  handle of matrix
+ * \param val      O  receives copy of value in matrix
+ * \param m        I  row index of value to be copied
+ * \param n        I  column index of value to be copied
+ * \param datatype I  data type of *val, must be the same as data type of matrix elements
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Matrix_GetVal (CTA_Matrix hmat, void *val , int  m, int  n,
+                       CTA_Datatype datatype);
+
+/** \brief Set whole matrix to one single value.
+ *
+ * \note
+ * \param hmat     IO handle of matrix
+ * \param val      I  value that must be set
+ * \param datatype I  data type of *val, must be the same as data type of matrix elements
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Matrix_SetConstant(CTA_Matrix hmat, void *val, CTA_Datatype datatype);
+
+/** \brief Set values of a single column of the matrix.
+ *
+ * \note Counting of the indices starts from 1.
+ * \param hmat     IO handle of matrix
+ * \param n        I  index of matrix column to set
+ * \param hvec     I  handle of COSTA vector the values have to be set to, length must be the same as number of rows of matrix
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Matrix_SetCol(CTA_Matrix hmat, int  n, CTA_Vector hvec);
+
+/** \brief Set all values of the matrix.
+ *
+ * \note The elements in vals should be column-wise
+ *       (FORTRAN matrix representation)
+ * \param hmat     IO handle of matrix
+ * \param vals     I  copy of values in matrix
+ * \param m        I  number of rows of vals (must be the same as for the matrix)
+ * \param n        I  number of columns of vals (must be the same as for the matrix)
+ * \param datatype I  data type of vals
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Matrix_SetVals(CTA_Matrix hmat, void *vals, int  m, int  n,
+                       CTA_Datatype datatype);
+
+/** \brief Set a single value in the matrix.
+ *
+ * \param hmat     IO handle of matrix
+ * \param val      I  value to be set at position (m,n)
+ * \param m        I  row index
+ * \param n        I  column index
+ * \param datatype I  data type of *val, must be the same as data type of matrix elements
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Matrix_SetVal(CTA_Matrix hmat, void *val, int  m, int  n,
+                       CTA_Datatype datatype);
+
+/** \brief Export a matrix.
+ *
+ * \note CTA_DEFAULT_MATRIX supports exporting to:\n
+ *       file (usedoc is handle of COSTA file)\n
+ *
+ * \param hmat     I  handle of matrix
+ * \param usedoc   I  configuration of output
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Matrix_Export(CTA_Matrix hmat, CTA_Handle usedoc);
+
+
+/** \brief Perform a rank 1 operation A:=alpha*x*y'+A
+ *
+ * \param hmat     IO handle of matrix A
+ * \param alpha    I  scalar
+ * \param vx       I  vector x
+ * \param vy       I  vector y
+ * \return error status: CTA_OK if successful
+ * \note it is allowed that are the same object (*vx==*vy)
+ */
+CTAEXPORT int CTA_Matrix_Ger(CTA_Matrix hmat, double alpha, CTA_Vector vx,
+                      CTA_Vector vy);
+
+
+/** \brief Compute inverse of a square matrix A:=inv(A)
+ *
+ * \param hmat     IO handle of matrix A
+ * \return error status: CTA_OK if successful
+ *
+ */
+CTAEXPORT int CTA_Matrix_Inv(CTA_Matrix hmat);
+
+/** \brief Perform the matrix multiplication y:=alpha*OP(A)*x+beta*y
+ *   where op(X)=X, X^T
+ *
+ * \param hmat     I  handle of matrix (A from the equation above)
+ * \param trans    I  transpose flag CTA_TRUE/CTA_FALSE for matrix A
+ * \param alpha    I  scalar
+ * \param vx       I  vector x
+ * \param beta     I  scalar
+ * \param vy       IO vector y
+ * \return error status: CTA_OK if successful
+ * \note it is allowed that vectors are the same object (*vx==*vy)
+ */
+CTAEXPORT int CTA_Matrix_Gemv(CTA_Matrix hmat, int trans, double alpha,
+                              CTA_Vector vx, double beta,  CTA_Vector vy);
+
+/** \brief Perform the matrix multication C:=alpha*op(A)*op(B)+beta*C
+    where op(X)=X, X^T
+ *
+ *  \param mC     IO handle of matrix C
+ *  \param transa I  transpose flag CTA_TRUE/CTA_FALSE for matrix A
+ *  \param transb I  transpose flag CTA_TRUE/CTA_FALSE for matrix A
+ *  \param alpha  I  scalar
+ *  \param mA     I  handle of matrix A
+ *  \param mB     I  handle of matrix B
+ *  \param beta   I  scalar
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Matrix_Gemm(CTA_Matrix mC, int transa, int transb, double alpha,
+                    CTA_Matrix mA, CTA_Matrix mB, double beta);
+
+/** \brief Perform the matrix addition Y:=alpha*X+Y
+ *
+ *  \param mY     IO handle of matrix Y
+ *  \param alpha  I  scalar
+ *  \param mX     I  handle of matrix X
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Matrix_Axpy(CTA_Matrix mY, double alpha, CTA_Matrix mX);
+
+
+/** \brief Computes the eigenvalues and optionally the eigenvectors
+ *         of a general matrix A
+ *
+ *  The computed eigenvectors are normalized to have Euclidean norm
+ *  equal to 1 and largest component real.
+ *
+ *  \param A     I Matrix A
+ *  \param eigvals  O Vector with eigenvalues
+ *  \param eigvecs  O Matrix with eigenvalues. The eigenvectors are
+ *                    not computed when  eigvecs in CTA_NULL on entry
+ * \return error status: CTA_OK if successful
+ * \note the eigenvalues can be complex. Since COSTA does not yet support
+ *       complex vectors, only the real part of the eigenvalues is
+ *       returned.
+ */
+CTAEXPORT int CTA_Matrix_EigVals(CTA_Matrix A, CTA_Vector eigvals, CTA_Matrix eigvecs);
+
+
+/** \brief Free the matrix object
+ *
+ * \Note hmat=CTA_NULL is allowed
+ *
+ * \param hmat     IO handle of matrix, replaced by CTA_NULL on return.
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Matrix_Free(CTA_Matrix *hmat);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/costa/native/cta/include/cta_matrix_blas.h b/costa/native/cta/include/cta_matrix_blas.h
new file mode 100644
index 000000000..6acf74c84
--- /dev/null
+++ b/costa/native/cta/include/cta_matrix_blas.h
@@ -0,0 +1,294 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+
+
+#ifndef CTA_MATRIX_BLAS_H
+#define CTA_MATRIX_BLAS_H
+
+#include "cta_f77blas.h"
+#include "cta_f77lapack.h"
+#include "cta_datatypes.h"
+#include "cta_handles.h"
+#include "cta_vector.h"
+#include "cta_matrix.h"
+#include "cta_functions.h"
+
+
+/**
+\file  cta_matrix_blas.h
+\brief Interface description of the COSTA BLAS-matrix component (a standard implementation class of CTA_Matrix) that can be used for standard BLAS operations.
+
+<i>BLAS matrix object data:</i>
+
+\code
+typedef struct {
+CTA_Datatype datatype;
+void *values;
+int m;
+int n;
+int *perm;
+BOOL inverse;
+} CTAI_Matrix_blas;
+\endcode
+\n
+*/
+
+
+
+
+typedef struct {
+CTA_Datatype datatype;
+void *values;
+int m;
+int n;
+int *perm;
+BOOL inverse;
+} CTAI_Matrix_blas;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Define the BLAS matrix class. Use the resulting class object when calling CTA_Matrix_Create().
+ *
+ * \param hmatcl    IO receives class BLAS matrix class object, must be empty before calling
+ * \return no return value
+ */
+CTANOEXPORT void CTA_Matrix_blas_initialise(CTA_MatClass *hmatcl);
+
+
+/** \brief Implementation that forms part of the create process.
+ * 
+ * Gives the memory size of a new BLAS matrix object.
+ *
+ * \param m        I  dimension, m (rows)
+ * \param n        I  dimension, n (columns)
+ * \param datatype I  data type of matrix elements
+ * \param userdata IO user data (not being used)
+ * \param retval   O  receives return value
+ * \param memsize  O  receives the number of bytes which are necessary to store one
+                      BLAS matrix class, with a pointer to the contents (data), but without the
+                      contents themselves
+ * \return no return value
+ */
+void CTAI_Matrix_Create_Size(int *m, int *n, CTA_Datatype *datatype,
+                             CTA_Handle userdata, int *retval, int *memsize);
+                             
+/** \brief Implementation that forms part of the create process.
+ *
+ * Prepares a BLAS matrix object for use.
+ *
+ * \param x          IO pointer to object data of BLAS matrix
+ * \param m          I  dimension, m (rows)
+ * \param n          I  dimension, n (columns)
+ * \param datatype   I  data type of the matrix elements
+ * \param userdata   IO user data (not being used)
+ * \param retval     O  receives return value
+ * \return no return value
+ */
+void CTAI_Matrix_Create_Init( CTAI_Matrix_blas *x, int *m, int *n,
+                              CTA_Datatype *datatype, CTA_Handle userdata, int *retval);
+
+
+/** \brief Get all values from the BLAS matrix.
+ *
+ * \note At index CTA_MATRIX_GETVALS in the function list of the class descriptor.
+ *
+ * \param x          I  pointer to object data of BLAS matrix
+ * \param vals       O  receives values; must exist before calling
+ * \param m          I  number of rows of *vals, must be the same as number of rows in matrix
+ * \param n          I  number of columns of *vals, must be the same as number of columns in matrix
+ * \param datatype   I  data type of value; must be the same as data type of matrix elements
+ * \param userdata   IO user data (not being used)
+ * \param retval     O  receives return value
+ * \return no return value
+ */
+void CTAI_Matrix_getvals( CTAI_Matrix_blas *x, void *vals, int *m,
+                          int *n, CTA_Datatype *datatype, int *retval);
+
+
+/** \brief Get a single value from the BLAS matrix.
+ *
+ * \param x          I  pointer to object data of BLAS matrix
+ * \param val        O  receives value; must exist before calling
+ * \param m          I  row index of matrix value to get
+ * \param n          I  column index of matrix value to get
+ * \param datatype   I  data type of val; must be the same as data type of matrix elements
+ * \param retval     O  receives return value
+ * \return no return value
+ */
+void CTAI_Matrix_getval ( CTAI_Matrix_blas *x, void *val, int *m,
+                          int *n, CTA_Datatype *datatype, int *retval);
+
+
+/** \brief Set column of BLAS matrix.
+ *
+ * \param x          IO pointer to object data of BLAS matrix
+ * \param n          I  index of column to set
+ * \param hvec       I  handle of sending column; dimensions must be compatible
+ * \param retval     O  receives return value
+ * \return no return value
+ */
+void CTAI_Matrix_setcol( CTAI_Matrix_blas *x, int *n, CTA_Vector *hvec, int *retval);
+
+
+/** \brief Set all values of BLAS matrix.
+ *
+ * \param x          IO pointer to object data of BLAS matrix
+ * \param vals       I  values to set to
+ * \param m          I  number of rows of *vals, must be the same as number of rows in matrix
+ * \param n          I  number of columns of *vals, must be the same as number of columns in matrix
+ * \param datatype   I  data type of value; must be the same as data type of matrix elements
+ * \param retval     O  receives return value
+ * \return no return value
+ */
+void CTAI_Matrix_setvals( CTAI_Matrix_blas *x, void *vals, int *m, int *n,
+                          CTA_Datatype *datatype, int *retval);
+
+
+/** \brief Set a single value of BLAS matrix.
+ *
+ * \param x          IO pointer to object data of BLAS matrix
+ * \param val        I  value to set to
+ * \param m          I  row index of matrix value to set
+ * \param n          I  column index of matrix value to set
+ * \param datatype   I  data type of val; must be the same as data type of matrix elements
+ * \param retval     O  receives return value
+ * \return no return value
+ */
+void CTAI_Matrix_setval ( CTAI_Matrix_blas *x, void *val , int *m, int *n,
+                          CTA_Datatype *datatype, int *retval);
+
+
+/** \brief Set all BLAS matrix elements to constant value.
+ *
+ * \note At index CTA_MATRIX_SETCONST in the function list of the class descriptor.
+ *
+ * \param x          IO pointer to object data of BLAS matrix
+ * \param val        I  constant value to set to
+ * \param datatype   I  data type of val; must be the same as data type of matrix elements
+ * \param retval     O  receives return value
+ * \return no return value
+ */
+void CTAI_Matrix_setconst( CTAI_Matrix_blas *x, void *val,
+                           CTA_Datatype *datatype, int *retval);
+
+
+/** \brief Export BLAS matrix.
+ *
+ * \note Export a BLAS matrix to file (userdata is a handle of COSTA file) or
+ *
+ * \param x          I  pointer to object data of BLAS matrix
+ * \param userdata   IO user data (see note)
+ * \param retval     O  receives return value
+ * \return no return value
+ */
+void CTAI_Matrix_Export(
+         CTAI_Matrix_blas *x,
+         CTA_Handle userdata,
+         int *retval);
+
+
+/** \brief Apply the BLAS operation GER: A=A+(alpha)x(y(T))
+ * 
+ * i.e. for matrix A, vectors x and y and scalar alpha
+ *
+ * \param A           IO pointer to object data of BLAS matrix A; must exist before calling
+ * \param alpha       I  scalar
+ * \param vx          I  handle of vector x
+ * \param vy          I  handle of vector y
+ * \param retval      O  receives return value
+ * \return no return value
+ */
+void CTAI_Matrix_Ger( CTAI_Matrix_blas *A, double *alpha, CTA_Vector *vx,
+                      CTA_Vector *vy, int *retval);
+
+
+/** \brief Invert BLAS matrix.
+ *
+ * \param A        IO pointer to object data of BLAS matrix
+ * \param retval   O  receives return value
+ * \return no return value
+ */
+void CTAI_Matrix_Inv( CTAI_Matrix_blas *A, int *retval);
+
+
+/** \brief Apply the BLAS operation GEMV: A=(alpha)Ax+(beta)y
+ *
+ * i.e. for matrix A, vectors x (optionally transposed) and y and scalars alpha and beta
+ *
+ * \param A        IO pointer to object data of BLAS matrix
+ * \param transa   I  transpose flag for matrix A (CTA_TRUE for transposing or CTA_FALSE otherwise)
+ * \param alpha    I  scalar
+ * \param vx       I  handle of vector x
+ * \param beta     I  scalar
+ * \param vy       I  handle of vector y
+ * \param retval   O  receives return value
+ * \return no return value
+ */
+void CTAI_Matrix_Gemv( CTAI_Matrix_blas *A, int *trans, double *alpha, CTA_Vector *vx,
+                      double *beta, CTA_Vector *vy, int *retval);
+
+
+/** \brief Apply the BLAS operation GEMM: C=(alpha)AB+(beta)C
+ *
+ * i.e. for matrices A,B (both optionally transposed) and C, scalars alpha and beta
+ *
+ * \param C        IO pointer to object data of BLAS matrix C
+ * \param transa   I  transpose flag for matrix A (CTA_TRUE for transposing or CTA_FALSE otherwise)
+ * \param transb   I  transpose flag for matrix B (CTA_TRUE for transposing or CTA_FALSE otherwise)
+ * \param alpha    I  scalar
+ * \param A        I  pointer to object data of BLAS matrix A
+ * \param B        I  pointer to object data of BLAS matrix B
+ * \param beta     I  scalar
+ * \param retval   O  receives return value
+ * \return no return value
+ */
+void CTAI_Matrix_Gemm( CTAI_Matrix_blas *C, int *transa, int *transb, double *alpha,
+         CTAI_Matrix_blas *A, CTAI_Matrix_blas *B, double *beta, int *retval);
+
+
+/** \brief Apply the BLAS operation AXPY: Y=Y+(alpha)X
+ *
+ * i.e. for matrices X and Y, scalar alpha
+ *
+ * \param y        IO pointer to object data of BLAS matrix y
+ * \param alpha    I  scalar
+ * \param x        I  pointer to object data of BLAS matrix x
+ * \param retval   O  receives return value
+ * \return no return value
+ */
+void CTAI_Matrix_Axpy( CTAI_Matrix_blas *y, double *alpha, CTAI_Matrix_blas *x, int *retval);
+
+
+/** \brief Free the BLAS matrix object data and associated resources.
+ *
+ * \param x          IO pointer to object data of BLAS matrix
+ * \param retval     O  receives return value
+ * \return no return value
+ */
+void CTAI_Matrix_free( CTAI_Matrix_blas *x, int *retval);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif
diff --git a/costa/native/cta/include/cta_mem.h b/costa/native/cta/include/cta_mem.h
new file mode 100644
index 000000000..5c896297d
--- /dev/null
+++ b/costa/native/cta/include/cta_mem.h
@@ -0,0 +1,64 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2011  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_mem.h
+\brief Memory management routines. These routines are equivalent to standard C
+       routines but allows us to do additional checking and debugging
+*/
+
+#ifndef CTA_MEM_H
+#define CTA_MEM_H
+
+#include <stdlib.h> 
+#include "cta_system.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Free memory that has been allocated with CTA_Malloc or CTA_Realloc
+ *
+ * \param ptr     I  pointer to memory block to deallocate
+ */
+CTAEXPORT void CTA_Free(void *ptr);
+
+/** \brief Allocates size bytes of memory and returns a pointer
+ *         to the allocated memory.
+ *
+ * \param size  I  number of bytes to allocate
+ *
+ * \return pointer to allocated memory. NULL is returned when allocation fails
+ */
+CTANOF90 CTAEXPORT void* CTA_Malloc(size_t size);
+
+/** \brief Changes the size of the allocated memory block pointed by ptr
+ *         Values in ptr are copied to returned memoy block
+ * \param size  I  pointer to memoryblock of which the size is changed
+ * \param size  I  number of bytes of returned memory block
+ *
+ * \return pointer to allocated memory. NULL is returned when allocation fails
+ */
+CTANOF90 CTAEXPORT void* CTA_Realloc(void *ptr, size_t size);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
+
diff --git a/costa/native/cta/include/cta_message.h b/costa/native/cta/include/cta_message.h
new file mode 100644
index 000000000..d1244998c
--- /dev/null
+++ b/costa/native/cta/include/cta_message.h
@@ -0,0 +1,100 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/openda_core/native/src/cta/cta_interface.c $
+$Revision: 671 $, $Date: 2008-10-07 14:49:42 +0200 (di, 07 okt 2008) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#ifndef CTA_MESSAGE_H
+#define CTA_MESSAGE_H
+#include "cta_functions.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Write a message
+ *
+ * \param className I name of class that writes the message
+ * \param method    I name of the method that writes the message
+ * \param message   I message
+ * \param type      I type of message
+ *                    -'M':message
+ *                    -'I':Info
+ *                    -'W':Warning
+ *                    -'E':Error
+ *                    -'F':Fatal error  (will terminate application)
+ */
+CTAEXPORT void CTA_Message_Write(const char *className, const char *method, const char *message, char type);
+
+/** \brief Set an external writer for handling messages
+ *
+ *  An external writer must comply to the following C interface:
+ *
+ *  void my_writer(char *className, char *method, char *message, char type);
+ *
+ * \param externalWriter I External writer
+ *
+ *
+ *  \note Fortran writers are not yet supported
+ */
+CTAEXPORT void CTA_Message_SetExternalWriter(CTA_Func externalWriter);
+
+
+/** \brief Toggle message handler between quiet and normal mode.
+ *  in the quiet mode no messages are send (not even to external writers)
+ *
+ * \param setting  I set message handler in quiet mode CTA_TRUE/CTA_FALSE
+ *
+ */
+CTAEXPORT void CTA_Message_Quiet(int setting);
+
+
+/** \brief Macro for writing errors
+ *
+ * The name of the class and the method name must be set by the defines CLSNAM  and METHOD
+ *
+ * \param message I Error message
+ *
+ */
+#define CTA_WRITE_ERROR(message) CTA_Message_Write(CLASSNAME,METHOD,message,'E')
+
+/** \brief Macro for writing info messages
+ *
+ * The name of the class and the method name must be set by the defines CLSNAM  and METHOD
+ *
+ * \param message I Info message
+ *
+ */
+#define CTA_WRITE_INFO(message) CTA_Message_Write(CLASSNAME,METHOD,message,'I')
+
+/** \brief Macro for writing warnings
+ *
+ * The name of the class and the method name must be set by the defines CLSNAM  and METHOD
+ *
+ * \param message I Warning message
+ *
+ */
+#define CTA_WRITE_WARNING(message) CTA_Message_Write(CLASSNAME,METHOD,message,'W')
+
+#ifdef __cplusplus
+}
+#endif
+#endif
+
diff --git a/costa/native/cta/include/cta_metainfo.h b/costa/native/cta/include/cta_metainfo.h
new file mode 100644
index 000000000..9a66a03f3
--- /dev/null
+++ b/costa/native/cta/include/cta_metainfo.h
@@ -0,0 +1,191 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_obsdescr.h
+\brief Interface description of the COSTA default observation descriptor component. For user implementation see cta_usr_obs_descr.h.
+
+CTA_ObsDescr is used for describing observations.
+*/
+
+#ifndef CTA_METAINFO_H
+#define CTA_METAINFO_H
+#include "cta_system.h"
+#include "cta_handles.h"
+#include "cta_datatypes.h"
+#include "cta_functions.h"
+#include "cta_string.h"
+#include "cta_vector.h"
+#include "cta_matrix.h"
+
+/* Function Handle */
+typedef CTA_Handle CTA_Grid;
+typedef CTA_Handle CTA_Metainfo;
+typedef CTA_Handle CTA_MetainfoClass;
+
+#include "cta_sobs.h"
+
+/* parameters for different functions */
+#define CTA_METAINFO_CREATE_SIZE         ( 1)
+#define CTA_METAINFO_CREATE_INIT         ( 2)
+#define I_CTA_METAINFO_FREE                ( 3)
+#define I_CTA_METAINFO_GET_REST            ( 4)
+#define I_CTA_METAINFO_GET_KEYS            ( 5)
+#define I_CTA_METAINFO_COUNT_OBSERVATIONS  ( 6)
+#define I_CTA_METAINFO_COUNT_PROPERTIES    ( 7)
+#define I_CTA_METAINFO_EXPORT              ( 8)
+#define CTA_METAINFO_NUMFUNC             ( 9)
+
+
+
+
+typedef struct {
+char name[CTA_STRLEN_TAG];
+  int type;
+  int nx, ny, nz,nsize;
+double x_origin, y_origin, z_origin;
+double dx,dy,dz;
+   char refdimp[10][80];              // possible reference to  tree node where coordinates are
+} CTAI_Gridm;
+
+/* EXPLANATION TYPE OF GRID:
+-99: undefined
+-3: 3D regular grid, only by reference 
+-2: 2D regular grid, only by reference 
+-1: 1D regular grid, only by reference 
+1: 1D regular grid
+2: 2D regular grid
+3: 3D regular grid
+10: curve (?)
+*/
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+
+/** \brief Create a new class (=implementation) of a COSTA observation description component.
+ *
+ * \param name       I  name of the new observation description class
+ * \param h_func     I  COSTA function handles for functions that implement class,
+ *                      missing functions must have value CTA_NULL
+ * \param hobsdscrcl O  receives handle of new observation description class
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Metainfo_DefineClass(const char *name,
+   const CTA_Func h_func[CTA_METAINFO_NUMFUNC],
+   CTA_MetainfoClass *hobsdscrcl);
+
+/** \brief Create a new observation description instance.
+ *
+ * \param hsobscl   I  class of new observation description
+ * \param usrdat    IO data of the stochastic observer for which
+                       a descriptor is to be created
+ * \param hobsdscr  O  receives handle of created observation description object
+ * \return error status: CTA_OK if successful
+ */
+ CTAEXPORT int CTA_Metainfo_Create( CTA_Metainfo *hobsdscr);
+// int CTA_Metainfo_Create( CTA_MetainfoClass hsobscl,
+//                          CTA_Handle usrdat, CTA_Metainfo *hobsdscr);
+
+///** \brief Check whether given stochastic observer corresponds to this observation description
+// *
+// * \param hobsdscr  I  handle of observation description
+// * \param hsobs     I  handle of stochastic observer
+// * \return error status: CTA_OK if successful
+// */
+//int CTA_Metainfo_Check_SObs( CTA_Metainfo hobsdscr, CTA_StochObs hsobs);
+
+/** \brief Get properties/values that correspond to a given key.
+ *
+ * \param hobsdscr   I  handle of observation description
+ * \param Key        I  key for which the value is asked
+ * \param Properties IO COSTA-vector that is to receive the values
+ * \param datatype   I  data type of elements in properties vector, must be the same as of queried properties
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Metainfo_SetUnit( CTA_Metainfo hobsdscr,  char* Key); 
+
+
+
+/** \brief set tag of metainfo
+
+*/
+CTANOEXPORT int CTA_Metainfo_GetDescription(CTA_Metainfo hobsdescr,  char **description);
+
+CTANOEXPORT int CTA_Metainfo_IsEqual( CTA_Metainfo hmeta_x, CTA_Metainfo hmeta_y );
+
+CTANOEXPORT int CTAI_Grid_IsEqual( CTAI_Gridm *hgrid1, CTAI_Gridm *hgrid2 );
+
+CTANOEXPORT int CTA_Metainfo_Import(CTA_Metainfo minfo, CTA_Handle usrdata);
+
+
+CTANOEXPORT int CTA_Metainfo_SetTag(CTA_Metainfo hobsdescr,  char* tagname);
+
+
+CTANOEXPORT int CTA_Metainfo_Free( CTA_Metainfo *hobsdscr);
+
+
+CTANOEXPORT int CTA_Metainfo_SetDescription(CTA_Metainfo hobsdescr,  char* description);
+
+CTANOEXPORT int CTA_Metainfo_SetRest(CTA_Metainfo hobsdescr, int *rest);
+
+CTANOEXPORT int CTA_Metainfo_SetGrid(CTA_Metainfo hobsdescr, CTAI_Gridm *hgrid);
+
+CTANOEXPORT int CTA_Metainfo_GetGrid(CTA_Metainfo hobsdescr, CTAI_Gridm *hgrid);
+
+CTANOEXPORT int CTA_Metainfo_Copy(CTA_Metainfo hmeta_x, CTA_Metainfo hmeta_y);
+
+CTANOEXPORT int CTA_Metainfo_GetRest(CTA_Metainfo hobsdescr, int *rest);
+
+CTANOEXPORT int CTA_Metainfo_GetTag(CTA_Metainfo hobsdescr,  char *tagname);
+
+CTANOEXPORT int CTA_Metainfo_SetBelongsTo(CTA_Metainfo hobsdescr,  char* tagname);
+
+CTANOEXPORT int CTA_Metainfo_GetBelongsTo(CTA_Metainfo hobsdescr,  char *tagname);
+
+CTANOEXPORT int CTAI_Grid_Interpolate(CTAI_Gridm gridy, CTAI_Gridm gridx, CTA_Vector vecx, CTA_Vector vecx_to_y);
+
+CTANOEXPORT int CTA_Metainfo_Export(CTA_Metainfo minfo, CTA_Handle usrdata);
+
+CTANOEXPORT int CTA_Metainfo_GetUnit(CTA_Metainfo hobsdescr,  char *nameofunit);
+
+CTANOEXPORT int CTAI_XML_CreateGrid(xmlNode *cur_node, CTAI_Gridm *thisgrid);
+
+
+/* tijdelijk */
+CTANOF90 CTAEXPORT int CTA_Metainfo_setRegGrid(CTA_Metainfo hdescr, char *name, 
+                            int nx, int ny, int nz,
+                            double x_origin, double y_origin,
+                            double z_origin,
+                            double dx, double dy, double dz);
+
+CTANOF90 CTAEXPORT int CTA_Metainfo_getRegGrid(CTA_Metainfo hdescr, char *name, 
+                            int *nx, int *ny, int *nz,
+                            double *x_origin, double *y_origin,
+                            double *z_origin,
+                            double *dx, double *dy, double *dz);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif
diff --git a/costa/native/cta/include/cta_method.h b/costa/native/cta/include/cta_method.h
new file mode 100644
index 000000000..612124114
--- /dev/null
+++ b/costa/native/cta/include/cta_method.h
@@ -0,0 +1,89 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_method.h
+\brief The interface description of the COSTA method component (this component is still under construction). For user implementation see cta_usr_method.h.
+
+The COSTA method component is provided for calling data assimilation and calibration methods in a uniform way.
+*/
+
+#ifndef CTA_METHOD_H
+#define CTA_METHOD_H
+#include "cta_system.h"
+#include "cta_errors.h"
+#include "cta_handles.h"
+#include "cta_datatypes.h"
+#include "cta_functions.h"
+
+/* Method handle */
+typedef CTA_Handle CTA_Method;
+typedef CTA_Handle CTA_MethClass;
+
+/* parameters for different functions */
+#define CTA_METH_CREATE_SIZE  ( 1)
+#define CTA_METH_CREATE_INIT  ( 2)
+#define CTA_METH_RUN          ( 3)
+#define CTA_METH_FREE         ( 4)
+#define CTA_METH_NUMFUNC      ( 5)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Create a new class (=implementation) of a COSTA method.
+ *
+ * \param name     I  name of the new method class
+ * \param h_func   I  COSTA function handles for functions that implement class,
+ *                    missing functions must have value CTA_NULL
+ * \param hmethcl  O  receives handle of new method class
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Meth_DefineClass(const char *name, const CTA_Func h_func[CTA_METH_NUMFUNC],
+                         CTA_MethClass *hmethcl);
+
+/** \brief Create an instance of a method.
+ *
+ * \param hmethcl   I  method class of new object
+ * \param userdata  IO user data for creation (depends on class)
+ * \param hmeth     O  receives handle of new method
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Meth_Create(CTA_Method hmethcl, CTA_Handle userdata, CTA_Method *hmeth);
+                                  
+/** \brief Run method.
+ * \param hmeth     I  handle of method
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Meth_Run(CTA_Method hmeth);
+
+/** \brief Free the method object.
+ *
+ * \Note hmeth=CTA_NULL is allowed
+ *
+ * \param hmeth  IO handle of method, replaced by CTA_NULL on return.
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Meth_Free(CTA_Method *hmeth);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/costa/native/cta/include/cta_modbuild_b3b.h b/costa/native/cta/include/cta_modbuild_b3b.h
new file mode 100644
index 000000000..772a5bef5
--- /dev/null
+++ b/costa/native/cta/include/cta_modbuild_b3b.h
@@ -0,0 +1,140 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_modbuild_b3.h
+\brief Description of the COSTA blackbox component
+*/
+
+#ifndef CTA_MODBUILD_B3B_H
+#define CTA_MODBUILD_B3B_H
+
+#include <string.h>
+#ifdef WIN32
+#include <direct.h>
+#endif
+#include "cta.h"
+#include "cta_system.h"
+#include "f_cta_utils.h"
+#include "cta_datatypes.h"
+#include "cta_model_utilities.h" 
+#include "cta_handles.h"
+#include "cta_datetime.h"
+
+#define STRING_MAX		1024                 /* maximum length of string */
+
+#define B3B_ENCODING  ("utf-8")              /* XML encodiing */
+
+
+typedef struct B3B_Variable B3B_Variable;
+typedef B3B_Variable * B3B_VariablePntr;
+
+struct B3B_Variable {
+   char    *name;                            /* variable name */
+   double   value;                           /* variable value */
+};
+
+typedef struct B3B_Station B3B_Station;
+typedef B3B_Station * B3B_StationPntr;
+
+struct B3B_Station {
+   char             *name;                   /* station name */
+   char             *location;               /* station location */
+   char             *filename;               /* station filename */
+   int               nvariables;             /* station number of variables */
+   B3B_VariablePntr *variables;              /* station variables */
+};
+
+typedef struct B3B_Forcing B3B_Forcing;
+typedef B3B_Forcing * B3B_ForcingPntr;
+
+struct B3B_Forcing {
+   char    *name;                            /* forcing name */
+   char    *file;                            /* forcing file */
+   char    *element;                         /* forcing element */
+   char    *property;                        /* forcing property */
+   char    *item;                            /* forcing item */
+   double   value;                           /* forcing value */
+};
+
+typedef struct B3B_Parameter B3B_Parameter;
+typedef B3B_Parameter * B3B_ParameterPntr;
+
+struct B3B_Parameter {
+   char    *name;                            /* parameter name */
+   char    *file;                            /* parameter file */
+   char    *element;                         /* parameter element */
+   char    *property;                        /* parameter property */
+   char    *item;                            /* parameter item */
+   double   value;                           /* parameter value */
+};
+
+typedef struct B3B_StateExchange B3B_StateExchange;
+typedef B3B_StateExchange * B3B_StateExchangePntr;
+
+struct B3B_StateExchange {
+   char    *executable;                      /* name of blackbox executable */
+   char    *state2model;                     /* name of input file */
+   char    *model2state;                     /* name of output file */
+   char    *outputSteps;                     /* output steps 'last' or 'all' */
+};
+
+typedef struct B3B_Model B3B_Model;
+typedef B3B_Model * B3B_ModelPntr;
+
+struct B3B_Model{
+   char                   *type;             /* model type */
+   char                   *description;      /* model description */
+   double                  timestep;         /* simulation timestep in seconds */
+   int                     simulationNumber; /* simulation number */
+   char                   *workingdir;       /* working map of simulation */
+   char                   *templatedir;      /* map with simulation template */
+   char                   *simulationMap;    /* map with simulation results */
+   int                     stateLength;      /* length of the state vector */
+   int                     nstations;        /* number of stations */
+   B3B_StateExchangePntr   stateexchange;    /* pointer to the state exchange variables */
+   B3B_StationPntr        *stations;         /* pointer to the list of stations */
+   int                     nparameters;      /* number of parameters */
+   B3B_ParameterPntr      *parameters;       /* pointer to the list of parameters */
+   int                     nforcings;        /* number of forcings */
+   B3B_ForcingPntr        *forcings;         /* pointer to the list of forcings */
+};
+
+#define B3B_INDEX_THIS              ( 0) /* Handle of instance               */
+#define B3B_INDEX_TIME              ( 1) /* Time instance of model (state)   */
+#define B3B_INDEX_STATE             ( 2) /* State vector of model            */
+#define B3B_INDEX_FORCINGS          ( 3) /* Tree vector containing the forcings of the model */
+#define B3B_INDEX_PARAMETERS        ( 4) /* Tree vector of model parameters */
+#define B3B_INDEX_USERDATA          ( 5) /* Userdata */
+
+#define B3B_SIZE_DATABLK            ( 6)
+
+
+/** \brief Create the model class of the B3B Black-box builder
+ *
+ * \note This is not a user function. It is called at initialization of the
+ *       COSTA environment.
+ *
+ *  \param modelcls  O  receives handle of the B3B-modelbuilder class
+ */
+
+CTANOEXPORT void CTA_Modbuild_b3b_CreateClass(CTA_ModelClass *modelcls);
+
+#endif
+
diff --git a/costa/native/cta/include/cta_modbuild_b3b_utils.h b/costa/native/cta/include/cta_modbuild_b3b_utils.h
new file mode 100644
index 000000000..eeb016d3c
--- /dev/null
+++ b/costa/native/cta/include/cta_modbuild_b3b_utils.h
@@ -0,0 +1,170 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2006  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_modbuild_b3b_utils_h.h
+\brief Description of the COSTA blackbox component utilisties
+*/
+
+#ifndef CTA_MODBUILD_B3B_UTILS_H
+#define CTA_MODBUILD_B3B_UTILS_H
+
+#include <libxml/encoding.h>
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#include <libxml/xinclude.h>
+#include <libxml/xmlwriter.h>
+
+#include "cta.h"
+#include "cta_xml.h"
+#include "cta_modbuild_b3b.h"
+
+/** \brief Copy a string
+ *
+ * \param name     I  string to be copied
+ *
+ * \return copy of the string
+ */
+CTANOEXPORT char *        B3B_CopyText(const char *in);
+
+/** \brief Reallocates a block of memory
+ *
+ * \param memblock  I  Pointer to previously allocated memory block
+ * \param size      I  New size in bytes.
+ *
+ * \return pointer to the allocated space
+ */
+CTANOEXPORT void *        B3B_Realloc(void *memblock, size_t size);
+
+/** \brief Allocates a block of memory
+ *
+ * \param nelem     I  Number of elements
+ * \param elsize    I  Element size
+ *
+ * \return pointer to the allocated space
+ */
+CTANOEXPORT void *        B3B_Malloc(size_t nelem, size_t elsize);
+   
+/** \brief Frees a block of memory that is currently allocated
+ *
+ * \param pntr     I  starting address of the memory block to be freed
+ *
+ * \return no return value
+ */
+CTANOEXPORT void          B3B_Free(void *pntr);
+
+/** \brief Frees a model that is currently allocated
+ *
+ * \param pntr     I  starting address of the model to be freed
+ *
+ * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE
+ */
+CTANOEXPORT int           B3B_Free_Model(B3B_ModelPntr model);
+
+/** \brief Frees a stateexchange that is currently allocated
+ *
+ * \param pntr     I  starting address of the stateexchange to be freed
+ *
+ * \return no return value
+ */
+CTANOEXPORT void          B3B_Free_StateExchange(B3B_StateExchangePntr stateexchange);
+
+/** \brief Frees a parameter that is currently allocated
+ *
+ * \param pntr     I  starting address of the parameter to be freed
+ * \return no return value
+ */
+CTANOEXPORT void          B3B_Free_Parameter(B3B_ParameterPntr parameter);
+
+/** \brief Frees a forcing that is currently allocated
+ *
+ * \param pntr     I  starting address of the forcing to be freed
+ * \return no return value
+ */
+CTANOEXPORT void          B3B_Free_Forcing(B3B_ForcingPntr forcings);
+
+/** \brief Frees a station that is currently allocated
+ *
+ * \param pntr     I  starting address of the station to be freed
+ * \return no return value
+ */
+CTANOEXPORT void          B3B_Free_Station(B3B_StationPntr stations);
+
+/** \brief Convert COSTA state vector to B3B model
+ *
+ * \param model    I  Pointer naar de model data
+ * \param start    I  Simulatie starttijd in modified julian day
+ * \param stop     I  Simulatie stoptijd in modified julian day
+ * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE
+ */
+CTANOEXPORT int           B3B_State2Model(B3B_ModelPntr model, double start, double stop);
+
+/** \brief Convert B3B model to COSTA state vector
+ *
+ * \param model    I  Pointer naar de model data
+ * \param start    I  Simulatie starttijd in modified julian day
+ * \param stop     I  Simulatie stoptijd in modified julian day
+ * \param sstate   O  Pointer naar state vector
+ * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE
+ */
+CTANOEXPORT int           B3B_Model2State(B3B_ModelPntr model, double start, double stop, CTA_TreeVector *sstate);
+
+/** \brief Open model2state file 
+ *
+ * \param model     I  Pointer naar de model data
+ * \param htree     O  Pointer naar costa tree
+ * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE
+ */
+CTANOEXPORT int           B3B_OpenModel2State(B3B_ModelPntr model, CTA_Tree *htree);
+
+/** \brief Get value of B3B model 
+ *
+ * \param model     I  Pointer naar de model data
+ * \param htree     I  Pointer naar costa tree
+ * \param station   I  Name of the station
+ * \param variable  I  Name of the variabele
+ * \param time      I  tijdstip in MJD
+ * \param value     O  waarde van de variabele op tijdstip van station
+ * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE
+ */
+CTANOEXPORT int           B3B_Model2StateByStationVariableTime(B3B_ModelPntr model, CTA_Tree htree, char *station, char *variable, double time, double *value);
+
+/** \brief 
+ *
+ * \param model    I  Pointer naar de model data
+ * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE
+ */
+CTANOEXPORT int           B3B_Run_Model(B3B_ModelPntr model);
+
+/** \brief 
+ *
+ * \param 
+ * \return 
+ */
+CTANOEXPORT B3B_ModelPntr B3B_Read_Model(int simulationNumber, CTA_Tree hmodelinput, CTA_Handle *sstate, CTA_Handle *sparam, CTA_Handle *sforc);
+
+/** \brief Check for a COSTA error
+ *
+ * \param ierr     I  COSTA error code
+ * \param msg      I  Error message
+ * \return no return value
+ */
+CTANOEXPORT void          B3B_CheckError(int ierr, const char *msg);
+
+#endif
diff --git a/costa/native/cta/include/cta_modbuild_par.h b/costa/native/cta/include/cta_modbuild_par.h
new file mode 100644
index 000000000..57c0c6d4b
--- /dev/null
+++ b/costa/native/cta/include/cta_modbuild_par.h
@@ -0,0 +1,56 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_modbuild_par.h
+
+\brief Par-Modelbuilder.
+*/
+
+#ifndef CTA_MODBUILD_PAR_H
+#define CTA_MODBUILD_PAR_H
+#include "cta_handles.h"
+#include "cta_datatypes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Create the model class of the Par-Modelbuilder and initilizes MPI
+ *
+ * \note This is not a user function. It is called at initialization of the
+ *       COSTA environment.
+ *
+ *  \param modelcls  O  receives handle of the SP-modelbuilder class
+ */
+CTAEXPORT void CTA_Modbuild_par_CreateClass(CTA_ModelClass *modelcls);
+
+/** \brief Stop the model class of the Par-Modelbuilder and finalize MPI
+ *
+ * \note This is not a user function. It is called at the finalization of the
+ *       COSTA environment.
+ *
+ */
+CTAEXPORT void CTA_Modbuild_par_Finalize();
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/costa/native/cta/include/cta_modbuild_sp.h b/costa/native/cta/include/cta_modbuild_sp.h
new file mode 100644
index 000000000..6fdb8edf4
--- /dev/null
+++ b/costa/native/cta/include/cta_modbuild_sp.h
@@ -0,0 +1,78 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_modbuild_sp.h
+\brief SP-Modelbuilder.
+*/
+
+#ifndef CTA_MODBUILD_SP_H
+#define CTA_MODBUILD_SP_H
+#include "cta_handles.h"
+#include "cta_datatypes.h"
+#include "cta_metainfo.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Create the model class of the SP-Modelbuilder
+ *
+ * \note This is not a user function. It is called at initialization of the
+ *       COSTA environment.
+ *
+ *  \param modelcls  O  receives handle of the SP-modelbuilder class
+ */
+CTANOEXPORT void CTA_Modbuild_sp_CreateClass(CTA_ModelClass *modelcls);
+CTANOEXPORT void modbuild_sp_create_size(CTA_Handle userdata, int *memsize, int *ierr);
+CTANOEXPORT void modbuild_sp_create_init(CTA_Handle *this_obj, CTA_Handle *data, CTA_Handle *hinput, int *ierr);
+CTANOEXPORT void modbuild_sp_free(CTA_Handle *data ,int *ierr);
+CTANOEXPORT void modbuild_sp_compute(CTA_Handle *data, CTA_Time *timespan, int *ierr);
+CTANOEXPORT void modbuild_sp_setstate(CTA_Handle *data, CTA_TreeVector *state, int *ierr);
+CTANOEXPORT void modbuild_sp_getstate(CTA_Handle *data, CTA_TreeVector *state, int *ierr);
+CTANOEXPORT void modbuild_sp_axpymodel(CTA_Handle *datay, double *alpha, CTA_Handle *datax, int *ierr);
+CTANOEXPORT void modbuild_sp_axpystate(CTA_Handle *data, double *alpha, CTA_TreeVector *statex, int *ierr);
+CTANOEXPORT void modbuild_sp_setforc(CTA_Handle *data, CTA_TreeVector *state, int *ierr);
+CTANOEXPORT void modbuild_sp_getforc(CTA_Handle *data, CTA_TreeVector *state, int *ierr);
+CTANOEXPORT void modbuild_sp_axpyforc(CTA_Handle *data, CTA_Time *tspan, double *alpha,  CTA_TreeVector *statex, int *ierr);
+CTANOEXPORT void modbuild_sp_setparam(CTA_Handle *data, CTA_TreeVector *state, int *ierr);
+CTANOEXPORT void modbuild_sp_getparam(CTA_Handle *data, CTA_TreeVector *state, int *ierr);
+CTANOEXPORT void modbuild_sp_axpyparam(CTA_Handle *data, double *alpha, CTA_TreeVector *statex, int *ierr);
+CTANOEXPORT void modbuild_sp_getnoisecount(CTA_Handle *data, int* nnoise, int* ierr);
+CTANOEXPORT void modbuild_sp_getnoisecovar(CTA_Handle *data, CTA_TreeVector *colsvar, int* ierr);
+CTANOEXPORT void modbuild_sp_getobsvalues(CTA_Handle *data, CTA_Time *ttime, CTA_ObsDescr *hdescr, CTA_Vector *vval, int* ierr);
+CTANOEXPORT void modbuild_sp_getobsselect(CTA_Handle *data, CTA_Time *ttime, CTA_ObsDescr *hdescr, CTA_String *sselect, int* ierr);
+CTANOEXPORT void modbuild_sp_addnoise(CTA_Handle *data, CTA_Time *ttime, int* ierr);
+CTANOEXPORT void modbuild_sp_ar1_create(CTA_Handle *hinput, CTA_TreeVector *state, CTA_TreeVector *sbound, 
+                            CTA_TreeVector *sparam, int *nnoise, CTA_Time  *time0,
+                            CTA_String *snamnoise, CTA_Handle *husrdata, int *ierr);
+CTANOEXPORT void  modbuild_sp_ar1_covar(CTA_TreeVector *colsvar,int* nnoise, CTA_Handle husrdata,int* ierr);
+CTANOEXPORT void modbuild_sp_ar1_compute(CTA_Time *timesspan,CTA_TreeVector *state, CTA_TreeVector *saxpyforc,
+                             BOOL *baddnoise, CTA_TreeVector *sparam, CTA_Handle husrdata,
+                             int* ierr);
+CTANOEXPORT void modbuild_sp_ar1_getobsval(CTA_TreeVector *state, CTA_ObsDescr *hdescr, CTA_Vector *vval, CTA_Handle *husrdata, int *ierr);
+CTANOEXPORT int modbuild_sp_compute_covars(int nmodel, double *Qar1, double *p0, CTAI_Gridm hgrid);
+CTANOEXPORT void CTA_Modbuild_sp_CreateClass(CTA_ModelClass *modelcls);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
diff --git a/costa/native/cta/include/cta_model.h b/costa/native/cta/include/cta_model.h
new file mode 100644
index 000000000..0f9df3965
--- /dev/null
+++ b/costa/native/cta/include/cta_model.h
@@ -0,0 +1,587 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_model.h
+\brief Interface description of the COSTA default model component. For user implementation see cta_usr_model.h.
+
+Functions for creating and working with models. CTA_Model is the default class implementation for models.
+*/
+
+#ifndef CTA_MODEL_H
+#define CTA_MODEL_H
+#include "cta_handles.h"
+#include "cta_datatypes.h"
+#include "cta_functions.h"
+#include "cta_treevector.h"
+#include "cta_time.h"
+#include "cta_obsdescr.h"
+
+/* Function Handle */
+typedef CTA_Handle CTA_Model;
+typedef CTA_Handle CTA_ModelClass;
+
+/* parameters for different user functions */
+#define I_CTA_MODEL_CREATE_SIZE      ( 0)
+#define I_CTA_MODEL_CREATE_INIT      ( 1)
+#define I_CTA_MODEL_FREE             ( 2)
+#define I_CTA_MODEL_COMPUTE          ( 3)
+#define I_CTA_MODEL_SET_STATE        ( 4)
+#define I_CTA_MODEL_GET_STATE        ( 5)
+#define CTA_MODEL_AXPY_STATE       ( 6)
+#define CTA_MODEL_AXPY_MODEL       ( 7)
+#define CTA_MODEL_SET_FORC         ( 8)
+#define CTA_MODEL_GET_FORC         ( 9)
+#define CTA_MODEL_AXPY_FORC        (10)
+#define CTA_MODEL_SET_PARAM        (11)
+#define CTA_MODEL_GET_PARAM        (12)
+#define CTA_MODEL_AXPY_PARAM       (13)
+#define CTA_MODEL_GET_STATESCALING (14)
+#define CTA_MODEL_GET_TIMEHORIZON  (15)
+#define CTA_MODEL_GET_CURRENTTIME  (16)
+
+/* stochastic functions */
+#define CTA_MODEL_GET_NOISE_COUNT  (17)
+#define CTA_MODEL_GET_NOISE_COVAR  (18)
+
+/* handling of observations */
+#define CTA_MODEL_GET_OBSVALUES      (19)
+#define CTA_MODEL_GET_OBSSELECT      (20)
+#define CTA_MODEL_ANNOUNCE_OBSVALUES (21)
+
+#define CTA_MODEL_ADD_NOISE          (22)
+#define I_CTA_MODEL_EXPORT           (23)
+#define I_CTA_MODEL_IMPORT           (24)
+
+/* Methods for adjoint models */
+#define CTA_MODEL_ADJ_SET_FORC       (25)
+#define CTA_MODEL_ADJ_COMPUTE        (26)
+#define CTA_MODEL_ADJ_PREPARE        (27)
+
+/* Localization */
+#define I_CTA_MODEL_GETOBSLOCALIZATION (28)
+
+/* Loading and restoring of state */
+#define CTA_MODEL_LOAD_PERSISTENTSTATE       (29)
+#define CTA_MODEL_SAVE_INTERNALSTATE         (30)
+#define CTA_MODEL_RESTORE_INTERNALSTATE      (31)
+#define CTA_MODEL_RELEASE_INTERNALSTATE      (32)
+#define CTA_MODEL_SAVE_PERSISTENTSTATE       (33)
+
+/* Domain and local analysis support */
+#define CTA_MODEL_GET_NUMDOMAINS              (34)
+#define CTA_MODEL_GET_OBSSELECTOR             (35)
+#define CTA_MODEL_GET_OBSLOCALIZATIONDOMAIN   (36)
+#define CTA_MODEL_GET_STATEDOMAIN             (37)
+#define CTA_MODEL_AXPY_STATEDOMAIN            (38)
+
+/* Number of model function handles */
+#define CTA_MODEL_NUMFUNC                   (39)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Create a model instance
+ *
+ * \param hmodcl   I  model class of new instance
+ * \param userdata IO user data needed for creation (depends on modelclass)
+ * \param hmodel   O  receives handle of new model instance
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_Create(CTA_ModelClass hmodcl, CTA_Handle userdata, CTA_Model *hmodel);
+
+/** \brief Compute model for given timespan
+ *
+ * \param hmodel   IO handle of model instance
+ * \param htime    I  timespan for which to compute
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_Compute(CTA_Model hmodel, CTA_Time htime);
+
+/** \brief Add noise during during the given timespan at 
+ *        the Compute
+ *
+ * \note Noise is added in the compute-method
+ * \param hmodel   IO handle of model instance
+ * \param htime    I  timespan for which to compute adding noise
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_AddNoise(CTA_Model hmodel, CTA_Time htime);
+
+/** \brief Set the internal state of the model.
+ *
+ * \note A copy of the state is set
+ *
+ * \param hmodel   IO handle of model instance
+ * \param hstate   I  handle of new state
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_SetState(CTA_Model hmodel, CTA_TreeVector hstate);
+
+/** \brief Get a copy of the internal state.
+ *
+ * \note Optionally a tree-vector is created. In that case the caller of this
+ * method is responsible for freeing that tree-vector. The input state must be compatible
+ * (same size and or composition) as the models internal state.
+ * \note If *hstate == CTA_NULL a new object is created, user is responsible for freeing this object.
+ *
+ * \param hmodel   I  handle of model instance
+ * \param hstate   IO receives state of the model, *hstate can be CTA_NULL on calling (see note)
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_GetState(CTA_Model hmodel, CTA_TreeVector *hstate);
+
+/** \brief Perform axpy operation on the internal state.
+ *
+ * \note AXPY: y=alpha*x+y. y corresponds to the models
+ *       internal state and x can be a state vector or a model
+ 
+ * \param hmodel   IO handle of model instance (y)
+ * \param alpha    I  alpha
+ * \param hx       I  handle of x (state or model)
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_AxpyState(CTA_Model hmodel, double alpha, CTA_Handle hx);
+
+/** \brief Get element-wise scaling for model state
+ *
+ * The values in the state-vector are compared on "importance" in various
+ * algorithms like RRSQRT and COFFEE. The model state holds in general
+ * various quantities like concentration, velicity, location etc in
+ * arbitrary units. The scaling vector (that can be model state dependend)
+ * makes it possible to meaningfull compare elements in the state-vector
+ * for importance. Various methods are available like a transformation to
+ * enery.
+ *
+ * The scaling vector represents a diagonal scaling matrix but is
+ * respresented by a tree-vector. 
+ *
+ * \note The elementwise scaling is returned in the form of a tree-vector
+ *       with same build-up as the tree-vector of the model state. The scaling vector
+ *       is created whenever hscale==CTA_NULL on input, the caller is
+ *       responsible for freeing this object.
+ *
+ * \param hmodel   I  handle of model instance
+ * \param hscale   IO receives state scaling vector for the model state,
+                      *hstate can be CTA_NULL on calling (see note)
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_GetStateScaling(CTA_Model hmodel, CTA_TreeVector *hscale);
+
+
+/** \brief Set the models forcings.
+ *
+ * \note Set the forcings (constant) for the given timespan.
+ *       The model will fall back to its own forcings definition
+ *       outside the given timespan.
+ *
+ * \param hmodel   IO handle of model instance
+ * \param tspan    I  time span on which to set the forcing values
+ * \param hforc    I  handle of vector with new forcings
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_SetForc(CTA_Model hmodel, CTA_Time tspan, CTA_TreeVector hforc);
+
+/** \brief Get a copy of the values of the models forcings
+ *
+ * \note Optionally a tree-vector is created in that case the caller of this
+ * method is responsible for freeing that tree-vector. The input tree-vector
+ * must be compatible (same size and or composition) as the models
+ * internal tree-vector representing the forcings.
+ * If the forcings of the model are not constant for the given timespan
+ * the result is dependent on the model-implementation
+ * \note If *hforc == CTA_NULL a new object is created, user is responsible for freeing this object.
+ *
+ * \param hmodel   I  handle of model instance
+ * \param tspan    I  timespan for wich the given forcings are valid
+ * \param hforc    IO receives models forcings, *hforc can be CTA_NULL on calling (see note)
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_GetForc(CTA_Model hmodel, CTA_Time tspan, CTA_TreeVector *hforc);
+
+/** \brief Perform axpy operation on the models forcings.
+ *
+ * \note AXPY: y=alpha*x+y. y corresponds to the models
+ *       internal forcings.
+ *       The adjustment to the forcings (alpha*x) is only valid for the given 
+ *       time span. Note that the model will use y(t)+x for the given time span
+ *       where y(t) denotes the default forcings of the model.
+ *
+ * \param hmodel   IO handle of model instance (y)
+ * \param tspan    I  time span for wich the given forcings are valid
+ * \param alpha    I  scalar
+ * \param hx       I  handle of forcings tree-vector x
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_AxpyForc( CTA_Model hmodel, CTA_Time tspan, double alpha, CTA_TreeVector hx);
+
+/** \brief Set parameters of the model.
+ *
+ * \param hmodel   IO handle of model instance
+ * \param hparam   I  handle of parameters vector
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_SetParam(CTA_Model hmodel, CTA_TreeVector hparam);
+
+/** \brief Get a copy of the parameters of the model.
+ *
+ * \note Optionally a tree-vector is created in that case the caller of this
+ * method is responsible for freeing that tree-vector. The input tree-vector
+ * must be compatible (same size and or composition) as the models
+ * internal tree-vector representing the parameters.
+ * \note If *hforc == CTA_NULL a new object is created, user is responsible for freeing this object.
+ *
+ * \param hmodel   I  handle of model instance
+ * \param hparam   IO receives model forcings, *hforc can equal CTA_NULL on calling (see note)
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_GetParam(CTA_Model hmodel, CTA_TreeVector *hparam);
+
+/** \brief Perform axpy operation on the models parameters.
+ *
+ * \note AXPY: y=alpha*x+y where y corresponds to the models
+ *       internal parameters.
+ *
+ * \param hmodel   IO handle of model instance (y)
+ * \param alpha    I  alpha
+ * \param hx       I  handle of treevector of parameters (x)
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_AxpyParam( CTA_Model hmodel, double alpha, CTA_TreeVector hx);
+
+/** \brief Return the timehorizon on the model.
+ * The time horizon is the initial overal simulation span for which the mode is configured 
+ *
+ * \param hmodel   I handle of model instance
+ * \param tHorizon I time horizon of model
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_GetTimeHorizon( CTA_Model hmodel, CTA_Time tHorizon);
+
+/** \brief Return the current time of the model.
+ *
+ * \param hmodel   I handle of model instance
+ * \param tCurrent I time corresponding the the model state
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_GetCurrentTime( CTA_Model hmodel, CTA_Time tCurrent);
+
+/** \brief Get covariance matrix of noise parameters.
+ *
+ * \note ONLY for Stochastic models.
+ *       The covariance matrix is represented by an array
+ *       of tree-vectors (columns of the matrix)
+ *       optionally a tree-vector is created in that case the caller of this
+ *       method is responsible for freeing that tree-vector. The input tree-vector
+ *       must be compatible (same size and or composition) as the models
+ *       internal tree-vector.
+ * \note If hstmat[icol] == CTA_NULL a new object is created, user is responsible for freeing this object.
+ *
+ * \param hmodel   I  handle of model instance
+ * \param hstmat   O  receives array of tree-vectors, *hstmat can equal CTA_NULL on calling (see note)
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_GetNoiseCovar(CTA_Model hmodel, CTA_TreeVector *hstmat);
+
+/** \brief Get number of noise parameters: the number of columns of the noise covariance matrix.
+ *
+ * \note ONLY for Stochastic models. 
+ *
+ * \param hmodel   I  handle of model instance
+ * \param nnoise   O  receives number of noise parameters
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_GetNoiseCount(CTA_Model hmodel,int *nnoise);
+
+/** \brief Free model instance.
+ *
+ * \note ONLY for Stochastic models.
+ *
+ * \param hmodel   IO handle of model instance, replaced by CTA_NULL on return
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_Free(CTA_Model *hmodel);
+
+/** \brief Announce to the model what observations will be requested.
+ *
+ *  Before the compute method this method is used to announce what
+ *  obeservation will be requested after the CTA_Model_Compute using the
+ *  CTA_Model_GetObsvalues method.
+ *
+ *  For some simulation models it is more efficient to do a single simulation
+ *  (a single CTA_Model_Compute call) for a particular simulation span then
+ *  simulating the same simulation span in a number of steps (multiple
+ *  CTA_Model_Compute calls).
+ *  
+ *  This method can be used to announce for what observations the model
+ *  must provide a prediction in advance. This method must be called prior
+ *  to the CTA_Compute method and makes it possible to perform simulations
+ *  over a longer time interval without the need to interupt the computations
+ *  in order to get the predictions at intermediate time instances.
+ *
+ *  Notes on the behavior of the method:
+ *  - The observation description used in the first CTA_Model_GetObsValues
+ *    after the compute MUST be the same as the observation description
+ *    used in the announce.
+ *  - All observations that are announced MUST be in the timespan of the
+ *    following CTA_Model_Compute.
+ *  - The announced observations can only be retreved ONCE after the 
+ *    CTA_Model_Compute.
+ *  - A CTA_Model_SetState or CTA_Model_AxpyState will reset the announced 
+ *    CTA_Model_AnnounceObsValues administration (since stored predictions
+ *    might not be valid anymore)
+ *
+ * \param hmodel   I  handle of model instance
+ * \param hdescr   I  observation description component
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_AnnounceObsValues(CTA_Model hmodel, CTA_ObsDescr hdescr);
+
+/** \brief Get (interpolate) the models internal state to the
+ *   observations described as specified in the observation
+ *   description component.
+ *
+ * \note The interface supports a the time instance for time-interpolation.
+ *       It depends on the model whether and how this is supported.
+ *
+ * \param hmodel   I  handle of model instance
+ * \param htime    I  time instance (for checking and time-interpolation if
+ *                    supported by model)
+ * \param hdescr   I  observation description component
+ * \param values   O  receives values of the models internal state corresponding to
+ *                    observations as described in hdescr
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_GetObsValues(CTA_Model hmodel, CTA_Time htime,
+   CTA_ObsDescr hdescr, CTA_Vector values);
+
+
+/** \brief Get for each observation a localization scaling vector 
+ *  
+ * \param hmodel   I  handle of model instance
+ * \param hdescr   I  observation description for which we want localization scaling vectors
+ * \param distance I  characteristic distance
+ * \param locVecs  O  costa vector of handles to treevectors (scaling vectors). The treevectors
+ *                    are created when the indices are CTA_NULL on entry
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_GetObsLocalization( CTA_Model hmodel, 
+                   CTA_ObsDescr hdescr, double distance, CTA_Vector locVecs);
+
+
+/** \brief Get a query for the stochastic observer in order to
+ *  filter out the observations that can actually be provided by the model.
+ *
+ * \param hmodel   I  handle of model instance
+ * \param htime    I  time instance
+ * \param hdescr   I  observation description component
+ * \param sselect  O  receives a query to filter out the observations, must exist before calling
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_GetObsSelect(CTA_Model hmodel, CTA_Time htime,
+   CTA_ObsDescr hdescr, CTA_String sselect);
+
+/* \brief  Save the current state of the model in a treevector
+ *
+ * \param hmodel     I  handle of model instance
+ * \param instanceID O  ID to retrieve the CTA_Handle to the treevector
+ * \return error status: CTA_OK if successful
+*/
+CTAEXPORT int CTA_Model_SaveInternalState(CTA_Model hmodel, CTA_String *instanceID);
+
+/* \brief  Restore the state of the model to a previously saved state
+ *
+ * \param hmodel     I  handle of model instance
+ * \param instanceID O  ID to retrieve the CTA_Handle to the treevector with the previously saved state
+ * \return error status: CTA_OK if successful
+ */
+  CTAEXPORT int CTA_Model_RestoreInternalState(CTA_Model hmodel, CTA_String instanceID);
+
+/* \brief  Free memory of a previously saved state
+ *
+ * \param hmodel     I  handle of model instance
+ * \param instanceID I  ID to retrieve the CTA_Handle to the treevector to release
+ * \return error status: CTA_OK if successful
+ */
+  CTAEXPORT int CTA_Model_ReleaseInternalState(CTA_Model hmodel, CTA_String instanceID);
+
+/* \brief  Save a previously saved internal state to file
+ *
+ * \param hmodel     I  handle of model instance
+ * \param filename   I  name of file to be written
+ * \param instanceID I  ID to retrieve the CTA_Handle to the treevector to export to file
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_SavePersistentState(CTA_Model hmodel, CTA_String filename, CTA_String instanceID);
+
+/* \brief  Load a model state from file and save it as an internal state
+ *
+ * \param hmodel     I  handle of model instance
+ * \param filename   I  name of file to read state from
+ * \param instanceID O  ID to retrieve the CTA_Handle to the treevector that holds the state
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_LoadPersistentState(CTA_Model hmodel, CTA_String filename, CTA_String *instanceID);
+
+/** \brief Export the whole internal state of a model
+ *  This export function will export the whole state of the model such that
+ *  a so called "restart" start from this point yielding the same results.
+ *  There are no ruled on the format that is used to store the data.
+ *  Various extra otions are valid but a model will in most cases support an export 
+ *  to a file and to a COSTA pack object.
+ *  
+ *
+ * \param hmodel   I  handle of model instance
+ * \param hexport  I  target for export e.g. CTA_File or CTA_Pack
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_Export( CTA_Model hmodel, CTA_Handle hexport);
+
+/** \brief Import the whole internal state of a model
+ *  After the inport the models internal state is exactly the same as the point that
+ *  the export was created using CTA_Model_Export.
+ *  
+ *
+ * \param hmodel   I  handle of model instance
+ * \param himport  I  handle with data created by CTA_MODEL_Export e.g. CTA_File or CTA_Pack
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_Import( CTA_Model hmodel, CTA_Handle himport);
+
+/** \brief 
+ *  The adjoint model is mostly forced with scaled observation minus model residuals
+ *  This routines is used to set the forcing.
+ *
+ * \param hmodel   IO handle of model instance
+ * \param hdescr   I  handle to observation descriptions needed for interpretation of the forcing vector
+ * \param vforc    I  vector with the forcing values
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTANOEXPORT int CTA_Model_AdjSetointForc(CTA_Model hmodel, CTA_ObsDescr hdescr, CTA_Vector vforc);
+
+/** \brief 
+ *  bla
+ *  HOMEWORK FOR JULIUS!
+ *
+ * \param hmodel   I  handle of model instance
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTANOEXPORT int CTA_Model_AdjPrepare(CTA_Model hmodel, CTA_Time time);
+
+/** \brief 
+ *  bla
+ *  HOMEWORK FOR JULIUS!
+ *
+ * \param hmodel   I  handle of model instance
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTANOEXPORT int CTA_Model_AdjCompute(CTA_Model hmodel, CTA_Time time);
+
+/*Initernal non-user routines */
+int CTAI_Model_PerformTimesteps(
+   CTA_Model hmodel,         /* handle of model */
+   CTA_Function *function,   /* Function that must be called */
+   CTA_Time htime,           /* timespan to compute */
+   int mindBarrier           /* Flag to activate barrier check fixed timestepping */
+);
+
+
+
+/** \brief Get the number of domains for local analysis
+ *  
+ * \param hmodel   I  handle of model instance
+ * \param distance I  characteristic distance
+ * \param ndomains O  number of domains
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_GetNumDomains(CTA_Model hmodel, double distance, int *ndomains);
+
+
+/** \brief Get selection of observations that are relevnet for assimilation in the given domain
+ *  
+ * \param hmodel    I  handle of model instance
+ * \param hdescr    I  observation description of all observations
+ * \param distance  I  characteristic distance
+ * \param idomain   I  domain number
+ * \param selection O  costa vector with the indices of the relevant observations (0 based)
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_GetObsSelector( CTA_Model hmodel, 
+                   CTA_ObsDescr hdescr, double distance, int idomain, CTA_Vector *selection);
+
+
+
+
+
+/** \brief Get for each observation a localization scaling vector for single domain
+ *  
+ * \param hmodel   I  handle of model instance
+ * \param hdescr   I  observation description for which we want localization scaling vectors
+ * \param distance I  characteristic distance
+ * \param idomain  I  domain number
+ * \param locVecs  O  costa vector of handles to treevectors (scaling vectors). The treevectors
+ *                    are created when the indices are CTA_NULL on entry
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_GetObsLocalizationDomain( CTA_Model hmodel, 
+                   CTA_ObsDescr hdescr, double distance, int idomain, CTA_Vector locVecs);
+
+
+/** \brief Get a copy of the internal state.
+ *
+ * \note Optionally a tree-vector is created. In that case the caller of this
+ * method is responsible for freeing that tree-vector. The input state must be compatible
+ * (same size and or composition) as the models internal state.
+ * \note If *hstate == CTA_NULL a new object is created, user is responsible for freeing this object.
+ *
+ * \param hmodel   I  handle of model instance
+ * \param idomain  I  domain number
+ * \param hstate   IO receives state of the model, *hstate can be CTA_NULL on calling (see note)
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_GetStateDomain(CTA_Model hmodel, int idomain, CTA_TreeVector *hstate);
+
+/** \brief Perform axpy operation on the internal state for a single domain 
+ *
+ * \note AXPY: y=alpha*x+y. y corresponds to the models
+ *       internal state and x can be a state vector or a model
+ 
+ * \param hmodel   IO handle of model instance (y)
+ * \param alpha    I  alpha
+ * \param idomain  I  domain number
+ * \param hx       I  handle of x (state or model)
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_AxpyStateDomain(CTA_Model hmodel, double alpha, int idomain, CTA_Handle hx);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif
+
diff --git a/costa/native/cta/include/cta_model_factory.h b/costa/native/cta/include/cta_model_factory.h
new file mode 100644
index 000000000..c08005f37
--- /dev/null
+++ b/costa/native/cta/include/cta_model_factory.h
@@ -0,0 +1,84 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_model.h
+\brief Interface description of the COSTA default model component. For user implementation see cta_usr_model.h.
+
+Functions for creating and working with models. CTA_Model is the default class implementation for models.
+*/
+
+#ifndef CTA_MODEL_FACTORY_H
+#define CTA_MODEL_FACTORY_H
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+ 
+ 
+/** \brief Create a COSTA modell class from XML input file
+*          (load from methods from dynamic load library)
+*
+*  \param fName  I  XML-configuration file
+*  \param modelClass O Class of new model Factory
+*  \return          Model class handle
+*/
+CTAEXPORT int CTA_ModelFactory_New(const char *fName, CTA_ModelClass* modelClass );
+
+
+/** \brief Create a COSTA modell class from XML
+*          (load from methods from dynamic load library). 
+*
+*  \param cur_node  I  Current XML node 
+*  \return             Handle to create or CTA_NULL in case of an error.
+*/
+CTAEXPORT CTA_ModelClass CTAI_XML_CreateModelClass(xmlNode *cur_node);
+
+/** \brief Define a new class (=implementation) of a COSTA model component
+ *
+ * \param name     I  name of the new model class
+ * \param h_func   I  COSTA function handles for functions that implement class,
+ *                    missing functions must have value CTA_NULL
+ * \param hmodcl   O  receives handle of new model class
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_DefineClass(const char *name, const CTA_Func h_func[CTA_MODEL_NUMFUNC], CTA_ModelClass *hmodcl);
+
+const char *CTAI_ModelFac_GetImplements(CTA_ModelClass hmodcl);
+
+int CTAI_ModelFac_GetParallelData(CTA_ModelClass hmodcl, char **implements, char **parallel_type,
+                char **spawn_workers, char **nproc, char **ntimes, char **dumProcs);
+
+
+int CTAI_ModelFac_GetBlock(CTA_ModelClass hmodcl, CTA_Model hmodel);
+int CTAI_ModelFac_GetBarrierData(CTA_ModelClass hmodcl, char **flag_barrier, double *t_step );
+int CTAI_ModelFac_AddModelInstance(CTA_ModelClass hmodcl, CTA_Model hmodel);
+int CTAI_ModelFac_SetBlock(CTA_ModelClass hmodcl, CTA_Model hmodel);
+int CTAI_ModelFac_TimeStepAllModels(CTA_ModelClass hmodcl, CTA_Function *function, double tstart, double tstop);
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif
+
diff --git a/costa/native/cta/include/cta_model_utilities.h b/costa/native/cta/include/cta_model_utilities.h
new file mode 100644
index 000000000..b8ace658a
--- /dev/null
+++ b/costa/native/cta/include/cta_model_utilities.h
@@ -0,0 +1,76 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_model_utilities.h
+\brief A set of utilities that simplify the creation of COSTA model components.
+
+A set of utility routines. These routines are originally designed for
+the rapid development of COSTA model components. But their use is not limited
+to model components.
+
+\note This is just a set of utility routines it does not define any COSTA component
+
+*/
+
+
+#ifndef CTA_MODEL_UTILITIES_H
+#define CTA_MODEL_UTILITIES_H
+#include "cta_system.h"
+#include "cta_handles.h"
+#include "cta_datatypes.h"
+#include "cta_tree.h"
+#include "cta_string.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Handles model configuration tree or name of input xml-file
+ *
+ * When a new instance of the a model component is created it needs some
+ * input. The most convenient way is to provide the root to a COSTA-tree
+ * with configuration information the name of a XML-configuration file
+ * This routine will check whether the input handle is a COSTA-tree or
+ * the name of a XML-configuration file. A COSTA-tree containing the content
+ * of the XML-file is created when the input is the name of the XML-file.
+ *
+ * \note A COSTA-tree is created if the input is the name of an XML-file.
+ *       the handle of the input tree is returned otherwise. This means that 
+ *       depending on the input, a tree is created. The routine will return whether
+ *       the returned tree is created by this routine. The caller is responsible 
+ *       for freeing the tree when necessary. 
+ *
+ * \param hinput   I  handle of tree (CTA_Tree) with model input or string
+ *                    (CTA_Sring) with name of xml-input file
+ * \param tinput   O  receives handle of tree (CTA_Tree) with model input
+ * \param cleanup  O  receives flag (CTA_TRUE/CTA_FALSE) indicating whether tinput is
+ *                    created and must be freed by the caller
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Model_Util_InputTree(CTA_Handle hinput, CTA_Tree *tinput, int *cleanup);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
+
+
+   
diff --git a/costa/native/cta/include/cta_modelcombiner.h b/costa/native/cta/include/cta_modelcombiner.h
new file mode 100644
index 000000000..6845bd730
--- /dev/null
+++ b/costa/native/cta/include/cta_modelcombiner.h
@@ -0,0 +1,45 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_modelcombiner.h
+\brief ModelCombiner.
+*/
+
+#ifndef CTA_MODELCOMBINER_H
+#define CTA_MODELCOMBINER_H
+#include "cta_handles.h"
+#include "cta_datatypes.h"
+#include "cta_metainfo.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/** \brief Create the model class of the ModelCombiner
+ *
+ * \note This is not a user function. It is called at initialization of the
+ *       COSTA environment.
+ *
+ *  \param modelcls  O  receives handle of the modelcombiner class
+ */
+CTANOEXPORT void CTA_Modelcombiner_CreateClass(CTA_ModelClass *modelcls);
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/costa/native/cta/include/cta_obsdescr.h b/costa/native/cta/include/cta_obsdescr.h
new file mode 100644
index 000000000..29e832c30
--- /dev/null
+++ b/costa/native/cta/include/cta_obsdescr.h
@@ -0,0 +1,179 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_obsdescr.h
+\brief Interface description of the COSTA default observation descriptor component. For user implementation see cta_usr_obs_descr.h.
+
+CTA_ObsDescr is used for describing observations.
+*/
+
+#ifndef CTA_OBSDESCR_H
+#define CTA_OBSDESCR_H
+#include "cta_system.h"
+#include "cta_handles.h"
+#include "cta_datatypes.h"
+#include "cta_functions.h"
+#include "cta_string.h"
+#include "cta_vector.h"
+#include "cta_matrix.h"
+#include "cta_reltable.h"
+
+/* Function Handle */
+typedef CTA_Handle CTA_ObsDescr;
+typedef CTA_Handle CTA_ObsDescrClass;
+
+#include "cta_sobs.h"
+
+/* parameters for different functions */
+#define I_CTA_OBSDESCR_CREATE_SIZE         ( 1)
+#define I_CTA_OBSDESCR_CREATE_INIT         ( 2)
+#define I_CTA_OBSDESCR_FREE                ( 3)
+#define I_CTA_OBSDESCR_GET_PROPERTIES      ( 4)
+#define I_CTA_OBSDESCR_GET_KEYS            ( 5)
+#define I_CTA_OBSDESCR_COUNT_OBSERVATIONS  ( 6)
+#define I_CTA_OBSDESCR_COUNT_PROPERTIES    ( 7)
+#define I_CTA_OBSDESCR_EXPORT              ( 8)
+#define I_CTA_OBSDESCR_SELECTION           ( 9)
+#define I_CTA_OBSDESCR_NUMFUNC             (10)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Create a new class (=implementation) of a COSTA observation description component.
+ *
+ * \param name       I  name of the new observation description class
+ * \param h_func     I  COSTA function handles for functions that implement class,
+ *                      missing functions must have value CTA_NULL
+ * \param hobsdscrcl O  receives handle of new observation description class
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_ObsDescr_DefineClass(const char *name,
+   const CTA_Func h_func[I_CTA_OBSDESCR_NUMFUNC],
+   CTA_ObsDescrClass *hobsdscrcl);
+
+
+/** \brief Create a new observation description instance.
+ *
+ * \param hsobscl   I  class of new observation description
+ * \param usrdat    IO data of the stochastic observer for which
+ *                     a descriptor is to be created
+ * \param hobsdscr  O  receives handle of created observation description
+ *                     object
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_ObsDescr_Create( CTA_ObsDescrClass hsobscl,
+                          CTA_Handle usrdat, CTA_ObsDescr *hobsdscr);
+
+/** \brief Create a new observation description that is subset of existing observation description.
+ *
+ * \param hobsdescr     I the observation description to create a subset
+ *                        from
+ * \param selection     I selection criterion (subset of SQL)
+ * \param reltab        O Relation table specifying the relation between
+ *                        the original and new observation description
+ *                        component. Note no relation table is created when 
+ *                        reltab==CTA_NULL on entry
+ * \param hobsdescrout  O the new COSTA-stochastic observer, empty before
+ *                        calling, caller responsible for freeing after use
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_ObsDescr_CreateSel( CTA_StochObs hobsdescr, CTA_String selection,   
+                            CTA_RelTable reltab, CTA_StochObs *hobsdescrout);
+
+/** \brief Create a new observation description that is subset of existing observation description.
+ *         All observations in the interval (t1,t2] (note t1 is not part
+ *         of the interval!) of the time span are selected.
+ *
+ * \param hobsdescr     I the observation description to create a subset
+ *                        from
+ * \param timespan      I  time span over which selection has to be made
+ * \param reltab        O Relation table specifying the relation between
+ *                        the original and new observation description
+ *                        component. Note no relation table is created when 
+ *                        reltab==CTA_NULL on enty
+  * \param hobsdescrout  O the new COSTA-stochastic observer, empty before
+ *                        calling, caller responsible for freeing after use
+ * \return error status: CTA_OK if successful
+ */
+
+CTAEXPORT int CTA_ObsDescr_CreateTimSel( CTA_ObsDescr hobsdescr, CTA_Time timespan,
+                               CTA_RelTable reltab, CTA_ObsDescr *hobsdescrout);
+
+/** \brief Get properties/values that correspond to a given key.
+ *
+ * \param hobsdscr   I  handle of observation description
+ * \param Key        I  key for which the value is asked
+ * \param Properties IO COSTA-vector that is to receive the values
+ * \param datatype   I  data type of elements in properties vector, must be the same as of queried properties
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_ObsDescr_Get_ValueProperties( CTA_ObsDescr hobsdscr, const char* Key,
+   CTA_Vector Properties, CTA_Datatype datatype); 
+
+/** \brief Get all keys names.
+ *
+ * \param hobsdscr   I  handle of observation description
+ * \param Keys       O  receives all keys (COSTA-string vector); must exist before calling and be large enough
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_ObsDescr_Get_PropertyKeys(CTA_ObsDescr hobsdscr, CTA_Vector Keys);
+
+/** \brief Get number of properties/keys.
+ *
+ * \param hobsdscr   I  handle of observation description
+ * \param nkeys      O  receives number of properties/keys
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_ObsDescr_Property_Count( CTA_ObsDescr hobsdscr, int *nkeys);
+
+/** \brief Get number of observations.
+ *
+ * \param hobsdscr   I  handle of observation description
+ * \param nobs       O  receives the number of observations
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_ObsDescr_Observation_Count( CTA_ObsDescr hobsdscr, int *nobs);
+
+/** \brief Export observation description.
+ *
+ * The default observation description CTA_DEFAULT_OBSDESC supports exporting to:\n
+ * TODO
+ *
+ * \param hdescr     I  handle of observation description
+ * \param usrdat     IO export configuration/medium
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_ObsDescr_Export(CTA_ObsDescr hdescr, CTA_Handle usrdat);
+
+/** \brief Free observation description object.
+ *
+ * \param hobsdscr  IO handle of observation description, replaced by CTA_NULL on return.
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_ObsDescr_Free(
+   CTA_ObsDescr *hobsdscr  /* Handle of observation description */
+   );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/costa/native/cta/include/cta_obsdescr_sqlite3.h b/costa/native/cta/include/cta_obsdescr_sqlite3.h
new file mode 100644
index 000000000..1cd51afad
--- /dev/null
+++ b/costa/native/cta/include/cta_obsdescr_sqlite3.h
@@ -0,0 +1,158 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_obsdescr_sqlite3.h
+\brief SQLite3 user implementation of the observation descriptor interface.
+*/
+
+#ifndef CTA_OBSDESCR_SQLITE3_H
+#define CTA_OBSDESCR_SQLITE3_H
+
+#include "cta_f77blas.h"
+#include "cta_datatypes.h"
+#include "cta_handles.h"
+#include "cta_functions.h"
+#include "cta_obsdescr.h"
+#include "cta_reltable.h"
+#include "cta_util_sqlite3.h"
+#include "cta_sobs_sqlite3.h"
+
+typedef struct {
+CTA_Handle myhandle;
+CTAI_util_sqlite3_database *database;
+char* condition;
+int n_keys;
+int nmeasr;
+CTA_String *Keys;
+} CTAI_ObsDescr_sqlite3;
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/** \brief Initialization function for defining ObsDescr_SQLite3 class
+ *
+ * \return no return value
+ */
+CTANOEXPORT void CTA_ObsDescr_sqlite3_initialise();
+
+
+/** \brief Implementation function as part of the create process.
+ *
+ * \param memsize         O  receives the size of object data of new observation description
+ * \param retval          O  receives return value
+ * \return no return value
+ */
+void CTAI_ObsDescr_sqlite3_Create_Size( int *memsize,int *retval );
+
+
+/** \brief Implementation function as part of the create process.
+           Makes object ready to use.
+ *
+ * \note userdata must contain handle of COSTA stochastic observer belonging to observation description
+ *
+ * \param myhandle        I  Handle assigned by COSTA
+ * \param descr           IO pointer to object data of observation description
+ * \param usrdat          I  user data (see note)
+ * \param retval          O  receives the return value
+ * \return no return value
+ */
+void CTAI_ObsDescr_sqlite3_Create_Init(CTA_ObsDescr *myhandle, 
+                                       CTAI_ObsDescr_sqlite3 *descr,
+                                       CTA_Handle *usrdat, int *retval);
+
+
+/** \brief Count the number of properties in the observation descriptor object.
+ *
+ * \param descr           I  pointer to object data of observation description
+ * \param nkeys           O  receives number of property keys
+ * \param retval          O  receives return value
+ * \return no return value
+ */
+void CTAI_ObsDescr_sqlite3_Property_Count(CTAI_ObsDescr_sqlite3 *descr, 
+                            int* nkeys, int *retval);
+
+
+/** \brief Count the number of observations in the observation descriptor object.
+ *
+ * \param descr           I  pointer to object data of observation description
+ * \param nobs            O  receives the number of observations
+ * \param retval          O  receives return value
+ * \return no return value
+ */
+void CTAI_ObsDescr_sqlite3_Observation_Count(CTAI_ObsDescr_sqlite3 *descr, 
+                            int* nobs, int *retval);
+
+
+/** \brief Get keys of observation descriptor object.
+ *
+ * \param descr           I  pointer to object data of observation description
+ * \param Keys            I  handle of string vector that receives key descriptions, must exist before calling and have enough elements
+ * \param retval          O  receives return value
+ * \return no return value
+ */
+void CTAI_ObsDescr_sqlite3_Get_Keys(CTAI_ObsDescr_sqlite3 *descr, 
+                            CTA_Vector * Keys, int *retval);
+
+
+/** \brief Get properties of CTAI_ObsDescr object.
+ *
+ * \param descr           I  pointer to object data of observation description
+ * \param Key             I  pointer to C string describing key of which to get the properties
+ * \param Properties      I  handle of vector that receives properties, must exist before calling and have enough elements
+ * \param datatype        O  data type of vector elements
+ * \param retval          O  receives return value
+ * \return no return value
+ */
+void CTAI_ObsDescr_sqlite3_Get_Properties(CTAI_ObsDescr_sqlite3 *descr, 
+          const char *Key, CTA_Vector *Properties, CTA_Datatype *datatype,
+          int *retval);
+
+
+/** \brief Free the object data and associated resources.
+ *
+ * \param descr           IO object data of observation description to be freed
+ * \param retval          O  receives return value
+ * \return no return value
+ */
+void CTAI_ObsDescr_sqlite3_Free(CTAI_ObsDescr_sqlite3 *descr, 
+                                int *retval);
+
+/** \brief Create a new observation description that is subset of existing observation description.
+ *
+ * \param descr         I  object data of observation description to be freed
+ * \param selection     I  selection criterion (subset of SQL)
+ * \param reltab        O  Relation table specifying the relation between
+ *                         the original and new observation description
+ *                         component. Note no relation table is created when 
+ *                         reltab==CTA_NULL on entry
+ * \param my_handle_out I  Handle of new relation table 
+ * \param descrout      O  new observation description created subset
+ * \param retval        O  receives return value
+ */
+void CTAI_ObsDescr_sqlite3_CreateSel(CTAI_ObsDescr_sqlite3 *descr,
+          CTA_String *selection, CTA_RelTable *reltab, 
+          CTA_ObsDescr *myhandle_out,
+          CTAI_ObsDescr_sqlite3 *descrout, int *retval);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/costa/native/cta/include/cta_obsdescr_table.h b/costa/native/cta/include/cta_obsdescr_table.h
new file mode 100644
index 000000000..0802afe2d
--- /dev/null
+++ b/costa/native/cta/include/cta_obsdescr_table.h
@@ -0,0 +1,132 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <string.h>
+#include <stdio.h>
+#include <math.h>
+#include <memory.h>
+#include "cta.h"
+
+typedef struct {
+int nkeys;
+int nmeasr;
+char **Keys;
+char ***Columns;
+} CTAI_ObsDescr_table;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Explanation
+ *
+ * \param memsize         O  The number of bytes which are necessary to 
+ *                           store one CTAI_SObs_sqlite3, with a 
+ *                           pointer to the contents (data), 
+ *                           but without the contents themselves.
+ * \param retval          O  Error code (see cta_datatypes.h for possible 
+ *                           error codes)
+ * \return no return value
+ */
+void CTAI_ObsDescr_table_Create_Size(int *memsize, int *retval);
+
+ /** \brief Allocate the memory which is necessary to 
+ *          store the data necessary for a sqlite3-observer
+ *
+ * \param myhandle        I  Handle assigned by COSTA
+ * \param descr           I  The sqlite3-observation description
+ *                           for which the memory must be
+ *                           allocated
+ * \param usrdat          I  User data
+ * \param retval          O  Error code 
+ * \return no return value
+ */
+void CTAI_ObsDescr_table_Create_Init(CTA_ObsDescr myhandle, CTAI_ObsDescr_table *descr,
+CTA_Handle this, CTA_Handle *usrdat, int *retval);
+
+/** \brief Explanation
+ *
+ * \param                 IO descr
+ * \param                 IO Keys
+ * \param retval          O  Error code 
+ * \return no return value
+ */
+void CTAI_ObsDescr_table_Get_Keys(CTAI_ObsDescr_table *descr, CTA_Vector *Keys, int *retval);
+
+/** \brief Explanation
+ *
+ * \param                 IO descr
+ * \param                 IO nkeys
+ * \param retval          O  Error code 
+ * \return no return value
+ */
+void CTAI_ObsDescr_table_Property_Count(CTAI_ObsDescr_table *descr, int *nkeys, int *retval);
+
+/** \brief Explanation
+ *
+ * \param descr           I 
+ * \param nobs            I 
+ * \param retval          O Error code 
+ * \return no return value
+ */
+void CTAI_ObsDescr_table_Observation_Count(CTAI_ObsDescr_table *descr, int *nobs, int *retval);
+
+/** \brief Explanation
+ *
+ * \param descr           I 
+ * \param Key             I 
+ * \param Properties      I 
+ * \param datatype        I 
+ * \param nobs            I 
+ * \param retval          O Error code 
+ * \return no return value
+ */
+void CTAI_ObsDescr_table_Get_Properties(CTAI_ObsDescr_table *descr, const char *Key, CTA_Vector *Properties,
+         CTA_Datatype *datatype,int *retval);
+
+/** \brief Explanation
+ *
+ * \param descr           I 
+ * \param nobs            I 
+ * \param usrdat          I 
+ * \param retval          O Error code 
+ * \return no return value
+ */
+void CTAI_ObsDescr_table_Export(CTAI_ObsDescr_table *descr, CTA_Handle *usrdat, int *retval);
+ 
+/** \brief Explanation
+ *
+ * \param descr           I 
+ * \param retval          O Error code 
+ * \return no return value
+ */
+void CTAI_ObsDescr_table_Free(CTAI_ObsDescr_table *descr, int *retval);
+
+
+/** \brief The vector h_func is filled with COSTA-function handles of the 
+ *         implementations in this file.
+ * \param hobsdescrcl     I 
+ * \return no return value
+ */
+CTANOEXPORT void CTA_ObsDescr_table_initialise(CTA_ObsDescrClass *hobsdescrcl);
+
+#ifdef __cplusplus
+}
+#endif
+
diff --git a/costa/native/cta/include/cta_pack.h b/costa/native/cta/include/cta_pack.h
new file mode 100644
index 000000000..8430dfde3
--- /dev/null
+++ b/costa/native/cta/include/cta_pack.h
@@ -0,0 +1,149 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+#ifndef CTA_PACK_H
+#define CTA_PACK_H
+#include "cta_system.h"
+#include "cta_handles.h"
+#include "cta_datatypes.h"
+
+/**
+\file  cta_pack.h
+
+\brief The interface description of the COSTA pack component.
+
+The pack component is used for storing non-sequential data before it is
+saved to file communicated in a parallel environment.
+The pack component contains a memory buffer that can be filled with data. The size of the buffer is
+automatically increased when new data is added.
+
+The pack component uses the FIFO princeple. Data that is added first can be retreved first.
+
+*/
+
+/* Function Handle */
+typedef CTA_Handle CTA_Pack;
+
+/*! Reset pack/unpack pointer of pack object    */
+#define CTA_PACK_RESET (-1)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Create a pack instance.
+ *
+ * \param initsize I  the initial size >=0 of the buffer
+ * \param hpack    O  receives handle of new pack object
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Pack_Create(int initsize, CTA_Pack *hpack);
+
+/** \brief Free a pack instance.
+ *
+ * \param hpack   IO handle of pack object, replaced by CTA_NULL on return
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Pack_Free(CTA_Pack *hpack);
+
+/** \brief Add data to pack object.
+ *
+ * \param hpack    IO handle of pack object
+ * \param data     I  data that must be packed
+ * \param lendat   I  size of the data to be packed (chars)
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Pack_Add(CTA_Pack hpack, void *data, int lendat);
+
+/** \brief Unpack (get) data from pack object.
+ *
+ * \param hpack    IO handle of pack object
+ * \param data     O  buffer that receives data that is unpacked from pack-buffer (buffer length must be >= lendat)
+ * \param lendat   I  size of the data to be unpacked (chars)
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Pack_Get(CTA_Pack hpack, void *data, int lendat);
+
+/** \brief Get pointer to pack-buffer.
+ *
+ * \param hpack    I  handle of pack object
+ *
+ * \return pointer to buffer
+ */
+CTANOF90 CTAEXPORT char* CTA_Pack_GetPtr(CTA_Pack hpack);
+
+/** \brief Get length of packed data in pack-buffer.
+ *
+ * \param hpack    I  handle of pack object
+ *
+ * \return length packed data
+ */
+CTAEXPORT int CTA_Pack_GetLen(CTA_Pack hpack);
+
+/** \brief Only update administration for added elements
+ *
+ * This function can be used to update the administration after the
+ * pack-buffer is filled externally (e.g. using an mpi_recv)
+ *
+ * \param hpack    I  handle of pack object
+ * \param lendat   I  number of added elements (chars) 
+ *
+ * \return length packed data
+ */
+CTAEXPORT int CTA_Pack_AddCnt(CTA_Pack hpack, int lendat);
+
+/** \brief Get the internal pack and unpack pointers
+ *
+ * This function can be used to save to pointers and 
+ * reset the state of the pack component after unpacking or adding 
+ * some data
+ *
+ * \param hpack    I  handle of pack object
+ * \param ip1      O  unpack pointer
+ * \param ip2      O  pack pointer
+ *
+ * \return length packed data
+ */
+CTAEXPORT int CTA_Pack_GetIndx(CTA_Pack hpack, int *ip1, int *ip2);
+
+/** \brief Set the internal pack and unpack pointers
+ *
+ * This function can be used to restore the pointers and 
+ * reset the state of the pack component after unpacking or adding 
+ * some data
+ *
+ * \param hpack    I  handle of pack object
+ * \param ip1      I  unpack pointer. In order to reset all unpackin 
+ *                    set to CTA_PACK_RESET
+ * \param ip2      I  pack pointer. In order to reset the whole pack object
+ *                    set to CTA_PACK_RESET
+ *
+ * \return length packed data
+ */
+
+
+CTAEXPORT int CTA_Pack_SetIndx(CTA_Pack hpack, int ip1, int ip2);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/costa/native/cta/include/cta_par.h b/costa/native/cta/include/cta_par.h
new file mode 100644
index 000000000..ab9032cfc
--- /dev/null
+++ b/costa/native/cta/include/cta_par.h
@@ -0,0 +1,138 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+#ifndef CTA_PAR_H
+#define CTA_PAR_H
+#include "cta_system.h"
+#include "cta_handles.h"
+#include "cta_datatypes.h"
+
+#ifdef USE_MPI
+#include "mpi.h"
+#endif
+
+/**
+\file  cta_par.h
+
+\brief Interface for creating parallel applications with COSTA */
+
+/** Type of processes */
+enum CTA_ParProcType {CTA_ParMaster, CTA_ParWorker, CTA_ParOther};
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Flag to indicate whether this is a filter process or model/worker process */
+CTAEXPORT extern int CTA_FILTER_PROCESS;
+
+
+/** Flag CTA_TRUE/CTA_FALSE to indicate whether run is in a parallel
+ *  environment */
+CTAEXPORT extern int CTA_IS_PARALLEL;
+
+/** Rank in world of all processes Note this variable is only
+ * used for debugging and output. */
+CTAEXPORT extern int CTA_PAR_MY_RANK;
+
+/** Kind of this process */
+CTAEXPORT extern enum CTA_ParProcType CTA_MY_PROC_TYPE;
+
+#ifdef USE_MPI
+/** The whole communication universe              */
+CTAEXPORT extern MPI_Comm CTA_COMM_WORLD;
+
+/** My own communication group                    */
+CTAEXPORT extern MPI_Comm CTA_COMM_MYWORLD;
+
+/** Group consisting of master process and all worker processes the master
+ *  communicates with */
+CTAEXPORT extern MPI_Comm CTA_COMM_MASTER_WORKER;        
+
+#endif
+
+/** \brief Initialises parallel environment for a process that spawned
+ *  The executable is spawned using MPI_COMM_SPAWN or MPI_COMM_SPAWN_MULTIPLE
+ *
+ *  It will set up the communication groups and optionally starts the parallel
+ *  model builder
+ *
+ * \param StartPar  I  CTA_TRUE/CTA_FALSE start parallel model builder
+ *
+ * Note when a worker process is part of a Master-Worker model and it does
+ * not implement the COSTA model interface it should not start the parallel model builder
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Par_WorkerSpawn(int StartPar);
+
+
+/** \brief Initialises parallel environment and create process groups
+ *
+ * \param parConfig I  configuration input from XML-file
+ * \param StartPar  I  CTA_TRUE/CTA_FALSE start parallel model builder
+ *
+ * Note when a worker process is part of a Master-Worker model and it does
+ * not implement the COSTA model interface it should not start the parallel model builder
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Par_CreateGroups(int parConfig, int StartPar);
+
+#ifdef USE_MPI
+/** \brief Get the Fortran (integer handles) of the communicators 
+ *
+ * \param cta_comm_world         O  Fortran communcator CTA_COMM_WORLD
+ * \param cta_comm_myworld       O  Fortran communcator CTA_COMM_MYWORLD
+ * \param cta_comm_master_worker O  Fortran communcator CTA_COMM_MASTER_WORKER
+ *
+ */
+CTANOF90 CTAEXPORT int CTA_Par_CreateNewCreateGetComm(CTA_ModelClass modelCls, MPI_Comm *comm);
+#endif
+
+#ifdef USE_MPI
+/** \brief Get a communicator for a process group by index 
+ *
+ * \param indx I  indx of communicator
+ * \param comm O  MPI communicator
+ * \return CTA_OK when succesfull. The value is CTA_CANNOT_FIND_PROCESS_GROUP is
+ *         returned when the communicator cannot be found
+ *
+ */
+CTANOF90 CTAEXPORT int CTA_Par_GetAllCommByIndex(int indx, MPI_Comm *comm);
+#endif
+
+/** \brief Get the global number of COSTA process group and index of this process
+ *
+ * \param itime  O The index of this process in the group
+ * \param iGroup O The group number of this process belongs to (1..nGroups)
+ *
+ * \note If this function is called by the COSTA master process or in a sequential run it will return 0 for iGroup
+ *
+ */
+CTAEXPORT void CTA_Par_GetGroupInfo(int *iGroup, int *itime);
+
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif
+
+
diff --git a/costa/native/cta/include/cta_reltable.h b/costa/native/cta/include/cta_reltable.h
new file mode 100644
index 000000000..eb9043a70
--- /dev/null
+++ b/costa/native/cta/include/cta_reltable.h
@@ -0,0 +1,172 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_reltable.h
+\brief Relation table component that defines a relation between two
+       elements (ordered) sets of elements. 
+
+  Relation tables are used to define a relation between elements in sets.
+  Examples are the elements of a vector, matrix, tree vector or in a
+  stochastic observer.
+
+  The Relation table can be used for copying elements from one set to the
+  other optionally using interpolation (not yet supported).
+
+*/
+
+#ifndef CTA_RELTABLE_H
+#define CTA_RELTABLE_H
+#include "cta_system.h"
+#include "cta_handles.h"
+#include "cta_datatypes.h"
+#include "cta_vector.h"
+
+/* Function Handle */
+typedef CTA_Handle CTA_RelTable;
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* functions */
+
+/** \brief Create a relation table
+ *
+ * \param hreltable  O  created relation table
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_RelTable_Create(CTA_RelTable *hreltable);
+
+/** \brief Free a relation table object.
+ *
+ * \param hreltable  IO relation table to be freed, 
+ *                      value is set to CTA_NULL on return.
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_RelTable_Free(CTA_RelTable *hreltable);
+
+/** \brief Copy elements according to relation table
+ *
+ * \note we currently only support copying of elements 
+ *       between two vector instances. Other types of
+ *       COSTA object will be supported when needed
+ *       in later versions
+ *
+ * \param hreltable  I handle of relation table
+ * \param hfrom      I Origin object to copy data from
+ * \param hto        I Target object to copy data to
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_RelTable_Apply(CTA_RelTable hreltable, 
+                       CTA_Handle hfrom, CTA_Handle hto);
+
+/** \brief Copy elements according to inverse of relation table
+ *
+ * \note we currently only support copying of elements 
+ *       between two vector instances. Other types of
+ *       COSTA object will be supported when needed
+ *       in later versions
+ *
+ * \param hreltable  I handle of relation table
+ * \param hfrom      I Origin object to copy data from
+ * \param hto        I Target object to copy data to
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_RelTable_ApplyInv(CTA_RelTable hreltable, 
+                       CTA_Handle hfrom, CTA_Handle hto);
+
+
+/** \brief Set a relation table
+ *  A Set a relation table that defines a selection of elements
+ *
+ * \param hreltable  O relation table that is set
+ * \param vselect    I (integer) vector with indices of elements
+                       from the target set that are selected.
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_RelTable_SetSelect(CTA_RelTable hreltable, CTA_Vector vselect);
+
+/** \brief Get the number of elements that are copied when the table is applied
+ *
+ *
+ * \param hreltable  I  relation table
+ * \param nelt     O  number of elements that are copied 
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_RelTable_Count(CTA_RelTable hreltable, int *nelt);
+
+
+/** \brief Set a relation table that is combination of two 
+ *         relation tables.
+ *
+ * Set a relation table that is the combination of two exisiting relation
+ * tables. It is possible to use the inverse of the relation tables when
+ * needed
+ *.
+ * A usefull application of this method is to create a relation table that
+ * defines a relation between a subset of elements from set1 and a subset of
+ * the elements of set2. In order to set a relation table of this kind first
+ * create two relation tables:
+ * hrel1 elements from set 1 that have a relation with the elements from set 2,
+ * hrel2 elements from set 2 that have a relation with the elements from set 1
+*
+ * The combined relation table of hrel1 and inverse(hrel2) is a relation
+ * table that spcifies the relation of a subset of elements from set1 and a
+ * subset of elements from set2.
+ *
+ * \param hreltable  O relation table that is set
+ * \param hrel1      I first relation table 
+ * \param inverse1   I use inverse of hrel1 (CTA_TRUE/CTA_FALSE)
+ * \param hrel2      I first relation table 
+ * \param inverse2   I use inverse of hrel2 (CTA_TRUE/CTA_FALSE)
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_RelTable_SetTableCombine(CTA_RelTable hreltable, 
+                                 CTA_RelTable hrel1, int inverse1, 
+                                 CTA_RelTable hrel2, int inverse2 );
+
+
+
+
+/** \brief Apply relation table to two components
+ *
+ * \param reltable (I)  Data of relation table
+ * \param hrom     (I)  Source set of elements
+ * \param hto      (IO) Target set (some will be overwitten)
+ * \param iverse   (I)  CTA_TRUE/CTA_FALSE apply inverse table
+ *
+ * \return error status: CTA_OK if successful
+ * \note Internal routine not a user routine
+ */
+int CTAI_RelTable_Apply(CTA_RelTable hreltable,
+                       CTA_Handle hfrom, CTA_Handle hto, int inverse);
+
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/costa/native/cta/include/cta_resultwriter.h b/costa/native/cta/include/cta_resultwriter.h
new file mode 100644
index 000000000..38f2d6109
--- /dev/null
+++ b/costa/native/cta/include/cta_resultwriter.h
@@ -0,0 +1,187 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/core/native/src/cta/cta_time.c $
+$Revision: 2751 $, $Date: 2011-09-09 08:58:46 +0200 (Fri, 09 Sep 2011) $
+
+OpenDA
+Copyright (C) 2013  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+ * OpenDA support the concept of resultwriters. The Data assimilation algorithm
+ * will presents intermediate results and information on the computations to the
+ * result writer. Each implementation of a resultwriter can then handle this data
+ * in its own way.
+ *
+ * To implement an native resultwriter the following methods need to be implemented.
+ * Note: for explanation on variables see resultwriter function methods. All strings
+ *       have a len_... containing the result of strlen (used for using c-strings in Fortran)
+ *
+ * C:
+ * 
+ * void putmessage(int iDWriter, CTA_Tree config, char* workingDir, char* message, int *retval,
+ *                 int len_workingDir, int len_message);
+ *
+ * void putvalue(int iDWriter, CTA_Tree config, char* workingDir, int id, int handle,
+ *               int outputlevel, char* context, int iteration, int &retval,
+ *               len_workingDir, len_id, len_context);
+ *
+ * void putiterationreport(int iDWriter, CTA_Tree config, char *workingDir, int iteration,
+ *                          double cost, CTA_Vector handle, int *retval, int len_workingDir);
+ *
+ * void freewriter(int iDWriter);
+ *
+ * Fortran 2003:
+ *
+ * interface
+ *    subroutine putmessage(iDWriter, config, workingDir, message, retval,         &
+ *                          len_workingDir, len_message)                           &
+ *       bind(C, NAME='putmessage')
+ *       use iso_c_binding
+ *       implicit none
+ *       include 'cta_f90.inc'
+ *
+ *      integer(C_INT), VALUE  ::iDWriter, config, len_workingDir, len_message
+ *      integer(C_INT)         ::retval
+ *      character(kind=c_char) ::workingdir(*), message(*)
+ *    end subroutine putmessage
+ * end interface
+ *
+ *
+ * interface
+ *   subroutine putvalue(iDWriter, config, workingDir, id, handle,                 &
+ *                       outputlevel, context, iteration, retval,                  &
+ *                       len_workingDir, len_id, len_context)                      &
+ *      bind(C, NAME='putvalue')
+ *      use iso_c_binding
+ *      implicit none
+ *      include 'cta_f90.inc'
+ *      integer(C_INT), VALUE  ::iDWriter, config, handle, outputlevel, iteration, &
+ *                               len_workingDir, len_id, len_context
+ *      integer(C_INT)         ::retval
+ *      character(kind=c_char) ::workingdir(*), id(*), context(*)
+ *   
+ *    end subroutine putvalue
+ * end interface
+ *
+ * interface
+ *    subroutine putiterationreport(iDWriter, config, workingDir, iteration, cost,  &
+ *                                  handle, retval, len_workingDir)                 &
+ *       bind(C, NAME='putiterationreport')
+ *       use iso_c_binding
+ *       implicit none
+ *       include 'cta_f90.inc'
+ *       integer(C_INT), VALUE    ::iDWriter, config, iteration, handle, len_workingDir
+ *       real(C_DOUBLE), VALUE    ::cost
+ *       integer(C_INT)           ::retval
+ *       character(kind=c_char)   ::workingdir(*)
+ *    end subroutine putiterationreport
+ * end interface
+ *
+ * interface
+ *    subroutine freewriter(iDWriter)                                               &
+ *       bind(C, NAME='freewriter')
+ *       use iso_c_binding
+ *       implicit none
+ *       include 'cta_f90.inc'
+ *       integer(C_INT), VALUE    ::iDWriter
+ *    end subroutine freewriter
+ * end interface
+
+ * 
+ * The resultwriter must have an xml-configuration file containing at least the following
+ * (additional field may be used for private usage by the implementation)
+ * Note name of library and funtion name must correspond to the user implementation.
+ *
+ * <?xml version="1.0" encoding="UTF-8"?>
+ * <openda_native xmlns:xi="http://www.w3.org/2001/XInclude">
+ *    <CTA_FUNCTION id="putvalue"               name="putvalue"              library="libresultwriter.so" function="putvalue"    />
+ *    <CTA_FUNCTION id="putmessage"             name="putmessage"            library="libresultwriter.so" function="putmessage"    />
+ *    <CTA_FUNCTION id="putiterationreport"     name="putiterationreport"    library="libresultwriter.so" function="putiterationreport"    />
+ *    <CTA_FUNCTION id="freewriter"             name="freewriter"            library="libresultwriter.so" function="freewriter"    />
+ * </openda_native>
+ *
+ * 
+ * 
+ * @author nils van velzen
+ *
+ */
+
+#ifndef CTA_RESULTWRITER_H
+#define CTA_RESULTWRITER_H
+#include "cta_system.h"
+#include "cta_handles.h"
+#include "cta_datatypes.h"
+/* Function Handle */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \brief Handle a string message send to the resultwriter
+ *
+ * \param idWriter    I  ID of this resultwriter (Counter of number of native result writers)
+ * \param config      I  Name of XML configuration file containting the function pointers and additional information
+ * \param workingDir  I  Full path to working directory
+ * \param message     I  Message send to resultwriter
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Resultwriter_putmessage(int idWriter, char *config, char *workingDir, char *message);
+
+/** \brief Handle a string message send to the resultwriter
+ *
+ * \param idWriter    I  ID of this resultwriter (Counter of number of native result writers)
+ * \param config      I  Name of XML configuration file containting the function pointers and additional information
+ * \param workingDir  I  Full path to working directory
+ * \param id          I  Name of the variable/array send to the resultwriter
+ * \param handle      I  Handle (Vector or TreeVector) of variable
+ * \param outputLevel I  Selected output level (see opendabridge for possible values)
+ * \param context     I  Location from which the resultwriter was called
+ * \param iteration   I  Iteration number from which the resultwriter was called
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Resultwriter_putvalue(int idWriter, char *config, char *workingDir, char *id, int handle, int outputLevel, char *context, int iteration);
+
+/** \brief Handle a string message send to the resultwriter
+ *
+ * \param idWriter    I  ID of this resultwriter (Counter of number of native result writers)
+ * \param config      I  Name of XML configuration file containting the function pointers and additional information
+ * \param workingDir  I  Full path to working directory
+ * \param iteration   I  Iteration number from which the resultwriter was called
+ * \param cost        I  Value of cost function
+ * \param handle      I  Handle (Vector or TreeVector) of the current parameters 
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Resultwriter_putiterationreport(int idWriter, char *config, char *workingDir, int iteration, double cost, int handle);
+
+/** \brief Free a resultwriter (close output files etc).
+ *
+ * \param idWriter    I  ID of this resultwriter (Counter of number of native result writers)
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Resultwriter_free(int idWriter);
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif
+
+
diff --git a/costa/native/cta/include/cta_sobs.h b/costa/native/cta/include/cta_sobs.h
new file mode 100644
index 000000000..2b667e3ee
--- /dev/null
+++ b/costa/native/cta/include/cta_sobs.h
@@ -0,0 +1,228 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_sobs.h
+\brief Interface description of the COSTA default stochastic observer component. For user implementation see cta_usr_sobs.h.
+*/
+
+#ifndef CTA_SOBS_H
+#define CTA_SOBS_H
+#include "cta_system.h"
+#include "cta_handles.h"
+#include "cta_datatypes.h"
+#include "cta_functions.h"
+#include "cta_vector.h"
+#include "cta_matrix.h"
+#include "cta_time.h"
+
+/* StochObs and Decription Handles */
+typedef CTA_Handle CTA_StochObs;
+typedef CTA_Handle CTA_SObsClass;
+
+#include "cta_obsdescr.h"
+
+/* parameters for different functions */
+#define CTA_SOBS_CREATE_SIZE         ( 1)
+#define CTA_SOBS_CREATE_INIT         ( 2)
+#define I_CTA_SOBS_FREE                ( 3)
+#define CTA_SOBS_CREATE_SELECTION    ( 4)
+#define I_CTA_SOBS_COUNT               ( 5)
+#define CTA_SOBS_GET_OBS_DESCRIPTION ( 6)
+#define CTA_SOBS_GET_VALUES          ( 7)
+#define CTA_SOBS_GET_REALISATION     ( 8)
+#define CTA_SOBS_GET_EXPECTATION     ( 9)
+#define CTA_SOBS_EVALUATE_PDF        (10)
+#define CTA_SOBS_GET_COV_MATRIX      (11)
+#define CTA_SOBS_GET_VARIANCE        (12)
+#define I_CTA_SOBS_EXPORT              (13)
+#define CTA_SOBS_GET_TIMES           (14)
+#define CTA_SOBS_NUMFUNC             (15)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Create a new class (=implementation) of a COSTA stochastic observer component.
+ *
+ * \param name          I  name of the new stochastic observer class
+ * \param h_func        I  COSTA function handles for functions that implement class,
+ *                         missing functions must have value CTA_NULL
+ * \param descrcl       I  class of the observation description that is created by stochastic observer
+ * \param hstochobscl   O  handle of new stochastic observer class
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_SObs_DefineClass(const char *name, const CTA_Func h_func[CTA_SOBS_NUMFUNC],
+                         CTA_ObsDescrClass descrcl, CTA_SObsClass *hstochobscl);
+
+/** \brief Create an instance of a stocastic observer
+ *
+ * \param hstochobscl I  stochastic observer class of new stochastic observer
+ * \param userdata    IO userdata for creation (depends on class)
+ * \param hstochobs   O  receives handle of new stochastic observer object
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_SObs_Create(CTA_SObsClass hstochobscl, CTA_Handle userdata,
+                    CTA_StochObs *hstochobs);
+
+/** \brief Create a new stochastic observer that is subset of existing stochastic observer.
+ *
+ * \param hsobsin  I  handle of the existing stochastic observer of
+ *                    which  a selection is to be made
+ * \param userdata IO inputs necessary for making a selection (depends on user implementation)
+ * \param hsobsout O  receives handle of the new COSTA-stochastic observer, empty before calling, caller responsible for freeing after use
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_SObs_CreateSel(CTA_StochObs hsobsin, CTA_Handle userdata,
+                       CTA_StochObs *hsobsout);
+
+/** \brief Create a new stoch observer that is subset in time of existing stochastic observer.
+ *
+ *         All observations in the closed interval [t1,t2] of the time span are selected.
+ *
+ * \param hsobsin  I  handle of the stochastic observer of
+ *                    which a selection is to be made
+ * \param timespan I  time span over which selection has to be made
+ * \param hsobsout O  receives handle of the new COSTA-stochastic observer, empty before calling
+ * \return error states: CTA_OK if successful
+ */
+CTAEXPORT int CTA_SObs_CreateTimSel(CTA_StochObs hsobsin, CTA_Time timespan,
+                          CTA_StochObs *hsobsout);
+
+
+/** \brief Count the number of elements in stochastic observer.
+ * \param hsobs    I  handle of the stochastic observer
+ * \param nmeasr   O  receives number of measurements in this observer
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_SObs_Count(CTA_StochObs hsobs, int *nmeasr);
+
+
+/** \brief Get a vector with the measurements.
+ *
+ * \param hsobs    I  handle of the stochastic observer
+ * \param hvec     IO handle of vector that receives the measurements; must exist before calling
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_SObs_GetVal(CTA_StochObs hsobs,CTA_Vector hvec);
+
+
+/** \brief Count the times associated to the measurements.
+ *
+ * \param hsobs    I  handle of the stochastic observer
+ * \param hvec     IO handle to vector that receives the times; must exist before calling
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_SObs_GetTimes( CTA_StochObs hsobs,CTA_Vector hvec);
+
+
+/** \brief Draw random values (measurements) according to the probability density
+ *         function of the mesurements.
+ *
+ * \param hsobs    I  handle of the stochastic observer
+ * \param hvec     IO handle of vector that receives the draw (measurements); must exist before calling
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_SObs_GetRealisation(CTA_StochObs hsobs, CTA_Vector hvec);
+
+
+/** \brief Get expectation of the probability density function of the mesurements.
+ *
+ * \param hsobs    I  handle of the stochastic observer
+ * \param hvec     IO handle of vector that receives the expectation values; must exist before calling
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_SObs_GetExpectation(CTA_StochObs hsobs, CTA_Vector hvec);
+
+
+/** \brief Get the value of the probability density function of the mesurements at given location.
+ *
+ * \param hsobs    I  handle of the stochastic observer
+ * \param hvecx    I  handle of vector with location for evaluating pdf
+ * \param hvecy    IO handle of vector that is to contain the pdf-value; must exist before calling
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_SObs_EvalPDF(CTA_StochObs hsobs, CTA_Vector hvecx, CTA_Vector hvecy);
+
+
+/** \brief Get covariance matrix of probability density function of the measurements.
+ *
+ * \param hsobs    I  handle of the stochastic observer
+ * \param hmat     IO handle of matrix that receives the covariance matrix; must exist before calling
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_SObs_GetCovMat(CTA_StochObs hsobs, CTA_Matrix hmat);
+
+
+/** \brief Get variance of probability density function of the mesurements.
+ *
+ * \param hsobs    I  handle of the stochastic observer
+ * \param hvec     IO handle of vector that receives the variance; must exist before calling
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_SObs_GetVar( CTA_StochObs hsobs, CTA_Vector hvec);
+
+
+/** \brief Get standard deviation of probability density function of the measurements.
+ *
+ * \param hsobs    I  handle of the stochastic observer
+ * \param hvec     IO handle of vector that is to contain the standard deviation
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_SObs_GetStd( CTA_StochObs hsobs, CTA_Vector hvec);
+
+
+/** \brief Create the observation description corresponding to the stochastic observer.
+ *
+ * \note Caller is responsible for freeing the here created observation description
+ *
+ * \param hsobs      I  handle of the stochastic observer
+ * \param hobsdescr  O  receives handle of newly created observation description class, empty before calling
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_SObs_GetDescription(CTA_StochObs hsobs, CTA_ObsDescr *hobsdescr);
+
+
+/** \brief Export the stochastic observer.
+ *
+ * \note Supported by CTA_DEFAULT_SOBS:\n
+ *       output to file (userdata must contain handle of COSTA file)\n
+ *
+ * \param hsobs    I  handle of the stochastic observer
+ * \param userdata I  configuration of output
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_SObs_Export(CTA_StochObs hsobs, CTA_Handle userdata);
+
+/** \brief Free the stochastic observer
+ *
+ * \Note hsobs=CTA_NULL is allowed
+ *
+ * \param hsobs  IO handle of the stochastic observer, replaced by CTA_NULL on return
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_SObs_Free(
+   CTA_StochObs *hsobs  /* Handle of stochastic observer  */
+   );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/costa/native/cta/include/cta_sobs_combine.h b/costa/native/cta/include/cta_sobs_combine.h
new file mode 100644
index 000000000..fefafd394
--- /dev/null
+++ b/costa/native/cta/include/cta_sobs_combine.h
@@ -0,0 +1,48 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_sobs_combine.h
+\brief  COMBINE implementation of the stochastic observer interface.
+*/
+
+
+#include "cta_f77blas.h"
+#include "cta_datatypes.h"
+#include "cta_handles.h"
+#include "cta_functions.h"
+#include "cta_util_sqlite3.h"
+#include "cta_sobs.h"
+#include "cta_obsdescr.h"
+
+
+/* Dit is een struct om wat op te slaan wat we nodig hebben 
+   Enigszins analoog aan CTAI_SOBS en de datablock van de modelcombiner */
+
+typedef struct {
+int nstations;
+double timeoffset;
+int nmeasr;
+int * stations;
+int nofsubsobs;      // number of stochastic observers
+CTA_Vector subsobs; // list of handles of stochastic observers
+} CTAI_SObs_combine;
+
+
+void CTA_SObs_combine_initialise(CTA_SObsClass *hsobscl);
diff --git a/costa/native/cta/include/cta_sobs_netcdf.h b/costa/native/cta/include/cta_sobs_netcdf.h
new file mode 100644
index 000000000..d906ec306
--- /dev/null
+++ b/costa/native/cta/include/cta_sobs_netcdf.h
@@ -0,0 +1,200 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_sobs_netcdf.h
+\brief Netcdf user implementation of the stochastic observer interface.
+*/
+
+#ifndef CTA_SOBS_NETCDF_H
+#define CTA_SOBS_NETCDF_H
+
+#include "cta_f77blas.h"
+#include "cta_datatypes.h"
+#include "cta_handles.h"
+#include "cta_functions.h"
+#include "cta_sobs.h"
+#include "cta_obsdescr.h"
+#if HAVE_LIBNETCDF
+//#include <netcdf.h>
+#endif
+
+typedef struct {
+int     nusers;
+int     ncid;
+char    *dbname;
+} CTAI_OMI_database;
+
+typedef struct {
+  CTAI_OMI_database *database;
+  int nstations;
+  double timeoffset;
+  int nmeasr;
+  int nmeasr_orig;  //number of observations in the netcdf-file
+  CTA_RelTable selectionReltab;
+  CTA_Time tspan;
+  float bb_lon[2];
+  float bb_lat[2];
+} CTAI_SObs_netcdf;
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Initialization function for defining the SObs_netcdf class.
+ *
+ * \return no return value
+ */
+CTANOEXPORT void CTA_SObs_netcdf_initialise();
+
+
+/** \brief Implementation function that is part of object create process
+ *
+ * \param memsize         O  receives size of new CTAI_SObs_netcdf object
+ * \param retval          O  receives return value
+ * \return no return value
+ */
+CTANOEXPORT void CTAI_SObs_netcdf_Create_Size( int *memsize,int *retval );
+
+
+/** \brief Implementation function that is part of object create process.
+ *
+ * \note userdata must contain handle of COSTA string giving the NETCDF database name.\n
+ *
+ * \param x               IO pointer to object data of stochastic observer
+ * \param userdata        I  pointer to user data
+ * \param retval          O  receives return value
+ * \return no return value
+ */
+CTANOEXPORT void CTAI_SObs_netcdf_Create_Init(CTAI_SObs_netcdf *x,
+                            CTA_Handle userdata, int *retval);
+
+
+/** \brief Create a new stochastic observer that is subset of existing stochastic observer.
+ *
+ * \note userdata must contain handle of COSTA string giving the selection condition.\n
+ * \note This function fills in the object data of the new stochastic observer. The handle is created by CTA_SObs_CreateSel().
+ *
+ * \param obsin           I  pointer to object data of the existing stochastic observer
+ * \param userdata        I  pointer to userdata
+ * \param obsout          O  pointer to object data of the new COSTA-stochastic observer, must exist before calling
+ * \param retval          I  receives return value
+ * \return no return value
+ */
+CTANOEXPORT void CTAI_SObs_netcdf_CreateSel( CTAI_SObs_netcdf *obsin,
+        CTA_Handle *userdata, CTAI_SObs_netcdf *obsout, int *retval);
+
+
+/** \brief Count the number of measurements
+ *
+ * \param x               I pointer to object data of stochastic observer
+ * \param nmeasr          O receives number of measurements
+ * \param retval          O receives the return value
+ * \return no return value
+ */
+CTANOEXPORT void CTAI_SObs_netcdf_Count( CTAI_SObs_netcdf *x, int *nmeasr,
+       int *retval);
+
+
+/** \brief Get a vector with the measurements.
+ *
+ * \param x               I  pointer to object data of stochastic observer
+ * \param hvec            O  handle of vector that receives the measurements, must exist before calling
+ * \param retval          O  receives the return value
+ * \return no return value
+ */
+CTANOEXPORT void CTAI_SObs_netcdf_GetVals( CTAI_SObs_netcdf *x, CTA_Vector *hvec,
+       int *retval);
+
+
+/** \brief Get times associated with the measurements.
+ *
+ * \param x               I  pointer to object data of stochastic observer
+ * \param hvec            O  handle of vector that receives the associated times, must exist before calling
+ * \param retval          O  receives the return value
+ * \return no return value
+ */
+CTANOEXPORT void CTAI_SObs_netcdf_GetTimes( CTAI_SObs_netcdf *x, CTA_Vector *hvec,
+       int *retval);
+
+
+/** \brief Get variance of probability density function of the mesurements.
+ *
+ * \param x               I  pointer to object data of stochastic observer
+ * \param hvec            I  handle of the vector that receives the variances, must exist before calling
+ * \param returnvar       O  flag: TRUE for getting variance, FALSE for getting standard deviation
+ * \param retval          O  receives return value
+ * \return no return value
+ */
+CTANOEXPORT void CTAI_SObs_netcdf_GetVariances( CTAI_SObs_netcdf *x,
+       CTA_Vector *hvec, int *returnvar, int *retval);
+
+
+/** \brief Draw random values (measurements) according to the probability density
+ *         function of the mesurements.
+ *
+ * \param x               I  pointer to object data of stochastic observer
+ * \param hvec            O  handle of vector that receives the draw (measurements); must exist before calling
+ * \param retval          O  receives return value
+ * \return no return value
+ */
+CTANOEXPORT void CTAI_SObs_netcdf_GetRealisation( CTAI_SObs_netcdf *x,
+       CTA_Vector *hvec, int *retval);
+
+
+/** \brief Get covariance matrix of probability density function of the measurements.
+ *
+ * \param x               I  pointer to object data of stochastic observer
+ * \param hmat            O  handle of matrix that receives the covariance matrix; must exist before calling
+ * \param retval          O  receives return value
+ * \return no return value
+ */
+CTANOEXPORT void CTAI_SObs_netcdf_GetCovMat( CTAI_SObs_netcdf *x, CTA_Matrix *hmat,
+       int *retval);
+
+
+/** \brief Export a CTAI_SObs object to file or standard output.
+ *
+ * \note Supported:\n
+ *       output to file (userdata must contain handle of COSTA file)\n
+ *
+ * \param x               I  pointer to object data of stochastic observer
+ * \param userdata        I  user data
+ * \param retval          O  receives return value
+ * \return no return value
+ */
+CTANOEXPORT void CTAI_SObs_netcdf_export( CTAI_SObs_netcdf *x, CTA_Handle *userdata,
+       int *retval);
+
+
+/** \brief Free the object data and associated resources.
+ *
+ * \param x               I  pointer to object data of stochastic observer
+ * \param retval          O  receives return value
+ * \return no return value
+ */
+CTANOEXPORT void CTAI_SObs_netcdf_Free( CTAI_SObs_netcdf *x, int *retval);
+
+#ifdef __cplusplus
+extern }
+#endif
+
+#endif
+
diff --git a/costa/native/cta/include/cta_sobs_sqlite3.h b/costa/native/cta/include/cta_sobs_sqlite3.h
new file mode 100644
index 000000000..cd9902b28
--- /dev/null
+++ b/costa/native/cta/include/cta_sobs_sqlite3.h
@@ -0,0 +1,188 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_sobs_sqlite3.h
+\brief SQLite3 user implementation of the stochastic observer interface.
+*/
+
+#ifndef CTA_SOBS_SQLITE3_H
+#define CTA_SOBS_SQLITE3_H
+
+#include "cta_f77blas.h"
+#include "cta_datatypes.h"
+#include "cta_handles.h"
+#include "cta_functions.h"
+#include "cta_util_sqlite3.h"
+#include "cta_sobs.h"
+#include "cta_obsdescr.h"
+#include <sqlite3.h>
+
+typedef struct {
+CTAI_util_sqlite3_database *database;
+char* condition;
+int nstations;
+int nmeasr;
+int * stations;
+} CTAI_SObs_sqlite3;
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Initialization function for defining the SObs_sqlite3 class.
+ *
+ * \return no return value
+ */
+CTANOEXPORT void CTA_SObs_sqlite3_initialise();
+
+
+/** \brief Implementation function that is part of object create process
+ *
+ * \param memsize         O  receives size of new CTAI_SObs_sqlite3 object
+ * \param retval          O  receives return value
+ * \return no return value
+ */
+void CTAI_SObs_sqlite3_Create_Size( int *memsize,int *retval );
+
+
+/** \brief Implementation function that is part of object create process.
+ *
+ * \note userdata must contain handle of COSTA string giving the SQLITE3 database name.\n
+ *
+ * \param x               IO pointer to object data of stochastic observer
+ * \param userdata        I  pointer to user data
+ * \param retval          O  receives return value
+ * \return no return value
+ */
+void CTAI_SObs_sqlite3_Create_Init(CTAI_SObs_sqlite3 *x,
+                            CTA_Handle userdata, int *retval);
+
+
+/** \brief Create a new stochastic observer that is subset of existing stochastic observer.
+ *
+ * \note userdata must contain handle of COSTA string giving the selection condition.\n
+ * \note This function fills in the object data of the new stochastic observer. The handle is created by CTA_SObs_CreateSel().
+ *
+ * \param obsin           I  pointer to object data of the existing stochastic observer
+ * \param userdata        I  pointer to userdata
+ * \param obsout          O  pointer to object data of the new COSTA-stochastic observer, must exist before calling
+ * \param retval          I  receives return value
+ * \return no return value
+ */
+void CTAI_SObs_sqlite3_CreateSel( CTAI_SObs_sqlite3 *obsin,
+        CTA_Handle *userdata, CTAI_SObs_sqlite3 *obsout, int *retval);
+
+
+/** \brief Count the number of measurements
+ *
+ * \param x               I pointer to object data of stochastic observer
+ * \param nmeasr          O receives number of measurements
+ * \param retval          O receives the return value
+ * \return no return value
+ */
+void CTAI_SObs_sqlite3_Count( CTAI_SObs_sqlite3 *x, int *nmeasr,
+       int *retval);
+
+
+/** \brief Get a vector with the measurements.
+ *
+ * \param x               I  pointer to object data of stochastic observer
+ * \param hvec            O  handle of vector that receives the measurements, must exist before calling
+ * \param retval          O  receives the return value
+ * \return no return value
+ */
+void CTAI_SObs_sqlite3_GetVals( CTAI_SObs_sqlite3 *x, CTA_Vector *hvec,
+       int *retval);
+
+
+/** \brief Get times associated with the measurements.
+ *
+ * \param x               I  pointer to object data of stochastic observer
+ * \param hvec            O  handle of vector that receives the associated times, must exist before calling
+ * \param retval          O  receives the return value
+ * \return no return value
+ */
+void CTAI_SObs_sqlite3_GetTimes( CTAI_SObs_sqlite3 *x, CTA_Vector *hvec,
+       int *retval);
+
+
+/** \brief Get variance of probability density function of the mesurements.
+ *
+ * \param x               I  pointer to object data of stochastic observer
+ * \param hvec            I  handle of the vector that receives the variances, must exist before calling
+ * \param returnvar       O  flag: TRUE for getting variance, FALSE for getting standard deviation
+ * \param retval          O  receives return value
+ * \return no return value
+ */
+void CTAI_SObs_sqlite3_GetVariances( CTAI_SObs_sqlite3 *x,
+       CTA_Vector *hvec, int *returnvar, int *retval);
+
+
+/** \brief Draw random values (measurements) according to the probability density
+ *         function of the mesurements.
+ *
+ * \param x               I  pointer to object data of stochastic observer
+ * \param hvec            O  handle of vector that receives the draw (measurements); must exist before calling
+ * \param retval          O  receives return value
+ * \return no return value
+ */
+void CTAI_SObs_sqlite3_GetRealisation( CTAI_SObs_sqlite3 *x,
+       CTA_Vector *hvec, int *retval);
+
+
+/** \brief Get covariance matrix of probability density function of the measurements.
+ *
+ * \param x               I  pointer to object data of stochastic observer
+ * \param hmat            O  handle of matrix that receives the covariance matrix; must exist before calling
+ * \param retval          O  receives return value
+ * \return no return value
+ */
+void CTAI_SObs_sqlite3_GetCovMat( CTAI_SObs_sqlite3 *x, CTA_Matrix *hmat,
+       int *retval);
+
+
+/** \brief Export a CTAI_SObs object to file or standard output.
+ *
+ * \note Supported:\n
+ *       output to file (userdata must contain handle of COSTA file)\n
+ *
+ * \param x               I  pointer to object data of stochastic observer
+ * \param userdata        I  user data
+ * \param retval          O  receives return value
+ * \return no return value
+ */
+void CTAI_SObs_sqlite3_export( CTAI_SObs_sqlite3 *x, CTA_Handle *userdata,
+       int *retval);
+
+
+/** \brief Free the object data and associated resources.
+ *
+ * \param x               I  pointer to object data of stochastic observer
+ * \param retval          O  receives return value
+ * \return no return value
+ */
+void CTAI_SObs_sqlite3_Free( CTAI_SObs_sqlite3 *x, int *retval);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/costa/native/cta/include/cta_string.h b/costa/native/cta/include/cta_string.h
new file mode 100644
index 000000000..b60cc31be
--- /dev/null
+++ b/costa/native/cta/include/cta_string.h
@@ -0,0 +1,184 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_string.h
+\brief Interface description of the default COSTA string component.
+
+Utilities including most of the basic string operations and access to the string data itself.
+*/
+
+#ifndef CTA_STRING_H
+#define CTA_STRING_H
+#include "cta_system.h"
+#include "cta_handles.h"
+#include "cta_datatypes.h"
+
+/* String instance handle declared in cta_handles.h */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Create a new COSTA string instance.
+ *
+ *  \param hstring  O  handle of created string
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_String_Create(CTA_String *hstring);
+
+/** \brief Create a new COSTA string that is a copy of an existing one
+ *
+ *  \param hto    O  receives the handle of the created string
+ *  \param hfrom  I  handle of string to copy
+ *
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_String_Copy(CTA_String *hto, CTA_String hfrom);
+
+/** \brief Free the COSTA string instance.
+ *
+ *  \note
+ *
+ *  \param hstring  IO handle of the string instance, replaced by CTA_NULL on return
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_String_Free(CTA_String *hstring);
+
+/** \brief Get the number of characters in string.
+ *
+ *  \note The returned length is the number of characters excluding the 
+ *        0-character.
+ *
+ *  \param hstring  I  handle of the string
+ *  \param len      O  receives the number of characters in string
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_String_GetLength(CTA_String hstring, int *len);
+
+/** \brief Set the string to new content.
+ *
+ *  \param hstring  IO handle of the string
+ *  \param str      I  new content
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_String_Set(CTA_String hstring, const char *str);
+
+/** \brief Get a copy of the string.
+ *
+ *  \note It is the responsibility of the caller making str large enough to
+ *        hold the string and trailing 0-character.
+ *
+ *  \param hstring  I  handle of the string
+ *  \param str      O  buffer that receives a copy of the string including trailing 0-character
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_String_Get(CTA_String hstring, char *str);
+
+/** \brief Get the (scalar) value of a string
+ *
+ *  \note It is the responsibility of the caller that parameter value is large enough to
+ *        hold the value as specified by the datatype.
+ *
+ *  \param hstring  I  handle of the string
+ *  \param value    O  receives the value
+ *  \param datatype I  data type of value
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_String_GetValue(CTA_String hstring, void *value, CTA_Datatype datatype);
+
+
+/** \brief Create new string that is a concatination of existing strings.
+ *
+ *  \param istring  IO handle of the string (first string in concatination)
+ *                     and whole concatinated string on return
+ *  \param xstring  I  handle of the second string (extension string)
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_String_Conc(CTA_String istring, CTA_String xstring);
+
+
+/** \brief Get a pointer to the contents of the string (INTERNAL USE)
+ *
+ *  \param hstring  I  handle of the string
+ *  \return pointer to the string contents
+ */
+CTAEXPORT char* CTAI_String_GetPtr(CTA_String hstring);
+
+
+/** \brief Imports string.
+ *
+ *  Supports: pack objects (usrdata must be handle of pack object to import from)
+ *
+ *  \param hstring  IO handle of the string
+ *  \param usrdata  I  configuration of import
+ *  \return CTA_OK if successful
+ *
+ *  \note Only CTA_Pack is currently supported fot usrdata
+ */
+CTAEXPORT int CTA_String_Import(CTA_String hstring, CTA_Handle usrdata);
+
+/** \brief Exports length of string and string itself.
+ *
+ *  Supports: pack objects (usrdata must be handle of pack object to export to)
+ *
+ *  \param hstring  I  handle of the string
+ *  \param usrdata  IO configuration of export
+ *  \return CTA_OK if successful
+ *
+ *  \note Only CTA_Pack is currently supported fot usrdata
+ */
+CTAEXPORT int CTA_String_Export(CTA_String hstring, CTA_Handle usrdata);
+
+/** \brief Check whether string is equal to COSTA string.
+ *
+ *
+ *  \param hstring  I  handle of the string
+ *  \param str0     I  string to compare hsting with
+ *  \return CTA_TRUE/CTA_FALSE 
+ *
+ *  \note Only CTA_Pack is currently supported fot usrdata
+ */
+CTAEXPORT int CTA_String_Equals_Char(CTA_String hstring, const char *str0);
+
+/** \brief Check whether two COSTA strings are equal.
+ *
+ *
+ *  \param hstring1  I  handle of first string
+ *  \param hstring2  I  handle of second string
+ *  \return CTA_TRUE/CTA_FALSE 
+ *
+ *  \note Only CTA_Pack is currently supported fot usrdata
+ */
+CTAEXPORT int CTA_Strings_Equal(CTA_String hstring1, CTA_String hstring2);
+
+
+/** \brief Create a duplication of a COSTA string 
+ *
+ *  \param hfrom  I  handle of string to copy
+ *  \param hto    O  handle of created string
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_String_Duplicate(CTA_String hfrom, CTA_String *hto);
+
+/*=====================================================================*/
+#ifdef __cplusplus
+}
+#endif
+#endif
+
diff --git a/costa/native/cta/include/cta_system.h b/costa/native/cta/include/cta_system.h
new file mode 100644
index 000000000..9b7c4a312
--- /dev/null
+++ b/costa/native/cta/include/cta_system.h
@@ -0,0 +1,71 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2007  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_system.h
+\brief Macros for handing system dependent issues
+*/
+
+#ifndef CTA_SYSTEM_H
+#define CTA_SYSTEM_H
+
+#define CTANOEXPORT /* does not do anything yet, but it makes clear which routines are internal */
+#define CTANOF90 /* does not do anything yet, but it makes clear if no fortran90 header is needed */
+#ifdef WIN32
+
+	#ifdef CTALIB
+		#define CTAEXPORT  __declspec(dllexport)
+	#else
+		#define CTAEXPORT  __declspec(dllimport)
+	#endif
+
+    #ifdef FTN_CAPITAL	
+       #define F77_FUNC(X,Y) Y
+	   #define F77_FUNC_NOEXP(X,Y) Y
+    #else
+       #define F77_FUNC(X,Y) X
+	   #define F77_FUNC_NOEXP(X,Y) X
+    #endif
+
+	#ifdef FTN_CAPITAL
+		#define F77_CALL(X,Y) __cdecl Y
+		#define CF77_CALL(X,Y) Y
+	#else
+		#define F77_CALL(X,Y) __cdecl X
+		#define CF77_CALL(X,Y) X
+	#endif
+
+
+#else
+
+	#define CTAEXPORT
+	#ifdef FTN_CAPITAL
+                #define F77_FUNC(X,Y) Y ## _
+	        #define F77_CALL(X,Y) Y ## _
+        	#define CF77_CALL(X,Y) Y ## _
+        #else
+                #define F77_FUNC(X,Y) X ## _
+        	#define F77_CALL(X,Y) X ## _
+	        #define CF77_CALL(X,Y) X ## _
+        #endif
+
+#endif
+
+#endif
+
diff --git a/costa/native/cta/include/cta_time.h b/costa/native/cta/include/cta_time.h
new file mode 100644
index 000000000..f62b4147b
--- /dev/null
+++ b/costa/native/cta/include/cta_time.h
@@ -0,0 +1,189 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_time.h
+\brief Interface description of the default COSTA time component.
+
+A time object describes a time span (time of start, time of end). Utility functions are provided for changing and evaluating or comparing time spans.
+*/
+
+#ifndef CTA_TIME_H
+#define CTA_TIME_H
+#include "cta_system.h"
+#include "cta_handles.h"
+#include "cta_datatypes.h"
+/* Function Handle */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef CTA_Handle CTA_Time;
+
+/* functions */
+
+/** \brief Create a time object.
+ *
+ * \param htime  O  receives handle of newly created time object
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Time_Create(CTA_Time *htime);
+
+/** \brief Free a time object.
+ *
+ * \param htime    IO handle of time object to be freed, replaced by CTA_NULL on return.
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Time_Free(CTA_Time *htime);
+
+/** \brief Set the time span.
+ *
+ * \param htime    IO time object of which to set time span
+ * \param tstart   I  starting time
+ * \param tend     I  ending time
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Time_SetSpan(CTA_Time htime,double tstart, double tend);
+
+/** \brief Get the time span.
+ *
+ * \param htime    I  time object of which to get time span
+ * \param tstart   O  receives the starting time
+ * \param tend     O  receives ending time
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Time_GetSpan(CTA_Time htime,double *tstart, double *tend);
+
+/** \brief Set the time step.
+ *
+ * \param htime    IO time object of which to set time step
+ * \param tstep    I  new time step
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Time_SetStep(CTA_Time htime,double tstep);
+
+/** \brief Get time step.
+ *
+ * \param htime    IO time object of which to get time step
+ * \param tstep    O  receives time step
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Time_GetStep(CTA_Time htime,double *tstep);
+
+/** \brief Count number of timesteps in time 
+ *
+ * \param htime    I time object (see function description)
+ * \param nsteps   O number of timesteps
+ *
+ * \return error status: CTA_OK if successful
+ *
+ * \note number of steps is rounded to nearest integer
+ */
+CTAEXPORT int CTA_Time_CountSteps(CTA_Time htime, int *nsteps);
+
+/** \brief Get interval of i-th step
+ *
+ * \param htime    I time object (see function description)
+ * \param istep    I interval of step
+ * \param hstep    O time step of model
+ *
+ * \return error status: CTA_OK if successful
+ *
+ * \note intervals are counted from 1 to nsteps
+ */
+CTAEXPORT int CTA_Time_GetTimeStep(CTA_Time htime, int istep, CTA_Time hstep);
+
+/** \brief Check whether htimesub is within time span of htime.
+ *
+ * \param htimesub I time object (see function description)
+ * \param htime    I time object (see function description)
+ * \param inspan   O receives TRUE if htimesub is within time span of htime or FALSE otherwise
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Time_InSpan(CTA_Time htimesub, CTA_Time htime,  BOOL *inspan);
+
+/** \brief Check whether time step of time object equals t
+ *
+ * \param htime    I time object
+ * \param t        I time step to compare
+ * \param isstep   O receives TRUE if t equals time step of time object or FALSE otherwise
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Time_IsStep(CTA_Time htime, double t,  BOOL *isstep);
+
+/** \brief Copy a time object.
+ *
+ * \param hfrom    I time object to copy from
+ * \param hto      O handle of time object that receives copy, must exist before calling
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Time_Copy(CTA_Time hfrom, CTA_Time hto);
+
+/** \brief Export a time object.
+ * exports the whole internal state of the time object to given target
+ * CTA_FILE will export the time component in a MATLAB/OCTAVE readable form
+ * CTA_PACK will pack the content
+ *
+ * \param htime    I time object to export
+ * \param hexport  I target for export (CTA_FILE or CTA_PACK)
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Time_Export(CTA_Time htime, CTA_Handle hexport);
+
+/** \brief Import a time object.
+ * imports the whole internal state of the time object from given source
+ *
+ * \param htime    I time object to import to 
+ * \param himport  I source of import (CTA_PACK)
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Time_Import(CTA_Time htime, CTA_Handle himport);
+
+
+/** \brief Returns whether time object describes an timespan or a single
+ * instance.
+ *
+ * \param htime    I time object to import to 
+ * \param isspan   O time object is a time timespan (CTA_TRUE/CTA_FALSE) 
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Time_IsSpan(CTA_Time htime, int *isspan);
+
+
+
+
+
+
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif
diff --git a/costa/native/cta/include/cta_tree.h b/costa/native/cta/include/cta_tree.h
new file mode 100644
index 000000000..25d6472cc
--- /dev/null
+++ b/costa/native/cta/include/cta_tree.h
@@ -0,0 +1,187 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_tree.h
+
+\brief Interface description of the COSTA tree component.
+
+Store data in a CTA_Tree object in tree form. Access the members in the following way:
+ <I>branch1\\subbranch\\member</I> or <I>branch1/branch2/member</I>
+*/
+
+#ifndef CTA_TREE_H
+#define CTA_TREE_H
+#include "cta_system.h"
+#include "cta_handles.h"
+#include "cta_datatypes.h"
+
+/* Tree instance handle */
+typedef CTA_Handle CTA_Tree;
+#ifdef __cplusplus
+extern "C" {
+#endif
+/** \brief Create a new COSTA tree instance 
+ *
+ *  \note
+ *
+ *  \param htree  O  receives handle of created tree
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_Tree_Create(CTA_Tree *htree);
+
+
+/** \brief Free the COSTA tree instance 
+ *
+ *  \note 
+ *
+ *  \param htree  IO handle of the tree instance, replaced by CTA_NULL on return
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_Tree_Free(CTA_Tree *htree);
+
+
+/** \brief Add a COSTA handle to the COSTA tree
+ *
+ *  \note
+ *
+ *  \param htree  IO handle of the tree object (parent)
+ *  \param name   I  name of the COSTA item
+ *  \param hitem  I  handle of the COSTA item to add (do not free the object after adding it to the tree)
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_Tree_AddHandle(CTA_Tree htree, const char *name, CTA_Handle hitem);
+
+
+/** \brief Count the number of COSTA handles specified by the given path.
+ *
+ *  \param htree  I  handle of the tree object
+ *  \param path   I  path of the item, separated by / or \\
+ *  \param count  O  receives the number of items found
+ *  \return CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found
+ */
+CTAEXPORT int CTA_Tree_CountHandles(CTA_Tree htree, CTA_String path, int *count);
+
+/** \brief Count the number of COSTA handles specified by the given path.
+ *
+ *  \param htree  I  handle of the tree object
+ *  \param path   I  path of the item, separated by / or \
+ *  \param count  O  receives the number of items found
+ *  \return CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found
+ */
+CTAEXPORT int CTA_Tree_CountHandlesStr(CTA_Tree htree, char *path, int *count);
+
+
+/** \brief Get a COSTA handle from the COSTA tree (by path)
+ *
+ *  \note In case of trees with default values, returns the default value.
+ *  \note The returned handle must not be freed.
+ *
+ *  \param htree  I  handle of the tree object
+ *  \param path   I  path of the item, separated by / or \\
+ *  \param hitem  O  receives the handle of the COSTA item, or CTA_NULL in case not found, do not free this handle.
+ *  \return CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found
+ */
+CTAEXPORT int CTA_Tree_GetHandle(CTA_Tree htree, CTA_String path, CTA_Handle *hitem);
+
+
+/** \brief Get the value of a COSTA handle from the COSTA tree (by path)
+ *
+ *  \note In case of trees with default values, returns the default value.
+ *
+ *  \param htree    I  handle of the tree object
+ *  \param path     I  COSTA string describing path of the item, separated by / or \
+ *  \param value    O  receives the value of the COSTA item, or CTA_NULL in case of not found
+ *  \param datatype I  data type of parameter value, must be the same as item in tree
+ *  \return CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found
+ */
+CTAEXPORT int CTA_Tree_GetValue(CTA_Tree htree, CTA_String path, void *value, CTA_Datatype datatype);
+
+/** \brief Get a COSTA handle from the COSTA tree (by path)
+ *
+ *  \note In case of trees with default values, returns the default value.
+ *  \note The returned handle must not be freed.
+ *
+ *  \param htree  I  handle of the tree object
+ *  \param str    I  C string describing path of the item, separated by / or \
+ *  \param hitem  O  receives the handle of the COSTA item, or CTA_NULL in case of not found, do not free this handle
+ *  \return CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found
+ */
+CTAEXPORT int CTA_Tree_GetHandleStr(CTA_Tree htree, char* str, CTA_Handle *hitem);
+
+
+/** \brief Get the value of a COSTA handle from the COSTA tree (by path)
+ *
+ *  \note In case of trees with default values, returns the default value.
+ *
+ *  \param htree    I  handle of the tree instance
+ *  \param str      I  C string describing path of the item, separated by / or \
+ *  \param value    O  receives the value of the COSTA item, or CTA_NULL in case of not found
+ *  \param datatype I  data type of the value specified
+ *  \return CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found
+ */
+CTAEXPORT int CTA_Tree_GetValueStr(CTA_Tree htree, char* str, void *value, CTA_Datatype datatype);
+
+
+/** \brief Count the number of elements on the current level of the COSTA tree
+ *
+ *  \param htree  I  handle of the tree level
+ *  \param count  O  receives the number of elements on the current tree level
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_Tree_CountItems(CTA_Tree htree, int *count);
+
+
+/** \brief Get a handle (by index) on the current level of the COSTA tree
+ *
+ *  \param htree  I  handle of the tree level
+ *  \param index  I  index of the item to return, 1 <= index <= CTA_Tree_CountItems()
+ *  \param hitem  O  receives handle of the item at given index
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_Tree_GetItem(CTA_Tree htree, int index, CTA_Handle *hitem);
+
+
+/** \brief Get the value of a COSTA handle from the COSTA tree (by index)
+ *
+ *  \note In case of trees with default values, returns the default value.
+ *
+ *  \param htree    I  handle of the tree instance
+ *  \param index    I  index of the item
+ *  \param value    O  receives value of the COSTA item, or CTA_NULL in case of not found
+ *  \param datatype I  data type of the value specified
+ *  \return CTA_OK if successful or CTA_ITEM_NOT_FOUND in case not found
+ */
+CTAEXPORT int CTA_Tree_GetItemValue(CTA_Tree htree, int index, void *value, CTA_Datatype datatype);
+
+
+/** \brief Print a COSTA tree to STDOUT
+ *
+ *  \note
+ *
+ *  \param htree  I  handle of the tree
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_Tree_Print(CTA_Tree htree);
+
+/*=====================================================================*/
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/costa/native/cta/include/cta_treevector.h b/costa/native/cta/include/cta_treevector.h
new file mode 100644
index 000000000..cd4c8ab45
--- /dev/null
+++ b/costa/native/cta/include/cta_treevector.h
@@ -0,0 +1,512 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+#ifndef CTA_TREEVECTOR_H
+#define CTA_TREEVECTOR_H
+#include "cta_system.h"
+#include "cta_datatypes.h"
+#include "cta_handles.h"
+#include "cta_vector.h"
+#include "cta_matrix.h"
+#include "cta_metainfo.h"
+
+/**
+\file  cta_treevector.h
+\brief Interface description of the COSTA default tree-vector component.
+
+The tree-vector is an extension of a vector component. A tree-vector
+either contains a single vector or is a concatenation of a number of
+tree-vectors, called sub-tree-vectors in this context. The usage of
+sub-tree-vectors makes is possible to concatenate models or extend models
+as is done when a deterministic model is extended into a stochastic model.
+The sub-tree-vectors are also very useful inside the model source code where the
+whole state of the model is not represented by a single vector. 
+The default tree-vector component uses COSTA vector-components for storing the values.
+
+Each (sub-)tree-vector has a tag. Tree-vectors having the same tag are considered
+to be the same, meaning they have the same buildup in sub-tree-vectors, length and
+datatypes.
+*/
+
+
+/**
+  Type definition of a handle to a COSTA tree-vector instance
+*/
+typedef CTA_Handle CTA_TreeVector;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Create a tree-vector.
+ *
+ * \param name    I  name of the tree-vector, this is a human readable
+ *                   string used for (debug) output and not by the algorithms
+ *                   itself
+ * \param tag     I  tag of this tree-vector
+ * \param treevec  O new tree-vector
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_TreeVector_Create(const char *name, const char *tag, CTA_TreeVector *treevec);
+
+/** \brief Duplicate a tree-vector.
+ *
+ * \note Duplication means that a new tree-vector is created that is identical to
+ * the originating tree-vector. All data in the original tree-vector is also copied.
+ *
+ * \param treevec1  I  handle of treevector to be duplicated
+ * \param treevec2  O  receives handle to duplicate
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_TreeVector_Duplicate(CTA_TreeVector treevec1, CTA_TreeVector *treevec2 );
+
+
+/** \brief Define a tree-vector to be a concatination of other tree-vectors.
+ *
+ * \note The concatenation is done by reference (handle). The sub-tree-vectors that
+ *       are concatenated are not copied.
+ *
+ * \param treevec1  I  tree-vector that will be concatenation of the sub-tree-vectors provided in parameter treevecs
+ * \param treevecs  I  array of the sub-tree-vectors
+ * \param ntreevecs I  number of sub-tree-vectors in treevecs
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_TreeVector_Conc(CTA_TreeVector treevec1, CTA_TreeVector *treevecs, int ntreevecs);
+
+/** \brief Get the handle of a sub-tree-vectors using its tag.
+ *
+ * \note This is done by reference (handle). The handle of the
+ *       returned sub-tree-vector is not a copy
+ *
+ * \param treevec   I  Tree-vector
+ * \param tag       I  tag of the requested sub-tree-vector
+ * \param subtreevec O  receives handle of the requested sub-tree-vectors, this is by reference, not a copy
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_TreeVector_GetSubTreeVec(CTA_TreeVector treevec, const char *tag, CTA_TreeVector *subtreevec);
+
+/** \brief Get the tag of a sub-tree-vector using its index (starting with 0).
+ *
+ * \param treevec   I  Tree-vector
+ * \param index     I  index of the requested sub-tree-vector
+ * \param tag       O  String of standard length containnig the tag
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_TreeVector_GetSubTreeVecId(CTA_TreeVector treevec, int index, char tag[CTA_STRLEN_TAG]);
+
+
+/** \brief Get the handle of a first-layer sub-tree-vector using its index.
+ *
+ * \note The concatination is done by reference (handle). The handle of the
+ *       returned sub-tree-vector is not a copy
+ *
+ * \param treevec   I  Tree-vector
+ * \param index     I  index of requested sub-tree-vector. Note that the first sub-tree-vector has index 1.
+ * \param subtreevec O  receives handle of the requested sub-tree-vector, this is by reference, not a copy
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_TreeVector_GetSubTreeVecIndex(CTA_TreeVector treevec, int index,
+                                CTA_TreeVector *subtreevec);
+
+/** \brief Get number of sub-treevectors
+ *
+ * \note In case of a leaf 0 is returned
+ *
+ * \param treevec      I  Tree-vector
+ * \param numSubTrees  O  Number of sub-treevectors
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_TreeVector_GetNumSubTree(CTA_TreeVector treevec, int* numSubTrees);
+
+
+/** \brief Get the tag of the tree-vector.
+ *
+ * Note tag should be large enough to hold the result
+ *      length of CTA_STRLEN_TAG is always save (no internal protection)
+ *
+ * \param treevec   I  Tree-vector
+ * \param tag       O  receives the tag of the requested sub-tree-vector (see note)
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_TreeVector_GetTag(CTA_TreeVector treevec, char *tag);
+
+
+/** \brief Set the values of the tree-vector
+ *
+ * \note This operation is only possible when all data elements in the tree-vector
+ *       are of the same type and the size of the tree-vector corresponds to the
+ *       size of the input vector.
+ *
+ * \param treevec   IO TreeVector
+ * \param hvec      I  handle of the vector containing new values (see note)
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_TreeVector_SetVec(CTA_TreeVector treevec, CTA_Vector hvec);
+
+/** \brief Get the values of the tree-vector.
+ *
+ * \note This operation is only possible when all data elements in the tree-vector
+ *       are of the same type and the size of the tree-vector corresponds to the
+ *       vector size.
+ *
+ * \param treevec   I  Tree-vector
+ * \param hvec      O  Vector that is receiving the values; must exist before calling
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_TreeVector_GetVec(CTA_TreeVector treevec, CTA_Vector hvec);
+
+/** \brief Axpy operation between two tree-vectors.
+ *
+ * \note  Axpy: y=alpha*x+y. Add alpha times tree-vector x to
+ *              this tree-vector (y).
+ *
+ * \param y         IO Tree-vector (y)
+ * \param alpha     I  scalar
+ * \param x         I  Tree-vector (x)
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_TreeVector_Axpy(CTA_TreeVector y, double alpha, CTA_TreeVector x);
+
+/** \brief Compute dot product of two tree-vectors.
+ *
+ * \note  dotprod = sum[all i]  (treevec1_i * treevec2_i)                           
+ *
+ * \param treevec1  I  first tree-vector
+ * \param treevec2  I  second tree-vector
+ * \param dotprod   O  receives the dot product
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_TreeVector_Dot(CTA_TreeVector treevec1, CTA_TreeVector treevec2, double *dotprod);
+
+/** \brief Compute the 2-norm of a tree-vector.
+ *
+ * \param treevec1  I  Tree-vector
+ * \param nrm2      O  receives the 2-norm
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_TreeVector_Nrm2(CTA_TreeVector treevec1, double *nrm2);
+
+
+/** \brief Copy a tree-vector
+ *
+ * \note  The two tree-vectors must be compatible: same structure and datatypes.
+ *
+ * \param treevec1   I  sending tree-vector
+ * \param treevec2   O  receiving tree-vector
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_TreeVector_Copy(CTA_TreeVector treevec1, CTA_TreeVector treevec2);
+
+/** \brief Set whole tree-vector equal to a constant value.
+ *
+ * \note  This method can only be used if all elements of the tree-vector 
+ *        have the same data type.
+ *
+ * \param treevec  IO TreeVector
+ * \param val      I  value to set
+ * \param datatype I  data type of val, must be same as data type of tree-vector
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_TreeVector_SetConstant(CTA_TreeVector treevec, void *val,CTA_Datatype datatype);
+
+/** \brief Scale tree-vector.
+ *
+ * \param treevec   IO handle of tree-vector
+ * \param alpha    I  scalar
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_TreeVector_Scal(CTA_TreeVector treevec, double alpha);
+
+/** \brief Set all values of the tree-vector.
+ *
+ * \note  This method can only be used if all elements of the tree-vector 
+ *        are of the same data type.
+ *
+ * \param treevec  IO Tree-vector
+ * \param val      I  values to be set
+ * \param nval     I  number of elements in val
+ * \param datatype I  data type of *val, must be the same as data type of elements in tree-vector
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_TreeVector_SetVals(CTA_TreeVector treevec, void *val,int nval, CTA_Datatype datatype);
+
+
+/** \brief Get all values of the tree-vector.
+ *
+ * \note  This method can only be used if all elements of the tree-vector
+ *        are of the same data type.
+ *
+ * \param treevec  I  Tree-vector
+ * \param val      O  receives the values
+ * \param nval     I  number of elements in val
+ * \param datatype I  data type of *val, must be the same as data type of elements in tree-vector
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_TreeVector_GetVals(CTA_TreeVector treevec, void *val,int nval,CTA_Datatype datatype);
+
+/** \brief Set single value of the tree-vector.
+ *
+ * \param treevec  IO Tree-Vector
+ * \param i        I  index of value in tree-vector
+ * \param val      I  value to be set
+ * \param datatype I  data type of *val, must be the same as data type of element in tree-vector
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_TreeVector_SetVal(CTA_TreeVector treevec, int i, void *val, CTA_Datatype datatype);
+
+
+/** \brief Get single value of the tree-vector.
+ *
+ * \param treevec  I  Tree-vector
+ * \param i        I  index in value in tree-vector
+ * \param val      O  returned value
+ * \param datatype I  data type of *val, must be the same as data type of element in tree-vector
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_TreeVector_GetVal(CTA_TreeVector treevec, int i, void *val,CTA_Datatype datatype);
+
+/** \brief Get size of tree-vector.
+ *
+ * \param treevec  I  Tree-vector
+ * \param n        O  receives size of tree-vector
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_TreeVector_GetSize(CTA_TreeVector treevec, int *n);
+
+/** \brief Export tree-vector.
+ *
+ * Can export tree-vector to file or pack object.\n
+ * usrdata must contain a handle of the file or pack object to be used.\n
+ * Dependency: CTA_Vector_Export()
+ *
+ *
+ * \param treevec  I  Tree-vector
+ * \param usrdata  I  export properties
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_TreeVector_Export(CTA_TreeVector treevec, CTA_Handle usrdata);
+
+/** \brief Import Tree-vector.
+ *
+ * Can import tree-vector from file or pack object.\n
+ * usrdata must contain a handle of the file or pack object to be used.\n
+ * Dependency: CTA_Vector_Import()
+ *
+ *
+ * \param treevec  I  Tree-vector
+ * \param usrdata  I  import properties
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_TreeVector_Import(CTA_TreeVector treevec, CTA_Handle usrdata);
+
+/** \brief Import Tree-vector as flat vector.
+ *
+ * Can import tree-vector from netcdf file.\n
+ * usrdata must contain a handle of the file .\n
+ * Dependency: CTA_Vector_VImport()
+ *
+ *
+ * \param treevec  I  Tree-vector
+ * \param usrdata  I  import properties
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_TreeVector_VImport(CTA_TreeVector treevec, CTA_Handle usrdata);
+
+
+/** \brief Free Tree-vector.
+ *
+ * \param treevec     I  handle of tree-vector
+ * \param recursive  I  also free all sub-tree-vectors, yes: CTA_TRUE or no: CTA_FALSE
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_TreeVector_Free(CTA_TreeVector *treevec, int recursive);
+
+/** \brief Print tree-vector information.
+ *
+ * Gives following information:\n\n
+ *  Tree-vector information:\n
+ *  tag: [tag]\n
+ *  nsubtreevecs: [number of sub-tree-vectors]\n
+ *
+ * If nsubtreevecs > 0: recursively prints all sub-tree-vectors
+ * Else prints:\n
+ * leaf: yes\n
+ * tree-vector size (leaf)
+ *
+ * \param treevec     I  tree-vector
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_TreeVector_Info(CTA_TreeVector treevec);
+
+/** \brief Perform the matrix multiplication C:=alpha*op(A)*op(B)+beta*C
+    where op(X)=X, X^T. However C and A are matrices of wich the columns are 
+    tree-vectors
+ *
+ *  \param sC     IO array of tree-vector (matrix C)
+ *  \param nc     I  number of columns of C (dimension of sC)
+ *  \param transa I  transpose flag CTA_TRUE/CTA_FALSE for matrix A (not supported)
+ *  \param transb I  transpose flag CTA_TRUE/CTA_FALSE for matrix B
+ *  \param alpha  I  scalar
+ *  \param sA     I  handle of matrix A
+ *  \param na     I  number of columns of A (dimension of sA)
+ *  \param mB     I  handle of matrix B
+ *  \param beta   I  scalar
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_TreeVector_Gemm(CTA_TreeVector *sC, int nc, int transa, int transb, double alpha, CTA_TreeVector *sA, int na,
+                   CTA_Matrix mB, double beta);
+
+
+/** \brief Generate XML from one COSTA tree-vector
+*
+*  \param treevec   I  handle of a COSTA tree-vector
+*  \param writer I  the XML text writer
+*/
+CTAEXPORT void CTAI_XML_WriteTreeVec(CTA_TreeVector treevec, xmlTextWriter *writer);
+
+/** \brief Create a COSTA tree-vector from XML. 
+*
+*  \param cur_node  I  Current XML node 
+*  \return             Handle to create or CTA_NULL in case of an error.
+*/
+CTAEXPORT CTA_TreeVector CTAI_XML_CreateTreeVec(xmlNode *cur_node);
+
+
+/** \brief Perform given operation on all leafs of the treevector
+*
+*  \param treevec1 I  handle of first COSTA tree-vector
+*  \param treevec2 I  handle of second COSTA tree-vector
+*  \param treevec  I  handle of a COSTA tree-vector
+*  \param op       I  operation to perform on the leafs
+*  \param arg      I  additional argument of operation
+ * \return error status: CTA_OK if successful
+*/
+CTAEXPORT int CTA_TreeVector_OpOnLeafs(CTA_TreeVector treevec1, CTA_TreeVector treevec2, CTA_Func op, CTA_Handle arg);
+
+/** \brief Elementwise division of two vectors
+*  \note y:=y./x
+*
+*  \param y       I  handle of a COSTA tree-vector (y)
+*  \param x       I  handle of a COSTA tree-vector (y)
+ * \return error status: CTA_OK if successful
+*/
+CTAEXPORT int CTA_TreeVector_ElmDiv(CTA_TreeVector y, CTA_TreeVector x);
+
+/** \brief Elementwise multiplication of two vectors
+*  \note y:=y.*x
+*
+*  \param y       I  handle of a COSTA tree-vector (y)
+*  \param x       I  handle of a COSTA tree-vector (y)
+*  \return error status: CTA_OK if successful
+*/
+CTAEXPORT int CTA_TreeVector_ElmProd(CTA_TreeVector y, CTA_TreeVector x);
+
+/** \brief Elementwise sqare root 
+*  \note y:=sqrt(y)
+*
+*  \param y       I  handle of a COSTA tree-vector (y)
+*  \return error status: CTA_OK if successful
+*/
+CTAEXPORT int CTA_TreeVector_ElmSqrt(CTA_TreeVector y);
+
+
+/** \brief Set nocompute flag of a sub-tree vector 
+*
+*  When this flag is set, the values of the sub-treevector will
+*  be ignored in all basic vector operations (including asking the 
+*  total length of the tree-vector). This propertie is used for
+*  additionally adding some meta information
+*
+* \note the nocompute flag is set at the level of the parent!
+* so the "isolated" sub-treevector can be used in basic vector
+* operations.
+*
+*  \param x       I  handle of a COSTA tree-vector (y)
+*  \param tag     I  tag of sub-treevector 
+*  \return error status: CTA_OK if successful
+*/
+CTAEXPORT int CTA_TreeVector_SetSubTreeNocompute(CTA_TreeVector x, const char *tag);
+
+/** \brief Increase the reference count of a treevector and all subtrevectors
+ *
+ * \param treevec  I handle of a COSTA tree-vector
+*  \return error status: CTA_OK if successful
+ *
+ */
+CTAEXPORT int CTA_TreeVector_IncRefCount(CTA_TreeVector treevec);
+   
+   
+
+CTAEXPORT int CTA_TreeVector_SetMetainfo(CTA_TreeVector treevec, CTA_Metainfo minfo);
+CTAEXPORT int CTA_TreeVector_GetMetainfo(CTA_TreeVector treevec, CTA_Metainfo minfo);
+
+
+CTAEXPORT int CTAI_TreeVec_GetVecNumHandles(CTA_TreeVector treevec);
+
+CTAEXPORT int CTAI_TreeVec_List(CTA_TreeVector treevec, CTA_Vector taglist, int *indx);
+
+CTAEXPORT int CTA_TreeVector_List(CTA_TreeVector treevec, CTA_Vector taglist );
+
+CTAEXPORT int CTA_TreeVector_GetVecNumHandles(CTA_TreeVector treevec);
+
+CTAEXPORT void CTAI_Treevector_Operation_ScaledRMS(char *tag, CTA_Vector v1, 
+                             CTA_Vector vscal, CTA_Handle hdum, int *retval);
+
+CTAEXPORT void CTAI_Treevector_Operation_Amax(char *tag, CTA_Vector v1, 
+                             CTA_Vector * v2, CTA_Handle hdum, int *retval);
+
+CTAEXPORT void CTAI_Treevector_Operation_PrintEntry(char *tag, CTA_Vector v1, 
+                             CTA_Vector v2, CTA_Handle hdum, int *retval);
+
+CTAEXPORT void CTAI_Treevector_Operation_ScaledSSQ(char *tag, CTA_Vector v1, 
+                             CTA_Vector vscal, CTA_Handle hdum, int *retval);
+
+CTAEXPORT void CTAI_Treevector_Operation_MaxAbs(char *tag, CTA_Vector v1, 
+                             CTA_Vector , CTA_Handle hdum, int *retval);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
+
diff --git a/costa/native/cta/include/cta_usr_matrix.h b/costa/native/cta/include/cta_usr_matrix.h
new file mode 100644
index 000000000..dd1e0a234
--- /dev/null
+++ b/costa/native/cta/include/cta_usr_matrix.h
@@ -0,0 +1,456 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_usr_matrix.h
+
+\brief In this file a description is given of the interface of user matrix functions.
+When creating your own user matrix class use the following as template.
+
+The Usr_Matrix to which is being referred in this template can be substituted by your own user matrix.
+
+<b>Step 1</b>: for creating your own user matrix class call the function CTA_Matrix_DefineClass().
+
+Example:
+
+\code
+typedef CTA_Handle CTA_MatrixClass;
+
+CTA_Func h_func[CTA_MATRIX_NUMFUNC];
+CTA_MatrixClass my_own_matrix_class;
+
+ierr=CTA_Func_Create(" ",&usr_matrix_create_size, hintf, &h_func[CTA_MATRIX_CREATE_SIZE]);
+//...for all implementation functions...
+
+CTA_Matrix_DefineClass("classname", h_func, &my_own_matrix_class);\endcode
+
+
+
+Making a user matrix class involves the implementation of the following functions:
+
+CTA_MATRIX_CREATE_SIZE \n
+CTA_MATRIX_CREATE_INIT \n
+CTA_MATRIX_GETVALS     \n
+CTA_MATRIX_GETVAL      \n
+CTA_MATRIX_SETCOL      \n
+CTA_MATRIX_SETVALS     \n
+CTA_MATRIX_SETVAL      \n
+CTA_MATRIX_SETCONST    \n
+CTA_MATRIX_FREE        \n
+CTA_MATRIX_EXPORT      \n
+CTA_MATRIX_GER         \n
+CTA_MATRIX_INV         \n
+CTA_MATRIX_GEMV        \n
+CTA_MATRIX_GEMM        \n
+CTA_MATRIX_AXPY        \n
+CTA_MATRIX_NUMFUNC     
+
+For creating an implementation function see documentation of CTA_Func_Create().
+
+<b>Step 2</b>: to create an object of the newly defined matrix class call CTA_Matrix_Create() in the
+same way as creating a CTA_Matrix but with a different class handle, i.e. the user class handle from step 1 above.
+
+Example:
+
+\code
+Usr_Matrix usrmat; //user matrix object
+int n = 10;
+int m = 10;
+CTA_Datatype datatype = CTAI_String2Type("CTA_STRING");
+CTA_Handle userdata = CTA_NULL;
+CTA_Matrix_Create(my_own_matrix_class, m, n, datatype, &userdata, &usrmat);
+\endcode
+\n
+<b>Note 1: </b> with object data is meant only the object itself including pointer(s) to its contents, but
+not the contents of the matrix.\n\n
+<b>Note 2: </b> matrix indices start from 1, m and n for rows and columns respectively.\n\n
+*/
+
+/* parameters for different functions */
+//#define CTA_MATRIX_CREATE_SIZE ( 1)
+//#define CTA_MATRIX_CREATE_INIT ( 2)
+//#define CTA_MATRIX_GETVALS     ( 4)
+//#define CTA_MATRIX_GETVAL      ( 5)
+//#define CTA_MATRIX_SETCOL      ( 6)
+//#define CTA_MATRIX_SETVALS     ( 7)
+//#define CTA_MATRIX_SETVAL      ( 8)
+//#define CTA_MATRIX_SETCONST    ( 9)
+//#define CTA_MATRIX_FREE        (10)
+//#define CTA_MATRIX_EXPORT      (11)
+//#define CTA_MATRIX_GER         (12)
+//#define CTA_MATRIX_INV         (13)
+//#define CTA_MATRIX_GEMV        (14)
+//#define CTA_MATRIX_GEMM        (15)
+//#define CTA_MATRIX_AXPY        (16)
+//#define CTA_MATRIX_NUMFUNC     (17)
+
+
+
+
+//#define CTA_MATRIX_CREATE_SIZE ( 1)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \brief Implementation that forms part of the create process.
+ * 
+ * Must give the memory size of a new user matrix object.
+ *
+ * Example: 
+ *  \code
+//in header file:
+typedef struct {
+   //your own user object data goes here...
+}USR_MATRIX;
+
+//user implementation:
+void usr_matrix_create_size(...){
+   *memsize = sizeof(USR_MATRIX);
+   *retval = CTA_OK;
+}
+ \endcode
+ * \note At index CTA_MATRIX_CREATE_SIZE in the function list of the class descriptor.
+ *
+ * \param m        I  dimension, m (rows)
+ * \param n        I  dimension, n (columns)
+ * \param datatype I  data type of matrix elements
+ * \param userdata IO user data
+ * \param retval   O  must receive return value of user implementation function
+ * \param memsize  O  must receive the number of bytes which are necessary to store one
+                      user matrix class, with a pointer to the contents (data), but without the
+                      contents themselves
+ * \return no return value
+ */
+void usr_matrix_create_size(
+   int *m,
+   int *n,
+   CTA_Datatype *datatype,
+   void *userdata,
+   int *retval,
+   int *memsize
+   );
+
+
+//#define CTA_MATRIX_CREATE_INIT ( 2)
+/** \brief Implementation that forms part of the create process.
+ *
+ * The user matrix object needs to be made ready for use.
+ *
+ * \note At index CTA_MATRIX_CREATE_INIT in the function list of the class descriptor.
+ *
+ * \param objectdata IO pointer to object data of user matrix
+ * \param m          I  dimension, m (rows)
+ * \param n          I  dimension, n (columns)
+ * \param datatype   I  data type of the matrix elements
+ * \param *userdata   IO user data
+ * \param retval     O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_matrix_create_init(
+   Usr_Matrix *objectdata,
+   int *m,
+   int *n,
+   CTA_Datatype *datatype,
+   void *userdata,
+   int *retval
+   );
+
+
+//#define CTA_MATRIX_GETVALS     ( 4)
+/** \brief Implementation for getting all values from the user matrix.
+ *
+ * \note At index CTA_MATRIX_GETVALS in the function list of the class descriptor.
+ *
+ * \param objectdata I  pointer to object data of user matrix
+ * \param vals       O  must receive values; must exist before calling
+ * \param m          I  number of rows of *vals, must be the same as number of rows in matrix
+ * \param n          I  number of columns of *vals, must be the same as number of columns in matrix
+ * \param datatype   I  data type of value; must be the same as data type of matrix elements
+ * \param *userdata   IO user data
+ * \param retval     O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_matrix_getvals(
+   Usr_Matrix *objectdata,
+   void *vals,
+   int *m,
+   int *n,
+   CTA_Datatype *datatype,
+   void *userdata,
+   int *retval
+   );
+
+
+//#define CTA_MATRIX_GETVAL      ( 5)
+/** \brief Implementation for getting a single value from the user matrix.
+ *
+ * \note At index CTA_MATRIX_GETVAL in the function list of the class descriptor.
+ *
+ * \param objectdata I  pointer to object data of user matrix
+ * \param val        O  must receive value; must exist before calling
+ * \param m          I  row index of matrix value to get
+ * \param n          I  column index of matrix value to get
+ * \param datatype   I  data type of val; must be the same as data type of matrix elements
+ * \param *userdata   IO user data
+ * \param retval     O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_matrix_getval(
+   Usr_Matrix *objectdata,
+   void *val,
+   int *m,
+   int *n,
+   CTA_Datatype *datatype,
+   void *userdata,
+   int *retval
+   );
+
+
+//#define CTA_MATRIX_SETCOL      ( 6)
+/** \brief Implementation for setting column of user matrix.
+ *
+ * \note At index CTA_MATRIX_SETCOL in the function list of the class descriptor.
+ *
+ * \param objectdata IO pointer to object data of user matrix
+ * \param n          I  index of column to set
+ * \param hvec       I  handle of sending column; dimensions must be compatible
+ * \param retval     O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_matrix_setcol(
+   Usr_Matrix *objectdata,
+   int *n,
+   CTA_Vector *hvec,
+   int *retval
+  );
+
+
+//#define CTA_MATRIX_SETVALS     ( 7)
+/** \brief Implementation for setting all values of user matrix.
+ *
+ * \note At index CTA_MATRIX_SETVALS in the function list of the class descriptor.
+ *
+ * \param objectdata IO pointer to object data of user matrix
+ * \param vals       I  values to set to
+ * \param m          I  number of rows of *vals, must be the same as number of rows in matrix
+ * \param n          I  number of columns of *vals, must be the same as number of columns in matrix
+ * \param datatype   I  data type of value; must be the same as data type of matrix elements
+ * \param *userdata   IO user data
+ * \param retval     O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_matrix_setvals(
+   Usr_Matrix *objectdata,
+   void *vals,
+   int *m,
+   int *n,
+   CTA_Datatype *datatype,
+   void *userdata,
+   int *retval
+  );
+  
+
+//#define CTA_MATRIX_SETVAL      ( 8)
+/** \brief Implementation for setting a single value of user matrix.
+ *
+ * \note At index CTA_MATRIX_SETVAL in the function list of the class descriptor.
+ *
+ * \param objectdata IO pointer to object data of user matrix
+ * \param val        I  value to set to
+ * \param m          I  row index of matrix value to set
+ * \param n          I  column index of matrix value to set
+ * \param datatype   I  data type of val; must be the same as data type of matrix elements
+ * \param *userdata   IO user data
+ * \param retval     O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_matrix_setval(
+   Usr_Matrix *objectdata,
+   void *val,
+   int *m,
+   int *n,
+   CTA_Datatype *datatype,
+   void *userdata,
+   int *retval
+  );
+
+//#define CTA_MATRIX_SETCONST    ( 9)
+/** \brief Implementation for setting all user matrix elements to constant value.
+ *
+ * \note At index CTA_MATRIX_SETCONST in the function list of the class descriptor.
+ *
+ * \param objectdata IO pointer to object data of user matrix
+ * \param val        I  constant value to set to
+ * \param datatype   I  data type of val; must be the same as data type of matrix elements
+ * \param *userdata   IO user data
+ * \param retval     O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_matrix_setconst(
+   Usr_Matrix *objectdata,
+   void *val,
+   CTA_Datatype *datatype,
+   void *userdata,
+   int *retval
+  );
+
+//#define CTA_MATRIX_FREE        (10)
+/** \brief Implementation for freeing the object data and associated resources.
+ *
+ * \note At index CTA_MATRIX_FREE in the function list of the class descriptor.
+ *
+ * \param objectdata IO pointer to object data of user matrix
+ * \param *userdata   IO user data
+ * \param retval     O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_matrix_free(
+   Usr_Matrix *objectdata,
+   CTA_Handle *userdata,
+   int *retval
+   );
+
+//#define CTA_MATRIX_EXPORT      (11)
+/** \brief Implementation for exporting user matrix.
+ *
+ * \note At index CTA_MATRIX_EXPORT in the function list of the class descriptor.
+ *
+ * \param objectdata I  pointer to object data of user matrix
+ * \param *userdata   IO user data
+ * \param retval     O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_matrix_export(
+         Usr_Matrix *objectdata,
+         CTA_Handle *userdata,
+         int *retval);
+         
+
+//#define CTA_MATRIX_GER         (12)
+/** \brief Implementation for applying the BLAS operation GER: A=A+(alpha)x(y(T))
+ * 
+ * i.e. for matrix A, vectors x and y and scalar alpha
+ *
+ * \note At index CTA_MATRIX_GER in the function list of the class descriptor.
+ *
+ * \param A           IO pointer to object data of user matrix A; must exist before calling
+ * \param alpha       I  scalar
+ * \param vx          I  handle of vector x
+ * \param vy          I  handle of vector y
+ * \param retval      O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_matrix_ger(
+         Usr_Matrix *A,
+         double *alpha,
+         CTA_Vector *vx,
+         CTA_Vector *vy,
+         int *retval);
+
+//#define         (13)
+/** \brief Implementation for inverting user matrix.
+ *
+ * \note At index CTA_MATRIX_INV in the function list of the class descriptor.
+ *
+ * \param A        IO pointer to object data of user matrix
+ * \param retval   O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_matrix_inv(
+   Usr_Matrix *A,
+   int *retval
+   );
+
+//#define CTA_MATRIX_GEMV        (14)
+/** \brief Implementation for applying the BLAS operation GEMV: A=(alpha)Ax+(beta)y
+ *
+ * i.e. for matrix A, vectors x (optionally transposed) and y and scalars alpha and beta
+ *
+ * \note At index CTA_MATRIX_GEMV in the function list of the class descriptor.
+ *
+ * \param A        IO pointer to object data of user matrix
+ * \param trans    I  transpose flag CTA_TRUE/CTA_FALSE for matrix A
+ * \param alpha    I  scalar
+ * \param vx       I  handle of vector x
+ * \param beta     I  scalar
+ * \param vy       I  handle of vector y
+ * \param retval   O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_matrix_gemv(
+         Usr_Matrix *A,
+         int *trans,
+         double *alpha,
+         CTA_Vector *vx,
+         double *beta,
+         CTA_Vector *vy,
+         int *retval);
+
+//#define CTA_MATRIX_GEMM        (15)
+/** \brief Implementation for applying the BLAS operation GEMM: C=(alpha)AB+(beta)C
+ *
+ * i.e. for matrices A,B (both optionally transposed) and C, scalars alpha and beta
+ *
+ * \note At index CTA_MATRIX_GEMM in the function list of the class descriptor.
+ *
+ * \param C        IO pointer to object data of user matrix C
+ * \param transa   I  transpose flag for matrix A (CTA_TRUE for transposing or CTA_FALSE otherwise)
+ * \param transb   I  transpose flag for matrix B (CTA_TRUE for transposing or CTA_FALSE otherwise)
+ * \param alpha    I  scalar
+ * \param A        I  pointer to object data of user matrix A
+ * \param B        I  pointer to object data of user matrix B
+ * \param beta     I  scalar
+ * \param retval   O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_matrix_gemm(
+         Usr_Matrix *C,
+         int *transa,
+         int *transb,
+         double *alpha,
+         Usr_Matrix *A,
+         Usr_Matrix *B,
+         double *beta,
+         int *retval);
+
+//#define CTA_MATRIX_AXPY        (16)
+/** \brief Implementation for applying the BLAS operation AXPY: Y=Y+(alpha)X
+ *
+ * i.e. for matrices X and Y, scalar alpha
+ *
+ * \note At index CTA_MATRIX_AXPY in the function list of the class descriptor.
+ *
+ * \param y        IO pointer to object data of user matrix y
+ * \param alpha    I  scalar
+ * \param x        I  pointer to object data of user matrix x
+ * \param retval   O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_matrix_axpy(
+   Usr_Matrix *y,
+   double *alpha,
+   Usr_Matrix *x,
+   int *retval
+  );
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/costa/native/cta/include/cta_usr_method.h b/costa/native/cta/include/cta_usr_method.h
new file mode 100644
index 000000000..19ea485a3
--- /dev/null
+++ b/costa/native/cta/include/cta_usr_method.h
@@ -0,0 +1,151 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_usr_method.h
+
+\brief In this file a description is given of the interface of user method functions.
+When creating your own user method class use the following as template.
+
+The Usr_Meth to which is being referred in this template can be substituted by your own user method object.
+
+<b>Step 1</b>: for creating your own user method class call the function CTA_Meth_DefineClass().
+
+Example:
+
+\code
+typedef CTA_Handle CTA_MethClass;
+
+CTA_Func h_func[CTA_METH_NUMFUNC];
+CTA_MethClass my_own_meth_class;
+
+ierr=CTA_Func_Create(" ",&usr_meth_create_size, hintf, &h_func[CTA_METH_CREATE_SIZE]);
+//...for all implementation functions...
+
+CTA_Meth_DefineClass("classname", h_func, &my_own_meth_class);\endcode
+
+Making a user method class involves the implementation of the following functions:
+
+CTA_METH_CREATE_SIZE  \n
+CTA_METH_CREATE_INIT  \n
+CTA_METH_RUN          \n
+CTA_METH_FREE         \n
+
+For creating an implementation function see documentation of CTA_Func_Create().
+
+<b>Step 2</b>: to create an object of the newly defined method class call CTA_Meth_Create() in the
+same way as creating a CTA_Meth object but with a different class handle, i.e. the user class handle from step 1 above.
+
+Example:
+
+\code
+Usr_Meth usrmeth; //user method object
+CTA_Handle userdata = CTA_NULL;
+CTA_Meth_Create(my_own_meth_class, &userdata, &usrmeth);
+\endcode
+\n
+<b>Note 1: </b> with object data is meant only the object itself including pointer(s) to its contents, but
+not the contents themselves.\n\n
+*/
+
+
+//#define CTA_METH_CREATE_SIZE  ( 1)
+/** \brief Implementation that forms part of the create process.
+ * 
+ * Must give the memory size of a new method user object.
+ *
+ * Example:
+ *  \code
+//in header file:
+typedef struct {
+   //your own user object data goes here...
+}USR_METH;
+
+//user implementation:
+void usr_meth_create_size(...){
+   *memsize = sizeof(USR_METH);
+   *retval = CTA_OK;
+}
+ \endcode
+ *
+ * \note At index CTA_METH_CREATE_SIZE in the function list of the class descriptor.
+ *
+ * \param memsize  O must receive the number of bytes which are necessary to store one
+                     user method class, with a pointer to the contents (data), but without the
+                     contents themselves
+ * \param retval   O must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_meth_create_size(int *memsize, int *retval);
+
+
+
+//#define CTA_METH_CREATE_INIT  ( 2)
+/** \brief Implementation that forms part of the create process.
+ *
+ * The user method object needs to be made ready for use.
+ *
+ * \note At index CTA_METH_CREATE_INIT in the function list of the class descriptor.
+ *
+ * \param objectdata I  pointer to user method object data
+ * \param userdata   IO user data
+ * \param retval     O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_meth_create_init(Usr_Meth *objectdata, void *userdata, int *retval);
+
+
+
+//#define CTA_METH_RUN          ( 3)
+/** \brief Implementation for running user method.
+ *
+ * \note At index CTA_METH_RUN in the function list of the class descriptor.
+ *
+ * \param objectdata I pointer to user method object data
+ * \param retval     O must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_meth_run(Usr_Meth *objectdata, int *retval);
+
+
+
+//#define CTA_METH_FREE         ( 4)
+/** \brief Implementation for freeing the object data and associated resources.
+ *
+ * \note At index CTA_METH_FREE in the function list of the class descriptor.
+ *
+ * \param objectdata I pointer to user method object data
+ * \param retval     O must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_meth_free(Usr_Meth *objectdata, int *retval);
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/costa/native/cta/include/cta_usr_model.h b/costa/native/cta/include/cta_usr_model.h
new file mode 100644
index 000000000..0b53a6666
--- /dev/null
+++ b/costa/native/cta/include/cta_usr_model.h
@@ -0,0 +1,437 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_usr_model.h
+
+\brief In this file a description is given of the interface of user model functions.
+When creating your own user model class use the following as template.
+
+The Usr_Model to which is being referred in this template can be substituted by your own user model object.
+
+<b>Step 1</b>: for creating your own user model class call the function CTA_Model_DefineClass().
+
+Example:
+
+\code
+typedef CTA_Handle CTA_ModelClass;
+
+CTA_Func h_func[CTA_MODEL_NUMFUNC];
+CTA_ModelClass my_own_model_class;
+
+ierr=CTA_Func_Create(" ",&usr_model_create_size, hintf, &h_func[I_CTA_MODEL_CREATE_SIZE]);
+//...for all implementation functions...
+
+CTA_Model_DefineClass("classname", h_func, &my_own_model_class);\endcode
+
+Making a user method class involves the implementation of the following functions:
+
+CTA_MODEL_CREATE_SIZE  \n
+CTA_MODEL_CREATE_INIT  \n
+CTA_MODEL_FREE         \n
+CTA_MODEL_COMPUTE      \n
+CTA_MODEL_SET_STATE    \n
+CTA_MODEL_GET_STATE    \n
+CTA_MODEL_AXPY_STATE   \n
+CTA_MODEL_AXPY_MODEL   \n
+CTA_MODEL_SET_FORC     \n
+CTA_MODEL_GET_FORC     \n
+CTA_MODEL_AXPY_FORC    \n
+CTA_MODEL_SET_PARAM    \n
+CTA_MODEL_GET_PARAM    \n
+CTA_MODEL_AXPY_PARAM   \n
+CTA_MODEL_IMPORT       \n
+CTA_MODEL_EXPORT       \n
+CTA_MODEL_GET_STATESCALING
+
+For creating an implementation function see documentation of CTA_Func_Create().
+
+<b>Step 2</b>: to create an object of the newly defined model class call CTA_Model_Create() in the
+same way as creating a CTA_Model object but with a different class handle, i.e. the user class handle from step 1 above.
+
+Example:
+
+\code
+Usr_Model usrmodel; //user model object
+CTA_Handle userdata = CTA_NULL;
+CTA_Model_Create(my_own_model_class, &userdata, &usrmodel);
+\endcode
+\n
+<b>Note 1: </b> with object data is meant only the object itself including pointer(s) to its contents, but
+not the contents themselves.\n\n
+*/
+
+
+//#define CTA_MODEL_CREATE_SIZE 
+/** \brief Implementation that forms part of the create process. 
+ * 
+ * Must give the memory size of a new user model object.
+ *
+ *  Example: 
+ *  \code
+//in header file:
+typedef struct {
+   //your own user object data goes here...
+}USR_MODEL;
+
+//user implementation:
+void usr_model_create_size(...){
+   *memsize = sizeof(USR_MODEL);
+   *retval = CTA_OK;
+}
+ \endcode
+ *
+ * \note At index I_CTA_MODEL_CREATE_SIZE in the function list of the class descriptor.
+ *
+ * \param userdata IO pointer to user data
+ * \param memsize  O  must receive the number of bytes which are necessary to store one
+                      user method class, with a pointer to the contents (data), but without the
+                      contents themselves
+ * \param retval   O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_model_create_size(void *userdata, int *memsize, int *retval);
+
+
+//#define CTA_MODEL_CREATE_INIT
+/** \brief Implementation that forms part of the create process.
+ *
+ * The user model object needs to be made ready for use.
+ *
+ * \note At index CTA_MODEL_CREATE_INIT in the function list of the class descriptor.
+ *
+ * \param hmodel      I  handle of own model instance
+ * \param objectdata  IO pointer to object data
+ * \param userdata    IO pointer to user data
+ * \param retval      O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_model_create_init(CTA_Model *hmodel, Usr_Model *objectdata, void *userdata, int *retval);
+
+
+//#define CTA_MODEL_FREE
+/** \brief Implementation for freeing the object data and associated resources.
+ *
+ * \note At index CTA_MODEL_FREE in the function list of the class descriptor.
+ *
+ * \param objectdata    IO pointer to object data
+ * \param retval        O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_model_free(Usr_Model *objectdata, int *retval);
+
+
+//#define CTA_MODEL_COMPUTE
+/** \brief Implementation for computing a simulation of user model. A simulation is computed at the given time span.
+ *
+ * 
+ *
+ * \note At index CTA_MODEL_COMPUTE in the function list of the class descriptor.
+ *
+ * \param objectdata  IO pointer to object data
+ * \param htime       I  handle of time object describing time span of simulation
+ * \param retval      O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_model_compute(Usr_Model *objectdata, CTA_Time *htime, int *retval);
+
+
+//#define CTA_MODEL_SET_STATE
+/** \brief Implementation for setting model state.
+ *
+ * \note At index I_CTA_MODEL_SET_STATE in the function list of the class descriptor.
+ *
+ * \param objectdata IO pointer to object data
+ * \param hstate     I  handle of tree-vector describing new model state
+ * \param retval     O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_model_set_state(Usr_Model *objectdata, CTA_TreeVector *hstate, int *retval);
+
+
+//#define CTA_MODEL_GET_STATE
+/** \brief Implementation for getting a model state.
+ *
+ * \note At index I_CTA_MODEL_GET_STATE in the function list of the class descriptor.
+ *
+ * \param objectdata  I  pointer to object data
+ * \param hstate      O  handle of tree-vector (if CTA_NULL a new object must be created and caller is responsible for freeing) that must receive model state description
+ * \param retval      O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_model_get_state(Usr_Model *objectdata, CTA_Handle *hstate, int *retval);
+
+
+//#define CTA_MODEL_AXPY_STATE
+/** \brief Implementation for: modelstate=alpha*state+modelstate
+ *
+ * \note At index CTA_MODEL_AXPY_STATE in the function list of the class descriptor.
+ *
+ * \note If not implemented, a default implementation will be used.
+ * \param objectdatay  IO  pointer to object data of model y
+ * \param alpha        I  scalar
+ * \param hstatex      I  handle of tree-vector  x
+ * \param retval       O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_model_axpy_state(Usr_Model *objectdatay, double *alpha, CTA_TreeVector *hstatex, int *retval);
+
+
+//#define CTA_MODEL_AXPY_MODEL
+/** \brief Implementation for: modelstatey=alpha*modelstatex+modelstatey
+ *
+ * \note At index CTA_MODEL_AXPY_MODEL in the function list of the class descriptor.
+ *
+ * \note If not implemented, a default implementation will be used.
+ * \param alpha       I  scalar
+ * \param objectdatax IO pointer to object data of model x
+ * \param objectdatay I  pointer to object data of model y
+ * \param retval      O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_model_axpy_model(Usr_Model *objectdatay, double *alpha, Usr_Model *objectdatax, int *retval);
+
+
+//#define CTA_MODEL_SET_FORC
+/** \brief Implementation for setting the forcing values of the user model
+ *
+ * \note At index CTA_MODEL_SET_FORC in the function list of the class descriptor.
+ *
+ * \param objectdata IO pointer to object data
+ * \param tspan      I  time span on which to set the forcing values
+ * \param forc       I  handle of tree-vector object with new forcings
+ * \param retval     O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_model_set_forc(Usr_Model *objectdata, CTA_Time *tspan, CTA_Handle *forc, int *retval);
+
+
+//#define CTA_MODEL_GET_FORC
+/** \brief Implementation for getting the forcing values.
+ *
+ * \note At index CTA_MODEL_GET_FORC in the function list of the class descriptor.
+ * \note If forc equals CTA_NULL a new object is created, the caller is responsible for freeing it after usage
+ *
+ * \param objectdata I  pointer to object data
+ * \param alpha      I  scalar
+ * \param forc       I  handle of tree-vector object that must receive forcing values (if equal to CTA_NULL, it must be created and caller is responsible for freeing)
+ * \param retval     O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_model_get_forc(Usr_Model *objectdata, double *alpha, CTA_TreeVector *forc, int *retval);
+
+
+//#define CTA_MODEL_AXPY_FORC
+/** \brief Implementation for performing axpy operation on the models forcings.
+ *
+ * \note At index CTA_MODEL_AXPY_FORC in the function list of the class descriptor.
+ *
+ * \note AXPY: y=alpha*x+y. y corresponds to the models
+ *       internal forcings.
+ *       The adjustment to the forcings (alpha*x) is only valid for the given 
+ *       time span. Note that the model will use y(t)+x for the given time span
+ *       where y(t) denotes the default forcings of the model.
+ *
+ * \param objectdata IO pointer to object data
+ * \param tspan      I  handle of time span object
+ * \param alpha      I  scalar
+ * \param forc       I  handle of forcings x
+ * \param retval     O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_model_axpy_forc(Usr_Model *objectdata, CTA_Time *tspan, int *alpha, CTA_TreeVector *forc, int *retval);
+
+
+//#define CTA_MODEL_SET_PARAM
+/** \brief Implementation for setting model parameters.
+ *
+ * \note At index CTA_MODEL_SET_PARAM in the function list of the class descriptor.
+ *
+ * \param objectdata I  pointer to object data
+ * \param hparam    I  handle of parameter object
+ * \param retval   O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_model_set_param(Usr_Model *objectdata, CTA_TreeVector *hparam, int *retval);
+
+
+//#define CTA_MODEL_GET_PARAM
+/** \brief Implementation for getting model parameters.
+ *
+ * \note At index CTA_MODEL_GET_PARAM in the function list of the class descriptor.
+ *
+ * \param objectdata I  pointer to object data
+ * \param hparam     O  handle of object (if equal to CTA_NULL it must be created and caller is responsible for freeing) that must receive model parameters
+ * \param retval     O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_model_get_param(Usr_Model *objectdata, CTA_TreeVector *hparam, int *retval);
+
+
+//#define CTA_MODEL_AXPY_PARAM
+/** \brief Implementation for performing axpy operation on the models parameters.
+ *
+ * \note At index CTA_MODEL_AXPY_PARAM in the function list of the class descriptor.
+ *
+ * \note AXPY: y=alpha*x+y where y corresponds to the models
+ *       internal parameters.
+ *
+ * \param objectdata IO  pointer to object data
+ * \param alpha     I  scalar
+ * \param hparamx   I  handle of parameter x that is added to parameters of the model
+ * \param retval    O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_model_axpy_param(Usr_Model *objectdata, double *alpha, CTA_TreeVector *hparamx, int *retval);
+
+//#define CTA_MODEL_GET_NOISE_COUNT
+/** \brief Implementation for returning the number of noise parameters:
+ *         the number of columns of the noise covariance matrix.
+ *
+ * \note ONLY for Stochastic models. 
+ *
+ * \param objectdata IO  pointer to object data
+ * \param nnoise      O  receives number of noise parameters
+ * \param retval      O  error status: CTA_OK if successful
+ */
+void usr_model_get_noisecount(Usr_Model *objectdata, int *nnoise, int *retval);
+
+//#define CTA_MODEL_GET_NOISE_COVAR
+/** \brief Implementation of returning covariance matrix of noise parameters.
+ *
+ * \note ONLY for Stochastic models.
+ *       The covariance matrix is represented by an array
+ *       of tree-vectors (columns of the matrix)
+ *       optionally a tree-vector is created in that case the caller of this
+ *       method is responsible for freeing that tree-vector. The input tree-vector
+ *       must be compatible (same size and or composition) as the models
+ *       internal tree-vector.
+ * \note If hstmat[icol] == CTA_NULL a new object is created, user is responsible for freeing this object.
+ *
+ * \param objectdata IO  pointer to object data
+ * \param hstmat      O  receives array of tree-vectors, *hstmat can equal CTA_NULL on calling (see note)
+ * \param retval      O  error status: CTA_OK if successful
+ */
+void usr_model_get_noisecovar(Usr_Model *objectdata, CTA_TreeVector *hstmat, int *retval);
+
+//#define CTA_MODEL_GET_OBSVALUES
+/** \brief Implementations to get (interpolate) the models internal state to the
+ *   observations described as specified in the observation
+ *   description component.
+ *
+ * \note The interface supports a the time instance for time-interpolation.
+ *       It depends on the model whether and how this is supported.
+ *
+ * \param objectdata IO  pointer to object data
+ * \param htime      I   time instance (for checking and time-interpolation if
+ *                       supported by model)
+ * \param hdescr     I   observation description component
+ * \param values     O   receives values of the models internal state corresponding to
+ *                       observations as described in hdescr
+ * \param retval     O   error status: CTA_OK if successful
+ */
+void usr_model_getobsvalues(Usr_Model *objectdata, CTA_Time *htime, CTA_ObsDescr *hdescr, CTA_Vector *values, int *retval);
+
+//#define CTA_MODEL_GET_OBSSELECT
+/** \brief Implements to get a query for the stochastic observer in order to
+ *  filter out the observations that can actually be provided by the model.
+ *
+ * \param objectdata IO  pointer to object data
+ * \param htime    I  time instance
+ * \param hdescr   I  observation description component
+ * \param sselect  O  receives a query to filter out the observations, must exist before calling
+ * \param retval     O   error status: CTA_OK if successful
+ */
+void usr_model_getobsselect(Usr_Model *objectdata, CTA_Time *htime, CTA_ObsDescr *hdescr, CTA_String *sselect, int *retval);
+
+//#define CTA_MODEL_ADD_NOISE
+/** \brief Add noise during during the given timespan at 
+ *        the Compute
+ *
+ * \note Noise is added in the compute-method
+ *
+ * \param objectdata IO  pointer to object data
+ * \param htime      I  timespan for which to compute adding noise
+ * \param retval     O   error status: CTA_OK if successful
+ */
+void usr_model_addnoise(Usr_Model *objectdata, CTA_Time *htime, int *retval);
+
+//#define CTA_MODEL_EXPORT
+/** \brief Implements the export the whole internal state of a model
+ *  This export function will export the whole state of the model such that
+ *  a so called "restart" start from this point yielding the same results.
+ *  There are no ruled on the format that is used to store the data.
+ *  Various extra otions are valid but a model will in most cases support an export 
+ *  to a file and to a COSTA pack object.
+ *  
+ *
+ * \param objectdata IO  pointer to object data
+ * \param hexport    I   target for export e.g. CTA_File or CTA_Pack
+ * \param retval     O   error status: CTA_OK if successful
+ */
+void usr_model_export(Usr_Model *objectdata, CTA_Time *hexport, int *retval);
+
+//#define CTA_MODEL_IMPORT
+/** \brief Implements the import the whole internal state of a model
+ *  After the inport the models internal state is exactly the same as the point that
+ *  the export was created using CTA_Model_Export.
+ *  
+ *
+ * \param objectdata IO  pointer to object data
+ * \param himport  I  handle with data created by CTA_MODEL_Export e.g. CTA_File or CTA_Pack
+ * \param retval     O   error status: CTA_OK if successful
+ */
+void usr_model_import(Usr_Model *objectdata, CTA_Time *himport, int *retval);
+
+
+//#define cta_model_get_statescaling
+/** \brief implements the return of an element wise scaling vector for the
+ *         model state (method cta_model_getstatescaling)
+ *  
+ *
+ * \param objectdata io  pointer to object data
+ * \param hscale     o   tree-vector containing scaling vector. is created
+ *                       when hscale==cta_null on input
+ * \param retval     o   error status: cta_ok if successful
+ */
+void usr_model_getstatescaling(Usr_Model *objectdata, CTA_TreeVector *hscale, int *retval);
+
+//#define CTA_MODEL_GETCURRENTTIME
+/** \brief implements the return of the current time of the model instance
+ *         (method CTA_Model_GetCurrentTime)
+ *  
+ *
+ * \param objectdata io  pointer to object data
+ * \param tCurrent    o  Current time of the model instance
+ * \param retval     o   error status: cta_ok if successful
+ */
+void usr_model_getcurrenttime(Usr_Model *objectdata, int* tCurrent, int* retval);
+
+//#define CTA_MODEL_GETTIMEHORIZON
+/** \brief implements the return of the time horizon of the model instance
+ *         (method CTA_Model_GetTimeHorizon)
+ *  
+ *
+ * \param objectdata io  pointer to object data
+ * \param tCurrent    o  Time horizon of the model instance
+ * \param retval      o   error status: cta_ok if successful
+ */
+void usr_model_gettimehorizon(Usr_Model *objectdata, int* tHorizon, int* retval);
+
+
diff --git a/costa/native/cta/include/cta_usr_obs_desc.h b/costa/native/cta/include/cta_usr_obs_desc.h
new file mode 100644
index 000000000..a6a5adab2
--- /dev/null
+++ b/costa/native/cta/include/cta_usr_obs_desc.h
@@ -0,0 +1,220 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_usr_obs_desc.h
+
+\brief In this file a description is given of the interface of user observation descriptor functions.
+When creating your own user observation descriptor class use the following as template.
+
+The CTA_Usr_ObsDescr to which is being referred in this template can be substituted by your own user observation descriptor object.
+
+<b>Step 1</b>: for creating your own user model class call the function CTA_Model_DefineClass().
+
+Example:
+
+\code
+typedef CTA_Handle CTA_ObsDescrClass;
+
+CTA_Func h_func[CTA_OBSDESCR_NUMFUNC];
+CTA_ObsDescrClass my_own_obsdescr_class;
+
+ierr=CTA_Func_Create(" ",&usr_obsdescr_create_size, hintf, &h_func[CTA_OBSDESCR_CREATE_SIZE]);
+//...for all implementation functions...
+
+CTA_ObsDescr_DefineClass("classname", h_func, &my_own_obsdescr_class);\endcode
+
+Making a new observation descriptor class involves the implementation of the following functions:
+
+CTA_OBSDESCR_CREATE_SIZE         \n
+CTA_OBSDESCR_CREATE_INIT         \n
+CTA_OBSDESCR_FREE                \n
+CTA_OBSDESCR_GET_PROPERTIES      \n
+CTA_OBSDESCR_GET_KEYS            \n
+CTA_OBSDESCR_COUNT_OBSERVATIONS  \n
+CTA_OBSDESCR_COUNT_PROPERTIES    \n
+CTA_OBSDESCR_EXPORT
+
+For creating an implementation function see documentation of CTA_Func_Create().
+
+<b>Step 2</b>: to create an object of the newly defined observation descriptor class call CTA_ObsDescr_Create() in the
+same way as creating a CTA_ObsDescr object but with a different class handle, i.e. the user class handle from step 1 above.
+
+Example:
+
+\code
+Usr_ObsDescr usrobsdescr; //user observation descriptor object
+CTA_Handle userdata = CTA_NULL;
+CTA_ObsDescr_Create(my_own_obsdescr_class, &userdata, &usrobsdescr);
+\endcode
+\n
+<b>Note 1: </b> with object data is meant only the object itself including pointer(s) to its contents, but
+not the contents of the observation descriptor.\n\n
+*/
+
+
+
+//#define CTA_OBSDESCR_CREATE_SIZE         ( 1)
+/** \brief Implementation that forms part of the create process.
+ * 
+ * Must give the memory size of a new object.
+ *
+ * Example:
+ *  \code
+//in header file:
+typedef struct {
+   //your own user object data goes here...
+}USR_OBSDESCR;
+
+//user implementation:
+void usr_obsdescr_create_size(...){
+   *memsize = sizeof(USR_OBSDESCR);
+   *retval = CTA_OK;
+}
+ \endcode
+ *
+ * \note At index CTA_OBSDESCR_CREATE_SIZE in the function list of the class descriptor.
+ *
+ * \param memsize  O  must receive the number of bytes which are necessary to store one
+                      user stochastic observer class, with a pointer to the contents (data), but without the
+                      contents themselves
+ * \param retval   O  must receive return value of user implementation function
+
+ * \return no return value
+ */
+void usr_obsdescr_create_size(int *memsize, int *retval);
+
+
+//#define CTA_OBSDESCR_CREATE_INIT         ( 2)
+/** \brief Implementation that forms part of the create process.
+ *
+ * The user observation descriptor object needs to be made ready for use.
+ *
+ * \note At index CTA_OBSDESCR_CREATE_INIT in the function list of the class descriptor.
+ *
+ * \param myhandle        I  Handle assigned by COSTA
+ * \param objectdata I  pointer to object data of user stochastic observer
+ * \param userdata   IO user data
+ * \param retval     O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_obsdescr_create_init(CTA_ObsDescr myhandle, Usr_ObsDescr *objectdata,
+                              void *userdata, int *retval);
+
+
+//#define CTA_OBSDESCR_FREE                ( 3)
+/** \brief Implementation for freeing the object data and associated resources.
+ *
+ * \note At index CTA_OBSDESCR_FREE in the function list of the class descriptor.
+ *
+ * \param objectdata I  pointer to object data of user stochastic observer
+ * \param retval     O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_obsdescr_free(Usr_ObsDescr *objectdata, int *retval);
+
+
+//#define CTA_OBSDESCR_GET_PROPERTIES      ( 4)
+/** \brief Implementation for gettings properties associated with given key.
+ *
+ * \note At index CTA_OBSDESCR_GET_PROPERTIES in the function list of the class descriptor.
+ *
+ * \param objectdata I  pointer to object data of user stochastic observer
+ * \param key        I  description of property key
+ * \param properties O  vector that must receive properties associated with given key; must exist before calling
+ * \param datatype   I  data type of elements in properties vector
+ * \param retval     O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_obsdescr_get_properties(Usr_ObsDescr *objectdata,
+          const char *key, CTA_Vector *properties, CTA_Datatype *datatype, int *retval);
+
+
+//#define CTA_OBSDESCR_GET_KEYS            ( 5)
+/** \brief Implementation for getting all key names of user observation descriptor.
+ *
+ * \note At index CTA_OBSDESCR_GET_KEYS in the function list of the class descriptor.
+ *
+ * \param objectdata I  pointer to object data of user stochastic observer
+ * \param keys       O  handle vector that must receive key descriptions; must exist before calling
+ * \param retval     O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_obsdescr_get_keys(Usr_ObsDescr *objectdata, CTA_Vector *keys, int *retval);
+
+
+//#define CTA_OBSDESCR_COUNT_OBSERVATIONS  ( 6)
+/** \brief Implementation for counting number of observations.
+ *
+ * \note At index CTA_OBSDESCR_COUNT_OBSERVATIONS in the function list of the class descriptor.
+ *
+ * \param objectdata I  pointer to object data of user stochastic observer
+ * \param nobs       O  must receive number of observations
+ * \param retval     O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_obsdescr_count_observations(Usr_ObsDescr *objectdata, int* nobs, int *retval);
+
+
+//#define CTA_OBSDESCR_COUNT_PROPERTIES    ( 7)
+/** \brief Implementation for counting number of properties.
+ *
+ * \note At index CTA_OBSDESCR_COUNT_PROPERTIES in the function list of the class descriptor.
+ *
+ * \param objectdata I  pointer to object data of user stochastic observer
+ * \param nkeys      O  must receive number of property keys
+ * \param retval     O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_obsdescr_count_properties(Usr_ObsDescr *objectdata, int* nkeys, int *retval);
+
+
+//#define CTA_OBSDESCR_EXPORT              ( 8)
+/** \brief Implementation for exporting user observation descriptor.
+ *
+ * \note At index CTA_OBSDESCR_EXPORT in the function list of the class descriptor.
+ *
+ * \param objectdata I  pointer to object data of user stochastic observer
+ * \param userdata   IO user data
+ * \param retval     O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_obsdescr_export(Usr_ObsDescr *objectdata, void *userdata, int *retval);
+
+
+//#define CTA_OBSDESCR_SELECTION           ( 9)
+/** \brief Create a new observation description that is subset of existing
+ *         observation description.
+ *
+ * \param objectdata      I  object data of observation description of the
+ *                           initial observation description insntance
+ * \param selection       I  selection criterion (subset of SQL)
+ * \param reltab          O  Relation table specifying the relation between
+ *                           the original and new observation description
+ *                           component. Note no relation table is created when 
+ *                           reltab==CTA_NULL on enty
+ * \param objectdata_out  O  new observation description created subset
+ * \param retval          O  receives return value
+ */
+void usr_obsdescr_createsel(Usr_ObsDescr *descr,
+          CTA_String *selection, CTA_RelTable reltab, 
+          CTA_ObsDescr myhandle_out,
+          Usr_ObsDescr *descrout, int *retval);
+
+
diff --git a/costa/native/cta/include/cta_usr_stoch_observer.h b/costa/native/cta/include/cta_usr_stoch_observer.h
new file mode 100644
index 000000000..cca48d63b
--- /dev/null
+++ b/costa/native/cta/include/cta_usr_stoch_observer.h
@@ -0,0 +1,294 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_usr_stoch_observer.h
+
+\brief In this file a description is given of the interface of user stochastic observer functions.
+When creating your own user stochastic observer class use the following as template.
+
+The Usr_SObs to which is being referred in this template can be substituted by your own user stochastic observer object.
+
+<b>Step 1</b>: for creating your own user stochastic observer class call the function CTA_SObs_DefineClass().
+
+Example:
+
+\code
+typedef CTA_Handle CTA_SObsClass;
+
+CTA_Func h_func[CTA_SOBS_NUMFUNC];
+CTA_SObsClass my_own_sobs_class;
+CTA_ObsDescrClass obsdescrclass; //observation descriptor class that is being used by stochastic observer
+
+ierr=CTA_Func_Create(" ",&usr_sobs_create_size, hintf, &h_func[CTA_SOBS_CREATE_SIZE]);
+//...for all implementation functions...
+
+CTA_SObs_DefineClass("classname", h_func, obsdescrclass, &my_own_sobs_class);\endcode
+
+
+
+Making a user stochastic observer class involves the implementation of the following functions:
+
+CTA_SOBS_CREATE_SIZE         \n
+CTA_SOBS_CREATE_INIT         \n
+CTA_SOBS_FREE                \n
+CTA_SOBS_CREATE_SELECTION    \n
+CTA_SOBS_COUNT               \n
+CTA_SOBS_GET_OBS_DESCRIPTION \n
+CTA_SOBS_GET_VALUES          \n
+CTA_SOBS_GET_REALISATION     \n
+CTA_SOBS_GET_EXPECTATION     \n
+CTA_SOBS_EVALUATE_PDF        \n
+CTA_SOBS_GET_COV_MATRIX      \n
+CTA_SOBS_GET_VARIANCE        \n
+CTA_SOBS_EXPORT              \n
+CTA_SOBS_GET_TIMES           \n
+
+For creating an implementation function see documentation of CTA_Func_Create().
+
+<b>Step 2</b>: to create an object of the newly defined stochastic observer class call CTA_SObs_Create() in the
+same way as creating a CTA_SObs object but with a different class handle, i.e. the user class handle from step 1 above.
+
+Example:
+
+\code
+Usr_SObs usrsobs; //user stochastic observer object
+CTA_Handle userdata = CTA_NULL;
+CTA_SObs_Create(my_own_sobs_class, &userdata, &usrsobs);
+\endcode
+\n
+<b>Note 1: </b> with object data is meant only the object itself including pointer(s) to its contents, but
+not the contents of the stochastic observer.\n\n
+*/
+
+
+
+//#define CTA_SOBS_CREATE_SIZE         ( 1)
+/** \brief Implementation that forms part of the create process.
+ * 
+ * Must give the memory size of a new user stochastic observer object.
+ * 
+ * Example:
+ *  \code
+//in header file:
+typedef struct {
+   //your own user object data goes here...
+}USR_SOBS;
+
+//user implementation:
+void usr_sobs_create_size(...){
+   *memsize = sizeof(USR_SOBS);
+   *retval = CTA_OK;
+}
+ \endcode
+ *
+ * \note At index CTA_SOBS_CREATE_SIZE in the function list of the class descriptor.
+ *
+ * \param memsize  O  must receive the number of bytes which are necessary to store one
+                      user stochastic observer class, with a pointer to the contents (data), but without the
+                      contents themselves
+ * \param retval   O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_sobs_create_size(int *memsize, int *retval);
+
+
+//#define CTA_SOBS_CREATE_INIT         ( 2)
+/** \brief Implementation that forms part of the create process.
+ *
+ * The user stochastic observer object needs to be made ready for use.
+ *
+ * \note At index CTA_SOBS_CREATE_INIT in the function list of the class descriptor.
+ *
+ * \param sobsdata I  pointer to user object data
+ * \param userdata IO user data
+ * \param retval   O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_sobs_create_init(Usr_SObs *sobsdata, void *userdata, int *retval);
+
+
+//#define CTA_SOBS_FREE                ( 3)
+/** \brief Implementation for freeing the object data and associated resources.
+ *
+ * \note At index CTA_SOBS_FREE in the function list of the class descriptor.
+ *
+ * \param sobsdata IO pointer to user object data
+ * \param userdata IO user data
+ * \param retval   O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_sobs_free(Usr_SObs *sobsdata, void *userdata, int *retval);
+
+
+//#define CTA_SOBS_CREATE_SELECTION    ( 4)
+/** \brief Implementation for creating a new stochastic observer that is subset of existing stochastic observer.
+ *
+ * \note At index CTA_SOBS_CREATE_SELECTION in the function list of the class descriptor.
+ * \note CTA_SObs_CreateTimSel() calls this implementation with userdata a handle of a time selection string.
+ *
+ * \param sobsdatain  I  pointer to object data of in-object
+ * \param userdata    IO user data
+ * \param sobsdataout IO pointer to object data of out-object
+ * \param retval      O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_sobs_create_selection(Usr_SObs *sobsdatain, void *userdata, Usr_SObs *sobsdataout, int *retval);
+
+
+//#define CTA_SOBS_COUNT               ( 5)
+/** \brief Implementation for counting the number of measures 
+in user stochastic observer object.
+ *
+ * \note At index CTA_SOBS_COUNT in the function list of the class descriptor.
+ *
+ * \param sobsdata IO pointer to user object data
+ * \param nmeasr   O  must receive number of measures in object
+ * \param retval   O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_sobs_count(Usr_SObs *sobsdata, int *nmeasr, int *retval);
+
+
+//#define CTA_SOBS_GET_OBS_DESCRIPTION ( 6)
+/** \brief Implementation for creating the observation description corresponding to the stochastic observer.
+ *
+ * \note At index CTA_SOBS_GET_DESCRIPTION in the function list of the class descriptor.
+ * \note Caller is responsible for freeing the here created observation description
+ * \note THIS IMPLEMENTATION FUNCTION IS NOT SUPPORTED
+ *
+ * \param sobsdata IO pointer to user object data
+ * \return no return value
+ */
+void usr_sobs_get_description(Usr_SObs *sobsdata);
+
+
+//#define CTA_SOBS_GET_VALUES          ( 7)
+/** \brief Implementation for getting all values of user stochastic observer object.
+ *
+ * \note At index CTA_SOBS_GET_VALUES in the function list of the class descriptor.
+ *
+ * \param sobsdata IO pointer to user object data
+ * \param hvec     O  handle of vector that must receive the values; must exist before calling;
+                      must be of appropriate data type
+ * \param retval   O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_sobs_get_values(Usr_SObs *sobsdata, CTA_Vector *hvec, int *retval);
+
+
+//#define CTA_SOBS_GET_REALISATION     ( 8)
+/** \brief Implementation for calculating stochastic realizations for all the 
+           measurements in a user stochastic observer.
+ *
+ * \note At index CTA_SOBS_GET_REALISATION in the function list of the class descriptor.
+ *
+ * \param sobsdata I pointer to user object data
+ * \param hvec     O handle of vector that must receive realisation; must exist before calling
+ * \param retval   O must receive return value of implementation function
+ * \return no return value
+ */
+void usr_sobs_get_realisation(Usr_SObs *sobsdata, CTA_Vector *hvec, int *retval);
+
+
+//#define CTA_SOBS_GET_EXPECTATION     ( 9)
+/** \brief Implementation for getting the expectation of the probability density function of the mesurements.
+ *
+ * \note At index CTA_SOBS_GET_EXPECTATION in the function list of the class descriptor.
+ *
+ * \param sobsdata I  pointer to user object data
+ * \param hvec     O  handle of vector that must receive expectation; must exist before calling
+ * \param retval   O  must receive return value of implementation function
+ * \return no return value
+ */
+void usr_sobs_get_expectation(Usr_SObs *sobsdata, CTA_Vector *hvec, int *retval);
+
+
+//#define CTA_SOBS_EVALUATE_PDF        (10)
+/** \brief Implementation for getting the value of the probability density function of the mesurements at given location.
+ *
+ * \note At index CTA_SOBS_EVALUATE_PDF in the function list of the class descriptor.
+ *
+ * \param sobsdata I  pointer to user object data
+ * \param location I  handle of vector with location for evaluating pdf
+ * \param pdfvalue O  handle of vector that must receive result of evaluation; must exist before calling
+ * \param retval   O  must receive return value of implementation function
+ * \return no return value
+ */
+void usr_sobs_evaluate_pdf(Usr_SObs *sobsdata, CTA_Vector *location, CTA_Vector *pdfvalue, int *retval);
+
+
+//#define CTA_SOBS_GET_COV_MATRIX      (11)
+/** \brief Implementation for getting all the variances of the measurements in a
+           user stochastic observer.
+ *
+ * \note At index CTA_SOBS_GET_COV_MATRIX in the function list of the class descriptor.
+ *
+ * \param sobsdata I  pointer to user object data
+ * \param hmatrix  O  handle of matrix object that must receive the covariance matrix; must exist before calling
+ * \param retval   O  must receive return value of implementation function
+ * \return no return value
+ */
+void usr_sobs_get_cov_matrix(Usr_SObs *sobsdata, CTA_Matrix *hmatrix, int *retval);
+
+
+//#define CTA_SOBS_GET_VARIANCE        (12)
+/** \brief Implementation for calculating all variances or standard deviations. 
+ *
+ * \note At index CTA_SOBS_GET_VARIANCE in the function list of the class descriptor.
+ *
+ * \param sobsdata I  pointer to user object data
+ * \param variance O  handle of vector that must receive variances / standard
+                      deviations; must exist before calling
+ * \param varflag  I  variance flag: CTA_TRUE for calculating variances,
+                      CTA_FALSE for standard deviations
+ * \param retval   O  must receive return value of implementation function
+ * \return no return value
+ */
+void usr_sobs_get_variance(Usr_SObs *sobsdata, CTA_Vector *variance, int *varflag, int *retval);
+
+
+//#define CTA_SOBS_EXPORT              (13)
+/** \brief Implementation for exporting user stochastic observer object.
+ *
+ * \note At index CTA_SOBS_EXPORT in the function list of the class descriptor.
+ *
+ * \param sobsdata I  pointer to user object data
+ * \param userdata IO pointer to user data
+ * \param retval   O  must receive return value of implementation function
+ * \return no return value
+ */
+void usr_sobs_export(Usr_SObs *sobsdata, void *userdata, int *retval);
+
+
+//#define CTA_SOBS_GET_TIMES           (14)
+/** \brief Implementation for getting all times associated with the measurements.
+ *
+ * \note At index CTA_SOBS_GET_TIMES in the function list of the class descriptor.
+ *
+ * \param sobsdata I  pointer to user object data
+ * \param times    O  handle of vector that must receive times; must exist before calling
+ * \param retval   O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_sobs_get_times(Usr_SObs *sobsdata, CTA_Vector *times, int *retval);
+
+
+
+
diff --git a/costa/native/cta/include/cta_usr_vector.h b/costa/native/cta/include/cta_usr_vector.h
new file mode 100644
index 000000000..1e0a3913d
--- /dev/null
+++ b/costa/native/cta/include/cta_usr_vector.h
@@ -0,0 +1,440 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/* parameters for different functions */
+#define CTA_VECTOR_CREATE_SIZE ( 1)
+#define CTA_VECTOR_CREATE_INIT ( 2)
+#define CTA_VECTOR_GETVALS     ( 3)
+#define CTA_VECTOR_SETVALS     ( 4)
+#define CTA_VECTOR_SETCONST    ( 5)
+#define CTA_VECTOR_SCAL        ( 6)
+#define CTA_VECTOR_COPY        ( 7)
+#define CTA_VECTOR_AXPY        ( 8)
+#define CTA_VECTOR_DOT         ( 9)
+#define CTA_VECTOR_NRM2        (10)
+#define CTA_VECTOR_AMAX        (11)
+#define CTA_VECTOR_GETMAXLEN   (12)
+#define CTA_VECTOR_FREE        (13)
+#define CTA_VECTOR_GETVAL      (14)
+#define CTA_VECTOR_SETVAL      (15)
+#define CTA_VECTOR_EXPORT      (16)
+#define CTA_VECTOR_PRINT_TABLE (17)
+#define CTA_VECTOR_APPENDVAL   (18)
+#define CTA_VECTOR_ELMDIV      (19)
+#define CTA_VECTOR_IMPORT      (20)
+#define CTA_VECTOR_NUMFUNC     (21)
+
+/**
+\file  cta_usr_vector.h
+
+\brief In this file a description is given of the interface of user vector functions.
+When creating your own vector class use the following as template.
+
+The Usr_Vector to which is being referred in this template can be substituted by your own user vector object.
+
+<b>Step 1</b>: for creating your own user vector class call the function CTA_Vector_DefineClass().
+
+Example:
+
+\code
+typedef CTA_Handle CTA_VectorClass;
+
+CTA_Func h_func[CTA_VECTOR_NUMFUNC];
+CTA_VectorClass my_own_vector_class;
+
+ierr=CTA_Func_Create(" ",&usr_vector_create_size, hintf, &h_func[CTA_VECTOR_CREATE_SIZE]);
+//...for all implementation functions...
+
+CTA_Vector_DefineClass("classname", h_func, &my_own_vector_class);\endcode
+
+Making a user vector class involves the implementation of the following functions:
+
+<code>CTA_VECTOR_CREATE_SIZE \n
+CTA_VECTOR_CREATE_INIT \n
+CTA_VECTOR_GETVALS     \n
+CTA_VECTOR_SETVALS     \n
+CTA_VECTOR_SETCONST    \n
+CTA_VECTOR_SCAL        \n
+CTA_VECTOR_COPY        \n
+CTA_VECTOR_AXPY        \n
+CTA_VECTOR_DOT         \n
+CTA_VECTOR_NRM2        \n
+CTA_VECTOR_AMAX        \n
+CTA_VECTOR_GETMAXLEN   \n
+CTA_VECTOR_FREE        \n
+CTA_VECTOR_GETVAL      \n
+CTA_VECTOR_SETVAL      \n
+CTA_VECTOR_EXPORT      \n
+CTA_VECTOR_PRINT_TABLE \n
+CTA_VECTOR_APPENDVAL   \n
+CTA_VECTOR_ELMDIV      \n
+CTA_VECTOR_IMPORT      \n</code>
+
+For creating an implementation function see documentation of CTA_Func_Create().
+
+<b>Step 2</b>: to create an object of the newly defined vector class call CTA_Vector_Create() in the
+same way as creating a CTA_Vector but with a different class handle, i.e. the user class handle from step 1 above.
+
+Example:
+
+\code
+Usr_Vector usrvec; //user vector object
+int nelements = 10;
+CTA_Datatype datatype = CTAI_String2Type("CTA_STRING");
+CTA_Handle userdata = CTA_NULL;
+CTA_Vector_Create(my_own_vector_class, nelements, datatype, &userdata, &usrvec);
+\endcode
+\n
+<b>Note 1: </b> with object data is meant only the object itself including pointer(s) to its contents, but
+not the contents of the vector.\n\n
+<b>Note 2: </b> vector indices start from 1.\n\n
+*/
+
+
+//CTA_VECTOR_CREATE_SIZE ( 1)
+/** \brief Implementation that forms part of the create process.
+ * 
+ * Must give the memory size of a new object.
+ *
+ * Example:
+ *  \code
+//in header file:
+typedef struct {
+   //your own user object data goes here...
+}USR_VECTOR;
+
+//user implementation:
+void usr_vector_create_size(...){
+   *memsize = sizeof(USR_VECTOR);
+   *retval = CTA_OK;
+}
+ \endcode
+ *
+ * \note At index CTA_VECTOR_CREATE_SIZE in the function list of the class descriptor.
+ *
+ * \param n        I  dimension
+ * \param datatype I  data type of the vector elements
+ * \param userdata IO user data
+ * \param retval   O  must receive return value of user implementation function
+ * \param memsize  O  must receive the number of bytes which are necessary to store one
+                      user vector class, with a pointer to the contents (data), but without the
+                      contents themselves
+ * \return no return value
+ */
+void usr_vector_create_size(int *n, CTA_Datatype *datatype,
+                            void *userdata, int *retval, int *memsize);
+
+
+//CTA_VECTOR_CREATE_INIT ( 2)
+/** \brief Implementation that forms part of the create process.
+ *
+ * The user vector object needs to be made ready for use.
+ *
+ * \note At index CTA_VECTOR_CREATE_INIT in the function list of the class descriptor.
+ *
+ * \param objectdata IO pointer to object data of user vector
+ * \param n          I  size of vector data
+ * \param datatype   I  data type of the vector elements
+ * \param userdata   IO user data
+ * \param retval     O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_vector_create_init(Usr_Vector *objectdata, int *n, CTA_Datatype *datatype,
+                            void *userdata, int *retval);
+
+
+//CTA_VECTOR_GETVALS     ( 3)
+/** \brief Implementation for retrieving all values of the user vector.
+ *
+ * \note At index CTA_VECTOR_GETVALS in the function list of the class descriptor.
+ *
+ * \param x        I   pointer to object data of the user vector
+ * \param vals     O   must receive the values of the user vector
+ * \param n        I   number of elements in the vals buffer, must equal number of elements in user vector
+ * \param datatype I   data type of elements of vals, must be the same as data type of user vector x
+ * \param userdata IO  user data
+ * \param retval   O   must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_vector_getvals(Usr_Vector *x, void *vals, int *n, CTA_Datatype *datatype,
+                         void *userdata, int *retval);
+
+
+//CTA_VECTOR_SETVALS     ( 4)
+/** \brief Implementation for setting all values of the user vector.
+ *
+ * \note At index CTA_VECTOR_SETVALS in the function list of the class descriptor.
+ *
+ * \param x        I   pointer to object data of user vector
+ * \param vals     I   new values
+ * \param n        I   number of elements in the vals buffer, must equal number of elements in user vector
+ * \param datatype I   data type of elements of vals, must be the same as data type of user vector x
+ * \param userdata IO  user data
+ * \param retval   O   must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_vector_setvals(Usr_Vector *x, void *vals, int *n, CTA_Datatype *datatype,
+                         void *userdata, int *retval);
+
+
+//CTA_VECTOR_SETCONST    ( 5)
+/** \brief Implementation for setting all values of user vector to same value.
+ *
+ * \note At index CTA_VECTOR_SETCONST in the function list of the class descriptor.
+ *
+ * \param x        I  pointer to object data of user vector
+ * \param val      I  value to set vector elements to
+ * \param datatype I  data type of elements of vals, must be the same as data type of user vector x
+ * \param userdata IO user data
+ * \param retval   O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_vector_setconst(Usr_Vector *x, void *val, CTA_Datatype *datatype,
+                          void *userdata, int *retval);
+
+
+//CTA_VECTOR_SCAL        ( 6)
+/** \brief Implementation for scaling the user vector: x=ax
+ *
+ * \note At index CTA_VECTOR_SCAL in the function list of the class descriptor.
+ *
+ * \param x        IO pointer to object data of user vector
+ * \param alpha    I  scaling factor
+ * \param userdata IO user data
+ * \param retval   O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_vector_scal(Usr_Vector *x, double *alpha, void *userdata, int *retval);
+
+
+//CTA_VECTOR_COPY        ( 7)
+/** \brief Implementation for copying the user vector.
+ *
+ * \note At index CTA_VECTOR_COPY in the function list of the class descriptor.
+ *
+ * \param x        I  pointer to object data of a user vector
+ * \param y        O  pointer to object data of user vector y that must receive copy of x; must exist before calling
+ * \param userdata IO user data
+ * \param retval   O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_vector_copy(Usr_Vector *x, Usr_Vector *y, void *userdata,
+                      int *retval);
+
+
+//CTA_VECTOR_AXPY        ( 8)
+/** \brief Implementation for applying the BLAS operation axpy: y=y+ax
+ *
+ * \note At index CTA_VECTOR_AXPY in the function list of the class descriptor.
+ *
+ * \param y        IO pointer to object data of user vector y; must exist before calling
+ * \param alpha    I  scaling factor
+ * \param x        I  pointer to object data of user vector x
+ * \param userdata IO user data
+ * \param retval   O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_vector_axpy(Usr_Vector *y, double *alpha, Usr_Vector *x,
+                      void *userdata, int *retval);
+
+//CTA_VECTOR_DOT         ( 9)
+/** \brief Implementation for calculating the dot product of two user vectors x.y
+ *
+ * \note At index CTA_VECTOR_DOT in the function list of the class descriptor.
+ *
+ * \param y        I  pointer to object data of user vector y
+ * \param x        I  pointer to object data of user vector x
+ * \param userdata IO user data
+ * \param dotprod  O  must receive the dot product
+ * \param retval   O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_vector_dot(Usr_Vector *x, Usr_Vector *y, void *userdata,
+                     double *dotprod, int *retval);
+
+//CTA_VECTOR_NRM2        (10)
+/** \brief Implementation for calculating the 2-norm of a user vector
+ *
+ * \note At index CTA_VECTOR_NRM2 in the function list of the class descriptor.
+ *
+ * \param x        I  pointer to object data of user vector
+ * \param userdata IO user data
+ * \param norm2    O  must receive the 2-norm
+ * \param retval   O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_vector_nrm2(Usr_Vector *x, void *userdata, double *norm2, int *retval);
+
+//CTA_VECTOR_AMAX        (11)
+/** \brief Implementation for getting the index of the element with the largest maximum value. First index is 1.
+ *
+ * \note At index CTA_VECTOR_AMAX in the function list of the class descriptor.
+ *
+ * \param x        I  pointer to object data of user vector
+ * \param userdata IO user data
+ * \param iloc     O  must receive index of maximum value
+ * \param retval   O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_vector_amax(Usr_Vector *x, void *userdata, int *iloc, int *retval);
+
+
+//CTA_VECTOR_GETMAXLEN   (12)
+/** \brief Implementation for getting the maximum length in case of vector with string elements. 
+ *
+ * \note At index CTA_VECTOR_GETMAXLEN in the function list of the class descriptor.
+ *
+ * \param x        I  pointer to object data of user vector
+ * \param userdata IO user data
+ * \param maxlen   O  must receive maximum length
+ * \param retval   O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_vector_getmaxlen(Usr_Vector *x, void *userdata, int *maxlen, int *retval);
+
+
+//CTA_VECTOR_FREE        (13)
+/** \brief Implementation for freeing the user vector.
+ *
+ * \note At index CTA_VECTOR_FREE in the function list of the class descriptor.
+ *
+ * \param x        IO pointer to object data of user vector
+ * \param userdata IO user data
+ * \param retval   O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_vector_free(Usr_Vector *x, void *userdata, int *retval);
+
+
+//CTA_VECTOR_GETVAL      (14)
+/** \brief Implementation for retrieving a single value of user vector at given index
+ *
+ * \note At index CTA_VECTOR_GETVAL in the function list of the class descriptor.
+ *
+ * \param x        I  pointer to object data of user vector
+ * \param i        I  index of value to retrieve
+ * \param val      O  must receive value; must exist before calling
+ * \param datatype I  data type of *val, must be the same as data type of user vector x
+ * \param userdata IO user data
+ * \param retval   O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_vector_getval(Usr_Vector *x, int *i, void *val, CTA_Datatype *datatype,
+                        void *userdata, int *retval);
+
+
+//CTA_VECTOR_SETVAL      (15)
+/** \brief Implementation for setting a single value of a user vector.
+ *
+ * \note At index CTA_VECTOR_SETVAL in the function list of the class descriptor.
+ *
+ * \param x        I  pointer to object data of user vector
+ * \param i        I  index of value to set
+ * \param val      I  new data of value
+ * \param datatype I  data type of *val, must be the same as data type of user vector x
+ * \param userdata IO user data
+ * \param retval   O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_vector_setval(Usr_Vector *x, int *i, void *val, CTA_Datatype *datatype,
+                        void *userdata, int *retval);
+
+
+//CTA_VECTOR_EXPORT      (16)
+/** \brief Implementation for exporting a user vector.
+ *
+ * \note At index CTA_VECTOR_EXPORT in the function list of the class descriptor.
+ * \n For example implementation for exporting a user vector to a pack object or to screen.
+ *
+ * \param x        I  pointer to object data of user vector
+ * \param userdata IO user data
+ * \param retval   O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_vector_export(Usr_Vector *x, void userdata, int *retval);
+
+
+//CTA_VECTOR_PRINT_TABLE (17)
+/** \brief Implementation for printing a table.
+ *
+ * \note At index CTA_VECTOR_PRINT_TABLE in the function list of the class descriptor.
+ *
+ * \param table        IO array of vectors that forms the table to be filled
+ * \param ncolumns     I  number of columns
+ * \param formats      I  user vector
+ * \param retval       O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_vector_print_table(Usr_Vector **table, int *ncolumns,
+                      Usr_Vector * formats,     int *retval);
+
+
+//CTA_VECTOR_APPENDVAL   (18)
+/** \brief Implementation for appending a single value to the user vector.
+ *
+ * \note At index CTA_VECTOR_APPENDVAL in the function list of the class descriptor.
+ *
+ * \param x            IO pointer to object data of user vector
+ * \param val          I  value to append
+ * \param datatype     I  data type of value to append, must be the same as data type of user vector x
+ * \param retval       O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_vector_appendval(Usr_Vector *x, void *val, CTA_Datatype *datatype,
+                           int *retval);
+
+
+//CTA_VECTOR_ELMDIV      (19)
+/** \brief Implementation for element-wise division of user vector.
+ *
+ * \note At index CTA_VECTOR_ELMDIV in the function list of the class descriptor.
+ *
+ * \param y            IO pointer to object data of receiving user vector
+ * \param x            I  pointer to object data of user vector x
+ * \param userdata     IO user data
+ * \param retval       O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_vector_elmdiv(Usr_Vector *y, CTAI_Vector_blas *x,
+                      void *userdata, int *retval);
+
+
+
+//CTA_VECTOR_IMPORT      (20)
+/** \brief Implementation for importing a user vector.
+ *
+ * \note At index CTA_VECTOR_IMPORT in the function list of the class descriptor.
+ *
+ * \param x            O  pointer to object data of user vector that must receive import result
+ * \param userdata     IO user data
+ * \param retval       O  must receive return value of user implementation function
+ * \return no return value
+ */
+void usr_vector_import(Usr_Vector *x, void userdata, int *retval);
+
+
+
+
+
+
+
+
+
+
+
diff --git a/costa/native/cta/include/cta_util_methods.h b/costa/native/cta/include/cta_util_methods.h
new file mode 100644
index 000000000..f96211336
--- /dev/null
+++ b/costa/native/cta/include/cta_util_methods.h
@@ -0,0 +1,90 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_util_methods.h
+\brief Utility routines for creating data assimilation methods. 
+\note This is just a set of utility routines it does not define any COSTA component
+
+*/
+
+#ifndef CTA_UTIL_METHODS_H
+#define CTA_UTIL_METHODS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief print the predicted values and the observed values
+ *
+ * \param fgModel     O Model of foreground run (with data
+ *                      assimilation) or Vector with the predicted
+ *                      values of the foreground run.
+ *                      if set to CTA_NULL NaN will be printed as result
+ * \param bgModel     I Model of background run (without data
+ *                      assimilation) or Vector with the predicted
+ *                      values of the background run.
+ *                      if set to CTA_NULL NaN will be printed as result
+ * \param sObs        I Stochastic observer
+ * \param time        I Corresponding time
+ * \param file        I Output file (note CTA_FILE_STDOUT prints to screen)
+ * \param printHeader I Print header CTA_TRUE/CTA_FALSE
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Util_MethodsPrintObservations(CTA_Handle fgModel,
+   CTA_Handle bgModel, CTA_StochObs sObs, CTA_Time time, CTA_File file,
+    int printHeader);
+
+/** \brief Make an initial selection of the observations
+ *
+ *  The selection of observations is based on the given simulation timespan
+ *  and the criterion provided by the model (CTA_Model_GetObsSelect)
+ *
+ *  \note sObsSel is created and should be freed by the caller of this routine
+ *
+ * \param model      I Model (for CTA_Model_GetObsSelect)
+ * \param sObsAll    I All observations
+ * \param spanSim    I Simulation timespan
+ * \param sObsSel    O Selection of observations
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Util_MethodsSelectObservations(CTA_Model model, CTA_StochObs sObsAll, CTA_Time spanSim, CTA_StochObs *sObsSel);
+
+/** \brief Create an output file for filter predictions at station
+ *         locations
+ *
+ *  The routine  CTA_Util_MethodsPrintObservations can be used for writing
+ *  the results. 
+ *  \note the header is written by this call
+ *
+ * \param   stationFile  (I) Name of result file
+ * \param   fStationFile (O) Handle to result file
+ *
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Util_MethodsOpenResultFile(char *stationFile,
+                                   CTA_File *fStationFile);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
+
diff --git a/costa/native/cta/include/cta_util_sort.h b/costa/native/cta/include/cta_util_sort.h
new file mode 100644
index 000000000..2833bdc16
--- /dev/null
+++ b/costa/native/cta/include/cta_util_sort.h
@@ -0,0 +1,53 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2007  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_util_sort.h
+\brief Interface to the sorting utility functions of COSTA. 
+       This header file handles the C/FORTRAN interfacing 
+*/
+
+#ifndef CTA_UTILSORT_H
+#define CTA_UTILSORT_H
+#include "cta_system.h"
+
+#ifdef __cplusplus
+extern "C" { 
+#endif
+
+/** \brief Sort an integer array using the Quicksort algorithm.
+ *  An additional interger array is permutated in way as the unsorted array
+ *
+ *
+ *  \param list       (input/output) array, dimension (N)
+ *          On entry, the array to be sorted.
+ *          On exit, list has been sorted into increasing order
+ *
+ * \param indx (input/output) array, dimension (N)
+
+ *  \param n       (input) INTEGER
+ *          The length of the array D.
+ *
+ */
+CTAEXPORT void CTA_Util_IQSort2(int *list, int *indx, int n);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/costa/native/cta/include/cta_util_sqlite3.h b/costa/native/cta/include/cta_util_sqlite3.h
new file mode 100644
index 000000000..d6a5677a9
--- /dev/null
+++ b/costa/native/cta/include/cta_util_sqlite3.h
@@ -0,0 +1,91 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_util_sqlite3.h
+\brief Callback functions used for SQL access.
+\note This is just a set of routines it does not define any COSTA component
+*/
+
+#ifndef CTA_UTIL_SQLITE3_H
+#define CTA_UTIL_SQLITE3_H 
+#include <sqlite3.h>
+#include "cta_string.h"
+#include "cta_datatypes.h"
+
+typedef struct {
+int index;
+int dimension;
+CTA_Datatype datatype;
+void* values;
+} CTAI_counter_vector;
+
+typedef struct {
+sqlite3 *db;
+int     nusers;
+char    *name;
+} CTAI_util_sqlite3_database;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Utility for returning values. Callback-function for sqlite3.
+ *
+ * \param vout         I  See SQLITE3 documentation
+ * \param argc         I  See SQLITE3 documentation
+ * \param argv         I  See SQLITE3 documentation
+ * \param azColName    I  See SQLITE3 documentation
+ * \return non-zero if an error occured (see SQLITE3 documentation)
+ */
+int CTAI_util_sqlite3_return_values(void *vout,
+         int argc, char **argv, char **azColName);
+
+
+/** \brief Utility for selecting values. Callback-function for sqlite3.
+ *
+ * \param out          O selected values
+ * \param nout         I dimension of the output array 
+ * \param datatype     I data type of the output array 
+ * \param db           I the database 
+ * \param selection    I the column-name which is returned  
+ * \param condition    I the selection creterion 
+ * \return non-zero if an error occured (see SQLITE3 documentation)
+ */
+int CTAI_util_sqlite3_select_values( void *out,
+    int nout, CTA_Datatype datatype, sqlite3 *db,
+    const char *selection, const char *condition);
+
+
+/** \brief Utility for returning keys. Callback-function for sqlite3.
+ *
+ * \param n_keys       O  number of keys in table
+ * \param Keys         O  Keys of table
+ * \param db           I  pointer to sql database
+ * \param condition    I  the selection creterion
+ * \return  non-zero if an error occured (see SQLITE3 documentation)
+ */
+int CTAI_util_sqlite3_return_keys( int *n_keys, CTA_String **Keys,
+    sqlite3 *db, const char *condition);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/costa/native/cta/include/cta_util_statistics.h b/costa/native/cta/include/cta_util_statistics.h
new file mode 100644
index 000000000..edeff84b0
--- /dev/null
+++ b/costa/native/cta/include/cta_util_statistics.h
@@ -0,0 +1,62 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_util_statistics.h
+\brief Utility for calculating a realization of a standard normal distribution.
+\note This is just a set of utility routines it does not define any COSTA component
+
+*/
+
+#ifndef CTA_UTIL_STATISTICS_H
+#define CTA_UTIL_STATISTICS_H
+#include "cta_system.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Initialize the random generator.
+ *  \note only initialize the random generator once.
+ * \param seed     I some positive initial seed
+ */
+CTAEXPORT void CTA_rand_seed(int seed);
+
+
+/** \brief Get an uniform random number from the interval [0 1].
+ *
+ * \param x     O  receives the random number
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_rand_u(double *x);
+	
+
+/** \brief Get a random number from a normal distribution whit mean 0 and
+ * standard deviation 1.
+ *
+ * \param x     O  receives the random number
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_rand_n(double *x);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
+
diff --git a/costa/native/cta/include/cta_vector.h b/costa/native/cta/include/cta_vector.h
new file mode 100644
index 000000000..d166b7577
--- /dev/null
+++ b/costa/native/cta/include/cta_vector.h
@@ -0,0 +1,363 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_vector.h
+\brief Interface description of the default COSTA vector componennt. For user implementation see cta_usr_vector.h.
+
+Basic functions and utilities for vector operations on CTA_VECTOR objects.
+*/
+
+#ifndef CTA_VECTOR_H
+#define CTA_VECTOR_H
+#include "cta_system.h"
+#include "cta_handles.h"
+#include "cta_datatypes.h"
+#include "cta_functions.h"
+
+/* Function Handle */
+typedef CTA_Handle CTA_Vector;
+typedef CTA_Handle CTA_VecClass;
+
+/* parameters for different functions */
+#define I_CTA_VECTOR_CREATE_SIZE ( 1)
+#define I_CTA_VECTOR_CREATE_INIT ( 2)
+#define I_CTA_VECTOR_GETVALS     ( 3)
+#define I_CTA_VECTOR_SETVALS     ( 4)
+#define I_CTA_VECTOR_SETCONST    ( 5)
+#define I_CTA_VECTOR_SCAL        ( 6)
+#define I_CTA_VECTOR_COPY        ( 7)
+#define I_CTA_VECTOR_AXPY        ( 8)
+#define I_CTA_VECTOR_DOT         ( 9)
+#define I_CTA_VECTOR_NRM2        (10)
+#define I_CTA_VECTOR_AMAX        (11)
+#define I_CTA_VECTOR_GETMAXLEN   (12)
+#define I_CTA_VECTOR_FREE        (13)
+#define I_CTA_VECTOR_GETVAL      (14)
+#define I_CTA_VECTOR_SETVAL      (15)
+#define I_CTA_VECTOR_EXPORT      (16)
+#define I_CTA_VECTOR_PRINT_TABLE (17)
+#define I_CTA_VECTOR_APPENDVAL   (18)
+#define I_CTA_VECTOR_ELMDIV      (19)
+#define I_CTA_VECTOR_IMPORT      (20)
+#define I_CTA_VECTOR_ELMPROD     (21)
+#define I_CTA_VECTOR_ELMSQRT     (22)
+#define I_CTA_VECTOR_NUMFUNC     (23)
+
+// CTA_Vector_Create                Create a new COSTA-vector:
+// CTA_Vector_DefineClass           ...
+
+/* function interfaces */
+#ifdef __cplusplus
+extern "C" {
+#endif
+/** \brief Create a new class (=implementation) of a COSTA vector component.
+ *
+ * \param name     I  name of the new vector class
+ * \param h_func   I  COSTA function handles for functions that implement class.
+ *                    Missing functions must have value CTA_NULL
+ * \param hveccl   O  receives handle of new vector class
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Vector_DefineClass(
+   const char *name,
+   const CTA_Func h_func[I_CTA_VECTOR_NUMFUNC],
+   CTA_VecClass     *hveccl
+   );
+
+/** \brief Duplicate a vector object.
+ *
+ * \note Only size, data type and type (class) are duplicated, the content is not
+ *       copied.
+ *
+ * \param hvector1  I  handle of vector to be duplicated
+ * \param hvector2  O  receives handle of new duplicate vector, empty before calling
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Vector_Duplicate(CTA_Vector hvector1, CTA_Vector *hvector2);
+
+/** \brief Create a new vector.
+ *
+ * \note
+ *
+ * \param hveccl   I  vector class of new vector
+ * \param n        I  number of elements
+ * \param datatype I  data type of elements in vector
+ * \param userdata IO user data for creation (depends on class)
+ * \param hvector  O  receives handle of new vector
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Vector_Create(CTA_VecClass hveccl, const int n, CTA_Datatype datatype,
+                      CTA_Handle userdata, CTA_Vector *hvector);
+
+/** \brief Get size of vector.
+ *
+ * \param hvec  I  handle of vector
+ * \param n     O  receives number of elements
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Vector_GetSize(CTA_Vector hvec, int *n);
+
+/** \brief Get a copy of a single element in the vector.
+ *
+ * \param hvec     I  handle of vector
+ * \param i        I  index of element
+ * \param vals     O  receives copy of value in vector, must exist before calling
+ * \param datatype I  data type of *vals, must be the same as data type of elements in vector
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Vector_GetVal(CTA_Vector hvec, int  i, void *vals,
+                      CTA_Datatype datatype);
+
+/** \brief Get a copy of all elements in the vector.
+ *
+ * \param hvec     I  handle of vector
+ * \param vals     O  receives copy of all elements in vector, must exist before calling
+ * \param n        I  length of array vals
+ * \param datatype I  data type of *vals, must be the same as data type of elements in vector
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Vector_GetVals(CTA_Vector hvec, void *vals, int  n,
+                       CTA_Datatype datatype);
+
+/** \brief Set a copy of an element in the vector.
+ *
+ * \note The value is copied in the vector.
+ *
+ * \param hvec     IO handle of vector
+ * \param i        I  index of element
+ * \param val      I  new value that is copied into element at given index
+ * \param datatype I  data type of *val, must be the same as data type of elements in vector
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Vector_SetVal(CTA_Vector hvec, int  i, void *val,
+                      CTA_Datatype datatype);
+
+/** \brief Append a copy of an element to a vector.
+ *
+ * \note The value is copied into the vector, size of vector is increased.
+ *
+ * \param hvec     IO handle of vector
+ * \param val      I  value that needs to be added to vector
+ * \param datatype I  data type of *val, must be the same as data type of elements in vector
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Vector_AppendVal(CTA_Vector hvec, const void *val, CTA_Datatype datatype);
+
+/** \brief Set all elementes in the vector.
+ *
+ * \note The values are copied in the vector.
+ *
+ * \param hvec     IO handle of vector
+ * \param vals     I  values that need to be copied to vector
+ * \param n        I  number of elements in vals
+ * \param datatype I  data type of *vals, must be the same as data type of elements in vector
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Vector_SetVals(CTA_Vector hvec, void *vals, int  n,
+                       CTA_Datatype datatype);
+
+/** \brief Scale a vector.
+ *
+ * \note scale: x=alpha*x
+ *
+ * \param hvec     IO handle of vector
+ * \param alpha    I  scalar
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Vector_Scal(CTA_Vector hvec, double alpha);
+
+/** \brief Copy a vector
+ *
+ * \note This function copies the vector content (y=x), but does not make a new vector.
+ *
+ * \param hvec_x  I  handle of sending vector
+ * \param hvec_y  O  handle of receiving vector
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Vector_Copy(CTA_Vector hvec_x, CTA_Vector hvec_y);
+
+/** \brief Operation axpy for two vectors x and y.
+ *
+ * \note axpy: y=alpha*x+y
+ *
+ * \param hvec_y  IO handle of vector y
+ * \param alpha   I  scalar
+ * \param hvec_x  I  handle of vector x
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Vector_Axpy(CTA_Vector hvec_y, double alpha, CTA_Vector hvec_x);
+
+/** \brief Dot product operation of two vectors.
+ *
+ * \note dot: dotprod=x^t*y
+ *
+ * \param hvec_x  I  handle of vector x
+ * \param hvec_y  I  handle of vector y
+ * \param dotprod O  receives dot product
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Vector_Dot(CTA_Vector hvec_x, CTA_Vector hvec_y, double *dotprod);
+
+/** \brief Compute 2-norm of vector.
+ *
+ * \note 2-norm: sqrt(x^t*x)
+ *
+ * \param hvec_x  I  handle of vector x
+ * \param norm2   O  receives 2-norm
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Vector_Nrm2(CTA_Vector hvec_x, double *norm2);
+
+/** \brief Find index of element in vector with largest absolute value
+ *
+ * \param hvec_x  I  handle of vector
+ * \param iloc    O  receives index of largest absolute value
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Vector_Amax(CTA_Vector hvec_x, int *iloc);
+
+/** \brief Find largest length of elements in vector.
+ *
+ * \note e.g. length of string in a vector of strings
+ *
+ * \param hvec_x  I  handle of vector
+ * \param maxlen  O  receives largest length of elements in vector
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Vector_GetMaxLen(
+   CTA_Vector hvec_x,  /* Handle of the first vector  */
+   int *maxlen
+   );
+
+/** \brief Export a vector to file, stdout or pack object.
+ *
+ * \Note CTA_DEFAULT_VECTOR supports exporting to:\n
+ *       file (usrdata a handle of COSTA file)\n
+ *       pack object (usrdata a handle of COSTA pack object)\n
+ *
+ * \param hvec     I  handle of vector
+ * \param usrdata  I  configuration of output
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Vector_Export(
+   CTA_Vector hvec, /* Handle of the vector */
+   CTA_Handle usrdata
+   );
+
+/** \brief Import a vector.
+ *
+ * \note CTA_DEFAULT_VECTOR supports importing from pack object (usrdata[0] a pack handle).
+ * \note Data type and size of vector can be changed due to this action
+ *
+ * \param hvec     I  handle of vector
+ * \param usrdata  I  configuration of output
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Vector_Import( CTA_Vector hvec, CTA_Handle usrdata);
+
+
+/** \brief Print table, each column built up by a vector.
+ *
+ * \note CTA_DEFAULT_VECTOR    TODO
+ *
+ * \param table    I  array of vector handles, these vectors form the table to be filled
+ * \param ncolumns I  number of columns in table
+ *                    (number of vector handles in table)
+ * \param vformats I  handle of vector of string (size ncolumns) containing the
+ *                    formats for printing each column (C-format)
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Vector_Print_Table(CTA_Vector* table, int ncolumns,
+                           CTA_Vector vformats);
+
+/** \brief Free the vector object.
+ *
+ *
+ * \Note hvec_x=CTA_NULL is allowed
+ *
+ * \param hvec_x  IO handle of vector, replaced by CTA_NULL on return
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Vector_Free(CTA_Vector *hvec_x);
+
+
+/** \brief Get type of vector.
+ *
+ * \param hvec     I  handle of vector
+ * \param datatype O  receives data type of vector
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Vector_GetDatatype(
+   CTA_Vector hvec,         /* Handle of the vector */
+   CTA_Datatype *datatype   /* Returned data type   */
+   );
+
+/** \brief Set whole vector to one single value.
+ *
+ * \Note hvec=CTA_NULL is allowed, function returns error code
+ *
+ * \param hvec     IO handle of vector
+ * \param val      I  value that must be set
+ * \param datatype I  data type of *val, must be the same as data type of elements in vector
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Vector_SetConstant(
+   CTA_Vector hvec,  /* Handle of the vector */
+   void *val,        /* value that must be set */
+   CTA_Datatype datatype /* Data type */
+   );
+
+/** \brief Element wise division
+ *
+ * \note y=x./y
+ *
+ * \param hvec_y  IO handle of vector y
+ * \param hvec_x  I  handle of vector x
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Vector_ElmDiv(CTA_Vector hvec_y, CTA_Vector hvec_x);
+
+
+/** \brief Element wise product
+ *
+ * \note y=x.y
+ *
+ * \param hvec_y  IO handle of vector y
+ * \param hvec_x  I  handle of vector x
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Vector_ElmProd(CTA_Vector hvec_y, CTA_Vector hvec_x);
+
+/** \brief Element wise square root
+ *
+ * \note y=y.^0.5
+ *
+ * \param  hvec_x  IO handle of vector y
+ * \return error status: CTA_OK if successful
+ */
+CTAEXPORT int CTA_Vector_ElmSqrt(CTA_Vector hvec_x);
+
+
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/costa/native/cta/include/cta_vector_blas.h b/costa/native/cta/include/cta_vector_blas.h
new file mode 100644
index 000000000..370af0910
--- /dev/null
+++ b/costa/native/cta/include/cta_vector_blas.h
@@ -0,0 +1,322 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#ifndef CTA_VECTOR_BLAS_H
+#define CTA_VECTOR_BLAS_H
+
+#include "cta_f77blas.h"
+#include "cta_datatypes.h"
+#include "cta_handles.h"
+#include "cta_vector.h"
+#include "cta_functions.h"
+#include "cta_pack.h"
+
+/**
+\file  cta_vector_blas.h
+\brief Interface description of the COSTA BLAS-vector component (a standard implementation class of CTA_Vector) that can be used for standard BLAS operations.
+
+
+<i>BLAS matrix object data:</i>
+
+\code
+typedef struct {
+CTA_Datatype datatype;
+void *values;
+int n;
+int size;
+} CTAI_Vector_blas;
+\endcode
+\n
+
+
+*/
+
+typedef struct {
+CTA_Datatype datatype;
+void *values;
+int n;                                  // total number of elements
+int nDimensions;                        // total number of dimensions
+int dimensions[7];                      // number of elements in each dimension
+int size;
+} CTAI_Vector_blas;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/** \brief Define the BLAS vector class. Use the resulting class object when calling CTA_Vector_Create().
+ *
+ * \param hveccl    IO receives class BLAS vector class object, must be empty before calling
+ * \return no return value
+ */
+CTANOEXPORT void CTA_Vector_blas_initialise(CTA_VecClass *hveccl);
+
+
+/** \brief Part of the create process, get size of a new object.
+ *
+ * \param n        O  receives the size of a new vector blas object
+ * \param datatype I  data type
+ * \param usrdata  IO user data (not being used)
+ * \param retval   O  receives return value of the function, CTA_OK if succesful
+ * \param memsize  O  the number of bytes which are necessary to store one
+                      CTAI_Vector_blas, with a pointer to the contents (data), but without the
+                      contents themselves
+ * \return no return value
+ */
+void CTAI_Vector_Create_Size(int *n, CTA_Datatype *datatype,
+                            CTA_Handle *usrdata, int *retval, int *memsize);
+
+
+/** \brief Part of the create process, initialize vector object.
+ *
+ * \param x          IO pointer to object data of user vector
+ * \param n          I  size of vector data
+ * \param datatype   I  data type of the vector elements
+ * \param usrdata    IO user data (not being used)
+ * \param retval     O  receives return value
+ * \return no return value
+ */
+void CTAI_Vector_Create_Init(CTAI_Vector_blas *x, int *n,
+                             CTA_Datatype *datatype,
+                             CTA_Handle *usrdata, int *retval);
+
+
+/** \brief Retrieve all values of the BLAS vector.
+ *
+ * \param x        I   pointer to object data of the vector
+ * \param vals     O   receives the values of the vector
+ * \param n        I   number of elements in the vals buffer, must equal number of elements in BLAS vector
+ * \param datatype I   data type of elements of vals, must be the same as data type of BLAS vector x
+ * \param retval   O   receives return value
+ * \return no return value
+ */
+void CTAI_Vector_getvals(CTAI_Vector_blas *x, void *vals, int *n, CTA_Datatype *datatype, int *retval);
+
+
+/** \brief Retrieve a single value of the BLAS vector at given index.
+ *
+ * \param x        I  pointer to object data of BLAS vector
+ * \param i        I  index of value to retrieve
+ * \param val      O  receives value; must exist before calling
+ * \param datatype I  data type of *val, must be the same as data type of BLAS vector x
+ * \param retval   O  receives return value
+ * \return no return value
+ */
+void CTAI_Vector_getval(CTAI_Vector_blas *x, int *i, void *val, CTA_Datatype *datatype, int *retval);
+
+
+/** \brief Set all values of a BLAS vector.
+ *
+ * \param x        I   pointer to object data of BLAS vector
+ * \param vals     I   new values
+ * \param n        I   number of elements in the vals buffer, must equal number of elements in BLAS vector
+ * \param datatype I   data type of elements of vals, must be the same as data type of BLAS vector x
+ * \param retval   O   receives return value
+ * \return no return value
+ */
+void CTAI_Vector_setvals(CTAI_Vector_blas *x, void *vals, int *n, CTA_Datatype *datatype, int *retval);
+
+
+/** \brief Set a single value of a BLAS vector.
+ *
+ * \param x        I  pointer to object data of BLAS vector
+ * \param i        I  index of value to set
+ * \param val      I  new data of value
+ * \param datatype I  data type of *val, must be the same as data type of BLAS vector x
+ * \param retval   O  receives return value
+ * \return no return value
+ */
+void CTAI_Vector_setval(CTAI_Vector_blas *x, int *i, void *val, CTA_Datatype *datatype, int *retval);
+
+
+/** \brief Set all values of BLAS vector to the same value.
+ *
+ * \param x        I  pointer to object data of BLAS vector
+ * \param val      I  value to set vector elements to
+ * \param datatype I  data type of elements of vals, must be the same as data type of BLAS vector x
+ * \param retval   O  receives return value
+ * \return no return value
+ */
+void CTAI_Vector_setconst(CTAI_Vector_blas *x, void *val, CTA_Datatype *datatype, int *retval);
+
+
+/** \brief Scale the BLAS vector: x=ax
+ *
+ * \param x        IO pointer to object data of user vector
+ * \param alpha    I  scalar
+ * \param retval   O  receives return value
+ * \return no return value
+ */
+void CTAI_Vector_scal(CTAI_Vector_blas *x, double *alpha, int *retval);
+
+
+/** \brief Copy the BLAS vector.
+ *
+ * \param x        I  pointer to object data of a BLAS vector
+ * \param y        O  pointer to object data of BLAS vector y that receives copy of x; must exist before calling
+ * \param retval   O  receives return value
+ * \return no return value
+ */
+void CTAI_Vector_copy(CTAI_Vector_blas *x, CTAI_Vector_blas *y, int *retval);
+
+
+/** \brief Apply the BLAS operation axpy: y=y+a*x
+ *
+ * \param y        IO pointer to object data of BLAS vector y; must exist before calling
+ * \param alpha    I  scalar
+ * \param x        I  pointer to object data of BLAS vector x
+ * \param retval   O  receives return value
+ * \return no return value
+ */
+void CTAI_Vector_axpy(CTAI_Vector_blas *y, double *alpha, CTAI_Vector_blas *x, int *retval);
+
+
+/** \brief Calculate the dot product of two BLAS vectors: x.y
+ *
+ * \param y        I  pointer to object data of BLAS vector y
+ * \param x        I  pointer to object data of BLAS vector x
+ * \param dotprod  O  receives the dot product
+ * \param retval   O  receives return value
+ * \return no return value
+ */
+void CTAI_Vector_dot(CTAI_Vector_blas *x, CTAI_Vector_blas *y, double *dotprod, int *retval);
+
+
+/** \brief Calculate the 2-norm of a BLAS vector
+ *
+ * \param x        I  pointer to object data of BLAS vector
+ * \param norm2    O  receives 2-norm
+ * \param retval   O  receives return value
+ * \return no return value
+ */
+void CTAI_Vector_nrm2(CTAI_Vector_blas *x, double *norm2, int *retval);
+
+
+/** \brief Get the index of the maximum vector value. First index is 1.
+ *
+ * \param x        I  pointer to object data of BLAS vector
+ * \param iloc     O  receives index of maximum value
+ * \param retval   O  receives return value
+ * \return no return value
+ */
+void CTAI_Vector_amax(CTAI_Vector_blas *x, int *iloc, int *retval);
+
+
+/** \brief Get the maximum length in case of BLAS vector with string elements.
+ *
+ * \param x        I  pointer to object data of BLAS vector
+ * \param maxlen   O  receives maximum length
+ * \param retval   O  receives return value
+ * \return no return value
+ */
+void CTAI_Vector_GetMaxLen(CTAI_Vector_blas *x, int *maxlen, int *retval);
+
+/** \brief Export a BLAS vector.
+ *
+ * \note Supports exporting float, double, int and CTA_String values.\n
+ *       Possibilities:
+ *       Export to file (usrdata contains handle of COSTA file).\n
+ *       Export to pack object (usrdata contains handle of COSTA pack object).\n
+ * 
+ *
+ * \param x        I  pointer to object data of BLAS vector
+ * \param usrdata  IO user data
+ * \param retval   O  receives return value
+ * \return no return value
+ */
+void CTAI_Vector_export(CTAI_Vector_blas *x, CTA_Handle *usrdata, int *retval);
+
+
+/** \brief Import a BLAS vector.
+ *
+ * \note Supported:\n
+      Import from pack object (usrdata contains handle of COSTA pack object).\n
+ *
+ * \param x            O  pointer to object data of BLAS vector that receives import result
+ * \param usrdata     IO user data
+ * \param retval       O  receives return value
+ * \return no return value
+ */
+void CTAI_Vector_import(CTAI_Vector_blas *x, CTA_Handle *usrdata, int *retval);
+
+
+/** \brief Print table. TODO
+ *
+ * \param table        IO
+ * \param ncolumns     I  number of columns
+ * \param formats      I  user vector
+ * \param retval       O  receives return value
+ * \return no return value
+ */
+void CTAI_Vector_Print_Table(CTAI_Vector_blas **table, int *ncolumns,
+                      CTAI_Vector_blas * formats,     int *retval);
+
+
+/** \brief Append single value to the BLAS vector.
+ *
+ * \param x            IO pointer to object data of BLAS vector
+ * \param val          I  value to append
+ * \param datatype     I  data type of value to append, must be the same as data type of user vector x
+ * \param retval       O  receives return value
+ * \return no return value
+ */
+void CTAI_Vector_appendval(CTAI_Vector_blas *x, void *val, CTA_Datatype *datatype,
+                           int *retval);
+
+
+/** \brief Free the BLAS vector object data and associated resources.
+ *
+ * \param x        IO pointer to object data of BLAS vector
+ * \param retval   O  receives return value
+ * \return no return value
+ */
+void CTAI_Vector_free(CTAI_Vector_blas *x, int *retval);
+
+
+/** \brief Element-wise division of BLAS vector.
+ *
+ * \param y            IO pointer to object data of receiving BLAS vector
+ * \param x            I  pointer to object data of user vector x
+ * \param retval       O  receives return value
+ * \return no return value
+ */
+void CTAI_Vector_elmdiv(CTAI_Vector_blas *y, CTAI_Vector_blas *x, int *retval);
+
+/** \brief Element-wise product of BLAS vector.
+ *
+ * \param y            IO pointer to object data of receiving BLAS vector
+ * \param x            I  pointer to object data of user vector x
+ * \param retval       O  receives return value
+ * \return no return value
+ */
+void CTAI_Vector_elmprod(CTAI_Vector_blas *y, CTAI_Vector_blas *x, int *retval);
+
+/** \brief Element-wise square root of BLAS vector.
+ *
+ * \param y            IO pointer to object data of receiving BLAS vector
+ * \param retval       O  receives return value
+ * \return no return value
+ */
+void CTAI_Vector_elmsqrt(CTAI_Vector_blas *y, int *retval);
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif
+
diff --git a/costa/native/cta/include/cta_xml.h b/costa/native/cta/include/cta_xml.h
new file mode 100644
index 000000000..9ac327728
--- /dev/null
+++ b/costa/native/cta/include/cta_xml.h
@@ -0,0 +1,57 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_xml.h
+\brief Utilities for XML access: reading/writing of trees from/to a XML file.
+\note This is just a set of utility routines it does not define any COSTA component
+
+*/
+
+#ifndef CTA_XML_H
+#define CTA_XML_H
+#include "cta_system.h"
+#include "cta_tree.h"
+#include "cta_string.h"
+
+/* tree functions */
+#ifdef __cplusplus
+extern "C" {
+#endif
+/** \brief Read a COSTA XML file into a new tree.
+ *
+ *  \param hfname  I  file name of XML file to read
+ *  \param hroot   O  handle of a new COSTA tree
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_XML_Read(CTA_String hfname, CTA_Tree *hroot);
+
+
+/** \brief Write a tree to a COSTA XML file
+ *
+ *  \param hfname  I  file name of XML file to write
+ *  \param hroot   I  handle of a COSTA tree
+ *  \return CTA_OK if successful
+ */
+CTAEXPORT int CTA_XML_Write(CTA_String hfname, CTA_Tree hroot);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* CTA_XML_H */
diff --git a/costa/native/cta/include/ctai.h b/costa/native/cta/include/ctai.h
new file mode 100644
index 000000000..ae6266632
--- /dev/null
+++ b/costa/native/cta/include/ctai.h
@@ -0,0 +1,37 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  ctai.h
+
+\brief Definition of MIN(a,b) and MAX(a,b)
+*/
+
+#ifndef NL_VORTECH_CTAI_H
+#define NL_VORTECH_CTAI_H
+
+#ifndef MAX
+#define MAX(a,b) (a>b ? a: b)
+#endif
+
+#ifndef MIN
+#define MIN(a,b) (a<b ? a: b)
+#endif
+
+#endif
diff --git a/costa/native/cta/include/ctai_datatypes.h b/costa/native/cta/include/ctai_datatypes.h
new file mode 100644
index 000000000..c6712ae12
--- /dev/null
+++ b/costa/native/cta/include/ctai_datatypes.h
@@ -0,0 +1,53 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file ctai_datatypes.h
+\brief Utility functions for working with data types.
+
+\note This is just a set of utility routines it does not define any COSTA component.
+
+*/
+
+
+#ifndef NL_VORTECH_CTAI_DATA_TYPS_H
+#define NL_VORTECH_CTAI_DATA_TYPS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/** \brief Get a CTA_Datatype object describing the data type given in string representation
+ *
+ *  \param dt I description of data type in string form
+ *  \return CTA_Datatype describing the data type, CTA_NULL if data type is unknown
+ */
+CTA_Datatype CTAI_String2Type(const char *dt);
+
+
+
+/** \brief Get string representation of CTA_Datatype object
+ *
+ *  \param datatype I CTA_Datatype object describing the data type
+ *  \return pointer to string containing description of data type ("CTA_ILLEGAL_DATATYPE" if unknown)
+ */
+const char *CTAI_Type2String(CTA_Datatype datatype);
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/costa/native/cta/include/ctai_handles.h b/costa/native/cta/include/ctai_handles.h
new file mode 100644
index 000000000..b76a752c9
--- /dev/null
+++ b/costa/native/cta/include/ctai_handles.h
@@ -0,0 +1,69 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  ctai_handles.h
+\brief Utility functions for working with CTA_Handle objects.
+
+\note This is just a set of utility routines it does not define any COSTA component
+*/
+#ifndef NL_VORTECH_CTAI_HANDLES_H
+#define NL_VORTECH_CTAI_HANDLES_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Get data type of object the given handle is referring to (internal use only!)
+ *
+ *  \param handle I handle of which to get the data type
+ *  \return data type associated with handle (no error codes)
+ */
+CTA_Datatype CTAI_Handle_GetDatatype(const CTA_Handle handle);
+
+
+/** \brief Get name associated with the handle (internal use only!)
+ *
+ *  \param handle I handle to get name of
+ *  \return pointer to name associated with the given handle
+ */
+const char *CTAI_Handle_GetName(const CTA_Handle handle);
+
+
+/** \brief Get data belonging to the handle (internal use only!)
+ *
+ *  \param handle I handle of which to get the data
+ *  \return constant pointer to the data belonging to the handle
+ */
+const void *CTAI_Handle_GetData(const CTA_Handle handle);
+
+
+/** \brief Set name associated with the handle (internal use only!)
+ *
+ *  \param handle IO handle of which to set the name
+ *  \param name   I  new name associated with handle
+ *  \return CTA_OK is succesful
+ */
+int CTAI_Handle_SetName(const CTA_Handle handle, const char *name);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
+
diff --git a/costa/native/cta/include/ctai_sobs.h b/costa/native/cta/include/ctai_sobs.h
new file mode 100644
index 000000000..7e0e9d3d2
--- /dev/null
+++ b/costa/native/cta/include/ctai_sobs.h
@@ -0,0 +1,41 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  ctai_stoch_observer.h
+\brief Utility for stochastic observer object.
+*/
+#ifndef NL_VORTECH_CTAI_SOBS_H__
+#define NL_VORTECH_CTAI_SOBS_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/** \brief Get user data of stochastic observer object
+ *
+ *  \param hsobs I  handle of stochastic observer object of which to get user data
+ *  \param index I  index in user data array
+
+ *  \return handle of object at given index in the user data array of the stochastic observer object
+ */
+CTA_Handle CTAI_SObs_GetUserData(CTA_StochObs hsobs, int index);
+#ifdef __cplusplus
+}
+#endif                                  
+#endif
diff --git a/costa/native/cta/include/ctai_string.h b/costa/native/cta/include/ctai_string.h
new file mode 100644
index 000000000..fea62348c
--- /dev/null
+++ b/costa/native/cta/include/ctai_string.h
@@ -0,0 +1,48 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  ctai_string.h
+
+\brief Internal string utility functions
+*/
+#ifndef NL_VORTECH_CTAI_STRING_H
+#define NL_VORTECH_CTAI_STRING_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/** \brief Get a pointer to the contents of the string (internal use only!)
+ *
+ *  \param hstring I handle of string object
+ *  \return pointer to string contents
+ */
+char *CTAI_String_GetPtr(CTA_String hstring);
+
+/** \brief Allocate a string, created from a COSTA string (internal use only!).
+ *
+ *  \param hstr I handle of string object to use
+ *  \return pointer to C string allocated by this function
+ */
+char *CTAI_String_Allocate(CTA_String hstr);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/costa/native/cta/include/ctai_vector.h b/costa/native/cta/include/ctai_vector.h
new file mode 100644
index 000000000..dccf1d52c
--- /dev/null
+++ b/costa/native/cta/include/ctai_vector.h
@@ -0,0 +1,103 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  ctai_vector.h
+\brief Utilities for working with CTA_Vector objects.
+*/
+
+#ifndef NL_VORTECH_CTAI_VECTOR_H
+#define NL_VORTECH_CTAI_VECTOR_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/** \brief Count the number of handles with the given name in the vector (internal use only!)
+ *
+ * \param hvec   I  vector in which to search
+ * \param name   I  name of handle(s) to find
+ *
+ * \return number of handles counted or -1 in case of an error.
+ */
+int CTAI_Vector_CountHandles(CTA_Vector hvec, const char* name);
+
+
+/** \brief Find the handle with the given name (internal use only!)
+ *
+ *
+ * \param hvec     I  vector in which to search
+ * \param name     I  name of handle to find
+ *
+ * \return handle or CTA_NULL if not found or in case of an error
+ */
+CTA_Handle CTAI_Vector_FindHandle(CTA_Vector hvec, const char* name);
+
+
+/** \brief Get vector value at index i as C string (internal use only!)
+ *
+ * \note Free pointer after use.
+ *
+ * \param hvec I  vector
+ * \param i    I  index of value
+ *
+ * \return string representation of value at index i of vector
+ */
+const char *CTAI_Vector_GetStringVal(CTA_Vector hvec, int  i);
+
+
+/** \brief Set vector value at index i giving a C string to be converted to given data type (internal use only!)
+ *
+ * \param hvec      IO handle of vector
+ * \param i         I  index of value to set
+ * \param val       I  string representation of value
+ * \param datatype  I  data type of value
+ *
+ * \return CTA_OK if succesful or CTA_ILLEGAL_DATATYPE if invalid data type
+ */
+int CTAI_Vector_SetStringVal(CTA_Vector hvec, int  i, const char *val, CTA_Datatype datatype);
+
+/** \brief Create a COSTA vector. 
+*
+*  \param cur_node  I  Current XML node 
+*  \return             Handle to create or CTA_NULL in case of an error.
+*/
+CTA_Vector CTAI_XML_CreateVector(xmlNode *cur_node);
+
+/** \brief Generate XML from one COSTA vector
+*
+*  \param hvec   I  handle of a COSTA vector
+*  \param writer I  the XML text writer
+*/
+//void CTAI_XML_WriteVector(CTA_Vector hvec, xmlTextWriter *writer);
+  void CTAI_XML_WriteVector(CTA_Vector hvec, const char *id, const char *caption, CTA_Metainfo minfo, int level, xmlTextWriter *writer);
+
+
+/** \brief Create a COSTA vector from XML. 
+*
+*  \param cur_node  I  Current XML node 
+*  \param minfo     I/O  Meta info object
+*  \return             Handle to create or CTA_NULL in case of an error.
+*/
+CTA_Vector CTAI_XML_CreateVector_New(xmlNode *cur_node, CTA_Metainfo minfo);
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/costa/native/cta/include/ctai_xml.h b/costa/native/cta/include/ctai_xml.h
new file mode 100644
index 000000000..65b467ded
--- /dev/null
+++ b/costa/native/cta/include/ctai_xml.h
@@ -0,0 +1,74 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  ctai_xml.h
+\brief Utilities for XML access: reading/writing of trees from/to a XML file.
+\note This include is only to be used internally
+
+*/
+
+#ifndef CTAI_XML_H
+#define CTAI_XML_H
+
+#include <libxml/encoding.h>
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#include <libxml/xinclude.h>
+#include <libxml/xmlwriter.h>
+
+
+/* COSTA-specific element properties */
+#define CTAI_XML_DATABASE     (xmlChar*)("database")
+#define CTAI_XML_SELECT       (xmlChar*)("select")
+#define CTAI_XML_TIMEOFFSET   (xmlChar*)("timeoffset")
+#define CTAI_XML_ID           (xmlChar*)("id")
+#define CTAI_XML_START        (xmlChar*)("start")
+#define CTAI_XML_STEP         (xmlChar*)("step")
+#define CTAI_XML_STOP         (xmlChar*)("stop")
+#define CTAI_XML_TAG          (xmlChar*)("tag")
+#define CTAI_XML_NAME         (xmlChar*)("name")
+#define CTAI_XML_DATATYPE     (xmlChar*)("datatype")
+#define CTAI_XML_DIMENSION    (xmlChar*)("dimension")
+#define CTAI_XML_VALUES       (xmlChar*)("values")
+#define CTAI_XML_VECTOR       (xmlChar*)("vector")
+#define CTAI_XML_VALUE        (xmlChar*)("value")
+#define CTAI_XML_LIBRARY      (xmlChar*)("library")
+#define CTAI_XML_FUNCTION     (xmlChar*)("function")
+#define CTAI_XML_UNIT         (xmlChar*)("unit")
+#define CTAI_XML_MISSINGVALUE (xmlChar*)("missingValue")
+#define CTAI_XML_GRID         (xmlChar*)("grid")
+#define CTAI_XML_CAPTION      (xmlChar*)("caption")
+#define CTAI_XML_LENGTH       (xmlChar*)("length")
+#define CTAI_XML_EXCLUDEFROMVECTOR      (xmlChar*)("excludeFromVector")
+#define CTAI_XML_IMPLEMENTS   (xmlChar*)("implements")
+#define CTAI_XML_NPROC          (xmlChar*)("nproc")
+#define CTAI_XML_PARALLEL_TYPE  (xmlChar*)("parallel_type")
+#define CTAI_XML_SPAWN_WORKERS  (xmlChar*)("spawn_workers")
+#define CTAI_XML_NTIMES         (xmlChar*)("ntimes")
+#define CTAI_XML_DUMPROCS       (xmlChar*)("dumproc")
+#define CTAI_XML_SOBSCLASSNAME  (xmlChar*)("class")
+#define CTAI_XML_FLAG_BARRIER   (xmlChar*)("flag_barrier")
+#define CTAI_XML_T_STEP         (xmlChar*)("T_step")
+
+#define MY_ENCODING  (xmlChar*)("ISO-8859-1")
+
+
+
+#endif /* CTA_XML_H */
diff --git a/costa/native/cta/include/f_cta_utils.h b/costa/native/cta/include/f_cta_utils.h
new file mode 100644
index 000000000..a7b44eea2
--- /dev/null
+++ b/costa/native/cta/include/f_cta_utils.h
@@ -0,0 +1,61 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  f_cta_utils.h
+
+\brief String utility functions for FORTRAN / C compatibility.
+*/
+
+#include <string.h>
+#include "cta_system.h"
+#include "cta_datatypes.h"
+#include "cta_errors.h"
+
+#ifndef F_CTA_UTILS_H
+#define F_CTA_UTILS_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** \brief Create C string from FORTRAN string
+ *
+ * \param f_str I pointer to FORTRAN string
+ * \param c_str O buffer for receiving C string
+ * \param len_f I length of FORTRAN string
+ *
+ * \return error status: always CTA_OK (no internal error check)
+ */
+int CTA_fstr2cstr(char *f_str,char *c_str, int len_f);
+
+/** \brief Create FORTRAN string from C string
+ *
+ * \param c_str I pointer to C string
+ * \param f_str O buffer for receiving FORTRAN string
+ * \param len_f I length of C string
+ *
+ * \return error status: always CTA_OK (no internal error check)
+ */
+int CTA_cstr2fstr(char *c_str,char *f_str, int len_f);
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/costa/native/cta/include/modbuild_sp_model_template.h b/costa/native/cta/include/modbuild_sp_model_template.h
new file mode 100644
index 000000000..9694f2165
--- /dev/null
+++ b/costa/native/cta/include/modbuild_sp_model_template.h
@@ -0,0 +1,123 @@
+/*
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2007  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_modbuild_sp_model_template.h
+
+\brief Interfaces of the user-routines that can be provided in order to create a model using the SP-modelbuilder
+
+Copy this file to your_model.c and fill in the routines.
+
+
+*/
+#include "cta.h"
+#include "f_cta_utils.h"
+
+#define CREATEFUNC_F77  CF77_CALL(usr_createfunc,USR_CREATEFUNC)
+
+
+
+
+/** \brief Creates a new model instance
+ *  This routine creates and initialises a new model instance.
+ *
+ *  \param hinput    I   Model configuration CTA_Tree of CTA_String
+ *  \param state     O   Model state (initialized to initial value. Note this statevector must be created
+ *  \param sbound    O   State-vector for the offset on the forcings. Set CTA_NULL if not used. Note this statevector must be created
+ *  \param nnoise    O   The number of noise parameters in model state. Set 0 in case of a deterministic model
+ *  \param time0     O   Time instance of the initial state state. The time object is already created
+ *  \param snamnoise O   Name of the substate containing the noise parameters. The string is already created
+ *  \param husrdata  O   Handle that can be used for storing instance specific data
+ *  \param ierr      O   Return flag CTA_OK if successful
+*/
+void usr_create(CTA_Handle *hinput,  CTA_TreeVector *state, CTA_TreeVector *sbound, 
+                CTA_TreeVector *sparam, int *nnoise, CTA_Time *time0, 
+                CTA_String *snamnoise, CTA_Handle *husrdata, int *ierr);
+
+/** \brief Compute timestep(s)
+ *  This routine is computes several timesteps over a giving timespan.
+ *
+ *  \param timespan  I  Timespan to simulate
+ *  \param state     IO State vector
+ *  \param saxpyforc I  Offset on models forcings
+ *  \param baddnoise I  flag (CTA_TRUE/CTA_FALSE) whether to add noise
+ *  \param sparam    I  Model parameters
+ *  \param husrdata  IO Instance specific data
+ *  \param ierr      O  Return flag CTA_OK if successful
+*/
+void usr_compute(CTA_Time *timespan, CTA_TreeVector *state, CTA_TreeVector *saxpyforc,
+                     int *baddnoise, CTA_TreeVector *sparam, CTA_Handle *husrdata,
+                     int *ierr);
+
+/** \brief Return the root of noise covariance matrix
+ *  This routine is responsible for returning the covariance matrix of the noise parameters.
+ *
+ *  \param colsvar         O   Covariance of noise parameters array of noise. 
+ *                             Represented als an array (nnoise) of tree-vectors.
+ *                             Note the sub-tree-vectors are already allocated.
+ *  \param nnoise          I   Number of noise parameters
+ *  \param husrdata        IO  Instance specific data
+ *  \param ierr            O   Return flag CTA_OK if successful
+*/
+void usr_covar(CTA_TreeVector *colsvar, int *nnoise, CTA_Handle *husrdata, int *ierr);
+
+/** \brief Return values that correspond to observed values
+ *  This routine is responsible for the transformation of the state-vector to the observations. 
+ *
+ *  \param state    I  state vector
+ *  \param hdescr   I  Observation description of observations
+ *  \param vval     O  Model (state) values corresponding to observations in hdescr
+ *  \param husrdata IO Instance specific data
+ *  \param ierr     O  Return flag CTA_OK if successful
+*/
+void usr_obs(CTA_TreeVector *state, CTA_ObsDescr *hdescr, CTA_Vector *vval,
+                CTA_Handle *husrdata, int *ierr);
+
+
+/** \brief Select criterion for observations that can be used for the model.
+ *  This routine is responsible for producing a selection criterion that will filter out all invalid observations.
+ *  Invalid observations are observations for which the model cannot produce a corresponding value. For example
+ *  observations that are outside the computational domain.
+ *
+ *  \param state    I  state vector
+ *  \param ttime    I  timespan for selection
+ *  \param hdescr   I  observation description of all available observations
+ *  \param sselect  O  The select criterion to filter out all invalid observations
+ *  \param husrdata IO Instance specific data
+ *  \param ierr     O  Return flag CTA_OK if successful
+*/
+void usr_obssel(CTA_TreeVector *state, CTA_Time *ttime, CTA_ObsDescr *hdescr,
+                CTA_String *sselect, CTA_Handle *husrdata, int* ierr);
+
+
+void usr_SP_Model_CreateFunc(){
+CTA_Intf hintf;
+CTA_Func func;
+   hintf = CTA_NULL;
+   CTA_Func_Create("usr_create",  usr_create,  hintf, &func);
+   CTA_Func_Create("usr_compute", usr_compute, hintf, &func);
+   CTA_Func_Create("usr_covar",   usr_covar,   hintf, &func);
+   CTA_Func_Create("usr_obs",     usr_obs,     hintf, &func);
+   CTA_Func_Create("usr_obssel",  usr_obssel,  hintf, &func);
+}
+
+void CREATEFUNC_F77(){
+   usr_CreateFunc();
+}
+
diff --git a/costa/native/cta/src/CMakeLists.txt b/costa/native/cta/src/CMakeLists.txt
new file mode 100644
index 000000000..afd3dcd6a
--- /dev/null
+++ b/costa/native/cta/src/CMakeLists.txt
@@ -0,0 +1,23 @@
+cmake_minimum_required(VERSION 3.9.1)
+
+set(LIBRARY_OUTPUT_PATH  ${CMAKE_BINARY_DIR}/lib)
+set(SOURCES
+  cta_array.c       cta_matrix_blas.c      cta_obsdescr_combine.c  cta_sobs_combine.c  cta_util_sort.c
+  cta_datatypes.c   cta_mem.c              cta_obsdescr_maori.c    cta_sobs_factory.c  cta_util_sqlite3.c
+  cta_datetime.c    cta_message.c          cta_obsdescr_netcdf.c   cta_sobs_maori.c    cta_util_statistics.c
+  cta_defaults.c    cta_metainfo.c         cta_obsdescr_sqlite3.c  cta_sobs_netcdf.c   cta_vector.c
+  cta_file.c        cta_method.c           cta_obsdescr_table.c    cta_sobs_sqlite3.c  cta_vector_blas.c
+  cta_flush.c       cta_modbuild_par.c     cta_obsdescr_user.c     cta_sobs_user.c     cta_xml.c
+  cta_functions.c   cta_modbuild_sp.c      cta_pack.c              cta_string.c        f_cta_defaults.c
+  cta_handles.c     cta_model.c            cta_par.c               cta_time.c          f_cta_utils.c
+  cta_initialise.c  cta_model_factory.c    cta_reltable.c          cta_tree.c
+  cta_interface.c   cta_model_utilities.c  cta_resultwriter.c      cta_treevector.c
+  cta_matrix.c      cta_obsdescr.c         cta_sobs.c              cta_util_methods.c 
+)
+
+add_library(cta SHARED ${SOURCES})
+target_include_directories(cta PUBLIC ../include)
+target_include_directories(cta PUBLIC ${CMAKE_SOURCE_DIR}/external)
+
+#target_link_libraries(cta INTERFACE libxml)
+
diff --git a/costa/native/cta/src/cta_array.c b/costa/native/cta/src/cta_array.c
new file mode 100644
index 000000000..064ac8db2
--- /dev/null
+++ b/costa/native/cta/src/cta_array.c
@@ -0,0 +1,1293 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/core/native/src/cta/cta_time.c $
+$Revision: 2751 $, $Date: 2011-09-09 08:58:46 +0200 (Fri, 09 Sep 2011) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2012  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <stdio.h>
+#include <math.h>
+#include "cta_array.h"
+
+#include "cta_mem.h"
+#include "f_cta_utils.h"
+#include "cta_errors.h"
+#include "cta_message.h"
+
+#define CTA_ARRAY_FREE_F77                                  F77_CALL(CTA_ARRAY_FREE,CTA_ARRAY_FREE)                         
+#define CTA_ARRAY_CREATEASDOUBLES_F77                       F77_CALL(CTA_ARRAY_CREATEASDOUBLES,CTA_ARRAY_CREATEASDOUBLES)              
+#define CTA_ARRAY_GETNDIMENSIONS_F77                        F77_CALL(CTA_ARRAY_GETNDIMENSIONS,CTA_ARRAY_GETNDIMENSIONS)               
+#define CTA_ARRAY_GETDIMENSIONS_F77                         F77_CALL(CTA_ARRAY_GETDIMENSIONS,CTA_ARRAY_GETDIMENSIONS)                
+#define CTA_ARRAY_LENGTH_F77                                F77_CALL(CTA_ARRAY_LENGTH,CTA_ARRAY_LENGTH)                       
+#define CTA_ARRAY_GETVALUESASDOUBLES_F77                    F77_CALL(CTA_ARRAY_GETVALUESASDOUBLES,CTA_ARRAY_GETVALUESASDOUBLES)           
+#define CTA_ARRAY_GETVALUESASDOUBLES_INDEXRANGE_F77         F77_CALL(CTA_ARRAY_GETVALUESASDOUBLES_INDEXRANGE,CTA_ARRAY_GETVALUESASDOUBLES_INDEXRANGE)
+#define CTA_ARRAY_GETVALUEASDOUBLES_INDEX_F77               F77_CALL(CTA_ARRAY_GETVALUEASDOUBLES_INDEX,CTA_ARRAY_GETVALUEASDOUBLES_INDEX)      
+#define CTA_ARRAY_GETVALUEASDOUBLE_INDICES_F77              F77_CALL(CTA_ARRAY_GETVALUEASDOUBLE_INDICES,CTA_ARRAY_GETVALUEASDOUBLE_INDICES)     
+#define CTA_ARRAY_SETVALUESASDOUBLES_F77                    F77_CALL(CTA_ARRAY_SETVALUESASDOUBLES,CTA_ARRAY_SETVALUESASDOUBLES)           
+#define CTA_ARRAY_SETVALUEASDOUBLE_INDICES_F77              F77_CALL(CTA_ARRAY_SETVALUEASDOUBLE_INDICES,CTA_ARRAY_SETVALUEASDOUBLE_INDICES)     
+#define CTA_ARRAY_SETVALUESASDOUBLES_INDEXRANGE_F77         F77_CALL(CTA_ARRAY_SETVALUESASDOUBLES_INDEXRANGE,CTA_ARRAY_SETVALUESASDOUBLES_INDEXRANGE)
+#define CTA_ARRAY_SETVALUEASDOUBLE_INDEX_F77                F77_CALL(CTA_ARRAY_SETVALUEASDOUBLE_INDEX,CTA_ARRAY_SETVALUEASDOUBLE_INDEX)       
+#define CTA_ARRAY_SETCONSTANT_F77                           F77_CALL(CTA_ARRAY_SETCONSTANT,CTA_ARRAY_SETCONSTANT)                  
+#define CTA_ARRAY_AXPYONVALUES_F77                          F77_CALL(CTA_ARRAY_AXPYONVALUES,CTA_ARRAY_AXPYONVALUES)                 
+#define CTA_ARRAY_MULTIPLYVALUES_F77                        F77_CALL(CTA_ARRAY_MULTIPLYVALUES,CTA_ARRAY_MULTIPLYVALUES)               
+#define CTA_ARRAY_RESHAPE_F77                               F77_CALL(CTA_ARRAY_RESHAPE,CTA_ARRAY_RESHAPE)                      
+#define CTA_ARRAY_GETSLICE_F77                              F77_CALL(CTA_ARRAY_GETSLICE,CTA_ARRAY_GETSLICE)                     
+#define CTA_ARRAY_GETSLICE_RANGE_F77                        F77_CALL(CTA_ARRAY_GETSLICE_RANGE,CTA_ARRAY_GETSLICE_RANGE)               
+#define CTA_ARRAY_GETSLICEASDOUBLES_RANGE_F77               F77_CALL(CTA_ARRAY_GETSLICEASDOUBLES_RANGE,CTA_ARRAY_GETSLICEASDOUBLES_RANGE)      
+#define CTA_ARRAY_SETSLICEASDOUBLES_F77                     F77_CALL(CTA_ARRAY_SETSLICEASDOUBLES,CTA_ARRAY_SETSLICEASDOUBLES)            
+#define CTA_ARRAY_SETSLICEASARRAY_F77                       F77_CALL(CTA_ARRAY_SETSLICEASARRAY,CTA_ARRAY_SETSLICEASARRAY)              
+#define CTA_ARRAY_SETSLICEASARRAY_RANGE_F77                 F77_CALL(CTA_ARRAY_SETSLICEASARRAY_RANGE,CTA_ARRAY_SETSLICEASARRAY_RANGE)        
+#define CTA_ARRAY_SETSLICEASDOUBLES_RANGE_F77               F77_CALL(CTA_ARRAY_SETSLICEASDOUBLES_RANGE,CTA_ARRAY_SETSLICEASDOUBLES_RANGE)      
+#define CTA_ARRAY_VALUEINDEX_F77                            F77_CALL(CTA_ARRAY_VALUEINDEX,CTA_ARRAY_VALUEINDEX)                   
+
+
+#define CLASSNAME "CTA_Array"
+
+/**
+ * This is an implementation of an array of doubles with multiple dimensions, a is eg available in fortran or
+ * matlab. It is written as an interface to allow for other implementations with highly optimized code or using parallel
+ * computing.
+ * 
+ * Assumptions:
+ * - The ordering of double[] java-arrays is with the first dimension running slowest, eg the array [[1,2],[3,4],[5,6]]
+ *   is returned/set as [1,2,3,4,5,6] , this is consistent with conventions in c or java.
+ * - Indices start at 0, consistent with c/java conventions.
+ * - The index for dimension starts counting at 0, thus 0 denotes the first dimension.
+ * 
+ * @author nils van velzen
+ *
+ */
+
+/* Struct holding all data associated to an native array */
+typedef struct {
+   double  *values;
+   int     n;
+   int     nValues;
+   int     *dimensions;
+   int     nDimensions;
+} CTAI_Array;
+
+#define MaxDimensions (20)
+
+
+/* Some utility routines */
+
+#undef METHOD
+#define METHOD "CTAI_Array_getData"
+int CTAI_Array_getData(CTA_Array h, CTAI_Array **cthis){
+
+   int retval=CTA_Handle_Check((CTA_Handle) h,CTA_ARRAY);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Handle is not a cta_array handle");
+      return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) h,(void **) cthis);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Cannot retrieve handle data");
+      return retval;
+   }
+   return CTA_OK;
+}
+
+
+#undef METHOD
+#define METHOD "CTAI_Array_getData"
+int CTAI_Array_valueIndex(CTAI_Array *cthis, int nIndices, int *indices){
+
+   int blockSize=1;
+   int result=0;
+   int i;
+
+   if (cthis->nDimensions!=nIndices){
+      char msg[120];
+      sprintf(msg, "number of indices (%d) does not match dimensions of array (%d)",nIndices, cthis->nDimensions);
+      CTA_WRITE_ERROR(msg);
+      return 0;
+   }
+
+   for(i=cthis->nDimensions-1;i>=0;i--){
+      int indexI=indices[i];
+      if(indexI<0){indexI+=cthis->dimensions[i];}
+      result+=indexI*blockSize;
+      blockSize*=cthis->dimensions[i];
+   }
+   return result;
+}
+
+
+
+
+int CTAI_Array_Create(CTA_Array *h){
+   CTAI_Array *cthis;
+   int retval;
+
+   cthis=CTA_Malloc(sizeof(CTAI_Array));
+   cthis->values=NULL;
+   cthis->n=0;
+   cthis->nValues=0;
+   cthis->nDimensions=0;
+   retval=CTA_Handle_Create("array",CTA_ARRAY,cthis,h);
+   if (retval) {
+      CTA_WRITE_ERROR("Cannot create time handle");
+      CTA_Free(cthis);
+      return retval;
+   }
+   return CTA_OK;
+}
+
+int CTA_Array_Free(CTA_Array *h){
+
+   CTAI_Array *cthis;
+   int ierr=CTAI_Array_getData(*h, &cthis);
+   if (ierr==CTA_OK){
+      if (cthis->values){
+         CTA_Free(cthis->values);
+      }
+      CTA_Free(cthis);
+      ierr=CTA_Handle_Free(h);
+   }
+   return ierr;
+}
+
+
+int CTAI_Array_CreateDim(int nDimensions, int *dimensions, CTA_Array *h){
+   CTAI_Array *cthis;
+
+   int ierr=CTAI_Array_Create(h);
+
+   ierr=CTAI_Array_getData(*h, &cthis);
+   if (ierr==CTA_OK){
+      int i;
+      int n=dimensions[0];
+	  cthis->nDimensions=nDimensions;
+      cthis->dimensions=CTA_Malloc(nDimensions*sizeof(int));
+      for (i=0;i<nDimensions;i++){
+         cthis->dimensions[i]=dimensions[i];
+      }
+      for (i=1;i<nDimensions;i++){
+         n*=dimensions[i];
+      }
+      cthis->n=n;
+      cthis->values=CTA_Malloc(n*sizeof(double));
+   }
+   else {
+      CTA_WRITE_ERROR("Handle is not correct");
+      return ierr;
+   }
+   return CTA_OK;
+}
+
+int CTA_Array_CreateAsDoubles(double *values, int nDimensions, int *dimensions, CTA_Array *h){
+   CTAI_Array *cthis;
+   int ierr;
+   CTAI_Array_CreateDim(nDimensions, dimensions, h);
+   ierr=CTAI_Array_getData(*h, &cthis);
+   if (ierr==CTA_OK){
+      int i;
+      for (i=0;i<cthis->n;i++){
+         cthis->values[i]=values[i];
+      }
+   }
+   else {
+      CTA_WRITE_ERROR("Handle is not correct");
+   }
+   return ierr;
+}
+
+
+
+	//storage array
+
+	/**
+	 * Constructor based on java arrays as input.
+	 * @param values
+	 * @param dimensions
+	 * @param copyValues
+	 */
+////	public Array(double[] values, int[] dimensions, boolean copyValues){
+////		//check size
+////		int rank=dimensions.length;
+//	int numberOfValues=dimensions[0];
+//		for(int i=1;i<rank;i++){
+//			numberOfValues*=dimensions[i];
+//		}
+//		if(numberOfValues!=values.length){
+//			throw new RuntimeException("DoublesArray constructor: The number of values does not match the dimensions");
+//		}
+//		//copy data
+//		if(!copyValues){
+//			cthis.values=values; 
+//		}else{
+//			cthis.values = new double[values.length];
+//			System.arraycopy(values, 0, cthis.values, 0, values.length);
+//		}
+//		cthis.dimensions=new int[dimensions.length];
+//		System.arraycopy(dimensions, 0, cthis.dimensions, 0, dimensions.length);
+//	}
+//
+//	public Array(double[] values){
+//		cthis(values,new int[]{values.length},true);
+//	}
+//	
+//	public Array(int[] dimensions){
+//		int rank=dimensions.length;
+//		int numberOfValues=dimensions[0];
+//		for(int i=1;i<rank;i++){
+//			numberOfValues*=dimensions[i];
+//		}
+//		cthis.values = new double[numberOfValues];
+//		cthis.dimensions=new int[dimensions.length];
+//		System.arraycopy(dimensions, 0, cthis.dimensions, 0, dimensions.length);
+//	}
+//
+//	public Array(int dimension){
+//		cthis.values = new double[dimension];
+//		cthis.dimensions=new int[]{dimension};
+//		System.arraycopy(dimensions, 0, cthis.dimensions, 0, dimensions.length);		
+//	}
+//	/**
+//	 * Constructor based on IArray as input.
+//	 * @param sourceArray
+//	 */
+//	public Array(IArray sourceArray){
+//		cthis.values=sourceArray.getValuesAsDoubles(true); 
+//		cthis.dimensions=sourceArray.getDimensions();
+//	}
+//
+//	/**
+//	 * Constructor based on formatted String as input. Format is '{{1,2},{3,4}}' 
+//	 * @param source
+//	 */
+//	public Array(String source){
+//		int stringIndex=0;
+//		ArrayList<Double> valuesList= new ArrayList<Double>();
+//		int[] counter = new int[maxDimensions];
+//		int[] dims    = new int[maxDimensions];
+//		int curDim=-1; //current dimension 
+//		int rank=0;
+//		while(stringIndex<source.length()){
+//			if(source.charAt(stringIndex)=='{'){
+//				curDim++;
+//				if(curDim==rank) rank=curDim+1;
+//				stringIndex++;
+//			}else if(source.charAt(stringIndex)=='}'){
+//				if(counter[curDim]>dims[curDim]) dims[curDim]=counter[curDim];
+//				counter[curDim]=0;
+//				curDim--;
+//				if(curDim<-1){
+//					throw new RuntimeException("Too many closing brackets in array at position="+stringIndex
+//							+" in string="+source);
+//				}
+//				stringIndex++;
+//			}else if(source.charAt(stringIndex)==','){
+//				counter[curDim]++;
+//				stringIndex++;
+//			}else{ //try to find a number
+//				int indexEnd=source.indexOf('}', stringIndex);
+//				// if a comma comes first
+//				int indexComma=source.indexOf(',', stringIndex);
+//				if((indexComma>=0)&(indexComma<indexEnd)){
+//					indexEnd = indexComma;
+//				}
+//				String numberString=source.substring(stringIndex, indexEnd);
+//				double value;
+//				try {
+//					value = Double.parseDouble(numberString);
+//				} catch (NumberFormatException e) {
+//					throw new RuntimeException("Problems parsing array at position="+stringIndex
+//							+"while processing number "+numberString);
+//				}
+//				valuesList.add(value);
+//				stringIndex=indexEnd;
+//			}
+//		}
+//		// store values
+//		int n=valuesList.size();
+//		cthis.values = new double[n];
+//		for(int i=0;i<n;i++){
+//			cthis.values[i]=valuesList.get(i);
+//		}
+//		//store dimensions
+//		cthis.dimensions = new int[rank];
+//		for(int i=0;i<rank;i++){
+//			cthis.dimensions[i]=dims[i]+1;
+//		}
+//	}
+
+	/**
+	 * Get number of dimensions, eg 1 for a vector and 2 for a matrix
+	 * @return rank of the array
+	 */
+#undef METHOD
+#define METHOD "CTA_Array_getNumberOfDimensions"
+int CTA_Array_getNumberOfDimensions(CTA_Array h){
+   CTAI_Array *cthis=NULL;
+   int ierr=CTAI_Array_getData(h, &cthis);
+   if (ierr==CTA_OK){
+	return cthis->nDimensions;
+   }
+   else {
+      CTA_WRITE_ERROR("Handle is not correct");
+      return 0;
+   }
+}  
+
+
+/**
+ * Get the number of array dimensions 
+ * @return size of array, eg 2 for the array [[3,4,10],[3,4,10]]
+ */
+#undef METHOD
+#define METHOD "CTA_Array_getnDimensions"
+int CTA_Array_getnDimensions(CTA_Array h, int *nDimensions){
+   CTAI_Array *cthis=NULL;
+   int ierr=CTAI_Array_getData(h, &cthis);
+   if (ierr==CTA_OK){
+      *nDimensions=cthis->nDimensions;
+   }
+   else {
+      CTA_WRITE_ERROR("Handle is not correct");
+   }
+   return ierr;
+}
+
+
+/**
+ * Get the size of the array for each dimension. The length of the return value equals
+ * the number of dimensions.
+ * @return size of array, eg [3,4,10]
+ */
+#undef METHOD
+#define METHOD "CTA_Array_getDimensions"
+int CTA_Array_getDimensions(CTA_Array h, int *dimensions){
+   CTAI_Array *cthis=NULL;
+   int ierr=CTAI_Array_getData(h, &cthis);
+   if (ierr==CTA_OK){
+      int i;
+      for (i=0; i<cthis->nDimensions; i++){
+         dimensions[i]=cthis->dimensions[i];
+      }
+   }
+   else {
+      CTA_WRITE_ERROR("Handle is not correct");
+   }
+   return ierr;
+}
+
+/**
+ * Total number of elements in an array. this is equal to the product of the values
+ * returned by getDimension and is thus just for c
+ * @return
+ */
+#undef METHOD
+#define METHOD "CTA_Array_length"
+int CTA_Array_length(CTA_Array h){
+   CTAI_Array *cthis=NULL;
+   int ierr=CTAI_Array_getData(h, &cthis);
+   if (ierr==CTA_OK){
+      return cthis->n;
+   }
+   else {
+      CTA_WRITE_ERROR("Handle is not correct");
+   }
+   return ierr;
+}
+
+/**
+ * Get all values of an array. The values are guaranteed be a copy if
+ * copyValues==true or may be a reference if copyValues==false.
+ * Note that the values are never guaranteed to be a reference and can not be used
+ * to change the array. 
+ * @param copyValues
+ * @return
+ */
+#undef METHOD
+#define METHOD "CTA_Array_getValuesAsDoubles"
+int CTA_Array_getValuesAsDoubles(CTA_Array h, double *values){
+   CTAI_Array *cthis=NULL;
+   int ierr=CTAI_Array_getData(h, &cthis);
+   if (ierr==CTA_OK){
+      int i;
+      for (i=0; i<cthis->n; i++){
+         values[i]=cthis->values[i];
+      }
+   }
+   else {
+      CTA_WRITE_ERROR("Handle is not correct");
+   }
+   return ierr;
+}
+
+/**
+ * Get a part of the values from the array. 
+ * Eg let a=[[1,2,3][4,5,6]] then getValuesAsDoubles(1,2)
+ * returns [2,3]
+ * Note that negative values are allowed to denote counting from the end, is -1 denotes the last value.
+ * @param firstIndex
+ * @param lastIndex
+ * @return
+ */
+#undef METHOD
+#define METHOD "CTA_Array_getValuesAsDoubles_indexrange"
+int CTA_Array_getValuesAsDoubles_indexrange(CTA_Array h, int firstIndex, int lastIndex, double *values){
+   CTAI_Array *cthis=NULL;
+   int ierr=CTAI_Array_getData(h, &cthis);
+   if (ierr==CTA_OK){
+      int resultLength;
+	  int i;
+      int n=cthis->n;
+      if(firstIndex<0){firstIndex+=n;}
+      if(lastIndex<0){lastIndex+=n;}
+      resultLength=lastIndex-firstIndex+1;      
+      for (i=0;i<resultLength;i++){
+         values[i]=cthis->values[firstIndex+i];
+      }
+   }
+   else {
+      CTA_WRITE_ERROR("Handle is not correct");
+   }
+   return ierr;
+}
+
+/**
+ * Get a value from an array for specific indices. 
+ * Eg let a=[[1,2,3][4,5,6]] then getValueAsDouble(5)
+ * returns 6
+ * Note that negative values are allowed to denote counting from the end, is -1 denotes the last value.
+ * @param indices index specifier
+ * @return double value at the specific indices
+ */
+
+#undef METHOD
+#define METHOD "CTA_Array_getValueAsDoubles_index"
+int CTA_Array_getValueAsDoubles_index(CTA_Array h, int index, double *value) {
+   CTAI_Array *cthis=NULL;
+   int ierr=CTAI_Array_getData(h, &cthis);
+   if (ierr==CTA_OK){
+      int n=cthis->n;
+      if(index<0){index+=n;}
+      *value=cthis->values[index];
+   }
+   else {
+      CTA_WRITE_ERROR("Handle is not correct");
+   }
+   return ierr;
+}
+
+
+/**
+ * Get a value from an array for specific indices. 
+ * Eg let a=[[1,2,3][4,5,6]] then getValueAsDouble([1,2])
+ * returns 6
+ * Note that negative values are allowed to denote counting from the end, is -1 denotes the last value.
+ * @return
+ */
+#undef METHOD
+#define METHOD "CTA_Array_getValueAsDoubles_indices"
+int CTA_Array_getValueAsDouble_indices(CTA_Array h, int nIndices, int* indices, double *value){
+   CTAI_Array *cthis=NULL;
+   int ierr=CTAI_Array_getData(h, &cthis);
+   if (ierr==CTA_OK){
+      int index = CTAI_Array_valueIndex(cthis, nIndices, indices);
+      *value=cthis->values[index];
+   }
+   else {
+      CTA_WRITE_ERROR("Handle is not correct");
+   }
+   return ierr;
+}
+
+/**
+ * Set the values of this array.
+ * @param values the values as an array of doubles
+ */
+#undef METHOD
+#define METHOD "CTA_Array_setValuesAsDoubles"
+int CTA_Array_setValuesAsDoubles(CTA_Array h, double *values, int length) {
+   CTAI_Array *cthis=NULL;
+   int ierr=CTAI_Array_getData(h, &cthis);
+   if (ierr==CTA_OK){
+      if (cthis->n==length){
+         int i;
+         for (i=0;i<length;i++){
+            cthis->values[i]=values[i];
+         }
+      }
+      else {
+         char msg[120];
+         sprintf(msg,"The length of the array is %d but the user tries to set %d values\n",cthis->n,length);
+         CTA_WRITE_ERROR(msg);
+      }
+   }
+   else {
+      CTA_WRITE_ERROR("Handle is not correct");
+   }
+   return ierr;
+}
+
+/**
+ * Set a value from an array for specific indices. 
+ * Eg let a=[[1,2,3][4,5,6]] then setValueAsDouble([1,2],60.)
+ * results in a=[[1,2,3][4,5,60.]]
+ * Note that negative values are allowed to denote counting from the end, is -1 denotes the last value.
+ * @param indices index specifier
+ * @param value at the specific indices
+ */
+#undef METHOD
+#define METHOD "CTA_Array_setValueAsDouble_indices"
+int CTA_Array_setValueAsDouble_indices(CTA_Array h, int nIndices, int *indices, double value){
+   CTAI_Array *cthis=NULL;
+   int ierr=CTAI_Array_getData(h, &cthis);
+   if (ierr==CTA_OK){
+      int index = CTAI_Array_valueIndex(cthis, nIndices, indices);
+      cthis->values[index]=value;
+   }
+   else {
+      CTA_WRITE_ERROR("Handle is not correct");
+   }
+   return ierr;
+}
+
+/**
+ * Set part of the values of this array.
+ * Let a=[[1,2,3][4,5,6]] then setValuesAsDoubles(1,2,[20,30]) will result in a=[[1,20,30][4,5,6]]
+ * Note that negative values are allowed to denote counting from the end, is -1 denotes the last value.
+ * @param firstIndex specifies the start of the selection
+ * @param lastIndex specifies the end of the selection
+ * @param values that will replace the selected range of numbers
+ */
+#undef METHOD
+#define METHOD "CTA_Array_setValuesAsDoubles_indexrange"
+int CTA_Array_setValuesAsDoubles_indexrange(CTA_Array h, int firstIndex, int lastIndex, double *values){
+   CTAI_Array *cthis=NULL;
+   int ierr=CTAI_Array_getData(h, &cthis);
+   if (ierr==CTA_OK){
+      int resultLength;
+	  int i;
+	  int n=cthis->n;
+      if(firstIndex<0){firstIndex+=n;}
+      if(lastIndex<0){lastIndex+=n;}
+      resultLength=lastIndex-firstIndex+1;      
+      for (i=0;i<resultLength;i++){
+         cthis->values[firstIndex+i]=values[i];
+      }
+   }
+   else {
+      CTA_WRITE_ERROR("Handle is not correct");
+   }
+   return ierr;
+}
+
+	/**
+	 * Get a value from an array for specific indices. 
+	 * Eg let a=[[1,2,3][4,5,6]] then setValueAsDouble(5,60.)
+	 * results in a=[[1,2,3][4,5,60.]]
+	 * Note that negative values are allowed to denote counting from the end, is -1 denotes the last value.
+     * @param indices index specifier
+	 * @param value at the specific indices
+	 */
+#undef METHOD
+#define METHOD "CTA_Array_setValueAsDouble_index"
+int CTA_Array_setValueAsDouble_index(CTA_Array h, int index, double value) {
+   CTAI_Array *cthis=NULL;
+   int ierr=CTAI_Array_getData(h, &cthis);
+   if (ierr==CTA_OK){
+      int n=cthis->n;
+      if(index<0){index+=n;}
+      cthis->values[index]=value;
+   }
+   else {
+      CTA_WRITE_ERROR("Handle is not correct");
+   }
+   return ierr;
+}
+
+/**
+ * Set whole vector equal to a constant value.
+ * <p/>
+ * Note:  This method can only be used if all elements of the vector
+ * have the same data type.
+ *
+ * @param value value to set
+ */
+#undef METHOD
+#define METHOD "CTA_Array_setConstant"
+int CTA_Array_setConstant(CTA_Array h, double value) {
+   CTAI_Array *cthis=NULL;
+   int ierr=CTAI_Array_getData(h, &cthis);
+   if (ierr==CTA_OK){
+      int i;
+      for (i=0;i<cthis->n;i++){
+         cthis->values[i]=value;
+      }
+   }
+   else {
+      CTA_WRITE_ERROR("Handle is not correct");
+   }
+   return ierr;
+}
+
+/**
+ * Perform a values += alpha * axpyValues</c> operation on each value in this array.
+ * @param alpha the <c>alpha</c> in <c>state variable += alpha * vector</c>.
+ * @param axpyValues the values for the axpy-operation on all values in this array.
+ */
+#undef METHOD
+#define METHOD "CTA_Array_axpyOnValues"
+int CTA_Array_axpyOnValues(CTA_Array h, double alpha, double *axpyValues) {
+   CTAI_Array *cthis=NULL;
+   int ierr=CTAI_Array_getData(h, &cthis);
+   if (ierr==CTA_OK){
+      int i;
+      for (i=0;i<cthis->n;i++){
+         cthis->values[i] += alpha*axpyValues[i];
+      }
+   }
+   else {
+      CTA_WRITE_ERROR("Handle is not correct");
+   }
+   return ierr;
+}
+
+/**
+ * Multiply each value in this array with the corresponding multiplication factor.
+ * @param multiplicationFactors the multiplication factors for all array values.
+ */
+#undef METHOD
+#define METHOD "CTA_Array_multiplyValues"
+int CTA_Array_multiplyValues(CTA_Array h, double* multiplicationFactors) {
+   CTAI_Array *cthis=NULL;
+   int ierr=CTAI_Array_getData(h, &cthis);
+   if (ierr==CTA_OK){
+      int i;
+      for (i=0;i<cthis->n;i++){
+         cthis->values[i] *= multiplicationFactors[i];
+      }
+   }
+   else {
+      CTA_WRITE_ERROR("Handle is not correct");
+   }
+   return ierr;
+}
+
+/**
+ * Change the dimensions of an array. The new array should have the same length
+ * @param dimensions
+ */
+#undef METHOD
+#define METHOD "CTA_Array_reshape"
+int CTA_Array_reshape(CTA_Array h, int nDimensions, int *dimensions){
+   CTAI_Array *cthis=NULL;
+   int ierr=CTAI_Array_getData(h, &cthis);
+   if (ierr==CTA_OK){
+      int n=dimensions[0];
+      int i;
+      for(i=1;i<nDimensions;i++){
+         n*=dimensions[i];
+      }
+      if(n!=cthis->n){
+         char msg[120];
+         sprintf(msg,"Can not reshape to a different length. Length %d to %d.",cthis->n, n);
+         CTA_WRITE_ERROR(msg);
+         return CTA_DIMENSION_ERROR;
+      }
+      else {
+         int iDim;
+		 cthis->nDimensions=nDimensions;
+         for (iDim=0; iDim<nDimensions; iDim++){
+            cthis->dimensions[i]=dimensions[i];
+         }
+      }
+   }
+   else {
+      CTA_WRITE_ERROR("Handle is not correct");
+   }
+   return ierr;
+}
+
+
+///**
+// * Is it allowed to write beyond the current end of the array. Some implementations, especially ones writing to disk
+// * can increase the size of the first dimension of the array. 
+// * 
+// * Multiple growing dimensions requires moving the existing data if it implemented with a one-dimensional storage, 
+// * eg as double[]. This is NOT SUPPORTED.  
+// *
+// * @return 
+// */
+//// boolean allowsGrowingFirstDimension(){
+////		return false;
+////}
+//
+
+
+
+
+
+/**
+ * Get part of the array by selection of a subset in one dimension.
+ * Eg. a=[[1,2,3],[4,5,6]] a.getSlice(0,0) returns [1,2,3] 
+ * Note that the number of dimensions IS reduced by one.
+ * @param dimension
+ * @param index
+ * @return
+ */
+#undef METHOD
+#define METHOD "CTA_Array_getSlice"
+int CTA_Array_getSlice(CTA_Array h, int dimension, int index, CTA_Array *h_out){
+
+   CTAI_Array *cthis=NULL;
+   CTAI_Array *cthis_out=NULL;
+   int ierr;
+
+   *h_out=CTA_NULL;
+   ierr=CTAI_Array_getData(h, &cthis);
+
+   if (ierr==CTA_OK){
+      // indexing eg A(:,: ,index,:,:  )
+      //             A(left,at   ,right)
+      int nDimsLeft=dimension;
+      int dimsLeft[MaxDimensions];
+      int dims[MaxDimensions];
+      int rankRight;
+      int dimsRight[MaxDimensions];
+	  int nDims;
+	  int i;
+      for (i=0; i<dimension; i++){
+         dimsLeft[i]=cthis->dimensions[i];
+         dims[i]=dimsLeft[i];
+      }
+      rankRight=cthis->nDimensions-dimension-1;      
+      for (i=0;i<rankRight;i++){
+         dimsRight[i]=cthis->dimensions[dimension+1+i];
+         dims[i+dimension]=dimsRight[i];
+      }
+
+      nDims=nDimsLeft+rankRight;
+      ierr=CTAI_Array_CreateDim(nDims,  dims, h_out);
+      if (ierr==CTA_OK){
+         ierr=CTAI_Array_getData(*h_out, &cthis_out);
+      }
+      if (ierr==CTA_OK){
+
+         // prepare for copy
+         int blockSize = 1;
+		 int offset;
+         int i;
+		 int srcPos; 
+         int destPos;
+		 int stride;
+		 int count;
+
+         for(i=0;i<rankRight;i++) blockSize*=dimsRight[i];
+         offset = index*blockSize;
+         stride = cthis->dimensions[dimension]*blockSize;
+         count  = 1;
+         for(i=0;i<nDimsLeft;i++) count*=dimsLeft[i];
+         // copy values
+         srcPos = offset; 
+         destPos = 0;
+         for(i=0;i<count;i++){
+            int j;
+            for (j=0;j<blockSize;j++){
+               cthis_out->values[destPos+j]=cthis->values[srcPos+j];
+            }
+            srcPos+=stride;
+            destPos+=blockSize;
+         }
+      }
+      else {
+         CTA_WRITE_ERROR("Cannot allocate return array");
+      }
+   }
+   else {
+      CTA_WRITE_ERROR("Handle is not correct");
+   }
+   return ierr;
+}
+
+/**
+ * Get part of the array by selection of a subset in one dimension.
+ * Eg. a=[[1,2,3],[4,5,6],[7,8,9]] a.getSlice(0,0,1) returns [[1,2],[3,4]] 
+ * Note that the number of dimensions is NOT reduced by one.
+ * @param dimension
+ * @param minIndex
+ * @param maxIndex
+ * @return
+ */
+#undef METHOD
+#define METHOD "CTA_Array_getSlice_range"
+int CTA_Array_getSlice_range(CTA_Array h, int dimension, int minIndex, int maxIndex, CTA_Array *h_out){
+	// indexing eg A(:,: ,index,:,:  )
+	//             A(left,at   ,right)
+   CTAI_Array *cthis=NULL;
+   CTAI_Array *cthis_out=NULL;
+   int ierr=CTAI_Array_getData(h, &cthis);
+   if (ierr==CTA_OK){
+	int rankRight;
+      int dimsLeft[MaxDimensions];
+      int dimsRight[MaxDimensions];
+      int dims[MaxDimensions];
+	  int nDims;
+	  int i;
+      int nDimsLeft=dimension;
+      for (i=0;i<dimension;i++){
+         dimsLeft[i]=cthis->dimensions[i];
+      }
+      rankRight=cthis->nDimensions-dimension-1;      
+      for (i=0;i<rankRight;i++){
+         dimsRight[i]=cthis->dimensions[dimension+1+i];
+      }
+      for (i=0; i<cthis->nDimensions;i++){
+         dims[i]=cthis->dimensions[i];
+      }
+      dims[dimension]=maxIndex-minIndex+1;
+      nDims=cthis->nDimensions;
+
+      ierr=CTAI_Array_CreateDim(nDims,  dims, h_out);
+      if (ierr==CTA_OK){
+         ierr=CTAI_Array_getData(*h_out, &cthis_out);
+      }
+      if (ierr==CTA_OK){
+         // prepare for copy
+		 int minOffset;
+		 int stride;
+		 int count;
+		 int srcPos; 
+         int destPos;
+         int j,k;
+
+         int blockSize = 1;
+         for(i=0;i<rankRight;i++) blockSize*=dimsRight[i];
+         minOffset = minIndex*blockSize;
+         stride = cthis->dimensions[dimension]*blockSize;
+         count  = 1;
+         for(i=0;i<nDimsLeft;i++) count*=dimsLeft[i];
+         // copy values
+         srcPos = minOffset; 
+         destPos = 0;
+         for(i=0;i<count;i++){
+	    for(j=0;j<dims[dimension];j++){
+               for (k=0; k<blockSize; k++){
+                  cthis_out->values[destPos+k]=cthis->values[srcPos+k];
+               }           	
+               srcPos+=blockSize;
+               destPos+=blockSize;
+            }
+            srcPos+=stride;
+            destPos+=blockSize;
+         }
+      }
+      else {
+         CTA_WRITE_ERROR("Cannot allocate return array");
+      }
+   }
+   else {
+      CTA_WRITE_ERROR("Handle is not correct");
+   }
+   return ierr;
+}
+
+/**
+ * Get part of the array by selection of a subset in one dimension.
+ * Eg. a=[[1,2,3],[4,5,6],[7,8,9]] a.getSlice(0,0,1) returns [1,2,3,4] 
+ * @param dimension
+ * @param minIndex
+ * @param maxIndex
+ * @return
+ */
+#undef METHOD
+#define METHOD "CTA_Array_getSliceAsDoubles_range"
+int CTA_Array_getSliceAsDoubles_range(CTA_Array h, int dimension, int minIndex, int maxIndex, double *values){
+	// indexing eg A(:,: ,index,:,:  )
+	//             A(left,at   ,right)
+   CTAI_Array *cthis=NULL;
+   int ierr=CTAI_Array_getData(h, &cthis);
+   if (ierr==CTA_OK){
+      int dimsLeft[MaxDimensions];
+	  int rankRight;
+      int dimsRight[MaxDimensions];
+      int i;
+	  int blockSize;
+	  int count;
+	  int minOffset;
+      int stride;
+	  int srcPos; 
+      int destPos;
+      int j,k;
+      int nDimsLeft=dimension;
+	  int dims[MaxDimensions];
+      for (i=0;i<dimension;i++){
+         dimsLeft[i]=cthis->dimensions[i];
+      }
+      rankRight=cthis->nDimensions-dimension-1;
+      for (i=0;i<rankRight;i++){
+         dimsRight[i]=cthis->dimensions[dimension+1+i];
+      }
+      for (i=0; i<cthis->nDimensions;i++){
+         dims[i]=cthis->dimensions[i];
+      }
+      dims[dimension]=maxIndex-minIndex+1;
+
+      // prepare for copy
+      blockSize = 1;
+      for(i=0;i<rankRight;i++) blockSize*=dimsRight[i];
+      minOffset = minIndex*blockSize;
+      stride = cthis->dimensions[dimension]*blockSize;
+      count  = 1;
+      for(i=0;i<nDimsLeft;i++) count*=dimsLeft[i];
+      // copy values
+      srcPos = minOffset; 
+      destPos = 0;
+      for(i=0;i<count;i++){
+         for(j=0;j<dims[dimension];j++){
+            for (k=0; k<blockSize; k++){
+               values[destPos+k]=cthis->values[srcPos+k];
+            }           	
+            srcPos+=blockSize;
+            destPos+=blockSize;
+         }
+         srcPos+=stride;
+         destPos+=blockSize;
+      }
+   }
+   else {
+      CTA_WRITE_ERROR("Handle is not correct");
+   }
+   return ierr;
+}
+
+
+
+/**
+ * Set the values of a part of an array. 
+ * Eg. a=[[1,2,3],[4,5,6],[7,8,9]] and a.setSlice([11,12,13],1,1) 
+ * sets the second column a=[[1,11,3],[4,12,6],[7,13,9]]
+ * Note that the dimension of the slice is one smaller than for the array.
+ * @param slice
+ * @param dimension
+ * @param index
+ */
+#undef METHOD
+#define METHOD "CTA_Array_setSliceAsDoubles"
+int CTA_Array_setSliceAsDoubles(CTA_Array h, double *slice, int dimension, int index){
+   CTAI_Array *cthis=NULL;
+   int ierr=CTAI_Array_getData(h, &cthis);
+   if (ierr==CTA_OK){
+      // indexing eg A(:,: ,index,:,:  )
+      //             A(left,at   ,right)
+      int dimsLeft[MaxDimensions];
+	  int rankRight;
+      int dimsRight[MaxDimensions];
+      int nDimsLeft=dimension;
+      int i;
+	  int blockSize;
+      int offset;
+      int stride;
+      int count;
+	  int srcPos; 
+      int destPos;
+
+      for (i=0;i<dimension;i++){dimsLeft[i]=cthis->dimensions[i];}
+
+      rankRight=cthis->nDimensions-dimension-1;
+      
+      for (i=0;i<rankRight;i++){
+         dimsRight[i]=cthis->dimensions[dimension+1+i];
+      }
+
+      // prepare for copy
+      blockSize = 1;
+      for(i=0;i<rankRight;i++) blockSize*=dimsRight[i];
+      offset = index*blockSize;
+      stride = cthis->dimensions[dimension]*blockSize;
+      count  = 1;
+      for(i=0;i<nDimsLeft;i++) count*=dimsLeft[i];
+      // copy values
+      srcPos = 0; 
+      destPos = offset;
+      for(i=0;i<count;i++){
+         int j;
+         for (j=0; j<blockSize; j++){
+            cthis->values[destPos+j]=slice[srcPos+j];
+         }
+         destPos+=stride;
+         srcPos+=blockSize;
+      }
+   }
+   else {
+      CTA_WRITE_ERROR("Handle is not correct");
+   }
+   return ierr;
+}
+
+
+/**
+ * Set the values of a part of an array. 
+ * Eg. a=[[1,2,3],[4,5,6],[7,8,9]] and a.setSlice([11,12,13],1,1) 
+ * sets the second column a=[[1,11,3],[4,12,6],[7,13,9]]
+ * Note that the dimension of the slice is one smaller than for the array.
+ * @param slice
+ * @param dimension
+ * @param index
+ */
+#undef METHOD
+#define METHOD "CTA_Array_setSliceAsArray"
+int CTA_Array_setSliceAsArray(CTA_Array h, CTA_Array slice_h, int dimension, int index){
+   CTAI_Array *slice;
+   int ierr=CTAI_Array_getData(slice_h, &slice);
+   if (ierr==CTA_OK){
+      ierr=CTA_Array_setSliceAsDoubles(h, slice->values, dimension, index);
+      if (ierr!=CTA_OK){
+         CTA_WRITE_ERROR("Error setting the values of the slice");
+      }
+   }
+   else {
+      CTA_WRITE_ERROR("Handle slice_h is not correct");
+   }
+   return CTA_OK;
+}
+
+
+
+/**
+ * Set the values of a part of an array. 
+ * Eg. a=[[1,2,3],[4,5,6],[7,8,9]] and a.setSlice([[11,12,13],[14,15,16]],1,1,2) 
+ * sets the second and third columns a=[[1,11,14],[4,12,15],[7,13,16]]
+ * Note that the dimension of the slice is the same as for the array.
+ * @param slice
+ * @param dimension
+ * @param minIndex
+ * @param maxIndex
+ */
+#undef METHOD
+#define METHOD "CTA_Array_setSliceAsArray_range"
+int CTA_Array_setSliceAsArray_range(CTA_Array h, CTA_Array slice_h, int dimension, int minIndex, int maxIndex){
+   CTAI_Array *slice;
+   int ierr=CTAI_Array_getData(slice_h, &slice);
+   if (ierr==CTA_OK){
+      ierr=CTA_Array_setSliceAsDoubles_range(h, slice->values, dimension, minIndex, maxIndex);
+      if (ierr!=CTA_OK){
+         CTA_WRITE_ERROR("Error setting the values of the slice");
+      }
+   }
+   else {
+      CTA_WRITE_ERROR("Handle slice_h is not correct");
+   }
+   return ierr;
+
+}
+
+
+/**
+ * Set the values of a part of an array. 
+ * Eg. a=[[1,2,3],[4,5,6],[7,8,9]] and a.setSlice([11,12,13,14,15,16],1,1,2) 
+ * sets the second and third columns a=[[1,11,14],[4,12,15],[7,13,16]]
+ * Note that the dimension of the slice is the same as for the array.
+ * @param slice
+ * @param dimension
+ * @param minIndex
+ * @param maxIndex
+ */
+#undef METHOD
+#define METHOD "CTA_Array_setSliceAsDoubles_range"
+int CTA_Array_setSliceAsDoubles_range(CTA_Array h, double *slice, int dimension, int minIndex, int maxIndex){
+   CTAI_Array *cthis=NULL;
+   int dimsRight[MaxDimensions];
+
+
+   int ierr=CTAI_Array_getData(h, &cthis);
+   if (ierr==CTA_OK){
+
+      // indexing eg A(:,: ,index,:,:  )
+      //             A(left,at   ,right)
+      int dimsLeft[MaxDimensions];
+	  int dims[MaxDimensions];
+      int nDimsLeft=dimension;
+      int i;
+	  int blockSize;
+	  int minOffset;
+      int stride;
+      int count;
+	  int destPos; 
+      int srcPos;
+      int rankRight=cthis->nDimensions-1;
+
+      for (i=0; i<dimension; i++){dimsLeft[i]=cthis->dimensions[i];}
+      for (i=0; i<rankRight; i++){dimsRight[i]=cthis->dimensions[dimension+1+i];}
+      for (i=0; i<cthis->nDimensions; i++){dims[i]=cthis->dimensions[i];}
+      dims[dimension]=maxIndex-minIndex+1;
+
+      // prepare for copy
+      blockSize = 1;
+      for(i=0;i<rankRight;i++) blockSize*=dimsRight[i];
+      minOffset = minIndex*blockSize;
+      stride = cthis->dimensions[dimension]*blockSize;
+      count  = 1;
+      for(i=0;i<nDimsLeft;i++) count*=dimsLeft[i];
+      // copy values
+      destPos = minOffset; 
+      srcPos = 0;
+      for(i=0;i<count;i++){
+         int j;
+         for(j=0;j<dims[dimension];j++){
+            int k;
+            for (k=0; k<blockSize; k++){
+               cthis->values[destPos+k]=slice[srcPos+k];
+            }
+            destPos+=blockSize;
+            srcPos+=blockSize;
+         }
+         destPos+=stride;
+         srcPos+=blockSize;
+      }
+   }
+   else {
+      CTA_WRITE_ERROR("Handle is not correct");
+   }
+   return ierr;
+}	
+
+
+/**
+ * Convert indices in multiple dimensions to position in the one-dimensional array as
+ * returned eg by getValuesAsDoubles
+ * Eg. a=[[1,2,3],[4,5,6],[7,8,9]] then valueIndex([1,0]) returns 3 which points to the value 4 here.  
+ * @param indices
+ * @return position
+ */
+#undef METHOD
+#define METHOD "CTA_Array_reshape"
+int CTA_Array_valueIndex(CTA_Array h, int nIndices, int *indices, int *index){
+   CTAI_Array *cthis=NULL;
+   int ierr=CTAI_Array_getData(h, &cthis);
+   if (ierr==CTA_OK){
+      *index = CTAI_Array_valueIndex(cthis, nIndices, indices);
+   }
+   else {
+      *index=0;
+      CTA_WRITE_ERROR("Handle is not correct");
+   }
+   return ierr;
+}
+
+
+
+CTAEXPORT void CTA_ARRAY_FREE_F77(CTA_Array *h, int *ierr){
+   *ierr = CTA_Array_Free(h);
+}
+CTAEXPORT void CTA_ARRAY_CREATEASDOUBLES_F77(double *values, int *nDimensions, int *dimensions, CTA_Array *h, int *ierr){
+   *ierr = CTA_Array_CreateAsDoubles(values, *nDimensions, dimensions, h);
+}
+
+//void CTA_Array_getNumberOfDimensions(CTA_Array h);
+
+CTAEXPORT void  CTA_ARRAY_GETNDIMENSIONS_F77(CTA_Array *h, int *nDimensions, int *ierr){
+   *ierr = CTA_Array_getnDimensions(*h, nDimensions);
+}
+
+CTAEXPORT void CTA_ARRAY_GETDIMENSIONS_F77(CTA_Array *h, int *dimensions, int *ierr){
+   *ierr = CTA_Array_getDimensions(*h, dimensions);
+}
+
+CTAEXPORT void CTA_ARRAY_LENGTH_F77(CTA_Array *h, int *length, int *ierr){
+   *length = CTA_Array_length(*h);
+   *ierr = CTA_OK;
+}
+
+CTAEXPORT void CTA_ARRAY_GETVALUESASDOUBLES_F77(CTA_Array *h, double *values, int *ierr){
+   *ierr = CTA_Array_getValuesAsDoubles(*h, values);
+}
+
+CTAEXPORT void CTA_ARRAY_GETVALUESASDOUBLES_INDEXRANGE_F77(CTA_Array *h, int *firstIndex, int *lastIndex, double *values, int *ierr){
+   *ierr = CTA_Array_getValuesAsDoubles_indexrange(*h, *firstIndex, *lastIndex, values);
+}
+
+CTAEXPORT void CTA_ARRAY_GETVALUEASDOUBLES_INDEX_F77(CTA_Array *h, int *index, double *value, int *ierr){
+   *ierr = CTA_Array_getValueAsDoubles_index(*h, *index, value);
+}
+
+CTAEXPORT void CTA_ARRAY_GETVALUEASDOUBLE_INDICES_F77(CTA_Array *h, int *nIndices, int* indices, double *value, int *ierr){
+   *ierr = CTA_Array_getValueAsDouble_indices(*h, *nIndices, indices, value);
+}
+
+CTAEXPORT void CTA_ARRAY_SETVALUESASDOUBLES_F77(CTA_Array *h, double *values, int *length, int *ierr){
+   *ierr = CTA_Array_setValuesAsDoubles(*h, values, *length);
+}
+
+CTAEXPORT void CTA_ARRAY_SETVALUEASDOUBLE_INDICES_F77(CTA_Array *h, int *nIndices, int *indices, double *value, int *ierr){
+   *ierr = CTA_Array_setValueAsDouble_indices(*h, *nIndices, indices, *value);
+}
+
+CTAEXPORT void CTA_ARRAY_SETVALUESASDOUBLES_INDEXRANGE_F77(CTA_Array *h, int *firstIndex, int *lastIndex, double *values, int *ierr){
+   *ierr = CTA_Array_setValuesAsDoubles_indexrange(*h, *firstIndex, *lastIndex, values);
+}
+
+CTAEXPORT void CTA_ARRAY_SETVALUEASDOUBLE_INDEX_F77(CTA_Array *h, int *index, double *value, int *ierr){
+   *ierr = CTA_Array_setValueAsDouble_index(*h, *index, *value);
+}
+
+CTAEXPORT void CTA_ARRAY_SETCONSTANT_F77(CTA_Array *h, double *value, int *ierr){
+   *ierr = CTA_Array_setConstant(*h, *value);
+}
+
+CTAEXPORT void CTA_ARRAY_AXPYONVALUES_F77(CTA_Array *h, double *alpha, double *axpyValues, int *ierr){
+   *ierr = CTA_Array_axpyOnValues(*h, *alpha, axpyValues);
+}
+
+CTAEXPORT void CTA_ARRAY_MULTIPLYVALUES_F77(CTA_Array *h, double* multiplicationFactors, int *ierr){
+   *ierr = CTA_Array_multiplyValues(*h, multiplicationFactors);
+}
+
+CTAEXPORT void CTA_ARRAY_RESHAPE_F77(CTA_Array *h, int *nDimensions, int *dimensions, int *ierr){
+   *ierr = CTA_Array_reshape(*h, *nDimensions, dimensions);
+}
+
+CTAEXPORT void CTA_ARRAY_GETSLICE_F77(CTA_Array *h, int *dimension, int *index, CTA_Array *h_out, int *ierr){
+   *ierr = CTA_Array_getSlice(*h, *dimension, *index, h_out);
+}
+
+CTAEXPORT void CTA_ARRAY_GETSLICE_RANGE_F77(CTA_Array *h, int *dimension, int *minIndex, int *maxIndex, CTA_Array *h_out, int *ierr){
+   *ierr = CTA_Array_getSlice_range(*h, *dimension, *minIndex, *maxIndex, h_out);
+}
+
+CTAEXPORT void CTA_ARRAY_GETSLICEASDOUBLES_RANGE_F77(CTA_Array *h, int *dimension, int *minIndex, int *maxIndex, double *values, int *ierr){
+   *ierr = CTA_Array_getSliceAsDoubles_range(*h, *dimension, *minIndex, *maxIndex, values);
+}
+
+CTAEXPORT void CTA_ARRAY_SETSLICEASDOUBLES_F77(CTA_Array *h, double *slice, int *dimension, int *index, int *ierr){
+   *ierr = CTA_Array_setSliceAsDoubles(*h, slice, *dimension, *index);
+}
+
+CTAEXPORT void CTA_ARRAY_SETSLICEASARRAY_F77(CTA_Array *h, CTA_Array *slice_h, int *dimension, int *index, int *ierr){
+   *ierr = CTA_Array_setSliceAsArray(*h, *slice_h, *dimension, *index);
+}
+
+CTAEXPORT void CTA_ARRAY_SETSLICEASARRAY_RANGE_F77(CTA_Array *h, CTA_Array *slice_h, int *dimension, int *minIndex, int *maxIndex, int *ierr){
+   *ierr = CTA_Array_setSliceAsArray_range(*h, *slice_h, *dimension, *minIndex, *maxIndex);
+}
+
+CTAEXPORT void CTA_ARRAY_SETSLICEASDOUBLES_RANGE_F77(CTA_Array *h, double *slice, int *dimension, int *minIndex, int *maxIndex, int *ierr){
+   *ierr = CTA_Array_setSliceAsDoubles_range(*h, slice, *dimension, *minIndex, *maxIndex);
+}
+
+CTAEXPORT void CTA_ARRAY_VALUEINDEX_F77(CTA_Array *h, int *nIndices, int *indices, int *index, int *ierr){
+   *ierr = CTA_Array_valueIndex(*h, *nIndices, indices, index);
+}
+
+
diff --git a/costa/native/cta/src/cta_datatypes.c b/costa/native/cta/src/cta_datatypes.c
new file mode 100644
index 000000000..c808dd33b
--- /dev/null
+++ b/costa/native/cta/src/cta_datatypes.c
@@ -0,0 +1,224 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_datatypes.c $
+$Revision: 3406 $, $Date: 2012-08-16 15:25:53 +0200 (Thu, 16 Aug 2012) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <string.h>
+#include "f_cta_utils.h"
+#include "cta_errors.h"
+#include "cta_datatypes.h"
+#include "cta_message.h"
+
+#define CTA_SIZEOF_F77  F77_CALL(cta_sizeof,CTA_SIZEOF)
+#define CLASSNAME "CTA_Datatypes"
+
+#undef METHOD
+#define METHOD "SizeOf"
+int CTA_SizeOf(CTA_Datatype datatype, int *size){
+
+   switch(datatype){
+   case CTA_HANDLE:
+   case CTA_INTEGER:
+   case CTA_STRING:
+      *size=sizeof(int);
+      break;
+   case CTA_REAL:
+      *size=sizeof(float);
+      break;
+   case CTA_DOUBLE:
+      *size=sizeof(double);
+      break;
+   default:
+      CTA_WRITE_ERROR("This datatype is unknown."); 
+      return CTA_ILLEGAL_DATATYPE;
+      break;
+   }
+   return CTA_OK;
+}
+
+const char *CTAI_Type2String(CTA_Datatype datatype) {
+   switch(datatype){
+   case CTA_HANDLE:
+      return "CTA_HANDLE";
+   case CTA_INTERFACE:
+      return "CTA_INTERFACE";
+   case CTA_FUNCTION:
+      return "CTA_FUNCTION";
+   case CTA_VECTOR:
+      return "treeVectorLeaf";
+   case CTA_VECTORCLASS:
+      return "CTA_VECTORCLASS";
+   case CTA_TREEVECTOR:
+      return "treeVector";
+   case CTA_SUBTREEVECTOR:
+      return "subTreeVector";
+   case CTA_MATRIXCLASS:
+      return "CTA_MATRIXCLASS";
+   case CTA_MATRIX:
+      return "CTA_MATRIX";
+   case CTA_COVMATCLASS:
+      return "CTA_COVMATCLASS";
+   case CTA_COVMAT:
+      return "CTA_COVMAT";
+   case CTA_INTPOL:
+      return "CTA_INTPOL";
+   case CTA_OBS:
+      return "CTA_OBS";
+   case CTA_MODELCLASS:
+      return "CTA_MODELCLASS";
+   case CTA_MODEL:
+      return "CTA_MODEL";
+   case CTA_TIME:
+      return "CTA_TIME";
+   case CTA_SOBS:
+      return "CTA_SOBS";
+   case CTA_SOBSCLASS: /* and CTA_OBSDESCRCLASS */
+      return "CTA_SOBSCLASS";
+   case CTA_OBSDESCR:
+      return "CTA_OBSDESCR";
+   case CTA_METHODCLASS:
+      return "CTA_METHODCLASS";
+   case CTA_METHOD:
+      return "CTA_METHOD";
+   case CTA_TREE:
+      return "CTA_TREE";
+   case CTA_VOID:
+      return "CTA_VOID";
+   case CTA_INTEGER:
+      return "CTA_INTEGER";
+   case CTA_REAL:
+      return "CTA_REAL";
+   case CTA_DOUBLE:
+      return "CTA_DOUBLE";
+   case CTA_FSTRING:
+      return "CTA_FSTRING";
+   case CTA_CSTRING:
+      return "CTA_CSTRING";
+   case CTA_STRING:
+      return "CTA_STRING";
+   case CTA_FILE:
+      return "CTA_FILE";
+   case CTA_1DINTEGER :
+      return "CTA_1DINTEGER";
+   case CTA_1DREAL:
+      return "CTA_1DREAL";
+   case CTA_1DDOUBLE:
+      return "CTA_1DDOUBLE";
+   case CTA_1DFSTRING:
+      return "CTA_1DFSTRING";
+   case CTA_1DCSTRING:
+      return "CTA_1DCSTRING";
+   case CTA_ARRAY:
+      return "CTA_ARRAY";
+   }
+   return "CTA_ILLEGAL_DATATYPE";
+}
+
+CTA_Datatype CTAI_String2Type(const char *dt) {
+   if (0 == strcmp("CTA_HANDLE", dt)) {
+      return CTA_HANDLE;
+   } else if (0 == strcmp("CTA_INTERFACE", dt)) {
+      return CTA_INTERFACE;
+   } else if (0 == strcmp("CTA_FUNCTION", dt)) {
+      return CTA_FUNCTION;
+   } else if (0 == strcmp("CTA_VECTOR", dt)) {
+      return CTA_VECTOR;
+   } else if (0 == strcmp("treeVectorLeaf", dt)) {
+      return CTA_VECTOR;
+   } else if (0 == strcmp("CTA_VECTORCLASS", dt)) {
+      return CTA_VECTORCLASS;
+   } else if (0 == strcmp("CTA_TREEVECTOR", dt)) {
+      return CTA_TREEVECTOR;
+   } else if (0 == strcmp("treeVector", dt)) {
+      return CTA_TREEVECTOR;
+   } else if (0 == strcmp("subTreeVector", dt)) {
+      return CTA_TREEVECTOR;
+   } else if (0 == strcmp("CTA_MATRIXCLASS", dt)) {
+      return CTA_MATRIXCLASS;
+   } else if (0 == strcmp("CTA_MATRIX", dt)) {
+      return CTA_MATRIX;
+   } else if (0 == strcmp("CTA_COVMATCLASS", dt)) {
+      return CTA_COVMATCLASS;
+   } else if (0 == strcmp("CTA_COVMAT", dt)) {
+      return CTA_COVMAT;
+   } else if (0 == strcmp("CTA_INTPOL", dt)) {
+      return CTA_INTPOL;
+   } else if (0 == strcmp("CTA_OBS", dt)) {
+      return CTA_OBS;
+   } else if (0 == strcmp("CTA_MODELCLASS", dt)) {
+      return CTA_MODELCLASS;
+   } else if (0 == strcmp("CTA_MODEL", dt)) {
+      return CTA_MODEL;
+   } else if (0 == strcmp("CTA_TIME", dt)) {
+      return CTA_TIME;
+   } else if (0 == strcmp("CTA_SOBS", dt)) {
+      return CTA_SOBS;
+   } else if (0 == strcmp("CTA_SOBSCLASS", dt)) {
+      return CTA_SOBSCLASS;
+   } else if (0 == strcmp("CTA_OBSDESCRCLASS", dt)) {
+      return CTA_OBSDESCRCLASS;
+   } else if (0 == strcmp("CTA_OBSDESCR", dt)) {
+      return CTA_OBSDESCR;
+   } else if (0 == strcmp("CTA_METHODCLASS", dt)) {
+      return CTA_METHODCLASS;
+   } else if (0 == strcmp("CTA_METHOD", dt)) {
+      return CTA_METHOD;
+   } else if (0 == strcmp("CTA_TREE", dt)) {
+      return CTA_TREE;
+   } else if (0 == strcmp("CTA_VOID", dt)) {
+      return CTA_VOID;
+   } else if (0 == strcmp("CTA_INTEGER", dt)) {
+      return CTA_INTEGER;
+   } else if (0 == strcmp("CTA_REAL", dt)) {
+      return CTA_REAL;
+   } else if (0 == strcmp("CTA_DOUBLE", dt)) {
+      return CTA_DOUBLE;
+   } else if (0 == strcmp("CTA_FSTRING", dt)) {
+      return CTA_FSTRING;
+   } else if (0 == strcmp("CTA_CSTRING", dt)) {
+      return CTA_CSTRING;
+   } else if (0 == strcmp("CTA_STRING", dt)) {
+      return CTA_STRING;
+   } else if (0 == strcmp("CTA_FILE", dt)) {
+      return CTA_FILE;
+   } else if (0 == strcmp("CTA_1DINTEGER ", dt)) {
+      return CTA_1DINTEGER;
+   } else if (0 == strcmp("CTA_1DREAL", dt)) {
+      return CTA_1DREAL;
+   } else if (0 == strcmp("CTA_1DDOUBLE", dt)) {
+      return CTA_1DDOUBLE;
+   } else if (0 == strcmp("CTA_1DFSTRING", dt)) {
+      return CTA_1DFSTRING;
+   } else if (0 == strcmp("CTA_1DCSTRING", dt)) {
+      return CTA_1DCSTRING;
+   } else if (0 == strcmp("CTA_ARRAY", dt)) {
+      return CTA_ARRAY;
+   }
+   return CTA_NULL;
+}
+
+/* Interfacing with Fortran */
+
+CTAEXPORT void CTA_SIZEOF_F77 (int *datatype, int *size, int *ierr){
+   *ierr=CTA_SizeOf((CTA_Datatype) *datatype, size);
+}
+
+
+
diff --git a/costa/native/cta/src/cta_datetime.c b/costa/native/cta/src/cta_datetime.c
new file mode 100644
index 000000000..bf0d2a206
--- /dev/null
+++ b/costa/native/cta/src/cta_datetime.c
@@ -0,0 +1,130 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_datetime.c $
+$Revision: 3407 $, $Date: 2012-08-17 13:50:50 +0200 (Fri, 17 Aug 2012) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2007  Johan Ansink
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <math.h>
+
+#include "f_cta_utils.h"
+#include "ctai.h"
+#include "cta_datetime.h"
+#include "cta_errors.h"
+
+#define CTA_DATETIME_MODIFIEDJULIANTOGREGORIAN_F77   F77_CALL(cta_datetime_modifiedjuliantogregorian,CTA_DATETIME_MODIFIEDJULIANTOGREGORIAN)
+#define CTA_DATETIME_GREGORIANTOMODIFIEDJULIAN_F77   F77_CALL(cta_datetime_gregoriantomodifiedjulian,CTA_DATETIME_GREGORIANTOMODIFIEDJULIAN)
+
+int CTA_DateTime_GregorianToJulian(int year, int month, int day, int hour, int minute, int second, double *jd){
+
+   /*Julian day number from Gregorian date */
+   double a, y, m;
+
+   a = floor((14 - month)/12);
+   y = year + 4800 - a;
+   m = month + 12*a - 3;
+
+   /* For a date in the Gregorian calendar */
+
+   *jd = day + floor((153.*m + 2.)/5.) + y*365. + floor(y/4.) - floor(y/100.) + floor(y/400.)
+         - 32045. + ( second + 60.*minute + 3600.*(hour - 12.) )/86400.0;
+
+	return CTA_OK;
+}
+
+int CTA_DateTime_GregorianToModifiedJulian(int year, int month, int day, int hour, int minute, int second, double *mjd){
+   double jd;
+
+   CTA_DateTime_GregorianToJulian(year,month,day,hour,minute,second,&jd);
+   *mjd = jd- MJDREF;
+   return CTA_OK;
+}
+
+int CTA_DateTime_DaysToHMS (double days, int *h, int *m, int *s){
+   int hour, minute, second;
+
+   second = (int)(86400 * days);
+   /* get number of hours */
+   hour   = second/3600;
+   /* remove the hours */
+   second = second - 3600*hour;
+   /* get number of minutes */
+   minute = second/60;
+   /* remove the minutes */
+   second = second - 60*minute;
+
+   *h = hour;
+   *m = minute;
+   *s = second;
+
+   return CTA_OK;
+}
+
+int CTA_DateTime_ModifiedJulianToJulian(double mjd, double *jd){
+   *jd = mjd + MJDREF;
+   return CTA_OK;
+}
+
+int CTA_DateTime_JulianToGregorian(double jd, int *year, int *month, int *day, int *hour, int *minute, int *second){
+
+   double ijd;          // integer part
+   double fjd;          // fraction part
+   double a,b,c,d,e,m;
+
+   ijd = floor(jd + 0.5);
+
+   fjd = jd - ijd + 0.5;
+   CTA_DateTime_DaysToHMS(fjd, hour, minute, second);
+
+   a = ijd + 32044;
+   b = floor((4 * a + 3) / 146097);
+   c = a - floor((b * 146097) / 4);
+
+   d = floor((4 * c + 3) / 1461);
+   e = c - floor((1461 * d) / 4);
+   m = floor((5 * e + 2) / 153);
+
+   *day   = (int)(e - floor((153 * m + 2) / 5) + 1);
+   *month = (int)(m + 3 - 12 * floor(m / 10));
+   *year  = (int)(b * 100 + d - 4800 + floor(m / 10));
+
+   return CTA_OK;
+}
+
+int CTA_DateTime_ModifiedJulianToGregorian(double mjd, int *year, int *month, int *day, int *hour, int *minute, int *second){
+
+   int    ierr;   // error code
+   double jd;     // Julian date
+
+   ierr=CTA_DateTime_ModifiedJulianToJulian(mjd, &jd);
+   if (ierr == CTA_OK){
+      ierr=CTA_DateTime_JulianToGregorian(jd, year, month, day, hour, minute, second);
+   }
+   return ierr;
+}
+
+/* Interfacing with Fortran */
+
+CTAEXPORT void CTA_DATETIME_GREGORIANTOMODIFIEDJULIAN_F77(int *year, int *month, int *day, int *hour, int *minute, int *second, double *mjd, int *ierr){
+   *ierr=CTA_DateTime_GregorianToModifiedJulian(*year, *month, *day, *hour, *minute, *second, mjd);
+}
+
+CTAEXPORT void CTA_DATETIME_MODIFIEDJULIANTOGREGORIAN_F77(double *mjd, int *year, int *month, int *day, int *hour, int *minute, int *second, int *ierr){
+  *ierr=CTA_DateTime_ModifiedJulianToGregorian(*mjd, year, month, day, hour, minute, second);
+}
+
diff --git a/costa/native/cta/src/cta_defaults.c b/costa/native/cta/src/cta_defaults.c
new file mode 100644
index 000000000..203727e6a
--- /dev/null
+++ b/costa/native/cta/src/cta_defaults.c
@@ -0,0 +1,91 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_defaults.c $
+$Revision: 3406 $, $Date: 2012-08-16 15:25:53 +0200 (Thu, 16 Aug 2012) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include "cta_defaults.h"
+
+/** default dense vector class implementation */
+CTA_VecClass  CTA_DEFAULT_VECTOR;
+
+/** default stochastic observer class implementation */
+CTA_SObsClass CTA_DEFAULT_SOBS;
+
+/** observer description class implementation based on a simple table */
+CTA_ObsDescrClass CTA_OBSDESCR_TABLE;
+
+/** stochastic observer class implementation
+ * that concatenates stochastic observers of arbitrary kind */
+CTA_SObsClass CTA_COMBINE_SOBS;
+
+/** stochastic observer class implementation
+ * based on NetCDF input files                       */
+CTA_SObsClass CTA_NETCDF_SOBS;
+
+/** stochastic observer class implementation
+ * based on MAORI observations library               */
+CTA_SObsClass CTA_MAORI_SOBS;
+
+/** stochastic observer class implementation
+ * based on user provided library                    */
+CTA_SObsClass CTA_USER_SOBS;
+
+/** default dense matrix class implementation */
+CTA_MatClass CTA_DEFAULT_MATRIX;
+
+/** The SP (Single processor) model builder */
+CTA_ModelClass CTA_MODBUILD_SP;
+
+/** The PAR (multiple processor parallel) model builder */
+CTA_ModelClass CTA_MODBUILD_PAR;
+
+/** File handle that actually prints to standard out */
+CTA_File CTA_FILE_STDOUT;
+
+/** The modelcombiner */
+CTA_ModelClass CTA_MODELCOMBINER;
+
+/** The B3B (Black box) model builder */
+CTA_ModelClass CTA_MODBUILD_B3B;
+
+/** The 'New' (Black box) model builder */
+CTA_ModelClass CTA_MODBUILD_BB;
+
+/** Operator for dispaying the scaled RMS of the roots of a treevector to use with method
+ *  CTA_TreeVector_OpOnLeafs */
+CTA_Func CTA_OP_ROOT_RMS;
+
+/** Operator for displaying the values in given indices of the roots of a 
+  * treevector to use with method CTA_TreeVector_OpOnLeafs */
+CTA_Func CTA_OP_ROOT_PRINTI;
+
+/** Operator for calculating the locations of maxabs-values of the roots
+  * of a treevector to use with method CTA_TreeVector_OpOnLeafs */
+CTA_Func CTA_OP_ROOT_AMAX;
+
+/** Operator for dispaying the scaled Sum-of-Squares of the roots of a 
+  * treevector to use with method CTA_TreeVector_OpOnLeafs */
+CTA_Func CTA_OP_ROOT_SSQ;
+
+/** Initial random seed */
+long int CTA_INITIAL_RANDOM_SEED;
+
+/** Name of the user provided dynamic library with user functions */
+char userDefaultDynamicLibrary[256];
diff --git a/costa/native/cta/src/cta_file.c b/costa/native/cta/src/cta_file.c
new file mode 100644
index 000000000..0b4517493
--- /dev/null
+++ b/costa/native/cta/src/cta_file.c
@@ -0,0 +1,399 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_file.c $
+$Revision: 3286 $, $Date: 2012-05-02 18:15:05 +0200 (Wed, 02 May 2012) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+#include <stdio.h>
+#include <string.h>
+#if HAVE_LIBNETCDF
+#include <netcdf.h>
+#endif
+#include "cta_mem.h"
+#include "f_cta_utils.h"
+#include "cta_file.h"
+#include "cta_errors.h"
+#include "cta_message.h"
+
+#define IDEBUG (0)
+
+#define CTA_FILE_CREATE_F77   F77_CALL(cta_file_create,CTA_FILE_CREATE)
+#define CTA_FILE_FREE_F77     F77_CALL(cta_file_free,CTA_FILE_FREE)
+//#define CTA_FILE_GET_F77      F77_CALL(cta_file_get,CTA_FILE_GET)
+//#define CTA_FILE_SET_F77      F77_CALL(cta_file_set,CTA_FILE_SET)
+#define CTA_FILE_OPEN_F77     F77_CALL(cta_file_open,CTA_FILE_OPEN)
+#define CTA_FILE_WRITESTR_F77 F77_CALL(cta_file_writestr,CTA_FILE_WRITESTR)
+
+#define CTA_FILE_GETNETCDF_F77 F77_CALL(cta_file_getnetcdf,CTA_FILE_GETNETCDF)
+#define CTA_File_ISNETCDF_F77  F77_CALL(cta_fILE_isnetcdf,CTA_FILE_ISNETCDF)
+#define CLASSNAME "CTA_File"
+
+typedef enum {text, netcdf} cta_filetype;
+
+
+/* Struct holding all data associated to a COSTA file */
+
+typedef struct {
+cta_filetype fileType;
+FILE *file;
+int ncid;
+} CTAI_File;
+
+
+
+int CTA_File_Create(CTA_File *hfile){
+
+   CTAI_File *cta_file;
+   int retval;
+
+   /* allocate memory for new File object */
+   cta_file=CTA_Malloc(sizeof(CTAI_File));
+   cta_file->fileType     = text;
+   cta_file->file         = NULL;
+   cta_file->ncid         = 0;
+
+   /* Allocate new handle and return eror when unsuccesfull */
+   retval=CTA_Handle_Create("file",CTA_FILE,cta_file,hfile);
+   return retval;
+};
+
+
+#undef METHOD
+#define METHOD "Open"
+int CTA_File_Open(CTA_File hfile, CTA_String spath, CTA_String smode){
+   FILE *file;
+   char *path;
+   char *mode;
+   int lenpath, lenmode, retval;
+   int ncid;
+
+   CTAI_File *cta_file;
+
+   if (IDEBUG>0){printf("Start of CTA_File_Open\n");}
+   path=NULL;
+   mode=NULL;
+
+   /* Chech handle and get data struct */
+   retval=CTA_Handle_Check((CTA_Handle) hfile,CTA_FILE);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Handle is not a cta_time handle");
+      return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hfile,(void**) &cta_file);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   // handle path
+   retval=CTA_String_GetLength(spath, &lenpath);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get length of string");
+	   return retval;
+   }
+   path=CTA_Malloc((lenpath+1)*sizeof(char));
+   retval=CTA_String_Get(spath, path);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get string");
+	   return retval;
+   }
+
+   // determine the filetype
+   if (strlen(path)>3){
+      if (0==strncmp(path+strlen(path)-3,".nc",3)){
+         cta_file->fileType=netcdf;
+      }
+   }
+   if (cta_file->fileType==netcdf){
+#if HAVE_LIBNETCDF
+    if (IDEBUG>0){printf("Open netcdf file\n");}
+{
+   // handle mode
+    if (smode==CTA_NULL){
+        mode=CTA_Malloc(3*sizeof(char));
+        strcpy(mode,"w");
+    } else {
+        retval=CTA_String_GetLength(smode, &lenmode);
+        if (retval!=CTA_OK) {
+           CTA_WRITE_ERROR("Cannot get length of string"); return retval;
+        }
+        mode=CTA_Malloc((lenmode+1)*sizeof(char));
+        retval=CTA_String_Get(smode, mode);
+        if (retval!=CTA_OK) {
+          CTA_WRITE_ERROR("Cannot get string");
+          return retval;
+        }
+    }
+    // first check if file already exists;so we first try nc_open!!!
+    if (0==strcmp(mode,"w"))  {
+      retval = nc_create(path, NC_CLOBBER, &ncid);
+      if (retval != NC_NOERR) {   // no success; so just create it
+        char message[1024];
+        sprintf(message,"Cannot create netCDF-file for no-append writing %s: %s\n", path, nc_strerror(retval));
+        CTA_WRITE_ERROR(message);
+      }
+      if (IDEBUG>0){printf("Created new netcdf file\n");}
+    } else {  //"r" or "a"
+      retval = nc_open(path, NC_WRITE, &ncid);
+      if (retval!=0){
+        char message[1024];
+        sprintf(message,"Cannot open netCDF-file %s for appending or reading: %s\n", path, nc_strerror(retval));
+        CTA_WRITE_ERROR(message);
+      }
+      if (IDEBUG>0){printf("Open existing netcdf file\n");}
+    }
+    cta_file->ncid=ncid;
+}
+
+#else
+      printf("CTA_File_Open: WARNING COSTA is compiled without NETCDF support\n");
+      printf("               Will open file %s as normal text file \n",path);
+      cta_file->fileType=text;
+#endif
+   }
+
+   // Open normal text file
+   if (cta_file->fileType==text){
+
+      // handle mode
+      if (smode==CTA_NULL){
+        mode=CTA_Malloc(3*sizeof(char));
+        strcpy(mode,"w+");
+      } else {
+         retval=CTA_String_GetLength(smode, &lenmode);
+		 if (retval!=CTA_OK) {
+			CTA_WRITE_ERROR("Cannot get length of string");
+			return retval;
+		 }
+         mode=CTA_Malloc((lenmode+1)*sizeof(char));
+         retval=CTA_String_Get(smode, mode);
+		 if (retval!=CTA_OK) {
+			 CTA_WRITE_ERROR("Cannot get string");	 
+			 return retval;
+		 }
+      }
+
+      // Open the file
+      file=fopen(path,mode);
+      CTA_File_Set(hfile,file);
+   }
+   if (path) free(path);
+   if (mode) free(mode);
+   if (IDEBUG>0){printf("End of CTA_File_Open");}
+   return CTA_OK;
+}
+
+   #undef METHOD
+   #define METHOD "WriteStr"
+   int CTA_File_WriteStr(CTA_File hfile, char *str, int eol){
+   CTAI_File *cta_file;
+   int retval;
+
+   retval=CTA_Handle_Check((CTA_Handle) hfile,CTA_FILE);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_time handle");
+	   return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) hfile,(void**) &cta_file);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   fprintf(cta_file->file,"%s",str);
+   if (eol==CTA_TRUE){
+      fprintf(cta_file->file,"\n");
+   }
+   return CTA_OK;
+}
+
+
+#undef METHOD
+#define METHOD "File_Free"
+int CTA_File_Free(CTA_File *hfile)
+{
+   CTAI_File *cta_file;
+   int retval;
+
+
+   retval=CTA_Handle_Check((CTA_Handle) *hfile,CTA_FILE);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_file handle");
+	   return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) *hfile,(void**) &cta_file);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   //Close netcdf file
+   if (cta_file->fileType==netcdf){
+#if HAVE_LIBNETCDF
+      if (cta_file->ncid != 0) {
+		  if ((retval = nc_close(cta_file->ncid))){
+            printf("CTA_File_Free: cannot close netCDF-file: %s\n",
+            nc_strerror(retval));
+         }
+      }
+#endif
+   }
+
+   free(cta_file);
+
+   retval=CTA_Handle_Free(hfile);
+
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "IsNetcdf"
+int CTA_File_IsNetcdf(CTA_File hfile, int *isnetcdf) {
+   CTAI_File *cta_file;
+   int retval;
+
+   *isnetcdf=CTA_FALSE;
+
+   retval=CTA_Handle_Check((CTA_Handle) hfile,CTA_FILE);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_file handle");
+	   return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hfile,(void**) &cta_file);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   if (cta_file->fileType==netcdf){
+      *isnetcdf=CTA_TRUE;
+   }
+   return CTA_OK;
+}
+
+#undef METHOD
+#define METHOD "File_Set"
+int CTA_File_Set(CTA_File hfile, FILE* file)
+{
+   CTAI_File *cta_file;
+   int retval;
+
+   retval=CTA_Handle_Check((CTA_Handle) hfile,CTA_FILE);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_file handle");
+	   return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hfile,(void**) &cta_file);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   cta_file->file = file;
+   return CTA_OK;
+}
+
+#undef METHOD
+#define METHOD "File_Get"
+int CTA_File_Get(CTA_File hfile,FILE** file)
+{
+   CTAI_File *cta_file;
+   int retval;
+
+   retval=CTA_Handle_Check((CTA_Handle) hfile,CTA_FILE);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_time handle");
+	   return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hfile,(void**) &cta_file);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   *file = cta_file->file;
+   return CTA_OK;
+}
+
+#undef METHOD
+#define METHOD "File_GetNetcdf"
+int CTA_File_GetNetcdf(CTA_File hfile,int *ncid)
+{
+   CTAI_File *cta_file;
+   int retval;
+
+   retval=CTA_Handle_Check((CTA_Handle) hfile,CTA_FILE);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_time handle");
+	   return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hfile,(void**) &cta_file);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   *ncid = cta_file->ncid;
+   return CTA_OK;
+}
+
+
+
+
+/* Interfacing with Fortran */
+
+CTAEXPORT void CTA_FILE_CREATE_F77(int *hfile, int *ierr){
+   *ierr=CTA_File_Create((CTA_File*) hfile);
+}
+
+CTAEXPORT void CTA_FILE_FREE_F77(int *hfile, int *ierr){
+   *ierr=CTA_File_Free  ((CTA_File*) hfile);
+}
+
+//NOT avaialble for Fortran:
+//int CTA_File_Get   (CTA_File hfile, FILE **file);
+//int CTA_File_Set   (CTA_File hfile, FILE *file);
+
+CTAEXPORT void CTA_FILE_OPEN_F77(int *hfile, int *sname, int *smode, int *ierr){
+   *ierr=CTA_File_Open((CTA_File) *hfile, (CTA_String) *sname,
+                       (CTA_String) *smode);
+}
+
+
+CTAEXPORT void CTA_FILE_WRITESTR_F77(int *hfile, char *str, int *eol, int *ierr,
+                       int len_str){
+
+   char *c_str;
+   /* create a c-string equivalent to name */
+   c_str=CTA_Malloc((len_str+1)*sizeof(char));
+   CTA_fstr2cstr(str,c_str,len_str);
+
+   *ierr=CTA_File_WriteStr((CTA_File) *hfile, c_str, *eol);
+
+   free(c_str);
+}
+
+
+CTAEXPORT void CTA_FILE_GETNETCDF_F77(int *hfile,int *ncid, int *ierr){
+   *ierr=CTA_File_GetNetcdf((CTA_File) *hfile, ncid);
+}
+
+
+CTAEXPORT void CTA_File_ISNETCDF_F77(int *hfile, int *isnetcdf, int *ierr) {
+   *ierr=CTA_File_IsNetcdf((CTA_File) *hfile, isnetcdf);
+}
diff --git a/costa/native/cta/src/cta_flush.c b/costa/native/cta/src/cta_flush.c
new file mode 100644
index 000000000..bd9d270b2
--- /dev/null
+++ b/costa/native/cta/src/cta_flush.c
@@ -0,0 +1,32 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_flush.c $
+$Revision: 3453 $, $Date: 2012-09-04 12:10:55 +0200 (Tue, 04 Sep 2012) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include "f_cta_utils.h"
+#include "cta.h"
+
+void CTA_Flush()
+{
+#ifndef WIN32
+   fflush(stdout);
+   fflush(stderr);
+#endif
+}
diff --git a/costa/native/cta/src/cta_functions.c b/costa/native/cta/src/cta_functions.c
new file mode 100644
index 000000000..62f1068c9
--- /dev/null
+++ b/costa/native/cta/src/cta_functions.c
@@ -0,0 +1,606 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_functions.c $
+$Revision: 3797 $, $Date: 2013-02-05 16:05:31 +0100 (Tue, 05 Feb 2013) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+// Header files for loading dynamic librarys and functions
+#ifdef HAVE_DLFCN_H
+#include<dlfcn.h>
+#endif
+#ifdef WIN32
+#include <windows.h>
+#endif
+
+#include "ctype.h"
+#include <stdio.h>
+#include "cta_mem.h"
+#include "f_cta_utils.h"
+#include "cta_functions.h"
+#include "cta_errors.h"
+#include "ctai_xml.h"
+#include "cta_message.h"
+
+#define CTA_FUNC_CREATE_F77   F77_CALL(cta_func_create,CTA_FUNC_CREATE)
+#define CTA_FUNC_FREE_F77     F77_CALL(cta_func_free,CTA_FUNC_FREE)
+#define CTA_FUNC_GETINTF_F77  F77_CALL(cta_func_getintf,CTA_FUNC_GETINTF)
+#define CTA_FUNC_GETNAME_F77  F77_CALL(cta_func_getname,CTA_FUNC_GETNAME)
+#define CLASSNAME "CTA_Functions"
+
+
+#define IDEBUG (0)
+// Struct containing specific data associated to a COSTA function handle
+typedef struct {
+   void *function;        //pointer to function
+   CTA_Intf hintf;        //handle of corresponding interface
+   CTA_Handle userdata;   //userdata (array of COSTA handles)
+   void *libhandle;       //Handle of dynamic library
+   int  refcount;         //Number of references to the function
+} CTAI_Function;
+
+
+/** \brief Creates a new user defined function
+ *
+ * \param name     I name of the new user function
+ * \param function I pointer to user funtion
+ * \param hintf    I COSTA interface handle
+ * \param hfunc    O COSTA user function handle
+ * \return error status: CTA_OK
+ */
+#undef METHOD
+#define METHOD "Func_Create"
+int CTA_Func_Create(const char *name, CTA_Function *function,
+                    const CTA_Intf hintf, CTA_Func *hfunc){
+
+   int retval;            //Return value of a call
+   CTAI_Function *data;   //Function specific data
+
+   // Allocate data and set properties
+   data=CTA_Malloc(sizeof(CTAI_Function));
+
+   // Set properties
+   if (IDEBUG>0) {
+      printf("CTA_Func_Create: creating function name=%s\n",name);
+      printf("function pointer is %p\n",function);
+   }
+   data->function=function;
+   data->userdata=CTA_NULL;
+   data->libhandle=NULL;
+   data->refcount=1;
+
+   // Allocate new handle and return eror when unsuccesfull
+   retval=CTA_Handle_Create(name,CTA_FUNCTION,data,hfunc);
+   if (retval) {
+	   CTA_WRITE_ERROR("Cannot create handle");
+	   return retval;
+   }
+   if (IDEBUG>0) printf("function handle is %d\n",*hfunc);
+
+   return CTA_OK;
+}
+
+/** \brief Duplicates a user defined function
+ *
+ * \param hfunc      I COSTA user function handle
+ * \param hdupl      I duplication of hfunc
+ * \return error status: CTA_OK
+ */
+#undef METHOD
+#define METHOD "Duplicate"
+int CTA_Func_Duplicate(CTA_Func hfunc, CTA_Func *hdupl){
+
+   int retval;            //Return value of a call
+   CTAI_Function *data;   //Function specific data
+
+   // Set default return value
+   *hdupl=CTA_NULL;
+
+   // If handle is CTA_NULL -> nothing to be done
+   if (hfunc==CTA_NULL) return CTA_OK;
+
+   // Check Handle and return error if handle is not valid
+   retval=CTA_Handle_Check(hfunc,CTA_FUNCTION);
+   if (retval) {
+	   CTA_WRITE_ERROR("Handle is not a cta_function handle");
+       return retval;
+   }
+
+   // get data
+   CTA_Handle_GetData(hfunc,(void*) &data);
+
+   // increase reference count
+   data->refcount++;
+
+   // copy handle
+   *hdupl=hfunc;
+
+   return CTA_OK;
+}
+
+/** \brief Frees a user defined function
+ *
+ *  \note Freeing CTA_NULL is allowed.
+ *
+ *  \param hfunc  IO  handle of user function. The value is
+ *                   CTA_NULL on return
+ *  \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE
+ */
+#undef METHOD
+#define METHOD "Free"
+int CTA_Func_Free(CTA_Func *hfunc){
+
+   int retval;            //Return value of a call
+   CTAI_Function *data;   //Function specific data
+   char msg[256];
+
+   // If handle is CTA_NULL -> nothing to be done
+   if (*hfunc==CTA_NULL) return CTA_OK;
+
+   // Check Handle and return error if handle is not valid
+   retval=CTA_Handle_Check(*hfunc,CTA_FUNCTION);
+   if (retval) {
+	   CTA_WRITE_ERROR("Handle is not a cta_function handle");
+       return retval;
+   }
+
+   // get data
+   CTA_Handle_GetData(*hfunc,(void*) &data);
+
+   // decrease reference count
+   data->refcount--;
+
+   // If last reference is cleared, delete function
+   if (data->refcount==0){
+      // Check whether to unload dll
+      if (data->libhandle) {
+#ifdef HAVE_DLFCN_H
+         if (dlclose(data->libhandle)){
+            sprintf(msg, "Cannot close library: %s\n",dlerror());
+	    CTA_WRITE_WARNING(msg);
+         }
+#endif
+      }
+
+      // Free data item
+      free(data);
+
+      //Free Handle
+      CTA_Handle_Free(hfunc);
+   } else {
+      *hfunc=CTA_NULL;
+   }
+   return CTA_OK;
+}
+
+
+/** \brief Returns interface handle of a user defined function
+ *
+ *  \param hfunc  I  handle of user function
+ *  \param hintf  O  handle of interface
+ *  \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE
+ */
+#undef METHOD
+#define METHOD "GetIntf"
+int CTA_Func_GetIntf(const CTA_Func hfunc, CTA_Intf *hintf){
+
+   int retval;            //Return value of a call
+   CTAI_Function *data;   //Function specific data
+
+   // Check Handle and return error if handle is not valid
+   retval=CTA_Handle_Check(hfunc,CTA_FUNCTION);
+   if (retval)  {
+	   CTA_WRITE_ERROR("Handle is not a cta_function handle");
+       return retval;
+   }
+
+   // get data-item
+   CTA_Handle_GetData(hfunc,(void*) &data);
+
+   // return interface
+   *hintf=data->hintf;
+
+   return CTA_OK;
+}
+
+/** \brief Returns function pointer of a user function
+ *
+ *  \param hfunc    handle of user function
+ *  \param function pointer to user defined function
+ *  \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE
+ */
+#undef METHOD
+#define METHOD "GetFunc"
+int CTA_Func_GetFunc(const CTA_Func hfunc, CTA_Function **function){
+
+   int retval;            //Return value of a call
+   CTAI_Function *data;   //Function specific data
+
+   // Check Handle and return error if handle is not valid
+   retval=CTA_Handle_Check(hfunc,CTA_FUNCTION);
+   if (retval) {
+	   CTA_WRITE_ERROR("Handle is not a cta_function handle");
+	   return retval;
+   }
+   // Get data-item
+   CTA_Handle_GetData(hfunc,(void*) &data);
+
+   // Set function pointer
+   *function= (CTA_Function*) data->function;
+   return CTA_OK;
+}
+
+/** \brief Returns name of a user function
+ *
+ *  \param hfunc    I handle of user function
+ *  \param name     O name of user function,
+ *  \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE
+ */
+#undef METHOD
+#define METHOD "GetName"
+int CTA_Func_GetName(const CTA_Func hfunc, CTA_String name){
+
+   int retval;            //Return value of a call
+
+   // Check Handle and return error if handle is not valid
+   retval=CTA_Handle_Check(hfunc,CTA_FUNCTION);
+   if (retval) {
+	   CTA_WRITE_ERROR("Handle is not a cta_function handle");
+	   return retval;
+   }
+
+   retval=CTA_Handle_GetName(hfunc,name);
+   return retval;
+}
+
+
+/** \brief Set userdata for a user defined function
+ *
+ * \param hfunc    I handle of user function
+ * \param userdata I handles to userdata
+ * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE
+ */
+#undef METHOD
+#define METHOD "SetUSerData"
+int CTA_Func_SetUserdata(const CTA_Func hfunc, const CTA_Handle userdata ){
+
+   int retval;            //Return value of a call
+   CTAI_Function *data;   //Function specific data
+
+
+   // Check Handle and return error if handle is not valid
+   retval=CTA_Handle_Check(hfunc,CTA_FUNCTION);
+   if (retval) {
+	   CTA_WRITE_ERROR("Handle is not a cta_function handle");
+	   return retval;
+   }
+
+   // Get data-item
+   CTA_Handle_GetData(hfunc,(void*) &data);
+
+   // Allocate userdata and copy values
+   data->userdata=userdata;
+
+   return CTA_OK;
+}
+/** \brief Returns the userdata array of a user defined function
+ *
+ * \param hfunc    I   handle of user function
+ * \param userdata O   handles to userdata
+ *  \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE,
+ *                        CTA_ARRAY_TOO_SHORT
+ */
+#undef METHOD
+#define METHOD "GetUserdata"
+int CTA_Func_GetUserdata(const CTA_Func hfunc, CTA_Handle userdata ){
+
+   int retval;            //Return value of a call
+   CTAI_Function *data;   //Function specific data
+
+   // Check Handle and return error if handle is not valid
+   retval=CTA_Handle_Check(hfunc,CTA_FUNCTION);
+   if (retval) {
+	   CTA_WRITE_ERROR("Handle is not a cta_function handle");
+	   return retval;
+   }
+
+   // Get data-item
+   CTA_Handle_GetData(hfunc,(void*) &data);
+
+   // Copy userdata
+   userdata=data->userdata;
+
+   return CTA_OK;
+}
+
+
+/** \brief Change the name of the library into a platform specific name
+*
+*  \param libname_in   Suggested name of the library
+*  \param libname_out  Name of library on platform
+*  \return             Handle to create or CTA_NULL in case of an error.
+*/
+void CTAI_DLLName(const char *libname_in, char *libname_out){
+
+   int i;
+   /* step1 strip of extension (if any) */
+   for (i= strlen(libname_in)-1;i>=0;i--){
+      if (libname_in[i]=='.') {break;}
+   }
+   if (i>0) {
+      strncpy(libname_out,libname_in,i);
+      libname_out[i]='\0';
+   }
+   else {
+      strcpy(libname_out,libname_in);
+   }
+
+#ifdef WIN32
+   strcat(libname_out,".dll");
+#else
+#ifdef MAC_OS_X
+   strcat(libname_out,".dylib");
+#else
+   strcat(libname_out,".so");
+#endif
+#endif
+
+}
+
+
+/** \brief Change the name of the function into a Fortran function name*
+*  \param funcname   I name of the function
+*  \param f_funcname O Fortran name of the function
+*
+*  \note f_funcname is allocated by this function and should be freed by
+*  the calling routine.
+
+*/
+void CTAI_FunctionNameFortran(char *funcname, char **f_funcname, int mode){
+
+   int i, lenName, lenName2;
+   // Make different variants of the function name 
+   /* 1: Name
+      2: Name_
+      3: name
+      4: name_
+      5: NAME
+      6: NAME_
+   */
+   lenName=strlen(funcname);
+   lenName2 = lenName + 1;
+   if (mode == 2 || mode == 4 || mode ==6) {lenName2 = lenName + 2;}
+   *f_funcname=CTA_Malloc((lenName2)*sizeof(char));
+
+   for (i=0;i<lenName;i++){
+	   if (mode == 3 || mode == 4) {
+		   (*f_funcname)[i]=tolower(funcname[i]);}
+	   else if (mode == 5 || mode == 6) {
+		   (*f_funcname)[i]=toupper(funcname[i]);} 
+	   else { (*f_funcname)[i]= (funcname[i]); }
+   }
+   if (mode == 2 || mode == 4 || mode ==6) {   
+	   (*f_funcname)[lenName]='_';}
+
+   (*f_funcname)[lenName2-1]='\0';
+}
+
+
+int CTAI_GetProcAddress(CTAI_Function *data, char *funcname){
+  #ifdef HAVE_DLFCN_H  // linux!
+    data->function = dlsym(data->libhandle, funcname);
+  #else
+              // windows
+    data->function =  GetProcAddress(data->libhandle, funcname); 
+  #endif
+  return CTA_OK;
+}
+
+
+CTA_Func CTA_CreateFuncDynamicLib(char *libraryName, char *functionName, char *name, char *id){
+   CTA_Func      hfunc;               /* the new function */
+//   xmlChar       *id = NULL;          /* id of function in XML-tree */
+//   xmlChar       *library_inp = NULL; /* name of the dynamic link library */
+//   xmlChar       *funcname =NULL;     /* name of the function */
+   char*         f_funcname=NULL;     /* Fortran name of funcname */
+   int           retval;              /* return status of creation       */
+   char          library[256];        /* name of the dynamic link library */
+#if defined WIN32
+   WCHAR wLibrary[256];
+   int i;
+#endif
+
+   CTAI_Function *data;   //Function specific data
+   int nerror;
+   char msg[256];
+   nerror=0;
+
+   /* Add the dynamic library extention to the library name as used on this OS (so, dll, dylib,...) */
+   CTAI_DLLName(libraryName, library);
+
+   /* Create a new COSTA function instance */
+   retval=CTA_Func_Create((char *) name, NULL , CTA_NULL, &hfunc);
+   if (retval!=CTA_OK){
+      CTA_WRITE_WARNING("Cannot create a new COSTA function");
+      return CTA_NULL;
+   }
+
+   /* Get data block */
+   CTA_Handle_GetData(hfunc,(void*) &data);
+
+   /* WINDOWS VERSION FOR LOADING A FUNCTION FROM A SHARED LIBRARY: */
+   #if defined HAVE_DLFCN_H || defined WIN32
+     /* Open Dynamic link library */
+      #ifdef HAVE_DLFCN_H
+         data->libhandle=dlopen(library, RTLD_LAZY);
+      #else
+         /* convert string wLibrary to WCHAR, not sure this is the right way.
+	    but it seems to work */
+         for (i=0;i<256;i++){wLibrary[i]=library[i];}
+         data->libhandle=LoadLibrary(wLibrary);
+      #endif
+
+      /* Check library handle */
+      if (data->libhandle==NULL){
+         sprintf(msg,"Error in CTAI_XML_CreateFunc:Cannot open library %s\n",library);
+         CTA_WRITE_WARNING(msg);
+         #ifdef HAVE_DLFCN_H
+            sprintf(msg, "error is: |%s| \n",dlerror());
+            CTA_WRITE_WARNING(msg);
+         #else
+            sprintf(msg, "GetLastError code is %d\n",GetLastError());
+            CTA_WRITE_WARNING(msg);
+         #endif
+         nerror++;
+      }
+
+      /* Load function and try all kinds of typical fortran variants */
+      /* 1: original approach: Name */
+      retval = CTAI_GetProcAddress(data, (char *) functionName);
+      if (data->function == NULL){  /* 2: Name_ */
+        CTAI_FunctionNameFortran((char *) functionName, &f_funcname,2);
+        retval=CTAI_GetProcAddress(data, f_funcname);
+        free(f_funcname);
+      }
+      if (data->function == NULL){  /* 3: name */
+        CTAI_FunctionNameFortran((char *) functionName, &f_funcname,3);
+        retval=CTAI_GetProcAddress(data, f_funcname);
+        free(f_funcname);
+       }
+      if (data->function == NULL){  /* 4: name_ */
+         CTAI_FunctionNameFortran((char *) functionName, &f_funcname,4);
+         retval=CTAI_GetProcAddress(data, f_funcname);
+         free(f_funcname);
+      }
+      if (data->function == NULL){  /* 5: NAME */
+         CTAI_FunctionNameFortran((char *) functionName, &f_funcname,5);
+         retval=CTAI_GetProcAddress(data, f_funcname);
+         free(f_funcname);
+      }
+      if (data->function == NULL){  /* 6: NAME_ */
+         CTAI_FunctionNameFortran((char *) functionName, &f_funcname,6);
+         retval=CTAI_GetProcAddress(data, f_funcname);
+         free(f_funcname);
+       }
+
+       /* Check function pointer */
+       if (data->function==NULL){
+          sprintf(msg, "WARNING in CTAI_XML_CreateFunc:Cannot load function %s\n",functionName);
+          CTA_WRITE_WARNING(msg);
+          #ifdef HAVE_DLFCN_H
+             sprintf(msg,"%s\n",dlerror());
+             CTA_WRITE_WARNING(msg);
+          #else
+             sprintf(msg,"GetLastError code is %d\n",GetLastError());
+             CTA_WRITE_WARNING(msg);
+          #endif
+          nerror++;
+      }
+   #else
+      sprintf(msg, "Warning: Cannot load functions dynamically\n");
+      CTA_WRITE_WARNING(msg);
+      sprintf(msg, "Compile COSTA with support for Dynamic loading \n");
+      CTA_WRITE_WARNING(msg);
+      nerror++;
+   #endif
+
+
+   //finaly Free created function handle in case of an error or set the id of the handle
+   if (nerror>0) {
+      retval=CTA_Func_Free(&hfunc);
+   } else {
+      /* Set id (=name) of handle */
+      CTAI_Handle_SetName(hfunc, (char *) id);
+   }
+
+   return hfunc;
+} 
+
+
+/** \brief Create a COSTA function from XML
+*          (load from dynamic load library).
+*
+*  \param cur_node  I  Current XML node
+*  \return             Handle to create or CTA_NULL in case of an error.
+*/
+CTA_Func CTAI_XML_CreateFunc(xmlNode *cur_node) {
+
+   CTA_Func      hfunc;               /* the new function */
+   xmlChar       *id = NULL;          /* id of function in XML-tree */
+   xmlChar       *name = NULL;        /* (lookup) name of the function */
+   xmlChar       *libraryName = NULL; /* name of the dynamic link library */
+   xmlChar       *functionName =NULL;     /* name of the function */
+
+   /* Parse this node's attributes */
+   /* Get id */
+   id = xmlGetProp(cur_node, CTAI_XML_ID);
+
+   /* Get name */
+   name = xmlGetProp(cur_node, CTAI_XML_NAME);
+
+   /* Get tag */
+   libraryName = xmlGetProp(cur_node,  CTAI_XML_LIBRARY);
+
+   /* Get function name */
+   functionName = xmlGetProp(cur_node, CTAI_XML_FUNCTION);
+
+   hfunc=CTA_CreateFuncDynamicLib((char*) libraryName, (char*) functionName, (char*) name, (char*) id);
+
+   return hfunc;
+}
+
+/* Interfacing with Fortran */
+
+CTAEXPORT void CTA_FUNC_CREATE_F77(char *name, void *function , int *hintf, int *hfunc,
+                     int *ierr, int len_name){
+   char  *c_name;
+   // create a c-string equivalent to name
+   c_name=CTA_Malloc((len_name+1)*sizeof(char));
+   CTA_fstr2cstr(name,c_name,len_name);
+
+   *ierr=CTA_Func_Create(c_name, (CTA_Function*) function , (CTA_Intf) *hintf,
+                         (CTA_Func *) hfunc);
+   free(c_name);
+}
+
+
+CTAEXPORT void CTA_FUNC_FREE_F77(CTA_Func *hfunc, int* ierr){
+
+   *ierr=CTA_Func_Free((CTA_Func*) hfunc);
+}
+
+CTAEXPORT void CTA_FUNC_GETINTF_F77(int *hfunc, int *hintf, int *ierr){
+
+  *ierr=CTA_Func_GetIntf((CTA_Func) *hfunc, (CTA_Intf*) hintf);
+};
+
+// It is not possible to create a Fortran interface for CTA_Func_GetFunc
+//int CTA_Func_GetFunc(const CTA_Func hfunc, CTA_Function **function);
+
+
+CTAEXPORT void CTA_FUNC_GETNAME_F77(int *hfunc, int *hname, int *ierr){
+/*
+   char *c_name; //C-string copy of name
+   c_name=CTA_Malloc((len_name+1)*sizeof(char));
+   *ierr=CTA_Func_GetName((CTA_Func) *hfunc, c_name);
+   CTA_cstr2fstr(c_name,name,len_name);
+   free(c_name);
+*/
+   *ierr=CTA_Func_GetName((CTA_Func) *hfunc, (CTA_String) *hname);
+};
+
diff --git a/costa/native/cta/src/cta_handles.c b/costa/native/cta/src/cta_handles.c
new file mode 100644
index 000000000..f27b958c1
--- /dev/null
+++ b/costa/native/cta/src/cta_handles.c
@@ -0,0 +1,975 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_handles.c $
+$Revision: 3406 $, $Date: 2012-08-16 15:25:53 +0200 (Thu, 16 Aug 2012) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+#include <stdio.h>
+#include <string.h>
+#include <pthread.h>
+
+#include "cta_mem.h"
+#include "f_cta_utils.h"
+#include "cta_string.h"
+#include "cta_handles.h"
+#include "cta_errors.h"
+#include "cta_message.h"
+
+#define CTA_HANDLE_CREATE_F77      F77_CALL(cta_handle_create,CTA_HANDLE_CREATE)
+#define CTA_HANDLE_FREE_F77        F77_CALL(cta_handle_free,CTA_HANDLE_FREE)
+#define CTA_HANDLE_CHECK_F77       F77_CALL(cta_handle_check,CTA_HANDLE_CHECK)
+#define CTA_HANDLE_GETNAME_F77     F77_CALL(cta_handle_getname,CTA_HANDLE_GETNAME)
+#define CTA_HANDLE_GETVALUE_F77    F77_CALL(cta_handle_getvalue,CTA_HANDLE_GETVALUE)
+#define CTA_HANDLE_GETDATATYPE_F77 F77_CALL(cta_handle_getdatatype,CTA_HANDLE_GETDATATYPE)
+#define CTA_HANDLE_FIND_F77        F77_CALL(cta_handle_find,CTA_HANDLE_FIND)
+#define CTA_HANDLE_PRINTALL_F77    F77_CALL(cta_handle_printall,CTA_HANDLE_PRINTALL)
+#define CTA_HANDLE_PRINTINFO_F77   F77_CALL(cta_handle_printinfo,CTA_HANDLE_PRINTINFO)
+
+#define CLASSNAME "CTA_Handles"
+
+#define IDEBUG (1)
+
+int counter =0;
+static pthread_mutex_t mutex =  PTHREAD_MUTEX_INITIALIZER;
+
+CTAEXPORT extern int CTA_PAR_MY_RANK;
+
+
+long CTAI_Vector_GetMemsize();
+
+
+
+
+/* Struct holding all data associated to an COSTA Handle */
+typedef struct {
+   char *name;              // Name of handle for debugging and informational messages
+   CTA_Datatype datatype;   // Data type associated with this handle
+   void *data;              // Pointer to the type dependent data
+   int refCount; 	        // Number of references from which we expect a free
+	                        // (only used for a limited number of components used from java)
+   int globCount;   //DEBUG
+   char debugTag[80];
+} CTAI_HandleInfo;
+
+/*
+   Handle administration: list of all COSTA handles and their data.
+   The array of pointers to CTAI_Handles contains all data. The COSTA handles
+   are the indices in this array.
+
+   Finding our way in the administration:
+   The size information is the most important and holds the current length of
+   CTA_Handles. The other two CTAI_Handles_free, holding the number of free
+   elements and CTAI_Handles_last are only usefull for performance.
+   CTAI_Handles_free is used to easy check whether the array CTAI_HandleInfo must
+   be enlarged when a new handle is to be created.
+   CTAI_Handles_last points to the postion where the last element is added or one
+   position before the last handle that has been freed. It is the starting position
+   for finding a new free position in CTAI_Handles.
+
+*/
+static CTAI_HandleInfo **CTAI_Handles=NULL; //List of handles
+static int CTAI_Handles_size=0;             // Size of list
+static int CTAI_Handles_free=0;             // Number of free handles
+static int CTAI_Handles_last=0;             // Position of last allocated handle handle
+                                            // or just before last freed handle
+
+
+/** \brief Creates a new COSTA handle
+ *
+ * \param name     I name associated to handle
+ * \param datatype I datatype of handle
+ * \param data     I block of data associated to handle
+ * \param handle   O COSTA handle
+ * \return error status: CTA_OK
+ */
+
+#undef METHOD
+#define METHOD "Create"
+int CTA_Handle_Create(const char *name, const CTA_Datatype datatype,
+                      void *data, CTA_Handle *handle){
+
+   int i; //index counter
+
+   // Initialisation
+   *handle=CTA_NULL;
+
+   pthread_mutex_lock( &mutex );
+
+   // Some additional debug stuff when we expect a memleak
+   if (counter%10000 ==0){
+      pthread_mutex_unlock( &mutex );
+    //  CTA_Handle_PrintInfo("CTA_handle_Create ");
+      pthread_mutex_lock( &mutex );
+   }
+   counter++;
+
+   // Find a new handle:
+   if (CTAI_Handles_size == 0)
+   {
+   // IF (list does not exist yet) THEN
+      // Create a list
+      int newlen=1000;
+      //int newlen=100000;
+      CTAI_Handles=CTA_Malloc(newlen*sizeof(CTA_Handle*));
+
+      // Initialise list with null-pointers
+      for (i=0;i<newlen;i++){
+         CTAI_Handles[i]=NULL;
+      }
+      // Set handle and ''list'' administration
+      *handle=1;
+      CTAI_Handles_free=newlen-2;
+      CTAI_Handles_size=newlen;
+   }
+   else if (CTAI_Handles_free == 0)
+   {
+      // No free entries extent list -> extent list and choose first new handle
+      int oldlen=CTAI_Handles_size;
+      //int newlen=oldlen*2;
+      int newlen=oldlen*2;
+
+
+      CTA_Handle **Handles_new=realloc(CTAI_Handles,newlen*sizeof(CTA_Handle*));
+      if (Handles_new==NULL){
+	CTA_WRITE_ERROR("FATAL Cannot allocate new memory to store handels");
+        exit(-1);
+      }
+      else {
+         CTAI_Handles = Handles_new;
+      }
+
+      // Initialise reallocated part
+      for (i=oldlen;i<newlen;i++){
+         CTAI_Handles[i]=NULL;
+      }
+      // Set handle and ''list'' administration
+      *handle=oldlen;
+      CTAI_Handles_free=newlen-oldlen-1;
+      CTAI_Handles_size=newlen;
+   } else {
+      // Find a new free entry starting at last allocated position
+      i=CTAI_Handles_last;
+      while (*handle == CTA_NULL){
+         i= i % (CTAI_Handles_size-1) + 1;
+         if (CTAI_Handles[i]==NULL) *handle=i;
+      }
+      CTAI_Handles_free--;
+   }
+   // Allocate CTAI_HandleInfo-struct and fill it
+   CTAI_Handles[*handle]=CTA_Malloc(sizeof(CTAI_HandleInfo));
+   if (name) {
+      CTAI_Handles[*handle]->name = CTA_Malloc(1 + strlen(name));
+      strcpy(CTAI_Handles[*handle]->name, name);
+      CTAI_Handles[*handle]->name[strlen(name)] = '\0';
+   } else {
+      CTAI_Handles[*handle]->name = CTA_Malloc(1);
+      CTAI_Handles[*handle]->name[0] = '\0';
+   }
+   CTAI_Handles[*handle]->datatype=datatype;
+   CTAI_Handles[*handle]->data=data;
+   CTAI_Handles[*handle]->refCount=1;
+
+   pthread_mutex_unlock( &mutex );
+
+   return CTA_OK;
+}
+
+
+#undef METHOD
+#define METHOD "GetRefCount"
+int CTA_Handle_GetRefCount(const CTA_Handle handle, int *refCount){
+
+   pthread_mutex_lock( &mutex );
+
+   if (handle>=0 && handle<CTAI_Handles_size && CTAI_Handles[handle]!=NULL){
+      *refCount=CTAI_Handles[handle]->refCount;
+      // else, function handle is not used ->return CTA_ILLEGAL_HANDLE
+   }
+   else {
+      char message[1024];
+      pthread_mutex_unlock( &mutex );
+      sprintf(message,"Handle nr %d is NOT in list of %d handles",handle,CTAI_Handles_size);
+      CTA_WRITE_ERROR(message);
+
+
+      return CTA_ILLEGAL_HANDLE;
+   }
+
+
+   pthread_mutex_unlock( &mutex );
+
+   return CTA_OK;
+}
+
+#undef METHOD
+#define METHOD "IncRefCount"
+int CTA_Handle_IncRefCount(const CTA_Handle handle){
+
+   pthread_mutex_lock( &mutex );
+
+   if (handle>=0 && handle<CTAI_Handles_size && CTAI_Handles[handle]!=NULL){
+      CTAI_Handles[handle]->refCount++;
+
+      // else, function handle is not used ->return CTA_ILLEGAL_HANDLE
+   }
+   else {
+
+      char message[1024];
+	  pthread_mutex_unlock( &mutex );
+      sprintf(message,"Handle nr %d is not in list of %d handles",handle,CTAI_Handles_size);
+      CTA_WRITE_ERROR(message);
+
+
+      return CTA_ILLEGAL_HANDLE;
+   }
+   pthread_mutex_unlock( &mutex );
+
+   return CTA_OK;
+}
+
+#undef METHOD
+#define METHOD "DecrRefCount"
+int CTA_Handle_DecrRefCount(const CTA_Handle handle){
+
+   pthread_mutex_lock( &mutex );
+
+   if (handle>=0 && handle<CTAI_Handles_size && CTAI_Handles[handle]!=NULL){
+      CTAI_Handles[handle]->refCount--;
+      // else, function handle is not used ->return CTA_ILLEGAL_HANDLE
+   }
+   else {
+
+      char message[1024];
+	  pthread_mutex_unlock( &mutex );
+
+      sprintf(message,"Handle nr %d is not in list of %d handles",handle,CTAI_Handles_size);
+      CTA_WRITE_ERROR(message);
+
+
+      return CTA_ILLEGAL_HANDLE;
+   }
+
+   pthread_mutex_unlock( &mutex );
+
+   return CTA_OK;
+}
+
+
+/** \brief Checks whether a handle is valid and checks type
+ *
+ *  \note The handle CTA_NULL is not valid.
+ *
+ * \param handle   I COSTA handle
+ * \param datatype I The datatype the handle must be compatible with
+ * \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE
+ *
+ */
+
+#undef METHOD
+#define METHOD "Check"
+int CTA_Handle_Check(const CTA_Handle handle,
+                                 const CTA_Datatype datatype){
+
+   pthread_mutex_lock( &mutex );
+
+   if (handle>=0 && handle<CTAI_Handles_size &&
+       CTAI_Handles[handle]!=NULL){
+       // Handle is used, now check dataType
+       if (datatype!=CTA_HANDLE && CTAI_Handles[handle]->datatype!=datatype){
+          // datatype does not correspond -> return CTA_INCOMPATIBLE_HANDLE
+
+             char message[1024];
+		     pthread_mutex_unlock( &mutex );
+             sprintf(message,"Handle is of type %d  instead of %d .\nSee cta_datatypes.h for more information.",CTAI_Handles[handle]->datatype, datatype);
+             CTA_WRITE_ERROR(message);
+
+
+             return CTA_INCOMPATIBLE_HANDLE;
+       }
+   // else, function handle is not used ->return CTA_ILLEGAL_HANDLE
+   }else{
+
+      char message[1024];
+	  pthread_mutex_unlock( &mutex );
+
+      sprintf(message,"Handle nr %d is not in list of %d handles",handle,CTAI_Handles_size);
+      CTA_WRITE_ERROR(message);
+      return CTA_ILLEGAL_HANDLE;
+   }
+
+   pthread_mutex_unlock( &mutex );
+
+   return CTA_OK;
+}
+
+/** \brief Frees a COSTA handle
+ *
+ *  \note The data part of the handle is NOT freed, freeing CTA_NULL is
+ *
+ *  \param handle I/O handle that must be freed, CTA_NULL on return.
+ *   allowed
+ *  \return error status: CTA_OK, CTA_ILLEGAL_HANDLE
+ */
+#undef METHOD
+#define METHOD "Free"
+int CTA_Handle_Free(CTA_Handle *handle){
+
+   // If handle is CTA_FUNC_NULL -> nothing to be done
+   if (*handle==CTA_NULL) return CTA_OK;
+   // if handle is not ok -> return CTA_ILLEGAL_HANDLE
+   if (CTA_OK != CTA_Handle_Check(*handle,CTA_HANDLE)) {
+	   CTA_WRITE_ERROR("Handle is not a cta_handle handle");
+	   return CTA_ILLEGAL_HANDLE;
+   }
+
+   pthread_mutex_lock( &mutex );
+
+   // Free associated data
+   free(CTAI_Handles[*handle]->name);
+   free(CTAI_Handles[*handle]);
+   CTAI_Handles[*handle]=NULL;
+   // Adjust pointer administration
+   CTAI_Handles_free++;
+   CTAI_Handles_last=*handle-1;
+   // Set value of hfunc
+   *handle=CTA_NULL;
+
+   pthread_mutex_unlock( &mutex );
+
+   return CTA_OK;
+}
+
+
+/** \brief Returns pointer to data element of handle
+ *
+ *  \param handle I COSTA handle
+ *  \param data   O pointer to data element
+ *  \return error status: CTA_OK, CTA_ILLEGAL_HANDLE
+ */
+#undef METHOD
+#define METHOD "GetData"
+int CTA_Handle_GetData(const CTA_Handle handle, void **data){
+
+   // if handle is not ok -> return CTA_ILLEGAL_HANDLE
+	if (CTA_OK != CTA_Handle_Check(handle,CTA_HANDLE)) {
+		CTA_WRITE_ERROR("Cannot retrieve handle data");
+		return CTA_ILLEGAL_HANDLE;
+	}
+
+   pthread_mutex_lock( &mutex );
+
+   // return data
+   *data=CTAI_Handles[handle]->data;
+
+   pthread_mutex_unlock( &mutex );
+
+   return CTA_OK;
+}
+
+
+/** \brief Returns data associated with handle (INTERNAL USE ONLY)
+ *
+ *  \param handle   I COSTA handle
+ *  \return handle name
+ */
+const void *CTAI_Handle_GetData(const CTA_Handle handle){
+   void *data;
+
+   pthread_mutex_unlock( &mutex );
+
+   data = CTAI_Handles[handle]->data;
+
+   pthread_mutex_unlock( &mutex );
+
+   return data;
+}
+
+
+/** \brief Returns name associated with handle
+ *
+ *  \param handle   I COSTA handle
+ *  \param name     O name
+ *  \return error status: CTA_OK, CTA_ILLEGAL_HANDLE
+ */
+#undef METHOD
+#define METHOD "GetName"
+int CTA_Handle_GetName(const CTA_Handle handle, CTA_String hname){
+   int retVal;
+   char *name;
+
+   // if handle is not ok -> return CTA_ILLEGAL_HANDLE
+   if (CTA_OK != CTA_Handle_Check(handle,CTA_HANDLE)) {
+	  CTA_WRITE_ERROR("Handle is not a cta_handle handle");
+	  return CTA_ILLEGAL_HANDLE;
+   }
+
+   if (CTA_OK != CTA_Handle_Check(hname,CTA_STRING)){
+	   CTA_WRITE_ERROR("Handle is not a cta_string handle");
+	   return CTA_ILLEGAL_HANDLE;
+   }
+
+   // return name
+
+   pthread_mutex_lock( &mutex );
+   name = CTAI_Handles[handle]->name;
+   pthread_mutex_unlock( &mutex );
+
+   retVal=CTA_String_Set(hname, name);
+
+
+   return retVal;
+}
+
+/** \brief Set name associated with handle (INTERNAL ONLY)
+ *
+ *  \param handle   I COSTA handle
+ *  \param hname    I COSTA string
+ *  \return error status: CTA_OK, CTA_ILLEGAL_HANDLE
+ */
+int CTAI_Handle_SetName(const CTA_Handle handle, const char *name)
+{
+   pthread_mutex_lock( &mutex );
+
+   if (strcmp(CTAI_Handles[handle]->name,name)!=0){
+      CTAI_Handles[handle]->name = realloc(CTAI_Handles[handle]->name, 1 + strlen(name));
+      strcpy(CTAI_Handles[handle]->name, name);
+   }
+
+   pthread_mutex_unlock( &mutex );
+
+   return CTA_OK;
+}
+
+
+/** \brief Returns name associated with handle (INTERNAL USE ONLY)
+ *
+ *  \param handle   I COSTA handle
+ *  \return handle name
+ */
+const char *CTAI_Handle_GetName(const CTA_Handle handle){
+   char *retStr;
+
+   pthread_mutex_lock( &mutex );
+
+   retStr = CTAI_Handles[handle]->name;
+
+   pthread_mutex_unlock( &mutex );
+
+   return retStr;
+}
+
+
+/** \brief Returns datatype associated with handle
+ *
+ *  \param handle   I COSTA handle
+ *  \param datatype O name of data type
+ *  \return error status: CTA_OK, CTA_ILLEGAL_HANDLE
+ */
+#undef METHOD
+#define METHOD "CGetDatatype"
+int CTA_Handle_GetDatatype(const CTA_Handle handle, CTA_Datatype *datatype){
+
+
+   // if handle is not ok -> return CTA_ILLEGAL_HANDLE
+	if (CTA_OK != CTA_Handle_Check(handle,CTA_HANDLE)) {
+		CTA_WRITE_ERROR("Handle is not a cta_handle handle");
+
+
+		return CTA_ILLEGAL_HANDLE;
+	}
+   pthread_mutex_lock( &mutex );
+
+   // return datatype
+   *datatype=CTAI_Handles[handle]->datatype;
+
+   pthread_mutex_unlock( &mutex );
+
+   return CTA_OK;
+}
+
+
+/** \brief Returns datatype associated with handle
+ *
+ *  \param handle   I COSTA handle
+ *  \return           name of data type,
+ */
+CTA_Datatype CTAI_Handle_GetDatatype(const CTA_Handle handle){
+
+   CTA_Datatype retVal;
+
+   pthread_mutex_lock( &mutex );
+   retVal = CTAI_Handles[handle]->datatype;
+
+
+   pthread_mutex_unlock( &mutex );
+
+   return retVal;
+}
+
+
+/** \brief Frees a COSTA handle and its associated data
+ *
+ *  \note The data part of the handle is also freed
+ *
+ *  \param handle I/O handle that must be freed, CTA_NULL on return.
+ *  \return error status: CTA_OK, CTA_ILLEGAL_HANDLE
+ */
+int CTA_Intf_Free(CTA_Handle *handle);
+int CTA_Func_Free(CTA_Handle *handle);
+int CTA_Vector_Free(CTA_Handle *handle);
+int CTA_TreeVector_Free(CTA_Handle *handle, int recursive);
+int CTA_Matrix_Free(CTA_Handle *handle);
+int CTA_Model_Free(CTA_Handle *handle);
+int CTA_SObs_Free(CTA_Handle *handle);
+int CTA_Tree_Free(CTA_Handle *handle);
+int CTA_String_Free(CTA_Handle *handle);
+int CTA_File_Free(CTA_Handle *handle);
+int CTA_Time_Free(CTA_Handle *handle);
+int CTA_ObsDescr_Free(CTA_Handle *handle);
+int CTA_Pack_Free(CTA_Handle *handle);
+int CTA_Metainfo_Free(CTA_Handle *handle);
+int CTA_RelTable_Free(CTA_Handle *handle);
+int CTA_Array_Free(CTA_Handle *handle);
+
+int CTA_Handle_Free_All(CTA_Handle *handle){
+
+   /* If handle is CTA_FUNC_NULL -> nothing to be done */
+   if (*handle==CTA_NULL) return CTA_OK;
+
+   switch (CTAI_Handles[*handle]->datatype){
+      case CTA_HANDLE :
+         if (IDEBUG>0) printf("Delete CTA_HANDLE\n");
+         return CTA_Handle_Free(handle);
+      case CTA_INTERFACE :
+         if (IDEBUG>0) printf("Delete CTA_INTERFACE\n");
+         return CTA_Intf_Free(handle);
+      case CTA_FUNCTION :
+         if (IDEBUG>0) printf("Delete CTA_FUNCTION\n");
+         return CTA_Func_Free(handle);
+      case CTA_VECTOR :
+         if (IDEBUG>0) printf("Delete CTA_VECTOR\n");
+         return CTA_Vector_Free(handle);
+      case CTA_VECTORCLASS :
+         if (IDEBUG>0) {
+            printf("Delete CTA_VECTORCLASS\n");
+            printf("WARNING: Cannot delete CTA_VECTORCLASS\n");
+         }
+         return CTA_Handle_Free(handle);
+      case CTA_TREEVECTOR :
+         if (IDEBUG>0) printf("Delete CTA_TREEVECTOR\n");
+         return CTA_TreeVector_Free(handle,CTA_TRUE);
+      case CTA_MATRIXCLASS :
+         if (IDEBUG>0) {
+            printf("Delete CTA_MAXTRIXCLASS\n");
+            printf("WARNING: Cannot delete CTA_MATRIXCLASS\n");
+         }
+         return CTA_Handle_Free(handle);
+      case CTA_MATRIX :
+         if (IDEBUG>0) printf("Delete CTA_MATRIX\n");
+         return CTA_Matrix_Free(handle);
+      case CTA_COVMATCLASS :
+         if (IDEBUG>0) {
+            printf("Delete CTA_COVMATCLASS\n");
+            printf("WARNING: Cannot delete CTA_COVMATCLASS\n");
+         }
+         return CTA_Handle_Free(handle);
+      case CTA_COVMAT :
+         if (IDEBUG>0) {
+            printf("Delete CTA_COVMAT\n");
+            printf("WARNING: Cannot delete CTA_COVMAT\n");
+         }
+         return CTA_OK;
+      //   return CTA_Handle_Free(handle);
+      case CTA_INTPOL :
+         if (IDEBUG>0) {
+            printf("Delete CTA_INTPOL\n");
+            printf("WARNING: Cannot delete CTA_INTPOL\n");
+         }
+         return CTA_OK;
+      //   return CTA_Handle_Free(handle);
+      case CTA_OBS :
+         if (IDEBUG>0) {
+            printf("Delete CTA_OBS\n");
+            printf("WARNING: Cannot delete CTA_OBS\n");
+         }
+         return CTA_OK;
+      case CTA_MODELCLASS :
+         if (IDEBUG>0) {
+            printf("Delete CTA_MODELCLASS\n");
+            printf("WARNING: Cannot delete CTA_MODELCLASS\n");
+         }
+         return CTA_OK;
+      //   return CTA_Handle_Free(handle);
+      case CTA_MODEL :
+         if (IDEBUG>0) printf("Delete CTA_MODEL\n");
+         return CTA_Model_Free(handle);
+      case CTA_TIME :
+         if (IDEBUG>0) printf("Delete CTA_TIME\n");
+         return CTA_Time_Free(handle);
+	   case CTA_SOBS :
+         if (IDEBUG>0) printf("Delete CTA_SOBS\n");
+		   return CTA_SObs_Free(handle);
+      case CTA_SOBSCLASS :
+         if (IDEBUG>0) {
+            printf("Delete CTA_SOBCLASS\n");
+            printf("WARNING: Cannot delete CTA_OBSCLASS\n");
+         }
+         return CTA_OK;
+      case CTA_OBSDESCR :
+         if (IDEBUG>0) printf("Delete CTA_OBSDESCR\n");
+		   return CTA_ObsDescr_Free(handle);
+      case CTA_METHODCLASS :
+         if (IDEBUG>0) {
+            printf("Delete CTA_METHODCLASS\n");
+            printf("WARNING: Cannot delete CTA_METHODCLASS\n");
+         }
+         return CTA_OK;
+      case CTA_METHOD :
+         if (IDEBUG>0) {
+            printf("Delete CTA_METHOD\n");
+            printf("WARNING: Cannot delete CTA_METHOD\n");
+         }
+         return CTA_OK;
+      case CTA_TREE :
+         return CTA_Tree_Free(handle);
+      case CTA_PACK :
+         if (IDEBUG>0) printf("Delete CTA_PACK\n");
+         return CTA_Pack_Free(handle);
+      case CTA_DATABLOCK :
+         if (IDEBUG>0) {
+            printf("Delete CTA_DATABLOCK\n");
+            printf("WARNING: Cannot delete CTA_DATABLOCK\n");
+         }
+         return CTA_OK;
+      case CTA_METAINFO :
+         if (IDEBUG>0) printf("Delete CTA_METAINFO\n");
+         return CTA_Metainfo_Free(handle);
+      case CTA_METAINFOCLASS :
+         if (IDEBUG>0) {
+            printf("Delete CTA_METAINFOCLASS\n");
+            printf("WARNING: Cannot delete CTA_METAINFOCLASS\n");
+         }
+         return CTA_OK;
+      case CTA_RELTABLE :
+         if (IDEBUG>0) printf("Delete CTA_RELTABLE\n");
+         return CTA_RelTable_Free(handle);
+      case CTA_SUBTREEVECTOR :
+         if (IDEBUG>0) {
+            printf("Delete CTA_SUBTREEVECTOR\n");
+            printf("WARNING: Cannot delete CTA_SUBTREEVECTOR\n");
+         }
+         return CTA_OK;
+      case CTA_STRING :
+         if (IDEBUG>0) printf("Delete CTA_STRING\n");
+		 return CTA_String_Free(handle);
+      case CTA_FILE :
+         if (IDEBUG>0) printf("Delete CTA_FILE\n");
+         return CTA_File_Free(handle);
+      case CTA_ARRAY :
+         if (IDEBUG>0) printf("Delete CTA_ARRAY\n");
+         return CTA_Array_Free(handle);
+      default:
+          printf("INTERNAL ERROR IN CTA_Handle_Free_All\n");
+          printf("WARNING: Cannot delete handle of datatype %d\n",
+                  CTAI_Handles[*handle]->datatype);
+          exit(-1);
+   }
+}
+
+
+
+
+/** \brief Returns the value a handle points to
+ *
+ *  \param handle I COSTA handle
+ *  \param data   O pointer to data element
+ *  \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE
+ */
+int CTA_Handle_GetValue(const CTA_Handle handle, void *value, CTA_Datatype datatype)
+{
+   CTA_Datatype mytype;
+
+   /* If handle is CTA_FUNC_NULL -> nothing to be done */
+   if (handle==CTA_NULL) return CTA_OK;
+
+   mytype = CTAI_Handle_GetDatatype(handle);
+
+   /* Return the handle value in case of identical types or CTA_HANDLE */
+   if (mytype == datatype || datatype == CTA_HANDLE) {
+      *(CTA_Handle*)value = handle;
+      return CTA_OK;
+   }
+
+   /* In case of string, attempt to convert */
+   if (mytype == CTA_STRING) {
+      return CTA_String_GetValue(handle, value, datatype);
+   }
+
+   /* Otherwise, return CTA_INCOMPATIBLE_HANDLE */
+   return CTA_INCOMPATIBLE_HANDLE;
+}
+
+#undef METHOD
+#define METHOD "Find"
+int CTA_Handle_Find(CTA_String sname, CTA_Datatype datatype, int *handlenr)
+{
+  int i,retval, len;
+  char *sstr;
+
+
+  /* Copy the input string into a C-string */
+  retval = CTA_String_GetLength(sname,&len);
+  if (retval!=CTA_OK) {
+	  CTA_WRITE_ERROR("Cannot get Length");
+	  return retval;
+  }
+  sstr=CTA_Malloc((len+1)*sizeof(char));
+  retval = CTA_String_Get(sname,sstr);
+  if (retval!=CTA_OK) {
+	  CTA_WRITE_ERROR("Cannot get String");
+
+          free(sstr);
+	  return retval;
+  }
+
+
+  pthread_mutex_lock( &mutex );
+
+  for (i=0; i < CTAI_Handles_size;i++){
+
+     if (CTAI_Handles[i] != NULL) {
+
+          if (!strncmp(CTAI_Handles[i]->name,sstr,len)) {
+
+          //printf("gevonden! %d \n",i);
+          *handlenr = i;
+
+          pthread_mutex_unlock( &mutex );
+
+          free(sstr);
+          return CTA_OK; }
+
+     }
+  }
+
+
+  pthread_mutex_unlock( &mutex );
+
+  free(sstr);
+  return CTA_HANDLE_NOT_FOUND;
+}
+
+
+int CTA_Handle_Printall()
+{
+  int i,ierr;
+  char *sstr;
+  int len;
+
+  pthread_mutex_lock( &mutex );
+
+  for (i=0; i < CTAI_Handles_size;i++)
+  {
+     if (CTAI_Handles[i] != NULL)
+     {
+        printf("Name of handle %d  is '%s'", i, CTAI_Handles[i]->name);
+
+        if (CTAI_Handles[i]->datatype == CTA_STRING) {
+
+          pthread_mutex_unlock( &mutex );
+          ierr = CTA_String_GetLength(i,&len);
+          pthread_mutex_lock( &mutex );
+
+          sstr=CTA_Malloc((len+1)*sizeof(char));
+
+          pthread_mutex_unlock( &mutex );
+
+          ierr=CTA_String_Get(i, sstr);
+
+          pthread_mutex_lock( &mutex );
+
+          printf(": %s",sstr);
+        }
+        printf("\n");
+     }
+  }
+  pthread_mutex_unlock( &mutex );
+  return ierr;
+}
+
+int CTA_Handle_PrintInfo(const char *location)
+{
+   int i;
+   int ntotal;
+   int ncount[32];
+   int nrefcount[32];
+   int datatype;
+   long memvec;
+   float fmemvec;
+
+   pthread_mutex_lock( &mutex );
+
+   memvec=CTAI_Vector_GetMemsize();
+   fmemvec=(float) memvec/(1024.0*1024.0);
+
+   /* initialise counting variables */
+   ntotal=0;
+   for (i=0;i<32;i++){ncount[i]=0; nrefcount[i]=0;}
+
+   for (i=0; i < CTAI_Handles_size;i++)
+   {
+      if (CTAI_Handles[i] != NULL)
+      {
+         ntotal++;
+         datatype=-CTAI_Handles[i]->datatype;
+         if (  datatype<0 || datatype >31 ){
+            printf("ERROR HANDLE %d has datatype %d",i ,-datatype);
+         }
+         else {
+            ncount[datatype]++;
+            nrefcount[datatype] = nrefcount[datatype] + CTAI_Handles[i]->refCount;
+         }
+      }
+      else{
+         ncount[31]++;
+      }
+   }
+   fflush(stdout);
+   printf("#%d CTA_Handle_PrintInfo: NOTE: SPECIAL VERSION OF CTA_HANDLES.c:\n", CTA_PAR_MY_RANK);
+   printf("#%d CTA_Handle_PrintInfo: Overview of handles:\n", CTA_PAR_MY_RANK);
+   printf("#%d location=%s\n", CTA_PAR_MY_RANK,location);
+   printf("#%d #handles including empty slots: %d\n", CTA_PAR_MY_RANK,CTAI_Handles_size);
+   printf("#%d empty slots       : %d \n", CTA_PAR_MY_RANK,ncount[31]);
+   printf("#%d type                number of handles  number of refcounts\n", CTA_PAR_MY_RANK  );
+   printf("#%d CTA_HANDLE        : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_HANDLE]);
+   printf("#%d CTA_INTERFACE     : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_INTERFACE   ]);
+   printf("#%d CTA_FUNCTION      : %d %d\n", CTA_PAR_MY_RANK,ncount[-CTA_FUNCTION  ],  nrefcount[-CTA_FUNCTION    ]);
+   printf("#%d CTA_VECTOR        : %d %d %f Mb\n", CTA_PAR_MY_RANK,ncount[-CTA_VECTOR    ], nrefcount[-CTA_VECTOR    ], fmemvec);
+   printf("#%d CTA_VECTORCLASS   : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_VECTORCLASS ]);
+   printf("#%d CTA_TREEVECTOR    : %d %d\n", CTA_PAR_MY_RANK,ncount[-CTA_TREEVECTOR], nrefcount[-CTA_TREEVECTOR ]);
+   printf("#%d CTA_MATRIXCLASS   : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_MATRIXCLASS ]);
+   printf("#%d CTA_MATRIX        : %d %d\n", CTA_PAR_MY_RANK,ncount[-CTA_MATRIX      ], nrefcount[-CTA_MATRIX      ]);
+   printf("#%d CTA_COVMATCLASS   : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_COVMATCLASS ]);
+   printf("#%d CTA_COVMAT        : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_COVMAT      ]);
+   printf("#%d CTA_INTPOL        : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_INTPOL      ]);
+   printf("#%d CTA_OBS           : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_OBS         ]);
+   printf("#%d CTA_MODELCLASS    : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_MODELCLASS  ]);
+   printf("#%d CTA_MODEL         : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_MODEL       ]);
+   printf("#%d CTA_TIME          : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_TIME        ]);
+   printf("#%d CTA_SOBS          : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_SOBS        ]);
+   printf("#%d CTA_SOBSCLASS     : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_SOBSCLASS   ]);
+   printf("#%d CTA_OBSDESCR      : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_OBSDESCR    ]);
+   printf("#%d CTA_METHODCLASS   : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_METHODCLASS ]);
+   printf("#%d CTA_METHOD        : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_METHOD      ]);
+   printf("#%d CTA_TREE          : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_TREE        ]);
+   printf("#%d CTA_PACK          : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_PACK        ]);
+   printf("#%d CTA_DATABLOCK     : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_DATABLOCK   ]);
+   printf("#%d CTA_METAINFO      : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_METAINFO    ]);
+   printf("#%d CTA_METAINFOCLASS : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_METAINFOCLASS]);
+   printf("#%d CTA_RELTABLE      : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_RELTABLE     ]);
+   printf("#%d CTA_SUBTREEVECTOR : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_SUBTREEVECTOR]);
+   printf("#%d CTA_STRING        : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_STRING       ]);
+   printf("#%d CTA_FILE          : %d \n", CTA_PAR_MY_RANK,ncount[-CTA_FILE         ]);
+   printf("#%d                     -----+\n", CTA_PAR_MY_RANK);
+   printf("#%d Total             : %d \n", CTA_PAR_MY_RANK,ntotal);
+   fflush(stdout);
+
+   pthread_mutex_unlock( &mutex );
+
+  return CTA_OK;
+}
+
+
+
+
+/* Interfacing with Fortran */
+CTAEXPORT void CTA_HANDLE_CREATE_F77(char *name, int *datatype, int *data,
+                         int *handle, int *ierr, int len_name){
+  char  *c_name;
+  // create a c-string equivalent to name
+  c_name=CTA_Malloc((len_name+1)*sizeof(char));
+  CTA_fstr2cstr(name,c_name,len_name);
+
+  *ierr=CTA_Handle_Create(c_name, (CTA_Datatype) *datatype, (void *) data,
+                          (CTA_Handle*) handle);
+
+  free(c_name);
+}
+
+CTAEXPORT void CTA_HANDLE_FREE_F77(int *handle, int *ierr){
+
+   *ierr=CTA_Handle_Free((CTA_Handle *) handle);
+}
+
+CTAEXPORT void CTA_HANDLE_FREE_ALLF77(int *handle, int *ierr){
+
+   *ierr=CTA_Handle_Free_All((CTA_Handle *) handle);
+}
+
+CTAEXPORT void CTA_HANDLE_CHECK_F77(int *handle, int *datatype, int *ierr){
+
+   *ierr=CTA_Handle_Check((CTA_Handle) *handle, (CTA_Datatype) *datatype);
+}
+
+
+// Note it is not possible to handle a CTA_Handle_GetData operation
+// In fortran due to the lack of poiters
+// void CTA_Handle_GetData(int *handle, void **data ,int *ierr);
+
+
+CTAEXPORT void CTA_HANDLE_GETNAME_F77(int *handle, int *name, int*ierr){
+/*
+   char * c_name; //C-string copy of name
+   c_name=CTA_Malloc((len_name+1)*sizeof(char));
+   *ierr=CTA_Handle_GetName((CTA_Handle) *handle, name);
+   CTA_cstr2fstr(c_name,name,len_name);
+   free(c_name);
+*/
+   *ierr=CTA_Handle_GetName((CTA_Handle) *handle, (CTA_String)*name);
+}
+
+/* Try to get the value from the given handle */
+CTAEXPORT void CTA_HANDLE_GETVALUE_F77(int *handle, void *value, int *datatype, int*ierr){
+   *ierr=CTA_Handle_GetValue((CTA_Handle) *handle, value, *datatype);
+}
+
+/* Try to get the value from the given handle */
+CTAEXPORT void CTA_HANDLE_GETDATATYPE_F77(int *handle, int *datatype, int*ierr){
+   *ierr=CTA_Handle_GetDatatype((CTA_Handle) *handle, datatype);
+}
+
+
+CTAEXPORT void CTA_HANDLE_FIND_F77(int *sname, int *datatype, int *handlenr,
+                         int *ierr){
+   *ierr=CTA_Handle_Find((CTA_String) *sname, (CTA_Datatype) *datatype,
+                         handlenr);
+}
+
+CTAEXPORT void CTA_HANDLE_PRINTALL_F77(int*ierr){
+   *ierr=CTA_Handle_Printall();
+}
+
+CTAEXPORT void CTA_HANDLE_PRINTINFO_F77(char *location, int*ierr, int len_str){
+   char *c_str;
+
+   c_str=CTA_Malloc((len_str+1)*sizeof(char));
+   CTA_fstr2cstr(location,c_str,len_str);
+
+   *ierr=CTA_Handle_PrintInfo(c_str);
+   free(c_str);
+}
diff --git a/costa/native/cta/src/cta_initialise.c b/costa/native/cta/src/cta_initialise.c
new file mode 100644
index 000000000..11cd4e28f
--- /dev/null
+++ b/costa/native/cta/src/cta_initialise.c
@@ -0,0 +1,144 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_initialise.c $
+$Revision: 4055 $, $Date: 2013-07-03 10:16:57 +0200 (Wed, 03 Jul 2013) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <locale.h>
+#include "f_cta_utils.h"
+#include "cta.h"
+#include "cta_obsdescr_table.h"
+#include "cta_modbuild_b3b.h"
+#include "cta_sobs_netcdf.h"
+#include "cta_sobs_combine.h"
+#include "cta_sobs_sqlite3.h"
+#include "cta_message.h"
+
+#define CLASSNAME "CTA_Core"
+#define CTA_INITIALISE_F77 F77_CALL(cta_core_initialise,CTA_CORE_INITIALISE)
+#define CTA_FINALISE_F77 F77_CALL(cta_core_finalise,CTA_CORE_FINALISE)
+#define CTA_SOBS_MAORI_INITIALISE_F77 F77_CALL(cta_sobs_maori_initialise, CTA_SOBS_MAORI_INITIALISE)
+
+#define IDEBUG (0)
+
+int is_initialised=0;
+
+#define METHOD "Initialise"
+int CTA_Core_Initialise()
+{
+
+   //Fixes a very very nasty bug that results in non working atof (string -> double) conversions
+   setlocale(LC_ALL, "C");
+
+   CTA_INITIAL_RANDOM_SEED=16111970; //Bas ;-)
+//   CTA_INITIAL_RANDOM_SEED=2101975;  //Nils ;-)
+
+   // Default library name for user routines
+   strcpy(userDefaultDynamicLibrary,"libuseropenda");
+
+   char message[64];
+   sprintf(message,"OpenDA Native initialize: Initial random seed %ld\n",CTA_INITIAL_RANDOM_SEED);
+   CTA_WRITE_INFO(message);
+
+   CTA_rand_seed((long) CTA_INITIAL_RANDOM_SEED);
+   if (! is_initialised)
+   {
+      is_initialised=1;
+      CTA_File_Create(&CTA_FILE_STDOUT);
+      CTA_File_Set(CTA_FILE_STDOUT, stdout);
+      CTA_SObs_sqlite3_initialise(&CTA_DEFAULT_SOBS);
+
+      CTA_SObs_netcdf_initialise(&CTA_NETCDF_SOBS);
+
+      CTA_SObs_combine_initialise(&CTA_COMBINE_SOBS);
+
+      CTA_ObsDescr_table_initialise(&CTA_OBSDESCR_TABLE);
+
+      CTA_Vector_blas_initialise (&CTA_DEFAULT_VECTOR);
+
+      CTA_Matrix_blas_initialise (&CTA_DEFAULT_MATRIX);
+      CTA_Modbuild_sp_CreateClass(&CTA_MODBUILD_SP);
+
+      CTA_Func_Create("CTAI_Treevector_Operation_ScaledRMS",
+             &CTAI_Treevector_Operation_ScaledRMS, CTA_NULL,
+             &CTA_OP_ROOT_RMS);
+
+      CTA_Func_Create("CTAI_Treevector_Operation_Amax",
+             &CTAI_Treevector_Operation_Amax, CTA_NULL,
+             &CTA_OP_ROOT_AMAX);
+
+      CTA_Func_Create("CTAI_Treevector_Operation_PrintEntry",
+             &CTAI_Treevector_Operation_PrintEntry, CTA_NULL,
+             &CTA_OP_ROOT_PRINTI);
+
+      CTA_Func_Create("CTAI_Treevector_Operation_ScaledSSQ",
+             &CTAI_Treevector_Operation_ScaledSSQ, CTA_NULL,
+             &CTA_OP_ROOT_SSQ);
+
+      CTA_Message_Quiet(1);
+      // Additional observers:
+      // 1) MAORI
+      CTA_SObs_maori_initialize(&CTA_MAORI_SOBS);
+      // 2) USER
+      CTA_SObs_user_initialize(&CTA_USER_SOBS);
+      CTA_Message_Quiet(0);
+
+      CTA_MODBUILD_PAR=CTA_NULL;
+
+      if (IDEBUG) {
+         printf("cta_initialise :CTA_MODBUILD_PAR  =%d\n",CTA_MODBUILD_PAR);
+         printf("cta_initialise :CTA_FILE_STDOUT   =%d\n",CTA_FILE_STDOUT);
+         printf("cta_initialise :CTA_DEFAULT_SOBS  =%d\n",CTA_DEFAULT_SOBS);
+         printf("cta_initialise :CTA_NETCDF_SOBS   =%d\n",CTA_NETCDF_SOBS);
+         printf("cta_initialise :CTA_COMBINE_SOBS  =%d\n",CTA_COMBINE_SOBS);
+         printf("cta_initialise :CTA_OBSDESCR_TABLE=%d\n",CTA_OBSDESCR_TABLE);
+         printf("cta_initialise :CTA_DEFAULT_VECTOR=%d\n",CTA_DEFAULT_VECTOR);
+         printf("cta_initialise :CTA_DEFAULT_MATRIX=%d\n",CTA_DEFAULT_MATRIX);
+         printf("cta_initialise :CTA_MODBUILD_SP   =%d\n",CTA_MODBUILD_SP);
+         printf("cta_initialise :CTA_MODELCOMBINER =%d\n",CTA_MODELCOMBINER);
+         printf("cta_initialise :CTA_MODBUILD_B3B  =%d\n",CTA_MODBUILD_B3B);
+         printf("cta_initialise :CTA_MODBUILD_BB   =%d\n",CTA_MODBUILD_BB);
+         printf("cta_initialise :CTA_OP_ROOT_RMS   =%d\n",CTA_OP_ROOT_RMS);
+         printf("cta_initialise :CTA_OP_ROOT_AMAX  =%d\n",CTA_OP_ROOT_AMAX);
+         printf("cta_initialise :CTA_OP_ROOT_PRINTI=%d\n",CTA_OP_ROOT_PRINTI);
+         printf("cta_initialise :CTA_OP_ROOT_SSQ   =%d\n",CTA_OP_ROOT_SSQ);
+         printf("cta_initialise :CTA_MAORI_SOBS    =%d\n",CTA_MAORI_SOBS);
+         printf("cta_initialise :CTA_USER_SOBS     =%d\n",CTA_USER_SOBS);
+      }
+   };
+   return CTA_OK;
+}
+
+
+int CTA_Core_Finalise(){
+   if (IDEBUG) printf("Calling CTA_Finalise\n");
+   CTA_Modbuild_par_Finalize();
+   return CTA_OK;
+}
+
+
+/* Interfacing with Fortran */
+
+CTAEXPORT void CTA_INITIALISE_F77 (int *ierr){
+   *ierr=CTA_Core_Initialise();
+}
+
+CTAEXPORT void CTA_FINALISE_F77 (int *ierr){
+   *ierr=CTA_Core_Finalise();
+}
diff --git a/costa/native/cta/src/cta_interface.c b/costa/native/cta/src/cta_interface.c
new file mode 100644
index 000000000..a035d9da1
--- /dev/null
+++ b/costa/native/cta/src/cta_interface.c
@@ -0,0 +1,228 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_interface.c $
+$Revision: 2751 $, $Date: 2011-09-09 08:58:46 +0200 (Fri, 09 Sep 2011) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include "cta_mem.h"
+#include "cta_interface.h"
+#include "cta_errors.h"
+#include "cta_message.h"
+
+#define CLASSNAME "CTA_Interface"
+
+/* Struct holding all data associated to an COSTA interface */
+// Struct containing specific data associated to a COSTA interface
+typedef struct {
+int narg;
+CTA_Datatype *argtyp;
+} CTAI_Interface;
+
+
+/** \brief Creates a new interface
+ *
+ * \param name     I name of the new interface
+ * \param narg     I number of arguments of interface
+ * \param argtyp   I list with the datatypes of arguments
+ * \param hintf    O COSTA interface handle
+ * \return error status: CTA_OK
+ */
+#undef METHOD
+#define METHOD "Create"
+int CTA_Intf_Create(const char *name, const CTA_Datatype *argtyp,
+                    const int narg,CTA_Intf *hintf){
+
+   int retval;           //Return value of a call
+   CTAI_Interface *data; //Interface specific data
+   int i;                //loop counter
+
+   // Allocate data and set properties
+   data=CTA_Malloc(sizeof(CTAI_Interface));
+
+   // Set properties
+   data->narg=narg;
+   data->argtyp=CTA_Malloc(narg*sizeof(CTA_Datatype));
+
+   // Copy values
+   for (i=0;i<narg;i++){
+      data->argtyp[i]=argtyp[i];
+   };
+
+   // Allocate new handle and return eror when unsuccesfull
+   retval=CTA_Handle_Create(name,CTA_INTERFACE,data,hintf);
+   if (retval) {
+	   CTA_WRITE_ERROR("Cannot create handle");
+	   return retval;
+   }
+
+   return CTA_OK;
+}
+
+
+/** \brief Frees an interface
+ *
+ *  \note Freeing CTA_NULL is allowed.
+ *
+ *  \param hintf  I/O  handle of interface. The value is 
+ *                     CTA_NULL on return
+ *  \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE 
+ */
+#undef METHOD
+#define METHOD "Free"
+int CTA_Intf_Free(CTA_Intf *hintf){
+
+   int retval;           //Return value of a call
+   CTAI_Interface *data; //Interface specific data
+
+   // If handle is CTA_NULL -> nothing to be done
+   if (*hintf==CTA_NULL) return CTA_OK;
+
+   // Check Handle and return error if handle is not valid
+   retval=CTA_Handle_Check(*hintf,CTA_INTERFACE);
+   if (retval) {
+	   CTA_WRITE_ERROR("Handle is not a cta_interface handle");
+	   return retval;
+   }
+
+   // Free data item
+   CTA_Handle_GetData(*hintf,(void*) &data); 
+   if (data->argtyp) free(data->argtyp);
+   free(data);
+
+   //Free Handle
+   CTA_Handle_Free(hintf);
+   
+   return CTA_OK;
+}
+
+/** \brief Mactches two interfaces for compatibility argumentlist-argumentlist
+ *
+ *  \note two interfaces are compatible if all arguments have the same datatype
+ *        CTA_VOID is compatible with all other arguments except for CTA_FSTRING
+
+ *  \param argtyp1  I  list with the datatypes of arguments of first interface
+ *  \param narg1    I  number of argumetns in first interface
+ *  \param argtyp2  I  list with the datatypes of arguments of second interface
+ *  \param narg2    I  number of argumetns in second interface
+ *  \param flag     O  TRUE if interfaces are compatible FALSE ortherwise
+ *  \return error status: CTA_OK
+ */
+int CTA_Intf_Match_aa(const CTA_Datatype *argtyp1, const int narg1, 
+                      const CTA_Datatype *argtyp2, const int narg2,
+                      BOOL *flag){
+
+   // Check all arguments:
+   // note CTA_VOID can be combined with all other datatypes except for
+   // CTA_FSTRING
+   *flag=FALSE;
+
+   if (narg1==narg2){
+      int i;
+      BOOL allok=TRUE;
+      for (i=0;i<narg1;i++){
+         if (argtyp1[i]!=argtyp2[i]){
+            if (argtyp1[i]!=CTA_VOID && argtyp2[i]!=CTA_VOID){
+               allok=FALSE;
+            }
+            else if (argtyp1[i]==CTA_FSTRING ||
+                     argtyp2[i]==CTA_FSTRING) { 
+               allok=FALSE;
+            };
+         };
+      }; 
+      *flag=allok;
+   };
+   return CTA_OK;
+}
+
+
+/** \brief Mactches two interfaces for compatibility handle-argumentlist
+ *
+ *  \note two interfaces are compatible if all arguments have the same datatype
+ *        CTA_VOID is compatible with all other arguments except for CTA_FSTRING
+
+ *  \param hintf1   I  Handle of first interface
+ *  \param argtyp2  I  list with the datatypes of arguments of second interface
+ *  \param narg2    I  number of argumetns in second interface
+ *  \param flag     O  TRUE if interfaces are compatible FALSE ortherwise
+ *  \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE 
+ */
+#undef METHOD
+#define METHOD "Match_ha"
+int CTA_Intf_Match_ha(const CTA_Intf hintf1,
+                      const CTA_Datatype *argtyp2, const int narg2,
+                      BOOL *flag){
+
+   int retval;
+   CTAI_Interface *data1;
+   
+   // Check Handle and return error if handle is not valid
+   retval=CTA_Handle_Check(hintf1,CTA_INTERFACE);
+   if (retval) {
+	   CTA_WRITE_ERROR("Handle is not a cta_interface handle");
+	   return retval;
+   }
+
+   // Get data for first  interface
+   CTA_Handle_GetData(hintf1,(void*) &data1); 
+
+   retval=CTA_Intf_Match_aa(data1->argtyp, data1->narg, 
+                            argtyp2, narg2, flag);
+   return retval;
+}
+
+
+/** \brief Mactches two interfaces for compatibility handle-handle
+ *
+ *  \note two interfaces are compatible if all arguments have the same datatype
+ *        CTA_VOID is compatible with all other arguments except for CTA_FSTRING
+
+ *  \param hintf1   I  Handle of first interface
+ *  \param hintf2   I  Handle of second
+ *  \param flag     O  TRUE if interfaces are compatible FALSE ortherwise
+ *  \return error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE 
+ */
+#undef METHOD
+#define METHOD "Match_hh"
+int CTA_Intf_Match_hh(const CTA_Intf hintf1, const CTA_Intf hintf2, BOOL *flag){
+
+   int retval;
+   CTAI_Interface *data1, *data2;
+   
+   // Check Handle and return error if handle is not valid
+   retval=CTA_Handle_Check(hintf1,CTA_INTERFACE);
+   if (retval) {
+	   CTA_WRITE_ERROR("Handle is not a cta_interface handle");
+	   return retval;
+   }
+   retval=CTA_Handle_Check(hintf2,CTA_INTERFACE);
+   if (retval) {
+	   CTA_WRITE_ERROR("Handle is not a cta_interface handle");	   
+	   return retval;
+   }
+
+   // Get data for both interfaces
+   CTA_Handle_GetData(hintf1,(void*) &data1); 
+   CTA_Handle_GetData(hintf2,(void*) &data2); 
+
+   retval=CTA_Intf_Match_aa(data1->argtyp, data1->narg, 
+                            data2->argtyp, data2->narg, flag);
+   return retval;
+}
+
diff --git a/costa/native/cta/src/cta_matrix.c b/costa/native/cta/src/cta_matrix.c
new file mode 100644
index 000000000..aa7e3521b
--- /dev/null
+++ b/costa/native/cta/src/cta_matrix.c
@@ -0,0 +1,960 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_matrix.c $
+$Revision: 3407 $, $Date: 2012-08-17 13:50:50 +0200 (Fri, 17 Aug 2012) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+#include <stdio.h>
+#include "cta_mem.h"
+#include "f_cta_utils.h"
+#include "cta_f77lapack.h"
+#include "cta_flush.h"
+#include "cta_matrix.h"
+#include "cta_errors.h"
+#include "ctai.h"
+#include "cta_message.h"
+
+#define CTA_MATRIX_DEFINECLASS_F77  F77_CALL(cta_matrix_defineclass,CTA_MATRIX_DEFINECLASS)
+#define CTA_MATRIX_DUPLICATE_F77    F77_CALL(cta_matrix_duplicate,CTA_MATRIX_DUPLICATE)
+#define CTA_MATRIX_CREATE_F77       F77_CALL(cta_matrix_create,CTA_MATRIX_CREATE)
+#define CTA_MATRIX_GETSIZE_F77      F77_CALL(cta_matrix_getsize,CTA_MATRIX_GETSIZE)
+#define CTA_MATRIX_GETDATATYPE_F77  F77_CALL(cta_matrix_getdatatype,CTA_MATRIX_GETDATATYPE)
+#define CTA_MATRIX_GETVALS_F77      F77_CALL(cta_matrix_getvals,CTA_MATRIX_GETVALS)
+#define CTA_MATRIX_GETVAL_F77       F77_CALL(cta_matrix_getval,CTA_MATRIX_GETVAL)
+#define CTA_MATRIX_SETCONSTANT_F77  F77_CALL(cta_matrix_setconstant,CTA_MATRIX_SETCONSTANT)
+#define CTA_MATRIX_SETCOL_F77       F77_CALL(cta_matrix_setcol,CTA_MATRIX_SETCOL)
+#define CTA_MATRIX_SETVALS_F77      F77_CALL(cta_matrix_setvals,CTA_MATRIX_SETVALS)
+#define CTA_MATRIX_SETVAL_F77       F77_CALL(cta_matrix_setval,CTA_MATRIX_SETVAL)
+#define CTA_MATRIX_EXPORT_F77       F77_CALL(cta_matrix_export,CTA_MATRIX_EXPORT)
+#define CTA_MATRIX_GER_F77          F77_CALL(cta_matrix_ger,CTA_MATRIX_GER)
+#define CTA_MATRIX_GEMV_F77         F77_CALL(cta_matrix_gemv,CTA_MATRIX_GEMV)
+#define CTA_MATRIX_GEMM_F77         F77_CALL(cta_matrix_gemm,CTA_MATRIX_GEMM)
+#define CTA_MATRIX_INV_F77          F77_CALL(cta_matrix_inv,CTA_MATRIX_INV)
+#define CTA_MATRIX_AXPY_F77         F77_CALL(cta_matrix_axpy,CTA_MATRIX_AXPY)
+#define CTA_MATRIX_EIGVALS_F77      F77_CALL(cta_matrix_eigvals,CTA_MATRIX_EIGVALS)
+#define CTA_MATRIX_FREE_F77         F77_CALL(cta_matrix_free,CTA_MATRIX_FREE)
+#define CLASSNAME "CTA_Matrix"
+
+/* Struct holding all data associated to an COSTA Vector */
+
+typedef struct {
+CTA_Func functions[CTA_MATRIX_NUMFUNC];
+CTA_MatClass hmatcl;
+int m;
+int n;
+CTA_Datatype datatype;
+CTA_Handle userdata;
+void *data;      /*implementation specific data */
+} CTAI_Matrix;
+
+typedef struct {
+CTA_Func functions[CTA_MATRIX_NUMFUNC];
+} CTAI_MatrixClass;
+
+
+int CTA_Matrix_DefineClass(
+   const char *name,
+   const CTA_Func h_func[CTA_MATRIX_NUMFUNC],
+   CTA_MatClass     *hmatcl
+   ){
+
+   CTAI_MatrixClass *data;
+   int retval;
+
+   /* Allocate new Vector object */
+   data=CTA_Malloc(sizeof(CTAI_MatrixClass));
+
+   data->functions[CTA_MATRIX_CREATE_SIZE]=h_func[CTA_MATRIX_CREATE_SIZE];
+   data->functions[CTA_MATRIX_CREATE_INIT]=h_func[CTA_MATRIX_CREATE_INIT];
+   data->functions[I_CTA_MATRIX_GETVALS    ]=h_func[I_CTA_MATRIX_GETVALS    ];
+   data->functions[I_CTA_MATRIX_GETVAL     ]=h_func[I_CTA_MATRIX_GETVAL     ];
+   data->functions[I_CTA_MATRIX_SETCOL     ]=h_func[I_CTA_MATRIX_SETCOL     ];
+   data->functions[I_CTA_MATRIX_SETVALS    ]=h_func[I_CTA_MATRIX_SETVALS    ];
+   data->functions[I_CTA_MATRIX_SETVAL     ]=h_func[I_CTA_MATRIX_SETVAL     ];
+   data->functions[I_CTA_MATRIX_SETCONST   ]=h_func[I_CTA_MATRIX_SETCONST   ];
+   data->functions[I_CTA_MATRIX_EXPORT     ]=h_func[I_CTA_MATRIX_EXPORT     ];
+   data->functions[I_CTA_MATRIX_GER        ]=h_func[I_CTA_MATRIX_GER        ];
+   data->functions[I_CTA_MATRIX_INV        ]=h_func[I_CTA_MATRIX_INV        ];
+   data->functions[I_CTA_MATRIX_GEMV       ]=h_func[I_CTA_MATRIX_GEMV       ];
+   data->functions[I_CTA_MATRIX_GEMM       ]=h_func[I_CTA_MATRIX_GEMM       ];
+   data->functions[I_CTA_MATRIX_AXPY       ]=h_func[I_CTA_MATRIX_AXPY       ];
+   data->functions[I_CTA_MATRIX_FREE       ]=h_func[I_CTA_MATRIX_FREE       ];
+
+      // Allocate new handle and return eror when unsuccesfull
+   retval=CTA_Handle_Create(name,CTA_MATRIXCLASS,data,hmatcl);
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "Duplicate" 
+int CTA_Matrix_Duplicate(CTA_Matrix hmatrix1, CTA_Matrix *hmatrix2){
+
+   CTAI_Matrix *data;      /* All data of vector hvec      */
+   int retval;
+
+   /* Get class data containing all function pointers */
+   retval=CTA_Handle_Check((CTA_Handle) hmatrix1,CTA_MATRIX);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_matrix handle");
+	   return retval;
+   }
+   
+   retval=CTA_Handle_GetData((CTA_Handle) hmatrix1,(void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   retval=CTA_Matrix_Create(data->hmatcl, data->m, data->n, data->datatype,
+                            data->userdata, hmatrix2);
+
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "Create"
+int CTA_Matrix_Create(CTA_MatClass hmatcl, const int m, const int n,
+                      CTA_Datatype datatype, CTA_Handle userdata,
+                      CTA_Matrix *hmatrix){
+
+   CTAI_Matrix *matrix;
+   int memsize;
+   int retval;
+   CTAI_MatrixClass *clsdata;
+   CTA_Function *function;
+   int i;
+
+   /* Get class data containing all function pointers */
+   retval=CTA_Handle_Check((CTA_Handle) hmatcl,CTA_MATRIXCLASS);
+   if (retval!=CTA_OK){
+	   CTA_WRITE_ERROR("Handle is not a cta_matrixclass handle");
+	   return retval;
+   }
+   
+   retval=CTA_Handle_GetData((CTA_Handle) hmatcl,(void*) &clsdata);
+   if (retval!=CTA_OK){
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   /* determine size of data object (CTA_MATRIX_CREATE_SIZE)*/
+   retval=CTA_Func_GetFunc(clsdata->functions[CTA_MATRIX_CREATE_SIZE],
+                           &function);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function CTA_MATRIX_CREATE_SIZE");
+	   return retval;
+   }
+
+   (void) function(&m, &n,&datatype,userdata,&retval,&memsize);
+   if (retval){
+	   CTA_WRITE_ERROR("Error in function");   
+	   return retval;
+   }
+
+   /* allocate memory for new matrix object */
+   matrix=CTA_Malloc(sizeof(CTAI_Matrix));
+   matrix->data=CTA_Malloc(memsize);
+
+   /* copy function pointers */
+   for (i=0;i<CTA_MATRIX_NUMFUNC;i++){
+      matrix->functions[i]=clsdata->functions[i];
+   }
+   /* set other general information */
+   matrix->hmatcl=hmatcl;
+   matrix->m=m;
+   matrix->n=n;
+   matrix->datatype=datatype;
+   matrix->userdata=userdata;
+
+   /* Initilise and fill new vector */
+   retval=CTA_Func_GetFunc(clsdata->functions[CTA_MATRIX_CREATE_INIT],
+                           &function);
+   if (retval!=CTA_OK){
+	   CTA_WRITE_ERROR("Cannot get function CTA_MATRIX_CREATE_INIT");   
+	   return retval;
+   }
+   (void) function(matrix->data, &m, &n, &datatype, userdata, &retval);
+   if (retval) {
+	   CTA_WRITE_ERROR("Error in function");   
+	   return retval;
+   }
+
+   /* Allocate new handle and return eror when unsuccesfull */
+   retval=CTA_Handle_Create("vector",CTA_MATRIX,matrix,hmatrix);
+   if (retval) {
+	   CTA_WRITE_ERROR("Cannot create handle for CTA_MATRIX");   
+	   return retval;
+   }
+
+   return CTA_OK;
+}
+#undef METHOD
+#define METHOD "GetSize"
+int CTA_Matrix_GetSize(
+   CTA_Matrix hmat,      /* Handle of the matrix */
+   int *m,              /* number of rows        */
+   int *n               /* number of cols        */
+   ){
+
+   CTAI_Matrix *data;      /* All data of matrix hmat      */
+   int retval;             /* Return value of COSTA call   */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hmat,CTA_MATRIX);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_matrix handle");   
+	   return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) hmat,(void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retreive handle data");
+	   return retval;
+   }
+
+   *m=data->m;
+   *n=data->n;
+   return CTA_OK;
+};
+#undef METHOD
+#define METHOD "GetDatatype"
+int CTA_Matrix_GetDatatype(
+   CTA_Matrix hmat,         /* Handle of the matrix */
+   CTA_Datatype *datatype   /* Returned data type   */
+   ){
+
+   CTAI_Matrix *data;      /* All data of matrix hmat     */
+   int retval;             /* Return value of COSTA call   */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hmat,CTA_MATRIX);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_matrix handle");
+	   return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) hmat,(void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retreive handle data");
+	   return retval;
+   }
+
+   *datatype=data->datatype;
+   return CTA_OK;
+};
+
+#undef METHOD
+#define METHOD "GetVals"
+int CTA_Matrix_GetVals(
+   CTA_Matrix hmat,      /* Handle of the matrix */
+   void *vals,           /* Retured value        */
+   int  m,               /* number of rows (fortran) matrix vals */
+   int  n,               /* number of columns (fortran) matrix vals */
+   CTA_Datatype datatype /* Data type */
+   ){
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Matrix *data;      /* All data of matrix hmat      */
+   CTA_Function *function; /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hmat,CTA_MATRIX);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_matrix handle");
+	   return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) hmat,(void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retreive handle data");
+	   return retval;
+   }
+
+   CTA_Func_GetFunc(data->functions[I_CTA_MATRIX_GETVALS],&function);
+
+   /* Call (user) implementation */
+   function(data->data,vals,&m,&n,&datatype,&retval);
+   return retval;
+};
+
+
+
+#undef METHOD
+#define METHOD "GetVal"
+int CTA_Matrix_GetVal(
+   CTA_Matrix hmat,      /* Handle of the matrix */
+   void *val,            /* Returned value        */
+   int  m,               /* number of rows (fortran) matrix vals */
+   int  n,               /* number of columns (fortran) matrix vals */
+   CTA_Datatype datatype /* Data type */
+   ){
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Matrix *data;      /* All data of matrix hmat      */
+   CTA_Function *function; /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hmat,CTA_MATRIX);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_matrix handle");
+	   return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) hmat,(void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retreive handle data");
+	   return retval;
+   }
+   CTA_Func_GetFunc(data->functions[I_CTA_MATRIX_GETVAL],&function);
+
+   /* Call (user) implementation */
+   function(data->data,val,&m,&n,&datatype,&retval);
+   return retval;
+};
+
+
+#undef METHOD
+#define METHOD "SetConstant"
+int CTA_Matrix_SetConstant(
+   CTA_Matrix hmat,  /* Handle of the matrix */
+   void *val,        /* value that must be set */
+   CTA_Datatype datatype /* Data type */
+   ){
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Matrix *data;      /* All data of matrix hmat      */
+   CTA_Function *function; /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hmat,CTA_MATRIX);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_matrix handle");
+	   return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) hmat, (void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retreive handle data");
+	   return retval;
+   }
+
+   CTA_Func_GetFunc(data->functions[I_CTA_MATRIX_SETCONST],&function);
+
+   /* Call (user) implementation */
+   function(data->data,val,&datatype,&retval);
+   return retval;
+};
+
+#undef METHOD
+#define METHOD "Export"
+int CTA_Matrix_Export(
+   CTA_Matrix hmat,  /* Handle of the matrix */
+   CTA_Handle userdata    /* user-data */
+   ){
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Matrix *data;      /* All data of matrix hmat      */
+   CTA_Function *function; /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hmat,CTA_MATRIX);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_matrix handle");
+	   return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) hmat, (void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retreive handle data");
+	   return retval;
+   }
+
+   CTA_Func_GetFunc(data->functions[I_CTA_MATRIX_EXPORT],&function);
+
+   /* Call (user) implementation */
+   function(data->data,userdata,&retval);
+   return retval;
+};
+
+
+#undef METHOD
+#define METHOD "SetCol"
+int CTA_Matrix_SetCol(
+   CTA_Matrix hmat,  /* Handle of the matrix  */
+   int  n,           /* column number         */
+   CTA_Vector hvec   /* new values for column */
+   )
+{
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Matrix *data;      /* All data of matrix hmat      */
+   CTA_Function *function; /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hmat,CTA_MATRIX);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_matrix handle");
+	   return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) hmat, (void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retreive handle data");
+	   return retval;
+   }
+
+   CTA_Func_GetFunc(data->functions[I_CTA_MATRIX_SETCOL],&function);
+
+   /* Call (user) implementation */
+   function(data->data,&n,&hvec,&retval);
+   return retval;
+};
+
+#undef METHOD
+#define METHOD "SetVals"
+int CTA_Matrix_SetVals(
+   CTA_Matrix hmat,  /* Handle of the matrix */
+   void *vals,       /* value that must be set */
+   int  m,           /* number of rows        */
+   int  n,           /* number of cols        */
+   CTA_Datatype datatype /* Data type */
+   ){
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Matrix *data;      /* All data of matrix hmat      */
+   CTA_Function *function; /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hmat,CTA_MATRIX);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_matrix handle");
+	   return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) hmat, (void*) &data);
+   if (retval!=CTA_OK)  {
+	   CTA_WRITE_ERROR("Cannot retreive handle data");
+	   return retval;
+   }
+
+   CTA_Func_GetFunc(data->functions[I_CTA_MATRIX_SETVALS],&function);
+
+   /* Call (user) implementation */
+   function(data->data,vals,&m, &n,&datatype,&retval);
+   return retval;
+};
+
+
+#undef METHOD
+#define METHOD "SetVal"
+int CTA_Matrix_SetVal(
+   CTA_Matrix hmat,  /* Handle of the matrix */
+   void *vals,       /* value that must be set */
+   int  m,           /* row index        */
+   int  n,           /* col index        */
+   CTA_Datatype datatype /* Data type */
+   ){
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Matrix *data;      /* All data of matrix hmat      */
+   CTA_Function *function; /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hmat,CTA_MATRIX);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_matrix handle");
+	   return retval;
+   }
+   
+   retval=CTA_Handle_GetData((CTA_Handle) hmat, (void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retreive handle data");
+	   return retval;
+   }
+
+   CTA_Func_GetFunc(data->functions[I_CTA_MATRIX_SETVAL],&function);
+
+   /* Call (user) implementation */
+   function(data->data,vals,&m, &n,&datatype,&retval);
+   return retval;
+};
+
+
+#undef METHOD
+#define METHOD "Ger"
+int CTA_Matrix_Ger(CTA_Matrix hmat, double alpha, CTA_Vector vx,
+                      CTA_Vector vy){
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Matrix *data;      /* All data of matrix hmat      */
+   CTA_Function *function; /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hmat,CTA_MATRIX);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_matrix handle");
+	   return retval;
+   }
+   
+   retval=CTA_Handle_GetData((CTA_Handle) hmat, (void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retreive handle data");
+	   return retval;
+   }
+
+   CTA_Func_GetFunc(data->functions[I_CTA_MATRIX_GER],&function);
+
+   /* Call (user) implementation */
+   function(data->data,&alpha,&vx,&vy,&retval);
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "Inv"
+int CTA_Matrix_Inv(CTA_Matrix hmat){
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Matrix *data;      /* All data of matrix hmat      */
+   CTA_Function *function; /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hmat,CTA_MATRIX);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_matrix handle");
+	   return retval;
+   }
+   
+   retval=CTA_Handle_GetData((CTA_Handle) hmat, (void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retreive handle data");
+	   return retval;
+   }
+
+   CTA_Func_GetFunc(data->functions[I_CTA_MATRIX_INV],&function);
+
+   /* Call (user) implementation */
+   function(data->data,&retval);
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "Gemv"
+int CTA_Matrix_Gemv(CTA_Matrix hmat, int trans, double alpha, CTA_Vector vx,
+                    double beta,  CTA_Vector vy){
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Matrix *data;      /* All data of matrix hmat      */
+   CTA_Function *function; /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hmat,CTA_MATRIX);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_matrix handle");
+	   return retval;
+   }
+   
+   retval=CTA_Handle_GetData((CTA_Handle) hmat, (void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retreive handle data");
+	   return retval;
+   }
+
+   CTA_Func_GetFunc(data->functions[I_CTA_MATRIX_GEMV],&function);
+
+   /* Call (user) implementation */
+   function(data->data, &trans, &alpha,&vx,&beta,&vy,&retval);
+   return retval;
+}
+
+
+#undef METHOD
+#define METHOD "Gemm"
+int CTA_Matrix_Gemm(CTA_Matrix mC, int transa, int transb, double alpha,
+                    CTA_Matrix mA, CTA_Matrix mB, double beta){
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Matrix *dataA;     /* All data of matrix A         */
+   CTAI_Matrix *dataB;     /* All data of matrix B         */
+   CTAI_Matrix *dataC;     /* All data of matrix C         */
+   CTA_Function *function; /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) mA,CTA_MATRIX);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_matrix handle");
+	   return retval;
+   }
+   
+   retval=CTA_Handle_Check((CTA_Handle) mB,CTA_MATRIX);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_matrix handle");
+	   return retval;
+   }
+   
+   retval=CTA_Handle_Check((CTA_Handle) mC,CTA_MATRIX);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_matrix handle");
+	   return retval;
+   }
+   
+
+   retval=CTA_Handle_GetData((CTA_Handle) mA, (void*) &dataA);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retreive handle data");
+	   return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) mB, (void*) &dataB);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retreive handle data");
+	   return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) mC, (void*) &dataC);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retreive handle data");
+	   return retval;
+   }
+
+   if (dataA->functions[I_CTA_MATRIX_GEMV]!=
+          dataB->functions[I_CTA_MATRIX_GEMV] ||
+       dataA->functions[I_CTA_MATRIX_GEMV]!=
+          dataC->functions[I_CTA_MATRIX_GEMV]){
+      return CTA_INCOMPATIBLE_MATRICES;
+   }
+
+   CTA_Func_GetFunc(dataC->functions[I_CTA_MATRIX_GEMM],&function);
+
+   /* Call (user) implementation */
+   function(dataC->data, &transa, &transb, &alpha,  dataA->data,
+            dataB->data, &beta, &retval);
+   return retval;
+}
+
+
+#undef METHOD
+#define METHOD "Free"
+int CTA_Matrix_Free(
+   CTA_Matrix *hmat  /* Handle of matrix  */
+   ){
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Matrix *data;      /* All data of matrix hmat  */
+   CTA_Function *function; /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) *hmat,CTA_MATRIX);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_matrix handle");
+	   return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) *hmat, (void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retreive handle data");
+	   return retval;
+   }
+
+   CTA_Func_GetFunc(data->functions[I_CTA_MATRIX_FREE],&function);
+
+   /* Call (user) implementation */
+   function(data->data,&retval);
+   free(data->data);
+   free(data);
+   retval=CTA_Handle_Free(hmat);
+
+   return retval;
+};
+
+#undef METHOD
+#define METHOD "Axpy"
+int CTA_Matrix_Axpy(CTA_Matrix mY, double alpha, CTA_Matrix mX){
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Matrix *dataX;     /* All data of matrix A         */
+   CTAI_Matrix *dataY;     /* All data of matrix B         */
+   CTA_Function *function; /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) mY,CTA_MATRIX);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_matrix handle");
+	   return retval;
+   }
+
+   retval=CTA_Handle_Check((CTA_Handle) mX,CTA_MATRIX);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_matrix handle");
+	   return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) mY, (void*) &dataY);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retreive handle data");
+	   return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) mX, (void*) &dataX);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retreive handle data");
+	   return retval;
+   }
+
+   if (dataY->functions[I_CTA_MATRIX_AXPY]!=
+          dataX->functions[I_CTA_MATRIX_AXPY]){
+      return CTA_INCOMPATIBLE_MATRICES;
+   }
+
+   CTA_Func_GetFunc(dataY->functions[I_CTA_MATRIX_AXPY],&function);
+
+   /* Call (user) implementation */
+   function(dataY->data, &alpha, dataX->data, &retval);
+   return retval;
+}
+
+
+
+int CTA_Matrix_EigVals(CTA_Matrix hmat, CTA_Vector eigvals,
+                       CTA_Matrix Reigvecs){
+
+char leigv, reigv;
+
+int nrows, ncols, neig, n;
+void *A, *VL, *VR, *eigR, *eigI, *work;
+CTA_Datatype datatype;
+int size, lwork, info;
+CTA_Matrix Leigvecs=CTA_NULL;
+double dlength;
+float  slength;
+
+   /* get dimensions and check them */
+   CTA_Matrix_GetSize(hmat, &nrows, &ncols);
+   if (nrows!=ncols) {
+      return CTA_MATRIX_IS_NOT_SQUARE;
+   }
+   n=ncols;
+   CTA_Matrix_GetDatatype(hmat,&datatype);
+   CTA_SizeOf(datatype,&size);
+
+   /* Check vector of eigen values */
+   CTA_Vector_GetSize(eigvals,&neig);
+   if (neig!=n){
+      return CTA_DIMENSION_ERROR;
+   }
+
+   /* Check matrix of left eigenvectors */
+   leigv='N';
+   if (Leigvecs!=CTA_NULL){
+      leigv='V';
+      CTA_Matrix_GetSize(Leigvecs, &nrows, &ncols);
+      if (ncols!=n || nrows!=n) {
+         return CTA_DIMENSION_ERROR;
+      }
+   }
+
+   /* Check matrix of right eigenvectors */
+   reigv='N';
+   if (Reigvecs!=CTA_NULL){
+      reigv='V';
+      CTA_Matrix_GetSize(Reigvecs, &nrows, &ncols);
+      if (ncols!=n || nrows!=n) {
+         return CTA_DIMENSION_ERROR;
+      }
+   }
+
+   A=CTA_Malloc(size*n*n);
+   VL=CTA_Malloc(size*n*n);
+   VR=CTA_Malloc(size*n*n);
+   CTA_Matrix_GetVals(hmat, A, n, n, datatype);
+
+   eigR=CTA_Malloc(size*n);
+   eigI=CTA_Malloc(size*n);
+
+   switch (datatype){
+   case CTA_REAL :
+   /* Determine amount of work memory */
+   lwork=-1;
+   SGEEV_F77( &leigv, &reigv, &n, A, &n, eigR, eigI, VL, &n, VR, &n, &slength,
+              &lwork, &info);
+
+   /* Allocate work memory and start compute */
+   lwork=(int) slength;
+   work=CTA_Malloc(size*lwork);
+
+   SGEEV_F77( &leigv, &reigv, &n, A, &n, eigR, eigI, VL, &n, VR, &n, work,
+              &lwork, &info);
+   break;
+   case CTA_DOUBLE :
+      lwork=-1;
+      DGEEV_F77( &leigv, &reigv, &n, A, &n, eigR, eigI, VL, &n, VR, &n,
+                 &dlength, &lwork, &info);
+
+      /* Allocate work memory and start compute */
+      lwork=(int) dlength;
+      work=CTA_Malloc(size*lwork);
+
+      DGEEV_F77( &leigv, &reigv, &n, A, &n, eigR, eigI, VL, &n, VR, &n, work,
+                 &lwork, &info);
+
+   break;
+   }
+
+   /* Set left eigenvalues */
+   if (Leigvecs!=CTA_NULL){
+      CTA_Matrix_SetVals(Leigvecs, VL, n, n, datatype);
+   }
+
+   /* Set right eigenvalues */
+   if (Reigvecs!=CTA_NULL){
+      CTA_Matrix_SetVals(Reigvecs, VR, n, n, datatype);
+   }
+
+   /* Set real-part eigenvalues */
+   CTA_Vector_SetVals(eigvals, eigR, n, datatype);
+
+   free(A);
+   free(VL);
+   free(VR);
+   free(eigR);
+   free(eigI);
+   free(work);
+   return CTA_OK;
+}
+
+/* Interfacing with Fortran */
+
+CTAEXPORT void CTA_MATRIX_DEFINECLASS_F77(char *name, int *h_func,int *hmatcl, int *ierr,
+                           int len_name){
+
+   char  *c_name;
+   // create a c-string equivalent to name
+   c_name=CTA_Malloc((len_name+1)*sizeof(char));
+   CTA_fstr2cstr(name,c_name,len_name);
+   *ierr=CTA_Matrix_DefineClass(name, (CTA_Func*) h_func,
+                                (CTA_MatClass*) hmatcl);
+  free(c_name);
+}
+
+CTAEXPORT void CTA_MATRIX_DUPLICATE_F77(int *hmatrix1, int *hmatrix2, int *ierr){
+   *ierr=CTA_Matrix_Duplicate((CTA_Matrix) *hmatrix1, (CTA_Matrix*) hmatrix2);
+}
+
+CTAEXPORT void CTA_MATRIX_CREATE_F77(int *hmatcl, int *m, int *n, int *datatype,
+                         int *userdata, int *hmatrix, int *ierr){
+   *ierr=CTA_Matrix_Create((CTA_MatClass) *hmatcl, *m, *n,
+                      (CTA_Datatype) *datatype, (CTA_Handle) *userdata, (CTA_Matrix*) hmatrix);
+}
+
+CTAEXPORT void CTA_MATRIX_GETSIZE_F77(int *hmat, int *m, int *n, int *ierr){
+   *ierr=CTA_Matrix_GetSize((CTA_Matrix) *hmat, m, n);
+}
+
+CTAEXPORT void CTA_MATRIX_GETDATATYPE_F77(int *hmat, int *datatype ,int *ierr){
+   *ierr=CTA_Matrix_GetDatatype((CTA_Matrix) *hmat, (CTA_Datatype*) datatype);
+}
+
+CTAEXPORT void CTA_MATRIX_GETVALS_F77(int *hmat, void *vals, int  *m, int  *n,
+                          int *datatype, int *ierr){
+   *ierr=CTA_Matrix_GetVals((CTA_Matrix) *hmat, vals, *m, *n,
+                            (CTA_Datatype) *datatype);
+}
+
+CTAEXPORT void CTA_MATRIX_GETVAL_F77(int *hmat, void *vals, int  *m, int  *n,
+                          int *datatype, int *ierr){
+   *ierr=CTA_Matrix_GetVal((CTA_Matrix) *hmat, vals, *m, *n,
+                            (CTA_Datatype) *datatype);
+}
+
+
+CTAEXPORT void CTA_MATRIX_SETCONSTANT_F77(int *hmat, void *val, int *datatype, int *ierr){
+   *ierr=CTA_Matrix_SetConstant((CTA_Matrix) *hmat, val,
+                                (CTA_Datatype) *datatype);
+}
+
+
+CTAEXPORT void CTA_MATRIX_SETCOL_F77(int *hmat, int *n, int *hvec, int *ierr){
+   *ierr=CTA_Matrix_SetCol((CTA_Matrix) *hmat, *n, (CTA_Vector) *hvec);
+}
+
+CTAEXPORT void CTA_MATRIX_SETVALS_F77(int *hmat, void *vals, int  *m, int *n,
+                       int *datatype, int *ierr){
+   *ierr=CTA_Matrix_SetVals((CTA_Matrix) *hmat, vals, *m, *n,
+                            (CTA_Datatype) *datatype);
+}
+
+CTAEXPORT void CTA_MATRIX_SETVAL_F77(int *hmat, void *val, int  *m, int  *n,
+                       int *datatype, int *ierr){
+   *ierr=CTA_Matrix_SetVal((CTA_Matrix) *hmat, val, *m, *n,
+                       (CTA_Datatype) *datatype);
+}
+
+CTAEXPORT void CTA_MATRIX_EXPORT_F77(int *hmat, int *usedoc, int *ierr){
+   *ierr=CTA_Matrix_Export((CTA_Matrix) *hmat, (CTA_Handle) *usedoc);
+
+}
+
+CTAEXPORT void CTA_MATRIX_GER_F77(int *hmat, double *alpha, int *vx, int *vy, int *ierr){
+   *ierr=CTA_Matrix_Ger((CTA_Matrix) *hmat, (double) *alpha, (CTA_Vector) *vx,
+                        (CTA_Vector) *vy);
+}
+
+CTAEXPORT void CTA_MATRIX_INV_F77(int *hmat, int *ierr){
+   *ierr=CTA_Matrix_Inv((CTA_Matrix) *hmat);
+}
+
+CTAEXPORT void CTA_MATRIX_GEMV_F77(int *hmat, int *trans, double *alpha, int *vx, double *beta, int *vy, int *ierr){
+   *ierr=CTA_Matrix_Gemv((CTA_Matrix) *hmat, *trans, (double) *alpha, (CTA_Vector) *vx,
+                        (double) *beta, (CTA_Vector) *vy);
+}
+
+CTAEXPORT void CTA_MATRIX_GEMM_F77(int *mC, int *transa, int *transb, double *alpha,
+                    int *mA, int *mB, double *beta, int *ierr){
+   *ierr=CTA_Matrix_Gemm((CTA_Matrix) *mC, *transa, *transb, (double) *alpha,
+                    (CTA_Matrix) *mA, (CTA_Matrix) *mB, (double) *beta);
+}
+
+
+
+CTAEXPORT void CTA_MATRIX_AXPY_F77(int *mY, double *alpha, int *mX, int *ierr){
+   *ierr=CTA_Matrix_Axpy((CTA_Matrix) *mY, *alpha, (CTA_Matrix) *mX);
+}
+
+
+CTAEXPORT void CTA_MATRIX_EIGVALS_F77(int *hmat, int *eigvals, int
+*eigvecs, int *ierr){
+   *ierr=CTA_Matrix_EigVals((CTA_Matrix) *hmat, (CTA_Vector) *eigvals, (CTA_Matrix) *eigvecs );
+}
+
+
+
+
+CTAEXPORT void CTA_MATRIX_FREE_F77(int *hmat, int *ierr){
+   *ierr=CTA_Matrix_Free((CTA_Matrix*) hmat);
+}
+
+
diff --git a/costa/native/cta/src/cta_matrix_blas.c b/costa/native/cta/src/cta_matrix_blas.c
new file mode 100644
index 000000000..d06b74ad6
--- /dev/null
+++ b/costa/native/cta/src/cta_matrix_blas.c
@@ -0,0 +1,805 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_matrix_blas.c $
+$Revision: 3361 $, $Date: 2012-07-04 16:52:30 +0200 (Wed, 04 Jul 2012) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+#include <stdio.h>
+#include "cta_mem.h"
+#include "cta_system.h"
+#include "cta_flush.h"
+#include "cta_matrix_blas.h"
+#include "cta_errors.h"
+#include "cta_file.h"
+#include "ctai.h"
+#include "cta_message.h"
+
+#define CLASSNAME "CTA_Matrix_blas"
+
+
+void CTA_Matrix_blas_initialise(CTA_MatClass *hmatcl)
+{
+   CTA_Intf hintf=0;
+   CTA_Func h_func[CTA_MATRIX_NUMFUNC];
+
+   CTA_Func_Create(" ",&CTAI_Matrix_Create_Size, hintf,&h_func[CTA_MATRIX_CREATE_SIZE]);
+   CTA_Func_Create(" ",&CTAI_Matrix_Create_Init, hintf,&h_func[CTA_MATRIX_CREATE_INIT]);
+   CTA_Func_Create(" ",&CTAI_Matrix_getvals,     hintf,&h_func[I_CTA_MATRIX_GETVALS]    );
+   CTA_Func_Create(" ",&CTAI_Matrix_getval,      hintf,&h_func[I_CTA_MATRIX_GETVAL]     );
+   CTA_Func_Create(" ",&CTAI_Matrix_setcol,      hintf,&h_func[I_CTA_MATRIX_SETCOL]    );
+   CTA_Func_Create(" ",&CTAI_Matrix_setvals,     hintf,&h_func[I_CTA_MATRIX_SETVALS]    );
+   CTA_Func_Create(" ",&CTAI_Matrix_setval,      hintf,&h_func[I_CTA_MATRIX_SETVAL]     );
+   CTA_Func_Create(" ",&CTAI_Matrix_setconst,    hintf,&h_func[I_CTA_MATRIX_SETCONST]   );
+   CTA_Func_Create(" ",&CTAI_Matrix_Export,      hintf,&h_func[I_CTA_MATRIX_EXPORT]     );
+   CTA_Func_Create(" ",&CTAI_Matrix_Ger,         hintf,&h_func[I_CTA_MATRIX_GER]        );
+   CTA_Func_Create(" ",&CTAI_Matrix_Inv,         hintf,&h_func[I_CTA_MATRIX_INV]        );
+   CTA_Func_Create(" ",&CTAI_Matrix_Gemv,        hintf,&h_func[I_CTA_MATRIX_GEMV]        );
+   CTA_Func_Create(" ",&CTAI_Matrix_Gemm,        hintf,&h_func[I_CTA_MATRIX_GEMM]        );
+   CTA_Func_Create(" ",&CTAI_Matrix_Axpy,        hintf,&h_func[I_CTA_MATRIX_AXPY]        );
+   CTA_Func_Create(" ",&CTAI_Matrix_free,        hintf,&h_func[I_CTA_MATRIX_FREE]       );
+
+   CTA_Matrix_DefineClass("cta_matrix_blas",h_func,hmatcl);
+}
+
+void CTAI_Matrix_Create_Size(
+   int *m,
+   int *n,
+   CTA_Datatype *datatype,
+   CTA_Handle userdata,
+   int *retval,
+   int *memsize
+   ){
+
+   *memsize=(int) sizeof(CTAI_Matrix_blas);
+   *retval=CTA_OK;
+};
+
+void CTAI_Matrix_Create_Init(
+   CTAI_Matrix_blas *x,
+   int *m,
+   int *n,
+   CTA_Datatype *datatype,
+   CTA_Handle userdata,
+   int *retval
+   ){
+   if (*datatype==CTA_REAL){
+      x->values=CTA_Malloc(sizeof(float)*(*m)*(*n));
+   }
+   else if (*datatype==CTA_DOUBLE){
+      x->values=CTA_Malloc(sizeof(double)*(*m)*(*n));
+   }
+   else if (*datatype==CTA_INTEGER){
+      x->values=CTA_Malloc(sizeof(int)*(*m)*(*n));
+   }
+   else {
+     *retval=CTA_ILLEGAL_DATATYPE;
+   }
+   x->datatype=*datatype;
+   x->n=*n;
+   x->m=*m;
+   x->inverse=FALSE;
+   x->perm=NULL;
+};
+
+
+void CTAI_Matrix_construct_inverse(CTAI_Matrix_blas *A)
+{
+   void *lu;
+   double *dA;
+   int i, length, info;
+   int one=1;
+   char notrans='N';
+
+   length=A->n*A->m;
+   if (A->datatype==CTA_REAL){
+      float  *sA;
+      lu=CTA_Malloc(sizeof(float)*length);
+      SCOPY_F77(&length,A->values,&one,(float *) lu,&one);
+      // Create identity matrix
+      sA=A->values;
+      for (i=0;i<length; i++){ sA[i]=0.0;}
+      for (i=0;i<length; i=i+A->n+1){ sA[i]=1.0;}
+      // Compute inverse
+      SGETRS_F77(&notrans,&(A->n), &A->n, lu, &(A->n), A->perm, A->values,
+      &(A->n), &info,1);
+      free(lu);
+      
+   }
+   else if (A->datatype==CTA_DOUBLE){
+      lu=CTA_Malloc(sizeof(double)*length);
+      DCOPY_F77(&length,A->values,&one,(double *) lu,&one);
+      // Create identity matrix
+      dA=A->values;
+      for (i=0;i<length; i++){ dA[i]=0.0;}
+      for (i=0;i<length; i=i+A->n+1){ dA[i]=1.0;}
+      // Compute inverse
+      DGETRS_F77(&notrans,&(A->n), &A->n, lu, &(A->n), A->perm, A->values,
+      &(A->n), &info,1);
+      free(lu);
+   }
+   else {
+      printf("INTERNAL ERROR IN CTAI_Matrix_construct_inverse\n");
+      exit(-1);
+   }
+   A->inverse=FALSE;
+
+}
+
+
+void CTAI_Matrix_getvals(
+   CTAI_Matrix_blas *A,
+   void *vals,
+   int *m,
+   int *n,
+   CTA_Datatype *datatype,
+   int *retval
+   ){
+   /* Local variables */
+   int one=1; 
+   int length;
+
+   /* check dimensions */
+   if (*m!=A->m){
+      *retval=CTA_DIMENSION_ERROR;
+      return;
+   }
+   if (*n!=A->n){
+      *retval=CTA_DIMENSION_ERROR;
+      return;
+   }
+
+   /* check data types */
+   if (*datatype!=A->datatype){
+      *retval=CTA_INCOMPATIBLE_MATRICES;
+      return;
+   }
+
+   if (A->inverse) {
+      /* OK we are now going to really going to invert the matrix */
+      CTAI_Matrix_construct_inverse(A);
+   }
+   
+   /* copy values using BLAS copy */
+   length=(*n)*(*m);
+   if (*datatype==CTA_REAL || *datatype == CTA_INTEGER){
+      SCOPY_F77(&length,A->values,&one,vals,&one);
+   }
+   else {
+      DCOPY_F77(&length,A->values,&one,vals,&one);
+   }   
+   *retval=CTA_OK;
+};
+
+void CTAI_Matrix_getval(
+   CTAI_Matrix_blas *x,
+   void *val,
+   int *m,
+   int *n,
+   CTA_Datatype *datatype,
+   int *retval
+   ){
+   /* Local variables */
+
+   if (x->inverse)  {
+      /* OK we are now going to really going to invert the matrix */
+      CTAI_Matrix_construct_inverse(x);
+   }
+   /* check dimensions */
+   if (*m>x->m || *m<1){
+      *retval=CTA_DIMENSION_ERROR;
+      return;
+   }
+   if (*n>x->n || *n<1){
+      *retval=CTA_DIMENSION_ERROR;
+      return;
+   }
+
+   /* check data types */
+   if (*datatype!=x->datatype){
+      *retval=CTA_INCOMPATIBLE_MATRICES;
+      return;
+   }
+
+   if (*datatype==CTA_REAL)
+   {
+      float * values = (float *) x->values;
+      float * value = (float *) val;
+      *value = values[ (*m-1) + (x->m) * (*n-1) ];
+   }
+   else if (*datatype==CTA_INTEGER)
+   {
+      int * values = (int *) x->values;
+      int * value = (int *) val;
+      *value = values[ (*m-1) + (*n-1) * (x->m) ];
+   }   
+   else if (*datatype==CTA_DOUBLE)
+   {
+      double * values = (double *) x->values;
+      double * value = (double *) val;
+      *value = values[ (*m-1) + (x->m) * (*n-1) ];
+   }
+   else
+   {
+     *retval = CTA_ILLEGAL_DATATYPE;
+     return;
+   } 
+   *retval=CTA_OK;
+};
+
+
+
+void CTAI_Matrix_setcol(
+   CTAI_Matrix_blas *x,
+   int *n,
+   CTA_Vector *hvec,
+   int *retval
+  ){
+   /* Local variables */
+   CTA_Datatype datatype;
+   int length;
+   void * values;
+
+   if (x->inverse){
+      /* OK we are now going to really going to invert the matrix */
+      CTAI_Matrix_construct_inverse(x);
+   }
+   /* Check datatype: must be same as matrix' */
+   *retval = CTA_Vector_GetDatatype( *hvec, &datatype);
+   if (*retval != CTA_OK) return;
+
+   if (datatype!=x->datatype)
+   {
+      *retval = CTA_ILLEGAL_DATATYPE;
+      return;
+   }
+
+   /* Check the dimensions */
+   *retval = CTA_Vector_GetSize(*hvec, &length);
+   if (*retval != CTA_OK) return;
+   if (length != x->m) 
+   {
+      *retval = CTA_DIMENSION_ERROR; return;
+   }
+
+   /* Find the location where the values will be stored */
+   if (datatype==CTA_REAL)
+   {
+      float * xvalues = (float*) x->values;
+      values = &xvalues[ (x->m) * (*n-1)];
+   }
+   else if (datatype==CTA_INTEGER)
+   {
+      int * xvalues = (int *) x->values;
+      values = &xvalues[ (x->m) * (*n-1)];
+   }
+   else if (datatype == CTA_DOUBLE)
+   {
+      double * xvalues = (double*) x->values;
+      values = &xvalues[ (x->m) * (*n-1)];
+   }
+   else
+   {
+      *retval = CTA_ILLEGAL_DATATYPE;
+      return;
+   }
+
+   *retval =  CTA_Vector_GetVals( *hvec, values, length, datatype);
+   if (*retval != CTA_OK) return;
+
+   *retval=CTA_OK;
+};
+
+
+
+void CTAI_Matrix_setvals(
+   CTAI_Matrix_blas *x,
+   void *vals,
+   int *m,
+   int *n,
+   CTA_Datatype *datatype,
+   int *retval
+  ){
+   /* Local variables */
+   int one=1; 
+   int length;
+
+   if (x->inverse){
+      /* OK we are now going to really going to invert the matrix */
+      CTAI_Matrix_construct_inverse(x);
+   }
+   /* check dimensions */
+   if (*m!=x->m){
+      *retval=CTA_DIMENSION_ERROR;
+      return;
+   }   if (*n!=x->n){
+      *retval=CTA_DIMENSION_ERROR;
+      return;
+   }
+
+   /* check data types */
+   if (*datatype!=x->datatype){
+      *retval=CTA_INCOMPATIBLE_MATRICES;
+      return;
+   }
+
+   length=(*n)*(*m);
+   /* copy values using BLAS copy */
+   if (*datatype==CTA_REAL || *datatype==CTA_INTEGER){
+      SCOPY_F77(&length,vals,&one,x->values,&one);
+   }
+   else {
+      DCOPY_F77(&length,vals,&one,x->values,&one);
+   }   
+   *retval=CTA_OK;
+};
+
+
+
+void CTAI_Matrix_setval(
+   CTAI_Matrix_blas *x,
+   void *val,
+   int *m,
+   int *n,
+   CTA_Datatype *datatype,
+   int *retval
+  ){
+
+   if (x->inverse){
+      /* OK we are now going to really going to invert the matrix */
+      CTAI_Matrix_construct_inverse(x);
+   }
+   /* check dimensions */
+   if ( *m > x->m || *m<1){
+      *retval=CTA_DIMENSION_ERROR;
+      return;
+   }   if (*n > x->n || *n<1){
+      *retval=CTA_DIMENSION_ERROR;
+      return;
+   }
+
+   /* check data types */
+   if (*datatype!=x->datatype){
+      *retval=CTA_INCOMPATIBLE_MATRICES;
+      return;
+   }
+
+   /* copy values using BLAS copy */
+   if (*datatype==CTA_REAL)
+   {
+      float * values= (float*) x->values;
+      float * value = (float*) val;
+      values[ (*m-1) + (*n-1) * (x->m) ] = *value;
+   }
+   else if (*datatype==CTA_DOUBLE)
+   {
+      double * values= (double*) x->values;
+      double * value = (double*) val;
+      values[ (*m-1) + (*n-1)* (x->m) ] = *value;
+   }   
+   else if (*datatype==CTA_INTEGER)
+   {
+      int * values= (int*) x->values;
+      int * value = (int*) val;
+      values[ (*m-1) + (*n-1)* (x->m) ] = *value;
+   }   
+   *retval=CTA_OK;
+};
+
+void CTAI_Matrix_setconst(
+   CTAI_Matrix_blas *x,
+   void *val,
+   CTA_Datatype *datatype,
+   int *retval
+  ){
+   /* Local variables */
+   int length;
+   int i;
+
+   if (x->inverse) {
+      /* OK we are now going to really going to invert the matrix */
+      CTAI_Matrix_construct_inverse(x);
+   }
+   /* check data types */
+   if (*datatype!=x->datatype){
+      *retval=CTA_INCOMPATIBLE_MATRICES;
+      return;
+   }
+
+   /* set values */
+   length=x->m*x->n;
+   if (*datatype==CTA_REAL)
+   {
+      float *value = (float *) val;
+      float *values= (float *) x->values;
+      for (i=0;i<length;i++){
+         values[i]=*value;
+      }
+   }
+   else if (*datatype==CTA_DOUBLE)
+   {
+      double *value = (double *) val;
+      double *values= (double *) x->values;
+      for (i=0;i<length;i++){
+         values[i]=*value;
+      }
+   }   
+   else if (*datatype==CTA_INTEGER)
+   {
+      int *value = (int *) val;
+      int *values= (int *) x->values;
+      for (i=0;i<length;i++){
+         values[i]=*value;
+      }
+   }   
+   *retval=CTA_OK;
+};
+
+
+
+void CTAI_Matrix_Export(
+         CTAI_Matrix_blas *x,
+         CTA_Handle userdata,
+         int *retval)
+{
+   // The user-data must be 1 item: the output file. Look it up.
+   FILE *file;
+
+   if (x->inverse){
+      /* OK we are now going to really going to invert the matrix */
+      CTAI_Matrix_construct_inverse(x);
+   }
+
+   if (CTA_Handle_Check(userdata,CTA_FILE)==CTA_OK) {
+      int i0;
+      *retval = CTA_File_Get(userdata,&file);
+
+      if (CTA_FLUSH_ALWAYS) CTA_Flush();
+
+      for (i0=0; i0<x->n; i0+=5)
+      {
+         int j;
+         fprintf(file,"matrix(:,%d:%d)=[",i0+1,MIN(x->n,i0+5));
+         for (j=0; j<x->m; j++)
+         {
+            int i;
+            fprintf(file,"\n    ");
+            for (i=i0; i<MIN(x->n,i0+5);i++)
+            {
+               if (x->datatype == CTA_DOUBLE)
+               {
+                  double *values = (double*) x->values;
+                  //fprintf(file," %10.3lg",values[j+x->m*i]);
+                  fprintf(file," %18.8le",values[j+x->m*i]);
+               }
+               else if (x->datatype == CTA_REAL)
+               {
+                  float *values = (float *) x->values;
+                  fprintf(file," %10.3g",values[j+x->m*i]);
+               }
+               else if (x->datatype == CTA_INTEGER)
+               {
+                  int *values = (int *) x->values;
+                  fprintf(file," %10d",values[j+x->m*i]);
+               }
+            }
+         }
+         fprintf(file,"];\n\n");
+         if (CTA_FLUSH_ALWAYS) CTA_Flush();
+      }
+   } else {
+      *retval=CTA_FORMAT_NOT_SUPPORTED;
+      return;
+   }
+
+   *retval = CTA_OK;
+}
+
+void CTAI_Matrix_Ger(
+         CTAI_Matrix_blas *A,
+         double *alpha,
+         CTA_Vector *vx,
+         CTA_Vector *vy,
+         int *retval)
+{
+   void *x, *y;
+   int nx, ny;
+   int sizeoftyp;
+   int one;
+   float salpha;
+   one=1;
+
+   if (A->inverse){
+      /* OK we are now going to really going to invert the matrix */
+      CTAI_Matrix_construct_inverse(A);
+   }
+   *retval=CTA_SizeOf(A->datatype, &sizeoftyp);
+   if (*retval!=CTA_OK) return;
+   *retval=CTA_Vector_GetSize(*vx,&nx);
+   if (*retval!=CTA_OK) return;
+   *retval=CTA_Vector_GetSize(*vy,&ny);
+   if (*retval!=CTA_OK) return;
+   x=CTA_Malloc(sizeoftyp*nx);
+   y=CTA_Malloc(sizeoftyp*ny);
+   *retval=CTA_Vector_GetVals(*vx,x,nx,A->datatype);
+   if (*retval!=CTA_OK) return;
+   *retval=CTA_Vector_GetVals(*vy,y,ny,A->datatype);
+   if (*retval!=CTA_OK) return;
+   if (A->datatype==CTA_REAL){
+      salpha=(float) *alpha;
+      SGER_F77(&(A->m),&(A->n),&salpha,x,&one,y,&one,A->values,&(A->m));
+      *retval=CTA_OK;
+   }
+   else if (A->datatype==CTA_DOUBLE){
+      DGER_F77(&(A->m),&(A->n),alpha,x,&one,y,&one,A->values,&(A->m));
+      *retval=CTA_OK;
+   }
+   else {
+      *retval=CTA_ILLEGAL_DATATYPE;
+   }
+   free(x);
+   free(y);
+
+   
+   return;      
+}
+
+void CTAI_Matrix_Gemv(
+         CTAI_Matrix_blas *A,
+         int *trans,
+         double *alpha,
+         CTA_Vector *vx,
+         double *beta,
+         CTA_Vector *vy,
+         int *retval)
+{
+   void *x, *y;
+   int nx, ny, ma, na;
+   int sizeoftyp;
+   int one;
+   float salpha;
+   float sbeta;
+   char ctrans;
+   one=1;
+
+   if (A->inverse)   {
+      /* OK we are now going to really going to invert the matrix */
+      CTAI_Matrix_construct_inverse(A);
+   }
+
+   if (*trans) {ctrans='T'; ma=A->n; na=A->m;}
+   else        {ctrans='N'; ma=A->m; na=A->n;}
+
+   *retval=CTA_SizeOf(A->datatype, &sizeoftyp);
+   if (*retval!=CTA_OK) return;
+   *retval=CTA_Vector_GetSize(*vx,&nx);
+   if (*retval!=CTA_OK) return;
+   *retval=CTA_Vector_GetSize(*vy,&ny);
+   if (*retval!=CTA_OK) return;
+
+   /* check dimensions */
+   if (ny!=ma || nx!=na){
+      *retval=CTA_DIMENSION_ERROR;
+   }
+
+   x=CTA_Malloc(sizeoftyp*nx);
+   y=CTA_Malloc(sizeoftyp*ny);
+   *retval=CTA_Vector_GetVals(*vx,x,nx,A->datatype);
+   if (*retval!=CTA_OK) return;
+   *retval=CTA_Vector_GetVals(*vy,y,ny,A->datatype);
+   if (*retval!=CTA_OK) return;
+   if (A->datatype==CTA_REAL){
+      salpha=(float) *alpha;
+      sbeta=(float)  *beta;
+      SGEMV_F77(&ctrans,&(A->m),&(A->n),&salpha,A->values,&(A->m),x,&one,&sbeta,y,&one,1);
+      *retval=CTA_OK;
+   }
+   else if (A->datatype==CTA_DOUBLE){
+      DGEMV_F77(&ctrans,&(A->m),&(A->n),alpha,A->values,&(A->m),x,&one,beta,y,&one,1);
+      *retval=CTA_OK;
+   }
+   else {
+      *retval=CTA_ILLEGAL_DATATYPE;
+   }
+   *retval=CTA_Vector_SetVals(*vy,y,ny,A->datatype);
+   if (*retval!=CTA_OK) return;
+   
+   free(x);
+   free(y);
+
+   
+   return;      
+}
+
+
+void CTAI_Matrix_Gemm(
+         CTAI_Matrix_blas *C,
+         int *transa,
+         int *transb,
+         double *alpha,
+         CTAI_Matrix_blas *A,
+         CTAI_Matrix_blas *B,
+         double *beta,
+         int *retval)
+{
+   int ma, mb, mc, na, nb, nc;
+   float salpha;
+   float sbeta;
+   char ctransa;
+   char ctransb;
+
+   // check dimensions
+   mc=C->m;
+   nc=C->n;
+   if (*transa) {ctransa='T'; ma=A->n; na=A->m;}
+   else        {ctransa='N'; ma=A->m; na=A->n;}
+   if (*transb) {ctransb='T'; mb=B->n; nb=B->m;}
+   else        {ctransb='N'; mb=B->m; nb=B->n;}
+
+   if (ma!=mc || nb!=mc || na!=mb) {
+      *retval=CTA_DIMENSION_ERROR;
+   }
+
+   if (A->datatype != B->datatype || A->datatype != C->datatype){
+      *retval=CTA_NOT_YET_SUPPORTED; return;
+   }
+
+   if (A->inverse)   {
+      /* OK we are now going to really going to invert the matrix */
+      CTAI_Matrix_construct_inverse(A);
+   }
+   if (B->inverse)   {
+      /* OK we are now going to really going to invert the matrix */
+      CTAI_Matrix_construct_inverse(B);
+   }
+   if (C->inverse)   {
+      /* OK we are now going to really going to invert the matrix */
+      CTAI_Matrix_construct_inverse(C);
+   }
+   
+   if (C->datatype==CTA_REAL){
+      salpha=(float) *alpha;
+      sbeta=(float)  *beta;
+      SGEMM_F77(&ctransa,&ctransb,&mc,&nc,&na,&salpha,A->values,&(A->m),B->values,&(B->m),
+               &sbeta,C->values,&(C->m),1,1);
+      *retval=CTA_OK;
+   }
+   else if (C->datatype==CTA_DOUBLE){
+      DGEMM_F77(&ctransa,&ctransb,&mc,&nc,&na,alpha,A->values,&(A->m),B->values,&(B->m),
+               beta,C->values,&(C->m),1,1);
+      *retval=CTA_OK;
+   }
+   else {
+      *retval=CTA_ILLEGAL_DATATYPE;
+   }
+   return;      
+}
+
+
+
+
+
+
+
+
+
+
+void CTAI_Matrix_Inv(
+   CTAI_Matrix_blas *A,
+   int *retval
+   )
+{
+   int nul=0;
+   double *ddum;
+   float  *sdum;
+   int    info;
+
+   info = 0;
+   if (A->inverse)   {
+      /* OK we are now going to really going to invert the matrix */
+      CTAI_Matrix_construct_inverse(A);
+   }
+   if (A->m != A->m) {*retval=CTA_DIMENSION_ERROR; return;}
+ 
+   // Create permutation array
+   if (!A->perm) A->perm=CTA_Malloc(sizeof(int)*A->n); 
+
+   // Compute LU-decomposition of A
+   if (A->datatype==CTA_REAL){
+	  sdum=NULL;
+      SGESV_F77(&(A->n), &nul, A->values, &(A->n), A->perm, sdum, &(A->n), &info);
+   }
+   else if (A->datatype==CTA_DOUBLE){
+	  ddum=NULL;
+      DGESV_F77(&(A->n), &nul, A->values, &(A->n), A->perm, ddum, &(A->n), &info);
+   }
+   else {
+      *retval=CTA_ILLEGAL_DATATYPE;
+   }
+   A->inverse=TRUE;
+
+   if (info>0) {
+      *retval=CTA_SINGULAR_MATRIX;
+   }
+   else if (info<0){
+      *retval=CTA_INTERNAL_ERROR;
+   } 
+   else {
+      *retval=CTA_OK;
+   }
+};
+
+void CTAI_Matrix_Axpy(
+   CTAI_Matrix_blas *y,
+   double *alpha,
+   CTAI_Matrix_blas *x,
+   int *retval
+  ){
+   /* Local variables */
+   int one=1; 
+   int length;
+   float salpha;
+
+   if (y->inverse)   {
+      CTAI_Matrix_construct_inverse(y);
+   }
+   if (x->inverse)   {
+      CTAI_Matrix_construct_inverse(x);
+   }
+   /* check dimensions */
+   if (x->m!=y->m){
+      *retval=CTA_DIMENSION_ERROR;
+      return;
+   }
+   if (x->n!=y->n){
+      *retval=CTA_DIMENSION_ERROR;
+      return;
+   }
+   
+   /* check data types */
+   if (y->datatype!=x->datatype){
+      *retval=CTA_INCOMPATIBLE_MATRICES;
+      return;
+   }
+
+   length=(y->n)*(y->m);
+   /* copy values using BLAS copy */
+   if (y->datatype==CTA_REAL || y->datatype==CTA_INTEGER){
+      salpha=(float) *alpha;
+      SAXPY_F77(&length,&salpha,x->values,&one,y->values,&one);
+   }
+   else {
+      DAXPY_F77(&length,alpha,x->values,&one,y->values,&one);
+   }   
+   *retval=CTA_OK;
+};
+
+
+void CTAI_Matrix_free(
+   CTAI_Matrix_blas *x,
+   int *retval
+   )
+{
+   free(x->values);
+   if (x->perm); free(x->perm);
+   *retval=CTA_OK;
+};
+
+
+
+
+
diff --git a/costa/native/cta/src/cta_mem.c b/costa/native/cta/src/cta_mem.c
new file mode 100644
index 000000000..c7ea295ea
--- /dev/null
+++ b/costa/native/cta/src/cta_mem.c
@@ -0,0 +1,59 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/core/native/src/cta/cta_handles.c $
+$Revision: 2738 $, $Date: 2011-09-05 10:48:32 +0200 (Mon, 05 Sep 2011) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2011  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#define DEBUG  0
+#include <stdlib.h>
+#include <stdio.h>
+#include "cta_mem.h"
+
+
+
+void CTA_Free(void *ptr){
+   free(ptr);
+}
+
+void * CTA_Malloc(size_t size){
+   void *ptr = malloc(size);
+#ifdef DEBUG
+   if (! ptr){
+      printf("CTA_Malloc: Warning: malloc returns NULL. (size=%zd)\n",size);
+   }
+   else {
+//      memset(ptr,'@', size);  
+   }
+#endif
+  return ptr;
+
+}
+
+void * CTA_Realloc(void *ptr, size_t size){
+   void *ptrOut = realloc(ptr, size);
+#ifdef DEBUG
+   if (! ptrOut){
+      printf("CTA_Malloc: Warning: malloc returns NULL. (size=%zd)\n",size);
+   }
+#endif
+  return ptrOut;
+}
+
+
+
diff --git a/costa/native/cta/src/cta_message.c b/costa/native/cta/src/cta_message.c
new file mode 100644
index 000000000..01df3e8e2
--- /dev/null
+++ b/costa/native/cta/src/cta_message.c
@@ -0,0 +1,111 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/openda_core/native/src/cta/cta_interface.c $
+$Revision: 671 $, $Date: 2008-10-07 14:49:42 +0200 (di, 07 okt 2008) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include "cta_message.h"
+#include "ctai_handles.h"
+#include "cta_errors.h"
+
+
+#define CTA_MESSAGE_QUIET_F77  F77_CALL(cta_message_quiet,CTA_MESSAGE_QUIET)
+
+#define CLASSNAME "CTA_Message"
+
+// Handle to an external writer
+CTA_Func CTAI_Message_externalWriter=CTA_NULL;
+
+int CTAI_Quiet=0;
+
+void CTA_Message_Quiet(int setting){
+   CTAI_Quiet=setting;
+}
+
+
+#define METHOD "Write"
+void CTA_Message_Write(const char *className, const char *method, const char *message, char type){
+
+
+   CTA_Function *externalFunction;
+   int ierr;
+
+   // if program is in quiet mode return directly
+   if (CTAI_Quiet) return;
+
+   if (CTAI_Message_externalWriter==CTA_NULL) {
+      if (type=='m' || type=='M' ) {
+         printf("%s\n",message);
+      }
+      else if (type=='i' || type=='I') {
+         printf("Info: (%s.%s) %s\n",className, method, message);
+      }
+      else if (type=='w' || type=='W') {
+         printf("Warning: (%s.%s) %s\n",className, method, message);
+      }
+      else if (type=='e' || type=='E') {
+	          printf("Error: (%s.%s) %s\n",className, method, message);
+      }
+      else if (type=='f' || type=='F') {
+
+		printf("FATAL ERROR: (%s.%s) %s\n",className, method, message);
+        printf("APPLICATION WILL BE TERMINATED\n");
+   		exit(-1);
+      }
+      else {
+         printf("Error: (cta_msg.CTA_Message_Write) illegal message type (%c)\n", type);
+         printf("Message: (%s.%s) %s\n",className, method, message);
+      }
+	}
+   else {
+      ierr=CTA_Func_GetFunc(CTAI_Message_externalWriter, &externalFunction);
+      if (ierr != CTA_OK) {
+          // Error, first "kill" external writer before writing error
+          CTAI_Message_externalWriter=CTA_NULL;
+          printf("Cannot get pointer to external writer");
+      }
+      else {
+        // Call external writer
+        (void) externalFunction(className, method, message, type);
+      }
+   }
+}
+
+#undef METHOD
+#define METHOD "SetExternalWriter"
+void CTA_Message_SetExternalWriter(CTA_Func externalWriter){
+
+   if (externalWriter==CTA_NULL){
+      CTAI_Message_externalWriter=CTA_NULL;
+   }
+   else {
+      if (CTAI_Handle_GetDatatype(externalWriter)!=CTA_FUNCTION){
+          CTA_WRITE_ERROR("Input handle is not a function handle.\n External writer is not changed\n");
+      }
+      else {
+         CTAI_Message_externalWriter=externalWriter;
+      }
+   }
+}
+
+/* Interfacing with Fortran */
+
+CTAEXPORT void CTA_MESSAGE_QUIET_F77(int *setting){
+   CTA_Message_Quiet(*setting);
+}
diff --git a/costa/native/cta/src/cta_metainfo.c b/costa/native/cta/src/cta_metainfo.c
new file mode 100644
index 000000000..c428d25cf
--- /dev/null
+++ b/costa/native/cta/src/cta_metainfo.c
@@ -0,0 +1,1147 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_metainfo.c $
+$Revision: 3407 $, $Date: 2012-08-17 13:50:50 +0200 (Fri, 17 Aug 2012) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <stdio.h>
+#include "cta_mem.h"
+#include "f_cta_utils.h"
+#include "cta_sobs.h"
+#include "cta_obsdescr.h"
+#include "cta_metainfo.h"
+#include "cta_errors.h"
+#include "cta_handles.h"
+#include "ctai.h"
+#include "cta_file.h"
+#include "cta_defaults.h"
+#include "ctai_xml.h"
+#include "cta_message.h"
+#include "cta_pack.h"
+
+#define CTA_METAINFO_DEFINEClASS_F77       F77_CALL(cta_metainfo_definecLass,CTA_METAINFO_DEFINECLASS)
+#define CTA_METAINFO_CREATE_F77            F77_CALL(cta_metainfo_create,CTA_METAINFO_CREATE)
+#define CTA_METAINFO_CHECK_SOBS_F77        F77_CALL(cta_metainfo_check_sobs,CTA_METAINFO_CHECK_SOBS)
+#define CTA_METAINFO_GET_PROPERTIES_F77    F77_CALL(cta_metainfo_get_properties,CTA_METAINFO_GET_PROPERTIES)
+#define CTA_METAINFO_GET_KEYS_F77          F77_CALL(cta_metainfo_get_keys,CTA_METAINFO_GET_KEYS)
+#define CTA_METAINFO_PROPERTY_COUNT_F77    F77_CALL(cta_metainfo_property_count,CTA_METAINFO_PROPERTY_COUNT)
+#define CTA_METAINFO_OBSERVATION_COUNT_F77 F77_CALL(cta_metainfo_observation_count,CTA_METAINFO_OBSERVATION_COUNT)
+#define CTA_METAINFO_EXPORT_F77            F77_CALL(cta_metainfo_export,CTA_METAINFO_EXPORT)
+#define CTA_METAINFO_IMPORT_F77            F77_CALL(cta_metainfo_import,CTA_METAINFO_IMPORT)
+#define CTA_METAINFO_FREE_F77              F77_CALL(cta_metainfo_free,CTA_METAINFO_FREE)
+#define CTA_METAINFO_SETTAG_F77            F77_CALL(cta_metainfo_settag,CTA_METAINFO_SETTAG)
+#define CTA_METAINFO_SETBELONGSTO_F77      F77_CALL(cta_metainfo_setbelongsto,CTA_METAINFO_SETBELONGSTO)
+#define CTA_METAINFO_SETGRID_F77      F77_CALL(cta_metainfo_setgrid,CTA_METAINFO_SETGRID)
+
+#define CTA_METAINFO_SETREGGRID_F77        F77_CALL(cta_metainfo_setreggrid,CTA_METAINFO_SETREGGRID)
+#define CTA_METAINFO_GETREGGRID_F77        F77_CALL(cta_metainfo_getreggrid,CTA_METAINFO_GETREGGRID)
+#define CLASSNAME "CTA_Metainfo"
+
+#define IDEBUG (0)
+
+
+/* Struct holding all data associated to an COSTA Vector */
+typedef struct {
+CTA_Func functions[CTA_METAINFO_NUMFUNC];
+} CTAI_MetainfoClass; // A MetainfoClass contains a list of the member-functions
+
+
+
+
+
+typedef struct {
+char unit[CTA_STRLEN_TAG];
+char tag[CTA_STRLEN_TAG];
+char belongs_to[CTA_STRLEN_TAG];
+char *description;
+int rest;
+CTAI_Gridm *thisgrid;
+CTA_Func functions[CTA_METAINFO_NUMFUNC]; // See cta_vector.h for a list of
+} CTAI_Metainfo;
+
+
+
+
+int CTAI_Grid_Init(
+   CTAI_Gridm *hgrid,  /* Handle of the grid */
+   int nx, int ny,int nz )
+   {
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+
+   /* copy */
+   strcpy(hgrid->name,"gridname") ;
+   hgrid->type = -99;   //undefined
+   hgrid->nx = nx;    hgrid->ny = ny;    hgrid->nz = nz;
+   hgrid->x_origin = 0.1;    hgrid->y_origin = 0.0;
+   hgrid->z_origin = 0.0;
+   hgrid->dx = 1.0;    hgrid->dy = 1.0;    hgrid->dz = 1.0;
+   strcpy(hgrid->refdimp[1],"none");
+   strcpy(hgrid->refdimp[2],"none");
+   strcpy(hgrid->refdimp[3],"none");
+   retval = 0;
+
+   return retval;
+};
+
+
+int CTAI_Grid_IsEqual(
+   CTAI_Gridm *hgrid1,  /* Handle of the grid */
+   CTAI_Gridm *hgrid2 ){
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   int dimseq, chareq, realeq;
+   double eps1=1.0E-4;
+   double eps2=1.0E-6;
+   /* compare */
+   retval = CTA_DIMENSION_ERROR;
+   chareq = (strcmp(hgrid1->name, hgrid2->name)==0);
+   dimseq = ((hgrid1->type == hgrid2->type) &&   \
+             hgrid1->nx == hgrid2->nx && hgrid1->ny == hgrid2->ny \
+             &&  hgrid1->nz == hgrid2->nz );
+   realeq = ((fabs(hgrid1->x_origin - hgrid2->x_origin) + \
+             fabs(hgrid1->y_origin - hgrid2->y_origin) + \
+             fabs(hgrid1->z_origin - hgrid2->z_origin) < eps1) && \
+             (fabs(hgrid1->dx - hgrid2->dx) + \
+             fabs(hgrid1->dy - hgrid2->dy) + \
+              fabs(hgrid1->dz - hgrid2->dz) < eps2));
+   if (dimseq && realeq) {
+     if (!chareq) { printf("Grids are equal, but names differ \n");}
+     retval = 0;}
+   return retval;
+};
+
+
+int CTAI_Grid_Copy(
+   CTAI_Gridm *hgrid1,  /* Handle of the sending grid */
+   CTAI_Gridm *hgrid2   /* Handle of the receiving grid */
+   ){
+
+   /* Local variables */
+   int retval=0;             /* Return value of COSTA call   */
+
+   /* copy */
+   //   printf("hgrid1->name=%s\n",hgrid1->name);
+   if (
+       (strcmp(hgrid1->name,"gridname") != 0) &
+       (strcmp(hgrid1->name,"AR(1)-process") != 0)
+       ) {// return -1234567;
+}
+   strcpy(hgrid2->name, hgrid1->name) ;
+   hgrid2->type = hgrid1->type;
+   hgrid2->nx = hgrid1->nx;    hgrid2->ny = hgrid1->ny;    hgrid2->nz = hgrid1->nz;
+   hgrid2->x_origin = hgrid1->x_origin;    hgrid2->y_origin = hgrid1->y_origin;
+   hgrid2->z_origin = hgrid1->z_origin;
+   hgrid2->dx = hgrid1->dx;    hgrid2->dy = hgrid1->dy;    hgrid2->dz = hgrid1->dz;
+   hgrid2->nsize = hgrid1->nsize;
+   strcpy(hgrid2->refdimp[1],hgrid1->refdimp[1]);
+   strcpy(hgrid2->refdimp[2],hgrid1->refdimp[2]);
+   strcpy(hgrid2->refdimp[3],hgrid1->refdimp[3]);
+
+   return retval;
+};
+
+
+/* ------------------------------------------------ */
+
+int CTAI_Grid_Interpolate(CTAI_Gridm gridy, //desired grid
+                          CTAI_Gridm gridx, // grid where vecx lives
+                          CTA_Vector vecx,  // the vector to be interpolated
+                                            // from gridx to gridy
+                          CTA_Vector vecx_to_y ) // the resulting vector, living on gridy
+{
+  int retval, i,j,k,mi,mj,mk;
+  int size_vecx;
+  double px,py,pz;
+  double xL, xR, yL, yR, zL, zR;
+  int ixL, ixR, iyL, iyR, izL, izR;
+  double fLLL, fRLL, fLRL, fRRL, fLLR, fRLR, fLRR, fRRR;
+  double fyLL, fyRL, fyLR, fyRR, fzL, fzR, fint;
+
+
+  if (CTAI_Grid_IsEqual(&gridx,&gridy) == CTA_OK) {
+    // immediately finished; copy vecx_to_y to vecx
+    if (IDEBUG>0) { printf("interpolating vectors on grid: grids are equal! \n");}
+    retval = CTA_Vector_Copy(vecx, vecx_to_y);
+    return retval;
+  }
+
+
+  CTA_Vector_GetSize(vecx, &size_vecx);
+  if (size_vecx != gridx.nsize) return CTA_DIMENSION_ERROR;
+
+
+  // dit werkt alleen goed voor uniform en rectilinear I en II,
+  //        niet voor irregular/ curvilinear!
+  // verder: eenheidsvectoren van grids moeten gelijk zijn! (niet gedraaid tov elkaar)
+  // anders moet gridx sowieso eerst getransformeerd naar irregular grid
+  //      met basisvectoren van gridy
+
+
+
+  /* loop along grid points of desired gridy */
+  for (i=1; i <= gridy.nx ;i++){
+    for (j=1; j <= gridy.ny ;j++){
+      for (k=1; k <= gridy.nz ;k++){
+        px = gridy.x_origin + (i-1)*gridy.dx ; py = gridy.y_origin + (j-1)*gridy.dy ;
+        pz = gridy.z_origin + (k-1)*gridy.dz ;
+
+
+        /* search for grid points around px and values in x-direction  */
+        if (gridx.nx ==1) {xL = gridx.x_origin; xR = xL + 1.0; ixL = 1; ixR=1;}
+        else if (gridx.x_origin > px) {xL = gridx.x_origin; xR = xL +gridx.dx; ixL=1;ixR=2; }
+        else if (gridx.x_origin+gridx.dx*(gridx.nx-1)< px) {
+              xR = gridx.x_origin+gridx.dx*(gridx.nx-1); xL = xR -gridx.dx;ixL=gridx.nx-1;ixR=gridx.nx; }
+        else {
+          for (mi=1; mi <= gridx.nx-1; mi++) {
+            if (gridx.x_origin + mi*gridx.dx > px ) {
+              xR = gridx.x_origin+mi*gridx.dx ; xL = xR - gridx.dx; ixL=mi; ixR=mi+1;
+              break;}
+          }
+        }
+        /* same for y direction */
+        if (gridx.ny ==1) {yL = gridx.y_origin; yR = yL + 1.0; iyL = 1; iyR=1;}
+        else if (gridx.y_origin > py) {yL = gridx.y_origin; yR = yL +gridx.dy; iyL=1;iyR=2; }
+        else if (gridx.y_origin+gridx.dy*(gridx.ny-1)< py) {
+              yR = gridx.y_origin+gridx.dy*(gridx.ny-1); yL = yR -gridx.dy;iyL=gridx.ny-1;iyR=gridx.ny; }
+        else {
+          for (mj=1; mj <= gridx.ny-1; mj++) {
+            if (gridx.y_origin + mj*gridx.dy > py ) {
+              yR = gridx.y_origin+mj*gridx.dy ; yL = yR - gridx.dy; iyL=mj; iyR=mj+1;
+              break;}
+          }
+        }
+        /* same for z direction */
+        if (gridx.nz ==1) {zL = gridx.z_origin; zR = zL + 1.0; izL = 1; izR=1;}
+        else if (gridx.z_origin > pz) {zL = gridx.z_origin; zR = zL +gridx.dz; izL=1;izR=2; }
+        else if (gridx.z_origin+gridx.dz*(gridx.nz-1)< pz) {
+              zR = gridx.z_origin+gridx.dz*(gridx.nz-1); zL = zR -gridx.dz;izL=gridx.nz-1;izR=gridx.nz; }
+        else {
+          for (mk=1; mk <= gridx.nz-1; mk++) {
+            if (gridx.z_origin + mk*gridx.dz > pz ) {
+              zR = gridx.z_origin+mk*gridx.dz ; zL = zR - gridx.dz; izL=mk; izR=mk+1;
+              break;}
+          }
+        }
+
+      // is it always possible to convert to double?
+        CTA_Vector_GetVal(vecx,ixL + (iyL-1)*gridx.nx +(izL-1)*gridx.nx*gridx.ny, &fLLL, CTA_DOUBLE);
+        CTA_Vector_GetVal(vecx,ixR + (iyL-1)*gridx.nx +(izL-1)*gridx.nx*gridx.ny, &fRLL, CTA_DOUBLE);
+        CTA_Vector_GetVal(vecx,ixL + (iyR-1)*gridx.nx +(izL-1)*gridx.nx*gridx.ny, &fLRL, CTA_DOUBLE);
+        CTA_Vector_GetVal(vecx,ixR + (iyR-1)*gridx.nx +(izL-1)*gridx.nx*gridx.ny, &fRRL, CTA_DOUBLE);
+
+        CTA_Vector_GetVal(vecx,ixL + (iyL-1)*gridx.nx +(izR-1)*gridx.nx*gridx.ny, &fLLR, CTA_DOUBLE);
+        CTA_Vector_GetVal(vecx,ixR + (iyL-1)*gridx.nx +(izR-1)*gridx.nx*gridx.ny, &fRLR, CTA_DOUBLE);
+        CTA_Vector_GetVal(vecx,ixL + (iyR-1)*gridx.nx +(izR-1)*gridx.nx*gridx.ny, &fLRR, CTA_DOUBLE);
+        CTA_Vector_GetVal(vecx,ixR + (iyR-1)*gridx.nx +(izR-1)*gridx.nx*gridx.ny, &fRRR, CTA_DOUBLE);
+
+
+        //  printf("distances %f %f %f %f %f %f  \n",xR-px, yR-py, zR-pz,xR-xL,yR-yL,zR-zL);
+
+
+        // lower z- layer
+        fyLL = ((xR-px)/(xR-xL))*fLLL + ((px-xL)/(xR-xL))*fRLL ;
+        fyRL = ((xR-px)/(xR-xL))*fLRL + ((px-xL)/(xR-xL))*fRRL ;
+        fzL = ((yR-py)/(yR-yL))*fyLL + ((py-yL)/(yR-yL))*fyRL ;
+
+        // upper z-layer
+        fyLR = ((xR-px)/(xR-xL))*fLLR + ((px-xL)/(xR-xL))*fRLR ;
+        fyRR = ((xR-px)/(xR-xL))*fLRR + ((px-xL)/(xR-xL))*fRRR ;
+        fzR = ((yR-py)/(yR-yL))*fyLR + ((py-yL)/(yR-yL))*fyRR ;
+
+        fint = ((zR-pz)/(zR-zL))*fzL + ((pz-zL)/(zR-zL))*fzR;
+
+
+        retval = CTA_Vector_SetVal(vecx_to_y,(i + (j-1)*gridy.nx), &fint, CTA_DOUBLE);
+
+      };
+
+
+    }
+  }
+
+
+  return CTA_OK;
+
+
+};
+
+/* ------------------------------------------------ */
+int CTAI_XML_CreateGrid(xmlNode *cur_node, CTAI_Gridm *thisgrid) {
+   xmlNode       *values_node = NULL;     /* values child node */
+   xmlNode       *val_node = NULL;        /* value child node */
+   xmlChar *idtmp;
+   xmlChar *lentmp;
+   xmlChar *axestmp;
+   xmlChar *reftmp;
+
+   BOOL isphysicalSpace = FALSE;
+   int ndims_comp = 0;
+   int ndims_phys = 0;
+   char dimidc[10][CTA_STRLEN_TAG];   // names of comp. dimensions
+   char dimidp[10][CTA_STRLEN_TAG];   // names of physical dimensions
+   char axes[10][CTA_STRLEN_TAG];     // list of referenced comp. dimensions
+   int dimlength[10];                 // number of elements  of comp. dimension
+
+
+   /* fill some properties of the grid */
+   thisgrid->type =  -1;          //  actual coordinates not yet read
+   strcpy(thisgrid->name,"xml_grid");
+   thisgrid->nsize = 1;
+
+   for (values_node = cur_node->children; values_node; values_node = values_node->next) {
+     if (0 == strcmp("computationalSpace", (char *) values_node->name)) {
+       /* read dimensions */
+
+       for (val_node = values_node->children; val_node; val_node = val_node->next) {
+         if (0 == strcmp("dimension", (char *) val_node->name)) {
+           ndims_comp = ndims_comp + 1;
+
+           idtmp = xmlGetProp(val_node, CTAI_XML_ID);
+           strcpy(dimidc[ndims_comp], (char *) idtmp);
+           lentmp = xmlGetProp(val_node, CTAI_XML_LENGTH);
+           dimlength[ndims_comp] = atoi((char *) lentmp);
+           thisgrid->nsize = thisgrid->nsize*dimlength[ndims_comp];
+           //           printf("grid comp dims: %d %d %s \n",ndims_comp,dimlength[ndims_comp],dimidc[ndims_comp]);
+         }
+       }
+       if (ndims_comp < 1 || ndims_comp > 3) {printf("wrong number of computational dimensions\n");
+         exit(-1);
+       }
+     }
+
+     /* the physicalSpace part is not always present. In that case an integer grid can be formed using
+        the computationalSpace. */
+
+     if (0 == strcmp("physicalSpace", (char *) values_node->name)) {
+       isphysicalSpace = TRUE;
+       /* read dimensions */
+       for (val_node = values_node->children; val_node; val_node = val_node->next) {
+         if (0 == strcmp("dimension", (char *) val_node->name)) {
+           ndims_phys = ndims_phys + 1;
+
+           idtmp = xmlGetProp(val_node, CTAI_XML_ID);
+           strcpy(dimidp[ndims_phys], (char *) idtmp);
+
+           axestmp = xmlGetProp(val_node, (xmlChar *) "axes");
+           strcpy(axes[ndims_phys], (char *) axestmp);
+
+           reftmp = xmlGetProp(val_node, (xmlChar *) "ref");
+           strcpy(thisgrid->refdimp[ndims_phys], (char *) reftmp);
+
+           //   printf("grid phys dims: %d  |%s|%s|%s| \n",
+           //     ndims_phys,dimidp[ndims_phys],axes[ndims_phys],thisgrid->refdimp[ndims_phys]);
+         }
+       }
+     }
+   }
+   if (isphysicalSpace == FALSE) {    //  make an integer, regular  grid
+     thisgrid->nx = dimlength[1];
+     if (ndims_comp == 1) {
+       thisgrid->type = 1; thisgrid->ny = 1;  //degenerated 2D
+       thisgrid->x_origin = 0.0; thisgrid->y_origin = 0.0; thisgrid->z_origin = 0.0;
+       thisgrid->dx = 1.0; thisgrid->dy = 1.0; thisgrid->dz = 1.0;
+     };
+     if (ndims_comp == 2 ) {
+       thisgrid->type = 1; thisgrid->ny=dimlength[2];  // 2D
+       thisgrid->x_origin = 1.0; thisgrid->y_origin = 1.0; thisgrid->z_origin = 0.0;
+       thisgrid->dx = 1.0; thisgrid->dy = 1.0; thisgrid->dz = 1.0;
+     };
+     if (ndims_comp == 3 ) { thisgrid->type = 2;
+       thisgrid->ny = dimlength[2];thisgrid->nz=dimlength[3];  // 3D
+       thisgrid->x_origin = 1.0; thisgrid->y_origin = 1.0; thisgrid->z_origin = 1.0;
+       thisgrid->dx = 1.0; thisgrid->dy = 1.0; thisgrid->dz = 1.0;
+     }
+   } else {     ;
+     /* the reference to the coordinate vectors is contained in the grid.
+        We have to wait until the whole tree has been read to be able to access it. */
+     thisgrid->type = -ndims_comp;   // this denotes (minus) the number of refernces to coordinates
+   }
+
+   return CTA_OK;
+  };
+
+/* ------------------------------------------------ */
+
+#undef METHOD
+#define METHOD "DefineClass" 
+int CTA_Metainfo_DefineClass(
+   // INPUTS:
+      const char *name,                // Name of the new stochobs class
+      const CTA_Func h_func[CTA_METAINFO_NUMFUNC],  // function handles to
+                                       // the implementations of the
+                                       // stochobs-class' functions.
+   // OUTPUTS:
+      CTA_MetainfoClass  *hdescrcl)    // The (handle to) the new
+                                       //   observation descriptor-class
+{
+
+   CTAI_MetainfoClass *data;
+   int retval;
+
+   /* Allocate new Metainfo object */
+   data=CTA_Malloc(sizeof(CTAI_MetainfoClass));
+   data->functions[CTA_METAINFO_CREATE_SIZE        ]=
+                      h_func[CTA_METAINFO_CREATE_SIZE        ];
+   data->functions[CTA_METAINFO_CREATE_INIT        ]=
+                      h_func[CTA_METAINFO_CREATE_INIT        ];
+   data->functions[I_CTA_METAINFO_FREE               ]=
+                      h_func[I_CTA_METAINFO_FREE               ];
+   data->functions[I_CTA_METAINFO_GET_REST     ]=
+                      h_func[I_CTA_METAINFO_GET_REST     ];
+   data->functions[I_CTA_METAINFO_GET_KEYS           ]=
+                      h_func[I_CTA_METAINFO_GET_KEYS           ];
+   data->functions[I_CTA_METAINFO_COUNT_OBSERVATIONS ]=
+                      h_func[I_CTA_METAINFO_COUNT_OBSERVATIONS ];
+   data->functions[I_CTA_METAINFO_COUNT_PROPERTIES   ]=
+                      h_func[I_CTA_METAINFO_COUNT_PROPERTIES   ];
+   data->functions[I_CTA_METAINFO_EXPORT             ]=
+                      h_func[I_CTA_METAINFO_EXPORT             ];
+
+   // Allocate new handle
+   retval=CTA_Handle_Create(name,CTA_METAINFOCLASS,data,hdescrcl);
+   retval=CTA_Handle_GetData((CTA_Handle) *hdescrcl,(void**) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   // return error when unsuccesfull
+   return retval;
+}
+
+int CTA_Metainfo_Create(CTA_Metainfo *hmetainfo){
+
+   CTAI_Metainfo *metainfo;
+   CTAI_Gridm initgrid;
+   int retval;
+
+   /* allocate memory for new metainfo object */
+   metainfo=CTA_Malloc(sizeof(CTAI_Metainfo));
+   metainfo->rest=0;
+   strcpy(metainfo->unit,"-");
+   strcpy(metainfo->tag,"_");
+   strcpy(metainfo->belongs_to,"NONE");
+   metainfo->description = CTA_Malloc(9*sizeof(char));
+   strcpy(metainfo->description,"none yet");
+
+   retval=CTAI_Grid_Init(&initgrid,3,2,1);
+   if (retval) {
+	   CTA_WRITE_ERROR("Cannot initialize Grid");   
+	   return retval;
+   }
+   // printf("cta_metainfo_create 1: initgrid.nx %d \n ",initgrid.nx);
+   //printf("cta_metainfo_create 1: initgrid.x_origin %f \n ",initgrid.x_origin);
+
+   metainfo->thisgrid=CTA_Malloc(sizeof(CTAI_Gridm));
+
+   retval=CTAI_Grid_Copy(&initgrid, metainfo->thisgrid);
+   //   printf("cta_metainfo_create 2:  %d \n ",initgrid.nx);
+
+   if (retval) {
+	   CTA_WRITE_ERROR("Cannot copy grid");
+	   return retval;
+   }
+
+   /* Allocate new handle and return error when unsuccesfull */
+   retval=CTA_Handle_Create("metainfo",CTA_METAINFO,metainfo,hmetainfo);
+
+   if (retval){
+	   CTA_WRITE_ERROR("Handle is not a cta_metainfo handle");
+	   return retval;
+   }
+
+   return CTA_OK;
+}
+
+
+
+
+#undef METHOD
+#define METHOD "Copy" 
+int CTA_Metainfo_Copy(
+   CTA_Metainfo hmeta_x,  /* Handle of the sending metainfo */
+   CTA_Metainfo hmeta_y   /* Handle of the receiving metainfo */
+   ){
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Metainfo *data_x;      /* All data of vector hvec_x  */
+   CTAI_Metainfo *data_y;      /* All data of vector hvec_y  */
+
+
+   retval=CTA_Handle_Check((CTA_Handle) hmeta_x,CTA_METAINFO);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_metainfo handle");
+	   return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hmeta_x, (void*) &data_x);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   retval=CTA_Handle_Check((CTA_Handle) hmeta_y,CTA_METAINFO);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_metainfo handle");
+	   return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hmeta_y, (void*) &data_y);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+
+   /* copy */
+   //  printf("!!!controle copy:|%s|%s|  \n", data_x->tag, data_y->tag);
+   strcpy(data_y->unit, data_x->unit) ;
+   strcpy(data_y->tag, data_x->tag) ;
+   strcpy(data_y->belongs_to, data_x->belongs_to) ;
+   data_y->rest = data_x->rest ;
+   //  data_y->thisgrid=CTA_Malloc(sizeof(CTAI_Gridm));
+   retval = CTAI_Grid_Copy(data_x->thisgrid, data_y->thisgrid);
+
+
+   return retval;
+};
+
+
+#undef METHOD
+#define METHOD "IsEqual"
+int CTA_Metainfo_IsEqual(
+   CTA_Metainfo hmeta_x,  /* Handle of the metainfo1 */
+   CTA_Metainfo hmeta_y   /* Handle of the metainfo2 */
+   ){
+
+
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call   */
+   CTAI_Metainfo *data_x;      /* All data of vector hvec_x  */
+   CTAI_Metainfo *data_y;      /* All data of vector hvec_y  */
+
+   retval=CTA_Handle_Check((CTA_Handle) hmeta_x,CTA_METAINFO);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_metainfo handle");
+	   return CTA_FALSE;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hmeta_x, (void*) &data_x);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return CTA_FALSE;
+   }
+
+   retval=CTA_Handle_Check((CTA_Handle) hmeta_y,CTA_METAINFO);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_metainfo handle");
+	   return CTA_FALSE;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hmeta_y, (void*) &data_y);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return CTA_FALSE;
+   }
+
+
+   /* compare */
+   retval = CTAI_Grid_IsEqual(data_x->thisgrid, data_y->thisgrid);
+   if (retval) return CTA_FALSE;
+   
+   retval = (strcmp(data_x->unit, data_y->unit)==0);
+   if (retval) return CTA_FALSE;
+
+   retval = (strcmp(data_x->tag, data_y->tag)==0);
+   if (retval) return CTA_FALSE;
+
+   retval = (strcmp(data_x->belongs_to, data_y->belongs_to)==0);
+   if (retval) return CTA_FALSE;
+
+   retval = (data_x->rest == data_y->rest);
+   if (retval) return CTA_TRUE;
+   
+   return CTA_FALSE;
+};
+
+
+
+
+
+#undef METHOD
+#define METHOD "SetUnit"
+int CTA_Metainfo_SetUnit(CTA_Metainfo hobsdescr,  char* nameofunit){
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call    */
+   CTAI_Metainfo *metainfo;    /* All data of observer hmetainfo  */
+
+   /* Get pointer to struct with observation description data */
+   retval=CTA_Handle_GetData((CTA_Handle) hobsdescr,(void**) &metainfo);
+   //      printf("metainfo_setunit getdata %d \n", retval);
+   if (retval!=CTA_OK)  {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   strcpy(metainfo->unit,nameofunit);
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "SetTag"
+int CTA_Metainfo_SetTag(CTA_Metainfo hobsdescr,  char* tagname){
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call    */
+   CTAI_Metainfo *metainfo;    /* All data of observer hmetainfo  */
+
+   /* Get pointer to struct with observation description data */
+   retval=CTA_Handle_GetData((CTA_Handle) hobsdescr,(void**) &metainfo);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   strcpy(metainfo->tag,tagname);
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "SetDescription"
+int CTA_Metainfo_SetDescription(CTA_Metainfo hobsdescr,  char* description){
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call    */
+   CTAI_Metainfo *metainfo;    /* All data of observer hmetainfo  */
+
+   /* Get pointer to struct with observation description data */
+   retval=CTA_Handle_GetData((CTA_Handle) hobsdescr,(void**) &metainfo);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   if (metainfo->description !=NULL) {
+       free(metainfo->description);
+      }
+   metainfo->description = CTA_Malloc((1+strlen(description))*sizeof(char));
+   strcpy(metainfo->description,description);
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "GetDescription"
+int CTA_Metainfo_GetDescription(CTA_Metainfo hobsdescr,  char **description){
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call    */
+   CTAI_Metainfo *metainfo;    /* All data of observer hmetainfo  */
+
+   /* Get pointer to struct with observation description data */
+   retval=CTA_Handle_GetData((CTA_Handle) hobsdescr,(void**) &metainfo);
+   if (retval!=CTA_OK){
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   /* Note: description is supposed not be allocated yet */
+   if (*description!=NULL) {
+          free(*description);
+   }
+   *description = CTA_Malloc((1+strlen(metainfo->description))*sizeof(char));
+   strcpy(*description, metainfo->description);
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "SetBelongsTo"
+int CTA_Metainfo_SetBelongsTo(CTA_Metainfo hobsdescr,  char* tagname){
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call    */
+   CTAI_Metainfo *metainfo;    /* All data of observer hmetainfo  */
+
+   /* Get pointer to struct with observation description data */
+   retval=CTA_Handle_GetData((CTA_Handle) hobsdescr,(void**) &metainfo);
+   if (retval!=CTA_OK){
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   strcpy(metainfo->belongs_to,tagname);
+   return retval;
+}
+
+
+
+#undef METHOD
+#define METHOD "GetUnit"
+int CTA_Metainfo_GetUnit(CTA_Metainfo hobsdescr,  char *nameofunit){
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call    */
+   CTAI_Metainfo *metainfo;    /* All data of observer hmetainfo  */
+
+   /* Get pointer to struct with observation description data */
+   retval=CTA_Handle_GetData((CTA_Handle) hobsdescr,(void**) &metainfo);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   strcpy(nameofunit,metainfo->unit);
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "GetTag"
+int CTA_Metainfo_GetTag(CTA_Metainfo hobsdescr,  char *tagname){
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call    */
+   CTAI_Metainfo *metainfo;    /* All data of observer hmetainfo  */
+
+   /* Get pointer to struct with observation description data */
+   retval=CTA_Handle_GetData((CTA_Handle) hobsdescr,(void**) &metainfo);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   strcpy(tagname,metainfo->tag);
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "GetBelongsTo"
+int CTA_Metainfo_GetBelongsTo(CTA_Metainfo hobsdescr,  char *tagname){
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call    */
+   CTAI_Metainfo *metainfo;    /* All data of observer hmetainfo  */
+
+   /* Get pointer to struct with observation description data */
+   retval=CTA_Handle_GetData((CTA_Handle) hobsdescr,(void**) &metainfo);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   strcpy(tagname,metainfo->belongs_to);
+   return retval;
+}
+
+
+#undef METHOD
+#define METHOD "GetRest"
+int CTA_Metainfo_GetRest(CTA_Metainfo hobsdescr, int *rest){
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call    */
+   CTAI_Metainfo *metainfo;    /* All data of observer hmetainfo  */
+
+
+   retval=CTA_Handle_GetData((CTA_Handle) hobsdescr,(void**) &metainfo);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   *rest = metainfo->rest;
+   if (retval) {
+	   CTA_WRITE_ERROR("Error in CTA_Metainfo_GetRest");
+	   return retval;
+   }
+
+   //   retval = CTAI_Metainfo_member_function(hobsdescr, CTA_METAINFO_GET_REST,
+   //                                      &metainfo, &memfun);
+   //if (retval) return retval;
+
+   /* Call (user) implementation */
+   //memfun(metainfo,rest,&retval);
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "SetRest"
+int CTA_Metainfo_SetRest(CTA_Metainfo hobsdescr, int *rest){
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call    */
+   CTAI_Metainfo *metainfo;    /* All data of observer hmetainfo  */
+
+
+   retval=CTA_Handle_GetData((CTA_Handle) hobsdescr,(void**) &metainfo);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   metainfo->rest = *rest;
+   if (retval) {
+	   CTA_WRITE_ERROR("Error in CTA_Metainfo_SetRest");
+	   return retval;
+   }
+   return retval;
+}
+
+
+#undef METHOD
+#define METHOD "GetGrid"
+int CTA_Metainfo_GetGrid(CTA_Metainfo hobsdescr, CTAI_Gridm *hgrid){
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call    */
+   CTAI_Metainfo *metainfo;    /* All data of observer hmetainfo  */
+
+   /* Get pointer to struct with observation description data */
+   retval=CTA_Handle_Check((CTA_Handle) hobsdescr,CTA_METAINFO);
+   if (retval!=CTA_OK)  {
+	   CTA_WRITE_ERROR("Handle is not a cta_metainfo handle");
+       return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) hobsdescr,(void**) &metainfo);
+   //   printf("metainfo_getgrid%d \n", retval);
+   if (retval!=CTA_OK)  {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   retval = CTAI_Grid_Copy(metainfo->thisgrid, hgrid);
+   //printf("metainfo_getgrid%d \n", retval);
+
+   if (retval) {
+	   CTA_WRITE_ERROR("Error in CTA_Metainfo_GetGrid");
+	   return retval;
+   }
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "SetGrid"
+int CTA_Metainfo_SetGrid(CTA_Metainfo hobsdescr, CTAI_Gridm *hgrid){
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call    */
+   CTAI_Metainfo *metainfo;    /* All data of observer hmetainfo  */
+
+   /* Get pointer to struct with observation description data */
+   retval=CTA_Handle_GetData((CTA_Handle) hobsdescr,(void**) &metainfo);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   retval = CTAI_Grid_Copy(hgrid, metainfo->thisgrid);
+   if (retval) {
+	   CTA_WRITE_ERROR("Error in CTA_Metainfo_SetGrid");
+	   return retval;
+   }
+   return retval;
+}
+
+
+
+int CTA_Metainfo_Export(CTA_Metainfo minfo, CTA_Handle usrdata){
+  int retval;
+  CTAI_Gridm thisgrid;
+  char unitname[CTA_STRLEN_TAG];
+  FILE *file;          //File pointer
+  CTA_Datatype userDataType;
+
+  retval = CTA_Metainfo_GetUnit(minfo,unitname);
+  retval = CTA_Metainfo_GetGrid(minfo,&thisgrid);
+
+  retval = CTA_Handle_GetDatatype(usrdata, &userDataType);
+  if (retval != CTA_OK) return retval;
+
+  if (userDataType == CTA_PACK) {
+      if (IDEBUG>0) printf("Metainfo: packing:\n");
+      retval=CTA_Pack_Add(usrdata,unitname,sizeof(char)*CTA_STRLEN_TAG);
+      retval=CTA_Pack_Add(usrdata,&thisgrid,sizeof(CTAI_Gridm));
+   }
+
+   else if (userDataType == CTA_FILE) {
+     retval=CTA_File_Get(usrdata,&file);
+     if (thisgrid.type > -99 ) { // grid is defined
+       fprintf(file,"         Metainfo: gridtype, size: %d %d\n",thisgrid.type, thisgrid.nsize);
+       fprintf(file,"         Metainfo: phys. dimensions:  %d %d %d\n",thisgrid.nx, thisgrid.ny,thisgrid.nz);
+       if (thisgrid.type < 0) {fprintf(file,"         Metainfo: coordinates-reference :  %s\n",
+                                     thisgrid.refdimp[1]);}
+       else {
+         fprintf(file,"         Metainfo: grid dx,dy,dz: %f %f %f\n",thisgrid.dx, thisgrid.dy, thisgrid.dz);
+         fprintf(file,"         Metainfo: grid origin: %f %f %f\n",
+                        thisgrid.x_origin, thisgrid.y_origin, thisgrid.z_origin);
+       }
+     }
+   }
+   else {
+      return retval=CTA_FORMAT_NOT_SUPPORTED;
+   }
+
+
+
+  return retval;
+
+}
+
+
+
+int CTA_Metainfo_Import(CTA_Metainfo minfo, CTA_Handle usrdata){
+  int retval;
+  CTAI_Gridm thisgrid;
+  char unitname[CTA_STRLEN_TAG];
+
+
+  if (CTA_Handle_Check(usrdata,CTA_PACK)==CTA_OK) {
+
+    if (IDEBUG>0) printf("Metainfo: unpacking:\n");
+    retval=CTA_Pack_Get(usrdata,unitname,sizeof(char)*CTA_STRLEN_TAG);
+    retval=CTA_Pack_Get(usrdata,&thisgrid,sizeof(CTAI_Gridm));
+
+    retval = CTA_Metainfo_SetUnit(minfo,unitname);
+    retval = CTA_Metainfo_SetGrid(minfo,&thisgrid);
+
+    // controle:
+    if (IDEBUG> 4) {
+      printf("CONTROLE: metainfo import:\n -------------------------- \n");
+      retval = CTA_Metainfo_Export(minfo,CTA_FILE_STDOUT);
+      printf("controle: end  -------------------------- \n");
+    }
+
+   }
+
+  else {
+      return CTA_FORMAT_NOT_SUPPORTED;
+   }
+
+
+
+  return retval;
+
+}
+
+
+
+#undef METHOD
+#define METHOD "Free"
+int CTA_Metainfo_Free(CTA_Metainfo *hdescr){
+   int retval;                 /* Return value of COSTA call    */
+   CTAI_Metainfo *descr;    /* All data of observer hmetainfo  */
+
+   /* Check for quick return */
+   if (*hdescr==CTA_NULL) return CTA_OK;
+
+   /* Get pointer to struct with observation description data */
+   retval=CTA_Handle_GetData((CTA_Handle) *hdescr,(void*) &descr);
+   if (retval!=CTA_OK) { 
+	   char message[1024];
+	   sprintf(message,"metainfo_free: error %d \n",retval);
+	   CTA_WRITE_ERROR(message);
+	   return retval;
+   }
+
+   free(descr->thisgrid);
+   free(descr);
+   retval = CTA_Handle_Free(hdescr);
+   if (retval!=CTA_OK) { 
+	   char message[1024];
+	   sprintf(message,"metainfo_free: handle_free : error %d \n",retval); 
+	   CTA_WRITE_ERROR(message);
+   }
+   return retval;
+}
+
+
+#undef METHOD
+#define METHOD "SetRegGrid"
+int CTA_Metainfo_setRegGrid(CTA_Metainfo hdescr, char *name,
+                            int nx, int ny, int nz,
+                            double x_origin, double y_origin,
+                            double z_origin,
+                            double dx, double dy, double dz){
+
+   int retval;              /* Return value of COSTA call    */
+   CTAI_Metainfo* descr;    /* All data of observer hmetainfo  */
+   CTAI_Gridm *grid;
+   int typeGrid;
+
+
+   /* Check for quick return */
+   if (hdescr==CTA_NULL) return CTA_OK;
+
+   /* Get pointer to struct with observation description data */
+   retval=CTA_Handle_GetData((CTA_Handle) hdescr,(void**) &descr);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   /* Just set the grid info */
+   grid=descr->thisgrid;
+   strcpy(grid->name,name);
+
+   if (ny==0){
+      typeGrid = 1;
+   } else if (nz==0) {
+      typeGrid = 2;
+   } else {
+      typeGrid = 3;
+   }
+
+
+
+
+   grid->type     =typeGrid;
+   grid->nx       =nx;
+   grid->ny       =ny;
+   grid->nz       =nz;
+   grid->nsize    =nx*ny;
+   grid->x_origin =x_origin;
+   grid->y_origin =y_origin;
+   grid->z_origin =z_origin;
+   grid->dx       =dx;
+   grid->dy       =dy;
+   grid->dz       =dz;
+
+   return CTA_OK;
+ /*refdimp[10][80]*/
+}
+
+#undef METHOD
+#define METHOD "getRegGrid"
+int CTA_Metainfo_getRegGrid(CTA_Metainfo hdescr, char *name,
+                            int *nx, int *ny, int *nz,
+                            double *x_origin, double *y_origin,
+                            double *z_origin,
+                            double *dx, double *dy, double *dz){
+
+   int retval;              /* Return value of COSTA call    */
+   CTAI_Metainfo* descr;    /* All data of observer hmetainfo  */
+   CTAI_Gridm *grid;
+
+
+   /* Check for quick return */
+   if (hdescr==CTA_NULL) return CTA_OK;
+
+   /* Get pointer to struct with observation description data */
+   retval=CTA_Handle_GetData((CTA_Handle) hdescr,(void**) &descr);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   /* Just set the grid info */
+   grid=descr->thisgrid;
+   strcpy(name,grid->name);
+   *nx       = grid->nx;
+   *ny       = grid->ny;
+   *nz       = grid->nz;
+   *x_origin = grid->x_origin;
+   *y_origin = grid->y_origin;
+   *z_origin = grid->z_origin;
+   *dx       = grid->dx;
+   *dy       = grid->dy;
+   *dz       = grid->dz;
+
+   return CTA_OK;
+ /*refdimp[10][80]*/
+}
+
+
+
+/* Interfacing with Fortran */
+
+CTAEXPORT void CTA_METAINFO_DEFINECLASS_F77( char *name, int *h_func, int *hobsdscrcl,int *ierr, int len_name){
+
+   char  *c_name;
+   // create a c-string equivalent to name
+   c_name=CTA_Malloc((len_name+1)*sizeof(char));
+   CTA_fstr2cstr(name,c_name,len_name);
+
+   *ierr=CTA_Metainfo_DefineClass(name, (CTA_Func*) h_func, (CTA_MetainfoClass*) hobsdscrcl);
+
+   free(c_name);
+}
+/*
+void CTA_METAINFO_CREATE_F77(int *hsobscl, int *usrdat,
+              int *hmetainfo, int *ierr){
+
+   *ierr=CTA_Metainfo_Create((CTA_MetainfoClass) *hsobscl, (CTA_Handle) *usrdat,
+                             (CTA_Metainfo*) hmetainfo);
+*/
+
+CTAEXPORT void CTA_METAINFO_CREATE_F77(int *hmetainfo, int *ierr){
+
+   *ierr=CTA_Metainfo_Create((CTA_Metainfo*) hmetainfo);
+}
+
+
+CTAEXPORT void CTA_METAINFO_SETTAG_F77(int *hobsdescr, char *tag, int *ierr, int len_tag) {
+  char  *c_tag;
+  // create a c-string equivalent to tag
+  c_tag = CTA_Malloc((len_tag+1)*sizeof(char));
+  CTA_fstr2cstr(tag,c_tag,len_tag);
+
+   *ierr = CTA_Metainfo_SetTag((CTA_Metainfo) *hobsdescr,  c_tag);
+   free(c_tag);
+};
+
+CTAEXPORT void CTA_METAINFO_SETBELONGSTO_F77(int *hobsdescr, char *tag, int *ierr, int len_tag) {
+  char  *c_tag;
+  // create a c-string equivalent to tag
+  c_tag = CTA_Malloc((len_tag+1)*sizeof(char));
+  CTA_fstr2cstr(tag,c_tag,len_tag);
+
+   *ierr = CTA_Metainfo_SetBelongsTo((CTA_Metainfo) *hobsdescr,  c_tag);
+   free(c_tag);
+};
+
+
+CTAEXPORT void CTA_METAINFO_SETGRID_F77(int *hobsdescr, int *hgrid, int *ierr) {
+  *ierr= CTA_Metainfo_SetGrid((CTA_Metainfo) *hobsdescr, (CTAI_Gridm*) hgrid);
+}
+
+
+CTAEXPORT void CTA_METAINFO_EXPORT_F77(int *hdescr, int *usrdat, int *ierr){
+   *ierr=CTA_Metainfo_Export((CTA_Metainfo) *hdescr, (CTA_Handle) *usrdat);
+}
+
+CTAEXPORT void CTA_METAINFO_IMPORT_F77(int *hdescr, int *usrdat, int *ierr){
+   *ierr=CTA_Metainfo_Import((CTA_Metainfo) *hdescr, (CTA_Handle) *usrdat);
+}
+
+
+CTAEXPORT void CTA_METAINFO_FREE_F77(int *hobsdscr, int *ierr){
+   *ierr=CTA_Metainfo_Free((CTA_Metainfo *) hobsdscr);
+}
+
+
+CTAEXPORT void CTA_METAINFO_GETREGGRID_F77(int *hdescr, char *name,
+                            int *nx, int *ny, int *nz,
+                            double *x_origin, double *y_origin,
+                            double *z_origin,
+                            double *dx, double *dy, double *dz, int *ierr, int lenstr){
+   char  c_name[CTA_STRLEN_TAG];
+   *ierr = CTA_Metainfo_getRegGrid((CTA_Metainfo) *hdescr, c_name, nx, ny, nz,
+                            x_origin, y_origin, z_origin, dx, dy, dz);
+
+   CTA_cstr2fstr(c_name,name,lenstr);
+}
+
+CTAEXPORT void CTA_METAINFO_SETREGGRID_F77(int *hdescr, char *name,
+                            int *nx, int *ny, int *nz,
+                            double *x_origin, double *y_origin,
+                            double *z_origin,
+                            double *dx, double *dy, double *dz, int *ierr, int strlen){
+
+  char  *c_name;
+  // create a c-string equivalent to name
+  c_name=CTA_Malloc((strlen+1)*sizeof(char));
+  CTA_fstr2cstr(name,c_name,strlen);
+
+  *ierr=CTA_Metainfo_setRegGrid((CTA_Metainfo) *hdescr, c_name,
+                            *nx, *ny, *nz,
+                            *x_origin, *y_origin,
+                            *z_origin,
+                            *dx, *dy, *dz);
+   free(c_name);
+
+}
+
+
+
diff --git a/costa/native/cta/src/cta_method.c b/costa/native/cta/src/cta_method.c
new file mode 100644
index 000000000..373ab3781
--- /dev/null
+++ b/costa/native/cta/src/cta_method.c
@@ -0,0 +1,208 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_method.c $
+$Revision: 2751 $, $Date: 2011-09-09 08:58:46 +0200 (Fri, 09 Sep 2011) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include "cta_mem.h"
+#include "cta_method.h"
+#include "cta_message.h"
+
+#define CLASSNAME "CTA_Method"
+
+
+/* Struct holding all data associated to an COSTA Vector */
+typedef struct {
+CTA_Func functions[CTA_METH_NUMFUNC];
+} CTAI_MethodClass;                    // A SObsClass contains a list of the member-functions
+
+
+
+typedef struct {
+CTA_Func functions[CTA_METH_NUMFUNC];
+void *data;   // pointer to the implementation-specific data.
+} CTAI_Method;
+
+
+
+#undef METHOD
+#define METHOD "DefineClass"
+int CTA_Meth_DefineClass(const char *name, const CTA_Func h_func[CTA_METH_NUMFUNC],
+                         CTA_MethClass *hmethcl){
+
+   CTAI_MethodClass *data;
+   int retval;
+
+   /* Allocate new StochObs object */ 
+   data=CTA_Malloc(sizeof(CTAI_MethodClass));
+   data->functions[CTA_METH_CREATE_SIZE]=h_func[CTA_METH_CREATE_SIZE];
+   data->functions[CTA_METH_CREATE_INIT]=h_func[CTA_METH_CREATE_INIT];
+   data->functions[CTA_METH_RUN ]       =h_func[CTA_METH_RUN ];
+   data->functions[CTA_METH_FREE       ]=h_func[CTA_METH_FREE       ];
+
+   // Allocate new handle
+   retval=CTA_Handle_Create(name,CTA_METHOD,data,hmethcl);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_method handle");
+       return retval;
+   }
+   // return error when unsuccesfull 
+   return retval;
+}
+
+
+#undef METHOD
+#define METHOD "Create"
+int CTA_Meth_Create(CTA_Method hmethcl, CTA_Handle userdata,
+                    CTA_Method *hmeth){
+
+   CTAI_Method *meth;
+   int memsize;
+   int retval;
+   CTAI_MethodClass *clsdata;
+   CTA_Function *my_Create_Size, *my_Create_Init;
+   int i;
+
+   /* Get class data containing all function pointers */
+   retval=CTA_Handle_Check((CTA_Handle) hmethcl,CTA_METHODCLASS);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_methodclass handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hmethcl,(void**) &clsdata);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+
+   /* determine size of data object (CTA_SOBS_CREATE_SIZE)*/
+   retval=CTA_Func_GetFunc(clsdata->functions[CTA_METH_CREATE_SIZE],
+                           &my_Create_Size);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function CTA_METH_CREATE_SIZE");
+	   return retval;
+   }
+   my_Create_Size(&memsize,&retval);
+   if (retval) {
+	   CTA_WRITE_ERROR("Error in my_Create_Size");
+	   return retval;
+   }
+   
+   /* allocate memory for new stochobs object */
+   meth=CTA_Malloc(sizeof(CTAI_Method));
+   meth->data=CTA_Malloc(memsize);
+
+   /* copy function pointers */
+   for (i=0;i<CTA_METH_NUMFUNC;i++){
+      meth->functions[i]=clsdata->functions[i];
+   }
+
+   /* Initialise and fill new stochobs */
+   retval=CTA_Func_GetFunc(clsdata->functions[CTA_METH_CREATE_INIT],
+                           &my_Create_Init);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function CTA_METH_CREATE_INIT");
+	   return retval;
+   }
+   my_Create_Init(meth->data, userdata, &retval);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Error in my_Create_Init");
+	   return retval;
+   }
+
+   /* Allocate new handle and return error when unsuccesfull */
+   retval=CTA_Handle_Create("method",CTA_METHOD,meth,hmeth);
+   if (retval) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   return CTA_OK;
+}
+
+
+
+
+int CTAI_Meth_member_function(
+        // INPUTS
+        CTA_Method hmeth,    /* Handle of the method of
+                                  which a member function is wanted */
+        int member,            /* Code of the member function */
+        // OUTPUT
+        CTAI_Method **meth,      /* All data of method  */
+        CTA_Function **memfun  /* Member-Function pointer */
+)
+{
+   int retval;
+   /* Check that the given handle is indeed an observer */
+   retval=CTA_Handle_Check((CTA_Handle) hmeth, CTA_METHOD);
+   if (retval!=CTA_OK) return retval;
+
+   /* Get pointer to struct with observer data */
+   retval=CTA_Handle_GetData((CTA_Handle) hmeth,(void**) meth);
+   if (retval!=CTA_OK) return retval;
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Func_GetFunc((*meth)->functions[member],memfun);
+   return retval;
+}
+
+
+int CTA_Meth_Run(CTA_Method hmeth) {
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call   */
+   CTAI_Method *meth;            /* All data of method           */
+   CTA_Function *memfun;       /* Function that must be called */
+
+   /* Look up member function and method data */
+   retval = CTAI_Meth_member_function(hmeth, CTA_METH_RUN,
+                                      &meth, &memfun);
+   if (retval!=CTA_OK) return retval;
+
+   /* Call (user) implementation */
+   memfun(meth->data,&retval);
+   return retval;
+};
+
+int CTA_Meth_Free(CTA_Method *hmeth){
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Method *meth;      /* All data of meth */
+   CTA_Function *my_free;  /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+   
+   if (*hmeth==CTA_NULL) return CTA_OK;
+
+   /* Look up member function and method data */
+   retval = CTAI_Meth_member_function(*hmeth, CTA_METH_FREE,
+                                      &meth, &my_free);
+   if (retval!=CTA_OK) return retval;
+
+   /* Call (user) implementation */
+   my_free(meth->data,&retval);
+   free(meth->data);
+   free(meth);
+   retval=CTA_Handle_Free(hmeth);
+
+   return retval;
+};
+
+
diff --git a/costa/native/cta/src/cta_modbuild_par.c b/costa/native/cta/src/cta_modbuild_par.c
new file mode 100644
index 000000000..e4c7d32c7
--- /dev/null
+++ b/costa/native/cta/src/cta_modbuild_par.c
@@ -0,0 +1,3296 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_modbuild_par.c $
+$Revision: 4056 $, $Date: 2013-07-03 14:19:55 +0200 (Wed, 03 Jul 2013) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2006  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include "cta_mem.h"
+#include "cta.h"
+#include "f_cta_utils.h"
+#include "cta_model_utilities.h"
+
+#ifdef USE_MPI
+#include "mpi.h"
+#include "cta_par.h"
+
+#define IDEBUG (1)
+#define EXITONERROR (1)
+#define ALL_WORKERS (-1)
+#define MASTER (0)
+#define NOOSOBS (1)
+
+#define CTA_PACK_DEFAULT_SIZE (1000000)
+
+
+double MISVAL=-1E20;
+
+enum TypeState {STATE, FORC, PARAM, SCAL};
+
+
+typedef struct {
+CTA_Model mthis;        /* Handles of this model (local handle)         */
+int       *mremote;     /* Remote handle numbers of the model for each process 1..nworker */
+MPI_Comm  comm;         /* Communicator to use for this model instance  */
+int       is_intercomm; /* Flag indicating whether comm is an intercommunication group */
+int       nworker;      /* Number of workers to adress to               */
+CTA_String *SelectObs;  /* Selection for observations for all models    */
+} CTAI_Modbuild_par;
+
+
+int ctai_rank_ind   = 1;  /* Rank of new worker to be created */
+int ctai_rank       =-1;  /* My rank */
+int ctai_rank_master= 0;  /* rank of master */
+int ctai_comm_size  =-1;  /* Total number of workers          */
+int hmodel_scratch  =CTA_NULL; /* handle of extra model instance on worker */
+
+CTA_ObsDescr last_hdescr=CTA_NULL;
+CTA_ObsDescr last_hdescr_tab=CTA_NULL;
+
+CTA_Model *createdModels   =NULL;
+int        numCreatedModels=0;
+
+void modbuild_par_getobslocalizationdomain(CTAI_Modbuild_par *data,
+                   CTA_ObsDescr *hdescr, double *distance, int *idomain, CTA_Vector *locVecs, int *ierr);
+
+
+void modbuild_models_add(CTA_Model hmodel){
+   int iModel;
+   CTA_Model *newList;
+
+   printf("Adding model %d\n",hmodel);
+   numCreatedModels++;
+   newList= (CTA_Model*) CTA_Malloc(numCreatedModels*sizeof(CTA_Model));
+   for (iModel=0; iModel<numCreatedModels-1; iModel++){
+      newList[iModel]=createdModels[iModel];
+   }
+   newList[numCreatedModels-1]=hmodel;
+   if (createdModels) free(createdModels);
+   createdModels=newList;
+}
+
+void modbuild_models_delete(CTA_Model hmodel){
+   int iPlek=-1;
+   int iModel;
+
+   for (iModel=0; iModel<numCreatedModels; iModel++){
+      if (createdModels[iModel]==hmodel) iPlek=iModel;
+   }
+   if (iPlek>=0){
+      numCreatedModels--;
+      if (numCreatedModels>0){
+        CTA_Model *newList= (CTA_Model*) CTA_Malloc(numCreatedModels*sizeof(CTA_Model));
+        for (iModel=0;iModel<iPlek; iModel++){newList[iModel]=createdModels[iModel];}
+        for (iModel=iPlek; iModel<numCreatedModels; iModel++){newList[iModel]=createdModels[iModel+1];}
+        free(createdModels);
+        createdModels=newList;
+      } 
+      else {
+        free(createdModels);
+        createdModels=NULL;
+      }
+   }
+}
+
+void modbuild_models_delete_all_models(){
+   int iModel;
+
+   for (iModel=0; iModel<numCreatedModels; iModel++){
+      CTA_Model_Free(&(createdModels[iModel]));
+   }
+}
+
+
+
+void modbuild_par_sendtaskandhandle(char *task, int rank, CTAI_Modbuild_par *data,
+                                    int tag){
+  int lentask;
+  int irank;
+  int jrank;
+
+  if (IDEBUG>0){
+     printf("modbuild_par_sendtaskandhandle\n");
+     printf("task=%s rank=%d, \n",task,rank);
+     printf("nworker=%d\n",data->nworker);
+  }
+
+
+  /* Loop over all worker processes of this model */
+  for (irank=1;irank<=data->nworker;irank++){
+     if (irank==rank || rank<0) {
+        if (data->is_intercomm){
+           jrank=irank-1;
+        }
+        else {
+           jrank=irank;
+        }
+
+
+        /* Send Task */
+        lentask=strlen(task)+1;
+        MPI_Send(task,lentask, MPI_CHAR, jrank, tag, data->comm);
+
+        /* Send remote model handle */
+        if (IDEBUG>0) printf("# %d modbuild_par_sendtaskandhandle: send task (%s) and handle (%d) to worker %d\n",
+                        CTA_PAR_MY_RANK, task, data->mremote[irank],irank);
+        MPI_Send((void*) &data->mremote[irank], 1, MPI_INT, jrank, tag, data->comm);
+     }
+  }
+}
+
+
+/* Send a state-vector */
+void modbuild_par_sendstate(CTA_TreeVector state, CTAI_Modbuild_par *data, int tag){
+   int lentask;
+   CTA_Pack hpack;
+   char *pack;
+   CTA_TreeVector state_sub;
+   MPI_Comm comm;
+   int irank,jrank, nworker;
+
+   /* Handle to send to master if data==NULL) */
+   if (data) {
+      nworker=data->nworker;
+      comm=data->comm;
+   } else {
+      nworker=1;
+      comm=CTA_COMM_MASTER_WORKER;
+   }
+
+
+   /* Loop over all worker processes of this model */
+   for (irank=1;irank<=nworker;irank++){
+      /* Get substate (when necessary) */
+      if (nworker>1){
+         CTA_TreeVector_GetSubTreeVecIndex (state, irank, &state_sub);
+      } else {
+         state_sub=state;
+      }
+
+      CTA_Pack_Create(CTA_PACK_DEFAULT_SIZE,&hpack);
+
+      CTA_TreeVector_Export(state_sub,hpack);
+      lentask= CTA_Pack_GetLen(hpack);
+      pack   = CTA_Pack_GetPtr(hpack);
+
+      /* Handle to send to master if data==NULL) */
+      if (data){ jrank=irank;} else {jrank=0;}
+
+      if (data && data->is_intercomm) jrank--;
+
+
+      /* Send length of packed state */
+      MPI_Send(&lentask, 1, MPI_INT, jrank, tag, comm);
+
+      /* Send packed state */
+      MPI_Send(pack, lentask, MPI_CHAR, jrank, tag, comm);
+
+      /* Free pack-buffer */
+      CTA_Pack_Free(&hpack);
+   }
+}
+
+/* Receive a state-vector */
+void modbuild_par_recvstate(CTA_TreeVector *state, CTAI_Modbuild_par *data, int tag){
+   MPI_Status status;
+   int retval;
+   int lenpack;
+   CTA_Pack hpack;
+   char *pack;
+   char sNum[20];
+   char rootwrk[30];
+   MPI_Comm comm;
+
+   CTA_TreeVector *state_subs, state_sub;
+   int nworker, irank, jrank;
+
+   /* Handle to send to master if data==NULL) */
+   if (data) {
+      nworker=data->nworker;
+      comm=data->comm;
+   } else {
+      nworker=1;
+      comm=CTA_COMM_MASTER_WORKER;
+   }
+
+   /* Create state including sub-states when needed */
+   if (*state==CTA_NULL) {
+      CTA_TreeVector_Create("MASTERroot","MASTERroot",state);
+
+      /* Create sub-states as well when we have multiple workers */
+      if (nworker>1) {
+         state_subs=(CTA_Vector*) CTA_Malloc(sizeof(CTA_TreeVector)*nworker);
+ 
+    
+         for (irank=1;irank<=nworker;irank++){
+
+            sprintf(sNum,"%d",irank);
+            strcpy(rootwrk,"rtwrk"); 
+            strcat(rootwrk,sNum); 
+            CTA_TreeVector_Create("root_worker",rootwrk,&(state_subs[irank-1]));
+         }
+         CTA_TreeVector_Conc (*state, state_subs, nworker);
+         free(state_subs);
+      }
+      //printf("modbuild_par_recstate: state did not exist yet; created \n");
+   }
+
+   /* Receive all states from worker */
+   for (irank=1;irank<=nworker;irank++){
+      if (nworker>1){
+         retval=CTA_TreeVector_GetSubTreeVecIndex (*state, irank, &state_sub);
+      } else {
+         state_sub=*state;
+      }
+
+      /* Handle receive from master if data==NULL) */
+      if (data){ jrank=irank;} else {jrank=0;}
+
+      if (data && data->is_intercomm) jrank--;
+
+      /* Receive state-vector */
+      retval=MPI_Recv(&lenpack, 1, MPI_INT, jrank,1234, comm, &status);
+      retval=CTA_Pack_Create(lenpack,&hpack);
+      pack  =CTA_Pack_GetPtr(hpack);
+
+      retval=MPI_Recv(pack, lenpack, MPI_CHAR, jrank,1234, comm, &status);
+      retval=CTA_Pack_AddCnt(hpack,lenpack);
+
+      /* unpack the received-state */
+      retval=CTA_TreeVector_Import(state_sub,hpack);
+      if (retval!=CTA_OK) {
+         printf("modbuild_par_recvstate: error (%d) in CTA_TreeVector_Import\n",retval);
+         exit(-1);
+      }
+
+      /* Free pack-buffer */
+      retval=CTA_Pack_Free(&hpack);
+     
+   }
+     if (IDEBUG>0) {
+        printf("---------------------------------modbuild_par_recstate: whole state DONE \n"); 
+        printf("................................ whole state is: \n");
+        retval = CTA_TreeVector_Info(*state);
+     }
+}
+
+
+void modbuild_par_sendobsdescr(CTAI_Modbuild_par *data, int krank, CTA_Time ttime,
+                               CTA_ObsDescr hdescr, int tag, int *ierr){
+
+   double span[2];
+   int lentask;
+   CTA_Pack hpack;
+   char *pack;
+   int irank, jrank;
+
+
+   hpack=CTA_NULL;
+   /* handle an empty desciption */
+   if (hdescr==CTA_NULL){
+         hpack=CTA_NULL;
+         lentask= 0;
+   }
+   else {
+      if (IDEBUG>0) printf("modbuild_par_sendobsdescr :Start\n");
+      /* First delete an existing obsdescr */
+      if (! last_hdescr_tab == CTA_NULL) {
+         if (IDEBUG>0) printf("modbuild_par_sendobsdescr: MEMLEAK FIXED\n");
+         CTA_ObsDescr_Free(&last_hdescr_tab);
+      }
+      /* Create a new table obsdescr */
+      *ierr=CTA_ObsDescr_Create(CTA_OBSDESCR_TABLE,hdescr, &last_hdescr_tab);
+      if (IDEBUG>0) printf("Created table version ierr=%d\n",*ierr);
+      if (*ierr!=CTA_OK) return;
+
+      /* Pack hdescr_tab */
+      *ierr=CTA_Pack_Create(CTA_PACK_DEFAULT_SIZE,&hpack);
+      if (*ierr!=CTA_OK) return;
+      *ierr=CTA_ObsDescr_Export(last_hdescr_tab, hpack);
+      if (*ierr!=CTA_OK) return;
+
+      /* Send hdescr_tab */
+      lentask= CTA_Pack_GetLen(hpack);
+      pack   = CTA_Pack_GetPtr(hpack);
+   }
+
+   /* Send to all workers */
+   for (irank=1;irank<=data->nworker;irank++){
+      if (irank==krank || krank<0){
+
+         jrank=irank;
+         if (data && data->is_intercomm) jrank--;
+
+
+         /* Send timespan */
+         *ierr=CTA_Time_GetSpan(ttime, &span[0], &span[1]);
+
+         if (*ierr!=CTA_OK) return;
+         if (IDEBUG>0) {printf("#%d Sending timespan to %d\n",CTA_PAR_MY_RANK,irank);}
+         *ierr=MPI_Send((void*) span, 2, MPI_DOUBLE, jrank, tag, data->comm);
+         if (*ierr!=CTA_OK) return;
+         if (IDEBUG>0) {printf("#%d Sending done\n",CTA_PAR_MY_RANK);}
+
+
+         if (IDEBUG>0) {printf("#%d Sending lenpack to %d\n",CTA_PAR_MY_RANK,irank);}
+         /* Send length of packed obsdescr */
+         MPI_Send(&lentask, 1, MPI_INT, jrank, tag,
+                         data->comm);
+         if (lentask>0){
+            if (IDEBUG>0) {printf("#%d Sending packed obsdescr to %d\n",CTA_PAR_MY_RANK,irank);}
+            /* Send packed obsdescr */
+            MPI_Send(pack, lentask, MPI_CHAR, jrank, tag, data->comm);
+         }
+       }
+   }
+
+   /* Free vars we do not need */
+   *ierr=CTA_Pack_Free(&hpack);
+   if (*ierr!=CTA_OK) return;
+
+   *ierr=CTA_OK;
+   if (IDEBUG>0) printf("modbuild_par_sendobsdescr :end\n");
+}
+
+
+
+
+void modbuild_par_recvobsdescr(CTA_Time *timespan, CTA_ObsDescr *hdescr_tab,
+                               int tag){
+
+   MPI_Status status;
+   CTA_Pack hpack;
+   double span[2];
+   int lenpack, retval;
+   char *pack;
+
+   /* Receive the simulation timespan */
+   retval=CTA_Time_Create(timespan);
+
+   if (IDEBUG>0) {printf("#%d Receiving timespan from master\n",CTA_PAR_MY_RANK);}
+   retval=MPI_Recv(span, 2, MPI_DOUBLE, ctai_rank_master,tag,
+                   CTA_COMM_MASTER_WORKER, &status);
+   retval=CTA_Time_SetSpan(*timespan, span[0], span[1]);
+
+   /* Receive the observation description */
+   if (IDEBUG>0) {printf("#%d Receiving lenpack from master\n",CTA_PAR_MY_RANK);}
+   retval=MPI_Recv(&lenpack, 1, MPI_INT, ctai_rank_master,tag,
+                   CTA_COMM_MASTER_WORKER, &status);
+   if (IDEBUG>0) {printf("#%d lenpack =%d\n",CTA_PAR_MY_RANK, lenpack);}
+
+   if (lenpack==0){
+      CTA_ObsDescr_Free(hdescr_tab);
+      *hdescr_tab=CTA_NULL;
+   }
+   else {
+      retval=CTA_Pack_Create(lenpack,&hpack);
+      pack  =CTA_Pack_GetPtr(hpack);
+
+      if (IDEBUG>0) {printf("#%d Receiving packed obsdescr from master\n",CTA_PAR_MY_RANK);}
+      retval=MPI_Recv(pack, lenpack, MPI_CHAR, ctai_rank_master,tag,
+                      CTA_COMM_MASTER_WORKER, &status);
+      if (IDEBUG>0) {printf("#%d Done Receiving packed obsdescr from master\n",CTA_PAR_MY_RANK); }
+
+      retval=CTA_Pack_AddCnt(hpack,lenpack);
+
+      /* Create the observation description */
+      if (! *hdescr_tab == CTA_NULL){
+         printf("modbuild_par_sendobsdescr :MEMLEAK (2) FIXED\n");
+         CTA_ObsDescr_Free(hdescr_tab);
+      }
+      retval=CTA_ObsDescr_Create(CTA_OBSDESCR_TABLE,hpack, hdescr_tab);
+      if (IDEBUG>0) {printf("#%d Done Creating obsdescr %d\n",CTA_PAR_MY_RANK, retval);}
+      retval=CTA_Pack_Free(&hpack);
+   }
+   if (IDEBUG>0) {printf("#%d End of modbuild_par_recvobsdescr\n",CTA_PAR_MY_RANK);}
+}
+
+/* Determine the size of the datablock of one model instance */
+void modbuild_par_create_size(CTA_Handle *userdata, int *memsize,
+                          int *ierr){
+   *memsize=(int) sizeof(CTAI_Modbuild_par);
+   *ierr=CTA_OK;
+}
+
+/* Create a model-instance */
+void modbuild_par_create_init(CTA_Handle *this, CTAI_Modbuild_par *data ,
+                              CTA_Handle *hinput, int *ierr){
+
+   MPI_Status status;
+   CTA_String sclsnam;
+   CTA_ModelClass modcls;
+   CTA_Handle tmodel;
+   const char *clsnam, *modinp;
+   int lennamcls, lenmodel;
+   int rank, jrank;
+   int tag, lentask;
+   int isintercomm;
+   char *task;
+
+   int cleanup; /*Flag indicating whether tinput must be freed */
+   CTA_Tree tinput;
+
+   /* Convert input into a COSTA-tree if it is not done already */
+   *ierr=CTA_Model_Util_InputTree(*hinput, &tinput, &cleanup);
+   if (*ierr!=CTA_OK) return;
+
+   /* Handle input :*/
+
+   /* Determine name of model class to worker process */
+   *ierr=CTA_Tree_GetHandleStr(tinput,"modelclass", &modcls);
+   if (*ierr!=CTA_OK) return;
+
+   /* get name of model class */
+   CTA_String_Create(&sclsnam);
+   CTA_Handle_GetName(modcls,sclsnam);
+   clsnam=CTAI_String_GetPtr(sclsnam);
+
+   /* Get parallel information */
+   *ierr=CTA_Par_CreateNewCreateGetComm(modcls, &(data->comm));
+   if (*ierr!=CTA_OK){
+      printf("FATAL ERROR CTA_Par_CreateNewCreateGetComm -failed \n");
+      exit(-1);
+   }
+
+   /* Test whether this is an inter communicator */
+   /* Test whether this is an inter communicator */
+   MPI_Comm_test_inter(data->comm, &(data->is_intercomm));
+
+   /* Determine the number of workers */
+   if (data->is_intercomm){
+      MPI_Comm_remote_size(data->comm,&(data->nworker));
+   }
+   else {
+      MPI_Comm_size(data->comm,&(data->nworker));
+      //printf("DE GROEP BESTAAT UIT %d processes \n", data->nworker);
+      data->nworker--;
+      //printf("DE GROEP BESTAAT UIT %d workers \n", data->nworker);
+   }
+
+   /* Allocate array mremote holding remote handle numbers create one
+    * larger in order to avoind 0-indexing */
+   data->mremote=CTA_Malloc((data->nworker+1)*sizeof(int));
+
+   /* Determine the model input name of model class to worker process */
+   *ierr=CTA_Tree_GetHandleStr(tinput,"model", &tmodel);
+   if (*ierr!=CTA_OK) return;
+
+   /* We cannot pack and send whole costa trees.
+      We only support a single string */
+   *ierr=CTA_String_GetLength(tmodel, &lenmodel);
+   if (*ierr==CTA_OK) {
+      modinp=CTAI_String_GetPtr(tmodel);
+   } else {
+      printf("modbuild_par_create_init: input /modelbuild_par/model ");
+      printf("must be a string.\n");
+      printf("packing and sending of COSTA-tree objects is not yet ");
+      printf("supported.\n");
+      exit(-1);
+   }
+      /* Send data to worker (s)*/
+      tag=1234;
+      task="cta_model_create";
+      lentask=strlen(task)+1;
+
+
+      /* Communicate to all workers Note rank 0 is master! */
+      for (rank=1;rank<=data->nworker;rank++){
+         if (IDEBUG>0) printf("#%d Creating model instance on worker %d\n",CTA_PAR_MY_RANK, rank);
+
+         jrank=rank;
+         if (data && data->is_intercomm) jrank--;
+
+         /* Send task */
+
+         MPI_Comm_test_inter(data->comm,&isintercomm);
+
+         MPI_Send("cta_model_create",lentask, MPI_CHAR, jrank,tag,
+                         data->comm);
+
+         /* Send name of model class */
+         lennamcls=strlen(clsnam);
+         MPI_Send((void*) clsnam, lennamcls+1, MPI_CHAR, jrank,tag,
+                         data->comm);
+         /* Send name of input */
+         lenmodel=strlen(modinp);
+         MPI_Send((void*) modinp, lenmodel+1, MPI_CHAR, jrank,tag,
+                         data->comm);
+      }
+      /* Receive local handle if new model-instances */
+      for (rank=1;rank<=data->nworker;rank++){
+
+         jrank=rank;
+         if (data && data->is_intercomm) jrank--;
+
+         MPI_Recv(&(data->mremote[rank]),1,MPI_INT, jrank, 1234,
+                         data->comm,&status);
+         if (IDEBUG>0) printf("#%d Done creating model instance on worker\n",CTA_PAR_MY_RANK);
+      }
+      /* Set rest of data */
+      data->mthis=*this;
+
+      /* Set selections */
+      data->SelectObs=NULL;
+
+      *ierr=CTA_OK;
+
+   CTA_String_Free(&sclsnam);
+   /* Clean-up input tree */
+   if (cleanup==CTA_TRUE){
+      CTA_Tree_Free(&tinput);
+   }
+}
+
+int receive_pack_object( int rank, int tag, BOOL add_length, CTA_Pack hpack, MPI_Comm comm)
+{
+   int retval, lenpack;
+   char* buffer;
+   MPI_Status status;
+
+   if (IDEBUG>0) printf("receive_pack_object: ervoor \n");
+
+   retval=MPI_Recv(&lenpack,1,MPI_INT,rank,tag,comm,&status);
+
+   if (IDEBUG>0) printf("receive_pack_object: lenpack %d\n", lenpack);
+
+   buffer= (char*) CTA_Malloc(lenpack*sizeof(char));
+   retval=MPI_Recv(buffer,lenpack,MPI_CHAR,rank,tag,comm,&status);
+
+   /* Additional add length in pack object */
+   if (add_length) {
+      retval=CTA_Pack_Add(hpack,&lenpack,sizeof(int));
+   }
+
+   /* Add packed model state */
+   retval=CTA_Pack_Add(hpack,buffer,lenpack);
+   free(buffer);
+   return retval;
+}
+
+void modbuild_par_export(
+  CTAI_Modbuild_par *data,
+  CTA_Handle *hpack,    /* export object (e.q. handle to file or pack instance) */
+  int *ierr)
+{
+   int tag;
+   int irank, jrank;
+
+   if (IDEBUG>0) printf("modbuild_par_export\n");
+
+   tag=1234;
+
+   /* Send Task and model handle */
+   modbuild_par_sendtaskandhandle("cta_model_export", ALL_WORKERS, data, tag);
+
+   for (irank=1;irank<=data->nworker;irank++){
+
+      jrank=irank;
+      if (data && data->is_intercomm) jrank--;
+
+      /* Send receiving rank */
+
+      if (IDEBUG>0) printf("modbuild_par_export: send rank of receiving process (%d) to process %d\n",ctai_rank_master, irank);
+      MPI_Send(&ctai_rank_master, 1, MPI_INT, jrank,tag, data->comm);
+   }
+
+   for (irank=1;irank<=data->nworker;irank++){
+
+      jrank=irank;
+      if (data && data->is_intercomm) jrank--;
+
+//      Hmmmmmmm what is the global rank???
+      /* Receive the export (currently only supporting COSTA pack object) */
+      *ierr=receive_pack_object(jrank,tag, TRUE, *hpack, data->comm);
+
+   }
+
+
+
+
+}
+
+void CTA_Modbuild_par_worker_export(){
+
+   MPI_Status status;
+   CTA_Model hmodel;
+   int lentask, retval, tag, rank;
+   CTA_Pack hpack;
+   char *pack;
+
+   if (IDEBUG>0) printf("# %d modbuild_par_worker_export\n",CTA_PAR_MY_RANK);
+
+   tag=1234;
+
+   /* Receive the model handle */
+   retval=MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,tag,CTA_COMM_MASTER_WORKER,&status);
+   if (IDEBUG>0) printf("# %d modbuild_par_worker_export: receiving model handle %d\n",CTA_PAR_MY_RANK, hmodel);
+   /* Receive receiving rank */
+   retval=MPI_Recv(&rank,   1, MPI_INT, ctai_rank_master,tag,CTA_COMM_MASTER_WORKER,&status);
+
+   if (IDEBUG>0) printf("# %d modbuild_par_worker_export: receiving rank %d\n",CTA_PAR_MY_RANK, rank);
+
+   /* Create local pack object */
+   retval  = CTA_Pack_Create(CTA_PACK_DEFAULT_SIZE,&hpack);
+   retval  = CTA_Model_Export(hmodel,hpack);
+   if (retval!=CTA_OK) {
+      printf("#%d modbuild_par_worker_exort: FATAL ERROR in CTA_Model_Export ierr=%d\n",CTA_PAR_MY_RANK, retval);
+      exit(-1);
+   }
+
+   lentask = CTA_Pack_GetLen(hpack);
+   pack    = CTA_Pack_GetPtr(hpack);
+
+   /* Send length of packed state */
+   retval=MPI_Send(&lentask, 1, MPI_INT, rank, tag, CTA_COMM_MASTER_WORKER);
+   /* Send packed state */
+   retval=MPI_Send(pack, lentask, MPI_CHAR, rank, tag, CTA_COMM_MASTER_WORKER);
+   /* Free pack-buffer */
+   retval=CTA_Pack_Free(&hpack);
+}
+
+
+void modbuild_par_import(
+  CTAI_Modbuild_par *data,
+  CTA_Handle *himport,    /* import object (e.q. handle to file or pack instance) */
+  int *ierr)
+{
+
+   int tag, lentask;
+   char *pack;
+   int irank, jrank, ip1, ip2;
+
+
+   if (IDEBUG>0) printf("modbuild_par_import\n");
+
+   tag=1234;
+
+   /* Send Task and model handle */
+   modbuild_par_sendtaskandhandle("cta_model_import", ALL_WORKERS, data, tag);
+   for (irank=1;irank<=data->nworker;irank++){
+      
+      jrank=irank;
+      if (data && data->is_intercomm) jrank--;
+
+      /* Unpack length of packed data for this worker */
+      *ierr=CTA_Pack_Get(*himport, &lentask,  sizeof(int));
+      if (*ierr!=CTA_OK) return;
+
+      /* Get pointer to data */
+      pack    = CTA_Pack_GetPtr(*himport);
+
+      /* Send length of data to worker */
+      *ierr=MPI_Send(&lentask, 1, MPI_INT, jrank,tag,data->comm);
+      if (*ierr!=CTA_OK) return;
+
+      /* Send packed state */
+      *ierr=MPI_Send(pack, lentask, MPI_CHAR, jrank,tag,data->comm);
+      if (*ierr!=CTA_OK) return;
+
+      /* Set pointer in packed data */
+      *ierr=CTA_Pack_GetIndx(*himport, &ip1, &ip2);
+      if (*ierr!=CTA_OK) return;
+      *ierr=CTA_Pack_SetIndx(*himport, ip1+lentask, ip2);
+      if (*ierr!=CTA_OK) return;
+   }
+}
+
+void CTA_Modbuild_par_worker_import()
+{
+   CTA_Pack himport;    /* import object (e.q. handle to file or pack instance)*/
+   CTA_Model hmodel;
+   MPI_Status status;
+   int retval, tag;
+
+   if (IDEBUG>0) printf("#%d modbuild_par_worker_import\n",CTA_PAR_MY_RANK );
+
+   tag=1234;
+
+   /* Receive the model handle */
+   retval = MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,tag,CTA_COMM_MASTER_WORKER,&status);
+
+   retval = CTA_Pack_Create(CTA_PACK_DEFAULT_SIZE,&himport);
+   if (retval!=CTA_OK) {
+      printf("#%d modbuild_par_worker_import: FATAL ERROR in COSTA METHOD ierr=%d\n",CTA_PAR_MY_RANK, retval);
+      exit(-1);
+   }
+
+   retval = receive_pack_object(ctai_rank_master,tag,FALSE,himport,CTA_COMM_MASTER_WORKER);
+   retval = CTA_Model_Import(hmodel,himport);
+   if (retval!=CTA_OK) {
+      printf("#%d modbuild_par_worker_import: FATAL ERROR in CTA_Model_Import ierr=%d\n",CTA_PAR_MY_RANK, retval);
+      exit(-1);
+   }
+   retval = CTA_Pack_Free(&himport);
+}
+
+
+void modbuild_par_free(CTAI_Modbuild_par *data ,int *ierr){
+   int tag;
+
+   tag=1234;
+
+   if (IDEBUG>0) printf("#%d modbuild_par_free\n",CTA_PAR_MY_RANK);
+
+   /* Send Task and model handle */
+   modbuild_par_sendtaskandhandle("cta_model_free", ALL_WORKERS, data, tag);
+
+   *ierr=CTA_OK;
+}
+
+void modbuild_par_compute(
+          CTAI_Modbuild_par *data,
+          CTA_Time *timespan, /* current time, "now" */
+          int *ierr)
+{
+   double span[2];
+   int tag;
+   int irank, jrank;
+
+   if (IDEBUG>0) printf("#%d modbuild_par_compute\n",CTA_PAR_MY_RANK);
+
+   tag=1234;
+
+   /* Send Task and model handle */
+   modbuild_par_sendtaskandhandle("cta_model_compute", ALL_WORKERS, data, tag);
+
+   /* Send timespan */
+   CTA_Time_GetSpan(*timespan, &span[0], &span[1]);
+   for (irank=1;irank<=data->nworker;irank++){
+
+      jrank=irank;
+      if (data && data->is_intercomm) jrank--;
+
+      MPI_Send((void*) span, 2, MPI_DOUBLE,jrank,tag,data->comm);
+   }
+   *ierr=CTA_OK;
+}
+
+
+void modbuild_par_setstate(CTAI_Modbuild_par *data, CTA_TreeVector *state, int *ierr){
+
+   int tag;
+
+   if (IDEBUG>0) printf("#%d modbuild_par_setstate\n",CTA_PAR_MY_RANK);
+   tag=1234;
+
+   /* Send Task and model handle */
+   modbuild_par_sendtaskandhandle("cta_model_setstate", ALL_WORKERS, data, tag);
+
+   /* Send the state */
+   modbuild_par_sendstate(*state, data, 1234);
+
+   *ierr=CTA_OK;
+}
+
+void modbuild_par_getstate(CTAI_Modbuild_par *data, CTA_TreeVector *state, int *ierr){
+
+   int tag;
+
+   if (IDEBUG>0) printf("#%d modbuild_par_getstate\n",CTA_PAR_MY_RANK);
+
+   tag=1234;
+
+   /* Send Task and model handle */
+   modbuild_par_sendtaskandhandle("cta_model_getstate", ALL_WORKERS, data, tag);
+
+   /* Receive the state */
+   modbuild_par_recvstate(state, data, tag);
+
+   *ierr=CTA_OK;
+
+}
+
+void modbuild_par_getstatescaling(CTAI_Modbuild_par *data, CTA_TreeVector *state, int *ierr){
+
+   int tag;
+
+   if (IDEBUG>0) printf("#%d modbuild_par_getstatescaling\n",CTA_PAR_MY_RANK);
+
+   tag=1234;
+
+   /* Send Task and model handle */
+   modbuild_par_sendtaskandhandle("cta_model_getstatescaling", ALL_WORKERS, data, tag);
+
+   /* Receive the state */
+   modbuild_par_recvstate(state, data, tag);
+
+   *ierr=CTA_OK;
+
+}
+
+
+ void modbuild_par_axpymodel(CTAI_Modbuild_par *datay,double *alpha,
+                             CTAI_Modbuild_par *datax, int *ierr)
+ {
+   int tag;
+   int OneWorker;
+   int irank, jrank;
+
+//   printf("AXPYMODEL NOT IMPLEMENTED\n");
+//   exit(-1);
+
+//   if (ctai_comm_size>1) {
+      tag=1234;
+
+      /* Send Task and model handle */
+      modbuild_par_sendtaskandhandle("cta_model_axpymodel", ALL_WORKERS, datay, tag);
+      /* Send alpha */
+      for (irank=1;irank<=datay->nworker;irank++){
+         jrank=irank;
+         if (datay && datay->is_intercomm) jrank--;
+
+         MPI_Send(alpha, 1, MPI_DOUBLE, jrank,tag, datay->comm);
+      }
+
+      /* Check whether the two models are on the same worker */
+      OneWorker=datay->comm==datax->comm;
+
+      /* Send flag  "Oneworker" */
+      for (irank=1;irank<=datay->nworker;irank++){
+         jrank=irank;
+         if (datay && datay->is_intercomm) jrank--;
+
+         MPI_Send(&OneWorker, 1, MPI_INT, jrank,tag, datay->comm);
+      }
+
+
+
+
+
+
+      if (OneWorker) {
+         /* Both models are on the same worker, the worker can handle it by
+          * itself
+          */
+
+         /* Send model handle of x */
+         for (irank=1;irank<=datay->nworker;irank++){
+
+            jrank=irank;
+            if (datay && datay->is_intercomm) jrank--;
+
+            MPI_Send(&datax->mremote[irank], 1, MPI_INT, jrank,tag, datay->comm);
+         }
+     } else {
+        printf("AXPYMODEL NOT FOR MODELS IN DIFFERENT WORKER GROUPS IS NOT (YET) IMPLEMENTED\n");
+        exit(-1);
+
+//        /* Send sending rank */
+//        retval=MPI_Send(&datax->rank, 1, MPI_INT, datay->rank,tag,
+//                        CTA_COMM_WORKER);
+//
+//        /* Ask second worker to export to worker that holds model y */
+//        modbuild_par_sendtaskandhandle("cta_model_export", datax, tag);
+//
+//        /* Send receiving rank */
+//        retval=MPI_Send(&datay->rank, 1, MPI_INT, datax->rank,tag,
+//                        CTA_COMM_WORKER);
+
+     }
+//   } else {
+//     *ierr=CTA_Model_AxpyState(datay->mremote,*alpha,datax->mremote);
+//   }
+
+}
+
+
+void modbuild_par_axpystate(CTAI_Modbuild_par *data, double *alpha,
+                            CTA_TreeVector *statex, int *ierr){
+
+   int tag, irank, jrank;
+
+   if (IDEBUG>0) printf("#%d modbuild_par_axpystate\n",CTA_PAR_MY_RANK);
+
+   tag=1234;
+
+   /* Send Task and model handle */
+   modbuild_par_sendtaskandhandle("cta_model_axpystate", ALL_WORKERS, data, tag);
+
+   /* Send alpha */
+   for (irank=1;irank<=data->nworker;irank++){
+      jrank=irank;
+      if (data && data->is_intercomm) jrank--;
+
+      MPI_Send(alpha, 1, MPI_DOUBLE, jrank, tag,
+                      data->comm);
+   }
+   /* Send the state */
+   modbuild_par_sendstate(*statex, data, 1234);
+
+   *ierr=CTA_OK;
+}
+
+void modbuild_par_setforc(CTAI_Modbuild_par *data, CTA_Time *timespan,
+                          CTA_TreeVector *state, int *ierr){
+
+   int tag, irank, jrank;
+   double span[2];
+
+   tag=1234;
+
+   /* Send Task and model handle */
+   modbuild_par_sendtaskandhandle("cta_model_setstate", ALL_WORKERS, data, tag);
+
+   /* Send timespan */
+   CTA_Time_GetSpan(*timespan, &span[0], &span[1]);
+   for (irank=1;irank<=data->nworker;irank++){
+
+      jrank=irank;
+      if (data && data->is_intercomm) jrank--;
+
+      MPI_Send((void*) span, 2, MPI_DOUBLE, jrank,tag,data->comm);
+   }
+   /* Send the state */
+   modbuild_par_sendstate(*state, data, 1234);
+
+   *ierr=CTA_OK;
+
+}
+
+void modbuild_par_getforc(CTAI_Modbuild_par *data, CTA_Time *timespan,
+                          CTA_TreeVector *state, int *ierr){
+   int tag, irank, jrank;
+   double span[2];
+
+      tag=1234;
+
+      /* Send Task and model handle */
+      modbuild_par_sendtaskandhandle("cta_model_getforc", ALL_WORKERS, data, tag);
+
+      /* Send timespan */
+      CTA_Time_GetSpan(*timespan, &span[0], &span[1]);
+      for (irank=1;irank<=data->nworker;irank++){
+
+         jrank=irank;
+         if (data && data->is_intercomm) jrank--;
+
+         MPI_Send((void*) span, 2, MPI_DOUBLE,jrank,tag,data->comm);
+      }
+
+      /* Receive the state */
+      modbuild_par_recvstate(state, data, 1234);
+
+      *ierr=CTA_OK;
+
+}
+
+void modbuild_par_axpyforc(CTAI_Modbuild_par *data, CTA_Time *timespan,
+                           double *alpha, CTA_TreeVector *statex, int *ierr){
+   int tag, irank, jrank;
+   double span[2];
+
+   tag=1234;
+
+   /* Send Task and model handle */
+   modbuild_par_sendtaskandhandle("cta_model_axpyforc", ALL_WORKERS, data, tag);
+
+   /* Send timespan */
+   CTA_Time_GetSpan(*timespan, &span[0], &span[1]);
+
+   for (irank=1;irank<=data->nworker; irank++){
+
+      jrank=irank;
+      if (data && data->is_intercomm) jrank--;
+
+      MPI_Send((void*) span, 2, MPI_DOUBLE, jrank,tag,data->comm);
+
+      /* Send alpha */
+      MPI_Send(alpha, 1, MPI_DOUBLE, jrank,tag, data->comm);
+   }
+
+   /* Send the state */
+   modbuild_par_sendstate(*statex, data, 1234);
+
+   *ierr=CTA_OK;
+}
+
+void modbuild_par_setparam(CTAI_Modbuild_par *data, CTA_TreeVector *state,
+                           int *ierr){
+   int tag;
+
+   tag=1234;
+
+   /* Send Task and model handle */
+   modbuild_par_sendtaskandhandle("cta_model_setparam", ALL_WORKERS, data, tag);
+
+   /* Send the state */
+   modbuild_par_sendstate(*state, data, 1234);
+
+   *ierr=CTA_OK;
+
+}
+
+void modbuild_par_getparam(CTAI_Modbuild_par *data, CTA_TreeVector *state,
+                           int *ierr){
+   int tag;
+
+   tag=1234;
+
+   /* Send Task and model handle */
+   modbuild_par_sendtaskandhandle("cta_model_getparam", ALL_WORKERS, data, tag);
+
+    /* Create root of state-vector */
+   if (*state==CTA_NULL) CTA_TreeVector_Create("root","root",state);
+
+   /* Receive the state */
+   modbuild_par_recvstate(state, data, 1234);
+
+   *ierr=CTA_OK;
+}
+
+void modbuild_par_axpyparam(CTAI_Modbuild_par *data, double *alpha,
+                            CTA_TreeVector *statex, int *ierr){
+   int tag, irank, jrank;
+
+      tag=1234;
+
+      /* Send Task and model handle */
+      modbuild_par_sendtaskandhandle("cta_model_axpyparam", ALL_WORKERS, data, tag);
+
+      /* Send alpha */
+      for (irank=1;irank<=data->nworker;irank++){
+
+         jrank=irank;
+         if (data && data->is_intercomm) jrank--;
+
+         MPI_Send(alpha, 1, MPI_DOUBLE, jrank,tag, data->comm);
+      }
+
+      /* Send the state */
+      modbuild_par_sendstate(*statex, data, 1234);
+
+      *ierr=CTA_OK;
+}
+
+void modbuild_par_getnoisecount(CTAI_Modbuild_par *data, int* nnoise, int* ierr) {
+
+   MPI_Status status;
+   int tag;
+   int nnoise_sub, irank, jrank;
+
+   *nnoise=0;
+
+   if (IDEBUG>0) printf("#%d modbuild_par_getnoisecount\n",CTA_PAR_MY_RANK);
+
+   tag=1234;
+
+   /* Send Task and model handle */
+   modbuild_par_sendtaskandhandle("cta_model_getnoisecount", ALL_WORKERS, data, tag);
+
+   for (irank=1;irank<=data->nworker;irank++){
+
+      jrank=irank;
+      if (data && data->is_intercomm) jrank--;
+
+
+      /* Receive number of noise parameters */
+      MPI_Recv(&nnoise_sub,1,MPI_INT, jrank, 1234, data->comm,&status);
+      if (irank==1) {
+         *nnoise=nnoise_sub;
+      } else {
+         if (*nnoise!=nnoise_sub) {
+            printf("Error number of noise parameters of models is not the same\n");
+            printf("#%d rank 1 has %d parameters\n", CTA_PAR_MY_RANK, *nnoise);
+            printf("#%d rank %d has %d parameters\n",CTA_PAR_MY_RANK, irank, nnoise_sub);
+            exit(-1);
+         }
+      }
+   }
+
+   if (IDEBUG>0) printf("#%d Number of noise parameters %d\n",CTA_PAR_MY_RANK,*nnoise);
+   *ierr=CTA_OK;
+}
+
+void modbuild_par_getnoisecovar(CTAI_Modbuild_par *data, CTA_TreeVector *colsvar, int* ierr){
+
+
+   MPI_Status status;
+   int tag, nnoise, icol;
+   int irank, jrank;
+
+   if (IDEBUG>0) printf("#%d modbuild_par_getnoisecovar\n",CTA_PAR_MY_RANK);
+
+      tag=1234;
+
+      /* Send Task and model handle */
+      modbuild_par_sendtaskandhandle("cta_model_getnoisecovar", ALL_WORKERS,
+                                     data, tag);
+
+      /* Receive number of noise parameters from all workers
+       * actually one worker is enough but for the time beeing they all send
+       * it */
+      for (irank=1;irank<=data->nworker;irank++){
+
+         jrank=irank;
+         if (data && data->is_intercomm) jrank--;
+
+         /* Receive number of noise parameters */
+         MPI_Recv(&nnoise,1,MPI_INT, jrank, 1234,
+                         data->comm,&status);
+
+      }
+
+      /* Receive the columns of the covariance matrix one-by-one */
+      for (icol=0;icol<nnoise;icol++){
+         /* Create root of state-vector */
+//         printf("#%d icol=%d\n",CTA_PAR_MY_RANK, icol);
+         modbuild_par_recvstate(&colsvar[icol], data, tag);
+//         if (IDEBUG>0) {printf("Reveived column of covar");
+//            retval=CTA_TreeVector_Export(colsvar[icol],CTA_FILE_STDOUT);
+//         }
+      }
+      *ierr=CTA_OK;
+}
+
+void modbuild_par_getobsselect_communicate(CTAI_Modbuild_par *data, int krank, CTA_Time *ttime, CTA_ObsDescr *hdescr,
+                                    CTA_String *sselect, int* ierr){
+
+   MPI_Status status;
+   int tag;
+   char select[1024];
+   CTA_String sselect_sub, sor;
+
+   int irank, jrank;
+   int retval_method;
+   int retval_all;
+   CTA_String_Create(&sselect_sub);
+   CTA_String_Create(&sor);
+   CTA_String_Set(sor," OR ");
+
+   strcpy(select,"( ");
+
+      tag=1234;
+
+      if (IDEBUG>0) printf("#%d modbuild_par_getobsselect: krank=%d\n",CTA_PAR_MY_RANK, krank);
+      if (IDEBUG>0) printf("#%d modbuild_par_getobsselect: ttime=%d\n",CTA_PAR_MY_RANK, *ttime);
+
+      /* Send Task and model handle */
+
+      modbuild_par_sendtaskandhandle("cta_model_getobsselect", krank, data, tag);
+      /* Send time and obsdescr (to all workers) */
+      if (IDEBUG>0) printf("#%d Send timespan to krank=%d\n",CTA_PAR_MY_RANK, krank);
+      modbuild_par_sendobsdescr(data, krank,  *ttime, *hdescr, tag, ierr);
+      if (*ierr!=CTA_OK){
+         printf("ERROR in modbuild_par_getobsselect: modbuild_par_sendobsdescr returned with error %d\n",*ierr);                if (EXITONERROR) exit(-1);
+         return;
+      }
+      retval_all=CTA_OK;
+      for (irank=1;irank<=data->nworker;irank++){
+         if (irank==krank || krank<0){
+
+            jrank=irank;
+            if (data && data->is_intercomm) jrank--;
+
+            /* Receive the error string */
+            MPI_Recv(&retval_method,1,MPI_INT, jrank, 1234, data->comm,&status);
+            if (retval_all==CTA_OK && retval_method!=CTA_OK) retval_all=retval_method;
+
+            /* Receive the select string */
+            MPI_Recv(select+2,1022,MPI_CHAR, jrank, 1234,
+                            data->comm,&status);
+            strcpy(select+strlen(select)," )");
+
+
+            if (IDEBUG>0) printf("#%d modbuild_par_getobsselect: selection rank %d is %s\n",CTA_PAR_MY_RANK,
+                            irank, select);
+
+            if (irank==1 || krank>0){
+               CTA_String_Set(*sselect,select);
+
+            } else {
+               CTA_String_Set(sselect_sub,select);
+               CTA_String_Conc(*sselect,sor);
+               CTA_String_Conc(*sselect,sselect_sub);
+            }
+         }
+         if (IDEBUG>0) printf("#%d modbuild_par_getobsselect: selection string is %s\n",CTA_PAR_MY_RANK, CTAI_String_GetPtr(*sselect));
+         *ierr=retval_all;
+
+         if (IDEBUG>0) printf("#%d end of modbuild_par_getobsselect\n",CTA_PAR_MY_RANK);
+      }
+      CTA_String_Free(&sselect_sub);
+      CTA_String_Free(&sor);
+}
+
+
+void modbuild_par_announceobsvalues(CTAI_Modbuild_par *data, CTA_ObsDescr *hdescr, int* ierr){
+
+   CTA_Time timedum;
+   int retval;
+   int tag;
+   int irank, jrank;
+   CTA_ObsDescr *subObsDescr;
+   CTA_RelTable *reltab;
+   int nobs, nobs_sub;
+
+      CTA_ObsDescr_Observation_Count (*hdescr,&nobs);
+      if (IDEBUG>0) printf("#%d modbuild_par_announceobsvalues; nobs tot:%d \n",CTA_PAR_MY_RANK, nobs);
+
+      tag=1234;
+      *ierr=CTA_OK;
+
+      // Create dummy time
+      CTA_Time_Create(&timedum);
+
+      if (NOOSOBS < 1) {
+      // We need to know what observations the various processes can provide
+      //
+         if (IDEBUG>0) printf("#%d modbuild_par_getobsvalues: asking for getobsselect to processes \n",CTA_PAR_MY_RANK);
+         if (data->nworker>1 && !data->SelectObs){
+           data->SelectObs= (CTA_ObsDescr*) CTA_Malloc((data->nworker+1)*sizeof(CTA_ObsDescr));
+           for (irank=1;irank<=data->nworker;irank++){
+             CTA_String_Create(&(data->SelectObs[irank]));
+        //      printf("calling  modbuild_par_getobsselect_communicate... \n");
+              modbuild_par_getobsselect_communicate(data, irank, &timedum, hdescr, &(data->SelectObs[irank]), &retval);
+           }
+         }
+      }
+      /* Create stochastic observers for different process */
+      subObsDescr= (CTA_ObsDescr*) CTA_Malloc(sizeof(CTA_ObsDescr)*(data->nworker+1));
+
+      if (data->nworker==1){
+         subObsDescr[1]=*hdescr;
+      } else {
+         if (NOOSOBS < 1) {
+
+            reltab=CTA_Malloc(sizeof(CTA_RelTable)*(data->nworker+1));
+            for (irank=1;irank<=data->nworker;irank++){
+               if (IDEBUG>0) printf("#%d Creating Obs selection for process %d\n",CTA_PAR_MY_RANK, irank);
+               if (IDEBUG>0) printf("#%d Creating Obs selection='%s'\n",CTA_PAR_MY_RANK, CTAI_String_GetPtr(data->SelectObs[irank]));
+               CTA_RelTable_Create(&(reltab[irank]));
+               retval=CTA_ObsDescr_CreateSel(*hdescr, data->SelectObs[irank], reltab[irank], &subObsDescr[irank]);
+               if (retval!=CTA_OK) {
+                  printf("#%d Error creating selection for worker %d process error code is %d\n",CTA_PAR_MY_RANK, irank, retval);
+                  exit(-1);
+               }
+
+            }
+         } else {
+             for (irank=1;irank<=data->nworker;irank++){
+               subObsDescr[irank]=*hdescr;
+             }
+            // reltab not defined
+         }
+      }
+
+      /* Send observation descriptions of the various sub processes */
+      for (irank=1;irank<=data->nworker;irank++){
+
+         jrank=irank;
+         if (data && data->is_intercomm) jrank--;
+
+         if (IDEBUG>0) printf("#%d Prepare to send Obsdescr to worker %d\n",CTA_PAR_MY_RANK, irank);
+
+         CTA_ObsDescr_Observation_Count (subObsDescr[irank],&nobs_sub);
+         if (IDEBUG>0) printf("#%d Number of observations in obsdescr for worker %d is %d\n",CTA_PAR_MY_RANK, irank, nobs_sub);
+         if (nobs_sub>0) {
+
+            /* Send Task and model handle */
+            modbuild_par_sendtaskandhandle("cta_model_announceobsvalues", irank, data, tag);
+            if (IDEBUG>0) printf("#%d done sending task and handle \n",CTA_PAR_MY_RANK);
+
+            /* Send time and obsdescr */
+            if (IDEBUG>0) printf("#%d Sending observation description to worker %d\n",CTA_PAR_MY_RANK, irank);
+            modbuild_par_sendobsdescr(data, irank, timedum, subObsDescr[irank], tag, ierr);
+            if (*ierr!=CTA_OK) return;
+
+         }
+      }
+      /* Free obsdescr of sub-domains */
+      if (data->nworker>1){
+         if (NOOSOBS < 1) {
+            for (irank=1;irank<=data->nworker;irank++){
+               CTA_ObsDescr_Free(&(subObsDescr[irank]));
+               CTA_RelTable_Free(&(reltab[irank]));
+            }
+            free(reltab);
+         } else { // do nothing
+
+         }  
+      }
+
+      CTA_Time_Free(&timedum);
+      free(subObsDescr);
+      *ierr=CTA_OK;
+}
+
+
+
+void modbuild_par_getobsvalues(CTAI_Modbuild_par *data, CTA_Time *ttime,
+                               CTA_ObsDescr *hdescr, CTA_Vector *vval,
+                               int* ierr){
+
+   int retval;
+   int tag;
+   int lenpack;
+   int irank, jrank;
+   CTA_Pack hpack;
+   char *pack;
+   MPI_Status status;
+   CTA_ObsDescr *subObsDescr;
+   CTA_RelTable *reltab;
+   CTA_Vector vsub;
+   int nobs, nobs_sub;
+   double dmisval;
+   CTA_Datatype datatype;
+   int size_type;
+   double value;
+   int iobs;
+
+   dmisval = -1E-20;
+
+
+
+      CTA_ObsDescr_Observation_Count (*hdescr,&nobs);
+      CTA_Vector_SetConstant(*vval,&dmisval,CTA_DOUBLE);
+      if (IDEBUG>0) printf("#%d modbuild_par_getobsvalues; nobs tot:%d \n",CTA_PAR_MY_RANK, nobs);
+
+      tag=1234;
+      *ierr=CTA_OK;
+
+      if (NOOSOBS < 1) {
+      // We need to know what observations the various processes can provide
+      //
+         if (IDEBUG>0) printf("#%d modbuild_par_getobsvalues: asking for getobsselect to processes \n",CTA_PAR_MY_RANK);
+         if (data->nworker>1 && !data->SelectObs){
+           data->SelectObs=CTA_Malloc((data->nworker+1)*sizeof(CTA_ObsDescr));
+           for (irank=1;irank<=data->nworker;irank++){
+             CTA_String_Create(&(data->SelectObs[irank]));
+        //      printf("calling  modbuild_par_getobsselect_communicate... \n");
+              modbuild_par_getobsselect_communicate(data, irank, ttime, hdescr, &(data->SelectObs[irank]), &retval);
+           }
+         }
+      }
+      /* Create stochastic observers for different process */
+      subObsDescr=CTA_Malloc(sizeof(CTA_ObsDescr)*(data->nworker+1));
+
+      if (data->nworker==1){
+         subObsDescr[1]=*hdescr;
+      } else {
+         if (NOOSOBS < 1) {
+
+            reltab=CTA_Malloc(sizeof(CTA_RelTable)*(data->nworker+1));
+            for (irank=1;irank<=data->nworker;irank++){
+               if (IDEBUG>0) printf("#%d Creating Obs selection for process %d\n",CTA_PAR_MY_RANK, irank);
+               if (IDEBUG>0) printf("#%d Creating Obs selection='%s'\n",CTA_PAR_MY_RANK, CTAI_String_GetPtr(data->SelectObs[irank]));
+               CTA_RelTable_Create(&(reltab[irank]));
+               retval=CTA_ObsDescr_CreateSel(*hdescr, data->SelectObs[irank], reltab[irank], &subObsDescr[irank]);
+               if (retval!=CTA_OK) {
+                  printf("#%d Error creating selection for worker %d process error code is %d\n",CTA_PAR_MY_RANK, irank, retval);
+                  exit(-1);
+               }
+
+            }
+         } else {
+             for (irank=1;irank<=data->nworker;irank++){
+               subObsDescr[irank]=*hdescr;
+             }
+            // reltab not defined
+         }
+      }
+
+      /* Send observation descriptions of the various sub processes */
+      for (irank=1;irank<=data->nworker;irank++){
+
+         jrank=irank;
+         if (data && data->is_intercomm) jrank--;
+
+         if (IDEBUG>0) printf("#%d Prepare to send Obsdescr to worker %d\n",CTA_PAR_MY_RANK, irank);
+
+         CTA_ObsDescr_Observation_Count (subObsDescr[irank],&nobs_sub);
+         if (IDEBUG>0) printf("#%d Number of observations in obsdescr for worker %d is %d\n",CTA_PAR_MY_RANK, irank, nobs_sub);
+         if (nobs_sub>0) {
+
+            /* Send Task and model handle */
+            modbuild_par_sendtaskandhandle("cta_model_getobsvalues", irank, data, tag);
+            if (IDEBUG>0) printf("#%d done sending task and handle \n",CTA_PAR_MY_RANK);
+
+            /* create vector for individual processes */
+            if (data->nworker>1) {
+               if (NOOSOBS < 1) {  // probably unneccessary since nobs_sub = nobs;
+                  CTA_Vector_Create(CTA_DEFAULT_VECTOR,nobs_sub,CTA_DOUBLE,CTA_NULL,&vsub);
+               } else {
+                  CTA_Vector_Create(CTA_DEFAULT_VECTOR,nobs,CTA_DOUBLE,CTA_NULL,&vsub);
+               } 
+            } else {
+               vsub=*vval;
+            }
+
+            /* Send time and obsdescr */
+            if (IDEBUG>0) printf("#%d Sending observation description to worker %d\n",CTA_PAR_MY_RANK, irank);
+            modbuild_par_sendobsdescr(data, irank, *ttime, subObsDescr[irank], tag, ierr);
+            if (*ierr!=CTA_OK) return;
+
+            /* Receive vector */
+            if (IDEBUG>0) printf("#%d Waiting for a vector form worker %d\n",CTA_PAR_MY_RANK, irank);
+            retval=MPI_Recv(&lenpack, 1, MPI_INT, jrank,tag,
+                         data->comm, &status);
+            if (IDEBUG>0) printf("lenpack %d\n", lenpack);
+            retval=CTA_Pack_Create(lenpack,&hpack);
+            pack  =CTA_Pack_GetPtr(hpack);
+
+            retval=MPI_Recv(pack, lenpack, MPI_CHAR, jrank,tag,
+                         data->comm, &status);
+            *ierr=CTA_Pack_AddCnt(hpack,lenpack);
+
+            if (IDEBUG>0) printf("#%d Received vector from worker %d\n",CTA_PAR_MY_RANK, irank);
+            if (*ierr!=CTA_OK) return;
+
+            if (IDEBUG>0) printf("modbuild_par_getobsvalues: receiving vectors done\n");
+            *ierr=CTA_Vector_Import(vsub,hpack);
+            if (*ierr!=CTA_OK) return;
+
+            /* Free workvars */
+            *ierr=CTA_Pack_Free(&hpack);
+            if (*ierr!=CTA_OK) return;
+
+            /* if noosobs, then vsub has the same size as vval (all observations) but contains MISVALS.
+               These MISVALS have to be filtered out. */
+
+            /* Copy observations of domain to global vector */
+            if (data->nworker>1) {
+               if (NOOSOBS < 1) {
+                  retval=CTA_RelTable_ApplyInv(reltab[irank], vsub, *vval);
+                  if (*ierr!=CTA_OK) {
+                     printf("modbuild_par_getobsvalues ERROR in CTA_RelTable_ApplyInv\n");
+                     return;
+                  }
+               } else {
+
+                  *ierr=CTA_Vector_GetDatatype(vsub,&datatype);
+                  *ierr=CTA_SizeOf(datatype,&size_type);
+                 
+                  for ( iobs = 0; iobs < nobs; iobs++) {
+                    *ierr = CTA_Vector_GetVal(vsub,iobs+1, &value,datatype);
+                     if (  value > MISVAL+1.0) {
+                     *ierr = CTA_Vector_SetVal(*vval, iobs+1, &value,datatype);
+                     if (*ierr!=CTA_OK) {
+                        printf("modbuild_par_getobsvalues ERROR %d in filling vvals from vsub\n",*ierr);
+                        return;
+                        } 
+                     }
+                  }
+             
+               }
+
+               CTA_Vector_Free(&vsub);
+            }
+         }
+         else {
+            if (IDEBUG>0) printf("#%d No need to ask observations since there are none \n",CTA_PAR_MY_RANK);
+         }
+      }
+      /* Free obsdescr of sub-domains */
+      if (data->nworker>1){
+         if (NOOSOBS < 1) {
+            for (irank=1;irank<=data->nworker;irank++){
+               CTA_ObsDescr_Free(&(subObsDescr[irank]));
+               CTA_RelTable_Free(&(reltab[irank]));
+            }
+            free(reltab);
+         } else { // do nothing
+
+         }  
+      }
+      free(subObsDescr);
+       //  printf("@@ getobsvalues end; final array of predictions:\n");
+       //CTA_Vector_Export(*vval, CTA_FILE_STDOUT);
+
+      *ierr=CTA_OK;
+}
+
+
+void modbuild_par_getobslocalization(CTAI_Modbuild_par *data, CTA_ObsDescr *hdescr, 
+                   double *distance, CTA_Vector *locVecs, int* ierr) {
+   int minone=-1;
+   modbuild_par_getobslocalizationdomain(data, hdescr, distance, &minone, locVecs, ierr);
+
+}
+
+
+void modbuild_par_getobsselect(CTAI_Modbuild_par *data, CTA_Time *ttime, CTA_ObsDescr *hdescr,
+                                CTA_String *sselect, int* ierr){
+
+   CTA_ObsDescr hdescr_null=CTA_NULL;
+
+   if (IDEBUG>0) printf("#%d modbuild_par_getobsselect hdescr=%d\n",CTA_PAR_MY_RANK, *hdescr);
+
+   /* First try do not send the observation description */
+   modbuild_par_getobsselect_communicate(data, ALL_WORKERS, ttime, &hdescr_null, sselect, ierr);
+   if (ierr!=CTA_OK) {
+      /* Second try and send the observation description */
+      modbuild_par_getobsselect_communicate(data, ALL_WORKERS, ttime, hdescr, sselect, ierr);
+   }
+   if (IDEBUG>0) printf("#%d END if modbuild_par_getobsselect ierr=%d\n",CTA_PAR_MY_RANK, *ierr);
+}
+
+
+void modbuild_par_gettime(CTAI_Modbuild_par *data, CTA_Time *timespan, char* task, int* ierr){
+
+  double span[2];
+  int tag;
+  int irank, jrank;
+  MPI_Status status;
+
+  tag=1234;
+
+  /* Send Task and model handle */
+  modbuild_par_sendtaskandhandle(task, ALL_WORKERS, data, tag);
+
+  /* Send timespan */
+  for (irank=1;irank<=data->nworker;irank++){
+
+     jrank=irank;
+     if (data && data->is_intercomm) jrank--;
+
+     MPI_Recv(span, 2, MPI_DOUBLE, jrank,1234, data->comm, &status);
+     CTA_Time_SetSpan(*timespan, span[0], span[1]);
+  }
+
+  *ierr=CTA_OK;
+}
+
+
+void modbuild_par_getcurrenttime(CTAI_Modbuild_par *data, CTA_Time *timespan, int* ierr){
+
+  if (IDEBUG>0) printf("#%d modbuild_par_getcurrenttime\n",CTA_PAR_MY_RANK);
+  modbuild_par_gettime(data, timespan, "cta_model_getcurrenttime", ierr);
+
+}
+
+void modbuild_par_gettimehorizon(CTAI_Modbuild_par *data, CTA_Time *timespan, int* ierr){
+
+  if (IDEBUG>0) printf("#%d modbuild_par_gettimehorizon\n",CTA_PAR_MY_RANK);
+  modbuild_par_gettime(data, timespan, "cta_model_gettimehorizon", ierr);
+
+}
+
+
+void modbuild_par_addnoise(CTAI_Modbuild_par *data, CTA_Time *timespan, int* ierr){
+
+  double span[2];
+  int tag;
+  int irank, jrank;
+
+  if (IDEBUG>0) printf("#%d modbuild_par_addnoise\n",CTA_PAR_MY_RANK);
+
+  tag=1234;
+
+  /* Send Task and model handle */
+  modbuild_par_sendtaskandhandle("cta_model_addnoise", ALL_WORKERS, data, tag);
+
+  /* Send timespan */
+  CTA_Time_GetSpan(*timespan, &span[0], &span[1]);
+  for (irank=1;irank<=data->nworker;irank++){
+     jrank=irank;
+     if (data && data->is_intercomm) jrank--;
+
+     MPI_Send((void*) span, 2, MPI_DOUBLE,jrank,tag,data->comm);
+  }
+
+  *ierr=CTA_OK;
+}
+
+void modbuild_par_savepersistentstate(CTAI_Modbuild_par *data, CTA_String *filename, CTA_String *instanceID, int *ierr){
+   MPI_Status status;
+   CTA_Pack hpack;
+   int irank, jrank;
+   int tag, lenpack, dumval;
+   char str1[256], str2[256],fname[256];
+   char *pack;
+
+   if (IDEBUG>0) printf("#%d modbuild_par_savepersistentstate\n",CTA_PAR_MY_RANK);
+
+   tag=1234;
+   /* Send Task and model handle */
+   modbuild_par_sendtaskandhandle("cta_model_savepersistentstate", ALL_WORKERS, data, tag);
+
+   /* Create pack object for instance ID */
+   CTA_Pack_Create(CTA_PACK_DEFAULT_SIZE,&hpack);
+   CTA_String_Export(*instanceID,hpack);
+   tag=7321;
+   for (irank=1;irank<=data->nworker;irank++){
+      jrank=irank;
+      if (data && data->is_intercomm) jrank--;
+
+      /*Send packed instanceID to each worker process */
+      lenpack= CTA_Pack_GetLen(hpack);
+      pack=CTA_Pack_GetPtr(hpack);
+      MPI_Send(&lenpack, 1, MPI_INT, jrank, tag,
+                      data->comm);
+      MPI_Send(pack, lenpack, MPI_CHAR, jrank , tag,
+                      data->comm);
+   }
+   /* Free packed object */
+   CTA_Pack_Free(&hpack);
+
+   /* Check extension NetCDF and temporarily remove it */
+   CTA_String_Get(*filename,str1);
+   if (strlen(str1)>3){
+     if (0==strcmp( ".nc",(str1+strlen(str1)-3))){
+        str1[strlen(str1)-3]='\0';
+      } else {
+         printf("#%d Error: only export to NetCDF possible '%s'\n",
+               CTA_PAR_MY_RANK, str1);
+         exit(-1);
+      }
+   }
+   tag=3217;
+   for (irank=1;irank<=data->nworker;irank++){
+      jrank=irank;
+      if (data && data->is_intercomm) jrank--;
+
+      /*Add rank of worker process to string containing filename*/
+      strcpy(fname,str1);
+      sprintf(str2,"_part%04d",jrank);
+      strcat(fname,str2);
+      strcat(fname,".nc");
+      if (IDEBUG>0) printf("send filename '%s' to process #%d \n",fname, jrank);
+      CTA_String_Set(*filename,fname);
+
+      /* send packed filename to each worker process */
+      CTA_Pack_Create(CTA_PACK_DEFAULT_SIZE,&hpack);
+      CTA_String_Export(*filename,hpack);
+
+      lenpack= CTA_Pack_GetLen(hpack);
+      pack=CTA_Pack_GetPtr(hpack);
+      MPI_Send(&lenpack, 1, MPI_INT, jrank, tag,
+                      data->comm);
+      MPI_Send(pack, lenpack, MPI_CHAR, jrank , tag,
+                      data->comm);
+      CTA_Pack_Free(&hpack);
+   }
+
+   /* Receive CTA_OK to prevent the master from collecting zip files
+      before they are written */
+   tag = 2173;
+   for (irank=1;irank<=data->nworker;irank++){
+      jrank=irank;
+      if (data && data->is_intercomm) jrank--;
+
+      MPI_Recv(&dumval, 1, MPI_INT, jrank, tag, data->comm, &status);
+   }
+   *ierr=CTA_OK;
+}
+
+void modbuild_par_loadpersistentstate(CTAI_Modbuild_par *data, CTA_String *filename, CTA_String *instanceID, int *ierr){
+
+   MPI_Status status;
+   CTA_Pack hpack;
+   CTA_String hID;
+   int irank, jrank;
+   int tag, lenpack;
+   char str1[256], str2[256],fname[256];
+   char *pack;
+
+   if (IDEBUG>0) printf("#%d modbuild_par_loadpersistentstate\n",CTA_PAR_MY_RANK);
+
+   tag=1234;
+
+   /* Send Task and model handle */
+   modbuild_par_sendtaskandhandle("cta_model_loadpersistentstate", ALL_WORKERS, data, tag);
+
+   /* Check extension NetCDF and temporarily remove it */
+   CTA_String_Get(*filename,str1);
+   if (strlen(str1)>3){
+     if (0==strcmp( ".nc",(str1+strlen(str1)-3))){
+        str1[strlen(str1)-3]='\0';
+      } else {
+         printf("#%d Error: only export to NetCDF possible '%s'\n",
+               CTA_PAR_MY_RANK, str1);
+         exit(-1);
+      }
+   }
+   tag=3218;
+   for (irank=1;irank<=data->nworker;irank++){
+      jrank=irank;
+      if (data && data->is_intercomm) jrank--;
+
+      /*Add rank of worker process to string containing filename*/
+      strcpy(fname,str1);
+      sprintf(str2,"_part%04d",jrank);
+      strcat(fname,str2);
+      strcat(fname,".nc");
+      if (IDEBUG>0) printf("send filename '%s' to process #%d \n",fname, jrank);
+      CTA_String_Set(*filename,fname);
+
+      /* send packed filename to each worker process */
+      CTA_Pack_Create(CTA_PACK_DEFAULT_SIZE,&hpack);
+      CTA_String_Export(*filename,hpack);
+
+      lenpack= CTA_Pack_GetLen(hpack);
+      pack=CTA_Pack_GetPtr(hpack);
+      MPI_Send(&lenpack, 1, MPI_INT, jrank, tag,
+                      data->comm);
+      MPI_Send(pack, lenpack, MPI_CHAR, jrank , tag,
+                      data->comm);
+      CTA_Pack_Free(&hpack);
+   }
+
+   /* Receive instanceID from each worker process */
+   CTA_String_Create(&hID);
+   tag=8321;
+   for (irank=1;irank<=data->nworker;irank++){
+      jrank=irank;
+      if (data && data->is_intercomm) jrank--;
+
+      /* Receive string: TODO: teach CTA_Pack this simple and repetitive task:
+         CTA_Pack hpack = CTA_Pack_MPI_Receive(jrank,tag,comm);
+      */
+      MPI_Recv(&lenpack, 1, MPI_INT, jrank,tag, data->comm, &status);
+      CTA_Pack_Create(lenpack,&hpack);
+      pack  =CTA_Pack_GetPtr(hpack);
+      MPI_Recv(pack, lenpack, MPI_CHAR, jrank, tag, data->comm, &status);
+      CTA_Pack_AddCnt(hpack,lenpack);
+
+      /* Get string from pack object*/
+      CTA_String_Import(hID,hpack);
+      CTA_Pack_Free(&hpack);
+
+      if (irank==1) {
+         *instanceID=hID;
+      }
+      if (IDEBUG>0) {
+         char *tmpstr = CTAI_String_GetPtr(hID);
+         printf("# %d LPS: received instanceID '%s'\n",CTA_PAR_MY_RANK, tmpstr);
+      }
+   }
+   *ierr=CTA_OK;
+}
+
+void modbuild_par_saveinternalstate(CTAI_Modbuild_par *data, CTA_String *instanceID, int *ierr){
+
+  MPI_Status status;
+  CTA_String hID;
+  CTA_Pack hpack;
+  int tag, lenpack;
+  int irank, jrank;
+  char *pack;
+
+  if (IDEBUG>0) printf("#%d modbuild_par_saveinternalstate\n",CTA_PAR_MY_RANK);
+
+  tag=1234;
+  /* Send Task and model handle */
+  modbuild_par_sendtaskandhandle("cta_model_saveinternalstate", ALL_WORKERS, data, tag);
+
+  /* Receive instanceID from each worker process */
+  CTA_String_Create(&hID);
+  tag=6321;
+  for (irank=1;irank<=data->nworker;irank++){
+     jrank=irank;
+     if (data && data->is_intercomm) jrank--;
+
+     /* Receive string: TODO: teach CTA_Pack this simple and repetitive task:
+        CTA_Pack hpack = CTA_Pack_MPI_Receive(jrank,tag,comm);
+     */
+     MPI_Recv(&lenpack, 1, MPI_INT, jrank,tag, data->comm, &status);
+     CTA_Pack_Create(lenpack,&hpack);
+     pack  =CTA_Pack_GetPtr(hpack);
+     MPI_Recv(pack, lenpack, MPI_CHAR, jrank, tag, data->comm, &status);
+     CTA_Pack_AddCnt(hpack,lenpack);
+
+     /* Get string from pack object*/
+     CTA_String_Import(hID,hpack);
+     CTA_Pack_Free(&hpack);
+
+     if (irank==1) {
+        *instanceID=hID;
+     }
+     if (IDEBUG>0) {
+        char *tmpstr = CTAI_String_GetPtr(hID);
+        printf("# %d SIS: received instanceID '%s'\n",CTA_PAR_MY_RANK, tmpstr);
+     }
+  }
+  *ierr=CTA_OK;
+}
+
+void modbuild_par_restoreinternalstate(CTAI_Modbuild_par *data, CTA_String *instanceID, int *ierr){
+
+   int irank, jrank;
+   int tag, lenpack;
+   CTA_Pack hpack;
+   char *pack;
+
+  if (IDEBUG>0) printf("#%d modbuild_par_restoreinternalstate\n",CTA_PAR_MY_RANK);
+
+  tag=1234;
+  /* Send Task and model handle */
+  modbuild_par_sendtaskandhandle("cta_model_restoreinternalstate", ALL_WORKERS, data, tag);
+
+   tag=5321;
+   /* Create pack object for instance ID */
+   CTA_Pack_Create(CTA_PACK_DEFAULT_SIZE,&hpack);
+   CTA_String_Export(*instanceID,hpack);
+
+   for (irank=1;irank<=data->nworker;irank++){
+      jrank=irank;
+      if (data && data->is_intercomm) jrank--;
+
+      /*Send packed instanceID to each worker process */
+      lenpack= CTA_Pack_GetLen(hpack);
+      pack=CTA_Pack_GetPtr(hpack);
+      MPI_Send(&lenpack, 1, MPI_INT, jrank, tag,
+                      data->comm);
+      MPI_Send(pack, lenpack, MPI_CHAR, jrank , tag,
+                      data->comm);
+   }
+   /* Free local objects */
+   CTA_Pack_Free(&hpack);
+
+   *ierr=CTA_OK;
+}
+
+void modbuild_par_releaseinternalstate(CTAI_Modbuild_par *data, CTA_String *instanceID, int *ierr){
+
+   int irank, jrank;
+   int tag, lenpack;
+   CTA_Pack hpack;
+   char *pack;
+
+   if (IDEBUG>0) printf("#%d modbuild_par_releaseinternalstate\n",CTA_PAR_MY_RANK);
+
+   tag=1234;
+   /* Send Task and model handle */
+   modbuild_par_sendtaskandhandle("cta_model_releaseinternalstate", ALL_WORKERS, data, tag);
+
+   tag=9321;
+   /* Create pack object for instance ID */
+   CTA_Pack_Create(CTA_PACK_DEFAULT_SIZE,&hpack);
+   CTA_String_Export(*instanceID,hpack);
+
+   for (irank=1;irank<=data->nworker;irank++){
+      jrank=irank;
+      if (data && data->is_intercomm) jrank--;
+
+      /*Send packed instanceID to each worker process */
+      lenpack= CTA_Pack_GetLen(hpack);
+      pack=CTA_Pack_GetPtr(hpack);
+      MPI_Send(&lenpack, 1, MPI_INT, jrank, tag,
+                      data->comm);
+      MPI_Send(pack, lenpack, MPI_CHAR, jrank , tag,
+                      data->comm);
+   }
+   /* Free local objects */
+   CTA_Pack_Free(&hpack);
+
+  *ierr=CTA_OK;
+}
+
+/** \brief Get the number of domains for local analysis
+ *  
+ * \param data     IO instance data block 
+ * \param distance I  characteristic distance
+ * \param ndomains O  number of domains
+ * \param ierr     O  error status: CTA_OK if successful
+ */
+void modbuild_par_getnumdomains(CTAI_Modbuild_par *data, double *distance, int *ndomains, int *ierr){
+}
+
+
+/** \brief Get selection of observations that are relevnet for assimilation in the given domain
+ *  
+ * \param data      IO instance data block 
+ * \param hdescr    I  observation description of all observations
+ * \param distance  I  characteristic distance
+ * \param idomain   I  domain number
+ * \param selection O  costa vector with the indices of the relevant observations (0 based)
+ * \param ierr     O  error status: CTA_OK if successful
+ */
+void modbuild_par_getobsselector(CTAI_Modbuild_par *data, 
+                   CTA_ObsDescr *hdescr, double *distance, int *idomain, CTA_Vector *selection, int *ierr){
+}
+
+
+
+
+
+/** \brief Get for each observation a localization scaling vector for single domain
+ *  
+ * \param data     IO instance data block 
+ * \param hdescr   I  observation description for which we want localization scaling vectors
+ * \param distance I  characteristic distance
+ * \param idomain  I  domain number
+ * \param locVecs  O  costa vector of handles to treevectors (scaling vectors). The treevectors
+ *                    are created when the indices are CTA_NULL on entry
+ * \param ierr     O  error status: CTA_OK if successful
+ */
+void modbuild_par_getobslocalizationdomain(CTAI_Modbuild_par *data,
+                   CTA_ObsDescr *hdescr, double *distance, int *idomain, CTA_Vector *locVecs, int *ierr){
+
+   //NB  locvecs (size: nobs) is vector containing handles
+
+   int tag;
+   int lenpack;
+   int irank, jrank, lentask;
+   CTA_Pack hpack;
+   CTA_Time ttime;
+   char *pack;
+   CTA_ObsDescr *subObsDescr;
+   int nobs, iobs; 
+   CTA_TreeVector *locstate;
+   int locIDEBUG = 0;
+
+      if (locIDEBUG>0) { printf("modbuild_par_getobslocalization START\n");}
+
+      tag = 1234;
+
+      CTA_ObsDescr_Observation_Count (*hdescr,&nobs);
+
+      if (nobs>0){
+         /* Create stochastic observers for different process */
+         subObsDescr=CTA_Malloc(sizeof(CTA_ObsDescr)*(data->nworker+1));
+
+         if (data->nworker==1){
+            subObsDescr[1]=*hdescr;
+         } else {
+                for (irank=1;irank<=data->nworker;irank++){
+                  subObsDescr[irank]=*hdescr;
+                }
+         }
+
+         /* Pack the vector containing handles*/
+         lenpack=nobs*sizeof(int)+100;
+         CTA_Pack_Create(lenpack,&hpack);
+         CTA_Vector_Export(*locVecs, hpack);
+
+         /* Send vector to workers */
+         lentask= CTA_Pack_GetLen(hpack);
+         pack   = CTA_Pack_GetPtr(hpack);
+
+         /* Send observation descriptions of the various sub processes */
+         for (irank=1;irank<=data->nworker;irank++){
+
+            jrank=irank;
+            if (data && data->is_intercomm) jrank--;
+
+            if (locIDEBUG>0) printf("#%d Prepare to send Obsdescr to worker %d\n",CTA_PAR_MY_RANK, irank);
+
+            /* Send Task and model handle */
+            modbuild_par_sendtaskandhandle("cta_model_getobslocalization", irank, data, tag);
+            if (locIDEBUG>0) printf("#%d done sending task and handle \n",CTA_PAR_MY_RANK);
+
+            /* Send time and obsdescr */
+            /* note: time is not needed but we want to use the  sendobsdescr function */
+            CTA_Time_Create(&ttime);
+            CTA_Time_SetSpan(ttime, 0.0,0.0);
+           
+            if (locIDEBUG>0) printf("#%d Sending observation description to worker %d\n",CTA_PAR_MY_RANK, irank);
+            modbuild_par_sendobsdescr(data, irank, ttime, subObsDescr[irank], tag, ierr);
+            if (*ierr!=CTA_OK) return;
+
+            CTA_Time_Free(&ttime);
+
+            /* send characteristic distance */  
+            MPI_Send(distance, 1, MPI_DOUBLE, jrank, tag,
+                      data->comm);
+
+            /* send domain number */
+            MPI_Send(distance, 1, MPI_INT, jrank, tag,
+                      data->comm);
+
+            /* Send length of packed vector */
+            if (locIDEBUG>0) printf("#%d Send vector to worker (lentask)\n", irank);
+            MPI_Send(&lentask, 1, MPI_INT, jrank, tag,
+                      data->comm);
+
+            /* Send packed vector */
+            MPI_Send(pack, lentask, MPI_CHAR, jrank, tag,
+                      data->comm);
+            if (locIDEBUG>0) printf("#%d Done sending vector\n",CTA_PAR_MY_RANK);
+
+         }     
+
+         locstate=CTA_Malloc(sizeof(CTA_TreeVector) *nobs);
+    
+         /* receive for each observation the localization state from all workers */        
+         for ( iobs = 0; iobs < nobs; iobs++) {
+            if (locIDEBUG>0) printf("modbuild_par_getobslocalization: receiving state of observation %d \n",iobs);
+            locstate[iobs] = CTA_NULL; 
+            modbuild_par_recvstate(&(locstate[iobs]), data, 1234);
+
+            /* put the handles in the cta-vector */
+            CTA_Vector_SetVal(*locVecs,iobs+1,&(locstate[iobs]),CTA_HANDLE);  
+         }
+
+           
+         if (locIDEBUG>0) {printf("modbuild_par_getobslocalization: returning vector of handles:  \n");
+                      CTA_Vector_Export(*locVecs, CTA_FILE_STDOUT);
+                     }      
+
+         /* Free work variables/objects */
+         CTA_Pack_Free(&hpack);
+         free(subObsDescr);
+         free(locstate);
+   }
+
+
+
+
+
+}
+
+
+/** \brief Get a copy of the internal state.
+ *
+ * \note Optionally a tree-vector is created. In that case the caller of this
+ * method is responsible for freeing that tree-vector. The input state must be compatible
+ * (same size and or composition) as the models internal state.
+ * \note If *hstate == CTA_NULL a new object is created, user is responsible for freeing this object.
+ *
+ * \param data     IO instance data block 
+ * \param idomain  I  domain number
+ * \param hstate   IO receives state of the model, *hstate can be CTA_NULL on calling (see note)
+ * \param ierr     O  error status: CTA_OK if successful
+ */
+void modbuild_par_getstatedomain(CTAI_Modbuild_par *data, int *idomain, CTA_TreeVector *hstate, int *ierr){
+}
+
+/** \brief Perform axpy operation on the internal state for a single domain 
+ *
+ * \note AXPY: y=alpha*x+y. y corresponds to the models
+ *       internal state and x can be a state vector or a model
+ 
+ * \param data     IO instance data block 
+ * \param alpha    I  alpha
+ * \param idomain  I  domain number
+ * \param hx       I  handle of x (state or model)
+ * \param ierr     O  error status: CTA_OK if successful
+ */
+void modbuild_par_model_axpystatedomain(CTAI_Modbuild_par *data, double *alpha, int *idomain, CTA_Handle *hx, int *ierr){
+}
+
+
+
+
+
+
+
+
+
+
+void CTA_Modbuild_par_worker_create(){
+   MPI_Status status;
+   int retval;
+   CTA_Model hmodel;
+   /* cta_model_create */
+   char clsnam[256], modinp[256];
+   CTA_String sclsnam, smodinp;
+   CTA_ModelClass hmodcl;
+
+   /* Create some work-variables */
+   CTA_String_Create(&sclsnam);
+   CTA_String_Create(&smodinp);
+   /* Receive the name of the modelclass */
+   retval=MPI_Recv(clsnam,256,MPI_CHAR, ctai_rank_master, 1234,
+                   CTA_COMM_MASTER_WORKER,&status);
+   CTA_String_Set(sclsnam,clsnam);
+   if (IDEBUG>0) printf("#%d name of model-class is '%s'\n",CTA_PAR_MY_RANK,clsnam);
+   /* Receive the name of the models input-file */
+   retval=MPI_Recv(modinp,256,MPI_CHAR, ctai_rank_master, 1234,
+                   CTA_COMM_MASTER_WORKER,&status);
+   CTA_String_Set(smodinp,modinp);
+   if (IDEBUG>0) printf("#%d model-input is '%s'\n",CTA_PAR_MY_RANK, modinp);
+
+   /* Find the handle of the modelclass */
+   retval=CTA_Handle_Find(sclsnam, CTA_MODELCLASS, &hmodcl);
+   if (retval!=CTA_OK) {
+      printf("#%d Cannot find model-class with name '%s'\n",
+              CTA_PAR_MY_RANK, CTAI_String_GetPtr(sclsnam));
+      exit(-1);
+   }
+   /* Create the model */
+   if (IDEBUG>0) printf("#%d Calling model-create for modell class %d\n",CTA_PAR_MY_RANK, hmodcl);
+
+   retval=CTA_Model_Create(hmodcl,smodinp,&hmodel);
+   if (retval!=CTA_OK) {
+      printf("#%d CTA_Modbuild_par_worker:Error creating model instance ",CTA_PAR_MY_RANK);
+      printf("ierr='%d'\n",retval);
+      exit(-1);
+   }
+
+   /* add new model to list */
+   modbuild_models_add(hmodel);
+
+
+   /* Do we need to create our sratch model? */
+   if (hmodel_scratch==CTA_NULL) {
+      if (IDEBUG>0){ printf("WARNING NOT CREATING SCRATCH MODEL\n"); }
+//      retval=CTA_Model_Create(hmodcl,smodinp,&hmodel_scratch);
+//      if (retval!=CTA_OK) {
+//         printf("#%d Failed to create scratch model\n",CTA_PAR_MY_RANK);
+//         printf("#%d CTA_Modbuild_par_worker:Error creating model instance ",CTA_PAR_MY_RANK);
+//         printf("ierr='%d'\n",retval);
+//         exit(-1);
+//      }
+   }
+
+
+   /* Send the handle of new model instance to master */
+   retval=MPI_Send(&hmodel,1, MPI_INT,ctai_rank_master,1234,CTA_COMM_MASTER_WORKER);
+
+   /* Free work variables */
+   CTA_String_Free(&sclsnam);
+   CTA_String_Free(&smodinp);
+}
+
+void CTA_Modbuild_par_worker_compute(){
+   MPI_Status status;
+   CTA_Time timespan;
+   CTA_Model hmodel;
+   double span[2];
+
+
+   if (IDEBUG>0) printf("#%d start CTA_Modbuild_par_worker_compute\n",CTA_PAR_MY_RANK);
+
+   /* Receive the model handle */
+   MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,1234,
+                  CTA_COMM_MASTER_WORKER, &status);
+
+  /* Receive the simulation timespan */
+  CTA_Time_Create(&timespan);
+  MPI_Recv(span, 2, MPI_DOUBLE, ctai_rank_master,1234,
+                  CTA_COMM_MASTER_WORKER, &status);
+  CTA_Time_SetSpan(timespan, span[0], span[1]);
+
+  /* Perform the compute */
+  CTA_Model_Compute(hmodel,timespan);
+
+  /* Free work variables */
+   CTA_Time_Free(&timespan);
+
+   if (IDEBUG>0) printf("#%d end CTA_Modbuild_par_worker_compute\n",CTA_PAR_MY_RANK);
+
+}
+
+
+void CTA_Modbuild_par_worker_setstate(int typeset){
+   MPI_Status status;
+   CTA_Time timespan;
+   CTA_Model hmodel;
+   double span[2];
+   CTA_TreeVector state;
+
+   /* Receive the model handle */
+   MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,1234,
+                   CTA_COMM_MASTER_WORKER, &status);
+
+  /* Receive the simulation timespan (Forcings) */
+  timespan=CTA_NULL;
+  if (typeset==FORC) {
+     CTA_Time_Create(&timespan);
+     MPI_Recv(span, 2, MPI_DOUBLE, ctai_rank_master,1234,
+                     CTA_COMM_MASTER_WORKER, &status);
+     CTA_Time_SetSpan(timespan, span[0], span[1]);
+   }
+
+   /* Receive the state */
+   state=CTA_NULL;
+   modbuild_par_recvstate(&state, NULL, 1234);
+
+   /* Perform the setstate */
+   if (typeset==STATE){
+      CTA_Model_SetState(hmodel,state);
+   } else if (typeset==FORC){
+      CTA_Model_SetForc(hmodel,timespan,state);
+   } else if  (typeset==PARAM){
+      CTA_Model_SetParam(hmodel,state);
+   } else {
+      printf("Internal error in CTA_Modbuild_par_worker_setstate\n");
+      exit(-1);
+   }
+   CTA_Time_Free(&timespan);
+
+   /* Free work variables */
+   CTA_TreeVector_Free(&state,CTA_TRUE);
+}
+
+void CTA_Modbuild_par_worker_getstate(int typeget){
+   MPI_Status status;
+   CTA_Time timespan;
+   CTA_Model hmodel;
+   double span[2];
+   int retval;
+   CTA_TreeVector state;
+
+   /* Receive the model handle */
+   retval=MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,1234,
+                   CTA_COMM_MASTER_WORKER, &status);
+
+  /* Receive the simulation timespan (Forcings) */
+  timespan=CTA_NULL;
+  if (typeget==FORC) {
+     retval=CTA_Time_Create(&timespan);
+     retval=MPI_Recv(span, 2, MPI_DOUBLE, ctai_rank_master,1234,
+                     CTA_COMM_MASTER_WORKER, &status);
+     retval=CTA_Time_SetSpan(timespan, span[0], span[1]);
+   }
+
+   /* Perform the getstate */
+   state=CTA_NULL;
+   if (typeget==STATE) {
+      if (IDEBUG>0) printf("#%d Calling CTA_Model_GetState\n",CTA_PAR_MY_RANK);
+      retval=CTA_Model_GetState(hmodel,&state);
+      if (retval!=CTA_OK) {
+         printf("#%d CTA_Modbuild_par_worker_getstate: error (%d) calling CTA_Model_GetState\n",CTA_PAR_MY_RANK,retval );
+         exit(-1);
+      }
+   } else if (typeget==FORC){
+      retval=CTA_Model_GetForc(hmodel,timespan, &state);
+   } else if (typeget==PARAM){
+      retval=CTA_Model_GetParam(hmodel,&state);
+   } else if (typeget==SCAL){
+      retval=CTA_Model_GetStateScaling(hmodel,&state);
+   } else {
+      printf("#%d Internal error in CTA_Modbuild_par_worker_getstate\n",CTA_PAR_MY_RANK);
+      exit(-1);
+   }
+
+   /* Send the state */
+
+   modbuild_par_sendstate(state, NULL, 1234);
+
+   /* Free work variables */
+   retval=CTA_TreeVector_Free(&state,CTA_TRUE);
+   retval=CTA_Time_Free(&timespan);
+}
+
+void CTA_Modbuild_par_worker_axpymodel(){
+   MPI_Status status;
+   double alpha;
+   int OneWorker;
+   int retval;
+   CTA_Model hmodely, hmodelx;
+   int rank;
+   CTA_Pack himport;
+   int tag;
+
+   tag=1234;
+
+   /* Receive the model handle */
+   retval=MPI_Recv(&hmodely, 1, MPI_INT, ctai_rank_master,tag,
+                   CTA_COMM_MASTER_WORKER, &status);
+
+   /* Receive alpha */
+   retval=MPI_Recv(&alpha, 1, MPI_DOUBLE, ctai_rank_master,tag,
+                   CTA_COMM_MASTER_WORKER, &status);
+
+   /* Receive Oneworker option */
+   retval=MPI_Recv(&OneWorker, 1, MPI_INT, ctai_rank_master,tag,
+                   CTA_COMM_MASTER_WORKER, &status);
+
+   if (OneWorker) {
+      /* Receive handle of other model */
+      retval=MPI_Recv(&hmodelx, 1, MPI_INT, ctai_rank_master,tag,
+                      CTA_COMM_MASTER_WORKER, &status);
+
+   } else {
+      /* Receive rank of worker that will send the export */
+      retval=MPI_Recv(&rank, 1, MPI_INT, ctai_rank_master,tag,
+                      CTA_COMM_MASTER_WORKER, &status);
+
+      /* Receive export */
+      printf("ERROR SCRATCH MODEL DOES NOT WORK!\n");
+      exit(-1);
+      hmodelx=hmodel_scratch;
+      retval = CTA_Pack_Create(CTA_PACK_DEFAULT_SIZE,&himport);
+      retval = receive_pack_object(rank,tag,FALSE, himport, CTA_COMM_MASTER_WORKER);
+      retval = CTA_Model_Import(hmodelx,himport);
+      retval = CTA_Pack_Free(&himport);
+   }
+
+   retval=CTA_Model_AxpyState(hmodely,alpha,hmodelx);
+
+   if (retval!=CTA_OK) {
+      printf("#%d CTA_Modbuild_par_worker_axpymodel: error (%d) calling CTA_Model_Axpy\n",CTA_PAR_MY_RANK,retval );
+         exit(-1);
+      }
+}
+
+void CTA_Modbuild_par_worker_axpy(int typeaxpy){
+   MPI_Status status;
+   CTA_Time timespan;
+   CTA_Model hmodel;
+   CTA_TreeVector state;
+   double alpha;
+   double span[2];
+
+   /* Receive the model handle */
+   MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,1234,
+                   CTA_COMM_MASTER_WORKER, &status);
+
+  /* Receive the simulation timespan (Forcings) */
+  timespan=CTA_NULL;
+  if (typeaxpy==FORC) {
+     CTA_Time_Create(&timespan);
+     MPI_Recv(span, 2, MPI_DOUBLE, ctai_rank_master,1234,
+                     CTA_COMM_MASTER_WORKER, &status);
+     CTA_Time_SetSpan(timespan, span[0], span[1]);
+   }
+
+   /* Receive alpha */
+   MPI_Recv(&alpha, 1, MPI_DOUBLE, ctai_rank_master,1234,
+                   CTA_COMM_MASTER_WORKER, &status);
+
+   /* Receive the state */
+   state=CTA_NULL;
+   modbuild_par_recvstate(&state, NULL, 1234);
+
+   /* Perform the axpy */
+   if (typeaxpy==STATE) {
+      CTA_Model_AxpyState(hmodel,alpha,state);
+   } else if (typeaxpy==FORC) {
+      CTA_Model_AxpyForc(hmodel,timespan,alpha,state);
+   } else if (typeaxpy==PARAM) {
+      CTA_Model_AxpyParam(hmodel,alpha,state);
+   } else {
+      printf("#%d Internal error in CTA_Modbuild_par_worker_axpy\n",CTA_PAR_MY_RANK);
+      exit(-1);
+   }
+
+   /* Free work variables */
+   CTA_TreeVector_Free(&state,CTA_TRUE);
+   CTA_Time_Free(&timespan);
+}
+
+
+void CTA_Modbuild_par_worker_gettime(char *task){
+   MPI_Status status;
+   CTA_Time timespan;
+   CTA_Model hmodel;
+   double span[2];
+   int tag;
+
+   tag=1234;
+
+   /* Receive the model handle */
+   MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,tag,
+                   CTA_COMM_MASTER_WORKER, &status);
+
+   timespan=CTA_NULL;
+   CTA_Time_Create(&timespan);
+
+   if (strcmp(task, "cta_model_getcurrenttime") ==0) {
+      CTA_Model_GetCurrentTime(hmodel,timespan);
+   } else if (strcmp(task, "cta_model_gettimehorizon") ==0) {
+      CTA_Model_GetTimeHorizon(hmodel,timespan);
+   } else {
+     printf("#%d CTA_Modbuild_par_worker_gettime: Unkown task '%s'",CTA_PAR_MY_RANK,task);
+     exit(-1);
+   }
+
+   /* Sent timespan */
+   CTA_Time_GetSpan(timespan, &span[0], &span[1]);
+
+   MPI_Send(span, 2, MPI_DOUBLE, ctai_rank_master,tag,
+                   CTA_COMM_MASTER_WORKER);
+
+   /* Free workvars */
+   CTA_Time_Free(&timespan);
+}
+
+
+void CTA_Modbuild_par_worker_addnoise(){
+
+   MPI_Status status;
+   CTA_Time timespan;
+   CTA_Model hmodel;
+   double span[2];
+   int tag;
+
+   tag=1234;
+
+   /* Receive the model handle */
+   MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,tag,
+                   CTA_COMM_MASTER_WORKER, &status);
+
+   /* Receive the simulation timespan */
+   timespan=CTA_NULL;
+   CTA_Time_Create(&timespan);
+   MPI_Recv(span, 2, MPI_DOUBLE, ctai_rank_master,tag,
+                   CTA_COMM_MASTER_WORKER, &status);
+   CTA_Time_SetSpan(timespan, span[0], span[1]);
+
+   /* Perform the interpolation */
+   CTA_Model_AddNoise(hmodel, timespan);
+
+   /* Free workvars */
+   CTA_Time_Free(&timespan);
+}
+
+void CTA_Modbuild_par_worker_savepersistentstate(){
+
+   MPI_Status status;
+   CTA_Model hmodel;
+   CTA_Pack hpacki, hpackf;
+   CTA_String hID, fname;
+   int tag, lenpack, dumval;
+   char *pack;
+
+   if (IDEBUG>0) printf("#%d start CTA_Modbuild_par_worker_savepersistentstate\n",CTA_PAR_MY_RANK);
+
+   tag=1234;
+   /* Receive the model handle */
+   MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,tag,
+                    CTA_COMM_MASTER_WORKER, &status);
+
+   /* Receive instanceID */
+   tag = 7321;
+   MPI_Recv(&lenpack, 1, MPI_INT, ctai_rank_master,tag,
+                   CTA_COMM_MASTER_WORKER, &status);
+   CTA_Pack_Create(lenpack,&hpacki);
+   pack  =CTA_Pack_GetPtr(hpacki);
+   MPI_Recv(pack, lenpack, MPI_CHAR, ctai_rank_master,tag,
+                   CTA_COMM_MASTER_WORKER, &status);
+   CTA_Pack_AddCnt(hpacki,lenpack);
+
+   /* Get string from pack object*/
+   CTA_String_Create(&hID);
+   CTA_String_Import(hID,hpacki);
+   CTA_Pack_Free(&hpacki);
+   if (IDEBUG>0) {
+      char *ID = CTAI_String_GetPtr(hID);
+      printf("#%d worker SPS received instance ID = '%s'\n",CTA_PAR_MY_RANK,ID);
+   }
+
+   tag = 3217;
+   /* Receive filename */
+   MPI_Recv(&lenpack, 1, MPI_INT, ctai_rank_master,tag,
+                   CTA_COMM_MASTER_WORKER, &status);
+   CTA_Pack_Create(lenpack,&hpackf);
+   pack  =CTA_Pack_GetPtr(hpackf);
+   MPI_Recv(pack, lenpack, MPI_CHAR, ctai_rank_master,tag,
+                   CTA_COMM_MASTER_WORKER, &status);
+   CTA_Pack_AddCnt(hpackf,lenpack);
+
+   /* Get string from pack object*/
+   CTA_String_Create(&fname);
+   CTA_String_Import(fname,hpackf);
+   CTA_Pack_Free(&hpackf);
+   if (IDEBUG>0) {
+      char *fstr = CTAI_String_GetPtr(fname);
+      printf("#%d worker SPS received filename = '%s'\n",CTA_PAR_MY_RANK,fstr);
+   }
+
+   /*  Write state to file for own domain*/
+   CTA_Model_SavePersistentState(hmodel, fname, hID);
+
+   /* Return CTA_OK for each worker process to prevent the master to start
+      zipping files before they are written */
+   tag = 2173;
+   dumval = CTA_OK;
+   MPI_Send(&dumval, 1, MPI_INT, ctai_rank_master, tag,
+                   CTA_COMM_MASTER_WORKER);
+}
+
+void CTA_Modbuild_par_worker_loadpersistentstate(){
+
+   MPI_Status status;
+   CTA_Model hmodel;
+   CTA_Pack hpack;
+   CTA_Pack hpackf;
+   CTA_String hID, fname;
+   int tag, lenpack;
+   char *pack;
+
+   tag=1234;
+
+   /* Receive the model handle */
+   MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,tag,
+                    CTA_COMM_MASTER_WORKER, &status);
+
+   if (IDEBUG>0) {
+     printf("#%d LPS received model handle = '%d'\n",CTA_PAR_MY_RANK,hmodel);
+   }
+
+   tag = 3218;
+   /* Receive filename */
+   MPI_Recv(&lenpack, 1, MPI_INT, ctai_rank_master,tag,
+                   CTA_COMM_MASTER_WORKER, &status);
+   CTA_Pack_Create(lenpack,&hpackf);
+   pack  =CTA_Pack_GetPtr(hpackf);
+   MPI_Recv(pack, lenpack, MPI_CHAR, ctai_rank_master,tag,
+                   CTA_COMM_MASTER_WORKER, &status);
+   CTA_Pack_AddCnt(hpackf,lenpack);
+
+   /* Get string from pack object*/
+   CTA_String_Create(&fname);
+   CTA_String_Import(fname,hpackf);
+   CTA_Pack_Free(&hpackf);
+   if (IDEBUG>0) {
+      char *fstr = CTAI_String_GetPtr(fname);
+      printf("#%d worker LPS received filename = '%s'\n",CTA_PAR_MY_RANK,fstr);
+   }
+
+   /*  Read state from file for own domain*/
+   CTA_String_Create(&hID);
+   CTA_Model_LoadPersistentState(hmodel, fname, &hID);
+
+   /* Print the ID string for debugging reasons*/
+   if (IDEBUG>0) {
+      char *select = CTAI_String_GetPtr(hID);
+      printf("#%d: Debugging: my instanceID is '%s'\n",CTA_PAR_MY_RANK, select);
+   }
+
+   /* Send instance ID */
+   CTA_Pack_Create(CTA_PACK_DEFAULT_SIZE,&hpack);
+   CTA_String_Export(hID,hpack);
+
+   /* Send package: TODO: teach CTA_Pack this simple repetitive task */
+   tag = 8321;
+   lenpack= CTA_Pack_GetLen(hpack);
+   pack=CTA_Pack_GetPtr(hpack);
+   MPI_Send(&lenpack, 1, MPI_INT, ctai_rank_master, tag,
+                   CTA_COMM_MASTER_WORKER);
+   MPI_Send(pack, lenpack, MPI_CHAR,ctai_rank_master, tag,
+                   CTA_COMM_MASTER_WORKER);
+
+   /* Free local objects */
+   CTA_Pack_Free(&hpack);
+}
+
+void CTA_Modbuild_par_worker_saveinternalstate(){
+
+   MPI_Status status;
+   CTA_Model hmodel;
+   CTA_Pack hpack;
+   CTA_String hID;
+   int tag, lenpack;
+   char *pack;
+
+   if (IDEBUG>0) printf("#%d start CTA_Modbuild_par_worker_saveinternalstate\n",CTA_PAR_MY_RANK);
+
+   tag=1234;
+   /* Receive the model handle */
+   MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,tag,
+                     CTA_COMM_MASTER_WORKER, &status);
+   if (IDEBUG>0) printf("# %d modbuild_par_worker_saveinternalstate: receiving model handle %d\n",CTA_PAR_MY_RANK, hmodel);
+
+   /* Save internal state for each worker process and get ID back*/
+   CTA_String_Create(&hID);
+   CTA_Model_SaveInternalState(hmodel,&hID);
+
+   /* Print the ID string for debugging reasons*/
+   if (IDEBUG>0) {
+      char *select = CTAI_String_GetPtr(hID);
+      printf("#%d: Debugging: my instanceID is '%s'\n",CTA_PAR_MY_RANK, select);
+   }
+
+   /* Send instance ID */
+   CTA_Pack_Create(CTA_PACK_DEFAULT_SIZE,&hpack);
+   CTA_String_Export(hID,hpack);
+
+   /* Send package: TODO: teach CTA_Pack this simple repetitive task */
+   tag = 6321;
+   lenpack= CTA_Pack_GetLen(hpack);
+   pack=CTA_Pack_GetPtr(hpack);
+   MPI_Send(&lenpack, 1, MPI_INT, ctai_rank_master, tag,
+                   CTA_COMM_MASTER_WORKER);
+   MPI_Send(pack, lenpack, MPI_CHAR,ctai_rank_master, tag,
+                   CTA_COMM_MASTER_WORKER);
+
+   /* Free local objects */
+   CTA_Pack_Free(&hpack);
+ }
+
+void CTA_Modbuild_par_worker_restoreinternalstate(){
+
+   MPI_Status status;
+   CTA_Model hmodel;
+   CTA_String hID;
+   CTA_Pack hpack;
+   int tag, lenpack;
+   char *pack;
+
+   if (IDEBUG>0) printf("#%d start CTA_Modbuild_par_worker_restoreinternalstate\n",CTA_PAR_MY_RANK);
+
+   tag=1234;
+   /* Receive the model handle */
+   MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,tag,
+                   CTA_COMM_MASTER_WORKER, &status);
+
+   /* Receive instanceID to be restored */
+   tag = 5321;
+   MPI_Recv(&lenpack, 1, MPI_INT, ctai_rank_master,tag,
+                   CTA_COMM_MASTER_WORKER, &status);
+   CTA_Pack_Create(lenpack,&hpack);
+   pack  =CTA_Pack_GetPtr(hpack);
+   MPI_Recv(pack, lenpack, MPI_CHAR, ctai_rank_master,tag,
+                   CTA_COMM_MASTER_WORKER, &status);
+   CTA_Pack_AddCnt(hpack,lenpack);
+
+   /* Get string from pack object*/
+   CTA_String_Create(&hID);
+   CTA_String_Import(hID,hpack);
+   if (IDEBUG>0) {
+      char *ID = CTAI_String_GetPtr(hID);
+      printf("#%d worker RIS received instance ID = '%s'\n",CTA_PAR_MY_RANK, ID);
+   }
+   /* Release the internal state */
+   CTA_Model_RestoreInternalState(hmodel, hID);
+
+   /* Free local objects */
+   CTA_Pack_Free(&hpack);
+   CTA_String_Free(&hID);
+}
+
+void CTA_Modbuild_par_worker_releaseinternalstate(){
+
+   MPI_Status status;
+   CTA_Model hmodel;
+   CTA_String hID;
+   CTA_Pack hpack;
+   int tag, lenpack;
+   char *pack;
+
+   if (IDEBUG>0) printf("#%d start CTA_Modbuild_par_worker_releaseinternalstate\n",CTA_PAR_MY_RANK);
+
+   tag=1234;
+   /* Receive the model handle */
+   MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,tag,
+                    CTA_COMM_MASTER_WORKER, &status);
+
+   /* Receive instanceID to be released */
+   tag = 9321;
+   MPI_Recv(&lenpack, 1, MPI_INT, ctai_rank_master,tag,
+                   CTA_COMM_MASTER_WORKER, &status);
+   CTA_Pack_Create(lenpack,&hpack);
+   pack  =CTA_Pack_GetPtr(hpack);
+   MPI_Recv(pack, lenpack, MPI_CHAR, ctai_rank_master,tag,
+                   CTA_COMM_MASTER_WORKER, &status);
+   CTA_Pack_AddCnt(hpack,lenpack);
+
+   /* Get string from pack object*/
+   CTA_String_Create(&hID);
+   CTA_String_Import(hID,hpack);
+   if (IDEBUG>0) {
+      char *ID = CTAI_String_GetPtr(hID);
+      printf("#%d worker RLS received instance ID = '%s'\n",CTA_PAR_MY_RANK, ID);
+   }
+   /* Release the internal state */
+   CTA_Model_ReleaseInternalState(hmodel, hID);
+
+   /* Free local objects */
+   CTA_Pack_Free(&hpack);
+   CTA_String_Free(&hID);
+}
+
+void CTA_Modbuild_par_worker_free(){
+
+   MPI_Status status;
+   CTA_Model hmodel;
+   int tag;
+
+   tag=1234;
+
+   /* Receive the model handle */
+   MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,tag,
+                   CTA_COMM_MASTER_WORKER, &status);
+
+   /* Free model */
+   modbuild_models_delete(hmodel);
+   CTA_Model_Free(&hmodel);
+}
+
+void CTA_Modbuild_par_worker_getnoisecount(){
+   MPI_Status status;
+   CTA_Model hmodel;
+   int tag, retval, nnoise;
+
+   tag=1234;
+
+   /* Receive the model handle */
+   retval=MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,tag,
+                   CTA_COMM_MASTER_WORKER, &status);
+
+   /* Get number of noise parameters */
+   retval=CTA_Model_GetNoiseCount(hmodel, &nnoise);
+   if (retval!=CTA_OK) {
+      printf("#%d CTA_Modbuild_par_worker_getnoisecount: error (%d) calling CTA_Model_GetNoiseCount\n",CTA_PAR_MY_RANK,retval );
+         exit(-1);
+      }
+
+   /* Send number of noise parameters */
+   retval=MPI_Send(&nnoise,1, MPI_INT,ctai_rank_master,1234,
+                   CTA_COMM_MASTER_WORKER);
+
+}
+
+void CTA_Modbuild_par_worker_getnoisecovar(){
+   MPI_Status status;
+   CTA_Model hmodel;
+   int tag, retval, nnoise, inoise;
+   CTA_TreeVector *hcovar;
+   tag=1234;
+
+   /* Receive the model handle */
+   retval=MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,tag,
+                   CTA_COMM_MASTER_WORKER, &status);
+
+   /* Get number of noise parameters */
+   retval=CTA_Model_GetNoiseCount(hmodel, &nnoise);
+   if (retval!=CTA_OK) {
+      printf("#%d CTA_Modbuild_par_worker_getnoisecount: error (%d) calling CTA_Model_GetNoiseCount\n",
+         CTA_PAR_MY_RANK,retval );
+      exit(-1);
+   }
+
+   /* Send number of noise parameters */
+   retval=MPI_Send(&nnoise,1, MPI_INT,ctai_rank_master,1234,
+                   CTA_COMM_MASTER_WORKER);
+
+   if (IDEBUG>0){printf ("#%d Number of noise parameters is %d",CTA_PAR_MY_RANK,nnoise);}
+
+   /* Allocate workspace */
+   hcovar=CTA_Malloc(nnoise*sizeof(CTA_TreeVector));
+   for (inoise=0;inoise<nnoise;inoise++){
+      hcovar[inoise]=CTA_NULL;
+   }
+
+   /* Get the noise covariance for own domain */
+   retval=CTA_Model_GetNoiseCovar(hmodel, hcovar);
+   if (retval!=CTA_OK) {
+      printf("#%d CTA_Modbuild_par_worker_getnoisecount: error (%d) calling CTA_Model_GetNoiseCovar\n",CTA_PAR_MY_RANK,retval );
+         exit(-1);
+      }
+
+   /* Send columns of covariance matrix */
+   for (inoise=0;inoise<nnoise;inoise++){
+      modbuild_par_sendstate(hcovar[inoise], NULL, 1234);
+
+   }
+
+   /* Free workspace */
+   for (inoise=0;inoise<nnoise;inoise++){
+      retval=CTA_TreeVector_Free(&hcovar[inoise],CTA_TRUE);
+   }
+   free(hcovar);
+
+}
+
+
+void CTA_Modbuild_par_worker_announceobsvalues(){
+
+   MPI_Status status;
+   CTA_Time timespan;
+   CTA_ObsDescr hdescr_tab;
+   CTA_Model hmodel;
+   int tag, retval;
+
+   tag=1234;
+
+   /* Receive the model handle */
+   retval=MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,tag,
+                   CTA_COMM_MASTER_WORKER, &status);
+
+
+   timespan=CTA_NULL;
+   hdescr_tab=CTA_NULL;
+   modbuild_par_recvobsdescr(&timespan, &hdescr_tab, tag);
+
+   /* Perform the interpolation */
+   retval=CTA_Model_AnnounceObsValues(hmodel, hdescr_tab);
+   if (retval!=CTA_OK) {
+      printf("#%d CTA_Modbuild_par_worker_announceobsvalues Error (%d) calling CTA_Model_AnnounceObsValues \n",CTA_PAR_MY_RANK, retval);
+      exit(-1);
+   }
+
+   /* Free workvars */
+   retval=CTA_Time_Free(&timespan);
+   retval=CTA_ObsDescr_Free(&hdescr_tab);
+
+
+}
+
+
+void CTA_Modbuild_par_worker_getobsvalues(){
+
+   MPI_Status status;
+   CTA_Time timespan;
+   CTA_Pack hpack;
+   CTA_ObsDescr hdescr_tab;
+   CTA_Model hmodel;
+   CTA_Vector values;
+   int lenpack, lentask, retval, nmeasr, tag;
+   char *pack;
+
+   tag=1234;
+
+   /* Receive the model handle */
+   retval=MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,tag,
+                   CTA_COMM_MASTER_WORKER, &status);
+
+
+   timespan=CTA_NULL;
+   hdescr_tab=CTA_NULL;
+   modbuild_par_recvobsdescr(&timespan, &hdescr_tab, tag);
+
+   /* Receive the simulation timespan */
+//   retval=CTA_Time_Create(&timespan);
+//   retval=MPI_Recv(span, 2, MPI_DOUBLE, ctai_rank_master,tag,
+//                   CTA_COMM_WORKER, &status);
+//   retval=CTA_Time_SetSpan(timespan, span[0], span[1]);
+
+   /* Receive the observation description */
+//   retval=MPI_Recv(&lenpack, 1, MPI_INT, ctai_rank_master,tag,
+//                   CTA_COMM_WORKER, &status);
+//   retval=CTA_Pack_Create(lenpack,&hpack);
+//   pack  =CTA_Pack_GetPtr(hpack);
+//
+//   retval=MPI_Recv(pack, lenpack, MPI_CHAR, ctai_rank_master,tag,
+//                   CTA_COMM_WORKER, &status);
+//   retval=CTA_Pack_AddCnt(hpack,lenpack);
+//   if (IDEBUG>0) printf("CREATE THE CTA_OBSDESCR_TABLE1\n");
+
+   /* Create the observation description */
+//   retval=CTA_ObsDescr_Create(CTA_OBSDESCR_TABLE,hpack, &hdescr_tab);
+//   retval=CTA_Pack_Free(&hpack);
+//   if (IDEBUG>0) printf("Send vector to master\n");
+
+   /* Create vector for holding values */
+   retval=CTA_ObsDescr_Observation_Count(hdescr_tab, &nmeasr);
+   if (IDEBUG>0) printf("#%d Number of observations %d\n",CTA_PAR_MY_RANK,nmeasr );
+   retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nmeasr,  CTA_DOUBLE, CTA_NULL, &values);
+
+   /* Perform the interpolation */
+   retval=CTA_Model_GetObsValues(hmodel, timespan, hdescr_tab, values);
+   if (retval!=CTA_OK) {
+      printf("#%d CTA_Modbuild_par_worker_getobsvalues Error (%d) calling CTA_Model_GetObsValues \n",CTA_PAR_MY_RANK, retval);
+      exit(-1);
+   }
+   /* Pack the vector */
+   lenpack=nmeasr*sizeof(double)+100;
+   retval=CTA_Pack_Create(lenpack,&hpack);
+   retval=CTA_Vector_Export(values, hpack);
+
+   /* Send vector to master */
+   lentask= CTA_Pack_GetLen(hpack);
+   pack   = CTA_Pack_GetPtr(hpack);
+
+   /* Send length of packed state */
+   if (IDEBUG>0) printf("#%d Send vector to master (lentask)\n",CTA_PAR_MY_RANK);
+   retval=MPI_Send(&lentask, 1, MPI_INT, ctai_rank_master, tag,
+                   CTA_COMM_MASTER_WORKER);
+   if (IDEBUG>0) printf("#%d Send vector to master (packed vector)\n",CTA_PAR_MY_RANK);
+
+   /* Send packed state */
+   retval=MPI_Send(pack, lentask, MPI_CHAR, ctai_rank_master, tag,
+                   CTA_COMM_MASTER_WORKER);
+   if (IDEBUG>0) printf("#%d Done sending vector\n",CTA_PAR_MY_RANK);
+
+
+   /* Free workvars */
+   retval=CTA_Pack_Free(&hpack);
+   retval=CTA_Time_Free(&timespan);
+   retval=CTA_Vector_Free(&values);
+   retval=CTA_ObsDescr_Free(&hdescr_tab);
+
+
+}
+
+/* ----------------------------------------- */
+
+
+void CTA_Modbuild_par_worker_getobslocalization(){
+
+   MPI_Status status;
+   CTA_Pack hpack;
+   CTA_ObsDescr hdescr_tab;
+   CTA_Model hmodel;
+   CTA_Vector values;
+   int lenpack, retval, nmeasr, tag;
+   char *pack;
+   CTA_Time timespan; 
+   int iobs;
+   CTA_Handle hstate;
+   int locIDEBUG = 0;
+   double distance;
+
+   tag=1234;
+
+   /* Receive the model handle */
+   retval=MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,tag,
+                   CTA_COMM_MASTER_WORKER, &status);
+
+   timespan=CTA_NULL;
+   hdescr_tab=CTA_NULL;
+   modbuild_par_recvobsdescr(&timespan, &hdescr_tab, tag);
+   if (locIDEBUG>0) printf("#%d received obsdescr  %d\n",CTA_PAR_MY_RANK,hdescr_tab );
+
+   /* Create vector for holding values */
+   retval=CTA_ObsDescr_Observation_Count(hdescr_tab, &nmeasr);
+   if (locIDEBUG>0) printf("#%d Number of observations %d\n",CTA_PAR_MY_RANK,nmeasr ); 
+
+   retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nmeasr,  CTA_HANDLE, CTA_NULL, &values);
+
+   /* receive distance */
+   if (IDEBUG>0) {printf("#%d Receiving characteristic distance from master\n",CTA_PAR_MY_RANK);}
+   retval=MPI_Recv(&distance, 1, MPI_DOUBLE, ctai_rank_master,tag,
+                   CTA_COMM_MASTER_WORKER, &status);
+
+
+   /* Receive the vector with handles */
+   if (locIDEBUG>0) {printf("#%d Receiving lenpack from master\n",CTA_PAR_MY_RANK);}
+   retval=MPI_Recv(&lenpack, 1, MPI_INT, ctai_rank_master,tag,
+                   CTA_COMM_MASTER_WORKER, &status);
+   if (locIDEBUG>0) {printf("#%d vector locvals lenpack =%d\n",CTA_PAR_MY_RANK, lenpack);}
+
+   if (lenpack==0){
+      // free something? 
+   }
+   else {
+      retval=CTA_Pack_Create(lenpack,&hpack);
+      pack  =CTA_Pack_GetPtr(hpack);
+
+      if (locIDEBUG>0) {printf("#%d Receiving packed locvals from master\n",CTA_PAR_MY_RANK);}
+      retval=MPI_Recv(pack, lenpack, MPI_CHAR, ctai_rank_master,tag,
+                      CTA_COMM_MASTER_WORKER, &status);
+      if (IDEBUG>0) {printf("#%d Done Receiving packed locvals from master\n",CTA_PAR_MY_RANK); }
+
+      retval=CTA_Pack_AddCnt(hpack,lenpack);
+
+      /* Create the vector */
+       retval = CTA_Vector_Import(values,hpack);
+      if (IDEBUG>0) {printf("#%d Done Creating vector %d\n",CTA_PAR_MY_RANK, retval);}
+      retval=CTA_Pack_Free(&hpack);
+   }
+
+   if (locIDEBUG>0) printf("#%d now obtained null vector from master  %d\n",CTA_PAR_MY_RANK,values );
+
+   /* Perform the interpolation */
+   retval=CTA_Model_GetObsLocalization(hmodel, hdescr_tab, distance, values);
+   if (retval!=CTA_OK) {
+      printf("#%d CTA_Modbuild_par_worker_getobslocalization Error (%d) calling CTA_Model_GetOblocalization \n",CTA_PAR_MY_RANK, retval);
+      exit(-1);
+   }
+   
+   /* The vector cannot be packed since the state handles are only known locally. Therefore,
+    we have to pack and send the corresponding states. We do this for each observation.*/
+   for (iobs=0;iobs<nmeasr;iobs++) {
+      retval = CTA_Vector_GetVal(values,iobs+1, &hstate, CTA_HANDLE);
+
+      if (locIDEBUG>0) printf("#%d sending localization tv (handle %d) of obs  %d\n",CTA_PAR_MY_RANK,hstate,iobs+1 );
+      modbuild_par_sendstate(hstate, NULL, 1234);
+   }  
+     
+  
+   /* Free workvars */
+   retval=CTA_Vector_Free(&values);
+   retval=CTA_ObsDescr_Free(&hdescr_tab);
+   retval=CTA_Time_Free(&timespan);
+}
+
+
+void CTA_Modbuild_par_worker_getobsselect(){
+
+   MPI_Status status;
+   CTA_Time timespan;
+   CTA_String sselect;
+   CTA_ObsDescr hdescr_tab;
+   CTA_Model hmodel;
+   int lenstr, tag;
+   int retval_method;
+   char *select;
+
+   tag=1234;
+
+   /* Receive the model handle */
+   MPI_Recv(&hmodel, 1, MPI_INT, ctai_rank_master,tag,
+                   CTA_COMM_MASTER_WORKER, &status);
+
+   timespan=CTA_NULL;
+   hdescr_tab=CTA_NULL;
+   modbuild_par_recvobsdescr(&timespan, &hdescr_tab, tag);
+
+   /* Call obsselect */
+   CTA_String_Create(&sselect);
+   retval_method=CTA_Model_GetObsSelect(hmodel,timespan,hdescr_tab,sselect);
+
+   /* Send the error code back to the master */
+   MPI_Send(&retval_method,1, MPI_INT,ctai_rank_master,tag, CTA_COMM_MASTER_WORKER);
+
+   /* Send the select string */
+   select=CTAI_String_GetPtr(sselect);
+   if (IDEBUG>0) printf("#%d CTA_Modbuild_par_worker_getobsselect: My @@ selection is '%s'\n",CTA_PAR_MY_RANK, select);
+   lenstr=strlen(select)+1;
+   MPI_Send(select,lenstr, MPI_CHAR,ctai_rank_master,tag,
+                   CTA_COMM_MASTER_WORKER);
+
+   /* Free workvars */
+   CTA_String_Free(&sselect);
+   CTA_ObsDescr_Free(&hdescr_tab);
+   CTA_Time_Free(&timespan);
+}
+
+void CTA_Modbuild_par_worker(){
+   MPI_Status status;
+   char task[256];
+
+   /* Note we have to set our own random-seed */
+   CTA_INITIAL_RANDOM_SEED=CTA_INITIAL_RANDOM_SEED+ctai_rank;
+   if (IDEBUG>0) printf("#%d Set initial random seed:  %ld\n", CTA_PAR_MY_RANK, CTA_INITIAL_RANDOM_SEED);
+   CTA_rand_seed((long) CTA_INITIAL_RANDOM_SEED);
+
+   if (IDEBUG>0) printf("#%d I am the listening process of worker %d\n", CTA_PAR_MY_RANK, ctai_rank);
+   for (;1;){
+      if (IDEBUG>0) printf("#%d waiting for message of master...\n",CTA_PAR_MY_RANK);
+
+      MPI_Recv(task,256,MPI_CHAR, ctai_rank_master, 1234,
+                     CTA_COMM_MASTER_WORKER ,&status);
+
+
+      if (IDEBUG>0) {printf("%d My task is: %s \n",CTA_PAR_MY_RANK,task);}
+
+      if        (strcmp(task, "cta_model_create")         ==0) {
+         CTA_Modbuild_par_worker_create();
+      } else if (strcmp(task, "cta_model_compute")        ==0) {
+         CTA_Modbuild_par_worker_compute();
+      } else if (strcmp(task, "cta_model_setstate")       ==0) {
+         CTA_Modbuild_par_worker_setstate(STATE);
+      } else if (strcmp(task, "cta_model_getstate")       ==0) {
+         CTA_Modbuild_par_worker_getstate(STATE);
+      } else if (strcmp(task, "cta_model_axpystate")      ==0) {
+         CTA_Modbuild_par_worker_axpy(STATE);
+      } else if (strcmp(task, "cta_model_axpymodel")      ==0) {
+         CTA_Modbuild_par_worker_axpymodel();
+      } else if (strcmp(task, "cta_model_setforc")        ==0) {
+         CTA_Modbuild_par_worker_setstate(FORC);
+      } else if (strcmp(task, "cta_model_getforc")        ==0) {
+         CTA_Modbuild_par_worker_getstate(FORC);
+      } else if (strcmp(task, "cta_model_getstatescaling")==0) {
+         CTA_Modbuild_par_worker_getstate(SCAL);
+      } else if (strcmp(task, "cta_model_axpyforc")       ==0) {
+         CTA_Modbuild_par_worker_axpy(FORC);
+      } else if (strcmp(task, "cta_model_getnoisecount")  ==0) {
+         CTA_Modbuild_par_worker_getnoisecount();
+      } else if (strcmp(task, "cta_model_getnoisecovar")  ==0) {
+         CTA_Modbuild_par_worker_getnoisecovar();
+      } else if (strcmp(task, "cta_model_getobsselect")   ==0) {
+         CTA_Modbuild_par_worker_getobsselect();
+      } else if (strcmp(task, "cta_model_addnoise")       ==0) {
+         CTA_Modbuild_par_worker_addnoise();
+      } else if (strcmp(task, "cta_model_export")         ==0) {
+         CTA_Modbuild_par_worker_export();
+      } else if (strcmp(task, "cta_model_import")         ==0) {
+         CTA_Modbuild_par_worker_import();
+      } else if (strcmp(task, "cta_model_getobsvalues")   ==0) {
+         CTA_Modbuild_par_worker_getobsvalues();
+      } else if (strcmp(task, "cta_model_getobslocalization")   ==0) {
+        CTA_Modbuild_par_worker_getobslocalization(); 
+      } else if (strcmp(task, "cta_model_getcurrenttime") ==0) {
+         CTA_Modbuild_par_worker_gettime(task);
+      } else if (strcmp(task, "cta_model_gettimehorizon") ==0) {
+         CTA_Modbuild_par_worker_gettime(task);
+      } else if (strcmp(task, "cta_model_free")           ==0) {
+         CTA_Modbuild_par_worker_free();
+      } else if (strcmp(task, "cta_model_savepersistentstate") ==0) {
+         CTA_Modbuild_par_worker_savepersistentstate();
+      } else if (strcmp(task, "cta_model_loadpersistentstate") ==0) {
+         CTA_Modbuild_par_worker_loadpersistentstate();
+      } else if (strcmp(task, "cta_model_saveinternalstate") ==0) {
+        CTA_Modbuild_par_worker_saveinternalstate();
+      } else if (strcmp(task, "cta_model_restoreinternalstate") ==0) {
+         CTA_Modbuild_par_worker_restoreinternalstate();
+      } else if (strcmp(task, "cta_model_releaseinternalstate") ==0) {
+         CTA_Modbuild_par_worker_releaseinternalstate();
+      } else if (strcmp(task, "cta_model_announceobsvalues") ==0) {
+         CTA_Modbuild_par_worker_announceobsvalues();
+      } else if (strcmp(task, "finalize")                 ==0) {
+         if (IDEBUG>0) printf("Task is finalize\n");
+         // Free all models (in order to finalize workers as well)
+         modbuild_models_delete_all_models();
+         MPI_Finalize();
+         exit(0);
+      } else {
+         printf("#%d Unkown task '%s'",CTA_PAR_MY_RANK,task);
+         exit(-1);
+      }
+   }
+   printf("#%d CTA_Modbuild_par_worker: Internal error\n",CTA_PAR_MY_RANK);
+   exit(-1);
+}
+
+#endif
+void CTA_Modbuild_par_Finalize(){
+#if USE_MPI
+
+   /* Send Task */
+   char *task;
+   int lentask, irank;
+   int tag=1234;
+   int comSize;
+   MPI_Comm comm;
+   int is_intercomm;
+   int indx;
+   int isInit;
+   int rank_first;
+
+
+   if (IDEBUG>0) printf("#%d CTA_Modbuild_par_Finalize\n",CTA_PAR_MY_RANK);
+   // Loop over all communicators
+   task="finalize";
+   lentask=strlen(task)+1;
+
+   comm=MPI_COMM_WORLD;
+   for (indx=0; comm!=MPI_COMM_NULL; indx++){
+      CTA_Par_GetAllCommByIndex(indx, &comm);
+      if (comm!=MPI_COMM_NULL){
+         /* Determine processes to send finalize task
+            But first we have to check the kind of communicater 
+          */
+         MPI_Comm_test_inter(comm, &is_intercomm);
+         if (is_intercomm){
+            rank_first=0;
+            MPI_Comm_remote_size(comm,&comSize);
+         }
+         else {
+            rank_first=1;
+            MPI_Comm_size(comm,&comSize);
+         }
+
+         if (IDEBUG>0) printf("Size of communication group :%d\n", comSize);
+         if (IDEBUG>0) printf("Communicator is inter comm  :%d\n",is_intercomm);
+         for (irank=rank_first;irank<comSize; irank++){
+            if (IDEBUG>0) printf("Sending finalize to process %d\n", irank);
+            MPI_Send(task,lentask, MPI_CHAR, irank,tag,
+                           comm); 
+         }
+      }
+   }
+
+   // Only finalize when mpi is initialized
+   if (IDEBUG>0) printf("#%d CTA_Modbuild_par_Finalize calling MPI_Finalize\n",CTA_PAR_MY_RANK);
+   MPI_Initialized(&isInit);
+   if (isInit){
+     MPI_Finalize();
+   }
+   exit(0);
+#endif
+}
+
+
+
+void CTA_Modbuild_par_CreateClass(CTA_ModelClass *modelcls){
+
+#ifdef USE_MPI
+
+   int ierr; //COSTA return value
+   int aierr[CTA_MODEL_NUMFUNC];
+   int i;
+   CTA_Func hfunc[CTA_MODEL_NUMFUNC];
+   CTA_Intf intf;
+
+
+   /* Calculate number of nodes */
+   MPI_Comm_size(CTA_COMM_WORLD,&ctai_comm_size);
+
+   /* Determine whether you are master or worker */
+   MPI_Comm_rank(CTA_COMM_WORLD,&ctai_rank);
+
+   if (CTA_FILTER_PROCESS) {
+      if (IDEBUG>0) printf("#%d I'm a filter process; my rank is %d\n",CTA_PAR_MY_RANK, ctai_rank);
+   } else {
+      /* I am a worker starting to wait for new tasks */
+      if (IDEBUG>0) printf("#%d I'm a worker my rank is %d\n",CTA_PAR_MY_RANK, ctai_rank);
+      CTA_Modbuild_par_worker();
+   }
+
+   // Create a COSTA model component from my own implementation
+   intf=CTA_NULL;
+   for (i=0; i<CTA_MODEL_NUMFUNC; i++){
+      hfunc[i]=CTA_NULL;
+	  aierr[i]=CTA_OK;
+   }
+   aierr[I_CTA_MODEL_CREATE_SIZE    ]     =CTA_Func_Create("modbuild_par_create_size",      &modbuild_par_create_size,    intf, &hfunc[I_CTA_MODEL_CREATE_SIZE    ]);
+   aierr[I_CTA_MODEL_CREATE_INIT    ]     =CTA_Func_Create("modbuild_par_create_init",      &modbuild_par_create_init,    intf, &hfunc[I_CTA_MODEL_CREATE_INIT    ]);
+   aierr[I_CTA_MODEL_FREE           ]     =CTA_Func_Create("modbuild_par_free",             &modbuild_par_free,           intf, &hfunc[I_CTA_MODEL_FREE           ]);
+   aierr[I_CTA_MODEL_COMPUTE        ]     =CTA_Func_Create("modbuild_par_compute",          &modbuild_par_compute,        intf, &hfunc[I_CTA_MODEL_COMPUTE        ]);
+   aierr[I_CTA_MODEL_SET_STATE      ]     =CTA_Func_Create("modbuild_par_setstate",         &modbuild_par_setstate,       intf, &hfunc[I_CTA_MODEL_SET_STATE      ]);
+   aierr[I_CTA_MODEL_GET_STATE      ]     =CTA_Func_Create("modbuild_par_getstate",         &modbuild_par_getstate,       intf, &hfunc[I_CTA_MODEL_GET_STATE      ]);
+   aierr[CTA_MODEL_AXPY_MODEL       ]     =CTA_Func_Create("modbuild_par_axpymodel",        &modbuild_par_axpymodel,      intf, &hfunc[CTA_MODEL_AXPY_MODEL       ]);
+   aierr[CTA_MODEL_AXPY_STATE       ]     =CTA_Func_Create("modbuild_par_axpystate",        &modbuild_par_axpystate,      intf, &hfunc[CTA_MODEL_AXPY_STATE       ]);
+   aierr[CTA_MODEL_SET_FORC         ]     =CTA_Func_Create("modbuild_par_setforc",          &modbuild_par_setforc,        intf, &hfunc[CTA_MODEL_SET_FORC         ]);
+   aierr[CTA_MODEL_GET_FORC         ]     =CTA_Func_Create("modbuild_par_getforc",          &modbuild_par_getforc,        intf, &hfunc[CTA_MODEL_GET_FORC         ]);
+   aierr[CTA_MODEL_AXPY_FORC        ]     =CTA_Func_Create("modbuild_par_axpyforc",         &modbuild_par_axpyforc,       intf, &hfunc[CTA_MODEL_AXPY_FORC        ]);
+   aierr[CTA_MODEL_SET_PARAM        ]     =CTA_Func_Create("modbuild_par_setparam",         &modbuild_par_setparam,       intf, &hfunc[CTA_MODEL_SET_PARAM        ]);
+   aierr[CTA_MODEL_GET_PARAM        ]     =CTA_Func_Create("modbuild_par_getparam",         &modbuild_par_getparam,       intf, &hfunc[CTA_MODEL_GET_PARAM        ]);
+   aierr[CTA_MODEL_AXPY_PARAM       ]     =CTA_Func_Create("modbuild_par_axpyparam",        &modbuild_par_axpyparam,      intf, &hfunc[CTA_MODEL_AXPY_PARAM       ]);
+   aierr[CTA_MODEL_GET_STATESCALING ]     =CTA_Func_Create("modbuild_par_getstatescaling",  &modbuild_par_getstatescaling,intf, &hfunc[CTA_MODEL_GET_STATESCALING ]);
+   aierr[CTA_MODEL_GET_NOISE_COUNT  ]     =CTA_Func_Create("modbuild_par_getnoisecount",    &modbuild_par_getnoisecount,  intf, &hfunc[CTA_MODEL_GET_NOISE_COUNT  ]);
+   aierr[CTA_MODEL_GET_NOISE_COVAR  ]     =CTA_Func_Create("modbuild_par_getnoisecovar",    &modbuild_par_getnoisecovar,  intf, &hfunc[CTA_MODEL_GET_NOISE_COVAR  ]);
+   aierr[CTA_MODEL_GET_OBSVALUES    ]     =CTA_Func_Create("modbuild_par_getobsvalues",     &modbuild_par_getobsvalues,   intf, &hfunc[CTA_MODEL_GET_OBSVALUES    ]);
+   aierr[CTA_MODEL_GET_OBSSELECT    ]     =CTA_Func_Create("modbuild_par_getobsselect",     &modbuild_par_getobsselect,   intf, &hfunc[CTA_MODEL_GET_OBSSELECT    ]);
+   aierr[CTA_MODEL_ADD_NOISE        ]     =CTA_Func_Create("modbuild_par_addnoise",         &modbuild_par_addnoise,       intf, &hfunc[CTA_MODEL_ADD_NOISE        ]);
+   aierr[I_CTA_MODEL_IMPORT         ]     =CTA_Func_Create("modbuild_par_import",           &modbuild_par_import,         intf, &hfunc[I_CTA_MODEL_IMPORT         ]);
+   aierr[I_CTA_MODEL_EXPORT         ]     =CTA_Func_Create("modbuild_par_export",           &modbuild_par_export,         intf, &hfunc[I_CTA_MODEL_EXPORT         ]);
+   aierr[CTA_MODEL_GET_CURRENTTIME  ]     =CTA_Func_Create("modbuild_par_getcurrenttime",   &modbuild_par_getcurrenttime, intf, &hfunc[CTA_MODEL_GET_CURRENTTIME  ]);
+   aierr[CTA_MODEL_GET_TIMEHORIZON  ]     =CTA_Func_Create("modbuild_par_gettimehorizon",   &modbuild_par_gettimehorizon, intf, &hfunc[CTA_MODEL_GET_TIMEHORIZON  ]);
+   aierr[I_CTA_MODEL_GETOBSLOCALIZATION ] =CTA_Func_Create("modbuild_par_getobslocalization",   &modbuild_par_getobslocalization  , intf, &hfunc[I_CTA_MODEL_GETOBSLOCALIZATION   ]);
+   aierr[CTA_MODEL_SAVE_PERSISTENTSTATE ] =CTA_Func_Create("modbuild_par_savepersistentstate",  &modbuild_par_savepersistentstate , intf, &hfunc[CTA_MODEL_SAVE_PERSISTENTSTATE  ]);
+   aierr[CTA_MODEL_LOAD_PERSISTENTSTATE]  =CTA_Func_Create("modbuild_par_loadpersistentstate",  &modbuild_par_loadpersistentstate , intf, &hfunc[CTA_MODEL_LOAD_PERSISTENTSTATE  ]);
+   aierr[CTA_MODEL_SAVE_INTERNALSTATE  ]  =CTA_Func_Create("modbuild_par_saveinternalstate",    &modbuild_par_saveinternalstate   , intf, &hfunc[CTA_MODEL_SAVE_INTERNALSTATE    ]);
+   aierr[CTA_MODEL_RESTORE_INTERNALSTATE] =CTA_Func_Create("modbuild_par_restoreinternalstate", &modbuild_par_restoreinternalstate, intf, &hfunc[CTA_MODEL_RESTORE_INTERNALSTATE ]);
+   aierr[CTA_MODEL_RELEASE_INTERNALSTATE] =CTA_Func_Create("modbuild_par_releaseinternalstate", &modbuild_par_releaseinternalstate, intf, &hfunc[CTA_MODEL_RELEASE_INTERNALSTATE ]);
+   aierr[CTA_MODEL_ANNOUNCE_OBSVALUES]    =CTA_Func_Create("modbuild_par_announceobsvalues", &modbuild_par_announceobsvalues, intf, &hfunc[CTA_MODEL_ANNOUNCE_OBSVALUES]);
+
+   aierr[CTA_MODEL_GET_NUMDOMAINS]            =CTA_Func_Create("modbuild_par_getnumdomains",  &modbuild_par_getnumdomains, intf, &hfunc[CTA_MODEL_GET_NUMDOMAINS]);
+   aierr[CTA_MODEL_GET_OBSSELECTOR]           =CTA_Func_Create("modbuild_par_getobsselector", &modbuild_par_getobsselector, intf, &hfunc[CTA_MODEL_GET_OBSSELECTOR]);
+   aierr[CTA_MODEL_GET_OBSLOCALIZATIONDOMAIN] =CTA_Func_Create("modbuild_par_getobslocalizationdomain", &modbuild_par_getobslocalizationdomain, intf, &hfunc[CTA_MODEL_GET_OBSLOCALIZATIONDOMAIN]);
+   aierr[CTA_MODEL_GET_STATEDOMAIN]           =CTA_Func_Create("modbuild_par_getstatedomain", &modbuild_par_getstatedomain, intf, &hfunc[CTA_MODEL_GET_STATEDOMAIN]);
+   aierr[CTA_MODEL_AXPY_STATEDOMAIN]          =CTA_Func_Create("modbuild_par_model_axpystatedomain", &modbuild_par_model_axpystatedomain, intf, &hfunc[CTA_MODEL_AXPY_STATEDOMAIN]);
+
+
+
+
+
+   // If something went wrong we are really fatal!
+   for (i=0; i<CTA_MODEL_NUMFUNC ;i++){
+      if (aierr[i]!=CTA_OK) {
+      printf("Internal error in CTA_Modbuild_par_createclass\n");
+      printf("Cannot create function handles for own implementation\n");
+      exit(1);
+      }
+   }
+   /* Create new model class */
+   ierr=CTA_Model_DefineClass("CTA_MODBUILD_PAR", hfunc, modelcls);
+   if (ierr!=CTA_NULL) {
+      printf("Internal error in CTA_Modbuild_par_createclass\n");
+      printf("Cannot create new class\n");
+      exit(-1);
+   }
+#else
+   modelcls=CTA_NULL;
+#endif
+
+
+}
+
+
+
+
+#define CTA_MODELBUILD_PAR_CREATECLASS_F77  F77_CALL(cta_modbuild_par_createclass,CTA_MODBUILD_PAR_CREATECLASS)
+#define CTA_MODELBUILD_PAR_FINALIZE_F77     F77_CALL(cta_modbuild_par_finalize,CTA_MODBUILD_PAR_FINALIZE)
+
+void CTA_MODELBUILD_PAR_CREATECLASS_F77(CTA_ModelClass *modelcls){
+   CTA_Modbuild_par_CreateClass(modelcls);
+}
+
+void CTA_MODELBUILD_PAR_FINALIZE_F77(){
+   CTA_Modbuild_par_Finalize();
+}
+
+
diff --git a/costa/native/cta/src/cta_modbuild_sp.c b/costa/native/cta/src/cta_modbuild_sp.c
new file mode 100644
index 000000000..c15c85368
--- /dev/null
+++ b/costa/native/cta/src/cta_modbuild_sp.c
@@ -0,0 +1,1351 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_modbuild_sp.c $
+$Revision: 3407 $, $Date: 2012-08-17 13:50:50 +0200 (Fri, 17 Aug 2012) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2006  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+//#include<dlfcn.h>
+#include "cta_mem.h"
+#include "cta.h"
+#include "ctai.h"
+#include "f_cta_utils.h"
+#include "cta_model_utilities.h"
+#include "cta_util_statistics.h"
+#include "cta_metainfo.h"
+#include "cta_message.h"
+
+#define INDX_THIS              ( 0) /* Handle of instance               */
+#define INDX_TIME              ( 1) /* Time instance of model (state)   */
+#define INDX_TIMEHORIZON       ( 2) /* Time instance of model (state)   */
+#define INDX_STATE             ( 3) /* State vector of model            */
+#define INDX_ADD_NOISE_SPAN    ( 4)
+#define INDX_AXPY_FORC_SPAN    ( 5) /* Timespan to add given offset to forcings */
+#define INDX_AXPY_FORC         ( 6) /* offset to be added to model forcings     */
+#define INDX_PARAM             ( 7) /* State vector of model parameters */
+#define INDX_NAME_NOISE        ( 8)
+#define INDX_NNOISE            ( 9)
+#define INDX_ZERO_FORC         (10) /* zero noise vector */
+#define INDX_USRDATA           (11)
+#define INDX_USRFUNC_CREATE    (12)
+#define INDX_USRFUNC_COVAR     (13)
+#define INDX_USRFUNC_OBS       (14)
+#define INDX_USRFUNC_OBSSEL    (15)
+#define INDX_USRFUNC_COMPUTE   (16)
+#define INDX_USRFUNC_FREE      (17)
+#define INDX_FORC              (18) /* State vector of forcings */
+#define INDX_ADJ_SPAN          (19) /* Time span to prepaire adjoint for */
+#define INDX_ADJ_DESCR_FORC    (20) /* Observation description describing the forcings of the adjoint */
+#define INDX_ADJ_VFORC         (21) /* Vector The forcings of the adjoint run that have been set */
+#define SIZE_DATABLK           (22)
+
+#define IDEBUG (0)
+#define CLASSNAME "CTA_Modbuild_sp"
+
+void modbuild_sp_create_size(CTA_Handle userdata, int *memsize, int *ierr){
+   /* Create size for datablock holding:
+      0 CTA_Model:  mthis     Handle of this model
+      1 CTA_Time :  time      Time instance of model
+      2 CTA_TreeVector:  state     Model state
+      3 CTA_TreeVector:  snoise    Noise offset state-vector
+      4 CTA_TreeVector:  sparam    Parameter state-vector
+      5 CTA_Handle: tusrmod   User data for model
+      6... CTA_Func: func     User functions
+   */
+   *memsize=sizeof(CTA_Handle)*(SIZE_DATABLK);
+}
+
+/** \brief Get a user function as specified in the input-tree
+ *  \note internal function
+ *
+ *  \param tinput  I  input-configuration tree
+ *  \param path    I  path in input tree to user function
+ *  \param func    O  handle to user function
+ *  \return CTA_OK if successful
+ */
+#undef METHOD
+#define METHOD "get_user_function" 
+int modbuild_sp_getusrfunction(CTA_Tree tinput, char *path, CTA_Func *func){
+   CTA_Handle hfunc;
+   CTA_Func func_wrk;
+   CTA_Datatype datatype;
+   int retval;
+
+   retval=CTA_Tree_GetHandleStr(tinput,path, &hfunc);
+   if (retval==CTA_OK) {
+      CTA_Handle_GetDatatype(hfunc, &datatype);
+      if (datatype==CTA_STRING) {
+         retval=CTA_Handle_Find(hfunc, CTA_FUNCTION, &func_wrk);
+         if (retval!=CTA_OK){
+			 char message[1024];
+             sprintf(message,"Cannot find the function that is specified in %s \n",path);
+             CTA_WRITE_INFO(message);		
+             return retval;
+         }
+         retval=CTA_Func_Duplicate(func_wrk,func);
+         if (retval!=CTA_OK) return retval;
+      } else if (datatype==CTA_FUNCTION) {
+         retval=CTA_Func_Duplicate(hfunc,func);
+         if (retval!=CTA_OK) return retval;
+      } else {
+	      char message[1024];
+		  sprintf(message,"Specification in \"%s\" is neither CTA_STRING nor CTA_FUNC \n",path);
+          CTA_WRITE_ERROR(message);
+          return CTA_INCOMPATIBLE_HANDLE;
+      }
+   } else {
+	  char message[1024];
+      sprintf(message,"Path \"%s\" is not specified in input\n",path);
+      CTA_WRITE_INFO(message);
+   }
+   return CTA_OK;
+}
+
+#undef METHOD
+#define METHOD "Create" 
+void modbuild_sp_create_init(CTA_Handle *this, CTA_Handle *data, CTA_Handle *hinput, int *ierr){
+   int cleanup;               /*Flag indicating whether tinput must be freed */
+   CTA_Func fusrcreate;       /* Function handle of user implementation      */
+   CTA_Function *usrcreate;   /* Function pointer of user implementation     */
+   CTA_Tree tinput;           /* Input configuration tree                    */
+   int retval;                /* return status of COSTA method               */
+   CTA_String funcar1;        /* Name of ar(1)-function                      */
+   CTA_Handle hmodelinput;    /* Handle to model specific input              */
+   CTA_Handle hspecialinput;  /* Handle to special model input               */
+   double dzero;              /* double with value 0.0                       */
+   double tstart, tend;       /* start and end time of time horizon of model */
+
+   if (IDEBUG) printf("Start of modbuild_sp_create_init\n");
+
+   dzero=0.0;
+
+
+   /* Convert input into a COSTA-tree if it is not done already */
+    *ierr=CTA_Model_Util_InputTree(*hinput, &tinput, &cleanup);
+    if (*ierr!=CTA_OK) return;
+
+   /* Initialise DATA */
+
+     data[INDX_THIS]            =*this;
+     data[INDX_STATE]           =CTA_NULL;
+     data[INDX_AXPY_FORC]       =CTA_NULL;
+     data[INDX_PARAM]           =CTA_NULL;
+     data[INDX_NNOISE]          =0;
+     data[INDX_ZERO_FORC]       =CTA_NULL;
+     data[INDX_USRDATA]         =CTA_NULL;
+     data[INDX_USRFUNC_CREATE]  =CTA_NULL;
+     data[INDX_USRFUNC_COVAR]   =CTA_NULL;
+     data[INDX_USRFUNC_OBS]     =CTA_NULL;
+     data[INDX_USRFUNC_OBSSEL]  =CTA_NULL;
+     data[INDX_USRFUNC_COMPUTE] =CTA_NULL;
+     data[INDX_USRFUNC_FREE]    =CTA_NULL;
+
+
+     *ierr=CTA_Time_Create(&data[INDX_TIME]);
+     *ierr=CTA_Time_Create(&data[INDX_TIMEHORIZON]);
+     *ierr=CTA_Time_Create(&data[INDX_ADD_NOISE_SPAN]);
+     *ierr=CTA_Time_Create(&data[INDX_AXPY_FORC_SPAN]);
+     *ierr=CTA_String_Create(&data[INDX_NAME_NOISE]);
+     *ierr=CTA_Time_Create(&data[INDX_ADJ_SPAN]);
+
+     /* First, check for a special 'model builder'-model: the AR(1) process model */
+
+     retval=CTA_Tree_GetHandleStr(tinput,"/modelbuild_sp/special_ar1",
+                                  &hspecialinput);
+
+     if (retval ==CTA_OK) {
+       /* skip the remainder and provide the special ar(1) model */
+       /*  define the following functions directly:
+             modbuild_sp_ar1_compute
+             modbuild_sp_ar1_covar
+
+       */
+       *ierr = CTA_String_Create(&funcar1);
+
+       *ierr = CTA_String_Set(funcar1, "modbuild_sp_ar1_create");
+       *ierr=CTA_Handle_Find(funcar1, CTA_FUNCTION, &data[INDX_USRFUNC_CREATE]);
+       *ierr = CTA_String_Set(funcar1, "modbuild_sp_ar1_compute");
+       *ierr=CTA_Handle_Find(funcar1, CTA_FUNCTION, &data[INDX_USRFUNC_COMPUTE]);
+       *ierr = CTA_String_Set(funcar1, "modbuild_sp_ar1_covar");
+       *ierr=CTA_Handle_Find(funcar1, CTA_FUNCTION, &data[INDX_USRFUNC_COVAR]);
+       *ierr = CTA_String_Set(funcar1, "modbuild_sp_ar1_getobsval");
+       *ierr=CTA_Handle_Find(funcar1, CTA_FUNCTION, &data[INDX_USRFUNC_OBS]);
+
+        retval=CTA_Tree_GetHandleStr(tinput,"/modelbuild_sp/special_ar1", &hmodelinput);
+
+
+     }
+     else {
+       /* Get the provided function names from the input and store the
+          corresponding function handles in data-array */
+        retval=modbuild_sp_getusrfunction(tinput, "/modelbuild_sp/functions/create",     &data[INDX_USRFUNC_CREATE]);
+        retval=modbuild_sp_getusrfunction(tinput,"/modelbuild_sp/functions/covariance",  &data[INDX_USRFUNC_COVAR]);
+        retval=modbuild_sp_getusrfunction(tinput, "/modelbuild_sp/functions/getobsvals", &data[INDX_USRFUNC_OBS]);
+        retval=modbuild_sp_getusrfunction(tinput, "/modelbuild_sp/functions/getobssel",  &data[INDX_USRFUNC_OBSSEL]);
+        retval=modbuild_sp_getusrfunction(tinput, "/modelbuild_sp/functions/compute",    &data[INDX_USRFUNC_COMPUTE]);
+        retval=modbuild_sp_getusrfunction(tinput, "/modelbuild_sp/functions/free",       &data[INDX_USRFUNC_FREE]);
+
+
+        if (IDEBUG>0) {
+          printf("Modbuild_sp: the following function handles have been created:\n");
+          printf("create: %d \n",data[INDX_USRFUNC_CREATE]);
+          printf("covariance: %d \n",data[INDX_USRFUNC_COVAR]);
+          printf("getobsvals: %d \n",data[INDX_USRFUNC_OBS]);
+          printf("getobssel: %d \n",data[INDX_USRFUNC_OBSSEL]);
+          printf("compute: %d \n",data[INDX_USRFUNC_COMPUTE]);
+          printf("free: %d \n",data[INDX_USRFUNC_FREE]);
+          printf("end message.\n");
+        }
+
+        /* Get model input from the input tree */
+        retval=CTA_Tree_GetHandleStr(tinput,"/modelbuild_sp/model", &hmodelinput);
+
+     } // end if: if ar1 then use standard function else read from xml-file
+
+     /* Call the user supplied create function */
+   fusrcreate=data[INDX_USRFUNC_CREATE];
+   //  printf("indx_usrfunc_create %d\n",INDX_USRFUNC_CREATE);
+   if (fusrcreate==CTA_NULL) {
+       CTA_WRITE_ERROR("Usrfunc_create function does not exist! \n");
+      *ierr=CTA_NOT_IMPLEMENTED;
+      return;
+   }
+
+   /* Get function pointer */
+   *ierr=CTA_Func_GetFunc(fusrcreate,&usrcreate);
+
+   if (*ierr!=CTA_OK) return;
+
+   /* CALL USR_CREATE(CTA_Handle *hmodelinput, CTA_TreeVector *state,
+                      CTA_TreeVector *sbound, CTA_TreeVector *sparam, int *nnoise,
+                      CTA_Time  *time0, CTA_String *snamnoise,
+                      CTA_Handle *husrdata, int* ierr)             */
+
+   /* Get input of the model itself */
+
+   /* call user function */
+   if (IDEBUG) printf("Calling user function 'usrcreate'\n");
+   usrcreate(&hmodelinput, &data[INDX_STATE],  &data[INDX_AXPY_FORC],
+             &data[INDX_PARAM], &data[INDX_NNOISE], &data[INDX_TIMEHORIZON],
+             &data[INDX_NAME_NOISE], &data[INDX_USRDATA], ierr);
+   if (IDEBUG) printf("End of user function 'usrcreate'\n");
+   if (*ierr!=CTA_OK) return;
+
+   /* Set time (Start of the time horizon) */
+   *ierr=CTA_Time_GetSpan(data[INDX_TIMEHORIZON],&tstart, &tend);
+   if (*ierr!=CTA_OK) return;
+   *ierr=CTA_Time_SetSpan(data[INDX_TIME], tstart, tend);
+   if (*ierr!=CTA_OK) return;
+
+   /* Some additional initialisation: */
+       //printf(" additional initialisation \n");
+
+   /* Set forcings axpy state vectors */
+   if (data[INDX_AXPY_FORC]!=CTA_NULL) {
+      *ierr=CTA_TreeVector_SetConstant(data[INDX_AXPY_FORC], &dzero,CTA_DOUBLE);
+      if (*ierr!=CTA_OK) return;
+      *ierr=CTA_TreeVector_Duplicate(data[INDX_AXPY_FORC],&data[INDX_ZERO_FORC]);
+      if (*ierr!=CTA_OK) return;
+   }
+
+   *ierr=CTA_Time_SetSpan(data[INDX_ADD_NOISE_SPAN],0.0,-1.0); /* empty span */
+   if (*ierr!=CTA_OK) return;
+   *ierr=CTA_Time_SetSpan(data[INDX_AXPY_FORC_SPAN],0.0,-1.0); /* empty span */
+   if (*ierr!=CTA_OK) return;
+   *ierr=CTA_Time_SetSpan(data[INDX_ADJ_SPAN],0.0,-1.0);      /* empty span */
+   if (*ierr!=CTA_OK) return;
+
+   if (IDEBUG) printf("before cleaning up the input tree\n");
+
+
+   /* Clean-up input tree */
+   if (cleanup==CTA_TRUE){
+      *ierr=CTA_Tree_Free(&tinput);
+   }
+   *ierr=CTA_OK;
+   if (IDEBUG) printf(" end of create_init \n");
+}
+
+void modbuild_sp_export(CTA_Handle *data, CTA_Handle *hexport, int *ierr){
+
+   CTA_Datatype datatype;
+
+   /* Get type of handle of hexport */
+   *ierr=CTA_Handle_GetDatatype(*hexport, &datatype);
+   if (*ierr != CTA_OK) return;
+
+   /* Export header */
+   if (datatype==CTA_PACK){
+      CTA_Pack_Add(*hexport,data,sizeof(CTA_HANDLE)*SIZE_DATABLK);
+   }
+   else if (datatype==CTA_FILE){
+      *ierr=CTA_FORMAT_NOT_SUPPORTED;
+      return;
+   }
+   else {
+      *ierr=CTA_FORMAT_NOT_SUPPORTED;
+      return;
+   }
+
+   /* Pack current time */
+   if (data[INDX_TIME]!=CTA_NULL){
+      CTA_Time_Export(data[INDX_TIME], *hexport);
+   }
+
+   /* Pack time horizon */
+   if (data[INDX_TIMEHORIZON]!=CTA_NULL){
+      CTA_Time_Export(data[INDX_TIMEHORIZON], *hexport);
+   }
+
+
+   /* Pack state-vector */
+   if (data[INDX_STATE]!=CTA_NULL){
+      CTA_TreeVector_Export(data[INDX_STATE], *hexport);
+   }
+   /* Pack noise span */
+   if (data[INDX_ADD_NOISE_SPAN]!=CTA_NULL){
+      CTA_Time_Export(data[INDX_ADD_NOISE_SPAN], *hexport);
+   }
+   /* Pack forcings span */
+   if (data[INDX_AXPY_FORC_SPAN]!=CTA_NULL){
+      CTA_Time_Export(data[INDX_AXPY_FORC_SPAN], *hexport);
+   }
+
+   /* Pack adjoint span */
+   if (data[INDX_ADJ_SPAN]!=CTA_NULL){
+      CTA_Time_Export(data[INDX_ADJ_SPAN], *hexport);
+   }
+
+   /* Pack forcings addition */
+   if (data[INDX_AXPY_FORC]!=CTA_NULL){
+      CTA_TreeVector_Export(data[INDX_AXPY_FORC], *hexport);
+   }
+
+   /* Pack model parameters */
+   if (data[INDX_PARAM]!=CTA_NULL){
+      CTA_TreeVector_Export(data[INDX_PARAM], *hexport);
+   }
+}
+
+
+void modbuild_sp_import(CTA_Handle *data, CTA_Handle *himport, int *ierr){
+
+   CTA_Handle header[SIZE_DATABLK];
+   CTA_Datatype datatype;
+
+   /* Get type of handle of hexport */
+   *ierr=CTA_Handle_GetDatatype(*himport, &datatype);
+   if (*ierr != CTA_OK) return;
+
+   /* Import header */
+   if (datatype==CTA_PACK){
+      CTA_Pack_Get(*himport,header,sizeof(CTA_HANDLE)*SIZE_DATABLK);
+   }
+   else if (datatype==CTA_FILE){
+      *ierr=CTA_FORMAT_NOT_SUPPORTED;
+      return;
+   }
+   else {
+      *ierr=CTA_FORMAT_NOT_SUPPORTED;
+      return;
+   }
+
+   /* Unpack current time */
+   if (header[INDX_TIME]!=CTA_NULL){
+      CTA_Time_Import(data[INDX_TIME], *himport);
+   }
+
+   /* Unpack time horizon */
+   if (header[INDX_TIMEHORIZON]!=CTA_NULL){
+      CTA_Time_Import(data[INDX_TIMEHORIZON], *himport);
+   }
+
+   /* Unpack state-vector */
+   if (header[INDX_STATE]!=CTA_NULL){
+      CTA_TreeVector_Import(data[INDX_STATE], *himport);
+   }
+
+   /* Unpack noise span */
+   if (header[INDX_ADD_NOISE_SPAN]!=CTA_NULL){
+      CTA_Time_Import(data[INDX_ADD_NOISE_SPAN], *himport);
+   }
+
+   /* Unpack forcings span */
+   if (header[INDX_AXPY_FORC_SPAN]!=CTA_NULL){
+      CTA_Time_Import(data[INDX_AXPY_FORC_SPAN], *himport);
+   }
+
+   /* Unpack adjoint span */
+   if (header[INDX_ADJ_SPAN]!=CTA_NULL){
+      CTA_Time_Import(data[INDX_ADJ_SPAN], *himport);
+   }
+
+   /* Unpack forcings addition */
+   if (header[INDX_AXPY_FORC]!=CTA_NULL){
+      CTA_TreeVector_Import(data[INDX_AXPY_FORC], *himport);
+   }
+
+   /* Unpack model parameters */
+   if (header[INDX_PARAM]!=CTA_NULL){
+      CTA_TreeVector_Import(data[INDX_PARAM], *himport);
+   }
+}
+
+
+void modbuild_sp_adjcompute(CTA_Handle *data ,int* time, int *ierr){
+    *ierr=CTA_NOT_IMPLEMENTED;
+}
+
+void modbuild_sp_adjprepare(CTA_Handle *data ,int* time, int *ierr){
+   /* do we already have a add noise timespan? */
+   if (data[INDX_ADJ_SPAN]==CTA_NULL) {
+      *ierr=CTA_Time_Create(&data[INDX_ADJ_SPAN]);
+      if (*ierr!=CTA_OK) return;
+   }
+   /* Just copy time span */
+   *ierr=CTA_Time_Copy(*time,data[INDX_ADJ_SPAN]);
+}
+
+
+void modbuild_sp_adjsetforc(CTA_Handle *data ,int* hdescr, int *vforc, int *ierr){
+    int nmeasr;
+    CTA_Vector vforc_temp;
+
+    *ierr=CTA_ObsDescr_Observation_Count(*hdescr, &nmeasr);
+    if (*ierr!=CTA_OK) return;
+    if (nmeasr<0) return;
+
+    /* Create work variables */
+    *ierr=CTA_Vector_Duplicate(*vforc, &vforc_temp);
+    if (*ierr!=CTA_OK) return;
+
+    /* TODO WE HAVE TO COPY THE OBJECT HOMEWORK FOR NILS FOR TIME BEEING HOPE THAT OBJECT IS NOT DELETED AND ONLY SAVE HANDLE!!!!!! */
+    if (data[INDX_ADJ_DESCR_FORC]!=CTA_NULL){
+        *ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR, 1, CTA_HANDLE, CTA_NULL, &data[INDX_ADJ_DESCR_FORC]);
+        *ierr=CTA_Vector_SetVal(data[INDX_ADJ_DESCR_FORC], 1, hdescr, CTA_HANDLE);
+
+        *ierr=CTA_Vector_Duplicate(*vforc, &data[INDX_ADJ_VFORC]);
+    }
+    *ierr=CTA_Vector_AppendVal(data[INDX_ADJ_DESCR_FORC], hdescr, CTA_HANDLE);
+
+    *ierr=CTA_Vector_AppendVal(data[INDX_ADJ_VFORC], &vforc_temp, CTA_HANDLE);
+}
+
+
+void modbuild_sp_free(CTA_Handle *data ,int *ierr){
+
+   CTA_Func fusrfree;       /* Function handle of user implementation       */
+   CTA_Function *usrfree;   /* Function pointer of user implementation      */
+   CTA_Handle husrdata;     /* Handle to user data                          */
+
+   fusrfree  = data[INDX_USRFUNC_FREE];
+   husrdata  = data[INDX_USRDATA];
+
+   if (fusrfree!=CTA_NULL){
+      /* Get function pointer */
+      *ierr=CTA_Func_GetFunc(fusrfree,&usrfree);
+      if (*ierr!=CTA_OK) return;
+      usrfree(&husrdata, ierr);
+   }
+    *ierr=CTA_OK;
+}
+
+#undef METHOD
+#define METHOD "Compute"
+void modbuild_sp_compute(
+          CTA_Handle *data,
+          CTA_Time *timSim, /* current time, "now" */
+          int *ierr)
+{
+   CTA_TreeVector state;         /* state vector of model                        */
+   CTA_TreeVector sparam;        /* state vector of model parameters             */
+   CTA_TreeVector saxpyforc;     /* state vector of added forcings               */
+   CTA_Func fusrcompute;         /* Function handle of user implementation       */
+   CTA_Function *usrcompute;     /* Function pointer of user implementation      */
+   CTA_Handle husrdata;          /* Handle to model-spefic data                  */
+   CTA_Time timemod;             /* Current time instance of model               */
+   CTA_Time timeaddnoise;        /* Time instance for model to add noise         */
+   CTA_Time timeaxpy;            /* Timespan to apply given noise axpy           */
+   double t1, t2, t1Sim, tDum;   /* start and end of simulation timespan         */
+   BOOL inspan;                  /* simulation timespan in inside given span     */
+   BOOL baddnoise;               /* Model must add its own noise or not          */
+   CTA_Time timespan;            /* Timespan of computations                     */
+   int isSpan;                   /* Flag indicating whether timSim is a timespan */
+   double eps;                   /* Tollerence for comparing time instances      */
+
+
+   fusrcompute = data[INDX_USRFUNC_COMPUTE];
+
+   /* check whether we have an implementation and number of noise
+      parameters is >0                                           */
+   if (fusrcompute==CTA_NULL) {
+      CTA_WRITE_ERROR("user compute function does not exist \n");
+      *ierr=CTA_NOT_IMPLEMENTED;
+      return;
+   }
+
+   state        = data[INDX_STATE];
+   timemod      = data[INDX_TIME];
+   timeaddnoise = data[INDX_ADD_NOISE_SPAN];
+   timeaxpy     = data[INDX_AXPY_FORC_SPAN];
+   sparam       = data[INDX_PARAM];
+   husrdata     = data[INDX_USRDATA];
+
+
+   /* time management */
+   *ierr=CTA_Time_GetSpan(*timSim,&t1Sim, &t2);
+   *ierr=CTA_Time_GetSpan(timemod,&t1, &tDum);
+
+   *ierr=CTA_Time_IsSpan(*timSim, &isSpan);
+   if (isSpan == CTA_TRUE){
+      eps=M_EPS*fabs(t1Sim)+M_EPS;
+      if (fabs(t1Sim-t1)>eps) {
+         printf("Error in modbuild_sp_compute: current time of model is %f; start of simulation span is %f\n",
+            t1Sim,t1);
+         exit(-1);
+      }
+   }
+
+   /* Set the simulation span of this compute */
+   *ierr=CTA_Time_Create(&timespan);
+   *ierr=CTA_Time_SetSpan(timespan, t1, t2);
+
+   /* Addition axpy of noise */
+   *ierr=CTA_Time_InSpan(timespan, timeaxpy, &inspan); /* is "now" inside timeasxpy? */
+   if (*ierr!=CTA_OK) {
+     CTA_WRITE_ERROR("Timespan is not in interval");
+     return;
+   }
+   if (inspan) {
+      saxpyforc=data[INDX_AXPY_FORC];
+   } else {
+      saxpyforc=data[INDX_ZERO_FORC];
+   }
+
+   /* Addition of noise by model itself */
+
+   *ierr=CTA_Time_InSpan(timespan, timeaddnoise, &baddnoise);
+   if (*ierr!=CTA_OK) {
+     printf("modbuild_sp_compute:cta_time_inspan noise: ierr: %d \n",*ierr);
+     return;
+      }
+
+   /* Get function pointer */
+   *ierr=CTA_Func_GetFunc(fusrcompute,&usrcompute);
+   if (*ierr!=CTA_OK) return;
+
+   *ierr=CTA_Time_GetSpan(timespan, &t1, &t2);
+//     printf("cta_modbuild_compute:before call my_compute t1, t2:   %f %f \n", t1,t2);
+	 fflush(stdout);
+
+
+   /* CALL USR_COMPUTE(CTA_Time *timesspan,CTA_TreeVector *state,
+                      CTA_TreeVector *saxpyforc, BOOL *baddnoise,
+                      CTA_TreeVector *sparam, CTA_Handle husrdata,
+                      int* ierr) */
+   usrcompute(&timespan, &state, &saxpyforc, &baddnoise, &sparam,
+               &husrdata, ierr);
+
+   if (*ierr!=CTA_OK) {
+     printf("modbuild_sp_compute:usr_compute: WRONG: %d  \n",*ierr);
+	 fflush(stdout);
+     return;
+   }
+
+   // printf("Back in modbuild_sp_compute; adjust timespan  \n");
+
+   *ierr=CTA_Time_SetSpan(timemod, t2, t2);
+   if (*ierr!=CTA_OK) {
+     printf("modbuild_sp_compute:CTA_Time_SetSpan:  %d   ",*ierr);
+	 fflush(stdout);
+     return;
+   }
+
+   *ierr=CTA_Time_Free(&timespan);
+
+}
+
+
+void modbuild_sp_setstate(CTA_Handle *data, CTA_TreeVector *state, int *ierr){
+
+     *ierr=CTA_TreeVector_Copy(*state,data[INDX_STATE]);
+}
+
+void modbuild_sp_getstate(CTA_Handle *data, CTA_TreeVector *state, int *ierr){
+
+  if (IDEBUG>9) {printf("DEBUG: modbuild_sp_getstate:state :  %d \n",*state);}
+
+     /* Duplicate the state if necessary */
+     if (*state==CTA_NULL){
+
+        *ierr=CTA_TreeVector_Duplicate(data[INDX_STATE], state);
+     }
+     else {
+     /* Just copy the state */
+
+        *ierr=CTA_TreeVector_Copy(data[INDX_STATE], *state);
+     }
+}
+
+
+void modbuild_sp_axpymodel(CTA_Handle *datay, double *alpha, CTA_Handle *datax,
+                           int *ierr){
+
+     *ierr=CTA_TreeVector_Axpy(datay[INDX_STATE], *alpha, datax[INDX_STATE]);
+}
+
+
+void modbuild_sp_axpystate(CTA_Handle *data, double *alpha, CTA_TreeVector *statex,
+                           int *ierr){
+
+     *ierr=CTA_TreeVector_Axpy(data[INDX_STATE], *alpha, *statex);
+}
+
+void modbuild_sp_setforc(CTA_Handle *data, CTA_TreeVector *state, int *ierr){
+
+     *ierr=CTA_TreeVector_Copy(*state,data[INDX_FORC]);
+}
+
+void modbuild_sp_getforc(CTA_Handle *data, CTA_TreeVector *state, int *ierr){
+
+     /* Duplicate the state if necessary */
+     if (*state==CTA_NULL){
+        *ierr=CTA_TreeVector_Duplicate(data[INDX_FORC], state);
+     }
+     else {
+     /* Just copy the state */
+        *ierr=CTA_TreeVector_Copy(data[INDX_FORC], *state);
+     }
+
+}
+
+void modbuild_sp_axpyforc(CTA_Handle *data, CTA_Time *tspan, double *alpha,
+                          CTA_TreeVector *statex, int *ierr){
+
+
+   /* Do we have a time object ? */
+   if (data[INDX_AXPY_FORC_SPAN]==CTA_NULL) {
+      *ierr=CTA_Time_Create(&data[INDX_AXPY_FORC_SPAN]);
+      if (*ierr!=CTA_OK) return;
+   }
+   /* Copy time */
+   *ierr=CTA_Time_Copy(data[INDX_AXPY_FORC_SPAN],*tspan);
+   if (*ierr!=CTA_OK) return;
+
+   /* Copy state */
+   *ierr=CTA_TreeVector_Copy(*statex,data[INDX_AXPY_FORC]);
+   if (*ierr!=CTA_OK) return;
+
+   /* apply scaling factor alpha */
+   *ierr=CTA_TreeVector_Scal(data[INDX_AXPY_FORC], *alpha);
+   if (*ierr!=CTA_OK) return;
+}
+
+void modbuild_sp_setparam(CTA_Handle *data, CTA_TreeVector *state, int *ierr){
+     *ierr=CTA_TreeVector_Copy(*state,data[INDX_PARAM]);
+   /* *ierr=CTA_NOT_IMPLEMENTED; */ /*wednesday, 27-09-06 jhs*/
+}
+
+void modbuild_sp_getparam(CTA_Handle *data, CTA_TreeVector *state, int *ierr){
+
+     /* Duplicate the state if necessary */
+     if (*state==CTA_NULL){
+        *ierr=CTA_TreeVector_Duplicate(data[INDX_PARAM], state);
+     }
+     else {
+     /* Just copy the state */
+        *ierr=CTA_TreeVector_Copy(data[INDX_PARAM], *state);
+     }
+
+    /* *ierr=CTA_NOT_IMPLEMENTED;*/  /*wednesday, 27-09-06 jhs*/
+}
+
+void modbuild_sp_axpyparam(CTA_Handle *data, double *alpha, CTA_TreeVector *statex,
+                           int *ierr){
+     *ierr=CTA_TreeVector_Axpy(data[INDX_PARAM], *alpha, *statex);
+   /* *ierr=CTA_NOT_IMPLEMENTED; */ /*wednesday, 27-09-06 jhs*/
+}
+
+void modbuild_sp_getnoisecount(CTA_Handle *data, int* nnoise, int* ierr) {
+    *nnoise=data[INDX_NNOISE];
+    *ierr=CTA_OK;
+}
+
+void modbuild_sp_getcurrenttime(CTA_Handle *data, int* tCurrent, int* ierr) {
+
+   *ierr=CTA_Time_Copy(data[INDX_TIME],*tCurrent);
+}
+
+void modbuild_sp_gettimehorizon(CTA_Handle *data, int* tHorizon, int* ierr) {
+
+   *ierr=CTA_Time_Copy(data[INDX_TIMEHORIZON],*tHorizon);
+}
+
+
+void modbuild_sp_getnoisecovar(CTA_Handle *data, CTA_TreeVector *colsvar, int* ierr){
+   int nnoise;            /* number of noise parameters              */
+   CTA_Handle husrdata;   /* Handle to model-spefic data             */
+   CTA_TreeVector state;       /* state vector of model                   */
+   CTA_TreeVector snoise;      /* noise state vector                      */
+   CTA_TreeVector snoisesub;   /* name of noise state vector              */
+   int inoise;            /* loop counter over noise parameters      */
+   CTA_Func fusrcovar;    /* Function handle of user implementation  */
+   CTA_Function *usrcovar;/* Function pointer of user implementation */
+   char *tag;             /* name/tag of substate with noise         */
+   int  len;              /* length of string tag (excluding char(0))*/
+
+   nnoise    = data[INDX_NNOISE];
+   state     = data[INDX_STATE];
+   snoisesub = data[INDX_NAME_NOISE];
+   snoise    = CTA_NULL;
+   fusrcovar = data[INDX_USRFUNC_COVAR];
+   husrdata  = data[INDX_USRDATA];
+
+   /* check whether we have an implementation and number of noise
+      parameters is >0                                           */
+
+
+   if (nnoise==0 || fusrcovar==CTA_NULL) {
+      *ierr=CTA_NOT_IMPLEMENTED;
+      return;
+   }
+
+
+      /* create state-vectors when colsvar contains CTA_NULL */
+   for (inoise=0; inoise<nnoise; inoise++) {
+      if (colsvar[inoise]==CTA_NULL){
+         /* we need to create states of noise parameters */
+         /* Check whether we already have handle to noise */
+         /* if not get it */
+         if (snoise==CTA_NULL){
+             /* Get name of substate */
+             *ierr=CTA_String_GetLength(snoisesub, &len);
+             if (*ierr!=CTA_OK) return;
+             tag=CTA_Malloc(sizeof(char)*(len+1));
+             *ierr=CTA_String_Get(snoisesub, tag);
+             if (*ierr!=CTA_OK) return;
+             /* get handle to noise substate */
+
+             *ierr=CTA_TreeVector_GetSubTreeVec(state, tag, &snoise);
+
+             if (*ierr!=CTA_OK) return;
+             free(tag);
+         }
+         /* duplicate noise state */
+         *ierr=CTA_TreeVector_Duplicate(snoise, &colsvar[inoise]);
+         if (*ierr!=CTA_OK) return;
+      }
+   }
+   /* Get function pointer */
+
+   *ierr=CTA_Func_GetFunc(fusrcovar,&usrcovar);
+   if (*ierr!=CTA_OK) return;
+
+
+  /* CALL USR_COVAR(CTA_TreeVector *colsvar,int* nnoise, CTA_Handle husrdata,
+                    int* ierr) */
+  usrcovar(colsvar, &nnoise, &husrdata, ierr);
+}
+
+
+void modbuild_sp_getobsvalues(CTA_Handle *data, CTA_Time *ttime,
+                               CTA_ObsDescr *hdescr, CTA_Vector *vval,
+                               int* ierr){
+
+   CTA_TreeVector state;      /* state vector of model                   */
+   CTA_Handle husrdata;  /* Handle to model-spefic data             */
+   CTA_Func fusrobs;     /* Function handle of user implementation  */
+   CTA_Function *usrobs; /* Function pointer of user implementation */
+
+   state     = data[INDX_STATE];
+   fusrobs = data[INDX_USRFUNC_OBS];
+   husrdata  = data[INDX_USRDATA];
+
+   /* check whether we have an implementation and number of noise
+      parameters is >0                                           */
+   if (fusrobs==CTA_NULL) {
+      *ierr=CTA_NOT_IMPLEMENTED;
+      return;
+   }
+
+   /* Get function pointer */
+   *ierr=CTA_Func_GetFunc(fusrobs,&usrobs);
+   if (*ierr!=CTA_OK) return;
+
+   /* CALL USR_OBS(CTA_TreeVector *state ,CTA_ObsDescr *hdescr CTA_Vector *vval,
+                    CTA_Handle husrdata, int* ierr) */
+   usrobs(&state, hdescr, vval, &husrdata, ierr);
+
+}
+
+void modbuild_sp_getobsselect(CTA_Handle *data, CTA_Time *ttime, CTA_ObsDescr *hdescr,
+                                CTA_String *sselect, int* ierr){
+
+   CTA_TreeVector state;         /* state vector of model                   */
+   CTA_Handle husrdata;     /* Handle to model-spefic data             */
+   CTA_Func fusrobssel;     /* Function handle of user implementation  */
+   CTA_Function *usrobssel; /* Function pointer of user implementation */
+
+   if (IDEBUG>10){printf("modbuild_sp_getobsselect START \n");}
+
+   state     = data[INDX_STATE];
+   fusrobssel = data[INDX_USRFUNC_OBSSEL];
+   husrdata  = data[INDX_USRDATA];
+
+   /* check whether we have an implementation and number of noise
+      parameters is >0                                           */
+   if (fusrobssel==CTA_NULL) {
+   if (IDEBUG>10){printf("modbuild_sp_getobsselect ; returning 1>0 \n");}
+      *ierr=CTA_String_Set(*sselect," 1 >= 0 ");
+      return;
+   }
+
+   /* Get function pointer */
+   *ierr=CTA_Func_GetFunc(fusrobssel,&usrobssel);
+   if (*ierr!=CTA_OK) return;
+
+   /* CALL USR_OBSSEL(CTA_TreeVector *state, CTA_Time *ttime, CTA_ObsDescr *hdescr,
+                                CTA_String *sselect, CTA_Handle husrdata,
+                                int* ierr)             */
+   usrobssel(&state, ttime, hdescr, sselect, &husrdata, ierr);
+
+}
+
+void modbuild_sp_addnoise(CTA_Handle *data, CTA_Time *ttime, int* ierr){
+
+   /* do we already have a add noise timespan? */
+   if (data[INDX_ADD_NOISE_SPAN]==CTA_NULL) {
+      *ierr=CTA_Time_Create(&data[INDX_ADD_NOISE_SPAN]);
+      if (*ierr!=CTA_OK) return;
+   }
+   /* Just copy time span */
+   *ierr=CTA_Time_Copy(*ttime,data[INDX_ADD_NOISE_SPAN]);
+}
+
+
+
+
+
+
+   /* CALL USR_CREATE(CTA_Handle *hmodelinput, CTA_TreeVector *state,
+                      CTA_TreeVector *sbound, CTA_TreeVector *sparam, int *nnoise,
+                      CTA_Time  *time0, CTA_String *snamnoise,
+                      CTA_Handle *husrdata, int* ierr)             */
+
+
+/*
+typedef struct {
+char name[20];
+  BOOL is2D;
+  int nx, ny, nz,nsize;
+double x_origin, y_origin, z_origin;
+double dx,dy,dz;
+} CTAI_Grid;
+
+typedef struct {
+char name[20];
+  int npoints;
+double x_origin, y_origin;
+CTA_Vector h_x_coords,h_y_coords;
+} CTAI_curve;
+*/
+
+
+void modbuild_sp_ar1_create(CTA_Handle *hinput, CTA_TreeVector *state,
+                      CTA_TreeVector *sbound, CTA_TreeVector *sparam, int *nnoise,
+                      CTA_Time  *tHorizon, CTA_String *snamnoise,
+                                    CTA_Handle *husrdata, int *ierr){
+   double p0[3];
+   double zeroes = 0.0;
+   CTA_Vector hvec1,hvec2, h_x_coords,h_y_coords;
+   CTA_String htag, hgridtype;
+   int len, retval;
+   char *str_grid, *str_tag;
+   CTAI_Gridm thisgrid, hgrid2;
+   CTA_Metainfo hdescr_state;
+   CTA_Handle hhandle;
+   int rest0=12;
+   CTA_Time inputTimeHorizon;
+
+   *ierr=CTA_Tree_GetValueStr(*hinput,"parameters/std_dev",&p0[0],CTA_DOUBLE);
+   *ierr=CTA_Tree_GetValueStr(*hinput,"parameters/kar_t",&p0[1],CTA_DOUBLE);
+   *ierr=CTA_Tree_GetValueStr(*hinput,"parameters/kar_l",&p0[2],CTA_DOUBLE);
+   if (*ierr != CTA_OK) {
+     printf("Error in ar1_create. Initial parameters not specified in input\n");
+     return;
+   }
+
+   *ierr=CTA_Metainfo_Create(&hdescr_state);
+   //   printf("metainfo_create: retval %d \n", *ierr);
+
+   /* get the tag of the metainfo of the noise model */
+   *ierr = CTA_Tree_GetHandleStr(*hinput,"tag",&htag);
+   *ierr = CTA_String_GetLength (htag, &len);
+   str_tag=CTA_Malloc((len+1)*sizeof(char));
+   *ierr = CTA_String_Get(htag, str_tag);
+
+   *ierr=CTA_Metainfo_SetRest(hdescr_state,&rest0);
+
+   *ierr=CTA_Metainfo_SetTag(hdescr_state,str_tag);
+
+    //    *ierr=CTA_Metainfo_SetTag(hdescr_state,"ar1-noise");
+
+   *ierr = CTA_Tree_GetHandleStr(*hinput,"grid/type_id",&hgridtype);
+   if (*ierr != CTA_OK) {
+     printf("Error in ar1_create. Grid type not given. \n");
+     return; }
+
+   *ierr = CTA_String_GetLength (hgridtype, &len);
+   str_grid=CTA_Malloc((len+1)*sizeof(char));
+   *ierr = CTA_String_Get(hgridtype, str_grid);
+   if (!strncmp("2D",str_grid,len) || !strncmp("3D",str_grid,len )) {
+     //  printf("grid type is 2D/3D, now reading more parameters \n");
+
+     strcpy(thisgrid.name,"AR(1)-process");
+     thisgrid.type = 2;
+     thisgrid.nz = 1; thisgrid.dz = 1.0; thisgrid.z_origin = 0.0;
+     *ierr=CTA_Tree_GetValueStr(*hinput,"grid/gridsize/nx", &thisgrid.nx, CTA_INTEGER);
+
+     *ierr=CTA_Tree_GetValueStr(*hinput,"grid/gridsize/ny", &thisgrid.ny, CTA_INTEGER);
+
+     *ierr=CTA_Tree_GetValueStr(*hinput,"grid/gridparams/x_origin",
+                             &thisgrid.x_origin, CTA_DOUBLE);
+     *ierr=CTA_Tree_GetValueStr(*hinput,"grid/gridparams/y_origin",
+                             &thisgrid.y_origin, CTA_DOUBLE);
+     *ierr=CTA_Tree_GetValueStr(*hinput,"grid/gridparams/dx",&thisgrid.dx, CTA_DOUBLE);
+     *ierr=CTA_Tree_GetValueStr(*hinput,"grid/gridparams/dy",&thisgrid.dy, CTA_DOUBLE);
+
+     if (!strncmp("3D",str_grid,len )) {
+       thisgrid.type = 3;
+       *ierr=CTA_Tree_GetValueStr(*hinput,"grid/gridsize/nz", &thisgrid.nz, CTA_INTEGER);
+       *ierr=CTA_Tree_GetValueStr(*hinput,"grid/gridparams/z_origin",
+                             &thisgrid.z_origin, CTA_DOUBLE);
+       *ierr=CTA_Tree_GetValueStr(*hinput,"grid/gridparams/dz",&thisgrid.dz,CTA_DOUBLE);
+     }
+     thisgrid.nsize = thisgrid.nx * thisgrid.ny * thisgrid.nz;
+
+
+   } // end of reading 2D/3D grid info
+
+
+   if (!strncmp("curve",str_grid,len)) {
+     //  printf("grid type is curve, now reading more parameters \n");
+
+     strcpy(thisgrid.name,"AR(1)-process");
+     thisgrid.type = 10;
+     *ierr=CTA_Tree_GetValueStr(*hinput,"grid/gridsize/nsize", &thisgrid.nsize, CTA_INTEGER);
+     *ierr=CTA_Tree_GetValueStr(*hinput,"grid/gridparams/x_origin",
+                             &thisgrid.x_origin, CTA_DOUBLE);
+     *ierr=CTA_Tree_GetValueStr(*hinput,"grid/gridparams/y_origin",
+                             &thisgrid.y_origin, CTA_DOUBLE);
+     // read vector with x-coords and y-coords
+     *ierr=CTA_Tree_GetHandleStr(*hinput,"grid/x-coords",&h_x_coords);
+     *ierr=CTA_Tree_GetHandleStr(*hinput,"grid/y-coords",&h_y_coords);
+   } // end of reading curve info
+   // printf("end of reading grid info \n");
+
+   *ierr = CTA_TreeVector_Create("ar1 model","ar1_model",state);
+   if (*ierr != CTA_OK) {printf("Error in ar1_create. no state \n");return; }
+   *ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR,thisgrid.nsize, CTA_DOUBLE, CTA_NULL, &hvec1);
+
+   CTA_Vector_SetConstant(hvec1,&zeroes,CTA_DOUBLE);
+   CTA_TreeVector_SetVec(*state,hvec1);
+
+
+   if (*ierr != CTA_OK) { printf("Error in ar1_create. State not initialised \n");
+     return;
+   }
+
+
+   /* attach grid information to state meta-info */
+   //     printf("Now grid will be attached to metainfo \n");
+   *ierr=CTA_Metainfo_SetGrid(hdescr_state, &thisgrid);
+   // printf("QQ metainfo_setgrid: nsize(=8*12)   %d |%d| \n", *ierr ,thisgrid.nsize);
+   if (*ierr != CTA_OK) { printf("Error in ar1_create.grid not attached \n");
+     return;
+   }
+
+   // attach meta-info to state
+
+   // printf("Now meta_info will be attached to state \n");
+   *ierr = CTA_TreeVector_SetMetainfo(*state, hdescr_state);
+   if (*ierr != CTA_OK) { printf("Error in ar1_create.meta-info not attached \n");
+     return;
+   }
+
+   *sbound = CTA_NULL;
+
+   /* fill husrdata with grid and parameters; necessary for ar1_covar */
+   /* do this by constructing a tree */
+   *ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR, 3, CTA_DOUBLE, CTA_NULL, &hvec2);
+   *ierr=CTA_Vector_SetVals(hvec2,p0,3,CTA_DOUBLE);
+
+   CTA_Tree_Create(husrdata);
+
+   CTA_Tree_AddHandle(*husrdata, "p0", hvec2);
+
+   *ierr=CTA_Handle_Create("grid",CTA_DATABLOCK,&hgrid2,&hhandle);
+   if (*ierr!=CTA_OK) {printf("ar1_create: error husrdata tree \n");}
+   *ierr=CTA_Tree_AddHandle(*husrdata, "hgrid", hhandle);
+   if (*ierr!=CTA_OK) {printf("ar1_create: error husrdata tree \n");}
+
+
+
+   /* fill sparam with parameters */
+   *ierr = CTA_TreeVector_Create(" ","ar1_params",sparam);
+   *ierr=CTA_TreeVector_SetVec(*sparam,hvec2);
+   if (*ierr!=CTA_OK) {printf("error vector params create %d\n",*ierr);}
+   //   *sparam = CTA_NULL;
+
+
+   *nnoise = thisgrid.nsize;
+   //printf("ar1_create: nnoise %d \n",*nnoise);
+
+   /* Read time from the input configuration */
+   retval=CTA_Tree_GetHandleStr (*hinput, "timehorizon",  &inputTimeHorizon);
+   if (retval==CTA_OK){
+      *ierr=CTA_Time_Copy(inputTimeHorizon,*tHorizon);
+   } else {
+      printf("WARNING: No time horizon has been set for AR(1) model\n");
+      printf("WARNING: Using default horizon [0, 1.0e6]\n");
+      *ierr=CTA_Time_SetSpan(*tHorizon,0.0,1.0e6); /* empty span */
+   }
+   if (*ierr!=CTA_OK) {printf("error handling the time horizon of the model %d\n",*ierr);}
+
+
+   *ierr=CTA_String_Set(*snamnoise,"ar1_model");
+
+   //   printf("End of modbuild_sp_ar1_create  \n");
+   printf("--------------------------  \n");
+}
+
+/*------------------------------------------------------------------------------------*/
+
+  /* CALL USR_COVAR(CTA_TreeVector *colsvar,int* nnoise, CTA_Handle husrdata,
+                    int* ierr) */
+void  modbuild_sp_ar1_covar(CTA_TreeVector *colsvar,int* nnoise, CTA_Handle husrdata,int* ierr){
+  int i,j,ier2;
+  double p0[3];
+  int nmodel;
+  CTA_Handle hhandle;
+  CTAI_Gridm *hgrid;
+  CTA_Vector hvec1, hvec2;
+  double *Lar1;
+  double *col1;
+
+ /* NB gridinfo moet uit de husrdata gehaald! */
+ /* en ook de parameters in p0! */
+ /* note: nnoise equals nmodel */
+
+  nmodel = *nnoise;
+  Lar1=CTA_Malloc(nmodel*nmodel*sizeof(double));
+  col1=CTA_Malloc(nmodel*sizeof(double));
+  hgrid=CTA_Malloc(sizeof(CTAI_Gridm));
+
+  CTA_Vector_Create(CTA_DEFAULT_VECTOR, 3, CTA_DOUBLE, CTA_NULL, &hvec1);
+  ier2 = CTA_Tree_GetHandleStr(husrdata,"/p0",&hvec1);
+  if (IDEBUG) printf ("-----------ar1_covar:tree: ier2=%d   \n",ier2);
+  ier2=CTA_Vector_GetVals(hvec1,p0,3,CTA_DOUBLE);
+  if (IDEBUG) printf("ar1_covar: getvals ier p0 %d %f \n",ier2, p0[1]);
+
+  ier2 = CTA_Tree_GetHandleStr(husrdata,"/hgrid",&hhandle);
+
+  ier2=CTA_Handle_GetData((CTA_Handle) hhandle,(void**) &hgrid);
+
+  if (IDEBUG) printf("ar1_covar: ier2 grid.nx %d %d \n",ier2,hgrid->nx);
+
+
+  ier2 = modbuild_sp_compute_covars(nmodel,Lar1,p0, *hgrid);
+
+  for (i=0;i<nmodel; i++){
+    for (j=0;j<nmodel;j++){
+      col1[j] = Lar1[i*nmodel + j];
+    }
+    CTA_Vector_Create(CTA_DEFAULT_VECTOR, nmodel, CTA_DOUBLE, CTA_NULL, &hvec2);
+    CTA_Vector_SetVals(hvec2,col1,nmodel,CTA_DOUBLE);
+    CTA_TreeVector_SetVec(colsvar[i],hvec2);
+    if (IDEBUG) printf("ar1_covar: state-setvec ier %d %d\n",i,ier2);
+  }
+
+  CTA_Vector_Free(&hvec1);
+  CTA_Vector_Free(&hvec2);
+
+}
+
+
+/*------------------------------------------------------------------------------------*/
+void modbuild_sp_ar1_compute(CTA_Time *timesspan,CTA_TreeVector *state,
+                      CTA_TreeVector *saxpyforc, BOOL *baddnoise,
+                      CTA_TreeVector *sparam, CTA_Handle husrdata,
+                             int* ierr) {
+  int i;
+  int nmodel;
+  double t1,t2;
+  CTA_Metainfo minfo;
+  CTAI_Gridm hgrid;
+  double *x_in;
+  double *Lar1;
+  double *x_out, *rndh;
+  double p0[3];
+  double kar_t, alfa;
+  double *AR_Noise;
+  double eend;
+  int    eeni;
+  char   dumch;
+
+  //  printf("modbuild_sp_ar1_compute: start  \n");
+
+    *ierr = CTA_Time_GetSpan(*timesspan,&t1,&t2);
+
+    //  printf("modbuild_sp_ar1_compute: getspan %f %f \n",t1,t2 );
+
+   *ierr=CTA_Metainfo_Create(&minfo);
+   *ierr = CTA_TreeVector_GetMetainfo(*state,minfo);
+   *ierr = CTA_Metainfo_GetGrid(minfo,&hgrid);
+   nmodel = hgrid.nsize;
+
+   x_in=CTA_Malloc(nmodel*sizeof(double));
+   x_out=CTA_Malloc(nmodel*sizeof(double));
+   rndh=CTA_Malloc(nmodel*sizeof(double));
+   AR_Noise=CTA_Malloc(nmodel*sizeof(double));
+   for (i=0;i<nmodel;i++) {AR_Noise[i] = 0.0;}
+   Lar1=CTA_Malloc(nmodel*nmodel*sizeof(double));
+
+//   double x_in[nmodel], x_out[nmodel];
+//   double Lar1[nmodel][nmodel];
+
+   *ierr = CTA_TreeVector_GetVals(*state,x_in,nmodel,CTA_DOUBLE);
+   //   printf("x-in: %f %f \n ",x_in[0],x_in[1]);
+
+  if (*ierr!=CTA_OK) {return;}
+  // Get 'sparam' and set the value of the local variable 'param'
+   *ierr = CTA_TreeVector_GetVals(*sparam,p0,3,CTA_DOUBLE);
+   //printf("modbuild_sp_ar1_compute: p0 %f %f\n ",p0[1],p0[2]);
+
+
+   modbuild_sp_compute_covars(nmodel,Lar1,p0, hgrid);
+
+
+   /* The ar1 model */
+
+   /* Propagate the values by multiplying with the time correspondence */
+   kar_t = p0[1];
+   alfa = exp(-(t2-t1) / kar_t);
+
+   /* Doe we need to add noise? */
+   if (*baddnoise){
+     //printf("adding noise\n");
+     /* Collect random numbers for white noise */
+     for (i=0; i < nmodel; i++){
+       CTA_rand_n(&rndh[i]);
+     }
+
+     /* Add the white noise to the model state */
+
+     eend  = 1.0;
+     eeni  = 1;
+     dumch = 'N';
+     DGEMV_F77(&dumch,&nmodel,&nmodel,&eend, Lar1,&nmodel, rndh,&eeni,&eend,AR_Noise,&eeni,1);
+
+
+     /* Scale the space-correlated, time-uncorrelated noise so that
+        adding this to the colored noise results in the correct
+        stationary covariance matrix */
+     for (i=0; i < nmodel; i++){
+       AR_Noise[i] = AR_Noise[i] * sqrt(1.0-alfa*alfa);
+//       printf("AR_Noise[%i]=%f\n",i,AR_Noise[i]);
+       x_out[i] = alfa * x_in[i]  + AR_Noise[i];
+     }
+   }
+   else {
+     for (i=0; i < nmodel; i++){
+       x_out[i] = alfa * x_in[i];
+     }
+   }
+   *ierr = CTA_TreeVector_SetVals(*state,x_out,nmodel,CTA_DOUBLE);
+
+   // free variabelen
+   free(x_in); free(x_out); free(Lar1); free(rndh); free(AR_Noise);
+
+}
+/*------------------------------------------------------------------------------------*/
+void modbuild_sp_ar1_getobsval(CTA_TreeVector *state, CTA_ObsDescr *hdescr, CTA_Vector *vval,
+                CTA_Handle *husrdata, int *ierr){
+
+int imeasr, nmeasr, indx;
+CTA_Vector vindex;
+double dval;
+
+   *ierr=CTA_ObsDescr_Observation_Count(*hdescr, &nmeasr);
+   if (*ierr!=CTA_OK) return;
+   if (nmeasr<0) return;
+
+   /* Create work variables */
+   *ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nmeasr, CTA_INTEGER,
+                          CTA_NULL, &vindex);
+   if (*ierr!=CTA_OK) return;
+
+   /* Use the INDX-property to find the position in the state */
+   *ierr=CTA_ObsDescr_Get_ValueProperties(*hdescr, "INDX", vindex, CTA_INTEGER);
+    if (*ierr!=CTA_OK) {
+      printf ("Cannot get property ""INDX"" from observation description\n");
+      printf ("Did you use the wrong observation database?\n");
+      printf ("Fatal error in oscill_obs\n");
+      exit(1);
+    }
+
+    for (imeasr=1;imeasr<=nmeasr;imeasr++){
+      CTA_Vector_GetVal(vindex,imeasr,&indx,CTA_INTEGER);
+      CTA_TreeVector_GetVal(*state,imeasr,&dval,CTA_DOUBLE);
+      CTA_Vector_SetVal(*vval,imeasr,&dval,CTA_DOUBLE);
+    }
+
+   CTA_Vector_Free(&vindex);
+   *ierr=CTA_OK;
+}
+/*------------------------------------------------------------------------------------*/
+
+
+int modbuild_sp_compute_covars(int nmodel, double *Qar1, double *p0, CTAI_Gridm hgrid){
+  int i,j,qi;
+  double std_dev, kar_l;
+  double x1,y1,x2,y2,distance2;
+  double rdum;
+  int info;
+  int nul;
+  char dumch;
+
+  std_dev = p0[0];
+  //kar_t = p0[1];
+  kar_l = p0[2];
+
+ 
+      if (hgrid.type ==2) {
+
+        for (i=1;i<=nmodel;i++){
+          x1 = ((i-1) % hgrid.nx)*hgrid.dx + hgrid.x_origin;
+          y1 = ((i-1)/hgrid.nx)*hgrid.dy + hgrid.y_origin;
+
+            for (j = 1;j <= nmodel; j++){
+              x2 = ((j-1) % hgrid.nx)*hgrid.dx + hgrid.x_origin;
+              y2 = ((j-1)/hgrid.nx)*hgrid.dy  + hgrid.y_origin;
+
+              distance2 = (x1-x2)*(x1-x2) + (y1-y2)*(y1-y2);
+              qi = (i-1)*nmodel + j - 1;
+              Qar1[qi] = std_dev*std_dev * exp(-distance2/(kar_l*kar_l));
+          
+            }
+
+        }
+        //printf("end of matrix \n");
+
+      }
+      else { printf("compute_covar: 3D grid and curve not yet implemented \n");
+        exit(-1);
+      }
+
+     
+  /*   Compute Cholesky decomposition of this matrix */
+          nul = 0;
+          dumch='L';
+          DPOSV_F77( &dumch, &nmodel, &nul, Qar1, &nmodel, &rdum, &nmodel, &info);
+       if (info !=0 ) {printf("compute_covar: error in dposv %d \n",info) ;}
+       // if (info ==0 ) {printf("compute_covar: dposv succeeded %d \n",info) ;}
+
+
+  return CTA_OK;
+
+}
+
+
+
+
+
+/*-------------------------------------------------------------------------------------*/
+
+void CTA_Modbuild_sp_CreateClass(CTA_ModelClass *modelcls){
+   int ierr; //COSTA return value
+   int aierr[CTA_MODEL_NUMFUNC];
+   int i;
+   CTA_Func hfunc[CTA_MODEL_NUMFUNC];
+   CTA_Func hfuncar1[4];
+   CTA_Intf intf;
+
+   // Create a COSTA model component from my own implementation
+   intf=CTA_NULL;
+   for (i=0; i<CTA_MODEL_NUMFUNC; i++){
+      hfunc[i]=CTA_NULL;
+      aierr[i]=CTA_OK;
+   }
+   aierr[ 0]=CTA_Func_Create("modbuild_sp_create_size",     &modbuild_sp_create_size,    intf, &hfunc[I_CTA_MODEL_CREATE_SIZE  ]);
+   aierr[ 1]=CTA_Func_Create("modbuild_sp_create_init",     &modbuild_sp_create_init,    intf, &hfunc[I_CTA_MODEL_CREATE_INIT  ]);
+   aierr[ 2]=CTA_Func_Create("modbuild_sp_free",            &modbuild_sp_free,           intf, &hfunc[I_CTA_MODEL_FREE         ]);
+   aierr[ 3]=CTA_Func_Create("modbuild_sp_compute",         &modbuild_sp_compute,        intf, &hfunc[I_CTA_MODEL_COMPUTE      ]);
+   aierr[ 4]=CTA_Func_Create("modbuild_sp_setstate",        &modbuild_sp_setstate,       intf, &hfunc[I_CTA_MODEL_SET_STATE      ]);
+   aierr[ 5]=CTA_Func_Create("modbuild_sp_getstate",        &modbuild_sp_getstate,       intf, &hfunc[I_CTA_MODEL_GET_STATE      ]);
+   aierr[ 6]=CTA_Func_Create("modbuild_sp_axpymodel",       &modbuild_sp_axpymodel,      intf, &hfunc[CTA_MODEL_AXPY_MODEL     ]);
+   aierr[ 7]=CTA_Func_Create("modbuild_sp_axpystate",       &modbuild_sp_axpystate,      intf, &hfunc[CTA_MODEL_AXPY_STATE     ]);
+   aierr[ 8]=CTA_Func_Create("modbuild_sp_setforc",         &modbuild_sp_setforc,        intf, &hfunc[CTA_MODEL_SET_FORC       ]);
+   aierr[ 9]=CTA_Func_Create("modbuild_sp_getforc",         &modbuild_sp_getforc,        intf, &hfunc[CTA_MODEL_GET_FORC       ]);
+   aierr[10]=CTA_Func_Create("modbuild_sp_axpyforc",        &modbuild_sp_axpyforc,       intf, &hfunc[CTA_MODEL_AXPY_FORC      ]);
+   aierr[11]=CTA_Func_Create("modbuild_sp_setparam",        &modbuild_sp_setparam,       intf, &hfunc[CTA_MODEL_SET_PARAM      ]);
+   aierr[12]=CTA_Func_Create("modbuild_sp_getparam",        &modbuild_sp_getparam,       intf, &hfunc[CTA_MODEL_GET_PARAM      ]);
+   aierr[13]=CTA_Func_Create("modbuild_sp_axpyparam",       &modbuild_sp_axpyparam,      intf, &hfunc[CTA_MODEL_AXPY_PARAM     ]);
+   aierr[14]=CTA_Func_Create("modbuild_sp_getnoisecount",   &modbuild_sp_getnoisecount,  intf, &hfunc[CTA_MODEL_GET_NOISE_COUNT]);
+   aierr[15]=CTA_Func_Create("modbuild_sp_getnoisecovar",   &modbuild_sp_getnoisecovar,  intf, &hfunc[CTA_MODEL_GET_NOISE_COVAR]);
+   aierr[16]=CTA_Func_Create("modbuild_sp_getobsvalues",    &modbuild_sp_getobsvalues,   intf, &hfunc[CTA_MODEL_GET_OBSVALUES  ]);
+   aierr[17]=CTA_Func_Create("modbuild_sp_getobsselect",    &modbuild_sp_getobsselect,   intf, &hfunc[CTA_MODEL_GET_OBSSELECT  ]);
+   aierr[18]=CTA_Func_Create("modbuild_sp_addnoise",        &modbuild_sp_addnoise,       intf, &hfunc[CTA_MODEL_ADD_NOISE      ]);
+   aierr[19]=CTA_Func_Create("modbuild_sp_export",          &modbuild_sp_export,         intf, &hfunc[I_CTA_MODEL_EXPORT         ]);
+   aierr[20]=CTA_Func_Create("modbuild_sp_import",          &modbuild_sp_import,         intf, &hfunc[I_CTA_MODEL_IMPORT         ]);
+   aierr[21]=CTA_Func_Create("modbuild_sp_adjcompute",      &modbuild_sp_adjcompute,     intf, &hfunc[CTA_MODEL_ADJ_COMPUTE    ]);
+   aierr[22]=CTA_Func_Create("modbuild_sp_adjprepare",      &modbuild_sp_adjprepare,     intf, &hfunc[CTA_MODEL_ADJ_PREPARE    ]);
+   aierr[23]=CTA_Func_Create("modbuild_sp_adjsetforc",      &modbuild_sp_adjsetforc,     intf, &hfunc[CTA_MODEL_ADJ_SET_FORC   ]);
+
+   aierr[23]=CTA_Func_Create("modbuild_sp_gettimehorizon",  &modbuild_sp_gettimehorizon, intf, &hfunc[CTA_MODEL_GET_TIMEHORIZON   ]);
+   aierr[24]=CTA_Func_Create("modbuild_sp_getcurrenttime",  &modbuild_sp_getcurrenttime, intf, &hfunc[CTA_MODEL_GET_CURRENTTIME   ]);
+
+
+
+   ierr=CTA_Func_Create("modbuild_sp_ar1_create",&modbuild_sp_ar1_create,intf, &hfuncar1[0]);
+   ierr=CTA_Func_Create("modbuild_sp_ar1_covar",&modbuild_sp_ar1_covar,intf, &hfuncar1[1]);
+   ierr=CTA_Func_Create("modbuild_sp_ar1_compute",&modbuild_sp_ar1_compute,intf, &hfuncar1[2]);
+   ierr=CTA_Func_Create("modbuild_sp_ar1_getobsval",&modbuild_sp_ar1_getobsval,intf, &hfuncar1[3]);
+
+
+   // If something went wrong we are really fatal!
+   for (i=0; i<CTA_MODEL_NUMFUNC ;i++){
+      if (aierr[i]!=CTA_OK) {
+      printf("Internal error in CTA_Modbuild_sp_createclass\n");
+      printf("Cannot create function handles for own implementation\n");
+      exit(1);
+      }
+   }
+   /* Create new model class */
+   ierr=CTA_Model_DefineClass("CTA_MODBUILD_SP", hfunc, modelcls);
+   if (ierr!=CTA_NULL) {
+      printf("Internal error in CTA_Modbuild_sp_createclass\n");
+      printf("Cannot create new class\n");
+      exit(1);
+   }
+}
+
+#define CTA_MODELBUILD_SP_CREATECLASS_F77  F77_CALL(cta_modbuild_sp_createclass,CTA_MODBUILD_SP_CREATECLASS_F)
+void CTA_MODELBUILD_SP_CREATECLASS_F77(CTA_ModelClass *modelcls){
+   CTA_Modbuild_sp_CreateClass(modelcls);
+}
+
diff --git a/costa/native/cta/src/cta_model.c b/costa/native/cta/src/cta_model.c
new file mode 100644
index 000000000..82ad4890d
--- /dev/null
+++ b/costa/native/cta/src/cta_model.c
@@ -0,0 +1,2120 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_model.c $
+$Revision: 4055 $, $Date: 2013-07-03 10:16:57 +0200 (Wed, 03 Jul 2013) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+#include "cta.h"
+
+#include "cta_mem.h"
+#include "cta_datatypes.h"
+#include "ctai_xml.h"
+#include "cta_handles.h"
+#include "ctai_handles.h"
+#include "f_cta_utils.h"
+#include "cta_model.h"
+#include "cta_errors.h"
+#include "cta_f77lapack.h"
+#include "cta_reltable.h"
+#include "cta_par.h"
+#include "cta_message.h"
+#include "cta_model_factory.h"
+
+#define MAX(a,b) (a>b ? a: b)
+#define IDEBUG (1)
+
+
+#define CTA_MODEL_DEFINECLASS_F77      F77_CALL(cta_model_defineclass,CTA_MODEL_DEFINECLASS)
+#define CTA_MODEL_CREATE_F77           F77_CALL(cta_model_create,CTA_MODEL_CREATE)
+#define CTA_MODEL_COMPUTE_F77          F77_CALL(cta_model_compute,CTA_MODEL_COMPUTE)
+#define CTA_MODEL_SETSTATE_F77         F77_CALL(cta_model_setstate,CTA_MODEL_SETSTATE)
+#define CTA_MODEL_GETSTATE_F77         F77_CALL(cta_model_getstate,CTA_MODEL_GETSTATE)
+#define CTA_MODEL_AXPYSTATE_F77        F77_CALL(cta_model_axpystate,CTA_MODEL_AXPYSTATE)
+#define CTA_MODEL_GETSTATESCALING_F77  F77_CALL(cta_model_getstatescaling,CTA_MODEL_GETSTATESCALING)
+#define CTA_MODEL_SETFORC_F77          F77_CALL(cta_model_setforc,CTA_MODEL_SETFORC)
+#define CTA_MODEL_GETFORC_F77          F77_CALL(cta_model_getforc,CTA_MODEL_GETFORC)
+#define CTA_MODEL_AXPYFORC_F77         F77_CALL(cta_model_axpyforc,CTA_MODEL_AXPYFORC)
+#define CTA_MODEL_SETPARAM_F77         F77_CALL(cta_model_setparam,CTA_MODEL_SETPARAM)
+#define CTA_MODEL_GETPARAM_F77         F77_CALL(cta_model_getparam,CTA_MODEL_GETPARAM)
+#define CTA_MODEL_AXPYPARAM_F77        F77_CALL(cta_model_axpyparam,CTA_MODEL_AXPYPARAM)
+#define CTA_MODEL_GETTIMEHORIZON_F77   F77_CALL(cta_model_gettimehorizon,CTA_MODEL_GETTIMEHORIZON)
+#define CTA_MODEL_GETCURRENTTIME_F77   F77_CALL(cta_model_getcurrenttime,CTA_MODEL_GETCURRENTTIME)
+#define CTA_MODEL_GETNOISECOVAR_F77    F77_CALL(cta_model_getnoisecovar,CTA_MODEL_GETNOISECOVAR)
+#define CTA_MODEL_GETNOISECOUNT_F77    F77_CALL(cta_model_getnoisecount,CTA_MODEL_GETNOISECOUNT)
+#define CTA_MODEL_FREE_F77             F77_CALL(cta_model_free,CTA_MODEL_FREE)
+#define CTA_MODEL_ANNOUNCEOBSVALUES_F77 F77_CALL(cta_model_announceobsvalues,CTA_MODEL_ANNOUNCEOBSVALUES)
+#define CTA_MODEL_GETOBSVALUES_F77     F77_CALL(cta_model_getobsvalues,CTA_MODEL_GETOBSVALUES)
+#define CTA_MODEL_GETOBSSELECT_F77     F77_CALL(cta_model_getobsselect,CTA_MODEL_GETOBSSELECT)
+#define CTA_MODEL_ADDNOISE_F77         F77_CALL(cta_model_addnoise,CTA_MODEL_ADDNOISE)
+#define CTA_MODEL_IMPORT_F77           F77_CALL(cta_model_import,CTA_MODEL_IMPORT)
+#define CTA_MODEL_EXPORT_F77           F77_CALL(cta_model_export,CTA_MODEL_EXPORT)
+#define CTA_MODEL_GETOBSLOCALIZATION_F77   F77_CALL(cta_model_getobslocalization,CTA_MODEL_GETOBSLOCALIZATION)
+#define CTA_MODEL_ADJSETFORC_F77       F77_CALL(cta_model_adjsetforc,CTA_MODEL_ADJSETFORC)
+#define CTA_MODEL_ADJCOMPUTE_F77       F77_CALL(cta_model_adjcompute,CTA_MODEL_ADJCOMPUTE)
+#define CTA_MODEL_ADJPREPARE_F77       F77_CALL(cta_model_adjprepare,CTA_MODEL_ADJPREPARE)
+#define CTA_MODEL_LOADPERSISTENTSTATE_F77     F77_CALL(cta_model_loadpersistentstate,CTA_MODEL_LOADPERSISTENTSTATE)
+#define CTA_MODEL_SAVEINTERNALSTATE_F77       F77_CALL(cta_model_saveinternalstate,CTA_MODEL_SAVEINTERNALSTATE)
+#define CTA_MODEL_RESTOREINTERNALSTATE_F77    F77_CALL(cta_model_restoreinternalstate,CTA_MODEL_RESTOREINTERNALSTATE)
+#define CTA_MODEL_RELEASEINTERNALSTATE_F77    F77_CALL(cta_model_releaseinternalstate,CTA_MODEL_RELEASEINTERNALSTATE)
+#define CTA_MODEL_SAVEPERSISTENTSTATE_F77     F77_CALL(cta_model_savepersistentstate,CTA_MODEL_SAVEPERSISTENTSTATE)
+
+#define CLASSNAME "CTA_Model"
+
+
+/* Struct holding all data associated to an COSTA Model */
+typedef struct{
+   CTA_ObsDescr ObsDescr;
+   double *times;
+   int ntimes;
+   int isset;
+   CTA_Vector values;
+} CTAI_AnnoinceObs;
+
+typedef struct {
+  int flag;
+  double t_step;
+} CTAI_Barrier;
+
+typedef struct {
+CTA_Func functions[CTA_MODEL_NUMFUNC];
+CTA_ModelClass hmodcl;
+void *data;      /*implementation specific data */
+CTAI_AnnoinceObs announced;
+CTAI_Barrier barrier;
+} CTAI_Model;
+
+typedef struct {
+CTA_Func functions[CTA_MODEL_NUMFUNC];
+} CTAI_ModelClass;
+
+
+void CTAI_Clear_Announced(CTAI_AnnoinceObs *announced, BOOL init){
+
+   /* Free existing data */
+   if (!init){
+      if (announced->times) {
+         free(announced->times);
+         announced->times=NULL;
+      }
+      if (announced->values!=CTA_NULL){
+         CTA_Vector_Free(&(announced->values));
+      }
+      if (announced->ObsDescr !=CTA_NULL){
+        CTA_ObsDescr_Free(&(announced->ObsDescr));
+      }
+   }
+
+   announced->ObsDescr=CTA_NULL;
+   announced->values=CTA_NULL;
+   announced->times=NULL;
+   announced->ntimes=0;
+   announced->isset=FALSE;
+}
+
+
+
+/** \brief Check model handle and return data of a model instance
+ *
+ * \param hmodel     I Model instance handle
+ * \param findex     I Index/ID of user function
+ * \param data       O Data block of model
+ * \param function   O function pointer to user function
+ * \return error status: CTA_OK if successful
+ */
+int CTAI_Model_GetDataAndFunc(CTA_Model hmodel, int findex, CTAI_Model **data, CTA_Function **function){
+   int retval; /* Return status of COSTA method */
+
+   /* Check type of handle */
+   retval=CTA_Handle_Check((CTA_Handle) hmodel,CTA_MODEL);
+   if (retval!=CTA_OK) return retval;
+
+   /* Get Modeldata object */
+   retval=CTA_Handle_GetData((CTA_Handle) hmodel, (void**) data);
+   if (retval!=CTA_OK) return retval;
+
+   /* Check whether user function is implemented */
+   if ((*data)->functions[findex]==CTA_NULL) return CTA_NOT_IMPLEMENTED;
+
+   /* Get function */
+   retval=CTA_Func_GetFunc((*data)->functions[findex],function);
+   if (retval!=CTA_OK) return retval;
+
+   return CTA_OK;
+}
+
+#undef METHOD
+#define METHOD "GetSet"
+/* function used by the different get and set functions
+   (all user implementations have the same interface) */
+int CTAI_Model_GetSet(
+   CTA_Model hmodel,     /* handle of model                 */
+   CTA_Time  tspan,
+   CTA_Handle *hthing,   /* set or returned item            */
+   int Func_ID,          /* ID (parameter) of user function */
+   int idomain
+){
+
+   /* Local variables */
+   int retval;               /* Return value of COSTA call   */
+   CTA_Function *function;   /* Function that must be called */
+   CTAI_Model *data;         /* Data associated to model     */
+   double one;               /* value 1.0 in double          */
+   CTA_ModelClass hmodcl;    /* handle to ModelClass         */
+   int isBlocked;            /* flag indicating tha model is currently blocked */
+
+   /* Check type of handle */
+   retval=CTA_Handle_Check((CTA_Handle) hmodel,CTA_MODEL);
+   if (retval!=CTA_OK) return retval;
+
+   /* Get Modeldata object */
+   retval=CTA_Handle_GetData((CTA_Handle) hmodel, (void*) &data);
+   if (retval!=CTA_OK) return retval;
+
+   /* Check if model is currently blocked */
+   hmodcl = data->hmodcl;
+   isBlocked = CTAI_ModelFac_GetBlock(hmodcl, hmodel);
+   if (isBlocked==CTA_TRUE) {
+      if (Func_ID==I_CTA_MODEL_GET_STATE || Func_ID==CTA_MODEL_GET_STATEDOMAIN ){
+         CTA_WRITE_WARNING("CTA_MODEL_GETSTATE: model is currently blocked.\n");
+      }
+      else {
+         CTA_WRITE_ERROR("This operation is not permitted while model is blocked\n");
+         return -1;
+      }
+   }
+
+   /* Clear the announced administration in case of I_CTA_MODEL_SET_STATE */
+   if (Func_ID==I_CTA_MODEL_SET_STATE){
+      CTAI_Clear_Announced(&(data->announced), FALSE);
+   }
+
+   /* Check existence of Function pointer :*/
+
+   /* -special treatment for CTA_MODEL_GET_STATESCALING      */
+   /* if function does not exist return state containing 1.0 */
+   if (Func_ID==CTA_MODEL_GET_STATESCALING){
+      if (data->functions[Func_ID]==CTA_NULL){
+         /* create a state-vector when necessary by calling getstate */
+         if (*hthing==CTA_NULL) {
+            if (Func_ID==CTA_MODEL_GET_STATESCALING){
+                retval=CTAI_Model_GetSet(hmodel, CTA_NULL, hthing,
+                                    I_CTA_MODEL_GET_STATE, idomain);
+            }
+            if (retval!=CTA_OK) return retval;
+         }
+         /* set identity */
+         one=1.0;
+         return CTA_TreeVector_SetConstant((CTA_TreeVector) *hthing,&one,CTA_DOUBLE);
+      }
+   }
+
+   if (data->functions[Func_ID]==CTA_NULL) return CTA_NOT_IMPLEMENTED;
+
+   retval=CTA_Func_GetFunc(data->functions[Func_ID],&function);
+   if (retval!=CTA_OK) return retval;
+
+   /* Call function */
+   if (Func_ID== CTA_MODEL_SET_FORC || Func_ID== CTA_MODEL_GET_FORC ){
+     function(data->data, &tspan, hthing,&retval);
+   }
+   else if (Func_ID == CTA_MODEL_GET_STATEDOMAIN){
+     function(data->data, &idomain, hthing, &retval);
+   } else {
+     function(data->data, hthing,&retval);
+   }
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "SetBarrier"
+/* Set Barrier information for this model */
+int CTAI_Model_SetBarrier(CTA_ModelClass hmodcl, CTAI_Barrier *barrier ) {
+
+   int retval;               /* Return value of COSTA call              */
+   char *flag_barrier;       /* flag                                    */
+   double t_step;            /* maximum interval that model may run     */
+
+   if (IDEBUG>0) {printf("CTA_Model_SetBarrier: Start of function \n");}
+
+   /* get barrier information from the model class */
+   retval = CTAI_ModelFac_GetBarrierData(hmodcl, &flag_barrier, &t_step);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Error: cannot get barrier information.");
+      return retval;
+   }
+
+   barrier->flag   = CTA_FALSE;
+   barrier->t_step = 0.0;
+
+   if (flag_barrier != NULL) {
+      if ( (0 == strcmp("true", flag_barrier)) && t_step ) {
+      /* Set flag_barrier */
+      barrier->flag = CTA_TRUE;
+      /* Set t_step */
+      barrier->t_step = t_step;
+      }
+   }
+   if (IDEBUG>0) {printf("CTA_Model_SetBarrier: End of function \n");}
+   return CTA_OK;
+}
+
+int CTAI_Model_Axpy(
+   CTA_Model  hmodel,  /* y element in axpy (handle of model)            */
+   CTA_Time  tspan,
+   double     alpha,   /* scalar multyplication factor for x             */
+   CTA_TreeVector  h_x,     /* x element in axpy state handle of model handle */
+   int axpy_id,        /* ID (parameter) of axpy user function           */
+   int get_id,         /* ID (parameter) of get user function            */
+   int set_id          /* ID (parameter) of set user function            */
+   ){
+
+   /* Local variables */
+   int retval;               /* Return value of COSTA call           */
+   CTA_Function *function;   /* Function that must be called         */
+   CTAI_Model *data;         /* Data associated to model hmodel      */
+   CTA_TreeVector y;              /* internal y in model state            */
+
+
+   /* Check type of model handle */
+   retval=CTA_Handle_Check((CTA_Handle) hmodel,CTA_MODEL);
+   if (retval!=CTA_OK) return retval;
+
+   /* Get data object from model */
+   retval=CTA_Handle_GetData((CTA_Handle) hmodel, (void*) &data);
+   if (retval!=CTA_OK) return retval;
+
+   /* Check whether user implementation is available */
+   if (axpy_id==CTA_NULL){
+      /* Try default implementation :*/
+      if (get_id==CTA_NULL || set_id==CTA_NULL) return CTA_NOT_IMPLEMENTED;
+
+      /* get y-vector from model */
+      y=CTA_NULL;
+      retval=CTAI_Model_GetSet(hmodel,tspan, &y,get_id, -1);
+      if (retval!=CTA_OK) return retval;
+
+      /* do a state axpy */
+      retval=CTA_TreeVector_Axpy(y, alpha,h_x);
+      if (retval!=CTA_OK) return retval;
+
+      /* set new y-vector in model */
+      retval=CTAI_Model_GetSet(hmodel,tspan, &y,set_id, -1);
+      if (retval!=CTA_OK) return retval;
+
+      /* free work variable */
+      retval=CTA_TreeVector_Free(&y,CTA_TRUE);
+      if (retval!=CTA_OK) return retval;
+
+   }
+   else{
+      /* Use user implementation */
+      retval=CTA_Func_GetFunc(data->functions[axpy_id],&function);
+      if (retval!=CTA_OK) return retval;
+      if (axpy_id==CTA_MODEL_AXPY_FORC){
+        function(data->data, &tspan, &alpha, &h_x, &retval);
+      }
+      else {
+        function(data->data, &alpha, &h_x, &retval);
+      }
+      if (retval!=CTA_OK) return retval;
+   }
+   return CTA_OK;
+}
+
+
+//int CTA_Model_DefineClass(
+//   const char *name,
+//   const CTA_Func h_func[CTA_MODEL_NUMFUNC],
+//   CTA_ModelClass     *hmodcl
+//   ){
+//
+//   CTAI_ModelClass *data;
+//   int retval;
+//   int i;
+//
+//   /* Allocate new Vector object */
+//   data=CTA_Malloc(sizeof(CTAI_ModelClass));
+//
+//   for (i=0;i<CTA_MODEL_NUMFUNC;i++){
+//      data->functions[i]=h_func[i];
+//   }
+//
+//   // Allocate new handle and return eror when unsuccesfull
+//   retval=CTA_Handle_Create(name,CTA_MODELCLASS,data,hmodcl);
+//   return retval;
+//}
+
+#undef METHOD
+#define METHOD "Create" 
+int CTA_Model_Create(CTA_ModelClass hmodcl, CTA_Handle userdata, CTA_Model *hmodel){
+
+   CTAI_Model *model;
+   int memsize;
+   int retval;
+   CTAI_ModelClass *clsdata;
+   CTA_Function *function;
+   CTA_Tree tConfig;
+
+   if (IDEBUG>0) {
+      printf("Start of CTA_Model_Create hmodcl=%d userdata=%d\n",hmodcl,userdata);
+   }
+
+   /* Check in what context we are */
+   if (!(hmodcl==CTA_MODBUILD_PAR) &&
+       CTA_IS_PARALLEL==CTA_TRUE &&
+       CTA_MY_PROC_TYPE==CTA_ParMaster) {
+
+      /* We are not creating an instance of the parallel model builder */
+      /* We are running parallel and this is the master process        */
+      /* We create the model on one of the worker processes            */
+      CTA_Tree_Create(&tConfig);
+      CTA_Tree_AddHandle(tConfig, "model", userdata);
+      CTA_Tree_AddHandle(tConfig, "modelclass", hmodcl);
+      /* Create model (using parallel modelbuild model */
+      //printf("CREATE A MODEL USING PARALLE MODEL BUILDER\n");
+      retval=CTA_Model_Create(CTA_MODBUILD_PAR, tConfig, hmodel);
+      if (retval!=CTA_OK){
+		  char message[1024];
+		  sprintf(message,"#%d Error in model create %d\n",CTA_PAR_MY_RANK, retval);
+	  }
+
+      // DON't free since we will lose the input then!!!
+      // Have to fix this in Tree-object //
+      // retval=CTA_Tree_Free(&tConfig);
+
+      //printf("WE ZIJN NA DE PARALLEL MODEL CREATE!!!\n");
+   } else {
+      int i;
+      //printf("CREATING A MODEL LOCALLY\n");
+
+      /* We are running sequential or we are a worker process */
+      /* We need to create the model instance locally         */
+
+      //printf("WE ZIJN BIJ DE SEQUENTIAL MODEL CREATE!!!\n");
+      //exit(-1);
+
+
+      if (IDEBUG>0) printf("#%d Start of cta_model_create\n", CTA_PAR_MY_RANK);
+      //if (IDEBUG>0) printf("#%d hmodcls=%d type=%d",CTA_PAR_MY_RANK, hmodcl,CTAI_Handle_GetDatatype(hmodcl));
+      if (IDEBUG>0) printf("#%d hmodcls=%d type=%d \n",CTA_PAR_MY_RANK, hmodcl,999);
+
+      retval=CTA_Handle_Check((CTA_Handle) hmodcl,CTA_MODELCLASS);
+      if (retval!=CTA_OK) {
+          CTA_WRITE_ERROR("Handle is not a cta_modelclass handle");
+          return retval;
+      }
+
+      /* Get class data containing all function pointers */
+      retval=CTA_Handle_GetData((CTA_Handle) hmodcl,(void*) &clsdata);
+      if (retval!=CTA_OK) {
+         CTA_WRITE_ERROR("Cannot retrieve handle data");
+         return retval;
+      }
+
+      if (IDEBUG>0) printf("#%d cta_model_create: Initialise model (size)\n", CTA_PAR_MY_RANK);
+      /* determine size of data object (CTA_VECTOR_CREATE_SIZE)*/
+      if (clsdata->functions[I_CTA_MODEL_CREATE_SIZE]==CTA_NULL)
+      {
+        printf("#%d ERROR in CTA_Model_Create\n", CTA_PAR_MY_RANK);
+        printf("#%d There is no handle for the function 'Create_size'\n", CTA_PAR_MY_RANK);
+        return CTA_NOT_IMPLEMENTED;
+      }
+      retval=CTA_Func_GetFunc(clsdata->functions[I_CTA_MODEL_CREATE_SIZE],&function);
+      if (retval!=CTA_OK) {
+         char message[1024];
+         sprintf(message,"#%d ERROR in CTA_Model_Create;\nCannot get handle for function 'Create_size'\n", CTA_PAR_MY_RANK);
+         CTA_WRITE_ERROR(message);
+         return retval;
+      }
+      (void) function(&userdata,&memsize,&retval);
+      if (retval) {
+         CTA_WRITE_ERROR("Error in function CTA_MODEL_CREATE_SIZE");
+         return retval;
+      }
+
+      /* allocate memory for new model object */
+      model=CTA_Malloc(sizeof(CTAI_Model));
+      model->data=CTA_Malloc(memsize);
+
+      /* copy function pointers */
+      for (i=0;i<CTA_MODEL_NUMFUNC;i++){
+         model->functions[i]=clsdata->functions[i];
+      }
+      /* set other general information */
+      model->hmodcl=hmodcl;
+
+      // set barrier information for current model
+      retval = CTAI_Model_SetBarrier( hmodcl, &model->barrier);
+      if (retval) {
+         CTA_WRITE_ERROR("Error in CTA_Model_SetBarrierData");
+         return retval;
+      }
+
+      /* Initialise announced observation admin */
+      CTAI_Clear_Announced(&(model->announced), TRUE);
+
+      /* Copy user's function handles */
+      if (clsdata->functions[I_CTA_MODEL_CREATE_INIT]==CTA_NULL) return CTA_NOT_IMPLEMENTED;
+
+      /* Allocate new handle and return error when unsuccesfull */
+      retval=CTA_Handle_Create("model",CTA_MODEL,model,hmodel);
+      if (retval) {
+         CTA_WRITE_ERROR("Handle CTA_MODEL cannot be created");
+         return retval;
+      }
+
+      if (IDEBUG>0) printf("#%d cta_model_create: Initialise model (init)\n", CTA_PAR_MY_RANK);
+      /* Initialise a new model */
+      retval=CTA_Func_GetFunc(clsdata->functions[I_CTA_MODEL_CREATE_INIT],&function);
+
+      if (retval!=CTA_OK) {
+         CTA_WRITE_ERROR("Cannot get function CTA_MODEL_CREATE_INIT");
+         return retval;
+      }
+
+      (void) function(hmodel, model->data, &userdata, &retval);
+      if (IDEBUG>0) printf("#%d cta_model_create: Initialise model (done) retval=%d\n", CTA_PAR_MY_RANK,retval);
+      if (retval) {
+         CTA_WRITE_ERROR("Error in function CTA_MODEL_CREATE_INIT");
+         return retval;
+      }
+   }
+   /* The model has been created. Administrate this model in the modelclass object */
+   CTAI_ModelFac_AddModelInstance(hmodcl, *hmodel);
+
+   if (IDEBUG>0) printf("#%d cta_model_create: end of function\n", CTA_PAR_MY_RANK);
+   return CTA_OK;
+}
+
+
+#undef METHOD
+#define METHOD "AddNoise" 
+int CTA_Model_AddNoise(
+   CTA_Model hmodel, /* handle of model */
+   CTA_Time htime    /* timespan of simulation */
+){
+
+   /* Local variables */
+   int retval;               /* Return value of COSTA call   */
+   CTA_Function *function;   /* Function that must be called */
+   CTAI_Model *data;         /* Data assosiated to model     */
+
+   /* Check type of handle */
+   retval=CTA_Handle_Check((CTA_Handle) hmodel,CTA_MODEL);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_model handle");
+       return retval;
+   } 
+
+   /* Get Modeldata object */
+   retval=CTA_Handle_GetData((CTA_Handle) hmodel, (void*) &data);
+   if (retval!=CTA_OK)  {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   /* Get Function pointer */
+   if (data->functions[CTA_MODEL_ADD_NOISE]==CTA_NULL) return CTA_NOT_IMPLEMENTED;
+   retval=CTA_Func_GetFunc(data->functions[CTA_MODEL_ADD_NOISE],&function);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function CTA_MODEL_ADD_NOISE");
+	   return retval;
+   }
+
+   /* Call function */
+   function(data->data,&htime,&retval);
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "PerformTimesteps"
+/* handle barrier information and advance the model in time */
+int CTAI_Model_PerformTimesteps(
+   CTA_Model hmodel,         /* handle of model */
+   CTA_Function *function,   /* Function that must be called */
+   CTA_Time htime,           /* timespan to compute */
+   int mindBarrier           /* Flag to activate barrier check fixed timestepping */
+){
+
+   /* Local variables */
+   int retval;               /* Return value of COSTA call   */
+   CTAI_Model *data;         /* Data associated to model     */
+   double tstart, tstop;     /* Begin and end time of htime  */
+   double tstep;             /* Maximum number of steps that model can proceed */
+   int isspan;               /* flag indicating whether input is timespan */
+   int isBlocked;            /* flag indicating that maximum number of model steps is exceeded */
+
+   if (IDEBUG>0) {
+     printf("CTA_MODEL DEBUG: Calling performTimeStep function \n");
+   }
+
+   /* Check type of handle */
+   retval=CTA_Handle_Check((CTA_Handle) hmodel,CTA_MODEL);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_model handle");
+       return retval;
+   }
+
+   /* Get Modeldata object */
+   retval=CTA_Handle_GetData((CTA_Handle) hmodel, (void*) &data);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Cannot retrieve handle data");
+      return retval;
+   }
+
+   isBlocked = CTA_FALSE;
+   if (mindBarrier == CTA_TRUE){
+     /* Time management */
+     retval=CTA_Time_GetSpan(htime,&tstart,&tstop);
+     retval=CTA_Time_IsSpan(htime, &isspan);
+     /* Is there a possible barrier? */
+     if (data->barrier.flag==CTA_TRUE) {
+       int nstep;
+       tstep = data->barrier.t_step;
+       nstep = (int) ((tstop-tstart)/tstep+0.5);
+       if (IDEBUG>0) {
+          printf("CTA_performTimesteps: Time information: \n");
+          printf("Tstart= %f, Tstep=%f, Tstop=%f, nStep=%d\n ", tstart, tstep, tstop, nstep);
+       }
+       if (nstep>1) {
+          if (IDEBUG>0) printf("CTA_performTimesteps: set block; I have to wait\n");
+          isBlocked = CTA_TRUE;
+       }
+       else {
+          if (IDEBUG>0) printf("CTA_performTimesteps: no barrier in this time interval\n");
+       }
+     }
+   }
+
+   if (isBlocked == CTA_TRUE) {
+     if (IDEBUG>0) printf("CTA_performTimesteps: TimeStepAllModels\n");
+     /* set block for this model in modelclass administration */
+     retval=CTAI_ModelFac_SetBlock(data->hmodcl,hmodel);
+     /* advance all childmodels */
+     retval=CTAI_ModelFac_TimeStepAllModels(data->hmodcl,function, tstart, tstop);
+     if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("problem in TimeStepAllModels \n");
+       return retval;
+     }
+   } else {
+     if (IDEBUG>0) printf("CTA_performTimesteps: compute for model %d \n", hmodel);
+     function(data->data,&htime,&retval);
+     if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("problem in compute");
+       return retval;
+     }
+   }
+   return CTA_OK;
+}
+
+
+
+
+#undef METHOD
+#define METHOD "Compute"
+/* handle announced observations if necessary and advance the model in
+   time by calling method PerformTimeSteps */
+int CTA_Model_Compute(
+   CTA_Model hmodel, /* handle of model */
+   CTA_Time htime    /* timespan of simulation */
+){
+
+   /* Local variables */
+   int retval;               /* Return value of COSTA call   */
+   CTA_Function *function;   /* Function that must be called */
+   CTAI_Model *data;         /* Data associated to model     */
+   double tstart, tstop;     /* Begin and end time of htime  */
+   double t1, t2;            /* begin and end time of sub-step */
+   CTA_Time hstep;           /* Singe step in compute        */
+   int ntimes;               /* number of times where observations are
+                              * available */
+   int ntimes_loop;          /* number time intervals for compute */
+   CTA_ObsDescr obsdescr_sub;/* Observation description of subset of
+                                observations */
+   int nobs;                 /* number of observations in obsdescr_sub */
+   CTA_RelTable reltab;      /* Relation table for selection of
+                                observations within a give timespan */
+   CTA_Vector vals;          /* predicted values in a sub-step */
+   double eps;               /* small value for comparing doubles */
+   CTA_Time currentTime;     /* time instance of the model */
+   int isspan;               /* flag indicating whether input is timespan */
+
+   /* Time management */
+   retval=CTA_Time_GetSpan(htime,&tstart,&tstop);
+   retval=CTA_Time_IsSpan(htime, &isspan);
+   if (isspan!=CTA_TRUE) {
+      // Get the current time of the model
+      CTA_Time_Create(&currentTime);
+      CTA_Model_GetCurrentTime(hmodel,currentTime);
+      retval=CTA_Time_GetSpan(currentTime,&t1,&t2);
+      tstart=t1;
+      CTA_Time_SetSpan(htime,tstart,tstop);
+      CTA_Time_Free(&currentTime);
+   }
+
+   if (IDEBUG) printf("entering cta_model_compute: tstart,tstop: %d %f %f \n",htime,tstart,tstop);
+
+   if (IDEBUG) printf("CTA_MODEL DEBUG: Calling compute function \n");
+
+   /* Check type of handle */
+   retval=CTA_Handle_Check((CTA_Handle) hmodel,CTA_MODEL);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_model handle");
+       return retval;
+   }
+
+   /* Get Modeldata object */
+   retval=CTA_Handle_GetData((CTA_Handle) hmodel, (void*) &data);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Cannot retrieve handle data");
+      return retval;
+   }
+
+   /* Get Function pointer */
+   if (data->functions[I_CTA_MODEL_COMPUTE]==CTA_NULL) return CTA_NOT_IMPLEMENTED;
+   retval=CTA_Func_GetFunc(data->functions[I_CTA_MODEL_COMPUTE],&function);
+   if (retval!=CTA_OK)  {
+	   CTA_WRITE_ERROR("Cannot get function I_CTA_MODEL_COMPUTE");
+	   return retval;
+   }
+
+   /* Do we need to handle announced observations? */
+   ntimes=data->announced.ntimes;
+   if (IDEBUG) printf("We have %d times with announced observations\n",ntimes);
+   if (ntimes>0){
+      int itime;
+      /* Do we need to interrupt the computations? */
+
+      if (IDEBUG>0) {
+         for (itime=0;itime<ntimes;itime++){
+            printf("%f ",data->announced.times[itime]);
+         }
+         printf("First announced time %f\n",data->announced.times[0]);
+         printf("Last announced time  %f\n",data->announced.times[ntimes-1]);
+         printf("compute interval [%f,%f]\n",tstart,tstop);
+      }
+      /* check whether announce is within this compute interval */
+      if (data->announced.times[0]<=tstart ||
+         tstop <data->announced.times[ntimes-1]){
+         fprintf(stderr,"Error: Interval of announced observations lies before current time of the model\n");
+         fprintf(stderr,"Tstart= %g, first announced=%g\n",
+            tstart,data->announced.times[0]);
+         fprintf(stderr,"Tstop= %g, last announced=%g\n",
+            tstop,data->announced.times[ntimes-1]);
+         return CTA_ANNOUNCED_OBS_INTERVAL_ERROR;
+      }
+
+      /* Create relation table for observations */
+      retval=CTA_RelTable_Create(&reltab);
+      if (retval!=CTA_OK)  {
+         CTA_WRITE_ERROR("Cannot create RelTable");
+         return retval;
+      }
+
+      /* determine number of steps in the time-loop by checking whether
+         the last observation is at time tstop */
+      eps=M_EPS*MAX(fabs(tstart),fabs(tstop))+M_EPS;
+      if (fabs(tstop-data->announced.times[ntimes-1])<eps){
+         ntimes_loop=ntimes;
+      } else {
+         ntimes_loop=ntimes+1;
+      }
+       if (IDEBUG>0) {printf("Number of steps in compute is %d\n",ntimes_loop);}
+
+       /* loop over all intervals of observations */
+      CTA_Time_Create(&hstep);
+      CTA_Time_SetSpan(hstep,tstart,data->announced.times[0]);
+
+      for (itime=0;itime<ntimes_loop;itime++){
+         if (IDEBUG>0){
+            CTA_Time_GetSpan(hstep,&t1,&t2);
+            printf("running from %f to %f \n",t1,t2);
+         }
+
+         /* create an obs-selection for this timestep */
+
+         retval=CTA_ObsDescr_CreateTimSel(data->announced.ObsDescr, hstep,
+            reltab, &obsdescr_sub);
+         if (retval!=CTA_OK)  {
+            char message[1024];
+            sprintf(message,"Cannot create obs-selection for timestep %d",hstep);
+            CTA_WRITE_ERROR(message);
+            return retval;
+        }
+
+         /* Determine the number of observations */
+         retval=CTA_ObsDescr_Observation_Count(obsdescr_sub, &nobs);
+         if (retval!=CTA_OK) {
+            CTA_WRITE_ERROR("Cannot determine the number of observations");
+            return retval;
+         }
+
+         if (IDEBUG) {printf("Number of obs in this step are %d\n",nobs);}
+
+         /* perform timestep(s) */
+         retval = CTAI_Model_PerformTimesteps(hmodel,function,hstep,CTA_TRUE);
+         if (retval!=CTA_OK) {
+            char message[1024];
+            sprintf(message,"Error in performing timestep %d",hstep);
+            CTA_WRITE_ERROR(message);
+            return retval;
+         }
+
+         /* create vector for holding the observations */
+         retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR,nobs,CTA_DOUBLE,CTA_NULL,
+            &vals);
+         if (retval!=CTA_OK) {
+            CTA_WRITE_ERROR("Cannot create vector");
+            return retval;
+         }
+
+         /* get observed values */
+         retval=CTA_Model_GetObsValues(hmodel,hstep,obsdescr_sub,vals);
+         if (retval!=CTA_OK) {
+            char message[1024];
+            sprintf(message,"Cannot get observated values of timestep %d",hstep);
+            CTA_WRITE_ERROR(message);
+            return retval;
+         }
+
+         /* copy observed values in global vector */
+         retval=CTA_RelTable_ApplyInv(reltab, vals, data->announced.values);
+         if (retval!=CTA_OK) {
+            CTA_WRITE_ERROR("Cannot copy observed values in global vector");
+            return retval;
+         }
+
+         if (IDEBUG) {
+            printf("predicted values:\n");
+            CTA_Vector_Export(data->announced.values,CTA_FILE_STDOUT);
+         }
+         /* Free variables */
+         retval=CTA_Vector_Free(&vals);
+         if (retval!=CTA_OK) {
+            CTA_WRITE_ERROR("Cannot free vector");
+            return retval;
+         }
+
+         retval=CTA_ObsDescr_Free(&obsdescr_sub);
+         if (retval!=CTA_OK) {
+            CTA_WRITE_ERROR("Cannot free ObsDescr??");
+            return retval;
+         }
+
+
+         /* set timespan for next loop */
+         if (itime<ntimes-1){
+            CTA_Time_SetSpan(hstep,data->announced.times[itime],
+                                   data->announced.times[itime+1]);
+         } else {
+            CTA_Time_SetSpan(hstep,data->announced.times[ntimes-1],tstop);
+         }
+      }
+
+      /* Mark announced observations as being set */
+      data->announced.isset=TRUE;
+
+      /* Free variables */
+      retval=CTA_Time_Free(&hstep);
+      if (retval!=CTA_OK) {
+         CTA_WRITE_ERROR("Cannot free time");
+         return retval;
+      }
+
+      retval=CTA_RelTable_Free(&reltab);
+      if (retval!=CTA_OK) {
+         CTA_WRITE_ERROR("Cannot free RelTable");
+         return retval;
+      }
+
+      if (IDEBUG) printf("CTA_MODEL DEBUG: (1) Return code export function %d \n", retval);
+
+  } else {
+     /* No model handles, we only need to call the compute method */
+      retval=CTA_Time_GetSpan(htime,&tstart,&tstop);
+      retval = CTAI_Model_PerformTimesteps(hmodel,function,htime,CTA_TRUE);
+      if (IDEBUG) printf("CTA_MODEL DEBUG: (2) Return code compute function %d \n",
+         retval);
+      return retval;
+  }
+  return CTA_OK;
+}
+
+
+
+int CTA_Model_SetState(
+   CTA_Model hmodel,  /* handle of model              */
+   CTA_TreeVector hstate    /* model state that must be set */
+){
+
+   return CTAI_Model_GetSet(hmodel, CTA_NULL, &hstate,I_CTA_MODEL_SET_STATE, -1);
+}
+
+int CTA_Model_GetStateScaling(
+   CTA_Model hmodel,         /* handle of model       */
+   CTA_TreeVector *hstate    /* returned scaling vector for model state  */
+){
+   return CTAI_Model_GetSet(hmodel, CTA_NULL, hstate,
+                            CTA_MODEL_GET_STATESCALING, -1);
+}
+
+
+int CTAI_Model_GetObsLocalization(
+   CTA_Model hmodel,       /* handle of model       */
+   CTA_ObsDescr hdescr,    /* observation description */
+   double distance,        /* characteristic distance */
+   int iDomain,            /* sub domain number (<0 whole model) */
+   CTA_Vector locVecs
+){
+
+   /* Local variables */
+   int retval;               /* Return value of COSTA call   */
+   CTA_Function *function;   /* Function that must be called */
+   CTAI_Model *data;         /* Data associated to model     */
+   int iObs, nObs;
+   double one=1.0;
+   CTA_TreeVector hState;
+
+   int domain_version;
+
+
+   domain_version=iDomain>=0;
+
+   if (domain_version){
+        retval=CTAI_Model_GetDataAndFunc(hmodel, CTA_MODEL_GET_OBSLOCALIZATIONDOMAIN, &data, &function);
+   }
+   else {
+        retval=CTAI_Model_GetDataAndFunc(hmodel, I_CTA_MODEL_GETOBSLOCALIZATION, &data, &function);
+   }
+
+   if (retval == CTA_NOT_IMPLEMENTED){
+      printf("Debug: CTA_Model_GetObsLocalization: No localization function is implemented for this model\n");
+      printf("Debug: Consider running method without localization (if possible), this will improve performance \n");
+      /* Return identity localization */
+
+      retval = CTA_Vector_GetSize(locVecs, &nObs);
+      printf("Lengte van de obs-vector is %d\n",nObs);
+
+      for (iObs=1; iObs<=nObs; iObs++){
+         CTA_Vector_GetVal(locVecs, iObs, &hState, CTA_HANDLE);
+         if (domain_version){
+            CTA_Model_GetStateDomain(hmodel,iDomain, &hState);
+         }
+         else {
+            CTA_Model_GetState(hmodel,&hState);
+         }
+         CTA_TreeVector_SetConstant(hState, &one, CTA_DOUBLE );
+         CTA_Vector_SetVal(locVecs, iObs, &hState, CTA_HANDLE);
+      }
+      return retval;
+   }
+   else if (retval == CTA_OK) {
+      /* Call user supplied implementation */
+      if (IDEBUG) printf("CTA_MODEL DEBUG: Calling Localization function \n");
+      if (domain_version){
+         function(data->data, &hdescr, &distance, iDomain, &locVecs, &retval);
+      }
+      else {
+         function(data->data, &hdescr, &distance, &locVecs, &retval);
+      }
+      if (IDEBUG) printf("CTA_MODEL DEBUG: Return code Localization function %d \n",
+         retval);
+      return retval;
+   }
+   else {
+      /* Error in retrieving function handle */
+      CTA_WRITE_ERROR("Error in CTAI_Model_GetDataAndFunc");
+      return retval;
+   }
+}
+
+
+int CTA_Model_GetObsLocalization(
+   CTA_Model hmodel,       /* handle of model       */
+   CTA_ObsDescr hdescr,    /* observation description */
+   double distance,        /* characteristic distance */
+   CTA_Vector locVecs
+){
+ 
+   return CTAI_Model_GetObsLocalization(hmodel,hdescr, distance, -1, locVecs);
+
+}
+
+
+
+
+
+int CTA_Model_GetState(
+   CTA_Model hmodel,   /* handle of model       */
+   CTA_TreeVector *hstate    /* returned model state  */
+){
+  //  printf("cta_model_getstate : model %d state %d\n  ",hmodel,*hstate);
+   return CTAI_Model_GetSet(hmodel, CTA_NULL, hstate, I_CTA_MODEL_GET_STATE, -1);
+}
+
+int CTA_Model_SetForc(
+   CTA_Model hmodel,  /* handle of model                        */
+   CTA_Time tspan,
+   CTA_TreeVector hforc    /* model forcings values that must be set */
+){
+   return CTAI_Model_GetSet(hmodel,tspan, &hforc,CTA_MODEL_SET_FORC, -1);
+}
+
+int CTA_Model_GetForc(
+   CTA_Model hmodel,   /* handle of model                  */
+   CTA_Time tspan,
+   CTA_TreeVector *hforc    /* returned model forcings values  */
+){
+   return CTAI_Model_GetSet(hmodel,tspan, hforc,CTA_MODEL_GET_FORC, -1);
+
+}
+
+int CTA_Model_AxpyForc(
+   CTA_Model hmodel,    /* handle of model  */
+   CTA_Time tspan,
+   double alpha,        /* alpha of axpy    */
+   CTA_TreeVector h_x        /* x-vector of axpy */
+){
+   return CTAI_Model_Axpy(hmodel, tspan, alpha, h_x, CTA_MODEL_AXPY_FORC,
+       CTA_MODEL_GET_FORC,CTA_MODEL_SET_FORC);
+}
+
+int CTA_Model_SetParam(
+   CTA_Model hmodel,  /* handle of model                   */
+   CTA_TreeVector hparam   /* model parameters that must be set */
+){
+   return CTAI_Model_GetSet(hmodel,CTA_NULL, &hparam,CTA_MODEL_SET_PARAM, -1);
+}
+
+int CTA_Model_GetParam(
+   CTA_Model hmodel,   /* handle of model             */
+   CTA_TreeVector *hparam   /* returned model parameters  */
+){
+   return CTAI_Model_GetSet(hmodel,CTA_NULL, hparam,CTA_MODEL_GET_PARAM, -1);
+}
+
+int CTA_Model_AxpyParam(
+   CTA_Model hmodel,    /* handle of model  */
+   double alpha,        /* alpha of axpy    */
+   CTA_TreeVector h_x        /* x-vector of axpy */
+){
+   return CTAI_Model_Axpy(hmodel, CTA_NULL, alpha, h_x, CTA_MODEL_AXPY_PARAM,
+       CTA_MODEL_GET_PARAM,CTA_MODEL_SET_PARAM);
+}
+
+/* AXPY operation for models: add alpha times a vector to a model
+   Importante notice: the usual order of the arguments differs from
+   the order we use in this function. In BLAS we have:
+   y=alpha*x+y, yielding in a call like axpy(alpha,x,y)
+
+   For the model axpy, the model state is y.! Because the model handle
+   is the first argument in all COSTA model calls. axpy for models
+   is defined by;
+   axpy(y,alpha,x)
+
+   The "vector" x can have two different types
+   1) CTA_TreeVector (COSTA state vector)
+   2) CTA_MODEL (COSTA model)
+*/
+#undef METHOD
+#define METHOD "AxpyState"
+int CTA_Model_AxpyState(
+   CTA_Model  hmodel,   /* y element in axpy (handle of model)            */
+   double     alpha,    /* scalar multyplication factor for x             */
+   CTA_Handle h_x       /* x element in axpy state handle of model handle */
+){
+
+   /* Local variables */
+   int retval;               /* Return value of COSTA call      */
+   CTA_Function *function;   /* Function that must be called    */
+   CTAI_Model *data_y;       /* Data assosiated to model hmodel */
+   CTAI_Model *data_x;       /* Data assosiated to model h_x    */
+   CTA_TreeVector hstate_x;  /* State vector   (x)              */
+   CTA_TreeVector hstate_y;  /* State vector   (y)              */
+   BOOL same_models;         /* flag: model class of hmode and h_x are the same */
+   CTA_Handle dt;
+
+   /* Check type of model handle */
+   retval=CTA_Handle_Check((CTA_Handle) hmodel,CTA_MODEL);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_model handle");
+       return retval;
+   }
+
+   /* Get Modeldata object */
+   retval=CTA_Handle_GetData((CTA_Handle) hmodel, (void*) &data_y);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Cannot retrieve handle data");
+      return retval;
+   }
+
+   /* Clear the announced administration */
+   CTAI_Clear_Announced(&(data_y->announced), FALSE);
+   retval = CTA_Handle_GetDatatype(h_x, &dt);
+   if (retval != CTA_OK) {return retval; }
+
+   if (dt == CTA_MODEL) {
+
+   /* This is a MODEL+alpha*MODEL axpy */
+      /* Get Modeldata object of x */
+      retval=CTA_Handle_GetData((CTA_Handle) h_x, (void*) &data_x);
+      if (retval!=CTA_OK) {
+         CTA_WRITE_ERROR("Cannot retrieve handle data");
+         return retval;
+      }
+
+      /* check whether models are of the same model class */
+      same_models=(data_y->hmodcl==data_x->hmodcl);
+
+      /* If models are not the same or no user implementation is available: */
+      if (same_models && data_y->functions[CTA_MODEL_AXPY_MODEL]==CTA_NULL)
+      {
+         /* Use default implementation :      */
+         /* -> get state from model h_x       */
+         /* -> call CTA_Model_Axpy with state */
+
+         hstate_x=CTA_NULL;
+         retval=CTA_Model_GetState(h_x,&hstate_x);
+         if (retval!=CTA_OK) {
+            CTA_WRITE_ERROR("Cannot get state");
+            return retval;
+         }
+
+         retval=CTA_Model_AxpyState(hmodel,alpha,hstate_x);
+         if (retval!=CTA_OK){
+     	    CTA_WRITE_ERROR("Cannot get AxpyState");
+	        return retval;
+         } 
+
+         retval=CTA_TreeVector_Free(&hstate_x, CTA_TRUE);
+         if (retval!=CTA_OK) {
+     	    CTA_WRITE_ERROR("Cannot free tree vector");
+	        return retval;
+         } 
+      }
+      else
+      {
+         /* Use user implementation */
+         retval=CTA_Func_GetFunc(data_y->functions[CTA_MODEL_AXPY_MODEL],&function);
+         if (retval!=CTA_OK) {
+     	    CTA_WRITE_ERROR("Cannot get function CTA_MODEL_AXPY_MODEL");
+	        return retval;
+         } 
+         function(data_y->data, &alpha, data_x->data, &retval);
+         if (retval!=CTA_OK) {
+     	    CTA_WRITE_ERROR("Error in function CTA_MODEL_AXPY_MODEL");
+	        return retval;
+         } 
+      }
+   }
+   else{
+   /* This is a MODEL+alpha*STATE axpy */
+
+
+      if (dt != CTA_TREEVECTOR) {
+
+         CTA_WRITE_ERROR("Handle is not a cta_treevector");
+         return CTA_ILLEGAL_HANDLE;
+      }
+
+      /* Check whether user implementation is available */
+      if (data_y->functions[CTA_MODEL_AXPY_STATE]==CTA_NULL)
+      {
+         /* Use default implementation */
+         hstate_y=CTA_NULL;
+         retval=CTA_Model_GetState(hmodel,&hstate_y);
+         if (retval!=CTA_OK) {
+            CTA_WRITE_ERROR("Cannot get state");
+            return retval;
+         }
+
+         retval=CTA_TreeVector_Axpy(hstate_y,alpha, h_x);
+         if (retval!=CTA_OK) {
+            CTA_WRITE_ERROR("Error in CTA_TreeVector_Axpy");
+            return retval;
+         }
+         retval=CTA_Model_SetState(hmodel,hstate_y);
+         if (retval!=CTA_OK) {
+            CTA_WRITE_ERROR("Error in CTA_Model_SetState");
+            return retval;
+         }
+         retval=CTA_TreeVector_Free(&hstate_y, CTA_TRUE);
+         if (retval!=CTA_OK)  {
+            CTA_WRITE_ERROR("Cannot free TreeVector");
+            return retval;
+         }
+      }
+      else
+      {
+         /* Use user implementation */
+         retval=CTA_Func_GetFunc(data_y->functions[CTA_MODEL_AXPY_STATE],&function);
+         if (retval!=CTA_OK)  {
+            CTA_WRITE_ERROR("Cannot get function CTA_MODEL_AXPY_STATE");
+            return retval;
+         }
+         function(data_y->data, &alpha, &h_x, &retval);
+//         function(&alpha, &h_x, data_y->data,&retval);
+         if (retval!=CTA_OK)  {
+            CTA_WRITE_ERROR("Error in function CTA_MODEL_AXPY_STATE");
+            return retval;
+         }
+      }
+   }
+   return CTA_OK;
+}
+
+
+
+
+#undef METHOD
+#define METHOD "GetNoiseCovar" 
+int CTA_Model_GetNoiseCovar(
+   CTA_Model hmodel,   /* handle of model       */
+   CTA_TreeVector *hstmat   /* returned state-matrix */
+){
+
+   /* Local variables */
+   int retval;               /* Return value of COSTA call   */
+   CTA_Function *function;   /* Function that must be called */
+   CTAI_Model *data;         /* Data assosiated to model     */
+
+
+   /* Check type of handle */
+   retval=CTA_Handle_Check((CTA_Handle) hmodel,CTA_MODEL);
+   if (retval!=CTA_OK)  {
+       CTA_WRITE_ERROR("Handle is not a cta_model handle");
+       return retval;
+   }
+
+   /* Get Modeldata object */
+   retval=CTA_Handle_GetData((CTA_Handle) hmodel, (void*) &data);
+   if (retval!=CTA_OK)  {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   /* Get Function pointer */
+   if (data->functions[CTA_MODEL_GET_NOISE_COVAR]==CTA_NULL){
+	  CTA_WRITE_ERROR("Error in function ...");
+	  return CTA_NOT_IMPLEMENTED;
+   }
+
+   retval=CTA_Func_GetFunc(data->functions[CTA_MODEL_GET_NOISE_COVAR],&function);
+   if (retval!=CTA_OK) {
+	  CTA_WRITE_ERROR("Cannot get function CTA_MODEL_GET_NOISE_COVAR");
+	  return CTA_NOT_IMPLEMENTED;
+   }
+
+   /* Call function */
+   function(data->data,hstmat,&retval);
+
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "GetNoiseCount" 
+int CTA_Model_GetNoiseCount(
+   CTA_Model hmodel,   /* handle of model       */
+   int *nnoise         /* returned number of noise parameters */
+){
+
+   /* Local variables */
+   int retval;               /* Return value of COSTA call   */
+   CTA_Function *function;   /* Function that must be called */
+   CTAI_Model *data;         /* Data assosiated to model     */
+
+   /* Check type of handle */
+   retval=CTA_Handle_Check((CTA_Handle) hmodel,CTA_MODEL);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_model handle");
+       return retval;
+   }
+
+   /* Get Modeldata object */
+   retval=CTA_Handle_GetData((CTA_Handle) hmodel, (void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   /* Get Function pointer */
+   if (data->functions[CTA_MODEL_GET_NOISE_COUNT]==CTA_NULL) return CTA_NOT_IMPLEMENTED;
+   retval=CTA_Func_GetFunc(data->functions[CTA_MODEL_GET_NOISE_COUNT],&function);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function CTA_MODEL_GET_COISE_COUNT");
+	   return retval;
+   }
+
+   /* Call function */
+   function(data->data,nnoise,&retval);
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "Free" 
+int CTA_Model_Free(
+   CTA_Model *hmodel  /* Handle of model  */
+   ){
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Model *data;       /* All data of hmodel  */
+   CTA_Function *function; /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+
+   if (*hmodel==CTA_NULL) return CTA_OK;
+
+   retval=CTA_Handle_Check((CTA_Handle) *hmodel,CTA_MODEL);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_model handle");
+       return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) *hmodel, (void*) &data);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Cannot retrieve handle data");
+       return retval;
+   }
+
+   /* call user implementation if available */
+   if (data->functions[I_CTA_MODEL_FREE]!=CTA_NULL){
+      retval=CTA_Func_GetFunc(data->functions[I_CTA_MODEL_FREE],&function);
+      if (retval!=CTA_OK) return retval;
+      function(data->data,&retval);
+   }
+   free(data->data);
+   free(data);
+   retval=CTA_Handle_Free(hmodel);
+
+   return retval;
+};
+
+/** \brief Fill the struct announced with info from the given
+ *         observation description.
+ *
+ * \param hdescr    I Observation description with annonced observations
+ * \param announced O handle of model instance
+ * \return error status: CTA_OK if successful
+ */
+#undef METHOD
+#define METHOD "AnnounceObsValues_Set" 
+int CTA_Model_AnnounceObsValues_Set(
+  CTA_ObsDescr hdescr,          /* observation description */
+  CTAI_AnnoinceObs *announced   /* data-block of model     */
+){
+   /* Local variables */
+   int nobs;                 /* Number of observations       */
+   double *allTimes;         /* Array with all available times */
+   CTA_Vector vtimes;        /* Vector with all available times */
+   int info;
+   int nunique;              /* Number of unique times of observations */
+   double lastTime;          /* Previous time in list                  */
+   int i,itime;              /* Loop counters */
+   char direc;               /* Sorting direction */
+
+   /* free all administration that might be available */
+   CTAI_Clear_Announced(announced, FALSE);
+
+   if (hdescr!=CTA_NULL) {
+      int retval;               /* Return value of COSTA call   */
+
+      /* If there are any observations in the description */
+      retval=CTA_ObsDescr_Observation_Count(hdescr, &nobs);
+      if (retval!=CTA_OK) {
+         CTA_WRITE_ERROR("There are not any observations in the description");
+         return retval;
+      }
+
+      if (IDEBUG) {printf("CTA_Model_AnnounceObsValues_Set: nobs=%d\n",nobs);}
+
+      if (nobs>0){
+         /* Allocate an vector for holding all predicted values */
+         retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR,nobs,
+                                  CTA_DOUBLE,CTA_NULL,&(announced->values));
+         if (retval!=CTA_OK) {
+            CTA_WRITE_ERROR("Cannot create vector");
+            return retval;
+         }
+
+         /* Allocate vector for holding all times of the observations */
+         retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR,nobs,
+                                  CTA_DOUBLE,CTA_NULL,&vtimes);
+         if (retval!=CTA_OK) {
+            CTA_WRITE_ERROR("Cannot create vector");
+            return retval;
+         }
+
+         /* Get times associated to all observations */
+         retval=CTA_ObsDescr_Get_ValueProperties(hdescr, "TIME", vtimes, CTA_DOUBLE);
+         if (retval!=CTA_OK) {
+            CTA_WRITE_ERROR("Cannot get times associated to all observations");
+            return retval;
+         }
+
+         /* Get all values in a C-array */
+         allTimes=CTA_Malloc(nobs*sizeof(double));
+         retval=CTA_Vector_GetVals(vtimes,allTimes,nobs,CTA_DOUBLE);
+         if (retval!=CTA_OK) {
+            CTA_WRITE_ERROR("Cannot get values in array");
+            return retval;
+         }
+
+         /* Sort times */
+         direc='I';
+         DLASRT_F77(&direc, &nobs, allTimes, &info ,1);
+
+         /* count the number of different times in allTimes */
+         nunique=1;
+         lastTime=allTimes[0];
+         for (i=1;i<nobs;i++){
+            if (lastTime!=allTimes[i]){
+               nunique++;
+               lastTime=allTimes[i];
+            }
+         }
+         if (IDEBUG>0) {
+            printf("CTA_Model_AnnounceObsValues_Set: observations at %d times\n",
+               nunique);
+            printf("CTA_Model_AnnounceObsValues_Set: times are: \n");
+
+            for (itime=0;itime<nunique; itime++){
+               printf("%f ",allTimes[itime]);
+            }
+            printf("\n");
+         }
+
+         /* allocate array times (only containing the unique times */
+         announced->times=CTA_Malloc(nunique*sizeof(double));
+         announced->ntimes=nunique;
+         announced->isset=FALSE;
+         announced->times[0]=allTimes[0];
+         nunique=0;
+         for (i=1;i<nobs;i++){
+            if (announced->times[nunique]!=allTimes[i]){
+               nunique++;
+               announced->times[nunique]=allTimes[i];
+                       }
+         }
+         free(allTimes);
+         CTA_Vector_Free(&vtimes);
+         CTA_ObsDescr_Create(CTA_OBSDESCR_TABLE, hdescr,
+                             &(announced->ObsDescr));
+      }
+   }
+   return CTA_OK;
+}
+
+
+#undef METHOD
+#define METHOD "AnnounceObsValues"
+int CTA_Model_AnnounceObsValues(
+  CTA_Model hmodel,    /* handle of model               */
+  CTA_ObsDescr hdescr  /* observation description       */
+){
+   /* Local variables */
+   int retval;               /* Return value of COSTA call   */
+   CTA_Function *function;   /* Function that must be called */
+   CTAI_Model *data;         /* Data assosiated to model     */
+
+   if (IDEBUG) {printf("CTA_Model_AnnounceObsValues: start\n");}
+   if (IDEBUG) {printf("CTA_Model_AnnounceObsValues: hmodel=%d, hdescr=%d\n",
+      hmodel,hdescr);}
+
+   /* Check type of handle */
+   retval=CTA_Handle_Check((CTA_Handle) hmodel,CTA_MODEL);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Handle is not a cta_model handle");
+      return retval;
+   }
+
+   /* Get Modeldata object */
+   retval=CTA_Handle_GetData((CTA_Handle) hmodel, (void*) &data);
+   if (retval!=CTA_OK)  {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   /* Get Function pointer */
+   if (data->functions[CTA_MODEL_ANNOUNCE_OBSVALUES]!=CTA_NULL) {
+      if (IDEBUG) {printf("CTA_Model_AnnounceObsValues: using user-implementation\n");}
+
+      /* User implementation is available of this method */
+      retval=CTA_Func_GetFunc(data->functions[CTA_MODEL_ANNOUNCE_OBSVALUES],&function);
+      if (retval!=CTA_OK)  {
+	     CTA_WRITE_ERROR("Cannot get function CTA_Model_AnnoundeObsValues");
+	     return retval;
+      }
+
+      /* Call function */
+      function(data->data, &hdescr, &retval);
+   } else {
+      if (IDEBUG) {printf("CTA_Model_AnnounceObsValues: using costa-implementation\n");}
+
+      /* Not available, then COSTA will try to do it for you :*/
+      /* COSTA will fill the announced administration: */
+      retval=CTA_Model_AnnounceObsValues_Set(hdescr, &(data->announced));
+      if (retval!=CTA_OK)  {
+	     CTA_WRITE_ERROR("Cannot fill announced administation");
+	     return retval;
+      }
+   }
+   return CTA_OK;
+}
+
+
+#undef METHOD
+#define METHOD "GetObsValues" 
+int CTA_Model_GetObsValues(
+  CTA_Model hmodel,    /* handle of model               */
+  CTA_Time htime,      /* timespan of simulation        */
+  CTA_ObsDescr hdescr, /* observation description       */
+  CTA_Vector values   /* returned (interpolated) values */
+){
+   /* Local variables */
+   int retval;               /* Return value of COSTA call   */
+   CTA_Function *function;   /* Function that must be called */
+   CTAI_Model *data;         /* Data assosiated to model     */
+
+   /* Check type of handle */
+   retval=CTA_Handle_Check((CTA_Handle) hmodel,CTA_MODEL);
+   if (retval!=CTA_OK)  {
+       CTA_WRITE_ERROR("Handle is not a cta_model handle");
+       return retval;
+   }
+
+   /* Get Modeldata object */
+   retval=CTA_Handle_GetData((CTA_Handle) hmodel, (void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   /* Do we have announced obs */
+   if (data->announced.isset){
+
+     /* We shoud check whether obsdescr is same as announced
+        But for now we will not check just return the values
+     */
+
+     CTA_Vector_Copy(data->announced.values,values);
+
+     /* We will now free the internal administration */
+     CTAI_Clear_Announced(&(data->announced), FALSE);
+
+      return CTA_OK;
+   } else {
+      /* We do not have announced observations just call user function */
+
+      /* Get Function pointer */
+      if (data->functions[CTA_MODEL_GET_OBSVALUES]==CTA_NULL){
+         return CTA_NOT_IMPLEMENTED;
+      }
+      retval=CTA_Func_GetFunc(data->functions[CTA_MODEL_GET_OBSVALUES],
+         &function);
+      if (retval!=CTA_OK) {
+	     CTA_WRITE_ERROR("Cannot get function CTA_MODEL_GET_OBSVALUES");
+	     return retval;
+      }
+
+      /* Call function */
+      function(data->data,&htime, &hdescr, &values, &retval);
+
+      return retval;
+   }
+}
+
+int CTA_Model_GetObsSelect(
+  CTA_Model hmodel,    /* handle of model               */
+  CTA_Time htime,      /* timespan of simulation        */
+  CTA_ObsDescr hdescr, /* observation description       */
+  CTA_String sselect   /* query to filter out the observations */
+){
+   /* Local variables */
+   int retval;               /* Return value of COSTA call   */
+   CTA_Function *function;   /* Function that must be called */
+   CTAI_Model *data;         /* Data associated to model     */
+
+   if (IDEBUG>10) {printf("cta_model_getobsselect START \n");}
+
+   /* Check type of handle */
+   retval=CTA_Handle_Check((CTA_Handle) hmodel,CTA_MODEL);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_model handle");
+	   return retval;
+   }
+   /* Get Modeldata object */
+   retval=CTA_Handle_GetData((CTA_Handle) hmodel, (void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   /* Get Function pointer */
+   if (data->functions[CTA_MODEL_GET_OBSSELECT]==CTA_NULL) {
+      /* Obsselect is not implemented : return "*" */
+      retval=CTA_String_Set(sselect," 1 >= 0 ");
+      return retval;
+
+   } else {
+      retval=CTA_Func_GetFunc(data->functions[CTA_MODEL_GET_OBSSELECT],&function);
+
+      if (retval!=CTA_OK) {
+	     char message[1024];
+         sprintf(message,"cta_model_getobsselect: user function not found %d \n",retval);
+		 CTA_WRITE_ERROR(message);
+         return retval;
+      }
+
+      if (IDEBUG>10) {printf("cta_model_getobsselect call user function \n");}
+
+      /* Call function */
+      function(data->data,&htime, &hdescr, &sselect, &retval);
+      return retval;
+   }}
+
+
+#undef METHOD
+#define METHOD "Export" 
+int CTA_Model_Export(
+  CTA_Model hmodel,     /* handle of model                                      */
+  CTA_Handle hexport    /* export object (e.q. handle to file or pack instance) */
+){
+   /* Local variables */
+   int retval;               /* Return value of COSTA call   */
+   CTA_Function *function;   /* Function that must be called */
+   CTAI_Model *data;         /* Data associated to model     */
+
+   retval=CTAI_Model_GetDataAndFunc(hmodel, I_CTA_MODEL_EXPORT, &data, &function);
+   if (retval!=CTA_OK)  {
+      CTA_WRITE_ERROR("Error in CTAI_Model_Get_DataAndFunk");
+      return retval;
+   }
+
+   if (IDEBUG) printf("CTA_MODEL DEBUG: Calling export function \n");
+   function(data->data, &hexport, &retval);
+   if (IDEBUG) printf("CTA_MODEL DEBUG: Return code export function %d \n",
+      retval);
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "Import" 
+int CTA_Model_Import(
+  CTA_Model hmodel,     /* handle of model                                      */
+  CTA_Handle himport     /* import object (e.q. handle to file or pack instance) */
+){
+   /* Local variables */
+   int retval;               /* Return value of COSTA call   */
+   CTA_Function *function;   /* Function that must be called */
+   CTAI_Model *data;         /* Data associated to model     */
+
+   retval=CTAI_Model_GetDataAndFunc(hmodel, I_CTA_MODEL_IMPORT, &data, &function);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Error in GetDataAndFunc");
+      return retval;
+   }
+
+   if (IDEBUG) printf("CTA_MODEL DEBUG: Calling import function \n");
+   function(data->data, &himport, &retval);
+   if (IDEBUG) printf("CTA_MODEL DEBUG: Return code import function %d \n",
+      retval);
+   return retval;
+}
+
+
+#undef METHOD
+#define METHOD "GetTimeHorizon" 
+int CTA_Model_GetTimeHorizon(
+  CTA_Model hmodel,
+  CTA_Time tHorizon
+){
+   /* Local variables */
+   int retval;               /* Return value of COSTA call   */
+   CTA_Function *function;   /* Function that must be called */
+   CTAI_Model *data;         /* Data associated to model     */
+
+   retval=CTAI_Model_GetDataAndFunc(hmodel, CTA_MODEL_GET_TIMEHORIZON, &data, &function);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Error in CTAI_Model_GetDataAndFunc");
+      return retval;
+   }
+
+   if (IDEBUG) printf("CTA_MODEL DEBUG: Calling TimeHorizon function \n");
+   function(data->data, &tHorizon, &retval);
+   if (IDEBUG) printf("CTA_MODEL DEBUG: Return code TimeHorizon function %d \n",
+      retval);
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "GetCurrentTime" 
+int CTA_Model_GetCurrentTime(
+  CTA_Model hmodel,
+  CTA_Time tCurrent
+){
+   /* Local variables */
+   int retval;               /* Return value of COSTA call   */
+   CTA_Function *function;   /* Function that must be called */
+   CTAI_Model *data;         /* Data associated to model     */
+
+   retval=CTAI_Model_GetDataAndFunc(hmodel, CTA_MODEL_GET_CURRENTTIME, &data, &function);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Error in CTAI_Model_GetDataAndFunc");
+      return retval;
+   }
+
+   if (IDEBUG) printf("CTA_MODEL DEBUG: Calling CurrentTime function \n");
+   function(data->data, &tCurrent, &retval);
+   if (IDEBUG) printf("CTA_MODEL DEBUG: Return code CurrentTime function %d \n",
+      retval);
+   return retval;
+}
+
+
+#undef METHOD
+#define METHOD "AdjCompute" 
+int CTA_Model_AdjCompute(CTA_Model hmodel, CTA_Time time){
+   /* Local variables */
+   int retval;               /* Return value of COSTA call   */
+   CTA_Function *function;   /* Function that must be called */
+   CTAI_Model *data;         /* Data associated to model     */
+
+   retval=CTAI_Model_GetDataAndFunc(hmodel, CTA_MODEL_ADJ_COMPUTE, &data, &function);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Error in CTAI_Model_GetDataAndFunc");
+      return retval;
+   }
+
+   function(data->data, &time, &retval);
+   return retval;
+}
+
+
+#undef METHOD
+#define METHOD "AdjPrepare" 
+int CTA_Model_AdjPrepare(CTA_Model hmodel, CTA_Time time){
+   /* Local variables */
+   int retval;               /* Return value of COSTA call   */
+   CTA_Function *function;   /* Function that must be called */
+   CTAI_Model *data;         /* Data associated to model     */
+
+   retval=CTAI_Model_GetDataAndFunc(hmodel, CTA_MODEL_ADJ_PREPARE, &data, &function);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Error in CTAI_Model_GetDataAndFunc");
+      return retval;
+   }
+
+   function(data->data, &time, &retval);
+   return retval;
+}
+
+
+#undef METHOD
+#define METHOD "AdjSetForc"
+int CTA_Model_AdjSetForc(CTA_Model hmodel, CTA_ObsDescr hdescr, CTA_Vector vforc){
+
+   /* Local variables */
+   int retval;               /* Return value of COSTA call   */
+   CTA_Function *function;   /* Function that must be called */
+   CTAI_Model *data;         /* Data associated to model     */
+
+   retval=CTAI_Model_GetDataAndFunc(hmodel, CTA_MODEL_ADJ_SET_FORC, &data, &function);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Error in CTAI_Model_GetDataAndFunc");
+      return retval;
+   }
+   function(data->data, &hdescr, &vforc, &retval);
+   return retval;
+
+}
+
+#undef METHOD
+#define METHOD "LoadPersistentState"
+int CTA_Model_LoadPersistentState(CTA_Model hmodel, CTA_String filename, CTA_String *instanceID){
+
+   /* Local variables */
+   int retval;               /* Return value of COSTA call   */
+   CTA_Function *function;   /* Function that must be called */
+   CTAI_Model *data;         /* Data associated to model     */
+
+   retval=CTAI_Model_GetDataAndFunc(hmodel, CTA_MODEL_LOAD_PERSISTENTSTATE, &data, &function);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Error in CTA_Model_LoadPersistentState");
+      return retval;
+   }
+   function(data->data, &filename, instanceID, &retval);
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "SaveInternalState"
+int CTA_Model_SaveInternalState(CTA_Model hmodel, CTA_String *instanceID){
+
+   /* Local variables */
+   int retval;               /* Return value of COSTA call   */
+   CTA_Function *function;   /* Function that must be called */
+   CTAI_Model *data;         /* Data associated to model     */
+
+   if (IDEBUG>0) {
+      printf("Start of CTA_Model_SaveInternalState \n");
+      printf("Model handle = %d\n",hmodel);
+   }
+
+   retval=CTAI_Model_GetDataAndFunc(hmodel, CTA_MODEL_SAVE_INTERNALSTATE, &data, &function);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Error in CTA_Model_SaveInternalState");
+      return retval;
+   }
+   function(data->data, instanceID, &retval);
+   if (IDEBUG>0) {
+      printf("End of CTA_Model_SaveInternalState \n");
+   }
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "RestoreInternalState"
+int CTA_Model_RestoreInternalState(CTA_Model hmodel, CTA_String instanceID){
+
+   /* Local variables */
+   int retval;               /* Return value of COSTA call   */
+   CTA_Function *function;   /* Function that must be called */
+   CTAI_Model *data;         /* Data associated to model     */
+
+   retval=CTAI_Model_GetDataAndFunc(hmodel, CTA_MODEL_RESTORE_INTERNALSTATE, &data, &function);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Error in CTA_Model_RestoreState");
+      return retval;
+   }
+   function(data->data, &instanceID, &retval);
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "ReleaseInternalState"
+int CTA_Model_ReleaseInternalState(CTA_Model hmodel, CTA_String instanceID){
+
+   /* Local variables */
+   int retval;               /* Return value of COSTA call   */
+   CTA_Function *function;   /* Function that must be called */
+   CTAI_Model *data;         /* Data associated to model     */
+
+   if (IDEBUG>0) {
+      printf("Start of CTA_Model_ReleaseInternalState \n");
+   }
+   retval=CTAI_Model_GetDataAndFunc(hmodel, CTA_MODEL_RELEASE_INTERNALSTATE, &data, &function);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Error in CTA_Model_ReleaseInternalState");
+      return retval;
+   }
+   function(data->data, &instanceID, &retval);
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "SavePersistentState"
+// DE fop methode die eigenlijk bij een ImodelState zou moeten horen
+int CTA_Model_SavePersistentState(CTA_Model hmodel, CTA_String filename, CTA_String instanceID){
+
+   /* Local variables */
+   int retval;               /* Return value of COSTA call   */
+   CTA_Function *function;   /* Function that must be called */
+   CTAI_Model *data;         /* Data associated to model     */
+
+   if (IDEBUG>0) {
+      printf("Start of CTA_Model_SavePersistentState \n");
+   }
+
+   retval=CTAI_Model_GetDataAndFunc(hmodel, CTA_MODEL_SAVE_PERSISTENTSTATE, &data, &function);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Error in CTA_Model_SavePersistentState");
+      return retval;
+   }
+   /* Create new handle with usrdata */
+   //retval=CTA_Handle_Create("file",CTA_FILE,filename,hfile);
+
+   function(data->data, &filename, &instanceID, &retval);
+   return retval;
+}
+
+/** \brief Get the number of domains for local analysis
+ *  
+ * \param hmodel   I  handle of model instance
+ * \param distance I  characteristic distance
+ * \param ndomain  O  number of domains
+ * \param locVecs  O  costa vector of handles to treevectors (scaling vectors). The treevectors
+ *                    are created when the indices are CTA_NULL on entry
+ *
+ * \return error status: CTA_OK if successful
+ */
+#undef METHOD
+#define METHOD "GetNumDomains"
+int CTA_Model_GetNumDomains(CTA_Model hmodel, double distance, int *nDomains){
+
+   /* Local variables */
+   int retval;               /* Return value of COSTA call   */
+   CTA_Function *function;   /* Function that must be called */
+   CTAI_Model *data;         /* Data associated to model     */
+
+   retval=CTAI_Model_GetDataAndFunc(hmodel, CTA_MODEL_GET_NUMDOMAINS, &data, &function);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Error in retreving function pointer of CTA_MODEL_GET_NUMDOMAINS");
+      return retval;
+   }
+
+   if (IDEBUG) printf("CTA_MODEL DEBUG: Callling CTA_MODEL_GET_NUMDOMAINS function \n");
+   function(data->data, &distance, nDomains, &retval);
+   if (IDEBUG) printf("CTA_MODEL DEBUG: Return code CTA_MODEL_GET_NUMDOMAINS function %d\n",
+      retval);
+   return retval;
+}
+
+
+/** \brief Get selection of observations that are relevnet for assimilation in the given domain
+ *  
+ * \param hmodel   I  handle of model instance
+ * \param hdescr   I  observation description of all observations
+ * \param distance I  characteristic distance
+ * \param idomain  I  domain number
+ * \param locVecs  O  costa vector of handles to treevectors (scaling vectors). The treevectors
+ *                    are created when the indices are CTA_NULL on entry
+ *
+ * \return error status: CTA_OK if successful
+ */
+#undef METHOD
+#define METHOD "GetObsSelector"
+int CTA_Model_GetObsSelector( CTA_Model hmodel, 
+                   CTA_ObsDescr hdescr, double distance, int iDomain, CTA_Vector *selection){
+
+   /* Local variables */
+   int retval;               /* Return value of COSTA call   */
+   CTA_Function *function;   /* Function that must be called */
+   CTAI_Model *data;         /* Data associated to model     */
+
+   retval=CTAI_Model_GetDataAndFunc(hmodel, CTA_MODEL_GET_OBSSELECTOR, &data, &function);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Error in retreving function pointer of CTA_MODEL_GET_OBSSELECTOR");
+      return retval;
+   }
+
+   if (IDEBUG) printf("CTA_MODEL DEBUG: Callling CTA_MODEL_GET_OBSSELECTOR function \n");
+   function(data->data, hdescr, &distance, &iDomain, selection, &retval);
+   if (IDEBUG) printf("CTA_MODEL DEBUG: Return code CTA_MODEL_GET_OBSSELECTOR function %d\n",
+      retval);
+   return retval;
+}
+
+
+
+/** \brief Get for each observation a localization scaling vector for single domain
+ *  
+ * \param hmodel   I  handle of model instance
+ * \param hdescr   I  observation description for which we want localization scaling vectors
+ * \param distance I  characteristic distance
+ * \param idomain  I  domain number
+ * \param locVecs  O  costa vector of handles to treevectors (scaling vectors). The treevectors
+ *                    are created when the indices are CTA_NULL on entry
+ *
+ * \return error status: CTA_OK if successful
+ */
+#undef METHOD
+#define METHOD "GetObsLocalizationDomain"
+int CTA_Model_GetObsLocalizationDomain( CTA_Model hmodel, 
+                   CTA_ObsDescr hdescr, double distance, int iDomain, CTA_Vector locVecs){
+
+   return CTAI_Model_GetObsLocalization(hmodel, hdescr, distance, iDomain, locVecs);
+
+}
+
+
+/** \brief Get a copy of the internal state.
+ *
+ * \note Optionally a tree-vector is created. In that case the caller of this
+ * method is responsible for freeing that tree-vector. The input state must be compatible
+ * (same size and or composition) as the models internal state.
+ * \note If *hstate == CTA_NULL a new object is created, user is responsible for freeing this object.
+ *
+ * \param hmodel   I  handle of model instance
+ * \param iDomain  I  domain number
+ * \param hstate   IO receives state of the model, *hstate can be CTA_NULL on calling (see note)
+ * \return error status: CTA_OK if successful
+ */
+#undef METHOD
+#define METHOD "GetStateDomain"
+int CTA_Model_GetStateDomain(CTA_Model hmodel, int iDomain, CTA_TreeVector *hstate){
+   return CTAI_Model_GetSet(hmodel, CTA_NULL, hstate, I_CTA_MODEL_GET_STATE, iDomain); 
+}
+
+/** \brief Perform axpy operation on the internal state for a single domain 
+ *
+ * \note AXPY: y=alpha*x+y. y corresponds to the models
+ *       internal state and x can be a state vector or a model
+ 
+ * \param hmodel   IO handle of model instance (y)
+ * \param alpha    I  alpha
+ * \param hx       I  handle of x (state vector )
+ * \param iDomain  I  domain number
+ * \return error status: CTA_OK if successful
+ */
+#undef METHOD
+#define METHOD "AxpyStateDomain"
+int CTA_Model_AxpyStateDomain(CTA_Model hmodel, double alpha, int iDomain, CTA_Handle hx){
+
+   /* Local variables */
+   int retval;               /* Return value of COSTA call   */
+   CTA_Function *function;   /* Function that must be called */
+   CTAI_Model *data;         /* Data associated to model     */
+
+
+   retval=CTAI_Model_GetDataAndFunc(hmodel, CTA_MODEL_AXPY_STATEDOMAIN, &data, &function);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Error in retreving function pointer of CTA_MODEL_GET_OBSSELECTOR");
+      return retval;
+   }
+   function(data->data, &alpha, &hx, &iDomain, &retval);
+   if (retval!=CTA_OK)  {
+      CTA_WRITE_ERROR("Error in function CTA_MODEL_AXPY_STATEDOMAIN");
+      return retval;
+    }
+    return CTA_OK;
+}
+
+
+CTAEXPORT void CTA_MODEL_CREATE_F77(int *hmodcl, int *userdata, int *hmodel, int *ierr){
+   *ierr=CTA_Model_Create((CTA_ModelClass) *hmodcl, (CTA_Handle) *userdata, (CTA_Model*) hmodel);
+}
+
+CTAEXPORT void CTA_MODEL_COMPUTE_F77(int *hmodel, int *htime, int *ierr){
+   *ierr=CTA_Model_Compute((CTA_Model) *hmodel, (CTA_Time) *htime);
+}
+
+CTAEXPORT void CTA_MODEL_ADDNOISE_F77(int *hmodel, int *htime, int *ierr){
+   *ierr=CTA_Model_AddNoise((CTA_Model) *hmodel, (CTA_Time) *htime);
+}
+CTAEXPORT void CTA_MODEL_SETSTATE_F77(int *hmodel, int *hstate, int *ierr){
+   *ierr=CTA_Model_SetState((CTA_Model) *hmodel, (CTA_TreeVector) *hstate);
+}
+
+CTAEXPORT void CTA_MODEL_GETSTATE_F77(int *hmodel, int *hstate, int *ierr){
+   *ierr=CTA_Model_GetState((CTA_Model) *hmodel, (CTA_TreeVector*) hstate);
+}
+
+CTAEXPORT void CTA_MODEL_AXPYSTATE_F77(int *hmodel, double *alpha, int *hstate_x, int *ierr){
+   *ierr=CTA_Model_AxpyState((CTA_Model) *hmodel, (double) *alpha, (CTA_Handle) *hstate_x);
+}
+
+CTAEXPORT void CTA_MODEL_GETSTATESCALING_F77(int *hmodel, int *hscal, int *ierr){
+   *ierr=CTA_Model_GetStateScaling((CTA_Model) *hmodel, (CTA_TreeVector*) hscal);
+}
+
+CTAEXPORT void CTA_MODEL_SETFORC_F77(int *hmodel, int *tspan, int *hstate, int *ierr){
+   *ierr=CTA_Model_SetForc((CTA_Model) *hmodel, (CTA_Time) *tspan, (CTA_TreeVector) *hstate);
+}
+
+CTAEXPORT void CTA_MODEL_GETFORC_F77(int *hmodel, int *tspan, int *hstate, int *ierr){
+   *ierr=CTA_Model_GetForc((CTA_Model) *hmodel, (CTA_Time) *tspan, (CTA_TreeVector*) hstate);
+}
+
+CTAEXPORT void CTA_MODEL_AXPYFORC_F77(int *hmodel, int *tspan, double *alpha, int *hstate_x, int *ierr){
+   *ierr=CTA_Model_AxpyForc((CTA_Model) *hmodel, (CTA_Time) *tspan, (double) *alpha, (CTA_TreeVector) *hstate_x);
+}
+
+CTAEXPORT void CTA_MODEL_SETPARAM_F77(int *hmodel, int *hstate, int *ierr){
+   *ierr=CTA_Model_SetParam((CTA_Model) *hmodel, (CTA_TreeVector) *hstate);
+}
+
+CTAEXPORT void CTA_MODEL_GETPARAM_F77(int *hmodel, int *hstate, int *ierr){
+   *ierr=CTA_Model_GetParam((CTA_Model) *hmodel, (CTA_TreeVector*) hstate);
+}
+
+CTAEXPORT void CTA_MODEL_AXPYPARAM_F77(int *hmodel, double *alpha, int *hstate_x, int *ierr){
+   *ierr=CTA_Model_AxpyParam((CTA_Model) *hmodel, (double) *alpha, (CTA_TreeVector) *hstate_x);
+}
+
+CTAEXPORT void CTA_MODEL_GETNOISECOVAR_F77(int *hmodel, int *hstmat, int *ierr){
+   *ierr=CTA_Model_GetNoiseCovar((CTA_Model) *hmodel, (CTA_TreeVector*) hstmat);
+}
+
+CTAEXPORT void CTA_MODEL_GETNOISECOUNT_F77(int *hmodel,int *nnoise, int *ierr){
+   *ierr=CTA_Model_GetNoiseCount((CTA_Model) *hmodel,(CTA_Time *) nnoise);
+};
+
+CTAEXPORT void CTA_MODEL_FREE_F77(int *hmodel, int *ierr){
+   *ierr=CTA_Model_Free((CTA_Model*) hmodel);
+};
+
+CTAEXPORT void CTA_MODEL_EXPORT_F77(int *hmodel, int *hexport, int *ierr){
+   *ierr=CTA_Model_Export((CTA_Model) *hmodel, (CTA_Handle) *hexport);
+};
+
+CTAEXPORT void CTA_MODEL_IMPORT_F77(int *hmodel, int *himport, int *ierr){
+   *ierr=CTA_Model_Import((CTA_Model) *hmodel, (CTA_Handle) *himport);
+};
+
+CTAEXPORT void CTA_MODEL_GETTIMEHORIZON_F77(int *hmodel, int *tHorizon,
+                                            int *ierr){
+   *ierr=CTA_Model_GetTimeHorizon((CTA_Model) *hmodel, (CTA_Time) *tHorizon);
+};
+
+CTAEXPORT void CTA_MODEL_GETCURRENTTIME_F77(int *hmodel, int *tHorizon,
+                                            int *ierr){
+   *ierr=CTA_Model_GetCurrentTime((CTA_Model) *hmodel, (CTA_Time) *tHorizon);
+};
+
+CTAEXPORT void CTA_MODEL_ANNOUNCEOBSVALUES_F77(int *hmodel, int *hdescr, int *ierr){
+   *ierr=CTA_Model_AnnounceObsValues((CTA_Model) *hmodel,
+                                      (CTA_ObsDescr) *hdescr);
+};
+
+CTAEXPORT void CTA_MODEL_GETOBSVALUES_F77(int *hmodel, int *htime, int *hdescr, int *values, int *ierr){
+
+   *ierr=CTA_Model_GetObsValues((CTA_Model) *hmodel, (CTA_Time) *htime,
+                                (CTA_ObsDescr) *hdescr, (CTA_Vector) *values);
+}
+
+CTAEXPORT void CTA_MODEL_GETOBSLOCALIZATION_F77 (int *hmodel, int *hdescr, double *distance, int *locVecs, int*ierr){
+
+   *ierr=CTA_Model_GetObsLocalization((CTA_Model) *hmodel,(CTA_ObsDescr) *hdescr,  (double) *distance, (CTA_Vector) *locVecs);
+}
+
+CTAEXPORT void CTA_MODEL_GETOBSSELECT_F77(int *hmodel, int *htime, int *hdescr, int *sselect, int *ierr){
+
+   *ierr=CTA_Model_GetObsSelect((CTA_Model) *hmodel, (CTA_Time) *htime,
+                                (CTA_ObsDescr) *hdescr, (CTA_String) *sselect);
+};
+
+CTAEXPORT void CTA_MODEL_SAVEINTERNALSTATE_F77(int *hmodel, int *instanceID, int *ierr){
+  *ierr=CTA_Model_SaveInternalState((CTA_Model) *hmodel, (CTA_String *) instanceID);
+};
+
+CTAEXPORT void CTA_MODEL_RESTOREINTERNALSTATE_F77(int *hmodel, int *instanceID, int *ierr){
+  *ierr=CTA_Model_RestoreInternalState((CTA_Model) *hmodel, (CTA_String) *instanceID);
+};
+
+CTAEXPORT void CTA_MODEL_RELEASEINTERNALSTATE_F77(int *hmodel, int *instanceID, int *ierr){
+  *ierr=CTA_Model_ReleaseInternalState((CTA_Model) *hmodel, (CTA_String) *instanceID);
+};
+
+CTAEXPORT void CTA_MODEL_SAVEPERSISTENTSTATE_F77(int *hmodel, int *filename, int *instanceID, int *ierr){
+  *ierr=CTA_Model_SavePersistentState((CTA_Model) *hmodel, (CTA_String) *filename, (CTA_String) *instanceID);
+};
+
+CTAEXPORT void CTA_MODEL_LOADPERSISTENTSTATE_F77(int *hmodel, int *filename, int *instanceID, int *ierr){
+  *ierr=CTA_Model_LoadPersistentState((CTA_Model) *hmodel, (CTA_String) *filename, (CTA_String *) instanceID);
+};
diff --git a/costa/native/cta/src/cta_model_factory.c b/costa/native/cta/src/cta_model_factory.c
new file mode 100644
index 000000000..dfbfe46dd
--- /dev/null
+++ b/costa/native/cta/src/cta_model_factory.c
@@ -0,0 +1,983 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_model_factory.c $
+$Revision: 4029 $, $Date: 2013-06-13 16:30:19 +0200 (Thu, 13 Jun 2013) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include "cta.h"
+
+#include "cta_string.h"
+#include "cta_mem.h"
+#include "cta_datatypes.h"
+#include "ctai_xml.h"
+#include "cta_handles.h"
+#include "ctai_handles.h"
+#include "f_cta_utils.h"
+#include "cta_model.h"
+#include "cta_errors.h"
+#include "cta_f77lapack.h"
+#include "cta_reltable.h"
+#include "cta_par.h"
+#include "cta_message.h"
+#include "ctai.h"
+
+#if defined(_WIN32) || defined(_WIN64) 
+  #define snprintf _snprintf 
+  #define vsnprintf _vsnprintf 
+  #define strcasecmp _stricmp 
+  #define strncasecmp _strnicmp 
+#endif
+
+
+#define MAX(a,b) (a>b ? a: b)
+#define IDEBUG (1)
+
+#define CTA_MODEL_DEFINECLASS_F77      F77_CALL(cta_model_defineclass,CTA_MODEL_DEFINECLASS)
+
+#define CLASSNAME "CTA_Model_Factory"
+
+typedef struct {
+CTA_Func functions[CTA_MODEL_NUMFUNC];
+char *implements;
+char *parallel_type;
+char *spawn_workers;
+char *nproc;
+char *ntimes;
+char *dumProcs;
+char *flag_barrier;
+double t_step;
+int nChildModels;            /* Number of child models that are created (and still alive) from this class.              */
+CTA_Model *ChildModels;      /* Handles of all child models                                                             */
+int *ChildComputeIsBlocked;  /* Flags indication which child models are blocked due to a compute over interval > t_step */
+} CTAI_ModelClass;
+
+
+int CTAI_ModelFac_SetImplements(CTA_ModelClass hmodcl, char *implements){
+
+   CTAI_ModelClass *clsdata;
+   int retval;
+
+   /* Get class data containing all function pointers */
+   retval=CTA_Handle_Check((CTA_Handle) hmodcl,CTA_MODELCLASS);
+   if (retval!=CTA_OK) return retval;
+   retval=CTA_Handle_GetData((CTA_Handle) hmodcl,(void*) &clsdata);
+   if (retval!=CTA_OK) return retval;
+
+   if (clsdata->implements) free(clsdata->implements);
+   if (implements) {
+      clsdata->implements=CTA_Malloc(sizeof(char)*(strlen(implements)+1));
+      strcpy(clsdata->implements,implements);
+   } else {
+      clsdata->implements=CTA_Malloc(sizeof(char)*(strlen("unknown")+1));
+      strcpy(clsdata->implements,"unknown");
+   }
+   return CTA_OK;
+}
+
+int CTAI_Model_DuplicateClass(CTA_ModelClass hmodcl,
+                              CTA_ModelClass *hmodcl_dup){
+
+   CTAI_ModelClass *clsdata;
+   CTAI_ModelClass *clsdata_dup;
+   CTA_Func hfunc;
+   CTA_String sname;
+   char *implements, *parallel_type, *nproc, *ntimes, *dumProcs, *spawn_workers;
+   char *flag_barrier;
+   double t_step;
+   int nChildModels;
+   CTA_Model *ChildModels;
+   int *ChildComputeIsBlocked;
+   int retval;
+   int i;
+
+   /* Get class data containing all function pointers */
+   retval=CTA_Handle_Check((CTA_Handle) hmodcl,CTA_MODELCLASS);
+   if (retval!=CTA_OK) return retval;
+   retval=CTA_Handle_GetData((CTA_Handle) hmodcl,(void*) &clsdata);
+   if (retval!=CTA_OK) return retval;
+
+   /* Allocate new ModelClass object */
+   clsdata_dup=CTA_Malloc(sizeof(CTAI_ModelClass));
+   clsdata_dup->implements            = NULL;
+   clsdata_dup->parallel_type         = NULL;
+   clsdata_dup->spawn_workers         = NULL;
+   clsdata_dup->nproc                 = NULL;
+   clsdata_dup->ntimes                = NULL;
+   clsdata_dup->dumProcs              = NULL;
+   clsdata_dup->flag_barrier          = NULL;
+   clsdata_dup->t_step                = 0.0;
+   clsdata_dup->nChildModels          = 0;
+   clsdata_dup->ChildModels           = NULL;
+   clsdata_dup->ChildComputeIsBlocked = NULL;
+
+   /* Duplicate all function pointers */
+   for (i=0;i<CTA_MODEL_NUMFUNC;i++){
+      CTA_Func_Duplicate(clsdata->functions[i], &hfunc);
+      clsdata_dup->functions[i]=hfunc;
+   }
+
+   /* Duplicate implements */
+   implements=clsdata->implements;
+   if (implements) {
+      clsdata_dup->implements=CTA_Malloc(sizeof(char)*(strlen(implements)+1));
+      strcpy(clsdata_dup->implements,implements);
+   }
+
+   /* Duplicate parallel_type */
+   parallel_type=clsdata->parallel_type;
+   if(parallel_type) {
+      clsdata_dup->parallel_type=CTA_Malloc(sizeof(char)*(strlen(parallel_type)+1));
+      strcpy(clsdata_dup->parallel_type,parallel_type);
+   }
+
+   /* Duplicate spawn_workers */
+   spawn_workers=clsdata->spawn_workers;
+   if(spawn_workers) {
+      clsdata_dup->spawn_workers=CTA_Malloc(sizeof(char)*(strlen(spawn_workers)+1));
+      strcpy(clsdata_dup->spawn_workers,spawn_workers);
+   }
+
+   /* Duplicate nproc */
+   nproc=clsdata->parallel_type;
+   if (nproc) {
+      clsdata_dup->nproc=CTA_Malloc(sizeof(char)*(strlen(nproc)+1));
+      strcpy(clsdata_dup->nproc,nproc);
+   }
+
+   /* Duplicate ntimes */
+   ntimes=clsdata->ntimes;
+   if (ntimes) {
+      clsdata_dup->ntimes=CTA_Malloc(sizeof(char)*(strlen(ntimes)+1));
+      strcpy(clsdata_dup->ntimes,ntimes);
+   }
+
+   /* Duplicate dumProcs */
+   dumProcs=clsdata->dumProcs;
+   if (dumProcs) {
+      clsdata_dup->dumProcs=CTA_Malloc(sizeof(char)*(strlen(dumProcs)+1));
+      strcpy(clsdata_dup->dumProcs,dumProcs);
+   }
+
+   /* Duplicate flag_barrier */
+   flag_barrier=clsdata->flag_barrier;
+   if (flag_barrier) {
+      clsdata_dup->flag_barrier=CTA_Malloc(sizeof(char)*(strlen(flag_barrier)+1));
+      strcpy(clsdata_dup->flag_barrier,flag_barrier);
+   }
+
+   /* Duplicate t_step */
+   t_step=clsdata->t_step;
+   clsdata_dup->t_step=t_step;
+
+   /* Duplicate administration of child models */
+   nChildModels = clsdata->nChildModels;
+   clsdata_dup->nChildModels=nChildModels;
+
+   ChildModels = clsdata->ChildModels;
+   if (ChildModels) {
+      clsdata_dup->ChildModels = CTA_Malloc(nChildModels*sizeof(CTA_Model));
+      memcpy(clsdata_dup->ChildModels, ChildModels, sizeof(clsdata_dup->ChildModels));
+   }
+
+   ChildComputeIsBlocked = clsdata->ChildComputeIsBlocked;
+   if (ChildComputeIsBlocked) {
+      clsdata_dup->ChildComputeIsBlocked = CTA_Malloc(nChildModels*sizeof(int));
+      memcpy(clsdata_dup->ChildComputeIsBlocked, ChildComputeIsBlocked, sizeof(clsdata_dup->ChildComputeIsBlocked));
+   }
+
+   // Allocate new handle and return error when unsuccesfull
+   CTA_String_Create(&sname);
+   CTA_Handle_GetName(hmodcl, sname);
+   if (IDEBUG>0) printf("CTAI_Model_DuplicateClass: NAME OF MODEL CLASS IS %s\n", CTAI_String_GetPtr(sname));
+
+
+   retval=CTA_Handle_Create(CTAI_String_GetPtr(sname), CTA_MODELCLASS,
+                            clsdata_dup, hmodcl_dup);
+   CTA_String_Free(&sname);
+   return retval;
+}
+
+/* Add new model instance to list of all models */
+int CTAI_ModelFac_AddModelInstance(CTA_ModelClass hmodcl, CTA_Model hmodel){
+
+   /* Local variables */
+   CTAI_ModelClass *clsdata; /* Data associated to modelclass     */
+   int retval;               /* Return value of COSTA call        */
+   int imodel;               /* loop variable                     */
+
+   if (IDEBUG>0) printf("CTA_ModelFac_AddModelInstance: Start of function \n");
+
+   /* Get class data */
+   retval=CTA_Handle_Check((CTA_Handle) hmodcl,CTA_MODELCLASS);
+   if (retval!=CTA_OK) return retval;
+   retval=CTA_Handle_GetData((CTA_Handle) hmodcl,(void*) &clsdata);
+   if (retval!=CTA_OK) return retval;
+
+   // reallocate 2 arrays + initialisation
+   clsdata->nChildModels++;
+
+   if (clsdata->ChildModels==NULL) {
+      clsdata->ChildModels = CTA_Malloc(clsdata->nChildModels*sizeof(CTA_Model));
+   } else {
+      CTA_Model *tmp1 = CTA_Realloc(clsdata->ChildModels, clsdata->nChildModels*sizeof(CTA_Model));
+      if (tmp1!=NULL) {
+        clsdata->ChildModels = tmp1;
+      }else{
+        free(clsdata->ChildModels);
+        printf("error allocating");
+        return 1;
+      }
+    }
+
+   if (clsdata->ChildComputeIsBlocked==NULL) {
+      clsdata->ChildComputeIsBlocked = CTA_Malloc(clsdata->nChildModels*sizeof(int));
+   } else {
+      CTA_Model *tmp2 = CTA_Realloc(clsdata->ChildComputeIsBlocked, clsdata->nChildModels*sizeof(int));
+      if (tmp2!=NULL) {
+         clsdata->ChildComputeIsBlocked = tmp2;
+      } else{
+         free(clsdata->ChildComputeIsBlocked);
+         printf("error allocating");
+         return 1;
+      }
+   }
+
+   clsdata->ChildModels[clsdata->nChildModels-1] = hmodel;
+   clsdata->ChildComputeIsBlocked[clsdata->nChildModels-1] = CTA_FALSE;
+
+   if (IDEBUG>0) {
+     printf("number of Child models: %d \n", clsdata->nChildModels);
+     for (imodel=0;imodel<clsdata->nChildModels;imodel++){
+     printf("i, hmodel,flag: %d, %d, %d \n",imodel,clsdata->ChildModels[imodel],
+         clsdata->ChildComputeIsBlocked[imodel]);
+     }
+   }
+   if (IDEBUG>0) printf("CTA_ModelFac_AddModelInstance: End of function \n");
+   return CTA_OK;
+}
+
+/* Remove model instance from list of all models */
+int CTAI_ModelFac_DelModelInstance(CTA_ModelClass hmodcl, CTA_Model hmodel){
+
+   /* Local variables */
+   CTAI_ModelClass *clsdata; /* Data associated to modelclass           */
+   int retval;               /* Return value of COSTA call              */
+   int imodel;               /* loop variable                           */
+   int ifound;               /* flag indicating model was found in list */
+
+   if (IDEBUG>0) printf("CTA_ModelFac_DelModelInstance: Start of function \n");
+
+   /* Get class data */
+   retval=CTA_Handle_Check((CTA_Handle) hmodcl,CTA_MODELCLASS);
+   if (retval!=CTA_OK) return retval;
+   retval=CTA_Handle_GetData((CTA_Handle) hmodcl,(void*) &clsdata);
+   if (retval!=CTA_OK) return retval;
+
+   ifound = -1;
+   /* find index of model to be removed from list*/
+   for (imodel=0;imodel<clsdata->nChildModels;imodel++){
+     if (clsdata->ChildModels[imodel] == hmodel) {
+       ifound = imodel;
+     }
+   }
+
+   /* overwrite information of this model */
+   if (ifound>0) {
+     clsdata->nChildModels--;
+     for (imodel=ifound;imodel<clsdata->nChildModels;imodel++) {
+       clsdata->ChildModels[imodel] = clsdata->ChildModels[imodel+1];
+       clsdata->ChildComputeIsBlocked[imodel] = clsdata->ChildComputeIsBlocked[imodel+1];
+     }
+   }
+   if (IDEBUG>0) printf("CTA_ModelFac_DelModelInstance: End of function \n");
+   return CTA_OK;
+}
+
+#undef METHOD
+#define METHOD "SetBlock"
+/* Set block for own model if barrier falls within current compute interval */
+int CTAI_ModelFac_SetBlock(CTA_ModelClass hmodcl, CTA_Model hmodel){
+
+   /* Local variables */
+   CTAI_ModelClass *clsdata; /* Data associated to modelclass     */
+   int retval;               /* Return value of COSTA call        */
+   int imodel;               /* loop variable                     */
+
+   if (IDEBUG>0) printf("CTA_ModelFac_SetBlock: Start of function \n");
+
+   /* Get class data */
+   retval=CTA_Handle_Check((CTA_Handle) hmodcl,CTA_MODELCLASS);
+   if (retval!=CTA_OK) return retval;
+   retval=CTA_Handle_GetData((CTA_Handle) hmodcl,(void*) &clsdata);
+   if (retval!=CTA_OK) return retval;
+
+   for (imodel=0;imodel<clsdata->nChildModels;imodel++){
+      if (clsdata->ChildModels[imodel]==hmodel){
+         clsdata->ChildComputeIsBlocked[imodel]=CTA_TRUE;
+      }
+   }
+   if (IDEBUG>0) printf("CTA_ModelFac_SetBlock: End of function \n");
+   return CTA_OK;
+}
+
+#undef METHOD
+#define METHOD "GetBlock"
+/* Return value of clsdata->ChildComputeIsBlocked[hmodel] */
+int CTAI_ModelFac_GetBlock(CTA_ModelClass hmodcl, CTA_Model hmodel){
+
+   /* Local variables */
+   CTAI_ModelClass *clsdata; /* Data associated to modelclass     */
+   int retval;               /* Return value of COSTA call        */
+   int imodel;               /* loop variable                     */
+
+   if (IDEBUG>0) printf("CTA_ModelFac_GetBlock: Start of function \n");
+
+   /* Get class data */
+   retval=CTA_Handle_Check((CTA_Handle) hmodcl,CTA_MODELCLASS);
+   if (retval!=CTA_OK) return retval;
+   retval=CTA_Handle_GetData((CTA_Handle) hmodcl,(void*) &clsdata);
+   if (retval!=CTA_OK) return retval;
+
+   for (imodel=0;imodel<clsdata->nChildModels;imodel++){
+      if (clsdata->ChildModels[imodel]==hmodel){
+         return clsdata->ChildComputeIsBlocked[imodel];
+      }
+   }
+   CTA_WRITE_ERROR("Model was not found in list of Child Models\n");
+   return -1;
+}
+
+#undef METHOD
+#define METHOD "TimeStepAllModels"
+int CTAI_ModelFac_TimeStepAllModels(CTA_ModelClass hmodcl, CTA_Function *function, double tstart, double tstop){
+
+   /* Local variables */
+   CTAI_ModelClass *clsdata; /* Data associated to modelclass           */
+   CTA_Model hmodel;         /* Handle of model                         */
+   CTA_Time hstep;           /* timespan to compute                     */
+   double t1,t2;             /* Begin and end time of hstep             */
+   int retval;               /* Return value of COSTA call              */
+   int imodel,istep;         /* loop counters                           */
+   int nStep;                /* number of sub-intervals to step         */
+   int nblock;               /* number of models currently blocked      */
+   double tstep;             /* maximum interval that model may run     */
+
+   if (IDEBUG>0) printf("CTA_ModelFac_TimeStepAllModels: Start of function \n");
+
+   /* Get class data */
+   retval=CTA_Handle_Check((CTA_Handle) hmodcl,CTA_MODELCLASS);
+   if (retval!=CTA_OK) return retval;
+   retval=CTA_Handle_GetData((CTA_Handle) hmodcl,(void*) &clsdata);
+   if (retval!=CTA_OK) return retval;
+
+   /* Find number of blocked models */
+   nblock = 0;
+   for (imodel = 0;imodel < clsdata->nChildModels;imodel++){
+       nblock = nblock + clsdata->ChildComputeIsBlocked[imodel];
+   }
+   if (nblock==clsdata->nChildModels) {
+      /* all child models are blocked*/
+      if (IDEBUG>0) printf("CTA_ModelFac_TimeStepAllModels: All model instances are waiting\n");
+
+      /* determine number of barrier steps in compute interval */
+      tstep = clsdata->t_step;
+      nStep=(int) ((tstop-tstart)/tstep + 0.5);
+      if (IDEBUG>0) printf("CTA_ModelFac_TimeStepAllModels: number of timesteps: nStep = %d\n",nStep);
+
+      CTA_Time_Create(&hstep);
+
+      /* loop over all barrier intervals */
+      t1 = tstart;
+      for (istep=0; istep < nStep; istep++){
+         t2 = MIN(t1 + tstep,tstop);
+         retval=CTA_Time_SetSpan(hstep,t1,t2);
+         if (IDEBUG>0) printf("CTA_ModelFac_TimeStepAllModels: iStep=%d of nStep=%d tstep=%f\n",istep, nStep,tstep);
+         if (IDEBUG>0) printf("CTA_ModelFac_TimeStepAllModels: tstart=%f tstop=%f\n",t1,t2);
+
+         /* loop over all models */
+         for (imodel = 0;imodel < clsdata->nChildModels;imodel++){
+            if (IDEBUG>0) printf("model %d: from t=%f to t=%f\n",imodel,t1,t2);
+
+               /* Check type of handle */
+               hmodel = clsdata->ChildModels[imodel];
+               retval=CTA_Handle_Check((CTA_Handle) hmodel,CTA_MODEL);
+               if (retval!=CTA_OK) {
+                  CTA_WRITE_ERROR("Handle is not a cta_model handle");
+                  return retval;
+               }
+            retval = CTAI_Model_PerformTimesteps(hmodel,function,hstep,CTA_FALSE);
+         }
+         t1 = t2;
+      }
+
+      //reset barrier information for all child models
+      if (IDEBUG>0) {printf("CTA_ModelFac_TimeStepAllModels: barrier information is reset. \n");}
+      for (imodel = 0;imodel < clsdata->nChildModels;imodel++){
+         clsdata->ChildComputeIsBlocked[imodel] = CTA_FALSE;
+      }
+      CTA_Time_Free(&hstep);
+   }
+   if (IDEBUG>0) {printf("CTA_ModelFac_TimeStepAllModels: End of function \n");}
+   return CTA_OK;
+}
+
+int CTAI_ModelFac_SetBarrierData(CTA_ModelClass hmodcl, char *flag_barrier, char *t_step ) {
+
+   CTAI_ModelClass *clsdata; /* Data associated to modelclass           */
+   int retval;               /* Return value of COSTA call              */
+   double fac;               /* multiply factor                         */
+   double tval;              /* help variable                           */
+
+   if (IDEBUG>0) {
+      printf("CTA_ModelFac_SetBarrierData: Start of function \n");
+      printf("flag = %s, t_step = %s \n", flag_barrier, t_step);
+   }
+
+   /* Get class data containing all function pointers */
+   retval=CTA_Handle_Check((CTA_Handle) hmodcl,CTA_MODELCLASS);
+   if (retval!=CTA_OK) return retval;
+   retval=CTA_Handle_GetData((CTA_Handle) hmodcl,(void*) &clsdata);
+   if (retval!=CTA_OK) return retval;
+
+   if (clsdata->flag_barrier) free(clsdata->flag_barrier);
+
+   clsdata->flag_barrier = CTA_FALSE;
+   clsdata->t_step       = 0.0;
+
+   if (flag_barrier != NULL) {
+      if ( (0 == strcmp("true", flag_barrier)) && t_step ) {
+         /* Set flag_barrier */
+         clsdata->flag_barrier=CTA_Malloc(sizeof(char)*(strlen(flag_barrier)+1));
+         strcpy(clsdata->flag_barrier,flag_barrier);
+
+         /* Set t_step.                             */
+         /* Possibly remove units MJD/HOUR/MIN/SEC */
+         fac = 1.0;
+         if (strlen(t_step)>4) {
+             if (0==strcasecmp( "HOUR",(t_step+strlen(t_step)-4))) {
+               t_step[strlen(t_step)-4]='\0';
+               fac = 1.0/24;
+             }
+         }
+         if (strlen(t_step)>3) {
+            if (0==strcasecmp( "MJD",(t_step+strlen(t_step)-3))) {
+              t_step[strlen(t_step)-3]='\0';
+            }
+            if (0==strcasecmp( "SEC",(t_step+strlen(t_step)-3))) {
+              t_step[strlen(t_step)-3]='\0';
+              fac = 1.0/(24*60*60);
+            }
+            if (0==strcasecmp( "MIN",(t_step+strlen(t_step)-3))) {
+              t_step[strlen(t_step)-3]='\0';
+              fac = 1.0/(24*60);
+            }
+         }
+         tval = atof(t_step);
+         clsdata->t_step=fac*tval;
+      } else {
+         if (0 == strcmp("false", flag_barrier)) {
+            /* all ok no barrier info is specified */
+         } else {
+           CTA_WRITE_ERROR("Error: Incomplete specification of barrier information.\n");
+           exit(-1);
+        }
+      }
+   }
+   if (IDEBUG>0) {
+     printf("CTA_ModelFac_SetBarrierData: End of function \n");
+     printf("clsdata->flag_barrier = %s, clsdata->t_step = %f \n", clsdata->flag_barrier, clsdata->t_step);
+   }
+   return CTA_OK;
+}
+
+int CTAI_ModelFac_SetParallelData(CTA_ModelClass hmodcl, char *implements, char *parallel_type, char *spawn_workers, char *nproc, char *ntimes, char *dumProcs ){
+
+   CTAI_ModelClass *clsdata;
+   int retval;
+
+   /* Get class data containing all function pointers */
+   retval=CTA_Handle_Check((CTA_Handle) hmodcl,CTA_MODELCLASS);
+   if (retval!=CTA_OK) return retval;
+   retval=CTA_Handle_GetData((CTA_Handle) hmodcl,(void*) &clsdata);
+   if (retval!=CTA_OK) return retval;
+
+   if (clsdata->parallel_type) free(clsdata->parallel_type);
+   if (clsdata->spawn_workers) free(clsdata->spawn_workers);
+   if (clsdata->nproc)         free(clsdata->nproc);
+   if (clsdata->ntimes)        free(clsdata->ntimes);
+   if (clsdata->implements)    free(clsdata->implements);
+   if (clsdata->dumProcs)      free(clsdata->dumProcs);
+
+   clsdata->parallel_type =NULL;
+   clsdata->spawn_workers =NULL;
+   clsdata->nproc         =NULL;
+   clsdata->ntimes        =NULL;
+   clsdata->implements    =NULL;
+   clsdata->dumProcs      =NULL;
+
+   /* Set implements */
+   retval=CTAI_ModelFac_SetImplements(hmodcl, implements);
+
+//   printf("%p, %p, %p, %p\n",parallel_type, nproc, ntimes, implements);
+   if (parallel_type && nproc && ntimes && implements) {
+
+      /* Set parallel_type */
+      clsdata->parallel_type=CTA_Malloc(sizeof(char)*(strlen(parallel_type)+1));
+      strcpy(clsdata->parallel_type,parallel_type);
+
+      /* Set spawn_workers */
+      clsdata->spawn_workers=CTA_Malloc(sizeof(char)*(strlen(spawn_workers)+1));
+      strcpy(clsdata->spawn_workers,spawn_workers);
+
+      /* Set nproc */
+      clsdata->nproc=CTA_Malloc(sizeof(char)*(strlen(nproc)+1));
+      strcpy(clsdata->nproc,nproc);
+
+      /* Set ntimes */
+      clsdata->ntimes=CTA_Malloc(sizeof(char)*(strlen(ntimes)+1));
+      strcpy(clsdata->ntimes,ntimes);
+
+      if (dumProcs){
+         clsdata->dumProcs=CTA_Malloc(sizeof(char)*(strlen(dumProcs)+1));
+         strcpy(clsdata->dumProcs,dumProcs);
+      }
+   } else if (!parallel_type && !nproc && !ntimes) {
+      /* all ok no parallel info is specified */
+   } else {
+      printf("Error: Incomplete specification of parallel information of model_factory\n");
+      printf("All or none of the fields  'parallel_type', 'nproc', 'ntimes' must be specified in the input\n");
+      printf("specified are:\n");
+      if (parallel_type) printf("parallel_type='%s'\n",parallel_type);
+      if (spawn_workers) printf("spawn_workers='%s'\n",spawn_workers);
+      if (nproc)         printf("nproc        ='%s'\n",nproc);
+      if (ntimes)        printf("ntimes       ='%s'\n",ntimes);
+      if (implements)    printf("implements   ='%s'\n",implements);
+      exit(-1);
+   }
+
+   return CTA_OK;
+
+}
+
+int CTAI_ModelFac_GetParallelData(CTA_ModelClass hmodcl, char **implements, char **parallel_type, char **spawn_workers, char **nproc, char **ntimes, char **dumProcs ){
+
+   CTAI_ModelClass *clsdata;
+   int retval;
+
+   /* Get class data containing all function pointers */
+   retval=CTA_Handle_Check((CTA_Handle) hmodcl,CTA_MODELCLASS);
+   if (retval!=CTA_OK) return retval;
+   retval=CTA_Handle_GetData((CTA_Handle) hmodcl,(void*) &clsdata);
+   if (retval!=CTA_OK) return retval;
+
+   *parallel_type = clsdata->parallel_type;
+   *spawn_workers = clsdata->spawn_workers;
+   *nproc         = clsdata->nproc;
+   *ntimes        = clsdata->ntimes;
+   *implements    = clsdata->implements;
+   *dumProcs      = clsdata->dumProcs;
+
+   return CTA_OK;
+
+}
+
+int CTAI_ModelFac_GetBarrierData(CTA_ModelClass hmodcl, char **flag_barrier, double *t_step ) {
+
+   CTAI_ModelClass *clsdata;
+   int retval;
+
+   /* Get class data containing all function pointers */
+   retval=CTA_Handle_Check((CTA_Handle) hmodcl,CTA_MODELCLASS);
+   if (retval!=CTA_OK) return retval;
+
+   retval=CTA_Handle_GetData((CTA_Handle) hmodcl,(void*) &clsdata);
+   if (retval!=CTA_OK) return retval;
+
+   *flag_barrier = clsdata->flag_barrier;
+   *t_step       = clsdata->t_step;
+
+   if (IDEBUG>0) {
+      printf("CTA_ModelFac_GetBarrierData: flag_barrier = %s, t_step = %f \n", *flag_barrier, *t_step);
+   }
+   return CTA_OK;
+
+}
+
+const char *CTAI_ModelFac_GetImplements(CTA_ModelClass hmodcl){
+
+   CTAI_ModelClass *clsdata;
+   int retval;
+
+   /* Get class data containing all function pointers */
+   retval=CTA_Handle_Check((CTA_Handle) hmodcl,CTA_MODELCLASS);
+   if (retval!=CTA_OK) return NULL;
+   retval=CTA_Handle_GetData((CTA_Handle) hmodcl,(void*) &clsdata);
+   if (retval!=CTA_OK) return NULL;
+
+   return clsdata->implements;
+}
+
+#undef METHOD
+#define METHOD "New"
+int CTA_ModelFactory_New(const char *fName, CTA_ModelClass *modelClass){
+
+   int retval;
+   CTA_String sFile;
+
+   CTA_Tree   hTree;
+   CTA_ModelClass modelClassFromTree;
+
+   /* Read the model configuration file */
+   if (IDEBUG>0) {printf("CTA_ModelFactory_New: Start of function \n");}
+   if (IDEBUG>0) {printf("CTA_ModelFactory_New: name of configuration file: %s\n",fName);}
+   retval=CTA_String_Create(&sFile);
+   retval=CTA_String_Set(sFile,fName);
+   retval=CTA_XML_Read(sFile, &hTree);
+   retval=CTA_String_Free(&sFile);
+
+   /* Get model class from tree */
+   if (IDEBUG>0) {printf("CTA_ModelFactory_New: Get the modelclass from the tree \n");}
+   retval=CTA_Tree_GetItem(hTree,1,&modelClassFromTree);
+   if (retval!=CTA_OK){
+	  char message[1024];
+      sprintf(message,"CTA_ModelFactory_New cannot get first item from input-tree \n");
+	  CTA_WRITE_ERROR(message);
+      return retval;
+   }
+
+   /* Duplicate the modelclass since it will be deleted
+      when the tree is deleted*/
+   if (IDEBUG>0) {printf("CTA_ModelFactory_New: Duplicate this model class \n");}
+   retval=CTAI_Model_DuplicateClass(modelClassFromTree, modelClass);
+   if (retval!=CTA_OK){
+      char message[1024];
+	  sprintf(message,"CTA_ModelFactory_New cannot duplicate class from input-file\n");
+      CTA_WRITE_ERROR(message);
+	  return retval;
+   }
+   /* Delete the input-tree */
+   if (IDEBUG>0) {printf("CTA_ModelFactory_New: Delete input tree \n");}
+   retval=CTA_Tree_Free(&hTree);
+   if (retval!=CTA_OK){
+      char message[1024];
+	  sprintf(message,"CTA_ModelFactory_New cannot free input tree\n");
+      CTA_WRITE_ERROR(message);
+	  return retval;
+   }
+
+   if (IDEBUG>0) {printf("CTA_ModelFactory_New: end of function class handle is %d\n",*modelClass);}
+   return CTA_OK;
+}
+
+int CTA_Model_DefineClass(
+   const char *name,
+   const CTA_Func h_func[CTA_MODEL_NUMFUNC],
+   CTA_ModelClass     *hmodcl
+   ){
+
+   CTAI_ModelClass *data;
+   int retval;
+   int i;
+
+   /* Allocate new Vector object */
+   data=CTA_Malloc(sizeof(CTAI_ModelClass));
+   data->implements            = NULL;
+   data->parallel_type         = NULL;
+   data->spawn_workers         = NULL;
+   data->nproc                 = NULL;
+   data->ntimes                = NULL;
+   data->dumProcs              = NULL;
+   data->flag_barrier          = NULL;
+   data->t_step                = 0.0;
+   data->nChildModels          = 0;
+   data->ChildModels           = NULL;
+   data->ChildComputeIsBlocked = NULL;
+
+   for (i=0;i<CTA_MODEL_NUMFUNC;i++){
+      data->functions[i]=h_func[i];
+   }
+   data->implements=CTA_Malloc(sizeof(char)*(strlen("unknown")+1));
+   strcpy(data->implements,"unknown");
+
+   // Allocate new handle and return eror when unsuccesfull
+   retval=CTA_Handle_Create(name,CTA_MODELCLASS,data,hmodcl);
+   return retval;
+}
+
+
+int CTA_Model_DefineClass2(
+   const char *name,
+   const char *implements,
+   const CTA_Func h_func[CTA_MODEL_NUMFUNC],
+   CTA_ModelClass     *hmodcl
+   ){
+
+   CTAI_ModelClass *data;
+   int retval;
+   int i;
+
+   if (IDEBUG>0) printf("CTA_Model_DefineClass2\n");
+
+   /* Allocate new Vector object */
+   data=CTA_Malloc(sizeof(CTAI_ModelClass));
+   data->implements              = NULL;
+   data->parallel_type           = NULL;
+   data->spawn_workers           = NULL;
+   data->nproc                   = NULL;
+   data->ntimes                  = NULL;
+   data->dumProcs                = NULL;
+   data->flag_barrier            = NULL;
+   data->t_step                  = 0.0;
+   data->nChildModels            = 0;
+   data->ChildModels             = NULL;
+   data->ChildComputeIsBlocked   = NULL;
+
+   for (i=0;i<CTA_MODEL_NUMFUNC;i++){
+      data->functions[i]=h_func[i];
+   }
+
+   /* Implements model */
+   if (implements) {
+      data->implements=CTA_Malloc(sizeof(char)*(strlen(implements)+1));
+      strcpy(data->implements,implements);
+   } else {
+      data->implements=CTA_Malloc(sizeof(char)*(strlen("unknown")+1));
+      strcpy(data->implements,"unknown");
+   }
+
+   // Allocate new handle and return error when unsuccesfull
+   retval=CTA_Handle_Create(name,CTA_MODELCLASS,data,hmodcl);
+   return retval;
+}
+
+
+/* Interfacing with Fortran */
+CTAEXPORT void CTA_MODEL_DEFINECLASS_F77(char *name, int *h_func, int *hmodcl, int *ierr, int len_name){
+   char  *c_name;
+
+   /* create a c-string equivalent to name */
+   c_name=CTA_Malloc((len_name+1)*sizeof(char));
+   CTA_fstr2cstr(name,c_name,len_name);
+
+   *ierr=CTA_Model_DefineClass(c_name, (CTA_Func*) h_func, (CTA_ModelClass*) hmodcl);
+   free(c_name);
+}
+
+
+/** \brief Create a COSTA modell class from XML
+*          (load from methods from dynamic load library).
+*
+*  \param cur_node  I  Current XML node
+*  \return             Handle to create or CTA_NULL in case of an error.
+*/
+CTA_ModelClass CTAI_XML_CreateModelClass(xmlNode *cur_node) {
+
+   CTA_Func      hfunc;                  /* the new function */
+   xmlChar       *id = NULL;             /* id of function in XML-tree */
+   xmlChar       *clsname = NULL;        /* (lookup) name of the model class */
+   xmlChar       *flag_barrier = NULL;   /* flag indicating that model cannot be advanced unlimited*/
+   xmlChar       *t_step = NULL;         /* maximum number of time-steps that model may perform */
+   xmlChar       *implements = NULL;     /* Name of model that is implemented */
+   xmlChar       *parallel_type = NULL;  /* Type of parallelism that is used */
+   xmlChar       *spawn_workers = NULL;  /* Spawn worker processes (1) yes/ (0) no */
+   xmlChar       *nproc         = NULL;  /* number of processes for a single instance */
+   xmlChar       *ntimes        = NULL;  /* number of times the workers will be assinged to processors */
+   xmlChar       *dumProcs      = NULL;  /* ranks of the dummy processes in the worker-worker model */
+   int           retval;                 /* return status of creation       */
+   CTA_Func      h_func[CTA_MODEL_NUMFUNC]; /* List of functions */
+   xmlNode       *func_node = NULL;      /* values of children nodes */
+   int i;
+   CTA_ModelClass hmodcl;                /* function class */
+   CTA_ModelClass hmodcl_old;
+   const char *name;
+   CTA_String   sclsname;
+
+   if (IDEBUG>0) printf("CTAI_XML_CreateModelClass: Start of function\n");
+   /* Parse this node's attributes */
+   /* Get id */
+   id = xmlGetProp(cur_node, CTAI_XML_ID);
+
+   /* Get class name */
+   clsname = xmlGetProp(cur_node, CTAI_XML_NAME);
+
+   /* Get implements_model */
+   implements = xmlGetProp(cur_node, CTAI_XML_IMPLEMENTS);
+
+   /* parallel options: */
+   /*-number of processes */
+   nproc = xmlGetProp(cur_node, CTAI_XML_NPROC);
+
+   /*-type of parallel run */
+   parallel_type = xmlGetProp(cur_node, CTAI_XML_PARALLEL_TYPE);
+
+   /*-type of parallel run */
+   spawn_workers = xmlGetProp(cur_node, CTAI_XML_SPAWN_WORKERS);
+
+   /*-Number of times the parallel group must be created */
+   ntimes = xmlGetProp(cur_node, CTAI_XML_NTIMES);
+
+   /*-Process numbers (ranks) of dummy processes */
+   dumProcs = xmlGetProp(cur_node, CTAI_XML_DUMPROCS);
+
+   /* Flag for barrier moment */
+   flag_barrier = xmlGetProp(cur_node, CTAI_XML_FLAG_BARRIER);
+
+   /* Get T_step */
+   t_step = xmlGetProp(cur_node, CTAI_XML_T_STEP);
+
+   /* Check whether this is a known model-class */
+   CTA_String_Create(&sclsname);
+
+   CTA_String_Set(sclsname,(char *) clsname);
+
+   retval=CTA_Handle_Find(sclsname, CTA_MODELCLASS, &hmodcl_old);
+
+   CTA_String_Free(&sclsname);
+
+   if (retval==CTA_OK) {
+      /* duplicate model class */
+      CTAI_Model_DuplicateClass(hmodcl_old, &hmodcl);
+
+      /* Set implements and any parallel information set for this model? */
+      CTAI_ModelFac_SetParallelData(hmodcl, (char *) implements, (char *) parallel_type, (char *) spawn_workers, (char *) nproc, (char *) ntimes, (char *) dumProcs);
+
+      /* model-class is known */
+      if (IDEBUG>0) printf("CTAI_XML_CreateModelClass: found modelclass '%s'\n",clsname);
+      if (IDEBUG>0) printf("CTAI_XML_CreateModelClass: duplication is created \n");
+
+   } else {
+      /* Set all function handles to CTA_NULL */
+      for (i=0;i<CTA_MODEL_NUMFUNC;i++){
+         h_func[i]=CTA_NULL;
+      }
+
+      /* Load all functions that are specified in input */
+      for (func_node = cur_node->children; func_node;
+              func_node = func_node->next) {
+         if (0 == strcmp("CTA_FUNCTION", (char *) func_node->name)){
+            hfunc=CTAI_XML_CreateFunc(func_node);
+            if (hfunc!=CTA_NULL){
+               name = CTAI_Handle_GetName(hfunc);
+               if (0 == strcmp("CTA_MODEL_CREATE_SIZE", name)){
+                  h_func[I_CTA_MODEL_CREATE_SIZE    ]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_CREATE_INIT", name)){
+                  h_func[I_CTA_MODEL_CREATE_INIT    ]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_FREE", name)){
+                  h_func[I_CTA_MODEL_FREE           ]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_COMPUTE", name)){
+                  h_func[I_CTA_MODEL_COMPUTE        ]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_SET_STATE", name)){
+                  h_func[I_CTA_MODEL_SET_STATE      ]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_GET_STATE", name)){
+                  h_func[I_CTA_MODEL_GET_STATE      ]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_AXPY_STATE", name)){
+                  h_func[CTA_MODEL_AXPY_STATE     ]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_AXPY_MODEL", name)){
+                  h_func[CTA_MODEL_AXPY_MODEL     ]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_SET_FORC", name)){
+                  h_func[CTA_MODEL_SET_FORC       ]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_GET_FORC", name)){
+                  h_func[CTA_MODEL_GET_FORC       ]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_AXPY_FORC", name)){
+                  h_func[CTA_MODEL_AXPY_FORC      ]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_SET_PARAM", name)){
+                  h_func[CTA_MODEL_SET_PARAM      ]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_GET_PARAM", name)){
+                 h_func[CTA_MODEL_GET_PARAM      ]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_AXPY_PARAM", name)){
+                 h_func[CTA_MODEL_AXPY_PARAM     ]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_GET_NOISE_COUNT", name)){
+                 h_func[CTA_MODEL_GET_NOISE_COUNT]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_GET_NOISE_COVAR", name)){
+                 h_func[CTA_MODEL_GET_NOISE_COVAR]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_GET_OBSVALUES", name)){
+                 h_func[CTA_MODEL_GET_OBSVALUES  ]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_ANNOUNCE_OBSVALUES", name)){
+                 h_func[CTA_MODEL_ANNOUNCE_OBSVALUES  ]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_GET_OBSSELECT", name)){
+                 h_func[CTA_MODEL_GET_OBSSELECT  ]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_ADD_NOISE", name)){
+                 h_func[CTA_MODEL_ADD_NOISE      ]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_IMPORT", name)){
+                 h_func[I_CTA_MODEL_IMPORT      ]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_EXPORT", name)){
+                 h_func[I_CTA_MODEL_EXPORT      ]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_GET_STATESCALING", name)){
+                 h_func[CTA_MODEL_GET_STATESCALING]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_GETTIMEHORIZON", name)){
+                 h_func[CTA_MODEL_GET_TIMEHORIZON]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_GETCURRENTTIME", name)){
+                 h_func[CTA_MODEL_GET_CURRENTTIME]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_GETOBSLOCALIZATION", name)){
+                 h_func[I_CTA_MODEL_GETOBSLOCALIZATION]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_SAVEINTERNALSTATE", name)){
+                 h_func[CTA_MODEL_SAVE_INTERNALSTATE]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_RESTOREINTERNALSTATE", name)){
+                 h_func[CTA_MODEL_RESTORE_INTERNALSTATE]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_RELEASEINTERNALSTATE", name)){
+                 h_func[CTA_MODEL_RELEASE_INTERNALSTATE]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_SAVEPERSISTENTSTATE", name)){
+                 h_func[CTA_MODEL_SAVE_PERSISTENTSTATE]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_LOADPERSISTENTSTATE", name)){
+                 h_func[CTA_MODEL_LOAD_PERSISTENTSTATE]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_GET_NUMDOMAINS", name)){
+                 h_func[CTA_MODEL_GET_NUMDOMAINS]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_GET_OBSSELECTOR", name)){
+                 h_func[CTA_MODEL_GET_OBSSELECTOR]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_GET_OBSLOCALIZATIONDOMAIN", name)){
+                 h_func[CTA_MODEL_GET_OBSLOCALIZATIONDOMAIN]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_GET_STATEDOMAIN", name)){
+                 h_func[CTA_MODEL_GET_STATEDOMAIN]=hfunc;
+               } else if (0 == strcmp("CTA_MODEL_AXPY_STATEDOMAIN", name)){
+                 h_func[CTA_MODEL_AXPY_STATEDOMAIN]=hfunc;
+               } else {
+                  printf("CTAI_XML_CreateModelClass :Warning found unknown node %s\n", name);
+               }
+            }
+         }
+      }
+      /* Create new function class */
+      retval=CTA_Model_DefineClass2((char *) clsname, (char *) implements, h_func, &hmodcl);
+      /* Set barrier information */
+      CTAI_ModelFac_SetBarrierData(hmodcl, (char *) flag_barrier, (char *) t_step);
+      /* Set parallel info */
+      CTAI_ModelFac_SetParallelData(hmodcl, (char *) implements, (char *) parallel_type, (char *) spawn_workers, (char *) nproc, (char *) ntimes, (char *) dumProcs);
+
+      /* Set id (=name) of handle */
+      CTAI_Handle_SetName(hmodcl, (char *) id);
+
+   }
+   CTAI_Handle_SetName(hmodcl, (char *) id);
+   xmlFree(id);
+   xmlFree(clsname);
+   xmlFree(implements);
+   xmlFree(parallel_type);
+   xmlFree(spawn_workers);
+   xmlFree(nproc);
+   xmlFree(ntimes);
+   xmlFree(dumProcs);
+   xmlFree(flag_barrier);
+   xmlFree(t_step);
+
+   if (IDEBUG>0) printf("CTAI_XML_CreateModelClass: End of function\n");
+   return hmodcl;
+}
+
+
+
diff --git a/costa/native/cta/src/cta_model_utilities.c b/costa/native/cta/src/cta_model_utilities.c
new file mode 100644
index 000000000..3fef1b1f1
--- /dev/null
+++ b/costa/native/cta/src/cta_model_utilities.c
@@ -0,0 +1,78 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_model_utilities.c $
+$Revision: 2288 $, $Date: 2011-05-11 16:18:21 +0200 (Wed, 11 May 2011) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+#include <string.h>
+#include <stdio.h>
+#include "f_cta_utils.h"
+#include "cta_errors.h"
+#include "cta_model_utilities.h"
+#include "cta_xml.h"
+#include "cta_message.h"
+
+#define CTA_MODEL_UTIL_INPUTTREE_F77  F77_CALL(cta_model_util_inputtree,CTA_MODEL_UTIL_INPUTTREE)
+
+#define CLASSNAME "CTA_Model_Utilities" 
+/* Several utility routines that can be of usage when creating COSTA model components */
+
+#undef METHOD
+#define METHOD "Util_Inputtree"
+int CTA_Model_Util_InputTree(CTA_Handle hinput, CTA_Tree *tinput, int *cleanup){
+  int ierr; /*Return value of a costa method */
+  int retval;
+  CTA_Handle dt;
+
+  /* check handle it must be a tree or an input-filename */
+  retval = CTA_Handle_GetDatatype(hinput, &dt);
+  if (retval != CTA_OK) {return retval;
+  } else {
+      if (dt == CTA_TREE) {
+
+        //printf("ik denk dat dit een boom is \n");
+        /* It is a tree only copy handle of root of tree */
+        *tinput=hinput;
+        *cleanup=CTA_FALSE;
+        return CTA_OK;
+      } else if (dt == CTA_STRING) {
+        /* It is a string. Parse input-file with this name */
+        // printf("ik denk dat dit een cta-string is \n");
+        ierr=CTA_XML_Read(hinput, tinput);
+        if (ierr!=CTA_OK) {
+          CTA_WRITE_ERROR("xml_read failed");
+          return ierr;
+        }
+        *cleanup=CTA_TRUE;
+        return CTA_OK;
+      } else {
+        /* It is nor a tree nor a string */
+        char message[1024];
+        sprintf(message,"Input is neither tree nor string! \n");
+        CTA_WRITE_ERROR(message);
+        return CTA_INPUT_OBJECT_NOT_SUPPORTED;
+      }
+  }
+}
+
+/* Interfacing with Fortran */
+
+CTAEXPORT void CTA_MODEL_UTIL_INPUTTREE_F77(int *hinput, int *tinput, int *cleanup, int *ierr){
+  *ierr= CTA_Model_Util_InputTree((CTA_Handle) *hinput, (CTA_Tree*) tinput, cleanup);
+}
+
diff --git a/costa/native/cta/src/cta_obsdescr.c b/costa/native/cta/src/cta_obsdescr.c
new file mode 100644
index 000000000..39f963b92
--- /dev/null
+++ b/costa/native/cta/src/cta_obsdescr.c
@@ -0,0 +1,576 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_obsdescr.c $
+$Revision: 3730 $, $Date: 2012-12-19 11:07:29 +0100 (Wed, 19 Dec 2012) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <stdio.h>
+#include "f_cta_utils.h"
+#include "cta_mem.h"
+#include "cta_sobs.h"
+#include "cta_obsdescr.h"
+#include "cta_errors.h"
+#include "cta_handles.h"
+#include "ctai.h"
+#include "cta_message.h"
+
+#define CTA_OBSDESCR_DEFINECLASS_F77          F77_CALL(cta_obsdescr_defineclass,CTA_OBSDESCR_DEFINECLASS)
+#define CTA_OBSDESCR_CREATE_F77               F77_CALL(cta_obsdescr_create,CTA_OBSDESCR_CREATE)
+#define CTA_OBSDESCR_CHECK_SOBS_F77           F77_CALL(cta_obsdescr_check_sobs,CTA_OBSDESCR_CHECK_SOBS)
+#define CTA_OBSDESCR_GET_VALUEPROPERTIES_F77  F77_CALL(cta_obsdescr_get_valueproperties,CTA_OBSDESCR_GET_VALUEPROPERTIES)
+#define CTA_OBSDESCR_GET_PROPERTY_KEYS_F77    F77_CALL(cta_obsdescr_get_propertykeys,CTA_OBSDESCR_GET_PROPERTYKEYS)
+#define CTA_OBSDESCR_PROPERTY_COUNT_F77       F77_CALL(cta_obsdescr_property_count,CTA_OBSDESCR_PROPERTY_COUNT)
+#define CTA_OBSDESCR_OBSERVATION_COUNT_F77    F77_CALL(cta_obsdescr_observation_count,CTA_OBSDESCR_OBSERVATION_COUNT)
+#define CTA_OBSDESCR_EXPORT_F77               F77_CALL(cta_obsdescr_export,CTA_OBSDESCR_EXPORT)
+#define CTA_OBSDESCR_FREE_F77                 F77_CALL(cta_obsdescr_free,CTA_OBSDESCR_FREE)
+
+#define CLASSNAME "CTA_Obsdescr"
+
+/* Struct holding all data associated to an COSTA Vector */
+typedef struct {
+CTA_Func functions[I_CTA_OBSDESCR_NUMFUNC];
+} CTAI_ObsDescrClass; // A ObsDescrClass contains a list of the member-functions
+
+
+typedef struct {
+CTA_Func functions[I_CTA_OBSDESCR_NUMFUNC]; // See cta_obsdescr.h for a list of
+                           //     available stochobs-functions
+CTA_ObsDescrClass hdescrcl; // ObsDescr-class
+void *data;                // pointer to the implementation-specific data.
+//CTA_StochObs hsobs;        // The observer of which this is a description
+} CTAI_ObsDescr;
+
+
+
+#undef METHOD
+#define METHOD "DefineClass"
+int CTA_ObsDescr_DefineClass(
+   // INPUTS:
+      const char *name,                // Name of the new stochobs class
+      const CTA_Func h_func[I_CTA_OBSDESCR_NUMFUNC],  // function handles to
+                                       // the implementations of the
+                                       // stochobs-class' functions.
+   // OUTPUTS:
+      CTA_ObsDescrClass  *hdescrcl)    // The (handle to) the new
+                                       //   observation descriptor-class
+{
+
+   CTAI_ObsDescrClass *data;
+   int retval;
+   int i;
+
+   /* Allocate new ObsDescr object */
+   data=CTA_Malloc(sizeof(CTAI_ObsDescrClass));
+
+   for (i=0;i<I_CTA_OBSDESCR_NUMFUNC;i++)
+      {data->functions[i]=h_func[i];}
+
+   // Allocate new handle
+   retval=CTA_Handle_Create(name,CTA_OBSDESCRCLASS,data,hdescrcl);
+   retval=CTA_Handle_GetData((CTA_Handle) *hdescrcl,(void**) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   // return error when unsuccesfull
+   return retval;
+}
+
+
+int CTAI_ObsDescr_member_function(
+        // INPUTS
+        CTA_ObsDescr hdescr,    /* Handle of the stochastic observer of
+                                  which a member function is wanted */
+        int member,            /* Code of the member function */
+        // OUTPUT
+        CTAI_ObsDescr **descr,      /* All data of obs-descr hdescr  */
+        CTA_Function **memfun  /* Member-Function pointer */
+)
+{
+   int retval;
+
+   if (hdescr==CTA_NULL){
+      printf("CTAI_ObsDescr_member_function WARNING NULL handle\n");
+   }
+   /* Check that the given handle is indeed an observation description */
+   retval=CTA_Handle_Check((CTA_Handle) hdescr, CTA_OBSDESCR);
+   if (retval!=CTA_OK) return retval;
+
+   /* Get pointer to struct with observation description data */
+   retval=CTA_Handle_GetData((CTA_Handle) hdescr,(void**) descr);
+   if (retval!=CTA_OK) return retval;
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Func_GetFunc((*descr)->functions[member],memfun);
+   return retval;
+}
+
+
+#undef METHOD
+#define METHOD "Create"  
+int CTA_ObsDescr_Create(
+    // INPUTS:
+        CTA_ObsDescrClass hdescrcl,       // class of observer hsobs
+        CTA_Handle usrdat,           // Data of the stochastic observer for which
+                              //   a descriptor is to be created
+    // OUTPUTS:
+        CTA_ObsDescr *hdescr)       // The new COSTA-stochastic observer
+                                   // (handle)
+{
+   CTAI_ObsDescr *descr;
+   int memsize;
+   int retval;
+   CTAI_ObsDescrClass *clsdata;
+   CTA_Function *my_Create_Size, *my_Create_Init;
+   int i;
+
+   /* Get class data containing all function pointers */
+   retval=CTA_Handle_Check((CTA_Handle) hdescrcl,CTA_OBSDESCRCLASS);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a CTA_OBSDESCRCLASS handle");
+       return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) hdescrcl,(void**) &clsdata);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   /* determine size of data object (CTA_OBSDESCR_CREATE_SIZE)*/
+   retval=CTA_Func_GetFunc(clsdata->functions[I_CTA_OBSDESCR_CREATE_SIZE],
+                           &my_Create_Size);
+
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function CTA_OBSDESCR_CREATE_SIZE");
+	   return retval;
+   }
+
+   my_Create_Size(&memsize,&retval);
+   if (retval) {
+	   CTA_WRITE_ERROR("Error in Create_Size");
+	   return retval;
+   }
+
+   /* allocate memory for new observation description object */
+   descr=CTA_Malloc(sizeof(CTAI_ObsDescr));
+   descr->data=CTA_Malloc(memsize);
+
+   /* copy function pointers */
+   for (i=0;i<I_CTA_OBSDESCR_NUMFUNC;i++){
+      descr->functions[i]=clsdata->functions[i];
+   }
+
+   /* set other general information */
+   descr->hdescrcl=hdescrcl;
+//   descr->hsobs=hsobs;
+
+   /* Initialise and fill new obsdescr */
+   retval=CTA_Func_GetFunc(clsdata->functions[I_CTA_OBSDESCR_CREATE_INIT],
+                           &my_Create_Init);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function CTA_OBSDESCR_CREATE_INIT");
+	   return retval;
+   }
+
+   /* Allocate new handle and return error when unsuccesfull */
+   retval=CTA_Handle_Create("obsdescr",CTA_OBSDESCR,descr,hdescr);
+   if (retval) {
+	   CTA_WRITE_ERROR("Cannot create handle");
+	   return retval;
+   }
+
+   my_Create_Init(hdescr, descr->data, &usrdat, &retval);
+   if (retval!=CTA_OK) {
+       printf("error in create init: %d \n",retval);  
+	   CTA_WRITE_ERROR("Error in Create Init");
+	   return retval;
+   }
+
+   return CTA_OK;
+}
+
+#undef METHOD
+#define METHOD "CreateSel"
+int CTA_ObsDescr_CreateSel( CTA_StochObs hobsdescr, CTA_String selection,
+                            CTA_RelTable reltab, CTA_StochObs *hobsdescrout)
+{
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call    */
+   CTAI_ObsDescr* sobsin;          /* All data of observer hsobsin  */
+   CTAI_ObsDescr* sobsout;         /* All data of observer hsobsout */
+   CTA_Function *memfun;       /* Function that must be called  */
+   CTA_Function *my_Create_Size; /* Function for determining mem-block */
+   int memsize,i;
+
+   /* Look up the Create-size function and the data of the input observer*/
+   retval=CTA_Handle_Check((CTA_Handle) hobsdescr,CTA_OBSDESCR);
+   if (retval!=CTA_OK)  {
+       CTA_WRITE_ERROR("Handle is not a cta_obsdescr handle");
+       return retval;
+   }
+
+   /* Get the create-size member function */
+   retval = CTA_Handle_GetData(hobsdescr,(void **) &sobsin);
+   retval = CTA_Func_GetFunc(sobsin->functions[I_CTA_OBSDESCR_CREATE_SIZE],
+                             &my_Create_Size);
+
+   if (retval!=CTA_OK)  {
+       CTA_WRITE_ERROR("Cannot get function CTA_OBSDESCR_CREATE_SIZE");
+       return retval;
+   }
+
+   my_Create_Size(&memsize,&retval);
+   if (retval) {
+       CTA_WRITE_ERROR("Error in Create_Size");
+       return retval;
+   }
+
+   sobsout=CTA_Malloc(sizeof(CTAI_ObsDescr));
+   sobsout->data=CTA_Malloc(memsize);
+
+   /* copy function pointers and observation class handle */
+   for (i=0;i<I_CTA_OBSDESCR_NUMFUNC;i++)
+      { sobsout->functions[i]=sobsin->functions[i]; }
+
+
+   /* Look up member function and observer data */
+   retval = CTA_Func_GetFunc(sobsin->functions[I_CTA_OBSDESCR_SELECTION],
+                             &memfun);
+   if (retval!=CTA_OK)  {
+       CTA_WRITE_ERROR("Cannot get function CTA_OBSDESCR_SELECTION");
+       return retval;
+   }
+
+   /* Allocate new handle and return error when unsuccesfull */
+   retval=CTA_Handle_Create("obsdescr",CTA_OBSDESCR,sobsout,hobsdescrout);
+   if (retval!=CTA_OK)  {
+       CTA_WRITE_ERROR("Cannot create handle");
+       return retval;
+   }
+
+   /* Call (user) implementation */
+   memfun(sobsin->data,&selection,&reltab, hobsdescrout, sobsout->data,&retval);
+
+   return retval;
+};
+
+#undef METHOD
+#define METHOD "CreateTimeSe1"
+int CTA_ObsDescr_CreateTimSel(CTA_ObsDescr hobsdescr, CTA_Time timespan,
+                              CTA_RelTable reltab, CTA_ObsDescr *hobsdescrout)
+{
+   int ierr;
+   char str[80];
+   double t1,t2;
+   double eps;
+
+   CTA_String sselect;
+
+
+   *hobsdescrout=CTA_NULL;
+   if (hobsdescr!=CTA_NULL){
+      // Get interval of timespan (t1,t2)
+      ierr=CTA_Time_GetSpan(timespan,&t1,&t2); 
+	  if (ierr!=CTA_OK) {
+	     char message[1024];
+	     sprintf(message,"Cannot get interval of timespan (%g,%g)",t1,t2);
+         CTA_WRITE_ERROR(message);
+         return ierr;
+      }
+      // Create the Selection string (SQL-statement)
+      ierr=CTA_String_Create(&sselect);
+	  if (ierr!=CTA_OK) {
+         CTA_WRITE_ERROR("Cannot create string");
+         return ierr;
+      }
+
+      eps=(t2-t1)*1.0e-4+1.0e-16;
+      t1=t1+eps;
+      t2=t2+eps;
+      sprintf(str, "time BETWEEN %f AND %f",t1, t2 );
+      ierr=CTA_String_Set(sselect,str); 
+	  if (ierr!=CTA_OK) {
+         CTA_WRITE_ERROR("Cannot set string");
+         return ierr;
+      }
+
+      // Create the selcection
+      ierr=CTA_ObsDescr_CreateSel(hobsdescr, sselect, reltab, hobsdescrout);
+      if (ierr!=CTA_OK) {
+         CTA_WRITE_ERROR("Cannot create selection");
+         return ierr;
+      }
+
+      // Free work variables
+      ierr=CTA_String_Free(&sselect); 
+	  if (ierr!=CTA_OK) {
+         CTA_WRITE_ERROR("Cannot free string");
+         return ierr;
+      }
+   }
+   return CTA_OK;
+};
+
+
+
+
+#undef METHOD
+#define METHOD "Get_PropertyKeys"
+int CTA_ObsDescr_Get_PropertyKeys(
+   // INPUTS:
+   CTA_ObsDescr hobsdescr,   /* Handle of the observation description of
+                                    which a property is to be returned */
+   // OUTPUTS:
+   CTA_Vector Keys)          /* Name of the key */
+{
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call    */
+   CTAI_ObsDescr* obsdescr;    /* All data of observer hobsdescr  */
+   CTA_Function *memfun;       /* Function that must be called  */
+
+   /* trivial return we accept CTA_NULL -> empty vector */
+   if (hobsdescr==CTA_NULL){
+     Keys = CTA_NULL;
+     return CTA_OK;
+   }
+
+   /* Look up the member function and the data of the observation descriptor*/
+   retval = CTAI_ObsDescr_member_function(hobsdescr, I_CTA_OBSDESCR_GET_KEYS,
+                                          &obsdescr, &memfun);
+   if (retval) {
+       CTA_WRITE_ERROR("Error in function CTA_OBSDESCR_GET_KEYS");
+       return retval;
+   }
+
+   /* Call (user) implementation */
+   memfun(obsdescr->data,&Keys,&retval);
+
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "Property_Count"
+int CTA_ObsDescr_Property_Count(CTA_ObsDescr hobsdscr, int *nkeys){
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call    */
+   CTAI_ObsDescr* obsdescr;    /* All data of observer hobsdescr  */
+   CTA_Function *memfun;       /* Function that must be called  */
+   char msg[256];
+
+   /* trivial return: CTA_NULL means empty description: no keys */
+   if (hobsdscr==CTA_NULL){
+      *nkeys=0;
+      return CTA_OK;
+   }
+
+   /* Look up the member function and the data of the observation descriptor*/
+   retval = CTAI_ObsDescr_member_function(hobsdscr, I_CTA_OBSDESCR_COUNT_PROPERTIES,
+                                          &obsdescr, &memfun);
+   if (retval) {
+       sprintf(msg,"Cannot find the member function I_CTA_OBSDESCR_COUNT_PROPERTIES retval=%d",retval);
+       CTA_WRITE_ERROR(msg);
+       return retval;
+   }
+
+   /* Call (user) implementation */
+   memfun(obsdescr->data,nkeys,&retval);
+
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "Observation_Count"
+int CTA_ObsDescr_Observation_Count(CTA_ObsDescr hobsdscr, int *nobs){
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call    */
+   CTAI_ObsDescr* obsdescr;    /* All data of observer hobsdescr  */
+   CTA_Function *memfun;       /* Function that must be called  */
+   char msg[256];
+
+   /* trivial return we accept CTA_NULL -> nobs=0 */
+   if (hobsdscr==CTA_NULL){
+      *nobs=0;
+      return CTA_OK;
+   }
+
+   /* Look up the member function and the data of the observation descriptor*/
+   retval = CTAI_ObsDescr_member_function(hobsdscr, I_CTA_OBSDESCR_COUNT_OBSERVATIONS,
+                                          &obsdescr, &memfun);
+   if (retval) {
+       sprintf(msg,"Cannot find the member function I_CTA_OBSDESCR_COUNT_OBSERVATIONS retval=%d",retval);
+       CTA_WRITE_ERROR(msg);
+       return retval;
+   }
+
+   /* Call (user) implementation */
+   memfun(obsdescr->data,nobs,&retval);
+
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "Get_ValueProperties"
+int CTA_ObsDescr_Get_ValueProperties(
+          CTA_ObsDescr hobsdscr,
+          const char* Key,
+          CTA_Vector Properties,
+          CTA_Datatype datatype)
+{
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call    */
+   CTAI_ObsDescr* obsdescr;    /* All data of observer hobsdescr  */
+   CTA_Function *memfun;       /* Function that must be called  */
+   char msg[256];
+
+
+   /* Look up the member function and the data of the observation descriptor*/
+   retval = CTAI_ObsDescr_member_function(hobsdscr, I_CTA_OBSDESCR_GET_PROPERTIES,
+                                          &obsdescr, &memfun);
+   if (retval) {
+       sprintf(msg,"Cannot find the member function I_CTA_OBSDESCR_GET_PROPERTIES retval=%d",retval);
+       CTA_WRITE_ERROR(msg);
+       return retval;
+   }
+
+ 
+   /* Call (user) implementation */
+   memfun(obsdescr->data,Key,&Properties,&datatype,&retval);
+
+   return retval;
+}
+
+
+#undef METHOD
+#define METHOD "Export"
+int CTA_ObsDescr_Export(CTA_ObsDescr hdescr, CTA_Handle usrdat){
+   int retval;                 /* Return value of COSTA call    */
+   CTAI_ObsDescr* descr;    /* All data of observer hobsdescr  */
+   CTA_Function *memfun;       /* Function that must be called  */
+   char msg[256];
+
+   /* Look up the member function and the data of the observation descriptor*/
+   retval = CTAI_ObsDescr_member_function(hdescr, I_CTA_OBSDESCR_EXPORT,
+                                          &descr, &memfun);
+   if (retval) {
+       sprintf(msg,"Cannot find the member function I_CTA_OBSDESCR_EXPORT retval=%d",retval);
+       CTA_WRITE_ERROR(msg);
+       return retval;
+   }
+
+   /* Call (user) implementation */
+   memfun(descr->data,&usrdat,&retval);
+   return retval;
+}
+
+
+
+
+#undef METHOD
+#define METHOD "Free"
+int CTA_ObsDescr_Free(CTA_ObsDescr *hdescr){
+   int retval;                 /* Return value of COSTA call    */
+   CTAI_ObsDescr* descr;    /* All data of observer hobsdescr  */
+   CTA_Function *memfun;       /* Function that must be called  */
+   char msg[256];
+
+   /* Check for quick return */
+   if (*hdescr==CTA_NULL) return CTA_OK;
+
+   /* Look up the member function and the data of the observation descriptor*/
+   retval = CTAI_ObsDescr_member_function(*hdescr, I_CTA_OBSDESCR_FREE,
+                                          &descr, &memfun);
+   if (retval) {
+       sprintf(msg,"Cannot find the member function I_CTA_OBSDESCR_FREE retval=%d",retval);
+       CTA_WRITE_ERROR(msg);
+       return retval;
+   }
+
+   /* Call (user) implementation */
+   memfun(descr->data,&retval);
+
+   free(descr->data);
+   free(descr);
+   retval = CTA_Handle_Free(hdescr);
+   return retval;
+}
+
+/* Interfacing with Fortran */
+
+CTAEXPORT void CTA_OBSDESCR_DEFINECLASS_F77( char *name, int *h_func, int *hobsdscrcl,int *ierr, int len_name){
+//CTAEXPORT void cta_obsdescr_defineclass_( char *name, int *h_func, int *hobsdscrcl,int *ierr, int len_name){
+
+   char  *c_name;
+   // create a c-string equivalent to name
+   c_name=CTA_Malloc((len_name+1)*sizeof(char));
+   CTA_fstr2cstr(name,c_name,len_name);
+
+   *ierr=CTA_ObsDescr_DefineClass(name, (CTA_Func*) h_func, (CTA_ObsDescrClass*) hobsdscrcl);
+
+   free(c_name);
+}
+
+CTAEXPORT void CTA_OBSDESCR_CREATE_F77(int *hsobscl, int *usrdat,
+              int *hobsdescr, int *ierr){
+
+   *ierr=CTA_ObsDescr_Create((CTA_ObsDescrClass) *hsobscl, (CTA_Handle) *usrdat,
+                             (CTA_ObsDescr*) hobsdescr);
+}
+
+//void CTA_OBSDESCR_CHECK_SOBS_F77(int *hobsdescr, int *hsobs ,int *ierr){
+//   *ierr=CTA_ObsDescr_Check_SObs((CTA_ObsDescr) *hobsdescr, (CTA_StochObs) *hsobs);
+//}
+
+CTAEXPORT void CTA_OBSDESCR_GET_VALUEPROPERTIES_F77(
+           int *hobsdscr, char *key, int *Property, int *datatype,
+           int *ierr, int len_key)
+{
+   char *c_Key;
+   c_Key=CTA_Malloc((len_key+1)*sizeof(char));
+   CTA_fstr2cstr(key,c_Key,len_key);
+   *ierr=CTA_ObsDescr_Get_ValueProperties(
+                    (CTA_ObsDescr) *hobsdscr,
+                    c_Key,
+                    (CTA_Vector)   *Property,
+                    (CTA_Datatype) *datatype);
+   free(c_Key);
+}
+
+CTAEXPORT void CTA_OBSDESCR_GET_PROPERTY_KEYS_F77(int *hobsdscr, int *Keys, int *ierr){
+   *ierr=CTA_ObsDescr_Get_PropertyKeys( (CTA_ObsDescr) *hobsdscr, (CTA_Vector) *Keys);
+}
+
+CTAEXPORT void CTA_OBSDESCR_PROPERTY_COUNT_F77(int *hobsdscr, int *nkeys, int *ierr){
+   *ierr=CTA_ObsDescr_Property_Count( (CTA_ObsDescr) *hobsdscr, nkeys);
+}
+
+CTAEXPORT void CTA_OBSDESCR_OBSERVATION_COUNT_F77(int *hobsdscr, int *nobs, int *ierr){
+   *ierr=CTA_ObsDescr_Observation_Count((CTA_ObsDescr) *hobsdscr, nobs);
+}
+
+CTAEXPORT void CTA_OBSDESCR_EXPORT_F77(int *hdescr, int *usrdat, int *ierr){
+   *ierr=CTA_ObsDescr_Export((CTA_ObsDescr) *hdescr, (CTA_Handle) *usrdat);
+}
+
+CTAEXPORT void CTA_OBSDESCR_FREE_F77(int *hobsdscr, int *ierr){
+   *ierr=CTA_ObsDescr_Free((CTA_ObsDescr *) hobsdscr);
+}
+
+
+
diff --git a/costa/native/cta/src/cta_obsdescr_combine.c b/costa/native/cta/src/cta_obsdescr_combine.c
new file mode 100644
index 000000000..b0dbbc302
--- /dev/null
+++ b/costa/native/cta/src/cta_obsdescr_combine.c
@@ -0,0 +1,392 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/trunk/costa/src/cta/cta_obsdescr_combine.c $
+$Revision: 671 $, $Date: 2008-10-07 14:49:42 +0200 (Tue, 07 Oct 2008) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <string.h>
+#include <stdio.h>
+#include <math.h>
+#include <memory.h>
+#include "cta_mem.h"
+#include "cta_file.h"
+#include "cta_errors.h"
+#include "cta_string.h"
+//#include "cta_obsdescr_combine.h"
+#include "cta_defaults.h"
+
+#include "cta_sobs_combine.h"
+
+#define IDEBUG (0)
+
+typedef struct {
+CTA_Handle myhandle;
+char* condition;
+int n_keys;
+int nmeasr;
+CTA_String *Keys;
+int nofsubdescr;      // number of descriptions
+CTA_Vector subdescr; // list of handles of descriptions
+} CTAI_ObsDescr_combine;
+
+
+void CTAI_ObsDescr_combine_Create_Size(
+   // OUTPUTS:
+      int *memsize,           // The number of bytes which are necessary to 
+                              //     store one CTAI_SObs_combine, with a 
+                              //     pointer to the contents (data), 
+                              //     but without the contents themselves.
+      int *retval             // error code (see cta_datatypes.h for possible 
+                              //     error codes)
+   ){
+//   *memsize=(int) sizeof(CTAI_SObs_combine);
+   *memsize=(int) sizeof(CTAI_ObsDescr_combine);
+   *retval=CTA_OK;
+};
+
+void CTAI_ObsDescr_combine_Create_Init(
+/*
+   Allocate the memory which is necessary to store the data necessary for a
+   combine-observer
+*/
+     // INPUT:
+      CTA_ObsDescr *myhandle,  /* Handle assigned by COSTA */
+     // IN-OUTPUTS
+        CTAI_ObsDescr_combine *descr,// The combine-observation description
+                                 //     for which the memory must be
+                                 //     allocated
+     // INPUTS:
+        CTA_Handle *usrdat, // User data: de data from the stochobs. Here it is the CTAI_Sobs_combine
+     // OUTPUTS
+        int *retval)             // Error code.
+{
+  CTAI_SObs_combine *x;
+  int nofsubdescr, idescr, ierr;
+  CTA_Handle subsob;
+  CTA_Vector hvec1;
+  CTA_ObsDescr hsubdescr;  // description of  sub-stochobs subsob
+
+  // get the CTAI_Sobs_Combine 
+   *retval= CTA_Handle_GetData(*usrdat, (void **) &x);
+   if (*retval!=CTA_OK) return;
+
+   /* Copy the information about the number of subsobs */
+  /* Note: the number of subdescr is, by definition, equal to the number of subsobs. */
+   descr->nofsubdescr =  x->nofsubsobs ; 
+
+   nofsubdescr = descr->nofsubdescr; 
+   /* create a vector for the handles of ghe observation descriptions */
+   ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nofsubdescr, CTA_HANDLE, CTA_NULL, &hvec1);
+
+   for (idescr=1; idescr<=nofsubdescr; idescr++) {
+     //get the sub-stochastic observer
+       ierr=CTA_Vector_GetVal(x->subsobs,idescr,&subsob,CTA_HANDLE);
+       /* check if handle is a stochastic observation? (superfluous check) */
+       ierr=CTA_Handle_Check((CTA_Handle) subsob,CTA_SOBS);
+       if (IDEBUG > 0) 
+         printf("sobs_obsdescr_combine_create_init: TEST if subsobs %d is real (sub-)sobs : retval %d \n",idescr,ierr);
+       /* Each subsobs already  has its own  description created. */ 
+       CTA_SObs_GetDescription(subsob,&hsubdescr);
+
+     /* put handle in cta-vector of handles */    
+       CTA_Vector_SetVal(hvec1,idescr,&hsubdescr,CTA_HANDLE);
+
+   }
+
+
+   /* Finally, set the vector with sub descriptions */
+     descr->subdescr = hvec1;
+
+};
+
+void CTAI_ObsDescr_combine_CreateSel(CTAI_ObsDescr_combine *descr,
+          CTA_String *selection, CTA_RelTable *reltab, 
+          CTA_ObsDescr *myhandle_out, 
+          CTAI_ObsDescr_combine *descrout, int *retval){
+
+  int nofsubdescr,idescr;
+  CTA_Handle subdescr_in ;
+  int ierr;
+  CTA_Vector hvec1;
+  CTA_ObsDescr subdescr_out;
+
+  nofsubdescr = descr->nofsubdescr;
+
+  /* Warning: only the memory of descrout has been created already,
+     the nofsubdescr and vector have to be defined now. */
+  descrout->nofsubdescr = nofsubdescr;
+  ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nofsubdescr, CTA_HANDLE, CTA_NULL, &hvec1);      
+
+  for (idescr=1; idescr<=nofsubdescr; idescr++) {
+       ierr=CTA_Vector_GetVal(descr->subdescr,idescr,&subdescr_in,CTA_HANDLE);
+       /* call the createsel function for each individual sub-sobdescr */
+       if (ierr == CTA_OK) ierr = CTA_ObsDescr_CreateSel(subdescr_in, *selection,   
+                            *reltab, &subdescr_out);
+
+       /* put the selected subdescr in the vector descrout->subdescr */
+       if (ierr == CTA_OK) ierr=CTA_Vector_SetVal(hvec1,idescr,&subdescr_out,CTA_HANDLE);    
+       if (ierr != CTA_OK) {
+         printf("ctai_obsdescr_createsel: ERROR %d \n",ierr);}
+
+  }
+  /* set the cta_vector of the descrout */
+  descrout->subdescr = hvec1;
+
+  *retval = ierr ;
+  /* TODO: think about reltab and myhandle. */
+
+
+}
+         
+
+
+void CTAI_ObsDescr_combine_Get_Keys(
+         CTAI_ObsDescr_combine *descr,
+         CTA_Vector *Keys,
+         int *retval)
+{
+
+  int nofsubdescr,idescr;
+  CTA_Handle subdescr;
+  int nsubkeys,ierr, offset, j;
+  CTA_Vector hsubvec;
+  CTA_String hstr1;
+
+  nofsubdescr = descr->nofsubdescr;
+  offset=0; 
+  for (idescr=1; idescr<=nofsubdescr; idescr++) {
+       ierr=CTA_Vector_GetVal(descr->subdescr,idescr,&subdescr,CTA_HANDLE);
+       /* count the number of keys in sub-obsdescr */
+        ierr=CTA_ObsDescr_Property_Count(subdescr, &nsubkeys);
+
+       /* create a vector holding the keys of the sub-obsdescr */
+       ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nsubkeys, CTA_STRING, CTA_NULL, &hsubvec);
+       /* call the get_propertykeys function for each individual sub-sobdescr */
+       ierr = CTA_ObsDescr_Get_PropertyKeys(subdescr, hsubvec);
+
+       /* add the new keys to the existing keys */
+       ierr =  CTA_String_Create(&hstr1);
+       for (j=1; j<= nsubkeys; j++){
+         CTA_Vector_GetVal(hsubvec,j, &hstr1,CTA_STRING);
+         CTA_Vector_SetVal(*Keys,offset+j, &hstr1,CTA_STRING);
+       }
+       offset = offset + nsubkeys;
+       ierr = CTA_Vector_Free(&hsubvec);
+  }
+  *retval = ierr;
+
+  // TODO: hier moeten wel de dubbele uit!
+};
+
+void CTAI_ObsDescr_combine_Property_Count(
+         CTAI_ObsDescr_combine *descr, 
+         int *nkeys, 
+         int *retval)
+{
+  int nofsubdescr,idescr;
+  CTA_Handle subdescr;
+  int nsubkeys,ierr;
+
+  *nkeys = 0;
+  nofsubdescr = descr->nofsubdescr; 
+  for (idescr=1; idescr<=nofsubdescr; idescr++) {
+       ierr=CTA_Vector_GetVal(descr->subdescr,idescr,&subdescr,CTA_HANDLE);
+       /* call the propertycount function for each individual sub-obsdescr */
+       ierr = CTA_ObsDescr_Property_Count(subdescr, &nsubkeys);
+       *nkeys = *nkeys + nsubkeys;
+  }
+  *retval = ierr;
+}
+;
+
+void CTAI_ObsDescr_combine_Observation_Count(
+         CTAI_ObsDescr_combine *descr, 
+         int *nobs, 
+         int *retval)
+{
+
+  int nofsubdescr,idescr;
+  CTA_Handle subdescr;
+  int nsubobs,ierr;
+
+  *nobs = 0;
+  nofsubdescr = descr->nofsubdescr; 
+  for (idescr=1; idescr<=nofsubdescr; idescr++) {
+       ierr=CTA_Vector_GetVal(descr->subdescr,idescr,&subdescr,CTA_HANDLE);
+       /* check if handle is a observation description? */
+       ierr=CTA_Handle_Check((CTA_Handle) subdescr,CTA_OBSDESCR);
+
+       if (IDEBUG > 0) {printf("ctai_obsdescr_combine_observation_count: "); 
+         printf("TEST if subdescr %d is a real (sub-)descr : retval %d \n",idescr,ierr);}
+
+       /* call the count function for each individual sub-sobdescr */
+       ierr = CTA_ObsDescr_Observation_Count(subdescr, &nsubobs);
+       *nobs = *nobs + nsubobs;
+  }
+  *retval = ierr;
+};
+
+
+void CTAI_ObsDescr_combine_Get_Properties(
+         CTAI_ObsDescr_combine *descr, 
+         const char *Key,
+         CTA_Vector *Properties,
+         CTA_Datatype *datatype,
+         int *retval)
+{
+  int nofsubdescr,idescr,j;
+  CTA_Handle subdescr;
+  int ierr, nsubobs, ntotobs,offset;
+  CTA_Vector hsubvec; 
+  CTA_String *strval, *substrval;
+  double *val, *subval;
+
+  //  CTA_String *strval, *substrval;  
+
+  /* first ask the total number of values of the vector to be filled*/
+  ierr = CTA_Vector_GetSize(*Properties,&ntotobs);
+  
+/* allocate the main array containing all the values */
+  if(*datatype == CTA_STRING)  {strval = CTA_Malloc(sizeof(CTA_String)*ntotobs); 
+  if (IDEBUG > 0)  printf("obsdescr_combine_get_properties: Warning: string as propertydatatype!\n");}
+  else { val = CTA_Malloc(sizeof(double)*ntotobs);}
+
+
+  offset = 0;
+  nofsubdescr = descr->nofsubdescr; 
+  for (idescr=1; idescr<=nofsubdescr; idescr++) {
+       ierr=CTA_Vector_GetVal(descr->subdescr,idescr,&subdescr,CTA_HANDLE);
+       /* First count the number of observations in sub descr */
+       ierr = CTA_ObsDescr_Observation_Count(subdescr, &nsubobs);
+
+       if (nsubobs > 0) {
+
+         /* create the subvector to receive the values of the subdescr */
+         ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nsubobs, *datatype, CTA_NULL, &hsubvec);
+
+         /* call the get_valueproperties function for each individual sub-sobdescr */
+         ierr = CTA_ObsDescr_Get_ValueProperties(subdescr, Key, hsubvec, *datatype);
+       
+         /* put the values in the sub-array. In case of String-output, create the strings */
+         if (*datatype == CTA_STRING) {
+           substrval = CTA_Malloc(sizeof(CTA_String)*nsubobs); 
+           for (j=0; j<nsubobs; j++)
+             { ierr = CTA_String_Create(&substrval[j]);}
+           ierr=CTA_Vector_GetVals(hsubvec,substrval, nsubobs, *datatype);
+         }
+         else {
+           subval = CTA_Malloc(sizeof(double)*nsubobs);
+           ierr=CTA_Vector_GetVals(hsubvec,subval, nsubobs, *datatype);
+         }
+       
+
+         /* fill the main vector  */
+         if (*datatype == CTA_STRING) { 
+           for (j=0; j < nsubobs; j++) {
+             //the duplicate function already creates the cta_string for us.
+             ierr = CTA_String_Duplicate(substrval[j], &strval[offset+j]); 
+           
+           }
+         } else {
+           for (j=0; j < nsubobs; j++) {
+             val[offset+j] = subval[j];
+           }
+         }
+
+         // In case of String-output, erase the strings
+         if (*datatype == CTA_STRING)
+           {
+             for (j=0; j<nsubobs; j++) {
+               ierr = CTA_String_Free(&substrval[j]);}
+             free(substrval);
+           } else {
+           free(subval);}
+
+         ierr = CTA_Vector_Free(&hsubvec);
+         offset = offset + nsubobs;
+       }
+  }
+  /* fill *Properties with the main array */
+  if (*datatype == CTA_STRING) {
+    ierr=CTA_Vector_SetVals(*Properties,strval, ntotobs, *datatype);
+    for (j=0; j<ntotobs; j++)
+      ierr = CTA_String_Free(&strval[j]);
+    free(strval);
+  } else {
+    ierr=CTA_Vector_SetVals(*Properties,val, ntotobs, *datatype);
+    free(val);
+  }
+
+
+  // for (j=0; j < ntotobs; j++) {printf("%d %f\n",j, val[j]);    }
+
+  *retval = ierr;
+
+
+}
+
+
+void CTAI_ObsDescr_combine_Free(
+         CTAI_ObsDescr_combine *descr, 
+         int *retval)
+{
+  int nofsubdescr, idescr, ierr;
+  CTA_Handle subdescr;
+
+  nofsubdescr = descr->nofsubdescr; 
+  ierr = CTA_OK;
+  for (idescr=1; idescr<=nofsubdescr; idescr++) {
+       if (ierr == CTA_OK) ierr=CTA_Vector_GetVal(descr->subdescr,idescr,&subdescr,CTA_HANDLE);
+       if (ierr == CTA_OK) ierr = CTA_ObsDescr_Free(&subdescr);
+  } 
+  *retval = ierr;
+
+}
+
+
+void CTA_ObsDescr_combine_initialise(CTA_ObsDescrClass *hobsdescrcl)
+{
+   CTA_Intf hintf=0;
+   CTA_Func h_func[I_CTA_OBSDESCR_NUMFUNC];
+
+   // The vector h_func is filled with COSTA-function handles of the 
+   // implementations in this file.
+   CTA_Func_Create(" ",&CTAI_ObsDescr_combine_Create_Size,    hintf,
+                       &h_func[I_CTA_OBSDESCR_CREATE_SIZE]);
+   CTA_Func_Create(" ",&CTAI_ObsDescr_combine_Create_Init,    hintf,
+                       &h_func[I_CTA_OBSDESCR_CREATE_INIT]);
+   CTA_Func_Create(" ",&CTAI_ObsDescr_combine_Property_Count,    hintf,
+                       &h_func[I_CTA_OBSDESCR_COUNT_PROPERTIES]);
+   CTA_Func_Create(" ",&CTAI_ObsDescr_combine_Get_Properties,    hintf,
+                       &h_func[I_CTA_OBSDESCR_GET_PROPERTIES]);
+   CTA_Func_Create(" ",&CTAI_ObsDescr_combine_Observation_Count, hintf,
+                       &h_func[I_CTA_OBSDESCR_COUNT_OBSERVATIONS]);
+   CTA_Func_Create(" ",&CTAI_ObsDescr_combine_Get_Keys,    hintf,
+                       &h_func[I_CTA_OBSDESCR_GET_KEYS]);
+   CTA_Func_Create(" ",&CTAI_ObsDescr_combine_Free,    hintf,
+                       &h_func[I_CTA_OBSDESCR_FREE]);
+   CTA_Func_Create(" ",&CTAI_ObsDescr_combine_CreateSel,    hintf,
+                       &h_func[I_CTA_OBSDESCR_SELECTION]);
+   CTA_ObsDescr_DefineClass("cta_obsdescr_combine",h_func,hobsdescrcl);
+
+}
+
+
+
+
diff --git a/costa/native/cta/src/cta_obsdescr_maori.c b/costa/native/cta/src/cta_obsdescr_maori.c
new file mode 100644
index 000000000..8cf96d6d0
--- /dev/null
+++ b/costa/native/cta/src/cta_obsdescr_maori.c
@@ -0,0 +1,59 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/trunk/costa/src/cta/cta_obsdescr_netcdf.c $
+$Revision: 671 $, $Date: 2008-10-07 14:49:42 +0200 (Tue, 07 Oct 2008) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <string.h>
+#include <stdio.h>
+#include "cta.h"
+
+void CTA_ObsDescr_maori_initialize(CTA_ObsDescrClass *hobsdescrcl)
+{
+   CTA_Func h_func[I_CTA_OBSDESCR_NUMFUNC];
+   char *libraryName;
+   char *functionName;
+
+   // The vector h_func is filled with function read from the maori user dynamic library
+
+   libraryName="libleoda.so";
+   functionName="ctai_obsdescr_maori_create_size";
+   h_func[I_CTA_OBSDESCR_CREATE_SIZE]        = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="ctai_obsdescr_maori_create_init";
+   h_func[I_CTA_OBSDESCR_CREATE_INIT]        = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="ctai_obsdescr_maori_property_count";
+   h_func[I_CTA_OBSDESCR_COUNT_PROPERTIES]   = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="ctai_obsdescr_maori_get_properties";
+   h_func[I_CTA_OBSDESCR_GET_PROPERTIES]     = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="ctai_obsdescr_maori_observations_count";
+   h_func[I_CTA_OBSDESCR_COUNT_OBSERVATIONS] = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="ctai_obsdescr_maori_get_keys";
+   h_func[I_CTA_OBSDESCR_GET_KEYS]           = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="ctai_obsdescr_maori_free";
+   h_func[I_CTA_OBSDESCR_FREE]               = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="ctai_obsdescr_maori_selection";
+   h_func[I_CTA_OBSDESCR_SELECTION]          = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+
+   CTA_ObsDescr_DefineClass("cta_obsdescr_maori",h_func,hobsdescrcl);
+
+
+}
+
+
+
diff --git a/costa/native/cta/src/cta_obsdescr_netcdf.c b/costa/native/cta/src/cta_obsdescr_netcdf.c
new file mode 100644
index 000000000..0b3d012a8
--- /dev/null
+++ b/costa/native/cta/src/cta_obsdescr_netcdf.c
@@ -0,0 +1,550 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/trunk/costa/src/cta/cta_obsdescr_netcdf.c $
+$Revision: 671 $, $Date: 2008-10-07 14:49:42 +0200 (Tue, 07 Oct 2008) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <string.h>
+#include <stdio.h>
+#include <math.h>
+#include <memory.h>
+#include "cta.h"
+#include "cta_mem.h"
+#include "cta_sobs_netcdf.h"
+#include "cta_defaults.h"
+
+#define IDEBUG (0)
+
+#define MAX(a,b) (a>b ? a: b)
+#define MIN(a,b) (a>b ? b: a)
+
+
+/* Dit is een struct om wat op te slaan wat we nodig hebben */
+typedef struct {
+#if HAVE_LIBNETCDF
+  CTA_Handle myhandle;
+  char* condition;
+  int n_keys;
+  int size_1Dproperties;
+  int nmeasr;
+  int nmeasr_orig;
+  CTA_String *Keys;
+  CTA_RelTable selectionReltab;
+  CTA_Time tspan;
+  float bb_lon[2];
+  float bb_lat[2];
+  CTAI_OMI_database *database;
+  CTAI_SObs_netcdf *sobs;  
+#else
+	int dum;
+#endif
+} CTAI_ObsDescr_netcdf;
+
+
+
+
+void CTAI_ObsDescr_netcdf_Create_Size(
+   // OUTPUTS:
+      int *memsize,           // The number of bytes which are necessary to 
+                              //     store one CTAI_SObs_netcdf, with a 
+                              //     pointer to the contents (data), 
+                              //     but without the contents themelves.
+      int *retval             // error code (see cta_datatypes.h for possible 
+                              //     error codes)
+   ){
+#if HAVE_LIBNETCDF
+
+//   *memsize=(int) sizeof(CTAI_SObs_netcdf);
+   *memsize=(int) sizeof(CTAI_ObsDescr_netcdf);
+   *retval=CTA_OK;
+#else
+   printf("Error: CTAI_ObsDescr_netcdf_Create_Size: COSTA is compiled without NETCD suport\n");  
+#endif
+};
+
+void CTAI_ObsDescr_netcdf_Create_Init(
+/*
+   Allocate the memory which is necesary to store the data necessary for a
+   netcdf-observer
+*/
+     // INPUT:
+      CTA_ObsDescr *myhandle,  /* Handle assigned by COSTA */
+     // IN-OUTPUTS
+        CTAI_ObsDescr_netcdf *descr,// The netcdf-observation description
+                                 //     for which the memory must be
+                                 //     allocated
+     // INPUTS:
+        CTA_Handle *usrdat, // User data: database-name
+     // OUTPUTS
+        int *retval)             // Error code.
+{
+#if HAVE_LIBNETCDF
+
+   CTAI_SObs_netcdf *sobs; // User data: database-name
+   int ierr,i,j ;
+   CTA_Time tspan0;
+   char buffer[20];
+
+   descr->myhandle=*myhandle;
+
+   *retval= CTA_Handle_GetData(*usrdat, (void **) &sobs);
+   if (*retval!=CTA_OK) {
+     printf("error: ctai_obsdescr-netcdf_create_init : handle_getdata \n");
+     return;
+   }
+
+   ierr=CTA_Time_Create(&tspan0);
+   ierr=CTA_Time_SetSpan(tspan0, 0.0, 24.0);
+   descr->tspan = tspan0;
+
+   // store the handle of the stochastic observer
+   descr->sobs = sobs;
+
+   descr->database = sobs->database;
+   (descr->database->nusers)++;
+
+   // Copy the conditionCTA_RelTable *hreltable ?
+   ierr = CTA_RelTable_Create(&(descr->selectionReltab));
+   descr->selectionReltab = sobs->selectionReltab;
+
+   // Store the keys ?
+   // Here: do it ad hoc since our OMI-netcdf database is very specific
+   descr->size_1Dproperties = 30; //here: for kernel and pressure_levels
+   descr->n_keys=2 + descr->size_1Dproperties + descr->size_1Dproperties+1 ;
+   descr->Keys = CTA_Malloc(sizeof(CTA_String)* (descr->n_keys));
+   for (i=0; i< descr->n_keys; i++) 
+   {
+     ierr = CTA_String_Create( &(descr->Keys[i]) );
+     if (ierr != CTA_OK) {printf("error in cta_obsdecr_init %d \n",ierr);return ; }
+   }
+   /*   Key: 0:long
+             1: lat
+             2-31 kernel
+             32-62 pressure_levels
+   */
+
+   ierr = CTA_String_Set(descr->Keys[0],"longitude");
+   ierr = CTA_String_Set(descr->Keys[1],"latitude");
+   for (j=1; j<= descr->size_1Dproperties; j++){
+     sprintf(buffer,"kernel_%d",j); 
+     ierr = CTA_String_Set(descr->Keys[1+j],buffer);
+   }
+   for (j=1; j<= descr->size_1Dproperties + 1; j++){
+     sprintf(buffer,"pressure_levels_%d",j); 
+     ierr = CTA_String_Set(descr->Keys[31+j],buffer);
+   }
+
+   if (ierr  != CTA_OK) {printf("error cta_string_set%d\n",ierr ); return;}
+
+   // Copy the dimension of the observer
+   descr->nmeasr = sobs->nmeasr;
+   descr->nmeasr_orig = sobs->nmeasr_orig;
+   *retval=CTA_OK;
+#else
+   printf("Error: CTAI_ObsDescr_netcdf_Create_Init: COSTA is compiled without NETCD suport\n");
+  *retval=CTA_NOT_IMPLEMENTED;
+#endif
+
+};
+
+
+void CTAI_ObsDescr_netcdf_CreateSel(CTAI_ObsDescr_netcdf *descr,
+          CTA_String *selection, CTA_RelTable *reltab, 
+          CTA_ObsDescr *myhandle_out, 
+          CTAI_ObsDescr_netcdf *descrout, int *retval){
+
+#if HAVE_LIBNETCDF
+			  
+	  int len, ierr,i ;
+      char *condition;
+      double t1_in,t2_in, t1,t2, t1_out, t2_out; 
+      int nstations, count_out;
+      CTA_Vector hvec_time;
+      double *vec_time;
+      int  *flag;
+
+      // Get the condition
+      // Allocate a name-string 
+      *retval = CTA_String_GetLength(*selection, &len);
+      if (*retval!=CTA_OK) return;
+
+      // Get the condition
+      condition = CTA_Malloc((len+1)*sizeof(char));
+
+      *retval = CTA_String_Get(*selection, condition);
+      if (IDEBUG>0) printf("CTAI_ObsDescr_netcdf_CreateSel: given    condition is '%s'\n",condition);
+      if (*retval!=CTA_OK) return;
+
+   // Set handle of new observation description
+      descrout->myhandle=*myhandle_out;
+
+
+      // IF the callling sequence has been : cta_sobs_createtimsel -> cta_sobs_creatsel -> 
+      // ctai_sobs_netcdf_createsel, THEN
+      // The condition is the string starting with 'time BETWEEN'
+      if (strstr(condition,"time BETWEEN") != NULL) {
+        ierr = sscanf(condition,"%*s %lf %lf",&t1,&t2);
+
+      } else {
+        t1 = 0.0; t2 = 24.0;
+      }
+      printf("ctai_obsdescr_createsel: time selection: %f %f \n",t1,t2);
+
+      // It may be possible that a spatial restriction is given. We will implement that later. TODO
+  
+     
+   // Combine the input-condition and the condition of the input observer.
+   // In our netcdf-case, the condition is a combination of spatial window and timespan.
+      ierr =  CTA_Time_GetSpan(descr->tspan, &t1_in, &t2_in);
+      t1_out = MAX(t1_in,t1);
+      t2_out = MIN(t2_in,t2);
+      ierr =  CTA_Time_SetSpan(descrout->tspan, t1_out, t2_out);
+
+
+
+      // first get the time vector. 
+      ierr = CTA_Vector_Create(CTA_DEFAULT_VECTOR, descr->nmeasr, CTA_DOUBLE, CTA_NULL, &hvec_time);
+      CTAI_SObs_netcdf_GetTimes(descr->sobs, &hvec_time, &ierr);
+
+      printf("cta_obsdescr_netcdf: manipulation sobs %p %d \n",descr->sobs,ierr);
+
+      vec_time = CTA_Malloc(descr->sobs->nmeasr * sizeof(double));
+      ierr=CTA_Vector_GetVals(hvec_time,vec_time, descr->sobs->nmeasr, CTA_DOUBLE);
+
+      // We use the relation table to investigate if the current selection should be restricted more.
+      count_out = 0;
+      flag=CTA_Malloc((descr->sobs->nmeasr)*sizeof(int));
+      for (i=0; i < descr->sobs->nmeasr; i++) {
+        flag[i]= 0;
+        if (vec_time[i] >= t1_out && vec_time[i] <=t2_out) {
+          flag[i] = 1;
+
+          count_out = count_out + 1;
+        }
+      }
+      nstations = count_out;
+
+      descrout->nmeasr    = nstations;
+      descrout->n_keys = descr->n_keys;
+      descrout->size_1Dproperties = descr->size_1Dproperties;
+
+      if (*reltab!=CTA_NULL) {
+        *retval=CTAI_ObsDescr_CreateRelTable(descr->myhandle,
+                                             descrout->myhandle, *reltab);
+      } else {
+        *retval=CTA_OK;
+      }
+
+      descrout->database = descr->database;
+      descrout->database->nusers++;
+
+      free(vec_time);
+      free(flag);
+      *retval = CTA_OK;
+
+#else
+   printf("Error: CTAI_ObsDescr_netcdf_CreateSel: COSTA is compiled without NETCD support\n");
+  *retval=CTA_NOT_IMPLEMENTED;
+#endif
+
+
+}
+         
+
+
+void CTAI_ObsDescr_netcdf_Get_Keys(
+         CTAI_ObsDescr_netcdf *descr,
+         CTA_Vector *Keys,
+         int *retval)
+{ 
+#if HAVE_LIBNETCDF	
+   int i;
+   for (i=1; i<=descr->n_keys; i++)
+   {
+      *retval = CTA_Vector_SetVal(*Keys,i,&(descr->Keys[i-1]),CTA_STRING);
+   };
+
+   *retval = CTA_OK;
+
+
+#else
+   printf("Error: CTAI_ObsDescr_netcdf_Get_Keys: COSTA is compiled without NETCD suport\n");
+  *retval=CTA_NOT_IMPLEMENTED;
+#endif
+
+};
+
+void CTAI_ObsDescr_netcdf_Property_Count(
+         CTAI_ObsDescr_netcdf *descr, 
+         int *nkeys, 
+         int *retval)
+{
+#if HAVE_LIBNETCDF
+   *nkeys = descr->n_keys;
+   *retval = CTA_OK;
+#else
+   printf("Error: CTAI_ObsDescr_netcdf_Property_Count: COSTA is compiled without NETCD suport\n");
+  *retval=CTA_NOT_IMPLEMENTED;
+#endif
+};
+
+void CTAI_ObsDescr_netcdf_Observation_Count(
+         CTAI_ObsDescr_netcdf *descr, 
+         int *nobs, 
+         int *retval)
+{
+#if HAVE_LIBNETCDF
+   *nobs = descr->nmeasr;
+   *retval = CTA_OK;
+#else
+   printf("Error: CTAI_ObsDescr_netcdf_Observation_Count: COSTA is compiled without NETCD suport\n");
+  *retval=CTA_NOT_IMPLEMENTED;
+#endif
+};
+
+
+void CTAI_ObsDescr_netcdf_Get_Properties(
+         CTAI_ObsDescr_netcdf *descr, 
+         const char *Key,
+         CTA_Vector *Properties,
+         CTA_Datatype *datatype,
+         int *retval)
+{
+#if HAVE_LIBNETCDF
+
+  int ierr,ncid,varid;
+  int i, ival, dim2;
+  float *nc_values, *selected_nc_values, *nc_tot_values, *nc_values_lon,*nc_values_lat;
+  char tmpchar[20];
+  CTA_String hstr;
+
+  if (IDEBUG>0) {printf("ctai_obsdescr_netcdf_get_properties; START\n");}
+  if (IDEBUG>0) {printf("ctai_obsdescr_netcdf_get_properties: key: %s  of: |%s|  \n",Key,descr->database->dbname);}
+
+
+   // first allocate space for temporary arrays
+  dim2 = 1;
+  if (strstr(Key,"kernel")!=NULL){dim2 = descr->size_1Dproperties;}
+  if (strstr(Key,"pressure_levels")!=NULL){dim2 = descr->size_1Dproperties+1 ;}
+  nc_tot_values = CTA_Malloc(descr->nmeasr_orig * dim2 * sizeof(float));
+
+
+   nc_values = CTA_Malloc(descr->nmeasr_orig  * sizeof(float));
+   nc_values_lon = CTA_Malloc(descr->nmeasr_orig  * sizeof(float));
+   nc_values_lat = CTA_Malloc(descr->nmeasr_orig  * sizeof(float));
+   selected_nc_values = CTA_Malloc(descr->nmeasr * sizeof(float));
+
+
+  // Get the netcdf-id of the database
+   ncid = descr->database->ncid;
+
+
+  /* now obtain the desired secondary variable, given by the keyname */
+  /* For our specific OMI-netcdf-file, we need:    */
+
+  /* always: get the lon and lat */
+   ierr = nc_inq_varid(ncid, "longitude", &varid);
+   ierr = nc_get_var_float(ncid, varid, &nc_values_lon[0]);
+   ierr = nc_inq_varid(ncid, "latitude", &varid);
+   ierr = nc_get_var_float(ncid, varid, &nc_values_lat[0]);
+   if (ierr != CTA_OK)
+     {printf("Error: could not read the property lat/lon \n");
+       *retval = -1; return;
+     }
+
+  if (strcmp(Key,"NAME")==0){
+    /* this key should not be necessary, but it is naively asked by certain COSTA-routines 
+           like CTA_Util_MethodsPrintObservations */
+    for (i=0; i < descr->nmeasr_orig; i++){
+      nc_values[i] = i*1.0;
+    }
+
+} else if (strcmp(Key,"longitude")==0){
+
+    if (IDEBUG>0) {printf("ctai_obsdescr_netcdf_get_properties: LONGITUDE    \n");}
+
+    // read the lon vector. We know the size: nmeasr_orig. 
+    //    ierr = nc_inq_varid(ncid, "longitude", &varid);
+    //ierr = nc_get_var_float(ncid, varid, &nc_values[0]);
+    for (i=0; i < descr->nmeasr_orig; i++){
+      nc_values[i] = nc_values_lon[i]; }
+
+  } else if (strcmp(Key,"latitude")==0) {
+    // ierr = nc_inq_varid(ncid, "latitude", &varid);
+    //ierr = nc_get_var_float(ncid, varid, &nc_values[0]);
+    for (i=0; i < descr->nmeasr_orig; i++){
+      nc_values[i] = nc_values_lat[i]; }
+
+  } else if (strstr(Key,"pressure_levels")!=NULL) {
+    ierr = nc_inq_varid(ncid, "pressure_levels", &varid);
+    sscanf(&Key[16] ,"%d",&ival);
+    if (IDEBUG>0) {printf("Getting key %s, number: %d \n",Key,ival);}
+
+    ierr = nc_get_var_float(ncid, varid, &nc_tot_values[0]);
+
+     if (ierr != CTA_OK)
+      {printf("Error: could not read the property %s %d \n",Key,ierr);
+        *retval = -1; return;
+      }
+    for (i=0; i < descr->nmeasr_orig; i++){
+      nc_values[i] = nc_tot_values[ival-1 + (i* (descr->size_1Dproperties+1))];
+    }
+
+
+
+  } else if (strstr(Key,"kernel")!=NULL) {
+
+    ierr = nc_inq_varid(ncid, "kernel", &varid);
+    sscanf(&Key[7] ,"%d",&ival);
+    if (IDEBUG>0) {printf("Getting key %s, number: %d \n",Key,ival);}
+
+    ierr = nc_get_var_float(ncid, varid, &nc_tot_values[0]);
+
+    if (ierr != CTA_OK)
+      {printf("Error: could not read the property %s %d \n",Key,ierr);
+        *retval = -1; return;
+      }
+    for (i=0; i < descr->nmeasr_orig; i++){
+      nc_values[i] = nc_tot_values[ival-1 + (i* descr->size_1Dproperties)];
+    }
+
+
+  }else {
+    printf("Key %s  is NOT YET IMPLEMENTED!!! \n",Key);
+    *retval = CTA_NOT_IMPLEMENTED;
+    return;
+
+  }
+
+  // now fill the properties vector with the obtained values. The relation table is used to get only the selection.
+  ierr = CTA_RelTable_ApplyVal(descr->selectionReltab, nc_values,descr->nmeasr_orig,
+                                 selected_nc_values,descr->nmeasr,CTA_REAL );
+   
+  if (IDEBUG>0) {printf("ctai_obsdescr_netcdf_get_properties:ierr %d   \n",ierr);}
+  if (IDEBUG>0) {printf("selected values %f %f %d %d  \n",selected_nc_values[0], selected_nc_values[1],descr->nmeasr, *datatype);}
+
+
+
+  if (*datatype == CTA_STRING){
+    if (IDEBUG>0) {printf("cta-string-expected;workaround \n");}
+    ierr = CTA_String_Create(&hstr);
+    for (i=0; i<descr->nmeasr; i++){
+      sprintf(tmpchar,"%f",selected_nc_values[i]);
+      ierr = CTA_String_Set(hstr,tmpchar);
+      ierr=CTA_Vector_SetVal(*Properties,i+1,&hstr, CTA_STRING);
+    }
+    ierr = CTA_String_Free(&hstr);
+  } else {
+
+    ierr=CTA_Vector_SetVals(*Properties,selected_nc_values, descr->nmeasr, CTA_REAL);
+  }
+
+  if (IDEBUG>0) {printf("ctai_obsdescr_netcdf_get_properties:setvals %d   \n",ierr);}
+
+  if (ierr != CTA_OK)
+    {printf("Error: could not fill the property vector %s \n",Key);
+      *retval = -1; return;
+    }
+
+  free(nc_values);
+  free(nc_values_lon);
+  free(nc_values_lat);
+  free(selected_nc_values);
+  free(nc_tot_values);
+
+  if (*retval!=CTA_OK) return;
+
+  *retval = CTA_OK;
+#else
+   printf("CTAI_ObsDescr_netcdf_Get_Properties :Version is compiled without NETCDF support.\n");
+   *retval=CTA_NOT_IMPLEMENTED;
+#endif
+
+};
+
+
+
+
+
+void CTAI_ObsDescr_netcdf_Free(
+         CTAI_ObsDescr_netcdf *descr, 
+         int *retval)
+{
+#if HAVE_LIBNETCDF
+  int i, ierr;
+ 
+
+   descr->database->nusers--;
+
+   //  printf("cta_obsdescr_free (netcdf): now %d users remaining of database %s \n",descr->database->nusers, descr->database->dbname);
+
+    if (descr->database->nusers==0)
+    {
+
+    if ((ierr = nc_close(descr->database->ncid))){
+        printf("CTA_Sobs_netcdf_Free: cannot close netCDF-file: %d\n",
+               nc_strerror(ierr));
+    }
+    free(descr->database->dbname);
+    free(descr->database);
+
+    }
+
+
+ 
+  for (i=0; i<descr->n_keys; i++){ CTA_String_Free(&(descr->Keys[i]));}
+  free(descr->Keys);
+
+  *retval = CTA_OK;
+#else
+   printf("CTAI_ObsDescr_netcdf_Get_Properties :Version is compiled without NETCDF support.\n");
+   *retval=CTA_NOT_IMPLEMENTED;
+#endif
+}
+
+
+void CTA_ObsDescr_netcdf_initialise(CTA_ObsDescrClass *hobsdescrcl)
+{
+   CTA_Intf hintf=0;
+   CTA_Func h_func[I_CTA_OBSDESCR_NUMFUNC];
+
+   // The vector h_func is filled with COSTA-function handles of the 
+   // implementations in this file.
+   CTA_Func_Create(" ",&CTAI_ObsDescr_netcdf_Create_Size,    hintf,
+                            &h_func[I_CTA_OBSDESCR_CREATE_SIZE]);
+   CTA_Func_Create(" ",&CTAI_ObsDescr_netcdf_Create_Init,    hintf,
+                            &h_func[I_CTA_OBSDESCR_CREATE_INIT]);
+   CTA_Func_Create(" ",&CTAI_ObsDescr_netcdf_Property_Count,    hintf,
+                       &h_func[I_CTA_OBSDESCR_COUNT_PROPERTIES]);
+   CTA_Func_Create(" ",&CTAI_ObsDescr_netcdf_Get_Properties,    hintf,
+                       &h_func[I_CTA_OBSDESCR_GET_PROPERTIES]);
+   CTA_Func_Create(" ",&CTAI_ObsDescr_netcdf_Observation_Count, hintf,
+                       &h_func[I_CTA_OBSDESCR_COUNT_OBSERVATIONS]);
+   CTA_Func_Create(" ",&CTAI_ObsDescr_netcdf_Get_Keys,    hintf,
+                       &h_func[I_CTA_OBSDESCR_GET_KEYS]);
+   CTA_Func_Create(" ",&CTAI_ObsDescr_netcdf_Free,    hintf,
+                       &h_func[I_CTA_OBSDESCR_FREE]);
+   CTA_Func_Create(" ",&CTAI_ObsDescr_netcdf_CreateSel,    hintf,
+                       &h_func[I_CTA_OBSDESCR_SELECTION]);
+   CTA_ObsDescr_DefineClass("cta_obsdescr_netcdf",h_func,hobsdescrcl);
+
+}
+
+
+
diff --git a/costa/native/cta/src/cta_obsdescr_sqlite3.c b/costa/native/cta/src/cta_obsdescr_sqlite3.c
new file mode 100644
index 000000000..e7c90aab6
--- /dev/null
+++ b/costa/native/cta/src/cta_obsdescr_sqlite3.c
@@ -0,0 +1,564 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_obsdescr_sqlite3.c $
+$Revision: 3361 $, $Date: 2012-07-04 16:52:30 +0200 (Wed, 04 Jul 2012) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <string.h>
+#include <stdio.h>
+#include <math.h>
+#include <memory.h>
+#include "cta_mem.h"
+#include "cta_file.h"
+#include "cta_errors.h"
+#include "cta_string.h"
+#include "cta_util_sqlite3.h"
+#include "cta_obsdescr_sqlite3.h"
+#include "cta_defaults.h"
+
+#define IDEBUG (0)
+/*
+   Fills the relation table for the elements in hobsdescr2 that are also in 
+   hobsdescr1.
+
+   Note hobsdescr2 MUST be a subset of hobsdescr1!
+
+*/
+int CTAI_ObsDescr_CreateRelTable(CTA_ObsDescr hobsdescr1, CTA_ObsDescr hobsdescr2,
+                             CTA_RelTable reltab){
+   int nobs1, nobs2;
+   CTA_Vector vtime1, vtime2, vid1, vid2, vselect;
+   int ilo, iup, imid, i2;
+   double t1, t2;
+   int id1, id2;
+   int retval;
+   BOOL found;
+
+   /* Count number of observations */
+   CTA_ObsDescr_Observation_Count(hobsdescr1,&nobs1);
+   CTA_ObsDescr_Observation_Count(hobsdescr2,&nobs2);
+
+   /* Check whether nobs1>=nobs2 since hobsdecr2 must be a subset of
+    * hobsdecr1
+    */
+   if (nobs1<nobs2){
+      return CTA_INTERNAL_ERROR;
+   }
+
+   /* Allocate arrays for holding primary keys */
+   CTA_Vector_Create(CTA_DEFAULT_VECTOR, nobs1, CTA_DOUBLE,  CTA_NULL, &vtime1);
+   CTA_Vector_Create(CTA_DEFAULT_VECTOR, nobs2, CTA_DOUBLE,  CTA_NULL, &vtime2);
+   CTA_Vector_Create(CTA_DEFAULT_VECTOR, nobs1, CTA_INTEGER, CTA_NULL, &vid1);
+   CTA_Vector_Create(CTA_DEFAULT_VECTOR, nobs2, CTA_INTEGER, CTA_NULL, &vid2);
+   CTA_Vector_Create(CTA_DEFAULT_VECTOR, nobs2, CTA_INTEGER, CTA_NULL, &vselect);
+
+   /* Get Time and ID from both */
+   CTA_ObsDescr_Get_ValueProperties(hobsdescr1, "TIME", vtime1, CTA_DOUBLE);
+   CTA_ObsDescr_Get_ValueProperties(hobsdescr2, "TIME", vtime2, CTA_DOUBLE);
+   CTA_ObsDescr_Get_ValueProperties(hobsdescr1, "STATION_ID",vid1,CTA_INTEGER);
+   CTA_ObsDescr_Get_ValueProperties(hobsdescr2, "STATION_ID",vid2,CTA_INTEGER);
+
+   /* now we have to figure out which elements from hobsdescr1 are also
+    * present in hobsdescr2                                           */
+
+    /* Note we know that elements are sorted first by time then by id! */
+    for (i2=1;i2<=nobs2;i2++){
+       CTA_Vector_GetVal(vtime2, i2, &t2,  CTA_DOUBLE);
+       CTA_Vector_GetVal(vid2,   i2, &id2, CTA_INTEGER);
+
+       if (IDEBUG) printf("==>looking for %f, %d\n",t2,id2);
+
+       /* Use bi-section in order to find the elements */
+       ilo=1;
+       iup=nobs1;
+       if (IDEBUG) printf("ilo=%d, iup=%d \n",ilo,iup);
+
+       found=FALSE;
+       for (;;){
+          imid=(iup+ilo)/2;
+          CTA_Vector_GetVal(vtime1, imid, &t1,  CTA_DOUBLE);
+          CTA_Vector_GetVal(vid1,   imid, &id1, CTA_INTEGER);
+          if (IDEBUG) printf("imid= %d found %f, %d\n",imid, t1,id1);
+          if (t1==t2 && id1==id2){
+             found=TRUE;
+             break;
+          }
+     
+          /* exit loop when element is not there :*/
+          if(ilo==iup){break;}  /*Ony one element there */
+
+          /* check whether element (t1,id1)<(t2,id2) and
+           * set new values for ilo and iup
+           */
+          if (t1<t2 || (t1==t2 && id1<id2)) {
+             if (ilo==imid) {ilo=iup;} else {ilo=imid;}
+          } else {
+             if (iup==imid) {iup=ilo;} else {iup=imid;}
+          }
+       }
+       if (!found){
+          /* INTERNAL ERROR */
+          /* nicely Free memory */
+          CTA_Vector_Free(&vtime1);
+          CTA_Vector_Free(&vtime2);
+          CTA_Vector_Free(&vid1);
+          CTA_Vector_Free(&vid2);
+          CTA_Vector_Free(&vselect);
+          return CTA_INTERNAL_ERROR;
+       }
+       CTA_Vector_SetVal(vselect, i2, &imid, CTA_INTEGER);
+    }
+    retval=CTA_RelTable_SetSelect(reltab, vselect);
+   
+   /* Free memory */
+   CTA_Vector_Free(&vtime1);
+   CTA_Vector_Free(&vtime2);
+   CTA_Vector_Free(&vid1);
+   CTA_Vector_Free(&vid2);
+   CTA_Vector_Free(&vselect);
+
+   return retval;
+}
+
+
+
+void CTA_ObsDescr_sqlite3_initialise(CTA_ObsDescrClass *hobsdescrcl)
+{
+   CTA_Intf hintf=0;
+   CTA_Func h_func[I_CTA_OBSDESCR_NUMFUNC];
+
+   // The vector h_func is filled with COSTA-function handles of the 
+   // implementations in this file.
+   CTA_Func_Create(" ",&CTAI_ObsDescr_sqlite3_Create_Size,    hintf,
+                       &h_func[I_CTA_OBSDESCR_CREATE_SIZE]);
+   CTA_Func_Create(" ",&CTAI_ObsDescr_sqlite3_Create_Init,    hintf,
+                       &h_func[I_CTA_OBSDESCR_CREATE_INIT]);
+   CTA_Func_Create(" ",&CTAI_ObsDescr_sqlite3_Property_Count,    hintf,
+                       &h_func[I_CTA_OBSDESCR_COUNT_PROPERTIES]);
+   CTA_Func_Create(" ",&CTAI_ObsDescr_sqlite3_Get_Properties,    hintf,
+                       &h_func[I_CTA_OBSDESCR_GET_PROPERTIES]);
+   CTA_Func_Create(" ",&CTAI_ObsDescr_sqlite3_Observation_Count, hintf,
+                       &h_func[I_CTA_OBSDESCR_COUNT_OBSERVATIONS]);
+   CTA_Func_Create(" ",&CTAI_ObsDescr_sqlite3_Get_Keys,    hintf,
+                       &h_func[I_CTA_OBSDESCR_GET_KEYS]);
+   CTA_Func_Create(" ",&CTAI_ObsDescr_sqlite3_Free,    hintf,
+                       &h_func[I_CTA_OBSDESCR_FREE]);
+   CTA_Func_Create(" ",&CTAI_ObsDescr_sqlite3_CreateSel,    hintf,
+                       &h_func[I_CTA_OBSDESCR_SELECTION]);
+   CTA_ObsDescr_DefineClass("cta_obsdescr_sqlite3",h_func,hobsdescrcl);
+
+}
+
+
+
+void CTAI_ObsDescr_sqlite3_Create_Size(
+   // OUTPUTS:
+      int *memsize,           // The number of bytes which are necessary to 
+                              //     store one CTAI_SObs_sqlite3, with a 
+                              //     pointer to the contents (data), 
+                              //     but without the contents themelves.
+      int *retval             // error code (see cta_datatypes.h for possible 
+                              //     error codes)
+   ){
+//   *memsize=(int) sizeof(CTAI_SObs_sqlite3);
+   *memsize=(int) sizeof(CTAI_ObsDescr_sqlite3);
+   *retval=CTA_OK;
+};
+
+void CTAI_ObsDescr_sqlite3_Create_Init(
+/*
+   Allocate the memory which is necesary to store the data necessary for a
+   sqlite3-observer
+*/
+     // INPUT:
+      CTA_ObsDescr *myhandle,  /* Handle assigned by COSTA */
+     // IN-OUTPUTS
+        CTAI_ObsDescr_sqlite3 *descr,// The sqlite3-observation description
+                                 //     for which the memory must be
+                                 //     allocated
+     // INPUTS:
+        CTA_Handle *usrdat, // User data: database-name
+     // OUTPUTS
+        int *retval)             // Error code.
+{
+
+   CTAI_SObs_sqlite3 *sobs; // User data: database-name
+
+   descr->myhandle=*myhandle;
+
+   *retval= CTA_Handle_GetData(*usrdat, (void **) &sobs);
+   if (*retval!=CTA_OK) return;
+
+   // Link the data base to this descriptor; increase number of users
+   descr->database = sobs->database;
+   (descr->database->nusers)++;
+
+   // Copy the conditionCTA_RelTable *hreltable
+   descr->condition = CTA_Malloc(sizeof(char)*(1+strlen(sobs->condition)));
+   strcpy(descr->condition,sobs->condition);
+
+   // Store the keys
+   *retval = CTAI_util_sqlite3_return_keys(
+                  &(descr->n_keys),
+                  &(descr->Keys),
+                  descr->database->db,
+                  descr->condition);
+
+   if (*retval != CTA_OK) return;
+
+   // Copy the dimension of the observer
+   descr->nmeasr = sobs->nmeasr;
+   *retval=CTA_OK;
+};
+
+void CTAI_ObsDescr_sqlite3_CreateSel(CTAI_ObsDescr_sqlite3 *descr,
+          CTA_String *selection, CTA_RelTable *reltab, 
+          CTA_ObsDescr *myhandle_out, 
+          CTAI_ObsDescr_sqlite3 *descrout, int *retval){
+
+
+   int i,len;
+   char *condition;
+
+   // Get the condition
+   // Allocate a name-string 
+   *retval = CTA_String_GetLength(*selection, &len);
+   if (*retval!=CTA_OK) return;
+
+   // Get the condition
+   condition = CTA_Malloc((len+1)*sizeof(char));
+   *retval = CTA_String_Get(*selection, condition);
+   if (IDEBUG>0) printf("CTAI_ObsDescr_sqlite3_CreateSel: given    condition is '%s'\n",condition);
+   if (IDEBUG>0) printf("CTAI_ObsDescr_sqlite3_CreateSel: Internal condition is '%s'\n",descr->condition);
+   if (*retval!=CTA_OK) return;
+
+   // Set handle of new observation description
+   descrout->myhandle=*myhandle_out;
+
+   // Link the database also to this observer and keep track of the number of 
+   // observers using this database
+   descrout->database = descr->database;
+   descrout->database->nusers++;
+
+   // Combine the input-condition and the condition of the input observer.
+   if (strcmp(descr->condition,"")==0)
+   {
+      descrout->condition = condition;
+   }
+   else
+   {
+      descrout->condition = CTA_Malloc(sizeof(char)*
+                                  (    strlen(condition) 
+                                    +  strlen(descr->condition)
+                                    +  strlen("  (  ) AND (  )  ")
+                                ) );
+      sprintf(descrout->condition,"(%s) AND (%s)",condition,
+              descr->condition);
+      free(condition);
+   }
+
+   // copy number of keys
+   descrout->n_keys=descr->n_keys;
+
+   // Count the measurements in this observer
+   *retval = CTAI_util_sqlite3_select_values(
+       &(descrout->nmeasr), 1, CTA_INTEGER, descrout->database->db,
+       "count(stations.station_id)",descrout->condition);
+
+   // copy the Keys 
+   descrout->Keys=CTA_Malloc(descr->n_keys*sizeof(CTA_String));
+   for (i=0;i<descr->n_keys;i++){
+      *retval=CTA_String_Duplicate(descr->Keys[i],&descrout->Keys[i]);
+      if (*retval!=CTA_OK) return;
+   }
+   
+   if (*reltab!=CTA_NULL) {
+      *retval=CTAI_ObsDescr_CreateRelTable(descr->myhandle,
+                                              descrout->myhandle, *reltab);
+   } else {
+      *retval=CTA_OK;
+   }
+}
+         
+
+
+void CTAI_ObsDescr_sqlite3_Get_Keys(
+         CTAI_ObsDescr_sqlite3 *descr,
+         CTA_Vector *Keys,
+         int *retval)
+{
+   int i;
+   for (i=1; i<=descr->n_keys; i++)
+   {
+      *retval = CTA_Vector_SetVal(*Keys,i,&(descr->Keys[i-1]),CTA_STRING);
+   };
+
+   *retval = CTA_OK;
+};
+
+void CTAI_ObsDescr_sqlite3_Property_Count(
+         CTAI_ObsDescr_sqlite3 *descr, 
+         int *nkeys, 
+         int *retval)
+{
+   *nkeys = descr->n_keys;
+   *retval = CTA_OK;
+};
+
+void CTAI_ObsDescr_sqlite3_Observation_Count(
+         CTAI_ObsDescr_sqlite3 *descr, 
+         int *nobs, 
+         int *retval)
+{
+   *nobs = descr->nmeasr;
+   *retval = CTA_OK;
+};
+
+
+BOOL isNaN(double x)
+{
+   return (!(x>0 || x<=0));
+}
+
+void CTAI_ObsDescr_sqlite3_Get_Properties(
+         CTAI_ObsDescr_sqlite3 *descr, 
+         const char *Key,
+         CTA_Vector *Properties,
+         CTA_Datatype *datatype,
+         int *retval)
+{
+    // Find the key among the key names
+    int i;
+    int len;
+    BOOL found=0;
+    int isize;
+    void * StatProp;
+    void * TimeProp;
+    char *KeyLong;
+    int retStat;
+    int retTime;
+    void * PropArray;
+
+
+    for (i=0; !found && i<descr->n_keys; i++)
+    {
+       char *str;
+       *retval = CTA_String_GetLength(descr->Keys[i],&len);
+       if (*retval!=CTA_OK) return;
+
+       str=CTA_Malloc((len+1)*sizeof(char));
+       *retval = CTA_String_Get(descr->Keys[i],str);
+       if (*retval!=CTA_OK){
+          free(str);
+          return;
+       }
+       found = strcmp(Key,str)==0;
+       free(str);
+    }
+    i--;
+
+    // Error if the key was not found
+    if (!found) {*retval= CTA_ITEM_NOT_FOUND; return;}
+
+    // Declare an array for the properties
+    if     (*datatype == CTA_INTEGER) {isize = sizeof(int);}
+    else if(*datatype == CTA_REAL)    {isize = sizeof(float);}
+    else if(*datatype == CTA_DOUBLE)  {isize = sizeof(double);}
+    else if(*datatype == CTA_STRING)  {isize = sizeof(CTA_String);}
+    else   {*retval = CTA_ILLEGAL_DATATYPE; return;}
+
+    StatProp = CTA_Malloc(isize*descr->nmeasr);
+    TimeProp = CTA_Malloc(isize*descr->nmeasr);
+
+    // In case of String-output, create the strings
+    if (*datatype == CTA_STRING)
+    {
+       CTA_String * PropStr=StatProp;
+       for (i=0; i<descr->nmeasr; i++)
+       {
+         *retval = CTA_String_Create(&PropStr[i]);
+         if (*retval != CTA_OK) return;
+       }
+
+       PropStr=TimeProp;
+       for (i=0; i<descr->nmeasr; i++)
+       {
+         *retval = CTA_String_Create(&PropStr[i]);
+         if (*retval != CTA_OK) return;
+       }
+    }
+
+
+    // Get the properties from the stationary information
+    KeyLong=CTA_Malloc((strlen(Key)+strlen("stations.")+1));
+    sprintf(KeyLong,"stations.%s",Key);
+    retStat = CTAI_util_sqlite3_select_values(
+              StatProp, descr->nmeasr, *datatype,
+              descr->database->db, KeyLong, descr->condition);
+
+    // Get the properties from the time dependent information
+    sprintf(KeyLong,"data.%s",Key);
+    retTime = CTAI_util_sqlite3_select_values(
+              TimeProp, descr->nmeasr, *datatype,
+              descr->database->db, KeyLong, descr->condition);
+
+    free(KeyLong);
+    if (retStat != CTA_OK && retTime != CTA_OK) {*retval = retStat; return;}
+
+    // Merge the two answers
+    
+    if (retStat == CTA_OK && retTime == CTA_OK)
+    {
+    // Column found in stationary and in time-dependent information
+
+       // Supplement the Stationary data array with instationary data, where
+       // these are available. Check that the stationary and instationary data do
+       // not conflict.
+       if (*datatype == CTA_DOUBLE)
+       {
+          double * dStatProp = StatProp;
+          double * dTimeProp = TimeProp;
+          for (i=0; i<descr->nmeasr; i++)
+          {
+             if (isNaN(dStatProp[i]) && !isNaN(dTimeProp[i]))
+                { dStatProp[i] = dTimeProp[i]; }
+             else if ( !isNaN(dStatProp[i]) && !isNaN(dTimeProp[i]) &&
+                        dStatProp[i] != dTimeProp[i] )
+                { *retval = CTA_INCOMPATIBLE_VECTORS; return;}
+          }
+
+       }
+       else if (*datatype == CTA_INTEGER)
+       {
+          int * iStatProp = StatProp;
+          int * iTimeProp = TimeProp;
+          int iNAN =-987654;
+          for (i=0; i<descr->nmeasr; i++)
+          {
+             if (iStatProp[i] == iNAN && iTimeProp[i] != iNAN)
+                { iStatProp[i] = iTimeProp[i]; }
+             else if (iStatProp[i] != iNAN && iTimeProp[i] != iNAN &&
+                      iStatProp[i] != iTimeProp[i] )
+                { *retval = CTA_INCOMPATIBLE_VECTORS; return;}
+          }
+       }
+       else if (*datatype == CTA_STRING)
+       {
+          CTA_String * sStatProp = StatProp;
+          CTA_String * sTimeProp = TimeProp;
+          for (i=0; i<descr->nmeasr; i++)
+          {
+             int statlen, timelen;
+             *retval = CTA_String_GetLength(sStatProp[i],&statlen);
+             if (*retval != CTA_OK) return;
+             *retval = CTA_String_GetLength(sTimeProp[i],&timelen);
+             if (*retval != CTA_OK) return;
+
+             if (statlen == 0 && timelen != 0)
+             { 
+                 CTA_String help = sStatProp[i];
+                 sStatProp[i] = sTimeProp[i]; 
+                 sTimeProp[i] = help;
+             }
+             else if (statlen != 0 && timelen != 0 )
+             {
+                char *statstr;
+                char *timestr;
+                if (statlen != timelen)   
+                   { *retval = CTA_INCOMPATIBLE_VECTORS; return;}
+
+                statstr=CTA_Malloc((statlen+1)*sizeof(char));
+                timestr=CTA_Malloc((timelen+1)*sizeof(char));
+                *retval = CTA_String_Get(sStatProp[i],statstr);
+                if (*retval != CTA_OK) return;
+                *retval = CTA_String_Get(sTimeProp[i],timestr);
+                if (*retval != CTA_OK) return;
+                if (strcmp(statstr,timestr) != 0)
+                   { *retval = CTA_INCOMPATIBLE_VECTORS; return;}
+                free(statstr);
+                free(timestr);
+             } 
+          }
+       }
+       else
+       {  
+          *retval = CTA_ILLEGAL_DATATYPE; return;
+       }
+
+       PropArray = StatProp;
+    }
+    else if (retStat == CTA_OK)
+    {
+       PropArray = StatProp;
+    }
+    else 
+    {
+       PropArray = TimeProp;
+    }
+    *retval = CTA_Vector_SetVals(*Properties, PropArray, descr->nmeasr, *datatype);
+    if (*retval != CTA_OK) return;
+
+
+    // In case of String-output, erase the strings
+    if (*datatype == CTA_STRING)
+    {
+       CTA_String * PropStr=StatProp;
+       for (i=0; i<descr->nmeasr; i++)
+       {
+         *retval = CTA_String_Free(&PropStr[i]);
+         if (*retval != CTA_OK) return;
+       }
+       PropStr=TimeProp;
+       for (i=0; i<descr->nmeasr; i++)
+       {
+         *retval = CTA_String_Free(&PropStr[i]);
+         if (*retval != CTA_OK) return;
+       }
+    }
+
+   free(StatProp);
+   free(TimeProp); 
+   *retval = CTA_OK;
+}
+
+
+void CTAI_ObsDescr_sqlite3_Free(
+         CTAI_ObsDescr_sqlite3 *descr, 
+         int *retval)
+{
+   int i;
+   
+   descr->database->nusers--;
+    if (descr->database->nusers==0)
+    {
+
+       sqlite3_close(descr->database->db);
+       free(descr->database->name);
+       free(descr->database);
+
+    }
+    free(descr->condition);
+
+    for (i=0; i<descr->n_keys; i++){ CTA_String_Free(&(descr->Keys[i]));}
+    free(descr->Keys);
+
+
+    *retval = CTA_OK;
+}
+
diff --git a/costa/native/cta/src/cta_obsdescr_table.c b/costa/native/cta/src/cta_obsdescr_table.c
new file mode 100644
index 000000000..872567f04
--- /dev/null
+++ b/costa/native/cta/src/cta_obsdescr_table.c
@@ -0,0 +1,611 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_obsdescr_table.c $
+$Revision: 3407 $, $Date: 2012-08-17 13:50:50 +0200 (Fri, 17 Aug 2012) $
+ 
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#define IDEBUG (0)
+
+#include <string.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <math.h>
+#include <memory.h>
+#include "cta.h"
+#include "cta_mem.h"
+
+typedef struct {
+int nkeys;
+int nmeasr;
+char **Keys;
+char ***Columns;
+} CTAI_ObsDescr_table;
+
+
+void CTAI_ObsDescr_table_Create_Size(
+   // OUTPUTS:
+      int *memsize,           // The number of bytes which are necessary to 
+                              //     store one CTAI_SObs_sqlite3, with a 
+                              //     pointer to the contents (data), 
+                              //     but without the contents themselves.
+      int *retval             // error code (see cta_datatypes.h for possible 
+                              //     error codes)
+   ){
+   *memsize=(int) sizeof(CTAI_ObsDescr_table);
+   *retval=CTA_OK;
+};
+
+void CTAI_ObsDescr_table_Create_Init(
+/*
+   Allocate the memory which is necessary to store the data necessary for a
+   table-observer
+*/
+     // INPUT:
+      CTA_ObsDescr myhandle,  /* Handle assigned by COSTA */
+     // IN-OUTPUTS
+        CTAI_ObsDescr_table *descr,// The table-observation description
+                                 //     for which the memory must be
+                                 //     allocated
+     // INPUTS:
+        CTA_Handle          *usrdat, // User data
+     // OUTPUTS
+        int *retval)             // Error code.
+{
+   CTA_Datatype datatype;
+   int ikey, imeasr, nkeys, nmeasr, lenkey, lencol, len;
+   CTA_Vector vKeys, vCol;
+   CTA_String sKey;
+   CTA_String sCol;
+   const char *key;
+
+   if (IDEBUG>0) printf("CTAI_ObsDescr_table_Create_Init: Start handle=%d\n",*usrdat);
+   /* Check input we only support a pack array input and an observation
+    * description component */
+   *retval=CTA_Handle_GetDatatype(*usrdat, &datatype);
+   if (datatype==CTA_PACK) {
+      
+      /* Get number of keys and number of measurements */
+      *retval=CTA_Pack_Get(*usrdat,&nkeys,sizeof(int));
+      if (*retval!=CTA_OK) return;
+      if (IDEBUG>0) printf("CTAI_ObsDescr_table_Create_Init: nkeys=%d\n",nkeys);
+      *retval=CTA_Pack_Get(*usrdat,&nmeasr,sizeof(int));
+      if (IDEBUG>0) printf("CTAI_ObsDescr_table_Create_Init: nmeasr=%d\n",nmeasr);
+      if (*retval!=CTA_OK) return;
+   } else if (datatype==CTA_OBSDESCR) {
+      if (IDEBUG>0) printf("CTAI_ObsDescr_table_Create_Init: datatype==CTA_OBSDESCR\n");
+      /* Get number of keys and number of measurements */
+      *retval=CTA_ObsDescr_Property_Count(*usrdat, &nkeys);
+      if (IDEBUG>0) printf("CTAI_ObsDescr_table_Create_Init: nkeys=%d ierr=%d\n",nkeys, *retval);
+      if (*retval!=CTA_OK) return;
+      *retval=CTA_ObsDescr_Observation_Count(*usrdat, &nmeasr);
+      if (IDEBUG>0) printf("CTAI_ObsDescr_table_Create_Init: nmeasr=%d ierr=%d\n",nmeasr, *retval);
+      if (*retval!=CTA_OK) return;
+   } else {
+      *retval=CTA_NOT_IMPLEMENTED;
+      return;
+   }
+   /* Allocate arrays */
+   descr->Keys=CTA_Malloc(nkeys*sizeof(char*));
+   descr->Columns=CTA_Malloc(nkeys*sizeof(char**));
+
+   for (ikey=0;ikey<nkeys;ikey++) {
+      descr->Columns[ikey]=CTA_Malloc(nmeasr*sizeof(char*));
+   }
+   /* Nullify the strings */
+   for (ikey=0;ikey<nkeys;ikey++) {
+      descr->Keys[ikey]=NULL;
+      for (imeasr=0;imeasr<nmeasr;imeasr++) {
+         descr->Columns[ikey][imeasr]=NULL;
+      }
+   }
+   /* Set nkeys and nmeasr */
+   descr->nkeys=nkeys;
+   descr->nmeasr=nmeasr;
+ 
+   if (IDEBUG>0) printf("CTAI_ObsDescr_table_Create_Init: datatype=%d \n",datatype);
+ 
+  
+   if (datatype==CTA_PACK) {
+      /* Unpack all keys */
+     if (IDEBUG>0) printf("CTAI_ObsDescr_table_Create_Init: unpack-keys\n");
+      for (ikey=0; ikey<descr->nkeys; ikey++){
+         /* unpack stringlength */
+         *retval=CTA_Pack_Get(*usrdat,&len,sizeof(int));
+         if (*retval!=CTA_OK) return;
+         /* pack string */
+         descr->Keys[ikey]=CTA_Malloc(len*sizeof(char));
+         *retval=CTA_Pack_Get(*usrdat,descr->Keys[ikey],len*sizeof(char));
+         if (*retval!=CTA_OK) return;
+         if (IDEBUG>0) printf("CTAI_ObsDescr_table_Create_Init: Key[%d]=%s\n",
+                              ikey, descr->Keys[ikey]);
+      }
+
+      /* Unpack all Columns */
+      for (ikey=0; ikey<descr->nkeys; ikey++){
+         for (imeasr=0; imeasr<descr->nmeasr; imeasr++){
+            if (IDEBUG>0) printf("CTAI_ObsDescr_table_Create_Init\n");
+            /* pack stringlength */
+            *retval=CTA_Pack_Get(*usrdat,&len,sizeof(int));
+            if (*retval!=CTA_OK) return;
+            /* pack string */
+            descr->Columns[ikey][imeasr]=CTA_Malloc(len*sizeof(char));
+            *retval=CTA_Pack_Get(*usrdat,descr->Columns[ikey][imeasr],
+                                len*sizeof(char));
+            if (*retval!=CTA_OK) return;
+            if (IDEBUG>0) printf("CTAI_ObsDescr_table_Create_Init: Val[%d][%d]=%s\n",
+                                 ikey,imeasr,descr->Columns[ikey][imeasr]);
+         }
+      }
+
+
+
+
+
+   } else if (datatype==CTA_OBSDESCR) {
+      
+
+      if (IDEBUG>0) {printf("CTAI_ObsDescr_table_Create_Init: start \n");}
+
+      /* Allocate vectors for getting out the information */
+
+      *retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nkeys,  CTA_STRING, CTA_NULL, &vKeys);
+      if (*retval!=CTA_OK) return;
+      *retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nmeasr, CTA_STRING, CTA_NULL, &vCol);
+      if (*retval!=CTA_OK) return;
+ 
+       *retval=CTA_String_Create(&sKey);
+       if (*retval!=CTA_OK) return;
+       *retval=CTA_String_Create(&sCol);
+       if (*retval!=CTA_OK) return;
+       
+       /* Get all Keys */
+       *retval=CTA_ObsDescr_Get_PropertyKeys(*usrdat, vKeys);
+       if (*retval!=CTA_OK) return;
+       
+       /* For all keys */
+       for (ikey=0; ikey<nkeys; ikey++) {
+          /* Copy Key into local administration */
+          *retval=CTA_Vector_GetVal(vKeys, ikey+1, &sKey, CTA_STRING);
+          if (*retval!=CTA_OK) return;
+          *retval=CTA_String_GetLength(sKey,&lenkey);
+          if (*retval!=CTA_OK) return;
+          descr->Keys[ikey]=CTA_Malloc((lenkey+1)*sizeof(char));
+          *retval=CTA_String_Get(sKey,descr->Keys[ikey]);
+          if (*retval!=CTA_OK) return;
+       
+          /* Get the properties of this key */
+          key=CTAI_String_GetPtr(sKey);
+          if (IDEBUG>0) {printf("CTAI_ObsDescr_table_Create_Init: get data for key '%s'\n",key);}
+          // NOTE this is were performance goes lost!
+          *retval=CTA_ObsDescr_Get_ValueProperties(*usrdat, key, vCol, CTA_STRING);
+          if (*retval!=CTA_OK) return;
+//          printf("done\n");
+       
+          /* For all measurement */
+          for (imeasr=0; imeasr<nmeasr; imeasr++) {
+             *retval=CTA_Vector_GetVal(vCol, imeasr+1, &sCol, CTA_STRING);
+             if (*retval!=CTA_OK) return;
+             *retval=CTA_String_GetLength(sCol,&lencol);
+             if (*retval!=CTA_OK) return;
+                 descr->Columns[ikey][imeasr]=CTA_Malloc((lencol+1)*sizeof(char));
+                 *retval=CTA_String_Get(sCol,descr->Columns[ikey][imeasr]);
+                if (IDEBUG>0) printf("CTAI_ObsDescr_table_Create_Init: Val[%d][%d]=%s\n",
+                                 ikey,imeasr,descr->Columns[ikey][imeasr]);  
+             if (*retval!=CTA_OK) return;
+          }
+       }
+       
+      /* Deallocate work variables */
+      CTA_Vector_Free(&vKeys);
+      CTA_Vector_Free(&vCol);
+      CTA_String_Free(&sKey);
+      CTA_String_Free(&sCol);
+   } else {
+      *retval=CTA_NOT_IMPLEMENTED;
+      return;
+   }
+
+   *retval=CTA_OK;
+};
+
+void CTAI_ObsDescr_table_Get_Keys(
+         CTAI_ObsDescr_table *descr,
+         CTA_Vector *Keys,
+         int *retval)
+{
+   int i;
+   CTA_String hlpstr;
+
+   CTA_String_Create(&hlpstr);
+   for (i=1; i<=descr->nkeys; i++)
+   {  
+      CTA_String_Set(hlpstr,descr->Keys[i-1]);     
+      *retval = CTA_Vector_SetVal(*Keys,i,&hlpstr,CTA_STRING);
+       if (IDEBUG>0) {printf("CTAI_ObsDescr_table_Get_Keys: set key '%s' ierr=%d",descr->Keys[i-1],*retval);}
+   };
+   CTA_String_Free(&hlpstr);
+
+   *retval = CTA_OK;
+};
+
+void CTAI_ObsDescr_table_Property_Count(
+         CTAI_ObsDescr_table *descr, 
+         int *nkeys, 
+         int *retval)
+{
+   *nkeys = descr->nkeys;
+   *retval = CTA_OK;
+};
+
+void CTAI_ObsDescr_table_Observation_Count(
+         CTAI_ObsDescr_table *descr, 
+         int *nobs, 
+         int *retval)
+{
+   *nobs = descr->nmeasr;
+   *retval = CTA_OK;
+};
+
+
+void CTAI_ObsDescr_table_Get_Properties(
+         CTAI_ObsDescr_table *descr, 
+         const char *Key,
+         CTA_Vector *Properties,
+         CTA_Datatype *datatype,
+         int *retval)
+{
+   // Find the key among the key names
+   BOOL found=0;
+   int isize, ikey, imeasr;
+   void *vals;
+   int  *ivals;
+   float *rvals;
+   double *dvals;
+   CTA_String str;
+   
+   
+
+   for (ikey=0; !found && ikey<descr->nkeys; ikey++)
+   {
+      found = strcmp(Key,descr->Keys[ikey])==0;
+   }
+   ikey--;
+
+   // Error if the key was not found
+   if (!found) {*retval= CTA_ITEM_NOT_FOUND; return;}
+
+   // Declare an array for the properties
+   if     (*datatype == CTA_INTEGER) {isize = sizeof(int);}
+   else if(*datatype == CTA_REAL)    {isize = sizeof(float);}
+   else if(*datatype == CTA_DOUBLE)  {isize = sizeof(double);}
+   else if(*datatype == CTA_STRING)  {isize = 0;}
+   else   {*retval = CTA_ILLEGAL_DATATYPE; return;}
+
+   vals=NULL;
+   if (isize>0) {
+      isize=isize*descr->nmeasr;
+      vals=CTA_Malloc(isize);
+   } else if (*datatype == CTA_STRING) {
+      CTA_String_Create(&str);
+   }
+
+
+   ivals=vals;
+   rvals=vals;
+   dvals=vals;
+
+   for (imeasr=0;imeasr<descr->nmeasr;imeasr++){
+
+      if (*datatype == CTA_INTEGER) {
+         // Add an integer to the output vector:
+
+         // Read the value
+         if (descr->Columns[ikey][imeasr] != NULL) { 
+            int nscan = sscanf(descr->Columns[ikey][imeasr],"%d",&ivals[imeasr]);
+            if (nscan != 1) {
+               *retval=CTA_ILLEGAL_DATATYPE;
+               free(vals);
+               return;
+            }
+         }
+       } else if(*datatype == CTA_DOUBLE) {
+          // Add a double to the output vector:
+
+          // Read the value
+          double NaN;
+
+          NaN = 0; NaN = 1.0/NaN; NaN=(1.0+NaN)/NaN;
+          dvals[imeasr]=NaN;
+
+          if (descr->Columns[ikey][imeasr] != NULL) {
+             int nscan = sscanf(descr->Columns[ikey][imeasr],"%lf",&dvals[imeasr]);
+            if (nscan != 1) {
+               *retval=CTA_ILLEGAL_DATATYPE;
+               free(vals);
+               return;
+            }
+          }
+       } else if(*datatype == CTA_REAL) {
+          // Add a float to the output vector:
+
+          // Read the value
+          float NaN;
+          NaN = 0;
+          NaN = (float) 1.0/NaN;
+          NaN=NaN/NaN;
+          rvals[imeasr]=NaN;
+          if (descr->Columns[ikey][imeasr] != NULL) {
+             int nscan = sscanf(descr->Columns[ikey][imeasr],"%f",&rvals[imeasr]);
+            if (nscan != 1) {
+               *retval=CTA_ILLEGAL_DATATYPE;
+               free(vals);
+               return;
+            }
+          }
+       } else if(*datatype == CTA_STRING) {
+          // Add a string to the output vector:
+          if (descr->Columns[ikey][imeasr] != NULL) {
+             // Set value in array of Strings
+             *retval = CTA_String_Set(str, descr->Columns[ikey][imeasr]);
+             if (*retval!=CTA_OK) return;
+          } else {
+             *retval = CTA_String_Set(str,"");
+             if (*retval!=CTA_OK) return;
+          }
+          *retval=CTA_Vector_SetVal(*Properties,imeasr+1,&str,*datatype);
+       } else  {
+          // No other data types supported (yet)
+          *retval=CTA_ILLEGAL_DATATYPE;
+       };
+    }
+    /* Values in the return vector (except for characters) */
+    if (*datatype == CTA_INTEGER || *datatype == CTA_REAL ||
+        *datatype == CTA_DOUBLE){
+       *retval=CTA_Vector_SetVals(*Properties,vals,descr->nmeasr,*datatype);
+    }
+
+    if (vals) {
+       free(vals);
+    } else if (*datatype == CTA_STRING) {
+      CTA_String_Free(&str);
+    }
+
+}
+
+void CTAI_ObsDescr_table_Export(CTAI_ObsDescr_table *descr, CTA_Handle
+*usrdat, int *retval) 
+{
+   int ikey, imeasr, len;
+   BOOL packout;
+
+   packout = (CTA_Handle_Check(*usrdat,CTA_PACK)==CTA_OK);
+   if (packout) {
+
+      /* pack nkeys and nmeasr */
+      *retval=CTA_Pack_Add(*usrdat,&descr->nkeys,sizeof(int));
+      if (*retval!=CTA_OK) return;
+      *retval=CTA_Pack_Add(*usrdat,&descr->nmeasr,sizeof(int));
+      if (*retval!=CTA_OK) return;
+
+      /* Pack all keys */
+      for (ikey=0; ikey<descr->nkeys; ikey++){
+         /* pack stringlength */
+         len=strlen(descr->Keys[ikey])+1;
+         *retval=CTA_Pack_Add(*usrdat,&len,sizeof(int));
+         if (*retval!=CTA_OK) return;
+         /* pack string */
+         *retval=CTA_Pack_Add(*usrdat,descr->Keys[ikey],len*sizeof(char));
+         if (*retval!=CTA_OK) return;
+      }
+
+      /* Pack all Columns */
+      for (ikey=0; ikey<descr->nkeys; ikey++){
+         for (imeasr=0; imeasr<descr->nmeasr; imeasr++){
+            /* pack stringlength */
+            len=strlen(descr->Columns[ikey][imeasr])+1;
+            *retval=CTA_Pack_Add(*usrdat,&len,sizeof(int));
+            if (*retval!=CTA_OK) return;
+            /* pack string */
+            *retval=CTA_Pack_Add(*usrdat,descr->Columns[ikey][imeasr],
+                                len*sizeof(char));
+            if (*retval!=CTA_OK) return;
+         }
+      }
+   } else {
+      *retval=CTA_FORMAT_NOT_SUPPORTED;
+      return;
+   }
+   *retval=CTA_OK;
+   return;
+}
+ 
+void CTAI_ObsDescr_table_Free(
+         CTAI_ObsDescr_table *descr, 
+         int *retval)
+{
+   int ikey, imeasr;
+
+   for (ikey=0; ikey<descr->nkeys; ikey++) {
+      
+      free (descr->Keys[ikey]);
+      for (imeasr=0; imeasr<descr->nmeasr; imeasr++) {
+         free(descr->Columns[ikey][imeasr]);
+      }
+      free(descr->Columns[ikey]);
+   }
+   free(descr->Keys);
+   free(descr->Columns);
+
+   *retval=CTA_OK;
+}
+
+
+
+void CTAI_ObsDescr_table_CreateSel(CTAI_ObsDescr_table *descr,
+          CTA_String *selection, CTA_RelTable *reltab, 
+          CTA_ObsDescr *myhandle_out, 
+          CTAI_ObsDescr_table *descrout, int *retval){
+
+
+   int i, len, imeasr, nmeasr;
+   int ikey, nkeys, irow;
+   int datatype;
+   int *index;
+   char *condition;
+   double t1,t2,t;
+   CTA_Vector vtimes, vselect;
+
+   // Get the condition
+   // Allocate a name-string 
+   *retval = CTA_String_GetLength(*selection, &len);
+   if (*retval!=CTA_OK) return;
+
+   // Get the condition
+   condition = CTA_Malloc((len+1)*sizeof(char));
+   *retval = CTA_String_Get(*selection, condition);
+   if (*retval!=CTA_OK) return;
+
+   *retval = CTA_String_Get(*selection, condition);
+   if (*retval!=CTA_OK) return;
+
+
+   // convert to upper case
+   for (i=0;i<len;i++){
+      condition[i]=toupper(condition[i]);
+   }
+
+
+   if (IDEBUG) {printf("CTAI_ObsDescr_table_CreateSel select=%s\n",condition);}
+
+   // We only support the timeselect criterion whith the format:
+   // TIME BETWEEN 0.000010 AND 0.100010
+   
+   t1=atof(&(condition[12]));
+   if (IDEBUG) {printf("t1=%f\n",t1);}
+
+   // find first second number
+   for (i=0;12<len;i++){
+      if (condition[i]=='D') {break;}
+   }
+
+   t2=atof(&(condition[i+1]));
+   if (IDEBUG) {printf("t2=%f\n",t2);}
+
+   // Get vector with all times
+   datatype=CTA_DOUBLE;
+   CTA_Vector_Create(CTA_DEFAULT_VECTOR, descr->nmeasr, datatype, CTA_NULL,
+      &vtimes);
+   CTAI_ObsDescr_table_Get_Properties(descr,"TIME", &vtimes, &datatype,
+                                      retval);
+   if (*retval!=CTA_OK) return;
+   
+
+   // find observations are in given timespan 
+   nmeasr=-1;
+   index=CTA_Malloc(descr->nmeasr*sizeof(int));
+   for (i=1;i<=descr->nmeasr;i++){
+      CTA_Vector_GetVal(vtimes, i, &t, CTA_DOUBLE);
+      if (t1<t && t<=t2) {
+         nmeasr++;
+         index[nmeasr]=i;
+      }
+   }
+   nmeasr++;
+   if (IDEBUG) {printf("Number of measrements=%d\n",nmeasr);}
+
+   // create the relation table
+   if (*reltab!=CTA_NULL){
+      CTA_Vector_Create(CTA_DEFAULT_VECTOR, nmeasr, CTA_INTEGER, CTA_NULL,
+         &vselect);
+      CTA_Vector_SetVals(vselect, index, nmeasr, CTA_INTEGER);
+      CTA_RelTable_SetSelect(*reltab, vselect);
+      CTA_Vector_Free(&vselect);
+   }
+
+   // create new stochastic observer
+   nkeys=descr->nkeys;
+
+   descrout->nkeys=nkeys;
+   descrout->nmeasr=nmeasr;
+   
+   /* Allocate table */
+   descrout->Keys=CTA_Malloc(nkeys*sizeof(char*));
+   descrout->Columns=CTA_Malloc(nkeys*sizeof(char**));
+   for (ikey=0;ikey<nkeys;ikey++) {
+      descrout->Columns[ikey]=CTA_Malloc(nmeasr*sizeof(char*));
+   }
+   /* Copy Keys */
+   for (ikey=0;ikey<nkeys;ikey++) {
+      /* Copy Keys */
+      len=strlen(descr->Keys[ikey]);
+      descrout->Keys[ikey]=CTA_Malloc(sizeof(char)*(len+1));
+      strcpy(descrout->Keys[ikey],descr->Keys[ikey]);
+   }   
+   
+   /* Copy rows of table */
+   for (imeasr=0;imeasr<nmeasr;imeasr++){
+      irow=index[imeasr]-1;
+      for (ikey=0;ikey<nkeys;ikey++){
+         if (descr->Columns[ikey][irow]==NULL){
+            descrout->Columns[ikey][imeasr]=NULL;
+         } else {
+            len=strlen(descr->Columns[ikey][irow]);
+            descrout->Columns[ikey][imeasr]=CTA_Malloc(sizeof(char)*(len+1));
+            strcpy(descrout->Columns[ikey][imeasr],
+                   descr->Columns[ikey][irow]);
+         }
+      }
+   }
+
+   // Clean up memory
+   free(index);
+   CTA_Vector_Free(&vtimes);
+}
+
+
+
+
+void CTA_ObsDescr_table_initialise(CTA_ObsDescrClass *hobsdescrcl)
+{
+   CTA_Intf hintf=0;
+   CTA_Func h_func[I_CTA_OBSDESCR_NUMFUNC];
+
+   // The vector h_func is filled with COSTA-function handles of the 
+   // implementations in this file.
+   CTA_Func_Create(" ",&CTAI_ObsDescr_table_Create_Size,    hintf,
+                       &h_func[I_CTA_OBSDESCR_CREATE_SIZE]);
+   CTA_Func_Create(" ",&CTAI_ObsDescr_table_Create_Init,    hintf,
+                       &h_func[I_CTA_OBSDESCR_CREATE_INIT]);
+   CTA_Func_Create(" ",&CTAI_ObsDescr_table_Property_Count,    hintf,
+                       &h_func[I_CTA_OBSDESCR_COUNT_PROPERTIES]);
+   CTA_Func_Create(" ",&CTAI_ObsDescr_table_Get_Properties,    hintf,
+                       &h_func[I_CTA_OBSDESCR_GET_PROPERTIES]);
+   CTA_Func_Create(" ",&CTAI_ObsDescr_table_Observation_Count, hintf,
+                       &h_func[I_CTA_OBSDESCR_COUNT_OBSERVATIONS]);
+   CTA_Func_Create(" ",&CTAI_ObsDescr_table_Get_Keys,    hintf,
+                       &h_func[I_CTA_OBSDESCR_GET_KEYS]);
+   CTA_Func_Create(" ",&CTAI_ObsDescr_table_Export,    hintf,
+                       &h_func[I_CTA_OBSDESCR_EXPORT]);
+   CTA_Func_Create(" ",&CTAI_ObsDescr_table_Free,    hintf,
+                       &h_func[I_CTA_OBSDESCR_FREE]);
+   CTA_Func_Create(" ",&CTAI_ObsDescr_table_CreateSel,    hintf,
+                       &h_func[I_CTA_OBSDESCR_SELECTION]);
+
+   CTA_ObsDescr_DefineClass("cta_obsdescr_table",h_func,hobsdescrcl);
+}
diff --git a/costa/native/cta/src/cta_obsdescr_user.c b/costa/native/cta/src/cta_obsdescr_user.c
new file mode 100644
index 000000000..81c05a587
--- /dev/null
+++ b/costa/native/cta/src/cta_obsdescr_user.c
@@ -0,0 +1,59 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/trunk/costa/src/cta/cta_obsdescr_netcdf.c $
+$Revision: 671 $, $Date: 2008-10-07 14:49:42 +0200 (Tue, 07 Oct 2008) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <string.h>
+#include <stdio.h>
+#include "cta.h"
+
+void CTA_ObsDescr_user_initialize(CTA_ObsDescrClass *hobsdescrcl)
+{
+   CTA_Func h_func[I_CTA_OBSDESCR_NUMFUNC];
+   char *libraryName;
+   char *functionName;
+
+   // The vector h_func is filled with function read from the default user dynamic library
+
+   libraryName=userDefaultDynamicLibrary;
+   functionName="user_obsdescr_create_size";
+   h_func[I_CTA_OBSDESCR_CREATE_SIZE]        = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="user_obsdescr_create_size";
+   h_func[I_CTA_OBSDESCR_CREATE_INIT]        = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="user_obsdescr_count_properties";
+   h_func[I_CTA_OBSDESCR_COUNT_PROPERTIES]   = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="user_obsdescr_get_properties";
+   h_func[I_CTA_OBSDESCR_GET_PROPERTIES]     = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="user_obsdescr_count_observations";
+   h_func[I_CTA_OBSDESCR_COUNT_OBSERVATIONS] = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="user_obsdescr_get_keys";
+   h_func[I_CTA_OBSDESCR_GET_KEYS]           = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="user_obsdescr_free";
+   h_func[I_CTA_OBSDESCR_FREE]               = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="user_obsdescr_selection";
+   h_func[I_CTA_OBSDESCR_SELECTION]          = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+
+   CTA_ObsDescr_DefineClass("cta_obsdescr_user",h_func,hobsdescrcl);
+
+
+}
+
+
+
diff --git a/costa/native/cta/src/cta_pack.c b/costa/native/cta/src/cta_pack.c
new file mode 100644
index 000000000..a66a73839
--- /dev/null
+++ b/costa/native/cta/src/cta_pack.c
@@ -0,0 +1,381 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_pack.c $
+$Revision: 2932 $, $Date: 2011-11-30 17:10:24 +0100 (Wed, 30 Nov 2011) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <math.h>
+#include <stdio.h>
+#include "cta_mem.h"
+#include "f_cta_utils.h"
+#include "ctai.h"
+#include "cta_pack.h"
+#include "cta_errors.h"
+#include "cta_message.h"
+
+#define CTA_PACK_CREATE_F77      F77_CALL(cta_pack_create,CTA_PACK_CREATE)
+#define CTA_PACK_FREE_F77        F77_CALL(cta_pack_free,CTA_PACK_FREE)
+#define CTA_PACK_ADD_F77         F77_CALL(cta_pack_add,CTA_PACK_ADD)
+#define CTA_PACK_GET_F77         F77_CALL(cta_pack_get,CTA_PACK_GET)
+#define CTA_PACK_GETPTR_F77      F77_CALL(cta_pack_getptr,CTA_PACK_GETPTR)
+#define CTA_PACK_GETLEN_F77      F77_CALL(cta_pack_getlen,CTA_PACK_GETLEN)
+#define CTA_PACK_ADDCNT_F77      F77_CALL(cta_pack_addcnt,CTA_PACK_ADDCNT)
+#define CTA_PACK_GETINDX_F77     F77_CALL(cta_pack_getindx,CTA_PACK_GETINDX)
+#define CTA_PACK_SETINDX_F77     F77_CALL(cta_pack_setindx,CTA_PACK_SETINDX)
+
+#define CLASSNAME "CTA_Pack"
+
+/* Struct holding all data associated to a COSTA package */
+
+typedef struct {
+char *pack_array;
+int lenpack;
+int ip1;
+int ip2;
+} CTAI_Pack;
+
+
+#undef METHOD
+#define METHOD "Create"
+int CTA_Pack_Create(int initsize, CTA_Pack *hpack){
+
+   CTAI_Pack *pack;
+   int retval;
+
+   /* allocate memory for new pack object */
+   pack=CTA_Malloc(sizeof(CTAI_Pack));
+   pack->pack_array=NULL;
+   pack->lenpack=0;
+   pack->ip1=0;
+   pack->ip2=0;
+
+   /* Allocate new handle and return eror when unsuccesfull */
+   retval=CTA_Handle_Create("pack",CTA_PACK,pack,hpack);
+   if (retval) {
+       CTA_WRITE_ERROR("Cannot create handle");
+       return retval;
+   }
+
+   if (initsize>0) {
+      pack->pack_array=CTA_Malloc(initsize);
+      pack->lenpack=initsize;
+   }
+
+   return CTA_OK;
+}
+
+#undef METHOD
+#define METHOD "Free"
+int CTA_Pack_Free(CTA_Pack *hpack){
+
+   CTAI_Pack *pack;
+   int retval;
+
+   if (*hpack==CTA_NULL) return CTA_OK;
+
+   retval=CTA_Handle_Check((CTA_Handle) *hpack,CTA_PACK);
+   if (retval!=CTA_OK)  {
+       CTA_WRITE_ERROR("Handle is not a cta_pack handle");
+       return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) *hpack,(void**) &pack);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   if (pack->pack_array) {free(pack->pack_array);}
+   free(pack);
+   retval=CTA_Handle_Free(hpack);
+   return retval;
+
+}
+
+#undef METHOD
+#define METHOD "Add"
+int CTA_Pack_Add(CTA_Pack hpack, void *data, int lendat){
+
+   CTAI_Pack *pack;
+   int retval;
+
+   retval=CTA_Handle_Check((CTA_Handle) hpack,CTA_PACK);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_pack handle");
+       return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) hpack,(void**) &pack);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+//   printf("pack->lenpack=%d\n",pack->lenpack);
+//   printf("lendat=%d",lendat);
+//   printf("pack->ip2=%d");
+   // Check whether buffer is long enough and reallocate when necessary
+   if (pack->lenpack<lendat+pack->ip2) {
+      pack->lenpack = (int)(pack->lenpack*1.5) + lendat;
+      // printf("CTA_Pack_Add: reallocate new size %d\n", pack->lenpack);
+      pack->pack_array=realloc(pack->pack_array, pack->lenpack);
+   }
+
+   // pack new data
+   memcpy(pack->pack_array+pack->ip2, data, lendat);
+   pack->ip2=pack->ip2+lendat;
+   return CTA_OK;
+}
+
+#undef METHOD
+#define METHOD "Get"
+int CTA_Pack_Get(CTA_Pack hpack, void *data, int lendat){
+
+   CTAI_Pack *pack;
+   int retval;
+
+   retval=CTA_Handle_Check((CTA_Handle) hpack,CTA_PACK);
+   if (retval!=CTA_OK)  {
+       CTA_WRITE_ERROR("Handle is not a cta_pack handle");
+       return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) hpack,(void**) &pack);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   // Check whether requested data can be in buffer
+   if (pack->ip1+lendat>pack->lenpack) {
+      return CTA_BUFFER_TOO_SMALL;
+   }
+
+   // pack new data
+   memcpy(data, pack->pack_array+pack->ip1, lendat);
+   pack->ip1=pack->ip1+lendat;
+   return CTA_OK;
+}
+
+
+
+#undef METHOD
+#define METHOD "GetPtr"
+char* CTA_Pack_GetPtr(CTA_Pack hpack){
+
+   CTAI_Pack *pack;
+   int retval;
+
+   retval=CTA_Handle_Check((CTA_Handle) hpack,CTA_PACK);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_pack handle");
+	   return NULL;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hpack,(void**) &pack);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return NULL;
+   }
+
+   return &(pack->pack_array[pack->ip1]);
+}
+
+#undef METHOD
+#define METHOD "GetLen"
+int CTA_Pack_GetLen(CTA_Pack hpack){
+
+   CTAI_Pack *pack;
+   int retval;
+
+   retval=CTA_Handle_Check((CTA_Handle) hpack,CTA_PACK);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_pack handle");
+	   return -1;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hpack,(void**) &pack);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return -1;
+   }
+
+   return pack->ip2-pack->ip1;
+}
+
+#undef METHOD
+#define METHOD "AddCnt"
+int CTA_Pack_AddCnt(CTA_Pack hpack, int lendat){
+   CTAI_Pack *pack;
+   int retval;
+
+   retval=CTA_Handle_Check((CTA_Handle) hpack,CTA_PACK);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_pack handle");
+	   return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) hpack,(void**) &pack);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   if (pack->ip2+lendat>pack->lenpack) {
+      return CTA_BUFFER_TOO_SMALL;
+   }
+
+   pack->ip2=pack->ip2+lendat;
+
+   return CTA_OK;
+}
+
+
+#undef METHOD
+#define METHOD "Debug"
+int CTA_Pack_Debug(CTA_Pack hpack){
+
+   CTAI_Pack *pack;
+   int retval;
+
+   retval=CTA_Handle_Check((CTA_Handle) hpack,CTA_PACK);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_pack handle");
+	   return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) hpack,(void**) &pack);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   // Check whether requested data can be in buffer
+   printf("DEBUG OF PACK OBJECT %d\n",hpack );
+   printf("geheugen adress %p\n",&pack);
+   printf("ip1=%d lenpack=%d\n",pack->ip1, pack->lenpack);
+   printf("END OF PACK OBJECT %d\n",hpack );
+
+   return CTA_NULL;
+}
+
+#undef METHOD
+#define METHOD "GEtIndx"
+int CTA_Pack_GetIndx(CTA_Pack hpack, int *ip1, int *ip2){
+
+   CTAI_Pack *pack;
+   int retval;
+
+   retval=CTA_Handle_Check((CTA_Handle) hpack,CTA_PACK);
+   if (retval!=CTA_OK) {
+	  CTA_WRITE_ERROR("Handle is not a cta_pack handle");
+	  return retval;
+   }
+   
+   retval=CTA_Handle_GetData((CTA_Handle) hpack,(void**) &pack);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Cannot retrieve handle data");
+	  return retval;
+   }
+
+   *ip1=pack->ip1;
+   *ip2=pack->ip2;
+
+   return CTA_OK;
+}
+
+
+#undef METHOD
+#define METHOD "SetIndex"
+int CTA_Pack_SetIndx(CTA_Pack hpack, int ip1, int ip2){
+
+   CTAI_Pack *pack;
+   int retval;
+
+   retval=CTA_Handle_Check((CTA_Handle) hpack,CTA_PACK);
+   if (retval!=CTA_OK) {
+	  CTA_WRITE_ERROR("Handle is not a cta_pack handle");
+	  return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) hpack,(void**) &pack);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Cannot retrieve handle data");
+	  return retval;
+   }
+
+//   printf("ip1, ip2 =%d %d\n",pack->ip1, pack->ip2);
+   if (ip1==CTA_PACK_RESET) {
+      pack->ip1=0;
+   }
+   else if (ip2==CTA_PACK_RESET) {
+      pack->ip1=0;
+      pack->ip2=0;
+   }
+   else {
+      pack->ip2=ip2;
+      pack->ip1=ip1;
+   }
+
+   return CTA_OK;
+}
+
+
+CTAEXPORT void CTA_PACK_CREATE_F77(int *initsize, int *hpack, int *ierr){
+   *ierr=CTA_Pack_Create(*initsize, (CTA_Pack*) hpack);
+}
+
+
+CTAEXPORT void CTA_PACK_FREE_F77(int *hpack, int *ierr){
+   *ierr=CTA_Pack_Free((CTA_Pack*) hpack);
+}
+
+CTAEXPORT void CTA_PACK_ADD_F77(int *hpack, void *data, int *lendat, int *ierr){
+   *ierr=CTA_Pack_Add((CTA_Pack) *hpack, data, *lendat);
+}
+
+CTAEXPORT void CTA_PACK_GET_F77(int *hpack, void *data, int *lendat, int *ierr){
+   *ierr=CTA_Pack_Get((CTA_Pack) *hpack, data, *lendat);
+}
+
+
+CTAEXPORT char* CTA_PACK_GETPTR_F77(int *hpack){
+   return CTA_Pack_GetPtr((CTA_Pack) *hpack);
+}
+
+CTAEXPORT void CTA_PACK_GETLEN_F77(int *hpack, int *ierr){
+   *ierr=CTA_Pack_GetLen((CTA_Pack) *hpack);
+}
+
+CTAEXPORT void CTA_PACK_ADDCNT_F77(int *hpack, int *lendat, int *ierr){
+   *ierr=CTA_Pack_AddCnt((CTA_Pack) *hpack, *lendat);
+}
+
+CTAEXPORT void CTA_PACK_GETINDX_F77(int *hpack, int *ip1, int *ip2, int *ierr){
+   *ierr=CTA_Pack_GetIndx((CTA_Pack) *hpack, ip1, ip2);
+}
+
+CTAEXPORT void CTA_PACK_SETINDX_F77(int *hpack, int *ip1, int *ip2, int *ierr){
+   *ierr=CTA_Pack_SetIndx((CTA_Pack) *hpack, *ip1, *ip2);
+}
+
+
+
+
+
+
+
diff --git a/costa/native/cta/src/cta_par.c b/costa/native/cta/src/cta_par.c
new file mode 100644
index 000000000..f0b86abdf
--- /dev/null
+++ b/costa/native/cta/src/cta_par.c
@@ -0,0 +1,905 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_par.c $
+$Revision: 4073 $, $Date: 2013-07-31 15:43:55 +0200 (Wed, 31 Jul 2013) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2008  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include "cta_mem.h"
+#include "cta.h"
+#include <string.h>
+#ifdef USE_MPI
+//#include "cta_util_mpi.h"
+#include "mpi.h"
+#endif
+#include "cta_par.h"
+
+#define CLASSNAME "CTA_Par" 
+
+#define CTA_PAR_WORKERSPAWN_F77   F77_CALL(cta_par_workerspawn,CTA_PAR_WORKERSPAWN)
+#define CTA_PAR_GETFCOMM_F77      F77_CALL(cta_par_getfcomm,CTA_PAR_GETFCOMM)
+#define CTA_PAR_CREATEGROUPS_F77  F77_CALL(cta_par_creategroups,CTA_PAR_CREATEGROUPS)
+#define CTA_PAR_GETGROUPINFO_F77  F77_CALL(cta_par_getgroupinfo,CTA_PAR_GETGROUPINFO)
+
+#define IDEBUG (0)
+enum CTAI_ParType  {WorkerWorker,MasterWorker};
+enum CTA_ParProcType CTA_MY_PROC_TYPE=CTA_ParMaster;
+int  CTA_IS_PARALLEL=CTA_FALSE;
+int  CTA_PAR_MY_RANK=0;
+#ifdef USE_MPI
+
+typedef struct {
+int spawn_workers;            // spawn worker processes 0 (false) 1 (true)
+char *name;                  // Name of group
+char *forModel;              // Model (tag) to use this model for
+int nProc;                   // Number of processes in instance of group
+int nTimes;                  // Number of instances of this group
+int iTime;                   // Index in communicators used for providing new groups when models are created
+enum CTAI_ParType  parType;  // Type of parallel model
+MPI_Comm *CommNoMaster;
+MPI_Comm *CommWithMaster;
+int nDumProcs;
+int *dumProcs;
+} CTAI_Group;
+
+/* Public variables */
+
+MPI_Comm CTA_COMM_WORLD=MPI_COMM_WORLD;          // Communication group of all processes in the COSTA universe.
+MPI_Comm CTA_COMM_MYWORLD=MPI_COMM_WORLD;        // Private world of this process (excluding COSTA master).
+MPI_Comm CTA_COMM_MASTER_WORKER=MPI_COMM_WORLD;  // Communcication group between the master and this worker.
+int CTA_FILTER_PROCESS=0;
+
+int myRankInWorld=-1;
+
+int CTAI_iGroup=0;                            // Group number I belong to (not valid for the master)
+int CTAI_nGroups=0;                              // Global number of groups
+
+CTAI_Group *CTAI_AllGroups;
+
+/* Local interfaces */
+void CTAI_Par_FatalError(int ierr, char* msg);
+
+
+
+#undef METHOD
+#define METHOD "CTAI_Par_NumGroups"
+void CTAI_Par_NumGroups(int parConfig, int *ngroups1, int *ngroups2){
+   int ierr;
+   CTA_Handle hitem;
+   int i, ncount, datatype;
+   char *implements, *parallel_type, *spawn_workers, *nproc, *ntimes,  *dumProcs;
+
+   *ngroups1=0;
+   *ngroups2=0;
+
+   /* first count the groups in parallel configuration */
+   ierr=CTA_Tree_CountHandlesStr(parConfig,"parallel/process_groups/group",
+                                 ngroups1);
+   if (ierr!=CTA_OK){
+      *ngroups1=0;
+   }
+
+   /* Second look for all model classes in configuration */
+   ierr=CTA_Tree_CountItems (parConfig, &ncount);
+   for (i=1;i<=ncount;i++){
+      CTA_Tree_GetItem (parConfig, i, &hitem);
+      CTA_Handle_GetDatatype(hitem, &datatype);
+      /* check whether it is a model class */
+      if (datatype==CTA_MODELCLASS){
+         /* Get paralel information */
+         CTAI_ModelFac_GetParallelData(hitem, &implements, &parallel_type, &spawn_workers, &nproc, &ntimes, &dumProcs);
+         /* increase number of parallel groups when parallel information is available */
+         if (implements && parallel_type && nproc && ntimes) (*ngroups2)++;
+      }
+   }
+}
+
+#undef METHOD
+#define METHOD "CTAI_Par_GetGroupConfig2"
+void CTAI_Par_GetGroupConfig2(CTA_Tree parConfig, int iGroup, CTA_ModelClass *groupConfig){
+   int icount,i;
+   int ncount;
+   int datatype;
+   CTA_Handle hitem;
+   char *implements, *parallel_type, *spawn_workers, *nproc, *ntimes, *dumProcs;
+
+   icount=0;
+   *groupConfig=CTA_NULL;
+
+
+   /* Second look for all model clases in configuration */
+   CTA_Tree_CountItems (parConfig, &ncount);
+   for (i=1;i<=ncount;i++){
+      CTA_Tree_GetItem (parConfig, i, &hitem);
+      CTA_Handle_GetDatatype(hitem, &datatype);
+      /* check whether it is a model class */
+      if (datatype==CTA_MODELCLASS){
+         /* Get paralel information */
+         CTAI_ModelFac_GetParallelData(hitem, &implements, &parallel_type, &spawn_workers, &nproc, &ntimes, &dumProcs);
+         /* increase number of parallel groups when parallel information is available */
+         if (implements && parallel_type && nproc && ntimes) {
+            if (iGroup==icount) {
+               *groupConfig=hitem;
+               return;
+            }
+            icount++;
+         }
+      }
+   }
+}
+
+
+
+#undef METHOD
+#define METHOD "CTAI_Par_GetGroupConfig"
+void CTAI_Par_GetGroupConfig(CTA_Tree parConfig, int iGroup, CTA_Tree *groupConfig){
+   int ierr;
+   CTA_Tree tGroups;
+
+   ierr=CTA_Tree_GetHandleStr(parConfig,"parallel/process_groups", &tGroups);
+   if (ierr==CTA_OK) {
+      ierr=CTA_Tree_GetItem(tGroups, iGroup+1, groupConfig);
+   }
+
+   if (ierr!=CTA_OK){
+      *groupConfig=CTA_NULL;
+   }
+}
+
+/* Distribute the groups over the available processes */
+/* We set field nTimes for groups with free multiplicity  */
+#undef METHOD
+#define METHOD "CTAI_Par_SetNtimes"
+void CTAI_Par_SetNtimes(CTAI_Group *allGroups, int nGroups, int nProcWorld){
+   int nodes_left; /* Number of processes that are left */
+   BOOL didAddGroup; /* Assigned group to nodes in last cycle */
+   BOOL first;       /* Flag indicating first time in loop distributing processes to groups */
+   int iLoop;        /* Assigment loop counter */
+   int nProcGroup;   /* Number of processes in a group */
+   int nTimes;       /* multiplicity of group <=0 yet unknown number */
+   int iGroup;       /* Counter over all groups */
+   int myRank;
+   int mySize;
+   char msg[256]; /* String for writing messages */
+
+   nodes_left=nProcWorld-1;
+   didAddGroup=TRUE;
+   
+   MPI_Comm_rank(CTA_COMM_WORLD, &myRank);
+   MPI_Comm_size(CTA_COMM_WORLD, &mySize);
+   if (IDEBUG) {
+      printf("#%d DEBUG: CTAI_Par_SetNtimes Size of CTA_COMM_WORLD =%d:\n",myRank, mySize);
+      MPI_Comm_size(MPI_COMM_WORLD, &mySize);
+      printf("#%d DEBUG: CTAI_Par_SetNtimes Size of MPI_COMM_WORLD =%d:\n",myRank, mySize);
+      printf("#%d DEBUG: CTAI_Par_SetNtimes nProcWorld=%d:\n",myRank, nProcWorld);
+      printf("#%d DEBUG: CTAI_Par_SetNtimes nGroups=%d:\n",nGroups, nProcWorld);
+      for (iGroup=0; iGroup<nGroups; iGroup++){
+         printf("#%d DEBUG: CTAI_Par_SetNtimes allGroups[%d].spawn_workers=%d\n",myRank, iGroup, allGroups[iGroup].spawn_workers);
+         printf("#%d DEBUG: CTAI_Par_SetNtimes allGroups[%d].name         =%s\n",myRank, iGroup,  allGroups[iGroup].name );
+         printf("#%d DEBUG: CTAI_Par_SetNtimes allGroups[%d].forModel     =%s\n",myRank, iGroup,  allGroups[iGroup].forModel);
+         printf("#%d DEBUG: CTAI_Par_SetNtimes allGroups[%d].nProc        =%d\n",myRank, iGroup,  allGroups[iGroup].nProc);
+         printf("#%d DEBUG: CTAI_Par_SetNtimes allGroups[%d].nTimes       =%d\n",myRank, iGroup,  allGroups[iGroup].nTimes);
+         printf("#%d DEBUG: CTAI_Par_SetNtimes allGroups[%d].iTime        =%d\n",myRank, iGroup,  allGroups[iGroup].iTime);
+         printf("#%d DEBUG: CTAI_Par_SetNtimes allGroups[%d].nDumProcs    =%d\n",myRank, iGroup,  allGroups[iGroup].nDumProcs);
+         printf("#%d DEBUG: CTAI_Par_SetNtimes allGroups[%d].dumProcs     =%p\n",myRank, iGroup,  allGroups[iGroup].dumProcs );
+      }
+   }
+
+   /* iLoop assign all processes fource the first pass*/
+   first=TRUE;
+   for (iLoop=0;((didAddGroup & nodes_left) >0) || first;iLoop++){
+      first=FALSE;
+      didAddGroup=FALSE;
+      /* Loop over all groups */
+      for (iGroup=0;iGroup<nGroups;iGroup++){
+         nProcGroup=allGroups[iGroup].nProc;
+         nTimes=allGroups[iGroup].nTimes;
+         if (allGroups[iGroup].spawn_workers){
+           /* When workers are spawned:
+              1) do not count the processes (since they do not exist yet)
+              2) nTimes must be set since we cannot determine it
+            */
+            if (nTimes<1){
+               sprintf(msg,"Group %d has option 'spawn_workers=Yes' but option nTimes is not set\n",
+                       iGroup);
+               CTA_WRITE_ERROR(msg);
+               exit(-1);
+            }
+            didAddGroup=TRUE;
+         } 
+         else {
+            if (nTimes>0) {
+               if (iLoop==0) {
+                  nodes_left=nodes_left-nProcGroup*nTimes;
+                  didAddGroup=TRUE;
+               }
+            }
+            else {
+               if ((nodes_left>=nProcGroup) | (iLoop==0)){
+                  nodes_left=nodes_left-nProcGroup;
+                  allGroups[iGroup].nTimes=nTimes-1;
+                  didAddGroup=TRUE;
+               }
+            }
+         }
+      }
+   }
+
+   /* Check for more nodes requested than available */
+   if (nodes_left<0){
+      sprintf(msg,"%d spawned processes too less. Please spawn more processes or cange parallel configuration\n",
+              nProcWorld-nodes_left);
+      CTA_WRITE_ERROR(msg);
+      exit(-1);
+   }
+
+   /* Processes have been spawned that cannot be assigned
+      Probably, the user has made an error so we give an error as well.
+      Note: we even cannot continue since we will not be able to terminate
+     this process in a correct way at the end of the program
+   */
+   if (nodes_left>0){
+      sprintf(msg,"%d spawned processes cannot be assigned please spawn less processes or change parallel configuration.\n",
+              nodes_left);
+      CTA_WRITE_ERROR(msg);
+      exit(-1);
+   }
+
+   /* set nTimes field positive for all groups */
+   if (IDEBUG) printf("#%d CTAI_Par_SetNtimes nGroups is %d\n",myRank,nGroups);
+   for (iGroup=0;iGroup<nGroups;iGroup++){
+      nTimes=allGroups[iGroup].nTimes;
+      if (nTimes<0) {allGroups[iGroup].nTimes=-nTimes;}
+      if (nTimes==0) {
+         sprintf(msg,"There are not enough processes to hold a single instance of group %d.\n", iGroup);
+         CTA_WRITE_ERROR(msg);
+      }
+      if (IDEBUG) printf("#%d CTAI_Par_SetNtimes allGroups[%d].nTimesnGroups is %d\n",myRank,iGroup,allGroups[iGroup].nTimes);
+   }
+}
+
+#undef METHOD
+#define METHOD "CTAI_Par_CreateMPIGroups"
+void CTAI_Par_CreateMPIGroups(CTAI_Group *allGroups, int nGroups){
+   MPI_Group group_world;
+   MPI_Comm  subCommNoMaster, subCommWithMaster;
+   MPI_Group subGroupNoMaster, subGroupWithMaster;
+   int itime;  /* Counter over multiplicity of group */
+   int nProc;  /* Number of processes in one instance of the group */
+   int iProc;  /* Loop counter over processes in group */
+   int *ranks; /* list of ranks in group */
+   int nTimes;
+   int myRank;
+   int iOff;   /* offset deleting processes from list */
+
+   int nProcWorld, nWork;
+   int iGroup;
+   int proc_free;
+   BOOL isMyGroup;
+
+   /* Get number of processes */
+   MPI_Comm_size(CTA_COMM_WORLD, &nProcWorld);
+   MPI_Comm_rank(CTA_COMM_WORLD, &myRank);
+   nWork=nProcWorld-1;
+
+   if (CTA_FILTER_PROCESS) {
+      if (IDEBUG) printf("#%d CTAI_Par_CreateMPIGroups: nProcWorld=%d\n",myRank,nProcWorld);
+      if (IDEBUG) printf("#%d CTAI_Par_CreateMPIGroups: nWork=%d\n",myRank, nWork);
+   }
+
+   /* Check whether we have enough processes and set nTimes fields */
+   CTAI_Par_SetNtimes(allGroups, nGroups, nProcWorld);
+
+   /* Make process groups and communicators */
+   MPI_Comm_group(CTA_COMM_WORLD,&group_world);
+   ranks=CTA_Malloc(sizeof(int)*nProcWorld);
+   ranks[0]=0; /* Set master process */
+
+   proc_free=1;
+   for (iGroup=0;iGroup<nGroups;iGroup++){
+      isMyGroup=FALSE;
+      nTimes=allGroups[iGroup].nTimes;
+      if (CTA_FILTER_PROCESS) {
+         if (IDEBUG) printf("#%d iGroup=%d nTimes=%d\n",myRank,iGroup+1, nTimes);
+      }
+      /* Allocate fields for storing communicators and init to MPI_COMM_NULL */
+      allGroups[iGroup].CommNoMaster  =CTA_Malloc(sizeof(MPI_Comm)*nTimes);
+      allGroups[iGroup].CommWithMaster=CTA_Malloc(sizeof(MPI_Comm)*nTimes);
+      for (itime=0;itime<nTimes;itime++){
+         allGroups[iGroup].CommNoMaster[itime]  =MPI_COMM_NULL;
+         allGroups[iGroup].CommWithMaster[itime]=MPI_COMM_NULL;
+      }
+      if (! allGroups[iGroup].spawn_workers){
+         /* For the number of times we have assigned this group */
+         for (itime=0;itime<nTimes;itime++) {
+            isMyGroup=FALSE;
+            if (CTA_FILTER_PROCESS) {
+               if (IDEBUG) printf("CTAI_Par_CreateMPIGroups Create Group[%d,%d]\n",iGroup+1,
+                       itime);
+            }
+            nProc=allGroups[iGroup].nProc;
+            if (CTA_FILTER_PROCESS) {
+               if (IDEBUG) printf("CTAI_Par_CreateMPIGroups nProc for Group %d =%d\n",iGroup+1, nProc);
+            }
+            /* Set array of ranks of processes */
+            for (iProc=0;iProc<nProc;iProc++){
+                ranks[iProc+1]=proc_free+iProc;
+                /* Check if this process is in group */
+                if (ranks[iProc+1]==myRankInWorld){
+                   isMyGroup=TRUE;
+                }
+            }
+            if (isMyGroup) {
+               if (IDEBUG) printf("#%d I'm a member of group[%d,%d]\n",myRank, iGroup+1, itime);
+               CTAI_iGroup=iGroup+1;
+               allGroups[iGroup].iTime=itime;
+            } else {
+               if (IDEBUG) printf("#%d I'm not a member of group[%d,%d]\n",myRank, iGroup+1, itime);
+            }
+
+            proc_free=proc_free+nProc;
+
+            if (CTA_FILTER_PROCESS) {
+               if (IDEBUG) {
+                  printf("CTAI_Par_CreateMPIGroups processes in group [%d,%d] are:\n",iGroup+1, itime);
+                  for (iProc=0;iProc<nProc-1;iProc++){
+                     printf("%d, ",ranks[iProc+1]);
+                  }
+                  printf("%d\n",ranks[nProc]);
+               }
+            }
+
+            /* Create group without master */
+            MPI_Group_incl(group_world,nProc,ranks+1,&subGroupNoMaster);
+
+            /* Create group with master */
+            if (allGroups[iGroup].parType==WorkerWorker) {
+               if (CTA_FILTER_PROCESS) {
+                  if (IDEBUG) printf("CTAI_Par_CreateMPIGroups Group %d is of WorkerWorker type:\n",iGroup+1);
+               }
+               if (allGroups[iGroup].nDumProcs>0) {
+                  /* This is the complicated case were there are a number of dummy processes
+                   * that do not communicate with the COSTA master */
+                  iOff=0;
+                  for (iProc=0;iProc<nProc;iProc++){
+                     if (iOff<allGroups[iGroup].nDumProcs &&  iProc==allGroups[iGroup].dumProcs[iOff]){
+                        iOff++;
+                     }
+                     else  {
+                        ranks[iProc+1-iOff]=ranks[iProc+1];
+                     }
+                  }
+                  /* Decrease number of processes */
+                  nProc=nProc-allGroups[iGroup].nDumProcs;
+               }
+               if (CTA_FILTER_PROCESS) {
+                  if (IDEBUG) {
+                     printf("CTAI_Par_CreateMPIGroups processes in group min dum [%d,%d] are:\n",iGroup+1, itime);
+                     for (iProc=0;iProc<nProc-1;iProc++){
+                        printf("%d, ",ranks[iProc+1]);
+                     }
+                     printf("%d\n",ranks[nProc]);
+                  }
+               }
+
+               MPI_Group_incl(group_world,nProc+1,ranks,&subGroupWithMaster);
+            }
+            else  {
+               if (CTA_FILTER_PROCESS) {
+                  if (IDEBUG) printf("CTAI_Par_CreateMPIGroups Group %d is of MasterWorker type:\n",iGroup+1);
+               }
+               MPI_Group_incl(group_world,2,ranks,&subGroupWithMaster);
+            }
+
+            /* Create communicators */
+            MPI_Comm_create(CTA_COMM_WORLD,subGroupNoMaster,
+                                                &subCommNoMaster);
+            MPI_Comm_create(CTA_COMM_WORLD,subGroupWithMaster,
+                                                &subCommWithMaster);
+            allGroups[iGroup].CommNoMaster[itime]= subCommNoMaster;
+            allGroups[iGroup].CommWithMaster[itime]=subCommWithMaster;
+
+            /* Set global communicators */
+            if (isMyGroup) {
+               if (IDEBUG) printf("#%d I'm in group [%d,%d]\n",      myRankInWorld, iGroup+1,itime);
+               if (IDEBUG) printf("#%d CTA_COMM_MYWORLD       =%p\n",myRankInWorld, subCommNoMaster);
+               if (IDEBUG) printf("#%d CTA_COMM_MASTER_WORKER =%p\n",myRankInWorld, subCommWithMaster);
+
+               CTA_COMM_MYWORLD       = subCommNoMaster;
+               CTA_COMM_MASTER_WORKER = subCommWithMaster;
+            }
+         }
+      }
+   }
+   free(ranks);
+}
+
+#undef METHOD
+#define METHOD "CTAI_Par_SetGroupInfo"
+int CTAI_Par_SetGroupInfo(CTA_Handle parConfig, CTAI_Group *group){
+
+   CTA_String sName, sUseFor, sParType, sSpawn_workers, sDumProcs;
+   int ierr;
+   int myRank;
+   int len;
+   int datatype;
+   char *implements, *parallel_type, *nproc, *ntimes, *dumProcs, *spawn_workers;
+   int i;
+   char *txtpart, *txtstr, *s;
+
+   /* Get rank for output */
+   ierr=MPI_Comm_rank(CTA_COMM_WORLD, &myRank);
+
+
+   CTA_Handle_GetDatatype(parConfig, &datatype);
+   /* check whether it is a model class */
+
+   // Set dummy processes
+   group->nDumProcs=0;
+   group->dumProcs=NULL;
+
+
+   if (datatype==CTA_MODELCLASS){
+      /* get information from the modelcls */
+      CTAI_ModelFac_GetParallelData(parConfig, &implements, &parallel_type, &spawn_workers, &nproc,  &ntimes, &dumProcs);
+
+      // NAME (using implements)
+      group->name=CTA_Malloc((strlen(implements)+1)*sizeof(char));
+      strcpy(group->name,implements);
+
+      // NPROC
+      group->nProc = atoi(nproc);
+
+      // SPAWN WORKERS
+      group->spawn_workers = (spawn_workers && (spawn_workers[1]=='Y' || spawn_workers[1]=='y'));
+
+      //FORMODEL (implements)
+      group->forModel=CTA_Malloc((strlen(implements)+1)*sizeof(char));
+      strcpy(group->forModel,implements);
+
+      // PARALLEL_TYPE
+      if (strcmp(parallel_type,"Worker-Worker")==0){
+         group->parType=WorkerWorker;
+      } else if (strcmp(parallel_type,"Master-Worker")==0){
+         group->parType=MasterWorker;
+      } else {
+         CTAI_Par_FatalError(CTA_ILLEGAL_INPUT_ARGUMENT,
+            "parallel_type must have value 'Worker-Worker' or  'Master-Worker'");
+      }
+      // NTIMES
+      group->nTimes = atoi(ntimes);
+
+      // ITIMES
+      group->iTime=0;
+
+
+
+   } else {
+
+      // NAME
+      ierr=CTA_Tree_GetHandleStr(parConfig,"group/name", &sName);
+      CTAI_Par_FatalError(ierr, "cannot find  group/name in input");
+      ierr=CTA_String_GetLength(sName, &len);
+      group->name=CTA_Malloc((len+1)*sizeof(char));
+      ierr=CTA_String_Get(sName, group->name);
+
+      //NPROC
+      ierr=CTA_Tree_GetValueStr(parConfig,"group/nproc", &(group->nProc), CTA_INTEGER);
+      CTAI_Par_FatalError(ierr, "cannot find  group/nproc in input");
+
+      //FORMODEL
+      ierr=CTA_Tree_GetHandleStr(parConfig,"group/use_for_model", &sUseFor);
+      CTAI_Par_FatalError(ierr, "cannot find  group/use_for_model in input");
+      CTA_String_GetLength(sUseFor, &len);
+      group->forModel=CTA_Malloc((len+1)*sizeof(char));
+      CTA_String_Get(sUseFor, group->forModel);
+
+      // PARALLEL_TYPE
+      ierr=CTA_Tree_GetHandleStr(parConfig,"group/parallel_type", &sParType);
+      CTAI_Par_FatalError(ierr, "cannot find  group/parallel_type in input");
+      if (strcmp(CTAI_String_GetPtr(sParType),"Worker-Worker")==0){
+         group->parType=WorkerWorker;
+      } else if (strcmp(CTAI_String_GetPtr(sParType),"Master-Worker")==0){
+         group->parType=MasterWorker;
+      } else {
+         CTAI_Par_FatalError(CTA_ILLEGAL_INPUT_ARGUMENT,
+            "group/parallel_type must have value 'Worker-Worker' or  'Master-Worker'");
+      }
+
+      // SPAWN_WORKER
+      group->spawn_workers=FALSE;
+      ierr=CTA_Tree_GetHandleStr(parConfig,"group/spawn_workers", &sSpawn_workers);
+      if (ierr==CTA_OK){
+         spawn_workers=CTAI_String_GetPtr(sSpawn_workers);
+         group->spawn_workers=(spawn_workers[0]=='Y' || spawn_workers[0]=='y');
+      }
+
+      // NTIMES
+      ierr=CTA_Tree_GetValueStr(parConfig,"group/ntimes", &(group->nTimes), CTA_INTEGER);
+      if (ierr!=CTA_OK){
+         group->nTimes=0;
+      }
+
+      // ITIMES
+      group->iTime=0;
+
+
+      // Set dummy processes (only in case of Worker-Worker processes
+      if ( group->parType==WorkerWorker){
+         // Set message writer in quiet mode since it is ok if this info
+         // does not exist
+         CTA_Message_Quiet(CTA_TRUE);
+         ierr=CTA_Tree_GetHandleStr(parConfig,"group/dumproc", &sDumProcs);
+         CTA_Message_Quiet(CTA_FALSE);
+         if (ierr==CTA_OK){
+             txtstr=CTAI_String_GetPtr(sDumProcs);
+             s = txtstr;
+             for (i=0; s[i]; s[i]==',' ? i++ : *s++);
+             group->nDumProcs = strlen(txtstr) == 0 ? 0 :  i+1;
+             if (group->nDumProcs > 0) {
+                group->dumProcs  = CTA_Malloc(sizeof(int)*group->nDumProcs);
+                for (i=0; ; i++,txtstr=NULL) {
+                   txtpart = strtok(txtstr,",");
+                   if (txtpart == NULL) break;
+                   group->dumProcs[i]=atoi(txtpart);
+                }
+             }
+             if (IDEBUG) printf("#%d Dummy process is selected in Worker-Worker concept\n", myRank);
+         }
+      }
+   }
+   if (CTA_FILTER_PROCESS) {
+      if (IDEBUG) {
+         printf("----- OpenDA communication/model group information ------\n");
+         printf("Name of group           =%s\n",group->name);
+         printf("number of proccesses    =%d\n",group->nProc);
+         printf("Used for model          =%s\n",group->forModel);
+         printf("Number of replications  =%d\n",group->nTimes);
+         printf("Kind of parallelization =");
+         if (group->parType==WorkerWorker){
+            printf("WorkerWorker\n");
+         } else {
+            printf("MasterWorker\n");
+         }
+         printf("Spawn workers           =");
+         if (group->spawn_workers){
+            printf("Yes\n");
+         } else {
+            printf("No\n");
+         }
+      }
+   }
+   return CTA_OK;
+}
+
+
+
+
+#undef METHOD
+#define METHOD "CTAI_Par_FatalError"
+void CTAI_Par_FatalError(int ierr, char* msg){
+   if (ierr!=CTA_OK){
+      printf("FATAL ERROR:\n");
+      printf("%s\n", msg);
+      exit(-1);
+   }
+}
+
+#endif
+
+#undef METHOD
+#define METHOD "CTA_Par_WorkerSpawn"
+int CTA_Par_WorkerSpawn(int StartPar){
+#ifdef USE_MPI
+   int retval;      /** return value of MPI-call */
+   int comm_size;   /** Size of communcation group/number of spawned processes */
+
+   if (IDEBUG) printf("Debug: start of %s\n",METHOD);
+   /* initialise MPI. */
+   retval=MPI_Init(NULL,NULL);
+   retval=MPI_Comm_size(CTA_COMM_WORLD, &comm_size);
+   retval=MPI_Comm_rank(CTA_COMM_WORLD, &myRankInWorld);
+
+   if (IDEBUG) {
+      if (myRankInWorld==0) printf("%s Number of processes that are spawned :%d\n",METHOD, comm_size);
+   }
+   /* Set rank of this process, note: this is not in the whole world! */
+   CTA_PAR_MY_RANK=myRankInWorld;
+
+   /* By definition: we are running parallel */
+   CTA_IS_PARALLEL=TRUE;
+
+   /* By definition: this process is a worker process */
+   CTA_FILTER_PROCESS=FALSE;
+   CTA_MY_PROC_TYPE=CTA_ParWorker;
+   
+   /* Set communicators */
+   CTA_COMM_MYWORLD       = MPI_COMM_WORLD;
+   retval=MPI_Comm_get_parent(&CTA_COMM_MASTER_WORKER);
+   if (retval){
+      printf("Error in MPI_Comm_get_parent\n");
+      exit(-1);
+   }
+
+   /* Do we need to start the modelbuilder */
+   if (StartPar==CTA_TRUE) {
+      if(IDEBUG) {
+         printf("=================================================\n");
+         printf("Starting the modelbuilder\n");
+         printf("Inter communicator is %p\n",CTA_COMM_MASTER_WORKER);
+         printf("=================================================\n");
+      }
+      CTA_Modbuild_par_CreateClass(&CTA_MODBUILD_PAR);
+   }
+#endif
+   return CTA_OK;
+
+   }
+
+
+#undef METHOD
+#define METHOD "CTA_Par_CreateGroups"
+int CTA_Par_CreateGroups(int parConfig, int StartPar){
+
+#ifdef USE_MPI
+
+   int comm_size;
+   int iGroup;
+   CTA_Tree groupConfig;
+   int nGroups1, nGroups2;
+
+   /* initialise MPI. We now assume that COSTA is the whole world.
+    * This might change in the future */
+   MPI_Init(NULL,NULL);
+   MPI_Comm_size(CTA_COMM_WORLD, &comm_size);
+
+   /* When this function is called we will always run in parallel */
+   CTA_IS_PARALLEL=CTA_TRUE;
+
+   if (IDEBUG) {
+      if (myRankInWorld==0) printf("CTA_Par_CreateGroups: Size of CTA_COMM_WORLD is %d\n",comm_size);
+   }
+
+   /* Get the rank of this process */
+   MPI_Comm_rank(CTA_COMM_WORLD, &myRankInWorld);
+   CTA_PAR_MY_RANK=myRankInWorld;
+
+   CTA_FILTER_PROCESS=(myRankInWorld==0);
+
+   /* Set master and worker */
+   if (CTA_FILTER_PROCESS) {
+      CTA_MY_PROC_TYPE=CTA_ParMaster;
+   } else {
+      CTA_MY_PROC_TYPE=CTA_ParWorker;
+   }
+
+   /* Read configuration */
+   if (IDEBUG) {
+      if (CTA_FILTER_PROCESS) printf("CTA_Par_CreateGroups: number of processes is %d\n",comm_size);
+   }
+
+   /* Count number of process groups */
+   CTAI_Par_NumGroups(parConfig, &nGroups1, &nGroups2);
+   CTAI_nGroups=nGroups1+nGroups2;
+   if (IDEBUG) {
+      if (CTA_FILTER_PROCESS) printf("CTA_Par_CreateGroups: number groups defined globally  =%d\n",nGroups1);
+      if (CTA_FILTER_PROCESS) printf("CTA_Par_CreateGroups: number groups defined locally   =%d\n",nGroups2);
+      if (CTA_FILTER_PROCESS) printf("CTA_Par_CreateGroups: total number of defined ngroups =%d\n",CTAI_nGroups);
+   }
+
+   /* Get properties of the groups and store them in CTAI_AllGroups */
+   CTAI_AllGroups=NULL;
+   if (CTAI_nGroups>0) {
+      CTAI_AllGroups=CTA_Malloc(CTAI_nGroups*sizeof(CTAI_Group));
+
+      for (iGroup=0;iGroup<nGroups1; iGroup++){
+         CTAI_Par_GetGroupConfig(parConfig, iGroup, &groupConfig);
+         CTAI_Par_SetGroupInfo(groupConfig, &(CTAI_AllGroups[iGroup]));
+      }
+
+      for (iGroup=0;iGroup<nGroups2; iGroup++){
+         CTAI_Par_GetGroupConfig2(parConfig, iGroup, &groupConfig);
+         CTAI_Par_SetGroupInfo(groupConfig, &(CTAI_AllGroups[nGroups1+iGroup]));
+      }
+   }
+   /* Create process groups and communicators */
+   CTAI_Par_CreateMPIGroups(CTAI_AllGroups, CTAI_nGroups);
+
+   /* Start parallel model builder for workers */
+   /* OR start worker for Master-Worker processes */
+
+   if (StartPar==CTA_TRUE) {
+      CTA_Modbuild_par_CreateClass(&CTA_MODBUILD_PAR);
+   }
+#endif
+   return CTA_OK;
+
+   }
+
+
+#ifdef USE_MPI
+#undef METHOD
+#define METHOD "CTA_Par_GetAllCommByIndex"
+int CTA_Par_GetAllCommByIndex(int indx, MPI_Comm *comm){
+   int iGroup; /* Counter over process groups */
+   int jndx;   /* Total counter over communicators */
+   int nTimes; /* Number of times an process group exists */
+   int iTime;  /* Counter over NTimes */
+
+   *comm=MPI_COMM_NULL;
+   if (IDEBUG) printf("CTA_Par_GetAllCommByIndex requesting comm with index %d\n",indx);
+
+   jndx=-1;
+   if (IDEBUG) printf("Number of groups is %d\n",CTAI_nGroups);
+   for (iGroup=0;iGroup<CTAI_nGroups;iGroup++){
+      if (IDEBUG) printf("CTA_Par_GetAllCommByIndex iGroup=%d\n",iGroup);
+      nTimes=CTAI_AllGroups[iGroup].nTimes;
+      if (IDEBUG) printf("CTA_Par_GetAllCommByIndex nTimes=%d\n",nTimes);
+      for (iTime=0;iTime<nTimes;iTime++){
+         jndx++;
+         if (jndx==indx) {
+            *comm=CTAI_AllGroups[iGroup].CommWithMaster[iTime];
+            return CTA_OK;
+         }
+      }
+   }
+   return CTA_CANNOT_FIND_PROCESS_GROUP;
+
+}
+#endif
+
+
+
+#ifdef USE_MPI
+#undef METHOD
+#define METHOD "CTA_Par_CreateNewCreateGetComm"
+int CTA_Par_CreateNewCreateGetComm(CTA_ModelClass modelCls, MPI_Comm *comm){
+
+int iGroup;  /* Counter over all groups */
+int iTime;   /* Loop counter over number of instances of the group */
+int nTimes;  /* Number of instances of the group */
+
+char *command; /* Startup command of processes */
+int *errCodes; /* Startup error codes for MPI_Comm_spawn */
+char **argv;   /* Startup arguments of processes in MPI_Comm_spawn */
+int nProc;     /* Number of processes that must be spawned */
+MPI_Comm intercomm; /* inter communicator between spawning process and worker processes */
+
+
+
+const char *implements;
+
+  *comm=MPI_COMM_NULL;
+  if (IDEBUG) printf("CTA_Par_CreateNewCreateGetComm: CTAI_nGroups=%d\n",CTAI_nGroups );
+
+  /* What model is implemented by this modelclass? */
+  implements=CTAI_ModelFac_GetImplements(modelCls);
+  if (IDEBUG) printf("CTA_Par_CreateNewCreateGetComm: implements=%s\n",implements);
+
+  /* Search for the corresponding process group */
+  for (iGroup=0;iGroup<CTAI_nGroups;iGroup++){
+
+     if (IDEBUG) {
+        printf("Exisiting group is:%s\n",CTAI_AllGroups[iGroup].forModel);
+        printf("We are looking for:%s\n",implements);
+     }
+     if (strcmp(implements,CTAI_AllGroups[iGroup].forModel)==0){
+        /* return communicator */
+        iTime =CTAI_AllGroups[iGroup].iTime;
+        nTimes=CTAI_AllGroups[iGroup].nTimes;
+        /* spawn worker processes when needed */
+        if (CTAI_AllGroups[iGroup].spawn_workers){
+           if(CTAI_AllGroups[iGroup].CommWithMaster[iTime]==MPI_COMM_NULL){
+
+
+              command="pollute2d_ww";
+              argv=CTA_Malloc(sizeof(char*));
+              argv[0]=NULL;
+              nProc=CTAI_AllGroups[iGroup].nProc;
+              errCodes=CTA_Malloc(sizeof(int)*nProc);
+              if (IDEBUG) {
+                 printf("Spawning processes\n");
+                 printf("command            :%s\n",command);
+                 printf("Number of processes:%d\n",nProc);
+              }
+
+              MPI_Comm_spawn(command,argv,nProc,MPI_INFO_NULL,
+                             0,CTA_COMM_MYWORLD, 
+                             &intercomm,
+                             errCodes);
+              free(errCodes);
+              free(argv);
+              CTAI_AllGroups[iGroup].CommWithMaster[iTime]=intercomm;
+           }
+        }
+        *comm=CTAI_AllGroups[iGroup].CommWithMaster[iTime];
+        CTAI_AllGroups[iGroup].iTime=(iTime+1)%nTimes;
+        break;
+     }
+  }
+  if (iGroup==CTAI_nGroups) {
+     return CTA_CANNOT_FIND_PROCESS_GROUP;
+  }
+  else {
+     return CTA_OK;
+  }
+}
+
+
+
+#undef METHOD
+#define METHOD "CTA_Par_GetGroupInfo"
+void CTA_Par_GetGroupInfo(int *myGroup, int *itime){
+*myGroup = CTAI_iGroup;
+*itime =   CTAI_AllGroups[CTAI_iGroup-1].iTime + 1;
+// printf("**************** cta_par_getgroupinfo: %d %d \n",CTAI_iGroup, *itime);
+}
+
+
+
+
+
+#undef METHOD
+#define METHOD "CTA_Par_GetFComm"
+void CTA_Par_GetFComm(int *cta_comm_world, int *cta_comm_myworld, int *cta_comm_master_worker){
+   int isInit;
+   MPI_Initialized(&isInit);
+   if (isInit){
+      *cta_comm_world          = MPI_Comm_c2f(CTA_COMM_WORLD);
+      *cta_comm_myworld        = MPI_Comm_c2f(CTA_COMM_MYWORLD);
+      *cta_comm_master_worker  = MPI_Comm_c2f(CTA_COMM_MASTER_WORKER);
+   }
+   else {
+      *cta_comm_world          = 0;
+      *cta_comm_myworld        = 0;
+      *cta_comm_master_worker  = 0;      
+   }
+}
+
+
+#else
+
+#undef METHOD
+#define METHOD "CTA_Par_GetFComm"
+void CTA_Par_GetFComm(int *cta_comm_world, int *cta_comm_myworld, int *cta_comm_master_worker){
+
+*cta_comm_world          = -911;
+*cta_comm_myworld        = -911;
+*cta_comm_master_worker  = -911;
+
+}
+#endif
+
+CTAEXPORT void CTA_PAR_WORKERSPAWN_F77 (int *StartPar, int *ierr) {
+   *ierr=CTA_Par_WorkerSpawn(*StartPar);
+}
+
+CTAEXPORT void CTA_PAR_GETFCOMM_F77(int *cta_comm_world, int *cta_comm_myworld, int *cta_comm_master_worker){
+   CTA_Par_GetFComm(cta_comm_world, cta_comm_myworld, cta_comm_master_worker);
+}
+
+CTAEXPORT void CTA_PAR_CREATEGROUPS_F77 (int *parConfig, int *StartPar, int *ierr) {
+   *ierr=CTA_Par_CreateGroups(*parConfig, *StartPar);
+}
+
+
+#if USE_MPI
+CTAEXPORT void CTA_PAR_GETGROUPINFO_F77 (int *myGroup, int *itime){
+   CTA_Par_GetGroupInfo(myGroup, itime);
+}
+#endif
diff --git a/costa/native/cta/src/cta_reltable.c b/costa/native/cta/src/cta_reltable.c
new file mode 100644
index 000000000..53a42aac7
--- /dev/null
+++ b/costa/native/cta/src/cta_reltable.c
@@ -0,0 +1,642 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_reltable.c $
+$Revision: 3567 $, $Date: 2012-10-08 16:53:30 +0200 (Mon, 08 Oct 2012) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include "cta_mem.h"
+#include "f_cta_utils.h"
+#include "ctai.h"
+#include "cta_reltable.h"
+#include "cta_errors.h"
+#include "cta_util_sort.h"
+#include "cta_string.h"
+#include "cta_defaults.h"
+#include "cta_message.h"
+
+#define CTA_RELTABLE_CREATE_F77    F77_CALL(cta_reltable_create,CTA_RELTABLE_CREATE)
+#define CTA_RELTABLE_FREE_F77      F77_CALL(cta_reltable_free,CTA_RELTABLE_FREE)
+#define CTA_RELTABLE_APPLY_F77     F77_CALL(cta_reltable_appy,CTA_RELTABLE_APPY)
+#define CTA_RELTABLE_APPLYINV_F77  F77_CALL(cta_reltable_appyinv,CTA_RELTABLE_APPYINV)
+#define CTA_RELTABLE_SETSELECT_F77 F77_CALL(cta_reltable_setselect,CTA_RELTABLE_SETSELECT)
+#define CTA_RELTABLE_APPLY_VAL_F77     F77_CALL(cta_reltable_appyval,CTA_RELTABLE_APPYVAL)
+#define CTA_RELTABLE_APPLYINV_VAL_F77  F77_CALL(cta_reltable_appyinvval,CTA_RELTABLE_APPYINVVAL)
+#define CTA_RELTABLE_SETSELECT_VAL_F77 F77_CALL(cta_reltable_setselectval,CTA_RELTABLE_SETSELECTVAL)
+#define CTA_SETTABLECOMBINE_F77    F77_CALL(cta_settablecombine,CTA_SETTABLECOMBINE)
+#define CTA_RELTABLE_COUNT_F77     F77_CALL(cta_reltable_count,CTA_RELTABLE_COUNT)
+
+#define IDEBUG (0)
+
+#define CLASSNAME "CTA_Reltable"
+/* Struct holding all data associated to an COSTA time span */
+
+typedef struct {
+int nsel1;  /* size of array sel1 if 0 then use identity for set1 */
+int nsel2;  /* size of array sel2 if 0 then use identity for set2 */
+int *sel1;  /* points in origin set to be copied                  */
+int *sel2;  /* points in target set to copy elements from sel1 to */
+} CTAI_RelTable;
+
+/** \brief Get data of relation table
+ *
+ *  \param hrel     (I) Handle of relation table
+ *  \param reltable (O) Data of relation table
+ *
+ * \return error status: CTA_OK if successful
+ */
+int CTAI_RelTable_GetData(CTA_RelTable hrel, CTAI_RelTable **reltable ){
+   int retval;
+
+   retval=CTA_Handle_Check((CTA_Handle) hrel,CTA_RELTABLE);
+   if (retval!=CTA_OK) return retval;
+   retval=CTA_Handle_GetData((CTA_Handle) hrel,(void**) reltable);
+   if (retval!=CTA_OK) return retval;
+
+   return CTA_OK;
+}
+
+/** \brief Get (pointers to) information in relation table
+ *         taking into account for inverses.
+ *
+ *  \param reltable (I) Data of relation table
+ *  \param inverse  (I) Select CTA_TRUE/CTA_FALSE to return info on inverse
+ *                      of relation table table
+ *  \param nsel1    (O) size of sel1
+ *  \param sel1     (O) selected points of origin set
+ *  \param nsel1    (O) size of sel2
+ *  \param sel1     (O) selected points of target set
+ */
+void CTAI_GetRelTableInfo(CTAI_RelTable *reltable, int inverse,
+                         int *nsel1, int **sel1,
+                         int *nsel2, int **sel2){
+
+    if (inverse) {
+       *nsel1=reltable->nsel2;
+       *nsel2=reltable->nsel1;
+       *sel1=reltable->sel2;
+       *sel2=reltable->sel1;
+   }
+   else {
+       *nsel1=reltable->nsel1;
+       *nsel2=reltable->nsel2;
+       *sel1=reltable->sel1;
+       *sel2=reltable->sel2;
+  }
+}
+
+
+/** \brief Apply relation table to two components
+ *
+ * \param reltable (I)  Data of relation table
+ * \param hrom     (I)  Source set of elements
+ * \param hto      (IO) Target set (some will be overwitten)
+ * \param iverse   (I)  CTA_TRUE/CTA_FALSE apply inverse table
+ *
+ * \return error status: CTA_OK if successful
+ */
+int CTAI_RelTable_Apply(CTA_RelTable hreltable,
+                       CTA_Handle hfrom, CTA_Handle hto, int inverse){
+
+   int retval;
+   CTAI_RelTable *reltable;
+   int size_of_datatype;
+   void *val;
+   int isel, isel1, isel2;
+   int nsel, nsel1, nsel2;
+   int *sel1, *sel2;
+   CTA_Datatype type_from, type_to, datatype;
+
+   /* Get data object of relation table */
+   retval=CTAI_RelTable_GetData(hreltable, &reltable);
+   if (retval!=CTA_OK) return retval;
+
+   /* Set relation table administration depending on inverse option */
+   CTAI_GetRelTableInfo(reltable, inverse, &nsel1, &sel1, &nsel2, &sel2);
+
+   /* check kind of object to copy elements from: */
+   retval=CTA_Handle_GetDatatype(hfrom, &type_from);
+   if (retval!=CTA_OK) return retval;
+   retval=CTA_Handle_GetDatatype(hto, &type_to);
+   if (retval!=CTA_OK) return retval;
+
+   /* Are they both of the same datatype? */
+   if (type_to!=type_from) return CTA_INPUT_OBJECTS_ARE_INCOMPATIBLE;
+
+   /* At this time we use an expensive way of applying the relation tables
+      we need to make changes (extentions to the interface) of the
+      supported objects in order to improve the performance */
+   switch(type_from){
+      case CTA_VECTOR:
+         /* get datatype of vector object */
+         retval=CTA_Vector_GetDatatype(hfrom,&datatype);
+         /* strings are not yet supported */
+
+
+         /* allocate memory block for storing values */
+         retval=CTA_SizeOf(datatype, &size_of_datatype);
+         if (retval!=CTA_OK) return retval;
+         val=CTA_Malloc(size_of_datatype);
+
+         /* Create string instance for holding copied values
+            when vector contains strings                    */
+         if (datatype==CTA_STRING){
+            retval=CTA_String_Create(val);
+            if (retval!=CTA_OK) return retval;
+         };
+
+         /* Loop over all elements that need to be copied */
+         nsel=MAX(nsel1,nsel2);
+         for (isel=1;isel<=nsel;isel++){
+            if (nsel1==0) {isel1=isel;} else {isel1=sel1[isel-1];}
+            if (nsel2==0) {isel2=isel;} else {isel2=sel2[isel-1];}
+
+            retval=CTA_Vector_GetVal(hfrom,isel1,val,datatype);
+            if (retval!=CTA_OK) {return retval;}
+            retval=CTA_Vector_SetVal(hto,isel2,val,datatype);
+            if (retval!=CTA_OK)  {return retval;}
+         }
+
+         /* Free string instance (when strings have been copied) */
+         if (datatype==CTA_STRING){
+            retval=CTA_String_Free(val);
+            if (retval!=CTA_OK) return retval;
+         };
+         free(val);
+
+         break;
+      case CTA_TREEVECTOR:
+         return CTA_NOT_YET_SUPPORTED;
+         break;
+      default:
+         return CTA_INPUT_OBJECT_NOT_SUPPORTED;
+         break;
+   }
+   return CTA_OK;
+}
+
+void CTAI_RelTable_SetWorkingCopy(int nsel1, int *sel1, int nsel2, int *sel2,
+                                 int *nelm,  int **from, int **to){
+
+   int i;
+
+
+   *nelm=MAX(nsel1,nsel2);
+   *from=CTA_Malloc((*nelm)*sizeof(int));
+   *to  =CTA_Malloc((*nelm)*sizeof(int));
+
+   /* fill the from table */
+   if (nsel1==0) {
+      for (i=0;i<*nelm;i++){(*from)[i]=i+1;}
+   } else {
+      for (i=0;i<*nelm;i++){(*from)[i]=sel1[i];}
+   }
+
+   /* fill the to table */
+   if (nsel2==0) {
+      for (i=0;i<*nelm;i++){(*to)[i]=i+1;}
+   } else {
+      for (i=0;i<*nelm;i++){(*to)[i]=sel2[i];}
+   }
+
+   /* now we have a table; shall we sort the first or second column? */
+
+
+
+}
+
+
+
+
+
+#undef METHOD
+#define METHOD "Create" 
+int CTA_RelTable_Create(CTA_RelTable *hreltable){
+
+   CTAI_RelTable *reltable;
+   int retval;
+
+   /* allocate memory for new time object */
+   reltable=CTA_Malloc(sizeof(CTAI_RelTable));
+
+   /* initialise administration */
+   reltable->nsel1=0;
+   reltable->nsel2=0;
+   reltable->sel1=NULL;
+   reltable->sel2=NULL;
+
+   /* Allocate new handle and return error when unsuccesfull */
+   retval=CTA_Handle_Create("reltable",CTA_RELTABLE,reltable,hreltable);
+   if (retval) {
+       CTA_WRITE_ERROR("Cannot create handle");
+       return retval;
+   }
+
+   return CTA_OK;
+}
+
+#undef METHOD
+#define METHOD "Create" 
+int CTA_RelTable_Free(CTA_RelTable *hreltable){
+
+   CTAI_RelTable *reltable;
+   int retval;
+
+   /* Get data object of relation table */
+   retval=CTAI_RelTable_GetData(*hreltable, &reltable);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   /* free selection arrays */
+   if (reltable->sel1){free(reltable->sel1);}
+   if (reltable->sel2){free(reltable->sel2);}
+
+   /* free data block and handle */
+   free(reltable);
+   retval=CTA_Handle_Free(hreltable);
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "ApplyVal" 
+int CTA_RelTable_ApplyVal(CTA_RelTable hreltable,
+                          void *vfrom, int nfrom,
+                          void *vto, int nto, CTA_Datatype datatype){
+  int retval;
+  CTA_Vector hfrom, hto;
+
+  /* create and fill the 'from' vector */
+   retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nfrom, datatype, CTA_NULL, &hfrom);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Cannot create vector");
+       return retval;
+   }
+
+   retval=CTA_Vector_SetVals(hfrom, vfrom, nfrom, datatype);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Cannot set values");
+       return retval;
+   }
+
+  /* create the 'to' vector */
+   retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nto, datatype, CTA_NULL, &hto);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Cannot create vector");
+       return retval;
+   }
+
+   retval = CTA_RelTable_Apply(hreltable, hfrom, hto);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Cannot apply RelTabkle");
+       return retval;
+   }
+
+   /* fill the 'vto' array and clean up */
+   retval=CTA_Vector_GetVals(hto, vto, nto, datatype);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Cannot get values");
+       return retval;
+   }
+
+   retval=CTA_Vector_Free(&hfrom);
+   retval=CTA_Vector_Free(&hto);
+   return retval;
+
+}
+
+int CTA_RelTable_ApplyInvVal(CTA_RelTable hreltable,
+                          void *vfrom, int nfrom,
+                          void *vto, int nto, CTA_Datatype datatype){
+
+  return CTA_RelTable_ApplyVal(hreltable, vto,nto, vfrom,nfrom, datatype);
+
+}
+
+int CTA_RelTable_Apply(CTA_RelTable hreltable,
+                       CTA_Handle hfrom, CTA_Handle hto){
+
+   return CTAI_RelTable_Apply(hreltable, hfrom, hto, FALSE);
+
+}
+
+int CTA_RelTable_ApplyInv(CTA_RelTable hreltable,
+                       CTA_Handle hfrom, CTA_Handle hto){
+
+   return CTAI_RelTable_Apply(hreltable, hfrom, hto, TRUE);
+}
+
+
+#undef METHOD
+#define METHOD "SetSelectVal" 
+int CTA_RelTable_SetSelectVal(CTA_RelTable hreltable, void *val, int nval, CTA_Datatype datatype){
+   CTA_Vector vwork;
+   int retval;
+
+   retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nval, datatype, CTA_NULL, &vwork);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Cannot create verctor handle");
+       return retval;
+   }
+   retval=CTA_Vector_SetVals(vwork, val, nval, datatype);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Cannot set values in vector");
+       return retval;
+   }
+   retval=CTA_RelTable_SetSelect(hreltable,vwork);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Error in CTA_RelTable_SetSelect");
+       return retval;
+   }
+   retval=CTA_Vector_Free(&vwork);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Cannot free vector");
+       return retval;
+   }
+
+   return CTA_OK;
+}
+
+
+
+#undef METHOD
+#define METHOD "SetSelect" 
+int CTA_RelTable_SetSelect(CTA_RelTable hreltable, CTA_Vector vselect){
+   int retval;
+   int i, n;
+   CTAI_RelTable *reltable;
+
+   /* Get data object of relation table */
+   retval=CTAI_RelTable_GetData(hreltable, &reltable);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   /* free memory if relation table is used in past */
+   if (reltable->sel1){free(reltable->sel1);}
+   if (reltable->sel2){free(reltable->sel2);}
+   reltable->nsel2=0;
+
+   /* Set cardinality */
+   retval=CTA_Vector_GetSize(vselect, &n);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get size of vector");
+	   return retval;
+   }
+   reltable->nsel1=n;
+
+   /* copy values of vselect into local administration */
+   reltable->sel1=CTA_Malloc(n*sizeof(int));
+   retval=CTA_Vector_GetVals(vselect,  reltable->sel1, n, CTA_INTEGER);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get values from vector");
+	   return retval;
+   }
+
+   if (IDEBUG) {
+      printf("CTA_RelTable_SetSelect: set a new relation table\n");
+      printf("reltable->nsel1=%d\n",reltable->nsel1);
+      printf("reltable->nsel2=%d\n",reltable->nsel2);
+      printf("reltable->sel1=");
+      for (i=0;i<reltable->nsel1;i++){
+         printf("%d ",reltable->sel1[i]);
+      }
+      printf("\n");
+      printf("reltable->sel2=");
+      for (i=0;i<reltable->nsel2;i++){
+         printf("%d ",reltable->sel1[i]);
+      }
+      printf("\n");
+   }
+
+   return CTA_OK;
+}
+
+
+#undef METHOD
+#define METHOD "SetTableCombine" 
+int CTA_RelTable_SetTableCombine(CTA_RelTable hreltable,
+                        CTA_RelTable hrel1, int inverse1,
+                        CTA_RelTable hrel2, int inverse2 ){
+
+int retval, i,j;
+BOOL found;
+int nsel1_rel1, nsel2_rel1, nsel1_rel2, nsel2_rel2;
+int *sel1_rel1, *sel2_rel1, *sel1_rel2, *sel2_rel2;
+int nelm1, nelm2, *from1, *to1, *from2, *to2;
+int *from;
+
+CTAI_RelTable *reltable, *reltable1, *reltable2;
+
+   /* Get data object of relation tables */
+   retval=CTAI_RelTable_GetData(hreltable, &reltable);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   retval=CTAI_RelTable_GetData(hrel1, &reltable1);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   retval=CTAI_RelTable_GetData(hrel2, &reltable2);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+/* first check for empty tables */
+   if (MAX(reltable1->nsel1,reltable1->nsel2) == 0) {
+     reltable = reltable1; return CTA_OK; //mag deze toekenning?
+   }
+   if (MAX(reltable2->nsel1,reltable2->nsel2) == 0) {
+     reltable = reltable2; return CTA_OK; //mag deze toekenning?
+   }
+
+
+
+   /* Set local variables and pointers in order to handle the inverse
+    * options
+    */
+   CTAI_GetRelTableInfo(reltable1, inverse1,
+                        &nsel1_rel1, &sel1_rel1,
+                        &nsel2_rel1, &sel2_rel1);
+
+   CTAI_GetRelTableInfo(reltable2, inverse2,
+                        &nsel1_rel2, &sel1_rel2,
+                        &nsel2_rel2, &sel2_rel2);
+
+
+   /* Create a local copy of administration of relation tables */
+   CTAI_RelTable_SetWorkingCopy(nsel1_rel1, sel1_rel1,
+                                nsel2_rel1, sel2_rel1,
+                                &nelm1, &from1, &to1);
+
+   CTAI_RelTable_SetWorkingCopy(nsel1_rel2, sel1_rel2,
+                                nsel2_rel2, sel2_rel2,
+                                &nelm2, &from2, &to2);
+
+   if (IDEBUG > 0) {printf("reltable_combine1 %d %d %d %d \n",nsel1_rel1,nsel2_rel1,nsel1_rel2,nsel2_rel2);}
+   //   printf("from2 01: %d %d \n",from2[0], from2[1]);
+
+   /* permutate towards target of first relation table */
+   CTA_Util_IQSort2(to1, from1, nelm1);
+
+   if (IDEBUG > 0) {printf("reltable_combine2-1 %d %d\n",nelm1,from2[0]);}
+
+   /* permutate towards origin of second relation table */
+   CTA_Util_IQSort2(to2, from2, nelm2);
+
+   if (IDEBUG > 0) {printf("reltable_combine2-2; nelm1,2:%d %d   \n", nelm1, from2[0]);}
+
+   /* find all target elements of first relation table  that corresponds
+      that are an origin element of second relation table. */
+   from=CTA_Malloc(sizeof(int)*nelm2);
+   j=0;
+   for (i=0;i<nelm2;i++){
+      for (;j<nelm1;j++){
+         found=FALSE;
+         //    printf("checking : i j %d %d: to1[j] %d from2[01]%d %d \n",i,j,to1[j],from2[0],from2[1]);
+         if (to1[j]==from2[i]){
+            from[i]=from1[j];
+            found=TRUE;
+            break;
+         }
+      }
+      /* Check whether element is found */
+      if (!found) {
+         printf("STOP: error: reltables cannot be combined ");
+         free(to1); free(from1); free(to2); free(from2); free(from);
+
+         return CTA_RELTABLES_CANNOT_BE_COMBINED;
+      }
+   }
+   if (IDEBUG > 0) {printf("reltable_combine4 %d %d\n",nelm1,nelm2);}
+
+
+   /* free memory if relation table is used in past */
+   if (reltable->sel1){free(reltable->sel1);}
+   if (reltable->sel2){free(reltable->sel2);}
+   reltable->nsel2=0;
+   reltable->sel1=CTA_Malloc(nelm2*sizeof(int));
+   reltable->sel2=CTA_Malloc(nelm2*sizeof(int));
+   reltable->nsel1=nelm2;
+   reltable->nsel2=nelm2;
+
+   if (IDEBUG > 0) {printf("reltable_combine6\n");}
+
+   /* Copy the relation table */
+   for (i=0;i<nelm2;i++){
+     reltable->sel1[i]=from[i];
+     reltable->sel2[i]=to2[i];
+   }
+
+
+  /* free all work variables */
+  free(to1); free(from1); free(to2); free(from2); free(from);
+
+  return CTA_OK;
+
+}
+
+
+#undef METHOD
+#define METHOD "Count"
+int CTA_RelTable_Count(CTA_RelTable hreltable, int *nelt){
+
+   CTAI_RelTable *reltable;
+   int retval;
+
+   /* Get data object of relation table */
+   retval=CTAI_RelTable_GetData(hreltable, &reltable);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   *nelt=MAX(reltable->nsel1,reltable->nsel2);
+
+   return CTA_OK;
+}
+
+
+
+
+
+/* Interfacing with Fortran */
+
+CTAEXPORT void CTA_RELTABLE_COUNT_F77(int *hreltable, int *nelt, int *ierr){
+   *ierr=CTA_RelTable_Count((CTA_RelTable) *hreltable, nelt);
+}
+
+CTAEXPORT void CTA_RELTABLE_CREATE_F77(int *hreltable, int *ierr){
+   *ierr=CTA_RelTable_Create((CTA_RelTable*) hreltable);
+}
+
+CTAEXPORT void CTA_RELTABLE_FREE_F77(int *hreltable, int *ierr){
+   *ierr=CTA_RelTable_Free((CTA_RelTable*) hreltable);
+}
+
+CTAEXPORT void CTA_RELTABLE_APPLY_F77(int *hreltable, int *hfrom, int *hto, int *ierr){
+   *ierr=CTA_RelTable_Apply((CTA_RelTable) *hreltable,
+                             (CTA_Handle) *hfrom, (CTA_Handle) *hto);
+}
+
+CTAEXPORT void CTA_RELTABLE_APPLY_VAL_F77(int *hreltable, void *hfrom,  int *nfrom,
+                                              void *hto, int *nto, int *datatype, int *ierr){
+   *ierr=CTA_RelTable_ApplyVal((CTA_RelTable) *hreltable,
+                               hfrom, *nfrom, hto, *nto, (CTA_Datatype) *datatype);
+}
+
+
+CTAEXPORT void CTA_RELTABLE_APPLYINV_F77(int *hreltable, int *hfrom, int *hto,
+                               int *ierr){
+   *ierr=CTA_RelTable_ApplyInv((CTA_RelTable) *hreltable,
+                       (CTA_Handle) *hfrom, (CTA_Handle) *hto);
+}
+
+CTAEXPORT void CTA_RELTABLE_APPLYINV_VAL_F77(int *hreltable, void *hfrom,  int *nfrom,
+                                              void *hto, int *nto, int *datatype, int *ierr){
+   *ierr=CTA_RelTable_ApplyInvVal((CTA_RelTable) *hreltable,
+                               hfrom, *nfrom, hto, *nto, (CTA_Datatype) *datatype);
+
+}
+
+
+CTAEXPORT void CTA_RELTABLE_SETSELECT_F77(int *hreltable, int *selorig, int *ierr){
+   *ierr=CTA_RelTable_SetSelect((CTA_RelTable) *hreltable,
+                                (CTA_Vector) *selorig);
+}
+
+CTAEXPORT void CTA_RELTABLE_SETSELECT_VAL_F77(int *hreltable, void *val, int *nval, int*datatype, int *ierr){
+  *ierr=CTA_RelTable_SetSelectVal((CTA_RelTable) *hreltable, val, *nval,
+                                (CTA_Datatype) *datatype);
+}
+
+
+CTAEXPORT void CTA_SETTABLECOMBINE_F77(int *hreltable,
+                             int *hrel1, int *inverse1,
+                             int *hrel2, int *inverse2,
+                             int *ierr ){
+   *ierr=CTA_RelTable_SetTableCombine((CTA_RelTable) *hreltable,
+                             (CTA_RelTable) *hrel1, *inverse1,
+                             (CTA_RelTable) *hrel2, *inverse2);
+}
+
diff --git a/costa/native/cta/src/cta_resultwriter.c b/costa/native/cta/src/cta_resultwriter.c
new file mode 100644
index 000000000..bd334f69f
--- /dev/null
+++ b/costa/native/cta/src/cta_resultwriter.c
@@ -0,0 +1,251 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/core/native/src/cta/cta_time.c $
+$Revision: 2751 $, $Date: 2011-09-09 08:58:46 +0200 (Fri, 09 Sep 2011) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2013  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <stdio.h>
+#include <math.h>
+#include "cta_resultwriter.h"
+
+#include "cta_mem.h"
+#include "cta_tree.h"
+#include "cta_string.h"
+#include "cta_xml.h"
+#include "f_cta_utils.h"
+#include "cta_errors.h"
+#include "cta_message.h"
+
+/**
+ * OpenDA support the concept of resultwriters. The Data assimilation algorithm
+ * will presents intermediate results and information on the computations to the
+ * result writer. Each implementation of a resultwriter can then handle this data
+ * in its own way.
+ *
+ * See header file cta_resultwriter.h for more information
+ *
+ * @author nils van velzen
+ *
+ */
+#define CLASSNAME "CTA_ResultWriter"
+#ifdef WIN32
+#define FILESEP "\\"
+#else
+#define FILESEP "/"
+#endif
+
+
+/* Struct holding all different data associated to an native array */
+#define MaxNativeResultWriters (20)
+
+static int not_initialized=1;
+
+typedef struct {
+   int not_initialized;
+   int h_config;
+   CTA_Func h_putmessage;
+   CTA_Func h_putvalue;
+   CTA_Func h_putiterationreport;
+   CTA_Func h_free;
+} CTAI_ResultWriterAdmin;
+
+static CTAI_ResultWriterAdmin resultWriterAdmin[MaxNativeResultWriters];
+
+
+
+#undef METHOD
+#define METHOD "CTAI_Resultwriter_handleID"
+int CTAI_Resultwriter_handleID(int iDWriter, char *config, char *workingDir){
+  char msg[256];
+  char filename[265];
+  int iID, retval;
+  CTA_String hfname;
+  CTA_Tree htree=CTA_NULL;
+
+  
+  //Check Input
+  if (iDWriter<0){
+     sprintf(msg,"The Id of the resultwriter is negative (%d). This is a programming error in OpenDA",iDWriter);
+     CTA_WRITE_ERROR(msg);
+     return 0;
+  }
+  if (iDWriter>=MaxNativeResultWriters){
+     sprintf(msg,"The ID of the resultwriter is %d >= MaxNativeResultWriters (=%d). This might be caused by a programming error in OpenDA or you are trying to use too many resultwriters.",iDWriter,MaxNativeResultWriters);
+     CTA_WRITE_ERROR(msg);
+     return 0;
+  }
+
+  //Initialize the local administration when needed
+  if (not_initialized){
+     for (iID=0;iID<MaxNativeResultWriters;iID++){
+        resultWriterAdmin[iID].not_initialized      = 1;
+        resultWriterAdmin[iID].h_config             = CTA_NULL; 
+        resultWriterAdmin[iID].h_putmessage         = CTA_NULL; 
+        resultWriterAdmin[iID].h_putvalue           = CTA_NULL; 
+        resultWriterAdmin[iID].h_putiterationreport = CTA_NULL; 
+        resultWriterAdmin[iID].h_free               = CTA_NULL; 
+     }
+     not_initialized=0;
+  }
+ 
+  //Initialize this resultwriter when needed
+  if (resultWriterAdmin[iDWriter].not_initialized){
+    resultWriterAdmin[iDWriter].not_initialized=0;
+
+    //Parse the configuration
+    CTA_String_Create(&hfname);
+    sprintf(filename,"%s%s%s",workingDir,FILESEP,config);
+    CTA_String_Set(hfname,filename);
+    retval = CTA_XML_Read(hfname, &htree);
+    CTA_String_Free(&hfname);
+    if (retval !=CTA_OK){
+       sprintf(msg,"Cannot read the configuration file of the native resultwriter (%s)",filename);
+       CTA_WRITE_ERROR(msg);
+       return 0;
+    }
+    resultWriterAdmin[iDWriter].h_config = htree;  //Store configuration tree in admin
+
+    //Get the function handles
+    retval=CTA_Tree_GetHandleStr(htree, "putmessage",         &(resultWriterAdmin[iDWriter].h_putmessage)        );
+    if (retval!=CTA_OK){
+       CTA_WRITE_ERROR("Cannot get the function handle of native putmessage");
+       CTA_Tree_Free(&htree); //Cleanup
+       return 0;
+    }
+    retval=CTA_Tree_GetHandleStr(htree, "putvalue",           &(resultWriterAdmin[iDWriter].h_putvalue)          );
+    if (retval!=CTA_OK){
+       CTA_WRITE_ERROR("Cannot get the function handle of native putvalue");
+       CTA_Tree_Free(&htree); //Cleanup
+       return 0;
+    }
+    retval=CTA_Tree_GetHandleStr(htree, "putiterationreport", &(resultWriterAdmin[iDWriter].h_putiterationreport));
+    if (retval!=CTA_OK){ 
+       CTA_WRITE_ERROR("Cannot get the function handle of native putiterationreport");
+       CTA_Tree_Free(&htree); //Cleanup
+       return 0;
+    }
+
+    retval=CTA_Tree_GetHandleStr(htree, "freewriter", &(resultWriterAdmin[iDWriter].h_free));
+    if (retval!=CTA_OK){ 
+       CTA_WRITE_ERROR("Cannot get the function handle of native freewriter");
+       CTA_Tree_Free(&htree); //Cleanup
+       return 0;
+    }
+  }
+  return 1;
+}
+
+
+#undef METHOD
+#define METHOD "CTA_Resultwriter_putmessage"
+int CTA_Resultwriter_putmessage(int iDWriter, char* config, char *workingDir, char *message){
+   CTA_Function *putmessage;
+   int retval;
+   if (CTAI_Resultwriter_handleID(iDWriter, config, workingDir)){
+      // Fetch function pointer
+      retval=CTA_Func_GetFunc(resultWriterAdmin[iDWriter].h_putmessage,&putmessage);
+      if (retval!=CTA_OK){
+         CTA_WRITE_ERROR("Cannot get the function pointer");
+         return retval;
+      }
+      // Call user function
+      putmessage(iDWriter, resultWriterAdmin[iDWriter].h_config, workingDir, message, &retval,
+                 strlen(workingDir), strlen(message));
+      return retval;
+   }
+   else {
+      CTA_WRITE_ERROR("Cannot initialize or fetch data for this resultwriter");
+      return CTA_RESULTWRITER_ERROR;
+   }
+}
+
+#undef method
+#define method "CTA_Resultwriter_putvalue"
+int CTA_Resultwriter_putvalue(int iDWriter, char*config, char *workingDir, char *id, int handle, int outputlevel, char *context, int iteration){
+   CTA_Function *putvalue;
+   int retval;
+   if (CTAI_Resultwriter_handleID(iDWriter, config, workingDir)){
+      // Fetch function pointer
+      retval=CTA_Func_GetFunc(resultWriterAdmin[iDWriter].h_putvalue,&putvalue);
+      if (retval!=CTA_OK){
+         CTA_WRITE_ERROR("Cannot get the function pointer");
+         return retval;
+      }
+      // Call user function
+      putvalue(iDWriter, resultWriterAdmin[iDWriter].h_config, workingDir, id, handle, outputlevel, context, iteration, &retval,
+               strlen(workingDir), strlen(id), strlen(context));
+      return retval;
+   }
+   else {
+      CTA_WRITE_ERROR("Cannot initialize or fetch data for this resultwriter");
+      return CTA_RESULTWRITER_ERROR;
+   }
+}
+
+#undef method
+#define method "CTA_Resultwriter_putiterationreport"
+int CTA_Resultwriter_putiterationreport(int iDWriter, char*config, char *workingDir, int iteration, double cost, int handle){
+   
+   CTA_Function *putiterationreport;
+   int retval;
+
+   if (CTAI_Resultwriter_handleID(iDWriter, config, workingDir)){
+      // Fetch function pointer
+      retval=CTA_Func_GetFunc(resultWriterAdmin[iDWriter].h_putiterationreport,&putiterationreport);
+      if (retval!=CTA_OK){
+         CTA_WRITE_ERROR("Cannot get the function pointer");
+         return retval;
+      }
+      // Call user function
+      putiterationreport(iDWriter, resultWriterAdmin[iDWriter].h_config, workingDir, iteration, cost, handle, &retval,
+                         strlen(workingDir));
+      return retval;
+   }
+   else {
+      CTA_WRITE_ERROR("Cannot initialize or fetch data for this resultwriter");
+      return CTA_RESULTWRITER_ERROR;
+   }
+}
+
+
+#undef method
+#define method "CTA_Resultwriter_free"
+int CTA_Resultwriter_free(int iDWriter){
+   
+   CTA_Function *freeWriter;
+   int retval;
+
+   if (CTAI_Resultwriter_handleID(iDWriter, NULL, NULL)){
+      // Fetch function pointer
+      retval=CTA_Func_GetFunc(resultWriterAdmin[iDWriter].h_free,&freeWriter);
+      if (retval!=CTA_OK){
+         CTA_WRITE_ERROR("Cannot get the function pointer");
+         return retval;
+      }
+      // Call user function
+      freeWriter(iDWriter);
+      return CTA_OK;
+   }
+   else {
+      CTA_WRITE_ERROR("Cannot initialize or fetch data for this resultwriter");
+      return CTA_RESULTWRITER_ERROR;
+   }
+}
+
+
diff --git a/costa/native/cta/src/cta_sobs.c b/costa/native/cta/src/cta_sobs.c
new file mode 100644
index 000000000..f152b1051
--- /dev/null
+++ b/costa/native/cta/src/cta_sobs.c
@@ -0,0 +1,814 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_sobs.c $
+$Revision: 3732 $, $Date: 2012-12-19 12:08:58 +0100 (Wed, 19 Dec 2012) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <stdio.h>
+#include <memory.h>
+#include "cta_mem.h"
+#include "f_cta_utils.h"
+#include "cta_sobs.h"
+#include "cta_errors.h"
+#include "cta_handles.h"
+#include "ctai.h"
+#include "ctai_handles.h"
+#include "cta_message.h"
+
+#define CTA_SOBS_CREATE_F77          F77_CALL(cta_sobs_create,CTA_SOBS_CREATE)
+#define CTA_SOBS_CREATESEL_F77       F77_CALL(cta_sobs_createsel,CTA_SOBS_CREATESEL)
+#define CTA_SOBS_CREATETIMSEL_F77    F77_CALL(cta_sobs_createtimsel,CTA_SOBS_CREATETIMSEL)
+#define CTA_SOBS_COUNT_F77           F77_CALL(cta_sobs_count,CTA_SOBS_COUNT)
+#define CTA_SOBS_GETVAL_F77          F77_CALL(cta_sobs_getval,CTA_SOBS_GETVAL)
+#define CTA_SOBS_GETTIMES_F77        F77_CALL(cta_sobs_gettimes,CTA_SOBS_GETTIMES)
+#define CTA_SOBS_GETREALISATION_F77  F77_CALL(cta_sobs_getrealisation,CTA_SOBS_GETREALISATION)
+#define CTA_SOBS_GETEXPECTATION_F77  F77_CALL(cta_sobs_getexpectation,CTA_SOBS_GETEXPECTATION)
+#define CTA_SOBS_EVALPDF_F77         F77_CALL(cta_sobs_evalpdf,CTA_SOBS_EVALPDF)
+#define CTA_SOBS_GETCOVMAT_F77       F77_CALL(cta_sobs_getcovmat,CTA_SOBS_GETCOVMAT)
+#define CTA_SOBS_GETVAR_F77          F77_CALL(cta_sobs_getvar,CTA_SOBS_GETVAR)
+#define CTA_SOBS_GETSTD_F77          F77_CALL(cta_sobs_getstd,CTA_SOBS_GETSTD)
+#define CTA_SOBS_GETDESCRIPTION_F77  F77_CALL(cta_sobs_getdescription,CTA_SOBS_GETDESCRIPTION)
+#define CTA_SOBS_EXPORT_F77          F77_CALL(cta_sobs_export,CTA_SOBS_EXPORT)
+#define CTA_SOBS_FREE_F77            F77_CALL(cta_sobs_free,CTA_SOBS_FREE)
+
+#define IDEBUG (0)
+
+#define CLASSNAME "CTA_Sobs"
+/* Struct holding all data associated to an COSTA Vector */
+typedef struct {
+CTA_Func functions[CTA_SOBS_NUMFUNC];
+CTA_ObsDescrClass descrcl;
+} CTAI_SObsClass;                    // A SObsClass contains a list of the member-functions
+
+
+typedef struct {
+CTA_Func functions[CTA_SOBS_NUMFUNC]; // See cta_sobs.h for a list of
+                        //               available stochobs-functions
+CTA_SObsClass hsobscl;  // StochObs-class
+void *data;             // pointer to the implementation-specific data.
+CTA_ObsDescr hdescr;    // The observation-descriptor of this observer
+CTA_Handle *userdata;   // Copy of the userdata handles
+} CTAI_SObs;
+
+
+
+
+
+int CTAI_SObs_member_function(
+        // INPUTS
+        CTA_StochObs hsobs,    /* Handle of the stochastic observer of
+                                  which a member function is wanted */
+        int member,            /* Code of the member function */
+        // OUTPUT
+        CTAI_SObs **sobs,      /* All data of observer hsobs  */
+        CTA_Function **memfun  /* Member-Function pointer */
+)
+{
+
+   int retval;
+   /* Check that the given handle is indeed an observer */
+   retval=CTA_Handle_Check((CTA_Handle) hsobs, CTA_SOBS);
+   if (retval!=CTA_OK) return retval;
+
+   /* Get pointer to struct with observer data */
+   retval=CTA_Handle_GetData((CTA_Handle) hsobs,(void**) sobs);
+   if (retval!=CTA_OK) return retval;
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Func_GetFunc((*sobs)->functions[member],memfun);
+   return retval;
+}
+
+
+#undef METHOD
+#define METHOD "Create"
+int CTA_SObs_Create(
+    // INPUTS:
+        CTA_SObsClass hsobscl, // stochastic observer class
+        CTA_Handle userdata,  // e.g. the name of the database
+    // OUTPUTS:
+        CTA_StochObs *hsobs)   // The new COSTA-stochastic observer
+                                   // (handle)
+{
+   CTAI_SObs *sobs;
+   int memsize;
+   int retval;
+   CTAI_SObsClass *clsdata;
+   CTA_Function *my_Create_Size, *my_Create_Init;
+   int i;
+   char message[256];
+
+   /* Get class data containing all function pointers */
+   retval=CTA_Handle_Check((CTA_Handle) hsobscl,CTA_SOBSCLASS);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_sobsclass handle");
+       return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) hsobscl,(void**) &clsdata);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Cannot retrieve handle data");
+      return retval;
+   }
+
+   /* determine size of data object (CTA_SOBS_CREATE_SIZE)*/
+   if (IDEBUG>0){ printf("CTA_SOBS: get CREATE_SIZE \n"); }
+   retval=CTA_Func_GetFunc(clsdata->functions[CTA_SOBS_CREATE_SIZE],
+                           &my_Create_Size);
+   if (IDEBUG>0){ printf("CTA_SOBS: done calling CREATE_SIZE %p\n",my_Create_Size); }
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Cannot get function CTA_SOB_CREATE_SIZE");
+      return retval;
+   }
+   if (IDEBUG>0){ printf("Calling my_create_size\n"); }
+   my_Create_Size(&memsize,&retval);
+   if (IDEBUG>0){ printf("Done Calling my_create_size\n"); }
+   if (retval) {
+      CTA_WRITE_ERROR("Error in my_Create_Size");
+      return retval;
+   }
+
+   /* allocate memory for new stochobs object */
+   sobs=CTA_Malloc(sizeof(CTAI_SObs));
+   sobs->data=CTA_Malloc(memsize);
+   sobs->userdata = NULL;
+
+
+   /* copy function pointers */
+   for (i=0;i<CTA_SOBS_NUMFUNC;i++){
+      if (IDEBUG>0) { printf("Member function[%d]=%d\n",i,clsdata->functions[i]); }
+      sobs->functions[i]=clsdata->functions[i];
+   }
+
+   /* set other general information */
+   sobs->hsobscl = hsobscl;
+   sobs->hdescr  = CTA_NULL;
+
+   /* Initialise and fill new stochobs */
+   retval=CTA_Func_GetFunc(clsdata->functions[CTA_SOBS_CREATE_INIT],
+                           &my_Create_Init);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Cannot get function CTA_SOBS_CREATE_INIT");
+      return retval;
+   }
+   my_Create_Init(sobs->data, userdata, &retval);
+   if (retval!=CTA_OK) {
+      sprintf(message,"my_Create_Init returned with (COSTA) error code %d",retval);
+      CTA_WRITE_ERROR(message);
+      return retval;
+   }
+
+   /* Allocate new handle and return eror when unsuccesfull */
+   retval=CTA_Handle_Create("stochobs",CTA_SOBS,sobs,hsobs);
+   if (retval) {
+      CTA_WRITE_ERROR("Cannot create handle");
+      return retval;
+   }
+
+   return CTA_OK;
+}
+
+#undef METHOD
+#define METHOD "CreateTimSel"
+int CTA_SObs_CreateTimSel(
+   CTA_StochObs hsobsin,   /* Handle of the stochastic observer of
+                                    which  a selection is to be made*/
+   CTA_Time timespan,      /* Timespan over which selection has te be made*/
+   CTA_StochObs *hsobsout) /* The new COSTA-stochastic observer
+                                    (handle) */
+{
+   int ierr;
+   char str[80];
+   double t1,t2;
+   double eps;
+
+   CTA_String sselect;
+
+   if (IDEBUG>0) { printf("DEBUG: CTA_SObs_CreateTimSel: START \n");}
+
+   *hsobsout=CTA_NULL;
+   if (hsobsin!=CTA_NULL){
+      // Get interval of timespan (t1,t2)
+      ierr=CTA_Time_GetSpan(timespan,&t1,&t2);
+     if (ierr!=CTA_OK) {
+        char message[1024];
+        sprintf(message,"Cannot retrieve interval of timespan (%g,%g)",t1,t2);
+       CTA_WRITE_ERROR(message);
+        return ierr;
+      }
+      // Create the Selection string (SQL-statement)
+      ierr=CTA_String_Create(&sselect);
+     if (ierr!=CTA_OK) {
+        CTA_WRITE_ERROR("Cannot create string");
+       return ierr;
+      }
+
+      eps=(t2-t1)*1.0e-4+1.0e-16;
+      t1=t1+eps;
+      t2=t2+eps;
+      sprintf(str, "time BETWEEN %f AND %f",t1, t2 );
+      ierr=CTA_String_Set(sselect,str);
+     if (ierr!=CTA_OK) {
+        CTA_WRITE_ERROR("Cannot set string");
+       return ierr;
+      }
+
+     // Create the selcection
+      ierr=CTA_SObs_CreateSel(hsobsin, sselect, hsobsout);
+      if (ierr!=CTA_OK) {
+        CTA_WRITE_ERROR("Error in CreateSel");
+       return ierr;
+      }
+
+      // Free work variables
+      ierr=CTA_String_Free(&sselect);
+     if (ierr!=CTA_OK)  {
+        CTA_WRITE_ERROR("Cannot free string");
+       return ierr;
+      }
+   }
+   return CTA_OK;
+};
+
+#undef METHOD
+#define METHOD "CreateSel"
+int CTA_SObs_CreateSel(
+   // INPUTS:
+   CTA_StochObs hsobsin,   /* Handle of the stochastic observer of
+                                    which  a selection is to be made*/
+   CTA_Handle userdata,   /* Inputs necessary for making a selection */
+   // OUTPUTS:
+   CTA_StochObs *hsobsout) /* The new COSTA-stochastic observer
+                                    (handle) */
+{
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call    */
+   CTAI_SObs* sobsin;          /* All data of observer hsobsin  */
+   CTAI_SObs* sobsout;         /* All data of observer hsobsout */
+   CTA_Function *memfun;       /* Function that must be called  */
+   int memsize,i;
+
+   if (hsobsin==CTA_NULL){
+      *hsobsout=CTA_NULL;
+      return CTA_OK;
+   }
+   /* Look up the Create-size function and the data of the input observer*/
+   retval = CTAI_SObs_member_function(hsobsin, CTA_SOBS_CREATE_SIZE,
+                                      &sobsin, &memfun);
+   if (retval) {
+       CTA_WRITE_ERROR("Error while looking up the Create-size function and the data of the input observer");
+       return retval;
+   }
+
+   /* Determine the necessary memory size and allocate memory */
+   memfun(&memsize,&retval);
+   if (retval) {
+       CTA_WRITE_ERROR("Error while determining the necessary memory size and allocate memory");
+       return retval;
+   }
+
+   sobsout=CTA_Malloc(sizeof(CTAI_SObs));
+   sobsout->data=CTA_Malloc(memsize);
+   sobsout->userdata=NULL;
+
+
+   /* copy function pointers and observation class handle */
+   sobsout->hsobscl=sobsin->hsobscl;
+   for (i=0;i<CTA_SOBS_NUMFUNC;i++)
+      { sobsout->functions[i]=sobsin->functions[i]; }
+
+   /* Look up member function and observer data */
+   retval = CTA_Func_GetFunc(sobsout->functions[CTA_SOBS_CREATE_SELECTION],
+                             &memfun);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Error while loking up member functions and observer data");
+       return retval;
+   }
+
+   /* Call (user) implementation */
+   //   printf("memfun, sobsout->functions[i],  %d %d\n",&memfun,sobsout->functions[CTA_SOBS_CREATE_SELECTION]);
+   //   printf("cta_sobs_createsel: calling member function with userdata %d \n",userdata);
+   memfun(sobsin->data,&userdata,sobsout->data,&retval);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Error while calling (user) implementation");
+       return retval;
+   }
+
+   /* Allocate new handle and return error when unsuccesfull */
+   retval=CTA_Handle_Create("stochobs",CTA_SOBS,sobsout,hsobsout);
+   if (IDEBUG>0) printf("end of cta_sobs_createsel: retval %d  handle %d \n",retval,*hsobsout);
+
+   return retval;
+};
+
+
+
+#undef METHOD
+#define METHOD "Count"
+int CTA_SObs_Count(
+   // INPUTS:
+   CTA_StochObs hsobs,   /* Handle of the stochastic observer */
+   // OUTPUTS:
+   int *nmeasr           /* number of measurements in this observer */
+   )
+{
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call   */
+   CTAI_SObs *sobs;            /* All data of observer hsobs   */
+   CTA_Function *memfun;       /* Function that must be called */
+
+   /* trivial return we accept CTA_NULL -> nmeasr=0 */
+   if (hsobs==CTA_NULL){
+      *nmeasr=0;
+      return CTA_OK;
+   }
+   /* Look up member function and sobserver data */
+   retval = CTAI_SObs_member_function(hsobs, I_CTA_SOBS_COUNT,
+                                      &sobs, &memfun);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Error while looking up member function and sobserver data");
+       return retval;
+   }
+
+   /* Call (user) implementation */
+   memfun(sobs->data,nmeasr,&retval);
+   return retval;
+};
+
+#undef METHOD
+#define METHOD "GetvVl"
+int CTA_SObs_GetVal(
+   // INPUTS:
+   CTA_StochObs hsobs,   /* Handle of the stochastic observer */
+   // OUTPUTS:
+   CTA_Vector hvec       /* measurements */
+   )
+{
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call   */
+   CTAI_SObs *sobs;            /* All data of observer hsobs   */
+   CTA_Function *memfun;       /* Function that must be called */
+
+   /* Look up member function and sobserver data */
+   retval = CTAI_SObs_member_function(hsobs, CTA_SOBS_GET_VALUES,
+                                      &sobs, &memfun);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Error while looking up member function and sobserver data");
+       return retval;
+   }
+
+   /* Call (user) implementation */
+   memfun(sobs->data,&hvec,&retval);
+   return retval;
+};
+
+#undef METHOD
+#define METHOD "GetTimes"
+int CTA_SObs_GetTimes(
+   // INPUTS:
+   CTA_StochObs hsobs,   /* Handle of the stochastic observer */
+   // OUTPUTS:
+   CTA_Vector hvec       /* time instances associated to measurements */
+   )
+{
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call   */
+   CTAI_SObs *sobs;            /* All data of observer hsobs   */
+   CTA_Function *memfun;       /* Function that must be called */
+
+   /* Look up member function and sobserver data */
+   retval = CTAI_SObs_member_function(hsobs, CTA_SOBS_GET_TIMES,
+                                      &sobs, &memfun);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Error while looking up member function and sobserver data");
+       return retval;
+   }
+
+   /* Call (user) implementation */
+   memfun(sobs->data,&hvec,&retval);
+   return retval;
+};
+
+
+#undef METHOD
+#define METHOD "GetRealisation"
+int CTA_SObs_GetRealisation(
+   // INPUTS:
+   CTA_StochObs hsobs,   /* Handle of the stochastic observer */
+   // OUTPUTS:
+   CTA_Vector hvec       /* measurements */
+   )
+{
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call   */
+   CTAI_SObs *sobs;            /* All data of observer hsobs   */
+   CTA_Function *memfun;       /* Function that must be called */
+
+   /* Look up member function and sobserver data */
+   retval = CTAI_SObs_member_function(hsobs, CTA_SOBS_GET_REALISATION,
+                                      &sobs, &memfun);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Error while looking up member function and sobserver data");
+       return retval;
+   }
+
+   /* Call (user) implementation */
+   memfun(sobs->data,&hvec,&retval);
+   return retval;
+};
+
+
+
+
+#undef METHOD
+#define METHOD "GetExpectation"
+int CTA_SObs_GetExpectation(
+   // INPUTS:
+   CTA_StochObs hsobs,   /* Handle of the stochastic observer */
+   // OUTPUTS:
+   CTA_Vector hvec       /* measurements */
+   )
+{
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call   */
+   CTAI_SObs *sobs;            /* All data of observer hsobs   */
+   CTA_Function *memfun;       /* Function that must be called */
+
+   /* Look up member function and sobserver data */
+   retval = CTAI_SObs_member_function( hsobs, CTA_SOBS_GET_EXPECTATION,
+                                       &sobs, &memfun);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Error while looking up member function and sobserver data");
+       return retval;
+   }
+
+   /* Call (user) implementation */
+   memfun(sobs->data,&hvec,&retval);
+   return retval;
+};
+
+
+
+#undef METHOD
+#define METHOD "EvalPDF"
+int CTA_SObs_EvalPDF(
+/* Ik vind dit nogal een vage functie. Ik weet niet wat een mens
+   hiermee moet gaan doen. In sommige gevallen kan ook geen eerlijk
+   antwoord worden gegeven (Dirac-achtige dingen en zo) */
+
+   // INPUTS:
+   CTA_StochObs hsobs,   /* Handle of the stochastic observer */
+   CTA_Vector hvecx,     /* location for evaluating PDF */
+   // OUTPUTS:
+   CTA_Vector hvecy      /* PDF-value */
+   )
+{
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call   */
+   CTAI_SObs *sobs;            /* All data of observer hsobs   */
+   CTA_Function *memfun;       /* Function that must be called */
+
+   /* Look up member function and sobserver data */
+   retval = CTAI_SObs_member_function(hsobs, CTA_SOBS_EVALUATE_PDF,
+                                      &sobs, &memfun);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Error while looking up member function and sobserver data");
+       return retval;
+   }
+
+   /* Call (user) implementation */
+   memfun(sobs->data,&hvecx,&hvecy,&retval);
+   return retval;
+};
+
+
+
+
+#undef METHOD
+#define METHOD "GetCovMat"
+int CTA_SObs_GetCovMat(
+   // INPUTS:
+   CTA_StochObs hsobs,   /* Handle of the stochastic observer */
+   // OUTPUTS:
+   CTA_Matrix hmat       /* Covariance matrix */
+   )
+{
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call   */
+   CTAI_SObs *sobs;            /* All data of observer hsobs   */
+   CTA_Function *memfun;       /* Function that must be called */
+
+   /* Look up member function and sobserver data */
+   retval = CTAI_SObs_member_function( hsobs, CTA_SOBS_GET_COV_MATRIX,
+                                       &sobs, &memfun);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Error while looking up member function and sobserver data");
+       return retval;
+   }
+
+   /* Call (user) implementation */
+   memfun(sobs->data,&hmat,&retval);
+   return retval;
+};
+
+
+
+
+#undef METHOD
+#define METHOD "GetVar"
+int CTA_SObs_GetVar(
+   // INPUTS:
+   CTA_StochObs hsobs,   /* Handle of the stochastic observer */
+   // OUTPUTS:
+   CTA_Vector hvec       /* Variances of measurements */
+   )
+{
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call   */
+   CTAI_SObs *sobs;            /* All data of observer hsobs   */
+   CTA_Function *memfun;       /* Function that must be called */
+   int varflag;
+
+   /* Look up member function and sobserver data */
+   retval = CTAI_SObs_member_function( hsobs, CTA_SOBS_GET_VARIANCE,
+                                       &sobs, &memfun);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Error while looking up member function and sobserver data");
+       return retval;
+   }
+
+   /* Call (user) implementation */
+   varflag=CTA_TRUE;
+   memfun(sobs->data,&varflag,&hvec,&retval);
+   return retval;
+};
+
+#undef METHOD
+#define METHOD "GetStd"
+int CTA_SObs_GetStd(
+   // INPUTS:
+   CTA_StochObs hsobs,   /* Handle of the stochastic observer */
+   // OUTPUTS:
+   CTA_Vector hvec       /* Variances of measurements */
+   )
+{
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call   */
+   CTAI_SObs *sobs;            /* All data of observer hsobs   */
+   CTA_Function *memfun;       /* Function that must be called */
+   int varflag;
+
+   /* Look up member function and sobserver data */
+   retval = CTAI_SObs_member_function( hsobs, CTA_SOBS_GET_VARIANCE,
+                                       &sobs, &memfun);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Error while looking up member function and sobserver data");
+       return retval;
+   }
+
+   /* Call (user) implementation */
+   varflag=CTA_FALSE;
+   memfun(sobs->data, &varflag, &hvec, &retval);
+   return retval;
+};
+
+
+
+
+#undef METHOD
+#define METHOD "GetDescription"
+int CTA_SObs_GetDescription(
+    CTA_StochObs hsobs,     /* Handle of the stochastic observer */
+    CTA_ObsDescr *hobsdescr  /* descriptor of the stochastic observer*/
+    )
+{
+   int retval;
+   int nmeasr;           /* number of mesurements in hsobs */
+   CTAI_SObs *sobs;      /* All data of observer hsobs  */
+   CTAI_SObsClass *clsdata;
+   CTA_Handle hsobs_data;
+
+   /* Check that the given handle is indeed an observer */
+   retval=CTA_Handle_Check((CTA_Handle) hsobs, CTA_SOBS);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_sobs handle");
+       return retval;
+   }
+   /* Get pointer to struct with observer data */
+   retval=CTA_Handle_GetData((CTA_Handle) hsobs,(void**) &sobs);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Cannot retrieve handle data");
+      return retval;
+   }
+
+   /* If stochastic observer does not contain any observations
+      Return CTA_NULL                                          */
+      retval=CTA_SObs_Count(hsobs, &nmeasr);
+      if (nmeasr==0) {
+        if (IDEBUG > 0) printf("cta_stoch_observer:CTA_SObs_GetDescription: Warning: you asked observations description of empty stochastic observer %d !\n",hsobs);
+        *hobsdescr=CTA_NULL;
+        return CTA_OK;
+      }
+      retval=CTA_Handle_GetData((CTA_Handle) sobs->hsobscl,
+                                (void**) &clsdata);
+      if (retval!=CTA_OK) {
+        CTA_WRITE_ERROR("Cannot retrieve handle data");
+        return retval;
+      }
+
+      /* Create a handle for holding pointer to data-object "sobs" */
+      retval=CTA_Handle_Create("", CTA_DATABLOCK, sobs->data, &hsobs_data);
+      if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Cannot create handle");
+      return retval;
+   }
+
+      retval = CTA_ObsDescr_Create(clsdata->descrcl, hsobs_data,  hobsdescr);
+      if (retval!=CTA_OK){
+         CTA_WRITE_ERROR("Cannot create observation description");
+      }
+
+      /* Free handle (note data is not deallocated!) */
+      retval=CTA_Handle_Free(&hsobs_data);
+      if (retval!=CTA_OK) {
+        CTA_WRITE_ERROR("Cannot free handle");
+        return retval;
+      }
+
+return CTA_OK;
+}
+
+#undef METHOD
+#define METHOD "Export"
+int CTA_SObs_Export(
+   CTA_StochObs hsobs,  /* Handle of the stochastic observer */
+   CTA_Handle userdata
+   )
+{
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call   */
+   CTAI_SObs *sobs;            /* All data of observer hsobs   */
+   CTA_Function *memfun;       /* Function that must be called */
+
+   /* Look up member function and sobserver data */
+   retval = CTAI_SObs_member_function( hsobs, I_CTA_SOBS_EXPORT,
+                                       &sobs, &memfun);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Error while looking up member function and sobserver data");
+      return retval;
+   }
+
+   /* Call (user) implementation */
+   memfun(sobs->data,&userdata,&retval);
+   return retval;
+};
+
+
+
+
+#undef METHOD
+#define METHOD "Free"
+int CTA_SObs_Free(
+   CTA_StochObs *hsobs  /* Handle of stochastic observer  */
+   )
+{
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_SObs *sobs;        /* All data of stochastic observer */
+   CTA_Function *my_free; /* Function that must be called */
+   CTA_Handle *userdata;   /* User data of function call   */
+
+
+   /* Check for quick return */
+   if (*hsobs==CTA_NULL) return CTA_OK;
+
+   /* Look up member function and sobserver data */
+   retval = CTAI_SObs_member_function(*hsobs, I_CTA_SOBS_FREE,
+                                      &sobs, &my_free);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Error while looking up member function and sobserver data");
+      return retval;
+   }
+
+   /* Call (user) implementation */
+   my_free(sobs->data,&userdata,&retval);
+   free(sobs->data);
+   if (sobs->userdata) free(sobs->userdata);
+   free(sobs);
+   retval=CTA_Handle_Free(hsobs);
+
+   return retval;
+};
+
+
+CTA_Handle CTAI_SObs_GetUserData(CTA_StochObs hsobs, int index) {
+//   CTAI_SObs *sobs;        /* All data of stochastic observer */
+
+//   sobs = (CTAI_SObs *)CTAI_Handle_GetData(hsobs);
+   return CTA_NULL;
+}
+
+/* Interfacing with Fortran */
+
+CTAEXPORT void CTA_SOBS_CREATE_F77(int *hstochobscl, int *userdata,
+                         int *hstochobs, int *ierr){
+
+  *ierr=CTA_SObs_Create((CTA_SObsClass) *hstochobscl,
+     (CTA_Handle) *userdata, (CTA_StochObs *) hstochobs);
+}
+
+
+
+CTAEXPORT void CTA_SOBS_CREATESEL_F77(int *hsobsin, int *userdata,
+                            int *hsobsout, int *ierr){
+
+   *ierr=CTA_SObs_CreateSel((CTA_StochObs) *hsobsin,
+                            (CTA_Handle) *userdata,
+                            (CTA_StochObs*) hsobsout);
+}
+
+CTAEXPORT void CTA_SOBS_CREATETIMSEL_F77(int *hsobsin, int *timespan,
+                            int *hsobsout, int *ierr){
+
+   *ierr=CTA_SObs_CreateTimSel((CTA_StochObs) *hsobsin,
+                            (CTA_Time) *timespan,
+                            (CTA_StochObs*) hsobsout);
+}
+
+
+
+CTAEXPORT void CTA_SOBS_COUNT_F77(int *hsobs, int *nmeasr, int *ierr){
+
+   *ierr=CTA_SObs_Count((CTA_StochObs) *hsobs, nmeasr);
+}
+
+
+CTAEXPORT void CTA_SOBS_GETVAL_F77(int *hsobs, int *hvec, int *ierr){
+   *ierr=CTA_SObs_GetVal((CTA_StochObs) *hsobs,  (CTA_Vector) *hvec);
+}
+
+CTAEXPORT void CTA_SOBS_GETTIMES_F77(int *hsobs, int *hvec, int *ierr){
+   *ierr=CTA_SObs_GetTimes((CTA_StochObs) *hsobs,  (CTA_Vector) *hvec);
+}
+
+CTAEXPORT void CTA_SOBS_GETREALISATION_F77(int *hsobs, int *hvec, int *ierr){
+    *ierr=CTA_SObs_GetRealisation((CTA_StochObs) *hsobs,
+                                   (CTA_Vector) *hvec);
+}
+
+
+CTAEXPORT void CTA_SOBS_GETEXPECTATION_F77(int *hsobs, int *hvec, int *ierr){
+   *ierr=CTA_SObs_GetExpectation((CTA_StochObs) *hsobs,
+                                 (CTA_Vector) *hvec);
+}
+
+CTAEXPORT void CTA_SOBS_EVALPDF_F77(int *hsobs, int *hvecx,  int *hvecy, int *ierr){
+
+   *ierr=CTA_SObs_EvalPDF((CTA_StochObs) *hsobs, (CTA_Vector) *hvecx,
+                           (CTA_Vector) *hvecy);
+}
+
+CTAEXPORT void CTA_SOBS_GETCOVMAT_F77(int *hsobs, int *hmat, int *ierr){
+   *ierr=CTA_SObs_GetCovMat((CTA_StochObs) *hsobs, (CTA_Matrix) *hmat);
+}
+
+
+CTAEXPORT void CTA_SOBS_GETVAR_F77(int *hsobs, int *hvec, int *ierr){
+   *ierr=CTA_SObs_GetVar((CTA_StochObs) *hsobs, (CTA_Matrix) *hvec);
+}
+
+CTAEXPORT void CTA_SOBS_GETSTD_F77(int *hsobs, int *hvec, int *ierr){
+   *ierr=CTA_SObs_GetStd((CTA_StochObs) *hsobs, (CTA_Matrix) *hvec);
+}
+
+CTAEXPORT void CTA_SOBS_GETDESCRIPTION_F77(int *hsobs, int *hobsdescr, int *ierr){
+   *ierr=CTA_SObs_GetDescription((CTA_StochObs) *hsobs,
+                                 (CTA_ObsDescr*) hobsdescr);
+}
+
+
+CTAEXPORT void CTA_SOBS_EXPORT_F77(int  *hsobs, int *userdata, int *ierr){
+   *ierr=CTA_SObs_Export((CTA_StochObs) *hsobs, (CTA_Handle) *userdata);
+}
+
+CTAEXPORT void  CTA_SOBS_FREE_F77( int *hsobs, int *ierr ){
+   *ierr=CTA_SObs_Free( (CTA_StochObs*) hsobs);
+}
+
+
diff --git a/costa/native/cta/src/cta_sobs_combine.c b/costa/native/cta/src/cta_sobs_combine.c
new file mode 100644
index 000000000..fed3d9f42
--- /dev/null
+++ b/costa/native/cta/src/cta_sobs_combine.c
@@ -0,0 +1,704 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/trunk/costa/src/cta/cta_stoch_observer_combine.c $
+$Revision: 671 $, $Date: 2008-10-07 14:49:42 +0200 (Tue, 07 Oct 2008) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <string.h>
+#include <memory.h>
+#include "cta_mem.h"
+#include "cta.h"
+#include "ctai.h"
+#include "cta_flush.h"
+#include "cta_util_statistics.h"
+//#include "cta_util_combine.h"
+#include "cta_file.h"
+#include "cta_string.h"
+//#include "cta_obsdescr_combine.h"
+//#include "cta_sobs_combine.h"
+#include "cta_sobs.h"
+#include "cta_errors.h"
+#include "cta_reltable.h"
+
+#include "cta_sobs_combine.h"
+
+#define IDEBUG (0)
+
+
+
+void CTAI_SObs_combine_Create_Size(
+   // OUTPUTS:
+      int *memsize,           // The number of bytes which are necessary to store one 
+                              //     CTAI_SObs_combine, with a pointer to the contents (data), 
+                              //     but without the contents themselves.
+      int *retval             // error code (see cta_datatypes.h for possible error codes)
+   ){
+   *memsize=(int) sizeof(CTAI_SObs_combine);
+   *retval=CTA_OK;
+};
+
+
+
+
+
+void CTAI_SObs_combine_Create_Init(
+/*
+   Allocate the memory which is necessary to store the data necessary for a
+   combined observer
+*/
+     // IN-OUTPUTS
+        CTAI_SObs_combine *x,   // The combine-observer for which the memory must 
+                                //     be allocated
+     // INPUTS:
+        CTA_Handle userdata,   // User data: vector of size 2 containing tree with SObs and timeoffset
+     // OUTPUTS
+        int *retval)            // Error code. Possible error: Illegal data type
+{
+  //CTA_Tree hsubsobs_tr;
+  CTA_Handle *hsubsobsstr;
+  int ierr;
+  int nofsubsobs,isob;
+  CTA_String h_ss_name;
+  char ss_name[100];
+  CTA_Vector hvec1; 
+  double timeoffset;
+  CTA_Handle  userdata1, userdata2;
+
+   ierr=CTA_Handle_Check((CTA_Handle) userdata,CTA_VECTOR);
+   if (ierr!=CTA_OK) {
+     printf("CTAI_SObs_combine_Create_Init: userdata is not a vector! \n");exit(-1);
+   }
+   ierr=CTA_Vector_GetVal(userdata,1,&userdata1,CTA_HANDLE);
+   ierr=CTA_Vector_GetVal(userdata,2,&userdata2,CTA_HANDLE);
+
+   ierr=CTA_Handle_Check((CTA_Handle) userdata1,CTA_TREE);
+   if (ierr!=CTA_OK) {
+     printf("CTAI_SObs_combine_Create_Init: userdata1 is not a tree! \n");
+   }
+    printf("CTAI_SObs_combine_Create_Init: -----------boom:  \n");
+    ierr = CTA_Tree_Print(userdata1);
+    printf("------ \n");
+
+
+     /* determine number of sub-SObs (nofsubsobs) */
+     ierr = CTA_Tree_CountItems(userdata1, &nofsubsobs );
+     printf("combine-SObs: count subsobs %d %d\n",ierr, nofsubsobs);
+     x->nofsubsobs = nofsubsobs;
+
+     /* Create array with input configuration of all sobs */
+     hsubsobsstr = CTA_Malloc((nofsubsobs+1)*sizeof(CTA_Tree));
+
+
+
+     /* Create and store handles to all subsobs */
+     for (isob=1; isob<=nofsubsobs; isob++) {
+       ierr = CTA_Tree_GetItem (userdata1, isob, &hsubsobsstr[isob]);
+       printf("get substochobs: number :%d  ,retval %d \n",isob,ierr);
+       ierr=CTA_Handle_Check((CTA_Handle) hsubsobsstr[isob],CTA_SOBS);
+       printf("get substochobs TEST if it is a real sobs : retval %d \n",ierr);
+
+       ierr = CTA_String_Create(&h_ss_name);
+       ierr=CTA_Handle_GetName(hsubsobsstr[isob],h_ss_name);
+       //   printf("getname: %d \n",ierr);
+       ierr=CTA_String_Get(h_ss_name, ss_name);
+       //printf("string get: %d \n",ierr);
+       //printf("naam is %s \n",ss_name);
+     }
+
+     /* put array in cta-vector of handles */    
+     ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nofsubsobs, CTA_HANDLE, CTA_NULL, &hvec1);
+
+     for (isob=1; isob<=nofsubsobs; isob++) {
+       ierr=CTA_Vector_SetVal(hvec1,isob,&hsubsobsstr[isob],CTA_HANDLE);
+     }
+     x->subsobs = hvec1;
+
+     ierr=CTA_Vector_GetVal(userdata2,1,&timeoffset,CTA_DOUBLE);
+   if (ierr!=CTA_OK) {
+     printf("CTAI_SObs_combine_Create_Init: userdata2 is not a vector of doubles! \n");
+   }
+     x->timeoffset = timeoffset;
+
+     *retval = ierr;
+};
+
+
+
+void CTAI_SObs_combine_CreateSel(
+/*
+   Allocate the memory which is neccesary to store the data necessary for a
+   combine-observer
+*/
+     // INPUTS
+        CTAI_SObs_combine *obsin,// The combine-observer of which a selection 
+                                 // is to be made
+        CTA_Handle *userdata_in,   // User data: condition
+     // OUTPUTS
+        CTAI_SObs_combine *obsout, // The combine-observer which is a selection 
+                                // of observer obsin
+        int *retval)            // Error code
+{
+  CTA_Handle userdata=*userdata_in;
+  int nofsubsobs,isob;
+  CTA_Handle subsob;
+  CTA_StochObs hobsout;
+  int ierr, totcount, subcount, selected_subcount, sel2count, i;
+  CTA_Vector hvec1;
+  CTA_RelTable reltab2;
+  int * shadow_vals;
+  int * selected_shadow_vals;
+  int * sel2_shadow_vals;
+
+  CTA_Datatype datatype;
+
+  CTA_Handle_GetDatatype(userdata, &datatype);
+  //  if (datatype==CTA_STRING){printf("CTAI_SOBS_Combine_Createsel(0): userdata STRING\n"); }
+  // if (datatype==CTA_RELTABLE){printf("CTAI_SOBS_Combine_Createsel(0): userdata RELTABLE\n"); }
+
+
+  /* Warning: if userdata is a relation table, then calling the subsobs with this userdata
+   will not work. Solution: make a, adjusted relation table
+  */
+
+  nofsubsobs = obsin->nofsubsobs;
+
+  /* Warning: only the memory of obsout has been created already,
+     the nofsubsobs and vector have to be defined now. */
+  obsout->nofsubsobs = nofsubsobs;
+  ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nofsubsobs, CTA_HANDLE, CTA_NULL, &hvec1);      
+  
+  //  printf("CTAI_SOBS_Combine_Createsel(1): nofsubsobs: %d ierr: %d \n",nofsubsobs,ierr);
+  //printf("CTAI_SOBS_Combine_Createsel(2): userdata: %d   \n",userdata);
+
+
+  totcount = 0;
+  for (isob=1; isob<=nofsubsobs; isob++) {
+    //   printf("-------------------sobs_combine_createsel: subsob %d of %d \n",isob, nofsubsobs);
+       ierr=CTA_Vector_GetVal(obsin->subsobs,isob,&subsob,CTA_HANDLE);
+       /* check if handle is a stochobs? */
+       ierr=CTA_Handle_Check((CTA_Handle) subsob,CTA_SOBS);
+       if (IDEBUG > 5) {
+         printf("sobs_combine_createsel: TEST if subsobs %d is a real (sub-)sobs : retval %d \n",isob,ierr);}
+       /* call the createsel function for each individual sub-sobserver. Adjust userdata in case of reltable! */
+
+       ierr = CTA_SObs_Count(subsob, &subcount);
+
+
+       if (datatype!=CTA_RELTABLE || subcount == 0) {
+         ierr = CTA_SObs_CreateSel(subsob,userdata, &hobsout);
+       }
+       else {  /* relation tables has to be redefined for each subsob !           */
+        // use the relation table
+         CTA_RelTable_Count(userdata,&selected_subcount);
+
+         shadow_vals = CTA_Malloc(subcount * sizeof(int));
+ 
+         selected_shadow_vals = CTA_Malloc(selected_subcount* sizeof(int));
+         sel2_shadow_vals = CTA_Malloc(selected_subcount* sizeof(int));
+         for (i=0; i < subcount; i++) { shadow_vals[i] = i+1 ;}
+
+         CTA_RelTable_ApplyVal(userdata, shadow_vals, subcount, 
+                                           selected_shadow_vals, selected_subcount , CTA_INTEGER);
+         sel2count = 0;
+         for (i=0; i < selected_subcount; i++) { 
+           if (selected_shadow_vals[i] - totcount > 0) {
+                 sel2_shadow_vals[sel2count] = selected_shadow_vals[i] - totcount ;
+                 sel2count = sel2count + 1;
+           }
+         }
+         CTA_RelTable_Create(&reltab2);
+         CTA_RelTable_SetSelectVal(reltab2, sel2_shadow_vals, sel2count, CTA_INTEGER);
+
+         //         printf("sobcombine_createsel: nieuwe reltab: van: %d naar %d; %d \n",selected_subcount, sel2count, sel2_shadow_vals[0]);
+
+         ierr = CTA_SObs_CreateSel(subsob,reltab2, &hobsout);
+         free(shadow_vals); free(selected_shadow_vals); free(sel2_shadow_vals);
+       }
+
+       /* put the selected subsobs in the vector subsout->subsobs */
+       ierr=CTA_Vector_SetVal(hvec1,isob,&hobsout,CTA_HANDLE);    
+       if (ierr != CTA_OK) {
+         printf("ctai_sobs_createsel: ERROR %d \n",ierr);
+
+
+       totcount = totcount + subcount;
+       }
+  }
+  /* set the cta_vector of the obsout */
+  obsout->subsobs = hvec1;
+
+  *retval = ierr;
+
+};
+
+
+void CTAI_SObs_combine_Count(
+// Return the number of measurements in the observer
+   // INPUTS
+      CTAI_SObs_combine *x,  // The StochObserver of which the number of measurements is
+                                  //     returned 
+   // OUTPUTS 
+      int *nmeasr,
+      int *retval
+   )
+{
+  int nofsubsobs, isob, ierr;
+  CTA_Handle subsob;
+  int nsubmeasr;
+  
+  *nmeasr = 0;
+  nofsubsobs = x->nofsubsobs;
+
+  for (isob=1; isob<=nofsubsobs; isob++) {
+       ierr=CTA_Vector_GetVal(x->subsobs,isob,&subsob,CTA_HANDLE);
+       ierr = CTA_SObs_Count(subsob, &nsubmeasr);
+       *nmeasr = *nmeasr + nsubmeasr;
+  } 
+  if (IDEBUG>10) printf("ctai_sobs_combine_count; total: %d\n",*nmeasr);
+
+  *retval = ierr;
+};
+
+
+
+
+void CTAI_SObs_combine_GetVals(
+// Get all the values from a combine-StochObserver
+   // INPUTS
+   CTAI_SObs_combine *x, // StochObserver from which the measurements are 
+                         //    returned
+   // OUTPUTS
+   CTA_Vector *hvec,      // COSTA-vector containing the values
+   int *retval
+   )
+{
+  int nsubmeasr, totmeasr,j;
+  int nofsubsobs, offset, isob, ierr;
+  CTA_Handle subsob;
+  CTA_Vector hsubvec;
+  double *val, *subval;  
+  CTA_String *strval, *substrval;
+
+  CTA_Datatype dt;  
+
+  nofsubsobs = x->nofsubsobs;
+  /* first ask the total number of values  and the datatype*/
+  ierr = CTA_Vector_GetSize(*hvec,&totmeasr);
+  ierr = CTA_Vector_GetDatatype(*hvec,&dt);
+  if (IDEBUG > 5) printf("ctai_sobs_combine_getvals: subsobs;ask total length %d %d \n",nofsubsobs,totmeasr);
+
+  /* allocate the main array containing all the values */
+  if(dt == CTA_STRING)  {strval = CTA_Malloc(sizeof(CTA_String)*totmeasr); 
+            printf("sobs_combine_getvals: PAS OP: string als propertydatatype!\n");}
+  else { val = CTA_Malloc(sizeof(double)*totmeasr);}
+
+  offset = 0;
+  for (isob=1; isob<=nofsubsobs; isob++) {
+       ierr=CTA_Vector_GetVal(x->subsobs,isob,&subsob,CTA_HANDLE);
+       /* first count the the measurements in the subsobs */
+       ierr = CTA_SObs_Count(subsob, &nsubmeasr);
+
+       if (nsubmeasr > 0) {
+
+         /* create the vector which receives the values */
+         ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nsubmeasr, dt, CTA_NULL, &hsubvec);
+         /* receive the values */
+         ierr = CTA_SObs_GetVal(subsob, hsubvec);
+
+         /* put the values in the sub-array. In case of String-output, create the strings */
+         if (dt == CTA_STRING) {
+           substrval = CTA_Malloc(sizeof(CTA_String)*nsubmeasr); 
+           for (j=0; j<nsubmeasr; j++)
+             { ierr = CTA_String_Create(&substrval[j]);}
+           ierr=CTA_Vector_GetVals(hsubvec,substrval, nsubmeasr, dt);
+         }
+         else {
+           subval = CTA_Malloc(sizeof(double)*nsubmeasr);
+           ierr=CTA_Vector_GetVals(hsubvec,subval, nsubmeasr, dt);
+         }
+         
+         /* fill the main vector  */
+         if (dt == CTA_STRING) { 
+           for (j=0; j < nsubmeasr; j++) {
+             //the duplicate function already creates the cta_string for us.
+             ierr = CTA_String_Duplicate(substrval[j], &strval[offset+j]); 
+           }
+         } else {
+           for (j=0; j < nsubmeasr; j++) {
+             val[offset+j] = subval[j];
+           }
+         }
+         
+         offset = offset + nsubmeasr;
+         ierr = CTA_Vector_Free(&hsubvec);
+
+         // In case of String-output, erase the strings
+         if (dt == CTA_STRING)
+           {
+             for (j=0; j<nsubmeasr; j++) {
+               ierr = CTA_String_Free(&substrval[j]);}
+             free(substrval);
+           } else {
+           free(subval);}
+         
+       }
+  }
+  /* fill hvec with the main array */
+  if (dt == CTA_STRING) {
+    ierr=CTA_Vector_SetVals(*hvec,strval, totmeasr, dt);
+    for (j=0; j<totmeasr; j++)
+      ierr = CTA_String_Free(&strval[j]);
+    free(strval);
+  } else {
+    ierr=CTA_Vector_SetVals(*hvec,val, totmeasr, dt);
+    free(val);
+  }
+
+  // for (j=0; j < totmeasr; j++) {printf("%d %f\n",j, val[j]);    }
+
+  *retval = ierr;
+
+};
+
+void CTAI_SObs_combine_GetTimes(
+// Get all the times from a combine-StochObserver
+   // INPUTS
+   CTAI_SObs_combine *x, // StochObserver from which the measurements are 
+                         //    returned
+   // OUTPUTS
+   CTA_Vector *hvec,      // COSTA-vector containing the values
+   int *retval
+   )
+{ int nsubmeasr, offset,j, totmeasr; 
+  int nofsubsobs, isob, ierr;
+  CTA_Handle subsob;
+  CTA_Vector hsubvec;  
+  double *val, *subval;  
+  CTA_Datatype dt;  
+
+  nofsubsobs = x->nofsubsobs;
+  /* first ask the total number of values  and the datatype*/
+  ierr = CTA_Vector_GetSize(*hvec,&totmeasr);
+  ierr = CTA_Vector_GetDatatype(*hvec,&dt);
+  if (IDEBUG > 0) printf("ctai_sobs_combine_gettimes: subsobs;ask total length %d %d \n",nofsubsobs,totmeasr);
+
+  /* allocate the main array containing all the values */
+  val = CTA_Malloc(sizeof(double)*totmeasr);
+
+  offset = 0;
+  for (isob=1; isob<=nofsubsobs; isob++) {
+       ierr=CTA_Vector_GetVal(x->subsobs,isob,&subsob,CTA_HANDLE);
+       /* first count the the measurements in the subsobs */
+       ierr = CTA_SObs_Count(subsob, &nsubmeasr);
+       /* create the vector which receives the values */
+       ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nsubmeasr, dt, CTA_NULL, &hsubvec);
+       /* receive the values */
+       ierr = CTA_SObs_GetTimes(subsob, hsubvec);
+       
+       /* put the values in the sub-array */
+       subval = CTA_Malloc(sizeof(double)*nsubmeasr);
+       ierr=CTA_Vector_GetVals(hsubvec,subval, nsubmeasr, dt);
+
+       for (j=0; j < nsubmeasr; j++) { val[offset+j] = subval[j] ; }
+       offset = offset + nsubmeasr;
+       free(subval);
+       ierr = CTA_Vector_Free(&hsubvec);
+  }
+  /* fill hvec with the main array */
+  ierr=CTA_Vector_SetVals(*hvec,val, totmeasr, dt);
+  free(val);
+  if (IDEBUG>0) {printf("sobs_combine_gettimes: total %d, first,last:%f %f \n",totmeasr,val[0],val[totmeasr-1]);}
+
+  // for (j=0; j < totmeasr; j++) {printf("%d %f\n",j, val[j]);    }
+
+  *retval = ierr;
+
+};
+
+void CTAI_SObs_combine_GetVariances(
+// Get all the variances of the measurements in a combine-StochObserver
+   // INPUTS
+   CTAI_SObs_combine *x, // StochObserver from which the measurements are 
+                         //    returned
+   int *returnvar,        //return variance (CTA_TRUE) or std (CTA_FALSE)
+   // OUTPUTS
+   CTA_Vector *hvec,      // COSTA-vector containing the values
+   int *retval
+   )
+{
+  int nsubmeasr, offset, totmeasr; 
+  int nofsubsobs, isob, j,ierr;
+  CTA_Handle subsob;
+  CTA_Vector hsubvec;  
+  double *val, *subval;  
+  CTA_Datatype dt;  
+
+
+  nofsubsobs = x->nofsubsobs;
+  /* first ask the total number of values  and the datatype*/
+  ierr = CTA_Vector_GetSize(*hvec,&totmeasr);
+  ierr = CTA_Vector_GetDatatype(*hvec,&dt);
+  /* allocate the main array containing all the values */
+  val = CTA_Malloc(sizeof(double)*totmeasr);
+  offset = 0;
+
+  for (isob=1; isob<=nofsubsobs; isob++) {
+       ierr=CTA_Vector_GetVal(x->subsobs,isob,&subsob,CTA_HANDLE);
+       /* first count the the measurements in the subsobs */
+       ierr = CTA_SObs_Count(subsob, &nsubmeasr);
+       /* create the vector which receives the values */
+       ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nsubmeasr, dt, CTA_NULL, &hsubvec);
+       /* receive the values */
+       if (*returnvar == CTA_TRUE) {
+         ierr = CTA_SObs_GetVar(subsob,hsubvec);
+       }
+       else {
+         ierr = CTA_SObs_GetStd(subsob,hsubvec);
+       }
+       /* put the values in the sub-array */
+       subval = CTA_Malloc(sizeof(double)*nsubmeasr);
+       ierr=CTA_Vector_GetVals(hsubvec,subval, nsubmeasr, dt);
+       /* fill the main vector  */
+       for (j=0; j < nsubmeasr; j++) { val[offset+j] = subval[j]; }
+       offset = offset + nsubmeasr;
+       free(subval);
+       ierr = CTA_Vector_Free(&hsubvec);
+  }
+  /* fill hvec with the main array */
+  ierr=CTA_Vector_SetVals(*hvec,val, totmeasr, dt);
+  free(val);
+
+  *retval = ierr;
+
+};
+
+
+void CTAI_SObs_combine_GetRealisation(
+// Calculate stochastic realizations for all the measurements in a combine-StochObserver
+   // INPUTS
+   CTAI_SObs_combine *x,  // StochObserver from which the measurements are 
+                         //    returned
+   // OUTPUTS
+   CTA_Vector *hvec,      // COSTA-vector containing the realizations
+   int *retval
+   )
+{
+  int nsubmeasr, offset, totmeasr; 
+  int nofsubsobs, isob, j,ierr;
+  CTA_Handle subsob;
+  CTA_Vector hsubvec;  
+  double *val, *subval;  
+  CTA_Datatype dt;  
+
+
+
+  nofsubsobs = x->nofsubsobs;
+  /* first ask the total number of values  and the datatype*/
+  ierr = CTA_Vector_GetSize(*hvec,&totmeasr);
+  ierr = CTA_Vector_GetDatatype(*hvec,&dt);
+
+  /* allocate the main array containing all the values */
+  val = CTA_Malloc(sizeof(double)*totmeasr);
+
+  offset = 0;
+  for (isob=1; isob<=nofsubsobs; isob++) {
+       ierr=CTA_Vector_GetVal(x->subsobs,isob,&subsob,CTA_HANDLE);
+       /* first count the the measurements in the subsobs */
+       ierr = CTA_SObs_Count(subsob, &nsubmeasr);
+       /* create the vector which receives the values */
+       ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nsubmeasr, dt, CTA_NULL, &hsubvec);
+       /* Call the getRealisation of the subsob */
+       ierr = CTA_SObs_GetRealisation(subsob,hsubvec);
+       
+       /* put the values in the sub-array */
+       subval = CTA_Malloc(sizeof(double)*nsubmeasr);
+       ierr=CTA_Vector_GetVals(hsubvec,subval, nsubmeasr, dt);
+       /* fill the main vector  */
+       for (j=0; j < nsubmeasr; j++) { val[offset+j] = subval[j]; }
+       offset = offset + nsubmeasr;
+       free(subval);
+       ierr = CTA_Vector_Free(&hsubvec);
+  }
+  /* fill hvec with the main array */
+  ierr=CTA_Vector_SetVals(*hvec,val, totmeasr, dt);
+  free(val);
+  // for (j=0; j < totmeasr; j++) {printf("%d %f\n",j, val[j]);    }
+
+  *retval = ierr;
+
+};
+
+void CTAI_SObs_combine_GetCovMat(
+// Get all the variances of the measurements in a combine-StochObserver
+   // INPUTS
+   CTAI_SObs_combine *x,  // StochObserver from which the measurements are 
+                         //    returned
+   // OUTPUTS
+   CTA_Matrix *hmat,      /* Covariance matrix */
+   int *retval
+   )
+{
+  int nofsubsobs, isob, offset, j,k,ierr;
+  int nsubmeasr, nmeasr_tot,n,m;
+  CTA_Handle subsob;
+  CTA_Matrix hsubmat;
+  double *val, *subval, dzero;
+  CTA_Datatype dt;  
+
+  nofsubsobs = x->nofsubsobs;
+  /* first ask the total number of values  and the datatype*/
+  ierr = CTA_Matrix_GetSize(*hmat,&m, &n);
+  if (n != m ) printf("ctai_sobs_combine_getcovmat: ERROR: matrix is not square \n");
+  nmeasr_tot = m;
+  ierr = CTA_Matrix_GetDatatype(*hmat,&dt);
+
+  /* allocate the main array containing all the values */
+  val = CTA_Malloc(sizeof(double)*nmeasr_tot*nmeasr_tot);
+
+  dzero = 0.0;
+  offset = 0;
+
+  /* initialize the combined matrix */
+  ierr = CTA_Matrix_SetConstant(*hmat, &dzero, dt);
+
+  for (isob=1; isob<=nofsubsobs; isob++) {
+       ierr=CTA_Vector_GetVal(x->subsobs,isob,&subsob,CTA_HANDLE);
+       /* first count the the measurements in the subsobs */
+       ierr = CTA_SObs_Count(subsob, &nsubmeasr);
+       //       printf("sobs_combine_getcovmat: sobs_count %d %d \n",nsubmeasr,ierr);CTA_Matrix_Export(*hmat,CTA_FILE_STDOUT);
+
+       if (nsubmeasr > 0) {
+         /* create the matrix which receives the values */
+         ierr=CTA_Matrix_Create(CTA_DEFAULT_MATRIX, nsubmeasr, nsubmeasr, dt, CTA_NULL, &hsubmat);
+         ierr = CTA_Matrix_SetConstant(hsubmat, &dzero, dt);
+
+         /* Call the getcovmat of the subsob */
+         ierr = CTA_SObs_GetCovMat(subsob,hsubmat);
+
+         /* Fill the combined matrix. It is a block matrix, so the first and third quadrant
+            should contain zeros. This is the case since we initialized the matrix in the beginning. 
+         */
+         subval = CTA_Malloc(sizeof(double)*nsubmeasr*nsubmeasr);
+         ierr = CTA_Matrix_GetVals(hsubmat, subval, nsubmeasr, nsubmeasr, dt);
+
+         /* fill the main vector  */
+
+         for (j=0; j < nsubmeasr; j++) {
+           for (k=0; k < nsubmeasr; k++) {
+             val[(offset+j)*nmeasr_tot + offset + k] = subval[j*nsubmeasr + k]; }
+         }
+         
+         offset = offset + nsubmeasr;
+         free(subval);
+         ierr = CTA_Matrix_Free(&hsubmat);
+       }
+  }
+  /* fill hmat with the main array */
+  ierr=CTA_Matrix_SetVals(*hmat,val, nmeasr_tot, nmeasr_tot, dt);
+  free(val);
+
+ 
+  *retval = ierr;
+};
+
+
+void CTAI_SObs_combine_export(
+   CTAI_SObs_combine *x,
+   CTA_Handle *userdata,
+   int *retval
+  )
+{
+  int nofsubsobs, isob, ierr;
+  CTA_Handle subsob;
+  nofsubsobs = x->nofsubsobs;
+  for (isob=1; isob<=nofsubsobs; isob++) {
+       ierr=CTA_Vector_GetVal(x->subsobs,isob,&subsob,CTA_HANDLE);
+       ierr = CTA_SObs_Export(subsob, *userdata);
+  } 
+  *retval = ierr;
+};
+
+
+void CTAI_SObs_combine_Free(
+   CTAI_SObs_combine *x,
+   int *retval
+  )
+{  
+  int nofsubsobs, isob, ierr;
+  CTA_Handle subsob;
+  nofsubsobs = x->nofsubsobs;
+  for (isob=1; isob<=nofsubsobs; isob++) {
+       ierr=CTA_Vector_GetVal(x->subsobs,isob,&subsob,CTA_HANDLE);
+       ierr = CTA_SObs_Free(&subsob);
+  } 
+  *retval = ierr;
+}
+
+
+/*
+
+*/
+void CTA_SObs_combine_initialise(CTA_SObsClass *hsobscl)
+{
+   CTA_Intf hintf=0;
+   CTA_Func h_func[CTA_SOBS_NUMFUNC];
+   CTA_ObsDescrClass descrcl;
+   int i;
+
+   // The vector h_func is filled with COSTA-function handles of the 
+   // implementations in this file.
+   for (i=0;i<CTA_SOBS_NUMFUNC;i++){
+      h_func[i]=CTA_NULL;
+   }
+   CTA_Func_Create(" ",&CTAI_SObs_combine_Create_Size,    hintf,
+                       &h_func[CTA_SOBS_CREATE_SIZE]);
+   CTA_Func_Create(" ",&CTAI_SObs_combine_Create_Init,    hintf,
+                       &h_func[CTA_SOBS_CREATE_INIT]);
+   CTA_Func_Create(" ",&CTAI_SObs_combine_CreateSel  ,    hintf,
+                       &h_func[CTA_SOBS_CREATE_SELECTION]);
+   CTA_Func_Create(" ",&CTAI_SObs_combine_Count,          hintf,
+                       &h_func[I_CTA_SOBS_COUNT]      );
+   CTA_Func_Create(" ",&CTAI_SObs_combine_GetVals,        hintf,
+                       &h_func[CTA_SOBS_GET_VALUES] );
+   CTA_Func_Create(" ",&CTAI_SObs_combine_GetTimes,       hintf,
+                       &h_func[CTA_SOBS_GET_TIMES] );
+   CTA_Func_Create(" ",&CTAI_SObs_combine_GetVals,        hintf,
+                       &h_func[CTA_SOBS_GET_EXPECTATION]);
+   CTA_Func_Create(" ",&CTAI_SObs_combine_GetRealisation, hintf,
+                       &h_func[CTA_SOBS_GET_REALISATION]);
+   CTA_Func_Create(" ",&CTAI_SObs_combine_GetVariances,   hintf,
+                       &h_func[CTA_SOBS_GET_VARIANCE]);
+   CTA_Func_Create(" ",&CTAI_SObs_combine_GetCovMat,      hintf,
+                       &h_func[CTA_SOBS_GET_COV_MATRIX]);
+   CTA_Func_Create(" ",&CTAI_SObs_combine_export,     hintf,
+                       &h_func[I_CTA_SOBS_EXPORT]     );
+   CTA_Func_Create(" ",&CTAI_SObs_combine_Free,     hintf,
+                            &h_func[I_CTA_SOBS_FREE]     );
+   
+   CTA_ObsDescr_combine_initialise(&descrcl);
+
+   CTA_SObs_DefineClass("cta_sobs_combine",h_func,descrcl,hsobscl);
+
+
+}
+
+
+
+
+
diff --git a/costa/native/cta/src/cta_sobs_factory.c b/costa/native/cta/src/cta_sobs_factory.c
new file mode 100644
index 000000000..460b0fe02
--- /dev/null
+++ b/costa/native/cta/src/cta_sobs_factory.c
@@ -0,0 +1,310 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/trunk/costa/src/cta/cta_model_factory.c $
+$Revision: 676 $, $Date: 2008-10-09 17:00:24 +0200 (Thu, 09 Oct 2008) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+//#include <libxml/encoding.h>
+//#include <libxml/parser.h>
+//#include <libxml/tree.h>
+//#include <libxml/xinclude.h>
+//#include <libxml/xmlwriter.h>
+
+#include "cta.h"
+#include "cta_mem.h"
+
+#include "cta_datatypes.h"
+#include "ctai_xml.h"
+#include "cta_handles.h"
+#include "ctai_handles.h"
+#include "f_cta_utils.h"
+#include "cta_model.h"
+#include "cta_errors.h"
+#include "cta_f77lapack.h"
+#include "cta_reltable.h"
+#include "cta_par.h"
+#include "cta_message.h"
+
+#define MAX(a,b) (a>b ? a: b)
+#define IDEBUG (0)
+
+#define CTA_SOBS_DEFINECLASS_F77      F77_CALL(cta_sobs_defineclass,CTA_SOBS_DEFINECLASS)
+
+#define CLASSNAME "CTA_Sobs_Factory"
+
+typedef struct {
+CTA_Func functions[CTA_SOBS_NUMFUNC];
+  CTA_ObsDescrClass descrcl;
+} CTAI_SObsClass;
+
+
+#undef METHOD
+#define METHOD "CTAI_SObs_DuplicateClass"
+int CTAI_SObs_DuplicateClass(CTA_SObsClass hsobscl,
+                              CTA_SObsClass *hsobscl_dup){
+
+   CTAI_SObsClass *clsdata;
+   CTAI_SObsClass *clsdata_dup;
+   CTA_Func hfunc;
+   CTA_String sname;
+   int retval;
+   int i;
+
+   /* Get class data containing all function pointers */
+   retval=CTA_Handle_Check((CTA_Handle) hsobscl,CTA_SOBSCLASS);
+   if (retval!=CTA_OK) return retval;
+   retval=CTA_Handle_GetData((CTA_Handle) hsobscl,(void*) &clsdata);
+   if (retval!=CTA_OK) return retval;
+
+
+   /* Allocate new SObsClass object */
+   clsdata_dup=CTA_Malloc(sizeof(CTAI_SObsClass));
+
+
+   /* Duplicate all function pointers */
+   for (i=0;i<CTA_SOBS_NUMFUNC;i++){
+      CTA_Func_Duplicate(clsdata->functions[i], &hfunc);
+      clsdata_dup->functions[i]=hfunc;
+   }
+
+
+   // Allocate new handle and return eror when unsuccesfull
+   CTA_String_Create(&sname);
+   CTA_Handle_GetName(hsobscl, sname);
+   if (IDEBUG>0) printf("CTAI_SObs_DuplicateClass: NAME OF SOBS CLASS IS %s\n", CTAI_String_GetPtr(sname));
+
+
+   retval=CTA_Handle_Create(CTAI_String_GetPtr(sname), CTA_SOBSCLASS,
+                            clsdata_dup, hsobscl_dup);
+   if (retval!=CTA_OK) {
+      char msg[256];
+      sprintf(msg,"Cannot create a handle of type CTA_SOBSCLASS with name '%s'",CTAI_String_GetPtr(sname));
+      CTA_WRITE_ERROR(msg);
+   }
+   CTA_String_Free(&sname);
+   return retval;
+}
+
+
+#undef METHOD
+#define METHOD "DefineClass"
+int CTA_SObs_DefineClass(
+   // INPUTS:
+      const char *name,                // Name of the new stochobs class
+      const CTA_Func h_func[CTA_SOBS_NUMFUNC],  // function handles to
+                                       // the implementations of the
+                                       // stochobs-class' functions.
+      CTA_ObsDescrClass descrcl,       //
+   // OUTPUTS:
+      CTA_SObsClass  *hsobscl          // The (handle to) the new stochobs-class
+   ){
+
+   CTAI_SObsClass *data;
+   int retval;
+   int i;
+
+
+   /* Allocate new StochObs object */
+   data=CTA_Malloc(sizeof(CTAI_SObsClass));
+
+   data->descrcl = descrcl;
+   for (i=0;i<CTA_SOBS_NUMFUNC;i++){
+      data->functions[i]=h_func[i];
+   }
+
+   // Allocate new handle and return eror when unsuccesfull
+   retval=CTA_Handle_Create(name,CTA_SOBSCLASS,data,hsobscl);
+   retval=CTA_Handle_GetData((CTA_Handle) *hsobscl,(void**) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   // return error when unsuccesfull
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "DefineClass2"
+int CTA_SObs_DefineClass2(
+   // INPUTS:
+      const char *name,                // Name of the new stochobs class
+      const CTA_Func h_func[CTA_SOBS_NUMFUNC],  // function handles to
+                                       // the implementations of the
+                                       // stochobs-class' functions.
+      CTA_ObsDescrClass descrcl,       //
+   // OUTPUTS:
+      CTA_SObsClass  *hsobscl          // The (handle to) the new stochobs-class
+   ){
+
+   CTAI_SObsClass *data;
+   int retval;
+   int i;
+
+
+   /* Allocate new StochObs object */
+   data=CTA_Malloc(sizeof(CTAI_SObsClass));
+
+   data->descrcl = descrcl;
+   for (i=0;i<CTA_SOBS_NUMFUNC;i++){
+      data->functions[i]=h_func[i];
+   }
+
+   // Allocate new handle and return eror when unsuccesfull
+   retval=CTA_Handle_Create(name,CTA_SOBSCLASS,data,hsobscl);
+   retval=CTA_Handle_GetData((CTA_Handle) *hsobscl,(void**) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   // return error when unsuccesfull
+   return retval;
+}
+
+
+/* Interfacing with Fortran */
+
+CTAEXPORT void  CTA_SOBS_DEFINECLASS_F77(char *name, int *h_func, int *descrcl,
+                               int   *hstochobscl, int *ierr,
+                               int len_name){
+
+   char  *c_name;
+
+   /* create a c-string equivalent to name */
+   c_name=CTA_Malloc((len_name+1)*sizeof(char));
+   CTA_fstr2cstr(name,c_name,len_name);
+
+  *ierr=CTA_SObs_DefineClass( name, (CTA_Func*) h_func,
+   (CTA_ObsDescrClass) *descrcl, (CTA_SObsClass*) hstochobscl);
+
+   free(c_name);
+}
+
+/** \brief Create a COSTA sobsl class from XML
+*          (load from methods from dynamic load library).
+*
+*  \param cur_node  I  Current XML node
+*  \return             Handle to create or CTA_NULL in case of an error.
+*/
+CTA_SObsClass CTAI_XML_CreateSObsClass(xmlNode *cur_node) {
+
+   CTA_Func      hfunc;                   /* the new function */
+   xmlChar       *id = NULL;              /* id of function in XML-tree */
+   xmlChar       *clsname = NULL;         /* (lookup) name of the sobs class */
+   int           retval;                  /* return status of creation       */
+   CTA_Func      h_func[CTA_SOBS_NUMFUNC];/* List of functions */
+   xmlNode       *func_node = NULL;       /* values of children nodes */
+   int i;
+   CTA_SObsClass hsobscl;                 /* function class */
+   CTA_SObsClass hsobscl_old;
+   const char *name;
+   CTA_String   sclsname;
+   CTA_ObsDescrClass descrcl;
+
+
+   if (IDEBUG>0) printf("CTAI_XML_CreateSObsClass: Start of function\n");
+   /* Parse this node's attributes */
+   /* Get id */
+   id = xmlGetProp(cur_node, CTAI_XML_ID);
+
+   /* Get class name */
+   clsname = xmlGetProp(cur_node, CTAI_XML_NAME);
+
+   /* Check whether this is a known sobs-class */
+   CTA_String_Create(&sclsname);
+   CTA_String_Set(sclsname,(char *) clsname);
+   retval=CTA_Handle_Find(sclsname, CTA_SOBSCLASS, &hsobscl_old);
+   CTA_String_Free(&sclsname);
+
+   if (retval==CTA_OK) {
+      /* duplicate sobs class */
+
+      CTAI_SObs_DuplicateClass(hsobscl_old, &hsobscl);
+
+      /* sobs-class is known */
+      if (IDEBUG>0) printf("CTAI_XML_CreateSObsClass: found sobsclass '%s'\n",clsname);
+      if (IDEBUG>0) printf("CTAI_XML_CreateSObsClass: duplication is created \n");
+
+   } else {
+      /* Set all function handles to CTA_NULL */
+      for (i=0;i<CTA_SOBS_NUMFUNC;i++){
+         h_func[i]=CTA_NULL;
+      }
+
+      /* Load all functions that are specified in input */
+      for (func_node = cur_node->children; func_node;
+              func_node = func_node->next) {
+         if (0 == strcmp("CTA_FUNCTION", (char *) func_node->name)){
+            hfunc=CTAI_XML_CreateFunc(func_node);
+            if (hfunc!=CTA_NULL){
+               name = CTAI_Handle_GetName(hfunc);
+               if (0 == strcmp("CTA_SOBS_CREATE_SIZE", name)){
+                  h_func[CTA_SOBS_CREATE_SIZE    ]=hfunc;
+               } else if (0 == strcmp("CTA_SOBS_CREATE_INIT", name)){
+                  h_func[CTA_SOBS_CREATE_INIT    ]=hfunc;
+               } else if (0 == strcmp("CTA_SOBS_FREE", name)){
+                  h_func[I_CTA_SOBS_FREE           ]=hfunc;
+               } else if (0 == strcmp("CTA_SOBS_CREATE_SELECTION", name)){
+                  h_func[CTA_SOBS_CREATE_SELECTION        ]=hfunc;
+               } else if (0 == strcmp("CTA_SOBS_COUNT", name)){
+                  h_func[I_CTA_SOBS_COUNT      ]=hfunc;
+               } else if (0 == strcmp("CTA_SOBS_OBS_DESCRIPTION", name)){
+                  h_func[CTA_SOBS_GET_OBS_DESCRIPTION      ]=hfunc;
+               } else if (0 == strcmp("CTA_SOBS_GET_VALUES", name)){
+                  h_func[CTA_SOBS_GET_VALUES     ]=hfunc;
+               } else if (0 == strcmp("CTA_SOBS_GET_TIMES", name)){
+                  h_func[CTA_SOBS_GET_TIMES     ]=hfunc;
+               } else if (0 == strcmp("CTA_SOBS_GET_REALISATION", name)){
+                  h_func[CTA_SOBS_GET_REALISATION       ]=hfunc;
+               } else if (0 == strcmp("CTA_SOBS_GET_EXPECTATION", name)){
+                  h_func[CTA_SOBS_GET_EXPECTATION       ]=hfunc;
+               } else if (0 == strcmp("CTA_SOBS_EVALUATE_PDF", name)){
+                  h_func[CTA_SOBS_EVALUATE_PDF       ]=hfunc;
+               } else if (0 == strcmp("CTA_SOBS_GET_COV_MATRIX", name)){
+                  h_func[CTA_SOBS_GET_COV_MATRIX       ]=hfunc;
+               } else if (0 == strcmp("CTA_SOBS_GET_VARIANCE", name)){
+                  h_func[CTA_SOBS_GET_VARIANCE       ]=hfunc;
+               } else if (0 == strcmp("CTA_SOBS_EXPORT", name)){
+                  h_func[I_CTA_SOBS_EXPORT       ]=hfunc;
+               } else {
+                  printf("CTAI_XML_CreateSObsClass :Warning found unknown node %s\n", func_node->name);
+               }
+            }
+         }
+      }
+
+      /* Create a default observation description. Should be changed??? */
+      CTA_ObsDescr_sqlite3_initialise(&descrcl);
+
+      /* Create new function class */
+      retval=CTA_SObs_DefineClass2((char *)clsname, h_func, descrcl, &hsobscl);
+
+      /* Set id (=name) of handle */
+      CTAI_Handle_SetName(hsobscl, (char *) id);
+
+   }
+   CTAI_Handle_SetName(hsobscl, (char *) id);
+   xmlFree(id);
+   xmlFree(clsname);
+
+   if (IDEBUG>0) printf("CTAI_XML_CreateSObsClass: End of function\n");
+   return hsobscl;
+}
+
+
+
diff --git a/costa/native/cta/src/cta_sobs_maori.c b/costa/native/cta/src/cta_sobs_maori.c
new file mode 100644
index 000000000..9c647c698
--- /dev/null
+++ b/costa/native/cta/src/cta_sobs_maori.c
@@ -0,0 +1,74 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/trunk/costa/src/cta/cta_obsdescr_netcdf.c $
+$Revision: 671 $, $Date: 2008-10-07 14:49:42 +0200 (Tue, 07 Oct 2008) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <string.h>
+#include <stdio.h>
+#include "cta.h"
+
+void CTA_SObs_maori_initialize(CTA_SObsClass *hsobscl)
+{
+   CTA_Func h_func[CTA_SOBS_NUMFUNC];
+   char *libraryName;
+   char *functionName;
+   CTA_ObsDescrClass hobsdescrcl;
+   int i;
+
+   // The vector h_func is filled with function read from the default user dynamic library
+   libraryName="libleoda.so";
+   for (i=0;i<CTA_SOBS_NUMFUNC;i++){
+      h_func[i]=CTA_NULL;
+   }
+   functionName="ctai_sobs_maori_create_size";
+   h_func[CTA_SOBS_CREATE_SIZE]      = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="ctai_sobs_maori_create_init";
+   h_func[CTA_SOBS_CREATE_INIT]      = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="ctai_sobs_maori_create_selection";
+   h_func[CTA_SOBS_CREATE_SELECTION] = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="ctai_sobs_maori_count";
+   h_func[I_CTA_SOBS_COUNT]          = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="ctai_sobs_maori_get_values";
+   h_func[CTA_SOBS_GET_VALUES]       = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="ctai_sobs_maori_get_times";
+   h_func[CTA_SOBS_GET_TIMES]        = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="ctai_sobs_maori_get_expectation";
+   h_func[CTA_SOBS_GET_EXPECTATION]  = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="ctai_sobs_maori_get_realisation";
+   h_func[CTA_SOBS_GET_REALISATION]  = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="ctai_sobs_maori_get_variance";
+   h_func[CTA_SOBS_GET_VARIANCE]     = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="ctai_sobs_maori_get_cov_matrix";
+   h_func[CTA_SOBS_GET_COV_MATRIX]   = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="ctai_sobs_maori_export";
+   h_func[I_CTA_SOBS_EXPORT]         = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="ctai_sobs_maori_free";
+   h_func[I_CTA_SOBS_FREE]           = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+
+
+   CTA_ObsDescr_maori_initialize(&hobsdescrcl);
+
+   CTA_SObs_DefineClass("cta_sobs_maori",h_func,hobsdescrcl,hsobscl);
+}
+
+
+
+
+
diff --git a/costa/native/cta/src/cta_sobs_netcdf.c b/costa/native/cta/src/cta_sobs_netcdf.c
new file mode 100644
index 000000000..98d263e93
--- /dev/null
+++ b/costa/native/cta/src/cta_sobs_netcdf.c
@@ -0,0 +1,1021 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/trunk/costa/src/cta/cta_stoch_observer_netcdf.c $
+$Revision: 671 $, $Date: 2008-10-07 14:49:42 +0200 (Tue, 07 Oct 2008) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+
+#include "cta.h"
+#include "cta_mem.h"
+#include "ctai.h"
+#include <string.h>
+#include <memory.h>
+#include "cta_flush.h"
+
+#if HAVE_LIBNETCDF
+#include <netcdf.h>
+#endif
+
+#include "cta_sobs_netcdf.h"
+
+
+#define MAX(a,b) (a>b ? a: b)
+#define IDEBUG (0)
+
+/* This is a struct to store what we need */
+/* Specific for our case of Netcdf-OMI-data: The model will only ask two aspects: 
+   - certain time selections (typically one-hour intervals)
+   -It should be situated in a certain rectangular area (lat/lon)
+*/
+ 
+
+
+
+void CTAI_SObs_netcdf_Create_Size(
+   // OUTPUTS:
+      int *memsize,           // The number of bytes which are necessary to store one 
+                              //     CTAI_SObs_netcdf, with a pointer to the contents (data), 
+                              //     but without the contents themselves.
+      int *retval             // error code (see cta_datatypes.h for possible error codes)
+   ){
+   *memsize=(int) sizeof(CTAI_SObs_netcdf);
+   *retval=CTA_OK;
+};
+
+
+
+
+
+void CTAI_SObs_netcdf_Create_Init(
+
+
+
+/*
+   Allocate the memory which is neccesary to store the data necessary for a
+   netcdf-observer
+*/
+     // IN-OUTPUTS
+        CTAI_SObs_netcdf *x,   // The netcdf-observer for which the memory must 
+                                //     be allocated
+     // INPUTS:
+        CTA_Handle userdata,   // User data: database-name. Note: the string ends with the date
+                               // It is possible that userdata is vector of two handles: (name,timeoffset)
+     // OUTPUTS
+        int *retval)            // Error code. Possible error: Illegal data type
+{
+#if HAVE_LIBNETCDF
+  char *dbname;
+  int   ncid, varid, nmeasr, intNStations;
+  size_t nstations;
+  int ierr, i, len;
+  int dimids[1];
+  CTA_Time tspan0;
+  float lon1,lon2,lat1,lat2;
+  int *flag;
+  CTAI_OMI_database *database;
+  double timeoffset;
+  CTA_Handle  userdata1, userdata2;
+
+  database = CTA_Malloc(sizeof(CTAI_OMI_database));  
+
+  // Get the name of the database file:
+
+  // Check if userdata is a string (database name) or a vector (name and timeoffset)
+   ierr=CTA_Handle_Check((CTA_Handle) userdata,CTA_VECTOR);
+   if (ierr!=CTA_OK) {
+     //then it should be only the string with the database name
+     ierr=CTA_Handle_Check((CTA_Handle) userdata,CTA_STRING);
+     if (ierr!=CTA_OK) { printf("sobs_netcdf: create :: userdata is neither a database name or vector\n");exit(-1);}
+     userdata1 = userdata; timeoffset = 0.0;
+
+     if (IDEBUG>0) {printf("sobs_netcdf_create 1: timeoffset %f ",timeoffset);}
+
+   } else { //it is a vector: deconstruct it into the database string and the timeoffset
+   
+     CTA_Vector_GetVal(userdata,1,&userdata1,CTA_HANDLE);
+     CTA_Vector_GetVal(userdata,2,&userdata2,CTA_HANDLE);
+     ierr=CTA_Vector_GetVal(userdata2,1,&timeoffset,CTA_DOUBLE);
+     if (ierr!=CTA_OK) {
+       printf("CTAI_SObs_netcdf :: Create: userdata2 is not a vector of doubles! \n");
+   }
+     if (IDEBUG>0) {printf("sobs_netcdf_create 2: timeoffset %f ",timeoffset);}
+   }
+
+
+  // Allocate a name-string 
+  *retval = CTA_String_GetLength(userdata1, &len);
+  if (*retval!=CTA_OK) return;
+  // Get the name of the database
+  dbname = CTA_Malloc((len+1)*sizeof(char));
+  *retval = CTA_String_Get(userdata1, dbname);
+  if (*retval!=CTA_OK) return;
+
+  // Open the database
+  ierr = nc_open(dbname, NC_NOWRITE, &ncid);
+  if (ierr != CTA_OK)
+    {printf("Error: could not open netcdf-file \n");
+      *retval = -1;
+      return;}
+
+   // Count the stations in this observer. 
+  // Note: we denote each valid observation of the satellite as one station with a specific pace and time. 
+  // The number of valid observations is O(10^4), while the total grid is typical O(10^5), in the case of 
+  //   OMI-data and our part of Europe: 350x501 and a time series of length 8000.
+  ierr = nc_inq_varid(ncid, "longitude", &varid);
+  if (ierr != CTA_OK)
+    {printf("Error: could not find datetime \n");*retval = -1; return;}
+
+  ierr = nc_inq_vardimid(ncid, varid, dimids); // get ID of time dimension
+  if (ierr != CTA_OK)
+    {printf("Error: could not find dimension ID \n");*retval = -1; return;}
+
+  ierr = nc_inq_dimlen(ncid, dimids[0], &nstations); /* get length of time series */
+  if (ierr != CTA_OK)
+    {printf("Error: could not length of time series \n");*retval = -1; return;}
+
+ 
+  // initialise the selection relation table
+  CTA_RelTable_Create(&(x->selectionReltab));
+
+  flag=CTA_Malloc(nstations*sizeof(int));
+  for (i=0; i < (int) nstations; i++) {flag[i] = i+1;}
+
+  intNStations=(int) nstations; 
+  CTA_RelTable_SetSelectVal(x->selectionReltab, flag, intNStations,CTA_INTEGER);
+  if (IDEBUG>0) {printf("ctai_sobs_netcdf_init: number of stations/length of time series: %d \n",intNStations);}
+  
+
+  // initialise the time
+   CTA_Time_Create(&tspan0);
+   CTA_Time_SetSpan(tspan0, 0.0, 24.0);
+
+   // initialise the spatial window. Maybe it should be read from netcdf-file (global attribute)
+   lon1 = -90.0; // far west
+   lon2 = 90.0;  // far east
+   lat1 = 0.0;  //equator
+   lat2= 90.0;  //north pole   
+
+   // Measurements in this observer = Number of stations in our case! 
+   nmeasr = nstations;
+
+   // Set the observer-fields
+   x->tspan = tspan0;    
+   x->bb_lon[0] = lon1;   x->bb_lon[1] = lon2;
+   x->bb_lat[0] = lat1;   x->bb_lat[1] = lat2;
+
+
+
+   database->dbname = dbname;
+   database->ncid = ncid;
+   database->nusers = 1;
+
+   x->database = database;
+   x->nstations = nstations;
+   x->nmeasr    = nmeasr;
+   x->nmeasr_orig = nmeasr;
+
+
+   x->timeoffset = timeoffset;
+
+if (IDEBUG>0) {printf("END of cta_sobs_netcdf_create_init \n");}
+   *retval=CTA_OK;
+
+#else
+   printf("Error: CTAI_SObs_netcdf_Create_Init: COSTA is compiled without NETCDF support\n");
+  *retval=CTA_NOT_IMPLEMENTED;
+#endif
+
+};
+
+
+
+void CTAI_SObs_netcdf_CreateSelString(
+/*
+   Allocate the memory which is necessary to store the data necessary for a
+   netcdf-observer
+*/
+     // INPUTS
+        CTAI_SObs_netcdf *obsin,// The netcdf-observer of which a selection 
+                                 // is to be made
+        CTA_Handle userdata,   // User data: condition
+     // OUTPUTS
+        CTAI_SObs_netcdf *obsout, // The netcdf-observer which is a selection 
+                                // of observer obsin
+        int *retval)            // Error code
+{
+  #if HAVE_LIBNETCDF
+  int len, ierr,i ;
+      char *condition;
+      double t1_in,t2_in, t1,t2, t1_out, t2_out; 
+      float t1_f, t2_f;
+      int nstations, count_out, varid;
+      CTA_Vector hvec_time;
+      double *vec_time;
+      int *selvec, *flag;
+      CTA_RelTable reltab1;
+      float bbx_1, bbx_2, bby_1, bby_2;
+      float *nc_values1, *nc_values2, *nc_sel_values1, *nc_sel_values2;
+
+      if (IDEBUG>0) {printf("DEBUG:  CTAI_SObs_netcdf_CreateSelString START\n");}
+      if (IDEBUG>0) {printf("DEBUG: *obsin: %p  \n", obsin);}
+      if (IDEBUG>0) {printf("DEBUG: *obsout: %p \n", obsout);}
+
+      // Allocate a name-string 
+      *retval = CTA_String_GetLength(userdata, &len);
+      if (*retval!=CTA_OK) return;
+
+      // Get the condition
+      condition = CTA_Malloc((len+1)*sizeof(char));
+      ierr = CTA_String_Get(userdata, condition);
+      if (ierr!=CTA_OK) return;
+
+   // Link the database also to this observer and keep track of the number of 
+   // observers using this database
+      obsout->database = obsin->database;
+      obsout->database->nusers++;
+
+
+      // IF the callling sequence has been : cta_sobs_createtimsel -> cta_sobs_createsel -> 
+      // ctai_sobs_netcdf_createsel, THEN
+      // The condition is the string starting with 'time BETWEEN'
+      if (strstr(condition,"time BETWEEN") != NULL) {
+        //printf("condition string is: |%s| \n",condition);
+        ierr = sscanf(condition,"%*s %*s %f %*s %f",&t1_f,&t2_f); t1 = t1_f; t2=t2_f;
+
+      } else {
+        t1 = 0.0; t2 = 24.0;
+      }
+      if (IDEBUG>0){
+        printf("ctai_sobs_createselstring: time selection: %s %f %f %d \n",obsout->database->dbname,t1,t2,ierr);
+      }
+
+      // It may be possible that a spatial restriction is given. 
+      for (i=0; i < 2; i++) {
+        obsout->bb_lon[i] = obsin->bb_lon[i];
+        obsout->bb_lat[i] = obsin->bb_lat[i]; 
+      }
+
+      /* spatial selection: */
+      if (strstr(condition,"bounding box") !=NULL) {
+        ierr = sscanf(condition,"%*s %*s  %f  %f %f %f ",&bbx_1,&bbx_2, &bby_1,&bby_2);
+        obsout->bb_lon[0] = bbx_1;        obsout->bb_lon[1] = bbx_2;
+        obsout->bb_lat[0] = bby_1;        obsout->bb_lat[1] = bby_2;
+        //        printf("SOBS_NETCDF_CREATESEL: condition |%s| \n",condition);
+        //printf("SOBS_NETCDF_CREATESEL: condition window is: %f %f %f %f \n",bbx_1,bbx_2,bby_1,bby_2);
+      }
+     
+   // Combine the input-condition and the condition of the input observer.
+   // In our netcdf-case, the condition is a combination of spatial window and timespan.
+      ierr =  CTA_Time_GetSpan(obsin->tspan, &t1_in, &t2_in);
+      t1_out = MAX(t1_in,t1);
+      t2_out = MIN(t2_in,t2);
+      ierr = CTA_Time_Create(&(obsout->tspan));
+      ierr =  CTA_Time_SetSpan(obsout->tspan, t1_out, t2_out);
+      if (ierr != CTA_OK) {printf("time_create in ctai_sobs_netcdf_createsel FAILED!\n");}
+
+
+
+      // We must recompute the number of stations after selection. Also, the relation table should be adjusted. 
+
+
+      // first get the time vector. 
+      ierr = CTA_Vector_Create(CTA_DEFAULT_VECTOR, obsin->nmeasr, CTA_DOUBLE, CTA_NULL, &hvec_time);
+      CTAI_SObs_netcdf_GetTimes(obsin, &hvec_time, &ierr);
+      vec_time = CTA_Malloc(obsin->nmeasr * sizeof(double));
+      ierr=CTA_Vector_GetVals(hvec_time,vec_time, obsin->nmeasr, CTA_DOUBLE);
+
+
+      if (IDEBUG>5){
+        printf("createsel: ierr time vector %d ; timespan:  %f %f \n",ierr, t1_out, t2_out);
+        printf("vectime, %f  %f \n",vec_time[0],vec_time[1]);
+      }
+
+      // We use the relation table to investigate if the current selection should be restricted more.
+
+
+      nc_values1 = CTA_Malloc(obsin->nmeasr_orig * sizeof(float));
+      nc_values2 = CTA_Malloc(obsin->nmeasr_orig * sizeof(float));
+      nc_sel_values1 = CTA_Malloc(obsin->nmeasr * sizeof(float));
+      nc_sel_values2 = CTA_Malloc(obsin->nmeasr * sizeof(float));
+      nc_inq_varid(obsin->database->ncid, "longitude", &varid);
+      nc_get_var_float(obsin->database->ncid, varid, &nc_values1[0]);
+      nc_inq_varid(obsin->database->ncid, "latitude", &varid);
+      nc_get_var_float(obsin->database->ncid, varid, &nc_values2[0]);
+      CTA_RelTable_ApplyVal(obsin->selectionReltab, nc_values1, obsin->nmeasr_orig, 
+                                nc_sel_values1,obsin->nmeasr, CTA_REAL);
+      CTA_RelTable_ApplyVal(obsin->selectionReltab, nc_values2, obsin->nmeasr_orig, 
+                                nc_sel_values2,obsin->nmeasr, CTA_REAL);
+
+      if (IDEBUG>10){
+        printf("latitudes of obsin: \n");
+        for (i=0; i < MIN(15,obsin->nmeasr); i++) {
+                  printf(" %d %f\n ",i, nc_values2[i]);
+                }
+        printf("------------%f %f ------\n",obsout->bb_lat[0],obsout->bb_lat[1]);
+      }
+
+      count_out = 0;
+      flag=CTA_Malloc((obsin->nmeasr)*sizeof(int));
+      for (i=0; i < obsin->nmeasr; i++) {
+        flag[i]= 0;
+        if ((vec_time[i] >= t1_out) && (vec_time[i] <=t2_out) && 
+            (nc_sel_values1[i] > obsout->bb_lon[0]) && (nc_sel_values1[i] < obsout->bb_lon[1]) 
+            && (nc_sel_values2[i] > obsout->bb_lat[0]) && (nc_sel_values2[i] < obsout->bb_lat[1]) 
+            ) {
+          flag[i] = 1;
+          //printf("selected: %d = %f ; lat %f \n",i,vec_time[i], nc_sel_values2[i]);
+          count_out = count_out + 1;
+        }
+      }
+      
+      free(nc_values1); free(nc_values2);free(nc_sel_values1);free(nc_sel_values2);
+
+      nstations = count_out;
+
+
+      obsout->nstations = nstations;
+      obsout->nmeasr    = nstations;
+
+
+      if (IDEBUG>0) {printf("selection made: now %d stations \n",nstations);}
+
+      // make the relation table for this restriction
+      ierr = CTA_RelTable_Create(&reltab1);
+      selvec = CTA_Malloc(obsout->nmeasr * sizeof(int));
+
+      
+      count_out = 0;
+      for (i=0; i < obsin->nmeasr; i++) {
+        if (flag[i] > 0 && count_out < obsout->nmeasr) {
+          selvec[count_out] = i+1;
+          count_out = count_out + 1;
+        }
+      }
+
+      ierr = CTA_RelTable_SetSelectVal(reltab1, selvec, obsout->nmeasr, CTA_INTEGER);                
+
+      if (IDEBUG>5){printf("createselstring: setselect ierr  %d \n",ierr);}
+
+      // now combine the two relation tables to get the whole selection
+      ierr = CTA_RelTable_Create(&(obsout->selectionReltab)); //moet dit?????
+
+      ierr=CTA_RelTable_SetTableCombine(obsout->selectionReltab, obsin->selectionReltab, CTA_FALSE,
+                                                reltab1, CTA_FALSE);
+
+      obsout->nmeasr_orig = obsin->nmeasr_orig;
+      obsout->timeoffset = obsin->timeoffset;
+
+      //printf("createsel: ierr reltable_combine: end  %d \n",ierr);
+
+      free(vec_time);free(selvec);
+      free(flag);
+      ierr = CTA_RelTable_Free(&reltab1);
+      *retval = CTA_OK;
+
+      if (IDEBUG>0){printf("createselstring %s : END:  from  %d to %d \n",obsin->database->dbname,obsin->nmeasr, obsout->nmeasr);}
+      if (IDEBUG>10){printf("createsel: END: lat.window  %f to %f \n",obsout->bb_lat[0], obsout->bb_lat[1]);}
+#else
+   printf("Error: CTAI_SObs_netcdf_CreateSelString: COSTA is compiled without NETCDF support\n");
+  *retval=CTA_NOT_IMPLEMENTED;
+#endif
+
+};
+
+
+void CTAI_SObs_netcdf_CreateSelRelTab(CTAI_SObs_netcdf *obsin, CTA_RelTable reltab, 
+        CTAI_SObs_netcdf *obsout, int *retval) 
+{
+#if HAVE_LIBNETCDF
+  int nsel, ierr, i;
+  double t1_in,t2_in;
+
+
+  //   printf(" CTAI_SObs_netcdf_CreateSelRelTab: START: NMEASR %d \n",obsin->nmeasr);
+   CTA_RelTable_Count(obsin->selectionReltab,&nsel);
+
+  /* Note: this is a quick implementation. The selection is not again checked w.r.t. the spatial window. Use with care!   */
+
+   // First: combine the two relation tables to get the whole selection
+   ierr = CTA_RelTable_Create(&(obsout->selectionReltab)); //moet dit?????
+   //   printf("createselRelTab: ierr reltable_create van obsout:  %d \n",ierr);
+   ierr=CTA_RelTable_SetTableCombine(obsout->selectionReltab, obsin->selectionReltab, CTA_FALSE,
+                                                reltab, CTA_FALSE);
+
+   if (ierr != CTA_OK) {
+     printf("ERROR in  CTAI_SObs_netcdf_CreateSelRelTab: %d \n",ierr);
+     *retval = ierr;
+     return;
+   }
+
+   obsout->nmeasr_orig = obsin->nmeasr_orig;
+   obsout->timeoffset = obsin->timeoffset;
+
+   CTA_RelTable_Count(obsout->selectionReltab,&nsel);
+   obsout->nmeasr = nsel; 
+   // will this also work in case of empty selection?  Yes.
+
+
+   //   printf(" CTAI_SObs_netcdf_CreateSelRelTab: %s SELECTION NMEASR %d \n",obsin->database->dbname,nsel);
+   //  printf(" more: handles *obsin  %d  \n", *obsin);
+   //printf(" more: handles *obsout %d   \n", *obsout);
+   //printf(" more: handles  obsin->reltab ;reltab %d  %d \n",obsin->selectionReltab,reltab);
+
+   for (i=0; i < 2; i++) {
+     obsout->bb_lon[i] = obsin->bb_lon[i];
+     obsout->bb_lat[i] = obsin->bb_lat[i]; 
+   }
+
+
+   // Link the database also to this observer and keep track of the number of 
+   // observers using this database
+   obsout->database = obsin->database;
+   obsout->database->nusers++;
+
+   obsout->nstations = obsout->nmeasr;
+
+
+   CTA_Time_GetSpan(obsin->tspan, &t1_in, &t2_in);
+   CTA_Time_Create(&(obsout->tspan));
+   CTA_Time_SetSpan(obsout->tspan, t1_in, t2_in);
+
+
+
+#else
+   printf("Error: CTAI_SObs_netcdf_CreateSelRelTab: COSTA is compiled without NETCDF support\n");
+  *retval=CTA_NOT_IMPLEMENTED;
+#endif
+
+};
+
+
+
+
+
+void CTAI_SObs_netcdf_CreateSel(
+/*
+   Allocate the memory which is neccesary to store the data necessary for a
+   netcdf-observer
+*/
+     // INPUTS
+        CTAI_SObs_netcdf *obsin,// The netcdf-observer of which a selection 
+                                 // is to be made
+        CTA_Handle *userdata_in,   // User data: condition
+     // OUTPUTS
+        CTAI_SObs_netcdf *obsout, // The netcdf-observer which is a selection 
+                                // of observer obsin
+        int *retval)            // Error code
+{
+   CTA_Handle userdata=*userdata_in;
+#if HAVE_LIBNETCDF
+   CTA_Datatype datatype;
+
+   if (IDEBUG > 0) { printf("ctai_sobs_netcf_createsel: obsin : %p  \n", obsin);}
+   if (IDEBUG > 0) { printf("ctai_sobs_netcf_createsel: obsout : %p \n", obsout);}
+
+
+   CTA_Handle_GetDatatype(userdata, &datatype);
+   if (datatype==CTA_STRING){
+   if (IDEBUG > 0) { printf("ctai_sobs_netcf_createsel: userdata STRING : %d   \n",userdata);}
+      CTAI_SObs_netcdf_CreateSelString(obsin,userdata,obsout,retval); 
+   } else if (datatype==CTA_RELTABLE){
+   if (IDEBUG > 0) { printf("ctai_sobs_netcf_createsel: userdata RELTAB : %d  \n",userdata);}
+      CTAI_SObs_netcdf_CreateSelRelTab(obsin,userdata,obsout,retval); 
+   } else {
+      printf("Error in CTAI_SObs_netcdf_CreateSel: datatype (%d) of selection is not supported\n", datatype);
+      *retval=CTA_INPUT_OBJECT_NOT_SUPPORTED;
+   }
+#else
+   printf("Error: CTAI_SObs_netcdf_CreateSel: COSTA is compiled without NETCDFsupport\n");
+  *retval=CTA_NOT_IMPLEMENTED;
+#endif
+};
+
+
+void CTAI_SObs_netcdf_Count(
+// Return the number of measurements in the observer
+   // INPUTS
+      CTAI_SObs_netcdf *x,  // The StochObserver of which the number of measurements is
+                                  //     returned 
+   // OUTPUTS 
+      int *nmeasr,             
+      int *retval
+   )
+{
+#if HAVE_LIBNETCDF
+  *nmeasr = x->nmeasr;
+  *retval = CTA_OK;
+#else
+   printf("Error: CTAI_SObs_netcdf_Count: COSTA is compiled without NETCDFsupport\n");
+  *retval=CTA_NOT_IMPLEMENTED;
+#endif
+};
+
+
+void CTAI_SObs_netcdf_GetVals(
+// Get all the values from a netcdf-StochObserver
+   // INPUTS
+   CTAI_SObs_netcdf *x, // StochObserver from which the measurements are 
+                         //    returned
+   // OUTPUTS
+   CTA_Vector *hvec,      // COSTA-vector containing the values
+   int *retval
+   )
+{
+#if HAVE_LIBNETCDF
+   int ierr, ncid, varid;
+   CTA_Datatype datatype;
+   float *nc_vcd_values;
+   CTA_Vector hvcd;
+
+   /* Get datatype of vector */
+   *retval=CTA_Vector_GetDatatype(*hvec,&datatype);
+   if (*retval!=CTA_OK) return;
+
+
+   /* read values from database. First read entire  vcd-array. Note: netcdf file should first be opened */
+  // Get the netcdf-id of the database
+   ncid = x->database->ncid;
+   // read the vcd vector. We know the size: nmeasr_orig. OMI-vcd vector contains one float per entry.
+   
+   // first allocate space for temporary arrays
+   nc_vcd_values = CTA_Malloc(x->nmeasr_orig * sizeof(float));
+
+   ierr = nc_inq_varid(ncid, "vcd_trop", &varid);
+   ierr = nc_get_var_float(ncid, varid, &nc_vcd_values[0]);
+   if (ierr != CTA_OK)
+     {printf("Error: could not read vcd_trop \n");
+       *retval = -1;
+       return;}
+
+// now fill the values array with the vcd_trop. The  
+//     relation table is used to get only the selection.
+   ierr = CTA_Vector_Create(CTA_DEFAULT_VECTOR, x->nmeasr_orig, CTA_REAL, CTA_NULL,&hvcd);
+   ierr = CTA_Vector_SetVals(hvcd, nc_vcd_values, x->nmeasr_orig, CTA_REAL);
+   ierr = CTA_RelTable_Apply(x->selectionReltab, hvcd, *hvec);
+   
+
+   // TODO: use the kernel and lon/lat info to couple the no2-conc to specific imerid,izonal and iverti  
+   // This is done in the model_obs! 
+
+
+
+   CTA_Vector_Free(&hvcd);
+   free(nc_vcd_values);
+   if (*retval!=CTA_OK) return;
+
+   *retval = CTA_OK;
+#else
+   printf("Error: CTAI_SObs_netcdf_GetVals: COSTA is compiled without NETCDFsupport\n");
+  *retval=CTA_NOT_IMPLEMENTED;
+#endif
+
+
+
+};
+
+void CTAI_SObs_netcdf_GetTimes(
+// Get all the time values from a netcdf-StochObserver
+   // INPUTS
+   CTAI_SObs_netcdf *x, // StochObserver from which the measurements are 
+                         //    returned
+   // OUTPUTS
+   CTA_Vector *hvec,      // COSTA-vector containing the values
+   int *retval
+   )
+{
+#if HAVE_LIBNETCDF
+  int i, ierr, ncid, varid, idx;
+   CTA_Datatype datatype;
+   int * nc_time_values;
+   double  *selected_values;
+   int * shadow_vals;
+   int * selected_shadow_vals;
+
+   /* Get datatype of vector */
+   *retval=CTA_Vector_GetDatatype(*hvec,&datatype);
+   if (*retval!=CTA_OK) return;
+
+   /* read values from database. First read entire time array. Note: netcdf file should first be opened */
+  // Get the netcdf-id of the database
+   ncid = x->database->ncid;
+   // read the time vector. We know the size: nmeasr_orig. OMI-Time vector contains of six integers per entry.
+   
+   // first allocate space for temporary arrays
+   nc_time_values = CTA_Malloc(x->nmeasr_orig * 6 * sizeof(int));
+   selected_values = CTA_Malloc(x->nmeasr * sizeof(double));
+   shadow_vals = CTA_Malloc(x->nmeasr_orig * sizeof(int));
+   selected_shadow_vals = CTA_Malloc(x->nmeasr * sizeof(int));
+
+   // use the relation table
+   for (i=0; i < x->nmeasr_orig; i++) { shadow_vals[i] = i+1 ;}
+
+   ierr = CTA_RelTable_ApplyVal(x->selectionReltab, shadow_vals, x->nmeasr_orig, 
+                                           selected_shadow_vals,x->nmeasr, CTA_INTEGER);
+
+   ierr = nc_inq_varid(ncid, "date_time", &varid);
+   if (ierr != CTA_OK)
+     {printf("Error: could not read date_time : %d error ;  ncid %d \n",ierr,ncid);
+       *retval = -1;
+       return;}
+   ierr = nc_get_var_int(ncid, varid, &nc_time_values[0]);
+
+
+// now fill the values array with the times. The times have to be re-computed to hours and 
+//     the relation table is used to get only the selection.
+
+
+       /* NOTE: times are adjusted with a certain offset! */
+   for (i=0; i < x->nmeasr; i++) {
+     idx = selected_shadow_vals[i]  ;       
+     selected_values[i] = 1.0*nc_time_values[(idx-1)*6+3] + 
+                              nc_time_values[(idx-1)*6+4]/60.0 + 
+                              nc_time_values[(idx-1)*6+5]/3600.0  + x->timeoffset ;
+
+
+   }
+
+   //   printf("selected_shadow_vals %d \n",selected_shadow_vals[0]);
+   if (IDEBUG>0) {printf("sobs_netcdf_gettimes: time values: count, first 3: %s %d %f %f %f \n",
+             x->database->dbname,x->nmeasr, selected_values[0],selected_values[1],selected_values[2]);}
+
+
+  /* Set values in vector and clean work-memory */
+   *retval = CTA_Vector_SetVals( *hvec, selected_values, x->nmeasr, datatype);
+
+   free(selected_values); 
+   free(nc_time_values);
+   free(selected_shadow_vals);
+   free(shadow_vals);
+
+   if (*retval!=CTA_OK) return;
+
+   *retval = CTA_OK;
+#else
+   printf("Error: CTAI_SObs_netcdf_GetTimes: COSTA is compiled without NETCDFsupport\n");
+  *retval=CTA_NOT_IMPLEMENTED;
+#endif
+
+};
+
+void CTAI_SObs_netcdf_GetVariances(
+// Get all the variances of the measurements in a netcdf-StochObserver
+   // INPUTS
+   CTAI_SObs_netcdf *x, // StochObserver from which the measurements are 
+                        //    returned
+   int *returnvar,      //return variance (CTA_TRUE) or std (CTA_FALSE)
+   // OUTPUTS
+   CTA_Vector *hvec,      // COSTA-vector containing the values
+
+   int *retval
+   )
+{
+#if HAVE_LIBNETCDF
+   int ierr, ncid, varid;
+   float *stdv;
+   float *nc_values;
+
+   // first allocate space for temporary arrays
+   nc_values = CTA_Malloc(x->nmeasr_orig * sizeof(float));
+   stdv=CTA_Malloc(x->nmeasr*sizeof(float));
+
+   // Look up all the standard deviations of the measurements in the observer:
+
+   /* read values from database. First read entire  vcd-array. Note: netcdf file should first be opened */
+
+  // Get the netcdf-id of the database
+   ncid = x->database->ncid;
+   // read the vcd vector. We know the size: nmeasr_orig. OMI-vcd vector contains one float per entry.
+   ierr = nc_inq_varid(ncid, "sigma_vcd_trop", &varid);
+   if (ierr != 0) ierr = nc_get_var_float(ncid, varid, &nc_values[0]);
+   if (ierr != 0)
+     {printf("Error: could not read sigma_vcd_trop \n");
+       *retval = -1; return;}
+
+// now fill the stdv array. The relation table is used to get only the selection.
+   ierr = CTA_RelTable_ApplyVal(x->selectionReltab, nc_values, x->nmeasr_orig, 
+                                           stdv, x->nmeasr, CTA_REAL);
+   if (ierr != CTA_OK)
+     {printf("Error: reltable_applyval in netcdf_getvariances \n");
+       *retval = -1; return;}
+
+   // Fill in the variances in the variable *hvec
+
+   *retval = CTA_Vector_SetVals( *hvec, stdv, x->nmeasr, CTA_REAL);
+   if (*retval!=CTA_OK) return;
+
+   *returnvar = CTA_FALSE;  //it is the std, not the var !
+   *retval = CTA_OK;
+#else
+   printf("Error: CTAI_SObs_netcdf_GetVariances: COSTA is compiled without NETCDFsupport\n");
+  *retval=CTA_NOT_IMPLEMENTED;
+#endif
+
+};
+
+
+void CTAI_SObs_netcdf_GetRealisation(
+// Calculate stochastic realizations for all the measurements in a netcdf-StochObserver
+   // INPUTS
+   CTAI_SObs_netcdf *x,  // StochObserver from which the measurements are 
+                         //    returned
+   // OUTPUTS
+   CTA_Vector *hvec,      // COSTA-vector containing the realizations
+   int *retval
+   )
+{
+#if HAVE_LIBNETCDF
+   double *values, *stdv;
+   int i, ierr, returnvar;
+   CTA_Vector  hvalues, hstdv;
+
+   values=CTA_Malloc(x->nmeasr*sizeof(double));
+   stdv=CTA_Malloc(x->nmeasr*sizeof(double));
+
+   ierr = CTA_Vector_Create(CTA_DEFAULT_VECTOR, x->nmeasr, CTA_DOUBLE, CTA_NULL, &hvalues);
+   ierr = CTA_Vector_Create(CTA_DEFAULT_VECTOR, x->nmeasr, CTA_DOUBLE, CTA_NULL, &hstdv);
+
+   // Look up the expectations
+   CTAI_SObs_netcdf_GetVals( x, &hvalues,&ierr);
+   if (ierr != CTA_OK) return;
+
+   // Look up the standard deviations
+   CTAI_SObs_netcdf_GetVariances( x, &hstdv,&returnvar, &ierr);
+   if (ierr!=CTA_OK) return;
+
+   ierr = CTA_Vector_GetVals(hvalues, values, x->nmeasr, CTA_DOUBLE);
+   ierr = CTA_Vector_GetVals(hstdv, stdv, x->nmeasr, CTA_DOUBLE);
+
+   // Calculate realizations of all the measurements
+   for (i=0;i<x->nmeasr;i++)
+   {
+      double r;
+      *retval = CTA_rand_n(&r);
+      if (*retval!=CTA_OK) return;
+
+      if (IDEBUG>0) {printf("cta_sobs_getrealisations:%d:  val: %f std: %f r: %f \n", i, values[i],stdv[i],r);}
+
+      if (returnvar == CTA_TRUE) {
+        values[i] = values[i] + r*stdv[i];}
+      else {
+        values[i] = values[i] + r*stdv[i]*stdv[i];
+      }
+
+
+   }
+
+   // Fill in the results in the vector
+   *retval = CTA_Vector_SetVals( *hvec, values, x->nmeasr, CTA_DOUBLE);
+   free(values);
+   free(stdv);
+   ierr = CTA_Vector_Free(&hvalues);
+   ierr = CTA_Vector_Free(&hstdv);
+
+   
+   if (*retval!=CTA_OK) return;
+
+  *retval = CTA_OK;
+#else
+   printf("Error: CTAI_SObs_netcdf_GetRealisation: COSTA is compiled without NETCDFsupport\n");
+  *retval=CTA_NOT_IMPLEMENTED;
+#endif
+};
+
+void CTAI_SObs_netcdf_GetCovMat(
+// Get all the variances of the measurements in a netcdf-StochObserver
+   // INPUTS
+   CTAI_SObs_netcdf *x,  // StochObserver from which the measurements are 
+                         //    returned
+   // OUTPUTS
+   CTA_Matrix *hmat,      /* Covariance matrix */
+   int *retval
+   )
+{
+#if HAVE_LIBNETCDF
+   double *matval;
+   int i, ierr, ncid, varid;
+   float *stdv;
+   float *nc_values;
+
+   // first allocate space for temporary arrays
+   nc_values = CTA_Malloc(x->nmeasr_orig * sizeof(float));
+   stdv=CTA_Malloc(x->nmeasr*sizeof(float));
+
+   // Look up all the standard deviations of the measurements in the observer:
+
+   /* read values from database. First read entire  vcd-array. Note: netcdf file should first be opened */
+  // Get the netcdf-id of the database
+   ncid = x->database->ncid;
+   // read the vcd vector. We know the size: nmeasr_orig. OMI-vcd vector contains one float per entry.
+   ierr = nc_inq_varid(ncid, "sigma_vcd_trop", &varid);
+   ierr = nc_get_var_float(ncid, varid, &nc_values[0]);
+   if (IDEBUG>0) {printf("variance has been read %f \n",nc_values[0]);} 
+
+
+   if (ierr != CTA_OK)
+     {printf("Error: could not read sigma_vcd_trop \n");
+       *retval = -1; return;}
+
+// now fill the stdv array. The relation table is used to get only the selection.
+   ierr = CTA_RelTable_ApplyVal(x->selectionReltab, nc_values, x->nmeasr_orig, 
+                                           stdv, x->nmeasr, CTA_REAL);
+   if (ierr != CTA_OK)
+     {printf("Error: reltable_applyval in netcdf_getcovmat; nobs:%d of %d  \n",x->nmeasr,x->nmeasr_orig);
+       *retval = -1; return;}
+
+   // Fill in the variances in the variable matval (matrix values)
+
+   if (IDEBUG>0) {printf("filling matrix of size %d...%f  %f \n",x->nmeasr,stdv[0],stdv[1]);} 
+
+   matval=CTA_Malloc(x->nmeasr * x->nmeasr * sizeof(double));
+   for (i=0;i < x->nmeasr * x->nmeasr ; i++){matval[i]=0.0;}
+
+   for (i=0;i < x->nmeasr ; i++){
+     matval[i * x->nmeasr + i] = stdv[i]*stdv[i];
+   }
+   free(stdv);
+   free(nc_values);
+
+   *retval = CTA_Matrix_SetVals( *hmat, matval, x->nmeasr, x->nmeasr,
+                                 CTA_DOUBLE);
+   free(matval);
+
+   if (IDEBUG>0) {printf("getcovmat completed!  \n");} 
+
+   if (*retval!=CTA_OK) return;
+   *retval=CTA_OK;
+
+   *retval = CTA_OK;
+return ;
+#else
+   printf("Error: CTAI_SObs_netcdf_GetCovMat: COSTA is compiled without NETCDFsupport\n");
+  *retval=CTA_NOT_IMPLEMENTED;
+#endif
+};
+
+
+void CTAI_SObs_netcdf_export(
+   CTAI_SObs_netcdf *x,
+   CTA_Handle *userdata,
+   int *retval
+  )
+{
+#if HAVE_LIBNETCDF
+   // The user-data must be 0, 1 or 2 items: 
+   //  the output file (default: stdout)
+   //  and a format-string (default: empty).
+   //
+   // Look up the output file
+   FILE *file;
+   int i, ierr;
+   CTA_Vector hvec_time, hvec_vals;
+   double dval1,dval2;
+
+   if (CTA_Handle_Check(*userdata,CTA_FILE)==CTA_OK) {
+      *retval = CTA_File_Get(*userdata,&file);
+
+      if (CTA_FLUSH_ALWAYS) CTA_Flush();
+
+
+      // Print the contents of the observer.
+      fprintf(file,"%% This Observer has the following properties: \n");
+
+      if (x->nmeasr == 0 ) {
+        fprintf(file,"This is observer of an EMPTY database\n");}
+      else { 
+
+
+      fprintf(file,"   dbasename='%s';\n",x->database->dbname);
+      fprintf(file,"   nusers=%d;\n",x->database->nusers);
+      fprintf(file," nmeasr_orig; %d  nmeasr=%d;\n\n",x->nmeasr_orig, x->nmeasr);
+
+      ierr = CTA_Vector_Create(CTA_DEFAULT_VECTOR, x->nmeasr, CTA_DOUBLE, CTA_NULL,&hvec_time);
+      ierr = CTA_Vector_Create(CTA_DEFAULT_VECTOR, x->nmeasr, CTA_DOUBLE, CTA_NULL,&hvec_vals);
+      CTAI_SObs_netcdf_GetTimes(x, &hvec_time, &ierr);
+      CTAI_SObs_netcdf_GetVals(x, &hvec_vals, &ierr);
+
+      for (i=1; i <= x->nmeasr; i++) {
+
+        CTA_Vector_GetVal(hvec_time, i, &dval1, CTA_DOUBLE);
+        CTA_Vector_GetVal(hvec_vals, i, &dval2, CTA_DOUBLE);
+
+        fprintf(file," nr:%d  time: %f  value: %f \n",i,dval1, dval2);        
+      }
+      CTA_Vector_Free(&hvec_time);
+      CTA_Vector_Free(&hvec_vals);
+
+      }
+   };
+#else
+   printf("Error: CTAI_SObs_netcdf_export: COSTA is compiled without NETCDFsupport\n");
+  *retval=CTA_NOT_IMPLEMENTED;
+#endif
+
+};
+
+void CTAI_SObs_netcdf_Free(
+   CTAI_SObs_netcdf *x,
+   int *retval
+  )
+{
+#if HAVE_LIBNETCDF
+  int ierr;
+  /* We only are allowed to close the database if all users (including all selectioned observers) are gone! */
+  //  if (x->ncid != 0) {
+  //    if ((ierr = nc_close(x->ncid))){
+  //      printf("CTA_Sobs_netcdf_Free: cannot close netCDF-file: %s\n",
+  //             nc_strerror(retval));
+  //    }
+  //  }
+  x->database->nusers--;
+  //  printf("cta_sobs_free (netcdf): now %d users remaining of database %s \n",x->database->nusers, x->database->dbname);
+ 
+  ierr = CTA_RelTable_Free(&(x->selectionReltab));
+  if (ierr !=CTA_OK)
+    {printf("CTA_Sobs_netcdf_Free: cannot close relation table \n");}
+
+  if (x->database->nusers == 0) {
+    if ((ierr = nc_close(x->database->ncid))){
+        printf("CTA_Sobs_netcdf_Free: cannot close netCDF-file: %s\n",
+               nc_strerror(ierr));
+    }
+    free(x->database->dbname);
+    free(x->database);
+
+  }
+  
+  *retval = CTA_OK;
+#else
+   printf("Error: CTAI_SObs_netcdf_Free: COSTA is compiled without NETCDFsupport\n");
+  *retval=CTA_NOT_IMPLEMENTED;
+#endif
+}
+
+
+
+/*
+
+*/
+void CTA_SObs_netcdf_initialise(CTA_SObsClass *hsobscl)
+{
+   CTA_Intf hintf=0;
+   CTA_Func h_func[CTA_SOBS_NUMFUNC];
+   CTA_ObsDescrClass descrcl;
+   int i;
+
+   // The vector h_func is filled with COSTA-function handles of the 
+   // implementations in this file.
+   for (i=0;i<CTA_SOBS_NUMFUNC;i++){
+      h_func[i]=CTA_NULL;
+   }
+   CTA_Func_Create(" ",&CTAI_SObs_netcdf_Create_Size,    hintf,
+                       &h_func[CTA_SOBS_CREATE_SIZE]);
+   CTA_Func_Create(" ",&CTAI_SObs_netcdf_Create_Init,    hintf,
+                       &h_func[CTA_SOBS_CREATE_INIT]);
+   CTA_Func_Create(" ",&CTAI_SObs_netcdf_CreateSel  ,    hintf,
+                       &h_func[CTA_SOBS_CREATE_SELECTION]);
+   CTA_Func_Create(" ",&CTAI_SObs_netcdf_Count,          hintf,
+                       &h_func[I_CTA_SOBS_COUNT]      );
+   CTA_Func_Create(" ",&CTAI_SObs_netcdf_GetVals,        hintf,
+                       &h_func[CTA_SOBS_GET_VALUES] );
+   CTA_Func_Create(" ",&CTAI_SObs_netcdf_GetTimes,       hintf,
+                       &h_func[CTA_SOBS_GET_TIMES] );
+   CTA_Func_Create(" ",&CTAI_SObs_netcdf_GetVals,        hintf,
+                       &h_func[CTA_SOBS_GET_EXPECTATION]);
+   CTA_Func_Create(" ",&CTAI_SObs_netcdf_GetRealisation, hintf,
+                       &h_func[CTA_SOBS_GET_REALISATION]);
+   CTA_Func_Create(" ",&CTAI_SObs_netcdf_GetVariances,   hintf,
+                       &h_func[CTA_SOBS_GET_VARIANCE]);
+   CTA_Func_Create(" ",&CTAI_SObs_netcdf_GetCovMat,      hintf,
+                       &h_func[CTA_SOBS_GET_COV_MATRIX]);
+   CTA_Func_Create(" ",&CTAI_SObs_netcdf_export,     hintf,
+                       &h_func[I_CTA_SOBS_EXPORT]     );
+   CTA_Func_Create(" ",&CTAI_SObs_netcdf_Free,     hintf,
+                            &h_func[I_CTA_SOBS_FREE]     );
+   
+   CTA_ObsDescr_netcdf_initialise(&descrcl);
+
+   CTA_SObs_DefineClass("cta_sobs_netcdf",h_func,descrcl,hsobscl);
+
+
+}
+
+
+
+
+
diff --git a/costa/native/cta/src/cta_sobs_sqlite3.c b/costa/native/cta/src/cta_sobs_sqlite3.c
new file mode 100644
index 000000000..2de6299e3
--- /dev/null
+++ b/costa/native/cta/src/cta_sobs_sqlite3.c
@@ -0,0 +1,743 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_sobs_sqlite3.c $
+$Revision: 4304 $, $Date: 2014-01-14 07:29:40 +0100 (Tue, 14 Jan 2014) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <string.h>
+#include <memory.h>
+#include "cta_mem.h"
+#include "cta_defaults.h"
+#include "cta_flush.h"
+#include "cta_util_statistics.h"
+#include "cta_util_sqlite3.h"
+#include "cta_file.h"
+#include "cta_string.h"
+#include "cta_obsdescr_sqlite3.h"
+#include "cta_sobs_sqlite3.h"
+#include "cta_errors.h"
+#include "cta_message.h"
+
+#define IDEBUG (0)
+
+#define CLASSNAME "CTA_SObs_sqlite3"
+/*
+
+*/
+void CTA_SObs_sqlite3_initialise(CTA_SObsClass *hsobscl)
+{
+   CTA_Intf hintf=0;
+   CTA_Func h_func[CTA_SOBS_NUMFUNC];
+   CTA_ObsDescrClass descrcl;
+   int i;
+
+   // The vector h_func is filled with COSTA-function handles of the 
+   // implementations in this file.
+   for (i=0;i<CTA_SOBS_NUMFUNC;i++){
+      h_func[i]=CTA_NULL;
+   }
+   CTA_Func_Create(" ",&CTAI_SObs_sqlite3_Create_Size,    hintf,
+                       &h_func[CTA_SOBS_CREATE_SIZE]);
+   CTA_Func_Create(" ",&CTAI_SObs_sqlite3_Create_Init,    hintf,
+                       &h_func[CTA_SOBS_CREATE_INIT]);
+   CTA_Func_Create(" ",&CTAI_SObs_sqlite3_CreateSel  ,    hintf,
+                       &h_func[CTA_SOBS_CREATE_SELECTION]);
+   CTA_Func_Create(" ",&CTAI_SObs_sqlite3_Count,          hintf,
+                       &h_func[I_CTA_SOBS_COUNT]      );
+   CTA_Func_Create(" ",&CTAI_SObs_sqlite3_GetVals,        hintf,
+                       &h_func[CTA_SOBS_GET_VALUES] );
+   CTA_Func_Create(" ",&CTAI_SObs_sqlite3_GetTimes,       hintf,
+                       &h_func[CTA_SOBS_GET_TIMES] );
+   CTA_Func_Create(" ",&CTAI_SObs_sqlite3_GetVals,        hintf,
+                       &h_func[CTA_SOBS_GET_EXPECTATION]);
+   CTA_Func_Create(" ",&CTAI_SObs_sqlite3_GetRealisation, hintf,
+                       &h_func[CTA_SOBS_GET_REALISATION]);
+   CTA_Func_Create(" ",&CTAI_SObs_sqlite3_GetVariances,   hintf,
+                       &h_func[CTA_SOBS_GET_VARIANCE]);
+   CTA_Func_Create(" ",&CTAI_SObs_sqlite3_GetCovMat,      hintf,
+                       &h_func[CTA_SOBS_GET_COV_MATRIX]);
+   CTA_Func_Create(" ",&CTAI_SObs_sqlite3_export,     hintf,
+                       &h_func[I_CTA_SOBS_EXPORT]     );
+   CTA_Func_Create(" ",&CTAI_SObs_sqlite3_Free,     hintf,
+                            &h_func[I_CTA_SOBS_FREE]     );
+   
+   CTA_ObsDescr_sqlite3_initialise(&descrcl);
+
+   CTA_SObs_DefineClass("cta_sobs_sqlite3",h_func,descrcl,hsobscl);
+
+
+}
+
+
+void CTAI_SObs_sqlite3_Create_Size(
+   // OUTPUTS:
+      int *memsize,           // The number of bytes which are necessary to store one 
+                              //     CTAI_SObs_sqlite3, with a pointer to the contents (data), 
+                              //     but without the contents themselves.
+      int *retval             // error code (see cta_datatypes.h for possible error codes)
+   ){
+   *memsize=(int) sizeof(CTAI_SObs_sqlite3);
+   *retval=CTA_OK;
+};
+
+
+
+
+
+#define METHOD "CTAI_SObs_sqlite3_Create_Init"
+void CTAI_SObs_sqlite3_Create_Init(
+/*
+   Allocate the memory which is necessary to store the data necessary for a
+   sqlite3-observer
+*/
+     // IN-OUTPUTS
+        CTAI_SObs_sqlite3 *x,   // The sqlite3-observer for which the memory must 
+                                //     be allocated
+     // INPUTS:
+        CTA_Handle userdata,   // User data: database-name
+     // OUTPUTS
+        int *retval)            // Error code. Possible error: Illegal data type
+{
+   int len;
+   char *dbname;
+   char *condition=CTA_Malloc(sizeof(char));
+   int nstations,nmeasr;
+   CTAI_util_sqlite3_database *database;
+   sqlite3 *db;
+   int rc;
+   int *stations;
+   char message[256];
+   FILE *file;
+
+   condition[0]=0;
+   database = CTA_Malloc(sizeof(CTAI_util_sqlite3_database));
+
+
+   // Get the name of the database file:
+   // Check that indeed only one userdata-item is passed: the database name
+   // Allocate a name-string 
+   *retval = CTA_String_GetLength(userdata, &len);
+   if (*retval!=CTA_OK) return;
+   // Get the name of the database
+   dbname = CTA_Malloc((len+1)*sizeof(char));
+   *retval = CTA_String_Get(userdata, dbname);
+   if (*retval!=CTA_OK) return;
+
+   // Open the database
+   //
+   // First check whether the file exists
+   //
+   if ((file = fopen(dbname, "r"))){
+      fclose(file);
+   }
+   else {
+      sprintf(message,"Cannot open observation database file %s",dbname);
+      CTA_WRITE_ERROR(message);		   
+      *retval=CTA_CANNOT_OPEN_FILE;
+      free(database);
+      free(dbname);
+      return;
+   }
+
+
+
+   rc = sqlite3_open(dbname, &db);
+   if ( rc!=SQLITE_OK ){
+      sprintf(message,"Cannot open observation database file %s",dbname);
+      CTA_WRITE_ERROR(message);		   
+      sqlite3_close(db);
+      *retval=CTA_CANNOT_OPEN_FILE;
+      free(database);
+      free(dbname);
+      return;
+   }
+
+
+   // Count the stations in this observer
+   *retval = CTAI_util_sqlite3_select_values(&nstations, 1, CTA_INTEGER, db,
+       "count(distinct stations.station_id)",condition);
+   if (*retval!=CTA_OK) return;
+
+   // Save the stations_ids of this observer in array stations
+   stations=CTA_Malloc(nstations*sizeof(int));
+   *retval = CTAI_util_sqlite3_select_values(stations, nstations, CTA_INTEGER, db,
+       "distinct stations.station_id",condition);
+   if (*retval!=CTA_OK) return;
+   
+   //printf("sobs_sqlite_create_init: debug3: %d \n ",*retval);
+
+   // Count the measurements in this observer
+   *retval = CTAI_util_sqlite3_select_values(&nmeasr, 1, CTA_INTEGER, db,
+       "count(stations.station_id)",condition);
+   if (*retval!=CTA_OK) return;
+
+   // Set the observer-fields
+    
+   database->db        = db;
+   database->name      = dbname;
+   database->nusers    = 1;
+
+   x->database  = database;
+   x->nstations = nstations;
+   x->stations  = stations;
+   x->nmeasr    = nmeasr;
+   x->condition = condition;
+   *retval=CTA_OK;
+};
+
+
+void CTAI_SObs_sqlite3_CreateSelString(
+/*
+   Allocate the memory which is neccesary to store the data necessary for a
+   sqlite3-observer
+*/
+     // INPUTS
+        CTAI_SObs_sqlite3 *obsin,// The sqlite3-observer of which a selection 
+                                 // is to be made
+        CTA_Handle userdata,   // User data: condition
+     // OUTPUTS
+        CTAI_SObs_sqlite3 *obsout, // The sqlite3-observer which is a selection 
+                                // of observer obsin
+        int *retval)            // Error code
+{
+      int len;
+      char *condition;
+
+   // Get the condition
+      // Allocate a name-string 
+      *retval = CTA_String_GetLength(userdata, &len);
+      if (*retval!=CTA_OK) return;
+
+      // Get the condition
+      condition = CTA_Malloc((len+1)*sizeof(char));
+      *retval = CTA_String_Get(userdata, condition);
+      if (*retval!=CTA_OK) return;
+
+   // Link the database also to this observer and keep track of the number of 
+   // observers using this database
+   obsout->database = obsin->database;
+   obsout->database->nusers++;
+
+   // Combine the input-condition and the condition of the input observer.
+   if (strcmp(obsin->condition,"")==0)
+   {
+      obsout->condition = condition;
+   }
+   else
+   {
+      obsout->condition = CTA_Malloc(sizeof(char)*
+                                  (    strlen(condition) 
+                                    +  strlen(obsin->condition)
+                                    +  strlen("  (  ) AND (  )  ")
+                                ) );
+      sprintf(obsout->condition,"(%s) AND (%s)",condition,obsin->condition);
+      if (IDEBUG) printf("CTAI_SObs_sqlite3_CreateSel: DEBUG condition=%s\n",
+                         obsout->condition);
+      free(condition);
+   }
+
+   // Count the stations in this observer
+   *retval = CTAI_util_sqlite3_select_values(
+       &(obsout->nstations), 1, CTA_INTEGER, obsout->database->db,
+       "count(distinct stations.station_id)",obsout->condition);
+   if (*retval!=CTA_OK) return;
+
+   // Save the stations_ids of this observer in array stations
+   obsout->stations=CTA_Malloc((obsout->nstations)*sizeof(int));
+   *retval = CTAI_util_sqlite3_select_values(
+       obsout->stations, obsout->nstations, CTA_INTEGER, obsout->database->db,
+       "distinct stations.station_id",obsout->condition);
+   if (*retval!=CTA_OK) return;
+   
+   // Count the measurements in this observer
+   *retval = CTAI_util_sqlite3_select_values(
+       &(obsout->nmeasr), 1, CTA_INTEGER, obsout->database->db,
+       "count(stations.station_id)",obsout->condition);
+
+   if (*retval!=CTA_OK) return;
+
+   *retval=CTA_OK;
+};
+
+
+void CTAI_SObs_sqlite3_CreateSelRelTab( CTAI_SObs_sqlite3 *obsin, CTA_RelTable reltab, 
+        CTAI_SObs_sqlite3 *obsout, int *retval) 
+{
+   int nmeasr;
+   int isel, nsel;
+   int *rowIds, *selRowIds;
+   CTA_Handle hdum=0;
+   CTA_Vector vRowIds, vSelRowIds;
+   CTA_String sSel;
+   char sNum[10];
+   char *sel;
+
+   /* Get number of observations */
+   CTAI_SObs_sqlite3_Count(obsin, &nmeasr, retval);
+   if (*retval!=CTA_OK) return;
+    
+   /* Create a vector with the rowID's */
+   rowIds=CTA_Malloc(nmeasr*sizeof(int));
+    *retval = CTAI_util_sqlite3_select_values(
+               rowIds,  nmeasr,  CTA_INTEGER,  obsin->database->db,
+               "data._ROWID_", obsin->condition);
+   if (*retval!=CTA_OK) return;
+
+   *retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nmeasr, CTA_INTEGER, hdum, &vRowIds);
+   if (*retval!=CTA_OK) return;
+   *retval=CTA_Vector_SetVals(vRowIds,rowIds,nmeasr,CTA_INTEGER);
+   if (*retval!=CTA_OK) return;
+   free(rowIds);
+
+   /* Create a vector with the selected rowID's */
+   CTA_RelTable_Count(reltab,&nsel);
+   selRowIds=CTA_Malloc(nsel*sizeof(int));
+   *retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nsel, CTA_INTEGER, hdum, &vSelRowIds);
+   if (*retval!=CTA_OK) return;
+
+   /* Apply the relation table */
+   *retval=CTAI_RelTable_Apply(reltab, vRowIds, vSelRowIds, CTA_FALSE);
+   if (*retval!=CTA_OK){
+      printf("Error in CTAI_SObs_sqlite3_CreateSelRelTab: Cannot apply selection relation table \n");
+      return;
+   }
+
+   /* Create a selection criterion for */
+   *retval=CTA_Vector_GetVals(vSelRowIds,selRowIds,nsel,CTA_INTEGER);
+   if (*retval!=CTA_OK) return;
+
+   sel=CTA_Malloc((2+nsel*(4+10+16)+2+1)*sizeof(char));
+   strcpy(sel,"( ");                       //2
+   for (isel=0;isel<nsel;isel++){
+      if (isel>0) {
+         strcat(sel," OR ");               //4
+      }
+      sprintf(sNum,"%d",selRowIds[isel]);  //10
+      strcat(sel," data._ROWID_ = ");      //16
+      strcat(sel,sNum);
+   }
+   strcat(sel," )");                       //2
+
+   printf("========================================\n");
+   printf("%s\n",sel);
+   printf("========================================\n");
+
+   /* Create the actual selection */   
+   CTA_String_Create(&sSel);
+   CTA_String_Set(sSel,sel);
+   CTAI_SObs_sqlite3_CreateSelString(obsin, sSel, obsout, retval);
+   CTA_String_Free(&sSel);
+   if (*retval !=CTA_OK){
+      printf("CTAI_SObs_sqlite3_CreateSelRelTab: Error creating selection '%s' \n",sel);
+   }
+
+   /* Free work variables */
+   free(selRowIds);
+   free(sel);
+};
+
+
+
+
+
+
+
+
+
+
+
+
+void CTAI_SObs_sqlite3_CreateSel(
+/*
+   Allocate the memory which is neccesary to store the data necessary for a
+   sqlite3-observer
+*/
+     // INPUTS
+        CTAI_SObs_sqlite3 *obsin,// The sqlite3-observer of which a selection 
+                                 // is to be made
+        CTA_Handle *userdata_in,   // User data: condition
+     // OUTPUTS
+        CTAI_SObs_sqlite3 *obsout, // The sqlite3-observer which is a selection 
+                                // of observer obsin
+        int *retval)            // Error code
+{
+   CTA_Handle userdata=*userdata_in;
+   CTA_Datatype datatype;
+
+   CTA_Handle_GetDatatype(userdata, &datatype);
+   if (datatype==CTA_STRING){
+      CTAI_SObs_sqlite3_CreateSelString(obsin,userdata,obsout,retval); 
+   } else if (datatype==CTA_RELTABLE){
+      CTAI_SObs_sqlite3_CreateSelRelTab(obsin,userdata,obsout,retval); 
+   } else {
+      printf("Error in CTAI_SObs_sqlite3_CreateSel: datatype (%d) of selection is not supported\n",datatype);
+      *retval=CTA_INPUT_OBJECT_NOT_SUPPORTED;
+   }
+};
+
+
+void CTAI_SObs_sqlite3_Count(
+// Return the number of measurements in the observer
+   // INPUTS
+      CTAI_SObs_sqlite3 *x,  // The StochObesrver of which the number of measurements is
+                                  //     returned 
+   // OUTPUTS 
+      int *nmeasr,
+      int *retval
+   )
+{
+   /* Local variables */
+   *nmeasr = x->nmeasr;
+   *retval = CTA_OK;
+};
+
+
+void CTAI_SObs_sqlite3_GetVals(
+// Get all the values from a sqlite3-StochObserver
+   // INPUTS
+   CTAI_SObs_sqlite3 *x, // StochObserver from which the measurements are 
+                         //    returned
+   // OUTPUTS
+   CTA_Vector *hvec,      // COSTA-vector containing the values
+   int *retval
+   )
+{
+   void * values;
+   int size;
+   CTA_Datatype datatype;
+
+   /* Get datatype of vector */
+   *retval=CTA_Vector_GetDatatype(*hvec,&datatype);
+   if (*retval!=CTA_OK) return;
+
+   /* allocate space for holding result */
+   *retval = CTA_SizeOf(datatype,&size);
+   if (*retval!=CTA_OK) return;
+
+   values=CTA_Malloc(x->nmeasr*size);
+
+   /* read values from database */
+   *retval = CTAI_util_sqlite3_select_values(
+                  values,  x->nmeasr,  datatype,  x->database->db,
+                  "value", x->condition);
+   if (*retval!=CTA_OK) return;
+
+  /* Set values in vector en clean work-memory */
+   *retval = CTA_Vector_SetVals( *hvec, values, x->nmeasr, datatype);
+   free(values);
+   if (*retval!=CTA_OK) return;
+
+
+
+   *retval = CTA_OK;
+};
+
+void CTAI_SObs_sqlite3_GetTimes(
+// Get all the times from a sqlite3-StochObserver
+   // INPUTS
+   CTAI_SObs_sqlite3 *x, // StochObserver from which the measurements are 
+                         //    returned
+   // OUTPUTS
+   CTA_Vector *hvec,      // COSTA-vector containing the values
+   int *retval
+   )
+{
+   void * values;
+   int size;
+   CTA_Datatype datatype;
+
+   /* Get datatype of vector */
+   *retval=CTA_Vector_GetDatatype(*hvec,&datatype);
+   if (*retval!=CTA_OK) return;
+
+   /* allocate space for holding result */
+   *retval = CTA_SizeOf(datatype,&size);
+   if (*retval!=CTA_OK) return;
+
+   values=CTA_Malloc(x->nmeasr*size);
+
+   /* read values from database */
+   *retval = CTAI_util_sqlite3_select_values(
+                  values,  x->nmeasr,  datatype,  x->database->db,
+                  "time", x->condition);
+   if (*retval!=CTA_OK) return;
+
+  /* Set values in vector en clean work-memory */
+   *retval = CTA_Vector_SetVals( *hvec, values, x->nmeasr, datatype);
+   free(values);
+   if (*retval!=CTA_OK) return;
+
+   *retval = CTA_OK;
+};
+
+void CTAI_SObs_sqlite3_GetVariances(
+// Get all the variances of the measurements in a sqlite3-StochObserver
+   // INPUTS
+   CTAI_SObs_sqlite3 *x, // StochObserver from which the measurements are 
+                         //    returned
+   int *returnvar,        //return variance (CTA_TRUE) or std (CTA_FALSE)
+   // OUTPUTS
+   CTA_Vector *hvec,      // COSTA-vector containing the values
+   int *retval
+   )
+{
+   double *var;
+   float  *rvar;
+   int i;
+   int size;
+   CTA_Datatype datatype;
+
+   /* Get datatype of vector */
+   *retval=CTA_Vector_GetDatatype(*hvec,&datatype);
+   if (*retval!=CTA_OK) return;
+
+   /* allocate space for holding result */
+   *retval = CTA_SizeOf(datatype,&size);
+   if (*retval!=CTA_OK) return;
+
+   var=CTA_Malloc(x->nmeasr*size);
+   // Look up all the standard deviations of the measurements in the observer
+   *retval = CTAI_util_sqlite3_select_values(
+                  var,  x->nmeasr,  datatype,  x->database->db,
+                  "standarddeviation",x->condition);
+   if (*retval!=CTA_OK) return;
+
+   // Calculate the variances (square of the standard deviations)
+   if (*returnvar==CTA_TRUE){
+      if (datatype==CTA_REAL) {
+         rvar=(float*) var;
+         for (i=0;i<x->nmeasr;i++)
+            { rvar[i] = rvar[i]*rvar[i]; };
+      }else
+      {
+         for (i=0;i<x->nmeasr;i++)
+            { var[i] = var[i]*var[i]; };
+      }
+   }
+
+   // Fill in the variances in the vector
+   *retval = CTA_Vector_SetVals( *hvec, var, x->nmeasr, datatype);
+   free(var);
+   if (*retval!=CTA_OK) return;
+   *retval=CTA_OK;
+   
+};
+
+
+void CTAI_SObs_sqlite3_GetRealisation(
+// Calculate stochastic realizations for all the measurements in a sqlite3-StochObserver
+   // INPUTS
+   CTAI_SObs_sqlite3 *x,  // StochObserver from which the measurements are 
+                         //    returned
+   // OUTPUTS
+   CTA_Vector *hvec,      // COSTA-vector containing the realizations
+   int *retval
+   )
+{
+   double *values, *stdv;
+   int i;
+
+   values=CTA_Malloc(x->nmeasr*sizeof(double));
+   stdv=CTA_Malloc(x->nmeasr*sizeof(double));
+   // Look up the expectations
+   *retval = CTAI_util_sqlite3_select_values(
+                  values,  x->nmeasr,  CTA_DOUBLE,  x->database->db,
+                  "value",x->condition);
+   if (*retval!=CTA_OK) return;
+
+   // Look up the standard deviations
+   *retval = CTAI_util_sqlite3_select_values(
+                  stdv,  x->nmeasr,  CTA_DOUBLE,  x->database->db,
+                  "standarddeviation",x->condition);
+   if (*retval!=CTA_OK) return;
+
+   // Calculate realizations of all the measurements
+   for (i=0;i<x->nmeasr;i++)
+   {
+      double r;
+      *retval = CTA_rand_n(&r);
+      if (*retval!=CTA_OK) return;
+      values[i] = values[i] + r*stdv[i];
+   }
+
+   // Fill in the results in the vector
+   *retval = CTA_Vector_SetVals( *hvec, values, x->nmeasr, CTA_DOUBLE);
+   free(values);
+   free(stdv);
+   
+   if (*retval!=CTA_OK) return;
+   
+   *retval=CTA_OK;
+};
+
+void CTAI_SObs_sqlite3_GetCovMat(
+// Get all the variances of the measurements in a sqlite3-StochObserver
+   // INPUTS
+   CTAI_SObs_sqlite3 *x,  // StochObserver from which the measurements are 
+                         //    returned
+   // OUTPUTS
+   CTA_Matrix *hmat,      /* Covariance matrix */
+   int *retval
+   )
+{
+   double *matval;
+   int i;
+   double *stdv;
+   stdv=CTA_Malloc(x->nmeasr*sizeof(double));
+
+   // Look up all the standard deviations of the measurements in the observer
+   *retval = CTAI_util_sqlite3_select_values(
+                  stdv,  x->nmeasr,  CTA_DOUBLE,  x->database->db,
+                  "standarddeviation",x->condition);
+   if (*retval!=CTA_OK) return;
+
+   // Fill in the variances in the variable matval (matrix values)
+
+   matval=CTA_Malloc(x->nmeasr * x->nmeasr * sizeof(double));
+   for (i=0;i < x->nmeasr * x->nmeasr ; i++){matval[i]=0.0;}
+
+   for (i=0;i < x->nmeasr ; i++){
+      matval[i * x->nmeasr + i] = stdv[i]*stdv[i];
+   }
+   free(stdv);
+   *retval = CTA_Matrix_SetVals( *hmat, matval, x->nmeasr, x->nmeasr,
+                                 CTA_DOUBLE);
+   free(matval);
+   if (*retval!=CTA_OK) return;
+   *retval=CTA_OK;
+   
+};
+
+
+
+
+static int CTAI_SObs_printen(void *vfile, int argc, char **argv, char **azColName){
+  int i;
+  FILE * file = *((FILE **) vfile);
+  for(i=0; i<argc; i++){
+    fprintf(file,"%s = %s\n", azColName[i], argv[i] ? argv[i] : "NULL");
+  }
+  fprintf(file,"\n");
+  return 0;
+}
+
+void CTAI_SObs_sqlite3_export(
+   CTAI_SObs_sqlite3 *x,
+   CTA_Handle *userdata,
+   int *retval
+  )
+{
+   // The user-data must be 0, 1 or 2 items: 
+   //  the output file (default: stdout)
+   //  and a format-string (default: empty).
+   //
+   // Look up the output file
+   FILE *file;
+   int i;
+
+   if (CTA_Handle_Check(*userdata,CTA_FILE)==CTA_OK) {
+      *retval = CTA_File_Get(*userdata,&file);
+
+      if (CTA_FLUSH_ALWAYS) CTA_Flush();
+
+
+      // Print the contents of the observer.
+      fprintf(file,"%% This Observer has the following properties: \n");
+
+      if (x->nmeasr == 0 ) {
+        fprintf(file,"This is observer of an EMPTY database\n");}
+      else { 
+
+
+      fprintf(file,"   dbasename='%s';\n",x->database->name);
+      fprintf(file,"   nusers=%d;\n",x->database->nusers);
+      fprintf(file,"   condition='%s';\n",x->condition);
+      fprintf(file,"   stations=");
+
+      for (i=0; i < x->nstations; i++)
+         { fprintf(file,"%d ",x->stations[i]); }
+      fprintf(file,"\n");
+      fprintf(file,"   nmeasr=%d;\n\n",x->nmeasr);
+
+      // Optionally, print also the values, using the appropriate SQL-command
+      //if (strcmp(format,"values also")==0)
+      {
+        char *command;
+        char *zErrMsg;
+
+        int rc;
+        // Set the constants necessary 
+        const char *whole_table = // The expression which 
+                       // produces the complete obs-table
+           "stations inner join data on stations.station_id = data.station_id";
+
+        command=CTA_Malloc((
+                   strlen("select * from where") +
+                   strlen(whole_table)     +
+                   strlen(x->condition))*sizeof(char));
+
+        if (strcmp(x->condition,"")==0)
+        { 
+            sprintf(command,"select * from %s",whole_table); 
+        }
+        else
+        { 
+           sprintf(command,"select * from %s where %s",
+                     whole_table,x->condition); 
+        }
+
+        zErrMsg = NULL;
+
+        rc = sqlite3_exec(x->database->db, command, CTAI_SObs_printen,
+                      (void *) &file, &zErrMsg);
+      //  if (zErrMsg!=NULL){
+      //     printf("Error message from sqlite3: %s\n", &zErrMsg);
+      //  }
+        free(command);
+        if( rc != SQLITE_OK ) {*retval=CTA_INVALID_COMMAND;return;};
+
+      }
+      }
+
+      if (CTA_FLUSH_ALWAYS) CTA_Flush();
+
+      *retval=CTA_OK;
+   }
+   else {
+      *retval=CTA_FORMAT_NOT_SUPPORTED;
+   }
+};
+
+void CTAI_SObs_sqlite3_Free(
+   CTAI_SObs_sqlite3 *x,
+   int *retval
+  )
+{
+    x->database->nusers--;
+    if (x->database->nusers==0)
+    {
+       sqlite3_close(x->database->db);
+       free(x->database->name);
+       free(x->database);
+    }
+    free(x->stations);
+    free(x->condition);
+
+    *retval = CTA_OK;
+}
diff --git a/costa/native/cta/src/cta_sobs_user.c b/costa/native/cta/src/cta_sobs_user.c
new file mode 100644
index 000000000..b7e022d65
--- /dev/null
+++ b/costa/native/cta/src/cta_sobs_user.c
@@ -0,0 +1,75 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/trunk/costa/src/cta/cta_obsdescr_netcdf.c $
+$Revision: 671 $, $Date: 2008-10-07 14:49:42 +0200 (Tue, 07 Oct 2008) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <string.h>
+#include <stdio.h>
+#include "cta.h"
+
+
+void CTA_ObsDescr_user_initialize(CTA_ObsDescrClass *hobsdescrcl);
+
+void CTA_SObs_user_initialize(CTA_SObsClass *hsobscl)
+{
+   CTA_Func h_func[CTA_SOBS_NUMFUNC];
+   char *libraryName;
+   char *functionName;
+   CTA_ObsDescrClass hobsdescrcl;
+   int i;
+
+   // The vector h_func is filled with function read from the default user dynamic library
+   libraryName=userDefaultDynamicLibrary;
+   for (i=0;i<CTA_SOBS_NUMFUNC;i++){
+      h_func[i]=CTA_NULL;
+   }
+   functionName="user_sobs_create_size";
+   h_func[CTA_SOBS_CREATE_SIZE]      = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="user_sobs_create_init";
+   h_func[CTA_SOBS_CREATE_INIT]      = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="user_sobs_create_selection";
+   h_func[CTA_SOBS_CREATE_SELECTION] = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="user_sobs_count";
+   h_func[I_CTA_SOBS_COUNT]          = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="user_sobs_get_values";
+   h_func[CTA_SOBS_GET_VALUES]       = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="user_sobs_get_times";
+   h_func[CTA_SOBS_GET_TIMES]        = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="user_sobs_get_expectation";
+   h_func[CTA_SOBS_GET_EXPECTATION]  = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="user_sobs_get_realisation";
+   h_func[CTA_SOBS_GET_REALISATION]  = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="user_sobs_get_variance";
+   h_func[CTA_SOBS_GET_VARIANCE]     = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="user_sobs_get_cov_matrix";
+   h_func[CTA_SOBS_GET_COV_MATRIX]   = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="user_sobs_export";
+   h_func[I_CTA_SOBS_EXPORT]         = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+   functionName="user_sobs_free";
+   h_func[I_CTA_SOBS_FREE]           = CTA_CreateFuncDynamicLib(libraryName, functionName, functionName, functionName);
+
+
+   CTA_ObsDescr_user_initialize(&hobsdescrcl);
+
+   CTA_ObsDescr_DefineClass("cta_sobs_user",h_func,hsobscl);
+
+}
+
+
diff --git a/costa/native/cta/src/cta_string.c b/costa/native/cta/src/cta_string.c
new file mode 100644
index 000000000..e89b7fb05
--- /dev/null
+++ b/costa/native/cta/src/cta_string.c
@@ -0,0 +1,454 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_string.c $
+$Revision: 3286 $, $Date: 2012-05-02 18:15:05 +0200 (Wed, 02 May 2012) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+#include <string.h>
+#include <stdio.h>
+#include "cta_mem.h"
+#include "f_cta_utils.h"
+#include "cta_errors.h"
+#include "cta_vector.h"
+#include "cta_string.h"
+#include "cta_errors.h"
+#include "cta_pack.h"
+#include "cta_message.h"
+
+#define CTA_STRING_CREATE_F77     F77_CALL(cta_string_create,CTA_STRING_CREATE)
+#define CTA_STRING_FREE_F77       F77_CALL(cta_string_free,CTA_STRING_FREE)
+#define CTA_STRING_GETLENGTH_F77  F77_CALL(cta_string_getlength,CTA_STRING_GETLENGTH)
+#define CTA_STRING_SET_F77        F77_CALL(cta_string_set,CTA_STRING_SET)
+#define CTA_STRING_GET_F77        F77_CALL(cta_string_get,CTA_STRING_GET)
+#define CTA_STRING_GETVALUE_F77   F77_CALL(cta_string_getvalue,CTA_STRING_GETVALUE)
+#define CTA_STRING_CONC_F77       F77_CALL(cta_string_conc,CTA_STRING_CONC)
+#define CTA_STRING_DUPLICATE_F77  F77_CALL(cta_string_duplicate,CTA_STRING_DUPLICATE)
+#define CTA_STRING_EXPORT_F77     F77_CALL(cta_string_export,CTA_STRING_EXPORT)
+#define CTA_STRING_IMPORT_F77     F77_CALL(cta_string_import,CTA_STRING_IMPORT)
+
+#define CLASSNAME "CTA_String"
+
+/* Struct holding all data associated to an COSTA String */
+
+typedef struct {
+int len;
+char *str;
+} CTAI_String;
+
+int CTA_String_Create(CTA_String *hstring){
+
+   CTAI_String *string;
+   int retval;
+
+   /* allocate memory for new string object */
+   string=CTA_Malloc(sizeof(CTAI_String));
+   string->len=0;
+   string->str=CTA_Malloc(sizeof(char));
+   string->str[0]=(char)0;
+
+   /* Allocate new handle and return eror when unsuccesfull */
+   retval=CTA_Handle_Create("string",CTA_STRING,string,hstring);
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "Free" 
+int CTA_String_Free(CTA_String *hstring)
+{
+   CTAI_String *string;
+   int retval;
+
+   retval=CTA_Handle_Check((CTA_Handle) *hstring,CTA_STRING);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_string handle");
+       return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) *hstring,(void**) &string);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   free(string->str);
+   free(string);
+
+   retval=CTA_Handle_Free(hstring);
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "Set"
+int CTA_String_Set(CTA_String hstring, const char* str)
+{
+// Deallocate the current contents of a cta-string, then
+// allocate a new string and fill it with the contents in the input
+   CTAI_String *string;
+   int retval;
+   int len;
+
+   retval=CTA_Handle_Check((CTA_Handle) hstring,CTA_STRING);
+
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_string handle");
+       return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) hstring,(void**) &string);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   } 
+
+   len=(int) strlen(str);
+   string->len=len;
+   free(string->str);
+   string->str=CTA_Malloc((len+1)*sizeof(char));
+
+   strncpy(string->str,str,len+1);
+
+   return CTA_OK;
+}
+
+#undef METHOD
+#define METHOD "Get"
+int CTA_String_Get(CTA_String hstring, char *str)
+{
+   CTAI_String *string;
+   int retval;
+
+   retval=CTA_Handle_Check((CTA_Handle) hstring,CTA_STRING);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_string handle");
+       return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) hstring,(void**) &string);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   strncpy(str,string->str,string->len+1);
+
+   return CTA_OK;
+}
+
+int CTA_String_Equals_Char(CTA_String hstring, const char *str0)
+{
+   char *str;
+   int retval, len;
+
+   retval = CTA_String_GetLength(hstring,&len);
+   str=CTA_Malloc((len+1)*sizeof(char));
+   retval = CTA_String_Get(hstring, str);
+   retval= CTA_FALSE;
+   if (strcmp(str,str0)==0) retval = CTA_TRUE;
+   return retval;
+}
+
+
+int CTA_Strings_Equal(CTA_String hstring1, CTA_String hstring2)
+{
+   char *str1, *str2;
+   int retval, len1, len2;
+
+   retval = CTA_String_GetLength(hstring1,&len1);
+   str1=CTA_Malloc((len1+1)*sizeof(char));
+   retval = CTA_String_Get(hstring1, str1);
+   retval = CTA_String_GetLength(hstring2,&len2);
+   str2=CTA_Malloc((len2+1)*sizeof(char));
+   retval = CTA_String_Get(hstring2, str2);
+   retval= CTA_FALSE;
+   if (strcmp(str1,str2)==0) retval = CTA_TRUE;
+   return retval;
+}
+
+
+
+
+#undef METHOD
+#define METHOD "GetValue"
+int CTA_String_GetValue(CTA_String hstring, void *value, CTA_Datatype datatype)
+{
+   CTAI_String *string;
+   int retval;
+
+   retval=CTA_Handle_Check((CTA_Handle) hstring, CTA_STRING);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_time handle");
+       return retval;
+   }
+
+   CTA_Handle_GetData((CTA_Handle) hstring,(void**) &string);
+   switch (datatype) {
+   case CTA_INTEGER:
+      *(int*)value = atoi(string->str);
+      return CTA_OK;
+   case CTA_REAL:
+      *(float*)value = (float)atof(string->str);
+      return CTA_OK;
+   case CTA_DOUBLE:
+      *(double*)value = atof(string->str);
+      return CTA_OK;
+   case CTA_STRING:
+      return CTA_String_Get(hstring, (char*)value);
+   }
+   return CTA_INCOMPATIBLE_HANDLE;
+}
+
+#undef METHOD
+#define METHOD "Conc"
+int CTA_String_Conc(CTA_String istring, CTA_String xstring)
+{
+   int retval, len1, len2, newlen;
+   char *hlpstr;
+   CTAI_String *string1;
+   CTAI_String *string2;
+
+   retval=CTA_Handle_Check((CTA_Handle) istring, CTA_STRING);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_string handle");
+       return retval;
+   }
+   retval=CTA_Handle_Check((CTA_Handle) xstring, CTA_STRING);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_string handle");
+       return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) istring, (void**) &string1);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) xstring, (void**) &string2);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   len1=string1->len;
+   len2=string2->len;
+   newlen=len1+len2;
+
+   hlpstr=CTA_Malloc((newlen+1)*sizeof(char));
+   strncpy(hlpstr,     string1->str,len1);
+   strncpy(hlpstr+len1,string2->str,len2+1);
+
+   string1->len=newlen;
+   free(string1->str);
+   string1->str=hlpstr;
+
+   return CTA_OK;
+}
+
+#undef METHOD
+#define METHOD "GetLength"
+int CTA_String_GetLength(CTA_String hstring, int *len)
+{
+   CTAI_String *string;
+   int retval;
+
+   retval=CTA_Handle_Check((CTA_Handle) hstring,CTA_STRING);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_string handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hstring,(void**) &string);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   *len = string->len;
+
+   return CTA_OK;
+}
+
+/** \brief Get a pointer to the contents of the string (INTERNAL USE)
+ *
+ *  \param hstring  I handle of the string
+ *  \return Pointer to the string contents
+ */
+char *CTAI_String_GetPtr(CTA_String hstring)
+{
+   CTAI_String *str;
+   CTA_Handle_GetData((CTA_Handle) hstring,(void**) &str);
+   return str->str;
+}
+
+/** \brief Allocate a string, created from a COSTA string (INTERNAL USE).
+ *
+ *  \param  hstr  I  COSTA string
+ *  \return          C string allocated by this function
+ */
+char *CTAI_String_Allocate(CTA_String hstr) {
+   int   len;
+   char* str;
+
+   str=NULL;
+   if (CTA_OK == CTA_String_GetLength(hstr, &len)){
+      str = (char *)CTA_Malloc((1 + len) * sizeof(char));
+      CTA_String_Get(hstr, str);
+   }
+   return str;
+}
+
+/** \brief Create a duplication of a COSTA string
+ *
+ *  \param hfrom  I  handle of string to copy
+ *  \param hto    O  handle of created string
+ *  \return CTA_OK if successful
+ */
+#undef METHOD
+#define METHOD "Duplicate"
+int CTA_String_Duplicate(CTA_String hfrom, CTA_String *hto) {
+   int retval;
+
+   retval = CTA_String_Create(hto);
+   if (retval != CTA_OK) {
+       CTA_WRITE_ERROR("Cannot create string");
+       return retval;
+   }
+   return CTA_String_Set(*hto, CTAI_String_GetPtr(hfrom));
+}
+
+#undef METHOD
+#define METHOD "Export"
+int CTA_String_Export(CTA_String hstring, CTA_Handle usrdata)
+{
+   int retval;
+   BOOL packout;
+
+   packout = (CTA_Handle_Check(usrdata,CTA_PACK)==CTA_OK);
+   if (packout) {
+      CTAI_String *str;
+      CTA_Handle_GetData((CTA_Handle) hstring,(void**) &str);
+
+      /* pack length and string itself */
+      retval=CTA_Pack_Add(usrdata,&str->len,sizeof(int));
+      if (retval!=CTA_OK) {
+         CTA_WRITE_ERROR("Error in CTA_Pack_Add ");
+         return retval;
+      }
+
+      retval=CTA_Pack_Add(usrdata,str->str,(str->len+1)*sizeof(char));
+      if (retval!=CTA_OK) {
+         CTA_WRITE_ERROR("Error in CTA_Pack_Add ");
+         return retval;
+      }
+
+   } else {
+      return CTA_FORMAT_NOT_SUPPORTED;
+   }
+   return CTA_OK;
+}
+
+#undef METHOD
+#define METHOD "Import"
+int CTA_String_Import(CTA_String hstring, CTA_Handle usrdata)
+{
+   int retval;
+   BOOL packout;
+
+   packout = (CTA_Handle_Check(usrdata,CTA_PACK)==CTA_OK);
+   if (packout) {
+      CTAI_String *str;
+      CTA_Handle_GetData((CTA_Handle) hstring,(void**) &str);
+
+      /* unpack length and string itself */
+      retval=CTA_Pack_Get(usrdata,&(str->len),sizeof(int));
+      if (retval!=CTA_OK) {
+         CTA_WRITE_ERROR("Error in CTA_Pack_Get ");
+         return retval;
+      }
+      str->str=realloc(str->str,sizeof(int)*(str->len+1));
+      retval=CTA_Pack_Get(usrdata,str->str,(str->len+1)*sizeof(char));
+      if (retval!=CTA_OK) {
+         CTA_WRITE_ERROR("Error in CTA_Pack_Get ");
+         return retval;
+      }
+
+   } else {
+      return CTA_FORMAT_NOT_SUPPORTED;
+   }
+   return CTA_OK;
+}
+
+
+/* Interfacing with Fortran */
+
+CTAEXPORT void CTA_STRING_CREATE_F77(int *hstring, int *ierr){
+   *ierr=CTA_String_Create((CTA_String*) hstring);
+}
+
+CTAEXPORT void CTA_STRING_FREE_F77(int *hstring, int *ierr){
+   *ierr=CTA_String_Free((CTA_String*) hstring);
+}
+
+CTAEXPORT void CTA_STRING_GETLENGTH_F77(int *hstring,int *len, int *ierr){
+   *ierr=CTA_String_GetLength((CTA_String) *hstring, len);
+}
+
+CTAEXPORT void CTA_STRING_SET_F77(int *hstring,char *str, int *ierr, int len_str){
+   char *c_str;
+
+   /* create a c-string equivalent to name */
+   c_str=CTA_Malloc((len_str+1)*sizeof(char));
+   CTA_fstr2cstr(str,c_str,len_str);
+
+   *ierr=CTA_String_Set((CTA_String) *hstring,c_str);
+
+   free(c_str);
+}
+
+
+CTAEXPORT void CTA_STRING_GET_F77(int *hstring,char *str, int *ierr, int len_str){
+   char *c_str;
+
+   c_str=CTA_Malloc((len_str+1)*sizeof(char));
+   *ierr=CTA_String_Get((CTA_String) *hstring,c_str);
+   if (*ierr!=CTA_OK) return;
+   *ierr=CTA_cstr2fstr(c_str,str, len_str);
+   free(c_str);
+
+}
+
+CTAEXPORT void CTA_STRING_GETVALUE_F77(int *hstring,void *value,int *datatype, int *ierr){
+   *ierr=CTA_String_GetValue((CTA_String) *hstring, value, *datatype);
+}
+
+CTAEXPORT void CTA_STRING_CONC_F77(int *istring, int *xstring, int *ierr){
+   *ierr=CTA_String_Conc((CTA_String) *istring, (CTA_String) *xstring);
+}
+
+
+CTAEXPORT void CTA_STRING_EXPORT_F77(int *hstring, int *usrdata, int *ierr){
+   *ierr=CTA_String_Export((CTA_String) *hstring, (CTA_Handle) *usrdata);
+}
+
+CTAEXPORT void CTA_STRING_IMPORT_F77(int *hstring, int *usrdata, int *ierr){
+   *ierr=CTA_String_Import((CTA_String) *hstring, (CTA_Handle) *usrdata);
+}
+
+CTAEXPORT void CTA_STRING_DUPLICATE_F77(int *hfrom, int *hto, int *ierr){
+   *ierr=CTA_String_Duplicate((CTA_String) *hfrom, (CTA_Handle*) hto);
+}
+
+
diff --git a/costa/native/cta/src/cta_time.c b/costa/native/cta/src/cta_time.c
new file mode 100644
index 000000000..80362174a
--- /dev/null
+++ b/costa/native/cta/src/cta_time.c
@@ -0,0 +1,634 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_time.c $
+$Revision: 2751 $, $Date: 2011-09-09 08:58:46 +0200 (Fri, 09 Sep 2011) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <stdio.h>
+#include <math.h>
+#include "cta_mem.h"
+#include "f_cta_utils.h"
+#include "ctai.h"
+#include "cta_time.h"
+#include "cta_pack.h"
+#include "cta_errors.h"
+#include "cta_file.h"
+#include "cta_message.h"
+
+#define CTA_TIME_CREATE_F77  F77_CALL(cta_time_create,CTA_TIME_CREATE)
+#define CTA_TIME_FREE_F77    F77_CALL(cta_time_free,CTA_TIME_FREE)
+#define CTA_TIME_SETSPAN_F77  F77_CALL(cta_time_setspan,CTA_TIME_SETSPAN)
+#define CTA_TIME_GETSPAN_F77  F77_CALL(cta_time_getspan,CTA_TIME_GETSPAN)
+#define CTA_TIME_SETSTEP_F77  F77_CALL(cta_time_setstep,CTA_TIME_SETSTEP)
+#define CTA_TIME_GETSTEP_F77  F77_CALL(cta_time_getstep,CTA_TIME_GETSTEP)
+#define CTA_TIME_GETSTEP_F77  F77_CALL(cta_time_getstep,CTA_TIME_GETSTEP)
+#define CTA_TIME_INSPAN_F77   F77_CALL(cta_time_inspan,CTA_TIME_INSPAN)
+#define CTA_TIME_ISSTEP_F77   F77_CALL(cta_time_isstep,CTA_TIME_ISSTEP)
+#define CTA_TIME_COPY_F77        F77_CALL(cta_time_copy,CTA_TIME_COPY)
+#define CTA_TIME_COUNTSTEPS_F77  F77_CALL(cta_time_countsteps,CTA_TIME_COUNTSTEPS)
+#define CTA_TIME_GETTIMESTEP_F77 F77_CALL(cta_time_gettimestep,CTA_TIME_GETTIMESTEP)
+#define CTA_TIME_EXPORT_F77 F77_CALL(cta_time_export,CTA_TIME_EXPORT)
+#define CTA_TIME_IMPORT_F77 F77_CALL(cta_time_import,CTA_TIME_IMPORT)
+#define CTA_TIME_ISSPAN_F77 F77_CALL(cta_time_isspan,CTA_TIME_ISSPAN)
+
+
+#define CLASSNAME "CTA_Time"
+
+/* Struct holding all data associated to an COSTA time span */
+
+typedef struct {
+BOOL isspan;
+double t1;
+double t2;
+double step;
+} CTAI_Time;
+
+
+#undef METHOD
+#define METHOD "Create"
+int CTA_Time_Create(CTA_Time *htime){
+
+   CTAI_Time *time;
+   int retval;
+
+   /* allocate memory for new time object */
+   time=CTA_Malloc(sizeof(CTAI_Time));
+   time->isspan=FALSE;
+   time->t1=0.0;
+   time->t2=0.0;
+   time->step=1.0;
+
+   /* Allocate new handle and return error when unsuccesfull */
+   retval=CTA_Handle_Create("time",CTA_TIME,time,htime);
+   if (retval) {
+	   CTA_WRITE_ERROR("Cannot create time handle");
+	   return retval;
+   }
+   return CTA_OK;
+}
+
+#undef METHOD
+#define METHOD "Free"
+int CTA_Time_Free(CTA_Time *htime){
+
+   CTAI_Time *time;
+   int retval;
+
+   if (*htime==CTA_NULL) return CTA_OK;
+
+   retval=CTA_Handle_Check((CTA_Handle) *htime,CTA_TIME);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_time handle");
+       return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) *htime,(void**) &time);
+   if (retval!=CTA_OK) {
+	   	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   free(time);
+   retval=CTA_Handle_Free(htime);
+   return retval;
+}
+
+#undef METHOD
+#define METHOD "IsSpan"
+int CTA_Time_IsSpan(CTA_Time htime, int *isspan){
+
+   CTAI_Time *time;
+   int retval;
+
+   retval=CTA_Handle_Check((CTA_Handle) htime,CTA_TIME);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_Time handle");
+	   return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) htime,(void**) &time);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+
+    if (time->isspan){
+       *isspan=CTA_TRUE;
+    } else {
+       *isspan=CTA_FALSE;
+    }
+   return CTA_OK;
+}
+
+#undef METHOD
+#define METHOD "SetSpan"
+int CTA_Time_SetSpan(CTA_Time htime,double tstart, double tend){
+
+   CTAI_Time *time;
+   int retval;
+
+   retval=CTA_Handle_Check((CTA_Handle) htime,CTA_TIME);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_time handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) htime,(void**) &time);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   time->t1     = tstart;
+   time->t2     = tend;
+   time->isspan = TRUE;
+
+   if (tstart==tend){
+      time->isspan = FALSE;
+   }
+
+
+   return CTA_OK;
+}
+
+#undef METHOD
+#define METHOD "GetSpan"
+int CTA_Time_GetSpan(CTA_Time htime,double *tstart, double *tend){
+
+   CTAI_Time *time;
+   int retval;
+
+   retval=CTA_Handle_Check((CTA_Handle) htime,CTA_TIME);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Handle is not a cta_time handle");	   
+	   return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) htime,(void**) &time);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   *tstart=time->t1;
+   *tend=time->t2;
+
+   return CTA_OK;
+}
+
+#undef METHOD
+#define METHOD "SetStep"
+int CTA_Time_SetStep(CTA_Time htime, double tstep){
+
+   CTAI_Time *time;
+   int retval;
+
+   retval=CTA_Handle_Check((CTA_Handle) htime,CTA_TIME);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_time handle");
+       return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) htime,(void**) &time);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   time->step=tstep;
+   return CTA_OK;
+}
+
+#undef METHOD
+#define METHOD "CountSteps"
+int CTA_Time_CountSteps(CTA_Time htime, int *nsteps){
+
+   CTAI_Time *time;
+   int retval;
+
+   retval=CTA_Handle_Check((CTA_Handle) htime,CTA_TIME);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_time handle");
+       return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) htime,(void**) &time);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   if (time->isspan) {
+      *nsteps = (int) ((time->t2-time->t1)/time->step +0.5);
+   } else {
+      *nsteps = 0;
+   }
+   return CTA_OK;
+}
+#undef METHOD
+#define METHOD "GetTimeStep"
+int CTA_Time_GetTimeStep(CTA_Time htime, int istep, CTA_Time htime_step){
+   CTAI_Time *time1, *time2;
+   int retval;
+
+   retval=CTA_Handle_Check((CTA_Handle) htime,CTA_TIME);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_time handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) htime,(void**) &time1);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   retval=CTA_Handle_Check((CTA_Handle) htime_step,CTA_TIME);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_time handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) htime_step,(void**) &time2);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   time2->step=time1->step;
+   if (time1->isspan) {
+      time2->t1=time1->t1+(double)(istep-1)*time1->step;
+      time2->t2=time1->t1+(double)(istep)  *time1->step;
+      if (time2->t2>time1->t2) time2->t2=time1->t2;
+   } else {
+      time2->step=time1->step;
+      time2->t1= 0.0;
+      time2->t2=-1.0;
+   }
+   return CTA_OK;
+
+}
+
+#undef METHOD
+#define METHOD "GetStep"
+int CTA_Time_GetStep(CTA_Time htime, double *tstep){
+
+   CTAI_Time *time;
+   int retval;
+
+   retval=CTA_Handle_Check((CTA_Handle) htime,CTA_TIME);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_time handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) htime,(void**) &time);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   if (time->isspan) {
+      *tstep = time->step;
+   } else {
+      *tstep = 0;
+   }
+   return CTA_OK;
+}
+
+#undef METHOD
+#define METHOD "InSpan"
+/* is timesub inside time */
+int CTA_Time_InSpan(CTA_Time htimesub, CTA_Time htime,  BOOL *inspan){
+
+   CTAI_Time *time ;
+   CTAI_Time *timesub ;
+   int retval;
+   double tstartsub, tendsub, tstart, tend;
+
+   retval=CTA_Handle_Check((CTA_Handle) htimesub,CTA_TIME);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_time handle");
+       return retval;
+   }
+   retval=CTA_Handle_Check((CTA_Handle) htime,CTA_TIME);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_time handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) htime,(void**) &time);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) htimesub,(void**) &timesub);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   tstartsub = timesub->t1;
+   tendsub = timesub->t2;
+   tstart = time->t1;
+   tend = time->t2;
+
+   if (tstartsub>tendsub || tstart> tend){
+      *inspan=FALSE;
+   }
+   else {
+      *inspan = (tstartsub >= tstart && tendsub <= tend) ;
+   }
+   return CTA_OK;
+}
+
+#undef METHOD
+#define METHOD "IsStep"
+ /* is timesub inside time */
+int CTA_Time_IsStep(CTA_Time htime, double t,  BOOL *isstep){
+
+   CTAI_Time *time ;
+   int retval, ifrac;
+   double eps, frac, diff;
+
+   retval=CTA_Handle_Check((CTA_Handle) htime,CTA_TIME);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_time handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) htime,(void**) &time);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   /* Don't know what is really nice just try something */
+   eps=M_EPS*100.0*MAX(fabs(time->t1),fabs(time->t2))+M_EPS;
+
+   *isstep=FALSE;
+   if ((time->t1-eps)<t && t< (time->t2+eps)){
+      frac=(t-time->t1)/time->step;
+      ifrac=(int) (frac +0.5);
+      diff=fabs(((double) ifrac) - frac);
+      if ( diff<eps) {
+        *isstep=TRUE;
+      }
+   }
+   return CTA_OK;
+}
+
+#undef METHOD
+#define METHOD "Copy"
+int CTA_Time_Copy(CTA_Time hfrom, CTA_Time hto) {
+   int retval;
+   double tstart, tend, tstep;
+   CTAI_Time *to;
+
+   retval=CTA_Handle_Check((CTA_Handle) hfrom,CTA_TIME);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_time handle");
+       return retval;
+   }
+
+   retval=CTA_Handle_Check((CTA_Handle) hto,CTA_TIME);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_time handle");
+       return retval;
+   }
+
+   retval = CTA_Time_GetSpan(hfrom, &tstart, &tend);
+   if (retval != CTA_OK) {
+       CTA_WRITE_ERROR("Cannot get timespan");
+       return retval;
+   }
+
+   retval = CTA_Time_GetStep(hfrom, &tstep);
+   if (retval != CTA_OK) {
+       CTA_WRITE_ERROR("Cannot get timespan");
+       return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) hto,(void**) &to);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   to->t1     = tstart;
+   to->t2     = tend;
+   to->step   = tstep;
+   to->isspan = TRUE;
+   retval = CTA_OK;
+
+   return retval;
+     }
+
+#undef METHOD
+#define METHOD "Export"
+int CTA_Time_Export(CTA_Time htime, CTA_Handle hexport){
+   CTA_Datatype datatype;
+   int retval;
+   FILE *file;          //File pointer
+   CTAI_Time *time;
+
+   /* Check handle */
+   retval=CTA_Handle_Check((CTA_Handle) htime,CTA_TIME);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_time handle");
+       return retval;
+   }
+
+   /* Get data */
+   retval=CTA_Handle_GetData((CTA_Handle) htime,(void**) &time);
+   if (retval!=CTA_OK) return retval;
+
+   /* Get type of handle of hexport */
+   retval=CTA_Handle_GetDatatype(hexport, &datatype);
+   if (retval != CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   /* perform the export depending on hexport */
+   if (datatype==CTA_PACK){
+      retval=CTA_Pack_Add(hexport,time,sizeof(CTAI_Time));
+   }
+   else if (datatype==CTA_FILE){
+      retval=CTA_File_Get(hexport,&file);
+      if (retval != CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get file");
+	   return retval;
+   }
+      fprintf(file,"[%lg %lg %lg %d];\n", time->t1, time->t2, time->step, time->isspan);
+   }
+   else {
+      return CTA_FORMAT_NOT_SUPPORTED;
+   }
+   return CTA_OK;
+}
+
+#undef METHOD
+#define METHOD "Import"
+int CTA_Time_Import(CTA_Time htime, CTA_Handle himport){
+   CTA_Datatype datatype;
+   int retval;
+   CTAI_Time *time;
+
+   /* Check handle */
+   retval=CTA_Handle_Check((CTA_Handle) htime,CTA_TIME);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_time handle");
+       return retval;
+   }
+
+   /* Get data */
+   retval=CTA_Handle_GetData((CTA_Handle) htime,(void**) &time);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   /* Get type of handle of himport */
+   retval=CTA_Handle_GetDatatype(himport, &datatype);
+   if (retval != CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle datatype");
+	   return retval;
+   }
+
+   /* perform the export depending on hexport */
+   if (datatype==CTA_PACK){
+      retval=CTA_Pack_Get(himport,time,sizeof(CTAI_Time));
+   }
+   else if (datatype==CTA_FILE){
+      return CTA_FORMAT_NOT_SUPPORTED;
+   }
+   else {
+      return CTA_FORMAT_NOT_SUPPORTED;
+   }
+   return CTA_OK;
+}
+
+
+void ctai_gregor(int ijuld, int *iy, int *im, int *id){
+/* id      o   gregorian day   */
+/* ijuld   i   julian day      */
+/* im      o   gregorian month */
+/* iy      o   gregorian year  */
+
+  int l,n;
+//    put julian day in auxiliary variable
+      l  = ijuld + 68569;
+      n  = 4*l / 146097;
+      l  = l - ( 146097*n + 3 ) / 4;
+      *iy = 4000 * ( l+1 ) / 1461001;
+      l  = l - 1461 * *iy / 4 + 31;
+      *im = 80 * l / 2447;
+
+//     calculate gregorian day
+      *id = l - 2447 * *im / 80;
+      l  = *im / 11;
+
+//     calculate gregorian month
+      *im = *im + 2 - 12 * l;
+
+//     calculate gregorian year
+      *iy = 100 * ( n- 49 ) + *iy + l;
+}
+
+int ctai_julian ( int iyear, int imonth, int iday ){
+
+/*     iday    i   gregorian day
+     imonth  i   gregorian month
+     iyear   i   gregorian year
+     julian      name of this function
+*/
+      int ijuld, imon1;
+/*
+     id      gregorian day
+     ijuld   julian daynumber
+     im      gregorian month
+     imon1   auxiliary variable
+     iy      gregorian year
+*/
+
+      imon1 = (imonth-14)/12;
+
+//    calculate julian day number
+
+      ijuld = iday - 32075 + 1461 * ( iyear + 4800 + imon1 ) / 4
+               + 367 * ( imonth - 2 - imon1 * 12 ) / 12
+               - 3 * ( ( iyear + 4900 + imon1 ) / 100 ) / 4;
+      return ijuld;
+}
+
+/* Interfacing with Fortran */
+
+CTAEXPORT void CTA_TIME_CREATE_F77(int *htime, int *ierr){
+   *ierr=CTA_Time_Create((CTA_Time *) htime);
+}
+
+CTAEXPORT void CTA_TIME_FREE_F77(int *htime, int *ierr){
+   *ierr=CTA_Time_Free((CTA_Time *) htime);
+}
+CTAEXPORT void CTA_TIME_SETSPAN_F77(int *htime,double *tstart, double *tend, int *ierr){
+   *ierr=CTA_Time_SetSpan((CTA_Time) *htime,*tstart,*tend);
+}
+CTAEXPORT void CTA_TIME_GETSPAN_F77(int *htime,double *tstart, double *tend, int *ierr){
+   *ierr=CTA_Time_GetSpan((CTA_Time) *htime,tstart, tend);
+}
+CTAEXPORT void CTA_TIME_SETSTEP_F77(int *htime,double *tstep, int *ierr){
+   *ierr=CTA_Time_SetStep((CTA_Time) *htime,*tstep);
+}
+CTAEXPORT void CTA_TIME_GETSTEP_F77(int *htime,double *tstep, int *ierr){
+   *ierr=CTA_Time_GetStep((CTA_Time) *htime,tstep);
+}
+
+CTAEXPORT void CTA_TIME_INSPAN_F77(int *htimesub, int *htime,  int *inspan, int *ierr){
+   *ierr=CTA_Time_InSpan((CTA_Time) *htimesub, (CTA_Time) *htime,  (BOOL*) inspan);
+}
+
+
+CTAEXPORT void CTA_TIME_ISSTEP_F77(int *htime, double *t,  int *isstep, int *ierr){
+   *ierr=CTA_Time_IsStep((CTA_Time) *htime, *t,  (BOOL*) isstep);
+}
+
+CTAEXPORT void CTA_TIME_COPY_F77(int *hfrom, int *hto, int *ierr){
+  *ierr=CTA_Time_Copy((CTA_Time) *hfrom, (CTA_Time) *hto);
+}
+
+CTAEXPORT void CTA_TIME_COUNTSTEPS_F77(int *htime, int *nsteps, int *ierr){
+   *ierr=CTA_Time_CountSteps((CTA_Time) *htime, nsteps);
+}
+
+
+CTAEXPORT void CTA_TIME_GETTIMESTEP_F77(int *htime, int *istep, int *htime_step,
+                              int *ierr){
+   *ierr=CTA_Time_GetTimeStep((CTA_Time) *htime, *istep,
+                              (CTA_Time) *htime_step);
+}
+
+
+CTAEXPORT void CTA_TIME_EXPORT_F77(int *htime, int *hexport, int *ierr){
+   *ierr=CTA_Time_Export((CTA_Time) *htime, (CTA_Handle) *hexport);
+}
+
+
+CTAEXPORT void CTA_TIME_IMPORT_F77(int *htime, int *himport, int *ierr){
+   *ierr=CTA_Time_Import((CTA_Time) *htime, (CTA_Handle) *himport);
+}
+
+
+
+CTAEXPORT void CTA_TIME_ISSPAN_F77(int *htime, int *isspan, int *ierr){
+   *ierr=CTA_Time_IsSpan((CTA_Time) *htime, isspan);
+}
+
+
+
diff --git a/costa/native/cta/src/cta_tree.c b/costa/native/cta/src/cta_tree.c
new file mode 100644
index 000000000..63ac6d365
--- /dev/null
+++ b/costa/native/cta/src/cta_tree.c
@@ -0,0 +1,771 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_tree.c $
+$Revision: 3361 $, $Date: 2012-07-04 16:52:30 +0200 (Wed, 04 Jul 2012) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include "cta_mem.h"
+#include "f_cta_utils.h"
+#include "cta_defaults.h"
+#include "cta_handles.h"
+#include "cta_time.h"
+#include "cta_vector.h"
+#include "cta_errors.h"
+#include "cta_tree.h"
+#include "ctai_datatypes.h"
+#include "ctai_handles.h"
+#include "ctai_string.h"
+#include "ctai_vector.h"
+#include "ctai_sobs.h"
+#include "cta_message.h"
+
+#define SIGNAL_COUNT (-222)
+
+#define CTA_TREE_CREATE_F77            F77_CALL(cta_tree_create,CTA_TREE_CREATE)
+#define CTA_TREE_FREE_F77              F77_CALL(cta_tree_free,CTA_TREE_FREE)
+#define CTA_TREE_ADDHANDLE_F77        F77_CALL(cta_tree_addhandle,CTA_TREE_ADDHANDLE)
+#define CTA_TREE_COUNT_HANDLES_F77     F77_CALL(cta_tree_counthandles,CTA_TREE_COUNTHANDLES)
+#define CTA_TREE_COUNT_HANDLESSTR_F77  F77_CALL(cta_tree_counthandlesstr,CTA_TREE_COUNTHANDLESSTR)
+#define CTA_TREE_GET_HANDLESTR_F77     F77_CALL(cta_tree_gethandlestr,CTA_TREE_GETHANDLESTR)
+#define CTA_TREE_GET_VALUESTR_F77      F77_CALL(cta_tree_getvaluestr,CTA_TREE_GETVALUESTR)
+#define CTA_TREE_GET_HANDLE_F77        F77_CALL(cta_tree_gethandle,CTA_TREE_GETHANDLE)
+#define CTA_TREE_GET_VALUE_F77         F77_CALL(cta_tree_getvalue,CTA_TREE_GETVALUE)
+#define CTA_TREE_COUNT_ITEMS_F77       F77_CALL(cta_tree_countitems,CTA_TREE_COUNTITEMS)
+#define CTA_TREE_GET_ITEM_F77          F77_CALL(cta_tree_getitem,CTA_TREE_GETITEM)
+#define CTA_TREE_GET_ITEMVALUE_F77     F77_CALL(cta_tree_getitemvalue,CTA_TREE_GETITEMVALUE)
+#define CTA_TREE_PRINT_F77             F77_CALL(cta_tree_print,CTA_TREE_PRINT)
+
+#define CLASSNAME "CTA_Tree"
+
+
+/* Struct holding all data associated to an COSTA tree */
+typedef struct {
+   CTA_Vector hvector;
+} CTAI_Tree;
+
+
+/* Create a COSTA tree */
+int CTA_Tree_Create(CTA_Tree *htree){
+
+   CTAI_Tree *tree;
+   int retval;
+
+   /* Allocate memory for new tree object */
+   tree=CTA_Malloc(sizeof(CTAI_Tree));
+   retval = CTA_Vector_Create(CTA_DEFAULT_VECTOR, 0, CTA_HANDLE, CTA_HANDLE, &(tree->hvector));
+
+   /* Allocate new handle and return error when unsuccessful */
+   retval=CTA_Handle_Create("tree",CTA_TREE,tree,htree);
+   return retval;
+}
+
+
+/* Release a COSTA tree */
+#undef METHOD
+#define METHOD "Free"  
+int CTA_Tree_Free(CTA_Tree *htree)
+{
+   CTAI_Tree *tree;
+   int retval;
+
+   retval=CTA_Handle_Check((CTA_Handle) *htree,CTA_TREE);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_tree handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) *htree,(void**) &tree);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   /* Remove the actual tree */
+   retval = CTA_Vector_Free(&(tree->hvector));
+   free(tree);
+   retval=CTA_Handle_Free(htree);
+   return retval;
+}
+
+
+/* Add a COSTA handle to the COSTA tree */
+#undef METHOD
+#define METHOD "AddHandle" 
+int CTA_Tree_AddHandle(CTA_Tree htree, const char *name, CTA_Handle hitem)
+{
+   CTAI_Tree *tree;
+   int retval;
+
+   retval=CTA_Handle_Check((CTA_Handle) htree, CTA_TREE);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_tree handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) htree,(void**) &tree);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   retval = CTAI_Handle_SetName(hitem, name);
+   if (retval != CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot set name");
+	   return retval;
+   }
+
+   return CTA_Vector_AppendVal(tree->hvector, &hitem, CTA_HANDLE);
+}
+
+
+/* Get the next name from a path */
+/* A path must be separated by / or \ */
+static void CTAI_Tree_GetNextName(const char *path, char *name, const char **psplitpos) {
+   const char *splitpos;
+   const char *slashpos;
+   const char *bslashpos;
+   const char *srchpath;
+
+   /* If the first character of the path is a separator, skip it */
+   srchpath = path;
+   if (*srchpath == '/' || *srchpath == '\\') {
+      ++srchpath;
+   }
+
+   /* Find first path component */
+   slashpos = strchr(srchpath, '/');
+   bslashpos = strchr(srchpath, '\\');
+   if (slashpos && !bslashpos) {
+      splitpos = slashpos;
+   } else if (bslashpos && !slashpos) {
+      splitpos = bslashpos;
+   } else if (slashpos && bslashpos) {
+      splitpos = (slashpos < bslashpos ? slashpos : bslashpos);
+   } else{
+      splitpos = NULL;
+   }
+
+   if (splitpos) {
+      /* Get the path part upto the split position */
+      strncpy(name, srchpath, splitpos - srchpath);
+      name[splitpos - srchpath] = '\0';
+   } else {
+      /* No split position, take the whole path */
+      strcpy(name, srchpath);
+   }
+
+   *psplitpos = splitpos;
+}
+
+
+/* Find a COSTA handle in the COSTA tree */
+/* A path must be separated by / or \ */
+/* Returns the handle, or CTA_NULL if not found */
+static CTA_Handle CTAI_Tree_FindHandle(CTA_Tree htree, const char *path, BOOL skiproot)
+{
+   const CTAI_Tree *tree;
+   const char *splitpos;
+   char *name;
+   CTA_Handle h;
+   CTA_Handle hdefault;
+   CTA_Datatype dt;
+   int retval;
+
+   /* If path is NULL, return CTA_NULL */
+   if (!path) return CTA_NULL;
+
+   /* Get vector */
+   retval=CTA_Handle_GetData((CTA_Handle) htree,(void**) &tree);
+   if (retval!=CTA_OK) return retval;
+
+   /* Allocate and get name */
+   name = (char*)CTA_Malloc(1 + strlen(path));
+   CTAI_Tree_GetNextName(path, name, &splitpos);
+
+   /* If this name is the root, skip it */
+   if (skiproot && splitpos && (*(splitpos + 1) != '\0') &&
+       0 == strcmp(CTAI_Handle_GetName(htree), name)) {
+      CTAI_Tree_GetNextName(splitpos + 1, name, &splitpos);
+   }
+
+   /* Find the given element on this level */
+   h = CTAI_Vector_FindHandle(tree->hvector, name);
+   free(name);
+   if (h == CTA_NULL) return h;
+
+
+   /* Check whether there is path remaining */
+   if (splitpos && (*(splitpos + 1) != '\0'))
+   {
+      /* Check whether it's a tree */
+      retval = CTA_Handle_GetDatatype(h, &dt);
+      if (retval != CTA_OK) return retval;
+      if (dt == CTA_TREE) {
+         /* It's a tree, recurse */
+         return CTAI_Tree_FindHandle(h, splitpos + 1, FALSE);
+      }
+   }
+
+   /* If the handle found is a tree with a default value,
+      return the default value */
+   retval = CTA_Handle_GetDatatype(h, &dt);
+   if (retval != CTA_OK) return retval;
+   if (dt == CTA_TREE) {
+      /* It's a tree, try to find a default value */
+      hdefault = CTAI_Tree_FindHandle(h, "", FALSE);
+      if (hdefault != CTA_NULL) {
+         h = hdefault;
+      }
+   }
+
+   /* Return the handle found */
+   return h;
+}
+
+
+
+#undef METHOD
+#define METHOD "GetHandleStr"
+int CTA_Tree_GetHandleStr(CTA_Tree htree, char* str, CTA_Handle *hitem){
+   CTA_String hstr;
+   int retval;
+
+   retval=CTA_String_Create(&hstr);
+   if (retval != CTA_OK) {
+       CTA_WRITE_ERROR("Cannot create string");
+       return retval;
+   }
+   retval=CTA_String_Set(hstr,str);
+   if (retval != CTA_OK) {
+       CTA_WRITE_ERROR("Cannot set string");
+       return retval;
+   }
+   retval=CTA_Tree_GetHandle(htree, hstr, hitem);
+   if (retval != CTA_OK) {
+	   char message[1024];
+	   sprintf(message,"Cannot get tree handle: %s \n",str);
+       CTA_WRITE_INFO(message);
+       return retval;
+   }
+   retval=CTA_String_Free(&hstr);
+   if (retval != CTA_OK) {
+       CTA_WRITE_ERROR("Cannot free string");
+       return retval;
+   }
+   return CTA_OK;
+};
+
+
+#undef METHOD
+#define METHOD "GetValueStr"
+int CTA_Tree_GetValueStr(CTA_Tree htree, char* str, void *value, CTA_Datatype datatype){
+   CTA_String hstr;
+   int retval;
+   char msg[256];
+
+   retval=CTA_String_Create(&hstr);
+   if (retval != CTA_OK) {
+       CTA_WRITE_ERROR("Cannot create string");
+       return retval;
+   }
+   retval=CTA_String_Set(hstr,str);
+   if (retval != CTA_OK) {
+       CTA_WRITE_ERROR("Cannot set string");
+       return retval;
+   }
+   retval=CTA_Tree_GetValue(htree, hstr, value, datatype);
+   if (retval != CTA_OK) {
+       sprintf(msg,"Cannot get value '%s' from tree",str);    
+       CTA_WRITE_ERROR(msg);
+       return retval;
+   }
+   retval=CTA_String_Free(&hstr);
+   if (retval != CTA_OK) {
+       CTA_WRITE_ERROR("Cannot free string");
+       return retval;
+   }
+   return CTA_OK;
+};
+
+/* Find a COSTA handle in the COSTA tree */
+/* A path must be separated by / or \ */
+#undef METHOD
+#define METHOD "GetHandle"
+int CTA_Tree_GetHandle(CTA_Tree htree, CTA_String path, CTA_Handle *hitem)
+{
+   int retval;
+   const char *pstr;
+
+
+   retval=CTA_Handle_Check((CTA_Handle) htree, CTA_TREE);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_tree handle");
+       return retval;
+   }
+
+   pstr = CTAI_String_GetPtr(path);
+   if (pstr) {
+      *hitem = CTAI_Tree_FindHandle(htree, pstr, TRUE);
+      return (*hitem == CTA_NULL ? CTA_ITEM_NOT_FOUND : CTA_OK);
+   }
+   *hitem = 0;
+   return CTA_ITEM_NOT_FOUND;
+}
+
+
+/** \brief Return the value of a COSTA handle from the COSTA tree (by path)
+ *
+ *  \note In case of trees with default values, returns the default value.
+ *
+ *  \param htree    I handle of the tree instance
+ *  \param path     I path of the item, separated by / or \
+ *  \param value    O value of the COSTA item, or CTA_NULL in case of not found
+ *  \param datatype I datatype of the value specified
+ *  \return CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found
+ */
+int CTA_Tree_GetValue(CTA_Tree htree, CTA_String path, void *value, CTA_Datatype datatype)
+{
+   int        retval;
+   CTA_Handle hitem;
+
+   retval = CTA_Tree_GetHandle(htree, path, &hitem);
+   if (retval == CTA_OK) {
+      retval = CTA_Handle_GetValue(hitem, value, datatype);
+   } else {
+      *(CTA_Handle *)value = CTA_NULL;
+   }
+   return retval;
+}
+
+/* Count the number of COSTA handles in the COSTA tree */
+/* A path must be separated by / or \ */
+static int CTAI_Tree_CountHandles(CTA_Tree htree, const char *path, BOOL skiproot)
+{
+   const CTAI_Tree *tree;
+   const CTAI_Tree *subtree;
+   const char *splitpos;
+   const char *skippos = NULL;
+   char *name;
+   CTA_Handle h;
+   CTA_Datatype dt;
+   int retval;
+   int count = 0;
+   BOOL root_skipped = FALSE;
+
+   /* If path is NULL, return CTA_NULL */
+   if (!path) return CTA_NULL;
+
+   /* Get vector */
+   retval=CTA_Handle_GetData((CTA_Handle) htree,(void**) &tree);
+   if (retval!=CTA_OK) return -1;
+
+   /* Allocate and get name */
+   name = (char*)CTA_Malloc(1 + strlen(path));
+   CTAI_Tree_GetNextName(path, name, &splitpos);
+
+   /* If this name is the root, skip it */
+   if (skiproot && splitpos && (*(splitpos + 1) != '\0') &&
+      0 == strcmp(CTAI_Handle_GetName(htree), name)) {
+      root_skipped = TRUE;
+      skippos = splitpos;
+      CTAI_Tree_GetNextName(splitpos + 1, name, &splitpos);
+   }
+
+   /* Find the given element on this level */
+   h = CTAI_Vector_FindHandle(tree->hvector, name);
+   if (h == CTA_NULL) {
+      count = -1;
+      goto return_pos;
+   }
+
+   /* Check whether there is path remaining */
+   if (splitpos && (*(splitpos + 1) != '\0'))
+   {
+      /* Check whether it's a tree */
+      retval = CTA_Handle_GetDatatype(h, &dt);
+      if (retval != CTA_OK) {
+         count = -1;
+         goto return_pos;
+      }
+      if (dt == CTA_TREE) {
+         /* It's a tree, recurse */
+         count = CTAI_Tree_CountHandles(h, splitpos + 1, FALSE);
+         if (count == SIGNAL_COUNT) {
+            retval=CTA_Handle_GetData((CTA_Handle) h,(void**) &subtree);
+            if (retval != CTA_OK) {
+               count = -1;
+               goto return_pos;
+            }
+            count = CTAI_Vector_CountHandles(subtree->hvector, splitpos + 1);
+         }
+      }
+   } else {
+      if (root_skipped) {
+         if (skippos && (*(skippos + 1) != '\0')) {
+            /* No recursion yet: count elements on root level */
+            retval=CTA_Handle_GetData((CTA_Handle)htree, (void**)&subtree);
+            if (retval != CTA_OK) {
+               count = -1;
+               goto return_pos;
+            }
+            count = CTAI_Vector_CountHandles(subtree->hvector, skippos + 1);
+         } else {
+            /* There is only one root */
+            count = 1;
+         }
+      } else {
+         /* Return SIGNAL_COUNT to signal that the count should be determined */
+         count = SIGNAL_COUNT;
+      }
+   }
+
+return_pos:
+   free(name);
+   return count;
+}
+
+
+/** \brief Counts the number of COSTA handles specified by the given path.
+ *
+ *  \param htree  I handle of the tree instance
+ *  \param path   I path of the item, separated by / or \
+ *  \param hitem  O number of items found
+ *  \return CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found
+ */
+#undef METHOD
+#define METHOD "CountHandles" 
+int CTA_Tree_CountHandles(CTA_Tree htree, CTA_String path, int *count) {
+   int        retval;
+   const char *pstr;
+
+   retval=CTA_Handle_Check((CTA_Handle) htree, CTA_TREE);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_tree handle");
+       return retval;
+   }
+
+   pstr = CTAI_String_GetPtr(path);
+   if (pstr) {
+      *count = CTAI_Tree_CountHandles(htree, pstr, TRUE);
+      return (*count < 0 ? CTA_INTERNAL_ERROR : CTA_OK);
+   }
+   *count = -1;
+   return CTA_INTERNAL_ERROR;
+}
+
+/** \brief Counts the number of COSTA handles specified by the given path.
+ *
+ *  \param htree  I handle of the tree instance
+ *  \param path   I path of the item, separated by / or \
+ *  \param hitem  O number of items found
+ *  \return CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found
+ */
+#undef METHOD
+#define METHOD "CountHandleStr" 
+int CTA_Tree_CountHandlesStr(CTA_Tree htree, char *path, int *count) {
+   int        retval;
+
+   retval=CTA_Handle_Check((CTA_Handle) htree, CTA_TREE);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_tree handle");
+       return retval;
+   }
+
+   if (path) {
+      *count = CTAI_Tree_CountHandles(htree, path, TRUE);
+      return (*count < 0 ? CTA_INTERNAL_ERROR : CTA_OK);
+   }
+   *count = -1;
+   return CTA_INTERNAL_ERROR;
+}
+
+
+
+/** \brief Return the number of elements on the current level of the COSTA tree
+ *
+ *  \param htree  I handle of the tree level
+ *  \param count  O number of elements on the current tree level
+ *  \return CTA_OK if successful
+ */
+#undef METHOD
+#define METHOD "CountItems" 
+int CTA_Tree_CountItems(CTA_Tree htree, int *count) {
+   const CTAI_Tree *tree;
+   int             retval;
+
+   retval=CTA_Handle_Check((CTA_Handle) htree, CTA_TREE);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("cta_handle_check in cta_tree_countItems failed.");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) htree,(void**) &tree);
+   if (retval!=CTA_OK) {
+     CTA_WRITE_ERROR("cta_handle_getdata in cta_tree_countItems failed.");
+     return retval;
+   }
+   return CTA_Vector_GetSize(tree->hvector, count);
+ }
+
+
+/** \brief Returns an element on the current level of the COSTA tree
+ *
+ *  \param htree  I handle of the tree level
+ *  \param index  I index of the item to return. 1 <= index <= CTA_Tree_CountItems()
+ *  \param hitem  O handle of the item to return
+ *  \return CTA_OK if successful
+ */
+#undef METHOD
+#define METHOD "GetItem"
+int CTA_Tree_GetItem(CTA_Tree htree, int index, CTA_Handle *hitem) {
+   const CTAI_Tree *tree;
+   int             retval;
+
+   retval=CTA_Handle_Check((CTA_Handle) htree, CTA_TREE);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_tree handle");
+       return retval;
+   };
+   retval=CTA_Handle_GetData((CTA_Handle) htree,(void**) &tree);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   return CTA_Vector_GetVal(tree->hvector, index, hitem, CTA_HANDLE);
+}
+
+
+/** \brief Return the value of a COSTA handle from the COSTA tree (by index)
+ *
+ *  \note In case of trees with default values, returns the default value.
+ *
+ *  \param htree    I handle of the tree instance
+ *  \param index    I index of the item
+ *  \param value    O value of the COSTA item, or CTA_NULL in case of not found
+ *  \param datatype I datatype of the value specified
+ *  \return CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found
+ */
+int CTA_Tree_GetItemValue(CTA_Tree htree, int index, void *value, CTA_Datatype datatype)
+{
+   int        retval;
+   CTA_Handle hitem;
+
+   retval = CTA_Tree_GetItem(htree, index, &hitem);
+   if (retval == CTA_OK) {
+      retval = CTA_Handle_GetValue(hitem, value, datatype);
+   } else {
+      *(CTA_Handle *)value = CTA_NULL;
+   }
+   return retval;
+}
+
+
+/** \brief Print a COSTA tree to STDOUT
+*
+*  \param htree  I handle of the tree
+*  \return CTA_OK if successful
+*/
+static void ctatree_print(CTA_Handle h, int depth)
+{
+   int          i;
+   int          j;
+   int          n;
+   int          count;
+   double       t1, t2, ts;
+   CTA_Datatype dt;
+   CTAI_Tree*   tree;
+   CTA_Handle   val;
+   const char*  text;
+
+   /* Print depth */
+
+
+   for (i = 0; i < depth; ++i) {
+      printf("| ");
+   }
+
+   CTA_Handle_GetDatatype(h, &dt);
+   /* Print handle name */
+   printf("name: '%s': ", CTAI_Handle_GetName(h));
+
+   /* If this handle is a tree, handle each argument at depth + 1 */
+   switch (dt) {
+   case CTA_TREE:
+      printf("tree\n");
+
+      /* Get vector */
+      CTA_Handle_GetData((CTA_Handle) h,(void**) &tree);
+
+      // ce test of het wel vector is
+      CTA_Handle_GetDatatype(h, &dt);
+      //if (dt == CTA_TREE) printf(" inderdaad een tree %d \n",retval );
+
+      CTA_Tree_CountItems(h, &count) ;
+
+      /* Get number of elements in the current tree */
+      CTA_Vector_GetSize(tree->hvector, &n);
+
+      /* Recurse */
+      for (i = 1; i <= n; ++i) {
+         CTA_Vector_GetVal(tree->hvector, i, &val, CTA_HANDLE);
+         ctatree_print(val, depth + 1);
+      }
+      break;
+   case CTA_STRING:
+      /* Print string text */
+      printf("string: '%s'\n", CTAI_String_GetPtr(h));
+      break;
+   case CTA_TIME:
+      /* Print time span */
+      CTA_Time_GetSpan(h, &t1, &t2);
+      CTA_Time_GetStep(h, &ts);
+      printf("time: from %lg to %lg, step %lg\n", t1, t2, ts);
+      break;
+   case CTA_VECTOR:
+      /* Print vector */
+      CTA_Vector_GetSize(h, &n);
+      CTA_Vector_GetDatatype(h, &dt);
+      printf("vector: type='%s', dim=%d\n", CTAI_Type2String(dt), n);
+      for (j = 1; j <= n; ++j) {
+         for (i = 0; i <= depth; ++i) {
+            printf("| ");
+         }
+         text = CTAI_Vector_GetStringVal(h, j);
+         printf("element[%d] = '%s'\n", j, text);
+         free((char*)text);
+      }
+      break;
+   case CTA_SOBS:
+      /* Print stochastic observer */
+      val = CTAI_SObs_GetUserData(h, 1);
+      if (val != CTA_NULL) {
+         printf("stochastic observer: database='%s'\n", CTAI_String_GetPtr(val));
+      } else {
+         printf("stochastic observer\n");
+      }
+      break;
+   default:
+      printf("unknown data type\n");
+      break;
+   }
+}
+
+
+/** \brief Print a COSTA tree to STDOUT
+*
+*  \param htree  I handle of the tree
+*  \return CTA_OK if successful
+*/
+#undef METHOD
+#define METHOD "Print"
+int CTA_Tree_Print(CTA_Tree htree)
+{
+   int retval;
+
+   retval=CTA_Handle_Check((CTA_Handle) htree, CTA_TREE);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_tree handle");
+       return retval;
+   }
+
+   ctatree_print(htree, 0);
+   return CTA_OK;
+}
+
+/* Interfacing with Fortran */
+CTAEXPORT void CTA_TREE_CREATE_F77(int *htree, int *ierr) {
+   *ierr=CTA_Tree_Create((CTA_Tree*) htree);
+}
+
+CTAEXPORT void CTA_TREE_FREE_F77(int *htree, int *ierr) {
+   *ierr=CTA_Tree_Free((CTA_Tree*) htree);
+}
+
+CTAEXPORT void CTA_TREE_ADDHANDLE_F77(int *htree, int *hname, int *hitem, int *ierr) {
+   *ierr=CTA_Tree_AddHandle((CTA_Tree)*htree, CTAI_String_GetPtr(*hname), (CTA_Handle)*hitem);
+}
+
+CTAEXPORT void CTA_TREE_COUNT_HANDLES_F77(int *htree, int *hpath, int *count, int *ierr) {
+   *ierr=CTA_Tree_CountHandles((CTA_Tree)*htree, (CTA_String)*hpath, count);
+}
+
+CTAEXPORT void CTA_TREE_COUNT_HANDLESSTR_F77(int *htree, char *path, int *count, int *ierr, int len_str) {
+   char *c_str;
+
+   /* create a c-string equivalent to name */
+   c_str=CTA_Malloc((len_str+1)*sizeof(char));
+   CTA_fstr2cstr(path,c_str,len_str);
+
+
+   *ierr=CTA_Tree_CountHandlesStr((CTA_Tree)*htree, c_str, count);
+   free(c_str);
+}
+
+
+
+CTAEXPORT void CTA_TREE_GET_HANDLESTR_F77(int *htree, char* str, int *hitem, int* ierr, int len_str){
+   char *c_str;
+
+   /* create a c-string equivalent to name */
+   c_str=CTA_Malloc((len_str+1)*sizeof(char));
+   CTA_fstr2cstr(str,c_str,len_str);
+
+   *ierr=CTA_Tree_GetHandleStr((CTA_Tree) *htree, c_str, (CTA_Handle*) hitem);
+
+   free(c_str);
+}
+
+
+CTAEXPORT void CTA_TREE_GET_VALUESTR_F77(int *htree, char* str, void *value, int *datatype,
+                         int *ierr, int len_str){
+   char *c_str;
+
+   /* create a c-string equivalent to name */
+   c_str=CTA_Malloc((len_str+1)*sizeof(char));
+   CTA_fstr2cstr(str,c_str,len_str);
+
+  *ierr=CTA_Tree_GetValueStr( (CTA_Tree)*htree, c_str, value, (CTA_Datatype)*datatype);
+
+   free(c_str);
+}
+
+
+CTAEXPORT void CTA_TREE_GETHANDLE_F77(int *htree, int *hpath, int *hitem, int *ierr) {
+   *ierr=CTA_Tree_GetHandle((CTA_Tree)*htree, (CTA_String)*hpath, (CTA_Handle*)hitem);
+}
+
+CTAEXPORT void CTA_TREE_GET_VALUE_F77(int *htree, int *hpath, void *value, int *datatype, int *ierr) {
+   *ierr=CTA_Tree_GetValue((CTA_Tree)*htree, (CTA_String)*hpath, value, *datatype);
+}
+
+CTAEXPORT void CTA_TREE_COUNT_ITEMS_F77(int *htree, int *count, int *ierr) {
+   *ierr=CTA_Tree_CountItems((CTA_Tree)*htree, count);
+}
+
+CTAEXPORT void CTA_TREE_GET_ITEM_F77(int *htree, int *index, int *hitem, int *ierr) {
+   *ierr=CTA_Tree_GetItem((CTA_Tree)*htree, *index, (CTA_Handle*)hitem);
+}
+
+CTAEXPORT void CTA_TREE_GET_ITEMVALUE_F77(int *htree, int *index, void *value, int *datatype, int *ierr) {
+   *ierr=CTA_Tree_GetItemValue((CTA_Tree)*htree, *index, value, *datatype);
+}
+
+CTAEXPORT void CTA_TREE_PRINT_F77(int *htree, int *ierr) {
+   *ierr=CTA_Tree_Print((CTA_Tree)*htree);
+}
+
diff --git a/costa/native/cta/src/cta_treevector.c b/costa/native/cta/src/cta_treevector.c
new file mode 100644
index 000000000..63df1280d
--- /dev/null
+++ b/costa/native/cta/src/cta_treevector.c
@@ -0,0 +1,4301 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_treevector.c $
+$Revision: 3792 $, $Date: 2013-01-31 12:34:05 +0100 (Thu, 31 Jan 2013) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#define IDEBUG (0)
+
+#define ELMDIV  (1)
+#define ELMPROD (2)
+
+#include <stdio.h>
+#include <string.h>
+#if HAVE_LIBNETCDF
+#include <netcdf.h>
+#endif
+
+
+#include <math.h>
+#include "cta_mem.h"
+#include "f_cta_utils.h"
+#include "cta_errors.h"
+#include "cta_defaults.h"
+#include "cta_treevector.h"
+#include "ctai_vector.h"
+#include "ctai_datatypes.h"
+#include "cta_handles.h"
+#include "ctai_handles.h"
+#include "cta_file.h"
+#include "cta_metainfo.h"
+#include "ctai_xml.h"
+#include "cta_pack.h"
+#include "cta_message.h"
+#include "cta_metainfo.h"
+
+#define CTA_TREEVECTOR_CREATE_F77              F77_CALL(cta_treevector_create,CTA_TREEVECTOR_CREATE)
+#define CTA_TREEVECTOR_DUPLICATE_F77           F77_CALL(cta_treevector_duplicate,CTA_TREEVECTOR_DUPLICATE)
+#define CTA_TREEVECTOR_CONC_F77                F77_CALL(cta_treevector_conc,CTA_TREEVECTOR_CONC)
+#define CTA_TREEVECTOR_GETSUBTREEVEC_F77       F77_CALL(cta_treevector_getsubtreevec,CTA_TREEVECTOR_GETSUBTREEVEC)
+#define CTA_TREEVECTOR_SETSUBTREENOCOMPUTE_F77 F77_CALL(cta_treevector_setsubtreenocompute,CTA_TREEVECTOR_SETSUBTREENOCOMPUTE)
+#define CTA_TREEVECTOR_GETSUBTREEVECINDEX_F77  F77_CALL(cta_treevector_getsubtreevecindex,CTA_TREEVECTOR_GETSUBTREEVECINDEX)
+#define CTA_TREEVECTOR_GETTAG_F77              F77_CALL(cta_treevector_gettag,CTA_TREEVECTOR_GETTAG)
+#define CTA_TREEVECTOR_SETVEC_F77              F77_CALL(cta_treevector_setvec,CTA_TREEVECTOR_SETVEC)
+#define CTA_TREEVECTOR_GETVEC_F77              F77_CALL(cta_treevector_getvec,CTA_TREEVECTOR_GETVEC)
+#define CTA_TREEVECTOR_SETVAL_F77              F77_CALL(cta_treevector_setval,CTA_TREEVECTOR_SETVAL)
+#define CTA_TREEVECTOR_GETVAL_F77              F77_CALL(cta_treevector_getval,CTA_TREEVECTOR_GETVAL)
+#define CTA_TREEVECTOR_SETVALS_F77             F77_CALL(cta_treevector_setvals,CTA_TREEVECTOR_SETVALS)
+#define CTA_TREEVECTOR_GETVALS_F77             F77_CALL(cta_treevector_getvals,CTA_TREEVECTOR_GETVALS)
+#define CTA_TREEVECTOR_GETSIZE_F77             F77_CALL(cta_treevector_getsize,CTA_TREEVECTOR_GETSIZE)
+#define CTA_TREEVECTOR_GETMETAINFO_F77         F77_CALL(cta_treevector_getmetainfo,CTA_TREEVECTOR_GETMETAINFO)
+#define CTA_TREEVECTOR_SETMETAINFO_F77         F77_CALL(cta_treevector_setmetainfo,CTA_TREEVECTOR_SETMETAINFO)
+#define CTA_TREEVECTOR_COPY_F77                F77_CALL(cta_treevector_copy,CTA_TREEVECTOR_COPY)
+#define CTA_TREEVECTOR_AXPY_F77                F77_CALL(cta_treevector_axpy,CTA_TREEVECTOR_AXPY)
+#define CTA_TREEVECTOR_DOT_F77                 F77_CALL(cta_treevector_dot,CTA_TREEVECTOR_DOT)
+#define CTA_TREEVECTOR_NRM2_F77                F77_CALL(cta_treevector_nrm2,CTA_TREEVECTOR_NRM2)
+#define CTA_TREEVECTOR_SETCONSTANT_F77         F77_CALL(cta_treevector_setconstant,CTA_TREEVECTOR_SETCONSTANT)
+#define CTA_TREEVECTOR_SCAL_F77                F77_CALL(cta_treevector_scal,CTA_TREEVECTOR_SCAL)
+#define CTA_TREEVECTOR_EXPORT_F77              F77_CALL(cta_treevector_export,CTA_TREEVECTOR_EXPORT)
+#define CTA_TREEVECTOR_IMPORT_F77              F77_CALL(cta_treevector_import,CTA_TREEVECTOR_IMPORT)
+#define CTA_TREEVECTOR_FREE_F77                F77_CALL(cta_treevector_free,CTA_TREEVECTOR_FREE)
+#define CTA_TREEVECTOR_INFO_F77                F77_CALL(cta_treevector_info,CTA_TREEVECTOR_INFO)
+#define CTA_TREEVECTOR_LIST_F77                F77_CALL(cta_treevector_list,CTA_TREEVECTOR_LIST)
+#define CTA_TREEVECTOR_GEMM_F77                F77_CALL(cta_treevector_gemm,CTA_TREEVECTOR_GEMM)
+#define CTA_TREEVECTOR_OPONLEAFS_F77           F77_CALL(cta_treevector_oponleafs,CTA_TREEVECTOR_OPONLEAFS)
+#define CTA_TREEVECTOR_ELMSQRT_F77             F77_CALL(cta_treevector_elmsqrt,CTA_TREEVECTOR_ELMSQRT)
+#define CTA_TREEVECTOR_ELMPROD_F77             F77_CALL(cta_treevector_elmprod,CTA_TREEVECTOR_ELMPROD)
+#define CTA_TREEVECTOR_GETSUBTREEVECINDEX_F77  F77_CALL(cta_treevector_getsubtreevecindex,CTA_TREEVECTOR_GETSUBTREEVECINDEX)
+#define CTA_TREEVECTOR_GETNUMSUBTREE_F77       F77_CALL(cta_treevector_getnumsubtree,CTA_TREEVECTOR_GETNUMSUBTREE)
+#define CTA_TREEVECTOR_GETSUBTREEVECID_F77    F77_CALL(cta_treevector_getsubtreevecid,CTA_TREEVECTOR_GETSUBTREEVECID)
+
+
+#define CLASSNAME "CTA_TreeVector"
+
+/* --------------------------------------------------------------------- */
+
+/* Struct holding all data associated to an COSTA Tree-vector */
+
+
+typedef struct {
+char tag[CTA_STRLEN_TAG];
+char name[CTA_STRLEN_NAME];
+int            size;
+CTA_Vector     *v;
+int            nsubtrees;
+CTA_Metainfo   *metainfo;
+CTA_TreeVector *treevecs;
+BOOL           *nocompute;
+} CTAI_TreeVec;
+
+
+/* Local headers */
+
+int CTAI_TreeVector_ExcludeFromVector(CTA_TreeVector treevec, BOOL *excludefv);
+
+
+
+#undef METHOD
+#define METHOD "Create"
+int CTA_TreeVector_Create(const char *name, const char *tag, CTA_TreeVector *treevec){
+   int retval;         //Return value of a call
+   CTAI_TreeVec *data;   //Treevector specific data
+
+   // Allocate data and set properties
+   data=CTA_Malloc(sizeof(CTAI_TreeVec));
+
+   // Set properties
+   strcpy(data->name,name);
+   strcpy(data->tag,tag);
+   data->v=NULL;
+   data->nsubtrees=0;
+   data->treevecs=NULL;
+   data->nocompute=NULL;
+
+   // Allocate new handle and return error when unsuccesfull
+   retval=CTA_Handle_Create("tree-vector",CTA_TREEVECTOR,data,treevec);
+   if (retval) {
+       CTA_WRITE_ERROR("Cannot create tree-vector");
+       return retval;
+   }
+
+   data->metainfo=NULL;
+
+   return CTA_OK;
+};
+
+#undef METHOD
+#define METHOD "Duplicate"
+int CTA_TreeVector_Duplicate(CTA_TreeVector treevec1, CTA_TreeVector *treevec2 ){
+   int retval;         //Return value of a call
+   CTAI_TreeVec *data1, *data2;   //Tree-vector specific data
+   int istate; // counter over all substates
+   const char *name;
+
+   if (IDEBUG>10){printf("CTA_TreeVector_Duplicate start\n");}
+   /* check handle and get data object */
+   retval=CTA_Handle_Check((CTA_Handle) treevec1,CTA_TREEVECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) treevec1,(void*) &data1);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   name = CTAI_Handle_GetName((CTA_Handle) treevec1);
+   if (IDEBUG>10){printf("CTA_TreeVector_Duplicate name of state '%s'\n",name);}
+   if (IDEBUG>10){printf("CTA_TreeVector_Duplicate adress data1=%p\n",data1);}
+
+   /* create new state vector and get data object */
+   retval=CTA_TreeVector_Create(name, data1->tag, treevec2);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) *treevec2,(void*) &data2);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+
+   /* check whether this is a leaf */
+   if (data1->nsubtrees==0){
+
+      if (IDEBUG>10){printf("CTA_TreeVector_Duplicate duplicate vector \n");}
+      /* Duplicate and copy vector */
+
+      if (data1->v) {
+         data2->v=CTA_Malloc(sizeof(CTA_Vector));
+         retval=CTA_Vector_Duplicate(*data1->v, data2->v);
+         if (IDEBUG>10){printf("CTA_TreeVector_Duplicate retval Vector ok %d\n",retval);}
+
+         if (retval!=CTA_OK) return retval;
+      }
+      if (data1->metainfo) {
+        // make data2->metainfo.
+        // *data2->metainfo has been made (and set at NULL) in cta_treevector_create.
+        data2->metainfo=CTA_Malloc(sizeof(CTA_Metainfo));
+        retval=CTA_Metainfo_Create(data2->metainfo);
+        retval = CTA_Metainfo_Copy(*data1->metainfo,*data2->metainfo);
+      }
+   }
+
+   else {
+      if (IDEBUG>10){printf("CTA_TreeVector_Duplicate duplicate substates \n");}
+      /* duplicate sub-states */
+      data2->treevecs=CTA_Malloc(sizeof(CTA_TreeVector)*data1->nsubtrees);
+      data2->nocompute=CTA_Malloc(sizeof(BOOL)*data1->nsubtrees);
+      for (istate=0;istate<data1->nsubtrees;istate++){
+         retval=CTA_TreeVector_Duplicate(data1->treevecs[istate],
+                                    &data2->treevecs[istate] );
+         if (retval!=CTA_OK) return retval;
+         data2->nocompute[istate]=data1->nocompute[istate];
+      }
+      data2->nsubtrees=data1->nsubtrees;
+
+
+      if (data1->metainfo) {
+         data2->metainfo=CTA_Malloc(sizeof(CTA_Metainfo));
+         retval=CTA_Metainfo_Create(data2->metainfo);
+         retval = CTA_Metainfo_Copy(*data1->metainfo,*data2->metainfo);
+      }
+
+   }
+   if (IDEBUG>10){printf("CTA_TreeVector_Duplicate end of function\n");}
+   return CTA_OK;
+};
+
+/* ------------------------------------------------------------*/
+
+int CTAI_axpy_grid(CTA_TreeVector y, CTAI_Gridm gridy, double alpha, CTA_TreeVector x, CTAI_Gridm gridx){
+   CTAI_TreeVec *datax, *datay;   //Tree-vector specific data
+   CTA_Vector vecx_to_y;
+   int retval;
+
+   retval=CTA_Handle_GetData((CTA_Handle) x,(void*) &datax);
+   retval=CTA_Handle_GetData((CTA_Handle) y,(void*) &datay);
+
+   retval=CTA_Vector_Duplicate(*datay->v, &vecx_to_y);
+
+
+
+  /*  interpolate values of vector x on gridx towards gridy. That means the following:
+     - for each point on gridy:
+            use bi/trilinear interpolation using 4/8 gridpoints of gridx which surround
+            gridpoints of y  */
+
+
+
+  if (!datax->v) return CTA_TREEVECTORS_NOT_COMPATIBLE;
+  retval = CTAI_Grid_Interpolate(gridy, gridx, *datax->v, vecx_to_y );
+
+
+
+  /* use now the standard vector axpy   */
+
+  if (!datay->v) return CTA_TREEVECTORS_NOT_COMPATIBLE;
+
+  retval=CTA_Vector_Axpy(*datay->v, alpha, vecx_to_y);
+
+  if (retval!=CTA_OK) return retval;
+
+  /* free vectors */
+  retval = CTA_Vector_Free(&vecx_to_y);
+
+
+  return retval;
+}
+
+/* ------------------------------------------------------------*/
+
+
+#undef METHOD
+#define METHOD "Conc"
+int CTA_TreeVector_Conc(CTA_TreeVector treevec1, CTA_TreeVector *treevecs, int nsubtrees){
+
+   int retval;         //Return value of a call
+   CTAI_TreeVec *data;   //Tree-vector specific data
+   int istate;         //counter of the states
+
+
+   /* check handle and get data object */
+   retval=CTA_Handle_Check((CTA_Handle) treevec1,CTA_TREEVECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) treevec1,(void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   /* Check handles */
+   for (istate=0;istate<nsubtrees;istate++){
+      retval=CTA_Handle_Check((CTA_Handle) treevecs[istate],CTA_TREEVECTOR);
+      if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+   }
+   /* Check whether concatenation is still possible */
+   if (data->nsubtrees || data->treevecs){
+      return CTA_CONCAT_NOT_POSSIBLE;
+   }
+
+   /* Add handles */
+   data->treevecs=CTA_Malloc(sizeof(CTA_TreeVector)*nsubtrees);
+   data->nocompute=CTA_Malloc(sizeof(BOOL)*nsubtrees);
+   for (istate=0;istate<nsubtrees;istate++){
+      data->treevecs[istate]=treevecs[istate];
+      data->nocompute[istate]=FALSE;
+   }
+   data->nsubtrees=nsubtrees;
+
+   return CTA_OK;
+};
+
+
+#undef METHOD
+#define METHOD "GetSubTreeVec"
+int CTA_TreeVector_GetSubTreeVec(CTA_TreeVector treevec, const char *tag,
+                          CTA_TreeVector *hsubstate){
+
+   int retval;         //Return value of a call
+   CTAI_TreeVec *data;   //Tree-vector specific data
+   int istate;         //counter of the states
+
+   *hsubstate=CTA_NULL;
+   /* check handle and get data object */
+   retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   if (strcmp(data->tag,tag)==0 || strcmp(".",tag)==0){
+      *hsubstate=treevec;
+      return CTA_OK;
+   }
+
+   /* look in all substates */
+
+   for (istate=0;istate<data->nsubtrees;istate++){
+
+      retval=CTA_TreeVector_GetSubTreeVec(data->treevecs[istate], tag,hsubstate);
+      if (retval==CTA_OK) return CTA_OK;
+   };
+
+   if (IDEBUG>10){
+      char msg[1024];
+      strcpy(msg,"Cannot find Treevector Item : ");
+      strncat(msg,tag,1023);
+      CTA_WRITE_ERROR(msg);
+   }
+   return CTA_ITEM_NOT_FOUND;
+};
+
+
+#undef METHOD
+#define METHOD "SetSubTreeNocompute"
+int CTA_TreeVector_SetSubTreeNocompute(CTA_TreeVector treevec, const char *tag){
+   int retval;         //Return value of a call
+   CTAI_TreeVec *data;   //Tree-vector specific data
+   int istate;         //counter of the states
+   char tagsub[CTA_STRLEN_TAG];
+
+   /* check handle and get data object */
+   retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   if (data->nsubtrees == 0) return CTA_OK;
+
+   /* look in all substates */
+
+   for (istate=0;istate<data->nsubtrees;istate++){
+     retval = CTA_TreeVector_GetTag(data->treevecs[istate],tagsub);
+     if (strcmp(tagsub,tag)==0){
+       data->nocompute[istate] = TRUE;
+     }
+
+     /* look in substates of substate */
+     retval=CTA_TreeVector_SetSubTreeNocompute(data->treevecs[istate], tag);
+	 if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Error in CTA_TreeVector_SetSubTreeNocompute");
+	   return retval;
+   }
+   };
+   return CTA_OK;
+}
+
+#undef METHOD
+#define METHOD "GetSubTreeVecIndex"
+int CTA_TreeVector_GetSubTreeVecIndex(CTA_TreeVector treevec, int index,
+                          CTA_TreeVector *hsubstate){
+   int retval;         //Return value of a call
+   CTAI_TreeVec *data;   //Tree-vector specific data
+   char msg[1024];
+
+   *hsubstate=CTA_NULL;
+   /* check handle and get data object */
+   retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Cannot retrieve handle data");
+      return retval;
+   }
+
+   /* get the substate.Note that we have to subtract 1 because of our counting convention */
+   if (index > 0 && index <= data->nsubtrees) {
+     *hsubstate = data->treevecs[index-1];
+     return CTA_OK;
+     }
+   if (IDEBUG>10){
+      sprintf(msg,"Cannot find sub treevector with index :%d",index);
+      CTA_WRITE_ERROR(msg);
+   }
+   return CTA_ITEM_NOT_FOUND;
+};
+
+#undef METHOD
+#define METHOD "GetSubTreeVecId"
+int CTA_TreeVector_GetSubTreeVecId(CTA_TreeVector treevec, int index, char tag[CTA_STRLEN_TAG]){
+   int retval;         //Return value of a call
+   CTAI_TreeVec *data;   //Tree-vector specific data
+
+   /* check handle and get data object */
+   retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+ /* get the substate-id. Note that we DO NOT have to subtract 1 because of Java counting convention 
+    (this method is only used by JAVA users!) */
+     retval = CTA_TreeVector_GetTag(data->treevecs[index],tag);
+   
+   return retval;
+};
+
+
+
+#undef METHOD
+#define METHOD "GetNumSubTree"
+int CTA_TreeVector_GetNumSubTree(CTA_TreeVector treevec, int *numSubTrees){
+   int retval;         //Return value of a call
+   CTAI_TreeVec *data;   //Tree-vector specific data
+
+   *numSubTrees=0;
+   /* check handle and get data object */
+   retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   /* get number of substates */
+   *numSubTrees=data->nsubtrees;
+   return CTA_OK;
+};
+
+
+#undef METHOD
+#define METHOD "GetTag"
+int CTA_TreeVector_GetTag(CTA_TreeVector treevec, char *tag){
+
+   int retval;         //Return value of a call
+   CTAI_TreeVec *data;   //Tree-vector specific data
+
+   /* check handle and get data object */
+   retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   /* Note this copy is not protected */
+   strcpy(tag,data->tag);
+
+   return CTA_OK;
+};
+
+
+#undef METHOD
+#define METHOD "GetMetainfo"
+int CTA_TreeVector_GetMetainfo(CTA_TreeVector treevec, CTA_Metainfo minfo){
+
+   int retval;         //Return value of a call
+   CTAI_TreeVec *data;   //Tree-vector specific data
+   int rest;
+
+
+   /* check handle and get data object */
+   retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR);
+   //      printf("cta_treevector_getmetainfo0:  %d \n ",retval);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data);
+   // printf("cta_treevector_getmetainfo1:  %d \n ",retval);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   if (!data->metainfo ){
+      printf("cta_treevector_getmetainfo: NO METAINFO!  \n ");
+      return CTA_ILLEGAL_HANDLE;
+   }
+
+
+   retval=CTA_Handle_Check((CTA_Handle) *data->metainfo, CTA_METAINFO);
+   //printf("cta_treevector_getmetainfo2:  %d \n ",retval);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+
+   retval=CTA_Handle_Check((CTA_Handle) minfo, CTA_METAINFO);
+   //printf("cta_treevector_getmetainfo3:  %d \n ",retval);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+
+   retval = CTA_Metainfo_Copy(*data->metainfo, minfo);
+   retval = CTA_Metainfo_GetRest(minfo,&rest);
+
+   if (retval!=CTA_OK){
+       CTA_WRITE_ERROR("cta_treevector_getmetainfo: error");
+       return retval;
+   }
+
+   return CTA_OK;
+};
+
+
+#undef METHOD
+#define METHOD "SetMetainfo"
+int CTA_TreeVector_SetMetainfo(CTA_TreeVector treevec, CTA_Metainfo minfo){
+
+   int retval;         //Return value of a call
+   CTAI_TreeVec *data;   //Tree-vector specific data
+
+   /* check handle and get data object */
+   retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR);
+   //   printf("cta_treevector_setmetainfo0:  %d \n ",retval);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   retval=CTA_Handle_Check((CTA_Handle) minfo, CTA_METAINFO);
+   //printf("cta_treevector_setmetainfo1:  %d \n ",retval);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+
+   /*  er wordt verondersteld dat metainfo van state nog niet is gevuld*/
+   /* dus state_setmetainfo is eenmalig per state; anders copy gebruiken! */
+
+   if (!data->metainfo){
+     data->metainfo=CTA_Malloc(sizeof(CTA_Metainfo));
+   }
+     retval=CTA_Metainfo_Create(data->metainfo);
+     //printf("cta_treevector_setmetainfo2:  %d \n ",retval);
+       if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Error in CTA_Metainfo_Create");
+	   return retval;
+   }
+
+   //dit kan niet met een toekenning; er is een functie cta_metainfo_copy nodig!!!
+
+   retval = CTA_Metainfo_Copy(minfo,*data->metainfo);
+   //printf("cta_treevector_setmetainfo3:  %d \n ",retval);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Error in CTA_Metainfo_Copy");
+	   return retval;
+   }
+
+   //   data->metainfo = minfo;
+
+   return CTA_OK;
+};
+
+/* ------------------------------------------------------- */
+//int CTA_TreeVector_SetConnection(CTA_TreeVector treevec1, CTA_TreeVector treevec2){
+//  int retval;         //Return value of a call
+//  CTA_Metainfo minfo1, minfo2;
+//  char  tag1[CTA_STRLEN_TAG],  tag2[CTA_STRLEN_TAG];
+//
+//  /* get both metainfos; checking occurs in called routine  */
+//  retval = CTA_TreeVector_GetMetainfo(treevec1, minfo1);
+//  if (retval!=CTA_OK) return retval;
+//  retval = CTA_TreeVector_GetMetainfo(treevec2, minfo2);
+//  if (retval!=CTA_OK) return retval;
+//
+//  retval = CTA_Metainfo_GetTag(minfo1, tag1);
+//  if (retval!=CTA_OK) return retval;
+//  retval = CTA_Metainfo_GetTag(minfo2, tag2);
+//  if (retval!=CTA_OK) return retval;
+//
+//  /* let both metainfos connect to each other */
+//  retval = CTA_Metainfo_SetBelongsTo(minfo1, tag2);
+//  if (retval!=CTA_OK) return retval;
+//  retval = CTA_Metainfo_SetBelongsTo(minfo2, tag1);
+//  if (retval!=CTA_OK) return retval;
+//
+// /* put the changed metainfo back */
+//  retval = CTA_TreeVector_SetMetainfo(treevec1, minfo1);
+//  if (retval!=CTA_OK) return retval;
+//  retval = CTA_TreeVector_SetMetainfo(treevec2, minfo2);
+//  if (retval!=CTA_OK) return retval;
+//
+//  return CTA_OK;
+//}
+
+/* ------------------------------------------------------- */
+
+// return the total number of leaves/vectors that form the state
+int CTA_TreeVector_GetVecNumHandles(CTA_TreeVector treevec){
+   return CTAI_TreeVec_GetVecNumHandles(treevec);
+};
+
+
+
+// gives a list of all tags of state and substates
+int CTA_TreeVector_List(CTA_TreeVector treevec, CTA_Vector taglist ){
+  int indx, retval;
+  indx = 0;
+  retval=CTAI_TreeVec_List(treevec, taglist, &indx);
+   return retval;
+};
+
+
+// gives a list of all tags of state and substates
+int CTAI_TreeVec_List(CTA_TreeVector treevec, CTA_Vector taglist, int *indx){
+  int retval,istate;
+  CTA_String str1;
+  CTAI_TreeVec *data;   //Tree-vector specific data
+
+
+   /* check handle and get data object */
+   retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR);
+   if (retval!=CTA_OK) return retval;
+   retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data);
+   if (retval!=CTA_OK) return retval;
+
+   printf("State-vector information:\n");
+   printf("tag     :%s\n",data->tag);
+
+   retval = CTA_String_Create(&str1);
+   retval = CTA_String_Set(str1,data->tag);
+
+   retval = CTA_Vector_SetVal(taglist, *indx + 1, &str1,CTA_STRING);
+
+   *indx = *indx + 1;
+
+   for (istate=0;istate<data->nsubtrees;istate++){
+     retval=CTAI_TreeVec_List(data->treevecs[istate], taglist, indx);
+     if (retval!=CTA_OK) return retval;
+   }
+
+   return CTA_OK;
+};
+
+
+
+
+
+
+
+
+
+
+// return the total number of leaves/vectors that form the state
+// do not count 'nocompute'-substates
+int CTAI_TreeVec_GetVecNumHandles(CTA_TreeVector treevec){
+
+   CTAI_TreeVec *data;   //Tree-vector specific data
+   int nvec;
+   int istate;
+
+   CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data);
+
+   // We are in a leaf
+   if (data->nsubtrees==0){
+      return 1;
+   } else {
+      nvec=0;
+      for (istate=0;istate<data->nsubtrees;istate++){
+        if (data->nocompute[istate] == FALSE) {
+          nvec=nvec+CTAI_TreeVec_GetVecNumHandles(data->treevecs[istate]);
+        }
+      }
+      return nvec;
+   }
+};
+
+int CTAI_TreeVec_GetVecHandles(CTA_TreeVector treevec, CTA_Vector **hvecs, int *indx){
+
+   int retval;         //Return value of a call
+   CTAI_TreeVec *data;   //Tree-vector specific data
+   int istate;         //Counter over substates
+
+   retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data);
+   if (retval) return retval;
+
+   // We are in a leaf
+   if (data->nsubtrees==0){
+      hvecs[*indx]=data->v;
+      (*indx)++;
+
+   } else {
+      for (istate=0;istate<data->nsubtrees;istate++){
+        if (data->nocompute[istate] == FALSE) {
+         CTAI_TreeVec_GetVecHandles(data->treevecs[istate], hvecs, indx);
+        }
+      }
+   }
+   return CTA_OK;
+};
+
+
+#undef METHOD
+#define METHOD "SetVec"
+int CTA_TreeVector_SetVec(CTA_TreeVector treevec, CTA_Vector hvec){
+
+   int retval;         //Return value of a call
+   CTAI_TreeVec *data;   //Tree-vector specific data
+   CTA_Vector **hvecs; //array with pointers to all vectors of substates
+   int nvecs;          //number of sub vectors state is build of
+   int ivec;           //counter over sub vectors
+   int indx;
+   void *values;       //values of hvec
+   int nglob;          //dimension of vector hvec
+   int nsub;           //dimension of a vector of a substate
+   int size_type;      //C- sizeof of data items in hvec
+   int ioff;           //offset in input vector to start of subvector
+   CTA_Datatype datatype; //Data Type of hvec
+   char *cvalues;
+
+   /* check handle and get data object */
+   retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   /* we are in a leaf -> copy vector */
+   if (data->nsubtrees==0) {
+      if (data->v){
+        /* copy vector */
+        retval=CTA_Vector_Copy(hvec,*data->v);
+        if (retval!=CTA_OK) {
+	      CTA_WRITE_ERROR("Cannot copy vector");
+	      return retval;
+      }
+      }else{
+        /* duplicate vector */
+        data->v=CTA_Malloc(sizeof(CTA_Vector));
+        retval=CTA_Vector_Duplicate(hvec,data->v);
+        if (retval!=CTA_OK) {
+	      CTA_WRITE_ERROR("Cannot duplicate vector");
+	      return retval;
+      }
+      }
+   }else{
+      /* we are not in a leaf, try to break vector up */
+      /* This is only allowed if all vectors have already been filled-in */
+      /* Note: the 'excludefromvector'-leafs are excluded */
+      nvecs=CTAI_TreeVec_GetVecNumHandles(treevec);
+      hvecs=CTA_Malloc(sizeof(CTA_Vector*)*nvecs);
+      indx=0;
+      retval=CTAI_TreeVec_GetVecHandles(treevec, hvecs, &indx);
+      if (retval) {
+	      CTA_WRITE_ERROR("Cannot get treevector handles");
+	      return retval;
+      }
+      /* no NULL vectors are allowed */
+      for (ivec=0;ivec<nvecs;ivec++){
+         if (!hvecs[ivec]) return CTA_UNINITIALISED_SUBTREEVECTOR;
+      }
+
+      /* Copy values over all substates :*/
+
+      /* First get values from hvec */
+      retval=CTA_Vector_GetDatatype(hvec,&datatype);
+      retval=CTA_Vector_GetSize(hvec,&nglob);
+      if (retval) {
+	      CTA_WRITE_ERROR("Cannot get size");
+	      return retval;
+      }
+      retval=CTA_SizeOf(datatype,&size_type);
+      if (retval) {
+	      CTA_WRITE_ERROR("Cannot get sizeOf");
+	      return retval;
+      }
+      values=CTA_Malloc(size_type*nglob);
+      retval=CTA_Vector_GetVals(hvec,values,nglob,datatype);
+      if (retval) {
+	      CTA_WRITE_ERROR("Cannot get values");
+	      return retval;
+      }
+
+      /* copy values into sub-vector */
+      ioff=0;
+      for (ivec=0;ivec<nvecs;ivec++){
+         char *ptr;
+
+         retval=CTA_Vector_GetSize(*hvecs[ivec],&nsub);
+         if (retval) {
+	        CTA_WRITE_ERROR("Cannot get size of vector");
+	        return retval;
+         }
+         if (ioff+nsub>nglob) return CTA_INCOMPATIBLE_VECTORS;
+         if (retval) return retval;
+         cvalues=values;
+
+         ptr=cvalues;
+         retval=CTA_Vector_SetVals(*hvecs[ivec],ptr+ioff*size_type,nsub,
+                                   datatype);
+         if (retval) {
+	        CTA_WRITE_ERROR("Cannot set values of vector");
+	        return retval;
+         }
+         ioff=ioff+nsub;
+
+      }
+      if (ioff!=nglob) return CTA_INCOMPATIBLE_VECTORS;
+
+      /* free work variables */
+      free(values);
+      free(hvecs);
+   }
+   return CTA_OK;
+};
+
+void zet999(double *vals, int n){
+
+int i;
+
+   for (i=0;i<n;i++){
+      printf ("(%d) %lg\n",i,vals[i]);
+   }
+
+}
+
+#undef METHOD
+#define METHOD "GetVec"
+int CTA_TreeVector_GetVec(CTA_TreeVector treevec, CTA_Vector hvec){
+
+   int retval;         //Return value of a call
+   CTAI_TreeVec *data;   //Tree-vector specific data
+   CTA_Vector **hvecs; //array with pointers to all vectors of substates
+   int nvecs;          //number of sub vectors state is build of
+   int ivec;           //counter over sub vectors
+   int indx;
+   void *values;       //values of hvec
+   int nglob;          //dimension of vector hveec
+   int nsub;           //dimension of a vector of a substate
+   int size_type;      //C- sizeof of data items in hvec
+   int ioff;           //offset in input vector to start of subvector
+   CTA_Datatype datatype; //Data Type of hvec
+
+   /* check handle and get data object */
+   retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   /* we are in a leaf -> copy vector */
+   if (data->nsubtrees==0) {
+      if (data->v){
+        /* copy vector */
+        retval=CTA_Vector_Copy(*data->v,hvec);
+        if (retval!=CTA_OK) {
+	       CTA_WRITE_ERROR("Cannot copy vector");
+	       return retval;
+        }
+      }
+   }else{
+      /* we are not in a leave, try to break vector up */
+      /* This is only allowed if all vectors have already been filled-in */
+      nvecs=CTAI_TreeVec_GetVecNumHandles(treevec);
+      hvecs=CTA_Malloc(sizeof(CTA_Vector*)*nvecs);
+      indx=0;
+      retval=CTAI_TreeVec_GetVecHandles(treevec, hvecs, &indx);
+      /* no NULL vecors are allowed */
+      for (ivec=0;ivec<nvecs;ivec++){
+         if (hvecs[ivec]==CTA_NULL) return CTA_UNINITIALISED_SUBTREEVECTOR;
+      }
+
+      /* Copy values over all substates :*/
+
+      /* create array for holding values from hvec */
+      retval=CTA_Vector_GetDatatype(hvec,&datatype);
+      retval=CTA_Vector_GetSize(hvec,&nglob);
+      if (retval) {
+	     CTA_WRITE_ERROR("Cannot get size of vector");
+	     return retval;
+      }
+      retval=CTA_SizeOf(datatype,&size_type);
+      if (retval) {
+	     CTA_WRITE_ERROR("Error in CTA_SizeOf");
+	     return retval;
+      }
+      values=CTA_Malloc(size_type*nglob);
+
+      /* copy values into sub-vector */
+      ioff=0;
+      for (ivec=0;ivec<nvecs;ivec++){
+         char *ptr;
+         retval=CTA_Vector_GetSize(*hvecs[ivec],&nsub);
+         if (retval) {
+	        CTA_WRITE_ERROR("Cannot get size of vector");
+	        return retval;
+         }
+         if (ioff+nsub>nglob) return CTA_INCOMPATIBLE_VECTORS;
+         if (retval) return retval;
+
+         ptr=values;
+
+         retval=CTA_Vector_GetVals(*hvecs[ivec],ptr+ioff*size_type,nsub,
+                                 datatype);
+         if (retval) {
+	        CTA_WRITE_ERROR("Cannot get values of vector");
+	        return retval;
+         }
+         ioff=ioff+nsub;
+      }
+      if (ioff!=nglob) return CTA_INCOMPATIBLE_VECTORS;
+
+
+      /* set values in hvec */
+//         zet999(values,nglob);
+      retval=CTA_Vector_SetVals(hvec,values,nglob,datatype);
+      if (retval) {
+        CTA_WRITE_ERROR("Cannot set values of vector");
+        return retval;
+      }
+
+      /* free work variables */
+//      zet999(values,nglob);
+      free(values);
+      free(hvecs);
+   }
+   return CTA_OK;
+};
+
+
+#undef METHOD
+#define METHOD "Copy"
+int CTA_TreeVector_Copy(CTA_TreeVector treevec1, CTA_TreeVector treevec2){
+// DIT IS EEN ERG KORT DOOR DE BOCHT GEDEELTELIJK INGEKLAPTE '
+// SUBSTATES MOETEN OOK ONDERSTEUND WORDEN !!!!
+
+
+   int retval;         //Return value of a call
+   CTAI_TreeVec *data1, *data2;   //Tree-vector specific data
+   int istate; // counter over all substates
+
+   /* check handle and get data object */
+   retval=CTA_Handle_Check((CTA_Handle) treevec1,CTA_TREEVECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) treevec1,(void*) &data1);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   retval=CTA_Handle_Check((CTA_Handle) treevec2,CTA_TREEVECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) treevec2,(void*) &data2);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   // check if there is any meta-information; if yes, make a copy of it.
+
+
+
+   /* check whether this is a leaf */
+   if (data1->nsubtrees==0){
+      if (data2->nsubtrees!=0) return CTA_TREEVECTORS_NOT_COMPATIBLE;
+
+      /* copy vector */
+      if (data1->v) {
+         if (!data2->v) return CTA_TREEVECTORS_NOT_COMPATIBLE;
+         retval=CTA_Vector_Copy(*data1->v, *data2->v);
+         if (retval!=CTA_OK) {
+            CTA_WRITE_ERROR("Cannot copy vector");
+            return retval;
+         }
+      }
+
+      /* copy metainfo of leaf */
+      if (data1->metainfo) {
+        // check if the other state has a metainfo
+        //        if (!data2->metainfo) printf("leaf: data2 has no metainfo \n" );
+        if (!data2->metainfo) return CTA_TREEVECTORS_NOT_COMPATIBLE;
+        // assume that the other metainfo is of the right size and copy
+
+        retval = CTA_Metainfo_Copy(*data1->metainfo,*data2->metainfo);
+      }
+
+
+   }
+   else {    /* we are in a node. Copy the metainfo of the node, then the substates.*/
+
+
+      if (data1->metainfo) {
+        // check if the other state has a metainfo
+        //  if (!data2->metainfo) printf("node: data2 has no metainfo \n" );
+        if (!data2->metainfo) return CTA_TREEVECTORS_NOT_COMPATIBLE;
+        // assume that the other metainfo is of the right size and copy
+
+        retval = CTA_Metainfo_Copy(*data1->metainfo,*data2->metainfo);
+      }
+
+
+      /* copy sub-states */
+      if (data1->nsubtrees!=data2->nsubtrees) return CTA_TREEVECTORS_NOT_COMPATIBLE;
+      for (istate=0;istate<data1->nsubtrees;istate++){
+         retval=CTA_TreeVector_Copy(data1->treevecs[istate],data2->treevecs[istate]);
+         if (retval!=CTA_OK) {
+            CTA_WRITE_ERROR("Cannot copy treevector");
+            return retval;
+         }
+      }
+   }
+   return CTA_OK;
+};
+
+
+
+
+#undef METHOD
+#define METHOD "SetConstant"
+int CTA_TreeVector_SetConstant(CTA_TreeVector treevec, void *val,CTA_Datatype datatype){
+
+   int retval;         //Return value of a call
+   CTAI_TreeVec *data;   //Tree-vector specific data
+   int istate; // counter over all substates
+//   char name[CTA_STRLEN_NAME];
+
+   /* check handle and get data object */
+   retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   /* check whether this is a leaf */
+   if (data->nsubtrees==0){
+
+      /* set constant vector */
+     if (data->v) {
+       retval=CTA_Vector_SetConstant(*data->v,val,datatype);
+       if (retval!=CTA_OK) {
+	      CTA_WRITE_ERROR("Error in CTA_Vector_SetConstant");
+	      return retval;
+       }
+     }
+   }
+   else {
+     /* copy sub-states */
+     for (istate=0;istate<data->nsubtrees;istate++){
+       if (data->nocompute[istate] == FALSE) {
+         retval=CTA_TreeVector_SetConstant(data->treevecs[istate],val,datatype);
+         if (retval!=CTA_OK) {
+	        CTA_WRITE_ERROR("Error in CTA_TreeVector_SetConstant");
+	        return retval;
+         }
+       }
+     }
+   }
+
+   return CTA_OK;
+};
+
+
+#undef METHOD
+#define METHOD "Scal"
+int CTA_TreeVector_Scal(CTA_TreeVector treevec, double val){
+
+   int retval;         //Return value of a call
+   CTAI_TreeVec *data;   //Tree-vector specific data
+   int istate; // counter over all substates
+//   char name[CTA_STRLEN_NAME];
+
+   /* check handle and get data object */
+   retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   /* check whether this is a leaf */
+   if (data->nsubtrees==0){
+
+      /* set constant vector */
+      if (data->v) {
+         retval=CTA_Vector_Scal(*data->v,val);
+         if (retval!=CTA_OK) {
+	        CTA_WRITE_ERROR("Error in CTA_Vector_Scal");
+	        return retval;
+         }
+      }
+   }
+   else {
+      /* copy sub-states */
+     for (istate=0;istate<data->nsubtrees;istate++){
+       if (data->nocompute[istate] == FALSE) {
+         retval=CTA_TreeVector_Scal(data->treevecs[istate],val);
+         if (retval!=CTA_OK) {
+	        CTA_WRITE_ERROR("Error in CTA_TreeVector_Scal");
+	        return retval;
+         }
+       }
+     }
+   }
+   return CTA_OK;
+};
+
+
+#undef METHOD
+#define METHOD "GetSetVal"
+int CTAI_TreeVec_GetSetVal(CTA_TreeVector treevec1, int i, void *val,
+                         CTA_Datatype datatype, BOOL lget ){
+   int retval;         //Return value of a call
+   CTAI_TreeVec *data1;  //Tree-vector specific data
+   int istate;         // counter over all substates
+   int nstate, offset;
+   char msg[128];
+
+
+   /* check handle and get data object */
+   retval=CTA_Handle_Check((CTA_Handle) treevec1,CTA_TREEVECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) treevec1,(void*) &data1);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   /* check whether this is a leaf */
+   if (data1->nsubtrees==0){
+      /* Set value in vector vector */
+      if (data1->v) {
+         if (lget){
+            retval=CTA_Vector_GetVal(*data1->v, i,val, datatype);
+         } else {
+            retval=CTA_Vector_SetVal(*data1->v, i,val, datatype);
+         }
+         if (retval!=CTA_OK) {
+            sprintf(msg, "Error %d in getting or setting element %d vals", retval, i);
+            CTA_WRITE_ERROR(msg);
+	    return retval;
+         }
+      }
+   }
+   else {
+      /* Find corresponding substate */
+     offset=0;
+     for (istate=0;istate<data1->nsubtrees;istate++){
+       if (data1->nocompute[istate] == FALSE) {
+         retval=CTA_TreeVector_GetSize(data1->treevecs[istate], &nstate);
+         if (retval!=CTA_OK) {
+	        CTA_WRITE_ERROR("Cannot get size of TreeVector");
+	        return retval;
+         }
+         if (nstate>=i-offset) {
+           retval=CTAI_TreeVec_GetSetVal(data1->treevecs[istate],i-offset, val,
+                                         datatype, lget);
+           return retval;
+         }
+         offset=offset+nstate;
+       }
+     }
+   }
+   return CTA_OK;
+};
+
+
+int CTA_TreeVector_GetVal(CTA_TreeVector treevec, int i, void *val,CTA_Datatype datatype){
+   return CTAI_TreeVec_GetSetVal(treevec, i, val, datatype, TRUE );
+};
+
+int CTA_TreeVector_SetVal(CTA_TreeVector treevec, int i, void *val,CTA_Datatype datatype){
+   return CTAI_TreeVec_GetSetVal(treevec, i, val, datatype, FALSE );
+};
+
+
+
+
+#undef METHOD
+#define METHOD "SetVals"
+int CTA_TreeVector_SetVals(CTA_TreeVector treevec, void *val,int nval, CTA_Datatype datatype){
+// DIT IS EEN ERG KORT DOOR DE BOCHT VERSIE'
+   CTA_Vector vwork;
+   int retval;
+
+   retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nval, datatype, CTA_NULL, &vwork);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Cannot create vector");
+       return retval;
+   }
+   retval=CTA_Vector_SetVals(vwork, val, nval, datatype);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Cannot set values in vector");
+       return retval;
+   }
+   retval=CTA_TreeVector_SetVec(treevec,vwork);
+   if (retval!=CTA_OK)  {
+       CTA_WRITE_ERROR("Cannot set vector in vectortree");
+       return retval;
+   }
+   retval=CTA_Vector_Free(&vwork);
+   if (retval!=CTA_OK)  {
+       CTA_WRITE_ERROR("Cannot free vector");
+       return retval;
+   }
+
+   return CTA_OK;
+};
+
+#undef METHOD
+#define METHOD "GetVals"
+int CTA_TreeVector_GetVals(CTA_TreeVector treevec, void *val,int nval,CTA_Datatype datatype){
+// DIT IS EEN ERG KORT DOOR DE BOCHT VERSIE'
+   CTA_Vector vwork;
+   int retval;
+   char msg[128];
+
+   retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR, nval, datatype, CTA_NULL, &vwork);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+
+   retval=CTA_TreeVector_GetVec(treevec,vwork);
+   if (retval!=CTA_OK) {
+       CTA_Vector_Free(&vwork);
+       sprintf(msg,"Cannot get vector from treevector (length=%d) error code is %d", nval,retval);
+       CTA_WRITE_ERROR(msg);
+       return retval;
+   }
+
+   retval=CTA_Vector_GetVals(vwork, val, nval, datatype);
+   if (retval!=CTA_OK) {
+       CTA_Vector_Free(&vwork);
+       CTA_WRITE_ERROR("Cannot get values from vector");
+       return retval;
+   }
+
+   retval=CTA_Vector_Free(&vwork);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Cannot free vector");
+
+
+       return retval;
+   }
+   return CTA_OK;
+};
+
+/* ----------------------------------------------------------------- */
+
+#undef METHOD
+#define METHOD "Axpy"
+int CTA_TreeVector_Axpy(CTA_TreeVector y, double alpha, CTA_TreeVector x){
+// DIT IS EEN ERG KORT DOOR DE BOCHT GEDEELTELIJK INGEKLAPTE '
+// SUBSTATES MOETEN OOK ONDERSTEUND WORDEN !!!!
+
+
+   int retval, retvalA;         //Return value of a call
+   CTAI_TreeVec *datax, *datay;   //Tree-vector specific data
+   int istate, jstate; // counter over all substates
+   CTA_Metainfo minfox, minfoy;
+   CTAI_Gridm hgridx, hgridy;
+   char tagx[CTA_STRLEN_TAG], tagy[CTA_STRLEN_TAG];
+   char xbelongsto[CTA_STRLEN_TAG], ybelongsto[CTA_STRLEN_TAG];
+   CTA_TreeVector substate_x, substate_y;
+   CTA_Datatype datatype;
+   CTA_Vector vecy;
+   int nx,ny, ierr;
+
+   if (IDEBUG) {
+      printf("Start of CTA_TreeVector_Axpy(%d,%f,%d)\n",y,alpha,x);
+   }
+   //
+   //
+   // printf("CTA_TreeVector_Axpy: y=\n");
+   // retval=CTA_TreeVector_Export(y,CTA_FILE_STDOUT);
+
+   /* check handle and get data object */
+   retval=CTA_Handle_Check((CTA_Handle) x,CTA_TREEVECTOR);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Argument x must be a CTA_TreeVector");
+       return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) x,(void*) &datax);
+   if (retval!=CTA_OK)
+      {   printf("ERROR in CTA_TreeVector_Axpy: ");
+          printf("Handle-getdata of argument x fails\n");
+          return retval;
+      }
+   retval=CTA_Handle_Check((CTA_Handle) y,CTA_TREEVECTOR);
+   if (retval!=CTA_OK) {
+      char message[1024];
+	  sprintf(message,"Argument y (handle=%d) must be a CTA_TreeVector\n",y);
+	  CTA_WRITE_ERROR(message);
+      return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) y,(void*) &datay);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Handle-getdata of argument y fails");
+      return retval;
+   }
+
+/* FIRST RULE: if x is flat, y not flat, then if size(x)==size(y) do a crude axpy with one datatype only
+  This is handy for steady-state filters, when KG has an unknown tree structure */
+   retval=CTA_TreeVector_GetSize(x,&nx);
+   retval=CTA_TreeVector_GetSize(y,&ny);
+   if (datax->nsubtrees==0 && datay->nsubtrees > 0 && nx == ny){
+	  retval=CTA_Vector_GetDatatype(*datax->v,&datatype);  // usually: cta_double
+      retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR, ny, datatype, CTA_NULL, &vecy);
+      retval=CTA_TreeVector_GetVec(y,vecy);
+      retval=CTA_Vector_Axpy(vecy, alpha,*datax->v);
+      retval=CTA_TreeVector_SetVec(y,vecy);
+	  retval=CTA_Vector_Free(&vecy);
+	  return retval;
+   }
+/* END OF first rule ; now exploring other possibilities. */
+
+
+   /* decide what to do according to metainfo */
+   if (!datax->metainfo || !datay->metainfo ) {
+   if (IDEBUG) {
+     printf("CTA_TreeVector_Axpy: NO meta info in one (or both) of the treevectors %s and %s \n",
+                  datax->tag, datay->tag);
+   }
+     /* check whether this is a leaf */
+     if (datax->nsubtrees==0){
+
+	   if (datay->nsubtrees!=0) {
+             char message[1024];
+	         sprintf(message,"Argument y (size %d, handle=%d) contains subtrees while x (size %d) is a leaf.\n",ny,y,nx);
+	         CTA_WRITE_ERROR(message);			 
+			 return CTA_TREEVECTORS_NOT_COMPATIBLE;
+		 }
+
+       /* axpy operation for vector */
+       if (datax->v) {
+         if (!datay->v) return CTA_TREEVECTORS_NOT_COMPATIBLE;
+         retval=CTA_Vector_Axpy(*datay->v, alpha,*datax->v);
+         if (retval!=CTA_OK) {
+            CTA_WRITE_ERROR("Error in CTA_Vector_Axpy");
+            return retval;
+         }
+       }
+     }
+     else {
+       /* axpy operation for sub-states */
+       if (datax->nsubtrees!=datay->nsubtrees){
+           char message[1024];
+	       sprintf(message,"arguments y and x have different number of subtrees \n");
+	       CTA_WRITE_ERROR(message);
+		   return CTA_TREEVECTORS_NOT_COMPATIBLE;
+       }
+       for (istate=0;istate<datax->nsubtrees;istate++){
+         if (datax->nocompute[istate] == FALSE) {
+           retval=CTA_TreeVector_Axpy(datay->treevecs[istate], alpha,datax->treevecs[istate]);
+           if (retval!=CTA_OK) {
+              CTA_WRITE_ERROR("Error in CTA_TreeVector_Axpy");
+              return retval;
+           }
+         }
+       }
+     }
+     return CTA_OK;
+   }
+   else {
+      if (IDEBUG) {
+         printf("CTA_TreeVector_Axpy meta info available for both ");
+         printf("treevectors %s and %s \n",datax->tag, datay->tag);
+               retval=CTA_TreeVector_GetSize(x,&nx);
+               retval=CTA_TreeVector_GetSize(y,&ny);
+               printf("sizes: x %d y %d \n",nx,ny);
+      }
+
+     /* ---------- metainfo exists for both x and y --------------  */
+
+     retvalA = CTA_OK;
+     retval=CTA_Metainfo_Create(&minfox);   retval=CTA_Metainfo_Create(&minfoy);
+
+     retval = CTA_Metainfo_Copy(*datax->metainfo, minfox);
+     retval = CTA_Metainfo_Copy(*datay->metainfo, minfoy);
+     retval = CTA_Metainfo_GetGrid(minfox, &hgridx);
+     retval = CTA_Metainfo_GetGrid(minfoy, &hgridy);
+     retval = CTA_Metainfo_GetTag(minfox, tagx);
+     retval = CTA_Metainfo_GetTag(minfoy, tagy);
+     retval = CTA_Metainfo_GetBelongsTo(minfox, xbelongsto);
+     retval = CTA_Metainfo_GetBelongsTo(minfoy, ybelongsto);
+
+     /* -------------------------- */
+     /* search for equal tags        */
+
+     if (IDEBUG) {
+        printf("CTA_TreeVector_Axpy: tagx=%s,tagy=%s\n",tagx, tagy);
+     }
+
+     if (0==strcmp(tagx, tagy)) { // y and x have same tag ; finished scanning!
+
+     if (IDEBUG) {
+        printf("CTA_TreeVector_Axpy: equal tags; now calling ctai_axpy_grid\n");
+     }
+
+       retvalA = CTAI_axpy_grid(y,hgridy,alpha,x,hgridx);
+	   if (retvalA) {
+		  char message[1024];
+		  sprintf(message," ERROR cta_treevector_axpy: code %d  \n",retval);
+	      CTA_WRITE_ERROR(message);
+	   }
+
+     }
+     else {
+
+       /* check if tag of y equals tag of substate of x */
+
+       if (datax->nsubtrees != 0) {
+         for (istate=0; istate<datax->nsubtrees;istate++){
+           if (datax->nocompute[istate] == FALSE) {
+             substate_x= datax->treevecs[istate];
+             retval = CTA_TreeVector_Axpy(y, alpha, substate_x);
+           }
+           // retvalA = max(retvalA, retval);
+         }
+       }  // loop along substates of x
+
+
+       /* check if tag of substate of y equals tag of x */
+       if (datay->nsubtrees!=0) {
+
+         for (jstate=0; jstate<datay->nsubtrees;jstate++){
+           if (datay->nocompute[jstate] == FALSE) {
+             substate_y= datay->treevecs[jstate];
+             retval = CTA_TreeVector_Axpy(substate_y, alpha, x);
+           }
+           // retvalA = max(retvalA, retval);
+         }
+       }
+     }
+     /* free variables */
+     ierr = CTA_Metainfo_Free(&minfox);
+     if (IDEBUG>0) {printf("end of ctai_treevector_axpy: free metainfo %d\n",ierr);}
+     ierr = CTA_Metainfo_Free(&minfoy);
+
+
+
+     return retvalA;
+   } // else metainfo exists
+
+
+   return CTA_OK;
+};
+
+
+#undef METHOD
+#define METHOD "ElmOp"
+int CTA_TreeVector_ElmOp(CTA_TreeVector y, CTA_TreeVector x, int oper){
+// DIT IS EEN ERG KORT DOOR DE BOCHT GEDEELTELIJK INGEKLAPTE '
+// SUBSTATES MOETEN OOK ONDERSTEUND WORDEN !!!!
+
+
+   int retval;                    //Return value of a call
+   CTAI_TreeVec *datax, *datay;   //Tree-vector specific data
+   int istate; // counter over all substates
+   BOOL noInterpolation;
+
+   /* check handle and get data object */
+   retval=CTA_Handle_Check((CTA_Handle) x,CTA_TREEVECTOR);
+   if (retval!=CTA_OK) {
+       char message[1024];
+	   sprintf(message, "ERROR in CTA_TreeVector_Axpy: argument x must be a CTA_TreeVector");
+	   CTA_WRITE_ERROR(message);
+       return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) x,(void*) &datax);
+   if (retval!=CTA_OK) {
+      char message[1024];
+	  sprintf(message, "ERROR in CTA_TreeVector_Axpy: Handle-getdata of argument x fails");
+      CTA_WRITE_ERROR(message);
+	  return retval;
+   }
+   retval=CTA_Handle_Check((CTA_Handle) y,CTA_TREEVECTOR);
+   if (retval!=CTA_OK){
+	   char message[1024];
+	   sprintf(message, "ERROR in CTA_TreeVector_Axpy: argument y must be a CTA_TreeVector");
+       CTA_WRITE_ERROR(message);
+       return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) y,(void*) &datay);
+   if (retval!=CTA_OK) {
+      char message[1024];
+	  sprintf(message, "ERROR in CTA_TreeVector_Axpy: Handle-getdata of argument y fails");
+	  CTA_WRITE_ERROR(message);
+      return retval;
+   }
+
+   /* decide what to do according to metainfo */
+   /* Note :if only one has meta info just continue without interpolation */
+   /* If both meta info's are the same we can continue as well without interpolation */
+
+   noInterpolation=TRUE;
+   /* If both have meta info check whether we need interpolation */
+   if (datax->metainfo && datay->metainfo){
+      if (CTA_Metainfo_IsEqual(*(datax->metainfo),
+                               *(datay->metainfo))==CTA_TRUE){
+         noInterpolation=FALSE;
+      }
+   }
+
+   if (noInterpolation) {
+
+     /* check whether this is a leaf */
+     if (datax->nsubtrees==0){
+       if (datay->nsubtrees!=0) {
+          printf("Error: Vector x is a leaf but vector y has %d children\n",datay->nsubtrees);
+          return CTA_TREEVECTORS_NOT_COMPATIBLE; // hoeft niet!
+       }
+
+       /* axpy operation for vector */
+       if (datax->v) {
+         if (!datay->v) {
+            printf("Error: Vector x is allocated (leaf) but vector v not\n");
+            return CTA_TREEVECTORS_NOT_COMPATIBLE;
+         }
+         if (oper==ELMDIV){
+            retval=CTA_Vector_ElmDiv(*datay->v, *datax->v);
+         } else {
+            retval=CTA_Vector_ElmProd(*datay->v, *datax->v);
+         }
+         if (retval!=CTA_OK) {
+            CTA_WRITE_ERROR("Error in CTA_Vector_ElmDiv or CTA_VEctor_ElmProd");
+            return retval;
+         }
+       }
+     }
+     else {
+       /* axpy operation for sub-states */
+       if (datax->nsubtrees!=datay->nsubtrees) {
+          printf("Error: Number of sub-trees is not the same x(%d) y(%d)\n",
+                 datax->nsubtrees,datay->nsubtrees);
+          return CTA_TREEVECTORS_NOT_COMPATIBLE;
+       }
+       for (istate=0;istate<datax->nsubtrees;istate++){
+         if (datax->nocompute[istate] == FALSE) {
+           retval=CTA_TreeVector_ElmOp(datay->treevecs[istate], datax->treevecs[istate], oper);
+           if (retval!=CTA_OK) {
+             CTA_WRITE_ERROR("Error in CTA_TreeVector_ElmOp");
+             return retval;
+           }
+         }
+       }
+     }
+     return CTA_OK;
+   }
+   else {
+      printf("Error: CTA_TreeVector_ElmDiv with meta info is not supported\n");
+      return CTA_TREEVECTORS_NOT_COMPATIBLE;
+   } // else metainfo exists
+
+
+   return CTA_OK;
+};
+
+
+int CTA_TreeVector_ElmDiv(CTA_TreeVector y, CTA_TreeVector x){
+   return CTA_TreeVector_ElmOp(y, x, ELMDIV);
+
+};
+
+
+int CTA_TreeVector_ElmProd(CTA_TreeVector y, CTA_TreeVector x){
+   return CTA_TreeVector_ElmOp(y, x, ELMPROD);
+};
+
+
+/* ------------------------------------------------------------*/
+
+
+#undef METHOD
+#define METHOD "Dot"
+int CTA_TreeVector_Dot(CTA_TreeVector treevec1, CTA_TreeVector treevec2, double *dotprod){
+// DIT IS EEN ERG KORT DOOR DE BOCHT GEDEELTELIJK INGEKLAPTE '
+// SUBSTATES MOETEN OOK ONDERSTEUND WORDEN !!!!
+
+
+   int retval;         //Return value of a call
+   CTAI_TreeVec *data1, *data2;   //Tree-vector specific data
+   int istate; // counter over all substates
+//   char name[CTA_STRLEN_NAME];
+   double dotwrk; //dot product of substate
+
+
+   /* check handle and get data object */
+   retval=CTA_Handle_Check((CTA_Handle) treevec1,CTA_TREEVECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) treevec1,(void*) &data1);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   retval=CTA_Handle_Check((CTA_Handle) treevec2,CTA_TREEVECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) treevec2,(void*) &data2);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   /* check whether this is a leaf */
+   if (data1->nsubtrees==0){
+      if (data2->nsubtrees!=0) return CTA_TREEVECTORS_NOT_COMPATIBLE;
+
+      /* dot vector */
+      if (data1->v) {
+         if (!data2->v) return CTA_TREEVECTORS_NOT_COMPATIBLE;
+         retval=CTA_Vector_Dot(*data1->v, *data2->v,dotprod);
+         if (retval!=CTA_OK) {
+            CTA_WRITE_ERROR("Error in using CTA_Vector_Dot");
+            return retval;
+         }
+      }
+   }
+   else {
+      /* dot sub-states */
+      *dotprod=0.0;
+      if (data1->nsubtrees!=data2->nsubtrees) return CTA_TREEVECTORS_NOT_COMPATIBLE;
+      for (istate=0;istate<data1->nsubtrees;istate++){
+        if (data1->nocompute[istate] == FALSE) {
+          retval=CTA_TreeVector_Dot(data1->treevecs[istate],data2->treevecs[istate],
+                                    &dotwrk);
+          if (retval!=CTA_OK) {
+             CTA_WRITE_ERROR("Error in using CTA_TreeVector_Dot");
+             return retval;
+          }
+          *dotprod=*dotprod+dotwrk;
+        }
+      }
+   }
+   return CTA_OK;
+};
+
+#undef METHOD
+#define METHOD "Nrm2"
+int CTA_TreeVector_Nrm2(CTA_TreeVector treevec1, double *nrm2){
+
+   int retval;         //Return value of a call
+   CTAI_TreeVec *data1;  //Tree-vector specific data
+   int istate;         // counter over all substates
+   double nrmwrk; //dot product of substate
+
+
+   /* check handle and get data object */
+   retval=CTA_Handle_Check((CTA_Handle) treevec1,CTA_TREEVECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) treevec1,(void*) &data1);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   /* check whether this is a leaf */
+   if (data1->nsubtrees==0){
+      /* dot vector */
+      if (data1->v) {
+         retval=CTA_Vector_Nrm2(*data1->v,nrm2);
+         if (retval!=CTA_OK) {
+	        CTA_WRITE_ERROR("Error in using CTA_Vector_Nrm2");
+	        return retval;
+         }
+      }
+   }
+   else {
+      /* dot sub-states */
+      *nrm2=0.0;
+      for (istate=0;istate<data1->nsubtrees;istate++){
+        if (data1->nocompute[istate] == FALSE) {
+          retval=CTA_TreeVector_Nrm2(data1->treevecs[istate],&nrmwrk);
+          if (retval!=CTA_OK) {
+	         CTA_WRITE_ERROR("Error in using CTA_TreeVector_Nrm2");
+	         return retval;
+          }
+          *nrm2=*nrm2+(nrmwrk*nrmwrk);
+        }
+      }
+      *nrm2=sqrt(*nrm2);
+   }
+   return CTA_OK;
+};
+
+
+#undef METHOD
+#define METHOD "GetSize"
+int CTA_TreeVector_GetSize(CTA_TreeVector treevec, int *n){
+   int retval;         //Return value of a call
+   CTAI_TreeVec *data;   //Tree-vector specific data
+   int nsub;
+   int istate;
+
+   /* check handle and get data object */
+   retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   // We are in a leaf
+   if (data->nsubtrees==0){
+      if (data->v){
+         retval=CTA_Vector_GetSize(*data->v,n);
+         if (retval) {
+	        CTA_WRITE_ERROR("Error while using CTA_Vector_GetSize");
+	        return retval;
+         }
+      }
+      else {
+         return CTA_UNINITIALISED_SUBTREEVECTOR;
+      }
+   } else {
+      *n=0;
+      for (istate=0;istate<data->nsubtrees;istate++){
+        if (data->nocompute[istate] == FALSE) {
+          retval=CTA_TreeVector_GetSize(data->treevecs[istate], &nsub);
+          if (retval) {
+	         CTA_WRITE_ERROR("Error while using CTA_TreeVector_GetSize");
+	         return retval;
+          }
+          *n=*n+nsub;
+        }
+      }
+
+   }
+   return CTA_OK;
+}
+
+// This is a try-out function that writes a tree-vector in
+// a NETCDF file
+
+char *CTAI_AddNameToPath(const char *path,const char *name, const char *sep){
+     size_t len1, len2, len3;
+     char *retStr;
+
+     len1=strlen(path);
+     len2=strlen(name);
+     len3=strlen(sep);
+     retStr=CTA_Malloc(sizeof(char)*(len1+len2+len3+1));
+
+      strcpy(retStr,path);
+      strcat(retStr,name);
+      strcat(retStr,sep);
+
+      return retStr;
+}
+
+//-------------------------------------------
+// small function for string manipulation 
+int replace_char_of_string(char *s, char old, char new){
+
+   char *p = s;
+
+   while(*p){
+      if(*p == old)
+      *p = new;
+      ++p;
+   }
+   return CTA_OK;
+}
+//--------------------------------------------
+
+int CTAI_CTA_TreeVector_ToNetCDF(CTA_TreeVector treeVec, char *path, int ncid, int timeDimid, BOOL append, int ntime, BOOL isdefvar){
+#if HAVE_LIBNETCDF
+
+#define ERR(e) {printf("Error: %s\n", nc_strerror(e)); exit(2);}
+
+   int nSize; //Size of the whole Tree-vector
+   int retval;          //Return value of a call
+   int varid;
+   void *values;
+   int dimids[5];
+   int dimidsRev[5];
+
+   char *nameDim1, *nameDim2, *nameDim3, *nameDim4;
+   char *nameVar;
+   char *nameVar_;
+   char *extendedPath;
+   char nameGrid[20];
+   char tvtag[CTA_STRLEN_NAME];
+   char modtag[CTA_STRLEN_NAME];
+   
+   /* Grid information */
+   int nx;
+   int ny;
+   int nz;
+   int nquant;
+   double x_origin;
+   double y_origin;
+   double z_origin;
+   double dx;
+   double dy;
+   double dz;
+
+   CTA_Datatype datatype;
+   CTA_Metainfo meta;
+   CTA_Vector v;
+   CTAI_TreeVec *data1;   //Tree-vector specific data
+   int istate;          // counter over all substates
+   int ndims[4];
+   int numDims;
+   int idim;
+   int i;
+   int size_type;
+   int locIDEBUG = 0;
+  
+   size_t *start;
+   size_t *count;
+
+   if (IDEBUG>0) {printf("start treevector_netcdf_export; append, isdefvar %d %d\n",append,isdefvar);}
+
+   nameDim1 = NULL;
+   nameDim2 = NULL;
+   nameDim3 = NULL;
+   nameDim4 = NULL;
+   nameVar_ = NULL;
+   nameVar  = NULL;
+
+   /* check handle and get data object */
+   retval=CTA_Handle_Check((CTA_Handle) treeVec,CTA_TREEVECTOR);
+   if (retval!=CTA_OK) return retval;
+   retval=CTA_Handle_GetData((CTA_Handle) treeVec,(void*) &data1);
+   if (retval!=CTA_OK) return retval;
+
+   /* if we are in a leaf */
+   if (data1->nsubtrees==0){
+      /* export the treevector to netCDF */
+      if (data1->v){
+         v=*data1->v;
+
+         /* Get the size of the vector */
+         retval=CTA_Vector_GetSize(v,&nSize);
+         if (IDEBUG>0) {printf("nSize=%d %d\n",nSize,retval);}
+         /* also get the datatype */
+         retval=CTA_Vector_GetDatatype(v,&datatype);
+
+         /* if we have meta information */
+         if (data1->metainfo){
+            meta=*data1->metainfo;
+            retval=CTA_Metainfo_getRegGrid(meta, nameGrid, &nx, &ny, &nz, &x_origin, &y_origin, &z_origin, &dx, &dy, &dz);
+            ndims[0]=nx;
+            ndims[1]=ny;
+            ndims[2]=nz;
+
+            if (nx*ny*nz == nSize) {numDims=3;}
+
+            if (nz<=1) numDims=2;
+            if (ny<=1) numDims=1;
+
+            if (numDims == 3  && nx*ny*nz < nSize) {
+              nquant = nSize / (nx*ny*nz) ;
+              if (nquant*nx*ny*nz != nSize) {
+                printf("Error: grid dimension and state dimension do not match %d %d %d %d %d \n",nx,ny,nz,nSize,nquant);
+                return CTA_DIMENSION_ERROR;
+              }
+
+              if (locIDEBUG>0) {printf("State is larger than product of grid dimensions. (%d %d %d) \n"
+                "We assume the state can be divided into %d separate quantities \n",nx,ny,nz,nquant);}
+              numDims=4;
+              ndims[3]=nquant;
+
+            }
+         } else {
+           if (locIDEBUG>0) {printf("We do not have meta information; so we  assume  1D array of size %d\n",nSize);}
+            numDims=1;
+            ndims[0]=nSize;
+         }
+
+         /* Now write the vector to netcdf */
+
+         /* First construct the names of the dimensions */
+
+         /* for safety reasons, underscores in tags are replaced by '-'
+            since underscores play a dividing role in importing netcdf from scratch
+            */
+         strcpy(modtag,data1->tag);
+         replace_char_of_string(modtag,'_','-');
+         
+         nameVar_=CTAI_AddNameToPath(path,modtag,"_");
+         nameDim1=CTAI_AddNameToPath(nameVar_,"x","");
+         nameDim2=CTAI_AddNameToPath(nameVar_,"y","");
+         nameDim3=CTAI_AddNameToPath(nameVar_,"z","");
+         nameDim4=CTAI_AddNameToPath(nameVar_,"n","");
+         nameVar=CTAI_AddNameToPath(path,modtag,"");
+
+         if (append || !isdefvar ){
+            /* appending a state-vector; we need to get the dimensions from the file */
+            /* and the data array */
+
+            dimids[0]=timeDimid;
+            if (numDims>=1){
+               if ((retval = nc_inq_dimid(ncid, nameDim1, &dimids[1]))) ERR(retval);
+            }
+            if (numDims>=2){
+               if ((retval = nc_inq_dimid(ncid, nameDim2, &dimids[2]))) ERR(retval);
+            }
+            if (numDims>=3){
+               if ((retval = nc_inq_dimid(ncid, nameDim3, &dimids[3]))) ERR(retval);
+            }
+            if (numDims>=4){
+               if ((retval = nc_inq_dimid(ncid, nameDim4, &dimids[4]))) ERR(retval);
+            }
+            if ((retval = nc_inq_varid(ncid, nameVar,  &varid    ))) ERR(retval);
+         }
+         else {
+           if (locIDEBUG>0) {printf("%s: netcdf dimensions are defined: %d \n",data1->tag, numDims);}
+         
+
+         /* defining a tree-vector so we have to define the dimensions y*/
+            retval = nc_redef(ncid);
+
+            dimids[0]=timeDimid;
+        if (locIDEBUG>0) {printf(" r1977 dimids[0] %d  namedim1 %s\n",timeDimid, nameDim1);}
+         
+
+            if (numDims>=1){
+               if ((retval = nc_def_dim(ncid, nameDim1, ndims[0], &dimids[1]))) ERR(retval);
+            }
+            if (numDims>=2){
+               if ((retval = nc_def_dim(ncid, nameDim2, ndims[1], &dimids[2]))) ERR(retval);
+            }
+            if (numDims>=3){
+               if ((retval = nc_def_dim(ncid, nameDim3, ndims[2], &dimids[3]))) ERR(retval);
+            }
+            if (numDims>=4){
+               if ((retval = nc_def_dim(ncid, nameDim4, ndims[3], &dimids[4]))) ERR(retval);
+            }
+         }
+
+         /* Reverse all dimensions since NETCDF accesses arrays in the C way */
+         dimidsRev[0]=dimids[0];
+         for (idim=0;idim<numDims;idim++){
+            dimidsRev[idim+1]=dimids[numDims-idim];
+         }
+        //          for (idim=0;idim<=numDims;idim++){
+        //         printf("dimid[%d]=%d  size dim1 %d \n",idim,dimids[idim], ndims[0]);
+        // }
+        // for (idim=0;idim<=numDims;idim++){
+         //        printf("dimidRev[%d]=%d\n",idim,dimidsRev[idim]);
+        // }
+         if (locIDEBUG>0) {printf("netcdf dimensions have been reversed \n");}
+
+         if (!append){
+
+           if (isdefvar) {
+
+             if (locIDEBUG>0) {printf("ctai_cta_exporttonetcdf: defining new variable %s \n",nameVar);}
+
+             /* Define the array */
+               if (datatype == CTA_REAL) {
+			     if ((retval = nc_def_var(ncid, nameVar, NC_FLOAT, numDims+1, dimidsRev, &varid))) ERR(retval);
+			   } else if (datatype == CTA_INTEGER) {
+                 if ((retval = nc_def_var(ncid, nameVar, NC_INT, numDims+1, dimidsRev, &varid))) ERR(retval);
+
+			   } else {  //default: double
+                 if ((retval = nc_def_var(ncid, nameVar, NC_DOUBLE, numDims+1, dimidsRev, &varid))) ERR(retval);
+
+			   }
+		   }
+         }
+         free(nameDim1);
+         free(nameDim2);
+         free(nameDim3);
+         free(nameDim4);
+         free(nameVar_);
+         free(nameVar);
+
+         if (!isdefvar) { // write the data itself
+
+         /* Get the values of the vector */
+         /* Note: we will export all values as float for space considerations.
+            Output files are mostly meant for visualisation purposes, so floats will be accurate enough. */
+         /* NO! for non-visualisation purposes, this gives problems! */
+
+           retval=CTA_SizeOf(datatype,&size_type);
+
+           values=CTA_Malloc(nSize * size_type);
+
+           retval=CTA_TreeVector_GetTag(treeVec, tvtag);
+		   if (datatype != CTA_STRING) {
+             retval = CTA_Vector_GetVals(v,values,nSize, datatype);
+             if (retval !=CTA_OK) {
+			   printf("cta_treevector.c: export netcdf: getting values of %s failed %d \n",&tvtag[0],retval);
+		     }
+		   }
+
+           // At what level must we add our new values???
+           count = CTA_Malloc((numDims+1)*sizeof(size_t));
+           start = CTA_Malloc((numDims+1)*sizeof(size_t));
+           count[0] = 1;
+
+           //reverse dimensions as well
+           for (idim=0;idim<numDims;idim++){
+             count[idim+1]=ndims[numDims-idim-1];
+           }
+
+           start[0] = ntime-1;
+           for (i=1; i<=numDims;i++){start[i] = 0;}
+
+	       if (datatype == CTA_REAL) {
+              if ((retval = nc_put_vara_float(ncid, varid, start, count, values))) ERR(retval);
+    	   } else  if (datatype == CTA_INTEGER) {
+           if ((retval = nc_put_vara_int(ncid, varid, start, count, values))) ERR(retval);
+    	   } else if (datatype == CTA_DOUBLE){
+            if ((retval = nc_put_vara_double(ncid, varid, start, count, values))) ERR(retval);
+		   } else { //do nothing
+		   }
+
+           free(values);
+           free(count);free(start);
+         }
+      }
+   }
+   else {
+
+     if (IDEBUG>0) { printf("RECURSIVELY calling ctai_cta_treevectortonetcdf! \n");}
+
+      /* create the path name */
+      strcpy(modtag,data1->tag);
+      replace_char_of_string(modtag,'_','-');
+      extendedPath=CTAI_AddNameToPath(path,modtag,"_");
+
+      /* iterate over all sub-tree-vectors */
+      for (istate=0;istate<data1->nsubtrees;istate++){
+        CTAI_CTA_TreeVector_ToNetCDF(data1->treevecs[istate], extendedPath,
+                   ncid, timeDimid, append, ntime, isdefvar);
+      }
+      free(extendedPath);
+      return CTA_OK;
+   }
+#else
+   printf("CTAI_CTA_TreeVector_ToNetCDF :Version is compiled without NETCDF support. This function should never have been called!!!!\n");
+   exit(-1);
+#endif
+   return CTA_INTERNAL_ERROR;
+}
+
+
+
+/*
+Import a treevector from netcdf. The caller has no idea yet of
+the tree-structure, so treevec is not yet filled/structured (it has just been created, nothing more)
+and all variables are concatenated in one large CTA_DOUBLE vector.
+In the near future, the ctai_treevector_importfromnetcdf will be made smarter
+such that the tree-structure will be created from the netcdf-file.
+*/
+int CTA_TreeVector_VImport(CTA_TreeVector treeVec, CTA_File hfile) {
+#if HAVE_LIBNETCDF
+
+   int retval;          //Return value of a call
+   int ncid;
+   int nvars;
+   int varid;
+   char var_name[NC_MAX_NAME+1];
+   nc_type data_type;
+   int numdims;  //total number of dimensions
+   int ndimsvar ;// number of dimensions for a certain variable
+   int natts, i, ivar;
+   int dimids[5];
+
+   double *values_d;
+   float *values_f;
+   int *values_i;
+   double *values_tot;
+   int nsize, nsize_tot;
+   size_t dimlength;
+   int size_type;      //C- sizeof of data items in hvec
+   int indx;
+   CTA_Datatype datatype; //Data Type of hvec
+   CTA_File_GetNetcdf(hfile,&ncid);
+
+   /* Assume that the treevector has just been created. This routine
+      simply reads each variable in the netcdf-file and places it in one long vector.
+   */
+
+   // first get all dimensions
+   retval =  nc_inq_ndims(ncid, &numdims);
+   /* the dim-ids are simply in the range 0..numdims-1 */
+
+
+   retval = nc_inq_nvars(ncid, &nvars);
+   /* The var-ids are simply in the range 0..nvars-1
+      We skip the first two, since they are ALWAYS (when created here in the native code)
+	  time and ntime.  */
+
+
+   /* first sweep through the netcdf-file to determine the total size */
+   nsize_tot = 0;
+   for (ivar=2; ivar < nvars ; ivar ++) {
+     varid = ivar;
+
+     retval = nc_inq_var(ncid, varid, var_name, &data_type, &ndimsvar, dimids, &natts);
+     if (retval != NC_NOERR ) {
+      // printf("ctai_tv_importfromnetcdf : error nc_inq_var %d :error nr: %d\n",ncid, retval);
+     }
+	 nsize = 1;  //0-th dimension
+	 for (i=0; i < ndimsvar; i++) {
+	     retval = nc_inq_dimlen(ncid, dimids[i], &dimlength);
+		 nsize = nsize * dimlength;
+	 }
+     nsize_tot = nsize_tot + nsize; //incremented length of receiving vector
+   }
+
+   /* create receiving vector. Note: this is ALWAYS a double! */
+    values_tot = CTA_Malloc(nsize_tot * sizeof(double));
+
+  /* second sweep through the netcdf-file; read everything */
+
+   indx = 0;
+   for (ivar=2; ivar < nvars ; ivar ++) {
+     varid = ivar;
+
+     retval = nc_inq_var(ncid, varid, var_name, &data_type, &ndimsvar, dimids, &natts);
+     nsize = 1;  //0-th dimension
+	 for (i=0; i < ndimsvar; i++) {
+	     retval = nc_inq_dimlen(ncid, dimids[i], &dimlength);
+		 nsize = nsize * dimlength;
+	 }
+
+	 if (data_type == NC_DOUBLE) {datatype = CTA_DOUBLE;
+	 } else if (data_type == NC_FLOAT) {datatype = CTA_REAL;
+	 } else if (data_type == NC_INT) {datatype = CTA_INTEGER;
+	 } else { printf("unknown datatype in netcdf file \n");
+		 return CTA_ILLEGAL_DATATYPE;
+	 }
+     retval=CTA_SizeOf(datatype,&size_type);
+
+
+	 values_d=CTA_Malloc(nsize*sizeof(double));
+	 values_f=CTA_Malloc(nsize*sizeof(float));
+	 values_i=CTA_Malloc(nsize*sizeof(int));
+
+     if (datatype == CTA_REAL) {
+	     retval = nc_get_var_float(ncid, varid, values_f);
+	     for (i=0; i< nsize; i++) {
+           values_tot[indx + i] = values_f[i];
+	    }
+	   } else  if (datatype == CTA_INTEGER) {
+	     retval = nc_get_var_int(ncid, varid, values_i);
+	     for (i=0; i< nsize; i++) {
+           values_tot[indx + i] = values_i[i];
+	     }
+
+	   } else if (datatype == CTA_DOUBLE){
+	     retval = nc_get_var_double(ncid, varid, values_d);
+	     for (i=0; i< nsize; i++) {
+          values_tot[indx + i] = values_d[i];
+	     }
+	   }
+
+
+      indx = indx + nsize;
+	  free(values_d);
+	  free(values_i);
+	  free(values_f);
+
+   }
+
+   // set the treevector
+    retval = CTA_TreeVector_SetVals(treeVec, values_tot,nsize_tot, CTA_DOUBLE);
+    free(values_tot);
+
+    return retval;
+
+#else
+   printf("CTAI_TreeVector_VImport :Version is compiled without NETCDF support. This function should never have been called!!!!\n");
+   exit(-1);
+#endif
+};
+
+/* ------------------------------------------------------- */
+
+int CTAI_TreeVector_ImportFromNetCDF(CTA_TreeVector treeVec, CTA_File hfile){
+
+
+#if HAVE_LIBNETCDF
+
+   int retval;          //Return value of a call
+   int ncid;
+   int nvars;
+   int varid;
+   char var_name[NC_MAX_NAME+1];
+   nc_type data_type;
+   int numdims;  //total number of dimensions
+   int ndimsvar ;// number of dimensions for a certain variable
+   int natts, i, ivar;
+   int dimids[5];
+   void *values;
+   CTA_TreeVector hsubstate;
+   char *tag, *tag0;
+   int nsize, tv_size;
+   size_t dimlength;
+   int size_type;      //C- sizeof of data items in hvec
+   int indx;
+   CTA_Datatype datatype; //Data Type of hvec
+   CTA_Vector **hvec; //array (of length 1) with pointer to vector of hsubstate
+
+   tag0 = NULL;
+   CTA_File_GetNetcdf(hfile,&ncid);
+
+   /* Assume that the treevector already contains the desired sub-treevectors. This routine
+      simply reads each variable in the netcdf-file and places it in the proper subtreevector.
+   */
+
+   // first get all dimensions
+   retval =  nc_inq_ndims(ncid, &numdims);
+   /* the dim-ids are simply in the range 0..numdims-1 */
+
+
+   retval = nc_inq_nvars(ncid, &nvars);
+   /* The var-ids are simply in the range 0..nvars-1  */
+
+
+   for (ivar=0; ivar < nvars ; ivar ++) {
+     varid = ivar;
+
+     retval = nc_inq_var(ncid, varid, var_name, &data_type, &ndimsvar, dimids, &natts);
+     if (retval != NC_NOERR ) {
+       //printf("ctai_tv_importfromnetcdf : error nc_inq_var %d :error nr: %d\n",ncid, retval);
+     }
+     nsize = 1;  //0-th dimension
+     for (i=0; i < ndimsvar; i++) {
+       retval = nc_inq_dimlen(ncid, dimids[i], &dimlength);
+       nsize = nsize * dimlength;
+     }
+     /* Now find the subtreevec matching the var-name */
+     // printf("variable : name %s  \n",&var_name);
+
+     tag = CTA_Malloc((strlen(var_name)+1)*sizeof(char));
+     tag0 = strrchr(&var_name[0],'_');
+     if (tag0 != NULL) {
+       tag = tag0 + 1 ;
+     } else {
+       strcpy(tag, "novar");
+       if (nvars == 3) { // simple case: no subtrees, only time,ntime,vec
+         strcpy(tag , &var_name[0]);
+       }
+     }
+
+     //printf("last part of variable name = %s, nvars = %d  \n",tag, nvars);
+
+     retval = CTA_TreeVector_GetSubTreeVec(treeVec,  tag, &hsubstate);
+
+     if (retval == CTA_ITEM_NOT_FOUND) {
+       //printf(".. no subtreevector found; ignoring the  variable %s \n",&var_name[0]);
+     } else {
+       retval = CTA_TreeVector_GetSize(hsubstate, &tv_size);
+       if (tv_size != nsize) {
+         printf("ctai_tv_importfromnetcdf: size subtreevec (%s) of %d does not match netcdf-size %d \n",tag, tv_size,nsize);
+         return CTA_DIMENSION_ERROR;
+       }
+       /* now determine the datatype */
+
+       hvec=CTA_Malloc(sizeof(CTA_Vector*));
+       indx=0;
+       retval=CTAI_TreeVec_GetVecHandles(hsubstate, hvec, &indx);
+       retval=CTA_Vector_GetDatatype(*hvec[0],&datatype);
+       retval=CTA_SizeOf(datatype,&size_type);
+
+       values = CTA_Malloc(tv_size * size_type);
+
+       if (datatype == CTA_REAL) {
+         retval = nc_get_var_float(ncid, varid, values);
+       } else  if (datatype == CTA_INTEGER) {
+         retval = nc_get_var_int(ncid, varid, values);
+       } else if (datatype == CTA_DOUBLE){
+         retval = nc_get_var_double(ncid, varid, values);
+       } else {
+         printf("no valid datatype to import %s from netcdf \n",&var_name[0]);
+         retval  = -1;
+       }
+
+       /* now fill the subtreevector */
+       if (retval == CTA_OK) {
+         retval = CTA_TreeVector_SetVals(hsubstate, values,tv_size, datatype);
+       }
+       retval = CTA_OK;
+       free(values);
+       printf("variable %s  done \n",var_name);
+     }
+   }
+   return retval;
+
+#else
+   printf("CTAI_TreeVector_ImportFromNetCDF :Version is compiled without NETCDF support. This function should never have been called!!!!\n");
+   exit(-1);
+#endif
+
+
+
+};
+
+
+
+int CTAI_TreeVector_ExportToNetCDF(CTA_TreeVector treeVec, CTA_File hfile){
+
+
+#if HAVE_LIBNETCDF
+
+   int retval;          //Return value of a call
+   int timeDimid;
+   int oneDimid;
+   int ncid;
+   BOOL append;
+   BOOL isdefvar;
+   size_t start[1];
+   int one;
+   int timeVarid;
+   int ntimeVarid;
+   int ntime;
+
+   int idebug=0;
+
+   if (idebug>0) {printf("CTAI_TreeVector_ExportToNetCDF: Export treevector to netcdf\n");}
+
+   CTA_File_GetNetcdf(hfile,&ncid);
+
+   /* Check whether the file already contains the time dimension */
+   append=TRUE;
+   retval = nc_inq_dimid(ncid, "time", &timeDimid);
+
+   if (retval != NC_NOERR){
+      if (idebug>0) {printf("CTAI_TreeVector_ExportToNetCDF: First write to this file; ncid: %d\n",ncid);}
+      if (idebug>0) {printf("CTAI_TreeVector_ExportToNetCDF: creating time and ntime variables\n");}
+
+      /* Hmm could not find it so let's create it */
+      if ((retval = nc_def_dim(ncid, "time", NC_UNLIMITED, &timeDimid))) ERR(retval);
+
+      if ((retval = nc_def_dim(ncid, "one", 1, &oneDimid))) ERR(retval);
+
+      
+      append=FALSE;
+
+      /* Define the dimensions and the array*/
+      retval = nc_redef(ncid);
+
+      /* Now create time and counter array */
+      if ((retval = nc_def_var(ncid, "time",  NC_INT, 1, &timeDimid, &timeVarid))) ERR(retval);
+      if ((retval = nc_def_var(ncid, "ntime", NC_INT, 1, &oneDimid,  &ntimeVarid))) ERR(retval);
+
+      retval = nc_enddef(ncid);
+      /* Set first value in time array */
+
+      start[0] = 0;
+      one      = 1;
+      ntime    = 1;
+      if ((retval = nc_put_var1_int   (ncid, timeVarid,  start, &one))) {
+         ERR(retval);
+      }
+      if ((retval = nc_put_var1_int   (ncid, ntimeVarid, start, &ntime))) ERR(retval);
+   }
+
+   /* Call the actual write routine */
+   if ((retval = nc_inq_varid(ncid, "time",  &timeVarid))) ERR(retval);
+   if ((retval = nc_inq_varid(ncid, "ntime", &ntimeVarid))) ERR(retval);
+
+   start[0] = 0;
+   retval = nc_get_var1_int   (ncid, ntimeVarid, start, &ntime);
+
+   if (append) {
+      ntime=ntime+1;
+      start[0] = 0;
+      if ((retval = nc_put_var1_int   (ncid, ntimeVarid, start, &ntime))) ERR(retval);
+      start[0] = ntime-1;
+      if ((retval = nc_put_var1_int   (ncid, timeVarid,  start, &ntime))) ERR(retval);
+   }
+   /* first write only the dimensions, if it is the first time */
+   
+   isdefvar = TRUE;
+   
+   
+   if (idebug>0){printf("NOW 1st NETCDF sweep: writing dimensions\n");}
+   retval=CTAI_CTA_TreeVector_ToNetCDF(treeVec,"",ncid, timeDimid, append, ntime, isdefvar);
+   /* now end the definition, if it is the first time */
+   
+   if (!append){
+     if ((retval = nc_enddef(ncid))) ERR(retval);
+   }
+
+   /* in the second sweep , actually write the data. */
+   isdefvar = FALSE;
+   if (IDEBUG>0) {printf("NOW 2nd NETCDF sweep: writing data\n");}
+   retval=CTAI_CTA_TreeVector_ToNetCDF(treeVec,"",ncid, timeDimid, append, ntime, isdefvar);
+
+   return CTA_OK;
+
+#else
+   printf("CTAI_TreeVector_ExportToNetCDF :Version is compiled without NETCDF support. This function should never have been called!!!!\n");
+   exit(-1);
+#endif
+
+
+
+
+};
+
+
+
+#undef METHOD
+#define METHOD "Export"
+int CTA_TreeVector_Export(CTA_TreeVector treevec1, CTA_Handle usrdata){
+   int retval;          //Return value of a call
+   CTAI_TreeVec *data1;   //Tree-vector specific data
+   CTAI_TreeVec *data2;   //subTree-vector specific data
+   int istate;          // counter over all substates
+   FILE *file;          //File pointer
+   int n;
+   int isnetcdf;
+   int locIDEBUG=0;
+   CTA_Datatype datatype;
+   BOOL excludefv;
+
+   /* check handle and get data object */
+
+   if (IDEBUG>10){printf("\nCTA_TreeVector_Export Start\n");}
+   retval=CTA_Handle_Check((CTA_Handle) treevec1,CTA_TREEVECTOR);
+   if (locIDEBUG>0){printf("\nCTA_TreeVector_Export1 %d\n",retval);}
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) treevec1,(void*) &data1);
+   if (locIDEBUG>0){printf("\nCTA_TreeVector_Export2 %d\n",retval);}
+   if (retval!=CTA_OK) return retval;
+
+   CTA_Message_Quiet(CTA_TRUE);
+   if (CTA_Handle_Check(usrdata,CTA_FILE)==CTA_OK) {
+      CTA_Message_Quiet(CTA_FALSE);
+      CTA_File_IsNetcdf(usrdata,&isnetcdf);
+      if (isnetcdf==CTA_TRUE){
+         if (locIDEBUG>10){printf("\nCTA_TreeVector_Export: NETCDF\n");}
+         return CTAI_TreeVector_ExportToNetCDF(treevec1, usrdata);
+      }
+      else {
+         if (locIDEBUG>10){printf("\nCTA_TreeVector_Export: NOT :NETCDF\n");}
+         retval=CTA_File_Get(usrdata,&file);
+         /* check whether this is a leaf */
+         if (data1->nsubtrees==0){
+            /* print tag of this state */
+            fprintf(file,"%s=",data1->tag);
+            /* Print vector */
+            if (data1->v) {
+               retval=CTA_Vector_Export(*data1->v, usrdata);
+               if (retval!=CTA_OK) {
+	              CTA_WRITE_ERROR("Error using CTA_Vector_Export");
+	              return retval;
+               }
+            }
+            /* print metainfo, if available */
+            if (locIDEBUG>0 || 1 ) {
+               if (data1->metainfo) {
+                 retval=CTA_Metainfo_Export(*data1->metainfo,usrdata);
+                 if (retval!=CTA_OK) {
+	                CTA_WRITE_ERROR("Error using CTA_Metainfo_Export");
+	                return retval;
+                 }
+               }
+            }
+         }
+         else {
+
+           /* first print metainfo of node, if available */
+               if (data1->metainfo) {
+                 if (locIDEBUG>1) {printf("METAINFO node: %p  \n ",data1->metainfo);}
+                 retval=CTA_Metainfo_Export(*data1->metainfo,usrdata);
+                 if (retval!=CTA_OK) {
+	                CTA_WRITE_ERROR("Error using CTA_Metainfo_Export");
+	                return retval;
+                 }
+               }
+
+            /* export sub-states */
+            for (istate=0;istate<data1->nsubtrees;istate++){
+               /* print tag of this state */
+               retval=CTA_TreeVector_Export(data1->treevecs[istate], usrdata);
+               if (retval!=CTA_OK) {
+	              CTA_WRITE_ERROR("Error using CTA_TreeVector_Export");
+	              return retval;
+               }
+
+               /* Get data object of substate (in order to get its name) */
+               retval=CTA_Handle_GetData((CTA_Handle) data1->treevecs[istate],
+                                         (void*) &data2);
+               if (retval!=CTA_OK) {
+	              CTA_WRITE_ERROR("Cannot retrieve handle data");
+	              return retval;
+               }
+               excludefv = data1->nocompute[istate];
+
+               fprintf(file,"%s.%s=%s;",data1->tag,data2->tag,data2->tag);
+               if (excludefv == FALSE) {
+                 fprintf(file,"part %s\n",data2->tag);
+               }
+               else {
+                 fprintf(file,"part %s, EXCLUDEFROMVECTOR\n",data2->tag);
+               }
+               fprintf(file,"\n");
+            }
+         }
+      }
+   }   /* next section: handle export to pack object  */
+     else if (CTA_Handle_Check(usrdata,CTA_PACK)==CTA_OK){
+      CTA_Message_Quiet(CTA_FALSE);
+      if (locIDEBUG>0){printf("\nCTA_TreeVector_Export packing state\n");}
+      retval=CTA_Pack_Add(usrdata,data1,sizeof(CTAI_TreeVec));
+      if (retval!=CTA_OK) {
+	     CTA_WRITE_ERROR("Error using CTA_Pack_Add");
+	     return retval;
+      }
+      /* add metainfo, if available. This can be the case for both node and leaves. */
+      if (data1->metainfo) {
+        //Now pack the metainfo
+        retval=CTA_Metainfo_Export(*data1->metainfo, usrdata);
+        if (retval!=CTA_OK) {
+             printf("cta_treevector_export: metainfo could not be added: %d\n",retval);
+             return retval;
+        }
+      }
+      if (data1->nsubtrees==0) {
+         if (data1->v) {
+           if (locIDEBUG>0){printf("\nCTA_TreeVector_Export packing leaf tag %s \n",data1->tag);}
+           retval=CTA_Vector_GetSize(*data1->v,&n);
+           if (retval!=CTA_OK) {
+	          CTA_WRITE_ERROR("Cannot get size of vector");
+	          return retval;
+           }
+           retval=CTA_Pack_Add(usrdata,&n,sizeof(int));
+           if (retval!=CTA_OK) {
+	          CTA_WRITE_ERROR("Error using CTA_Pack_Add");
+	          return retval;
+           }
+           retval=CTA_Vector_GetDatatype(*data1->v,&datatype);
+           if (retval!=CTA_OK) {
+	          CTA_WRITE_ERROR("Cannot get datatype of vector");
+	          return retval;
+           }
+           retval=CTA_Pack_Add(usrdata,&datatype,sizeof(CTA_Datatype));
+           if (retval!=CTA_OK) {
+	          CTA_WRITE_ERROR("Error using CTA_Pack_Add");
+	          return retval;
+           }
+
+           if (locIDEBUG>0){printf("\nCTA_TreeVector_Export Export vector\n");}
+           retval=CTA_Vector_Export(*data1->v, usrdata);
+           if (retval!=CTA_OK) {
+	          CTA_WRITE_ERROR("Error using CTA_Vector_Export");
+	          return retval;
+           }
+           if (locIDEBUG>0){printf("\nCTA_TreeVector_Export Export done\n");}
+         }
+      } else {
+         if (locIDEBUG>0){printf("\nCTA_TreeVector_Export Packing substates of %s\n",data1->tag);}
+
+           /* add excludevector information  */
+         for (istate=0;istate<data1->nsubtrees;istate++){
+           retval=CTA_Pack_Add(usrdata,&data1->nocompute[istate],sizeof(int));
+           if (retval!=CTA_OK) {
+	          CTA_WRITE_ERROR("Error using CTA_Pack_Add");
+	          return retval;
+           }
+		 }
+
+         for (istate=0;istate<data1->nsubtrees;istate++){
+
+           retval=CTA_TreeVector_Export(data1->treevecs[istate], usrdata);
+           if (retval!=CTA_OK) {
+	          CTA_WRITE_ERROR("Error using CTA_TreeVector_Export");
+	          return retval;
+           }
+         }
+      }
+   } else {
+      CTA_Message_Quiet(CTA_FALSE);
+      return CTA_FORMAT_NOT_SUPPORTED;
+   }
+   if (IDEBUG>0){printf("CTA_TreeVector_Export End\n");}
+   return CTA_OK;
+};
+
+
+#undef METHOD
+#define METHOD "Import"
+int CTA_TreeVector_Import(CTA_TreeVector treevec1, CTA_Handle usrdata){
+   int retval;          //Return value of a call
+   CTAI_TreeVec *data1;   //Tree-vector specific data
+   int istate;          // counter over all substates
+   BOOL packout, isfile;
+
+   CTA_Datatype datatype;
+   int nsubtrees;
+   CTA_Vector *v;
+   CTA_TreeVector  *treevecs;
+   CTA_Metainfo *minfo;
+   BOOL *nocompute;
+   int n;
+   int isnetcdf;
+   char oldtag[CTA_STRLEN_TAG];
+   int locIDEBUG = 0;
+
+
+   if (locIDEBUG>4) {
+      printf("\nCTA_TreeVector_Import: Start\n");
+   }
+   /* check handle and get data object */
+   retval=CTA_Handle_Check((CTA_Handle) treevec1,CTA_TREEVECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) treevec1,(void*) &data1);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Cannot retrieve handle data");
+      return retval;
+   }
+
+   CTA_Message_Quiet(CTA_TRUE);
+   packout = (CTA_Handle_Check(usrdata,CTA_PACK)==CTA_OK);
+   CTA_Message_Quiet(CTA_FALSE);
+   if (packout){
+
+      if (locIDEBUG>4) {
+         printf("CTA_TreeVector_Import: importing from pack array %d \n",usrdata );
+      }
+      /* Store some data of current state */
+      nsubtrees=data1->nsubtrees;
+      v=data1->v;
+      treevecs=data1->treevecs;
+      nocompute=data1->nocompute;
+      minfo=data1->metainfo;
+      strcpy(oldtag,data1->tag);
+      /* unpack the administration */
+      retval=CTA_Pack_Get(usrdata,data1,sizeof(CTAI_TreeVec));
+      if (retval!=CTA_OK) {
+	     CTA_WRITE_ERROR("Cannot retrieve handle data");
+	     return retval;
+      }
+
+      if (locIDEBUG>9) {
+         printf("CTA_TreeVector_Import: data->tag=%s while original tag was %s\n ",data1->tag, oldtag);
+         printf("CTA_TreeVector_Import: data->nsubtrees=%d\n",data1->nsubtrees);
+         printf("CTA_TreeVector_Import: data->v      =%p\n",data1->v);
+         printf("CTA_TreeVector_Import: data->metainfo      =%p\n",data1->metainfo);
+      }
+      
+      if (strlen(oldtag)>0 && strcmp(oldtag,"unknown")!=0) { strcpy(data1->tag, oldtag) ;}
+      
+      /* unpack metainfo */
+      if (minfo && !data1->metainfo) { // remove existing meta-info
+        retval = CTA_Metainfo_Free(minfo);
+      };
+
+      if (data1->metainfo) {
+        /* Check first if a metainfo of the receiving treevector already exists.
+             If not, create it first. */
+
+        if (!minfo) {
+          if (locIDEBUG>4) {
+            printf("receiving state has no metainfo yet \n");
+            printf("old minfo: %p,  data1->metainfo: %p\n",minfo,data1->metainfo);
+        }
+          //CTA_Malloc?
+          data1->metainfo = CTA_Malloc(sizeof(CTA_Metainfo));
+          retval=CTA_Metainfo_Create(data1->metainfo);
+
+          if (locIDEBUG>4){printf("... therefore it is created. %d \n",retval);}
+        } else{ //data1->metainfo and minfo
+
+          data1->metainfo = minfo;
+        }
+
+        retval=CTA_Metainfo_Import(*data1->metainfo,usrdata);
+        if (retval!=CTA_OK) {
+             printf("cta_treevector_import: could not get metainfo : %d\n",retval);
+             return retval;
+        }
+      }
+
+
+      /* unpack size and datatype of vector */
+      if (data1->v) {
+         retval=CTA_Pack_Get(usrdata,&n,sizeof(int));
+         retval=CTA_Pack_Get(usrdata,&datatype,sizeof(CTA_Datatype));
+      }
+      /* do both have a vector (leaf) */
+      if (!v && data1->v) {
+         if (locIDEBUG>4) {
+            printf("CTA_TreeVector_Import: this is a leaf\n");
+         }
+         /* we need to create a leaf-vector */
+         data1->v=CTA_Malloc(sizeof(CTA_Vector));
+         /* NOTE SIMPLIFICATION.. CREATING CTA_DEFAULT_VECTOR !! */
+
+         retval=CTA_Vector_Create(CTA_DEFAULT_VECTOR, n, datatype, CTA_NULL, data1->v);
+         if (retval!=CTA_OK) {
+            CTA_WRITE_ERROR("Cannot create Vector");
+            return retval;
+         }
+      } else if (v && !data1->v) {
+         retval=CTA_Vector_Free(v);
+         if (retval!=CTA_OK) {
+            CTA_WRITE_ERROR("Cannot free vector");
+            return retval;
+         }
+      } else {
+         data1->v=v;
+      }
+
+      /* do we have substates */
+      if (nsubtrees==data1->nsubtrees) {
+         data1->treevecs=treevecs;
+         data1->nocompute=nocompute;  /*CVV*/
+
+      } else {
+         /* Deallocate all substates (if there are any) */
+         if (treevecs) {
+            for (istate=0;istate<nsubtrees;istate++){
+               retval=CTA_TreeVector_Free(&treevecs[istate],CTA_TRUE);
+               if (retval!=CTA_OK) {
+                  CTA_WRITE_ERROR("Cannot free TreeVector");
+                  return retval;
+               }
+            }
+            free(treevecs);
+            free(nocompute);
+         }
+         /* create new substates */
+         if (data1->nsubtrees>0) {
+            data1->treevecs=CTA_Malloc(sizeof(CTA_TreeVector)*data1->nsubtrees);
+            data1->nocompute=CTA_Malloc(sizeof(BOOL)*data1->nsubtrees);
+            for (istate=0;istate<data1->nsubtrees;istate++){
+               retval=CTA_TreeVector_Create("import_state", "unknown",
+                                       &data1->treevecs[istate]);
+               data1->nocompute[istate] = FALSE;
+            }
+         }
+      }
+      /* unpack all data */
+      if (data1->nsubtrees>0) {
+         if (locIDEBUG>0) {
+            printf("CTA_TreeVector_Import: this is a node\n");
+         }
+        /* Unpack all substates */
+
+        for (istate=0;istate<data1->nsubtrees;istate++){
+          retval=CTA_Pack_Get(usrdata,&data1->nocompute[istate],sizeof(int));
+          if (retval!=CTA_OK) {
+             CTA_WRITE_ERROR("Error using CTA_Pack_Get");
+             return retval;
+          }
+        }
+
+        for (istate=0;istate<data1->nsubtrees;istate++){
+          retval=CTA_TreeVector_Import(data1->treevecs[istate], usrdata);
+          if (retval!=CTA_OK) {
+             CTA_WRITE_ERROR("Error using CTA_TreeVector_Import");
+             return retval;
+          }
+        }
+      } else if (data1->v) {
+         retval=CTA_Vector_Import(*data1->v,usrdata);
+      }
+   } else {
+     CTA_Message_Quiet(CTA_TRUE);
+     isfile =  (CTA_Handle_Check(usrdata,CTA_FILE)==CTA_OK) ;
+     CTA_Message_Quiet(CTA_FALSE);
+     if (isfile) {
+       CTA_File_IsNetcdf(usrdata,&isnetcdf);
+       if (isnetcdf==CTA_TRUE){
+         return CTAI_TreeVector_ImportFromNetCDF(treevec1, usrdata);
+       } else {
+         return CTA_FORMAT_NOT_SUPPORTED;
+       }
+     } else {
+         return CTA_FORMAT_NOT_SUPPORTED;
+     }
+   }
+   if (locIDEBUG>4) {
+      printf("CTA_TreeVector_Import: End\n");
+   }
+   return CTA_OK;
+};
+
+
+
+#undef METHOD
+#define METHOD "Free"
+int CTA_TreeVector_Free(CTA_TreeVector *treevec, int recursive){
+
+   int retval;         //Return value of a call
+   CTAI_TreeVec *data;   //Tree-vector specific data
+   int istate;         //Counter over substates
+   int refCount;       //Number of referencs to this treevector
+
+
+   /* Check for quick return */
+   if (*treevec==CTA_NULL) return CTA_OK;
+
+   /* Get the reference count */
+   refCount=1;
+   CTA_Handle_GetRefCount(*treevec,&refCount);
+   CTA_Handle_DecrRefCount(*treevec);
+
+   retval=CTA_Handle_GetData((CTA_Handle) *treevec,(void*) &data);
+   if (retval) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   // We are in a leaf
+   if (data->nsubtrees==0){
+
+      if (refCount<=1){
+         if (data->v){
+            retval=CTA_Vector_Free(data->v);
+            if (retval) {
+               CTA_WRITE_ERROR("Cannot free vector");
+               return retval;
+            }
+         }
+         if (data->metainfo){
+            retval=CTA_Metainfo_Free(data->metainfo);
+            if (retval) {
+               CTA_WRITE_ERROR("Cannot free metainfo");
+               return retval;
+            }
+         }
+      }
+
+   }
+   else {
+
+      if (recursive){
+         for (istate=0;istate<data->nsubtrees;istate++){
+
+            retval=CTA_TreeVector_Free(&data->treevecs[istate], recursive);
+            if (retval) {
+	           CTA_WRITE_ERROR("Cannot free treevector");
+	           return retval;
+            }
+         }
+      }
+
+
+   }
+   if (refCount<=1){
+      if (data->v)         free(data->v);
+      if (data->nocompute) free(data->nocompute);
+      if (data->treevecs)  free(data->treevecs);
+      free(data);
+      retval=CTA_Handle_Free((CTA_Handle*) treevec);
+      if (retval) {
+         CTA_WRITE_ERROR("Cannot free handle");
+         return retval;
+      }
+   }
+
+   return CTA_OK;
+};
+
+
+#undef METHOD
+#define METHOD "Info"
+int CTA_TreeVector_Info(CTA_TreeVector treevec){
+
+   int retval;         //Return value of a call
+   CTAI_TreeVec *data;
+   int istate; // counter over all substates
+   int n;
+
+   /* check handle and get data object */
+   retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   printf("State-vector information:\n");
+   printf("tag     :%s\n",data->tag);
+   printf("nsubtrees :%d\n",data->nsubtrees);
+   if (data->nsubtrees>0){
+      printf("--> nocompute :");
+      for (istate=0;istate<data->nsubtrees;istate++){
+              printf("%d ",data->nocompute[istate]);
+      }
+      printf("\n");
+   }
+
+   if (data->nsubtrees==0) {
+      printf("leaf    :yes\n");
+      n=0;
+      if (data->v) {
+         retval=CTA_Vector_GetSize(*data->v,&n);
+         if (retval!=CTA_OK) {
+	        CTA_WRITE_ERROR("Cannot get size of vector");
+	        return retval;
+         }
+      }
+      printf("dim     :%d\n",n);
+   }
+   else {
+      for (istate=0;istate<data->nsubtrees;istate++){
+         retval=CTA_TreeVector_Info(data->treevecs[istate]);
+         if (retval!=CTA_OK) {
+	        CTA_WRITE_ERROR("Cannot retrieve info from treevector");
+	        return retval;
+         }
+      }
+   }
+   return CTA_OK;
+};
+
+
+#undef METHOD
+#define METHOD "OpOnLeafs"
+int CTA_TreeVector_OpOnLeafs(
+    CTA_TreeVector treevec1,  // (First) input treevector
+    CTA_TreeVector treevec2,  // If (input value == empty treevector):
+                              //    output treevector
+                              // Else if (input value != CTA_NULL)
+                              //    second input treevector
+    CTA_Func op,              // Function to be called for every leaf
+    CTA_Handle arg)           // Extra arguments
+{
+   int retval;              //Return value of a call
+
+   CTA_Function * operator;     // the operator given in the input
+   CTAI_TreeVec *data1, *data2; // contents of the treevectors
+   int istate;                  // counter over all substates
+   BOOL empty2=FALSE;           // treevector2 is empty yes/no
+   CTA_Vector v2;
+   CTA_TreeVector * t2array;
+   CTA_TreeVector t2;
+   int nsubtrees;
+   char tag2[CTA_STRLEN_TAG];
+
+   int jdebug=0;
+   if (jdebug)
+   {
+      printf("CTA_TreeVector_OpOnLeafs: start of function %d %d\n",
+             treevec1, treevec2 );
+   }
+
+   /* check handle and get data object */
+   retval=CTA_Handle_Check((CTA_Handle) treevec1,CTA_TREEVECTOR);
+   if (retval!=CTA_OK)
+   {
+       char message[1024];
+	   sprintf(message,"CTA_TreeVector_OpOnLeafs: handle %d is not a TreeVector",
+             treevec1);
+	   CTA_WRITE_ERROR(message);
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) treevec1,(void*) &data1);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   if (treevec2 != CTA_NULL)
+   {
+      retval=CTA_Handle_Check((CTA_Handle) treevec2,CTA_TREEVECTOR);
+      if (retval!=CTA_OK)
+      {
+          char message[1024];
+		  sprintf(message,"CTA_TreeVector_OpOnLeafs: handle %d is not a TreeVector",
+                treevec2);
+		  CTA_WRITE_ERROR(message);
+          return retval;
+      }
+      retval=CTA_Handle_GetData((CTA_Handle) treevec2,(void*) &data2);
+      if (retval!=CTA_OK) {
+	     CTA_WRITE_ERROR("Cannot retrieve handle data");
+	     return retval;
+      }
+   }
+
+   if (jdebug)
+   {
+      printf("CTA_TreeVector_OpOnLeafs: input is OK");
+   }
+
+   /* consistency check */
+   if (treevec2 == CTA_NULL)
+   {
+      // There is no second treevector
+   }
+   if (data2->nsubtrees==0 && data2->v == NULL)
+   {
+      // The second treevector is an empty treevector: fill it
+      //     with the output.
+      empty2 = TRUE;
+   }
+   else if (data1->nsubtrees!=data2->nsubtrees)
+   {
+      return CTA_TREEVECTORS_NOT_COMPATIBLE;
+   }
+
+   if (data1->nsubtrees==0)
+   {
+      /* No subtrees: this treevector is a leaf treevector
+         call operator */
+      retval = CTA_Func_GetFunc(op, &operator);
+      if (retval!=CTA_OK) {
+	     CTA_WRITE_ERROR("Error using CTA_Func_GetFunc");
+	     return retval;
+      }
+
+      if (treevec2 == CTA_NULL)
+      {
+         operator(data1->tag, *data1->v, CTA_NULL, arg, &retval);
+         if (retval!=CTA_OK) {
+	        CTA_WRITE_ERROR("Error using operator ...");
+	        return retval;
+         }
+      }
+      else if (empty2)
+      {
+         operator(data1->tag, *data1->v, &v2, arg, &retval);
+         if (retval!=CTA_OK)
+         {
+            char message[1024];
+			sprintf(message, "CTA_TreeVector_OpOnLeafs: operator %d kapot",op);
+			CTA_WRITE_ERROR(message);
+            return retval;
+         }
+         if (jdebug)
+         {
+           printf("setting vector %d into treevector %d\n",treevec2,v2);
+         }
+         retval = CTA_TreeVector_SetVec(treevec2, v2);
+         if (retval!=CTA_OK) {
+            CTA_WRITE_ERROR("Cannot set vector in treevector");
+            return retval;
+         }
+      }
+      else
+      {
+         operator(data1->tag, *data1->v, *data2->v, arg, &retval);
+         if (retval!=CTA_OK) {
+            CTA_WRITE_ERROR("Error using operator ...");
+            return retval;
+         }
+      }
+   }
+   else
+   {
+      nsubtrees = data1->nsubtrees;
+      if (empty2)
+      {
+         if (jdebug)
+         {
+            printf("CTA_TreeVector_OpOnLeafs: creating %d subleafs\n",
+                   nsubtrees);
+         }
+         t2array = CTA_Malloc(nsubtrees*sizeof(CTA_TreeVector));
+      }
+
+      /* recursion for all sub-states */
+      for (istate=0; istate<nsubtrees; istate++)
+      {
+         if (treevec2==CTA_NULL)
+         {
+            t2 = CTA_NULL;
+         }
+         else if (empty2)
+         {
+            retval=CTA_TreeVector_GetTag(data1->treevecs[istate], tag2);
+            if (retval!=CTA_OK) {
+               CTA_WRITE_ERROR("Cannot get tag from treevector");
+               return retval;
+            }
+
+            if (jdebug)
+            {
+               printf("CTA_TreeVector_OpOnLeafs: creating subleaf %d: '%s'\n",
+                      istate, tag2);
+            }
+
+            retval=CTA_TreeVector_Create( tag2, tag2, &t2array[istate]);
+            if (retval!=CTA_OK) {
+               CTA_WRITE_ERROR("Cannot create TreeVector");
+               return retval;
+            }
+
+            t2 = t2array[istate];
+         }
+         else
+         {
+            t2 = data2->treevecs[istate];
+         }
+
+         retval=CTA_TreeVector_OpOnLeafs(
+                        data1->treevecs[istate], t2, op, arg);
+         if (retval!=CTA_OK) {
+            CTA_WRITE_ERROR("Error using CTA_TreeVector_OpOnLeafs");
+            return retval;
+         }
+      }
+
+      if (empty2)
+      {
+         if (jdebug)
+         {
+            printf("CTA_TreeVector_OpOnLeafs: concatenatng %d leafs\n",
+                      nsubtrees);
+         }
+         retval=CTA_TreeVector_Conc( treevec2, t2array, nsubtrees);
+         if (retval!=CTA_OK) {
+            CTA_WRITE_ERROR("Error using CTA_TreeVector_COnc");
+            return retval;
+         }
+
+         free(t2array);
+      }
+   }
+   return CTA_OK;
+}
+
+
+#undef METHOD
+#define METHOD "ElmSqrt"
+int CTA_TreeVector_ElmSqrt(CTA_TreeVector y){
+
+   int retval;         //Return value of a call
+   CTAI_TreeVec *data;
+   int istate; // counter over all substates
+
+   /* check handle and get data object */
+   retval=CTA_Handle_Check((CTA_Handle) y ,CTA_TREEVECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) y,(void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   if (data->nsubtrees==0)
+   {
+      retval=CTA_Vector_ElmSqrt(*(data->v));
+      if (retval!=CTA_OK) {
+	     CTA_WRITE_ERROR("Error using CTA_Vector_ElmSqrt");
+	     return retval;
+      }
+
+   } else {
+      /* recursion for all sub-states */
+      for (istate=0;istate<data->nsubtrees;istate++){
+         retval=CTA_TreeVector_ElmSqrt(data->treevecs[istate]);
+         if (retval!=CTA_OK) {
+	        CTA_WRITE_ERROR("Error using CTA_TreeVector_ElmSqrt");
+	        return retval;
+         }
+      }
+   }
+   return CTA_OK;
+}
+
+
+void CTAI_Treevector_Operation_ScaledRMS(
+     char *tag,
+     CTA_Vector v1,
+     CTA_Vector vscal,
+     CTA_Handle hdum, int *retval)
+{
+
+   int jdebug;
+   CTA_Vector vwrk;
+   double dv2, ds2, drms;
+
+   jdebug=0;
+
+   if (jdebug){
+     printf("CTAI_Treevector_Operation_ScaledRMS computing rms ");
+     printf("of leaf of treevector\n");
+   }
+
+   /* create working copy of the vector:     vwrk = v1; */
+   *retval=CTA_Vector_Duplicate(v1,&vwrk);
+   if (*retval!=CTA_OK) return;
+
+   /* Scale the vector                       vwrk = v1.*vscal */
+   *retval=CTA_Vector_ElmProd(vwrk,vscal);
+   if (*retval!=CTA_OK) return;
+
+   /* compute sum of squares  */
+       /*  dv2 = | vwrk |^2 = sum( (v1 .* vscal).^2); */
+   *retval=CTA_Vector_Nrm2(vwrk,&dv2);
+   if (*retval!=CTA_OK) return;
+   dv2=dv2*dv2;
+
+   /* Delete work vector */
+   *retval=CTA_Vector_Free(&vwrk);
+   if (*retval!=CTA_OK) return;
+
+   /* compute scaling vector's sum-of-squares */
+       /*  ds2 = | vscal |^2 = sum( vscal.^2); */
+   *retval=CTA_Vector_Nrm2(vscal,&ds2);
+   if (*retval!=CTA_OK) return;
+   ds2=ds2*ds2;
+
+   if (jdebug)
+   {
+     printf("scaling vector's sum-of-squares is %g\n",ds2);
+     printf("rms=sqrt(%g/%g)\n",dv2,ds2);
+   }
+
+   /* compute RMS */
+   drms=sqrt(dv2/ds2);
+
+   /* output the RMS */
+   printf("  %32s: %18.8le\n", tag, drms);
+}
+
+
+
+void CTAI_Treevector_Operation_ScaledSSQ(
+     char *tag,
+     CTA_Vector v1,
+     CTA_Vector vscal,
+     CTA_Handle hdum, int *retval)
+{
+
+   int jdebug;
+   CTA_Vector vwrk;
+   double dv2;
+
+   jdebug=0;
+
+   if (jdebug){
+     printf("CTAI_Treevector_Operation_ScaledSSQ computing ");
+     printf("ssq of leaf of treevector\n");
+   }
+
+   /* create working copy of the vector:     vwrk = v1; */
+   *retval=CTA_Vector_Duplicate(v1,&vwrk);
+   if (*retval!=CTA_OK) return;
+
+   /* Scale the vector                       vwrk = v1.*vscal */
+   *retval=CTA_Vector_ElmProd(vwrk,vscal);
+   if (*retval!=CTA_OK) return;
+
+   /* compute sum of squares  */
+       /*  dv2 = | vwrk |^2 = sum( (v1 .* vscal).^2); */
+   *retval=CTA_Vector_Nrm2(vwrk,&dv2);
+   if (*retval!=CTA_OK) return;
+   dv2=dv2*dv2;
+
+   /* Delete work vector */
+   *retval=CTA_Vector_Free(&vwrk);
+   if (*retval!=CTA_OK) return;
+
+   /* output the SSQ */
+   printf("  %32s: %18.8le\n", tag, dv2);
+}
+
+
+
+
+
+
+void CTAI_Treevector_Operation_Amax(
+     char *tag,
+     CTA_Vector vin,
+     CTA_Vector *vout,
+     CTA_Handle hdum, int *retval)
+{
+
+   int jdebug=0;
+   int iloc;
+
+   if (jdebug){
+     printf("CTAI_Treevector_Operation_Amax ");
+     printf("finding maxabs of leaf of treevector %d\n",vin);
+   }
+
+   *retval = CTA_Vector_Amax(vin, &iloc);
+   if (*retval!=CTA_OK) return;
+
+   if (jdebug){
+     printf("CTAI_Treevector_Operation_Amax ");
+     printf("maxabs found at %d\n",iloc);
+   }
+
+   *retval = CTA_Vector_Create( CTA_DEFAULT_VECTOR, 1, CTA_INTEGER,
+                                CTA_NULL, vout);
+   if (*retval!=CTA_OK) return;
+
+   *retval = CTA_Vector_SetVals( *vout, &iloc, 1, CTA_INTEGER);
+   if (*retval!=CTA_OK) return;
+
+   if (jdebug){
+     printf("CTAI_Treevector_Operation_Amax: succesful ending\n");
+     *retval = CTA_Vector_Export( *vout, CTA_FILE_STDOUT);
+     if (*retval!=CTA_OK) return;
+   }
+
+}
+
+void CTAI_Treevector_Operation_PrintEntry(
+     char *tag,
+     CTA_Vector vin,
+     CTA_Vector viloc,
+     CTA_Handle hdum, int *retval)
+{
+
+   int jdebug;
+   double value;
+   int iloc;
+   double NaN;
+
+   // Create a NaN
+   NaN = 0; NaN = 1.0/NaN; NaN=NaN/NaN;
+
+
+   jdebug=0;
+
+   if (jdebug){
+     printf("CTAI_Treevector_Operation_PrintEntry ");
+     printf("print an entry of leaf of treevector\n");
+   }
+
+   *retval = CTA_Vector_GetVal(viloc, 1, &iloc, CTA_INTEGER);
+   if (*retval!=CTA_OK) return;
+
+   if (iloc==0){
+           value=NaN;
+   }
+   else {
+      *retval = CTA_Vector_GetVal(vin, iloc, &value, CTA_DOUBLE);
+      if (*retval!=CTA_OK) return;
+   }
+
+   /* output the location and value */
+   printf("  %32s(%6d): %18.8le\n", tag, iloc, value);
+
+   if (jdebug){
+     printf("CTAI_Treevector_Operation_PrintEntry: succesful ending\n");
+   }
+
+}
+
+
+
+
+
+
+
+
+
+#undef METHOD
+#define METHOD "Gemm"
+int CTA_TreeVector_Gemm(CTA_TreeVector *hC, int nc, int transa, int transb, double alpha, CTA_TreeVector *hA, int na,
+                   CTA_Matrix mB, double beta){
+   int ma, mb, mc, nb, m;
+   double *B, fac;
+   double nul=0.0;
+   int ierr, ic, i, j, indx, ia;
+
+   if (IDEBUG>10) {printf("debug: entering cta_treevector_gemm\n");}
+
+   /* check dimensions */
+   if (transa) return CTA_NOT_YET_SUPPORTED;
+
+   ierr=CTA_TreeVector_GetSize(hC[0],&mc);
+   for (ic=1;ic<nc;ic++){
+      ierr=CTA_TreeVector_GetSize(hC[ic],&m); if (ierr!=CTA_OK) return ierr;
+      if (m!=mc) {
+         printf("Error in CTA_TreeVector_Gemm: Not all treevectors in hC have the same length\n");
+         printf("First treevector has length %d and %d-th treevector has length %d\n",mc, ic+1, m);
+         return CTA_DIMENSION_ERROR;
+      }
+   }
+
+   ierr=CTA_TreeVector_GetSize(hA[0],&ma);
+   for (ia=1;ia<na;ia++){
+      ierr=CTA_TreeVector_GetSize(hA[ia],&m); if (ierr!=CTA_OK) return ierr;
+      if (m!=ma){
+         printf("Error in CTA_TreeVector_Gemm: Not all treevectors in hA have the same length\n");
+         printf("First treevector has length %d and %d-th treevector has length %d\n",ma, ic+1, m);
+         return CTA_DIMENSION_ERROR;
+      }
+   }
+
+
+   /* Get dimension of B */
+   if (transb) {
+      ierr=CTA_Matrix_GetSize(mB,&nb,&mb);
+   } else {
+      ierr=CTA_Matrix_GetSize(mB,&mb,&nb);
+   }
+   if (ierr!=CTA_OK) {
+       CTA_WRITE_ERROR("Cannot get dimension of matrix B");
+       return ierr;
+   }
+   if (mc!=ma || nc!=nb || na!=mb) return CTA_DIMENSION_ERROR;
+
+   /* NOTE A LOT OF OPTIMISATION IS POSSIBLE THIS VERSION WILL JUST MAKE IT WORK */
+   B=CTA_Malloc(mb*nb*sizeof(double));
+   if (transb) {
+      ierr=CTA_Matrix_GetVals(mB,B,nb,mb,CTA_DOUBLE);
+   } else {
+      ierr=CTA_Matrix_GetVals(mB,B,mb,nb,CTA_DOUBLE);
+   }
+   if (ierr!=CTA_OK) {
+       CTA_WRITE_ERROR("Cannot get values from matrix");
+       return ierr;
+   }
+
+   if (beta== 0.0) {
+      /* Zero C if beta==0.0 (avoiding problems with NaN and Inf in state) */
+      for (j=0;j<nc;j++){
+         ierr=CTA_TreeVector_SetConstant(hC[j],&nul, CTA_DOUBLE);
+      }
+   } else {
+      /* Scale C with beta  */
+      for (j=0;j<nc;j++){
+         ierr=CTA_TreeVector_Scal(hC[j],beta);
+		 if (ierr!=CTA_OK) {
+            CTA_WRITE_ERROR("Cannot scale treevector");
+            return ierr;
+         }
+      }
+   }
+
+  for (j=0;j<nc;j++){
+     /* Compute one column of C */
+     for (i=0;i<mb;i++){
+        if (transb) {
+           indx=i*nb+j;
+        } else {
+           indx=j*mb+i;
+        }
+        fac=alpha*B[indx];
+        ierr=CTA_TreeVector_Axpy(hC[j],fac,hA[i]);
+		if (ierr!=CTA_OK) {
+            CTA_WRITE_ERROR("Error using CTA_TreeVector_Axpy");
+            return ierr;
+        }
+     }
+  }
+
+  if (IDEBUG>10) {printf("debug: cta_treevector_gemm  END \n");}
+
+  free(B);
+  return CTA_OK;
+}
+
+
+/* -------------------------- */
+void XML_newline(int spc, xmlTextWriter *writer){
+  int i;
+  char spaces[80];
+  char str2[5];
+  strcpy(str2," ");
+  strcpy(spaces,"");
+
+  for (i=0; i<spc; i++){
+    strcat(spaces,str2);
+  }
+  xmlTextWriterWriteString(writer, (xmlChar *) "\n");
+  xmlTextWriterWriteString(writer, (xmlChar *) spaces);
+
+}
+
+
+
+/** \brief Generate XML from one COSTA state vector
+*
+*  \param hvec   I  handle of a COSTA state vector
+*  \param writer I  the XML text writer
+*/
+void CTAI_XML_WriteTreeVector(int issubtree, int isxfv,CTA_TreeVector treevec, int level,xmlTextWriter *writer) {
+  /* issubtree: flag if this is not the root (so 'subtreevector' should be written instead of 'treevector'  */
+  /* isxfv: flag: for excludefromvector. It is always false for the root.       */
+   CTAI_TreeVec *data;   //Tree-vector specific data
+   CTA_TreeVector subtreevector;
+   CTAI_TreeVec *subdata; // subTree-vector specific data
+   int isub;
+   int subisxfv;
+   char *desctext = NULL;
+
+   /* check handle and get data object */
+   CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR);
+   CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data);
+
+
+   /* Start an element the the name of the tree handle */
+   XML_newline(level,writer);
+   if (issubtree == 1){
+     xmlTextWriterStartElement(writer, (xmlChar *) CTAI_Type2String(CTA_SUBTREEVECTOR));
+   } else{
+     xmlTextWriterStartElement(writer, (xmlChar *) CTAI_Type2String(CTA_TREEVECTOR));
+   }
+   /* COSTA-id is not written to file */
+
+
+   /* Write tag */
+   xmlTextWriterWriteAttribute(writer, CTAI_XML_ID,(xmlChar *) data->tag);
+
+   /* Write name */
+   xmlTextWriterWriteAttribute(writer, CTAI_XML_CAPTION, (xmlChar *) data->name);
+
+   /* write excludefromvector. this information is available as argument! */
+   if (isxfv) {
+     xmlTextWriterWriteAttribute(writer, CTAI_XML_EXCLUDEFROMVECTOR, (xmlChar *) "true" );
+   }
+
+   level = level + 3; XML_newline(level,writer);
+   xmlTextWriterStartElement(writer, (xmlChar *) "description");
+
+   /* Check for meta-info */
+   if (data->metainfo) {
+     CTA_Metainfo_GetDescription(*data->metainfo,&desctext);
+     xmlTextWriterWriteString(writer, (xmlChar *) desctext);
+     xmlTextWriterEndElement(writer); XML_newline(level,writer);
+
+   } else {
+   /* write (empty) description */
+     xmlTextWriterWriteString(writer, (xmlChar *) "no description yet");
+     xmlTextWriterEndElement(writer); XML_newline(level,writer);
+   }
+
+   /* If we have a vector write the vector */
+   if (data->v) {
+     CTAI_XML_WriteVector(*data->v, "","",*data->metainfo,level+3, writer);
+   } else {
+     for (isub=0;isub<data->nsubtrees;isub++){
+       subtreevector = data->treevecs[isub];
+       /* determine nocompute of substate */
+       subisxfv = 0;
+       if (data->nocompute[isub] == TRUE) {subisxfv = 1;}
+
+       CTA_Handle_Check((CTA_Handle) subtreevector,CTA_TREEVECTOR);
+       CTA_Handle_GetData((CTA_Handle) subtreevector,(void*) &subdata);
+       /* If the substate is a leaf, write the vector, i.e. skip the statelevel */
+       if (subdata->v) {
+
+         if (subdata->metainfo) {
+           //         printf("!!!calling xml_writevector; sub-metainfo: %s %d\n",subdata->tag, *subdata->metainfo);
+           CTAI_XML_WriteVector(*subdata->v, subdata->tag, subdata->name,*subdata->metainfo,level+3, writer);
+         //  here: possibly add state for coordinates
+         }
+         if (!subdata->metainfo) {
+           //    printf("!!!calling xml_writevector; NO sub-metainfo: %s %d\n",subdata->tag,subdata->metainfo);
+           CTAI_XML_WriteVector(*subdata->v, subdata->tag, subdata->name,0,level+3,writer);}
+
+       } else {
+         CTAI_XML_WriteTreeVector(1,subisxfv,subtreevector, level, writer);
+       }
+     }
+   }
+   level=level-3;XML_newline(level,writer); xmlTextWriterEndElement(writer);
+   XML_newline(level,writer);
+}
+
+
+/* -------------------------------------------------------------------- */
+/** \OBSOLETE brief Generate XML from one COSTA state vector
+*
+*  \param hvec   I  handle of a COSTA state vector
+*  \param writer I  the XML text writer
+*/
+void CTAI_XML_WriteState(CTA_TreeVector treevec, xmlTextWriter *writer) {
+   CTAI_TreeVec *data;   //Tree-vector specific data
+   const char* name;
+   int isub;
+
+   /* check handle and get data object */
+   CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR);
+   CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data);
+
+   /* Check for meta-info */
+   if (data->metainfo) {
+      printf("WARNING in CTAI_XML_WriteState\n");
+      printf("State-vector contains META information\n");
+      printf("XML-export for META information is not (yet) available \n");
+   }
+
+   /* Start an element the the name of the tree handle */
+   xmlTextWriterStartElement(writer, (xmlChar *) CTAI_Type2String(CTA_TREEVECTOR));
+
+   /* Write id (if any) */
+   name = CTAI_Handle_GetName(treevec);
+   if (name && *name) {
+      xmlTextWriterWriteAttribute(writer, CTAI_XML_ID, (xmlChar *) name);
+   }
+
+   /* Write name */
+   xmlTextWriterWriteAttribute(writer, CTAI_XML_NAME, (xmlChar *) data->name);
+
+   /* Write tag */
+   xmlTextWriterWriteAttribute(writer, CTAI_XML_TAG, (xmlChar *) data->tag);
+
+   /* If we a vector write the vector */
+   if (data->v) {
+     CTAI_XML_WriteVector(*data->v,"", "", CTA_NULL, 0,writer);
+   } else {
+     for (isub=0;isub<data->nsubtrees;isub++){
+
+        CTAI_XML_WriteState(data->treevecs[isub], writer);
+     }
+   }
+   xmlTextWriterEndElement(writer);
+}
+
+//------------------------------
+
+/** \brief Create a treeVector from XML.
+*
+*  \param cur_node  I  Current XML node
+*  \return             Handle to create or CTA_NULL in case of an error.
+*/
+CTA_TreeVector CTAI_XML_CreatetreeVector(xmlNode *cur_node) {
+
+   CTA_TreeVector     treevec;                  /* new state vector */
+   CTA_Vector    hsubvec;
+   xmlChar       *name = NULL;            /* name of the vector to create */
+   xmlChar       *tag;                    /* element/property value */
+   xmlNode       *sub_node = NULL;        /* values child node */
+   xmlNode       *txt_node = NULL;        /* values child node */
+   int           nsubstates;              /* number of sub state vectors */
+   int           nsubvectors;              /* number of subvectors */
+   CTA_TreeVector     *substates;              /* Array of handles of sub state vectors */
+   int           isub;                    /* loop counter in substates */
+   xmlChar       *excludefromvector = NULL;            /* name of the vector to create */
+   char          *desctext = NULL;
+   CTA_Metainfo  minfo;
+   CTA_Metainfo  sub_minfo;
+   BOOL          *excludefv;
+   /* Parse this node's attributes */
+   /* Get id */
+
+
+   /* Get name */
+   name = xmlGetProp(cur_node, CTAI_XML_CAPTION);
+   if (!name){
+     name= (xmlChar*) CTA_Malloc(5*sizeof(xmlChar));
+     strcpy((char *) name, (char *) "none");
+   }
+
+   /* Get tag */
+   tag = xmlGetProp(cur_node, CTAI_XML_ID);
+
+   /* Create the new state-vector */
+   CTA_TreeVector_Create((char *) name, (char *) tag, &treevec);
+
+   /* Create metainfo */
+   CTA_Metainfo_Create(&minfo);
+
+   /* Set id (=name) of handle */
+   CTAI_Handle_SetName(treevec, "tree-vector");
+
+   /* Look for a Vector */
+   /* Count number of "TREEVECTOR"-elements */
+   nsubstates=0; nsubvectors=0;
+   for (sub_node = cur_node->children; sub_node; sub_node = sub_node->next) {
+     if (0 == strcmp("description",(char *) sub_node->name)) {
+
+       for (txt_node = sub_node->children; txt_node; txt_node = txt_node->next) {
+         if (txt_node->type == XML_TEXT_NODE) {
+           if (desctext !=NULL) {
+             free(desctext);
+           }
+           desctext = (char *) CTA_Malloc((1+strlen((char *) txt_node->content))*sizeof(char));
+           strcpy( desctext, (char *) txt_node->content);
+           CTA_Metainfo_SetDescription(minfo, (char *) txt_node->content);
+
+           // controle
+           //     retval = CTA_Metainfo_GetDescription(minfo,&desctext);
+           // printf("controle: desctext: %s\n",desctext);
+           // free(desctext);
+           // free(minfo);
+         }
+       }
+       /* attach the metainfo (with only the description) to this state */
+       /* note: grid and unit information are  in a metainfo on a lower level: */
+       /* in the state where the actual vector is */
+       CTA_TreeVector_SetMetainfo(treevec, minfo);
+
+     }
+     else if (CTA_VECTOR == CTAI_String2Type((char *) sub_node->name)) {
+       /* if a treeVectorLeaf is found */
+       // now COSTA needs to build an extra state-level at this point.
+       nsubvectors++;
+     }
+     else if (CTA_TREEVECTOR == CTAI_String2Type((char *) sub_node->name)) {
+       nsubstates++;
+     } else {
+        /* continue */
+     }
+   }
+
+
+   isub = 0;
+   if (nsubvectors+nsubstates>0) {
+     substates=CTA_Malloc((nsubvectors+nsubstates)*sizeof(CTA_TreeVector));
+     excludefv=CTA_Malloc((nsubvectors+nsubstates)*sizeof(BOOL));
+
+     /* first, loop along all  subvectors (COSTA-states with one vector)
+      We need therefore to make an extra state  */
+     if (nsubvectors>0) {
+       for (sub_node = cur_node->children; sub_node; sub_node = sub_node->next) {
+
+         if (CTA_VECTOR == CTAI_String2Type((char *) sub_node->name)) {
+
+           /* Get name */
+           name = xmlGetProp(sub_node, CTAI_XML_CAPTION);
+           if (!name){
+             name= (xmlChar *) CTA_Malloc(5*sizeof(xmlChar)); strcpy((char *) name,"none");
+           }
+
+           /* Get tag */
+           tag = xmlGetProp(sub_node, CTAI_XML_ID);
+
+           /* get excludefromVector */
+           excludefromvector = xmlGetProp(sub_node, CTAI_XML_EXCLUDEFROMVECTOR);
+           excludefv[isub] = FALSE;
+
+           if (excludefromvector) {    //it exists
+             if (0 == strcmp("true", (char *) excludefromvector)) {
+               excludefv[isub] = TRUE;
+             }
+           }
+
+
+           CTA_Metainfo_Create(&sub_minfo);
+           hsubvec = CTAI_XML_CreateVector_New(sub_node,sub_minfo);
+
+           /* now make the  extra state to  attach the vector and the metainfo */
+           /* essentially, we create an extra layer that the xml-description does not need */
+           CTA_TreeVector_Create((char *) name, (char *) tag, &substates[isub]);
+           CTA_TreeVector_SetVec(substates[isub],hsubvec);
+
+           /* here the metainfo with grid should be set */
+           CTA_TreeVector_SetMetainfo(substates[isub], sub_minfo);
+
+           isub++;
+
+        }
+       }
+
+     }
+
+     /* now, add the treevectorsleafs to substatepositions isub+1..(nsubvectors+nsubstates)
+        (these are COSTA substates)  */
+     if (nsubstates>0) {
+       for (sub_node = cur_node->children; sub_node; sub_node = sub_node->next) {
+         if (CTA_TREEVECTOR == CTAI_String2Type((char *) sub_node->name)) {
+
+           /* get excludefromVector */
+           excludefromvector = xmlGetProp(sub_node, CTAI_XML_EXCLUDEFROMVECTOR);
+           excludefv[isub] = FALSE;
+           if (excludefromvector) {    //it exists
+             if (0 == strcmp("true", (char *) excludefromvector)) {
+               excludefv[isub] = TRUE;
+             }
+           }
+
+           substates[isub]= CTAI_XML_CreatetreeVector(sub_node);
+           isub++;
+         }
+       }
+
+     }
+     CTA_TreeVector_Conc(treevec, substates, nsubvectors+nsubstates);
+     /* fill the excludefromvector list */
+     CTAI_TreeVector_ExcludeFromVector(treevec,excludefv);
+     // CTA_TreeVector_Export(treevec,CTA_FILE_STDOUT);
+     free(substates);
+   }
+
+   xmlFree(name);
+   xmlFree(tag);
+   xmlFree(excludefromvector);
+
+   return treevec;
+}
+//------------------------
+/* sweep through the treevector and fill the regular grids using the references to its own
+   (nocompute)-coordinate-vectors */
+int CTAI_XML_TreeVector_regular_grid(CTA_TreeVector treevec, CTA_TreeVector root ){
+  int retval, istate;
+  CTA_TreeVector coords;
+  CTAI_TreeVec *data;   //Tree-vector specific data
+  CTAI_Gridm thisgrid;
+  char *tag0,*tag;
+  int ndims_phys, j,ncoords, found;
+  double *vals = NULL;
+
+  /* Parse the whole tree and look for grids */
+
+   /* check handle and get data object */
+   retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR);
+   if (retval!=CTA_OK) return retval;
+   retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data);
+   if (retval!=CTA_OK) return retval;
+
+   if (data->metainfo) {
+     retval = CTA_Metainfo_GetGrid(*data->metainfo,&thisgrid);
+     if (thisgrid.type > -4 && thisgrid.type <0) {  // reference to coords; not yet filled in
+       ndims_phys = -thisgrid.type;
+
+       /* the first dimension (x) */
+       tag0 =  strrchr(thisgrid.refdimp[1],'/');
+       if (tag0 == NULL) {
+         tag = thisgrid.refdimp[1];}
+       else {
+         tag = tag0 + 1 ;
+       }
+       //       printf("ctai-xml-tv_reg_grid: searching for tag |%s| \n",tag);
+       retval = CTA_TreeVector_GetSubTreeVec(root, tag, &coords);
+       if (retval !=  CTA_OK) {
+         printf("ctai_xml_treevector_regular_grid: tag %s not found! \n",tag);exit(-1);
+       }
+       retval = CTA_TreeVector_GetSize(coords,&ncoords);
+       if (ncoords != thisgrid.nsize) {
+         printf("ctai_xml_treevector_regular_grid: number of coordinates %d is wrong! \n",ncoords);exit(-1);
+       }
+       vals = CTA_Malloc(ncoords*sizeof(double));  // allocate for first time
+       retval = CTA_TreeVector_GetVals(coords, vals, ncoords, CTA_DOUBLE);
+       thisgrid.x_origin = vals[0];
+       for (j=2; j<= ncoords; j++) {
+         if (abs(vals[j-1] - vals[0]) < 1.0E-5) {break;}
+       }
+       thisgrid.nx = j-1;
+       thisgrid.dx =  vals[1]-vals[0];
+       if (j >= ncoords) {
+         if (ndims_phys > 1) {printf("for regular grids a repeating x-sequence is required!\n"); exit(-1);}
+         thisgrid.nx=ncoords;
+       }
+
+       /* now, the second dimension (y) */
+       if (ndims_phys > 1) {
+
+         tag0 =  strrchr(thisgrid.refdimp[2],'/') ;
+         if (tag0 == NULL) {
+           tag = thisgrid.refdimp[1];}
+         else {
+           tag = tag0 + 1 ;
+         }
+         // printf("ctai-xml-tv_reg_grid: searching for tag |%s| \n",tag);
+         retval = CTA_TreeVector_GetSubTreeVec(root, tag, &coords);
+         if (retval !=  CTA_OK) {
+           printf("ctai_xml_treevector_regular_grid: tag %s not found! \n",tag);
+           exit(-1);
+         }
+         retval = CTA_TreeVector_GetSize(coords,&ncoords);
+         if (ncoords != thisgrid.nsize) {
+           printf("ctai_xml_treevector_regular_grid: number of coordinates %d is wrong! \n",ncoords);exit(-1);
+         }
+         retval = CTA_TreeVector_GetVals(coords, vals, ncoords, CTA_DOUBLE);
+         thisgrid.y_origin = vals[0];
+         found = 0;
+         for (j=1; j <  ncoords/thisgrid.nx; j++) {
+           if (abs(vals[0+j*thisgrid.nx] - vals[0]) < 1.0E-5) {found=1; break;}
+         }
+         if (found == 1){
+           thisgrid.ny = j;
+         } else {
+           thisgrid.ny = ncoords/thisgrid.nx;
+           if (ndims_phys > 2) {printf("for regular grids a repeating y-sequence is required!\n"); exit(-1);}
+         }
+         thisgrid.dy =  vals[0+thisgrid.nx]-vals[0];
+       }
+       /* now, the third and last dimension (z) */
+       if (ndims_phys > 2) {
+         tag0 =  strrchr(thisgrid.refdimp[3],'/') ;
+         if (tag0 == NULL) {
+           tag = thisgrid.refdimp[1];}
+         else {
+           tag = tag0 + 1 ;
+         }
+         //printf("ctai-xml-tv_reg_grid: searching for tag |%s| \n",tag);
+         retval = CTA_TreeVector_GetSubTreeVec(root, tag, &coords);
+         if (retval !=  CTA_OK) {
+           printf("ctai_xml_treevector_regular_grid: tag %s not found! \n",tag);
+           exit(-1);
+         }
+         retval = CTA_TreeVector_GetSize(coords,&ncoords);
+         if (ncoords != thisgrid.nsize) {
+           printf("ctai_xml_treevector_regular_grid: number of coordinates %d is wrong! \n",ncoords);exit(-1);
+         }
+         retval = CTA_TreeVector_GetVals(coords, vals, ncoords, CTA_DOUBLE);
+         thisgrid.z_origin = vals[0];
+         found = 0;
+         for (j=2; j <= thisgrid.nx*thisgrid.ny; j++) {
+           if (abs(vals[j-1] - vals[0]) > 1.0E-5) {found=1;}
+         }
+         if (found == 0){
+           thisgrid.nz =  ncoords / (thisgrid.nx*thisgrid.ny);
+         } else {
+              printf("for regular grids a repeating y-sequence is required!\n"); exit(-1);
+         }
+         thisgrid.dz =  vals[0+thisgrid.nx*thisgrid.ny]-vals[0];
+
+       }
+       thisgrid.type = ndims_phys;
+       retval = CTA_Metainfo_SetGrid(*data->metainfo,&thisgrid);
+       //       retval = CTA_Metainfo_Copy(minfo,*data->metainfo);
+     }
+   }
+
+
+
+
+   /* parse subtrees */
+   for (istate=0;istate<data->nsubtrees;istate++){
+     retval = CTAI_XML_TreeVector_regular_grid(data->treevecs[istate], root);
+   }
+   return CTA_OK;
+}
+
+
+//---------------------------
+int CTAI_TreeVector_ExcludeFromVector(CTA_TreeVector treevec, BOOL *excludefv){
+
+   int retval;         //Return value of a call
+   CTAI_TreeVec *data;
+   int istate; // counter over all substates
+
+
+   /* check handle and get data object */
+   retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR);
+   if (retval!=CTA_OK) return retval;
+   retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data);
+   if (retval!=CTA_OK) return retval;
+
+   if (data->nsubtrees==0) {
+      printf("CTAI:TreeVector_ExcludeFromVector: ERROR: no substates available.\n");
+      return CTA_CONCAT_NOT_POSSIBLE;
+   }
+   else {
+      for (istate=0;istate<data->nsubtrees;istate++){
+        data->nocompute[istate] = excludefv[istate];
+      }
+      //  printf("ctai_treevec_XFV: state:  %s %d %d %d %d \n",data->tag,data->nsubtrees,data->nocompute[0],data->nocompute[1],data->nocompute[2]);
+   }
+   return CTA_OK;
+};
+
+
+
+#undef METHOD
+#define METHOD "IncRefCount"
+int CTA_TreeVector_IncRefCount(CTA_TreeVector treevec){
+
+   int retval;         //Return value of a call
+   CTAI_TreeVec *data1;  //Tree-vector specific data
+   int istate;         // counter over all substates
+
+   /* check handle and get data object */
+   retval=CTA_Handle_Check((CTA_Handle) treevec,CTA_TREEVECTOR);
+   if (retval!=CTA_OK) {
+      CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+      return retval;
+   }
+
+   /* Increase reference count */
+   CTA_Handle_IncRefCount((CTA_Handle) treevec);
+
+
+   retval=CTA_Handle_GetData((CTA_Handle) treevec,(void*) &data1);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   /* Increase reference count of all sub treevectors as well */
+   for (istate=0;istate<data1->nsubtrees;istate++){
+      retval=CTA_TreeVector_IncRefCount(data1->treevecs[istate]);
+      if (retval!=CTA_OK) {
+         CTA_WRITE_ERROR("Error setting reference count");
+         return retval;
+      }
+
+   }
+   return CTA_OK;
+};
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+//----------------------------
+
+/* Interfacing with Fortran */
+CTAEXPORT void CTA_TREEVECTOR_CREATE_F77(char *name, char *tag, int *treevec, int *ierr,
+                       int len_name, int len_tag){
+
+  char  *c_name;
+  char  *c_tag;
+  // create a c-string equivalent to name
+  c_name=CTA_Malloc((len_name+1)*sizeof(char));
+  CTA_fstr2cstr(name,c_name,len_name);
+  // create a c-string equivalent to tag
+  c_tag=CTA_Malloc((len_tag+1)*sizeof(char));
+  CTA_fstr2cstr(tag,c_tag,len_tag);
+
+   *ierr=CTA_TreeVector_Create(c_name, c_tag, (CTA_TreeVector*) treevec);
+
+   free(c_name);
+   free(c_tag);
+
+};
+
+CTAEXPORT void CTA_TREEVECTOR_DUPLICATE_F77(int *treevec1, int *treevec2, int *ierr ){
+   *ierr=CTA_TreeVector_Duplicate((CTA_TreeVector) *treevec1, (CTA_TreeVector*) treevec2 );
+};
+
+
+CTAEXPORT void CTA_TREEVECTOR_CONC_F77(int *treevec1, int *treevecs, int *nsubtrees, int *ierr){
+
+   *ierr=CTA_TreeVector_Conc((CTA_TreeVector) *treevec1, (CTA_TreeVector*) treevecs, *nsubtrees);
+};
+
+
+CTAEXPORT void CTA_TREEVECTOR_GETSUBTREEVEC_F77(int *treevec, char *tag, int *hsubstate, int *ierr,
+                          int len_tag){
+   char  *c_tag;
+   // create a c-string equivalent to tag
+   c_tag=CTA_Malloc((len_tag+1)*sizeof(char));
+   CTA_fstr2cstr(tag,c_tag,len_tag);
+
+   *ierr=CTA_TreeVector_GetSubTreeVec((CTA_TreeVector) *treevec, c_tag,
+                               (CTA_TreeVector*) hsubstate) ;
+   free(c_tag);
+};
+
+CTAEXPORT void CTA_TREEVECTOR_SETSUBTREENOCOMPUTE_F77(int *treevec, char *tag, int *ierr,
+                          int len_tag){
+   char  *c_tag;
+   // create a c-string equivalent to tag
+   c_tag=CTA_Malloc((len_tag+1)*sizeof(char));
+   CTA_fstr2cstr(tag,c_tag,len_tag);
+
+   *ierr=CTA_TreeVector_SetSubTreeNocompute((CTA_TreeVector) *treevec, c_tag) ;
+   free(c_tag);
+};
+
+
+CTAEXPORT void CTA_TREEVECTOR_GETSUBTREEVECINDEX_F77(int *treevec, int *index, int *hsubstate, int *ierr){
+
+   *ierr=CTA_TreeVector_GetSubTreeVecIndex((CTA_TreeVector) *treevec, *index,
+                               (CTA_TreeVector*) hsubstate) ;
+};
+
+CTAEXPORT void CTA_TREEVECTOR_GETSUBTREEVECID_F77(int *treevec, int *index, char *tag, int *ierr, int len_tag){
+   char  c_tag[CTA_STRLEN_TAG];
+   *ierr=CTA_TreeVector_GetSubTreeVecId((CTA_TreeVector) *treevec, *index,
+                               c_tag) ;
+   CTA_cstr2fstr(c_tag,tag,len_tag);
+};
+
+CTAEXPORT void CTA_TREEVECTOR_GETTAG_F77(int *treevec, char *tag, int *ierr, int len_tag){
+   char  c_tag[CTA_STRLEN_TAG];
+
+   *ierr=CTA_TreeVector_GetTag((CTA_TreeVector) *treevec, c_tag) ;
+   CTA_cstr2fstr(c_tag,tag,len_tag);
+};
+
+CTAEXPORT void CTA_TREEVECTOR_SETVEC_F77(int *treevec, int *hvec, int *ierr){
+   *ierr=CTA_TreeVector_SetVec((CTA_TreeVector) *treevec, (CTA_Vector) *hvec);
+};
+
+CTAEXPORT void CTA_TREEVECTOR_GETVEC_F77(int *treevec, int *hvec, int *ierr){
+   *ierr=CTA_TreeVector_GetVec((CTA_TreeVector)* treevec, (CTA_Vector) *hvec);
+};
+
+CTAEXPORT void CTA_TREEVECTOR_SETVALS_F77(int *treevec, void *val,int *nval, int *datatype, int *ierr){
+   *ierr=CTA_TreeVector_SetVals((CTA_TreeVector) *treevec, val, *nval, (CTA_Datatype) *datatype);
+};
+
+CTAEXPORT void CTA_TREEVECTOR_GETVALS_F77(int *treevec, void *val,int *nval, int *datatype, int *ierr){
+   *ierr=CTA_TreeVector_GetVals((CTA_TreeVector) *treevec, val, *nval, (CTA_Datatype) *datatype);
+};
+
+
+CTAEXPORT void CTA_TREEVECTOR_GETVAL_F77(int *treevec, int *i, void *val, int *datatype, int *ierr){
+   *ierr=CTA_TreeVector_GetVal((CTA_TreeVector) *treevec, *i, val, (CTA_Datatype) *datatype);
+};
+
+CTAEXPORT void CTA_TREEVECTOR_SETVAL_F77(int *treevec, int *i, void *val, int *datatype, int *ierr){
+   *ierr=CTA_TreeVector_SetVal((CTA_TreeVector) *treevec, *i, val, (CTA_Datatype) *datatype);
+};
+
+
+
+CTAEXPORT void CTA_TREEVECTOR_GETSIZE_F77(int *treevec, int *n, int *ierr){
+   *ierr=CTA_TreeVector_GetSize((CTA_TreeVector) *treevec, n);
+};
+
+CTAEXPORT void CTA_TREEVECTOR_SETMETAINFO_F77(int *treevec, int *minfo, int *ierr){
+  *ierr= CTA_TreeVector_SetMetainfo((CTA_TreeVector) *treevec, (CTA_Metainfo) *minfo);
+}
+
+CTAEXPORT void CTA_TREEVECTOR_GETMETAINFO_F77(int *treevec, int *minfo, int *ierr){
+  *ierr= CTA_TreeVector_GetMetainfo((CTA_TreeVector) *treevec, (CTA_Metainfo) *minfo);
+}
+
+
+CTAEXPORT void CTA_TREEVECTOR_COPY_F77(int *treevec1, int *treevec2, int *ierr){
+   *ierr=CTA_TreeVector_Copy((CTA_TreeVector) *treevec1, (CTA_TreeVector) *treevec2);
+}
+
+CTAEXPORT void CTA_TREEVECTOR_AXPY_F77(int *treevecy, double *alpha, int *treevecx, int *ierr){
+   *ierr=CTA_TreeVector_Axpy((CTA_TreeVector) *treevecy, *alpha, (CTA_TreeVector) *treevecx);
+}
+
+CTAEXPORT void CTA_TREEVECTOR_DOT_F77(int *treevec1, int *treevec2, double *dotprod, int *ierr){
+   *ierr=CTA_TreeVector_Dot((CTA_TreeVector) *treevec1, (CTA_TreeVector) *treevec2,
+                       dotprod);
+}
+
+CTAEXPORT void CTA_TREEVECTOR_NRM2_F77(int *treevec1, double *nrm2, int *ierr){
+   *ierr=CTA_TreeVector_Nrm2((CTA_TreeVector) *treevec1, nrm2);
+}
+
+CTAEXPORT void CTA_TREEVECTOR_SETCONSTANT_F77(int *treevec, void *val, int *datatype, int *ierr){
+   *ierr=CTA_TreeVector_SetConstant((CTA_TreeVector) *treevec, val, (CTA_Datatype) *datatype);
+}
+
+CTAEXPORT void CTA_TREEVECTOR_SCAL_F77(int *treevec, double *val, int *ierr){
+   *ierr=CTA_TreeVector_Scal((CTA_TreeVector) *treevec, *val);
+}
+
+
+CTAEXPORT void CTA_TREEVECTOR_EXPORT_F77(int *hvec_x, int *usrdata, int *ierr){
+   *ierr=CTA_TreeVector_Export((CTA_TreeVector) *hvec_x, (CTA_Handle) *usrdata);
+}
+
+CTAEXPORT void CTA_TREEVECTOR_IMPORT_F77(int *hvec_x, int *usrdata, int *ierr){
+   *ierr=CTA_TreeVector_Import((CTA_TreeVector) *hvec_x, (CTA_Handle) *usrdata);
+}
+
+CTAEXPORT void CTA_TREEVECTOR_FREE_F77(int *hvec_x, int *recursive, int *ierr){
+   *ierr=CTA_TreeVector_Free((CTA_TreeVector *) hvec_x, *recursive);
+}
+
+
+CTAEXPORT void CTA_TREEVECTOR_INFO_F77(int *hvec_x, int *ierr){
+   *ierr=CTA_TreeVector_Info((CTA_TreeVector) *hvec_x);
+
+}
+
+CTAEXPORT void CTA_TREEVECTOR_LIST_F77(int *hvec_x, int *hvec, int *ierr){
+   *ierr=CTA_TreeVector_List((CTA_TreeVector) *hvec_x, (CTA_Vector) *hvec);
+}
+
+CTAEXPORT void CTA_TREEVECTOR_GEMM_F77(int *sC, int *nc, int *transa, int *transb, double *alpha, int *sA, int *na,
+                        int *mB, double *beta, int *ierr){
+   *ierr=CTA_TreeVector_Gemm((CTA_TreeVector*) sC, *nc, *transa, *transb, *alpha, (CTA_TreeVector*) sA, *na,
+                         (CTA_Matrix) *mB, *beta);
+}
+
+
+CTAEXPORT void CTA_TREEVECTOR_OPONLEAFS_F77( int *treevec1, int* treevec2,
+               int *op, int *arg, int *ierr)
+{
+   *ierr = CTA_TreeVector_OpOnLeafs( (CTA_TreeVector) *treevec1,
+               (CTA_TreeVector) *treevec2, (CTA_Func) *op,
+              (CTA_Handle) *arg);
+}
+
+
+
+CTAEXPORT void CTA_TREEVECTOR_ELMSQRT_F77(int *y, int *ierr){
+   *ierr=CTA_TreeVector_ElmSqrt((CTA_TreeVector) *y);
+}
+
+CTAEXPORT void CTA_TREEVECTOR_ELMPROD_F77(int *y, int *x, int *ierr){
+   *ierr=CTA_TreeVector_ElmProd((CTA_TreeVector) *y, (CTA_TreeVector) *x);
+}
+
+CTAEXPORT void CTA_TREEVECTOR_GETNUMSUBTREE_F77(int *treevec, int* numSubTrees, int *ierr){
+   *ierr=CTA_TreeVector_GetNumSubTree((CTA_TreeVector) *treevec, numSubTrees);
+}
+
diff --git a/costa/native/cta/src/cta_util_methods.c b/costa/native/cta/src/cta_util_methods.c
new file mode 100644
index 000000000..4473f9daf
--- /dev/null
+++ b/costa/native/cta/src/cta_util_methods.c
@@ -0,0 +1,453 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_util_methods.c $
+$Revision: 2751 $, $Date: 2011-09-09 08:58:46 +0200 (Fri, 09 Sep 2011) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2007  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+#include "cta.h"
+#include "cta_mem.h"
+#include "cta_util_methods.h"
+#include "cta_message.h"
+#include "f_cta_utils.h"
+
+#define CTA_UTIL_METHODSPRINTOBSERVATIONS_F77  F77_CALL(cta_util_methodsprintobservations,CTA_UTIL_METHODSPRINTOBSERVATIONS)
+#define CTA_UTIL_METHODSSELECTOBSERVATIONS_F77 F77_CALL(cta_util_methodsselectobservations,CTA_UTIL_METHODSSELECTOBSERVATIONS)
+#define CTA_UTIL_METHODSOPENRESULTFILE_F77 F77_CALL(cta_util_methodsopenresultfile,CTA_UTIL_METHODSOPENRESULTFILE)
+
+#define IDEBUG (0)
+
+#define CLASSNAME "CTA_Util_Methods"
+
+/** \brief Internal routine prints the predicted values and the observed values
+ *
+ * \param fgModel   :predicted values of foreground run
+ * \param bgModel   :predicted values of background run
+ * \param
+ *                   if set to CTA_NULL NaN will be printed as result
+ * \param vObs        :Observed values
+ * \param time        :Corresponding time
+ * \param file        :Output file
+ *
+ * \return error status: CTA_OK if successful
+ */
+int CTAI_Util_MethodsPrintObservations(CTA_Vector fgModel, CTA_Vector bgModel,
+      CTA_Vector vObs, CTA_Vector stationNames, CTA_Time time,
+      CTA_File file){
+
+   int iObs, nObs;
+   int ierr;
+   CTA_String vStationNames;
+   double vFgModel, vBgModel, vObservations, vTime;
+   double t1, t2;
+   char outStr[1024];
+
+   ierr=CTA_String_Create(&vStationNames);
+
+   /* Get the number of observations */
+   ierr=CTA_Vector_GetSize(fgModel, &nObs);
+   if (ierr!=CTA_OK) return ierr;
+
+   /* Get time Note we will print end-time of span */
+   ierr=CTA_Time_GetSpan(time, &t1, &t2);
+   if (ierr!=CTA_OK) return ierr;
+   vTime=t2;
+
+   for (iObs=1;iObs<=nObs;iObs++){
+      /* Get values */
+      ierr=CTA_Vector_GetVal(fgModel,      iObs, &vFgModel,      CTA_DOUBLE);
+      if (ierr!=CTA_OK) return ierr;
+      ierr=CTA_Vector_GetVal(bgModel,      iObs, &vBgModel,      CTA_DOUBLE);
+      if (ierr!=CTA_OK) return ierr;
+      ierr=CTA_Vector_GetVal(vObs,         iObs, &vObservations, CTA_DOUBLE);
+      if (ierr!=CTA_OK) return ierr;
+      ierr=CTA_Vector_GetVal(stationNames, iObs, &vStationNames, CTA_STRING);
+      if (ierr!=CTA_OK) return ierr;
+      /* Write Line of output */
+
+      sprintf(outStr,"%18.8le,  %18s, %18.8le,  %18.8le,  %18.8le ", vTime,
+              CTAI_String_GetPtr(vStationNames),  vFgModel, vBgModel,
+              vObservations);
+       ierr=CTA_File_WriteStr(file, outStr, CTA_TRUE);
+       if (ierr!=CTA_OK) return ierr;
+   }
+
+   /* Free Local work variables */
+   ierr=CTA_String_Free(&vStationNames);
+   if (ierr!=CTA_OK) return ierr;
+
+   return CTA_OK;
+}
+
+
+#undef METHOD
+#define METHOD "MethodsPrintObservations"
+int CTA_Util_MethodsPrintObservations(CTA_Handle fgModel, CTA_Handle bgModel,
+   CTA_StochObs sObs, CTA_Time time, CTA_File file, int printHeader){
+
+   CTA_Datatype tFgModel, tBgModel;
+   int ierr;
+   BOOL lConvertFg, lConvertBg;
+   int nobs;
+   CTA_ObsDescr obsDescr;
+   CTA_Vector vStationNames, vObs, vFgModel, vBgModel;
+   double NaN;
+   char outStr[1024];
+
+   /* print header */
+   if (printHeader) {
+      sprintf(outStr,"%18s,  %18s, %18s,  %18s,  %18s ", "Time", "Station Name", "Fg-prediction", "Bg-prediction", "Observed");
+      ierr=CTA_File_WriteStr(file, outStr, CTA_TRUE);
+      if (ierr!=CTA_OK) {
+         CTA_WRITE_ERROR("cannot write struct to file");
+         return ierr;
+      }
+   }
+
+   /* quick return when sobs is CTA_NULL */
+   if (sObs==CTA_NULL) return CTA_OK;
+
+   /* Set value NaN */
+   NaN = 0; NaN = 1.0/NaN; NaN=NaN/NaN;
+
+   /* Get the types of the various handles */
+   ierr=CTA_Handle_GetDatatype(fgModel, &tFgModel);
+   if (ierr!=CTA_OK) {
+       CTA_WRITE_ERROR("Cannot get datatype from fgModel");
+       return ierr;
+   }
+   ierr=CTA_Handle_GetDatatype(bgModel, &tBgModel);
+   if (ierr!=CTA_OK) {
+       CTA_WRITE_ERROR("Cannot get datatype from bgModlel");
+       return ierr;
+   }
+
+   /* set number of observations (nobs) */
+   nobs=0;
+   ierr=CTA_SObs_Count(sObs,&nobs);
+   if (ierr!=CTA_OK) {
+       CTA_WRITE_ERROR("Cannot set number of observations");
+       return ierr;
+   }
+   if (nobs==0) return CTA_OK;
+
+   /* Do we need to convert handles */
+   if (fgModel==CTA_NULL || tFgModel==CTA_MODEL){
+      lConvertFg=TRUE;
+   } else if (tFgModel==CTA_VECTOR) {
+      lConvertFg=FALSE;
+   } else {
+      printf("Error in CTA_Util_MethodsPrintObservations:\n");
+      printf("  illegal input argument fgModel \n");
+      return CTA_ILLEGAL_HANDLE;
+   }
+    if (bgModel==CTA_NULL || tBgModel==CTA_MODEL){
+      lConvertBg=TRUE;
+   } else if (tBgModel==CTA_VECTOR) {
+      lConvertBg=FALSE;
+   } else {
+      printf("Error in CTA_Util_MethodsPrintObservations:\n");
+      printf("  illegal input argument bgModel \n");
+      return CTA_ILLEGAL_HANDLE;
+
+   }
+
+   /* Get the observation description */
+   ierr=CTA_SObs_GetDescription(sObs, &obsDescr);
+   if (ierr!=CTA_OK) {
+       CTA_WRITE_ERROR("CAnnot get observation description");
+       return ierr;
+   }
+
+   /* Create and set work arrays */
+   /* - Station names */
+   ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR,nobs,CTA_STRING,CTA_NULL,
+                         &vStationNames);
+   if (ierr!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return ierr;
+   }
+   ierr=CTA_ObsDescr_Get_ValueProperties(obsDescr, "NAME", vStationNames, CTA_STRING);
+   if (ierr!=CTA_OK) {
+       CTA_WRITE_ERROR("Error using CTA_ObsDescr_Get_ValueProperties");
+       return ierr;
+   }
+
+   /*- observed values */
+   ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR,nobs,CTA_DOUBLE,CTA_NULL,
+                         &vObs);
+   if (ierr!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return ierr;
+   }
+   ierr=CTA_SObs_GetVal (sObs, vObs);
+   if (ierr!=CTA_OK) {
+       CTA_WRITE_ERROR("Cannot get values form observation");
+       return ierr;
+   }
+
+   /*- predicted values: */
+   /* Foreground run */
+   if (lConvertFg) {
+      ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR,nobs,CTA_DOUBLE,CTA_NULL,
+                            &vFgModel);
+      if (ierr!=CTA_OK) {
+         CTA_WRITE_ERROR("Cannot create vector");
+         return ierr;
+      }
+      if (fgModel==CTA_NULL) {
+         ierr=CTA_Vector_SetConstant(vFgModel,&NaN,CTA_DOUBLE);
+         if (ierr!=CTA_OK) {
+            CTA_WRITE_ERROR("Cannot set constant in vector");
+            return ierr;
+         }
+      } else {
+         ierr=CTA_Model_GetObsValues (fgModel, time, obsDescr, vFgModel);
+         if (ierr!=CTA_OK) {
+            CTA_WRITE_ERROR("Cannot get observed values from fgModel");
+            return ierr;
+         }
+      }
+   } else {
+      vFgModel     = fgModel;
+   }
+
+   /* Background run */
+   if (lConvertBg) {
+      ierr=CTA_Vector_Create(CTA_DEFAULT_VECTOR,nobs,CTA_DOUBLE,CTA_NULL,
+                            &vBgModel);
+      if (ierr!=CTA_OK) {
+         CTA_WRITE_ERROR("Cannot create an vector");
+         return ierr;
+      }
+      if (bgModel==CTA_NULL) {
+         ierr=CTA_Vector_SetConstant(vBgModel,&NaN,CTA_DOUBLE);
+         if (ierr!=CTA_OK) {
+            CTA_WRITE_ERROR("Cannot set constants in vector");
+            return ierr;
+         }
+      } else {
+         ierr=CTA_Model_GetObsValues (bgModel, time, obsDescr, vBgModel);
+         if (ierr!=CTA_OK) {
+            CTA_WRITE_ERROR("Cannot get observations from bgModel");
+            return ierr;
+         }
+      }
+   } else {
+      vBgModel     = bgModel;
+   }
+
+   /* print output */
+   ierr=CTAI_Util_MethodsPrintObservations(vFgModel, vBgModel, vObs,
+                           vStationNames,  time, file);
+   if (ierr!=CTA_OK) {
+      CTA_WRITE_ERROR("Error using CTAI_Util_MethodsPrintObservations");
+      return ierr;
+   }
+
+   /* Free work arrays */
+   if (lConvertFg) {
+      CTA_Vector_Free(&vFgModel);
+   }
+   if (lConvertBg) {
+      CTA_Vector_Free(&vBgModel);
+   }
+   CTA_Vector_Free(&vStationNames);
+   CTA_Vector_Free(&vObs);
+
+   CTA_ObsDescr_Free(&obsDescr);
+
+   return CTA_OK;
+
+}
+
+#undef METHOD
+#define METHOD "Util_MethodsSelectObservations"
+int CTA_Util_MethodsSelectObservations(CTA_Model model, CTA_StochObs sObsAll, CTA_Time spanSim, CTA_StochObs *sObsSel){
+
+CTA_StochObs sObsSpanSim;
+CTA_ObsDescr obsDescrSpanSim;
+CTA_String modSelect;
+int ierr;
+
+   *sObsSel=CTA_NULL;
+
+   if (sObsAll == CTA_NULL) {
+     *sObsSel = CTA_NULL;
+     return CTA_OK;
+   }
+
+   if (IDEBUG>0) {printf("CTA_Util_MethodsSelectObservations START");}
+
+/* Select all observations that fit in the simulation time span */
+   ierr=CTA_SObs_CreateTimSel(sObsAll,spanSim,&sObsSpanSim);
+   if (ierr!=CTA_OK) {
+      char message[1024];
+      sprintf(message, "Error %d selecting observations for simulation timespan",
+      ierr);
+	  CTA_WRITE_ERROR(message);
+      return ierr;
+   }
+
+   ierr=CTA_SObs_GetDescription(sObsSpanSim,&obsDescrSpanSim);
+   if (ierr!=CTA_OK) {
+      char message[1024];
+      sprintf(message, "Error %d in CTA_SObs_GetDescription", ierr);
+	  CTA_WRITE_ERROR(message);
+      return ierr;
+   }
+   ierr=CTA_String_Create(&modSelect);
+
+   if (IDEBUG>0) {printf("CTA_Util_MethodsSelectObservations 2 \n");}
+
+   ierr=CTA_Model_GetObsSelect(model,spanSim,obsDescrSpanSim,modSelect);
+
+   if (IDEBUG>0) {printf("CTA_Util_MethodsSelectObservations 3 \n");}
+
+   if (ierr!=CTA_OK) {
+      char message[1024];
+      sprintf(message, "Error %d in CTA_Model_GetObsSelect", ierr);
+	  CTA_WRITE_ERROR(message);
+      return ierr;
+   }
+
+
+   ierr=CTA_SObs_CreateSel(sObsSpanSim,modSelect,sObsSel);
+   if (ierr!=CTA_OK) {
+      char message[1024];
+      sprintf(message, "Error %d in CTA_SObs_CreateSel", ierr);
+	  CTA_WRITE_ERROR(message);
+      return ierr;
+   }
+
+   if (IDEBUG>0) {printf("CTA_Util_MethodsSelectObservations 4 \n");}
+
+   CTA_String_Free(&modSelect);
+   CTA_ObsDescr_Free(&obsDescrSpanSim);
+   CTA_SObs_Free(&sObsSpanSim);
+
+   return CTA_OK;
+}
+
+
+#undef METHOD
+#define METHOD "Util_MethodsOpenResultFile"
+int CTA_Util_MethodsOpenResultFile(char *stationFile,
+                                   CTA_File *fStationFile){
+
+   CTA_String sfile;
+   int ierr;
+
+   CTA_String_Create(&sfile);
+   CTA_String_Set(sfile,stationFile);
+   CTA_File_Create(fStationFile);
+   ierr=CTA_File_Open(*fStationFile ,sfile,CTA_NULL);
+   CTA_String_Free(&sfile);
+   if (ierr!=CTA_OK) {
+       CTA_WRITE_ERROR("Cannot free string");
+       return ierr;
+   }
+   ierr=CTA_Util_MethodsPrintObservations(CTA_NULL, CTA_NULL, CTA_NULL,
+                                     CTA_NULL, *fStationFile, CTA_TRUE);
+   return ierr;
+}
+
+
+//int CTA_Util_RankReduction(CTA_State scaling, int *rankIn, int *rankOut, CTA_State *QmatL){
+//
+//  int icol, ierr;
+//  CTA_Datatype datatype;
+//  double *LTL;
+//
+//   /* Check Handles */
+//   for (icol=0;icol<rankIn;icol++){
+//      ierr=CTA_Handle_Check(QmatL[icol], &datatype);
+//      if (ierr!=CTA_OK) return ierr;
+//   }
+//
+//  ierr=CTA_TreeVector_GetSize (QmatL[0], &nState);
+//  if (ierr!=CTA_OK) return ierr;
+//
+//  /* Copy states into work array */
+//    for (icol=0;icol<rankIn;icol++){
+//       ierr=CTA_TreeVector_GetVals (QmatL[icol], L[indx], nState, CTA_DOUBLE);
+//       if (ierr!=CTA_OK) return ierr;
+//   }
+//
+//
+//
+//
+//
+//
+//
+//
+//
+//
+//   /* allocate work array */
+//   LTL=CTA_Malloc(rankIn*rankIn*sizeof(double))
+//
+//   if (scaling==CTA_NULL){
+//      do (j=0;j<rankIn;j++){
+//         do (i=j;i<rankIn;i++)
+//            indx=i+j*rankIn;
+//            ierr=CTA_TreeVector_Dot  	(   	CTA_TreeVector   	 treevec1,
+//		CTA_TreeVector  	treevec2,
+//		double *  	dotprod
+//	)
+//
+//
+//
+//      }
+//   }
+//
+//
+//
+//
+//
+//
+//
+//}
+//
+
+
+
+
+
+/* Interfacing with Fortran */
+CTAEXPORT void CTA_UTIL_METHODSPRINTOBSERVATIONS_F77(int *fgModel,
+      int *bgModel, int *sObs, int *time, int *file, int * printHeader, int *ierr){
+  *ierr=CTA_Util_MethodsPrintObservations((CTA_Handle) *fgModel,
+        (CTA_Handle) *bgModel, (CTA_StochObs) *sObs, (CTA_Time) *time,
+        (CTA_File) *file, *printHeader);
+}
+
+CTAEXPORT void CTA_UTIL_METHODSSELECTOBSERVATIONS_F77(int *model, int *sObsAll, int *spanSim, int *sObsSel, int *ierr){
+   *ierr=CTA_Util_MethodsSelectObservations((CTA_Model) *model, (CTA_StochObs) *sObsAll, (CTA_Time) *spanSim, (CTA_StochObs*) sObsSel);
+}
+
+CTAEXPORT void CTA_UTIL_METHODSOPENRESULTFILE_F77(char *stationFile,
+                                   int *fStationFile, int *ierr, int len_str){
+
+   char  *c_file;
+
+   c_file=CTA_Malloc((len_str+1)*sizeof(char));
+   CTA_fstr2cstr(stationFile,c_file,len_str);
+    *ierr=CTA_Util_MethodsOpenResultFile(c_file,
+                                   (CTA_File *) fStationFile);
+   free(c_file);
+}
+
diff --git a/costa/native/cta/src/cta_util_sort.c b/costa/native/cta/src/cta_util_sort.c
new file mode 100644
index 000000000..52315fb9b
--- /dev/null
+++ b/costa/native/cta/src/cta_util_sort.c
@@ -0,0 +1,78 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/core/native/src/cta/cta_time.c $
+$Revision: 2751 $, $Date: 2011-09-09 08:58:46 +0200 (Fri, 09 Sep 2011) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2012  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include "cta_util_sort.h"
+ 
+void swap(int *x,int *y){
+
+   int temp;
+   temp = *x;
+   *x = *y;
+   *y = temp;
+}
+
+
+
+int choose_pivot(int i,int j ){
+  return((i+j) /2);
+}
+
+
+
+void CTAI_Util_IQSort2(int *list, int *indx, int m,int n){
+ int key,i,j,k;
+ if( m < n)
+ {
+    k = choose_pivot(m,n);
+    swap(&list[m],&list[k]);
+    swap(&indx[m],&indx[k]);
+    key = list[m];
+    i = m+1;
+    j = n;
+    while(i <= j)
+    {
+       while((i <= n) && (list[i] <= key))
+              i++;
+       while((j >= m) && (list[j] > key))
+              j--;
+       if( i < j){
+              swap(&list[i],&list[j]);
+              swap(&indx[i],&indx[j]);
+       }
+    }
+    // swap two elements
+    swap(&list[m],&list[j]);
+    swap(&indx[m],&indx[j]);
+    // recursively sort the lesser list
+    CTAI_Util_IQSort2(list,indx,m,j-1);
+    CTAI_Util_IQSort2(list,indx,j+1,n);
+ }
+}
+
+
+void CTA_Util_IQSort2(int *list, int *indx, int n){
+   CTAI_Util_IQSort2(list, indx, 0, n-1);
+}
+
+
+
diff --git a/costa/native/cta/src/cta_util_sqlite3.c b/costa/native/cta/src/cta_util_sqlite3.c
new file mode 100644
index 000000000..b0f2e8d33
--- /dev/null
+++ b/costa/native/cta/src/cta_util_sqlite3.c
@@ -0,0 +1,372 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_util_sqlite3.c $
+$Revision: 4304 $, $Date: 2014-01-14 07:29:40 +0100 (Tue, 14 Jan 2014) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include "cta_mem.h"
+#include "cta_flush.h"
+#include "cta_util_sqlite3.h"
+#include "cta_file.h"
+#include "cta_string.h"
+#include "cta_errors.h"
+
+#define IDEBUG (0)
+
+
+int CTAI_util_sqlite3_return_values(void *vout,
+         int argc, char **argv, char **azColName)
+// Callback-function for sqlite3
+{
+  int retval;
+  int *values;
+
+  // Cast the input to a counter-and-vector-struct
+  CTAI_counter_vector *out = (CTAI_counter_vector*) vout;
+
+  // Perform checks
+  if (argc != 1) {return CTA_DIMENSION_ERROR;};
+  if (out->index >= out->dimension) {return CTA_DIMENSION_ERROR;};
+
+  if (out->datatype == CTA_INTEGER) 
+  {
+  // Add an integer to the output vector:
+
+     // Read the value
+     int iNAN =-987654;
+     int value= iNAN;
+     if (argv[0] != NULL) 
+     { 
+       int nscan = sscanf(argv[0],"%d",&value);
+       if (nscan != 1) {return CTA_ILLEGAL_DATATYPE;}
+     }
+
+     // Cast the values-field to an integer-array and store the value
+     values = (int *) out->values;
+     values[out->index] = value;
+  }
+  else if(out->datatype == CTA_DOUBLE) 
+  {
+  // Add a double to the output vector:
+
+     // Read the value
+     double NaN;
+     double value;
+     double *values;
+
+     NaN = 0; NaN = 1.0/NaN; NaN=NaN/NaN;
+     value=NaN;
+
+     if (argv[0] != NULL) 
+     {
+       int nscan = sscanf(argv[0],"%lf",&value);
+       if (nscan != 1) {return CTA_ILLEGAL_DATATYPE;}
+     }
+
+     // Cast the values-field to a double-array and store the value
+     values = (double *) out->values;
+     values[out->index] = value;
+  }
+  else if(out->datatype == CTA_REAL) 
+  {
+  // Add a float to the output vector:
+
+     // Read the value
+     float NaN;
+     float value;
+     float *values;
+     NaN = 0;
+     NaN = (float) 1.0/NaN;
+     NaN=NaN/NaN;
+     value=NaN;
+     if (argv[0] != NULL) 
+     {
+       int nscan = sscanf(argv[0],"%f",&value);
+       if (nscan != 1) {return CTA_ILLEGAL_DATATYPE;}
+     }
+
+     // Cast the values-field to a double-array and store the value
+     values = (float *) out->values;
+     values[out->index] = value;
+  }
+  else if(out->datatype == CTA_STRING) 
+  {
+  // Add a string to the output vector:
+
+     // Read the value
+     CTA_String *values = (CTA_String *) out->values;
+     if (argv[0] != NULL) 
+     {
+        // Cast the values-field to a double-array and store the value
+        retval = CTA_String_Set(values[out->index],argv[0]);
+        if (retval!=CTA_OK) return retval;
+     }
+     else
+     {
+        retval = CTA_String_Set(values[out->index],"");
+        if (retval!=CTA_OK) return retval;
+     }
+  }
+  else
+  {
+  // No other data types supported (yet)
+     return CTA_ILLEGAL_DATATYPE;
+  };
+
+  // Increase the counter, so next value is put in the next spot
+  out->index++;
+  return SQLITE_OK;
+}
+
+
+
+
+
+int CTAI_util_sqlite3_keynames(void *vout,
+         int argc, char **argv, char **azColName)
+// Callback-function for sqlite3
+{
+  // Cast the input to a COSTA-string array
+  CTA_String *out = (CTA_String*) vout;
+
+  // Copy all the column names into the string array
+  int i;
+  for (i=0; i<argc; i++) {CTA_String_Set(out[i],azColName[i]);};
+
+  // Return an error code on purpose, so that the query is done.
+  return -1;
+}
+
+
+
+
+int CTAI_util_sqlite3_count_columns(void *vout,
+         int argc, char **argv, char **azColName)
+// Callback-function for sqlite3
+{
+  // Cast the input to an integer
+  int *out = (int*) vout;
+
+  // Fill the column-counter
+  *out = argc;
+
+  // Return an error code on purpose, so that the query is done.
+  return -1;
+}
+
+
+
+
+
+int CTAI_util_sqlite3_select_values(
+/* 
+   Return the integers found in the sqlite3-database
+*/
+    // OUTPUTS:
+    void *out, 
+    // INPUTS:
+    int nout,              // dimension of the output array
+    CTA_Datatype datatype, // data type of the output array
+    sqlite3 *db,           // the database
+    const char *selection, // the column-name which is returned 
+    const char *condition) // the selection creterion
+{
+   char *command;
+   CTAI_counter_vector cvout;
+   char *zErrMsg;
+   int rc;
+   
+   // Set the constants necessary 
+   const char *whole_table = // The expression which produces the complete obs-table
+      "stations inner join data on stations.station_id = data.station_id";
+   command=CTA_Malloc((strlen(selection)+strlen(whole_table)+strlen(condition)+100)*sizeof(char));
+
+   // Construct the query-command
+   if (strcmp(condition,"") == 0)
+   {
+      sprintf(command,"select %s from %s",selection,whole_table);
+   }
+   else
+   {
+      sprintf(command,"select %s from %s where %s",
+              selection,whole_table,condition);
+   };
+
+   if (IDEBUG) printf("CTAI_util_sqlite3_select_values: DEBUG command=%s\n",command);
+
+   // Arrange a counter-vector to store the results   
+   cvout.dimension = nout;
+   cvout.index     = 0;
+   cvout.datatype  = datatype;
+   cvout.values    = out;
+
+   // Carry out the query
+   if (IDEBUG>0) printf("Doing query '%s'\n",command);
+   zErrMsg=NULL;
+   rc = sqlite3_exec(db, command, CTAI_util_sqlite3_return_values,
+                     (void *) &cvout, &zErrMsg);
+//   if (zErrMsg!=NULL){
+//      printf("Error message from sqlite3: %s\n", zErrMsg);
+//   }
+   free(command);
+   if( rc != SQLITE_OK )
+     {return CTA_INVALID_COMMAND;};
+   return CTA_OK;
+}
+
+
+
+
+int CTAI_util_sqlite3_return_keys(
+/* 
+   Return the keys in the sqlite3-database. Allocate a string-array
+   to store the names
+*/
+    // OUTPUTS:
+    int *n_keys, 
+    CTA_String **Keys,
+    // INPUTS:
+    sqlite3 *db,           // the database
+    const char *condition) // the selection creterion
+{
+   // Set the constants necessary 
+   const char *whole_table = // The expression which produces the 
+                             //     complete obs-table
+      "stations inner join data on stations.station_id = data.station_id";
+
+   char *command;
+   char *zErrMsg;
+   int rc;
+   int i;
+
+   command=CTA_Malloc( sizeof(char) * (   strlen("select * from   where  ")
+                                    + strlen(whole_table)
+                                    + strlen(condition)
+                                    + 100
+                 )                ); 
+
+   // Construct the query-command
+   if (strcmp(condition,"") == 0)
+   {
+      sprintf(command,"select * from %s",whole_table);
+   }
+   else
+   {
+      sprintf(command,"select * from %s where %s",
+              whole_table,condition);
+   };
+
+   // Carry out the query
+   zErrMsg = NULL;
+
+   CTA_Flush();
+//   printf("Doing query '%s'\n",command);
+   CTA_Flush();  
+   rc = sqlite3_exec(db, command, CTAI_util_sqlite3_count_columns,
+                     (void *) n_keys, &zErrMsg);
+//   if (zErrMsg!=NULL){
+//      printf("Error message from sqlite3: %s\n", zErrMsg);
+//   }
+//   printf("Return code '%d'\n",rc);
+   CTA_Flush();
+   if ( rc != 4 ) {
+     printf("Return code of sqlite3_exec %d see sqlite.h\n",rc);
+     return CTA_INVALID_COMMAND;
+   };
+
+   *Keys = CTA_Malloc(sizeof(CTA_String)* (*n_keys));
+   for (i=0; i<*n_keys; i++) 
+   {
+     rc = CTA_String_Create( (*Keys+i) );
+     if (rc != CTA_OK) return rc;
+   }
+
+   // Carry out the query
+   zErrMsg = 0;
+   //printf("Doing query '%s'\n",command);
+   rc = sqlite3_exec(db, command, CTAI_util_sqlite3_keynames,
+                     (void *) *Keys, &zErrMsg);
+//   if (zErrMsg!=NULL){
+//      printf("Error message from sqlite3: %s\n", zErrMsg);
+//   }
+   free(command);
+   if( rc != 4 )
+     {return CTA_INVALID_COMMAND;};
+
+   // Remove the double entries and the VALUE-entry
+   for (i=0; i<*n_keys; i++)
+   {
+     int ilen;
+     char *stri;
+
+     rc = CTA_String_GetLength( *(*Keys+i), &ilen);
+     if (rc != CTA_OK) return rc;
+     stri=CTA_Malloc((ilen+1)*sizeof(char));
+     rc = CTA_String_Get( *(*Keys+i), stri);
+     if (rc != CTA_OK) return rc;
+
+     // Remove word <i> if it is the key 'VALUE'
+     if (strcmp(stri,"VALUE")==0)
+     {
+        rc = CTA_String_Free(*Keys+i);
+        if (rc != CTA_OK) return rc;
+        if (i<*n_keys-1) { *(*Keys+i) = *(*Keys+*n_keys-1); }
+        (*n_keys)--;
+        i--;
+     }
+     else
+     {
+        // Check all the words up to <i> to see if word <i> is already in the list
+        int j;
+        for (j=0; j<i; j++)
+        {
+          int jlen;
+          rc = CTA_String_GetLength( *(*Keys+j), &jlen);
+          if (jlen==ilen)
+          {
+             char *strj;
+
+             strj=CTA_Malloc((jlen+1)*sizeof(char));
+             rc = CTA_String_Get( *(*Keys+j), strj);
+             if (rc != CTA_OK) return rc;
+             if (strcmp(stri,strj)==0)
+             {
+             // Remove word <i> from the list: it is already there.
+                rc = CTA_String_Free(*Keys+i);
+                if (rc != CTA_OK) return rc;
+                if (i<*n_keys-1) { *(*Keys+i) = *(*Keys+*n_keys-1); }
+                (*n_keys)--;
+                i--;
+                j=i;
+             }
+             free(strj);
+          }
+        }
+      }
+     free(stri);
+   }
+
+   *Keys = realloc(*Keys, sizeof(CTA_String)* (*n_keys));
+
+   return CTA_OK;
+}
+
diff --git a/costa/native/cta/src/cta_util_statistics.c b/costa/native/cta/src/cta_util_statistics.c
new file mode 100644
index 000000000..abaf715fe
--- /dev/null
+++ b/costa/native/cta/src/cta_util_statistics.c
@@ -0,0 +1,151 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_util_statistics.c $
+$Revision: 4200 $, $Date: 2013-11-01 10:59:20 +0100 (Fri, 01 Nov 2013) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <math.h>
+#include <stdio.h>
+#include "f_cta_utils.h"
+#include "cta_datatypes.h"
+#include "cta_errors.h"
+#include "cta_util_statistics.h"
+#include "cta_message.h"
+
+#define CTA_RAND_U_F77  F77_CALL(cta_rand_u,CTA_RAND_U)
+#define CTA_RAND_N_F77  F77_CALL(cta_rand_n,CTA_RAND_N)
+
+#define IM1 2147483563
+#define IM2 2147483399
+#define AM (1.0/IM1) 
+#define IMM1 (IM1-1) 
+#define IA1 40014
+#define IA2 40692
+#define IQ1 53668
+#define IQ2 52774
+#define IR1 12211
+#define IR2 3791
+#define NTAB 32
+#define NDIV (1+IMM1/NTAB) 
+#define EPS 1.2e-7
+#define RNMX (1.0-EPS)
+
+#define CLASSNAME "CTA_Utils_Statisctics"
+
+static long CTAI_current_state_random=-21075;
+
+float ran2(long *idum)
+{
+	int j;
+	long k;
+	static long idum2=123456789; static long iy=0;
+	static long iv[NTAB];
+	float temp;
+	if (*idum <= 0) {
+		if (-(*idum) < 1) *idum=1; else *idum = -(*idum); idum2=(*idum);
+		for (j=NTAB+7;j>=0;j--) {
+			k=(*idum)/IQ1;
+			*idum=IA1*(*idum-k*IQ1)-k*IR1;
+			if (*idum < 0) *idum += IM1; 
+			if (j < NTAB) iv[j] = *idum;
+		}
+		iy=iv[0];
+	 }
+	k=(*idum)/IQ1;
+	*idum=IA1*(*idum-k*IQ1)-k*IR1;
+	if (*idum < 0) *idum += IM1;
+	k=idum2/IQ2;
+	idum2=IA2*(idum2-k*IQ2)-k*IR2;
+	if (idum2 < 0) idum2 += IM2;
+	j=iy/NDIV;
+	iy=iv[j]-idum2;
+	iv[j] = *idum;
+	if (iy < 1) iy += IMM1;
+	if ((temp= (float) AM*iy) > RNMX){
+           return (float) RNMX;
+        }
+	else {
+           return (float) temp;
+        }
+}
+
+#define METHOD "CTA_rand_seed"
+void CTA_rand_seed(int seed){
+   char message[64];
+   sprintf(message,"Set initial seed: %ld\n",seed);
+   CTA_WRITE_INFO(message);
+   if (seed=0) seed=-21075;
+   if (seed>0) seed=-seed;
+   CTAI_current_state_random=(long) seed;
+   ran2(&CTAI_current_state_random);
+}
+
+int CTA_rand_u(double *x)
+// Calculate a realization from a uniform [0 1] distribution
+{
+   //*x=(double) (random()/(double)(RAND_MAX));
+   *x=(double) ran2(&CTAI_current_state_random);
+   return CTA_OK;
+}
+
+int CTA_rand_n(double *x)
+// Calculate a realization from a standard normal distribution
+{
+//    use the box-muller scheme, which calculates two standard
+//    normal random numbers from two standard uniform random
+//    numbers.
+
+   // Remember the extra value in variable value2;
+   //    remember whether an extra value is available in variable hebnog
+   static BOOL hebnog=FALSE;
+   static double value2;
+
+   if (hebnog)
+   {
+   // extra value still available: return it
+      hebnog = FALSE;
+      *x = value2;
+   }
+   else
+   {
+   // no extra value available: calculate 2 normal rando:m numbers
+   //   and return only one
+     // double r1=(double) (random()+1.0)/(double) (RAND_MAX+1.0);
+     // double r2=(double) (random()+1.0)/(double) (RAND_MAX+1.0);
+     double r1= (double) ran2(&CTAI_current_state_random);
+     double r2= (double) ran2(&CTAI_current_state_random);
+      double hlp = sqrt(-2*log(r1));
+
+      *x     = hlp * cos(2.0*M_PI*r2);
+      value2 = hlp * sin(2.0*M_PI*r2);
+      hebnog = TRUE;
+   }
+   return CTA_OK;
+}
+
+/* Interfacing with Fortran */
+
+CTAEXPORT void CTA_RAND_U_F77(double *x, int *ierr){
+   *ierr=CTA_rand_u(x);
+}
+
+CTAEXPORT void CTA_RAND_N_F77(double *x, int *ierr){
+   *ierr=CTA_rand_n(x);
+}
+
diff --git a/costa/native/cta/src/cta_vector.c b/costa/native/cta/src/cta_vector.c
new file mode 100644
index 000000000..c28e9c394
--- /dev/null
+++ b/costa/native/cta/src/cta_vector.c
@@ -0,0 +1,1747 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_vector.c $
+$Revision: 3361 $, $Date: 2012-07-04 16:52:30 +0200 (Wed, 04 Jul 2012) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <stdio.h>
+#include <string.h>
+#include "cta_mem.h"
+#include "f_cta_utils.h"
+#include "cta_errors.h"
+#include "cta_datatypes.h"
+#include "ctai_datatypes.h"
+#include "cta_string.h"
+#include "cta_vector.h"
+#include "ctai.h"
+#include "ctai_string.h"
+#include "cta_pack.h"
+#include "ctai_xml.h"
+#include "cta_defaults.h"
+#include "ctai_handles.h"
+#include "cta_message.h"
+#include "cta_metainfo.h"
+
+#define CTA_VECTOR_DEFINECLASS_F77  F77_CALL(cta_vector_defineclass,CTA_VECTOR_DEFINECLASS)
+#define CTA_VECTOR_DUPLICATE_F77    F77_CALL(cta_vector_duplicate,CTA_VECTOR_DUPLICATE)
+#define CTA_VECTOR_CREATE_F77       F77_CALL(cta_vector_create,CTA_VECTOR_CREATE)
+#define CTA_VECTOR_GETSIZE_F77      F77_CALL(cta_vector_getsize,CTA_VECTOR_GETSIZE)
+#define CTA_VECTOR_GETVALS_F77      F77_CALL(cta_vector_getvals,CTA_VECTOR_GETVALS)
+#define CTA_VECTOR_GETVAL_F77       F77_CALL(cta_vector_getval,CTA_VECTOR_GETVAL)
+#define CTA_VECTOR_SETVALS_F77      F77_CALL(cta_vector_setvals,CTA_VECTOR_SETVALS)
+#define CTA_VECTOR_SETVAL_F77       F77_CALL(cta_vector_setval,CTA_VECTOR_SETVAL)
+#define CTA_VECTOR_SETCONSTANT_F77  F77_CALL(cta_vector_setconstant,CTA_VECTOR_SETCONSTANT)
+#define CTA_VECTOR_SCAL_F77         F77_CALL(cta_vector_scal,CTA_VECTOR_SCAL)
+#define CTA_VECTOR_COPY_F77         F77_CALL(cta_vector_copy,CTA_VECTOR_COPY)
+#define CTA_VECTOR_AXPY_F77         F77_CALL(cta_vector_axpy,CTA_VECTOR_AXPY)
+#define CTA_VECTOR_DOT_F77          F77_CALL(cta_vector_dot,CTA_VECTOR_DOT)
+#define CTA_VECTOR_NRM2_F77         F77_CALL(cta_vector_nrm2,CTA_VECTOR_NRM2)
+#define CTA_VECTOR_AMAX_F77         F77_CALL(cta_vector_amax,CTA_VECTOR_AMAX)
+#define CTA_VECTOR_GETMAXLEN_F77    F77_CALL(cta_vector_getmaxlen,CTA_VECTOR_GETMAXLEN)
+#define CTA_VECTOR_EXPORT_F77       F77_CALL(cta_vector_export,CTA_VECTOR_EXPORT)
+#define CTA_VECTOR_IMPORT_F77       F77_CALL(cta_vector_import,CTA_VECTOR_IMPORT)
+#define CTA_VECTOR_PRINT_TABLE_F77  F77_CALL(cta_vector_print_table,CTA_VECTOR_PRINT_TABLE)
+#define CTA_VECTOR_FREE_F77         F77_CALL(cta_vector_free,CTA_VECTOR_FREE)
+#define CTA_VECTOR_ELMDIV_F77       F77_CALL(cta_vector_elmdiv,CTA_VECTOR_ELMDIV)
+#define CTA_VECTOR_ELMPROD_F77      F77_CALL(cta_vector_elmprod,CTA_VECTOR_ELMPROD)
+
+#define CLASSNAME "CTA_Vector" 
+
+/* Struct holding all data associated to an COSTA Vector */
+
+static long CTAI_Vector_Memsize=0;
+
+
+typedef struct {
+CTA_Func functions[I_CTA_VECTOR_NUMFUNC]; // See cta_vector.h for a list of
+                                        //     available vector functions
+CTA_VecClass hveccl;                    // Vector class
+int n;                                  // dimension
+CTA_Datatype datatype;                  // data type of the vector elements
+CTA_Handle usrdata;                     // ???
+long memsize;                           // size of allocated memblock
+void *data;                             // pointer to the data. The data are not only
+                                        //   the vector values. In the case of a BLAS-vector
+                                        //   for instance, data is a struct consisting of
+                                        //   the fields datatype, values and n (dimension)
+                                        //   Some information is stored twice (once directly in a CTA_Vector
+                                        //   and once in its data.
+} CTAI_Vector;
+
+
+
+typedef struct {
+CTA_Func functions[I_CTA_VECTOR_NUMFUNC];
+} CTAI_VectorClass;                    // A VectorClass  contains a list of
+                                       //   functions ???
+
+/* Local interfaces */
+int CTAI_XML_read_txt_node_size(char *txtstr);
+int CTAI_XML_read_txt_node_content(char *txtstr, double *vals);
+
+/* Global interface from a shared routine from cta_treevector.c */
+void XML_newline(int spc, xmlTextWriter *writer);
+
+
+
+int CTA_Vector_DefineClass(
+   // INPUTS:
+      const char *name,                // Name of the new vector class
+      const CTA_Func h_func[I_CTA_VECTOR_NUMFUNC],  // function handles to
+                                       // the implementations of the
+                                       // vector class' functions.
+   // OUTPUTS:
+      CTA_VecClass     *hveccl)   // The (handle to) the new vector class
+{
+
+   CTAI_VectorClass *data;
+   int retval;
+
+   /* Allocate new Vector object */
+   data=CTA_Malloc(sizeof(CTAI_VectorClass));
+
+   data->functions[I_CTA_VECTOR_CREATE_SIZE]=h_func[I_CTA_VECTOR_CREATE_SIZE];
+   data->functions[I_CTA_VECTOR_CREATE_INIT]=h_func[I_CTA_VECTOR_CREATE_INIT];
+   data->functions[I_CTA_VECTOR_GETVALS]    =h_func[I_CTA_VECTOR_GETVALS    ];
+   data->functions[I_CTA_VECTOR_GETVAL]     =h_func[I_CTA_VECTOR_GETVAL     ];
+   data->functions[I_CTA_VECTOR_SETVALS]    =h_func[I_CTA_VECTOR_SETVALS    ];
+   data->functions[I_CTA_VECTOR_SETVAL]     =h_func[I_CTA_VECTOR_SETVAL     ];
+   data->functions[I_CTA_VECTOR_SETCONST]   =h_func[I_CTA_VECTOR_SETCONST   ];
+   data->functions[I_CTA_VECTOR_SCAL   ]    =h_func[I_CTA_VECTOR_SCAL       ];
+   data->functions[I_CTA_VECTOR_COPY   ]    =h_func[I_CTA_VECTOR_COPY       ];
+   data->functions[I_CTA_VECTOR_AXPY   ]    =h_func[I_CTA_VECTOR_AXPY       ];
+   data->functions[I_CTA_VECTOR_DOT    ]    =h_func[I_CTA_VECTOR_DOT        ];
+   data->functions[I_CTA_VECTOR_NRM2   ]    =h_func[I_CTA_VECTOR_NRM2       ];
+   data->functions[I_CTA_VECTOR_AMAX   ]    =h_func[I_CTA_VECTOR_AMAX       ];
+   data->functions[I_CTA_VECTOR_GETMAXLEN]  =h_func[I_CTA_VECTOR_GETMAXLEN  ];
+   data->functions[I_CTA_VECTOR_EXPORT ]    =h_func[I_CTA_VECTOR_EXPORT     ];
+   data->functions[I_CTA_VECTOR_IMPORT ]    =h_func[I_CTA_VECTOR_IMPORT     ];
+   data->functions[I_CTA_VECTOR_PRINT_TABLE]=h_func[I_CTA_VECTOR_PRINT_TABLE];
+   data->functions[I_CTA_VECTOR_FREE   ]    =h_func[I_CTA_VECTOR_FREE       ];
+   data->functions[I_CTA_VECTOR_APPENDVAL]  =h_func[I_CTA_VECTOR_APPENDVAL  ];
+   data->functions[I_CTA_VECTOR_ELMDIV]     =h_func[I_CTA_VECTOR_ELMDIV     ];
+   data->functions[I_CTA_VECTOR_ELMPROD]    =h_func[I_CTA_VECTOR_ELMPROD    ];
+   data->functions[I_CTA_VECTOR_ELMSQRT]    =h_func[I_CTA_VECTOR_ELMSQRT    ];
+
+
+   // Allocate new handle
+   retval=CTA_Handle_Create(name,CTA_VECTORCLASS,data,hveccl);
+
+   // return error when unsuccesfull
+   return retval;
+}
+
+
+#undef METHOD
+#define METHOD "Duplicate"
+int CTA_Vector_Duplicate(CTA_Vector hvector1, CTA_Vector *hvector2)
+{
+   CTAI_Vector *data;      /* All data of vector hvec      */
+   int retval;
+
+   /* Get class data containing all function pointers */
+   retval=CTA_Handle_Check((CTA_Handle) hvector1,CTA_VECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_treevector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hvector1,(void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   retval=CTA_Vector_Create(data->hveccl, data->n, data->datatype,
+                            data->usrdata, hvector2);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot create vector");
+	   return retval;
+   }
+   retval=CTA_Vector_Copy(hvector1,*hvector2);
+   return retval;
+}
+
+
+
+
+#undef METHOD
+#define METHOD "Create"
+int CTA_Vector_Create(
+    // INPUTS:
+       CTA_VecClass hveccl,
+       const int n,            // dimension
+       CTA_Datatype datatype,  // datatype of the vector elements
+       CTA_Handle usrdata,     // ???
+    // OUTPUTS
+       CTA_Vector *hvector)    // The new COSTA-function (handle)
+{
+   CTAI_Vector *vector;
+   int memsize;
+   int retval;
+   CTAI_VectorClass *clsdata;
+   CTA_Function *my_Create_Size, *my_Create_Init;
+   int i;
+   int elSize;
+
+   /* Get class data containing all function pointers */
+   retval=CTA_Handle_Check((CTA_Handle) hveccl,CTA_VECTORCLASS);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_vectorclass handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hveccl,(void*) &clsdata);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   /* determine size of data object (CTA_VECTOR_CREATE_SIZE)*/
+   retval=CTA_Func_GetFunc(clsdata->functions[I_CTA_VECTOR_CREATE_SIZE],&my_Create_Size);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function");
+	   return retval;
+   }
+   (void) my_Create_Size(&n,&datatype,&usrdata,&retval,&memsize);
+   if (retval) {
+	   CTA_WRITE_ERROR("Error using my_Create_Size");
+	   return retval;
+   }
+
+   /* allocate memory for new vector object */
+   vector=CTA_Malloc(sizeof(CTAI_Vector));
+   vector->data=CTA_Malloc(memsize);
+
+   /* Count allocated memory local and global */
+   CTA_SizeOf(datatype, &elSize);
+   vector->memsize=sizeof(CTAI_Vector)+memsize+n*elSize;
+   CTAI_Vector_Memsize+=vector->memsize;
+
+   /* copy function pointers */
+   for (i=0;i< I_CTA_VECTOR_NUMFUNC;i++){
+      vector->functions[i]=clsdata->functions[i];
+   }
+   /* set other general information */
+   vector->hveccl=hveccl;
+   vector->n=n;
+   vector->datatype=datatype;
+
+   vector->usrdata=usrdata;
+
+   /* Initialise and fill new vector */
+   retval=CTA_Func_GetFunc(clsdata->functions[I_CTA_VECTOR_CREATE_INIT],&my_Create_Init);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function");
+	   return retval;
+   }
+   (void) my_Create_Init(vector->data, &n, &datatype, &usrdata, &retval);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Error using my_Create_Init");
+	   return retval;
+   }
+
+   /* Allocate new handle and return eror when unsuccesfull */
+   retval=CTA_Handle_Create("vector",CTA_VECTOR,vector,hvector);
+   if (retval) {
+	   CTA_WRITE_ERROR("Cannot create handle");
+	   return retval;
+   }
+
+   return CTA_OK;
+
+
+}
+
+
+#undef METHOD
+#define METHOD "GetSize"
+int CTA_Vector_GetSize(
+   CTA_Vector hvec,      /* Handle of the vector */
+   int *n               /* size of vector */
+   ){
+
+   CTAI_Vector *data;      /* All data of vector hvec      */
+   int retval;             /* Return value of COSTA call   */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hvec,CTA_VECTOR);
+   if (retval!=CTA_OK) {
+      char message[1024];
+	  sprintf(message, "handle check in cta_vector_getsize failed");
+	  CTA_WRITE_ERROR(message);
+      return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hvec,(void*) &data);
+   if (retval!=CTA_OK) {
+      char message[1024];
+      sprintf(message, "handle _getdata in cta_vector_getsize failed");
+      CTA_WRITE_ERROR(message);
+	  return retval;
+   }
+
+   *n=data->n;
+   return CTA_OK;
+};
+
+#undef METHOD
+#define METHOD "GetDatatype"
+int CTA_Vector_GetDatatype(
+   CTA_Vector hvec,         /* Handle of the vector */
+   CTA_Datatype *datatype   /* Returned data type   */
+   ){
+
+   CTAI_Vector *data;      /* All data of vector hvec      */
+   int retval;             /* Return value of COSTA call   */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hvec,CTA_VECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_vectorclass handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hvec,(void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   *datatype=data->datatype;
+   return CTA_OK;
+};
+
+
+
+
+#undef METHOD
+#define METHOD "GetVals"
+int CTA_Vector_GetVals(
+   CTA_Vector hvec,      /* Handle of the vector */
+   void *vals,           /* Returned value        */
+   int  n,               /* length of array vals  */
+   CTA_Datatype datatype /* Data type */
+   )
+{
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Vector *data;      /* All data of vector hvec      */
+   CTA_Function *function; /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hvec,CTA_VECTOR);
+   if (retval!=CTA_OK)
+   {
+      char message[1024];
+      sprintf(message,"cta_vector: Handle %d is not a vector handle", hvec);
+	  CTA_WRITE_ERROR(message);
+      return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hvec,(void*) &data);
+   if (retval!=CTA_OK)
+   {
+     char message[1024];
+     sprintf(message,"cta_vector: Handle %d has no data", hvec);
+     CTA_WRITE_ERROR(message);
+	 return retval;
+   }
+   retval=CTA_Func_GetFunc(data->functions[I_CTA_VECTOR_GETVALS],&function);
+   if (retval!=CTA_OK)
+   {
+       char message[1024];
+       sprintf(message,"cta_vector: getvals function %d, handle %d is not valid",
+           I_CTA_VECTOR_GETVALS,data->functions[I_CTA_VECTOR_GETVALS]);
+       CTA_WRITE_ERROR(message);
+       return retval;
+   }
+
+   /* Call (user) implementation */
+   function(data->data,vals,&n,&datatype,&retval);
+   return retval;
+};
+
+
+
+#undef METHOD
+#define METHOD "GetVal"
+int CTA_Vector_GetVal(
+   CTA_Vector hvec,      /* Handle of the vector */
+   int  i,               /* position of value    */
+   void *val,            /* Returned value        */
+   CTA_Datatype datatype /* Data type */
+   ){
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Vector *data;      /* All data of vector hvec      */
+   CTA_Function *function; /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hvec,CTA_VECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_vector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hvec,(void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   retval=CTA_Func_GetFunc(data->functions[I_CTA_VECTOR_GETVAL],&function);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_GETVAL");
+	   return retval;
+   }
+
+   /* Call (user) implementation */
+   function(data->data,&i,val,&datatype,&retval);
+
+   return retval;
+};
+
+
+/* Get value as string  c; free pointer after use */
+const char *CTAI_Vector_GetStringVal(CTA_Vector hvec, int  i)
+{
+   char          *buf = NULL;
+   double         dval;
+   float          fval;
+   int            ival;
+   int            len;
+   CTA_String     sval;
+   int            retval;
+   CTA_Datatype   dt;
+
+   retval = CTA_Vector_GetDatatype(hvec, &dt);
+   if (retval == CTA_OK) {
+      switch (dt) {
+      case CTA_DOUBLE:
+         retval = CTA_Vector_GetVal(hvec, i, &dval, dt);
+         if (retval == CTA_OK) {
+            buf = (char*)CTA_Malloc(64);
+            sprintf(buf, "%lf", dval);
+         }
+         break;
+      case CTA_REAL:
+         retval = CTA_Vector_GetVal(hvec, i, &fval, dt);
+         if (retval == CTA_OK) {
+            buf = (char*)CTA_Malloc(64);
+            sprintf(buf, "%g", fval);
+         }
+         break;
+      case CTA_INTEGER:
+      case CTA_HANDLE:
+         retval = CTA_Vector_GetVal(hvec, i, &ival, dt);
+         if (retval == CTA_OK) {
+            buf = (char*)CTA_Malloc(64);
+            sprintf(buf, "%d", ival);
+         }
+         break;
+      case CTA_STRING:
+         retval = CTA_String_Create(&sval);
+         if (retval != CTA_OK) break;
+         retval = CTA_Vector_GetVal(hvec, i, &sval, dt);
+         if (retval == CTA_OK) {
+            CTA_String_GetLength(sval, &len);
+            buf = (char*)CTA_Malloc(len + 1);
+            strcpy(buf, CTAI_String_GetPtr(sval));
+            buf[len] = '\0';
+         }
+         CTA_String_Free(&sval);
+         break;
+      }
+   }
+   return buf;
+};
+
+
+#undef METHOD
+#define METHOD "SetConstant"
+int CTA_Vector_SetConstant(
+   CTA_Vector hvec,  /* Handle of the vector */
+   void *val,        /* value that must be set */
+   CTA_Datatype datatype /* Data type */
+   ){
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Vector *data;      /* All data of vector hvec      */
+   CTA_Function *function; /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hvec,CTA_VECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_vector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hvec, (void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   retval=CTA_Func_GetFunc(data->functions[I_CTA_VECTOR_SETCONST],&function);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_SETCONST");
+	   return retval;
+   }
+   /* Call (user) implementation */
+   function(data->data,val,&datatype,&retval);
+   return retval;
+};
+
+
+
+
+
+#undef METHOD
+#define METHOD "Export"
+int CTA_Vector_Export(
+   CTA_Vector hvec, /* Handle of the vector */
+   CTA_Handle usrdata
+   ){
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Vector *data;      /* All data of vector hvec      */
+   CTA_Function *function; /* Function that must be called */
+
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hvec,CTA_VECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_vector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hvec, (void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   retval=CTA_Func_GetFunc(data->functions[I_CTA_VECTOR_EXPORT],&function);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_EXPORT");
+	   return retval;
+   }
+
+   /* Call (user) implementation */
+   function(data->data,&usrdata,&retval);
+   return retval;
+};
+
+#undef METHOD
+#define METHOD "Import"
+int CTA_Vector_Import(
+   CTA_Vector hvec,    /* Handle of the vector */
+   CTA_Handle usrdata
+   ){
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Vector *data;      /* All data of vector hvec      */
+   CTA_Function *function; /* Function that must be called */
+
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hvec,CTA_VECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_vector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hvec, (void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   retval=CTA_Func_GetFunc(data->functions[I_CTA_VECTOR_IMPORT],&function);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_IMPORT");
+	   return retval;
+   }
+
+   /* Call (user) implementation */
+   function(data->data,&usrdata,&retval);
+   return retval;
+};
+
+
+#undef METHOD
+#define METHOD "Print_Table"
+int CTA_Vector_Print_Table(
+   CTA_Vector* vtable, /* Handle of the vector */
+   int ncolumns,
+   CTA_Vector vformats
+   )
+{
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Vector *data;      /* All data of vector hvec      */
+   void *formats;          /* data of vector formats       */
+   CTA_Function *function; /* Function that must be called */
+   void **table =          /* All data of vectors in table */
+      CTA_Malloc(sizeof(CTAI_Vector *)*ncolumns) ;
+
+   int i; /* loop counter */
+
+   /* Get pointer to implementation of this function */
+   for (i=0; i<ncolumns; i++)
+   {
+      retval=CTA_Handle_Check((CTA_Handle) vtable[i], CTA_VECTOR);
+      if (retval!=CTA_OK)
+      {
+		  char message[1024];
+		  sprintf(message, "ERROR in CTA_Vector_Print_Table: element %d of first argument must be a CTA_Vector",i);
+          CTA_WRITE_ERROR(message);
+		  return retval;
+      }
+
+      retval=CTA_Handle_GetData((CTA_Handle) vtable[i], (void*) &data);
+      if (retval!=CTA_OK)
+      {
+          char message[1024];
+		  sprintf(message, "ERROR in CTA_Vector_Print_Table: element %d of first argument would not return its data",i);
+          CTA_WRITE_ERROR(message);
+		  return retval;
+      }
+
+      table[i] = data->data;
+   }
+
+   retval=CTA_Handle_Check((CTA_Handle) vformats, CTA_VECTOR);
+   if (retval!=CTA_OK)
+   {
+       char message[1024];
+       sprintf(message, "ERROR in CTA_Vector_Print_Table: last argument is not a CTA_Vector");
+	   CTA_WRITE_ERROR(message);
+       return retval;
+   }
+
+   retval=CTA_Handle_GetData((CTA_Handle) vformats, (void*) &data);
+   if (retval!=CTA_OK){
+       CTA_WRITE_ERROR("ERROR in CTA_Vector_Print_Table: last argument would not return its data");
+       return retval;
+   }
+
+   formats = data->data;
+
+   retval=CTA_Func_GetFunc(data->functions[I_CTA_VECTOR_PRINT_TABLE],&function);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("ERROR in CTA_Vector_Print_Table: could not obtain a PRINT_TABLE function for last argument");
+       return retval;
+   }
+
+   /* Call (user) implementation */
+   function(table,&ncolumns,formats,&retval);
+   if (retval!=CTA_OK)
+   {
+       CTA_WRITE_ERROR("ERROR in CTA_Vector_Print_Table: PRINT_TABLE function returned an error code");
+   }
+
+
+   free(table);
+   return retval;
+};
+
+
+#undef METHOD
+#define METHOD "SetVals"
+int CTA_Vector_SetVals(
+   CTA_Vector hvec,  /* Handle of the vector */
+   void *vals,       /* value that must be set */
+   int  n,           /* Length of array vals */
+   CTA_Datatype datatype /* Data type */
+   ){
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Vector *data;      /* All data of vector hvec      */
+   CTA_Function *function; /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hvec,CTA_VECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_vector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hvec, (void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   retval=CTA_Func_GetFunc(data->functions[I_CTA_VECTOR_SETVALS],&function);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_SETVALS");
+	   return retval;
+   }
+
+   /* Call (user) implementation */
+   function(data->data,vals,&n,&datatype,&retval);
+   return retval;
+};
+
+#undef METHOD
+#define METHOD "SetVal"
+int CTA_Vector_SetVal(
+   CTA_Vector hvec,  /* Handle of the vector */
+   int  i,           /* position of value that must be set */
+   void *val,        /* value that must be set */
+   CTA_Datatype datatype /* Data type */
+   ){
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Vector *data;      /* All data of vector hvec      */
+   CTA_Function *function; /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hvec,CTA_VECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_vector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hvec, (void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   retval=CTA_Func_GetFunc(data->functions[I_CTA_VECTOR_SETVAL],&function);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_SETVAL");
+	   return retval;
+   }
+
+   /* Call (user) implementation */
+   function(data->data,&i,val,&datatype,&retval);
+   return retval;
+};
+
+
+/* Set value from string */
+int CTAI_Vector_SetStringVal(CTA_Vector hvec, int  i, const char *val, CTA_Datatype datatype) {
+   void *buf = NULL;     /* Buffer to store the converted value */
+   int  size = 0;        /* Buffer size */
+   int  retval;
+
+   if (CTA_OK == CTA_SizeOf(datatype, &size)) {
+      buf = CTA_Malloc(size);
+      switch (datatype) {
+         case CTA_REAL:
+            *((float *)buf) = (float)atof(val);
+            break;
+         case CTA_DOUBLE:
+            *((double *)buf) = atof(val);
+            break;
+         case CTA_INTEGER:
+         case CTA_HANDLE:
+            *((int *)buf) = atoi(val);
+            break;
+         case CTA_STRING:
+            retval = CTA_String_Create((CTA_String *)buf);
+            if (retval == CTA_OK) {
+               CTA_String_Set(*((CTA_String *)buf), val);
+               break;
+            }
+            free(buf);
+            return retval;
+         default:
+            free(buf);
+            return CTA_ILLEGAL_DATATYPE;
+      }
+      /* Set the actual value */
+      retval =  CTA_Vector_SetVal(hvec, i, buf, datatype);
+      free(buf);
+      return retval;
+   }
+   return CTA_ILLEGAL_DATATYPE;
+};
+
+
+#undef METHOD
+#define METHOD "AppendVal"
+int CTA_Vector_AppendVal(
+   CTA_Vector hvec,        /* Handle of the vector */
+   const void *val,        /* value that must be set */
+   CTA_Datatype datatype   /* Data type */
+   ){
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Vector *data;      /* All data of vector hvec      */
+   CTA_Function *function; /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hvec, CTA_VECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_vector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hvec, (void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   retval=CTA_Func_GetFunc(data->functions[I_CTA_VECTOR_APPENDVAL],&function);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_APPENDVAL");
+	   return retval;
+   }
+
+   /* Update vector length */
+   ++data->n;
+
+   /* Call (user) implementation */
+   function(data->data,val,&datatype,&retval);
+   return retval;
+};
+
+
+
+#undef METHOD
+#define METHOD "Scal"
+int CTA_Vector_Scal(
+   CTA_Vector hvec,  /* Handle of the vector */
+   double alpha     /* scaling factor        */
+   ){
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Vector *data;      /* All data of vector hvec      */
+   CTA_Function *function; /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hvec,CTA_VECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_vector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hvec, (void*) &data);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   retval=CTA_Func_GetFunc(data->functions[I_CTA_VECTOR_SCAL],&function);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_SCAL");
+	   return retval;
+   }
+
+   /* Call (user) implementation */
+   function(data->data,&alpha,&retval);
+   return retval;
+};
+
+#undef METHOD
+#define METHOD "Copy"
+int CTA_Vector_Copy(
+   CTA_Vector hvec_x,  /* Handle of the sending vector */
+   CTA_Vector hvec_y   /* Handle of the receiving vector */
+   ){
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Vector *data_x;      /* All data of vector hvec_x  */
+   CTAI_Vector *data_y;      /* All data of vector hvec_y  */
+   CTA_Function *function_x;   /* Function that must be called */
+   CTA_Function *function_y;   /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hvec_x,CTA_VECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_vector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hvec_x, (void*) &data_x);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   retval=CTA_Handle_Check((CTA_Handle) hvec_y,CTA_VECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_vector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hvec_y, (void*) &data_y);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   retval=CTA_Func_GetFunc(data_x->functions[I_CTA_VECTOR_COPY],&function_x);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_COPY");
+	   return retval;
+   }
+   retval=CTA_Func_GetFunc(data_y->functions[I_CTA_VECTOR_COPY],&function_y);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_COPY");
+	   return retval;
+   }
+
+   /* Check whether implementions match  */
+   if (function_x==function_y){
+
+      /* Call (user) implementation */
+      function_x(data_x->data,data_y->data,&retval);
+      return retval;
+   }
+   else {
+      return CTA_INCOMPATIBLE_VECTORS;
+   }
+};
+
+#undef METHOD
+#define METHOD "ElmSqrt"
+int CTA_Vector_ElmSqrt(
+   CTA_Vector hvec_y  /* Handle of the receiving vector */
+   ){
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Vector *data_y;      /* All data of vector hvec_y  */
+   CTA_Function *function_y;   /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hvec_y,CTA_VECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_vector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hvec_y, (void*) &data_y);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   retval=CTA_Func_GetFunc(data_y->functions[I_CTA_VECTOR_ELMSQRT],&function_y);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_ELMSQRT");
+	   return retval;
+   }
+
+   /* Call (user) implementation */
+   function_y(data_y->data,&retval);
+   return retval;
+};
+
+
+#undef METHOD
+#define METHOD "ElmDiv"
+int CTA_Vector_ElmDiv(
+   CTA_Vector hvec_y,  /* Handle of the receiving vector */
+   CTA_Vector hvec_x  /* Handle of the sending vector */
+   ){
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Vector *data_x;      /* All data of vector hvec_x  */
+   CTAI_Vector *data_y;      /* All data of vector hvec_y  */
+   CTA_Function *function_x;   /* Function that must be called */
+   CTA_Function *function_y;   /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hvec_x,CTA_VECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_vector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hvec_x, (void*) &data_x);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   retval=CTA_Handle_Check((CTA_Handle) hvec_y,CTA_VECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_vector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hvec_y, (void*) &data_y);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   retval=CTA_Func_GetFunc(data_x->functions[I_CTA_VECTOR_ELMDIV],&function_x);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_ELMDIV");
+	   return retval;
+   }
+   retval=CTA_Func_GetFunc(data_y->functions[I_CTA_VECTOR_ELMDIV],&function_y);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_ELMDIV");
+	   return retval;
+   }
+   /* Check whether implementions match  */
+   if (function_x==function_y){
+
+      /* Call (user) implementation */
+      function_x(data_y->data,data_x->data,&retval);
+      return retval;
+   }
+   else {
+      return CTA_INCOMPATIBLE_VECTORS;
+   }
+};
+
+
+#undef METHOD
+#define METHOD "ElmProd"
+int CTA_Vector_ElmProd(
+   CTA_Vector hvec_y,  /* Handle of the receiving vector */
+   CTA_Vector hvec_x  /* Handle of the sending vector */
+   ){
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Vector *data_x;      /* All data of vector hvec_x  */
+   CTAI_Vector *data_y;      /* All data of vector hvec_y  */
+   CTA_Function *function_x;   /* Function that must be called */
+   CTA_Function *function_y;   /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hvec_x,CTA_VECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_vector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hvec_x, (void*) &data_x);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   retval=CTA_Handle_Check((CTA_Handle) hvec_y,CTA_VECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_vector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hvec_y, (void*) &data_y);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   retval=CTA_Func_GetFunc(data_x->functions[I_CTA_VECTOR_ELMPROD],&function_x);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_ELMPROD");
+	   return retval;
+   }
+   retval=CTA_Func_GetFunc(data_y->functions[I_CTA_VECTOR_ELMPROD],&function_y);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_ELMPROD");
+	   return retval;
+   }
+   /* Check whether implementions match  */
+   if (function_x==function_y){
+
+      /* Call (user) implementation */
+      function_x(data_y->data,data_x->data,&retval);
+      return retval;
+   }
+   else {
+      return CTA_INCOMPATIBLE_VECTORS;
+   }
+};
+
+
+#undef METHOD
+#define METHOD "Axpy"
+int CTA_Vector_Axpy(
+   CTA_Vector hvec_y,  /* Handle of the receiving vector */
+   double alpha,       /* value of alpha */
+   CTA_Vector hvec_x  /* Handle of the sending vector */
+   ){
+
+   /* Local variables */
+   int retval;                 /* Return value of COSTA call   */
+   CTAI_Vector *data_x;        /* All data of vector hvec_x    */
+   CTAI_Vector *data_y;        /* All data of vector hvec_y    */
+   CTA_Function *function_x;   /* Function that must be called */
+   CTA_Function *function_y;   /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hvec_x,CTA_VECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_vector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hvec_x, (void*) &data_x);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   retval=CTA_Handle_Check((CTA_Handle) hvec_y,CTA_VECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_vector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hvec_y, (void*) &data_y);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   retval=CTA_Func_GetFunc(data_x->functions[I_CTA_VECTOR_AXPY],&function_x);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_AXPY");
+	   return retval;
+   }
+   retval=CTA_Func_GetFunc(data_y->functions[I_CTA_VECTOR_AXPY],&function_y);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_AXPY");
+	   return retval;
+   }
+
+   /* Check whether implementions match  */
+   if (function_x==function_y){
+
+      /* Call (user) implementation */
+      function_x(data_y->data,&alpha,data_x->data,&retval);
+      return retval;
+   }
+   else {
+      return CTA_INCOMPATIBLE_VECTORS;
+   }
+};
+
+
+#undef METHOD
+#define METHOD "Dot"
+int CTA_Vector_Dot(
+   CTA_Vector hvec_x,  /* Handle of the first vector  */
+   CTA_Vector hvec_y,  /* Handle of the second vector */
+   double *dotprod
+   ){
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Vector *data_x;      /* All data of vector hvec_x  */
+   CTAI_Vector *data_y;      /* All data of vector hvec_y  */
+   CTA_Function *function_x;   /* Function that must be called */
+   CTA_Function *function_y;   /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hvec_x,CTA_VECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_vector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hvec_x, (void*) &data_x);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   retval=CTA_Handle_Check((CTA_Handle) hvec_y,CTA_VECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_vector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hvec_y, (void*) &data_y);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   retval=CTA_Func_GetFunc(data_x->functions[I_CTA_VECTOR_DOT],&function_x);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_DOT");
+	   return retval;
+   }
+   retval=CTA_Func_GetFunc(data_y->functions[I_CTA_VECTOR_DOT],&function_y);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_DOT");
+	   return retval;
+   }
+
+   /* Check whether implementions match  */
+   if (function_x==function_y){
+
+      /* Call (user) implementation */
+      function_x(data_x->data,data_y->data,dotprod,&retval);
+      return retval;
+   }
+   else {
+      return CTA_INCOMPATIBLE_VECTORS;
+   }
+};
+
+#undef METHOD
+#define METHOD "Dot"
+int CTA_Vector_Nrm2(
+   CTA_Vector hvec_x,  /* Handle of the vector  */
+   double *norm2
+   ){
+
+   /* Local variables */
+   int retval;               /* Return value of COSTA call   */
+   CTAI_Vector *data_x;      /* All data of vector hvec_x  */
+   CTA_Function *function_x; /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hvec_x,CTA_VECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_vector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hvec_x, (void*) &data_x);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   retval=CTA_Func_GetFunc(data_x->functions[I_CTA_VECTOR_NRM2],&function_x);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_NRM2");
+	   return retval;
+   }
+
+   /* Call (user) implementation */
+   function_x(data_x->data,norm2,&retval);
+   return retval;
+};
+
+#undef METHOD
+#define METHOD "Amax"
+int CTA_Vector_Amax(
+   CTA_Vector hvec_x,  /* Handle of the first vector  */
+   int *iloc
+   ){
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Vector *data_x;      /* All data of vector hvec_x  */
+   CTA_Function *function_x;   /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hvec_x,CTA_VECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_vector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hvec_x, (void*) &data_x);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   retval=CTA_Func_GetFunc(data_x->functions[I_CTA_VECTOR_AMAX],&function_x);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_AMAX");
+	   return retval;
+   }
+   /* Call (user) implementation */
+   function_x(data_x->data,iloc, &retval);
+   return retval;
+};
+
+#undef METHOD
+#define METHOD "GetMaxLen"
+int CTA_Vector_GetMaxLen(
+   CTA_Vector hvec_x,  /* Handle of the first vector  */
+   int *maxlen
+   ){
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Vector *data_x;      /* All data of vector hvec_x  */
+   CTA_Function *function_x;   /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+   retval=CTA_Handle_Check((CTA_Handle) hvec_x,CTA_VECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_vector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) hvec_x, (void*) &data_x);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+   retval=CTA_Func_GetFunc(data_x->functions[I_CTA_VECTOR_GETMAXLEN],&function_x);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_GETMAXLEN");
+	   return retval;
+   }
+
+   /* Call (user) implementation */
+   function_x(data_x->data, maxlen, &retval);
+   return retval;
+};
+
+#undef METHOD
+#define METHOD "Free"
+int CTA_Vector_Free(
+   CTA_Vector *hvec_x  /* Handle of vector  */
+   ){
+
+   /* Local variables */
+   int retval;             /* Return value of COSTA call   */
+   CTAI_Vector *data_x;      /* All data of vector hvec_x  */
+   CTA_Function *function_x;   /* Function that must be called */
+
+   /* Get pointer to implementation of this function */
+
+   if (*hvec_x==CTA_NULL) return CTA_OK;
+   retval=CTA_Handle_Check((CTA_Handle) *hvec_x,CTA_VECTOR);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_vector handle");
+       return retval;
+   }
+   retval=CTA_Handle_GetData((CTA_Handle) *hvec_x, (void*) &data_x);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   retval=CTA_Func_GetFunc(data_x->functions[I_CTA_VECTOR_FREE],&function_x);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot get function I_CTA_VECTOR_FREE");
+	   return retval;
+   }
+
+
+   /* Count allocated memory local and global */
+   CTAI_Vector_Memsize-=data_x->memsize;
+
+   /* Call (user) implementation */
+   function_x(data_x->data,&retval);
+   free(data_x->data);
+   free(data_x);
+   retval=CTA_Handle_Free(hvec_x);
+
+   return retval;
+};
+
+
+/** \brief Generate XML from one COSTA vector
+*
+*  \param hvec   I  handle of a COSTA vector
+*  \param writer I  the XML text writer
+*/
+
+void CTAI_XML_WriteVector(CTA_Vector hvec, const char *id, const char *caption, CTA_Metainfo minfo, int level, xmlTextWriter *writer) {
+   const char   *text;
+   int          dimension;
+   int          i;
+   char unitname[CTA_STRLEN_TAG];
+   int misval=0;
+   CTAI_Gridm thisgrid;
+   char misvaltxt[20];
+   char valtotxt[10];
+
+    if (minfo) {
+      // note: description according to xml-scheme should be at a state node, so it is not read here.
+      CTA_Metainfo_GetUnit(minfo,unitname);
+      CTA_Metainfo_GetRest(minfo,&misval);
+      CTA_Metainfo_GetGrid(minfo,&thisgrid);
+    }
+
+
+   /* Start an element the the name of the tree handle */
+   xmlTextWriterStartElement(writer, (xmlChar *) CTAI_Type2String(CTA_VECTOR));
+
+   /* write id (COSTA: tag of parent state) */
+   xmlTextWriterWriteAttribute(writer, CTAI_XML_ID,  (xmlChar *) id);
+
+   /* Write caption (COSTA: name of parent state */
+   xmlTextWriterWriteAttribute(writer, CTAI_XML_CAPTION, (xmlChar *) caption);
+   XML_newline(level,writer);
+
+   /* Write unit */
+   xmlTextWriterStartElement(writer, (xmlChar *) "unit");
+   xmlTextWriterWriteString(writer, (xmlChar *) unitname);
+   xmlTextWriterEndElement(writer); XML_newline(level,writer);
+   /* Write missingvalue */
+   xmlTextWriterStartElement(writer, (xmlChar *) "missingValue");
+   sprintf(misvaltxt,"%d",misval);
+   xmlTextWriterWriteString(writer, (xmlChar *) misvaltxt);
+   xmlTextWriterEndElement(writer); XML_newline(level,writer);
+
+   if (thisgrid.type > -99) {
+     /* write grid; always computational dimensions */
+     xmlTextWriterStartElement(writer, (xmlChar *) "grid");      level=level+3;XML_newline(level,writer);
+     xmlTextWriterStartElement(writer, (xmlChar *) "computationalSpace");    level=level+3;XML_newline(level,writer);
+
+   xmlTextWriterStartElement(writer, (xmlChar *) "dimension");
+   xmlTextWriterWriteAttribute(writer, CTAI_XML_ID,  (xmlChar *) "xi");
+   sprintf(valtotxt,"%d",thisgrid.nx);
+   xmlTextWriterWriteAttribute(writer, (xmlChar *) "length", (xmlChar *) valtotxt);
+   xmlTextWriterEndElement(writer);
+   if (thisgrid.type > 1) {
+     XML_newline(level,writer);
+     xmlTextWriterStartElement(writer, (xmlChar *) "dimension");
+     xmlTextWriterWriteAttribute(writer, CTAI_XML_ID,  (xmlChar *) "eta");
+     sprintf(valtotxt,"%d",thisgrid.ny);
+     xmlTextWriterWriteAttribute(writer, (xmlChar *) "length", (xmlChar *) valtotxt);
+   xmlTextWriterEndElement(writer);
+   }
+   if (thisgrid.type > 2) {
+     XML_newline(level,writer);
+     xmlTextWriterStartElement(writer, (xmlChar *) "dimension");
+     xmlTextWriterWriteAttribute(writer, CTAI_XML_ID,  (xmlChar *) "psi");
+     sprintf(valtotxt,"%d",thisgrid.nz);
+     xmlTextWriterWriteAttribute(writer, (xmlChar *) "length", (xmlChar *) valtotxt);
+     xmlTextWriterEndElement(writer);
+   }
+   level=level-3; XML_newline(level,writer);
+   xmlTextWriterEndElement(writer); XML_newline(level,writer);  //computationalSpace
+
+   if (0==strcmp(thisgrid.refdimp[1],"none")) {
+     printf("no reference for physical space given.\n");
+   } else {
+     xmlTextWriterStartElement(writer, (xmlChar *) "physicalSpace"); level=level+3; XML_newline(level,writer);
+
+     xmlTextWriterStartElement(writer, (xmlChar *) "dimension");
+     xmlTextWriterWriteAttribute(writer, (xmlChar *) "id", (xmlChar *) "x");
+     xmlTextWriterWriteAttribute(writer, (xmlChar *) "axes", (xmlChar *) "xi");
+     xmlTextWriterWriteAttribute(writer, (xmlChar *) "ref", (xmlChar *) thisgrid.refdimp[1]);
+     xmlTextWriterEndElement(writer);
+
+     if (thisgrid.type > 1) {
+       XML_newline(level,writer);
+       xmlTextWriterStartElement(writer, (xmlChar *) "dimension");
+       xmlTextWriterWriteAttribute(writer, (xmlChar *) "id", (xmlChar *) "y");
+       xmlTextWriterWriteAttribute(writer, (xmlChar *) "axes",(xmlChar *) "eta");
+       xmlTextWriterWriteAttribute(writer, (xmlChar *) "ref", (xmlChar *) thisgrid.refdimp[2]);
+       xmlTextWriterEndElement(writer);
+     }
+     if (thisgrid.type > 2) {
+       XML_newline(level,writer);
+       xmlTextWriterStartElement(writer, (xmlChar *) "dimension");
+       xmlTextWriterWriteAttribute(writer, (xmlChar *) "id", (xmlChar *) "z");
+       xmlTextWriterWriteAttribute(writer, (xmlChar *) "axes",(xmlChar *) "psi");
+       xmlTextWriterWriteAttribute(writer, (xmlChar *) "ref",(xmlChar *)  thisgrid.refdimp[3]);
+       xmlTextWriterEndElement(writer);
+     }
+     level=level-3;XML_newline(level,writer);
+
+     xmlTextWriterEndElement(writer); level=level-3;XML_newline(level,writer);  //physicalSpace
+   }
+
+
+
+   xmlTextWriterEndElement(writer); XML_newline(level,writer);  //grid
+   }
+
+
+   /* Get dimension */
+   CTA_Vector_GetSize(hvec, &dimension);
+
+   /* Write values */
+   xmlTextWriterStartElement(writer, CTAI_XML_VECTOR);
+
+   /* Write each value */
+   for (i = 1; i <= dimension; ++i) {
+
+      text = CTAI_Vector_GetStringVal(hvec, i);
+      xmlTextWriterWriteString(writer, (xmlChar *) text);
+      xmlTextWriterWriteString(writer, (xmlChar *) " ");
+      free((char *)text);
+
+   }
+
+   /* End the tree level elements */
+   xmlTextWriterEndElement(writer);
+   level=level-3; XML_newline(level,writer);
+   xmlTextWriterEndElement(writer);
+   XML_newline(level,writer);
+}
+
+/* -------------------------------------------------------------------- */
+
+/** \brief Create a COSTA vector from NEW xml-input
+*
+*  \param cur_node  I  Current XML node
+*  \return             Handle to create or CTA_NULL in case of an error.
+*/
+CTA_Vector CTAI_XML_CreateVector_New(xmlNode *cur_node, CTA_Metainfo minfo) {
+   xmlNode       *values_node = NULL;     /* values child node */
+   xmlNode       *txt_node = NULL;        /* text node */
+   CTA_Vector    hnew = CTA_NULL;         /* The new COSTA handle */
+   xmlChar       *val;                    /* element/property value */
+   CTA_Datatype  datatype = CTA_DOUBLE;   /* datatype of the vector */
+   CTA_VecClass  vecclass = CTA_DEFAULT_VECTOR;
+   int           dim = 0;                 /* vector dim */
+   char          *teststr, *teststr2;
+   int           misval;
+   double        *vals;
+   CTAI_Gridm    thisgrid;
+
+
+   /* Parse this node's attributes */
+
+   /* Get datatype */
+   val = xmlGetProp(cur_node, CTAI_XML_DATATYPE);
+   if (val) {
+      datatype = CTAI_String2Type((char *) val);
+      xmlFree(val);
+      if (datatype != CTA_DOUBLE && datatype != CTA_REAL &&
+          datatype != CTA_INTEGER && datatype != CTA_STRING) return CTA_NULL;
+   } else {
+     /* default: */
+     datatype = CTA_DOUBLE ;
+   }
+
+
+   /* Get dimension */
+   dim = 0;
+   val = xmlGetProp(cur_node, CTAI_XML_DIMENSION);
+   if (val) {
+      dim = atoi((char *) val);
+      xmlFree(val);
+   }
+
+   /* First: get all modes other than the 'vector' containing the values */
+   for (values_node = cur_node->children; values_node; values_node = values_node->next) {
+     if (0 == strcmp((char *) CTAI_XML_UNIT, (char *) values_node->name)) {
+       for (txt_node = values_node->children; txt_node; txt_node = txt_node->next) {
+         if (txt_node->type == XML_TEXT_NODE) {
+           //  printf("unit: |%s| %d \n",txt_node->content,strlen(txt_node->content));
+           CTA_Metainfo_SetUnit(minfo,(char *) txt_node->content);
+         }
+       }
+     }
+     if (0 == strcmp((char *) CTAI_XML_MISSINGVALUE, (char *) values_node->name)) {
+       for (txt_node = values_node->children; txt_node; txt_node = txt_node->next) {
+         if (txt_node->type == XML_TEXT_NODE) {
+           //        printf("missingvalue: |%s| %d \n",txt_node->content,strlen(txt_node->content));
+           if (0==strcmp((char *) txt_node->content,"NaN")) {misval=-9999;}
+           else {misval=atoi((char *) txt_node->content);}
+           CTA_Metainfo_SetRest(minfo,&misval);
+         }
+       }
+     }
+     if (0 == strcmp((char *) CTAI_XML_GRID, (char *) values_node->name)) {
+       //       printf("entering ctai_xml_grid ...\n");
+       CTAI_XML_CreateGrid(values_node, &thisgrid);
+       //      printf("... done; retval; type: %d %d \n",retval,thisgrid.type);
+       CTA_Metainfo_SetGrid(minfo, &thisgrid);
+     }
+
+   }
+
+
+   /* TODO: read vector class when it can be something else than CTA_DEFAULT_VECTOR */
+
+
+   /* Get values node. NOTE: it is denoted as 'vector'*/
+   values_node = NULL;
+   for (values_node = cur_node->children; values_node; values_node = values_node->next) {
+      if (0 == strcmp((char *) CTAI_XML_VECTOR, (char *) values_node->name)) break;
+   }
+
+   if (values_node) {
+
+     for (txt_node = values_node->children; txt_node; txt_node = txt_node->next) {
+       if (txt_node->type == XML_TEXT_NODE) {
+         if (txt_node->content) {
+
+
+           teststr = CTA_Malloc((strlen((char *) txt_node->content)+1)*sizeof(char));
+           teststr2 = CTA_Malloc((strlen((char *) txt_node->content)+1)*sizeof(char));
+           strcpy(teststr,(char *) txt_node->content);
+           strcpy(teststr2,teststr);
+
+           dim = CTAI_XML_read_txt_node_size(teststr);
+
+
+           if (dim > 0) {
+             /* Create vector */
+             CTA_Vector_Create(vecclass, dim, datatype, CTA_NULL, &hnew);
+             vals = CTA_Malloc((dim)*sizeof(double));
+             dim = CTAI_XML_read_txt_node_content(teststr2,vals);
+             //  printf("vals: %f %f %f\n",vals[0],vals[1],vals[2]);
+
+             free(teststr);
+             free(teststr2);
+             CTA_Vector_SetVals(hnew, vals,dim, datatype);
+
+             return hnew;
+           }
+
+         }
+       }
+     }
+   }
+
+   return CTA_NULL;
+}
+
+
+/* ---------------------- */
+int CTAI_XML_read_txt_node_size(char *txtstr){
+  int i;
+  // char *saveptr;
+  char *txtpart;
+
+  //for (i=0; txtpart != NULL; i++)
+  for (i=0; ; i++,txtstr=NULL)
+    {
+      txtpart = strtok(txtstr," ");
+      if (txtpart == NULL) break;
+    }
+  return i ;
+}
+
+int CTAI_XML_read_txt_node_content(char *txtstr, double *vals){
+  int i;
+  //  char *saveptr;
+  char *txtpart;
+
+  //for (i=0; txtpart != NULL; i++)
+  for (i=0; ; i++,txtstr=NULL)
+    {
+      txtpart = strtok(txtstr," ");
+      //      txtpart = strtok_r(txtstr," ",&saveptr);
+      if (txtpart == NULL) break;
+      //      printf(" %d: %s -> %f\n",i,txtpart, atof(txtpart));
+      vals[i]=atof(txtpart);
+    }
+  return i ;
+}
+
+
+long CTAI_Vector_GetMemsize(){
+   return CTAI_Vector_Memsize;
+}
+
+
+
+
+/* ------------------------------------------------- */
+
+/* Interfacing with Fortran */
+CTAEXPORT void CTA_VECTOR_DEFINECLASS_F77(char *name,int *h_func,int *hveccl,
+                             int *ierr, int len_name){
+  char  *c_name;
+  // create a c-string equivalent to name
+  c_name=CTA_Malloc((len_name+1)*sizeof(char));
+  CTA_fstr2cstr(name,c_name,len_name);
+
+  *ierr=CTA_Vector_DefineClass(c_name, (CTA_Func*) h_func,
+                               (CTA_VecClass*) hveccl);
+  free(c_name);
+};
+
+CTAEXPORT void CTA_VECTOR_DUPLICATE_F77(int *hvector1, int *hvector2, int *ierr){
+   *ierr=CTA_Vector_Duplicate(*hvector1, hvector2);
+};
+CTAEXPORT void CTA_VECTOR_CREATE_F77(int *hveccl, const int *n, int *datatype,
+                         int *usrdata, int *hvector, int *ierr){
+
+ *ierr=CTA_Vector_Create((CTA_VecClass) *hveccl, *n, (CTA_Datatype) *datatype,
+                      (CTA_Handle) *usrdata, (CTA_Vector*) hvector);
+
+};
+
+CTAEXPORT void CTA_VECTOR_GETSIZE_F77 (int *hvec, int *n, int *ierr){
+
+   *ierr= CTA_Vector_GetSize(*hvec, n);
+};
+
+CTAEXPORT void CTA_VECTOR_GETVALS_F77(int *hvec, void *vals, int  *n, int *datatype,
+                       int *ierr){
+
+   *ierr=CTA_Vector_GetVals(*hvec, vals, *n, *datatype);
+};
+
+CTAEXPORT void CTA_VECTOR_GETVAL_F77(int *hvec, int *i, void *val, int *datatype,
+                         int *ierr){
+
+   *ierr=CTA_Vector_GetVal(*hvec, *i, val, *datatype);
+};
+
+CTAEXPORT void CTA_VECTOR_SETVALS_F77(int *hvec, void *vals, int  *n, int *datatype,
+                        int *ierr){
+
+   *ierr=CTA_Vector_SetVals(*hvec, vals, *n, *datatype);
+};
+
+
+CTAEXPORT void CTA_VECTOR_SETVAL_F77(int *hvec, int  *i, void *val, int *datatype,
+                       int *ierr){
+
+   *ierr =CTA_Vector_SetVal((CTA_Vector) *hvec, *i, val,
+                            (CTA_Datatype) *datatype);
+}
+
+CTAEXPORT void CTA_VECTOR_SETCONSTANT_F77(int *hvec, void *val, int *datatype,
+                        int *ierr){
+
+   *ierr=CTA_Vector_SetConstant(*hvec, val, *datatype);
+};
+
+CTAEXPORT void CTA_VECTOR_SCAL_F77(int *hvec, double *alpha, int *ierr){
+
+   *ierr=CTA_Vector_Scal((CTA_Vector) *hvec,*alpha);
+};
+
+CTAEXPORT void CTA_VECTOR_COPY_F77( int *hvec_x, int *hvec_y, int *ierr){
+   *ierr=CTA_Vector_Copy( (CTA_Vector) *hvec_x, (CTA_Vector) *hvec_y);
+};
+
+CTAEXPORT void CTA_VECTOR_AXPY_F77( int *hvec_y, double *alpha, int *hvec_x, int *ierr){
+   *ierr=CTA_Vector_Axpy( (CTA_Vector) *hvec_y, *alpha, (CTA_Vector) *hvec_x);
+};
+
+CTAEXPORT void CTA_VECTOR_DOT_F77( int *hvec_x, int *hvec_y, double *dotprod, int *ierr){
+   *ierr=CTA_Vector_Dot( (CTA_Vector) *hvec_x, (CTA_Vector) *hvec_y,
+                         dotprod);
+};
+
+CTAEXPORT void CTA_VECTOR_NRM2_F77( int *hvec_x,double *norm2, int *ierr){
+   *ierr=CTA_Vector_Nrm2( (CTA_Vector) *hvec_x, norm2);
+};
+
+CTAEXPORT void CTA_VECTOR_AMAX_F77( int *hvec_x, int *iloc, int *ierr){
+   *ierr=CTA_Vector_Amax( (CTA_Vector) *hvec_x, iloc);
+};
+
+CTAEXPORT void CTA_VECTOR_GETMAXLEN_F77( int *hvec_x, int *maxlen, int *ierr){
+   *ierr=CTA_Vector_GetMaxLen( (CTA_Vector) *hvec_x, maxlen);
+};
+
+CTAEXPORT void CTA_VECTOR_EXPORT_F77(int *hvec_x, int *usrdata, int *ierr){
+   *ierr=CTA_Vector_Export((CTA_Vector) *hvec_x, (CTA_Handle) *usrdata);
+};
+
+CTAEXPORT void CTA_VECTOR_IMPORT_F77(int *hvec_x, int *usrdata, int *ierr){
+   *ierr=CTA_Vector_Import((CTA_Vector) *hvec_x, (CTA_Handle) *usrdata);
+};
+
+
+
+CTAEXPORT void CTA_VECTOR_PRINT_TABLE_F77(int *table, int *ncolumns, int *vformats, int *ierr){
+   *ierr=CTA_Vector_Print_Table((CTA_Vector *) table, *ncolumns, (CTA_Vector) *vformats);
+};
+
+CTAEXPORT void CTA_VECTOR_FREE_F77(int *hvec_x, int *ierr){
+   *ierr=CTA_Vector_Free((CTA_Vector*) hvec_x);
+};
+
+CTAEXPORT void CTA_VECTOR_ELMDIV_F77( int *hvec_y, int *hvec_x, int *ierr){
+   *ierr=CTA_Vector_ElmDiv( (CTA_Vector) *hvec_y, (CTA_Vector) *hvec_x);
+};
+
+CTAEXPORT void CTA_VECTOR_ELMPROD_F77( int *hvec_y, int *hvec_x, int *ierr){
+   *ierr=CTA_Vector_ElmProd( (CTA_Vector) *hvec_y, (CTA_Vector) *hvec_x);
+};
+
+
+
diff --git a/costa/native/cta/src/cta_vector_blas.c b/costa/native/cta/src/cta_vector_blas.c
new file mode 100644
index 000000000..03132d919
--- /dev/null
+++ b/costa/native/cta/src/cta_vector_blas.c
@@ -0,0 +1,1357 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_vector_blas.c $
+$Revision: 4445 $, $Date: 2014-06-03 08:56:12 +0200 (Tue, 03 Jun 2014) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+#include <stdio.h>
+#include <math.h>
+#include <memory.h>
+#include <string.h>
+#include "cta_mem.h"
+#include "cta_flush.h"
+#include "cta_file.h"
+#include "cta_errors.h"
+#include "cta_string.h"
+#include "cta_vector_blas.h"
+#include "ctai.h"
+#include "ctai_handles.h"
+#include "cta_message.h"
+
+/* Maximum length of a path index */
+#define MAXIXLEN (31)
+
+#define IDEBUG (0)
+#define CLASSNAME "CTA_Vector_blas" 
+
+/*
+This file contains an implementation of the CTA_VECTOR_NUMFUNC operations
+which a COSTA-vector should be able to perform. These implementations
+have been created using BLAS.
+
+ The function CTA_Vector_blas_initialise creates a CTA_VecClass, which 
+ uses these implementations.
+
+  The user may create different vector-classes, which use a different set of
+  implementations. To do so, a function must be written which s similar to
+  CTA_Vector_blas_initialise, and implementations must be supplied for 
+  those functions which are differently implemented than the BLAS-ones.
+
+   CTA_Vector_Create_Size          return memory-size required for creating a vector
+   CTA_Vector_blas_initialise      create a vector class: set function pointers to all
+   member variables.
+
+*/
+void CTA_Vector_blas_initialise(CTA_VecClass *hveccl)
+{
+   CTA_Intf hintf=0;
+   CTA_Func h_func[I_CTA_VECTOR_NUMFUNC];
+
+   // The vector h_func is filled with COSTA-function handles of the implementations in this
+   // file.
+   CTA_Func_Create(" ",&CTAI_Vector_Create_Size, hintf,&h_func[I_CTA_VECTOR_CREATE_SIZE]);
+   CTA_Func_Create(" ",&CTAI_Vector_Create_Init, hintf,&h_func[I_CTA_VECTOR_CREATE_INIT]);
+   CTA_Func_Create(" ",&CTAI_Vector_getvals,    hintf,&h_func[I_CTA_VECTOR_GETVALS]    );
+   CTA_Func_Create(" ",&CTAI_Vector_getval,     hintf,&h_func[I_CTA_VECTOR_GETVAL]     );
+   CTA_Func_Create(" ",&CTAI_Vector_setvals,    hintf,&h_func[I_CTA_VECTOR_SETVALS]    );
+   CTA_Func_Create(" ",&CTAI_Vector_setval,     hintf,&h_func[I_CTA_VECTOR_SETVAL]     );
+   CTA_Func_Create(" ",&CTAI_Vector_setconst,   hintf,&h_func[I_CTA_VECTOR_SETCONST]   );
+   CTA_Func_Create(" ",&CTAI_Vector_scal,       hintf,&h_func[I_CTA_VECTOR_SCAL]       );
+   CTA_Func_Create(" ",&CTAI_Vector_copy,       hintf,&h_func[I_CTA_VECTOR_COPY]       );
+   CTA_Func_Create(" ",&CTAI_Vector_axpy,       hintf,&h_func[I_CTA_VECTOR_AXPY]       );
+   CTA_Func_Create(" ",&CTAI_Vector_dot,        hintf,&h_func[I_CTA_VECTOR_DOT]        );
+   CTA_Func_Create(" ",&CTAI_Vector_nrm2,       hintf,&h_func[I_CTA_VECTOR_NRM2]       );
+   CTA_Func_Create(" ",&CTAI_Vector_amax,       hintf,&h_func[I_CTA_VECTOR_AMAX]       );
+   CTA_Func_Create(" ",&CTAI_Vector_GetMaxLen,  hintf,&h_func[I_CTA_VECTOR_GETMAXLEN]  );
+   CTA_Func_Create(" ",&CTAI_Vector_export,     hintf,&h_func[I_CTA_VECTOR_EXPORT]     );
+   CTA_Func_Create(" ",&CTAI_Vector_import,     hintf,&h_func[I_CTA_VECTOR_IMPORT]     );
+   CTA_Func_Create(" ",&CTAI_Vector_Print_Table,hintf,&h_func[I_CTA_VECTOR_PRINT_TABLE]);
+   CTA_Func_Create(" ",&CTAI_Vector_free,       hintf,&h_func[I_CTA_VECTOR_FREE]       );
+   CTA_Func_Create(" ",&CTAI_Vector_appendval,  hintf,&h_func[I_CTA_VECTOR_APPENDVAL]  );
+   CTA_Func_Create(" ",&CTAI_Vector_elmdiv,     hintf,&h_func[I_CTA_VECTOR_ELMDIV]     );
+   CTA_Func_Create(" ",&CTAI_Vector_elmprod,    hintf,&h_func[I_CTA_VECTOR_ELMPROD]    );
+   CTA_Func_Create(" ",&CTAI_Vector_elmsqrt,    hintf,&h_func[I_CTA_VECTOR_ELMSQRT]    );
+
+   CTA_Vector_DefineClass("cta_vector_blas",h_func,hveccl);
+}
+
+void CTAI_Vector_Create_Size(
+                             // INPUTS (all unused):
+                             int *n,                 //
+                             CTA_Datatype *datatype, //
+                             CTA_Handle *usrdata,   //
+                             // OUTPUTS:
+                             int *retval,            // error code (see cta_datatypes.h for possible error codes)
+                             int *memsize            // The number of bytes which are necessary to store one 
+                             //     CTAI_Vector_blas, with a pointer to the contents (data), but without the
+                             //     contents themselves.
+                             ){
+
+   *memsize=(int) sizeof(CTAI_Vector_blas);
+   *retval=CTA_OK;
+};
+
+
+void CTAI_Vector_Create_Init(
+/*
+Allocate the memory which is neccesary to store the vector values
+*/
+// IN-OUTPUTS
+CTAI_Vector_blas *x,    // The COSTA-BLAS-vector for which memory is reserved.
+// INPUTS:
+int *n,                 // dimension of the vector
+CTA_Datatype *datatype, // data type
+CTA_Handle *usrdata,    // ???
+// OUTPUTS
+int *retval)            // Error code. Possible error: Illegal data type
+{
+   *retval = CTA_OK;
+   x->n=*n;
+   x->size=*n;
+   x->values = NULL;
+
+   // allocate an array
+   if (*datatype==CTA_REAL){
+      if (n>0){     
+         x->values=CTA_Malloc(*n * sizeof(float));
+      }
+   }
+   else if (*datatype==CTA_DOUBLE){
+      if (n>0) {     
+         x->values=CTA_Malloc(*n * sizeof(double));
+      }
+   }
+   else if (*datatype==CTA_INTEGER || *datatype==CTA_HANDLE){
+      if (n>0){
+         x->values=CTA_Malloc(*n * sizeof(int));
+      }
+   }
+   else if (*datatype==CTA_STRING){
+      // For strings: allocate the CTA_String array AND create the strings
+      if (n>0){
+         int i;
+         CTA_String *StrVal;
+      
+         x->values=CTA_Malloc(*n * sizeof(CTA_String));
+         StrVal = (CTA_String*) x->values;
+         for (i=0; i<*n; i++)
+         {
+            *retval = CTA_String_Create(&(StrVal[i]));
+            if (*retval != CTA_OK) return;
+         }
+      }
+   }
+   else {
+      x->datatype = CTA_NULL;
+      x->n=0;
+      x->size=0;
+      
+      *retval=CTA_ILLEGAL_DATATYPE; 
+      return;
+   }
+
+   // set datatype
+   x->datatype=*datatype;
+};
+
+/* Copy single real array to double real array */
+static void CTAI_Single2Double(float *rx, double *dx, int n){
+   int i;
+   for (i=0;i<n;i++){
+      dx[i]=(double) rx[i];
+   }
+}
+
+/* Copy double real array to single real array */
+static void CTAI_Double2Single(double *dx, float *rx, int n){
+   int i;
+   for (i=0;i<n;i++){
+      rx[i]=(float) dx[i];
+   }
+}
+
+static void CTAI_Double2Double(double *dx, double *rx, int n){
+   int i;
+   for (i=0;i<n;i++){
+      rx[i]= dx[i];
+   }
+}
+
+void CTAI_Vector_getval(
+                        // Get one value from a COSTA-BLAS-vector
+                        // INPUTS
+                        CTAI_Vector_blas *x,   // The COSTA-BLAS-vector of which the values is
+                        //     returned 
+                        int *i,                // Index of the value to be returned in variable val 
+                        // OUTPUTS 
+                        void *val,             // The i-th value of the vector *x
+                        // INPUTS
+                        CTA_Datatype *datatype,// Data-type of variable *val. This must be the
+                        //      same datatype as that of x
+                        int *retval            // Error code. Possible errors: 
+                        //  * negative number of values asked OR no values asked OR 
+                        //    more values asked than available
+                        //  * data types of *val and *x differ
+                        ){
+   /* Local variables */
+   float *fptr;
+   double *dptr;
+   int *iptr;
+   
+   /* check dimensions */
+   if (*i<1 || *i>x->n){
+      *retval=CTA_DIMENSION_ERROR;
+      return;
+   }
+   //printf("CTAI_Vector_getval: returning the %d-th value\n",*i);
+
+   /* check data types and handle conversion */
+   if (*datatype!=x->datatype){
+      if (x->datatype==CTA_DOUBLE && *datatype==CTA_REAL) {
+         double *fptr= (double *)x->values;
+         CTAI_Double2Single(fptr+(*i-1), val, 1);
+      } else if (x->datatype==CTA_REAL && *datatype==CTA_DOUBLE){
+         float *fptr= (float *)x->values;
+         CTAI_Single2Double(fptr+(*i-1),val, 1);
+      } else {
+         *retval=CTA_INCOMPATIBLE_VECTORS;
+         return;
+      }
+   }
+   else if (*datatype==CTA_REAL){
+      /* copy REAL-values using memcpy */
+      fptr=x->values;
+      memcpy(val,fptr+(*i-1),sizeof(float));
+   }
+   else if (*datatype==CTA_INTEGER || *datatype==CTA_HANDLE){
+      /* copy INTEGER-values using memcpy */
+      iptr=x->values;
+      memcpy(val,iptr+(*i-1),sizeof(int));
+   }
+   else if (*datatype==CTA_DOUBLE){
+      /* copy DOUBLE-values using memcpy */
+      dptr=x->values;
+      memcpy(val,dptr+(*i-1),sizeof(double));
+   }   
+   else if (*datatype==CTA_STRING){
+      /* copy CTA_STRING-values using COSTA-functions */
+      char *str;
+      int len;
+      // Cast the vector data to string-handles
+      CTA_String *StrPtr = (CTA_String*) x->values;
+      
+      // Get the string length
+      *retval = CTA_String_GetLength(StrPtr[*i-1],&len);
+      if (*retval!=CTA_OK) return;
+      
+      // Get the string contents from the COSTA-string
+      str=CTA_Malloc((len+1)*sizeof(char));
+      *retval = CTA_String_Get(StrPtr[*i-1],str);
+      if (*retval!=CTA_OK) return;
+      
+      // Set the string contents into output COSTA-string
+      StrPtr = (CTA_String*) val;
+      *retval = CTA_String_Set( *StrPtr ,str);
+      if (*retval!=CTA_OK) return;
+      free(str);
+   }
+   else {
+      *retval=CTA_ILLEGAL_DATATYPE; return;
+   }
+   *retval=CTA_OK;
+};
+
+
+void CTAI_Vector_getvals(
+                         // Get all the values from a COSTA-BLAS-vector
+                         CTAI_Vector_blas *x,
+                         void *vals,
+                         int *n,
+                         CTA_Datatype *datatype,
+                         int *retval
+                         ){
+   /* Local variables */
+   int one=1; 
+   
+
+   /* check dimensions */
+   if (*n!=x->n){
+      *retval=CTA_DIMENSION_ERROR;
+      return;
+   }
+   
+   /* check data types and handle conversion */
+   if (*datatype!=x->datatype){
+      if (x->datatype==CTA_DOUBLE && *datatype==CTA_REAL) {
+         CTAI_Double2Single(x->values, vals, *n);
+      } else if (x->datatype==CTA_REAL && *datatype==CTA_DOUBLE){
+         CTAI_Single2Double(x->values, vals, *n);
+      } else {
+         *retval=CTA_INCOMPATIBLE_VECTORS;
+         return;
+      }
+   }
+   /* copy values using BLAS copy */
+   else if (*datatype==CTA_INTEGER || *datatype==CTA_HANDLE){
+      SCOPY_F77(n,x->values,&one,vals,&one);
+   }
+   else if (*datatype==CTA_REAL){
+      SCOPY_F77(n,x->values,&one,vals,&one);
+   }
+   else if (*datatype==CTA_DOUBLE){
+      DCOPY_F77(n,x->values,&one,vals,&one);
+   }   
+   else if (*datatype==CTA_STRING){
+      /* copy values using GetVal*/
+      
+      // Cast the vector data to string-handles
+      CTA_String *StrPtr = (CTA_String*) vals;
+      
+      int i; 
+      for (i=1; i<=*n; i++)
+      {
+         CTAI_Vector_getval( x, &i, &(StrPtr[i-1]), datatype, retval);
+         if (*retval!=CTA_OK) return;
+      }
+   }
+   else {
+      *retval=CTA_ILLEGAL_DATATYPE; return;
+   }
+   *retval=CTA_OK;
+};
+
+
+
+void CTAI_Vector_setval(
+                        // Set one value of a COSTA-BLAS-vector
+                        CTAI_Vector_blas *x,
+                        int *i,
+                        void *val,
+                        CTA_Datatype *datatype,
+                        int *retval
+                        ){
+   /* Local variables */
+
+   /* check dimensions */
+   if (*i<1 || *i>x->n){
+      *retval=CTA_DIMENSION_ERROR;
+      return;
+   }
+
+   /* check data types and handle conversion */
+   if (*datatype!=x->datatype){
+      if (*datatype==CTA_REAL && x->datatype==CTA_DOUBLE ) {
+         double *fptr= (double *)x->values;
+         CTAI_Single2Double(val, fptr+(*i-1), 1);
+      } else if (*datatype==CTA_DOUBLE && x->datatype==CTA_REAL){
+         float *fptr= (float *)x->values;
+         CTAI_Double2Single(val, fptr+(*i-1), 1);
+      } else {
+         *retval=CTA_INCOMPATIBLE_VECTORS;
+         return;
+      }
+   /* copy values using memcpy */
+   } else if (*datatype==CTA_REAL){
+      float *fptr= (float *)x->values;
+      memcpy(fptr+(*i-1),val,sizeof(float));
+   }
+   else if (*datatype==CTA_INTEGER || *datatype==CTA_HANDLE){
+      int * iptr= (int *)x->values;
+      memcpy(iptr+(*i-1),val,sizeof(int));
+   }
+   else if (*datatype==CTA_DOUBLE){
+      double *dptr=(double *)x->values;
+      memcpy(dptr+(*i-1),val,sizeof(double));
+   }
+   else if (*datatype==CTA_STRING){
+      int len;
+      char *str;
+      CTA_String str_in;
+      CTA_String *StrPtr;
+
+      /* copy CTA_STRING-values using COSTA-functions */
+      // Cast the vector data to string-handles
+      str_in = *((CTA_String*)val);
+
+      // Get the string length
+      *retval = CTA_String_GetLength(str_in,&len);
+      if (*retval!=CTA_OK) return;
+
+      // Get the string contents from the COSTA-string
+      str=CTA_Malloc((len+1)*sizeof(char));
+      *retval = CTA_String_Get(str_in,str);
+      if (*retval!=CTA_OK) return;
+
+      // Set the string contents into output COSTA-string
+      StrPtr = (CTA_String*) x->values;
+      *retval = CTA_String_Set( StrPtr[*i-1] ,str);
+      if (*retval!=CTA_OK) return;
+      free(str);
+   }
+   else {
+      *retval=CTA_ILLEGAL_DATATYPE; return;
+   }
+   *retval=CTA_OK;
+};
+
+void CTAI_Vector_setvals(
+                         // Set all values of a COSTA-BLAS-vector
+                         CTAI_Vector_blas *x,
+                         void *vals,
+                         int *n,
+                         CTA_Datatype *datatype,
+                         int *retval
+                         ){
+   /* Local variables */
+   int one=1; 
+
+   /* check dimensions */
+   if (*n!=x->n){
+      *retval=CTA_DIMENSION_ERROR;
+      return;
+   }
+
+   /* check data types and handle conversion */
+  if (*datatype!=x->datatype){
+      if (*datatype==CTA_REAL && x->datatype==CTA_DOUBLE ) {
+         CTAI_Single2Double(vals, x->values, *n);
+      } else if (*datatype==CTA_DOUBLE && x->datatype==CTA_REAL){
+         CTAI_Double2Single(vals, x->values, *n);
+      } else {
+         *retval=CTA_INCOMPATIBLE_VECTORS;
+         return;
+      }
+
+   /* copy values using BLAS copy or memcpy*/
+  } else if (*datatype==CTA_INTEGER || *datatype==CTA_HANDLE){
+      SCOPY_F77(n,vals,&one,x->values,&one);
+   }
+   else if (*datatype==CTA_REAL){
+      SCOPY_F77(n,vals,&one,x->values,&one);
+   }
+   else if (*datatype==CTA_DOUBLE){
+      DCOPY_F77(n,vals,&one,x->values,&one);
+   } 
+   else if (*datatype==CTA_STRING){
+      /* copy CTA_STRING-values using COSTA-functions */
+      // Cast the vector data to string-handles
+      CTA_String *StrPtr = (CTA_String*) vals;
+
+      int i; 
+      for (i=1; i<=*n; i++)
+      {
+         CTAI_Vector_setval( x, &i, &(StrPtr[i-1]), datatype, retval);
+         if (*retval!=CTA_OK) return;
+      }
+
+   }
+   else {
+      *retval=CTA_ILLEGAL_DATATYPE; return;
+   }
+   *retval=CTA_OK;
+};
+
+
+#undef METHOD
+#define METHOD "CTAI_Vector_setconst"
+void CTAI_Vector_setconst(
+                          // Set all values of a COSTA-BLAS-vector to the same value
+                          CTAI_Vector_blas *x,
+                          void *val,
+                          CTA_Datatype *datatype,
+                          int *retval
+                          ){
+   /* Local variables */
+   float sval;
+   double dval;
+
+   /* check data types */
+   if (*datatype==CTA_REAL) {
+      memcpy(&sval,val,sizeof(float));
+   } else if (*datatype==CTA_DOUBLE) {
+      memcpy(&dval,val,sizeof(double));
+   }
+
+
+   if (x->datatype==CTA_REAL && *datatype==CTA_DOUBLE) {
+      sval=(float) dval;
+   } else if (x->datatype==CTA_DOUBLE && *datatype==CTA_REAL) {
+      dval=(double) sval;
+   } else if (*datatype!=x->datatype){
+      CTA_WRITE_ERROR("DataType of vector and constant value is not compatible");
+      *retval=CTA_INCOMPATIBLE_VECTORS;
+      return ;
+   }
+
+   /* copy values using BLAS copy */
+   if (x->datatype==CTA_REAL){
+      float *svals = (float *) x->values;
+      int i;
+      for (i=0;i<x->n;i++){ svals[i]=sval; }
+   }
+   else if (x->datatype==CTA_INTEGER || x->datatype==CTA_HANDLE){
+      int *ival  = (int *) val;
+      int *ivals = (int *) x->values;
+      int i;
+      for (i=0;i<x->n;i++){ ivals[i]=*ival; }
+   }
+   else if (x->datatype==CTA_DOUBLE){
+      double * dvals= (double *) x->values;
+      int i;
+      for (i=0;i<x->n;i++){ dvals[i]=dval; }
+   }
+   else if (x->datatype==CTA_STRING){
+      /* copy CTA_STRING-values using COSTA-functions */
+      char *str;
+      // Cast the vector data to string-handles
+      CTA_String *StrPtr = (CTA_String*) val;
+
+      // Get the string length
+      int len;
+      int i;
+      *retval = CTA_String_GetLength(*StrPtr,&len);
+      if (*retval!=CTA_OK) return;
+
+      // Get the string contents from the COSTA-string
+      str=CTA_Malloc((len+1)*sizeof(char));
+      *retval = CTA_String_Get(*StrPtr,str);
+      if (*retval!=CTA_OK) return;
+
+      StrPtr = (CTA_String*) x->values;
+      for (i=0;i<x->n;i++)
+      {
+         // Set the string contents into output COSTA-string
+         *retval = CTA_String_Set( StrPtr[i] ,str);
+         if (*retval!=CTA_OK) return;
+      }
+      free(str);
+   }
+   else {
+      CTA_WRITE_ERROR("DataType is not supported");
+      *retval=CTA_ILLEGAL_DATATYPE; return;
+   }
+   *retval=CTA_OK;
+};
+
+
+
+
+void CTAI_Vector_scal(
+                      // Multiply a COSTA-BLAS-vector with a scalar
+                      CTAI_Vector_blas *x,
+                      double *alpha,
+                      int *retval
+                      ){
+   /* Local variables */
+   int one=1; 
+   float salpha;
+
+   if (x->datatype==CTA_REAL){
+      salpha=(float) *alpha;
+      SSCAL_F77(&x->n,&salpha, x->values,&one);
+   }
+   else if (x->datatype==CTA_INTEGER){
+      int i;
+      int *ival = (int *)x->values;
+      for (i=0; i<x->n; i++) 
+      {
+         ival[i] = (int) (*alpha * ival[i]);
+      }
+   }
+   else if (x->datatype==CTA_DOUBLE){
+      DSCAL_F77(&x->n,alpha, x->values,&one);
+   }
+   else {
+      *retval=CTA_ILLEGAL_DATATYPE; return;
+   }
+   *retval=CTA_OK;
+};
+
+
+void CTAI_Vector_copy(
+                      // Copy the values of a COSTA-BLAS-vector to another existing one.
+                      CTAI_Vector_blas *x, 
+                      CTAI_Vector_blas *y, 
+                      int *retval
+                      ){
+   /* Local variables */
+   int one=1; 
+
+   /* check dimensions */
+   if (y->n!=x->n){
+      *retval=CTA_DIMENSION_ERROR;
+      return;
+   }
+
+   /* check data types and handle conversion */
+   if (x->datatype!=y->datatype){
+      if (x->datatype==CTA_DOUBLE && y->datatype==CTA_REAL) {
+         CTAI_Double2Single(x->values, y->values, x->n);
+
+      } else if (x->datatype==CTA_REAL && y->datatype==CTA_DOUBLE){
+         CTAI_Single2Double(x->values, y->values, x->n);
+      } else {
+         *retval=CTA_INCOMPATIBLE_VECTORS;
+         return;
+      }
+   } else if (x->datatype==CTA_REAL){
+      SCOPY_F77(&x->n,x->values,&one,y->values,&one);
+   }
+   else if (x->datatype==CTA_INTEGER || x->datatype==CTA_HANDLE ){
+      SCOPY_F77(&x->n,x->values,&one,y->values,&one);
+   }
+   else if (x->datatype==CTA_DOUBLE){
+      DCOPY_F77(&x->n,x->values,&one,y->values,&one);
+   }
+   else if (x->datatype==CTA_STRING){
+      int i;
+      CTA_String *yval = (CTA_String *)y->values;
+      for (i=1; i<=x->n; i++)
+      {
+         CTAI_Vector_getval( x, &i, &(yval[i-1]), &y->datatype, retval);
+         if (*retval!=CTA_OK) return;
+      }
+   }
+   else {
+      *retval=CTA_ILLEGAL_DATATYPE; return;
+   }
+
+   *retval=CTA_OK;
+};
+
+void CTAI_Vector_axpy(
+                      CTAI_Vector_blas *y, 
+                      double *alpha,
+                      CTAI_Vector_blas *x, 
+                      int *retval
+                      ){
+   /* Local variables */
+   int one=1; 
+   float salpha;
+
+   /* check dimensions */
+   if (y->n!=x->n){
+      *retval=CTA_DIMENSION_ERROR;
+      return;
+   }
+
+   /* check data types */
+   if (y->datatype!=x->datatype){
+      printf("datatype y=%d datatype y=%d", y->datatype,x->datatype);
+      *retval=CTA_INCOMPATIBLE_VECTORS;
+      return;
+   }
+
+   if (x->datatype==CTA_REAL){
+      salpha=(float) *alpha;
+      SAXPY_F77(&x->n,&salpha,x->values,&one,y->values,&one);
+   }
+   else if (x->datatype==CTA_INTEGER){
+      int i;
+      int * xval = (int *) x->values;
+      int * yval = (int *) y->values;
+      for (i=0; i<x->n; i++) { yval[i] = (int) (yval[i] + *alpha * xval[i]);}
+   }
+   else if (x->datatype == CTA_DOUBLE) {
+      DAXPY_F77(&x->n,alpha,x->values,&one,y->values,&one);
+   }
+   else {
+      *retval=CTA_ILLEGAL_DATATYPE; return;
+   }
+   
+   *retval=CTA_OK;
+};
+
+void CTAI_Vector_dot(
+                     // Calculate the dot-product of two COSTA-BLAS-vectors.
+                     CTAI_Vector_blas *x, 
+                     CTAI_Vector_blas *y, 
+                     double *dotprod,
+                     int *retval
+                     ){
+   /* Local variables */
+   int one=1; 
+
+   /* check dimensions */
+   if (y->n!=x->n){
+      *retval=CTA_DIMENSION_ERROR;
+      return;
+   }
+
+   /* check data types */
+   if (y->datatype!=x->datatype){
+      *retval=CTA_INCOMPATIBLE_VECTORS;
+      return;
+   }
+
+   if (x->datatype==CTA_REAL){
+      *dotprod= (double) SDOT_F77(&x->n,x->values,&one,y->values,&one);
+   }
+   else if (x->datatype==CTA_INTEGER){
+      int i;
+      int * xval = (int *) x->values;
+      int * yval = (int *) y->values;
+      *dotprod=0.0;
+      for (i=0; i<x->n; i++) { *dotprod += xval[i]*yval[i];}
+   }
+   else if (x->datatype==CTA_DOUBLE){
+      *dotprod=DDOT_F77(&x->n,x->values,&one,y->values,&one);
+   }
+   else {
+      *retval=CTA_ILLEGAL_DATATYPE; return;
+   }
+
+   *retval=CTA_OK;
+};
+
+void CTAI_Vector_nrm2(
+                      CTAI_Vector_blas *x,
+                      double *norm2,
+                      int *retval
+                      ){
+   
+   /* Local variables */
+   int one=1; 
+   
+   if (x->datatype==CTA_REAL){
+      *norm2= (double) SNRM2_F77(&x->n,x->values,&one);
+   }
+   else if (x->datatype==CTA_INTEGER){
+      int i;
+      int * xval = (int *) x->values;
+      *norm2=0.0;
+      for (i=0; i<x->n; i++) { *norm2 += xval[i]*xval[i];}
+      *norm2 = sqrt(*norm2);
+   }
+   else if (x->datatype==CTA_DOUBLE){
+      *norm2=DNRM2_F77(&x->n,x->values,&one);
+   }
+   else {
+      *retval=CTA_ILLEGAL_DATATYPE; return;
+   }
+   *retval=CTA_OK;
+};
+
+void CTAI_Vector_amax(
+                      CTAI_Vector_blas *x,
+                      int *iloc,
+                      int *retval
+                      ){
+
+   /* Local variables */
+   int one=1; 
+
+						  
+   /* quick return in case of 0-length vector*/
+   if (x->n==0){
+      *iloc=0;
+      *retval=CTA_OK;
+   }
+
+   if (x->datatype==CTA_REAL){
+      *iloc=ISAMAX_F77(&x->n,x->values,&one);
+   }
+   else if (x->datatype==CTA_INTEGER){
+      int i;
+      int * xval = (int *) x->values;
+
+      // Find the location in C-notation (counting from zero)
+      *iloc=0;
+      for (i=1; i<x->n; i++) 
+      { 
+         if (abs(xval[i])>abs(xval[*iloc])) {*iloc = i;}
+      }
+
+      // Increase the index to get a vector-index (counting from one)
+      (*iloc)++;
+   }
+   else if (x->datatype==CTA_DOUBLE){
+      *iloc=IDAMAX_F77(&x->n,x->values,&one);
+   }
+   else {
+      *retval=CTA_ILLEGAL_DATATYPE; return;
+   }
+
+   *retval=CTA_OK;
+};
+
+void CTAI_Vector_GetMaxLen(
+                           CTAI_Vector_blas *x,
+                           int *maxlen,
+                           int *retval
+                           )
+{
+   int i;
+   int length;
+   CTA_String * values = (CTA_String*) x->values;
+   if (x->datatype!=CTA_STRING)
+   {
+      *retval = CTA_ILLEGAL_DATATYPE; return;
+   }
+
+   *maxlen = 0;
+   for (i=0; i<x->n; i++) 
+   {
+      *retval = CTA_String_GetLength(values[i], &length);
+      if (*retval != CTA_OK) return;
+
+      if (length>*maxlen) {*maxlen=length;}
+   }
+
+   *retval=CTA_OK;
+};
+
+void CTAI_Vector_export(
+                        CTAI_Vector_blas *x,
+                        CTA_Handle *usrdata,
+                        int *retval
+                        )
+{
+   /* Local variables */
+   int i;
+   FILE *file;
+   int size;
+
+   /* set pointers for easy printing */
+   float      *xreal   = (float *)      x->values;
+   double     *xdouble = (double *)     x->values;
+   int        *xint    = (int *)        x->values;
+   CTA_String *xstr    = (CTA_String *) x->values;
+
+   if (IDEBUG>0) printf("Vector_blas: Start of export\n");
+   if (IDEBUG>0) printf("Vector_blas: size of n %d\n",x->n);
+
+   CTA_Message_Quiet(CTA_TRUE);
+   if (CTA_Handle_Check(*usrdata,CTA_FILE)==CTA_OK) {
+      CTA_Message_Quiet(CTA_FALSE);
+      /* We have to export to a file */
+      *retval=CTA_File_Get(*usrdata,&file);
+      if (*retval != CTA_OK) return;
+
+      if (CTA_FLUSH_ALWAYS) CTA_Flush();
+
+      if (x->datatype == CTA_STRING) {
+         fprintf(file,"{");
+      } else {
+         fprintf(file,"[");
+      }
+      /* Print the values */
+      for (i=0;i<x->n;i++){
+         if (x->datatype==CTA_REAL){
+            fprintf(file,"%10.5g",xreal[i]);
+         }
+         else if (x->datatype==CTA_DOUBLE){
+            fprintf(file,"%18.8le  ",xdouble[i]);
+         } 
+         else if (x->datatype == CTA_INTEGER){
+            fprintf(file,"%d  ",xint[i]);
+         }
+         else if (x->datatype == CTA_STRING){
+            int len;
+            char *str;
+            *retval = CTA_String_GetLength(xstr[i],&len);
+            if (*retval != CTA_OK) return;
+
+            str=CTA_Malloc((len+1)*sizeof(char));
+            *retval = CTA_String_Get(xstr[i],str);
+            if (*retval != CTA_OK) return;
+
+            fprintf(file,"'%s'\n",str);
+            free(str);
+         }
+         if (i+1<x->n) {
+            fprintf(file,"\n");
+         }
+      }
+      if (x->datatype == CTA_STRING) {
+         fprintf(file,"};\n");
+      } else {
+         fprintf(file,"];\n");
+      }
+      if (CTA_FLUSH_ALWAYS) CTA_Flush();
+   } 
+   else if (CTA_Handle_Check(*usrdata,CTA_PACK)==CTA_OK) {
+      CTA_Message_Quiet(CTA_FALSE);
+      if (IDEBUG>0) printf("Vector_blas: packing vector\n");
+      *retval=CTA_Pack_Add(*usrdata,x,sizeof(CTAI_Vector_blas));
+      if (x->datatype==CTA_STRING) {
+         for (i=0;i<x->n;i++){
+            *retval=CTA_String_Export(xstr[i],*usrdata);
+            if (retval!=CTA_OK) return;
+         }
+      } else {
+         if (IDEBUG>0) printf("Vector_blas: packing values\n");
+         *retval=CTA_SizeOf(x->datatype, &size);
+         if (IDEBUG>0) printf("Vector_blas: size of data-type %d\n",size);
+         if (IDEBUG>0) printf("Vector_blas: size of n %d\n",x->n);
+         *retval=CTA_Pack_Add(*usrdata,x->values,x->n*size);
+         if (IDEBUG>0) printf("Vector_blas: packing done\n");
+      }
+   } else {
+      CTA_Message_Quiet(CTA_FALSE);
+      *retval=CTA_FORMAT_NOT_SUPPORTED;
+      return;
+   }
+   *retval=CTA_OK;
+};
+
+
+void CTAI_Vector_import(
+                        CTAI_Vector_blas *x,
+                        CTA_Handle *usrdata,
+                        int *retval
+                        )
+{
+   /* Local variables */
+   BOOL packout;
+   int size;
+
+   void *values;
+   int n;
+   int datatype;
+
+   /* set pointers for easy printing */
+
+   packout = (CTA_Handle_Check(*usrdata,CTA_PACK)==CTA_OK);
+
+   datatype=x->datatype;
+   n=x->n;
+   values=x->values;
+
+   if (packout) {
+      *retval=CTA_Pack_Get(*usrdata,x,sizeof(CTAI_Vector_blas));
+      x->values=values;
+      *retval=CTA_SizeOf(x->datatype, &size);
+      size=size*x->n;
+
+      // Realloc values part when needed
+      if (x->datatype!=datatype || n!=x->n) {
+              x->values=realloc(x->values,size);
+      }
+      *retval=CTA_Pack_Get(*usrdata,x->values,size);
+   } else {
+      *retval=CTA_FORMAT_NOT_SUPPORTED;
+   }
+   *retval=CTA_OK;
+};
+
+
+
+void CTAI_Vector_Print_Table(
+                             CTAI_Vector_blas **table,
+                             int *ncolumns,
+                             CTAI_Vector_blas *formats,
+                             int *retval)
+{
+   int nrows =     /* number of rows in the table */
+      table[0]->n; 
+   int irow, icol; /* loop counters for rows and columns */
+   char *str;
+
+   CTA_String * formvalues =  /* */
+      (CTA_String *) formats->values;
+
+   char** formstr =  /* string array with the format strings */
+      CTA_Malloc(sizeof(char *) * (*ncolumns));
+
+   if (CTA_FLUSH_ALWAYS) CTA_Flush();
+
+   // Check dimensions
+   if (formats->n != *ncolumns) 
+   {
+      *retval = CTA_DIMENSION_ERROR; return;
+   }
+
+   for (icol=1; icol<*ncolumns; icol++) 
+   {
+      if (table[icol]->n != nrows)
+      {
+         *retval = CTA_DIMENSION_ERROR; return;
+      }
+   }
+
+   // Retrieve the format strings
+   for (icol=0; icol<*ncolumns; icol++) 
+   {
+      int length;
+      *retval = CTA_String_GetLength(formvalues[icol],&length);
+      if (*retval != CTA_OK) return;
+
+      formstr[icol] = CTA_Malloc(sizeof(char)*(length+1));
+      *retval = CTA_String_Get(formvalues[icol],formstr[icol]);
+      if (*retval != CTA_OK) return;
+   }
+
+   // Print the table
+   for (irow=0; irow < nrows; irow++)
+   {
+      for (icol=0; icol<*ncolumns; icol++)
+      {
+         // print item (irow,icol) of the table
+         if (table[icol]->datatype == CTA_INTEGER || table[icol]->datatype == CTA_HANDLE)
+         {
+            int * tabvalues =(int *) table[icol]->values;
+            printf(formstr[icol],tabvalues[irow]);
+         }
+         else if (table[icol]->datatype == CTA_REAL)
+         {
+            float * tabvalues =(float *) table[icol]->values;
+            printf(formstr[icol],tabvalues[irow]);
+         }
+         else if (table[icol]->datatype == CTA_DOUBLE)
+         {
+            double * tabvalues =(double *) table[icol]->values;
+            printf(formstr[icol],tabvalues[irow]);
+         }
+         else if (table[icol]->datatype == CTA_STRING)
+         {
+            int length;
+            CTA_String * tabvalues =(CTA_String *) table[icol]->values;
+
+            *retval = CTA_String_GetLength(tabvalues[irow],&length);
+            if (*retval != CTA_OK) return;
+
+            str = CTA_Malloc(sizeof(char)*(length+1));
+            *retval = CTA_String_Get(tabvalues[irow],str);
+            if (*retval != CTA_OK) return;
+
+            printf(formstr[icol],str);
+            free(str); 
+         }
+      }
+      printf("\n");
+   }
+
+   // Delete items which are no longer necessary
+   for (icol=1; icol<*ncolumns; icol++) 
+   {
+      free(formstr[icol]);
+   }
+   free(formstr);
+
+   if (CTA_FLUSH_ALWAYS) CTA_Flush();
+
+   *retval=CTA_OK;
+};
+
+
+/* Append a value to a vector */
+void CTAI_Vector_appendval(
+                           CTAI_Vector_blas *x,
+                           void *val,
+                           CTA_Datatype *datatype,
+                           int *retval
+                           ){
+   int i = 1 + x->n; /* Index of the new element    */
+
+   /* Update n */
+   ++(x->n);
+
+   /* Ensure the vector is long enough and update n */
+   if (x->n > x->size)
+   {
+      x->size = MAX((int)(1.5 * (float)(x->size) + 0.5), 1 + x->n);
+
+      /* Reallocate the values array */
+      if (x->datatype==CTA_INTEGER || x->datatype==CTA_HANDLE || x->datatype==CTA_STRING){
+         x->values=realloc(x->values, x->size * sizeof(int));
+      }
+      else if (x->datatype==CTA_REAL){
+         x->values=realloc(x->values, x->size * sizeof(float));
+      }
+      else if (x->datatype==CTA_DOUBLE){
+         x->values=realloc(x->values, x->size * sizeof(double));
+      }
+      else {
+         *retval=CTA_ILLEGAL_DATATYPE; 
+         return;
+      }
+   }
+
+   /* Call setval() to set the value at the new element */
+   CTAI_Vector_setval(x, &i, val, datatype, retval);
+};
+
+
+void CTAI_Vector_free(
+                      // The idea of re-alloating a vector seems not to have been considered.
+                      // It would not be so difficult to allow reallocation.
+                      CTAI_Vector_blas *x,
+                      int *retval
+                      ){
+   int i;
+   CTA_String *xstr;
+   CTA_Handle *xhdl;
+
+   switch (x->datatype)
+   {
+   case CTA_STRING:
+      xstr = (CTA_String *) x->values;
+      for (i = 0; i<x->n; i++) 
+      {
+         *retval = CTA_String_Free(&xstr[i]);
+         if (*retval != CTA_OK) return;
+      }
+      break;
+   case CTA_HANDLE:
+      xhdl = (CTA_Handle *) x->values;
+      for (i = 0; i<x->n; i++) 
+      {
+         *retval = CTA_Handle_Free_All(&xhdl[i]);
+         if (*retval != CTA_OK) return;
+      }
+      break;
+   }
+
+   free(x->values);
+   *retval=CTA_OK;
+};
+
+
+/* Get the index from a path component */
+/* Updates name so it will be without index */
+/* The index is indicated with an ending on '<index>' */
+static int CTAI_Vector_GetPathIndex(const char *name) {
+   char       *open;
+   const char *close;
+   char       szix[MAXIXLEN+1];
+
+   /* Find open and close brackets */
+   open = strrchr(name, '<');
+   close = strrchr(name, '>');
+   if (!open || !close || (open + 2) > close ||
+       (close - open) >= MAXIXLEN) {
+      /* Default index: 1 */
+      return 1;
+   }
+
+   /* Remove index from name */
+   *open = '\0';
+
+   /* Determine the index */
+   strncpy(szix, open + 1, close - open - 1);
+   szix[close - open - 1] = '\0';
+   return atoi(szix);  
+}
+
+
+/* Find the handle with the given name (INTERNAL USE) */
+/* Returns the handle or CTA_NULL if not found or in case of an error */
+CTA_Handle CTAI_Vector_FindHandle(CTA_Vector hvec, const char* name)
+{
+   CTA_Vector *vector;
+   CTA_Handle   h;
+   int          i;
+   int          n;
+   int          counter;
+   int          index;
+
+   if (CTA_OK != CTA_Handle_GetData(hvec, (void*) &vector)) return CTA_NULL;   
+   if (CTA_OK != CTA_Vector_GetSize(hvec, &n)) return CTA_NULL;      
+
+   /* Get index from name */
+   index = CTAI_Vector_GetPathIndex(name);
+
+   /* Search for handle */
+   counter = 0;
+   for (i = 1; i <= n; ++i) {
+      if (CTA_OK != CTA_Vector_GetVal(hvec, i, &h, CTA_HANDLE)) return CTA_NULL;      
+      if (0 == strcmp(CTAI_Handle_GetName(h), name)) {
+         ++counter;
+         if (counter == index) return h;
+      }
+   }
+   return CTA_NULL;
+}
+
+/* Count the number of handles with the given name (INTERNAL USE) */
+/* Returns the count or -1 in case of an error */
+int CTAI_Vector_CountHandles(CTA_Vector hvec, const char* name)
+{
+   CTA_Vector *vector;
+   CTA_Handle   h;
+   int          i;
+   int          n;
+   int          counter;
+
+   if (CTA_OK != CTA_Handle_GetData(hvec, (void*) &vector)) return -1;   
+   if (CTA_OK != CTA_Vector_GetSize(hvec, &n)) return -1;      
+
+   counter = 0;
+   for (i = 1; i <= n; ++i) {
+      if (CTA_OK != CTA_Vector_GetVal(hvec, i, &h, CTA_HANDLE)) return -1;      
+      if (0 == strcmp(CTAI_Handle_GetName(h), name)) {
+         ++counter;
+      }
+   }
+   return counter;
+}
+
+void CTAI_Vector_elmdiv(
+                      CTAI_Vector_blas *y, 
+                      CTAI_Vector_blas *x, 
+                      int *retval
+                      ){
+   /* Local variables */
+   float  *svec_y, *svec_x;
+   double *dvec_y, *dvec_x;
+   int i,n;
+
+   /* check dimensions */
+   if (y->n!=x->n){
+      *retval=CTA_DIMENSION_ERROR;
+      return;
+   }
+
+  /* Handle data-type stuff for x */
+  if (x->datatype ==CTA_DOUBLE) {
+      dvec_x=(double*) x->values;
+   }
+   else if (x->datatype ==CTA_REAL){
+      svec_x=(float*) x->values;
+   } else {
+      *retval=CTA_ILLEGAL_DATATYPE; return;
+   }
+
+    /* Handle data-type stuff for y */
+  if (y->datatype ==CTA_DOUBLE) {
+      dvec_y=(double*) y->values;
+   }
+   else if (y->datatype ==CTA_REAL){
+      svec_y=(float*) y->values;
+   } else {
+      *retval=CTA_ILLEGAL_DATATYPE; return;
+   }
+ 
+   n=x->n;
+   if (x->datatype ==CTA_DOUBLE && y->datatype ==CTA_DOUBLE){
+      for (i=0;i<n;i++){
+         dvec_y[i]=dvec_y[i]/dvec_x[i];
+      }
+   }
+   else if (x->datatype ==CTA_REAL && y->datatype ==CTA_REAL){
+      for (i=0;i<n;i++){
+         svec_y[i]=svec_y[i]/svec_x[i];
+      }
+   }
+   else if (x->datatype ==CTA_DOUBLE && y->datatype ==CTA_REAL){
+      for (i=0;i<n;i++){
+         svec_y[i]=((float) dvec_y[i])/svec_x[i];
+      }
+   }
+   else if (x->datatype ==CTA_REAL && y->datatype ==CTA_DOUBLE){
+      for (i=0;i<n;i++){
+         dvec_y[i]=((double) svec_y[i])/dvec_x[i];
+      }
+   }
+   *retval=CTA_OK;
+};
+
+void CTAI_Vector_elmprod(
+                      CTAI_Vector_blas *y, 
+                      CTAI_Vector_blas *x, 
+                      int *retval
+                      ){
+   /* Local variables */
+   float  *svec_y, *svec_x;
+   double *dvec_y, *dvec_x;
+   int i,n;
+
+   /* check dimensions */
+   if (y->n!=x->n){
+      *retval=CTA_DIMENSION_ERROR;
+      return;
+   }
+
+  /* Handle data-type stuff for x */
+  if (x->datatype ==CTA_DOUBLE) {
+      dvec_x=(double*) x->values;
+   }
+   else if (x->datatype ==CTA_REAL){
+      svec_x=(float*) x->values;
+   } else {
+      *retval=CTA_ILLEGAL_DATATYPE; return;
+   }
+
+    /* Handle data-type stuff for y */
+  if (y->datatype ==CTA_DOUBLE) {
+      dvec_y=(double*) y->values;
+   }
+   else if (y->datatype ==CTA_REAL){
+      svec_y=(float*) y->values;
+   } else {
+      *retval=CTA_ILLEGAL_DATATYPE; return;
+   }
+ 
+   n=x->n;
+   if (x->datatype ==CTA_DOUBLE && y->datatype ==CTA_DOUBLE){
+      for (i=0;i<n;i++){
+         dvec_y[i]=dvec_x[i]*dvec_y[i];
+      }
+   }
+   else if (x->datatype ==CTA_REAL && y->datatype ==CTA_REAL){
+      for (i=0;i<n;i++){
+         svec_y[i]=svec_x[i]*svec_y[i];
+      }
+   }
+   else if (x->datatype ==CTA_DOUBLE && y->datatype ==CTA_REAL){
+      for (i=0;i<n;i++){
+         svec_y[i]=((float) dvec_x[i])*svec_y[i];
+      }
+   }
+   else if (x->datatype ==CTA_REAL && y->datatype ==CTA_DOUBLE){
+      for (i=0;i<n;i++){
+         dvec_y[i]=((double) svec_x[i])*dvec_y[i];
+      }
+   }
+   *retval=CTA_OK;
+};
+
+void CTAI_Vector_elmsqrt(
+                      CTAI_Vector_blas *y, 
+                      int *retval
+                      ){
+   /* Local variables */
+   float  *svec_y;
+   double *dvec_y;
+   int i,n;
+
+   /* Handle data-type stuff for y */
+   if (y->datatype ==CTA_DOUBLE) {
+      dvec_y=(double*) y->values;
+   }
+   else if (y->datatype ==CTA_REAL){
+      svec_y=(float*) y->values;
+   } else {
+      *retval=CTA_ILLEGAL_DATATYPE; return;
+   }
+ 
+   n=y->n;
+   if (y->datatype ==CTA_DOUBLE){
+      for (i=0;i<n;i++){
+         dvec_y[i]=sqrt(dvec_y[i]);
+      }
+   }
+   else if (y->datatype ==CTA_REAL){
+      for (i=0;i<n;i++){
+         svec_y[i]=(float) sqrt(svec_y[i]);
+      }
+   }
+   *retval=CTA_OK;
+};
+
diff --git a/costa/native/cta/src/cta_xml.c b/costa/native/cta/src/cta_xml.c
new file mode 100644
index 000000000..fd62d29fc
--- /dev/null
+++ b/costa/native/cta/src/cta_xml.c
@@ -0,0 +1,860 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/cta_xml.c $
+$Revision: 3407 $, $Date: 2012-08-17 13:50:50 +0200 (Fri, 17 Aug 2012) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <string.h>
+#include "cta_mem.h"
+#include "ctai_xml.h"
+#include "f_cta_utils.h"
+#include "cta_datatypes.h"
+#include "cta_defaults.h"
+#include "cta_errors.h"
+#include "cta_time.h"
+#include "cta_vector.h"
+#include "cta_sobs.h"
+#include "cta_xml.h"
+#include "ctai.h"
+#include "ctai_datatypes.h"
+#include "ctai_handles.h"
+#include "ctai_string.h"
+#include "ctai_vector.h"
+#include "ctai_sobs.h"
+#include "cta_message.h"
+
+
+#define CTA_XML_READ_F77   F77_CALL(cta_xml_read,CTA_XML_READ)
+#define CTA_XML_WRITE_F77  F77_CALL(cta_xml_write,CTA_XML_WRITE)
+
+#define CLASSNAME "CTA_XML"
+
+#define DEBUG 0
+/* Local interfaces */
+CTA_Handle CTAI_XML_CreateObject_Comb(xmlNode *cur_node);
+CTA_TreeVector CTAI_XML_CreatetreeVector(xmlNode *cur_node);
+int CTAI_XML_TreeVector_regular_grid(CTA_TreeVector treevec, CTA_TreeVector root );
+CTA_SObsClass CTAI_XML_CreateSObsClass(xmlNode *cur_node);
+CTA_ModelClass CTAI_XML_CreateModelClass(xmlNode *cur_node);
+void CTAI_XML_WriteTreeVector(int issubtree, int isxfv,CTA_TreeVector treevec, int level,xmlTextWriter *writer);
+
+/***************************************************************
+READ
+***************************************************************/
+
+/** \brief Get content of a text element; remove training spaces, tabs etc.
+*
+*  \param   cur_node  I  Text node.
+*  \return  Text content of the given (TEXT) node. To free().
+*/
+static char *CTAI_XML_GetContent(xmlNode *cur_node) {
+   char       *outtxt;
+   const xmlChar *intxt;
+   int         pos;
+
+   intxt = cur_node->content;
+   /* Find position of the last non-special character */
+   pos = strlen((char *) intxt) - 1;
+   while (pos >= 0 && intxt[pos] < 33) {
+      --pos;
+   }
+   /* Allocate return string */
+   if (pos < 0) {
+      /* Empty string */
+      outtxt = (char*)CTA_Malloc(1);
+      outtxt[0] = '\0';
+   } else {
+      /* Non-empty string */
+      outtxt = (char*)CTA_Malloc(2 + pos);
+      memcpy(outtxt, intxt, pos + 1);
+      outtxt[pos + 1]='\0';
+   }
+   return outtxt;
+}
+
+
+/** \brief Create a default COSTA tree.
+*
+*  \param cur_node  I  Current XML node
+*  \return             Handle to create or CTA_NULL in case of an error.
+*/
+static CTA_Tree CTAI_XML_CreateDefaultTree(xmlNode *cur_node) {
+   xmlChar       *val;        /* element  value */
+   xmlAttr       *cur_prop;   /* property loop counter */
+   CTA_Tree      htree;       /* COSTA tree */
+   CTA_String    hstr;        /* COSTA string */
+   int           retval;
+
+   /* Get node text */
+   retval = CTA_Tree_Create(&htree);
+   if (retval == CTA_OK) {
+      CTAI_Handle_SetName(htree, (char *) cur_node->name);
+
+
+      if (0!=strcmp((char *) cur_node->name,"treeVectorFile")) {
+
+        /* Parse this node's attributes (only if it is not treeVectorFile*/
+
+        for (cur_prop = cur_node->properties; cur_prop; cur_prop = cur_prop->next) {
+
+          val = xmlGetProp(cur_node, cur_prop->name);
+          CTA_String_Create(&hstr);
+          CTA_String_Set(hstr, (char *) val);
+          xmlFree(val);
+          CTA_Tree_AddHandle(htree, (char *) cur_prop->name, hstr);
+        }
+      }
+      return htree;
+   }
+   return CTA_NULL;
+}
+
+
+
+static CTA_Tree CTAI_XML_Create_Combine_SObs(xmlNode *startnode, CTA_Tree hparent) {
+   xmlNode       *cur_node;   /* node loop counter */
+   CTA_Handle     hobj;       /* COSTA object */
+
+
+   for (cur_node = startnode; cur_node; cur_node = cur_node->next) {
+      if (cur_node->type == XML_ELEMENT_NODE || cur_node->type == XML_TEXT_NODE) {
+         /* Create COSTA object  from the current node */
+        hobj = CTAI_XML_CreateObject_Comb(cur_node);
+         if (hobj != CTA_NULL) {
+            if (hparent == CTA_NULL) {
+               /* Create tree */
+               hparent = hobj;
+            } else {
+               /* Add object to the parent */
+               CTA_Tree_AddHandle(hparent, CTAI_Handle_GetName(hobj), hobj);
+            }
+            if (CTAI_Handle_GetDatatype(hobj) == CTA_TREE) {
+               /* In case of a tree, handle nested COSTA objects */
+               CTAI_XML_Create_Combine_SObs(cur_node->children, hobj);
+            }
+         }
+      }
+   }
+
+
+   return hparent;
+
+}
+
+
+/** \brief Create a COSTA stochastic observer.
+*
+*  \param cur_node  I  Current XML node
+*  \return             Handle to create or CTA_NULL in case of an error.
+*/
+static CTA_StochObs CTAI_XML_CreateSObs(xmlNode *cur_node) {
+   CTA_StochObs  hnew, hnew2;             /* The new COSTA handle */
+   int           retval;
+   xmlChar       *name     = NULL;
+   xmlChar       *database = NULL;
+   xmlChar       *selection = NULL;
+   xmlChar       *timeoffset = NULL;
+   xmlChar       *sobs_class_name = NULL;
+   CTA_SObsClass sobsclass = CTA_DEFAULT_SOBS;
+   CTA_String    hsel;
+   int           is_combined;
+   CTA_Tree      hnew_sub;
+   CTA_Handle    hdb;
+   CTA_Vector    hvec1, htimeoffset;
+   double        timeoffsetval;
+
+
+   is_combined = 0;
+   /* Parse this node's attributes */
+   name     = xmlGetProp(cur_node, CTAI_XML_ID);
+   sobs_class_name = xmlGetProp(cur_node, CTAI_XML_SOBSCLASSNAME);
+   database = xmlGetProp(cur_node, CTAI_XML_DATABASE);
+   selection = xmlGetProp(cur_node, CTAI_XML_SELECT);
+   timeoffset = xmlGetProp(cur_node, CTAI_XML_TIMEOFFSET);
+
+   /* first check if the stochobs is a 'combined' one */
+
+   if (sobs_class_name) {
+      if (0==strcmp((char *) sobs_class_name,"CTA_COMBINE_SOBS")) {
+         sobsclass = CTA_COMBINE_SOBS ;
+         if (DEBUG) printf("-- ctai_create_sobs: combiner! --\n");
+
+         // read a subtree using the sobs-combiner-xml-read
+         // hnew_sub is a valid cta_subtree with the cta_sobs already created
+         hnew_sub = CTAI_XML_Create_Combine_SObs(cur_node->children,CTA_NULL);
+         is_combined = 1;
+      } else {    //sobsclass can be sql or netcdf or ...
+         // TODO: better: a  handle_find or sobsclass_find action
+         if (DEBUG) printf("-- ctai_create_sobs: class name given ,NO combiner! --\n");
+         if (0==strcmp((char *) sobs_class_name,"CTA_NETCDF_SOBS")) {
+            sobsclass = CTA_NETCDF_SOBS ;
+         }
+      }
+   } else { // sobsclass remains default
+     if (DEBUG) printf("-- ctai_create_sobs: NO class name given! --\n");
+   };
+
+
+   hnew=CTA_NULL;
+
+
+      /* Create stochastic observer */
+
+   CTA_String_Create(&hdb);
+      if (database) {
+
+
+          CTA_String_Set(hdb, (char *) database);
+
+
+          if (timeoffset){
+             timeoffsetval = atof((char *) timeoffset);
+
+             // make a vector of two handles: the subtree and the offset value
+             retval = CTA_Vector_Create(CTA_DEFAULT_VECTOR, 2, CTA_HANDLE, CTA_NULL, &hvec1);
+             // fill first element (name of database)
+             retval = CTA_Vector_SetVal(hvec1,1,&hdb,CTA_HANDLE);
+             //fill second element
+             retval = CTA_Vector_Create(CTA_DEFAULT_VECTOR, 1, CTA_DOUBLE, CTA_NULL, &htimeoffset);
+
+             retval = CTA_Vector_SetVal(htimeoffset,1,&timeoffsetval,CTA_DOUBLE);
+             retval = CTA_Vector_SetVal(hvec1,2,&htimeoffset,CTA_HANDLE);
+             // create sobs
+
+             retval = CTA_SObs_Create(sobsclass, hvec1, &hnew);
+
+          } else {
+
+            // 'old' situation: userdata is name of database
+          retval = CTA_SObs_Create(sobsclass, hdb, &hnew);
+          }
+      }
+      else { //combined CTA_OBS
+        if (is_combined == 1) {
+          /* create the combined SObs  with for the 2nd argument not a database
+             but the subsobs subtree and the timeoffset  */
+          // make a vector of two handles: the subtree and the offset value
+          retval = CTA_Vector_Create(CTA_DEFAULT_VECTOR, 2, CTA_HANDLE, CTA_NULL, &hvec1);
+          // fill first element
+          retval = CTA_Vector_SetVal(hvec1,1,&hnew_sub,CTA_HANDLE);
+          //fill second element
+          retval = CTA_Vector_Create(CTA_DEFAULT_VECTOR, 1, CTA_DOUBLE, CTA_NULL, &htimeoffset);
+          if (timeoffset){
+             timeoffsetval = atof((char *) timeoffset);
+          } else {
+             timeoffsetval = 0.0;
+          }
+          retval = CTA_Vector_SetVal(htimeoffset,1,&timeoffsetval,CTA_DOUBLE);
+          retval = CTA_Vector_SetVal(hvec1,2,&htimeoffset,CTA_HANDLE);
+
+
+          retval = CTA_SObs_Create(sobsclass, hvec1, &hnew);
+
+        } else {
+          printf("ERROR: neither database given nor combined observer\n");
+          exit(-1);
+        }
+
+      }
+
+
+      CTA_String_Free(&hdb);
+      if (retval == CTA_OK) {
+         /* Perform initial selection */
+         if (selection) {
+            CTA_String_Create(&hsel);
+            CTA_String_Set(hsel, (char *) selection);
+            retval=CTA_SObs_CreateSel(hnew, hsel, &hnew2);
+            CTA_String_Free(&hsel);
+            CTA_SObs_Free(&hnew);
+            if (retval != CTA_OK){
+               return CTA_NULL;
+            }
+            hnew=hnew2;
+         }
+
+         /* Set name (if any) */
+         if (name) {
+            CTAI_Handle_SetName(hnew, (char *) name);
+           // xmlFree(name);
+         } else {
+            CTAI_Handle_SetName(hnew, "");
+         }
+      }
+
+      //      if (is_combined == 1) {
+        // attach combined-subtree to sobs
+
+      //}
+
+   if (name) xmlFree(name);
+   if (database) xmlFree(database);
+   if (selection) xmlFree(selection);
+
+   return hnew;
+}
+
+
+
+   CTA_Handle CTAI_XML_CreateObject_Comb(xmlNode *cur_node) {
+   xmlChar       *val;              /* Element value */
+   CTA_Handle    hnew = CTA_NULL;   /* The return value */
+
+   // read objects in part of the xml-file containing the subSObs. The objects are either SObs or default trees.
+
+   if (cur_node->type == XML_TEXT_NODE) {
+      /* If this node is a non-empty text node, create a string */
+      val = (xmlChar *) CTAI_XML_GetContent(cur_node);
+
+
+      if (strlen((char *) val) < 1) {
+      if (val) xmlFree(val);
+         return CTA_NULL;
+      }
+      CTA_String_Create(&hnew);
+      CTA_String_Set(hnew, (char *) val);
+      xmlFree(val);
+      CTAI_Handle_SetName(hnew, "");
+   } else {
+      /* This is not a text node */
+      switch (CTAI_String2Type((char *) cur_node->name)) {
+      case CTA_SOBS:
+        /* Create a COSTA stochastic observer */
+        hnew = CTAI_XML_CreateSObs(cur_node);
+        break;
+      }
+      if (hnew == CTA_NULL) {
+        /* Default: create a COSTA subtree */
+        hnew = CTAI_XML_CreateDefaultTree(cur_node);
+      }
+   }
+   return hnew;
+}
+
+
+
+
+/** \brief Create a COSTA time.
+*
+*  \param cur_node  I  Current XML node
+*  \return             Handle to create or CTA_NULL in case of an error.
+*/
+static CTA_Time CTAI_XML_CreateTime(xmlNode *cur_node) {
+   CTA_Time      hnew;                    /* The new COSTA handle */
+   int           retval;
+   xmlChar       *name   = NULL;
+   xmlChar       *sstart = NULL;
+   xmlChar       *sstop  = NULL;
+   xmlChar       *sstep  = NULL;
+   double        dstart;
+   double        dstop;
+   double        dstep;
+   double        tmp;
+
+   /* Parse this node's attributes */
+   name   = xmlGetProp(cur_node, CTAI_XML_ID);
+   sstart = xmlGetProp(cur_node, CTAI_XML_START);
+   sstop  = xmlGetProp(cur_node, CTAI_XML_STOP);
+   sstep  = xmlGetProp(cur_node, CTAI_XML_STEP);
+
+   /* Convert to numbers and/or fill defaults */
+   if (sstart) {
+      dstart = atof((char *) sstart);
+      xmlFree(sstart);
+   } else {
+      dstart = 0.0;
+   }
+   if (sstop) {
+      dstop = atof((char *) sstop);
+      xmlFree(sstop);
+   } else {
+      dstop = dstart;
+   }
+   if (dstop < dstart) {
+      tmp = dstart;
+      dstart = dstop;
+      dstop = tmp;
+   }
+   if (sstep) {
+      dstep = atof((char *) sstep);
+      xmlFree(sstep);
+   } else {
+      /* Default */
+      dstep = MIN(1.0, (dstop - dstart));
+   }
+
+   /* Create time */
+   retval = CTA_Time_Create(&hnew);
+   if (retval == CTA_OK) {
+      /* Set name (if any) */
+      if (name) {
+         CTAI_Handle_SetName(hnew, (char *) name);
+         xmlFree(name);
+      } else {
+         CTAI_Handle_SetName(hnew, "");
+      }
+
+      /* Set values */
+      CTA_Time_SetSpan(hnew, dstart, dstop);
+      CTA_Time_SetStep(hnew, dstep);
+      return hnew;
+   }
+   return CTA_NULL;
+}
+
+
+
+
+/** \brief Create a COSTA object.
+*
+*  \param cur_node  I  Current XML node
+*  \return             Handle for a COSTA node to create or 0 in case of an error.
+*/
+ static CTA_Handle CTAI_XML_CreateObject(xmlNode *cur_node) {
+   char       *val;              /* Element value */
+   CTA_Handle    hnew = CTA_NULL;   /* The return value */
+   CTA_Metainfo minfo;
+
+   if (cur_node->type == XML_TEXT_NODE) {
+      /* If this node is a non-empty text node, create a string */
+      val = CTAI_XML_GetContent(cur_node);
+
+
+      if (strlen(val) < 1) {
+      if (val) free(val);
+         return CTA_NULL;
+      }
+      CTA_String_Create(&hnew);
+      CTA_String_Set(hnew, val);
+      free(val);
+      CTAI_Handle_SetName(hnew, "");
+   } else {
+      /* This is not a text node */
+      switch (CTAI_String2Type((char *) cur_node->name)) {
+      case CTA_TIME:
+         /* Create a COSTA time */
+         hnew = CTAI_XML_CreateTime(cur_node);
+         break;
+      case CTA_VECTOR:
+         /* Create a COSTA vector */
+        CTA_Metainfo_Create(&minfo);
+        hnew = CTAI_XML_CreateVector_New(cur_node,minfo);
+         break;
+      case CTA_TREEVECTOR:
+        /* Create a (general) treeVector */
+        hnew = CTAI_XML_CreatetreeVector(cur_node);
+        // retval = CTA_TreeVector_Export(hnew, CTA_FILE_STDOUT);
+        CTAI_XML_TreeVector_regular_grid(hnew, hnew);
+        break;
+      case CTA_FUNCTION:
+        /* Create a COSTA function */
+        hnew = CTAI_XML_CreateFunc(cur_node);
+        break;
+      case CTA_SOBS:
+        /* Create a COSTA stochastic observer */
+        hnew = CTAI_XML_CreateSObs(cur_node);
+        break;
+      case CTA_SOBSCLASS:
+        /* Create a COSTA stochastic observer class */
+        hnew = CTAI_XML_CreateSObsClass(cur_node);
+        break;
+      case CTA_MODELCLASS:
+        hnew = CTAI_XML_CreateModelClass(cur_node);
+        break;
+      }
+      if (hnew == CTA_NULL) {
+        /* Default: create a COSTA subtree */
+        hnew = CTAI_XML_CreateDefaultTree(cur_node);
+      }
+   }
+   return hnew;
+}
+
+
+/** \brief Recursively read elements
+*
+*  \param startnode  I  XML node to start from
+*  \param hparent    I  parent COSTA handle
+*  \return              tree root
+*/
+static CTA_Tree CTAI_XML_Read(xmlNode *startnode, CTA_Tree hparent) {
+   xmlNode       *cur_node;   /* node loop counter */
+   CTA_Handle     hobj;       /* COSTA object */
+
+   for (cur_node = startnode; cur_node; cur_node = cur_node->next) {
+      if (cur_node->type == XML_ELEMENT_NODE || cur_node->type == XML_TEXT_NODE) {
+         /* Create COSTA object  from the current node */
+        hobj = CTAI_XML_CreateObject(cur_node);
+         if (hobj != CTA_NULL) {
+            if (hparent == CTA_NULL) {
+               /* Create tree */
+               hparent = hobj;
+            } else {
+               /* Add object to the parent */
+               CTA_Tree_AddHandle(hparent, CTAI_Handle_GetName(hobj), hobj);
+            }
+            if (CTAI_Handle_GetDatatype(hobj) == CTA_TREE) {
+               /* In case of a tree, handle nested COSTA objects */
+               CTAI_XML_Read(cur_node->children, hobj);
+            }
+         }
+      }
+   }
+   return hparent;
+}
+
+
+/** \brief Read a COSTA XML file into a tree
+*
+*  \param hfname  I  file name of XML file to read
+*  \param hroot   O  handle of a COSTA tree
+*  \return CTA_OK if successful
+*/
+int CTA_XML_Read(CTA_String hfname, CTA_Tree *hroot) {
+   char       *fname;
+   xmlDoc     *doc;
+   xmlNode    *root_element;
+   int         retval;
+
+   /* Check handle types */
+   retval=CTA_Handle_Check((CTA_Handle) hfname, CTA_STRING);
+   if (retval!=CTA_OK) return retval;
+   /* Initialize libxml */
+   LIBXML_TEST_VERSION;
+
+   /* Parse the file and get the DOM */
+   fname = CTAI_String_Allocate(hfname);
+   doc = xmlReadFile(fname, NULL, 0);
+   free(fname);
+   if (!doc) {
+      // OLD CODEhroot = NULL;
+      *hroot = CTA_NULL;
+      return CTA_CANNOT_OPEN_FILE;
+   }
+
+#ifdef LIBXML_XINCLUDE_ENABLED
+   /* Handle xinclude */
+   xmlXIncludeProcess(doc);
+#endif
+
+   /* Parse the XML tree */
+   root_element = xmlDocGetRootElement(doc);
+   *hroot = CTAI_XML_Read(root_element, CTA_NULL);
+
+   /* Clean up */
+   xmlFreeDoc(doc);
+   return CTA_OK;
+}
+
+
+/***************************************************************
+WRITE
+***************************************************************/
+#undef METHOD
+#define METHOD "ConvertInput"
+/**
+* ConvertInput:
+* @in: string in a given encoding
+* @encoding: the encoding used
+*
+* Converts @in into UTF-8 for processing with libxml2 APIs
+*
+* Returns the converted UTF-8 string, or NULL in case of error.
+*/
+xmlChar *ConvertInput(const char *in, const char *encoding)
+{
+   xmlChar *out;
+   int ret;
+   int size;
+   int out_size;
+   int temp;
+   xmlCharEncodingHandlerPtr handler;
+
+   if (in == 0)
+      return 0;
+
+   handler = xmlFindCharEncodingHandler(encoding);
+
+   if (!handler) {
+      char message[128];
+      sprintf(message,"ConvertInput: no encoding handler found for '%s'\n",
+         encoding ? encoding : "");
+      CTA_WRITE_ERROR(message);
+      return 0;
+   }
+
+   size = (int) strlen(in) + 1;
+   out_size = size * 2 - 1;
+   out = (unsigned char *) xmlMalloc((size_t) out_size);
+
+   if (out != 0) {
+      temp = size - 1;
+      ret = handler->input(out, &out_size, (const xmlChar *) in, &temp);
+      if ((ret < 0) || (temp - size + 1)) {
+         if (ret < 0) {
+            CTA_WRITE_ERROR("Conversion is not successful.\n");
+         } else {
+            char message[128];
+            sprintf
+               ("Conversion is not successful. converted: %i octets.\n",
+               temp);
+            CTA_WRITE_ERROR(message);
+         }
+
+         xmlFree(out);
+         out = 0;
+      } else {
+         out = (unsigned char *) xmlRealloc(out, out_size + 1);
+         out[out_size] = 0;  /*null terminating out */
+      }
+   } else {
+      CTA_WRITE_ERROR("Cannot allocate memory\n");
+   }
+
+   return out;
+}
+
+
+/** \brief Generate XML from COSTA stochastic observer
+*
+*  \param h       I  COSTA handle
+*  \param writer  I  the XML text writer
+*/
+static void CTAI_XML_WriteSObs(CTA_StochObs h, xmlTextWriter *writer) {
+   const xmlChar   *name;
+   CTA_String   hdb;
+   /* Start an element the the name of the tree handle */
+   xmlTextWriterStartElement(writer, (xmlChar *) CTAI_Type2String(CTA_SOBS));
+   /* Write name (if any) */
+   name = (xmlChar *) CTAI_Handle_GetName(h);
+   if (name && *name) {
+      xmlTextWriterWriteAttribute(writer, CTAI_XML_ID, name);
+   }
+   /* Write database */
+   hdb = CTAI_SObs_GetUserData(h, 1);
+   if (hdb == CTA_NULL) {
+      xmlTextWriterWriteComment(writer,
+         ConvertInput("Cannot find database in stochastic observer", (char *) MY_ENCODING));
+   } else {
+      xmlTextWriterWriteAttribute(writer, CTAI_XML_DATABASE, (xmlChar *) CTAI_String_GetPtr(hdb));
+   }
+   /* End the tree level elements */
+   xmlTextWriterEndElement(writer);
+}
+
+
+/** \brief Generate XML from COSTA time span
+*
+*  \param htime   I  COSTA handle
+*  \param writer  I  the XML text writer
+*/
+static void CTAI_XML_WriteTime(CTA_Time htime, xmlTextWriter *writer) {
+   const xmlChar   *name;
+   double       t1, t2, ts;
+   xmlChar      sz[64];
+
+   /* Start an element the the name of the tree handle */
+   xmlTextWriterStartElement(writer, (xmlChar *) CTAI_Type2String(CTA_TIME));
+
+   /* Write name (if any) */
+   name = (xmlChar *) CTAI_Handle_GetName(htime);
+   if (name && *name) {
+      xmlTextWriterWriteAttribute(writer, CTAI_XML_ID, name);
+   }
+
+   /* Get time span */
+   CTA_Time_GetSpan(htime, &t1, &t2);
+   CTA_Time_GetStep(htime, &ts);
+
+   /* Write time span */
+   sprintf((char *) sz, "%lg", t1);
+   xmlTextWriterWriteAttribute(writer, CTAI_XML_START, sz);
+   sprintf((char *) sz, "%lg", t2);
+   xmlTextWriterWriteAttribute(writer, CTAI_XML_STOP,  sz);
+   sprintf((char *) sz, "%lg", ts);
+   xmlTextWriterWriteAttribute(writer, CTAI_XML_STEP,  sz);
+
+   /* End the tree level elements */
+   xmlTextWriterEndElement(writer);
+}
+
+
+
+
+
+/** \brief Generate XML from one COSTA string
+*
+*  \param hstr   I  handle of a COSTA string
+*  \param writer I  the XML text writer
+*/
+static void CTAI_XML_WriteString(CTA_String hstr, xmlTextWriter *writer) {
+   const xmlChar *name;
+   const xmlChar *text;
+
+   name = (xmlChar *) CTAI_Handle_GetName(hstr);
+   text = (xmlChar *) CTAI_String_GetPtr(hstr);
+   if (!text) {
+      xmlTextWriterWriteComment(writer,
+         ConvertInput("Cannot retrieve string value", (char *) MY_ENCODING));
+      return;
+   }
+
+   if (strlen((char *) name) == 0) {
+      /* If the name is empty, it's a text element */
+      xmlTextWriterWriteString(writer, text);
+   } else {
+      /* If the name is not empty, it's an attribute */
+      xmlTextWriterWriteAttribute(writer, name, text);
+   }
+}
+
+
+/** \brief Generate XML from one tree level (recursively)
+*
+*  \param htree  I  handle of a COSTA tree
+*  \param writer I  the XML text writer
+*/
+static void CTAI_XML_WriteTree(CTA_Tree htree, xmlTextWriter *writer) {
+   int        n;
+   int        i;
+   CTA_Handle hitem;
+
+   /* Start an element the the name of the tree handle */
+   xmlTextWriterStartElement(writer, (xmlChar *) CTAI_Handle_GetName(htree));
+
+   /* Walk through all items on the current level */
+   CTA_Tree_CountItems(htree, &n);
+   for (i = 1; i <= n; ++i) {
+      CTA_Tree_GetItem(htree, i, &hitem);
+      if (hitem != CTA_NULL) {
+         switch (CTAI_Handle_GetDatatype(hitem)) {
+         case CTA_TREE:
+            /* Recurse */
+            CTAI_XML_WriteTree(hitem, writer);
+            break;
+         case CTA_STRING:
+            /* Write a string */
+            CTAI_XML_WriteString(hitem, writer);
+            break;
+         case CTA_TIME:
+            /* Write a time span */
+            CTAI_XML_WriteTime(hitem, writer);
+            break;
+         case CTA_VECTOR:
+            /* Write a vector */
+           CTAI_XML_WriteVector(hitem,"","",0,0, writer);
+            break;
+         case CTA_TREEVECTOR:
+            /* Write a state vector */
+           CTAI_XML_WriteTreeVector(0,0,hitem,0, writer);
+            break;
+         case CTA_SOBS:
+            /* Write a stochastic observer */
+            CTAI_XML_WriteSObs(hitem, writer);
+            break;
+         default:
+            /* Write a comment */
+            xmlTextWriterWriteComment(writer,
+               ConvertInput("Cannot write unknown data type", (char *) MY_ENCODING));
+            break;
+         }
+      }
+   }
+
+   /* End the tree level element */
+   xmlTextWriterEndElement(writer);
+}
+
+
+/** \brief Build an XML document to write to a COSTA XML file
+*
+*  \param hroot  I  handle of a COSTA tree
+*  \param writer I  the XML text writer
+*/
+static void CTAI_XML_WriteDocument(CTA_Tree hroot, xmlTextWriter *writer) {
+/* Start the document with the xml default for the version,
+* encoding ISO 8859-1 and the default for the standalone
+   * declaration. */
+   xmlTextWriterStartDocument(writer, NULL, (char *) MY_ENCODING, NULL);
+
+   /* Write the tree */
+   CTAI_XML_WriteTree(hroot, writer);
+
+   /* End the document */
+   xmlTextWriterEndDocument(writer);
+}
+
+
+/** \brief Write a tree to a COSTA XML file
+*
+*  \param fname  I  file name of XML file to write
+*  \param hroot  I  handle of a COSTA tree
+*  \return CTA_OK if successful
+*/
+#undef METHOD
+#define METHOD "Write"
+int CTA_XML_Write(CTA_String hfname, CTA_Tree hroot) {
+   char          *fname;
+   xmlDoc        *doc;
+   xmlTextWriter *writer;
+   int           retval;
+
+   /* Check handle types */
+   retval=CTA_Handle_Check((CTA_Handle) hfname, CTA_STRING);
+   if (retval!=CTA_OK) {
+       CTA_WRITE_ERROR("Handle is not a cta_vector handle");
+       return retval;
+   }
+   retval=CTA_Handle_Check((CTA_Handle) hroot, CTA_TREE);
+   if (retval!=CTA_OK) {
+	   CTA_WRITE_ERROR("Cannot retrieve handle data");
+	   return retval;
+   }
+
+   /* Initialize libxml */
+   LIBXML_TEST_VERSION;
+   
+   /* Create a new xmlWriter for DOM, with no compression. */
+   writer = xmlNewTextWriterDoc(&doc, 0);
+   if (!writer) {
+      return CTA_EXTERNAL_ERROR;
+   }
+
+   /* Create document */
+   CTAI_XML_WriteDocument(hroot, writer);
+
+   /* Write document and clean up */
+   xmlFreeTextWriter(writer);
+   fname = CTAI_String_Allocate(hfname);
+   xmlSaveFileEnc(fname, doc, (char *) MY_ENCODING);
+   xmlFreeDoc(doc);
+   free(fname);
+   return CTA_OK;
+}
+
+/* Interfacing with Fortran */
+CTAEXPORT void CTA_XML_READ_F77(int *hfname, int *hroot, int *ierr) {
+   *ierr=CTA_XML_Read((CTA_String)*hfname, (CTA_Tree*)hroot);
+}
+
+CTAEXPORT void CTA_XML_WRITE_F77(int *hfname, int *hroot, int *ierr) {
+   *ierr=CTA_XML_Write((CTA_String)*hfname, (CTA_Tree)*hroot);
+}
+
+
diff --git a/costa/native/cta/src/f_cta_defaults.c b/costa/native/cta/src/f_cta_defaults.c
new file mode 100644
index 000000000..da0dabcdb
--- /dev/null
+++ b/costa/native/cta/src/f_cta_defaults.c
@@ -0,0 +1,47 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/f_cta_defaults.c $
+$Revision: 1400 $, $Date: 2010-03-18 16:03:08 +0100 (Thu, 18 Mar 2010) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include "cta_defaults.h"
+
+
+#define CTA_DEFAULT_VECTOR_F77   F77_CALL(cta_default_vector,CTA_DEFAULT_VECTOR_F)
+#define CTA_DEFAULT_SOBS_F77     F77_CALL(cta_default_sobs,CTA_DEFAULT_SOBS_F)
+#define CTA_OBSDESCR_TABLE_F77   F77_CALL(cta_obsdescr_table,CTA_OBSDESCR_TABLE_F)
+#define CTA_DEFAULT_MATRIX_F77   F77_CALL(cta_default_matrix,CTA_DEFAULT_MATRIX_F)
+#define CTA_MODBUILD_SP_F77      F77_CALL(cta_modbuild_sp,CTA_MODBUILD_SP_F)
+#define CTA_MODBUILD_PAR_F77     F77_CALL(cta_modbuild_par,CTA_MODBUILD_PAR_F)
+#define CTA_FILE_STDOUT_F77      F77_CALL(cta_file_stdout,CTA_FILE_STDOUT_F)
+#define CTA_MODELCOMBINER_F77    F77_CALL(cta_modelcombiner,CTA_MODELCOMBINER_F)
+#define CTA_MODBUILD_B3B_F77     F77_CALL(cta_modbuild_b3b,CTA_MODBUILD_B3B_F)
+
+CTAEXPORT int CTA_DEFAULT_VECTOR_F77(){return CTA_DEFAULT_VECTOR;};
+CTAEXPORT int CTA_DEFAULT_SOBS_F77()  {return CTA_DEFAULT_SOBS;};
+CTAEXPORT int CTA_OBSDESCR_TABLE_F77(){return CTA_OBSDESCR_TABLE;};
+CTAEXPORT int CTA_DEFAULT_MATRIX_F77(){return CTA_DEFAULT_MATRIX;};
+CTAEXPORT int CTA_MODBUILD_SP_F77()   {return CTA_MODBUILD_SP;};
+CTAEXPORT int CTA_MODBUILD_PAR_F77()  {return CTA_MODBUILD_PAR;};
+CTAEXPORT int CTA_FILE_STDOUT_F77()   {return CTA_FILE_STDOUT;};
+CTAEXPORT int CTA_MODELCOMBINER_F77() {return CTA_MODELCOMBINER;};
+CTAEXPORT int CTA_MODBUILD_B3B_F77()  {return CTA_MODBUILD_B3B;};
+
+
+
diff --git a/costa/native/cta/src/f_cta_utils.c b/costa/native/cta/src/f_cta_utils.c
new file mode 100644
index 000000000..5899b2847
--- /dev/null
+++ b/costa/native/cta/src/f_cta_utils.c
@@ -0,0 +1,62 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/cta/f_cta_utils.c $
+$Revision: 671 $, $Date: 2008-10-07 14:49:42 +0200 (Tue, 07 Oct 2008) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2005  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include "f_cta_utils.h"
+#include "ctai.h"
+
+/*! \brief Copies a Fortran-string into a zero terminated C-string
+
+    Traling blanks in the Fortran-string are not copied.
+    Note: it is the programmers responsibility that the C-string is long enough 
+    to hold the copy of the fortran sting.
+    \param f_str    I fortran string
+    \param c_str    O c-string, copy of fstr (is allocated in this function)
+    \param len_f    I length of Fortran string (hidden argument in Fortran)
+*/
+int CTA_fstr2cstr(char *f_str,char *c_str, int len_f){
+
+   int len_c; //lengt of the c equivalent of f_str
+
+   //determine "strlen"-length of fortran string
+   for (len_c=len_f-1;len_c>=0 && f_str[len_c]==' ';len_c--){};
+   //copy f-string into c-string
+   if (len_c>=0) {
+      strncpy(c_str,f_str,len_c+1);
+   }
+   //add "0"-character
+   c_str[len_c+1]='\0';
+   return CTA_OK;
+};
+
+int CTA_cstr2fstr(char *c_str,char *f_str, int len_f){
+   int len_cpy; //numver of characters that must/can be copied
+   int i;      // loopcouter over traling elements of f_str
+
+   // avoid overflow by checking length
+   len_cpy=MIN((int) strlen(c_str),len_f);
+   strncpy(f_str,c_str,len_cpy);
+   // put spaces in trailing elements of f_str
+   for (i=len_cpy;i<len_f;i++){
+      f_str[i]=' ';
+   }
+   return CTA_OK;
+};
diff --git a/costa/native/cta_f90/cta_f90_parameters.f90 b/costa/native/cta_f90/cta_f90_parameters.f90
new file mode 100644
index 000000000..d247090f0
--- /dev/null
+++ b/costa/native/cta_f90/cta_f90_parameters.f90
@@ -0,0 +1,25 @@
+module CTA_F90_Parameters
+
+  implicit none
+  
+
+  ! --- in/out -----------------------------------
+  
+  public
+
+
+  ! --- const ------------------------------------
+  
+  include 'cta_f90.inc'
+  
+  ! integer kind used for handles:
+  integer, parameter  :: CTA_HANDLE_IKIND = 4
+
+  ! Do not use the 'sizeof' function! Not standard ...
+  !integer, parameter  :: CTA_HANDLE_IKIND = sizeof(CTA_HANDLE)
+  
+  ! real kind used for time values:
+  integer, parameter  :: CTA_TIME_RKIND = 8
+  
+  
+end module CTA_F90_Parameters
diff --git a/costa/native/cta_f90/generated/cta_f90.f90 b/costa/native/cta_f90/generated/cta_f90.f90
new file mode 100644
index 000000000..785e682d9
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90.f90
@@ -0,0 +1,87 @@
+module cta_f90
+
+  use cta_f90_parameters
+  use cta_f90_interface
+  use cta_f90_datetime
+  use cta_f90_metainfo
+  use cta_f90_util_methods
+  use cta_f90_mem
+  use cta_f90_matrix
+  use cta_f90_string
+  use cta_f90_util_statistics
+  use cta_f90_functions
+  use cta_f90_resultwriter
+  use cta_f90_reltable
+  use cta_f90_flush_mod
+  use cta_f90_treevector
+  use cta_f90_pack
+  use cta_f90_util_sort
+  use cta_f90_datatypes
+  use cta_f90_xml
+  use cta_f90_model
+  use cta_f90_file
+  use cta_f90_par
+  use cta_f90_handles
+  use cta_f90_vector
+  use cta_f90_modbuild_par
+  use cta_f90_time
+  use cta_f90_sobs
+  use cta_f90_array
+  use cta_f90_model_utilities
+  use cta_f90_tree
+  use cta_f90_obsdescr
+  use cta_f90_initialise
+  use cta_f90_message
+  use cta_f90_method
+  use cta_f90_model_factory
+
+  implicit none
+
+  public
+
+contains
+
+  ! return error message given error code
+
+  function CTA_F90_StrError( ierr )
+
+    use CTA_F90_Parameters, only : CTA_OK
+    use CTA_F90_Parameters, only : CTA_ARRAY_TOO_SHORT
+    use CTA_F90_Parameters, only : CTA_ILLEGAL_DATATYPE
+    use CTA_F90_Parameters, only : CTA_DIMENSION_ERROR
+    use CTA_F90_Parameters, only : CTA_INCOMPATIBLE_VECTORS
+    use CTA_F90_Parameters, only : CTA_CONCAT_NOT_POSSIBLE
+    use CTA_F90_Parameters, only : CTA_SETVAL_NOT_POSSIBLE
+    use CTA_F90_Parameters, only : CTA_ITEM_NOT_FOUND
+    use CTA_F90_Parameters, only : CTA_UNINITIALISED_SUBSTATES
+    use CTA_F90_Parameters, only : CTA_STATES_NOT_COMPATIBLE
+    use CTA_F90_Parameters, only : CTA_INCOMPATIBLE_MATRICES
+    use CTA_F90_Parameters, only : CTA_NOT_IMPLEMENTED
+
+    ! --- in/out ---------------------------------
+
+    character(len=80)     ::  CTA_F90_StrError
+    integer, intent(in)   ::  ierr
+
+    ! --- begin ----------------------------------
+
+    select case ( ierr )
+      case ( CTA_OK                      ) ! ok, no message
+      case ( CTA_ARRAY_TOO_SHORT         ) ; CTA_F90_StrError = 'CTA - ERROR - Array too short'
+      case ( CTA_ILLEGAL_DATATYPE        ) ; CTA_F90_StrError = 'CTA - ERROR - Illegal datatype'
+      case ( CTA_DIMENSION_ERROR         ) ; CTA_F90_StrError = 'CTA - ERROR - Dimension error'
+      case ( CTA_INCOMPATIBLE_VECTORS    ) ; CTA_F90_StrError = 'CTA - ERROR - Incompatible vectors'
+      case ( CTA_CONCAT_NOT_POSSIBLE     ) ; CTA_F90_StrError = 'CTA - ERROR - Concat not possible'
+      case ( CTA_SETVAL_NOT_POSSIBLE     ) ; CTA_F90_StrError = 'CTA - ERROR - Setval not possible'
+      case ( CTA_ITEM_NOT_FOUND          ) ; CTA_F90_StrError = 'CTA - ERROR - Item not found'
+      case ( CTA_UNINITIALISED_SUBSTATES ) ; CTA_F90_StrError = 'CTA - ERROR - Uninitialised substates'
+      case ( CTA_STATES_NOT_COMPATIBLE   ) ; CTA_F90_StrError = 'CTA - ERROR - States not compatible'
+      case ( CTA_INCOMPATIBLE_MATRICES   ) ; CTA_F90_StrError = 'CTA - ERROR - Incompatible matrices'
+      case ( CTA_NOT_IMPLEMENTED         ) ; CTA_F90_StrError = 'CTA - ERROR - Not implemented'
+      case default
+        write (CTA_F90_StrError,'("CTA - ERROR - Unknown error code: ",i8)') ierr
+    end select
+
+  end function CTA_F90_StrError
+
+end module cta_f90
diff --git a/costa/native/cta_f90/generated/cta_f90_array.f90 b/costa/native/cta_f90/generated/cta_f90_array.f90
new file mode 100644
index 000000000..58e9c371f
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_array.f90
@@ -0,0 +1,487 @@
+module cta_f90_array
+
+  implicit none
+
+  public
+
+  !  \brief Free Array instance
+  ! 
+  !  \param h IO handle of Array instance CTA_NULL on return
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Array_Free
+    subroutine CTA_Array_Free( h, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  h
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Array_Free
+  end interface
+
+  !  \brief Create a new Array instance and fill with given values
+  ! 
+  !  \param values      I values of new Array
+  !  \param nDimensions I Number of dimensions of new array
+  !  \param dimensions  I Number of elements in each dimension
+  !  \param h           O Array handle of new instance
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Array_CreateAsDoubles
+    subroutine CTA_Array_CreateAsDoubles( values, nDimensions, dimensions, h, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      real(8)                       , intent(in   )     ::  values(*)
+      integer                       , intent(in   )     ::  nDimensions
+      integer                       , intent(in   )     ::  dimensions(*)
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  h
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Array_CreateAsDoubles
+  end interface
+
+  !  \brief Get number of dimensions, eg 1 for a vector and 2 for a matrix
+  !  \param h           I Array handle
+  !   \param status O rank of the array
+  !
+  interface CTA_F90_Array_getNumberOfDimensions
+    subroutine CTA_Array_getNumberOfDimensions( h, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Array_getNumberOfDimensions
+  end interface
+
+  !  \brief Get the sizes of the individual dimensions
+  !  \param h           I Array handle
+  !  \param nDimensions O  size of array, eg 2 for the array [[3,4,10],[3,4,10]]
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Array_getnDimensions
+    subroutine CTA_Array_getnDimensions( h, nDimensions, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h
+      integer                       , intent(out  )     ::  nDimensions
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Array_getnDimensions
+  end interface
+
+  !  \brief Get the size of the array for each dimension.
+  !  The length of the return value equals the number of dimensions.
+  ! 
+  !  \param h          I Array handle
+  !  \param dimensions O size of array, eg [3,4,10]
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Array_getDimensions
+    subroutine CTA_Array_getDimensions( h, dimensions, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h
+      integer                       , intent(out  )     ::  dimensions
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Array_getDimensions
+  end interface
+
+  !  \brief Total number of elements in an array.
+  !  This is equal to the product of the values returned by getDimension and is thus just for c
+  ! 
+  !  \param h      I Array handle
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Array_length
+    subroutine CTA_Array_length( h, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Array_length
+  end interface
+
+  !  \brief Get all values of an array.
+  !  The values are guaranteed be a copy if
+  !  copyValues==true or may be a reference if copyValues==false.
+  !  Note that the values are never guaranteed to be a reference and can not be used
+  !  to change the array.
+  ! 
+  !  \param h      I Array handle
+  !  \param values O Values from the array
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Array_getValuesAsDoubles
+    subroutine CTA_Array_getValuesAsDoubles( h, values, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h
+      real(8)                       , intent(out  )     ::  values
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Array_getValuesAsDoubles
+  end interface
+
+  !  \brief Get a part of the values from the array.
+  !  Eg let a=[[1,2,3][4,5,6]] then getValuesAsDoubles(1,2)
+  !  returns [2,3]
+  !  Note that negative values are allowed to denote counting from the end, is -1 denotes the last value.
+  !  \param h          I Array handle
+  !  \param firstIndex I firstIndex of selection
+  !  \param lastIndex  I lastIndex of selection
+  !  \param values     O Values from the array
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Array_getValuesAsDoubles_indexrange
+    subroutine CTA_Array_getValuesAsDoubles_indexrange( h, firstIndex, lastIndex, values, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h
+      integer                       , intent(in   )     ::  firstIndex
+      integer                       , intent(in   )     ::  lastIndex
+      real(8)                       , intent(out  )     ::  values
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Array_getValuesAsDoubles_indexrange
+  end interface
+
+  !  \brief Get a value from an array for specific indices.
+  !  Eg let a=[[1,2,3][4,5,6]] then getValueAsDouble(5)
+  !  returns 6
+  !  Note that negative values are allowed to denote counting from the end, is -1 denotes the last value.
+  !  \param h       I Array handle
+  !  \param index   I index specifier
+  !  \param value   O double value at the specific indices
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Array_getValueAsDoubles_index
+    subroutine CTA_Array_getValueAsDoubles_index( h, index, value, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h
+      integer                       , intent(in   )     ::  index
+      real(8)                       , intent(out  )     ::  value
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Array_getValueAsDoubles_index
+  end interface
+
+  !  \brief Get a value from an array for specific indices.
+  !  Eg let a=[[1,2,3][4,5,6]] then getValueAsDouble([1,2])
+  !  returns 6
+  !  Note that negative values are allowed to denote counting from the end, is -1 denotes the last value.
+  !  \param h        I Array handle
+  !  \param nIndices I length of array of indices indices
+  !  \param indices  I indices of value
+  !  \param value    O value from array
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Array_getValueAsDouble_indices
+    subroutine CTA_Array_getValueAsDouble_indices( h, nIndices, indices, value, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h
+      integer                       , intent(in   )     ::  nIndices
+      integer                       , intent(in   )     ::  indices
+      real(8)                       , intent(out  )     ::  value
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Array_getValueAsDouble_indices
+  end interface
+
+  !  \brief Set the values of this array.
+  !  \param h      I Array handle
+  !  \param values I the values as an array of doubles
+  !  \param length I length of array values
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Array_setValuesAsDoubles
+    subroutine CTA_Array_setValuesAsDoubles( h, values, length, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h
+      real(8)                       , intent(in   )     ::  values(*)
+      integer                       , intent(in   )     ::  length
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Array_setValuesAsDoubles
+  end interface
+
+  !  \brief Set a value from an array for specific indices.
+  !  Eg let a=[[1,2,3][4,5,6]] then setValueAsDouble([1,2],60.)
+  !  results in a=[[1,2,3][4,5,60.]]
+  !  Note that negative values are allowed to denote counting from the end, is -1 denotes the last value.
+  !  \param h         I Array handle
+  !  \param nIndices  I length of array indices
+  !  \param indices   I index specifier
+  !  \param value     I values at the specific indices
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Array_setValueAsDouble_indices
+    subroutine CTA_Array_setValueAsDouble_indices( h, nIndices, indices, value, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h
+      integer                       , intent(in   )     ::  nIndices
+      integer                       , intent(in   )     ::  indices(*)
+      real(8)                       , intent(in   )     ::  value
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Array_setValueAsDouble_indices
+  end interface
+
+  !  \brief Set part of the values of this array.
+  !  Let a=[[1,2,3][4,5,6]] then setValuesAsDoubles(1,2,[20,30]) will result in a=[[1,20,30][4,5,6]]
+  !  Note that negative values are allowed to denote counting from the end, is -1 denotes the last value.
+  !  \param h          I Array handle
+  !  \param firstIndex I specifies the start of the selection
+  !  \param lastIndex  I specifies the end of the selection
+  !  \param values     I that will replace the selected range of numbers
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Array_setValuesAsDoubles_indexrange
+    subroutine CTA_Array_setValuesAsDoubles_indexrange( h, firstIndex, lastIndex, values, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h
+      integer                       , intent(in   )     ::  firstIndex
+      integer                       , intent(in   )     ::  lastIndex
+      real(8)                       , intent(in   )     ::  values(*)
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Array_setValuesAsDoubles_indexrange
+  end interface
+
+  !  \brief Get a value from an array for specific indices.
+  !  Eg let a=[[1,2,3][4,5,6]] then setValueAsDouble(5,60.)
+  !  results in a=[[1,2,3][4,5,60.]]
+  !  Note that negative values are allowed to denote counting from the end, is -1 denotes the last value.
+  !  \param h          I Array handle
+  !  \param index      I index specifier
+  !  \param value      I value that will replace the selected number
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Array_setValueAsDouble_index
+    subroutine CTA_Array_setValueAsDouble_index( h, index, value, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h
+      integer                       , intent(in   )     ::  index
+      real(8)                       , intent(in   )     ::  value
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Array_setValueAsDouble_index
+  end interface
+
+  !  \brief Set whole vector equal to a constant value.
+  !  Note:  This method can only be used if all elements of the vector
+  !  have the same data type.
+  !  \param h     I Array handle
+  !  \param value I value to set
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Array_setConstant
+    subroutine CTA_Array_setConstant( h, value, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h
+      real(8)                       , intent(in   )     ::  value
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Array_setConstant
+  end interface
+
+  !  \brief Perform a values += alpha * axpyValues</c> operation on each value in this array.
+  !  \param h          I Array handle
+  !  \param alpha      I The <c>alpha</c> in <c>state variable += alpha * vector</c>.
+  !  \param axpyValues I the values for the axpy-operation on all values in this array.
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Array_axpyOnValues
+    subroutine CTA_Array_axpyOnValues( h, alpha, axpyValues, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h
+      real(8)                       , intent(in   )     ::  alpha
+      real(8)                       , intent(in   )     ::  axpyValues(*)
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Array_axpyOnValues
+  end interface
+
+  !  \brief Multiply each value in this array with the corresponding multiplication factor.
+  !  \param h                     I Array handle
+  !  \param multiplicationFactors I the multiplication factors for all array values.
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Array_multiplyValues
+    subroutine CTA_Array_multiplyValues( h, multiplicationFactors, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h
+      real(8)                       , intent(in   )     ::  multiplicationFactors(*)
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Array_multiplyValues
+  end interface
+
+  !  \brief Change the dimensions of an array. The new array should have the same length
+  !  \param h           I Array handle
+  !  \param nDimensions I length of array dimensions
+  !  \param dimensions  I new dimensions of array
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Array_reshape
+    subroutine CTA_Array_reshape( h, nDimensions, dimensions, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h
+      integer                       , intent(in   )     ::  nDimensions
+      integer                       , intent(in   )     ::  dimensions(*)
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Array_reshape
+  end interface
+
+  !  \brief Get part of the array by selection of a subset in one dimension.
+  !  Eg. a=[[1,2,3],[4,5,6]] a.getSlice(0,0) returns [1,2,3]
+  !  Note that the number of dimensions IS reduced by one.
+  !  \param h           I Array handle
+  !  \param dimension   I Dimension to make selection in
+  !  \param index       I Element in dimension that is selected
+  !  \param h_out       O new Array with selected selection
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Array_getSlice
+    subroutine CTA_Array_getSlice( h, dimension, index, h_out, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h
+      integer                       , intent(in   )     ::  dimension
+      integer                       , intent(in   )     ::  index
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  h_out
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Array_getSlice
+  end interface
+
+  !  \brief Get part of the array by selection of a subset in one dimension.
+  !  Eg. a=[[1,2,3],[4,5,6],[7,8,9]] a.getSlice(0,0,1) returns [[1,2],[3,4]]
+  !  Note that the number of dimensions is NOT reduced by one.
+  !  \param h           I Array handle
+  !  \param dimension   I Dimension to make selection in
+  !  \param minIndex    I start of range to select
+  !  \param maxIndex    I end of range to select
+  !  \param h_out       O new Array with selected selection
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Array_getSlice_range
+    subroutine CTA_Array_getSlice_range( h, dimension, minIndex, maxIndex, h_out, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h
+      integer                       , intent(in   )     ::  dimension
+      integer                       , intent(in   )     ::  minIndex
+      integer                       , intent(in   )     ::  maxIndex
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  h_out
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Array_getSlice_range
+  end interface
+
+  !  \brief Get part of the array by selection of a subset in one dimension.
+  !  Eg. a=[[1,2,3],[4,5,6],[7,8,9]] a.getSlice(0,0,1) returns [1,2,3,4]
+  !  \param h           I Array handle
+  !  \param dimension   I Dimension to make selection in
+  !  \param minIndex    I start of range to select
+  !  \param maxIndex    I end of range to select
+  !  \param values      O selected values
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Array_getSliceAsDoubles_range
+    subroutine CTA_Array_getSliceAsDoubles_range( h, dimension, minIndex, maxIndex, values, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h
+      integer                       , intent(in   )     ::  dimension
+      integer                       , intent(in   )     ::  minIndex
+      integer                       , intent(in   )     ::  maxIndex
+      real(8)                       , intent(out  )     ::  values
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Array_getSliceAsDoubles_range
+  end interface
+
+  !  \brief Set the values of a part of an array.
+  !  Eg. a=[[1,2,3],[4,5,6],[7,8,9]] and a.setSlice([11,12,13],1,1)
+  !  sets the second column a=[[1,11,3],[4,12,6],[7,13,9]]
+  !  Note that the dimension of the slice is one smaller than for the array.
+  !  \param h         I Array handle
+  !  \param slice     I Values to set
+  !  \param dimension I Dimension that is replaced
+  !  \param index     I Index of selected dimension to replace
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Array_setSliceAsDoubles
+    subroutine CTA_Array_setSliceAsDoubles( h, slice, dimension, index, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h
+      real(8)                       , intent(in   )     ::  slice(*)
+      integer                       , intent(in   )     ::  dimension
+      integer                       , intent(in   )     ::  index
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Array_setSliceAsDoubles
+  end interface
+
+  !  \brief Set the values of a part of an array.
+  !  Eg. a=[[1,2,3],[4,5,6],[7,8,9]] and a.setSlice([11,12,13],1,1)
+  !  sets the second column a=[[1,11,3],[4,12,6],[7,13,9]]
+  !  Note that the dimension of the slice is one smaller than for the array.
+  !  \param h         I Array handle
+  !  \param slice_h   I Values to set
+  !  \param dimension I Dimension that is replaced
+  !  \param index     I Index of selected dimension to replace
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Array_setSliceAsArray
+    subroutine CTA_Array_setSliceAsArray( h, slice_h, dimension, index, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  slice_h
+      integer                       , intent(in   )     ::  dimension
+      integer                       , intent(in   )     ::  index
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Array_setSliceAsArray
+  end interface
+
+  !  \brief Set the values of a part of an array.
+  !  Eg. a=[[1,2,3],[4,5,6],[7,8,9]] and a.setSlice([[11,12,13],[14,15,16]],1,1,2)
+  !  sets the second and third columns a=[[1,11,14],[4,12,15],[7,13,16]]
+  !  Note that the dimension of the slice is the same as for the array.
+  !  \param h         I Array handle
+  !  \param slice_h   I Values to set
+  !  \param dimension I Dimension that is replaced
+  !  \param minIndex  I Start of range in Dimension to be replaced
+  !  \param maxIndex  I End of range in Dimension to be replaced
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Array_setSliceAsArray_range
+    subroutine CTA_Array_setSliceAsArray_range( h, slice_h, dimension, minIndex, maxIndex, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  slice_h
+      integer                       , intent(in   )     ::  dimension
+      integer                       , intent(in   )     ::  minIndex
+      integer                       , intent(in   )     ::  maxIndex
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Array_setSliceAsArray_range
+  end interface
+
+  !  \brief Set the values of a part of an array.
+  !  Eg. a=[[1,2,3],[4,5,6],[7,8,9]] and a.setSlice([11,12,13,14,15,16],1,1,2)
+  !  sets the second and third columns a=[[1,11,14],[4,12,15],[7,13,16]]
+  !  Note that the dimension of the slice is the same as for the array.
+  !  \param h         I Array handle
+  !  \param slice     I Values to set
+  !  \param dimension I Dimension that is replaced
+  !  \param minIndex  I Start of range in Dimension to be replaced
+  !  \param maxIndex  I End of range in Dimension to be replaced
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Array_setSliceAsDoubles_range
+    subroutine CTA_Array_setSliceAsDoubles_range( h, slice, dimension, minIndex, maxIndex, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h
+      real(8)                       , intent(in   )     ::  slice(*)
+      integer                       , intent(in   )     ::  dimension
+      integer                       , intent(in   )     ::  minIndex
+      integer                       , intent(in   )     ::  maxIndex
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Array_setSliceAsDoubles_range
+  end interface
+
+  !  \brief Convert indices in multiple dimensions to position in the one-dimensional array as
+  !  returned eg by getValuesAsDoubles
+  !  Eg. a=[[1,2,3],[4,5,6],[7,8,9]] then valueIndex([1,0]) returns 3 which points to the value 4 here.
+  !  \param h        I Array handle
+  !  \param nIndices I length of indices
+  !  \param indices  I indices of element
+  !  \param index    O position of element
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Array_valueIndex
+    subroutine CTA_Array_valueIndex( h, nIndices, indices, index, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h
+      integer                       , intent(in   )     ::  nIndices
+      integer                       , intent(in   )     ::  indices(*)
+      integer                       , intent(out  )     ::  index
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Array_valueIndex
+  end interface
+
+
+end module cta_f90_array
+
diff --git a/costa/native/cta_f90/generated/cta_f90_datatypes.f90 b/costa/native/cta_f90/generated/cta_f90_datatypes.f90
new file mode 100644
index 000000000..a8a49e53e
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_datatypes.f90
@@ -0,0 +1,23 @@
+module cta_f90_datatypes
+
+  implicit none
+
+  public
+
+  !  \brief Get the result of the C-function sizeof for a COSTA datatype
+  ! 
+  !   \param datatype I COSTA data type
+  !   \param size     O receives result sizeof-function
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_SizeOf
+    subroutine CTA_SizeOf( datatype, size, status )
+      integer                       , intent(in   )     ::  datatype
+      integer                       , intent(out  )     ::  size
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_SizeOf
+  end interface
+
+
+end module cta_f90_datatypes
+
diff --git a/costa/native/cta_f90/generated/cta_f90_datetime.f90 b/costa/native/cta_f90/generated/cta_f90_datetime.f90
new file mode 100644
index 000000000..ddbf8cc92
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_datetime.f90
@@ -0,0 +1,150 @@
+module cta_f90_datetime
+
+  implicit none
+
+  public
+
+  !  \brief Julian day number from Gregorian date.
+  ! 
+  !  \param year    I  Year
+  !  \param month   I  Month
+  !  \param day     I  Day
+  !  \param hour    I  Hour
+  !  \param minute  I  Minute
+  !  \param second  I  Second
+  !  \param jd      O  Julian day number
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_DateTime_GregorianToJulian
+    subroutine CTA_DateTime_GregorianToJulian( year, month, day, hour, minute, second, jd, status )
+      use CTA_F90_Parameters, only : CTA_TIME_RKIND
+      integer                       , intent(in   )     ::  year
+      integer                       , intent(in   )     ::  month
+      integer                       , intent(in   )     ::  day
+      integer                       , intent(in   )     ::  hour
+      integer                       , intent(in   )     ::  minute
+      integer                       , intent(in   )     ::  second
+      real(CTA_TIME_RKIND)          , intent(out  )     ::  jd
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_DateTime_GregorianToJulian
+  end interface
+
+  !  \brief Modified Julian day number from Gregorian date.
+  ! 
+  !  \param year    I  Year
+  !  \param month   I  Month
+  !  \param day     I  Day
+  !  \param hour    I  Hour
+  !  \param minute  I  Minute
+  !  \param second  I  Second
+  !  \param mjd     O  Modified Julian day number
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_DateTime_GregorianToModifiedJulian
+    subroutine CTA_DateTime_GregorianToModifiedJulian( year, month, day, hour, minute, second, mjd, status )
+      use CTA_F90_Parameters, only : CTA_TIME_RKIND
+      integer                       , intent(in   )     ::  year
+      integer                       , intent(in   )     ::  month
+      integer                       , intent(in   )     ::  day
+      integer                       , intent(in   )     ::  hour
+      integer                       , intent(in   )     ::  minute
+      integer                       , intent(in   )     ::  second
+      real(CTA_TIME_RKIND)          , intent(out  )     ::  mjd
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_DateTime_GregorianToModifiedJulian
+  end interface
+
+  !  \brief Convert days into hours, minutes, and seconds.
+  ! 
+  !  \param days    I  Year
+  !  \param hour    O  Hour
+  !  \param minute  O  Minute
+  !  \param second  O  Second
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_DateTime_DaysToHMS
+    subroutine CTA_DateTime_DaysToHMS( days, hour, minute, second, status )
+      use CTA_F90_Parameters, only : CTA_TIME_RKIND
+      real(CTA_TIME_RKIND)          , intent(in   )     ::  days
+      integer                       , intent(out  )     ::  hour
+      integer                       , intent(out  )     ::  minute
+      integer                       , intent(out  )     ::  second
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_DateTime_DaysToHMS
+  end interface
+
+  !  \brief Julian day number from Modified Julian day number
+  ! 
+  !  \param mjd     I  Modified Julian day number
+  !  \param jd      O  Julian day number
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_DateTime_ModifiedJulianToJulian
+    subroutine CTA_DateTime_ModifiedJulianToJulian( mjd, jd, status )
+      use CTA_F90_Parameters, only : CTA_TIME_RKIND
+      real(CTA_TIME_RKIND)          , intent(in   )     ::  mjd
+      real(CTA_TIME_RKIND)          , intent(out  )     ::  jd
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_DateTime_ModifiedJulianToJulian
+  end interface
+
+  !  \brief Gregorian calendar date from Julian day number
+  ! 
+  !  \param jd      I  Julian day number
+  !  \param year    O  Year
+  !  \param month   O  Month
+  !  \param day     O  Day
+  !  \param hour    O  Hour
+  !  \param minute  O  Minute
+  !  \param second  O  Second
+  !  \param jd      O  Julian day number
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_DateTime_JulianToGregorian
+    subroutine CTA_DateTime_JulianToGregorian( jd, year, month, day, hour, minute, second, status )
+      use CTA_F90_Parameters, only : CTA_TIME_RKIND
+      real(CTA_TIME_RKIND)          , intent(out  )     ::  jd
+      integer                       , intent(out  )     ::  year
+      integer                       , intent(out  )     ::  month
+      integer                       , intent(out  )     ::  day
+      integer                       , intent(out  )     ::  hour
+      integer                       , intent(out  )     ::  minute
+      integer                       , intent(out  )     ::  second
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_DateTime_JulianToGregorian
+  end interface
+
+  !  \brief Gregorian calendar date from Modified Julian day number
+  ! 
+  !  \param mjd     I  Modified Julian day number
+  !  \param year    O  Year
+  !  \param month   O  Month
+  !  \param day     O  Day
+  !  \param hour    O  Hour
+  !  \param minute  O  Minute
+  !  \param second  O  Second
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_DateTime_ModifiedJulianToGregorian
+    subroutine CTA_DateTime_ModifiedJulianToGregorian( mjd, year, month, day, hour, minute, second, status )
+      use CTA_F90_Parameters, only : CTA_TIME_RKIND
+      real(CTA_TIME_RKIND)          , intent(in   )     ::  mjd
+      integer                       , intent(out  )     ::  year
+      integer                       , intent(out  )     ::  month
+      integer                       , intent(out  )     ::  day
+      integer                       , intent(out  )     ::  hour
+      integer                       , intent(out  )     ::  minute
+      integer                       , intent(out  )     ::  second
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_DateTime_ModifiedJulianToGregorian
+  end interface
+
+
+end module cta_f90_datetime
+
diff --git a/costa/native/cta_f90/generated/cta_f90_file.f90 b/costa/native/cta_f90/generated/cta_f90_file.f90
new file mode 100644
index 000000000..b7038defe
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_file.f90
@@ -0,0 +1,149 @@
+module cta_f90_file
+
+  implicit none
+
+  public
+
+  !  \brief Create a new COSTA file
+  !    for holding a C file descriptor of a FORTRAN file LUN
+  ! 
+  !   \note This call does not open a file.
+  !    No FORTRAN support in this version
+  ! 
+  !   \param hfile  O  receives handle of created file
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_File_Create
+    subroutine CTA_File_Create( hfile, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hfile
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_File_Create
+  end interface
+
+  !  \brief Free a new COSTA file-handle
+  ! 
+  !   \note The File is not closed (in this version)
+  ! 
+  !   \param hfile  IO  handle of COSTA file CTA_NULL on return
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_File_Free
+    subroutine CTA_File_Free( hfile, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hfile
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_File_Free
+  end interface
+
+  !  \brief Get the C-file descriptor of the COSTA file
+  ! 
+  !   \note
+  ! 
+  !   \param hfile  I  handle of COSTA file-handle
+  !   \param file   O  receives file descriptor
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_File_Get
+    subroutine CTA_File_Get( hfile, file, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hfile
+      integer                       , intent(out  )     ::  file
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_File_Get
+  end interface
+
+  !  \brief Get the NETCDF file id of the COSTA file
+  ! 
+  !   \note
+  ! 
+  !   \param hfile  I  handle of COSTA file-handle
+  !   \param ncid   O  receives NETCDF file id
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_File_GetNetcdf
+    subroutine CTA_File_GetNetcdf( hfile, ncid, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hfile
+      integer                       , intent(out  )     ::  ncid
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_File_GetNetcdf
+  end interface
+
+  !  \brief Set the C-file descriptor of the COSTA file
+  ! 
+  !   \note
+  ! 
+  !   \param hfile  IO  handle of COSTA file-handle
+  !   \param file   I   file descriptor
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_File_Set
+    subroutine CTA_File_Set( hfile, file, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hfile
+      integer                       , intent(in   )     ::  file
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_File_Set
+  end interface
+
+  !  \brief Open a C-file and set descriptor
+  ! 
+  !   \note
+  ! 
+  !   \param hfile   I  provide a valid handle of COSTA file
+  !   \param sname   I  file path
+  !   \param smode   I  open-mode (see C fopen documentation)
+  !                     if CTA_NULL is provided, the file will be
+  !                     opened with read/write access (file pointer at begin
+  !                     of file)
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_File_Open
+    subroutine CTA_File_Open( hfile, sname, smode, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hfile
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  sname
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  smode
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_File_Open
+  end interface
+
+  !  \brief check whether file is a NETCDF file
+  ! 
+  !   \param hfile    I  provide a valid handle of COSTA file
+  !   \param isnetcdf O  CTA_TRUE if file is NETCDF file CTA_FALSE otherwise
+  ! 
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_File_IsNetcdf
+    subroutine CTA_File_IsNetcdf( hfile, isnetcdf, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hfile
+      integer                       , intent(out  )     ::  isnetcdf
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_File_IsNetcdf
+  end interface
+
+  !  \brief Write a string to file
+  ! 
+  !   \note
+  ! 
+  !   \param hfile   I  handle of COSTA file
+  !   \param str     I  string that must be written to file
+  !   \param eol     I  Add end of line, CTA_TRUE for adding end of line or CTA_FALSE otherwise
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_File_WriteStr
+    subroutine CTA_File_WriteStr( hfile, str, eol, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hfile
+      character(len=*)              , intent(in   )     ::  str(*)
+      integer                       , intent(in   )     ::  eol
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_File_WriteStr
+  end interface
+
+
+end module cta_f90_file
+
diff --git a/costa/native/cta_f90/generated/cta_f90_flush_mod.f90 b/costa/native/cta_f90/generated/cta_f90_flush_mod.f90
new file mode 100644
index 000000000..090d6798b
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_flush_mod.f90
@@ -0,0 +1,22 @@
+module cta_f90_flush_mod
+
+  implicit none
+
+  public
+
+  !  \brief Flushes STDOUT and STDERR both for C as FORTRAN
+  ! 
+  !   \note Function is introduced because some FORTRAN compilers use
+  !   different flushing than the C compiler. As a result the order of output
+  !   generated by both languages is "random". This routine makes sure that
+  !   all buffers have been flushed.
+  ! 
+  !
+  interface CTA_F90_Flush
+    subroutine CTA_Flush(  )
+    end subroutine CTA_Flush
+  end interface
+
+
+end module cta_f90_flush_mod
+
diff --git a/costa/native/cta_f90/generated/cta_f90_functions.f90 b/costa/native/cta_f90/generated/cta_f90_functions.f90
new file mode 100644
index 000000000..b9e9a8ae9
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_functions.f90
@@ -0,0 +1,145 @@
+module cta_f90_functions
+
+  implicit none
+
+  public
+
+  !  \brief Create a new COSTA function.
+  ! 
+  !   \note Argument name is only used for debugging and output.
+  ! 
+  !   \param name     I  name of the new function for debugging purposes
+  !   \param function I  pointer to function that has to be associated
+  !                      with new COSTA function
+  !   \param hintf    I  handle of associated interface
+  !   \param hfunc    O  receives handle of created COSTA function
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_Func_Create
+    subroutine CTA_Func_Create( name, function, hintf, hfunc, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      character(len=*)              , intent(in   )     ::  name
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  function(*)
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hintf
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hfunc
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Func_Create
+  end interface
+
+  !  \brief Duplicates a user defined function
+  ! 
+  !  \param hfunc      I COSTA user function handle
+  !  \param hdupl      I duplication of hfunc
+  !  \param status O error status: CTA_OK
+  !
+  interface CTA_F90_Func_Duplicate
+    subroutine CTA_Func_Duplicate( hfunc, hdupl, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hfunc
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hdupl(*)
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Func_Duplicate
+  end interface
+
+  !  \brief Free a new COSTA function.
+  ! 
+  !   \note
+  ! 
+  !   \param hfunc  IO handle of COSTA function, replaced by CTA_NULL on return
+  !   \param status O CTA_OK if sucessful
+  !
+  interface CTA_F90_Func_Free
+    subroutine CTA_Func_Free( hfunc, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hfunc
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Func_Free
+  end interface
+
+  !  \brief Get interface of COSTA function.
+  ! 
+  !   \note For performance reasons, the interface is not a copy but a handle
+  !         to the actual interface, it should NOT be freed by the calling routine!
+  ! 
+  !   \param hfunc  I  handle of COSTA function
+  !   \param hintf  O  receives handle of interface of function
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_Func_GetIntf
+    subroutine CTA_Func_GetIntf( hfunc, hintf, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hfunc
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hintf
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Func_GetIntf
+  end interface
+
+  !  \brief Get function pointer of function
+  ! 
+  !   \note There is no FORTRAN verion of this function available
+  ! 
+  !   \param hfunc     I  handle of COSTA function.
+  !   \param function  O  receives pointer to function
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_Func_GetFunc
+    subroutine CTA_Func_GetFunc( hfunc, function, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hfunc
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  function
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Func_GetFunc
+  end interface
+
+  !  \brief Get name of function
+  ! 
+  !   \note Future versions will return a COSTA string handle.
+  ! 
+  !   \param hfunc     I  handle of COSTA function.
+  !   \param name      O  handle of string object that is to receive function name, must exist before calling
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_Func_GetName
+    subroutine CTA_Func_GetName( hfunc, name, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hfunc
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  name
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Func_GetName
+  end interface
+
+  !  \brief Set userdata of function
+  ! 
+  !   \note Frees existing user data and replaces it with userdata
+  ! 
+  !   \param hfunc     IO handle of COSTA function.
+  !   \param userdata  I  new userdata handles
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_Func_SetUserdata
+    subroutine CTA_Func_SetUserdata( hfunc, userdata, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hfunc
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  userdata
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Func_SetUserdata
+  end interface
+
+  !  \brief Get userdata of function
+  ! 
+  !   \param hfunc     I  handle of COSTA function.
+  !   \param userdata  O  userdata handle
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_Func_GetUserdata
+    subroutine CTA_Func_GetUserdata( hfunc, userdata, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hfunc
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  userdata
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Func_GetUserdata
+  end interface
+
+
+end module cta_f90_functions
+
diff --git a/costa/native/cta_f90/generated/cta_f90_handles.f90 b/costa/native/cta_f90/generated/cta_f90_handles.f90
new file mode 100644
index 000000000..60e232b56
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_handles.f90
@@ -0,0 +1,223 @@
+module cta_f90_handles
+
+  implicit none
+
+  public
+
+  !  \brief Create a new COSTA handle
+  ! 
+  !  \param name     I  name associated with handle
+  !  \param datatype I  data type of handle
+  !  \param data     I  block of data associated to handle
+  !  \param handle   O  receives COSTA handle
+  !  \param status O error status: CTA_OK
+  !
+  !interface CTA_F90_Handle_Create
+  !  subroutine CTA_Handle_Create( name, datatype, data, handle, status )
+  !    use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+  !    character(len=*)              , intent(in   )     ::  name
+  !    integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  datatype
+  !    void                          , intent(in   )     ::  data(*)
+  !    integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  handle
+  !    integer                       , intent(out  )     ::  status
+  !  end subroutine CTA_Handle_Create
+  !end interface
+
+  !  \brief Free a COSTA handle
+  ! 
+  !   \note The data part of the handle is NOT freed.
+  ! 
+  !   \param handle IO handle that is to be freed, replaced by CTA_NULL on return.
+  !   \param status O error status: CTA_OK, CTA_ILLEGAL_HANDLE
+  !
+  interface CTA_F90_Handle_Free
+    subroutine CTA_Handle_Free( handle, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  handle
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Handle_Free
+  end interface
+
+  !  \brief Free a COSTA handle
+  ! 
+  !   \note Calls datatype-specific free methods, if available.
+  ! 
+  !   \param handle IO handle that must be freed, replaced by CTA_NULL on return.
+  !   \param status O error status: CTA_OK, CTA_ILLEGAL_HANDLE
+  !
+  interface CTA_F90_Handle_Free_All
+    subroutine CTA_Handle_Free_All( handle, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  handle
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Handle_Free_All
+  end interface
+
+  !  \brief Check whether a handle is valid and checks type
+  ! 
+  !   \note The handle CTA_NULL is not valid.
+  ! 
+  !  \param handle   I  COSTA handle
+  !  \param datatype I  data type to compare handle with
+  !  \param status O error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE
+  ! 
+  !
+  interface CTA_F90_Handle_Check
+    subroutine CTA_Handle_Check( handle, datatype, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  handle
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  datatype
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Handle_Check
+  end interface
+
+  !  \brief Get pointer to data element of handle
+  ! 
+  !   \param handle I  COSTA handle
+  !   \param data   O  receives pointer to data element
+  !   \param status O error status: CTA_OK, CTA_ILLEGAL_HANDLE
+  !
+  !interface CTA_F90_Handle_GetData
+  !  subroutine CTA_Handle_GetData( handle, data, status )
+  !    use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+  !    integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  handle
+  !    void                          , intent(out  )     ::  data
+  !    integer                       , intent(out  )     ::  status
+  !  end subroutine CTA_Handle_GetData
+  !end interface
+
+  !  \brief Get the value the handle points to
+  ! 
+  !   \param handle 	I  COSTA handle
+  !   \param value   	O  receives pointer to data element
+  !   \param datatype     I  specify the data type of *value, must be the same as data type of handle
+  !   \param status O error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE
+  !
+  !interface CTA_F90_Handle_GetValue
+  !  subroutine CTA_Handle_GetValue( handle, value, datatype, status )
+  !    use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+  !    integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  handle
+  !    void                          , intent(out  )     ::  value
+  !    integer                       , intent(in   )     ::  datatype
+  !    integer                       , intent(out  )     ::  status
+  !  end subroutine CTA_Handle_GetValue
+  !end interface
+
+  !  \brief Get name associated with handle
+  ! 
+  !   \param handle   I  COSTA handle
+  !   \param hname    O  receives name of data type
+  !   \param status O error status: CTA_OK, CTA_ILLEGAL_HANDLE
+  !
+  interface CTA_F90_Handle_GetName
+    subroutine CTA_Handle_GetName( handle, hname, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  handle
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hname
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Handle_GetName
+  end interface
+
+  !  \brief Get data type associated with handle
+  ! 
+  !   \param handle   I  COSTA handle
+  !   \param datatype O  receives data type of handle
+  !   \param status O error status: CTA_OK, CTA_ILLEGAL_HANDLE
+  !
+  interface CTA_F90_Handle_GetDatatype
+    subroutine CTA_Handle_GetDatatype( handle, datatype, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  handle
+      integer                       , intent(out  )     ::  datatype
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Handle_GetDatatype
+  end interface
+
+  !  \brief Find a handle by name and data type in the COSTA handle administration
+  ! 
+  !   \param sname    I  name of handle
+  !   \param datatype I  data type of handle
+  !   \param handlenr O  receives the handle (only if return value is CTA_OK)
+  !   \param status O error status: CTA_OK, CTA_HANDLE_NOT_FOUND
+  !
+  interface CTA_F90_Handle_Find
+    subroutine CTA_Handle_Find( sname, datatype, handlenr, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  sname
+      integer                       , intent(in   )     ::  datatype
+      integer                       , intent(out  )     ::  handlenr
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Handle_Find
+  end interface
+
+  !  \brief Print overview of all COSTA handles
+  ! 
+  !   \param status O error status: CTA_OK
+  !
+  interface CTA_F90_Handle_Printall
+    subroutine CTA_Handle_Printall( status )
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Handle_Printall
+  end interface
+
+  !  \brief Counts all handles sorts them by type and prints overview.
+  !  This function can be usefull for detecting memory leaks that are
+  !  the result of not freeing instances of COSTA objects costa objects.
+  ! 
+  !   \param location I String to indicate location of call
+  !   \param status O error status: CTA_OK
+  !
+  interface CTA_F90_Handle_PrintInfo
+    subroutine CTA_Handle_PrintInfo( location, status )
+      character(len=*)              , intent(in   )     ::  location
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Handle_PrintInfo
+  end interface
+
+  !  \brief Get the reference count of the handle
+  ! 
+  !   \param handle   I COSTA handle
+  !   \param refCount O reference count of handle
+  !   \param status O error status: CTA_OK
+  ! 
+  !
+  interface CTA_F90_Handle_GetRefCount
+    subroutine CTA_Handle_GetRefCount( handle, refCount, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  handle
+      integer                       , intent(out  )     ::  refCount
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Handle_GetRefCount
+  end interface
+
+  !  \brief Increase the reference count of the handle
+  ! 
+  !   \param handle   I COSTA handle
+  !   \param status O error status: CTA_OK
+  ! 
+  !
+  interface CTA_F90_Handle_IncRefCount
+    subroutine CTA_Handle_IncRefCount( handle, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  handle
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Handle_IncRefCount
+  end interface
+
+  !  \brief Decrease the reference count of the handle
+  ! 
+  !   \param handle   I COSTA handle
+  !   \param status O error status: CTA_OK
+  ! 
+  !
+  interface CTA_F90_Handle_DecrRefCount
+    subroutine CTA_Handle_DecrRefCount( handle, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  handle
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Handle_DecrRefCount
+  end interface
+
+
+end module cta_f90_handles
+
diff --git a/costa/native/cta_f90/generated/cta_f90_initialise.f90 b/costa/native/cta_f90/generated/cta_f90_initialise.f90
new file mode 100644
index 000000000..c481ce448
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_initialise.f90
@@ -0,0 +1,27 @@
+module cta_f90_initialise
+
+  implicit none
+
+  public
+
+  !  \brief Initialize the COSTA environment
+  !   \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Core_Initialise
+    subroutine CTA_Core_Initialise( status )
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Core_Initialise
+  end interface
+
+  !  \brief Finalise the COSTA environment
+  !   \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Core_Finalise
+    subroutine CTA_Core_Finalise( status )
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Core_Finalise
+  end interface
+
+
+end module cta_f90_initialise
+
diff --git a/costa/native/cta_f90/generated/cta_f90_interface.f90 b/costa/native/cta_f90/generated/cta_f90_interface.f90
new file mode 100644
index 000000000..eed032e3b
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_interface.f90
@@ -0,0 +1,109 @@
+module cta_f90_interface
+
+  implicit none
+
+  public
+
+  !  \brief Create a new interface
+  ! 
+  !  \param name     I  name of the new interface
+  !  \param argtyp   I  list with the data types of arguments
+  !  \param narg     I  number of arguments of interface
+  !  \param hintf    O  receives the new COSTA interface handle
+  !  \param status O error status: CTA_OK
+  !
+  interface CTA_F90_Intf_Create
+    subroutine CTA_Intf_Create( name, argtyp, narg, hintf, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      character(len=*)              , intent(in   )     ::  name
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  argtyp(*)
+      integer                       , intent(in   )     ::  narg
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hintf
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Intf_Create
+  end interface
+
+  !  \brief Free an interface
+  ! 
+  !   \note Freeing CTA_NULL is allowed.
+  ! 
+  !   \param hintf  IO  handle of interface, replaced by CTA_NULL on return
+  !   \param status O error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE
+  !
+  interface CTA_F90_Intf_Free
+    subroutine CTA_Intf_Free( hintf, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hintf
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Intf_Free
+  end interface
+
+  !  \brief Match two interfaces for compatibility argumentlist-argumentlist
+  ! 
+  !   \note Two interfaces are compatible if all arguments have the same datatype,
+  !         CTA_VOID is compatible with all other arguments except for CTA_FSTRING
+  ! 
+  !   \param argtyp1  I  list with the data types of arguments of first interface
+  !   \param narg1    I  number of arguments in first interface
+  !   \param argtyp2  I  list with the data types of arguments of second interface
+  !   \param narg2    I  number of arguments in second interface
+  !   \param flag     O  receives TRUE if interfaces are compatible FALSE ortherwise
+  !   \param status O error status: CTA_OK
+  !
+  interface CTA_F90_Intf_Match_aa
+    subroutine CTA_Intf_Match_aa( argtyp1, narg1, argtyp2, narg2, flag, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  argtyp1(*)
+      integer                       , intent(in   )     ::  narg1
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  argtyp2(*)
+      integer                       , intent(in   )     ::  narg2
+      integer                       , intent(out  )     ::  flag
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Intf_Match_aa
+  end interface
+
+  !  \brief Match two interfaces for compatibility handle-argumentlist
+  ! 
+  !   \note Two interfaces are compatible if all arguments have the same datatype,
+  !         CTA_VOID is compatible with all other arguments except for CTA_FSTRING
+  ! 
+  !   \param hintf1   I  handle of first interface
+  !   \param argtyp2  I  list with the data types of arguments of second interface
+  !   \param narg2    I  number of arguments in second interface
+  !   \param flag     O  receives TRUE if interfaces are compatible FALSE ortherwise
+  !   \param status O error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE
+  !
+  interface CTA_F90_Intf_Match_ha
+    subroutine CTA_Intf_Match_ha( hintf1, argtyp2, narg2, flag, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hintf1
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  argtyp2(*)
+      integer                       , intent(in   )     ::  narg2
+      integer                       , intent(out  )     ::  flag
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Intf_Match_ha
+  end interface
+
+  !  \brief Match two interfaces for compatibility handle-handle
+  ! 
+  !   \note two interfaces are compatible if all arguments have the same datatype,
+  !         CTA_VOID is compatible with all other arguments except for CTA_FSTRING
+  ! 
+  !   \param hintf1   I  handle of first interface
+  !   \param hintf2   I  handle of second interface
+  !   \param flag     O  receives TRUE if interfaces are compatible FALSE ortherwise
+  !   \param status O error status: CTA_OK, CTA_ILLEGAL_HANDLE, CTA_INCOMPATIBLE_HANDLE
+  !
+  interface CTA_F90_Intf_Match_hh
+    subroutine CTA_Intf_Match_hh( hintf1, hintf2, flag, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hintf1
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hintf2
+      integer                       , intent(out  )     ::  flag
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Intf_Match_hh
+  end interface
+
+
+end module cta_f90_interface
+
diff --git a/costa/native/cta_f90/generated/cta_f90_matrix.f90 b/costa/native/cta_f90/generated/cta_f90_matrix.f90
new file mode 100644
index 000000000..2b67f46bd
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_matrix.f90
@@ -0,0 +1,373 @@
+module cta_f90_matrix
+
+  implicit none
+
+  public
+
+  !  \brief Create a new class (=implementation) of a COSTA matrix component.
+  ! 
+  !  \param name     I  name of the new matrix class
+  !  \param h_func   I  COSTA function handles for functions that implement class,
+  !                     missing functions must have value CTA_NULL
+  !  \param hmatcl   O  receives handle of new matrix class
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Matrix_DefineClass
+    subroutine CTA_Matrix_DefineClass( name, h_func, hmatcl, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      character(len=*)              , intent(in   )     ::  name
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h_func(*)
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hmatcl
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Matrix_DefineClass
+  end interface
+
+  !  \brief Duplicate a matrix instance.
+  ! 
+  !  \note Only size, data type and class type are duplicated, the values are not
+  !        copied.
+  ! 
+  !  \param hmat1 I  handle of matrix to be duplicated
+  !  \param hmat2 O  receives handle of duplicate matrix, empty handle before calling
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Matrix_Duplicate
+    subroutine CTA_Matrix_Duplicate( hmat1, hmat2, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hmat1
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hmat2
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Matrix_Duplicate
+  end interface
+
+  !  \brief Create a new matrix.
+  ! 
+  !  \note
+  ! 
+  !  \param hmatcl   I  matrix class of new matrix
+  !  \param m        I  number of rows
+  !  \param n        I  number of columns
+  !  \param datatype I  datatype of elements in matrix
+  !  \param userdata IO userdata for creation (depends on class)
+  !  \param hmat     O  receives handle of new matrix
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Matrix_Create
+    subroutine CTA_Matrix_Create( hmatcl, m, n, datatype, userdata, hmat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hmatcl
+      integer                       , intent(in   )     ::  m
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(in   )     ::  datatype
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  userdata
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hmat
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Matrix_Create
+  end interface
+
+  !  \brief Get datatype of matrix
+  ! 
+  !  \note
+  ! 
+  !  \param hmat     I  handle of matrix
+  !  \param datatype O  receives data type of matrix
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Matrix_GetDatatype
+    subroutine CTA_Matrix_GetDatatype( hmat, datatype, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hmat
+      integer                       , intent(out  )     ::  datatype
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Matrix_GetDatatype
+  end interface
+
+  !  \brief Get copy of all values in matrix.
+  ! 
+  !  \note The elements in the matrix are returned column-wise
+  !        (FORTRAN matrix representation)
+  ! 
+  !  \param hmat     I  handle of matrix
+  !  \param vals     O  copy of values in matrix
+  !  \param m        I  number of rows of vals (must be the same as for the matrix)
+  !  \param n        I  number of columns of vals (must be the same as for the matrix)
+  !  \param datatype I  data type of *vals, must be the same as data type of matrix elements
+  !  \param status O error status: CTA_OK if successful
+  !
+  !interface CTA_F90_Matrix_GetVals
+  !  subroutine CTA_Matrix_GetVals( hmat, vals, m, n, datatype, status )
+  !    use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+  !    integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hmat
+  !    void                          , intent(out  )     ::  vals
+  !    integer                       , intent(in   )     ::  m
+  !    integer                       , intent(in   )     ::  n
+  !    integer                       , intent(in   )     ::  datatype
+  !    integer                       , intent(out  )     ::  status
+  !  end subroutine CTA_Matrix_GetVals
+  !end interface
+
+  !  \brief Get copy of single value in the matrix.
+  ! 
+  !  \note Counting of the indices starts from 1.
+  !  \param hmat     I  handle of matrix
+  !  \param val      O  receives copy of value in matrix
+  !  \param m        I  row index of value to be copied
+  !  \param n        I  column index of value to be copied
+  !  \param datatype I  data type of *val, must be the same as data type of matrix elements
+  !  \param status O error status: CTA_OK if successful
+  !
+  !interface CTA_F90_Matrix_GetVal
+  !  subroutine CTA_Matrix_GetVal( hmat, val, m, n, datatype, status )
+  !    use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+  !    integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hmat
+  !    void                          , intent(out  )     ::  val
+  !    integer                       , intent(in   )     ::  m
+  !    integer                       , intent(in   )     ::  n
+  !    integer                       , intent(in   )     ::  datatype
+  !    integer                       , intent(out  )     ::  status
+  !  end subroutine CTA_Matrix_GetVal
+  !end interface
+
+  !  \brief Set whole matrix to one single value.
+  ! 
+  !  \note
+  !  \param hmat     IO handle of matrix
+  !  \param val      I  value that must be set
+  !  \param datatype I  data type of *val, must be the same as data type of matrix elements
+  !  \param status O error status: CTA_OK if successful
+  !
+  !interface CTA_F90_Matrix_SetConstant
+  !  subroutine CTA_Matrix_SetConstant( hmat, val, datatype, status )
+  !    use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+  !    integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hmat
+  !    void                          , intent(in   )     ::  val(*)
+  !    integer                       , intent(in   )     ::  datatype
+  !    integer                       , intent(out  )     ::  status
+  !  end subroutine CTA_Matrix_SetConstant
+  !end interface
+
+  !  \brief Set values of a single column of the matrix.
+  ! 
+  !  \note Counting of the indices starts from 1.
+  !  \param hmat     IO handle of matrix
+  !  \param n        I  index of matrix column to set
+  !  \param hvec     I  handle of COSTA vector the values have to be set to, length must be the same as number of rows of matrix
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Matrix_SetCol
+    subroutine CTA_Matrix_SetCol( hmat, n, hvec, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hmat
+      integer                       , intent(in   )     ::  n
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Matrix_SetCol
+  end interface
+
+  !  \brief Set all values of the matrix.
+  ! 
+  !  \note The elements in vals should be column-wise
+  !        (FORTRAN matrix representation)
+  !  \param hmat     IO handle of matrix
+  !  \param vals     I  copy of values in matrix
+  !  \param m        I  number of rows of vals (must be the same as for the matrix)
+  !  \param n        I  number of columns of vals (must be the same as for the matrix)
+  !  \param datatype I  data type of vals
+  !  \param status O error status: CTA_OK if successful
+  !
+  !interface CTA_F90_Matrix_SetVals
+  !  subroutine CTA_Matrix_SetVals( hmat, vals, m, n, datatype, status )
+  !    use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+  !    integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hmat
+  !    void                          , intent(in   )     ::  vals(*)
+  !    integer                       , intent(in   )     ::  m
+  !    integer                       , intent(in   )     ::  n
+  !    integer                       , intent(in   )     ::  datatype
+  !    integer                       , intent(out  )     ::  status
+  !  end subroutine CTA_Matrix_SetVals
+  !end interface
+
+  !  \brief Set a single value in the matrix.
+  ! 
+  !  \param hmat     IO handle of matrix
+  !  \param val      I  value to be set at position (m,n)
+  !  \param m        I  row index
+  !  \param n        I  column index
+  !  \param datatype I  data type of *val, must be the same as data type of matrix elements
+  !  \param status O error status: CTA_OK if successful
+  !
+  !interface CTA_F90_Matrix_SetVal
+  !  subroutine CTA_Matrix_SetVal( hmat, val, m, n, datatype, status )
+  !    use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+  !    integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hmat
+  !    void                          , intent(in   )     ::  val(*)
+  !    integer                       , intent(in   )     ::  m
+  !    integer                       , intent(in   )     ::  n
+  !    integer                       , intent(in   )     ::  datatype
+  !    integer                       , intent(out  )     ::  status
+  !  end subroutine CTA_Matrix_SetVal
+  !end interface
+
+  !  \brief Export a matrix.
+  ! 
+  !  \note CTA_DEFAULT_MATRIX supports exporting to:\n
+  !        file (usedoc is handle of COSTA file)\n
+  ! 
+  !  \param hmat     I  handle of matrix
+  !  \param usedoc   I  configuration of output
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Matrix_Export
+    subroutine CTA_Matrix_Export( hmat, usedoc, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hmat
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  usedoc
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Matrix_Export
+  end interface
+
+  !  \brief Perform a rank 1 operation A:=alpha*x*y'+A
+  ! 
+  !  \param hmat     IO handle of matrix A
+  !  \param alpha    I  scalar
+  !  \param vx       I  vector x
+  !  \param vy       I  vector y
+  !  \param status O error status: CTA_OK if successful
+  !  \note it is allowed that are the same object (*vx==*vy)
+  !
+  interface CTA_F90_Matrix_Ger
+    subroutine CTA_Matrix_Ger( hmat, alpha, vx, vy, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hmat
+      real(8)                       , intent(in   )     ::  alpha
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  vx
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  vy
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Matrix_Ger
+  end interface
+
+  !  \brief Compute inverse of a square matrix A:=inv(A)
+  ! 
+  !  \param hmat     IO handle of matrix A
+  !  \param status O error status: CTA_OK if successful
+  ! 
+  !
+  interface CTA_F90_Matrix_Inv
+    subroutine CTA_Matrix_Inv( hmat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hmat
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Matrix_Inv
+  end interface
+
+  !  \brief Perform the matrix multiplication y:=alpha*OP(A)*x+beta*y
+  !    where op(X)=X, X^T
+  ! 
+  !  \param hmat     I  handle of matrix (A from the equation above)
+  !  \param trans    I  transpose flag CTA_TRUE/CTA_FALSE for matrix A
+  !  \param alpha    I  scalar
+  !  \param vx       I  vector x
+  !  \param beta     I  scalar
+  !  \param vy       IO vector y
+  !  \param status O error status: CTA_OK if successful
+  !  \note it is allowed that vectors are the same object (*vx==*vy)
+  !
+  interface CTA_F90_Matrix_Gemv
+    subroutine CTA_Matrix_Gemv( hmat, trans, alpha, vx, beta, vy, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hmat
+      integer                       , intent(in   )     ::  trans
+      real(8)                       , intent(in   )     ::  alpha
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  vx
+      real(8)                       , intent(in   )     ::  beta
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  vy
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Matrix_Gemv
+  end interface
+
+  !  \brief Perform the matrix multication C:=alpha*op(A)*op(B)+beta*C
+  ! where op(X)=X, X^T
+  ! 
+  !   \param mC     IO handle of matrix C
+  !   \param transa I  transpose flag CTA_TRUE/CTA_FALSE for matrix A
+  !   \param transb I  transpose flag CTA_TRUE/CTA_FALSE for matrix A
+  !   \param alpha  I  scalar
+  !   \param mA     I  handle of matrix A
+  !   \param mB     I  handle of matrix B
+  !   \param beta   I  scalar
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Matrix_Gemm
+    subroutine CTA_Matrix_Gemm( mC, transa, transb, alpha, mA, mB, beta, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  mC
+      integer                       , intent(in   )     ::  transa
+      integer                       , intent(in   )     ::  transb
+      real(8)                       , intent(in   )     ::  alpha
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  mA
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  mB
+      real(8)                       , intent(in   )     ::  beta
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Matrix_Gemm
+  end interface
+
+  !  \brief Perform the matrix addition Y:=alpha*X+Y
+  ! 
+  !   \param mY     IO handle of matrix Y
+  !   \param alpha  I  scalar
+  !   \param mX     I  handle of matrix X
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Matrix_Axpy
+    subroutine CTA_Matrix_Axpy( mY, alpha, mX, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  mY
+      real(8)                       , intent(in   )     ::  alpha
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  mX
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Matrix_Axpy
+  end interface
+
+  !  \brief Computes the eigenvalues and optionally the eigenvectors
+  !          of a general matrix A
+  ! 
+  !   The computed eigenvectors are normalized to have Euclidean norm
+  !   equal to 1 and largest component real.
+  ! 
+  !   \param A     I Matrix A
+  !   \param eigvals  O Vector with eigenvalues
+  !   \param eigvecs  O Matrix with eigenvalues. The eigenvectors are
+  !                     not computed when  eigvecs in CTA_NULL on entry
+  !  \param status O error status: CTA_OK if successful
+  !  \note the eigenvalues can be complex. Since COSTA does not yet support
+  !        complex vectors, only the real part of the eigenvalues is
+  !        returned.
+  !
+  interface CTA_F90_Matrix_EigVals
+    subroutine CTA_Matrix_EigVals( A, eigvals, eigvecs, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  A
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  eigvals
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  eigvecs
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Matrix_EigVals
+  end interface
+
+  !  \brief Free the matrix object
+  ! 
+  !  \Note hmat=CTA_NULL is allowed
+  ! 
+  !  \param hmat     IO handle of matrix, replaced by CTA_NULL on return.
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Matrix_Free
+    subroutine CTA_Matrix_Free( hmat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hmat
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Matrix_Free
+  end interface
+
+
+end module cta_f90_matrix
+
diff --git a/costa/native/cta_f90/generated/cta_f90_mem.f90 b/costa/native/cta_f90/generated/cta_f90_mem.f90
new file mode 100644
index 000000000..dd05e3fd8
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_mem.f90
@@ -0,0 +1,19 @@
+module cta_f90_mem
+
+  implicit none
+
+  public
+
+  !  \brief Free memory that has been allocated with CTA_Malloc or CTA_Realloc
+  ! 
+  !  \param ptr     I  pointer to memory block to deallocate
+  !
+  !interface CTA_F90_Free
+  !  subroutine CTA_Free( ptr )
+  !    void                          , intent(in   )     ::  ptr(*)
+  !  end subroutine CTA_Free
+  !end interface
+
+
+end module cta_f90_mem
+
diff --git a/costa/native/cta_f90/generated/cta_f90_message.f90 b/costa/native/cta_f90/generated/cta_f90_message.f90
new file mode 100644
index 000000000..75eb035a2
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_message.f90
@@ -0,0 +1,60 @@
+module cta_f90_message
+
+  implicit none
+
+  public
+
+  !  \brief Write a message
+  ! 
+  !  \param className I name of class that writes the message
+  !  \param method    I name of the method that writes the message
+  !  \param message   I message
+  !  \param type      I type of message
+  !                     -'M':message
+  !                     -'I':Info
+  !                     -'W':Warning
+  !                     -'E':Error
+  !                     -'F':Fatal error  (will terminate application)
+  !
+  interface CTA_F90_Message_Write
+    subroutine CTA_Message_Write( className, method, message, type )
+      character(len=*)              , intent(in   )     ::  className
+      character(len=*)              , intent(in   )     ::  method
+      character(len=*)              , intent(in   )     ::  message
+      character(len=*)              , intent(in   )     ::  type
+    end subroutine CTA_Message_Write
+  end interface
+
+  !  \brief Set an external writer for handling messages
+  ! 
+  !   An external writer must comply to the following C interface:
+  ! 
+  !   void my_writer(char *className, char *method, char *message, char type);
+  ! 
+  !  \param externalWriter I External writer
+  ! 
+  ! 
+  !   \note Fortran writers are not yet supported
+  !
+  interface CTA_F90_Message_SetExternalWriter
+    subroutine CTA_Message_SetExternalWriter( externalWriter )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  externalWriter
+    end subroutine CTA_Message_SetExternalWriter
+  end interface
+
+  !  \brief Toggle message handler between quiet and normal mode.
+  !   in the quiet mode no messages are send (not even to external writers)
+  ! 
+  !  \param setting  I set message handler in quiet mode CTA_TRUE/CTA_FALSE
+  ! 
+  !
+  interface CTA_F90_Message_Quiet
+    subroutine CTA_Message_Quiet( setting )
+      integer                       , intent(in   )     ::  setting
+    end subroutine CTA_Message_Quiet
+  end interface
+
+
+end module cta_f90_message
+
diff --git a/costa/native/cta_f90/generated/cta_f90_metainfo.f90 b/costa/native/cta_f90/generated/cta_f90_metainfo.f90
new file mode 100644
index 000000000..199e86d3a
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_metainfo.f90
@@ -0,0 +1,60 @@
+module cta_f90_metainfo
+
+  implicit none
+
+  public
+
+  !  \brief Create a new class (=implementation) of a COSTA observation description component.
+  ! 
+  !  \param name       I  name of the new observation description class
+  !  \param h_func     I  COSTA function handles for functions that implement class,
+  !                       missing functions must have value CTA_NULL
+  !  \param hobsdscrcl O  receives handle of new observation description class
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Metainfo_DefineClass
+    subroutine CTA_Metainfo_DefineClass( name, h_func, hobsdscrcl, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      character(len=*)              , intent(in   )     ::  name
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h_func(*)
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hobsdscrcl
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Metainfo_DefineClass
+  end interface
+
+  !  \brief Create a new observation description instance.
+  ! 
+  !  \param hsobscl   I  class of new observation description
+  !  \param usrdat    IO data of the stochastic observer for which
+  ! a descriptor is to be created
+  !  \param hobsdscr  O  receives handle of created observation description object
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Metainfo_Create
+    subroutine CTA_Metainfo_Create( hobsdscr, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hobsdscr
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Metainfo_Create
+  end interface
+
+  !  \brief Get properties/values that correspond to a given key.
+  ! 
+  !  \param hobsdscr   I  handle of observation description
+  !  \param Key        I  key for which the value is asked
+  !  \param Properties IO COSTA-vector that is to receive the values
+  !  \param datatype   I  data type of elements in properties vector, must be the same as of queried properties
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Metainfo_SetUnit
+    subroutine CTA_Metainfo_SetUnit( hobsdscr, Key, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hobsdscr
+      character(len=*)              , intent(in   )     ::  Key
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Metainfo_SetUnit
+  end interface
+
+
+end module cta_f90_metainfo
+
diff --git a/costa/native/cta_f90/generated/cta_f90_method.f90 b/costa/native/cta_f90/generated/cta_f90_method.f90
new file mode 100644
index 000000000..0217e2570
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_method.f90
@@ -0,0 +1,71 @@
+module cta_f90_method
+
+  implicit none
+
+  public
+
+  !  \brief Create a new class (=implementation) of a COSTA method.
+  ! 
+  !  \param name     I  name of the new method class
+  !  \param h_func   I  COSTA function handles for functions that implement class,
+  !                     missing functions must have value CTA_NULL
+  !  \param hmethcl  O  receives handle of new method class
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Meth_DefineClass
+    subroutine CTA_Meth_DefineClass( name, h_func, hmethcl, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      character(len=*)              , intent(in   )     ::  name
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h_func(*)
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hmethcl
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Meth_DefineClass
+  end interface
+
+  !  \brief Create an instance of a method.
+  ! 
+  !  \param hmethcl   I  method class of new object
+  !  \param userdata  IO user data for creation (depends on class)
+  !  \param hmeth     O  receives handle of new method
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Meth_Create
+    subroutine CTA_Meth_Create( hmethcl, userdata, hmeth, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hmethcl
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  userdata
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hmeth
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Meth_Create
+  end interface
+
+  !  \brief Run method.
+  !  \param hmeth     I  handle of method
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Meth_Run
+    subroutine CTA_Meth_Run( hmeth, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hmeth
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Meth_Run
+  end interface
+
+  !  \brief Free the method object.
+  ! 
+  !  \Note hmeth=CTA_NULL is allowed
+  ! 
+  !  \param hmeth  IO handle of method, replaced by CTA_NULL on return.
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Meth_Free
+    subroutine CTA_Meth_Free( hmeth, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hmeth
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Meth_Free
+  end interface
+
+
+end module cta_f90_method
+
diff --git a/costa/native/cta_f90/generated/cta_f90_modbuild_par.f90 b/costa/native/cta_f90/generated/cta_f90_modbuild_par.f90
new file mode 100644
index 000000000..b4c54053b
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_modbuild_par.f90
@@ -0,0 +1,34 @@
+module cta_f90_modbuild_par
+
+  implicit none
+
+  public
+
+  !  \brief Create the model class of the Par-Modelbuilder and initilizes MPI
+  ! 
+  !  \note This is not a user function. It is called at initialization of the
+  !        COSTA environment.
+  ! 
+  !   \param modelcls  O  receives handle of the SP-modelbuilder class
+  !
+  interface CTA_F90_Modbuild_par_CreateClass
+    subroutine CTA_Modbuild_par_CreateClass( modelcls )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  modelcls
+    end subroutine CTA_Modbuild_par_CreateClass
+  end interface
+
+  !  \brief Stop the model class of the Par-Modelbuilder and finalize MPI
+  ! 
+  !  \note This is not a user function. It is called at the finalization of the
+  !        COSTA environment.
+  ! 
+  !
+  interface CTA_F90_Modbuild_par_Finalize
+    subroutine CTA_Modbuild_par_Finalize(  )
+    end subroutine CTA_Modbuild_par_Finalize
+  end interface
+
+
+end module cta_f90_modbuild_par
+
diff --git a/costa/native/cta_f90/generated/cta_f90_model.f90 b/costa/native/cta_f90/generated/cta_f90_model.f90
new file mode 100644
index 000000000..5aeaa8fde
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_model.f90
@@ -0,0 +1,618 @@
+module cta_f90_model
+
+  implicit none
+
+  public
+
+  !  \brief Create a model instance
+  ! 
+  !  \param hmodcl   I  model class of new instance
+  !  \param userdata IO user data needed for creation (depends on modelclass)
+  !  \param hmodel   O  receives handle of new model instance
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_Create
+    subroutine CTA_Model_Create( hmodcl, userdata, hmodel, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hmodcl
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  userdata
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hmodel
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_Create
+  end interface
+
+  !  \brief Compute model for given timespan
+  ! 
+  !  \param hmodel   IO handle of model instance
+  !  \param htime    I  timespan for which to compute
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_Compute
+    subroutine CTA_Model_Compute( hmodel, htime, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hmodel
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  htime
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_Compute
+  end interface
+
+  !  \brief Add noise during during the given timespan at
+  !         the Compute
+  ! 
+  !  \note Noise is added in the compute-method
+  !  \param hmodel   IO handle of model instance
+  !  \param htime    I  timespan for which to compute adding noise
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_AddNoise
+    subroutine CTA_Model_AddNoise( hmodel, htime, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hmodel
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  htime
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_AddNoise
+  end interface
+
+  !  \brief Set the internal state of the model.
+  ! 
+  !  \note A copy of the state is set
+  ! 
+  !  \param hmodel   IO handle of model instance
+  !  \param hstate   I  handle of new state
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_SetState
+    subroutine CTA_Model_SetState( hmodel, hstate, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hmodel
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hstate
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_SetState
+  end interface
+
+  !  \brief Get a copy of the internal state.
+  ! 
+  !  \note Optionally a tree-vector is created. In that case the caller of this
+  !  method is responsible for freeing that tree-vector. The input state must be compatible
+  !  (same size and or composition) as the models internal state.
+  !  \note If *hstate == CTA_NULL a new object is created, user is responsible for freeing this object.
+  ! 
+  !  \param hmodel   I  handle of model instance
+  !  \param hstate   IO receives state of the model, *hstate can be CTA_NULL on calling (see note)
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_GetState
+    subroutine CTA_Model_GetState( hmodel, hstate, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hmodel
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hstate
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_GetState
+  end interface
+
+  !  \brief Perform axpy operation on the internal state.
+  ! 
+  !  \note AXPY: y=alpha*x+y. y corresponds to the models
+  !        internal state and x can be a state vector or a model
+  ! 
+  !  \param hmodel   IO handle of model instance (y)
+  !  \param alpha    I  alpha
+  !  \param hx       I  handle of x (state or model)
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_AxpyState
+    subroutine CTA_Model_AxpyState( hmodel, alpha, hx, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hmodel
+      real(8)                       , intent(in   )     ::  alpha
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hx
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_AxpyState
+  end interface
+
+  !  \brief Get element-wise scaling for model state
+  ! 
+  !  The values in the state-vector are compared on "importance" in various
+  !  algorithms like RRSQRT and COFFEE. The model state holds in general
+  !  various quantities like concentration, velicity, location etc in
+  !  arbitrary units. The scaling vector (that can be model state dependend)
+  !  makes it possible to meaningfull compare elements in the state-vector
+  !  for importance. Various methods are available like a transformation to
+  !  enery.
+  ! 
+  !  The scaling vector represents a diagonal scaling matrix but is
+  !  respresented by a tree-vector.
+  ! 
+  !  \note The elementwise scaling is returned in the form of a tree-vector
+  !        with same build-up as the tree-vector of the model state. The scaling vector
+  !        is created whenever hscale==CTA_NULL on input, the caller is
+  !        responsible for freeing this object.
+  ! 
+  !  \param hmodel   I  handle of model instance
+  !  \param hscale   IO receives state scaling vector for the model state,
+  ! hstate can be CTA_NULL on calling (see note)
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_GetStateScaling
+    subroutine CTA_Model_GetStateScaling( hmodel, hscale, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hmodel
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hscale
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_GetStateScaling
+  end interface
+
+  !  \brief Set the models forcings.
+  ! 
+  !  \note Set the forcings (constant) for the given timespan.
+  !        The model will fall back to its own forcings definition
+  !        outside the given timespan.
+  ! 
+  !  \param hmodel   IO handle of model instance
+  !  \param tspan    I  time span on which to set the forcing values
+  !  \param hforc    I  handle of vector with new forcings
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_SetForc
+    subroutine CTA_Model_SetForc( hmodel, tspan, hforc, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hmodel
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  tspan
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hforc
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_SetForc
+  end interface
+
+  !  \brief Get a copy of the values of the models forcings
+  ! 
+  !  \note Optionally a tree-vector is created in that case the caller of this
+  !  method is responsible for freeing that tree-vector. The input tree-vector
+  !  must be compatible (same size and or composition) as the models
+  !  internal tree-vector representing the forcings.
+  !  If the forcings of the model are not constant for the given timespan
+  !  the result is dependent on the model-implementation
+  !  \note If *hforc == CTA_NULL a new object is created, user is responsible for freeing this object.
+  ! 
+  !  \param hmodel   I  handle of model instance
+  !  \param tspan    I  timespan for wich the given forcings are valid
+  !  \param hforc    IO receives models forcings, *hforc can be CTA_NULL on calling (see note)
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_GetForc
+    subroutine CTA_Model_GetForc( hmodel, tspan, hforc, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hmodel
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  tspan
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hforc
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_GetForc
+  end interface
+
+  !  \brief Perform axpy operation on the models forcings.
+  ! 
+  !  \note AXPY: y=alpha*x+y. y corresponds to the models
+  !        internal forcings.
+  !        The adjustment to the forcings (alpha*x) is only valid for the given
+  !        time span. Note that the model will use y(t)+x for the given time span
+  !        where y(t) denotes the default forcings of the model.
+  ! 
+  !  \param hmodel   IO handle of model instance (y)
+  !  \param tspan    I  time span for wich the given forcings are valid
+  !  \param alpha    I  scalar
+  !  \param hx       I  handle of forcings tree-vector x
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_AxpyForc
+    subroutine CTA_Model_AxpyForc( hmodel, tspan, alpha, hx, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hmodel
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  tspan
+      real(8)                       , intent(in   )     ::  alpha
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hx
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_AxpyForc
+  end interface
+
+  !  \brief Set parameters of the model.
+  ! 
+  !  \param hmodel   IO handle of model instance
+  !  \param hparam   I  handle of parameters vector
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_SetParam
+    subroutine CTA_Model_SetParam( hmodel, hparam, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hmodel
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hparam
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_SetParam
+  end interface
+
+  !  \brief Get a copy of the parameters of the model.
+  ! 
+  !  \note Optionally a tree-vector is created in that case the caller of this
+  !  method is responsible for freeing that tree-vector. The input tree-vector
+  !  must be compatible (same size and or composition) as the models
+  !  internal tree-vector representing the parameters.
+  !  \note If *hforc == CTA_NULL a new object is created, user is responsible for freeing this object.
+  ! 
+  !  \param hmodel   I  handle of model instance
+  !  \param hparam   IO receives model forcings, *hforc can equal CTA_NULL on calling (see note)
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_GetParam
+    subroutine CTA_Model_GetParam( hmodel, hparam, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hmodel
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hparam
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_GetParam
+  end interface
+
+  !  \brief Perform axpy operation on the models parameters.
+  ! 
+  !  \note AXPY: y=alpha*x+y where y corresponds to the models
+  !        internal parameters.
+  ! 
+  !  \param hmodel   IO handle of model instance (y)
+  !  \param alpha    I  alpha
+  !  \param hx       I  handle of treevector of parameters (x)
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_AxpyParam
+    subroutine CTA_Model_AxpyParam( hmodel, alpha, hx, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hmodel
+      real(8)                       , intent(in   )     ::  alpha
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hx
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_AxpyParam
+  end interface
+
+  !  \brief Return the timehorizon on the model.
+  !  The time horizon is the initial overal simulation span for which the mode is configured
+  ! 
+  !  \param hmodel   I handle of model instance
+  !  \param tHorizon I time horizon of model
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_GetTimeHorizon
+    subroutine CTA_Model_GetTimeHorizon( hmodel, tHorizon, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hmodel
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  tHorizon
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_GetTimeHorizon
+  end interface
+
+  !  \brief Return the current time of the model.
+  ! 
+  !  \param hmodel   I handle of model instance
+  !  \param tCurrent I time corresponding the the model state
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_GetCurrentTime
+    subroutine CTA_Model_GetCurrentTime( hmodel, tCurrent, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hmodel
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  tCurrent
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_GetCurrentTime
+  end interface
+
+  !  \brief Get covariance matrix of noise parameters.
+  ! 
+  !  \note ONLY for Stochastic models.
+  !        The covariance matrix is represented by an array
+  !        of tree-vectors (columns of the matrix)
+  !        optionally a tree-vector is created in that case the caller of this
+  !        method is responsible for freeing that tree-vector. The input tree-vector
+  !        must be compatible (same size and or composition) as the models
+  !        internal tree-vector.
+  !  \note If hstmat[icol] == CTA_NULL a new object is created, user is responsible for freeing this object.
+  ! 
+  !  \param hmodel   I  handle of model instance
+  !  \param hstmat   O  receives array of tree-vectors, *hstmat can equal CTA_NULL on calling (see note)
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_GetNoiseCovar
+    subroutine CTA_Model_GetNoiseCovar( hmodel, hstmat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hmodel
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hstmat
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_GetNoiseCovar
+  end interface
+
+  !  \brief Get number of noise parameters: the number of columns of the noise covariance matrix.
+  ! 
+  !  \note ONLY for Stochastic models.
+  ! 
+  !  \param hmodel   I  handle of model instance
+  !  \param nnoise   O  receives number of noise parameters
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_GetNoiseCount
+    subroutine CTA_Model_GetNoiseCount( hmodel, nnoise, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hmodel
+      integer                       , intent(out  )     ::  nnoise
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_GetNoiseCount
+  end interface
+
+  !  \brief Free model instance.
+  ! 
+  !  \note ONLY for Stochastic models.
+  ! 
+  !  \param hmodel   IO handle of model instance, replaced by CTA_NULL on return
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_Free
+    subroutine CTA_Model_Free( hmodel, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hmodel
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_Free
+  end interface
+
+  !  \brief Announce to the model what observations will be requested.
+  ! 
+  !   Before the compute method this method is used to announce what
+  !   obeservation will be requested after the CTA_Model_Compute using the
+  !   CTA_Model_GetObsvalues method.
+  ! 
+  !   For some simulation models it is more efficient to do a single simulation
+  !   (a single CTA_Model_Compute call) for a particular simulation span then
+  !   simulating the same simulation span in a number of steps (multiple
+  !   CTA_Model_Compute calls).
+  ! 
+  !   This method can be used to announce for what observations the model
+  !   must provide a prediction in advance. This method must be called prior
+  !   to the CTA_Compute method and makes it possible to perform simulations
+  !   over a longer time interval without the need to interupt the computations
+  !   in order to get the predictions at intermediate time instances.
+  ! 
+  !   Notes on the behavior of the method:
+  !   - The observation description used in the first CTA_Model_GetObsValues
+  !     after the compute MUST be the same as the observation description
+  !     used in the announce.
+  !   - All observations that are announced MUST be in the timespan of the
+  !     following CTA_Model_Compute.
+  !   - The announced observations can only be retreved ONCE after the
+  !     CTA_Model_Compute.
+  !   - A CTA_Model_SetState or CTA_Model_AxpyState will reset the announced
+  !     CTA_Model_AnnounceObsValues administration (since stored predictions
+  !     might not be valid anymore)
+  ! 
+  !  \param hmodel   I  handle of model instance
+  !  \param hdescr   I  observation description component
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_AnnounceObsValues
+    subroutine CTA_Model_AnnounceObsValues( hmodel, hdescr, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hmodel
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hdescr
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_AnnounceObsValues
+  end interface
+
+  !  \brief Get (interpolate) the models internal state to the
+  !    observations described as specified in the observation
+  !    description component.
+  ! 
+  !  \note The interface supports a the time instance for time-interpolation.
+  !        It depends on the model whether and how this is supported.
+  ! 
+  !  \param hmodel   I  handle of model instance
+  !  \param htime    I  time instance (for checking and time-interpolation if
+  !                     supported by model)
+  !  \param hdescr   I  observation description component
+  !  \param values   O  receives values of the models internal state corresponding to
+  !                     observations as described in hdescr
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_GetObsValues
+    subroutine CTA_Model_GetObsValues( hmodel, htime, hdescr, values, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hmodel
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  htime
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hdescr
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  values
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_GetObsValues
+  end interface
+
+  !  \brief Get for each observation a localization scaling vector
+  ! 
+  !  \param hmodel   I  handle of model instance
+  !  \param hdescr   I  observation description for which we want localization scaling vectors
+  !  \param distance I  characteristic distance
+  !  \param locVecs  O  costa vector of handles to treevectors (scaling vectors). The treevectors
+  !                     are created when the indices are CTA_NULL on entry
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_GetObsLocalization
+    subroutine CTA_Model_GetObsLocalization( hmodel, hdescr, distance, locVecs, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hmodel
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hdescr
+      real(8)                       , intent(in   )     ::  distance
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  locVecs
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_GetObsLocalization
+  end interface
+
+  !  \brief Get a query for the stochastic observer in order to
+  !   filter out the observations that can actually be provided by the model.
+  ! 
+  !  \param hmodel   I  handle of model instance
+  !  \param htime    I  time instance
+  !  \param hdescr   I  observation description component
+  !  \param sselect  O  receives a query to filter out the observations, must exist before calling
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_GetObsSelect
+    subroutine CTA_Model_GetObsSelect( hmodel, htime, hdescr, sselect, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hmodel
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  htime
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hdescr
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  sselect
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_GetObsSelect
+  end interface
+
+  !  \brief Export the whole internal state of a model
+  !   This export function will export the whole state of the model such that
+  !   a so called "restart" start from this point yielding the same results.
+  !   There are no ruled on the format that is used to store the data.
+  !   Various extra otions are valid but a model will in most cases support an export
+  !   to a file and to a COSTA pack object.
+  ! 
+  ! 
+  !  \param hmodel   I  handle of model instance
+  !  \param hexport  I  target for export e.g. CTA_File or CTA_Pack
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_Export
+    subroutine CTA_Model_Export( hmodel, hexport, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hmodel
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hexport
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_Export
+  end interface
+
+  !  \brief Import the whole internal state of a model
+  !   After the inport the models internal state is exactly the same as the point that
+  !   the export was created using CTA_Model_Export.
+  ! 
+  ! 
+  !  \param hmodel   I  handle of model instance
+  !  \param himport  I  handle with data created by CTA_MODEL_Export e.g. CTA_File or CTA_Pack
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_Import
+    subroutine CTA_Model_Import( hmodel, himport, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hmodel
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  himport
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_Import
+  end interface
+
+  !  \brief Get the number of domains for local analysis
+  ! 
+  !  \param hmodel   I  handle of model instance
+  !  \param distance I  characteristic distance
+  !  \param ndomains O  number of domains
+  !  \param locVecs  O  costa vector of handles to treevectors (scaling vectors). The treevectors
+  !                     are created when the indices are CTA_NULL on entry
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_GetNumDomains
+    subroutine CTA_Model_GetNumDomains( hmodel, distance, ndomains, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hmodel
+      real(8)                       , intent(in   )     ::  distance
+      integer                       , intent(out  )     ::  ndomains
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_GetNumDomains
+  end interface
+
+  !  \brief Get selection of observations that are relevnet for assimilation in the given domain
+  ! 
+  !  \param hmodel    I  handle of model instance
+  !  \param hdescr    I  observation description of all observations
+  !  \param distance  I  characteristic distance
+  !  \param idomain   I  domain number
+  !  \param selection O  costa vector with the indices of the relevant observations (0 based)
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_GetObsSelector
+    subroutine CTA_Model_GetObsSelector( hmodel, hdescr, distance, idomain, selection, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hmodel
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hdescr
+      real(8)                       , intent(in   )     ::  distance
+      integer                       , intent(in   )     ::  idomain
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  selection
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_GetObsSelector
+  end interface
+
+  !  \brief Get for each observation a localization scaling vector for single domain
+  ! 
+  !  \param hmodel   I  handle of model instance
+  !  \param hdescr   I  observation description for which we want localization scaling vectors
+  !  \param distance I  characteristic distance
+  !  \param idomain  I  domain number
+  !  \param locVecs  O  costa vector of handles to treevectors (scaling vectors). The treevectors
+  !                     are created when the indices are CTA_NULL on entry
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_GetObsLocalizationDomain
+    subroutine CTA_Model_GetObsLocalizationDomain( hmodel, hdescr, distance, idomain, locVecs, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hmodel
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hdescr
+      real(8)                       , intent(in   )     ::  distance
+      integer                       , intent(in   )     ::  idomain
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  locVecs
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_GetObsLocalizationDomain
+  end interface
+
+  !  \brief Get a copy of the internal state.
+  ! 
+  !  \note Optionally a tree-vector is created. In that case the caller of this
+  !  method is responsible for freeing that tree-vector. The input state must be compatible
+  !  (same size and or composition) as the models internal state.
+  !  \note If *hstate == CTA_NULL a new object is created, user is responsible for freeing this object.
+  ! 
+  !  \param hmodel   I  handle of model instance
+  !  \param idomain  I  domain number
+  !  \param hstate   IO receives state of the model, *hstate can be CTA_NULL on calling (see note)
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_GetStateDomain
+    subroutine CTA_Model_GetStateDomain( hmodel, idomain, hstate, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hmodel
+      integer                       , intent(in   )     ::  idomain
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hstate
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_GetStateDomain
+  end interface
+
+  !  \brief Perform axpy operation on the internal state for a single domain
+  ! 
+  !  \note AXPY: y=alpha*x+y. y corresponds to the models
+  !        internal state and x can be a state vector or a model
+  ! 
+  !  \param hmodel   IO handle of model instance (y)
+  !  \param alpha    I  alpha
+  !  \param hx       I  handle of x (state or model)
+  !  \param idomain  I  domain number
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_AxpyStateDomain
+    subroutine CTA_Model_AxpyStateDomain( hmodel, alpha, idomain, hx, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hmodel
+      real(8)                       , intent(in   )     ::  alpha
+      integer                       , intent(in   )     ::  idomain
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hx
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_AxpyStateDomain
+  end interface
+
+
+end module cta_f90_model
+
diff --git a/costa/native/cta_f90/generated/cta_f90_model_factory.f90 b/costa/native/cta_f90/generated/cta_f90_model_factory.f90
new file mode 100644
index 000000000..9c3603ad5
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_model_factory.f90
@@ -0,0 +1,43 @@
+module cta_f90_model_factory
+
+  implicit none
+
+  public
+
+  !  \brief Create a COSTA modell class from XML input file
+  !           (load from methods from dynamic load library)
+  ! 
+  !   \param fName  I  XML-configuration file
+  !   \param modelClass O Class of new model Factory
+  !   \param status O          Model class handle
+  !
+  interface CTA_F90_ModelFactory_New
+    subroutine CTA_ModelFactory_New( fName, modelClass, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      character(len=*)              , intent(in   )     ::  fName
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  modelClass
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_ModelFactory_New
+  end interface
+
+  !  \brief Define a new class (=implementation) of a COSTA model component
+  ! 
+  !  \param name     I  name of the new model class
+  !  \param h_func   I  COSTA function handles for functions that implement class,
+  !                     missing functions must have value CTA_NULL
+  !  \param hmodcl   O  receives handle of new model class
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_DefineClass
+    subroutine CTA_Model_DefineClass( name, h_func, hmodcl, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      character(len=*)              , intent(in   )     ::  name
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h_func(*)
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hmodcl
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_DefineClass
+  end interface
+
+
+end module cta_f90_model_factory
+
diff --git a/costa/native/cta_f90/generated/cta_f90_model_utilities.f90 b/costa/native/cta_f90/generated/cta_f90_model_utilities.f90
new file mode 100644
index 000000000..49402d4f4
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_model_utilities.f90
@@ -0,0 +1,42 @@
+module cta_f90_model_utilities
+
+  implicit none
+
+  public
+
+  !  \brief Handles model configuration tree or name of input xml-file
+  ! 
+  !  When a new instance of the a model component is created it needs some
+  !  input. The most convenient way is to provide the root to a COSTA-tree
+  !  with configuration information the name of a XML-configuration file
+  !  This routine will check whether the input handle is a COSTA-tree or
+  !  the name of a XML-configuration file. A COSTA-tree containing the content
+  !  of the XML-file is created when the input is the name of the XML-file.
+  ! 
+  !  \note A COSTA-tree is created if the input is the name of an XML-file.
+  !        the handle of the input tree is returned otherwise. This means that
+  !        depending on the input, a tree is created. The routine will return whether
+  !        the returned tree is created by this routine. The caller is responsible
+  !        for freeing the tree when necessary.
+  ! 
+  !  \param hinput   I  handle of tree (CTA_Tree) with model input or string
+  !                     (CTA_Sring) with name of xml-input file
+  !  \param tinput   O  receives handle of tree (CTA_Tree) with model input
+  !  \param cleanup  O  receives flag (CTA_TRUE/CTA_FALSE) indicating whether tinput is
+  !                     created and must be freed by the caller
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Model_Util_InputTree
+    subroutine CTA_Model_Util_InputTree( hinput, tinput, cleanup, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hinput
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  tinput
+      integer                       , intent(out  )     ::  cleanup
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Model_Util_InputTree
+  end interface
+
+
+end module cta_f90_model_utilities
+
diff --git a/costa/native/cta_f90/generated/cta_f90_obsdescr.f90 b/costa/native/cta_f90/generated/cta_f90_obsdescr.f90
new file mode 100644
index 000000000..bc9cc7860
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_obsdescr.f90
@@ -0,0 +1,191 @@
+module cta_f90_obsdescr
+
+  implicit none
+
+  public
+
+  !  \brief Create a new class (=implementation) of a COSTA observation description component.
+  ! 
+  !  \param name       I  name of the new observation description class
+  !  \param h_func     I  COSTA function handles for functions that implement class,
+  !                       missing functions must have value CTA_NULL
+  !  \param hobsdscrcl O  receives handle of new observation description class
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_ObsDescr_DefineClass
+    subroutine CTA_ObsDescr_DefineClass( name, h_func, hobsdscrcl, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      character(len=*)              , intent(in   )     ::  name
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h_func(*)
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hobsdscrcl
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_ObsDescr_DefineClass
+  end interface
+
+  !  \brief Create a new observation description instance.
+  ! 
+  !  \param hsobscl   I  class of new observation description
+  !  \param usrdat    IO data of the stochastic observer for which
+  !                      a descriptor is to be created
+  !  \param hobsdscr  O  receives handle of created observation description
+  !                      object
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_ObsDescr_Create
+    subroutine CTA_ObsDescr_Create( hsobscl, usrdat, hobsdscr, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hsobscl
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  usrdat
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hobsdscr
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_ObsDescr_Create
+  end interface
+
+  !  \brief Create a new observation description that is subset of existing observation description.
+  ! 
+  !  \param hobsdescr     I the observation description to create a subset
+  !                         from
+  !  \param selection     I selection criterion (subset of SQL)
+  !  \param reltab        O Relation table specifying the relation between
+  !                         the original and new observation description
+  !                         component. Note no relation table is created when
+  !                         reltab==CTA_NULL on entry
+  !  \param hobsdescrout  O the new COSTA-stochastic observer, empty before
+  !                         calling, caller responsible for freeing after use
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_ObsDescr_CreateSel
+    subroutine CTA_ObsDescr_CreateSel( hobsdescr, selection, reltab, hobsdescrout, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hobsdescr
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  selection
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  reltab
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hobsdescrout
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_ObsDescr_CreateSel
+  end interface
+
+  !  \brief Create a new observation description that is subset of existing observation description.
+  !          All observations in the interval (t1,t2] (note t1 is not part
+  !          of the interval!) of the time span are selected.
+  ! 
+  !  \param hobsdescr     I the observation description to create a subset
+  !                         from
+  !  \param timespan      I  time span over which selection has to be made
+  !  \param reltab        O Relation table specifying the relation between
+  !                         the original and new observation description
+  !                         component. Note no relation table is created when
+  !                         reltab==CTA_NULL on enty
+  !  \param hobsdescrout  O the new COSTA-stochastic observer, empty before
+  !                         calling, caller responsible for freeing after use
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_ObsDescr_CreateTimSel
+    subroutine CTA_ObsDescr_CreateTimSel( hobsdescr, timespan, reltab, hobsdescrout, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hobsdescr
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  timespan
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  reltab
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hobsdescrout
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_ObsDescr_CreateTimSel
+  end interface
+
+  !  \brief Get properties/values that correspond to a given key.
+  ! 
+  !  \param hobsdscr   I  handle of observation description
+  !  \param Key        I  key for which the value is asked
+  !  \param Properties IO COSTA-vector that is to receive the values
+  !  \param datatype   I  data type of elements in properties vector, must be the same as of queried properties
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_ObsDescr_Get_ValueProperties
+    subroutine CTA_ObsDescr_Get_ValueProperties( hobsdscr, Key, Properties, datatype, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hobsdscr
+      character(len=*)              , intent(in   )     ::  Key
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  Properties
+      integer                       , intent(in   )     ::  datatype
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_ObsDescr_Get_ValueProperties
+  end interface
+
+  !  \brief Get all keys names.
+  ! 
+  !  \param hobsdscr   I  handle of observation description
+  !  \param Keys       O  receives all keys (COSTA-string vector); must exist before calling and be large enough
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_ObsDescr_Get_PropertyKeys
+    subroutine CTA_ObsDescr_Get_PropertyKeys( hobsdscr, Keys, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hobsdscr
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  Keys
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_ObsDescr_Get_PropertyKeys
+  end interface
+
+  !  \brief Get number of properties/keys.
+  ! 
+  !  \param hobsdscr   I  handle of observation description
+  !  \param nkeys      O  receives number of properties/keys
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_ObsDescr_Property_Count
+    subroutine CTA_ObsDescr_Property_Count( hobsdscr, nkeys, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hobsdscr
+      integer                       , intent(out  )     ::  nkeys
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_ObsDescr_Property_Count
+  end interface
+
+  !  \brief Get number of observations.
+  ! 
+  !  \param hobsdscr   I  handle of observation description
+  !  \param nobs       O  receives the number of observations
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_ObsDescr_Observation_Count
+    subroutine CTA_ObsDescr_Observation_Count( hobsdscr, nobs, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hobsdscr
+      integer                       , intent(out  )     ::  nobs
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_ObsDescr_Observation_Count
+  end interface
+
+  !  \brief Export observation description.
+  ! 
+  !  The default observation description CTA_DEFAULT_OBSDESC supports exporting to:\n
+  !  TODO
+  ! 
+  !  \param hdescr     I  handle of observation description
+  !  \param usrdat     IO export configuration/medium
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_ObsDescr_Export
+    subroutine CTA_ObsDescr_Export( hdescr, usrdat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hdescr
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  usrdat
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_ObsDescr_Export
+  end interface
+
+  !  \brief Free observation description object.
+  ! 
+  !  \param hobsdscr  IO handle of observation description, replaced by CTA_NULL on return.
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_ObsDescr_Free
+    subroutine CTA_ObsDescr_Free( hobsdscr, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hobsdscr
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_ObsDescr_Free
+  end interface
+
+
+end module cta_f90_obsdescr
+
diff --git a/costa/native/cta_f90/generated/cta_f90_pack.f90 b/costa/native/cta_f90/generated/cta_f90_pack.f90
new file mode 100644
index 000000000..f357c1376
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_pack.f90
@@ -0,0 +1,604 @@
+module cta_f90_pack
+
+  implicit none
+
+  public
+
+  !  \brief Create a pack instance.
+  ! 
+  !  \param initsize I  the initial size >=0 of the buffer
+  !  \param hpack    O  receives handle of new pack object
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Pack_Create
+    subroutine CTA_Pack_Create( initsize, hpack, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer                       , intent(in   )     ::  initsize
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hpack
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Pack_Create
+  end interface
+
+  !  \brief Free a pack instance.
+  ! 
+  !  \param hpack   IO handle of pack object, replaced by CTA_NULL on return
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Pack_Free
+    subroutine CTA_Pack_Free( hpack, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Pack_Free
+  end interface
+
+  !  \brief Add data to pack object.
+  ! 
+  !  \param hpack    IO handle of pack object
+  !  \param data     I  data that must be packed
+  !  \param lendat   I  size of the data to be packed (chars)
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Pack_Add
+    module procedure CTA_Pack_Add_integer_1d
+    module procedure CTA_Pack_Add_integer_2d
+    module procedure CTA_Pack_Add_integer_3d
+    module procedure CTA_Pack_Add_integer_4d
+    module procedure CTA_Pack_Add_integer_5d
+    module procedure CTA_Pack_Add_integer_6d
+    module procedure CTA_Pack_Add_integer_7d
+    module procedure CTA_Pack_Add_real4_1d
+    module procedure CTA_Pack_Add_real4_2d
+    module procedure CTA_Pack_Add_real4_3d
+    module procedure CTA_Pack_Add_real4_4d
+    module procedure CTA_Pack_Add_real4_5d
+    module procedure CTA_Pack_Add_real4_6d
+    module procedure CTA_Pack_Add_real4_7d
+    module procedure CTA_Pack_Add_real8_1d
+    module procedure CTA_Pack_Add_real8_2d
+    module procedure CTA_Pack_Add_real8_3d
+    module procedure CTA_Pack_Add_real8_4d
+    module procedure CTA_Pack_Add_real8_5d
+    module procedure CTA_Pack_Add_real8_6d
+    module procedure CTA_Pack_Add_real8_7d
+  end interface
+
+  !  \brief Unpack (get) data from pack object.
+  ! 
+  !  \param hpack    IO handle of pack object
+  !  \param data     O  buffer that receives data that is unpacked from pack-buffer (buffer length must be >= lendat)
+  !  \param lendat   I  size of the data to be unpacked (chars)
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Pack_Get
+    module procedure CTA_Pack_Get_integer_1d
+    module procedure CTA_Pack_Get_integer_2d
+    module procedure CTA_Pack_Get_integer_3d
+    module procedure CTA_Pack_Get_integer_4d
+    module procedure CTA_Pack_Get_integer_5d
+    module procedure CTA_Pack_Get_integer_6d
+    module procedure CTA_Pack_Get_integer_7d
+    module procedure CTA_Pack_Get_real4_1d
+    module procedure CTA_Pack_Get_real4_2d
+    module procedure CTA_Pack_Get_real4_3d
+    module procedure CTA_Pack_Get_real4_4d
+    module procedure CTA_Pack_Get_real4_5d
+    module procedure CTA_Pack_Get_real4_6d
+    module procedure CTA_Pack_Get_real4_7d
+    module procedure CTA_Pack_Get_real8_1d
+    module procedure CTA_Pack_Get_real8_2d
+    module procedure CTA_Pack_Get_real8_3d
+    module procedure CTA_Pack_Get_real8_4d
+    module procedure CTA_Pack_Get_real8_5d
+    module procedure CTA_Pack_Get_real8_6d
+    module procedure CTA_Pack_Get_real8_7d
+  end interface
+
+  !  \brief Get length of packed data in pack-buffer.
+  ! 
+  !  \param hpack    I  handle of pack object
+  ! 
+  !  \param status O length packed data
+  !
+  interface CTA_F90_Pack_GetLen
+    subroutine CTA_Pack_GetLen( hpack, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hpack
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Pack_GetLen
+  end interface
+
+  !  \brief Only update administration for added elements
+  ! 
+  !  This function can be used to update the administration after the
+  !  pack-buffer is filled externally (e.g. using an mpi_recv)
+  ! 
+  !  \param hpack    I  handle of pack object
+  !  \param lendat   I  number of added elements (chars)
+  ! 
+  !  \param status O length packed data
+  !
+  interface CTA_F90_Pack_AddCnt
+    subroutine CTA_Pack_AddCnt( hpack, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hpack
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Pack_AddCnt
+  end interface
+
+  !  \brief Get the internal pack and unpack pointers
+  ! 
+  !  This function can be used to save to pointers and
+  !  reset the state of the pack component after unpacking or adding
+  !  some data
+  ! 
+  !  \param hpack    I  handle of pack object
+  !  \param ip1      O  unpack pointer
+  !  \param ip2      O  pack pointer
+  ! 
+  !  \param status O length packed data
+  !
+  interface CTA_F90_Pack_GetIndx
+    subroutine CTA_Pack_GetIndx( hpack, ip1, ip2, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hpack
+      integer                       , intent(out  )     ::  ip1
+      integer                       , intent(out  )     ::  ip2
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Pack_GetIndx
+  end interface
+
+  !  \brief Set the internal pack and unpack pointers
+  ! 
+  !  This function can be used to restore the pointers and
+  !  reset the state of the pack component after unpacking or adding
+  !  some data
+  ! 
+  !  \param hpack    I  handle of pack object
+  !  \param ip1      I  unpack pointer. In order to reset all unpackin
+  !                     set to CTA_PACK_RESET
+  !  \param ip2      I  pack pointer. In order to reset the whole pack object
+  !                     set to CTA_PACK_RESET
+  ! 
+  !  \param status O length packed data
+  !
+  interface CTA_F90_Pack_SetIndx
+    subroutine CTA_Pack_SetIndx( hpack, ip1, ip2, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hpack
+      integer                       , intent(in   )     ::  ip1
+      integer                       , intent(in   )     ::  ip2
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Pack_SetIndx
+  end interface
+
+
+contains
+
+    subroutine CTA_Pack_Add_integer_1d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      integer                       , intent(in   )     ::  data(:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Add( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Add_integer_1d
+
+    subroutine CTA_Pack_Add_integer_2d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      integer                       , intent(in   )     ::  data(:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Add( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Add_integer_2d
+
+    subroutine CTA_Pack_Add_integer_3d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      integer                       , intent(in   )     ::  data(:,:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Add( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Add_integer_3d
+
+    subroutine CTA_Pack_Add_integer_4d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      integer                       , intent(in   )     ::  data(:,:,:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Add( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Add_integer_4d
+
+    subroutine CTA_Pack_Add_integer_5d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      integer                       , intent(in   )     ::  data(:,:,:,:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Add( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Add_integer_5d
+
+    subroutine CTA_Pack_Add_integer_6d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      integer                       , intent(in   )     ::  data(:,:,:,:,:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Add( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Add_integer_6d
+
+    subroutine CTA_Pack_Add_integer_7d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      integer                       , intent(in   )     ::  data(:,:,:,:,:,:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Add( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Add_integer_7d
+
+    subroutine CTA_Pack_Add_real4_1d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      real(4)                       , intent(in   )     ::  data(:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Add( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Add_real4_1d
+
+    subroutine CTA_Pack_Add_real4_2d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      real(4)                       , intent(in   )     ::  data(:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Add( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Add_real4_2d
+
+    subroutine CTA_Pack_Add_real4_3d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      real(4)                       , intent(in   )     ::  data(:,:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Add( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Add_real4_3d
+
+    subroutine CTA_Pack_Add_real4_4d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      real(4)                       , intent(in   )     ::  data(:,:,:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Add( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Add_real4_4d
+
+    subroutine CTA_Pack_Add_real4_5d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      real(4)                       , intent(in   )     ::  data(:,:,:,:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Add( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Add_real4_5d
+
+    subroutine CTA_Pack_Add_real4_6d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      real(4)                       , intent(in   )     ::  data(:,:,:,:,:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Add( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Add_real4_6d
+
+    subroutine CTA_Pack_Add_real4_7d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      real(4)                       , intent(in   )     ::  data(:,:,:,:,:,:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Add( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Add_real4_7d
+
+    subroutine CTA_Pack_Add_real8_1d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      real(8)                       , intent(in   )     ::  data(:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Add( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Add_real8_1d
+
+    subroutine CTA_Pack_Add_real8_2d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      real(8)                       , intent(in   )     ::  data(:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Add( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Add_real8_2d
+
+    subroutine CTA_Pack_Add_real8_3d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      real(8)                       , intent(in   )     ::  data(:,:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Add( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Add_real8_3d
+
+    subroutine CTA_Pack_Add_real8_4d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      real(8)                       , intent(in   )     ::  data(:,:,:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Add( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Add_real8_4d
+
+    subroutine CTA_Pack_Add_real8_5d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      real(8)                       , intent(in   )     ::  data(:,:,:,:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Add( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Add_real8_5d
+
+    subroutine CTA_Pack_Add_real8_6d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      real(8)                       , intent(in   )     ::  data(:,:,:,:,:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Add( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Add_real8_6d
+
+    subroutine CTA_Pack_Add_real8_7d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      real(8)                       , intent(in   )     ::  data(:,:,:,:,:,:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Add( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Add_real8_7d
+
+    subroutine CTA_Pack_Get_integer_1d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      integer                       , intent(out  )     ::  data(:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Get( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Get_integer_1d
+
+    subroutine CTA_Pack_Get_integer_2d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      integer                       , intent(out  )     ::  data(:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Get( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Get_integer_2d
+
+    subroutine CTA_Pack_Get_integer_3d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      integer                       , intent(out  )     ::  data(:,:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Get( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Get_integer_3d
+
+    subroutine CTA_Pack_Get_integer_4d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      integer                       , intent(out  )     ::  data(:,:,:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Get( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Get_integer_4d
+
+    subroutine CTA_Pack_Get_integer_5d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      integer                       , intent(out  )     ::  data(:,:,:,:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Get( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Get_integer_5d
+
+    subroutine CTA_Pack_Get_integer_6d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      integer                       , intent(out  )     ::  data(:,:,:,:,:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Get( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Get_integer_6d
+
+    subroutine CTA_Pack_Get_integer_7d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      integer                       , intent(out  )     ::  data(:,:,:,:,:,:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Get( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Get_integer_7d
+
+    subroutine CTA_Pack_Get_real4_1d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      real(4)                       , intent(out  )     ::  data(:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Get( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Get_real4_1d
+
+    subroutine CTA_Pack_Get_real4_2d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      real(4)                       , intent(out  )     ::  data(:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Get( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Get_real4_2d
+
+    subroutine CTA_Pack_Get_real4_3d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      real(4)                       , intent(out  )     ::  data(:,:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Get( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Get_real4_3d
+
+    subroutine CTA_Pack_Get_real4_4d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      real(4)                       , intent(out  )     ::  data(:,:,:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Get( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Get_real4_4d
+
+    subroutine CTA_Pack_Get_real4_5d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      real(4)                       , intent(out  )     ::  data(:,:,:,:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Get( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Get_real4_5d
+
+    subroutine CTA_Pack_Get_real4_6d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      real(4)                       , intent(out  )     ::  data(:,:,:,:,:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Get( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Get_real4_6d
+
+    subroutine CTA_Pack_Get_real4_7d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      real(4)                       , intent(out  )     ::  data(:,:,:,:,:,:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Get( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Get_real4_7d
+
+    subroutine CTA_Pack_Get_real8_1d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      real(8)                       , intent(out  )     ::  data(:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Get( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Get_real8_1d
+
+    subroutine CTA_Pack_Get_real8_2d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      real(8)                       , intent(out  )     ::  data(:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Get( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Get_real8_2d
+
+    subroutine CTA_Pack_Get_real8_3d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      real(8)                       , intent(out  )     ::  data(:,:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Get( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Get_real8_3d
+
+    subroutine CTA_Pack_Get_real8_4d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      real(8)                       , intent(out  )     ::  data(:,:,:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Get( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Get_real8_4d
+
+    subroutine CTA_Pack_Get_real8_5d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      real(8)                       , intent(out  )     ::  data(:,:,:,:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Get( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Get_real8_5d
+
+    subroutine CTA_Pack_Get_real8_6d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      real(8)                       , intent(out  )     ::  data(:,:,:,:,:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Get( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Get_real8_6d
+
+    subroutine CTA_Pack_Get_real8_7d( hpack, data, lendat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hpack
+      real(8)                       , intent(out  )     ::  data(:,:,:,:,:,:,:)
+      integer                       , intent(in   )     ::  lendat
+      integer                       , intent(out  )     ::  status
+      call CTA_Pack_Get( hpack, data, lendat, status )
+    end subroutine CTA_Pack_Get_real8_7d
+
+end module cta_f90_pack
+
diff --git a/costa/native/cta_f90/generated/cta_f90_par.f90 b/costa/native/cta_f90/generated/cta_f90_par.f90
new file mode 100644
index 000000000..85d8ba9fb
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_par.f90
@@ -0,0 +1,62 @@
+module cta_f90_par
+
+  implicit none
+
+  public
+
+  !  \brief Initialises parallel environment for a process that spawned
+  !   The executable is spawned using MPI_COMM_SPAWN or MPI_COMM_SPAWN_MULTIPLE
+  ! 
+  !   It will set up the communication groups and optionally starts the parallel
+  !   model builder
+  ! 
+  !  \param StartPar  I  CTA_TRUE/CTA_FALSE start parallel model builder
+  ! 
+  !  Note when a worker process is part of a Master-Worker model and it does
+  !  not implement the COSTA model interface it should not start the parallel model builder
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Par_WorkerSpawn
+    subroutine CTA_Par_WorkerSpawn( StartPar, status )
+      integer                       , intent(in   )     ::  StartPar
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Par_WorkerSpawn
+  end interface
+
+  !  \brief Initialises parallel environment and create process groups
+  ! 
+  !  \param parConfig I  configuration input from XML-file
+  !  \param StartPar  I  CTA_TRUE/CTA_FALSE start parallel model builder
+  ! 
+  !  Note when a worker process is part of a Master-Worker model and it does
+  !  not implement the COSTA model interface it should not start the parallel model builder
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Par_CreateGroups
+    subroutine CTA_Par_CreateGroups( parConfig, StartPar, status )
+      integer                       , intent(in   )     ::  parConfig
+      integer                       , intent(in   )     ::  StartPar
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Par_CreateGroups
+  end interface
+
+  !  \brief Get the global number of COSTA process group and index of this process
+  ! 
+  !  \param itime  O The index of this process in the group
+  !  \param iGroup O The group number of this process belongs to (1..nGroups)
+  ! 
+  !  \note If this function is called by the COSTA master process or in a sequential run it will return 0 for iGroup
+  ! 
+  !
+  interface CTA_F90_Par_GetGroupInfo
+    subroutine CTA_Par_GetGroupInfo( iGroup, itime )
+      integer                       , intent(out  )     ::  iGroup
+      integer                       , intent(out  )     ::  itime
+    end subroutine CTA_Par_GetGroupInfo
+  end interface
+
+
+end module cta_f90_par
+
diff --git a/costa/native/cta_f90/generated/cta_f90_parameters.f90 b/costa/native/cta_f90/generated/cta_f90_parameters.f90
new file mode 100644
index 000000000..d247090f0
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_parameters.f90
@@ -0,0 +1,25 @@
+module CTA_F90_Parameters
+
+  implicit none
+  
+
+  ! --- in/out -----------------------------------
+  
+  public
+
+
+  ! --- const ------------------------------------
+  
+  include 'cta_f90.inc'
+  
+  ! integer kind used for handles:
+  integer, parameter  :: CTA_HANDLE_IKIND = 4
+
+  ! Do not use the 'sizeof' function! Not standard ...
+  !integer, parameter  :: CTA_HANDLE_IKIND = sizeof(CTA_HANDLE)
+  
+  ! real kind used for time values:
+  integer, parameter  :: CTA_TIME_RKIND = 8
+  
+  
+end module CTA_F90_Parameters
diff --git a/costa/native/cta_f90/generated/cta_f90_reltable.f90 b/costa/native/cta_f90/generated/cta_f90_reltable.f90
new file mode 100644
index 000000000..9215143a2
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_reltable.f90
@@ -0,0 +1,157 @@
+module cta_f90_reltable
+
+  implicit none
+
+  public
+
+  !  \brief Create a relation table
+  ! 
+  !  \param hreltable  O  created relation table
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_RelTable_Create
+    subroutine CTA_RelTable_Create( hreltable, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hreltable
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_RelTable_Create
+  end interface
+
+  !  \brief Free a relation table object.
+  ! 
+  !  \param hreltable  IO relation table to be freed,
+  !                       value is set to CTA_NULL on return.
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_RelTable_Free
+    subroutine CTA_RelTable_Free( hreltable, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hreltable
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_RelTable_Free
+  end interface
+
+  !  \brief Copy elements according to relation table
+  ! 
+  !  \note we currently only support copying of elements
+  !        between two vector instances. Other types of
+  !        COSTA object will be supported when needed
+  !        in later versions
+  ! 
+  !  \param hreltable  I handle of relation table
+  !  \param hfrom      I Origin object to copy data from
+  !  \param hto        I Target object to copy data to
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_RelTable_Apply
+    subroutine CTA_RelTable_Apply( hreltable, hfrom, hto, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hreltable
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hfrom
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hto
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_RelTable_Apply
+  end interface
+
+  !  \brief Copy elements according to inverse of relation table
+  ! 
+  !  \note we currently only support copying of elements
+  !        between two vector instances. Other types of
+  !        COSTA object will be supported when needed
+  !        in later versions
+  ! 
+  !  \param hreltable  I handle of relation table
+  !  \param hfrom      I Origin object to copy data from
+  !  \param hto        I Target object to copy data to
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_RelTable_ApplyInv
+    subroutine CTA_RelTable_ApplyInv( hreltable, hfrom, hto, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hreltable
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hfrom
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hto
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_RelTable_ApplyInv
+  end interface
+
+  !  \brief Set a relation table
+  !   A Set a relation table that defines a selection of elements
+  ! 
+  !  \param hreltable  O relation table that is set
+  !  \param vselect    I (integer) vector with indices of elements
+  ! from the target set that are selected.
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_RelTable_SetSelect
+    subroutine CTA_RelTable_SetSelect( hreltable, vselect, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hreltable
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  vselect
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_RelTable_SetSelect
+  end interface
+
+  !  \brief Get the number of elements that are copied when the table is applied
+  ! 
+  ! 
+  !  \param hreltable  I  relation table
+  !  \param nelt     O  number of elements that are copied
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_RelTable_Count
+    subroutine CTA_RelTable_Count( hreltable, nelt, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hreltable
+      integer                       , intent(out  )     ::  nelt
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_RelTable_Count
+  end interface
+
+  !  \brief Set a relation table that is combination of two
+  !          relation tables.
+  ! 
+  !  Set a relation table that is the combination of two exisiting relation
+  !  tables. It is possible to use the inverse of the relation tables when
+  !  needed
+  ! .
+  !  A usefull application of this method is to create a relation table that
+  !  defines a relation between a subset of elements from set1 and a subset of
+  !  the elements of set2. In order to set a relation table of this kind first
+  !  create two relation tables:
+  !  hrel1 elements from set 1 that have a relation with the elements from set 2,
+  !  hrel2 elements from set 2 that have a relation with the elements from set 1
+  ! 
+  !  The combined relation table of hrel1 and inverse(hrel2) is a relation
+  !  table that spcifies the relation of a subset of elements from set1 and a
+  !  subset of elements from set2.
+  ! 
+  !  \param hreltable  O relation table that is set
+  !  \param hrel1      I first relation table
+  !  \param inverse1   I use inverse of hrel1 (CTA_TRUE/CTA_FALSE)
+  !  \param hrel2      I first relation table
+  !  \param inverse2   I use inverse of hrel2 (CTA_TRUE/CTA_FALSE)
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_RelTable_SetTableCombine
+    subroutine CTA_RelTable_SetTableCombine( hreltable, hrel1, inverse1, hrel2, inverse2, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hreltable
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hrel1
+      integer                       , intent(in   )     ::  inverse1
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hrel2
+      integer                       , intent(in   )     ::  inverse2
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_RelTable_SetTableCombine
+  end interface
+
+
+end module cta_f90_reltable
+
diff --git a/costa/native/cta_f90/generated/cta_f90_resultwriter.f90 b/costa/native/cta_f90/generated/cta_f90_resultwriter.f90
new file mode 100644
index 000000000..3c2ab0055
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_resultwriter.f90
@@ -0,0 +1,91 @@
+module cta_f90_resultwriter
+
+  implicit none
+
+  public
+
+  !  \brief Handle a string message send to the resultwriter
+  ! 
+  !  \param idWriter    I  ID of this resultwriter (Counter of number of native result writers)
+  !  \param config      I  Name of XML configuration file containting the function pointers and additional information
+  !  \param workingDir  I  Full path to working directory
+  !  \param message     I  Message send to resultwriter
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Resultwriter_putmessage
+    subroutine CTA_Resultwriter_putmessage( idWriter, config, workingDir, message, status )
+      integer                       , intent(in   )     ::  idWriter
+      character(len=*)              , intent(in   )     ::  config(*)
+      character(len=*)              , intent(in   )     ::  workingDir(*)
+      character(len=*)              , intent(in   )     ::  message(*)
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Resultwriter_putmessage
+  end interface
+
+  !  \brief Handle a string message send to the resultwriter
+  ! 
+  !  \param idWriter    I  ID of this resultwriter (Counter of number of native result writers)
+  !  \param config      I  Name of XML configuration file containting the function pointers and additional information
+  !  \param workingDir  I  Full path to working directory
+  !  \param id          I  Name of the variable/array send to the resultwriter
+  !  \param handle      I  Handle (Vector or TreeVector) of variable
+  !  \param outputLevel I  Selected output level (see opendabridge for possible values)
+  !  \param context     I  Location from which the resultwriter was called
+  !  \param iteration   I  Iteration number from which the resultwriter was called
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Resultwriter_putvalue
+    subroutine CTA_Resultwriter_putvalue( idWriter, config, workingDir, id, handle, outputLevel, context, iteration, status )
+      integer                       , intent(in   )     ::  idWriter
+      character(len=*)              , intent(in   )     ::  config(*)
+      character(len=*)              , intent(in   )     ::  workingDir(*)
+      character(len=*)              , intent(in   )     ::  id(*)
+      integer                       , intent(in   )     ::  handle
+      integer                       , intent(in   )     ::  outputLevel
+      character(len=*)              , intent(in   )     ::  context(*)
+      integer                       , intent(in   )     ::  iteration
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Resultwriter_putvalue
+  end interface
+
+  !  \brief Handle a string message send to the resultwriter
+  ! 
+  !  \param idWriter    I  ID of this resultwriter (Counter of number of native result writers)
+  !  \param config      I  Name of XML configuration file containting the function pointers and additional information
+  !  \param workingDir  I  Full path to working directory
+  !  \param iteration   I  Iteration number from which the resultwriter was called
+  !  \param cost        I  Value of cost function
+  !  \param handle      I  Handle (Vector or TreeVector) of the current parameters
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Resultwriter_putiterationreport
+    subroutine CTA_Resultwriter_putiterationreport( idWriter, config, workingDir, iteration, cost, handle, status )
+      integer                       , intent(in   )     ::  idWriter
+      character(len=*)              , intent(in   )     ::  config(*)
+      character(len=*)              , intent(in   )     ::  workingDir(*)
+      integer                       , intent(in   )     ::  iteration
+      real(8)                       , intent(in   )     ::  cost
+      integer                       , intent(in   )     ::  handle
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Resultwriter_putiterationreport
+  end interface
+
+  !  \brief Free a resultwriter (close output files etc).
+  ! 
+  !  \param idWriter    I  ID of this resultwriter (Counter of number of native result writers)
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Resultwriter_free
+    subroutine CTA_Resultwriter_free( idWriter, status )
+      integer                       , intent(in   )     ::  idWriter
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Resultwriter_free
+  end interface
+
+
+end module cta_f90_resultwriter
+
diff --git a/costa/native/cta_f90/generated/cta_f90_sobs.f90 b/costa/native/cta_f90/generated/cta_f90_sobs.f90
new file mode 100644
index 000000000..36f9efb39
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_sobs.f90
@@ -0,0 +1,271 @@
+module cta_f90_sobs
+
+  implicit none
+
+  public
+
+  !  \brief Create a new class (=implementation) of a COSTA stochastic observer component.
+  ! 
+  !  \param name          I  name of the new stochastic observer class
+  !  \param h_func        I  COSTA function handles for functions that implement class,
+  !                          missing functions must have value CTA_NULL
+  !  \param descrcl       I  class of the observation description that is created by stochastic observer
+  !  \param hstochobscl   O  handle of new stochastic observer class
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_SObs_DefineClass
+    subroutine CTA_SObs_DefineClass( name, h_func, descrcl, hstochobscl, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      character(len=*)              , intent(in   )     ::  name
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h_func(*)
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  descrcl
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hstochobscl
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_SObs_DefineClass
+  end interface
+
+  !  \brief Create an instance of a stocastic observer
+  ! 
+  !  \param hstochobscl I  stochastic observer class of new stochastic observer
+  !  \param userdata    IO userdata for creation (depends on class)
+  !  \param hstochobs   O  receives handle of new stochastic observer object
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_SObs_Create
+    subroutine CTA_SObs_Create( hstochobscl, userdata, hstochobs, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hstochobscl
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  userdata
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hstochobs
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_SObs_Create
+  end interface
+
+  !  \brief Create a new stochastic observer that is subset of existing stochastic observer.
+  ! 
+  !  \param hsobsin  I  handle of the existing stochastic observer of
+  !                     which  a selection is to be made
+  !  \param userdata IO inputs necessary for making a selection (depends on user implementation)
+  !  \param hsobsout O  receives handle of the new COSTA-stochastic observer, empty before calling, caller responsible for freeing after use
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_SObs_CreateSel
+    subroutine CTA_SObs_CreateSel( hsobsin, userdata, hsobsout, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hsobsin
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  userdata
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hsobsout
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_SObs_CreateSel
+  end interface
+
+  !  \brief Create a new stoch observer that is subset in time of existing stochastic observer.
+  ! 
+  !          All observations in the closed interval [t1,t2] of the time span are selected.
+  ! 
+  !  \param hsobsin  I  handle of the stochastic observer of
+  !                     which a selection is to be made
+  !  \param timespan I  time span over which selection has to be made
+  !  \param hsobsout O  receives handle of the new COSTA-stochastic observer, empty before calling
+  !  \param status O error states: CTA_OK if successful
+  !
+  interface CTA_F90_SObs_CreateTimSel
+    subroutine CTA_SObs_CreateTimSel( hsobsin, timespan, hsobsout, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hsobsin
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  timespan
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hsobsout
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_SObs_CreateTimSel
+  end interface
+
+  !  \brief Count the number of elements in stochastic observer.
+  !  \param hsobs    I  handle of the stochastic observer
+  !  \param nmeasr   O  receives number of measurements in this observer
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_SObs_Count
+    subroutine CTA_SObs_Count( hsobs, nmeasr, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hsobs
+      integer                       , intent(out  )     ::  nmeasr
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_SObs_Count
+  end interface
+
+  !  \brief Get a vector with the measurements.
+  ! 
+  !  \param hsobs    I  handle of the stochastic observer
+  !  \param hvec     IO handle of vector that receives the measurements; must exist before calling
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_SObs_GetVal
+    subroutine CTA_SObs_GetVal( hsobs, hvec, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hsobs
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_SObs_GetVal
+  end interface
+
+  !  \brief Count the times associated to the measurements.
+  ! 
+  !  \param hsobs    I  handle of the stochastic observer
+  !  \param hvec     IO handle to vector that receives the times; must exist before calling
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_SObs_GetTimes
+    subroutine CTA_SObs_GetTimes( hsobs, hvec, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hsobs
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_SObs_GetTimes
+  end interface
+
+  !  \brief Draw random values (measurements) according to the probability density
+  !          function of the mesurements.
+  ! 
+  !  \param hsobs    I  handle of the stochastic observer
+  !  \param hvec     IO handle of vector that receives the draw (measurements); must exist before calling
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_SObs_GetRealisation
+    subroutine CTA_SObs_GetRealisation( hsobs, hvec, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hsobs
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_SObs_GetRealisation
+  end interface
+
+  !  \brief Get expectation of the probability density function of the mesurements.
+  ! 
+  !  \param hsobs    I  handle of the stochastic observer
+  !  \param hvec     IO handle of vector that receives the expectation values; must exist before calling
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_SObs_GetExpectation
+    subroutine CTA_SObs_GetExpectation( hsobs, hvec, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hsobs
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_SObs_GetExpectation
+  end interface
+
+  !  \brief Get the value of the probability density function of the mesurements at given location.
+  ! 
+  !  \param hsobs    I  handle of the stochastic observer
+  !  \param hvecx    I  handle of vector with location for evaluating pdf
+  !  \param hvecy    IO handle of vector that is to contain the pdf-value; must exist before calling
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_SObs_EvalPDF
+    subroutine CTA_SObs_EvalPDF( hsobs, hvecx, hvecy, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hsobs
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvecx
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvecy
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_SObs_EvalPDF
+  end interface
+
+  !  \brief Get covariance matrix of probability density function of the measurements.
+  ! 
+  !  \param hsobs    I  handle of the stochastic observer
+  !  \param hmat     IO handle of matrix that receives the covariance matrix; must exist before calling
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_SObs_GetCovMat
+    subroutine CTA_SObs_GetCovMat( hsobs, hmat, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hsobs
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hmat
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_SObs_GetCovMat
+  end interface
+
+  !  \brief Get variance of probability density function of the mesurements.
+  ! 
+  !  \param hsobs    I  handle of the stochastic observer
+  !  \param hvec     IO handle of vector that receives the variance; must exist before calling
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_SObs_GetVar
+    subroutine CTA_SObs_GetVar( hsobs, hvec, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hsobs
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_SObs_GetVar
+  end interface
+
+  !  \brief Get standard deviation of probability density function of the measurements.
+  ! 
+  !  \param hsobs    I  handle of the stochastic observer
+  !  \param hvec     IO handle of vector that is to contain the standard deviation
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_SObs_GetStd
+    subroutine CTA_SObs_GetStd( hsobs, hvec, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hsobs
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_SObs_GetStd
+  end interface
+
+  !  \brief Create the observation description corresponding to the stochastic observer.
+  ! 
+  !  \note Caller is responsible for freeing the here created observation description
+  ! 
+  !  \param hsobs      I  handle of the stochastic observer
+  !  \param hobsdescr  O  receives handle of newly created observation description class, empty before calling
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_SObs_GetDescription
+    subroutine CTA_SObs_GetDescription( hsobs, hobsdescr, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hsobs
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hobsdescr
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_SObs_GetDescription
+  end interface
+
+  !  \brief Export the stochastic observer.
+  ! 
+  !  \note Supported by CTA_DEFAULT_SOBS:\n
+  !        output to file (userdata must contain handle of COSTA file)\n
+  ! 
+  !  \param hsobs    I  handle of the stochastic observer
+  !  \param userdata I  configuration of output
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_SObs_Export
+    subroutine CTA_SObs_Export( hsobs, userdata, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hsobs
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  userdata
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_SObs_Export
+  end interface
+
+  !  \brief Free the stochastic observer
+  ! 
+  !  \Note hsobs=CTA_NULL is allowed
+  ! 
+  !  \param hsobs  IO handle of the stochastic observer, replaced by CTA_NULL on return
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_SObs_Free
+    subroutine CTA_SObs_Free( hsobs, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hsobs
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_SObs_Free
+  end interface
+
+
+end module cta_f90_sobs
+
diff --git a/costa/native/cta_f90/generated/cta_f90_string.f90 b/costa/native/cta_f90/generated/cta_f90_string.f90
new file mode 100644
index 000000000..3caf7a1be
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_string.f90
@@ -0,0 +1,229 @@
+module cta_f90_string
+
+  implicit none
+
+  public
+
+  !  \brief Create a new COSTA string instance.
+  ! 
+  !   \param hstring  O  handle of created string
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_String_Create
+    subroutine CTA_String_Create( hstring, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hstring
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_String_Create
+  end interface
+
+  !  \brief Create a new COSTA string that is a copy of an existing one
+  ! 
+  !   \param hto    O  receives the handle of the created string
+  !   \param hfrom  I  handle of string to copy
+  ! 
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_String_Copy
+    subroutine CTA_String_Copy( hto, hfrom, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hto
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hfrom
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_String_Copy
+  end interface
+
+  !  \brief Free the COSTA string instance.
+  ! 
+  !   \note
+  ! 
+  !   \param hstring  IO handle of the string instance, replaced by CTA_NULL on return
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_String_Free
+    subroutine CTA_String_Free( hstring, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hstring
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_String_Free
+  end interface
+
+  !  \brief Get the number of characters in string.
+  ! 
+  !   \note The returned length is the number of characters excluding the
+  !         0-character.
+  ! 
+  !   \param hstring  I  handle of the string
+  !   \param len      O  receives the number of characters in string
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_String_GetLength
+    subroutine CTA_String_GetLength( hstring, len, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hstring
+      integer                       , intent(out  )     ::  len
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_String_GetLength
+  end interface
+
+  !  \brief Set the string to new content.
+  ! 
+  !   \param hstring  IO handle of the string
+  !   \param str      I  new content
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_String_Set
+    subroutine CTA_String_Set( hstring, str, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hstring
+      character(len=*)              , intent(in   )     ::  str
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_String_Set
+  end interface
+
+  !  \brief Get a copy of the string.
+  ! 
+  !   \note It is the responsibility of the caller making str large enough to
+  !         hold the string and trailing 0-character.
+  ! 
+  !   \param hstring  I  handle of the string
+  !   \param str      O  buffer that receives a copy of the string including trailing 0-character
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_String_Get
+    subroutine CTA_String_Get( hstring, str, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hstring
+      character(len=*)              , intent(out  )     ::  str
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_String_Get
+  end interface
+
+  !  \brief Get the (scalar) value of a string
+  ! 
+  !   \note It is the responsibility of the caller that parameter value is large enough to
+  !         hold the value as specified by the datatype.
+  ! 
+  !   \param hstring  I  handle of the string
+  !   \param value    O  receives the value
+  !   \param datatype I  data type of value
+  !   \param status O CTA_OK if successful
+  !
+  !interface CTA_F90_String_GetValue
+  !  subroutine CTA_String_GetValue( hstring, value, datatype, status )
+  !    use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+  !    integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hstring
+  !    void                          , intent(out  )     ::  value
+  !    integer                       , intent(in   )     ::  datatype
+  !    integer                       , intent(out  )     ::  status
+  !  end subroutine CTA_String_GetValue
+  !end interface
+
+  !  \brief Create new string that is a concatination of existing strings.
+  ! 
+  !   \param istring  IO handle of the string (first string in concatination)
+  !                      and whole concatinated string on return
+  !   \param xstring  I  handle of the second string (extension string)
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_String_Conc
+    subroutine CTA_String_Conc( istring, xstring, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  istring
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  xstring
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_String_Conc
+  end interface
+
+  !  \brief Imports string.
+  ! 
+  !   Supports: pack objects (usrdata must be handle of pack object to import from)
+  ! 
+  !   \param hstring  IO handle of the string
+  !   \param usrdata  I  configuration of import
+  !   \param status O CTA_OK if successful
+  ! 
+  !   \note Only CTA_Pack is currently supported fot usrdata
+  !
+  interface CTA_F90_String_Import
+    subroutine CTA_String_Import( hstring, usrdata, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hstring
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  usrdata
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_String_Import
+  end interface
+
+  !  \brief Exports length of string and string itself.
+  ! 
+  !   Supports: pack objects (usrdata must be handle of pack object to export to)
+  ! 
+  !   \param hstring  I  handle of the string
+  !   \param usrdata  IO configuration of export
+  !   \param status O CTA_OK if successful
+  ! 
+  !   \note Only CTA_Pack is currently supported fot usrdata
+  !
+  interface CTA_F90_String_Export
+    subroutine CTA_String_Export( hstring, usrdata, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hstring
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  usrdata
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_String_Export
+  end interface
+
+  !  \brief Check whether string is equal to COSTA string.
+  ! 
+  ! 
+  !   \param hstring  I  handle of the string
+  !   \param str0     I  string to compare hsting with
+  !   \param status O CTA_TRUE/CTA_FALSE
+  ! 
+  !   \note Only CTA_Pack is currently supported fot usrdata
+  !
+  interface CTA_F90_String_Equals_Char
+    subroutine CTA_String_Equals_Char( hstring, str0, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hstring
+      character(len=*)              , intent(in   )     ::  str0
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_String_Equals_Char
+  end interface
+
+  !  \brief Check whether two COSTA strings are equal.
+  ! 
+  ! 
+  !   \param hstring1  I  handle of first string
+  !   \param hstring2  I  handle of second string
+  !   \param status O CTA_TRUE/CTA_FALSE
+  ! 
+  !   \note Only CTA_Pack is currently supported fot usrdata
+  !
+  interface CTA_F90_Strings_Equal
+    subroutine CTA_Strings_Equal( hstring1, hstring2, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hstring1
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hstring2
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Strings_Equal
+  end interface
+
+  !  \brief Create a duplication of a COSTA string
+  ! 
+  !   \param hfrom  I  handle of string to copy
+  !   \param hto    O  handle of created string
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_String_Duplicate
+    subroutine CTA_String_Duplicate( hfrom, hto, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hfrom
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hto
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_String_Duplicate
+  end interface
+
+
+end module cta_f90_string
+
diff --git a/costa/native/cta_f90/generated/cta_f90_time.f90 b/costa/native/cta_f90/generated/cta_f90_time.f90
new file mode 100644
index 000000000..65a0b690e
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_time.f90
@@ -0,0 +1,249 @@
+module cta_f90_time
+
+  implicit none
+
+  public
+
+  !  \brief Create a time object.
+  ! 
+  !  \param htime  O  receives handle of newly created time object
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Time_Create
+    subroutine CTA_Time_Create( htime, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  htime
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Time_Create
+  end interface
+
+  !  \brief Free a time object.
+  ! 
+  !  \param htime    IO handle of time object to be freed, replaced by CTA_NULL on return.
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Time_Free
+    subroutine CTA_Time_Free( htime, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  htime
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Time_Free
+  end interface
+
+  !  \brief Set the time span.
+  ! 
+  !  \param htime    IO time object of which to set time span
+  !  \param tstart   I  starting time
+  !  \param tend     I  ending time
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Time_SetSpan
+    subroutine CTA_Time_SetSpan( htime, tstart, tend, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_TIME_RKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  htime
+      real(CTA_TIME_RKIND)          , intent(in   )     ::  tstart
+      real(CTA_TIME_RKIND)          , intent(in   )     ::  tend
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Time_SetSpan
+  end interface
+
+  !  \brief Get the time span.
+  ! 
+  !  \param htime    I  time object of which to get time span
+  !  \param tstart   O  receives the starting time
+  !  \param tend     O  receives ending time
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Time_GetSpan
+    subroutine CTA_Time_GetSpan( htime, tstart, tend, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_TIME_RKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  htime
+      real(CTA_TIME_RKIND)          , intent(out  )     ::  tstart
+      real(CTA_TIME_RKIND)          , intent(out  )     ::  tend
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Time_GetSpan
+  end interface
+
+  !  \brief Set the time step.
+  ! 
+  !  \param htime    IO time object of which to set time step
+  !  \param tstep    I  new time step
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Time_SetStep
+    subroutine CTA_Time_SetStep( htime, tstep, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_TIME_RKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  htime
+      real(CTA_TIME_RKIND)          , intent(in   )     ::  tstep
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Time_SetStep
+  end interface
+
+  !  \brief Get time step.
+  ! 
+  !  \param htime    IO time object of which to get time step
+  !  \param tstep    O  receives time step
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Time_GetStep
+    subroutine CTA_Time_GetStep( htime, tstep, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_TIME_RKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  htime
+      real(CTA_TIME_RKIND)          , intent(out  )     ::  tstep
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Time_GetStep
+  end interface
+
+  !  \brief Count number of timesteps in time
+  ! 
+  !  \param htime    I time object (see function description)
+  !  \param nsteps   O number of timesteps
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  ! 
+  !  \note number of steps is rounded to nearest integer
+  !
+  interface CTA_F90_Time_CountSteps
+    subroutine CTA_Time_CountSteps( htime, nsteps, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  htime
+      integer                       , intent(out  )     ::  nsteps
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Time_CountSteps
+  end interface
+
+  !  \brief Get interval of i-th step
+  ! 
+  !  \param htime    I time object (see function description)
+  !  \param istep    I interval of step
+  !  \param hstep    O time step of model
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  ! 
+  !  \note intervals are counted from 1 to nsteps
+  !
+  interface CTA_F90_Time_GetTimeStep
+    subroutine CTA_Time_GetTimeStep( htime, istep, hstep, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  htime
+      integer                       , intent(in   )     ::  istep
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hstep
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Time_GetTimeStep
+  end interface
+
+  !  \brief Check whether htimesub is within time span of htime.
+  ! 
+  !  \param htimesub I time object (see function description)
+  !  \param htime    I time object (see function description)
+  !  \param inspan   O receives TRUE if htimesub is within time span of htime or FALSE otherwise
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Time_InSpan
+    subroutine CTA_Time_InSpan( htimesub, htime, inspan, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  htimesub
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  htime
+      integer                       , intent(out  )     ::  inspan
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Time_InSpan
+  end interface
+
+  !  \brief Check whether time step of time object equals t
+  ! 
+  !  \param htime    I time object
+  !  \param t        I time step to compare
+  !  \param isstep   O receives TRUE if t equals time step of time object or FALSE otherwise
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Time_IsStep
+    subroutine CTA_Time_IsStep( htime, t, isstep, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_TIME_RKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  htime
+      real(CTA_TIME_RKIND)          , intent(in   )     ::  t
+      integer                       , intent(out  )     ::  isstep
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Time_IsStep
+  end interface
+
+  !  \brief Copy a time object.
+  ! 
+  !  \param hfrom    I time object to copy from
+  !  \param hto      O handle of time object that receives copy, must exist before calling
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Time_Copy
+    subroutine CTA_Time_Copy( hfrom, hto, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hfrom
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hto
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Time_Copy
+  end interface
+
+  !  \brief Export a time object.
+  !  exports the whole internal state of the time object to given target
+  !  CTA_FILE will export the time component in a MATLAB/OCTAVE readable form
+  !  CTA_PACK will pack the content
+  ! 
+  !  \param htime    I time object to export
+  !  \param hexport  I target for export (CTA_FILE or CTA_PACK)
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Time_Export
+    subroutine CTA_Time_Export( htime, hexport, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  htime
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hexport
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Time_Export
+  end interface
+
+  !  \brief Import a time object.
+  !  imports the whole internal state of the time object from given source
+  ! 
+  !  \param htime    I time object to import to
+  !  \param himport  I source of import (CTA_PACK)
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Time_Import
+    subroutine CTA_Time_Import( htime, himport, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  htime
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  himport
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Time_Import
+  end interface
+
+  !  \brief Returns whether time object describes an timespan or a single
+  !  instance.
+  ! 
+  !  \param htime    I time object to import to
+  !  \param isspan   O time object is a time timespan (CTA_TRUE/CTA_FALSE)
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Time_IsSpan
+    subroutine CTA_Time_IsSpan( htime, isspan, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  htime
+      integer                       , intent(out  )     ::  isspan
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Time_IsSpan
+  end interface
+
+
+end module cta_f90_time
+
diff --git a/costa/native/cta_f90/generated/cta_f90_tree.f90 b/costa/native/cta_f90/generated/cta_f90_tree.f90
new file mode 100644
index 000000000..c79fae22e
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_tree.f90
@@ -0,0 +1,242 @@
+module cta_f90_tree
+
+  implicit none
+
+  public
+
+  !  \brief Create a new COSTA tree instance
+  ! 
+  !   \note
+  ! 
+  !   \param htree  O  receives handle of created tree
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_Tree_Create
+    subroutine CTA_Tree_Create( htree, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  htree
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Tree_Create
+  end interface
+
+  !  \brief Free the COSTA tree instance
+  ! 
+  !   \note
+  ! 
+  !   \param htree  IO handle of the tree instance, replaced by CTA_NULL on return
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_Tree_Free
+    subroutine CTA_Tree_Free( htree, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  htree
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Tree_Free
+  end interface
+
+  !  \brief Add a COSTA handle to the COSTA tree
+  ! 
+  !   \note
+  ! 
+  !   \param htree  IO handle of the tree object (parent)
+  !   \param name   I  name of the COSTA item
+  !   \param hitem  I  handle of the COSTA item to add (do not free the object after adding it to the tree)
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_Tree_AddHandle
+    subroutine CTA_Tree_AddHandle( htree, name, hitem, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  htree
+      character(len=*)              , intent(in   )     ::  name
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hitem
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Tree_AddHandle
+  end interface
+
+  !  \brief Count the number of COSTA handles specified by the given path.
+  ! 
+  !   \param htree  I  handle of the tree object
+  !   \param path   I  path of the item, separated by / or \\
+  !   \param count  O  receives the number of items found
+  !   \param status O CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found
+  !
+  interface CTA_F90_Tree_CountHandles
+    subroutine CTA_Tree_CountHandles( htree, path, count, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  htree
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  path
+      integer                       , intent(out  )     ::  count
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Tree_CountHandles
+  end interface
+
+  !  \brief Count the number of COSTA handles specified by the given path.
+  ! 
+  !   \param htree  I  handle of the tree object
+  !   \param path   I  path of the item, separated by / or \
+  !   \param count  O  receives the number of items found
+  !   \param status O CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found
+  !
+  interface CTA_F90_Tree_CountHandlesStr
+    subroutine CTA_Tree_CountHandlesStr( htree, path, count, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  htree
+      character(len=*)              , intent(in   )     ::  path(*)
+      integer                       , intent(out  )     ::  count
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Tree_CountHandlesStr
+  end interface
+
+  !  \brief Get a COSTA handle from the COSTA tree (by path)
+  ! 
+  !   \note In case of trees with default values, returns the default value.
+  !   \note The returned handle must not be freed.
+  ! 
+  !   \param htree  I  handle of the tree object
+  !   \param path   I  path of the item, separated by / or \\
+  !   \param hitem  O  receives the handle of the COSTA item, or CTA_NULL in case not found, do not free this handle.
+  !   \param status O CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found
+  !
+  interface CTA_F90_Tree_GetHandle
+    subroutine CTA_Tree_GetHandle( htree, path, hitem, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  htree
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  path
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hitem
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Tree_GetHandle
+  end interface
+
+  !  \brief Get the value of a COSTA handle from the COSTA tree (by path)
+  ! 
+  !   \note In case of trees with default values, returns the default value.
+  ! 
+  !   \param htree    I  handle of the tree object
+  !   \param path     I  COSTA string describing path of the item, separated by / or \
+  !   \param value    O  receives the value of the COSTA item, or CTA_NULL in case of not found
+  !   \param datatype I  data type of parameter value, must be the same as item in tree
+  !   \param status O CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found
+  !
+  !interface CTA_F90_Tree_GetValue
+  !  subroutine CTA_Tree_GetValue( htree, path, value, datatype, status )
+  !    use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+  !    integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  htree
+  !    integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  path
+  !    void                          , intent(out  )     ::  value
+  !    integer                       , intent(in   )     ::  datatype
+  !    integer                       , intent(out  )     ::  status
+  !  end subroutine CTA_Tree_GetValue
+  !end interface
+
+  !  \brief Get a COSTA handle from the COSTA tree (by path)
+  ! 
+  !   \note In case of trees with default values, returns the default value.
+  !   \note The returned handle must not be freed.
+  ! 
+  !   \param htree  I  handle of the tree object
+  !   \param str    I  C string describing path of the item, separated by / or \
+  !   \param hitem  O  receives the handle of the COSTA item, or CTA_NULL in case of not found, do not free this handle
+  !   \param status O CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found
+  !
+  interface CTA_F90_Tree_GetHandleStr
+    subroutine CTA_Tree_GetHandleStr( htree, str, hitem, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  htree
+      character(len=*)              , intent(in   )     ::  str
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hitem
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Tree_GetHandleStr
+  end interface
+
+  !  \brief Get the value of a COSTA handle from the COSTA tree (by path)
+  ! 
+  !   \note In case of trees with default values, returns the default value.
+  ! 
+  !   \param htree    I  handle of the tree instance
+  !   \param str      I  C string describing path of the item, separated by / or \
+  !   \param value    O  receives the value of the COSTA item, or CTA_NULL in case of not found
+  !   \param datatype I  data type of the value specified
+  !   \param status O CTA_OK if successful or CTA_ITEM_NOT_FOUND in case of not found
+  !
+  !interface CTA_F90_Tree_GetValueStr
+  !  subroutine CTA_Tree_GetValueStr( htree, str, value, datatype, status )
+  !    use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+  !    integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  htree
+  !    character(len=*)              , intent(in   )     ::  str
+  !    void                          , intent(out  )     ::  value
+  !    integer                       , intent(in   )     ::  datatype
+  !    integer                       , intent(out  )     ::  status
+  !  end subroutine CTA_Tree_GetValueStr
+  !end interface
+
+  !  \brief Count the number of elements on the current level of the COSTA tree
+  ! 
+  !   \param htree  I  handle of the tree level
+  !   \param count  O  receives the number of elements on the current tree level
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_Tree_CountItems
+    subroutine CTA_Tree_CountItems( htree, count, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  htree
+      integer                       , intent(out  )     ::  count
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Tree_CountItems
+  end interface
+
+  !  \brief Get a handle (by index) on the current level of the COSTA tree
+  ! 
+  !   \param htree  I  handle of the tree level
+  !   \param index  I  index of the item to return, 1 <= index <= CTA_Tree_CountItems()
+  !   \param hitem  O  receives handle of the item at given index
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_Tree_GetItem
+    subroutine CTA_Tree_GetItem( htree, index, hitem, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  htree
+      integer                       , intent(in   )     ::  index
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hitem
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Tree_GetItem
+  end interface
+
+  !  \brief Get the value of a COSTA handle from the COSTA tree (by index)
+  ! 
+  !   \note In case of trees with default values, returns the default value.
+  ! 
+  !   \param htree    I  handle of the tree instance
+  !   \param index    I  index of the item
+  !   \param value    O  receives value of the COSTA item, or CTA_NULL in case of not found
+  !   \param datatype I  data type of the value specified
+  !   \param status O CTA_OK if successful or CTA_ITEM_NOT_FOUND in case not found
+  !
+  !interface CTA_F90_Tree_GetItemValue
+  !  subroutine CTA_Tree_GetItemValue( htree, index, value, datatype, status )
+  !    use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+  !    integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  htree
+  !    integer                       , intent(in   )     ::  index
+  !    void                          , intent(out  )     ::  value
+  !    integer                       , intent(in   )     ::  datatype
+  !    integer                       , intent(out  )     ::  status
+  !  end subroutine CTA_Tree_GetItemValue
+  !end interface
+
+  !  \brief Print a COSTA tree to STDOUT
+  ! 
+  !   \note
+  ! 
+  !   \param htree  I  handle of the tree
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_Tree_Print
+    subroutine CTA_Tree_Print( htree, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  htree
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Tree_Print
+  end interface
+
+
+end module cta_f90_tree
+
diff --git a/costa/native/cta_f90/generated/cta_f90_treevector.f90 b/costa/native/cta_f90/generated/cta_f90_treevector.f90
new file mode 100644
index 000000000..d22ee5e85
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_treevector.f90
@@ -0,0 +1,1146 @@
+module cta_f90_treevector
+
+  implicit none
+
+  public
+
+  !  \brief Create a tree-vector.
+  ! 
+  !  \param name    I  name of the tree-vector, this is a human readable
+  !                    string used for (debug) output and not by the algorithms
+  !                    itself
+  !  \param tag     I  tag of this tree-vector
+  !  \param treevec  O new tree-vector
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_Create
+    subroutine CTA_TreeVector_Create( name, tag, treevec, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      character(len=*)              , intent(in   )     ::  name
+      character(len=*)              , intent(in   )     ::  tag
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  treevec
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_TreeVector_Create
+  end interface
+
+  !  \brief Duplicate a tree-vector.
+  ! 
+  !  \note Duplication means that a new tree-vector is created that is identical to
+  !  the originating tree-vector. All data in the original tree-vector is also copied.
+  ! 
+  !  \param treevec1  I  handle of treevector to be duplicated
+  !  \param treevec2  O  receives handle to duplicate
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_Duplicate
+    subroutine CTA_TreeVector_Duplicate( treevec1, treevec2, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec1
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  treevec2
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_TreeVector_Duplicate
+  end interface
+
+  !  \brief Define a tree-vector to be a concatination of other tree-vectors.
+  ! 
+  !  \note The concatenation is done by reference (handle). The sub-tree-vectors that
+  !        are concatenated are not copied.
+  ! 
+  !  \param treevec1  I  tree-vector that will be concatenation of the sub-tree-vectors provided in parameter treevecs
+  !  \param treevecs  I  array of the sub-tree-vectors
+  !  \param ntreevecs I  number of sub-tree-vectors in treevecs
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_Conc
+    subroutine CTA_TreeVector_Conc( treevec1, treevecs, ntreevecs, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec1
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevecs(*)
+      integer                       , intent(in   )     ::  ntreevecs
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_TreeVector_Conc
+  end interface
+
+  !  \brief Get the handle of a sub-tree-vectors using its tag.
+  ! 
+  !  \note This is done by reference (handle). The handle of the
+  !        returned sub-tree-vector is not a copy
+  ! 
+  !  \param treevec   I  Tree-vector
+  !  \param tag       I  tag of the requested sub-tree-vector
+  !  \param subtreevec O  receives handle of the requested sub-tree-vectors, this is by reference, not a copy
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_GetSubTreeVec
+    subroutine CTA_TreeVector_GetSubTreeVec( treevec, tag, subtreevec, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      character(len=*)              , intent(in   )     ::  tag
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  subtreevec
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_TreeVector_GetSubTreeVec
+  end interface
+
+  !  \brief Get the tag of a sub-tree-vector using its index (starting with 0).
+  ! 
+  !  \param treevec   I  Tree-vector
+  !  \param index     I  index of the requested sub-tree-vector
+  !  \param tag       O  String of standard length containnig the tag
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_GetSubTreeVecId
+    subroutine CTA_TreeVector_GetSubTreeVecId( treevec, index, tag, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      integer                       , intent(in   )     ::  index
+      character(len=*)              , intent(out  )     ::  tag(*)
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_TreeVector_GetSubTreeVecId
+  end interface
+
+  !  \brief Get the handle of a first-layer sub-tree-vector using its index.
+  ! 
+  !  \note The concatination is done by reference (handle). The handle of the
+  !        returned sub-tree-vector is not a copy
+  ! 
+  !  \param treevec   I  Tree-vector
+  !  \param index     I  index of requested sub-tree-vector. Note that the first sub-tree-vector has index 1.
+  !  \param subtreevec O  receives handle of the requested sub-tree-vector, this is by reference, not a copy
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_GetSubTreeVecIndex
+    subroutine CTA_TreeVector_GetSubTreeVecIndex( treevec, index, subtreevec, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      integer                       , intent(in   )     ::  index
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  subtreevec
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_TreeVector_GetSubTreeVecIndex
+  end interface
+
+  !  \brief Get number of sub-treevectors
+  ! 
+  !  \note In case of a leaf 0 is returned
+  ! 
+  !  \param treevec      I  Tree-vector
+  !  \param numSubTrees  O  Number of sub-treevectors
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_GetNumSubTree
+    subroutine CTA_TreeVector_GetNumSubTree( treevec, numSubTrees, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      integer                       , intent(out  )     ::  numSubTrees
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_TreeVector_GetNumSubTree
+  end interface
+
+  !  \brief Get the tag of the tree-vector.
+  ! 
+  !  Note tag should be large enough to hold the result
+  !       length of CTA_STRLEN_TAG is always save (no internal protection)
+  ! 
+  !  \param treevec   I  Tree-vector
+  !  \param tag       O  receives the tag of the requested sub-tree-vector (see note)
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_GetTag
+    subroutine CTA_TreeVector_GetTag( treevec, tag, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      character(len=*)              , intent(out  )     ::  tag
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_TreeVector_GetTag
+  end interface
+
+  !  \brief Set the values of the tree-vector
+  ! 
+  !  \note This operation is only possible when all data elements in the tree-vector
+  !        are of the same type and the size of the tree-vector corresponds to the
+  !        size of the input vector.
+  ! 
+  !  \param treevec   IO TreeVector
+  !  \param hvec      I  handle of the vector containing new values (see note)
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_SetVec
+    subroutine CTA_TreeVector_SetVec( treevec, hvec, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  treevec
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_TreeVector_SetVec
+  end interface
+
+  !  \brief Get the values of the tree-vector.
+  ! 
+  !  \note This operation is only possible when all data elements in the tree-vector
+  !        are of the same type and the size of the tree-vector corresponds to the
+  !        vector size.
+  ! 
+  !  \param treevec   I  Tree-vector
+  !  \param hvec      O  Vector that is receiving the values; must exist before calling
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_GetVec
+    subroutine CTA_TreeVector_GetVec( treevec, hvec, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hvec
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_TreeVector_GetVec
+  end interface
+
+  !  \brief Axpy operation between two tree-vectors.
+  ! 
+  !  \note  Axpy: y=alpha*x+y. Add alpha times tree-vector x to
+  !               this tree-vector (y).
+  ! 
+  !  \param y         IO Tree-vector (y)
+  !  \param alpha     I  scalar
+  !  \param x         I  Tree-vector (x)
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_Axpy
+    subroutine CTA_TreeVector_Axpy( y, alpha, x, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  y
+      real(8)                       , intent(in   )     ::  alpha
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  x
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_TreeVector_Axpy
+  end interface
+
+  !  \brief Compute dot product of two tree-vectors.
+  ! 
+  !  \note  dotprod = sum[all i]  (treevec1_i * treevec2_i)
+  ! 
+  !  \param treevec1  I  first tree-vector
+  !  \param treevec2  I  second tree-vector
+  !  \param dotprod   O  receives the dot product
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_Dot
+    subroutine CTA_TreeVector_Dot( treevec1, treevec2, dotprod, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec1
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec2
+      real(8)                       , intent(out  )     ::  dotprod
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_TreeVector_Dot
+  end interface
+
+  !  \brief Compute the 2-norm of a tree-vector.
+  ! 
+  !  \param treevec1  I  Tree-vector
+  !  \param nrm2      O  receives the 2-norm
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_Nrm2
+    subroutine CTA_TreeVector_Nrm2( treevec1, nrm2, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec1
+      real(8)                       , intent(out  )     ::  nrm2
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_TreeVector_Nrm2
+  end interface
+
+  !  \brief Copy a tree-vector
+  ! 
+  !  \note  The two tree-vectors must be compatible: same structure and datatypes.
+  ! 
+  !  \param treevec1   I  sending tree-vector
+  !  \param treevec2   O  receiving tree-vector
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_Copy
+    subroutine CTA_TreeVector_Copy( treevec1, treevec2, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec1
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  treevec2
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_TreeVector_Copy
+  end interface
+
+  !  \brief Set whole tree-vector equal to a constant value.
+  ! 
+  !  \note  This method can only be used if all elements of the tree-vector
+  !         have the same data type.
+  ! 
+  !  \param treevec  IO TreeVector
+  !  \param val      I  value to set
+  !  \param datatype I  data type of val, must be same as data type of tree-vector
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_SetConstant
+    module procedure CTA_TreeVector_SetConstant_integer
+    module procedure CTA_TreeVector_SetConstant_real4
+    module procedure CTA_TreeVector_SetConstant_real8
+  end interface
+
+  !  \brief Scale tree-vector.
+  ! 
+  !  \param treevec   IO handle of tree-vector
+  !  \param alpha    I  scalar
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_Scal
+    subroutine CTA_TreeVector_Scal( treevec, alpha, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  treevec
+      real(8)                       , intent(in   )     ::  alpha
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_TreeVector_Scal
+  end interface
+
+  !  \brief Set all values of the tree-vector.
+  ! 
+  !  \note  This method can only be used if all elements of the tree-vector
+  !         are of the same data type.
+  ! 
+  !  \param treevec  IO Tree-vector
+  !  \param val      I  values to be set
+  !  \param nval     I  number of elements in val
+  !  \param datatype I  data type of *val, must be the same as data type of elements in tree-vector
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_SetVals
+    module procedure CTA_TreeVector_SetVals_integer_1d
+    module procedure CTA_TreeVector_SetVals_integer_2d
+    module procedure CTA_TreeVector_SetVals_integer_3d
+    module procedure CTA_TreeVector_SetVals_integer_4d
+    module procedure CTA_TreeVector_SetVals_integer_5d
+    module procedure CTA_TreeVector_SetVals_integer_6d
+    module procedure CTA_TreeVector_SetVals_integer_7d
+    module procedure CTA_TreeVector_SetVals_real4_1d
+    module procedure CTA_TreeVector_SetVals_real4_2d
+    module procedure CTA_TreeVector_SetVals_real4_3d
+    module procedure CTA_TreeVector_SetVals_real4_4d
+    module procedure CTA_TreeVector_SetVals_real4_5d
+    module procedure CTA_TreeVector_SetVals_real4_6d
+    module procedure CTA_TreeVector_SetVals_real4_7d
+    module procedure CTA_TreeVector_SetVals_real8_1d
+    module procedure CTA_TreeVector_SetVals_real8_2d
+    module procedure CTA_TreeVector_SetVals_real8_3d
+    module procedure CTA_TreeVector_SetVals_real8_4d
+    module procedure CTA_TreeVector_SetVals_real8_5d
+    module procedure CTA_TreeVector_SetVals_real8_6d
+    module procedure CTA_TreeVector_SetVals_real8_7d
+  end interface
+
+  !  \brief Get all values of the tree-vector.
+  ! 
+  !  \note  This method can only be used if all elements of the tree-vector
+  !         are of the same data type.
+  ! 
+  !  \param treevec  I  Tree-vector
+  !  \param val      O  receives the values
+  !  \param nval     I  number of elements in val
+  !  \param datatype I  data type of *val, must be the same as data type of elements in tree-vector
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_GetVals
+    module procedure CTA_TreeVector_GetVals_integer_1d
+    module procedure CTA_TreeVector_GetVals_integer_2d
+    module procedure CTA_TreeVector_GetVals_integer_3d
+    module procedure CTA_TreeVector_GetVals_integer_4d
+    module procedure CTA_TreeVector_GetVals_integer_5d
+    module procedure CTA_TreeVector_GetVals_integer_6d
+    module procedure CTA_TreeVector_GetVals_integer_7d
+    module procedure CTA_TreeVector_GetVals_real4_1d
+    module procedure CTA_TreeVector_GetVals_real4_2d
+    module procedure CTA_TreeVector_GetVals_real4_3d
+    module procedure CTA_TreeVector_GetVals_real4_4d
+    module procedure CTA_TreeVector_GetVals_real4_5d
+    module procedure CTA_TreeVector_GetVals_real4_6d
+    module procedure CTA_TreeVector_GetVals_real4_7d
+    module procedure CTA_TreeVector_GetVals_real8_1d
+    module procedure CTA_TreeVector_GetVals_real8_2d
+    module procedure CTA_TreeVector_GetVals_real8_3d
+    module procedure CTA_TreeVector_GetVals_real8_4d
+    module procedure CTA_TreeVector_GetVals_real8_5d
+    module procedure CTA_TreeVector_GetVals_real8_6d
+    module procedure CTA_TreeVector_GetVals_real8_7d
+  end interface
+
+  !  \brief Set single value of the tree-vector.
+  ! 
+  !  \param treevec  IO Tree-Vector
+  !  \param i        I  index of value in tree-vector
+  !  \param val      I  value to be set
+  !  \param datatype I  data type of *val, must be the same as data type of element in tree-vector
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_SetVal
+    module procedure CTA_TreeVector_SetVal_integer
+    module procedure CTA_TreeVector_SetVal_real4
+    module procedure CTA_TreeVector_SetVal_real8
+  end interface
+
+  !  \brief Get single value of the tree-vector.
+  ! 
+  !  \param treevec  I  Tree-vector
+  !  \param i        I  index in value in tree-vector
+  !  \param val      O  returned value
+  !  \param datatype I  data type of *val, must be the same as data type of element in tree-vector
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_GetVal
+    module procedure CTA_TreeVector_GetVal_integer
+    module procedure CTA_TreeVector_GetVal_real4
+    module procedure CTA_TreeVector_GetVal_real8
+  end interface
+
+  !  \brief Get size of tree-vector.
+  ! 
+  !  \param treevec  I  Tree-vector
+  !  \param n        O  receives size of tree-vector
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_GetSize
+    subroutine CTA_TreeVector_GetSize( treevec, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      integer                       , intent(out  )     ::  n
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_TreeVector_GetSize
+  end interface
+
+  !  \brief Export tree-vector.
+  ! 
+  !  Can export tree-vector to file or pack object.\n
+  !  usrdata must contain a handle of the file or pack object to be used.\n
+  !  Dependency: CTA_Vector_Export()
+  ! 
+  ! 
+  !  \param treevec  I  Tree-vector
+  !  \param usrdata  I  export properties
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_Export
+    subroutine CTA_TreeVector_Export( treevec, usrdata, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  usrdata
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_TreeVector_Export
+  end interface
+
+  !  \brief Import Tree-vector.
+  ! 
+  !  Can import tree-vector from file or pack object.\n
+  !  usrdata must contain a handle of the file or pack object to be used.\n
+  !  Dependency: CTA_Vector_Import()
+  ! 
+  ! 
+  !  \param treevec  I  Tree-vector
+  !  \param usrdata  I  import properties
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_Import
+    subroutine CTA_TreeVector_Import( treevec, usrdata, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  usrdata
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_TreeVector_Import
+  end interface
+
+  !  \brief Import Tree-vector as flat vector.
+  ! 
+  !  Can import tree-vector from netcdf file.\n
+  !  usrdata must contain a handle of the file .\n
+  !  Dependency: CTA_Vector_VImport()
+  ! 
+  ! 
+  !  \param treevec  I  Tree-vector
+  !  \param usrdata  I  import properties
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_VImport
+    subroutine CTA_TreeVector_VImport( treevec, usrdata, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  usrdata
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_TreeVector_VImport
+  end interface
+
+  !  \brief Free Tree-vector.
+  ! 
+  !  \param treevec     I  handle of tree-vector
+  !  \param recursive  I  also free all sub-tree-vectors, yes: CTA_TRUE or no: CTA_FALSE
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_Free
+    subroutine CTA_TreeVector_Free( treevec, recursive, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec(*)
+      integer                       , intent(in   )     ::  recursive
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_TreeVector_Free
+  end interface
+
+  !  \brief Print tree-vector information.
+  ! 
+  !  Gives following information:\n\n
+  !   Tree-vector information:\n
+  !   tag: [tag]\n
+  !   nsubtreevecs: [number of sub-tree-vectors]\n
+  ! 
+  !  If nsubtreevecs > 0: recursively prints all sub-tree-vectors
+  !  Else prints:\n
+  !  leaf: yes\n
+  !  tree-vector size (leaf)
+  ! 
+  !  \param treevec     I  tree-vector
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_Info
+    subroutine CTA_TreeVector_Info( treevec, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_TreeVector_Info
+  end interface
+
+  !  \brief Perform the matrix multiplication C:=alpha*op(A)*op(B)+beta*C
+  ! where op(X)=X, X^T. However C and A are matrices of wich the columns are
+  ! tree-vectors
+  ! 
+  !   \param sC     IO array of tree-vector (matrix C)
+  !   \param nc     I  number of columns of C (dimension of sC)
+  !   \param transa I  transpose flag CTA_TRUE/CTA_FALSE for matrix A (not supported)
+  !   \param transb I  transpose flag CTA_TRUE/CTA_FALSE for matrix B
+  !   \param alpha  I  scalar
+  !   \param sA     I  handle of matrix A
+  !   \param na     I  number of columns of A (dimension of sA)
+  !   \param mB     I  handle of matrix B
+  !   \param beta   I  scalar
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_Gemm
+    subroutine CTA_TreeVector_Gemm( sC, nc, transa, transb, alpha, sA, na, mB, beta, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  sC
+      integer                       , intent(in   )     ::  nc
+      integer                       , intent(in   )     ::  transa
+      integer                       , intent(in   )     ::  transb
+      real(8)                       , intent(in   )     ::  alpha
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  sA(*)
+      integer                       , intent(in   )     ::  na
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  mB
+      real(8)                       , intent(in   )     ::  beta
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_TreeVector_Gemm
+  end interface
+
+  !  \brief Perform given operation on all leafs of the treevector
+  ! 
+  !   \param treevec1 I  handle of first COSTA tree-vector
+  !   \param treevec2 I  handle of second COSTA tree-vector
+  !   \param treevec  I  handle of a COSTA tree-vector
+  !   \param op       I  operation to perform on the leafs
+  !   \param arg      I  additional argument of operation
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_OpOnLeafs
+    subroutine CTA_TreeVector_OpOnLeafs( treevec1, treevec2, op, arg, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec1
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec2
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  op
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  arg
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_TreeVector_OpOnLeafs
+  end interface
+
+  !  \brief Elementwise division of two vectors
+  !   \note y:=y./x
+  ! 
+  !   \param y       I  handle of a COSTA tree-vector (y)
+  !   \param x       I  handle of a COSTA tree-vector (y)
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_ElmDiv
+    subroutine CTA_TreeVector_ElmDiv( y, x, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  y
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  x
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_TreeVector_ElmDiv
+  end interface
+
+  !  \brief Elementwise multiplication of two vectors
+  !   \note y:=y.*x
+  ! 
+  !   \param y       I  handle of a COSTA tree-vector (y)
+  !   \param x       I  handle of a COSTA tree-vector (y)
+  !   \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_ElmProd
+    subroutine CTA_TreeVector_ElmProd( y, x, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  y
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  x
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_TreeVector_ElmProd
+  end interface
+
+  !  \brief Elementwise sqare root
+  !   \note y:=sqrt(y)
+  ! 
+  !   \param y       I  handle of a COSTA tree-vector (y)
+  !   \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_ElmSqrt
+    subroutine CTA_TreeVector_ElmSqrt( y, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  y
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_TreeVector_ElmSqrt
+  end interface
+
+  !  \brief Set nocompute flag of a sub-tree vector
+  ! 
+  !   When this flag is set, the values of the sub-treevector will
+  !   be ignored in all basic vector operations (including asking the
+  !   total length of the tree-vector). This propertie is used for
+  !   additionally adding some meta information
+  ! 
+  !  \note the nocompute flag is set at the level of the parent!
+  !  so the "isolated" sub-treevector can be used in basic vector
+  !  operations.
+  ! 
+  !   \param x       I  handle of a COSTA tree-vector (y)
+  !   \param tag     I  tag of sub-treevector
+  !   \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_TreeVector_SetSubTreeNocompute
+    subroutine CTA_TreeVector_SetSubTreeNocompute( x, tag, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  x
+      character(len=*)              , intent(in   )     ::  tag
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_TreeVector_SetSubTreeNocompute
+  end interface
+
+  !  \brief Increase the reference count of a treevector and all subtrevectors
+  ! 
+  !  \param treevec  I handle of a COSTA tree-vector
+  !   \param status O error status: CTA_OK if successful
+  ! 
+  !
+  interface CTA_F90_TreeVector_IncRefCount
+    subroutine CTA_TreeVector_IncRefCount( treevec, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_TreeVector_IncRefCount
+  end interface
+
+
+contains
+
+    subroutine CTA_TreeVector_SetConstant_integer( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      use CTA_F90_Parameters, only : CTA_OK
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  treevec
+      integer                       , intent(in   )     ::  val
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_SetConstant( treevec, val, CTA_INTEGER, status )
+    end subroutine CTA_TreeVector_SetConstant_integer
+
+    subroutine CTA_TreeVector_SetConstant_real4( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      use CTA_F90_Parameters, only : CTA_OK
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  treevec
+      real(4)                       , intent(in   )     ::  val
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_SetConstant( treevec, val, CTA_REAL, status )
+    end subroutine CTA_TreeVector_SetConstant_real4
+
+    subroutine CTA_TreeVector_SetConstant_real8( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      use CTA_F90_Parameters, only : CTA_OK
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  treevec
+      real(8)                       , intent(in   )     ::  val
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_SetConstant( treevec, val, CTA_DOUBLE, status )
+    end subroutine CTA_TreeVector_SetConstant_real8
+
+    subroutine CTA_TreeVector_SetVals_integer_1d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  treevec
+      integer                       , intent(in   )     ::  val(:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_INTEGER, status )
+    end subroutine CTA_TreeVector_SetVals_integer_1d
+
+    subroutine CTA_TreeVector_SetVals_integer_2d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  treevec
+      integer                       , intent(in   )     ::  val(:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_INTEGER, status )
+    end subroutine CTA_TreeVector_SetVals_integer_2d
+
+    subroutine CTA_TreeVector_SetVals_integer_3d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  treevec
+      integer                       , intent(in   )     ::  val(:,:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_INTEGER, status )
+    end subroutine CTA_TreeVector_SetVals_integer_3d
+
+    subroutine CTA_TreeVector_SetVals_integer_4d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  treevec
+      integer                       , intent(in   )     ::  val(:,:,:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_INTEGER, status )
+    end subroutine CTA_TreeVector_SetVals_integer_4d
+
+    subroutine CTA_TreeVector_SetVals_integer_5d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  treevec
+      integer                       , intent(in   )     ::  val(:,:,:,:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_INTEGER, status )
+    end subroutine CTA_TreeVector_SetVals_integer_5d
+
+    subroutine CTA_TreeVector_SetVals_integer_6d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  treevec
+      integer                       , intent(in   )     ::  val(:,:,:,:,:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_INTEGER, status )
+    end subroutine CTA_TreeVector_SetVals_integer_6d
+
+    subroutine CTA_TreeVector_SetVals_integer_7d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  treevec
+      integer                       , intent(in   )     ::  val(:,:,:,:,:,:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_INTEGER, status )
+    end subroutine CTA_TreeVector_SetVals_integer_7d
+
+    subroutine CTA_TreeVector_SetVals_real4_1d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  treevec
+      real(4)                       , intent(in   )     ::  val(:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_REAL, status )
+    end subroutine CTA_TreeVector_SetVals_real4_1d
+
+    subroutine CTA_TreeVector_SetVals_real4_2d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  treevec
+      real(4)                       , intent(in   )     ::  val(:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_REAL, status )
+    end subroutine CTA_TreeVector_SetVals_real4_2d
+
+    subroutine CTA_TreeVector_SetVals_real4_3d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  treevec
+      real(4)                       , intent(in   )     ::  val(:,:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_REAL, status )
+    end subroutine CTA_TreeVector_SetVals_real4_3d
+
+    subroutine CTA_TreeVector_SetVals_real4_4d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  treevec
+      real(4)                       , intent(in   )     ::  val(:,:,:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_REAL, status )
+    end subroutine CTA_TreeVector_SetVals_real4_4d
+
+    subroutine CTA_TreeVector_SetVals_real4_5d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  treevec
+      real(4)                       , intent(in   )     ::  val(:,:,:,:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_REAL, status )
+    end subroutine CTA_TreeVector_SetVals_real4_5d
+
+    subroutine CTA_TreeVector_SetVals_real4_6d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  treevec
+      real(4)                       , intent(in   )     ::  val(:,:,:,:,:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_REAL, status )
+    end subroutine CTA_TreeVector_SetVals_real4_6d
+
+    subroutine CTA_TreeVector_SetVals_real4_7d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  treevec
+      real(4)                       , intent(in   )     ::  val(:,:,:,:,:,:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_REAL, status )
+    end subroutine CTA_TreeVector_SetVals_real4_7d
+
+    subroutine CTA_TreeVector_SetVals_real8_1d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  treevec
+      real(8)                       , intent(in   )     ::  val(:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_DOUBLE, status )
+    end subroutine CTA_TreeVector_SetVals_real8_1d
+
+    subroutine CTA_TreeVector_SetVals_real8_2d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  treevec
+      real(8)                       , intent(in   )     ::  val(:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_DOUBLE, status )
+    end subroutine CTA_TreeVector_SetVals_real8_2d
+
+    subroutine CTA_TreeVector_SetVals_real8_3d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  treevec
+      real(8)                       , intent(in   )     ::  val(:,:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_DOUBLE, status )
+    end subroutine CTA_TreeVector_SetVals_real8_3d
+
+    subroutine CTA_TreeVector_SetVals_real8_4d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  treevec
+      real(8)                       , intent(in   )     ::  val(:,:,:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_DOUBLE, status )
+    end subroutine CTA_TreeVector_SetVals_real8_4d
+
+    subroutine CTA_TreeVector_SetVals_real8_5d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  treevec
+      real(8)                       , intent(in   )     ::  val(:,:,:,:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_DOUBLE, status )
+    end subroutine CTA_TreeVector_SetVals_real8_5d
+
+    subroutine CTA_TreeVector_SetVals_real8_6d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  treevec
+      real(8)                       , intent(in   )     ::  val(:,:,:,:,:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_DOUBLE, status )
+    end subroutine CTA_TreeVector_SetVals_real8_6d
+
+    subroutine CTA_TreeVector_SetVals_real8_7d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  treevec
+      real(8)                       , intent(in   )     ::  val(:,:,:,:,:,:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_SetVals( treevec, val, size(val), CTA_DOUBLE, status )
+    end subroutine CTA_TreeVector_SetVals_real8_7d
+
+    subroutine CTA_TreeVector_GetVals_integer_1d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      integer                       , intent(out  )     ::  val(:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_INTEGER, status )
+    end subroutine CTA_TreeVector_GetVals_integer_1d
+
+    subroutine CTA_TreeVector_GetVals_integer_2d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      integer                       , intent(out  )     ::  val(:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_INTEGER, status )
+    end subroutine CTA_TreeVector_GetVals_integer_2d
+
+    subroutine CTA_TreeVector_GetVals_integer_3d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      integer                       , intent(out  )     ::  val(:,:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_INTEGER, status )
+    end subroutine CTA_TreeVector_GetVals_integer_3d
+
+    subroutine CTA_TreeVector_GetVals_integer_4d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      integer                       , intent(out  )     ::  val(:,:,:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_INTEGER, status )
+    end subroutine CTA_TreeVector_GetVals_integer_4d
+
+    subroutine CTA_TreeVector_GetVals_integer_5d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      integer                       , intent(out  )     ::  val(:,:,:,:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_INTEGER, status )
+    end subroutine CTA_TreeVector_GetVals_integer_5d
+
+    subroutine CTA_TreeVector_GetVals_integer_6d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      integer                       , intent(out  )     ::  val(:,:,:,:,:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_INTEGER, status )
+    end subroutine CTA_TreeVector_GetVals_integer_6d
+
+    subroutine CTA_TreeVector_GetVals_integer_7d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      integer                       , intent(out  )     ::  val(:,:,:,:,:,:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_INTEGER, status )
+    end subroutine CTA_TreeVector_GetVals_integer_7d
+
+    subroutine CTA_TreeVector_GetVals_real4_1d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      real(4)                       , intent(out  )     ::  val(:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_REAL, status )
+    end subroutine CTA_TreeVector_GetVals_real4_1d
+
+    subroutine CTA_TreeVector_GetVals_real4_2d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      real(4)                       , intent(out  )     ::  val(:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_REAL, status )
+    end subroutine CTA_TreeVector_GetVals_real4_2d
+
+    subroutine CTA_TreeVector_GetVals_real4_3d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      real(4)                       , intent(out  )     ::  val(:,:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_REAL, status )
+    end subroutine CTA_TreeVector_GetVals_real4_3d
+
+    subroutine CTA_TreeVector_GetVals_real4_4d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      real(4)                       , intent(out  )     ::  val(:,:,:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_REAL, status )
+    end subroutine CTA_TreeVector_GetVals_real4_4d
+
+    subroutine CTA_TreeVector_GetVals_real4_5d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      real(4)                       , intent(out  )     ::  val(:,:,:,:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_REAL, status )
+    end subroutine CTA_TreeVector_GetVals_real4_5d
+
+    subroutine CTA_TreeVector_GetVals_real4_6d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      real(4)                       , intent(out  )     ::  val(:,:,:,:,:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_REAL, status )
+    end subroutine CTA_TreeVector_GetVals_real4_6d
+
+    subroutine CTA_TreeVector_GetVals_real4_7d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      real(4)                       , intent(out  )     ::  val(:,:,:,:,:,:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_REAL, status )
+    end subroutine CTA_TreeVector_GetVals_real4_7d
+
+    subroutine CTA_TreeVector_GetVals_real8_1d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      real(8)                       , intent(out  )     ::  val(:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_DOUBLE, status )
+    end subroutine CTA_TreeVector_GetVals_real8_1d
+
+    subroutine CTA_TreeVector_GetVals_real8_2d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      real(8)                       , intent(out  )     ::  val(:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_DOUBLE, status )
+    end subroutine CTA_TreeVector_GetVals_real8_2d
+
+    subroutine CTA_TreeVector_GetVals_real8_3d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      real(8)                       , intent(out  )     ::  val(:,:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_DOUBLE, status )
+    end subroutine CTA_TreeVector_GetVals_real8_3d
+
+    subroutine CTA_TreeVector_GetVals_real8_4d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      real(8)                       , intent(out  )     ::  val(:,:,:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_DOUBLE, status )
+    end subroutine CTA_TreeVector_GetVals_real8_4d
+
+    subroutine CTA_TreeVector_GetVals_real8_5d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      real(8)                       , intent(out  )     ::  val(:,:,:,:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_DOUBLE, status )
+    end subroutine CTA_TreeVector_GetVals_real8_5d
+
+    subroutine CTA_TreeVector_GetVals_real8_6d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      real(8)                       , intent(out  )     ::  val(:,:,:,:,:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_DOUBLE, status )
+    end subroutine CTA_TreeVector_GetVals_real8_6d
+
+    subroutine CTA_TreeVector_GetVals_real8_7d( treevec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      real(8)                       , intent(out  )     ::  val(:,:,:,:,:,:,:)
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_GetVals( treevec, val, size(val), CTA_DOUBLE, status )
+    end subroutine CTA_TreeVector_GetVals_real8_7d
+
+    subroutine CTA_TreeVector_SetVal_integer( treevec, i, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      use CTA_F90_Parameters, only : CTA_OK
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  treevec
+      integer                       , intent(in   )     ::  i
+      integer                       , intent(in   )     ::  val
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_SetVal( treevec, i, val, CTA_INTEGER, status )
+    end subroutine CTA_TreeVector_SetVal_integer
+
+    subroutine CTA_TreeVector_SetVal_real4( treevec, i, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      use CTA_F90_Parameters, only : CTA_OK
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  treevec
+      integer                       , intent(in   )     ::  i
+      real(4)                       , intent(in   )     ::  val
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_SetVal( treevec, i, val, CTA_REAL, status )
+    end subroutine CTA_TreeVector_SetVal_real4
+
+    subroutine CTA_TreeVector_SetVal_real8( treevec, i, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      use CTA_F90_Parameters, only : CTA_OK
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  treevec
+      integer                       , intent(in   )     ::  i
+      real(8)                       , intent(in   )     ::  val
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_SetVal( treevec, i, val, CTA_DOUBLE, status )
+    end subroutine CTA_TreeVector_SetVal_real8
+
+    subroutine CTA_TreeVector_GetVal_integer( treevec, i, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      use CTA_F90_Parameters, only : CTA_OK
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      integer                       , intent(in   )     ::  i
+      integer                       , intent(out  )     ::  val
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_GetVal( treevec, i, val, CTA_INTEGER, status )
+    end subroutine CTA_TreeVector_GetVal_integer
+
+    subroutine CTA_TreeVector_GetVal_real4( treevec, i, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      use CTA_F90_Parameters, only : CTA_OK
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      integer                       , intent(in   )     ::  i
+      real(4)                       , intent(out  )     ::  val
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_GetVal( treevec, i, val, CTA_REAL, status )
+    end subroutine CTA_TreeVector_GetVal_real4
+
+    subroutine CTA_TreeVector_GetVal_real8( treevec, i, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      use CTA_F90_Parameters, only : CTA_OK
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  treevec
+      integer                       , intent(in   )     ::  i
+      real(8)                       , intent(out  )     ::  val
+      integer                       , intent(out  )     ::  status
+      call CTA_TreeVector_GetVal( treevec, i, val, CTA_DOUBLE, status )
+    end subroutine CTA_TreeVector_GetVal_real8
+
+end module cta_f90_treevector
+
diff --git a/costa/native/cta_f90/generated/cta_f90_util_methods.f90 b/costa/native/cta_f90/generated/cta_f90_util_methods.f90
new file mode 100644
index 000000000..ce421430c
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_util_methods.f90
@@ -0,0 +1,85 @@
+module cta_f90_util_methods
+
+  implicit none
+
+  public
+
+  !  \brief print the predicted values and the observed values
+  ! 
+  !  \param fgModel     O Model of foreground run (with data
+  !                       assimilation) or Vector with the predicted
+  !                       values of the foreground run.
+  !                       if set to CTA_NULL NaN will be printed as result
+  !  \param bgModel     I Model of background run (without data
+  !                       assimilation) or Vector with the predicted
+  !                       values of the background run.
+  !                       if set to CTA_NULL NaN will be printed as result
+  !  \param sObs        I Stochastic observer
+  !  \param time        I Corresponding time
+  !  \param file        I Output file (note CTA_FILE_STDOUT prints to screen)
+  !  \param printHeader I Print header CTA_TRUE/CTA_FALSE
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Util_MethodsPrintObservations
+    subroutine CTA_Util_MethodsPrintObservations( fgModel, bgModel, sObs, time, file, printHeader, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  fgModel
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  bgModel
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  sObs
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  time
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  file
+      integer                       , intent(in   )     ::  printHeader
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Util_MethodsPrintObservations
+  end interface
+
+  !  \brief Make an initial selection of the observations
+  ! 
+  !   The selection of observations is based on the given simulation timespan
+  !   and the criterion provided by the model (CTA_Model_GetObsSelect)
+  ! 
+  !   \note sObsSel is created and should be freed by the caller of this routine
+  ! 
+  !  \param model      I Model (for CTA_Model_GetObsSelect)
+  !  \param sObsAll    I All observations
+  !  \param spanSim    I Simulation timespan
+  !  \param sObsSel    O Selection of observations
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Util_MethodsSelectObservations
+    subroutine CTA_Util_MethodsSelectObservations( model, sObsAll, spanSim, sObsSel, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  model
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  sObsAll
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  spanSim
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  sObsSel
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Util_MethodsSelectObservations
+  end interface
+
+  !  \brief Create an output file for filter predictions at station
+  !          locations
+  ! 
+  !   The routine  CTA_Util_MethodsPrintObservations can be used for writing
+  !   the results.
+  !   \note the header is written by this call
+  ! 
+  !  \param   stationFile  (I) Name of result file
+  !  \param   fStationFile (O) Handle to result file
+  ! 
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Util_MethodsOpenResultFile
+    subroutine CTA_Util_MethodsOpenResultFile( stationFile, fStationFile, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      character(len=*)              , intent(in   )     ::  stationFile(*)
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  fStationFile
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Util_MethodsOpenResultFile
+  end interface
+
+
+end module cta_f90_util_methods
+
diff --git a/costa/native/cta_f90/generated/cta_f90_util_sort.f90 b/costa/native/cta_f90/generated/cta_f90_util_sort.f90
new file mode 100644
index 000000000..3fca0c13e
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_util_sort.f90
@@ -0,0 +1,31 @@
+module cta_f90_util_sort
+
+  implicit none
+
+  public
+
+  !  \brief Sort an integer array using the Quicksort algorithm.
+  !   An additional interger array is permutated in way as the unsorted array
+  ! 
+  ! 
+  !   \param list       (input/output) array, dimension (N)
+  !           On entry, the array to be sorted.
+  !           On exit, list has been sorted into increasing order
+  ! 
+  !  \param indx (input/output) array, dimension (N)
+  ! 
+  !   \param n       (input) INTEGER
+  !           The length of the array D.
+  ! 
+  !
+  interface CTA_F90_Util_IQSort2
+    subroutine CTA_Util_IQSort2( list, indx, n )
+      integer                       , intent(inout)     ::  list
+      integer                       , intent(inout)     ::  indx
+      integer                       , intent(in   )     ::  n
+    end subroutine CTA_Util_IQSort2
+  end interface
+
+
+end module cta_f90_util_sort
+
diff --git a/costa/native/cta_f90/generated/cta_f90_util_statistics.f90 b/costa/native/cta_f90/generated/cta_f90_util_statistics.f90
new file mode 100644
index 000000000..e2c7e5879
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_util_statistics.f90
@@ -0,0 +1,44 @@
+module cta_f90_util_statistics
+
+  implicit none
+
+  public
+
+  !  \brief Initialize the random generator.
+  !   \note only initialize the random generator once.
+  !  \param seed     I some positive initial seed
+  !
+  interface CTA_F90_rand_seed
+    subroutine CTA_rand_seed( seed )
+      integer                       , intent(in   )     ::  seed
+    end subroutine CTA_rand_seed
+  end interface
+
+  !  \brief Get an uniform random number from the interval [0 1].
+  ! 
+  !  \param x     O  receives the random number
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_rand_u
+    subroutine CTA_rand_u( x, status )
+      real(8)                       , intent(out  )     ::  x
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_rand_u
+  end interface
+
+  !  \brief Get a random number from a normal distribution whit mean 0 and
+  !  standard deviation 1.
+  ! 
+  !  \param x     O  receives the random number
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_rand_n
+    subroutine CTA_rand_n( x, status )
+      real(8)                       , intent(out  )     ::  x
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_rand_n
+  end interface
+
+
+end module cta_f90_util_statistics
+
diff --git a/costa/native/cta_f90/generated/cta_f90_vector.f90 b/costa/native/cta_f90/generated/cta_f90_vector.f90
new file mode 100644
index 000000000..3ccf2899d
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_vector.f90
@@ -0,0 +1,1013 @@
+module cta_f90_vector
+
+  implicit none
+
+  public
+
+  !  \brief Create a new class (=implementation) of a COSTA vector component.
+  ! 
+  !  \param name     I  name of the new vector class
+  !  \param h_func   I  COSTA function handles for functions that implement class.
+  !                     Missing functions must have value CTA_NULL
+  !  \param hveccl   O  receives handle of new vector class
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Vector_DefineClass
+    subroutine CTA_Vector_DefineClass( name, h_func, hveccl, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      character(len=*)              , intent(in   )     ::  name
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  h_func(*)
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hveccl
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Vector_DefineClass
+  end interface
+
+  !  \brief Duplicate a vector object.
+  ! 
+  !  \note Only size, data type and type (class) are duplicated, the content is not
+  !        copied.
+  ! 
+  !  \param hvector1  I  handle of vector to be duplicated
+  !  \param hvector2  O  receives handle of new duplicate vector, empty before calling
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Vector_Duplicate
+    subroutine CTA_Vector_Duplicate( hvector1, hvector2, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvector1
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hvector2
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Vector_Duplicate
+  end interface
+
+  !  \brief Create a new vector.
+  ! 
+  !  \note
+  ! 
+  !  \param hveccl   I  vector class of new vector
+  !  \param n        I  number of elements
+  !  \param datatype I  data type of elements in vector
+  !  \param userdata IO user data for creation (depends on class)
+  !  \param hvector  O  receives handle of new vector
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Vector_Create
+    subroutine CTA_Vector_Create( hveccl, n, datatype, userdata, hvector, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hveccl
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(in   )     ::  datatype
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  userdata
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hvector
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Vector_Create
+  end interface
+
+  !  \brief Get size of vector.
+  ! 
+  !  \param hvec  I  handle of vector
+  !  \param n     O  receives number of elements
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Vector_GetSize
+    subroutine CTA_Vector_GetSize( hvec, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      integer                       , intent(out  )     ::  n
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Vector_GetSize
+  end interface
+
+  !  \brief Get a copy of a single element in the vector.
+  ! 
+  !  \param hvec     I  handle of vector
+  !  \param i        I  index of element
+  !  \param vals     O  receives copy of value in vector, must exist before calling
+  !  \param datatype I  data type of *vals, must be the same as data type of elements in vector
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Vector_GetVal
+    module procedure CTA_Vector_GetVal_integer
+    module procedure CTA_Vector_GetVal_real4
+    module procedure CTA_Vector_GetVal_real8
+  end interface
+
+  !  \brief Get a copy of all elements in the vector.
+  ! 
+  !  \param hvec     I  handle of vector
+  !  \param vals     O  receives copy of all elements in vector, must exist before calling
+  !  \param n        I  length of array vals
+  !  \param datatype I  data type of *vals, must be the same as data type of elements in vector
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Vector_GetVals
+    module procedure CTA_Vector_GetVals_integer_1d
+    module procedure CTA_Vector_GetVals_integer_2d
+    module procedure CTA_Vector_GetVals_integer_3d
+    module procedure CTA_Vector_GetVals_integer_4d
+    module procedure CTA_Vector_GetVals_integer_5d
+    module procedure CTA_Vector_GetVals_integer_6d
+    module procedure CTA_Vector_GetVals_integer_7d
+    module procedure CTA_Vector_GetVals_real4_1d
+    module procedure CTA_Vector_GetVals_real4_2d
+    module procedure CTA_Vector_GetVals_real4_3d
+    module procedure CTA_Vector_GetVals_real4_4d
+    module procedure CTA_Vector_GetVals_real4_5d
+    module procedure CTA_Vector_GetVals_real4_6d
+    module procedure CTA_Vector_GetVals_real4_7d
+    module procedure CTA_Vector_GetVals_real8_1d
+    module procedure CTA_Vector_GetVals_real8_2d
+    module procedure CTA_Vector_GetVals_real8_3d
+    module procedure CTA_Vector_GetVals_real8_4d
+    module procedure CTA_Vector_GetVals_real8_5d
+    module procedure CTA_Vector_GetVals_real8_6d
+    module procedure CTA_Vector_GetVals_real8_7d
+  end interface
+
+  !  \brief Set a copy of an element in the vector.
+  ! 
+  !  \note The value is copied in the vector.
+  ! 
+  !  \param hvec     IO handle of vector
+  !  \param i        I  index of element
+  !  \param val      I  new value that is copied into element at given index
+  !  \param datatype I  data type of *val, must be the same as data type of elements in vector
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Vector_SetVal
+    module procedure CTA_Vector_SetVal_integer
+    module procedure CTA_Vector_SetVal_real4
+    module procedure CTA_Vector_SetVal_real8
+    module procedure CTA_Vector_SetVal_char
+  end interface
+
+  !  \brief Set all elementes in the vector.
+  ! 
+  !  \note The values are copied in the vector.
+  ! 
+  !  \param hvec     IO handle of vector
+  !  \param vals     I  values that need to be copied to vector
+  !  \param n        I  number of elements in vals
+  !  \param datatype I  data type of *vals, must be the same as data type of elements in vector
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Vector_SetVals
+    module procedure CTA_Vector_SetVals_integer_1d
+    module procedure CTA_Vector_SetVals_integer_2d
+    module procedure CTA_Vector_SetVals_integer_3d
+    module procedure CTA_Vector_SetVals_integer_4d
+    module procedure CTA_Vector_SetVals_integer_5d
+    module procedure CTA_Vector_SetVals_integer_6d
+    module procedure CTA_Vector_SetVals_integer_7d
+    module procedure CTA_Vector_SetVals_real4_1d
+    module procedure CTA_Vector_SetVals_real4_2d
+    module procedure CTA_Vector_SetVals_real4_3d
+    module procedure CTA_Vector_SetVals_real4_4d
+    module procedure CTA_Vector_SetVals_real4_5d
+    module procedure CTA_Vector_SetVals_real4_6d
+    module procedure CTA_Vector_SetVals_real4_7d
+    module procedure CTA_Vector_SetVals_real8_1d
+    module procedure CTA_Vector_SetVals_real8_2d
+    module procedure CTA_Vector_SetVals_real8_3d
+    module procedure CTA_Vector_SetVals_real8_4d
+    module procedure CTA_Vector_SetVals_real8_5d
+    module procedure CTA_Vector_SetVals_real8_6d
+    module procedure CTA_Vector_SetVals_real8_7d
+  end interface
+
+  !  \brief Scale a vector.
+  ! 
+  !  \note scale: x=alpha*x
+  ! 
+  !  \param hvec     IO handle of vector
+  !  \param alpha    I  scalar
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Vector_Scal
+    subroutine CTA_Vector_Scal( hvec, alpha, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      real(8)                       , intent(in   )     ::  alpha
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Vector_Scal
+  end interface
+
+  !  \brief Copy a vector
+  ! 
+  !  \note This function copies the vector content (y=x), but does not make a new vector.
+  ! 
+  !  \param hvec_x  I  handle of sending vector
+  !  \param hvec_y  O  handle of receiving vector
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Vector_Copy
+    subroutine CTA_Vector_Copy( hvec_x, hvec_y, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec_x
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hvec_y
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Vector_Copy
+  end interface
+
+  !  \brief Operation axpy for two vectors x and y.
+  ! 
+  !  \note axpy: y=alpha*x+y
+  ! 
+  !  \param hvec_y  IO handle of vector y
+  !  \param alpha   I  scalar
+  !  \param hvec_x  I  handle of vector x
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Vector_Axpy
+    subroutine CTA_Vector_Axpy( hvec_y, alpha, hvec_x, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec_y
+      real(8)                       , intent(in   )     ::  alpha
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec_x
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Vector_Axpy
+  end interface
+
+  !  \brief Dot product operation of two vectors.
+  ! 
+  !  \note dot: dotprod=x^t*y
+  ! 
+  !  \param hvec_x  I  handle of vector x
+  !  \param hvec_y  I  handle of vector y
+  !  \param dotprod O  receives dot product
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Vector_Dot
+    subroutine CTA_Vector_Dot( hvec_x, hvec_y, dotprod, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec_x
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec_y
+      real(8)                       , intent(out  )     ::  dotprod
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Vector_Dot
+  end interface
+
+  !  \brief Compute 2-norm of vector.
+  ! 
+  !  \note 2-norm: sqrt(x^t*x)
+  ! 
+  !  \param hvec_x  I  handle of vector x
+  !  \param norm2   O  receives 2-norm
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Vector_Nrm2
+    subroutine CTA_Vector_Nrm2( hvec_x, norm2, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec_x
+      real(8)                       , intent(out  )     ::  norm2
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Vector_Nrm2
+  end interface
+
+  !  \brief Find index of element in vector with largest absolute value
+  ! 
+  !  \param hvec_x  I  handle of vector
+  !  \param iloc    O  receives index of largest absolute value
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Vector_Amax
+    subroutine CTA_Vector_Amax( hvec_x, iloc, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec_x
+      integer                       , intent(out  )     ::  iloc
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Vector_Amax
+  end interface
+
+  !  \brief Find largest length of elements in vector.
+  ! 
+  !  \note e.g. length of string in a vector of strings
+  ! 
+  !  \param hvec_x  I  handle of vector
+  !  \param maxlen  O  receives largest length of elements in vector
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Vector_GetMaxLen
+    subroutine CTA_Vector_GetMaxLen( hvec_x, maxlen, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec_x
+      integer                       , intent(out  )     ::  maxlen
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Vector_GetMaxLen
+  end interface
+
+  !  \brief Export a vector to file, stdout or pack object.
+  ! 
+  !  \Note CTA_DEFAULT_VECTOR supports exporting to:\n
+  !        file (usrdata a handle of COSTA file)\n
+  !        pack object (usrdata a handle of COSTA pack object)\n
+  ! 
+  !  \param hvec     I  handle of vector
+  !  \param usrdata  I  configuration of output
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Vector_Export
+    subroutine CTA_Vector_Export( hvec, usrdata, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  usrdata
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Vector_Export
+  end interface
+
+  !  \brief Import a vector.
+  ! 
+  !  \note CTA_DEFAULT_VECTOR supports importing from pack object (usrdata[0] a pack handle).
+  !  \note Data type and size of vector can be changed due to this action
+  ! 
+  !  \param hvec     I  handle of vector
+  !  \param usrdata  I  configuration of output
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Vector_Import
+    subroutine CTA_Vector_Import( hvec, usrdata, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  usrdata
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Vector_Import
+  end interface
+
+  !  \brief Print table, each column built up by a vector.
+  ! 
+  !  \note CTA_DEFAULT_VECTOR    TODO
+  ! 
+  !  \param table    I  array of vector handles, these vectors form the table to be filled
+  !  \param ncolumns I  number of columns in table
+  !                     (number of vector handles in table)
+  !  \param vformats I  handle of vector of string (size ncolumns) containing the
+  !                     formats for printing each column (C-format)
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Vector_Print_Table
+    subroutine CTA_Vector_Print_Table( table, ncolumns, vformats, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  table
+      integer                       , intent(in   )     ::  ncolumns
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  vformats
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Vector_Print_Table
+  end interface
+
+  !  \brief Free the vector object.
+  ! 
+  ! 
+  !  \Note hvec_x=CTA_NULL is allowed
+  ! 
+  !  \param hvec_x  IO handle of vector, replaced by CTA_NULL on return
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Vector_Free
+    subroutine CTA_Vector_Free( hvec_x, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec_x
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Vector_Free
+  end interface
+
+  !  \brief Get type of vector.
+  ! 
+  !  \param hvec     I  handle of vector
+  !  \param datatype O  receives data type of vector
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Vector_GetDatatype
+    subroutine CTA_Vector_GetDatatype( hvec, datatype, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      integer                       , intent(out  )     ::  datatype
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Vector_GetDatatype
+  end interface
+
+  !  \brief Set whole vector to one single value.
+  ! 
+  !  \Note hvec=CTA_NULL is allowed, function returns error code
+  ! 
+  !  \param hvec     IO handle of vector
+  !  \param val      I  value that must be set
+  !  \param datatype I  data type of *val, must be the same as data type of elements in vector
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Vector_SetConstant
+    module procedure CTA_Vector_SetConstant_integer
+    module procedure CTA_Vector_SetConstant_real4
+    module procedure CTA_Vector_SetConstant_real8
+  end interface
+
+  !  \brief Element wise division
+  ! 
+  !  \note y=x./y
+  ! 
+  !  \param hvec_y  IO handle of vector y
+  !  \param hvec_x  I  handle of vector x
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Vector_ElmDiv
+    subroutine CTA_Vector_ElmDiv( hvec_y, hvec_x, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec_y
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec_x
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Vector_ElmDiv
+  end interface
+
+  !  \brief Element wise product
+  ! 
+  !  \note y=x.y
+  ! 
+  !  \param hvec_y  IO handle of vector y
+  !  \param hvec_x  I  handle of vector x
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Vector_ElmProd
+    subroutine CTA_Vector_ElmProd( hvec_y, hvec_x, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec_y
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec_x
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Vector_ElmProd
+  end interface
+
+  !  \brief Element wise square root
+  ! 
+  !  \note y=y.^0.5
+  ! 
+  !  \param  hvec_x  IO handle of vector y
+  !  \param status O error status: CTA_OK if successful
+  !
+  interface CTA_F90_Vector_ElmSqrt
+    subroutine CTA_Vector_ElmSqrt( hvec_x, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec_x
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_Vector_ElmSqrt
+  end interface
+
+
+contains
+
+    subroutine CTA_Vector_GetVal_integer( hvec, i, vals, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      use CTA_F90_Parameters, only : CTA_OK
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      integer                       , intent(in   )     ::  i
+      integer                       , intent(out  )     ::  vals
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_GetVal( hvec, i, vals, CTA_INTEGER, status )
+    end subroutine CTA_Vector_GetVal_integer
+
+    subroutine CTA_Vector_GetVal_real4( hvec, i, vals, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      use CTA_F90_Parameters, only : CTA_OK
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      integer                       , intent(in   )     ::  i
+      real(4)                       , intent(out  )     ::  vals
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_GetVal( hvec, i, vals, CTA_REAL, status )
+    end subroutine CTA_Vector_GetVal_real4
+
+    subroutine CTA_Vector_GetVal_real8( hvec, i, vals, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      use CTA_F90_Parameters, only : CTA_OK
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      integer                       , intent(in   )     ::  i
+      real(8)                       , intent(out  )     ::  vals
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_GetVal( hvec, i, vals, CTA_DOUBLE, status )
+    end subroutine CTA_Vector_GetVal_real8
+
+    subroutine CTA_F90_Vector_GetHandle( hvec, i, vals, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_HANDLE
+      use CTA_F90_Parameters, only : CTA_OK
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      integer                       , intent(in   )     ::  i
+      integer                       , intent(out  )     ::  vals
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_GetVal( hvec, i, vals, CTA_HANDLE, status )
+    end subroutine CTA_F90_Vector_GetHandle
+
+    subroutine CTA_Vector_GetVals_integer_1d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      integer                       , intent(out  )     ::  vals(:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_GetVals( hvec, vals, n, CTA_INTEGER, status )
+    end subroutine CTA_Vector_GetVals_integer_1d
+
+    subroutine CTA_Vector_GetVals_integer_2d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      integer                       , intent(out  )     ::  vals(:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_GetVals( hvec, vals, n, CTA_INTEGER, status )
+    end subroutine CTA_Vector_GetVals_integer_2d
+
+    subroutine CTA_Vector_GetVals_integer_3d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      integer                       , intent(out  )     ::  vals(:,:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_GetVals( hvec, vals, n, CTA_INTEGER, status )
+    end subroutine CTA_Vector_GetVals_integer_3d
+
+    subroutine CTA_Vector_GetVals_integer_4d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      integer                       , intent(out  )     ::  vals(:,:,:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_GetVals( hvec, vals, n, CTA_INTEGER, status )
+    end subroutine CTA_Vector_GetVals_integer_4d
+
+    subroutine CTA_Vector_GetVals_integer_5d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      integer                       , intent(out  )     ::  vals(:,:,:,:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_GetVals( hvec, vals, n, CTA_INTEGER, status )
+    end subroutine CTA_Vector_GetVals_integer_5d
+
+    subroutine CTA_Vector_GetVals_integer_6d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      integer                       , intent(out  )     ::  vals(:,:,:,:,:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_GetVals( hvec, vals, n, CTA_INTEGER, status )
+    end subroutine CTA_Vector_GetVals_integer_6d
+
+    subroutine CTA_Vector_GetVals_integer_7d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      integer                       , intent(out  )     ::  vals(:,:,:,:,:,:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_GetVals( hvec, vals, n, CTA_INTEGER, status )
+    end subroutine CTA_Vector_GetVals_integer_7d
+
+    subroutine CTA_Vector_GetVals_real4_1d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      real(4)                       , intent(out  )     ::  vals(:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_GetVals( hvec, vals, n, CTA_REAL, status )
+    end subroutine CTA_Vector_GetVals_real4_1d
+
+    subroutine CTA_Vector_GetVals_real4_2d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      real(4)                       , intent(out  )     ::  vals(:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_GetVals( hvec, vals, n, CTA_REAL, status )
+    end subroutine CTA_Vector_GetVals_real4_2d
+
+    subroutine CTA_Vector_GetVals_real4_3d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      real(4)                       , intent(out  )     ::  vals(:,:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_GetVals( hvec, vals, n, CTA_REAL, status )
+    end subroutine CTA_Vector_GetVals_real4_3d
+
+    subroutine CTA_Vector_GetVals_real4_4d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      real(4)                       , intent(out  )     ::  vals(:,:,:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_GetVals( hvec, vals, n, CTA_REAL, status )
+    end subroutine CTA_Vector_GetVals_real4_4d
+
+    subroutine CTA_Vector_GetVals_real4_5d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      real(4)                       , intent(out  )     ::  vals(:,:,:,:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_GetVals( hvec, vals, n, CTA_REAL, status )
+    end subroutine CTA_Vector_GetVals_real4_5d
+
+    subroutine CTA_Vector_GetVals_real4_6d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      real(4)                       , intent(out  )     ::  vals(:,:,:,:,:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_GetVals( hvec, vals, n, CTA_REAL, status )
+    end subroutine CTA_Vector_GetVals_real4_6d
+
+    subroutine CTA_Vector_GetVals_real4_7d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      real(4)                       , intent(out  )     ::  vals(:,:,:,:,:,:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_GetVals( hvec, vals, n, CTA_REAL, status )
+    end subroutine CTA_Vector_GetVals_real4_7d
+
+    subroutine CTA_Vector_GetVals_real8_1d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      real(8)                       , intent(out  )     ::  vals(:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_GetVals( hvec, vals, n, CTA_DOUBLE, status )
+    end subroutine CTA_Vector_GetVals_real8_1d
+
+    subroutine CTA_Vector_GetVals_real8_2d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      real(8)                       , intent(out  )     ::  vals(:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_GetVals( hvec, vals, n, CTA_DOUBLE, status )
+    end subroutine CTA_Vector_GetVals_real8_2d
+
+    subroutine CTA_Vector_GetVals_real8_3d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      real(8)                       , intent(out  )     ::  vals(:,:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_GetVals( hvec, vals, n, CTA_DOUBLE, status )
+    end subroutine CTA_Vector_GetVals_real8_3d
+
+    subroutine CTA_Vector_GetVals_real8_4d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      real(8)                       , intent(out  )     ::  vals(:,:,:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_GetVals( hvec, vals, n, CTA_DOUBLE, status )
+    end subroutine CTA_Vector_GetVals_real8_4d
+
+    subroutine CTA_Vector_GetVals_real8_5d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      real(8)                       , intent(out  )     ::  vals(:,:,:,:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_GetVals( hvec, vals, n, CTA_DOUBLE, status )
+    end subroutine CTA_Vector_GetVals_real8_5d
+
+    subroutine CTA_Vector_GetVals_real8_6d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      real(8)                       , intent(out  )     ::  vals(:,:,:,:,:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_GetVals( hvec, vals, n, CTA_DOUBLE, status )
+    end subroutine CTA_Vector_GetVals_real8_6d
+
+    subroutine CTA_Vector_GetVals_real8_7d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hvec
+      real(8)                       , intent(out  )     ::  vals(:,:,:,:,:,:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_GetVals( hvec, vals, n, CTA_DOUBLE, status )
+    end subroutine CTA_Vector_GetVals_real8_7d
+
+    subroutine CTA_Vector_SetVal_integer( hvec, i, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      use CTA_F90_Parameters, only : CTA_OK
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      integer                       , intent(in   )     ::  i
+      integer                       , intent(in   )     ::  val
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_SetVal( hvec, i, val, CTA_INTEGER, status )
+    end subroutine CTA_Vector_SetVal_integer
+
+    subroutine CTA_Vector_SetVal_real4( hvec, i, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      use CTA_F90_Parameters, only : CTA_OK
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      integer                       , intent(in   )     ::  i
+      real(4)                       , intent(in   )     ::  val
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_SetVal( hvec, i, val, CTA_REAL, status )
+    end subroutine CTA_Vector_SetVal_real4
+
+    subroutine CTA_Vector_SetVal_real8( hvec, i, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      use CTA_F90_Parameters, only : CTA_OK
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      integer                       , intent(in   )     ::  i
+      real(8)                       , intent(in   )     ::  val
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_SetVal( hvec, i, val, CTA_DOUBLE, status )
+    end subroutine CTA_Vector_SetVal_real8
+
+    subroutine CTA_Vector_SetVal_char( hvec, i, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_STRING
+      use CTA_F90_Parameters, only : CTA_OK
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      integer                       , intent(in   )     ::  i
+      character(len=*)              , intent(in   )     ::  val
+      integer                       , intent(out  )     ::  status
+      integer(CTA_HANDLE_IKIND)  ::  sval
+      call CTA_String_Create( sval, status )
+      if (status/=CTA_OK) return
+      call CTA_String_Set( sval, val, status )
+      if (status/=CTA_OK) return
+      call CTA_Vector_SetVal( hvec, i, sval, CTA_STRING, status )
+      if (status/=CTA_OK) return
+      call CTA_String_Free( sval, status )
+      if (status/=CTA_OK) return
+    end subroutine CTA_Vector_SetVal_char
+
+    subroutine CTA_F90_Vector_SetHandle( hvec, i, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_HANDLE
+      use CTA_F90_Parameters, only : CTA_OK
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      integer                       , intent(in   )     ::  i
+      integer                       , intent(in   )     ::  val
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_SetVal( hvec, i, val, CTA_HANDLE, status )
+    end subroutine CTA_F90_Vector_SetHandle
+
+    subroutine CTA_Vector_SetVals_integer_1d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      integer                       , intent(in   )     ::  vals(:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_SetVals( hvec, vals, n, CTA_INTEGER, status )
+    end subroutine CTA_Vector_SetVals_integer_1d
+
+    subroutine CTA_Vector_SetVals_integer_2d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      integer                       , intent(in   )     ::  vals(:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_SetVals( hvec, vals, n, CTA_INTEGER, status )
+    end subroutine CTA_Vector_SetVals_integer_2d
+
+    subroutine CTA_Vector_SetVals_integer_3d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      integer                       , intent(in   )     ::  vals(:,:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_SetVals( hvec, vals, n, CTA_INTEGER, status )
+    end subroutine CTA_Vector_SetVals_integer_3d
+
+    subroutine CTA_Vector_SetVals_integer_4d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      integer                       , intent(in   )     ::  vals(:,:,:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_SetVals( hvec, vals, n, CTA_INTEGER, status )
+    end subroutine CTA_Vector_SetVals_integer_4d
+
+    subroutine CTA_Vector_SetVals_integer_5d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      integer                       , intent(in   )     ::  vals(:,:,:,:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_SetVals( hvec, vals, n, CTA_INTEGER, status )
+    end subroutine CTA_Vector_SetVals_integer_5d
+
+    subroutine CTA_Vector_SetVals_integer_6d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      integer                       , intent(in   )     ::  vals(:,:,:,:,:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_SetVals( hvec, vals, n, CTA_INTEGER, status )
+    end subroutine CTA_Vector_SetVals_integer_6d
+
+    subroutine CTA_Vector_SetVals_integer_7d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      integer                       , intent(in   )     ::  vals(:,:,:,:,:,:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_SetVals( hvec, vals, n, CTA_INTEGER, status )
+    end subroutine CTA_Vector_SetVals_integer_7d
+
+    subroutine CTA_Vector_SetVals_real4_1d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      real(4)                       , intent(in   )     ::  vals(:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_SetVals( hvec, vals, n, CTA_REAL, status )
+    end subroutine CTA_Vector_SetVals_real4_1d
+
+    subroutine CTA_Vector_SetVals_real4_2d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      real(4)                       , intent(in   )     ::  vals(:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_SetVals( hvec, vals, n, CTA_REAL, status )
+    end subroutine CTA_Vector_SetVals_real4_2d
+
+    subroutine CTA_Vector_SetVals_real4_3d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      real(4)                       , intent(in   )     ::  vals(:,:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_SetVals( hvec, vals, n, CTA_REAL, status )
+    end subroutine CTA_Vector_SetVals_real4_3d
+
+    subroutine CTA_Vector_SetVals_real4_4d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      real(4)                       , intent(in   )     ::  vals(:,:,:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_SetVals( hvec, vals, n, CTA_REAL, status )
+    end subroutine CTA_Vector_SetVals_real4_4d
+
+    subroutine CTA_Vector_SetVals_real4_5d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      real(4)                       , intent(in   )     ::  vals(:,:,:,:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_SetVals( hvec, vals, n, CTA_REAL, status )
+    end subroutine CTA_Vector_SetVals_real4_5d
+
+    subroutine CTA_Vector_SetVals_real4_6d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      real(4)                       , intent(in   )     ::  vals(:,:,:,:,:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_SetVals( hvec, vals, n, CTA_REAL, status )
+    end subroutine CTA_Vector_SetVals_real4_6d
+
+    subroutine CTA_Vector_SetVals_real4_7d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      real(4)                       , intent(in   )     ::  vals(:,:,:,:,:,:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_SetVals( hvec, vals, n, CTA_REAL, status )
+    end subroutine CTA_Vector_SetVals_real4_7d
+
+    subroutine CTA_Vector_SetVals_real8_1d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      real(8)                       , intent(in   )     ::  vals(:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_SetVals( hvec, vals, n, CTA_DOUBLE, status )
+    end subroutine CTA_Vector_SetVals_real8_1d
+
+    subroutine CTA_Vector_SetVals_real8_2d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      real(8)                       , intent(in   )     ::  vals(:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_SetVals( hvec, vals, n, CTA_DOUBLE, status )
+    end subroutine CTA_Vector_SetVals_real8_2d
+
+    subroutine CTA_Vector_SetVals_real8_3d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      real(8)                       , intent(in   )     ::  vals(:,:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_SetVals( hvec, vals, n, CTA_DOUBLE, status )
+    end subroutine CTA_Vector_SetVals_real8_3d
+
+    subroutine CTA_Vector_SetVals_real8_4d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      real(8)                       , intent(in   )     ::  vals(:,:,:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_SetVals( hvec, vals, n, CTA_DOUBLE, status )
+    end subroutine CTA_Vector_SetVals_real8_4d
+
+    subroutine CTA_Vector_SetVals_real8_5d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      real(8)                       , intent(in   )     ::  vals(:,:,:,:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_SetVals( hvec, vals, n, CTA_DOUBLE, status )
+    end subroutine CTA_Vector_SetVals_real8_5d
+
+    subroutine CTA_Vector_SetVals_real8_6d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      real(8)                       , intent(in   )     ::  vals(:,:,:,:,:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_SetVals( hvec, vals, n, CTA_DOUBLE, status )
+    end subroutine CTA_Vector_SetVals_real8_6d
+
+    subroutine CTA_Vector_SetVals_real8_7d( hvec, vals, n, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      real(8)                       , intent(in   )     ::  vals(:,:,:,:,:,:,:)
+      integer                       , intent(in   )     ::  n
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_SetVals( hvec, vals, n, CTA_DOUBLE, status )
+    end subroutine CTA_Vector_SetVals_real8_7d
+
+    subroutine CTA_Vector_SetConstant_integer( hvec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_INTEGER
+      use CTA_F90_Parameters, only : CTA_OK
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      integer                       , intent(in   )     ::  val
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_SetConstant( hvec, val, CTA_INTEGER, status )
+    end subroutine CTA_Vector_SetConstant_integer
+
+    subroutine CTA_Vector_SetConstant_real4( hvec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_REAL
+      use CTA_F90_Parameters, only : CTA_OK
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      real(4)                       , intent(in   )     ::  val
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_SetConstant( hvec, val, CTA_REAL, status )
+    end subroutine CTA_Vector_SetConstant_real4
+
+    subroutine CTA_Vector_SetConstant_real8( hvec, val, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      use CTA_F90_Parameters, only : CTA_DOUBLE
+      use CTA_F90_Parameters, only : CTA_OK
+      integer(CTA_HANDLE_IKIND)     , intent(inout)     ::  hvec
+      real(8)                       , intent(in   )     ::  val
+      integer                       , intent(out  )     ::  status
+      call CTA_Vector_SetConstant( hvec, val, CTA_DOUBLE, status )
+    end subroutine CTA_Vector_SetConstant_real8
+
+end module cta_f90_vector
+
diff --git a/costa/native/cta_f90/generated/cta_f90_xml.f90 b/costa/native/cta_f90/generated/cta_f90_xml.f90
new file mode 100644
index 000000000..72de01add
--- /dev/null
+++ b/costa/native/cta_f90/generated/cta_f90_xml.f90
@@ -0,0 +1,39 @@
+module cta_f90_xml
+
+  implicit none
+
+  public
+
+  !  \brief Read a COSTA XML file into a new tree.
+  ! 
+  !   \param hfname  I  file name of XML file to read
+  !   \param hroot   O  handle of a new COSTA tree
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_XML_Read
+    subroutine CTA_XML_Read( hfname, hroot, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hfname
+      integer(CTA_HANDLE_IKIND)     , intent(out  )     ::  hroot
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_XML_Read
+  end interface
+
+  !  \brief Write a tree to a COSTA XML file
+  ! 
+  !   \param hfname  I  file name of XML file to write
+  !   \param hroot   I  handle of a COSTA tree
+  !   \param status O CTA_OK if successful
+  !
+  interface CTA_F90_XML_Write
+    subroutine CTA_XML_Write( hfname, hroot, status )
+      use CTA_F90_Parameters, only : CTA_HANDLE_IKIND
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hfname
+      integer(CTA_HANDLE_IKIND)     , intent(in   )     ::  hroot
+      integer                       , intent(out  )     ::  status
+    end subroutine CTA_XML_Write
+  end interface
+
+
+end module cta_f90_xml
+
diff --git a/costa/native/cta_f90/include/cta_f77.inc b/costa/native/cta_f90/include/cta_f77.inc
new file mode 100644
index 000000000..8f6764131
--- /dev/null
+++ b/costa/native/cta_f90/include/cta_f77.inc
@@ -0,0 +1,247 @@
+!
+! COSTA: Problem solving environment for data assimilation
+! Copyright (C) 2005  Nils van Velzen
+! 
+! This library is free software; you can redistribute it and/or
+! modify it under the terms of the GNU Lesser General Public
+! License as published by the Free Software Foundation; either
+! version 2.1 of the License, or (at your option) any later version.
+! 
+! This library is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+! Lesser General Public License for more details.
+! 
+! You should have received a copy of the GNU Lesser General Public
+! License along with this library; if not, write to the Free Software
+! Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+!
+ 
+!     FORTRAN file unit for standard output
+      integer FCTA_STDOUT
+      parameter (FCTA_STDOUT = 6)
+
+!     handle to a COSTA object
+      integer CTA_HANDLE
+      parameter (CTA_HANDLE=-1) 
+!     A handle of a COSTA interface object
+      integer CTA_INTERFACE
+      parameter (CTA_INTERFACE=-2) 
+!     A handle of a COSTA function object
+      integer CTA_FUNCTION
+      parameter (CTA_FUNCTION=-3)
+!     A handle of a COSTA vector object
+      integer CTA_VECTOR
+      parameter (CTA_VECTOR=-4)
+!     A handle of a COSTA Vector class
+      integer CTA_VECTORCLASS
+      parameter (CTA_VECTORCLASS=-5)
+!     A handle of a COSTA (sub) state
+      integer CTA_STATE
+      parameter (CTA_STATE=-6)
+!/*! A handle of a COSTA Matrix class     */
+      integer CTA_MATRIXCLASS
+      parameter (CTA_MATRIXCLASS=-7)
+!/*! A handle of a COSTA Matrix      */
+      integer CTA_MATRIX
+      parameter (CTA_MATRIX=-8)
+!/*!     A COSTA model class  */
+      integer CTA_MODELCLASS
+      parameter (CTA_MODELCLASS=-16)
+
+      integer CTA_VOID
+      parameter (CTA_VOID        =-100)
+      integer CTA_INTEGER
+      parameter (CTA_INTEGER     =-101)
+      integer CTA_REAL
+      parameter (CTA_REAL        =-102)
+      integer CTA_DOUBLE
+      parameter (CTA_DOUBLE      =-103)
+      integer CTA_FSTRING
+      parameter (CTA_FSTRING     =-104)
+      integer CTA_CSTRING
+      parameter (CTA_CSTRING     =-105)
+      integer CTA_STRING
+      parameter (CTA_STRING      =-28)
+      integer CTA_TIME
+      parameter (CTA_TIME        = -15)
+      integer CTA_ARRAY
+      parameter (CTA_ARRAY        = -30)
+
+      integer CTA_1DINTEGER
+      parameter (CTA_1DINTEGER   =-201)
+      integer CTA_1DREAL
+      parameter (CTA_1DREAL      =-202)
+      integer CTA_1DDOUBLE
+      parameter (CTA_1DDOUBLE    =-203)
+      integer CTA_1DFSTRING
+      parameter (CTA_1DFSTRING   =-204)
+      integer CTA_1DCSTRING
+      parameter (CTA_1DCSTRING   =-205)
+
+!     Common errors
+      integer CTA_OK
+      parameter (CTA_OK=0)
+      integer CTA_ARRAY_TOO_SHORT
+      parameter (CTA_ARRAY_TOO_SHORT=10)
+      integer CTA_ILLEGAL_DATATYPE
+      parameter (CTA_ILLEGAL_DATATYPE=11)
+      integer CTA_DIMENSION_ERROR
+      parameter (CTA_DIMENSION_ERROR=12)
+      integer CTA_INCOMPATIBLE_VECTORS
+      parameter (CTA_INCOMPATIBLE_VECTORS=13)
+      integer CTA_CONCAT_NOT_POSSIBLE
+      parameter (CTA_CONCAT_NOT_POSSIBLE=14)
+      integer CTA_SETVAL_NOT_POSSIBLE
+      parameter (CTA_SETVAL_NOT_POSSIBLE=15)
+      integer CTA_ITEM_NOT_FOUND
+      parameter (CTA_ITEM_NOT_FOUND=16)
+      integer CTA_UNINITIALISED_SUBSTATES 
+      parameter (CTA_UNINITIALISED_SUBSTATES=17)
+      integer CTA_STATES_NOT_COMPATIBLE   
+      parameter (CTA_STATES_NOT_COMPATIBLE  =18)
+      integer CTA_INCOMPATIBLE_MATRICES   
+      parameter (CTA_INCOMPATIBLE_MATRICES  =19)
+      integer CTA_NOT_IMPLEMENTED         
+      parameter (CTA_NOT_IMPLEMENTED        =20)
+
+!     Common Constants
+      integer CTA_DEFAULT
+      parameter (CTA_DEFAULT=0)
+      integer CTA_NULL
+      parameter (CTA_NULL=0)
+      integer CTA_TRUE
+      parameter(CTA_TRUE=1)
+      integer CTA_FALSE
+      parameter(CTA_FALSE=0)
+
+      integer CTA_STRLEN_NAME, CTA_STRLEN_TAG
+      parameter (CTA_STRLEN_NAME=80, CTA_STRLEN_TAG=80)
+
+      integer CTA_ASSIMOBS, CTA_VALIDATEOBS, CTA_ALLOBS
+      parameter(CTA_ASSIMOBS=1, CTA_VALIDATEOBS=2, CTA_ALLOBS=3)
+
+!     parameters from cta_pack.h
+
+!     Reset pack/unpack pointer of pack object
+      integer, parameter ::CTA_PACK_RESET=-1
+
+!     parameters from cta_model.h
+
+      integer, parameter ::FCTA_MODEL_CREATE_SIZE        =1
+      integer, parameter ::FCTA_MODEL_CREATE_INIT        =2
+      integer, parameter ::FCTA_MODEL_FREE               =3
+      integer, parameter ::FCTA_MODEL_COMPUTE            =4
+      integer, parameter ::FCTA_MODEL_SET_STATE          =5
+      integer, parameter ::FCTA_MODEL_GET_STATE          =6
+      integer, parameter ::FCTA_MODEL_AXPY_STATE         =7
+      integer, parameter ::FCTA_MODEL_AXPY_MODEL         =8
+      integer, parameter ::FCTA_MODEL_SET_FORC           =9
+      integer, parameter ::FCTA_MODEL_GET_FORC           =10
+      integer, parameter ::FCTA_MODEL_AXPY_FORC          =11
+      integer, parameter ::FCTA_MODEL_SET_PARAM          =12
+      integer, parameter ::FCTA_MODEL_GET_PARAM          =13
+      integer, parameter ::FCTA_MODEL_AXPY_PARAM         =14
+      integer, parameter ::FCTA_MODEL_GET_STATESCALING   =15
+      integer, parameter ::FCTA_MODEL_GET_TIMEHORIZON    =16
+      integer, parameter ::FCTA_MODEL_GET_CURRENTTIME    =17
+
+
+      integer, parameter ::FCTA_MODEL_GET_NOISE_COUNT    =18
+      integer, parameter ::FCTA_MODEL_GET_NOISE_COVAR    =19
+      integer, parameter ::FCTA_MODEL_GET_OBSVALUES      =20
+      integer, parameter ::FCTA_MODEL_GET_OBSSELECT      =21
+      integer, parameter ::FCTA_MODEL_ANNOUNCE_OBSVALUES =22
+      integer, parameter ::FCTA_MODEL_ADD_NOISE          =23
+      integer, parameter ::FCTA_MODEL_EXPORT             =24
+      integer, parameter ::FCTA_MODEL_IMPORT             =25
+      integer, parameter ::FCTA_MODEL_ADJ_SET_FORC       =26
+      integer, parameter ::FCTA_MODEL_ADJ_COMPUTE        =27
+      integer, parameter ::FCTA_MODEL_ADJ_PREPARE        =28
+      integer, parameter ::FCTA_MODEL_NUMFUNC            =28
+
+      integer, parameter ::FCTA_MODEL_GETNUMDOMAINS      =30
+      integer, parameter ::FCTA_MODEL_GETOBSSELECTOR     =31
+      integer, parameter ::FCTA_MODEL_GETOBSLOCALIZATIONDOMAIN =32
+      integer, parameter ::FCTA_MODEL_GETSTATEDOMAIN     =33
+      integer, parameter ::FCTA_MODEL_AXPYSTATEDOMAIN    =34
+
+
+
+
+
+!     Observation description parameters
+      integer, parameter ::FCTA_OBSDESCR_CREATE_SIZE        =  1+1
+      integer, parameter ::FCTA_OBSDESCR_CREATE_INIT        =  2+1
+      integer, parameter ::FCTA_OBSDESCR_FREE               =  3+1
+      integer, parameter ::FCTA_OBSDESCR_GET_PROPERTIES     =  4+1
+      integer, parameter ::FCTA_OBSDESCR_GET_KEYS           =  5+1
+      integer, parameter ::FCTA_OBSDESCR_COUNT_OBSERVATIONS =  6+1
+      integer, parameter ::FCTA_OBSDESCR_COUNT_PROPERTIES   =  7+1
+      integer, parameter ::FCTA_OBSDESCR_EXPORT             =  8+1
+      integer, parameter ::FCTA_OBSDESCR_SELECTION          =  9+1
+      integer, parameter ::FCTA_OBSDESCR_NUMFUNC            = 10+1
+
+      integer, parameter ::FCTA_SOBS_CREATE_SIZE        =  1+1
+      integer, parameter ::FCTA_SOBS_CREATE_INIT        =  2+1
+      integer, parameter ::FCTA_SOBS_FREE               =  3+1
+      integer, parameter ::FCTA_SOBS_CREATE_SELECTION   =  4+1
+      integer, parameter ::FCTA_SOBS_COUNT              =  5+1
+      integer, parameter ::FCTA_SOBS_GET_OBS_DESCRIPTION=  6+1
+      integer, parameter ::FCTA_SOBS_GET_VALUES         =  7+1
+      integer, parameter ::FCTA_SOBS_GET_REALISATION    =  8+1
+      integer, parameter ::FCTA_SOBS_GET_EXPECTATION    =  9+1
+      integer, parameter ::FCTA_SOBS_EVALUATE_PDF       = 10+1
+      integer, parameter ::FCTA_SOBS_GET_COV_MATRIX     = 11+1
+      integer, parameter ::FCTA_SOBS_GET_VARIANCE       = 12+1
+      integer, parameter ::FCTA_SOBS_EXPORT             = 13+1
+      integer, parameter ::FCTA_SOBS_GET_TIMES          = 14+1
+      integer, parameter ::FCTA_SOBS_NUMFUNC            = 15+1
+
+
+!     Common block holding "constants" that are set at initialisation
+
+!NOT HAPPY BUT HAVE SOME PROBLEMS WITH LOADING OF SO-FILES UNDER LINUX
+!WHERE WE HAVE 2 INSTANCES OF COMMON BLOCK JUST SET VALUES!!!!
+
+      integer, parameter :: CTA_MODBUILD_PAR   =  0
+      integer, parameter :: CTA_FILE_STDOUT    =  1
+      integer, parameter :: CTA_DEFAULT_SOBS   = 23
+      integer, parameter :: CTA_COMBINE_SOBS   = 45
+      integer, parameter :: CTA_NETCDF_SOBS    = 67
+      integer, parameter :: CTA_MAORI_SOBS     = -173
+      integer, parameter :: CTA_OBSDESCR_TABLE = 77
+      integer, parameter :: CTA_DEFAULT_VECTOR = 100
+      integer, parameter :: CTA_DEFAULT_MATRIX = 116
+      integer, parameter :: CTA_MODBUILD_SP    = 147
+      integer, parameter :: CTA_MODELCOMBINER  = 0
+      integer, parameter :: CTA_MODBUILD_B3B   = 0
+      integer, parameter :: CTA_MODBUILD_BB    = 0
+      integer, parameter :: CTA_OP_ROOT_RMS    = 148
+      integer, parameter :: CTA_OP_ROOT_AMAX   = 149 
+      integer, parameter :: CTA_OP_ROOT_PRINTI = 150
+      integer, parameter :: CTA_OP_ROOT_SSQ    = 151
+
+!      external CTA_DEFAULT_VECTOR, CTA_DEFAULT_MATRIX,                    &
+!     &         CTA_DEFAULT_SOBS, CTA_MODBUILD_SP,                         &
+!     &         CTA_MODBUILD_PAR, CTA_OBSDESCR_TABLE,                      &
+!     &         CTA_FILE_STDOUT, CTA_MODELCOMBINER,                        &
+!     &         CTA_MODBUILD_B3B
+
+
+
+!!!DEC$ IF DEFINED(CTALIB)
+!!!DEC$ ATTRIBUTES DLLEXPORT:: /ctaf77/
+!!!DEC$ ELSE
+!!!DEC$ ATTRIBUTES DLLIMPORT:: /ctaf77/
+!!!DEC$ ENDIF
+!!      common /ctaf77/ CTA_DEFAULT_VECTOR, CTA_DEFAULT_MATRIX,             &
+!!     &                CTA_DEFAULT_SOBS, CTA_MODBUILD_SP,                  &
+!!     &                CTA_MODBUILD_PAR, CTA_OBSDESCR_TABLE,               &
+!!     &                CTA_FILE_STDOUT,  CTA_MODELCOMBINER,                &
+!!     &                CTA_MODBUILD_B3B
+
+
+
+
+
diff --git a/costa/native/cta_f90/include/cta_f90.inc b/costa/native/cta_f90/include/cta_f90.inc
new file mode 100644
index 000000000..daa7d8726
--- /dev/null
+++ b/costa/native/cta_f90/include/cta_f90.inc
@@ -0,0 +1,19 @@
+!
+! COSTA: Problem solving environment for data assimilation
+! Copyright (C) 2005  Nils van Velzen
+! 
+! This library is free software; you can redistribute it and/or
+! modify it under the terms of the GNU Lesser General Public
+! License as published by the Free Software Foundation; either
+! version 2.1 of the License, or (at your option) any later version.
+! 
+! This library is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+! Lesser General Public License for more details.
+! 
+! You should have received a copy of the GNU Lesser General Public
+! License along with this library; if not, write to the Free Software
+! Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+!
+include 'cta_f77.inc'
diff --git a/costa/native/cta_f90/include/cta_f90_contains.inc b/costa/native/cta_f90/include/cta_f90_contains.inc
new file mode 100644
index 000000000..5805d90f9
--- /dev/null
+++ b/costa/native/cta_f90/include/cta_f90_contains.inc
@@ -0,0 +1,42 @@
+  ! return error message given error code
+
+  function CTA_F90_StrError( ierr )
+
+    use CTA_F90_Parameters, only : CTA_OK
+    use CTA_F90_Parameters, only : CTA_ARRAY_TOO_SHORT
+    use CTA_F90_Parameters, only : CTA_ILLEGAL_DATATYPE
+    use CTA_F90_Parameters, only : CTA_DIMENSION_ERROR
+    use CTA_F90_Parameters, only : CTA_INCOMPATIBLE_VECTORS
+    use CTA_F90_Parameters, only : CTA_CONCAT_NOT_POSSIBLE
+    use CTA_F90_Parameters, only : CTA_SETVAL_NOT_POSSIBLE
+    use CTA_F90_Parameters, only : CTA_ITEM_NOT_FOUND
+    use CTA_F90_Parameters, only : CTA_UNINITIALISED_SUBSTATES
+    use CTA_F90_Parameters, only : CTA_STATES_NOT_COMPATIBLE
+    use CTA_F90_Parameters, only : CTA_INCOMPATIBLE_MATRICES
+    use CTA_F90_Parameters, only : CTA_NOT_IMPLEMENTED
+
+    ! --- in/out ---------------------------------
+
+    character(len=80)     ::  CTA_F90_StrError
+    integer, intent(in)   ::  ierr
+
+    ! --- begin ----------------------------------
+
+    select case ( ierr )
+      case ( CTA_OK                      ) ! ok, no message
+      case ( CTA_ARRAY_TOO_SHORT         ) ; CTA_F90_StrError = 'CTA - ERROR - Array too short'
+      case ( CTA_ILLEGAL_DATATYPE        ) ; CTA_F90_StrError = 'CTA - ERROR - Illegal datatype'
+      case ( CTA_DIMENSION_ERROR         ) ; CTA_F90_StrError = 'CTA - ERROR - Dimension error'
+      case ( CTA_INCOMPATIBLE_VECTORS    ) ; CTA_F90_StrError = 'CTA - ERROR - Incompatible vectors'
+      case ( CTA_CONCAT_NOT_POSSIBLE     ) ; CTA_F90_StrError = 'CTA - ERROR - Concat not possible'
+      case ( CTA_SETVAL_NOT_POSSIBLE     ) ; CTA_F90_StrError = 'CTA - ERROR - Setval not possible'
+      case ( CTA_ITEM_NOT_FOUND          ) ; CTA_F90_StrError = 'CTA - ERROR - Item not found'
+      case ( CTA_UNINITIALISED_SUBSTATES ) ; CTA_F90_StrError = 'CTA - ERROR - Uninitialised substates'
+      case ( CTA_STATES_NOT_COMPATIBLE   ) ; CTA_F90_StrError = 'CTA - ERROR - States not compatible'
+      case ( CTA_INCOMPATIBLE_MATRICES   ) ; CTA_F90_StrError = 'CTA - ERROR - Incompatible matrices'
+      case ( CTA_NOT_IMPLEMENTED         ) ; CTA_F90_StrError = 'CTA - ERROR - Not implemented'
+      case default
+        write (CTA_F90_StrError,'("CTA - ERROR - Unknown error code: ",i8)') ierr
+    end select
+
+  end function CTA_F90_StrError
diff --git a/costa/native/external/blas/caxpy.f b/costa/native/external/blas/caxpy.f
new file mode 100644
index 000000000..7037c5a54
--- /dev/null
+++ b/costa/native/external/blas/caxpy.f
@@ -0,0 +1,34 @@
+      subroutine caxpy(n,ca,cx,incx,cy,incy)
+c
+c     constant times a vector plus a vector.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      complex cx(*),cy(*),ca
+      integer i,incx,incy,ix,iy,n
+c
+      if(n.le.0)return
+      if (abs(real(ca)) + abs(aimag(ca)) .eq. 0.0 ) return
+      if(incx.eq.1.and.incy.eq.1)go to 20
+c
+c        code for unequal increments or equal increments
+c          not equal to 1
+c
+      ix = 1
+      iy = 1
+      if(incx.lt.0)ix = (-n+1)*incx + 1
+      if(incy.lt.0)iy = (-n+1)*incy + 1
+      do 10 i = 1,n
+        cy(iy) = cy(iy) + ca*cx(ix)
+        ix = ix + incx
+        iy = iy + incy
+   10 continue
+      return
+c
+c        code for both increments equal to 1
+c
+   20 do 30 i = 1,n
+        cy(i) = cy(i) + ca*cx(i)
+   30 continue
+      return
+      end
diff --git a/costa/native/external/blas/ccopy.f b/costa/native/external/blas/ccopy.f
new file mode 100644
index 000000000..61d5267e5
--- /dev/null
+++ b/costa/native/external/blas/ccopy.f
@@ -0,0 +1,33 @@
+      subroutine  ccopy(n,cx,incx,cy,incy)
+c
+c     copies a vector, x, to a vector, y.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      complex cx(*),cy(*)
+      integer i,incx,incy,ix,iy,n
+c
+      if(n.le.0)return
+      if(incx.eq.1.and.incy.eq.1)go to 20
+c
+c        code for unequal increments or equal increments
+c          not equal to 1
+c
+      ix = 1
+      iy = 1
+      if(incx.lt.0)ix = (-n+1)*incx + 1
+      if(incy.lt.0)iy = (-n+1)*incy + 1
+      do 10 i = 1,n
+        cy(iy) = cx(ix)
+        ix = ix + incx
+        iy = iy + incy
+   10 continue
+      return
+c
+c        code for both increments equal to 1
+c
+   20 do 30 i = 1,n
+        cy(i) = cx(i)
+   30 continue
+      return
+      end
diff --git a/costa/native/external/blas/cdotc.f b/costa/native/external/blas/cdotc.f
new file mode 100644
index 000000000..1d5890596
--- /dev/null
+++ b/costa/native/external/blas/cdotc.f
@@ -0,0 +1,38 @@
+      complex function cdotc(n,cx,incx,cy,incy)
+c
+c     forms the dot product of two vectors, conjugating the first
+c     vector.
+c     jack dongarra, linpack,  3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      complex cx(*),cy(*),ctemp
+      integer i,incx,incy,ix,iy,n
+c
+      ctemp = (0.0,0.0)
+      cdotc = (0.0,0.0)
+      if(n.le.0)return
+      if(incx.eq.1.and.incy.eq.1)go to 20
+c
+c        code for unequal increments or equal increments
+c          not equal to 1
+c
+      ix = 1
+      iy = 1
+      if(incx.lt.0)ix = (-n+1)*incx + 1
+      if(incy.lt.0)iy = (-n+1)*incy + 1
+      do 10 i = 1,n
+        ctemp = ctemp + conjg(cx(ix))*cy(iy)
+        ix = ix + incx
+        iy = iy + incy
+   10 continue
+      cdotc = ctemp
+      return
+c
+c        code for both increments equal to 1
+c
+   20 do 30 i = 1,n
+        ctemp = ctemp + conjg(cx(i))*cy(i)
+   30 continue
+      cdotc = ctemp
+      return
+      end
diff --git a/costa/native/external/blas/cdotu.f b/costa/native/external/blas/cdotu.f
new file mode 100644
index 000000000..d88cea45e
--- /dev/null
+++ b/costa/native/external/blas/cdotu.f
@@ -0,0 +1,37 @@
+      complex function cdotu(n,cx,incx,cy,incy)
+c
+c     forms the dot product of two vectors.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      complex cx(*),cy(*),ctemp
+      integer i,incx,incy,ix,iy,n
+c
+      ctemp = (0.0,0.0)
+      cdotu = (0.0,0.0)
+      if(n.le.0)return
+      if(incx.eq.1.and.incy.eq.1)go to 20
+c
+c        code for unequal increments or equal increments
+c          not equal to 1
+c
+      ix = 1
+      iy = 1
+      if(incx.lt.0)ix = (-n+1)*incx + 1
+      if(incy.lt.0)iy = (-n+1)*incy + 1
+      do 10 i = 1,n
+        ctemp = ctemp + cx(ix)*cy(iy)
+        ix = ix + incx
+        iy = iy + incy
+   10 continue
+      cdotu = ctemp
+      return
+c
+c        code for both increments equal to 1
+c
+   20 do 30 i = 1,n
+        ctemp = ctemp + cx(i)*cy(i)
+   30 continue
+      cdotu = ctemp
+      return
+      end
diff --git a/costa/native/external/blas/cgbmv.f b/costa/native/external/blas/cgbmv.f
new file mode 100644
index 000000000..5b559c12d
--- /dev/null
+++ b/costa/native/external/blas/cgbmv.f
@@ -0,0 +1,322 @@
+      SUBROUTINE CGBMV ( TRANS, M, N, KL, KU, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      COMPLEX            ALPHA, BETA
+      INTEGER            INCX, INCY, KL, KU, LDA, M, N
+      CHARACTER*1        TRANS
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGBMV  performs one of the matrix-vector operations
+*
+*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   or
+*
+*     y := alpha*conjg( A' )*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are vectors and A is an
+*  m by n band matrix, with kl sub-diagonals and ku super-diagonals.
+*
+*  Parameters
+*  ==========
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
+*
+*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.
+*
+*              TRANS = 'C' or 'c'   y := alpha*conjg( A' )*x + beta*y.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of the matrix A.
+*           M must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  KL     - INTEGER.
+*           On entry, KL specifies the number of sub-diagonals of the
+*           matrix A. KL must satisfy  0 .le. KL.
+*           Unchanged on exit.
+*
+*  KU     - INTEGER.
+*           On entry, KU specifies the number of super-diagonals of the
+*           matrix A. KU must satisfy  0 .le. KU.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, n ).
+*           Before entry, the leading ( kl + ku + 1 ) by n part of the
+*           array A must contain the matrix of coefficients, supplied
+*           column by column, with the leading diagonal of the matrix in
+*           row ( ku + 1 ) of the array, the first super-diagonal
+*           starting at position 2 in row ku, the first sub-diagonal
+*           starting at position 1 in row ( ku + 2 ), and so on.
+*           Elements in the array A that do not correspond to elements
+*           in the band matrix (such as the top left ku by ku triangle)
+*           are not referenced.
+*           The following program segment will transfer a band matrix
+*           from conventional full matrix storage to band storage:
+*
+*                 DO 20, J = 1, N
+*                    K = KU + 1 - J
+*                    DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL )
+*                       A( K + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( kl + ku + 1 ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
+*           Before entry, the incremented array X must contain the
+*           vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX         .
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX          array of DIMENSION at least
+*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
+*           Before entry, the incremented array Y must contain the
+*           vector y. On exit, Y is overwritten by the updated vector y.
+*
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER        ( ONE  = ( 1.0E+0, 0.0E+0 ) )
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP
+      INTEGER            I, INFO, IX, IY, J, JX, JY, K, KUP1, KX, KY,
+     $                   LENX, LENY
+      LOGICAL            NOCONJ
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 1
+      ELSE IF( M.LT.0 )THEN
+         INFO = 2
+      ELSE IF( N.LT.0 )THEN
+         INFO = 3
+      ELSE IF( KL.LT.0 )THEN
+         INFO = 4
+      ELSE IF( KU.LT.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.( KL + KU + 1 ) )THEN
+         INFO = 8
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 10
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 13
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CGBMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+      NOCONJ = LSAME( TRANS, 'T' )
+*
+*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
+*     up the start points in  X  and  Y.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+         LENX = N
+         LENY = M
+      ELSE
+         LENX = M
+         LENY = N
+      END IF
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( LENX - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( LENY - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the band part of A.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, LENY
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, LENY
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, LENY
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, LENY
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      KUP1 = KU + 1
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  y := alpha*A*x + y.
+*
+         JX = KX
+         IF( INCY.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  K    = KUP1 - J
+                  DO 50, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     Y( I ) = Y( I ) + TEMP*A( K + I, J )
+   50             CONTINUE
+               END IF
+               JX = JX + INCX
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IY   = KY
+                  K    = KUP1 - J
+                  DO 70, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     Y( IY ) = Y( IY ) + TEMP*A( K + I, J )
+                     IY      = IY      + INCY
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+               IF( J.GT.KU )
+     $            KY = KY + INCY
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y := alpha*A'*x + y  or  y := alpha*conjg( A' )*x + y.
+*
+         JY = KY
+         IF( INCX.EQ.1 )THEN
+            DO 110, J = 1, N
+               TEMP = ZERO
+               K    = KUP1 - J
+               IF( NOCONJ )THEN
+                  DO 90, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     TEMP = TEMP + A( K + I, J )*X( I )
+   90             CONTINUE
+               ELSE
+                  DO 100, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     TEMP = TEMP + CONJG( A( K + I, J ) )*X( I )
+  100             CONTINUE
+               END IF
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+  110       CONTINUE
+         ELSE
+            DO 140, J = 1, N
+               TEMP = ZERO
+               IX   = KX
+               K    = KUP1 - J
+               IF( NOCONJ )THEN
+                  DO 120, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     TEMP = TEMP + A( K + I, J )*X( IX )
+                     IX   = IX   + INCX
+  120             CONTINUE
+               ELSE
+                  DO 130, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     TEMP = TEMP + CONJG( A( K + I, J ) )*X( IX )
+                     IX   = IX   + INCX
+  130             CONTINUE
+               END IF
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+               IF( J.GT.KU )
+     $            KX = KX + INCX
+  140       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CGBMV .
+*
+      END
diff --git a/costa/native/external/blas/cgemm.f b/costa/native/external/blas/cgemm.f
new file mode 100644
index 000000000..14ebdc072
--- /dev/null
+++ b/costa/native/external/blas/cgemm.f
@@ -0,0 +1,414 @@
+      SUBROUTINE CGEMM ( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        TRANSA, TRANSB
+      INTEGER            M, N, K, LDA, LDB, LDC
+      COMPLEX            ALPHA, BETA
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGEMM  performs one of the matrix-matrix operations
+*
+*     C := alpha*op( A )*op( B ) + beta*C,
+*
+*  where  op( X ) is one of
+*
+*     op( X ) = X   or   op( X ) = X'   or   op( X ) = conjg( X' ),
+*
+*  alpha and beta are scalars, and A, B and C are matrices, with op( A )
+*  an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix.
+*
+*  Parameters
+*  ==========
+*
+*  TRANSA - CHARACTER*1.
+*           On entry, TRANSA specifies the form of op( A ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSA = 'N' or 'n',  op( A ) = A.
+*
+*              TRANSA = 'T' or 't',  op( A ) = A'.
+*
+*              TRANSA = 'C' or 'c',  op( A ) = conjg( A' ).
+*
+*           Unchanged on exit.
+*
+*  TRANSB - CHARACTER*1.
+*           On entry, TRANSB specifies the form of op( B ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSB = 'N' or 'n',  op( B ) = B.
+*
+*              TRANSB = 'T' or 't',  op( B ) = B'.
+*
+*              TRANSB = 'C' or 'c',  op( B ) = conjg( B' ).
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry,  M  specifies  the number  of rows  of the  matrix
+*           op( A )  and of the  matrix  C.  M  must  be at least  zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N  specifies the number  of columns of the matrix
+*           op( B ) and the number of columns of the matrix C. N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry,  K  specifies  the number of columns of the matrix
+*           op( A ) and the number of rows of the matrix op( B ). K must
+*           be at least  zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.
+*           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by m  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. When  TRANSA = 'N' or 'n' then
+*           LDA must be at least  max( 1, m ), otherwise  LDA must be at
+*           least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  B      - COMPLEX          array of DIMENSION ( LDB, kb ), where kb is
+*           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.
+*           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n
+*           part of the array  B  must contain the matrix  B,  otherwise
+*           the leading  n by k  part of the array  B  must contain  the
+*           matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in the calling (sub) program. When  TRANSB = 'N' or 'n' then
+*           LDB must be at least  max( 1, k ), otherwise  LDB must be at
+*           least  max( 1, n ).
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX         .
+*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
+*           supplied as zero then C need not be set on input.
+*           Unchanged on exit.
+*
+*  C      - COMPLEX          array of DIMENSION ( LDC, n ).
+*           Before entry, the leading  m by n  part of the array  C must
+*           contain the matrix  C,  except when  beta  is zero, in which
+*           case C need not be set on entry.
+*           On exit, the array  C  is overwritten by the  m by n  matrix
+*           ( alpha*op( A )*op( B ) + beta*C ).
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX
+*     .. Local Scalars ..
+      LOGICAL            CONJA, CONJB, NOTA, NOTB
+      INTEGER            I, INFO, J, L, NCOLA, NROWA, NROWB
+      COMPLEX            TEMP
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER        ( ONE  = ( 1.0E+0, 0.0E+0 ) )
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not
+*     conjugated or transposed, set  CONJA and CONJB  as true if  A  and
+*     B  respectively are to be  transposed but  not conjugated  and set
+*     NROWA, NCOLA and  NROWB  as the number of rows and  columns  of  A
+*     and the number of rows of  B  respectively.
+*
+      NOTA  = LSAME( TRANSA, 'N' )
+      NOTB  = LSAME( TRANSB, 'N' )
+      CONJA = LSAME( TRANSA, 'C' )
+      CONJB = LSAME( TRANSB, 'C' )
+      IF( NOTA )THEN
+         NROWA = M
+         NCOLA = K
+      ELSE
+         NROWA = K
+         NCOLA = M
+      END IF
+      IF( NOTB )THEN
+         NROWB = K
+      ELSE
+         NROWB = N
+      END IF
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF(      ( .NOT.NOTA                 ).AND.
+     $         ( .NOT.CONJA                ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'T' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.NOTB                 ).AND.
+     $         ( .NOT.CONJB                ).AND.
+     $         ( .NOT.LSAME( TRANSB, 'T' ) )      )THEN
+         INFO = 2
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( K  .LT.0               )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 8
+      ELSE IF( LDB.LT.MAX( 1, NROWB ) )THEN
+         INFO = 10
+      ELSE IF( LDC.LT.MAX( 1, M     ) )THEN
+         INFO = 13
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CGEMM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( BETA.EQ.ZERO )THEN
+            DO 20, J = 1, N
+               DO 10, I = 1, M
+                  C( I, J ) = ZERO
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               DO 30, I = 1, M
+                  C( I, J ) = BETA*C( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( NOTB )THEN
+         IF( NOTA )THEN
+*
+*           Form  C := alpha*A*B + beta*C.
+*
+            DO 90, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 50, I = 1, M
+                     C( I, J ) = ZERO
+   50             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 60, I = 1, M
+                     C( I, J ) = BETA*C( I, J )
+   60             CONTINUE
+               END IF
+               DO 80, L = 1, K
+                  IF( B( L, J ).NE.ZERO )THEN
+                     TEMP = ALPHA*B( L, J )
+                     DO 70, I = 1, M
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+   70                CONTINUE
+                  END IF
+   80          CONTINUE
+   90       CONTINUE
+         ELSE IF( CONJA )THEN
+*
+*           Form  C := alpha*conjg( A' )*B + beta*C.
+*
+            DO 120, J = 1, N
+               DO 110, I = 1, M
+                  TEMP = ZERO
+                  DO 100, L = 1, K
+                     TEMP = TEMP + CONJG( A( L, I ) )*B( L, J )
+  100             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  110          CONTINUE
+  120       CONTINUE
+         ELSE
+*
+*           Form  C := alpha*A'*B + beta*C
+*
+            DO 150, J = 1, N
+               DO 140, I = 1, M
+                  TEMP = ZERO
+                  DO 130, L = 1, K
+                     TEMP = TEMP + A( L, I )*B( L, J )
+  130             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  140          CONTINUE
+  150       CONTINUE
+         END IF
+      ELSE IF( NOTA )THEN
+         IF( CONJB )THEN
+*
+*           Form  C := alpha*A*conjg( B' ) + beta*C.
+*
+            DO 200, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 160, I = 1, M
+                     C( I, J ) = ZERO
+  160             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 170, I = 1, M
+                     C( I, J ) = BETA*C( I, J )
+  170             CONTINUE
+               END IF
+               DO 190, L = 1, K
+                  IF( B( J, L ).NE.ZERO )THEN
+                     TEMP = ALPHA*CONJG( B( J, L ) )
+                     DO 180, I = 1, M
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  180                CONTINUE
+                  END IF
+  190          CONTINUE
+  200       CONTINUE
+         ELSE
+*
+*           Form  C := alpha*A*B'          + beta*C
+*
+            DO 250, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 210, I = 1, M
+                     C( I, J ) = ZERO
+  210             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 220, I = 1, M
+                     C( I, J ) = BETA*C( I, J )
+  220             CONTINUE
+               END IF
+               DO 240, L = 1, K
+                  IF( B( J, L ).NE.ZERO )THEN
+                     TEMP = ALPHA*B( J, L )
+                     DO 230, I = 1, M
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  230                CONTINUE
+                  END IF
+  240          CONTINUE
+  250       CONTINUE
+         END IF
+      ELSE IF( CONJA )THEN
+         IF( CONJB )THEN
+*
+*           Form  C := alpha*conjg( A' )*conjg( B' ) + beta*C.
+*
+            DO 280, J = 1, N
+               DO 270, I = 1, M
+                  TEMP = ZERO
+                  DO 260, L = 1, K
+                     TEMP = TEMP + CONJG( A( L, I ) )*CONJG( B( J, L ) )
+  260             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  270          CONTINUE
+  280       CONTINUE
+         ELSE
+*
+*           Form  C := alpha*conjg( A' )*B' + beta*C
+*
+            DO 310, J = 1, N
+               DO 300, I = 1, M
+                  TEMP = ZERO
+                  DO 290, L = 1, K
+                     TEMP = TEMP + CONJG( A( L, I ) )*B( J, L )
+  290             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  300          CONTINUE
+  310       CONTINUE
+         END IF
+      ELSE
+         IF( CONJB )THEN
+*
+*           Form  C := alpha*A'*conjg( B' ) + beta*C
+*
+            DO 340, J = 1, N
+               DO 330, I = 1, M
+                  TEMP = ZERO
+                  DO 320, L = 1, K
+                     TEMP = TEMP + A( L, I )*CONJG( B( J, L ) )
+  320             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  330          CONTINUE
+  340       CONTINUE
+         ELSE
+*
+*           Form  C := alpha*A'*B' + beta*C
+*
+            DO 370, J = 1, N
+               DO 360, I = 1, M
+                  TEMP = ZERO
+                  DO 350, L = 1, K
+                     TEMP = TEMP + A( L, I )*B( J, L )
+  350             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  360          CONTINUE
+  370       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CGEMM .
+*
+      END
diff --git a/costa/native/external/blas/cgemv.f b/costa/native/external/blas/cgemv.f
new file mode 100644
index 000000000..04872d8d9
--- /dev/null
+++ b/costa/native/external/blas/cgemv.f
@@ -0,0 +1,281 @@
+      SUBROUTINE CGEMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      COMPLEX            ALPHA, BETA
+      INTEGER            INCX, INCY, LDA, M, N
+      CHARACTER*1        TRANS
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGEMV  performs one of the matrix-vector operations
+*
+*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   or
+*
+*     y := alpha*conjg( A' )*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are vectors and A is an
+*  m by n matrix.
+*
+*  Parameters
+*  ==========
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
+*
+*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.
+*
+*              TRANS = 'C' or 'c'   y := alpha*conjg( A' )*x + beta*y.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of the matrix A.
+*           M must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, n ).
+*           Before entry, the leading m by n part of the array A must
+*           contain the matrix of coefficients.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
+*           Before entry, the incremented array X must contain the
+*           vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX         .
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX          array of DIMENSION at least
+*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
+*           Before entry with BETA non-zero, the incremented array Y
+*           must contain the vector y. On exit, Y is overwritten by the
+*           updated vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER        ( ONE  = ( 1.0E+0, 0.0E+0 ) )
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KX, KY, LENX, LENY
+      LOGICAL            NOCONJ
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 1
+      ELSE IF( M.LT.0 )THEN
+         INFO = 2
+      ELSE IF( N.LT.0 )THEN
+         INFO = 3
+      ELSE IF( LDA.LT.MAX( 1, M ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CGEMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+      NOCONJ = LSAME( TRANS, 'T' )
+*
+*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
+*     up the start points in  X  and  Y.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+         LENX = N
+         LENY = M
+      ELSE
+         LENX = M
+         LENY = N
+      END IF
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( LENX - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( LENY - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, LENY
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, LENY
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, LENY
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, LENY
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  y := alpha*A*x + y.
+*
+         JX = KX
+         IF( INCY.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  DO 50, I = 1, M
+                     Y( I ) = Y( I ) + TEMP*A( I, J )
+   50             CONTINUE
+               END IF
+               JX = JX + INCX
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IY   = KY
+                  DO 70, I = 1, M
+                     Y( IY ) = Y( IY ) + TEMP*A( I, J )
+                     IY      = IY      + INCY
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y := alpha*A'*x + y  or  y := alpha*conjg( A' )*x + y.
+*
+         JY = KY
+         IF( INCX.EQ.1 )THEN
+            DO 110, J = 1, N
+               TEMP = ZERO
+               IF( NOCONJ )THEN
+                  DO 90, I = 1, M
+                     TEMP = TEMP + A( I, J )*X( I )
+   90             CONTINUE
+               ELSE
+                  DO 100, I = 1, M
+                     TEMP = TEMP + CONJG( A( I, J ) )*X( I )
+  100             CONTINUE
+               END IF
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+  110       CONTINUE
+         ELSE
+            DO 140, J = 1, N
+               TEMP = ZERO
+               IX   = KX
+               IF( NOCONJ )THEN
+                  DO 120, I = 1, M
+                     TEMP = TEMP + A( I, J )*X( IX )
+                     IX   = IX   + INCX
+  120             CONTINUE
+               ELSE
+                  DO 130, I = 1, M
+                     TEMP = TEMP + CONJG( A( I, J ) )*X( IX )
+                     IX   = IX   + INCX
+  130             CONTINUE
+               END IF
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+  140       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CGEMV .
+*
+      END
diff --git a/costa/native/external/blas/cgerc.f b/costa/native/external/blas/cgerc.f
new file mode 100644
index 000000000..288e192d9
--- /dev/null
+++ b/costa/native/external/blas/cgerc.f
@@ -0,0 +1,157 @@
+      SUBROUTINE CGERC ( M, N, ALPHA, X, INCX, Y, INCY, A, LDA )
+*     .. Scalar Arguments ..
+      COMPLEX            ALPHA
+      INTEGER            INCX, INCY, LDA, M, N
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGERC  performs the rank 1 operation
+*
+*     A := alpha*x*conjg( y' ) + A,
+*
+*  where alpha is a scalar, x is an m element vector, y is an n element
+*  vector and A is an m by n matrix.
+*
+*  Parameters
+*  ==========
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of the matrix A.
+*           M must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of dimension at least
+*           ( 1 + ( m - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the m
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y.
+*           Unchanged on exit.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, n ).
+*           Before entry, the leading m by n part of the array A must
+*           contain the matrix of coefficients. On exit, A is
+*           overwritten by the updated matrix.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP
+      INTEGER            I, INFO, IX, J, JY, KX
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( M.LT.0 )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 7
+      ELSE IF( LDA.LT.MAX( 1, M ) )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CGERC ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( INCY.GT.0 )THEN
+         JY = 1
+      ELSE
+         JY = 1 - ( N - 1 )*INCY
+      END IF
+      IF( INCX.EQ.1 )THEN
+         DO 20, J = 1, N
+            IF( Y( JY ).NE.ZERO )THEN
+               TEMP = ALPHA*CONJG( Y( JY ) )
+               DO 10, I = 1, M
+                  A( I, J ) = A( I, J ) + X( I )*TEMP
+   10          CONTINUE
+            END IF
+            JY = JY + INCY
+   20    CONTINUE
+      ELSE
+         IF( INCX.GT.0 )THEN
+            KX = 1
+         ELSE
+            KX = 1 - ( M - 1 )*INCX
+         END IF
+         DO 40, J = 1, N
+            IF( Y( JY ).NE.ZERO )THEN
+               TEMP = ALPHA*CONJG( Y( JY ) )
+               IX   = KX
+               DO 30, I = 1, M
+                  A( I, J ) = A( I, J ) + X( IX )*TEMP
+                  IX        = IX        + INCX
+   30          CONTINUE
+            END IF
+            JY = JY + INCY
+   40    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CGERC .
+*
+      END
diff --git a/costa/native/external/blas/cgeru.f b/costa/native/external/blas/cgeru.f
new file mode 100644
index 000000000..8a9ac390e
--- /dev/null
+++ b/costa/native/external/blas/cgeru.f
@@ -0,0 +1,157 @@
+      SUBROUTINE CGERU ( M, N, ALPHA, X, INCX, Y, INCY, A, LDA )
+*     .. Scalar Arguments ..
+      COMPLEX            ALPHA
+      INTEGER            INCX, INCY, LDA, M, N
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGERU  performs the rank 1 operation
+*
+*     A := alpha*x*y' + A,
+*
+*  where alpha is a scalar, x is an m element vector, y is an n element
+*  vector and A is an m by n matrix.
+*
+*  Parameters
+*  ==========
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of the matrix A.
+*           M must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of dimension at least
+*           ( 1 + ( m - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the m
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y.
+*           Unchanged on exit.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, n ).
+*           Before entry, the leading m by n part of the array A must
+*           contain the matrix of coefficients. On exit, A is
+*           overwritten by the updated matrix.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP
+      INTEGER            I, INFO, IX, J, JY, KX
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( M.LT.0 )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 7
+      ELSE IF( LDA.LT.MAX( 1, M ) )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CGERU ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( INCY.GT.0 )THEN
+         JY = 1
+      ELSE
+         JY = 1 - ( N - 1 )*INCY
+      END IF
+      IF( INCX.EQ.1 )THEN
+         DO 20, J = 1, N
+            IF( Y( JY ).NE.ZERO )THEN
+               TEMP = ALPHA*Y( JY )
+               DO 10, I = 1, M
+                  A( I, J ) = A( I, J ) + X( I )*TEMP
+   10          CONTINUE
+            END IF
+            JY = JY + INCY
+   20    CONTINUE
+      ELSE
+         IF( INCX.GT.0 )THEN
+            KX = 1
+         ELSE
+            KX = 1 - ( M - 1 )*INCX
+         END IF
+         DO 40, J = 1, N
+            IF( Y( JY ).NE.ZERO )THEN
+               TEMP = ALPHA*Y( JY )
+               IX   = KX
+               DO 30, I = 1, M
+                  A( I, J ) = A( I, J ) + X( IX )*TEMP
+                  IX        = IX        + INCX
+   30          CONTINUE
+            END IF
+            JY = JY + INCY
+   40    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CGERU .
+*
+      END
diff --git a/costa/native/external/blas/chbmv.f b/costa/native/external/blas/chbmv.f
new file mode 100644
index 000000000..2de4c46c8
--- /dev/null
+++ b/costa/native/external/blas/chbmv.f
@@ -0,0 +1,309 @@
+      SUBROUTINE CHBMV ( UPLO, N, K, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      COMPLEX            ALPHA, BETA
+      INTEGER            INCX, INCY, K, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHBMV  performs the matrix-vector  operation
+*
+*     y := alpha*A*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are n element vectors and
+*  A is an n by n hermitian band matrix, with k super-diagonals.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the band matrix A is being supplied as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  being supplied.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  being supplied.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry, K specifies the number of super-diagonals of the
+*           matrix A. K must satisfy  0 .le. K.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, n ).
+*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
+*           by n part of the array A must contain the upper triangular
+*           band part of the hermitian matrix, supplied column by
+*           column, with the leading diagonal of the matrix in row
+*           ( k + 1 ) of the array, the first super-diagonal starting at
+*           position 2 in row k, and so on. The top left k by k triangle
+*           of the array A is not referenced.
+*           The following program segment will transfer the upper
+*           triangular part of a hermitian band matrix from conventional
+*           full matrix storage to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = K + 1 - J
+*                    DO 10, I = MAX( 1, J - K ), J
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
+*           by n part of the array A must contain the lower triangular
+*           band part of the hermitian matrix, supplied column by
+*           column, with the leading diagonal of the matrix in row 1 of
+*           the array, the first sub-diagonal starting at position 1 in
+*           row 2, and so on. The bottom right k by k triangle of the
+*           array A is not referenced.
+*           The following program segment will transfer the lower
+*           triangular part of a hermitian band matrix from conventional
+*           full matrix storage to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = 1 - J
+*                    DO 10, I = J, MIN( N, J + K )
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set and are assumed to be zero.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( k + 1 ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the
+*           vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX         .
+*           On entry, BETA specifies the scalar beta.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX          array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the
+*           vector y. On exit, Y is overwritten by the updated vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER        ( ONE  = ( 1.0E+0, 0.0E+0 ) )
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KPLUS1, KX, KY, L
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX, MIN, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( K.LT.0 )THEN
+         INFO = 3
+      ELSE IF( LDA.LT.( K + 1 ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CHBMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set up the start points in  X  and  Y.
+*
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( N - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( N - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of the array A
+*     are accessed sequentially with one pass through A.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, N
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, N
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, N
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, N
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  y  when upper triangle of A is stored.
+*
+         KPLUS1 = K + 1
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               L     = KPLUS1 - J
+               DO 50, I = MAX( 1, J - K ), J - 1
+                  Y( I ) = Y( I ) + TEMP1*A( L + I, J )
+                  TEMP2  = TEMP2  + CONJG( A( L + I, J ) )*X( I )
+   50          CONTINUE
+               Y( J ) = Y( J ) + TEMP1*REAL( A( KPLUS1, J ) )
+     $                         + ALPHA*TEMP2
+   60       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 80, J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               IX    = KX
+               IY    = KY
+               L     = KPLUS1 - J
+               DO 70, I = MAX( 1, J - K ), J - 1
+                  Y( IY ) = Y( IY ) + TEMP1*A( L + I, J )
+                  TEMP2   = TEMP2   + CONJG( A( L + I, J ) )*X( IX )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+   70          CONTINUE
+               Y( JY ) = Y( JY ) + TEMP1*REAL( A( KPLUS1, J ) )
+     $                           + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+               IF( J.GT.K )THEN
+                  KX = KX + INCX
+                  KY = KY + INCY
+               END IF
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y  when lower triangle of A is stored.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 100, J = 1, N
+               TEMP1  = ALPHA*X( J )
+               TEMP2  = ZERO
+               Y( J ) = Y( J ) + TEMP1*REAL( A( 1, J ) )
+               L      = 1      - J
+               DO 90, I = J + 1, MIN( N, J + K )
+                  Y( I ) = Y( I ) + TEMP1*A( L + I, J )
+                  TEMP2  = TEMP2  + CONJG( A( L + I, J ) )*X( I )
+   90          CONTINUE
+               Y( J ) = Y( J ) + ALPHA*TEMP2
+  100       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 120, J = 1, N
+               TEMP1   = ALPHA*X( JX )
+               TEMP2   = ZERO
+               Y( JY ) = Y( JY ) + TEMP1*REAL( A( 1, J ) )
+               L       = 1       - J
+               IX      = JX
+               IY      = JY
+               DO 110, I = J + 1, MIN( N, J + K )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+                  Y( IY ) = Y( IY ) + TEMP1*A( L + I, J )
+                  TEMP2   = TEMP2   + CONJG( A( L + I, J ) )*X( IX )
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CHBMV .
+*
+      END
diff --git a/costa/native/external/blas/chemm.f b/costa/native/external/blas/chemm.f
new file mode 100644
index 000000000..374d74d0b
--- /dev/null
+++ b/costa/native/external/blas/chemm.f
@@ -0,0 +1,304 @@
+      SUBROUTINE CHEMM ( SIDE, UPLO, M, N, ALPHA, A, LDA, B, LDB,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        SIDE, UPLO
+      INTEGER            M, N, LDA, LDB, LDC
+      COMPLEX            ALPHA, BETA
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHEMM  performs one of the matrix-matrix operations
+*
+*     C := alpha*A*B + beta*C,
+*
+*  or
+*
+*     C := alpha*B*A + beta*C,
+*
+*  where alpha and beta are scalars, A is an hermitian matrix and  B and
+*  C are m by n matrices.
+*
+*  Parameters
+*  ==========
+*
+*  SIDE   - CHARACTER*1.
+*           On entry,  SIDE  specifies whether  the  hermitian matrix  A
+*           appears on the  left or right  in the  operation as follows:
+*
+*              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C,
+*
+*              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C,
+*
+*           Unchanged on exit.
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of  the  hermitian  matrix   A  is  to  be
+*           referenced as follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of the
+*                                  hermitian matrix is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of the
+*                                  hermitian matrix is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry,  M  specifies the number of rows of the matrix  C.
+*           M  must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix C.
+*           N  must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is
+*           m  when  SIDE = 'L' or 'l'  and is n  otherwise.
+*           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of
+*           the array  A  must contain the  hermitian matrix,  such that
+*           when  UPLO = 'U' or 'u', the leading m by m upper triangular
+*           part of the array  A  must contain the upper triangular part
+*           of the  hermitian matrix and the  strictly  lower triangular
+*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
+*           the leading  m by m  lower triangular part  of the  array  A
+*           must  contain  the  lower triangular part  of the  hermitian
+*           matrix and the  strictly upper triangular part of  A  is not
+*           referenced.
+*           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of
+*           the array  A  must contain the  hermitian matrix,  such that
+*           when  UPLO = 'U' or 'u', the leading n by n upper triangular
+*           part of the array  A  must contain the upper triangular part
+*           of the  hermitian matrix and the  strictly  lower triangular
+*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
+*           the leading  n by n  lower triangular part  of the  array  A
+*           must  contain  the  lower triangular part  of the  hermitian
+*           matrix and the  strictly upper triangular part of  A  is not
+*           referenced.
+*           Note that the imaginary parts  of the diagonal elements need
+*           not be set, they are assumed to be zero.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the  calling (sub) program. When  SIDE = 'L' or 'l'  then
+*           LDA must be at least  max( 1, m ), otherwise  LDA must be at
+*           least max( 1, n ).
+*           Unchanged on exit.
+*
+*  B      - COMPLEX          array of DIMENSION ( LDB, n ).
+*           Before entry, the leading  m by n part of the array  B  must
+*           contain the matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   LDB  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX         .
+*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
+*           supplied as zero then C need not be set on input.
+*           Unchanged on exit.
+*
+*  C      - COMPLEX          array of DIMENSION ( LDC, n ).
+*           Before entry, the leading  m by n  part of the array  C must
+*           contain the matrix  C,  except when  beta  is zero, in which
+*           case C need not be set on entry.
+*           On exit, the array  C  is overwritten by the  m by n updated
+*           matrix.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX, REAL
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, K, NROWA
+      COMPLEX            TEMP1, TEMP2
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER        ( ONE  = ( 1.0E+0, 0.0E+0 ) )
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Set NROWA as the number of rows of A.
+*
+      IF( LSAME( SIDE, 'L' ) )THEN
+         NROWA = M
+      ELSE
+         NROWA = N
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF(      ( .NOT.LSAME( SIDE, 'L' ) ).AND.
+     $         ( .NOT.LSAME( SIDE, 'R' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.UPPER              ).AND.
+     $         ( .NOT.LSAME( UPLO, 'L' ) )      )THEN
+         INFO = 2
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDB.LT.MAX( 1, M     ) )THEN
+         INFO = 9
+      ELSE IF( LDC.LT.MAX( 1, M     ) )THEN
+         INFO = 12
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CHEMM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( BETA.EQ.ZERO )THEN
+            DO 20, J = 1, N
+               DO 10, I = 1, M
+                  C( I, J ) = ZERO
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               DO 30, I = 1, M
+                  C( I, J ) = BETA*C( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( SIDE, 'L' ) )THEN
+*
+*        Form  C := alpha*A*B + beta*C.
+*
+         IF( UPPER )THEN
+            DO 70, J = 1, N
+               DO 60, I = 1, M
+                  TEMP1 = ALPHA*B( I, J )
+                  TEMP2 = ZERO
+                  DO 50, K = 1, I - 1
+                     C( K, J ) = C( K, J ) + TEMP1*A( K, I )
+                     TEMP2     = TEMP2     +
+     $                           B( K, J )*CONJG(  A( K, I ) )
+   50             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = TEMP1*REAL( A( I, I ) ) +
+     $                           ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J )         +
+     $                           TEMP1*REAL( A( I, I ) ) +
+     $                           ALPHA*TEMP2
+                  END IF
+   60          CONTINUE
+   70       CONTINUE
+         ELSE
+            DO 100, J = 1, N
+               DO 90, I = M, 1, -1
+                  TEMP1 = ALPHA*B( I, J )
+                  TEMP2 = ZERO
+                  DO 80, K = I + 1, M
+                     C( K, J ) = C( K, J ) + TEMP1*A( K, I )
+                     TEMP2     = TEMP2     +
+     $                           B( K, J )*CONJG(  A( K, I ) )
+   80             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = TEMP1*REAL( A( I, I ) ) +
+     $                           ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J )         +
+     $                           TEMP1*REAL( A( I, I ) ) +
+     $                           ALPHA*TEMP2
+                  END IF
+   90          CONTINUE
+  100       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  C := alpha*B*A + beta*C.
+*
+         DO 170, J = 1, N
+            TEMP1 = ALPHA*REAL( A( J, J ) )
+            IF( BETA.EQ.ZERO )THEN
+               DO 110, I = 1, M
+                  C( I, J ) = TEMP1*B( I, J )
+  110          CONTINUE
+            ELSE
+               DO 120, I = 1, M
+                  C( I, J ) = BETA*C( I, J ) + TEMP1*B( I, J )
+  120          CONTINUE
+            END IF
+            DO 140, K = 1, J - 1
+               IF( UPPER )THEN
+                  TEMP1 = ALPHA*A( K, J )
+               ELSE
+                  TEMP1 = ALPHA*CONJG( A( J, K ) )
+               END IF
+               DO 130, I = 1, M
+                  C( I, J ) = C( I, J ) + TEMP1*B( I, K )
+  130          CONTINUE
+  140       CONTINUE
+            DO 160, K = J + 1, N
+               IF( UPPER )THEN
+                  TEMP1 = ALPHA*CONJG( A( J, K ) )
+               ELSE
+                  TEMP1 = ALPHA*A( K, J )
+               END IF
+               DO 150, I = 1, M
+                  C( I, J ) = C( I, J ) + TEMP1*B( I, K )
+  150          CONTINUE
+  160       CONTINUE
+  170    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CHEMM .
+*
+      END
diff --git a/costa/native/external/blas/chemv.f b/costa/native/external/blas/chemv.f
new file mode 100644
index 000000000..34c6ba393
--- /dev/null
+++ b/costa/native/external/blas/chemv.f
@@ -0,0 +1,266 @@
+      SUBROUTINE CHEMV ( UPLO, N, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      COMPLEX            ALPHA, BETA
+      INTEGER            INCX, INCY, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHEMV  performs the matrix-vector  operation
+*
+*     y := alpha*A*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are n element vectors and
+*  A is an n by n hermitian matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the array A is to be referenced as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of A
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of A
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular part of the hermitian matrix and the strictly
+*           lower triangular part of A is not referenced.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular part of the hermitian matrix and the strictly
+*           upper triangular part of A is not referenced.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set and are assumed to be zero.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX         .
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y. On exit, Y is overwritten by the updated
+*           vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER        ( ONE  = ( 1.0E+0, 0.0E+0 ) )
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 5
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 7
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 10
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CHEMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set up the start points in  X  and  Y.
+*
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( N - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( N - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the triangular part
+*     of A.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, N
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, N
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, N
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, N
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  y  when A is stored in upper triangle.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               DO 50, I = 1, J - 1
+                  Y( I ) = Y( I ) + TEMP1*A( I, J )
+                  TEMP2  = TEMP2  + CONJG( A( I, J ) )*X( I )
+   50          CONTINUE
+               Y( J ) = Y( J ) + TEMP1*REAL( A( J, J ) ) + ALPHA*TEMP2
+   60       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 80, J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               IX    = KX
+               IY    = KY
+               DO 70, I = 1, J - 1
+                  Y( IY ) = Y( IY ) + TEMP1*A( I, J )
+                  TEMP2   = TEMP2   + CONJG( A( I, J ) )*X( IX )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+   70          CONTINUE
+               Y( JY ) = Y( JY ) + TEMP1*REAL( A( J, J ) ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y  when A is stored in lower triangle.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 100, J = 1, N
+               TEMP1  = ALPHA*X( J )
+               TEMP2  = ZERO
+               Y( J ) = Y( J ) + TEMP1*REAL( A( J, J ) )
+               DO 90, I = J + 1, N
+                  Y( I ) = Y( I ) + TEMP1*A( I, J )
+                  TEMP2  = TEMP2  + CONJG( A( I, J ) )*X( I )
+   90          CONTINUE
+               Y( J ) = Y( J ) + ALPHA*TEMP2
+  100       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 120, J = 1, N
+               TEMP1   = ALPHA*X( JX )
+               TEMP2   = ZERO
+               Y( JY ) = Y( JY ) + TEMP1*REAL( A( J, J ) )
+               IX      = JX
+               IY      = JY
+               DO 110, I = J + 1, N
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+                  Y( IY ) = Y( IY ) + TEMP1*A( I, J )
+                  TEMP2   = TEMP2   + CONJG( A( I, J ) )*X( IX )
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CHEMV .
+*
+      END
diff --git a/costa/native/external/blas/cher.f b/costa/native/external/blas/cher.f
new file mode 100644
index 000000000..1934f93b0
--- /dev/null
+++ b/costa/native/external/blas/cher.f
@@ -0,0 +1,212 @@
+      SUBROUTINE CHER  ( UPLO, N, ALPHA, X, INCX, A, LDA )
+*     .. Scalar Arguments ..
+      REAL               ALPHA
+      INTEGER            INCX, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHER   performs the hermitian rank 1 operation
+*
+*     A := alpha*x*conjg( x' ) + A,
+*
+*  where alpha is a real scalar, x is an n element vector and A is an
+*  n by n hermitian matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the array A is to be referenced as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of A
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of A
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular part of the hermitian matrix and the strictly
+*           lower triangular part of A is not referenced. On exit, the
+*           upper triangular part of the array A is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular part of the hermitian matrix and the strictly
+*           upper triangular part of A is not referenced. On exit, the
+*           lower triangular part of the array A is overwritten by the
+*           lower triangular part of the updated matrix.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set, they are assumed to be zero, and on exit they
+*           are set to zero.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP
+      INTEGER            I, INFO, IX, J, JX, KX
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CHER  ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.REAL( ZERO ) ) )
+     $   RETURN
+*
+*     Set the start point in X if the increment is not unity.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the triangular part
+*     of A.
+*
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  A  when A is stored in upper triangle.
+*
+         IF( INCX.EQ.1 )THEN
+            DO 20, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP = ALPHA*CONJG( X( J ) )
+                  DO 10, I = 1, J - 1
+                     A( I, J ) = A( I, J ) + X( I )*TEMP
+   10             CONTINUE
+                  A( J, J ) = REAL( A( J, J ) ) + REAL( X( J )*TEMP )
+               ELSE
+                  A( J, J ) = REAL( A( J, J ) )
+               END IF
+   20       CONTINUE
+         ELSE
+            JX = KX
+            DO 40, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*CONJG( X( JX ) )
+                  IX   = KX
+                  DO 30, I = 1, J - 1
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP
+                     IX        = IX        + INCX
+   30             CONTINUE
+                  A( J, J ) = REAL( A( J, J ) ) + REAL( X( JX )*TEMP )
+               ELSE
+                  A( J, J ) = REAL( A( J, J ) )
+               END IF
+               JX = JX + INCX
+   40       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  A  when A is stored in lower triangle.
+*
+         IF( INCX.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP      = ALPHA*CONJG( X( J ) )
+                  A( J, J ) = REAL( A( J, J ) ) + REAL( TEMP*X( J ) )
+                  DO 50, I = J + 1, N
+                     A( I, J ) = A( I, J ) + X( I )*TEMP
+   50             CONTINUE
+               ELSE
+                  A( J, J ) = REAL( A( J, J ) )
+               END IF
+   60       CONTINUE
+         ELSE
+            JX = KX
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP      = ALPHA*CONJG( X( JX ) )
+                  A( J, J ) = REAL( A( J, J ) ) + REAL( TEMP*X( JX ) )
+                  IX        = JX
+                  DO 70, I = J + 1, N
+                     IX        = IX        + INCX
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP
+   70             CONTINUE
+               ELSE
+                  A( J, J ) = REAL( A( J, J ) )
+               END IF
+               JX = JX + INCX
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CHER  .
+*
+      END
diff --git a/costa/native/external/blas/cher2.f b/costa/native/external/blas/cher2.f
new file mode 100644
index 000000000..d7660ad63
--- /dev/null
+++ b/costa/native/external/blas/cher2.f
@@ -0,0 +1,249 @@
+      SUBROUTINE CHER2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, A, LDA )
+*     .. Scalar Arguments ..
+      COMPLEX            ALPHA
+      INTEGER            INCX, INCY, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHER2  performs the hermitian rank 2 operation
+*
+*     A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A,
+*
+*  where alpha is a scalar, x and y are n element vectors and A is an n
+*  by n hermitian matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the array A is to be referenced as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of A
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of A
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y.
+*           Unchanged on exit.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular part of the hermitian matrix and the strictly
+*           lower triangular part of A is not referenced. On exit, the
+*           upper triangular part of the array A is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular part of the hermitian matrix and the strictly
+*           upper triangular part of A is not referenced. On exit, the
+*           lower triangular part of the array A is overwritten by the
+*           lower triangular part of the updated matrix.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set, they are assumed to be zero, and on exit they
+*           are set to zero.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 7
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CHER2 ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Set up the start points in X and Y if the increments are not both
+*     unity.
+*
+      IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN
+         IF( INCX.GT.0 )THEN
+            KX = 1
+         ELSE
+            KX = 1 - ( N - 1 )*INCX
+         END IF
+         IF( INCY.GT.0 )THEN
+            KY = 1
+         ELSE
+            KY = 1 - ( N - 1 )*INCY
+         END IF
+         JX = KX
+         JY = KY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the triangular part
+*     of A.
+*
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  A  when A is stored in the upper triangle.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 20, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*CONJG( Y( J ) )
+                  TEMP2 = CONJG( ALPHA*X( J ) )
+                  DO 10, I = 1, J - 1
+                     A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2
+   10             CONTINUE
+                  A( J, J ) = REAL( A( J, J ) ) +
+     $                        REAL( X( J )*TEMP1 + Y( J )*TEMP2 )
+               ELSE
+                  A( J, J ) = REAL( A( J, J ) )
+               END IF
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*CONJG( Y( JY ) )
+                  TEMP2 = CONJG( ALPHA*X( JX ) )
+                  IX    = KX
+                  IY    = KY
+                  DO 30, I = 1, J - 1
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP1
+     $                                     + Y( IY )*TEMP2
+                     IX        = IX        + INCX
+                     IY        = IY        + INCY
+   30             CONTINUE
+                  A( J, J ) = REAL( A( J, J ) ) +
+     $                        REAL( X( JX )*TEMP1 + Y( JY )*TEMP2 )
+               ELSE
+                  A( J, J ) = REAL( A( J, J ) )
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+   40       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  A  when A is stored in the lower triangle.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1     = ALPHA*CONJG( Y( J ) )
+                  TEMP2     = CONJG( ALPHA*X( J ) )
+                  A( J, J ) = REAL( A( J, J ) ) +
+     $                        REAL( X( J )*TEMP1 + Y( J )*TEMP2 )
+                  DO 50, I = J + 1, N
+                     A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2
+   50             CONTINUE
+               ELSE
+                  A( J, J ) = REAL( A( J, J ) )
+               END IF
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1     = ALPHA*CONJG( Y( JY ) )
+                  TEMP2     = CONJG( ALPHA*X( JX ) )
+                  A( J, J ) = REAL( A( J, J ) ) +
+     $                        REAL( X( JX )*TEMP1 + Y( JY )*TEMP2 )
+                  IX        = JX
+                  IY        = JY
+                  DO 70, I = J + 1, N
+                     IX        = IX        + INCX
+                     IY        = IY        + INCY
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP1
+     $                                     + Y( IY )*TEMP2
+   70             CONTINUE
+               ELSE
+                  A( J, J ) = REAL( A( J, J ) )
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CHER2 .
+*
+      END
diff --git a/costa/native/external/blas/cher2k.f b/costa/native/external/blas/cher2k.f
new file mode 100644
index 000000000..c9a00aede
--- /dev/null
+++ b/costa/native/external/blas/cher2k.f
@@ -0,0 +1,371 @@
+      SUBROUTINE CHER2K( UPLO, TRANS, N, K, ALPHA, A, LDA, B, LDB,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        UPLO, TRANS
+      INTEGER            N, K, LDA, LDB, LDC
+      REAL               BETA
+      COMPLEX            ALPHA
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHER2K  performs one of the hermitian rank 2k operations
+*
+*     C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + beta*C,
+*
+*  or
+*
+*     C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + beta*C,
+*
+*  where  alpha and beta  are scalars with  beta  real,  C is an  n by n
+*  hermitian matrix and  A and B  are  n by k matrices in the first case
+*  and  k by n  matrices in the second case.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of the  array  C  is to be  referenced  as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry,  TRANS  specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'    C := alpha*A*conjg( B' )          +
+*                                         conjg( alpha )*B*conjg( A' ) +
+*                                         beta*C.
+*
+*              TRANS = 'C' or 'c'    C := alpha*conjg( A' )*B          +
+*                                         conjg( alpha )*conjg( B' )*A +
+*                                         beta*C.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N specifies the order of the matrix C.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+*           of  columns  of the  matrices  A and B,  and on  entry  with
+*           TRANS = 'C' or 'c',  K  specifies  the number of rows of the
+*           matrices  A and B.  K must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by n  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDA must be at least  max( 1, n ), otherwise  LDA must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  B      - COMPLEX          array of DIMENSION ( LDB, kb ), where kb is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  B  must contain the matrix  B,  otherwise
+*           the leading  k by n  part of the array  B  must contain  the
+*           matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDB must be at least  max( 1, n ), otherwise  LDB must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  BETA   - REAL            .
+*           On entry, BETA specifies the scalar beta.
+*           Unchanged on exit.
+*
+*  C      - COMPLEX          array of DIMENSION ( LDC, n ).
+*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+*           upper triangular part of the array C must contain the upper
+*           triangular part  of the  hermitian matrix  and the strictly
+*           lower triangular part of C is not referenced.  On exit, the
+*           upper triangular part of the array  C is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+*           lower triangular part of the array C must contain the lower
+*           triangular part  of the  hermitian matrix  and the strictly
+*           upper triangular part of C is not referenced.  On exit, the
+*           lower triangular part of the array  C is overwritten by the
+*           lower triangular part of the updated matrix.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set,  they are assumed to be zero,  and on exit they
+*           are set to zero.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*  -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1.
+*     Ed Anderson, Cray Research Inc.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX, REAL
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, L, NROWA
+      COMPLEX            TEMP1, TEMP2
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER        ( ONE  = 1.0E+0 )
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+         NROWA = N
+      ELSE
+         NROWA = K
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*
+      INFO = 0
+      IF(      ( .NOT.UPPER               ).AND.
+     $         ( .NOT.LSAME( UPLO , 'L' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANS, 'C' ) )      )THEN
+         INFO = 2
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( K  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDB.LT.MAX( 1, NROWA ) )THEN
+         INFO = 9
+      ELSE IF( LDC.LT.MAX( 1, N     ) )THEN
+         INFO = 12
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CHER2K', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.
+     $    ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( UPPER )THEN
+            IF( BETA.EQ.REAL( ZERO ) )THEN
+               DO 20, J = 1, N
+                  DO 10, I = 1, J
+                     C( I, J ) = ZERO
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40, J = 1, N
+                  DO 30, I = 1, J - 1
+                     C( I, J ) = BETA*C( I, J )
+   30             CONTINUE
+                  C( J, J ) = BETA*REAL( C( J, J ) )
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( BETA.EQ.REAL( ZERO ) )THEN
+               DO 60, J = 1, N
+                  DO 50, I = J, N
+                     C( I, J ) = ZERO
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80, J = 1, N
+                  C( J, J ) = BETA*REAL( C( J, J ) )
+                  DO 70, I = J + 1, N
+                     C( I, J ) = BETA*C( I, J )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) +
+*                   C.
+*
+         IF( UPPER )THEN
+            DO 130, J = 1, N
+               IF( BETA.EQ.REAL( ZERO ) )THEN
+                  DO 90, I = 1, J
+                     C( I, J ) = ZERO
+   90             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 100, I = 1, J - 1
+                     C( I, J ) = BETA*C( I, J )
+  100             CONTINUE
+                  C( J, J ) = BETA*REAL( C( J, J ) )
+               ELSE
+                  C( J, J ) = REAL( C( J, J ) )
+               END IF
+               DO 120, L = 1, K
+                  IF( ( A( J, L ).NE.ZERO ).OR.
+     $                ( B( J, L ).NE.ZERO )     )THEN
+                     TEMP1 = ALPHA*CONJG( B( J, L ) )
+                     TEMP2 = CONJG( ALPHA*A( J, L ) )
+                     DO 110, I = 1, J - 1
+                        C( I, J ) = C( I, J ) + A( I, L )*TEMP1 +
+     $                                          B( I, L )*TEMP2
+  110                CONTINUE
+                     C( J, J ) = REAL( C( J, J ) )         +
+     $                           REAL( A( J, L )*TEMP1 +
+     $                                 B( J, L )*TEMP2   )
+                  END IF
+  120          CONTINUE
+  130       CONTINUE
+         ELSE
+            DO 180, J = 1, N
+               IF( BETA.EQ.REAL( ZERO ) )THEN
+                  DO 140, I = J, N
+                     C( I, J ) = ZERO
+  140             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 150, I = J + 1, N
+                     C( I, J ) = BETA*C( I, J )
+  150             CONTINUE
+                  C( J, J ) = BETA*REAL( C( J, J ) )
+               ELSE
+                  C( J, J ) = REAL( C( J, J ) )
+               END IF
+               DO 170, L = 1, K
+                  IF( ( A( J, L ).NE.ZERO ).OR.
+     $                ( B( J, L ).NE.ZERO )     )THEN
+                     TEMP1 = ALPHA*CONJG( B( J, L ) )
+                     TEMP2 = CONJG( ALPHA*A( J, L ) )
+                     DO 160, I = J + 1, N
+                        C( I, J ) = C( I, J ) + A( I, L )*TEMP1 +
+     $                                          B( I, L )*TEMP2
+  160                CONTINUE
+                     C( J, J ) = REAL( C( J, J ) )         +
+     $                           REAL( A( J, L )*TEMP1 +
+     $                                 B( J, L )*TEMP2   )
+                  END IF
+  170          CONTINUE
+  180       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A +
+*                   C.
+*
+         IF( UPPER )THEN
+            DO 210, J = 1, N
+               DO 200, I = 1, J
+                  TEMP1 = ZERO
+                  TEMP2 = ZERO
+                  DO 190, L = 1, K
+                     TEMP1 = TEMP1 + CONJG( A( L, I ) )*B( L, J )
+                     TEMP2 = TEMP2 + CONJG( B( L, I ) )*A( L, J )
+  190             CONTINUE
+                  IF( I.EQ.J )THEN
+                     IF( BETA.EQ.REAL( ZERO ) )THEN
+                        C( J, J ) = REAL(        ALPHA  *TEMP1 +
+     $                                    CONJG( ALPHA )*TEMP2   )
+                     ELSE
+                        C( J, J ) = BETA*REAL( C( J, J ) )         +
+     $                              REAL(        ALPHA  *TEMP1 +
+     $                                    CONJG( ALPHA )*TEMP2   )
+                     END IF
+                  ELSE
+                     IF( BETA.EQ.REAL( ZERO ) )THEN
+                        C( I, J ) = ALPHA*TEMP1 + CONJG( ALPHA )*TEMP2
+                     ELSE
+                        C( I, J ) = BETA *C( I, J ) +
+     $                              ALPHA*TEMP1 + CONJG( ALPHA )*TEMP2
+                     END IF
+                  END IF
+  200          CONTINUE
+  210       CONTINUE
+         ELSE
+            DO 240, J = 1, N
+               DO 230, I = J, N
+                  TEMP1 = ZERO
+                  TEMP2 = ZERO
+                  DO 220, L = 1, K
+                     TEMP1 = TEMP1 + CONJG( A( L, I ) )*B( L, J )
+                     TEMP2 = TEMP2 + CONJG( B( L, I ) )*A( L, J )
+  220             CONTINUE
+                  IF( I.EQ.J )THEN
+                     IF( BETA.EQ.REAL( ZERO ) )THEN
+                        C( J, J ) = REAL(        ALPHA  *TEMP1 +
+     $                                    CONJG( ALPHA )*TEMP2   )
+                     ELSE
+                        C( J, J ) = BETA*REAL( C( J, J ) )         +
+     $                              REAL(        ALPHA  *TEMP1 +
+     $                                    CONJG( ALPHA )*TEMP2   )
+                     END IF
+                  ELSE
+                     IF( BETA.EQ.REAL( ZERO ) )THEN
+                        C( I, J ) = ALPHA*TEMP1 + CONJG( ALPHA )*TEMP2
+                     ELSE
+                        C( I, J ) = BETA *C( I, J ) +
+     $                              ALPHA*TEMP1 + CONJG( ALPHA )*TEMP2
+                     END IF
+                  END IF
+  230          CONTINUE
+  240       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CHER2K.
+*
+      END
diff --git a/costa/native/external/blas/cherk.f b/costa/native/external/blas/cherk.f
new file mode 100644
index 000000000..f5cd99974
--- /dev/null
+++ b/costa/native/external/blas/cherk.f
@@ -0,0 +1,328 @@
+      SUBROUTINE CHERK ( UPLO, TRANS, N, K, ALPHA, A, LDA,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        UPLO, TRANS
+      INTEGER            N, K, LDA, LDC
+      REAL               ALPHA, BETA
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHERK  performs one of the hermitian rank k operations
+*
+*     C := alpha*A*conjg( A' ) + beta*C,
+*
+*  or
+*
+*     C := alpha*conjg( A' )*A + beta*C,
+*
+*  where  alpha and beta  are  real scalars,  C is an  n by n  hermitian
+*  matrix and  A  is an  n by k  matrix in the  first case and a  k by n
+*  matrix in the second case.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of the  array  C  is to be  referenced  as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry,  TRANS  specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   C := alpha*A*conjg( A' ) + beta*C.
+*
+*              TRANS = 'C' or 'c'   C := alpha*conjg( A' )*A + beta*C.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N specifies the order of the matrix C.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+*           of  columns   of  the   matrix   A,   and  on   entry   with
+*           TRANS = 'C' or 'c',  K  specifies  the number of rows of the
+*           matrix A.  K must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by n  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDA must be at least  max( 1, n ), otherwise  LDA must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  BETA   - REAL            .
+*           On entry, BETA specifies the scalar beta.
+*           Unchanged on exit.
+*
+*  C      - COMPLEX          array of DIMENSION ( LDC, n ).
+*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+*           upper triangular part of the array C must contain the upper
+*           triangular part  of the  hermitian matrix  and the strictly
+*           lower triangular part of C is not referenced.  On exit, the
+*           upper triangular part of the array  C is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+*           lower triangular part of the array C must contain the lower
+*           triangular part  of the  hermitian matrix  and the strictly
+*           upper triangular part of C is not referenced.  On exit, the
+*           lower triangular part of the array  C is overwritten by the
+*           lower triangular part of the updated matrix.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set,  they are assumed to be zero,  and on exit they
+*           are set to zero.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*  -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1.
+*     Ed Anderson, Cray Research Inc.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CMPLX, CONJG, MAX, REAL
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, L, NROWA
+      REAL               RTEMP
+      COMPLEX            TEMP
+*     .. Parameters ..
+      REAL               ONE ,         ZERO
+      PARAMETER        ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+         NROWA = N
+      ELSE
+         NROWA = K
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*
+      INFO = 0
+      IF(      ( .NOT.UPPER               ).AND.
+     $         ( .NOT.LSAME( UPLO , 'L' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANS, 'C' ) )      )THEN
+         INFO = 2
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( K  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDC.LT.MAX( 1, N     ) )THEN
+         INFO = 10
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CHERK ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.
+     $    ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( UPPER )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 20, J = 1, N
+                  DO 10, I = 1, J
+                     C( I, J ) = ZERO
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40, J = 1, N
+                  DO 30, I = 1, J - 1
+                     C( I, J ) = BETA*C( I, J )
+   30             CONTINUE
+                  C( J, J ) = BETA*REAL( C( J, J ) )
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( BETA.EQ.ZERO )THEN
+               DO 60, J = 1, N
+                  DO 50, I = J, N
+                     C( I, J ) = ZERO
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80, J = 1, N
+                  C( J, J ) = BETA*REAL( C( J, J ) )
+                  DO 70, I = J + 1, N
+                     C( I, J ) = BETA*C( I, J )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  C := alpha*A*conjg( A' ) + beta*C.
+*
+         IF( UPPER )THEN
+            DO 130, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 90, I = 1, J
+                     C( I, J ) = ZERO
+   90             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 100, I = 1, J - 1
+                     C( I, J ) = BETA*C( I, J )
+  100             CONTINUE
+                  C( J, J ) = BETA*REAL( C( J, J ) )
+               ELSE
+                  C( J, J ) = REAL( C( J, J ) )
+               END IF
+               DO 120, L = 1, K
+                  IF( A( J, L ).NE.CMPLX( ZERO ) )THEN
+                     TEMP = ALPHA*CONJG( A( J, L ) )
+                     DO 110, I = 1, J - 1
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  110                CONTINUE
+                     C( J, J ) = REAL( C( J, J )      ) +
+     $                           REAL( TEMP*A( I, L ) )
+                  END IF
+  120          CONTINUE
+  130       CONTINUE
+         ELSE
+            DO 180, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 140, I = J, N
+                     C( I, J ) = ZERO
+  140             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  C( J, J ) = BETA*REAL( C( J, J ) )
+                  DO 150, I = J + 1, N
+                     C( I, J ) = BETA*C( I, J )
+  150             CONTINUE
+               ELSE
+                  C( J, J ) = REAL( C( J, J ) )
+               END IF
+               DO 170, L = 1, K
+                  IF( A( J, L ).NE.CMPLX( ZERO ) )THEN
+                     TEMP      = ALPHA*CONJG( A( J, L ) )
+                     C( J, J ) = REAL( C( J, J )      )   +
+     $                           REAL( TEMP*A( J, L ) )
+                     DO 160, I = J + 1, N
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  160                CONTINUE
+                  END IF
+  170          CONTINUE
+  180       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  C := alpha*conjg( A' )*A + beta*C.
+*
+         IF( UPPER )THEN
+            DO 220, J = 1, N
+               DO 200, I = 1, J - 1
+                  TEMP = ZERO
+                  DO 190, L = 1, K
+                     TEMP = TEMP + CONJG( A( L, I ) )*A( L, J )
+  190             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  200          CONTINUE
+               RTEMP = ZERO
+               DO 210, L = 1, K
+                  RTEMP = RTEMP + CONJG( A( L, J ) )*A( L, J )
+  210          CONTINUE
+               IF( BETA.EQ.ZERO )THEN
+                  C( J, J ) = ALPHA*RTEMP
+               ELSE
+                  C( J, J ) = ALPHA*RTEMP + BETA*REAL( C( J, J ) )
+               END IF
+  220       CONTINUE
+         ELSE
+            DO 260, J = 1, N
+               RTEMP = ZERO
+               DO 230, L = 1, K
+                  RTEMP = RTEMP + CONJG( A( L, J ) )*A( L, J )
+  230          CONTINUE
+               IF( BETA.EQ.ZERO )THEN
+                  C( J, J ) = ALPHA*RTEMP
+               ELSE
+                  C( J, J ) = ALPHA*RTEMP + BETA*REAL( C( J, J ) )
+               END IF
+               DO 250, I = J + 1, N
+                  TEMP = ZERO
+                  DO 240, L = 1, K
+                     TEMP = TEMP + CONJG( A( L, I ) )*A( L, J )
+  240             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  250          CONTINUE
+  260       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CHERK .
+*
+      END
diff --git a/costa/native/external/blas/chpmv.f b/costa/native/external/blas/chpmv.f
new file mode 100644
index 000000000..f5a566e3e
--- /dev/null
+++ b/costa/native/external/blas/chpmv.f
@@ -0,0 +1,270 @@
+      SUBROUTINE CHPMV ( UPLO, N, ALPHA, AP, X, INCX, BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      COMPLEX            ALPHA, BETA
+      INTEGER            INCX, INCY, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      COMPLEX            AP( * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHPMV  performs the matrix-vector operation
+*
+*     y := alpha*A*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are n element vectors and
+*  A is an n by n hermitian matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the matrix A is supplied in the packed
+*           array AP as follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  supplied in AP.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  supplied in AP.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  AP     - COMPLEX          array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular part of the hermitian matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
+*           and a( 2, 2 ) respectively, and so on.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular part of the hermitian matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
+*           and a( 3, 1 ) respectively, and so on.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set and are assumed to be zero.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX         .
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y. On exit, Y is overwritten by the updated
+*           vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER        ( ONE  = ( 1.0E+0, 0.0E+0 ) )
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 6
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CHPMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set up the start points in  X  and  Y.
+*
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( N - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( N - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of the array AP
+*     are accessed sequentially with one pass through AP.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, N
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, N
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, N
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, N
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      KK = 1
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  y  when AP contains the upper triangle.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               K     = KK
+               DO 50, I = 1, J - 1
+                  Y( I ) = Y( I ) + TEMP1*AP( K )
+                  TEMP2  = TEMP2  + CONJG( AP( K ) )*X( I )
+                  K      = K      + 1
+   50          CONTINUE
+               Y( J ) = Y( J ) + TEMP1*REAL( AP( KK + J - 1 ) )
+     $                         + ALPHA*TEMP2
+               KK     = KK     + J
+   60       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 80, J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               IX    = KX
+               IY    = KY
+               DO 70, K = KK, KK + J - 2
+                  Y( IY ) = Y( IY ) + TEMP1*AP( K )
+                  TEMP2   = TEMP2   + CONJG( AP( K ) )*X( IX )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+   70          CONTINUE
+               Y( JY ) = Y( JY ) + TEMP1*REAL( AP( KK + J - 1 ) )
+     $                           + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+               KK      = KK      + J
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y  when AP contains the lower triangle.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 100, J = 1, N
+               TEMP1  = ALPHA*X( J )
+               TEMP2  = ZERO
+               Y( J ) = Y( J ) + TEMP1*REAL( AP( KK ) )
+               K      = KK     + 1
+               DO 90, I = J + 1, N
+                  Y( I ) = Y( I ) + TEMP1*AP( K )
+                  TEMP2  = TEMP2  + CONJG( AP( K ) )*X( I )
+                  K      = K      + 1
+   90          CONTINUE
+               Y( J ) = Y( J ) + ALPHA*TEMP2
+               KK     = KK     + ( N - J + 1 )
+  100       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 120, J = 1, N
+               TEMP1   = ALPHA*X( JX )
+               TEMP2   = ZERO
+               Y( JY ) = Y( JY ) + TEMP1*REAL( AP( KK ) )
+               IX      = JX
+               IY      = JY
+               DO 110, K = KK + 1, KK + N - J
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+                  Y( IY ) = Y( IY ) + TEMP1*AP( K )
+                  TEMP2   = TEMP2   + CONJG( AP( K ) )*X( IX )
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+               KK      = KK      + ( N - J + 1 )
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CHPMV .
+*
+      END
diff --git a/costa/native/external/blas/chpr.f b/costa/native/external/blas/chpr.f
new file mode 100644
index 000000000..bb41b231b
--- /dev/null
+++ b/costa/native/external/blas/chpr.f
@@ -0,0 +1,217 @@
+      SUBROUTINE CHPR  ( UPLO, N, ALPHA, X, INCX, AP )
+*     .. Scalar Arguments ..
+      REAL               ALPHA
+      INTEGER            INCX, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      COMPLEX            AP( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHPR    performs the hermitian rank 1 operation
+*
+*     A := alpha*x*conjg( x' ) + A,
+*
+*  where alpha is a real scalar, x is an n element vector and A is an
+*  n by n hermitian matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the matrix A is supplied in the packed
+*           array AP as follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  supplied in AP.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  supplied in AP.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  AP     - COMPLEX          array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular part of the hermitian matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
+*           and a( 2, 2 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the upper triangular part of the
+*           updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular part of the hermitian matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
+*           and a( 3, 1 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the lower triangular part of the
+*           updated matrix.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set, they are assumed to be zero, and on exit they
+*           are set to zero.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP
+      INTEGER            I, INFO, IX, J, JX, K, KK, KX
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CHPR  ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.REAL( ZERO ) ) )
+     $   RETURN
+*
+*     Set the start point in X if the increment is not unity.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of the array AP
+*     are accessed sequentially with one pass through AP.
+*
+      KK = 1
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  A  when upper triangle is stored in AP.
+*
+         IF( INCX.EQ.1 )THEN
+            DO 20, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP = ALPHA*CONJG( X( J ) )
+                  K    = KK
+                  DO 10, I = 1, J - 1
+                     AP( K ) = AP( K ) + X( I )*TEMP
+                     K       = K       + 1
+   10             CONTINUE
+                  AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) )
+     $                               + REAL( X( J )*TEMP )
+               ELSE
+                  AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) )
+               END IF
+               KK = KK + J
+   20       CONTINUE
+         ELSE
+            JX = KX
+            DO 40, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*CONJG( X( JX ) )
+                  IX   = KX
+                  DO 30, K = KK, KK + J - 2
+                     AP( K ) = AP( K ) + X( IX )*TEMP
+                     IX      = IX      + INCX
+   30             CONTINUE
+                  AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) )
+     $                               + REAL( X( JX )*TEMP )
+               ELSE
+                  AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) )
+               END IF
+               JX = JX + INCX
+               KK = KK + J
+   40       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  A  when lower triangle is stored in AP.
+*
+         IF( INCX.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP     = ALPHA*CONJG( X( J ) )
+                  AP( KK ) = REAL( AP( KK ) ) + REAL( TEMP*X( J ) )
+                  K        = KK               + 1
+                  DO 50, I = J + 1, N
+                     AP( K ) = AP( K ) + X( I )*TEMP
+                     K       = K       + 1
+   50             CONTINUE
+               ELSE
+                  AP( KK ) = REAL( AP( KK ) )
+               END IF
+               KK = KK + N - J + 1
+   60       CONTINUE
+         ELSE
+            JX = KX
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP    = ALPHA*CONJG( X( JX ) )
+                  AP( KK ) = REAL( AP( KK ) ) + REAL( TEMP*X( JX ) )
+                  IX      = JX
+                  DO 70, K = KK + 1, KK + N - J
+                     IX      = IX      + INCX
+                     AP( K ) = AP( K ) + X( IX )*TEMP
+   70             CONTINUE
+               ELSE
+                  AP( KK ) = REAL( AP( KK ) )
+               END IF
+               JX = JX + INCX
+               KK = KK + N - J + 1
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CHPR  .
+*
+      END
diff --git a/costa/native/external/blas/chpr2.f b/costa/native/external/blas/chpr2.f
new file mode 100644
index 000000000..474d99b96
--- /dev/null
+++ b/costa/native/external/blas/chpr2.f
@@ -0,0 +1,251 @@
+      SUBROUTINE CHPR2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, AP )
+*     .. Scalar Arguments ..
+      COMPLEX            ALPHA
+      INTEGER            INCX, INCY, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      COMPLEX            AP( * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHPR2  performs the hermitian rank 2 operation
+*
+*     A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A,
+*
+*  where alpha is a scalar, x and y are n element vectors and A is an
+*  n by n hermitian matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the matrix A is supplied in the packed
+*           array AP as follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  supplied in AP.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  supplied in AP.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y.
+*           Unchanged on exit.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*  AP     - COMPLEX          array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular part of the hermitian matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
+*           and a( 2, 2 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the upper triangular part of the
+*           updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular part of the hermitian matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
+*           and a( 3, 1 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the lower triangular part of the
+*           updated matrix.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set, they are assumed to be zero, and on exit they
+*           are set to zero.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CHPR2 ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Set up the start points in X and Y if the increments are not both
+*     unity.
+*
+      IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN
+         IF( INCX.GT.0 )THEN
+            KX = 1
+         ELSE
+            KX = 1 - ( N - 1 )*INCX
+         END IF
+         IF( INCY.GT.0 )THEN
+            KY = 1
+         ELSE
+            KY = 1 - ( N - 1 )*INCY
+         END IF
+         JX = KX
+         JY = KY
+      END IF
+*
+*     Start the operations. In this version the elements of the array AP
+*     are accessed sequentially with one pass through AP.
+*
+      KK = 1
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  A  when upper triangle is stored in AP.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 20, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*CONJG( Y( J ) )
+                  TEMP2 = CONJG( ALPHA*X( J ) )
+                  K     = KK
+                  DO 10, I = 1, J - 1
+                     AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2
+                     K       = K       + 1
+   10             CONTINUE
+                  AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) ) +
+     $                               REAL( X( J )*TEMP1 + Y( J )*TEMP2 )
+               ELSE
+                  AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) )
+               END IF
+               KK = KK + J
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*CONJG( Y( JY ) )
+                  TEMP2 = CONJG( ALPHA*X( JX ) )
+                  IX    = KX
+                  IY    = KY
+                  DO 30, K = KK, KK + J - 2
+                     AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2
+                     IX      = IX      + INCX
+                     IY      = IY      + INCY
+   30             CONTINUE
+                  AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) ) +
+     $                               REAL( X( JX )*TEMP1 +
+     $                                     Y( JY )*TEMP2 )
+               ELSE
+                  AP( KK + J - 1 ) = REAL( AP( KK + J - 1 ) )
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+               KK = KK + J
+   40       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  A  when lower triangle is stored in AP.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1   = ALPHA*CONJG( Y( J ) )
+                  TEMP2   = CONJG( ALPHA*X( J ) )
+                  AP( KK ) = REAL( AP( KK ) ) +
+     $                       REAL( X( J )*TEMP1 + Y( J )*TEMP2 )
+                  K        = KK               + 1
+                  DO 50, I = J + 1, N
+                     AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2
+                     K       = K       + 1
+   50             CONTINUE
+               ELSE
+                  AP( KK ) = REAL( AP( KK ) )
+               END IF
+               KK = KK + N - J + 1
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1    = ALPHA*CONJG( Y( JY ) )
+                  TEMP2    = CONJG( ALPHA*X( JX ) )
+                  AP( KK ) = REAL( AP( KK ) ) +
+     $                       REAL( X( JX )*TEMP1 + Y( JY )*TEMP2 )
+                  IX       = JX
+                  IY       = JY
+                  DO 70, K = KK + 1, KK + N - J
+                     IX      = IX      + INCX
+                     IY      = IY      + INCY
+                     AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2
+   70             CONTINUE
+               ELSE
+                  AP( KK ) = REAL( AP( KK ) )
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+               KK = KK + N - J + 1
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CHPR2 .
+*
+      END
diff --git a/costa/native/external/blas/crotg.f b/costa/native/external/blas/crotg.f
new file mode 100644
index 000000000..659807950
--- /dev/null
+++ b/costa/native/external/blas/crotg.f
@@ -0,0 +1,20 @@
+      subroutine crotg(ca,cb,c,s)
+      complex ca,cb,s
+      real c
+      real norm,scale
+      complex alpha
+      if (cabs(ca) .ne. 0.) go to 10
+         c = 0.
+         s = (1.,0.)
+         ca = cb
+         go to 20
+   10 continue
+         scale = cabs(ca) + cabs(cb)
+         norm = scale * sqrt((cabs(ca/scale))**2 + (cabs(cb/scale))**2)
+         alpha = ca /cabs(ca)
+         c = cabs(ca) / norm
+         s = alpha * conjg(cb) / norm
+         ca = alpha * norm
+   20 continue
+      return
+      end
diff --git a/costa/native/external/blas/cscal.f b/costa/native/external/blas/cscal.f
new file mode 100644
index 000000000..56eeebac8
--- /dev/null
+++ b/costa/native/external/blas/cscal.f
@@ -0,0 +1,28 @@
+      subroutine  cscal(n,ca,cx,incx)
+c
+c     scales a vector by a constant.
+c     jack dongarra, linpack,  3/11/78.
+c     modified 3/93 to return if incx .le. 0.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      complex ca,cx(*)
+      integer i,incx,n,nincx
+c
+      if( n.le.0 .or. incx.le.0 )return
+      if(incx.eq.1)go to 20
+c
+c        code for increment not equal to 1
+c
+      nincx = n*incx
+      do 10 i = 1,nincx,incx
+        cx(i) = ca*cx(i)
+   10 continue
+      return
+c
+c        code for increment equal to 1
+c
+   20 do 30 i = 1,n
+        cx(i) = ca*cx(i)
+   30 continue
+      return
+      end
diff --git a/costa/native/external/blas/csscal.f b/costa/native/external/blas/csscal.f
new file mode 100644
index 000000000..edd7e5554
--- /dev/null
+++ b/costa/native/external/blas/csscal.f
@@ -0,0 +1,29 @@
+      subroutine  csscal(n,sa,cx,incx)
+c
+c     scales a complex vector by a real constant.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 3/93 to return if incx .le. 0.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      complex cx(*)
+      real sa
+      integer i,incx,n,nincx
+c
+      if( n.le.0 .or. incx.le.0 )return
+      if(incx.eq.1)go to 20
+c
+c        code for increment not equal to 1
+c
+      nincx = n*incx
+      do 10 i = 1,nincx,incx
+        cx(i) = cmplx(sa*real(cx(i)),sa*aimag(cx(i)))
+   10 continue
+      return
+c
+c        code for increment equal to 1
+c
+   20 do 30 i = 1,n
+        cx(i) = cmplx(sa*real(cx(i)),sa*aimag(cx(i)))
+   30 continue
+      return
+      end
diff --git a/costa/native/external/blas/cswap.f b/costa/native/external/blas/cswap.f
new file mode 100644
index 000000000..ede4495f6
--- /dev/null
+++ b/costa/native/external/blas/cswap.f
@@ -0,0 +1,36 @@
+      subroutine  cswap (n,cx,incx,cy,incy)
+c
+c     interchanges two vectors.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      complex cx(*),cy(*),ctemp
+      integer i,incx,incy,ix,iy,n
+c
+      if(n.le.0)return
+      if(incx.eq.1.and.incy.eq.1)go to 20
+c
+c       code for unequal increments or equal increments not equal
+c         to 1
+c
+      ix = 1
+      iy = 1
+      if(incx.lt.0)ix = (-n+1)*incx + 1
+      if(incy.lt.0)iy = (-n+1)*incy + 1
+      do 10 i = 1,n
+        ctemp = cx(ix)
+        cx(ix) = cy(iy)
+        cy(iy) = ctemp
+        ix = ix + incx
+        iy = iy + incy
+   10 continue
+      return
+c
+c       code for both increments equal to 1
+   20 do 30 i = 1,n
+        ctemp = cx(i)
+        cx(i) = cy(i)
+        cy(i) = ctemp
+   30 continue
+      return
+      end
diff --git a/costa/native/external/blas/csymm.f b/costa/native/external/blas/csymm.f
new file mode 100644
index 000000000..f4eda9017
--- /dev/null
+++ b/costa/native/external/blas/csymm.f
@@ -0,0 +1,296 @@
+      SUBROUTINE CSYMM ( SIDE, UPLO, M, N, ALPHA, A, LDA, B, LDB,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        SIDE, UPLO
+      INTEGER            M, N, LDA, LDB, LDC
+      COMPLEX            ALPHA, BETA
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CSYMM  performs one of the matrix-matrix operations
+*
+*     C := alpha*A*B + beta*C,
+*
+*  or
+*
+*     C := alpha*B*A + beta*C,
+*
+*  where  alpha and beta are scalars, A is a symmetric matrix and  B and
+*  C are m by n matrices.
+*
+*  Parameters
+*  ==========
+*
+*  SIDE   - CHARACTER*1.
+*           On entry,  SIDE  specifies whether  the  symmetric matrix  A
+*           appears on the  left or right  in the  operation as follows:
+*
+*              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C,
+*
+*              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C,
+*
+*           Unchanged on exit.
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of  the  symmetric  matrix   A  is  to  be
+*           referenced as follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of the
+*                                  symmetric matrix is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of the
+*                                  symmetric matrix is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry,  M  specifies the number of rows of the matrix  C.
+*           M  must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix C.
+*           N  must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is
+*           m  when  SIDE = 'L' or 'l'  and is n  otherwise.
+*           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of
+*           the array  A  must contain the  symmetric matrix,  such that
+*           when  UPLO = 'U' or 'u', the leading m by m upper triangular
+*           part of the array  A  must contain the upper triangular part
+*           of the  symmetric matrix and the  strictly  lower triangular
+*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
+*           the leading  m by m  lower triangular part  of the  array  A
+*           must  contain  the  lower triangular part  of the  symmetric
+*           matrix and the  strictly upper triangular part of  A  is not
+*           referenced.
+*           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of
+*           the array  A  must contain the  symmetric matrix,  such that
+*           when  UPLO = 'U' or 'u', the leading n by n upper triangular
+*           part of the array  A  must contain the upper triangular part
+*           of the  symmetric matrix and the  strictly  lower triangular
+*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
+*           the leading  n by n  lower triangular part  of the  array  A
+*           must  contain  the  lower triangular part  of the  symmetric
+*           matrix and the  strictly upper triangular part of  A  is not
+*           referenced.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the  calling (sub) program. When  SIDE = 'L' or 'l'  then
+*           LDA must be at least  max( 1, m ), otherwise  LDA must be at
+*           least max( 1, n ).
+*           Unchanged on exit.
+*
+*  B      - COMPLEX          array of DIMENSION ( LDB, n ).
+*           Before entry, the leading  m by n part of the array  B  must
+*           contain the matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   LDB  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX         .
+*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
+*           supplied as zero then C need not be set on input.
+*           Unchanged on exit.
+*
+*  C      - COMPLEX          array of DIMENSION ( LDC, n ).
+*           Before entry, the leading  m by n  part of the array  C must
+*           contain the matrix  C,  except when  beta  is zero, in which
+*           case C need not be set on entry.
+*           On exit, the array  C  is overwritten by the  m by n updated
+*           matrix.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, K, NROWA
+      COMPLEX            TEMP1, TEMP2
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER        ( ONE  = ( 1.0E+0, 0.0E+0 ) )
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Set NROWA as the number of rows of A.
+*
+      IF( LSAME( SIDE, 'L' ) )THEN
+         NROWA = M
+      ELSE
+         NROWA = N
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF(      ( .NOT.LSAME( SIDE, 'L' ) ).AND.
+     $         ( .NOT.LSAME( SIDE, 'R' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.UPPER              ).AND.
+     $         ( .NOT.LSAME( UPLO, 'L' ) )      )THEN
+         INFO = 2
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDB.LT.MAX( 1, M     ) )THEN
+         INFO = 9
+      ELSE IF( LDC.LT.MAX( 1, M     ) )THEN
+         INFO = 12
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CSYMM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( BETA.EQ.ZERO )THEN
+            DO 20, J = 1, N
+               DO 10, I = 1, M
+                  C( I, J ) = ZERO
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               DO 30, I = 1, M
+                  C( I, J ) = BETA*C( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( SIDE, 'L' ) )THEN
+*
+*        Form  C := alpha*A*B + beta*C.
+*
+         IF( UPPER )THEN
+            DO 70, J = 1, N
+               DO 60, I = 1, M
+                  TEMP1 = ALPHA*B( I, J )
+                  TEMP2 = ZERO
+                  DO 50, K = 1, I - 1
+                     C( K, J ) = C( K, J ) + TEMP1    *A( K, I )
+                     TEMP2     = TEMP2     + B( K, J )*A( K, I )
+   50             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           TEMP1*A( I, I ) + ALPHA*TEMP2
+                  END IF
+   60          CONTINUE
+   70       CONTINUE
+         ELSE
+            DO 100, J = 1, N
+               DO 90, I = M, 1, -1
+                  TEMP1 = ALPHA*B( I, J )
+                  TEMP2 = ZERO
+                  DO 80, K = I + 1, M
+                     C( K, J ) = C( K, J ) + TEMP1    *A( K, I )
+                     TEMP2     = TEMP2     + B( K, J )*A( K, I )
+   80             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           TEMP1*A( I, I ) + ALPHA*TEMP2
+                  END IF
+   90          CONTINUE
+  100       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  C := alpha*B*A + beta*C.
+*
+         DO 170, J = 1, N
+            TEMP1 = ALPHA*A( J, J )
+            IF( BETA.EQ.ZERO )THEN
+               DO 110, I = 1, M
+                  C( I, J ) = TEMP1*B( I, J )
+  110          CONTINUE
+            ELSE
+               DO 120, I = 1, M
+                  C( I, J ) = BETA*C( I, J ) + TEMP1*B( I, J )
+  120          CONTINUE
+            END IF
+            DO 140, K = 1, J - 1
+               IF( UPPER )THEN
+                  TEMP1 = ALPHA*A( K, J )
+               ELSE
+                  TEMP1 = ALPHA*A( J, K )
+               END IF
+               DO 130, I = 1, M
+                  C( I, J ) = C( I, J ) + TEMP1*B( I, K )
+  130          CONTINUE
+  140       CONTINUE
+            DO 160, K = J + 1, N
+               IF( UPPER )THEN
+                  TEMP1 = ALPHA*A( J, K )
+               ELSE
+                  TEMP1 = ALPHA*A( K, J )
+               END IF
+               DO 150, I = 1, M
+                  C( I, J ) = C( I, J ) + TEMP1*B( I, K )
+  150          CONTINUE
+  160       CONTINUE
+  170    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CSYMM .
+*
+      END
diff --git a/costa/native/external/blas/csyr2k.f b/costa/native/external/blas/csyr2k.f
new file mode 100644
index 000000000..eb3861077
--- /dev/null
+++ b/costa/native/external/blas/csyr2k.f
@@ -0,0 +1,324 @@
+      SUBROUTINE CSYR2K( UPLO, TRANS, N, K, ALPHA, A, LDA, B, LDB,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        UPLO, TRANS
+      INTEGER            N, K, LDA, LDB, LDC
+      COMPLEX            ALPHA, BETA
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CSYR2K  performs one of the symmetric rank 2k operations
+*
+*     C := alpha*A*B' + alpha*B*A' + beta*C,
+*
+*  or
+*
+*     C := alpha*A'*B + alpha*B'*A + beta*C,
+*
+*  where  alpha and beta  are scalars,  C is an  n by n symmetric matrix
+*  and  A and B  are  n by k  matrices  in the  first  case  and  k by n
+*  matrices in the second case.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of the  array  C  is to be  referenced  as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry,  TRANS  specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'    C := alpha*A*B' + alpha*B*A' +
+*                                         beta*C.
+*
+*              TRANS = 'T' or 't'    C := alpha*A'*B + alpha*B'*A +
+*                                         beta*C.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N specifies the order of the matrix C.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+*           of  columns  of the  matrices  A and B,  and on  entry  with
+*           TRANS = 'T' or 't',  K  specifies  the number of rows of the
+*           matrices  A and B.  K must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by n  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDA must be at least  max( 1, n ), otherwise  LDA must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  B      - COMPLEX          array of DIMENSION ( LDB, kb ), where kb is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  B  must contain the matrix  B,  otherwise
+*           the leading  k by n  part of the array  B  must contain  the
+*           matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDB must be at least  max( 1, n ), otherwise  LDB must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX         .
+*           On entry, BETA specifies the scalar beta.
+*           Unchanged on exit.
+*
+*  C      - COMPLEX          array of DIMENSION ( LDC, n ).
+*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+*           upper triangular part of the array C must contain the upper
+*           triangular part  of the  symmetric matrix  and the strictly
+*           lower triangular part of C is not referenced.  On exit, the
+*           upper triangular part of the array  C is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+*           lower triangular part of the array C must contain the lower
+*           triangular part  of the  symmetric matrix  and the strictly
+*           upper triangular part of C is not referenced.  On exit, the
+*           lower triangular part of the array  C is overwritten by the
+*           lower triangular part of the updated matrix.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, L, NROWA
+      COMPLEX            TEMP1, TEMP2
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER        ( ONE  = ( 1.0E+0, 0.0E+0 ) )
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+         NROWA = N
+      ELSE
+         NROWA = K
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*
+      INFO = 0
+      IF(      ( .NOT.UPPER               ).AND.
+     $         ( .NOT.LSAME( UPLO , 'L' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANS, 'T' ) )      )THEN
+         INFO = 2
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( K  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDB.LT.MAX( 1, NROWA ) )THEN
+         INFO = 9
+      ELSE IF( LDC.LT.MAX( 1, N     ) )THEN
+         INFO = 12
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CSYR2K', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.
+     $    ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( UPPER )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 20, J = 1, N
+                  DO 10, I = 1, J
+                     C( I, J ) = ZERO
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40, J = 1, N
+                  DO 30, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+   30             CONTINUE
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( BETA.EQ.ZERO )THEN
+               DO 60, J = 1, N
+                  DO 50, I = J, N
+                     C( I, J ) = ZERO
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80, J = 1, N
+                  DO 70, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  C := alpha*A*B' + alpha*B*A' + C.
+*
+         IF( UPPER )THEN
+            DO 130, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 90, I = 1, J
+                     C( I, J ) = ZERO
+   90             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 100, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+  100             CONTINUE
+               END IF
+               DO 120, L = 1, K
+                  IF( ( A( J, L ).NE.ZERO ).OR.
+     $                ( B( J, L ).NE.ZERO )     )THEN
+                     TEMP1 = ALPHA*B( J, L )
+                     TEMP2 = ALPHA*A( J, L )
+                     DO 110, I = 1, J
+                        C( I, J ) = C( I, J ) + A( I, L )*TEMP1 +
+     $                                          B( I, L )*TEMP2
+  110                CONTINUE
+                  END IF
+  120          CONTINUE
+  130       CONTINUE
+         ELSE
+            DO 180, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 140, I = J, N
+                     C( I, J ) = ZERO
+  140             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 150, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+  150             CONTINUE
+               END IF
+               DO 170, L = 1, K
+                  IF( ( A( J, L ).NE.ZERO ).OR.
+     $                ( B( J, L ).NE.ZERO )     )THEN
+                     TEMP1 = ALPHA*B( J, L )
+                     TEMP2 = ALPHA*A( J, L )
+                     DO 160, I = J, N
+                        C( I, J ) = C( I, J ) + A( I, L )*TEMP1 +
+     $                                          B( I, L )*TEMP2
+  160                CONTINUE
+                  END IF
+  170          CONTINUE
+  180       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  C := alpha*A'*B + alpha*B'*A + C.
+*
+         IF( UPPER )THEN
+            DO 210, J = 1, N
+               DO 200, I = 1, J
+                  TEMP1 = ZERO
+                  TEMP2 = ZERO
+                  DO 190, L = 1, K
+                     TEMP1 = TEMP1 + A( L, I )*B( L, J )
+                     TEMP2 = TEMP2 + B( L, I )*A( L, J )
+  190             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           ALPHA*TEMP1 + ALPHA*TEMP2
+                  END IF
+  200          CONTINUE
+  210       CONTINUE
+         ELSE
+            DO 240, J = 1, N
+               DO 230, I = J, N
+                  TEMP1 = ZERO
+                  TEMP2 = ZERO
+                  DO 220, L = 1, K
+                     TEMP1 = TEMP1 + A( L, I )*B( L, J )
+                     TEMP2 = TEMP2 + B( L, I )*A( L, J )
+  220             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           ALPHA*TEMP1 + ALPHA*TEMP2
+                  END IF
+  230          CONTINUE
+  240       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CSYR2K.
+*
+      END
diff --git a/costa/native/external/blas/csyrk.f b/costa/native/external/blas/csyrk.f
new file mode 100644
index 000000000..d76bce052
--- /dev/null
+++ b/costa/native/external/blas/csyrk.f
@@ -0,0 +1,293 @@
+      SUBROUTINE CSYRK ( UPLO, TRANS, N, K, ALPHA, A, LDA,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        UPLO, TRANS
+      INTEGER            N, K, LDA, LDC
+      COMPLEX            ALPHA, BETA
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CSYRK  performs one of the symmetric rank k operations
+*
+*     C := alpha*A*A' + beta*C,
+*
+*  or
+*
+*     C := alpha*A'*A + beta*C,
+*
+*  where  alpha and beta  are scalars,  C is an  n by n symmetric matrix
+*  and  A  is an  n by k  matrix in the first case and a  k by n  matrix
+*  in the second case.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of the  array  C  is to be  referenced  as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry,  TRANS  specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   C := alpha*A*A' + beta*C.
+*
+*              TRANS = 'T' or 't'   C := alpha*A'*A + beta*C.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N specifies the order of the matrix C.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+*           of  columns   of  the   matrix   A,   and  on   entry   with
+*           TRANS = 'T' or 't',  K  specifies  the number of rows of the
+*           matrix A.  K must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by n  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDA must be at least  max( 1, n ), otherwise  LDA must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX         .
+*           On entry, BETA specifies the scalar beta.
+*           Unchanged on exit.
+*
+*  C      - COMPLEX          array of DIMENSION ( LDC, n ).
+*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+*           upper triangular part of the array C must contain the upper
+*           triangular part  of the  symmetric matrix  and the strictly
+*           lower triangular part of C is not referenced.  On exit, the
+*           upper triangular part of the array  C is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+*           lower triangular part of the array C must contain the lower
+*           triangular part  of the  symmetric matrix  and the strictly
+*           upper triangular part of C is not referenced.  On exit, the
+*           lower triangular part of the array  C is overwritten by the
+*           lower triangular part of the updated matrix.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, L, NROWA
+      COMPLEX            TEMP
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER        ( ONE  = ( 1.0E+0, 0.0E+0 ) )
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+         NROWA = N
+      ELSE
+         NROWA = K
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*
+      INFO = 0
+      IF(      ( .NOT.UPPER               ).AND.
+     $         ( .NOT.LSAME( UPLO , 'L' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANS, 'T' ) )      )THEN
+         INFO = 2
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( K  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDC.LT.MAX( 1, N     ) )THEN
+         INFO = 10
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CSYRK ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.
+     $    ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( UPPER )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 20, J = 1, N
+                  DO 10, I = 1, J
+                     C( I, J ) = ZERO
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40, J = 1, N
+                  DO 30, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+   30             CONTINUE
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( BETA.EQ.ZERO )THEN
+               DO 60, J = 1, N
+                  DO 50, I = J, N
+                     C( I, J ) = ZERO
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80, J = 1, N
+                  DO 70, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  C := alpha*A*A' + beta*C.
+*
+         IF( UPPER )THEN
+            DO 130, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 90, I = 1, J
+                     C( I, J ) = ZERO
+   90             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 100, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+  100             CONTINUE
+               END IF
+               DO 120, L = 1, K
+                  IF( A( J, L ).NE.ZERO )THEN
+                     TEMP = ALPHA*A( J, L )
+                     DO 110, I = 1, J
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  110                CONTINUE
+                  END IF
+  120          CONTINUE
+  130       CONTINUE
+         ELSE
+            DO 180, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 140, I = J, N
+                     C( I, J ) = ZERO
+  140             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 150, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+  150             CONTINUE
+               END IF
+               DO 170, L = 1, K
+                  IF( A( J, L ).NE.ZERO )THEN
+                     TEMP      = ALPHA*A( J, L )
+                     DO 160, I = J, N
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  160                CONTINUE
+                  END IF
+  170          CONTINUE
+  180       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  C := alpha*A'*A + beta*C.
+*
+         IF( UPPER )THEN
+            DO 210, J = 1, N
+               DO 200, I = 1, J
+                  TEMP = ZERO
+                  DO 190, L = 1, K
+                     TEMP = TEMP + A( L, I )*A( L, J )
+  190             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  200          CONTINUE
+  210       CONTINUE
+         ELSE
+            DO 240, J = 1, N
+               DO 230, I = J, N
+                  TEMP = ZERO
+                  DO 220, L = 1, K
+                     TEMP = TEMP + A( L, I )*A( L, J )
+  220             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  230          CONTINUE
+  240       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CSYRK .
+*
+      END
diff --git a/costa/native/external/blas/ctbmv.f b/costa/native/external/blas/ctbmv.f
new file mode 100644
index 000000000..5b9ceb43f
--- /dev/null
+++ b/costa/native/external/blas/ctbmv.f
@@ -0,0 +1,377 @@
+      SUBROUTINE CTBMV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, K, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTBMV  performs one of the matrix-vector operations
+*
+*     x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x,
+*
+*  where x is an n element vector and  A is an n by n unit, or non-unit,
+*  upper or lower triangular band matrix, with ( k + 1 ) diagonals.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   x := A*x.
+*
+*              TRANS = 'T' or 't'   x := A'*x.
+*
+*              TRANS = 'C' or 'c'   x := conjg( A' )*x.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with UPLO = 'U' or 'u', K specifies the number of
+*           super-diagonals of the matrix A.
+*           On entry with UPLO = 'L' or 'l', K specifies the number of
+*           sub-diagonals of the matrix A.
+*           K must satisfy  0 .le. K.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, n ).
+*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
+*           by n part of the array A must contain the upper triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row
+*           ( k + 1 ) of the array, the first super-diagonal starting at
+*           position 2 in row k, and so on. The top left k by k triangle
+*           of the array A is not referenced.
+*           The following program segment will transfer an upper
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = K + 1 - J
+*                    DO 10, I = MAX( 1, J - K ), J
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
+*           by n part of the array A must contain the lower triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row 1 of
+*           the array, the first sub-diagonal starting at position 1 in
+*           row 2, and so on. The bottom right k by k triangle of the
+*           array A is not referenced.
+*           The following program segment will transfer a lower
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = 1 - J
+*                    DO 10, I = J, MIN( N, J + K )
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Note that when DIAG = 'U' or 'u' the elements of the array A
+*           corresponding to the diagonal elements of the matrix are not
+*           referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( k + 1 ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x. On exit, X is overwritten with the
+*           tranformed vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP
+      INTEGER            I, INFO, IX, J, JX, KPLUS1, KX, L
+      LOGICAL            NOCONJ, NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( K.LT.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.( K + 1 ) )THEN
+         INFO = 7
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CTBMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOCONJ = LSAME( TRANS, 'T' )
+      NOUNIT = LSAME( DIAG , 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX   too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*         Form  x := A*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     L    = KPLUS1 - J
+                     DO 10, I = MAX( 1, J - K ), J - 1
+                        X( I ) = X( I ) + TEMP*A( L + I, J )
+   10                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( KPLUS1, J )
+                  END IF
+   20          CONTINUE
+            ELSE
+               JX = KX
+               DO 40, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     L    = KPLUS1  - J
+                     DO 30, I = MAX( 1, J - K ), J - 1
+                        X( IX ) = X( IX ) + TEMP*A( L + I, J )
+                        IX      = IX      + INCX
+   30                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( KPLUS1, J )
+                  END IF
+                  JX = JX + INCX
+                  IF( J.GT.K )
+     $               KX = KX + INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     L    = 1      - J
+                     DO 50, I = MIN( N, J + K ), J + 1, -1
+                        X( I ) = X( I ) + TEMP*A( L + I, J )
+   50                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( 1, J )
+                  END IF
+   60          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 80, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     L    = 1       - J
+                     DO 70, I = MIN( N, J + K ), J + 1, -1
+                        X( IX ) = X( IX ) + TEMP*A( L + I, J )
+                        IX      = IX      - INCX
+   70                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( 1, J )
+                  END IF
+                  JX = JX - INCX
+                  IF( ( N - J ).GE.K )
+     $               KX = KX - INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := A'*x  or  x := conjg( A' )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 110, J = N, 1, -1
+                  TEMP = X( J )
+                  L    = KPLUS1 - J
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( KPLUS1, J )
+                     DO 90, I = J - 1, MAX( 1, J - K ), -1
+                        TEMP = TEMP + A( L + I, J )*X( I )
+   90                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*CONJG( A( KPLUS1, J ) )
+                     DO 100, I = J - 1, MAX( 1, J - K ), -1
+                        TEMP = TEMP + CONJG( A( L + I, J ) )*X( I )
+  100                CONTINUE
+                  END IF
+                  X( J ) = TEMP
+  110          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 140, J = N, 1, -1
+                  TEMP = X( JX )
+                  KX   = KX      - INCX
+                  IX   = KX
+                  L    = KPLUS1  - J
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( KPLUS1, J )
+                     DO 120, I = J - 1, MAX( 1, J - K ), -1
+                        TEMP = TEMP + A( L + I, J )*X( IX )
+                        IX   = IX   - INCX
+  120                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*CONJG( A( KPLUS1, J ) )
+                     DO 130, I = J - 1, MAX( 1, J - K ), -1
+                        TEMP = TEMP + CONJG( A( L + I, J ) )*X( IX )
+                        IX   = IX   - INCX
+  130                CONTINUE
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+  140          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 170, J = 1, N
+                  TEMP = X( J )
+                  L    = 1      - J
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( 1, J )
+                     DO 150, I = J + 1, MIN( N, J + K )
+                        TEMP = TEMP + A( L + I, J )*X( I )
+  150                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*CONJG( A( 1, J ) )
+                     DO 160, I = J + 1, MIN( N, J + K )
+                        TEMP = TEMP + CONJG( A( L + I, J ) )*X( I )
+  160                CONTINUE
+                  END IF
+                  X( J ) = TEMP
+  170          CONTINUE
+            ELSE
+               JX = KX
+               DO 200, J = 1, N
+                  TEMP = X( JX )
+                  KX   = KX      + INCX
+                  IX   = KX
+                  L    = 1       - J
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( 1, J )
+                     DO 180, I = J + 1, MIN( N, J + K )
+                        TEMP = TEMP + A( L + I, J )*X( IX )
+                        IX   = IX   + INCX
+  180                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*CONJG( A( 1, J ) )
+                     DO 190, I = J + 1, MIN( N, J + K )
+                        TEMP = TEMP + CONJG( A( L + I, J ) )*X( IX )
+                        IX   = IX   + INCX
+  190                CONTINUE
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+  200          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CTBMV .
+*
+      END
diff --git a/costa/native/external/blas/ctbsv.f b/costa/native/external/blas/ctbsv.f
new file mode 100644
index 000000000..abe4c77a3
--- /dev/null
+++ b/costa/native/external/blas/ctbsv.f
@@ -0,0 +1,381 @@
+      SUBROUTINE CTBSV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, K, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTBSV  solves one of the systems of equations
+*
+*     A*x = b,   or   A'*x = b,   or   conjg( A' )*x = b,
+*
+*  where b and x are n element vectors and A is an n by n unit, or
+*  non-unit, upper or lower triangular band matrix, with ( k + 1 )
+*  diagonals.
+*
+*  No test for singularity or near-singularity is included in this
+*  routine. Such tests must be performed before calling this routine.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the equations to be solved as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   A*x = b.
+*
+*              TRANS = 'T' or 't'   A'*x = b.
+*
+*              TRANS = 'C' or 'c'   conjg( A' )*x = b.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with UPLO = 'U' or 'u', K specifies the number of
+*           super-diagonals of the matrix A.
+*           On entry with UPLO = 'L' or 'l', K specifies the number of
+*           sub-diagonals of the matrix A.
+*           K must satisfy  0 .le. K.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, n ).
+*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
+*           by n part of the array A must contain the upper triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row
+*           ( k + 1 ) of the array, the first super-diagonal starting at
+*           position 2 in row k, and so on. The top left k by k triangle
+*           of the array A is not referenced.
+*           The following program segment will transfer an upper
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = K + 1 - J
+*                    DO 10, I = MAX( 1, J - K ), J
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
+*           by n part of the array A must contain the lower triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row 1 of
+*           the array, the first sub-diagonal starting at position 1 in
+*           row 2, and so on. The bottom right k by k triangle of the
+*           array A is not referenced.
+*           The following program segment will transfer a lower
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = 1 - J
+*                    DO 10, I = J, MIN( N, J + K )
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Note that when DIAG = 'U' or 'u' the elements of the array A
+*           corresponding to the diagonal elements of the matrix are not
+*           referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( k + 1 ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element right-hand side vector b. On exit, X is overwritten
+*           with the solution vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP
+      INTEGER            I, INFO, IX, J, JX, KPLUS1, KX, L
+      LOGICAL            NOCONJ, NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( K.LT.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.( K + 1 ) )THEN
+         INFO = 7
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CTBSV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOCONJ = LSAME( TRANS, 'T' )
+      NOUNIT = LSAME( DIAG , 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed by sequentially with one pass through A.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x := inv( A )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     L = KPLUS1 - J
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( KPLUS1, J )
+                     TEMP = X( J )
+                     DO 10, I = J - 1, MAX( 1, J - K ), -1
+                        X( I ) = X( I ) - TEMP*A( L + I, J )
+   10                CONTINUE
+                  END IF
+   20          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 40, J = N, 1, -1
+                  KX = KX - INCX
+                  IF( X( JX ).NE.ZERO )THEN
+                     IX = KX
+                     L  = KPLUS1 - J
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( KPLUS1, J )
+                     TEMP = X( JX )
+                     DO 30, I = J - 1, MAX( 1, J - K ), -1
+                        X( IX ) = X( IX ) - TEMP*A( L + I, J )
+                        IX      = IX      - INCX
+   30                CONTINUE
+                  END IF
+                  JX = JX - INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     L = 1 - J
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( 1, J )
+                     TEMP = X( J )
+                     DO 50, I = J + 1, MIN( N, J + K )
+                        X( I ) = X( I ) - TEMP*A( L + I, J )
+   50                CONTINUE
+                  END IF
+   60          CONTINUE
+            ELSE
+               JX = KX
+               DO 80, J = 1, N
+                  KX = KX + INCX
+                  IF( X( JX ).NE.ZERO )THEN
+                     IX = KX
+                     L  = 1  - J
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( 1, J )
+                     TEMP = X( JX )
+                     DO 70, I = J + 1, MIN( N, J + K )
+                        X( IX ) = X( IX ) - TEMP*A( L + I, J )
+                        IX      = IX      + INCX
+   70                CONTINUE
+                  END IF
+                  JX = JX + INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := inv( A' )*x  or  x := inv( conjg( A') )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 110, J = 1, N
+                  TEMP = X( J )
+                  L    = KPLUS1 - J
+                  IF( NOCONJ )THEN
+                     DO 90, I = MAX( 1, J - K ), J - 1
+                        TEMP = TEMP - A( L + I, J )*X( I )
+   90                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( KPLUS1, J )
+                  ELSE
+                     DO 100, I = MAX( 1, J - K ), J - 1
+                        TEMP = TEMP - CONJG( A( L + I, J ) )*X( I )
+  100                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/CONJG( A( KPLUS1, J ) )
+                  END IF
+                  X( J ) = TEMP
+  110          CONTINUE
+            ELSE
+               JX = KX
+               DO 140, J = 1, N
+                  TEMP = X( JX )
+                  IX   = KX
+                  L    = KPLUS1  - J
+                  IF( NOCONJ )THEN
+                     DO 120, I = MAX( 1, J - K ), J - 1
+                        TEMP = TEMP - A( L + I, J )*X( IX )
+                        IX   = IX   + INCX
+  120                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( KPLUS1, J )
+                  ELSE
+                     DO 130, I = MAX( 1, J - K ), J - 1
+                        TEMP = TEMP - CONJG( A( L + I, J ) )*X( IX )
+                        IX   = IX   + INCX
+  130                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/CONJG( A( KPLUS1, J ) )
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+                  IF( J.GT.K )
+     $               KX = KX + INCX
+  140          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 170, J = N, 1, -1
+                  TEMP = X( J )
+                  L    = 1      - J
+                  IF( NOCONJ )THEN
+                     DO 150, I = MIN( N, J + K ), J + 1, -1
+                        TEMP = TEMP - A( L + I, J )*X( I )
+  150                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( 1, J )
+                  ELSE
+                     DO 160, I = MIN( N, J + K ), J + 1, -1
+                        TEMP = TEMP - CONJG( A( L + I, J ) )*X( I )
+  160                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/CONJG( A( 1, J ) )
+                  END IF
+                  X( J ) = TEMP
+  170          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 200, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = KX
+                  L    = 1       - J
+                  IF( NOCONJ )THEN
+                     DO 180, I = MIN( N, J + K ), J + 1, -1
+                        TEMP = TEMP - A( L + I, J )*X( IX )
+                        IX   = IX   - INCX
+  180                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( 1, J )
+                  ELSE
+                     DO 190, I = MIN( N, J + K ), J + 1, -1
+                        TEMP = TEMP - CONJG( A( L + I, J ) )*X( IX )
+                        IX   = IX   - INCX
+  190                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/CONJG( A( 1, J ) )
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+                  IF( ( N - J ).GE.K )
+     $               KX = KX - INCX
+  200          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CTBSV .
+*
+      END
diff --git a/costa/native/external/blas/ctpmv.f b/costa/native/external/blas/ctpmv.f
new file mode 100644
index 000000000..86cb35f79
--- /dev/null
+++ b/costa/native/external/blas/ctpmv.f
@@ -0,0 +1,338 @@
+      SUBROUTINE CTPMV ( UPLO, TRANS, DIAG, N, AP, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      COMPLEX            AP( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTPMV  performs one of the matrix-vector operations
+*
+*     x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x,
+*
+*  where x is an n element vector and  A is an n by n unit, or non-unit,
+*  upper or lower triangular matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   x := A*x.
+*
+*              TRANS = 'T' or 't'   x := A'*x.
+*
+*              TRANS = 'C' or 'c'   x := conjg( A' )*x.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  AP     - COMPLEX          array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
+*           respectively, and so on.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
+*           respectively, and so on.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x. On exit, X is overwritten with the
+*           tranformed vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP
+      INTEGER            I, INFO, IX, J, JX, K, KK, KX
+      LOGICAL            NOCONJ, NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CTPMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOCONJ = LSAME( TRANS, 'T' )
+      NOUNIT = LSAME( DIAG , 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of AP are
+*     accessed sequentially with one pass through AP.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x:= A*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK = 1
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     K    = KK
+                     DO 10, I = 1, J - 1
+                        X( I ) = X( I ) + TEMP*AP( K )
+                        K      = K      + 1
+   10                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*AP( KK + J - 1 )
+                  END IF
+                  KK = KK + J
+   20          CONTINUE
+            ELSE
+               JX = KX
+               DO 40, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 30, K = KK, KK + J - 2
+                        X( IX ) = X( IX ) + TEMP*AP( K )
+                        IX      = IX      + INCX
+   30                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*AP( KK + J - 1 )
+                  END IF
+                  JX = JX + INCX
+                  KK = KK + J
+   40          CONTINUE
+            END IF
+         ELSE
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     K    = KK
+                     DO 50, I = N, J + 1, -1
+                        X( I ) = X( I ) + TEMP*AP( K )
+                        K      = K      - 1
+   50                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*AP( KK - N + J )
+                  END IF
+                  KK = KK - ( N - J + 1 )
+   60          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 80, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 70, K = KK, KK - ( N - ( J + 1 ) ), -1
+                        X( IX ) = X( IX ) + TEMP*AP( K )
+                        IX      = IX      - INCX
+   70                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*AP( KK - N + J )
+                  END IF
+                  JX = JX - INCX
+                  KK = KK - ( N - J + 1 )
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := A'*x  or  x := conjg( A' )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 110, J = N, 1, -1
+                  TEMP = X( J )
+                  K    = KK     - 1
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*AP( KK )
+                     DO 90, I = J - 1, 1, -1
+                        TEMP = TEMP + AP( K )*X( I )
+                        K    = K    - 1
+   90                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*CONJG( AP( KK ) )
+                     DO 100, I = J - 1, 1, -1
+                        TEMP = TEMP + CONJG( AP( K ) )*X( I )
+                        K    = K    - 1
+  100                CONTINUE
+                  END IF
+                  X( J ) = TEMP
+                  KK     = KK   - J
+  110          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 140, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*AP( KK )
+                     DO 120, K = KK - 1, KK - J + 1, -1
+                        IX   = IX   - INCX
+                        TEMP = TEMP + AP( K )*X( IX )
+  120                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*CONJG( AP( KK ) )
+                     DO 130, K = KK - 1, KK - J + 1, -1
+                        IX   = IX   - INCX
+                        TEMP = TEMP + CONJG( AP( K ) )*X( IX )
+  130                CONTINUE
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+                  KK      = KK   - J
+  140          CONTINUE
+            END IF
+         ELSE
+            KK = 1
+            IF( INCX.EQ.1 )THEN
+               DO 170, J = 1, N
+                  TEMP = X( J )
+                  K    = KK     + 1
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*AP( KK )
+                     DO 150, I = J + 1, N
+                        TEMP = TEMP + AP( K )*X( I )
+                        K    = K    + 1
+  150                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*CONJG( AP( KK ) )
+                     DO 160, I = J + 1, N
+                        TEMP = TEMP + CONJG( AP( K ) )*X( I )
+                        K    = K    + 1
+  160                CONTINUE
+                  END IF
+                  X( J ) = TEMP
+                  KK     = KK   + ( N - J + 1 )
+  170          CONTINUE
+            ELSE
+               JX = KX
+               DO 200, J = 1, N
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*AP( KK )
+                     DO 180, K = KK + 1, KK + N - J
+                        IX   = IX   + INCX
+                        TEMP = TEMP + AP( K )*X( IX )
+  180                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*CONJG( AP( KK ) )
+                     DO 190, K = KK + 1, KK + N - J
+                        IX   = IX   + INCX
+                        TEMP = TEMP + CONJG( AP( K ) )*X( IX )
+  190                CONTINUE
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+                  KK      = KK   + ( N - J + 1 )
+  200          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CTPMV .
+*
+      END
diff --git a/costa/native/external/blas/ctpsv.f b/costa/native/external/blas/ctpsv.f
new file mode 100644
index 000000000..2973949db
--- /dev/null
+++ b/costa/native/external/blas/ctpsv.f
@@ -0,0 +1,341 @@
+      SUBROUTINE CTPSV ( UPLO, TRANS, DIAG, N, AP, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      COMPLEX            AP( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTPSV  solves one of the systems of equations
+*
+*     A*x = b,   or   A'*x = b,   or   conjg( A' )*x = b,
+*
+*  where b and x are n element vectors and A is an n by n unit, or
+*  non-unit, upper or lower triangular matrix, supplied in packed form.
+*
+*  No test for singularity or near-singularity is included in this
+*  routine. Such tests must be performed before calling this routine.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the equations to be solved as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   A*x = b.
+*
+*              TRANS = 'T' or 't'   A'*x = b.
+*
+*              TRANS = 'C' or 'c'   conjg( A' )*x = b.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  AP     - COMPLEX          array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
+*           respectively, and so on.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
+*           respectively, and so on.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element right-hand side vector b. On exit, X is overwritten
+*           with the solution vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP
+      INTEGER            I, INFO, IX, J, JX, K, KK, KX
+      LOGICAL            NOCONJ, NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CTPSV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOCONJ = LSAME( TRANS, 'T' )
+      NOUNIT = LSAME( DIAG , 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of AP are
+*     accessed sequentially with one pass through AP.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x := inv( A )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/AP( KK )
+                     TEMP = X( J )
+                     K    = KK     - 1
+                     DO 10, I = J - 1, 1, -1
+                        X( I ) = X( I ) - TEMP*AP( K )
+                        K      = K      - 1
+   10                CONTINUE
+                  END IF
+                  KK = KK - J
+   20          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 40, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/AP( KK )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 30, K = KK - 1, KK - J + 1, -1
+                        IX      = IX      - INCX
+                        X( IX ) = X( IX ) - TEMP*AP( K )
+   30                CONTINUE
+                  END IF
+                  JX = JX - INCX
+                  KK = KK - J
+   40          CONTINUE
+            END IF
+         ELSE
+            KK = 1
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/AP( KK )
+                     TEMP = X( J )
+                     K    = KK     + 1
+                     DO 50, I = J + 1, N
+                        X( I ) = X( I ) - TEMP*AP( K )
+                        K      = K      + 1
+   50                CONTINUE
+                  END IF
+                  KK = KK + ( N - J + 1 )
+   60          CONTINUE
+            ELSE
+               JX = KX
+               DO 80, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/AP( KK )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 70, K = KK + 1, KK + N - J
+                        IX      = IX      + INCX
+                        X( IX ) = X( IX ) - TEMP*AP( K )
+   70                CONTINUE
+                  END IF
+                  JX = JX + INCX
+                  KK = KK + ( N - J + 1 )
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := inv( A' )*x  or  x := inv( conjg( A' ) )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK = 1
+            IF( INCX.EQ.1 )THEN
+               DO 110, J = 1, N
+                  TEMP = X( J )
+                  K    = KK
+                  IF( NOCONJ )THEN
+                     DO 90, I = 1, J - 1
+                        TEMP = TEMP - AP( K )*X( I )
+                        K    = K    + 1
+   90                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/AP( KK + J - 1 )
+                  ELSE
+                     DO 100, I = 1, J - 1
+                        TEMP = TEMP - CONJG( AP( K ) )*X( I )
+                        K    = K    + 1
+  100                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/CONJG( AP( KK + J - 1 ) )
+                  END IF
+                  X( J ) = TEMP
+                  KK     = KK   + J
+  110          CONTINUE
+            ELSE
+               JX = KX
+               DO 140, J = 1, N
+                  TEMP = X( JX )
+                  IX   = KX
+                  IF( NOCONJ )THEN
+                     DO 120, K = KK, KK + J - 2
+                        TEMP = TEMP - AP( K )*X( IX )
+                        IX   = IX   + INCX
+  120                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/AP( KK + J - 1 )
+                  ELSE
+                     DO 130, K = KK, KK + J - 2
+                        TEMP = TEMP - CONJG( AP( K ) )*X( IX )
+                        IX   = IX   + INCX
+  130                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/CONJG( AP( KK + J - 1 ) )
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+                  KK      = KK   + J
+  140          CONTINUE
+            END IF
+         ELSE
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 170, J = N, 1, -1
+                  TEMP = X( J )
+                  K    = KK
+                  IF( NOCONJ )THEN
+                     DO 150, I = N, J + 1, -1
+                        TEMP = TEMP - AP( K )*X( I )
+                        K    = K    - 1
+  150                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/AP( KK - N + J )
+                  ELSE
+                     DO 160, I = N, J + 1, -1
+                        TEMP = TEMP - CONJG( AP( K ) )*X( I )
+                        K    = K    - 1
+  160                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/CONJG( AP( KK - N + J ) )
+                  END IF
+                  X( J ) = TEMP
+                  KK     = KK   - ( N - J + 1 )
+  170          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 200, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = KX
+                  IF( NOCONJ )THEN
+                     DO 180, K = KK, KK - ( N - ( J + 1 ) ), -1
+                        TEMP = TEMP - AP( K )*X( IX )
+                        IX   = IX   - INCX
+  180                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/AP( KK - N + J )
+                  ELSE
+                     DO 190, K = KK, KK - ( N - ( J + 1 ) ), -1
+                        TEMP = TEMP - CONJG( AP( K ) )*X( IX )
+                        IX   = IX   - INCX
+  190                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/CONJG( AP( KK - N + J ) )
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+                  KK      = KK   - ( N - J + 1 )
+  200          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CTPSV .
+*
+      END
diff --git a/costa/native/external/blas/ctrmm.f b/costa/native/external/blas/ctrmm.f
new file mode 100644
index 000000000..521fe6e51
--- /dev/null
+++ b/costa/native/external/blas/ctrmm.f
@@ -0,0 +1,392 @@
+      SUBROUTINE CTRMM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA,
+     $                   B, LDB )
+*     .. Scalar Arguments ..
+      CHARACTER*1        SIDE, UPLO, TRANSA, DIAG
+      INTEGER            M, N, LDA, LDB
+      COMPLEX            ALPHA
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTRMM  performs one of the matrix-matrix operations
+*
+*     B := alpha*op( A )*B,   or   B := alpha*B*op( A )
+*
+*  where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or
+*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
+*
+*     op( A ) = A   or   op( A ) = A'   or   op( A ) = conjg( A' ).
+*
+*  Parameters
+*  ==========
+*
+*  SIDE   - CHARACTER*1.
+*           On entry,  SIDE specifies whether  op( A ) multiplies B from
+*           the left or right as follows:
+*
+*              SIDE = 'L' or 'l'   B := alpha*op( A )*B.
+*
+*              SIDE = 'R' or 'r'   B := alpha*B*op( A ).
+*
+*           Unchanged on exit.
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix A is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANSA - CHARACTER*1.
+*           On entry, TRANSA specifies the form of op( A ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSA = 'N' or 'n'   op( A ) = A.
+*
+*              TRANSA = 'T' or 't'   op( A ) = A'.
+*
+*              TRANSA = 'C' or 'c'   op( A ) = conjg( A' ).
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit triangular
+*           as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of B. M must be at
+*           least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of B.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
+*           zero then  A is not referenced and  B need not be set before
+*           entry.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, k ), where k is m
+*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
+*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
+*           upper triangular part of the array  A must contain the upper
+*           triangular matrix  and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
+*           lower triangular part of the array  A must contain the lower
+*           triangular matrix  and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
+*           A  are not referenced either,  but are assumed to be  unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
+*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
+*           then LDA must be at least max( 1, n ).
+*           Unchanged on exit.
+*
+*  B      - COMPLEX          array of DIMENSION ( LDB, n ).
+*           Before entry,  the leading  m by n part of the array  B must
+*           contain the matrix  B,  and  on exit  is overwritten  by the
+*           transformed matrix.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   LDB  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX
+*     .. Local Scalars ..
+      LOGICAL            LSIDE, NOCONJ, NOUNIT, UPPER
+      INTEGER            I, INFO, J, K, NROWA
+      COMPLEX            TEMP
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER        ( ONE  = ( 1.0E+0, 0.0E+0 ) )
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      LSIDE  = LSAME( SIDE  , 'L' )
+      IF( LSIDE )THEN
+         NROWA = M
+      ELSE
+         NROWA = N
+      END IF
+      NOCONJ = LSAME( TRANSA, 'T' )
+      NOUNIT = LSAME( DIAG  , 'N' )
+      UPPER  = LSAME( UPLO  , 'U' )
+*
+      INFO   = 0
+      IF(      ( .NOT.LSIDE                ).AND.
+     $         ( .NOT.LSAME( SIDE  , 'R' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.UPPER                ).AND.
+     $         ( .NOT.LSAME( UPLO  , 'L' ) )      )THEN
+         INFO = 2
+      ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'T' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'C' ) )      )THEN
+         INFO = 3
+      ELSE IF( ( .NOT.LSAME( DIAG  , 'U' ) ).AND.
+     $         ( .NOT.LSAME( DIAG  , 'N' ) )      )THEN
+         INFO = 4
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 5
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 6
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 9
+      ELSE IF( LDB.LT.MAX( 1, M     ) )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CTRMM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         DO 20, J = 1, N
+            DO 10, I = 1, M
+               B( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSIDE )THEN
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*A*B.
+*
+            IF( UPPER )THEN
+               DO 50, J = 1, N
+                  DO 40, K = 1, M
+                     IF( B( K, J ).NE.ZERO )THEN
+                        TEMP = ALPHA*B( K, J )
+                        DO 30, I = 1, K - 1
+                           B( I, J ) = B( I, J ) + TEMP*A( I, K )
+   30                   CONTINUE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP*A( K, K )
+                        B( K, J ) = TEMP
+                     END IF
+   40             CONTINUE
+   50          CONTINUE
+            ELSE
+               DO 80, J = 1, N
+                  DO 70 K = M, 1, -1
+                     IF( B( K, J ).NE.ZERO )THEN
+                        TEMP      = ALPHA*B( K, J )
+                        B( K, J ) = TEMP
+                        IF( NOUNIT )
+     $                     B( K, J ) = B( K, J )*A( K, K )
+                        DO 60, I = K + 1, M
+                           B( I, J ) = B( I, J ) + TEMP*A( I, K )
+   60                   CONTINUE
+                     END IF
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*A'*B   or   B := alpha*conjg( A' )*B.
+*
+            IF( UPPER )THEN
+               DO 120, J = 1, N
+                  DO 110, I = M, 1, -1
+                     TEMP = B( I, J )
+                     IF( NOCONJ )THEN
+                        IF( NOUNIT )
+     $                     TEMP = TEMP*A( I, I )
+                        DO 90, K = 1, I - 1
+                           TEMP = TEMP + A( K, I )*B( K, J )
+   90                   CONTINUE
+                     ELSE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP*CONJG( A( I, I ) )
+                        DO 100, K = 1, I - 1
+                           TEMP = TEMP + CONJG( A( K, I ) )*B( K, J )
+  100                   CONTINUE
+                     END IF
+                     B( I, J ) = ALPHA*TEMP
+  110             CONTINUE
+  120          CONTINUE
+            ELSE
+               DO 160, J = 1, N
+                  DO 150, I = 1, M
+                     TEMP = B( I, J )
+                     IF( NOCONJ )THEN
+                        IF( NOUNIT )
+     $                     TEMP = TEMP*A( I, I )
+                        DO 130, K = I + 1, M
+                           TEMP = TEMP + A( K, I )*B( K, J )
+  130                   CONTINUE
+                     ELSE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP*CONJG( A( I, I ) )
+                        DO 140, K = I + 1, M
+                           TEMP = TEMP + CONJG( A( K, I ) )*B( K, J )
+  140                   CONTINUE
+                     END IF
+                     B( I, J ) = ALPHA*TEMP
+  150             CONTINUE
+  160          CONTINUE
+            END IF
+         END IF
+      ELSE
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*B*A.
+*
+            IF( UPPER )THEN
+               DO 200, J = N, 1, -1
+                  TEMP = ALPHA
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 170, I = 1, M
+                     B( I, J ) = TEMP*B( I, J )
+  170             CONTINUE
+                  DO 190, K = 1, J - 1
+                     IF( A( K, J ).NE.ZERO )THEN
+                        TEMP = ALPHA*A( K, J )
+                        DO 180, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  180                   CONTINUE
+                     END IF
+  190             CONTINUE
+  200          CONTINUE
+            ELSE
+               DO 240, J = 1, N
+                  TEMP = ALPHA
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 210, I = 1, M
+                     B( I, J ) = TEMP*B( I, J )
+  210             CONTINUE
+                  DO 230, K = J + 1, N
+                     IF( A( K, J ).NE.ZERO )THEN
+                        TEMP = ALPHA*A( K, J )
+                        DO 220, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  220                   CONTINUE
+                     END IF
+  230             CONTINUE
+  240          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*B*A'   or   B := alpha*B*conjg( A' ).
+*
+            IF( UPPER )THEN
+               DO 280, K = 1, N
+                  DO 260, J = 1, K - 1
+                     IF( A( J, K ).NE.ZERO )THEN
+                        IF( NOCONJ )THEN
+                           TEMP = ALPHA*A( J, K )
+                        ELSE
+                           TEMP = ALPHA*CONJG( A( J, K ) )
+                        END IF
+                        DO 250, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  250                   CONTINUE
+                     END IF
+  260             CONTINUE
+                  TEMP = ALPHA
+                  IF( NOUNIT )THEN
+                     IF( NOCONJ )THEN
+                        TEMP = TEMP*A( K, K )
+                     ELSE
+                        TEMP = TEMP*CONJG( A( K, K ) )
+                     END IF
+                  END IF
+                  IF( TEMP.NE.ONE )THEN
+                     DO 270, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  270                CONTINUE
+                  END IF
+  280          CONTINUE
+            ELSE
+               DO 320, K = N, 1, -1
+                  DO 300, J = K + 1, N
+                     IF( A( J, K ).NE.ZERO )THEN
+                        IF( NOCONJ )THEN
+                           TEMP = ALPHA*A( J, K )
+                        ELSE
+                           TEMP = ALPHA*CONJG( A( J, K ) )
+                        END IF
+                        DO 290, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  290                   CONTINUE
+                     END IF
+  300             CONTINUE
+                  TEMP = ALPHA
+                  IF( NOUNIT )THEN
+                     IF( NOCONJ )THEN
+                        TEMP = TEMP*A( K, K )
+                     ELSE
+                        TEMP = TEMP*CONJG( A( K, K ) )
+                     END IF
+                  END IF
+                  IF( TEMP.NE.ONE )THEN
+                     DO 310, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  310                CONTINUE
+                  END IF
+  320          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CTRMM .
+*
+      END
diff --git a/costa/native/external/blas/ctrmv.f b/costa/native/external/blas/ctrmv.f
new file mode 100644
index 000000000..cdf948961
--- /dev/null
+++ b/costa/native/external/blas/ctrmv.f
@@ -0,0 +1,321 @@
+      SUBROUTINE CTRMV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTRMV  performs one of the matrix-vector operations
+*
+*     x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x,
+*
+*  where x is an n element vector and  A is an n by n unit, or non-unit,
+*  upper or lower triangular matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   x := A*x.
+*
+*              TRANS = 'T' or 't'   x := A'*x.
+*
+*              TRANS = 'C' or 'c'   x := conjg( A' )*x.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular matrix and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular matrix and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced either, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x. On exit, X is overwritten with the
+*           tranformed vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP
+      INTEGER            I, INFO, IX, J, JX, KX
+      LOGICAL            NOCONJ, NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CTRMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOCONJ = LSAME( TRANS, 'T' )
+      NOUNIT = LSAME( DIAG , 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x := A*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     DO 10, I = 1, J - 1
+                        X( I ) = X( I ) + TEMP*A( I, J )
+   10                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( J, J )
+                  END IF
+   20          CONTINUE
+            ELSE
+               JX = KX
+               DO 40, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 30, I = 1, J - 1
+                        X( IX ) = X( IX ) + TEMP*A( I, J )
+                        IX      = IX      + INCX
+   30                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( J, J )
+                  END IF
+                  JX = JX + INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     DO 50, I = N, J + 1, -1
+                        X( I ) = X( I ) + TEMP*A( I, J )
+   50                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( J, J )
+                  END IF
+   60          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 80, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 70, I = N, J + 1, -1
+                        X( IX ) = X( IX ) + TEMP*A( I, J )
+                        IX      = IX      - INCX
+   70                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( J, J )
+                  END IF
+                  JX = JX - INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := A'*x  or  x := conjg( A' )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 110, J = N, 1, -1
+                  TEMP = X( J )
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( J, J )
+                     DO 90, I = J - 1, 1, -1
+                        TEMP = TEMP + A( I, J )*X( I )
+   90                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*CONJG( A( J, J ) )
+                     DO 100, I = J - 1, 1, -1
+                        TEMP = TEMP + CONJG( A( I, J ) )*X( I )
+  100                CONTINUE
+                  END IF
+                  X( J ) = TEMP
+  110          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 140, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( J, J )
+                     DO 120, I = J - 1, 1, -1
+                        IX   = IX   - INCX
+                        TEMP = TEMP + A( I, J )*X( IX )
+  120                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*CONJG( A( J, J ) )
+                     DO 130, I = J - 1, 1, -1
+                        IX   = IX   - INCX
+                        TEMP = TEMP + CONJG( A( I, J ) )*X( IX )
+  130                CONTINUE
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+  140          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 170, J = 1, N
+                  TEMP = X( J )
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( J, J )
+                     DO 150, I = J + 1, N
+                        TEMP = TEMP + A( I, J )*X( I )
+  150                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*CONJG( A( J, J ) )
+                     DO 160, I = J + 1, N
+                        TEMP = TEMP + CONJG( A( I, J ) )*X( I )
+  160                CONTINUE
+                  END IF
+                  X( J ) = TEMP
+  170          CONTINUE
+            ELSE
+               JX = KX
+               DO 200, J = 1, N
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( J, J )
+                     DO 180, I = J + 1, N
+                        IX   = IX   + INCX
+                        TEMP = TEMP + A( I, J )*X( IX )
+  180                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*CONJG( A( J, J ) )
+                     DO 190, I = J + 1, N
+                        IX   = IX   + INCX
+                        TEMP = TEMP + CONJG( A( I, J ) )*X( IX )
+  190                CONTINUE
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+  200          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CTRMV .
+*
+      END
diff --git a/costa/native/external/blas/ctrsm.f b/costa/native/external/blas/ctrsm.f
new file mode 100644
index 000000000..f9b74dccd
--- /dev/null
+++ b/costa/native/external/blas/ctrsm.f
@@ -0,0 +1,414 @@
+      SUBROUTINE CTRSM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA,
+     $                   B, LDB )
+*     .. Scalar Arguments ..
+      CHARACTER*1        SIDE, UPLO, TRANSA, DIAG
+      INTEGER            M, N, LDA, LDB
+      COMPLEX            ALPHA
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTRSM  solves one of the matrix equations
+*
+*     op( A )*X = alpha*B,   or   X*op( A ) = alpha*B,
+*
+*  where alpha is a scalar, X and B are m by n matrices, A is a unit, or
+*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
+*
+*     op( A ) = A   or   op( A ) = A'   or   op( A ) = conjg( A' ).
+*
+*  The matrix X is overwritten on B.
+*
+*  Parameters
+*  ==========
+*
+*  SIDE   - CHARACTER*1.
+*           On entry, SIDE specifies whether op( A ) appears on the left
+*           or right of X as follows:
+*
+*              SIDE = 'L' or 'l'   op( A )*X = alpha*B.
+*
+*              SIDE = 'R' or 'r'   X*op( A ) = alpha*B.
+*
+*           Unchanged on exit.
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix A is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANSA - CHARACTER*1.
+*           On entry, TRANSA specifies the form of op( A ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSA = 'N' or 'n'   op( A ) = A.
+*
+*              TRANSA = 'T' or 't'   op( A ) = A'.
+*
+*              TRANSA = 'C' or 'c'   op( A ) = conjg( A' ).
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit triangular
+*           as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of B. M must be at
+*           least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of B.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX         .
+*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
+*           zero then  A is not referenced and  B need not be set before
+*           entry.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, k ), where k is m
+*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
+*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
+*           upper triangular part of the array  A must contain the upper
+*           triangular matrix  and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
+*           lower triangular part of the array  A must contain the lower
+*           triangular matrix  and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
+*           A  are not referenced either,  but are assumed to be  unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
+*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
+*           then LDA must be at least max( 1, n ).
+*           Unchanged on exit.
+*
+*  B      - COMPLEX          array of DIMENSION ( LDB, n ).
+*           Before entry,  the leading  m by n part of the array  B must
+*           contain  the  right-hand  side  matrix  B,  and  on exit  is
+*           overwritten by the solution matrix  X.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   LDB  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX
+*     .. Local Scalars ..
+      LOGICAL            LSIDE, NOCONJ, NOUNIT, UPPER
+      INTEGER            I, INFO, J, K, NROWA
+      COMPLEX            TEMP
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER        ( ONE  = ( 1.0E+0, 0.0E+0 ) )
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      LSIDE  = LSAME( SIDE  , 'L' )
+      IF( LSIDE )THEN
+         NROWA = M
+      ELSE
+         NROWA = N
+      END IF
+      NOCONJ = LSAME( TRANSA, 'T' )
+      NOUNIT = LSAME( DIAG  , 'N' )
+      UPPER  = LSAME( UPLO  , 'U' )
+*
+      INFO   = 0
+      IF(      ( .NOT.LSIDE                ).AND.
+     $         ( .NOT.LSAME( SIDE  , 'R' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.UPPER                ).AND.
+     $         ( .NOT.LSAME( UPLO  , 'L' ) )      )THEN
+         INFO = 2
+      ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'T' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'C' ) )      )THEN
+         INFO = 3
+      ELSE IF( ( .NOT.LSAME( DIAG  , 'U' ) ).AND.
+     $         ( .NOT.LSAME( DIAG  , 'N' ) )      )THEN
+         INFO = 4
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 5
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 6
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 9
+      ELSE IF( LDB.LT.MAX( 1, M     ) )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CTRSM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         DO 20, J = 1, N
+            DO 10, I = 1, M
+               B( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSIDE )THEN
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*inv( A )*B.
+*
+            IF( UPPER )THEN
+               DO 60, J = 1, N
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 30, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+   30                CONTINUE
+                  END IF
+                  DO 50, K = M, 1, -1
+                     IF( B( K, J ).NE.ZERO )THEN
+                        IF( NOUNIT )
+     $                     B( K, J ) = B( K, J )/A( K, K )
+                        DO 40, I = 1, K - 1
+                           B( I, J ) = B( I, J ) - B( K, J )*A( I, K )
+   40                   CONTINUE
+                     END IF
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 100, J = 1, N
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 70, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+   70                CONTINUE
+                  END IF
+                  DO 90 K = 1, M
+                     IF( B( K, J ).NE.ZERO )THEN
+                        IF( NOUNIT )
+     $                     B( K, J ) = B( K, J )/A( K, K )
+                        DO 80, I = K + 1, M
+                           B( I, J ) = B( I, J ) - B( K, J )*A( I, K )
+   80                   CONTINUE
+                     END IF
+   90             CONTINUE
+  100          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*inv( A' )*B
+*           or    B := alpha*inv( conjg( A' ) )*B.
+*
+            IF( UPPER )THEN
+               DO 140, J = 1, N
+                  DO 130, I = 1, M
+                     TEMP = ALPHA*B( I, J )
+                     IF( NOCONJ )THEN
+                        DO 110, K = 1, I - 1
+                           TEMP = TEMP - A( K, I )*B( K, J )
+  110                   CONTINUE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP/A( I, I )
+                     ELSE
+                        DO 120, K = 1, I - 1
+                           TEMP = TEMP - CONJG( A( K, I ) )*B( K, J )
+  120                   CONTINUE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP/CONJG( A( I, I ) )
+                     END IF
+                     B( I, J ) = TEMP
+  130             CONTINUE
+  140          CONTINUE
+            ELSE
+               DO 180, J = 1, N
+                  DO 170, I = M, 1, -1
+                     TEMP = ALPHA*B( I, J )
+                     IF( NOCONJ )THEN
+                        DO 150, K = I + 1, M
+                           TEMP = TEMP - A( K, I )*B( K, J )
+  150                   CONTINUE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP/A( I, I )
+                     ELSE
+                        DO 160, K = I + 1, M
+                           TEMP = TEMP - CONJG( A( K, I ) )*B( K, J )
+  160                   CONTINUE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP/CONJG( A( I, I ) )
+                     END IF
+                     B( I, J ) = TEMP
+  170             CONTINUE
+  180          CONTINUE
+            END IF
+         END IF
+      ELSE
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*B*inv( A ).
+*
+            IF( UPPER )THEN
+               DO 230, J = 1, N
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 190, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+  190                CONTINUE
+                  END IF
+                  DO 210, K = 1, J - 1
+                     IF( A( K, J ).NE.ZERO )THEN
+                        DO 200, I = 1, M
+                           B( I, J ) = B( I, J ) - A( K, J )*B( I, K )
+  200                   CONTINUE
+                     END IF
+  210             CONTINUE
+                  IF( NOUNIT )THEN
+                     TEMP = ONE/A( J, J )
+                     DO 220, I = 1, M
+                        B( I, J ) = TEMP*B( I, J )
+  220                CONTINUE
+                  END IF
+  230          CONTINUE
+            ELSE
+               DO 280, J = N, 1, -1
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 240, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+  240                CONTINUE
+                  END IF
+                  DO 260, K = J + 1, N
+                     IF( A( K, J ).NE.ZERO )THEN
+                        DO 250, I = 1, M
+                           B( I, J ) = B( I, J ) - A( K, J )*B( I, K )
+  250                   CONTINUE
+                     END IF
+  260             CONTINUE
+                  IF( NOUNIT )THEN
+                     TEMP = ONE/A( J, J )
+                     DO 270, I = 1, M
+                       B( I, J ) = TEMP*B( I, J )
+  270                CONTINUE
+                  END IF
+  280          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*B*inv( A' )
+*           or    B := alpha*B*inv( conjg( A' ) ).
+*
+            IF( UPPER )THEN
+               DO 330, K = N, 1, -1
+                  IF( NOUNIT )THEN
+                     IF( NOCONJ )THEN
+                        TEMP = ONE/A( K, K )
+                     ELSE
+                        TEMP = ONE/CONJG( A( K, K ) )
+                     END IF
+                     DO 290, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  290                CONTINUE
+                  END IF
+                  DO 310, J = 1, K - 1
+                     IF( A( J, K ).NE.ZERO )THEN
+                        IF( NOCONJ )THEN
+                           TEMP = A( J, K )
+                        ELSE
+                           TEMP = CONJG( A( J, K ) )
+                        END IF
+                        DO 300, I = 1, M
+                           B( I, J ) = B( I, J ) - TEMP*B( I, K )
+  300                   CONTINUE
+                     END IF
+  310             CONTINUE
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 320, I = 1, M
+                        B( I, K ) = ALPHA*B( I, K )
+  320                CONTINUE
+                  END IF
+  330          CONTINUE
+            ELSE
+               DO 380, K = 1, N
+                  IF( NOUNIT )THEN
+                     IF( NOCONJ )THEN
+                        TEMP = ONE/A( K, K )
+                     ELSE
+                        TEMP = ONE/CONJG( A( K, K ) )
+                     END IF
+                     DO 340, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  340                CONTINUE
+                  END IF
+                  DO 360, J = K + 1, N
+                     IF( A( J, K ).NE.ZERO )THEN
+                        IF( NOCONJ )THEN
+                           TEMP = A( J, K )
+                        ELSE
+                           TEMP = CONJG( A( J, K ) )
+                        END IF
+                        DO 350, I = 1, M
+                           B( I, J ) = B( I, J ) - TEMP*B( I, K )
+  350                   CONTINUE
+                     END IF
+  360             CONTINUE
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 370, I = 1, M
+                        B( I, K ) = ALPHA*B( I, K )
+  370                CONTINUE
+                  END IF
+  380          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CTRSM .
+*
+      END
diff --git a/costa/native/external/blas/ctrsv.f b/costa/native/external/blas/ctrsv.f
new file mode 100644
index 000000000..a14b1abf0
--- /dev/null
+++ b/costa/native/external/blas/ctrsv.f
@@ -0,0 +1,324 @@
+      SUBROUTINE CTRSV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTRSV  solves one of the systems of equations
+*
+*     A*x = b,   or   A'*x = b,   or   conjg( A' )*x = b,
+*
+*  where b and x are n element vectors and A is an n by n unit, or
+*  non-unit, upper or lower triangular matrix.
+*
+*  No test for singularity or near-singularity is included in this
+*  routine. Such tests must be performed before calling this routine.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the equations to be solved as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   A*x = b.
+*
+*              TRANS = 'T' or 't'   A'*x = b.
+*
+*              TRANS = 'C' or 'c'   conjg( A' )*x = b.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX          array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular matrix and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular matrix and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced either, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX          array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element right-hand side vector b. On exit, X is overwritten
+*           with the solution vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER        ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     .. Local Scalars ..
+      COMPLEX            TEMP
+      INTEGER            I, INFO, IX, J, JX, KX
+      LOGICAL            NOCONJ, NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'CTRSV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOCONJ = LSAME( TRANS, 'T' )
+      NOUNIT = LSAME( DIAG , 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x := inv( A )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( J, J )
+                     TEMP = X( J )
+                     DO 10, I = J - 1, 1, -1
+                        X( I ) = X( I ) - TEMP*A( I, J )
+   10                CONTINUE
+                  END IF
+   20          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 40, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( J, J )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 30, I = J - 1, 1, -1
+                        IX      = IX      - INCX
+                        X( IX ) = X( IX ) - TEMP*A( I, J )
+   30                CONTINUE
+                  END IF
+                  JX = JX - INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( J, J )
+                     TEMP = X( J )
+                     DO 50, I = J + 1, N
+                        X( I ) = X( I ) - TEMP*A( I, J )
+   50                CONTINUE
+                  END IF
+   60          CONTINUE
+            ELSE
+               JX = KX
+               DO 80, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( J, J )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 70, I = J + 1, N
+                        IX      = IX      + INCX
+                        X( IX ) = X( IX ) - TEMP*A( I, J )
+   70                CONTINUE
+                  END IF
+                  JX = JX + INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := inv( A' )*x  or  x := inv( conjg( A' ) )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 110, J = 1, N
+                  TEMP = X( J )
+                  IF( NOCONJ )THEN
+                     DO 90, I = 1, J - 1
+                        TEMP = TEMP - A( I, J )*X( I )
+   90                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( J, J )
+                  ELSE
+                     DO 100, I = 1, J - 1
+                        TEMP = TEMP - CONJG( A( I, J ) )*X( I )
+  100                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/CONJG( A( J, J ) )
+                  END IF
+                  X( J ) = TEMP
+  110          CONTINUE
+            ELSE
+               JX = KX
+               DO 140, J = 1, N
+                  IX   = KX
+                  TEMP = X( JX )
+                  IF( NOCONJ )THEN
+                     DO 120, I = 1, J - 1
+                        TEMP = TEMP - A( I, J )*X( IX )
+                        IX   = IX   + INCX
+  120                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( J, J )
+                  ELSE
+                     DO 130, I = 1, J - 1
+                        TEMP = TEMP - CONJG( A( I, J ) )*X( IX )
+                        IX   = IX   + INCX
+  130                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/CONJG( A( J, J ) )
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+  140          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 170, J = N, 1, -1
+                  TEMP = X( J )
+                  IF( NOCONJ )THEN
+                     DO 150, I = N, J + 1, -1
+                        TEMP = TEMP - A( I, J )*X( I )
+  150                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( J, J )
+                  ELSE
+                     DO 160, I = N, J + 1, -1
+                        TEMP = TEMP - CONJG( A( I, J ) )*X( I )
+  160                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/CONJG( A( J, J ) )
+                  END IF
+                  X( J ) = TEMP
+  170          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 200, J = N, 1, -1
+                  IX   = KX
+                  TEMP = X( JX )
+                  IF( NOCONJ )THEN
+                     DO 180, I = N, J + 1, -1
+                        TEMP = TEMP - A( I, J )*X( IX )
+                        IX   = IX   - INCX
+  180                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( J, J )
+                  ELSE
+                     DO 190, I = N, J + 1, -1
+                        TEMP = TEMP - CONJG( A( I, J ) )*X( IX )
+                        IX   = IX   - INCX
+  190                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/CONJG( A( J, J ) )
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+  200          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CTRSV .
+*
+      END
diff --git a/costa/native/external/blas/dasum.f b/costa/native/external/blas/dasum.f
new file mode 100644
index 000000000..28b128a84
--- /dev/null
+++ b/costa/native/external/blas/dasum.f
@@ -0,0 +1,43 @@
+      double precision function dasum(n,dx,incx)
+c
+c     takes the sum of the absolute values.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 3/93 to return if incx .le. 0.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      double precision dx(*),dtemp
+      integer i,incx,m,mp1,n,nincx
+c
+      dasum = 0.0d0
+      dtemp = 0.0d0
+      if( n.le.0 .or. incx.le.0 )return
+      if(incx.eq.1)go to 20
+c
+c        code for increment not equal to 1
+c
+      nincx = n*incx
+      do 10 i = 1,nincx,incx
+        dtemp = dtemp + dabs(dx(i))
+   10 continue
+      dasum = dtemp
+      return
+c
+c        code for increment equal to 1
+c
+c
+c        clean-up loop
+c
+   20 m = mod(n,6)
+      if( m .eq. 0 ) go to 40
+      do 30 i = 1,m
+        dtemp = dtemp + dabs(dx(i))
+   30 continue
+      if( n .lt. 6 ) go to 60
+   40 mp1 = m + 1
+      do 50 i = mp1,n,6
+        dtemp = dtemp + dabs(dx(i)) + dabs(dx(i + 1)) + dabs(dx(i + 2))
+     *  + dabs(dx(i + 3)) + dabs(dx(i + 4)) + dabs(dx(i + 5))
+   50 continue
+   60 dasum = dtemp
+      return
+      end
diff --git a/costa/native/external/blas/daxpy.f b/costa/native/external/blas/daxpy.f
new file mode 100644
index 000000000..91daa3c64
--- /dev/null
+++ b/costa/native/external/blas/daxpy.f
@@ -0,0 +1,48 @@
+      subroutine daxpy(n,da,dx,incx,dy,incy)
+c
+c     constant times a vector plus a vector.
+c     uses unrolled loops for increments equal to one.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      double precision dx(*),dy(*),da
+      integer i,incx,incy,ix,iy,m,mp1,n
+c
+      if(n.le.0)return
+      if (da .eq. 0.0d0) return
+      if(incx.eq.1.and.incy.eq.1)go to 20
+c
+c        code for unequal increments or equal increments
+c          not equal to 1
+c
+      ix = 1
+      iy = 1
+      if(incx.lt.0)ix = (-n+1)*incx + 1
+      if(incy.lt.0)iy = (-n+1)*incy + 1
+      do 10 i = 1,n
+        dy(iy) = dy(iy) + da*dx(ix)
+        ix = ix + incx
+        iy = iy + incy
+   10 continue
+      return
+c
+c        code for both increments equal to 1
+c
+c
+c        clean-up loop
+c
+   20 m = mod(n,4)
+      if( m .eq. 0 ) go to 40
+      do 30 i = 1,m
+        dy(i) = dy(i) + da*dx(i)
+   30 continue
+      if( n .lt. 4 ) return
+   40 mp1 = m + 1
+      do 50 i = mp1,n,4
+        dy(i) = dy(i) + da*dx(i)
+        dy(i + 1) = dy(i + 1) + da*dx(i + 1)
+        dy(i + 2) = dy(i + 2) + da*dx(i + 2)
+        dy(i + 3) = dy(i + 3) + da*dx(i + 3)
+   50 continue
+      return
+      end
diff --git a/costa/native/external/blas/dcabs1.f b/costa/native/external/blas/dcabs1.f
new file mode 100644
index 000000000..385ea5e1a
--- /dev/null
+++ b/costa/native/external/blas/dcabs1.f
@@ -0,0 +1,8 @@
+      double precision function dcabs1(z)
+      double complex z,zz
+      double precision t(2)
+      equivalence (zz,t(1))
+      zz = z
+      dcabs1 = dabs(t(1)) + dabs(t(2))
+      return
+      end
diff --git a/costa/native/external/blas/dcopy.f b/costa/native/external/blas/dcopy.f
new file mode 100644
index 000000000..e16892716
--- /dev/null
+++ b/costa/native/external/blas/dcopy.f
@@ -0,0 +1,50 @@
+      subroutine  dcopy(n,dx,incx,dy,incy)
+c
+c     copies a vector, x, to a vector, y.
+c     uses unrolled loops for increments equal to one.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      double precision dx(*),dy(*)
+      integer i,incx,incy,ix,iy,m,mp1,n
+c
+      if(n.le.0)return
+      if(incx.eq.1.and.incy.eq.1)go to 20
+c
+c        code for unequal increments or equal increments
+c          not equal to 1
+c
+      ix = 1
+      iy = 1
+      if(incx.lt.0)ix = (-n+1)*incx + 1
+      if(incy.lt.0)iy = (-n+1)*incy + 1
+      do 10 i = 1,n
+        dy(iy) = dx(ix)
+        ix = ix + incx
+        iy = iy + incy
+   10 continue
+      return
+c
+c        code for both increments equal to 1
+c
+c
+c        clean-up loop
+c
+   20 m = mod(n,7)
+      if( m .eq. 0 ) go to 40
+      do 30 i = 1,m
+        dy(i) = dx(i)
+   30 continue
+      if( n .lt. 7 ) return
+   40 mp1 = m + 1
+      do 50 i = mp1,n,7
+        dy(i) = dx(i)
+        dy(i + 1) = dx(i + 1)
+        dy(i + 2) = dx(i + 2)
+        dy(i + 3) = dx(i + 3)
+        dy(i + 4) = dx(i + 4)
+        dy(i + 5) = dx(i + 5)
+        dy(i + 6) = dx(i + 6)
+   50 continue
+      return
+      end
diff --git a/costa/native/external/blas/ddot.f b/costa/native/external/blas/ddot.f
new file mode 100644
index 000000000..e04c7c25e
--- /dev/null
+++ b/costa/native/external/blas/ddot.f
@@ -0,0 +1,49 @@
+      double precision function ddot(n,dx,incx,dy,incy)
+c
+c     forms the dot product of two vectors.
+c     uses unrolled loops for increments equal to one.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      double precision dx(*),dy(*),dtemp
+      integer i,incx,incy,ix,iy,m,mp1,n
+c
+      ddot = 0.0d0
+      dtemp = 0.0d0
+      if(n.le.0)return
+      if(incx.eq.1.and.incy.eq.1)go to 20
+c
+c        code for unequal increments or equal increments
+c          not equal to 1
+c
+      ix = 1
+      iy = 1
+      if(incx.lt.0)ix = (-n+1)*incx + 1
+      if(incy.lt.0)iy = (-n+1)*incy + 1
+      do 10 i = 1,n
+        dtemp = dtemp + dx(ix)*dy(iy)
+        ix = ix + incx
+        iy = iy + incy
+   10 continue
+      ddot = dtemp
+      return
+c
+c        code for both increments equal to 1
+c
+c
+c        clean-up loop
+c
+   20 m = mod(n,5)
+      if( m .eq. 0 ) go to 40
+      do 30 i = 1,m
+        dtemp = dtemp + dx(i)*dy(i)
+   30 continue
+      if( n .lt. 5 ) go to 60
+   40 mp1 = m + 1
+      do 50 i = mp1,n,5
+        dtemp = dtemp + dx(i)*dy(i) + dx(i + 1)*dy(i + 1) +
+     *   dx(i + 2)*dy(i + 2) + dx(i + 3)*dy(i + 3) + dx(i + 4)*dy(i + 4)
+   50 continue
+   60 ddot = dtemp
+      return
+      end
diff --git a/costa/native/external/blas/dgbmv.f b/costa/native/external/blas/dgbmv.f
new file mode 100644
index 000000000..e9c8f76fb
--- /dev/null
+++ b/costa/native/external/blas/dgbmv.f
@@ -0,0 +1,300 @@
+      SUBROUTINE DGBMV ( TRANS, M, N, KL, KU, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   ALPHA, BETA
+      INTEGER            INCX, INCY, KL, KU, LDA, M, N
+      CHARACTER*1        TRANS
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGBMV  performs one of the matrix-vector operations
+*
+*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are vectors and A is an
+*  m by n band matrix, with kl sub-diagonals and ku super-diagonals.
+*
+*  Parameters
+*  ==========
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
+*
+*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.
+*
+*              TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of the matrix A.
+*           M must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  KL     - INTEGER.
+*           On entry, KL specifies the number of sub-diagonals of the
+*           matrix A. KL must satisfy  0 .le. KL.
+*           Unchanged on exit.
+*
+*  KU     - INTEGER.
+*           On entry, KU specifies the number of super-diagonals of the
+*           matrix A. KU must satisfy  0 .le. KU.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+*           Before entry, the leading ( kl + ku + 1 ) by n part of the
+*           array A must contain the matrix of coefficients, supplied
+*           column by column, with the leading diagonal of the matrix in
+*           row ( ku + 1 ) of the array, the first super-diagonal
+*           starting at position 2 in row ku, the first sub-diagonal
+*           starting at position 1 in row ( ku + 2 ), and so on.
+*           Elements in the array A that do not correspond to elements
+*           in the band matrix (such as the top left ku by ku triangle)
+*           are not referenced.
+*           The following program segment will transfer a band matrix
+*           from conventional full matrix storage to band storage:
+*
+*                 DO 20, J = 1, N
+*                    K = KU + 1 - J
+*                    DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL )
+*                       A( K + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( kl + ku + 1 ).
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
+*           Before entry, the incremented array X must contain the
+*           vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - DOUBLE PRECISION.
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - DOUBLE PRECISION array of DIMENSION at least
+*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
+*           Before entry, the incremented array Y must contain the
+*           vector y. On exit, Y is overwritten by the updated vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE         , ZERO
+      PARAMETER        ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP
+      INTEGER            I, INFO, IX, IY, J, JX, JY, K, KUP1, KX, KY,
+     $                   LENX, LENY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 1
+      ELSE IF( M.LT.0 )THEN
+         INFO = 2
+      ELSE IF( N.LT.0 )THEN
+         INFO = 3
+      ELSE IF( KL.LT.0 )THEN
+         INFO = 4
+      ELSE IF( KU.LT.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.( KL + KU + 1 ) )THEN
+         INFO = 8
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 10
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 13
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DGBMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
+*     up the start points in  X  and  Y.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+         LENX = N
+         LENY = M
+      ELSE
+         LENX = M
+         LENY = N
+      END IF
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( LENX - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( LENY - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the band part of A.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, LENY
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, LENY
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, LENY
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, LENY
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      KUP1 = KU + 1
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  y := alpha*A*x + y.
+*
+         JX = KX
+         IF( INCY.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  K    = KUP1 - J
+                  DO 50, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     Y( I ) = Y( I ) + TEMP*A( K + I, J )
+   50             CONTINUE
+               END IF
+               JX = JX + INCX
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IY   = KY
+                  K    = KUP1 - J
+                  DO 70, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     Y( IY ) = Y( IY ) + TEMP*A( K + I, J )
+                     IY      = IY      + INCY
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+               IF( J.GT.KU )
+     $            KY = KY + INCY
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y := alpha*A'*x + y.
+*
+         JY = KY
+         IF( INCX.EQ.1 )THEN
+            DO 100, J = 1, N
+               TEMP = ZERO
+               K    = KUP1 - J
+               DO 90, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                  TEMP = TEMP + A( K + I, J )*X( I )
+   90          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+  100       CONTINUE
+         ELSE
+            DO 120, J = 1, N
+               TEMP = ZERO
+               IX   = KX
+               K    = KUP1 - J
+               DO 110, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                  TEMP = TEMP + A( K + I, J )*X( IX )
+                  IX   = IX   + INCX
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+               IF( J.GT.KU )
+     $            KX = KX + INCX
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DGBMV .
+*
+      END
diff --git a/costa/native/external/blas/dgemm.f b/costa/native/external/blas/dgemm.f
new file mode 100644
index 000000000..baabe4c52
--- /dev/null
+++ b/costa/native/external/blas/dgemm.f
@@ -0,0 +1,313 @@
+      SUBROUTINE DGEMM ( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        TRANSA, TRANSB
+      INTEGER            M, N, K, LDA, LDB, LDC
+      DOUBLE PRECISION   ALPHA, BETA
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGEMM  performs one of the matrix-matrix operations
+*
+*     C := alpha*op( A )*op( B ) + beta*C,
+*
+*  where  op( X ) is one of
+*
+*     op( X ) = X   or   op( X ) = X',
+*
+*  alpha and beta are scalars, and A, B and C are matrices, with op( A )
+*  an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix.
+*
+*  Parameters
+*  ==========
+*
+*  TRANSA - CHARACTER*1.
+*           On entry, TRANSA specifies the form of op( A ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSA = 'N' or 'n',  op( A ) = A.
+*
+*              TRANSA = 'T' or 't',  op( A ) = A'.
+*
+*              TRANSA = 'C' or 'c',  op( A ) = A'.
+*
+*           Unchanged on exit.
+*
+*  TRANSB - CHARACTER*1.
+*           On entry, TRANSB specifies the form of op( B ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSB = 'N' or 'n',  op( B ) = B.
+*
+*              TRANSB = 'T' or 't',  op( B ) = B'.
+*
+*              TRANSB = 'C' or 'c',  op( B ) = B'.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry,  M  specifies  the number  of rows  of the  matrix
+*           op( A )  and of the  matrix  C.  M  must  be at least  zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N  specifies the number  of columns of the matrix
+*           op( B ) and the number of columns of the matrix C. N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry,  K  specifies  the number of columns of the matrix
+*           op( A ) and the number of rows of the matrix op( B ). K must
+*           be at least  zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.
+*           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by m  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. When  TRANSA = 'N' or 'n' then
+*           LDA must be at least  max( 1, m ), otherwise  LDA must be at
+*           least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  B      - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is
+*           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.
+*           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n
+*           part of the array  B  must contain the matrix  B,  otherwise
+*           the leading  n by k  part of the array  B  must contain  the
+*           matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in the calling (sub) program. When  TRANSB = 'N' or 'n' then
+*           LDB must be at least  max( 1, k ), otherwise  LDB must be at
+*           least  max( 1, n ).
+*           Unchanged on exit.
+*
+*  BETA   - DOUBLE PRECISION.
+*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
+*           supplied as zero then C need not be set on input.
+*           Unchanged on exit.
+*
+*  C      - DOUBLE PRECISION array of DIMENSION ( LDC, n ).
+*           Before entry, the leading  m by n  part of the array  C must
+*           contain the matrix  C,  except when  beta  is zero, in which
+*           case C need not be set on entry.
+*           On exit, the array  C  is overwritten by the  m by n  matrix
+*           ( alpha*op( A )*op( B ) + beta*C ).
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            NOTA, NOTB
+      INTEGER            I, INFO, J, L, NCOLA, NROWA, NROWB
+      DOUBLE PRECISION   TEMP
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE         , ZERO
+      PARAMETER        ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Executable Statements ..
+*
+*     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not
+*     transposed and set  NROWA, NCOLA and  NROWB  as the number of rows
+*     and  columns of  A  and the  number of  rows  of  B  respectively.
+*
+      NOTA  = LSAME( TRANSA, 'N' )
+      NOTB  = LSAME( TRANSB, 'N' )
+      IF( NOTA )THEN
+         NROWA = M
+         NCOLA = K
+      ELSE
+         NROWA = K
+         NCOLA = M
+      END IF
+      IF( NOTB )THEN
+         NROWB = K
+      ELSE
+         NROWB = N
+      END IF
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF(      ( .NOT.NOTA                 ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'C' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'T' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.NOTB                 ).AND.
+     $         ( .NOT.LSAME( TRANSB, 'C' ) ).AND.
+     $         ( .NOT.LSAME( TRANSB, 'T' ) )      )THEN
+         INFO = 2
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( K  .LT.0               )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 8
+      ELSE IF( LDB.LT.MAX( 1, NROWB ) )THEN
+         INFO = 10
+      ELSE IF( LDC.LT.MAX( 1, M     ) )THEN
+         INFO = 13
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DGEMM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And if  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( BETA.EQ.ZERO )THEN
+            DO 20, J = 1, N
+               DO 10, I = 1, M
+                  C( I, J ) = ZERO
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               DO 30, I = 1, M
+                  C( I, J ) = BETA*C( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( NOTB )THEN
+         IF( NOTA )THEN
+*
+*           Form  C := alpha*A*B + beta*C.
+*
+            DO 90, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 50, I = 1, M
+                     C( I, J ) = ZERO
+   50             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 60, I = 1, M
+                     C( I, J ) = BETA*C( I, J )
+   60             CONTINUE
+               END IF
+               DO 80, L = 1, K
+                  IF( B( L, J ).NE.ZERO )THEN
+                     TEMP = ALPHA*B( L, J )
+                     DO 70, I = 1, M
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+   70                CONTINUE
+                  END IF
+   80          CONTINUE
+   90       CONTINUE
+         ELSE
+*
+*           Form  C := alpha*A'*B + beta*C
+*
+            DO 120, J = 1, N
+               DO 110, I = 1, M
+                  TEMP = ZERO
+                  DO 100, L = 1, K
+                     TEMP = TEMP + A( L, I )*B( L, J )
+  100             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  110          CONTINUE
+  120       CONTINUE
+         END IF
+      ELSE
+         IF( NOTA )THEN
+*
+*           Form  C := alpha*A*B' + beta*C
+*
+            DO 170, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 130, I = 1, M
+                     C( I, J ) = ZERO
+  130             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 140, I = 1, M
+                     C( I, J ) = BETA*C( I, J )
+  140             CONTINUE
+               END IF
+               DO 160, L = 1, K
+                  IF( B( J, L ).NE.ZERO )THEN
+                     TEMP = ALPHA*B( J, L )
+                     DO 150, I = 1, M
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  150                CONTINUE
+                  END IF
+  160          CONTINUE
+  170       CONTINUE
+         ELSE
+*
+*           Form  C := alpha*A'*B' + beta*C
+*
+            DO 200, J = 1, N
+               DO 190, I = 1, M
+                  TEMP = ZERO
+                  DO 180, L = 1, K
+                     TEMP = TEMP + A( L, I )*B( J, L )
+  180             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  190          CONTINUE
+  200       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DGEMM .
+*
+      END
diff --git a/costa/native/external/blas/dgemv.f b/costa/native/external/blas/dgemv.f
new file mode 100644
index 000000000..8ef80b3a5
--- /dev/null
+++ b/costa/native/external/blas/dgemv.f
@@ -0,0 +1,261 @@
+      SUBROUTINE DGEMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   ALPHA, BETA
+      INTEGER            INCX, INCY, LDA, M, N
+      CHARACTER*1        TRANS
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGEMV  performs one of the matrix-vector operations
+*
+*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are vectors and A is an
+*  m by n matrix.
+*
+*  Parameters
+*  ==========
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
+*
+*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.
+*
+*              TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of the matrix A.
+*           M must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+*           Before entry, the leading m by n part of the array A must
+*           contain the matrix of coefficients.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
+*           Before entry, the incremented array X must contain the
+*           vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - DOUBLE PRECISION.
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - DOUBLE PRECISION array of DIMENSION at least
+*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
+*           Before entry with BETA non-zero, the incremented array Y
+*           must contain the vector y. On exit, Y is overwritten by the
+*           updated vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE         , ZERO
+      PARAMETER        ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KX, KY, LENX, LENY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 1
+      ELSE IF( M.LT.0 )THEN
+         INFO = 2
+      ELSE IF( N.LT.0 )THEN
+         INFO = 3
+      ELSE IF( LDA.LT.MAX( 1, M ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DGEMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
+*     up the start points in  X  and  Y.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+         LENX = N
+         LENY = M
+      ELSE
+         LENX = M
+         LENY = N
+      END IF
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( LENX - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( LENY - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, LENY
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, LENY
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, LENY
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, LENY
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  y := alpha*A*x + y.
+*
+         JX = KX
+         IF( INCY.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  DO 50, I = 1, M
+                     Y( I ) = Y( I ) + TEMP*A( I, J )
+   50             CONTINUE
+               END IF
+               JX = JX + INCX
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IY   = KY
+                  DO 70, I = 1, M
+                     Y( IY ) = Y( IY ) + TEMP*A( I, J )
+                     IY      = IY      + INCY
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y := alpha*A'*x + y.
+*
+         JY = KY
+         IF( INCX.EQ.1 )THEN
+            DO 100, J = 1, N
+               TEMP = ZERO
+               DO 90, I = 1, M
+                  TEMP = TEMP + A( I, J )*X( I )
+   90          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+  100       CONTINUE
+         ELSE
+            DO 120, J = 1, N
+               TEMP = ZERO
+               IX   = KX
+               DO 110, I = 1, M
+                  TEMP = TEMP + A( I, J )*X( IX )
+                  IX   = IX   + INCX
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DGEMV .
+*
+      END
diff --git a/costa/native/external/blas/dger.f b/costa/native/external/blas/dger.f
new file mode 100644
index 000000000..d316000ab
--- /dev/null
+++ b/costa/native/external/blas/dger.f
@@ -0,0 +1,157 @@
+      SUBROUTINE DGER  ( M, N, ALPHA, X, INCX, Y, INCY, A, LDA )
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   ALPHA
+      INTEGER            INCX, INCY, LDA, M, N
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGER   performs the rank 1 operation
+*
+*     A := alpha*x*y' + A,
+*
+*  where alpha is a scalar, x is an m element vector, y is an n element
+*  vector and A is an m by n matrix.
+*
+*  Parameters
+*  ==========
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of the matrix A.
+*           M must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( m - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the m
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  Y      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y.
+*           Unchanged on exit.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+*           Before entry, the leading m by n part of the array A must
+*           contain the matrix of coefficients. On exit, A is
+*           overwritten by the updated matrix.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER        ( ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP
+      INTEGER            I, INFO, IX, J, JY, KX
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( M.LT.0 )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 7
+      ELSE IF( LDA.LT.MAX( 1, M ) )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DGER  ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( INCY.GT.0 )THEN
+         JY = 1
+      ELSE
+         JY = 1 - ( N - 1 )*INCY
+      END IF
+      IF( INCX.EQ.1 )THEN
+         DO 20, J = 1, N
+            IF( Y( JY ).NE.ZERO )THEN
+               TEMP = ALPHA*Y( JY )
+               DO 10, I = 1, M
+                  A( I, J ) = A( I, J ) + X( I )*TEMP
+   10          CONTINUE
+            END IF
+            JY = JY + INCY
+   20    CONTINUE
+      ELSE
+         IF( INCX.GT.0 )THEN
+            KX = 1
+         ELSE
+            KX = 1 - ( M - 1 )*INCX
+         END IF
+         DO 40, J = 1, N
+            IF( Y( JY ).NE.ZERO )THEN
+               TEMP = ALPHA*Y( JY )
+               IX   = KX
+               DO 30, I = 1, M
+                  A( I, J ) = A( I, J ) + X( IX )*TEMP
+                  IX        = IX        + INCX
+   30          CONTINUE
+            END IF
+            JY = JY + INCY
+   40    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DGER  .
+*
+      END
diff --git a/costa/native/external/blas/dnrm2.f b/costa/native/external/blas/dnrm2.f
new file mode 100644
index 000000000..119d0477e
--- /dev/null
+++ b/costa/native/external/blas/dnrm2.f
@@ -0,0 +1,60 @@
+      DOUBLE PRECISION FUNCTION DNRM2 ( N, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER                           INCX, N
+*     .. Array Arguments ..
+      DOUBLE PRECISION                  X( * )
+*     ..
+*
+*  DNRM2 returns the euclidean norm of a vector via the function
+*  name, so that
+*
+*     DNRM2 := sqrt( x'*x )
+*
+*
+*
+*  -- This version written on 25-October-1982.
+*     Modified on 14-October-1993 to inline the call to DLASSQ.
+*     Sven Hammarling, Nag Ltd.
+*
+*
+*     .. Parameters ..
+      DOUBLE PRECISION      ONE         , ZERO
+      PARAMETER           ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      INTEGER               IX
+      DOUBLE PRECISION      ABSXI, NORM, SCALE, SSQ
+*     .. Intrinsic Functions ..
+      INTRINSIC             ABS, SQRT
+*     ..
+*     .. Executable Statements ..
+      IF( N.LT.1 .OR. INCX.LT.1 )THEN
+         NORM  = ZERO
+      ELSE IF( N.EQ.1 )THEN
+         NORM  = ABS( X( 1 ) )
+      ELSE
+         SCALE = ZERO
+         SSQ   = ONE
+*        The following loop is equivalent to this call to the LAPACK
+*        auxiliary routine:
+*        CALL DLASSQ( N, X, INCX, SCALE, SSQ )
+*
+         DO 10, IX = 1, 1 + ( N - 1 )*INCX, INCX
+            IF( X( IX ).NE.ZERO )THEN
+               ABSXI = ABS( X( IX ) )
+               IF( SCALE.LT.ABSXI )THEN
+                  SSQ   = ONE   + SSQ*( SCALE/ABSXI )**2
+                  SCALE = ABSXI
+               ELSE
+                  SSQ   = SSQ   +     ( ABSXI/SCALE )**2
+               END IF
+            END IF
+   10    CONTINUE
+         NORM  = SCALE * SQRT( SSQ )
+      END IF
+*
+      DNRM2 = NORM
+      RETURN
+*
+*     End of DNRM2.
+*
+      END
diff --git a/costa/native/external/blas/drot.f b/costa/native/external/blas/drot.f
new file mode 100644
index 000000000..b9ea3bd91
--- /dev/null
+++ b/costa/native/external/blas/drot.f
@@ -0,0 +1,37 @@
+      subroutine  drot (n,dx,incx,dy,incy,c,s)
+c
+c     applies a plane rotation.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      double precision dx(*),dy(*),dtemp,c,s
+      integer i,incx,incy,ix,iy,n
+c
+      if(n.le.0)return
+      if(incx.eq.1.and.incy.eq.1)go to 20
+c
+c       code for unequal increments or equal increments not equal
+c         to 1
+c
+      ix = 1
+      iy = 1
+      if(incx.lt.0)ix = (-n+1)*incx + 1
+      if(incy.lt.0)iy = (-n+1)*incy + 1
+      do 10 i = 1,n
+        dtemp = c*dx(ix) + s*dy(iy)
+        dy(iy) = c*dy(iy) - s*dx(ix)
+        dx(ix) = dtemp
+        ix = ix + incx
+        iy = iy + incy
+   10 continue
+      return
+c
+c       code for both increments equal to 1
+c
+   20 do 30 i = 1,n
+        dtemp = c*dx(i) + s*dy(i)
+        dy(i) = c*dy(i) - s*dx(i)
+        dx(i) = dtemp
+   30 continue
+      return
+      end
diff --git a/costa/native/external/blas/drotg.f b/costa/native/external/blas/drotg.f
new file mode 100644
index 000000000..67838e2cb
--- /dev/null
+++ b/costa/native/external/blas/drotg.f
@@ -0,0 +1,27 @@
+      subroutine drotg(da,db,c,s)
+c
+c     construct givens plane rotation.
+c     jack dongarra, linpack, 3/11/78.
+c
+      double precision da,db,c,s,roe,scale,r,z
+c
+      roe = db
+      if( dabs(da) .gt. dabs(db) ) roe = da
+      scale = dabs(da) + dabs(db)
+      if( scale .ne. 0.0d0 ) go to 10
+         c = 1.0d0
+         s = 0.0d0
+         r = 0.0d0
+         z = 0.0d0
+         go to 20
+   10 r = scale*dsqrt((da/scale)**2 + (db/scale)**2)
+      r = dsign(1.0d0,roe)*r
+      c = da/r
+      s = db/r
+      z = 1.0d0
+      if( dabs(da) .gt. dabs(db) ) z = s
+      if( dabs(db) .ge. dabs(da) .and. c .ne. 0.0d0 ) z = 1.0d0/c
+   20 da = r
+      db = z
+      return
+      end
diff --git a/costa/native/external/blas/dsbmv.f b/costa/native/external/blas/dsbmv.f
new file mode 100644
index 000000000..272042af6
--- /dev/null
+++ b/costa/native/external/blas/dsbmv.f
@@ -0,0 +1,303 @@
+      SUBROUTINE DSBMV ( UPLO, N, K, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   ALPHA, BETA
+      INTEGER            INCX, INCY, K, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSBMV  performs the matrix-vector  operation
+*
+*     y := alpha*A*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are n element vectors and
+*  A is an n by n symmetric band matrix, with k super-diagonals.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the band matrix A is being supplied as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  being supplied.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  being supplied.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry, K specifies the number of super-diagonals of the
+*           matrix A. K must satisfy  0 .le. K.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
+*           by n part of the array A must contain the upper triangular
+*           band part of the symmetric matrix, supplied column by
+*           column, with the leading diagonal of the matrix in row
+*           ( k + 1 ) of the array, the first super-diagonal starting at
+*           position 2 in row k, and so on. The top left k by k triangle
+*           of the array A is not referenced.
+*           The following program segment will transfer the upper
+*           triangular part of a symmetric band matrix from conventional
+*           full matrix storage to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = K + 1 - J
+*                    DO 10, I = MAX( 1, J - K ), J
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
+*           by n part of the array A must contain the lower triangular
+*           band part of the symmetric matrix, supplied column by
+*           column, with the leading diagonal of the matrix in row 1 of
+*           the array, the first sub-diagonal starting at position 1 in
+*           row 2, and so on. The bottom right k by k triangle of the
+*           array A is not referenced.
+*           The following program segment will transfer the lower
+*           triangular part of a symmetric band matrix from conventional
+*           full matrix storage to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = 1 - J
+*                    DO 10, I = J, MIN( N, J + K )
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( k + 1 ).
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the
+*           vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - DOUBLE PRECISION.
+*           On entry, BETA specifies the scalar beta.
+*           Unchanged on exit.
+*
+*  Y      - DOUBLE PRECISION array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the
+*           vector y. On exit, Y is overwritten by the updated vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE         , ZERO
+      PARAMETER        ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KPLUS1, KX, KY, L
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( K.LT.0 )THEN
+         INFO = 3
+      ELSE IF( LDA.LT.( K + 1 ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DSBMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set up the start points in  X  and  Y.
+*
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( N - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( N - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of the array A
+*     are accessed sequentially with one pass through A.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, N
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, N
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, N
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, N
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  y  when upper triangle of A is stored.
+*
+         KPLUS1 = K + 1
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               L     = KPLUS1 - J
+               DO 50, I = MAX( 1, J - K ), J - 1
+                  Y( I ) = Y( I ) + TEMP1*A( L + I, J )
+                  TEMP2  = TEMP2  + A( L + I, J )*X( I )
+   50          CONTINUE
+               Y( J ) = Y( J ) + TEMP1*A( KPLUS1, J ) + ALPHA*TEMP2
+   60       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 80, J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               IX    = KX
+               IY    = KY
+               L     = KPLUS1 - J
+               DO 70, I = MAX( 1, J - K ), J - 1
+                  Y( IY ) = Y( IY ) + TEMP1*A( L + I, J )
+                  TEMP2   = TEMP2   + A( L + I, J )*X( IX )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+   70          CONTINUE
+               Y( JY ) = Y( JY ) + TEMP1*A( KPLUS1, J ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+               IF( J.GT.K )THEN
+                  KX = KX + INCX
+                  KY = KY + INCY
+               END IF
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y  when lower triangle of A is stored.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 100, J = 1, N
+               TEMP1  = ALPHA*X( J )
+               TEMP2  = ZERO
+               Y( J ) = Y( J )       + TEMP1*A( 1, J )
+               L      = 1            - J
+               DO 90, I = J + 1, MIN( N, J + K )
+                  Y( I ) = Y( I ) + TEMP1*A( L + I, J )
+                  TEMP2  = TEMP2  + A( L + I, J )*X( I )
+   90          CONTINUE
+               Y( J ) = Y( J ) + ALPHA*TEMP2
+  100       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 120, J = 1, N
+               TEMP1   = ALPHA*X( JX )
+               TEMP2   = ZERO
+               Y( JY ) = Y( JY )       + TEMP1*A( 1, J )
+               L       = 1             - J
+               IX      = JX
+               IY      = JY
+               DO 110, I = J + 1, MIN( N, J + K )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+                  Y( IY ) = Y( IY ) + TEMP1*A( L + I, J )
+                  TEMP2   = TEMP2   + A( L + I, J )*X( IX )
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DSBMV .
+*
+      END
diff --git a/costa/native/external/blas/dscal.f b/costa/native/external/blas/dscal.f
new file mode 100644
index 000000000..e1467faf2
--- /dev/null
+++ b/costa/native/external/blas/dscal.f
@@ -0,0 +1,43 @@
+      subroutine  dscal(n,da,dx,incx)
+c
+c     scales a vector by a constant.
+c     uses unrolled loops for increment equal to one.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 3/93 to return if incx .le. 0.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      double precision da,dx(*)
+      integer i,incx,m,mp1,n,nincx
+c
+      if( n.le.0 .or. incx.le.0 )return
+      if(incx.eq.1)go to 20
+c
+c        code for increment not equal to 1
+c
+      nincx = n*incx
+      do 10 i = 1,nincx,incx
+        dx(i) = da*dx(i)
+   10 continue
+      return
+c
+c        code for increment equal to 1
+c
+c
+c        clean-up loop
+c
+   20 m = mod(n,5)
+      if( m .eq. 0 ) go to 40
+      do 30 i = 1,m
+        dx(i) = da*dx(i)
+   30 continue
+      if( n .lt. 5 ) return
+   40 mp1 = m + 1
+      do 50 i = mp1,n,5
+        dx(i) = da*dx(i)
+        dx(i + 1) = da*dx(i + 1)
+        dx(i + 2) = da*dx(i + 2)
+        dx(i + 3) = da*dx(i + 3)
+        dx(i + 4) = da*dx(i + 4)
+   50 continue
+      return
+      end
diff --git a/costa/native/external/blas/dspmv.f b/costa/native/external/blas/dspmv.f
new file mode 100644
index 000000000..3ace7bf26
--- /dev/null
+++ b/costa/native/external/blas/dspmv.f
@@ -0,0 +1,262 @@
+      SUBROUTINE DSPMV ( UPLO, N, ALPHA, AP, X, INCX, BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   ALPHA, BETA
+      INTEGER            INCX, INCY, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AP( * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSPMV  performs the matrix-vector operation
+*
+*     y := alpha*A*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are n element vectors and
+*  A is an n by n symmetric matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the matrix A is supplied in the packed
+*           array AP as follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  supplied in AP.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  supplied in AP.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  AP     - DOUBLE PRECISION array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
+*           and a( 2, 2 ) respectively, and so on.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
+*           and a( 3, 1 ) respectively, and so on.
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - DOUBLE PRECISION.
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y. On exit, Y is overwritten by the updated
+*           vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE         , ZERO
+      PARAMETER        ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 6
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DSPMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set up the start points in  X  and  Y.
+*
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( N - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( N - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of the array AP
+*     are accessed sequentially with one pass through AP.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, N
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, N
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, N
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, N
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      KK = 1
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  y  when AP contains the upper triangle.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               K     = KK
+               DO 50, I = 1, J - 1
+                  Y( I ) = Y( I ) + TEMP1*AP( K )
+                  TEMP2  = TEMP2  + AP( K )*X( I )
+                  K      = K      + 1
+   50          CONTINUE
+               Y( J ) = Y( J ) + TEMP1*AP( KK + J - 1 ) + ALPHA*TEMP2
+               KK     = KK     + J
+   60       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 80, J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               IX    = KX
+               IY    = KY
+               DO 70, K = KK, KK + J - 2
+                  Y( IY ) = Y( IY ) + TEMP1*AP( K )
+                  TEMP2   = TEMP2   + AP( K )*X( IX )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+   70          CONTINUE
+               Y( JY ) = Y( JY ) + TEMP1*AP( KK + J - 1 ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+               KK      = KK      + J
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y  when AP contains the lower triangle.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 100, J = 1, N
+               TEMP1  = ALPHA*X( J )
+               TEMP2  = ZERO
+               Y( J ) = Y( J )       + TEMP1*AP( KK )
+               K      = KK           + 1
+               DO 90, I = J + 1, N
+                  Y( I ) = Y( I ) + TEMP1*AP( K )
+                  TEMP2  = TEMP2  + AP( K )*X( I )
+                  K      = K      + 1
+   90          CONTINUE
+               Y( J ) = Y( J ) + ALPHA*TEMP2
+               KK     = KK     + ( N - J + 1 )
+  100       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 120, J = 1, N
+               TEMP1   = ALPHA*X( JX )
+               TEMP2   = ZERO
+               Y( JY ) = Y( JY )       + TEMP1*AP( KK )
+               IX      = JX
+               IY      = JY
+               DO 110, K = KK + 1, KK + N - J
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+                  Y( IY ) = Y( IY ) + TEMP1*AP( K )
+                  TEMP2   = TEMP2   + AP( K )*X( IX )
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+               KK      = KK      + ( N - J + 1 )
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DSPMV .
+*
+      END
diff --git a/costa/native/external/blas/dspr.f b/costa/native/external/blas/dspr.f
new file mode 100644
index 000000000..3da6889c9
--- /dev/null
+++ b/costa/native/external/blas/dspr.f
@@ -0,0 +1,198 @@
+      SUBROUTINE DSPR  ( UPLO, N, ALPHA, X, INCX, AP )
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   ALPHA
+      INTEGER            INCX, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AP( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSPR    performs the symmetric rank 1 operation
+*
+*     A := alpha*x*x' + A,
+*
+*  where alpha is a real scalar, x is an n element vector and A is an
+*  n by n symmetric matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the matrix A is supplied in the packed
+*           array AP as follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  supplied in AP.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  supplied in AP.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  AP     - DOUBLE PRECISION array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
+*           and a( 2, 2 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the upper triangular part of the
+*           updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
+*           and a( 3, 1 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the lower triangular part of the
+*           updated matrix.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER        ( ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP
+      INTEGER            I, INFO, IX, J, JX, K, KK, KX
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DSPR  ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Set the start point in X if the increment is not unity.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of the array AP
+*     are accessed sequentially with one pass through AP.
+*
+      KK = 1
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  A  when upper triangle is stored in AP.
+*
+         IF( INCX.EQ.1 )THEN
+            DO 20, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( J )
+                  K    = KK
+                  DO 10, I = 1, J
+                     AP( K ) = AP( K ) + X( I )*TEMP
+                     K       = K       + 1
+   10             CONTINUE
+               END IF
+               KK = KK + J
+   20       CONTINUE
+         ELSE
+            JX = KX
+            DO 40, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IX   = KX
+                  DO 30, K = KK, KK + J - 1
+                     AP( K ) = AP( K ) + X( IX )*TEMP
+                     IX      = IX      + INCX
+   30             CONTINUE
+               END IF
+               JX = JX + INCX
+               KK = KK + J
+   40       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  A  when lower triangle is stored in AP.
+*
+         IF( INCX.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( J )
+                  K    = KK
+                  DO 50, I = J, N
+                     AP( K ) = AP( K ) + X( I )*TEMP
+                     K       = K       + 1
+   50             CONTINUE
+               END IF
+               KK = KK + N - J + 1
+   60       CONTINUE
+         ELSE
+            JX = KX
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IX   = JX
+                  DO 70, K = KK, KK + N - J
+                     AP( K ) = AP( K ) + X( IX )*TEMP
+                     IX      = IX      + INCX
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+               KK = KK + N - J + 1
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DSPR  .
+*
+      END
diff --git a/costa/native/external/blas/dspr2.f b/costa/native/external/blas/dspr2.f
new file mode 100644
index 000000000..1cfce21b0
--- /dev/null
+++ b/costa/native/external/blas/dspr2.f
@@ -0,0 +1,229 @@
+      SUBROUTINE DSPR2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, AP )
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   ALPHA
+      INTEGER            INCX, INCY, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AP( * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSPR2  performs the symmetric rank 2 operation
+*
+*     A := alpha*x*y' + alpha*y*x' + A,
+*
+*  where alpha is a scalar, x and y are n element vectors and A is an
+*  n by n symmetric matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the matrix A is supplied in the packed
+*           array AP as follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  supplied in AP.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  supplied in AP.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  Y      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y.
+*           Unchanged on exit.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*  AP     - DOUBLE PRECISION array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
+*           and a( 2, 2 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the upper triangular part of the
+*           updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
+*           and a( 3, 1 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the lower triangular part of the
+*           updated matrix.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER        ( ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DSPR2 ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Set up the start points in X and Y if the increments are not both
+*     unity.
+*
+      IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN
+         IF( INCX.GT.0 )THEN
+            KX = 1
+         ELSE
+            KX = 1 - ( N - 1 )*INCX
+         END IF
+         IF( INCY.GT.0 )THEN
+            KY = 1
+         ELSE
+            KY = 1 - ( N - 1 )*INCY
+         END IF
+         JX = KX
+         JY = KY
+      END IF
+*
+*     Start the operations. In this version the elements of the array AP
+*     are accessed sequentially with one pass through AP.
+*
+      KK = 1
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  A  when upper triangle is stored in AP.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 20, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( J )
+                  TEMP2 = ALPHA*X( J )
+                  K     = KK
+                  DO 10, I = 1, J
+                     AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2
+                     K       = K       + 1
+   10             CONTINUE
+               END IF
+               KK = KK + J
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( JY )
+                  TEMP2 = ALPHA*X( JX )
+                  IX    = KX
+                  IY    = KY
+                  DO 30, K = KK, KK + J - 1
+                     AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2
+                     IX      = IX      + INCX
+                     IY      = IY      + INCY
+   30             CONTINUE
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+               KK = KK + J
+   40       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  A  when lower triangle is stored in AP.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( J )
+                  TEMP2 = ALPHA*X( J )
+                  K     = KK
+                  DO 50, I = J, N
+                     AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2
+                     K       = K       + 1
+   50             CONTINUE
+               END IF
+               KK = KK + N - J + 1
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( JY )
+                  TEMP2 = ALPHA*X( JX )
+                  IX    = JX
+                  IY    = JY
+                  DO 70, K = KK, KK + N - J
+                     AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2
+                     IX      = IX      + INCX
+                     IY      = IY      + INCY
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+               KK = KK + N - J + 1
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DSPR2 .
+*
+      END
diff --git a/costa/native/external/blas/dswap.f b/costa/native/external/blas/dswap.f
new file mode 100644
index 000000000..7f7d1fbba
--- /dev/null
+++ b/costa/native/external/blas/dswap.f
@@ -0,0 +1,56 @@
+      subroutine  dswap (n,dx,incx,dy,incy)
+c
+c     interchanges two vectors.
+c     uses unrolled loops for increments equal one.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      double precision dx(*),dy(*),dtemp
+      integer i,incx,incy,ix,iy,m,mp1,n
+c
+      if(n.le.0)return
+      if(incx.eq.1.and.incy.eq.1)go to 20
+c
+c       code for unequal increments or equal increments not equal
+c         to 1
+c
+      ix = 1
+      iy = 1
+      if(incx.lt.0)ix = (-n+1)*incx + 1
+      if(incy.lt.0)iy = (-n+1)*incy + 1
+      do 10 i = 1,n
+        dtemp = dx(ix)
+        dx(ix) = dy(iy)
+        dy(iy) = dtemp
+        ix = ix + incx
+        iy = iy + incy
+   10 continue
+      return
+c
+c       code for both increments equal to 1
+c
+c
+c       clean-up loop
+c
+   20 m = mod(n,3)
+      if( m .eq. 0 ) go to 40
+      do 30 i = 1,m
+        dtemp = dx(i)
+        dx(i) = dy(i)
+        dy(i) = dtemp
+   30 continue
+      if( n .lt. 3 ) return
+   40 mp1 = m + 1
+      do 50 i = mp1,n,3
+        dtemp = dx(i)
+        dx(i) = dy(i)
+        dy(i) = dtemp
+        dtemp = dx(i + 1)
+        dx(i + 1) = dy(i + 1)
+        dy(i + 1) = dtemp
+        dtemp = dx(i + 2)
+        dx(i + 2) = dy(i + 2)
+        dy(i + 2) = dtemp
+   50 continue
+      return
+      end
diff --git a/costa/native/external/blas/dsymm.f b/costa/native/external/blas/dsymm.f
new file mode 100644
index 000000000..0f2514170
--- /dev/null
+++ b/costa/native/external/blas/dsymm.f
@@ -0,0 +1,294 @@
+      SUBROUTINE DSYMM ( SIDE, UPLO, M, N, ALPHA, A, LDA, B, LDB,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        SIDE, UPLO
+      INTEGER            M, N, LDA, LDB, LDC
+      DOUBLE PRECISION   ALPHA, BETA
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYMM  performs one of the matrix-matrix operations
+*
+*     C := alpha*A*B + beta*C,
+*
+*  or
+*
+*     C := alpha*B*A + beta*C,
+*
+*  where alpha and beta are scalars,  A is a symmetric matrix and  B and
+*  C are  m by n matrices.
+*
+*  Parameters
+*  ==========
+*
+*  SIDE   - CHARACTER*1.
+*           On entry,  SIDE  specifies whether  the  symmetric matrix  A
+*           appears on the  left or right  in the  operation as follows:
+*
+*              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C,
+*
+*              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C,
+*
+*           Unchanged on exit.
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of  the  symmetric  matrix   A  is  to  be
+*           referenced as follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of the
+*                                  symmetric matrix is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of the
+*                                  symmetric matrix is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry,  M  specifies the number of rows of the matrix  C.
+*           M  must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix C.
+*           N  must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is
+*           m  when  SIDE = 'L' or 'l'  and is  n otherwise.
+*           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of
+*           the array  A  must contain the  symmetric matrix,  such that
+*           when  UPLO = 'U' or 'u', the leading m by m upper triangular
+*           part of the array  A  must contain the upper triangular part
+*           of the  symmetric matrix and the  strictly  lower triangular
+*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
+*           the leading  m by m  lower triangular part  of the  array  A
+*           must  contain  the  lower triangular part  of the  symmetric
+*           matrix and the  strictly upper triangular part of  A  is not
+*           referenced.
+*           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of
+*           the array  A  must contain the  symmetric matrix,  such that
+*           when  UPLO = 'U' or 'u', the leading n by n upper triangular
+*           part of the array  A  must contain the upper triangular part
+*           of the  symmetric matrix and the  strictly  lower triangular
+*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
+*           the leading  n by n  lower triangular part  of the  array  A
+*           must  contain  the  lower triangular part  of the  symmetric
+*           matrix and the  strictly upper triangular part of  A  is not
+*           referenced.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
+*           LDA must be at least  max( 1, m ), otherwise  LDA must be at
+*           least  max( 1, n ).
+*           Unchanged on exit.
+*
+*  B      - DOUBLE PRECISION array of DIMENSION ( LDB, n ).
+*           Before entry, the leading  m by n part of the array  B  must
+*           contain the matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   LDB  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*  BETA   - DOUBLE PRECISION.
+*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
+*           supplied as zero then C need not be set on input.
+*           Unchanged on exit.
+*
+*  C      - DOUBLE PRECISION array of DIMENSION ( LDC, n ).
+*           Before entry, the leading  m by n  part of the array  C must
+*           contain the matrix  C,  except when  beta  is zero, in which
+*           case C need not be set on entry.
+*           On exit, the array  C  is overwritten by the  m by n updated
+*           matrix.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, K, NROWA
+      DOUBLE PRECISION   TEMP1, TEMP2
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE         , ZERO
+      PARAMETER        ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Executable Statements ..
+*
+*     Set NROWA as the number of rows of A.
+*
+      IF( LSAME( SIDE, 'L' ) )THEN
+         NROWA = M
+      ELSE
+         NROWA = N
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF(      ( .NOT.LSAME( SIDE, 'L' ) ).AND.
+     $         ( .NOT.LSAME( SIDE, 'R' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.UPPER              ).AND.
+     $         ( .NOT.LSAME( UPLO, 'L' ) )      )THEN
+         INFO = 2
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDB.LT.MAX( 1, M     ) )THEN
+         INFO = 9
+      ELSE IF( LDC.LT.MAX( 1, M     ) )THEN
+         INFO = 12
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DSYMM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( BETA.EQ.ZERO )THEN
+            DO 20, J = 1, N
+               DO 10, I = 1, M
+                  C( I, J ) = ZERO
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               DO 30, I = 1, M
+                  C( I, J ) = BETA*C( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( SIDE, 'L' ) )THEN
+*
+*        Form  C := alpha*A*B + beta*C.
+*
+         IF( UPPER )THEN
+            DO 70, J = 1, N
+               DO 60, I = 1, M
+                  TEMP1 = ALPHA*B( I, J )
+                  TEMP2 = ZERO
+                  DO 50, K = 1, I - 1
+                     C( K, J ) = C( K, J ) + TEMP1    *A( K, I )
+                     TEMP2     = TEMP2     + B( K, J )*A( K, I )
+   50             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           TEMP1*A( I, I ) + ALPHA*TEMP2
+                  END IF
+   60          CONTINUE
+   70       CONTINUE
+         ELSE
+            DO 100, J = 1, N
+               DO 90, I = M, 1, -1
+                  TEMP1 = ALPHA*B( I, J )
+                  TEMP2 = ZERO
+                  DO 80, K = I + 1, M
+                     C( K, J ) = C( K, J ) + TEMP1    *A( K, I )
+                     TEMP2     = TEMP2     + B( K, J )*A( K, I )
+   80             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           TEMP1*A( I, I ) + ALPHA*TEMP2
+                  END IF
+   90          CONTINUE
+  100       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  C := alpha*B*A + beta*C.
+*
+         DO 170, J = 1, N
+            TEMP1 = ALPHA*A( J, J )
+            IF( BETA.EQ.ZERO )THEN
+               DO 110, I = 1, M
+                  C( I, J ) = TEMP1*B( I, J )
+  110          CONTINUE
+            ELSE
+               DO 120, I = 1, M
+                  C( I, J ) = BETA*C( I, J ) + TEMP1*B( I, J )
+  120          CONTINUE
+            END IF
+            DO 140, K = 1, J - 1
+               IF( UPPER )THEN
+                  TEMP1 = ALPHA*A( K, J )
+               ELSE
+                  TEMP1 = ALPHA*A( J, K )
+               END IF
+               DO 130, I = 1, M
+                  C( I, J ) = C( I, J ) + TEMP1*B( I, K )
+  130          CONTINUE
+  140       CONTINUE
+            DO 160, K = J + 1, N
+               IF( UPPER )THEN
+                  TEMP1 = ALPHA*A( J, K )
+               ELSE
+                  TEMP1 = ALPHA*A( K, J )
+               END IF
+               DO 150, I = 1, M
+                  C( I, J ) = C( I, J ) + TEMP1*B( I, K )
+  150          CONTINUE
+  160       CONTINUE
+  170    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DSYMM .
+*
+      END
diff --git a/costa/native/external/blas/dsymv.f b/costa/native/external/blas/dsymv.f
new file mode 100644
index 000000000..7592d156b
--- /dev/null
+++ b/costa/native/external/blas/dsymv.f
@@ -0,0 +1,262 @@
+      SUBROUTINE DSYMV ( UPLO, N, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   ALPHA, BETA
+      INTEGER            INCX, INCY, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYMV  performs the matrix-vector  operation
+*
+*     y := alpha*A*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are n element vectors and
+*  A is an n by n symmetric matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the array A is to be referenced as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of A
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of A
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular part of the symmetric matrix and the strictly
+*           lower triangular part of A is not referenced.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular part of the symmetric matrix and the strictly
+*           upper triangular part of A is not referenced.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - DOUBLE PRECISION.
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y. On exit, Y is overwritten by the updated
+*           vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE         , ZERO
+      PARAMETER        ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 5
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 7
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 10
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DSYMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set up the start points in  X  and  Y.
+*
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( N - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( N - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the triangular part
+*     of A.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, N
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, N
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, N
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, N
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  y  when A is stored in upper triangle.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               DO 50, I = 1, J - 1
+                  Y( I ) = Y( I ) + TEMP1*A( I, J )
+                  TEMP2  = TEMP2  + A( I, J )*X( I )
+   50          CONTINUE
+               Y( J ) = Y( J ) + TEMP1*A( J, J ) + ALPHA*TEMP2
+   60       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 80, J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               IX    = KX
+               IY    = KY
+               DO 70, I = 1, J - 1
+                  Y( IY ) = Y( IY ) + TEMP1*A( I, J )
+                  TEMP2   = TEMP2   + A( I, J )*X( IX )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+   70          CONTINUE
+               Y( JY ) = Y( JY ) + TEMP1*A( J, J ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y  when A is stored in lower triangle.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 100, J = 1, N
+               TEMP1  = ALPHA*X( J )
+               TEMP2  = ZERO
+               Y( J ) = Y( J )       + TEMP1*A( J, J )
+               DO 90, I = J + 1, N
+                  Y( I ) = Y( I ) + TEMP1*A( I, J )
+                  TEMP2  = TEMP2  + A( I, J )*X( I )
+   90          CONTINUE
+               Y( J ) = Y( J ) + ALPHA*TEMP2
+  100       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 120, J = 1, N
+               TEMP1   = ALPHA*X( JX )
+               TEMP2   = ZERO
+               Y( JY ) = Y( JY )       + TEMP1*A( J, J )
+               IX      = JX
+               IY      = JY
+               DO 110, I = J + 1, N
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+                  Y( IY ) = Y( IY ) + TEMP1*A( I, J )
+                  TEMP2   = TEMP2   + A( I, J )*X( IX )
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DSYMV .
+*
+      END
diff --git a/costa/native/external/blas/dsyr.f b/costa/native/external/blas/dsyr.f
new file mode 100644
index 000000000..873771967
--- /dev/null
+++ b/costa/native/external/blas/dsyr.f
@@ -0,0 +1,197 @@
+      SUBROUTINE DSYR  ( UPLO, N, ALPHA, X, INCX, A, LDA )
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   ALPHA
+      INTEGER            INCX, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYR   performs the symmetric rank 1 operation
+*
+*     A := alpha*x*x' + A,
+*
+*  where alpha is a real scalar, x is an n element vector and A is an
+*  n by n symmetric matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the array A is to be referenced as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of A
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of A
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular part of the symmetric matrix and the strictly
+*           lower triangular part of A is not referenced. On exit, the
+*           upper triangular part of the array A is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular part of the symmetric matrix and the strictly
+*           upper triangular part of A is not referenced. On exit, the
+*           lower triangular part of the array A is overwritten by the
+*           lower triangular part of the updated matrix.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER        ( ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP
+      INTEGER            I, INFO, IX, J, JX, KX
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DSYR  ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Set the start point in X if the increment is not unity.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the triangular part
+*     of A.
+*
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  A  when A is stored in upper triangle.
+*
+         IF( INCX.EQ.1 )THEN
+            DO 20, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( J )
+                  DO 10, I = 1, J
+                     A( I, J ) = A( I, J ) + X( I )*TEMP
+   10             CONTINUE
+               END IF
+   20       CONTINUE
+         ELSE
+            JX = KX
+            DO 40, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IX   = KX
+                  DO 30, I = 1, J
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP
+                     IX        = IX        + INCX
+   30             CONTINUE
+               END IF
+               JX = JX + INCX
+   40       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  A  when A is stored in lower triangle.
+*
+         IF( INCX.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( J )
+                  DO 50, I = J, N
+                     A( I, J ) = A( I, J ) + X( I )*TEMP
+   50             CONTINUE
+               END IF
+   60       CONTINUE
+         ELSE
+            JX = KX
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IX   = JX
+                  DO 70, I = J, N
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP
+                     IX        = IX        + INCX
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DSYR  .
+*
+      END
diff --git a/costa/native/external/blas/dsyr2.f b/costa/native/external/blas/dsyr2.f
new file mode 100644
index 000000000..918ad8a7d
--- /dev/null
+++ b/costa/native/external/blas/dsyr2.f
@@ -0,0 +1,230 @@
+      SUBROUTINE DSYR2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, A, LDA )
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   ALPHA
+      INTEGER            INCX, INCY, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYR2  performs the symmetric rank 2 operation
+*
+*     A := alpha*x*y' + alpha*y*x' + A,
+*
+*  where alpha is a scalar, x and y are n element vectors and A is an n
+*  by n symmetric matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the array A is to be referenced as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of A
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of A
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  Y      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y.
+*           Unchanged on exit.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular part of the symmetric matrix and the strictly
+*           lower triangular part of A is not referenced. On exit, the
+*           upper triangular part of the array A is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular part of the symmetric matrix and the strictly
+*           upper triangular part of A is not referenced. On exit, the
+*           lower triangular part of the array A is overwritten by the
+*           lower triangular part of the updated matrix.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER        ( ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 7
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DSYR2 ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Set up the start points in X and Y if the increments are not both
+*     unity.
+*
+      IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN
+         IF( INCX.GT.0 )THEN
+            KX = 1
+         ELSE
+            KX = 1 - ( N - 1 )*INCX
+         END IF
+         IF( INCY.GT.0 )THEN
+            KY = 1
+         ELSE
+            KY = 1 - ( N - 1 )*INCY
+         END IF
+         JX = KX
+         JY = KY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the triangular part
+*     of A.
+*
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  A  when A is stored in the upper triangle.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 20, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( J )
+                  TEMP2 = ALPHA*X( J )
+                  DO 10, I = 1, J
+                     A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2
+   10             CONTINUE
+               END IF
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( JY )
+                  TEMP2 = ALPHA*X( JX )
+                  IX    = KX
+                  IY    = KY
+                  DO 30, I = 1, J
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP1
+     $                                     + Y( IY )*TEMP2
+                     IX        = IX        + INCX
+                     IY        = IY        + INCY
+   30             CONTINUE
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+   40       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  A  when A is stored in the lower triangle.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( J )
+                  TEMP2 = ALPHA*X( J )
+                  DO 50, I = J, N
+                     A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2
+   50             CONTINUE
+               END IF
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( JY )
+                  TEMP2 = ALPHA*X( JX )
+                  IX    = JX
+                  IY    = JY
+                  DO 70, I = J, N
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP1
+     $                                     + Y( IY )*TEMP2
+                     IX        = IX        + INCX
+                     IY        = IY        + INCY
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DSYR2 .
+*
+      END
diff --git a/costa/native/external/blas/dsyr2k.f b/costa/native/external/blas/dsyr2k.f
new file mode 100644
index 000000000..ac7d97de6
--- /dev/null
+++ b/costa/native/external/blas/dsyr2k.f
@@ -0,0 +1,327 @@
+      SUBROUTINE DSYR2K( UPLO, TRANS, N, K, ALPHA, A, LDA, B, LDB,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        UPLO, TRANS
+      INTEGER            N, K, LDA, LDB, LDC
+      DOUBLE PRECISION   ALPHA, BETA
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYR2K  performs one of the symmetric rank 2k operations
+*
+*     C := alpha*A*B' + alpha*B*A' + beta*C,
+*
+*  or
+*
+*     C := alpha*A'*B + alpha*B'*A + beta*C,
+*
+*  where  alpha and beta  are scalars, C is an  n by n  symmetric matrix
+*  and  A and B  are  n by k  matrices  in the  first  case  and  k by n
+*  matrices in the second case.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of the  array  C  is to be  referenced  as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry,  TRANS  specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   C := alpha*A*B' + alpha*B*A' +
+*                                        beta*C.
+*
+*              TRANS = 'T' or 't'   C := alpha*A'*B + alpha*B'*A +
+*                                        beta*C.
+*
+*              TRANS = 'C' or 'c'   C := alpha*A'*B + alpha*B'*A +
+*                                        beta*C.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N specifies the order of the matrix C.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+*           of  columns  of the  matrices  A and B,  and on  entry  with
+*           TRANS = 'T' or 't' or 'C' or 'c',  K  specifies  the  number
+*           of rows of the matrices  A and B.  K must be at least  zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by n  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDA must be at least  max( 1, n ), otherwise  LDA must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  B      - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  B  must contain the matrix  B,  otherwise
+*           the leading  k by n  part of the array  B  must contain  the
+*           matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDB must be at least  max( 1, n ), otherwise  LDB must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  BETA   - DOUBLE PRECISION.
+*           On entry, BETA specifies the scalar beta.
+*           Unchanged on exit.
+*
+*  C      - DOUBLE PRECISION array of DIMENSION ( LDC, n ).
+*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+*           upper triangular part of the array C must contain the upper
+*           triangular part  of the  symmetric matrix  and the strictly
+*           lower triangular part of C is not referenced.  On exit, the
+*           upper triangular part of the array  C is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+*           lower triangular part of the array C must contain the lower
+*           triangular part  of the  symmetric matrix  and the strictly
+*           upper triangular part of C is not referenced.  On exit, the
+*           lower triangular part of the array  C is overwritten by the
+*           lower triangular part of the updated matrix.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, L, NROWA
+      DOUBLE PRECISION   TEMP1, TEMP2
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE         , ZERO
+      PARAMETER        ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+         NROWA = N
+      ELSE
+         NROWA = K
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*
+      INFO = 0
+      IF(      ( .NOT.UPPER               ).AND.
+     $         ( .NOT.LSAME( UPLO , 'L' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANS, 'T' ) ).AND.
+     $         ( .NOT.LSAME( TRANS, 'C' ) )      )THEN
+         INFO = 2
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( K  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDB.LT.MAX( 1, NROWA ) )THEN
+         INFO = 9
+      ELSE IF( LDC.LT.MAX( 1, N     ) )THEN
+         INFO = 12
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DSYR2K', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.
+     $    ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( UPPER )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 20, J = 1, N
+                  DO 10, I = 1, J
+                     C( I, J ) = ZERO
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40, J = 1, N
+                  DO 30, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+   30             CONTINUE
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( BETA.EQ.ZERO )THEN
+               DO 60, J = 1, N
+                  DO 50, I = J, N
+                     C( I, J ) = ZERO
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80, J = 1, N
+                  DO 70, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  C := alpha*A*B' + alpha*B*A' + C.
+*
+         IF( UPPER )THEN
+            DO 130, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 90, I = 1, J
+                     C( I, J ) = ZERO
+   90             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 100, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+  100             CONTINUE
+               END IF
+               DO 120, L = 1, K
+                  IF( ( A( J, L ).NE.ZERO ).OR.
+     $                ( B( J, L ).NE.ZERO )     )THEN
+                     TEMP1 = ALPHA*B( J, L )
+                     TEMP2 = ALPHA*A( J, L )
+                     DO 110, I = 1, J
+                        C( I, J ) = C( I, J ) +
+     $                              A( I, L )*TEMP1 + B( I, L )*TEMP2
+  110                CONTINUE
+                  END IF
+  120          CONTINUE
+  130       CONTINUE
+         ELSE
+            DO 180, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 140, I = J, N
+                     C( I, J ) = ZERO
+  140             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 150, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+  150             CONTINUE
+               END IF
+               DO 170, L = 1, K
+                  IF( ( A( J, L ).NE.ZERO ).OR.
+     $                ( B( J, L ).NE.ZERO )     )THEN
+                     TEMP1 = ALPHA*B( J, L )
+                     TEMP2 = ALPHA*A( J, L )
+                     DO 160, I = J, N
+                        C( I, J ) = C( I, J ) +
+     $                              A( I, L )*TEMP1 + B( I, L )*TEMP2
+  160                CONTINUE
+                  END IF
+  170          CONTINUE
+  180       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  C := alpha*A'*B + alpha*B'*A + C.
+*
+         IF( UPPER )THEN
+            DO 210, J = 1, N
+               DO 200, I = 1, J
+                  TEMP1 = ZERO
+                  TEMP2 = ZERO
+                  DO 190, L = 1, K
+                     TEMP1 = TEMP1 + A( L, I )*B( L, J )
+                     TEMP2 = TEMP2 + B( L, I )*A( L, J )
+  190             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           ALPHA*TEMP1 + ALPHA*TEMP2
+                  END IF
+  200          CONTINUE
+  210       CONTINUE
+         ELSE
+            DO 240, J = 1, N
+               DO 230, I = J, N
+                  TEMP1 = ZERO
+                  TEMP2 = ZERO
+                  DO 220, L = 1, K
+                     TEMP1 = TEMP1 + A( L, I )*B( L, J )
+                     TEMP2 = TEMP2 + B( L, I )*A( L, J )
+  220             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           ALPHA*TEMP1 + ALPHA*TEMP2
+                  END IF
+  230          CONTINUE
+  240       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DSYR2K.
+*
+      END
diff --git a/costa/native/external/blas/dsyrk.f b/costa/native/external/blas/dsyrk.f
new file mode 100644
index 000000000..b618b2968
--- /dev/null
+++ b/costa/native/external/blas/dsyrk.f
@@ -0,0 +1,294 @@
+      SUBROUTINE DSYRK ( UPLO, TRANS, N, K, ALPHA, A, LDA,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        UPLO, TRANS
+      INTEGER            N, K, LDA, LDC
+      DOUBLE PRECISION   ALPHA, BETA
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYRK  performs one of the symmetric rank k operations
+*
+*     C := alpha*A*A' + beta*C,
+*
+*  or
+*
+*     C := alpha*A'*A + beta*C,
+*
+*  where  alpha and beta  are scalars, C is an  n by n  symmetric matrix
+*  and  A  is an  n by k  matrix in the first case and a  k by n  matrix
+*  in the second case.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of the  array  C  is to be  referenced  as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry,  TRANS  specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   C := alpha*A*A' + beta*C.
+*
+*              TRANS = 'T' or 't'   C := alpha*A'*A + beta*C.
+*
+*              TRANS = 'C' or 'c'   C := alpha*A'*A + beta*C.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N specifies the order of the matrix C.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+*           of  columns   of  the   matrix   A,   and  on   entry   with
+*           TRANS = 'T' or 't' or 'C' or 'c',  K  specifies  the  number
+*           of rows of the matrix  A.  K must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by n  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDA must be at least  max( 1, n ), otherwise  LDA must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  BETA   - DOUBLE PRECISION.
+*           On entry, BETA specifies the scalar beta.
+*           Unchanged on exit.
+*
+*  C      - DOUBLE PRECISION array of DIMENSION ( LDC, n ).
+*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+*           upper triangular part of the array C must contain the upper
+*           triangular part  of the  symmetric matrix  and the strictly
+*           lower triangular part of C is not referenced.  On exit, the
+*           upper triangular part of the array  C is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+*           lower triangular part of the array C must contain the lower
+*           triangular part  of the  symmetric matrix  and the strictly
+*           upper triangular part of C is not referenced.  On exit, the
+*           lower triangular part of the array  C is overwritten by the
+*           lower triangular part of the updated matrix.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, L, NROWA
+      DOUBLE PRECISION   TEMP
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE ,         ZERO
+      PARAMETER        ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+         NROWA = N
+      ELSE
+         NROWA = K
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*
+      INFO = 0
+      IF(      ( .NOT.UPPER               ).AND.
+     $         ( .NOT.LSAME( UPLO , 'L' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANS, 'T' ) ).AND.
+     $         ( .NOT.LSAME( TRANS, 'C' ) )      )THEN
+         INFO = 2
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( K  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDC.LT.MAX( 1, N     ) )THEN
+         INFO = 10
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DSYRK ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.
+     $    ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( UPPER )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 20, J = 1, N
+                  DO 10, I = 1, J
+                     C( I, J ) = ZERO
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40, J = 1, N
+                  DO 30, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+   30             CONTINUE
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( BETA.EQ.ZERO )THEN
+               DO 60, J = 1, N
+                  DO 50, I = J, N
+                     C( I, J ) = ZERO
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80, J = 1, N
+                  DO 70, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  C := alpha*A*A' + beta*C.
+*
+         IF( UPPER )THEN
+            DO 130, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 90, I = 1, J
+                     C( I, J ) = ZERO
+   90             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 100, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+  100             CONTINUE
+               END IF
+               DO 120, L = 1, K
+                  IF( A( J, L ).NE.ZERO )THEN
+                     TEMP = ALPHA*A( J, L )
+                     DO 110, I = 1, J
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  110                CONTINUE
+                  END IF
+  120          CONTINUE
+  130       CONTINUE
+         ELSE
+            DO 180, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 140, I = J, N
+                     C( I, J ) = ZERO
+  140             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 150, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+  150             CONTINUE
+               END IF
+               DO 170, L = 1, K
+                  IF( A( J, L ).NE.ZERO )THEN
+                     TEMP      = ALPHA*A( J, L )
+                     DO 160, I = J, N
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  160                CONTINUE
+                  END IF
+  170          CONTINUE
+  180       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  C := alpha*A'*A + beta*C.
+*
+         IF( UPPER )THEN
+            DO 210, J = 1, N
+               DO 200, I = 1, J
+                  TEMP = ZERO
+                  DO 190, L = 1, K
+                     TEMP = TEMP + A( L, I )*A( L, J )
+  190             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  200          CONTINUE
+  210       CONTINUE
+         ELSE
+            DO 240, J = 1, N
+               DO 230, I = J, N
+                  TEMP = ZERO
+                  DO 220, L = 1, K
+                     TEMP = TEMP + A( L, I )*A( L, J )
+  220             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  230          CONTINUE
+  240       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DSYRK .
+*
+      END
diff --git a/costa/native/external/blas/dtbmv.f b/costa/native/external/blas/dtbmv.f
new file mode 100644
index 000000000..1363db79c
--- /dev/null
+++ b/costa/native/external/blas/dtbmv.f
@@ -0,0 +1,342 @@
+      SUBROUTINE DTBMV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, K, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTBMV  performs one of the matrix-vector operations
+*
+*     x := A*x,   or   x := A'*x,
+*
+*  where x is an n element vector and  A is an n by n unit, or non-unit,
+*  upper or lower triangular band matrix, with ( k + 1 ) diagonals.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   x := A*x.
+*
+*              TRANS = 'T' or 't'   x := A'*x.
+*
+*              TRANS = 'C' or 'c'   x := A'*x.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with UPLO = 'U' or 'u', K specifies the number of
+*           super-diagonals of the matrix A.
+*           On entry with UPLO = 'L' or 'l', K specifies the number of
+*           sub-diagonals of the matrix A.
+*           K must satisfy  0 .le. K.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
+*           by n part of the array A must contain the upper triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row
+*           ( k + 1 ) of the array, the first super-diagonal starting at
+*           position 2 in row k, and so on. The top left k by k triangle
+*           of the array A is not referenced.
+*           The following program segment will transfer an upper
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = K + 1 - J
+*                    DO 10, I = MAX( 1, J - K ), J
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
+*           by n part of the array A must contain the lower triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row 1 of
+*           the array, the first sub-diagonal starting at position 1 in
+*           row 2, and so on. The bottom right k by k triangle of the
+*           array A is not referenced.
+*           The following program segment will transfer a lower
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = 1 - J
+*                    DO 10, I = J, MIN( N, J + K )
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Note that when DIAG = 'U' or 'u' the elements of the array A
+*           corresponding to the diagonal elements of the matrix are not
+*           referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( k + 1 ).
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x. On exit, X is overwritten with the
+*           tranformed vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER        ( ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP
+      INTEGER            I, INFO, IX, J, JX, KPLUS1, KX, L
+      LOGICAL            NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( K.LT.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.( K + 1 ) )THEN
+         INFO = 7
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DTBMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX   too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*         Form  x := A*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     L    = KPLUS1 - J
+                     DO 10, I = MAX( 1, J - K ), J - 1
+                        X( I ) = X( I ) + TEMP*A( L + I, J )
+   10                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( KPLUS1, J )
+                  END IF
+   20          CONTINUE
+            ELSE
+               JX = KX
+               DO 40, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     L    = KPLUS1  - J
+                     DO 30, I = MAX( 1, J - K ), J - 1
+                        X( IX ) = X( IX ) + TEMP*A( L + I, J )
+                        IX      = IX      + INCX
+   30                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( KPLUS1, J )
+                  END IF
+                  JX = JX + INCX
+                  IF( J.GT.K )
+     $               KX = KX + INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     L    = 1      - J
+                     DO 50, I = MIN( N, J + K ), J + 1, -1
+                        X( I ) = X( I ) + TEMP*A( L + I, J )
+   50                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( 1, J )
+                  END IF
+   60          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 80, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     L    = 1       - J
+                     DO 70, I = MIN( N, J + K ), J + 1, -1
+                        X( IX ) = X( IX ) + TEMP*A( L + I, J )
+                        IX      = IX      - INCX
+   70                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( 1, J )
+                  END IF
+                  JX = JX - INCX
+                  IF( ( N - J ).GE.K )
+     $               KX = KX - INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := A'*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 100, J = N, 1, -1
+                  TEMP = X( J )
+                  L    = KPLUS1 - J
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( KPLUS1, J )
+                  DO 90, I = J - 1, MAX( 1, J - K ), -1
+                     TEMP = TEMP + A( L + I, J )*X( I )
+   90             CONTINUE
+                  X( J ) = TEMP
+  100          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 120, J = N, 1, -1
+                  TEMP = X( JX )
+                  KX   = KX      - INCX
+                  IX   = KX
+                  L    = KPLUS1  - J
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( KPLUS1, J )
+                  DO 110, I = J - 1, MAX( 1, J - K ), -1
+                     TEMP = TEMP + A( L + I, J )*X( IX )
+                     IX   = IX   - INCX
+  110             CONTINUE
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+  120          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 140, J = 1, N
+                  TEMP = X( J )
+                  L    = 1      - J
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( 1, J )
+                  DO 130, I = J + 1, MIN( N, J + K )
+                     TEMP = TEMP + A( L + I, J )*X( I )
+  130             CONTINUE
+                  X( J ) = TEMP
+  140          CONTINUE
+            ELSE
+               JX = KX
+               DO 160, J = 1, N
+                  TEMP = X( JX )
+                  KX   = KX      + INCX
+                  IX   = KX
+                  L    = 1       - J
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( 1, J )
+                  DO 150, I = J + 1, MIN( N, J + K )
+                     TEMP = TEMP + A( L + I, J )*X( IX )
+                     IX   = IX   + INCX
+  150             CONTINUE
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+  160          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DTBMV .
+*
+      END
diff --git a/costa/native/external/blas/dtbsv.f b/costa/native/external/blas/dtbsv.f
new file mode 100644
index 000000000..d87ed82d5
--- /dev/null
+++ b/costa/native/external/blas/dtbsv.f
@@ -0,0 +1,346 @@
+      SUBROUTINE DTBSV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, K, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTBSV  solves one of the systems of equations
+*
+*     A*x = b,   or   A'*x = b,
+*
+*  where b and x are n element vectors and A is an n by n unit, or
+*  non-unit, upper or lower triangular band matrix, with ( k + 1 )
+*  diagonals.
+*
+*  No test for singularity or near-singularity is included in this
+*  routine. Such tests must be performed before calling this routine.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the equations to be solved as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   A*x = b.
+*
+*              TRANS = 'T' or 't'   A'*x = b.
+*
+*              TRANS = 'C' or 'c'   A'*x = b.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with UPLO = 'U' or 'u', K specifies the number of
+*           super-diagonals of the matrix A.
+*           On entry with UPLO = 'L' or 'l', K specifies the number of
+*           sub-diagonals of the matrix A.
+*           K must satisfy  0 .le. K.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
+*           by n part of the array A must contain the upper triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row
+*           ( k + 1 ) of the array, the first super-diagonal starting at
+*           position 2 in row k, and so on. The top left k by k triangle
+*           of the array A is not referenced.
+*           The following program segment will transfer an upper
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = K + 1 - J
+*                    DO 10, I = MAX( 1, J - K ), J
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
+*           by n part of the array A must contain the lower triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row 1 of
+*           the array, the first sub-diagonal starting at position 1 in
+*           row 2, and so on. The bottom right k by k triangle of the
+*           array A is not referenced.
+*           The following program segment will transfer a lower
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = 1 - J
+*                    DO 10, I = J, MIN( N, J + K )
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Note that when DIAG = 'U' or 'u' the elements of the array A
+*           corresponding to the diagonal elements of the matrix are not
+*           referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( k + 1 ).
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element right-hand side vector b. On exit, X is overwritten
+*           with the solution vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER        ( ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP
+      INTEGER            I, INFO, IX, J, JX, KPLUS1, KX, L
+      LOGICAL            NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( K.LT.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.( K + 1 ) )THEN
+         INFO = 7
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DTBSV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed by sequentially with one pass through A.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x := inv( A )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     L = KPLUS1 - J
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( KPLUS1, J )
+                     TEMP = X( J )
+                     DO 10, I = J - 1, MAX( 1, J - K ), -1
+                        X( I ) = X( I ) - TEMP*A( L + I, J )
+   10                CONTINUE
+                  END IF
+   20          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 40, J = N, 1, -1
+                  KX = KX - INCX
+                  IF( X( JX ).NE.ZERO )THEN
+                     IX = KX
+                     L  = KPLUS1 - J
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( KPLUS1, J )
+                     TEMP = X( JX )
+                     DO 30, I = J - 1, MAX( 1, J - K ), -1
+                        X( IX ) = X( IX ) - TEMP*A( L + I, J )
+                        IX      = IX      - INCX
+   30                CONTINUE
+                  END IF
+                  JX = JX - INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     L = 1 - J
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( 1, J )
+                     TEMP = X( J )
+                     DO 50, I = J + 1, MIN( N, J + K )
+                        X( I ) = X( I ) - TEMP*A( L + I, J )
+   50                CONTINUE
+                  END IF
+   60          CONTINUE
+            ELSE
+               JX = KX
+               DO 80, J = 1, N
+                  KX = KX + INCX
+                  IF( X( JX ).NE.ZERO )THEN
+                     IX = KX
+                     L  = 1  - J
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( 1, J )
+                     TEMP = X( JX )
+                     DO 70, I = J + 1, MIN( N, J + K )
+                        X( IX ) = X( IX ) - TEMP*A( L + I, J )
+                        IX      = IX      + INCX
+   70                CONTINUE
+                  END IF
+                  JX = JX + INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := inv( A')*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 100, J = 1, N
+                  TEMP = X( J )
+                  L    = KPLUS1 - J
+                  DO 90, I = MAX( 1, J - K ), J - 1
+                     TEMP = TEMP - A( L + I, J )*X( I )
+   90             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( KPLUS1, J )
+                  X( J ) = TEMP
+  100          CONTINUE
+            ELSE
+               JX = KX
+               DO 120, J = 1, N
+                  TEMP = X( JX )
+                  IX   = KX
+                  L    = KPLUS1  - J
+                  DO 110, I = MAX( 1, J - K ), J - 1
+                     TEMP = TEMP - A( L + I, J )*X( IX )
+                     IX   = IX   + INCX
+  110             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( KPLUS1, J )
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+                  IF( J.GT.K )
+     $               KX = KX + INCX
+  120          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 140, J = N, 1, -1
+                  TEMP = X( J )
+                  L    = 1      - J
+                  DO 130, I = MIN( N, J + K ), J + 1, -1
+                     TEMP = TEMP - A( L + I, J )*X( I )
+  130             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( 1, J )
+                  X( J ) = TEMP
+  140          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 160, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = KX
+                  L    = 1       - J
+                  DO 150, I = MIN( N, J + K ), J + 1, -1
+                     TEMP = TEMP - A( L + I, J )*X( IX )
+                     IX   = IX   - INCX
+  150             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( 1, J )
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+                  IF( ( N - J ).GE.K )
+     $               KX = KX - INCX
+  160          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DTBSV .
+*
+      END
diff --git a/costa/native/external/blas/dtpmv.f b/costa/native/external/blas/dtpmv.f
new file mode 100644
index 000000000..ee11bc1b0
--- /dev/null
+++ b/costa/native/external/blas/dtpmv.f
@@ -0,0 +1,299 @@
+      SUBROUTINE DTPMV ( UPLO, TRANS, DIAG, N, AP, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AP( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTPMV  performs one of the matrix-vector operations
+*
+*     x := A*x,   or   x := A'*x,
+*
+*  where x is an n element vector and  A is an n by n unit, or non-unit,
+*  upper or lower triangular matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   x := A*x.
+*
+*              TRANS = 'T' or 't'   x := A'*x.
+*
+*              TRANS = 'C' or 'c'   x := A'*x.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  AP     - DOUBLE PRECISION array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
+*           respectively, and so on.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
+*           respectively, and so on.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x. On exit, X is overwritten with the
+*           tranformed vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER        ( ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP
+      INTEGER            I, INFO, IX, J, JX, K, KK, KX
+      LOGICAL            NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DTPMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of AP are
+*     accessed sequentially with one pass through AP.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x:= A*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK =1
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     K    = KK
+                     DO 10, I = 1, J - 1
+                        X( I ) = X( I ) + TEMP*AP( K )
+                        K      = K      + 1
+   10                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*AP( KK + J - 1 )
+                  END IF
+                  KK = KK + J
+   20          CONTINUE
+            ELSE
+               JX = KX
+               DO 40, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 30, K = KK, KK + J - 2
+                        X( IX ) = X( IX ) + TEMP*AP( K )
+                        IX      = IX      + INCX
+   30                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*AP( KK + J - 1 )
+                  END IF
+                  JX = JX + INCX
+                  KK = KK + J
+   40          CONTINUE
+            END IF
+         ELSE
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     K    = KK
+                     DO 50, I = N, J + 1, -1
+                        X( I ) = X( I ) + TEMP*AP( K )
+                        K      = K      - 1
+   50                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*AP( KK - N + J )
+                  END IF
+                  KK = KK - ( N - J + 1 )
+   60          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 80, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 70, K = KK, KK - ( N - ( J + 1 ) ), -1
+                        X( IX ) = X( IX ) + TEMP*AP( K )
+                        IX      = IX      - INCX
+   70                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*AP( KK - N + J )
+                  END IF
+                  JX = JX - INCX
+                  KK = KK - ( N - J + 1 )
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := A'*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 100, J = N, 1, -1
+                  TEMP = X( J )
+                  IF( NOUNIT )
+     $               TEMP = TEMP*AP( KK )
+                  K = KK - 1
+                  DO 90, I = J - 1, 1, -1
+                     TEMP = TEMP + AP( K )*X( I )
+                     K    = K    - 1
+   90             CONTINUE
+                  X( J ) = TEMP
+                  KK     = KK   - J
+  100          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 120, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOUNIT )
+     $               TEMP = TEMP*AP( KK )
+                  DO 110, K = KK - 1, KK - J + 1, -1
+                     IX   = IX   - INCX
+                     TEMP = TEMP + AP( K )*X( IX )
+  110             CONTINUE
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+                  KK      = KK   - J
+  120          CONTINUE
+            END IF
+         ELSE
+            KK = 1
+            IF( INCX.EQ.1 )THEN
+               DO 140, J = 1, N
+                  TEMP = X( J )
+                  IF( NOUNIT )
+     $               TEMP = TEMP*AP( KK )
+                  K = KK + 1
+                  DO 130, I = J + 1, N
+                     TEMP = TEMP + AP( K )*X( I )
+                     K    = K    + 1
+  130             CONTINUE
+                  X( J ) = TEMP
+                  KK     = KK   + ( N - J + 1 )
+  140          CONTINUE
+            ELSE
+               JX = KX
+               DO 160, J = 1, N
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOUNIT )
+     $               TEMP = TEMP*AP( KK )
+                  DO 150, K = KK + 1, KK + N - J
+                     IX   = IX   + INCX
+                     TEMP = TEMP + AP( K )*X( IX )
+  150             CONTINUE
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+                  KK      = KK   + ( N - J + 1 )
+  160          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DTPMV .
+*
+      END
diff --git a/costa/native/external/blas/dtpsv.f b/costa/native/external/blas/dtpsv.f
new file mode 100644
index 000000000..91930d9fb
--- /dev/null
+++ b/costa/native/external/blas/dtpsv.f
@@ -0,0 +1,302 @@
+      SUBROUTINE DTPSV ( UPLO, TRANS, DIAG, N, AP, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AP( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTPSV  solves one of the systems of equations
+*
+*     A*x = b,   or   A'*x = b,
+*
+*  where b and x are n element vectors and A is an n by n unit, or
+*  non-unit, upper or lower triangular matrix, supplied in packed form.
+*
+*  No test for singularity or near-singularity is included in this
+*  routine. Such tests must be performed before calling this routine.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the equations to be solved as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   A*x = b.
+*
+*              TRANS = 'T' or 't'   A'*x = b.
+*
+*              TRANS = 'C' or 'c'   A'*x = b.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  AP     - DOUBLE PRECISION array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
+*           respectively, and so on.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
+*           respectively, and so on.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element right-hand side vector b. On exit, X is overwritten
+*           with the solution vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER        ( ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP
+      INTEGER            I, INFO, IX, J, JX, K, KK, KX
+      LOGICAL            NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DTPSV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of AP are
+*     accessed sequentially with one pass through AP.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x := inv( A )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/AP( KK )
+                     TEMP = X( J )
+                     K    = KK     - 1
+                     DO 10, I = J - 1, 1, -1
+                        X( I ) = X( I ) - TEMP*AP( K )
+                        K      = K      - 1
+   10                CONTINUE
+                  END IF
+                  KK = KK - J
+   20          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 40, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/AP( KK )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 30, K = KK - 1, KK - J + 1, -1
+                        IX      = IX      - INCX
+                        X( IX ) = X( IX ) - TEMP*AP( K )
+   30                CONTINUE
+                  END IF
+                  JX = JX - INCX
+                  KK = KK - J
+   40          CONTINUE
+            END IF
+         ELSE
+            KK = 1
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/AP( KK )
+                     TEMP = X( J )
+                     K    = KK     + 1
+                     DO 50, I = J + 1, N
+                        X( I ) = X( I ) - TEMP*AP( K )
+                        K      = K      + 1
+   50                CONTINUE
+                  END IF
+                  KK = KK + ( N - J + 1 )
+   60          CONTINUE
+            ELSE
+               JX = KX
+               DO 80, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/AP( KK )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 70, K = KK + 1, KK + N - J
+                        IX      = IX      + INCX
+                        X( IX ) = X( IX ) - TEMP*AP( K )
+   70                CONTINUE
+                  END IF
+                  JX = JX + INCX
+                  KK = KK + ( N - J + 1 )
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := inv( A' )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK = 1
+            IF( INCX.EQ.1 )THEN
+               DO 100, J = 1, N
+                  TEMP = X( J )
+                  K    = KK
+                  DO 90, I = 1, J - 1
+                     TEMP = TEMP - AP( K )*X( I )
+                     K    = K    + 1
+   90             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/AP( KK + J - 1 )
+                  X( J ) = TEMP
+                  KK     = KK   + J
+  100          CONTINUE
+            ELSE
+               JX = KX
+               DO 120, J = 1, N
+                  TEMP = X( JX )
+                  IX   = KX
+                  DO 110, K = KK, KK + J - 2
+                     TEMP = TEMP - AP( K )*X( IX )
+                     IX   = IX   + INCX
+  110             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/AP( KK + J - 1 )
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+                  KK      = KK   + J
+  120          CONTINUE
+            END IF
+         ELSE
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 140, J = N, 1, -1
+                  TEMP = X( J )
+                  K = KK
+                  DO 130, I = N, J + 1, -1
+                     TEMP = TEMP - AP( K )*X( I )
+                     K    = K    - 1
+  130             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/AP( KK - N + J )
+                  X( J ) = TEMP
+                  KK     = KK   - ( N - J + 1 )
+  140          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 160, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = KX
+                  DO 150, K = KK, KK - ( N - ( J + 1 ) ), -1
+                     TEMP = TEMP - AP( K )*X( IX )
+                     IX   = IX   - INCX
+  150             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/AP( KK - N + J )
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+                  KK      = KK   - (N - J + 1 )
+  160          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DTPSV .
+*
+      END
diff --git a/costa/native/external/blas/dtrmm.f b/costa/native/external/blas/dtrmm.f
new file mode 100644
index 000000000..40c7740c9
--- /dev/null
+++ b/costa/native/external/blas/dtrmm.f
@@ -0,0 +1,355 @@
+      SUBROUTINE DTRMM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA,
+     $                   B, LDB )
+*     .. Scalar Arguments ..
+      CHARACTER*1        SIDE, UPLO, TRANSA, DIAG
+      INTEGER            M, N, LDA, LDB
+      DOUBLE PRECISION   ALPHA
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTRMM  performs one of the matrix-matrix operations
+*
+*     B := alpha*op( A )*B,   or   B := alpha*B*op( A ),
+*
+*  where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or
+*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
+*
+*     op( A ) = A   or   op( A ) = A'.
+*
+*  Parameters
+*  ==========
+*
+*  SIDE   - CHARACTER*1.
+*           On entry,  SIDE specifies whether  op( A ) multiplies B from
+*           the left or right as follows:
+*
+*              SIDE = 'L' or 'l'   B := alpha*op( A )*B.
+*
+*              SIDE = 'R' or 'r'   B := alpha*B*op( A ).
+*
+*           Unchanged on exit.
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix A is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANSA - CHARACTER*1.
+*           On entry, TRANSA specifies the form of op( A ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSA = 'N' or 'n'   op( A ) = A.
+*
+*              TRANSA = 'T' or 't'   op( A ) = A'.
+*
+*              TRANSA = 'C' or 'c'   op( A ) = A'.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit triangular
+*           as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of B. M must be at
+*           least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of B.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
+*           zero then  A is not referenced and  B need not be set before
+*           entry.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m
+*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
+*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
+*           upper triangular part of the array  A must contain the upper
+*           triangular matrix  and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
+*           lower triangular part of the array  A must contain the lower
+*           triangular matrix  and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
+*           A  are not referenced either,  but are assumed to be  unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
+*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
+*           then LDA must be at least max( 1, n ).
+*           Unchanged on exit.
+*
+*  B      - DOUBLE PRECISION array of DIMENSION ( LDB, n ).
+*           Before entry,  the leading  m by n part of the array  B must
+*           contain the matrix  B,  and  on exit  is overwritten  by the
+*           transformed matrix.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   LDB  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            LSIDE, NOUNIT, UPPER
+      INTEGER            I, INFO, J, K, NROWA
+      DOUBLE PRECISION   TEMP
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE         , ZERO
+      PARAMETER        ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      LSIDE  = LSAME( SIDE  , 'L' )
+      IF( LSIDE )THEN
+         NROWA = M
+      ELSE
+         NROWA = N
+      END IF
+      NOUNIT = LSAME( DIAG  , 'N' )
+      UPPER  = LSAME( UPLO  , 'U' )
+*
+      INFO   = 0
+      IF(      ( .NOT.LSIDE                ).AND.
+     $         ( .NOT.LSAME( SIDE  , 'R' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.UPPER                ).AND.
+     $         ( .NOT.LSAME( UPLO  , 'L' ) )      )THEN
+         INFO = 2
+      ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'T' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'C' ) )      )THEN
+         INFO = 3
+      ELSE IF( ( .NOT.LSAME( DIAG  , 'U' ) ).AND.
+     $         ( .NOT.LSAME( DIAG  , 'N' ) )      )THEN
+         INFO = 4
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 5
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 6
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 9
+      ELSE IF( LDB.LT.MAX( 1, M     ) )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DTRMM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         DO 20, J = 1, N
+            DO 10, I = 1, M
+               B( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSIDE )THEN
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*A*B.
+*
+            IF( UPPER )THEN
+               DO 50, J = 1, N
+                  DO 40, K = 1, M
+                     IF( B( K, J ).NE.ZERO )THEN
+                        TEMP = ALPHA*B( K, J )
+                        DO 30, I = 1, K - 1
+                           B( I, J ) = B( I, J ) + TEMP*A( I, K )
+   30                   CONTINUE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP*A( K, K )
+                        B( K, J ) = TEMP
+                     END IF
+   40             CONTINUE
+   50          CONTINUE
+            ELSE
+               DO 80, J = 1, N
+                  DO 70 K = M, 1, -1
+                     IF( B( K, J ).NE.ZERO )THEN
+                        TEMP      = ALPHA*B( K, J )
+                        B( K, J ) = TEMP
+                        IF( NOUNIT )
+     $                     B( K, J ) = B( K, J )*A( K, K )
+                        DO 60, I = K + 1, M
+                           B( I, J ) = B( I, J ) + TEMP*A( I, K )
+   60                   CONTINUE
+                     END IF
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*A'*B.
+*
+            IF( UPPER )THEN
+               DO 110, J = 1, N
+                  DO 100, I = M, 1, -1
+                     TEMP = B( I, J )
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( I, I )
+                     DO 90, K = 1, I - 1
+                        TEMP = TEMP + A( K, I )*B( K, J )
+   90                CONTINUE
+                     B( I, J ) = ALPHA*TEMP
+  100             CONTINUE
+  110          CONTINUE
+            ELSE
+               DO 140, J = 1, N
+                  DO 130, I = 1, M
+                     TEMP = B( I, J )
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( I, I )
+                     DO 120, K = I + 1, M
+                        TEMP = TEMP + A( K, I )*B( K, J )
+  120                CONTINUE
+                     B( I, J ) = ALPHA*TEMP
+  130             CONTINUE
+  140          CONTINUE
+            END IF
+         END IF
+      ELSE
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*B*A.
+*
+            IF( UPPER )THEN
+               DO 180, J = N, 1, -1
+                  TEMP = ALPHA
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 150, I = 1, M
+                     B( I, J ) = TEMP*B( I, J )
+  150             CONTINUE
+                  DO 170, K = 1, J - 1
+                     IF( A( K, J ).NE.ZERO )THEN
+                        TEMP = ALPHA*A( K, J )
+                        DO 160, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  160                   CONTINUE
+                     END IF
+  170             CONTINUE
+  180          CONTINUE
+            ELSE
+               DO 220, J = 1, N
+                  TEMP = ALPHA
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 190, I = 1, M
+                     B( I, J ) = TEMP*B( I, J )
+  190             CONTINUE
+                  DO 210, K = J + 1, N
+                     IF( A( K, J ).NE.ZERO )THEN
+                        TEMP = ALPHA*A( K, J )
+                        DO 200, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  200                   CONTINUE
+                     END IF
+  210             CONTINUE
+  220          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*B*A'.
+*
+            IF( UPPER )THEN
+               DO 260, K = 1, N
+                  DO 240, J = 1, K - 1
+                     IF( A( J, K ).NE.ZERO )THEN
+                        TEMP = ALPHA*A( J, K )
+                        DO 230, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  230                   CONTINUE
+                     END IF
+  240             CONTINUE
+                  TEMP = ALPHA
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( K, K )
+                  IF( TEMP.NE.ONE )THEN
+                     DO 250, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  250                CONTINUE
+                  END IF
+  260          CONTINUE
+            ELSE
+               DO 300, K = N, 1, -1
+                  DO 280, J = K + 1, N
+                     IF( A( J, K ).NE.ZERO )THEN
+                        TEMP = ALPHA*A( J, K )
+                        DO 270, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  270                   CONTINUE
+                     END IF
+  280             CONTINUE
+                  TEMP = ALPHA
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( K, K )
+                  IF( TEMP.NE.ONE )THEN
+                     DO 290, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  290                CONTINUE
+                  END IF
+  300          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DTRMM .
+*
+      END
diff --git a/costa/native/external/blas/dtrmv.f b/costa/native/external/blas/dtrmv.f
new file mode 100644
index 000000000..3d5c61b20
--- /dev/null
+++ b/costa/native/external/blas/dtrmv.f
@@ -0,0 +1,286 @@
+      SUBROUTINE DTRMV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTRMV  performs one of the matrix-vector operations
+*
+*     x := A*x,   or   x := A'*x,
+*
+*  where x is an n element vector and  A is an n by n unit, or non-unit,
+*  upper or lower triangular matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   x := A*x.
+*
+*              TRANS = 'T' or 't'   x := A'*x.
+*
+*              TRANS = 'C' or 'c'   x := A'*x.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular matrix and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular matrix and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced either, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x. On exit, X is overwritten with the
+*           tranformed vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER        ( ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP
+      INTEGER            I, INFO, IX, J, JX, KX
+      LOGICAL            NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DTRMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x := A*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     DO 10, I = 1, J - 1
+                        X( I ) = X( I ) + TEMP*A( I, J )
+   10                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( J, J )
+                  END IF
+   20          CONTINUE
+            ELSE
+               JX = KX
+               DO 40, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 30, I = 1, J - 1
+                        X( IX ) = X( IX ) + TEMP*A( I, J )
+                        IX      = IX      + INCX
+   30                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( J, J )
+                  END IF
+                  JX = JX + INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     DO 50, I = N, J + 1, -1
+                        X( I ) = X( I ) + TEMP*A( I, J )
+   50                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( J, J )
+                  END IF
+   60          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 80, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 70, I = N, J + 1, -1
+                        X( IX ) = X( IX ) + TEMP*A( I, J )
+                        IX      = IX      - INCX
+   70                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( J, J )
+                  END IF
+                  JX = JX - INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := A'*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 100, J = N, 1, -1
+                  TEMP = X( J )
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 90, I = J - 1, 1, -1
+                     TEMP = TEMP + A( I, J )*X( I )
+   90             CONTINUE
+                  X( J ) = TEMP
+  100          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 120, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 110, I = J - 1, 1, -1
+                     IX   = IX   - INCX
+                     TEMP = TEMP + A( I, J )*X( IX )
+  110             CONTINUE
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+  120          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 140, J = 1, N
+                  TEMP = X( J )
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 130, I = J + 1, N
+                     TEMP = TEMP + A( I, J )*X( I )
+  130             CONTINUE
+                  X( J ) = TEMP
+  140          CONTINUE
+            ELSE
+               JX = KX
+               DO 160, J = 1, N
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 150, I = J + 1, N
+                     IX   = IX   + INCX
+                     TEMP = TEMP + A( I, J )*X( IX )
+  150             CONTINUE
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+  160          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DTRMV .
+*
+      END
diff --git a/costa/native/external/blas/dtrsm.f b/costa/native/external/blas/dtrsm.f
new file mode 100644
index 000000000..e8425142b
--- /dev/null
+++ b/costa/native/external/blas/dtrsm.f
@@ -0,0 +1,378 @@
+      SUBROUTINE DTRSM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA,
+     $                   B, LDB )
+*     .. Scalar Arguments ..
+      CHARACTER*1        SIDE, UPLO, TRANSA, DIAG
+      INTEGER            M, N, LDA, LDB
+      DOUBLE PRECISION   ALPHA
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTRSM  solves one of the matrix equations
+*
+*     op( A )*X = alpha*B,   or   X*op( A ) = alpha*B,
+*
+*  where alpha is a scalar, X and B are m by n matrices, A is a unit, or
+*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
+*
+*     op( A ) = A   or   op( A ) = A'.
+*
+*  The matrix X is overwritten on B.
+*
+*  Parameters
+*  ==========
+*
+*  SIDE   - CHARACTER*1.
+*           On entry, SIDE specifies whether op( A ) appears on the left
+*           or right of X as follows:
+*
+*              SIDE = 'L' or 'l'   op( A )*X = alpha*B.
+*
+*              SIDE = 'R' or 'r'   X*op( A ) = alpha*B.
+*
+*           Unchanged on exit.
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix A is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANSA - CHARACTER*1.
+*           On entry, TRANSA specifies the form of op( A ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSA = 'N' or 'n'   op( A ) = A.
+*
+*              TRANSA = 'T' or 't'   op( A ) = A'.
+*
+*              TRANSA = 'C' or 'c'   op( A ) = A'.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit triangular
+*           as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of B. M must be at
+*           least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of B.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
+*           zero then  A is not referenced and  B need not be set before
+*           entry.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m
+*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
+*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
+*           upper triangular part of the array  A must contain the upper
+*           triangular matrix  and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
+*           lower triangular part of the array  A must contain the lower
+*           triangular matrix  and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
+*           A  are not referenced either,  but are assumed to be  unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
+*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
+*           then LDA must be at least max( 1, n ).
+*           Unchanged on exit.
+*
+*  B      - DOUBLE PRECISION array of DIMENSION ( LDB, n ).
+*           Before entry,  the leading  m by n part of the array  B must
+*           contain  the  right-hand  side  matrix  B,  and  on exit  is
+*           overwritten by the solution matrix  X.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   LDB  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            LSIDE, NOUNIT, UPPER
+      INTEGER            I, INFO, J, K, NROWA
+      DOUBLE PRECISION   TEMP
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE         , ZERO
+      PARAMETER        ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      LSIDE  = LSAME( SIDE  , 'L' )
+      IF( LSIDE )THEN
+         NROWA = M
+      ELSE
+         NROWA = N
+      END IF
+      NOUNIT = LSAME( DIAG  , 'N' )
+      UPPER  = LSAME( UPLO  , 'U' )
+*
+      INFO   = 0
+      IF(      ( .NOT.LSIDE                ).AND.
+     $         ( .NOT.LSAME( SIDE  , 'R' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.UPPER                ).AND.
+     $         ( .NOT.LSAME( UPLO  , 'L' ) )      )THEN
+         INFO = 2
+      ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'T' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'C' ) )      )THEN
+         INFO = 3
+      ELSE IF( ( .NOT.LSAME( DIAG  , 'U' ) ).AND.
+     $         ( .NOT.LSAME( DIAG  , 'N' ) )      )THEN
+         INFO = 4
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 5
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 6
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 9
+      ELSE IF( LDB.LT.MAX( 1, M     ) )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DTRSM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         DO 20, J = 1, N
+            DO 10, I = 1, M
+               B( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSIDE )THEN
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*inv( A )*B.
+*
+            IF( UPPER )THEN
+               DO 60, J = 1, N
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 30, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+   30                CONTINUE
+                  END IF
+                  DO 50, K = M, 1, -1
+                     IF( B( K, J ).NE.ZERO )THEN
+                        IF( NOUNIT )
+     $                     B( K, J ) = B( K, J )/A( K, K )
+                        DO 40, I = 1, K - 1
+                           B( I, J ) = B( I, J ) - B( K, J )*A( I, K )
+   40                   CONTINUE
+                     END IF
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 100, J = 1, N
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 70, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+   70                CONTINUE
+                  END IF
+                  DO 90 K = 1, M
+                     IF( B( K, J ).NE.ZERO )THEN
+                        IF( NOUNIT )
+     $                     B( K, J ) = B( K, J )/A( K, K )
+                        DO 80, I = K + 1, M
+                           B( I, J ) = B( I, J ) - B( K, J )*A( I, K )
+   80                   CONTINUE
+                     END IF
+   90             CONTINUE
+  100          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*inv( A' )*B.
+*
+            IF( UPPER )THEN
+               DO 130, J = 1, N
+                  DO 120, I = 1, M
+                     TEMP = ALPHA*B( I, J )
+                     DO 110, K = 1, I - 1
+                        TEMP = TEMP - A( K, I )*B( K, J )
+  110                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( I, I )
+                     B( I, J ) = TEMP
+  120             CONTINUE
+  130          CONTINUE
+            ELSE
+               DO 160, J = 1, N
+                  DO 150, I = M, 1, -1
+                     TEMP = ALPHA*B( I, J )
+                     DO 140, K = I + 1, M
+                        TEMP = TEMP - A( K, I )*B( K, J )
+  140                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( I, I )
+                     B( I, J ) = TEMP
+  150             CONTINUE
+  160          CONTINUE
+            END IF
+         END IF
+      ELSE
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*B*inv( A ).
+*
+            IF( UPPER )THEN
+               DO 210, J = 1, N
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 170, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+  170                CONTINUE
+                  END IF
+                  DO 190, K = 1, J - 1
+                     IF( A( K, J ).NE.ZERO )THEN
+                        DO 180, I = 1, M
+                           B( I, J ) = B( I, J ) - A( K, J )*B( I, K )
+  180                   CONTINUE
+                     END IF
+  190             CONTINUE
+                  IF( NOUNIT )THEN
+                     TEMP = ONE/A( J, J )
+                     DO 200, I = 1, M
+                        B( I, J ) = TEMP*B( I, J )
+  200                CONTINUE
+                  END IF
+  210          CONTINUE
+            ELSE
+               DO 260, J = N, 1, -1
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 220, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+  220                CONTINUE
+                  END IF
+                  DO 240, K = J + 1, N
+                     IF( A( K, J ).NE.ZERO )THEN
+                        DO 230, I = 1, M
+                           B( I, J ) = B( I, J ) - A( K, J )*B( I, K )
+  230                   CONTINUE
+                     END IF
+  240             CONTINUE
+                  IF( NOUNIT )THEN
+                     TEMP = ONE/A( J, J )
+                     DO 250, I = 1, M
+                       B( I, J ) = TEMP*B( I, J )
+  250                CONTINUE
+                  END IF
+  260          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*B*inv( A' ).
+*
+            IF( UPPER )THEN
+               DO 310, K = N, 1, -1
+                  IF( NOUNIT )THEN
+                     TEMP = ONE/A( K, K )
+                     DO 270, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  270                CONTINUE
+                  END IF
+                  DO 290, J = 1, K - 1
+                     IF( A( J, K ).NE.ZERO )THEN
+                        TEMP = A( J, K )
+                        DO 280, I = 1, M
+                           B( I, J ) = B( I, J ) - TEMP*B( I, K )
+  280                   CONTINUE
+                     END IF
+  290             CONTINUE
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 300, I = 1, M
+                        B( I, K ) = ALPHA*B( I, K )
+  300                CONTINUE
+                  END IF
+  310          CONTINUE
+            ELSE
+               DO 360, K = 1, N
+                  IF( NOUNIT )THEN
+                     TEMP = ONE/A( K, K )
+                     DO 320, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  320                CONTINUE
+                  END IF
+                  DO 340, J = K + 1, N
+                     IF( A( J, K ).NE.ZERO )THEN
+                        TEMP = A( J, K )
+                        DO 330, I = 1, M
+                           B( I, J ) = B( I, J ) - TEMP*B( I, K )
+  330                   CONTINUE
+                     END IF
+  340             CONTINUE
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 350, I = 1, M
+                        B( I, K ) = ALPHA*B( I, K )
+  350                CONTINUE
+                  END IF
+  360          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DTRSM .
+*
+      END
diff --git a/costa/native/external/blas/dtrsv.f b/costa/native/external/blas/dtrsv.f
new file mode 100644
index 000000000..9c3e90a97
--- /dev/null
+++ b/costa/native/external/blas/dtrsv.f
@@ -0,0 +1,289 @@
+      SUBROUTINE DTRSV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTRSV  solves one of the systems of equations
+*
+*     A*x = b,   or   A'*x = b,
+*
+*  where b and x are n element vectors and A is an n by n unit, or
+*  non-unit, upper or lower triangular matrix.
+*
+*  No test for singularity or near-singularity is included in this
+*  routine. Such tests must be performed before calling this routine.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the equations to be solved as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   A*x = b.
+*
+*              TRANS = 'T' or 't'   A'*x = b.
+*
+*              TRANS = 'C' or 'c'   A'*x = b.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular matrix and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular matrix and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced either, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*  X      - DOUBLE PRECISION array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element right-hand side vector b. On exit, X is overwritten
+*           with the solution vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER        ( ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      DOUBLE PRECISION   TEMP
+      INTEGER            I, INFO, IX, J, JX, KX
+      LOGICAL            NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'DTRSV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x := inv( A )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( J, J )
+                     TEMP = X( J )
+                     DO 10, I = J - 1, 1, -1
+                        X( I ) = X( I ) - TEMP*A( I, J )
+   10                CONTINUE
+                  END IF
+   20          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 40, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( J, J )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 30, I = J - 1, 1, -1
+                        IX      = IX      - INCX
+                        X( IX ) = X( IX ) - TEMP*A( I, J )
+   30                CONTINUE
+                  END IF
+                  JX = JX - INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( J, J )
+                     TEMP = X( J )
+                     DO 50, I = J + 1, N
+                        X( I ) = X( I ) - TEMP*A( I, J )
+   50                CONTINUE
+                  END IF
+   60          CONTINUE
+            ELSE
+               JX = KX
+               DO 80, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( J, J )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 70, I = J + 1, N
+                        IX      = IX      + INCX
+                        X( IX ) = X( IX ) - TEMP*A( I, J )
+   70                CONTINUE
+                  END IF
+                  JX = JX + INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := inv( A' )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 100, J = 1, N
+                  TEMP = X( J )
+                  DO 90, I = 1, J - 1
+                     TEMP = TEMP - A( I, J )*X( I )
+   90             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( J, J )
+                  X( J ) = TEMP
+  100          CONTINUE
+            ELSE
+               JX = KX
+               DO 120, J = 1, N
+                  TEMP = X( JX )
+                  IX   = KX
+                  DO 110, I = 1, J - 1
+                     TEMP = TEMP - A( I, J )*X( IX )
+                     IX   = IX   + INCX
+  110             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( J, J )
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+  120          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 140, J = N, 1, -1
+                  TEMP = X( J )
+                  DO 130, I = N, J + 1, -1
+                     TEMP = TEMP - A( I, J )*X( I )
+  130             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( J, J )
+                  X( J ) = TEMP
+  140          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 160, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = KX
+                  DO 150, I = N, J + 1, -1
+                     TEMP = TEMP - A( I, J )*X( IX )
+                     IX   = IX   - INCX
+  150             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( J, J )
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+  160          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DTRSV .
+*
+      END
diff --git a/costa/native/external/blas/dzasum.f b/costa/native/external/blas/dzasum.f
new file mode 100644
index 000000000..d21c1ffc9
--- /dev/null
+++ b/costa/native/external/blas/dzasum.f
@@ -0,0 +1,34 @@
+      double precision function dzasum(n,zx,incx)
+c
+c     takes the sum of the absolute values.
+c     jack dongarra, 3/11/78.
+c     modified 3/93 to return if incx .le. 0.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      double complex zx(*)
+      double precision stemp,dcabs1
+      integer i,incx,ix,n
+c
+      dzasum = 0.0d0
+      stemp = 0.0d0
+      if( n.le.0 .or. incx.le.0 )return
+      if(incx.eq.1)go to 20
+c
+c        code for increment not equal to 1
+c
+      ix = 1
+      do 10 i = 1,n
+        stemp = stemp + dcabs1(zx(ix))
+        ix = ix + incx
+   10 continue
+      dzasum = stemp
+      return
+c
+c        code for increment equal to 1
+c
+   20 do 30 i = 1,n
+        stemp = stemp + dcabs1(zx(i))
+   30 continue
+      dzasum = stemp
+      return
+      end
diff --git a/costa/native/external/blas/dznrm2.f b/costa/native/external/blas/dznrm2.f
new file mode 100644
index 000000000..205ce3932
--- /dev/null
+++ b/costa/native/external/blas/dznrm2.f
@@ -0,0 +1,67 @@
+      DOUBLE PRECISION FUNCTION DZNRM2( N, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER                           INCX, N
+*     .. Array Arguments ..
+      COMPLEX*16                        X( * )
+*     ..
+*
+*  DZNRM2 returns the euclidean norm of a vector via the function
+*  name, so that
+*
+*     DZNRM2 := sqrt( conjg( x' )*x )
+*
+*
+*
+*  -- This version written on 25-October-1982.
+*     Modified on 14-October-1993 to inline the call to ZLASSQ.
+*     Sven Hammarling, Nag Ltd.
+*
+*
+*     .. Parameters ..
+      DOUBLE PRECISION      ONE         , ZERO
+      PARAMETER           ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     .. Local Scalars ..
+      INTEGER               IX
+      DOUBLE PRECISION      NORM, SCALE, SSQ, TEMP
+*     .. Intrinsic Functions ..
+      INTRINSIC             ABS, DIMAG, DBLE, SQRT
+*     ..
+*     .. Executable Statements ..
+      IF( N.LT.1 .OR. INCX.LT.1 )THEN
+         NORM  = ZERO
+      ELSE
+         SCALE = ZERO
+         SSQ   = ONE
+*        The following loop is equivalent to this call to the LAPACK
+*        auxiliary routine:
+*        CALL ZLASSQ( N, X, INCX, SCALE, SSQ )
+*
+         DO 10, IX = 1, 1 + ( N - 1 )*INCX, INCX
+            IF( DBLE( X( IX ) ).NE.ZERO )THEN
+               TEMP = ABS( DBLE( X( IX ) ) )
+               IF( SCALE.LT.TEMP )THEN
+                  SSQ   = ONE   + SSQ*( SCALE/TEMP )**2
+                  SCALE = TEMP
+               ELSE
+                  SSQ   = SSQ   +     ( TEMP/SCALE )**2
+               END IF
+            END IF
+            IF( DIMAG( X( IX ) ).NE.ZERO )THEN
+               TEMP = ABS( DIMAG( X( IX ) ) )
+               IF( SCALE.LT.TEMP )THEN
+                  SSQ   = ONE   + SSQ*( SCALE/TEMP )**2
+                  SCALE = TEMP
+               ELSE
+                  SSQ   = SSQ   +     ( TEMP/SCALE )**2
+               END IF
+            END IF
+   10    CONTINUE
+         NORM  = SCALE * SQRT( SSQ )
+      END IF
+*
+      DZNRM2 = NORM
+      RETURN
+*
+*     End of DZNRM2.
+*
+      END
diff --git a/costa/native/external/blas/icamax.f b/costa/native/external/blas/icamax.f
new file mode 100644
index 000000000..b13d4904f
--- /dev/null
+++ b/costa/native/external/blas/icamax.f
@@ -0,0 +1,43 @@
+      integer function icamax(n,cx,incx)
+c
+c     finds the index of element having max. absolute value.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 3/93 to return if incx .le. 0.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      complex cx(*)
+      real smax
+      integer i,incx,ix,n
+      complex zdum
+      real cabs1
+      cabs1(zdum) = abs(real(zdum)) + abs(aimag(zdum))
+c
+      icamax = 0
+      if( n.lt.1 .or. incx.le.0 ) return
+      icamax = 1
+      if(n.eq.1)return
+      if(incx.eq.1)go to 20
+c
+c        code for increment not equal to 1
+c
+      ix = 1
+      smax = cabs1(cx(1))
+      ix = ix + incx
+      do 10 i = 2,n
+         if(cabs1(cx(ix)).le.smax) go to 5
+         icamax = i
+         smax = cabs1(cx(ix))
+    5    ix = ix + incx
+   10 continue
+      return
+c
+c        code for increment equal to 1
+c
+   20 smax = cabs1(cx(1))
+      do 30 i = 2,n
+         if(cabs1(cx(i)).le.smax) go to 30
+         icamax = i
+         smax = cabs1(cx(i))
+   30 continue
+      return
+      end
diff --git a/costa/native/external/blas/idamax.f b/costa/native/external/blas/idamax.f
new file mode 100644
index 000000000..59d80dc41
--- /dev/null
+++ b/costa/native/external/blas/idamax.f
@@ -0,0 +1,39 @@
+      integer function idamax(n,dx,incx)
+c
+c     finds the index of element having max. absolute value.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 3/93 to return if incx .le. 0.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      double precision dx(*),dmax
+      integer i,incx,ix,n
+c
+      idamax = 0
+      if( n.lt.1 .or. incx.le.0 ) return
+      idamax = 1
+      if(n.eq.1)return
+      if(incx.eq.1)go to 20
+c
+c        code for increment not equal to 1
+c
+      ix = 1
+      dmax = dabs(dx(1))
+      ix = ix + incx
+      do 10 i = 2,n
+         if(dabs(dx(ix)).le.dmax) go to 5
+         idamax = i
+         dmax = dabs(dx(ix))
+    5    ix = ix + incx
+   10 continue
+      return
+c
+c        code for increment equal to 1
+c
+   20 dmax = dabs(dx(1))
+      do 30 i = 2,n
+         if(dabs(dx(i)).le.dmax) go to 30
+         idamax = i
+         dmax = dabs(dx(i))
+   30 continue
+      return
+      end
diff --git a/costa/native/external/blas/isamax.f b/costa/native/external/blas/isamax.f
new file mode 100644
index 000000000..a649e0281
--- /dev/null
+++ b/costa/native/external/blas/isamax.f
@@ -0,0 +1,39 @@
+      integer function isamax(n,sx,incx)
+c
+c     finds the index of element having max. absolute value.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 3/93 to return if incx .le. 0.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      real sx(*),smax
+      integer i,incx,ix,n
+c
+      isamax = 0
+      if( n.lt.1 .or. incx.le.0 ) return
+      isamax = 1
+      if(n.eq.1)return
+      if(incx.eq.1)go to 20
+c
+c        code for increment not equal to 1
+c
+      ix = 1
+      smax = abs(sx(1))
+      ix = ix + incx
+      do 10 i = 2,n
+         if(abs(sx(ix)).le.smax) go to 5
+         isamax = i
+         smax = abs(sx(ix))
+    5    ix = ix + incx
+   10 continue
+      return
+c
+c        code for increment equal to 1
+c
+   20 smax = abs(sx(1))
+      do 30 i = 2,n
+         if(abs(sx(i)).le.smax) go to 30
+         isamax = i
+         smax = abs(sx(i))
+   30 continue
+      return
+      end
diff --git a/costa/native/external/blas/izamax.f b/costa/native/external/blas/izamax.f
new file mode 100644
index 000000000..ec14f827d
--- /dev/null
+++ b/costa/native/external/blas/izamax.f
@@ -0,0 +1,41 @@
+      integer function izamax(n,zx,incx)
+c
+c     finds the index of element having max. absolute value.
+c     jack dongarra, 1/15/85.
+c     modified 3/93 to return if incx .le. 0.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      double complex zx(*)
+      double precision smax
+      integer i,incx,ix,n
+      double precision dcabs1
+c
+      izamax = 0
+      if( n.lt.1 .or. incx.le.0 )return
+      izamax = 1
+      if(n.eq.1)return
+      if(incx.eq.1)go to 20
+c
+c        code for increment not equal to 1
+c
+      ix = 1
+      smax = dcabs1(zx(1))
+      ix = ix + incx
+      do 10 i = 2,n
+         if(dcabs1(zx(ix)).le.smax) go to 5
+         izamax = i
+         smax = dcabs1(zx(ix))
+    5    ix = ix + incx
+   10 continue
+      return
+c
+c        code for increment equal to 1
+c
+   20 smax = dcabs1(zx(1))
+      do 30 i = 2,n
+         if(dcabs1(zx(i)).le.smax) go to 30
+         izamax = i
+         smax = dcabs1(zx(i))
+   30 continue
+      return
+      end
diff --git a/costa/native/external/blas/lsame.f b/costa/native/external/blas/lsame.f
new file mode 100644
index 000000000..bf25d86f2
--- /dev/null
+++ b/costa/native/external/blas/lsame.f
@@ -0,0 +1,87 @@
+      LOGICAL          FUNCTION LSAME( CA, CB )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          CA, CB
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  LSAME returns .TRUE. if CA is the same letter as CB regardless of
+*  case.
+*
+*  Arguments
+*  =========
+*
+*  CA      (input) CHARACTER*1
+*  CB      (input) CHARACTER*1
+*          CA and CB specify the single characters to be compared.
+*
+* =====================================================================
+*
+*     .. Intrinsic Functions ..
+      INTRINSIC          ICHAR
+*     ..
+*     .. Local Scalars ..
+      INTEGER            INTA, INTB, ZCODE
+*     ..
+*     .. Executable Statements ..
+*
+*     Test if the characters are equal
+*
+      LSAME = CA.EQ.CB
+      IF( LSAME )
+     $   RETURN
+*
+*     Now test for equivalence if both characters are alphabetic.
+*
+      ZCODE = ICHAR( 'Z' )
+*
+*     Use 'Z' rather than 'A' so that ASCII can be detected on Prime
+*     machines, on which ICHAR returns a value with bit 8 set.
+*     ICHAR('A') on Prime machines returns 193 which is the same as
+*     ICHAR('A') on an EBCDIC machine.
+*
+      INTA = ICHAR( CA )
+      INTB = ICHAR( CB )
+*
+      IF( ZCODE.EQ.90 .OR. ZCODE.EQ.122 ) THEN
+*
+*        ASCII is assumed - ZCODE is the ASCII code of either lower or
+*        upper case 'Z'.
+*
+         IF( INTA.GE.97 .AND. INTA.LE.122 ) INTA = INTA - 32
+         IF( INTB.GE.97 .AND. INTB.LE.122 ) INTB = INTB - 32
+*
+      ELSE IF( ZCODE.EQ.233 .OR. ZCODE.EQ.169 ) THEN
+*
+*        EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or
+*        upper case 'Z'.
+*
+         IF( INTA.GE.129 .AND. INTA.LE.137 .OR.
+     $       INTA.GE.145 .AND. INTA.LE.153 .OR.
+     $       INTA.GE.162 .AND. INTA.LE.169 ) INTA = INTA + 64
+         IF( INTB.GE.129 .AND. INTB.LE.137 .OR.
+     $       INTB.GE.145 .AND. INTB.LE.153 .OR.
+     $       INTB.GE.162 .AND. INTB.LE.169 ) INTB = INTB + 64
+*
+      ELSE IF( ZCODE.EQ.218 .OR. ZCODE.EQ.250 ) THEN
+*
+*        ASCII is assumed, on Prime machines - ZCODE is the ASCII code
+*        plus 128 of either lower or upper case 'Z'.
+*
+         IF( INTA.GE.225 .AND. INTA.LE.250 ) INTA = INTA - 32
+         IF( INTB.GE.225 .AND. INTB.LE.250 ) INTB = INTB - 32
+      END IF
+      LSAME = INTA.EQ.INTB
+*
+*     RETURN
+*
+*     End of LSAME
+*
+      END
diff --git a/costa/native/external/blas/sasum.f b/costa/native/external/blas/sasum.f
new file mode 100644
index 000000000..8697579d4
--- /dev/null
+++ b/costa/native/external/blas/sasum.f
@@ -0,0 +1,44 @@
+      real function sasum(n,sx,incx)
+c
+c     takes the sum of the absolute values.
+c     uses unrolled loops for increment equal to one.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 3/93 to return if incx .le. 0.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      real sx(*),stemp
+      integer i,incx,m,mp1,n,nincx
+c
+      sasum = 0.0e0
+      stemp = 0.0e0
+      if( n.le.0 .or. incx.le.0 )return
+      if(incx.eq.1)go to 20
+c
+c        code for increment not equal to 1
+c
+      nincx = n*incx
+      do 10 i = 1,nincx,incx
+        stemp = stemp + abs(sx(i))
+   10 continue
+      sasum = stemp
+      return
+c
+c        code for increment equal to 1
+c
+c
+c        clean-up loop
+c
+   20 m = mod(n,6)
+      if( m .eq. 0 ) go to 40
+      do 30 i = 1,m
+        stemp = stemp + abs(sx(i))
+   30 continue
+      if( n .lt. 6 ) go to 60
+   40 mp1 = m + 1
+      do 50 i = mp1,n,6
+        stemp = stemp + abs(sx(i)) + abs(sx(i + 1)) + abs(sx(i + 2))
+     *  + abs(sx(i + 3)) + abs(sx(i + 4)) + abs(sx(i + 5))
+   50 continue
+   60 sasum = stemp
+      return
+      end
diff --git a/costa/native/external/blas/saxpy.f b/costa/native/external/blas/saxpy.f
new file mode 100644
index 000000000..4b1c70387
--- /dev/null
+++ b/costa/native/external/blas/saxpy.f
@@ -0,0 +1,48 @@
+      subroutine saxpy(n,sa,sx,incx,sy,incy)
+c
+c     constant times a vector plus a vector.
+c     uses unrolled loop for increments equal to one.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      real sx(*),sy(*),sa
+      integer i,incx,incy,ix,iy,m,mp1,n
+c
+      if(n.le.0)return
+      if (sa .eq. 0.0) return
+      if(incx.eq.1.and.incy.eq.1)go to 20
+c
+c        code for unequal increments or equal increments
+c          not equal to 1
+c
+      ix = 1
+      iy = 1
+      if(incx.lt.0)ix = (-n+1)*incx + 1
+      if(incy.lt.0)iy = (-n+1)*incy + 1
+      do 10 i = 1,n
+        sy(iy) = sy(iy) + sa*sx(ix)
+        ix = ix + incx
+        iy = iy + incy
+   10 continue
+      return
+c
+c        code for both increments equal to 1
+c
+c
+c        clean-up loop
+c
+   20 m = mod(n,4)
+      if( m .eq. 0 ) go to 40
+      do 30 i = 1,m
+        sy(i) = sy(i) + sa*sx(i)
+   30 continue
+      if( n .lt. 4 ) return
+   40 mp1 = m + 1
+      do 50 i = mp1,n,4
+        sy(i) = sy(i) + sa*sx(i)
+        sy(i + 1) = sy(i + 1) + sa*sx(i + 1)
+        sy(i + 2) = sy(i + 2) + sa*sx(i + 2)
+        sy(i + 3) = sy(i + 3) + sa*sx(i + 3)
+   50 continue
+      return
+      end
diff --git a/costa/native/external/blas/scasum.f b/costa/native/external/blas/scasum.f
new file mode 100644
index 000000000..b6c0ea077
--- /dev/null
+++ b/costa/native/external/blas/scasum.f
@@ -0,0 +1,34 @@
+      real function scasum(n,cx,incx)
+c
+c     takes the sum of the absolute values of a complex vector and
+c     returns a single precision result.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 3/93 to return if incx .le. 0.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      complex cx(*)
+      real stemp
+      integer i,incx,n,nincx
+c
+      scasum = 0.0e0
+      stemp = 0.0e0
+      if( n.le.0 .or. incx.le.0 )return
+      if(incx.eq.1)go to 20
+c
+c        code for increment not equal to 1
+c
+      nincx = n*incx
+      do 10 i = 1,nincx,incx
+        stemp = stemp + abs(real(cx(i))) + abs(aimag(cx(i)))
+   10 continue
+      scasum = stemp
+      return
+c
+c        code for increment equal to 1
+c
+   20 do 30 i = 1,n
+        stemp = stemp + abs(real(cx(i))) + abs(aimag(cx(i)))
+   30 continue
+      scasum = stemp
+      return
+      end
diff --git a/costa/native/external/blas/scnrm2.f b/costa/native/external/blas/scnrm2.f
new file mode 100644
index 000000000..8bfe9ae7f
--- /dev/null
+++ b/costa/native/external/blas/scnrm2.f
@@ -0,0 +1,67 @@
+      REAL             FUNCTION SCNRM2( N, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER                           INCX, N
+*     .. Array Arguments ..
+      COMPLEX                           X( * )
+*     ..
+*
+*  SCNRM2 returns the euclidean norm of a vector via the function
+*  name, so that
+*
+*     SCNRM2 := sqrt( conjg( x' )*x )
+*
+*
+*
+*  -- This version written on 25-October-1982.
+*     Modified on 14-October-1993 to inline the call to CLASSQ.
+*     Sven Hammarling, Nag Ltd.
+*
+*
+*     .. Parameters ..
+      REAL                  ONE         , ZERO
+      PARAMETER           ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      INTEGER               IX
+      REAL                  NORM, SCALE, SSQ, TEMP
+*     .. Intrinsic Functions ..
+      INTRINSIC             ABS, AIMAG, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+      IF( N.LT.1 .OR. INCX.LT.1 )THEN
+         NORM  = ZERO
+      ELSE
+         SCALE = ZERO
+         SSQ   = ONE
+*        The following loop is equivalent to this call to the LAPACK
+*        auxiliary routine:
+*        CALL CLASSQ( N, X, INCX, SCALE, SSQ )
+*
+         DO 10, IX = 1, 1 + ( N - 1 )*INCX, INCX
+            IF( REAL( X( IX ) ).NE.ZERO )THEN
+               TEMP = ABS( REAL( X( IX ) ) )
+               IF( SCALE.LT.TEMP )THEN
+                  SSQ   = ONE   + SSQ*( SCALE/TEMP )**2
+                  SCALE = TEMP
+               ELSE
+                  SSQ   = SSQ   +     ( TEMP/SCALE )**2
+               END IF
+            END IF
+            IF( AIMAG( X( IX ) ).NE.ZERO )THEN
+               TEMP = ABS( AIMAG( X( IX ) ) )
+               IF( SCALE.LT.TEMP )THEN
+                  SSQ   = ONE   + SSQ*( SCALE/TEMP )**2
+                  SCALE = TEMP
+               ELSE
+                  SSQ   = SSQ   +     ( TEMP/SCALE )**2
+               END IF
+            END IF
+   10    CONTINUE
+         NORM  = SCALE * SQRT( SSQ )
+      END IF
+*
+      SCNRM2 = NORM
+      RETURN
+*
+*     End of SCNRM2.
+*
+      END
diff --git a/costa/native/external/blas/scopy.f b/costa/native/external/blas/scopy.f
new file mode 100644
index 000000000..0202778ca
--- /dev/null
+++ b/costa/native/external/blas/scopy.f
@@ -0,0 +1,50 @@
+      subroutine scopy(n,sx,incx,sy,incy)
+c
+c     copies a vector, x, to a vector, y.
+c     uses unrolled loops for increments equal to 1.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      real sx(*),sy(*)
+      integer i,incx,incy,ix,iy,m,mp1,n
+c
+      if(n.le.0)return
+      if(incx.eq.1.and.incy.eq.1)go to 20
+c
+c        code for unequal increments or equal increments
+c          not equal to 1
+c
+      ix = 1
+      iy = 1
+      if(incx.lt.0)ix = (-n+1)*incx + 1
+      if(incy.lt.0)iy = (-n+1)*incy + 1
+      do 10 i = 1,n
+        sy(iy) = sx(ix)
+        ix = ix + incx
+        iy = iy + incy
+   10 continue
+      return
+c
+c        code for both increments equal to 1
+c
+c
+c        clean-up loop
+c
+   20 m = mod(n,7)
+      if( m .eq. 0 ) go to 40
+      do 30 i = 1,m
+        sy(i) = sx(i)
+   30 continue
+      if( n .lt. 7 ) return
+   40 mp1 = m + 1
+      do 50 i = mp1,n,7
+        sy(i) = sx(i)
+        sy(i + 1) = sx(i + 1)
+        sy(i + 2) = sx(i + 2)
+        sy(i + 3) = sx(i + 3)
+        sy(i + 4) = sx(i + 4)
+        sy(i + 5) = sx(i + 5)
+        sy(i + 6) = sx(i + 6)
+   50 continue
+      return
+      end
diff --git a/costa/native/external/blas/sdot.f b/costa/native/external/blas/sdot.f
new file mode 100644
index 000000000..416d90a59
--- /dev/null
+++ b/costa/native/external/blas/sdot.f
@@ -0,0 +1,49 @@
+      real function sdot(n,sx,incx,sy,incy)
+c
+c     forms the dot product of two vectors.
+c     uses unrolled loops for increments equal to one.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      real sx(*),sy(*),stemp
+      integer i,incx,incy,ix,iy,m,mp1,n
+c
+      stemp = 0.0e0
+      sdot = 0.0e0
+      if(n.le.0)return
+      if(incx.eq.1.and.incy.eq.1)go to 20
+c
+c        code for unequal increments or equal increments
+c          not equal to 1
+c
+      ix = 1
+      iy = 1
+      if(incx.lt.0)ix = (-n+1)*incx + 1
+      if(incy.lt.0)iy = (-n+1)*incy + 1
+      do 10 i = 1,n
+        stemp = stemp + sx(ix)*sy(iy)
+        ix = ix + incx
+        iy = iy + incy
+   10 continue
+      sdot = stemp
+      return
+c
+c        code for both increments equal to 1
+c
+c
+c        clean-up loop
+c
+   20 m = mod(n,5)
+      if( m .eq. 0 ) go to 40
+      do 30 i = 1,m
+        stemp = stemp + sx(i)*sy(i)
+   30 continue
+      if( n .lt. 5 ) go to 60
+   40 mp1 = m + 1
+      do 50 i = mp1,n,5
+        stemp = stemp + sx(i)*sy(i) + sx(i + 1)*sy(i + 1) +
+     *   sx(i + 2)*sy(i + 2) + sx(i + 3)*sy(i + 3) + sx(i + 4)*sy(i + 4)
+   50 continue
+   60 sdot = stemp
+      return
+      end
diff --git a/costa/native/external/blas/sgbmv.f b/costa/native/external/blas/sgbmv.f
new file mode 100644
index 000000000..926abd735
--- /dev/null
+++ b/costa/native/external/blas/sgbmv.f
@@ -0,0 +1,300 @@
+      SUBROUTINE SGBMV ( TRANS, M, N, KL, KU, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      REAL               ALPHA, BETA
+      INTEGER            INCX, INCY, KL, KU, LDA, M, N
+      CHARACTER*1        TRANS
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGBMV  performs one of the matrix-vector operations
+*
+*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are vectors and A is an
+*  m by n band matrix, with kl sub-diagonals and ku super-diagonals.
+*
+*  Parameters
+*  ==========
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
+*
+*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.
+*
+*              TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of the matrix A.
+*           M must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  KL     - INTEGER.
+*           On entry, KL specifies the number of sub-diagonals of the
+*           matrix A. KL must satisfy  0 .le. KL.
+*           Unchanged on exit.
+*
+*  KU     - INTEGER.
+*           On entry, KU specifies the number of super-diagonals of the
+*           matrix A. KU must satisfy  0 .le. KU.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, n ).
+*           Before entry, the leading ( kl + ku + 1 ) by n part of the
+*           array A must contain the matrix of coefficients, supplied
+*           column by column, with the leading diagonal of the matrix in
+*           row ( ku + 1 ) of the array, the first super-diagonal
+*           starting at position 2 in row ku, the first sub-diagonal
+*           starting at position 1 in row ( ku + 2 ), and so on.
+*           Elements in the array A that do not correspond to elements
+*           in the band matrix (such as the top left ku by ku triangle)
+*           are not referenced.
+*           The following program segment will transfer a band matrix
+*           from conventional full matrix storage to band storage:
+*
+*                 DO 20, J = 1, N
+*                    K = KU + 1 - J
+*                    DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL )
+*                       A( K + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( kl + ku + 1 ).
+*           Unchanged on exit.
+*
+*  X      - REAL             array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
+*           Before entry, the incremented array X must contain the
+*           vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - REAL            .
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - REAL             array of DIMENSION at least
+*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
+*           Before entry, the incremented array Y must contain the
+*           vector y. On exit, Y is overwritten by the updated vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*     .. Parameters ..
+      REAL               ONE         , ZERO
+      PARAMETER        ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP
+      INTEGER            I, INFO, IX, IY, J, JX, JY, K, KUP1, KX, KY,
+     $                   LENX, LENY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 1
+      ELSE IF( M.LT.0 )THEN
+         INFO = 2
+      ELSE IF( N.LT.0 )THEN
+         INFO = 3
+      ELSE IF( KL.LT.0 )THEN
+         INFO = 4
+      ELSE IF( KU.LT.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.( KL + KU + 1 ) )THEN
+         INFO = 8
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 10
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 13
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'SGBMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
+*     up the start points in  X  and  Y.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+         LENX = N
+         LENY = M
+      ELSE
+         LENX = M
+         LENY = N
+      END IF
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( LENX - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( LENY - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the band part of A.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, LENY
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, LENY
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, LENY
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, LENY
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      KUP1 = KU + 1
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  y := alpha*A*x + y.
+*
+         JX = KX
+         IF( INCY.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  K    = KUP1 - J
+                  DO 50, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     Y( I ) = Y( I ) + TEMP*A( K + I, J )
+   50             CONTINUE
+               END IF
+               JX = JX + INCX
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IY   = KY
+                  K    = KUP1 - J
+                  DO 70, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     Y( IY ) = Y( IY ) + TEMP*A( K + I, J )
+                     IY      = IY      + INCY
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+               IF( J.GT.KU )
+     $            KY = KY + INCY
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y := alpha*A'*x + y.
+*
+         JY = KY
+         IF( INCX.EQ.1 )THEN
+            DO 100, J = 1, N
+               TEMP = ZERO
+               K    = KUP1 - J
+               DO 90, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                  TEMP = TEMP + A( K + I, J )*X( I )
+   90          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+  100       CONTINUE
+         ELSE
+            DO 120, J = 1, N
+               TEMP = ZERO
+               IX   = KX
+               K    = KUP1 - J
+               DO 110, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                  TEMP = TEMP + A( K + I, J )*X( IX )
+                  IX   = IX   + INCX
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+               IF( J.GT.KU )
+     $            KX = KX + INCX
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SGBMV .
+*
+      END
diff --git a/costa/native/external/blas/sgemm.f b/costa/native/external/blas/sgemm.f
new file mode 100644
index 000000000..8dc772974
--- /dev/null
+++ b/costa/native/external/blas/sgemm.f
@@ -0,0 +1,313 @@
+      SUBROUTINE SGEMM ( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        TRANSA, TRANSB
+      INTEGER            M, N, K, LDA, LDB, LDC
+      REAL               ALPHA, BETA
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGEMM  performs one of the matrix-matrix operations
+*
+*     C := alpha*op( A )*op( B ) + beta*C,
+*
+*  where  op( X ) is one of
+*
+*     op( X ) = X   or   op( X ) = X',
+*
+*  alpha and beta are scalars, and A, B and C are matrices, with op( A )
+*  an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix.
+*
+*  Parameters
+*  ==========
+*
+*  TRANSA - CHARACTER*1.
+*           On entry, TRANSA specifies the form of op( A ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSA = 'N' or 'n',  op( A ) = A.
+*
+*              TRANSA = 'T' or 't',  op( A ) = A'.
+*
+*              TRANSA = 'C' or 'c',  op( A ) = A'.
+*
+*           Unchanged on exit.
+*
+*  TRANSB - CHARACTER*1.
+*           On entry, TRANSB specifies the form of op( B ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSB = 'N' or 'n',  op( B ) = B.
+*
+*              TRANSB = 'T' or 't',  op( B ) = B'.
+*
+*              TRANSB = 'C' or 'c',  op( B ) = B'.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry,  M  specifies  the number  of rows  of the  matrix
+*           op( A )  and of the  matrix  C.  M  must  be at least  zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N  specifies the number  of columns of the matrix
+*           op( B ) and the number of columns of the matrix C. N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry,  K  specifies  the number of columns of the matrix
+*           op( A ) and the number of rows of the matrix op( B ). K must
+*           be at least  zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.
+*           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by m  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. When  TRANSA = 'N' or 'n' then
+*           LDA must be at least  max( 1, m ), otherwise  LDA must be at
+*           least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  B      - REAL             array of DIMENSION ( LDB, kb ), where kb is
+*           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.
+*           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n
+*           part of the array  B  must contain the matrix  B,  otherwise
+*           the leading  n by k  part of the array  B  must contain  the
+*           matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in the calling (sub) program. When  TRANSB = 'N' or 'n' then
+*           LDB must be at least  max( 1, k ), otherwise  LDB must be at
+*           least  max( 1, n ).
+*           Unchanged on exit.
+*
+*  BETA   - REAL            .
+*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
+*           supplied as zero then C need not be set on input.
+*           Unchanged on exit.
+*
+*  C      - REAL             array of DIMENSION ( LDC, n ).
+*           Before entry, the leading  m by n  part of the array  C must
+*           contain the matrix  C,  except when  beta  is zero, in which
+*           case C need not be set on entry.
+*           On exit, the array  C  is overwritten by the  m by n  matrix
+*           ( alpha*op( A )*op( B ) + beta*C ).
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            NOTA, NOTB
+      INTEGER            I, INFO, J, L, NCOLA, NROWA, NROWB
+      REAL               TEMP
+*     .. Parameters ..
+      REAL               ONE         , ZERO
+      PARAMETER        ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Executable Statements ..
+*
+*     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not
+*     transposed and set  NROWA, NCOLA and  NROWB  as the number of rows
+*     and  columns of  A  and the  number of  rows  of  B  respectively.
+*
+      NOTA  = LSAME( TRANSA, 'N' )
+      NOTB  = LSAME( TRANSB, 'N' )
+      IF( NOTA )THEN
+         NROWA = M
+         NCOLA = K
+      ELSE
+         NROWA = K
+         NCOLA = M
+      END IF
+      IF( NOTB )THEN
+         NROWB = K
+      ELSE
+         NROWB = N
+      END IF
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF(      ( .NOT.NOTA                 ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'C' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'T' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.NOTB                 ).AND.
+     $         ( .NOT.LSAME( TRANSB, 'C' ) ).AND.
+     $         ( .NOT.LSAME( TRANSB, 'T' ) )      )THEN
+         INFO = 2
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( K  .LT.0               )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 8
+      ELSE IF( LDB.LT.MAX( 1, NROWB ) )THEN
+         INFO = 10
+      ELSE IF( LDC.LT.MAX( 1, M     ) )THEN
+         INFO = 13
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'SGEMM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And if  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( BETA.EQ.ZERO )THEN
+            DO 20, J = 1, N
+               DO 10, I = 1, M
+                  C( I, J ) = ZERO
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               DO 30, I = 1, M
+                  C( I, J ) = BETA*C( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( NOTB )THEN
+         IF( NOTA )THEN
+*
+*           Form  C := alpha*A*B + beta*C.
+*
+            DO 90, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 50, I = 1, M
+                     C( I, J ) = ZERO
+   50             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 60, I = 1, M
+                     C( I, J ) = BETA*C( I, J )
+   60             CONTINUE
+               END IF
+               DO 80, L = 1, K
+                  IF( B( L, J ).NE.ZERO )THEN
+                     TEMP = ALPHA*B( L, J )
+                     DO 70, I = 1, M
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+   70                CONTINUE
+                  END IF
+   80          CONTINUE
+   90       CONTINUE
+         ELSE
+*
+*           Form  C := alpha*A'*B + beta*C
+*
+            DO 120, J = 1, N
+               DO 110, I = 1, M
+                  TEMP = ZERO
+                  DO 100, L = 1, K
+                     TEMP = TEMP + A( L, I )*B( L, J )
+  100             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  110          CONTINUE
+  120       CONTINUE
+         END IF
+      ELSE
+         IF( NOTA )THEN
+*
+*           Form  C := alpha*A*B' + beta*C
+*
+            DO 170, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 130, I = 1, M
+                     C( I, J ) = ZERO
+  130             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 140, I = 1, M
+                     C( I, J ) = BETA*C( I, J )
+  140             CONTINUE
+               END IF
+               DO 160, L = 1, K
+                  IF( B( J, L ).NE.ZERO )THEN
+                     TEMP = ALPHA*B( J, L )
+                     DO 150, I = 1, M
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  150                CONTINUE
+                  END IF
+  160          CONTINUE
+  170       CONTINUE
+         ELSE
+*
+*           Form  C := alpha*A'*B' + beta*C
+*
+            DO 200, J = 1, N
+               DO 190, I = 1, M
+                  TEMP = ZERO
+                  DO 180, L = 1, K
+                     TEMP = TEMP + A( L, I )*B( J, L )
+  180             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  190          CONTINUE
+  200       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SGEMM .
+*
+      END
diff --git a/costa/native/external/blas/sgemv.f b/costa/native/external/blas/sgemv.f
new file mode 100644
index 000000000..4b47f0470
--- /dev/null
+++ b/costa/native/external/blas/sgemv.f
@@ -0,0 +1,261 @@
+      SUBROUTINE SGEMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      REAL               ALPHA, BETA
+      INTEGER            INCX, INCY, LDA, M, N
+      CHARACTER*1        TRANS
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGEMV  performs one of the matrix-vector operations
+*
+*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are vectors and A is an
+*  m by n matrix.
+*
+*  Parameters
+*  ==========
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
+*
+*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.
+*
+*              TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of the matrix A.
+*           M must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, n ).
+*           Before entry, the leading m by n part of the array A must
+*           contain the matrix of coefficients.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*  X      - REAL             array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
+*           Before entry, the incremented array X must contain the
+*           vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - REAL            .
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - REAL             array of DIMENSION at least
+*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
+*           Before entry with BETA non-zero, the incremented array Y
+*           must contain the vector y. On exit, Y is overwritten by the
+*           updated vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      REAL               ONE         , ZERO
+      PARAMETER        ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KX, KY, LENX, LENY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 1
+      ELSE IF( M.LT.0 )THEN
+         INFO = 2
+      ELSE IF( N.LT.0 )THEN
+         INFO = 3
+      ELSE IF( LDA.LT.MAX( 1, M ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'SGEMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
+*     up the start points in  X  and  Y.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+         LENX = N
+         LENY = M
+      ELSE
+         LENX = M
+         LENY = N
+      END IF
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( LENX - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( LENY - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, LENY
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, LENY
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, LENY
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, LENY
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  y := alpha*A*x + y.
+*
+         JX = KX
+         IF( INCY.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  DO 50, I = 1, M
+                     Y( I ) = Y( I ) + TEMP*A( I, J )
+   50             CONTINUE
+               END IF
+               JX = JX + INCX
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IY   = KY
+                  DO 70, I = 1, M
+                     Y( IY ) = Y( IY ) + TEMP*A( I, J )
+                     IY      = IY      + INCY
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y := alpha*A'*x + y.
+*
+         JY = KY
+         IF( INCX.EQ.1 )THEN
+            DO 100, J = 1, N
+               TEMP = ZERO
+               DO 90, I = 1, M
+                  TEMP = TEMP + A( I, J )*X( I )
+   90          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+  100       CONTINUE
+         ELSE
+            DO 120, J = 1, N
+               TEMP = ZERO
+               IX   = KX
+               DO 110, I = 1, M
+                  TEMP = TEMP + A( I, J )*X( IX )
+                  IX   = IX   + INCX
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SGEMV .
+*
+      END
diff --git a/costa/native/external/blas/sger.f b/costa/native/external/blas/sger.f
new file mode 100644
index 000000000..f336b417d
--- /dev/null
+++ b/costa/native/external/blas/sger.f
@@ -0,0 +1,157 @@
+      SUBROUTINE SGER  ( M, N, ALPHA, X, INCX, Y, INCY, A, LDA )
+*     .. Scalar Arguments ..
+      REAL               ALPHA
+      INTEGER            INCX, INCY, LDA, M, N
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGER   performs the rank 1 operation
+*
+*     A := alpha*x*y' + A,
+*
+*  where alpha is a scalar, x is an m element vector, y is an n element
+*  vector and A is an m by n matrix.
+*
+*  Parameters
+*  ==========
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of the matrix A.
+*           M must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - REAL             array of dimension at least
+*           ( 1 + ( m - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the m
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  Y      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y.
+*           Unchanged on exit.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, n ).
+*           Before entry, the leading m by n part of the array A must
+*           contain the matrix of coefficients. On exit, A is
+*           overwritten by the updated matrix.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER        ( ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP
+      INTEGER            I, INFO, IX, J, JY, KX
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( M.LT.0 )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 7
+      ELSE IF( LDA.LT.MAX( 1, M ) )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'SGER  ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( INCY.GT.0 )THEN
+         JY = 1
+      ELSE
+         JY = 1 - ( N - 1 )*INCY
+      END IF
+      IF( INCX.EQ.1 )THEN
+         DO 20, J = 1, N
+            IF( Y( JY ).NE.ZERO )THEN
+               TEMP = ALPHA*Y( JY )
+               DO 10, I = 1, M
+                  A( I, J ) = A( I, J ) + X( I )*TEMP
+   10          CONTINUE
+            END IF
+            JY = JY + INCY
+   20    CONTINUE
+      ELSE
+         IF( INCX.GT.0 )THEN
+            KX = 1
+         ELSE
+            KX = 1 - ( M - 1 )*INCX
+         END IF
+         DO 40, J = 1, N
+            IF( Y( JY ).NE.ZERO )THEN
+               TEMP = ALPHA*Y( JY )
+               IX   = KX
+               DO 30, I = 1, M
+                  A( I, J ) = A( I, J ) + X( IX )*TEMP
+                  IX        = IX        + INCX
+   30          CONTINUE
+            END IF
+            JY = JY + INCY
+   40    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of SGER  .
+*
+      END
diff --git a/costa/native/external/blas/snrm2.f b/costa/native/external/blas/snrm2.f
new file mode 100644
index 000000000..81dc0cda6
--- /dev/null
+++ b/costa/native/external/blas/snrm2.f
@@ -0,0 +1,60 @@
+      REAL             FUNCTION SNRM2 ( N, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER                           INCX, N
+*     .. Array Arguments ..
+      REAL                              X( * )
+*     ..
+*
+*  SNRM2 returns the euclidean norm of a vector via the function
+*  name, so that
+*
+*     SNRM2 := sqrt( x'*x )
+*
+*
+*
+*  -- This version written on 25-October-1982.
+*     Modified on 14-October-1993 to inline the call to SLASSQ.
+*     Sven Hammarling, Nag Ltd.
+*
+*
+*     .. Parameters ..
+      REAL                  ONE         , ZERO
+      PARAMETER           ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      INTEGER               IX
+      REAL                  ABSXI, NORM, SCALE, SSQ
+*     .. Intrinsic Functions ..
+      INTRINSIC             ABS, SQRT
+*     ..
+*     .. Executable Statements ..
+      IF( N.LT.1 .OR. INCX.LT.1 )THEN
+         NORM  = ZERO
+      ELSE IF( N.EQ.1 )THEN
+         NORM  = ABS( X( 1 ) )
+      ELSE
+         SCALE = ZERO
+         SSQ   = ONE
+*        The following loop is equivalent to this call to the LAPACK
+*        auxiliary routine:
+*        CALL SLASSQ( N, X, INCX, SCALE, SSQ )
+*
+         DO 10, IX = 1, 1 + ( N - 1 )*INCX, INCX
+            IF( X( IX ).NE.ZERO )THEN
+               ABSXI = ABS( X( IX ) )
+               IF( SCALE.LT.ABSXI )THEN
+                  SSQ   = ONE   + SSQ*( SCALE/ABSXI )**2
+                  SCALE = ABSXI
+               ELSE
+                  SSQ   = SSQ   +     ( ABSXI/SCALE )**2
+               END IF
+            END IF
+   10    CONTINUE
+         NORM  = SCALE * SQRT( SSQ )
+      END IF
+*
+      SNRM2 = NORM
+      RETURN
+*
+*     End of SNRM2.
+*
+      END
diff --git a/costa/native/external/blas/srot.f b/costa/native/external/blas/srot.f
new file mode 100644
index 000000000..e0ee1e50c
--- /dev/null
+++ b/costa/native/external/blas/srot.f
@@ -0,0 +1,37 @@
+      subroutine srot (n,sx,incx,sy,incy,c,s)
+c
+c     applies a plane rotation.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      real sx(*),sy(*),stemp,c,s
+      integer i,incx,incy,ix,iy,n
+c
+      if(n.le.0)return
+      if(incx.eq.1.and.incy.eq.1)go to 20
+c
+c       code for unequal increments or equal increments not equal
+c         to 1
+c
+      ix = 1
+      iy = 1
+      if(incx.lt.0)ix = (-n+1)*incx + 1
+      if(incy.lt.0)iy = (-n+1)*incy + 1
+      do 10 i = 1,n
+        stemp = c*sx(ix) + s*sy(iy)
+        sy(iy) = c*sy(iy) - s*sx(ix)
+        sx(ix) = stemp
+        ix = ix + incx
+        iy = iy + incy
+   10 continue
+      return
+c
+c       code for both increments equal to 1
+c
+   20 do 30 i = 1,n
+        stemp = c*sx(i) + s*sy(i)
+        sy(i) = c*sy(i) - s*sx(i)
+        sx(i) = stemp
+   30 continue
+      return
+      end
diff --git a/costa/native/external/blas/srotg.f b/costa/native/external/blas/srotg.f
new file mode 100644
index 000000000..84d1922af
--- /dev/null
+++ b/costa/native/external/blas/srotg.f
@@ -0,0 +1,27 @@
+      subroutine srotg(sa,sb,c,s)
+c
+c     construct givens plane rotation.
+c     jack dongarra, linpack, 3/11/78.
+c
+      real sa,sb,c,s,roe,scale,r,z
+c
+      roe = sb
+      if( abs(sa) .gt. abs(sb) ) roe = sa
+      scale = abs(sa) + abs(sb)
+      if( scale .ne. 0.0 ) go to 10
+         c = 1.0
+         s = 0.0
+         r = 0.0
+         z = 0.0
+         go to 20
+   10 r = scale*sqrt((sa/scale)**2 + (sb/scale)**2)
+      r = sign(1.0,roe)*r
+      c = sa/r
+      s = sb/r
+      z = 1.0
+      if( abs(sa) .gt. abs(sb) ) z = s
+      if( abs(sb) .ge. abs(sa) .and. c .ne. 0.0 ) z = 1.0/c
+   20 sa = r
+      sb = z
+      return
+      end
diff --git a/costa/native/external/blas/ssbmv.f b/costa/native/external/blas/ssbmv.f
new file mode 100644
index 000000000..7761be874
--- /dev/null
+++ b/costa/native/external/blas/ssbmv.f
@@ -0,0 +1,303 @@
+      SUBROUTINE SSBMV ( UPLO, N, K, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      REAL               ALPHA, BETA
+      INTEGER            INCX, INCY, K, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSBMV  performs the matrix-vector  operation
+*
+*     y := alpha*A*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are n element vectors and
+*  A is an n by n symmetric band matrix, with k super-diagonals.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the band matrix A is being supplied as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  being supplied.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  being supplied.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry, K specifies the number of super-diagonals of the
+*           matrix A. K must satisfy  0 .le. K.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, n ).
+*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
+*           by n part of the array A must contain the upper triangular
+*           band part of the symmetric matrix, supplied column by
+*           column, with the leading diagonal of the matrix in row
+*           ( k + 1 ) of the array, the first super-diagonal starting at
+*           position 2 in row k, and so on. The top left k by k triangle
+*           of the array A is not referenced.
+*           The following program segment will transfer the upper
+*           triangular part of a symmetric band matrix from conventional
+*           full matrix storage to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = K + 1 - J
+*                    DO 10, I = MAX( 1, J - K ), J
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
+*           by n part of the array A must contain the lower triangular
+*           band part of the symmetric matrix, supplied column by
+*           column, with the leading diagonal of the matrix in row 1 of
+*           the array, the first sub-diagonal starting at position 1 in
+*           row 2, and so on. The bottom right k by k triangle of the
+*           array A is not referenced.
+*           The following program segment will transfer the lower
+*           triangular part of a symmetric band matrix from conventional
+*           full matrix storage to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = 1 - J
+*                    DO 10, I = J, MIN( N, J + K )
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( k + 1 ).
+*           Unchanged on exit.
+*
+*  X      - REAL             array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the
+*           vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - REAL            .
+*           On entry, BETA specifies the scalar beta.
+*           Unchanged on exit.
+*
+*  Y      - REAL             array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the
+*           vector y. On exit, Y is overwritten by the updated vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      REAL               ONE         , ZERO
+      PARAMETER        ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KPLUS1, KX, KY, L
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( K.LT.0 )THEN
+         INFO = 3
+      ELSE IF( LDA.LT.( K + 1 ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'SSBMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set up the start points in  X  and  Y.
+*
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( N - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( N - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of the array A
+*     are accessed sequentially with one pass through A.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, N
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, N
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, N
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, N
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  y  when upper triangle of A is stored.
+*
+         KPLUS1 = K + 1
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               L     = KPLUS1 - J
+               DO 50, I = MAX( 1, J - K ), J - 1
+                  Y( I ) = Y( I ) + TEMP1*A( L + I, J )
+                  TEMP2  = TEMP2  + A( L + I, J )*X( I )
+   50          CONTINUE
+               Y( J ) = Y( J ) + TEMP1*A( KPLUS1, J ) + ALPHA*TEMP2
+   60       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 80, J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               IX    = KX
+               IY    = KY
+               L     = KPLUS1 - J
+               DO 70, I = MAX( 1, J - K ), J - 1
+                  Y( IY ) = Y( IY ) + TEMP1*A( L + I, J )
+                  TEMP2   = TEMP2   + A( L + I, J )*X( IX )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+   70          CONTINUE
+               Y( JY ) = Y( JY ) + TEMP1*A( KPLUS1, J ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+               IF( J.GT.K )THEN
+                  KX = KX + INCX
+                  KY = KY + INCY
+               END IF
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y  when lower triangle of A is stored.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 100, J = 1, N
+               TEMP1  = ALPHA*X( J )
+               TEMP2  = ZERO
+               Y( J ) = Y( J )       + TEMP1*A( 1, J )
+               L      = 1            - J
+               DO 90, I = J + 1, MIN( N, J + K )
+                  Y( I ) = Y( I ) + TEMP1*A( L + I, J )
+                  TEMP2  = TEMP2  + A( L + I, J )*X( I )
+   90          CONTINUE
+               Y( J ) = Y( J ) + ALPHA*TEMP2
+  100       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 120, J = 1, N
+               TEMP1   = ALPHA*X( JX )
+               TEMP2   = ZERO
+               Y( JY ) = Y( JY )       + TEMP1*A( 1, J )
+               L       = 1             - J
+               IX      = JX
+               IY      = JY
+               DO 110, I = J + 1, MIN( N, J + K )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+                  Y( IY ) = Y( IY ) + TEMP1*A( L + I, J )
+                  TEMP2   = TEMP2   + A( L + I, J )*X( IX )
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SSBMV .
+*
+      END
diff --git a/costa/native/external/blas/sscal.f b/costa/native/external/blas/sscal.f
new file mode 100644
index 000000000..ac7ee0010
--- /dev/null
+++ b/costa/native/external/blas/sscal.f
@@ -0,0 +1,43 @@
+      subroutine sscal(n,sa,sx,incx)
+c
+c     scales a vector by a constant.
+c     uses unrolled loops for increment equal to 1.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 3/93 to return if incx .le. 0.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      real sa,sx(*)
+      integer i,incx,m,mp1,n,nincx
+c
+      if( n.le.0 .or. incx.le.0 )return
+      if(incx.eq.1)go to 20
+c
+c        code for increment not equal to 1
+c
+      nincx = n*incx
+      do 10 i = 1,nincx,incx
+        sx(i) = sa*sx(i)
+   10 continue
+      return
+c
+c        code for increment equal to 1
+c
+c
+c        clean-up loop
+c
+   20 m = mod(n,5)
+      if( m .eq. 0 ) go to 40
+      do 30 i = 1,m
+        sx(i) = sa*sx(i)
+   30 continue
+      if( n .lt. 5 ) return
+   40 mp1 = m + 1
+      do 50 i = mp1,n,5
+        sx(i) = sa*sx(i)
+        sx(i + 1) = sa*sx(i + 1)
+        sx(i + 2) = sa*sx(i + 2)
+        sx(i + 3) = sa*sx(i + 3)
+        sx(i + 4) = sa*sx(i + 4)
+   50 continue
+      return
+      end
diff --git a/costa/native/external/blas/sspmv.f b/costa/native/external/blas/sspmv.f
new file mode 100644
index 000000000..f7bd25e11
--- /dev/null
+++ b/costa/native/external/blas/sspmv.f
@@ -0,0 +1,262 @@
+      SUBROUTINE SSPMV ( UPLO, N, ALPHA, AP, X, INCX, BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      REAL               ALPHA, BETA
+      INTEGER            INCX, INCY, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      REAL               AP( * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSPMV  performs the matrix-vector operation
+*
+*     y := alpha*A*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are n element vectors and
+*  A is an n by n symmetric matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the matrix A is supplied in the packed
+*           array AP as follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  supplied in AP.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  supplied in AP.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  AP     - REAL             array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
+*           and a( 2, 2 ) respectively, and so on.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
+*           and a( 3, 1 ) respectively, and so on.
+*           Unchanged on exit.
+*
+*  X      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - REAL            .
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y. On exit, Y is overwritten by the updated
+*           vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      REAL               ONE         , ZERO
+      PARAMETER        ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 6
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'SSPMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set up the start points in  X  and  Y.
+*
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( N - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( N - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of the array AP
+*     are accessed sequentially with one pass through AP.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, N
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, N
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, N
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, N
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      KK = 1
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  y  when AP contains the upper triangle.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               K     = KK
+               DO 50, I = 1, J - 1
+                  Y( I ) = Y( I ) + TEMP1*AP( K )
+                  TEMP2  = TEMP2  + AP( K )*X( I )
+                  K      = K      + 1
+   50          CONTINUE
+               Y( J ) = Y( J ) + TEMP1*AP( KK + J - 1 ) + ALPHA*TEMP2
+               KK     = KK     + J
+   60       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 80, J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               IX    = KX
+               IY    = KY
+               DO 70, K = KK, KK + J - 2
+                  Y( IY ) = Y( IY ) + TEMP1*AP( K )
+                  TEMP2   = TEMP2   + AP( K )*X( IX )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+   70          CONTINUE
+               Y( JY ) = Y( JY ) + TEMP1*AP( KK + J - 1 ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+               KK      = KK      + J
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y  when AP contains the lower triangle.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 100, J = 1, N
+               TEMP1  = ALPHA*X( J )
+               TEMP2  = ZERO
+               Y( J ) = Y( J )       + TEMP1*AP( KK )
+               K      = KK           + 1
+               DO 90, I = J + 1, N
+                  Y( I ) = Y( I ) + TEMP1*AP( K )
+                  TEMP2  = TEMP2  + AP( K )*X( I )
+                  K      = K      + 1
+   90          CONTINUE
+               Y( J ) = Y( J ) + ALPHA*TEMP2
+               KK     = KK     + ( N - J + 1 )
+  100       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 120, J = 1, N
+               TEMP1   = ALPHA*X( JX )
+               TEMP2   = ZERO
+               Y( JY ) = Y( JY )       + TEMP1*AP( KK )
+               IX      = JX
+               IY      = JY
+               DO 110, K = KK + 1, KK + N - J
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+                  Y( IY ) = Y( IY ) + TEMP1*AP( K )
+                  TEMP2   = TEMP2   + AP( K )*X( IX )
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+               KK      = KK      + ( N - J + 1 )
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SSPMV .
+*
+      END
diff --git a/costa/native/external/blas/sspr.f b/costa/native/external/blas/sspr.f
new file mode 100644
index 000000000..e3865daba
--- /dev/null
+++ b/costa/native/external/blas/sspr.f
@@ -0,0 +1,198 @@
+      SUBROUTINE SSPR  ( UPLO, N, ALPHA, X, INCX, AP )
+*     .. Scalar Arguments ..
+      REAL               ALPHA
+      INTEGER            INCX, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      REAL               AP( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSPR    performs the symmetric rank 1 operation
+*
+*     A := alpha*x*x' + A,
+*
+*  where alpha is a real scalar, x is an n element vector and A is an
+*  n by n symmetric matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the matrix A is supplied in the packed
+*           array AP as follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  supplied in AP.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  supplied in AP.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  AP     - REAL             array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
+*           and a( 2, 2 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the upper triangular part of the
+*           updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
+*           and a( 3, 1 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the lower triangular part of the
+*           updated matrix.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER        ( ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP
+      INTEGER            I, INFO, IX, J, JX, K, KK, KX
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'SSPR  ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Set the start point in X if the increment is not unity.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of the array AP
+*     are accessed sequentially with one pass through AP.
+*
+      KK = 1
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  A  when upper triangle is stored in AP.
+*
+         IF( INCX.EQ.1 )THEN
+            DO 20, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( J )
+                  K    = KK
+                  DO 10, I = 1, J
+                     AP( K ) = AP( K ) + X( I )*TEMP
+                     K       = K       + 1
+   10             CONTINUE
+               END IF
+               KK = KK + J
+   20       CONTINUE
+         ELSE
+            JX = KX
+            DO 40, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IX   = KX
+                  DO 30, K = KK, KK + J - 1
+                     AP( K ) = AP( K ) + X( IX )*TEMP
+                     IX      = IX      + INCX
+   30             CONTINUE
+               END IF
+               JX = JX + INCX
+               KK = KK + J
+   40       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  A  when lower triangle is stored in AP.
+*
+         IF( INCX.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( J )
+                  K    = KK
+                  DO 50, I = J, N
+                     AP( K ) = AP( K ) + X( I )*TEMP
+                     K       = K       + 1
+   50             CONTINUE
+               END IF
+               KK = KK + N - J + 1
+   60       CONTINUE
+         ELSE
+            JX = KX
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IX   = JX
+                  DO 70, K = KK, KK + N - J
+                     AP( K ) = AP( K ) + X( IX )*TEMP
+                     IX      = IX      + INCX
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+               KK = KK + N - J + 1
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SSPR  .
+*
+      END
diff --git a/costa/native/external/blas/sspr2.f b/costa/native/external/blas/sspr2.f
new file mode 100644
index 000000000..8be0098b1
--- /dev/null
+++ b/costa/native/external/blas/sspr2.f
@@ -0,0 +1,229 @@
+      SUBROUTINE SSPR2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, AP )
+*     .. Scalar Arguments ..
+      REAL               ALPHA
+      INTEGER            INCX, INCY, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      REAL               AP( * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSPR2  performs the symmetric rank 2 operation
+*
+*     A := alpha*x*y' + alpha*y*x' + A,
+*
+*  where alpha is a scalar, x and y are n element vectors and A is an
+*  n by n symmetric matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the matrix A is supplied in the packed
+*           array AP as follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  supplied in AP.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  supplied in AP.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  Y      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y.
+*           Unchanged on exit.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*  AP     - REAL             array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
+*           and a( 2, 2 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the upper triangular part of the
+*           updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
+*           and a( 3, 1 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the lower triangular part of the
+*           updated matrix.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER        ( ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'SSPR2 ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Set up the start points in X and Y if the increments are not both
+*     unity.
+*
+      IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN
+         IF( INCX.GT.0 )THEN
+            KX = 1
+         ELSE
+            KX = 1 - ( N - 1 )*INCX
+         END IF
+         IF( INCY.GT.0 )THEN
+            KY = 1
+         ELSE
+            KY = 1 - ( N - 1 )*INCY
+         END IF
+         JX = KX
+         JY = KY
+      END IF
+*
+*     Start the operations. In this version the elements of the array AP
+*     are accessed sequentially with one pass through AP.
+*
+      KK = 1
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  A  when upper triangle is stored in AP.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 20, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( J )
+                  TEMP2 = ALPHA*X( J )
+                  K     = KK
+                  DO 10, I = 1, J
+                     AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2
+                     K       = K       + 1
+   10             CONTINUE
+               END IF
+               KK = KK + J
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( JY )
+                  TEMP2 = ALPHA*X( JX )
+                  IX    = KX
+                  IY    = KY
+                  DO 30, K = KK, KK + J - 1
+                     AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2
+                     IX      = IX      + INCX
+                     IY      = IY      + INCY
+   30             CONTINUE
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+               KK = KK + J
+   40       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  A  when lower triangle is stored in AP.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( J )
+                  TEMP2 = ALPHA*X( J )
+                  K     = KK
+                  DO 50, I = J, N
+                     AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2
+                     K       = K       + 1
+   50             CONTINUE
+               END IF
+               KK = KK + N - J + 1
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( JY )
+                  TEMP2 = ALPHA*X( JX )
+                  IX    = JX
+                  IY    = JY
+                  DO 70, K = KK, KK + N - J
+                     AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2
+                     IX      = IX      + INCX
+                     IY      = IY      + INCY
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+               KK = KK + N - J + 1
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SSPR2 .
+*
+      END
diff --git a/costa/native/external/blas/sswap.f b/costa/native/external/blas/sswap.f
new file mode 100644
index 000000000..ef0722272
--- /dev/null
+++ b/costa/native/external/blas/sswap.f
@@ -0,0 +1,56 @@
+      subroutine sswap (n,sx,incx,sy,incy)
+c
+c     interchanges two vectors.
+c     uses unrolled loops for increments equal to 1.
+c     jack dongarra, linpack, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      real sx(*),sy(*),stemp
+      integer i,incx,incy,ix,iy,m,mp1,n
+c
+      if(n.le.0)return
+      if(incx.eq.1.and.incy.eq.1)go to 20
+c
+c       code for unequal increments or equal increments not equal
+c         to 1
+c
+      ix = 1
+      iy = 1
+      if(incx.lt.0)ix = (-n+1)*incx + 1
+      if(incy.lt.0)iy = (-n+1)*incy + 1
+      do 10 i = 1,n
+        stemp = sx(ix)
+        sx(ix) = sy(iy)
+        sy(iy) = stemp
+        ix = ix + incx
+        iy = iy + incy
+   10 continue
+      return
+c
+c       code for both increments equal to 1
+c
+c
+c       clean-up loop
+c
+   20 m = mod(n,3)
+      if( m .eq. 0 ) go to 40
+      do 30 i = 1,m
+        stemp = sx(i)
+        sx(i) = sy(i)
+        sy(i) = stemp
+   30 continue
+      if( n .lt. 3 ) return
+   40 mp1 = m + 1
+      do 50 i = mp1,n,3
+        stemp = sx(i)
+        sx(i) = sy(i)
+        sy(i) = stemp
+        stemp = sx(i + 1)
+        sx(i + 1) = sy(i + 1)
+        sy(i + 1) = stemp
+        stemp = sx(i + 2)
+        sx(i + 2) = sy(i + 2)
+        sy(i + 2) = stemp
+   50 continue
+      return
+      end
diff --git a/costa/native/external/blas/ssymm.f b/costa/native/external/blas/ssymm.f
new file mode 100644
index 000000000..861bef906
--- /dev/null
+++ b/costa/native/external/blas/ssymm.f
@@ -0,0 +1,294 @@
+      SUBROUTINE SSYMM ( SIDE, UPLO, M, N, ALPHA, A, LDA, B, LDB,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        SIDE, UPLO
+      INTEGER            M, N, LDA, LDB, LDC
+      REAL               ALPHA, BETA
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYMM  performs one of the matrix-matrix operations
+*
+*     C := alpha*A*B + beta*C,
+*
+*  or
+*
+*     C := alpha*B*A + beta*C,
+*
+*  where alpha and beta are scalars,  A is a symmetric matrix and  B and
+*  C are  m by n matrices.
+*
+*  Parameters
+*  ==========
+*
+*  SIDE   - CHARACTER*1.
+*           On entry,  SIDE  specifies whether  the  symmetric matrix  A
+*           appears on the  left or right  in the  operation as follows:
+*
+*              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C,
+*
+*              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C,
+*
+*           Unchanged on exit.
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of  the  symmetric  matrix   A  is  to  be
+*           referenced as follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of the
+*                                  symmetric matrix is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of the
+*                                  symmetric matrix is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry,  M  specifies the number of rows of the matrix  C.
+*           M  must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix C.
+*           N  must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, ka ), where ka is
+*           m  when  SIDE = 'L' or 'l'  and is  n otherwise.
+*           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of
+*           the array  A  must contain the  symmetric matrix,  such that
+*           when  UPLO = 'U' or 'u', the leading m by m upper triangular
+*           part of the array  A  must contain the upper triangular part
+*           of the  symmetric matrix and the  strictly  lower triangular
+*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
+*           the leading  m by m  lower triangular part  of the  array  A
+*           must  contain  the  lower triangular part  of the  symmetric
+*           matrix and the  strictly upper triangular part of  A  is not
+*           referenced.
+*           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of
+*           the array  A  must contain the  symmetric matrix,  such that
+*           when  UPLO = 'U' or 'u', the leading n by n upper triangular
+*           part of the array  A  must contain the upper triangular part
+*           of the  symmetric matrix and the  strictly  lower triangular
+*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
+*           the leading  n by n  lower triangular part  of the  array  A
+*           must  contain  the  lower triangular part  of the  symmetric
+*           matrix and the  strictly upper triangular part of  A  is not
+*           referenced.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
+*           LDA must be at least  max( 1, m ), otherwise  LDA must be at
+*           least  max( 1, n ).
+*           Unchanged on exit.
+*
+*  B      - REAL             array of DIMENSION ( LDB, n ).
+*           Before entry, the leading  m by n part of the array  B  must
+*           contain the matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   LDB  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*  BETA   - REAL            .
+*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
+*           supplied as zero then C need not be set on input.
+*           Unchanged on exit.
+*
+*  C      - REAL             array of DIMENSION ( LDC, n ).
+*           Before entry, the leading  m by n  part of the array  C must
+*           contain the matrix  C,  except when  beta  is zero, in which
+*           case C need not be set on entry.
+*           On exit, the array  C  is overwritten by the  m by n updated
+*           matrix.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, K, NROWA
+      REAL               TEMP1, TEMP2
+*     .. Parameters ..
+      REAL               ONE         , ZERO
+      PARAMETER        ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Executable Statements ..
+*
+*     Set NROWA as the number of rows of A.
+*
+      IF( LSAME( SIDE, 'L' ) )THEN
+         NROWA = M
+      ELSE
+         NROWA = N
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF(      ( .NOT.LSAME( SIDE, 'L' ) ).AND.
+     $         ( .NOT.LSAME( SIDE, 'R' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.UPPER              ).AND.
+     $         ( .NOT.LSAME( UPLO, 'L' ) )      )THEN
+         INFO = 2
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDB.LT.MAX( 1, M     ) )THEN
+         INFO = 9
+      ELSE IF( LDC.LT.MAX( 1, M     ) )THEN
+         INFO = 12
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'SSYMM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( BETA.EQ.ZERO )THEN
+            DO 20, J = 1, N
+               DO 10, I = 1, M
+                  C( I, J ) = ZERO
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               DO 30, I = 1, M
+                  C( I, J ) = BETA*C( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( SIDE, 'L' ) )THEN
+*
+*        Form  C := alpha*A*B + beta*C.
+*
+         IF( UPPER )THEN
+            DO 70, J = 1, N
+               DO 60, I = 1, M
+                  TEMP1 = ALPHA*B( I, J )
+                  TEMP2 = ZERO
+                  DO 50, K = 1, I - 1
+                     C( K, J ) = C( K, J ) + TEMP1    *A( K, I )
+                     TEMP2     = TEMP2     + B( K, J )*A( K, I )
+   50             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           TEMP1*A( I, I ) + ALPHA*TEMP2
+                  END IF
+   60          CONTINUE
+   70       CONTINUE
+         ELSE
+            DO 100, J = 1, N
+               DO 90, I = M, 1, -1
+                  TEMP1 = ALPHA*B( I, J )
+                  TEMP2 = ZERO
+                  DO 80, K = I + 1, M
+                     C( K, J ) = C( K, J ) + TEMP1    *A( K, I )
+                     TEMP2     = TEMP2     + B( K, J )*A( K, I )
+   80             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           TEMP1*A( I, I ) + ALPHA*TEMP2
+                  END IF
+   90          CONTINUE
+  100       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  C := alpha*B*A + beta*C.
+*
+         DO 170, J = 1, N
+            TEMP1 = ALPHA*A( J, J )
+            IF( BETA.EQ.ZERO )THEN
+               DO 110, I = 1, M
+                  C( I, J ) = TEMP1*B( I, J )
+  110          CONTINUE
+            ELSE
+               DO 120, I = 1, M
+                  C( I, J ) = BETA*C( I, J ) + TEMP1*B( I, J )
+  120          CONTINUE
+            END IF
+            DO 140, K = 1, J - 1
+               IF( UPPER )THEN
+                  TEMP1 = ALPHA*A( K, J )
+               ELSE
+                  TEMP1 = ALPHA*A( J, K )
+               END IF
+               DO 130, I = 1, M
+                  C( I, J ) = C( I, J ) + TEMP1*B( I, K )
+  130          CONTINUE
+  140       CONTINUE
+            DO 160, K = J + 1, N
+               IF( UPPER )THEN
+                  TEMP1 = ALPHA*A( J, K )
+               ELSE
+                  TEMP1 = ALPHA*A( K, J )
+               END IF
+               DO 150, I = 1, M
+                  C( I, J ) = C( I, J ) + TEMP1*B( I, K )
+  150          CONTINUE
+  160       CONTINUE
+  170    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of SSYMM .
+*
+      END
diff --git a/costa/native/external/blas/ssymv.f b/costa/native/external/blas/ssymv.f
new file mode 100644
index 000000000..9819ba802
--- /dev/null
+++ b/costa/native/external/blas/ssymv.f
@@ -0,0 +1,262 @@
+      SUBROUTINE SSYMV ( UPLO, N, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      REAL               ALPHA, BETA
+      INTEGER            INCX, INCY, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYMV  performs the matrix-vector  operation
+*
+*     y := alpha*A*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are n element vectors and
+*  A is an n by n symmetric matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the array A is to be referenced as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of A
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of A
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular part of the symmetric matrix and the strictly
+*           lower triangular part of A is not referenced.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular part of the symmetric matrix and the strictly
+*           upper triangular part of A is not referenced.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*  X      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - REAL            .
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y. On exit, Y is overwritten by the updated
+*           vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      REAL               ONE         , ZERO
+      PARAMETER        ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 5
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 7
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 10
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'SSYMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set up the start points in  X  and  Y.
+*
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( N - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( N - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the triangular part
+*     of A.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, N
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, N
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, N
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, N
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  y  when A is stored in upper triangle.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               DO 50, I = 1, J - 1
+                  Y( I ) = Y( I ) + TEMP1*A( I, J )
+                  TEMP2  = TEMP2  + A( I, J )*X( I )
+   50          CONTINUE
+               Y( J ) = Y( J ) + TEMP1*A( J, J ) + ALPHA*TEMP2
+   60       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 80, J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               IX    = KX
+               IY    = KY
+               DO 70, I = 1, J - 1
+                  Y( IY ) = Y( IY ) + TEMP1*A( I, J )
+                  TEMP2   = TEMP2   + A( I, J )*X( IX )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+   70          CONTINUE
+               Y( JY ) = Y( JY ) + TEMP1*A( J, J ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y  when A is stored in lower triangle.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 100, J = 1, N
+               TEMP1  = ALPHA*X( J )
+               TEMP2  = ZERO
+               Y( J ) = Y( J )       + TEMP1*A( J, J )
+               DO 90, I = J + 1, N
+                  Y( I ) = Y( I ) + TEMP1*A( I, J )
+                  TEMP2  = TEMP2  + A( I, J )*X( I )
+   90          CONTINUE
+               Y( J ) = Y( J ) + ALPHA*TEMP2
+  100       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 120, J = 1, N
+               TEMP1   = ALPHA*X( JX )
+               TEMP2   = ZERO
+               Y( JY ) = Y( JY )       + TEMP1*A( J, J )
+               IX      = JX
+               IY      = JY
+               DO 110, I = J + 1, N
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+                  Y( IY ) = Y( IY ) + TEMP1*A( I, J )
+                  TEMP2   = TEMP2   + A( I, J )*X( IX )
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SSYMV .
+*
+      END
diff --git a/costa/native/external/blas/ssyr.f b/costa/native/external/blas/ssyr.f
new file mode 100644
index 000000000..964872884
--- /dev/null
+++ b/costa/native/external/blas/ssyr.f
@@ -0,0 +1,197 @@
+      SUBROUTINE SSYR  ( UPLO, N, ALPHA, X, INCX, A, LDA )
+*     .. Scalar Arguments ..
+      REAL               ALPHA
+      INTEGER            INCX, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYR   performs the symmetric rank 1 operation
+*
+*     A := alpha*x*x' + A,
+*
+*  where alpha is a real scalar, x is an n element vector and A is an
+*  n by n symmetric matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the array A is to be referenced as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of A
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of A
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular part of the symmetric matrix and the strictly
+*           lower triangular part of A is not referenced. On exit, the
+*           upper triangular part of the array A is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular part of the symmetric matrix and the strictly
+*           upper triangular part of A is not referenced. On exit, the
+*           lower triangular part of the array A is overwritten by the
+*           lower triangular part of the updated matrix.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER        ( ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP
+      INTEGER            I, INFO, IX, J, JX, KX
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'SSYR  ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Set the start point in X if the increment is not unity.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the triangular part
+*     of A.
+*
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  A  when A is stored in upper triangle.
+*
+         IF( INCX.EQ.1 )THEN
+            DO 20, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( J )
+                  DO 10, I = 1, J
+                     A( I, J ) = A( I, J ) + X( I )*TEMP
+   10             CONTINUE
+               END IF
+   20       CONTINUE
+         ELSE
+            JX = KX
+            DO 40, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IX   = KX
+                  DO 30, I = 1, J
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP
+                     IX        = IX        + INCX
+   30             CONTINUE
+               END IF
+               JX = JX + INCX
+   40       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  A  when A is stored in lower triangle.
+*
+         IF( INCX.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( J )
+                  DO 50, I = J, N
+                     A( I, J ) = A( I, J ) + X( I )*TEMP
+   50             CONTINUE
+               END IF
+   60       CONTINUE
+         ELSE
+            JX = KX
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IX   = JX
+                  DO 70, I = J, N
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP
+                     IX        = IX        + INCX
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SSYR  .
+*
+      END
diff --git a/costa/native/external/blas/ssyr2.f b/costa/native/external/blas/ssyr2.f
new file mode 100644
index 000000000..ac20fc9d4
--- /dev/null
+++ b/costa/native/external/blas/ssyr2.f
@@ -0,0 +1,230 @@
+      SUBROUTINE SSYR2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, A, LDA )
+*     .. Scalar Arguments ..
+      REAL               ALPHA
+      INTEGER            INCX, INCY, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYR2  performs the symmetric rank 2 operation
+*
+*     A := alpha*x*y' + alpha*y*x' + A,
+*
+*  where alpha is a scalar, x and y are n element vectors and A is an n
+*  by n symmetric matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the array A is to be referenced as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of A
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of A
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  Y      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y.
+*           Unchanged on exit.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular part of the symmetric matrix and the strictly
+*           lower triangular part of A is not referenced. On exit, the
+*           upper triangular part of the array A is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular part of the symmetric matrix and the strictly
+*           upper triangular part of A is not referenced. On exit, the
+*           lower triangular part of the array A is overwritten by the
+*           lower triangular part of the updated matrix.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER        ( ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 7
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'SSYR2 ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Set up the start points in X and Y if the increments are not both
+*     unity.
+*
+      IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN
+         IF( INCX.GT.0 )THEN
+            KX = 1
+         ELSE
+            KX = 1 - ( N - 1 )*INCX
+         END IF
+         IF( INCY.GT.0 )THEN
+            KY = 1
+         ELSE
+            KY = 1 - ( N - 1 )*INCY
+         END IF
+         JX = KX
+         JY = KY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the triangular part
+*     of A.
+*
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  A  when A is stored in the upper triangle.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 20, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( J )
+                  TEMP2 = ALPHA*X( J )
+                  DO 10, I = 1, J
+                     A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2
+   10             CONTINUE
+               END IF
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( JY )
+                  TEMP2 = ALPHA*X( JX )
+                  IX    = KX
+                  IY    = KY
+                  DO 30, I = 1, J
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP1
+     $                                     + Y( IY )*TEMP2
+                     IX        = IX        + INCX
+                     IY        = IY        + INCY
+   30             CONTINUE
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+   40       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  A  when A is stored in the lower triangle.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( J )
+                  TEMP2 = ALPHA*X( J )
+                  DO 50, I = J, N
+                     A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2
+   50             CONTINUE
+               END IF
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*Y( JY )
+                  TEMP2 = ALPHA*X( JX )
+                  IX    = JX
+                  IY    = JY
+                  DO 70, I = J, N
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP1
+     $                                     + Y( IY )*TEMP2
+                     IX        = IX        + INCX
+                     IY        = IY        + INCY
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SSYR2 .
+*
+      END
diff --git a/costa/native/external/blas/ssyr2k.f b/costa/native/external/blas/ssyr2k.f
new file mode 100644
index 000000000..d269164d3
--- /dev/null
+++ b/costa/native/external/blas/ssyr2k.f
@@ -0,0 +1,327 @@
+      SUBROUTINE SSYR2K( UPLO, TRANS, N, K, ALPHA, A, LDA, B, LDB,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        UPLO, TRANS
+      INTEGER            N, K, LDA, LDB, LDC
+      REAL               ALPHA, BETA
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYR2K  performs one of the symmetric rank 2k operations
+*
+*     C := alpha*A*B' + alpha*B*A' + beta*C,
+*
+*  or
+*
+*     C := alpha*A'*B + alpha*B'*A + beta*C,
+*
+*  where  alpha and beta  are scalars, C is an  n by n  symmetric matrix
+*  and  A and B  are  n by k  matrices  in the  first  case  and  k by n
+*  matrices in the second case.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of the  array  C  is to be  referenced  as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry,  TRANS  specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   C := alpha*A*B' + alpha*B*A' +
+*                                        beta*C.
+*
+*              TRANS = 'T' or 't'   C := alpha*A'*B + alpha*B'*A +
+*                                        beta*C.
+*
+*              TRANS = 'C' or 'c'   C := alpha*A'*B + alpha*B'*A +
+*                                        beta*C.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N specifies the order of the matrix C.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+*           of  columns  of the  matrices  A and B,  and on  entry  with
+*           TRANS = 'T' or 't' or 'C' or 'c',  K  specifies  the  number
+*           of rows of the matrices  A and B.  K must be at least  zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by n  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDA must be at least  max( 1, n ), otherwise  LDA must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  B      - REAL             array of DIMENSION ( LDB, kb ), where kb is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  B  must contain the matrix  B,  otherwise
+*           the leading  k by n  part of the array  B  must contain  the
+*           matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDB must be at least  max( 1, n ), otherwise  LDB must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  BETA   - REAL            .
+*           On entry, BETA specifies the scalar beta.
+*           Unchanged on exit.
+*
+*  C      - REAL             array of DIMENSION ( LDC, n ).
+*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+*           upper triangular part of the array C must contain the upper
+*           triangular part  of the  symmetric matrix  and the strictly
+*           lower triangular part of C is not referenced.  On exit, the
+*           upper triangular part of the array  C is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+*           lower triangular part of the array C must contain the lower
+*           triangular part  of the  symmetric matrix  and the strictly
+*           upper triangular part of C is not referenced.  On exit, the
+*           lower triangular part of the array  C is overwritten by the
+*           lower triangular part of the updated matrix.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, L, NROWA
+      REAL               TEMP1, TEMP2
+*     .. Parameters ..
+      REAL               ONE         , ZERO
+      PARAMETER        ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+         NROWA = N
+      ELSE
+         NROWA = K
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*
+      INFO = 0
+      IF(      ( .NOT.UPPER               ).AND.
+     $         ( .NOT.LSAME( UPLO , 'L' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANS, 'T' ) ).AND.
+     $         ( .NOT.LSAME( TRANS, 'C' ) )      )THEN
+         INFO = 2
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( K  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDB.LT.MAX( 1, NROWA ) )THEN
+         INFO = 9
+      ELSE IF( LDC.LT.MAX( 1, N     ) )THEN
+         INFO = 12
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'SSYR2K', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.
+     $    ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( UPPER )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 20, J = 1, N
+                  DO 10, I = 1, J
+                     C( I, J ) = ZERO
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40, J = 1, N
+                  DO 30, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+   30             CONTINUE
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( BETA.EQ.ZERO )THEN
+               DO 60, J = 1, N
+                  DO 50, I = J, N
+                     C( I, J ) = ZERO
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80, J = 1, N
+                  DO 70, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  C := alpha*A*B' + alpha*B*A' + C.
+*
+         IF( UPPER )THEN
+            DO 130, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 90, I = 1, J
+                     C( I, J ) = ZERO
+   90             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 100, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+  100             CONTINUE
+               END IF
+               DO 120, L = 1, K
+                  IF( ( A( J, L ).NE.ZERO ).OR.
+     $                ( B( J, L ).NE.ZERO )     )THEN
+                     TEMP1 = ALPHA*B( J, L )
+                     TEMP2 = ALPHA*A( J, L )
+                     DO 110, I = 1, J
+                        C( I, J ) = C( I, J ) +
+     $                              A( I, L )*TEMP1 + B( I, L )*TEMP2
+  110                CONTINUE
+                  END IF
+  120          CONTINUE
+  130       CONTINUE
+         ELSE
+            DO 180, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 140, I = J, N
+                     C( I, J ) = ZERO
+  140             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 150, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+  150             CONTINUE
+               END IF
+               DO 170, L = 1, K
+                  IF( ( A( J, L ).NE.ZERO ).OR.
+     $                ( B( J, L ).NE.ZERO )     )THEN
+                     TEMP1 = ALPHA*B( J, L )
+                     TEMP2 = ALPHA*A( J, L )
+                     DO 160, I = J, N
+                        C( I, J ) = C( I, J ) +
+     $                              A( I, L )*TEMP1 + B( I, L )*TEMP2
+  160                CONTINUE
+                  END IF
+  170          CONTINUE
+  180       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  C := alpha*A'*B + alpha*B'*A + C.
+*
+         IF( UPPER )THEN
+            DO 210, J = 1, N
+               DO 200, I = 1, J
+                  TEMP1 = ZERO
+                  TEMP2 = ZERO
+                  DO 190, L = 1, K
+                     TEMP1 = TEMP1 + A( L, I )*B( L, J )
+                     TEMP2 = TEMP2 + B( L, I )*A( L, J )
+  190             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           ALPHA*TEMP1 + ALPHA*TEMP2
+                  END IF
+  200          CONTINUE
+  210       CONTINUE
+         ELSE
+            DO 240, J = 1, N
+               DO 230, I = J, N
+                  TEMP1 = ZERO
+                  TEMP2 = ZERO
+                  DO 220, L = 1, K
+                     TEMP1 = TEMP1 + A( L, I )*B( L, J )
+                     TEMP2 = TEMP2 + B( L, I )*A( L, J )
+  220             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           ALPHA*TEMP1 + ALPHA*TEMP2
+                  END IF
+  230          CONTINUE
+  240       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SSYR2K.
+*
+      END
diff --git a/costa/native/external/blas/ssyrk.f b/costa/native/external/blas/ssyrk.f
new file mode 100644
index 000000000..3c3b0ee88
--- /dev/null
+++ b/costa/native/external/blas/ssyrk.f
@@ -0,0 +1,294 @@
+      SUBROUTINE SSYRK ( UPLO, TRANS, N, K, ALPHA, A, LDA,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        UPLO, TRANS
+      INTEGER            N, K, LDA, LDC
+      REAL               ALPHA, BETA
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYRK  performs one of the symmetric rank k operations
+*
+*     C := alpha*A*A' + beta*C,
+*
+*  or
+*
+*     C := alpha*A'*A + beta*C,
+*
+*  where  alpha and beta  are scalars, C is an  n by n  symmetric matrix
+*  and  A  is an  n by k  matrix in the first case and a  k by n  matrix
+*  in the second case.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of the  array  C  is to be  referenced  as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry,  TRANS  specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   C := alpha*A*A' + beta*C.
+*
+*              TRANS = 'T' or 't'   C := alpha*A'*A + beta*C.
+*
+*              TRANS = 'C' or 'c'   C := alpha*A'*A + beta*C.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N specifies the order of the matrix C.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+*           of  columns   of  the   matrix   A,   and  on   entry   with
+*           TRANS = 'T' or 't' or 'C' or 'c',  K  specifies  the  number
+*           of rows of the matrix  A.  K must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by n  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDA must be at least  max( 1, n ), otherwise  LDA must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  BETA   - REAL            .
+*           On entry, BETA specifies the scalar beta.
+*           Unchanged on exit.
+*
+*  C      - REAL             array of DIMENSION ( LDC, n ).
+*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+*           upper triangular part of the array C must contain the upper
+*           triangular part  of the  symmetric matrix  and the strictly
+*           lower triangular part of C is not referenced.  On exit, the
+*           upper triangular part of the array  C is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+*           lower triangular part of the array C must contain the lower
+*           triangular part  of the  symmetric matrix  and the strictly
+*           upper triangular part of C is not referenced.  On exit, the
+*           lower triangular part of the array  C is overwritten by the
+*           lower triangular part of the updated matrix.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, L, NROWA
+      REAL               TEMP
+*     .. Parameters ..
+      REAL               ONE ,         ZERO
+      PARAMETER        ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+         NROWA = N
+      ELSE
+         NROWA = K
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*
+      INFO = 0
+      IF(      ( .NOT.UPPER               ).AND.
+     $         ( .NOT.LSAME( UPLO , 'L' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANS, 'T' ) ).AND.
+     $         ( .NOT.LSAME( TRANS, 'C' ) )      )THEN
+         INFO = 2
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( K  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDC.LT.MAX( 1, N     ) )THEN
+         INFO = 10
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'SSYRK ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.
+     $    ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( UPPER )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 20, J = 1, N
+                  DO 10, I = 1, J
+                     C( I, J ) = ZERO
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40, J = 1, N
+                  DO 30, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+   30             CONTINUE
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( BETA.EQ.ZERO )THEN
+               DO 60, J = 1, N
+                  DO 50, I = J, N
+                     C( I, J ) = ZERO
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80, J = 1, N
+                  DO 70, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  C := alpha*A*A' + beta*C.
+*
+         IF( UPPER )THEN
+            DO 130, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 90, I = 1, J
+                     C( I, J ) = ZERO
+   90             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 100, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+  100             CONTINUE
+               END IF
+               DO 120, L = 1, K
+                  IF( A( J, L ).NE.ZERO )THEN
+                     TEMP = ALPHA*A( J, L )
+                     DO 110, I = 1, J
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  110                CONTINUE
+                  END IF
+  120          CONTINUE
+  130       CONTINUE
+         ELSE
+            DO 180, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 140, I = J, N
+                     C( I, J ) = ZERO
+  140             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 150, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+  150             CONTINUE
+               END IF
+               DO 170, L = 1, K
+                  IF( A( J, L ).NE.ZERO )THEN
+                     TEMP      = ALPHA*A( J, L )
+                     DO 160, I = J, N
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  160                CONTINUE
+                  END IF
+  170          CONTINUE
+  180       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  C := alpha*A'*A + beta*C.
+*
+         IF( UPPER )THEN
+            DO 210, J = 1, N
+               DO 200, I = 1, J
+                  TEMP = ZERO
+                  DO 190, L = 1, K
+                     TEMP = TEMP + A( L, I )*A( L, J )
+  190             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  200          CONTINUE
+  210       CONTINUE
+         ELSE
+            DO 240, J = 1, N
+               DO 230, I = J, N
+                  TEMP = ZERO
+                  DO 220, L = 1, K
+                     TEMP = TEMP + A( L, I )*A( L, J )
+  220             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  230          CONTINUE
+  240       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SSYRK .
+*
+      END
diff --git a/costa/native/external/blas/stbmv.f b/costa/native/external/blas/stbmv.f
new file mode 100644
index 000000000..92a0a563b
--- /dev/null
+++ b/costa/native/external/blas/stbmv.f
@@ -0,0 +1,342 @@
+      SUBROUTINE STBMV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, K, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STBMV  performs one of the matrix-vector operations
+*
+*     x := A*x,   or   x := A'*x,
+*
+*  where x is an n element vector and  A is an n by n unit, or non-unit,
+*  upper or lower triangular band matrix, with ( k + 1 ) diagonals.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   x := A*x.
+*
+*              TRANS = 'T' or 't'   x := A'*x.
+*
+*              TRANS = 'C' or 'c'   x := A'*x.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with UPLO = 'U' or 'u', K specifies the number of
+*           super-diagonals of the matrix A.
+*           On entry with UPLO = 'L' or 'l', K specifies the number of
+*           sub-diagonals of the matrix A.
+*           K must satisfy  0 .le. K.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, n ).
+*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
+*           by n part of the array A must contain the upper triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row
+*           ( k + 1 ) of the array, the first super-diagonal starting at
+*           position 2 in row k, and so on. The top left k by k triangle
+*           of the array A is not referenced.
+*           The following program segment will transfer an upper
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = K + 1 - J
+*                    DO 10, I = MAX( 1, J - K ), J
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
+*           by n part of the array A must contain the lower triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row 1 of
+*           the array, the first sub-diagonal starting at position 1 in
+*           row 2, and so on. The bottom right k by k triangle of the
+*           array A is not referenced.
+*           The following program segment will transfer a lower
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = 1 - J
+*                    DO 10, I = J, MIN( N, J + K )
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Note that when DIAG = 'U' or 'u' the elements of the array A
+*           corresponding to the diagonal elements of the matrix are not
+*           referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( k + 1 ).
+*           Unchanged on exit.
+*
+*  X      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x. On exit, X is overwritten with the
+*           tranformed vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER        ( ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP
+      INTEGER            I, INFO, IX, J, JX, KPLUS1, KX, L
+      LOGICAL            NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( K.LT.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.( K + 1 ) )THEN
+         INFO = 7
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'STBMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX   too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*         Form  x := A*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     L    = KPLUS1 - J
+                     DO 10, I = MAX( 1, J - K ), J - 1
+                        X( I ) = X( I ) + TEMP*A( L + I, J )
+   10                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( KPLUS1, J )
+                  END IF
+   20          CONTINUE
+            ELSE
+               JX = KX
+               DO 40, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     L    = KPLUS1  - J
+                     DO 30, I = MAX( 1, J - K ), J - 1
+                        X( IX ) = X( IX ) + TEMP*A( L + I, J )
+                        IX      = IX      + INCX
+   30                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( KPLUS1, J )
+                  END IF
+                  JX = JX + INCX
+                  IF( J.GT.K )
+     $               KX = KX + INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     L    = 1      - J
+                     DO 50, I = MIN( N, J + K ), J + 1, -1
+                        X( I ) = X( I ) + TEMP*A( L + I, J )
+   50                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( 1, J )
+                  END IF
+   60          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 80, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     L    = 1       - J
+                     DO 70, I = MIN( N, J + K ), J + 1, -1
+                        X( IX ) = X( IX ) + TEMP*A( L + I, J )
+                        IX      = IX      - INCX
+   70                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( 1, J )
+                  END IF
+                  JX = JX - INCX
+                  IF( ( N - J ).GE.K )
+     $               KX = KX - INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := A'*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 100, J = N, 1, -1
+                  TEMP = X( J )
+                  L    = KPLUS1 - J
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( KPLUS1, J )
+                  DO 90, I = J - 1, MAX( 1, J - K ), -1
+                     TEMP = TEMP + A( L + I, J )*X( I )
+   90             CONTINUE
+                  X( J ) = TEMP
+  100          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 120, J = N, 1, -1
+                  TEMP = X( JX )
+                  KX   = KX      - INCX
+                  IX   = KX
+                  L    = KPLUS1  - J
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( KPLUS1, J )
+                  DO 110, I = J - 1, MAX( 1, J - K ), -1
+                     TEMP = TEMP + A( L + I, J )*X( IX )
+                     IX   = IX   - INCX
+  110             CONTINUE
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+  120          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 140, J = 1, N
+                  TEMP = X( J )
+                  L    = 1      - J
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( 1, J )
+                  DO 130, I = J + 1, MIN( N, J + K )
+                     TEMP = TEMP + A( L + I, J )*X( I )
+  130             CONTINUE
+                  X( J ) = TEMP
+  140          CONTINUE
+            ELSE
+               JX = KX
+               DO 160, J = 1, N
+                  TEMP = X( JX )
+                  KX   = KX      + INCX
+                  IX   = KX
+                  L    = 1       - J
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( 1, J )
+                  DO 150, I = J + 1, MIN( N, J + K )
+                     TEMP = TEMP + A( L + I, J )*X( IX )
+                     IX   = IX   + INCX
+  150             CONTINUE
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+  160          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of STBMV .
+*
+      END
diff --git a/costa/native/external/blas/stbsv.f b/costa/native/external/blas/stbsv.f
new file mode 100644
index 000000000..96ebe26c3
--- /dev/null
+++ b/costa/native/external/blas/stbsv.f
@@ -0,0 +1,346 @@
+      SUBROUTINE STBSV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, K, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STBSV  solves one of the systems of equations
+*
+*     A*x = b,   or   A'*x = b,
+*
+*  where b and x are n element vectors and A is an n by n unit, or
+*  non-unit, upper or lower triangular band matrix, with ( k + 1 )
+*  diagonals.
+*
+*  No test for singularity or near-singularity is included in this
+*  routine. Such tests must be performed before calling this routine.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the equations to be solved as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   A*x = b.
+*
+*              TRANS = 'T' or 't'   A'*x = b.
+*
+*              TRANS = 'C' or 'c'   A'*x = b.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with UPLO = 'U' or 'u', K specifies the number of
+*           super-diagonals of the matrix A.
+*           On entry with UPLO = 'L' or 'l', K specifies the number of
+*           sub-diagonals of the matrix A.
+*           K must satisfy  0 .le. K.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, n ).
+*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
+*           by n part of the array A must contain the upper triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row
+*           ( k + 1 ) of the array, the first super-diagonal starting at
+*           position 2 in row k, and so on. The top left k by k triangle
+*           of the array A is not referenced.
+*           The following program segment will transfer an upper
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = K + 1 - J
+*                    DO 10, I = MAX( 1, J - K ), J
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
+*           by n part of the array A must contain the lower triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row 1 of
+*           the array, the first sub-diagonal starting at position 1 in
+*           row 2, and so on. The bottom right k by k triangle of the
+*           array A is not referenced.
+*           The following program segment will transfer a lower
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = 1 - J
+*                    DO 10, I = J, MIN( N, J + K )
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Note that when DIAG = 'U' or 'u' the elements of the array A
+*           corresponding to the diagonal elements of the matrix are not
+*           referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( k + 1 ).
+*           Unchanged on exit.
+*
+*  X      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element right-hand side vector b. On exit, X is overwritten
+*           with the solution vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER        ( ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP
+      INTEGER            I, INFO, IX, J, JX, KPLUS1, KX, L
+      LOGICAL            NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( K.LT.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.( K + 1 ) )THEN
+         INFO = 7
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'STBSV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed by sequentially with one pass through A.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x := inv( A )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     L = KPLUS1 - J
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( KPLUS1, J )
+                     TEMP = X( J )
+                     DO 10, I = J - 1, MAX( 1, J - K ), -1
+                        X( I ) = X( I ) - TEMP*A( L + I, J )
+   10                CONTINUE
+                  END IF
+   20          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 40, J = N, 1, -1
+                  KX = KX - INCX
+                  IF( X( JX ).NE.ZERO )THEN
+                     IX = KX
+                     L  = KPLUS1 - J
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( KPLUS1, J )
+                     TEMP = X( JX )
+                     DO 30, I = J - 1, MAX( 1, J - K ), -1
+                        X( IX ) = X( IX ) - TEMP*A( L + I, J )
+                        IX      = IX      - INCX
+   30                CONTINUE
+                  END IF
+                  JX = JX - INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     L = 1 - J
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( 1, J )
+                     TEMP = X( J )
+                     DO 50, I = J + 1, MIN( N, J + K )
+                        X( I ) = X( I ) - TEMP*A( L + I, J )
+   50                CONTINUE
+                  END IF
+   60          CONTINUE
+            ELSE
+               JX = KX
+               DO 80, J = 1, N
+                  KX = KX + INCX
+                  IF( X( JX ).NE.ZERO )THEN
+                     IX = KX
+                     L  = 1  - J
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( 1, J )
+                     TEMP = X( JX )
+                     DO 70, I = J + 1, MIN( N, J + K )
+                        X( IX ) = X( IX ) - TEMP*A( L + I, J )
+                        IX      = IX      + INCX
+   70                CONTINUE
+                  END IF
+                  JX = JX + INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := inv( A')*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 100, J = 1, N
+                  TEMP = X( J )
+                  L    = KPLUS1 - J
+                  DO 90, I = MAX( 1, J - K ), J - 1
+                     TEMP = TEMP - A( L + I, J )*X( I )
+   90             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( KPLUS1, J )
+                  X( J ) = TEMP
+  100          CONTINUE
+            ELSE
+               JX = KX
+               DO 120, J = 1, N
+                  TEMP = X( JX )
+                  IX   = KX
+                  L    = KPLUS1  - J
+                  DO 110, I = MAX( 1, J - K ), J - 1
+                     TEMP = TEMP - A( L + I, J )*X( IX )
+                     IX   = IX   + INCX
+  110             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( KPLUS1, J )
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+                  IF( J.GT.K )
+     $               KX = KX + INCX
+  120          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 140, J = N, 1, -1
+                  TEMP = X( J )
+                  L    = 1      - J
+                  DO 130, I = MIN( N, J + K ), J + 1, -1
+                     TEMP = TEMP - A( L + I, J )*X( I )
+  130             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( 1, J )
+                  X( J ) = TEMP
+  140          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 160, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = KX
+                  L    = 1       - J
+                  DO 150, I = MIN( N, J + K ), J + 1, -1
+                     TEMP = TEMP - A( L + I, J )*X( IX )
+                     IX   = IX   - INCX
+  150             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( 1, J )
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+                  IF( ( N - J ).GE.K )
+     $               KX = KX - INCX
+  160          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of STBSV .
+*
+      END
diff --git a/costa/native/external/blas/stpmv.f b/costa/native/external/blas/stpmv.f
new file mode 100644
index 000000000..c2acf8636
--- /dev/null
+++ b/costa/native/external/blas/stpmv.f
@@ -0,0 +1,299 @@
+      SUBROUTINE STPMV ( UPLO, TRANS, DIAG, N, AP, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      REAL               AP( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STPMV  performs one of the matrix-vector operations
+*
+*     x := A*x,   or   x := A'*x,
+*
+*  where x is an n element vector and  A is an n by n unit, or non-unit,
+*  upper or lower triangular matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   x := A*x.
+*
+*              TRANS = 'T' or 't'   x := A'*x.
+*
+*              TRANS = 'C' or 'c'   x := A'*x.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  AP     - REAL             array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
+*           respectively, and so on.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
+*           respectively, and so on.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  X      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x. On exit, X is overwritten with the
+*           tranformed vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER        ( ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP
+      INTEGER            I, INFO, IX, J, JX, K, KK, KX
+      LOGICAL            NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'STPMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of AP are
+*     accessed sequentially with one pass through AP.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x:= A*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK =1
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     K    = KK
+                     DO 10, I = 1, J - 1
+                        X( I ) = X( I ) + TEMP*AP( K )
+                        K      = K      + 1
+   10                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*AP( KK + J - 1 )
+                  END IF
+                  KK = KK + J
+   20          CONTINUE
+            ELSE
+               JX = KX
+               DO 40, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 30, K = KK, KK + J - 2
+                        X( IX ) = X( IX ) + TEMP*AP( K )
+                        IX      = IX      + INCX
+   30                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*AP( KK + J - 1 )
+                  END IF
+                  JX = JX + INCX
+                  KK = KK + J
+   40          CONTINUE
+            END IF
+         ELSE
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     K    = KK
+                     DO 50, I = N, J + 1, -1
+                        X( I ) = X( I ) + TEMP*AP( K )
+                        K      = K      - 1
+   50                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*AP( KK - N + J )
+                  END IF
+                  KK = KK - ( N - J + 1 )
+   60          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 80, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 70, K = KK, KK - ( N - ( J + 1 ) ), -1
+                        X( IX ) = X( IX ) + TEMP*AP( K )
+                        IX      = IX      - INCX
+   70                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*AP( KK - N + J )
+                  END IF
+                  JX = JX - INCX
+                  KK = KK - ( N - J + 1 )
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := A'*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 100, J = N, 1, -1
+                  TEMP = X( J )
+                  IF( NOUNIT )
+     $               TEMP = TEMP*AP( KK )
+                  K = KK - 1
+                  DO 90, I = J - 1, 1, -1
+                     TEMP = TEMP + AP( K )*X( I )
+                     K    = K    - 1
+   90             CONTINUE
+                  X( J ) = TEMP
+                  KK     = KK   - J
+  100          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 120, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOUNIT )
+     $               TEMP = TEMP*AP( KK )
+                  DO 110, K = KK - 1, KK - J + 1, -1
+                     IX   = IX   - INCX
+                     TEMP = TEMP + AP( K )*X( IX )
+  110             CONTINUE
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+                  KK      = KK   - J
+  120          CONTINUE
+            END IF
+         ELSE
+            KK = 1
+            IF( INCX.EQ.1 )THEN
+               DO 140, J = 1, N
+                  TEMP = X( J )
+                  IF( NOUNIT )
+     $               TEMP = TEMP*AP( KK )
+                  K = KK + 1
+                  DO 130, I = J + 1, N
+                     TEMP = TEMP + AP( K )*X( I )
+                     K    = K    + 1
+  130             CONTINUE
+                  X( J ) = TEMP
+                  KK     = KK   + ( N - J + 1 )
+  140          CONTINUE
+            ELSE
+               JX = KX
+               DO 160, J = 1, N
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOUNIT )
+     $               TEMP = TEMP*AP( KK )
+                  DO 150, K = KK + 1, KK + N - J
+                     IX   = IX   + INCX
+                     TEMP = TEMP + AP( K )*X( IX )
+  150             CONTINUE
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+                  KK      = KK   + ( N - J + 1 )
+  160          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of STPMV .
+*
+      END
diff --git a/costa/native/external/blas/stpsv.f b/costa/native/external/blas/stpsv.f
new file mode 100644
index 000000000..f22d6c65a
--- /dev/null
+++ b/costa/native/external/blas/stpsv.f
@@ -0,0 +1,302 @@
+      SUBROUTINE STPSV ( UPLO, TRANS, DIAG, N, AP, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      REAL               AP( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STPSV  solves one of the systems of equations
+*
+*     A*x = b,   or   A'*x = b,
+*
+*  where b and x are n element vectors and A is an n by n unit, or
+*  non-unit, upper or lower triangular matrix, supplied in packed form.
+*
+*  No test for singularity or near-singularity is included in this
+*  routine. Such tests must be performed before calling this routine.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the equations to be solved as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   A*x = b.
+*
+*              TRANS = 'T' or 't'   A'*x = b.
+*
+*              TRANS = 'C' or 'c'   A'*x = b.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  AP     - REAL             array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
+*           respectively, and so on.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
+*           respectively, and so on.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  X      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element right-hand side vector b. On exit, X is overwritten
+*           with the solution vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER        ( ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP
+      INTEGER            I, INFO, IX, J, JX, K, KK, KX
+      LOGICAL            NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'STPSV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of AP are
+*     accessed sequentially with one pass through AP.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x := inv( A )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/AP( KK )
+                     TEMP = X( J )
+                     K    = KK     - 1
+                     DO 10, I = J - 1, 1, -1
+                        X( I ) = X( I ) - TEMP*AP( K )
+                        K      = K      - 1
+   10                CONTINUE
+                  END IF
+                  KK = KK - J
+   20          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 40, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/AP( KK )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 30, K = KK - 1, KK - J + 1, -1
+                        IX      = IX      - INCX
+                        X( IX ) = X( IX ) - TEMP*AP( K )
+   30                CONTINUE
+                  END IF
+                  JX = JX - INCX
+                  KK = KK - J
+   40          CONTINUE
+            END IF
+         ELSE
+            KK = 1
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/AP( KK )
+                     TEMP = X( J )
+                     K    = KK     + 1
+                     DO 50, I = J + 1, N
+                        X( I ) = X( I ) - TEMP*AP( K )
+                        K      = K      + 1
+   50                CONTINUE
+                  END IF
+                  KK = KK + ( N - J + 1 )
+   60          CONTINUE
+            ELSE
+               JX = KX
+               DO 80, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/AP( KK )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 70, K = KK + 1, KK + N - J
+                        IX      = IX      + INCX
+                        X( IX ) = X( IX ) - TEMP*AP( K )
+   70                CONTINUE
+                  END IF
+                  JX = JX + INCX
+                  KK = KK + ( N - J + 1 )
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := inv( A' )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK = 1
+            IF( INCX.EQ.1 )THEN
+               DO 100, J = 1, N
+                  TEMP = X( J )
+                  K    = KK
+                  DO 90, I = 1, J - 1
+                     TEMP = TEMP - AP( K )*X( I )
+                     K    = K    + 1
+   90             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/AP( KK + J - 1 )
+                  X( J ) = TEMP
+                  KK     = KK   + J
+  100          CONTINUE
+            ELSE
+               JX = KX
+               DO 120, J = 1, N
+                  TEMP = X( JX )
+                  IX   = KX
+                  DO 110, K = KK, KK + J - 2
+                     TEMP = TEMP - AP( K )*X( IX )
+                     IX   = IX   + INCX
+  110             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/AP( KK + J - 1 )
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+                  KK      = KK   + J
+  120          CONTINUE
+            END IF
+         ELSE
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 140, J = N, 1, -1
+                  TEMP = X( J )
+                  K = KK
+                  DO 130, I = N, J + 1, -1
+                     TEMP = TEMP - AP( K )*X( I )
+                     K    = K    - 1
+  130             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/AP( KK - N + J )
+                  X( J ) = TEMP
+                  KK     = KK   - ( N - J + 1 )
+  140          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 160, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = KX
+                  DO 150, K = KK, KK - ( N - ( J + 1 ) ), -1
+                     TEMP = TEMP - AP( K )*X( IX )
+                     IX   = IX   - INCX
+  150             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/AP( KK - N + J )
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+                  KK      = KK   - (N - J + 1 )
+  160          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of STPSV .
+*
+      END
diff --git a/costa/native/external/blas/strmm.f b/costa/native/external/blas/strmm.f
new file mode 100644
index 000000000..b1556c634
--- /dev/null
+++ b/costa/native/external/blas/strmm.f
@@ -0,0 +1,355 @@
+      SUBROUTINE STRMM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA,
+     $                   B, LDB )
+*     .. Scalar Arguments ..
+      CHARACTER*1        SIDE, UPLO, TRANSA, DIAG
+      INTEGER            M, N, LDA, LDB
+      REAL               ALPHA
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STRMM  performs one of the matrix-matrix operations
+*
+*     B := alpha*op( A )*B,   or   B := alpha*B*op( A ),
+*
+*  where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or
+*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
+*
+*     op( A ) = A   or   op( A ) = A'.
+*
+*  Parameters
+*  ==========
+*
+*  SIDE   - CHARACTER*1.
+*           On entry,  SIDE specifies whether  op( A ) multiplies B from
+*           the left or right as follows:
+*
+*              SIDE = 'L' or 'l'   B := alpha*op( A )*B.
+*
+*              SIDE = 'R' or 'r'   B := alpha*B*op( A ).
+*
+*           Unchanged on exit.
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix A is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANSA - CHARACTER*1.
+*           On entry, TRANSA specifies the form of op( A ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSA = 'N' or 'n'   op( A ) = A.
+*
+*              TRANSA = 'T' or 't'   op( A ) = A'.
+*
+*              TRANSA = 'C' or 'c'   op( A ) = A'.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit triangular
+*           as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of B. M must be at
+*           least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of B.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
+*           zero then  A is not referenced and  B need not be set before
+*           entry.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, k ), where k is m
+*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
+*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
+*           upper triangular part of the array  A must contain the upper
+*           triangular matrix  and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
+*           lower triangular part of the array  A must contain the lower
+*           triangular matrix  and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
+*           A  are not referenced either,  but are assumed to be  unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
+*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
+*           then LDA must be at least max( 1, n ).
+*           Unchanged on exit.
+*
+*  B      - REAL             array of DIMENSION ( LDB, n ).
+*           Before entry,  the leading  m by n part of the array  B must
+*           contain the matrix  B,  and  on exit  is overwritten  by the
+*           transformed matrix.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   LDB  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            LSIDE, NOUNIT, UPPER
+      INTEGER            I, INFO, J, K, NROWA
+      REAL               TEMP
+*     .. Parameters ..
+      REAL               ONE         , ZERO
+      PARAMETER        ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      LSIDE  = LSAME( SIDE  , 'L' )
+      IF( LSIDE )THEN
+         NROWA = M
+      ELSE
+         NROWA = N
+      END IF
+      NOUNIT = LSAME( DIAG  , 'N' )
+      UPPER  = LSAME( UPLO  , 'U' )
+*
+      INFO   = 0
+      IF(      ( .NOT.LSIDE                ).AND.
+     $         ( .NOT.LSAME( SIDE  , 'R' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.UPPER                ).AND.
+     $         ( .NOT.LSAME( UPLO  , 'L' ) )      )THEN
+         INFO = 2
+      ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'T' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'C' ) )      )THEN
+         INFO = 3
+      ELSE IF( ( .NOT.LSAME( DIAG  , 'U' ) ).AND.
+     $         ( .NOT.LSAME( DIAG  , 'N' ) )      )THEN
+         INFO = 4
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 5
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 6
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 9
+      ELSE IF( LDB.LT.MAX( 1, M     ) )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'STRMM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         DO 20, J = 1, N
+            DO 10, I = 1, M
+               B( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSIDE )THEN
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*A*B.
+*
+            IF( UPPER )THEN
+               DO 50, J = 1, N
+                  DO 40, K = 1, M
+                     IF( B( K, J ).NE.ZERO )THEN
+                        TEMP = ALPHA*B( K, J )
+                        DO 30, I = 1, K - 1
+                           B( I, J ) = B( I, J ) + TEMP*A( I, K )
+   30                   CONTINUE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP*A( K, K )
+                        B( K, J ) = TEMP
+                     END IF
+   40             CONTINUE
+   50          CONTINUE
+            ELSE
+               DO 80, J = 1, N
+                  DO 70 K = M, 1, -1
+                     IF( B( K, J ).NE.ZERO )THEN
+                        TEMP      = ALPHA*B( K, J )
+                        B( K, J ) = TEMP
+                        IF( NOUNIT )
+     $                     B( K, J ) = B( K, J )*A( K, K )
+                        DO 60, I = K + 1, M
+                           B( I, J ) = B( I, J ) + TEMP*A( I, K )
+   60                   CONTINUE
+                     END IF
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*A'*B.
+*
+            IF( UPPER )THEN
+               DO 110, J = 1, N
+                  DO 100, I = M, 1, -1
+                     TEMP = B( I, J )
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( I, I )
+                     DO 90, K = 1, I - 1
+                        TEMP = TEMP + A( K, I )*B( K, J )
+   90                CONTINUE
+                     B( I, J ) = ALPHA*TEMP
+  100             CONTINUE
+  110          CONTINUE
+            ELSE
+               DO 140, J = 1, N
+                  DO 130, I = 1, M
+                     TEMP = B( I, J )
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( I, I )
+                     DO 120, K = I + 1, M
+                        TEMP = TEMP + A( K, I )*B( K, J )
+  120                CONTINUE
+                     B( I, J ) = ALPHA*TEMP
+  130             CONTINUE
+  140          CONTINUE
+            END IF
+         END IF
+      ELSE
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*B*A.
+*
+            IF( UPPER )THEN
+               DO 180, J = N, 1, -1
+                  TEMP = ALPHA
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 150, I = 1, M
+                     B( I, J ) = TEMP*B( I, J )
+  150             CONTINUE
+                  DO 170, K = 1, J - 1
+                     IF( A( K, J ).NE.ZERO )THEN
+                        TEMP = ALPHA*A( K, J )
+                        DO 160, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  160                   CONTINUE
+                     END IF
+  170             CONTINUE
+  180          CONTINUE
+            ELSE
+               DO 220, J = 1, N
+                  TEMP = ALPHA
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 190, I = 1, M
+                     B( I, J ) = TEMP*B( I, J )
+  190             CONTINUE
+                  DO 210, K = J + 1, N
+                     IF( A( K, J ).NE.ZERO )THEN
+                        TEMP = ALPHA*A( K, J )
+                        DO 200, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  200                   CONTINUE
+                     END IF
+  210             CONTINUE
+  220          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*B*A'.
+*
+            IF( UPPER )THEN
+               DO 260, K = 1, N
+                  DO 240, J = 1, K - 1
+                     IF( A( J, K ).NE.ZERO )THEN
+                        TEMP = ALPHA*A( J, K )
+                        DO 230, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  230                   CONTINUE
+                     END IF
+  240             CONTINUE
+                  TEMP = ALPHA
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( K, K )
+                  IF( TEMP.NE.ONE )THEN
+                     DO 250, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  250                CONTINUE
+                  END IF
+  260          CONTINUE
+            ELSE
+               DO 300, K = N, 1, -1
+                  DO 280, J = K + 1, N
+                     IF( A( J, K ).NE.ZERO )THEN
+                        TEMP = ALPHA*A( J, K )
+                        DO 270, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  270                   CONTINUE
+                     END IF
+  280             CONTINUE
+                  TEMP = ALPHA
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( K, K )
+                  IF( TEMP.NE.ONE )THEN
+                     DO 290, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  290                CONTINUE
+                  END IF
+  300          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of STRMM .
+*
+      END
diff --git a/costa/native/external/blas/strmv.f b/costa/native/external/blas/strmv.f
new file mode 100644
index 000000000..f2f7d75dd
--- /dev/null
+++ b/costa/native/external/blas/strmv.f
@@ -0,0 +1,286 @@
+      SUBROUTINE STRMV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STRMV  performs one of the matrix-vector operations
+*
+*     x := A*x,   or   x := A'*x,
+*
+*  where x is an n element vector and  A is an n by n unit, or non-unit,
+*  upper or lower triangular matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   x := A*x.
+*
+*              TRANS = 'T' or 't'   x := A'*x.
+*
+*              TRANS = 'C' or 'c'   x := A'*x.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular matrix and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular matrix and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced either, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*  X      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x. On exit, X is overwritten with the
+*           tranformed vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER        ( ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP
+      INTEGER            I, INFO, IX, J, JX, KX
+      LOGICAL            NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'STRMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x := A*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     DO 10, I = 1, J - 1
+                        X( I ) = X( I ) + TEMP*A( I, J )
+   10                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( J, J )
+                  END IF
+   20          CONTINUE
+            ELSE
+               JX = KX
+               DO 40, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 30, I = 1, J - 1
+                        X( IX ) = X( IX ) + TEMP*A( I, J )
+                        IX      = IX      + INCX
+   30                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( J, J )
+                  END IF
+                  JX = JX + INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     DO 50, I = N, J + 1, -1
+                        X( I ) = X( I ) + TEMP*A( I, J )
+   50                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( J, J )
+                  END IF
+   60          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 80, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 70, I = N, J + 1, -1
+                        X( IX ) = X( IX ) + TEMP*A( I, J )
+                        IX      = IX      - INCX
+   70                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( J, J )
+                  END IF
+                  JX = JX - INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := A'*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 100, J = N, 1, -1
+                  TEMP = X( J )
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 90, I = J - 1, 1, -1
+                     TEMP = TEMP + A( I, J )*X( I )
+   90             CONTINUE
+                  X( J ) = TEMP
+  100          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 120, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 110, I = J - 1, 1, -1
+                     IX   = IX   - INCX
+                     TEMP = TEMP + A( I, J )*X( IX )
+  110             CONTINUE
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+  120          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 140, J = 1, N
+                  TEMP = X( J )
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 130, I = J + 1, N
+                     TEMP = TEMP + A( I, J )*X( I )
+  130             CONTINUE
+                  X( J ) = TEMP
+  140          CONTINUE
+            ELSE
+               JX = KX
+               DO 160, J = 1, N
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 150, I = J + 1, N
+                     IX   = IX   + INCX
+                     TEMP = TEMP + A( I, J )*X( IX )
+  150             CONTINUE
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+  160          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of STRMV .
+*
+      END
diff --git a/costa/native/external/blas/strsm.f b/costa/native/external/blas/strsm.f
new file mode 100644
index 000000000..1c80a7aee
--- /dev/null
+++ b/costa/native/external/blas/strsm.f
@@ -0,0 +1,378 @@
+      SUBROUTINE STRSM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA,
+     $                   B, LDB )
+*     .. Scalar Arguments ..
+      CHARACTER*1        SIDE, UPLO, TRANSA, DIAG
+      INTEGER            M, N, LDA, LDB
+      REAL               ALPHA
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STRSM  solves one of the matrix equations
+*
+*     op( A )*X = alpha*B,   or   X*op( A ) = alpha*B,
+*
+*  where alpha is a scalar, X and B are m by n matrices, A is a unit, or
+*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
+*
+*     op( A ) = A   or   op( A ) = A'.
+*
+*  The matrix X is overwritten on B.
+*
+*  Parameters
+*  ==========
+*
+*  SIDE   - CHARACTER*1.
+*           On entry, SIDE specifies whether op( A ) appears on the left
+*           or right of X as follows:
+*
+*              SIDE = 'L' or 'l'   op( A )*X = alpha*B.
+*
+*              SIDE = 'R' or 'r'   X*op( A ) = alpha*B.
+*
+*           Unchanged on exit.
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix A is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANSA - CHARACTER*1.
+*           On entry, TRANSA specifies the form of op( A ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSA = 'N' or 'n'   op( A ) = A.
+*
+*              TRANSA = 'T' or 't'   op( A ) = A'.
+*
+*              TRANSA = 'C' or 'c'   op( A ) = A'.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit triangular
+*           as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of B. M must be at
+*           least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of B.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - REAL            .
+*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
+*           zero then  A is not referenced and  B need not be set before
+*           entry.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, k ), where k is m
+*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
+*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
+*           upper triangular part of the array  A must contain the upper
+*           triangular matrix  and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
+*           lower triangular part of the array  A must contain the lower
+*           triangular matrix  and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
+*           A  are not referenced either,  but are assumed to be  unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
+*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
+*           then LDA must be at least max( 1, n ).
+*           Unchanged on exit.
+*
+*  B      - REAL             array of DIMENSION ( LDB, n ).
+*           Before entry,  the leading  m by n part of the array  B must
+*           contain  the  right-hand  side  matrix  B,  and  on exit  is
+*           overwritten by the solution matrix  X.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   LDB  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            LSIDE, NOUNIT, UPPER
+      INTEGER            I, INFO, J, K, NROWA
+      REAL               TEMP
+*     .. Parameters ..
+      REAL               ONE         , ZERO
+      PARAMETER        ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      LSIDE  = LSAME( SIDE  , 'L' )
+      IF( LSIDE )THEN
+         NROWA = M
+      ELSE
+         NROWA = N
+      END IF
+      NOUNIT = LSAME( DIAG  , 'N' )
+      UPPER  = LSAME( UPLO  , 'U' )
+*
+      INFO   = 0
+      IF(      ( .NOT.LSIDE                ).AND.
+     $         ( .NOT.LSAME( SIDE  , 'R' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.UPPER                ).AND.
+     $         ( .NOT.LSAME( UPLO  , 'L' ) )      )THEN
+         INFO = 2
+      ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'T' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'C' ) )      )THEN
+         INFO = 3
+      ELSE IF( ( .NOT.LSAME( DIAG  , 'U' ) ).AND.
+     $         ( .NOT.LSAME( DIAG  , 'N' ) )      )THEN
+         INFO = 4
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 5
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 6
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 9
+      ELSE IF( LDB.LT.MAX( 1, M     ) )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'STRSM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         DO 20, J = 1, N
+            DO 10, I = 1, M
+               B( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSIDE )THEN
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*inv( A )*B.
+*
+            IF( UPPER )THEN
+               DO 60, J = 1, N
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 30, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+   30                CONTINUE
+                  END IF
+                  DO 50, K = M, 1, -1
+                     IF( B( K, J ).NE.ZERO )THEN
+                        IF( NOUNIT )
+     $                     B( K, J ) = B( K, J )/A( K, K )
+                        DO 40, I = 1, K - 1
+                           B( I, J ) = B( I, J ) - B( K, J )*A( I, K )
+   40                   CONTINUE
+                     END IF
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 100, J = 1, N
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 70, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+   70                CONTINUE
+                  END IF
+                  DO 90 K = 1, M
+                     IF( B( K, J ).NE.ZERO )THEN
+                        IF( NOUNIT )
+     $                     B( K, J ) = B( K, J )/A( K, K )
+                        DO 80, I = K + 1, M
+                           B( I, J ) = B( I, J ) - B( K, J )*A( I, K )
+   80                   CONTINUE
+                     END IF
+   90             CONTINUE
+  100          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*inv( A' )*B.
+*
+            IF( UPPER )THEN
+               DO 130, J = 1, N
+                  DO 120, I = 1, M
+                     TEMP = ALPHA*B( I, J )
+                     DO 110, K = 1, I - 1
+                        TEMP = TEMP - A( K, I )*B( K, J )
+  110                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( I, I )
+                     B( I, J ) = TEMP
+  120             CONTINUE
+  130          CONTINUE
+            ELSE
+               DO 160, J = 1, N
+                  DO 150, I = M, 1, -1
+                     TEMP = ALPHA*B( I, J )
+                     DO 140, K = I + 1, M
+                        TEMP = TEMP - A( K, I )*B( K, J )
+  140                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( I, I )
+                     B( I, J ) = TEMP
+  150             CONTINUE
+  160          CONTINUE
+            END IF
+         END IF
+      ELSE
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*B*inv( A ).
+*
+            IF( UPPER )THEN
+               DO 210, J = 1, N
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 170, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+  170                CONTINUE
+                  END IF
+                  DO 190, K = 1, J - 1
+                     IF( A( K, J ).NE.ZERO )THEN
+                        DO 180, I = 1, M
+                           B( I, J ) = B( I, J ) - A( K, J )*B( I, K )
+  180                   CONTINUE
+                     END IF
+  190             CONTINUE
+                  IF( NOUNIT )THEN
+                     TEMP = ONE/A( J, J )
+                     DO 200, I = 1, M
+                        B( I, J ) = TEMP*B( I, J )
+  200                CONTINUE
+                  END IF
+  210          CONTINUE
+            ELSE
+               DO 260, J = N, 1, -1
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 220, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+  220                CONTINUE
+                  END IF
+                  DO 240, K = J + 1, N
+                     IF( A( K, J ).NE.ZERO )THEN
+                        DO 230, I = 1, M
+                           B( I, J ) = B( I, J ) - A( K, J )*B( I, K )
+  230                   CONTINUE
+                     END IF
+  240             CONTINUE
+                  IF( NOUNIT )THEN
+                     TEMP = ONE/A( J, J )
+                     DO 250, I = 1, M
+                       B( I, J ) = TEMP*B( I, J )
+  250                CONTINUE
+                  END IF
+  260          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*B*inv( A' ).
+*
+            IF( UPPER )THEN
+               DO 310, K = N, 1, -1
+                  IF( NOUNIT )THEN
+                     TEMP = ONE/A( K, K )
+                     DO 270, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  270                CONTINUE
+                  END IF
+                  DO 290, J = 1, K - 1
+                     IF( A( J, K ).NE.ZERO )THEN
+                        TEMP = A( J, K )
+                        DO 280, I = 1, M
+                           B( I, J ) = B( I, J ) - TEMP*B( I, K )
+  280                   CONTINUE
+                     END IF
+  290             CONTINUE
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 300, I = 1, M
+                        B( I, K ) = ALPHA*B( I, K )
+  300                CONTINUE
+                  END IF
+  310          CONTINUE
+            ELSE
+               DO 360, K = 1, N
+                  IF( NOUNIT )THEN
+                     TEMP = ONE/A( K, K )
+                     DO 320, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  320                CONTINUE
+                  END IF
+                  DO 340, J = K + 1, N
+                     IF( A( J, K ).NE.ZERO )THEN
+                        TEMP = A( J, K )
+                        DO 330, I = 1, M
+                           B( I, J ) = B( I, J ) - TEMP*B( I, K )
+  330                   CONTINUE
+                     END IF
+  340             CONTINUE
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 350, I = 1, M
+                        B( I, K ) = ALPHA*B( I, K )
+  350                CONTINUE
+                  END IF
+  360          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of STRSM .
+*
+      END
diff --git a/costa/native/external/blas/strsv.f b/costa/native/external/blas/strsv.f
new file mode 100644
index 000000000..6fb05aafe
--- /dev/null
+++ b/costa/native/external/blas/strsv.f
@@ -0,0 +1,289 @@
+      SUBROUTINE STRSV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STRSV  solves one of the systems of equations
+*
+*     A*x = b,   or   A'*x = b,
+*
+*  where b and x are n element vectors and A is an n by n unit, or
+*  non-unit, upper or lower triangular matrix.
+*
+*  No test for singularity or near-singularity is included in this
+*  routine. Such tests must be performed before calling this routine.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the equations to be solved as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   A*x = b.
+*
+*              TRANS = 'T' or 't'   A'*x = b.
+*
+*              TRANS = 'C' or 'c'   A'*x = b.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  A      - REAL             array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular matrix and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular matrix and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced either, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*  X      - REAL             array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element right-hand side vector b. On exit, X is overwritten
+*           with the solution vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER        ( ZERO = 0.0E+0 )
+*     .. Local Scalars ..
+      REAL               TEMP
+      INTEGER            I, INFO, IX, J, JX, KX
+      LOGICAL            NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'STRSV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x := inv( A )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( J, J )
+                     TEMP = X( J )
+                     DO 10, I = J - 1, 1, -1
+                        X( I ) = X( I ) - TEMP*A( I, J )
+   10                CONTINUE
+                  END IF
+   20          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 40, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( J, J )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 30, I = J - 1, 1, -1
+                        IX      = IX      - INCX
+                        X( IX ) = X( IX ) - TEMP*A( I, J )
+   30                CONTINUE
+                  END IF
+                  JX = JX - INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( J, J )
+                     TEMP = X( J )
+                     DO 50, I = J + 1, N
+                        X( I ) = X( I ) - TEMP*A( I, J )
+   50                CONTINUE
+                  END IF
+   60          CONTINUE
+            ELSE
+               JX = KX
+               DO 80, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( J, J )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 70, I = J + 1, N
+                        IX      = IX      + INCX
+                        X( IX ) = X( IX ) - TEMP*A( I, J )
+   70                CONTINUE
+                  END IF
+                  JX = JX + INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := inv( A' )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 100, J = 1, N
+                  TEMP = X( J )
+                  DO 90, I = 1, J - 1
+                     TEMP = TEMP - A( I, J )*X( I )
+   90             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( J, J )
+                  X( J ) = TEMP
+  100          CONTINUE
+            ELSE
+               JX = KX
+               DO 120, J = 1, N
+                  TEMP = X( JX )
+                  IX   = KX
+                  DO 110, I = 1, J - 1
+                     TEMP = TEMP - A( I, J )*X( IX )
+                     IX   = IX   + INCX
+  110             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( J, J )
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+  120          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 140, J = N, 1, -1
+                  TEMP = X( J )
+                  DO 130, I = N, J + 1, -1
+                     TEMP = TEMP - A( I, J )*X( I )
+  130             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( J, J )
+                  X( J ) = TEMP
+  140          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 160, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = KX
+                  DO 150, I = N, J + 1, -1
+                     TEMP = TEMP - A( I, J )*X( IX )
+                     IX   = IX   - INCX
+  150             CONTINUE
+                  IF( NOUNIT )
+     $               TEMP = TEMP/A( J, J )
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+  160          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of STRSV .
+*
+      END
diff --git a/costa/native/external/blas/xerbla.f b/costa/native/external/blas/xerbla.f
new file mode 100644
index 000000000..18100082c
--- /dev/null
+++ b/costa/native/external/blas/xerbla.f
@@ -0,0 +1,43 @@
+      SUBROUTINE XERBLA( SRNAME, INFO )
+*
+*  -- LAPACK auxiliary routine (preliminary version) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER*6        SRNAME
+      INTEGER            INFO
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  XERBLA  is an error handler for the LAPACK routines.
+*  It is called by an LAPACK routine if an input parameter has an
+*  invalid value.  A message is printed and execution stops.
+*
+*  Installers may consider modifying the STOP statement in order to
+*  call system-specific exception-handling facilities.
+*
+*  Arguments
+*  =========
+*
+*  SRNAME  (input) CHARACTER*6
+*          The name of the routine which called XERBLA.
+*
+*  INFO    (input) INTEGER
+*          The position of the invalid parameter in the parameter list
+*          of the calling routine.
+*
+*
+      WRITE( *, FMT = 9999 )SRNAME, INFO
+*
+      STOP
+*
+ 9999 FORMAT( ' ** On entry to ', A6, ' parameter number ', I2, ' had ',
+     $      'an illegal value' )
+*
+*     End of XERBLA
+*
+      END
diff --git a/costa/native/external/blas/zaxpy.f b/costa/native/external/blas/zaxpy.f
new file mode 100644
index 000000000..4fa3b1e43
--- /dev/null
+++ b/costa/native/external/blas/zaxpy.f
@@ -0,0 +1,34 @@
+      subroutine zaxpy(n,za,zx,incx,zy,incy)
+c
+c     constant times a vector plus a vector.
+c     jack dongarra, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      double complex zx(*),zy(*),za
+      integer i,incx,incy,ix,iy,n
+      double precision dcabs1
+      if(n.le.0)return
+      if (dcabs1(za) .eq. 0.0d0) return
+      if (incx.eq.1.and.incy.eq.1)go to 20
+c
+c        code for unequal increments or equal increments
+c          not equal to 1
+c
+      ix = 1
+      iy = 1
+      if(incx.lt.0)ix = (-n+1)*incx + 1
+      if(incy.lt.0)iy = (-n+1)*incy + 1
+      do 10 i = 1,n
+        zy(iy) = zy(iy) + za*zx(ix)
+        ix = ix + incx
+        iy = iy + incy
+   10 continue
+      return
+c
+c        code for both increments equal to 1
+c
+   20 do 30 i = 1,n
+        zy(i) = zy(i) + za*zx(i)
+   30 continue
+      return
+      end
diff --git a/costa/native/external/blas/zcopy.f b/costa/native/external/blas/zcopy.f
new file mode 100644
index 000000000..9ccfa880f
--- /dev/null
+++ b/costa/native/external/blas/zcopy.f
@@ -0,0 +1,33 @@
+      subroutine  zcopy(n,zx,incx,zy,incy)
+c
+c     copies a vector, x, to a vector, y.
+c     jack dongarra, linpack, 4/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      double complex zx(*),zy(*)
+      integer i,incx,incy,ix,iy,n
+c
+      if(n.le.0)return
+      if(incx.eq.1.and.incy.eq.1)go to 20
+c
+c        code for unequal increments or equal increments
+c          not equal to 1
+c
+      ix = 1
+      iy = 1
+      if(incx.lt.0)ix = (-n+1)*incx + 1
+      if(incy.lt.0)iy = (-n+1)*incy + 1
+      do 10 i = 1,n
+        zy(iy) = zx(ix)
+        ix = ix + incx
+        iy = iy + incy
+   10 continue
+      return
+c
+c        code for both increments equal to 1
+c
+   20 do 30 i = 1,n
+        zy(i) = zx(i)
+   30 continue
+      return
+      end
diff --git a/costa/native/external/blas/zdotc.f b/costa/native/external/blas/zdotc.f
new file mode 100644
index 000000000..d6ac68537
--- /dev/null
+++ b/costa/native/external/blas/zdotc.f
@@ -0,0 +1,36 @@
+      double complex function zdotc(n,zx,incx,zy,incy)
+c
+c     forms the dot product of a vector.
+c     jack dongarra, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      double complex zx(*),zy(*),ztemp
+      integer i,incx,incy,ix,iy,n
+      ztemp = (0.0d0,0.0d0)
+      zdotc = (0.0d0,0.0d0)
+      if(n.le.0)return
+      if(incx.eq.1.and.incy.eq.1)go to 20
+c
+c        code for unequal increments or equal increments
+c          not equal to 1
+c
+      ix = 1
+      iy = 1
+      if(incx.lt.0)ix = (-n+1)*incx + 1
+      if(incy.lt.0)iy = (-n+1)*incy + 1
+      do 10 i = 1,n
+        ztemp = ztemp + dconjg(zx(ix))*zy(iy)
+        ix = ix + incx
+        iy = iy + incy
+   10 continue
+      zdotc = ztemp
+      return
+c
+c        code for both increments equal to 1
+c
+   20 do 30 i = 1,n
+        ztemp = ztemp + dconjg(zx(i))*zy(i)
+   30 continue
+      zdotc = ztemp
+      return
+      end
diff --git a/costa/native/external/blas/zdotu.f b/costa/native/external/blas/zdotu.f
new file mode 100644
index 000000000..329e98855
--- /dev/null
+++ b/costa/native/external/blas/zdotu.f
@@ -0,0 +1,36 @@
+      double complex function zdotu(n,zx,incx,zy,incy)
+c
+c     forms the dot product of two vectors.
+c     jack dongarra, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      double complex zx(*),zy(*),ztemp
+      integer i,incx,incy,ix,iy,n
+      ztemp = (0.0d0,0.0d0)
+      zdotu = (0.0d0,0.0d0)
+      if(n.le.0)return
+      if(incx.eq.1.and.incy.eq.1)go to 20
+c
+c        code for unequal increments or equal increments
+c          not equal to 1
+c
+      ix = 1
+      iy = 1
+      if(incx.lt.0)ix = (-n+1)*incx + 1
+      if(incy.lt.0)iy = (-n+1)*incy + 1
+      do 10 i = 1,n
+        ztemp = ztemp + zx(ix)*zy(iy)
+        ix = ix + incx
+        iy = iy + incy
+   10 continue
+      zdotu = ztemp
+      return
+c
+c        code for both increments equal to 1
+c
+   20 do 30 i = 1,n
+        ztemp = ztemp + zx(i)*zy(i)
+   30 continue
+      zdotu = ztemp
+      return
+      end
diff --git a/costa/native/external/blas/zdscal.f b/costa/native/external/blas/zdscal.f
new file mode 100644
index 000000000..8123424de
--- /dev/null
+++ b/costa/native/external/blas/zdscal.f
@@ -0,0 +1,30 @@
+      subroutine  zdscal(n,da,zx,incx)
+c
+c     scales a vector by a constant.
+c     jack dongarra, 3/11/78.
+c     modified 3/93 to return if incx .le. 0.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      double complex zx(*)
+      double precision da
+      integer i,incx,ix,n
+c
+      if( n.le.0 .or. incx.le.0 )return
+      if(incx.eq.1)go to 20
+c
+c        code for increment not equal to 1
+c
+      ix = 1
+      do 10 i = 1,n
+        zx(ix) = dcmplx(da,0.0d0)*zx(ix)
+        ix = ix + incx
+   10 continue
+      return
+c
+c        code for increment equal to 1
+c
+   20 do 30 i = 1,n
+        zx(i) = dcmplx(da,0.0d0)*zx(i)
+   30 continue
+      return
+      end
diff --git a/costa/native/external/blas/zgbmv.f b/costa/native/external/blas/zgbmv.f
new file mode 100644
index 000000000..91ce9a60b
--- /dev/null
+++ b/costa/native/external/blas/zgbmv.f
@@ -0,0 +1,322 @@
+      SUBROUTINE ZGBMV ( TRANS, M, N, KL, KU, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      COMPLEX*16         ALPHA, BETA
+      INTEGER            INCX, INCY, KL, KU, LDA, M, N
+      CHARACTER*1        TRANS
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGBMV  performs one of the matrix-vector operations
+*
+*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   or
+*
+*     y := alpha*conjg( A' )*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are vectors and A is an
+*  m by n band matrix, with kl sub-diagonals and ku super-diagonals.
+*
+*  Parameters
+*  ==========
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
+*
+*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.
+*
+*              TRANS = 'C' or 'c'   y := alpha*conjg( A' )*x + beta*y.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of the matrix A.
+*           M must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  KL     - INTEGER.
+*           On entry, KL specifies the number of sub-diagonals of the
+*           matrix A. KL must satisfy  0 .le. KL.
+*           Unchanged on exit.
+*
+*  KU     - INTEGER.
+*           On entry, KU specifies the number of super-diagonals of the
+*           matrix A. KU must satisfy  0 .le. KU.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+*           Before entry, the leading ( kl + ku + 1 ) by n part of the
+*           array A must contain the matrix of coefficients, supplied
+*           column by column, with the leading diagonal of the matrix in
+*           row ( ku + 1 ) of the array, the first super-diagonal
+*           starting at position 2 in row ku, the first sub-diagonal
+*           starting at position 1 in row ( ku + 2 ), and so on.
+*           Elements in the array A that do not correspond to elements
+*           in the band matrix (such as the top left ku by ku triangle)
+*           are not referenced.
+*           The following program segment will transfer a band matrix
+*           from conventional full matrix storage to band storage:
+*
+*                 DO 20, J = 1, N
+*                    K = KU + 1 - J
+*                    DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL )
+*                       A( K + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( kl + ku + 1 ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
+*           Before entry, the incremented array X must contain the
+*           vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX*16      .
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX*16       array of DIMENSION at least
+*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
+*           Before entry, the incremented array Y must contain the
+*           vector y. On exit, Y is overwritten by the updated vector y.
+*
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER        ( ONE  = ( 1.0D+0, 0.0D+0 ) )
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP
+      INTEGER            I, INFO, IX, IY, J, JX, JY, K, KUP1, KX, KY,
+     $                   LENX, LENY
+      LOGICAL            NOCONJ
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 1
+      ELSE IF( M.LT.0 )THEN
+         INFO = 2
+      ELSE IF( N.LT.0 )THEN
+         INFO = 3
+      ELSE IF( KL.LT.0 )THEN
+         INFO = 4
+      ELSE IF( KU.LT.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.( KL + KU + 1 ) )THEN
+         INFO = 8
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 10
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 13
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZGBMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+      NOCONJ = LSAME( TRANS, 'T' )
+*
+*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
+*     up the start points in  X  and  Y.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+         LENX = N
+         LENY = M
+      ELSE
+         LENX = M
+         LENY = N
+      END IF
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( LENX - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( LENY - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the band part of A.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, LENY
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, LENY
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, LENY
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, LENY
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      KUP1 = KU + 1
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  y := alpha*A*x + y.
+*
+         JX = KX
+         IF( INCY.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  K    = KUP1 - J
+                  DO 50, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     Y( I ) = Y( I ) + TEMP*A( K + I, J )
+   50             CONTINUE
+               END IF
+               JX = JX + INCX
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IY   = KY
+                  K    = KUP1 - J
+                  DO 70, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     Y( IY ) = Y( IY ) + TEMP*A( K + I, J )
+                     IY      = IY      + INCY
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+               IF( J.GT.KU )
+     $            KY = KY + INCY
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y := alpha*A'*x + y  or  y := alpha*conjg( A' )*x + y.
+*
+         JY = KY
+         IF( INCX.EQ.1 )THEN
+            DO 110, J = 1, N
+               TEMP = ZERO
+               K    = KUP1 - J
+               IF( NOCONJ )THEN
+                  DO 90, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     TEMP = TEMP + A( K + I, J )*X( I )
+   90             CONTINUE
+               ELSE
+                  DO 100, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     TEMP = TEMP + DCONJG( A( K + I, J ) )*X( I )
+  100             CONTINUE
+               END IF
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+  110       CONTINUE
+         ELSE
+            DO 140, J = 1, N
+               TEMP = ZERO
+               IX   = KX
+               K    = KUP1 - J
+               IF( NOCONJ )THEN
+                  DO 120, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     TEMP = TEMP + A( K + I, J )*X( IX )
+                     IX   = IX   + INCX
+  120             CONTINUE
+               ELSE
+                  DO 130, I = MAX( 1, J - KU ), MIN( M, J + KL )
+                     TEMP = TEMP + DCONJG( A( K + I, J ) )*X( IX )
+                     IX   = IX   + INCX
+  130             CONTINUE
+               END IF
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+               IF( J.GT.KU )
+     $            KX = KX + INCX
+  140       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZGBMV .
+*
+      END
diff --git a/costa/native/external/blas/zgemm.f b/costa/native/external/blas/zgemm.f
new file mode 100644
index 000000000..09cd151ee
--- /dev/null
+++ b/costa/native/external/blas/zgemm.f
@@ -0,0 +1,415 @@
+      SUBROUTINE ZGEMM ( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        TRANSA, TRANSB
+      INTEGER            M, N, K, LDA, LDB, LDC
+      COMPLEX*16         ALPHA, BETA
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGEMM  performs one of the matrix-matrix operations
+*
+*     C := alpha*op( A )*op( B ) + beta*C,
+*
+*  where  op( X ) is one of
+*
+*     op( X ) = X   or   op( X ) = X'   or   op( X ) = conjg( X' ),
+*
+*  alpha and beta are scalars, and A, B and C are matrices, with op( A )
+*  an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix.
+*
+*  Parameters
+*  ==========
+*
+*  TRANSA - CHARACTER*1.
+*           On entry, TRANSA specifies the form of op( A ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSA = 'N' or 'n',  op( A ) = A.
+*
+*              TRANSA = 'T' or 't',  op( A ) = A'.
+*
+*              TRANSA = 'C' or 'c',  op( A ) = conjg( A' ).
+*
+*           Unchanged on exit.
+*
+*  TRANSB - CHARACTER*1.
+*           On entry, TRANSB specifies the form of op( B ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSB = 'N' or 'n',  op( B ) = B.
+*
+*              TRANSB = 'T' or 't',  op( B ) = B'.
+*
+*              TRANSB = 'C' or 'c',  op( B ) = conjg( B' ).
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry,  M  specifies  the number  of rows  of the  matrix
+*           op( A )  and of the  matrix  C.  M  must  be at least  zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N  specifies the number  of columns of the matrix
+*           op( B ) and the number of columns of the matrix C. N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry,  K  specifies  the number of columns of the matrix
+*           op( A ) and the number of rows of the matrix op( B ). K must
+*           be at least  zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.
+*           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by m  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. When  TRANSA = 'N' or 'n' then
+*           LDA must be at least  max( 1, m ), otherwise  LDA must be at
+*           least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  B      - COMPLEX*16       array of DIMENSION ( LDB, kb ), where kb is
+*           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.
+*           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n
+*           part of the array  B  must contain the matrix  B,  otherwise
+*           the leading  n by k  part of the array  B  must contain  the
+*           matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in the calling (sub) program. When  TRANSB = 'N' or 'n' then
+*           LDB must be at least  max( 1, k ), otherwise  LDB must be at
+*           least  max( 1, n ).
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX*16      .
+*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
+*           supplied as zero then C need not be set on input.
+*           Unchanged on exit.
+*
+*  C      - COMPLEX*16       array of DIMENSION ( LDC, n ).
+*           Before entry, the leading  m by n  part of the array  C must
+*           contain the matrix  C,  except when  beta  is zero, in which
+*           case C need not be set on entry.
+*           On exit, the array  C  is overwritten by the  m by n  matrix
+*           ( alpha*op( A )*op( B ) + beta*C ).
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX
+*     .. Local Scalars ..
+      LOGICAL            CONJA, CONJB, NOTA, NOTB
+      INTEGER            I, INFO, J, L, NCOLA, NROWA, NROWB
+      COMPLEX*16         TEMP
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER        ( ONE  = ( 1.0D+0, 0.0D+0 ) )
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not
+*     conjugated or transposed, set  CONJA and CONJB  as true if  A  and
+*     B  respectively are to be  transposed but  not conjugated  and set
+*     NROWA, NCOLA and  NROWB  as the number of rows and  columns  of  A
+*     and the number of rows of  B  respectively.
+*
+      NOTA  = LSAME( TRANSA, 'N' )
+      NOTB  = LSAME( TRANSB, 'N' )
+      CONJA = LSAME( TRANSA, 'C' )
+      CONJB = LSAME( TRANSB, 'C' )
+      IF( NOTA )THEN
+         NROWA = M
+         NCOLA = K
+      ELSE
+         NROWA = K
+         NCOLA = M
+      END IF
+      IF( NOTB )THEN
+         NROWB = K
+      ELSE
+         NROWB = N
+      END IF
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF(      ( .NOT.NOTA                 ).AND.
+     $         ( .NOT.CONJA                ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'T' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.NOTB                 ).AND.
+     $         ( .NOT.CONJB                ).AND.
+     $         ( .NOT.LSAME( TRANSB, 'T' ) )      )THEN
+         INFO = 2
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( K  .LT.0               )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 8
+      ELSE IF( LDB.LT.MAX( 1, NROWB ) )THEN
+         INFO = 10
+      ELSE IF( LDC.LT.MAX( 1, M     ) )THEN
+         INFO = 13
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZGEMM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( BETA.EQ.ZERO )THEN
+            DO 20, J = 1, N
+               DO 10, I = 1, M
+                  C( I, J ) = ZERO
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               DO 30, I = 1, M
+                  C( I, J ) = BETA*C( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( NOTB )THEN
+         IF( NOTA )THEN
+*
+*           Form  C := alpha*A*B + beta*C.
+*
+            DO 90, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 50, I = 1, M
+                     C( I, J ) = ZERO
+   50             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 60, I = 1, M
+                     C( I, J ) = BETA*C( I, J )
+   60             CONTINUE
+               END IF
+               DO 80, L = 1, K
+                  IF( B( L, J ).NE.ZERO )THEN
+                     TEMP = ALPHA*B( L, J )
+                     DO 70, I = 1, M
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+   70                CONTINUE
+                  END IF
+   80          CONTINUE
+   90       CONTINUE
+         ELSE IF( CONJA )THEN
+*
+*           Form  C := alpha*conjg( A' )*B + beta*C.
+*
+            DO 120, J = 1, N
+               DO 110, I = 1, M
+                  TEMP = ZERO
+                  DO 100, L = 1, K
+                     TEMP = TEMP + DCONJG( A( L, I ) )*B( L, J )
+  100             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  110          CONTINUE
+  120       CONTINUE
+         ELSE
+*
+*           Form  C := alpha*A'*B + beta*C
+*
+            DO 150, J = 1, N
+               DO 140, I = 1, M
+                  TEMP = ZERO
+                  DO 130, L = 1, K
+                     TEMP = TEMP + A( L, I )*B( L, J )
+  130             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  140          CONTINUE
+  150       CONTINUE
+         END IF
+      ELSE IF( NOTA )THEN
+         IF( CONJB )THEN
+*
+*           Form  C := alpha*A*conjg( B' ) + beta*C.
+*
+            DO 200, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 160, I = 1, M
+                     C( I, J ) = ZERO
+  160             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 170, I = 1, M
+                     C( I, J ) = BETA*C( I, J )
+  170             CONTINUE
+               END IF
+               DO 190, L = 1, K
+                  IF( B( J, L ).NE.ZERO )THEN
+                     TEMP = ALPHA*DCONJG( B( J, L ) )
+                     DO 180, I = 1, M
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  180                CONTINUE
+                  END IF
+  190          CONTINUE
+  200       CONTINUE
+         ELSE
+*
+*           Form  C := alpha*A*B'          + beta*C
+*
+            DO 250, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 210, I = 1, M
+                     C( I, J ) = ZERO
+  210             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 220, I = 1, M
+                     C( I, J ) = BETA*C( I, J )
+  220             CONTINUE
+               END IF
+               DO 240, L = 1, K
+                  IF( B( J, L ).NE.ZERO )THEN
+                     TEMP = ALPHA*B( J, L )
+                     DO 230, I = 1, M
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  230                CONTINUE
+                  END IF
+  240          CONTINUE
+  250       CONTINUE
+         END IF
+      ELSE IF( CONJA )THEN
+         IF( CONJB )THEN
+*
+*           Form  C := alpha*conjg( A' )*conjg( B' ) + beta*C.
+*
+            DO 280, J = 1, N
+               DO 270, I = 1, M
+                  TEMP = ZERO
+                  DO 260, L = 1, K
+                     TEMP = TEMP +
+     $                      DCONJG( A( L, I ) )*DCONJG( B( J, L ) )
+  260             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  270          CONTINUE
+  280       CONTINUE
+         ELSE
+*
+*           Form  C := alpha*conjg( A' )*B' + beta*C
+*
+            DO 310, J = 1, N
+               DO 300, I = 1, M
+                  TEMP = ZERO
+                  DO 290, L = 1, K
+                     TEMP = TEMP + DCONJG( A( L, I ) )*B( J, L )
+  290             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  300          CONTINUE
+  310       CONTINUE
+         END IF
+      ELSE
+         IF( CONJB )THEN
+*
+*           Form  C := alpha*A'*conjg( B' ) + beta*C
+*
+            DO 340, J = 1, N
+               DO 330, I = 1, M
+                  TEMP = ZERO
+                  DO 320, L = 1, K
+                     TEMP = TEMP + A( L, I )*DCONJG( B( J, L ) )
+  320             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  330          CONTINUE
+  340       CONTINUE
+         ELSE
+*
+*           Form  C := alpha*A'*B' + beta*C
+*
+            DO 370, J = 1, N
+               DO 360, I = 1, M
+                  TEMP = ZERO
+                  DO 350, L = 1, K
+                     TEMP = TEMP + A( L, I )*B( J, L )
+  350             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  360          CONTINUE
+  370       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZGEMM .
+*
+      END
diff --git a/costa/native/external/blas/zgemv.f b/costa/native/external/blas/zgemv.f
new file mode 100644
index 000000000..014a5e02b
--- /dev/null
+++ b/costa/native/external/blas/zgemv.f
@@ -0,0 +1,281 @@
+      SUBROUTINE ZGEMV ( TRANS, M, N, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      COMPLEX*16         ALPHA, BETA
+      INTEGER            INCX, INCY, LDA, M, N
+      CHARACTER*1        TRANS
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGEMV  performs one of the matrix-vector operations
+*
+*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   or
+*
+*     y := alpha*conjg( A' )*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are vectors and A is an
+*  m by n matrix.
+*
+*  Parameters
+*  ==========
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
+*
+*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.
+*
+*              TRANS = 'C' or 'c'   y := alpha*conjg( A' )*x + beta*y.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of the matrix A.
+*           M must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+*           Before entry, the leading m by n part of the array A must
+*           contain the matrix of coefficients.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
+*           Before entry, the incremented array X must contain the
+*           vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX*16      .
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX*16       array of DIMENSION at least
+*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
+*           and at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
+*           Before entry with BETA non-zero, the incremented array Y
+*           must contain the vector y. On exit, Y is overwritten by the
+*           updated vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER        ( ONE  = ( 1.0D+0, 0.0D+0 ) )
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KX, KY, LENX, LENY
+      LOGICAL            NOCONJ
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 1
+      ELSE IF( M.LT.0 )THEN
+         INFO = 2
+      ELSE IF( N.LT.0 )THEN
+         INFO = 3
+      ELSE IF( LDA.LT.MAX( 1, M ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZGEMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+      NOCONJ = LSAME( TRANS, 'T' )
+*
+*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
+*     up the start points in  X  and  Y.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+         LENX = N
+         LENY = M
+      ELSE
+         LENX = M
+         LENY = N
+      END IF
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( LENX - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( LENY - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, LENY
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, LENY
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, LENY
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, LENY
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  y := alpha*A*x + y.
+*
+         JX = KX
+         IF( INCY.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  DO 50, I = 1, M
+                     Y( I ) = Y( I ) + TEMP*A( I, J )
+   50             CONTINUE
+               END IF
+               JX = JX + INCX
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*X( JX )
+                  IY   = KY
+                  DO 70, I = 1, M
+                     Y( IY ) = Y( IY ) + TEMP*A( I, J )
+                     IY      = IY      + INCY
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y := alpha*A'*x + y  or  y := alpha*conjg( A' )*x + y.
+*
+         JY = KY
+         IF( INCX.EQ.1 )THEN
+            DO 110, J = 1, N
+               TEMP = ZERO
+               IF( NOCONJ )THEN
+                  DO 90, I = 1, M
+                     TEMP = TEMP + A( I, J )*X( I )
+   90             CONTINUE
+               ELSE
+                  DO 100, I = 1, M
+                     TEMP = TEMP + DCONJG( A( I, J ) )*X( I )
+  100             CONTINUE
+               END IF
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+  110       CONTINUE
+         ELSE
+            DO 140, J = 1, N
+               TEMP = ZERO
+               IX   = KX
+               IF( NOCONJ )THEN
+                  DO 120, I = 1, M
+                     TEMP = TEMP + A( I, J )*X( IX )
+                     IX   = IX   + INCX
+  120             CONTINUE
+               ELSE
+                  DO 130, I = 1, M
+                     TEMP = TEMP + DCONJG( A( I, J ) )*X( IX )
+                     IX   = IX   + INCX
+  130             CONTINUE
+               END IF
+               Y( JY ) = Y( JY ) + ALPHA*TEMP
+               JY      = JY      + INCY
+  140       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZGEMV .
+*
+      END
diff --git a/costa/native/external/blas/zgerc.f b/costa/native/external/blas/zgerc.f
new file mode 100644
index 000000000..968c5b472
--- /dev/null
+++ b/costa/native/external/blas/zgerc.f
@@ -0,0 +1,157 @@
+      SUBROUTINE ZGERC ( M, N, ALPHA, X, INCX, Y, INCY, A, LDA )
+*     .. Scalar Arguments ..
+      COMPLEX*16         ALPHA
+      INTEGER            INCX, INCY, LDA, M, N
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGERC  performs the rank 1 operation
+*
+*     A := alpha*x*conjg( y' ) + A,
+*
+*  where alpha is a scalar, x is an m element vector, y is an n element
+*  vector and A is an m by n matrix.
+*
+*  Parameters
+*  ==========
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of the matrix A.
+*           M must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( m - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the m
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y.
+*           Unchanged on exit.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+*           Before entry, the leading m by n part of the array A must
+*           contain the matrix of coefficients. On exit, A is
+*           overwritten by the updated matrix.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP
+      INTEGER            I, INFO, IX, J, JY, KX
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( M.LT.0 )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 7
+      ELSE IF( LDA.LT.MAX( 1, M ) )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZGERC ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( INCY.GT.0 )THEN
+         JY = 1
+      ELSE
+         JY = 1 - ( N - 1 )*INCY
+      END IF
+      IF( INCX.EQ.1 )THEN
+         DO 20, J = 1, N
+            IF( Y( JY ).NE.ZERO )THEN
+               TEMP = ALPHA*DCONJG( Y( JY ) )
+               DO 10, I = 1, M
+                  A( I, J ) = A( I, J ) + X( I )*TEMP
+   10          CONTINUE
+            END IF
+            JY = JY + INCY
+   20    CONTINUE
+      ELSE
+         IF( INCX.GT.0 )THEN
+            KX = 1
+         ELSE
+            KX = 1 - ( M - 1 )*INCX
+         END IF
+         DO 40, J = 1, N
+            IF( Y( JY ).NE.ZERO )THEN
+               TEMP = ALPHA*DCONJG( Y( JY ) )
+               IX   = KX
+               DO 30, I = 1, M
+                  A( I, J ) = A( I, J ) + X( IX )*TEMP
+                  IX        = IX        + INCX
+   30          CONTINUE
+            END IF
+            JY = JY + INCY
+   40    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZGERC .
+*
+      END
diff --git a/costa/native/external/blas/zgeru.f b/costa/native/external/blas/zgeru.f
new file mode 100644
index 000000000..5283af641
--- /dev/null
+++ b/costa/native/external/blas/zgeru.f
@@ -0,0 +1,157 @@
+      SUBROUTINE ZGERU ( M, N, ALPHA, X, INCX, Y, INCY, A, LDA )
+*     .. Scalar Arguments ..
+      COMPLEX*16         ALPHA
+      INTEGER            INCX, INCY, LDA, M, N
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGERU  performs the rank 1 operation
+*
+*     A := alpha*x*y' + A,
+*
+*  where alpha is a scalar, x is an m element vector, y is an n element
+*  vector and A is an m by n matrix.
+*
+*  Parameters
+*  ==========
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of the matrix A.
+*           M must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( m - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the m
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y.
+*           Unchanged on exit.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+*           Before entry, the leading m by n part of the array A must
+*           contain the matrix of coefficients. On exit, A is
+*           overwritten by the updated matrix.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP
+      INTEGER            I, INFO, IX, J, JY, KX
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( M.LT.0 )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 7
+      ELSE IF( LDA.LT.MAX( 1, M ) )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZGERU ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( INCY.GT.0 )THEN
+         JY = 1
+      ELSE
+         JY = 1 - ( N - 1 )*INCY
+      END IF
+      IF( INCX.EQ.1 )THEN
+         DO 20, J = 1, N
+            IF( Y( JY ).NE.ZERO )THEN
+               TEMP = ALPHA*Y( JY )
+               DO 10, I = 1, M
+                  A( I, J ) = A( I, J ) + X( I )*TEMP
+   10          CONTINUE
+            END IF
+            JY = JY + INCY
+   20    CONTINUE
+      ELSE
+         IF( INCX.GT.0 )THEN
+            KX = 1
+         ELSE
+            KX = 1 - ( M - 1 )*INCX
+         END IF
+         DO 40, J = 1, N
+            IF( Y( JY ).NE.ZERO )THEN
+               TEMP = ALPHA*Y( JY )
+               IX   = KX
+               DO 30, I = 1, M
+                  A( I, J ) = A( I, J ) + X( IX )*TEMP
+                  IX        = IX        + INCX
+   30          CONTINUE
+            END IF
+            JY = JY + INCY
+   40    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZGERU .
+*
+      END
diff --git a/costa/native/external/blas/zhbmv.f b/costa/native/external/blas/zhbmv.f
new file mode 100644
index 000000000..1c0449362
--- /dev/null
+++ b/costa/native/external/blas/zhbmv.f
@@ -0,0 +1,309 @@
+      SUBROUTINE ZHBMV ( UPLO, N, K, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      COMPLEX*16         ALPHA, BETA
+      INTEGER            INCX, INCY, K, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHBMV  performs the matrix-vector  operation
+*
+*     y := alpha*A*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are n element vectors and
+*  A is an n by n hermitian band matrix, with k super-diagonals.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the band matrix A is being supplied as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  being supplied.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  being supplied.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry, K specifies the number of super-diagonals of the
+*           matrix A. K must satisfy  0 .le. K.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
+*           by n part of the array A must contain the upper triangular
+*           band part of the hermitian matrix, supplied column by
+*           column, with the leading diagonal of the matrix in row
+*           ( k + 1 ) of the array, the first super-diagonal starting at
+*           position 2 in row k, and so on. The top left k by k triangle
+*           of the array A is not referenced.
+*           The following program segment will transfer the upper
+*           triangular part of a hermitian band matrix from conventional
+*           full matrix storage to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = K + 1 - J
+*                    DO 10, I = MAX( 1, J - K ), J
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
+*           by n part of the array A must contain the lower triangular
+*           band part of the hermitian matrix, supplied column by
+*           column, with the leading diagonal of the matrix in row 1 of
+*           the array, the first sub-diagonal starting at position 1 in
+*           row 2, and so on. The bottom right k by k triangle of the
+*           array A is not referenced.
+*           The following program segment will transfer the lower
+*           triangular part of a hermitian band matrix from conventional
+*           full matrix storage to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = 1 - J
+*                    DO 10, I = J, MIN( N, J + K )
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set and are assumed to be zero.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( k + 1 ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the
+*           vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX*16      .
+*           On entry, BETA specifies the scalar beta.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX*16       array of DIMENSION at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the
+*           vector y. On exit, Y is overwritten by the updated vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER        ( ONE  = ( 1.0D+0, 0.0D+0 ) )
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KPLUS1, KX, KY, L
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX, MIN, DBLE
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( K.LT.0 )THEN
+         INFO = 3
+      ELSE IF( LDA.LT.( K + 1 ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZHBMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set up the start points in  X  and  Y.
+*
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( N - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( N - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of the array A
+*     are accessed sequentially with one pass through A.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, N
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, N
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, N
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, N
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  y  when upper triangle of A is stored.
+*
+         KPLUS1 = K + 1
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               L     = KPLUS1 - J
+               DO 50, I = MAX( 1, J - K ), J - 1
+                  Y( I ) = Y( I ) + TEMP1*A( L + I, J )
+                  TEMP2  = TEMP2  + DCONJG( A( L + I, J ) )*X( I )
+   50          CONTINUE
+               Y( J ) = Y( J ) + TEMP1*DBLE( A( KPLUS1, J ) )
+     $                         + ALPHA*TEMP2
+   60       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 80, J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               IX    = KX
+               IY    = KY
+               L     = KPLUS1 - J
+               DO 70, I = MAX( 1, J - K ), J - 1
+                  Y( IY ) = Y( IY ) + TEMP1*A( L + I, J )
+                  TEMP2   = TEMP2   + DCONJG( A( L + I, J ) )*X( IX )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+   70          CONTINUE
+               Y( JY ) = Y( JY ) + TEMP1*DBLE( A( KPLUS1, J ) )
+     $                           + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+               IF( J.GT.K )THEN
+                  KX = KX + INCX
+                  KY = KY + INCY
+               END IF
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y  when lower triangle of A is stored.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 100, J = 1, N
+               TEMP1  = ALPHA*X( J )
+               TEMP2  = ZERO
+               Y( J ) = Y( J ) + TEMP1*DBLE( A( 1, J ) )
+               L      = 1      - J
+               DO 90, I = J + 1, MIN( N, J + K )
+                  Y( I ) = Y( I ) + TEMP1*A( L + I, J )
+                  TEMP2  = TEMP2  + DCONJG( A( L + I, J ) )*X( I )
+   90          CONTINUE
+               Y( J ) = Y( J ) + ALPHA*TEMP2
+  100       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 120, J = 1, N
+               TEMP1   = ALPHA*X( JX )
+               TEMP2   = ZERO
+               Y( JY ) = Y( JY ) + TEMP1*DBLE( A( 1, J ) )
+               L       = 1       - J
+               IX      = JX
+               IY      = JY
+               DO 110, I = J + 1, MIN( N, J + K )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+                  Y( IY ) = Y( IY ) + TEMP1*A( L + I, J )
+                  TEMP2   = TEMP2   + DCONJG( A( L + I, J ) )*X( IX )
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZHBMV .
+*
+      END
diff --git a/costa/native/external/blas/zhemm.f b/costa/native/external/blas/zhemm.f
new file mode 100644
index 000000000..d3912c084
--- /dev/null
+++ b/costa/native/external/blas/zhemm.f
@@ -0,0 +1,304 @@
+      SUBROUTINE ZHEMM ( SIDE, UPLO, M, N, ALPHA, A, LDA, B, LDB,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        SIDE, UPLO
+      INTEGER            M, N, LDA, LDB, LDC
+      COMPLEX*16         ALPHA, BETA
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHEMM  performs one of the matrix-matrix operations
+*
+*     C := alpha*A*B + beta*C,
+*
+*  or
+*
+*     C := alpha*B*A + beta*C,
+*
+*  where alpha and beta are scalars, A is an hermitian matrix and  B and
+*  C are m by n matrices.
+*
+*  Parameters
+*  ==========
+*
+*  SIDE   - CHARACTER*1.
+*           On entry,  SIDE  specifies whether  the  hermitian matrix  A
+*           appears on the  left or right  in the  operation as follows:
+*
+*              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C,
+*
+*              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C,
+*
+*           Unchanged on exit.
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of  the  hermitian  matrix   A  is  to  be
+*           referenced as follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of the
+*                                  hermitian matrix is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of the
+*                                  hermitian matrix is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry,  M  specifies the number of rows of the matrix  C.
+*           M  must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix C.
+*           N  must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is
+*           m  when  SIDE = 'L' or 'l'  and is n  otherwise.
+*           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of
+*           the array  A  must contain the  hermitian matrix,  such that
+*           when  UPLO = 'U' or 'u', the leading m by m upper triangular
+*           part of the array  A  must contain the upper triangular part
+*           of the  hermitian matrix and the  strictly  lower triangular
+*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
+*           the leading  m by m  lower triangular part  of the  array  A
+*           must  contain  the  lower triangular part  of the  hermitian
+*           matrix and the  strictly upper triangular part of  A  is not
+*           referenced.
+*           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of
+*           the array  A  must contain the  hermitian matrix,  such that
+*           when  UPLO = 'U' or 'u', the leading n by n upper triangular
+*           part of the array  A  must contain the upper triangular part
+*           of the  hermitian matrix and the  strictly  lower triangular
+*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
+*           the leading  n by n  lower triangular part  of the  array  A
+*           must  contain  the  lower triangular part  of the  hermitian
+*           matrix and the  strictly upper triangular part of  A  is not
+*           referenced.
+*           Note that the imaginary parts  of the diagonal elements need
+*           not be set, they are assumed to be zero.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the  calling (sub) program. When  SIDE = 'L' or 'l'  then
+*           LDA must be at least  max( 1, m ), otherwise  LDA must be at
+*           least max( 1, n ).
+*           Unchanged on exit.
+*
+*  B      - COMPLEX*16       array of DIMENSION ( LDB, n ).
+*           Before entry, the leading  m by n part of the array  B  must
+*           contain the matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   LDB  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX*16      .
+*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
+*           supplied as zero then C need not be set on input.
+*           Unchanged on exit.
+*
+*  C      - COMPLEX*16       array of DIMENSION ( LDC, n ).
+*           Before entry, the leading  m by n  part of the array  C must
+*           contain the matrix  C,  except when  beta  is zero, in which
+*           case C need not be set on entry.
+*           On exit, the array  C  is overwritten by the  m by n updated
+*           matrix.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX, DBLE
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, K, NROWA
+      COMPLEX*16         TEMP1, TEMP2
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER        ( ONE  = ( 1.0D+0, 0.0D+0 ) )
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Set NROWA as the number of rows of A.
+*
+      IF( LSAME( SIDE, 'L' ) )THEN
+         NROWA = M
+      ELSE
+         NROWA = N
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF(      ( .NOT.LSAME( SIDE, 'L' ) ).AND.
+     $         ( .NOT.LSAME( SIDE, 'R' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.UPPER              ).AND.
+     $         ( .NOT.LSAME( UPLO, 'L' ) )      )THEN
+         INFO = 2
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDB.LT.MAX( 1, M     ) )THEN
+         INFO = 9
+      ELSE IF( LDC.LT.MAX( 1, M     ) )THEN
+         INFO = 12
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZHEMM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( BETA.EQ.ZERO )THEN
+            DO 20, J = 1, N
+               DO 10, I = 1, M
+                  C( I, J ) = ZERO
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               DO 30, I = 1, M
+                  C( I, J ) = BETA*C( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( SIDE, 'L' ) )THEN
+*
+*        Form  C := alpha*A*B + beta*C.
+*
+         IF( UPPER )THEN
+            DO 70, J = 1, N
+               DO 60, I = 1, M
+                  TEMP1 = ALPHA*B( I, J )
+                  TEMP2 = ZERO
+                  DO 50, K = 1, I - 1
+                     C( K, J ) = C( K, J ) + TEMP1*A( K, I )
+                     TEMP2     = TEMP2     +
+     $                           B( K, J )*DCONJG( A( K, I ) )
+   50             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = TEMP1*DBLE( A( I, I ) ) +
+     $                           ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J )         +
+     $                           TEMP1*DBLE( A( I, I ) ) +
+     $                           ALPHA*TEMP2
+                  END IF
+   60          CONTINUE
+   70       CONTINUE
+         ELSE
+            DO 100, J = 1, N
+               DO 90, I = M, 1, -1
+                  TEMP1 = ALPHA*B( I, J )
+                  TEMP2 = ZERO
+                  DO 80, K = I + 1, M
+                     C( K, J ) = C( K, J ) + TEMP1*A( K, I )
+                     TEMP2     = TEMP2     +
+     $                           B( K, J )*DCONJG( A( K, I ) )
+   80             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = TEMP1*DBLE( A( I, I ) ) +
+     $                           ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J )         +
+     $                           TEMP1*DBLE( A( I, I ) ) +
+     $                           ALPHA*TEMP2
+                  END IF
+   90          CONTINUE
+  100       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  C := alpha*B*A + beta*C.
+*
+         DO 170, J = 1, N
+            TEMP1 = ALPHA*DBLE( A( J, J ) )
+            IF( BETA.EQ.ZERO )THEN
+               DO 110, I = 1, M
+                  C( I, J ) = TEMP1*B( I, J )
+  110          CONTINUE
+            ELSE
+               DO 120, I = 1, M
+                  C( I, J ) = BETA*C( I, J ) + TEMP1*B( I, J )
+  120          CONTINUE
+            END IF
+            DO 140, K = 1, J - 1
+               IF( UPPER )THEN
+                  TEMP1 = ALPHA*A( K, J )
+               ELSE
+                  TEMP1 = ALPHA*DCONJG( A( J, K ) )
+               END IF
+               DO 130, I = 1, M
+                  C( I, J ) = C( I, J ) + TEMP1*B( I, K )
+  130          CONTINUE
+  140       CONTINUE
+            DO 160, K = J + 1, N
+               IF( UPPER )THEN
+                  TEMP1 = ALPHA*DCONJG( A( J, K ) )
+               ELSE
+                  TEMP1 = ALPHA*A( K, J )
+               END IF
+               DO 150, I = 1, M
+                  C( I, J ) = C( I, J ) + TEMP1*B( I, K )
+  150          CONTINUE
+  160       CONTINUE
+  170    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZHEMM .
+*
+      END
diff --git a/costa/native/external/blas/zhemv.f b/costa/native/external/blas/zhemv.f
new file mode 100644
index 000000000..54aa7b900
--- /dev/null
+++ b/costa/native/external/blas/zhemv.f
@@ -0,0 +1,266 @@
+      SUBROUTINE ZHEMV ( UPLO, N, ALPHA, A, LDA, X, INCX,
+     $                   BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      COMPLEX*16         ALPHA, BETA
+      INTEGER            INCX, INCY, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHEMV  performs the matrix-vector  operation
+*
+*     y := alpha*A*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are n element vectors and
+*  A is an n by n hermitian matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the array A is to be referenced as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of A
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of A
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular part of the hermitian matrix and the strictly
+*           lower triangular part of A is not referenced.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular part of the hermitian matrix and the strictly
+*           upper triangular part of A is not referenced.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set and are assumed to be zero.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX*16      .
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y. On exit, Y is overwritten by the updated
+*           vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER        ( ONE  = ( 1.0D+0, 0.0D+0 ) )
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX, DBLE
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 5
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 7
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 10
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZHEMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set up the start points in  X  and  Y.
+*
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( N - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( N - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the triangular part
+*     of A.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, N
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, N
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, N
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, N
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  y  when A is stored in upper triangle.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               DO 50, I = 1, J - 1
+                  Y( I ) = Y( I ) + TEMP1*A( I, J )
+                  TEMP2  = TEMP2  + DCONJG( A( I, J ) )*X( I )
+   50          CONTINUE
+               Y( J ) = Y( J ) + TEMP1*DBLE( A( J, J ) ) + ALPHA*TEMP2
+   60       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 80, J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               IX    = KX
+               IY    = KY
+               DO 70, I = 1, J - 1
+                  Y( IY ) = Y( IY ) + TEMP1*A( I, J )
+                  TEMP2   = TEMP2   + DCONJG( A( I, J ) )*X( IX )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+   70          CONTINUE
+               Y( JY ) = Y( JY ) + TEMP1*DBLE( A( J, J ) ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y  when A is stored in lower triangle.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 100, J = 1, N
+               TEMP1  = ALPHA*X( J )
+               TEMP2  = ZERO
+               Y( J ) = Y( J ) + TEMP1*DBLE( A( J, J ) )
+               DO 90, I = J + 1, N
+                  Y( I ) = Y( I ) + TEMP1*A( I, J )
+                  TEMP2  = TEMP2  + DCONJG( A( I, J ) )*X( I )
+   90          CONTINUE
+               Y( J ) = Y( J ) + ALPHA*TEMP2
+  100       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 120, J = 1, N
+               TEMP1   = ALPHA*X( JX )
+               TEMP2   = ZERO
+               Y( JY ) = Y( JY ) + TEMP1*DBLE( A( J, J ) )
+               IX      = JX
+               IY      = JY
+               DO 110, I = J + 1, N
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+                  Y( IY ) = Y( IY ) + TEMP1*A( I, J )
+                  TEMP2   = TEMP2   + DCONJG( A( I, J ) )*X( IX )
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZHEMV .
+*
+      END
diff --git a/costa/native/external/blas/zher.f b/costa/native/external/blas/zher.f
new file mode 100644
index 000000000..fcf40a5eb
--- /dev/null
+++ b/costa/native/external/blas/zher.f
@@ -0,0 +1,212 @@
+      SUBROUTINE ZHER  ( UPLO, N, ALPHA, X, INCX, A, LDA )
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   ALPHA
+      INTEGER            INCX, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHER   performs the hermitian rank 1 operation
+*
+*     A := alpha*x*conjg( x' ) + A,
+*
+*  where alpha is a real scalar, x is an n element vector and A is an
+*  n by n hermitian matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the array A is to be referenced as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of A
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of A
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular part of the hermitian matrix and the strictly
+*           lower triangular part of A is not referenced. On exit, the
+*           upper triangular part of the array A is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular part of the hermitian matrix and the strictly
+*           upper triangular part of A is not referenced. On exit, the
+*           lower triangular part of the array A is overwritten by the
+*           lower triangular part of the updated matrix.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set, they are assumed to be zero, and on exit they
+*           are set to zero.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP
+      INTEGER            I, INFO, IX, J, JX, KX
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX, DBLE
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZHER  ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.DBLE( ZERO ) ) )
+     $   RETURN
+*
+*     Set the start point in X if the increment is not unity.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the triangular part
+*     of A.
+*
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  A  when A is stored in upper triangle.
+*
+         IF( INCX.EQ.1 )THEN
+            DO 20, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP = ALPHA*DCONJG( X( J ) )
+                  DO 10, I = 1, J - 1
+                     A( I, J ) = A( I, J ) + X( I )*TEMP
+   10             CONTINUE
+                  A( J, J ) = DBLE( A( J, J ) ) + DBLE( X( J )*TEMP )
+               ELSE
+                  A( J, J ) = DBLE( A( J, J ) )
+               END IF
+   20       CONTINUE
+         ELSE
+            JX = KX
+            DO 40, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*DCONJG( X( JX ) )
+                  IX   = KX
+                  DO 30, I = 1, J - 1
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP
+                     IX        = IX        + INCX
+   30             CONTINUE
+                  A( J, J ) = DBLE( A( J, J ) ) + DBLE( X( JX )*TEMP )
+               ELSE
+                  A( J, J ) = DBLE( A( J, J ) )
+               END IF
+               JX = JX + INCX
+   40       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  A  when A is stored in lower triangle.
+*
+         IF( INCX.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP      = ALPHA*DCONJG( X( J ) )
+                  A( J, J ) = DBLE( A( J, J ) ) + DBLE( TEMP*X( J ) )
+                  DO 50, I = J + 1, N
+                     A( I, J ) = A( I, J ) + X( I )*TEMP
+   50             CONTINUE
+               ELSE
+                  A( J, J ) = DBLE( A( J, J ) )
+               END IF
+   60       CONTINUE
+         ELSE
+            JX = KX
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP      = ALPHA*DCONJG( X( JX ) )
+                  A( J, J ) = DBLE( A( J, J ) ) + DBLE( TEMP*X( JX ) )
+                  IX        = JX
+                  DO 70, I = J + 1, N
+                     IX        = IX        + INCX
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP
+   70             CONTINUE
+               ELSE
+                  A( J, J ) = DBLE( A( J, J ) )
+               END IF
+               JX = JX + INCX
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZHER  .
+*
+      END
diff --git a/costa/native/external/blas/zher2.f b/costa/native/external/blas/zher2.f
new file mode 100644
index 000000000..06acdff7b
--- /dev/null
+++ b/costa/native/external/blas/zher2.f
@@ -0,0 +1,249 @@
+      SUBROUTINE ZHER2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, A, LDA )
+*     .. Scalar Arguments ..
+      COMPLEX*16         ALPHA
+      INTEGER            INCX, INCY, LDA, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHER2  performs the hermitian rank 2 operation
+*
+*     A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A,
+*
+*  where alpha is a scalar, x and y are n element vectors and A is an n
+*  by n hermitian matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the array A is to be referenced as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of A
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of A
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y.
+*           Unchanged on exit.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular part of the hermitian matrix and the strictly
+*           lower triangular part of A is not referenced. On exit, the
+*           upper triangular part of the array A is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular part of the hermitian matrix and the strictly
+*           upper triangular part of A is not referenced. On exit, the
+*           lower triangular part of the array A is overwritten by the
+*           lower triangular part of the updated matrix.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set, they are assumed to be zero, and on exit they
+*           are set to zero.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX, DBLE
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 7
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZHER2 ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Set up the start points in X and Y if the increments are not both
+*     unity.
+*
+      IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN
+         IF( INCX.GT.0 )THEN
+            KX = 1
+         ELSE
+            KX = 1 - ( N - 1 )*INCX
+         END IF
+         IF( INCY.GT.0 )THEN
+            KY = 1
+         ELSE
+            KY = 1 - ( N - 1 )*INCY
+         END IF
+         JX = KX
+         JY = KY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the triangular part
+*     of A.
+*
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  A  when A is stored in the upper triangle.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 20, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*DCONJG( Y( J ) )
+                  TEMP2 = DCONJG( ALPHA*X( J ) )
+                  DO 10, I = 1, J - 1
+                     A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2
+   10             CONTINUE
+                  A( J, J ) = DBLE( A( J, J ) ) +
+     $                        DBLE( X( J )*TEMP1 + Y( J )*TEMP2 )
+               ELSE
+                  A( J, J ) = DBLE( A( J, J ) )
+               END IF
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*DCONJG( Y( JY ) )
+                  TEMP2 = DCONJG( ALPHA*X( JX ) )
+                  IX    = KX
+                  IY    = KY
+                  DO 30, I = 1, J - 1
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP1
+     $                                     + Y( IY )*TEMP2
+                     IX        = IX        + INCX
+                     IY        = IY        + INCY
+   30             CONTINUE
+                  A( J, J ) = DBLE( A( J, J ) ) +
+     $                        DBLE( X( JX )*TEMP1 + Y( JY )*TEMP2 )
+               ELSE
+                  A( J, J ) = DBLE( A( J, J ) )
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+   40       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  A  when A is stored in the lower triangle.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1     = ALPHA*DCONJG( Y( J ) )
+                  TEMP2     = DCONJG( ALPHA*X( J ) )
+                  A( J, J ) = DBLE( A( J, J ) ) +
+     $                        DBLE( X( J )*TEMP1 + Y( J )*TEMP2 )
+                  DO 50, I = J + 1, N
+                     A( I, J ) = A( I, J ) + X( I )*TEMP1 + Y( I )*TEMP2
+   50             CONTINUE
+               ELSE
+                  A( J, J ) = DBLE( A( J, J ) )
+               END IF
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1     = ALPHA*DCONJG( Y( JY ) )
+                  TEMP2     = DCONJG( ALPHA*X( JX ) )
+                  A( J, J ) = DBLE( A( J, J ) ) +
+     $                        DBLE( X( JX )*TEMP1 + Y( JY )*TEMP2 )
+                  IX        = JX
+                  IY        = JY
+                  DO 70, I = J + 1, N
+                     IX        = IX        + INCX
+                     IY        = IY        + INCY
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP1
+     $                                     + Y( IY )*TEMP2
+   70             CONTINUE
+               ELSE
+                  A( J, J ) = DBLE( A( J, J ) )
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZHER2 .
+*
+      END
diff --git a/costa/native/external/blas/zher2k.f b/costa/native/external/blas/zher2k.f
new file mode 100644
index 000000000..408d75cf3
--- /dev/null
+++ b/costa/native/external/blas/zher2k.f
@@ -0,0 +1,372 @@
+      SUBROUTINE ZHER2K( UPLO, TRANS, N, K, ALPHA, A, LDA, B, LDB, BETA,
+     $                   C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS, UPLO
+      INTEGER            K, LDA, LDB, LDC, N
+      DOUBLE PRECISION   BETA
+      COMPLEX*16         ALPHA
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHER2K  performs one of the hermitian rank 2k operations
+*
+*     C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + beta*C,
+*
+*  or
+*
+*     C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + beta*C,
+*
+*  where  alpha and beta  are scalars with  beta  real,  C is an  n by n
+*  hermitian matrix and  A and B  are  n by k matrices in the first case
+*  and  k by n  matrices in the second case.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of the  array  C  is to be  referenced  as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry,  TRANS  specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'    C := alpha*A*conjg( B' )          +
+*                                         conjg( alpha )*B*conjg( A' ) +
+*                                         beta*C.
+*
+*              TRANS = 'C' or 'c'    C := alpha*conjg( A' )*B          +
+*                                         conjg( alpha )*conjg( B' )*A +
+*                                         beta*C.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N specifies the order of the matrix C.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+*           of  columns  of the  matrices  A and B,  and on  entry  with
+*           TRANS = 'C' or 'c',  K  specifies  the number of rows of the
+*           matrices  A and B.  K must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16         .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by n  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDA must be at least  max( 1, n ), otherwise  LDA must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  B      - COMPLEX*16       array of DIMENSION ( LDB, kb ), where kb is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  B  must contain the matrix  B,  otherwise
+*           the leading  k by n  part of the array  B  must contain  the
+*           matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDB must be at least  max( 1, n ), otherwise  LDB must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  BETA   - DOUBLE PRECISION            .
+*           On entry, BETA specifies the scalar beta.
+*           Unchanged on exit.
+*
+*  C      - COMPLEX*16          array of DIMENSION ( LDC, n ).
+*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+*           upper triangular part of the array C must contain the upper
+*           triangular part  of the  hermitian matrix  and the strictly
+*           lower triangular part of C is not referenced.  On exit, the
+*           upper triangular part of the array  C is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+*           lower triangular part of the array C must contain the lower
+*           triangular part  of the  hermitian matrix  and the strictly
+*           upper triangular part of C is not referenced.  On exit, the
+*           lower triangular part of the array  C is overwritten by the
+*           lower triangular part of the updated matrix.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set,  they are assumed to be zero,  and on exit they
+*           are set to zero.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*  -- Modified 8-Nov-93 to set C(J,J) to DBLE( C(J,J) ) when BETA = 1.
+*     Ed Anderson, Cray Research Inc.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, DCONJG, MAX
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, L, NROWA
+      COMPLEX*16         TEMP1, TEMP2
+*     ..
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+      COMPLEX*16         ZERO
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      IF( LSAME( TRANS, 'N' ) ) THEN
+         NROWA = N
+      ELSE
+         NROWA = K
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*
+      INFO = 0
+      IF( ( .NOT.UPPER ) .AND. ( .NOT.LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = 1
+      ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ) .AND.
+     $         ( .NOT.LSAME( TRANS, 'C' ) ) ) THEN
+         INFO = 2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = 3
+      ELSE IF( K.LT.0 ) THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) ) THEN
+         INFO = 7
+      ELSE IF( LDB.LT.MAX( 1, NROWA ) ) THEN
+         INFO = 9
+      ELSE IF( LDC.LT.MAX( 1, N ) ) THEN
+         INFO = 12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHER2K', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ) .OR. ( ( ( ALPHA.EQ.ZERO ) .OR. ( K.EQ.0 ) ) .AND.
+     $    ( BETA.EQ.ONE ) ) )RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO ) THEN
+         IF( UPPER ) THEN
+            IF( BETA.EQ.DBLE( ZERO ) ) THEN
+               DO 20 J = 1, N
+                  DO 10 I = 1, J
+                     C( I, J ) = ZERO
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40 J = 1, N
+                  DO 30 I = 1, J - 1
+                     C( I, J ) = BETA*C( I, J )
+   30             CONTINUE
+                  C( J, J ) = BETA*DBLE( C( J, J ) )
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( BETA.EQ.DBLE( ZERO ) ) THEN
+               DO 60 J = 1, N
+                  DO 50 I = J, N
+                     C( I, J ) = ZERO
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80 J = 1, N
+                  C( J, J ) = BETA*DBLE( C( J, J ) )
+                  DO 70 I = J + 1, N
+                     C( I, J ) = BETA*C( I, J )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( TRANS, 'N' ) ) THEN
+*
+*        Form  C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) +
+*                   C.
+*
+         IF( UPPER ) THEN
+            DO 130 J = 1, N
+               IF( BETA.EQ.DBLE( ZERO ) ) THEN
+                  DO 90 I = 1, J
+                     C( I, J ) = ZERO
+   90             CONTINUE
+               ELSE IF( BETA.NE.ONE ) THEN
+                  DO 100 I = 1, J - 1
+                     C( I, J ) = BETA*C( I, J )
+  100             CONTINUE
+                  C( J, J ) = BETA*DBLE( C( J, J ) )
+               ELSE
+                  C( J, J ) = DBLE( C( J, J ) )
+               END IF
+               DO 120 L = 1, K
+                  IF( ( A( J, L ).NE.ZERO ) .OR. ( B( J, L ).NE.ZERO ) )
+     $                 THEN
+                     TEMP1 = ALPHA*DCONJG( B( J, L ) )
+                     TEMP2 = DCONJG( ALPHA*A( J, L ) )
+                     DO 110 I = 1, J - 1
+                        C( I, J ) = C( I, J ) + A( I, L )*TEMP1 +
+     $                              B( I, L )*TEMP2
+  110                CONTINUE
+                     C( J, J ) = DBLE( C( J, J ) ) +
+     $                           DBLE( A( J, L )*TEMP1+B( J, L )*TEMP2 )
+                  END IF
+  120          CONTINUE
+  130       CONTINUE
+         ELSE
+            DO 180 J = 1, N
+               IF( BETA.EQ.DBLE( ZERO ) ) THEN
+                  DO 140 I = J, N
+                     C( I, J ) = ZERO
+  140             CONTINUE
+               ELSE IF( BETA.NE.ONE ) THEN
+                  DO 150 I = J + 1, N
+                     C( I, J ) = BETA*C( I, J )
+  150             CONTINUE
+                  C( J, J ) = BETA*DBLE( C( J, J ) )
+               ELSE
+                  C( J, J ) = DBLE( C( J, J ) )
+               END IF
+               DO 170 L = 1, K
+                  IF( ( A( J, L ).NE.ZERO ) .OR. ( B( J, L ).NE.ZERO ) )
+     $                 THEN
+                     TEMP1 = ALPHA*DCONJG( B( J, L ) )
+                     TEMP2 = DCONJG( ALPHA*A( J, L ) )
+                     DO 160 I = J + 1, N
+                        C( I, J ) = C( I, J ) + A( I, L )*TEMP1 +
+     $                              B( I, L )*TEMP2
+  160                CONTINUE
+                     C( J, J ) = DBLE( C( J, J ) ) +
+     $                           DBLE( A( J, L )*TEMP1+B( J, L )*TEMP2 )
+                  END IF
+  170          CONTINUE
+  180       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A +
+*                   C.
+*
+         IF( UPPER ) THEN
+            DO 210 J = 1, N
+               DO 200 I = 1, J
+                  TEMP1 = ZERO
+                  TEMP2 = ZERO
+                  DO 190 L = 1, K
+                     TEMP1 = TEMP1 + DCONJG( A( L, I ) )*B( L, J )
+                     TEMP2 = TEMP2 + DCONJG( B( L, I ) )*A( L, J )
+  190             CONTINUE
+                  IF( I.EQ.J ) THEN
+                     IF( BETA.EQ.DBLE( ZERO ) ) THEN
+                        C( J, J ) = DBLE( ALPHA*TEMP1+DCONJG( ALPHA )*
+     $                              TEMP2 )
+                     ELSE
+                        C( J, J ) = BETA*DBLE( C( J, J ) ) +
+     $                              DBLE( ALPHA*TEMP1+DCONJG( ALPHA )*
+     $                              TEMP2 )
+                     END IF
+                  ELSE
+                     IF( BETA.EQ.DBLE( ZERO ) ) THEN
+                        C( I, J ) = ALPHA*TEMP1 + DCONJG( ALPHA )*TEMP2
+                     ELSE
+                        C( I, J ) = BETA*C( I, J ) + ALPHA*TEMP1 +
+     $                              DCONJG( ALPHA )*TEMP2
+                     END IF
+                  END IF
+  200          CONTINUE
+  210       CONTINUE
+         ELSE
+            DO 240 J = 1, N
+               DO 230 I = J, N
+                  TEMP1 = ZERO
+                  TEMP2 = ZERO
+                  DO 220 L = 1, K
+                     TEMP1 = TEMP1 + DCONJG( A( L, I ) )*B( L, J )
+                     TEMP2 = TEMP2 + DCONJG( B( L, I ) )*A( L, J )
+  220             CONTINUE
+                  IF( I.EQ.J ) THEN
+                     IF( BETA.EQ.DBLE( ZERO ) ) THEN
+                        C( J, J ) = DBLE( ALPHA*TEMP1+DCONJG( ALPHA )*
+     $                              TEMP2 )
+                     ELSE
+                        C( J, J ) = BETA*DBLE( C( J, J ) ) +
+     $                              DBLE( ALPHA*TEMP1+DCONJG( ALPHA )*
+     $                              TEMP2 )
+                     END IF
+                  ELSE
+                     IF( BETA.EQ.DBLE( ZERO ) ) THEN
+                        C( I, J ) = ALPHA*TEMP1 + DCONJG( ALPHA )*TEMP2
+                     ELSE
+                        C( I, J ) = BETA*C( I, J ) + ALPHA*TEMP1 +
+     $                              DCONJG( ALPHA )*TEMP2
+                     END IF
+                  END IF
+  230          CONTINUE
+  240       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZHER2K.
+*
+      END
diff --git a/costa/native/external/blas/zherk.f b/costa/native/external/blas/zherk.f
new file mode 100644
index 000000000..cfbf71802
--- /dev/null
+++ b/costa/native/external/blas/zherk.f
@@ -0,0 +1,330 @@
+      SUBROUTINE ZHERK( UPLO, TRANS, N, K, ALPHA, A, LDA, BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS, UPLO
+      INTEGER            K, LDA, LDC, N
+      DOUBLE PRECISION   ALPHA, BETA
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHERK  performs one of the hermitian rank k operations
+*
+*     C := alpha*A*conjg( A' ) + beta*C,
+*
+*  or
+*
+*     C := alpha*conjg( A' )*A + beta*C,
+*
+*  where  alpha and beta  are  real scalars,  C is an  n by n  hermitian
+*  matrix and  A  is an  n by k  matrix in the  first case and a  k by n
+*  matrix in the second case.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of the  array  C  is to be  referenced  as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry,  TRANS  specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   C := alpha*A*conjg( A' ) + beta*C.
+*
+*              TRANS = 'C' or 'c'   C := alpha*conjg( A' )*A + beta*C.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N specifies the order of the matrix C.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+*           of  columns   of  the   matrix   A,   and  on   entry   with
+*           TRANS = 'C' or 'c',  K  specifies  the number of rows of the
+*           matrix A.  K must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION            .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by n  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDA must be at least  max( 1, n ), otherwise  LDA must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  BETA   - DOUBLE PRECISION.
+*           On entry, BETA specifies the scalar beta.
+*           Unchanged on exit.
+*
+*  C      - COMPLEX*16          array of DIMENSION ( LDC, n ).
+*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+*           upper triangular part of the array C must contain the upper
+*           triangular part  of the  hermitian matrix  and the strictly
+*           lower triangular part of C is not referenced.  On exit, the
+*           upper triangular part of the array  C is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+*           lower triangular part of the array C must contain the lower
+*           triangular part  of the  hermitian matrix  and the strictly
+*           upper triangular part of C is not referenced.  On exit, the
+*           lower triangular part of the array  C is overwritten by the
+*           lower triangular part of the updated matrix.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set,  they are assumed to be zero,  and on exit they
+*           are set to zero.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*  -- Modified 8-Nov-93 to set C(J,J) to DBLE( C(J,J) ) when BETA = 1.
+*     Ed Anderson, Cray Research Inc.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, DCMPLX, DCONJG, MAX
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, L, NROWA
+      DOUBLE PRECISION   RTEMP
+      COMPLEX*16         TEMP
+*     ..
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      IF( LSAME( TRANS, 'N' ) ) THEN
+         NROWA = N
+      ELSE
+         NROWA = K
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*
+      INFO = 0
+      IF( ( .NOT.UPPER ) .AND. ( .NOT.LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = 1
+      ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ) .AND.
+     $         ( .NOT.LSAME( TRANS, 'C' ) ) ) THEN
+         INFO = 2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = 3
+      ELSE IF( K.LT.0 ) THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) ) THEN
+         INFO = 7
+      ELSE IF( LDC.LT.MAX( 1, N ) ) THEN
+         INFO = 10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHERK ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ) .OR. ( ( ( ALPHA.EQ.ZERO ) .OR. ( K.EQ.0 ) ) .AND.
+     $    ( BETA.EQ.ONE ) ) )RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO ) THEN
+         IF( UPPER ) THEN
+            IF( BETA.EQ.ZERO ) THEN
+               DO 20 J = 1, N
+                  DO 10 I = 1, J
+                     C( I, J ) = ZERO
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40 J = 1, N
+                  DO 30 I = 1, J - 1
+                     C( I, J ) = BETA*C( I, J )
+   30             CONTINUE
+                  C( J, J ) = BETA*DBLE( C( J, J ) )
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( BETA.EQ.ZERO ) THEN
+               DO 60 J = 1, N
+                  DO 50 I = J, N
+                     C( I, J ) = ZERO
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80 J = 1, N
+                  C( J, J ) = BETA*DBLE( C( J, J ) )
+                  DO 70 I = J + 1, N
+                     C( I, J ) = BETA*C( I, J )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( TRANS, 'N' ) ) THEN
+*
+*        Form  C := alpha*A*conjg( A' ) + beta*C.
+*
+         IF( UPPER ) THEN
+            DO 130 J = 1, N
+               IF( BETA.EQ.ZERO ) THEN
+                  DO 90 I = 1, J
+                     C( I, J ) = ZERO
+   90             CONTINUE
+               ELSE IF( BETA.NE.ONE ) THEN
+                  DO 100 I = 1, J - 1
+                     C( I, J ) = BETA*C( I, J )
+  100             CONTINUE
+                  C( J, J ) = BETA*DBLE( C( J, J ) )
+               ELSE
+                  C( J, J ) = DBLE( C( J, J ) )
+               END IF
+               DO 120 L = 1, K
+                  IF( A( J, L ).NE.DCMPLX( ZERO ) ) THEN
+                     TEMP = ALPHA*DCONJG( A( J, L ) )
+                     DO 110 I = 1, J - 1
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  110                CONTINUE
+                     C( J, J ) = DBLE( C( J, J ) ) +
+     $                           DBLE( TEMP*A( I, L ) )
+                  END IF
+  120          CONTINUE
+  130       CONTINUE
+         ELSE
+            DO 180 J = 1, N
+               IF( BETA.EQ.ZERO ) THEN
+                  DO 140 I = J, N
+                     C( I, J ) = ZERO
+  140             CONTINUE
+               ELSE IF( BETA.NE.ONE ) THEN
+                  C( J, J ) = BETA*DBLE( C( J, J ) )
+                  DO 150 I = J + 1, N
+                     C( I, J ) = BETA*C( I, J )
+  150             CONTINUE
+               ELSE
+                  C( J, J ) = DBLE( C( J, J ) )
+               END IF
+               DO 170 L = 1, K
+                  IF( A( J, L ).NE.DCMPLX( ZERO ) ) THEN
+                     TEMP = ALPHA*DCONJG( A( J, L ) )
+                     C( J, J ) = DBLE( C( J, J ) ) +
+     $                           DBLE( TEMP*A( J, L ) )
+                     DO 160 I = J + 1, N
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  160                CONTINUE
+                  END IF
+  170          CONTINUE
+  180       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  C := alpha*conjg( A' )*A + beta*C.
+*
+         IF( UPPER ) THEN
+            DO 220 J = 1, N
+               DO 200 I = 1, J - 1
+                  TEMP = ZERO
+                  DO 190 L = 1, K
+                     TEMP = TEMP + DCONJG( A( L, I ) )*A( L, J )
+  190             CONTINUE
+                  IF( BETA.EQ.ZERO ) THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  200          CONTINUE
+               RTEMP = ZERO
+               DO 210 L = 1, K
+                  RTEMP = RTEMP + DCONJG( A( L, J ) )*A( L, J )
+  210          CONTINUE
+               IF( BETA.EQ.ZERO ) THEN
+                  C( J, J ) = ALPHA*RTEMP
+               ELSE
+                  C( J, J ) = ALPHA*RTEMP + BETA*DBLE( C( J, J ) )
+               END IF
+  220       CONTINUE
+         ELSE
+            DO 260 J = 1, N
+               RTEMP = ZERO
+               DO 230 L = 1, K
+                  RTEMP = RTEMP + DCONJG( A( L, J ) )*A( L, J )
+  230          CONTINUE
+               IF( BETA.EQ.ZERO ) THEN
+                  C( J, J ) = ALPHA*RTEMP
+               ELSE
+                  C( J, J ) = ALPHA*RTEMP + BETA*DBLE( C( J, J ) )
+               END IF
+               DO 250 I = J + 1, N
+                  TEMP = ZERO
+                  DO 240 L = 1, K
+                     TEMP = TEMP + DCONJG( A( L, I ) )*A( L, J )
+  240             CONTINUE
+                  IF( BETA.EQ.ZERO ) THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  250          CONTINUE
+  260       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZHERK .
+*
+      END
diff --git a/costa/native/external/blas/zhpmv.f b/costa/native/external/blas/zhpmv.f
new file mode 100644
index 000000000..9cde9234c
--- /dev/null
+++ b/costa/native/external/blas/zhpmv.f
@@ -0,0 +1,270 @@
+      SUBROUTINE ZHPMV ( UPLO, N, ALPHA, AP, X, INCX, BETA, Y, INCY )
+*     .. Scalar Arguments ..
+      COMPLEX*16         ALPHA, BETA
+      INTEGER            INCX, INCY, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      COMPLEX*16         AP( * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHPMV  performs the matrix-vector operation
+*
+*     y := alpha*A*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are n element vectors and
+*  A is an n by n hermitian matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the matrix A is supplied in the packed
+*           array AP as follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  supplied in AP.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  supplied in AP.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  AP     - COMPLEX*16       array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular part of the hermitian matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
+*           and a( 2, 2 ) respectively, and so on.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular part of the hermitian matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
+*           and a( 3, 1 ) respectively, and so on.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set and are assumed to be zero.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX*16      .
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y. On exit, Y is overwritten by the updated
+*           vector y.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER        ( ONE  = ( 1.0D+0, 0.0D+0 ) )
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, DBLE
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 6
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZHPMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set up the start points in  X  and  Y.
+*
+      IF( INCX.GT.0 )THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( N - 1 )*INCX
+      END IF
+      IF( INCY.GT.0 )THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( N - 1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of the array AP
+*     are accessed sequentially with one pass through AP.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE )THEN
+         IF( INCY.EQ.1 )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 10, I = 1, N
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20, I = 1, N
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO )THEN
+               DO 30, I = 1, N
+                  Y( IY ) = ZERO
+                  IY      = IY   + INCY
+   30          CONTINUE
+            ELSE
+               DO 40, I = 1, N
+                  Y( IY ) = BETA*Y( IY )
+                  IY      = IY           + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      KK = 1
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  y  when AP contains the upper triangle.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               K     = KK
+               DO 50, I = 1, J - 1
+                  Y( I ) = Y( I ) + TEMP1*AP( K )
+                  TEMP2  = TEMP2  + DCONJG( AP( K ) )*X( I )
+                  K      = K      + 1
+   50          CONTINUE
+               Y( J ) = Y( J ) + TEMP1*DBLE( AP( KK + J - 1 ) )
+     $                         + ALPHA*TEMP2
+               KK     = KK     + J
+   60       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 80, J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               IX    = KX
+               IY    = KY
+               DO 70, K = KK, KK + J - 2
+                  Y( IY ) = Y( IY ) + TEMP1*AP( K )
+                  TEMP2   = TEMP2   + DCONJG( AP( K ) )*X( IX )
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+   70          CONTINUE
+               Y( JY ) = Y( JY ) + TEMP1*DBLE( AP( KK + J - 1 ) )
+     $                           + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+               KK      = KK      + J
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y  when AP contains the lower triangle.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 100, J = 1, N
+               TEMP1  = ALPHA*X( J )
+               TEMP2  = ZERO
+               Y( J ) = Y( J ) + TEMP1*DBLE( AP( KK ) )
+               K      = KK     + 1
+               DO 90, I = J + 1, N
+                  Y( I ) = Y( I ) + TEMP1*AP( K )
+                  TEMP2  = TEMP2  + DCONJG( AP( K ) )*X( I )
+                  K      = K      + 1
+   90          CONTINUE
+               Y( J ) = Y( J ) + ALPHA*TEMP2
+               KK     = KK     + ( N - J + 1 )
+  100       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 120, J = 1, N
+               TEMP1   = ALPHA*X( JX )
+               TEMP2   = ZERO
+               Y( JY ) = Y( JY ) + TEMP1*DBLE( AP( KK ) )
+               IX      = JX
+               IY      = JY
+               DO 110, K = KK + 1, KK + N - J
+                  IX      = IX      + INCX
+                  IY      = IY      + INCY
+                  Y( IY ) = Y( IY ) + TEMP1*AP( K )
+                  TEMP2   = TEMP2   + DCONJG( AP( K ) )*X( IX )
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP2
+               JX      = JX      + INCX
+               JY      = JY      + INCY
+               KK      = KK      + ( N - J + 1 )
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZHPMV .
+*
+      END
diff --git a/costa/native/external/blas/zhpr.f b/costa/native/external/blas/zhpr.f
new file mode 100644
index 000000000..2e368de49
--- /dev/null
+++ b/costa/native/external/blas/zhpr.f
@@ -0,0 +1,217 @@
+      SUBROUTINE ZHPR  ( UPLO, N, ALPHA, X, INCX, AP )
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   ALPHA
+      INTEGER            INCX, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      COMPLEX*16         AP( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHPR    performs the hermitian rank 1 operation
+*
+*     A := alpha*x*conjg( x' ) + A,
+*
+*  where alpha is a real scalar, x is an n element vector and A is an
+*  n by n hermitian matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the matrix A is supplied in the packed
+*           array AP as follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  supplied in AP.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  supplied in AP.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - DOUBLE PRECISION.
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  AP     - COMPLEX*16       array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular part of the hermitian matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
+*           and a( 2, 2 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the upper triangular part of the
+*           updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular part of the hermitian matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
+*           and a( 3, 1 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the lower triangular part of the
+*           updated matrix.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set, they are assumed to be zero, and on exit they
+*           are set to zero.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP
+      INTEGER            I, INFO, IX, J, JX, K, KK, KX
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, DBLE
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZHPR  ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.DBLE( ZERO ) ) )
+     $   RETURN
+*
+*     Set the start point in X if the increment is not unity.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of the array AP
+*     are accessed sequentially with one pass through AP.
+*
+      KK = 1
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  A  when upper triangle is stored in AP.
+*
+         IF( INCX.EQ.1 )THEN
+            DO 20, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP = ALPHA*DCONJG( X( J ) )
+                  K    = KK
+                  DO 10, I = 1, J - 1
+                     AP( K ) = AP( K ) + X( I )*TEMP
+                     K       = K       + 1
+   10             CONTINUE
+                  AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) )
+     $                               + DBLE( X( J )*TEMP )
+               ELSE
+                  AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) )
+               END IF
+               KK = KK + J
+   20       CONTINUE
+         ELSE
+            JX = KX
+            DO 40, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP = ALPHA*DCONJG( X( JX ) )
+                  IX   = KX
+                  DO 30, K = KK, KK + J - 2
+                     AP( K ) = AP( K ) + X( IX )*TEMP
+                     IX      = IX      + INCX
+   30             CONTINUE
+                  AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) )
+     $                               + DBLE( X( JX )*TEMP )
+               ELSE
+                  AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) )
+               END IF
+               JX = JX + INCX
+               KK = KK + J
+   40       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  A  when lower triangle is stored in AP.
+*
+         IF( INCX.EQ.1 )THEN
+            DO 60, J = 1, N
+               IF( X( J ).NE.ZERO )THEN
+                  TEMP     = ALPHA*DCONJG( X( J ) )
+                  AP( KK ) = DBLE( AP( KK ) ) + DBLE( TEMP*X( J ) )
+                  K        = KK               + 1
+                  DO 50, I = J + 1, N
+                     AP( K ) = AP( K ) + X( I )*TEMP
+                     K       = K       + 1
+   50             CONTINUE
+               ELSE
+                  AP( KK ) = DBLE( AP( KK ) )
+               END IF
+               KK = KK + N - J + 1
+   60       CONTINUE
+         ELSE
+            JX = KX
+            DO 80, J = 1, N
+               IF( X( JX ).NE.ZERO )THEN
+                  TEMP    = ALPHA*DCONJG( X( JX ) )
+                  AP( KK ) = DBLE( AP( KK ) ) + DBLE( TEMP*X( JX ) )
+                  IX      = JX
+                  DO 70, K = KK + 1, KK + N - J
+                     IX      = IX      + INCX
+                     AP( K ) = AP( K ) + X( IX )*TEMP
+   70             CONTINUE
+               ELSE
+                  AP( KK ) = DBLE( AP( KK ) )
+               END IF
+               JX = JX + INCX
+               KK = KK + N - J + 1
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZHPR  .
+*
+      END
diff --git a/costa/native/external/blas/zhpr2.f b/costa/native/external/blas/zhpr2.f
new file mode 100644
index 000000000..e10774b14
--- /dev/null
+++ b/costa/native/external/blas/zhpr2.f
@@ -0,0 +1,251 @@
+      SUBROUTINE ZHPR2 ( UPLO, N, ALPHA, X, INCX, Y, INCY, AP )
+*     .. Scalar Arguments ..
+      COMPLEX*16         ALPHA
+      INTEGER            INCX, INCY, N
+      CHARACTER*1        UPLO
+*     .. Array Arguments ..
+      COMPLEX*16         AP( * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHPR2  performs the hermitian rank 2 operation
+*
+*     A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A,
+*
+*  where alpha is a scalar, x and y are n element vectors and A is an
+*  n by n hermitian matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the matrix A is supplied in the packed
+*           array AP as follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  supplied in AP.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  supplied in AP.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y.
+*           Unchanged on exit.
+*
+*  INCY   - INTEGER.
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+*  AP     - COMPLEX*16       array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular part of the hermitian matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
+*           and a( 2, 2 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the upper triangular part of the
+*           updated matrix.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular part of the hermitian matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
+*           and a( 3, 1 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the lower triangular part of the
+*           updated matrix.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set, they are assumed to be zero, and on exit they
+*           are set to zero.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP1, TEMP2
+      INTEGER            I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, DBLE
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO, 'U' ).AND.
+     $         .NOT.LSAME( UPLO, 'L' )      )THEN
+         INFO = 1
+      ELSE IF( N.LT.0 )THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 5
+      ELSE IF( INCY.EQ.0 )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZHPR2 ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Set up the start points in X and Y if the increments are not both
+*     unity.
+*
+      IF( ( INCX.NE.1 ).OR.( INCY.NE.1 ) )THEN
+         IF( INCX.GT.0 )THEN
+            KX = 1
+         ELSE
+            KX = 1 - ( N - 1 )*INCX
+         END IF
+         IF( INCY.GT.0 )THEN
+            KY = 1
+         ELSE
+            KY = 1 - ( N - 1 )*INCY
+         END IF
+         JX = KX
+         JY = KY
+      END IF
+*
+*     Start the operations. In this version the elements of the array AP
+*     are accessed sequentially with one pass through AP.
+*
+      KK = 1
+      IF( LSAME( UPLO, 'U' ) )THEN
+*
+*        Form  A  when upper triangle is stored in AP.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 20, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*DCONJG( Y( J ) )
+                  TEMP2 = DCONJG( ALPHA*X( J ) )
+                  K     = KK
+                  DO 10, I = 1, J - 1
+                     AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2
+                     K       = K       + 1
+   10             CONTINUE
+                  AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) ) +
+     $                               DBLE( X( J )*TEMP1 + Y( J )*TEMP2 )
+               ELSE
+                  AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) )
+               END IF
+               KK = KK + J
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1 = ALPHA*DCONJG( Y( JY ) )
+                  TEMP2 = DCONJG( ALPHA*X( JX ) )
+                  IX    = KX
+                  IY    = KY
+                  DO 30, K = KK, KK + J - 2
+                     AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2
+                     IX      = IX      + INCX
+                     IY      = IY      + INCY
+   30             CONTINUE
+                  AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) ) +
+     $                               DBLE( X( JX )*TEMP1 +
+     $                                     Y( JY )*TEMP2 )
+               ELSE
+                  AP( KK + J - 1 ) = DBLE( AP( KK + J - 1 ) )
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+               KK = KK + J
+   40       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  A  when lower triangle is stored in AP.
+*
+         IF( ( INCX.EQ.1 ).AND.( INCY.EQ.1 ) )THEN
+            DO 60, J = 1, N
+               IF( ( X( J ).NE.ZERO ).OR.( Y( J ).NE.ZERO ) )THEN
+                  TEMP1   = ALPHA*DCONJG( Y( J ) )
+                  TEMP2   = DCONJG( ALPHA*X( J ) )
+                  AP( KK ) = DBLE( AP( KK ) ) +
+     $                       DBLE( X( J )*TEMP1 + Y( J )*TEMP2 )
+                  K        = KK               + 1
+                  DO 50, I = J + 1, N
+                     AP( K ) = AP( K ) + X( I )*TEMP1 + Y( I )*TEMP2
+                     K       = K       + 1
+   50             CONTINUE
+               ELSE
+                  AP( KK ) = DBLE( AP( KK ) )
+               END IF
+               KK = KK + N - J + 1
+   60       CONTINUE
+         ELSE
+            DO 80, J = 1, N
+               IF( ( X( JX ).NE.ZERO ).OR.( Y( JY ).NE.ZERO ) )THEN
+                  TEMP1    = ALPHA*DCONJG( Y( JY ) )
+                  TEMP2    = DCONJG( ALPHA*X( JX ) )
+                  AP( KK ) = DBLE( AP( KK ) ) +
+     $                       DBLE( X( JX )*TEMP1 + Y( JY )*TEMP2 )
+                  IX       = JX
+                  IY       = JY
+                  DO 70, K = KK + 1, KK + N - J
+                     IX      = IX      + INCX
+                     IY      = IY      + INCY
+                     AP( K ) = AP( K ) + X( IX )*TEMP1 + Y( IY )*TEMP2
+   70             CONTINUE
+               ELSE
+                  AP( KK ) = DBLE( AP( KK ) )
+               END IF
+               JX = JX + INCX
+               JY = JY + INCY
+               KK = KK + N - J + 1
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZHPR2 .
+*
+      END
diff --git a/costa/native/external/blas/zrotg.f b/costa/native/external/blas/zrotg.f
new file mode 100644
index 000000000..f6a4aa127
--- /dev/null
+++ b/costa/native/external/blas/zrotg.f
@@ -0,0 +1,21 @@
+      subroutine zrotg(ca,cb,c,s)
+      double complex ca,cb,s
+      double precision c
+      double precision norm,scale
+      double complex alpha
+      if (cdabs(ca) .ne. 0.0d0) go to 10
+         c = 0.0d0
+         s = (1.0d0,0.0d0)
+         ca = cb
+         go to 20
+   10 continue
+         scale = cdabs(ca) + cdabs(cb)
+         norm = scale*dsqrt((cdabs(ca/dcmplx(scale,0.0d0)))**2 +
+     *                      (cdabs(cb/dcmplx(scale,0.0d0)))**2)
+         alpha = ca /cdabs(ca)
+         c = cdabs(ca) / norm
+         s = alpha * dconjg(cb) / norm
+         ca = alpha * norm
+   20 continue
+      return
+      end
diff --git a/costa/native/external/blas/zscal.f b/costa/native/external/blas/zscal.f
new file mode 100644
index 000000000..6fa857639
--- /dev/null
+++ b/costa/native/external/blas/zscal.f
@@ -0,0 +1,29 @@
+      subroutine  zscal(n,za,zx,incx)
+c
+c     scales a vector by a constant.
+c     jack dongarra, 3/11/78.
+c     modified 3/93 to return if incx .le. 0.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      double complex za,zx(*)
+      integer i,incx,ix,n
+c
+      if( n.le.0 .or. incx.le.0 )return
+      if(incx.eq.1)go to 20
+c
+c        code for increment not equal to 1
+c
+      ix = 1
+      do 10 i = 1,n
+        zx(ix) = za*zx(ix)
+        ix = ix + incx
+   10 continue
+      return
+c
+c        code for increment equal to 1
+c
+   20 do 30 i = 1,n
+        zx(i) = za*zx(i)
+   30 continue
+      return
+      end
diff --git a/costa/native/external/blas/zswap.f b/costa/native/external/blas/zswap.f
new file mode 100644
index 000000000..f28a4e415
--- /dev/null
+++ b/costa/native/external/blas/zswap.f
@@ -0,0 +1,36 @@
+      subroutine  zswap (n,zx,incx,zy,incy)
+c
+c     interchanges two vectors.
+c     jack dongarra, 3/11/78.
+c     modified 12/3/93, array(1) declarations changed to array(*)
+c
+      double complex zx(*),zy(*),ztemp
+      integer i,incx,incy,ix,iy,n
+c
+      if(n.le.0)return
+      if(incx.eq.1.and.incy.eq.1)go to 20
+c
+c       code for unequal increments or equal increments not equal
+c         to 1
+c
+      ix = 1
+      iy = 1
+      if(incx.lt.0)ix = (-n+1)*incx + 1
+      if(incy.lt.0)iy = (-n+1)*incy + 1
+      do 10 i = 1,n
+        ztemp = zx(ix)
+        zx(ix) = zy(iy)
+        zy(iy) = ztemp
+        ix = ix + incx
+        iy = iy + incy
+   10 continue
+      return
+c
+c       code for both increments equal to 1
+   20 do 30 i = 1,n
+        ztemp = zx(i)
+        zx(i) = zy(i)
+        zy(i) = ztemp
+   30 continue
+      return
+      end
diff --git a/costa/native/external/blas/zsymm.f b/costa/native/external/blas/zsymm.f
new file mode 100644
index 000000000..20b7c08d8
--- /dev/null
+++ b/costa/native/external/blas/zsymm.f
@@ -0,0 +1,296 @@
+      SUBROUTINE ZSYMM ( SIDE, UPLO, M, N, ALPHA, A, LDA, B, LDB,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        SIDE, UPLO
+      INTEGER            M, N, LDA, LDB, LDC
+      COMPLEX*16         ALPHA, BETA
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZSYMM  performs one of the matrix-matrix operations
+*
+*     C := alpha*A*B + beta*C,
+*
+*  or
+*
+*     C := alpha*B*A + beta*C,
+*
+*  where  alpha and beta are scalars, A is a symmetric matrix and  B and
+*  C are m by n matrices.
+*
+*  Parameters
+*  ==========
+*
+*  SIDE   - CHARACTER*1.
+*           On entry,  SIDE  specifies whether  the  symmetric matrix  A
+*           appears on the  left or right  in the  operation as follows:
+*
+*              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C,
+*
+*              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C,
+*
+*           Unchanged on exit.
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of  the  symmetric  matrix   A  is  to  be
+*           referenced as follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of the
+*                                  symmetric matrix is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of the
+*                                  symmetric matrix is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry,  M  specifies the number of rows of the matrix  C.
+*           M  must be at least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of the matrix C.
+*           N  must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is
+*           m  when  SIDE = 'L' or 'l'  and is n  otherwise.
+*           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of
+*           the array  A  must contain the  symmetric matrix,  such that
+*           when  UPLO = 'U' or 'u', the leading m by m upper triangular
+*           part of the array  A  must contain the upper triangular part
+*           of the  symmetric matrix and the  strictly  lower triangular
+*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
+*           the leading  m by m  lower triangular part  of the  array  A
+*           must  contain  the  lower triangular part  of the  symmetric
+*           matrix and the  strictly upper triangular part of  A  is not
+*           referenced.
+*           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of
+*           the array  A  must contain the  symmetric matrix,  such that
+*           when  UPLO = 'U' or 'u', the leading n by n upper triangular
+*           part of the array  A  must contain the upper triangular part
+*           of the  symmetric matrix and the  strictly  lower triangular
+*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
+*           the leading  n by n  lower triangular part  of the  array  A
+*           must  contain  the  lower triangular part  of the  symmetric
+*           matrix and the  strictly upper triangular part of  A  is not
+*           referenced.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the  calling (sub) program. When  SIDE = 'L' or 'l'  then
+*           LDA must be at least  max( 1, m ), otherwise  LDA must be at
+*           least max( 1, n ).
+*           Unchanged on exit.
+*
+*  B      - COMPLEX*16       array of DIMENSION ( LDB, n ).
+*           Before entry, the leading  m by n part of the array  B  must
+*           contain the matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   LDB  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX*16      .
+*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
+*           supplied as zero then C need not be set on input.
+*           Unchanged on exit.
+*
+*  C      - COMPLEX*16       array of DIMENSION ( LDC, n ).
+*           Before entry, the leading  m by n  part of the array  C must
+*           contain the matrix  C,  except when  beta  is zero, in which
+*           case C need not be set on entry.
+*           On exit, the array  C  is overwritten by the  m by n updated
+*           matrix.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, K, NROWA
+      COMPLEX*16         TEMP1, TEMP2
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER        ( ONE  = ( 1.0D+0, 0.0D+0 ) )
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Set NROWA as the number of rows of A.
+*
+      IF( LSAME( SIDE, 'L' ) )THEN
+         NROWA = M
+      ELSE
+         NROWA = N
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF(      ( .NOT.LSAME( SIDE, 'L' ) ).AND.
+     $         ( .NOT.LSAME( SIDE, 'R' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.UPPER              ).AND.
+     $         ( .NOT.LSAME( UPLO, 'L' ) )      )THEN
+         INFO = 2
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDB.LT.MAX( 1, M     ) )THEN
+         INFO = 9
+      ELSE IF( LDC.LT.MAX( 1, M     ) )THEN
+         INFO = 12
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZSYMM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( M.EQ.0 ).OR.( N.EQ.0 ).OR.
+     $    ( ( ALPHA.EQ.ZERO ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( BETA.EQ.ZERO )THEN
+            DO 20, J = 1, N
+               DO 10, I = 1, M
+                  C( I, J ) = ZERO
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+            DO 40, J = 1, N
+               DO 30, I = 1, M
+                  C( I, J ) = BETA*C( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( SIDE, 'L' ) )THEN
+*
+*        Form  C := alpha*A*B + beta*C.
+*
+         IF( UPPER )THEN
+            DO 70, J = 1, N
+               DO 60, I = 1, M
+                  TEMP1 = ALPHA*B( I, J )
+                  TEMP2 = ZERO
+                  DO 50, K = 1, I - 1
+                     C( K, J ) = C( K, J ) + TEMP1    *A( K, I )
+                     TEMP2     = TEMP2     + B( K, J )*A( K, I )
+   50             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           TEMP1*A( I, I ) + ALPHA*TEMP2
+                  END IF
+   60          CONTINUE
+   70       CONTINUE
+         ELSE
+            DO 100, J = 1, N
+               DO 90, I = M, 1, -1
+                  TEMP1 = ALPHA*B( I, J )
+                  TEMP2 = ZERO
+                  DO 80, K = I + 1, M
+                     C( K, J ) = C( K, J ) + TEMP1    *A( K, I )
+                     TEMP2     = TEMP2     + B( K, J )*A( K, I )
+   80             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = TEMP1*A( I, I ) + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           TEMP1*A( I, I ) + ALPHA*TEMP2
+                  END IF
+   90          CONTINUE
+  100       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  C := alpha*B*A + beta*C.
+*
+         DO 170, J = 1, N
+            TEMP1 = ALPHA*A( J, J )
+            IF( BETA.EQ.ZERO )THEN
+               DO 110, I = 1, M
+                  C( I, J ) = TEMP1*B( I, J )
+  110          CONTINUE
+            ELSE
+               DO 120, I = 1, M
+                  C( I, J ) = BETA*C( I, J ) + TEMP1*B( I, J )
+  120          CONTINUE
+            END IF
+            DO 140, K = 1, J - 1
+               IF( UPPER )THEN
+                  TEMP1 = ALPHA*A( K, J )
+               ELSE
+                  TEMP1 = ALPHA*A( J, K )
+               END IF
+               DO 130, I = 1, M
+                  C( I, J ) = C( I, J ) + TEMP1*B( I, K )
+  130          CONTINUE
+  140       CONTINUE
+            DO 160, K = J + 1, N
+               IF( UPPER )THEN
+                  TEMP1 = ALPHA*A( J, K )
+               ELSE
+                  TEMP1 = ALPHA*A( K, J )
+               END IF
+               DO 150, I = 1, M
+                  C( I, J ) = C( I, J ) + TEMP1*B( I, K )
+  150          CONTINUE
+  160       CONTINUE
+  170    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZSYMM .
+*
+      END
diff --git a/costa/native/external/blas/zsyr2k.f b/costa/native/external/blas/zsyr2k.f
new file mode 100644
index 000000000..aba2071ac
--- /dev/null
+++ b/costa/native/external/blas/zsyr2k.f
@@ -0,0 +1,324 @@
+      SUBROUTINE ZSYR2K( UPLO, TRANS, N, K, ALPHA, A, LDA, B, LDB,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        UPLO, TRANS
+      INTEGER            N, K, LDA, LDB, LDC
+      COMPLEX*16         ALPHA, BETA
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZSYR2K  performs one of the symmetric rank 2k operations
+*
+*     C := alpha*A*B' + alpha*B*A' + beta*C,
+*
+*  or
+*
+*     C := alpha*A'*B + alpha*B'*A + beta*C,
+*
+*  where  alpha and beta  are scalars,  C is an  n by n symmetric matrix
+*  and  A and B  are  n by k  matrices  in the  first  case  and  k by n
+*  matrices in the second case.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of the  array  C  is to be  referenced  as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry,  TRANS  specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'    C := alpha*A*B' + alpha*B*A' +
+*                                         beta*C.
+*
+*              TRANS = 'T' or 't'    C := alpha*A'*B + alpha*B'*A +
+*                                         beta*C.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N specifies the order of the matrix C.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+*           of  columns  of the  matrices  A and B,  and on  entry  with
+*           TRANS = 'T' or 't',  K  specifies  the number of rows of the
+*           matrices  A and B.  K must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by n  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDA must be at least  max( 1, n ), otherwise  LDA must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  B      - COMPLEX*16       array of DIMENSION ( LDB, kb ), where kb is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  B  must contain the matrix  B,  otherwise
+*           the leading  k by n  part of the array  B  must contain  the
+*           matrix B.
+*           Unchanged on exit.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDB must be at least  max( 1, n ), otherwise  LDB must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX*16      .
+*           On entry, BETA specifies the scalar beta.
+*           Unchanged on exit.
+*
+*  C      - COMPLEX*16       array of DIMENSION ( LDC, n ).
+*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+*           upper triangular part of the array C must contain the upper
+*           triangular part  of the  symmetric matrix  and the strictly
+*           lower triangular part of C is not referenced.  On exit, the
+*           upper triangular part of the array  C is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+*           lower triangular part of the array C must contain the lower
+*           triangular part  of the  symmetric matrix  and the strictly
+*           upper triangular part of C is not referenced.  On exit, the
+*           lower triangular part of the array  C is overwritten by the
+*           lower triangular part of the updated matrix.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, L, NROWA
+      COMPLEX*16         TEMP1, TEMP2
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER        ( ONE  = ( 1.0D+0, 0.0D+0 ) )
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+         NROWA = N
+      ELSE
+         NROWA = K
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*
+      INFO = 0
+      IF(      ( .NOT.UPPER               ).AND.
+     $         ( .NOT.LSAME( UPLO , 'L' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANS, 'T' ) )      )THEN
+         INFO = 2
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( K  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDB.LT.MAX( 1, NROWA ) )THEN
+         INFO = 9
+      ELSE IF( LDC.LT.MAX( 1, N     ) )THEN
+         INFO = 12
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZSYR2K', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.
+     $    ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( UPPER )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 20, J = 1, N
+                  DO 10, I = 1, J
+                     C( I, J ) = ZERO
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40, J = 1, N
+                  DO 30, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+   30             CONTINUE
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( BETA.EQ.ZERO )THEN
+               DO 60, J = 1, N
+                  DO 50, I = J, N
+                     C( I, J ) = ZERO
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80, J = 1, N
+                  DO 70, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  C := alpha*A*B' + alpha*B*A' + C.
+*
+         IF( UPPER )THEN
+            DO 130, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 90, I = 1, J
+                     C( I, J ) = ZERO
+   90             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 100, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+  100             CONTINUE
+               END IF
+               DO 120, L = 1, K
+                  IF( ( A( J, L ).NE.ZERO ).OR.
+     $                ( B( J, L ).NE.ZERO )     )THEN
+                     TEMP1 = ALPHA*B( J, L )
+                     TEMP2 = ALPHA*A( J, L )
+                     DO 110, I = 1, J
+                        C( I, J ) = C( I, J ) + A( I, L )*TEMP1 +
+     $                                          B( I, L )*TEMP2
+  110                CONTINUE
+                  END IF
+  120          CONTINUE
+  130       CONTINUE
+         ELSE
+            DO 180, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 140, I = J, N
+                     C( I, J ) = ZERO
+  140             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 150, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+  150             CONTINUE
+               END IF
+               DO 170, L = 1, K
+                  IF( ( A( J, L ).NE.ZERO ).OR.
+     $                ( B( J, L ).NE.ZERO )     )THEN
+                     TEMP1 = ALPHA*B( J, L )
+                     TEMP2 = ALPHA*A( J, L )
+                     DO 160, I = J, N
+                        C( I, J ) = C( I, J ) + A( I, L )*TEMP1 +
+     $                                          B( I, L )*TEMP2
+  160                CONTINUE
+                  END IF
+  170          CONTINUE
+  180       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  C := alpha*A'*B + alpha*B'*A + C.
+*
+         IF( UPPER )THEN
+            DO 210, J = 1, N
+               DO 200, I = 1, J
+                  TEMP1 = ZERO
+                  TEMP2 = ZERO
+                  DO 190, L = 1, K
+                     TEMP1 = TEMP1 + A( L, I )*B( L, J )
+                     TEMP2 = TEMP2 + B( L, I )*A( L, J )
+  190             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           ALPHA*TEMP1 + ALPHA*TEMP2
+                  END IF
+  200          CONTINUE
+  210       CONTINUE
+         ELSE
+            DO 240, J = 1, N
+               DO 230, I = J, N
+                  TEMP1 = ZERO
+                  TEMP2 = ZERO
+                  DO 220, L = 1, K
+                     TEMP1 = TEMP1 + A( L, I )*B( L, J )
+                     TEMP2 = TEMP2 + B( L, I )*A( L, J )
+  220             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP1 + ALPHA*TEMP2
+                  ELSE
+                     C( I, J ) = BETA *C( I, J ) +
+     $                           ALPHA*TEMP1 + ALPHA*TEMP2
+                  END IF
+  230          CONTINUE
+  240       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZSYR2K.
+*
+      END
diff --git a/costa/native/external/blas/zsyrk.f b/costa/native/external/blas/zsyrk.f
new file mode 100644
index 000000000..77e2c20aa
--- /dev/null
+++ b/costa/native/external/blas/zsyrk.f
@@ -0,0 +1,293 @@
+      SUBROUTINE ZSYRK ( UPLO, TRANS, N, K, ALPHA, A, LDA,
+     $                   BETA, C, LDC )
+*     .. Scalar Arguments ..
+      CHARACTER*1        UPLO, TRANS
+      INTEGER            N, K, LDA, LDC
+      COMPLEX*16         ALPHA, BETA
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZSYRK  performs one of the symmetric rank k operations
+*
+*     C := alpha*A*A' + beta*C,
+*
+*  or
+*
+*     C := alpha*A'*A + beta*C,
+*
+*  where  alpha and beta  are scalars,  C is an  n by n symmetric matrix
+*  and  A  is an  n by k  matrix in the first case and a  k by n  matrix
+*  in the second case.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On  entry,   UPLO  specifies  whether  the  upper  or  lower
+*           triangular  part  of the  array  C  is to be  referenced  as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry,  TRANS  specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   C := alpha*A*A' + beta*C.
+*
+*              TRANS = 'T' or 't'   C := alpha*A'*A + beta*C.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry,  N specifies the order of the matrix C.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+*           of  columns   of  the   matrix   A,   and  on   entry   with
+*           TRANS = 'T' or 't',  K  specifies  the number of rows of the
+*           matrix A.  K must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is
+*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+*           part of the array  A  must contain the matrix  A,  otherwise
+*           the leading  k by n  part of the array  A  must contain  the
+*           matrix A.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+*           then  LDA must be at least  max( 1, n ), otherwise  LDA must
+*           be at least  max( 1, k ).
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX*16      .
+*           On entry, BETA specifies the scalar beta.
+*           Unchanged on exit.
+*
+*  C      - COMPLEX*16       array of DIMENSION ( LDC, n ).
+*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+*           upper triangular part of the array C must contain the upper
+*           triangular part  of the  symmetric matrix  and the strictly
+*           lower triangular part of C is not referenced.  On exit, the
+*           upper triangular part of the array  C is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+*           lower triangular part of the array C must contain the lower
+*           triangular part  of the  symmetric matrix  and the strictly
+*           upper triangular part of C is not referenced.  On exit, the
+*           lower triangular part of the array  C is overwritten by the
+*           lower triangular part of the updated matrix.
+*
+*  LDC    - INTEGER.
+*           On entry, LDC specifies the first dimension of C as declared
+*           in  the  calling  (sub)  program.   LDC  must  be  at  least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, INFO, J, L, NROWA
+      COMPLEX*16         TEMP
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER        ( ONE  = ( 1.0D+0, 0.0D+0 ) )
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+         NROWA = N
+      ELSE
+         NROWA = K
+      END IF
+      UPPER = LSAME( UPLO, 'U' )
+*
+      INFO = 0
+      IF(      ( .NOT.UPPER               ).AND.
+     $         ( .NOT.LSAME( UPLO , 'L' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.LSAME( TRANS, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANS, 'T' ) )      )THEN
+         INFO = 2
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 3
+      ELSE IF( K  .LT.0               )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 7
+      ELSE IF( LDC.LT.MAX( 1, N     ) )THEN
+         INFO = 10
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZSYRK ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ).OR.
+     $    ( ( ( ALPHA.EQ.ZERO ).OR.( K.EQ.0 ) ).AND.( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         IF( UPPER )THEN
+            IF( BETA.EQ.ZERO )THEN
+               DO 20, J = 1, N
+                  DO 10, I = 1, J
+                     C( I, J ) = ZERO
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40, J = 1, N
+                  DO 30, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+   30             CONTINUE
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( BETA.EQ.ZERO )THEN
+               DO 60, J = 1, N
+                  DO 50, I = J, N
+                     C( I, J ) = ZERO
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80, J = 1, N
+                  DO 70, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  C := alpha*A*A' + beta*C.
+*
+         IF( UPPER )THEN
+            DO 130, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 90, I = 1, J
+                     C( I, J ) = ZERO
+   90             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 100, I = 1, J
+                     C( I, J ) = BETA*C( I, J )
+  100             CONTINUE
+               END IF
+               DO 120, L = 1, K
+                  IF( A( J, L ).NE.ZERO )THEN
+                     TEMP = ALPHA*A( J, L )
+                     DO 110, I = 1, J
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  110                CONTINUE
+                  END IF
+  120          CONTINUE
+  130       CONTINUE
+         ELSE
+            DO 180, J = 1, N
+               IF( BETA.EQ.ZERO )THEN
+                  DO 140, I = J, N
+                     C( I, J ) = ZERO
+  140             CONTINUE
+               ELSE IF( BETA.NE.ONE )THEN
+                  DO 150, I = J, N
+                     C( I, J ) = BETA*C( I, J )
+  150             CONTINUE
+               END IF
+               DO 170, L = 1, K
+                  IF( A( J, L ).NE.ZERO )THEN
+                     TEMP      = ALPHA*A( J, L )
+                     DO 160, I = J, N
+                        C( I, J ) = C( I, J ) + TEMP*A( I, L )
+  160                CONTINUE
+                  END IF
+  170          CONTINUE
+  180       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  C := alpha*A'*A + beta*C.
+*
+         IF( UPPER )THEN
+            DO 210, J = 1, N
+               DO 200, I = 1, J
+                  TEMP = ZERO
+                  DO 190, L = 1, K
+                     TEMP = TEMP + A( L, I )*A( L, J )
+  190             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  200          CONTINUE
+  210       CONTINUE
+         ELSE
+            DO 240, J = 1, N
+               DO 230, I = J, N
+                  TEMP = ZERO
+                  DO 220, L = 1, K
+                     TEMP = TEMP + A( L, I )*A( L, J )
+  220             CONTINUE
+                  IF( BETA.EQ.ZERO )THEN
+                     C( I, J ) = ALPHA*TEMP
+                  ELSE
+                     C( I, J ) = ALPHA*TEMP + BETA*C( I, J )
+                  END IF
+  230          CONTINUE
+  240       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZSYRK .
+*
+      END
diff --git a/costa/native/external/blas/ztbmv.f b/costa/native/external/blas/ztbmv.f
new file mode 100644
index 000000000..17944082c
--- /dev/null
+++ b/costa/native/external/blas/ztbmv.f
@@ -0,0 +1,377 @@
+      SUBROUTINE ZTBMV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, K, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTBMV  performs one of the matrix-vector operations
+*
+*     x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x,
+*
+*  where x is an n element vector and  A is an n by n unit, or non-unit,
+*  upper or lower triangular band matrix, with ( k + 1 ) diagonals.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   x := A*x.
+*
+*              TRANS = 'T' or 't'   x := A'*x.
+*
+*              TRANS = 'C' or 'c'   x := conjg( A' )*x.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with UPLO = 'U' or 'u', K specifies the number of
+*           super-diagonals of the matrix A.
+*           On entry with UPLO = 'L' or 'l', K specifies the number of
+*           sub-diagonals of the matrix A.
+*           K must satisfy  0 .le. K.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
+*           by n part of the array A must contain the upper triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row
+*           ( k + 1 ) of the array, the first super-diagonal starting at
+*           position 2 in row k, and so on. The top left k by k triangle
+*           of the array A is not referenced.
+*           The following program segment will transfer an upper
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = K + 1 - J
+*                    DO 10, I = MAX( 1, J - K ), J
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
+*           by n part of the array A must contain the lower triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row 1 of
+*           the array, the first sub-diagonal starting at position 1 in
+*           row 2, and so on. The bottom right k by k triangle of the
+*           array A is not referenced.
+*           The following program segment will transfer a lower
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = 1 - J
+*                    DO 10, I = J, MIN( N, J + K )
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Note that when DIAG = 'U' or 'u' the elements of the array A
+*           corresponding to the diagonal elements of the matrix are not
+*           referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( k + 1 ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x. On exit, X is overwritten with the
+*           tranformed vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP
+      INTEGER            I, INFO, IX, J, JX, KPLUS1, KX, L
+      LOGICAL            NOCONJ, NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( K.LT.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.( K + 1 ) )THEN
+         INFO = 7
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZTBMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOCONJ = LSAME( TRANS, 'T' )
+      NOUNIT = LSAME( DIAG , 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX   too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*         Form  x := A*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     L    = KPLUS1 - J
+                     DO 10, I = MAX( 1, J - K ), J - 1
+                        X( I ) = X( I ) + TEMP*A( L + I, J )
+   10                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( KPLUS1, J )
+                  END IF
+   20          CONTINUE
+            ELSE
+               JX = KX
+               DO 40, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     L    = KPLUS1  - J
+                     DO 30, I = MAX( 1, J - K ), J - 1
+                        X( IX ) = X( IX ) + TEMP*A( L + I, J )
+                        IX      = IX      + INCX
+   30                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( KPLUS1, J )
+                  END IF
+                  JX = JX + INCX
+                  IF( J.GT.K )
+     $               KX = KX + INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     L    = 1      - J
+                     DO 50, I = MIN( N, J + K ), J + 1, -1
+                        X( I ) = X( I ) + TEMP*A( L + I, J )
+   50                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( 1, J )
+                  END IF
+   60          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 80, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     L    = 1       - J
+                     DO 70, I = MIN( N, J + K ), J + 1, -1
+                        X( IX ) = X( IX ) + TEMP*A( L + I, J )
+                        IX      = IX      - INCX
+   70                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( 1, J )
+                  END IF
+                  JX = JX - INCX
+                  IF( ( N - J ).GE.K )
+     $               KX = KX - INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := A'*x  or  x := conjg( A' )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 110, J = N, 1, -1
+                  TEMP = X( J )
+                  L    = KPLUS1 - J
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( KPLUS1, J )
+                     DO 90, I = J - 1, MAX( 1, J - K ), -1
+                        TEMP = TEMP + A( L + I, J )*X( I )
+   90                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*DCONJG( A( KPLUS1, J ) )
+                     DO 100, I = J - 1, MAX( 1, J - K ), -1
+                        TEMP = TEMP + DCONJG( A( L + I, J ) )*X( I )
+  100                CONTINUE
+                  END IF
+                  X( J ) = TEMP
+  110          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 140, J = N, 1, -1
+                  TEMP = X( JX )
+                  KX   = KX      - INCX
+                  IX   = KX
+                  L    = KPLUS1  - J
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( KPLUS1, J )
+                     DO 120, I = J - 1, MAX( 1, J - K ), -1
+                        TEMP = TEMP + A( L + I, J )*X( IX )
+                        IX   = IX   - INCX
+  120                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*DCONJG( A( KPLUS1, J ) )
+                     DO 130, I = J - 1, MAX( 1, J - K ), -1
+                        TEMP = TEMP + DCONJG( A( L + I, J ) )*X( IX )
+                        IX   = IX   - INCX
+  130                CONTINUE
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+  140          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 170, J = 1, N
+                  TEMP = X( J )
+                  L    = 1      - J
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( 1, J )
+                     DO 150, I = J + 1, MIN( N, J + K )
+                        TEMP = TEMP + A( L + I, J )*X( I )
+  150                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*DCONJG( A( 1, J ) )
+                     DO 160, I = J + 1, MIN( N, J + K )
+                        TEMP = TEMP + DCONJG( A( L + I, J ) )*X( I )
+  160                CONTINUE
+                  END IF
+                  X( J ) = TEMP
+  170          CONTINUE
+            ELSE
+               JX = KX
+               DO 200, J = 1, N
+                  TEMP = X( JX )
+                  KX   = KX      + INCX
+                  IX   = KX
+                  L    = 1       - J
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( 1, J )
+                     DO 180, I = J + 1, MIN( N, J + K )
+                        TEMP = TEMP + A( L + I, J )*X( IX )
+                        IX   = IX   + INCX
+  180                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*DCONJG( A( 1, J ) )
+                     DO 190, I = J + 1, MIN( N, J + K )
+                        TEMP = TEMP + DCONJG( A( L + I, J ) )*X( IX )
+                        IX   = IX   + INCX
+  190                CONTINUE
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+  200          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZTBMV .
+*
+      END
diff --git a/costa/native/external/blas/ztbsv.f b/costa/native/external/blas/ztbsv.f
new file mode 100644
index 000000000..f3ded8193
--- /dev/null
+++ b/costa/native/external/blas/ztbsv.f
@@ -0,0 +1,381 @@
+      SUBROUTINE ZTBSV ( UPLO, TRANS, DIAG, N, K, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, K, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTBSV  solves one of the systems of equations
+*
+*     A*x = b,   or   A'*x = b,   or   conjg( A' )*x = b,
+*
+*  where b and x are n element vectors and A is an n by n unit, or
+*  non-unit, upper or lower triangular band matrix, with ( k + 1 )
+*  diagonals.
+*
+*  No test for singularity or near-singularity is included in this
+*  routine. Such tests must be performed before calling this routine.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the equations to be solved as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   A*x = b.
+*
+*              TRANS = 'T' or 't'   A'*x = b.
+*
+*              TRANS = 'C' or 'c'   conjg( A' )*x = b.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  K      - INTEGER.
+*           On entry with UPLO = 'U' or 'u', K specifies the number of
+*           super-diagonals of the matrix A.
+*           On entry with UPLO = 'L' or 'l', K specifies the number of
+*           sub-diagonals of the matrix A.
+*           K must satisfy  0 .le. K.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
+*           by n part of the array A must contain the upper triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row
+*           ( k + 1 ) of the array, the first super-diagonal starting at
+*           position 2 in row k, and so on. The top left k by k triangle
+*           of the array A is not referenced.
+*           The following program segment will transfer an upper
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = K + 1 - J
+*                    DO 10, I = MAX( 1, J - K ), J
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
+*           by n part of the array A must contain the lower triangular
+*           band part of the matrix of coefficients, supplied column by
+*           column, with the leading diagonal of the matrix in row 1 of
+*           the array, the first sub-diagonal starting at position 1 in
+*           row 2, and so on. The bottom right k by k triangle of the
+*           array A is not referenced.
+*           The following program segment will transfer a lower
+*           triangular band matrix from conventional full matrix storage
+*           to band storage:
+*
+*                 DO 20, J = 1, N
+*                    M = 1 - J
+*                    DO 10, I = J, MIN( N, J + K )
+*                       A( M + I, J ) = matrix( I, J )
+*              10    CONTINUE
+*              20 CONTINUE
+*
+*           Note that when DIAG = 'U' or 'u' the elements of the array A
+*           corresponding to the diagonal elements of the matrix are not
+*           referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           ( k + 1 ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element right-hand side vector b. On exit, X is overwritten
+*           with the solution vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP
+      INTEGER            I, INFO, IX, J, JX, KPLUS1, KX, L
+      LOGICAL            NOCONJ, NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( K.LT.0 )THEN
+         INFO = 5
+      ELSE IF( LDA.LT.( K + 1 ) )THEN
+         INFO = 7
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZTBSV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOCONJ = LSAME( TRANS, 'T' )
+      NOUNIT = LSAME( DIAG , 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed by sequentially with one pass through A.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x := inv( A )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     L = KPLUS1 - J
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( KPLUS1, J )
+                     TEMP = X( J )
+                     DO 10, I = J - 1, MAX( 1, J - K ), -1
+                        X( I ) = X( I ) - TEMP*A( L + I, J )
+   10                CONTINUE
+                  END IF
+   20          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 40, J = N, 1, -1
+                  KX = KX - INCX
+                  IF( X( JX ).NE.ZERO )THEN
+                     IX = KX
+                     L  = KPLUS1 - J
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( KPLUS1, J )
+                     TEMP = X( JX )
+                     DO 30, I = J - 1, MAX( 1, J - K ), -1
+                        X( IX ) = X( IX ) - TEMP*A( L + I, J )
+                        IX      = IX      - INCX
+   30                CONTINUE
+                  END IF
+                  JX = JX - INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     L = 1 - J
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( 1, J )
+                     TEMP = X( J )
+                     DO 50, I = J + 1, MIN( N, J + K )
+                        X( I ) = X( I ) - TEMP*A( L + I, J )
+   50                CONTINUE
+                  END IF
+   60          CONTINUE
+            ELSE
+               JX = KX
+               DO 80, J = 1, N
+                  KX = KX + INCX
+                  IF( X( JX ).NE.ZERO )THEN
+                     IX = KX
+                     L  = 1  - J
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( 1, J )
+                     TEMP = X( JX )
+                     DO 70, I = J + 1, MIN( N, J + K )
+                        X( IX ) = X( IX ) - TEMP*A( L + I, J )
+                        IX      = IX      + INCX
+   70                CONTINUE
+                  END IF
+                  JX = JX + INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := inv( A' )*x  or  x := inv( conjg( A') )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KPLUS1 = K + 1
+            IF( INCX.EQ.1 )THEN
+               DO 110, J = 1, N
+                  TEMP = X( J )
+                  L    = KPLUS1 - J
+                  IF( NOCONJ )THEN
+                     DO 90, I = MAX( 1, J - K ), J - 1
+                        TEMP = TEMP - A( L + I, J )*X( I )
+   90                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( KPLUS1, J )
+                  ELSE
+                     DO 100, I = MAX( 1, J - K ), J - 1
+                        TEMP = TEMP - DCONJG( A( L + I, J ) )*X( I )
+  100                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/DCONJG( A( KPLUS1, J ) )
+                  END IF
+                  X( J ) = TEMP
+  110          CONTINUE
+            ELSE
+               JX = KX
+               DO 140, J = 1, N
+                  TEMP = X( JX )
+                  IX   = KX
+                  L    = KPLUS1  - J
+                  IF( NOCONJ )THEN
+                     DO 120, I = MAX( 1, J - K ), J - 1
+                        TEMP = TEMP - A( L + I, J )*X( IX )
+                        IX   = IX   + INCX
+  120                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( KPLUS1, J )
+                  ELSE
+                     DO 130, I = MAX( 1, J - K ), J - 1
+                        TEMP = TEMP - DCONJG( A( L + I, J ) )*X( IX )
+                        IX   = IX   + INCX
+  130                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/DCONJG( A( KPLUS1, J ) )
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+                  IF( J.GT.K )
+     $               KX = KX + INCX
+  140          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 170, J = N, 1, -1
+                  TEMP = X( J )
+                  L    = 1      - J
+                  IF( NOCONJ )THEN
+                     DO 150, I = MIN( N, J + K ), J + 1, -1
+                        TEMP = TEMP - A( L + I, J )*X( I )
+  150                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( 1, J )
+                  ELSE
+                     DO 160, I = MIN( N, J + K ), J + 1, -1
+                        TEMP = TEMP - DCONJG( A( L + I, J ) )*X( I )
+  160                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/DCONJG( A( 1, J ) )
+                  END IF
+                  X( J ) = TEMP
+  170          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 200, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = KX
+                  L    = 1       - J
+                  IF( NOCONJ )THEN
+                     DO 180, I = MIN( N, J + K ), J + 1, -1
+                        TEMP = TEMP - A( L + I, J )*X( IX )
+                        IX   = IX   - INCX
+  180                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( 1, J )
+                  ELSE
+                     DO 190, I = MIN( N, J + K ), J + 1, -1
+                        TEMP = TEMP - DCONJG( A( L + I, J ) )*X( IX )
+                        IX   = IX   - INCX
+  190                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/DCONJG( A( 1, J ) )
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+                  IF( ( N - J ).GE.K )
+     $               KX = KX - INCX
+  200          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZTBSV .
+*
+      END
diff --git a/costa/native/external/blas/ztpmv.f b/costa/native/external/blas/ztpmv.f
new file mode 100644
index 000000000..4fad3a8bd
--- /dev/null
+++ b/costa/native/external/blas/ztpmv.f
@@ -0,0 +1,338 @@
+      SUBROUTINE ZTPMV ( UPLO, TRANS, DIAG, N, AP, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      COMPLEX*16         AP( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTPMV  performs one of the matrix-vector operations
+*
+*     x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x,
+*
+*  where x is an n element vector and  A is an n by n unit, or non-unit,
+*  upper or lower triangular matrix, supplied in packed form.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   x := A*x.
+*
+*              TRANS = 'T' or 't'   x := A'*x.
+*
+*              TRANS = 'C' or 'c'   x := conjg( A' )*x.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  AP     - COMPLEX*16       array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
+*           respectively, and so on.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
+*           respectively, and so on.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x. On exit, X is overwritten with the
+*           tranformed vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP
+      INTEGER            I, INFO, IX, J, JX, K, KK, KX
+      LOGICAL            NOCONJ, NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZTPMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOCONJ = LSAME( TRANS, 'T' )
+      NOUNIT = LSAME( DIAG , 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of AP are
+*     accessed sequentially with one pass through AP.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x:= A*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK = 1
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     K    = KK
+                     DO 10, I = 1, J - 1
+                        X( I ) = X( I ) + TEMP*AP( K )
+                        K      = K      + 1
+   10                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*AP( KK + J - 1 )
+                  END IF
+                  KK = KK + J
+   20          CONTINUE
+            ELSE
+               JX = KX
+               DO 40, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 30, K = KK, KK + J - 2
+                        X( IX ) = X( IX ) + TEMP*AP( K )
+                        IX      = IX      + INCX
+   30                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*AP( KK + J - 1 )
+                  END IF
+                  JX = JX + INCX
+                  KK = KK + J
+   40          CONTINUE
+            END IF
+         ELSE
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     K    = KK
+                     DO 50, I = N, J + 1, -1
+                        X( I ) = X( I ) + TEMP*AP( K )
+                        K      = K      - 1
+   50                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*AP( KK - N + J )
+                  END IF
+                  KK = KK - ( N - J + 1 )
+   60          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 80, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 70, K = KK, KK - ( N - ( J + 1 ) ), -1
+                        X( IX ) = X( IX ) + TEMP*AP( K )
+                        IX      = IX      - INCX
+   70                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*AP( KK - N + J )
+                  END IF
+                  JX = JX - INCX
+                  KK = KK - ( N - J + 1 )
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := A'*x  or  x := conjg( A' )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 110, J = N, 1, -1
+                  TEMP = X( J )
+                  K    = KK     - 1
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*AP( KK )
+                     DO 90, I = J - 1, 1, -1
+                        TEMP = TEMP + AP( K )*X( I )
+                        K    = K    - 1
+   90                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*DCONJG( AP( KK ) )
+                     DO 100, I = J - 1, 1, -1
+                        TEMP = TEMP + DCONJG( AP( K ) )*X( I )
+                        K    = K    - 1
+  100                CONTINUE
+                  END IF
+                  X( J ) = TEMP
+                  KK     = KK   - J
+  110          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 140, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*AP( KK )
+                     DO 120, K = KK - 1, KK - J + 1, -1
+                        IX   = IX   - INCX
+                        TEMP = TEMP + AP( K )*X( IX )
+  120                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*DCONJG( AP( KK ) )
+                     DO 130, K = KK - 1, KK - J + 1, -1
+                        IX   = IX   - INCX
+                        TEMP = TEMP + DCONJG( AP( K ) )*X( IX )
+  130                CONTINUE
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+                  KK      = KK   - J
+  140          CONTINUE
+            END IF
+         ELSE
+            KK = 1
+            IF( INCX.EQ.1 )THEN
+               DO 170, J = 1, N
+                  TEMP = X( J )
+                  K    = KK     + 1
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*AP( KK )
+                     DO 150, I = J + 1, N
+                        TEMP = TEMP + AP( K )*X( I )
+                        K    = K    + 1
+  150                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*DCONJG( AP( KK ) )
+                     DO 160, I = J + 1, N
+                        TEMP = TEMP + DCONJG( AP( K ) )*X( I )
+                        K    = K    + 1
+  160                CONTINUE
+                  END IF
+                  X( J ) = TEMP
+                  KK     = KK   + ( N - J + 1 )
+  170          CONTINUE
+            ELSE
+               JX = KX
+               DO 200, J = 1, N
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*AP( KK )
+                     DO 180, K = KK + 1, KK + N - J
+                        IX   = IX   + INCX
+                        TEMP = TEMP + AP( K )*X( IX )
+  180                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*DCONJG( AP( KK ) )
+                     DO 190, K = KK + 1, KK + N - J
+                        IX   = IX   + INCX
+                        TEMP = TEMP + DCONJG( AP( K ) )*X( IX )
+  190                CONTINUE
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+                  KK      = KK   + ( N - J + 1 )
+  200          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZTPMV .
+*
+      END
diff --git a/costa/native/external/blas/ztpsv.f b/costa/native/external/blas/ztpsv.f
new file mode 100644
index 000000000..8649f4748
--- /dev/null
+++ b/costa/native/external/blas/ztpsv.f
@@ -0,0 +1,341 @@
+      SUBROUTINE ZTPSV ( UPLO, TRANS, DIAG, N, AP, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      COMPLEX*16         AP( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTPSV  solves one of the systems of equations
+*
+*     A*x = b,   or   A'*x = b,   or   conjg( A' )*x = b,
+*
+*  where b and x are n element vectors and A is an n by n unit, or
+*  non-unit, upper or lower triangular matrix, supplied in packed form.
+*
+*  No test for singularity or near-singularity is included in this
+*  routine. Such tests must be performed before calling this routine.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the equations to be solved as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   A*x = b.
+*
+*              TRANS = 'T' or 't'   A'*x = b.
+*
+*              TRANS = 'C' or 'c'   conjg( A' )*x = b.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  AP     - COMPLEX*16       array of DIMENSION at least
+*           ( ( n*( n + 1 ) )/2 ).
+*           Before entry with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
+*           respectively, and so on.
+*           Before entry with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular matrix packed sequentially,
+*           column by column, so that AP( 1 ) contains a( 1, 1 ),
+*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
+*           respectively, and so on.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element right-hand side vector b. On exit, X is overwritten
+*           with the solution vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP
+      INTEGER            I, INFO, IX, J, JX, K, KK, KX
+      LOGICAL            NOCONJ, NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZTPSV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOCONJ = LSAME( TRANS, 'T' )
+      NOUNIT = LSAME( DIAG , 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of AP are
+*     accessed sequentially with one pass through AP.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x := inv( A )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/AP( KK )
+                     TEMP = X( J )
+                     K    = KK     - 1
+                     DO 10, I = J - 1, 1, -1
+                        X( I ) = X( I ) - TEMP*AP( K )
+                        K      = K      - 1
+   10                CONTINUE
+                  END IF
+                  KK = KK - J
+   20          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 40, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/AP( KK )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 30, K = KK - 1, KK - J + 1, -1
+                        IX      = IX      - INCX
+                        X( IX ) = X( IX ) - TEMP*AP( K )
+   30                CONTINUE
+                  END IF
+                  JX = JX - INCX
+                  KK = KK - J
+   40          CONTINUE
+            END IF
+         ELSE
+            KK = 1
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/AP( KK )
+                     TEMP = X( J )
+                     K    = KK     + 1
+                     DO 50, I = J + 1, N
+                        X( I ) = X( I ) - TEMP*AP( K )
+                        K      = K      + 1
+   50                CONTINUE
+                  END IF
+                  KK = KK + ( N - J + 1 )
+   60          CONTINUE
+            ELSE
+               JX = KX
+               DO 80, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/AP( KK )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 70, K = KK + 1, KK + N - J
+                        IX      = IX      + INCX
+                        X( IX ) = X( IX ) - TEMP*AP( K )
+   70                CONTINUE
+                  END IF
+                  JX = JX + INCX
+                  KK = KK + ( N - J + 1 )
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := inv( A' )*x  or  x := inv( conjg( A' ) )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            KK = 1
+            IF( INCX.EQ.1 )THEN
+               DO 110, J = 1, N
+                  TEMP = X( J )
+                  K    = KK
+                  IF( NOCONJ )THEN
+                     DO 90, I = 1, J - 1
+                        TEMP = TEMP - AP( K )*X( I )
+                        K    = K    + 1
+   90                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/AP( KK + J - 1 )
+                  ELSE
+                     DO 100, I = 1, J - 1
+                        TEMP = TEMP - DCONJG( AP( K ) )*X( I )
+                        K    = K    + 1
+  100                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/DCONJG( AP( KK + J - 1 ) )
+                  END IF
+                  X( J ) = TEMP
+                  KK     = KK   + J
+  110          CONTINUE
+            ELSE
+               JX = KX
+               DO 140, J = 1, N
+                  TEMP = X( JX )
+                  IX   = KX
+                  IF( NOCONJ )THEN
+                     DO 120, K = KK, KK + J - 2
+                        TEMP = TEMP - AP( K )*X( IX )
+                        IX   = IX   + INCX
+  120                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/AP( KK + J - 1 )
+                  ELSE
+                     DO 130, K = KK, KK + J - 2
+                        TEMP = TEMP - DCONJG( AP( K ) )*X( IX )
+                        IX   = IX   + INCX
+  130                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/DCONJG( AP( KK + J - 1 ) )
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+                  KK      = KK   + J
+  140          CONTINUE
+            END IF
+         ELSE
+            KK = ( N*( N + 1 ) )/2
+            IF( INCX.EQ.1 )THEN
+               DO 170, J = N, 1, -1
+                  TEMP = X( J )
+                  K    = KK
+                  IF( NOCONJ )THEN
+                     DO 150, I = N, J + 1, -1
+                        TEMP = TEMP - AP( K )*X( I )
+                        K    = K    - 1
+  150                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/AP( KK - N + J )
+                  ELSE
+                     DO 160, I = N, J + 1, -1
+                        TEMP = TEMP - DCONJG( AP( K ) )*X( I )
+                        K    = K    - 1
+  160                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/DCONJG( AP( KK - N + J ) )
+                  END IF
+                  X( J ) = TEMP
+                  KK     = KK   - ( N - J + 1 )
+  170          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 200, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = KX
+                  IF( NOCONJ )THEN
+                     DO 180, K = KK, KK - ( N - ( J + 1 ) ), -1
+                        TEMP = TEMP - AP( K )*X( IX )
+                        IX   = IX   - INCX
+  180                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/AP( KK - N + J )
+                  ELSE
+                     DO 190, K = KK, KK - ( N - ( J + 1 ) ), -1
+                        TEMP = TEMP - DCONJG( AP( K ) )*X( IX )
+                        IX   = IX   - INCX
+  190                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/DCONJG( AP( KK - N + J ) )
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+                  KK      = KK   - ( N - J + 1 )
+  200          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZTPSV .
+*
+      END
diff --git a/costa/native/external/blas/ztrmm.f b/costa/native/external/blas/ztrmm.f
new file mode 100644
index 000000000..30910d1da
--- /dev/null
+++ b/costa/native/external/blas/ztrmm.f
@@ -0,0 +1,392 @@
+      SUBROUTINE ZTRMM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA,
+     $                   B, LDB )
+*     .. Scalar Arguments ..
+      CHARACTER*1        SIDE, UPLO, TRANSA, DIAG
+      INTEGER            M, N, LDA, LDB
+      COMPLEX*16         ALPHA
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTRMM  performs one of the matrix-matrix operations
+*
+*     B := alpha*op( A )*B,   or   B := alpha*B*op( A )
+*
+*  where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or
+*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
+*
+*     op( A ) = A   or   op( A ) = A'   or   op( A ) = conjg( A' ).
+*
+*  Parameters
+*  ==========
+*
+*  SIDE   - CHARACTER*1.
+*           On entry,  SIDE specifies whether  op( A ) multiplies B from
+*           the left or right as follows:
+*
+*              SIDE = 'L' or 'l'   B := alpha*op( A )*B.
+*
+*              SIDE = 'R' or 'r'   B := alpha*B*op( A ).
+*
+*           Unchanged on exit.
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix A is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANSA - CHARACTER*1.
+*           On entry, TRANSA specifies the form of op( A ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSA = 'N' or 'n'   op( A ) = A.
+*
+*              TRANSA = 'T' or 't'   op( A ) = A'.
+*
+*              TRANSA = 'C' or 'c'   op( A ) = conjg( A' ).
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit triangular
+*           as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of B. M must be at
+*           least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of B.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
+*           zero then  A is not referenced and  B need not be set before
+*           entry.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, k ), where k is m
+*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
+*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
+*           upper triangular part of the array  A must contain the upper
+*           triangular matrix  and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
+*           lower triangular part of the array  A must contain the lower
+*           triangular matrix  and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
+*           A  are not referenced either,  but are assumed to be  unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
+*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
+*           then LDA must be at least max( 1, n ).
+*           Unchanged on exit.
+*
+*  B      - COMPLEX*16       array of DIMENSION ( LDB, n ).
+*           Before entry,  the leading  m by n part of the array  B must
+*           contain the matrix  B,  and  on exit  is overwritten  by the
+*           transformed matrix.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   LDB  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX
+*     .. Local Scalars ..
+      LOGICAL            LSIDE, NOCONJ, NOUNIT, UPPER
+      INTEGER            I, INFO, J, K, NROWA
+      COMPLEX*16         TEMP
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER        ( ONE  = ( 1.0D+0, 0.0D+0 ) )
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      LSIDE  = LSAME( SIDE  , 'L' )
+      IF( LSIDE )THEN
+         NROWA = M
+      ELSE
+         NROWA = N
+      END IF
+      NOCONJ = LSAME( TRANSA, 'T' )
+      NOUNIT = LSAME( DIAG  , 'N' )
+      UPPER  = LSAME( UPLO  , 'U' )
+*
+      INFO   = 0
+      IF(      ( .NOT.LSIDE                ).AND.
+     $         ( .NOT.LSAME( SIDE  , 'R' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.UPPER                ).AND.
+     $         ( .NOT.LSAME( UPLO  , 'L' ) )      )THEN
+         INFO = 2
+      ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'T' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'C' ) )      )THEN
+         INFO = 3
+      ELSE IF( ( .NOT.LSAME( DIAG  , 'U' ) ).AND.
+     $         ( .NOT.LSAME( DIAG  , 'N' ) )      )THEN
+         INFO = 4
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 5
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 6
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 9
+      ELSE IF( LDB.LT.MAX( 1, M     ) )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZTRMM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         DO 20, J = 1, N
+            DO 10, I = 1, M
+               B( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSIDE )THEN
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*A*B.
+*
+            IF( UPPER )THEN
+               DO 50, J = 1, N
+                  DO 40, K = 1, M
+                     IF( B( K, J ).NE.ZERO )THEN
+                        TEMP = ALPHA*B( K, J )
+                        DO 30, I = 1, K - 1
+                           B( I, J ) = B( I, J ) + TEMP*A( I, K )
+   30                   CONTINUE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP*A( K, K )
+                        B( K, J ) = TEMP
+                     END IF
+   40             CONTINUE
+   50          CONTINUE
+            ELSE
+               DO 80, J = 1, N
+                  DO 70 K = M, 1, -1
+                     IF( B( K, J ).NE.ZERO )THEN
+                        TEMP      = ALPHA*B( K, J )
+                        B( K, J ) = TEMP
+                        IF( NOUNIT )
+     $                     B( K, J ) = B( K, J )*A( K, K )
+                        DO 60, I = K + 1, M
+                           B( I, J ) = B( I, J ) + TEMP*A( I, K )
+   60                   CONTINUE
+                     END IF
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*A'*B   or   B := alpha*conjg( A' )*B.
+*
+            IF( UPPER )THEN
+               DO 120, J = 1, N
+                  DO 110, I = M, 1, -1
+                     TEMP = B( I, J )
+                     IF( NOCONJ )THEN
+                        IF( NOUNIT )
+     $                     TEMP = TEMP*A( I, I )
+                        DO 90, K = 1, I - 1
+                           TEMP = TEMP + A( K, I )*B( K, J )
+   90                   CONTINUE
+                     ELSE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP*DCONJG( A( I, I ) )
+                        DO 100, K = 1, I - 1
+                           TEMP = TEMP + DCONJG( A( K, I ) )*B( K, J )
+  100                   CONTINUE
+                     END IF
+                     B( I, J ) = ALPHA*TEMP
+  110             CONTINUE
+  120          CONTINUE
+            ELSE
+               DO 160, J = 1, N
+                  DO 150, I = 1, M
+                     TEMP = B( I, J )
+                     IF( NOCONJ )THEN
+                        IF( NOUNIT )
+     $                     TEMP = TEMP*A( I, I )
+                        DO 130, K = I + 1, M
+                           TEMP = TEMP + A( K, I )*B( K, J )
+  130                   CONTINUE
+                     ELSE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP*DCONJG( A( I, I ) )
+                        DO 140, K = I + 1, M
+                           TEMP = TEMP + DCONJG( A( K, I ) )*B( K, J )
+  140                   CONTINUE
+                     END IF
+                     B( I, J ) = ALPHA*TEMP
+  150             CONTINUE
+  160          CONTINUE
+            END IF
+         END IF
+      ELSE
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*B*A.
+*
+            IF( UPPER )THEN
+               DO 200, J = N, 1, -1
+                  TEMP = ALPHA
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 170, I = 1, M
+                     B( I, J ) = TEMP*B( I, J )
+  170             CONTINUE
+                  DO 190, K = 1, J - 1
+                     IF( A( K, J ).NE.ZERO )THEN
+                        TEMP = ALPHA*A( K, J )
+                        DO 180, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  180                   CONTINUE
+                     END IF
+  190             CONTINUE
+  200          CONTINUE
+            ELSE
+               DO 240, J = 1, N
+                  TEMP = ALPHA
+                  IF( NOUNIT )
+     $               TEMP = TEMP*A( J, J )
+                  DO 210, I = 1, M
+                     B( I, J ) = TEMP*B( I, J )
+  210             CONTINUE
+                  DO 230, K = J + 1, N
+                     IF( A( K, J ).NE.ZERO )THEN
+                        TEMP = ALPHA*A( K, J )
+                        DO 220, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  220                   CONTINUE
+                     END IF
+  230             CONTINUE
+  240          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*B*A'   or   B := alpha*B*conjg( A' ).
+*
+            IF( UPPER )THEN
+               DO 280, K = 1, N
+                  DO 260, J = 1, K - 1
+                     IF( A( J, K ).NE.ZERO )THEN
+                        IF( NOCONJ )THEN
+                           TEMP = ALPHA*A( J, K )
+                        ELSE
+                           TEMP = ALPHA*DCONJG( A( J, K ) )
+                        END IF
+                        DO 250, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  250                   CONTINUE
+                     END IF
+  260             CONTINUE
+                  TEMP = ALPHA
+                  IF( NOUNIT )THEN
+                     IF( NOCONJ )THEN
+                        TEMP = TEMP*A( K, K )
+                     ELSE
+                        TEMP = TEMP*DCONJG( A( K, K ) )
+                     END IF
+                  END IF
+                  IF( TEMP.NE.ONE )THEN
+                     DO 270, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  270                CONTINUE
+                  END IF
+  280          CONTINUE
+            ELSE
+               DO 320, K = N, 1, -1
+                  DO 300, J = K + 1, N
+                     IF( A( J, K ).NE.ZERO )THEN
+                        IF( NOCONJ )THEN
+                           TEMP = ALPHA*A( J, K )
+                        ELSE
+                           TEMP = ALPHA*DCONJG( A( J, K ) )
+                        END IF
+                        DO 290, I = 1, M
+                           B( I, J ) = B( I, J ) + TEMP*B( I, K )
+  290                   CONTINUE
+                     END IF
+  300             CONTINUE
+                  TEMP = ALPHA
+                  IF( NOUNIT )THEN
+                     IF( NOCONJ )THEN
+                        TEMP = TEMP*A( K, K )
+                     ELSE
+                        TEMP = TEMP*DCONJG( A( K, K ) )
+                     END IF
+                  END IF
+                  IF( TEMP.NE.ONE )THEN
+                     DO 310, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  310                CONTINUE
+                  END IF
+  320          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZTRMM .
+*
+      END
diff --git a/costa/native/external/blas/ztrmv.f b/costa/native/external/blas/ztrmv.f
new file mode 100644
index 000000000..677e212b5
--- /dev/null
+++ b/costa/native/external/blas/ztrmv.f
@@ -0,0 +1,321 @@
+      SUBROUTINE ZTRMV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTRMV  performs one of the matrix-vector operations
+*
+*     x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x,
+*
+*  where x is an n element vector and  A is an n by n unit, or non-unit,
+*  upper or lower triangular matrix.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the operation to be performed as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   x := A*x.
+*
+*              TRANS = 'T' or 't'   x := A'*x.
+*
+*              TRANS = 'C' or 'c'   x := conjg( A' )*x.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular matrix and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular matrix and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced either, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element vector x. On exit, X is overwritten with the
+*           tranformed vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP
+      INTEGER            I, INFO, IX, J, JX, KX
+      LOGICAL            NOCONJ, NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZTRMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOCONJ = LSAME( TRANS, 'T' )
+      NOUNIT = LSAME( DIAG , 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x := A*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     DO 10, I = 1, J - 1
+                        X( I ) = X( I ) + TEMP*A( I, J )
+   10                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( J, J )
+                  END IF
+   20          CONTINUE
+            ELSE
+               JX = KX
+               DO 40, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 30, I = 1, J - 1
+                        X( IX ) = X( IX ) + TEMP*A( I, J )
+                        IX      = IX      + INCX
+   30                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( J, J )
+                  END IF
+                  JX = JX + INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     TEMP = X( J )
+                     DO 50, I = N, J + 1, -1
+                        X( I ) = X( I ) + TEMP*A( I, J )
+   50                CONTINUE
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )*A( J, J )
+                  END IF
+   60          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 80, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     TEMP = X( JX )
+                     IX   = KX
+                     DO 70, I = N, J + 1, -1
+                        X( IX ) = X( IX ) + TEMP*A( I, J )
+                        IX      = IX      - INCX
+   70                CONTINUE
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )*A( J, J )
+                  END IF
+                  JX = JX - INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := A'*x  or  x := conjg( A' )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 110, J = N, 1, -1
+                  TEMP = X( J )
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( J, J )
+                     DO 90, I = J - 1, 1, -1
+                        TEMP = TEMP + A( I, J )*X( I )
+   90                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*DCONJG( A( J, J ) )
+                     DO 100, I = J - 1, 1, -1
+                        TEMP = TEMP + DCONJG( A( I, J ) )*X( I )
+  100                CONTINUE
+                  END IF
+                  X( J ) = TEMP
+  110          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 140, J = N, 1, -1
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( J, J )
+                     DO 120, I = J - 1, 1, -1
+                        IX   = IX   - INCX
+                        TEMP = TEMP + A( I, J )*X( IX )
+  120                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*DCONJG( A( J, J ) )
+                     DO 130, I = J - 1, 1, -1
+                        IX   = IX   - INCX
+                        TEMP = TEMP + DCONJG( A( I, J ) )*X( IX )
+  130                CONTINUE
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+  140          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 170, J = 1, N
+                  TEMP = X( J )
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( J, J )
+                     DO 150, I = J + 1, N
+                        TEMP = TEMP + A( I, J )*X( I )
+  150                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*DCONJG( A( J, J ) )
+                     DO 160, I = J + 1, N
+                        TEMP = TEMP + DCONJG( A( I, J ) )*X( I )
+  160                CONTINUE
+                  END IF
+                  X( J ) = TEMP
+  170          CONTINUE
+            ELSE
+               JX = KX
+               DO 200, J = 1, N
+                  TEMP = X( JX )
+                  IX   = JX
+                  IF( NOCONJ )THEN
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*A( J, J )
+                     DO 180, I = J + 1, N
+                        IX   = IX   + INCX
+                        TEMP = TEMP + A( I, J )*X( IX )
+  180                CONTINUE
+                  ELSE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP*DCONJG( A( J, J ) )
+                     DO 190, I = J + 1, N
+                        IX   = IX   + INCX
+                        TEMP = TEMP + DCONJG( A( I, J ) )*X( IX )
+  190                CONTINUE
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+  200          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZTRMV .
+*
+      END
diff --git a/costa/native/external/blas/ztrsm.f b/costa/native/external/blas/ztrsm.f
new file mode 100644
index 000000000..e414ec666
--- /dev/null
+++ b/costa/native/external/blas/ztrsm.f
@@ -0,0 +1,414 @@
+      SUBROUTINE ZTRSM ( SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA,
+     $                   B, LDB )
+*     .. Scalar Arguments ..
+      CHARACTER*1        SIDE, UPLO, TRANSA, DIAG
+      INTEGER            M, N, LDA, LDB
+      COMPLEX*16         ALPHA
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTRSM  solves one of the matrix equations
+*
+*     op( A )*X = alpha*B,   or   X*op( A ) = alpha*B,
+*
+*  where alpha is a scalar, X and B are m by n matrices, A is a unit, or
+*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
+*
+*     op( A ) = A   or   op( A ) = A'   or   op( A ) = conjg( A' ).
+*
+*  The matrix X is overwritten on B.
+*
+*  Parameters
+*  ==========
+*
+*  SIDE   - CHARACTER*1.
+*           On entry, SIDE specifies whether op( A ) appears on the left
+*           or right of X as follows:
+*
+*              SIDE = 'L' or 'l'   op( A )*X = alpha*B.
+*
+*              SIDE = 'R' or 'r'   X*op( A ) = alpha*B.
+*
+*           Unchanged on exit.
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix A is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANSA - CHARACTER*1.
+*           On entry, TRANSA specifies the form of op( A ) to be used in
+*           the matrix multiplication as follows:
+*
+*              TRANSA = 'N' or 'n'   op( A ) = A.
+*
+*              TRANSA = 'T' or 't'   op( A ) = A'.
+*
+*              TRANSA = 'C' or 'c'   op( A ) = conjg( A' ).
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit triangular
+*           as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  M      - INTEGER.
+*           On entry, M specifies the number of rows of B. M must be at
+*           least zero.
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the number of columns of B.  N must be
+*           at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16      .
+*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
+*           zero then  A is not referenced and  B need not be set before
+*           entry.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, k ), where k is m
+*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
+*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
+*           upper triangular part of the array  A must contain the upper
+*           triangular matrix  and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
+*           lower triangular part of the array  A must contain the lower
+*           triangular matrix  and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
+*           A  are not referenced either,  but are assumed to be  unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
+*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
+*           then LDA must be at least max( 1, n ).
+*           Unchanged on exit.
+*
+*  B      - COMPLEX*16       array of DIMENSION ( LDB, n ).
+*           Before entry,  the leading  m by n part of the array  B must
+*           contain  the  right-hand  side  matrix  B,  and  on exit  is
+*           overwritten by the solution matrix  X.
+*
+*  LDB    - INTEGER.
+*           On entry, LDB specifies the first dimension of B as declared
+*           in  the  calling  (sub)  program.   LDB  must  be  at  least
+*           max( 1, m ).
+*           Unchanged on exit.
+*
+*
+*  Level 3 Blas routine.
+*
+*  -- Written on 8-February-1989.
+*     Jack Dongarra, Argonne National Laboratory.
+*     Iain Duff, AERE Harwell.
+*     Jeremy Du Croz, Numerical Algorithms Group Ltd.
+*     Sven Hammarling, Numerical Algorithms Group Ltd.
+*
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX
+*     .. Local Scalars ..
+      LOGICAL            LSIDE, NOCONJ, NOUNIT, UPPER
+      INTEGER            I, INFO, J, K, NROWA
+      COMPLEX*16         TEMP
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER        ( ONE  = ( 1.0D+0, 0.0D+0 ) )
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      LSIDE  = LSAME( SIDE  , 'L' )
+      IF( LSIDE )THEN
+         NROWA = M
+      ELSE
+         NROWA = N
+      END IF
+      NOCONJ = LSAME( TRANSA, 'T' )
+      NOUNIT = LSAME( DIAG  , 'N' )
+      UPPER  = LSAME( UPLO  , 'U' )
+*
+      INFO   = 0
+      IF(      ( .NOT.LSIDE                ).AND.
+     $         ( .NOT.LSAME( SIDE  , 'R' ) )      )THEN
+         INFO = 1
+      ELSE IF( ( .NOT.UPPER                ).AND.
+     $         ( .NOT.LSAME( UPLO  , 'L' ) )      )THEN
+         INFO = 2
+      ELSE IF( ( .NOT.LSAME( TRANSA, 'N' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'T' ) ).AND.
+     $         ( .NOT.LSAME( TRANSA, 'C' ) )      )THEN
+         INFO = 3
+      ELSE IF( ( .NOT.LSAME( DIAG  , 'U' ) ).AND.
+     $         ( .NOT.LSAME( DIAG  , 'N' ) )      )THEN
+         INFO = 4
+      ELSE IF( M  .LT.0               )THEN
+         INFO = 5
+      ELSE IF( N  .LT.0               )THEN
+         INFO = 6
+      ELSE IF( LDA.LT.MAX( 1, NROWA ) )THEN
+         INFO = 9
+      ELSE IF( LDB.LT.MAX( 1, M     ) )THEN
+         INFO = 11
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZTRSM ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     And when  alpha.eq.zero.
+*
+      IF( ALPHA.EQ.ZERO )THEN
+         DO 20, J = 1, N
+            DO 10, I = 1, M
+               B( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+         RETURN
+      END IF
+*
+*     Start the operations.
+*
+      IF( LSIDE )THEN
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*inv( A )*B.
+*
+            IF( UPPER )THEN
+               DO 60, J = 1, N
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 30, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+   30                CONTINUE
+                  END IF
+                  DO 50, K = M, 1, -1
+                     IF( B( K, J ).NE.ZERO )THEN
+                        IF( NOUNIT )
+     $                     B( K, J ) = B( K, J )/A( K, K )
+                        DO 40, I = 1, K - 1
+                           B( I, J ) = B( I, J ) - B( K, J )*A( I, K )
+   40                   CONTINUE
+                     END IF
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 100, J = 1, N
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 70, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+   70                CONTINUE
+                  END IF
+                  DO 90 K = 1, M
+                     IF( B( K, J ).NE.ZERO )THEN
+                        IF( NOUNIT )
+     $                     B( K, J ) = B( K, J )/A( K, K )
+                        DO 80, I = K + 1, M
+                           B( I, J ) = B( I, J ) - B( K, J )*A( I, K )
+   80                   CONTINUE
+                     END IF
+   90             CONTINUE
+  100          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*inv( A' )*B
+*           or    B := alpha*inv( conjg( A' ) )*B.
+*
+            IF( UPPER )THEN
+               DO 140, J = 1, N
+                  DO 130, I = 1, M
+                     TEMP = ALPHA*B( I, J )
+                     IF( NOCONJ )THEN
+                        DO 110, K = 1, I - 1
+                           TEMP = TEMP - A( K, I )*B( K, J )
+  110                   CONTINUE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP/A( I, I )
+                     ELSE
+                        DO 120, K = 1, I - 1
+                           TEMP = TEMP - DCONJG( A( K, I ) )*B( K, J )
+  120                   CONTINUE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP/DCONJG( A( I, I ) )
+                     END IF
+                     B( I, J ) = TEMP
+  130             CONTINUE
+  140          CONTINUE
+            ELSE
+               DO 180, J = 1, N
+                  DO 170, I = M, 1, -1
+                     TEMP = ALPHA*B( I, J )
+                     IF( NOCONJ )THEN
+                        DO 150, K = I + 1, M
+                           TEMP = TEMP - A( K, I )*B( K, J )
+  150                   CONTINUE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP/A( I, I )
+                     ELSE
+                        DO 160, K = I + 1, M
+                           TEMP = TEMP - DCONJG( A( K, I ) )*B( K, J )
+  160                   CONTINUE
+                        IF( NOUNIT )
+     $                     TEMP = TEMP/DCONJG( A( I, I ) )
+                     END IF
+                     B( I, J ) = TEMP
+  170             CONTINUE
+  180          CONTINUE
+            END IF
+         END IF
+      ELSE
+         IF( LSAME( TRANSA, 'N' ) )THEN
+*
+*           Form  B := alpha*B*inv( A ).
+*
+            IF( UPPER )THEN
+               DO 230, J = 1, N
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 190, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+  190                CONTINUE
+                  END IF
+                  DO 210, K = 1, J - 1
+                     IF( A( K, J ).NE.ZERO )THEN
+                        DO 200, I = 1, M
+                           B( I, J ) = B( I, J ) - A( K, J )*B( I, K )
+  200                   CONTINUE
+                     END IF
+  210             CONTINUE
+                  IF( NOUNIT )THEN
+                     TEMP = ONE/A( J, J )
+                     DO 220, I = 1, M
+                        B( I, J ) = TEMP*B( I, J )
+  220                CONTINUE
+                  END IF
+  230          CONTINUE
+            ELSE
+               DO 280, J = N, 1, -1
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 240, I = 1, M
+                        B( I, J ) = ALPHA*B( I, J )
+  240                CONTINUE
+                  END IF
+                  DO 260, K = J + 1, N
+                     IF( A( K, J ).NE.ZERO )THEN
+                        DO 250, I = 1, M
+                           B( I, J ) = B( I, J ) - A( K, J )*B( I, K )
+  250                   CONTINUE
+                     END IF
+  260             CONTINUE
+                  IF( NOUNIT )THEN
+                     TEMP = ONE/A( J, J )
+                     DO 270, I = 1, M
+                       B( I, J ) = TEMP*B( I, J )
+  270                CONTINUE
+                  END IF
+  280          CONTINUE
+            END IF
+         ELSE
+*
+*           Form  B := alpha*B*inv( A' )
+*           or    B := alpha*B*inv( conjg( A' ) ).
+*
+            IF( UPPER )THEN
+               DO 330, K = N, 1, -1
+                  IF( NOUNIT )THEN
+                     IF( NOCONJ )THEN
+                        TEMP = ONE/A( K, K )
+                     ELSE
+                        TEMP = ONE/DCONJG( A( K, K ) )
+                     END IF
+                     DO 290, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  290                CONTINUE
+                  END IF
+                  DO 310, J = 1, K - 1
+                     IF( A( J, K ).NE.ZERO )THEN
+                        IF( NOCONJ )THEN
+                           TEMP = A( J, K )
+                        ELSE
+                           TEMP = DCONJG( A( J, K ) )
+                        END IF
+                        DO 300, I = 1, M
+                           B( I, J ) = B( I, J ) - TEMP*B( I, K )
+  300                   CONTINUE
+                     END IF
+  310             CONTINUE
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 320, I = 1, M
+                        B( I, K ) = ALPHA*B( I, K )
+  320                CONTINUE
+                  END IF
+  330          CONTINUE
+            ELSE
+               DO 380, K = 1, N
+                  IF( NOUNIT )THEN
+                     IF( NOCONJ )THEN
+                        TEMP = ONE/A( K, K )
+                     ELSE
+                        TEMP = ONE/DCONJG( A( K, K ) )
+                     END IF
+                     DO 340, I = 1, M
+                        B( I, K ) = TEMP*B( I, K )
+  340                CONTINUE
+                  END IF
+                  DO 360, J = K + 1, N
+                     IF( A( J, K ).NE.ZERO )THEN
+                        IF( NOCONJ )THEN
+                           TEMP = A( J, K )
+                        ELSE
+                           TEMP = DCONJG( A( J, K ) )
+                        END IF
+                        DO 350, I = 1, M
+                           B( I, J ) = B( I, J ) - TEMP*B( I, K )
+  350                   CONTINUE
+                     END IF
+  360             CONTINUE
+                  IF( ALPHA.NE.ONE )THEN
+                     DO 370, I = 1, M
+                        B( I, K ) = ALPHA*B( I, K )
+  370                CONTINUE
+                  END IF
+  380          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZTRSM .
+*
+      END
diff --git a/costa/native/external/blas/ztrsv.f b/costa/native/external/blas/ztrsv.f
new file mode 100644
index 000000000..d0a57c447
--- /dev/null
+++ b/costa/native/external/blas/ztrsv.f
@@ -0,0 +1,324 @@
+      SUBROUTINE ZTRSV ( UPLO, TRANS, DIAG, N, A, LDA, X, INCX )
+*     .. Scalar Arguments ..
+      INTEGER            INCX, LDA, N
+      CHARACTER*1        DIAG, TRANS, UPLO
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTRSV  solves one of the systems of equations
+*
+*     A*x = b,   or   A'*x = b,   or   conjg( A' )*x = b,
+*
+*  where b and x are n element vectors and A is an n by n unit, or
+*  non-unit, upper or lower triangular matrix.
+*
+*  No test for singularity or near-singularity is included in this
+*  routine. Such tests must be performed before calling this routine.
+*
+*  Parameters
+*  ==========
+*
+*  UPLO   - CHARACTER*1.
+*           On entry, UPLO specifies whether the matrix is an upper or
+*           lower triangular matrix as follows:
+*
+*              UPLO = 'U' or 'u'   A is an upper triangular matrix.
+*
+*              UPLO = 'L' or 'l'   A is a lower triangular matrix.
+*
+*           Unchanged on exit.
+*
+*  TRANS  - CHARACTER*1.
+*           On entry, TRANS specifies the equations to be solved as
+*           follows:
+*
+*              TRANS = 'N' or 'n'   A*x = b.
+*
+*              TRANS = 'T' or 't'   A'*x = b.
+*
+*              TRANS = 'C' or 'c'   conjg( A' )*x = b.
+*
+*           Unchanged on exit.
+*
+*  DIAG   - CHARACTER*1.
+*           On entry, DIAG specifies whether or not A is unit
+*           triangular as follows:
+*
+*              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+*
+*              DIAG = 'N' or 'n'   A is not assumed to be unit
+*                                  triangular.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER.
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+*           Before entry with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular matrix and the strictly lower triangular part of
+*           A is not referenced.
+*           Before entry with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular matrix and the strictly upper triangular part of
+*           A is not referenced.
+*           Note that when  DIAG = 'U' or 'u', the diagonal elements of
+*           A are not referenced either, but are assumed to be unity.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER.
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, n ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16       array of dimension at least
+*           ( 1 + ( n - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the n
+*           element right-hand side vector b. On exit, X is overwritten
+*           with the solution vector x.
+*
+*  INCX   - INTEGER.
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*
+*  Level 2 Blas routine.
+*
+*  -- Written on 22-October-1986.
+*     Jack Dongarra, Argonne National Lab.
+*     Jeremy Du Croz, Nag Central Office.
+*     Sven Hammarling, Nag Central Office.
+*     Richard Hanson, Sandia National Labs.
+*
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER        ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     .. Local Scalars ..
+      COMPLEX*16         TEMP
+      INTEGER            I, INFO, IX, J, JX, KX
+      LOGICAL            NOCONJ, NOUNIT
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF     ( .NOT.LSAME( UPLO , 'U' ).AND.
+     $         .NOT.LSAME( UPLO , 'L' )      )THEN
+         INFO = 1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ).AND.
+     $         .NOT.LSAME( TRANS, 'T' ).AND.
+     $         .NOT.LSAME( TRANS, 'C' )      )THEN
+         INFO = 2
+      ELSE IF( .NOT.LSAME( DIAG , 'U' ).AND.
+     $         .NOT.LSAME( DIAG , 'N' )      )THEN
+         INFO = 3
+      ELSE IF( N.LT.0 )THEN
+         INFO = 4
+      ELSE IF( LDA.LT.MAX( 1, N ) )THEN
+         INFO = 6
+      ELSE IF( INCX.EQ.0 )THEN
+         INFO = 8
+      END IF
+      IF( INFO.NE.0 )THEN
+         CALL XERBLA( 'ZTRSV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      NOCONJ = LSAME( TRANS, 'T' )
+      NOUNIT = LSAME( DIAG , 'N' )
+*
+*     Set up the start point in X if the increment is not unity. This
+*     will be  ( N - 1 )*INCX  too small for descending loops.
+*
+      IF( INCX.LE.0 )THEN
+         KX = 1 - ( N - 1 )*INCX
+      ELSE IF( INCX.NE.1 )THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through A.
+*
+      IF( LSAME( TRANS, 'N' ) )THEN
+*
+*        Form  x := inv( A )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 20, J = N, 1, -1
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( J, J )
+                     TEMP = X( J )
+                     DO 10, I = J - 1, 1, -1
+                        X( I ) = X( I ) - TEMP*A( I, J )
+   10                CONTINUE
+                  END IF
+   20          CONTINUE
+            ELSE
+               JX = KX + ( N - 1 )*INCX
+               DO 40, J = N, 1, -1
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( J, J )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 30, I = J - 1, 1, -1
+                        IX      = IX      - INCX
+                        X( IX ) = X( IX ) - TEMP*A( I, J )
+   30                CONTINUE
+                  END IF
+                  JX = JX - INCX
+   40          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 60, J = 1, N
+                  IF( X( J ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( J ) = X( J )/A( J, J )
+                     TEMP = X( J )
+                     DO 50, I = J + 1, N
+                        X( I ) = X( I ) - TEMP*A( I, J )
+   50                CONTINUE
+                  END IF
+   60          CONTINUE
+            ELSE
+               JX = KX
+               DO 80, J = 1, N
+                  IF( X( JX ).NE.ZERO )THEN
+                     IF( NOUNIT )
+     $                  X( JX ) = X( JX )/A( J, J )
+                     TEMP = X( JX )
+                     IX   = JX
+                     DO 70, I = J + 1, N
+                        IX      = IX      + INCX
+                        X( IX ) = X( IX ) - TEMP*A( I, J )
+   70                CONTINUE
+                  END IF
+                  JX = JX + INCX
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE
+*
+*        Form  x := inv( A' )*x  or  x := inv( conjg( A' ) )*x.
+*
+         IF( LSAME( UPLO, 'U' ) )THEN
+            IF( INCX.EQ.1 )THEN
+               DO 110, J = 1, N
+                  TEMP = X( J )
+                  IF( NOCONJ )THEN
+                     DO 90, I = 1, J - 1
+                        TEMP = TEMP - A( I, J )*X( I )
+   90                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( J, J )
+                  ELSE
+                     DO 100, I = 1, J - 1
+                        TEMP = TEMP - DCONJG( A( I, J ) )*X( I )
+  100                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/DCONJG( A( J, J ) )
+                  END IF
+                  X( J ) = TEMP
+  110          CONTINUE
+            ELSE
+               JX = KX
+               DO 140, J = 1, N
+                  IX   = KX
+                  TEMP = X( JX )
+                  IF( NOCONJ )THEN
+                     DO 120, I = 1, J - 1
+                        TEMP = TEMP - A( I, J )*X( IX )
+                        IX   = IX   + INCX
+  120                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( J, J )
+                  ELSE
+                     DO 130, I = 1, J - 1
+                        TEMP = TEMP - DCONJG( A( I, J ) )*X( IX )
+                        IX   = IX   + INCX
+  130                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/DCONJG( A( J, J ) )
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   + INCX
+  140          CONTINUE
+            END IF
+         ELSE
+            IF( INCX.EQ.1 )THEN
+               DO 170, J = N, 1, -1
+                  TEMP = X( J )
+                  IF( NOCONJ )THEN
+                     DO 150, I = N, J + 1, -1
+                        TEMP = TEMP - A( I, J )*X( I )
+  150                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( J, J )
+                  ELSE
+                     DO 160, I = N, J + 1, -1
+                        TEMP = TEMP - DCONJG( A( I, J ) )*X( I )
+  160                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/DCONJG( A( J, J ) )
+                  END IF
+                  X( J ) = TEMP
+  170          CONTINUE
+            ELSE
+               KX = KX + ( N - 1 )*INCX
+               JX = KX
+               DO 200, J = N, 1, -1
+                  IX   = KX
+                  TEMP = X( JX )
+                  IF( NOCONJ )THEN
+                     DO 180, I = N, J + 1, -1
+                        TEMP = TEMP - A( I, J )*X( IX )
+                        IX   = IX   - INCX
+  180                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/A( J, J )
+                  ELSE
+                     DO 190, I = N, J + 1, -1
+                        TEMP = TEMP - DCONJG( A( I, J ) )*X( IX )
+                        IX   = IX   - INCX
+  190                CONTINUE
+                     IF( NOUNIT )
+     $                  TEMP = TEMP/DCONJG( A( J, J ) )
+                  END IF
+                  X( JX ) = TEMP
+                  JX      = JX   - INCX
+  200          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZTRSV .
+*
+      END
diff --git a/costa/native/external/lapack/cbdsqr.f b/costa/native/external/lapack/cbdsqr.f
new file mode 100644
index 000000000..6be44f18c
--- /dev/null
+++ b/costa/native/external/lapack/cbdsqr.f
@@ -0,0 +1,733 @@
+      SUBROUTINE CBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U,
+     $                   LDU, C, LDC, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * ), E( * ), RWORK( * )
+      COMPLEX            C( LDC, * ), U( LDU, * ), VT( LDVT, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CBDSQR computes the singular value decomposition (SVD) of a real
+*  N-by-N (upper or lower) bidiagonal matrix B:  B = Q * S * P' (P'
+*  denotes the transpose of P), where S is a diagonal matrix with
+*  non-negative diagonal elements (the singular values of B), and Q
+*  and P are orthogonal matrices.
+*
+*  The routine computes S, and optionally computes U * Q, P' * VT,
+*  or Q' * C, for given complex input matrices U, VT, and C.
+*
+*  See "Computing  Small Singular Values of Bidiagonal Matrices With
+*  Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan,
+*  LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11,
+*  no. 5, pp. 873-912, Sept 1990) and
+*  "Accurate singular values and differential qd algorithms," by
+*  B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics
+*  Department, University of California at Berkeley, July 1992
+*  for a detailed description of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  B is upper bidiagonal;
+*          = 'L':  B is lower bidiagonal.
+*
+*  N       (input) INTEGER
+*          The order of the matrix B.  N >= 0.
+*
+*  NCVT    (input) INTEGER
+*          The number of columns of the matrix VT. NCVT >= 0.
+*
+*  NRU     (input) INTEGER
+*          The number of rows of the matrix U. NRU >= 0.
+*
+*  NCC     (input) INTEGER
+*          The number of columns of the matrix C. NCC >= 0.
+*
+*  D       (input/output) REAL array, dimension (N)
+*          On entry, the n diagonal elements of the bidiagonal matrix B.
+*          On exit, if INFO=0, the singular values of B in decreasing
+*          order.
+*
+*  E       (input/output) REAL array, dimension (N)
+*          On entry, the elements of E contain the
+*          offdiagonal elements of of the bidiagonal matrix whose SVD
+*          is desired. On normal exit (INFO = 0), E is destroyed.
+*          If the algorithm does not converge (INFO > 0), D and E
+*          will contain the diagonal and superdiagonal elements of a
+*          bidiagonal matrix orthogonally equivalent to the one given
+*          as input. E(N) is used for workspace.
+*
+*  VT      (input/output) COMPLEX array, dimension (LDVT, NCVT)
+*          On entry, an N-by-NCVT matrix VT.
+*          On exit, VT is overwritten by P' * VT.
+*          VT is not referenced if NCVT = 0.
+*
+*  LDVT    (input) INTEGER
+*          The leading dimension of the array VT.
+*          LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0.
+*
+*  U       (input/output) COMPLEX array, dimension (LDU, N)
+*          On entry, an NRU-by-N matrix U.
+*          On exit, U is overwritten by U * Q.
+*          U is not referenced if NRU = 0.
+*
+*  LDU     (input) INTEGER
+*          The leading dimension of the array U.  LDU >= max(1,NRU).
+*
+*  C       (input/output) COMPLEX array, dimension (LDC, NCC)
+*          On entry, an N-by-NCC matrix C.
+*          On exit, C is overwritten by Q' * C.
+*          C is not referenced if NCC = 0.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C.
+*          LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0.
+*
+*  RWORK   (workspace) REAL array, dimension (4*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  If INFO = -i, the i-th argument had an illegal value
+*          > 0:  the algorithm did not converge; D and E contain the
+*                elements of a bidiagonal matrix which is orthogonally
+*                similar to the input matrix B;  if INFO = i, i
+*                elements of E have not converged to zero.
+*
+*  Internal Parameters
+*  ===================
+*
+*  TOLMUL  REAL, default = max(10,min(100,EPS**(-1/8)))
+*          TOLMUL controls the convergence criterion of the QR loop.
+*          If it is positive, TOLMUL*EPS is the desired relative
+*             precision in the computed singular values.
+*          If it is negative, abs(TOLMUL*EPS*sigma_max) is the
+*             desired absolute accuracy in the computed singular
+*             values (corresponds to relative accuracy
+*             abs(TOLMUL*EPS) in the largest singular value.
+*          abs(TOLMUL) should be between 1 and 1/EPS, and preferably
+*             between 10 (for fast convergence) and .1/EPS
+*             (for there to be some accuracy in the results).
+*          Default is to lose at either one eighth or 2 of the
+*             available decimal digits in each computed singular value
+*             (whichever is smaller).
+*
+*  MAXITR  INTEGER, default = 6
+*          MAXITR controls the maximum number of passes of the
+*          algorithm through its inner loop. The algorithms stops
+*          (and so fails to converge) if the number of passes
+*          through the inner loop exceeds MAXITR*N**2.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E0 )
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E0 )
+      REAL               NEGONE
+      PARAMETER          ( NEGONE = -1.0E0 )
+      REAL               HNDRTH
+      PARAMETER          ( HNDRTH = 0.01E0 )
+      REAL               TEN
+      PARAMETER          ( TEN = 10.0E0 )
+      REAL               HNDRD
+      PARAMETER          ( HNDRD = 100.0E0 )
+      REAL               MEIGTH
+      PARAMETER          ( MEIGTH = -0.125E0 )
+      INTEGER            MAXITR
+      PARAMETER          ( MAXITR = 6 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LOWER, ROTATE
+      INTEGER            I, IDIR, ISUB, ITER, J, LL, LLL, M, MAXIT, NM1,
+     $                   NM12, NM13, OLDLL, OLDM
+      REAL               ABSE, ABSS, COSL, COSR, CS, EPS, F, G, H, MU,
+     $                   OLDCS, OLDSN, R, SHIFT, SIGMN, SIGMX, SINL,
+     $                   SINR, SLL, SMAX, SMIN, SMINL, SMINLO, SMINOA,
+     $                   SN, THRESH, TOL, TOLMUL, UNFL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLASR, CSROT, CSSCAL, CSWAP, SLARTG, SLAS2,
+     $                   SLASQ1, SLASV2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, REAL, SIGN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      LOWER = LSAME( UPLO, 'L' )
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LOWER ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NCVT.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRU.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NCC.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( ( NCVT.EQ.0 .AND. LDVT.LT.1 ) .OR.
+     $         ( NCVT.GT.0 .AND. LDVT.LT.MAX( 1, N ) ) ) THEN
+         INFO = -9
+      ELSE IF( LDU.LT.MAX( 1, NRU ) ) THEN
+         INFO = -11
+      ELSE IF( ( NCC.EQ.0 .AND. LDC.LT.1 ) .OR.
+     $         ( NCC.GT.0 .AND. LDC.LT.MAX( 1, N ) ) ) THEN
+         INFO = -13
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CBDSQR', -INFO )
+         RETURN
+      END IF
+      IF( N.EQ.0 )
+     $   RETURN
+      IF( N.EQ.1 )
+     $   GO TO 160
+*
+*     ROTATE is true if any singular vectors desired, false otherwise
+*
+      ROTATE = ( NCVT.GT.0 ) .OR. ( NRU.GT.0 ) .OR. ( NCC.GT.0 )
+*
+*     If no singular vectors desired, use qd algorithm
+*
+      IF( .NOT.ROTATE ) THEN
+         CALL SLASQ1( N, D, E, RWORK, INFO )
+         RETURN
+      END IF
+*
+      NM1 = N - 1
+      NM12 = NM1 + NM1
+      NM13 = NM12 + NM1
+      IDIR = 0
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'Epsilon' )
+      UNFL = SLAMCH( 'Safe minimum' )
+*
+*     If matrix lower bidiagonal, rotate to be upper bidiagonal
+*     by applying Givens rotations on the left
+*
+      IF( LOWER ) THEN
+         DO 10 I = 1, N - 1
+            CALL SLARTG( D( I ), E( I ), CS, SN, R )
+            D( I ) = R
+            E( I ) = SN*D( I+1 )
+            D( I+1 ) = CS*D( I+1 )
+            RWORK( I ) = CS
+            RWORK( NM1+I ) = SN
+   10    CONTINUE
+*
+*        Update singular vectors if desired
+*
+         IF( NRU.GT.0 )
+     $      CALL CLASR( 'R', 'V', 'F', NRU, N, RWORK( 1 ), RWORK( N ),
+     $                  U, LDU )
+         IF( NCC.GT.0 )
+     $      CALL CLASR( 'L', 'V', 'F', N, NCC, RWORK( 1 ), RWORK( N ),
+     $                  C, LDC )
+      END IF
+*
+*     Compute singular values to relative accuracy TOL
+*     (By setting TOL to be negative, algorithm will compute
+*     singular values to absolute accuracy ABS(TOL)*norm(input matrix))
+*
+      TOLMUL = MAX( TEN, MIN( HNDRD, EPS**MEIGTH ) )
+      TOL = TOLMUL*EPS
+*
+*     Compute approximate maximum, minimum singular values
+*
+      SMAX = ZERO
+      DO 20 I = 1, N
+         SMAX = MAX( SMAX, ABS( D( I ) ) )
+   20 CONTINUE
+      DO 30 I = 1, N - 1
+         SMAX = MAX( SMAX, ABS( E( I ) ) )
+   30 CONTINUE
+      SMINL = ZERO
+      IF( TOL.GE.ZERO ) THEN
+*
+*        Relative accuracy desired
+*
+         SMINOA = ABS( D( 1 ) )
+         IF( SMINOA.EQ.ZERO )
+     $      GO TO 50
+         MU = SMINOA
+         DO 40 I = 2, N
+            MU = ABS( D( I ) )*( MU / ( MU+ABS( E( I-1 ) ) ) )
+            SMINOA = MIN( SMINOA, MU )
+            IF( SMINOA.EQ.ZERO )
+     $         GO TO 50
+   40    CONTINUE
+   50    CONTINUE
+         SMINOA = SMINOA / SQRT( REAL( N ) )
+         THRESH = MAX( TOL*SMINOA, MAXITR*N*N*UNFL )
+      ELSE
+*
+*        Absolute accuracy desired
+*
+         THRESH = MAX( ABS( TOL )*SMAX, MAXITR*N*N*UNFL )
+      END IF
+*
+*     Prepare for main iteration loop for the singular values
+*     (MAXIT is the maximum number of passes through the inner
+*     loop permitted before nonconvergence signalled.)
+*
+      MAXIT = MAXITR*N*N
+      ITER = 0
+      OLDLL = -1
+      OLDM = -1
+*
+*     M points to last element of unconverged part of matrix
+*
+      M = N
+*
+*     Begin main iteration loop
+*
+   60 CONTINUE
+*
+*     Check for convergence or exceeding iteration count
+*
+      IF( M.LE.1 )
+     $   GO TO 160
+      IF( ITER.GT.MAXIT )
+     $   GO TO 200
+*
+*     Find diagonal block of matrix to work on
+*
+      IF( TOL.LT.ZERO .AND. ABS( D( M ) ).LE.THRESH )
+     $   D( M ) = ZERO
+      SMAX = ABS( D( M ) )
+      SMIN = SMAX
+      DO 70 LLL = 1, M - 1
+         LL = M - LLL
+         ABSS = ABS( D( LL ) )
+         ABSE = ABS( E( LL ) )
+         IF( TOL.LT.ZERO .AND. ABSS.LE.THRESH )
+     $      D( LL ) = ZERO
+         IF( ABSE.LE.THRESH )
+     $      GO TO 80
+         SMIN = MIN( SMIN, ABSS )
+         SMAX = MAX( SMAX, ABSS, ABSE )
+   70 CONTINUE
+      LL = 0
+      GO TO 90
+   80 CONTINUE
+      E( LL ) = ZERO
+*
+*     Matrix splits since E(LL) = 0
+*
+      IF( LL.EQ.M-1 ) THEN
+*
+*        Convergence of bottom singular value, return to top of loop
+*
+         M = M - 1
+         GO TO 60
+      END IF
+   90 CONTINUE
+      LL = LL + 1
+*
+*     E(LL) through E(M-1) are nonzero, E(LL-1) is zero
+*
+      IF( LL.EQ.M-1 ) THEN
+*
+*        2 by 2 block, handle separately
+*
+         CALL SLASV2( D( M-1 ), E( M-1 ), D( M ), SIGMN, SIGMX, SINR,
+     $                COSR, SINL, COSL )
+         D( M-1 ) = SIGMX
+         E( M-1 ) = ZERO
+         D( M ) = SIGMN
+*
+*        Compute singular vectors, if desired
+*
+         IF( NCVT.GT.0 )
+     $      CALL CSROT( NCVT, VT( M-1, 1 ), LDVT, VT( M, 1 ), LDVT,
+     $                  COSR, SINR )
+         IF( NRU.GT.0 )
+     $      CALL CSROT( NRU, U( 1, M-1 ), 1, U( 1, M ), 1, COSL, SINL )
+         IF( NCC.GT.0 )
+     $      CALL CSROT( NCC, C( M-1, 1 ), LDC, C( M, 1 ), LDC, COSL,
+     $                  SINL )
+         M = M - 2
+         GO TO 60
+      END IF
+*
+*     If working on new submatrix, choose shift direction
+*     (from larger end diagonal element towards smaller)
+*
+      IF( LL.GT.OLDM .OR. M.LT.OLDLL ) THEN
+         IF( ABS( D( LL ) ).GE.ABS( D( M ) ) ) THEN
+*
+*           Chase bulge from top (big end) to bottom (small end)
+*
+            IDIR = 1
+         ELSE
+*
+*           Chase bulge from bottom (big end) to top (small end)
+*
+            IDIR = 2
+         END IF
+      END IF
+*
+*     Apply convergence tests
+*
+      IF( IDIR.EQ.1 ) THEN
+*
+*        Run convergence test in forward direction
+*        First apply standard test to bottom of matrix
+*
+         IF( ABS( E( M-1 ) ).LE.ABS( TOL )*ABS( D( M ) ) .OR.
+     $       ( TOL.LT.ZERO .AND. ABS( E( M-1 ) ).LE.THRESH ) ) THEN
+            E( M-1 ) = ZERO
+            GO TO 60
+         END IF
+*
+         IF( TOL.GE.ZERO ) THEN
+*
+*           If relative accuracy desired,
+*           apply convergence criterion forward
+*
+            MU = ABS( D( LL ) )
+            SMINL = MU
+            DO 100 LLL = LL, M - 1
+               IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN
+                  E( LLL ) = ZERO
+                  GO TO 60
+               END IF
+               SMINLO = SMINL
+               MU = ABS( D( LLL+1 ) )*( MU / ( MU+ABS( E( LLL ) ) ) )
+               SMINL = MIN( SMINL, MU )
+  100       CONTINUE
+         END IF
+*
+      ELSE
+*
+*        Run convergence test in backward direction
+*        First apply standard test to top of matrix
+*
+         IF( ABS( E( LL ) ).LE.ABS( TOL )*ABS( D( LL ) ) .OR.
+     $       ( TOL.LT.ZERO .AND. ABS( E( LL ) ).LE.THRESH ) ) THEN
+            E( LL ) = ZERO
+            GO TO 60
+         END IF
+*
+         IF( TOL.GE.ZERO ) THEN
+*
+*           If relative accuracy desired,
+*           apply convergence criterion backward
+*
+            MU = ABS( D( M ) )
+            SMINL = MU
+            DO 110 LLL = M - 1, LL, -1
+               IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN
+                  E( LLL ) = ZERO
+                  GO TO 60
+               END IF
+               SMINLO = SMINL
+               MU = ABS( D( LLL ) )*( MU / ( MU+ABS( E( LLL ) ) ) )
+               SMINL = MIN( SMINL, MU )
+  110       CONTINUE
+         END IF
+      END IF
+      OLDLL = LL
+      OLDM = M
+*
+*     Compute shift.  First, test if shifting would ruin relative
+*     accuracy, and if so set the shift to zero.
+*
+      IF( TOL.GE.ZERO .AND. N*TOL*( SMINL / SMAX ).LE.
+     $    MAX( EPS, HNDRTH*TOL ) ) THEN
+*
+*        Use a zero shift to avoid loss of relative accuracy
+*
+         SHIFT = ZERO
+      ELSE
+*
+*        Compute the shift from 2-by-2 block at end of matrix
+*
+         IF( IDIR.EQ.1 ) THEN
+            SLL = ABS( D( LL ) )
+            CALL SLAS2( D( M-1 ), E( M-1 ), D( M ), SHIFT, R )
+         ELSE
+            SLL = ABS( D( M ) )
+            CALL SLAS2( D( LL ), E( LL ), D( LL+1 ), SHIFT, R )
+         END IF
+*
+*        Test if shift negligible, and if so set to zero
+*
+         IF( SLL.GT.ZERO ) THEN
+            IF( ( SHIFT / SLL )**2.LT.EPS )
+     $         SHIFT = ZERO
+         END IF
+      END IF
+*
+*     Increment iteration count
+*
+      ITER = ITER + M - LL
+*
+*     If SHIFT = 0, do simplified QR iteration
+*
+      IF( SHIFT.EQ.ZERO ) THEN
+         IF( IDIR.EQ.1 ) THEN
+*
+*           Chase bulge from top to bottom
+*           Save cosines and sines for later singular vector updates
+*
+            CS = ONE
+            OLDCS = ONE
+            DO 120 I = LL, M - 1
+               CALL SLARTG( D( I )*CS, E( I ), CS, SN, R )
+               IF( I.GT.LL )
+     $            E( I-1 ) = OLDSN*R
+               CALL SLARTG( OLDCS*R, D( I+1 )*SN, OLDCS, OLDSN, D( I ) )
+               RWORK( I-LL+1 ) = CS
+               RWORK( I-LL+1+NM1 ) = SN
+               RWORK( I-LL+1+NM12 ) = OLDCS
+               RWORK( I-LL+1+NM13 ) = OLDSN
+  120       CONTINUE
+            H = D( M )*CS
+            D( M ) = H*OLDCS
+            E( M-1 ) = H*OLDSN
+*
+*           Update singular vectors
+*
+            IF( NCVT.GT.0 )
+     $         CALL CLASR( 'L', 'V', 'F', M-LL+1, NCVT, RWORK( 1 ),
+     $                     RWORK( N ), VT( LL, 1 ), LDVT )
+            IF( NRU.GT.0 )
+     $         CALL CLASR( 'R', 'V', 'F', NRU, M-LL+1, RWORK( NM12+1 ),
+     $                     RWORK( NM13+1 ), U( 1, LL ), LDU )
+            IF( NCC.GT.0 )
+     $         CALL CLASR( 'L', 'V', 'F', M-LL+1, NCC, RWORK( NM12+1 ),
+     $                     RWORK( NM13+1 ), C( LL, 1 ), LDC )
+*
+*           Test convergence
+*
+            IF( ABS( E( M-1 ) ).LE.THRESH )
+     $         E( M-1 ) = ZERO
+*
+         ELSE
+*
+*           Chase bulge from bottom to top
+*           Save cosines and sines for later singular vector updates
+*
+            CS = ONE
+            OLDCS = ONE
+            DO 130 I = M, LL + 1, -1
+               CALL SLARTG( D( I )*CS, E( I-1 ), CS, SN, R )
+               IF( I.LT.M )
+     $            E( I ) = OLDSN*R
+               CALL SLARTG( OLDCS*R, D( I-1 )*SN, OLDCS, OLDSN, D( I ) )
+               RWORK( I-LL ) = CS
+               RWORK( I-LL+NM1 ) = -SN
+               RWORK( I-LL+NM12 ) = OLDCS
+               RWORK( I-LL+NM13 ) = -OLDSN
+  130       CONTINUE
+            H = D( LL )*CS
+            D( LL ) = H*OLDCS
+            E( LL ) = H*OLDSN
+*
+*           Update singular vectors
+*
+            IF( NCVT.GT.0 )
+     $         CALL CLASR( 'L', 'V', 'B', M-LL+1, NCVT, RWORK( NM12+1 ),
+     $                     RWORK( NM13+1 ), VT( LL, 1 ), LDVT )
+            IF( NRU.GT.0 )
+     $         CALL CLASR( 'R', 'V', 'B', NRU, M-LL+1, RWORK( 1 ),
+     $                     RWORK( N ), U( 1, LL ), LDU )
+            IF( NCC.GT.0 )
+     $         CALL CLASR( 'L', 'V', 'B', M-LL+1, NCC, RWORK( 1 ),
+     $                     RWORK( N ), C( LL, 1 ), LDC )
+*
+*           Test convergence
+*
+            IF( ABS( E( LL ) ).LE.THRESH )
+     $         E( LL ) = ZERO
+         END IF
+      ELSE
+*
+*        Use nonzero shift
+*
+         IF( IDIR.EQ.1 ) THEN
+*
+*           Chase bulge from top to bottom
+*           Save cosines and sines for later singular vector updates
+*
+            F = ( ABS( D( LL ) )-SHIFT )*
+     $          ( SIGN( ONE, D( LL ) )+SHIFT / D( LL ) )
+            G = E( LL )
+            DO 140 I = LL, M - 1
+               CALL SLARTG( F, G, COSR, SINR, R )
+               IF( I.GT.LL )
+     $            E( I-1 ) = R
+               F = COSR*D( I ) + SINR*E( I )
+               E( I ) = COSR*E( I ) - SINR*D( I )
+               G = SINR*D( I+1 )
+               D( I+1 ) = COSR*D( I+1 )
+               CALL SLARTG( F, G, COSL, SINL, R )
+               D( I ) = R
+               F = COSL*E( I ) + SINL*D( I+1 )
+               D( I+1 ) = COSL*D( I+1 ) - SINL*E( I )
+               IF( I.LT.M-1 ) THEN
+                  G = SINL*E( I+1 )
+                  E( I+1 ) = COSL*E( I+1 )
+               END IF
+               RWORK( I-LL+1 ) = COSR
+               RWORK( I-LL+1+NM1 ) = SINR
+               RWORK( I-LL+1+NM12 ) = COSL
+               RWORK( I-LL+1+NM13 ) = SINL
+  140       CONTINUE
+            E( M-1 ) = F
+*
+*           Update singular vectors
+*
+            IF( NCVT.GT.0 )
+     $         CALL CLASR( 'L', 'V', 'F', M-LL+1, NCVT, RWORK( 1 ),
+     $                     RWORK( N ), VT( LL, 1 ), LDVT )
+            IF( NRU.GT.0 )
+     $         CALL CLASR( 'R', 'V', 'F', NRU, M-LL+1, RWORK( NM12+1 ),
+     $                     RWORK( NM13+1 ), U( 1, LL ), LDU )
+            IF( NCC.GT.0 )
+     $         CALL CLASR( 'L', 'V', 'F', M-LL+1, NCC, RWORK( NM12+1 ),
+     $                     RWORK( NM13+1 ), C( LL, 1 ), LDC )
+*
+*           Test convergence
+*
+            IF( ABS( E( M-1 ) ).LE.THRESH )
+     $         E( M-1 ) = ZERO
+*
+         ELSE
+*
+*           Chase bulge from bottom to top
+*           Save cosines and sines for later singular vector updates
+*
+            F = ( ABS( D( M ) )-SHIFT )*( SIGN( ONE, D( M ) )+SHIFT /
+     $          D( M ) )
+            G = E( M-1 )
+            DO 150 I = M, LL + 1, -1
+               CALL SLARTG( F, G, COSR, SINR, R )
+               IF( I.LT.M )
+     $            E( I ) = R
+               F = COSR*D( I ) + SINR*E( I-1 )
+               E( I-1 ) = COSR*E( I-1 ) - SINR*D( I )
+               G = SINR*D( I-1 )
+               D( I-1 ) = COSR*D( I-1 )
+               CALL SLARTG( F, G, COSL, SINL, R )
+               D( I ) = R
+               F = COSL*E( I-1 ) + SINL*D( I-1 )
+               D( I-1 ) = COSL*D( I-1 ) - SINL*E( I-1 )
+               IF( I.GT.LL+1 ) THEN
+                  G = SINL*E( I-2 )
+                  E( I-2 ) = COSL*E( I-2 )
+               END IF
+               RWORK( I-LL ) = COSR
+               RWORK( I-LL+NM1 ) = -SINR
+               RWORK( I-LL+NM12 ) = COSL
+               RWORK( I-LL+NM13 ) = -SINL
+  150       CONTINUE
+            E( LL ) = F
+*
+*           Test convergence
+*
+            IF( ABS( E( LL ) ).LE.THRESH )
+     $         E( LL ) = ZERO
+*
+*           Update singular vectors if desired
+*
+            IF( NCVT.GT.0 )
+     $         CALL CLASR( 'L', 'V', 'B', M-LL+1, NCVT, RWORK( NM12+1 ),
+     $                     RWORK( NM13+1 ), VT( LL, 1 ), LDVT )
+            IF( NRU.GT.0 )
+     $         CALL CLASR( 'R', 'V', 'B', NRU, M-LL+1, RWORK( 1 ),
+     $                     RWORK( N ), U( 1, LL ), LDU )
+            IF( NCC.GT.0 )
+     $         CALL CLASR( 'L', 'V', 'B', M-LL+1, NCC, RWORK( 1 ),
+     $                     RWORK( N ), C( LL, 1 ), LDC )
+         END IF
+      END IF
+*
+*     QR iteration finished, go back and check convergence
+*
+      GO TO 60
+*
+*     All singular values converged, so make them positive
+*
+  160 CONTINUE
+      DO 170 I = 1, N
+         IF( D( I ).LT.ZERO ) THEN
+            D( I ) = -D( I )
+*
+*           Change sign of singular vectors, if desired
+*
+            IF( NCVT.GT.0 )
+     $         CALL CSSCAL( NCVT, NEGONE, VT( I, 1 ), LDVT )
+         END IF
+  170 CONTINUE
+*
+*     Sort the singular values into decreasing order (insertion sort on
+*     singular values, but only one transposition per singular vector)
+*
+      DO 190 I = 1, N - 1
+*
+*        Scan for smallest D(I)
+*
+         ISUB = 1
+         SMIN = D( 1 )
+         DO 180 J = 2, N + 1 - I
+            IF( D( J ).LE.SMIN ) THEN
+               ISUB = J
+               SMIN = D( J )
+            END IF
+  180    CONTINUE
+         IF( ISUB.NE.N+1-I ) THEN
+*
+*           Swap singular values and vectors
+*
+            D( ISUB ) = D( N+1-I )
+            D( N+1-I ) = SMIN
+            IF( NCVT.GT.0 )
+     $         CALL CSWAP( NCVT, VT( ISUB, 1 ), LDVT, VT( N+1-I, 1 ),
+     $                     LDVT )
+            IF( NRU.GT.0 )
+     $         CALL CSWAP( NRU, U( 1, ISUB ), 1, U( 1, N+1-I ), 1 )
+            IF( NCC.GT.0 )
+     $         CALL CSWAP( NCC, C( ISUB, 1 ), LDC, C( N+1-I, 1 ), LDC )
+         END IF
+  190 CONTINUE
+      GO TO 220
+*
+*     Maximum number of iterations exceeded, failure to converge
+*
+  200 CONTINUE
+      INFO = 0
+      DO 210 I = 1, N - 1
+         IF( E( I ).NE.ZERO )
+     $      INFO = INFO + 1
+  210 CONTINUE
+  220 CONTINUE
+      RETURN
+*
+*     End of CBDSQR
+*
+      END
diff --git a/costa/native/external/lapack/cgbbrd.f b/costa/native/external/lapack/cgbbrd.f
new file mode 100644
index 000000000..6546788a1
--- /dev/null
+++ b/costa/native/external/lapack/cgbbrd.f
@@ -0,0 +1,466 @@
+      SUBROUTINE CGBBRD( VECT, M, N, NCC, KL, KU, AB, LDAB, D, E, Q,
+     $                   LDQ, PT, LDPT, C, LDC, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          VECT
+      INTEGER            INFO, KL, KU, LDAB, LDC, LDPT, LDQ, M, N, NCC
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * ), E( * ), RWORK( * )
+      COMPLEX            AB( LDAB, * ), C( LDC, * ), PT( LDPT, * ),
+     $                   Q( LDQ, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGBBRD reduces a complex general m-by-n band matrix A to real upper
+*  bidiagonal form B by a unitary transformation: Q' * A * P = B.
+*
+*  The routine computes B, and optionally forms Q or P', or computes
+*  Q'*C for a given matrix C.
+*
+*  Arguments
+*  =========
+*
+*  VECT    (input) CHARACTER*1
+*          Specifies whether or not the matrices Q and P' are to be
+*          formed.
+*          = 'N': do not form Q or P';
+*          = 'Q': form Q only;
+*          = 'P': form P' only;
+*          = 'B': form both.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  NCC     (input) INTEGER
+*          The number of columns of the matrix C.  NCC >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals of the matrix A. KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals of the matrix A. KU >= 0.
+*
+*  AB      (input/output) COMPLEX array, dimension (LDAB,N)
+*          On entry, the m-by-n band matrix A, stored in rows 1 to
+*          KL+KU+1. The j-th column of A is stored in the j-th column of
+*          the array AB as follows:
+*          AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl).
+*          On exit, A is overwritten by values generated during the
+*          reduction.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array A. LDAB >= KL+KU+1.
+*
+*  D       (output) REAL array, dimension (min(M,N))
+*          The diagonal elements of the bidiagonal matrix B.
+*
+*  E       (output) REAL array, dimension (min(M,N)-1)
+*          The superdiagonal elements of the bidiagonal matrix B.
+*
+*  Q       (output) COMPLEX array, dimension (LDQ,M)
+*          If VECT = 'Q' or 'B', the m-by-m unitary matrix Q.
+*          If VECT = 'N' or 'P', the array Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.
+*          LDQ >= max(1,M) if VECT = 'Q' or 'B'; LDQ >= 1 otherwise.
+*
+*  PT      (output) COMPLEX array, dimension (LDPT,N)
+*          If VECT = 'P' or 'B', the n-by-n unitary matrix P'.
+*          If VECT = 'N' or 'Q', the array PT is not referenced.
+*
+*  LDPT    (input) INTEGER
+*          The leading dimension of the array PT.
+*          LDPT >= max(1,N) if VECT = 'P' or 'B'; LDPT >= 1 otherwise.
+*
+*  C       (input/output) COMPLEX array, dimension (LDC,NCC)
+*          On entry, an m-by-ncc matrix C.
+*          On exit, C is overwritten by Q'*C.
+*          C is not referenced if NCC = 0.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C.
+*          LDC >= max(1,M) if NCC > 0; LDC >= 1 if NCC = 0.
+*
+*  WORK    (workspace) COMPLEX array, dimension (max(M,N))
+*
+*  RWORK   (workspace) REAL array, dimension (max(M,N))
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+      COMPLEX            CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            WANTB, WANTC, WANTPT, WANTQ
+      INTEGER            I, INCA, J, J1, J2, KB, KB1, KK, KLM, KLU1,
+     $                   KUN, L, MINMN, ML, ML0, MU, MU0, NR, NRT
+      REAL               ABST, RC
+      COMPLEX            RA, RB, RS, T
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLARGV, CLARTG, CLARTV, CLASET, CROT, CSCAL,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, CONJG, MAX, MIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      WANTB = LSAME( VECT, 'B' )
+      WANTQ = LSAME( VECT, 'Q' ) .OR. WANTB
+      WANTPT = LSAME( VECT, 'P' ) .OR. WANTB
+      WANTC = NCC.GT.0
+      KLU1 = KL + KU + 1
+      INFO = 0
+      IF( .NOT.WANTQ .AND. .NOT.WANTPT .AND. .NOT.LSAME( VECT, 'N' ) )
+     $     THEN
+         INFO = -1
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NCC.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDAB.LT.KLU1 ) THEN
+         INFO = -8
+      ELSE IF( LDQ.LT.1 .OR. WANTQ .AND. LDQ.LT.MAX( 1, M ) ) THEN
+         INFO = -12
+      ELSE IF( LDPT.LT.1 .OR. WANTPT .AND. LDPT.LT.MAX( 1, N ) ) THEN
+         INFO = -14
+      ELSE IF( LDC.LT.1 .OR. WANTC .AND. LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -16
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGBBRD', -INFO )
+         RETURN
+      END IF
+*
+*     Initialize Q and P' to the unit matrix, if needed
+*
+      IF( WANTQ )
+     $   CALL CLASET( 'Full', M, M, CZERO, CONE, Q, LDQ )
+      IF( WANTPT )
+     $   CALL CLASET( 'Full', N, N, CZERO, CONE, PT, LDPT )
+*
+*     Quick return if possible.
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+      MINMN = MIN( M, N )
+*
+      IF( KL+KU.GT.1 ) THEN
+*
+*        Reduce to upper bidiagonal form if KU > 0; if KU = 0, reduce
+*        first to lower bidiagonal form and then transform to upper
+*        bidiagonal
+*
+         IF( KU.GT.0 ) THEN
+            ML0 = 1
+            MU0 = 2
+         ELSE
+            ML0 = 2
+            MU0 = 1
+         END IF
+*
+*        Wherever possible, plane rotations are generated and applied in
+*        vector operations of length NR over the index set J1:J2:KLU1.
+*
+*        The complex sines of the plane rotations are stored in WORK,
+*        and the real cosines in RWORK.
+*
+         KLM = MIN( M-1, KL )
+         KUN = MIN( N-1, KU )
+         KB = KLM + KUN
+         KB1 = KB + 1
+         INCA = KB1*LDAB
+         NR = 0
+         J1 = KLM + 2
+         J2 = 1 - KUN
+*
+         DO 90 I = 1, MINMN
+*
+*           Reduce i-th column and i-th row of matrix to bidiagonal form
+*
+            ML = KLM + 1
+            MU = KUN + 1
+            DO 80 KK = 1, KB
+               J1 = J1 + KB
+               J2 = J2 + KB
+*
+*              generate plane rotations to annihilate nonzero elements
+*              which have been created below the band
+*
+               IF( NR.GT.0 )
+     $            CALL CLARGV( NR, AB( KLU1, J1-KLM-1 ), INCA,
+     $                         WORK( J1 ), KB1, RWORK( J1 ), KB1 )
+*
+*              apply plane rotations from the left
+*
+               DO 10 L = 1, KB
+                  IF( J2-KLM+L-1.GT.N ) THEN
+                     NRT = NR - 1
+                  ELSE
+                     NRT = NR
+                  END IF
+                  IF( NRT.GT.0 )
+     $               CALL CLARTV( NRT, AB( KLU1-L, J1-KLM+L-1 ), INCA,
+     $                            AB( KLU1-L+1, J1-KLM+L-1 ), INCA,
+     $                            RWORK( J1 ), WORK( J1 ), KB1 )
+   10          CONTINUE
+*
+               IF( ML.GT.ML0 ) THEN
+                  IF( ML.LE.M-I+1 ) THEN
+*
+*                    generate plane rotation to annihilate a(i+ml-1,i)
+*                    within the band, and apply rotation from the left
+*
+                     CALL CLARTG( AB( KU+ML-1, I ), AB( KU+ML, I ),
+     $                            RWORK( I+ML-1 ), WORK( I+ML-1 ), RA )
+                     AB( KU+ML-1, I ) = RA
+                     IF( I.LT.N )
+     $                  CALL CROT( MIN( KU+ML-2, N-I ),
+     $                             AB( KU+ML-2, I+1 ), LDAB-1,
+     $                             AB( KU+ML-1, I+1 ), LDAB-1,
+     $                             RWORK( I+ML-1 ), WORK( I+ML-1 ) )
+                  END IF
+                  NR = NR + 1
+                  J1 = J1 - KB1
+               END IF
+*
+               IF( WANTQ ) THEN
+*
+*                 accumulate product of plane rotations in Q
+*
+                  DO 20 J = J1, J2, KB1
+                     CALL CROT( M, Q( 1, J-1 ), 1, Q( 1, J ), 1,
+     $                          RWORK( J ), CONJG( WORK( J ) ) )
+   20             CONTINUE
+               END IF
+*
+               IF( WANTC ) THEN
+*
+*                 apply plane rotations to C
+*
+                  DO 30 J = J1, J2, KB1
+                     CALL CROT( NCC, C( J-1, 1 ), LDC, C( J, 1 ), LDC,
+     $                          RWORK( J ), WORK( J ) )
+   30             CONTINUE
+               END IF
+*
+               IF( J2+KUN.GT.N ) THEN
+*
+*                 adjust J2 to keep within the bounds of the matrix
+*
+                  NR = NR - 1
+                  J2 = J2 - KB1
+               END IF
+*
+               DO 40 J = J1, J2, KB1
+*
+*                 create nonzero element a(j-1,j+ku) above the band
+*                 and store it in WORK(n+1:2*n)
+*
+                  WORK( J+KUN ) = WORK( J )*AB( 1, J+KUN )
+                  AB( 1, J+KUN ) = RWORK( J )*AB( 1, J+KUN )
+   40          CONTINUE
+*
+*              generate plane rotations to annihilate nonzero elements
+*              which have been generated above the band
+*
+               IF( NR.GT.0 )
+     $            CALL CLARGV( NR, AB( 1, J1+KUN-1 ), INCA,
+     $                         WORK( J1+KUN ), KB1, RWORK( J1+KUN ),
+     $                         KB1 )
+*
+*              apply plane rotations from the right
+*
+               DO 50 L = 1, KB
+                  IF( J2+L-1.GT.M ) THEN
+                     NRT = NR - 1
+                  ELSE
+                     NRT = NR
+                  END IF
+                  IF( NRT.GT.0 )
+     $               CALL CLARTV( NRT, AB( L+1, J1+KUN-1 ), INCA,
+     $                            AB( L, J1+KUN ), INCA,
+     $                            RWORK( J1+KUN ), WORK( J1+KUN ), KB1 )
+   50          CONTINUE
+*
+               IF( ML.EQ.ML0 .AND. MU.GT.MU0 ) THEN
+                  IF( MU.LE.N-I+1 ) THEN
+*
+*                    generate plane rotation to annihilate a(i,i+mu-1)
+*                    within the band, and apply rotation from the right
+*
+                     CALL CLARTG( AB( KU-MU+3, I+MU-2 ),
+     $                            AB( KU-MU+2, I+MU-1 ),
+     $                            RWORK( I+MU-1 ), WORK( I+MU-1 ), RA )
+                     AB( KU-MU+3, I+MU-2 ) = RA
+                     CALL CROT( MIN( KL+MU-2, M-I ),
+     $                          AB( KU-MU+4, I+MU-2 ), 1,
+     $                          AB( KU-MU+3, I+MU-1 ), 1,
+     $                          RWORK( I+MU-1 ), WORK( I+MU-1 ) )
+                  END IF
+                  NR = NR + 1
+                  J1 = J1 - KB1
+               END IF
+*
+               IF( WANTPT ) THEN
+*
+*                 accumulate product of plane rotations in P'
+*
+                  DO 60 J = J1, J2, KB1
+                     CALL CROT( N, PT( J+KUN-1, 1 ), LDPT,
+     $                          PT( J+KUN, 1 ), LDPT, RWORK( J+KUN ),
+     $                          CONJG( WORK( J+KUN ) ) )
+   60             CONTINUE
+               END IF
+*
+               IF( J2+KB.GT.M ) THEN
+*
+*                 adjust J2 to keep within the bounds of the matrix
+*
+                  NR = NR - 1
+                  J2 = J2 - KB1
+               END IF
+*
+               DO 70 J = J1, J2, KB1
+*
+*                 create nonzero element a(j+kl+ku,j+ku-1) below the
+*                 band and store it in WORK(1:n)
+*
+                  WORK( J+KB ) = WORK( J+KUN )*AB( KLU1, J+KUN )
+                  AB( KLU1, J+KUN ) = RWORK( J+KUN )*AB( KLU1, J+KUN )
+   70          CONTINUE
+*
+               IF( ML.GT.ML0 ) THEN
+                  ML = ML - 1
+               ELSE
+                  MU = MU - 1
+               END IF
+   80       CONTINUE
+   90    CONTINUE
+      END IF
+*
+      IF( KU.EQ.0 .AND. KL.GT.0 ) THEN
+*
+*        A has been reduced to complex lower bidiagonal form
+*
+*        Transform lower bidiagonal form to upper bidiagonal by applying
+*        plane rotations from the left, overwriting superdiagonal
+*        elements on subdiagonal elements
+*
+         DO 100 I = 1, MIN( M-1, N )
+            CALL CLARTG( AB( 1, I ), AB( 2, I ), RC, RS, RA )
+            AB( 1, I ) = RA
+            IF( I.LT.N ) THEN
+               AB( 2, I ) = RS*AB( 1, I+1 )
+               AB( 1, I+1 ) = RC*AB( 1, I+1 )
+            END IF
+            IF( WANTQ )
+     $         CALL CROT( M, Q( 1, I ), 1, Q( 1, I+1 ), 1, RC,
+     $                    CONJG( RS ) )
+            IF( WANTC )
+     $         CALL CROT( NCC, C( I, 1 ), LDC, C( I+1, 1 ), LDC, RC,
+     $                    RS )
+  100    CONTINUE
+      ELSE
+*
+*        A has been reduced to complex upper bidiagonal form or is
+*        diagonal
+*
+         IF( KU.GT.0 .AND. M.LT.N ) THEN
+*
+*           Annihilate a(m,m+1) by applying plane rotations from the
+*           right
+*
+            RB = AB( KU, M+1 )
+            DO 110 I = M, 1, -1
+               CALL CLARTG( AB( KU+1, I ), RB, RC, RS, RA )
+               AB( KU+1, I ) = RA
+               IF( I.GT.1 ) THEN
+                  RB = -CONJG( RS )*AB( KU, I )
+                  AB( KU, I ) = RC*AB( KU, I )
+               END IF
+               IF( WANTPT )
+     $            CALL CROT( N, PT( I, 1 ), LDPT, PT( M+1, 1 ), LDPT,
+     $                       RC, CONJG( RS ) )
+  110       CONTINUE
+         END IF
+      END IF
+*
+*     Make diagonal and superdiagonal elements real, storing them in D
+*     and E
+*
+      T = AB( KU+1, 1 )
+      DO 120 I = 1, MINMN
+         ABST = ABS( T )
+         D( I ) = ABST
+         IF( ABST.NE.ZERO ) THEN
+            T = T / ABST
+         ELSE
+            T = CONE
+         END IF
+         IF( WANTQ )
+     $      CALL CSCAL( M, T, Q( 1, I ), 1 )
+         IF( WANTC )
+     $      CALL CSCAL( NCC, CONJG( T ), C( I, 1 ), LDC )
+         IF( I.LT.MINMN ) THEN
+            IF( KU.EQ.0 .AND. KL.EQ.0 ) THEN
+               E( I ) = ZERO
+               T = AB( 1, I+1 )
+            ELSE
+               IF( KU.EQ.0 ) THEN
+                  T = AB( 2, I )*CONJG( T )
+               ELSE
+                  T = AB( KU, I+1 )*CONJG( T )
+               END IF
+               ABST = ABS( T )
+               E( I ) = ABST
+               IF( ABST.NE.ZERO ) THEN
+                  T = T / ABST
+               ELSE
+                  T = CONE
+               END IF
+               IF( WANTPT )
+     $            CALL CSCAL( N, T, PT( I+1, 1 ), LDPT )
+               T = AB( KU+1, I+1 )*CONJG( T )
+            END IF
+         END IF
+  120 CONTINUE
+      RETURN
+*
+*     End of CGBBRD
+*
+      END
diff --git a/costa/native/external/lapack/cgbcon.f b/costa/native/external/lapack/cgbcon.f
new file mode 100644
index 000000000..4cf580f61
--- /dev/null
+++ b/costa/native/external/lapack/cgbcon.f
@@ -0,0 +1,230 @@
+      SUBROUTINE CGBCON( NORM, N, KL, KU, AB, LDAB, IPIV, ANORM, RCOND,
+     $                   WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            INFO, KL, KU, LDAB, N
+      REAL               ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               RWORK( * )
+      COMPLEX            AB( LDAB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGBCON estimates the reciprocal of the condition number of a complex
+*  general band matrix A, in either the 1-norm or the infinity-norm,
+*  using the LU factorization computed by CGBTRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as
+*     RCOND = 1 / ( norm(A) * norm(inv(A)) ).
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies whether the 1-norm condition number or the
+*          infinity-norm condition number is required:
+*          = '1' or 'O':  1-norm;
+*          = 'I':         Infinity-norm.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  AB      (input) COMPLEX array, dimension (LDAB,N)
+*          Details of the LU factorization of the band matrix A, as
+*          computed by CGBTRF.  U is stored as an upper triangular band
+*          matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and
+*          the multipliers used during the factorization are stored in
+*          rows KL+KU+2 to 2*KL+KU+1.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices; for 1 <= i <= N, row i of the matrix was
+*          interchanged with row IPIV(i).
+*
+*  ANORM   (input) REAL
+*          If NORM = '1' or 'O', the 1-norm of the original matrix A.
+*          If NORM = 'I', the infinity-norm of the original matrix A.
+*
+*  RCOND   (output) REAL
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(norm(A) * norm(inv(A))).
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LNOTI, ONENRM
+      CHARACTER          NORMIN
+      INTEGER            IX, J, JP, KASE, KASE1, KD, LM
+      REAL               AINVNM, SCALE, SMLNUM
+      COMPLEX            T, ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ICAMAX
+      REAL               SLAMCH
+      COMPLEX            CDOTC
+      EXTERNAL           LSAME, ICAMAX, SLAMCH, CDOTC
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CLACON, CLATBS, CSRSCL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MIN, REAL
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' )
+      IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.2*KL+KU+1 ) THEN
+         INFO = -6
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGBCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.EQ.ZERO ) THEN
+         RETURN
+      END IF
+*
+      SMLNUM = SLAMCH( 'Safe minimum' )
+*
+*     Estimate the norm of inv(A).
+*
+      AINVNM = ZERO
+      NORMIN = 'N'
+      IF( ONENRM ) THEN
+         KASE1 = 1
+      ELSE
+         KASE1 = 2
+      END IF
+      KD = KL + KU + 1
+      LNOTI = KL.GT.0
+      KASE = 0
+   10 CONTINUE
+      CALL CLACON( N, WORK( N+1 ), WORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+         IF( KASE.EQ.KASE1 ) THEN
+*
+*           Multiply by inv(L).
+*
+            IF( LNOTI ) THEN
+               DO 20 J = 1, N - 1
+                  LM = MIN( KL, N-J )
+                  JP = IPIV( J )
+                  T = WORK( JP )
+                  IF( JP.NE.J ) THEN
+                     WORK( JP ) = WORK( J )
+                     WORK( J ) = T
+                  END IF
+                  CALL CAXPY( LM, -T, AB( KD+1, J ), 1, WORK( J+1 ), 1 )
+   20          CONTINUE
+            END IF
+*
+*           Multiply by inv(U).
+*
+            CALL CLATBS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   KL+KU, AB, LDAB, WORK, SCALE, RWORK, INFO )
+         ELSE
+*
+*           Multiply by inv(U').
+*
+            CALL CLATBS( 'Upper', 'Conjugate transpose', 'Non-unit',
+     $                   NORMIN, N, KL+KU, AB, LDAB, WORK, SCALE, RWORK,
+     $                   INFO )
+*
+*           Multiply by inv(L').
+*
+            IF( LNOTI ) THEN
+               DO 30 J = N - 1, 1, -1
+                  LM = MIN( KL, N-J )
+                  WORK( J ) = WORK( J ) - CDOTC( LM, AB( KD+1, J ), 1,
+     $                        WORK( J+1 ), 1 )
+                  JP = IPIV( J )
+                  IF( JP.NE.J ) THEN
+                     T = WORK( JP )
+                     WORK( JP ) = WORK( J )
+                     WORK( J ) = T
+                  END IF
+   30          CONTINUE
+            END IF
+         END IF
+*
+*        Divide X by 1/SCALE if doing so will not cause overflow.
+*
+         NORMIN = 'Y'
+         IF( SCALE.NE.ONE ) THEN
+            IX = ICAMAX( N, WORK, 1 )
+            IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO )
+     $         GO TO 40
+            CALL CSRSCL( N, SCALE, WORK, 1 )
+         END IF
+         GO TO 10
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+   40 CONTINUE
+      RETURN
+*
+*     End of CGBCON
+*
+      END
diff --git a/costa/native/external/lapack/cgbequ.f b/costa/native/external/lapack/cgbequ.f
new file mode 100644
index 000000000..bb4de99fd
--- /dev/null
+++ b/costa/native/external/lapack/cgbequ.f
@@ -0,0 +1,248 @@
+      SUBROUTINE CGBEQU( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND,
+     $                   AMAX, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, KL, KU, LDAB, M, N
+      REAL               AMAX, COLCND, ROWCND
+*     ..
+*     .. Array Arguments ..
+      REAL               C( * ), R( * )
+      COMPLEX            AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGBEQU computes row and column scalings intended to equilibrate an
+*  M-by-N band matrix A and reduce its condition number.  R returns the
+*  row scale factors and C the column scale factors, chosen to try to
+*  make the largest element in each row and column of the matrix B with
+*  elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1.
+*
+*  R(i) and C(j) are restricted to be between SMLNUM = smallest safe
+*  number and BIGNUM = largest safe number.  Use of these scaling
+*  factors is not guaranteed to reduce the condition number of A but
+*  works well in practice.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  AB      (input) COMPLEX array, dimension (LDAB,N)
+*          The band matrix A, stored in rows 1 to KL+KU+1.  The j-th
+*          column of A is stored in the j-th column of the array AB as
+*          follows:
+*          AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KL+KU+1.
+*
+*  R       (output) REAL array, dimension (M)
+*          If INFO = 0, or INFO > M, R contains the row scale factors
+*          for A.
+*
+*  C       (output) REAL array, dimension (N)
+*          If INFO = 0, C contains the column scale factors for A.
+*
+*  ROWCND  (output) REAL
+*          If INFO = 0 or INFO > M, ROWCND contains the ratio of the
+*          smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and
+*          AMAX is neither too large nor too small, it is not worth
+*          scaling by R.
+*
+*  COLCND  (output) REAL
+*          If INFO = 0, COLCND contains the ratio of the smallest
+*          C(i) to the largest C(i).  If COLCND >= 0.1, it is not
+*          worth scaling by C.
+*
+*  AMAX    (output) REAL
+*          Absolute value of largest matrix element.  If AMAX is very
+*          close to overflow or very close to underflow, the matrix
+*          should be scaled.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= M:  the i-th row of A is exactly zero
+*                >  M:  the (i-M)-th column of A is exactly zero
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, KD
+      REAL               BIGNUM, RCMAX, RCMIN, SMLNUM
+      COMPLEX            ZDUM
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MAX, MIN, REAL
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KL+KU+1 ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGBEQU', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         ROWCND = ONE
+         COLCND = ONE
+         AMAX = ZERO
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SMLNUM = SLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+*
+*     Compute row scale factors.
+*
+      DO 10 I = 1, M
+         R( I ) = ZERO
+   10 CONTINUE
+*
+*     Find the maximum element in each row.
+*
+      KD = KU + 1
+      DO 30 J = 1, N
+         DO 20 I = MAX( J-KU, 1 ), MIN( J+KL, M )
+            R( I ) = MAX( R( I ), CABS1( AB( KD+I-J, J ) ) )
+   20    CONTINUE
+   30 CONTINUE
+*
+*     Find the maximum and minimum scale factors.
+*
+      RCMIN = BIGNUM
+      RCMAX = ZERO
+      DO 40 I = 1, M
+         RCMAX = MAX( RCMAX, R( I ) )
+         RCMIN = MIN( RCMIN, R( I ) )
+   40 CONTINUE
+      AMAX = RCMAX
+*
+      IF( RCMIN.EQ.ZERO ) THEN
+*
+*        Find the first zero scale factor and return an error code.
+*
+         DO 50 I = 1, M
+            IF( R( I ).EQ.ZERO ) THEN
+               INFO = I
+               RETURN
+            END IF
+   50    CONTINUE
+      ELSE
+*
+*        Invert the scale factors.
+*
+         DO 60 I = 1, M
+            R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM )
+   60    CONTINUE
+*
+*        Compute ROWCND = min(R(I)) / max(R(I))
+*
+         ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+      END IF
+*
+*     Compute column scale factors
+*
+      DO 70 J = 1, N
+         C( J ) = ZERO
+   70 CONTINUE
+*
+*     Find the maximum element in each column,
+*     assuming the row scaling computed above.
+*
+      KD = KU + 1
+      DO 90 J = 1, N
+         DO 80 I = MAX( J-KU, 1 ), MIN( J+KL, M )
+            C( J ) = MAX( C( J ), CABS1( AB( KD+I-J, J ) )*R( I ) )
+   80    CONTINUE
+   90 CONTINUE
+*
+*     Find the maximum and minimum scale factors.
+*
+      RCMIN = BIGNUM
+      RCMAX = ZERO
+      DO 100 J = 1, N
+         RCMIN = MIN( RCMIN, C( J ) )
+         RCMAX = MAX( RCMAX, C( J ) )
+  100 CONTINUE
+*
+      IF( RCMIN.EQ.ZERO ) THEN
+*
+*        Find the first zero scale factor and return an error code.
+*
+         DO 110 J = 1, N
+            IF( C( J ).EQ.ZERO ) THEN
+               INFO = M + J
+               RETURN
+            END IF
+  110    CONTINUE
+      ELSE
+*
+*        Invert the scale factors.
+*
+         DO 120 J = 1, N
+            C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM )
+  120    CONTINUE
+*
+*        Compute COLCND = min(C(J)) / max(C(J))
+*
+         COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+      END IF
+*
+      RETURN
+*
+*     End of CGBEQU
+*
+      END
diff --git a/costa/native/external/lapack/cgbrfs.f b/costa/native/external/lapack/cgbrfs.f
new file mode 100644
index 000000000..b2137a348
--- /dev/null
+++ b/costa/native/external/lapack/cgbrfs.f
@@ -0,0 +1,361 @@
+      SUBROUTINE CGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB,
+     $                   IPIV, B, LDB, X, LDX, FERR, BERR, WORK, RWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX            AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGBRFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is banded, and provides
+*  error bounds and backward error estimates for the solution.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AB      (input) COMPLEX array, dimension (LDAB,N)
+*          The original band matrix A, stored in rows 1 to KL+KU+1.
+*          The j-th column of A is stored in the j-th column of the
+*          array AB as follows:
+*          AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KL+KU+1.
+*
+*  AFB     (input) COMPLEX array, dimension (LDAFB,N)
+*          Details of the LU factorization of the band matrix A, as
+*          computed by CGBTRF.  U is stored as an upper triangular band
+*          matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and
+*          the multipliers used during the factorization are stored in
+*          rows KL+KU+2 to 2*KL+KU+1.
+*
+*  LDAFB   (input) INTEGER
+*          The leading dimension of the array AFB.  LDAFB >= 2*KL*KU+1.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices from CGBTRF; for 1<=i<=N, row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  B       (input) COMPLEX array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) COMPLEX array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by CGBTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+      COMPLEX            CONE
+      PARAMETER          ( CONE = ( 1.0E+0, 0.0E+0 ) )
+      REAL               TWO
+      PARAMETER          ( TWO = 2.0E+0 )
+      REAL               THREE
+      PARAMETER          ( THREE = 3.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN
+      CHARACTER          TRANSN, TRANST
+      INTEGER            COUNT, I, J, K, KASE, KK, NZ
+      REAL               EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+      COMPLEX            ZDUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CCOPY, CGBMV, CGBTRS, CLACON, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MAX, MIN, REAL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $    LSAME( TRANS, 'C' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.KL+KU+1 ) THEN
+         INFO = -7
+      ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN
+         INFO = -9
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -12
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -14
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGBRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      IF( NOTRAN ) THEN
+         TRANSN = 'N'
+         TRANST = 'C'
+      ELSE
+         TRANSN = 'C'
+         TRANST = 'N'
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = MIN( KL+KU+2, N+1 )
+      EPS = SLAMCH( 'Epsilon' )
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - op(A) * X,
+*        where op(A) = A, A**T, or A**H, depending on TRANS.
+*
+         CALL CCOPY( N, B( 1, J ), 1, WORK, 1 )
+         CALL CGBMV( TRANS, N, N, KL, KU, -CONE, AB, LDAB, X( 1, J ), 1,
+     $               CONE, WORK, 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            RWORK( I ) = CABS1( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(op(A))*abs(X) + abs(B).
+*
+         IF( NOTRAN ) THEN
+            DO 50 K = 1, N
+               KK = KU + 1 - K
+               XK = CABS1( X( K, J ) )
+               DO 40 I = MAX( 1, K-KU ), MIN( N, K+KL )
+                  RWORK( I ) = RWORK( I ) + CABS1( AB( KK+I, K ) )*XK
+   40          CONTINUE
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               KK = KU + 1 - K
+               DO 60 I = MAX( 1, K-KU ), MIN( N, K+KL )
+                  S = S + CABS1( AB( KK+I, K ) )*CABS1( X( I, J ) )
+   60          CONTINUE
+               RWORK( K ) = RWORK( K ) + S
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) )
+            ELSE
+               S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) /
+     $             ( RWORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL CGBTRS( TRANS, N, KL, KU, 1, AFB, LDAFB, IPIV, WORK, N,
+     $                   INFO )
+            CALL CAXPY( N, CONE, WORK, 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(op(A)))*
+*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(op(A)) is the inverse of op(A)
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(op(A))*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use CLACON to estimate the infinity-norm of the matrix
+*           inv(op(A)) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I )
+            ELSE
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) +
+     $                      SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL CLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(op(A)**H).
+*
+               CALL CGBTRS( TRANST, N, KL, KU, 1, AFB, LDAFB, IPIV,
+     $                      WORK, N, INFO )
+               DO 110 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  110          CONTINUE
+            ELSE
+*
+*              Multiply by inv(op(A))*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  120          CONTINUE
+               CALL CGBTRS( TRANSN, N, KL, KU, 1, AFB, LDAFB, IPIV,
+     $                      WORK, N, INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of CGBRFS
+*
+      END
diff --git a/costa/native/external/lapack/cgbsv.f b/costa/native/external/lapack/cgbsv.f
new file mode 100644
index 000000000..ad5630ed5
--- /dev/null
+++ b/costa/native/external/lapack/cgbsv.f
@@ -0,0 +1,143 @@
+      SUBROUTINE CGBSV( N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, KL, KU, LDAB, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            AB( LDAB, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGBSV computes the solution to a complex system of linear equations
+*  A * X = B, where A is a band matrix of order N with KL subdiagonals
+*  and KU superdiagonals, and X and B are N-by-NRHS matrices.
+*
+*  The LU decomposition with partial pivoting and row interchanges is
+*  used to factor A as A = L * U, where L is a product of permutation
+*  and unit lower triangular matrices with KL subdiagonals, and U is
+*  upper triangular with KL+KU superdiagonals.  The factored form of A
+*  is then used to solve the system of equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AB      (input/output) COMPLEX array, dimension (LDAB,N)
+*          On entry, the matrix A in band storage, in rows KL+1 to
+*          2*KL+KU+1; rows 1 to KL of the array need not be set.
+*          The j-th column of A is stored in the j-th column of the
+*          array AB as follows:
+*          AB(KL+KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+KL)
+*          On exit, details of the factorization: U is stored as an
+*          upper triangular band matrix with KL+KU superdiagonals in
+*          rows 1 to KL+KU+1, and the multipliers used during the
+*          factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
+*          See below for further details.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          The pivot indices that define the permutation matrix P;
+*          row i of the matrix was interchanged with row IPIV(i).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, U(i,i) is exactly zero.  The factorization
+*                has been completed, but the factor U is exactly
+*                singular, and the solution has not been computed.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  M = N = 6, KL = 2, KU = 1:
+*
+*  On entry:                       On exit:
+*
+*      *    *    *    +    +    +       *    *    *   u14  u25  u36
+*      *    *    +    +    +    +       *    *   u13  u24  u35  u46
+*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
+*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
+*     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
+*     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *
+*
+*  Array elements marked * are not used by the routine; elements marked
+*  + need not be set on entry, but are required by the routine to store
+*  elements of U because of fill-in resulting from the row interchanges.
+*
+*  =====================================================================
+*
+*     .. External Subroutines ..
+      EXTERNAL           CGBTRF, CGBTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.2*KL+KU+1 ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( N, 1 ) ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGBSV ', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the LU factorization of the band matrix A.
+*
+      CALL CGBTRF( N, N, KL, KU, AB, LDAB, IPIV, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL CGBTRS( 'No transpose', N, KL, KU, NRHS, AB, LDAB, IPIV,
+     $                B, LDB, INFO )
+      END IF
+      RETURN
+*
+*     End of CGBSV
+*
+      END
diff --git a/costa/native/external/lapack/cgbsvx.f b/costa/native/external/lapack/cgbsvx.f
new file mode 100644
index 000000000..78289c60d
--- /dev/null
+++ b/costa/native/external/lapack/cgbsvx.f
@@ -0,0 +1,518 @@
+      SUBROUTINE CGBSVX( FACT, TRANS, N, KL, KU, NRHS, AB, LDAB, AFB,
+     $                   LDAFB, IPIV, EQUED, R, C, B, LDB, X, LDX,
+     $                   RCOND, FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, FACT, TRANS
+      INTEGER            INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               BERR( * ), C( * ), FERR( * ), R( * ),
+     $                   RWORK( * )
+      COMPLEX            AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGBSVX uses the LU factorization to compute the solution to a complex
+*  system of linear equations A * X = B, A**T * X = B, or A**H * X = B,
+*  where A is a band matrix of order N with KL subdiagonals and KU
+*  superdiagonals, and X and B are N-by-NRHS matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed by this subroutine:
+*
+*  1. If FACT = 'E', real scaling factors are computed to equilibrate
+*     the system:
+*        TRANS = 'N':  diag(R)*A*diag(C)     *inv(diag(C))*X = diag(R)*B
+*        TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
+*        TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
+*     Whether or not the system will be equilibrated depends on the
+*     scaling of the matrix A, but if equilibration is used, A is
+*     overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N')
+*     or diag(C)*B (if TRANS = 'T' or 'C').
+*
+*  2. If FACT = 'N' or 'E', the LU decomposition is used to factor the
+*     matrix A (after equilibration if FACT = 'E') as
+*        A = L * U,
+*     where L is a product of permutation and unit lower triangular
+*     matrices with KL subdiagonals, and U is upper triangular with
+*     KL+KU superdiagonals.
+*
+*  3. If some U(i,i)=0, so that U is exactly singular, then the routine
+*     returns with INFO = i. Otherwise, the factored form of A is used
+*     to estimate the condition number of the matrix A.  If the
+*     reciprocal of the condition number is less than machine precision,
+*     INFO = N+1 is returned as a warning, but the routine still goes on
+*     to solve for X and compute error bounds as described below.
+*
+*  4. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  5. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  6. If equilibration was used, the matrix X is premultiplied by
+*     diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so
+*     that it solves the original system before equilibration.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of the matrix A is
+*          supplied on entry, and if not, whether the matrix A should be
+*          equilibrated before it is factored.
+*          = 'F':  On entry, AFB and IPIV contain the factored form of
+*                  A.  If EQUED is not 'N', the matrix A has been
+*                  equilibrated with scaling factors given by R and C.
+*                  AB, AFB, and IPIV are not modified.
+*          = 'N':  The matrix A will be copied to AFB and factored.
+*          = 'E':  The matrix A will be equilibrated if necessary, then
+*                  copied to AFB and factored.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations.
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose)
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AB      (input/output) COMPLEX array, dimension (LDAB,N)
+*          On entry, the matrix A in band storage, in rows 1 to KL+KU+1.
+*          The j-th column of A is stored in the j-th column of the
+*          array AB as follows:
+*          AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl)
+*
+*          If FACT = 'F' and EQUED is not 'N', then A must have been
+*          equilibrated by the scaling factors in R and/or C.  AB is not
+*          modified if FACT = 'F' or 'N', or if FACT = 'E' and
+*          EQUED = 'N' on exit.
+*
+*          On exit, if EQUED .ne. 'N', A is scaled as follows:
+*          EQUED = 'R':  A := diag(R) * A
+*          EQUED = 'C':  A := A * diag(C)
+*          EQUED = 'B':  A := diag(R) * A * diag(C).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KL+KU+1.
+*
+*  AFB     (input or output) COMPLEX array, dimension (LDAFB,N)
+*          If FACT = 'F', then AFB is an input argument and on entry
+*          contains details of the LU factorization of the band matrix
+*          A, as computed by CGBTRF.  U is stored as an upper triangular
+*          band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1,
+*          and the multipliers used during the factorization are stored
+*          in rows KL+KU+2 to 2*KL+KU+1.  If EQUED .ne. 'N', then AFB is
+*          the factored form of the equilibrated matrix A.
+*
+*          If FACT = 'N', then AFB is an output argument and on exit
+*          returns details of the LU factorization of A.
+*
+*          If FACT = 'E', then AFB is an output argument and on exit
+*          returns details of the LU factorization of the equilibrated
+*          matrix A (see the description of AB for the form of the
+*          equilibrated matrix).
+*
+*  LDAFB   (input) INTEGER
+*          The leading dimension of the array AFB.  LDAFB >= 2*KL+KU+1.
+*
+*  IPIV    (input or output) INTEGER array, dimension (N)
+*          If FACT = 'F', then IPIV is an input argument and on entry
+*          contains the pivot indices from the factorization A = L*U
+*          as computed by CGBTRF; row i of the matrix was interchanged
+*          with row IPIV(i).
+*
+*          If FACT = 'N', then IPIV is an output argument and on exit
+*          contains the pivot indices from the factorization A = L*U
+*          of the original matrix A.
+*
+*          If FACT = 'E', then IPIV is an output argument and on exit
+*          contains the pivot indices from the factorization A = L*U
+*          of the equilibrated matrix A.
+*
+*  EQUED   (input or output) CHARACTER*1
+*          Specifies the form of equilibration that was done.
+*          = 'N':  No equilibration (always true if FACT = 'N').
+*          = 'R':  Row equilibration, i.e., A has been premultiplied by
+*                  diag(R).
+*          = 'C':  Column equilibration, i.e., A has been postmultiplied
+*                  by diag(C).
+*          = 'B':  Both row and column equilibration, i.e., A has been
+*                  replaced by diag(R) * A * diag(C).
+*          EQUED is an input argument if FACT = 'F'; otherwise, it is an
+*          output argument.
+*
+*  R       (input or output) REAL array, dimension (N)
+*          The row scale factors for A.  If EQUED = 'R' or 'B', A is
+*          multiplied on the left by diag(R); if EQUED = 'N' or 'C', R
+*          is not accessed.  R is an input argument if FACT = 'F';
+*          otherwise, R is an output argument.  If FACT = 'F' and
+*          EQUED = 'R' or 'B', each element of R must be positive.
+*
+*  C       (input or output) REAL array, dimension (N)
+*          The column scale factors for A.  If EQUED = 'C' or 'B', A is
+*          multiplied on the right by diag(C); if EQUED = 'N' or 'R', C
+*          is not accessed.  C is an input argument if FACT = 'F';
+*          otherwise, C is an output argument.  If FACT = 'F' and
+*          EQUED = 'C' or 'B', each element of C must be positive.
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit,
+*          if EQUED = 'N', B is not modified;
+*          if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by
+*          diag(R)*B;
+*          if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is
+*          overwritten by diag(C)*B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) COMPLEX array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X
+*          to the original system of equations.  Note that A and B are
+*          modified on exit if EQUED .ne. 'N', and the solution to the
+*          equilibrated system is inv(diag(C))*X if TRANS = 'N' and
+*          EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C'
+*          and EQUED = 'R' or 'B'.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) REAL
+*          The estimate of the reciprocal condition number of the matrix
+*          A after equilibration (if done).  If RCOND is less than the
+*          machine precision (in particular, if RCOND = 0), the matrix
+*          is singular to working precision.  This condition is
+*          indicated by a return code of INFO > 0.
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace/output) REAL array, dimension (N)
+*          On exit, RWORK(1) contains the reciprocal pivot growth
+*          factor norm(A)/norm(U). The "max absolute element" norm is
+*          used. If RWORK(1) is much less than 1, then the stability
+*          of the LU factorization of the (equilibrated) matrix A
+*          could be poor. This also means that the solution X, condition
+*          estimator RCOND, and forward error bound FERR could be
+*          unreliable. If factorization fails with 0<INFO<=N, then
+*          RWORK(1) contains the reciprocal pivot growth factor for the
+*          leading INFO columns of A.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= N:  U(i,i) is exactly zero.  The factorization
+*                       has been completed, but the factor U is exactly
+*                       singular, so the solution and error bounds
+*                       could not be computed. RCOND = 0 is returned.
+*                = N+1: U is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU
+      CHARACTER          NORM
+      INTEGER            I, INFEQU, J, J1, J2
+      REAL               AMAX, ANORM, BIGNUM, COLCND, RCMAX, RCMIN,
+     $                   ROWCND, RPVGRW, SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               CLANGB, CLANTB, SLAMCH
+      EXTERNAL           LSAME, CLANGB, CLANTB, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CGBCON, CGBEQU, CGBRFS, CGBTRF, CGBTRS,
+     $                   CLACPY, CLAQGB, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      EQUIL = LSAME( FACT, 'E' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( NOFACT .OR. EQUIL ) THEN
+         EQUED = 'N'
+         ROWEQU = .FALSE.
+         COLEQU = .FALSE.
+      ELSE
+         ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' )
+         COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' )
+         SMLNUM = SLAMCH( 'Safe minimum' )
+         BIGNUM = ONE / SMLNUM
+      END IF
+*
+*     Test the input parameters.
+*
+      IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) )
+     $     THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDAB.LT.KL+KU+1 ) THEN
+         INFO = -8
+      ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN
+         INFO = -10
+      ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT.
+     $         ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN
+         INFO = -12
+      ELSE
+         IF( ROWEQU ) THEN
+            RCMIN = BIGNUM
+            RCMAX = ZERO
+            DO 10 J = 1, N
+               RCMIN = MIN( RCMIN, R( J ) )
+               RCMAX = MAX( RCMAX, R( J ) )
+   10       CONTINUE
+            IF( RCMIN.LE.ZERO ) THEN
+               INFO = -13
+            ELSE IF( N.GT.0 ) THEN
+               ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+            ELSE
+               ROWCND = ONE
+            END IF
+         END IF
+         IF( COLEQU .AND. INFO.EQ.0 ) THEN
+            RCMIN = BIGNUM
+            RCMAX = ZERO
+            DO 20 J = 1, N
+               RCMIN = MIN( RCMIN, C( J ) )
+               RCMAX = MAX( RCMAX, C( J ) )
+   20       CONTINUE
+            IF( RCMIN.LE.ZERO ) THEN
+               INFO = -14
+            ELSE IF( N.GT.0 ) THEN
+               COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+            ELSE
+               COLCND = ONE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            IF( LDB.LT.MAX( 1, N ) ) THEN
+               INFO = -16
+            ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+               INFO = -18
+            END IF
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGBSVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( EQUIL ) THEN
+*
+*        Compute row and column scalings to equilibrate the matrix A.
+*
+         CALL CGBEQU( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND,
+     $                AMAX, INFEQU )
+         IF( INFEQU.EQ.0 ) THEN
+*
+*           Equilibrate the matrix.
+*
+            CALL CLAQGB( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND,
+     $                   AMAX, EQUED )
+            ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' )
+            COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' )
+         END IF
+      END IF
+*
+*     Scale the right hand side.
+*
+      IF( NOTRAN ) THEN
+         IF( ROWEQU ) THEN
+            DO 40 J = 1, NRHS
+               DO 30 I = 1, N
+                  B( I, J ) = R( I )*B( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+      ELSE IF( COLEQU ) THEN
+         DO 60 J = 1, NRHS
+            DO 50 I = 1, N
+               B( I, J ) = C( I )*B( I, J )
+   50       CONTINUE
+   60    CONTINUE
+      END IF
+*
+      IF( NOFACT .OR. EQUIL ) THEN
+*
+*        Compute the LU factorization of the band matrix A.
+*
+         DO 70 J = 1, N
+            J1 = MAX( J-KU, 1 )
+            J2 = MIN( J+KL, N )
+            CALL CCOPY( J2-J1+1, AB( KU+1-J+J1, J ), 1,
+     $                  AFB( KL+KU+1-J+J1, J ), 1 )
+   70    CONTINUE
+*
+         CALL CGBTRF( N, N, KL, KU, AFB, LDAFB, IPIV, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 ) THEN
+*
+*              Compute the reciprocal pivot growth factor of the
+*              leading rank-deficient INFO columns of A.
+*
+               ANORM = ZERO
+               DO 90 J = 1, INFO
+                  DO 80 I = MAX( KU+2-J, 1 ),
+     $                    MIN( N+KU+1-J, KL+KU+1 )
+                     ANORM = MAX( ANORM, ABS( AB( I, J ) ) )
+   80             CONTINUE
+   90          CONTINUE
+               RPVGRW = CLANTB( 'M', 'U', 'N', INFO,
+     $                  MIN( INFO-1, KL+KU ), AFB( MAX( 1,
+     $                  KL+KU+2-INFO ), 1 ), LDAFB, RWORK )
+               IF( RPVGRW.EQ.ZERO ) THEN
+                  RPVGRW = ONE
+               ELSE
+                  RPVGRW = ANORM / RPVGRW
+               END IF
+               RWORK( 1 ) = RPVGRW
+               RCOND = ZERO
+            END IF
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A and the
+*     reciprocal pivot growth factor RPVGRW.
+*
+      IF( NOTRAN ) THEN
+         NORM = '1'
+      ELSE
+         NORM = 'I'
+      END IF
+      ANORM = CLANGB( NORM, N, KL, KU, AB, LDAB, RWORK )
+      RPVGRW = CLANTB( 'M', 'U', 'N', N, KL+KU, AFB, LDAFB, RWORK )
+      IF( RPVGRW.EQ.ZERO ) THEN
+         RPVGRW = ONE
+      ELSE
+         RPVGRW = CLANGB( 'M', N, KL, KU, AB, LDAB, RWORK ) / RPVGRW
+      END IF
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL CGBCON( NORM, N, KL, KU, AFB, LDAFB, IPIV, ANORM, RCOND,
+     $             WORK, RWORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.SLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution matrix X.
+*
+      CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL CGBTRS( TRANS, N, KL, KU, NRHS, AFB, LDAFB, IPIV, X, LDX,
+     $             INFO )
+*
+*     Use iterative refinement to improve the computed solution and
+*     compute error bounds and backward error estimates for it.
+*
+      CALL CGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, IPIV,
+     $             B, LDB, X, LDX, FERR, BERR, WORK, RWORK, INFO )
+*
+*     Transform the solution matrix X to a solution of the original
+*     system.
+*
+      IF( NOTRAN ) THEN
+         IF( COLEQU ) THEN
+            DO 110 J = 1, NRHS
+               DO 100 I = 1, N
+                  X( I, J ) = C( I )*X( I, J )
+  100          CONTINUE
+  110       CONTINUE
+            DO 120 J = 1, NRHS
+               FERR( J ) = FERR( J ) / COLCND
+  120       CONTINUE
+         END IF
+      ELSE IF( ROWEQU ) THEN
+         DO 140 J = 1, NRHS
+            DO 130 I = 1, N
+               X( I, J ) = R( I )*X( I, J )
+  130       CONTINUE
+  140    CONTINUE
+         DO 150 J = 1, NRHS
+            FERR( J ) = FERR( J ) / ROWCND
+  150    CONTINUE
+      END IF
+*
+      RWORK( 1 ) = RPVGRW
+      RETURN
+*
+*     End of CGBSVX
+*
+      END
diff --git a/costa/native/external/lapack/cgbtf2.f b/costa/native/external/lapack/cgbtf2.f
new file mode 100644
index 000000000..8a8b5ca95
--- /dev/null
+++ b/costa/native/external/lapack/cgbtf2.f
@@ -0,0 +1,203 @@
+      SUBROUTINE CGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, KL, KU, LDAB, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGBTF2 computes an LU factorization of a complex m-by-n band matrix
+*  A using partial pivoting with row interchanges.
+*
+*  This is the unblocked version of the algorithm, calling Level 2 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  AB      (input/output) COMPLEX array, dimension (LDAB,N)
+*          On entry, the matrix A in band storage, in rows KL+1 to
+*          2*KL+KU+1; rows 1 to KL of the array need not be set.
+*          The j-th column of A is stored in the j-th column of the
+*          array AB as follows:
+*          AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
+*
+*          On exit, details of the factorization: U is stored as an
+*          upper triangular band matrix with KL+KU superdiagonals in
+*          rows 1 to KL+KU+1, and the multipliers used during the
+*          factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
+*          See below for further details.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
+*
+*  IPIV    (output) INTEGER array, dimension (min(M,N))
+*          The pivot indices; for 1 <= i <= min(M,N), row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
+*               has been completed, but the factor U is exactly
+*               singular, and division by zero will occur if it is used
+*               to solve a system of equations.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  M = N = 6, KL = 2, KU = 1:
+*
+*  On entry:                       On exit:
+*
+*      *    *    *    +    +    +       *    *    *   u14  u25  u36
+*      *    *    +    +    +    +       *    *   u13  u24  u35  u46
+*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
+*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
+*     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
+*     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *
+*
+*  Array elements marked * are not used by the routine; elements marked
+*  + need not be set on entry, but are required by the routine to store
+*  elements of U, because of fill-in resulting from the row
+*  interchanges.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ),
+     $                   ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, JP, JU, KM, KV
+*     ..
+*     .. External Functions ..
+      INTEGER            ICAMAX
+      EXTERNAL           ICAMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGERU, CSCAL, CSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     KV is the number of superdiagonals in the factor U, allowing for
+*     fill-in.
+*
+      KV = KU + KL
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KL+KV+1 ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGBTF2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     Gaussian elimination with partial pivoting
+*
+*     Set fill-in elements in columns KU+2 to KV to zero.
+*
+      DO 20 J = KU + 2, MIN( KV, N )
+         DO 10 I = KV - J + 2, KL
+            AB( I, J ) = ZERO
+   10    CONTINUE
+   20 CONTINUE
+*
+*     JU is the index of the last column affected by the current stage
+*     of the factorization.
+*
+      JU = 1
+*
+      DO 40 J = 1, MIN( M, N )
+*
+*        Set fill-in elements in column J+KV to zero.
+*
+         IF( J+KV.LE.N ) THEN
+            DO 30 I = 1, KL
+               AB( I, J+KV ) = ZERO
+   30       CONTINUE
+         END IF
+*
+*        Find pivot and test for singularity. KM is the number of
+*        subdiagonal elements in the current column.
+*
+         KM = MIN( KL, M-J )
+         JP = ICAMAX( KM+1, AB( KV+1, J ), 1 )
+         IPIV( J ) = JP + J - 1
+         IF( AB( KV+JP, J ).NE.ZERO ) THEN
+            JU = MAX( JU, MIN( J+KU+JP-1, N ) )
+*
+*           Apply interchange to columns J to JU.
+*
+            IF( JP.NE.1 )
+     $         CALL CSWAP( JU-J+1, AB( KV+JP, J ), LDAB-1,
+     $                     AB( KV+1, J ), LDAB-1 )
+            IF( KM.GT.0 ) THEN
+*
+*              Compute multipliers.
+*
+               CALL CSCAL( KM, ONE / AB( KV+1, J ), AB( KV+2, J ), 1 )
+*
+*              Update trailing submatrix within the band.
+*
+               IF( JU.GT.J )
+     $            CALL CGERU( KM, JU-J, -ONE, AB( KV+2, J ), 1,
+     $                        AB( KV, J+1 ), LDAB-1, AB( KV+1, J+1 ),
+     $                        LDAB-1 )
+            END IF
+         ELSE
+*
+*           If pivot is zero, set INFO to the index of the pivot
+*           unless a zero pivot has already been found.
+*
+            IF( INFO.EQ.0 )
+     $         INFO = J
+         END IF
+   40 CONTINUE
+      RETURN
+*
+*     End of CGBTF2
+*
+      END
diff --git a/costa/native/external/lapack/cgbtrf.f b/costa/native/external/lapack/cgbtrf.f
new file mode 100644
index 000000000..bf61bb148
--- /dev/null
+++ b/costa/native/external/lapack/cgbtrf.f
@@ -0,0 +1,443 @@
+      SUBROUTINE CGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, KL, KU, LDAB, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGBTRF computes an LU factorization of a complex m-by-n band matrix A
+*  using partial pivoting with row interchanges.
+*
+*  This is the blocked version of the algorithm, calling Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  AB      (input/output) COMPLEX array, dimension (LDAB,N)
+*          On entry, the matrix A in band storage, in rows KL+1 to
+*          2*KL+KU+1; rows 1 to KL of the array need not be set.
+*          The j-th column of A is stored in the j-th column of the
+*          array AB as follows:
+*          AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
+*
+*          On exit, details of the factorization: U is stored as an
+*          upper triangular band matrix with KL+KU superdiagonals in
+*          rows 1 to KL+KU+1, and the multipliers used during the
+*          factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
+*          See below for further details.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
+*
+*  IPIV    (output) INTEGER array, dimension (min(M,N))
+*          The pivot indices; for 1 <= i <= min(M,N), row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
+*               has been completed, but the factor U is exactly
+*               singular, and division by zero will occur if it is used
+*               to solve a system of equations.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  M = N = 6, KL = 2, KU = 1:
+*
+*  On entry:                       On exit:
+*
+*      *    *    *    +    +    +       *    *    *   u14  u25  u36
+*      *    *    +    +    +    +       *    *   u13  u24  u35  u46
+*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
+*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
+*     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
+*     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *
+*
+*  Array elements marked * are not used by the routine; elements marked
+*  + need not be set on entry, but are required by the routine to store
+*  elements of U because of fill-in resulting from the row interchanges.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ),
+     $                   ZERO = ( 0.0E+0, 0.0E+0 ) )
+      INTEGER            NBMAX, LDWORK
+      PARAMETER          ( NBMAX = 64, LDWORK = NBMAX+1 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
+     $                   JU, K2, KM, KV, NB, NW
+      COMPLEX            TEMP
+*     ..
+*     .. Local Arrays ..
+      COMPLEX            WORK13( LDWORK, NBMAX ),
+     $                   WORK31( LDWORK, NBMAX )
+*     ..
+*     .. External Functions ..
+      INTEGER            ICAMAX, ILAENV
+      EXTERNAL           ICAMAX, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CGBTF2, CGEMM, CGERU, CLASWP, CSCAL,
+     $                   CSWAP, CTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     KV is the number of superdiagonals in the factor U, allowing for
+*     fill-in
+*
+      KV = KU + KL
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KL+KV+1 ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGBTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment
+*
+      NB = ILAENV( 1, 'CGBTRF', ' ', M, N, KL, KU )
+*
+*     The block size must not exceed the limit set by the size of the
+*     local arrays WORK13 and WORK31.
+*
+      NB = MIN( NB, NBMAX )
+*
+      IF( NB.LE.1 .OR. NB.GT.KL ) THEN
+*
+*        Use unblocked code
+*
+         CALL CGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
+      ELSE
+*
+*        Use blocked code
+*
+*        Zero the superdiagonal elements of the work array WORK13
+*
+         DO 20 J = 1, NB
+            DO 10 I = 1, J - 1
+               WORK13( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+*
+*        Zero the subdiagonal elements of the work array WORK31
+*
+         DO 40 J = 1, NB
+            DO 30 I = J + 1, NB
+               WORK31( I, J ) = ZERO
+   30       CONTINUE
+   40    CONTINUE
+*
+*        Gaussian elimination with partial pivoting
+*
+*        Set fill-in elements in columns KU+2 to KV to zero
+*
+         DO 60 J = KU + 2, MIN( KV, N )
+            DO 50 I = KV - J + 2, KL
+               AB( I, J ) = ZERO
+   50       CONTINUE
+   60    CONTINUE
+*
+*        JU is the index of the last column affected by the current
+*        stage of the factorization
+*
+         JU = 1
+*
+         DO 180 J = 1, MIN( M, N ), NB
+            JB = MIN( NB, MIN( M, N )-J+1 )
+*
+*           The active part of the matrix is partitioned
+*
+*              A11   A12   A13
+*              A21   A22   A23
+*              A31   A32   A33
+*
+*           Here A11, A21 and A31 denote the current block of JB columns
+*           which is about to be factorized. The number of rows in the
+*           partitioning are JB, I2, I3 respectively, and the numbers
+*           of columns are JB, J2, J3. The superdiagonal elements of A13
+*           and the subdiagonal elements of A31 lie outside the band.
+*
+            I2 = MIN( KL-JB, M-J-JB+1 )
+            I3 = MIN( JB, M-J-KL+1 )
+*
+*           J2 and J3 are computed after JU has been updated.
+*
+*           Factorize the current block of JB columns
+*
+            DO 80 JJ = J, J + JB - 1
+*
+*              Set fill-in elements in column JJ+KV to zero
+*
+               IF( JJ+KV.LE.N ) THEN
+                  DO 70 I = 1, KL
+                     AB( I, JJ+KV ) = ZERO
+   70             CONTINUE
+               END IF
+*
+*              Find pivot and test for singularity. KM is the number of
+*              subdiagonal elements in the current column.
+*
+               KM = MIN( KL, M-JJ )
+               JP = ICAMAX( KM+1, AB( KV+1, JJ ), 1 )
+               IPIV( JJ ) = JP + JJ - J
+               IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
+                  JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
+                  IF( JP.NE.1 ) THEN
+*
+*                    Apply interchange to columns J to J+JB-1
+*
+                     IF( JP+JJ-1.LT.J+KL ) THEN
+*
+                        CALL CSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
+     $                              AB( KV+JP+JJ-J, J ), LDAB-1 )
+                     ELSE
+*
+*                       The interchange affects columns J to JJ-1 of A31
+*                       which are stored in the work array WORK31
+*
+                        CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
+     $                              WORK31( JP+JJ-J-KL, 1 ), LDWORK )
+                        CALL CSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
+     $                              AB( KV+JP, JJ ), LDAB-1 )
+                     END IF
+                  END IF
+*
+*                 Compute multipliers
+*
+                  CALL CSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
+     $                        1 )
+*
+*                 Update trailing submatrix within the band and within
+*                 the current block. JM is the index of the last column
+*                 which needs to be updated.
+*
+                  JM = MIN( JU, J+JB-1 )
+                  IF( JM.GT.JJ )
+     $               CALL CGERU( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
+     $                           AB( KV, JJ+1 ), LDAB-1,
+     $                           AB( KV+1, JJ+1 ), LDAB-1 )
+               ELSE
+*
+*                 If pivot is zero, set INFO to the index of the pivot
+*                 unless a zero pivot has already been found.
+*
+                  IF( INFO.EQ.0 )
+     $               INFO = JJ
+               END IF
+*
+*              Copy current column of A31 into the work array WORK31
+*
+               NW = MIN( JJ-J+1, I3 )
+               IF( NW.GT.0 )
+     $            CALL CCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
+     $                        WORK31( 1, JJ-J+1 ), 1 )
+   80       CONTINUE
+            IF( J+JB.LE.N ) THEN
+*
+*              Apply the row interchanges to the other blocks.
+*
+               J2 = MIN( JU-J+1, KV ) - JB
+               J3 = MAX( 0, JU-J-KV+1 )
+*
+*              Use CLASWP to apply the row interchanges to A12, A22, and
+*              A32.
+*
+               CALL CLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB,
+     $                      IPIV( J ), 1 )
+*
+*              Adjust the pivot indices.
+*
+               DO 90 I = J, J + JB - 1
+                  IPIV( I ) = IPIV( I ) + J - 1
+   90          CONTINUE
+*
+*              Apply the row interchanges to A13, A23, and A33
+*              columnwise.
+*
+               K2 = J - 1 + JB + J2
+               DO 110 I = 1, J3
+                  JJ = K2 + I
+                  DO 100 II = J + I - 1, J + JB - 1
+                     IP = IPIV( II )
+                     IF( IP.NE.II ) THEN
+                        TEMP = AB( KV+1+II-JJ, JJ )
+                        AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
+                        AB( KV+1+IP-JJ, JJ ) = TEMP
+                     END IF
+  100             CONTINUE
+  110          CONTINUE
+*
+*              Update the relevant part of the trailing submatrix
+*
+               IF( J2.GT.0 ) THEN
+*
+*                 Update A12
+*
+                  CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+     $                        JB, J2, ONE, AB( KV+1, J ), LDAB-1,
+     $                        AB( KV+1-JB, J+JB ), LDAB-1 )
+*
+                  IF( I2.GT.0 ) THEN
+*
+*                    Update A22
+*
+                     CALL CGEMM( 'No transpose', 'No transpose', I2, J2,
+     $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
+     $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
+     $                           AB( KV+1, J+JB ), LDAB-1 )
+                  END IF
+*
+                  IF( I3.GT.0 ) THEN
+*
+*                    Update A32
+*
+                     CALL CGEMM( 'No transpose', 'No transpose', I3, J2,
+     $                           JB, -ONE, WORK31, LDWORK,
+     $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
+     $                           AB( KV+KL+1-JB, J+JB ), LDAB-1 )
+                  END IF
+               END IF
+*
+               IF( J3.GT.0 ) THEN
+*
+*                 Copy the lower triangle of A13 into the work array
+*                 WORK13
+*
+                  DO 130 JJ = 1, J3
+                     DO 120 II = JJ, JB
+                        WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
+  120                CONTINUE
+  130             CONTINUE
+*
+*                 Update A13 in the work array
+*
+                  CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+     $                        JB, J3, ONE, AB( KV+1, J ), LDAB-1,
+     $                        WORK13, LDWORK )
+*
+                  IF( I2.GT.0 ) THEN
+*
+*                    Update A23
+*
+                     CALL CGEMM( 'No transpose', 'No transpose', I2, J3,
+     $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
+     $                           WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
+     $                           LDAB-1 )
+                  END IF
+*
+                  IF( I3.GT.0 ) THEN
+*
+*                    Update A33
+*
+                     CALL CGEMM( 'No transpose', 'No transpose', I3, J3,
+     $                           JB, -ONE, WORK31, LDWORK, WORK13,
+     $                           LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
+                  END IF
+*
+*                 Copy the lower triangle of A13 back into place
+*
+                  DO 150 JJ = 1, J3
+                     DO 140 II = JJ, JB
+                        AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
+  140                CONTINUE
+  150             CONTINUE
+               END IF
+            ELSE
+*
+*              Adjust the pivot indices.
+*
+               DO 160 I = J, J + JB - 1
+                  IPIV( I ) = IPIV( I ) + J - 1
+  160          CONTINUE
+            END IF
+*
+*           Partially undo the interchanges in the current block to
+*           restore the upper triangular form of A31 and copy the upper
+*           triangle of A31 back into place
+*
+            DO 170 JJ = J + JB - 1, J, -1
+               JP = IPIV( JJ ) - JJ + 1
+               IF( JP.NE.1 ) THEN
+*
+*                 Apply interchange to columns J to JJ-1
+*
+                  IF( JP+JJ-1.LT.J+KL ) THEN
+*
+*                    The interchange does not affect A31
+*
+                     CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
+     $                           AB( KV+JP+JJ-J, J ), LDAB-1 )
+                  ELSE
+*
+*                    The interchange does affect A31
+*
+                     CALL CSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
+     $                           WORK31( JP+JJ-J-KL, 1 ), LDWORK )
+                  END IF
+               END IF
+*
+*              Copy the current column of A31 back into place
+*
+               NW = MIN( I3, JJ-J+1 )
+               IF( NW.GT.0 )
+     $            CALL CCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
+     $                        AB( KV+KL+1-JJ+J, JJ ), 1 )
+  170       CONTINUE
+  180    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CGBTRF
+*
+      END
diff --git a/costa/native/external/lapack/cgbtrs.f b/costa/native/external/lapack/cgbtrs.f
new file mode 100644
index 000000000..b4b55b86b
--- /dev/null
+++ b/costa/native/external/lapack/cgbtrs.f
@@ -0,0 +1,215 @@
+      SUBROUTINE CGBTRS( TRANS, N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            INFO, KL, KU, LDAB, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            AB( LDAB, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGBTRS solves a system of linear equations
+*     A * X = B,  A**T * X = B,  or  A**H * X = B
+*  with a general band matrix A using the LU factorization computed
+*  by CGBTRF.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations.
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AB      (input) COMPLEX array, dimension (LDAB,N)
+*          Details of the LU factorization of the band matrix A, as
+*          computed by CGBTRF.  U is stored as an upper triangular band
+*          matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and
+*          the multipliers used during the factorization are stored in
+*          rows KL+KU+2 to 2*KL+KU+1.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices; for 1 <= i <= N, row i of the matrix was
+*          interchanged with row IPIV(i).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LNOTI, NOTRAN
+      INTEGER            I, J, KD, L, LM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEMV, CGERU, CLACGV, CSWAP, CTBSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $    LSAME( TRANS, 'C' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.( 2*KL+KU+1 ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGBTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      KD = KU + KL + 1
+      LNOTI = KL.GT.0
+*
+      IF( NOTRAN ) THEN
+*
+*        Solve  A*X = B.
+*
+*        Solve L*X = B, overwriting B with X.
+*
+*        L is represented as a product of permutations and unit lower
+*        triangular matrices L = P(1) * L(1) * ... * P(n-1) * L(n-1),
+*        where each transformation L(i) is a rank-one modification of
+*        the identity matrix.
+*
+         IF( LNOTI ) THEN
+            DO 10 J = 1, N - 1
+               LM = MIN( KL, N-J )
+               L = IPIV( J )
+               IF( L.NE.J )
+     $            CALL CSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB )
+               CALL CGERU( LM, NRHS, -ONE, AB( KD+1, J ), 1, B( J, 1 ),
+     $                     LDB, B( J+1, 1 ), LDB )
+   10       CONTINUE
+         END IF
+*
+         DO 20 I = 1, NRHS
+*
+*           Solve U*X = B, overwriting B with X.
+*
+            CALL CTBSV( 'Upper', 'No transpose', 'Non-unit', N, KL+KU,
+     $                  AB, LDAB, B( 1, I ), 1 )
+   20    CONTINUE
+*
+      ELSE IF( LSAME( TRANS, 'T' ) ) THEN
+*
+*        Solve A**T * X = B.
+*
+         DO 30 I = 1, NRHS
+*
+*           Solve U**T * X = B, overwriting B with X.
+*
+            CALL CTBSV( 'Upper', 'Transpose', 'Non-unit', N, KL+KU, AB,
+     $                  LDAB, B( 1, I ), 1 )
+   30    CONTINUE
+*
+*        Solve L**T * X = B, overwriting B with X.
+*
+         IF( LNOTI ) THEN
+            DO 40 J = N - 1, 1, -1
+               LM = MIN( KL, N-J )
+               CALL CGEMV( 'Transpose', LM, NRHS, -ONE, B( J+1, 1 ),
+     $                     LDB, AB( KD+1, J ), 1, ONE, B( J, 1 ), LDB )
+               L = IPIV( J )
+               IF( L.NE.J )
+     $            CALL CSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB )
+   40       CONTINUE
+         END IF
+*
+      ELSE
+*
+*        Solve A**H * X = B.
+*
+         DO 50 I = 1, NRHS
+*
+*           Solve U**H * X = B, overwriting B with X.
+*
+            CALL CTBSV( 'Upper', 'Conjugate transpose', 'Non-unit', N,
+     $                  KL+KU, AB, LDAB, B( 1, I ), 1 )
+   50    CONTINUE
+*
+*        Solve L**H * X = B, overwriting B with X.
+*
+         IF( LNOTI ) THEN
+            DO 60 J = N - 1, 1, -1
+               LM = MIN( KL, N-J )
+               CALL CLACGV( NRHS, B( J, 1 ), LDB )
+               CALL CGEMV( 'Conjugate transpose', LM, NRHS, -ONE,
+     $                     B( J+1, 1 ), LDB, AB( KD+1, J ), 1, ONE,
+     $                     B( J, 1 ), LDB )
+               CALL CLACGV( NRHS, B( J, 1 ), LDB )
+               L = IPIV( J )
+               IF( L.NE.J )
+     $            CALL CSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB )
+   60       CONTINUE
+         END IF
+      END IF
+      RETURN
+*
+*     End of CGBTRS
+*
+      END
diff --git a/costa/native/external/lapack/cgebak.f b/costa/native/external/lapack/cgebak.f
new file mode 100644
index 000000000..4c81e0534
--- /dev/null
+++ b/costa/native/external/lapack/cgebak.f
@@ -0,0 +1,190 @@
+      SUBROUTINE CGEBAK( JOB, SIDE, N, ILO, IHI, SCALE, M, V, LDV,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOB, SIDE
+      INTEGER            IHI, ILO, INFO, LDV, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               SCALE( * )
+      COMPLEX            V( LDV, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGEBAK forms the right or left eigenvectors of a complex general
+*  matrix by backward transformation on the computed eigenvectors of the
+*  balanced matrix output by CGEBAL.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          Specifies the type of backward transformation required:
+*          = 'N', do nothing, return immediately;
+*          = 'P', do backward transformation for permutation only;
+*          = 'S', do backward transformation for scaling only;
+*          = 'B', do backward transformations for both permutation and
+*                 scaling.
+*          JOB must be the same as the argument JOB supplied to CGEBAL.
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'R':  V contains right eigenvectors;
+*          = 'L':  V contains left eigenvectors.
+*
+*  N       (input) INTEGER
+*          The number of rows of the matrix V.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          The integers ILO and IHI determined by CGEBAL.
+*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+*
+*  SCALE   (input) REAL array, dimension (N)
+*          Details of the permutation and scaling factors, as returned
+*          by CGEBAL.
+*
+*  M       (input) INTEGER
+*          The number of columns of the matrix V.  M >= 0.
+*
+*  V       (input/output) COMPLEX array, dimension (LDV,M)
+*          On entry, the matrix of right or left eigenvectors to be
+*          transformed, as returned by CHSEIN or CTREVC.
+*          On exit, V is overwritten by the transformed eigenvectors.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V. LDV >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFTV, RIGHTV
+      INTEGER            I, II, K
+      REAL               S
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CSSCAL, CSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and Test the input parameters
+*
+      RIGHTV = LSAME( SIDE, 'R' )
+      LEFTV = LSAME( SIDE, 'L' )
+*
+      INFO = 0
+      IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND.
+     $    .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN
+         INFO = -5
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -7
+      ELSE IF( LDV.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGEBAK', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+      IF( M.EQ.0 )
+     $   RETURN
+      IF( LSAME( JOB, 'N' ) )
+     $   RETURN
+*
+      IF( ILO.EQ.IHI )
+     $   GO TO 30
+*
+*     Backward balance
+*
+      IF( LSAME( JOB, 'S' ) .OR. LSAME( JOB, 'B' ) ) THEN
+*
+         IF( RIGHTV ) THEN
+            DO 10 I = ILO, IHI
+               S = SCALE( I )
+               CALL CSSCAL( M, S, V( I, 1 ), LDV )
+   10       CONTINUE
+         END IF
+*
+         IF( LEFTV ) THEN
+            DO 20 I = ILO, IHI
+               S = ONE / SCALE( I )
+               CALL CSSCAL( M, S, V( I, 1 ), LDV )
+   20       CONTINUE
+         END IF
+*
+      END IF
+*
+*     Backward permutation
+*
+*     For  I = ILO-1 step -1 until 1,
+*              IHI+1 step 1 until N do --
+*
+   30 CONTINUE
+      IF( LSAME( JOB, 'P' ) .OR. LSAME( JOB, 'B' ) ) THEN
+         IF( RIGHTV ) THEN
+            DO 40 II = 1, N
+               I = II
+               IF( I.GE.ILO .AND. I.LE.IHI )
+     $            GO TO 40
+               IF( I.LT.ILO )
+     $            I = ILO - II
+               K = SCALE( I )
+               IF( K.EQ.I )
+     $            GO TO 40
+               CALL CSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV )
+   40       CONTINUE
+         END IF
+*
+         IF( LEFTV ) THEN
+            DO 50 II = 1, N
+               I = II
+               IF( I.GE.ILO .AND. I.LE.IHI )
+     $            GO TO 50
+               IF( I.LT.ILO )
+     $            I = ILO - II
+               K = SCALE( I )
+               IF( K.EQ.I )
+     $            GO TO 50
+               CALL CSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV )
+   50       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CGEBAK
+*
+      END
diff --git a/costa/native/external/lapack/cgebal.f b/costa/native/external/lapack/cgebal.f
new file mode 100644
index 000000000..b7e7663bf
--- /dev/null
+++ b/costa/native/external/lapack/cgebal.f
@@ -0,0 +1,331 @@
+      SUBROUTINE CGEBAL( JOB, N, A, LDA, ILO, IHI, SCALE, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOB
+      INTEGER            IHI, ILO, INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      REAL               SCALE( * )
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGEBAL balances a general complex matrix A.  This involves, first,
+*  permuting A by a similarity transformation to isolate eigenvalues
+*  in the first 1 to ILO-1 and last IHI+1 to N elements on the
+*  diagonal; and second, applying a diagonal similarity transformation
+*  to rows and columns ILO to IHI to make the rows and columns as
+*  close in norm as possible.  Both steps are optional.
+*
+*  Balancing may reduce the 1-norm of the matrix, and improve the
+*  accuracy of the computed eigenvalues and/or eigenvectors.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          Specifies the operations to be performed on A:
+*          = 'N':  none:  simply set ILO = 1, IHI = N, SCALE(I) = 1.0
+*                  for i = 1,...,N;
+*          = 'P':  permute only;
+*          = 'S':  scale only;
+*          = 'B':  both permute and scale.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the input matrix A.
+*          On exit,  A is overwritten by the balanced matrix.
+*          If JOB = 'N', A is not referenced.
+*          See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  ILO     (output) INTEGER
+*  IHI     (output) INTEGER
+*          ILO and IHI are set to integers such that on exit
+*          A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N.
+*          If JOB = 'N' or 'S', ILO = 1 and IHI = N.
+*
+*  SCALE   (output) REAL array, dimension (N)
+*          Details of the permutations and scaling factors applied to
+*          A.  If P(j) is the index of the row and column interchanged
+*          with row and column j and D(j) is the scaling factor
+*          applied to row and column j, then
+*          SCALE(j) = P(j)    for j = 1,...,ILO-1
+*                   = D(j)    for j = ILO,...,IHI
+*                   = P(j)    for j = IHI+1,...,N.
+*          The order in which the interchanges are made is N to IHI+1,
+*          then 1 to ILO-1.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The permutations consist of row and column interchanges which put
+*  the matrix in the form
+*
+*             ( T1   X   Y  )
+*     P A P = (  0   B   Z  )
+*             (  0   0   T2 )
+*
+*  where T1 and T2 are upper triangular matrices whose eigenvalues lie
+*  along the diagonal.  The column indices ILO and IHI mark the starting
+*  and ending columns of the submatrix B. Balancing consists of applying
+*  a diagonal similarity transformation inv(D) * B * D to make the
+*  1-norms of each row of B and its corresponding column nearly equal.
+*  The output matrix is
+*
+*     ( T1     X*D          Y    )
+*     (  0  inv(D)*B*D  inv(D)*Z ).
+*     (  0      0           T2   )
+*
+*  Information about the permutations P and the diagonal matrix D is
+*  returned in the vector SCALE.
+*
+*  This subroutine is based on the EISPACK routine CBAL.
+*
+*  Modified by Tzu-Yi Chen, Computer Science Division, University of
+*    California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+      REAL               SCLFAC
+      PARAMETER          ( SCLFAC = 0.8E+1 )
+      REAL               FACTOR
+      PARAMETER          ( FACTOR = 0.95E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOCONV
+      INTEGER            I, ICA, IEXC, IRA, J, K, L, M
+      REAL               C, CA, F, G, R, RA, S, SFMAX1, SFMAX2, SFMIN1,
+     $                   SFMIN2
+      COMPLEX            CDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ICAMAX
+      REAL               SLAMCH
+      EXTERNAL           LSAME, ICAMAX, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CSSCAL, CSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MAX, MIN, REAL
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND.
+     $    .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGEBAL', -INFO )
+         RETURN
+      END IF
+*
+      K = 1
+      L = N
+*
+      IF( N.EQ.0 )
+     $   GO TO 210
+*
+      IF( LSAME( JOB, 'N' ) ) THEN
+         DO 10 I = 1, N
+            SCALE( I ) = ONE
+   10    CONTINUE
+         GO TO 210
+      END IF
+*
+      IF( LSAME( JOB, 'S' ) )
+     $   GO TO 120
+*
+*     Permutation to isolate eigenvalues if possible
+*
+      GO TO 50
+*
+*     Row and column exchange.
+*
+   20 CONTINUE
+      SCALE( M ) = J
+      IF( J.EQ.M )
+     $   GO TO 30
+*
+      CALL CSWAP( L, A( 1, J ), 1, A( 1, M ), 1 )
+      CALL CSWAP( N-K+1, A( J, K ), LDA, A( M, K ), LDA )
+*
+   30 CONTINUE
+      GO TO ( 40, 80 )IEXC
+*
+*     Search for rows isolating an eigenvalue and push them down.
+*
+   40 CONTINUE
+      IF( L.EQ.1 )
+     $   GO TO 210
+      L = L - 1
+*
+   50 CONTINUE
+      DO 70 J = L, 1, -1
+*
+         DO 60 I = 1, L
+            IF( I.EQ.J )
+     $         GO TO 60
+            IF( REAL( A( J, I ) ).NE.ZERO .OR. AIMAG( A( J, I ) ).NE.
+     $          ZERO )GO TO 70
+   60    CONTINUE
+*
+         M = L
+         IEXC = 1
+         GO TO 20
+   70 CONTINUE
+*
+      GO TO 90
+*
+*     Search for columns isolating an eigenvalue and push them left.
+*
+   80 CONTINUE
+      K = K + 1
+*
+   90 CONTINUE
+      DO 110 J = K, L
+*
+         DO 100 I = K, L
+            IF( I.EQ.J )
+     $         GO TO 100
+            IF( REAL( A( I, J ) ).NE.ZERO .OR. AIMAG( A( I, J ) ).NE.
+     $          ZERO )GO TO 110
+  100    CONTINUE
+*
+         M = K
+         IEXC = 2
+         GO TO 20
+  110 CONTINUE
+*
+  120 CONTINUE
+      DO 130 I = K, L
+         SCALE( I ) = ONE
+  130 CONTINUE
+*
+      IF( LSAME( JOB, 'P' ) )
+     $   GO TO 210
+*
+*     Balance the submatrix in rows K to L.
+*
+*     Iterative loop for norm reduction
+*
+      SFMIN1 = SLAMCH( 'S' ) / SLAMCH( 'P' )
+      SFMAX1 = ONE / SFMIN1
+      SFMIN2 = SFMIN1*SCLFAC
+      SFMAX2 = ONE / SFMIN2
+  140 CONTINUE
+      NOCONV = .FALSE.
+*
+      DO 200 I = K, L
+         C = ZERO
+         R = ZERO
+*
+         DO 150 J = K, L
+            IF( J.EQ.I )
+     $         GO TO 150
+            C = C + CABS1( A( J, I ) )
+            R = R + CABS1( A( I, J ) )
+  150    CONTINUE
+         ICA = ICAMAX( L, A( 1, I ), 1 )
+         CA = ABS( A( ICA, I ) )
+         IRA = ICAMAX( N-K+1, A( I, K ), LDA )
+         RA = ABS( A( I, IRA+K-1 ) )
+*
+*        Guard against zero C or R due to underflow.
+*
+         IF( C.EQ.ZERO .OR. R.EQ.ZERO )
+     $      GO TO 200
+         G = R / SCLFAC
+         F = ONE
+         S = C + R
+  160    CONTINUE
+         IF( C.GE.G .OR. MAX( F, C, CA ).GE.SFMAX2 .OR.
+     $       MIN( R, G, RA ).LE.SFMIN2 )GO TO 170
+         F = F*SCLFAC
+         C = C*SCLFAC
+         CA = CA*SCLFAC
+         R = R / SCLFAC
+         G = G / SCLFAC
+         RA = RA / SCLFAC
+         GO TO 160
+*
+  170    CONTINUE
+         G = C / SCLFAC
+  180    CONTINUE
+         IF( G.LT.R .OR. MAX( R, RA ).GE.SFMAX2 .OR.
+     $       MIN( F, C, G, CA ).LE.SFMIN2 )GO TO 190
+         F = F / SCLFAC
+         C = C / SCLFAC
+         G = G / SCLFAC
+         CA = CA / SCLFAC
+         R = R*SCLFAC
+         RA = RA*SCLFAC
+         GO TO 180
+*
+*        Now balance.
+*
+  190    CONTINUE
+         IF( ( C+R ).GE.FACTOR*S )
+     $      GO TO 200
+         IF( F.LT.ONE .AND. SCALE( I ).LT.ONE ) THEN
+            IF( F*SCALE( I ).LE.SFMIN1 )
+     $         GO TO 200
+         END IF
+         IF( F.GT.ONE .AND. SCALE( I ).GT.ONE ) THEN
+            IF( SCALE( I ).GE.SFMAX1 / F )
+     $         GO TO 200
+         END IF
+         G = ONE / F
+         SCALE( I ) = SCALE( I )*F
+         NOCONV = .TRUE.
+*
+         CALL CSSCAL( N-K+1, G, A( I, K ), LDA )
+         CALL CSSCAL( L, F, A( 1, I ), 1 )
+*
+  200 CONTINUE
+*
+      IF( NOCONV )
+     $   GO TO 140
+*
+  210 CONTINUE
+      ILO = K
+      IHI = L
+*
+      RETURN
+*
+*     End of CGEBAL
+*
+      END
diff --git a/costa/native/external/lapack/cgebd2.f b/costa/native/external/lapack/cgebd2.f
new file mode 100644
index 000000000..fdec366ac
--- /dev/null
+++ b/costa/native/external/lapack/cgebd2.f
@@ -0,0 +1,249 @@
+      SUBROUTINE CGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * ), E( * )
+      COMPLEX            A( LDA, * ), TAUP( * ), TAUQ( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGEBD2 reduces a complex general m by n matrix A to upper or lower
+*  real bidiagonal form B by a unitary transformation: Q' * A * P = B.
+*
+*  If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows in the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns in the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the m by n general matrix to be reduced.
+*          On exit,
+*          if m >= n, the diagonal and the first superdiagonal are
+*            overwritten with the upper bidiagonal matrix B; the
+*            elements below the diagonal, with the array TAUQ, represent
+*            the unitary matrix Q as a product of elementary
+*            reflectors, and the elements above the first superdiagonal,
+*            with the array TAUP, represent the unitary matrix P as
+*            a product of elementary reflectors;
+*          if m < n, the diagonal and the first subdiagonal are
+*            overwritten with the lower bidiagonal matrix B; the
+*            elements below the first subdiagonal, with the array TAUQ,
+*            represent the unitary matrix Q as a product of
+*            elementary reflectors, and the elements above the diagonal,
+*            with the array TAUP, represent the unitary matrix P as
+*            a product of elementary reflectors.
+*          See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  D       (output) REAL array, dimension (min(M,N))
+*          The diagonal elements of the bidiagonal matrix B:
+*          D(i) = A(i,i).
+*
+*  E       (output) REAL array, dimension (min(M,N)-1)
+*          The off-diagonal elements of the bidiagonal matrix B:
+*          if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
+*          if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
+*
+*  TAUQ    (output) COMPLEX array dimension (min(M,N))
+*          The scalar factors of the elementary reflectors which
+*          represent the unitary matrix Q. See Further Details.
+*
+*  TAUP    (output) COMPLEX array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors which
+*          represent the unitary matrix P. See Further Details.
+*
+*  WORK    (workspace) COMPLEX array, dimension (max(M,N))
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit 
+*          < 0: if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The matrices Q and P are represented as products of elementary
+*  reflectors:
+*
+*  If m >= n,
+*
+*     Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1)
+*
+*  Each H(i) and G(i) has the form:
+*
+*     H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+*
+*  where tauq and taup are complex scalars, and v and u are complex
+*  vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in
+*  A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in
+*  A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
+*
+*  If m < n,
+*
+*     Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m)
+*
+*  Each H(i) and G(i) has the form:
+*
+*     H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+*
+*  where tauq and taup are complex scalars, v and u are complex vectors;
+*  v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
+*  u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
+*  tauq is stored in TAUQ(i) and taup in TAUP(i).
+*
+*  The contents of A on exit are illustrated by the following examples:
+*
+*  m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
+*
+*    (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 )
+*    (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 )
+*    (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 )
+*    (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 )
+*    (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 )
+*    (  v1  v2  v3  v4  v5 )
+*
+*  where d and e denote diagonal and off-diagonal elements of B, vi
+*  denotes an element of the vector defining H(i), and ui an element of
+*  the vector defining G(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ZERO, ONE
+      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      COMPLEX            ALPHA
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLACGV, CLARF, CLARFG, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.LT.0 ) THEN
+         CALL XERBLA( 'CGEBD2', -INFO )
+         RETURN
+      END IF
+*
+      IF( M.GE.N ) THEN
+*
+*        Reduce to upper bidiagonal form
+*
+         DO 10 I = 1, N
+*
+*           Generate elementary reflector H(i) to annihilate A(i+1:m,i)
+*
+            ALPHA = A( I, I )
+            CALL CLARFG( M-I+1, ALPHA, A( MIN( I+1, M ), I ), 1,
+     $                   TAUQ( I ) )
+            D( I ) = ALPHA
+            A( I, I ) = ONE
+*
+*           Apply H(i)' to A(i:m,i+1:n) from the left
+*
+            CALL CLARF( 'Left', M-I+1, N-I, A( I, I ), 1,
+     $                  CONJG( TAUQ( I ) ), A( I, I+1 ), LDA, WORK )
+            A( I, I ) = D( I )
+*
+            IF( I.LT.N ) THEN
+*
+*              Generate elementary reflector G(i) to annihilate
+*              A(i,i+2:n)
+*
+               CALL CLACGV( N-I, A( I, I+1 ), LDA )
+               ALPHA = A( I, I+1 )
+               CALL CLARFG( N-I, ALPHA, A( I, MIN( I+2, N ) ),
+     $                      LDA, TAUP( I ) )
+               E( I ) = ALPHA
+               A( I, I+1 ) = ONE
+*
+*              Apply G(i) to A(i+1:m,i+1:n) from the right
+*
+               CALL CLARF( 'Right', M-I, N-I, A( I, I+1 ), LDA,
+     $                     TAUP( I ), A( I+1, I+1 ), LDA, WORK )
+               CALL CLACGV( N-I, A( I, I+1 ), LDA )
+               A( I, I+1 ) = E( I )
+            ELSE
+               TAUP( I ) = ZERO
+            END IF
+   10    CONTINUE
+      ELSE
+*
+*        Reduce to lower bidiagonal form
+*
+         DO 20 I = 1, M
+*
+*           Generate elementary reflector G(i) to annihilate A(i,i+1:n)
+*
+            CALL CLACGV( N-I+1, A( I, I ), LDA )
+            ALPHA = A( I, I )
+            CALL CLARFG( N-I+1, ALPHA, A( I, MIN( I+1, N ) ), LDA,
+     $                   TAUP( I ) )
+            D( I ) = ALPHA
+            A( I, I ) = ONE
+*
+*           Apply G(i) to A(i+1:m,i:n) from the right
+*
+            CALL CLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAUP( I ),
+     $                  A( MIN( I+1, M ), I ), LDA, WORK )
+            CALL CLACGV( N-I+1, A( I, I ), LDA )
+            A( I, I ) = D( I )
+*
+            IF( I.LT.M ) THEN
+*
+*              Generate elementary reflector H(i) to annihilate
+*              A(i+2:m,i)
+*
+               ALPHA = A( I+1, I )
+               CALL CLARFG( M-I, ALPHA, A( MIN( I+2, M ), I ), 1,
+     $                      TAUQ( I ) )
+               E( I ) = ALPHA
+               A( I+1, I ) = ONE
+*
+*              Apply H(i)' to A(i+1:m,i+1:n) from the left
+*
+               CALL CLARF( 'Left', M-I, N-I, A( I+1, I ), 1,
+     $                     CONJG( TAUQ( I ) ), A( I+1, I+1 ), LDA,
+     $                     WORK )
+               A( I+1, I ) = E( I )
+            ELSE
+               TAUQ( I ) = ZERO
+            END IF
+   20    CONTINUE
+      END IF
+      RETURN
+*
+*     End of CGEBD2
+*
+      END
diff --git a/costa/native/external/lapack/cgebrd.f b/costa/native/external/lapack/cgebrd.f
new file mode 100644
index 000000000..058da28c6
--- /dev/null
+++ b/costa/native/external/lapack/cgebrd.f
@@ -0,0 +1,270 @@
+      SUBROUTINE CGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * ), E( * )
+      COMPLEX            A( LDA, * ), TAUP( * ), TAUQ( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGEBRD reduces a general complex M-by-N matrix A to upper or lower
+*  bidiagonal form B by a unitary transformation: Q**H * A * P = B.
+*
+*  If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows in the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns in the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the M-by-N general matrix to be reduced.
+*          On exit,
+*          if m >= n, the diagonal and the first superdiagonal are
+*            overwritten with the upper bidiagonal matrix B; the
+*            elements below the diagonal, with the array TAUQ, represent
+*            the unitary matrix Q as a product of elementary
+*            reflectors, and the elements above the first superdiagonal,
+*            with the array TAUP, represent the unitary matrix P as
+*            a product of elementary reflectors;
+*          if m < n, the diagonal and the first subdiagonal are
+*            overwritten with the lower bidiagonal matrix B; the
+*            elements below the first subdiagonal, with the array TAUQ,
+*            represent the unitary matrix Q as a product of
+*            elementary reflectors, and the elements above the diagonal,
+*            with the array TAUP, represent the unitary matrix P as
+*            a product of elementary reflectors.
+*          See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  D       (output) REAL array, dimension (min(M,N))
+*          The diagonal elements of the bidiagonal matrix B:
+*          D(i) = A(i,i).
+*
+*  E       (output) REAL array, dimension (min(M,N)-1)
+*          The off-diagonal elements of the bidiagonal matrix B:
+*          if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
+*          if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
+*
+*  TAUQ    (output) COMPLEX array dimension (min(M,N))
+*          The scalar factors of the elementary reflectors which
+*          represent the unitary matrix Q. See Further Details.
+*
+*  TAUP    (output) COMPLEX array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors which
+*          represent the unitary matrix P. See Further Details.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.  LWORK >= max(1,M,N).
+*          For optimum performance LWORK >= (M+N)*NB, where NB
+*          is the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The matrices Q and P are represented as products of elementary
+*  reflectors:
+*
+*  If m >= n,
+*
+*     Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1)
+*
+*  Each H(i) and G(i) has the form:
+*
+*     H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+*
+*  where tauq and taup are complex scalars, and v and u are complex
+*  vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in
+*  A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in
+*  A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
+*
+*  If m < n,
+*
+*     Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m)
+*
+*  Each H(i) and G(i) has the form:
+*
+*     H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+*
+*  where tauq and taup are complex scalars, and v and u are complex
+*  vectors; v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in
+*  A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in
+*  A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
+*
+*  The contents of A on exit are illustrated by the following examples:
+*
+*  m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
+*
+*    (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 )
+*    (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 )
+*    (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 )
+*    (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 )
+*    (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 )
+*    (  v1  v2  v3  v4  v5 )
+*
+*  where d and e denote diagonal and off-diagonal elements of B, vi
+*  denotes an element of the vector defining H(i), and ui an element of
+*  the vector defining G(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IINFO, J, LDWRKX, LDWRKY, LWKOPT, MINMN, NB,
+     $                   NBMIN, NX
+      REAL               WS
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEBD2, CGEMM, CLABRD, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, REAL
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      NB = MAX( 1, ILAENV( 1, 'CGEBRD', ' ', M, N, -1, -1 ) )
+      LWKOPT = ( M+N )*NB
+      WORK( 1 ) = REAL( LWKOPT )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, M, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.LT.0 ) THEN
+         CALL XERBLA( 'CGEBRD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      MINMN = MIN( M, N )
+      IF( MINMN.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      WS = MAX( M, N )
+      LDWRKX = M
+      LDWRKY = N
+*
+      IF( NB.GT.1 .AND. NB.LT.MINMN ) THEN
+*
+*        Set the crossover point NX.
+*
+         NX = MAX( NB, ILAENV( 3, 'CGEBRD', ' ', M, N, -1, -1 ) )
+*
+*        Determine when to switch from blocked to unblocked code.
+*
+         IF( NX.LT.MINMN ) THEN
+            WS = ( M+N )*NB
+            IF( LWORK.LT.WS ) THEN
+*
+*              Not enough work space for the optimal NB, consider using
+*              a smaller block size.
+*
+               NBMIN = ILAENV( 2, 'CGEBRD', ' ', M, N, -1, -1 )
+               IF( LWORK.GE.( M+N )*NBMIN ) THEN
+                  NB = LWORK / ( M+N )
+               ELSE
+                  NB = 1
+                  NX = MINMN
+               END IF
+            END IF
+         END IF
+      ELSE
+         NX = MINMN
+      END IF
+*
+      DO 30 I = 1, MINMN - NX, NB
+*
+*        Reduce rows and columns i:i+ib-1 to bidiagonal form and return
+*        the matrices X and Y which are needed to update the unreduced
+*        part of the matrix
+*
+         CALL CLABRD( M-I+1, N-I+1, NB, A( I, I ), LDA, D( I ), E( I ),
+     $                TAUQ( I ), TAUP( I ), WORK, LDWRKX,
+     $                WORK( LDWRKX*NB+1 ), LDWRKY )
+*
+*        Update the trailing submatrix A(i+ib:m,i+ib:n), using
+*        an update of the form  A := A - V*Y' - X*U'
+*
+         CALL CGEMM( 'No transpose', 'Conjugate transpose', M-I-NB+1,
+     $               N-I-NB+1, NB, -ONE, A( I+NB, I ), LDA,
+     $               WORK( LDWRKX*NB+NB+1 ), LDWRKY, ONE,
+     $               A( I+NB, I+NB ), LDA )
+         CALL CGEMM( 'No transpose', 'No transpose', M-I-NB+1, N-I-NB+1,
+     $               NB, -ONE, WORK( NB+1 ), LDWRKX, A( I, I+NB ), LDA,
+     $               ONE, A( I+NB, I+NB ), LDA )
+*
+*        Copy diagonal and off-diagonal elements of B back into A
+*
+         IF( M.GE.N ) THEN
+            DO 10 J = I, I + NB - 1
+               A( J, J ) = D( J )
+               A( J, J+1 ) = E( J )
+   10       CONTINUE
+         ELSE
+            DO 20 J = I, I + NB - 1
+               A( J, J ) = D( J )
+               A( J+1, J ) = E( J )
+   20       CONTINUE
+         END IF
+   30 CONTINUE
+*
+*     Use unblocked code to reduce the remainder of the matrix
+*
+      CALL CGEBD2( M-I+1, N-I+1, A( I, I ), LDA, D( I ), E( I ),
+     $             TAUQ( I ), TAUP( I ), WORK, IINFO )
+      WORK( 1 ) = WS
+      RETURN
+*
+*     End of CGEBRD
+*
+      END
diff --git a/costa/native/external/lapack/cgecon.f b/costa/native/external/lapack/cgecon.f
new file mode 100644
index 000000000..ff7133ffc
--- /dev/null
+++ b/costa/native/external/lapack/cgecon.f
@@ -0,0 +1,189 @@
+      SUBROUTINE CGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, RWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            INFO, LDA, N
+      REAL               ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               RWORK( * )
+      COMPLEX            A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGECON estimates the reciprocal of the condition number of a general
+*  complex matrix A, in either the 1-norm or the infinity-norm, using
+*  the LU factorization computed by CGETRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as
+*     RCOND = 1 / ( norm(A) * norm(inv(A)) ).
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies whether the 1-norm condition number or the
+*          infinity-norm condition number is required:
+*          = '1' or 'O':  1-norm;
+*          = 'I':         Infinity-norm.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input) COMPLEX array, dimension (LDA,N)
+*          The factors L and U from the factorization A = P*L*U
+*          as computed by CGETRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  ANORM   (input) REAL
+*          If NORM = '1' or 'O', the 1-norm of the original matrix A.
+*          If NORM = 'I', the infinity-norm of the original matrix A.
+*
+*  RCOND   (output) REAL
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(norm(A) * norm(inv(A))).
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ONENRM
+      CHARACTER          NORMIN
+      INTEGER            IX, KASE, KASE1
+      REAL               AINVNM, SCALE, SL, SMLNUM, SU
+      COMPLEX            ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ICAMAX
+      REAL               SLAMCH
+      EXTERNAL           LSAME, ICAMAX, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLACON, CLATRS, CSRSCL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MAX, REAL
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' )
+      IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGECON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.EQ.ZERO ) THEN
+         RETURN
+      END IF
+*
+      SMLNUM = SLAMCH( 'Safe minimum' )
+*
+*     Estimate the norm of inv(A).
+*
+      AINVNM = ZERO
+      NORMIN = 'N'
+      IF( ONENRM ) THEN
+         KASE1 = 1
+      ELSE
+         KASE1 = 2
+      END IF
+      KASE = 0
+   10 CONTINUE
+      CALL CLACON( N, WORK( N+1 ), WORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+         IF( KASE.EQ.KASE1 ) THEN
+*
+*           Multiply by inv(L).
+*
+            CALL CLATRS( 'Lower', 'No transpose', 'Unit', NORMIN, N, A,
+     $                   LDA, WORK, SL, RWORK, INFO )
+*
+*           Multiply by inv(U).
+*
+            CALL CLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   A, LDA, WORK, SU, RWORK( N+1 ), INFO )
+         ELSE
+*
+*           Multiply by inv(U').
+*
+            CALL CLATRS( 'Upper', 'Conjugate transpose', 'Non-unit',
+     $                   NORMIN, N, A, LDA, WORK, SU, RWORK( N+1 ),
+     $                   INFO )
+*
+*           Multiply by inv(L').
+*
+            CALL CLATRS( 'Lower', 'Conjugate transpose', 'Unit', NORMIN,
+     $                   N, A, LDA, WORK, SL, RWORK, INFO )
+         END IF
+*
+*        Divide X by 1/(SL*SU) if doing so will not cause overflow.
+*
+         SCALE = SL*SU
+         NORMIN = 'Y'
+         IF( SCALE.NE.ONE ) THEN
+            IX = ICAMAX( N, WORK, 1 )
+            IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO )
+     $         GO TO 20
+            CALL CSRSCL( N, SCALE, WORK, 1 )
+         END IF
+         GO TO 10
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+   20 CONTINUE
+      RETURN
+*
+*     End of CGECON
+*
+      END
diff --git a/costa/native/external/lapack/cgeequ.f b/costa/native/external/lapack/cgeequ.f
new file mode 100644
index 000000000..5d59b6cd6
--- /dev/null
+++ b/costa/native/external/lapack/cgeequ.f
@@ -0,0 +1,234 @@
+      SUBROUTINE CGEEQU( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+      REAL               AMAX, COLCND, ROWCND
+*     ..
+*     .. Array Arguments ..
+      REAL               C( * ), R( * )
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGEEQU computes row and column scalings intended to equilibrate an
+*  M-by-N matrix A and reduce its condition number.  R returns the row
+*  scale factors and C the column scale factors, chosen to try to make
+*  the largest element in each row and column of the matrix B with
+*  elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1.
+*
+*  R(i) and C(j) are restricted to be between SMLNUM = smallest safe
+*  number and BIGNUM = largest safe number.  Use of these scaling
+*  factors is not guaranteed to reduce the condition number of A but
+*  works well in practice.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input) COMPLEX array, dimension (LDA,N)
+*          The M-by-N matrix whose equilibration factors are
+*          to be computed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  R       (output) REAL array, dimension (M)
+*          If INFO = 0 or INFO > M, R contains the row scale factors
+*          for A.
+*
+*  C       (output) REAL array, dimension (N)
+*          If INFO = 0,  C contains the column scale factors for A.
+*
+*  ROWCND  (output) REAL
+*          If INFO = 0 or INFO > M, ROWCND contains the ratio of the
+*          smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and
+*          AMAX is neither too large nor too small, it is not worth
+*          scaling by R.
+*
+*  COLCND  (output) REAL
+*          If INFO = 0, COLCND contains the ratio of the smallest
+*          C(i) to the largest C(i).  If COLCND >= 0.1, it is not
+*          worth scaling by C.
+*
+*  AMAX    (output) REAL
+*          Absolute value of largest matrix element.  If AMAX is very
+*          close to overflow or very close to underflow, the matrix
+*          should be scaled.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i,  and i is
+*                <= M:  the i-th row of A is exactly zero
+*                >  M:  the (i-M)-th column of A is exactly zero
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      REAL               BIGNUM, RCMAX, RCMIN, SMLNUM
+      COMPLEX            ZDUM
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MAX, MIN, REAL
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGEEQU', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         ROWCND = ONE
+         COLCND = ONE
+         AMAX = ZERO
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SMLNUM = SLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+*
+*     Compute row scale factors.
+*
+      DO 10 I = 1, M
+         R( I ) = ZERO
+   10 CONTINUE
+*
+*     Find the maximum element in each row.
+*
+      DO 30 J = 1, N
+         DO 20 I = 1, M
+            R( I ) = MAX( R( I ), CABS1( A( I, J ) ) )
+   20    CONTINUE
+   30 CONTINUE
+*
+*     Find the maximum and minimum scale factors.
+*
+      RCMIN = BIGNUM
+      RCMAX = ZERO
+      DO 40 I = 1, M
+         RCMAX = MAX( RCMAX, R( I ) )
+         RCMIN = MIN( RCMIN, R( I ) )
+   40 CONTINUE
+      AMAX = RCMAX
+*
+      IF( RCMIN.EQ.ZERO ) THEN
+*
+*        Find the first zero scale factor and return an error code.
+*
+         DO 50 I = 1, M
+            IF( R( I ).EQ.ZERO ) THEN
+               INFO = I
+               RETURN
+            END IF
+   50    CONTINUE
+      ELSE
+*
+*        Invert the scale factors.
+*
+         DO 60 I = 1, M
+            R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM )
+   60    CONTINUE
+*
+*        Compute ROWCND = min(R(I)) / max(R(I))
+*
+         ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+      END IF
+*
+*     Compute column scale factors
+*
+      DO 70 J = 1, N
+         C( J ) = ZERO
+   70 CONTINUE
+*
+*     Find the maximum element in each column,
+*     assuming the row scaling computed above.
+*
+      DO 90 J = 1, N
+         DO 80 I = 1, M
+            C( J ) = MAX( C( J ), CABS1( A( I, J ) )*R( I ) )
+   80    CONTINUE
+   90 CONTINUE
+*
+*     Find the maximum and minimum scale factors.
+*
+      RCMIN = BIGNUM
+      RCMAX = ZERO
+      DO 100 J = 1, N
+         RCMIN = MIN( RCMIN, C( J ) )
+         RCMAX = MAX( RCMAX, C( J ) )
+  100 CONTINUE
+*
+      IF( RCMIN.EQ.ZERO ) THEN
+*
+*        Find the first zero scale factor and return an error code.
+*
+         DO 110 J = 1, N
+            IF( C( J ).EQ.ZERO ) THEN
+               INFO = M + J
+               RETURN
+            END IF
+  110    CONTINUE
+      ELSE
+*
+*        Invert the scale factors.
+*
+         DO 120 J = 1, N
+            C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM )
+  120    CONTINUE
+*
+*        Compute COLCND = min(C(J)) / max(C(J))
+*
+         COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+      END IF
+*
+      RETURN
+*
+*     End of CGEEQU
+*
+      END
diff --git a/costa/native/external/lapack/cgees.f b/costa/native/external/lapack/cgees.f
new file mode 100644
index 000000000..a081854bd
--- /dev/null
+++ b/costa/native/external/lapack/cgees.f
@@ -0,0 +1,322 @@
+      SUBROUTINE CGEES( JOBVS, SORT, SELECT, N, A, LDA, SDIM, W, VS,
+     $                  LDVS, WORK, LWORK, RWORK, BWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVS, SORT
+      INTEGER            INFO, LDA, LDVS, LWORK, N, SDIM
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            BWORK( * )
+      REAL               RWORK( * )
+      COMPLEX            A( LDA, * ), VS( LDVS, * ), W( * ), WORK( * )
+*     ..
+*     .. Function Arguments ..
+      LOGICAL            SELECT
+      EXTERNAL           SELECT
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGEES computes for an N-by-N complex nonsymmetric matrix A, the
+*  eigenvalues, the Schur form T, and, optionally, the matrix of Schur
+*  vectors Z.  This gives the Schur factorization A = Z*T*(Z**H).
+*
+*  Optionally, it also orders the eigenvalues on the diagonal of the
+*  Schur form so that selected eigenvalues are at the top left.
+*  The leading columns of Z then form an orthonormal basis for the
+*  invariant subspace corresponding to the selected eigenvalues.
+
+*  A complex matrix is in Schur form if it is upper triangular.
+*
+*  Arguments
+*  =========
+*
+*  JOBVS   (input) CHARACTER*1
+*          = 'N': Schur vectors are not computed;
+*          = 'V': Schur vectors are computed.
+*
+*  SORT    (input) CHARACTER*1
+*          Specifies whether or not to order the eigenvalues on the
+*          diagonal of the Schur form.
+*          = 'N': Eigenvalues are not ordered:
+*          = 'S': Eigenvalues are ordered (see SELECT).
+*
+*  SELECT  (input) LOGICAL FUNCTION of one COMPLEX argument
+*          SELECT must be declared EXTERNAL in the calling subroutine.
+*          If SORT = 'S', SELECT is used to select eigenvalues to order
+*          to the top left of the Schur form.
+*          IF SORT = 'N', SELECT is not referenced.
+*          The eigenvalue W(j) is selected if SELECT(W(j)) is true.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A. N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the N-by-N matrix A.
+*          On exit, A has been overwritten by its Schur form T.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  SDIM    (output) INTEGER
+*          If SORT = 'N', SDIM = 0.
+*          If SORT = 'S', SDIM = number of eigenvalues for which
+*                         SELECT is true.
+*
+*  W       (output) COMPLEX array, dimension (N)
+*          W contains the computed eigenvalues, in the same order that
+*          they appear on the diagonal of the output Schur form T.
+*
+*  VS      (output) COMPLEX array, dimension (LDVS,N)
+*          If JOBVS = 'V', VS contains the unitary matrix Z of Schur
+*          vectors.
+*          If JOBVS = 'N', VS is not referenced.
+*
+*  LDVS    (input) INTEGER
+*          The leading dimension of the array VS.  LDVS >= 1; if
+*          JOBVS = 'V', LDVS >= N.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,2*N).
+*          For good performance, LWORK must generally be larger.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  BWORK   (workspace) LOGICAL array, dimension (N)
+*          Not referenced if SORT = 'N'.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value.
+*          > 0: if INFO = i, and i is
+*               <= N:  the QR algorithm failed to compute all the
+*                      eigenvalues; elements 1:ILO-1 and i+1:N of W
+*                      contain those eigenvalues which have converged;
+*                      if JOBVS = 'V', VS contains the matrix which
+*                      reduces A to its partially converged Schur form.
+*               = N+1: the eigenvalues could not be reordered because
+*                      some eigenvalues were too close to separate (the
+*                      problem is very ill-conditioned);
+*               = N+2: after reordering, roundoff changed values of
+*                      some complex eigenvalues so that leading
+*                      eigenvalues in the Schur form no longer satisfy
+*                      SELECT = .TRUE..  This could also be caused by
+*                      underflow due to scaling.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, SCALEA, WANTST, WANTVS
+      INTEGER            HSWORK, I, IBAL, ICOND, IERR, IEVAL, IHI, ILO,
+     $                   ITAU, IWRK, K, MAXB, MAXWRK, MINWRK
+      REAL               ANRM, BIGNUM, CSCALE, EPS, S, SEP, SMLNUM
+*     ..
+*     .. Local Arrays ..
+      REAL               DUM( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CGEBAK, CGEBAL, CGEHRD, CHSEQR, CLACPY,
+     $                   CLASCL, CTRSEN, CUNGHR, SLABAD, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               CLANGE, SLAMCH
+      EXTERNAL           LSAME, ILAENV, CLANGE, SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      WANTVS = LSAME( JOBVS, 'V' )
+      WANTST = LSAME( SORT, 'S' )
+      IF( ( .NOT.WANTVS ) .AND. ( .NOT.LSAME( JOBVS, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDVS.LT.1 .OR. ( WANTVS .AND. LDVS.LT.N ) ) THEN
+         INFO = -10
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       CWorkspace refers to complex workspace, and RWorkspace to real
+*       workspace. NB refers to the optimal block size for the
+*       immediately following subroutine, as returned by ILAENV.
+*       HSWORK refers to the workspace preferred by CHSEQR, as
+*       calculated below. HSWORK is computed assuming ILO=1 and IHI=N,
+*       the worst case.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN
+         MAXWRK = N + N*ILAENV( 1, 'CGEHRD', ' ', N, 1, N, 0 )
+         MINWRK = MAX( 1, 2*N )
+         IF( .NOT.WANTVS ) THEN
+            MAXB = MAX( ILAENV( 8, 'CHSEQR', 'SN', N, 1, N, -1 ), 2 )
+            K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'CHSEQR', 'SN', N, 1,
+     $          N, -1 ) ) )
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, HSWORK, 1 )
+         ELSE
+            MAXWRK = MAX( MAXWRK, N+( N-1 )*
+     $               ILAENV( 1, 'CUNGHR', ' ', N, 1, N, -1 ) )
+            MAXB = MAX( ILAENV( 8, 'CHSEQR', 'EN', N, 1, N, -1 ), 2 )
+            K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'CHSEQR', 'EN', N, 1,
+     $          N, -1 ) ) )
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, HSWORK, 1 )
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGEES ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         SDIM = 0
+         RETURN
+      END IF
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SQRT( SMLNUM ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = CLANGE( 'M', N, N, A, LDA, DUM )
+      SCALEA = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = SMLNUM
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = BIGNUM
+      END IF
+      IF( SCALEA )
+     $   CALL CLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR )
+*
+*     Permute the matrix to make it more nearly triangular
+*     (CWorkspace: none)
+*     (RWorkspace: need N)
+*
+      IBAL = 1
+      CALL CGEBAL( 'P', N, A, LDA, ILO, IHI, RWORK( IBAL ), IERR )
+*
+*     Reduce to upper Hessenberg form
+*     (CWorkspace: need 2*N, prefer N+N*NB)
+*     (RWorkspace: none)
+*
+      ITAU = 1
+      IWRK = N + ITAU
+      CALL CGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ),
+     $             LWORK-IWRK+1, IERR )
+*
+      IF( WANTVS ) THEN
+*
+*        Copy Householder vectors to VS
+*
+         CALL CLACPY( 'L', N, N, A, LDA, VS, LDVS )
+*
+*        Generate unitary matrix in VS
+*        (CWorkspace: need 2*N-1, prefer N+(N-1)*NB)
+*        (RWorkspace: none)
+*
+         CALL CUNGHR( N, ILO, IHI, VS, LDVS, WORK( ITAU ), WORK( IWRK ),
+     $                LWORK-IWRK+1, IERR )
+      END IF
+*
+      SDIM = 0
+*
+*     Perform QR iteration, accumulating Schur vectors in VS if desired
+*     (CWorkspace: need 1, prefer HSWORK (see comments) )
+*     (RWorkspace: none)
+*
+      IWRK = ITAU
+      CALL CHSEQR( 'S', JOBVS, N, ILO, IHI, A, LDA, W, VS, LDVS,
+     $             WORK( IWRK ), LWORK-IWRK+1, IEVAL )
+      IF( IEVAL.GT.0 )
+     $   INFO = IEVAL
+*
+*     Sort eigenvalues if desired
+*
+      IF( WANTST .AND. INFO.EQ.0 ) THEN
+         IF( SCALEA )
+     $      CALL CLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, W, N, IERR )
+         DO 10 I = 1, N
+            BWORK( I ) = SELECT( W( I ) )
+   10    CONTINUE
+*
+*        Reorder eigenvalues and transform Schur vectors
+*        (CWorkspace: none)
+*        (RWorkspace: none)
+*
+         CALL CTRSEN( 'N', JOBVS, BWORK, N, A, LDA, VS, LDVS, W, SDIM,
+     $                S, SEP, WORK( IWRK ), LWORK-IWRK+1, ICOND )
+      END IF
+*
+      IF( WANTVS ) THEN
+*
+*        Undo balancing
+*        (CWorkspace: none)
+*        (RWorkspace: need N)
+*
+         CALL CGEBAK( 'P', 'R', N, ILO, IHI, RWORK( IBAL ), N, VS, LDVS,
+     $                IERR )
+      END IF
+*
+      IF( SCALEA ) THEN
+*
+*        Undo scaling for the Schur form of A
+*
+         CALL CLASCL( 'U', 0, 0, CSCALE, ANRM, N, N, A, LDA, IERR )
+         CALL CCOPY( N, A, LDA+1, W, 1 )
+      END IF
+*
+      WORK( 1 ) = MAXWRK
+      RETURN
+*
+*     End of CGEES
+*
+      END
diff --git a/costa/native/external/lapack/cgeesx.f b/costa/native/external/lapack/cgeesx.f
new file mode 100644
index 000000000..a096e9f59
--- /dev/null
+++ b/costa/native/external/lapack/cgeesx.f
@@ -0,0 +1,371 @@
+      SUBROUTINE CGEESX( JOBVS, SORT, SELECT, SENSE, N, A, LDA, SDIM, W,
+     $                   VS, LDVS, RCONDE, RCONDV, WORK, LWORK, RWORK,
+     $                   BWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVS, SENSE, SORT
+      INTEGER            INFO, LDA, LDVS, LWORK, N, SDIM
+      REAL               RCONDE, RCONDV
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            BWORK( * )
+      REAL               RWORK( * )
+      COMPLEX            A( LDA, * ), VS( LDVS, * ), W( * ), WORK( * )
+*     ..
+*     .. Function Arguments ..
+      LOGICAL            SELECT
+      EXTERNAL           SELECT
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGEESX computes for an N-by-N complex nonsymmetric matrix A, the
+*  eigenvalues, the Schur form T, and, optionally, the matrix of Schur
+*  vectors Z.  This gives the Schur factorization A = Z*T*(Z**H).
+*
+*  Optionally, it also orders the eigenvalues on the diagonal of the
+*  Schur form so that selected eigenvalues are at the top left;
+*  computes a reciprocal condition number for the average of the
+*  selected eigenvalues (RCONDE); and computes a reciprocal condition
+*  number for the right invariant subspace corresponding to the
+*  selected eigenvalues (RCONDV).  The leading columns of Z form an
+*  orthonormal basis for this invariant subspace.
+*
+*  For further explanation of the reciprocal condition numbers RCONDE
+*  and RCONDV, see Section 4.10 of the LAPACK Users' Guide (where
+*  these quantities are called s and sep respectively).
+*
+*  A complex matrix is in Schur form if it is upper triangular.
+*
+*  Arguments
+*  =========
+*
+*  JOBVS   (input) CHARACTER*1
+*          = 'N': Schur vectors are not computed;
+*          = 'V': Schur vectors are computed.
+*
+*  SORT    (input) CHARACTER*1
+*          Specifies whether or not to order the eigenvalues on the
+*          diagonal of the Schur form.
+*          = 'N': Eigenvalues are not ordered;
+*          = 'S': Eigenvalues are ordered (see SELECT).
+*
+*  SELECT  (input) LOGICAL FUNCTION of one COMPLEX argument
+*          SELECT must be declared EXTERNAL in the calling subroutine.
+*          If SORT = 'S', SELECT is used to select eigenvalues to order
+*          to the top left of the Schur form.
+*          If SORT = 'N', SELECT is not referenced.
+*          An eigenvalue W(j) is selected if SELECT(W(j)) is true.
+*
+*  SENSE   (input) CHARACTER*1
+*          Determines which reciprocal condition numbers are computed.
+*          = 'N': None are computed;
+*          = 'E': Computed for average of selected eigenvalues only;
+*          = 'V': Computed for selected right invariant subspace only;
+*          = 'B': Computed for both.
+*          If SENSE = 'E', 'V' or 'B', SORT must equal 'S'.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A. N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA, N)
+*          On entry, the N-by-N matrix A.
+*          On exit, A is overwritten by its Schur form T.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  SDIM    (output) INTEGER
+*          If SORT = 'N', SDIM = 0.
+*          If SORT = 'S', SDIM = number of eigenvalues for which
+*                         SELECT is true.
+*
+*  W       (output) COMPLEX array, dimension (N)
+*          W contains the computed eigenvalues, in the same order
+*          that they appear on the diagonal of the output Schur form T.
+*
+*  VS      (output) COMPLEX array, dimension (LDVS,N)
+*          If JOBVS = 'V', VS contains the unitary matrix Z of Schur
+*          vectors.
+*          If JOBVS = 'N', VS is not referenced.
+*
+*  LDVS    (input) INTEGER
+*          The leading dimension of the array VS.  LDVS >= 1, and if
+*          JOBVS = 'V', LDVS >= N.
+*
+*  RCONDE  (output) REAL
+*          If SENSE = 'E' or 'B', RCONDE contains the reciprocal
+*          condition number for the average of the selected eigenvalues.
+*          Not referenced if SENSE = 'N' or 'V'.
+*
+*  RCONDV  (output) REAL
+*          If SENSE = 'V' or 'B', RCONDV contains the reciprocal
+*          condition number for the selected right invariant subspace.
+*          Not referenced if SENSE = 'N' or 'E'.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,2*N).
+*          Also, if SENSE = 'E' or 'V' or 'B', LWORK >= 2*SDIM*(N-SDIM),
+*          where SDIM is the number of selected eigenvalues computed by
+*          this routine.  Note that 2*SDIM*(N-SDIM) <= N*N/2.
+*          For good performance, LWORK must generally be larger.
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  BWORK   (workspace) LOGICAL array, dimension (N)
+*          Not referenced if SORT = 'N'.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value.
+*          > 0: if INFO = i, and i is
+*             <= N: the QR algorithm failed to compute all the
+*                   eigenvalues; elements 1:ILO-1 and i+1:N of W
+*                   contain those eigenvalues which have converged; if
+*                   JOBVS = 'V', VS contains the transformation which
+*                   reduces A to its partially converged Schur form.
+*             = N+1: the eigenvalues could not be reordered because some
+*                   eigenvalues were too close to separate (the problem
+*                   is very ill-conditioned);
+*             = N+2: after reordering, roundoff changed values of some
+*                   complex eigenvalues so that leading eigenvalues in
+*                   the Schur form no longer satisfy SELECT=.TRUE.  This
+*                   could also be caused by underflow due to scaling.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            SCALEA, WANTSB, WANTSE, WANTSN, WANTST,
+     $                   WANTSV, WANTVS
+      INTEGER            HSWORK, I, IBAL, ICOND, IERR, IEVAL, IHI, ILO,
+     $                   ITAU, IWRK, K, MAXB, MAXWRK, MINWRK
+      REAL               ANRM, BIGNUM, CSCALE, EPS, SMLNUM
+*     ..
+*     .. Local Arrays ..
+      REAL               DUM( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CGEBAK, CGEBAL, CGEHRD, CHSEQR, CLACPY,
+     $                   CLASCL, CTRSEN, CUNGHR, SLABAD, SLASCL, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               CLANGE, SLAMCH
+      EXTERNAL           LSAME, ILAENV, CLANGE, SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      WANTVS = LSAME( JOBVS, 'V' )
+      WANTST = LSAME( SORT, 'S' )
+      WANTSN = LSAME( SENSE, 'N' )
+      WANTSE = LSAME( SENSE, 'E' )
+      WANTSV = LSAME( SENSE, 'V' )
+      WANTSB = LSAME( SENSE, 'B' )
+      IF( ( .NOT.WANTVS ) .AND. ( .NOT.LSAME( JOBVS, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSV .OR. WANTSB ) .OR.
+     $         ( .NOT.WANTST .AND. .NOT.WANTSN ) ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDVS.LT.1 .OR. ( WANTVS .AND. LDVS.LT.N ) ) THEN
+         INFO = -11
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of real workspace needed at that point in the
+*       code, as well as the preferred amount for good performance.
+*       CWorkspace refers to complex workspace, and RWorkspace to real
+*       workspace. NB refers to the optimal block size for the
+*       immediately following subroutine, as returned by ILAENV.
+*       HSWORK refers to the workspace preferred by CHSEQR, as
+*       calculated below. HSWORK is computed assuming ILO=1 and IHI=N,
+*       the worst case.
+*       If SENSE = 'E', 'V' or 'B', then the amount of workspace needed
+*       depends on SDIM, which is computed by the routine CTRSEN later
+*       in the code.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 ) ) THEN
+         MAXWRK = N + N*ILAENV( 1, 'CGEHRD', ' ', N, 1, N, 0 )
+         MINWRK = MAX( 1, 2*N )
+         IF( .NOT.WANTVS ) THEN
+            MAXB = MAX( ILAENV( 8, 'CHSEQR', 'SN', N, 1, N, -1 ), 2 )
+            K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'CHSEQR', 'SN', N, 1,
+     $          N, -1 ) ) )
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, HSWORK, 1 )
+         ELSE
+            MAXWRK = MAX( MAXWRK, N+( N-1 )*
+     $               ILAENV( 1, 'CUNGHR', ' ', N, 1, N, -1 ) )
+            MAXB = MAX( ILAENV( 8, 'CHSEQR', 'SV', N, 1, N, -1 ), 2 )
+            K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'CHSEQR', 'SV', N, 1,
+     $          N, -1 ) ) )
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, HSWORK, 1 )
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+      IF( LWORK.LT.MINWRK ) THEN
+         INFO = -15
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGEESX', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         SDIM = 0
+         RETURN
+      END IF
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SQRT( SMLNUM ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = CLANGE( 'M', N, N, A, LDA, DUM )
+      SCALEA = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = SMLNUM
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = BIGNUM
+      END IF
+      IF( SCALEA )
+     $   CALL CLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR )
+*
+*
+*     Permute the matrix to make it more nearly triangular
+*     (CWorkspace: none)
+*     (RWorkspace: need N)
+*
+      IBAL = 1
+      CALL CGEBAL( 'P', N, A, LDA, ILO, IHI, RWORK( IBAL ), IERR )
+*
+*     Reduce to upper Hessenberg form
+*     (CWorkspace: need 2*N, prefer N+N*NB)
+*     (RWorkspace: none)
+*
+      ITAU = 1
+      IWRK = N + ITAU
+      CALL CGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ),
+     $             LWORK-IWRK+1, IERR )
+*
+      IF( WANTVS ) THEN
+*
+*        Copy Householder vectors to VS
+*
+         CALL CLACPY( 'L', N, N, A, LDA, VS, LDVS )
+*
+*        Generate unitary matrix in VS
+*        (CWorkspace: need 2*N-1, prefer N+(N-1)*NB)
+*        (RWorkspace: none)
+*
+         CALL CUNGHR( N, ILO, IHI, VS, LDVS, WORK( ITAU ), WORK( IWRK ),
+     $                LWORK-IWRK+1, IERR )
+      END IF
+*
+      SDIM = 0
+*
+*     Perform QR iteration, accumulating Schur vectors in VS if desired
+*     (CWorkspace: need 1, prefer HSWORK (see comments) )
+*     (RWorkspace: none)
+*
+      IWRK = ITAU
+      CALL CHSEQR( 'S', JOBVS, N, ILO, IHI, A, LDA, W, VS, LDVS,
+     $             WORK( IWRK ), LWORK-IWRK+1, IEVAL )
+      IF( IEVAL.GT.0 )
+     $   INFO = IEVAL
+*
+*     Sort eigenvalues if desired
+*
+      IF( WANTST .AND. INFO.EQ.0 ) THEN
+         IF( SCALEA )
+     $      CALL CLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, W, N, IERR )
+         DO 10 I = 1, N
+            BWORK( I ) = SELECT( W( I ) )
+   10    CONTINUE
+*
+*        Reorder eigenvalues, transform Schur vectors, and compute
+*        reciprocal condition numbers
+*        (CWorkspace: if SENSE is not 'N', need 2*SDIM*(N-SDIM)
+*                     otherwise, need none )
+*        (RWorkspace: none)
+*
+         CALL CTRSEN( SENSE, JOBVS, BWORK, N, A, LDA, VS, LDVS, W, SDIM,
+     $                RCONDE, RCONDV, WORK( IWRK ), LWORK-IWRK+1,
+     $                ICOND )
+         IF( .NOT.WANTSN )
+     $      MAXWRK = MAX( MAXWRK, 2*SDIM*( N-SDIM ) )
+         IF( ICOND.EQ.-14 ) THEN
+*
+*           Not enough complex workspace
+*
+            INFO = -15
+         END IF
+      END IF
+*
+      IF( WANTVS ) THEN
+*
+*        Undo balancing
+*        (CWorkspace: none)
+*        (RWorkspace: need N)
+*
+         CALL CGEBAK( 'P', 'R', N, ILO, IHI, RWORK( IBAL ), N, VS, LDVS,
+     $                IERR )
+      END IF
+*
+      IF( SCALEA ) THEN
+*
+*        Undo scaling for the Schur form of A
+*
+         CALL CLASCL( 'U', 0, 0, CSCALE, ANRM, N, N, A, LDA, IERR )
+         CALL CCOPY( N, A, LDA+1, W, 1 )
+         IF( ( WANTSV .OR. WANTSB ) .AND. INFO.EQ.0 ) THEN
+            DUM( 1 ) = RCONDV
+            CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, 1, 1, DUM, 1, IERR )
+            RCONDV = DUM( 1 )
+         END IF
+      END IF
+*
+      WORK( 1 ) = MAXWRK
+      RETURN
+*
+*     End of CGEESX
+*
+      END
diff --git a/costa/native/external/lapack/cgeev.f b/costa/native/external/lapack/cgeev.f
new file mode 100644
index 000000000..77221d74a
--- /dev/null
+++ b/costa/native/external/lapack/cgeev.f
@@ -0,0 +1,391 @@
+      SUBROUTINE CGEEV( JOBVL, JOBVR, N, A, LDA, W, VL, LDVL, VR, LDVR,
+     $                  WORK, LWORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVL, JOBVR
+      INTEGER            INFO, LDA, LDVL, LDVR, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      REAL               RWORK( * )
+      COMPLEX            A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ),
+     $                   W( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGEEV computes for an N-by-N complex nonsymmetric matrix A, the
+*  eigenvalues and, optionally, the left and/or right eigenvectors.
+*
+*  The right eigenvector v(j) of A satisfies
+*                   A * v(j) = lambda(j) * v(j)
+*  where lambda(j) is its eigenvalue.
+*  The left eigenvector u(j) of A satisfies
+*                u(j)**H * A = lambda(j) * u(j)**H
+*  where u(j)**H denotes the conjugate transpose of u(j).
+*
+*  The computed eigenvectors are normalized to have Euclidean norm
+*  equal to 1 and largest component real.
+*
+*  Arguments
+*  =========
+*
+*  JOBVL   (input) CHARACTER*1
+*          = 'N': left eigenvectors of A are not computed;
+*          = 'V': left eigenvectors of are computed.
+*
+*  JOBVR   (input) CHARACTER*1
+*          = 'N': right eigenvectors of A are not computed;
+*          = 'V': right eigenvectors of A are computed.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A. N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the N-by-N matrix A.
+*          On exit, A has been overwritten.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  W       (output) COMPLEX array, dimension (N)
+*          W contains the computed eigenvalues.
+*
+*  VL      (output) COMPLEX array, dimension (LDVL,N)
+*          If JOBVL = 'V', the left eigenvectors u(j) are stored one
+*          after another in the columns of VL, in the same order
+*          as their eigenvalues.
+*          If JOBVL = 'N', VL is not referenced.
+*          u(j) = VL(:,j), the j-th column of VL.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the array VL.  LDVL >= 1; if
+*          JOBVL = 'V', LDVL >= N.
+*
+*  VR      (output) COMPLEX array, dimension (LDVR,N)
+*          If JOBVR = 'V', the right eigenvectors v(j) are stored one
+*          after another in the columns of VR, in the same order
+*          as their eigenvalues.
+*          If JOBVR = 'N', VR is not referenced.
+*          v(j) = VR(:,j), the j-th column of VR.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the array VR.  LDVR >= 1; if
+*          JOBVR = 'V', LDVR >= N.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,2*N).
+*          For good performance, LWORK must generally be larger.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) REAL array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = i, the QR algorithm failed to compute all the
+*                eigenvalues, and no eigenvectors have been computed;
+*                elements and i+1:N of W contain eigenvalues which have
+*                converged.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, SCALEA, WANTVL, WANTVR
+      CHARACTER          SIDE
+      INTEGER            HSWORK, I, IBAL, IERR, IHI, ILO, IRWORK, ITAU,
+     $                   IWRK, K, MAXB, MAXWRK, MINWRK, NOUT
+      REAL               ANRM, BIGNUM, CSCALE, EPS, SCL, SMLNUM
+      COMPLEX            TMP
+*     ..
+*     .. Local Arrays ..
+      LOGICAL            SELECT( 1 )
+      REAL               DUM( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEBAK, CGEBAL, CGEHRD, CHSEQR, CLACPY, CLASCL,
+     $                   CSCAL, CSSCAL, CTREVC, CUNGHR, SLABAD, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV, ISAMAX
+      REAL               CLANGE, SCNRM2, SLAMCH
+      EXTERNAL           LSAME, ILAENV, ISAMAX, CLANGE, SCNRM2, SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          AIMAG, CMPLX, CONJG, MAX, MIN, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      WANTVL = LSAME( JOBVL, 'V' )
+      WANTVR = LSAME( JOBVR, 'V' )
+      IF( ( .NOT.WANTVL ) .AND. ( .NOT.LSAME( JOBVL, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( ( .NOT.WANTVR ) .AND. ( .NOT.LSAME( JOBVR, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDVL.LT.1 .OR. ( WANTVL .AND. LDVL.LT.N ) ) THEN
+         INFO = -8
+      ELSE IF( LDVR.LT.1 .OR. ( WANTVR .AND. LDVR.LT.N ) ) THEN
+         INFO = -10
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       CWorkspace refers to complex workspace, and RWorkspace to real
+*       workspace. NB refers to the optimal block size for the
+*       immediately following subroutine, as returned by ILAENV.
+*       HSWORK refers to the workspace preferred by CHSEQR, as
+*       calculated below. HSWORK is computed assuming ILO=1 and IHI=N,
+*       the worst case.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN
+         MAXWRK = N + N*ILAENV( 1, 'CGEHRD', ' ', N, 1, N, 0 )
+         IF( ( .NOT.WANTVL ) .AND. ( .NOT.WANTVR ) ) THEN
+            MINWRK = MAX( 1, 2*N )
+            MAXB = MAX( ILAENV( 8, 'CHSEQR', 'EN', N, 1, N, -1 ), 2 )
+            K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'CHSEQR', 'EN', N, 1,
+     $          N, -1 ) ) )
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, HSWORK )
+         ELSE
+            MINWRK = MAX( 1, 2*N )
+            MAXWRK = MAX( MAXWRK, N+( N-1 )*
+     $               ILAENV( 1, 'CUNGHR', ' ', N, 1, N, -1 ) )
+            MAXB = MAX( ILAENV( 8, 'CHSEQR', 'SV', N, 1, N, -1 ), 2 )
+            K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'CHSEQR', 'SV', N, 1,
+     $          N, -1 ) ) )
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, HSWORK, 2*N )
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGEEV ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SQRT( SMLNUM ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = CLANGE( 'M', N, N, A, LDA, DUM )
+      SCALEA = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = SMLNUM
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = BIGNUM
+      END IF
+      IF( SCALEA )
+     $   CALL CLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR )
+*
+*     Balance the matrix
+*     (CWorkspace: none)
+*     (RWorkspace: need N)
+*
+      IBAL = 1
+      CALL CGEBAL( 'B', N, A, LDA, ILO, IHI, RWORK( IBAL ), IERR )
+*
+*     Reduce to upper Hessenberg form
+*     (CWorkspace: need 2*N, prefer N+N*NB)
+*     (RWorkspace: none)
+*
+      ITAU = 1
+      IWRK = ITAU + N
+      CALL CGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ),
+     $             LWORK-IWRK+1, IERR )
+*
+      IF( WANTVL ) THEN
+*
+*        Want left eigenvectors
+*        Copy Householder vectors to VL
+*
+         SIDE = 'L'
+         CALL CLACPY( 'L', N, N, A, LDA, VL, LDVL )
+*
+*        Generate unitary matrix in VL
+*        (CWorkspace: need 2*N-1, prefer N+(N-1)*NB)
+*        (RWorkspace: none)
+*
+         CALL CUNGHR( N, ILO, IHI, VL, LDVL, WORK( ITAU ), WORK( IWRK ),
+     $                LWORK-IWRK+1, IERR )
+*
+*        Perform QR iteration, accumulating Schur vectors in VL
+*        (CWorkspace: need 1, prefer HSWORK (see comments) )
+*        (RWorkspace: none)
+*
+         IWRK = ITAU
+         CALL CHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, W, VL, LDVL,
+     $                WORK( IWRK ), LWORK-IWRK+1, INFO )
+*
+         IF( WANTVR ) THEN
+*
+*           Want left and right eigenvectors
+*           Copy Schur vectors to VR
+*
+            SIDE = 'B'
+            CALL CLACPY( 'F', N, N, VL, LDVL, VR, LDVR )
+         END IF
+*
+      ELSE IF( WANTVR ) THEN
+*
+*        Want right eigenvectors
+*        Copy Householder vectors to VR
+*
+         SIDE = 'R'
+         CALL CLACPY( 'L', N, N, A, LDA, VR, LDVR )
+*
+*        Generate unitary matrix in VR
+*        (CWorkspace: need 2*N-1, prefer N+(N-1)*NB)
+*        (RWorkspace: none)
+*
+         CALL CUNGHR( N, ILO, IHI, VR, LDVR, WORK( ITAU ), WORK( IWRK ),
+     $                LWORK-IWRK+1, IERR )
+*
+*        Perform QR iteration, accumulating Schur vectors in VR
+*        (CWorkspace: need 1, prefer HSWORK (see comments) )
+*        (RWorkspace: none)
+*
+         IWRK = ITAU
+         CALL CHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, W, VR, LDVR,
+     $                WORK( IWRK ), LWORK-IWRK+1, INFO )
+*
+      ELSE
+*
+*        Compute eigenvalues only
+*        (CWorkspace: need 1, prefer HSWORK (see comments) )
+*        (RWorkspace: none)
+*
+         IWRK = ITAU
+         CALL CHSEQR( 'E', 'N', N, ILO, IHI, A, LDA, W, VR, LDVR,
+     $                WORK( IWRK ), LWORK-IWRK+1, INFO )
+      END IF
+*
+*     If INFO > 0 from CHSEQR, then quit
+*
+      IF( INFO.GT.0 )
+     $   GO TO 50
+*
+      IF( WANTVL .OR. WANTVR ) THEN
+*
+*        Compute left and/or right eigenvectors
+*        (CWorkspace: need 2*N)
+*        (RWorkspace: need 2*N)
+*
+         IRWORK = IBAL + N
+         CALL CTREVC( SIDE, 'B', SELECT, N, A, LDA, VL, LDVL, VR, LDVR,
+     $                N, NOUT, WORK( IWRK ), RWORK( IRWORK ), IERR )
+      END IF
+*
+      IF( WANTVL ) THEN
+*
+*        Undo balancing of left eigenvectors
+*        (CWorkspace: none)
+*        (RWorkspace: need N)
+*
+         CALL CGEBAK( 'B', 'L', N, ILO, IHI, RWORK( IBAL ), N, VL, LDVL,
+     $                IERR )
+*
+*        Normalize left eigenvectors and make largest component real
+*
+         DO 20 I = 1, N
+            SCL = ONE / SCNRM2( N, VL( 1, I ), 1 )
+            CALL CSSCAL( N, SCL, VL( 1, I ), 1 )
+            DO 10 K = 1, N
+               RWORK( IRWORK+K-1 ) = REAL( VL( K, I ) )**2 +
+     $                               AIMAG( VL( K, I ) )**2
+   10       CONTINUE
+            K = ISAMAX( N, RWORK( IRWORK ), 1 )
+            TMP = CONJG( VL( K, I ) ) / SQRT( RWORK( IRWORK+K-1 ) )
+            CALL CSCAL( N, TMP, VL( 1, I ), 1 )
+            VL( K, I ) = CMPLX( REAL( VL( K, I ) ), ZERO )
+   20    CONTINUE
+      END IF
+*
+      IF( WANTVR ) THEN
+*
+*        Undo balancing of right eigenvectors
+*        (CWorkspace: none)
+*        (RWorkspace: need N)
+*
+         CALL CGEBAK( 'B', 'R', N, ILO, IHI, RWORK( IBAL ), N, VR, LDVR,
+     $                IERR )
+*
+*        Normalize right eigenvectors and make largest component real
+*
+         DO 40 I = 1, N
+            SCL = ONE / SCNRM2( N, VR( 1, I ), 1 )
+            CALL CSSCAL( N, SCL, VR( 1, I ), 1 )
+            DO 30 K = 1, N
+               RWORK( IRWORK+K-1 ) = REAL( VR( K, I ) )**2 +
+     $                               AIMAG( VR( K, I ) )**2
+   30       CONTINUE
+            K = ISAMAX( N, RWORK( IRWORK ), 1 )
+            TMP = CONJG( VR( K, I ) ) / SQRT( RWORK( IRWORK+K-1 ) )
+            CALL CSCAL( N, TMP, VR( 1, I ), 1 )
+            VR( K, I ) = CMPLX( REAL( VR( K, I ) ), ZERO )
+   40    CONTINUE
+      END IF
+*
+*     Undo scaling if necessary
+*
+   50 CONTINUE
+      IF( SCALEA ) THEN
+         CALL CLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, W( INFO+1 ),
+     $                MAX( N-INFO, 1 ), IERR )
+         IF( INFO.GT.0 ) THEN
+            CALL CLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, W, N, IERR )
+         END IF
+      END IF
+*
+      WORK( 1 ) = MAXWRK
+      RETURN
+*
+*     End of CGEEV
+*
+      END
diff --git a/costa/native/external/lapack/cgeevx.f b/costa/native/external/lapack/cgeevx.f
new file mode 100644
index 000000000..4e2e3da64
--- /dev/null
+++ b/costa/native/external/lapack/cgeevx.f
@@ -0,0 +1,520 @@
+      SUBROUTINE CGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, W, VL,
+     $                   LDVL, VR, LDVR, ILO, IHI, SCALE, ABNRM, RCONDE,
+     $                   RCONDV, WORK, LWORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          BALANC, JOBVL, JOBVR, SENSE
+      INTEGER            IHI, ILO, INFO, LDA, LDVL, LDVR, LWORK, N
+      REAL               ABNRM
+*     ..
+*     .. Array Arguments ..
+      REAL               RCONDE( * ), RCONDV( * ), RWORK( * ),
+     $                   SCALE( * )
+      COMPLEX            A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ),
+     $                   W( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGEEVX computes for an N-by-N complex nonsymmetric matrix A, the
+*  eigenvalues and, optionally, the left and/or right eigenvectors.
+*
+*  Optionally also, it computes a balancing transformation to improve
+*  the conditioning of the eigenvalues and eigenvectors (ILO, IHI,
+*  SCALE, and ABNRM), reciprocal condition numbers for the eigenvalues
+*  (RCONDE), and reciprocal condition numbers for the right
+*  eigenvectors (RCONDV).
+*
+*  The right eigenvector v(j) of A satisfies
+*                   A * v(j) = lambda(j) * v(j)
+*  where lambda(j) is its eigenvalue.
+*  The left eigenvector u(j) of A satisfies
+*                u(j)**H * A = lambda(j) * u(j)**H
+*  where u(j)**H denotes the conjugate transpose of u(j).
+*
+*  The computed eigenvectors are normalized to have Euclidean norm
+*  equal to 1 and largest component real.
+*
+*  Balancing a matrix means permuting the rows and columns to make it
+*  more nearly upper triangular, and applying a diagonal similarity
+*  transformation D * A * D**(-1), where D is a diagonal matrix, to
+*  make its rows and columns closer in norm and the condition numbers
+*  of its eigenvalues and eigenvectors smaller.  The computed
+*  reciprocal condition numbers correspond to the balanced matrix.
+*  Permuting rows and columns will not change the condition numbers
+*  (in exact arithmetic) but diagonal scaling will.  For further
+*  explanation of balancing, see section 4.10.2 of the LAPACK
+*  Users' Guide.
+*
+*  Arguments
+*  =========
+*
+*  BALANC  (input) CHARACTER*1
+*          Indicates how the input matrix should be diagonally scaled
+*          and/or permuted to improve the conditioning of its
+*          eigenvalues.
+*          = 'N': Do not diagonally scale or permute;
+*          = 'P': Perform permutations to make the matrix more nearly
+*                 upper triangular. Do not diagonally scale;
+*          = 'S': Diagonally scale the matrix, ie. replace A by
+*                 D*A*D**(-1), where D is a diagonal matrix chosen
+*                 to make the rows and columns of A more equal in
+*                 norm. Do not permute;
+*          = 'B': Both diagonally scale and permute A.
+*
+*          Computed reciprocal condition numbers will be for the matrix
+*          after balancing and/or permuting. Permuting does not change
+*          condition numbers (in exact arithmetic), but balancing does.
+*
+*  JOBVL   (input) CHARACTER*1
+*          = 'N': left eigenvectors of A are not computed;
+*          = 'V': left eigenvectors of A are computed.
+*          If SENSE = 'E' or 'B', JOBVL must = 'V'.
+*
+*  JOBVR   (input) CHARACTER*1
+*          = 'N': right eigenvectors of A are not computed;
+*          = 'V': right eigenvectors of A are computed.
+*          If SENSE = 'E' or 'B', JOBVR must = 'V'.
+*
+*  SENSE   (input) CHARACTER*1
+*          Determines which reciprocal condition numbers are computed.
+*          = 'N': None are computed;
+*          = 'E': Computed for eigenvalues only;
+*          = 'V': Computed for right eigenvectors only;
+*          = 'B': Computed for eigenvalues and right eigenvectors.
+*
+*          If SENSE = 'E' or 'B', both left and right eigenvectors
+*          must also be computed (JOBVL = 'V' and JOBVR = 'V').
+*
+*  N       (input) INTEGER
+*          The order of the matrix A. N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the N-by-N matrix A.
+*          On exit, A has been overwritten.  If JOBVL = 'V' or
+*          JOBVR = 'V', A contains the Schur form of the balanced 
+*          version of the matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  W       (output) COMPLEX array, dimension (N)
+*          W contains the computed eigenvalues.
+*
+*  VL      (output) COMPLEX array, dimension (LDVL,N)
+*          If JOBVL = 'V', the left eigenvectors u(j) are stored one
+*          after another in the columns of VL, in the same order
+*          as their eigenvalues.
+*          If JOBVL = 'N', VL is not referenced.
+*          u(j) = VL(:,j), the j-th column of VL.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the array VL.  LDVL >= 1; if
+*          JOBVL = 'V', LDVL >= N.
+*
+*  VR      (output) COMPLEX array, dimension (LDVR,N)
+*          If JOBVR = 'V', the right eigenvectors v(j) are stored one
+*          after another in the columns of VR, in the same order
+*          as their eigenvalues.
+*          If JOBVR = 'N', VR is not referenced.
+*          v(j) = VR(:,j), the j-th column of VR.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the array VR.  LDVR >= 1; if
+*          JOBVR = 'V', LDVR >= N.
+*
+*  ILO,IHI (output) INTEGER
+*          ILO and IHI are integer values determined when A was
+*          balanced.  The balanced A(i,j) = 0 if I > J and
+*          J = 1,...,ILO-1 or I = IHI+1,...,N.
+*
+*  SCALE   (output) REAL array, dimension (N)
+*          Details of the permutations and scaling factors applied
+*          when balancing A.  If P(j) is the index of the row and column
+*          interchanged with row and column j, and D(j) is the scaling
+*          factor applied to row and column j, then
+*          SCALE(J) = P(J),    for J = 1,...,ILO-1
+*                   = D(J),    for J = ILO,...,IHI
+*                   = P(J)     for J = IHI+1,...,N.
+*          The order in which the interchanges are made is N to IHI+1,
+*          then 1 to ILO-1.
+*
+*  ABNRM   (output) REAL
+*          The one-norm of the balanced matrix (the maximum
+*          of the sum of absolute values of elements of any column).
+*
+*  RCONDE  (output) REAL array, dimension (N)
+*          RCONDE(j) is the reciprocal condition number of the j-th
+*          eigenvalue.
+*
+*  RCONDV  (output) REAL array, dimension (N)
+*          RCONDV(j) is the reciprocal condition number of the j-th
+*          right eigenvector.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  If SENSE = 'N' or 'E',
+*          LWORK >= max(1,2*N), and if SENSE = 'V' or 'B',
+*          LWORK >= N*N+2*N.
+*          For good performance, LWORK must generally be larger.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) REAL array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = i, the QR algorithm failed to compute all the
+*                eigenvalues, and no eigenvectors or condition numbers
+*                have been computed; elements 1:ILO-1 and i+1:N of W
+*                contain eigenvalues which have converged.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, SCALEA, WANTVL, WANTVR, WNTSNB, WNTSNE,
+     $                   WNTSNN, WNTSNV
+      CHARACTER          JOB, SIDE
+      INTEGER            HSWORK, I, ICOND, IERR, ITAU, IWRK, K, MAXB,
+     $                   MAXWRK, MINWRK, NOUT
+      REAL               ANRM, BIGNUM, CSCALE, EPS, SCL, SMLNUM
+      COMPLEX            TMP
+*     ..
+*     .. Local Arrays ..
+      LOGICAL            SELECT( 1 )
+      REAL               DUM( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEBAK, CGEBAL, CGEHRD, CHSEQR, CLACPY, CLASCL,
+     $                   CSCAL, CSSCAL, CTREVC, CTRSNA, CUNGHR, SLABAD,
+     $                   SLASCL, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV, ISAMAX
+      REAL               CLANGE, SCNRM2, SLAMCH
+      EXTERNAL           LSAME, ILAENV, ISAMAX, CLANGE, SCNRM2, SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          AIMAG, CMPLX, CONJG, MAX, MIN, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      WANTVL = LSAME( JOBVL, 'V' )
+      WANTVR = LSAME( JOBVR, 'V' )
+      WNTSNN = LSAME( SENSE, 'N' )
+      WNTSNE = LSAME( SENSE, 'E' )
+      WNTSNV = LSAME( SENSE, 'V' )
+      WNTSNB = LSAME( SENSE, 'B' )
+      IF( .NOT.( LSAME( BALANC, 'N' ) .OR. LSAME( BALANC, 'S' ) .OR.
+     $    LSAME( BALANC, 'P' ) .OR. LSAME( BALANC, 'B' ) ) ) THEN
+         INFO = -1
+      ELSE IF( ( .NOT.WANTVL ) .AND. ( .NOT.LSAME( JOBVL, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( ( .NOT.WANTVR ) .AND. ( .NOT.LSAME( JOBVR, 'N' ) ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.( WNTSNN .OR. WNTSNE .OR. WNTSNB .OR. WNTSNV ) .OR.
+     $         ( ( WNTSNE .OR. WNTSNB ) .AND. .NOT.( WANTVL .AND.
+     $         WANTVR ) ) ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDVL.LT.1 .OR. ( WANTVL .AND. LDVL.LT.N ) ) THEN
+         INFO = -10
+      ELSE IF( LDVR.LT.1 .OR. ( WANTVR .AND. LDVR.LT.N ) ) THEN
+         INFO = -12
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       CWorkspace refers to complex workspace, and RWorkspace to real
+*       workspace. NB refers to the optimal block size for the
+*       immediately following subroutine, as returned by ILAENV.
+*       HSWORK refers to the workspace preferred by CHSEQR, as
+*       calculated below. HSWORK is computed assuming ILO=1 and IHI=N,
+*       the worst case.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN
+         MAXWRK = N + N*ILAENV( 1, 'CGEHRD', ' ', N, 1, N, 0 )
+         IF( ( .NOT.WANTVL ) .AND. ( .NOT.WANTVR ) ) THEN
+            MINWRK = MAX( 1, 2*N )
+            IF( .NOT.( WNTSNN .OR. WNTSNE ) )
+     $         MINWRK = MAX( MINWRK, N*N+2*N )
+            MAXB = MAX( ILAENV( 8, 'CHSEQR', 'SN', N, 1, N, -1 ), 2 )
+            IF( WNTSNN ) THEN
+               K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'CHSEQR', 'EN', N,
+     $             1, N, -1 ) ) )
+            ELSE
+               K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'CHSEQR', 'SN', N,
+     $             1, N, -1 ) ) )
+            END IF
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, 1, HSWORK )
+            IF( .NOT.( WNTSNN .OR. WNTSNE ) )
+     $         MAXWRK = MAX( MAXWRK, N*N+2*N )
+         ELSE
+            MINWRK = MAX( 1, 2*N )
+            IF( .NOT.( WNTSNN .OR. WNTSNE ) )
+     $         MINWRK = MAX( MINWRK, N*N+2*N )
+            MAXB = MAX( ILAENV( 8, 'CHSEQR', 'SN', N, 1, N, -1 ), 2 )
+            K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'CHSEQR', 'EN', N, 1,
+     $          N, -1 ) ) )
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, 1, HSWORK )
+            MAXWRK = MAX( MAXWRK, N+( N-1 )*
+     $               ILAENV( 1, 'CUNGHR', ' ', N, 1, N, -1 ) )
+            IF( .NOT.( WNTSNN .OR. WNTSNE ) )
+     $         MAXWRK = MAX( MAXWRK, N*N+2*N )
+            MAXWRK = MAX( MAXWRK, 2*N, 1 )
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
+         INFO = -20
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGEEVX', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SQRT( SMLNUM ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ICOND = 0
+      ANRM = CLANGE( 'M', N, N, A, LDA, DUM )
+      SCALEA = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = SMLNUM
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = BIGNUM
+      END IF
+      IF( SCALEA )
+     $   CALL CLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR )
+*
+*     Balance the matrix and compute ABNRM
+*
+      CALL CGEBAL( BALANC, N, A, LDA, ILO, IHI, SCALE, IERR )
+      ABNRM = CLANGE( '1', N, N, A, LDA, DUM )
+      IF( SCALEA ) THEN
+         DUM( 1 ) = ABNRM
+         CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, 1, 1, DUM, 1, IERR )
+         ABNRM = DUM( 1 )
+      END IF
+*
+*     Reduce to upper Hessenberg form
+*     (CWorkspace: need 2*N, prefer N+N*NB)
+*     (RWorkspace: none)
+*
+      ITAU = 1
+      IWRK = ITAU + N
+      CALL CGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ),
+     $             LWORK-IWRK+1, IERR )
+*
+      IF( WANTVL ) THEN
+*
+*        Want left eigenvectors
+*        Copy Householder vectors to VL
+*
+         SIDE = 'L'
+         CALL CLACPY( 'L', N, N, A, LDA, VL, LDVL )
+*
+*        Generate unitary matrix in VL
+*        (CWorkspace: need 2*N-1, prefer N+(N-1)*NB)
+*        (RWorkspace: none)
+*
+         CALL CUNGHR( N, ILO, IHI, VL, LDVL, WORK( ITAU ), WORK( IWRK ),
+     $                LWORK-IWRK+1, IERR )
+*
+*        Perform QR iteration, accumulating Schur vectors in VL
+*        (CWorkspace: need 1, prefer HSWORK (see comments) )
+*        (RWorkspace: none)
+*
+         IWRK = ITAU
+         CALL CHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, W, VL, LDVL,
+     $                WORK( IWRK ), LWORK-IWRK+1, INFO )
+*
+         IF( WANTVR ) THEN
+*
+*           Want left and right eigenvectors
+*           Copy Schur vectors to VR
+*
+            SIDE = 'B'
+            CALL CLACPY( 'F', N, N, VL, LDVL, VR, LDVR )
+         END IF
+*
+      ELSE IF( WANTVR ) THEN
+*
+*        Want right eigenvectors
+*        Copy Householder vectors to VR
+*
+         SIDE = 'R'
+         CALL CLACPY( 'L', N, N, A, LDA, VR, LDVR )
+*
+*        Generate unitary matrix in VR
+*        (CWorkspace: need 2*N-1, prefer N+(N-1)*NB)
+*        (RWorkspace: none)
+*
+         CALL CUNGHR( N, ILO, IHI, VR, LDVR, WORK( ITAU ), WORK( IWRK ),
+     $                LWORK-IWRK+1, IERR )
+*
+*        Perform QR iteration, accumulating Schur vectors in VR
+*        (CWorkspace: need 1, prefer HSWORK (see comments) )
+*        (RWorkspace: none)
+*
+         IWRK = ITAU
+         CALL CHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, W, VR, LDVR,
+     $                WORK( IWRK ), LWORK-IWRK+1, INFO )
+*
+      ELSE
+*
+*        Compute eigenvalues only
+*        If condition numbers desired, compute Schur form
+*
+         IF( WNTSNN ) THEN
+            JOB = 'E'
+         ELSE
+            JOB = 'S'
+         END IF
+*
+*        (CWorkspace: need 1, prefer HSWORK (see comments) )
+*        (RWorkspace: none)
+*
+         IWRK = ITAU
+         CALL CHSEQR( JOB, 'N', N, ILO, IHI, A, LDA, W, VR, LDVR,
+     $                WORK( IWRK ), LWORK-IWRK+1, INFO )
+      END IF
+*
+*     If INFO > 0 from CHSEQR, then quit
+*
+      IF( INFO.GT.0 )
+     $   GO TO 50
+*
+      IF( WANTVL .OR. WANTVR ) THEN
+*
+*        Compute left and/or right eigenvectors
+*        (CWorkspace: need 2*N)
+*        (RWorkspace: need N)
+*
+         CALL CTREVC( SIDE, 'B', SELECT, N, A, LDA, VL, LDVL, VR, LDVR,
+     $                N, NOUT, WORK( IWRK ), RWORK, IERR )
+      END IF
+*
+*     Compute condition numbers if desired
+*     (CWorkspace: need N*N+2*N unless SENSE = 'E')
+*     (RWorkspace: need 2*N unless SENSE = 'E')
+*
+      IF( .NOT.WNTSNN ) THEN
+         CALL CTRSNA( SENSE, 'A', SELECT, N, A, LDA, VL, LDVL, VR, LDVR,
+     $                RCONDE, RCONDV, N, NOUT, WORK( IWRK ), N, RWORK,
+     $                ICOND )
+      END IF
+*
+      IF( WANTVL ) THEN
+*
+*        Undo balancing of left eigenvectors
+*
+         CALL CGEBAK( BALANC, 'L', N, ILO, IHI, SCALE, N, VL, LDVL,
+     $                IERR )
+*
+*        Normalize left eigenvectors and make largest component real
+*
+         DO 20 I = 1, N
+            SCL = ONE / SCNRM2( N, VL( 1, I ), 1 )
+            CALL CSSCAL( N, SCL, VL( 1, I ), 1 )
+            DO 10 K = 1, N
+               RWORK( K ) = REAL( VL( K, I ) )**2 +
+     $                      AIMAG( VL( K, I ) )**2
+   10       CONTINUE
+            K = ISAMAX( N, RWORK, 1 )
+            TMP = CONJG( VL( K, I ) ) / SQRT( RWORK( K ) )
+            CALL CSCAL( N, TMP, VL( 1, I ), 1 )
+            VL( K, I ) = CMPLX( REAL( VL( K, I ) ), ZERO )
+   20    CONTINUE
+      END IF
+*
+      IF( WANTVR ) THEN
+*
+*        Undo balancing of right eigenvectors
+*
+         CALL CGEBAK( BALANC, 'R', N, ILO, IHI, SCALE, N, VR, LDVR,
+     $                IERR )
+*
+*        Normalize right eigenvectors and make largest component real
+*
+         DO 40 I = 1, N
+            SCL = ONE / SCNRM2( N, VR( 1, I ), 1 )
+            CALL CSSCAL( N, SCL, VR( 1, I ), 1 )
+            DO 30 K = 1, N
+               RWORK( K ) = REAL( VR( K, I ) )**2 +
+     $                      AIMAG( VR( K, I ) )**2
+   30       CONTINUE
+            K = ISAMAX( N, RWORK, 1 )
+            TMP = CONJG( VR( K, I ) ) / SQRT( RWORK( K ) )
+            CALL CSCAL( N, TMP, VR( 1, I ), 1 )
+            VR( K, I ) = CMPLX( REAL( VR( K, I ) ), ZERO )
+   40    CONTINUE
+      END IF
+*
+*     Undo scaling if necessary
+*
+   50 CONTINUE
+      IF( SCALEA ) THEN
+         CALL CLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, W( INFO+1 ),
+     $                MAX( N-INFO, 1 ), IERR )
+         IF( INFO.EQ.0 ) THEN
+            IF( ( WNTSNV .OR. WNTSNB ) .AND. ICOND.EQ.0 )
+     $         CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, RCONDV, N,
+     $                      IERR )
+         ELSE
+            CALL CLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, W, N, IERR )
+         END IF
+      END IF
+*
+      WORK( 1 ) = MAXWRK
+      RETURN
+*
+*     End of CGEEVX
+*
+      END
diff --git a/costa/native/external/lapack/cgegs.f b/costa/native/external/lapack/cgegs.f
new file mode 100644
index 000000000..f41a49b6b
--- /dev/null
+++ b/costa/native/external/lapack/cgegs.f
@@ -0,0 +1,442 @@
+      SUBROUTINE CGEGS( JOBVSL, JOBVSR, N, A, LDA, B, LDB, ALPHA, BETA,
+     $                  VSL, LDVSL, VSR, LDVSR, WORK, LWORK, RWORK,
+     $                  INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVSL, JOBVSR
+      INTEGER            INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      REAL               RWORK( * )
+      COMPLEX            A( LDA, * ), ALPHA( * ), B( LDB, * ),
+     $                   BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This routine is deprecated and has been replaced by routine CGGES.
+*
+*  CGEGS computes for a pair of N-by-N complex nonsymmetric matrices A,
+*  B:  the generalized eigenvalues (alpha, beta), the complex Schur
+*  form (A, B), and optionally left and/or right Schur vectors
+*  (VSL and VSR).
+*
+*  (If only the generalized eigenvalues are needed, use the driver CGEGV
+*  instead.)
+*
+*  A generalized eigenvalue for a pair of matrices (A,B) is, roughly
+*  speaking, a scalar w or a ratio  alpha/beta = w, such that  A - w*B
+*  is singular.  It is usually represented as the pair (alpha,beta),
+*  as there is a reasonable interpretation for beta=0, and even for
+*  both being zero.  A good beginning reference is the book, "Matrix
+*  Computations", by G. Golub & C. van Loan (Johns Hopkins U. Press)
+*
+*  The (generalized) Schur form of a pair of matrices is the result of
+*  multiplying both matrices on the left by one unitary matrix and
+*  both on the right by another unitary matrix, these two unitary
+*  matrices being chosen so as to bring the pair of matrices into
+*  upper triangular form with the diagonal elements of B being
+*  non-negative real numbers (this is also called complex Schur form.)
+*
+*  The left and right Schur vectors are the columns of VSL and VSR,
+*  respectively, where VSL and VSR are the unitary matrices
+*  which reduce A and B to Schur form:
+*
+*  Schur form of (A,B) = ( (VSL)**H A (VSR), (VSL)**H B (VSR) )
+*
+*  Arguments
+*  =========
+*
+*  JOBVSL   (input) CHARACTER*1
+*          = 'N':  do not compute the left Schur vectors;
+*          = 'V':  compute the left Schur vectors.
+*
+*  JOBVSR   (input) CHARACTER*1
+*          = 'N':  do not compute the right Schur vectors;
+*          = 'V':  compute the right Schur vectors.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A, B, VSL, and VSR.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA, N)
+*          On entry, the first of the pair of matrices whose generalized
+*          eigenvalues and (optionally) Schur vectors are to be
+*          computed.
+*          On exit, the generalized Schur form of A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of A.  LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB, N)
+*          On entry, the second of the pair of matrices whose
+*          generalized eigenvalues and (optionally) Schur vectors are
+*          to be computed.
+*          On exit, the generalized Schur form of B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of B.  LDB >= max(1,N).
+*
+*  ALPHA   (output) COMPLEX array, dimension (N)
+*  BETA    (output) COMPLEX array, dimension (N)
+*          On exit,  ALPHA(j)/BETA(j), j=1,...,N, will be the
+*          generalized eigenvalues.  ALPHA(j), j=1,...,N  and  BETA(j),
+*          j=1,...,N  are the diagonals of the complex Schur form (A,B)
+*          output by CGEGS.  The  BETA(j) will be non-negative real.
+*
+*          Note: the quotients ALPHA(j)/BETA(j) may easily over- or
+*          underflow, and BETA(j) may even be zero.  Thus, the user
+*          should avoid naively computing the ratio alpha/beta.
+*          However, ALPHA will be always less than and usually
+*          comparable with norm(A) in magnitude, and BETA always less
+*          than and usually comparable with norm(B).
+*
+*  VSL     (output) COMPLEX array, dimension (LDVSL,N)
+*          If JOBVSL = 'V', VSL will contain the left Schur vectors.
+*          (See "Purpose", above.)
+*          Not referenced if JOBVSL = 'N'.
+*
+*  LDVSL   (input) INTEGER
+*          The leading dimension of the matrix VSL. LDVSL >= 1, and
+*          if JOBVSL = 'V', LDVSL >= N.
+*
+*  VSR     (output) COMPLEX array, dimension (LDVSR,N)
+*          If JOBVSR = 'V', VSR will contain the right Schur vectors.
+*          (See "Purpose", above.)
+*          Not referenced if JOBVSR = 'N'.
+*
+*  LDVSR   (input) INTEGER
+*          The leading dimension of the matrix VSR. LDVSR >= 1, and
+*          if JOBVSR = 'V', LDVSR >= N.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,2*N).
+*          For good performance, LWORK must generally be larger.
+*          To compute the optimal value of LWORK, call ILAENV to get
+*          blocksizes (for CGEQRF, CUNMQR, and CUNGQR.)  Then compute:
+*          NB  -- MAX of the blocksizes for CGEQRF, CUNMQR, and CUNGQR;
+*          the optimal LWORK is N*(NB+1).
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) REAL array, dimension (3*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          =1,...,N:
+*                The QZ iteration failed.  (A,B) are not in Schur
+*                form, but ALPHA(j) and BETA(j) should be correct for
+*                j=INFO+1,...,N.
+*          > N:  errors that usually indicate LAPACK problems:
+*                =N+1: error return from CGGBAL
+*                =N+2: error return from CGEQRF
+*                =N+3: error return from CUNMQR
+*                =N+4: error return from CUNGQR
+*                =N+5: error return from CGGHRD
+*                =N+6: error return from CHGEQZ (other than failed
+*                                               iteration)
+*                =N+7: error return from CGGBAK (computing VSL)
+*                =N+8: error return from CGGBAK (computing VSR)
+*                =N+9: error return from CLASCL (various places)
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+      COMPLEX            CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0E0, 0.0E0 ),
+     $                   CONE = ( 1.0E0, 0.0E0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ILASCL, ILBSCL, ILVSL, ILVSR, LQUERY
+      INTEGER            ICOLS, IHI, IINFO, IJOBVL, IJOBVR, ILEFT,
+     $                   ILO, IRIGHT, IROWS, IRWORK, ITAU, IWORK,
+     $                   LOPT, LWKMIN, LWKOPT, NB, NB1, NB2, NB3
+      REAL               ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS,
+     $                   SAFMIN, SMLNUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEQRF, CGGBAK, CGGBAL, CGGHRD, CHGEQZ, CLACPY,
+     $                   CLASCL, CLASET, CUNGQR, CUNMQR, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               CLANGE, SLAMCH
+      EXTERNAL           ILAENV, LSAME, CLANGE, SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode the input arguments
+*
+      IF( LSAME( JOBVSL, 'N' ) ) THEN
+         IJOBVL = 1
+         ILVSL = .FALSE.
+      ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN
+         IJOBVL = 2
+         ILVSL = .TRUE.
+      ELSE
+         IJOBVL = -1
+         ILVSL = .FALSE.
+      END IF
+*
+      IF( LSAME( JOBVSR, 'N' ) ) THEN
+         IJOBVR = 1
+         ILVSR = .FALSE.
+      ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN
+         IJOBVR = 2
+         ILVSR = .TRUE.
+      ELSE
+         IJOBVR = -1
+         ILVSR = .FALSE.
+      END IF
+*
+*     Test the input arguments
+*
+      LWKMIN = MAX( 2*N, 1 )
+      LWKOPT = LWKMIN
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      INFO = 0
+      IF( IJOBVL.LE.0 ) THEN
+         INFO = -1
+      ELSE IF( IJOBVR.LE.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN
+         INFO = -11
+      ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN
+         INFO = -13
+      ELSE IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -15
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB1 = ILAENV( 1, 'CGEQRF', ' ', N, N, -1, -1 )
+         NB2 = ILAENV( 1, 'CUNMQR', ' ', N, N, N, -1 )
+         NB3 = ILAENV( 1, 'CUNGQR', ' ', N, N, N, -1 )
+         NB = MAX( NB1, NB2, NB3 )
+         LOPT = N*(NB+1)
+         WORK( 1 ) = LOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGEGS ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'E' )*SLAMCH( 'B' )
+      SAFMIN = SLAMCH( 'S' )
+      SMLNUM = N*SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = CLANGE( 'M', N, N, A, LDA, RWORK )
+      ILASCL = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         ANRMTO = SMLNUM
+         ILASCL = .TRUE.
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         ANRMTO = BIGNUM
+         ILASCL = .TRUE.
+      END IF
+*
+      IF( ILASCL ) THEN
+         CALL CLASCL( 'G', -1, -1, ANRM, ANRMTO, N, N, A, LDA, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 9
+            RETURN
+         END IF
+      END IF
+*
+*     Scale B if max element outside range [SMLNUM,BIGNUM]
+*
+      BNRM = CLANGE( 'M', N, N, B, LDB, RWORK )
+      ILBSCL = .FALSE.
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+         BNRMTO = SMLNUM
+         ILBSCL = .TRUE.
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+         BNRMTO = BIGNUM
+         ILBSCL = .TRUE.
+      END IF
+*
+      IF( ILBSCL ) THEN
+         CALL CLASCL( 'G', -1, -1, BNRM, BNRMTO, N, N, B, LDB, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 9
+            RETURN
+         END IF
+      END IF
+*
+*     Permute the matrix to make it more nearly triangular
+*
+      ILEFT = 1
+      IRIGHT = N + 1
+      IRWORK = IRIGHT + N
+      IWORK = 1
+      CALL CGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ),
+     $             RWORK( IRIGHT ), RWORK( IRWORK ), IINFO )
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 1
+         GO TO 10
+      END IF
+*
+*     Reduce B to triangular form, and initialize VSL and/or VSR
+*
+      IROWS = IHI + 1 - ILO
+      ICOLS = N + 1 - ILO
+      ITAU = IWORK
+      IWORK = ITAU + IROWS
+      CALL CGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ),
+     $             WORK( IWORK ), LWORK+1-IWORK, IINFO )
+      IF( IINFO.GE.0 )
+     $   LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 2
+         GO TO 10
+      END IF
+*
+      CALL CUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB,
+     $             WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWORK ),
+     $             LWORK+1-IWORK, IINFO )
+      IF( IINFO.GE.0 )
+     $   LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 3
+         GO TO 10
+      END IF
+*
+      IF( ILVSL ) THEN
+         CALL CLASET( 'Full', N, N, CZERO, CONE, VSL, LDVSL )
+         CALL CLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB,
+     $                VSL( ILO+1, ILO ), LDVSL )
+         CALL CUNGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL,
+     $                WORK( ITAU ), WORK( IWORK ), LWORK+1-IWORK,
+     $                IINFO )
+         IF( IINFO.GE.0 )
+     $      LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 4
+            GO TO 10
+         END IF
+      END IF
+*
+      IF( ILVSR )
+     $   CALL CLASET( 'Full', N, N, CZERO, CONE, VSR, LDVSR )
+*
+*     Reduce to generalized Hessenberg form
+*
+      CALL CGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL,
+     $             LDVSL, VSR, LDVSR, IINFO )
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 5
+         GO TO 10
+      END IF
+*
+*     Perform QZ algorithm, computing Schur vectors if desired
+*
+      IWORK = ITAU
+      CALL CHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB,
+     $             ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK( IWORK ),
+     $             LWORK+1-IWORK, RWORK( IRWORK ), IINFO )
+      IF( IINFO.GE.0 )
+     $   LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+      IF( IINFO.NE.0 ) THEN
+         IF( IINFO.GT.0 .AND. IINFO.LE.N ) THEN
+            INFO = IINFO
+         ELSE IF( IINFO.GT.N .AND. IINFO.LE.2*N ) THEN
+            INFO = IINFO - N
+         ELSE
+            INFO = N + 6
+         END IF
+         GO TO 10
+      END IF
+*
+*     Apply permutation to VSL and VSR
+*
+      IF( ILVSL ) THEN
+         CALL CGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ),
+     $                RWORK( IRIGHT ), N, VSL, LDVSL, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 7
+            GO TO 10
+         END IF
+      END IF
+      IF( ILVSR ) THEN
+         CALL CGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ),
+     $                RWORK( IRIGHT ), N, VSR, LDVSR, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 8
+            GO TO 10
+         END IF
+      END IF
+*
+*     Undo scaling
+*
+      IF( ILASCL ) THEN
+         CALL CLASCL( 'U', -1, -1, ANRMTO, ANRM, N, N, A, LDA, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 9
+            RETURN
+         END IF
+         CALL CLASCL( 'G', -1, -1, ANRMTO, ANRM, N, 1, ALPHA, N, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 9
+            RETURN
+         END IF
+      END IF
+*
+      IF( ILBSCL ) THEN
+         CALL CLASCL( 'U', -1, -1, BNRMTO, BNRM, N, N, B, LDB, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 9
+            RETURN
+         END IF
+         CALL CLASCL( 'G', -1, -1, BNRMTO, BNRM, N, 1, BETA, N, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 9
+            RETURN
+         END IF
+      END IF
+*
+   10 CONTINUE
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of CGEGS
+*
+      END
diff --git a/costa/native/external/lapack/cgegv.f b/costa/native/external/lapack/cgegv.f
new file mode 100644
index 000000000..ca4525437
--- /dev/null
+++ b/costa/native/external/lapack/cgegv.f
@@ -0,0 +1,591 @@
+      SUBROUTINE CGEGV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA,
+     $                  VL, LDVL, VR, LDVR, WORK, LWORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVL, JOBVR
+      INTEGER            INFO, LDA, LDB, LDVL, LDVR, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      REAL               RWORK( * )
+      COMPLEX            A( LDA, * ), ALPHA( * ), B( LDB, * ),
+     $                   BETA( * ), VL( LDVL, * ), VR( LDVR, * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This routine is deprecated and has been replaced by routine CGGEV.
+*
+*  CGEGV computes for a pair of N-by-N complex nonsymmetric matrices A
+*  and B, the generalized eigenvalues (alpha, beta), and optionally,
+*  the left and/or right generalized eigenvectors (VL and VR).
+*
+*  A generalized eigenvalue for a pair of matrices (A,B) is, roughly
+*  speaking, a scalar w or a ratio  alpha/beta = w, such that  A - w*B
+*  is singular.  It is usually represented as the pair (alpha,beta),
+*  as there is a reasonable interpretation for beta=0, and even for
+*  both being zero.  A good beginning reference is the book, "Matrix
+*  Computations", by G. Golub & C. van Loan (Johns Hopkins U. Press)
+*
+*  A right generalized eigenvector corresponding to a generalized
+*  eigenvalue  w  for a pair of matrices (A,B) is a vector  r  such
+*  that  (A - w B) r = 0 .  A left generalized eigenvector is a vector
+*  l such that l**H * (A - w B) = 0, where l**H is the
+*  conjugate-transpose of l.
+*
+*  Note: this routine performs "full balancing" on A and B -- see
+*  "Further Details", below.
+*
+*  Arguments
+*  =========
+*
+*  JOBVL   (input) CHARACTER*1
+*          = 'N':  do not compute the left generalized eigenvectors;
+*          = 'V':  compute the left generalized eigenvectors.
+*
+*  JOBVR   (input) CHARACTER*1
+*          = 'N':  do not compute the right generalized eigenvectors;
+*          = 'V':  compute the right generalized eigenvectors.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A, B, VL, and VR.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA, N)
+*          On entry, the first of the pair of matrices whose
+*          generalized eigenvalues and (optionally) generalized
+*          eigenvectors are to be computed.
+*          On exit, the contents will have been destroyed.  (For a
+*          description of the contents of A on exit, see "Further
+*          Details", below.)
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of A.  LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB, N)
+*          On entry, the second of the pair of matrices whose
+*          generalized eigenvalues and (optionally) generalized
+*          eigenvectors are to be computed.
+*          On exit, the contents will have been destroyed.  (For a
+*          description of the contents of B on exit, see "Further
+*          Details", below.)
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of B.  LDB >= max(1,N).
+*
+*  ALPHA   (output) COMPLEX array, dimension (N)
+*  BETA    (output) COMPLEX array, dimension (N)
+*          On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the
+*          generalized eigenvalues.
+*
+*          Note: the quotients ALPHA(j)/BETA(j) may easily over- or
+*          underflow, and BETA(j) may even be zero.  Thus, the user
+*          should avoid naively computing the ratio alpha/beta.
+*          However, ALPHA will be always less than and usually
+*          comparable with norm(A) in magnitude, and BETA always less
+*          than and usually comparable with norm(B).
+*
+*  VL      (output) COMPLEX array, dimension (LDVL,N)
+*          If JOBVL = 'V', the left generalized eigenvectors.  (See
+*          "Purpose", above.)
+*          Each eigenvector will be scaled so the largest component
+*          will have abs(real part) + abs(imag. part) = 1, *except*
+*          that for eigenvalues with alpha=beta=0, a zero vector will
+*          be returned as the corresponding eigenvector.
+*          Not referenced if JOBVL = 'N'.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the matrix VL. LDVL >= 1, and
+*          if JOBVL = 'V', LDVL >= N.
+*
+*  VR      (output) COMPLEX array, dimension (LDVR,N)
+*          If JOBVR = 'V', the right generalized eigenvectors.  (See
+*          "Purpose", above.)
+*          Each eigenvector will be scaled so the largest component
+*          will have abs(real part) + abs(imag. part) = 1, *except*
+*          that for eigenvalues with alpha=beta=0, a zero vector will
+*          be returned as the corresponding eigenvector.
+*          Not referenced if JOBVR = 'N'.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the matrix VR. LDVR >= 1, and
+*          if JOBVR = 'V', LDVR >= N.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,2*N).
+*          For good performance, LWORK must generally be larger.
+*          To compute the optimal value of LWORK, call ILAENV to get
+*          blocksizes (for CGEQRF, CUNMQR, and CUNGQR.)  Then compute:
+*          NB  -- MAX of the blocksizes for CGEQRF, CUNMQR, and CUNGQR;
+*          The optimal LWORK is  MAX( 2*N, N*(NB+1) ).
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace/output) REAL array, dimension (8*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          =1,...,N:
+*                The QZ iteration failed.  No eigenvectors have been
+*                calculated, but ALPHA(j) and BETA(j) should be
+*                correct for j=INFO+1,...,N.
+*          > N:  errors that usually indicate LAPACK problems:
+*                =N+1: error return from CGGBAL
+*                =N+2: error return from CGEQRF
+*                =N+3: error return from CUNMQR
+*                =N+4: error return from CUNGQR
+*                =N+5: error return from CGGHRD
+*                =N+6: error return from CHGEQZ (other than failed
+*                                               iteration)
+*                =N+7: error return from CTGEVC
+*                =N+8: error return from CGGBAK (computing VL)
+*                =N+9: error return from CGGBAK (computing VR)
+*                =N+10: error return from CLASCL (various calls)
+*
+*  Further Details
+*  ===============
+*
+*  Balancing
+*  ---------
+*
+*  This driver calls CGGBAL to both permute and scale rows and columns
+*  of A and B.  The permutations PL and PR are chosen so that PL*A*PR
+*  and PL*B*R will be upper triangular except for the diagonal blocks
+*  A(i:j,i:j) and B(i:j,i:j), with i and j as close together as
+*  possible.  The diagonal scaling matrices DL and DR are chosen so
+*  that the pair  DL*PL*A*PR*DR, DL*PL*B*PR*DR have elements close to
+*  one (except for the elements that start out zero.)
+*
+*  After the eigenvalues and eigenvectors of the balanced matrices
+*  have been computed, CGGBAK transforms the eigenvectors back to what
+*  they would have been (in perfect arithmetic) if they had not been
+*  balanced.
+*
+*  Contents of A and B on Exit
+*  -------- -- - --- - -- ----
+*
+*  If any eigenvectors are computed (either JOBVL='V' or JOBVR='V' or
+*  both), then on exit the arrays A and B will contain the complex Schur
+*  form[*] of the "balanced" versions of A and B.  If no eigenvectors
+*  are computed, then only the diagonal blocks will be correct.
+*
+*  [*] In other words, upper triangular form.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+      COMPLEX            CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0E0, 0.0E0 ),
+     $                   CONE = ( 1.0E0, 0.0E0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ILIMIT, ILV, ILVL, ILVR, LQUERY
+      CHARACTER          CHTEMP
+      INTEGER            ICOLS, IHI, IINFO, IJOBVL, IJOBVR, ILEFT, ILO,
+     $                   IN, IRIGHT, IROWS, IRWORK, ITAU, IWORK, JC, JR,
+     $                   LOPT, LWKMIN, LWKOPT, NB, NB1, NB2, NB3
+      REAL               ABSAI, ABSAR, ABSB, ANRM, ANRM1, ANRM2, BNRM,
+     $                   BNRM1, BNRM2, EPS, SAFMAX, SAFMIN, SALFAI,
+     $                   SALFAR, SBETA, SCALE, TEMP
+      COMPLEX            X
+*     ..
+*     .. Local Arrays ..
+      LOGICAL            LDUMMA( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEQRF, CGGBAK, CGGBAL, CGGHRD, CHGEQZ, CLACPY,
+     $                   CLASCL, CLASET, CTGEVC, CUNGQR, CUNMQR, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               CLANGE, SLAMCH
+      EXTERNAL           ILAENV, LSAME, CLANGE, SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, CMPLX, INT, MAX, REAL
+*     ..
+*     .. Statement Functions ..
+      REAL               ABS1
+*     ..
+*     .. Statement Function definitions ..
+      ABS1( X ) = ABS( REAL( X ) ) + ABS( AIMAG( X ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode the input arguments
+*
+      IF( LSAME( JOBVL, 'N' ) ) THEN
+         IJOBVL = 1
+         ILVL = .FALSE.
+      ELSE IF( LSAME( JOBVL, 'V' ) ) THEN
+         IJOBVL = 2
+         ILVL = .TRUE.
+      ELSE
+         IJOBVL = -1
+         ILVL = .FALSE.
+      END IF
+*
+      IF( LSAME( JOBVR, 'N' ) ) THEN
+         IJOBVR = 1
+         ILVR = .FALSE.
+      ELSE IF( LSAME( JOBVR, 'V' ) ) THEN
+         IJOBVR = 2
+         ILVR = .TRUE.
+      ELSE
+         IJOBVR = -1
+         ILVR = .FALSE.
+      END IF
+      ILV = ILVL .OR. ILVR
+*
+*     Test the input arguments
+*
+      LWKMIN = MAX( 2*N, 1 )
+      LWKOPT = LWKMIN
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      INFO = 0
+      IF( IJOBVL.LE.0 ) THEN
+         INFO = -1
+      ELSE IF( IJOBVR.LE.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN
+         INFO = -11
+      ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN
+         INFO = -13
+      ELSE IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -15
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB1 = ILAENV( 1, 'CGEQRF', ' ', N, N, -1, -1 )
+         NB2 = ILAENV( 1, 'CUNMQR', ' ', N, N, N, -1 )
+         NB3 = ILAENV( 1, 'CUNGQR', ' ', N, N, N, -1 )
+         NB = MAX( NB1, NB2, NB3 )
+         LOPT = MAX( 2*N, N*(NB+1) )
+         WORK( 1 ) = LOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGEGV ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'E' )*SLAMCH( 'B' )
+      SAFMIN = SLAMCH( 'S' )
+      SAFMIN = SAFMIN + SAFMIN
+      SAFMAX = ONE / SAFMIN
+*
+*     Scale A
+*
+      ANRM = CLANGE( 'M', N, N, A, LDA, RWORK )
+      ANRM1 = ANRM
+      ANRM2 = ONE
+      IF( ANRM.LT.ONE ) THEN
+         IF( SAFMAX*ANRM.LT.ONE ) THEN
+            ANRM1 = SAFMIN
+            ANRM2 = SAFMAX*ANRM
+         END IF
+      END IF
+*
+      IF( ANRM.GT.ZERO ) THEN
+         CALL CLASCL( 'G', -1, -1, ANRM, ONE, N, N, A, LDA, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 10
+            RETURN
+         END IF
+      END IF
+*
+*     Scale B
+*
+      BNRM = CLANGE( 'M', N, N, B, LDB, RWORK )
+      BNRM1 = BNRM
+      BNRM2 = ONE
+      IF( BNRM.LT.ONE ) THEN
+         IF( SAFMAX*BNRM.LT.ONE ) THEN
+            BNRM1 = SAFMIN
+            BNRM2 = SAFMAX*BNRM
+         END IF
+      END IF
+*
+      IF( BNRM.GT.ZERO ) THEN
+         CALL CLASCL( 'G', -1, -1, BNRM, ONE, N, N, B, LDB, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 10
+            RETURN
+         END IF
+      END IF
+*
+*     Permute the matrix to make it more nearly triangular
+*     Also "balance" the matrix.
+*
+      ILEFT = 1
+      IRIGHT = N + 1
+      IRWORK = IRIGHT + N
+      CALL CGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ),
+     $             RWORK( IRIGHT ), RWORK( IRWORK ), IINFO )
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 1
+         GO TO 80
+      END IF
+*
+*     Reduce B to triangular form, and initialize VL and/or VR
+*
+      IROWS = IHI + 1 - ILO
+      IF( ILV ) THEN
+         ICOLS = N + 1 - ILO
+      ELSE
+         ICOLS = IROWS
+      END IF
+      ITAU = 1
+      IWORK = ITAU + IROWS
+      CALL CGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ),
+     $             WORK( IWORK ), LWORK+1-IWORK, IINFO )
+      IF( IINFO.GE.0 )
+     $   LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 2
+         GO TO 80
+      END IF
+*
+      CALL CUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB,
+     $             WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWORK ),
+     $             LWORK+1-IWORK, IINFO )
+      IF( IINFO.GE.0 )
+     $   LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 3
+         GO TO 80
+      END IF
+*
+      IF( ILVL ) THEN
+         CALL CLASET( 'Full', N, N, CZERO, CONE, VL, LDVL )
+         CALL CLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB,
+     $                VL( ILO+1, ILO ), LDVL )
+         CALL CUNGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL,
+     $                WORK( ITAU ), WORK( IWORK ), LWORK+1-IWORK,
+     $                IINFO )
+         IF( IINFO.GE.0 )
+     $      LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 4
+            GO TO 80
+         END IF
+      END IF
+*
+      IF( ILVR )
+     $   CALL CLASET( 'Full', N, N, CZERO, CONE, VR, LDVR )
+*
+*     Reduce to generalized Hessenberg form
+*
+      IF( ILV ) THEN
+*
+*        Eigenvectors requested -- work on whole matrix.
+*
+         CALL CGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL,
+     $                LDVL, VR, LDVR, IINFO )
+      ELSE
+         CALL CGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA,
+     $                B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IINFO )
+      END IF
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 5
+         GO TO 80
+      END IF
+*
+*     Perform QZ algorithm
+*
+      IWORK = ITAU
+      IF( ILV ) THEN
+         CHTEMP = 'S'
+      ELSE
+         CHTEMP = 'E'
+      END IF
+      CALL CHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB,
+     $             ALPHA, BETA, VL, LDVL, VR, LDVR, WORK( IWORK ),
+     $             LWORK+1-IWORK, RWORK( IRWORK ), IINFO )
+      IF( IINFO.GE.0 )
+     $   LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+      IF( IINFO.NE.0 ) THEN
+         IF( IINFO.GT.0 .AND. IINFO.LE.N ) THEN
+            INFO = IINFO
+         ELSE IF( IINFO.GT.N .AND. IINFO.LE.2*N ) THEN
+            INFO = IINFO - N
+         ELSE
+            INFO = N + 6
+         END IF
+         GO TO 80
+      END IF
+*
+      IF( ILV ) THEN
+*
+*        Compute Eigenvectors
+*
+         IF( ILVL ) THEN
+            IF( ILVR ) THEN
+               CHTEMP = 'B'
+            ELSE
+               CHTEMP = 'L'
+            END IF
+         ELSE
+            CHTEMP = 'R'
+         END IF
+*
+         CALL CTGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, LDVL,
+     $                VR, LDVR, N, IN, WORK( IWORK ), RWORK( IRWORK ),
+     $                IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 7
+            GO TO 80
+         END IF
+*
+*        Undo balancing on VL and VR, rescale
+*
+         IF( ILVL ) THEN
+            CALL CGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ),
+     $                   RWORK( IRIGHT ), N, VL, LDVL, IINFO )
+            IF( IINFO.NE.0 ) THEN
+               INFO = N + 8
+               GO TO 80
+            END IF
+            DO 30 JC = 1, N
+               TEMP = ZERO
+               DO 10 JR = 1, N
+                  TEMP = MAX( TEMP, ABS1( VL( JR, JC ) ) )
+   10          CONTINUE
+               IF( TEMP.LT.SAFMIN )
+     $            GO TO 30
+               TEMP = ONE / TEMP
+               DO 20 JR = 1, N
+                  VL( JR, JC ) = VL( JR, JC )*TEMP
+   20          CONTINUE
+   30       CONTINUE
+         END IF
+         IF( ILVR ) THEN
+            CALL CGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ),
+     $                   RWORK( IRIGHT ), N, VR, LDVR, IINFO )
+            IF( IINFO.NE.0 ) THEN
+               INFO = N + 9
+               GO TO 80
+            END IF
+            DO 60 JC = 1, N
+               TEMP = ZERO
+               DO 40 JR = 1, N
+                  TEMP = MAX( TEMP, ABS1( VR( JR, JC ) ) )
+   40          CONTINUE
+               IF( TEMP.LT.SAFMIN )
+     $            GO TO 60
+               TEMP = ONE / TEMP
+               DO 50 JR = 1, N
+                  VR( JR, JC ) = VR( JR, JC )*TEMP
+   50          CONTINUE
+   60       CONTINUE
+         END IF
+*
+*        End of eigenvector calculation
+*
+      END IF
+*
+*     Undo scaling in alpha, beta
+*
+*     Note: this does not give the alpha and beta for the unscaled
+*     problem.
+*
+*     Un-scaling is limited to avoid underflow in alpha and beta
+*     if they are significant.
+*
+      DO 70 JC = 1, N
+         ABSAR = ABS( REAL( ALPHA( JC ) ) )
+         ABSAI = ABS( AIMAG( ALPHA( JC ) ) )
+         ABSB = ABS( REAL( BETA( JC ) ) )
+         SALFAR = ANRM*REAL( ALPHA( JC ) )
+         SALFAI = ANRM*AIMAG( ALPHA( JC ) )
+         SBETA = BNRM*REAL( BETA( JC ) )
+         ILIMIT = .FALSE.
+         SCALE = ONE
+*
+*        Check for significant underflow in imaginary part of ALPHA
+*
+         IF( ABS( SALFAI ).LT.SAFMIN .AND. ABSAI.GE.
+     $       MAX( SAFMIN, EPS*ABSAR, EPS*ABSB ) ) THEN
+            ILIMIT = .TRUE.
+            SCALE = ( SAFMIN / ANRM1 ) / MAX( SAFMIN, ANRM2*ABSAI )
+         END IF
+*
+*        Check for significant underflow in real part of ALPHA
+*
+         IF( ABS( SALFAR ).LT.SAFMIN .AND. ABSAR.GE.
+     $       MAX( SAFMIN, EPS*ABSAI, EPS*ABSB ) ) THEN
+            ILIMIT = .TRUE.
+            SCALE = MAX( SCALE, ( SAFMIN / ANRM1 ) /
+     $              MAX( SAFMIN, ANRM2*ABSAR ) )
+         END IF
+*
+*        Check for significant underflow in BETA
+*
+         IF( ABS( SBETA ).LT.SAFMIN .AND. ABSB.GE.
+     $       MAX( SAFMIN, EPS*ABSAR, EPS*ABSAI ) ) THEN
+            ILIMIT = .TRUE.
+            SCALE = MAX( SCALE, ( SAFMIN / BNRM1 ) /
+     $              MAX( SAFMIN, BNRM2*ABSB ) )
+         END IF
+*
+*        Check for possible overflow when limiting scaling
+*
+         IF( ILIMIT ) THEN
+            TEMP = ( SCALE*SAFMIN )*MAX( ABS( SALFAR ), ABS( SALFAI ),
+     $             ABS( SBETA ) )
+            IF( TEMP.GT.ONE )
+     $         SCALE = SCALE / TEMP
+            IF( SCALE.LT.ONE )
+     $         ILIMIT = .FALSE.
+         END IF
+*
+*        Recompute un-scaled ALPHA, BETA if necessary.
+*
+         IF( ILIMIT ) THEN
+            SALFAR = ( SCALE*REAL( ALPHA( JC ) ) )*ANRM
+            SALFAI = ( SCALE*AIMAG( ALPHA( JC ) ) )*ANRM
+            SBETA = ( SCALE*BETA( JC ) )*BNRM
+         END IF
+         ALPHA( JC ) = CMPLX( SALFAR, SALFAI )
+         BETA( JC ) = SBETA
+   70 CONTINUE
+*
+   80 CONTINUE
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of CGEGV
+*
+      END
diff --git a/costa/native/external/lapack/cgehd2.f b/costa/native/external/lapack/cgehd2.f
new file mode 100644
index 000000000..ab206ff6c
--- /dev/null
+++ b/costa/native/external/lapack/cgehd2.f
@@ -0,0 +1,149 @@
+      SUBROUTINE CGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            IHI, ILO, INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGEHD2 reduces a complex general matrix A to upper Hessenberg form H
+*  by a unitary similarity transformation:  Q' * A * Q = H .
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          It is assumed that A is already upper triangular in rows
+*          and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
+*          set by a previous call to CGEBAL; otherwise they should be
+*          set to 1 and N respectively. See Further Details.
+*          1 <= ILO <= IHI <= max(1,N).
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the n by n general matrix to be reduced.
+*          On exit, the upper triangle and the first subdiagonal of A
+*          are overwritten with the upper Hessenberg matrix H, and the
+*          elements below the first subdiagonal, with the array TAU,
+*          represent the unitary matrix Q as a product of elementary
+*          reflectors. See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  TAU     (output) COMPLEX array, dimension (N-1)
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace) COMPLEX array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of (ihi-ilo) elementary
+*  reflectors
+*
+*     Q = H(ilo) H(ilo+1) . . . H(ihi-1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
+*  exit in A(i+2:ihi,i), and tau in TAU(i).
+*
+*  The contents of A are illustrated by the following example, with
+*  n = 7, ilo = 2 and ihi = 6:
+*
+*  on entry,                        on exit,
+*
+*  ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a )
+*  (     a   a   a   a   a   a )    (      a   h   h   h   h   a )
+*  (     a   a   a   a   a   a )    (      h   h   h   h   h   h )
+*  (     a   a   a   a   a   a )    (      v2  h   h   h   h   h )
+*  (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h )
+*  (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h )
+*  (                         a )    (                          a )
+*
+*  where a denotes an element of the original matrix A, h denotes a
+*  modified element of the upper Hessenberg matrix H, and vi denotes an
+*  element of the vector defining H(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      COMPLEX            ALPHA
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLARF, CLARFG, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN
+         INFO = -2
+      ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGEHD2', -INFO )
+         RETURN
+      END IF
+*
+      DO 10 I = ILO, IHI - 1
+*
+*        Compute elementary reflector H(i) to annihilate A(i+2:ihi,i)
+*
+         ALPHA = A( I+1, I )
+         CALL CLARFG( IHI-I, ALPHA, A( MIN( I+2, N ), I ), 1, TAU( I ) )
+         A( I+1, I ) = ONE
+*
+*        Apply H(i) to A(1:ihi,i+1:ihi) from the right
+*
+         CALL CLARF( 'Right', IHI, IHI-I, A( I+1, I ), 1, TAU( I ),
+     $               A( 1, I+1 ), LDA, WORK )
+*
+*        Apply H(i)' to A(i+1:ihi,i+1:n) from the left
+*
+         CALL CLARF( 'Left', IHI-I, N-I, A( I+1, I ), 1,
+     $               CONJG( TAU( I ) ), A( I+1, I+1 ), LDA, WORK )
+*
+         A( I+1, I ) = ALPHA
+   10 CONTINUE
+*
+      RETURN
+*
+*     End of CGEHD2
+*
+      END
diff --git a/costa/native/external/lapack/cgehrd.f b/costa/native/external/lapack/cgehrd.f
new file mode 100644
index 000000000..de63da494
--- /dev/null
+++ b/costa/native/external/lapack/cgehrd.f
@@ -0,0 +1,254 @@
+      SUBROUTINE CGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            IHI, ILO, INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGEHRD reduces a complex general matrix A to upper Hessenberg form H
+*  by a unitary similarity transformation:  Q' * A * Q = H .
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          It is assumed that A is already upper triangular in rows
+*          and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
+*          set by a previous call to CGEBAL; otherwise they should be
+*          set to 1 and N respectively. See Further Details.
+*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the N-by-N general matrix to be reduced.
+*          On exit, the upper triangle and the first subdiagonal of A
+*          are overwritten with the upper Hessenberg matrix H, and the
+*          elements below the first subdiagonal, with the array TAU,
+*          represent the unitary matrix Q as a product of elementary
+*          reflectors. See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  TAU     (output) COMPLEX array, dimension (N-1)
+*          The scalar factors of the elementary reflectors (see Further
+*          Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to
+*          zero.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.  LWORK >= max(1,N).
+*          For optimum performance LWORK >= N*NB, where NB is the
+*          optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of (ihi-ilo) elementary
+*  reflectors
+*
+*     Q = H(ilo) H(ilo+1) . . . H(ihi-1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
+*  exit in A(i+2:ihi,i), and tau in TAU(i).
+*
+*  The contents of A are illustrated by the following example, with
+*  n = 7, ilo = 2 and ihi = 6:
+*
+*  on entry,                        on exit,
+*
+*  ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a )
+*  (     a   a   a   a   a   a )    (      a   h   h   h   h   a )
+*  (     a   a   a   a   a   a )    (      h   h   h   h   h   h )
+*  (     a   a   a   a   a   a )    (      v2  h   h   h   h   h )
+*  (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h )
+*  (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h )
+*  (                         a )    (                          a )
+*
+*  where a denotes an element of the original matrix A, h denotes a
+*  modified element of the upper Hessenberg matrix H, and vi denotes an
+*  element of the vector defining H(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            NBMAX, LDT
+      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
+      COMPLEX            ZERO, ONE
+      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, LDWORK, LWKOPT, NB, NBMIN,
+     $                   NH, NX
+      COMPLEX            EI
+*     ..
+*     .. Local Arrays ..
+      COMPLEX            T( LDT, NBMAX )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEHD2, CGEMM, CLAHRD, CLARFB, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      NB = MIN( NBMAX, ILAENV( 1, 'CGEHRD', ' ', N, ILO, IHI, -1 ) )
+      LWKOPT = N*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN
+         INFO = -2
+      ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGEHRD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Set elements 1:ILO-1 and IHI:N-1 of TAU to zero
+*
+      DO 10 I = 1, ILO - 1
+         TAU( I ) = ZERO
+   10 CONTINUE
+      DO 20 I = MAX( 1, IHI ), N - 1
+         TAU( I ) = ZERO
+   20 CONTINUE
+*
+*     Quick return if possible
+*
+      NH = IHI - ILO + 1
+      IF( NH.LE.1 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      IWS = 1
+      IF( NB.GT.1 .AND. NB.LT.NH ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code
+*        (last block is always handled by unblocked code).
+*
+         NX = MAX( NB, ILAENV( 3, 'CGEHRD', ' ', N, ILO, IHI, -1 ) )
+         IF( NX.LT.NH ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            IWS = N*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  determine the
+*              minimum value of NB, and reduce NB or force use of
+*              unblocked code.
+*
+               NBMIN = MAX( 2, ILAENV( 2, 'CGEHRD', ' ', N, ILO, IHI,
+     $                 -1 ) )
+               IF( LWORK.GE.N*NBMIN ) THEN
+                  NB = LWORK / N
+               ELSE
+                  NB = 1
+               END IF
+            END IF
+         END IF
+      END IF
+      LDWORK = N
+*
+      IF( NB.LT.NBMIN .OR. NB.GE.NH ) THEN
+*
+*        Use unblocked code below
+*
+         I = ILO
+*
+      ELSE
+*
+*        Use blocked code
+*
+         DO 30 I = ILO, IHI - 1 - NX, NB
+            IB = MIN( NB, IHI-I )
+*
+*           Reduce columns i:i+ib-1 to Hessenberg form, returning the
+*           matrices V and T of the block reflector H = I - V*T*V'
+*           which performs the reduction, and also the matrix Y = A*V*T
+*
+            CALL CLAHRD( IHI, I, IB, A( 1, I ), LDA, TAU( I ), T, LDT,
+     $                   WORK, LDWORK )
+*
+*           Apply the block reflector H to A(1:ihi,i+ib:ihi) from the
+*           right, computing  A := A - Y * V'. V(i+ib,ib-1) must be set
+*           to 1.
+*
+            EI = A( I+IB, I+IB-1 )
+            A( I+IB, I+IB-1 ) = ONE
+            CALL CGEMM( 'No transpose', 'Conjugate transpose', IHI,
+     $                  IHI-I-IB+1, IB, -ONE, WORK, LDWORK,
+     $                  A( I+IB, I ), LDA, ONE, A( 1, I+IB ), LDA )
+            A( I+IB, I+IB-1 ) = EI
+*
+*           Apply the block reflector H to A(i+1:ihi,i+ib:n) from the
+*           left
+*
+            CALL CLARFB( 'Left', 'Conjugate transpose', 'Forward',
+     $                   'Columnwise', IHI-I, N-I-IB+1, IB, A( I+1, I ),
+     $                   LDA, T, LDT, A( I+1, I+IB ), LDA, WORK,
+     $                   LDWORK )
+   30    CONTINUE
+      END IF
+*
+*     Use unblocked code to reduce the rest of the matrix
+*
+      CALL CGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO )
+      WORK( 1 ) = IWS
+*
+      RETURN
+*
+*     End of CGEHRD
+*
+      END
diff --git a/costa/native/external/lapack/cgelq2.f b/costa/native/external/lapack/cgelq2.f
new file mode 100644
index 000000000..449ed20fe
--- /dev/null
+++ b/costa/native/external/lapack/cgelq2.f
@@ -0,0 +1,124 @@
+      SUBROUTINE CGELQ2( M, N, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGELQ2 computes an LQ factorization of a complex m by n matrix A:
+*  A = L * Q.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the m by n matrix A.
+*          On exit, the elements on and below the diagonal of the array
+*          contain the m by min(m,n) lower trapezoidal matrix L (L is
+*          lower triangular if m <= n); the elements above the diagonal,
+*          with the array TAU, represent the unitary matrix Q as a
+*          product of elementary reflectors (see Further Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) COMPLEX array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace) COMPLEX array, dimension (M)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(k)' . . . H(2)' H(1)', where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in
+*  A(i,i+1:n), and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, K
+      COMPLEX            ALPHA
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLACGV, CLARF, CLARFG, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGELQ2', -INFO )
+         RETURN
+      END IF
+*
+      K = MIN( M, N )
+*
+      DO 10 I = 1, K
+*
+*        Generate elementary reflector H(i) to annihilate A(i,i+1:n)
+*
+         CALL CLACGV( N-I+1, A( I, I ), LDA )
+         ALPHA = A( I, I )
+         CALL CLARFG( N-I+1, ALPHA, A( I, MIN( I+1, N ) ), LDA,
+     $                TAU( I ) )
+         IF( I.LT.M ) THEN
+*
+*           Apply H(i) to A(i+1:m,i:n) from the right
+*
+            A( I, I ) = ONE
+            CALL CLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAU( I ),
+     $                  A( I+1, I ), LDA, WORK )
+         END IF
+         A( I, I ) = ALPHA
+         CALL CLACGV( N-I+1, A( I, I ), LDA )
+   10 CONTINUE
+      RETURN
+*
+*     End of CGELQ2
+*
+      END
diff --git a/costa/native/external/lapack/cgelqf.f b/costa/native/external/lapack/cgelqf.f
new file mode 100644
index 000000000..b5484c718
--- /dev/null
+++ b/costa/native/external/lapack/cgelqf.f
@@ -0,0 +1,196 @@
+      SUBROUTINE CGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGELQF computes an LQ factorization of a complex M-by-N matrix A:
+*  A = L * Q.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, the elements on and below the diagonal of the array
+*          contain the m-by-min(m,n) lower trapezoidal matrix L (L is
+*          lower triangular if m <= n); the elements above the diagonal,
+*          with the array TAU, represent the unitary matrix Q as a
+*          product of elementary reflectors (see Further Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) COMPLEX array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,M).
+*          For optimum performance LWORK >= M*NB, where NB is the
+*          optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(k)' . . . H(2)' H(1)', where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in
+*  A(i,i+1:n), and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB,
+     $                   NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGELQ2, CLARFB, CLARFT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
+      LWKOPT = M*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGELQF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      K = MIN( M, N )
+      IF( K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 0
+      IWS = M
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'CGELQF', ' ', M, N, -1, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = M
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'CGELQF', ' ', M, N, -1,
+     $                 -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code initially
+*
+         DO 10 I = 1, K - NX, NB
+            IB = MIN( K-I+1, NB )
+*
+*           Compute the LQ factorization of the current block
+*           A(i:i+ib-1,i:n)
+*
+            CALL CGELQ2( IB, N-I+1, A( I, I ), LDA, TAU( I ), WORK,
+     $                   IINFO )
+            IF( I+IB.LE.M ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i) H(i+1) . . . H(i+ib-1)
+*
+               CALL CLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ),
+     $                      LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H to A(i+ib:m,i:n) from the right
+*
+               CALL CLARFB( 'Right', 'No transpose', 'Forward',
+     $                      'Rowwise', M-I-IB+1, N-I+1, IB, A( I, I ),
+     $                      LDA, WORK, LDWORK, A( I+IB, I ), LDA,
+     $                      WORK( IB+1 ), LDWORK )
+            END IF
+   10    CONTINUE
+      ELSE
+         I = 1
+      END IF
+*
+*     Use unblocked code to factor the last or only block.
+*
+      IF( I.LE.K )
+     $   CALL CGELQ2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK,
+     $                IINFO )
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of CGELQF
+*
+      END
diff --git a/costa/native/external/lapack/cgels.f b/costa/native/external/lapack/cgels.f
new file mode 100644
index 000000000..fd3de112d
--- /dev/null
+++ b/costa/native/external/lapack/cgels.f
@@ -0,0 +1,405 @@
+      SUBROUTINE CGELS( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK,
+     $                  INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), B( LDB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGELS solves overdetermined or underdetermined complex linear systems
+*  involving an M-by-N matrix A, or its conjugate-transpose, using a QR
+*  or LQ factorization of A.  It is assumed that A has full rank.
+*
+*  The following options are provided:
+*
+*  1. If TRANS = 'N' and m >= n:  find the least squares solution of
+*     an overdetermined system, i.e., solve the least squares problem
+*                  minimize || B - A*X ||.
+*
+*  2. If TRANS = 'N' and m < n:  find the minimum norm solution of
+*     an underdetermined system A * X = B.
+*
+*  3. If TRANS = 'C' and m >= n:  find the minimum norm solution of
+*     an undetermined system A**H * X = B.
+*
+*  4. If TRANS = 'C' and m < n:  find the least squares solution of
+*     an overdetermined system, i.e., solve the least squares problem
+*                  minimize || B - A**H * X ||.
+*
+*  Several right hand side vectors b and solution vectors x can be
+*  handled in a single call; they are stored as the columns of the
+*  M-by-NRHS right hand side matrix B and the N-by-NRHS solution
+*  matrix X.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER
+*          = 'N': the linear system involves A;
+*          = 'C': the linear system involves A**H.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of
+*          columns of the matrices B and X. NRHS >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*            if M >= N, A is overwritten by details of its QR
+*                       factorization as returned by CGEQRF;
+*            if M <  N, A is overwritten by details of its LQ
+*                       factorization as returned by CGELQF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the matrix B of right hand side vectors, stored
+*          columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS
+*          if TRANS = 'C'.
+*          On exit, B is overwritten by the solution vectors, stored
+*          columnwise:
+*          if TRANS = 'N' and m >= n, rows 1 to n of B contain the least
+*          squares solution vectors; the residual sum of squares for the
+*          solution in each column is given by the sum of squares of
+*          elements N+1 to M in that column;
+*          if TRANS = 'N' and m < n, rows 1 to N of B contain the
+*          minimum norm solution vectors;
+*          if TRANS = 'C' and m >= n, rows 1 to M of B contain the
+*          minimum norm solution vectors;
+*          if TRANS = 'C' and m < n, rows 1 to M of B contain the
+*          least squares solution vectors; the residual sum of squares
+*          for the solution in each column is given by the sum of
+*          squares of elements M+1 to N in that column.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= MAX(1,M,N).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          LWORK >= max( 1, MN + max( MN, NRHS ) ).
+*          For optimal performance,
+*          LWORK >= max( 1, MN + max( MN, NRHS )*NB ).
+*          where MN = min(M,N) and NB is the optimum block size.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+      COMPLEX            CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, TPSD
+      INTEGER            BROW, I, IASCL, IBSCL, J, MN, NB, SCLLEN, WSIZE
+      REAL               ANRM, BIGNUM, BNRM, SMLNUM
+*     ..
+*     .. Local Arrays ..
+      REAL               RWORK( 1 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               CLANGE, SLAMCH
+      EXTERNAL           LSAME, ILAENV, CLANGE, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGELQF, CGEQRF, CLASCL, CLASET, CTRSM, CUNMLQ,
+     $                   CUNMQR, SLABAD, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments.
+*
+      INFO = 0
+      MN = MIN( M, N )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.( LSAME( TRANS, 'N' ) .OR. LSAME( TRANS, 'C' ) ) ) THEN
+         INFO = -1
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN
+         INFO = -8
+      ELSE IF( LWORK.LT.MAX( 1, MN+MAX( MN, NRHS ) ) .AND.
+     $   .NOT.LQUERY ) THEN
+         INFO = -10
+      END IF
+*
+*     Figure out optimal block size
+*
+      IF( INFO.EQ.0 .OR. INFO.EQ.-10 ) THEN
+*
+         TPSD = .TRUE.
+         IF( LSAME( TRANS, 'N' ) )
+     $      TPSD = .FALSE.
+*
+         IF( M.GE.N ) THEN
+            NB = ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
+            IF( TPSD ) THEN
+               NB = MAX( NB, ILAENV( 1, 'CUNMQR', 'LN', M, NRHS, N,
+     $              -1 ) )
+            ELSE
+               NB = MAX( NB, ILAENV( 1, 'CUNMQR', 'LC', M, NRHS, N,
+     $              -1 ) )
+            END IF
+         ELSE
+            NB = ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
+            IF( TPSD ) THEN
+               NB = MAX( NB, ILAENV( 1, 'CUNMLQ', 'LC', N, NRHS, M,
+     $              -1 ) )
+            ELSE
+               NB = MAX( NB, ILAENV( 1, 'CUNMLQ', 'LN', N, NRHS, M,
+     $              -1 ) )
+            END IF
+         END IF
+*
+         WSIZE = MAX( 1, MN + MAX( MN, NRHS )*NB )
+         WORK( 1 ) = REAL( WSIZE )
+*
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGELS ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( MIN( M, N, NRHS ).EQ.0 ) THEN
+         CALL CLASET( 'Full', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB )
+         RETURN
+      END IF
+*
+*     Get machine parameters
+*
+      SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' )
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+*
+*     Scale A, B if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = CLANGE( 'M', M, N, A, LDA, RWORK )
+      IASCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
+         IASCL = 1
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
+         IASCL = 2
+      ELSE IF( ANRM.EQ.ZERO ) THEN
+*
+*        Matrix all zero. Return zero solution.
+*
+         CALL CLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB )
+         GO TO 50
+      END IF
+*
+      BROW = M
+      IF( TPSD )
+     $   BROW = N
+      BNRM = CLANGE( 'M', BROW, NRHS, B, LDB, RWORK )
+      IBSCL = 0
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL CLASCL( 'G', 0, 0, BNRM, SMLNUM, BROW, NRHS, B, LDB,
+     $                INFO )
+         IBSCL = 1
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL CLASCL( 'G', 0, 0, BNRM, BIGNUM, BROW, NRHS, B, LDB,
+     $                INFO )
+         IBSCL = 2
+      END IF
+*
+      IF( M.GE.N ) THEN
+*
+*        compute QR factorization of A
+*
+         CALL CGEQRF( M, N, A, LDA, WORK( 1 ), WORK( MN+1 ), LWORK-MN,
+     $                INFO )
+*
+*        workspace at least N, optimally N*NB
+*
+         IF( .NOT.TPSD ) THEN
+*
+*           Least-Squares Problem min || A * X - B ||
+*
+*           B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS)
+*
+            CALL CUNMQR( 'Left', 'Conjugate transpose', M, NRHS, N, A,
+     $                   LDA, WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN,
+     $                   INFO )
+*
+*           workspace at least NRHS, optimally NRHS*NB
+*
+*           B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS)
+*
+            CALL CTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N,
+     $                  NRHS, CONE, A, LDA, B, LDB )
+*
+            SCLLEN = N
+*
+         ELSE
+*
+*           Overdetermined system of equations A' * X = B
+*
+*           B(1:N,1:NRHS) := inv(R') * B(1:N,1:NRHS)
+*
+            CALL CTRSM( 'Left', 'Upper', 'Conjugate transpose',
+     $                  'Non-unit', N, NRHS, CONE, A, LDA, B, LDB )
+*
+*           B(N+1:M,1:NRHS) = ZERO
+*
+            DO 20 J = 1, NRHS
+               DO 10 I = N + 1, M
+                  B( I, J ) = CZERO
+   10          CONTINUE
+   20       CONTINUE
+*
+*           B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS)
+*
+            CALL CUNMQR( 'Left', 'No transpose', M, NRHS, N, A, LDA,
+     $                   WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN,
+     $                   INFO )
+*
+*           workspace at least NRHS, optimally NRHS*NB
+*
+            SCLLEN = M
+*
+         END IF
+*
+      ELSE
+*
+*        Compute LQ factorization of A
+*
+         CALL CGELQF( M, N, A, LDA, WORK( 1 ), WORK( MN+1 ), LWORK-MN,
+     $                INFO )
+*
+*        workspace at least M, optimally M*NB.
+*
+         IF( .NOT.TPSD ) THEN
+*
+*           underdetermined system of equations A * X = B
+*
+*           B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS)
+*
+            CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', M,
+     $                  NRHS, CONE, A, LDA, B, LDB )
+*
+*           B(M+1:N,1:NRHS) = 0
+*
+            DO 40 J = 1, NRHS
+               DO 30 I = M + 1, N
+                  B( I, J ) = CZERO
+   30          CONTINUE
+   40       CONTINUE
+*
+*           B(1:N,1:NRHS) := Q(1:N,:)' * B(1:M,1:NRHS)
+*
+            CALL CUNMLQ( 'Left', 'Conjugate transpose', N, NRHS, M, A,
+     $                   LDA, WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN,
+     $                   INFO )
+*
+*           workspace at least NRHS, optimally NRHS*NB
+*
+            SCLLEN = N
+*
+         ELSE
+*
+*           overdetermined system min || A' * X - B ||
+*
+*           B(1:N,1:NRHS) := Q * B(1:N,1:NRHS)
+*
+            CALL CUNMLQ( 'Left', 'No transpose', N, NRHS, M, A, LDA,
+     $                   WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN,
+     $                   INFO )
+*
+*           workspace at least NRHS, optimally NRHS*NB
+*
+*           B(1:M,1:NRHS) := inv(L') * B(1:M,1:NRHS)
+*
+            CALL CTRSM( 'Left', 'Lower', 'Conjugate transpose',
+     $                  'Non-unit', M, NRHS, CONE, A, LDA, B, LDB )
+*
+            SCLLEN = M
+*
+         END IF
+*
+      END IF
+*
+*     Undo scaling
+*
+      IF( IASCL.EQ.1 ) THEN
+         CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, SCLLEN, NRHS, B, LDB,
+     $                INFO )
+      ELSE IF( IASCL.EQ.2 ) THEN
+         CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, SCLLEN, NRHS, B, LDB,
+     $                INFO )
+      END IF
+      IF( IBSCL.EQ.1 ) THEN
+         CALL CLASCL( 'G', 0, 0, SMLNUM, BNRM, SCLLEN, NRHS, B, LDB,
+     $                INFO )
+      ELSE IF( IBSCL.EQ.2 ) THEN
+         CALL CLASCL( 'G', 0, 0, BIGNUM, BNRM, SCLLEN, NRHS, B, LDB,
+     $                INFO )
+      END IF
+*
+   50 CONTINUE
+      WORK( 1 ) = REAL( WSIZE )
+*
+      RETURN
+*
+*     End of CGELS
+*
+      END
diff --git a/costa/native/external/lapack/cgelsd.f b/costa/native/external/lapack/cgelsd.f
new file mode 100644
index 000000000..e4adf2101
--- /dev/null
+++ b/costa/native/external/lapack/cgelsd.f
@@ -0,0 +1,547 @@
+      SUBROUTINE CGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
+     $                   WORK, LWORK, RWORK, IWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               RWORK( * ), S( * )
+      COMPLEX            A( LDA, * ), B( LDB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGELSD computes the minimum-norm solution to a real linear least
+*  squares problem:
+*      minimize 2-norm(| b - A*x |)
+*  using the singular value decomposition (SVD) of A. A is an M-by-N
+*  matrix which may be rank-deficient.
+*
+*  Several right hand side vectors b and solution vectors x can be
+*  handled in a single call; they are stored as the columns of the
+*  M-by-NRHS right hand side matrix B and the N-by-NRHS solution
+*  matrix X.
+*
+*  The problem is solved in three steps:
+*  (1) Reduce the coefficient matrix A to bidiagonal form with
+*      Householder tranformations, reducing the original problem
+*      into a "bidiagonal least squares problem" (BLS)
+*  (2) Solve the BLS using a divide and conquer approach.
+*  (3) Apply back all the Householder tranformations to solve
+*      the original least squares problem.
+*
+*  The effective rank of A is determined by treating as zero those
+*  singular values which are less than RCOND times the largest singular
+*  value.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A. N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X. NRHS >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, A has been destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the M-by-NRHS right hand side matrix B.
+*          On exit, B is overwritten by the N-by-NRHS solution matrix X.
+*          If m >= n and RANK = n, the residual sum-of-squares for
+*          the solution in the i-th column is given by the sum of
+*          squares of elements n+1:m in that column.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,M,N).
+*
+*  S       (output) REAL array, dimension (min(M,N))
+*          The singular values of A in decreasing order.
+*          The condition number of A in the 2-norm = S(1)/S(min(m,n)).
+*
+*  RCOND   (input) REAL
+*          RCOND is used to determine the effective rank of A.
+*          Singular values S(i) <= RCOND*S(1) are treated as zero.
+*          If RCOND < 0, machine precision is used instead.
+*
+*  RANK    (output) INTEGER
+*          The effective rank of A, i.e., the number of singular values
+*          which are greater than RCOND*S(1).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK must be at least 1.
+*          The exact minimum amount of workspace needed depends on M,
+*          N and NRHS. As long as LWORK is at least
+*              2 * N + N * NRHS
+*          if M is greater than or equal to N or
+*              2 * M + M * NRHS
+*          if M is less than N, the code will execute correctly.
+*          For good performance, LWORK should generally be larger.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*
+*  RWORK   (workspace) REAL array, dimension at least
+*             10*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS +
+*             (SMLSIZ+1)**2
+*          if M is greater than or equal to N or
+*             10*M + 2*M*SMLSIZ + 8*M*NLVL + 3*SMLSIZ*NRHS +
+*             (SMLSIZ+1)**2
+*          if M is less than N, the code will execute correctly.
+*          SMLSIZ is returned by ILAENV and is equal to the maximum
+*          size of the subproblems at the bottom of the computation
+*          tree (usually about 25), and
+*             NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 )
+*
+*  IWORK   (workspace) INTEGER array, dimension (LIWORK)
+*          LIWORK >= 3 * MINMN * NLVL + 11 * MINMN,
+*          where MINMN = MIN( M,N ).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  the algorithm for computing the SVD failed to converge;
+*                if INFO = i, i off-diagonal elements of an intermediate
+*                bidiagonal form did not converge to zero.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Ren-Cang Li, Computer Science Division, University of
+*       California at Berkeley, USA
+*     Osni Marques, LBNL/NERSC, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+      COMPLEX            CZERO
+      PARAMETER          ( CZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            IASCL, IBSCL, IE, IL, ITAU, ITAUP, ITAUQ,
+     $                   LDWORK, MAXMN, MAXWRK, MINMN, MINWRK, MM,
+     $                   MNTHR, NRWORK, NWORK, SMLSIZ
+      REAL               ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEBRD, CGELQF, CGEQRF, CLACPY,
+     $                   CLALSD, CLASCL, CLASET, CUNMBR,
+     $                   CUNMLQ, CUNMQR, SLABAD, SLASCL,
+     $                   SLASET, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      REAL               CLANGE, SLAMCH
+      EXTERNAL           CLANGE, SLAMCH, ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CMPLX, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments.
+*
+      INFO = 0
+      MINMN = MIN( M, N )
+      MAXMN = MAX( M, N )
+      MNTHR = ILAENV( 6, 'CGELSD', ' ', M, N, NRHS, -1 )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN
+         INFO = -7
+      END IF
+*
+      SMLSIZ = ILAENV( 9, 'CGELSD', ' ', 0, 0, 0, 0 )
+*
+*     Compute workspace.
+*     (Note: Comments in the code beginning "Workspace:" describe the
+*     minimal amount of workspace needed at that point in the code,
+*     as well as the preferred amount for good performance.
+*     NB refers to the optimal block size for the immediately
+*     following subroutine, as returned by ILAENV.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 ) THEN
+         MAXWRK = 0
+         MM = M
+         IF( M.GE.N .AND. M.GE.MNTHR ) THEN
+*
+*           Path 1a - overdetermined, with many more rows than columns.
+*
+            MM = N
+            MAXWRK = MAX( MAXWRK, N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1,
+     $               -1 ) )
+            MAXWRK = MAX( MAXWRK, NRHS*ILAENV( 1, 'CUNMQR', 'LC', M,
+     $               NRHS, N, -1 ) )
+         END IF
+         IF( M.GE.N ) THEN
+*
+*           Path 1 - overdetermined or exactly determined.
+*
+            MAXWRK = MAX( MAXWRK, 2*N+( MM+N )*
+     $               ILAENV( 1, 'CGEBRD', ' ', MM, N, -1, -1 ) )
+            MAXWRK = MAX( MAXWRK, 2*N+NRHS*
+     $               ILAENV( 1, 'CUNMBR', 'QLC', MM, NRHS, N, -1 ) )
+            MAXWRK = MAX( MAXWRK, 2*N+( N-1 )*
+     $               ILAENV( 1, 'CUNMBR', 'PLN', N, NRHS, N, -1 ) )
+            MAXWRK = MAX( MAXWRK, 2*N+N*NRHS )
+            MINWRK = MAX( 2*N+MM, 2*N+N*NRHS )
+         END IF
+         IF( N.GT.M ) THEN
+            IF( N.GE.MNTHR ) THEN
+*
+*              Path 2a - underdetermined, with many more columns
+*              than rows.
+*
+               MAXWRK = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
+               MAXWRK = MAX( MAXWRK, M*M+4*M+2*M*
+     $                  ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
+               MAXWRK = MAX( MAXWRK, M*M+4*M+NRHS*
+     $                  ILAENV( 1, 'CUNMBR', 'QLC', M, NRHS, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, M*M+4*M+( M-1 )*
+     $                  ILAENV( 1, 'CUNMLQ', 'LC', N, NRHS, M, -1 ) )
+               IF( NRHS.GT.1 ) THEN
+                  MAXWRK = MAX( MAXWRK, M*M+M+M*NRHS )
+               ELSE
+                  MAXWRK = MAX( MAXWRK, M*M+2*M )
+               END IF
+               MAXWRK = MAX( MAXWRK, M*M+4*M+M*NRHS )
+            ELSE
+*
+*              Path 2 - underdetermined.
+*
+               MAXWRK = 2*M + ( N+M )*ILAENV( 1, 'CGEBRD', ' ', M, N,
+     $                  -1, -1 )
+               MAXWRK = MAX( MAXWRK, 2*M+NRHS*
+     $                  ILAENV( 1, 'CUNMBR', 'QLC', M, NRHS, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, 2*M+M*
+     $                  ILAENV( 1, 'CUNMBR', 'PLN', N, NRHS, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, 2*M+M*NRHS )
+            END IF
+            MINWRK = MAX( 2*M+N, 2*M+M*NRHS )
+         END IF
+         MINWRK = MIN( MINWRK, MAXWRK )
+         WORK( 1 ) = CMPLX( MAXWRK, 0 )
+         IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
+            INFO = -12
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGELSD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         GO TO 10
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         RANK = 0
+         RETURN
+      END IF
+*
+*     Get machine parameters.
+*
+      EPS = SLAMCH( 'P' )
+      SFMIN = SLAMCH( 'S' )
+      SMLNUM = SFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+*
+*     Scale A if max entry outside range [SMLNUM,BIGNUM].
+*
+      ANRM = CLANGE( 'M', M, N, A, LDA, RWORK )
+      IASCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
+         IASCL = 1
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM.
+*
+         CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
+         IASCL = 2
+      ELSE IF( ANRM.EQ.ZERO ) THEN
+*
+*        Matrix all zero. Return zero solution.
+*
+         CALL CLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB )
+         CALL SLASET( 'F', MINMN, 1, ZERO, ZERO, S, 1 )
+         RANK = 0
+         GO TO 10
+      END IF
+*
+*     Scale B if max entry outside range [SMLNUM,BIGNUM].
+*
+      BNRM = CLANGE( 'M', M, NRHS, B, LDB, RWORK )
+      IBSCL = 0
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM.
+*
+         CALL CLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 1
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM.
+*
+         CALL CLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 2
+      END IF
+*
+*     If M < N make sure B(M+1:N,:) = 0
+*
+      IF( M.LT.N )
+     $   CALL CLASET( 'F', N-M, NRHS, CZERO, CZERO, B( M+1, 1 ), LDB )
+*
+*     Overdetermined case.
+*
+      IF( M.GE.N ) THEN
+*
+*        Path 1 - overdetermined or exactly determined.
+*
+         MM = M
+         IF( M.GE.MNTHR ) THEN
+*
+*           Path 1a - overdetermined, with many more rows than columns
+*
+            MM = N
+            ITAU = 1
+            NWORK = ITAU + N
+*
+*           Compute A=Q*R.
+*           (RWorkspace: need N)
+*           (CWorkspace: need N, prefer N*NB)
+*
+            CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                   LWORK-NWORK+1, INFO )
+*
+*           Multiply B by transpose(Q).
+*           (RWorkspace: need N)
+*           (CWorkspace: need NRHS, prefer NRHS*NB)
+*
+            CALL CUNMQR( 'L', 'C', M, NRHS, N, A, LDA, WORK( ITAU ), B,
+     $                   LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+*           Zero out below R.
+*
+            IF( N.GT.1 ) THEN
+               CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ),
+     $                      LDA )
+            END IF
+         END IF
+*
+         ITAUQ = 1
+         ITAUP = ITAUQ + N
+         NWORK = ITAUP + N
+         IE = 1
+         NRWORK = IE + N
+*
+*        Bidiagonalize R in A.
+*        (RWorkspace: need N)
+*        (CWorkspace: need 2*N+MM, prefer 2*N+(MM+N)*NB)
+*
+         CALL CGEBRD( MM, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors of R.
+*        (CWorkspace: need 2*N+NRHS, prefer 2*N+NRHS*NB)
+*
+         CALL CUNMBR( 'Q', 'L', 'C', MM, NRHS, N, A, LDA, WORK( ITAUQ ),
+     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+*        Solve the bidiagonal least squares problem.
+*
+         CALL CLALSD( 'U', SMLSIZ, N, NRHS, S, RWORK( IE ), B, LDB,
+     $                RCOND, RANK, WORK( NWORK ), RWORK( NRWORK ),
+     $                IWORK, INFO )
+         IF( INFO.NE.0 ) THEN
+            GO TO 10
+         END IF
+*
+*        Multiply B by right bidiagonalizing vectors of R.
+*
+         CALL CUNMBR( 'P', 'L', 'N', N, NRHS, N, A, LDA, WORK( ITAUP ),
+     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+      ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+
+     $         MAX( M, 2*M-4, NRHS, N-3*M ) ) THEN
+*
+*        Path 2a - underdetermined, with many more columns than rows
+*        and sufficient workspace for an efficient algorithm.
+*
+         LDWORK = M
+         IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ),
+     $       M*LDA+M+M*NRHS ) )LDWORK = LDA
+         ITAU = 1
+         NWORK = M + 1
+*
+*        Compute A=L*Q.
+*        (CWorkspace: need 2*M, prefer M+M*NB)
+*
+         CALL CGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                LWORK-NWORK+1, INFO )
+         IL = NWORK
+*
+*        Copy L to WORK(IL), zeroing out above its diagonal.
+*
+         CALL CLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK )
+         CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, WORK( IL+LDWORK ),
+     $                LDWORK )
+         ITAUQ = IL + LDWORK*M
+         ITAUP = ITAUQ + M
+         NWORK = ITAUP + M
+         IE = 1
+         NRWORK = IE + M
+*
+*        Bidiagonalize L in WORK(IL).
+*        (RWorkspace: need M)
+*        (CWorkspace: need M*M+4*M, prefer M*M+4*M+2*M*NB)
+*
+         CALL CGEBRD( M, M, WORK( IL ), LDWORK, S, RWORK( IE ),
+     $                WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
+     $                LWORK-NWORK+1, INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors of L.
+*        (CWorkspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB)
+*
+         CALL CUNMBR( 'Q', 'L', 'C', M, NRHS, M, WORK( IL ), LDWORK,
+     $                WORK( ITAUQ ), B, LDB, WORK( NWORK ),
+     $                LWORK-NWORK+1, INFO )
+*
+*        Solve the bidiagonal least squares problem.
+*
+         CALL CLALSD( 'U', SMLSIZ, M, NRHS, S, RWORK( IE ), B, LDB,
+     $                RCOND, RANK, WORK( NWORK ), RWORK( NRWORK ),
+     $                IWORK, INFO )
+         IF( INFO.NE.0 ) THEN
+            GO TO 10
+         END IF
+*
+*        Multiply B by right bidiagonalizing vectors of L.
+*
+         CALL CUNMBR( 'P', 'L', 'N', M, NRHS, M, WORK( IL ), LDWORK,
+     $                WORK( ITAUP ), B, LDB, WORK( NWORK ),
+     $                LWORK-NWORK+1, INFO )
+*
+*        Zero out below first M rows of B.
+*
+         CALL CLASET( 'F', N-M, NRHS, CZERO, CZERO, B( M+1, 1 ), LDB )
+         NWORK = ITAU + M
+*
+*        Multiply transpose(Q) by B.
+*        (CWorkspace: need NRHS, prefer NRHS*NB)
+*
+         CALL CUNMLQ( 'L', 'C', N, NRHS, M, A, LDA, WORK( ITAU ), B,
+     $                LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+      ELSE
+*
+*        Path 2 - remaining underdetermined cases.
+*
+         ITAUQ = 1
+         ITAUP = ITAUQ + M
+         NWORK = ITAUP + M
+         IE = 1
+         NRWORK = IE + M
+*
+*        Bidiagonalize A.
+*        (RWorkspace: need M)
+*        (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
+*
+         CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors.
+*        (CWorkspace: need 2*M+NRHS, prefer 2*M+NRHS*NB)
+*
+         CALL CUNMBR( 'Q', 'L', 'C', M, NRHS, N, A, LDA, WORK( ITAUQ ),
+     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+*        Solve the bidiagonal least squares problem.
+*
+         CALL CLALSD( 'L', SMLSIZ, M, NRHS, S, RWORK( IE ), B, LDB,
+     $                RCOND, RANK, WORK( NWORK ), RWORK( NRWORK ),
+     $                IWORK, INFO )
+         IF( INFO.NE.0 ) THEN
+            GO TO 10
+         END IF
+*
+*        Multiply B by right bidiagonalizing vectors of A.
+*
+         CALL CUNMBR( 'P', 'L', 'N', N, NRHS, M, A, LDA, WORK( ITAUP ),
+     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+      END IF
+*
+*     Undo scaling.
+*
+      IF( IASCL.EQ.1 ) THEN
+         CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
+         CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
+     $                INFO )
+      ELSE IF( IASCL.EQ.2 ) THEN
+         CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
+         CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
+     $                INFO )
+      END IF
+      IF( IBSCL.EQ.1 ) THEN
+         CALL CLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
+      ELSE IF( IBSCL.EQ.2 ) THEN
+         CALL CLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
+      END IF
+*
+   10 CONTINUE
+      WORK( 1 ) = CMPLX( MAXWRK, 0 )
+      RETURN
+*
+*     End of CGELSD
+*
+      END
diff --git a/costa/native/external/lapack/cgelss.f b/costa/native/external/lapack/cgelss.f
new file mode 100644
index 000000000..c80bb89aa
--- /dev/null
+++ b/costa/native/external/lapack/cgelss.f
@@ -0,0 +1,639 @@
+      SUBROUTINE CGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
+     $                   WORK, LWORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               RWORK( * ), S( * )
+      COMPLEX            A( LDA, * ), B( LDB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGELSS computes the minimum norm solution to a complex linear
+*  least squares problem:
+*
+*  Minimize 2-norm(| b - A*x |).
+*
+*  using the singular value decomposition (SVD) of A. A is an M-by-N
+*  matrix which may be rank-deficient.
+*
+*  Several right hand side vectors b and solution vectors x can be
+*  handled in a single call; they are stored as the columns of the
+*  M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix
+*  X.
+*
+*  The effective rank of A is determined by treating as zero those
+*  singular values which are less than RCOND times the largest singular
+*  value.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A. N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X. NRHS >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, the first min(m,n) rows of A are overwritten with
+*          its right singular vectors, stored rowwise.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the M-by-NRHS right hand side matrix B.
+*          On exit, B is overwritten by the N-by-NRHS solution matrix X.
+*          If m >= n and RANK = n, the residual sum-of-squares for
+*          the solution in the i-th column is given by the sum of
+*          squares of elements n+1:m in that column.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,M,N).
+*
+*  S       (output) REAL array, dimension (min(M,N))
+*          The singular values of A in decreasing order.
+*          The condition number of A in the 2-norm = S(1)/S(min(m,n)).
+*
+*  RCOND   (input) REAL
+*          RCOND is used to determine the effective rank of A.
+*          Singular values S(i) <= RCOND*S(1) are treated as zero.
+*          If RCOND < 0, machine precision is used instead.
+*
+*  RANK    (output) INTEGER
+*          The effective rank of A, i.e., the number of singular values
+*          which are greater than RCOND*S(1).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= 1, and also:
+*          LWORK >=  2*min(M,N) + max(M,N,NRHS)
+*          For good performance, LWORK should generally be larger.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) REAL array, dimension (5*min(M,N))
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  the algorithm for computing the SVD failed to converge;
+*                if INFO = i, i off-diagonal elements of an intermediate
+*                bidiagonal form did not converge to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+      COMPLEX            CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            BL, CHUNK, I, IASCL, IBSCL, IE, IL, IRWORK,
+     $                   ITAU, ITAUP, ITAUQ, IWORK, LDWORK, MAXMN,
+     $                   MAXWRK, MINMN, MINWRK, MM, MNTHR
+      REAL               ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM, THR
+*     ..
+*     .. Local Arrays ..
+      COMPLEX            VDUM( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CBDSQR, CCOPY, CGEBRD, CGELQF, CGEMM, CGEMV,
+     $                   CGEQRF, CLACPY, CLASCL, CLASET, CSRSCL, CUNGBR,
+     $                   CUNMBR, CUNMLQ, CUNMQR, SLABAD, SLASCL, SLASET,
+     $                   XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      REAL               CLANGE, SLAMCH
+      EXTERNAL           ILAENV, CLANGE, SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      MINMN = MIN( M, N )
+      MAXMN = MAX( M, N )
+      MNTHR = ILAENV( 6, 'CGELSS', ' ', M, N, NRHS, -1 )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN
+         INFO = -7
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       CWorkspace refers to complex workspace, and RWorkspace refers
+*       to real workspace. NB refers to the optimal block size for the
+*       immediately following subroutine, as returned by ILAENV.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN
+         MAXWRK = 0
+         MM = M
+         IF( M.GE.N .AND. M.GE.MNTHR ) THEN
+*
+*           Path 1a - overdetermined, with many more rows than columns
+*
+*           Space needed for CBDSQR is BDSPAC = 5*N
+*
+            MM = N
+            MAXWRK = MAX( MAXWRK, N+N*ILAENV( 1, 'CGEQRF', ' ', M, N,
+     $               -1, -1 ) )
+            MAXWRK = MAX( MAXWRK, N+NRHS*
+     $               ILAENV( 1, 'CUNMQR', 'LC', M, NRHS, N, -1 ) )
+         END IF
+         IF( M.GE.N ) THEN
+*
+*           Path 1 - overdetermined or exactly determined
+*
+*           Space needed for CBDSQR is BDSPC = 7*N+12
+*
+            MAXWRK = MAX( MAXWRK, 2*N+( MM+N )*
+     $               ILAENV( 1, 'CGEBRD', ' ', MM, N, -1, -1 ) )
+            MAXWRK = MAX( MAXWRK, 2*N+NRHS*
+     $               ILAENV( 1, 'CUNMBR', 'QLC', MM, NRHS, N, -1 ) )
+            MAXWRK = MAX( MAXWRK, 2*N+( N-1 )*
+     $               ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) )
+            MAXWRK = MAX( MAXWRK, N*NRHS )
+            MINWRK = 2*N + MAX( NRHS, M )
+         END IF
+         IF( N.GT.M ) THEN
+            MINWRK = 2*M + MAX( NRHS, N )
+            IF( N.GE.MNTHR ) THEN
+*
+*              Path 2a - underdetermined, with many more columns
+*              than rows
+*
+*              Space needed for CBDSQR is BDSPAC = 5*M
+*
+               MAXWRK = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
+               MAXWRK = MAX( MAXWRK, 3*M+M*M+2*M*
+     $                  ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
+               MAXWRK = MAX( MAXWRK, 3*M+M*M+NRHS*
+     $                  ILAENV( 1, 'CUNMBR', 'QLC', M, NRHS, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, 3*M+M*M+( M-1 )*
+     $                  ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) )
+               IF( NRHS.GT.1 ) THEN
+                  MAXWRK = MAX( MAXWRK, M*M+M+M*NRHS )
+               ELSE
+                  MAXWRK = MAX( MAXWRK, M*M+2*M )
+               END IF
+               MAXWRK = MAX( MAXWRK, M+NRHS*
+     $                  ILAENV( 1, 'CUNMLQ', 'LC', N, NRHS, M, -1 ) )
+            ELSE
+*
+*              Path 2 - underdetermined
+*
+*              Space needed for CBDSQR is BDSPAC = 5*M
+*
+               MAXWRK = 2*M + ( N+M )*ILAENV( 1, 'CGEBRD', ' ', M, N,
+     $                  -1, -1 )
+               MAXWRK = MAX( MAXWRK, 2*M+NRHS*
+     $                  ILAENV( 1, 'CUNMBR', 'QLC', M, NRHS, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, 2*M+M*
+     $                  ILAENV( 1, 'CUNGBR', 'P', M, N, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, N*NRHS )
+            END IF
+         END IF
+         MINWRK = MAX( MINWRK, 1 )
+         MAXWRK = MAX( MINWRK, MAXWRK )
+         WORK( 1 ) = MAXWRK
+      END IF
+*
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY )
+     $   INFO = -12
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGELSS', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         RANK = 0
+         RETURN
+      END IF
+*
+*     Get machine parameters
+*
+      EPS = SLAMCH( 'P' )
+      SFMIN = SLAMCH( 'S' )
+      SMLNUM = SFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = CLANGE( 'M', M, N, A, LDA, RWORK )
+      IASCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
+         IASCL = 1
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
+         IASCL = 2
+      ELSE IF( ANRM.EQ.ZERO ) THEN
+*
+*        Matrix all zero. Return zero solution.
+*
+         CALL CLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB )
+         CALL SLASET( 'F', MINMN, 1, ZERO, ZERO, S, MINMN )
+         RANK = 0
+         GO TO 70
+      END IF
+*
+*     Scale B if max element outside range [SMLNUM,BIGNUM]
+*
+      BNRM = CLANGE( 'M', M, NRHS, B, LDB, RWORK )
+      IBSCL = 0
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL CLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 1
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL CLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 2
+      END IF
+*
+*     Overdetermined case
+*
+      IF( M.GE.N ) THEN
+*
+*        Path 1 - overdetermined or exactly determined
+*
+         MM = M
+         IF( M.GE.MNTHR ) THEN
+*
+*           Path 1a - overdetermined, with many more rows than columns
+*
+            MM = N
+            ITAU = 1
+            IWORK = ITAU + N
+*
+*           Compute A=Q*R
+*           (CWorkspace: need 2*N, prefer N+N*NB)
+*           (RWorkspace: none)
+*
+            CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
+     $                   LWORK-IWORK+1, INFO )
+*
+*           Multiply B by transpose(Q)
+*           (CWorkspace: need N+NRHS, prefer N+NRHS*NB)
+*           (RWorkspace: none)
+*
+            CALL CUNMQR( 'L', 'C', M, NRHS, N, A, LDA, WORK( ITAU ), B,
+     $                   LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
+*
+*           Zero out below R
+*
+            IF( N.GT.1 )
+     $         CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ),
+     $                      LDA )
+         END IF
+*
+         IE = 1
+         ITAUQ = 1
+         ITAUP = ITAUQ + N
+         IWORK = ITAUP + N
+*
+*        Bidiagonalize R in A
+*        (CWorkspace: need 2*N+MM, prefer 2*N+(MM+N)*NB)
+*        (RWorkspace: need N)
+*
+         CALL CGEBRD( MM, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
+     $                INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors of R
+*        (CWorkspace: need 2*N+NRHS, prefer 2*N+NRHS*NB)
+*        (RWorkspace: none)
+*
+         CALL CUNMBR( 'Q', 'L', 'C', MM, NRHS, N, A, LDA, WORK( ITAUQ ),
+     $                B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
+*
+*        Generate right bidiagonalizing vectors of R in A
+*        (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
+*        (RWorkspace: none)
+*
+         CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
+     $                WORK( IWORK ), LWORK-IWORK+1, INFO )
+         IRWORK = IE + N
+*
+*        Perform bidiagonal QR iteration
+*          multiply B by transpose of left singular vectors
+*          compute right singular vectors in A
+*        (CWorkspace: none)
+*        (RWorkspace: need BDSPAC)
+*
+         CALL CBDSQR( 'U', N, N, 0, NRHS, S, RWORK( IE ), A, LDA, VDUM,
+     $                1, B, LDB, RWORK( IRWORK ), INFO )
+         IF( INFO.NE.0 )
+     $      GO TO 70
+*
+*        Multiply B by reciprocals of singular values
+*
+         THR = MAX( RCOND*S( 1 ), SFMIN )
+         IF( RCOND.LT.ZERO )
+     $      THR = MAX( EPS*S( 1 ), SFMIN )
+         RANK = 0
+         DO 10 I = 1, N
+            IF( S( I ).GT.THR ) THEN
+               CALL CSRSCL( NRHS, S( I ), B( I, 1 ), LDB )
+               RANK = RANK + 1
+            ELSE
+               CALL CLASET( 'F', 1, NRHS, CZERO, CZERO, B( I, 1 ), LDB )
+            END IF
+   10    CONTINUE
+*
+*        Multiply B by right singular vectors
+*        (CWorkspace: need N, prefer N*NRHS)
+*        (RWorkspace: none)
+*
+         IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN
+            CALL CGEMM( 'C', 'N', N, NRHS, N, CONE, A, LDA, B, LDB,
+     $                  CZERO, WORK, LDB )
+            CALL CLACPY( 'G', N, NRHS, WORK, LDB, B, LDB )
+         ELSE IF( NRHS.GT.1 ) THEN
+            CHUNK = LWORK / N
+            DO 20 I = 1, NRHS, CHUNK
+               BL = MIN( NRHS-I+1, CHUNK )
+               CALL CGEMM( 'C', 'N', N, BL, N, CONE, A, LDA, B( 1, I ),
+     $                     LDB, CZERO, WORK, N )
+               CALL CLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB )
+   20       CONTINUE
+         ELSE
+            CALL CGEMV( 'C', N, N, CONE, A, LDA, B, 1, CZERO, WORK, 1 )
+            CALL CCOPY( N, WORK, 1, B, 1 )
+         END IF
+*
+      ELSE IF( N.GE.MNTHR .AND. LWORK.GE.3*M+M*M+MAX( M, NRHS, N-2*M ) )
+     $          THEN
+*
+*        Underdetermined case, M much less than N
+*
+*        Path 2a - underdetermined, with many more columns than rows
+*        and sufficient workspace for an efficient algorithm
+*
+         LDWORK = M
+         IF( LWORK.GE.3*M+M*LDA+MAX( M, NRHS, N-2*M ) )
+     $      LDWORK = LDA
+         ITAU = 1
+         IWORK = M + 1
+*
+*        Compute A=L*Q
+*        (CWorkspace: need 2*M, prefer M+M*NB)
+*        (RWorkspace: none)
+*
+         CALL CGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
+     $                LWORK-IWORK+1, INFO )
+         IL = IWORK
+*
+*        Copy L to WORK(IL), zeroing out above it
+*
+         CALL CLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK )
+         CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, WORK( IL+LDWORK ),
+     $                LDWORK )
+         IE = 1
+         ITAUQ = IL + LDWORK*M
+         ITAUP = ITAUQ + M
+         IWORK = ITAUP + M
+*
+*        Bidiagonalize L in WORK(IL)
+*        (CWorkspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*        (RWorkspace: need M)
+*
+         CALL CGEBRD( M, M, WORK( IL ), LDWORK, S, RWORK( IE ),
+     $                WORK( ITAUQ ), WORK( ITAUP ), WORK( IWORK ),
+     $                LWORK-IWORK+1, INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors of L
+*        (CWorkspace: need M*M+3*M+NRHS, prefer M*M+3*M+NRHS*NB)
+*        (RWorkspace: none)
+*
+         CALL CUNMBR( 'Q', 'L', 'C', M, NRHS, M, WORK( IL ), LDWORK,
+     $                WORK( ITAUQ ), B, LDB, WORK( IWORK ),
+     $                LWORK-IWORK+1, INFO )
+*
+*        Generate right bidiagonalizing vectors of R in WORK(IL)
+*        (CWorkspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
+*        (RWorkspace: none)
+*
+         CALL CUNGBR( 'P', M, M, M, WORK( IL ), LDWORK, WORK( ITAUP ),
+     $                WORK( IWORK ), LWORK-IWORK+1, INFO )
+         IRWORK = IE + M
+*
+*        Perform bidiagonal QR iteration, computing right singular
+*        vectors of L in WORK(IL) and multiplying B by transpose of
+*        left singular vectors
+*        (CWorkspace: need M*M)
+*        (RWorkspace: need BDSPAC)
+*
+         CALL CBDSQR( 'U', M, M, 0, NRHS, S, RWORK( IE ), WORK( IL ),
+     $                LDWORK, A, LDA, B, LDB, RWORK( IRWORK ), INFO )
+         IF( INFO.NE.0 )
+     $      GO TO 70
+*
+*        Multiply B by reciprocals of singular values
+*
+         THR = MAX( RCOND*S( 1 ), SFMIN )
+         IF( RCOND.LT.ZERO )
+     $      THR = MAX( EPS*S( 1 ), SFMIN )
+         RANK = 0
+         DO 30 I = 1, M
+            IF( S( I ).GT.THR ) THEN
+               CALL CSRSCL( NRHS, S( I ), B( I, 1 ), LDB )
+               RANK = RANK + 1
+            ELSE
+               CALL CLASET( 'F', 1, NRHS, CZERO, CZERO, B( I, 1 ), LDB )
+            END IF
+   30    CONTINUE
+         IWORK = IL + M*LDWORK
+*
+*        Multiply B by right singular vectors of L in WORK(IL)
+*        (CWorkspace: need M*M+2*M, prefer M*M+M+M*NRHS)
+*        (RWorkspace: none)
+*
+         IF( LWORK.GE.LDB*NRHS+IWORK-1 .AND. NRHS.GT.1 ) THEN
+            CALL CGEMM( 'C', 'N', M, NRHS, M, CONE, WORK( IL ), LDWORK,
+     $                  B, LDB, CZERO, WORK( IWORK ), LDB )
+            CALL CLACPY( 'G', M, NRHS, WORK( IWORK ), LDB, B, LDB )
+         ELSE IF( NRHS.GT.1 ) THEN
+            CHUNK = ( LWORK-IWORK+1 ) / M
+            DO 40 I = 1, NRHS, CHUNK
+               BL = MIN( NRHS-I+1, CHUNK )
+               CALL CGEMM( 'C', 'N', M, BL, M, CONE, WORK( IL ), LDWORK,
+     $                     B( 1, I ), LDB, CZERO, WORK( IWORK ), N )
+               CALL CLACPY( 'G', M, BL, WORK( IWORK ), N, B( 1, I ),
+     $                      LDB )
+   40       CONTINUE
+         ELSE
+            CALL CGEMV( 'C', M, M, CONE, WORK( IL ), LDWORK, B( 1, 1 ),
+     $                  1, CZERO, WORK( IWORK ), 1 )
+            CALL CCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 )
+         END IF
+*
+*        Zero out below first M rows of B
+*
+         CALL CLASET( 'F', N-M, NRHS, CZERO, CZERO, B( M+1, 1 ), LDB )
+         IWORK = ITAU + M
+*
+*        Multiply transpose(Q) by B
+*        (CWorkspace: need M+NRHS, prefer M+NHRS*NB)
+*        (RWorkspace: none)
+*
+         CALL CUNMLQ( 'L', 'C', N, NRHS, M, A, LDA, WORK( ITAU ), B,
+     $                LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
+*
+      ELSE
+*
+*        Path 2 - remaining underdetermined cases
+*
+         IE = 1
+         ITAUQ = 1
+         ITAUP = ITAUQ + M
+         IWORK = ITAUP + M
+*
+*        Bidiagonalize A
+*        (CWorkspace: need 3*M, prefer 2*M+(M+N)*NB)
+*        (RWorkspace: need N)
+*
+         CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
+     $                INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors
+*        (CWorkspace: need 2*M+NRHS, prefer 2*M+NRHS*NB)
+*        (RWorkspace: none)
+*
+         CALL CUNMBR( 'Q', 'L', 'C', M, NRHS, N, A, LDA, WORK( ITAUQ ),
+     $                B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
+*
+*        Generate right bidiagonalizing vectors in A
+*        (CWorkspace: need 3*M, prefer 2*M+M*NB)
+*        (RWorkspace: none)
+*
+         CALL CUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
+     $                WORK( IWORK ), LWORK-IWORK+1, INFO )
+         IRWORK = IE + M
+*
+*        Perform bidiagonal QR iteration,
+*           computing right singular vectors of A in A and
+*           multiplying B by transpose of left singular vectors
+*        (CWorkspace: none)
+*        (RWorkspace: need BDSPAC)
+*
+         CALL CBDSQR( 'L', M, N, 0, NRHS, S, RWORK( IE ), A, LDA, VDUM,
+     $                1, B, LDB, RWORK( IRWORK ), INFO )
+         IF( INFO.NE.0 )
+     $      GO TO 70
+*
+*        Multiply B by reciprocals of singular values
+*
+         THR = MAX( RCOND*S( 1 ), SFMIN )
+         IF( RCOND.LT.ZERO )
+     $      THR = MAX( EPS*S( 1 ), SFMIN )
+         RANK = 0
+         DO 50 I = 1, M
+            IF( S( I ).GT.THR ) THEN
+               CALL CSRSCL( NRHS, S( I ), B( I, 1 ), LDB )
+               RANK = RANK + 1
+            ELSE
+               CALL CLASET( 'F', 1, NRHS, CZERO, CZERO, B( I, 1 ), LDB )
+            END IF
+   50    CONTINUE
+*
+*        Multiply B by right singular vectors of A
+*        (CWorkspace: need N, prefer N*NRHS)
+*        (RWorkspace: none)
+*
+         IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN
+            CALL CGEMM( 'C', 'N', N, NRHS, M, CONE, A, LDA, B, LDB,
+     $                  CZERO, WORK, LDB )
+            CALL CLACPY( 'G', N, NRHS, WORK, LDB, B, LDB )
+         ELSE IF( NRHS.GT.1 ) THEN
+            CHUNK = LWORK / N
+            DO 60 I = 1, NRHS, CHUNK
+               BL = MIN( NRHS-I+1, CHUNK )
+               CALL CGEMM( 'C', 'N', N, BL, M, CONE, A, LDA, B( 1, I ),
+     $                     LDB, CZERO, WORK, N )
+               CALL CLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB )
+   60       CONTINUE
+         ELSE
+            CALL CGEMV( 'C', M, N, CONE, A, LDA, B, 1, CZERO, WORK, 1 )
+            CALL CCOPY( N, WORK, 1, B, 1 )
+         END IF
+      END IF
+*
+*     Undo scaling
+*
+      IF( IASCL.EQ.1 ) THEN
+         CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
+         CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
+     $                INFO )
+      ELSE IF( IASCL.EQ.2 ) THEN
+         CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
+         CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
+     $                INFO )
+      END IF
+      IF( IBSCL.EQ.1 ) THEN
+         CALL CLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
+      ELSE IF( IBSCL.EQ.2 ) THEN
+         CALL CLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
+      END IF
+   70 CONTINUE
+      WORK( 1 ) = MAXWRK
+      RETURN
+*
+*     End of CGELSS
+*
+      END
diff --git a/costa/native/external/lapack/cgelsx.f b/costa/native/external/lapack/cgelsx.f
new file mode 100644
index 000000000..748954179
--- /dev/null
+++ b/costa/native/external/lapack/cgelsx.f
@@ -0,0 +1,358 @@
+      SUBROUTINE CGELSX( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK,
+     $                   WORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, M, N, NRHS, RANK
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            JPVT( * )
+      REAL               RWORK( * )
+      COMPLEX            A( LDA, * ), B( LDB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This routine is deprecated and has been replaced by routine CGELSY.
+*
+*  CGELSX computes the minimum-norm solution to a complex linear least
+*  squares problem:
+*      minimize || A * X - B ||
+*  using a complete orthogonal factorization of A.  A is an M-by-N
+*  matrix which may be rank-deficient.
+*
+*  Several right hand side vectors b and solution vectors x can be
+*  handled in a single call; they are stored as the columns of the
+*  M-by-NRHS right hand side matrix B and the N-by-NRHS solution
+*  matrix X.
+*
+*  The routine first computes a QR factorization with column pivoting:
+*      A * P = Q * [ R11 R12 ]
+*                  [  0  R22 ]
+*  with R11 defined as the largest leading submatrix whose estimated
+*  condition number is less than 1/RCOND.  The order of R11, RANK,
+*  is the effective rank of A.
+*
+*  Then, R22 is considered to be negligible, and R12 is annihilated
+*  by unitary transformations from the right, arriving at the
+*  complete orthogonal factorization:
+*     A * P = Q * [ T11 0 ] * Z
+*                 [  0  0 ]
+*  The minimum-norm solution is then
+*     X = P * Z' [ inv(T11)*Q1'*B ]
+*                [        0       ]
+*  where Q1 consists of the first RANK columns of Q.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of
+*          columns of matrices B and X. NRHS >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, A has been overwritten by details of its
+*          complete orthogonal factorization.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the M-by-NRHS right hand side matrix B.
+*          On exit, the N-by-NRHS solution matrix X.
+*          If m >= n and RANK = n, the residual sum-of-squares for
+*          the solution in the i-th column is given by the sum of
+*          squares of elements N+1:M in that column.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,M,N).
+*
+*  JPVT    (input/output) INTEGER array, dimension (N)
+*          On entry, if JPVT(i) .ne. 0, the i-th column of A is an
+*          initial column, otherwise it is a free column.  Before
+*          the QR factorization of A, all initial columns are
+*          permuted to the leading positions; only the remaining
+*          free columns are moved as a result of column pivoting
+*          during the factorization.
+*          On exit, if JPVT(i) = k, then the i-th column of A*P
+*          was the k-th column of A.
+*
+*  RCOND   (input) REAL
+*          RCOND is used to determine the effective rank of A, which
+*          is defined as the order of the largest leading triangular
+*          submatrix R11 in the QR factorization with pivoting of A,
+*          whose estimated condition number < 1/RCOND.
+*
+*  RANK    (output) INTEGER
+*          The effective rank of A, i.e., the order of the submatrix
+*          R11.  This is the same as the order of the submatrix T11
+*          in the complete orthogonal factorization of A.
+*
+*  WORK    (workspace) COMPLEX array, dimension
+*                      (min(M,N) + max( N, 2*min(M,N)+NRHS )),
+*
+*  RWORK   (workspace) REAL array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            IMAX, IMIN
+      PARAMETER          ( IMAX = 1, IMIN = 2 )
+      REAL               ZERO, ONE, DONE, NTDONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0, DONE = ZERO,
+     $                   NTDONE = ONE )
+      COMPLEX            CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IASCL, IBSCL, ISMAX, ISMIN, J, K, MN
+      REAL               ANRM, BIGNUM, BNRM, SMAX, SMAXPR, SMIN, SMINPR,
+     $                   SMLNUM
+      COMPLEX            C1, C2, S1, S2, T1, T2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEQPF, CLAIC1, CLASCL, CLASET, CLATZM, CTRSM,
+     $                   CTZRQF, CUNM2R, SLABAD, XERBLA
+*     ..
+*     .. External Functions ..
+      REAL               CLANGE, SLAMCH
+      EXTERNAL           CLANGE, SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, CONJG, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      MN = MIN( M, N )
+      ISMIN = MN + 1
+      ISMAX = 2*MN + 1
+*
+*     Test the input arguments.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN
+         INFO = -7
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGELSX', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( MIN( M, N, NRHS ).EQ.0 ) THEN
+         RANK = 0
+         RETURN
+      END IF
+*
+*     Get machine parameters
+*
+      SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' )
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+*
+*     Scale A, B if max elements outside range [SMLNUM,BIGNUM]
+*
+      ANRM = CLANGE( 'M', M, N, A, LDA, RWORK )
+      IASCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
+         IASCL = 1
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
+         IASCL = 2
+      ELSE IF( ANRM.EQ.ZERO ) THEN
+*
+*        Matrix all zero. Return zero solution.
+*
+         CALL CLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB )
+         RANK = 0
+         GO TO 100
+      END IF
+*
+      BNRM = CLANGE( 'M', M, NRHS, B, LDB, RWORK )
+      IBSCL = 0
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL CLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 1
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL CLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 2
+      END IF
+*
+*     Compute QR factorization with column pivoting of A:
+*        A * P = Q * R
+*
+      CALL CGEQPF( M, N, A, LDA, JPVT, WORK( 1 ), WORK( MN+1 ), RWORK,
+     $             INFO )
+*
+*     complex workspace MN+N. Real workspace 2*N. Details of Householder
+*     rotations stored in WORK(1:MN).
+*
+*     Determine RANK using incremental condition estimation
+*
+      WORK( ISMIN ) = CONE
+      WORK( ISMAX ) = CONE
+      SMAX = ABS( A( 1, 1 ) )
+      SMIN = SMAX
+      IF( ABS( A( 1, 1 ) ).EQ.ZERO ) THEN
+         RANK = 0
+         CALL CLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB )
+         GO TO 100
+      ELSE
+         RANK = 1
+      END IF
+*
+   10 CONTINUE
+      IF( RANK.LT.MN ) THEN
+         I = RANK + 1
+         CALL CLAIC1( IMIN, RANK, WORK( ISMIN ), SMIN, A( 1, I ),
+     $                A( I, I ), SMINPR, S1, C1 )
+         CALL CLAIC1( IMAX, RANK, WORK( ISMAX ), SMAX, A( 1, I ),
+     $                A( I, I ), SMAXPR, S2, C2 )
+*
+         IF( SMAXPR*RCOND.LE.SMINPR ) THEN
+            DO 20 I = 1, RANK
+               WORK( ISMIN+I-1 ) = S1*WORK( ISMIN+I-1 )
+               WORK( ISMAX+I-1 ) = S2*WORK( ISMAX+I-1 )
+   20       CONTINUE
+            WORK( ISMIN+RANK ) = C1
+            WORK( ISMAX+RANK ) = C2
+            SMIN = SMINPR
+            SMAX = SMAXPR
+            RANK = RANK + 1
+            GO TO 10
+         END IF
+      END IF
+*
+*     Logically partition R = [ R11 R12 ]
+*                             [  0  R22 ]
+*     where R11 = R(1:RANK,1:RANK)
+*
+*     [R11,R12] = [ T11, 0 ] * Y
+*
+      IF( RANK.LT.N )
+     $   CALL CTZRQF( RANK, N, A, LDA, WORK( MN+1 ), INFO )
+*
+*     Details of Householder rotations stored in WORK(MN+1:2*MN)
+*
+*     B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS)
+*
+      CALL CUNM2R( 'Left', 'Conjugate transpose', M, NRHS, MN, A, LDA,
+     $             WORK( 1 ), B, LDB, WORK( 2*MN+1 ), INFO )
+*
+*     workspace NRHS
+*
+*      B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS)
+*
+      CALL CTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', RANK,
+     $            NRHS, CONE, A, LDA, B, LDB )
+*
+      DO 40 I = RANK + 1, N
+         DO 30 J = 1, NRHS
+            B( I, J ) = CZERO
+   30    CONTINUE
+   40 CONTINUE
+*
+*     B(1:N,1:NRHS) := Y' * B(1:N,1:NRHS)
+*
+      IF( RANK.LT.N ) THEN
+         DO 50 I = 1, RANK
+            CALL CLATZM( 'Left', N-RANK+1, NRHS, A( I, RANK+1 ), LDA,
+     $                   CONJG( WORK( MN+I ) ), B( I, 1 ),
+     $                   B( RANK+1, 1 ), LDB, WORK( 2*MN+1 ) )
+   50    CONTINUE
+      END IF
+*
+*     workspace NRHS
+*
+*     B(1:N,1:NRHS) := P * B(1:N,1:NRHS)
+*
+      DO 90 J = 1, NRHS
+         DO 60 I = 1, N
+            WORK( 2*MN+I ) = NTDONE
+   60    CONTINUE
+         DO 80 I = 1, N
+            IF( WORK( 2*MN+I ).EQ.NTDONE ) THEN
+               IF( JPVT( I ).NE.I ) THEN
+                  K = I
+                  T1 = B( K, J )
+                  T2 = B( JPVT( K ), J )
+   70             CONTINUE
+                  B( JPVT( K ), J ) = T1
+                  WORK( 2*MN+K ) = DONE
+                  T1 = T2
+                  K = JPVT( K )
+                  T2 = B( JPVT( K ), J )
+                  IF( JPVT( K ).NE.I )
+     $               GO TO 70
+                  B( I, J ) = T1
+                  WORK( 2*MN+K ) = DONE
+               END IF
+            END IF
+   80    CONTINUE
+   90 CONTINUE
+*
+*     Undo scaling
+*
+      IF( IASCL.EQ.1 ) THEN
+         CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
+         CALL CLASCL( 'U', 0, 0, SMLNUM, ANRM, RANK, RANK, A, LDA,
+     $                INFO )
+      ELSE IF( IASCL.EQ.2 ) THEN
+         CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
+         CALL CLASCL( 'U', 0, 0, BIGNUM, ANRM, RANK, RANK, A, LDA,
+     $                INFO )
+      END IF
+      IF( IBSCL.EQ.1 ) THEN
+         CALL CLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
+      ELSE IF( IBSCL.EQ.2 ) THEN
+         CALL CLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
+      END IF
+*
+  100 CONTINUE
+*
+      RETURN
+*
+*     End of CGELSX
+*
+      END
diff --git a/costa/native/external/lapack/cgelsy.f b/costa/native/external/lapack/cgelsy.f
new file mode 100644
index 000000000..a4b96de7b
--- /dev/null
+++ b/costa/native/external/lapack/cgelsy.f
@@ -0,0 +1,386 @@
+      SUBROUTINE CGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK,
+     $                   WORK, LWORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            JPVT( * )
+      REAL               RWORK( * )
+      COMPLEX            A( LDA, * ), B( LDB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGELSY computes the minimum-norm solution to a complex linear least
+*  squares problem:
+*      minimize || A * X - B ||
+*  using a complete orthogonal factorization of A.  A is an M-by-N
+*  matrix which may be rank-deficient.
+*
+*  Several right hand side vectors b and solution vectors x can be
+*  handled in a single call; they are stored as the columns of the
+*  M-by-NRHS right hand side matrix B and the N-by-NRHS solution
+*  matrix X.
+*
+*  The routine first computes a QR factorization with column pivoting:
+*      A * P = Q * [ R11 R12 ]
+*                  [  0  R22 ]
+*  with R11 defined as the largest leading submatrix whose estimated
+*  condition number is less than 1/RCOND.  The order of R11, RANK,
+*  is the effective rank of A.
+*
+*  Then, R22 is considered to be negligible, and R12 is annihilated
+*  by unitary transformations from the right, arriving at the
+*  complete orthogonal factorization:
+*     A * P = Q * [ T11 0 ] * Z
+*                 [  0  0 ]
+*  The minimum-norm solution is then
+*     X = P * Z' [ inv(T11)*Q1'*B ]
+*                [        0       ]
+*  where Q1 consists of the first RANK columns of Q.
+*
+*  This routine is basically identical to the original xGELSX except
+*  three differences:
+*    o The permutation of matrix B (the right hand side) is faster and
+*      more simple.
+*    o The call to the subroutine xGEQPF has been substituted by the
+*      the call to the subroutine xGEQP3. This subroutine is a Blas-3
+*      version of the QR factorization with column pivoting.
+*    o Matrix B (the right hand side) is updated with Blas-3.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of
+*          columns of matrices B and X. NRHS >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, A has been overwritten by details of its
+*          complete orthogonal factorization.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the M-by-NRHS right hand side matrix B.
+*          On exit, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,M,N).
+*
+*  JPVT    (input/output) INTEGER array, dimension (N)
+*          On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted
+*          to the front of AP, otherwise column i is a free column.
+*          On exit, if JPVT(i) = k, then the i-th column of A*P
+*          was the k-th column of A.
+*
+*  RCOND   (input) REAL
+*          RCOND is used to determine the effective rank of A, which
+*          is defined as the order of the largest leading triangular
+*          submatrix R11 in the QR factorization with pivoting of A,
+*          whose estimated condition number < 1/RCOND.
+*
+*  RANK    (output) INTEGER
+*          The effective rank of A, i.e., the order of the submatrix
+*          R11.  This is the same as the order of the submatrix T11
+*          in the complete orthogonal factorization of A.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          The unblocked strategy requires that:
+*            LWORK >= MN + MAX( 2*MN, N+1, MN+NRHS )
+*          where MN = min(M,N).
+*          The block algorithm requires that:
+*            LWORK >= MN + MAX( 2*MN, NB*(N+1), MN+MN*NB, MN+NB*NRHS )
+*          where NB is an upper bound on the blocksize returned
+*          by ILAENV for the routines CGEQP3, CTZRZF, CTZRQF, CUNMQR,
+*          and CUNMRZ.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) REAL array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*    E. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain
+*    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            IMAX, IMIN
+      PARAMETER          ( IMAX = 1, IMIN = 2 )
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+      COMPLEX            CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IASCL, IBSCL, ISMAX, ISMIN, J, LWKOPT, MN,
+     $                   NB, NB1, NB2, NB3, NB4
+      REAL               ANRM, BIGNUM, BNRM, SMAX, SMAXPR, SMIN, SMINPR,
+     $                   SMLNUM, WSIZE
+      COMPLEX            C1, C2, S1, S2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CGEQP3, CLAIC1, CLASCL, CLASET, CTRSM,
+     $                   CTZRZF, CUNMQR, CUNMRZ, SLABAD, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      REAL               CLANGE, SLAMCH
+      EXTERNAL           CLANGE, ILAENV, SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, REAL, CMPLX
+*     ..
+*     .. Executable Statements ..
+*
+      MN = MIN( M, N )
+      ISMIN = MN + 1
+      ISMAX = 2*MN + 1
+*
+*     Test the input arguments.
+*
+      INFO = 0
+      NB1 = ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
+      NB2 = ILAENV( 1, 'CGERQF', ' ', M, N, -1, -1 )
+      NB3 = ILAENV( 1, 'CUNMQR', ' ', M, N, NRHS, -1 )
+      NB4 = ILAENV( 1, 'CUNMRQ', ' ', M, N, NRHS, -1 )
+      NB = MAX( NB1, NB2, NB3, NB4 )
+      LWKOPT = MAX( 1, MN+2*N+NB*(N+1), 2*MN+NB*NRHS )
+      WORK( 1 ) = CMPLX( LWKOPT )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN
+         INFO = -7
+      ELSE IF( LWORK.LT.( MN+MAX( 2*MN, N+1, MN+NRHS ) ) .AND.
+     $   .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGELSY', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( MIN( M, N, NRHS ).EQ.0 ) THEN
+         RANK = 0
+         RETURN
+      END IF
+*
+*     Get machine parameters
+*
+      SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' )
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+*
+*     Scale A, B if max entries outside range [SMLNUM,BIGNUM]
+*
+      ANRM = CLANGE( 'M', M, N, A, LDA, RWORK )
+      IASCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
+         IASCL = 1
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
+         IASCL = 2
+      ELSE IF( ANRM.EQ.ZERO ) THEN
+*
+*        Matrix all zero. Return zero solution.
+*
+         CALL CLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB )
+         RANK = 0
+         GO TO 70
+      END IF
+*
+      BNRM = CLANGE( 'M', M, NRHS, B, LDB, RWORK )
+      IBSCL = 0
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL CLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 1
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL CLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 2
+      END IF
+*
+*     Compute QR factorization with column pivoting of A:
+*        A * P = Q * R
+*
+      CALL CGEQP3( M, N, A, LDA, JPVT, WORK( 1 ), WORK( MN+1 ),
+     $             LWORK-MN, RWORK, INFO )
+      WSIZE = MN + REAL( WORK( MN+1 ) )
+*
+*     complex workspace: MN+NB*(N+1). real workspace 2*N.
+*     Details of Householder rotations stored in WORK(1:MN).
+*
+*     Determine RANK using incremental condition estimation
+*
+      WORK( ISMIN ) = CONE
+      WORK( ISMAX ) = CONE
+      SMAX = ABS( A( 1, 1 ) )
+      SMIN = SMAX
+      IF( ABS( A( 1, 1 ) ).EQ.ZERO ) THEN
+         RANK = 0
+         CALL CLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB )
+         GO TO 70
+      ELSE
+         RANK = 1
+      END IF
+*
+   10 CONTINUE
+      IF( RANK.LT.MN ) THEN
+         I = RANK + 1
+         CALL CLAIC1( IMIN, RANK, WORK( ISMIN ), SMIN, A( 1, I ),
+     $                A( I, I ), SMINPR, S1, C1 )
+         CALL CLAIC1( IMAX, RANK, WORK( ISMAX ), SMAX, A( 1, I ),
+     $                A( I, I ), SMAXPR, S2, C2 )
+*
+         IF( SMAXPR*RCOND.LE.SMINPR ) THEN
+            DO 20 I = 1, RANK
+               WORK( ISMIN+I-1 ) = S1*WORK( ISMIN+I-1 )
+               WORK( ISMAX+I-1 ) = S2*WORK( ISMAX+I-1 )
+   20       CONTINUE
+            WORK( ISMIN+RANK ) = C1
+            WORK( ISMAX+RANK ) = C2
+            SMIN = SMINPR
+            SMAX = SMAXPR
+            RANK = RANK + 1
+            GO TO 10
+         END IF
+      END IF
+*
+*     complex workspace: 3*MN.
+*
+*     Logically partition R = [ R11 R12 ]
+*                             [  0  R22 ]
+*     where R11 = R(1:RANK,1:RANK)
+*
+*     [R11,R12] = [ T11, 0 ] * Y
+*
+      IF( RANK.LT.N )
+     $   CALL CTZRZF( RANK, N, A, LDA, WORK( MN+1 ), WORK( 2*MN+1 ),
+     $                LWORK-2*MN, INFO )
+*
+*     complex workspace: 2*MN.
+*     Details of Householder rotations stored in WORK(MN+1:2*MN)
+*
+*     B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS)
+*
+      CALL CUNMQR( 'Left', 'Conjugate transpose', M, NRHS, MN, A, LDA,
+     $             WORK( 1 ), B, LDB, WORK( 2*MN+1 ), LWORK-2*MN, INFO )
+      WSIZE = MAX( WSIZE, 2*MN+REAL( WORK( 2*MN+1 ) ) )
+*
+*     complex workspace: 2*MN+NB*NRHS.
+*
+*     B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS)
+*
+      CALL CTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', RANK,
+     $            NRHS, CONE, A, LDA, B, LDB )
+*
+      DO 40 J = 1, NRHS
+         DO 30 I = RANK + 1, N
+            B( I, J ) = CZERO
+   30    CONTINUE
+   40 CONTINUE
+*
+*     B(1:N,1:NRHS) := Y' * B(1:N,1:NRHS)
+*
+      IF( RANK.LT.N ) THEN
+         CALL CUNMRZ( 'Left', 'Conjugate transpose', N, NRHS, RANK,
+     $                N-RANK, A, LDA, WORK( MN+1 ), B, LDB,
+     $                WORK( 2*MN+1 ), LWORK-2*MN, INFO )
+      END IF
+*
+*     complex workspace: 2*MN+NRHS.
+*
+*     B(1:N,1:NRHS) := P * B(1:N,1:NRHS)
+*
+      DO 60 J = 1, NRHS
+         DO 50 I = 1, N
+            WORK( JPVT( I ) ) = B( I, J )
+   50    CONTINUE
+         CALL CCOPY( N, WORK( 1 ), 1, B( 1, J ), 1 )
+   60 CONTINUE
+*
+*     complex workspace: N.
+*
+*     Undo scaling
+*
+      IF( IASCL.EQ.1 ) THEN
+         CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
+         CALL CLASCL( 'U', 0, 0, SMLNUM, ANRM, RANK, RANK, A, LDA,
+     $                INFO )
+      ELSE IF( IASCL.EQ.2 ) THEN
+         CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
+         CALL CLASCL( 'U', 0, 0, BIGNUM, ANRM, RANK, RANK, A, LDA,
+     $                INFO )
+      END IF
+      IF( IBSCL.EQ.1 ) THEN
+         CALL CLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
+      ELSE IF( IBSCL.EQ.2 ) THEN
+         CALL CLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
+      END IF
+*
+   70 CONTINUE
+      WORK( 1 ) = CMPLX( LWKOPT )
+*
+      RETURN
+*
+*     End of CGELSY
+*
+      END
diff --git a/costa/native/external/lapack/cgeql2.f b/costa/native/external/lapack/cgeql2.f
new file mode 100644
index 000000000..56121e1f6
--- /dev/null
+++ b/costa/native/external/lapack/cgeql2.f
@@ -0,0 +1,122 @@
+      SUBROUTINE CGEQL2( M, N, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGEQL2 computes a QL factorization of a complex m by n matrix A:
+*  A = Q * L.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the m by n matrix A.
+*          On exit, if m >= n, the lower triangle of the subarray
+*          A(m-n+1:m,1:n) contains the n by n lower triangular matrix L;
+*          if m <= n, the elements on and below the (n-m)-th
+*          superdiagonal contain the m by n lower trapezoidal matrix L;
+*          the remaining elements, with the array TAU, represent the
+*          unitary matrix Q as a product of elementary reflectors
+*          (see Further Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) COMPLEX array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace) COMPLEX array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(k) . . . H(2) H(1), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in
+*  A(1:m-k+i-1,n-k+i), and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, K
+      COMPLEX            ALPHA
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLARF, CLARFG, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGEQL2', -INFO )
+         RETURN
+      END IF
+*
+      K = MIN( M, N )
+*
+      DO 10 I = K, 1, -1
+*
+*        Generate elementary reflector H(i) to annihilate
+*        A(1:m-k+i-1,n-k+i)
+*
+         ALPHA = A( M-K+I, N-K+I )
+         CALL CLARFG( M-K+I, ALPHA, A( 1, N-K+I ), 1, TAU( I ) )
+*
+*        Apply H(i)' to A(1:m-k+i,1:n-k+i-1) from the left
+*
+         A( M-K+I, N-K+I ) = ONE
+         CALL CLARF( 'Left', M-K+I, N-K+I-1, A( 1, N-K+I ), 1,
+     $               CONJG( TAU( I ) ), A, LDA, WORK )
+         A( M-K+I, N-K+I ) = ALPHA
+   10 CONTINUE
+      RETURN
+*
+*     End of CGEQL2
+*
+      END
diff --git a/costa/native/external/lapack/cgeqlf.f b/costa/native/external/lapack/cgeqlf.f
new file mode 100644
index 000000000..88c05d26e
--- /dev/null
+++ b/costa/native/external/lapack/cgeqlf.f
@@ -0,0 +1,205 @@
+      SUBROUTINE CGEQLF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGEQLF computes a QL factorization of a complex M-by-N matrix A:
+*  A = Q * L.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit,
+*          if m >= n, the lower triangle of the subarray
+*          A(m-n+1:m,1:n) contains the N-by-N lower triangular matrix L;
+*          if m <= n, the elements on and below the (n-m)-th
+*          superdiagonal contain the M-by-N lower trapezoidal matrix L;
+*          the remaining elements, with the array TAU, represent the
+*          unitary matrix Q as a product of elementary reflectors
+*          (see Further Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) COMPLEX array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,N).
+*          For optimum performance LWORK >= N*NB, where NB is
+*          the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(k) . . . H(2) H(1), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in
+*  A(1:m-k+i-1,n-k+i), and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, K, KI, KK, LDWORK, LWKOPT,
+     $                   MU, NB, NBMIN, NU, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEQL2, CLARFB, CLARFT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'CGEQLF', ' ', M, N, -1, -1 )
+      LWKOPT = N*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGEQLF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      K = MIN( M, N )
+      IF( K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 1
+      IWS = N
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'CGEQLF', ' ', M, N, -1, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = N
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'CGEQLF', ' ', M, N, -1,
+     $                 -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code initially.
+*        The last kk columns are handled by the block method.
+*
+         KI = ( ( K-NX-1 ) / NB )*NB
+         KK = MIN( K, KI+NB )
+*
+         DO 10 I = K - KK + KI + 1, K - KK + 1, -NB
+            IB = MIN( K-I+1, NB )
+*
+*           Compute the QL factorization of the current block
+*           A(1:m-k+i+ib-1,n-k+i:n-k+i+ib-1)
+*
+            CALL CGEQL2( M-K+I+IB-1, IB, A( 1, N-K+I ), LDA, TAU( I ),
+     $                   WORK, IINFO )
+            IF( N-K+I.GT.1 ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i+ib-1) . . . H(i+1) H(i)
+*
+               CALL CLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB,
+     $                      A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H' to A(1:m-k+i+ib-1,1:n-k+i-1) from the left
+*
+               CALL CLARFB( 'Left', 'Conjugate transpose', 'Backward',
+     $                      'Columnwise', M-K+I+IB-1, N-K+I-1, IB,
+     $                      A( 1, N-K+I ), LDA, WORK, LDWORK, A, LDA,
+     $                      WORK( IB+1 ), LDWORK )
+            END IF
+   10    CONTINUE
+         MU = M - K + I + NB - 1
+         NU = N - K + I + NB - 1
+      ELSE
+         MU = M
+         NU = N
+      END IF
+*
+*     Use unblocked code to factor the last or only block
+*
+      IF( MU.GT.0 .AND. NU.GT.0 )
+     $   CALL CGEQL2( MU, NU, A, LDA, TAU, WORK, IINFO )
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of CGEQLF
+*
+      END
diff --git a/costa/native/external/lapack/cgeqp3.f b/costa/native/external/lapack/cgeqp3.f
new file mode 100644
index 000000000..44c43e6b1
--- /dev/null
+++ b/costa/native/external/lapack/cgeqp3.f
@@ -0,0 +1,285 @@
+      SUBROUTINE CGEQP3( M, N, A, LDA, JPVT, TAU, WORK, LWORK, RWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            JPVT( * )
+      REAL               RWORK( * )
+      COMPLEX            A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGEQP3 computes a QR factorization with column pivoting of a
+*  matrix A:  A*P = Q*R  using Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, the upper triangle of the array contains the
+*          min(M,N)-by-N upper trapezoidal matrix R; the elements below
+*          the diagonal, together with the array TAU, represent the
+*          unitary matrix Q as a product of min(M,N) elementary
+*          reflectors.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  JPVT    (input/output) INTEGER array, dimension (N)
+*          On entry, if JPVT(J).ne.0, the J-th column of A is permuted
+*          to the front of A*P (a leading column); if JPVT(J)=0,
+*          the J-th column of A is a free column.
+*          On exit, if JPVT(J)=K, then the J-th column of A*P was the
+*          the K-th column of A.
+*
+*  TAU     (output) COMPLEX array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO=0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= N+1.
+*          For optimal performance LWORK >= ( N+1 )*NB, where NB
+*          is the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) REAL array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit.
+*          < 0: if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(k), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real/complex scalar, and v is a real/complex vector
+*  with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in
+*  A(i+1:m,i), and tau in TAU(i).
+*
+*  Based on contributions by
+*    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain
+*    X. Sun, Computer Science Dept., Duke University, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            INB, INBMIN, IXOVER
+      PARAMETER          ( INB = 1, INBMIN = 2, IXOVER = 3 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            FJB, IWS, J, JB, LWKOPT, MINMN, MINWS, NA, NB,
+     $                   NBMIN, NFXD, NX, SM, SMINMN, SN, TOPBMN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEQRF, CLAQP2, CLAQPS, CSWAP, CUNMQR, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      REAL               SCNRM2
+      EXTERNAL           ILAENV, SCNRM2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      IWS = N + 1
+      MINMN = MIN( M, N )
+*
+*     Test input arguments
+*     ====================
+*
+      INFO = 0
+      NB = ILAENV( INB, 'CGEQRF', ' ', M, N, -1, -1 )
+      LWKOPT = ( N+1 )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( ( LWORK.LT.IWS ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGEQP3', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( MINMN.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+*     Move initial columns up front.
+*
+      NFXD = 1
+      DO 10 J = 1, N
+         IF( JPVT( J ).NE.0 ) THEN
+            IF( J.NE.NFXD ) THEN
+               CALL CSWAP( M, A( 1, J ), 1, A( 1, NFXD ), 1 )
+               JPVT( J ) = JPVT( NFXD )
+               JPVT( NFXD ) = J
+            ELSE
+               JPVT( J ) = J
+            END IF
+            NFXD = NFXD + 1
+         ELSE
+            JPVT( J ) = J
+         END IF
+   10 CONTINUE
+      NFXD = NFXD - 1
+*
+*     Factorize fixed columns
+*     =======================
+*
+*     Compute the QR factorization of fixed columns and update
+*     remaining columns.
+*
+      IF( NFXD.GT.0 ) THEN
+         NA = MIN( M, NFXD )
+*CC      CALL CGEQR2( M, NA, A, LDA, TAU, WORK, INFO )
+         CALL CGEQRF( M, NA, A, LDA, TAU, WORK, LWORK, INFO )
+         IWS = MAX( IWS, INT( WORK( 1 ) ) )
+         IF( NA.LT.N ) THEN
+*CC         CALL CUNM2R( 'Left', 'Conjugate Transpose', M, N-NA,
+*CC  $                   NA, A, LDA, TAU, A( 1, NA+1 ), LDA, WORK,
+*CC  $                   INFO )
+            CALL CUNMQR( 'Left', 'Conjugate Transpose', M, N-NA, NA, A,
+     $                   LDA, TAU, A( 1, NA+1 ), LDA, WORK, LWORK,
+     $                   INFO )
+            IWS = MAX( IWS, INT( WORK( 1 ) ) )
+         END IF
+      END IF
+*
+*     Factorize free columns
+*     ======================
+*
+      IF( NFXD.LT.MINMN ) THEN
+*
+         SM = M - NFXD
+         SN = N - NFXD
+         SMINMN = MINMN - NFXD
+*
+*        Determine the block size.
+*
+         NB = ILAENV( INB, 'CGEQRF', ' ', SM, SN, -1, -1 )
+         NBMIN = 2
+         NX = 0
+*
+         IF( ( NB.GT.1 ) .AND. ( NB.LT.SMINMN ) ) THEN
+*
+*           Determine when to cross over from blocked to unblocked code.
+*
+            NX = MAX( 0, ILAENV( IXOVER, 'CGEQRF', ' ', SM, SN, -1,
+     $           -1 ) )
+*
+*
+            IF( NX.LT.SMINMN ) THEN
+*
+*              Determine if workspace is large enough for blocked code.
+*
+               MINWS = ( SN+1 )*NB
+               IWS = MAX( IWS, MINWS )
+               IF( LWORK.LT.MINWS ) THEN
+*
+*                 Not enough workspace to use optimal NB: Reduce NB and
+*                 determine the minimum value of NB.
+*
+                  NB = LWORK / ( SN+1 )
+                  NBMIN = MAX( 2, ILAENV( INBMIN, 'CGEQRF', ' ', SM, SN,
+     $                    -1, -1 ) )
+*
+*
+               END IF
+            END IF
+         END IF
+*
+*        Initialize partial column norms. The first N elements of work
+*        store the exact column norms.
+*
+         DO 20 J = NFXD + 1, N
+            RWORK( J ) = SCNRM2( SM, A( NFXD+1, J ), 1 )
+            RWORK( N+J ) = RWORK( J )
+   20    CONTINUE
+*
+         IF( ( NB.GE.NBMIN ) .AND. ( NB.LT.SMINMN ) .AND.
+     $       ( NX.LT.SMINMN ) ) THEN
+*
+*           Use blocked code initially.
+*
+            J = NFXD + 1
+*
+*           Compute factorization: while loop.
+*
+*
+            TOPBMN = MINMN - NX
+   30       CONTINUE
+            IF( J.LE.TOPBMN ) THEN
+               JB = MIN( NB, TOPBMN-J+1 )
+*
+*              Factorize JB columns among columns J:N.
+*
+               CALL CLAQPS( M, N-J+1, J-1, JB, FJB, A( 1, J ), LDA,
+     $                      JPVT( J ), TAU( J ), RWORK( J ),
+     $                      RWORK( N+J ), WORK( 1 ), WORK( JB+1 ),
+     $                      N-J+1 )
+*
+               J = J + FJB
+               GO TO 30
+            END IF
+         ELSE
+            J = NFXD + 1
+         END IF
+*
+*        Use unblocked code to factor the last or only block.
+*
+*
+         IF( J.LE.MINMN )
+     $      CALL CLAQP2( M, N-J+1, J-1, A( 1, J ), LDA, JPVT( J ),
+     $                   TAU( J ), RWORK( J ), RWORK( N+J ), WORK( 1 ) )
+*
+      END IF
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of CGEQP3
+*
+      END
diff --git a/costa/native/external/lapack/cgeqpf.f b/costa/native/external/lapack/cgeqpf.f
new file mode 100644
index 000000000..5836e5503
--- /dev/null
+++ b/costa/native/external/lapack/cgeqpf.f
@@ -0,0 +1,225 @@
+      SUBROUTINE CGEQPF( M, N, A, LDA, JPVT, TAU, WORK, RWORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            JPVT( * )
+      REAL               RWORK( * )
+      COMPLEX            A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This routine is deprecated and has been replaced by routine CGEQP3.
+*
+*  CGEQPF computes a QR factorization with column pivoting of a
+*  complex M-by-N matrix A: A*P = Q*R.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A. N >= 0
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, the upper triangle of the array contains the
+*          min(M,N)-by-N upper triangular matrix R; the elements
+*          below the diagonal, together with the array TAU,
+*          represent the unitary matrix Q as a product of
+*          min(m,n) elementary reflectors.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  JPVT    (input/output) INTEGER array, dimension (N)
+*          On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted
+*          to the front of A*P (a leading column); if JPVT(i) = 0,
+*          the i-th column of A is a free column.
+*          On exit, if JPVT(i) = k, then the i-th column of A*P
+*          was the k-th column of A.
+*
+*  TAU     (output) COMPLEX array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors.
+*
+*  WORK    (workspace) COMPLEX array, dimension (N)
+*
+*  RWORK   (workspace) REAL array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(n)
+*
+*  Each H(i) has the form
+*
+*     H = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i).
+*
+*  The matrix P is represented in jpvt as follows: If
+*     jpvt(j) = i
+*  then the jth column of P is the ith canonical unit vector.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, ITEMP, J, MA, MN, PVT
+      REAL               TEMP, TEMP2
+      COMPLEX            AII
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEQR2, CLARF, CLARFG, CSWAP, CUNM2R, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, CMPLX, CONJG, MAX, MIN, SQRT
+*     ..
+*     .. External Functions ..
+      INTEGER            ISAMAX
+      REAL               SCNRM2
+      EXTERNAL           ISAMAX, SCNRM2
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGEQPF', -INFO )
+         RETURN
+      END IF
+*
+      MN = MIN( M, N )
+*
+*     Move initial columns up front
+*
+      ITEMP = 1
+      DO 10 I = 1, N
+         IF( JPVT( I ).NE.0 ) THEN
+            IF( I.NE.ITEMP ) THEN
+               CALL CSWAP( M, A( 1, I ), 1, A( 1, ITEMP ), 1 )
+               JPVT( I ) = JPVT( ITEMP )
+               JPVT( ITEMP ) = I
+            ELSE
+               JPVT( I ) = I
+            END IF
+            ITEMP = ITEMP + 1
+         ELSE
+            JPVT( I ) = I
+         END IF
+   10 CONTINUE
+      ITEMP = ITEMP - 1
+*
+*     Compute the QR factorization and update remaining columns
+*
+      IF( ITEMP.GT.0 ) THEN
+         MA = MIN( ITEMP, M )
+         CALL CGEQR2( M, MA, A, LDA, TAU, WORK, INFO )
+         IF( MA.LT.N ) THEN
+            CALL CUNM2R( 'Left', 'Conjugate transpose', M, N-MA, MA, A,
+     $                   LDA, TAU, A( 1, MA+1 ), LDA, WORK, INFO )
+         END IF
+      END IF
+*
+      IF( ITEMP.LT.MN ) THEN
+*
+*        Initialize partial column norms. The first n elements of
+*        work store the exact column norms.
+*
+         DO 20 I = ITEMP + 1, N
+            RWORK( I ) = SCNRM2( M-ITEMP, A( ITEMP+1, I ), 1 )
+            RWORK( N+I ) = RWORK( I )
+   20    CONTINUE
+*
+*        Compute factorization
+*
+         DO 40 I = ITEMP + 1, MN
+*
+*           Determine ith pivot column and swap if necessary
+*
+            PVT = ( I-1 ) + ISAMAX( N-I+1, RWORK( I ), 1 )
+*
+            IF( PVT.NE.I ) THEN
+               CALL CSWAP( M, A( 1, PVT ), 1, A( 1, I ), 1 )
+               ITEMP = JPVT( PVT )
+               JPVT( PVT ) = JPVT( I )
+               JPVT( I ) = ITEMP
+               RWORK( PVT ) = RWORK( I )
+               RWORK( N+PVT ) = RWORK( N+I )
+            END IF
+*
+*           Generate elementary reflector H(i)
+*
+            AII = A( I, I )
+            CALL CLARFG( M-I+1, AII, A( MIN( I+1, M ), I ), 1,
+     $                   TAU( I ) )
+            A( I, I ) = AII
+*
+            IF( I.LT.N ) THEN
+*
+*              Apply H(i) to A(i:m,i+1:n) from the left
+*
+               AII = A( I, I )
+               A( I, I ) = CMPLX( ONE )
+               CALL CLARF( 'Left', M-I+1, N-I, A( I, I ), 1,
+     $                     CONJG( TAU( I ) ), A( I, I+1 ), LDA, WORK )
+               A( I, I ) = AII
+            END IF
+*
+*           Update partial column norms
+*
+            DO 30 J = I + 1, N
+               IF( RWORK( J ).NE.ZERO ) THEN
+                  TEMP = ONE - ( ABS( A( I, J ) ) / RWORK( J ) )**2
+                  TEMP = MAX( TEMP, ZERO )
+                  TEMP2 = ONE + 0.05*TEMP*( RWORK( J ) / RWORK( N+J ) )
+     $                    **2
+                  IF( TEMP2.EQ.ONE ) THEN
+                     IF( M-I.GT.0 ) THEN
+                        RWORK( J ) = SCNRM2( M-I, A( I+1, J ), 1 )
+                        RWORK( N+J ) = RWORK( J )
+                     ELSE
+                        RWORK( J ) = ZERO
+                        RWORK( N+J ) = ZERO
+                     END IF
+                  ELSE
+                     RWORK( J ) = RWORK( J )*SQRT( TEMP )
+                  END IF
+               END IF
+   30       CONTINUE
+*
+   40    CONTINUE
+      END IF
+      RETURN
+*
+*     End of CGEQPF
+*
+      END
diff --git a/costa/native/external/lapack/cgeqr2.f b/costa/native/external/lapack/cgeqr2.f
new file mode 100644
index 000000000..b06c32e49
--- /dev/null
+++ b/costa/native/external/lapack/cgeqr2.f
@@ -0,0 +1,122 @@
+      SUBROUTINE CGEQR2( M, N, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGEQR2 computes a QR factorization of a complex m by n matrix A:
+*  A = Q * R.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the m by n matrix A.
+*          On exit, the elements on and above the diagonal of the array
+*          contain the min(m,n) by n upper trapezoidal matrix R (R is
+*          upper triangular if m >= n); the elements below the diagonal,
+*          with the array TAU, represent the unitary matrix Q as a
+*          product of elementary reflectors (see Further Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) COMPLEX array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace) COMPLEX array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(k), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
+*  and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, K
+      COMPLEX            ALPHA
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLARF, CLARFG, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGEQR2', -INFO )
+         RETURN
+      END IF
+*
+      K = MIN( M, N )
+*
+      DO 10 I = 1, K
+*
+*        Generate elementary reflector H(i) to annihilate A(i+1:m,i)
+*
+         CALL CLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1,
+     $                TAU( I ) )
+         IF( I.LT.N ) THEN
+*
+*           Apply H(i)' to A(i:m,i+1:n) from the left
+*
+            ALPHA = A( I, I )
+            A( I, I ) = ONE
+            CALL CLARF( 'Left', M-I+1, N-I, A( I, I ), 1,
+     $                  CONJG( TAU( I ) ), A( I, I+1 ), LDA, WORK )
+            A( I, I ) = ALPHA
+         END IF
+   10 CONTINUE
+      RETURN
+*
+*     End of CGEQR2
+*
+      END
diff --git a/costa/native/external/lapack/cgeqrf.f b/costa/native/external/lapack/cgeqrf.f
new file mode 100644
index 000000000..eaf2e80df
--- /dev/null
+++ b/costa/native/external/lapack/cgeqrf.f
@@ -0,0 +1,197 @@
+      SUBROUTINE CGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGEQRF computes a QR factorization of a complex M-by-N matrix A:
+*  A = Q * R.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, the elements on and above the diagonal of the array
+*          contain the min(M,N)-by-N upper trapezoidal matrix R (R is
+*          upper triangular if m >= n); the elements below the diagonal,
+*          with the array TAU, represent the unitary matrix Q as a
+*          product of min(m,n) elementary reflectors (see Further
+*          Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) COMPLEX array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,N).
+*          For optimum performance LWORK >= N*NB, where NB is
+*          the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(k), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
+*  and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB,
+     $                   NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEQR2, CLARFB, CLARFT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
+      LWKOPT = N*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGEQRF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      K = MIN( M, N )
+      IF( K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 0
+      IWS = N
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'CGEQRF', ' ', M, N, -1, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = N
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'CGEQRF', ' ', M, N, -1,
+     $                 -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code initially
+*
+         DO 10 I = 1, K - NX, NB
+            IB = MIN( K-I+1, NB )
+*
+*           Compute the QR factorization of the current block
+*           A(i:m,i:i+ib-1)
+*
+            CALL CGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ), WORK,
+     $                   IINFO )
+            IF( I+IB.LE.N ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i) H(i+1) . . . H(i+ib-1)
+*
+               CALL CLARFT( 'Forward', 'Columnwise', M-I+1, IB,
+     $                      A( I, I ), LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H' to A(i:m,i+ib:n) from the left
+*
+               CALL CLARFB( 'Left', 'Conjugate transpose', 'Forward',
+     $                      'Columnwise', M-I+1, N-I-IB+1, IB,
+     $                      A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ),
+     $                      LDA, WORK( IB+1 ), LDWORK )
+            END IF
+   10    CONTINUE
+      ELSE
+         I = 1
+      END IF
+*
+*     Use unblocked code to factor the last or only block.
+*
+      IF( I.LE.K )
+     $   CALL CGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK,
+     $                IINFO )
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of CGEQRF
+*
+      END
diff --git a/costa/native/external/lapack/cgerfs.f b/costa/native/external/lapack/cgerfs.f
new file mode 100644
index 000000000..6ccdff204
--- /dev/null
+++ b/costa/native/external/lapack/cgerfs.f
@@ -0,0 +1,341 @@
+      SUBROUTINE CGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB,
+     $                   X, LDX, FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX            A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGERFS improves the computed solution to a system of linear
+*  equations and provides error bounds and backward error estimates for
+*  the solution.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input) COMPLEX array, dimension (LDA,N)
+*          The original N-by-N matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  AF      (input) COMPLEX array, dimension (LDAF,N)
+*          The factors L and U from the factorization A = P*L*U
+*          as computed by CGETRF.
+*
+*  LDAF    (input) INTEGER
+*          The leading dimension of the array AF.  LDAF >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices from CGETRF; for 1<=i<=N, row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  B       (input) COMPLEX array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) COMPLEX array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by CGETRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+      REAL               TWO
+      PARAMETER          ( TWO = 2.0E+0 )
+      REAL               THREE
+      PARAMETER          ( THREE = 3.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN
+      CHARACTER          TRANSN, TRANST
+      INTEGER            COUNT, I, J, K, KASE, NZ
+      REAL               EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+      COMPLEX            ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CCOPY, CGEMV, CGETRS, CLACON, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MAX, REAL
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $    LSAME( TRANS, 'C' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGERFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      IF( NOTRAN ) THEN
+         TRANSN = 'N'
+         TRANST = 'C'
+      ELSE
+         TRANSN = 'C'
+         TRANST = 'N'
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = SLAMCH( 'Epsilon' )
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - op(A) * X,
+*        where op(A) = A, A**T, or A**H, depending on TRANS.
+*
+         CALL CCOPY( N, B( 1, J ), 1, WORK, 1 )
+         CALL CGEMV( TRANS, N, N, -ONE, A, LDA, X( 1, J ), 1, ONE, WORK,
+     $               1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            RWORK( I ) = CABS1( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(op(A))*abs(X) + abs(B).
+*
+         IF( NOTRAN ) THEN
+            DO 50 K = 1, N
+               XK = CABS1( X( K, J ) )
+               DO 40 I = 1, N
+                  RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK
+   40          CONTINUE
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               DO 60 I = 1, N
+                  S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) )
+   60          CONTINUE
+               RWORK( K ) = RWORK( K ) + S
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) )
+            ELSE
+               S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) /
+     $             ( RWORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL CGETRS( TRANS, N, 1, AF, LDAF, IPIV, WORK, N, INFO )
+            CALL CAXPY( N, ONE, WORK, 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(op(A)))*
+*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(op(A)) is the inverse of op(A)
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(op(A))*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use CLACON to estimate the infinity-norm of the matrix
+*           inv(op(A)) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I )
+            ELSE
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) +
+     $                      SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL CLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(op(A)**H).
+*
+               CALL CGETRS( TRANST, N, 1, AF, LDAF, IPIV, WORK, N,
+     $                      INFO )
+               DO 110 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  110          CONTINUE
+            ELSE
+*
+*              Multiply by inv(op(A))*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  120          CONTINUE
+               CALL CGETRS( TRANSN, N, 1, AF, LDAF, IPIV, WORK, N,
+     $                      INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of CGERFS
+*
+      END
diff --git a/costa/native/external/lapack/cgerq2.f b/costa/native/external/lapack/cgerq2.f
new file mode 100644
index 000000000..024d633f4
--- /dev/null
+++ b/costa/native/external/lapack/cgerq2.f
@@ -0,0 +1,125 @@
+      SUBROUTINE CGERQ2( M, N, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGERQ2 computes an RQ factorization of a complex m by n matrix A:
+*  A = R * Q.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the m by n matrix A.
+*          On exit, if m <= n, the upper triangle of the subarray
+*          A(1:m,n-m+1:n) contains the m by m upper triangular matrix R;
+*          if m >= n, the elements on and above the (m-n)-th subdiagonal
+*          contain the m by n upper trapezoidal matrix R; the remaining
+*          elements, with the array TAU, represent the unitary matrix
+*          Q as a product of elementary reflectors (see Further
+*          Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) COMPLEX array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace) COMPLEX array, dimension (M)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1)' H(2)' . . . H(k)', where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(n-k+i+1:n) = 0 and v(n-k+i) = 1; conjg(v(1:n-k+i-1)) is stored on
+*  exit in A(m-k+i,1:n-k+i-1), and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, K
+      COMPLEX            ALPHA
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLACGV, CLARF, CLARFG, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGERQ2', -INFO )
+         RETURN
+      END IF
+*
+      K = MIN( M, N )
+*
+      DO 10 I = K, 1, -1
+*
+*        Generate elementary reflector H(i) to annihilate
+*        A(m-k+i,1:n-k+i-1)
+*
+         CALL CLACGV( N-K+I, A( M-K+I, 1 ), LDA )
+         ALPHA = A( M-K+I, N-K+I )
+         CALL CLARFG( N-K+I, ALPHA, A( M-K+I, 1 ), LDA,
+     $                TAU( I ) )
+*
+*        Apply H(i) to A(1:m-k+i-1,1:n-k+i) from the right
+*
+         A( M-K+I, N-K+I ) = ONE
+         CALL CLARF( 'Right', M-K+I-1, N-K+I, A( M-K+I, 1 ), LDA,
+     $               TAU( I ), A, LDA, WORK )
+         A( M-K+I, N-K+I ) = ALPHA
+         CALL CLACGV( N-K+I-1, A( M-K+I, 1 ), LDA )
+   10 CONTINUE
+      RETURN
+*
+*     End of CGERQ2
+*
+      END
diff --git a/costa/native/external/lapack/cgerqf.f b/costa/native/external/lapack/cgerqf.f
new file mode 100644
index 000000000..2091a8201
--- /dev/null
+++ b/costa/native/external/lapack/cgerqf.f
@@ -0,0 +1,205 @@
+      SUBROUTINE CGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGERQF computes an RQ factorization of a complex M-by-N matrix A:
+*  A = R * Q.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit,
+*          if m <= n, the upper triangle of the subarray
+*          A(1:m,n-m+1:n) contains the M-by-M upper triangular matrix R;
+*          if m >= n, the elements on and above the (m-n)-th subdiagonal
+*          contain the M-by-N upper trapezoidal matrix R;
+*          the remaining elements, with the array TAU, represent the
+*          unitary matrix Q as a product of min(m,n) elementary
+*          reflectors (see Further Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) COMPLEX array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,M).
+*          For optimum performance LWORK >= M*NB, where NB is
+*          the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1)' H(2)' . . . H(k)', where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(n-k+i+1:n) = 0 and v(n-k+i) = 1; conjg(v(1:n-k+i-1)) is stored on
+*  exit in A(m-k+i,1:n-k+i-1), and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, K, KI, KK, LDWORK, LWKOPT,
+     $                   MU, NB, NBMIN, NU, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGERQ2, CLARFB, CLARFT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'CGERQF', ' ', M, N, -1, -1 )
+      LWKOPT = M*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGERQF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      K = MIN( M, N )
+      IF( K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 1
+      IWS = M
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'CGERQF', ' ', M, N, -1, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = M
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'CGERQF', ' ', M, N, -1,
+     $                 -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code initially.
+*        The last kk rows are handled by the block method.
+*
+         KI = ( ( K-NX-1 ) / NB )*NB
+         KK = MIN( K, KI+NB )
+*
+         DO 10 I = K - KK + KI + 1, K - KK + 1, -NB
+            IB = MIN( K-I+1, NB )
+*
+*           Compute the RQ factorization of the current block
+*           A(m-k+i:m-k+i+ib-1,1:n-k+i+ib-1)
+*
+            CALL CGERQ2( IB, N-K+I+IB-1, A( M-K+I, 1 ), LDA, TAU( I ),
+     $                   WORK, IINFO )
+            IF( M-K+I.GT.1 ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i+ib-1) . . . H(i+1) H(i)
+*
+               CALL CLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB,
+     $                      A( M-K+I, 1 ), LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H to A(1:m-k+i-1,1:n-k+i+ib-1) from the right
+*
+               CALL CLARFB( 'Right', 'No transpose', 'Backward',
+     $                      'Rowwise', M-K+I-1, N-K+I+IB-1, IB,
+     $                      A( M-K+I, 1 ), LDA, WORK, LDWORK, A, LDA,
+     $                      WORK( IB+1 ), LDWORK )
+            END IF
+   10    CONTINUE
+         MU = M - K + I + NB - 1
+         NU = N - K + I + NB - 1
+      ELSE
+         MU = M
+         NU = N
+      END IF
+*
+*     Use unblocked code to factor the last or only block
+*
+      IF( MU.GT.0 .AND. NU.GT.0 )
+     $   CALL CGERQ2( MU, NU, A, LDA, TAU, WORK, IINFO )
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of CGERQF
+*
+      END
diff --git a/costa/native/external/lapack/cgesc2.f b/costa/native/external/lapack/cgesc2.f
new file mode 100644
index 000000000..236494f56
--- /dev/null
+++ b/costa/native/external/lapack/cgesc2.f
@@ -0,0 +1,134 @@
+      SUBROUTINE CGESC2( N, A, LDA, RHS, IPIV, JPIV, SCALE )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            LDA, N
+      REAL               SCALE
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), JPIV( * )
+      COMPLEX            A( LDA, * ), RHS( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGESC2 solves a system of linear equations
+*
+*            A * X = scale* RHS
+*
+*  with a general N-by-N matrix A using the LU factorization with
+*  complete pivoting computed by CGETC2.
+*
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.
+*
+*  A       (input) COMPLEX array, dimension (LDA, N)
+*          On entry, the  LU part of the factorization of the n-by-n
+*          matrix A computed by CGETC2:  A = P * L * U * Q
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1, N).
+*
+*  RHS     (input/output) COMPLEX array, dimension N.
+*          On entry, the right hand side vector b.
+*          On exit, the solution vector X.
+*
+*  IPIV    (iput) INTEGER array, dimension (N).
+*          The pivot indices; for 1 <= i <= N, row i of the
+*          matrix has been interchanged with row IPIV(i).
+*
+*  JPIV    (iput) INTEGER array, dimension (N).
+*          The pivot indices; for 1 <= j <= N, column j of the
+*          matrix has been interchanged with column JPIV(j).
+*
+*  SCALE    (output) REAL
+*           On exit, SCALE contains the scale factor. SCALE is chosen
+*           0 <= SCALE <= 1 to prevent owerflow in the solution.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      REAL               BIGNUM, EPS, SMLNUM
+      COMPLEX            TEMP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLASWP, CSCAL, SLABAD
+*     ..
+*     .. External Functions ..
+      INTEGER            ICAMAX
+      REAL               SLAMCH
+      EXTERNAL           ICAMAX, SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, CMPLX, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Set constant to control overflow
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SLAMCH( 'S' ) / EPS
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+*
+*     Apply permutations IPIV to RHS
+*
+      CALL CLASWP( 1, RHS, LDA, 1, N-1, IPIV, 1 )
+*
+*     Solve for L part
+*
+      DO 20 I = 1, N - 1
+         DO 10 J = I + 1, N
+            RHS( J ) = RHS( J ) - A( J, I )*RHS( I )
+   10    CONTINUE
+   20 CONTINUE
+*
+*     Solve for U part
+*
+      SCALE = ONE
+*
+*     Check for scaling
+*
+      I = ICAMAX( N, RHS, 1 )
+      IF( TWO*SMLNUM*ABS( RHS( I ) ).GT.ABS( A( N, N ) ) ) THEN
+         TEMP = CMPLX( ONE / TWO, ZERO ) / ABS( RHS( I ) )
+         CALL CSCAL( N, TEMP, RHS( 1 ), 1 )
+         SCALE = SCALE*REAL( TEMP )
+      END IF
+      DO 40 I = N, 1, -1
+         TEMP = CMPLX( ONE, ZERO ) / A( I, I )
+         RHS( I ) = RHS( I )*TEMP
+         DO 30 J = I + 1, N
+            RHS( I ) = RHS( I ) - RHS( J )*( A( I, J )*TEMP )
+   30    CONTINUE
+   40 CONTINUE
+*
+*     Apply permutations JPIV to the solution (RHS)
+*
+      CALL CLASWP( 1, RHS, LDA, 1, N-1, JPIV, -1 )
+      RETURN
+*
+*     End of CGESC2
+*
+      END
diff --git a/costa/native/external/lapack/cgesdd.f b/costa/native/external/lapack/cgesdd.f
new file mode 100644
index 000000000..d9445dc94
--- /dev/null
+++ b/costa/native/external/lapack/cgesdd.f
@@ -0,0 +1,1950 @@
+      SUBROUTINE CGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK,
+     $                   LWORK, RWORK, IWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ
+      INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               RWORK( * ), S( * )
+      COMPLEX            A( LDA, * ), U( LDU, * ), VT( LDVT, * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGESDD computes the singular value decomposition (SVD) of a complex
+*  M-by-N matrix A, optionally computing the left and/or right singular
+*  vectors, by using divide-and-conquer method. The SVD is written
+*
+*       A = U * SIGMA * conjugate-transpose(V)
+*
+*  where SIGMA is an M-by-N matrix which is zero except for its
+*  min(m,n) diagonal elements, U is an M-by-M unitary matrix, and
+*  V is an N-by-N unitary matrix.  The diagonal elements of SIGMA
+*  are the singular values of A; they are real and non-negative, and
+*  are returned in descending order.  The first min(m,n) columns of
+*  U and V are the left and right singular vectors of A.
+*
+*  Note that the routine returns VT = V**H, not V.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          Specifies options for computing all or part of the matrix U:
+*          = 'A':  all M columns of U and all N rows of V**H are
+*                  returned in the arrays U and VT;
+*          = 'S':  the first min(M,N) columns of U and the first
+*                  min(M,N) rows of V**H are returned in the arrays U
+*                  and VT;
+*          = 'O':  If M >= N, the first N columns of U are overwritten
+*                  on the array A and all rows of V**H are returned in
+*                  the array VT;
+*                  otherwise, all columns of U are returned in the
+*                  array U and the first M rows of V**H are overwritten
+*                  in the array VT;
+*          = 'N':  no columns of U or rows of V**H are computed.
+*
+*  M       (input) INTEGER
+*          The number of rows of the input matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the input matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit,
+*          if JOBZ = 'O',  A is overwritten with the first N columns
+*                          of U (the left singular vectors, stored
+*                          columnwise) if M >= N;
+*                          A is overwritten with the first M rows
+*                          of V**H (the right singular vectors, stored
+*                          rowwise) otherwise.
+*          if JOBZ .ne. 'O', the contents of A are destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  S       (output) REAL array, dimension (min(M,N))
+*          The singular values of A, sorted so that S(i) >= S(i+1).
+*
+*  U       (output) COMPLEX array, dimension (LDU,UCOL)
+*          UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N;
+*          UCOL = min(M,N) if JOBZ = 'S'.
+*          If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M
+*          unitary matrix U;
+*          if JOBZ = 'S', U contains the first min(M,N) columns of U
+*          (the left singular vectors, stored columnwise);
+*          if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced.
+*
+*  LDU     (input) INTEGER
+*          The leading dimension of the array U.  LDU >= 1; if
+*          JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M.
+*
+*  VT      (output) COMPLEX array, dimension (LDVT,N)
+*          If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the
+*          N-by-N unitary matrix V**H;
+*          if JOBZ = 'S', VT contains the first min(M,N) rows of
+*          V**H (the right singular vectors, stored rowwise);
+*          if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced.
+*
+*  LDVT    (input) INTEGER
+*          The leading dimension of the array VT.  LDVT >= 1; if
+*          JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N;
+*          if JOBZ = 'S', LDVT >= min(M,N).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= 1.
+*          if JOBZ = 'N', LWORK >= 2*min(M,N)+max(M,N).
+*          if JOBZ = 'O',
+*                LWORK >= 2*min(M,N)*min(M,N)+2*min(M,N)+max(M,N).
+*          if JOBZ = 'S' or 'A',
+*                LWORK >= min(M,N)*min(M,N)+2*min(M,N)+max(M,N).
+*          For good performance, LWORK should generally be larger.
+*          If LWORK < 0 but other input arguments are legal, WORK(1)
+*          returns the optimal LWORK.
+*
+*  RWORK   (workspace) REAL array, dimension (LRWORK)
+*          If JOBZ = 'N', LRWORK >= 7*min(M,N).
+*          Otherwise, LRWORK >= 5*min(M,N)*min(M,N) + 5*min(M,N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (8*min(M,N))
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  The updating process of SBDSDC did not converge.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0E0, 0.0E0 ),
+     $                   CONE = ( 1.0E0, 0.0E0 ) )
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, WNTQA, WNTQAS, WNTQN, WNTQO, WNTQS
+      INTEGER            BLK, CHUNK, I, IE, IERR, IL, IR, IRU, IRVT,
+     $                   ISCL, ITAU, ITAUP, ITAUQ, IU, IVT, LDWKVT,
+     $                   LDWRKL, LDWRKR, LDWRKU, MAXWRK, MINMN, MINWRK,
+     $                   MNTHR1, MNTHR2, NRWORK, NWORK, WRKBL
+      REAL               ANRM, BIGNUM, EPS, SMLNUM
+*     ..
+*     .. Local Arrays ..
+      INTEGER            IDUM( 1 )
+      REAL               DUM( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEBRD, CGELQF, CGEMM, CGEQRF, CLACP2, CLACPY, 
+     $                   CLACRM, CLARCM, CLASCL, CLASET, CUNGBR, CUNGLQ, 
+     $                   CUNGQR, CUNMBR, SBDSDC, SLASCL, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               CLANGE, SLAMCH
+      EXTERNAL           CLANGE, ILAENV, LSAME, SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      MINMN = MIN( M, N )
+      MNTHR1 = INT( MINMN*17.0E0 / 9.0E0 )
+      MNTHR2 = INT( MINMN*5.0E0 / 3.0E0 )
+      WNTQA = LSAME( JOBZ, 'A' )
+      WNTQS = LSAME( JOBZ, 'S' )
+      WNTQAS = WNTQA .OR. WNTQS
+      WNTQO = LSAME( JOBZ, 'O' )
+      WNTQN = LSAME( JOBZ, 'N' )
+      MINWRK = 1
+      MAXWRK = 1
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      IF( .NOT.( WNTQA .OR. WNTQS .OR. WNTQO .OR. WNTQN ) ) THEN
+         INFO = -1
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDU.LT.1 .OR. ( WNTQAS .AND. LDU.LT.M ) .OR.
+     $         ( WNTQO .AND. M.LT.N .AND. LDU.LT.M ) ) THEN
+         INFO = -8
+      ELSE IF( LDVT.LT.1 .OR. ( WNTQA .AND. LDVT.LT.N ) .OR.
+     $         ( WNTQS .AND. LDVT.LT.MINMN ) .OR.
+     $         ( WNTQO .AND. M.GE.N .AND. LDVT.LT.N ) ) THEN
+         INFO = -10
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       CWorkspace refers to complex workspace, and RWorkspace to
+*       real workspace. NB refers to the optimal block size for the
+*       immediately following subroutine, as returned by ILAENV.)
+*
+      IF( INFO.EQ.0 .AND. M.GT.0 .AND. N.GT.0 ) THEN
+         IF( M.GE.N ) THEN
+*
+*           There is no complex work space needed for bidiagonal SVD
+*           The real work space needed for bidiagonal SVD is BDSPAC,
+*           BDSPAC = 3*N*N + 4*N
+*
+            IF( M.GE.MNTHR1 ) THEN
+               IF( WNTQN ) THEN
+*
+*                 Path 1 (M much larger than N, JOBZ='N')
+*
+                  WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1,
+     $                    -1 )
+                  WRKBL = MAX( WRKBL, 2*N+2*N*
+     $                    ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
+                  MAXWRK = WRKBL
+                  MINWRK = 3*N
+               ELSE IF( WNTQO ) THEN
+*
+*                 Path 2 (M much larger than N, JOBZ='O')
+*
+                  WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M,
+     $                    N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+2*N*
+     $                    ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+N*
+     $                    ILAENV( 1, 'CUNMBR', 'QLN', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+N*
+     $                    ILAENV( 1, 'CUNMBR', 'PRC', N, N, N, -1 ) )
+                  MAXWRK = M*N + N*N + WRKBL
+                  MINWRK = 2*N*N + 3*N
+               ELSE IF( WNTQS ) THEN
+*
+*                 Path 3 (M much larger than N, JOBZ='S')
+*
+                  WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M,
+     $                    N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+2*N*
+     $                    ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+N*
+     $                    ILAENV( 1, 'CUNMBR', 'QLN', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+N*
+     $                    ILAENV( 1, 'CUNMBR', 'PRC', N, N, N, -1 ) )
+                  MAXWRK = N*N + WRKBL
+                  MINWRK = N*N + 3*N
+               ELSE IF( WNTQA ) THEN
+*
+*                 Path 4 (M much larger than N, JOBZ='A')
+*
+                  WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'CUNGQR', ' ', M,
+     $                    M, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+2*N*
+     $                    ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+N*
+     $                    ILAENV( 1, 'CUNMBR', 'QLN', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+N*
+     $                    ILAENV( 1, 'CUNMBR', 'PRC', N, N, N, -1 ) )
+                  MAXWRK = N*N + WRKBL
+                  MINWRK = N*N + 2*N + M
+               END IF
+            ELSE IF( M.GE.MNTHR2 ) THEN
+*
+*              Path 5 (M much larger than N, but not as much as MNTHR1)
+*
+               MAXWRK = 2*N + ( M+N )*ILAENV( 1, 'CGEBRD', ' ', M, N,
+     $                  -1, -1 )
+               MINWRK = 2*N + M
+               IF( WNTQO ) THEN
+                  MAXWRK = MAX( MAXWRK, 2*N+N*
+     $                     ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) )
+                  MAXWRK = MAX( MAXWRK, 2*N+N*
+     $                     ILAENV( 1, 'CUNGBR', 'Q', M, N, N, -1 ) )
+                  MAXWRK = MAXWRK + M*N
+                  MINWRK = MINWRK + N*N
+               ELSE IF( WNTQS ) THEN
+                  MAXWRK = MAX( MAXWRK, 2*N+N*
+     $                     ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) )
+                  MAXWRK = MAX( MAXWRK, 2*N+N*
+     $                     ILAENV( 1, 'CUNGBR', 'Q', M, N, N, -1 ) )
+               ELSE IF( WNTQA ) THEN
+                  MAXWRK = MAX( MAXWRK, 2*N+N*
+     $                     ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) )
+                  MAXWRK = MAX( MAXWRK, 2*N+M*
+     $                     ILAENV( 1, 'CUNGBR', 'Q', M, M, N, -1 ) )
+               END IF
+            ELSE
+*
+*              Path 6 (M at least N, but not much larger)
+*
+               MAXWRK = 2*N + ( M+N )*ILAENV( 1, 'CGEBRD', ' ', M, N,
+     $                  -1, -1 )
+               MINWRK = 2*N + M
+               IF( WNTQO ) THEN
+                  MAXWRK = MAX( MAXWRK, 2*N+N*
+     $                     ILAENV( 1, 'CUNMBR', 'PRC', N, N, N, -1 ) )
+                  MAXWRK = MAX( MAXWRK, 2*N+N*
+     $                     ILAENV( 1, 'CUNMBR', 'QLN', M, N, N, -1 ) )
+                  MAXWRK = MAXWRK + M*N
+                  MINWRK = MINWRK + N*N
+               ELSE IF( WNTQS ) THEN
+                  MAXWRK = MAX( MAXWRK, 2*N+N*
+     $                     ILAENV( 1, 'CUNMBR', 'PRC', N, N, N, -1 ) )
+                  MAXWRK = MAX( MAXWRK, 2*N+N*
+     $                     ILAENV( 1, 'CUNMBR', 'QLN', M, N, N, -1 ) )
+               ELSE IF( WNTQA ) THEN
+                  MAXWRK = MAX( MAXWRK, 2*N+N*
+     $                     ILAENV( 1, 'CUNGBR', 'PRC', N, N, N, -1 ) )
+                  MAXWRK = MAX( MAXWRK, 2*N+M*
+     $                     ILAENV( 1, 'CUNGBR', 'QLN', M, M, N, -1 ) )
+               END IF
+            END IF
+         ELSE
+*
+*           There is no complex work space needed for bidiagonal SVD
+*           The real work space needed for bidiagonal SVD is BDSPAC,
+*           BDSPAC = 3*M*M + 4*M
+*
+            IF( N.GE.MNTHR1 ) THEN
+               IF( WNTQN ) THEN
+*
+*                 Path 1t (N much larger than M, JOBZ='N')
+*
+                  MAXWRK = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1,
+     $                     -1 )
+                  MAXWRK = MAX( MAXWRK, 2*M+2*M*
+     $                     ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
+                  MINWRK = 3*M
+               ELSE IF( WNTQO ) THEN
+*
+*                 Path 2t (N much larger than M, JOBZ='O')
+*
+                  WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+2*M*
+     $                    ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+M*
+     $                    ILAENV( 1, 'CUNMBR', 'PRC', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+M*
+     $                    ILAENV( 1, 'CUNMBR', 'QLN', M, M, M, -1 ) )
+                  MAXWRK = M*N + M*M + WRKBL
+                  MINWRK = 2*M*M + 3*M
+               ELSE IF( WNTQS ) THEN
+*
+*                 Path 3t (N much larger than M, JOBZ='S')
+*
+                  WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+2*M*
+     $                    ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+M*
+     $                    ILAENV( 1, 'CUNMBR', 'PRC', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+M*
+     $                    ILAENV( 1, 'CUNMBR', 'QLN', M, M, M, -1 ) )
+                  MAXWRK = M*M + WRKBL
+                  MINWRK = M*M + 3*M
+               ELSE IF( WNTQA ) THEN
+*
+*                 Path 4t (N much larger than M, JOBZ='A')
+*
+                  WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'CUNGLQ', ' ', N,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+2*M*
+     $                    ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+M*
+     $                    ILAENV( 1, 'CUNMBR', 'PRC', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+M*
+     $                    ILAENV( 1, 'CUNMBR', 'QLN', M, M, M, -1 ) )
+                  MAXWRK = M*M + WRKBL
+                  MINWRK = M*M + 2*M + N
+               END IF
+            ELSE IF( N.GE.MNTHR2 ) THEN
+*
+*              Path 5t (N much larger than M, but not as much as MNTHR1)
+*
+               MAXWRK = 2*M + ( M+N )*ILAENV( 1, 'CGEBRD', ' ', M, N,
+     $                  -1, -1 )
+               MINWRK = 2*M + N
+               IF( WNTQO ) THEN
+                  MAXWRK = MAX( MAXWRK, 2*M+M*
+     $                     ILAENV( 1, 'CUNGBR', 'P', M, N, M, -1 ) )
+                  MAXWRK = MAX( MAXWRK, 2*M+M*
+     $                     ILAENV( 1, 'CUNGBR', 'Q', M, M, N, -1 ) )
+                  MAXWRK = MAXWRK + M*N
+                  MINWRK = MINWRK + M*M
+               ELSE IF( WNTQS ) THEN
+                  MAXWRK = MAX( MAXWRK, 2*M+M*
+     $                     ILAENV( 1, 'CUNGBR', 'P', M, N, M, -1 ) )
+                  MAXWRK = MAX( MAXWRK, 2*M+M*
+     $                     ILAENV( 1, 'CUNGBR', 'Q', M, M, N, -1 ) )
+               ELSE IF( WNTQA ) THEN
+                  MAXWRK = MAX( MAXWRK, 2*M+N*
+     $                     ILAENV( 1, 'CUNGBR', 'P', N, N, M, -1 ) )
+                  MAXWRK = MAX( MAXWRK, 2*M+M*
+     $                     ILAENV( 1, 'CUNGBR', 'Q', M, M, N, -1 ) )
+               END IF
+            ELSE
+*
+*              Path 6t (N greater than M, but not much larger)
+*
+               MAXWRK = 2*M + ( M+N )*ILAENV( 1, 'CGEBRD', ' ', M, N,
+     $                  -1, -1 )
+               MINWRK = 2*M + N
+               IF( WNTQO ) THEN
+                  MAXWRK = MAX( MAXWRK, 2*M+M*
+     $                     ILAENV( 1, 'CUNMBR', 'PRC', M, N, M, -1 ) )
+                  MAXWRK = MAX( MAXWRK, 2*M+M*
+     $                     ILAENV( 1, 'CUNMBR', 'QLN', M, M, N, -1 ) )
+                  MAXWRK = MAXWRK + M*N
+                  MINWRK = MINWRK + M*M
+               ELSE IF( WNTQS ) THEN
+                  MAXWRK = MAX( MAXWRK, 2*M+M*
+     $                     ILAENV( 1, 'CUNGBR', 'PRC', M, N, M, -1 ) )
+                  MAXWRK = MAX( MAXWRK, 2*M+M*
+     $                     ILAENV( 1, 'CUNGBR', 'QLN', M, M, N, -1 ) )
+               ELSE IF( WNTQA ) THEN
+                  MAXWRK = MAX( MAXWRK, 2*M+N*
+     $                     ILAENV( 1, 'CUNGBR', 'PRC', N, N, M, -1 ) )
+                  MAXWRK = MAX( MAXWRK, 2*M+M*
+     $                     ILAENV( 1, 'CUNGBR', 'QLN', M, M, N, -1 ) )
+               END IF
+            END IF
+         END IF
+         MAXWRK = MAX( MAXWRK, MINWRK )
+         WORK( 1 ) = MAXWRK
+      END IF
+*
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGESDD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         IF( LWORK.GE.1 )
+     $      WORK( 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SQRT( SLAMCH( 'S' ) ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = CLANGE( 'M', M, N, A, LDA, DUM )
+      ISCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         ISCL = 1
+         CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         ISCL = 1
+         CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
+      END IF
+*
+      IF( M.GE.N ) THEN
+*
+*        A has at least as many rows as columns. If A has sufficiently
+*        more rows than columns, first reduce using the QR
+*        decomposition (if sufficient workspace available)
+*
+         IF( M.GE.MNTHR1 ) THEN
+*
+            IF( WNTQN ) THEN
+*
+*              Path 1 (M much larger than N, JOBZ='N')
+*              No singular vectors to be computed
+*
+               ITAU = 1
+               NWORK = ITAU + N
+*
+*              Compute A=Q*R
+*              (CWorkspace: need 2*N, prefer N+N*NB)
+*              (RWorkspace: need 0)
+*
+               CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Zero out below R
+*
+               CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ),
+     $                      LDA )
+               IE = 1
+               ITAUQ = 1
+               ITAUP = ITAUQ + N
+               NWORK = ITAUP + N
+*
+*              Bidiagonalize R in A
+*              (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
+*              (RWorkspace: need N)
+*
+               CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                      IERR )
+               NRWORK = IE + N
+*
+*              Perform bidiagonal SVD, compute singular values only
+*              (CWorkspace: 0)
+*              (RWorkspace: need BDSPAC)
+*
+               CALL SBDSDC( 'U', 'N', N, S, RWORK( IE ), DUM, 1, DUM, 1,
+     $                      DUM, IDUM, RWORK( NRWORK ), IWORK, INFO )
+*
+            ELSE IF( WNTQO ) THEN
+*
+*              Path 2 (M much larger than N, JOBZ='O')
+*              N left singular vectors to be overwritten on A and
+*              N right singular vectors to be computed in VT
+*
+               IU = 1
+*
+*              WORK(IU) is N by N
+*
+               LDWRKU = N
+               IR = IU + LDWRKU*N
+               IF( LWORK.GE.M*N+N*N+3*N ) THEN
+*
+*                 WORK(IR) is M by N
+*
+                  LDWRKR = M
+               ELSE
+                  LDWRKR = ( LWORK-N*N-3*N ) / N
+               END IF
+               ITAU = IR + LDWRKR*N
+               NWORK = ITAU + N
+*
+*              Compute A=Q*R
+*              (CWorkspace: need N*N+2*N, prefer M*N+N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy R to WORK( IR ), zeroing out below it
+*
+               CALL CLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
+               CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, WORK( IR+1 ),
+     $                      LDWRKR )
+*
+*              Generate Q in A
+*              (CWorkspace: need 2*N, prefer N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+               IE = 1
+               ITAUQ = ITAU
+               ITAUP = ITAUQ + N
+               NWORK = ITAUP + N
+*
+*              Bidiagonalize R in WORK(IR)
+*              (CWorkspace: need N*N+3*N, prefer M*N+2*N+2*N*NB)
+*              (RWorkspace: need N)
+*
+               CALL CGEBRD( N, N, WORK( IR ), LDWRKR, S, RWORK( IE ),
+     $                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of R in WORK(IRU) and computing right singular vectors
+*              of R in WORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               IRU = IE + N
+               IRVT = IRU + N*N
+               NRWORK = IRVT + N*N
+               CALL SBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ),
+     $                      N, RWORK( IRVT ), N, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
+*              Overwrite WORK(IU) by the left singular vectors of R
+*              (CWorkspace: need 2*N*N+3*N, prefer M*N+N*N+2*N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL CLACP2( 'F', N, N, RWORK( IRU ), N, WORK( IU ),
+     $                      LDWRKU )
+               CALL CUNMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR,
+     $                      WORK( ITAUQ ), WORK( IU ), LDWRKU,
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Copy real matrix RWORK(IRVT) to complex matrix VT
+*              Overwrite VT by the right singular vectors of R
+*              (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL CLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT )
+               CALL CUNMBR( 'P', 'R', 'C', N, N, N, WORK( IR ), LDWRKR,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Multiply Q in A by left singular vectors of R in
+*              WORK(IU), storing result in WORK(IR) and copying to A
+*              (CWorkspace: need 2*N*N, prefer N*N+M*N)
+*              (RWorkspace: 0)
+*
+               DO 10 I = 1, M, LDWRKR
+                  CHUNK = MIN( M-I+1, LDWRKR )
+                  CALL CGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ),
+     $                        LDA, WORK( IU ), LDWRKU, CZERO,
+     $                        WORK( IR ), LDWRKR )
+                  CALL CLACPY( 'F', CHUNK, N, WORK( IR ), LDWRKR,
+     $                         A( I, 1 ), LDA )
+   10          CONTINUE
+*
+            ELSE IF( WNTQS ) THEN
+*
+*              Path 3 (M much larger than N, JOBZ='S')
+*              N left singular vectors to be computed in U and
+*              N right singular vectors to be computed in VT
+*
+               IR = 1
+*
+*              WORK(IR) is N by N
+*
+               LDWRKR = N
+               ITAU = IR + LDWRKR*N
+               NWORK = ITAU + N
+*
+*              Compute A=Q*R
+*              (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy R to WORK(IR), zeroing out below it
+*
+               CALL CLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
+               CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, WORK( IR+1 ),
+     $                      LDWRKR )
+*
+*              Generate Q in A
+*              (CWorkspace: need 2*N, prefer N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+               IE = 1
+               ITAUQ = ITAU
+               ITAUP = ITAUQ + N
+               NWORK = ITAUP + N
+*
+*              Bidiagonalize R in WORK(IR)
+*              (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
+*              (RWorkspace: need N)
+*
+               CALL CGEBRD( N, N, WORK( IR ), LDWRKR, S, RWORK( IE ),
+     $                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               IRU = IE + N
+               IRVT = IRU + N*N
+               NRWORK = IRVT + N*N
+               CALL SBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ),
+     $                      N, RWORK( IRVT ), N, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Copy real matrix RWORK(IRU) to complex matrix U
+*              Overwrite U by left singular vectors of R
+*              (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL CLACP2( 'F', N, N, RWORK( IRU ), N, U, LDU )
+               CALL CUNMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy real matrix RWORK(IRVT) to complex matrix VT
+*              Overwrite VT by right singular vectors of R
+*              (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL CLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT )
+               CALL CUNMBR( 'P', 'R', 'C', N, N, N, WORK( IR ), LDWRKR,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Multiply Q in A by left singular vectors of R in
+*              WORK(IR), storing result in U
+*              (CWorkspace: need N*N)
+*              (RWorkspace: 0)
+*
+               CALL CLACPY( 'F', N, N, U, LDU, WORK( IR ), LDWRKR )
+               CALL CGEMM( 'N', 'N', M, N, N, CONE, A, LDA, WORK( IR ),
+     $                     LDWRKR, CZERO, U, LDU )
+*
+            ELSE IF( WNTQA ) THEN
+*
+*              Path 4 (M much larger than N, JOBZ='A')
+*              M left singular vectors to be computed in U and
+*              N right singular vectors to be computed in VT
+*
+               IU = 1
+*
+*              WORK(IU) is N by N
+*
+               LDWRKU = N
+               ITAU = IU + LDWRKU*N
+               NWORK = ITAU + N
+*
+*              Compute A=Q*R, copying result to U
+*              (CWorkspace: need 2*N, prefer N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+               CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*              Generate Q in U
+*              (CWorkspace: need N+M, prefer N+M*NB)
+*              (RWorkspace: 0)
+*
+               CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Produce R in A, zeroing out below it
+*
+               CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ),
+     $                      LDA )
+               IE = 1
+               ITAUQ = ITAU
+               ITAUP = ITAUQ + N
+               NWORK = ITAUP + N
+*
+*              Bidiagonalize R in A
+*              (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
+*              (RWorkspace: need N)
+*
+               CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                      IERR )
+               IRU = IE + N
+               IRVT = IRU + N*N
+               NRWORK = IRVT + N*N
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               CALL SBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ),
+     $                      N, RWORK( IRVT ), N, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
+*              Overwrite WORK(IU) by left singular vectors of R
+*              (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL CLACP2( 'F', N, N, RWORK( IRU ), N, WORK( IU ),
+     $                      LDWRKU )
+               CALL CUNMBR( 'Q', 'L', 'N', N, N, N, A, LDA,
+     $                      WORK( ITAUQ ), WORK( IU ), LDWRKU,
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Copy real matrix RWORK(IRVT) to complex matrix VT
+*              Overwrite VT by right singular vectors of R
+*              (CWorkspace: need 3*N, prefer 2*N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL CLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT )
+               CALL CUNMBR( 'P', 'R', 'C', N, N, N, A, LDA,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Multiply Q in U by left singular vectors of R in
+*              WORK(IU), storing result in A
+*              (CWorkspace: need N*N)
+*              (RWorkspace: 0)
+*
+               CALL CGEMM( 'N', 'N', M, N, N, CONE, U, LDU, WORK( IU ),
+     $                     LDWRKU, CZERO, A, LDA )
+*
+*              Copy left singular vectors of A from A to U
+*
+               CALL CLACPY( 'F', M, N, A, LDA, U, LDU )
+*
+            END IF
+*
+         ELSE IF( M.GE.MNTHR2 ) THEN
+*
+*           MNTHR2 <= M < MNTHR1
+*
+*           Path 5 (M much larger than N, but not as much as MNTHR1)
+*           Reduce to bidiagonal form without QR decomposition, use
+*           CUNGBR and matrix multiplication to compute singular vectors
+*
+            IE = 1
+            NRWORK = IE + N
+            ITAUQ = 1
+            ITAUP = ITAUQ + N
+            NWORK = ITAUP + N
+*
+*           Bidiagonalize A
+*           (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
+*           (RWorkspace: need N)
+*
+            CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                   WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                   IERR )
+            IF( WNTQN ) THEN
+*
+*              Compute singular values only
+*              (Cworkspace: 0)
+*              (Rworkspace: need BDSPAC)
+*
+               CALL SBDSDC( 'U', 'N', N, S, RWORK( IE ), DUM, 1, DUM, 1,
+     $                      DUM, IDUM, RWORK( NRWORK ), IWORK, INFO )
+            ELSE IF( WNTQO ) THEN
+               IU = NWORK
+               IRU = NRWORK
+               IRVT = IRU + N*N
+               NRWORK = IRVT + N*N
+*
+*              Copy A to VT, generate P**H
+*              (Cworkspace: need 2*N, prefer N+N*NB)
+*              (Rworkspace: 0)
+*
+               CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT )
+               CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Generate Q in A
+*              (CWorkspace: need 2*N, prefer N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL CUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+               IF( LWORK.GE.M*N+3*N ) THEN
+*
+*                 WORK( IU ) is M by N
+*
+                  LDWRKU = M
+               ELSE
+*
+*                 WORK(IU) is LDWRKU by N
+*
+                  LDWRKU = ( LWORK-3*N ) / N
+               END IF
+               NWORK = IU + LDWRKU*N
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               CALL SBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ),
+     $                      N, RWORK( IRVT ), N, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Multiply real matrix RWORK(IRVT) by P**H in VT,
+*              storing the result in WORK(IU), copying to VT
+*              (Cworkspace: need 0)
+*              (Rworkspace: need 3*N*N)
+*
+               CALL CLARCM( N, N, RWORK( IRVT ), N, VT, LDVT,
+     $                      WORK( IU ), LDWRKU, RWORK( NRWORK ) )
+               CALL CLACPY( 'F', N, N, WORK( IU ), LDWRKU, VT, LDVT )
+*
+*              Multiply Q in A by real matrix RWORK(IRU), storing the
+*              result in WORK(IU), copying to A
+*              (CWorkspace: need N*N, prefer M*N)
+*              (Rworkspace: need 3*N*N, prefer N*N+2*M*N)
+*
+               NRWORK = IRVT
+               DO 20 I = 1, M, LDWRKU
+                  CHUNK = MIN( M-I+1, LDWRKU )
+                  CALL CLACRM( CHUNK, N, A( I, 1 ), LDA, RWORK( IRU ),
+     $                         N, WORK( IU ), LDWRKU, RWORK( NRWORK ) )
+                  CALL CLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
+     $                         A( I, 1 ), LDA )
+   20          CONTINUE
+*
+            ELSE IF( WNTQS ) THEN
+*
+*              Copy A to VT, generate P**H
+*              (Cworkspace: need 2*N, prefer N+N*NB)
+*              (Rworkspace: 0)
+*
+               CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT )
+               CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Copy A to U, generate Q
+*              (Cworkspace: need 2*N, prefer N+N*NB)
+*              (Rworkspace: 0)
+*
+               CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
+               CALL CUNGBR( 'Q', M, N, N, U, LDU, WORK( ITAUQ ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               IRU = NRWORK
+               IRVT = IRU + N*N
+               NRWORK = IRVT + N*N
+               CALL SBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ),
+     $                      N, RWORK( IRVT ), N, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Multiply real matrix RWORK(IRVT) by P**H in VT,
+*              storing the result in A, copying to VT
+*              (Cworkspace: need 0)
+*              (Rworkspace: need 3*N*N)
+*
+               CALL CLARCM( N, N, RWORK( IRVT ), N, VT, LDVT, A, LDA,
+     $                      RWORK( NRWORK ) )
+               CALL CLACPY( 'F', N, N, A, LDA, VT, LDVT )
+*
+*              Multiply Q in U by real matrix RWORK(IRU), storing the
+*              result in A, copying to U
+*              (CWorkspace: need 0)
+*              (Rworkspace: need N*N+2*M*N)
+*
+               NRWORK = IRVT
+               CALL CLACRM( M, N, U, LDU, RWORK( IRU ), N, A, LDA,
+     $                      RWORK( NRWORK ) )
+               CALL CLACPY( 'F', M, N, A, LDA, U, LDU )
+            ELSE
+*
+*              Copy A to VT, generate P**H
+*              (Cworkspace: need 2*N, prefer N+N*NB)
+*              (Rworkspace: 0)
+*
+               CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT )
+               CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Copy A to U, generate Q
+*              (Cworkspace: need 2*N, prefer N+N*NB)
+*              (Rworkspace: 0)
+*
+               CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
+               CALL CUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               IRU = NRWORK
+               IRVT = IRU + N*N
+               NRWORK = IRVT + N*N
+               CALL SBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ),
+     $                      N, RWORK( IRVT ), N, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Multiply real matrix RWORK(IRVT) by P**H in VT,
+*              storing the result in A, copying to VT
+*              (Cworkspace: need 0)
+*              (Rworkspace: need 3*N*N)
+*
+               CALL CLARCM( N, N, RWORK( IRVT ), N, VT, LDVT, A, LDA,
+     $                      RWORK( NRWORK ) )
+               CALL CLACPY( 'F', N, N, A, LDA, VT, LDVT )
+*
+*              Multiply Q in U by real matrix RWORK(IRU), storing the
+*              result in A, copying to U
+*              (CWorkspace: 0)
+*              (Rworkspace: need 3*N*N)
+*
+               NRWORK = IRVT
+               CALL CLACRM( M, N, U, LDU, RWORK( IRU ), N, A, LDA,
+     $                      RWORK( NRWORK ) )
+               CALL CLACPY( 'F', M, N, A, LDA, U, LDU )
+            END IF
+*
+         ELSE
+*
+*           M .LT. MNTHR2
+*
+*           Path 6 (M at least N, but not much larger)
+*           Reduce to bidiagonal form without QR decomposition
+*           Use CUNMBR to compute singular vectors
+*
+            IE = 1
+            NRWORK = IE + N
+            ITAUQ = 1
+            ITAUP = ITAUQ + N
+            NWORK = ITAUP + N
+*
+*           Bidiagonalize A
+*           (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
+*           (RWorkspace: need N)
+*
+            CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                   WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                   IERR )
+            IF( WNTQN ) THEN
+*
+*              Compute singular values only
+*              (Cworkspace: 0)
+*              (Rworkspace: need BDSPAC)
+*
+               CALL SBDSDC( 'U', 'N', N, S, RWORK( IE ), DUM, 1, DUM, 1,
+     $                      DUM, IDUM, RWORK( NRWORK ), IWORK, INFO )
+            ELSE IF( WNTQO ) THEN
+               IU = NWORK
+               IRU = NRWORK
+               IRVT = IRU + N*N
+               NRWORK = IRVT + N*N
+               IF( LWORK.GE.M*N+3*N ) THEN
+*
+*                 WORK( IU ) is M by N
+*
+                  LDWRKU = M
+               ELSE
+*
+*                 WORK( IU ) is LDWRKU by N
+*
+                  LDWRKU = ( LWORK-3*N ) / N
+               END IF
+               NWORK = IU + LDWRKU*N
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               CALL SBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ),
+     $                      N, RWORK( IRVT ), N, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Copy real matrix RWORK(IRVT) to complex matrix VT
+*              Overwrite VT by right singular vectors of A
+*              (Cworkspace: need 2*N, prefer N+N*NB)
+*              (Rworkspace: need 0)
+*
+               CALL CLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT )
+               CALL CUNMBR( 'P', 'R', 'C', N, N, N, A, LDA,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+               IF( LWORK.GE.M*N+3*N ) THEN
+*
+*              Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
+*              Overwrite WORK(IU) by left singular vectors of A, copying
+*              to A
+*              (Cworkspace: need M*N+2*N, prefer M*N+N+N*NB)
+*              (Rworkspace: need 0)
+*
+                  CALL CLASET( 'F', M, N, CZERO, CZERO, WORK( IU ),
+     $                         LDWRKU )
+                  CALL CLACP2( 'F', N, N, RWORK( IRU ), N, WORK( IU ),
+     $                         LDWRKU )
+                  CALL CUNMBR( 'Q', 'L', 'N', M, N, N, A, LDA,
+     $                         WORK( ITAUQ ), WORK( IU ), LDWRKU,
+     $                         WORK( NWORK ), LWORK-NWORK+1, IERR )
+                  CALL CLACPY( 'F', M, N, WORK( IU ), LDWRKU, A, LDA )
+               ELSE
+*
+*                 Generate Q in A
+*                 (Cworkspace: need 2*N, prefer N+N*NB)
+*                 (Rworkspace: need 0)
+*
+                  CALL CUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
+     $                         WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*                 Multiply Q in A by real matrix RWORK(IRU), storing the
+*                 result in WORK(IU), copying to A
+*                 (CWorkspace: need N*N, prefer M*N)
+*                 (Rworkspace: need 3*N*N, prefer N*N+2*M*N)
+*
+                  NRWORK = IRVT
+                  DO 30 I = 1, M, LDWRKU
+                     CHUNK = MIN( M-I+1, LDWRKU )
+                     CALL CLACRM( CHUNK, N, A( I, 1 ), LDA,
+     $                            RWORK( IRU ), N, WORK( IU ), LDWRKU,
+     $                            RWORK( NRWORK ) )
+                     CALL CLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
+     $                            A( I, 1 ), LDA )
+   30             CONTINUE
+               END IF
+*
+            ELSE IF( WNTQS ) THEN
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               IRU = NRWORK
+               IRVT = IRU + N*N
+               NRWORK = IRVT + N*N
+               CALL SBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ),
+     $                      N, RWORK( IRVT ), N, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Copy real matrix RWORK(IRU) to complex matrix U
+*              Overwrite U by left singular vectors of A
+*              (CWorkspace: need 3*N, prefer 2*N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL CLASET( 'F', M, N, CZERO, CZERO, U, LDU )
+               CALL CLACP2( 'F', N, N, RWORK( IRU ), N, U, LDU )
+               CALL CUNMBR( 'Q', 'L', 'N', M, N, N, A, LDA,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy real matrix RWORK(IRVT) to complex matrix VT
+*              Overwrite VT by right singular vectors of A
+*              (CWorkspace: need 3*N, prefer 2*N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL CLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT )
+               CALL CUNMBR( 'P', 'R', 'C', N, N, N, A, LDA,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+            ELSE
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               IRU = NRWORK
+               IRVT = IRU + N*N
+               NRWORK = IRVT + N*N
+               CALL SBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ),
+     $                      N, RWORK( IRVT ), N, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Set the right corner of U to identity matrix
+*
+               CALL CLASET( 'F', M, M, CZERO, CZERO, U, LDU )
+               CALL CLASET( 'F', M-N, M-N, CZERO, CONE, U( N+1, N+1 ),
+     $                      LDU )
+*
+*              Copy real matrix RWORK(IRU) to complex matrix U
+*              Overwrite U by left singular vectors of A
+*              (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
+*              (RWorkspace: 0)
+*
+               CALL CLACP2( 'F', N, N, RWORK( IRU ), N, U, LDU )
+               CALL CUNMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy real matrix RWORK(IRVT) to complex matrix VT
+*              Overwrite VT by right singular vectors of A
+*              (CWorkspace: need 3*N, prefer 2*N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL CLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT )
+               CALL CUNMBR( 'P', 'R', 'C', N, N, N, A, LDA,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+            END IF
+*
+         END IF
+*
+      ELSE
+*
+*        A has more columns than rows. If A has sufficiently more
+*        columns than rows, first reduce using the LQ decomposition
+*        (if sufficient workspace available)
+*
+         IF( N.GE.MNTHR1 ) THEN
+*
+            IF( WNTQN ) THEN
+*
+*              Path 1t (N much larger than M, JOBZ='N')
+*              No singular vectors to be computed
+*
+               ITAU = 1
+               NWORK = ITAU + M
+*
+*              Compute A=L*Q
+*              (CWorkspace: need 2*M, prefer M+M*NB)
+*              (RWorkspace: 0)
+*
+               CALL CGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Zero out above L
+*
+               CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, A( 1, 2 ),
+     $                      LDA )
+               IE = 1
+               ITAUQ = 1
+               ITAUP = ITAUQ + M
+               NWORK = ITAUP + M
+*
+*              Bidiagonalize L in A
+*              (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
+*              (RWorkspace: need M)
+*
+               CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                      IERR )
+               NRWORK = IE + M
+*
+*              Perform bidiagonal SVD, compute singular values only
+*              (CWorkspace: 0)
+*              (RWorkspace: need BDSPAC)
+*
+               CALL SBDSDC( 'U', 'N', M, S, RWORK( IE ), DUM, 1, DUM, 1,
+     $                      DUM, IDUM, RWORK( NRWORK ), IWORK, INFO )
+*
+            ELSE IF( WNTQO ) THEN
+*
+*              Path 2t (N much larger than M, JOBZ='O')
+*              M right singular vectors to be overwritten on A and
+*              M left singular vectors to be computed in U
+*
+               IVT = 1
+               LDWKVT = M
+*
+*              WORK(IVT) is M by M
+*
+               IL = IVT + LDWKVT*M
+               IF( LWORK.GE.M*N+M*M+3*M ) THEN
+*
+*                 WORK(IL) M by N
+*
+                  LDWRKL = M
+                  CHUNK = N
+               ELSE
+*
+*                 WORK(IL) is M by CHUNK
+*
+                  LDWRKL = M
+                  CHUNK = ( LWORK-M*M-3*M ) / M
+               END IF
+               ITAU = IL + LDWRKL*CHUNK
+               NWORK = ITAU + M
+*
+*              Compute A=L*Q
+*              (CWorkspace: need 2*M, prefer M+M*NB)
+*              (RWorkspace: 0)
+*
+               CALL CGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy L to WORK(IL), zeroing about above it
+*
+               CALL CLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL )
+               CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
+     $                      WORK( IL+LDWRKL ), LDWRKL )
+*
+*              Generate Q in A
+*              (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
+*              (RWorkspace: 0)
+*
+               CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+               IE = 1
+               ITAUQ = ITAU
+               ITAUP = ITAUQ + M
+               NWORK = ITAUP + M
+*
+*              Bidiagonalize L in WORK(IL)
+*              (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
+*              (RWorkspace: need M)
+*
+               CALL CGEBRD( M, M, WORK( IL ), LDWRKL, S, RWORK( IE ),
+     $                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               IRU = IE + M
+               IRVT = IRU + M*M
+               NRWORK = IRVT + M*M
+               CALL SBDSDC( 'U', 'I', M, S, RWORK( IE ), RWORK( IRU ),
+     $                      M, RWORK( IRVT ), M, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
+*              Overwrite WORK(IU) by the left singular vectors of L
+*              (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL CLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU )
+               CALL CUNMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT)
+*              Overwrite WORK(IVT) by the right singular vectors of L
+*              (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL CLACP2( 'F', M, M, RWORK( IRVT ), M, WORK( IVT ),
+     $                      LDWKVT )
+               CALL CUNMBR( 'P', 'R', 'C', M, M, M, WORK( IL ), LDWRKL,
+     $                      WORK( ITAUP ), WORK( IVT ), LDWKVT,
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Multiply right singular vectors of L in WORK(IL) by Q
+*              in A, storing result in WORK(IL) and copying to A
+*              (CWorkspace: need 2*M*M, prefer M*M+M*N))
+*              (RWorkspace: 0)
+*
+               DO 40 I = 1, N, CHUNK
+                  BLK = MIN( N-I+1, CHUNK )
+                  CALL CGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IVT ), M,
+     $                        A( 1, I ), LDA, CZERO, WORK( IL ),
+     $                        LDWRKL )
+                  CALL CLACPY( 'F', M, BLK, WORK( IL ), LDWRKL,
+     $                         A( 1, I ), LDA )
+   40          CONTINUE
+*
+            ELSE IF( WNTQS ) THEN
+*
+*             Path 3t (N much larger than M, JOBZ='S')
+*             M right singular vectors to be computed in VT and
+*             M left singular vectors to be computed in U
+*
+               IL = 1
+*
+*              WORK(IL) is M by M
+*
+               LDWRKL = M
+               ITAU = IL + LDWRKL*M
+               NWORK = ITAU + M
+*
+*              Compute A=L*Q
+*              (CWorkspace: need 2*M, prefer M+M*NB)
+*              (RWorkspace: 0)
+*
+               CALL CGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy L to WORK(IL), zeroing out above it
+*
+               CALL CLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL )
+               CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
+     $                      WORK( IL+LDWRKL ), LDWRKL )
+*
+*              Generate Q in A
+*              (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
+*              (RWorkspace: 0)
+*
+               CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+               IE = 1
+               ITAUQ = ITAU
+               ITAUP = ITAUQ + M
+               NWORK = ITAUP + M
+*
+*              Bidiagonalize L in WORK(IL)
+*              (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
+*              (RWorkspace: need M)
+*
+               CALL CGEBRD( M, M, WORK( IL ), LDWRKL, S, RWORK( IE ),
+     $                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               IRU = IE + M
+               IRVT = IRU + M*M
+               NRWORK = IRVT + M*M
+               CALL SBDSDC( 'U', 'I', M, S, RWORK( IE ), RWORK( IRU ),
+     $                      M, RWORK( IRVT ), M, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Copy real matrix RWORK(IRU) to complex matrix U
+*              Overwrite U by left singular vectors of L
+*              (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
+*              (RWorkspace: 0)
+*
+               CALL CLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU )
+               CALL CUNMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy real matrix RWORK(IRVT) to complex matrix VT
+*              Overwrite VT by left singular vectors of L
+*              (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
+*              (RWorkspace: 0)
+*
+               CALL CLACP2( 'F', M, M, RWORK( IRVT ), M, VT, LDVT )
+               CALL CUNMBR( 'P', 'R', 'C', M, M, M, WORK( IL ), LDWRKL,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy VT to WORK(IL), multiply right singular vectors of L
+*              in WORK(IL) by Q in A, storing result in VT
+*              (CWorkspace: need M*M)
+*              (RWorkspace: 0)
+*
+               CALL CLACPY( 'F', M, M, VT, LDVT, WORK( IL ), LDWRKL )
+               CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IL ), LDWRKL,
+     $                     A, LDA, CZERO, VT, LDVT )
+*
+            ELSE IF( WNTQA ) THEN
+*
+*              Path 9t (N much larger than M, JOBZ='A')
+*              N right singular vectors to be computed in VT and
+*              M left singular vectors to be computed in U
+*
+               IVT = 1
+*
+*              WORK(IVT) is M by M
+*
+               LDWKVT = M
+               ITAU = IVT + LDWKVT*M
+               NWORK = ITAU + M
+*
+*              Compute A=L*Q, copying result to VT
+*              (CWorkspace: need 2*M, prefer M+M*NB)
+*              (RWorkspace: 0)
+*
+               CALL CGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+               CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*              Generate Q in VT
+*              (CWorkspace: need M+N, prefer M+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Produce L in A, zeroing out above it
+*
+               CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, A( 1, 2 ),
+     $                      LDA )
+               IE = 1
+               ITAUQ = ITAU
+               ITAUP = ITAUQ + M
+               NWORK = ITAUP + M
+*
+*              Bidiagonalize L in A
+*              (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
+*              (RWorkspace: need M)
+*
+               CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                      IERR )
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               IRU = IE + M
+               IRVT = IRU + M*M
+               NRWORK = IRVT + M*M
+               CALL SBDSDC( 'U', 'I', M, S, RWORK( IE ), RWORK( IRU ),
+     $                      M, RWORK( IRVT ), M, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Copy real matrix RWORK(IRU) to complex matrix U
+*              Overwrite U by left singular vectors of L
+*              (CWorkspace: need 3*M, prefer 2*M+M*NB)
+*              (RWorkspace: 0)
+*
+               CALL CLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU )
+               CALL CUNMBR( 'Q', 'L', 'N', M, M, M, A, LDA,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT)
+*              Overwrite WORK(IVT) by right singular vectors of L
+*              (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
+*              (RWorkspace: 0)
+*
+               CALL CLACP2( 'F', M, M, RWORK( IRVT ), M, WORK( IVT ),
+     $                      LDWKVT )
+               CALL CUNMBR( 'P', 'R', 'C', M, M, M, A, LDA,
+     $                      WORK( ITAUP ), WORK( IVT ), LDWKVT,
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Multiply right singular vectors of L in WORK(IVT) by
+*              Q in VT, storing result in A
+*              (CWorkspace: need M*M)
+*              (RWorkspace: 0)
+*
+               CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IVT ), LDWKVT,
+     $                     VT, LDVT, CZERO, A, LDA )
+*
+*              Copy right singular vectors of A from A to VT
+*
+               CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT )
+*
+            END IF
+*
+         ELSE IF( N.GE.MNTHR2 ) THEN
+*
+*           MNTHR2 <= N < MNTHR1
+*
+*           Path 5t (N much larger than M, but not as much as MNTHR1)
+*           Reduce to bidiagonal form without QR decomposition, use
+*           CUNGBR and matrix multiplication to compute singular vectors
+*
+*
+            IE = 1
+            NRWORK = IE + M
+            ITAUQ = 1
+            ITAUP = ITAUQ + M
+            NWORK = ITAUP + M
+*
+*           Bidiagonalize A
+*           (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
+*           (RWorkspace: M)
+*
+            CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                   WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                   IERR )
+*
+            IF( WNTQN ) THEN
+*
+*              Compute singular values only
+*              (Cworkspace: 0)
+*              (Rworkspace: need BDSPAC)
+*
+               CALL SBDSDC( 'L', 'N', M, S, RWORK( IE ), DUM, 1, DUM, 1,
+     $                      DUM, IDUM, RWORK( NRWORK ), IWORK, INFO )
+            ELSE IF( WNTQO ) THEN
+               IRVT = NRWORK
+               IRU = IRVT + M*M
+               NRWORK = IRU + M*M
+               IVT = NWORK
+*
+*              Copy A to U, generate Q
+*              (Cworkspace: need 2*M, prefer M+M*NB)
+*              (Rworkspace: 0)
+*
+               CALL CLACPY( 'L', M, M, A, LDA, U, LDU )
+               CALL CUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Generate P**H in A
+*              (Cworkspace: need 2*M, prefer M+M*NB)
+*              (Rworkspace: 0)
+*
+               CALL CUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+               LDWKVT = M
+               IF( LWORK.GE.M*N+3*M ) THEN
+*
+*                 WORK( IVT ) is M by N
+*
+                  NWORK = IVT + LDWKVT*N
+                  CHUNK = N
+               ELSE
+*
+*                 WORK( IVT ) is M by CHUNK
+*
+                  CHUNK = ( LWORK-3*M ) / M
+                  NWORK = IVT + LDWKVT*CHUNK
+               END IF
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               CALL SBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ),
+     $                      M, RWORK( IRVT ), M, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Multiply Q in U by real matrix RWORK(IRVT)
+*              storing the result in WORK(IVT), copying to U
+*              (Cworkspace: need 0)
+*              (Rworkspace: need 2*M*M)
+*
+               CALL CLACRM( M, M, U, LDU, RWORK( IRU ), M, WORK( IVT ),
+     $                      LDWKVT, RWORK( NRWORK ) )
+               CALL CLACPY( 'F', M, M, WORK( IVT ), LDWKVT, U, LDU )
+*
+*              Multiply RWORK(IRVT) by P**H in A, storing the
+*              result in WORK(IVT), copying to A
+*              (CWorkspace: need M*M, prefer M*N)
+*              (Rworkspace: need 2*M*M, prefer 2*M*N)
+*
+               NRWORK = IRU
+               DO 50 I = 1, N, CHUNK
+                  BLK = MIN( N-I+1, CHUNK )
+                  CALL CLARCM( M, BLK, RWORK( IRVT ), M, A( 1, I ), LDA,
+     $                         WORK( IVT ), LDWKVT, RWORK( NRWORK ) )
+                  CALL CLACPY( 'F', M, BLK, WORK( IVT ), LDWKVT,
+     $                         A( 1, I ), LDA )
+   50          CONTINUE
+            ELSE IF( WNTQS ) THEN
+*
+*              Copy A to U, generate Q
+*              (Cworkspace: need 2*M, prefer M+M*NB)
+*              (Rworkspace: 0)
+*
+               CALL CLACPY( 'L', M, M, A, LDA, U, LDU )
+               CALL CUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Copy A to VT, generate P**H
+*              (Cworkspace: need 2*M, prefer M+M*NB)
+*              (Rworkspace: 0)
+*
+               CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
+               CALL CUNGBR( 'P', M, N, M, VT, LDVT, WORK( ITAUP ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               IRVT = NRWORK
+               IRU = IRVT + M*M
+               NRWORK = IRU + M*M
+               CALL SBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ),
+     $                      M, RWORK( IRVT ), M, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Multiply Q in U by real matrix RWORK(IRU), storing the
+*              result in A, copying to U
+*              (CWorkspace: need 0)
+*              (Rworkspace: need 3*M*M)
+*
+               CALL CLACRM( M, M, U, LDU, RWORK( IRU ), M, A, LDA,
+     $                      RWORK( NRWORK ) )
+               CALL CLACPY( 'F', M, M, A, LDA, U, LDU )
+*
+*              Multiply real matrix RWORK(IRVT) by P**H in VT,
+*              storing the result in A, copying to VT
+*              (Cworkspace: need 0)
+*              (Rworkspace: need M*M+2*M*N)
+*
+               NRWORK = IRU
+               CALL CLARCM( M, N, RWORK( IRVT ), M, VT, LDVT, A, LDA,
+     $                      RWORK( NRWORK ) )
+               CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT )
+            ELSE
+*
+*              Copy A to U, generate Q
+*              (Cworkspace: need 2*M, prefer M+M*NB)
+*              (Rworkspace: 0)
+*
+               CALL CLACPY( 'L', M, M, A, LDA, U, LDU )
+               CALL CUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Copy A to VT, generate P**H
+*              (Cworkspace: need 2*M, prefer M+M*NB)
+*              (Rworkspace: 0)
+*
+               CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
+               CALL CUNGBR( 'P', N, N, M, VT, LDVT, WORK( ITAUP ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               IRVT = NRWORK
+               IRU = IRVT + M*M
+               NRWORK = IRU + M*M
+               CALL SBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ),
+     $                      M, RWORK( IRVT ), M, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Multiply Q in U by real matrix RWORK(IRU), storing the
+*              result in A, copying to U
+*              (CWorkspace: need 0)
+*              (Rworkspace: need 3*M*M)
+*
+               CALL CLACRM( M, M, U, LDU, RWORK( IRU ), M, A, LDA,
+     $                      RWORK( NRWORK ) )
+               CALL CLACPY( 'F', M, M, A, LDA, U, LDU )
+*
+*              Multiply real matrix RWORK(IRVT) by P**H in VT,
+*              storing the result in A, copying to VT
+*              (Cworkspace: need 0)
+*              (Rworkspace: need M*M+2*M*N)
+*
+               CALL CLARCM( M, N, RWORK( IRVT ), M, VT, LDVT, A, LDA,
+     $                      RWORK( NRWORK ) )
+               CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT )
+            END IF
+*
+         ELSE
+*
+*           N .LT. MNTHR2
+*
+*           Path 6t (N greater than M, but not much larger)
+*           Reduce to bidiagonal form without LQ decomposition
+*           Use CUNMBR to compute singular vectors
+*
+            IE = 1
+            NRWORK = IE + M
+            ITAUQ = 1
+            ITAUP = ITAUQ + M
+            NWORK = ITAUP + M
+*
+*           Bidiagonalize A
+*           (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
+*           (RWorkspace: M)
+*
+            CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                   WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                   IERR )
+            IF( WNTQN ) THEN
+*
+*              Compute singular values only
+*              (Cworkspace: 0)
+*              (Rworkspace: need BDSPAC)
+*
+               CALL SBDSDC( 'L', 'N', M, S, RWORK( IE ), DUM, 1, DUM, 1,
+     $                      DUM, IDUM, RWORK( NRWORK ), IWORK, INFO )
+            ELSE IF( WNTQO ) THEN
+               LDWKVT = M
+               IVT = NWORK
+               IF( LWORK.GE.M*N+3*M ) THEN
+*
+*                 WORK( IVT ) is M by N
+*
+                  CALL CLASET( 'F', M, N, CZERO, CZERO, WORK( IVT ),
+     $                         LDWKVT )
+                  NWORK = IVT + LDWKVT*N
+               ELSE
+*
+*                 WORK( IVT ) is M by CHUNK
+*
+                  CHUNK = ( LWORK-3*M ) / M
+                  NWORK = IVT + LDWKVT*CHUNK
+               END IF
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               IRVT = NRWORK
+               IRU = IRVT + M*M
+               NRWORK = IRU + M*M
+               CALL SBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ),
+     $                      M, RWORK( IRVT ), M, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Copy real matrix RWORK(IRU) to complex matrix U
+*              Overwrite U by left singular vectors of A
+*              (Cworkspace: need 2*M, prefer M+M*NB)
+*              (Rworkspace: need 0)
+*
+               CALL CLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU )
+               CALL CUNMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+               IF( LWORK.GE.M*N+3*M ) THEN
+*
+*              Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT)
+*              Overwrite WORK(IVT) by right singular vectors of A,
+*              copying to A
+*              (Cworkspace: need M*N+2*M, prefer M*N+M+M*NB)
+*              (Rworkspace: need 0)
+*
+                  CALL CLACP2( 'F', M, M, RWORK( IRVT ), M, WORK( IVT ),
+     $                         LDWKVT )
+                  CALL CUNMBR( 'P', 'R', 'C', M, N, M, A, LDA,
+     $                         WORK( ITAUP ), WORK( IVT ), LDWKVT,
+     $                         WORK( NWORK ), LWORK-NWORK+1, IERR )
+                  CALL CLACPY( 'F', M, N, WORK( IVT ), LDWKVT, A, LDA )
+               ELSE
+*
+*                 Generate P**H in A
+*                 (Cworkspace: need 2*M, prefer M+M*NB)
+*                 (Rworkspace: need 0)
+*
+                  CALL CUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
+     $                         WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*                 Multiply Q in A by real matrix RWORK(IRU), storing the
+*                 result in WORK(IU), copying to A
+*                 (CWorkspace: need M*M, prefer M*N)
+*                 (Rworkspace: need 3*M*M, prefer M*M+2*M*N)
+*
+                  NRWORK = IRU
+                  DO 60 I = 1, N, CHUNK
+                     BLK = MIN( N-I+1, CHUNK )
+                     CALL CLARCM( M, BLK, RWORK( IRVT ), M, A( 1, I ),
+     $                            LDA, WORK( IVT ), LDWKVT,
+     $                            RWORK( NRWORK ) )
+                     CALL CLACPY( 'F', M, BLK, WORK( IVT ), LDWKVT,
+     $                            A( 1, I ), LDA )
+   60             CONTINUE
+               END IF
+            ELSE IF( WNTQS ) THEN
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               IRVT = NRWORK
+               IRU = IRVT + M*M
+               NRWORK = IRU + M*M
+               CALL SBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ),
+     $                      M, RWORK( IRVT ), M, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Copy real matrix RWORK(IRU) to complex matrix U
+*              Overwrite U by left singular vectors of A
+*              (CWorkspace: need 3*M, prefer 2*M+M*NB)
+*              (RWorkspace: M*M)
+*
+               CALL CLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU )
+               CALL CUNMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy real matrix RWORK(IRVT) to complex matrix VT
+*              Overwrite VT by right singular vectors of A
+*              (CWorkspace: need 3*M, prefer 2*M+M*NB)
+*              (RWorkspace: M*M)
+*
+               CALL CLASET( 'F', M, N, CZERO, CZERO, VT, LDVT )
+               CALL CLACP2( 'F', M, M, RWORK( IRVT ), M, VT, LDVT )
+               CALL CUNMBR( 'P', 'R', 'C', M, N, M, A, LDA,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+            ELSE
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               IRVT = NRWORK
+               IRU = IRVT + M*M
+               NRWORK = IRU + M*M
+*
+               CALL SBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ),
+     $                      M, RWORK( IRVT ), M, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Copy real matrix RWORK(IRU) to complex matrix U
+*              Overwrite U by left singular vectors of A
+*              (CWorkspace: need 3*M, prefer 2*M+M*NB)
+*              (RWorkspace: M*M)
+*
+               CALL CLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU )
+               CALL CUNMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Set the right corner of VT to identity matrix
+*
+               CALL CLASET( 'F', N-M, N-M, CZERO, CONE, VT( M+1, M+1 ),
+     $                      LDVT )
+*
+*              Copy real matrix RWORK(IRVT) to complex matrix VT
+*              Overwrite VT by right singular vectors of A
+*              (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
+*              (RWorkspace: M*M)
+*
+               CALL CLASET( 'F', N, N, CZERO, CZERO, VT, LDVT )
+               CALL CLACP2( 'F', M, M, RWORK( IRVT ), M, VT, LDVT )
+               CALL CUNMBR( 'P', 'R', 'C', N, N, M, A, LDA,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+            END IF
+*
+         END IF
+*
+      END IF
+*
+*     Undo scaling if necessary
+*
+      IF( ISCL.EQ.1 ) THEN
+         IF( ANRM.GT.BIGNUM )
+     $      CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
+     $                   IERR )
+         IF( ANRM.LT.SMLNUM )
+     $      CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
+     $                   IERR )
+      END IF
+*
+*     Return optimal workspace in WORK(1)
+*
+      WORK( 1 ) = MAXWRK
+*
+      RETURN
+*
+*     End of CGESDD
+*
+      END
diff --git a/costa/native/external/lapack/cgesv.f b/costa/native/external/lapack/cgesv.f
new file mode 100644
index 000000000..0ed85cd42
--- /dev/null
+++ b/costa/native/external/lapack/cgesv.f
@@ -0,0 +1,108 @@
+      SUBROUTINE CGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGESV computes the solution to a complex system of linear equations
+*     A * X = B,
+*  where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
+*
+*  The LU decomposition with partial pivoting and row interchanges is
+*  used to factor A as
+*     A = P * L * U,
+*  where P is a permutation matrix, L is unit lower triangular, and U is
+*  upper triangular.  The factored form of A is then used to solve the
+*  system of equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the N-by-N coefficient matrix A.
+*          On exit, the factors L and U from the factorization
+*          A = P*L*U; the unit diagonal elements of L are not stored.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          The pivot indices that define the permutation matrix P;
+*          row i of the matrix was interchanged with row IPIV(i).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS matrix of right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, U(i,i) is exactly zero.  The factorization
+*                has been completed, but the factor U is exactly
+*                singular, so the solution could not be computed.
+*
+*  =====================================================================
+*
+*     .. External Subroutines ..
+      EXTERNAL           CGETRF, CGETRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGESV ', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the LU factorization of A.
+*
+      CALL CGETRF( N, N, A, LDA, IPIV, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL CGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, B, LDB,
+     $                INFO )
+      END IF
+      RETURN
+*
+*     End of CGESV
+*
+      END
diff --git a/costa/native/external/lapack/cgesvd.f b/costa/native/external/lapack/cgesvd.f
new file mode 100644
index 000000000..137ed7ed7
--- /dev/null
+++ b/costa/native/external/lapack/cgesvd.f
@@ -0,0 +1,3618 @@
+      SUBROUTINE CGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
+     $                   WORK, LWORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBU, JOBVT
+      INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               RWORK( * ), S( * )
+      COMPLEX            A( LDA, * ), U( LDU, * ), VT( LDVT, * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGESVD computes the singular value decomposition (SVD) of a complex
+*  M-by-N matrix A, optionally computing the left and/or right singular
+*  vectors. The SVD is written
+*
+*       A = U * SIGMA * conjugate-transpose(V)
+*
+*  where SIGMA is an M-by-N matrix which is zero except for its
+*  min(m,n) diagonal elements, U is an M-by-M unitary matrix, and
+*  V is an N-by-N unitary matrix.  The diagonal elements of SIGMA
+*  are the singular values of A; they are real and non-negative, and
+*  are returned in descending order.  The first min(m,n) columns of
+*  U and V are the left and right singular vectors of A.
+*
+*  Note that the routine returns V**H, not V.
+*
+*  Arguments
+*  =========
+*
+*  JOBU    (input) CHARACTER*1
+*          Specifies options for computing all or part of the matrix U:
+*          = 'A':  all M columns of U are returned in array U:
+*          = 'S':  the first min(m,n) columns of U (the left singular
+*                  vectors) are returned in the array U;
+*          = 'O':  the first min(m,n) columns of U (the left singular
+*                  vectors) are overwritten on the array A;
+*          = 'N':  no columns of U (no left singular vectors) are
+*                  computed.
+*
+*  JOBVT   (input) CHARACTER*1
+*          Specifies options for computing all or part of the matrix
+*          V**H:
+*          = 'A':  all N rows of V**H are returned in the array VT;
+*          = 'S':  the first min(m,n) rows of V**H (the right singular
+*                  vectors) are returned in the array VT;
+*          = 'O':  the first min(m,n) rows of V**H (the right singular
+*                  vectors) are overwritten on the array A;
+*          = 'N':  no rows of V**H (no right singular vectors) are
+*                  computed.
+*
+*          JOBVT and JOBU cannot both be 'O'.
+*
+*  M       (input) INTEGER
+*          The number of rows of the input matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the input matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit,
+*          if JOBU = 'O',  A is overwritten with the first min(m,n)
+*                          columns of U (the left singular vectors,
+*                          stored columnwise);
+*          if JOBVT = 'O', A is overwritten with the first min(m,n)
+*                          rows of V**H (the right singular vectors,
+*                          stored rowwise);
+*          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
+*                          are destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  S       (output) REAL array, dimension (min(M,N))
+*          The singular values of A, sorted so that S(i) >= S(i+1).
+*
+*  U       (output) COMPLEX array, dimension (LDU,UCOL)
+*          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
+*          If JOBU = 'A', U contains the M-by-M unitary matrix U;
+*          if JOBU = 'S', U contains the first min(m,n) columns of U
+*          (the left singular vectors, stored columnwise);
+*          if JOBU = 'N' or 'O', U is not referenced.
+*
+*  LDU     (input) INTEGER
+*          The leading dimension of the array U.  LDU >= 1; if
+*          JOBU = 'S' or 'A', LDU >= M.
+*
+*  VT      (output) COMPLEX array, dimension (LDVT,N)
+*          If JOBVT = 'A', VT contains the N-by-N unitary matrix
+*          V**H;
+*          if JOBVT = 'S', VT contains the first min(m,n) rows of
+*          V**H (the right singular vectors, stored rowwise);
+*          if JOBVT = 'N' or 'O', VT is not referenced.
+*
+*  LDVT    (input) INTEGER
+*          The leading dimension of the array VT.  LDVT >= 1; if
+*          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= 1.
+*          LWORK >=  2*MIN(M,N)+MAX(M,N).
+*          For good performance, LWORK should generally be larger.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) REAL array, dimension (5*min(M,N))
+*          On exit, if INFO > 0, RWORK(1:MIN(M,N)-1) contains the
+*          unconverged superdiagonal elements of an upper bidiagonal
+*          matrix B whose diagonal is in S (not necessarily sorted).
+*          B satisfies A = U * B * VT, so it has the same singular
+*          values as A, and singular vectors related by U and VT.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if CBDSQR did not converge, INFO specifies how many
+*                superdiagonals of an intermediate bidiagonal form B
+*                did not converge to zero. See the description of RWORK
+*                above for details.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0E0, 0.0E0 ),
+     $                   CONE = ( 1.0E0, 0.0E0 ) )
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
+     $                   WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
+      INTEGER            BLK, CHUNK, I, IE, IERR, IR, IRWORK, ISCL,
+     $                   ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
+     $                   MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
+     $                   NRVT, WRKBL
+      REAL               ANRM, BIGNUM, EPS, SMLNUM
+*     ..
+*     .. Local Arrays ..
+      REAL               DUM( 1 )
+      COMPLEX            CDUM( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CBDSQR, CGEBRD, CGELQF, CGEMM, CGEQRF, CLACPY,
+     $                   CLASCL, CLASET, CUNGBR, CUNGLQ, CUNGQR, CUNMBR,
+     $                   SLASCL, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               CLANGE, SLAMCH
+      EXTERNAL           LSAME, ILAENV, CLANGE, SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      MINMN = MIN( M, N )
+      MNTHR = ILAENV( 6, 'CGESVD', JOBU // JOBVT, M, N, 0, 0 )
+      WNTUA = LSAME( JOBU, 'A' )
+      WNTUS = LSAME( JOBU, 'S' )
+      WNTUAS = WNTUA .OR. WNTUS
+      WNTUO = LSAME( JOBU, 'O' )
+      WNTUN = LSAME( JOBU, 'N' )
+      WNTVA = LSAME( JOBVT, 'A' )
+      WNTVS = LSAME( JOBVT, 'S' )
+      WNTVAS = WNTVA .OR. WNTVS
+      WNTVO = LSAME( JOBVT, 'O' )
+      WNTVN = LSAME( JOBVT, 'N' )
+      MINWRK = 1
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR.
+     $         ( WNTVO .AND. WNTUO ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -6
+      ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN
+         INFO = -9
+      ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR.
+     $         ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN
+         INFO = -11
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       CWorkspace refers to complex workspace, and RWorkspace to
+*       real workspace. NB refers to the optimal block size for the
+*       immediately following subroutine, as returned by ILAENV.)
+*
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) .AND. M.GT.0 .AND.
+     $   N.GT.0 ) THEN
+         IF( M.GE.N ) THEN
+*
+*           Space needed for CBDSQR is BDSPAC = 5*N
+*
+            IF( M.GE.MNTHR ) THEN
+               IF( WNTUN ) THEN
+*
+*                 Path 1 (M much larger than N, JOBU='N')
+*
+                  MAXWRK = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1,
+     $                     -1 )
+                  MAXWRK = MAX( MAXWRK, 2*N+2*N*
+     $                     ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
+                  IF( WNTVO .OR. WNTVAS )
+     $               MAXWRK = MAX( MAXWRK, 2*N+( N-1 )*
+     $                        ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) )
+                  MINWRK = 3*N
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTUO .AND. WNTVN ) THEN
+*
+*                 Path 2 (M much larger than N, JOBU='O', JOBVT='N')
+*
+                  WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M,
+     $                    N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+2*N*
+     $                    ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+N*
+     $                    ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) )
+                  MAXWRK = MAX( N*N+WRKBL, N*N+M*N )
+                  MINWRK = 2*N + M
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTUO .AND. WNTVAS ) THEN
+*
+*                 Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
+*                 'A')
+*
+                  WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M,
+     $                    N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+2*N*
+     $                    ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+N*
+     $                    ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+( N-1 )*
+     $                    ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) )
+                  MAXWRK = MAX( N*N+WRKBL, N*N+M*N )
+                  MINWRK = 2*N + M
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTUS .AND. WNTVN ) THEN
+*
+*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
+*
+                  WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M,
+     $                    N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+2*N*
+     $                    ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+N*
+     $                    ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) )
+                  MAXWRK = N*N + WRKBL
+                  MINWRK = 2*N + M
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTUS .AND. WNTVO ) THEN
+*
+*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
+*
+                  WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M,
+     $                    N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+2*N*
+     $                    ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+N*
+     $                    ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+( N-1 )*
+     $                    ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) )
+                  MAXWRK = 2*N*N + WRKBL
+                  MINWRK = 2*N + M
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTUS .AND. WNTVAS ) THEN
+*
+*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
+*                 'A')
+*
+                  WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'CUNGQR', ' ', M,
+     $                    N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+2*N*
+     $                    ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+N*
+     $                    ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+( N-1 )*
+     $                    ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) )
+                  MAXWRK = N*N + WRKBL
+                  MINWRK = 2*N + M
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTUA .AND. WNTVN ) THEN
+*
+*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
+*
+                  WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'CUNGQR', ' ', M,
+     $                    M, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+2*N*
+     $                    ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+N*
+     $                    ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) )
+                  MAXWRK = N*N + WRKBL
+                  MINWRK = 2*N + M
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTUA .AND. WNTVO ) THEN
+*
+*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
+*
+                  WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'CUNGQR', ' ', M,
+     $                    M, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+2*N*
+     $                    ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+N*
+     $                    ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+( N-1 )*
+     $                    ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) )
+                  MAXWRK = 2*N*N + WRKBL
+                  MINWRK = 2*N + M
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTUA .AND. WNTVAS ) THEN
+*
+*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
+*                 'A')
+*
+                  WRKBL = N + N*ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'CUNGQR', ' ', M,
+     $                    M, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+2*N*
+     $                    ILAENV( 1, 'CGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+N*
+     $                    ILAENV( 1, 'CUNGBR', 'Q', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+( N-1 )*
+     $                    ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) )
+                  MAXWRK = N*N + WRKBL
+                  MINWRK = 2*N + M
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               END IF
+            ELSE
+*
+*              Path 10 (M at least N, but not much larger)
+*
+               MAXWRK = 2*N + ( M+N )*ILAENV( 1, 'CGEBRD', ' ', M, N,
+     $                  -1, -1 )
+               IF( WNTUS .OR. WNTUO )
+     $            MAXWRK = MAX( MAXWRK, 2*N+N*
+     $                     ILAENV( 1, 'CUNGBR', 'Q', M, N, N, -1 ) )
+               IF( WNTUA )
+     $            MAXWRK = MAX( MAXWRK, 2*N+M*
+     $                     ILAENV( 1, 'CUNGBR', 'Q', M, M, N, -1 ) )
+               IF( .NOT.WNTVN )
+     $            MAXWRK = MAX( MAXWRK, 2*N+( N-1 )*
+     $                     ILAENV( 1, 'CUNGBR', 'P', N, N, N, -1 ) )
+               MINWRK = 2*N + M
+               MAXWRK = MAX( MINWRK, MAXWRK )
+            END IF
+         ELSE
+*
+*           Space needed for CBDSQR is BDSPAC = 5*M
+*
+            IF( N.GE.MNTHR ) THEN
+               IF( WNTVN ) THEN
+*
+*                 Path 1t(N much larger than M, JOBVT='N')
+*
+                  MAXWRK = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1,
+     $                     -1 )
+                  MAXWRK = MAX( MAXWRK, 2*M+2*M*
+     $                     ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
+                  IF( WNTUO .OR. WNTUAS )
+     $               MAXWRK = MAX( MAXWRK, 2*M+M*
+     $                        ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) )
+                  MINWRK = 3*M
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTVO .AND. WNTUN ) THEN
+*
+*                 Path 2t(N much larger than M, JOBU='N', JOBVT='O')
+*
+                  WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+2*M*
+     $                    ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+( M-1 )*
+     $                    ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) )
+                  MAXWRK = MAX( M*M+WRKBL, M*M+M*N )
+                  MINWRK = 2*M + N
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTVO .AND. WNTUAS ) THEN
+*
+*                 Path 3t(N much larger than M, JOBU='S' or 'A',
+*                 JOBVT='O')
+*
+                  WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+2*M*
+     $                    ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+( M-1 )*
+     $                    ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+M*
+     $                    ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) )
+                  MAXWRK = MAX( M*M+WRKBL, M*M+M*N )
+                  MINWRK = 2*M + N
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTVS .AND. WNTUN ) THEN
+*
+*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
+*
+                  WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+2*M*
+     $                    ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+( M-1 )*
+     $                    ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) )
+                  MAXWRK = M*M + WRKBL
+                  MINWRK = 2*M + N
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTVS .AND. WNTUO ) THEN
+*
+*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
+*
+                  WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+2*M*
+     $                    ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+( M-1 )*
+     $                    ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+M*
+     $                    ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) )
+                  MAXWRK = 2*M*M + WRKBL
+                  MINWRK = 2*M + N
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTVS .AND. WNTUAS ) THEN
+*
+*                 Path 6t(N much larger than M, JOBU='S' or 'A',
+*                 JOBVT='S')
+*
+                  WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'CUNGLQ', ' ', M,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+2*M*
+     $                    ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+( M-1 )*
+     $                    ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+M*
+     $                    ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) )
+                  MAXWRK = M*M + WRKBL
+                  MINWRK = 2*M + N
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTVA .AND. WNTUN ) THEN
+*
+*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
+*
+                  WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'CUNGLQ', ' ', N,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+2*M*
+     $                    ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+( M-1 )*
+     $                    ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) )
+                  MAXWRK = M*M + WRKBL
+                  MINWRK = 2*M + N
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTVA .AND. WNTUO ) THEN
+*
+*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
+*
+                  WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'CUNGLQ', ' ', N,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+2*M*
+     $                    ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+( M-1 )*
+     $                    ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+M*
+     $                    ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) )
+                  MAXWRK = 2*M*M + WRKBL
+                  MINWRK = 2*M + N
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTVA .AND. WNTUAS ) THEN
+*
+*                 Path 9t(N much larger than M, JOBU='S' or 'A',
+*                 JOBVT='A')
+*
+                  WRKBL = M + M*ILAENV( 1, 'CGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'CUNGLQ', ' ', N,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+2*M*
+     $                    ILAENV( 1, 'CGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+( M-1 )*
+     $                    ILAENV( 1, 'CUNGBR', 'P', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+M*
+     $                    ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) )
+                  MAXWRK = M*M + WRKBL
+                  MINWRK = 2*M + N
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               END IF
+            ELSE
+*
+*              Path 10t(N greater than M, but not much larger)
+*
+               MAXWRK = 2*M + ( M+N )*ILAENV( 1, 'CGEBRD', ' ', M, N,
+     $                  -1, -1 )
+               IF( WNTVS .OR. WNTVO )
+     $            MAXWRK = MAX( MAXWRK, 2*M+M*
+     $                     ILAENV( 1, 'CUNGBR', 'P', M, N, M, -1 ) )
+               IF( WNTVA )
+     $            MAXWRK = MAX( MAXWRK, 2*M+N*
+     $                     ILAENV( 1, 'CUNGBR', 'P', N, N, M, -1 ) )
+               IF( .NOT.WNTUN )
+     $            MAXWRK = MAX( MAXWRK, 2*M+( M-1 )*
+     $                     ILAENV( 1, 'CUNGBR', 'Q', M, M, M, -1 ) )
+               MINWRK = 2*M + N
+               MAXWRK = MAX( MINWRK, MAXWRK )
+            END IF
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+*
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGESVD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         IF( LWORK.GE.1 )
+     $      WORK( 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SQRT( SLAMCH( 'S' ) ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = CLANGE( 'M', M, N, A, LDA, DUM )
+      ISCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         ISCL = 1
+         CALL CLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         ISCL = 1
+         CALL CLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
+      END IF
+*
+      IF( M.GE.N ) THEN
+*
+*        A has at least as many rows as columns. If A has sufficiently
+*        more rows than columns, first reduce using the QR
+*        decomposition (if sufficient workspace available)
+*
+         IF( M.GE.MNTHR ) THEN
+*
+            IF( WNTUN ) THEN
+*
+*              Path 1 (M much larger than N, JOBU='N')
+*              No left singular vectors to be computed
+*
+               ITAU = 1
+               IWORK = ITAU + N
+*
+*              Compute A=Q*R
+*              (CWorkspace: need 2*N, prefer N+N*NB)
+*              (RWorkspace: need 0)
+*
+               CALL CGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
+     $                      LWORK-IWORK+1, IERR )
+*
+*              Zero out below R
+*
+               CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ),
+     $                      LDA )
+               IE = 1
+               ITAUQ = 1
+               ITAUP = ITAUQ + N
+               IWORK = ITAUP + N
+*
+*              Bidiagonalize R in A
+*              (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
+*              (RWorkspace: need N)
+*
+               CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
+     $                      IERR )
+               NCVT = 0
+               IF( WNTVO .OR. WNTVAS ) THEN
+*
+*                 If right singular vectors desired, generate P'.
+*                 (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  NCVT = N
+               END IF
+               IRWORK = IE + N
+*
+*              Perform bidiagonal QR iteration, computing right
+*              singular vectors of A in A if desired
+*              (CWorkspace: 0)
+*              (RWorkspace: need BDSPAC)
+*
+               CALL CBDSQR( 'U', N, NCVT, 0, 0, S, RWORK( IE ), A, LDA,
+     $                      CDUM, 1, CDUM, 1, RWORK( IRWORK ), INFO )
+*
+*              If right singular vectors desired in VT, copy them there
+*
+               IF( WNTVAS )
+     $            CALL CLACPY( 'F', N, N, A, LDA, VT, LDVT )
+*
+            ELSE IF( WNTUO .AND. WNTVN ) THEN
+*
+*              Path 2 (M much larger than N, JOBU='O', JOBVT='N')
+*              N left singular vectors to be overwritten on A and
+*              no right singular vectors to be computed
+*
+               IF( LWORK.GE.N*N+3*N ) THEN
+*
+*                 Sufficient workspace for a fast algorithm
+*
+                  IR = 1
+                  IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*N ) THEN
+*
+*                    WORK(IU) is LDA by N, WORK(IR) is LDA by N
+*
+                     LDWRKU = LDA
+                     LDWRKR = LDA
+                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+N*N ) THEN
+*
+*                    WORK(IU) is LDA by N, WORK(IR) is N by N
+*
+                     LDWRKU = LDA
+                     LDWRKR = N
+                  ELSE
+*
+*                    WORK(IU) is LDWRKU by N, WORK(IR) is N by N
+*
+                     LDWRKU = ( LWORK-N*N ) / N
+                     LDWRKR = N
+                  END IF
+                  ITAU = IR + LDWRKR*N
+                  IWORK = ITAU + N
+*
+*                 Compute A=Q*R
+*                 (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Copy R to WORK(IR) and zero out below it
+*
+                  CALL CLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
+                  CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
+     $                         WORK( IR+1 ), LDWRKR )
+*
+*                 Generate Q in A
+*                 (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IE = 1
+                  ITAUQ = ITAU
+                  ITAUP = ITAUQ + N
+                  IWORK = ITAUP + N
+*
+*                 Bidiagonalize R in WORK(IR)
+*                 (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
+*                 (RWorkspace: need N)
+*
+                  CALL CGEBRD( N, N, WORK( IR ), LDWRKR, S, RWORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Generate left vectors bidiagonalizing R
+*                 (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+*                 (RWorkspace: need 0)
+*
+                  CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
+     $                         WORK( ITAUQ ), WORK( IWORK ),
+     $                         LWORK-IWORK+1, IERR )
+                  IRWORK = IE + N
+*
+*                 Perform bidiagonal QR iteration, computing left
+*                 singular vectors of R in WORK(IR)
+*                 (CWorkspace: need N*N)
+*                 (RWorkspace: need BDSPAC)
+*
+                  CALL CBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM, 1,
+     $                         WORK( IR ), LDWRKR, CDUM, 1,
+     $                         RWORK( IRWORK ), INFO )
+                  IU = ITAUQ
+*
+*                 Multiply Q in A by left singular vectors of R in
+*                 WORK(IR), storing result in WORK(IU) and copying to A
+*                 (CWorkspace: need N*N+N, prefer N*N+M*N)
+*                 (RWorkspace: 0)
+*
+                  DO 10 I = 1, M, LDWRKU
+                     CHUNK = MIN( M-I+1, LDWRKU )
+                     CALL CGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ),
+     $                           LDA, WORK( IR ), LDWRKR, CZERO,
+     $                           WORK( IU ), LDWRKU )
+                     CALL CLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
+     $                            A( I, 1 ), LDA )
+   10             CONTINUE
+*
+               ELSE
+*
+*                 Insufficient workspace for a fast algorithm
+*
+                  IE = 1
+                  ITAUQ = 1
+                  ITAUP = ITAUQ + N
+                  IWORK = ITAUP + N
+*
+*                 Bidiagonalize A
+*                 (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
+*                 (RWorkspace: N)
+*
+                  CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Generate left vectors bidiagonalizing A
+*                 (CWorkspace: need 3*N, prefer 2*N+N*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL CUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IRWORK = IE + N
+*
+*                 Perform bidiagonal QR iteration, computing left
+*                 singular vectors of A in A
+*                 (CWorkspace: need 0)
+*                 (RWorkspace: need BDSPAC)
+*
+                  CALL CBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM, 1,
+     $                         A, LDA, CDUM, 1, RWORK( IRWORK ), INFO )
+*
+               END IF
+*
+            ELSE IF( WNTUO .AND. WNTVAS ) THEN
+*
+*              Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
+*              N left singular vectors to be overwritten on A and
+*              N right singular vectors to be computed in VT
+*
+               IF( LWORK.GE.N*N+3*N ) THEN
+*
+*                 Sufficient workspace for a fast algorithm
+*
+                  IR = 1
+                  IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*N ) THEN
+*
+*                    WORK(IU) is LDA by N and WORK(IR) is LDA by N
+*
+                     LDWRKU = LDA
+                     LDWRKR = LDA
+                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+N*N ) THEN
+*
+*                    WORK(IU) is LDA by N and WORK(IR) is N by N
+*
+                     LDWRKU = LDA
+                     LDWRKR = N
+                  ELSE
+*
+*                    WORK(IU) is LDWRKU by N and WORK(IR) is N by N
+*
+                     LDWRKU = ( LWORK-N*N ) / N
+                     LDWRKR = N
+                  END IF
+                  ITAU = IR + LDWRKR*N
+                  IWORK = ITAU + N
+*
+*                 Compute A=Q*R
+*                 (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Copy R to VT, zeroing out below it
+*
+                  CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT )
+                  CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, VT( 2, 1 ),
+     $                         LDVT )
+*
+*                 Generate Q in A
+*                 (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IE = 1
+                  ITAUQ = ITAU
+                  ITAUP = ITAUQ + N
+                  IWORK = ITAUP + N
+*
+*                 Bidiagonalize R in VT, copying result to WORK(IR)
+*                 (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
+*                 (RWorkspace: need N)
+*
+                  CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  CALL CLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR )
+*
+*                 Generate left vectors bidiagonalizing R in WORK(IR)
+*                 (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
+     $                         WORK( ITAUQ ), WORK( IWORK ),
+     $                         LWORK-IWORK+1, IERR )
+*
+*                 Generate right vectors bidiagonalizing R in VT
+*                 (CWorkspace: need N*N+3*N-1, prefer N*N+2*N+(N-1)*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IRWORK = IE + N
+*
+*                 Perform bidiagonal QR iteration, computing left
+*                 singular vectors of R in WORK(IR) and computing right
+*                 singular vectors of R in VT
+*                 (CWorkspace: need N*N)
+*                 (RWorkspace: need BDSPAC)
+*
+                  CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT,
+     $                         LDVT, WORK( IR ), LDWRKR, CDUM, 1,
+     $                         RWORK( IRWORK ), INFO )
+                  IU = ITAUQ
+*
+*                 Multiply Q in A by left singular vectors of R in
+*                 WORK(IR), storing result in WORK(IU) and copying to A
+*                 (CWorkspace: need N*N+N, prefer N*N+M*N)
+*                 (RWorkspace: 0)
+*
+                  DO 20 I = 1, M, LDWRKU
+                     CHUNK = MIN( M-I+1, LDWRKU )
+                     CALL CGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ),
+     $                           LDA, WORK( IR ), LDWRKR, CZERO,
+     $                           WORK( IU ), LDWRKU )
+                     CALL CLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
+     $                            A( I, 1 ), LDA )
+   20             CONTINUE
+*
+               ELSE
+*
+*                 Insufficient workspace for a fast algorithm
+*
+                  ITAU = 1
+                  IWORK = ITAU + N
+*
+*                 Compute A=Q*R
+*                 (CWorkspace: need 2*N, prefer N+N*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Copy R to VT, zeroing out below it
+*
+                  CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT )
+                  CALL CLASET( 'L', N-1, N-1, CZERO, CZERO, VT( 2, 1 ),
+     $                         LDVT )
+*
+*                 Generate Q in A
+*                 (CWorkspace: need 2*N, prefer N+N*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IE = 1
+                  ITAUQ = ITAU
+                  ITAUP = ITAUQ + N
+                  IWORK = ITAUP + N
+*
+*                 Bidiagonalize R in VT
+*                 (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
+*                 (RWorkspace: N)
+*
+                  CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Multiply Q in A by left vectors bidiagonalizing R
+*                 (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL CUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
+     $                         WORK( ITAUQ ), A, LDA, WORK( IWORK ),
+     $                         LWORK-IWORK+1, IERR )
+*
+*                 Generate right vectors bidiagonalizing R in VT
+*                 (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IRWORK = IE + N
+*
+*                 Perform bidiagonal QR iteration, computing left
+*                 singular vectors of A in A and computing right
+*                 singular vectors of A in VT
+*                 (CWorkspace: 0)
+*                 (RWorkspace: need BDSPAC)
+*
+                  CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT,
+     $                         LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ),
+     $                         INFO )
+*
+               END IF
+*
+            ELSE IF( WNTUS ) THEN
+*
+               IF( WNTVN ) THEN
+*
+*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
+*                 N left singular vectors to be computed in U and
+*                 no right singular vectors to be computed
+*
+                  IF( LWORK.GE.N*N+3*N ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IR = 1
+                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
+*
+*                       WORK(IR) is LDA by N
+*
+                        LDWRKR = LDA
+                     ELSE
+*
+*                       WORK(IR) is N by N
+*
+                        LDWRKR = N
+                     END IF
+                     ITAU = IR + LDWRKR*N
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R
+*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy R to WORK(IR), zeroing out below it
+*
+                     CALL CLACPY( 'U', N, N, A, LDA, WORK( IR ),
+     $                            LDWRKR )
+                     CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
+     $                            WORK( IR+1 ), LDWRKR )
+*
+*                    Generate Q in A
+*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in WORK(IR)
+*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
+*                    (RWorkspace: need N)
+*
+                     CALL CGEBRD( N, N, WORK( IR ), LDWRKR, S,
+     $                            RWORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate left vectors bidiagonalizing R in WORK(IR)
+*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IRWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of R in WORK(IR)
+*                    (CWorkspace: need N*N)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL CBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM,
+     $                            1, WORK( IR ), LDWRKR, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+*                    Multiply Q in A by left singular vectors of R in
+*                    WORK(IR), storing result in U
+*                    (CWorkspace: need N*N)
+*                    (RWorkspace: 0)
+*
+                     CALL CGEMM( 'N', 'N', M, N, N, CONE, A, LDA,
+     $                           WORK( IR ), LDWRKR, CZERO, U, LDU )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (CWorkspace: need 2*N, prefer N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (CWorkspace: need 2*N, prefer N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGQR( M, N, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Zero out below R in A
+*
+                     CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
+     $                            A( 2, 1 ), LDA )
+*
+*                    Bidiagonalize R in A
+*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
+*                    (RWorkspace: need N)
+*
+                     CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply Q in U by left vectors bidiagonalizing R
+*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
+     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IRWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U
+*                    (CWorkspace: 0)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL CBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM,
+     $                            1, U, LDU, CDUM, 1, RWORK( IRWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTVO ) THEN
+*
+*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
+*                 N left singular vectors to be computed in U and
+*                 N right singular vectors to be overwritten on A
+*
+                  IF( LWORK.GE.2*N*N+3*N ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
+*
+*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*N
+                        LDWRKR = LDA
+                     ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
+*
+*                       WORK(IU) is LDA by N and WORK(IR) is N by N
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*N
+                        LDWRKR = N
+                     ELSE
+*
+*                       WORK(IU) is N by N and WORK(IR) is N by N
+*
+                        LDWRKU = N
+                        IR = IU + LDWRKU*N
+                        LDWRKR = N
+                     END IF
+                     ITAU = IR + LDWRKR*N
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R
+*                    (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy R to WORK(IU), zeroing out below it
+*
+                     CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
+     $                            WORK( IU+1 ), LDWRKU )
+*
+*                    Generate Q in A
+*                    (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in WORK(IU), copying result to
+*                    WORK(IR)
+*                    (CWorkspace: need   2*N*N+3*N,
+*                                 prefer 2*N*N+2*N+2*N*NB)
+*                    (RWorkspace: need   N)
+*
+                     CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S,
+     $                            RWORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU,
+     $                            WORK( IR ), LDWRKR )
+*
+*                    Generate left bidiagonalizing vectors in WORK(IU)
+*                    (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in WORK(IR)
+*                    (CWorkspace: need   2*N*N+3*N-1,
+*                                 prefer 2*N*N+2*N+(N-1)*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IRWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of R in WORK(IU) and computing
+*                    right singular vectors of R in WORK(IR)
+*                    (CWorkspace: need 2*N*N)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ),
+     $                            WORK( IR ), LDWRKR, WORK( IU ),
+     $                            LDWRKU, CDUM, 1, RWORK( IRWORK ),
+     $                            INFO )
+*
+*                    Multiply Q in A by left singular vectors of R in
+*                    WORK(IU), storing result in U
+*                    (CWorkspace: need N*N)
+*                    (RWorkspace: 0)
+*
+                     CALL CGEMM( 'N', 'N', M, N, N, CONE, A, LDA,
+     $                           WORK( IU ), LDWRKU, CZERO, U, LDU )
+*
+*                    Copy right singular vectors of R to A
+*                    (CWorkspace: need N*N)
+*                    (RWorkspace: 0)
+*
+                     CALL CLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
+     $                            LDA )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (CWorkspace: need 2*N, prefer N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (CWorkspace: need 2*N, prefer N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGQR( M, N, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Zero out below R in A
+*
+                     CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
+     $                            A( 2, 1 ), LDA )
+*
+*                    Bidiagonalize R in A
+*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
+*                    (RWorkspace: need N)
+*
+                     CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply Q in U by left vectors bidiagonalizing R
+*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
+     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right vectors bidiagonalizing R in A
+*                    (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IRWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U and computing right
+*                    singular vectors of A in A
+*                    (CWorkspace: 0)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), A,
+     $                            LDA, U, LDU, CDUM, 1, RWORK( IRWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTVAS ) THEN
+*
+*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S'
+*                         or 'A')
+*                 N left singular vectors to be computed in U and
+*                 N right singular vectors to be computed in VT
+*
+                  IF( LWORK.GE.N*N+3*N ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
+*
+*                       WORK(IU) is LDA by N
+*
+                        LDWRKU = LDA
+                     ELSE
+*
+*                       WORK(IU) is N by N
+*
+                        LDWRKU = N
+                     END IF
+                     ITAU = IU + LDWRKU*N
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R
+*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy R to WORK(IU), zeroing out below it
+*
+                     CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
+     $                            WORK( IU+1 ), LDWRKU )
+*
+*                    Generate Q in A
+*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in WORK(IU), copying result to VT
+*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
+*                    (RWorkspace: need N)
+*
+                     CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S,
+     $                            RWORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
+     $                            LDVT )
+*
+*                    Generate left bidiagonalizing vectors in WORK(IU)
+*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in VT
+*                    (CWorkspace: need   N*N+3*N-1,
+*                                 prefer N*N+2*N+(N-1)*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IRWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of R in WORK(IU) and computing
+*                    right singular vectors of R in VT
+*                    (CWorkspace: need N*N)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT,
+     $                            LDVT, WORK( IU ), LDWRKU, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+*                    Multiply Q in A by left singular vectors of R in
+*                    WORK(IU), storing result in U
+*                    (CWorkspace: need N*N)
+*                    (RWorkspace: 0)
+*
+                     CALL CGEMM( 'N', 'N', M, N, N, CONE, A, LDA,
+     $                           WORK( IU ), LDWRKU, CZERO, U, LDU )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (CWorkspace: need 2*N, prefer N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (CWorkspace: need 2*N, prefer N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGQR( M, N, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy R to VT, zeroing out below it
+*
+                     CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT )
+                     CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
+     $                            VT( 2, 1 ), LDVT )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in VT
+*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
+*                    (RWorkspace: need N)
+*
+                     CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply Q in U by left bidiagonalizing vectors
+*                    in VT
+*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
+     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in VT
+*                    (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IRWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U and computing right
+*                    singular vectors of A in VT
+*                    (CWorkspace: 0)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT,
+     $                            LDVT, U, LDU, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+                  END IF
+*
+               END IF
+*
+            ELSE IF( WNTUA ) THEN
+*
+               IF( WNTVN ) THEN
+*
+*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
+*                 M left singular vectors to be computed in U and
+*                 no right singular vectors to be computed
+*
+                  IF( LWORK.GE.N*N+MAX( N+M, 3*N ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IR = 1
+                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
+*
+*                       WORK(IR) is LDA by N
+*
+                        LDWRKR = LDA
+                     ELSE
+*
+*                       WORK(IR) is N by N
+*
+                        LDWRKR = N
+                     END IF
+                     ITAU = IR + LDWRKR*N
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Copy R to WORK(IR), zeroing out below it
+*
+                     CALL CLACPY( 'U', N, N, A, LDA, WORK( IR ),
+     $                            LDWRKR )
+                     CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
+     $                            WORK( IR+1 ), LDWRKR )
+*
+*                    Generate Q in U
+*                    (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in WORK(IR)
+*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
+*                    (RWorkspace: need N)
+*
+                     CALL CGEBRD( N, N, WORK( IR ), LDWRKR, S,
+     $                            RWORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in WORK(IR)
+*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IRWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of R in WORK(IR)
+*                    (CWorkspace: need N*N)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL CBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM,
+     $                            1, WORK( IR ), LDWRKR, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+*                    Multiply Q in U by left singular vectors of R in
+*                    WORK(IR), storing result in A
+*                    (CWorkspace: need N*N)
+*                    (RWorkspace: 0)
+*
+                     CALL CGEMM( 'N', 'N', M, N, N, CONE, U, LDU,
+     $                           WORK( IR ), LDWRKR, CZERO, A, LDA )
+*
+*                    Copy left singular vectors of A from A to U
+*
+                     CALL CLACPY( 'F', M, N, A, LDA, U, LDU )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (CWorkspace: need 2*N, prefer N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (CWorkspace: need N+M, prefer N+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Zero out below R in A
+*
+                     CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
+     $                            A( 2, 1 ), LDA )
+*
+*                    Bidiagonalize R in A
+*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
+*                    (RWorkspace: need N)
+*
+                     CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply Q in U by left bidiagonalizing vectors
+*                    in A
+*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
+     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IRWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U
+*                    (CWorkspace: 0)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL CBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM,
+     $                            1, U, LDU, CDUM, 1, RWORK( IRWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTVO ) THEN
+*
+*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
+*                 M left singular vectors to be computed in U and
+*                 N right singular vectors to be overwritten on A
+*
+                  IF( LWORK.GE.2*N*N+MAX( N+M, 3*N ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
+*
+*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*N
+                        LDWRKR = LDA
+                     ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
+*
+*                       WORK(IU) is LDA by N and WORK(IR) is N by N
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*N
+                        LDWRKR = N
+                     ELSE
+*
+*                       WORK(IU) is N by N and WORK(IR) is N by N
+*
+                        LDWRKU = N
+                        IR = IU + LDWRKU*N
+                        LDWRKR = N
+                     END IF
+                     ITAU = IR + LDWRKR*N
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (CWorkspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy R to WORK(IU), zeroing out below it
+*
+                     CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
+     $                            WORK( IU+1 ), LDWRKU )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in WORK(IU), copying result to
+*                    WORK(IR)
+*                    (CWorkspace: need   2*N*N+3*N,
+*                                 prefer 2*N*N+2*N+2*N*NB)
+*                    (RWorkspace: need   N)
+*
+                     CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S,
+     $                            RWORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU,
+     $                            WORK( IR ), LDWRKR )
+*
+*                    Generate left bidiagonalizing vectors in WORK(IU)
+*                    (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in WORK(IR)
+*                    (CWorkspace: need   2*N*N+3*N-1,
+*                                 prefer 2*N*N+2*N+(N-1)*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IRWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of R in WORK(IU) and computing
+*                    right singular vectors of R in WORK(IR)
+*                    (CWorkspace: need 2*N*N)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ),
+     $                            WORK( IR ), LDWRKR, WORK( IU ),
+     $                            LDWRKU, CDUM, 1, RWORK( IRWORK ),
+     $                            INFO )
+*
+*                    Multiply Q in U by left singular vectors of R in
+*                    WORK(IU), storing result in A
+*                    (CWorkspace: need N*N)
+*                    (RWorkspace: 0)
+*
+                     CALL CGEMM( 'N', 'N', M, N, N, CONE, U, LDU,
+     $                           WORK( IU ), LDWRKU, CZERO, A, LDA )
+*
+*                    Copy left singular vectors of A from A to U
+*
+                     CALL CLACPY( 'F', M, N, A, LDA, U, LDU )
+*
+*                    Copy right singular vectors of R from WORK(IR) to A
+*
+                     CALL CLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
+     $                            LDA )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (CWorkspace: need 2*N, prefer N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (CWorkspace: need N+M, prefer N+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Zero out below R in A
+*
+                     CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
+     $                            A( 2, 1 ), LDA )
+*
+*                    Bidiagonalize R in A
+*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
+*                    (RWorkspace: need N)
+*
+                     CALL CGEBRD( N, N, A, LDA, S, RWORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply Q in U by left bidiagonalizing vectors
+*                    in A
+*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
+     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in A
+*                    (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IRWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U and computing right
+*                    singular vectors of A in A
+*                    (CWorkspace: 0)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), A,
+     $                            LDA, U, LDU, CDUM, 1, RWORK( IRWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTVAS ) THEN
+*
+*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S'
+*                         or 'A')
+*                 M left singular vectors to be computed in U and
+*                 N right singular vectors to be computed in VT
+*
+                  IF( LWORK.GE.N*N+MAX( N+M, 3*N ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
+*
+*                       WORK(IU) is LDA by N
+*
+                        LDWRKU = LDA
+                     ELSE
+*
+*                       WORK(IU) is N by N
+*
+                        LDWRKU = N
+                     END IF
+                     ITAU = IU + LDWRKU*N
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy R to WORK(IU), zeroing out below it
+*
+                     CALL CLACPY( 'U', N, N, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
+     $                            WORK( IU+1 ), LDWRKU )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in WORK(IU), copying result to VT
+*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
+*                    (RWorkspace: need N)
+*
+                     CALL CGEBRD( N, N, WORK( IU ), LDWRKU, S,
+     $                            RWORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL CLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
+     $                            LDVT )
+*
+*                    Generate left bidiagonalizing vectors in WORK(IU)
+*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in VT
+*                    (CWorkspace: need   N*N+3*N-1,
+*                                 prefer N*N+2*N+(N-1)*NB)
+*                    (RWorkspace: need   0)
+*
+                     CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IRWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of R in WORK(IU) and computing
+*                    right singular vectors of R in VT
+*                    (CWorkspace: need N*N)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL CBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT,
+     $                            LDVT, WORK( IU ), LDWRKU, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+*                    Multiply Q in U by left singular vectors of R in
+*                    WORK(IU), storing result in A
+*                    (CWorkspace: need N*N)
+*                    (RWorkspace: 0)
+*
+                     CALL CGEMM( 'N', 'N', M, N, N, CONE, U, LDU,
+     $                           WORK( IU ), LDWRKU, CZERO, A, LDA )
+*
+*                    Copy left singular vectors of A from A to U
+*
+                     CALL CLACPY( 'F', M, N, A, LDA, U, LDU )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (CWorkspace: need 2*N, prefer N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (CWorkspace: need N+M, prefer N+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGQR( M, M, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy R from A to VT, zeroing out below it
+*
+                     CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT )
+                     CALL CLASET( 'L', N-1, N-1, CZERO, CZERO,
+     $                            VT( 2, 1 ), LDVT )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in VT
+*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
+*                    (RWorkspace: need N)
+*
+                     CALL CGEBRD( N, N, VT, LDVT, S, RWORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply Q in U by left bidiagonalizing vectors
+*                    in VT
+*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
+     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in VT
+*                    (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IRWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U and computing right
+*                    singular vectors of A in VT
+*                    (CWorkspace: 0)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL CBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT,
+     $                            LDVT, U, LDU, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+                  END IF
+*
+               END IF
+*
+            END IF
+*
+         ELSE
+*
+*           M .LT. MNTHR
+*
+*           Path 10 (M at least N, but not much larger)
+*           Reduce to bidiagonal form without QR decomposition
+*
+            IE = 1
+            ITAUQ = 1
+            ITAUP = ITAUQ + N
+            IWORK = ITAUP + N
+*
+*           Bidiagonalize A
+*           (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
+*           (RWorkspace: need N)
+*
+            CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
+     $                   IERR )
+            IF( WNTUAS ) THEN
+*
+*              If left singular vectors desired in U, copy result to U
+*              and generate left bidiagonalizing vectors in U
+*              (CWorkspace: need 2*N+NCU, prefer 2*N+NCU*NB)
+*              (RWorkspace: 0)
+*
+               CALL CLACPY( 'L', M, N, A, LDA, U, LDU )
+               IF( WNTUS )
+     $            NCU = N
+               IF( WNTUA )
+     $            NCU = M
+               CALL CUNGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IF( WNTVAS ) THEN
+*
+*              If right singular vectors desired in VT, copy result to
+*              VT and generate right bidiagonalizing vectors in VT
+*              (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
+*              (RWorkspace: 0)
+*
+               CALL CLACPY( 'U', N, N, A, LDA, VT, LDVT )
+               CALL CUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IF( WNTUO ) THEN
+*
+*              If left singular vectors desired in A, generate left
+*              bidiagonalizing vectors in A
+*              (CWorkspace: need 3*N, prefer 2*N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL CUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IF( WNTVO ) THEN
+*
+*              If right singular vectors desired in A, generate right
+*              bidiagonalizing vectors in A
+*              (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
+*              (RWorkspace: 0)
+*
+               CALL CUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IRWORK = IE + N
+            IF( WNTUAS .OR. WNTUO )
+     $         NRU = M
+            IF( WNTUN )
+     $         NRU = 0
+            IF( WNTVAS .OR. WNTVO )
+     $         NCVT = N
+            IF( WNTVN )
+     $         NCVT = 0
+            IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
+*
+*              Perform bidiagonal QR iteration, if desired, computing
+*              left singular vectors in U and computing right singular
+*              vectors in VT
+*              (CWorkspace: 0)
+*              (RWorkspace: need BDSPAC)
+*
+               CALL CBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), VT,
+     $                      LDVT, U, LDU, CDUM, 1, RWORK( IRWORK ),
+     $                      INFO )
+            ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
+*
+*              Perform bidiagonal QR iteration, if desired, computing
+*              left singular vectors in U and computing right singular
+*              vectors in A
+*              (CWorkspace: 0)
+*              (RWorkspace: need BDSPAC)
+*
+               CALL CBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), A,
+     $                      LDA, U, LDU, CDUM, 1, RWORK( IRWORK ),
+     $                      INFO )
+            ELSE
+*
+*              Perform bidiagonal QR iteration, if desired, computing
+*              left singular vectors in A and computing right singular
+*              vectors in VT
+*              (CWorkspace: 0)
+*              (RWorkspace: need BDSPAC)
+*
+               CALL CBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), VT,
+     $                      LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ),
+     $                      INFO )
+            END IF
+*
+         END IF
+*
+      ELSE
+*
+*        A has more columns than rows. If A has sufficiently more
+*        columns than rows, first reduce using the LQ decomposition (if
+*        sufficient workspace available)
+*
+         IF( N.GE.MNTHR ) THEN
+*
+            IF( WNTVN ) THEN
+*
+*              Path 1t(N much larger than M, JOBVT='N')
+*              No right singular vectors to be computed
+*
+               ITAU = 1
+               IWORK = ITAU + M
+*
+*              Compute A=L*Q
+*              (CWorkspace: need 2*M, prefer M+M*NB)
+*              (RWorkspace: 0)
+*
+               CALL CGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
+     $                      LWORK-IWORK+1, IERR )
+*
+*              Zero out above L
+*
+               CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, A( 1, 2 ),
+     $                      LDA )
+               IE = 1
+               ITAUQ = 1
+               ITAUP = ITAUQ + M
+               IWORK = ITAUP + M
+*
+*              Bidiagonalize L in A
+*              (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
+*              (RWorkspace: need M)
+*
+               CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
+     $                      IERR )
+               IF( WNTUO .OR. WNTUAS ) THEN
+*
+*                 If left singular vectors desired, generate Q
+*                 (CWorkspace: need 3*M, prefer 2*M+M*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL CUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+               END IF
+               IRWORK = IE + M
+               NRU = 0
+               IF( WNTUO .OR. WNTUAS )
+     $            NRU = M
+*
+*              Perform bidiagonal QR iteration, computing left singular
+*              vectors of A in A if desired
+*              (CWorkspace: 0)
+*              (RWorkspace: need BDSPAC)
+*
+               CALL CBDSQR( 'U', M, 0, NRU, 0, S, RWORK( IE ), CDUM, 1,
+     $                      A, LDA, CDUM, 1, RWORK( IRWORK ), INFO )
+*
+*              If left singular vectors desired in U, copy them there
+*
+               IF( WNTUAS )
+     $            CALL CLACPY( 'F', M, M, A, LDA, U, LDU )
+*
+            ELSE IF( WNTVO .AND. WNTUN ) THEN
+*
+*              Path 2t(N much larger than M, JOBU='N', JOBVT='O')
+*              M right singular vectors to be overwritten on A and
+*              no left singular vectors to be computed
+*
+               IF( LWORK.GE.M*M+3*M ) THEN
+*
+*                 Sufficient workspace for a fast algorithm
+*
+                  IR = 1
+                  IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*M ) THEN
+*
+*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
+*
+                     LDWRKU = LDA
+                     CHUNK = N
+                     LDWRKR = LDA
+                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+M*M ) THEN
+*
+*                    WORK(IU) is LDA by N and WORK(IR) is M by M
+*
+                     LDWRKU = LDA
+                     CHUNK = N
+                     LDWRKR = M
+                  ELSE
+*
+*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
+*
+                     LDWRKU = M
+                     CHUNK = ( LWORK-M*M ) / M
+                     LDWRKR = M
+                  END IF
+                  ITAU = IR + LDWRKR*M
+                  IWORK = ITAU + M
+*
+*                 Compute A=L*Q
+*                 (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Copy L to WORK(IR) and zero out above it
+*
+                  CALL CLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR )
+                  CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
+     $                         WORK( IR+LDWRKR ), LDWRKR )
+*
+*                 Generate Q in A
+*                 (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IE = 1
+                  ITAUQ = ITAU
+                  ITAUP = ITAUQ + M
+                  IWORK = ITAUP + M
+*
+*                 Bidiagonalize L in WORK(IR)
+*                 (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
+*                 (RWorkspace: need M)
+*
+                  CALL CGEBRD( M, M, WORK( IR ), LDWRKR, S, RWORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Generate right vectors bidiagonalizing L
+*                 (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
+     $                         WORK( ITAUP ), WORK( IWORK ),
+     $                         LWORK-IWORK+1, IERR )
+                  IRWORK = IE + M
+*
+*                 Perform bidiagonal QR iteration, computing right
+*                 singular vectors of L in WORK(IR)
+*                 (CWorkspace: need M*M)
+*                 (RWorkspace: need BDSPAC)
+*
+                  CALL CBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ),
+     $                         WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1,
+     $                         RWORK( IRWORK ), INFO )
+                  IU = ITAUQ
+*
+*                 Multiply right singular vectors of L in WORK(IR) by Q
+*                 in A, storing result in WORK(IU) and copying to A
+*                 (CWorkspace: need M*M+M, prefer M*M+M*N)
+*                 (RWorkspace: 0)
+*
+                  DO 30 I = 1, N, CHUNK
+                     BLK = MIN( N-I+1, CHUNK )
+                     CALL CGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IR ),
+     $                           LDWRKR, A( 1, I ), LDA, CZERO,
+     $                           WORK( IU ), LDWRKU )
+                     CALL CLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
+     $                            A( 1, I ), LDA )
+   30             CONTINUE
+*
+               ELSE
+*
+*                 Insufficient workspace for a fast algorithm
+*
+                  IE = 1
+                  ITAUQ = 1
+                  ITAUP = ITAUQ + M
+                  IWORK = ITAUP + M
+*
+*                 Bidiagonalize A
+*                 (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
+*                 (RWorkspace: need M)
+*
+                  CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Generate right vectors bidiagonalizing A
+*                 (CWorkspace: need 3*M, prefer 2*M+M*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL CUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IRWORK = IE + M
+*
+*                 Perform bidiagonal QR iteration, computing right
+*                 singular vectors of A in A
+*                 (CWorkspace: 0)
+*                 (RWorkspace: need BDSPAC)
+*
+                  CALL CBDSQR( 'L', M, N, 0, 0, S, RWORK( IE ), A, LDA,
+     $                         CDUM, 1, CDUM, 1, RWORK( IRWORK ), INFO )
+*
+               END IF
+*
+            ELSE IF( WNTVO .AND. WNTUAS ) THEN
+*
+*              Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
+*              M right singular vectors to be overwritten on A and
+*              M left singular vectors to be computed in U
+*
+               IF( LWORK.GE.M*M+3*M ) THEN
+*
+*                 Sufficient workspace for a fast algorithm
+*
+                  IR = 1
+                  IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*M ) THEN
+*
+*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
+*
+                     LDWRKU = LDA
+                     CHUNK = N
+                     LDWRKR = LDA
+                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+M*M ) THEN
+*
+*                    WORK(IU) is LDA by N and WORK(IR) is M by M
+*
+                     LDWRKU = LDA
+                     CHUNK = N
+                     LDWRKR = M
+                  ELSE
+*
+*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
+*
+                     LDWRKU = M
+                     CHUNK = ( LWORK-M*M ) / M
+                     LDWRKR = M
+                  END IF
+                  ITAU = IR + LDWRKR*M
+                  IWORK = ITAU + M
+*
+*                 Compute A=L*Q
+*                 (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Copy L to U, zeroing about above it
+*
+                  CALL CLACPY( 'L', M, M, A, LDA, U, LDU )
+                  CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, U( 1, 2 ),
+     $                         LDU )
+*
+*                 Generate Q in A
+*                 (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IE = 1
+                  ITAUQ = ITAU
+                  ITAUP = ITAUQ + M
+                  IWORK = ITAUP + M
+*
+*                 Bidiagonalize L in U, copying result to WORK(IR)
+*                 (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
+*                 (RWorkspace: need M)
+*
+                  CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  CALL CLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR )
+*
+*                 Generate right vectors bidiagonalizing L in WORK(IR)
+*                 (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
+     $                         WORK( ITAUP ), WORK( IWORK ),
+     $                         LWORK-IWORK+1, IERR )
+*
+*                 Generate left vectors bidiagonalizing L in U
+*                 (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IRWORK = IE + M
+*
+*                 Perform bidiagonal QR iteration, computing left
+*                 singular vectors of L in U, and computing right
+*                 singular vectors of L in WORK(IR)
+*                 (CWorkspace: need M*M)
+*                 (RWorkspace: need BDSPAC)
+*
+                  CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
+     $                         WORK( IR ), LDWRKR, U, LDU, CDUM, 1,
+     $                         RWORK( IRWORK ), INFO )
+                  IU = ITAUQ
+*
+*                 Multiply right singular vectors of L in WORK(IR) by Q
+*                 in A, storing result in WORK(IU) and copying to A
+*                 (CWorkspace: need M*M+M, prefer M*M+M*N))
+*                 (RWorkspace: 0)
+*
+                  DO 40 I = 1, N, CHUNK
+                     BLK = MIN( N-I+1, CHUNK )
+                     CALL CGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IR ),
+     $                           LDWRKR, A( 1, I ), LDA, CZERO,
+     $                           WORK( IU ), LDWRKU )
+                     CALL CLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
+     $                            A( 1, I ), LDA )
+   40             CONTINUE
+*
+               ELSE
+*
+*                 Insufficient workspace for a fast algorithm
+*
+                  ITAU = 1
+                  IWORK = ITAU + M
+*
+*                 Compute A=L*Q
+*                 (CWorkspace: need 2*M, prefer M+M*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Copy L to U, zeroing out above it
+*
+                  CALL CLACPY( 'L', M, M, A, LDA, U, LDU )
+                  CALL CLASET( 'U', M-1, M-1, CZERO, CZERO, U( 1, 2 ),
+     $                         LDU )
+*
+*                 Generate Q in A
+*                 (CWorkspace: need 2*M, prefer M+M*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IE = 1
+                  ITAUQ = ITAU
+                  ITAUP = ITAUQ + M
+                  IWORK = ITAUP + M
+*
+*                 Bidiagonalize L in U
+*                 (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
+*                 (RWorkspace: need M)
+*
+                  CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Multiply right vectors bidiagonalizing L by Q in A
+*                 (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL CUNMBR( 'P', 'L', 'C', M, N, M, U, LDU,
+     $                         WORK( ITAUP ), A, LDA, WORK( IWORK ),
+     $                         LWORK-IWORK+1, IERR )
+*
+*                 Generate left vectors bidiagonalizing L in U
+*                 (CWorkspace: need 3*M, prefer 2*M+M*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IRWORK = IE + M
+*
+*                 Perform bidiagonal QR iteration, computing left
+*                 singular vectors of A in U and computing right
+*                 singular vectors of A in A
+*                 (CWorkspace: 0)
+*                 (RWorkspace: need BDSPAC)
+*
+                  CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), A, LDA,
+     $                         U, LDU, CDUM, 1, RWORK( IRWORK ), INFO )
+*
+               END IF
+*
+            ELSE IF( WNTVS ) THEN
+*
+               IF( WNTUN ) THEN
+*
+*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
+*                 M right singular vectors to be computed in VT and
+*                 no left singular vectors to be computed
+*
+                  IF( LWORK.GE.M*M+3*M ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IR = 1
+                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
+*
+*                       WORK(IR) is LDA by M
+*
+                        LDWRKR = LDA
+                     ELSE
+*
+*                       WORK(IR) is M by M
+*
+                        LDWRKR = M
+                     END IF
+                     ITAU = IR + LDWRKR*M
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q
+*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy L to WORK(IR), zeroing out above it
+*
+                     CALL CLACPY( 'L', M, M, A, LDA, WORK( IR ),
+     $                            LDWRKR )
+                     CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
+     $                            WORK( IR+LDWRKR ), LDWRKR )
+*
+*                    Generate Q in A
+*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in WORK(IR)
+*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
+*                    (RWorkspace: need M)
+*
+                     CALL CGEBRD( M, M, WORK( IR ), LDWRKR, S,
+     $                            RWORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right vectors bidiagonalizing L in
+*                    WORK(IR)
+*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IRWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing right
+*                    singular vectors of L in WORK(IR)
+*                    (CWorkspace: need M*M)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL CBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ),
+     $                            WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+*                    Multiply right singular vectors of L in WORK(IR) by
+*                    Q in A, storing result in VT
+*                    (CWorkspace: need M*M)
+*                    (RWorkspace: 0)
+*
+                     CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IR ),
+     $                           LDWRKR, A, LDA, CZERO, VT, LDVT )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q
+*                    (CWorkspace: need 2*M, prefer M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy result to VT
+*
+                     CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (CWorkspace: need 2*M, prefer M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Zero out above L in A
+*
+                     CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
+     $                            A( 1, 2 ), LDA )
+*
+*                    Bidiagonalize L in A
+*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
+*                    (RWorkspace: need M)
+*
+                     CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply right vectors bidiagonalizing L by Q in VT
+*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA,
+     $                            WORK( ITAUP ), VT, LDVT,
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IRWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing right
+*                    singular vectors of A in VT
+*                    (CWorkspace: 0)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL CBDSQR( 'U', M, N, 0, 0, S, RWORK( IE ), VT,
+     $                            LDVT, CDUM, 1, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTUO ) THEN
+*
+*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
+*                 M right singular vectors to be computed in VT and
+*                 M left singular vectors to be overwritten on A
+*
+                  IF( LWORK.GE.2*M*M+3*M ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
+*
+*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*M
+                        LDWRKR = LDA
+                     ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
+*
+*                       WORK(IU) is LDA by M and WORK(IR) is M by M
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*M
+                        LDWRKR = M
+                     ELSE
+*
+*                       WORK(IU) is M by M and WORK(IR) is M by M
+*
+                        LDWRKU = M
+                        IR = IU + LDWRKU*M
+                        LDWRKR = M
+                     END IF
+                     ITAU = IR + LDWRKR*M
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q
+*                    (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy L to WORK(IU), zeroing out below it
+*
+                     CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
+     $                            WORK( IU+LDWRKU ), LDWRKU )
+*
+*                    Generate Q in A
+*                    (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in WORK(IU), copying result to
+*                    WORK(IR)
+*                    (CWorkspace: need   2*M*M+3*M,
+*                                 prefer 2*M*M+2*M+2*M*NB)
+*                    (RWorkspace: need   M)
+*
+                     CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S,
+     $                            RWORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU,
+     $                            WORK( IR ), LDWRKR )
+*
+*                    Generate right bidiagonalizing vectors in WORK(IU)
+*                    (CWorkspace: need   2*M*M+3*M-1,
+*                                 prefer 2*M*M+2*M+(M-1)*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in WORK(IR)
+*                    (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IRWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of L in WORK(IR) and computing
+*                    right singular vectors of L in WORK(IU)
+*                    (CWorkspace: need 2*M*M)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
+     $                            WORK( IU ), LDWRKU, WORK( IR ),
+     $                            LDWRKR, CDUM, 1, RWORK( IRWORK ),
+     $                            INFO )
+*
+*                    Multiply right singular vectors of L in WORK(IU) by
+*                    Q in A, storing result in VT
+*                    (CWorkspace: need M*M)
+*                    (RWorkspace: 0)
+*
+                     CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ),
+     $                           LDWRKU, A, LDA, CZERO, VT, LDVT )
+*
+*                    Copy left singular vectors of L to A
+*                    (CWorkspace: need M*M)
+*                    (RWorkspace: 0)
+*
+                     CALL CLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
+     $                            LDA )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (CWorkspace: need 2*M, prefer M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (CWorkspace: need 2*M, prefer M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Zero out above L in A
+*
+                     CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
+     $                            A( 1, 2 ), LDA )
+*
+*                    Bidiagonalize L in A
+*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
+*                    (RWorkspace: need M)
+*
+                     CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply right vectors bidiagonalizing L by Q in VT
+*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA,
+     $                            WORK( ITAUP ), VT, LDVT,
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors of L in A
+*                    (CWorkspace: need 3*M, prefer 2*M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IRWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in A and computing right
+*                    singular vectors of A in VT
+*                    (CWorkspace: 0)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT,
+     $                            LDVT, A, LDA, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTUAS ) THEN
+*
+*                 Path 6t(N much larger than M, JOBU='S' or 'A',
+*                         JOBVT='S')
+*                 M right singular vectors to be computed in VT and
+*                 M left singular vectors to be computed in U
+*
+                  IF( LWORK.GE.M*M+3*M ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
+*
+*                       WORK(IU) is LDA by N
+*
+                        LDWRKU = LDA
+                     ELSE
+*
+*                       WORK(IU) is LDA by M
+*
+                        LDWRKU = M
+                     END IF
+                     ITAU = IU + LDWRKU*M
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q
+*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy L to WORK(IU), zeroing out above it
+*
+                     CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
+     $                            WORK( IU+LDWRKU ), LDWRKU )
+*
+*                    Generate Q in A
+*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in WORK(IU), copying result to U
+*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
+*                    (RWorkspace: need M)
+*
+                     CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S,
+     $                            RWORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
+     $                            LDU )
+*
+*                    Generate right bidiagonalizing vectors in WORK(IU)
+*                    (CWorkspace: need   M*M+3*M-1,
+*                                 prefer M*M+2*M+(M-1)*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in U
+*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IRWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of L in U and computing right
+*                    singular vectors of L in WORK(IU)
+*                    (CWorkspace: need M*M)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
+     $                            WORK( IU ), LDWRKU, U, LDU, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+*                    Multiply right singular vectors of L in WORK(IU) by
+*                    Q in A, storing result in VT
+*                    (CWorkspace: need M*M)
+*                    (RWorkspace: 0)
+*
+                     CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ),
+     $                           LDWRKU, A, LDA, CZERO, VT, LDVT )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (CWorkspace: need 2*M, prefer M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (CWorkspace: need 2*M, prefer M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy L to U, zeroing out above it
+*
+                     CALL CLACPY( 'L', M, M, A, LDA, U, LDU )
+                     CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
+     $                            U( 1, 2 ), LDU )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in U
+*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
+*                    (RWorkspace: need M)
+*
+                     CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply right bidiagonalizing vectors in U by Q
+*                    in VT
+*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNMBR( 'P', 'L', 'C', M, N, M, U, LDU,
+     $                            WORK( ITAUP ), VT, LDVT,
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in U
+*                    (CWorkspace: need 3*M, prefer 2*M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IRWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U and computing right
+*                    singular vectors of A in VT
+*                    (CWorkspace: 0)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT,
+     $                            LDVT, U, LDU, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+                  END IF
+*
+               END IF
+*
+            ELSE IF( WNTVA ) THEN
+*
+               IF( WNTUN ) THEN
+*
+*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
+*                 N right singular vectors to be computed in VT and
+*                 no left singular vectors to be computed
+*
+                  IF( LWORK.GE.M*M+MAX( N+M, 3*M ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IR = 1
+                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
+*
+*                       WORK(IR) is LDA by M
+*
+                        LDWRKR = LDA
+                     ELSE
+*
+*                       WORK(IR) is M by M
+*
+                        LDWRKR = M
+                     END IF
+                     ITAU = IR + LDWRKR*M
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Copy L to WORK(IR), zeroing out above it
+*
+                     CALL CLACPY( 'L', M, M, A, LDA, WORK( IR ),
+     $                            LDWRKR )
+                     CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
+     $                            WORK( IR+LDWRKR ), LDWRKR )
+*
+*                    Generate Q in VT
+*                    (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in WORK(IR)
+*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
+*                    (RWorkspace: need M)
+*
+                     CALL CGEBRD( M, M, WORK( IR ), LDWRKR, S,
+     $                            RWORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in WORK(IR)
+*                    (CWorkspace: need   M*M+3*M-1,
+*                                 prefer M*M+2*M+(M-1)*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IRWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing right
+*                    singular vectors of L in WORK(IR)
+*                    (CWorkspace: need M*M)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL CBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ),
+     $                            WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+*                    Multiply right singular vectors of L in WORK(IR) by
+*                    Q in VT, storing result in A
+*                    (CWorkspace: need M*M)
+*                    (RWorkspace: 0)
+*
+                     CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IR ),
+     $                           LDWRKR, VT, LDVT, CZERO, A, LDA )
+*
+*                    Copy right singular vectors of A from A to VT
+*
+                     CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (CWorkspace: need 2*M, prefer M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (CWorkspace: need M+N, prefer M+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Zero out above L in A
+*
+                     CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
+     $                            A( 1, 2 ), LDA )
+*
+*                    Bidiagonalize L in A
+*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
+*                    (RWorkspace: need M)
+*
+                     CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply right bidiagonalizing vectors in A by Q
+*                    in VT
+*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA,
+     $                            WORK( ITAUP ), VT, LDVT,
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IRWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing right
+*                    singular vectors of A in VT
+*                    (CWorkspace: 0)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL CBDSQR( 'U', M, N, 0, 0, S, RWORK( IE ), VT,
+     $                            LDVT, CDUM, 1, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTUO ) THEN
+*
+*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
+*                 N right singular vectors to be computed in VT and
+*                 M left singular vectors to be overwritten on A
+*
+                  IF( LWORK.GE.2*M*M+MAX( N+M, 3*M ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
+*
+*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*M
+                        LDWRKR = LDA
+                     ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
+*
+*                       WORK(IU) is LDA by M and WORK(IR) is M by M
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*M
+                        LDWRKR = M
+                     ELSE
+*
+*                       WORK(IU) is M by M and WORK(IR) is M by M
+*
+                        LDWRKU = M
+                        IR = IU + LDWRKU*M
+                        LDWRKR = M
+                     END IF
+                     ITAU = IR + LDWRKR*M
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (CWorkspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy L to WORK(IU), zeroing out above it
+*
+                     CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
+     $                            WORK( IU+LDWRKU ), LDWRKU )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in WORK(IU), copying result to
+*                    WORK(IR)
+*                    (CWorkspace: need   2*M*M+3*M,
+*                                 prefer 2*M*M+2*M+2*M*NB)
+*                    (RWorkspace: need   M)
+*
+                     CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S,
+     $                            RWORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU,
+     $                            WORK( IR ), LDWRKR )
+*
+*                    Generate right bidiagonalizing vectors in WORK(IU)
+*                    (CWorkspace: need   2*M*M+3*M-1,
+*                                 prefer 2*M*M+2*M+(M-1)*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in WORK(IR)
+*                    (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IRWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of L in WORK(IR) and computing
+*                    right singular vectors of L in WORK(IU)
+*                    (CWorkspace: need 2*M*M)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
+     $                            WORK( IU ), LDWRKU, WORK( IR ),
+     $                            LDWRKR, CDUM, 1, RWORK( IRWORK ),
+     $                            INFO )
+*
+*                    Multiply right singular vectors of L in WORK(IU) by
+*                    Q in VT, storing result in A
+*                    (CWorkspace: need M*M)
+*                    (RWorkspace: 0)
+*
+                     CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ),
+     $                           LDWRKU, VT, LDVT, CZERO, A, LDA )
+*
+*                    Copy right singular vectors of A from A to VT
+*
+                     CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT )
+*
+*                    Copy left singular vectors of A from WORK(IR) to A
+*
+                     CALL CLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
+     $                            LDA )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (CWorkspace: need 2*M, prefer M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (CWorkspace: need M+N, prefer M+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Zero out above L in A
+*
+                     CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
+     $                            A( 1, 2 ), LDA )
+*
+*                    Bidiagonalize L in A
+*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
+*                    (RWorkspace: need M)
+*
+                     CALL CGEBRD( M, M, A, LDA, S, RWORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply right bidiagonalizing vectors in A by Q
+*                    in VT
+*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNMBR( 'P', 'L', 'C', M, N, M, A, LDA,
+     $                            WORK( ITAUP ), VT, LDVT,
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in A
+*                    (CWorkspace: need 3*M, prefer 2*M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IRWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in A and computing right
+*                    singular vectors of A in VT
+*                    (CWorkspace: 0)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT,
+     $                            LDVT, A, LDA, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTUAS ) THEN
+*
+*                 Path 9t(N much larger than M, JOBU='S' or 'A',
+*                         JOBVT='A')
+*                 N right singular vectors to be computed in VT and
+*                 M left singular vectors to be computed in U
+*
+                  IF( LWORK.GE.M*M+MAX( N+M, 3*M ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
+*
+*                       WORK(IU) is LDA by M
+*
+                        LDWRKU = LDA
+                     ELSE
+*
+*                       WORK(IU) is M by M
+*
+                        LDWRKU = M
+                     END IF
+                     ITAU = IU + LDWRKU*M
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy L to WORK(IU), zeroing out above it
+*
+                     CALL CLACPY( 'L', M, M, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
+     $                            WORK( IU+LDWRKU ), LDWRKU )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in WORK(IU), copying result to U
+*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
+*                    (RWorkspace: need M)
+*
+                     CALL CGEBRD( M, M, WORK( IU ), LDWRKU, S,
+     $                            RWORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL CLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
+     $                            LDU )
+*
+*                    Generate right bidiagonalizing vectors in WORK(IU)
+*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in U
+*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IRWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of L in U and computing right
+*                    singular vectors of L in WORK(IU)
+*                    (CWorkspace: need M*M)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL CBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
+     $                            WORK( IU ), LDWRKU, U, LDU, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+*                    Multiply right singular vectors of L in WORK(IU) by
+*                    Q in VT, storing result in A
+*                    (CWorkspace: need M*M)
+*                    (RWorkspace: 0)
+*
+                     CALL CGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ),
+     $                           LDWRKU, VT, LDVT, CZERO, A, LDA )
+*
+*                    Copy right singular vectors of A from A to VT
+*
+                     CALL CLACPY( 'F', M, N, A, LDA, VT, LDVT )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (CWorkspace: need 2*M, prefer M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (CWorkspace: need M+N, prefer M+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy L to U, zeroing out above it
+*
+                     CALL CLACPY( 'L', M, M, A, LDA, U, LDU )
+                     CALL CLASET( 'U', M-1, M-1, CZERO, CZERO,
+     $                            U( 1, 2 ), LDU )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in U
+*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
+*                    (RWorkspace: need M)
+*
+                     CALL CGEBRD( M, M, U, LDU, S, RWORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply right bidiagonalizing vectors in U by Q
+*                    in VT
+*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNMBR( 'P', 'L', 'C', M, N, M, U, LDU,
+     $                            WORK( ITAUP ), VT, LDVT,
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in U
+*                    (CWorkspace: need 3*M, prefer 2*M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL CUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IRWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U and computing right
+*                    singular vectors of A in VT
+*                    (CWorkspace: 0)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL CBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT,
+     $                            LDVT, U, LDU, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+                  END IF
+*
+               END IF
+*
+            END IF
+*
+         ELSE
+*
+*           N .LT. MNTHR
+*
+*           Path 10t(N greater than M, but not much larger)
+*           Reduce to bidiagonal form without LQ decomposition
+*
+            IE = 1
+            ITAUQ = 1
+            ITAUP = ITAUQ + M
+            IWORK = ITAUP + M
+*
+*           Bidiagonalize A
+*           (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
+*           (RWorkspace: M)
+*
+            CALL CGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
+     $                   IERR )
+            IF( WNTUAS ) THEN
+*
+*              If left singular vectors desired in U, copy result to U
+*              and generate left bidiagonalizing vectors in U
+*              (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
+*              (RWorkspace: 0)
+*
+               CALL CLACPY( 'L', M, M, A, LDA, U, LDU )
+               CALL CUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IF( WNTVAS ) THEN
+*
+*              If right singular vectors desired in VT, copy result to
+*              VT and generate right bidiagonalizing vectors in VT
+*              (CWorkspace: need 2*M+NRVT, prefer 2*M+NRVT*NB)
+*              (RWorkspace: 0)
+*
+               CALL CLACPY( 'U', M, N, A, LDA, VT, LDVT )
+               IF( WNTVA )
+     $            NRVT = N
+               IF( WNTVS )
+     $            NRVT = M
+               CALL CUNGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IF( WNTUO ) THEN
+*
+*              If left singular vectors desired in A, generate left
+*              bidiagonalizing vectors in A
+*              (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
+*              (RWorkspace: 0)
+*
+               CALL CUNGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IF( WNTVO ) THEN
+*
+*              If right singular vectors desired in A, generate right
+*              bidiagonalizing vectors in A
+*              (CWorkspace: need 3*M, prefer 2*M+M*NB)
+*              (RWorkspace: 0)
+*
+               CALL CUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IRWORK = IE + M
+            IF( WNTUAS .OR. WNTUO )
+     $         NRU = M
+            IF( WNTUN )
+     $         NRU = 0
+            IF( WNTVAS .OR. WNTVO )
+     $         NCVT = N
+            IF( WNTVN )
+     $         NCVT = 0
+            IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
+*
+*              Perform bidiagonal QR iteration, if desired, computing
+*              left singular vectors in U and computing right singular
+*              vectors in VT
+*              (CWorkspace: 0)
+*              (RWorkspace: need BDSPAC)
+*
+               CALL CBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), VT,
+     $                      LDVT, U, LDU, CDUM, 1, RWORK( IRWORK ),
+     $                      INFO )
+            ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
+*
+*              Perform bidiagonal QR iteration, if desired, computing
+*              left singular vectors in U and computing right singular
+*              vectors in A
+*              (CWorkspace: 0)
+*              (RWorkspace: need BDSPAC)
+*
+               CALL CBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), A,
+     $                      LDA, U, LDU, CDUM, 1, RWORK( IRWORK ),
+     $                      INFO )
+            ELSE
+*
+*              Perform bidiagonal QR iteration, if desired, computing
+*              left singular vectors in A and computing right singular
+*              vectors in VT
+*              (CWorkspace: 0)
+*              (RWorkspace: need BDSPAC)
+*
+               CALL CBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), VT,
+     $                      LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ),
+     $                      INFO )
+            END IF
+*
+         END IF
+*
+      END IF
+*
+*     Undo scaling if necessary
+*
+      IF( ISCL.EQ.1 ) THEN
+         IF( ANRM.GT.BIGNUM )
+     $      CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
+     $                   IERR )
+         IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM )
+     $      CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1,
+     $                   RWORK( IE ), MINMN, IERR )
+         IF( ANRM.LT.SMLNUM )
+     $      CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
+     $                   IERR )
+         IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM )
+     $      CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1,
+     $                   RWORK( IE ), MINMN, IERR )
+      END IF
+*
+*     Return optimal workspace in WORK(1)
+*
+      WORK( 1 ) = MAXWRK
+*
+      RETURN
+*
+*     End of CGESVD
+*
+      END
diff --git a/costa/native/external/lapack/cgesvx.f b/costa/native/external/lapack/cgesvx.f
new file mode 100644
index 000000000..04f20659e
--- /dev/null
+++ b/costa/native/external/lapack/cgesvx.f
@@ -0,0 +1,484 @@
+      SUBROUTINE CGESVX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV,
+     $                   EQUED, R, C, B, LDB, X, LDX, RCOND, FERR, BERR,
+     $                   WORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, FACT, TRANS
+      INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               BERR( * ), C( * ), FERR( * ), R( * ),
+     $                   RWORK( * )
+      COMPLEX            A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGESVX uses the LU factorization to compute the solution to a complex
+*  system of linear equations
+*     A * X = B,
+*  where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'E', real scaling factors are computed to equilibrate
+*     the system:
+*        TRANS = 'N':  diag(R)*A*diag(C)     *inv(diag(C))*X = diag(R)*B
+*        TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
+*        TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
+*     Whether or not the system will be equilibrated depends on the
+*     scaling of the matrix A, but if equilibration is used, A is
+*     overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N')
+*     or diag(C)*B (if TRANS = 'T' or 'C').
+*
+*  2. If FACT = 'N' or 'E', the LU decomposition is used to factor the
+*     matrix A (after equilibration if FACT = 'E') as
+*        A = P * L * U,
+*     where P is a permutation matrix, L is a unit lower triangular
+*     matrix, and U is upper triangular.
+*
+*  3. If some U(i,i)=0, so that U is exactly singular, then the routine
+*     returns with INFO = i. Otherwise, the factored form of A is used
+*     to estimate the condition number of the matrix A.  If the
+*     reciprocal of the condition number is less than machine precision,
+*     INFO = N+1 is returned as a warning, but the routine still goes on
+*     to solve for X and compute error bounds as described below.
+*
+*  4. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  5. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  6. If equilibration was used, the matrix X is premultiplied by
+*     diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so
+*     that it solves the original system before equilibration.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of the matrix A is
+*          supplied on entry, and if not, whether the matrix A should be
+*          equilibrated before it is factored.
+*          = 'F':  On entry, AF and IPIV contain the factored form of A.
+*                  If EQUED is not 'N', the matrix A has been
+*                  equilibrated with scaling factors given by R and C.
+*                  A, AF, and IPIV are not modified.
+*          = 'N':  The matrix A will be copied to AF and factored.
+*          = 'E':  The matrix A will be equilibrated if necessary, then
+*                  copied to AF and factored.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose)
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the N-by-N matrix A.  If FACT = 'F' and EQUED is
+*          not 'N', then A must have been equilibrated by the scaling
+*          factors in R and/or C.  A is not modified if FACT = 'F' or
+*          'N', or if FACT = 'E' and EQUED = 'N' on exit.
+*
+*          On exit, if EQUED .ne. 'N', A is scaled as follows:
+*          EQUED = 'R':  A := diag(R) * A
+*          EQUED = 'C':  A := A * diag(C)
+*          EQUED = 'B':  A := diag(R) * A * diag(C).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  AF      (input or output) COMPLEX array, dimension (LDAF,N)
+*          If FACT = 'F', then AF is an input argument and on entry
+*          contains the factors L and U from the factorization
+*          A = P*L*U as computed by CGETRF.  If EQUED .ne. 'N', then
+*          AF is the factored form of the equilibrated matrix A.
+*
+*          If FACT = 'N', then AF is an output argument and on exit
+*          returns the factors L and U from the factorization A = P*L*U
+*          of the original matrix A.
+*
+*          If FACT = 'E', then AF is an output argument and on exit
+*          returns the factors L and U from the factorization A = P*L*U
+*          of the equilibrated matrix A (see the description of A for
+*          the form of the equilibrated matrix).
+*
+*  LDAF    (input) INTEGER
+*          The leading dimension of the array AF.  LDAF >= max(1,N).
+*
+*  IPIV    (input or output) INTEGER array, dimension (N)
+*          If FACT = 'F', then IPIV is an input argument and on entry
+*          contains the pivot indices from the factorization A = P*L*U
+*          as computed by CGETRF; row i of the matrix was interchanged
+*          with row IPIV(i).
+*
+*          If FACT = 'N', then IPIV is an output argument and on exit
+*          contains the pivot indices from the factorization A = P*L*U
+*          of the original matrix A.
+*
+*          If FACT = 'E', then IPIV is an output argument and on exit
+*          contains the pivot indices from the factorization A = P*L*U
+*          of the equilibrated matrix A.
+*
+*  EQUED   (input or output) CHARACTER*1
+*          Specifies the form of equilibration that was done.
+*          = 'N':  No equilibration (always true if FACT = 'N').
+*          = 'R':  Row equilibration, i.e., A has been premultiplied by
+*                  diag(R).
+*          = 'C':  Column equilibration, i.e., A has been postmultiplied
+*                  by diag(C).
+*          = 'B':  Both row and column equilibration, i.e., A has been
+*                  replaced by diag(R) * A * diag(C).
+*          EQUED is an input argument if FACT = 'F'; otherwise, it is an
+*          output argument.
+*
+*  R       (input or output) REAL array, dimension (N)
+*          The row scale factors for A.  If EQUED = 'R' or 'B', A is
+*          multiplied on the left by diag(R); if EQUED = 'N' or 'C', R
+*          is not accessed.  R is an input argument if FACT = 'F';
+*          otherwise, R is an output argument.  If FACT = 'F' and
+*          EQUED = 'R' or 'B', each element of R must be positive.
+*
+*  C       (input or output) REAL array, dimension (N)
+*          The column scale factors for A.  If EQUED = 'C' or 'B', A is
+*          multiplied on the right by diag(C); if EQUED = 'N' or 'R', C
+*          is not accessed.  C is an input argument if FACT = 'F';
+*          otherwise, C is an output argument.  If FACT = 'F' and
+*          EQUED = 'C' or 'B', each element of C must be positive.
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit,
+*          if EQUED = 'N', B is not modified;
+*          if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by
+*          diag(R)*B;
+*          if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is
+*          overwritten by diag(C)*B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) COMPLEX array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X
+*          to the original system of equations.  Note that A and B are
+*          modified on exit if EQUED .ne. 'N', and the solution to the
+*          equilibrated system is inv(diag(C))*X if TRANS = 'N' and
+*          EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C'
+*          and EQUED = 'R' or 'B'.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) REAL
+*          The estimate of the reciprocal condition number of the matrix
+*          A after equilibration (if done).  If RCOND is less than the
+*          machine precision (in particular, if RCOND = 0), the matrix
+*          is singular to working precision.  This condition is
+*          indicated by a return code of INFO > 0.
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace/output) REAL array, dimension (2*N)
+*          On exit, RWORK(1) contains the reciprocal pivot growth
+*          factor norm(A)/norm(U). The "max absolute element" norm is
+*          used. If RWORK(1) is much less than 1, then the stability
+*          of the LU factorization of the (equilibrated) matrix A
+*          could be poor. This also means that the solution X, condition
+*          estimator RCOND, and forward error bound FERR could be
+*          unreliable. If factorization fails with 0<INFO<=N, then
+*          RWORK(1) contains the reciprocal pivot growth factor for the
+*          leading INFO columns of A.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= N:  U(i,i) is exactly zero.  The factorization has
+*                       been completed, but the factor U is exactly
+*                       singular, so the solution and error bounds
+*                       could not be computed. RCOND = 0 is returned.
+*                = N+1: U is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU
+      CHARACTER          NORM
+      INTEGER            I, INFEQU, J
+      REAL               AMAX, ANORM, BIGNUM, COLCND, RCMAX, RCMIN,
+     $                   ROWCND, RPVGRW, SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               CLANGE, CLANTR, SLAMCH
+      EXTERNAL           LSAME, CLANGE, CLANTR, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGECON, CGEEQU, CGERFS, CGETRF, CGETRS, CLACPY,
+     $                   CLAQGE, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      EQUIL = LSAME( FACT, 'E' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( NOFACT .OR. EQUIL ) THEN
+         EQUED = 'N'
+         ROWEQU = .FALSE.
+         COLEQU = .FALSE.
+      ELSE
+         ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' )
+         COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' )
+         SMLNUM = SLAMCH( 'Safe minimum' )
+         BIGNUM = ONE / SMLNUM
+      END IF
+*
+*     Test the input parameters.
+*
+      IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) )
+     $     THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT.
+     $         ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN
+         INFO = -10
+      ELSE
+         IF( ROWEQU ) THEN
+            RCMIN = BIGNUM
+            RCMAX = ZERO
+            DO 10 J = 1, N
+               RCMIN = MIN( RCMIN, R( J ) )
+               RCMAX = MAX( RCMAX, R( J ) )
+   10       CONTINUE
+            IF( RCMIN.LE.ZERO ) THEN
+               INFO = -11
+            ELSE IF( N.GT.0 ) THEN
+               ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+            ELSE
+               ROWCND = ONE
+            END IF
+         END IF
+         IF( COLEQU .AND. INFO.EQ.0 ) THEN
+            RCMIN = BIGNUM
+            RCMAX = ZERO
+            DO 20 J = 1, N
+               RCMIN = MIN( RCMIN, C( J ) )
+               RCMAX = MAX( RCMAX, C( J ) )
+   20       CONTINUE
+            IF( RCMIN.LE.ZERO ) THEN
+               INFO = -12
+            ELSE IF( N.GT.0 ) THEN
+               COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+            ELSE
+               COLCND = ONE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            IF( LDB.LT.MAX( 1, N ) ) THEN
+               INFO = -14
+            ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+               INFO = -16
+            END IF
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGESVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( EQUIL ) THEN
+*
+*        Compute row and column scalings to equilibrate the matrix A.
+*
+         CALL CGEEQU( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX, INFEQU )
+         IF( INFEQU.EQ.0 ) THEN
+*
+*           Equilibrate the matrix.
+*
+            CALL CLAQGE( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX,
+     $                   EQUED )
+            ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' )
+            COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' )
+         END IF
+      END IF
+*
+*     Scale the right hand side.
+*
+      IF( NOTRAN ) THEN
+         IF( ROWEQU ) THEN
+            DO 40 J = 1, NRHS
+               DO 30 I = 1, N
+                  B( I, J ) = R( I )*B( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+      ELSE IF( COLEQU ) THEN
+         DO 60 J = 1, NRHS
+            DO 50 I = 1, N
+               B( I, J ) = C( I )*B( I, J )
+   50       CONTINUE
+   60    CONTINUE
+      END IF
+*
+      IF( NOFACT .OR. EQUIL ) THEN
+*
+*        Compute the LU factorization of A.
+*
+         CALL CLACPY( 'Full', N, N, A, LDA, AF, LDAF )
+         CALL CGETRF( N, N, AF, LDAF, IPIV, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 ) THEN
+*
+*              Compute the reciprocal pivot growth factor of the
+*              leading rank-deficient INFO columns of A.
+*
+               RPVGRW = CLANTR( 'M', 'U', 'N', INFO, INFO, AF, LDAF,
+     $                  RWORK )
+               IF( RPVGRW.EQ.ZERO ) THEN
+                  RPVGRW = ONE
+               ELSE
+                  RPVGRW = CLANGE( 'M', N, INFO, A, LDA, RWORK ) /
+     $                     RPVGRW
+               END IF
+               RWORK( 1 ) = RPVGRW
+               RCOND = ZERO
+            END IF
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A and the
+*     reciprocal pivot growth factor RPVGRW.
+*
+      IF( NOTRAN ) THEN
+         NORM = '1'
+      ELSE
+         NORM = 'I'
+      END IF
+      ANORM = CLANGE( NORM, N, N, A, LDA, RWORK )
+      RPVGRW = CLANTR( 'M', 'U', 'N', N, N, AF, LDAF, RWORK )
+      IF( RPVGRW.EQ.ZERO ) THEN
+         RPVGRW = ONE
+      ELSE
+         RPVGRW = CLANGE( 'M', N, N, A, LDA, RWORK ) / RPVGRW
+      END IF
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL CGECON( NORM, N, AF, LDAF, ANORM, RCOND, WORK, RWORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.SLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution matrix X.
+*
+      CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL CGETRS( TRANS, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solution and
+*     compute error bounds and backward error estimates for it.
+*
+      CALL CGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, X,
+     $             LDX, FERR, BERR, WORK, RWORK, INFO )
+*
+*     Transform the solution matrix X to a solution of the original
+*     system.
+*
+      IF( NOTRAN ) THEN
+         IF( COLEQU ) THEN
+            DO 80 J = 1, NRHS
+               DO 70 I = 1, N
+                  X( I, J ) = C( I )*X( I, J )
+   70          CONTINUE
+   80       CONTINUE
+            DO 90 J = 1, NRHS
+               FERR( J ) = FERR( J ) / COLCND
+   90       CONTINUE
+         END IF
+      ELSE IF( ROWEQU ) THEN
+         DO 110 J = 1, NRHS
+            DO 100 I = 1, N
+               X( I, J ) = R( I )*X( I, J )
+  100       CONTINUE
+  110    CONTINUE
+         DO 120 J = 1, NRHS
+            FERR( J ) = FERR( J ) / ROWCND
+  120    CONTINUE
+      END IF
+*
+      RWORK( 1 ) = RPVGRW
+      RETURN
+*
+*     End of CGESVX
+*
+      END
diff --git a/costa/native/external/lapack/cgetc2.f b/costa/native/external/lapack/cgetc2.f
new file mode 100644
index 000000000..1ab4b9d05
--- /dev/null
+++ b/costa/native/external/lapack/cgetc2.f
@@ -0,0 +1,146 @@
+      SUBROUTINE CGETC2( N, A, LDA, IPIV, JPIV, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), JPIV( * )
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGETC2 computes an LU factorization, using complete pivoting, of the
+*  n-by-n matrix A. The factorization has the form A = P * L * U * Q,
+*  where P and Q are permutation matrices, L is lower triangular with
+*  unit diagonal elements and U is upper triangular.
+*
+*  This is a level 1 BLAS version of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A. N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA, N)
+*          On entry, the n-by-n matrix to be factored.
+*          On exit, the factors L and U from the factorization
+*          A = P*L*U*Q; the unit diagonal elements of L are not stored.
+*          If U(k, k) appears to be less than SMIN, U(k, k) is given the
+*          value of SMIN, giving a nonsingular perturbed system.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1, N).
+*
+*  IPIV    (output) INTEGER array, dimension (N).
+*          The pivot indices; for 1 <= i <= N, row i of the
+*          matrix has been interchanged with row IPIV(i).
+*
+*  JPIV    (output) INTEGER array, dimension (N).
+*          The pivot indices; for 1 <= j <= N, column j of the
+*          matrix has been interchanged with column JPIV(j).
+*
+*  INFO    (output) INTEGER
+*           = 0: successful exit
+*           > 0: if INFO = k, U(k, k) is likely to produce overflow if
+*                one tries to solve for x in Ax = b. So U is perturbed
+*                to avoid the overflow.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IP, IPV, J, JP, JPV
+      REAL               BIGNUM, EPS, SMIN, SMLNUM, XMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGERU, CSWAP, SLABAD
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, CMPLX, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Set constants to control overflow
+*
+      INFO = 0
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SLAMCH( 'S' ) / EPS
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+*
+*     Factorize A using complete pivoting.
+*     Set pivots less than SMIN to SMIN
+*
+      DO 40 I = 1, N - 1
+*
+*        Find max element in matrix A
+*
+         XMAX = ZERO
+         DO 20 IP = I, N
+            DO 10 JP = I, N
+               IF( ABS( A( IP, JP ) ).GE.XMAX ) THEN
+                  XMAX = ABS( A( IP, JP ) )
+                  IPV = IP
+                  JPV = JP
+               END IF
+   10       CONTINUE
+   20    CONTINUE
+         IF( I.EQ.1 )
+     $      SMIN = MAX( EPS*XMAX, SMLNUM )
+*
+*        Swap rows
+*
+         IF( IPV.NE.I )
+     $      CALL CSWAP( N, A( IPV, 1 ), LDA, A( I, 1 ), LDA )
+         IPIV( I ) = IPV
+*
+*        Swap columns
+*
+         IF( JPV.NE.I )
+     $      CALL CSWAP( N, A( 1, JPV ), 1, A( 1, I ), 1 )
+         JPIV( I ) = JPV
+*
+*        Check for singularity
+*
+         IF( ABS( A( I, I ) ).LT.SMIN ) THEN
+            INFO = I
+            A( I, I ) = CMPLX( SMIN, ZERO )
+         END IF
+         DO 30 J = I + 1, N
+            A( J, I ) = A( J, I ) / A( I, I )
+   30    CONTINUE
+         CALL CGERU( N-I, N-I, -CMPLX( ONE ), A( I+1, I ), 1,
+     $               A( I, I+1 ), LDA, A( I+1, I+1 ), LDA )
+   40 CONTINUE
+*
+      IF( ABS( A( N, N ) ).LT.SMIN ) THEN
+         INFO = N
+         A( N, N ) = CMPLX( SMIN, ZERO )
+      END IF
+      RETURN
+*
+*     End of CGETC2
+*
+      END
diff --git a/costa/native/external/lapack/cgetf2.f b/costa/native/external/lapack/cgetf2.f
new file mode 100644
index 000000000..9abcaba47
--- /dev/null
+++ b/costa/native/external/lapack/cgetf2.f
@@ -0,0 +1,136 @@
+      SUBROUTINE CGETF2( M, N, A, LDA, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGETF2 computes an LU factorization of a general m-by-n matrix A
+*  using partial pivoting with row interchanges.
+*
+*  The factorization has the form
+*     A = P * L * U
+*  where P is a permutation matrix, L is lower triangular with unit
+*  diagonal elements (lower trapezoidal if m > n), and U is upper
+*  triangular (upper trapezoidal if m < n).
+*
+*  This is the right-looking Level 2 BLAS version of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the m by n matrix to be factored.
+*          On exit, the factors L and U from the factorization
+*          A = P*L*U; the unit diagonal elements of L are not stored.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  IPIV    (output) INTEGER array, dimension (min(M,N))
+*          The pivot indices; for 1 <= i <= min(M,N), row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*          > 0: if INFO = k, U(k,k) is exactly zero. The factorization
+*               has been completed, but the factor U is exactly
+*               singular, and division by zero will occur if it is used
+*               to solve a system of equations.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ),
+     $                   ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            J, JP
+*     ..
+*     .. External Functions ..
+      INTEGER            ICAMAX
+      EXTERNAL           ICAMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGERU, CSCAL, CSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGETF2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+      DO 10 J = 1, MIN( M, N )
+*
+*        Find pivot and test for singularity.
+*
+         JP = J - 1 + ICAMAX( M-J+1, A( J, J ), 1 )
+         IPIV( J ) = JP
+         IF( A( JP, J ).NE.ZERO ) THEN
+*
+*           Apply the interchange to columns 1:N.
+*
+            IF( JP.NE.J )
+     $         CALL CSWAP( N, A( J, 1 ), LDA, A( JP, 1 ), LDA )
+*
+*           Compute elements J+1:M of J-th column.
+*
+            IF( J.LT.M )
+     $         CALL CSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 )
+*
+         ELSE IF( INFO.EQ.0 ) THEN
+*
+            INFO = J
+         END IF
+*
+         IF( J.LT.MIN( M, N ) ) THEN
+*
+*           Update trailing submatrix.
+*
+            CALL CGERU( M-J, N-J, -ONE, A( J+1, J ), 1, A( J, J+1 ),
+     $                  LDA, A( J+1, J+1 ), LDA )
+         END IF
+   10 CONTINUE
+      RETURN
+*
+*     End of CGETF2
+*
+      END
diff --git a/costa/native/external/lapack/cgetrf.f b/costa/native/external/lapack/cgetrf.f
new file mode 100644
index 000000000..4d288f8c3
--- /dev/null
+++ b/costa/native/external/lapack/cgetrf.f
@@ -0,0 +1,160 @@
+      SUBROUTINE CGETRF( M, N, A, LDA, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGETRF computes an LU factorization of a general M-by-N matrix A
+*  using partial pivoting with row interchanges.
+*
+*  The factorization has the form
+*     A = P * L * U
+*  where P is a permutation matrix, L is lower triangular with unit
+*  diagonal elements (lower trapezoidal if m > n), and U is upper
+*  triangular (upper trapezoidal if m < n).
+*
+*  This is the right-looking Level 3 BLAS version of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the M-by-N matrix to be factored.
+*          On exit, the factors L and U from the factorization
+*          A = P*L*U; the unit diagonal elements of L are not stored.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  IPIV    (output) INTEGER array, dimension (min(M,N))
+*          The pivot indices; for 1 <= i <= min(M,N), row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
+*                has been completed, but the factor U is exactly
+*                singular, and division by zero will occur if it is used
+*                to solve a system of equations.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IINFO, J, JB, NB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEMM, CGETF2, CLASWP, CTRSM, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGETRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'CGETRF', ' ', M, N, -1, -1 )
+      IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN
+*
+*        Use unblocked code.
+*
+         CALL CGETF2( M, N, A, LDA, IPIV, INFO )
+      ELSE
+*
+*        Use blocked code.
+*
+         DO 20 J = 1, MIN( M, N ), NB
+            JB = MIN( MIN( M, N )-J+1, NB )
+*
+*           Factor diagonal and subdiagonal blocks and test for exact
+*           singularity.
+*
+            CALL CGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO )
+*
+*           Adjust INFO and the pivot indices.
+*
+            IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+     $         INFO = IINFO + J - 1
+            DO 10 I = J, MIN( M, J+JB-1 )
+               IPIV( I ) = J - 1 + IPIV( I )
+   10       CONTINUE
+*
+*           Apply interchanges to columns 1:J-1.
+*
+            CALL CLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 )
+*
+            IF( J+JB.LE.N ) THEN
+*
+*              Apply interchanges to columns J+JB:N.
+*
+               CALL CLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1,
+     $                      IPIV, 1 )
+*
+*              Compute block row of U.
+*
+               CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', JB,
+     $                     N-J-JB+1, ONE, A( J, J ), LDA, A( J, J+JB ),
+     $                     LDA )
+               IF( J+JB.LE.M ) THEN
+*
+*                 Update trailing submatrix.
+*
+                  CALL CGEMM( 'No transpose', 'No transpose', M-J-JB+1,
+     $                        N-J-JB+1, JB, -ONE, A( J+JB, J ), LDA,
+     $                        A( J, J+JB ), LDA, ONE, A( J+JB, J+JB ),
+     $                        LDA )
+               END IF
+            END IF
+   20    CONTINUE
+      END IF
+      RETURN
+*
+*     End of CGETRF
+*
+      END
diff --git a/costa/native/external/lapack/cgetri.f b/costa/native/external/lapack/cgetri.f
new file mode 100644
index 000000000..6840f531c
--- /dev/null
+++ b/costa/native/external/lapack/cgetri.f
@@ -0,0 +1,194 @@
+      SUBROUTINE CGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGETRI computes the inverse of a matrix using the LU factorization
+*  computed by CGETRF.
+*
+*  This method inverts U and then computes inv(A) by solving the system
+*  inv(A)*L = inv(U) for inv(A).
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the factors L and U from the factorization
+*          A = P*L*U as computed by CGETRF.
+*          On exit, if INFO = 0, the inverse of the original matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices from CGETRF; for 1<=i<=N, row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO=0, then WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,N).
+*          For optimal performance LWORK >= N*NB, where NB is
+*          the optimal blocksize returned by ILAENV.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, U(i,i) is exactly zero; the matrix is
+*                singular and its inverse could not be computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ZERO, ONE
+      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB,
+     $                   NBMIN, NN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEMM, CGEMV, CSWAP, CTRSM, CTRTRI, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NB = ILAENV( 1, 'CGETRI', ' ', N, -1, -1, -1 )
+      LWKOPT = N*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -3
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGETRI', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form inv(U).  If INFO > 0 from CTRTRI, then U is singular,
+*     and the inverse is not computed.
+*
+      CALL CTRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO )
+      IF( INFO.GT.0 )
+     $   RETURN
+*
+      NBMIN = 2
+      LDWORK = N
+      IF( NB.GT.1 .AND. NB.LT.N ) THEN
+         IWS = MAX( LDWORK*NB, 1 )
+         IF( LWORK.LT.IWS ) THEN
+            NB = LWORK / LDWORK
+            NBMIN = MAX( 2, ILAENV( 2, 'CGETRI', ' ', N, -1, -1, -1 ) )
+         END IF
+      ELSE
+         IWS = N
+      END IF
+*
+*     Solve the equation inv(A)*L = inv(U) for inv(A).
+*
+      IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN
+*
+*        Use unblocked code.
+*
+         DO 20 J = N, 1, -1
+*
+*           Copy current column of L to WORK and replace with zeros.
+*
+            DO 10 I = J + 1, N
+               WORK( I ) = A( I, J )
+               A( I, J ) = ZERO
+   10       CONTINUE
+*
+*           Compute current column of inv(A).
+*
+            IF( J.LT.N )
+     $         CALL CGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ),
+     $                     LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 )
+   20    CONTINUE
+      ELSE
+*
+*        Use blocked code.
+*
+         NN = ( ( N-1 ) / NB )*NB + 1
+         DO 50 J = NN, 1, -NB
+            JB = MIN( NB, N-J+1 )
+*
+*           Copy current block column of L to WORK and replace with
+*           zeros.
+*
+            DO 40 JJ = J, J + JB - 1
+               DO 30 I = JJ + 1, N
+                  WORK( I+( JJ-J )*LDWORK ) = A( I, JJ )
+                  A( I, JJ ) = ZERO
+   30          CONTINUE
+   40       CONTINUE
+*
+*           Compute current block column of inv(A).
+*
+            IF( J+JB.LE.N )
+     $         CALL CGEMM( 'No transpose', 'No transpose', N, JB,
+     $                     N-J-JB+1, -ONE, A( 1, J+JB ), LDA,
+     $                     WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA )
+            CALL CTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB,
+     $                  ONE, WORK( J ), LDWORK, A( 1, J ), LDA )
+   50    CONTINUE
+      END IF
+*
+*     Apply column interchanges.
+*
+      DO 60 J = N - 1, 1, -1
+         JP = IPIV( J )
+         IF( JP.NE.J )
+     $      CALL CSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 )
+   60 CONTINUE
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of CGETRI
+*
+      END
diff --git a/costa/native/external/lapack/cgetrs.f b/costa/native/external/lapack/cgetrs.f
new file mode 100644
index 000000000..d8a78f845
--- /dev/null
+++ b/costa/native/external/lapack/cgetrs.f
@@ -0,0 +1,150 @@
+      SUBROUTINE CGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            INFO, LDA, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGETRS solves a system of linear equations
+*     A * X = B,  A**T * X = B,  or  A**H * X = B
+*  with a general N-by-N matrix A using the LU factorization computed
+*  by CGETRF.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input) COMPLEX array, dimension (LDA,N)
+*          The factors L and U from the factorization A = P*L*U
+*          as computed by CGETRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices from CGETRF; for 1<=i<=N, row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLASWP, CTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $    LSAME( TRANS, 'C' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGETRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( NOTRAN ) THEN
+*
+*        Solve A * X = B.
+*
+*        Apply row interchanges to the right hand sides.
+*
+         CALL CLASWP( NRHS, B, LDB, 1, N, IPIV, 1 )
+*
+*        Solve L*X = B, overwriting B with X.
+*
+         CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', N, NRHS,
+     $               ONE, A, LDA, B, LDB )
+*
+*        Solve U*X = B, overwriting B with X.
+*
+         CALL CTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N,
+     $               NRHS, ONE, A, LDA, B, LDB )
+      ELSE
+*
+*        Solve A**T * X = B  or A**H * X = B.
+*
+*        Solve U'*X = B, overwriting B with X.
+*
+         CALL CTRSM( 'Left', 'Upper', TRANS, 'Non-unit', N, NRHS, ONE,
+     $               A, LDA, B, LDB )
+*
+*        Solve L'*X = B, overwriting B with X.
+*
+         CALL CTRSM( 'Left', 'Lower', TRANS, 'Unit', N, NRHS, ONE, A,
+     $               LDA, B, LDB )
+*
+*        Apply row interchanges to the solution vectors.
+*
+         CALL CLASWP( NRHS, B, LDB, 1, N, IPIV, -1 )
+      END IF
+*
+      RETURN
+*
+*     End of CGETRS
+*
+      END
diff --git a/costa/native/external/lapack/cggbak.f b/costa/native/external/lapack/cggbak.f
new file mode 100644
index 000000000..0ae1d2994
--- /dev/null
+++ b/costa/native/external/lapack/cggbak.f
@@ -0,0 +1,216 @@
+      SUBROUTINE CGGBAK( JOB, SIDE, N, ILO, IHI, LSCALE, RSCALE, M, V,
+     $                   LDV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOB, SIDE
+      INTEGER            IHI, ILO, INFO, LDV, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               LSCALE( * ), RSCALE( * )
+      COMPLEX            V( LDV, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGGBAK forms the right or left eigenvectors of a complex generalized
+*  eigenvalue problem A*x = lambda*B*x, by backward transformation on
+*  the computed eigenvectors of the balanced pair of matrices output by
+*  CGGBAL.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          Specifies the type of backward transformation required:
+*          = 'N':  do nothing, return immediately;
+*          = 'P':  do backward transformation for permutation only;
+*          = 'S':  do backward transformation for scaling only;
+*          = 'B':  do backward transformations for both permutation and
+*                  scaling.
+*          JOB must be the same as the argument JOB supplied to CGGBAL.
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'R':  V contains right eigenvectors;
+*          = 'L':  V contains left eigenvectors.
+*
+*  N       (input) INTEGER
+*          The number of rows of the matrix V.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          The integers ILO and IHI determined by CGGBAL.
+*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+*
+*  LSCALE  (input) REAL array, dimension (N)
+*          Details of the permutations and/or scaling factors applied
+*          to the left side of A and B, as returned by CGGBAL.
+*
+*  RSCALE  (input) REAL array, dimension (N)
+*          Details of the permutations and/or scaling factors applied
+*          to the right side of A and B, as returned by CGGBAL.
+*
+*  M       (input) INTEGER
+*          The number of columns of the matrix V.  M >= 0.
+*
+*  V       (input/output) COMPLEX array, dimension (LDV,M)
+*          On entry, the matrix of right or left eigenvectors to be
+*          transformed, as returned by CTGEVC.
+*          On exit, V is overwritten by the transformed eigenvectors.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the matrix V. LDV >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  See R.C. Ward, Balancing the generalized eigenvalue problem,
+*                 SIAM J. Sci. Stat. Comp. 2 (1981), 141-152.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LEFTV, RIGHTV
+      INTEGER            I, K
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CSSCAL, CSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      RIGHTV = LSAME( SIDE, 'R' )
+      LEFTV = LSAME( SIDE, 'L' )
+*
+      INFO = 0
+      IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND.
+     $    .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( ILO.LT.1 ) THEN
+         INFO = -4
+      ELSE IF( IHI.LT.ILO .OR. IHI.GT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDV.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGGBAK', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+      IF( M.EQ.0 )
+     $   RETURN
+      IF( LSAME( JOB, 'N' ) )
+     $   RETURN
+*
+      IF( ILO.EQ.IHI )
+     $   GO TO 30
+*
+*     Backward balance
+*
+      IF( LSAME( JOB, 'S' ) .OR. LSAME( JOB, 'B' ) ) THEN
+*
+*        Backward transformation on right eigenvectors
+*
+         IF( RIGHTV ) THEN
+            DO 10 I = ILO, IHI
+               CALL CSSCAL( M, RSCALE( I ), V( I, 1 ), LDV )
+   10       CONTINUE
+         END IF
+*
+*        Backward transformation on left eigenvectors
+*
+         IF( LEFTV ) THEN
+            DO 20 I = ILO, IHI
+               CALL CSSCAL( M, LSCALE( I ), V( I, 1 ), LDV )
+   20       CONTINUE
+         END IF
+      END IF
+*
+*     Backward permutation
+*
+   30 CONTINUE
+      IF( LSAME( JOB, 'P' ) .OR. LSAME( JOB, 'B' ) ) THEN
+*
+*        Backward permutation on right eigenvectors
+*
+         IF( RIGHTV ) THEN
+            IF( ILO.EQ.1 )
+     $         GO TO 50
+            DO 40 I = ILO - 1, 1, -1
+               K = RSCALE( I )
+               IF( K.EQ.I )
+     $            GO TO 40
+               CALL CSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV )
+   40       CONTINUE
+*
+   50       CONTINUE
+            IF( IHI.EQ.N )
+     $         GO TO 70
+            DO 60 I = IHI + 1, N
+               K = RSCALE( I )
+               IF( K.EQ.I )
+     $            GO TO 60
+               CALL CSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV )
+   60       CONTINUE
+         END IF
+*
+*        Backward permutation on left eigenvectors
+*
+   70    CONTINUE
+         IF( LEFTV ) THEN
+            IF( ILO.EQ.1 )
+     $         GO TO 90
+            DO 80 I = ILO - 1, 1, -1
+               K = LSCALE( I )
+               IF( K.EQ.I )
+     $            GO TO 80
+               CALL CSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV )
+   80       CONTINUE
+*
+   90       CONTINUE
+            IF( IHI.EQ.N )
+     $         GO TO 110
+            DO 100 I = IHI + 1, N
+               K = LSCALE( I )
+               IF( K.EQ.I )
+     $            GO TO 100
+               CALL CSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV )
+  100       CONTINUE
+         END IF
+      END IF
+*
+  110 CONTINUE
+*
+      RETURN
+*
+*     End of CGGBAK
+*
+      END
diff --git a/costa/native/external/lapack/cggbal.f b/costa/native/external/lapack/cggbal.f
new file mode 100644
index 000000000..cc258d485
--- /dev/null
+++ b/costa/native/external/lapack/cggbal.f
@@ -0,0 +1,474 @@
+      SUBROUTINE CGGBAL( JOB, N, A, LDA, B, LDB, ILO, IHI, LSCALE,
+     $                   RSCALE, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOB
+      INTEGER            IHI, ILO, INFO, LDA, LDB, N
+*     ..
+*     .. Array Arguments ..
+      REAL               LSCALE( * ), RSCALE( * ), WORK( * )
+      COMPLEX            A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGGBAL balances a pair of general complex matrices (A,B).  This
+*  involves, first, permuting A and B by similarity transformations to
+*  isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N
+*  elements on the diagonal; and second, applying a diagonal similarity
+*  transformation to rows and columns ILO to IHI to make the rows
+*  and columns as close in norm as possible. Both steps are optional.
+*
+*  Balancing may reduce the 1-norm of the matrices, and improve the
+*  accuracy of the computed eigenvalues and/or eigenvectors in the
+*  generalized eigenvalue problem A*x = lambda*B*x.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          Specifies the operations to be performed on A and B:
+*          = 'N':  none:  simply set ILO = 1, IHI = N, LSCALE(I) = 1.0
+*                  and RSCALE(I) = 1.0 for i=1,...,N;
+*          = 'P':  permute only;
+*          = 'S':  scale only;
+*          = 'B':  both permute and scale.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the input matrix A.
+*          On exit, A is overwritten by the balanced matrix.
+*          If JOB = 'N', A is not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,N)
+*          On entry, the input matrix B.
+*          On exit, B is overwritten by the balanced matrix.
+*          If JOB = 'N', B is not referenced.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  ILO     (output) INTEGER
+*  IHI     (output) INTEGER
+*          ILO and IHI are set to integers such that on exit
+*          A(i,j) = 0 and B(i,j) = 0 if i > j and
+*          j = 1,...,ILO-1 or i = IHI+1,...,N.
+*          If JOB = 'N' or 'S', ILO = 1 and IHI = N.
+*
+*  LSCALE  (output) REAL array, dimension (N)
+*          Details of the permutations and scaling factors applied
+*          to the left side of A and B.  If P(j) is the index of the
+*          row interchanged with row j, and D(j) is the scaling factor
+*          applied to row j, then
+*            LSCALE(j) = P(j)    for J = 1,...,ILO-1
+*                      = D(j)    for J = ILO,...,IHI
+*                      = P(j)    for J = IHI+1,...,N.
+*          The order in which the interchanges are made is N to IHI+1,
+*          then 1 to ILO-1.
+*
+*  RSCALE  (output) REAL array, dimension (N)
+*          Details of the permutations and scaling factors applied
+*          to the right side of A and B.  If P(j) is the index of the
+*          column interchanged with column j, and D(j) is the scaling
+*          factor applied to column j, then
+*            RSCALE(j) = P(j)    for J = 1,...,ILO-1
+*                      = D(j)    for J = ILO,...,IHI
+*                      = P(j)    for J = IHI+1,...,N.
+*          The order in which the interchanges are made is N to IHI+1,
+*          then 1 to ILO-1.
+*
+*  WORK    (workspace) REAL array, dimension (6*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  See R.C. WARD, Balancing the generalized eigenvalue problem,
+*                 SIAM J. Sci. Stat. Comp. 2 (1981), 141-152.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, HALF, ONE
+      PARAMETER          ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0 )
+      REAL               THREE, SCLFAC
+      PARAMETER          ( THREE = 3.0E+0, SCLFAC = 1.0E+1 )
+      COMPLEX            CZERO
+      PARAMETER          ( CZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, ICAB, IFLOW, IP1, IR, IRAB, IT, J, JC, JP1,
+     $                   K, KOUNT, L, LCAB, LM1, LRAB, LSFMAX, LSFMIN,
+     $                   M, NR, NRP2
+      REAL               ALPHA, BASL, BETA, CAB, CMAX, COEF, COEF2,
+     $                   COEF5, COR, EW, EWC, GAMMA, PGAMMA, RAB, SFMAX,
+     $                   SFMIN, SUM, T, TA, TB, TC
+      COMPLEX            CDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ICAMAX
+      REAL               SDOT, SLAMCH
+      EXTERNAL           LSAME, ICAMAX, SDOT, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CSSCAL, CSWAP, SAXPY, SSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, INT, LOG10, MAX, MIN, REAL, SIGN
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND.
+     $    .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGGBAL', -INFO )
+         RETURN
+      END IF
+*
+      K = 1
+      L = N
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( LSAME( JOB, 'N' ) ) THEN
+         ILO = 1
+         IHI = N
+         DO 10 I = 1, N
+            LSCALE( I ) = ONE
+            RSCALE( I ) = ONE
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      IF( K.EQ.L ) THEN
+         ILO = 1
+         IHI = 1
+         LSCALE( 1 ) = ONE
+         RSCALE( 1 ) = ONE
+         RETURN
+      END IF
+*
+      IF( LSAME( JOB, 'S' ) )
+     $   GO TO 190
+*
+      GO TO 30
+*
+*     Permute the matrices A and B to isolate the eigenvalues.
+*
+*     Find row with one nonzero in columns 1 through L
+*
+   20 CONTINUE
+      L = LM1
+      IF( L.NE.1 )
+     $   GO TO 30
+*
+      RSCALE( 1 ) = 1
+      LSCALE( 1 ) = 1
+      GO TO 190
+*
+   30 CONTINUE
+      LM1 = L - 1
+      DO 80 I = L, 1, -1
+         DO 40 J = 1, LM1
+            JP1 = J + 1
+            IF( A( I, J ).NE.CZERO .OR. B( I, J ).NE.CZERO )
+     $         GO TO 50
+   40    CONTINUE
+         J = L
+         GO TO 70
+*
+   50    CONTINUE
+         DO 60 J = JP1, L
+            IF( A( I, J ).NE.CZERO .OR. B( I, J ).NE.CZERO )
+     $         GO TO 80
+   60    CONTINUE
+         J = JP1 - 1
+*
+   70    CONTINUE
+         M = L
+         IFLOW = 1
+         GO TO 160
+   80 CONTINUE
+      GO TO 100
+*
+*     Find column with one nonzero in rows K through N
+*
+   90 CONTINUE
+      K = K + 1
+*
+  100 CONTINUE
+      DO 150 J = K, L
+         DO 110 I = K, LM1
+            IP1 = I + 1
+            IF( A( I, J ).NE.CZERO .OR. B( I, J ).NE.CZERO )
+     $         GO TO 120
+  110    CONTINUE
+         I = L
+         GO TO 140
+  120    CONTINUE
+         DO 130 I = IP1, L
+            IF( A( I, J ).NE.CZERO .OR. B( I, J ).NE.CZERO )
+     $         GO TO 150
+  130    CONTINUE
+         I = IP1 - 1
+  140    CONTINUE
+         M = K
+         IFLOW = 2
+         GO TO 160
+  150 CONTINUE
+      GO TO 190
+*
+*     Permute rows M and I
+*
+  160 CONTINUE
+      LSCALE( M ) = I
+      IF( I.EQ.M )
+     $   GO TO 170
+      CALL CSWAP( N-K+1, A( I, K ), LDA, A( M, K ), LDA )
+      CALL CSWAP( N-K+1, B( I, K ), LDB, B( M, K ), LDB )
+*
+*     Permute columns M and J
+*
+  170 CONTINUE
+      RSCALE( M ) = J
+      IF( J.EQ.M )
+     $   GO TO 180
+      CALL CSWAP( L, A( 1, J ), 1, A( 1, M ), 1 )
+      CALL CSWAP( L, B( 1, J ), 1, B( 1, M ), 1 )
+*
+  180 CONTINUE
+      GO TO ( 20, 90 )IFLOW
+*
+  190 CONTINUE
+      ILO = K
+      IHI = L
+*
+      IF( ILO.EQ.IHI )
+     $   RETURN
+*
+      IF( LSAME( JOB, 'P' ) )
+     $   RETURN
+*
+*     Balance the submatrix in rows ILO to IHI.
+*
+      NR = IHI - ILO + 1
+      DO 200 I = ILO, IHI
+         RSCALE( I ) = ZERO
+         LSCALE( I ) = ZERO
+*
+         WORK( I ) = ZERO
+         WORK( I+N ) = ZERO
+         WORK( I+2*N ) = ZERO
+         WORK( I+3*N ) = ZERO
+         WORK( I+4*N ) = ZERO
+         WORK( I+5*N ) = ZERO
+  200 CONTINUE
+*
+*     Compute right side vector in resulting linear equations
+*
+      BASL = LOG10( SCLFAC )
+      DO 240 I = ILO, IHI
+         DO 230 J = ILO, IHI
+            IF( A( I, J ).EQ.CZERO ) THEN
+               TA = ZERO
+               GO TO 210
+            END IF
+            TA = LOG10( CABS1( A( I, J ) ) ) / BASL
+*
+  210       CONTINUE
+            IF( B( I, J ).EQ.CZERO ) THEN
+               TB = ZERO
+               GO TO 220
+            END IF
+            TB = LOG10( CABS1( B( I, J ) ) ) / BASL
+*
+  220       CONTINUE
+            WORK( I+4*N ) = WORK( I+4*N ) - TA - TB
+            WORK( J+5*N ) = WORK( J+5*N ) - TA - TB
+  230    CONTINUE
+  240 CONTINUE
+*
+      COEF = ONE / REAL( 2*NR )
+      COEF2 = COEF*COEF
+      COEF5 = HALF*COEF2
+      NRP2 = NR + 2
+      BETA = ZERO
+      IT = 1
+*
+*     Start generalized conjugate gradient iteration
+*
+  250 CONTINUE
+*
+      GAMMA = SDOT( NR, WORK( ILO+4*N ), 1, WORK( ILO+4*N ), 1 ) +
+     $        SDOT( NR, WORK( ILO+5*N ), 1, WORK( ILO+5*N ), 1 )
+*
+      EW = ZERO
+      EWC = ZERO
+      DO 260 I = ILO, IHI
+         EW = EW + WORK( I+4*N )
+         EWC = EWC + WORK( I+5*N )
+  260 CONTINUE
+*
+      GAMMA = COEF*GAMMA - COEF2*( EW**2+EWC**2 ) - COEF5*( EW-EWC )**2
+      IF( GAMMA.EQ.ZERO )
+     $   GO TO 350
+      IF( IT.NE.1 )
+     $   BETA = GAMMA / PGAMMA
+      T = COEF5*( EWC-THREE*EW )
+      TC = COEF5*( EW-THREE*EWC )
+*
+      CALL SSCAL( NR, BETA, WORK( ILO ), 1 )
+      CALL SSCAL( NR, BETA, WORK( ILO+N ), 1 )
+*
+      CALL SAXPY( NR, COEF, WORK( ILO+4*N ), 1, WORK( ILO+N ), 1 )
+      CALL SAXPY( NR, COEF, WORK( ILO+5*N ), 1, WORK( ILO ), 1 )
+*
+      DO 270 I = ILO, IHI
+         WORK( I ) = WORK( I ) + TC
+         WORK( I+N ) = WORK( I+N ) + T
+  270 CONTINUE
+*
+*     Apply matrix to vector
+*
+      DO 300 I = ILO, IHI
+         KOUNT = 0
+         SUM = ZERO
+         DO 290 J = ILO, IHI
+            IF( A( I, J ).EQ.CZERO )
+     $         GO TO 280
+            KOUNT = KOUNT + 1
+            SUM = SUM + WORK( J )
+  280       CONTINUE
+            IF( B( I, J ).EQ.CZERO )
+     $         GO TO 290
+            KOUNT = KOUNT + 1
+            SUM = SUM + WORK( J )
+  290    CONTINUE
+         WORK( I+2*N ) = REAL( KOUNT )*WORK( I+N ) + SUM
+  300 CONTINUE
+*
+      DO 330 J = ILO, IHI
+         KOUNT = 0
+         SUM = ZERO
+         DO 320 I = ILO, IHI
+            IF( A( I, J ).EQ.CZERO )
+     $         GO TO 310
+            KOUNT = KOUNT + 1
+            SUM = SUM + WORK( I+N )
+  310       CONTINUE
+            IF( B( I, J ).EQ.CZERO )
+     $         GO TO 320
+            KOUNT = KOUNT + 1
+            SUM = SUM + WORK( I+N )
+  320    CONTINUE
+         WORK( J+3*N ) = REAL( KOUNT )*WORK( J ) + SUM
+  330 CONTINUE
+*
+      SUM = SDOT( NR, WORK( ILO+N ), 1, WORK( ILO+2*N ), 1 ) +
+     $      SDOT( NR, WORK( ILO ), 1, WORK( ILO+3*N ), 1 )
+      ALPHA = GAMMA / SUM
+*
+*     Determine correction to current iteration
+*
+      CMAX = ZERO
+      DO 340 I = ILO, IHI
+         COR = ALPHA*WORK( I+N )
+         IF( ABS( COR ).GT.CMAX )
+     $      CMAX = ABS( COR )
+         LSCALE( I ) = LSCALE( I ) + COR
+         COR = ALPHA*WORK( I )
+         IF( ABS( COR ).GT.CMAX )
+     $      CMAX = ABS( COR )
+         RSCALE( I ) = RSCALE( I ) + COR
+  340 CONTINUE
+      IF( CMAX.LT.HALF )
+     $   GO TO 350
+*
+      CALL SAXPY( NR, -ALPHA, WORK( ILO+2*N ), 1, WORK( ILO+4*N ), 1 )
+      CALL SAXPY( NR, -ALPHA, WORK( ILO+3*N ), 1, WORK( ILO+5*N ), 1 )
+*
+      PGAMMA = GAMMA
+      IT = IT + 1
+      IF( IT.LE.NRP2 )
+     $   GO TO 250
+*
+*     End generalized conjugate gradient iteration
+*
+  350 CONTINUE
+      SFMIN = SLAMCH( 'S' )
+      SFMAX = ONE / SFMIN
+      LSFMIN = INT( LOG10( SFMIN ) / BASL+ONE )
+      LSFMAX = INT( LOG10( SFMAX ) / BASL )
+      DO 360 I = ILO, IHI
+         IRAB = ICAMAX( N-ILO+1, A( I, ILO ), LDA )
+         RAB = ABS( A( I, IRAB+ILO-1 ) )
+         IRAB = ICAMAX( N-ILO+1, B( I, ILO ), LDA )
+         RAB = MAX( RAB, ABS( B( I, IRAB+ILO-1 ) ) )
+         LRAB = INT( LOG10( RAB+SFMIN ) / BASL+ONE )
+         IR = LSCALE( I ) + SIGN( HALF, LSCALE( I ) )
+         IR = MIN( MAX( IR, LSFMIN ), LSFMAX, LSFMAX-LRAB )
+         LSCALE( I ) = SCLFAC**IR
+         ICAB = ICAMAX( IHI, A( 1, I ), 1 )
+         CAB = ABS( A( ICAB, I ) )
+         ICAB = ICAMAX( IHI, B( 1, I ), 1 )
+         CAB = MAX( CAB, ABS( B( ICAB, I ) ) )
+         LCAB = INT( LOG10( CAB+SFMIN ) / BASL+ONE )
+         JC = RSCALE( I ) + SIGN( HALF, RSCALE( I ) )
+         JC = MIN( MAX( JC, LSFMIN ), LSFMAX, LSFMAX-LCAB )
+         RSCALE( I ) = SCLFAC**JC
+  360 CONTINUE
+*
+*     Row scaling of matrices A and B
+*
+      DO 370 I = ILO, IHI
+         CALL CSSCAL( N-ILO+1, LSCALE( I ), A( I, ILO ), LDA )
+         CALL CSSCAL( N-ILO+1, LSCALE( I ), B( I, ILO ), LDB )
+  370 CONTINUE
+*
+*     Column scaling of matrices A and B
+*
+      DO 380 J = ILO, IHI
+         CALL CSSCAL( IHI, RSCALE( J ), A( 1, J ), 1 )
+         CALL CSSCAL( IHI, RSCALE( J ), B( 1, J ), 1 )
+  380 CONTINUE
+*
+      RETURN
+*
+*     End of CGGBAL
+*
+      END
diff --git a/costa/native/external/lapack/cgges.f b/costa/native/external/lapack/cgges.f
new file mode 100644
index 000000000..9511c13de
--- /dev/null
+++ b/costa/native/external/lapack/cgges.f
@@ -0,0 +1,476 @@
+      SUBROUTINE CGGES( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, LDB,
+     $                  SDIM, ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK,
+     $                  LWORK, RWORK, BWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVSL, JOBVSR, SORT
+      INTEGER            INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            BWORK( * )
+      REAL               RWORK( * )
+      COMPLEX            A( LDA, * ), ALPHA( * ), B( LDB, * ),
+     $                   BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ),
+     $                   WORK( * )
+*     ..
+*     .. Function Arguments ..
+      LOGICAL            SELCTG
+      EXTERNAL           SELCTG
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGGES computes for a pair of N-by-N complex nonsymmetric matrices
+*  (A,B), the generalized eigenvalues, the generalized complex Schur
+*  form (S, T), and optionally left and/or right Schur vectors (VSL
+*  and VSR). This gives the generalized Schur factorization
+*
+*          (A,B) = ( (VSL)*S*(VSR)**H, (VSL)*T*(VSR)**H )
+*
+*  where (VSR)**H is the conjugate-transpose of VSR.
+*
+*  Optionally, it also orders the eigenvalues so that a selected cluster
+*  of eigenvalues appears in the leading diagonal blocks of the upper
+*  triangular matrix S and the upper triangular matrix T. The leading
+*  columns of VSL and VSR then form an unitary basis for the
+*  corresponding left and right eigenspaces (deflating subspaces).
+*
+*  (If only the generalized eigenvalues are needed, use the driver
+*  CGGEV instead, which is faster.)
+*
+*  A generalized eigenvalue for a pair of matrices (A,B) is a scalar w
+*  or a ratio alpha/beta = w, such that  A - w*B is singular.  It is
+*  usually represented as the pair (alpha,beta), as there is a
+*  reasonable interpretation for beta=0, and even for both being zero.
+*
+*  A pair of matrices (S,T) is in generalized complex Schur form if S
+*  and T are upper triangular and, in addition, the diagonal elements
+*  of T are non-negative real numbers.
+*
+*  Arguments
+*  =========
+*
+*  JOBVSL  (input) CHARACTER*1
+*          = 'N':  do not compute the left Schur vectors;
+*          = 'V':  compute the left Schur vectors.
+*
+*  JOBVSR  (input) CHARACTER*1
+*          = 'N':  do not compute the right Schur vectors;
+*          = 'V':  compute the right Schur vectors.
+*
+*  SORT    (input) CHARACTER*1
+*          Specifies whether or not to order the eigenvalues on the
+*          diagonal of the generalized Schur form.
+*          = 'N':  Eigenvalues are not ordered;
+*          = 'S':  Eigenvalues are ordered (see SELCTG).
+*
+*  SELCTG  (input) LOGICAL FUNCTION of two COMPLEX arguments
+*          SELCTG must be declared EXTERNAL in the calling subroutine.
+*          If SORT = 'N', SELCTG is not referenced.
+*          If SORT = 'S', SELCTG is used to select eigenvalues to sort
+*          to the top left of the Schur form.
+*          An eigenvalue ALPHA(j)/BETA(j) is selected if
+*          SELCTG(ALPHA(j),BETA(j)) is true.
+*
+*          Note that a selected complex eigenvalue may no longer satisfy
+*          SELCTG(ALPHA(j),BETA(j)) = .TRUE. after ordering, since
+*          ordering may change the value of complex eigenvalues
+*          (especially if the eigenvalue is ill-conditioned), in this
+*          case INFO is set to N+2 (See INFO below).
+*
+*  N       (input) INTEGER
+*          The order of the matrices A, B, VSL, and VSR.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA, N)
+*          On entry, the first of the pair of matrices.
+*          On exit, A has been overwritten by its generalized Schur
+*          form S.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of A.  LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB, N)
+*          On entry, the second of the pair of matrices.
+*          On exit, B has been overwritten by its generalized Schur
+*          form T.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of B.  LDB >= max(1,N).
+*
+*  SDIM    (output) INTEGER
+*          If SORT = 'N', SDIM = 0.
+*          If SORT = 'S', SDIM = number of eigenvalues (after sorting)
+*          for which SELCTG is true.
+*
+*  ALPHA   (output) COMPLEX array, dimension (N)
+*  BETA    (output) COMPLEX array, dimension (N)
+*          On exit,  ALPHA(j)/BETA(j), j=1,...,N, will be the
+*          generalized eigenvalues.  ALPHA(j), j=1,...,N  and  BETA(j),
+*          j=1,...,N  are the diagonals of the complex Schur form (A,B)
+*          output by CGGES. The  BETA(j) will be non-negative real.
+*
+*          Note: the quotients ALPHA(j)/BETA(j) may easily over- or
+*          underflow, and BETA(j) may even be zero.  Thus, the user
+*          should avoid naively computing the ratio alpha/beta.
+*          However, ALPHA will be always less than and usually
+*          comparable with norm(A) in magnitude, and BETA always less
+*          than and usually comparable with norm(B).
+*
+*  VSL     (output) COMPLEX array, dimension (LDVSL,N)
+*          If JOBVSL = 'V', VSL will contain the left Schur vectors.
+*          Not referenced if JOBVSL = 'N'.
+*
+*  LDVSL   (input) INTEGER
+*          The leading dimension of the matrix VSL. LDVSL >= 1, and
+*          if JOBVSL = 'V', LDVSL >= N.
+*
+*  VSR     (output) COMPLEX array, dimension (LDVSR,N)
+*          If JOBVSR = 'V', VSR will contain the right Schur vectors.
+*          Not referenced if JOBVSR = 'N'.
+*
+*  LDVSR   (input) INTEGER
+*          The leading dimension of the matrix VSR. LDVSR >= 1, and
+*          if JOBVSR = 'V', LDVSR >= N.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,2*N).
+*          For good performance, LWORK must generally be larger.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) REAL array, dimension (8*N)
+*
+*  BWORK   (workspace) LOGICAL array, dimension (N)
+*          Not referenced if SORT = 'N'.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          =1,...,N:
+*                The QZ iteration failed.  (A,B) are not in Schur
+*                form, but ALPHA(j) and BETA(j) should be correct for
+*                j=INFO+1,...,N.
+*          > N:  =N+1: other than QZ iteration failed in CHGEQZ
+*                =N+2: after reordering, roundoff changed values of
+*                      some complex eigenvalues so that leading
+*                      eigenvalues in the Generalized Schur form no
+*                      longer satisfy SELCTG=.TRUE.  This could also
+*                      be caused due to scaling.
+*                =N+3: reordering falied in CTGSEN.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+      COMPLEX            CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0E0, 0.0E0 ),
+     $                   CONE = ( 1.0E0, 0.0E0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            CURSL, ILASCL, ILBSCL, ILVSL, ILVSR, LASTSL,
+     $                   LQUERY, WANTST
+      INTEGER            I, ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT,
+     $                   ILO, IRIGHT, IROWS, IRWRK, ITAU, IWRK, LWKMIN,
+     $                   LWKOPT
+      REAL               ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, PVSL,
+     $                   PVSR, SMLNUM
+*     ..
+*     .. Local Arrays ..
+      INTEGER            IDUM( 1 )
+      REAL               DIF( 2 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEQRF, CGGBAK, CGGBAL, CGGHRD, CHGEQZ, CLACPY,
+     $                   CLASCL, CLASET, CTGSEN, CUNGQR, CUNMQR, SLABAD,
+     $                   XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               CLANGE, SLAMCH
+      EXTERNAL           LSAME, ILAENV, CLANGE, SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode the input arguments
+*
+      IF( LSAME( JOBVSL, 'N' ) ) THEN
+         IJOBVL = 1
+         ILVSL = .FALSE.
+      ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN
+         IJOBVL = 2
+         ILVSL = .TRUE.
+      ELSE
+         IJOBVL = -1
+         ILVSL = .FALSE.
+      END IF
+*
+      IF( LSAME( JOBVSR, 'N' ) ) THEN
+         IJOBVR = 1
+         ILVSR = .FALSE.
+      ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN
+         IJOBVR = 2
+         ILVSR = .TRUE.
+      ELSE
+         IJOBVR = -1
+         ILVSR = .FALSE.
+      END IF
+*
+      WANTST = LSAME( SORT, 'S' )
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( IJOBVL.LE.0 ) THEN
+         INFO = -1
+      ELSE IF( IJOBVR.LE.0 ) THEN
+         INFO = -2
+      ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN
+         INFO = -14
+      ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN
+         INFO = -16
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV.)
+*
+      LWKMIN = 1
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN
+         LWKMIN = MAX( 1, 2*N )
+         LWKOPT = N + N*ILAENV( 1, 'CGEQRF', ' ', N, 1, N, 0 )
+         IF( ILVSL ) THEN
+            LWKOPT = MAX( LWKOPT, N+N*ILAENV( 1, 'CUNGQR', ' ', N, 1, N,
+     $               -1 ) )
+         END IF
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY )
+     $   INFO = -18
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGGES ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      WORK( 1 ) = LWKOPT
+      IF( N.EQ.0 ) THEN
+         SDIM = 0
+         RETURN
+      END IF
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SQRT( SMLNUM ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = CLANGE( 'M', N, N, A, LDA, RWORK )
+      ILASCL = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         ANRMTO = SMLNUM
+         ILASCL = .TRUE.
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         ANRMTO = BIGNUM
+         ILASCL = .TRUE.
+      END IF
+*
+      IF( ILASCL )
+     $   CALL CLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR )
+*
+*     Scale B if max element outside range [SMLNUM,BIGNUM]
+*
+      BNRM = CLANGE( 'M', N, N, B, LDB, RWORK )
+      ILBSCL = .FALSE.
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+         BNRMTO = SMLNUM
+         ILBSCL = .TRUE.
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+         BNRMTO = BIGNUM
+         ILBSCL = .TRUE.
+      END IF
+*
+      IF( ILBSCL )
+     $   CALL CLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR )
+*
+*     Permute the matrix to make it more nearly triangular
+*     (Real Workspace: need 6*N)
+*
+      ILEFT = 1
+      IRIGHT = N + 1
+      IRWRK = IRIGHT + N
+      CALL CGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ),
+     $             RWORK( IRIGHT ), RWORK( IRWRK ), IERR )
+*
+*     Reduce B to triangular form (QR decomposition of B)
+*     (Complex Workspace: need N, prefer N*NB)
+*
+      IROWS = IHI + 1 - ILO
+      ICOLS = N + 1 - ILO
+      ITAU = 1
+      IWRK = ITAU + IROWS
+      CALL CGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ),
+     $             WORK( IWRK ), LWORK+1-IWRK, IERR )
+*
+*     Apply the orthogonal transformation to matrix A
+*     (Complex Workspace: need N, prefer N*NB)
+*
+      CALL CUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB,
+     $             WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ),
+     $             LWORK+1-IWRK, IERR )
+*
+*     Initialize VSL
+*     (Complex Workspace: need N, prefer N*NB)
+*
+      IF( ILVSL ) THEN
+         CALL CLASET( 'Full', N, N, CZERO, CONE, VSL, LDVSL )
+         CALL CLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB,
+     $                VSL( ILO+1, ILO ), LDVSL )
+         CALL CUNGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL,
+     $                WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR )
+      END IF
+*
+*     Initialize VSR
+*
+      IF( ILVSR )
+     $   CALL CLASET( 'Full', N, N, CZERO, CONE, VSR, LDVSR )
+*
+*     Reduce to generalized Hessenberg form
+*     (Workspace: none needed)
+*
+      CALL CGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL,
+     $             LDVSL, VSR, LDVSR, IERR )
+*
+      SDIM = 0
+*
+*     Perform QZ algorithm, computing Schur vectors if desired
+*     (Complex Workspace: need N)
+*     (Real Workspace: need N)
+*
+      IWRK = ITAU
+      CALL CHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB,
+     $             ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK( IWRK ),
+     $             LWORK+1-IWRK, RWORK( IRWRK ), IERR )
+      IF( IERR.NE.0 ) THEN
+         IF( IERR.GT.0 .AND. IERR.LE.N ) THEN
+            INFO = IERR
+         ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN
+            INFO = IERR - N
+         ELSE
+            INFO = N + 1
+         END IF
+         GO TO 30
+      END IF
+*
+*     Sort eigenvalues ALPHA/BETA if desired
+*     (Workspace: none needed)
+*
+      IF( WANTST ) THEN
+*
+*        Undo scaling on eigenvalues before selecting
+*
+         IF( ILASCL )
+     $      CALL CLASCL( 'G', 0, 0, ANRM, ANRMTO, N, 1, ALPHA, N, IERR )
+         IF( ILBSCL )
+     $      CALL CLASCL( 'G', 0, 0, BNRM, BNRMTO, N, 1, BETA, N, IERR )
+*
+*        Select eigenvalues
+*
+         DO 10 I = 1, N
+            BWORK( I ) = SELCTG( ALPHA( I ), BETA( I ) )
+   10    CONTINUE
+*
+         CALL CTGSEN( 0, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB, ALPHA,
+     $                BETA, VSL, LDVSL, VSR, LDVSR, SDIM, PVSL, PVSR,
+     $                DIF, WORK( IWRK ), LWORK-IWRK+1, IDUM, 1, IERR )
+         IF( IERR.EQ.1 )
+     $      INFO = N + 3
+*
+      END IF
+*
+*     Apply back-permutation to VSL and VSR
+*     (Workspace: none needed)
+*
+      IF( ILVSL )
+     $   CALL CGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ),
+     $                RWORK( IRIGHT ), N, VSL, LDVSL, IERR )
+      IF( ILVSR )
+     $   CALL CGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ),
+     $                RWORK( IRIGHT ), N, VSR, LDVSR, IERR )
+*
+*     Undo scaling
+*
+      IF( ILASCL ) THEN
+         CALL CLASCL( 'U', 0, 0, ANRMTO, ANRM, N, N, A, LDA, IERR )
+         CALL CLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR )
+      END IF
+*
+      IF( ILBSCL ) THEN
+         CALL CLASCL( 'U', 0, 0, BNRMTO, BNRM, N, N, B, LDB, IERR )
+         CALL CLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR )
+      END IF
+*
+      IF( WANTST ) THEN
+*
+*        Check if reordering is correct
+*
+         LASTSL = .TRUE.
+         SDIM = 0
+         DO 20 I = 1, N
+            CURSL = SELCTG( ALPHA( I ), BETA( I ) )
+            IF( CURSL )
+     $         SDIM = SDIM + 1
+            IF( CURSL .AND. .NOT.LASTSL )
+     $         INFO = N + 2
+            LASTSL = CURSL
+   20    CONTINUE
+*
+      END IF
+*
+   30 CONTINUE
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of CGGES
+*
+      END
diff --git a/costa/native/external/lapack/cggesx.f b/costa/native/external/lapack/cggesx.f
new file mode 100644
index 000000000..2585ca34c
--- /dev/null
+++ b/costa/native/external/lapack/cggesx.f
@@ -0,0 +1,545 @@
+      SUBROUTINE CGGESX( JOBVSL, JOBVSR, SORT, SELCTG, SENSE, N, A, LDA,
+     $                   B, LDB, SDIM, ALPHA, BETA, VSL, LDVSL, VSR,
+     $                   LDVSR, RCONDE, RCONDV, WORK, LWORK, RWORK,
+     $                   IWORK, LIWORK, BWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVSL, JOBVSR, SENSE, SORT
+      INTEGER            INFO, LDA, LDB, LDVSL, LDVSR, LIWORK, LWORK, N,
+     $                   SDIM
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            BWORK( * )
+      INTEGER            IWORK( * )
+      REAL               RCONDE( 2 ), RCONDV( 2 ), RWORK( * )
+      COMPLEX            A( LDA, * ), ALPHA( * ), B( LDB, * ),
+     $                   BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ),
+     $                   WORK( * )
+*     ..
+*     .. Function Arguments ..
+      LOGICAL            SELCTG
+      EXTERNAL           SELCTG
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGGESX computes for a pair of N-by-N complex nonsymmetric matrices
+*  (A,B), the generalized eigenvalues, the complex Schur form (S,T),
+*  and, optionally, the left and/or right matrices of Schur vectors (VSL
+*  and VSR).  This gives the generalized Schur factorization
+*
+*       (A,B) = ( (VSL) S (VSR)**H, (VSL) T (VSR)**H )
+*
+*  where (VSR)**H is the conjugate-transpose of VSR.
+*
+*  Optionally, it also orders the eigenvalues so that a selected cluster
+*  of eigenvalues appears in the leading diagonal blocks of the upper
+*  triangular matrix S and the upper triangular matrix T; computes
+*  a reciprocal condition number for the average of the selected
+*  eigenvalues (RCONDE); and computes a reciprocal condition number for
+*  the right and left deflating subspaces corresponding to the selected
+*  eigenvalues (RCONDV). The leading columns of VSL and VSR then form
+*  an orthonormal basis for the corresponding left and right eigenspaces
+*  (deflating subspaces).
+*
+*  A generalized eigenvalue for a pair of matrices (A,B) is a scalar w
+*  or a ratio alpha/beta = w, such that  A - w*B is singular.  It is
+*  usually represented as the pair (alpha,beta), as there is a
+*  reasonable interpretation for beta=0 or for both being zero.
+*
+*  A pair of matrices (S,T) is in generalized complex Schur form if T is
+*  upper triangular with non-negative diagonal and S is upper
+*  triangular.
+*
+*  Arguments
+*  =========
+*
+*  JOBVSL  (input) CHARACTER*1
+*          = 'N':  do not compute the left Schur vectors;
+*          = 'V':  compute the left Schur vectors.
+*
+*  JOBVSR  (input) CHARACTER*1
+*          = 'N':  do not compute the right Schur vectors;
+*          = 'V':  compute the right Schur vectors.
+*
+*  SORT    (input) CHARACTER*1
+*          Specifies whether or not to order the eigenvalues on the
+*          diagonal of the generalized Schur form.
+*          = 'N':  Eigenvalues are not ordered;
+*          = 'S':  Eigenvalues are ordered (see SELCTG).
+*
+*  SELCTG  (input) LOGICAL FUNCTION of two COMPLEX arguments
+*          SELCTG must be declared EXTERNAL in the calling subroutine.
+*          If SORT = 'N', SELCTG is not referenced.
+*          If SORT = 'S', SELCTG is used to select eigenvalues to sort
+*          to the top left of the Schur form.
+*          Note that a selected complex eigenvalue may no longer satisfy
+*          SELCTG(ALPHA(j),BETA(j)) = .TRUE. after ordering, since
+*          ordering may change the value of complex eigenvalues
+*          (especially if the eigenvalue is ill-conditioned), in this
+*          case INFO is set to N+3 see INFO below).
+*
+*  SENSE   (input) CHARACTER
+*          Determines which reciprocal condition numbers are computed.
+*          = 'N' : None are computed;
+*          = 'E' : Computed for average of selected eigenvalues only;
+*          = 'V' : Computed for selected deflating subspaces only;
+*          = 'B' : Computed for both.
+*          If SENSE = 'E', 'V', or 'B', SORT must equal 'S'.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A, B, VSL, and VSR.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA, N)
+*          On entry, the first of the pair of matrices.
+*          On exit, A has been overwritten by its generalized Schur
+*          form S.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of A.  LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB, N)
+*          On entry, the second of the pair of matrices.
+*          On exit, B has been overwritten by its generalized Schur
+*          form T.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of B.  LDB >= max(1,N).
+*
+*  SDIM    (output) INTEGER
+*          If SORT = 'N', SDIM = 0.
+*          If SORT = 'S', SDIM = number of eigenvalues (after sorting)
+*          for which SELCTG is true.
+*
+*  ALPHA   (output) COMPLEX array, dimension (N)
+*  BETA    (output) COMPLEX array, dimension (N)
+*          On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the
+*          generalized eigenvalues.  ALPHA(j) and BETA(j),j=1,...,N  are
+*          the diagonals of the complex Schur form (S,T).  BETA(j) will
+*          be non-negative real.
+*
+*          Note: the quotients ALPHA(j)/BETA(j) may easily over- or
+*          underflow, and BETA(j) may even be zero.  Thus, the user
+*          should avoid naively computing the ratio alpha/beta.
+*          However, ALPHA will be always less than and usually
+*          comparable with norm(A) in magnitude, and BETA always less
+*          than and usually comparable with norm(B).
+*
+*  VSL     (output) COMPLEX array, dimension (LDVSL,N)
+*          If JOBVSL = 'V', VSL will contain the left Schur vectors.
+*          Not referenced if JOBVSL = 'N'.
+*
+*  LDVSL   (input) INTEGER
+*          The leading dimension of the matrix VSL. LDVSL >=1, and
+*          if JOBVSL = 'V', LDVSL >= N.
+*
+*  VSR     (output) COMPLEX array, dimension (LDVSR,N)
+*          If JOBVSR = 'V', VSR will contain the right Schur vectors.
+*          Not referenced if JOBVSR = 'N'.
+*
+*  LDVSR   (input) INTEGER
+*          The leading dimension of the matrix VSR. LDVSR >= 1, and
+*          if JOBVSR = 'V', LDVSR >= N.
+*
+*  RCONDE  (output) REAL array, dimension ( 2 )
+*          If SENSE = 'E' or 'B', RCONDE(1) and RCONDE(2) contain the
+*          reciprocal condition numbers for the average of the selected
+*          eigenvalues.
+*          Not referenced if SENSE = 'N' or 'V'.
+*
+*  RCONDV  (output) REAL array, dimension ( 2 )
+*          If SENSE = 'V' or 'B', RCONDV(1) and RCONDV(2) contain the
+*          reciprocal condition number for the selected deflating
+*          subspaces.
+*          Not referenced if SENSE = 'N' or 'E'.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= 2*N.
+*          If SENSE = 'E', 'V', or 'B',
+*          LWORK >= MAX(2*N, 2*SDIM*(N-SDIM)).
+*
+*  RWORK   (workspace) REAL array, dimension ( 8*N )
+*          Real workspace.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          Not referenced if SENSE = 'N'.
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array WORK. LIWORK >= N+2.
+*
+*  BWORK   (workspace) LOGICAL array, dimension (N)
+*          Not referenced if SORT = 'N'.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          = 1,...,N:
+*                The QZ iteration failed.  (A,B) are not in Schur
+*                form, but ALPHA(j) and BETA(j) should be correct for
+*                j=INFO+1,...,N.
+*          > N:  =N+1: other than QZ iteration failed in CHGEQZ
+*                =N+2: after reordering, roundoff changed values of
+*                      some complex eigenvalues so that leading
+*                      eigenvalues in the Generalized Schur form no
+*                      longer satisfy SELCTG=.TRUE.  This could also
+*                      be caused due to scaling.
+*                =N+3: reordering failed in CTGSEN.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+      COMPLEX            CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            CURSL, ILASCL, ILBSCL, ILVSL, ILVSR, LASTSL,
+     $                   WANTSB, WANTSE, WANTSN, WANTST, WANTSV
+      INTEGER            I, ICOLS, IERR, IHI, IJOB, IJOBVL, IJOBVR,
+     $                   ILEFT, ILO, IRIGHT, IROWS, IRWRK, ITAU, IWRK,
+     $                   LIWMIN, MAXWRK, MINWRK
+      REAL               ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, PL,
+     $                   PR, SMLNUM
+*     ..
+*     .. Local Arrays ..
+      REAL               DIF( 2 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEQRF, CGGBAK, CGGBAL, CGGHRD, CHGEQZ, CLACPY,
+     $                   CLASCL, CLASET, CTGSEN, CUNGQR, CUNMQR, SLABAD,
+     $                   XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               CLANGE, SLAMCH
+      EXTERNAL           LSAME, ILAENV, CLANGE, SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode the input arguments
+*
+      IF( LSAME( JOBVSL, 'N' ) ) THEN
+         IJOBVL = 1
+         ILVSL = .FALSE.
+      ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN
+         IJOBVL = 2
+         ILVSL = .TRUE.
+      ELSE
+         IJOBVL = -1
+         ILVSL = .FALSE.
+      END IF
+*
+      IF( LSAME( JOBVSR, 'N' ) ) THEN
+         IJOBVR = 1
+         ILVSR = .FALSE.
+      ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN
+         IJOBVR = 2
+         ILVSR = .TRUE.
+      ELSE
+         IJOBVR = -1
+         ILVSR = .FALSE.
+      END IF
+*
+      WANTST = LSAME( SORT, 'S' )
+      WANTSN = LSAME( SENSE, 'N' )
+      WANTSE = LSAME( SENSE, 'E' )
+      WANTSV = LSAME( SENSE, 'V' )
+      WANTSB = LSAME( SENSE, 'B' )
+      IF( WANTSN ) THEN
+         IJOB = 0
+         IWORK( 1 ) = 1
+      ELSE IF( WANTSE ) THEN
+         IJOB = 1
+      ELSE IF( WANTSV ) THEN
+         IJOB = 2
+      ELSE IF( WANTSB ) THEN
+         IJOB = 4
+      END IF
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( IJOBVL.LE.0 ) THEN
+         INFO = -1
+      ELSE IF( IJOBVR.LE.0 ) THEN
+         INFO = -2
+      ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSV .OR. WANTSB ) .OR.
+     $         ( .NOT.WANTST .AND. .NOT.WANTSN ) ) THEN
+         INFO = -5
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN
+         INFO = -15
+      ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN
+         INFO = -17
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. LWORK.GE.1 ) THEN
+         MINWRK = MAX( 1, 2*N )
+         MAXWRK = N + N*ILAENV( 1, 'CGEQRF', ' ', N, 1, N, 0 )
+         IF( ILVSL ) THEN
+            MAXWRK = MAX( MAXWRK, N+N*ILAENV( 1, 'CUNGQR', ' ', N, 1, N,
+     $               -1 ) )
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+      IF( .NOT.WANTSN ) THEN
+         LIWMIN = N+2
+      ELSE
+         LIWMIN = 1
+      END IF
+      IWORK( 1 ) = LIWMIN
+*
+      IF( INFO.EQ.0 .AND. LWORK.LT.MINWRK ) THEN
+         INFO = -21
+      ELSE IF( INFO.EQ.0 .AND. IJOB.GE.1 ) THEN
+         IF( LIWORK.LT.LIWMIN )
+     $      INFO = -24
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGGESX', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         SDIM = 0
+         RETURN
+      END IF
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SQRT( SMLNUM ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = CLANGE( 'M', N, N, A, LDA, RWORK )
+      ILASCL = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         ANRMTO = SMLNUM
+         ILASCL = .TRUE.
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         ANRMTO = BIGNUM
+         ILASCL = .TRUE.
+      END IF
+      IF( ILASCL )
+     $   CALL CLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR )
+*
+*     Scale B if max element outside range [SMLNUM,BIGNUM]
+*
+      BNRM = CLANGE( 'M', N, N, B, LDB, RWORK )
+      ILBSCL = .FALSE.
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+         BNRMTO = SMLNUM
+         ILBSCL = .TRUE.
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+         BNRMTO = BIGNUM
+         ILBSCL = .TRUE.
+      END IF
+      IF( ILBSCL )
+     $   CALL CLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR )
+*
+*     Permute the matrix to make it more nearly triangular
+*     (Real Workspace: need 6*N)
+*
+      ILEFT = 1
+      IRIGHT = N + 1
+      IRWRK = IRIGHT + N
+      CALL CGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ),
+     $             RWORK( IRIGHT ), RWORK( IRWRK ), IERR )
+*
+*     Reduce B to triangular form (QR decomposition of B)
+*     (Complex Workspace: need N, prefer N*NB)
+*
+      IROWS = IHI + 1 - ILO
+      ICOLS = N + 1 - ILO
+      ITAU = 1
+      IWRK = ITAU + IROWS
+      CALL CGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ),
+     $             WORK( IWRK ), LWORK+1-IWRK, IERR )
+*
+*     Apply the unitary transformation to matrix A
+*     (Complex Workspace: need N, prefer N*NB)
+*
+      CALL CUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB,
+     $             WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ),
+     $             LWORK+1-IWRK, IERR )
+*
+*     Initialize VSL
+*     (Complex Workspace: need N, prefer N*NB)
+*
+      IF( ILVSL ) THEN
+         CALL CLASET( 'Full', N, N, CZERO, CONE, VSL, LDVSL )
+         CALL CLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB,
+     $                VSL( ILO+1, ILO ), LDVSL )
+         CALL CUNGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL,
+     $                WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR )
+      END IF
+*
+*     Initialize VSR
+*
+      IF( ILVSR )
+     $   CALL CLASET( 'Full', N, N, CZERO, CONE, VSR, LDVSR )
+*
+*     Reduce to generalized Hessenberg form
+*     (Workspace: none needed)
+*
+      CALL CGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL,
+     $             LDVSL, VSR, LDVSR, IERR )
+*
+      SDIM = 0
+*
+*     Perform QZ algorithm, computing Schur vectors if desired
+*     (Complex Workspace: need N)
+*     (Real Workspace:    need N)
+*
+      IWRK = ITAU
+      CALL CHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB,
+     $             ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK( IWRK ),
+     $             LWORK+1-IWRK, RWORK( IRWRK ), IERR )
+      IF( IERR.NE.0 ) THEN
+         IF( IERR.GT.0 .AND. IERR.LE.N ) THEN
+            INFO = IERR
+         ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN
+            INFO = IERR - N
+         ELSE
+            INFO = N + 1
+         END IF
+         GO TO 40
+      END IF
+*
+*     Sort eigenvalues ALPHA/BETA and compute the reciprocal of
+*     condition number(s)
+*
+      IF( WANTST ) THEN
+*
+*        Undo scaling on eigenvalues before SELCTGing
+*
+         IF( ILASCL )
+     $      CALL CLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR )
+         IF( ILBSCL )
+     $      CALL CLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR )
+*
+*        Select eigenvalues
+*
+         DO 10 I = 1, N
+            BWORK( I ) = SELCTG( ALPHA( I ), BETA( I ) )
+   10    CONTINUE
+*
+*        Reorder eigenvalues, transform Generalized Schur vectors, and
+*        compute reciprocal condition numbers
+*        (Complex Workspace: If IJOB >= 1, need MAX(1, 2*SDIM*(N-SDIM))
+*                            otherwise, need 1 )
+*
+         CALL CTGSEN( IJOB, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB,
+     $                ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, SDIM, PL, PR,
+     $                DIF, WORK( IWRK ), LWORK-IWRK+1, IWORK, LIWORK,
+     $                IERR )
+*
+         IF( IJOB.GE.1 )
+     $      MAXWRK = MAX( MAXWRK, 2*SDIM*( N-SDIM ) )
+         IF( IERR.EQ.-21 ) THEN
+*
+*            not enough complex workspace
+*
+            INFO = -21
+         ELSE
+            RCONDE( 1 ) = PL
+            RCONDE( 2 ) = PL
+            RCONDV( 1 ) = DIF( 1 )
+            RCONDV( 2 ) = DIF( 2 )
+            IF( IERR.EQ.1 )
+     $         INFO = N + 3
+         END IF
+*
+      END IF
+*
+*     Apply permutation to VSL and VSR
+*     (Workspace: none needed)
+*
+      IF( ILVSL )
+     $   CALL CGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ),
+     $                RWORK( IRIGHT ), N, VSL, LDVSL, IERR )
+*
+      IF( ILVSR )
+     $   CALL CGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ),
+     $                RWORK( IRIGHT ), N, VSR, LDVSR, IERR )
+*
+*     Undo scaling
+*
+      IF( ILASCL ) THEN
+         CALL CLASCL( 'U', 0, 0, ANRMTO, ANRM, N, N, A, LDA, IERR )
+         CALL CLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR )
+      END IF
+*
+      IF( ILBSCL ) THEN
+         CALL CLASCL( 'U', 0, 0, BNRMTO, BNRM, N, N, B, LDB, IERR )
+         CALL CLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR )
+      END IF
+*
+   20 CONTINUE
+*
+      IF( WANTST ) THEN
+*
+*        Check if reordering is correct
+*
+         LASTSL = .TRUE.
+         SDIM = 0
+         DO 30 I = 1, N
+            CURSL = SELCTG( ALPHA( I ), BETA( I ) )
+            IF( CURSL )
+     $         SDIM = SDIM + 1
+            IF( CURSL .AND. .NOT.LASTSL )
+     $         INFO = N + 2
+            LASTSL = CURSL
+   30    CONTINUE
+*
+      END IF
+*
+   40 CONTINUE
+*
+      WORK( 1 ) = MAXWRK
+      IWORK( 1 ) = LIWMIN
+*
+      RETURN
+*
+*     End of CGGESX
+*
+      END
diff --git a/costa/native/external/lapack/cggev.f b/costa/native/external/lapack/cggev.f
new file mode 100644
index 000000000..74315981a
--- /dev/null
+++ b/costa/native/external/lapack/cggev.f
@@ -0,0 +1,449 @@
+      SUBROUTINE CGGEV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA,
+     $                  VL, LDVL, VR, LDVR, WORK, LWORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVL, JOBVR
+      INTEGER            INFO, LDA, LDB, LDVL, LDVR, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      REAL               RWORK( * )
+      COMPLEX            A( LDA, * ), ALPHA( * ), B( LDB, * ),
+     $                   BETA( * ), VL( LDVL, * ), VR( LDVR, * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGGEV computes for a pair of N-by-N complex nonsymmetric matrices
+*  (A,B), the generalized eigenvalues, and optionally, the left and/or
+*  right generalized eigenvectors.
+*
+*  A generalized eigenvalue for a pair of matrices (A,B) is a scalar
+*  lambda or a ratio alpha/beta = lambda, such that A - lambda*B is
+*  singular. It is usually represented as the pair (alpha,beta), as
+*  there is a reasonable interpretation for beta=0, and even for both
+*  being zero.
+*
+*  The right generalized eigenvector v(j) corresponding to the
+*  generalized eigenvalue lambda(j) of (A,B) satisfies
+*
+*               A * v(j) = lambda(j) * B * v(j).
+*
+*  The left generalized eigenvector u(j) corresponding to the
+*  generalized eigenvalues lambda(j) of (A,B) satisfies
+*
+*               u(j)**H * A = lambda(j) * u(j)**H * B
+*
+*  where u(j)**H is the conjugate-transpose of u(j).
+*
+*  Arguments
+*  =========
+*
+*  JOBVL   (input) CHARACTER*1
+*          = 'N':  do not compute the left generalized eigenvectors;
+*          = 'V':  compute the left generalized eigenvectors.
+*
+*  JOBVR   (input) CHARACTER*1
+*          = 'N':  do not compute the right generalized eigenvectors;
+*          = 'V':  compute the right generalized eigenvectors.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A, B, VL, and VR.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA, N)
+*          On entry, the matrix A in the pair (A,B).
+*          On exit, A has been overwritten.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of A.  LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB, N)
+*          On entry, the matrix B in the pair (A,B).
+*          On exit, B has been overwritten.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of B.  LDB >= max(1,N).
+*
+*  ALPHA   (output) COMPLEX array, dimension (N)
+*  BETA    (output) COMPLEX array, dimension (N)
+*          On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the
+*          generalized eigenvalues.
+*
+*          Note: the quotients ALPHA(j)/BETA(j) may easily over- or
+*          underflow, and BETA(j) may even be zero.  Thus, the user
+*          should avoid naively computing the ratio alpha/beta.
+*          However, ALPHA will be always less than and usually
+*          comparable with norm(A) in magnitude, and BETA always less
+*          than and usually comparable with norm(B).
+*
+*  VL      (output) COMPLEX array, dimension (LDVL,N)
+*          If JOBVL = 'V', the left generalized eigenvectors u(j) are
+*          stored one after another in the columns of VL, in the same
+*          order as their eigenvalues.
+*          Each eigenvector will be scaled so the largest component
+*          will have abs(real part) + abs(imag. part) = 1.
+*          Not referenced if JOBVL = 'N'.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the matrix VL. LDVL >= 1, and
+*          if JOBVL = 'V', LDVL >= N.
+*
+*  VR      (output) COMPLEX array, dimension (LDVR,N)
+*          If JOBVR = 'V', the right generalized eigenvectors v(j) are
+*          stored one after another in the columns of VR, in the same
+*          order as their eigenvalues.
+*          Each eigenvector will be scaled so the largest component
+*          will have abs(real part) + abs(imag. part) = 1.
+*          Not referenced if JOBVR = 'N'.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the matrix VR. LDVR >= 1, and
+*          if JOBVR = 'V', LDVR >= N.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,2*N).
+*          For good performance, LWORK must generally be larger.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace/output) REAL array, dimension (8*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          =1,...,N:
+*                The QZ iteration failed.  No eigenvectors have been
+*                calculated, but ALPHA(j) and BETA(j) should be
+*                correct for j=INFO+1,...,N.
+*          > N:  =N+1: other then QZ iteration failed in SHGEQZ,
+*                =N+2: error return from STGEVC.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+      COMPLEX            CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0E0, 0.0E0 ),
+     $                   CONE = ( 1.0E0, 0.0E0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ILASCL, ILBSCL, ILV, ILVL, ILVR, LQUERY
+      CHARACTER          CHTEMP
+      INTEGER            ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT, ILO,
+     $                   IN, IRIGHT, IROWS, IRWRK, ITAU, IWRK, JC, JR,
+     $                   LWKMIN, LWKOPT
+      REAL               ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS,
+     $                   SMLNUM, TEMP
+      COMPLEX            X
+*     ..
+*     .. Local Arrays ..
+      LOGICAL            LDUMMA( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEQRF, CGGBAK, CGGBAL, CGGHRD, CHGEQZ, CLACPY,
+     $                   CLASCL, CLASET, CTGEVC, CUNGQR, CUNMQR, SLABAD,
+     $                   XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               CLANGE, SLAMCH
+      EXTERNAL           LSAME, ILAENV, CLANGE, SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MAX, REAL, SQRT
+*     ..
+*     .. Statement Functions ..
+      REAL               ABS1
+*     ..
+*     .. Statement Function definitions ..
+      ABS1( X ) = ABS( REAL( X ) ) + ABS( AIMAG( X ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode the input arguments
+*
+      IF( LSAME( JOBVL, 'N' ) ) THEN
+         IJOBVL = 1
+         ILVL = .FALSE.
+      ELSE IF( LSAME( JOBVL, 'V' ) ) THEN
+         IJOBVL = 2
+         ILVL = .TRUE.
+      ELSE
+         IJOBVL = -1
+         ILVL = .FALSE.
+      END IF
+*
+      IF( LSAME( JOBVR, 'N' ) ) THEN
+         IJOBVR = 1
+         ILVR = .FALSE.
+      ELSE IF( LSAME( JOBVR, 'V' ) ) THEN
+         IJOBVR = 2
+         ILVR = .TRUE.
+      ELSE
+         IJOBVR = -1
+         ILVR = .FALSE.
+      END IF
+      ILV = ILVL .OR. ILVR
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( IJOBVL.LE.0 ) THEN
+         INFO = -1
+      ELSE IF( IJOBVR.LE.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN
+         INFO = -11
+      ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN
+         INFO = -13
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV. The workspace is
+*       computed assuming ILO = 1 and IHI = N, the worst case.)
+*
+      LWKMIN = 1
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN
+         LWKOPT = N + N*ILAENV( 1, 'CGEQRF', ' ', N, 1, N, 0 )
+         LWKMIN = MAX( 1, 2*N )
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY )
+     $   INFO = -15
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGGEV ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      WORK( 1 ) = LWKOPT
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'E' )*SLAMCH( 'B' )
+      SMLNUM = SLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SQRT( SMLNUM ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = CLANGE( 'M', N, N, A, LDA, RWORK )
+      ILASCL = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         ANRMTO = SMLNUM
+         ILASCL = .TRUE.
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         ANRMTO = BIGNUM
+         ILASCL = .TRUE.
+      END IF
+      IF( ILASCL )
+     $   CALL CLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR )
+*
+*     Scale B if max element outside range [SMLNUM,BIGNUM]
+*
+      BNRM = CLANGE( 'M', N, N, B, LDB, RWORK )
+      ILBSCL = .FALSE.
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+         BNRMTO = SMLNUM
+         ILBSCL = .TRUE.
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+         BNRMTO = BIGNUM
+         ILBSCL = .TRUE.
+      END IF
+      IF( ILBSCL )
+     $   CALL CLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR )
+*
+*     Permute the matrices A, B to isolate eigenvalues if possible
+*     (Real Workspace: need 6*N)
+*
+      ILEFT = 1
+      IRIGHT = N + 1
+      IRWRK = IRIGHT + N
+      CALL CGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ),
+     $             RWORK( IRIGHT ), RWORK( IRWRK ), IERR )
+*
+*     Reduce B to triangular form (QR decomposition of B)
+*     (Complex Workspace: need N, prefer N*NB)
+*
+      IROWS = IHI + 1 - ILO
+      IF( ILV ) THEN
+         ICOLS = N + 1 - ILO
+      ELSE
+         ICOLS = IROWS
+      END IF
+      ITAU = 1
+      IWRK = ITAU + IROWS
+      CALL CGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ),
+     $             WORK( IWRK ), LWORK+1-IWRK, IERR )
+*
+*     Apply the orthogonal transformation to matrix A
+*     (Complex Workspace: need N, prefer N*NB)
+*
+      CALL CUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB,
+     $             WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ),
+     $             LWORK+1-IWRK, IERR )
+*
+*     Initialize VL
+*     (Complex Workspace: need N, prefer N*NB)
+*
+      IF( ILVL ) THEN
+         CALL CLASET( 'Full', N, N, CZERO, CONE, VL, LDVL )
+         CALL CLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB,
+     $                VL( ILO+1, ILO ), LDVL )
+         CALL CUNGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL,
+     $                WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR )
+      END IF
+*
+*     Initialize VR
+*
+      IF( ILVR )
+     $   CALL CLASET( 'Full', N, N, CZERO, CONE, VR, LDVR )
+*
+*     Reduce to generalized Hessenberg form
+*
+      IF( ILV ) THEN
+*
+*        Eigenvectors requested -- work on whole matrix.
+*
+         CALL CGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL,
+     $                LDVL, VR, LDVR, IERR )
+      ELSE
+         CALL CGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA,
+     $                B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IERR )
+      END IF
+*
+*     Perform QZ algorithm (Compute eigenvalues, and optionally, the
+*     Schur form and Schur vectors)
+*     (Complex Workspace: need N)
+*     (Real Workspace: need N)
+*
+      IWRK = ITAU
+      IF( ILV ) THEN
+         CHTEMP = 'S'
+      ELSE
+         CHTEMP = 'E'
+      END IF
+      CALL CHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB,
+     $             ALPHA, BETA, VL, LDVL, VR, LDVR, WORK( IWRK ),
+     $             LWORK+1-IWRK, RWORK( IRWRK ), IERR )
+      IF( IERR.NE.0 ) THEN
+         IF( IERR.GT.0 .AND. IERR.LE.N ) THEN
+            INFO = IERR
+         ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN
+            INFO = IERR - N
+         ELSE
+            INFO = N + 1
+         END IF
+         GO TO 70
+      END IF
+*
+*     Compute Eigenvectors
+*     (Real Workspace: need 2*N)
+*     (Complex Workspace: need 2*N)
+*
+      IF( ILV ) THEN
+         IF( ILVL ) THEN
+            IF( ILVR ) THEN
+               CHTEMP = 'B'
+            ELSE
+               CHTEMP = 'L'
+            END IF
+         ELSE
+            CHTEMP = 'R'
+         END IF
+*
+         CALL CTGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, LDVL,
+     $                VR, LDVR, N, IN, WORK( IWRK ), RWORK( IRWRK ),
+     $                IERR )
+         IF( IERR.NE.0 ) THEN
+            INFO = N + 2
+            GO TO 70
+         END IF
+*
+*        Undo balancing on VL and VR and normalization
+*        (Workspace: none needed)
+*
+         IF( ILVL ) THEN
+            CALL CGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ),
+     $                   RWORK( IRIGHT ), N, VL, LDVL, IERR )
+            DO 30 JC = 1, N
+               TEMP = ZERO
+               DO 10 JR = 1, N
+                  TEMP = MAX( TEMP, ABS1( VL( JR, JC ) ) )
+   10          CONTINUE
+               IF( TEMP.LT.SMLNUM )
+     $            GO TO 30
+               TEMP = ONE / TEMP
+               DO 20 JR = 1, N
+                  VL( JR, JC ) = VL( JR, JC )*TEMP
+   20          CONTINUE
+   30       CONTINUE
+         END IF
+         IF( ILVR ) THEN
+            CALL CGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ),
+     $                   RWORK( IRIGHT ), N, VR, LDVR, IERR )
+            DO 60 JC = 1, N
+               TEMP = ZERO
+               DO 40 JR = 1, N
+                  TEMP = MAX( TEMP, ABS1( VR( JR, JC ) ) )
+   40          CONTINUE
+               IF( TEMP.LT.SMLNUM )
+     $            GO TO 60
+               TEMP = ONE / TEMP
+               DO 50 JR = 1, N
+                  VR( JR, JC ) = VR( JR, JC )*TEMP
+   50          CONTINUE
+   60       CONTINUE
+         END IF
+      END IF
+*
+*     Undo scaling if necessary
+*
+      IF( ILASCL )
+     $   CALL CLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR )
+*
+      IF( ILBSCL )
+     $   CALL CLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR )
+*
+   70 CONTINUE
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of CGGEV
+*
+      END
diff --git a/costa/native/external/lapack/cggevx.f b/costa/native/external/lapack/cggevx.f
new file mode 100644
index 000000000..d9fdc2b0f
--- /dev/null
+++ b/costa/native/external/lapack/cggevx.f
@@ -0,0 +1,640 @@
+      SUBROUTINE CGGEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, B, LDB,
+     $                   ALPHA, BETA, VL, LDVL, VR, LDVR, ILO, IHI,
+     $                   LSCALE, RSCALE, ABNRM, BBNRM, RCONDE, RCONDV,
+     $                   WORK, LWORK, RWORK, IWORK, BWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          BALANC, JOBVL, JOBVR, SENSE
+      INTEGER            IHI, ILO, INFO, LDA, LDB, LDVL, LDVR, LWORK, N
+      REAL               ABNRM, BBNRM
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            BWORK( * )
+      INTEGER            IWORK( * )
+      REAL               LSCALE( * ), RCONDE( * ), RCONDV( * ),
+     $                   RSCALE( * ), RWORK( * )
+      COMPLEX            A( LDA, * ), ALPHA( * ), B( LDB, * ),
+     $                   BETA( * ), VL( LDVL, * ), VR( LDVR, * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGGEVX computes for a pair of N-by-N complex nonsymmetric matrices
+*  (A,B) the generalized eigenvalues, and optionally, the left and/or
+*  right generalized eigenvectors.
+*
+*  Optionally, it also computes a balancing transformation to improve
+*  the conditioning of the eigenvalues and eigenvectors (ILO, IHI,
+*  LSCALE, RSCALE, ABNRM, and BBNRM), reciprocal condition numbers for
+*  the eigenvalues (RCONDE), and reciprocal condition numbers for the
+*  right eigenvectors (RCONDV).
+*
+*  A generalized eigenvalue for a pair of matrices (A,B) is a scalar
+*  lambda or a ratio alpha/beta = lambda, such that A - lambda*B is
+*  singular. It is usually represented as the pair (alpha,beta), as
+*  there is a reasonable interpretation for beta=0, and even for both
+*  being zero.
+*
+*  The right eigenvector v(j) corresponding to the eigenvalue lambda(j)
+*  of (A,B) satisfies
+*                   A * v(j) = lambda(j) * B * v(j) .
+*  The left eigenvector u(j) corresponding to the eigenvalue lambda(j)
+*  of (A,B) satisfies
+*                   u(j)**H * A  = lambda(j) * u(j)**H * B.
+*  where u(j)**H is the conjugate-transpose of u(j).
+*
+*
+*  Arguments
+*  =========
+*
+*  BALANC  (input) CHARACTER*1
+*          Specifies the balance option to be performed:
+*          = 'N':  do not diagonally scale or permute;
+*          = 'P':  permute only;
+*          = 'S':  scale only;
+*          = 'B':  both permute and scale.
+*          Computed reciprocal condition numbers will be for the
+*          matrices after permuting and/or balancing. Permuting does
+*          not change condition numbers (in exact arithmetic), but
+*          balancing does.
+*
+*  JOBVL   (input) CHARACTER*1
+*          = 'N':  do not compute the left generalized eigenvectors;
+*          = 'V':  compute the left generalized eigenvectors.
+*
+*  JOBVR   (input) CHARACTER*1
+*          = 'N':  do not compute the right generalized eigenvectors;
+*          = 'V':  compute the right generalized eigenvectors.
+*
+*  SENSE   (input) CHARACTER*1
+*          Determines which reciprocal condition numbers are computed.
+*          = 'N': none are computed;
+*          = 'E': computed for eigenvalues only;
+*          = 'V': computed for eigenvectors only;
+*          = 'B': computed for eigenvalues and eigenvectors.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A, B, VL, and VR.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA, N)
+*          On entry, the matrix A in the pair (A,B).
+*          On exit, A has been overwritten. If JOBVL='V' or JOBVR='V'
+*          or both, then A contains the first part of the complex Schur
+*          form of the "balanced" versions of the input A and B.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of A.  LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB, N)
+*          On entry, the matrix B in the pair (A,B).
+*          On exit, B has been overwritten. If JOBVL='V' or JOBVR='V'
+*          or both, then B contains the second part of the complex
+*          Schur form of the "balanced" versions of the input A and B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of B.  LDB >= max(1,N).
+*
+*  ALPHA   (output) COMPLEX array, dimension (N)
+*  BETA    (output) COMPLEX array, dimension (N)
+*          On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the generalized
+*          eigenvalues.
+*
+*          Note: the quotient ALPHA(j)/BETA(j) ) may easily over- or
+*          underflow, and BETA(j) may even be zero.  Thus, the user
+*          should avoid naively computing the ratio ALPHA/BETA.
+*          However, ALPHA will be always less than and usually
+*          comparable with norm(A) in magnitude, and BETA always less
+*          than and usually comparable with norm(B).
+*
+*  VL      (output) COMPLEX array, dimension (LDVL,N)
+*          If JOBVL = 'V', the left generalized eigenvectors u(j) are
+*          stored one after another in the columns of VL, in the same
+*          order as their eigenvalues.
+*          Each eigenvector will be scaled so the largest component
+*          will have abs(real part) + abs(imag. part) = 1.
+*          Not referenced if JOBVL = 'N'.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the matrix VL. LDVL >= 1, and
+*          if JOBVL = 'V', LDVL >= N.
+*
+*  VR      (output) COMPLEX array, dimension (LDVR,N)
+*          If JOBVR = 'V', the right generalized eigenvectors v(j) are
+*          stored one after another in the columns of VR, in the same
+*          order as their eigenvalues.
+*          Each eigenvector will be scaled so the largest component
+*          will have abs(real part) + abs(imag. part) = 1.
+*          Not referenced if JOBVR = 'N'.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the matrix VR. LDVR >= 1, and
+*          if JOBVR = 'V', LDVR >= N.
+*
+*  ILO,IHI (output) INTEGER
+*          ILO and IHI are integer values such that on exit
+*          A(i,j) = 0 and B(i,j) = 0 if i > j and
+*          j = 1,...,ILO-1 or i = IHI+1,...,N.
+*          If BALANC = 'N' or 'S', ILO = 1 and IHI = N.
+*
+*  LSCALE  (output) REAL array, dimension (N)
+*          Details of the permutations and scaling factors applied
+*          to the left side of A and B.  If PL(j) is the index of the
+*          row interchanged with row j, and DL(j) is the scaling
+*          factor applied to row j, then
+*            LSCALE(j) = PL(j)  for j = 1,...,ILO-1
+*                      = DL(j)  for j = ILO,...,IHI
+*                      = PL(j)  for j = IHI+1,...,N.
+*          The order in which the interchanges are made is N to IHI+1,
+*          then 1 to ILO-1.
+*
+*  RSCALE  (output) REAL array, dimension (N)
+*          Details of the permutations and scaling factors applied
+*          to the right side of A and B.  If PR(j) is the index of the
+*          column interchanged with column j, and DR(j) is the scaling
+*          factor applied to column j, then
+*            RSCALE(j) = PR(j)  for j = 1,...,ILO-1
+*                      = DR(j)  for j = ILO,...,IHI
+*                      = PR(j)  for j = IHI+1,...,N
+*          The order in which the interchanges are made is N to IHI+1,
+*          then 1 to ILO-1.
+*
+*  ABNRM   (output) REAL
+*          The one-norm of the balanced matrix A.
+*
+*  BBNRM   (output) REAL
+*          The one-norm of the balanced matrix B.
+*
+*  RCONDE  (output) REAL array, dimension (N)
+*          If SENSE = 'E' or 'B', the reciprocal condition numbers of
+*          the selected eigenvalues, stored in consecutive elements of
+*          the array.
+*          If SENSE = 'V', RCONDE is not referenced.
+*
+*  RCONDV  (output) REAL array, dimension (N)
+*          If JOB = 'V' or 'B', the estimated reciprocal condition
+*          numbers of the selected eigenvectors, stored in consecutive
+*          elements of the array. If the eigenvalues cannot be reordered
+*          to compute RCONDV(j), RCONDV(j) is set to 0; this can only
+*          occur when the true value would be very small anyway.
+*          If SENSE = 'E', RCONDV is not referenced.
+*          Not referenced if JOB = 'E'.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,2*N).
+*          If SENSE = 'N' or 'E', LWORK >= 2*N.
+*          If SENSE = 'V' or 'B', LWORK >= 2*N*N+2*N.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) REAL array, dimension (6*N)
+*          Real workspace.
+*
+*  IWORK   (workspace) INTEGER array, dimension (N+2)
+*          If SENSE = 'E', IWORK is not referenced.
+*
+*  BWORK   (workspace) LOGICAL array, dimension (N)
+*          If SENSE = 'N', BWORK is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          = 1,...,N:
+*                The QZ iteration failed.  No eigenvectors have been
+*                calculated, but ALPHA(j) and BETA(j) should be correct
+*                for j=INFO+1,...,N.
+*          > N:  =N+1: other than QZ iteration failed in CHGEQZ.
+*                =N+2: error return from CTGEVC.
+*
+*  Further Details
+*  ===============
+*
+*  Balancing a matrix pair (A,B) includes, first, permuting rows and
+*  columns to isolate eigenvalues, second, applying diagonal similarity
+*  transformation to the rows and columns to make the rows and columns
+*  as close in norm as possible. The computed reciprocal condition
+*  numbers correspond to the balanced matrix. Permuting rows and columns
+*  will not change the condition numbers (in exact arithmetic) but
+*  diagonal scaling will.  For further explanation of balancing, see
+*  section 4.11.1.2 of LAPACK Users' Guide.
+*
+*  An approximate error bound on the chordal distance between the i-th
+*  computed generalized eigenvalue w and the corresponding exact
+*  eigenvalue lambda is
+*
+*       chord(w, lambda) <= EPS * norm(ABNRM, BBNRM) / RCONDE(I)
+*
+*  An approximate error bound for the angle between the i-th computed
+*  eigenvector VL(i) or VR(i) is given by
+*
+*       EPS * norm(ABNRM, BBNRM) / DIF(i).
+*
+*  For further explanation of the reciprocal condition numbers RCONDE
+*  and RCONDV, see section 4.11 of LAPACK User's Guide.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+      COMPLEX            CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ILASCL, ILBSCL, ILV, ILVL, ILVR, LQUERY,
+     $                   WANTSB, WANTSE, WANTSN, WANTSV
+      CHARACTER          CHTEMP
+      INTEGER            I, ICOLS, IERR, IJOBVL, IJOBVR, IN, IROWS,
+     $                   ITAU, IWRK, IWRK1, J, JC, JR, M, MAXWRK, MINWRK
+      REAL               ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS,
+     $                   SMLNUM, TEMP
+      COMPLEX            X
+*     ..
+*     .. Local Arrays ..
+      LOGICAL            LDUMMA( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEQRF, CGGBAK, CGGBAL, CGGHRD, CHGEQZ, CLACPY,
+     $                   CLASCL, CLASET, CTGEVC, CTGSNA, CUNGQR, CUNMQR,
+     $                   SLABAD, SLASCL, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               CLANGE, SLAMCH
+      EXTERNAL           LSAME, ILAENV, CLANGE, SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MAX, REAL, SQRT
+*     ..
+*     .. Statement Functions ..
+      REAL               ABS1
+*     ..
+*     .. Statement Function definitions ..
+      ABS1( X ) = ABS( REAL( X ) ) + ABS( AIMAG( X ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode the input arguments
+*
+      IF( LSAME( JOBVL, 'N' ) ) THEN
+         IJOBVL = 1
+         ILVL = .FALSE.
+      ELSE IF( LSAME( JOBVL, 'V' ) ) THEN
+         IJOBVL = 2
+         ILVL = .TRUE.
+      ELSE
+         IJOBVL = -1
+         ILVL = .FALSE.
+      END IF
+*
+      IF( LSAME( JOBVR, 'N' ) ) THEN
+         IJOBVR = 1
+         ILVR = .FALSE.
+      ELSE IF( LSAME( JOBVR, 'V' ) ) THEN
+         IJOBVR = 2
+         ILVR = .TRUE.
+      ELSE
+         IJOBVR = -1
+         ILVR = .FALSE.
+      END IF
+      ILV = ILVL .OR. ILVR
+*
+      WANTSN = LSAME( SENSE, 'N' )
+      WANTSE = LSAME( SENSE, 'E' )
+      WANTSV = LSAME( SENSE, 'V' )
+      WANTSB = LSAME( SENSE, 'B' )
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.( LSAME( BALANC, 'N' ) .OR. LSAME( BALANC,
+     $    'S' ) .OR. LSAME( BALANC, 'P' ) .OR. LSAME( BALANC, 'B' ) ) )
+     $     THEN
+         INFO = -1
+      ELSE IF( IJOBVL.LE.0 ) THEN
+         INFO = -2
+      ELSE IF( IJOBVR.LE.0 ) THEN
+         INFO = -3
+      ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSB .OR. WANTSV ) )
+     $          THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN
+         INFO = -13
+      ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN
+         INFO = -15
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV. The workspace is
+*       computed assuming ILO = 1 and IHI = N, the worst case.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN
+         MAXWRK = N + N*ILAENV( 1, 'CGEQRF', ' ', N, 1, N, 0 )
+         IF( WANTSE ) THEN
+            MINWRK = MAX( 1, 2*N )
+         ELSE IF( WANTSV .OR. WANTSB ) THEN
+            MINWRK = 2*N*N + 2*N
+            MAXWRK = MAX( MAXWRK, 2*N*N+2*N )
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+*
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
+         INFO = -25
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGGEVX', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SQRT( SMLNUM ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = CLANGE( 'M', N, N, A, LDA, RWORK )
+      ILASCL = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         ANRMTO = SMLNUM
+         ILASCL = .TRUE.
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         ANRMTO = BIGNUM
+         ILASCL = .TRUE.
+      END IF
+      IF( ILASCL )
+     $   CALL CLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR )
+*
+*     Scale B if max element outside range [SMLNUM,BIGNUM]
+*
+      BNRM = CLANGE( 'M', N, N, B, LDB, RWORK )
+      ILBSCL = .FALSE.
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+         BNRMTO = SMLNUM
+         ILBSCL = .TRUE.
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+         BNRMTO = BIGNUM
+         ILBSCL = .TRUE.
+      END IF
+      IF( ILBSCL )
+     $   CALL CLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR )
+*
+*     Permute and/or balance the matrix pair (A,B)
+*     (Real Workspace: need 6*N)
+*
+      CALL CGGBAL( BALANC, N, A, LDA, B, LDB, ILO, IHI, LSCALE, RSCALE,
+     $             RWORK, IERR )
+*
+*     Compute ABNRM and BBNRM
+*
+      ABNRM = CLANGE( '1', N, N, A, LDA, RWORK( 1 ) )
+      IF( ILASCL ) THEN
+         RWORK( 1 ) = ABNRM
+         CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, 1, 1, RWORK( 1 ), 1,
+     $                IERR )
+         ABNRM = RWORK( 1 )
+      END IF
+*
+      BBNRM = CLANGE( '1', N, N, B, LDB, RWORK( 1 ) )
+      IF( ILBSCL ) THEN
+         RWORK( 1 ) = BBNRM
+         CALL SLASCL( 'G', 0, 0, BNRMTO, BNRM, 1, 1, RWORK( 1 ), 1,
+     $                IERR )
+         BBNRM = RWORK( 1 )
+      END IF
+*
+*     Reduce B to triangular form (QR decomposition of B)
+*     (Complex Workspace: need N, prefer N*NB )
+*
+      IROWS = IHI + 1 - ILO
+      IF( ILV .OR. .NOT.WANTSN ) THEN
+         ICOLS = N + 1 - ILO
+      ELSE
+         ICOLS = IROWS
+      END IF
+      ITAU = 1
+      IWRK = ITAU + IROWS
+      CALL CGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ),
+     $             WORK( IWRK ), LWORK+1-IWRK, IERR )
+*
+*     Apply the unitary transformation to A
+*     (Complex Workspace: need N, prefer N*NB)
+*
+      CALL CUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB,
+     $             WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ),
+     $             LWORK+1-IWRK, IERR )
+*
+*     Initialize VL and/or VR
+*     (Workspace: need N, prefer N*NB)
+*
+      IF( ILVL ) THEN
+         CALL CLASET( 'Full', N, N, CZERO, CONE, VL, LDVL )
+         CALL CLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB,
+     $                VL( ILO+1, ILO ), LDVL )
+         CALL CUNGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL,
+     $                WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR )
+      END IF
+*
+      IF( ILVR )
+     $   CALL CLASET( 'Full', N, N, CZERO, CONE, VR, LDVR )
+*
+*     Reduce to generalized Hessenberg form
+*     (Workspace: none needed)
+*
+      IF( ILV .OR. .NOT.WANTSN ) THEN
+*
+*        Eigenvectors requested -- work on whole matrix.
+*
+         CALL CGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL,
+     $                LDVL, VR, LDVR, IERR )
+      ELSE
+         CALL CGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA,
+     $                B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IERR )
+      END IF
+*
+*     Perform QZ algorithm (Compute eigenvalues, and optionally, the
+*     Schur forms and Schur vectors)
+*     (Complex Workspace: need N)
+*     (Real Workspace: need N)
+*
+      IWRK = ITAU
+      IF( ILV .OR. .NOT.WANTSN ) THEN
+         CHTEMP = 'S'
+      ELSE
+         CHTEMP = 'E'
+      END IF
+*
+      CALL CHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB,
+     $             ALPHA, BETA, VL, LDVL, VR, LDVR, WORK( IWRK ),
+     $             LWORK+1-IWRK, RWORK, IERR )
+      IF( IERR.NE.0 ) THEN
+         IF( IERR.GT.0 .AND. IERR.LE.N ) THEN
+            INFO = IERR
+         ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN
+            INFO = IERR - N
+         ELSE
+            INFO = N + 1
+         END IF
+         GO TO 90
+      END IF
+*
+*     Compute Eigenvectors and estimate condition numbers if desired
+*     CTGEVC: (Complex Workspace: need 2*N )
+*             (Real Workspace:    need 2*N )
+*     CTGSNA: (Complex Workspace: need 2*N*N if SENSE='V' or 'B')
+*             (Integer Workspace: need N+2 )
+*
+      IF( ILV .OR. .NOT.WANTSN ) THEN
+         IF( ILV ) THEN
+            IF( ILVL ) THEN
+               IF( ILVR ) THEN
+                  CHTEMP = 'B'
+               ELSE
+                  CHTEMP = 'L'
+               END IF
+            ELSE
+               CHTEMP = 'R'
+            END IF
+*
+            CALL CTGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL,
+     $                   LDVL, VR, LDVR, N, IN, WORK( IWRK ), RWORK,
+     $                   IERR )
+            IF( IERR.NE.0 ) THEN
+               INFO = N + 2
+               GO TO 90
+            END IF
+         END IF
+*
+         IF( .NOT.WANTSN ) THEN
+*
+*           compute eigenvectors (STGEVC) and estimate condition
+*           numbers (STGSNA). Note that the definition of the condition
+*           number is not invariant under transformation (u,v) to
+*           (Q*u, Z*v), where (u,v) are eigenvectors of the generalized
+*           Schur form (S,T), Q and Z are orthogonal matrices. In order
+*           to avoid using extra 2*N*N workspace, we have to
+*           re-calculate eigenvectors and estimate the condition numbers
+*           one at a time.
+*
+            DO 20 I = 1, N
+*
+               DO 10 J = 1, N
+                  BWORK( J ) = .FALSE.
+   10          CONTINUE
+               BWORK( I ) = .TRUE.
+*
+               IWRK = N + 1
+               IWRK1 = IWRK + N
+*
+               IF( WANTSE .OR. WANTSB ) THEN
+                  CALL CTGEVC( 'B', 'S', BWORK, N, A, LDA, B, LDB,
+     $                         WORK( 1 ), N, WORK( IWRK ), N, 1, M,
+     $                         WORK( IWRK1 ), RWORK, IERR )
+                  IF( IERR.NE.0 ) THEN
+                     INFO = N + 2
+                     GO TO 90
+                  END IF
+               END IF
+*
+               CALL CTGSNA( SENSE, 'S', BWORK, N, A, LDA, B, LDB,
+     $                      WORK( 1 ), N, WORK( IWRK ), N, RCONDE( I ),
+     $                      RCONDV( I ), 1, M, WORK( IWRK1 ),
+     $                      LWORK-IWRK1+1, IWORK, IERR )
+*
+   20       CONTINUE
+         END IF
+      END IF
+*
+*     Undo balancing on VL and VR and normalization
+*     (Workspace: none needed)
+*
+      IF( ILVL ) THEN
+         CALL CGGBAK( BALANC, 'L', N, ILO, IHI, LSCALE, RSCALE, N, VL,
+     $                LDVL, IERR )
+*
+         DO 50 JC = 1, N
+            TEMP = ZERO
+            DO 30 JR = 1, N
+               TEMP = MAX( TEMP, ABS1( VL( JR, JC ) ) )
+   30       CONTINUE
+            IF( TEMP.LT.SMLNUM )
+     $         GO TO 50
+            TEMP = ONE / TEMP
+            DO 40 JR = 1, N
+               VL( JR, JC ) = VL( JR, JC )*TEMP
+   40       CONTINUE
+   50    CONTINUE
+      END IF
+*
+      IF( ILVR ) THEN
+         CALL CGGBAK( BALANC, 'R', N, ILO, IHI, LSCALE, RSCALE, N, VR,
+     $                LDVR, IERR )
+         DO 80 JC = 1, N
+            TEMP = ZERO
+            DO 60 JR = 1, N
+               TEMP = MAX( TEMP, ABS1( VR( JR, JC ) ) )
+   60       CONTINUE
+            IF( TEMP.LT.SMLNUM )
+     $         GO TO 80
+            TEMP = ONE / TEMP
+            DO 70 JR = 1, N
+               VR( JR, JC ) = VR( JR, JC )*TEMP
+   70       CONTINUE
+   80    CONTINUE
+      END IF
+*
+*     Undo scaling if necessary
+*
+      IF( ILASCL )
+     $   CALL CLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR )
+*
+      IF( ILBSCL )
+     $   CALL CLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR )
+*
+   90 CONTINUE
+      WORK( 1 ) = MAXWRK
+*
+      RETURN
+*
+*     End of CGGEVX
+*
+      END
diff --git a/costa/native/external/lapack/cggglm.f b/costa/native/external/lapack/cggglm.f
new file mode 100644
index 000000000..e81b8f932
--- /dev/null
+++ b/costa/native/external/lapack/cggglm.f
@@ -0,0 +1,213 @@
+      SUBROUTINE CGGGLM( N, M, P, A, LDA, B, LDB, D, X, Y, WORK, LWORK,
+     $                   INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, P
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), B( LDB, * ), D( * ), WORK( * ),
+     $                   X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGGGLM solves a general Gauss-Markov linear model (GLM) problem:
+*
+*          minimize || y ||_2   subject to   d = A*x + B*y
+*              x
+*
+*  where A is an N-by-M matrix, B is an N-by-P matrix, and d is a
+*  given N-vector. It is assumed that M <= N <= M+P, and
+*
+*             rank(A) = M    and    rank( A B ) = N.
+*
+*  Under these assumptions, the constrained equation is always
+*  consistent, and there is a unique solution x and a minimal 2-norm
+*  solution y, which is obtained using a generalized QR factorization
+*  of A and B.
+*
+*  In particular, if matrix B is square nonsingular, then the problem
+*  GLM is equivalent to the following weighted linear least squares
+*  problem
+*
+*               minimize || inv(B)*(d-A*x) ||_2
+*                   x
+*
+*  where inv(B) denotes the inverse of B.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of rows of the matrices A and B.  N >= 0.
+*
+*  M       (input) INTEGER
+*          The number of columns of the matrix A.  0 <= M <= N.
+*
+*  P       (input) INTEGER
+*          The number of columns of the matrix B.  P >= N-M.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,M)
+*          On entry, the N-by-M matrix A.
+*          On exit, A is destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,P)
+*          On entry, the N-by-P matrix B.
+*          On exit, B is destroyed.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  D       (input/output) COMPLEX array, dimension (N)
+*          On entry, D is the left hand side of the GLM equation.
+*          On exit, D is destroyed.
+*
+*  X       (output) COMPLEX array, dimension (M)
+*  Y       (output) COMPLEX array, dimension (P)
+*          On exit, X and Y are the solutions of the GLM problem.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,N+M+P).
+*          For optimum performance, LWORK >= M+min(N,P)+max(N,P)*NB,
+*          where NB is an upper bound for the optimal blocksizes for
+*          CGEQRF, CGERQF, CUNMQR and CUNMRQ.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  ===================================================================
+*
+*     .. Parameters ..
+      COMPLEX            CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, LOPT, LWKOPT, NB, NB1, NB2, NB3, NB4, NP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CGEMV, CGGQRF, CTRSV, CUNMQR, CUNMRQ,
+     $                   XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV 
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      NP = MIN( N, P )
+      NB1 = ILAENV( 1, 'CGEQRF', ' ', N, M, -1, -1 )
+      NB2 = ILAENV( 1, 'CGERQF', ' ', N, M, -1, -1 )
+      NB3 = ILAENV( 1, 'CUNMQR', ' ', N, M, P, -1 )
+      NB4 = ILAENV( 1, 'CUNMRQ', ' ', N, M, P, -1 )
+      NB = MAX( NB1, NB2, NB3, NB4 )
+      LWKOPT = M + NP + MAX( N, P )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( M.LT.0 .OR. M.GT.N ) THEN
+         INFO = -2
+      ELSE IF( P.LT.0 .OR. P.LT.N-M ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LWORK.LT.MAX( 1, N+M+P ) .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGGGLM', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Compute the GQR factorization of matrices A and B:
+*
+*            Q'*A = ( R11 ) M,    Q'*B*Z' = ( T11   T12 ) M
+*                   (  0  ) N-M             (  0    T22 ) N-M
+*                      M                     M+P-N  N-M
+*
+*     where R11 and T22 are upper triangular, and Q and Z are
+*     unitary.
+*
+      CALL CGGQRF( N, M, P, A, LDA, WORK, B, LDB, WORK( M+1 ),
+     $             WORK( M+NP+1 ), LWORK-M-NP, INFO )
+      LOPT = WORK( M+NP+1 )
+*
+*     Update left-hand-side vector d = Q'*d = ( d1 ) M
+*                                             ( d2 ) N-M
+*
+      CALL CUNMQR( 'Left', 'Conjugate transpose', N, 1, M, A, LDA, WORK,
+     $             D, MAX( 1, N ), WORK( M+NP+1 ), LWORK-M-NP, INFO )
+      LOPT = MAX( LOPT, INT( WORK( M+NP+1 ) ) )
+*
+*     Solve T22*y2 = d2 for y2
+*
+      CALL CTRSV( 'Upper', 'No transpose', 'Non unit', N-M,
+     $            B( M+1, M+P-N+1 ), LDB, D( M+1 ), 1 )
+      CALL CCOPY( N-M, D( M+1 ), 1, Y( M+P-N+1 ), 1 )
+*
+*     Set y1 = 0
+*
+      DO 10 I = 1, M + P - N
+         Y( I ) = CZERO
+   10 CONTINUE
+*
+*     Update d1 = d1 - T12*y2
+*
+      CALL CGEMV( 'No transpose', M, N-M, -CONE, B( 1, M+P-N+1 ), LDB,
+     $            Y( M+P-N+1 ), 1, CONE, D, 1 )
+*
+*     Solve triangular system: R11*x = d1
+*
+      CALL CTRSV( 'Upper', 'No Transpose', 'Non unit', M, A, LDA, D, 1 )
+*
+*     Copy D to X
+*
+      CALL CCOPY( M, D, 1, X, 1 )
+*
+*     Backward transformation y = Z'*y
+*
+      CALL CUNMRQ( 'Left', 'Conjugate transpose', P, 1, NP,
+     $             B( MAX( 1, N-P+1 ), 1 ), LDB, WORK( M+1 ), Y,
+     $             MAX( 1, P ), WORK( M+NP+1 ), LWORK-M-NP, INFO )
+      WORK( 1 ) = M + NP + MAX( LOPT, INT( WORK( M+NP+1 ) ) )
+*
+      RETURN
+*
+*     End of CGGGLM
+*
+      END
diff --git a/costa/native/external/lapack/cgghrd.f b/costa/native/external/lapack/cgghrd.f
new file mode 100644
index 000000000..c296c145d
--- /dev/null
+++ b/costa/native/external/lapack/cgghrd.f
@@ -0,0 +1,256 @@
+      SUBROUTINE CGGHRD( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q,
+     $                   LDQ, Z, LDZ, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPQ, COMPZ
+      INTEGER            IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGGHRD reduces a pair of complex matrices (A,B) to generalized upper
+*  Hessenberg form using unitary transformations, where A is a
+*  general matrix and B is upper triangular:  Q' * A * Z = H and
+*  Q' * B * Z = T, where H is upper Hessenberg, T is upper triangular,
+*  and Q and Z are unitary, and ' means conjugate transpose.
+*
+*  The unitary matrices Q and Z are determined as products of Givens
+*  rotations.  They may either be formed explicitly, or they may be
+*  postmultiplied into input matrices Q1 and Z1, so that
+*
+*       Q1 * A * Z1' = (Q1*Q) * H * (Z1*Z)'
+*       Q1 * B * Z1' = (Q1*Q) * T * (Z1*Z)'
+*
+*  Arguments
+*  =========
+*
+*  COMPQ   (input) CHARACTER*1
+*          = 'N': do not compute Q;
+*          = 'I': Q is initialized to the unit matrix, and the
+*                 unitary matrix Q is returned;
+*          = 'V': Q must contain a unitary matrix Q1 on entry,
+*                 and the product Q1*Q is returned.
+*
+*  COMPZ   (input) CHARACTER*1
+*          = 'N': do not compute Q;
+*          = 'I': Q is initialized to the unit matrix, and the
+*                 unitary matrix Q is returned;
+*          = 'V': Q must contain a unitary matrix Q1 on entry,
+*                 and the product Q1*Q is returned.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          It is assumed that A is already upper triangular in rows and
+*          columns 1:ILO-1 and IHI+1:N.  ILO and IHI are normally set
+*          by a previous call to CGGBAL; otherwise they should be set
+*          to 1 and N respectively.
+*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA, N)
+*          On entry, the N-by-N general matrix to be reduced.
+*          On exit, the upper triangle and the first subdiagonal of A
+*          are overwritten with the upper Hessenberg matrix H, and the
+*          rest is set to zero.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB, N)
+*          On entry, the N-by-N upper triangular matrix B.
+*          On exit, the upper triangular matrix T = Q' B Z.  The
+*          elements below the diagonal are set to zero.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  Q       (input/output) COMPLEX array, dimension (LDQ, N)
+*          If COMPQ='N':  Q is not referenced.
+*          If COMPQ='I':  on entry, Q need not be set, and on exit it
+*                         contains the unitary matrix Q, where Q'
+*                         is the product of the Givens transformations
+*                         which are applied to A and B on the left.
+*          If COMPQ='V':  on entry, Q must contain a unitary matrix
+*                         Q1, and on exit this is overwritten by Q1*Q.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.
+*          LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise.
+*
+*  Z       (input/output) COMPLEX array, dimension (LDZ, N)
+*          If COMPZ='N':  Z is not referenced.
+*          If COMPZ='I':  on entry, Z need not be set, and on exit it
+*                         contains the unitary matrix Z, which is
+*                         the product of the Givens transformations
+*                         which are applied to A and B on the right.
+*          If COMPZ='V':  on entry, Z must contain a unitary matrix
+*                         Z1, and on exit this is overwritten by Z1*Z.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.
+*          LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  This routine reduces A to Hessenberg and B to triangular form by
+*  an unblocked reduction, as described in _Matrix_Computations_,
+*  by Golub and van Loan (Johns Hopkins Press).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            CONE, CZERO
+      PARAMETER          ( CONE = ( 1.0E+0, 0.0E+0 ),
+     $                   CZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ILQ, ILZ
+      INTEGER            ICOMPQ, ICOMPZ, JCOL, JROW
+      REAL               C
+      COMPLEX            CTEMP, S
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLARTG, CLASET, CROT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode COMPQ
+*
+      IF( LSAME( COMPQ, 'N' ) ) THEN
+         ILQ = .FALSE.
+         ICOMPQ = 1
+      ELSE IF( LSAME( COMPQ, 'V' ) ) THEN
+         ILQ = .TRUE.
+         ICOMPQ = 2
+      ELSE IF( LSAME( COMPQ, 'I' ) ) THEN
+         ILQ = .TRUE.
+         ICOMPQ = 3
+      ELSE
+         ICOMPQ = 0
+      END IF
+*
+*     Decode COMPZ
+*
+      IF( LSAME( COMPZ, 'N' ) ) THEN
+         ILZ = .FALSE.
+         ICOMPZ = 1
+      ELSE IF( LSAME( COMPZ, 'V' ) ) THEN
+         ILZ = .TRUE.
+         ICOMPZ = 2
+      ELSE IF( LSAME( COMPZ, 'I' ) ) THEN
+         ILZ = .TRUE.
+         ICOMPZ = 3
+      ELSE
+         ICOMPZ = 0
+      END IF
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( ICOMPQ.LE.0 ) THEN
+         INFO = -1
+      ELSE IF( ICOMPZ.LE.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( ILO.LT.1 ) THEN
+         INFO = -4
+      ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( ( ILQ .AND. LDQ.LT.N ) .OR. LDQ.LT.1 ) THEN
+         INFO = -11
+      ELSE IF( ( ILZ .AND. LDZ.LT.N ) .OR. LDZ.LT.1 ) THEN
+         INFO = -13
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGGHRD', -INFO )
+         RETURN
+      END IF
+*
+*     Initialize Q and Z if desired.
+*
+      IF( ICOMPQ.EQ.3 )
+     $   CALL CLASET( 'Full', N, N, CZERO, CONE, Q, LDQ )
+      IF( ICOMPZ.EQ.3 )
+     $   CALL CLASET( 'Full', N, N, CZERO, CONE, Z, LDZ )
+*
+*     Quick return if possible
+*
+      IF( N.LE.1 )
+     $   RETURN
+*
+*     Zero out lower triangle of B
+*
+      DO 20 JCOL = 1, N - 1
+         DO 10 JROW = JCOL + 1, N
+            B( JROW, JCOL ) = CZERO
+   10    CONTINUE
+   20 CONTINUE
+*
+*     Reduce A and B
+*
+      DO 40 JCOL = ILO, IHI - 2
+*
+         DO 30 JROW = IHI, JCOL + 2, -1
+*
+*           Step 1: rotate rows JROW-1, JROW to kill A(JROW,JCOL)
+*
+            CTEMP = A( JROW-1, JCOL )
+            CALL CLARTG( CTEMP, A( JROW, JCOL ), C, S,
+     $                   A( JROW-1, JCOL ) )
+            A( JROW, JCOL ) = CZERO
+            CALL CROT( N-JCOL, A( JROW-1, JCOL+1 ), LDA,
+     $                 A( JROW, JCOL+1 ), LDA, C, S )
+            CALL CROT( N+2-JROW, B( JROW-1, JROW-1 ), LDB,
+     $                 B( JROW, JROW-1 ), LDB, C, S )
+            IF( ILQ )
+     $         CALL CROT( N, Q( 1, JROW-1 ), 1, Q( 1, JROW ), 1, C,
+     $                    CONJG( S ) )
+*
+*           Step 2: rotate columns JROW, JROW-1 to kill B(JROW,JROW-1)
+*
+            CTEMP = B( JROW, JROW )
+            CALL CLARTG( CTEMP, B( JROW, JROW-1 ), C, S,
+     $                   B( JROW, JROW ) )
+            B( JROW, JROW-1 ) = CZERO
+            CALL CROT( IHI, A( 1, JROW ), 1, A( 1, JROW-1 ), 1, C, S )
+            CALL CROT( JROW-1, B( 1, JROW ), 1, B( 1, JROW-1 ), 1, C,
+     $                 S )
+            IF( ILZ )
+     $         CALL CROT( N, Z( 1, JROW ), 1, Z( 1, JROW-1 ), 1, C, S )
+   30    CONTINUE
+   40 CONTINUE
+*
+      RETURN
+*
+*     End of CGGHRD
+*
+      END
diff --git a/costa/native/external/lapack/cgglse.f b/costa/native/external/lapack/cgglse.f
new file mode 100644
index 000000000..da55eae03
--- /dev/null
+++ b/costa/native/external/lapack/cgglse.f
@@ -0,0 +1,218 @@
+      SUBROUTINE CGGLSE( M, N, P, A, LDA, B, LDB, C, D, X, WORK, LWORK,
+     $                   INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, P
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), B( LDB, * ), C( * ), D( * ),
+     $                   WORK( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGGLSE solves the linear equality-constrained least squares (LSE)
+*  problem:
+*
+*          minimize || c - A*x ||_2   subject to   B*x = d
+*
+*  where A is an M-by-N matrix, B is a P-by-N matrix, c is a given
+*  M-vector, and d is a given P-vector. It is assumed that
+*  P <= N <= M+P, and
+*
+*           rank(B) = P and  rank( ( A ) ) = N.
+*                                ( ( B ) )
+*
+*  These conditions ensure that the LSE problem has a unique solution,
+*  which is obtained using a GRQ factorization of the matrices B and A.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrices A and B. N >= 0.
+*
+*  P       (input) INTEGER
+*          The number of rows of the matrix B. 0 <= P <= N <= M+P.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, A is destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,N)
+*          On entry, the P-by-N matrix B.
+*          On exit, B is destroyed.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,P).
+*
+*  C       (input/output) COMPLEX array, dimension (M)
+*          On entry, C contains the right hand side vector for the
+*          least squares part of the LSE problem.
+*          On exit, the residual sum of squares for the solution
+*          is given by the sum of squares of elements N-P+1 to M of
+*          vector C.
+*
+*  D       (input/output) COMPLEX array, dimension (P)
+*          On entry, D contains the right hand side vector for the
+*          constrained equation.
+*          On exit, D is destroyed.
+*
+*  X       (output) COMPLEX array, dimension (N)
+*          On exit, X is the solution of the LSE problem.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,M+N+P).
+*          For optimum performance LWORK >= P+min(M,N)+max(M,N)*NB,
+*          where NB is an upper bound for the optimal blocksizes for
+*          CGEQRF, CGERQF, CUNMQR and CUNMRQ.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            CONE
+      PARAMETER          ( CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            LOPT, LWKOPT, MN, NB, NB1, NB2, NB3, NB4, NR
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CCOPY, CGEMV, CGGRQF, CTRMV, CTRSV,
+     $                   CUNMQR, CUNMRQ, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV 
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      MN = MIN( M, N )
+      NB1 = ILAENV( 1, 'CGEQRF', ' ', M, N, -1, -1 )
+      NB2 = ILAENV( 1, 'CGERQF', ' ', M, N, -1, -1 )
+      NB3 = ILAENV( 1, 'CUNMQR', ' ', M, N, P, -1 )
+      NB4 = ILAENV( 1, 'CUNMRQ', ' ', M, N, P, -1 )
+      NB = MAX( NB1, NB2, NB3, NB4 )
+      LWKOPT = P + MN + MAX( M, N )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( P.LT.0 .OR. P.GT.N .OR. P.LT.N-M ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, P ) ) THEN
+         INFO = -7
+      ELSE IF( LWORK.LT.MAX( 1, M+N+P ) .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGGLSE', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Compute the GRQ factorization of matrices B and A:
+*
+*            B*Q' = (  0  T12 ) P   Z'*A*Q' = ( R11 R12 ) N-P
+*                     N-P  P                  (  0  R22 ) M+P-N
+*                                               N-P  P
+*
+*     where T12 and R11 are upper triangular, and Q and Z are
+*     unitary.
+*
+      CALL CGGRQF( P, M, N, B, LDB, WORK, A, LDA, WORK( P+1 ),
+     $             WORK( P+MN+1 ), LWORK-P-MN, INFO )
+      LOPT = WORK( P+MN+1 )
+*
+*     Update c = Z'*c = ( c1 ) N-P
+*                       ( c2 ) M+P-N
+*
+      CALL CUNMQR( 'Left', 'Conjugate Transpose', M, 1, MN, A, LDA,
+     $             WORK( P+1 ), C, MAX( 1, M ), WORK( P+MN+1 ),
+     $             LWORK-P-MN, INFO )
+      LOPT = MAX( LOPT, INT( WORK( P+MN+1 ) ) )
+*
+*     Solve T12*x2 = d for x2
+*
+      CALL CTRSV( 'Upper', 'No transpose', 'Non unit', P, B( 1, N-P+1 ),
+     $            LDB, D, 1 )
+*
+*     Update c1
+*
+      CALL CGEMV( 'No transpose', N-P, P, -CONE, A( 1, N-P+1 ), LDA, D,
+     $            1, CONE, C, 1 )
+*
+*     Sovle R11*x1 = c1 for x1
+*
+      CALL CTRSV( 'Upper', 'No transpose', 'Non unit', N-P, A, LDA, C,
+     $            1 )
+*
+*     Put the solutions in X
+*
+      CALL CCOPY( N-P, C, 1, X, 1 )
+      CALL CCOPY( P, D, 1, X( N-P+1 ), 1 )
+*
+*     Compute the residual vector:
+*
+      IF( M.LT.N ) THEN
+         NR = M + P - N
+         CALL CGEMV( 'No transpose', NR, N-M, -CONE, A( N-P+1, M+1 ),
+     $               LDA, D( NR+1 ), 1, CONE, C( N-P+1 ), 1 )
+      ELSE
+         NR = P
+      END IF
+      CALL CTRMV( 'Upper', 'No transpose', 'Non unit', NR,
+     $            A( N-P+1, N-P+1 ), LDA, D, 1 )
+      CALL CAXPY( NR, -CONE, D, 1, C( N-P+1 ), 1 )
+*
+*     Backward transformation x = Q'*x
+*
+      CALL CUNMRQ( 'Left', 'Conjugate Transpose', N, 1, P, B, LDB,
+     $             WORK( 1 ), X, N, WORK( P+MN+1 ), LWORK-P-MN, INFO )
+      WORK( 1 ) = P + MN + MAX( LOPT, INT( WORK( P+MN+1 ) ) )
+*
+      RETURN
+*
+*     End of CGGLSE
+*
+      END
diff --git a/costa/native/external/lapack/cggqrf.f b/costa/native/external/lapack/cggqrf.f
new file mode 100644
index 000000000..ee63759f7
--- /dev/null
+++ b/costa/native/external/lapack/cggqrf.f
@@ -0,0 +1,212 @@
+      SUBROUTINE CGGQRF( N, M, P, A, LDA, TAUA, B, LDB, TAUB, WORK,
+     $                   LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, P
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGGQRF computes a generalized QR factorization of an N-by-M matrix A
+*  and an N-by-P matrix B:
+*
+*              A = Q*R,        B = Q*T*Z,
+*
+*  where Q is an N-by-N unitary matrix, Z is a P-by-P unitary matrix,
+*  and R and T assume one of the forms:
+*
+*  if N >= M,  R = ( R11 ) M  ,   or if N < M,  R = ( R11  R12 ) N,
+*                  (  0  ) N-M                         N   M-N
+*                     M
+*
+*  where R11 is upper triangular, and
+*
+*  if N <= P,  T = ( 0  T12 ) N,   or if N > P,  T = ( T11 ) N-P,
+*                   P-N  N                           ( T21 ) P
+*                                                       P
+*
+*  where T12 or T21 is upper triangular.
+*
+*  In particular, if B is square and nonsingular, the GQR factorization
+*  of A and B implicitly gives the QR factorization of inv(B)*A:
+*
+*               inv(B)*A = Z'*(inv(T)*R)
+*
+*  where inv(B) denotes the inverse of the matrix B, and Z' denotes the
+*  conjugate transpose of matrix Z.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of rows of the matrices A and B. N >= 0.
+*
+*  M       (input) INTEGER
+*          The number of columns of the matrix A.  M >= 0.
+*
+*  P       (input) INTEGER
+*          The number of columns of the matrix B.  P >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,M)
+*          On entry, the N-by-M matrix A.
+*          On exit, the elements on and above the diagonal of the array
+*          contain the min(N,M)-by-M upper trapezoidal matrix R (R is
+*          upper triangular if N >= M); the elements below the diagonal,
+*          with the array TAUA, represent the unitary matrix Q as a
+*          product of min(N,M) elementary reflectors (see Further
+*          Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  TAUA    (output) COMPLEX array, dimension (min(N,M))
+*          The scalar factors of the elementary reflectors which
+*          represent the unitary matrix Q (see Further Details).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,P)
+*          On entry, the N-by-P matrix B.
+*          On exit, if N <= P, the upper triangle of the subarray
+*          B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T;
+*          if N > P, the elements on and above the (N-P)-th subdiagonal
+*          contain the N-by-P upper trapezoidal matrix T; the remaining
+*          elements, with the array TAUB, represent the unitary
+*          matrix Z as a product of elementary reflectors (see Further
+*          Details).
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  TAUB    (output) COMPLEX array, dimension (min(N,P))
+*          The scalar factors of the elementary reflectors which
+*          represent the unitary matrix Z (see Further Details).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,N,M,P).
+*          For optimum performance LWORK >= max(N,M,P)*max(NB1,NB2,NB3),
+*          where NB1 is the optimal blocksize for the QR factorization
+*          of an N-by-M matrix, NB2 is the optimal blocksize for the
+*          RQ factorization of an N-by-P matrix, and NB3 is the optimal
+*          blocksize for a call of CUNMQR.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*           = 0:  successful exit
+*           < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(k), where k = min(n,m).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - taua * v * v'
+*
+*  where taua is a complex scalar, and v is a complex vector with
+*  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i+1:n,i),
+*  and taua in TAUA(i).
+*  To form Q explicitly, use LAPACK subroutine CUNGQR.
+*  To use Q to update another matrix, use LAPACK subroutine CUNMQR.
+*
+*  The matrix Z is represented as a product of elementary reflectors
+*
+*     Z = H(1) H(2) . . . H(k), where k = min(n,p).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - taub * v * v'
+*
+*  where taub is a complex scalar, and v is a complex vector with
+*  v(p-k+i+1:p) = 0 and v(p-k+i) = 1; v(1:p-k+i-1) is stored on exit in
+*  B(n-k+i,1:p-k+i-1), and taub in TAUB(i).
+*  To form Z explicitly, use LAPACK subroutine CUNGRQ.
+*  To use Z to update another matrix, use LAPACK subroutine CUNMRQ.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            LOPT, LWKOPT, NB, NB1, NB2, NB3
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEQRF, CGERQF, CUNMQR, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV 
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      NB1 = ILAENV( 1, 'CGEQRF', ' ', N, M, -1, -1 )
+      NB2 = ILAENV( 1, 'CGERQF', ' ', N, P, -1, -1 )
+      NB3 = ILAENV( 1, 'CUNMQR', ' ', N, M, P, -1 )
+      NB = MAX( NB1, NB2, NB3 )
+      LWKOPT = MAX( N, M, P)*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( P.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LWORK.LT.MAX( 1, N, M, P ) .AND. .NOT.LQUERY ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGGQRF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     QR factorization of N-by-M matrix A: A = Q*R
+*
+      CALL CGEQRF( N, M, A, LDA, TAUA, WORK, LWORK, INFO )
+      LOPT = WORK( 1 )
+*
+*     Update B := Q'*B.
+*
+      CALL CUNMQR( 'Left', 'Conjugate Transpose', N, P, MIN( N, M ), A,
+     $             LDA, TAUA, B, LDB, WORK, LWORK, INFO )
+      LOPT = MAX( LOPT, INT( WORK( 1 ) ) )
+*
+*     RQ factorization of N-by-P matrix B: B = T*Z.
+*
+      CALL CGERQF( N, P, B, LDB, TAUB, WORK, LWORK, INFO )
+      WORK( 1 ) = MAX( LOPT, INT( WORK( 1 ) ) )
+*
+      RETURN
+*
+*     End of CGGQRF
+*
+      END
diff --git a/costa/native/external/lapack/cggrqf.f b/costa/native/external/lapack/cggrqf.f
new file mode 100644
index 000000000..455c9a454
--- /dev/null
+++ b/costa/native/external/lapack/cggrqf.f
@@ -0,0 +1,212 @@
+      SUBROUTINE CGGRQF( M, P, N, A, LDA, TAUA, B, LDB, TAUB, WORK,
+     $                   LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, P
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGGRQF computes a generalized RQ factorization of an M-by-N matrix A
+*  and a P-by-N matrix B:
+*
+*              A = R*Q,        B = Z*T*Q,
+*
+*  where Q is an N-by-N unitary matrix, Z is a P-by-P unitary
+*  matrix, and R and T assume one of the forms:
+*
+*  if M <= N,  R = ( 0  R12 ) M,   or if M > N,  R = ( R11 ) M-N,
+*                   N-M  M                           ( R21 ) N
+*                                                       N
+*
+*  where R12 or R21 is upper triangular, and
+*
+*  if P >= N,  T = ( T11 ) N  ,   or if P < N,  T = ( T11  T12 ) P,
+*                  (  0  ) P-N                         P   N-P
+*                     N
+*
+*  where T11 is upper triangular.
+*
+*  In particular, if B is square and nonsingular, the GRQ factorization
+*  of A and B implicitly gives the RQ factorization of A*inv(B):
+*
+*               A*inv(B) = (R*inv(T))*Z'
+*
+*  where inv(B) denotes the inverse of the matrix B, and Z' denotes the
+*  conjugate transpose of the matrix Z.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  P       (input) INTEGER
+*          The number of rows of the matrix B.  P >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrices A and B. N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, if M <= N, the upper triangle of the subarray
+*          A(1:M,N-M+1:N) contains the M-by-M upper triangular matrix R;
+*          if M > N, the elements on and above the (M-N)-th subdiagonal
+*          contain the M-by-N upper trapezoidal matrix R; the remaining
+*          elements, with the array TAUA, represent the unitary
+*          matrix Q as a product of elementary reflectors (see Further
+*          Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  TAUA    (output) COMPLEX array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors which
+*          represent the unitary matrix Q (see Further Details).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,N)
+*          On entry, the P-by-N matrix B.
+*          On exit, the elements on and above the diagonal of the array
+*          contain the min(P,N)-by-N upper trapezoidal matrix T (T is
+*          upper triangular if P >= N); the elements below the diagonal,
+*          with the array TAUB, represent the unitary matrix Z as a
+*          product of elementary reflectors (see Further Details).
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,P).
+*
+*  TAUB    (output) COMPLEX array, dimension (min(P,N))
+*          The scalar factors of the elementary reflectors which
+*          represent the unitary matrix Z (see Further Details).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,N,M,P).
+*          For optimum performance LWORK >= max(N,M,P)*max(NB1,NB2,NB3),
+*          where NB1 is the optimal blocksize for the RQ factorization
+*          of an M-by-N matrix, NB2 is the optimal blocksize for the
+*          QR factorization of a P-by-N matrix, and NB3 is the optimal
+*          blocksize for a call of CUNMRQ.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO=-i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(k), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - taua * v * v'
+*
+*  where taua is a complex scalar, and v is a complex vector with
+*  v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in
+*  A(m-k+i,1:n-k+i-1), and taua in TAUA(i).
+*  To form Q explicitly, use LAPACK subroutine CUNGRQ.
+*  To use Q to update another matrix, use LAPACK subroutine CUNMRQ.
+*
+*  The matrix Z is represented as a product of elementary reflectors
+*
+*     Z = H(1) H(2) . . . H(k), where k = min(p,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - taub * v * v'
+*
+*  where taub is a complex scalar, and v is a complex vector with
+*  v(1:i-1) = 0 and v(i) = 1; v(i+1:p) is stored on exit in B(i+1:p,i),
+*  and taub in TAUB(i).
+*  To form Z explicitly, use LAPACK subroutine CUNGQR.
+*  To use Z to update another matrix, use LAPACK subroutine CUNMQR.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            LOPT, LWKOPT, NB, NB1, NB2, NB3
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEQRF, CGERQF, CUNMRQ, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV 
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      NB1 = ILAENV( 1, 'CGERQF', ' ', M, N, -1, -1 )
+      NB2 = ILAENV( 1, 'CGEQRF', ' ', P, N, -1, -1 )
+      NB3 = ILAENV( 1, 'CUNMRQ', ' ', M, N, P, -1 )
+      NB = MAX( NB1, NB2, NB3 )
+      LWKOPT = MAX( N, M, P)*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( P.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, P ) ) THEN
+         INFO = -8
+      ELSE IF( LWORK.LT.MAX( 1, M, P, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGGRQF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     RQ factorization of M-by-N matrix A: A = R*Q
+*
+      CALL CGERQF( M, N, A, LDA, TAUA, WORK, LWORK, INFO )
+      LOPT = WORK( 1 )
+*
+*     Update B := B*Q'
+*
+      CALL CUNMRQ( 'Right', 'Conjugate Transpose', P, N, MIN( M, N ),
+     $             A( MAX( 1, M-N+1 ), 1 ), LDA, TAUA, B, LDB, WORK,
+     $             LWORK, INFO )
+      LOPT = MAX( LOPT, INT( WORK( 1 ) ) )
+*
+*     QR factorization of P-by-N matrix B: B = Z*T
+*
+      CALL CGEQRF( P, N, B, LDB, TAUB, WORK, LWORK, INFO )
+      WORK( 1 ) = MAX( LOPT, INT( WORK( 1 ) ) )
+*
+      RETURN
+*
+*     End of CGGRQF
+*
+      END
diff --git a/costa/native/external/lapack/cggsvd.f b/costa/native/external/lapack/cggsvd.f
new file mode 100644
index 000000000..7bee097c2
--- /dev/null
+++ b/costa/native/external/lapack/cggsvd.f
@@ -0,0 +1,334 @@
+      SUBROUTINE CGGSVD( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B,
+     $                   LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, WORK,
+     $                   RWORK, IWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBQ, JOBU, JOBV
+      INTEGER            INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               ALPHA( * ), BETA( * ), RWORK( * )
+      COMPLEX            A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
+     $                   U( LDU, * ), V( LDV, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGGSVD computes the generalized singular value decomposition (GSVD)
+*  of an M-by-N complex matrix A and P-by-N complex matrix B:
+*
+*        U'*A*Q = D1*( 0 R ),    V'*B*Q = D2*( 0 R )
+*
+*  where U, V and Q are unitary matrices, and Z' means the conjugate
+*  transpose of Z.  Let K+L = the effective numerical rank of the
+*  matrix (A',B')', then R is a (K+L)-by-(K+L) nonsingular upper
+*  triangular matrix, D1 and D2 are M-by-(K+L) and P-by-(K+L) "diagonal"
+*  matrices and of the following structures, respectively:
+*
+*  If M-K-L >= 0,
+*
+*                      K  L
+*         D1 =     K ( I  0 )
+*                  L ( 0  C )
+*              M-K-L ( 0  0 )
+*
+*                    K  L
+*         D2 =   L ( 0  S )
+*              P-L ( 0  0 )
+*
+*                  N-K-L  K    L
+*    ( 0 R ) = K (  0   R11  R12 )
+*              L (  0    0   R22 )
+*  where
+*
+*    C = diag( ALPHA(K+1), ... , ALPHA(K+L) ),
+*    S = diag( BETA(K+1),  ... , BETA(K+L) ),
+*    C**2 + S**2 = I.
+*
+*    R is stored in A(1:K+L,N-K-L+1:N) on exit.
+*
+*  If M-K-L < 0,
+*
+*                    K M-K K+L-M
+*         D1 =   K ( I  0    0   )
+*              M-K ( 0  C    0   )
+*
+*                      K M-K K+L-M
+*         D2 =   M-K ( 0  S    0  )
+*              K+L-M ( 0  0    I  )
+*                P-L ( 0  0    0  )
+*
+*                     N-K-L  K   M-K  K+L-M
+*    ( 0 R ) =     K ( 0    R11  R12  R13  )
+*                M-K ( 0     0   R22  R23  )
+*              K+L-M ( 0     0    0   R33  )
+*
+*  where
+*
+*    C = diag( ALPHA(K+1), ... , ALPHA(M) ),
+*    S = diag( BETA(K+1),  ... , BETA(M) ),
+*    C**2 + S**2 = I.
+*
+*    (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored
+*    ( 0  R22 R23 )
+*    in B(M-K+1:L,N+M-K-L+1:N) on exit.
+*
+*  The routine computes C, S, R, and optionally the unitary
+*  transformation matrices U, V and Q.
+*
+*  In particular, if B is an N-by-N nonsingular matrix, then the GSVD of
+*  A and B implicitly gives the SVD of A*inv(B):
+*                       A*inv(B) = U*(D1*inv(D2))*V'.
+*  If ( A',B')' has orthnormal columns, then the GSVD of A and B is also
+*  equal to the CS decomposition of A and B. Furthermore, the GSVD can
+*  be used to derive the solution of the eigenvalue problem:
+*                       A'*A x = lambda* B'*B x.
+*  In some literature, the GSVD of A and B is presented in the form
+*                   U'*A*X = ( 0 D1 ),   V'*B*X = ( 0 D2 )
+*  where U and V are orthogonal and X is nonsingular, and D1 and D2 are
+*  ``diagonal''.  The former GSVD form can be converted to the latter
+*  form by taking the nonsingular matrix X as
+*
+*                        X = Q*(  I   0    )
+*                              (  0 inv(R) )
+*
+*  Arguments
+*  =========
+*
+*  JOBU    (input) CHARACTER*1
+*          = 'U':  Unitary matrix U is computed;
+*          = 'N':  U is not computed.
+*
+*  JOBV    (input) CHARACTER*1
+*          = 'V':  Unitary matrix V is computed;
+*          = 'N':  V is not computed.
+*
+*  JOBQ    (input) CHARACTER*1
+*          = 'Q':  Unitary matrix Q is computed;
+*          = 'N':  Q is not computed.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrices A and B.  N >= 0.
+*
+*  P       (input) INTEGER
+*          The number of rows of the matrix B.  P >= 0.
+*
+*  K       (output) INTEGER
+*  L       (output) INTEGER
+*          On exit, K and L specify the dimension of the subblocks
+*          described in Purpose.
+*          K + L = effective numerical rank of (A',B')'.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, A contains the triangular matrix R, or part of R.
+*          See Purpose for details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,N)
+*          On entry, the P-by-N matrix B.
+*          On exit, B contains part of the triangular matrix R if
+*          M-K-L < 0.  See Purpose for details.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,P).
+*
+*  ALPHA   (output) REAL array, dimension (N)
+*  BETA    (output) REAL array, dimension (N)
+*          On exit, ALPHA and BETA contain the generalized singular
+*          value pairs of A and B;
+*            ALPHA(1:K) = 1,
+*            BETA(1:K)  = 0,
+*          and if M-K-L >= 0,
+*            ALPHA(K+1:K+L) = C,
+*            BETA(K+1:K+L)  = S,
+*          or if M-K-L < 0,
+*            ALPHA(K+1:M)= C, ALPHA(M+1:K+L)= 0
+*            BETA(K+1:M) = S, BETA(M+1:K+L) = 1
+*          and
+*            ALPHA(K+L+1:N) = 0
+*            BETA(K+L+1:N)  = 0
+*
+*  U       (output) COMPLEX array, dimension (LDU,M)
+*          If JOBU = 'U', U contains the M-by-M unitary matrix U.
+*          If JOBU = 'N', U is not referenced.
+*
+*  LDU     (input) INTEGER
+*          The leading dimension of the array U. LDU >= max(1,M) if
+*          JOBU = 'U'; LDU >= 1 otherwise.
+*
+*  V       (output) COMPLEX array, dimension (LDV,P)
+*          If JOBV = 'V', V contains the P-by-P unitary matrix V.
+*          If JOBV = 'N', V is not referenced.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V. LDV >= max(1,P) if
+*          JOBV = 'V'; LDV >= 1 otherwise.
+*
+*  Q       (output) COMPLEX array, dimension (LDQ,N)
+*          If JOBQ = 'Q', Q contains the N-by-N unitary matrix Q.
+*          If JOBQ = 'N', Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q. LDQ >= max(1,N) if
+*          JOBQ = 'Q'; LDQ >= 1 otherwise.
+*
+*  WORK    (workspace) COMPLEX array, dimension (max(3*N,M,P)+N)
+*
+*  RWORK   (workspace) REAL array, dimension (2*N)
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (N)
+*          On exit, IWORK stores the sorting information. More
+*          precisely, the following loop will sort ALPHA
+*             for I = K+1, min(M,K+L)
+*                 swap ALPHA(I) and ALPHA(IWORK(I))
+*             endfor
+*          such that ALPHA(1) >= ALPHA(2) >= ... >= ALPHA(N).
+*
+*  INFO    (output)INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = 1, the Jacobi-type procedure failed to
+*                converge.  For further details, see subroutine CTGSJA.
+*
+*  Internal Parameters
+*  ===================
+*
+*  TOLA    REAL
+*  TOLB    REAL
+*          TOLA and TOLB are the thresholds to determine the effective
+*          rank of (A',B')'. Generally, they are set to
+*                   TOLA = MAX(M,N)*norm(A)*MACHEPS,
+*                   TOLB = MAX(P,N)*norm(B)*MACHEPS.
+*          The size of TOLA and TOLB may affect the size of backward
+*          errors of the decomposition.
+*
+*  Further Details
+*  ===============
+*
+*  2-96 Based on modifications by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            WANTQ, WANTU, WANTV
+      INTEGER            I, IBND, ISUB, J, NCYCLE
+      REAL               ANORM, BNORM, SMAX, TEMP, TOLA, TOLB, ULP, UNFL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               CLANGE, SLAMCH
+      EXTERNAL           LSAME, CLANGE, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGGSVP, CTGSJA, SCOPY, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      WANTU = LSAME( JOBU, 'U' )
+      WANTV = LSAME( JOBV, 'V' )
+      WANTQ = LSAME( JOBQ, 'Q' )
+*
+      INFO = 0
+      IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( P.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LDB.LT.MAX( 1, P ) ) THEN
+         INFO = -12
+      ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN
+         INFO = -16
+      ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN
+         INFO = -18
+      ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN
+         INFO = -20
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGGSVD', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the Frobenius norm of matrices A and B
+*
+      ANORM = CLANGE( '1', M, N, A, LDA, RWORK )
+      BNORM = CLANGE( '1', P, N, B, LDB, RWORK )
+*
+*     Get machine precision and set up threshold for determining
+*     the effective numerical rank of the matrices A and B.
+*
+      ULP = SLAMCH( 'Precision' )
+      UNFL = SLAMCH( 'Safe Minimum' )
+      TOLA = MAX( M, N )*MAX( ANORM, UNFL )*ULP
+      TOLB = MAX( P, N )*MAX( BNORM, UNFL )*ULP
+*
+      CALL CGGSVP( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, TOLA,
+     $             TOLB, K, L, U, LDU, V, LDV, Q, LDQ, IWORK, RWORK,
+     $             WORK, WORK( N+1 ), INFO )
+*
+*     Compute the GSVD of two upper "triangular" matrices
+*
+      CALL CTGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B, LDB,
+     $             TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ,
+     $             WORK, NCYCLE, INFO )
+*
+*     Sort the singular values and store the pivot indices in IWORK
+*     Copy ALPHA to RWORK, then sort ALPHA in RWORK
+*
+      CALL SCOPY( N, ALPHA, 1, RWORK, 1 )
+      IBND = MIN( L, M-K )
+      DO 20 I = 1, IBND
+*
+*        Scan for largest ALPHA(K+I)
+*
+         ISUB = I
+         SMAX = RWORK( K+I )
+         DO 10 J = I + 1, IBND
+            TEMP = RWORK( K+J )
+            IF( TEMP.GT.SMAX ) THEN
+               ISUB = J
+               SMAX = TEMP
+            END IF
+   10    CONTINUE
+         IF( ISUB.NE.I ) THEN
+            RWORK( K+ISUB ) = RWORK( K+I )
+            RWORK( K+I ) = SMAX
+            IWORK( K+I ) = K + ISUB
+         ELSE
+            IWORK( K+I ) = K + I
+         END IF
+   20 CONTINUE
+*
+      RETURN
+*
+*     End of CGGSVD
+*
+      END
diff --git a/costa/native/external/lapack/cggsvp.f b/costa/native/external/lapack/cggsvp.f
new file mode 100644
index 000000000..fe0ed0bbd
--- /dev/null
+++ b/costa/native/external/lapack/cggsvp.f
@@ -0,0 +1,403 @@
+      SUBROUTINE CGGSVP( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB,
+     $                   TOLA, TOLB, K, L, U, LDU, V, LDV, Q, LDQ,
+     $                   IWORK, RWORK, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBQ, JOBU, JOBV
+      INTEGER            INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P
+      REAL               TOLA, TOLB
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               RWORK( * )
+      COMPLEX            A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
+     $                   TAU( * ), U( LDU, * ), V( LDV, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGGSVP computes unitary matrices U, V and Q such that
+*
+*                   N-K-L  K    L
+*   U'*A*Q =     K ( 0    A12  A13 )  if M-K-L >= 0;
+*                L ( 0     0   A23 )
+*            M-K-L ( 0     0    0  )
+*
+*                   N-K-L  K    L
+*          =     K ( 0    A12  A13 )  if M-K-L < 0;
+*              M-K ( 0     0   A23 )
+*
+*                 N-K-L  K    L
+*   V'*B*Q =   L ( 0     0   B13 )
+*            P-L ( 0     0    0  )
+*
+*  where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular
+*  upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0,
+*  otherwise A23 is (M-K)-by-L upper trapezoidal.  K+L = the effective
+*  numerical rank of the (M+P)-by-N matrix (A',B')'.  Z' denotes the
+*  conjugate transpose of Z.
+*
+*  This decomposition is the preprocessing step for computing the
+*  Generalized Singular Value Decomposition (GSVD), see subroutine
+*  CGGSVD.
+*
+*  Arguments
+*  =========
+*
+*  JOBU    (input) CHARACTER*1
+*          = 'U':  Unitary matrix U is computed;
+*          = 'N':  U is not computed.
+*
+*  JOBV    (input) CHARACTER*1
+*          = 'V':  Unitary matrix V is computed;
+*          = 'N':  V is not computed.
+*
+*  JOBQ    (input) CHARACTER*1
+*          = 'Q':  Unitary matrix Q is computed;
+*          = 'N':  Q is not computed.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  P       (input) INTEGER
+*          The number of rows of the matrix B.  P >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrices A and B.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, A contains the triangular (or trapezoidal) matrix
+*          described in the Purpose section.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,N)
+*          On entry, the P-by-N matrix B.
+*          On exit, B contains the triangular matrix described in
+*          the Purpose section.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,P).
+*
+*  TOLA    (input) REAL
+*  TOLB    (input) REAL
+*          TOLA and TOLB are the thresholds to determine the effective
+*          numerical rank of matrix B and a subblock of A. Generally,
+*          they are set to
+*             TOLA = MAX(M,N)*norm(A)*MACHEPS,
+*             TOLB = MAX(P,N)*norm(B)*MACHEPS.
+*          The size of TOLA and TOLB may affect the size of backward
+*          errors of the decomposition.
+*
+*  K       (output) INTEGER
+*  L       (output) INTEGER
+*          On exit, K and L specify the dimension of the subblocks
+*          described in Purpose section.
+*          K + L = effective numerical rank of (A',B')'.
+*
+*  U       (output) COMPLEX array, dimension (LDU,M)
+*          If JOBU = 'U', U contains the unitary matrix U.
+*          If JOBU = 'N', U is not referenced.
+*
+*  LDU     (input) INTEGER
+*          The leading dimension of the array U. LDU >= max(1,M) if
+*          JOBU = 'U'; LDU >= 1 otherwise.
+*
+*  V       (output) COMPLEX array, dimension (LDV,M)
+*          If JOBV = 'V', V contains the unitary matrix V.
+*          If JOBV = 'N', V is not referenced.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V. LDV >= max(1,P) if
+*          JOBV = 'V'; LDV >= 1 otherwise.
+*
+*  Q       (output) COMPLEX array, dimension (LDQ,N)
+*          If JOBQ = 'Q', Q contains the unitary matrix Q.
+*          If JOBQ = 'N', Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q. LDQ >= max(1,N) if
+*          JOBQ = 'Q'; LDQ >= 1 otherwise.
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  RWORK   (workspace) REAL array, dimension (2*N)
+*
+*  TAU     (workspace) COMPLEX array, dimension (N)
+*
+*  WORK    (workspace) COMPLEX array, dimension (max(3*N,M,P))
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The subroutine uses LAPACK subroutine CGEQPF for the QR factorization
+*  with column pivoting to detect the effective numerical rank of the
+*  a matrix. It may be replaced by a better rank determination strategy.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            FORWRD, WANTQ, WANTU, WANTV
+      INTEGER            I, J
+      COMPLEX            T
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEQPF, CGEQR2, CGERQ2, CLACPY, CLAPMT, CLASET,
+     $                   CUNG2R, CUNM2R, CUNMR2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MAX, MIN, REAL
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( T ) = ABS( REAL( T ) ) + ABS( AIMAG( T ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      WANTU = LSAME( JOBU, 'U' )
+      WANTV = LSAME( JOBV, 'V' )
+      WANTQ = LSAME( JOBQ, 'Q' )
+      FORWRD = .TRUE.
+*
+      INFO = 0
+      IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( P.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -8
+      ELSE IF( LDB.LT.MAX( 1, P ) ) THEN
+         INFO = -10
+      ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN
+         INFO = -16
+      ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN
+         INFO = -18
+      ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN
+         INFO = -20
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGGSVP', -INFO )
+         RETURN
+      END IF
+*
+*     QR with column pivoting of B: B*P = V*( S11 S12 )
+*                                           (  0   0  )
+*
+      DO 10 I = 1, N
+         IWORK( I ) = 0
+   10 CONTINUE
+      CALL CGEQPF( P, N, B, LDB, IWORK, TAU, WORK, RWORK, INFO )
+*
+*     Update A := A*P
+*
+      CALL CLAPMT( FORWRD, M, N, A, LDA, IWORK )
+*
+*     Determine the effective rank of matrix B.
+*
+      L = 0
+      DO 20 I = 1, MIN( P, N )
+         IF( CABS1( B( I, I ) ).GT.TOLB )
+     $      L = L + 1
+   20 CONTINUE
+*
+      IF( WANTV ) THEN
+*
+*        Copy the details of V, and form V.
+*
+         CALL CLASET( 'Full', P, P, CZERO, CZERO, V, LDV )
+         IF( P.GT.1 )
+     $      CALL CLACPY( 'Lower', P-1, N, B( 2, 1 ), LDB, V( 2, 1 ),
+     $                   LDV )
+         CALL CUNG2R( P, P, MIN( P, N ), V, LDV, TAU, WORK, INFO )
+      END IF
+*
+*     Clean up B
+*
+      DO 40 J = 1, L - 1
+         DO 30 I = J + 1, L
+            B( I, J ) = CZERO
+   30    CONTINUE
+   40 CONTINUE
+      IF( P.GT.L )
+     $   CALL CLASET( 'Full', P-L, N, CZERO, CZERO, B( L+1, 1 ), LDB )
+*
+      IF( WANTQ ) THEN
+*
+*        Set Q = I and Update Q := Q*P
+*
+         CALL CLASET( 'Full', N, N, CZERO, CONE, Q, LDQ )
+         CALL CLAPMT( FORWRD, N, N, Q, LDQ, IWORK )
+      END IF
+*
+      IF( P.GE.L .AND. N.NE.L ) THEN
+*
+*        RQ factorization of ( S11 S12 ) = ( 0 S12 )*Z
+*
+         CALL CGERQ2( L, N, B, LDB, TAU, WORK, INFO )
+*
+*        Update A := A*Z'
+*
+         CALL CUNMR2( 'Right', 'Conjugate transpose', M, N, L, B, LDB,
+     $                TAU, A, LDA, WORK, INFO )
+         IF( WANTQ ) THEN
+*
+*           Update Q := Q*Z'
+*
+            CALL CUNMR2( 'Right', 'Conjugate transpose', N, N, L, B,
+     $                   LDB, TAU, Q, LDQ, WORK, INFO )
+         END IF
+*
+*        Clean up B
+*
+         CALL CLASET( 'Full', L, N-L, CZERO, CZERO, B, LDB )
+         DO 60 J = N - L + 1, N
+            DO 50 I = J - N + L + 1, L
+               B( I, J ) = CZERO
+   50       CONTINUE
+   60    CONTINUE
+*
+      END IF
+*
+*     Let              N-L     L
+*                A = ( A11    A12 ) M,
+*
+*     then the following does the complete QR decomposition of A11:
+*
+*              A11 = U*(  0  T12 )*P1'
+*                      (  0   0  )
+*
+      DO 70 I = 1, N - L
+         IWORK( I ) = 0
+   70 CONTINUE
+      CALL CGEQPF( M, N-L, A, LDA, IWORK, TAU, WORK, RWORK, INFO )
+*
+*     Determine the effective rank of A11
+*
+      K = 0
+      DO 80 I = 1, MIN( M, N-L )
+         IF( CABS1( A( I, I ) ).GT.TOLA )
+     $      K = K + 1
+   80 CONTINUE
+*
+*     Update A12 := U'*A12, where A12 = A( 1:M, N-L+1:N )
+*
+      CALL CUNM2R( 'Left', 'Conjugate transpose', M, L, MIN( M, N-L ),
+     $             A, LDA, TAU, A( 1, N-L+1 ), LDA, WORK, INFO )
+*
+      IF( WANTU ) THEN
+*
+*        Copy the details of U, and form U
+*
+         CALL CLASET( 'Full', M, M, CZERO, CZERO, U, LDU )
+         IF( M.GT.1 )
+     $      CALL CLACPY( 'Lower', M-1, N-L, A( 2, 1 ), LDA, U( 2, 1 ),
+     $                   LDU )
+         CALL CUNG2R( M, M, MIN( M, N-L ), U, LDU, TAU, WORK, INFO )
+      END IF
+*
+      IF( WANTQ ) THEN
+*
+*        Update Q( 1:N, 1:N-L )  = Q( 1:N, 1:N-L )*P1
+*
+         CALL CLAPMT( FORWRD, N, N-L, Q, LDQ, IWORK )
+      END IF
+*
+*     Clean up A: set the strictly lower triangular part of
+*     A(1:K, 1:K) = 0, and A( K+1:M, 1:N-L ) = 0.
+*
+      DO 100 J = 1, K - 1
+         DO 90 I = J + 1, K
+            A( I, J ) = CZERO
+   90    CONTINUE
+  100 CONTINUE
+      IF( M.GT.K )
+     $   CALL CLASET( 'Full', M-K, N-L, CZERO, CZERO, A( K+1, 1 ), LDA )
+*
+      IF( N-L.GT.K ) THEN
+*
+*        RQ factorization of ( T11 T12 ) = ( 0 T12 )*Z1
+*
+         CALL CGERQ2( K, N-L, A, LDA, TAU, WORK, INFO )
+*
+         IF( WANTQ ) THEN
+*
+*           Update Q( 1:N,1:N-L ) = Q( 1:N,1:N-L )*Z1'
+*
+            CALL CUNMR2( 'Right', 'Conjugate transpose', N, N-L, K, A,
+     $                   LDA, TAU, Q, LDQ, WORK, INFO )
+         END IF
+*
+*        Clean up A
+*
+         CALL CLASET( 'Full', K, N-L-K, CZERO, CZERO, A, LDA )
+         DO 120 J = N - L - K + 1, N - L
+            DO 110 I = J - N + L + K + 1, K
+               A( I, J ) = CZERO
+  110       CONTINUE
+  120    CONTINUE
+*
+      END IF
+*
+      IF( M.GT.K ) THEN
+*
+*        QR factorization of A( K+1:M,N-L+1:N )
+*
+         CALL CGEQR2( M-K, L, A( K+1, N-L+1 ), LDA, TAU, WORK, INFO )
+*
+         IF( WANTU ) THEN
+*
+*           Update U(:,K+1:M) := U(:,K+1:M)*U1
+*
+            CALL CUNM2R( 'Right', 'No transpose', M, M-K, MIN( M-K, L ),
+     $                   A( K+1, N-L+1 ), LDA, TAU, U( 1, K+1 ), LDU,
+     $                   WORK, INFO )
+         END IF
+*
+*        Clean up
+*
+         DO 140 J = N - L + 1, N
+            DO 130 I = J - N + K + L + 1, M
+               A( I, J ) = CZERO
+  130       CONTINUE
+  140    CONTINUE
+*
+      END IF
+*
+      RETURN
+*
+*     End of CGGSVP
+*
+      END
diff --git a/costa/native/external/lapack/cgtcon.f b/costa/native/external/lapack/cgtcon.f
new file mode 100644
index 000000000..08d0bd833
--- /dev/null
+++ b/costa/native/external/lapack/cgtcon.f
@@ -0,0 +1,167 @@
+      SUBROUTINE CGTCON( NORM, N, DL, D, DU, DU2, IPIV, ANORM, RCOND,
+     $                   WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            INFO, N
+      REAL               ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            D( * ), DL( * ), DU( * ), DU2( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGTCON estimates the reciprocal of the condition number of a complex
+*  tridiagonal matrix A using the LU factorization as computed by
+*  CGTTRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies whether the 1-norm condition number or the
+*          infinity-norm condition number is required:
+*          = '1' or 'O':  1-norm;
+*          = 'I':         Infinity-norm.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  DL      (input) COMPLEX array, dimension (N-1)
+*          The (n-1) multipliers that define the matrix L from the
+*          LU factorization of A as computed by CGTTRF.
+*
+*  D       (input) COMPLEX array, dimension (N)
+*          The n diagonal elements of the upper triangular matrix U from
+*          the LU factorization of A.
+*
+*  DU      (input) COMPLEX array, dimension (N-1)
+*          The (n-1) elements of the first superdiagonal of U.
+*
+*  DU2     (input) COMPLEX array, dimension (N-2)
+*          The (n-2) elements of the second superdiagonal of U.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices; for 1 <= i <= n, row i of the matrix was
+*          interchanged with row IPIV(i).  IPIV(i) will always be either
+*          i or i+1; IPIV(i) = i indicates a row interchange was not
+*          required.
+*
+*  ANORM   (input) REAL
+*          If NORM = '1' or 'O', the 1-norm of the original matrix A.
+*          If NORM = 'I', the infinity-norm of the original matrix A.
+*
+*  RCOND   (output) REAL
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an
+*          estimate of the 1-norm of inv(A) computed in this routine.
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ONENRM
+      INTEGER            I, KASE, KASE1
+      REAL               AINVNM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGTTRS, CLACON, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CMPLX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments.
+*
+      INFO = 0
+      ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' )
+      IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGTCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.EQ.ZERO ) THEN
+         RETURN
+      END IF
+*
+*     Check that D(1:N) is non-zero.
+*
+      DO 10 I = 1, N
+         IF( D( I ).EQ.CMPLX( ZERO ) )
+     $      RETURN
+   10 CONTINUE
+*
+      AINVNM = ZERO
+      IF( ONENRM ) THEN
+         KASE1 = 1
+      ELSE
+         KASE1 = 2
+      END IF
+      KASE = 0
+   20 CONTINUE
+      CALL CLACON( N, WORK( N+1 ), WORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+         IF( KASE.EQ.KASE1 ) THEN
+*
+*           Multiply by inv(U)*inv(L).
+*
+            CALL CGTTRS( 'No transpose', N, 1, DL, D, DU, DU2, IPIV,
+     $                   WORK, N, INFO )
+         ELSE
+*
+*           Multiply by inv(L')*inv(U').
+*
+            CALL CGTTRS( 'Conjugate transpose', N, 1, DL, D, DU, DU2,
+     $                   IPIV, WORK, N, INFO )
+         END IF
+         GO TO 20
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+      RETURN
+*
+*     End of CGTCON
+*
+      END
diff --git a/costa/native/external/lapack/cgtrfs.f b/costa/native/external/lapack/cgtrfs.f
new file mode 100644
index 000000000..e98545602
--- /dev/null
+++ b/costa/native/external/lapack/cgtrfs.f
@@ -0,0 +1,369 @@
+      SUBROUTINE CGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2,
+     $                   IPIV, B, LDB, X, LDX, FERR, BERR, WORK, RWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            INFO, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX            B( LDB, * ), D( * ), DF( * ), DL( * ),
+     $                   DLF( * ), DU( * ), DU2( * ), DUF( * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGTRFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is tridiagonal, and provides
+*  error bounds and backward error estimates for the solution.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  DL      (input) COMPLEX array, dimension (N-1)
+*          The (n-1) subdiagonal elements of A.
+*
+*  D       (input) COMPLEX array, dimension (N)
+*          The diagonal elements of A.
+*
+*  DU      (input) COMPLEX array, dimension (N-1)
+*          The (n-1) superdiagonal elements of A.
+*
+*  DLF     (input) COMPLEX array, dimension (N-1)
+*          The (n-1) multipliers that define the matrix L from the
+*          LU factorization of A as computed by CGTTRF.
+*
+*  DF      (input) COMPLEX array, dimension (N)
+*          The n diagonal elements of the upper triangular matrix U from
+*          the LU factorization of A.
+*
+*  DUF     (input) COMPLEX array, dimension (N-1)
+*          The (n-1) elements of the first superdiagonal of U.
+*
+*  DU2     (input) COMPLEX array, dimension (N-2)
+*          The (n-2) elements of the second superdiagonal of U.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices; for 1 <= i <= n, row i of the matrix was
+*          interchanged with row IPIV(i).  IPIV(i) will always be either
+*          i or i+1; IPIV(i) = i indicates a row interchange was not
+*          required.
+*
+*  B       (input) COMPLEX array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) COMPLEX array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by CGTTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+      REAL               TWO
+      PARAMETER          ( TWO = 2.0E+0 )
+      REAL               THREE
+      PARAMETER          ( THREE = 3.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN
+      CHARACTER          TRANSN, TRANST
+      INTEGER            COUNT, I, J, KASE, NZ
+      REAL               EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN
+      COMPLEX            ZDUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CCOPY, CGTTRS, CLACON, CLAGTM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, CMPLX, MAX, REAL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $    LSAME( TRANS, 'C' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -13
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -15
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGTRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      IF( NOTRAN ) THEN
+         TRANSN = 'N'
+         TRANST = 'C'
+      ELSE
+         TRANSN = 'C'
+         TRANST = 'N'
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = 4
+      EPS = SLAMCH( 'Epsilon' )
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 110 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - op(A) * X,
+*        where op(A) = A, A**T, or A**H, depending on TRANS.
+*
+         CALL CCOPY( N, B( 1, J ), 1, WORK, 1 )
+         CALL CLAGTM( TRANS, N, 1, -ONE, DL, D, DU, X( 1, J ), LDX, ONE,
+     $                WORK, N )
+*
+*        Compute abs(op(A))*abs(x) + abs(b) for use in the backward
+*        error bound.
+*
+         IF( NOTRAN ) THEN
+            IF( N.EQ.1 ) THEN
+               RWORK( 1 ) = CABS1( B( 1, J ) ) +
+     $                      CABS1( D( 1 ) )*CABS1( X( 1, J ) )
+            ELSE
+               RWORK( 1 ) = CABS1( B( 1, J ) ) +
+     $                      CABS1( D( 1 ) )*CABS1( X( 1, J ) ) +
+     $                      CABS1( DU( 1 ) )*CABS1( X( 2, J ) )
+               DO 30 I = 2, N - 1
+                  RWORK( I ) = CABS1( B( I, J ) ) +
+     $                         CABS1( DL( I-1 ) )*CABS1( X( I-1, J ) ) +
+     $                         CABS1( D( I ) )*CABS1( X( I, J ) ) +
+     $                         CABS1( DU( I ) )*CABS1( X( I+1, J ) )
+   30          CONTINUE
+               RWORK( N ) = CABS1( B( N, J ) ) +
+     $                      CABS1( DL( N-1 ) )*CABS1( X( N-1, J ) ) +
+     $                      CABS1( D( N ) )*CABS1( X( N, J ) )
+            END IF
+         ELSE
+            IF( N.EQ.1 ) THEN
+               RWORK( 1 ) = CABS1( B( 1, J ) ) +
+     $                      CABS1( D( 1 ) )*CABS1( X( 1, J ) )
+            ELSE
+               RWORK( 1 ) = CABS1( B( 1, J ) ) +
+     $                      CABS1( D( 1 ) )*CABS1( X( 1, J ) ) +
+     $                      CABS1( DL( 1 ) )*CABS1( X( 2, J ) )
+               DO 40 I = 2, N - 1
+                  RWORK( I ) = CABS1( B( I, J ) ) +
+     $                         CABS1( DU( I-1 ) )*CABS1( X( I-1, J ) ) +
+     $                         CABS1( D( I ) )*CABS1( X( I, J ) ) +
+     $                         CABS1( DL( I ) )*CABS1( X( I+1, J ) )
+   40          CONTINUE
+               RWORK( N ) = CABS1( B( N, J ) ) +
+     $                      CABS1( DU( N-1 ) )*CABS1( X( N-1, J ) ) +
+     $                      CABS1( D( N ) )*CABS1( X( N, J ) )
+            END IF
+         END IF
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         S = ZERO
+         DO 50 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) )
+            ELSE
+               S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) /
+     $             ( RWORK( I )+SAFE1 ) )
+            END IF
+   50    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL CGTTRS( TRANS, N, 1, DLF, DF, DUF, DU2, IPIV, WORK, N,
+     $                   INFO )
+            CALL CAXPY( N, CMPLX( ONE ), WORK, 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(op(A)))*
+*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(op(A)) is the inverse of op(A)
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(op(A))*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use CLACON to estimate the infinity-norm of the matrix
+*           inv(op(A)) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) )))
+*
+         DO 60 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I )
+            ELSE
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) +
+     $                      SAFE1
+            END IF
+   60    CONTINUE
+*
+         KASE = 0
+   70    CONTINUE
+         CALL CLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(op(A)**H).
+*
+               CALL CGTTRS( TRANST, N, 1, DLF, DF, DUF, DU2, IPIV, WORK,
+     $                      N, INFO )
+               DO 80 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+   80          CONTINUE
+            ELSE
+*
+*              Multiply by inv(op(A))*diag(W).
+*
+               DO 90 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+   90          CONTINUE
+               CALL CGTTRS( TRANSN, N, 1, DLF, DF, DUF, DU2, IPIV, WORK,
+     $                      N, INFO )
+            END IF
+            GO TO 70
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 100 I = 1, N
+            LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) )
+  100    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  110 CONTINUE
+*
+      RETURN
+*
+*     End of CGTRFS
+*
+      END
diff --git a/costa/native/external/lapack/cgtsv.f b/costa/native/external/lapack/cgtsv.f
new file mode 100644
index 000000000..7e2fd85d4
--- /dev/null
+++ b/costa/native/external/lapack/cgtsv.f
@@ -0,0 +1,174 @@
+      SUBROUTINE CGTSV( N, NRHS, DL, D, DU, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            B( LDB, * ), D( * ), DL( * ), DU( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGTSV  solves the equation
+*
+*     A*X = B,
+*
+*  where A is an N-by-N tridiagonal matrix, by Gaussian elimination with
+*  partial pivoting.
+*
+*  Note that the equation  A'*X = B  may be solved by interchanging the
+*  order of the arguments DU and DL.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  DL      (input/output) COMPLEX array, dimension (N-1)
+*          On entry, DL must contain the (n-1) subdiagonal elements of
+*          A.
+*          On exit, DL is overwritten by the (n-2) elements of the
+*          second superdiagonal of the upper triangular matrix U from
+*          the LU factorization of A, in DL(1), ..., DL(n-2).
+*
+*  D       (input/output) COMPLEX array, dimension (N)
+*          On entry, D must contain the diagonal elements of A.
+*          On exit, D is overwritten by the n diagonal elements of U.
+*
+*  DU      (input/output) COMPLEX array, dimension (N-1)
+*          On entry, DU must contain the (n-1) superdiagonal elements
+*          of A.
+*          On exit, DU is overwritten by the (n-1) elements of the first
+*          superdiagonal of U.
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, U(i,i) is exactly zero, and the solution
+*                has not been computed.  The factorization has not been
+*                completed unless i = N.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            J, K
+      COMPLEX            MULT, TEMP, ZDUM
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGTSV ', -INFO )
+         RETURN
+      END IF
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      DO 30 K = 1, N - 1
+         IF( DL( K ).EQ.ZERO ) THEN
+*
+*           Subdiagonal is zero, no elimination is required.
+*
+            IF( D( K ).EQ.ZERO ) THEN
+*
+*              Diagonal is zero: set INFO = K and return; a unique
+*              solution can not be found.
+*
+               INFO = K
+               RETURN
+            END IF
+         ELSE IF( CABS1( D( K ) ).GE.CABS1( DL( K ) ) ) THEN
+*
+*           No row interchange required
+*
+            MULT = DL( K ) / D( K )
+            D( K+1 ) = D( K+1 ) - MULT*DU( K )
+            DO 10 J = 1, NRHS
+               B( K+1, J ) = B( K+1, J ) - MULT*B( K, J )
+   10       CONTINUE
+            IF( K.LT.( N-1 ) )
+     $         DL( K ) = ZERO
+         ELSE
+*
+*           Interchange rows K and K+1
+*
+            MULT = D( K ) / DL( K )
+            D( K ) = DL( K )
+            TEMP = D( K+1 )
+            D( K+1 ) = DU( K ) - MULT*TEMP
+            IF( K.LT.( N-1 ) ) THEN
+               DL( K ) = DU( K+1 )
+               DU( K+1 ) = -MULT*DL( K )
+            END IF
+            DU( K ) = TEMP
+            DO 20 J = 1, NRHS
+               TEMP = B( K, J )
+               B( K, J ) = B( K+1, J )
+               B( K+1, J ) = TEMP - MULT*B( K+1, J )
+   20       CONTINUE
+         END IF
+   30 CONTINUE
+      IF( D( N ).EQ.ZERO ) THEN
+         INFO = N
+         RETURN
+      END IF
+*
+*     Back solve with the matrix U from the factorization.
+*
+      DO 50 J = 1, NRHS
+         B( N, J ) = B( N, J ) / D( N )
+         IF( N.GT.1 )
+     $      B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / D( N-1 )
+         DO 40 K = N - 2, 1, -1
+            B( K, J ) = ( B( K, J )-DU( K )*B( K+1, J )-DL( K )*
+     $                  B( K+2, J ) ) / D( K )
+   40    CONTINUE
+   50 CONTINUE
+*
+      RETURN
+*
+*     End of CGTSV
+*
+      END
diff --git a/costa/native/external/lapack/cgtsvx.f b/costa/native/external/lapack/cgtsvx.f
new file mode 100644
index 000000000..2bef165de
--- /dev/null
+++ b/costa/native/external/lapack/cgtsvx.f
@@ -0,0 +1,294 @@
+      SUBROUTINE CGTSVX( FACT, TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF,
+     $                   DU2, IPIV, B, LDB, X, LDX, RCOND, FERR, BERR,
+     $                   WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          FACT, TRANS
+      INTEGER            INFO, LDB, LDX, N, NRHS
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX            B( LDB, * ), D( * ), DF( * ), DL( * ),
+     $                   DLF( * ), DU( * ), DU2( * ), DUF( * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGTSVX uses the LU factorization to compute the solution to a complex
+*  system of linear equations A * X = B, A**T * X = B, or A**H * X = B,
+*  where A is a tridiagonal matrix of order N and X and B are N-by-NRHS
+*  matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'N', the LU decomposition is used to factor the matrix A
+*     as A = L * U, where L is a product of permutation and unit lower
+*     bidiagonal matrices and U is upper triangular with nonzeros in
+*     only the main diagonal and first two superdiagonals.
+*
+*  2. If some U(i,i)=0, so that U is exactly singular, then the routine
+*     returns with INFO = i. Otherwise, the factored form of A is used
+*     to estimate the condition number of the matrix A.  If the
+*     reciprocal of the condition number is less than machine precision,
+*     INFO = N+1 is returned as a warning, but the routine still goes on
+*     to solve for X and compute error bounds as described below.
+*
+*  3. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  4. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of A has been
+*          supplied on entry.
+*          = 'F':  DLF, DF, DUF, DU2, and IPIV contain the factored form
+*                  of A; DL, D, DU, DLF, DF, DUF, DU2 and IPIV will not
+*                  be modified.
+*          = 'N':  The matrix will be copied to DLF, DF, and DUF
+*                  and factored.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  DL      (input) COMPLEX array, dimension (N-1)
+*          The (n-1) subdiagonal elements of A.
+*
+*  D       (input) COMPLEX array, dimension (N)
+*          The n diagonal elements of A.
+*
+*  DU      (input) COMPLEX array, dimension (N-1)
+*          The (n-1) superdiagonal elements of A.
+*
+*  DLF     (input or output) COMPLEX array, dimension (N-1)
+*          If FACT = 'F', then DLF is an input argument and on entry
+*          contains the (n-1) multipliers that define the matrix L from
+*          the LU factorization of A as computed by CGTTRF.
+*
+*          If FACT = 'N', then DLF is an output argument and on exit
+*          contains the (n-1) multipliers that define the matrix L from
+*          the LU factorization of A.
+*
+*  DF      (input or output) COMPLEX array, dimension (N)
+*          If FACT = 'F', then DF is an input argument and on entry
+*          contains the n diagonal elements of the upper triangular
+*          matrix U from the LU factorization of A.
+*
+*          If FACT = 'N', then DF is an output argument and on exit
+*          contains the n diagonal elements of the upper triangular
+*          matrix U from the LU factorization of A.
+*
+*  DUF     (input or output) COMPLEX array, dimension (N-1)
+*          If FACT = 'F', then DUF is an input argument and on entry
+*          contains the (n-1) elements of the first superdiagonal of U.
+*
+*          If FACT = 'N', then DUF is an output argument and on exit
+*          contains the (n-1) elements of the first superdiagonal of U.
+*
+*  DU2     (input or output) COMPLEX array, dimension (N-2)
+*          If FACT = 'F', then DU2 is an input argument and on entry
+*          contains the (n-2) elements of the second superdiagonal of
+*          U.
+*
+*          If FACT = 'N', then DU2 is an output argument and on exit
+*          contains the (n-2) elements of the second superdiagonal of
+*          U.
+*
+*  IPIV    (input or output) INTEGER array, dimension (N)
+*          If FACT = 'F', then IPIV is an input argument and on entry
+*          contains the pivot indices from the LU factorization of A as
+*          computed by CGTTRF.
+*
+*          If FACT = 'N', then IPIV is an output argument and on exit
+*          contains the pivot indices from the LU factorization of A;
+*          row i of the matrix was interchanged with row IPIV(i).
+*          IPIV(i) will always be either i or i+1; IPIV(i) = i indicates
+*          a row interchange was not required.
+*
+*  B       (input) COMPLEX array, dimension (LDB,NRHS)
+*          The N-by-NRHS right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) COMPLEX array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) REAL
+*          The estimate of the reciprocal condition number of the matrix
+*          A.  If RCOND is less than the machine precision (in
+*          particular, if RCOND = 0), the matrix is singular to working
+*          precision.  This condition is indicated by a return code of
+*          INFO > 0.
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= N:  U(i,i) is exactly zero.  The factorization
+*                       has not been completed unless i = N, but the
+*                       factor U is exactly singular, so the solution
+*                       and error bounds could not be computed.
+*                       RCOND = 0 is returned.
+*                = N+1: U is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOFACT, NOTRAN
+      CHARACTER          NORM
+      REAL               ANORM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               CLANGT, SLAMCH
+      EXTERNAL           LSAME, CLANGT, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CGTCON, CGTRFS, CGTTRF, CGTTRS, CLACPY,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -14
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -16
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGTSVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( NOFACT ) THEN
+*
+*        Compute the LU factorization of A.
+*
+         CALL CCOPY( N, D, 1, DF, 1 )
+         IF( N.GT.1 ) THEN
+            CALL CCOPY( N-1, DL, 1, DLF, 1 )
+            CALL CCOPY( N-1, DU, 1, DUF, 1 )
+         END IF
+         CALL CGTTRF( N, DLF, DF, DUF, DU2, IPIV, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      IF( NOTRAN ) THEN
+         NORM = '1'
+      ELSE
+         NORM = 'I'
+      END IF
+      ANORM = CLANGT( NORM, N, DL, D, DU )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL CGTCON( NORM, N, DLF, DF, DUF, DU2, IPIV, ANORM, RCOND, WORK,
+     $             INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.SLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution vectors X.
+*
+      CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL CGTTRS( TRANS, N, NRHS, DLF, DF, DUF, DU2, IPIV, X, LDX,
+     $             INFO )
+*
+*     Use iterative refinement to improve the computed solutions and
+*     compute error bounds and backward error estimates for them.
+*
+      CALL CGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2, IPIV,
+     $             B, LDB, X, LDX, FERR, BERR, WORK, RWORK, INFO )
+*
+      RETURN
+*
+*     End of CGTSVX
+*
+      END
diff --git a/costa/native/external/lapack/cgttrf.f b/costa/native/external/lapack/cgttrf.f
new file mode 100644
index 000000000..16753f107
--- /dev/null
+++ b/costa/native/external/lapack/cgttrf.f
@@ -0,0 +1,175 @@
+      SUBROUTINE CGTTRF( N, DL, D, DU, DU2, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            D( * ), DL( * ), DU( * ), DU2( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGTTRF computes an LU factorization of a complex tridiagonal matrix A
+*  using elimination with partial pivoting and row interchanges.
+*
+*  The factorization has the form
+*     A = L * U
+*  where L is a product of permutation and unit lower bidiagonal
+*  matrices and U is upper triangular with nonzeros in only the main
+*  diagonal and first two superdiagonals.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.
+*
+*  DL      (input/output) COMPLEX array, dimension (N-1)
+*          On entry, DL must contain the (n-1) sub-diagonal elements of
+*          A.
+*
+*          On exit, DL is overwritten by the (n-1) multipliers that
+*          define the matrix L from the LU factorization of A.
+*
+*  D       (input/output) COMPLEX array, dimension (N)
+*          On entry, D must contain the diagonal elements of A.
+*
+*          On exit, D is overwritten by the n diagonal elements of the
+*          upper triangular matrix U from the LU factorization of A.
+*
+*  DU      (input/output) COMPLEX array, dimension (N-1)
+*          On entry, DU must contain the (n-1) super-diagonal elements
+*          of A.
+*
+*          On exit, DU is overwritten by the (n-1) elements of the first
+*          super-diagonal of U.
+*
+*  DU2     (output) COMPLEX array, dimension (N-2)
+*          On exit, DU2 is overwritten by the (n-2) elements of the
+*          second super-diagonal of U.
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          The pivot indices; for 1 <= i <= n, row i of the matrix was
+*          interchanged with row IPIV(i).  IPIV(i) will always be either
+*          i or i+1; IPIV(i) = i indicates a row interchange was not
+*          required.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -k, the k-th argument had an illegal value
+*          > 0:  if INFO = k, U(k,k) is exactly zero. The factorization
+*                has been completed, but the factor U is exactly
+*                singular, and division by zero will occur if it is used
+*                to solve a system of equations.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      COMPLEX            FACT, TEMP, ZDUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, REAL
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+         CALL XERBLA( 'CGTTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Initialize IPIV(i) = i and DU2(i) = 0
+*
+      DO 10 I = 1, N
+         IPIV( I ) = I
+   10 CONTINUE
+      DO 20 I = 1, N - 2
+         DU2( I ) = ZERO
+   20 CONTINUE
+*
+      DO 30 I = 1, N - 2
+         IF( CABS1( D( I ) ).GE.CABS1( DL( I ) ) ) THEN
+*
+*           No row interchange required, eliminate DL(I)
+*
+            IF( CABS1( D( I ) ).NE.ZERO ) THEN
+               FACT = DL( I ) / D( I )
+               DL( I ) = FACT
+               D( I+1 ) = D( I+1 ) - FACT*DU( I )
+            END IF
+         ELSE
+*
+*           Interchange rows I and I+1, eliminate DL(I)
+*
+            FACT = D( I ) / DL( I )
+            D( I ) = DL( I )
+            DL( I ) = FACT
+            TEMP = DU( I )
+            DU( I ) = D( I+1 )
+            D( I+1 ) = TEMP - FACT*D( I+1 )
+            DU2( I ) = DU( I+1 )
+            DU( I+1 ) = -FACT*DU( I+1 )
+            IPIV( I ) = I + 1
+         END IF
+   30 CONTINUE
+      IF( N.GT.1 ) THEN
+         I = N - 1
+         IF( CABS1( D( I ) ).GE.CABS1( DL( I ) ) ) THEN
+            IF( CABS1( D( I ) ).NE.ZERO ) THEN
+               FACT = DL( I ) / D( I )
+               DL( I ) = FACT
+               D( I+1 ) = D( I+1 ) - FACT*DU( I )
+            END IF
+         ELSE
+            FACT = D( I ) / DL( I )
+            D( I ) = DL( I )
+            DL( I ) = FACT
+            TEMP = DU( I )
+            DU( I ) = D( I+1 )
+            D( I+1 ) = TEMP - FACT*D( I+1 )
+            IPIV( I ) = I + 1
+         END IF
+      END IF
+*
+*     Check for a zero on the diagonal of U.
+*
+      DO 40 I = 1, N
+         IF( CABS1( D( I ) ).EQ.ZERO ) THEN
+            INFO = I
+            GO TO 50
+         END IF
+   40 CONTINUE
+   50 CONTINUE
+*
+      RETURN
+*
+*     End of CGTTRF
+*
+      END
diff --git a/costa/native/external/lapack/cgttrs.f b/costa/native/external/lapack/cgttrs.f
new file mode 100644
index 000000000..fdf507112
--- /dev/null
+++ b/costa/native/external/lapack/cgttrs.f
@@ -0,0 +1,143 @@
+      SUBROUTINE CGTTRS( TRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGTTRS solves one of the systems of equations
+*     A * X = B,  A**T * X = B,  or  A**H * X = B,
+*  with a tridiagonal matrix A using the LU factorization computed
+*  by CGTTRF.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER
+*          Specifies the form of the system of equations.
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  DL      (input) COMPLEX array, dimension (N-1)
+*          The (n-1) multipliers that define the matrix L from the
+*          LU factorization of A.
+*
+*  D       (input) COMPLEX array, dimension (N)
+*          The n diagonal elements of the upper triangular matrix U from
+*          the LU factorization of A.
+*
+*  DU      (input) COMPLEX array, dimension (N-1)
+*          The (n-1) elements of the first super-diagonal of U.
+*
+*  DU2     (input) COMPLEX array, dimension (N-2)
+*          The (n-2) elements of the second super-diagonal of U.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices; for 1 <= i <= n, row i of the matrix was
+*          interchanged with row IPIV(i).  IPIV(i) will always be either
+*          i or i+1; IPIV(i) = i indicates a row interchange was not
+*          required.
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the matrix of right hand side vectors B.
+*          On exit, B is overwritten by the solution vectors X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -k, the k-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN
+      INTEGER            ITRANS, J, JB, NB
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGTTS2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      NOTRAN = ( TRANS.EQ.'N' .OR. TRANS.EQ.'n' )
+      IF( .NOT.NOTRAN .AND. .NOT.( TRANS.EQ.'T' .OR. TRANS.EQ.
+     $    't' ) .AND. .NOT.( TRANS.EQ.'C' .OR. TRANS.EQ.'c' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( N, 1 ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGTTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+*     Decode TRANS
+*
+      IF( NOTRAN ) THEN
+         ITRANS = 0
+      ELSE IF( TRANS.EQ.'T' .OR. TRANS.EQ.'t' ) THEN
+         ITRANS = 1
+      ELSE
+         ITRANS = 2
+      END IF
+*
+*     Determine the number of right-hand sides to solve at a time.
+*
+      IF( NRHS.EQ.1 ) THEN
+         NB = 1
+      ELSE
+         NB = MAX( 1, ILAENV( 1, 'CGTTRS', TRANS, N, NRHS, -1, -1 ) )
+      END IF
+*
+      IF( NB.GE.NRHS ) THEN
+         CALL CGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB )
+      ELSE
+         DO 10 J = 1, NRHS, NB
+            JB = MIN( NRHS-J+1, NB )
+            CALL CGTTS2( ITRANS, N, JB, DL, D, DU, DU2, IPIV, B( 1, J ),
+     $                   LDB )
+   10    CONTINUE
+      END IF
+*
+*     End of CGTTRS
+*
+      END
diff --git a/costa/native/external/lapack/cgtts2.f b/costa/native/external/lapack/cgtts2.f
new file mode 100644
index 000000000..7a9188ce9
--- /dev/null
+++ b/costa/native/external/lapack/cgtts2.f
@@ -0,0 +1,272 @@
+      SUBROUTINE CGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            ITRANS, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CGTTS2 solves one of the systems of equations
+*     A * X = B,  A**T * X = B,  or  A**H * X = B,
+*  with a tridiagonal matrix A using the LU factorization computed
+*  by CGTTRF.
+*
+*  Arguments
+*  =========
+*
+*  ITRANS  (input) INTEGER
+*          Specifies the form of the system of equations.
+*          = 0:  A * X = B     (No transpose)
+*          = 1:  A**T * X = B  (Transpose)
+*          = 2:  A**H * X = B  (Conjugate transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  DL      (input) COMPLEX array, dimension (N-1)
+*          The (n-1) multipliers that define the matrix L from the
+*          LU factorization of A.
+*
+*  D       (input) COMPLEX array, dimension (N)
+*          The n diagonal elements of the upper triangular matrix U from
+*          the LU factorization of A.
+*
+*  DU      (input) COMPLEX array, dimension (N-1)
+*          The (n-1) elements of the first super-diagonal of U.
+*
+*  DU2     (input) COMPLEX array, dimension (N-2)
+*          The (n-2) elements of the second super-diagonal of U.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices; for 1 <= i <= n, row i of the matrix was
+*          interchanged with row IPIV(i).  IPIV(i) will always be either
+*          i or i+1; IPIV(i) = i indicates a row interchange was not
+*          required.
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the matrix of right hand side vectors B.
+*          On exit, B is overwritten by the solution vectors X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, J
+      COMPLEX            TEMP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( ITRANS.EQ.0 ) THEN
+*
+*        Solve A*X = B using the LU factorization of A,
+*        overwriting each right hand side vector with its solution.
+*
+         IF( NRHS.LE.1 ) THEN
+            J = 1
+   10       CONTINUE
+*
+*           Solve L*x = b.
+*
+            DO 20 I = 1, N - 1
+               IF( IPIV( I ).EQ.I ) THEN
+                  B( I+1, J ) = B( I+1, J ) - DL( I )*B( I, J )
+               ELSE
+                  TEMP = B( I, J )
+                  B( I, J ) = B( I+1, J )
+                  B( I+1, J ) = TEMP - DL( I )*B( I, J )
+               END IF
+   20       CONTINUE
+*
+*           Solve U*x = b.
+*
+            B( N, J ) = B( N, J ) / D( N )
+            IF( N.GT.1 )
+     $         B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) /
+     $                       D( N-1 )
+            DO 30 I = N - 2, 1, -1
+               B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DU2( I )*
+     $                     B( I+2, J ) ) / D( I )
+   30       CONTINUE
+            IF( J.LT.NRHS ) THEN
+               J = J + 1
+               GO TO 10
+            END IF
+         ELSE
+            DO 60 J = 1, NRHS
+*
+*           Solve L*x = b.
+*
+               DO 40 I = 1, N - 1
+                  IF( IPIV( I ).EQ.I ) THEN
+                     B( I+1, J ) = B( I+1, J ) - DL( I )*B( I, J )
+                  ELSE
+                     TEMP = B( I, J )
+                     B( I, J ) = B( I+1, J )
+                     B( I+1, J ) = TEMP - DL( I )*B( I, J )
+                  END IF
+   40          CONTINUE
+*
+*           Solve U*x = b.
+*
+               B( N, J ) = B( N, J ) / D( N )
+               IF( N.GT.1 )
+     $            B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) /
+     $                          D( N-1 )
+               DO 50 I = N - 2, 1, -1
+                  B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DU2( I )*
+     $                        B( I+2, J ) ) / D( I )
+   50          CONTINUE
+   60       CONTINUE
+         END IF
+      ELSE IF( ITRANS.EQ.1 ) THEN
+*
+*        Solve A**T * X = B.
+*
+         IF( NRHS.LE.1 ) THEN
+            J = 1
+   70       CONTINUE
+*
+*           Solve U**T * x = b.
+*
+            B( 1, J ) = B( 1, J ) / D( 1 )
+            IF( N.GT.1 )
+     $         B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 )
+            DO 80 I = 3, N
+               B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J )-DU2( I-2 )*
+     $                     B( I-2, J ) ) / D( I )
+   80       CONTINUE
+*
+*           Solve L**T * x = b.
+*
+            DO 90 I = N - 1, 1, -1
+               IF( IPIV( I ).EQ.I ) THEN
+                  B( I, J ) = B( I, J ) - DL( I )*B( I+1, J )
+               ELSE
+                  TEMP = B( I+1, J )
+                  B( I+1, J ) = B( I, J ) - DL( I )*TEMP
+                  B( I, J ) = TEMP
+               END IF
+   90       CONTINUE
+            IF( J.LT.NRHS ) THEN
+               J = J + 1
+               GO TO 70
+            END IF
+         ELSE
+            DO 120 J = 1, NRHS
+*
+*           Solve U**T * x = b.
+*
+               B( 1, J ) = B( 1, J ) / D( 1 )
+               IF( N.GT.1 )
+     $            B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 )
+               DO 100 I = 3, N
+                  B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J )-
+     $                        DU2( I-2 )*B( I-2, J ) ) / D( I )
+  100          CONTINUE
+*
+*           Solve L**T * x = b.
+*
+               DO 110 I = N - 1, 1, -1
+                  IF( IPIV( I ).EQ.I ) THEN
+                     B( I, J ) = B( I, J ) - DL( I )*B( I+1, J )
+                  ELSE
+                     TEMP = B( I+1, J )
+                     B( I+1, J ) = B( I, J ) - DL( I )*TEMP
+                     B( I, J ) = TEMP
+                  END IF
+  110          CONTINUE
+  120       CONTINUE
+         END IF
+      ELSE
+*
+*        Solve A**H * X = B.
+*
+         IF( NRHS.LE.1 ) THEN
+            J = 1
+  130       CONTINUE
+*
+*           Solve U**H * x = b.
+*
+            B( 1, J ) = B( 1, J ) / CONJG( D( 1 ) )
+            IF( N.GT.1 )
+     $         B( 2, J ) = ( B( 2, J )-CONJG( DU( 1 ) )*B( 1, J ) ) /
+     $                     CONJG( D( 2 ) )
+            DO 140 I = 3, N
+               B( I, J ) = ( B( I, J )-CONJG( DU( I-1 ) )*B( I-1, J )-
+     $                     CONJG( DU2( I-2 ) )*B( I-2, J ) ) /
+     $                     CONJG( D( I ) )
+  140       CONTINUE
+*
+*           Solve L**H * x = b.
+*
+            DO 150 I = N - 1, 1, -1
+               IF( IPIV( I ).EQ.I ) THEN
+                  B( I, J ) = B( I, J ) - CONJG( DL( I ) )*B( I+1, J )
+               ELSE
+                  TEMP = B( I+1, J )
+                  B( I+1, J ) = B( I, J ) - CONJG( DL( I ) )*TEMP
+                  B( I, J ) = TEMP
+               END IF
+  150       CONTINUE
+            IF( J.LT.NRHS ) THEN
+               J = J + 1
+               GO TO 130
+            END IF
+         ELSE
+            DO 180 J = 1, NRHS
+*
+*           Solve U**H * x = b.
+*
+               B( 1, J ) = B( 1, J ) / CONJG( D( 1 ) )
+               IF( N.GT.1 )
+     $            B( 2, J ) = ( B( 2, J )-CONJG( DU( 1 ) )*B( 1, J ) ) /
+     $                        CONJG( D( 2 ) )
+               DO 160 I = 3, N
+                  B( I, J ) = ( B( I, J )-CONJG( DU( I-1 ) )*
+     $                        B( I-1, J )-CONJG( DU2( I-2 ) )*
+     $                        B( I-2, J ) ) / CONJG( D( I ) )
+  160          CONTINUE
+*
+*           Solve L**H * x = b.
+*
+               DO 170 I = N - 1, 1, -1
+                  IF( IPIV( I ).EQ.I ) THEN
+                     B( I, J ) = B( I, J ) - CONJG( DL( I ) )*
+     $                           B( I+1, J )
+                  ELSE
+                     TEMP = B( I+1, J )
+                     B( I+1, J ) = B( I, J ) - CONJG( DL( I ) )*TEMP
+                     B( I, J ) = TEMP
+                  END IF
+  170          CONTINUE
+  180       CONTINUE
+         END IF
+      END IF
+*
+*     End of CGTTS2
+*
+      END
diff --git a/costa/native/external/lapack/chbev.f b/costa/native/external/lapack/chbev.f
new file mode 100644
index 000000000..2375cc35c
--- /dev/null
+++ b/costa/native/external/lapack/chbev.f
@@ -0,0 +1,209 @@
+      SUBROUTINE CHBEV( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK,
+     $                  RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, KD, LDAB, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      REAL               RWORK( * ), W( * )
+      COMPLEX            AB( LDAB, * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHBEV computes all the eigenvalues and, optionally, eigenvectors of
+*  a complex Hermitian band matrix A.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) COMPLEX array, dimension (LDAB, N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*          On exit, AB is overwritten by values generated during the
+*          reduction to tridiagonal form.  If UPLO = 'U', the first
+*          superdiagonal and the diagonal of the tridiagonal matrix T
+*          are returned in rows KD and KD+1 of AB, and if UPLO = 'L',
+*          the diagonal and first subdiagonal of T are returned in the
+*          first two rows of AB.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD + 1.
+*
+*  W       (output) REAL array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) COMPLEX array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal
+*          eigenvectors of the matrix A, with the i-th column of Z
+*          holding the eigenvector associated with W(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace) COMPLEX array, dimension (N)
+*
+*  RWORK   (workspace) REAL array, dimension (max(1,3*N-2))
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = i, the algorithm failed to converge; i
+*                off-diagonal elements of an intermediate tridiagonal
+*                form did not converge to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LOWER, WANTZ
+      INTEGER            IINFO, IMAX, INDE, INDRWK, ISCALE
+      REAL               ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
+     $                   SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               CLANHB, SLAMCH
+      EXTERNAL           LSAME, CLANHB, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CHBTRD, CLASCL, CSTEQR, SSCAL, SSTERF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      LOWER = LSAME( UPLO, 'L' )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -6
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -9
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHBEV ', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( LOWER ) THEN
+            W( 1 ) = AB( 1, 1 )
+         ELSE
+            W( 1 ) = AB( KD+1, 1 )
+         END IF
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      EPS = SLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = SQRT( BIGNUM )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ANRM = CLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK )
+      ISCALE = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         IF( LOWER ) THEN
+            CALL CLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO )
+         ELSE
+            CALL CLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO )
+         END IF
+      END IF
+*
+*     Call CHBTRD to reduce Hermitian band matrix to tridiagonal form.
+*
+      INDE = 1
+      CALL CHBTRD( JOBZ, UPLO, N, KD, AB, LDAB, W, RWORK( INDE ), Z,
+     $             LDZ, WORK, IINFO )
+*
+*     For eigenvalues only, call SSTERF.  For eigenvectors, call CSTEQR.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL SSTERF( N, W, RWORK( INDE ), INFO )
+      ELSE
+         INDRWK = INDE + N
+         CALL CSTEQR( JOBZ, N, W, RWORK( INDE ), Z, LDZ,
+     $                RWORK( INDRWK ), INFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = N
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL SSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+      RETURN
+*
+*     End of CHBEV
+*
+      END
diff --git a/costa/native/external/lapack/chbevd.f b/costa/native/external/lapack/chbevd.f
new file mode 100644
index 000000000..9573958eb
--- /dev/null
+++ b/costa/native/external/lapack/chbevd.f
@@ -0,0 +1,298 @@
+      SUBROUTINE CHBEVD( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK,
+     $                   LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, KD, LDAB, LDZ, LIWORK, LRWORK, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               RWORK( * ), W( * )
+      COMPLEX            AB( LDAB, * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHBEVD computes all the eigenvalues and, optionally, eigenvectors of
+*  a complex Hermitian band matrix A.  If eigenvectors are desired, it
+*  uses a divide and conquer algorithm.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) COMPLEX array, dimension (LDAB, N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*          On exit, AB is overwritten by values generated during the
+*          reduction to tridiagonal form.  If UPLO = 'U', the first
+*          superdiagonal and the diagonal of the tridiagonal matrix T
+*          are returned in rows KD and KD+1 of AB, and if UPLO = 'L',
+*          the diagonal and first subdiagonal of T are returned in the
+*          first two rows of AB.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD + 1.
+*
+*  W       (output) REAL array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) COMPLEX array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal
+*          eigenvectors of the matrix A, with the i-th column of Z
+*          holding the eigenvector associated with W(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If N <= 1,               LWORK must be at least 1.
+*          If JOBZ = 'N' and N > 1, LWORK must be at least N.
+*          If JOBZ = 'V' and N > 1, LWORK must be at least 2*N**2.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace/output) REAL array,
+*                                         dimension (LRWORK)
+*          On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
+*
+*  LRWORK  (input) INTEGER
+*          The dimension of array RWORK.
+*          If N <= 1,               LRWORK must be at least 1.
+*          If JOBZ = 'N' and N > 1, LRWORK must be at least N.
+*          If JOBZ = 'V' and N > 1, LRWORK must be at least
+*                        1 + 5*N + 2*N**2.
+*
+*          If LRWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the RWORK array,
+*          returns this value as the first entry of the RWORK array, and
+*          no error message related to LRWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of array IWORK.
+*          If JOBZ = 'N' or N <= 1, LIWORK must be at least 1.
+*          If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N .
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = i, the algorithm failed to converge; i
+*                off-diagonal elements of an intermediate tridiagonal
+*                form did not converge to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+      COMPLEX            CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0E0, 0.0E0 ),
+     $                   CONE = ( 1.0E0, 0.0E0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LOWER, LQUERY, WANTZ
+      INTEGER            IINFO, IMAX, INDE, INDWK2, INDWRK, ISCALE,
+     $                   LIWMIN, LLRWK, LLWK2, LRWMIN, LWMIN
+      REAL               ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
+     $                   SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               CLANHB, SLAMCH
+      EXTERNAL           LSAME, CLANHB, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEMM, CHBTRD, CLACPY, CLASCL, CSTEDC, SSCAL,
+     $                   SSTERF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      LOWER = LSAME( UPLO, 'L' )
+      LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 .OR. LRWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( N.LE.1 ) THEN
+         LWMIN = 1
+         LRWMIN = 1
+         LIWMIN = 1
+      ELSE
+         IF( WANTZ ) THEN
+            LWMIN = 2*N**2
+            LRWMIN = 1 + 5*N + 2*N**2
+            LIWMIN = 3 + 5*N
+         ELSE
+            LWMIN = N
+            LRWMIN = N
+            LIWMIN = 1
+         END IF
+      END IF
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -6
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -9
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -11
+      ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -15
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+         RWORK( 1 ) = LRWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHBEVD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN 
+*
+      IF( N.EQ.1 ) THEN
+         W( 1 ) = AB( 1, 1 )
+         IF( WANTZ )
+     $      Z( 1, 1 ) = CONE
+         RETURN 
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      EPS = SLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = SQRT( BIGNUM )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ANRM = CLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK )
+      ISCALE = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         IF( LOWER ) THEN
+            CALL CLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO )
+         ELSE
+            CALL CLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO )
+         END IF
+      END IF
+*
+*     Call CHBTRD to reduce Hermitian band matrix to tridiagonal form.
+*
+      INDE = 1
+      INDWRK = INDE + N
+      INDWK2 = 1 + N*N
+      LLWK2 = LWORK - INDWK2 + 1
+      LLRWK = LRWORK - INDWRK + 1
+      CALL CHBTRD( JOBZ, UPLO, N, KD, AB, LDAB, W, RWORK( INDE ), Z,
+     $             LDZ, WORK, IINFO )
+*
+*     For eigenvalues only, call SSTERF.  For eigenvectors, call CSTEDC.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL SSTERF( N, W, RWORK( INDE ), INFO )
+      ELSE
+         CALL CSTEDC( 'I', N, W, RWORK( INDE ), WORK, N, WORK( INDWK2 ),
+     $                LLWK2, RWORK( INDWRK ), LLRWK, IWORK, LIWORK,
+     $                INFO )
+         CALL CGEMM( 'N', 'N', N, N, N, CONE, Z, LDZ, WORK, N, CZERO,
+     $               WORK( INDWK2 ), N )
+         CALL CLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = N
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL SSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+      WORK( 1 ) = LWMIN
+      RWORK( 1 ) = LRWMIN
+      IWORK( 1 ) = LIWMIN
+      RETURN
+*
+*     End of CHBEVD
+*
+      END
diff --git a/costa/native/external/lapack/chbevx.f b/costa/native/external/lapack/chbevx.f
new file mode 100644
index 000000000..02869d2d2
--- /dev/null
+++ b/costa/native/external/lapack/chbevx.f
@@ -0,0 +1,417 @@
+      SUBROUTINE CHBEVX( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, Q, LDQ, VL,
+     $                   VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, RWORK,
+     $                   IWORK, IFAIL, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE, UPLO
+      INTEGER            IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N
+      REAL               ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IFAIL( * ), IWORK( * )
+      REAL               RWORK( * ), W( * )
+      COMPLEX            AB( LDAB, * ), Q( LDQ, * ), WORK( * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHBEVX computes selected eigenvalues and, optionally, eigenvectors
+*  of a complex Hermitian band matrix A.  Eigenvalues and eigenvectors
+*  can be selected by specifying either a range of values or a range of
+*  indices for the desired eigenvalues.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found;
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found;
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) COMPLEX array, dimension (LDAB, N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*          On exit, AB is overwritten by values generated during the
+*          reduction to tridiagonal form.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD + 1.
+*
+*  Q       (output) COMPLEX array, dimension (LDQ, N)
+*          If JOBZ = 'V', the N-by-N unitary matrix used in the
+*                          reduction to tridiagonal form.
+*          If JOBZ = 'N', the array Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.  If JOBZ = 'V', then
+*          LDQ >= max(1,N).
+*
+*  VL      (input) REAL
+*  VU      (input) REAL
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) REAL
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less than
+*          or equal to zero, then  EPS*|T|  will be used in its place,
+*          where |T| is the 1-norm of the tridiagonal matrix obtained
+*          by reducing AB to tridiagonal form.
+*
+*          Eigenvalues will be computed most accurately when ABSTOL is
+*          set to twice the underflow threshold 2*SLAMCH('S'), not zero.
+*          If this routine returns with INFO>0, indicating that some
+*          eigenvectors did not converge, try setting ABSTOL to
+*          2*SLAMCH('S').
+*
+*          See "Computing Small Singular Values of Bidiagonal Matrices
+*          with Guaranteed High Relative Accuracy," by Demmel and
+*          Kahan, LAPACK Working Note #3.
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) REAL array, dimension (N)
+*          The first M elements contain the selected eigenvalues in
+*          ascending order.
+*
+*  Z       (output) COMPLEX array, dimension (LDZ, max(1,M))
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix A
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          If an eigenvector fails to converge, then that column of Z
+*          contains the latest approximation to the eigenvector, and the
+*          index of the eigenvector is returned in IFAIL.
+*          If JOBZ = 'N', then Z is not referenced.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace) COMPLEX array, dimension (N)
+*
+*  RWORK   (workspace) REAL array, dimension (7*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (5*N)
+*
+*  IFAIL   (output) INTEGER array, dimension (N)
+*          If JOBZ = 'V', then if INFO = 0, the first M elements of
+*          IFAIL are zero.  If INFO > 0, then IFAIL contains the
+*          indices of the eigenvectors that failed to converge.
+*          If JOBZ = 'N', then IFAIL is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, then i eigenvectors failed to converge.
+*                Their indices are stored in array IFAIL.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+      COMPLEX            CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0E0, 0.0E0 ),
+     $                   CONE = ( 1.0E0, 0.0E0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, LOWER, VALEIG, WANTZ
+      CHARACTER          ORDER
+      INTEGER            I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL,
+     $                   INDISP, INDIWK, INDRWK, INDWRK, ISCALE, ITMP1,
+     $                   J, JJ, NSPLIT
+      REAL               ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN,
+     $                   SIGMA, SMLNUM, TMP1, VLL, VUU
+      COMPLEX            CTMP1
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               CLANHB, SLAMCH
+      EXTERNAL           LSAME, CLANHB, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CGEMV, CHBTRD, CLACPY, CLASCL, CSTEIN,
+     $                   CSTEQR, CSWAP, SCOPY, SSCAL, SSTEBZ, SSTERF,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+      LOWER = LSAME( UPLO, 'L' )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -7
+      ELSE IF( WANTZ .AND. LDQ.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE
+         IF( VALEIG ) THEN
+            IF( N.GT.0 .AND. VU.LE.VL )
+     $         INFO = -11
+         ELSE IF( INDEIG ) THEN
+            IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN
+               INFO = -12
+            ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN
+               INFO = -13
+            END IF
+         END IF
+      END IF
+      IF( INFO.EQ.0 ) THEN
+         IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) )
+     $     INFO = -18
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHBEVX', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         M = 1
+         IF( LOWER ) THEN
+            CTMP1 = AB( 1, 1 )
+         ELSE
+            CTMP1 = AB( KD+1, 1 )
+         END IF
+         TMP1 = REAL( CTMP1 )
+         IF( VALEIG ) THEN
+            IF( .NOT.( VL.LT.TMP1 .AND. VU.GE.TMP1 ) )
+     $         M = 0
+         END IF
+         IF( M.EQ.1 ) THEN
+            W( 1 ) = CTMP1
+            IF( WANTZ )
+     $         Z( 1, 1 ) = CONE
+         END IF
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      EPS = SLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ISCALE = 0
+      ABSTLL = ABSTOL
+      IF ( VALEIG ) THEN
+         VLL = VL
+         VUU = VU
+      ELSE
+         VLL = ZERO
+         VUU = ZERO
+      ENDIF
+      ANRM = CLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK )
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         IF( LOWER ) THEN
+            CALL CLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO )
+         ELSE
+            CALL CLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO )
+         END IF
+         IF( ABSTOL.GT.0 )
+     $      ABSTLL = ABSTOL*SIGMA
+         IF( VALEIG ) THEN
+            VLL = VL*SIGMA
+            VUU = VU*SIGMA
+         END IF
+      END IF
+*
+*     Call CHBTRD to reduce Hermitian band matrix to tridiagonal form.
+*
+      INDD = 1
+      INDE = INDD + N
+      INDRWK = INDE + N
+      INDWRK = 1
+      CALL CHBTRD( JOBZ, UPLO, N, KD, AB, LDAB, RWORK( INDD ),
+     $             RWORK( INDE ), Q, LDQ, WORK( INDWRK ), IINFO )
+*
+*     If all eigenvalues are desired and ABSTOL is less than or equal
+*     to zero, then call SSTERF or CSTEQR.  If this fails for some
+*     eigenvalue, then try SSTEBZ.
+*
+      IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND.
+     $    ( ABSTOL.LE.ZERO ) ) THEN
+         CALL SCOPY( N, RWORK( INDD ), 1, W, 1 )
+         INDEE = INDRWK + 2*N
+         IF( .NOT.WANTZ ) THEN
+            CALL SCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 )
+            CALL SSTERF( N, W, RWORK( INDEE ), INFO )
+         ELSE
+            CALL CLACPY( 'A', N, N, Q, LDQ, Z, LDZ )
+            CALL SCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 )
+            CALL CSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ,
+     $                   RWORK( INDRWK ), INFO )
+            IF( INFO.EQ.0 ) THEN
+               DO 10 I = 1, N
+                  IFAIL( I ) = 0
+   10          CONTINUE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            M = N
+            GO TO 30
+         END IF
+         INFO = 0
+      END IF
+*
+*     Otherwise, call SSTEBZ and, if eigenvectors are desired, CSTEIN.
+*
+      IF( WANTZ ) THEN
+         ORDER = 'B'
+      ELSE
+         ORDER = 'E'
+      END IF
+      INDIBL = 1
+      INDISP = INDIBL + N
+      INDIWK = INDISP + N
+      CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL,
+     $             RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W,
+     $             IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ),
+     $             IWORK( INDIWK ), INFO )
+*
+      IF( WANTZ ) THEN
+         CALL CSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W,
+     $                IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ,
+     $                RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO )
+*
+*        Apply unitary matrix used in reduction to tridiagonal
+*        form to eigenvectors returned by CSTEIN.
+*
+         DO 20 J = 1, M
+            CALL CCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 )
+            CALL CGEMV( 'N', N, N, CONE, Q, LDQ, WORK, 1, CZERO,
+     $                  Z( 1, J ), 1 )
+   20    CONTINUE
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+   30 CONTINUE
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = M
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL SSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+*     If eigenvalues are not in order, then sort them, along with
+*     eigenvectors.
+*
+      IF( WANTZ ) THEN
+         DO 50 J = 1, M - 1
+            I = 0
+            TMP1 = W( J )
+            DO 40 JJ = J + 1, M
+               IF( W( JJ ).LT.TMP1 ) THEN
+                  I = JJ
+                  TMP1 = W( JJ )
+               END IF
+   40       CONTINUE
+*
+            IF( I.NE.0 ) THEN
+               ITMP1 = IWORK( INDIBL+I-1 )
+               W( I ) = W( J )
+               IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 )
+               W( J ) = TMP1
+               IWORK( INDIBL+J-1 ) = ITMP1
+               CALL CSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 )
+               IF( INFO.NE.0 ) THEN
+                  ITMP1 = IFAIL( I )
+                  IFAIL( I ) = IFAIL( J )
+                  IFAIL( J ) = ITMP1
+               END IF
+            END IF
+   50    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CHBEVX
+*
+      END
diff --git a/costa/native/external/lapack/chbgst.f b/costa/native/external/lapack/chbgst.f
new file mode 100644
index 000000000..3bec2ad70
--- /dev/null
+++ b/costa/native/external/lapack/chbgst.f
@@ -0,0 +1,1377 @@
+      SUBROUTINE CHBGST( VECT, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, X,
+     $                   LDX, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO, VECT
+      INTEGER            INFO, KA, KB, LDAB, LDBB, LDX, N
+*     ..
+*     .. Array Arguments ..
+      REAL               RWORK( * )
+      COMPLEX            AB( LDAB, * ), BB( LDBB, * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHBGST reduces a complex Hermitian-definite banded generalized
+*  eigenproblem  A*x = lambda*B*x  to standard form  C*y = lambda*y,
+*  such that C has the same bandwidth as A.
+*
+*  B must have been previously factorized as S**H*S by CPBSTF, using a
+*  split Cholesky factorization. A is overwritten by C = X**H*A*X, where
+*  X = S**(-1)*Q and Q is a unitary matrix chosen to preserve the
+*  bandwidth of A.
+*
+*  Arguments
+*  =========
+*
+*  VECT    (input) CHARACTER*1
+*          = 'N':  do not form the transformation matrix X;
+*          = 'V':  form X.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  KA      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KA >= 0.
+*
+*  KB      (input) INTEGER
+*          The number of superdiagonals of the matrix B if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KA >= KB >= 0.
+*
+*  AB      (input/output) COMPLEX array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix A, stored in the first ka+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+ka).
+*
+*          On exit, the transformed matrix X**H*A*X, stored in the same
+*          format as A.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KA+1.
+*
+*  BB      (input) COMPLEX array, dimension (LDBB,N)
+*          The banded factor S from the split Cholesky factorization of
+*          B, as returned by CPBSTF, stored in the first kb+1 rows of
+*          the array.
+*
+*  LDBB    (input) INTEGER
+*          The leading dimension of the array BB.  LDBB >= KB+1.
+*
+*  X       (output) COMPLEX array, dimension (LDX,N)
+*          If VECT = 'V', the n-by-n matrix X.
+*          If VECT = 'N', the array X is not referenced.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.
+*          LDX >= max(1,N) if VECT = 'V'; LDX >= 1 otherwise.
+*
+*  WORK    (workspace) COMPLEX array, dimension (N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            CZERO, CONE
+      REAL               ONE
+      PARAMETER          ( CZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   CONE = ( 1.0E+0, 0.0E+0 ), ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPDATE, UPPER, WANTX
+      INTEGER            I, I0, I1, I2, INCA, J, J1, J1T, J2, J2T, K,
+     $                   KA1, KB1, KBT, L, M, NR, NRT, NX
+      REAL               BII
+      COMPLEX            RA, RA1, T
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGERC, CGERU, CLACGV, CLAR2V, CLARGV, CLARTG,
+     $                   CLARTV, CLASET, CROT, CSSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX, MIN, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      WANTX = LSAME( VECT, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+      KA1 = KA + 1
+      KB1 = KB + 1
+      INFO = 0
+      IF( .NOT.WANTX .AND. .NOT.LSAME( VECT, 'N' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KA.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KB.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.KA+1 ) THEN
+         INFO = -7
+      ELSE IF( LDBB.LT.KB+1 ) THEN
+         INFO = -9
+      ELSE IF( LDX.LT.1 .OR. WANTX .AND. LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHBGST', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      INCA = LDAB*KA1
+*
+*     Initialize X to the unit matrix, if needed
+*
+      IF( WANTX )
+     $   CALL CLASET( 'Full', N, N, CZERO, CONE, X, LDX )
+*
+*     Set M to the splitting point m. It must be the same value as is
+*     used in CPBSTF. The chosen value allows the arrays WORK and RWORK
+*     to be of dimension (N).
+*
+      M = ( N+KB ) / 2
+*
+*     The routine works in two phases, corresponding to the two halves
+*     of the split Cholesky factorization of B as S**H*S where
+*
+*     S = ( U    )
+*         ( M  L )
+*
+*     with U upper triangular of order m, and L lower triangular of
+*     order n-m. S has the same bandwidth as B.
+*
+*     S is treated as a product of elementary matrices:
+*
+*     S = S(m)*S(m-1)*...*S(2)*S(1)*S(m+1)*S(m+2)*...*S(n-1)*S(n)
+*
+*     where S(i) is determined by the i-th row of S.
+*
+*     In phase 1, the index i takes the values n, n-1, ... , m+1;
+*     in phase 2, it takes the values 1, 2, ... , m.
+*
+*     For each value of i, the current matrix A is updated by forming
+*     inv(S(i))**H*A*inv(S(i)). This creates a triangular bulge outside
+*     the band of A. The bulge is then pushed down toward the bottom of
+*     A in phase 1, and up toward the top of A in phase 2, by applying
+*     plane rotations.
+*
+*     There are kb*(kb+1)/2 elements in the bulge, but at most 2*kb-1
+*     of them are linearly independent, so annihilating a bulge requires
+*     only 2*kb-1 plane rotations. The rotations are divided into a 1st
+*     set of kb-1 rotations, and a 2nd set of kb rotations.
+*
+*     Wherever possible, rotations are generated and applied in vector
+*     operations of length NR between the indices J1 and J2 (sometimes
+*     replaced by modified values NRT, J1T or J2T).
+*
+*     The real cosines and complex sines of the rotations are stored in
+*     the arrays RWORK and WORK, those of the 1st set in elements
+*     2:m-kb-1, and those of the 2nd set in elements m-kb+1:n.
+*
+*     The bulges are not formed explicitly; nonzero elements outside the
+*     band are created only when they are required for generating new
+*     rotations; they are stored in the array WORK, in positions where
+*     they are later overwritten by the sines of the rotations which
+*     annihilate them.
+*
+*     **************************** Phase 1 *****************************
+*
+*     The logical structure of this phase is:
+*
+*     UPDATE = .TRUE.
+*     DO I = N, M + 1, -1
+*        use S(i) to update A and create a new bulge
+*        apply rotations to push all bulges KA positions downward
+*     END DO
+*     UPDATE = .FALSE.
+*     DO I = M + KA + 1, N - 1
+*        apply rotations to push all bulges KA positions downward
+*     END DO
+*
+*     To avoid duplicating code, the two loops are merged.
+*
+      UPDATE = .TRUE.
+      I = N + 1
+   10 CONTINUE
+      IF( UPDATE ) THEN
+         I = I - 1
+         KBT = MIN( KB, I-1 )
+         I0 = I - 1
+         I1 = MIN( N, I+KA )
+         I2 = I - KBT + KA1
+         IF( I.LT.M+1 ) THEN
+            UPDATE = .FALSE.
+            I = I + 1
+            I0 = M
+            IF( KA.EQ.0 )
+     $         GO TO 480
+            GO TO 10
+         END IF
+      ELSE
+         I = I + KA
+         IF( I.GT.N-1 )
+     $      GO TO 480
+      END IF
+*
+      IF( UPPER ) THEN
+*
+*        Transform A, working with the upper triangle
+*
+         IF( UPDATE ) THEN
+*
+*           Form  inv(S(i))**H * A * inv(S(i))
+*
+            BII = REAL( BB( KB1, I ) )
+            AB( KA1, I ) = ( REAL( AB( KA1, I ) ) / BII ) / BII
+            DO 20 J = I + 1, I1
+               AB( I-J+KA1, J ) = AB( I-J+KA1, J ) / BII
+   20       CONTINUE
+            DO 30 J = MAX( 1, I-KA ), I - 1
+               AB( J-I+KA1, I ) = AB( J-I+KA1, I ) / BII
+   30       CONTINUE
+            DO 60 K = I - KBT, I - 1
+               DO 40 J = I - KBT, K
+                  AB( J-K+KA1, K ) = AB( J-K+KA1, K ) -
+     $                               BB( J-I+KB1, I )*
+     $                               CONJG( AB( K-I+KA1, I ) ) -
+     $                               CONJG( BB( K-I+KB1, I ) )*
+     $                               AB( J-I+KA1, I ) +
+     $                               REAL( AB( KA1, I ) )*
+     $                               BB( J-I+KB1, I )*
+     $                               CONJG( BB( K-I+KB1, I ) )
+   40          CONTINUE
+               DO 50 J = MAX( 1, I-KA ), I - KBT - 1
+                  AB( J-K+KA1, K ) = AB( J-K+KA1, K ) -
+     $                               CONJG( BB( K-I+KB1, I ) )*
+     $                               AB( J-I+KA1, I )
+   50          CONTINUE
+   60       CONTINUE
+            DO 80 J = I, I1
+               DO 70 K = MAX( J-KA, I-KBT ), I - 1
+                  AB( K-J+KA1, J ) = AB( K-J+KA1, J ) -
+     $                               BB( K-I+KB1, I )*AB( I-J+KA1, J )
+   70          CONTINUE
+   80       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by inv(S(i))
+*
+               CALL CSSCAL( N-M, ONE / BII, X( M+1, I ), 1 )
+               IF( KBT.GT.0 )
+     $            CALL CGERC( N-M, KBT, -CONE, X( M+1, I ), 1,
+     $                        BB( KB1-KBT, I ), 1, X( M+1, I-KBT ),
+     $                        LDX )
+            END IF
+*
+*           store a(i,i1) in RA1 for use in next loop over K
+*
+            RA1 = AB( I-I1+KA1, I1 )
+         END IF
+*
+*        Generate and apply vectors of rotations to chase all the
+*        existing bulges KA positions down toward the bottom of the
+*        band
+*
+         DO 130 K = 1, KB - 1
+            IF( UPDATE ) THEN
+*
+*              Determine the rotations which would annihilate the bulge
+*              which has in theory just been created
+*
+               IF( I-K+KA.LT.N .AND. I-K.GT.1 ) THEN
+*
+*                 generate rotation to annihilate a(i,i-k+ka+1)
+*
+                  CALL CLARTG( AB( K+1, I-K+KA ), RA1,
+     $                         RWORK( I-K+KA-M ), WORK( I-K+KA-M ), RA )
+*
+*                 create nonzero element a(i-k,i-k+ka+1) outside the
+*                 band and store it in WORK(i-k)
+*
+                  T = -BB( KB1-K, I )*RA1
+                  WORK( I-K ) = RWORK( I-K+KA-M )*T -
+     $                          CONJG( WORK( I-K+KA-M ) )*
+     $                          AB( 1, I-K+KA )
+                  AB( 1, I-K+KA ) = WORK( I-K+KA-M )*T +
+     $                              RWORK( I-K+KA-M )*AB( 1, I-K+KA )
+                  RA1 = RA
+               END IF
+            END IF
+            J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1
+            NR = ( N-J2+KA ) / KA1
+            J1 = J2 + ( NR-1 )*KA1
+            IF( UPDATE ) THEN
+               J2T = MAX( J2, I+2*KA-K+1 )
+            ELSE
+               J2T = J2
+            END IF
+            NRT = ( N-J2T+KA ) / KA1
+            DO 90 J = J2T, J1, KA1
+*
+*              create nonzero element a(j-ka,j+1) outside the band
+*              and store it in WORK(j-m)
+*
+               WORK( J-M ) = WORK( J-M )*AB( 1, J+1 )
+               AB( 1, J+1 ) = RWORK( J-M )*AB( 1, J+1 )
+   90       CONTINUE
+*
+*           generate rotations in 1st set to annihilate elements which
+*           have been created outside the band
+*
+            IF( NRT.GT.0 )
+     $         CALL CLARGV( NRT, AB( 1, J2T ), INCA, WORK( J2T-M ), KA1,
+     $                      RWORK( J2T-M ), KA1 )
+            IF( NR.GT.0 ) THEN
+*
+*              apply rotations in 1st set from the right
+*
+               DO 100 L = 1, KA - 1
+                  CALL CLARTV( NR, AB( KA1-L, J2 ), INCA,
+     $                         AB( KA-L, J2+1 ), INCA, RWORK( J2-M ),
+     $                         WORK( J2-M ), KA1 )
+  100          CONTINUE
+*
+*              apply rotations in 1st set from both sides to diagonal
+*              blocks
+*
+               CALL CLAR2V( NR, AB( KA1, J2 ), AB( KA1, J2+1 ),
+     $                      AB( KA, J2+1 ), INCA, RWORK( J2-M ),
+     $                      WORK( J2-M ), KA1 )
+*
+               CALL CLACGV( NR, WORK( J2-M ), KA1 )
+            END IF
+*
+*           start applying rotations in 1st set from the left
+*
+            DO 110 L = KA - 1, KB - K + 1, -1
+               NRT = ( N-J2+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL CLARTV( NRT, AB( L, J2+KA1-L ), INCA,
+     $                         AB( L+1, J2+KA1-L ), INCA, RWORK( J2-M ),
+     $                         WORK( J2-M ), KA1 )
+  110       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 1st set
+*
+               DO 120 J = J2, J1, KA1
+                  CALL CROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1,
+     $                       RWORK( J-M ), CONJG( WORK( J-M ) ) )
+  120          CONTINUE
+            END IF
+  130    CONTINUE
+*
+         IF( UPDATE ) THEN
+            IF( I2.LE.N .AND. KBT.GT.0 ) THEN
+*
+*              create nonzero element a(i-kbt,i-kbt+ka+1) outside the
+*              band and store it in WORK(i-kbt)
+*
+               WORK( I-KBT ) = -BB( KB1-KBT, I )*RA1
+            END IF
+         END IF
+*
+         DO 170 K = KB, 1, -1
+            IF( UPDATE ) THEN
+               J2 = I - K - 1 + MAX( 2, K-I0+1 )*KA1
+            ELSE
+               J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1
+            END IF
+*
+*           finish applying rotations in 2nd set from the left
+*
+            DO 140 L = KB - K, 1, -1
+               NRT = ( N-J2+KA+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL CLARTV( NRT, AB( L, J2-L+1 ), INCA,
+     $                         AB( L+1, J2-L+1 ), INCA, RWORK( J2-KA ),
+     $                         WORK( J2-KA ), KA1 )
+  140       CONTINUE
+            NR = ( N-J2+KA ) / KA1
+            J1 = J2 + ( NR-1 )*KA1
+            DO 150 J = J1, J2, -KA1
+               WORK( J ) = WORK( J-KA )
+               RWORK( J ) = RWORK( J-KA )
+  150       CONTINUE
+            DO 160 J = J2, J1, KA1
+*
+*              create nonzero element a(j-ka,j+1) outside the band
+*              and store it in WORK(j)
+*
+               WORK( J ) = WORK( J )*AB( 1, J+1 )
+               AB( 1, J+1 ) = RWORK( J )*AB( 1, J+1 )
+  160       CONTINUE
+            IF( UPDATE ) THEN
+               IF( I-K.LT.N-KA .AND. K.LE.KBT )
+     $            WORK( I-K+KA ) = WORK( I-K )
+            END IF
+  170    CONTINUE
+*
+         DO 210 K = KB, 1, -1
+            J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1
+            NR = ( N-J2+KA ) / KA1
+            J1 = J2 + ( NR-1 )*KA1
+            IF( NR.GT.0 ) THEN
+*
+*              generate rotations in 2nd set to annihilate elements
+*              which have been created outside the band
+*
+               CALL CLARGV( NR, AB( 1, J2 ), INCA, WORK( J2 ), KA1,
+     $                      RWORK( J2 ), KA1 )
+*
+*              apply rotations in 2nd set from the right
+*
+               DO 180 L = 1, KA - 1
+                  CALL CLARTV( NR, AB( KA1-L, J2 ), INCA,
+     $                         AB( KA-L, J2+1 ), INCA, RWORK( J2 ),
+     $                         WORK( J2 ), KA1 )
+  180          CONTINUE
+*
+*              apply rotations in 2nd set from both sides to diagonal
+*              blocks
+*
+               CALL CLAR2V( NR, AB( KA1, J2 ), AB( KA1, J2+1 ),
+     $                      AB( KA, J2+1 ), INCA, RWORK( J2 ),
+     $                      WORK( J2 ), KA1 )
+*
+               CALL CLACGV( NR, WORK( J2 ), KA1 )
+            END IF
+*
+*           start applying rotations in 2nd set from the left
+*
+            DO 190 L = KA - 1, KB - K + 1, -1
+               NRT = ( N-J2+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL CLARTV( NRT, AB( L, J2+KA1-L ), INCA,
+     $                         AB( L+1, J2+KA1-L ), INCA, RWORK( J2 ),
+     $                         WORK( J2 ), KA1 )
+  190       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 2nd set
+*
+               DO 200 J = J2, J1, KA1
+                  CALL CROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1,
+     $                       RWORK( J ), CONJG( WORK( J ) ) )
+  200          CONTINUE
+            END IF
+  210    CONTINUE
+*
+         DO 230 K = 1, KB - 1
+            J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1
+*
+*           finish applying rotations in 1st set from the left
+*
+            DO 220 L = KB - K, 1, -1
+               NRT = ( N-J2+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL CLARTV( NRT, AB( L, J2+KA1-L ), INCA,
+     $                         AB( L+1, J2+KA1-L ), INCA, RWORK( J2-M ),
+     $                         WORK( J2-M ), KA1 )
+  220       CONTINUE
+  230    CONTINUE
+*
+         IF( KB.GT.1 ) THEN
+            DO 240 J = N - 1, I2 + KA, -1
+               RWORK( J-M ) = RWORK( J-KA-M )
+               WORK( J-M ) = WORK( J-KA-M )
+  240       CONTINUE
+         END IF
+*
+      ELSE
+*
+*        Transform A, working with the lower triangle
+*
+         IF( UPDATE ) THEN
+*
+*           Form  inv(S(i))**H * A * inv(S(i))
+*
+            BII = REAL( BB( 1, I ) )
+            AB( 1, I ) = ( REAL( AB( 1, I ) ) / BII ) / BII
+            DO 250 J = I + 1, I1
+               AB( J-I+1, I ) = AB( J-I+1, I ) / BII
+  250       CONTINUE
+            DO 260 J = MAX( 1, I-KA ), I - 1
+               AB( I-J+1, J ) = AB( I-J+1, J ) / BII
+  260       CONTINUE
+            DO 290 K = I - KBT, I - 1
+               DO 270 J = I - KBT, K
+                  AB( K-J+1, J ) = AB( K-J+1, J ) -
+     $                             BB( I-J+1, J )*CONJG( AB( I-K+1,
+     $                             K ) ) - CONJG( BB( I-K+1, K ) )*
+     $                             AB( I-J+1, J ) + REAL( AB( 1, I ) )*
+     $                             BB( I-J+1, J )*CONJG( BB( I-K+1,
+     $                             K ) )
+  270          CONTINUE
+               DO 280 J = MAX( 1, I-KA ), I - KBT - 1
+                  AB( K-J+1, J ) = AB( K-J+1, J ) -
+     $                             CONJG( BB( I-K+1, K ) )*
+     $                             AB( I-J+1, J )
+  280          CONTINUE
+  290       CONTINUE
+            DO 310 J = I, I1
+               DO 300 K = MAX( J-KA, I-KBT ), I - 1
+                  AB( J-K+1, K ) = AB( J-K+1, K ) -
+     $                             BB( I-K+1, K )*AB( J-I+1, I )
+  300          CONTINUE
+  310       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by inv(S(i))
+*
+               CALL CSSCAL( N-M, ONE / BII, X( M+1, I ), 1 )
+               IF( KBT.GT.0 )
+     $            CALL CGERU( N-M, KBT, -CONE, X( M+1, I ), 1,
+     $                        BB( KBT+1, I-KBT ), LDBB-1,
+     $                        X( M+1, I-KBT ), LDX )
+            END IF
+*
+*           store a(i1,i) in RA1 for use in next loop over K
+*
+            RA1 = AB( I1-I+1, I )
+         END IF
+*
+*        Generate and apply vectors of rotations to chase all the
+*        existing bulges KA positions down toward the bottom of the
+*        band
+*
+         DO 360 K = 1, KB - 1
+            IF( UPDATE ) THEN
+*
+*              Determine the rotations which would annihilate the bulge
+*              which has in theory just been created
+*
+               IF( I-K+KA.LT.N .AND. I-K.GT.1 ) THEN
+*
+*                 generate rotation to annihilate a(i-k+ka+1,i)
+*
+                  CALL CLARTG( AB( KA1-K, I ), RA1, RWORK( I-K+KA-M ),
+     $                         WORK( I-K+KA-M ), RA )
+*
+*                 create nonzero element a(i-k+ka+1,i-k) outside the
+*                 band and store it in WORK(i-k)
+*
+                  T = -BB( K+1, I-K )*RA1
+                  WORK( I-K ) = RWORK( I-K+KA-M )*T -
+     $                          CONJG( WORK( I-K+KA-M ) )*AB( KA1, I-K )
+                  AB( KA1, I-K ) = WORK( I-K+KA-M )*T +
+     $                             RWORK( I-K+KA-M )*AB( KA1, I-K )
+                  RA1 = RA
+               END IF
+            END IF
+            J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1
+            NR = ( N-J2+KA ) / KA1
+            J1 = J2 + ( NR-1 )*KA1
+            IF( UPDATE ) THEN
+               J2T = MAX( J2, I+2*KA-K+1 )
+            ELSE
+               J2T = J2
+            END IF
+            NRT = ( N-J2T+KA ) / KA1
+            DO 320 J = J2T, J1, KA1
+*
+*              create nonzero element a(j+1,j-ka) outside the band
+*              and store it in WORK(j-m)
+*
+               WORK( J-M ) = WORK( J-M )*AB( KA1, J-KA+1 )
+               AB( KA1, J-KA+1 ) = RWORK( J-M )*AB( KA1, J-KA+1 )
+  320       CONTINUE
+*
+*           generate rotations in 1st set to annihilate elements which
+*           have been created outside the band
+*
+            IF( NRT.GT.0 )
+     $         CALL CLARGV( NRT, AB( KA1, J2T-KA ), INCA, WORK( J2T-M ),
+     $                      KA1, RWORK( J2T-M ), KA1 )
+            IF( NR.GT.0 ) THEN
+*
+*              apply rotations in 1st set from the left
+*
+               DO 330 L = 1, KA - 1
+                  CALL CLARTV( NR, AB( L+1, J2-L ), INCA,
+     $                         AB( L+2, J2-L ), INCA, RWORK( J2-M ),
+     $                         WORK( J2-M ), KA1 )
+  330          CONTINUE
+*
+*              apply rotations in 1st set from both sides to diagonal
+*              blocks
+*
+               CALL CLAR2V( NR, AB( 1, J2 ), AB( 1, J2+1 ), AB( 2, J2 ),
+     $                      INCA, RWORK( J2-M ), WORK( J2-M ), KA1 )
+*
+               CALL CLACGV( NR, WORK( J2-M ), KA1 )
+            END IF
+*
+*           start applying rotations in 1st set from the right
+*
+            DO 340 L = KA - 1, KB - K + 1, -1
+               NRT = ( N-J2+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL CLARTV( NRT, AB( KA1-L+1, J2 ), INCA,
+     $                         AB( KA1-L, J2+1 ), INCA, RWORK( J2-M ),
+     $                         WORK( J2-M ), KA1 )
+  340       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 1st set
+*
+               DO 350 J = J2, J1, KA1
+                  CALL CROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1,
+     $                       RWORK( J-M ), WORK( J-M ) )
+  350          CONTINUE
+            END IF
+  360    CONTINUE
+*
+         IF( UPDATE ) THEN
+            IF( I2.LE.N .AND. KBT.GT.0 ) THEN
+*
+*              create nonzero element a(i-kbt+ka+1,i-kbt) outside the
+*              band and store it in WORK(i-kbt)
+*
+               WORK( I-KBT ) = -BB( KBT+1, I-KBT )*RA1
+            END IF
+         END IF
+*
+         DO 400 K = KB, 1, -1
+            IF( UPDATE ) THEN
+               J2 = I - K - 1 + MAX( 2, K-I0+1 )*KA1
+            ELSE
+               J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1
+            END IF
+*
+*           finish applying rotations in 2nd set from the right
+*
+            DO 370 L = KB - K, 1, -1
+               NRT = ( N-J2+KA+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL CLARTV( NRT, AB( KA1-L+1, J2-KA ), INCA,
+     $                         AB( KA1-L, J2-KA+1 ), INCA,
+     $                         RWORK( J2-KA ), WORK( J2-KA ), KA1 )
+  370       CONTINUE
+            NR = ( N-J2+KA ) / KA1
+            J1 = J2 + ( NR-1 )*KA1
+            DO 380 J = J1, J2, -KA1
+               WORK( J ) = WORK( J-KA )
+               RWORK( J ) = RWORK( J-KA )
+  380       CONTINUE
+            DO 390 J = J2, J1, KA1
+*
+*              create nonzero element a(j+1,j-ka) outside the band
+*              and store it in WORK(j)
+*
+               WORK( J ) = WORK( J )*AB( KA1, J-KA+1 )
+               AB( KA1, J-KA+1 ) = RWORK( J )*AB( KA1, J-KA+1 )
+  390       CONTINUE
+            IF( UPDATE ) THEN
+               IF( I-K.LT.N-KA .AND. K.LE.KBT )
+     $            WORK( I-K+KA ) = WORK( I-K )
+            END IF
+  400    CONTINUE
+*
+         DO 440 K = KB, 1, -1
+            J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1
+            NR = ( N-J2+KA ) / KA1
+            J1 = J2 + ( NR-1 )*KA1
+            IF( NR.GT.0 ) THEN
+*
+*              generate rotations in 2nd set to annihilate elements
+*              which have been created outside the band
+*
+               CALL CLARGV( NR, AB( KA1, J2-KA ), INCA, WORK( J2 ), KA1,
+     $                      RWORK( J2 ), KA1 )
+*
+*              apply rotations in 2nd set from the left
+*
+               DO 410 L = 1, KA - 1
+                  CALL CLARTV( NR, AB( L+1, J2-L ), INCA,
+     $                         AB( L+2, J2-L ), INCA, RWORK( J2 ),
+     $                         WORK( J2 ), KA1 )
+  410          CONTINUE
+*
+*              apply rotations in 2nd set from both sides to diagonal
+*              blocks
+*
+               CALL CLAR2V( NR, AB( 1, J2 ), AB( 1, J2+1 ), AB( 2, J2 ),
+     $                      INCA, RWORK( J2 ), WORK( J2 ), KA1 )
+*
+               CALL CLACGV( NR, WORK( J2 ), KA1 )
+            END IF
+*
+*           start applying rotations in 2nd set from the right
+*
+            DO 420 L = KA - 1, KB - K + 1, -1
+               NRT = ( N-J2+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL CLARTV( NRT, AB( KA1-L+1, J2 ), INCA,
+     $                         AB( KA1-L, J2+1 ), INCA, RWORK( J2 ),
+     $                         WORK( J2 ), KA1 )
+  420       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 2nd set
+*
+               DO 430 J = J2, J1, KA1
+                  CALL CROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1,
+     $                       RWORK( J ), WORK( J ) )
+  430          CONTINUE
+            END IF
+  440    CONTINUE
+*
+         DO 460 K = 1, KB - 1
+            J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1
+*
+*           finish applying rotations in 1st set from the right
+*
+            DO 450 L = KB - K, 1, -1
+               NRT = ( N-J2+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL CLARTV( NRT, AB( KA1-L+1, J2 ), INCA,
+     $                         AB( KA1-L, J2+1 ), INCA, RWORK( J2-M ),
+     $                         WORK( J2-M ), KA1 )
+  450       CONTINUE
+  460    CONTINUE
+*
+         IF( KB.GT.1 ) THEN
+            DO 470 J = N - 1, I2 + KA, -1
+               RWORK( J-M ) = RWORK( J-KA-M )
+               WORK( J-M ) = WORK( J-KA-M )
+  470       CONTINUE
+         END IF
+*
+      END IF
+*
+      GO TO 10
+*
+  480 CONTINUE
+*
+*     **************************** Phase 2 *****************************
+*
+*     The logical structure of this phase is:
+*
+*     UPDATE = .TRUE.
+*     DO I = 1, M
+*        use S(i) to update A and create a new bulge
+*        apply rotations to push all bulges KA positions upward
+*     END DO
+*     UPDATE = .FALSE.
+*     DO I = M - KA - 1, 2, -1
+*        apply rotations to push all bulges KA positions upward
+*     END DO
+*
+*     To avoid duplicating code, the two loops are merged.
+*
+      UPDATE = .TRUE.
+      I = 0
+  490 CONTINUE
+      IF( UPDATE ) THEN
+         I = I + 1
+         KBT = MIN( KB, M-I )
+         I0 = I + 1
+         I1 = MAX( 1, I-KA )
+         I2 = I + KBT - KA1
+         IF( I.GT.M ) THEN
+            UPDATE = .FALSE.
+            I = I - 1
+            I0 = M + 1
+            IF( KA.EQ.0 )
+     $         RETURN
+            GO TO 490
+         END IF
+      ELSE
+         I = I - KA
+         IF( I.LT.2 )
+     $      RETURN
+      END IF
+*
+      IF( I.LT.M-KBT ) THEN
+         NX = M
+      ELSE
+         NX = N
+      END IF
+*
+      IF( UPPER ) THEN
+*
+*        Transform A, working with the upper triangle
+*
+         IF( UPDATE ) THEN
+*
+*           Form  inv(S(i))**H * A * inv(S(i))
+*
+            BII = REAL( BB( KB1, I ) )
+            AB( KA1, I ) = ( REAL( AB( KA1, I ) ) / BII ) / BII
+            DO 500 J = I1, I - 1
+               AB( J-I+KA1, I ) = AB( J-I+KA1, I ) / BII
+  500       CONTINUE
+            DO 510 J = I + 1, MIN( N, I+KA )
+               AB( I-J+KA1, J ) = AB( I-J+KA1, J ) / BII
+  510       CONTINUE
+            DO 540 K = I + 1, I + KBT
+               DO 520 J = K, I + KBT
+                  AB( K-J+KA1, J ) = AB( K-J+KA1, J ) -
+     $                               BB( I-J+KB1, J )*
+     $                               CONJG( AB( I-K+KA1, K ) ) -
+     $                               CONJG( BB( I-K+KB1, K ) )*
+     $                               AB( I-J+KA1, J ) +
+     $                               REAL( AB( KA1, I ) )*
+     $                               BB( I-J+KB1, J )*
+     $                               CONJG( BB( I-K+KB1, K ) )
+  520          CONTINUE
+               DO 530 J = I + KBT + 1, MIN( N, I+KA )
+                  AB( K-J+KA1, J ) = AB( K-J+KA1, J ) -
+     $                               CONJG( BB( I-K+KB1, K ) )*
+     $                               AB( I-J+KA1, J )
+  530          CONTINUE
+  540       CONTINUE
+            DO 560 J = I1, I
+               DO 550 K = I + 1, MIN( J+KA, I+KBT )
+                  AB( J-K+KA1, K ) = AB( J-K+KA1, K ) -
+     $                               BB( I-K+KB1, K )*AB( J-I+KA1, I )
+  550          CONTINUE
+  560       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by inv(S(i))
+*
+               CALL CSSCAL( NX, ONE / BII, X( 1, I ), 1 )
+               IF( KBT.GT.0 )
+     $            CALL CGERU( NX, KBT, -CONE, X( 1, I ), 1,
+     $                        BB( KB, I+1 ), LDBB-1, X( 1, I+1 ), LDX )
+            END IF
+*
+*           store a(i1,i) in RA1 for use in next loop over K
+*
+            RA1 = AB( I1-I+KA1, I )
+         END IF
+*
+*        Generate and apply vectors of rotations to chase all the
+*        existing bulges KA positions up toward the top of the band
+*
+         DO 610 K = 1, KB - 1
+            IF( UPDATE ) THEN
+*
+*              Determine the rotations which would annihilate the bulge
+*              which has in theory just been created
+*
+               IF( I+K-KA1.GT.0 .AND. I+K.LT.M ) THEN
+*
+*                 generate rotation to annihilate a(i+k-ka-1,i)
+*
+                  CALL CLARTG( AB( K+1, I ), RA1, RWORK( I+K-KA ),
+     $                         WORK( I+K-KA ), RA )
+*
+*                 create nonzero element a(i+k-ka-1,i+k) outside the
+*                 band and store it in WORK(m-kb+i+k)
+*
+                  T = -BB( KB1-K, I+K )*RA1
+                  WORK( M-KB+I+K ) = RWORK( I+K-KA )*T -
+     $                               CONJG( WORK( I+K-KA ) )*
+     $                               AB( 1, I+K )
+                  AB( 1, I+K ) = WORK( I+K-KA )*T +
+     $                           RWORK( I+K-KA )*AB( 1, I+K )
+                  RA1 = RA
+               END IF
+            END IF
+            J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1
+            NR = ( J2+KA-1 ) / KA1
+            J1 = J2 - ( NR-1 )*KA1
+            IF( UPDATE ) THEN
+               J2T = MIN( J2, I-2*KA+K-1 )
+            ELSE
+               J2T = J2
+            END IF
+            NRT = ( J2T+KA-1 ) / KA1
+            DO 570 J = J1, J2T, KA1
+*
+*              create nonzero element a(j-1,j+ka) outside the band
+*              and store it in WORK(j)
+*
+               WORK( J ) = WORK( J )*AB( 1, J+KA-1 )
+               AB( 1, J+KA-1 ) = RWORK( J )*AB( 1, J+KA-1 )
+  570       CONTINUE
+*
+*           generate rotations in 1st set to annihilate elements which
+*           have been created outside the band
+*
+            IF( NRT.GT.0 )
+     $         CALL CLARGV( NRT, AB( 1, J1+KA ), INCA, WORK( J1 ), KA1,
+     $                      RWORK( J1 ), KA1 )
+            IF( NR.GT.0 ) THEN
+*
+*              apply rotations in 1st set from the left
+*
+               DO 580 L = 1, KA - 1
+                  CALL CLARTV( NR, AB( KA1-L, J1+L ), INCA,
+     $                         AB( KA-L, J1+L ), INCA, RWORK( J1 ),
+     $                         WORK( J1 ), KA1 )
+  580          CONTINUE
+*
+*              apply rotations in 1st set from both sides to diagonal
+*              blocks
+*
+               CALL CLAR2V( NR, AB( KA1, J1 ), AB( KA1, J1-1 ),
+     $                      AB( KA, J1 ), INCA, RWORK( J1 ), WORK( J1 ),
+     $                      KA1 )
+*
+               CALL CLACGV( NR, WORK( J1 ), KA1 )
+            END IF
+*
+*           start applying rotations in 1st set from the right
+*
+            DO 590 L = KA - 1, KB - K + 1, -1
+               NRT = ( J2+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL CLARTV( NRT, AB( L, J1T ), INCA,
+     $                         AB( L+1, J1T-1 ), INCA, RWORK( J1T ),
+     $                         WORK( J1T ), KA1 )
+  590       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 1st set
+*
+               DO 600 J = J1, J2, KA1
+                  CALL CROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1,
+     $                       RWORK( J ), WORK( J ) )
+  600          CONTINUE
+            END IF
+  610    CONTINUE
+*
+         IF( UPDATE ) THEN
+            IF( I2.GT.0 .AND. KBT.GT.0 ) THEN
+*
+*              create nonzero element a(i+kbt-ka-1,i+kbt) outside the
+*              band and store it in WORK(m-kb+i+kbt)
+*
+               WORK( M-KB+I+KBT ) = -BB( KB1-KBT, I+KBT )*RA1
+            END IF
+         END IF
+*
+         DO 650 K = KB, 1, -1
+            IF( UPDATE ) THEN
+               J2 = I + K + 1 - MAX( 2, K+I0-M )*KA1
+            ELSE
+               J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1
+            END IF
+*
+*           finish applying rotations in 2nd set from the right
+*
+            DO 620 L = KB - K, 1, -1
+               NRT = ( J2+KA+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL CLARTV( NRT, AB( L, J1T+KA ), INCA,
+     $                         AB( L+1, J1T+KA-1 ), INCA,
+     $                         RWORK( M-KB+J1T+KA ),
+     $                         WORK( M-KB+J1T+KA ), KA1 )
+  620       CONTINUE
+            NR = ( J2+KA-1 ) / KA1
+            J1 = J2 - ( NR-1 )*KA1
+            DO 630 J = J1, J2, KA1
+               WORK( M-KB+J ) = WORK( M-KB+J+KA )
+               RWORK( M-KB+J ) = RWORK( M-KB+J+KA )
+  630       CONTINUE
+            DO 640 J = J1, J2, KA1
+*
+*              create nonzero element a(j-1,j+ka) outside the band
+*              and store it in WORK(m-kb+j)
+*
+               WORK( M-KB+J ) = WORK( M-KB+J )*AB( 1, J+KA-1 )
+               AB( 1, J+KA-1 ) = RWORK( M-KB+J )*AB( 1, J+KA-1 )
+  640       CONTINUE
+            IF( UPDATE ) THEN
+               IF( I+K.GT.KA1 .AND. K.LE.KBT )
+     $            WORK( M-KB+I+K-KA ) = WORK( M-KB+I+K )
+            END IF
+  650    CONTINUE
+*
+         DO 690 K = KB, 1, -1
+            J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1
+            NR = ( J2+KA-1 ) / KA1
+            J1 = J2 - ( NR-1 )*KA1
+            IF( NR.GT.0 ) THEN
+*
+*              generate rotations in 2nd set to annihilate elements
+*              which have been created outside the band
+*
+               CALL CLARGV( NR, AB( 1, J1+KA ), INCA, WORK( M-KB+J1 ),
+     $                      KA1, RWORK( M-KB+J1 ), KA1 )
+*
+*              apply rotations in 2nd set from the left
+*
+               DO 660 L = 1, KA - 1
+                  CALL CLARTV( NR, AB( KA1-L, J1+L ), INCA,
+     $                         AB( KA-L, J1+L ), INCA, RWORK( M-KB+J1 ),
+     $                         WORK( M-KB+J1 ), KA1 )
+  660          CONTINUE
+*
+*              apply rotations in 2nd set from both sides to diagonal
+*              blocks
+*
+               CALL CLAR2V( NR, AB( KA1, J1 ), AB( KA1, J1-1 ),
+     $                      AB( KA, J1 ), INCA, RWORK( M-KB+J1 ),
+     $                      WORK( M-KB+J1 ), KA1 )
+*
+               CALL CLACGV( NR, WORK( M-KB+J1 ), KA1 )
+            END IF
+*
+*           start applying rotations in 2nd set from the right
+*
+            DO 670 L = KA - 1, KB - K + 1, -1
+               NRT = ( J2+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL CLARTV( NRT, AB( L, J1T ), INCA,
+     $                         AB( L+1, J1T-1 ), INCA,
+     $                         RWORK( M-KB+J1T ), WORK( M-KB+J1T ),
+     $                         KA1 )
+  670       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 2nd set
+*
+               DO 680 J = J1, J2, KA1
+                  CALL CROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1,
+     $                       RWORK( M-KB+J ), WORK( M-KB+J ) )
+  680          CONTINUE
+            END IF
+  690    CONTINUE
+*
+         DO 710 K = 1, KB - 1
+            J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1
+*
+*           finish applying rotations in 1st set from the right
+*
+            DO 700 L = KB - K, 1, -1
+               NRT = ( J2+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL CLARTV( NRT, AB( L, J1T ), INCA,
+     $                         AB( L+1, J1T-1 ), INCA, RWORK( J1T ),
+     $                         WORK( J1T ), KA1 )
+  700       CONTINUE
+  710    CONTINUE
+*
+         IF( KB.GT.1 ) THEN
+            DO 720 J = 2, I2 - KA
+               RWORK( J ) = RWORK( J+KA )
+               WORK( J ) = WORK( J+KA )
+  720       CONTINUE
+         END IF
+*
+      ELSE
+*
+*        Transform A, working with the lower triangle
+*
+         IF( UPDATE ) THEN
+*
+*           Form  inv(S(i))**H * A * inv(S(i))
+*
+            BII = REAL( BB( 1, I ) )
+            AB( 1, I ) = ( REAL( AB( 1, I ) ) / BII ) / BII
+            DO 730 J = I1, I - 1
+               AB( I-J+1, J ) = AB( I-J+1, J ) / BII
+  730       CONTINUE
+            DO 740 J = I + 1, MIN( N, I+KA )
+               AB( J-I+1, I ) = AB( J-I+1, I ) / BII
+  740       CONTINUE
+            DO 770 K = I + 1, I + KBT
+               DO 750 J = K, I + KBT
+                  AB( J-K+1, K ) = AB( J-K+1, K ) -
+     $                             BB( J-I+1, I )*CONJG( AB( K-I+1,
+     $                             I ) ) - CONJG( BB( K-I+1, I ) )*
+     $                             AB( J-I+1, I ) + REAL( AB( 1, I ) )*
+     $                             BB( J-I+1, I )*CONJG( BB( K-I+1,
+     $                             I ) )
+  750          CONTINUE
+               DO 760 J = I + KBT + 1, MIN( N, I+KA )
+                  AB( J-K+1, K ) = AB( J-K+1, K ) -
+     $                             CONJG( BB( K-I+1, I ) )*
+     $                             AB( J-I+1, I )
+  760          CONTINUE
+  770       CONTINUE
+            DO 790 J = I1, I
+               DO 780 K = I + 1, MIN( J+KA, I+KBT )
+                  AB( K-J+1, J ) = AB( K-J+1, J ) -
+     $                             BB( K-I+1, I )*AB( I-J+1, J )
+  780          CONTINUE
+  790       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by inv(S(i))
+*
+               CALL CSSCAL( NX, ONE / BII, X( 1, I ), 1 )
+               IF( KBT.GT.0 )
+     $            CALL CGERC( NX, KBT, -CONE, X( 1, I ), 1, BB( 2, I ),
+     $                        1, X( 1, I+1 ), LDX )
+            END IF
+*
+*           store a(i,i1) in RA1 for use in next loop over K
+*
+            RA1 = AB( I-I1+1, I1 )
+         END IF
+*
+*        Generate and apply vectors of rotations to chase all the
+*        existing bulges KA positions up toward the top of the band
+*
+         DO 840 K = 1, KB - 1
+            IF( UPDATE ) THEN
+*
+*              Determine the rotations which would annihilate the bulge
+*              which has in theory just been created
+*
+               IF( I+K-KA1.GT.0 .AND. I+K.LT.M ) THEN
+*
+*                 generate rotation to annihilate a(i,i+k-ka-1)
+*
+                  CALL CLARTG( AB( KA1-K, I+K-KA ), RA1,
+     $                         RWORK( I+K-KA ), WORK( I+K-KA ), RA )
+*
+*                 create nonzero element a(i+k,i+k-ka-1) outside the
+*                 band and store it in WORK(m-kb+i+k)
+*
+                  T = -BB( K+1, I )*RA1
+                  WORK( M-KB+I+K ) = RWORK( I+K-KA )*T -
+     $                               CONJG( WORK( I+K-KA ) )*
+     $                               AB( KA1, I+K-KA )
+                  AB( KA1, I+K-KA ) = WORK( I+K-KA )*T +
+     $                                RWORK( I+K-KA )*AB( KA1, I+K-KA )
+                  RA1 = RA
+               END IF
+            END IF
+            J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1
+            NR = ( J2+KA-1 ) / KA1
+            J1 = J2 - ( NR-1 )*KA1
+            IF( UPDATE ) THEN
+               J2T = MIN( J2, I-2*KA+K-1 )
+            ELSE
+               J2T = J2
+            END IF
+            NRT = ( J2T+KA-1 ) / KA1
+            DO 800 J = J1, J2T, KA1
+*
+*              create nonzero element a(j+ka,j-1) outside the band
+*              and store it in WORK(j)
+*
+               WORK( J ) = WORK( J )*AB( KA1, J-1 )
+               AB( KA1, J-1 ) = RWORK( J )*AB( KA1, J-1 )
+  800       CONTINUE
+*
+*           generate rotations in 1st set to annihilate elements which
+*           have been created outside the band
+*
+            IF( NRT.GT.0 )
+     $         CALL CLARGV( NRT, AB( KA1, J1 ), INCA, WORK( J1 ), KA1,
+     $                      RWORK( J1 ), KA1 )
+            IF( NR.GT.0 ) THEN
+*
+*              apply rotations in 1st set from the right
+*
+               DO 810 L = 1, KA - 1
+                  CALL CLARTV( NR, AB( L+1, J1 ), INCA, AB( L+2, J1-1 ),
+     $                         INCA, RWORK( J1 ), WORK( J1 ), KA1 )
+  810          CONTINUE
+*
+*              apply rotations in 1st set from both sides to diagonal
+*              blocks
+*
+               CALL CLAR2V( NR, AB( 1, J1 ), AB( 1, J1-1 ),
+     $                      AB( 2, J1-1 ), INCA, RWORK( J1 ),
+     $                      WORK( J1 ), KA1 )
+*
+               CALL CLACGV( NR, WORK( J1 ), KA1 )
+            END IF
+*
+*           start applying rotations in 1st set from the left
+*
+            DO 820 L = KA - 1, KB - K + 1, -1
+               NRT = ( J2+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL CLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA,
+     $                         AB( KA1-L, J1T-KA1+L ), INCA,
+     $                         RWORK( J1T ), WORK( J1T ), KA1 )
+  820       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 1st set
+*
+               DO 830 J = J1, J2, KA1
+                  CALL CROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1,
+     $                       RWORK( J ), CONJG( WORK( J ) ) )
+  830          CONTINUE
+            END IF
+  840    CONTINUE
+*
+         IF( UPDATE ) THEN
+            IF( I2.GT.0 .AND. KBT.GT.0 ) THEN
+*
+*              create nonzero element a(i+kbt,i+kbt-ka-1) outside the
+*              band and store it in WORK(m-kb+i+kbt)
+*
+               WORK( M-KB+I+KBT ) = -BB( KBT+1, I )*RA1
+            END IF
+         END IF
+*
+         DO 880 K = KB, 1, -1
+            IF( UPDATE ) THEN
+               J2 = I + K + 1 - MAX( 2, K+I0-M )*KA1
+            ELSE
+               J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1
+            END IF
+*
+*           finish applying rotations in 2nd set from the left
+*
+            DO 850 L = KB - K, 1, -1
+               NRT = ( J2+KA+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL CLARTV( NRT, AB( KA1-L+1, J1T+L-1 ), INCA,
+     $                         AB( KA1-L, J1T+L-1 ), INCA,
+     $                         RWORK( M-KB+J1T+KA ),
+     $                         WORK( M-KB+J1T+KA ), KA1 )
+  850       CONTINUE
+            NR = ( J2+KA-1 ) / KA1
+            J1 = J2 - ( NR-1 )*KA1
+            DO 860 J = J1, J2, KA1
+               WORK( M-KB+J ) = WORK( M-KB+J+KA )
+               RWORK( M-KB+J ) = RWORK( M-KB+J+KA )
+  860       CONTINUE
+            DO 870 J = J1, J2, KA1
+*
+*              create nonzero element a(j+ka,j-1) outside the band
+*              and store it in WORK(m-kb+j)
+*
+               WORK( M-KB+J ) = WORK( M-KB+J )*AB( KA1, J-1 )
+               AB( KA1, J-1 ) = RWORK( M-KB+J )*AB( KA1, J-1 )
+  870       CONTINUE
+            IF( UPDATE ) THEN
+               IF( I+K.GT.KA1 .AND. K.LE.KBT )
+     $            WORK( M-KB+I+K-KA ) = WORK( M-KB+I+K )
+            END IF
+  880    CONTINUE
+*
+         DO 920 K = KB, 1, -1
+            J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1
+            NR = ( J2+KA-1 ) / KA1
+            J1 = J2 - ( NR-1 )*KA1
+            IF( NR.GT.0 ) THEN
+*
+*              generate rotations in 2nd set to annihilate elements
+*              which have been created outside the band
+*
+               CALL CLARGV( NR, AB( KA1, J1 ), INCA, WORK( M-KB+J1 ),
+     $                      KA1, RWORK( M-KB+J1 ), KA1 )
+*
+*              apply rotations in 2nd set from the right
+*
+               DO 890 L = 1, KA - 1
+                  CALL CLARTV( NR, AB( L+1, J1 ), INCA, AB( L+2, J1-1 ),
+     $                         INCA, RWORK( M-KB+J1 ), WORK( M-KB+J1 ),
+     $                         KA1 )
+  890          CONTINUE
+*
+*              apply rotations in 2nd set from both sides to diagonal
+*              blocks
+*
+               CALL CLAR2V( NR, AB( 1, J1 ), AB( 1, J1-1 ),
+     $                      AB( 2, J1-1 ), INCA, RWORK( M-KB+J1 ),
+     $                      WORK( M-KB+J1 ), KA1 )
+*
+               CALL CLACGV( NR, WORK( M-KB+J1 ), KA1 )
+            END IF
+*
+*           start applying rotations in 2nd set from the left
+*
+            DO 900 L = KA - 1, KB - K + 1, -1
+               NRT = ( J2+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL CLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA,
+     $                         AB( KA1-L, J1T-KA1+L ), INCA,
+     $                         RWORK( M-KB+J1T ), WORK( M-KB+J1T ),
+     $                         KA1 )
+  900       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 2nd set
+*
+               DO 910 J = J1, J2, KA1
+                  CALL CROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1,
+     $                       RWORK( M-KB+J ), CONJG( WORK( M-KB+J ) ) )
+  910          CONTINUE
+            END IF
+  920    CONTINUE
+*
+         DO 940 K = 1, KB - 1
+            J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1
+*
+*           finish applying rotations in 1st set from the left
+*
+            DO 930 L = KB - K, 1, -1
+               NRT = ( J2+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL CLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA,
+     $                         AB( KA1-L, J1T-KA1+L ), INCA,
+     $                         RWORK( J1T ), WORK( J1T ), KA1 )
+  930       CONTINUE
+  940    CONTINUE
+*
+         IF( KB.GT.1 ) THEN
+            DO 950 J = 2, I2 - KA
+               RWORK( J ) = RWORK( J+KA )
+               WORK( J ) = WORK( J+KA )
+  950       CONTINUE
+         END IF
+*
+      END IF
+*
+      GO TO 490
+*
+*     End of CHBGST
+*
+      END
diff --git a/costa/native/external/lapack/chbgv.f b/costa/native/external/lapack/chbgv.f
new file mode 100644
index 000000000..4172aa683
--- /dev/null
+++ b/costa/native/external/lapack/chbgv.f
@@ -0,0 +1,192 @@
+      SUBROUTINE CHBGV( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, Z,
+     $                  LDZ, WORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, KA, KB, LDAB, LDBB, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      REAL               RWORK( * ), W( * )
+      COMPLEX            AB( LDAB, * ), BB( LDBB, * ), WORK( * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHBGV computes all the eigenvalues, and optionally, the eigenvectors
+*  of a complex generalized Hermitian-definite banded eigenproblem, of
+*  the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian
+*  and banded, and B is also positive definite.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  KA      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'. KA >= 0.
+*
+*  KB      (input) INTEGER
+*          The number of superdiagonals of the matrix B if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'. KB >= 0.
+*
+*  AB      (input/output) COMPLEX array, dimension (LDAB, N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix A, stored in the first ka+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+ka).
+*
+*          On exit, the contents of AB are destroyed.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KA+1.
+*
+*  BB      (input/output) COMPLEX array, dimension (LDBB, N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix B, stored in the first kb+1 rows of the array.  The
+*          j-th column of B is stored in the j-th column of the array BB
+*          as follows:
+*          if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j;
+*          if UPLO = 'L', BB(1+i-j,j)    = B(i,j) for j<=i<=min(n,j+kb).
+*
+*          On exit, the factor S from the split Cholesky factorization
+*          B = S**H*S, as returned by CPBSTF.
+*
+*  LDBB    (input) INTEGER
+*          The leading dimension of the array BB.  LDBB >= KB+1.
+*
+*  W       (output) REAL array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) COMPLEX array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of
+*          eigenvectors, with the i-th column of Z holding the
+*          eigenvector associated with W(i). The eigenvectors are
+*          normalized so that Z**H*B*Z = I.
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= N.
+*
+*  WORK    (workspace) COMPLEX array, dimension (N)
+*
+*  RWORK   (workspace) REAL array, dimension (3*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is:
+*             <= N:  the algorithm failed to converge:
+*                    i off-diagonal elements of an intermediate
+*                    tridiagonal form did not converge to zero;
+*             > N:   if INFO = N + i, for 1 <= i <= N, then CPBSTF
+*                    returned INFO = i: B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            UPPER, WANTZ
+      CHARACTER          VECT
+      INTEGER            IINFO, INDE, INDWRK
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CHBGST, CHBTRD, CPBSTF, CSTEQR, SSTERF, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KA.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.KA+1 ) THEN
+         INFO = -7
+      ELSE IF( LDBB.LT.KB+1 ) THEN
+         INFO = -9
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHBGV ', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form a split Cholesky factorization of B.
+*
+      CALL CPBSTF( UPLO, N, KB, BB, LDBB, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem.
+*
+      INDE = 1
+      INDWRK = INDE + N
+      CALL CHBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Z, LDZ,
+     $             WORK, RWORK( INDWRK ), IINFO )
+*
+*     Reduce to tridiagonal form.
+*
+      IF( WANTZ ) THEN
+         VECT = 'U'
+      ELSE
+         VECT = 'N'
+      END IF
+      CALL CHBTRD( VECT, UPLO, N, KA, AB, LDAB, W, RWORK( INDE ), Z,
+     $             LDZ, WORK, IINFO )
+*
+*     For eigenvalues only, call SSTERF.  For eigenvectors, call CSTEQR.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL SSTERF( N, W, RWORK( INDE ), INFO )
+      ELSE
+         CALL CSTEQR( JOBZ, N, W, RWORK( INDE ), Z, LDZ,
+     $                RWORK( INDWRK ), INFO )
+      END IF
+      RETURN
+*
+*     End of CHBGV
+*
+      END
diff --git a/costa/native/external/lapack/chbgvd.f b/costa/native/external/lapack/chbgvd.f
new file mode 100644
index 000000000..db4917efc
--- /dev/null
+++ b/costa/native/external/lapack/chbgvd.f
@@ -0,0 +1,295 @@
+      SUBROUTINE CHBGVD( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W,
+     $                   Z, LDZ, WORK, LWORK, RWORK, LRWORK, IWORK,
+     $                   LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, KA, KB, LDAB, LDBB, LDZ, LIWORK, LRWORK,
+     $                   LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               RWORK( * ), W( * )
+      COMPLEX            AB( LDAB, * ), BB( LDBB, * ), WORK( * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHBGVD computes all the eigenvalues, and optionally, the eigenvectors
+*  of a complex generalized Hermitian-definite banded eigenproblem, of
+*  the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian
+*  and banded, and B is also positive definite.  If eigenvectors are
+*  desired, it uses a divide and conquer algorithm.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  KA      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'. KA >= 0.
+*
+*  KB      (input) INTEGER
+*          The number of superdiagonals of the matrix B if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'. KB >= 0.
+*
+*  AB      (input/output) COMPLEX array, dimension (LDAB, N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix A, stored in the first ka+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+ka).
+*
+*          On exit, the contents of AB are destroyed.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KA+1.
+*
+*  BB      (input/output) COMPLEX array, dimension (LDBB, N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix B, stored in the first kb+1 rows of the array.  The
+*          j-th column of B is stored in the j-th column of the array BB
+*          as follows:
+*          if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j;
+*          if UPLO = 'L', BB(1+i-j,j)    = B(i,j) for j<=i<=min(n,j+kb).
+*
+*          On exit, the factor S from the split Cholesky factorization
+*          B = S**H*S, as returned by CPBSTF.
+*
+*  LDBB    (input) INTEGER
+*          The leading dimension of the array BB.  LDBB >= KB+1.
+*
+*  W       (output) REAL array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) COMPLEX array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of
+*          eigenvectors, with the i-th column of Z holding the
+*          eigenvector associated with W(i). The eigenvectors are
+*          normalized so that Z**H*B*Z = I.
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= N.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO=0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If N <= 1,               LWORK >= 1.
+*          If JOBZ = 'N' and N > 1, LWORK >= N.
+*          If JOBZ = 'V' and N > 1, LWORK >= 2*N**2.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace/output) REAL array, dimension (LRWORK)
+*          On exit, if INFO=0, RWORK(1) returns the optimal LRWORK.
+*
+*  LRWORK  (input) INTEGER
+*          The dimension of array RWORK.
+*          If N <= 1,               LRWORK >= 1.
+*          If JOBZ = 'N' and N > 1, LRWORK >= N.
+*          If JOBZ = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2.
+*
+*          If LRWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the RWORK array,
+*          returns this value as the first entry of the RWORK array, and
+*          no error message related to LRWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO=0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of array IWORK.
+*          If JOBZ = 'N' or N <= 1, LIWORK >= 1.
+*          If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N.
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is:
+*             <= N:  the algorithm failed to converge:
+*                    i off-diagonal elements of an intermediate
+*                    tridiagonal form did not converge to zero;
+*             > N:   if INFO = N + i, for 1 <= i <= N, then CPBSTF
+*                    returned INFO = i: B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            CONE, CZERO
+      PARAMETER          ( CONE = ( 1.0E+0, 0.0E+0 ),
+     $                   CZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, UPPER, WANTZ
+      CHARACTER          VECT
+      INTEGER            IINFO, INDE, INDWK2, INDWRK, LIWMIN, LLRWK,
+     $                   LLWK2, LRWMIN, LWMIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEMM, CHBGST, CHBTRD, CLACPY, CPBSTF, CSTEDC,
+     $                   SSTERF, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+      LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( N.LE.1 ) THEN
+         LWMIN = 1
+         LRWMIN = 1
+         LIWMIN = 1
+      ELSE
+         IF( WANTZ ) THEN
+            LWMIN = 2*N**2
+            LRWMIN = 1 + 5*N + 2*N**2
+            LIWMIN = 3 + 5*N
+         ELSE
+            LWMIN = N
+            LRWMIN = N
+            LIWMIN = 1
+         END IF
+      END IF
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KA.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.KA+1 ) THEN
+         INFO = -7
+      ELSE IF( LDBB.LT.KB+1 ) THEN
+         INFO = -9
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -12
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -14
+      ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -16
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -18
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+         RWORK( 1 ) = LRWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHBGVD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form a split Cholesky factorization of B.
+*
+      CALL CPBSTF( UPLO, N, KB, BB, LDBB, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem.
+*
+      INDE = 1
+      INDWRK = INDE + N
+      INDWK2 = 1 + N*N
+      LLWK2 = LWORK - INDWK2 + 2
+      LLRWK = LRWORK - INDWRK + 2
+      CALL CHBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Z, LDZ,
+     $             WORK, RWORK( INDWRK ), IINFO )
+*
+*     Reduce Hermitian band matrix to tridiagonal form.
+*
+      IF( WANTZ ) THEN
+         VECT = 'U'
+      ELSE
+         VECT = 'N'
+      END IF
+      CALL CHBTRD( VECT, UPLO, N, KA, AB, LDAB, W, RWORK( INDE ), Z,
+     $             LDZ, WORK, IINFO )
+*
+*     For eigenvalues only, call SSTERF.  For eigenvectors, call CSTEDC.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL SSTERF( N, W, RWORK( INDE ), INFO )
+      ELSE
+         CALL CSTEDC( 'I', N, W, RWORK( INDE ), WORK, N, WORK( INDWK2 ),
+     $                LLWK2, RWORK( INDWRK ), LLRWK, IWORK, LIWORK,
+     $                INFO )
+         CALL CGEMM( 'N', 'N', N, N, N, CONE, Z, LDZ, WORK, N, CZERO,
+     $               WORK( INDWK2 ), N )
+         CALL CLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ )
+      END IF
+*
+      WORK( 1 ) = LWMIN
+      RWORK( 1 ) = LRWMIN
+      IWORK( 1 ) = LIWMIN
+      RETURN
+*
+*     End of CHBGVD
+*
+      END
diff --git a/costa/native/external/lapack/chbgvx.f b/costa/native/external/lapack/chbgvx.f
new file mode 100644
index 000000000..32b5f7381
--- /dev/null
+++ b/costa/native/external/lapack/chbgvx.f
@@ -0,0 +1,374 @@
+      SUBROUTINE CHBGVX( JOBZ, RANGE, UPLO, N, KA, KB, AB, LDAB, BB,
+     $                   LDBB, Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, Z,
+     $                   LDZ, WORK, RWORK, IWORK, IFAIL, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE, UPLO
+      INTEGER            IL, INFO, IU, KA, KB, LDAB, LDBB, LDQ, LDZ, M,
+     $                   N
+      REAL               ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IFAIL( * ), IWORK( * )
+      REAL               RWORK( * ), W( * )
+      COMPLEX            AB( LDAB, * ), BB( LDBB, * ), Q( LDQ, * ),
+     $                   WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHBGVX computes all the eigenvalues, and optionally, the eigenvectors
+*  of a complex generalized Hermitian-definite banded eigenproblem, of
+*  the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian
+*  and banded, and B is also positive definite.  Eigenvalues and
+*  eigenvectors can be selected by specifying either all eigenvalues,
+*  a range of values or a range of indices for the desired eigenvalues.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found;
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found;
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  KA      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'. KA >= 0.
+*
+*  KB      (input) INTEGER
+*          The number of superdiagonals of the matrix B if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'. KB >= 0.
+*
+*  AB      (input/output) COMPLEX array, dimension (LDAB, N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix A, stored in the first ka+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+ka).
+*
+*          On exit, the contents of AB are destroyed.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KA+1.
+*
+*  BB      (input/output) COMPLEX array, dimension (LDBB, N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix B, stored in the first kb+1 rows of the array.  The
+*          j-th column of B is stored in the j-th column of the array BB
+*          as follows:
+*          if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j;
+*          if UPLO = 'L', BB(1+i-j,j)    = B(i,j) for j<=i<=min(n,j+kb).
+*
+*          On exit, the factor S from the split Cholesky factorization
+*          B = S**H*S, as returned by CPBSTF.
+*
+*  LDBB    (input) INTEGER
+*          The leading dimension of the array BB.  LDBB >= KB+1.
+*
+*  Q       (output) COMPLEX array, dimension (LDQ, N)
+*          If JOBZ = 'V', the n-by-n matrix used in the reduction of
+*          A*x = (lambda)*B*x to standard form, i.e. C*x = (lambda)*x,
+*          and consequently C to tridiagonal form.
+*          If JOBZ = 'N', the array Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.  If JOBZ = 'N',
+*          LDQ >= 1. If JOBZ = 'V', LDQ >= max(1,N).
+*
+*  VL      (input) REAL
+*  VU      (input) REAL
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) REAL
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less than
+*          or equal to zero, then  EPS*|T|  will be used in its place,
+*          where |T| is the 1-norm of the tridiagonal matrix obtained
+*          by reducing AP to tridiagonal form.
+*
+*          Eigenvalues will be computed most accurately when ABSTOL is
+*          set to twice the underflow threshold 2*SLAMCH('S'), not zero.
+*          If this routine returns with INFO>0, indicating that some
+*          eigenvectors did not converge, try setting ABSTOL to
+*          2*SLAMCH('S').
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) REAL array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) COMPLEX array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of
+*          eigenvectors, with the i-th column of Z holding the
+*          eigenvector associated with W(i). The eigenvectors are
+*          normalized so that Z**H*B*Z = I.
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= N.
+*
+*  WORK    (workspace) COMPLEX array, dimension (N)
+*
+*  RWORK   (workspace) REAL array, dimension (7*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (5*N)
+*
+*  IFAIL   (output) INTEGER array, dimension (N)
+*          If JOBZ = 'V', then if INFO = 0, the first M elements of
+*          IFAIL are zero.  If INFO > 0, then IFAIL contains the
+*          indices of the eigenvectors that failed to converge.
+*          If JOBZ = 'N', then IFAIL is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is:
+*             <= N:  then i eigenvectors failed to converge.  Their
+*                    indices are stored in array IFAIL.
+*             > N:   if INFO = N + i, for 1 <= i <= N, then CPBSTF
+*                    returned INFO = i: B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+      COMPLEX            CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, UPPER, VALEIG, WANTZ
+      CHARACTER          ORDER, VECT
+      INTEGER            I, IINFO, INDD, INDE, INDEE, INDIBL, INDISP,
+     $                   INDIWK, INDRWK, INDWRK, ITMP1, J, JJ, NSPLIT
+      REAL               TMP1
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CGEMV, CHBGST, CHBTRD, CLACPY, CPBSTF,
+     $                   CSTEIN, CSTEQR, CSWAP, SCOPY, SSTEBZ, SSTERF,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KA.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN
+         INFO = -6
+      ELSE IF( LDAB.LT.KA+1 ) THEN
+         INFO = -8
+      ELSE IF( LDBB.LT.KB+1 ) THEN
+         INFO = -10
+      ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN
+         INFO = -12
+      ELSE IF( INDEIG .AND. IL.LT.1 ) THEN
+         INFO = -13
+      ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN
+         INFO = -14
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -19
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHBGVX', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form a split Cholesky factorization of B.
+*
+      CALL CPBSTF( UPLO, N, KB, BB, LDBB, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem.
+*
+      CALL CHBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Q, LDQ,
+     $             WORK, RWORK, IINFO )
+*
+*     Solve the standard eigenvalue problem.
+*     Reduce Hermitian band matrix to tridiagonal form.
+*
+      INDD = 1
+      INDE = INDD + N
+      INDRWK = INDE + N
+      INDWRK = 1
+      IF( WANTZ ) THEN
+         VECT = 'U'
+      ELSE
+         VECT = 'N'
+      END IF
+      CALL CHBTRD( VECT, UPLO, N, KA, AB, LDAB, RWORK( INDD ),
+     $             RWORK( INDE ), Q, LDQ, WORK( INDWRK ), IINFO )
+*
+*     If all eigenvalues are desired and ABSTOL is less than or equal
+*     to zero, then call SSTERF or CSTEQR.  If this fails for some
+*     eigenvalue, then try SSTEBZ.
+*
+      IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND.
+     $    ( ABSTOL.LE.ZERO ) ) THEN
+         CALL SCOPY( N, RWORK( INDD ), 1, W, 1 )
+         INDEE = INDRWK + 2*N
+         CALL SCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 )
+         IF( .NOT.WANTZ ) THEN
+            CALL SSTERF( N, W, RWORK( INDEE ), INFO )
+         ELSE
+            CALL CLACPY( 'A', N, N, Q, LDQ, Z, LDZ )
+            CALL CSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ,
+     $                   RWORK( INDRWK ), INFO )
+            IF( INFO.EQ.0 ) THEN
+               DO 10 I = 1, N
+                  IFAIL( I ) = 0
+   10          CONTINUE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            M = N
+            GO TO 30
+         END IF
+         INFO = 0
+      END IF
+*
+*     Otherwise, call SSTEBZ and, if eigenvectors are desired,
+*     call CSTEIN.
+*
+      IF( WANTZ ) THEN
+         ORDER = 'B'
+      ELSE
+         ORDER = 'E'
+      END IF
+      INDIBL = 1
+      INDISP = INDIBL + N
+      INDIWK = INDISP + N
+      CALL SSTEBZ( RANGE, ORDER, N, VL, VU, IL, IU, ABSTOL,
+     $             RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W,
+     $             IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ),
+     $             IWORK( INDIWK ), INFO )
+*
+      IF( WANTZ ) THEN
+         CALL CSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W,
+     $                IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ,
+     $                RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO )
+*
+*        Apply unitary matrix used in reduction to tridiagonal
+*        form to eigenvectors returned by CSTEIN.
+*
+         DO 20 J = 1, M
+            CALL CCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 )
+            CALL CGEMV( 'N', N, N, CONE, Q, LDQ, WORK, 1, CZERO,
+     $                  Z( 1, J ), 1 )
+   20    CONTINUE
+      END IF
+*
+   30 CONTINUE
+*
+*     If eigenvalues are not in order, then sort them, along with
+*     eigenvectors.
+*
+      IF( WANTZ ) THEN
+         DO 50 J = 1, M - 1
+            I = 0
+            TMP1 = W( J )
+            DO 40 JJ = J + 1, M
+               IF( W( JJ ).LT.TMP1 ) THEN
+                  I = JJ
+                  TMP1 = W( JJ )
+               END IF
+   40       CONTINUE
+*
+            IF( I.NE.0 ) THEN
+               ITMP1 = IWORK( INDIBL+I-1 )
+               W( I ) = W( J )
+               IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 )
+               W( J ) = TMP1
+               IWORK( INDIBL+J-1 ) = ITMP1
+               CALL CSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 )
+               IF( INFO.NE.0 ) THEN
+                  ITMP1 = IFAIL( I )
+                  IFAIL( I ) = IFAIL( J )
+                  IFAIL( J ) = ITMP1
+               END IF
+            END IF
+   50    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CHBGVX
+*
+      END
diff --git a/costa/native/external/lapack/chbtrd.f b/costa/native/external/lapack/chbtrd.f
new file mode 100644
index 000000000..a87ff602c
--- /dev/null
+++ b/costa/native/external/lapack/chbtrd.f
@@ -0,0 +1,589 @@
+      SUBROUTINE CHBTRD( VECT, UPLO, N, KD, AB, LDAB, D, E, Q, LDQ,
+     $                   WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO, VECT
+      INTEGER            INFO, KD, LDAB, LDQ, N
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * ), E( * )
+      COMPLEX            AB( LDAB, * ), Q( LDQ, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHBTRD reduces a complex Hermitian band matrix A to real symmetric
+*  tridiagonal form T by a unitary similarity transformation:
+*  Q**H * A * Q = T.
+*
+*  Arguments
+*  =========
+*
+*  VECT    (input) CHARACTER*1
+*          = 'N':  do not form Q;
+*          = 'V':  form Q;
+*          = 'U':  update a matrix X, by forming X*Q.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) COMPLEX array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*          On exit, the diagonal elements of AB are overwritten by the
+*          diagonal elements of the tridiagonal matrix T; if KD > 0, the
+*          elements on the first superdiagonal (if UPLO = 'U') or the
+*          first subdiagonal (if UPLO = 'L') are overwritten by the
+*          off-diagonal elements of T; the rest of AB is overwritten by
+*          values generated during the reduction.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  D       (output) REAL array, dimension (N)
+*          The diagonal elements of the tridiagonal matrix T.
+*
+*  E       (output) REAL array, dimension (N-1)
+*          The off-diagonal elements of the tridiagonal matrix T:
+*          E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'.
+*
+*  Q       (input/output) COMPLEX array, dimension (LDQ,N)
+*          On entry, if VECT = 'U', then Q must contain an N-by-N
+*          matrix X; if VECT = 'N' or 'V', then Q need not be set.
+*
+*          On exit:
+*          if VECT = 'V', Q contains the N-by-N unitary matrix Q;
+*          if VECT = 'U', Q contains the product X*Q;
+*          if VECT = 'N', the array Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.
+*          LDQ >= 1, and LDQ >= N if VECT = 'V' or 'U'.
+*
+*  WORK    (workspace) COMPLEX array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  Modified by Linda Kaufman, Bell Labs.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+      COMPLEX            CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            INITQ, UPPER, WANTQ
+      INTEGER            I, I2, IBL, INCA, INCX, IQAEND, IQB, IQEND, J,
+     $                   J1, J1END, J1INC, J2, JEND, JIN, JINC, K, KD1,
+     $                   KDM1, KDN, L, LAST, LEND, NQ, NR, NRT
+      REAL               ABST
+      COMPLEX            T, TEMP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLACGV, CLAR2V, CLARGV, CLARTG, CLARTV, CLASET,
+     $                   CROT, CSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, CONJG, MAX, MIN, REAL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INITQ = LSAME( VECT, 'V' )
+      WANTQ = INITQ .OR. LSAME( VECT, 'U' )
+      UPPER = LSAME( UPLO, 'U' )
+      KD1 = KD + 1
+      KDM1 = KD - 1
+      INCX = LDAB - 1
+      IQEND = 1
+*
+      INFO = 0
+      IF( .NOT.WANTQ .AND. .NOT.LSAME( VECT, 'N' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KD1 ) THEN
+         INFO = -6
+      ELSE IF( LDQ.LT.MAX( 1, N ) .AND. WANTQ ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHBTRD', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Initialize Q to the unit matrix, if needed
+*
+      IF( INITQ )
+     $   CALL CLASET( 'Full', N, N, CZERO, CONE, Q, LDQ )
+*
+*     Wherever possible, plane rotations are generated and applied in
+*     vector operations of length NR over the index set J1:J2:KD1.
+*
+*     The real cosines and complex sines of the plane rotations are
+*     stored in the arrays D and WORK.
+*
+      INCA = KD1*LDAB
+      KDN = MIN( N-1, KD )
+      IF( UPPER ) THEN
+*
+         IF( KD.GT.1 ) THEN
+*
+*           Reduce to complex Hermitian tridiagonal form, working with
+*           the upper triangle
+*
+            NR = 0
+            J1 = KDN + 2
+            J2 = 1
+*
+            AB( KD1, 1 ) = REAL( AB( KD1, 1 ) )
+            DO 90 I = 1, N - 2
+*
+*              Reduce i-th row of matrix to tridiagonal form
+*
+               DO 80 K = KDN + 1, 2, -1
+                  J1 = J1 + KDN
+                  J2 = J2 + KDN
+*
+                  IF( NR.GT.0 ) THEN
+*
+*                    generate plane rotations to annihilate nonzero
+*                    elements which have been created outside the band
+*
+                     CALL CLARGV( NR, AB( 1, J1-1 ), INCA, WORK( J1 ),
+     $                            KD1, D( J1 ), KD1 )
+*
+*                    apply rotations from the right
+*
+*
+*                    Dependent on the the number of diagonals either
+*                    CLARTV or CROT is used
+*
+                     IF( NR.GE.2*KD-1 ) THEN
+                        DO 10 L = 1, KD - 1
+                           CALL CLARTV( NR, AB( L+1, J1-1 ), INCA,
+     $                                  AB( L, J1 ), INCA, D( J1 ),
+     $                                  WORK( J1 ), KD1 )
+   10                   CONTINUE
+*
+                     ELSE
+                        JEND = J1 + ( NR-1 )*KD1
+                        DO 20 JINC = J1, JEND, KD1
+                           CALL CROT( KDM1, AB( 2, JINC-1 ), 1,
+     $                                AB( 1, JINC ), 1, D( JINC ),
+     $                                WORK( JINC ) )
+   20                   CONTINUE
+                     END IF
+                  END IF
+*
+*
+                  IF( K.GT.2 ) THEN
+                     IF( K.LE.N-I+1 ) THEN
+*
+*                       generate plane rotation to annihilate a(i,i+k-1)
+*                       within the band
+*
+                        CALL CLARTG( AB( KD-K+3, I+K-2 ),
+     $                               AB( KD-K+2, I+K-1 ), D( I+K-1 ),
+     $                               WORK( I+K-1 ), TEMP )
+                        AB( KD-K+3, I+K-2 ) = TEMP
+*
+*                       apply rotation from the right
+*
+                        CALL CROT( K-3, AB( KD-K+4, I+K-2 ), 1,
+     $                             AB( KD-K+3, I+K-1 ), 1, D( I+K-1 ),
+     $                             WORK( I+K-1 ) )
+                     END IF
+                     NR = NR + 1
+                     J1 = J1 - KDN - 1
+                  END IF
+*
+*                 apply plane rotations from both sides to diagonal
+*                 blocks
+*
+                  IF( NR.GT.0 )
+     $               CALL CLAR2V( NR, AB( KD1, J1-1 ), AB( KD1, J1 ),
+     $                            AB( KD, J1 ), INCA, D( J1 ),
+     $                            WORK( J1 ), KD1 )
+*
+*                 apply plane rotations from the left
+*
+                  CALL CLACGV( NR, WORK( J1 ), KD1 )
+                  IF( NR.GT.0 ) THEN
+                     IF( 2*KD-1.LT.NR ) THEN
+*
+*                    Dependent on the the number of diagonals either
+*                    CLARTV or CROT is used
+*
+                        DO 30 L = 1, KD - 1
+                           IF( J2+L.GT.N ) THEN
+                              NRT = NR - 1
+                           ELSE
+                              NRT = NR
+                           END IF
+                           IF( NRT.GT.0 )
+     $                        CALL CLARTV( NRT, AB( KD-L, J1+L ), INCA,
+     $                                     AB( KD-L+1, J1+L ), INCA,
+     $                                     D( J1 ), WORK( J1 ), KD1 )
+   30                   CONTINUE
+                     ELSE
+                        J1END = J1 + KD1*( NR-2 )
+                        IF( J1END.GE.J1 ) THEN
+                           DO 40 JIN = J1, J1END, KD1
+                              CALL CROT( KD-1, AB( KD-1, JIN+1 ), INCX,
+     $                                   AB( KD, JIN+1 ), INCX,
+     $                                   D( JIN ), WORK( JIN ) )
+   40                      CONTINUE
+                        END IF
+                        LEND = MIN( KDM1, N-J2 )
+                        LAST = J1END + KD1
+                        IF( LEND.GT.0 )
+     $                     CALL CROT( LEND, AB( KD-1, LAST+1 ), INCX,
+     $                                AB( KD, LAST+1 ), INCX, D( LAST ),
+     $                                WORK( LAST ) )
+                     END IF
+                  END IF
+*
+                  IF( WANTQ ) THEN
+*
+*                    accumulate product of plane rotations in Q
+*
+                     IF( INITQ ) THEN
+*
+*                 take advantage of the fact that Q was
+*                 initially the Identity matrix
+*
+                        IQEND = MAX( IQEND, J2 )
+                        I2 = MAX( 0, K-3 )
+                        IQAEND = 1 + I*KD
+                        IF( K.EQ.2 )
+     $                     IQAEND = IQAEND + KD
+                        IQAEND = MIN( IQAEND, IQEND )
+                        DO 50 J = J1, J2, KD1
+                           IBL = I - I2 / KDM1
+                           I2 = I2 + 1
+                           IQB = MAX( 1, J-IBL )
+                           NQ = 1 + IQAEND - IQB
+                           IQAEND = MIN( IQAEND+KD, IQEND )
+                           CALL CROT( NQ, Q( IQB, J-1 ), 1, Q( IQB, J ),
+     $                                1, D( J ), CONJG( WORK( J ) ) )
+   50                   CONTINUE
+                     ELSE
+*
+                        DO 60 J = J1, J2, KD1
+                           CALL CROT( N, Q( 1, J-1 ), 1, Q( 1, J ), 1,
+     $                                D( J ), CONJG( WORK( J ) ) )
+   60                   CONTINUE
+                     END IF
+*
+                  END IF
+*
+                  IF( J2+KDN.GT.N ) THEN
+*
+*                    adjust J2 to keep within the bounds of the matrix
+*
+                     NR = NR - 1
+                     J2 = J2 - KDN - 1
+                  END IF
+*
+                  DO 70 J = J1, J2, KD1
+*
+*                    create nonzero element a(j-1,j+kd) outside the band
+*                    and store it in WORK
+*
+                     WORK( J+KD ) = WORK( J )*AB( 1, J+KD )
+                     AB( 1, J+KD ) = D( J )*AB( 1, J+KD )
+   70             CONTINUE
+   80          CONTINUE
+   90       CONTINUE
+         END IF
+*
+         IF( KD.GT.0 ) THEN
+*
+*           make off-diagonal elements real and copy them to E
+*
+            DO 100 I = 1, N - 1
+               T = AB( KD, I+1 )
+               ABST = ABS( T )
+               AB( KD, I+1 ) = ABST
+               E( I ) = ABST
+               IF( ABST.NE.ZERO ) THEN
+                  T = T / ABST
+               ELSE
+                  T = CONE
+               END IF
+               IF( I.LT.N-1 )
+     $            AB( KD, I+2 ) = AB( KD, I+2 )*T
+               IF( WANTQ ) THEN
+                  CALL CSCAL( N, CONJG( T ), Q( 1, I+1 ), 1 )
+               END IF
+  100       CONTINUE
+         ELSE
+*
+*           set E to zero if original matrix was diagonal
+*
+            DO 110 I = 1, N - 1
+               E( I ) = ZERO
+  110       CONTINUE
+         END IF
+*
+*        copy diagonal elements to D
+*
+         DO 120 I = 1, N
+            D( I ) = AB( KD1, I )
+  120    CONTINUE
+*
+      ELSE
+*
+         IF( KD.GT.1 ) THEN
+*
+*           Reduce to complex Hermitian tridiagonal form, working with
+*           the lower triangle
+*
+            NR = 0
+            J1 = KDN + 2
+            J2 = 1
+*
+            AB( 1, 1 ) = REAL( AB( 1, 1 ) )
+            DO 210 I = 1, N - 2
+*
+*              Reduce i-th column of matrix to tridiagonal form
+*
+               DO 200 K = KDN + 1, 2, -1
+                  J1 = J1 + KDN
+                  J2 = J2 + KDN
+*
+                  IF( NR.GT.0 ) THEN
+*
+*                    generate plane rotations to annihilate nonzero
+*                    elements which have been created outside the band
+*
+                     CALL CLARGV( NR, AB( KD1, J1-KD1 ), INCA,
+     $                            WORK( J1 ), KD1, D( J1 ), KD1 )
+*
+*                    apply plane rotations from one side
+*
+*
+*                    Dependent on the the number of diagonals either
+*                    CLARTV or CROT is used
+*
+                     IF( NR.GT.2*KD-1 ) THEN
+                        DO 130 L = 1, KD - 1
+                           CALL CLARTV( NR, AB( KD1-L, J1-KD1+L ), INCA,
+     $                                  AB( KD1-L+1, J1-KD1+L ), INCA,
+     $                                  D( J1 ), WORK( J1 ), KD1 )
+  130                   CONTINUE
+                     ELSE
+                        JEND = J1 + KD1*( NR-1 )
+                        DO 140 JINC = J1, JEND, KD1
+                           CALL CROT( KDM1, AB( KD, JINC-KD ), INCX,
+     $                                AB( KD1, JINC-KD ), INCX,
+     $                                D( JINC ), WORK( JINC ) )
+  140                   CONTINUE
+                     END IF
+*
+                  END IF
+*
+                  IF( K.GT.2 ) THEN
+                     IF( K.LE.N-I+1 ) THEN
+*
+*                       generate plane rotation to annihilate a(i+k-1,i)
+*                       within the band
+*
+                        CALL CLARTG( AB( K-1, I ), AB( K, I ),
+     $                               D( I+K-1 ), WORK( I+K-1 ), TEMP )
+                        AB( K-1, I ) = TEMP
+*
+*                       apply rotation from the left
+*
+                        CALL CROT( K-3, AB( K-2, I+1 ), LDAB-1,
+     $                             AB( K-1, I+1 ), LDAB-1, D( I+K-1 ),
+     $                             WORK( I+K-1 ) )
+                     END IF
+                     NR = NR + 1
+                     J1 = J1 - KDN - 1
+                  END IF
+*
+*                 apply plane rotations from both sides to diagonal
+*                 blocks
+*
+                  IF( NR.GT.0 )
+     $               CALL CLAR2V( NR, AB( 1, J1-1 ), AB( 1, J1 ),
+     $                            AB( 2, J1-1 ), INCA, D( J1 ),
+     $                            WORK( J1 ), KD1 )
+*
+*                 apply plane rotations from the right
+*
+*
+*                    Dependent on the the number of diagonals either
+*                    CLARTV or CROT is used
+*
+                  CALL CLACGV( NR, WORK( J1 ), KD1 )
+                  IF( NR.GT.0 ) THEN
+                     IF( NR.GT.2*KD-1 ) THEN
+                        DO 150 L = 1, KD - 1
+                           IF( J2+L.GT.N ) THEN
+                              NRT = NR - 1
+                           ELSE
+                              NRT = NR
+                           END IF
+                           IF( NRT.GT.0 )
+     $                        CALL CLARTV( NRT, AB( L+2, J1-1 ), INCA,
+     $                                     AB( L+1, J1 ), INCA, D( J1 ),
+     $                                     WORK( J1 ), KD1 )
+  150                   CONTINUE
+                     ELSE
+                        J1END = J1 + KD1*( NR-2 )
+                        IF( J1END.GE.J1 ) THEN
+                           DO 160 J1INC = J1, J1END, KD1
+                              CALL CROT( KDM1, AB( 3, J1INC-1 ), 1,
+     $                                   AB( 2, J1INC ), 1, D( J1INC ),
+     $                                   WORK( J1INC ) )
+  160                      CONTINUE
+                        END IF
+                        LEND = MIN( KDM1, N-J2 )
+                        LAST = J1END + KD1
+                        IF( LEND.GT.0 )
+     $                     CALL CROT( LEND, AB( 3, LAST-1 ), 1,
+     $                                AB( 2, LAST ), 1, D( LAST ),
+     $                                WORK( LAST ) )
+                     END IF
+                  END IF
+*
+*
+*
+                  IF( WANTQ ) THEN
+*
+*                    accumulate product of plane rotations in Q
+*
+                     IF( INITQ ) THEN
+*
+*                 take advantage of the fact that Q was
+*                 initially the Identity matrix
+*
+                        IQEND = MAX( IQEND, J2 )
+                        I2 = MAX( 0, K-3 )
+                        IQAEND = 1 + I*KD
+                        IF( K.EQ.2 )
+     $                     IQAEND = IQAEND + KD
+                        IQAEND = MIN( IQAEND, IQEND )
+                        DO 170 J = J1, J2, KD1
+                           IBL = I - I2 / KDM1
+                           I2 = I2 + 1
+                           IQB = MAX( 1, J-IBL )
+                           NQ = 1 + IQAEND - IQB
+                           IQAEND = MIN( IQAEND+KD, IQEND )
+                           CALL CROT( NQ, Q( IQB, J-1 ), 1, Q( IQB, J ),
+     $                                1, D( J ), WORK( J ) )
+  170                   CONTINUE
+                     ELSE
+*
+                        DO 180 J = J1, J2, KD1
+                           CALL CROT( N, Q( 1, J-1 ), 1, Q( 1, J ), 1,
+     $                                D( J ), WORK( J ) )
+  180                   CONTINUE
+                     END IF
+                  END IF
+*
+                  IF( J2+KDN.GT.N ) THEN
+*
+*                    adjust J2 to keep within the bounds of the matrix
+*
+                     NR = NR - 1
+                     J2 = J2 - KDN - 1
+                  END IF
+*
+                  DO 190 J = J1, J2, KD1
+*
+*                    create nonzero element a(j+kd,j-1) outside the
+*                    band and store it in WORK
+*
+                     WORK( J+KD ) = WORK( J )*AB( KD1, J )
+                     AB( KD1, J ) = D( J )*AB( KD1, J )
+  190             CONTINUE
+  200          CONTINUE
+  210       CONTINUE
+         END IF
+*
+         IF( KD.GT.0 ) THEN
+*
+*           make off-diagonal elements real and copy them to E
+*
+            DO 220 I = 1, N - 1
+               T = AB( 2, I )
+               ABST = ABS( T )
+               AB( 2, I ) = ABST
+               E( I ) = ABST
+               IF( ABST.NE.ZERO ) THEN
+                  T = T / ABST
+               ELSE
+                  T = CONE
+               END IF
+               IF( I.LT.N-1 )
+     $            AB( 2, I+1 ) = AB( 2, I+1 )*T
+               IF( WANTQ ) THEN
+                  CALL CSCAL( N, T, Q( 1, I+1 ), 1 )
+               END IF
+  220       CONTINUE
+         ELSE
+*
+*           set E to zero if original matrix was diagonal
+*
+            DO 230 I = 1, N - 1
+               E( I ) = ZERO
+  230       CONTINUE
+         END IF
+*
+*        copy diagonal elements to D
+*
+         DO 240 I = 1, N
+            D( I ) = AB( 1, I )
+  240    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CHBTRD
+*
+      END
diff --git a/costa/native/external/lapack/checon.f b/costa/native/external/lapack/checon.f
new file mode 100644
index 000000000..9f3016f1b
--- /dev/null
+++ b/costa/native/external/lapack/checon.f
@@ -0,0 +1,159 @@
+      SUBROUTINE CHECON( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+      REAL               ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHECON estimates the reciprocal of the condition number of a complex
+*  Hermitian matrix A using the factorization A = U*D*U**H or
+*  A = L*D*L**H computed by CHETRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**H;
+*          = 'L':  Lower triangular, form is A = L*D*L**H.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input) COMPLEX array, dimension (LDA,N)
+*          The block diagonal matrix D and the multipliers used to
+*          obtain the factor U or L as computed by CHETRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by CHETRF.
+*
+*  ANORM   (input) REAL
+*          The 1-norm of the original matrix A.
+*
+*  RCOND   (output) REAL
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an
+*          estimate of the 1-norm of inv(A) computed in this routine.
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, KASE
+      REAL               AINVNM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CHETRS, CLACON, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHECON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.LE.ZERO ) THEN
+         RETURN
+      END IF
+*
+*     Check that the diagonal matrix D is nonsingular.
+*
+      IF( UPPER ) THEN
+*
+*        Upper triangular storage: examine D from bottom to top
+*
+         DO 10 I = N, 1, -1
+            IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO )
+     $         RETURN
+   10    CONTINUE
+      ELSE
+*
+*        Lower triangular storage: examine D from top to bottom.
+*
+         DO 20 I = 1, N
+            IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO )
+     $         RETURN
+   20    CONTINUE
+      END IF
+*
+*     Estimate the 1-norm of the inverse.
+*
+      KASE = 0
+   30 CONTINUE
+      CALL CLACON( N, WORK( N+1 ), WORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+*
+*        Multiply by inv(L*D*L') or inv(U*D*U').
+*
+         CALL CHETRS( UPLO, N, 1, A, LDA, IPIV, WORK, N, INFO )
+         GO TO 30
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+      RETURN
+*
+*     End of CHECON
+*
+      END
diff --git a/costa/native/external/lapack/cheev.f b/costa/native/external/lapack/cheev.f
new file mode 100644
index 000000000..3309689e4
--- /dev/null
+++ b/costa/native/external/lapack/cheev.f
@@ -0,0 +1,220 @@
+      SUBROUTINE CHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK,
+     $                  INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      REAL               RWORK( * ), W( * )
+      COMPLEX            A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHEEV computes all eigenvalues and, optionally, eigenvectors of a
+*  complex Hermitian matrix A.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA, N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of A contains the
+*          upper triangular part of the matrix A.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of A contains
+*          the lower triangular part of the matrix A.
+*          On exit, if JOBZ = 'V', then if INFO = 0, A contains the
+*          orthonormal eigenvectors of the matrix A.
+*          If JOBZ = 'N', then on exit the lower triangle (if UPLO='L')
+*          or the upper triangle (if UPLO='U') of A, including the
+*          diagonal, is destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  W       (output) REAL array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.  LWORK >= max(1,2*N-1).
+*          For optimal efficiency, LWORK >= (NB+1)*N,
+*          where NB is the blocksize for CHETRD returned by ILAENV.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) REAL array, dimension (max(1, 3*N-2))
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the algorithm failed to converge; i
+*                off-diagonal elements of an intermediate tridiagonal
+*                form did not converge to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+      COMPLEX            CONE
+      PARAMETER          ( CONE = ( 1.0E0, 0.0E0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LOWER, LQUERY, WANTZ
+      INTEGER            IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE,
+     $                   LLWORK, LOPT, LWKOPT, NB
+      REAL               ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
+     $                   SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               CLANHE, SLAMCH
+      EXTERNAL           ILAENV, LSAME, CLANHE, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CHETRD, CLASCL, CSTEQR, CUNGTR, SSCAL, SSTERF,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      LOWER = LSAME( UPLO, 'L' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.MAX( 1, 2*N-1 ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'CHETRD', UPLO, N, -1, -1, -1 )
+         LWKOPT = MAX( 1, ( NB+1 )*N )
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHEEV ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( N.EQ.1 ) THEN
+         W( 1 ) = A( 1, 1 )
+         WORK( 1 ) = 3
+         IF( WANTZ )
+     $      A( 1, 1 ) = CONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      EPS = SLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = SQRT( BIGNUM )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ANRM = CLANHE( 'M', UPLO, N, A, LDA, RWORK )
+      ISCALE = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 )
+     $   CALL CLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO )
+*
+*     Call CHETRD to reduce Hermitian matrix to tridiagonal form.
+*
+      INDE = 1
+      INDTAU = 1
+      INDWRK = INDTAU + N
+      LLWORK = LWORK - INDWRK + 1
+      CALL CHETRD( UPLO, N, A, LDA, W, RWORK( INDE ), WORK( INDTAU ),
+     $             WORK( INDWRK ), LLWORK, IINFO )
+      LOPT = N + WORK( INDWRK )
+*
+*     For eigenvalues only, call SSTERF.  For eigenvectors, first call
+*     CUNGTR to generate the unitary matrix, then call CSTEQR.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL SSTERF( N, W, RWORK( INDE ), INFO )
+      ELSE
+         CALL CUNGTR( UPLO, N, A, LDA, WORK( INDTAU ), WORK( INDWRK ),
+     $                LLWORK, IINFO )
+         INDWRK = INDE + N
+         CALL CSTEQR( JOBZ, N, W, RWORK( INDE ), A, LDA,
+     $                RWORK( INDWRK ), INFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = N
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL SSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+*     Set WORK(1) to optimal complex workspace size.
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of CHEEV
+*
+      END
diff --git a/costa/native/external/lapack/cheevd.f b/costa/native/external/lapack/cheevd.f
new file mode 100644
index 000000000..4fef57448
--- /dev/null
+++ b/costa/native/external/lapack/cheevd.f
@@ -0,0 +1,296 @@
+      SUBROUTINE CHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK,
+     $                   LRWORK, IWORK, LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, LDA, LIWORK, LRWORK, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               RWORK( * ), W( * )
+      COMPLEX            A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHEEVD computes all eigenvalues and, optionally, eigenvectors of a
+*  complex Hermitian matrix A.  If eigenvectors are desired, it uses a
+*  divide and conquer algorithm.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA, N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of A contains the
+*          upper triangular part of the matrix A.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of A contains
+*          the lower triangular part of the matrix A.
+*          On exit, if JOBZ = 'V', then if INFO = 0, A contains the
+*          orthonormal eigenvectors of the matrix A.
+*          If JOBZ = 'N', then on exit the lower triangle (if UPLO='L')
+*          or the upper triangle (if UPLO='U') of A, including the
+*          diagonal, is destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  W       (output) REAL array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.
+*          If N <= 1,                LWORK must be at least 1.
+*          If JOBZ  = 'N' and N > 1, LWORK must be at least N + 1.
+*          If JOBZ  = 'V' and N > 1, LWORK must be at least 2*N + N**2.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace/output) REAL array,
+*                                         dimension (LRWORK)
+*          On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
+*
+*  LRWORK  (input) INTEGER
+*          The dimension of the array RWORK.
+*          If N <= 1,                LRWORK must be at least 1.
+*          If JOBZ  = 'N' and N > 1, LRWORK must be at least N.
+*          If JOBZ  = 'V' and N > 1, LRWORK must be at least
+*                         1 + 5*N + 2*N**2.
+*
+*          If LRWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the RWORK array,
+*          returns this value as the first entry of the RWORK array, and
+*          no error message related to LRWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.
+*          If N <= 1,                LIWORK must be at least 1.
+*          If JOBZ  = 'N' and N > 1, LIWORK must be at least 1.
+*          If JOBZ  = 'V' and N > 1, LIWORK must be at least 3 + 5*N.
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the algorithm failed to converge; i
+*                off-diagonal elements of an intermediate tridiagonal
+*                form did not converge to zero.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Jeff Rutter, Computer Science Division, University of California
+*     at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+      COMPLEX            CONE
+      PARAMETER          ( CONE = ( 1.0E0, 0.0E0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LOWER, LQUERY, WANTZ
+      INTEGER            IINFO, IMAX, INDE, INDRWK, INDTAU, INDWK2,
+     $                   INDWRK, ISCALE, LIOPT, LIWMIN, LLRWK, LLWORK,
+     $                   LLWRK2, LOPT, LROPT, LRWMIN, LWMIN
+      REAL               ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
+     $                   SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               CLANHE, SLAMCH
+      EXTERNAL           LSAME, CLANHE, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CHETRD, CLACPY, CLASCL, CSTEDC, CUNMTR, SSCAL,
+     $                   SSTERF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, MAX, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      LOWER = LSAME( UPLO, 'L' )
+      LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( N.LE.1 ) THEN
+         LWMIN = 1
+         LRWMIN = 1
+         LIWMIN = 1
+         LOPT = LWMIN
+         LROPT = LRWMIN
+         LIOPT = LIWMIN
+      ELSE
+         IF( WANTZ ) THEN
+            LWMIN = 2*N + N*N
+            LRWMIN = 1 + 5*N + 2*N**2
+            LIWMIN = 3 + 5*N
+         ELSE
+            LWMIN = N + 1
+            LRWMIN = N
+            LIWMIN = 1
+         END IF
+         LOPT = LWMIN
+         LROPT = LRWMIN
+         LIOPT = LIWMIN
+      END IF
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -10
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LOPT
+         RWORK( 1 ) = LROPT
+         IWORK( 1 ) = LIOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHEEVD', -INFO )
+         RETURN 
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         W( 1 ) = A( 1, 1 )
+         IF( WANTZ )
+     $      A( 1, 1 ) = CONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      EPS = SLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = SQRT( BIGNUM )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ANRM = CLANHE( 'M', UPLO, N, A, LDA, RWORK )
+      ISCALE = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 )
+     $   CALL CLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO )
+*
+*     Call CHETRD to reduce Hermitian matrix to tridiagonal form.
+*
+      INDE = 1
+      INDTAU = 1
+      INDWRK = INDTAU + N
+      INDRWK = INDE + N
+      INDWK2 = INDWRK + N*N
+      LLWORK = LWORK - INDWRK + 1
+      LLWRK2 = LWORK - INDWK2 + 1
+      LLRWK = LRWORK - INDRWK + 1
+      CALL CHETRD( UPLO, N, A, LDA, W, RWORK( INDE ), WORK( INDTAU ),
+     $             WORK( INDWRK ), LLWORK, IINFO )
+      LOPT = MAX( REAL( LOPT ), REAL( N )+REAL( WORK( INDWRK ) ) )
+*
+*     For eigenvalues only, call SSTERF.  For eigenvectors, first call
+*     CSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the
+*     tridiagonal matrix, then call CUNMTR to multiply it to the
+*     Householder transformations represented as Householder vectors in
+*     A.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL SSTERF( N, W, RWORK( INDE ), INFO )
+      ELSE
+         CALL CSTEDC( 'I', N, W, RWORK( INDE ), WORK( INDWRK ), N,
+     $                WORK( INDWK2 ), LLWRK2, RWORK( INDRWK ), LLRWK,
+     $                IWORK, LIWORK, INFO )
+         CALL CUNMTR( 'L', UPLO, 'N', N, N, A, LDA, WORK( INDTAU ),
+     $                WORK( INDWRK ), N, WORK( INDWK2 ), LLWRK2, IINFO )
+         CALL CLACPY( 'A', N, N, WORK( INDWRK ), N, A, LDA )
+         LOPT = MAX( LOPT, N+N**2+INT( WORK( INDWK2 ) ) )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = N
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL SSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+      WORK( 1 ) = LOPT
+      RWORK( 1 ) = LROPT
+      IWORK( 1 ) = LIOPT
+*
+      RETURN
+*
+*     End of CHEEVD
+*
+      END
diff --git a/costa/native/external/lapack/cheevr.f b/costa/native/external/lapack/cheevr.f
new file mode 100644
index 000000000..80475f624
--- /dev/null
+++ b/costa/native/external/lapack/cheevr.f
@@ -0,0 +1,522 @@
+      SUBROUTINE CHEEVR( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU,
+     $                   ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK,
+     $                   RWORK, LRWORK, IWORK, LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 20, 2000
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE, UPLO
+      INTEGER            IL, INFO, IU, LDA, LDZ, LIWORK, LRWORK, LWORK,
+     $                   M, N
+      REAL               ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            ISUPPZ( * ), IWORK( * )
+      REAL               RWORK( * ), W( * )
+      COMPLEX            A( LDA, * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHEEVR computes selected eigenvalues and, optionally, eigenvectors
+*  of a complex Hermitian matrix T.  Eigenvalues and eigenvectors can
+*  be selected by specifying either a range of values or a range of
+*  indices for the desired eigenvalues.
+*
+*  Whenever possible, CHEEVR calls CSTEGR to compute the
+*  eigenspectrum using Relatively Robust Representations.  CSTEGR
+*  computes eigenvalues by the dqds algorithm, while orthogonal
+*  eigenvectors are computed from various "good" L D L^T representations
+*  (also known as Relatively Robust Representations). Gram-Schmidt
+*  orthogonalization is avoided as far as possible. More specifically,
+*  the various steps of the algorithm are as follows. For the i-th
+*  unreduced block of T,
+*     (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T
+*          is a relatively robust representation,
+*     (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high
+*         relative accuracy by the dqds algorithm,
+*     (c) If there is a cluster of close eigenvalues, "choose" sigma_i
+*         close to the cluster, and go to step (a),
+*     (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T,
+*         compute the corresponding eigenvector by forming a
+*         rank-revealing twisted factorization.
+*  The desired accuracy of the output can be specified by the input
+*  parameter ABSTOL.
+*
+*  For more details, see "A new O(n^2) algorithm for the symmetric
+*  tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon,
+*  Computer Science Division Technical Report No. UCB//CSD-97-971,
+*  UC Berkeley, May 1997.
+*
+*
+*  Note 1 : CHEEVR calls CSTEGR when the full spectrum is requested
+*  on machines which conform to the ieee-754 floating point standard.
+*  CHEEVR calls SSTEBZ and CSTEIN on non-ieee machines and
+*  when partial spectrum requests are made.
+*
+*  Normal execution of CSTEGR may create NaNs and infinities and
+*  hence may abort due to a floating point exception in environments
+*  which do not handle NaNs and infinities in the ieee standard default
+*  manner.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found.
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found.
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+********** For RANGE = 'V' or 'I' and IU - IL < N - 1, SSTEBZ and
+********** CSTEIN are called
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA, N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of A contains the
+*          upper triangular part of the matrix A.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of A contains
+*          the lower triangular part of the matrix A.
+*          On exit, the lower triangle (if UPLO='L') or the upper
+*          triangle (if UPLO='U') of A, including the diagonal, is
+*          destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  VL      (input) REAL
+*  VU      (input) REAL
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) REAL
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less than
+*          or equal to zero, then  EPS*|T|  will be used in its place,
+*          where |T| is the 1-norm of the tridiagonal matrix obtained
+*          by reducing A to tridiagonal form.
+*
+*          See "Computing Small Singular Values of Bidiagonal Matrices
+*          with Guaranteed High Relative Accuracy," by Demmel and
+*          Kahan, LAPACK Working Note #3.
+*
+*          If high relative accuracy is important, set ABSTOL to
+*          SLAMCH( 'Safe minimum' ).  Doing so will guarantee that
+*          eigenvalues are computed to high relative accuracy when
+*          possible in future releases.  The current code does not
+*          make any guarantees about high relative accuracy, but
+*          furutre releases will. See J. Barlow and J. Demmel,
+*          "Computing Accurate Eigensystems of Scaled Diagonally
+*          Dominant Matrices", LAPACK Working Note #7, for a discussion
+*          of which matrices define their eigenvalues to high relative
+*          accuracy.
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) REAL array, dimension (N)
+*          The first M elements contain the selected eigenvalues in
+*          ascending order.
+*
+*  Z       (output) COMPLEX array, dimension (LDZ, max(1,M))
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix A
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  ISUPPZ  (output) INTEGER ARRAY, dimension ( 2*max(1,M) )
+*          The support of the eigenvectors in Z, i.e., the indices
+*          indicating the nonzero elements in Z. The i-th eigenvector
+*          is nonzero only in elements ISUPPZ( 2*i-1 ) through
+*          ISUPPZ( 2*i ).
+********** Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.  LWORK >= max(1,2*N).
+*          For optimal efficiency, LWORK >= (NB+1)*N,
+*          where NB is the max of the blocksize for CHETRD and for
+*          CUNMTR as returned by ILAENV.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace/output) REAL array, dimension (LRWORK)
+*          On exit, if INFO = 0, RWORK(1) returns the optimal
+*          (and minimal) LRWORK.
+*
+* LRWORK  (input) INTEGER
+*         The length of the array RWORK.  LRWORK >= max(1,24*N).
+*
+*         If LRWORK = -1, then a workspace query is assumed; the routine
+*         only calculates the optimal size of the RWORK array, returns
+*         this value as the first entry of the RWORK array, and no error
+*         message related to LRWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal
+*          (and minimal) LIWORK.
+*
+* LIWORK  (input) INTEGER
+*         The dimension of the array IWORK.  LIWORK >= max(1,10*N).
+*
+*         If LIWORK = -1, then a workspace query is assumed; the
+*         routine only calculates the optimal size of the IWORK array,
+*         returns this value as the first entry of the IWORK array, and
+*         no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  Internal error
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Inderjit Dhillon, IBM Almaden, USA
+*     Osni Marques, LBNL/NERSC, USA
+*     Ken Stanley, Computer Science Division, University of
+*       California at Berkeley, USA
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, LOWER, LQUERY, VALEIG, WANTZ
+      CHARACTER          ORDER
+      INTEGER            I, IEEEOK, IINFO, IMAX, INDIBL, INDIFL, INDISP,
+     $                   INDIWO, INDRD, INDRDD, INDRE, INDREE, INDRWK,
+     $                   INDTAU, INDWK, INDWKN, ISCALE, ITMP1, J, JJ,
+     $                   LIWMIN, LLWORK, LLWRKN, LRWMIN, LWKOPT, LWMIN,
+     $                   NB, NSPLIT
+      REAL               ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN,
+     $                   SIGMA, SMLNUM, TMP1, VLL, VUU
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               CLANSY, SLAMCH
+      EXTERNAL           LSAME, ILAENV, CLANSY, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CHETRD, CSSCAL, CSTEGR, CSTEIN, CSWAP, CUNMTR,
+     $                   SCOPY, SSCAL, SSTEBZ, SSTERF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      IEEEOK = ILAENV( 10, 'CHEEVR', 'N', 1, 2, 3, 4 )
+*
+      LOWER = LSAME( UPLO, 'L' )
+      WANTZ = LSAME( JOBZ, 'V' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+*
+      LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LRWORK.EQ.-1 ) .OR.
+     $         ( LIWORK.EQ.-1 ) )
+*
+      LRWMIN = MAX( 1, 24*N )
+      LIWMIN = MAX( 1, 10*N )
+      LWMIN = MAX( 1, 2*N )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE
+         IF( VALEIG ) THEN
+            IF( N.GT.0 .AND. VU.LE.VL )
+     $         INFO = -8
+         ELSE IF( INDEIG ) THEN
+            IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN
+               INFO = -9
+            ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN
+               INFO = -10
+            END IF
+         END IF
+      END IF
+      IF( INFO.EQ.0 ) THEN
+         IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+            INFO = -15
+         ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+            INFO = -18
+         ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN
+            INFO = -20
+         ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+            INFO = -22
+         END IF
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'CHETRD', UPLO, N, -1, -1, -1 )
+         NB = MAX( NB, ILAENV( 1, 'CUNMTR', UPLO, N, -1, -1, -1 ) )
+         LWKOPT = MAX( ( NB+1 )*N, LWMIN )
+         WORK( 1 ) = LWKOPT
+         RWORK( 1 ) = LRWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHEEVR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( N.EQ.1 ) THEN
+         WORK( 1 ) = 7
+         IF( ALLEIG .OR. INDEIG ) THEN
+            M = 1
+            W( 1 ) = REAL( A( 1, 1 ) )
+         ELSE
+            IF( VL.LT.REAL( A( 1, 1 ) ) .AND. VU.GE.REAL( A( 1, 1 ) ) )
+     $           THEN
+               M = 1
+               W( 1 ) = REAL( A( 1, 1 ) )
+            END IF
+         END IF
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      EPS = SLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ISCALE = 0
+      ABSTLL = ABSTOL
+      VLL = VL
+      VUU = VU
+      ANRM = CLANSY( 'M', UPLO, N, A, LDA, RWORK )
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         IF( LOWER ) THEN
+            DO 10 J = 1, N
+               CALL CSSCAL( N-J+1, SIGMA, A( J, J ), 1 )
+   10       CONTINUE
+         ELSE
+            DO 20 J = 1, N
+               CALL CSSCAL( J, SIGMA, A( 1, J ), 1 )
+   20       CONTINUE
+         END IF
+         IF( ABSTOL.GT.0 )
+     $      ABSTLL = ABSTOL*SIGMA
+         IF( VALEIG ) THEN
+            VLL = VL*SIGMA
+            VUU = VU*SIGMA
+         END IF
+      END IF
+*
+*     Call CHETRD to reduce Hermitian matrix to tridiagonal form.
+*
+      INDTAU = 1
+      INDWK = INDTAU + N
+*
+      INDRE = 1
+      INDRD = INDRE + N
+      INDREE = INDRD + N
+      INDRDD = INDREE + N
+      INDRWK = INDRDD + N
+      LLWORK = LWORK - INDWK + 1
+      CALL CHETRD( UPLO, N, A, LDA, RWORK( INDRD ), RWORK( INDRE ),
+     $             WORK( INDTAU ), WORK( INDWK ), LLWORK, IINFO )
+*
+*     If all eigenvalues are desired
+*     then call SSTERF or CSTEGR and CUNMTR.
+*
+      IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND.
+     $    IEEEOK.EQ.1 ) THEN
+         IF( .NOT.WANTZ ) THEN
+            CALL SCOPY( N, RWORK( INDRD ), 1, W, 1 )
+            CALL SCOPY( N-1, RWORK( INDRE ), 1, RWORK( INDREE ), 1 )
+            CALL SSTERF( N, W, RWORK( INDREE ), INFO )
+         ELSE
+            CALL SCOPY( N-1, RWORK( INDRE ), 1, RWORK( INDREE ), 1 )
+            CALL SCOPY( N, RWORK( INDRD ), 1, RWORK( INDRDD ), 1 )
+*
+            CALL CSTEGR( JOBZ, 'A', N, RWORK( INDRDD ),
+     $                   RWORK( INDREE ), VL, VU, IL, IU, ABSTOL, M, W,
+     $                   Z, LDZ, ISUPPZ, RWORK( INDRWK ), LWORK, IWORK,
+     $                   LIWORK, INFO )
+*
+*
+*
+*        Apply unitary matrix used in reduction to tridiagonal
+*        form to eigenvectors returned by CSTEIN.
+*
+            IF( WANTZ .AND. INFO.EQ.0 ) THEN
+               INDWKN = INDWK
+               LLWRKN = LWORK - INDWKN + 1
+               CALL CUNMTR( 'L', UPLO, 'N', N, M, A, LDA,
+     $                      WORK( INDTAU ), Z, LDZ, WORK( INDWKN ),
+     $                      LLWRKN, IINFO )
+            END IF
+         END IF
+*
+*
+         IF( INFO.EQ.0 ) THEN
+            M = N
+            GO TO 30
+         END IF
+         INFO = 0
+      END IF
+*
+*     Otherwise, call SSTEBZ and, if eigenvectors are desired, CSTEIN.
+*     Also call SSTEBZ and CSTEIN if CSTEGR fails.
+*
+      IF( WANTZ ) THEN
+         ORDER = 'B'
+      ELSE
+         ORDER = 'E'
+      END IF
+      INDIFL = 1
+      INDIBL = INDIFL + N
+      INDISP = INDIBL + N
+      INDIWO = INDISP + N
+      CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL,
+     $             RWORK( INDRD ), RWORK( INDRE ), M, NSPLIT, W,
+     $             IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ),
+     $             IWORK( INDIWO ), INFO )
+*
+      IF( WANTZ ) THEN
+         CALL CSTEIN( N, RWORK( INDRD ), RWORK( INDRE ), M, W,
+     $                IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ,
+     $                RWORK( INDRWK ), IWORK( INDIWO ), IWORK( INDIFL ),
+     $                INFO )
+*
+*        Apply unitary matrix used in reduction to tridiagonal
+*        form to eigenvectors returned by CSTEIN.
+*
+         INDWKN = INDWK
+         LLWRKN = LWORK - INDWKN + 1
+         CALL CUNMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z,
+     $                LDZ, WORK( INDWKN ), LLWRKN, IINFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+   30 CONTINUE
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = M
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL SSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+*     If eigenvalues are not in order, then sort them, along with
+*     eigenvectors.
+*
+      IF( WANTZ ) THEN
+         DO 50 J = 1, M - 1
+            I = 0
+            TMP1 = W( J )
+            DO 40 JJ = J + 1, M
+               IF( W( JJ ).LT.TMP1 ) THEN
+                  I = JJ
+                  TMP1 = W( JJ )
+               END IF
+   40       CONTINUE
+*
+            IF( I.NE.0 ) THEN
+               ITMP1 = IWORK( INDIBL+I-1 )
+               W( I ) = W( J )
+               IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 )
+               W( J ) = TMP1
+               IWORK( INDIBL+J-1 ) = ITMP1
+               CALL CSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 )
+            END IF
+   50    CONTINUE
+      END IF
+*
+*     Set WORK(1) to optimal workspace size.
+*
+      WORK( 1 ) = LWKOPT
+      RWORK( 1 ) = LRWMIN
+      IWORK( 1 ) = LIWMIN
+*
+      RETURN
+*
+*     End of CHEEVR
+*
+      END
diff --git a/costa/native/external/lapack/cheevx.f b/costa/native/external/lapack/cheevx.f
new file mode 100644
index 000000000..f95133620
--- /dev/null
+++ b/costa/native/external/lapack/cheevx.f
@@ -0,0 +1,426 @@
+      SUBROUTINE CHEEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU,
+     $                   ABSTOL, M, W, Z, LDZ, WORK, LWORK, RWORK,
+     $                   IWORK, IFAIL, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE, UPLO
+      INTEGER            IL, INFO, IU, LDA, LDZ, LWORK, M, N
+      REAL               ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IFAIL( * ), IWORK( * )
+      REAL               RWORK( * ), W( * )
+      COMPLEX            A( LDA, * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHEEVX computes selected eigenvalues and, optionally, eigenvectors
+*  of a complex Hermitian matrix A.  Eigenvalues and eigenvectors can
+*  be selected by specifying either a range of values or a range of
+*  indices for the desired eigenvalues.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found.
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found.
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA, N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of A contains the
+*          upper triangular part of the matrix A.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of A contains
+*          the lower triangular part of the matrix A.
+*          On exit, the lower triangle (if UPLO='L') or the upper
+*          triangle (if UPLO='U') of A, including the diagonal, is
+*          destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  VL      (input) REAL
+*  VU      (input) REAL
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) REAL
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less than
+*          or equal to zero, then  EPS*|T|  will be used in its place,
+*          where |T| is the 1-norm of the tridiagonal matrix obtained
+*          by reducing A to tridiagonal form.
+*
+*          Eigenvalues will be computed most accurately when ABSTOL is
+*          set to twice the underflow threshold 2*SLAMCH('S'), not zero.
+*          If this routine returns with INFO>0, indicating that some
+*          eigenvectors did not converge, try setting ABSTOL to
+*          2*SLAMCH('S').
+*
+*          See "Computing Small Singular Values of Bidiagonal Matrices
+*          with Guaranteed High Relative Accuracy," by Demmel and
+*          Kahan, LAPACK Working Note #3.
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) REAL array, dimension (N)
+*          On normal exit, the first M elements contain the selected
+*          eigenvalues in ascending order.
+*
+*  Z       (output) COMPLEX array, dimension (LDZ, max(1,M))
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix A
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          If an eigenvector fails to converge, then that column of Z
+*          contains the latest approximation to the eigenvector, and the
+*          index of the eigenvector is returned in IFAIL.
+*          If JOBZ = 'N', then Z is not referenced.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.  LWORK >= max(1,2*N-1).
+*          For optimal efficiency, LWORK >= (NB+1)*N,
+*          where NB is the max of the blocksize for CHETRD and for
+*          CUNMTR as returned by ILAENV.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) REAL array, dimension (7*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (5*N)
+*
+*  IFAIL   (output) INTEGER array, dimension (N)
+*          If JOBZ = 'V', then if INFO = 0, the first M elements of
+*          IFAIL are zero.  If INFO > 0, then IFAIL contains the
+*          indices of the eigenvectors that failed to converge.
+*          If JOBZ = 'N', then IFAIL is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, then i eigenvectors failed to converge.
+*                Their indices are stored in array IFAIL.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+      COMPLEX            CONE
+      PARAMETER          ( CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, LOWER, LQUERY, VALEIG, WANTZ
+      CHARACTER          ORDER
+      INTEGER            I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL,
+     $                   INDISP, INDIWK, INDRWK, INDTAU, INDWRK, ISCALE,
+     $                   ITMP1, J, JJ, LLWORK, LOPT, LWKOPT, NB, NSPLIT
+      REAL               ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN,
+     $                   SIGMA, SMLNUM, TMP1, VLL, VUU
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               CLANHE, SLAMCH
+      EXTERNAL           LSAME, ILAENV, CLANHE, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CHETRD, CLACPY, CSSCAL, CSTEIN, CSTEQR, CSWAP,
+     $                   CUNGTR, CUNMTR, SCOPY, SSCAL, SSTEBZ, SSTERF,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      LOWER = LSAME( UPLO, 'L' )
+      WANTZ = LSAME( JOBZ, 'V' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE
+         IF( VALEIG ) THEN
+            IF( N.GT.0 .AND. VU.LE.VL )
+     $         INFO = -8
+         ELSE IF( INDEIG ) THEN
+            IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN
+               INFO = -9
+            ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN
+               INFO = -10
+            END IF
+         END IF
+      END IF
+      IF( INFO.EQ.0 ) THEN
+         IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+            INFO = -15
+         ELSE IF( LWORK.LT.MAX( 1, 2*N-1 ) .AND. .NOT.LQUERY ) THEN
+            INFO = -17
+         END IF
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'CHETRD', UPLO, N, -1, -1, -1 )
+         NB = MAX( NB, ILAENV( 1, 'CUNMTR', UPLO, N, -1, -1, -1 ) )
+         LWKOPT = ( NB+1 )*N
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHEEVX', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( N.EQ.1 ) THEN
+         WORK( 1 ) = 1
+         IF( ALLEIG .OR. INDEIG ) THEN
+            M = 1
+            W( 1 ) = A( 1, 1 )
+         ELSE IF( VALEIG ) THEN
+            IF( VL.LT.REAL( A( 1, 1 ) ) .AND. VU.GE.REAL( A( 1, 1 ) ) )
+     $           THEN
+               M = 1
+               W( 1 ) = A( 1, 1 )
+            END IF
+         END IF
+         IF( WANTZ )
+     $      Z( 1, 1 ) = CONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      EPS = SLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ISCALE = 0
+      ABSTLL = ABSTOL
+      VLL = VL
+      VUU = VU
+      ANRM = CLANHE( 'M', UPLO, N, A, LDA, RWORK )
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         IF( LOWER ) THEN
+            DO 10 J = 1, N
+               CALL CSSCAL( N-J+1, SIGMA, A( J, J ), 1 )
+   10       CONTINUE
+         ELSE
+            DO 20 J = 1, N
+               CALL CSSCAL( J, SIGMA, A( 1, J ), 1 )
+   20       CONTINUE
+         END IF
+         IF( ABSTOL.GT.0 )
+     $      ABSTLL = ABSTOL*SIGMA
+         IF( VALEIG ) THEN
+            VLL = VL*SIGMA
+            VUU = VU*SIGMA
+         END IF
+      END IF
+*
+*     Call CHETRD to reduce Hermitian matrix to tridiagonal form.
+*
+      INDD = 1
+      INDE = INDD + N
+      INDRWK = INDE + N
+      INDTAU = 1
+      INDWRK = INDTAU + N
+      LLWORK = LWORK - INDWRK + 1
+      CALL CHETRD( UPLO, N, A, LDA, RWORK( INDD ), RWORK( INDE ),
+     $             WORK( INDTAU ), WORK( INDWRK ), LLWORK, IINFO )
+      LOPT = N + WORK( INDWRK )
+*
+*     If all eigenvalues are desired and ABSTOL is less than or equal to
+*     zero, then call SSTERF or CUNGTR and CSTEQR.  If this fails for
+*     some eigenvalue, then try SSTEBZ.
+*
+      IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND.
+     $    ( ABSTOL.LE.ZERO ) ) THEN
+         CALL SCOPY( N, RWORK( INDD ), 1, W, 1 )
+         INDEE = INDRWK + 2*N
+         IF( .NOT.WANTZ ) THEN
+            CALL SCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 )
+            CALL SSTERF( N, W, RWORK( INDEE ), INFO )
+         ELSE
+            CALL CLACPY( 'A', N, N, A, LDA, Z, LDZ )
+            CALL CUNGTR( UPLO, N, Z, LDZ, WORK( INDTAU ),
+     $                   WORK( INDWRK ), LLWORK, IINFO )
+            CALL SCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 )
+            CALL CSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ,
+     $                   RWORK( INDRWK ), INFO )
+            IF( INFO.EQ.0 ) THEN
+               DO 30 I = 1, N
+                  IFAIL( I ) = 0
+   30          CONTINUE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            M = N
+            GO TO 40
+         END IF
+         INFO = 0
+      END IF
+*
+*     Otherwise, call SSTEBZ and, if eigenvectors are desired, CSTEIN.
+*
+      IF( WANTZ ) THEN
+         ORDER = 'B'
+      ELSE
+         ORDER = 'E'
+      END IF
+      INDIBL = 1
+      INDISP = INDIBL + N
+      INDIWK = INDISP + N
+      CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL,
+     $             RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W,
+     $             IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ),
+     $             IWORK( INDIWK ), INFO )
+*
+      IF( WANTZ ) THEN
+         CALL CSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W,
+     $                IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ,
+     $                RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO )
+*
+*        Apply unitary matrix used in reduction to tridiagonal
+*        form to eigenvectors returned by CSTEIN.
+*
+         CALL CUNMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z,
+     $                LDZ, WORK( INDWRK ), LLWORK, IINFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+   40 CONTINUE
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = M
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL SSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+*     If eigenvalues are not in order, then sort them, along with
+*     eigenvectors.
+*
+      IF( WANTZ ) THEN
+         DO 60 J = 1, M - 1
+            I = 0
+            TMP1 = W( J )
+            DO 50 JJ = J + 1, M
+               IF( W( JJ ).LT.TMP1 ) THEN
+                  I = JJ
+                  TMP1 = W( JJ )
+               END IF
+   50       CONTINUE
+*
+            IF( I.NE.0 ) THEN
+               ITMP1 = IWORK( INDIBL+I-1 )
+               W( I ) = W( J )
+               IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 )
+               W( J ) = TMP1
+               IWORK( INDIBL+J-1 ) = ITMP1
+               CALL CSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 )
+               IF( INFO.NE.0 ) THEN
+                  ITMP1 = IFAIL( I )
+                  IFAIL( I ) = IFAIL( J )
+                  IFAIL( J ) = ITMP1
+               END IF
+            END IF
+   60    CONTINUE
+      END IF
+*
+*     Set WORK(1) to optimal complex workspace size.
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of CHEEVX
+*
+      END
diff --git a/costa/native/external/lapack/chegs2.f b/costa/native/external/lapack/chegs2.f
new file mode 100644
index 000000000..4e887a330
--- /dev/null
+++ b/costa/native/external/lapack/chegs2.f
@@ -0,0 +1,225 @@
+      SUBROUTINE CHEGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, ITYPE, LDA, LDB, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHEGS2 reduces a complex Hermitian-definite generalized
+*  eigenproblem to standard form.
+*
+*  If ITYPE = 1, the problem is A*x = lambda*B*x,
+*  and A is overwritten by inv(U')*A*inv(U) or inv(L)*A*inv(L')
+*
+*  If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
+*  B*A*x = lambda*x, and A is overwritten by U*A*U` or L'*A*L.
+*
+*  B must have been previously factorized as U'*U or L*L' by CPOTRF.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          = 1: compute inv(U')*A*inv(U) or inv(L)*A*inv(L');
+*          = 2 or 3: compute U*A*U' or L'*A*L.
+*
+*  UPLO    (input) CHARACTER
+*          Specifies whether the upper or lower triangular part of the
+*          Hermitian matrix A is stored, and how B has been factorized.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+*          n by n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n by n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the transformed matrix, stored in the
+*          same format as A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input) COMPLEX array, dimension (LDB,N)
+*          The triangular factor from the Cholesky factorization of B,
+*          as returned by CPOTRF.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, HALF
+      PARAMETER          ( ONE = 1.0E+0, HALF = 0.5E+0 )
+      COMPLEX            CONE
+      PARAMETER          ( CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            K
+      REAL               AKK, BKK
+      COMPLEX            CT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CHER2, CLACGV, CSSCAL, CTRMV, CTRSV,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHEGS2', -INFO )
+         RETURN
+      END IF
+*
+      IF( ITYPE.EQ.1 ) THEN
+         IF( UPPER ) THEN
+*
+*           Compute inv(U')*A*inv(U)
+*
+            DO 10 K = 1, N
+*
+*              Update the upper triangle of A(k:n,k:n)
+*
+               AKK = A( K, K )
+               BKK = B( K, K )
+               AKK = AKK / BKK**2
+               A( K, K ) = AKK
+               IF( K.LT.N ) THEN
+                  CALL CSSCAL( N-K, ONE / BKK, A( K, K+1 ), LDA )
+                  CT = -HALF*AKK
+                  CALL CLACGV( N-K, A( K, K+1 ), LDA )
+                  CALL CLACGV( N-K, B( K, K+1 ), LDB )
+                  CALL CAXPY( N-K, CT, B( K, K+1 ), LDB, A( K, K+1 ),
+     $                        LDA )
+                  CALL CHER2( UPLO, N-K, -CONE, A( K, K+1 ), LDA,
+     $                        B( K, K+1 ), LDB, A( K+1, K+1 ), LDA )
+                  CALL CAXPY( N-K, CT, B( K, K+1 ), LDB, A( K, K+1 ),
+     $                        LDA )
+                  CALL CLACGV( N-K, B( K, K+1 ), LDB )
+                  CALL CTRSV( UPLO, 'Conjugate transpose', 'Non-unit',
+     $                        N-K, B( K+1, K+1 ), LDB, A( K, K+1 ),
+     $                        LDA )
+                  CALL CLACGV( N-K, A( K, K+1 ), LDA )
+               END IF
+   10       CONTINUE
+         ELSE
+*
+*           Compute inv(L)*A*inv(L')
+*
+            DO 20 K = 1, N
+*
+*              Update the lower triangle of A(k:n,k:n)
+*
+               AKK = A( K, K )
+               BKK = B( K, K )
+               AKK = AKK / BKK**2
+               A( K, K ) = AKK
+               IF( K.LT.N ) THEN
+                  CALL CSSCAL( N-K, ONE / BKK, A( K+1, K ), 1 )
+                  CT = -HALF*AKK
+                  CALL CAXPY( N-K, CT, B( K+1, K ), 1, A( K+1, K ), 1 )
+                  CALL CHER2( UPLO, N-K, -CONE, A( K+1, K ), 1,
+     $                        B( K+1, K ), 1, A( K+1, K+1 ), LDA )
+                  CALL CAXPY( N-K, CT, B( K+1, K ), 1, A( K+1, K ), 1 )
+                  CALL CTRSV( UPLO, 'No transpose', 'Non-unit', N-K,
+     $                        B( K+1, K+1 ), LDB, A( K+1, K ), 1 )
+               END IF
+   20       CONTINUE
+         END IF
+      ELSE
+         IF( UPPER ) THEN
+*
+*           Compute U*A*U'
+*
+            DO 30 K = 1, N
+*
+*              Update the upper triangle of A(1:k,1:k)
+*
+               AKK = A( K, K )
+               BKK = B( K, K )
+               CALL CTRMV( UPLO, 'No transpose', 'Non-unit', K-1, B,
+     $                     LDB, A( 1, K ), 1 )
+               CT = HALF*AKK
+               CALL CAXPY( K-1, CT, B( 1, K ), 1, A( 1, K ), 1 )
+               CALL CHER2( UPLO, K-1, CONE, A( 1, K ), 1, B( 1, K ), 1,
+     $                     A, LDA )
+               CALL CAXPY( K-1, CT, B( 1, K ), 1, A( 1, K ), 1 )
+               CALL CSSCAL( K-1, BKK, A( 1, K ), 1 )
+               A( K, K ) = AKK*BKK**2
+   30       CONTINUE
+         ELSE
+*
+*           Compute L'*A*L
+*
+            DO 40 K = 1, N
+*
+*              Update the lower triangle of A(1:k,1:k)
+*
+               AKK = A( K, K )
+               BKK = B( K, K )
+               CALL CLACGV( K-1, A( K, 1 ), LDA )
+               CALL CTRMV( UPLO, 'Conjugate transpose', 'Non-unit', K-1,
+     $                     B, LDB, A( K, 1 ), LDA )
+               CT = HALF*AKK
+               CALL CLACGV( K-1, B( K, 1 ), LDB )
+               CALL CAXPY( K-1, CT, B( K, 1 ), LDB, A( K, 1 ), LDA )
+               CALL CHER2( UPLO, K-1, CONE, A( K, 1 ), LDA, B( K, 1 ),
+     $                     LDB, A, LDA )
+               CALL CAXPY( K-1, CT, B( K, 1 ), LDB, A( K, 1 ), LDA )
+               CALL CLACGV( K-1, B( K, 1 ), LDB )
+               CALL CSSCAL( K-1, BKK, A( K, 1 ), LDA )
+               CALL CLACGV( K-1, A( K, 1 ), LDA )
+               A( K, K ) = AKK*BKK**2
+   40       CONTINUE
+         END IF
+      END IF
+      RETURN
+*
+*     End of CHEGS2
+*
+      END
diff --git a/costa/native/external/lapack/chegst.f b/costa/native/external/lapack/chegst.f
new file mode 100644
index 000000000..0296054a1
--- /dev/null
+++ b/costa/native/external/lapack/chegst.f
@@ -0,0 +1,260 @@
+      SUBROUTINE CHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, ITYPE, LDA, LDB, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHEGST reduces a complex Hermitian-definite generalized
+*  eigenproblem to standard form.
+*
+*  If ITYPE = 1, the problem is A*x = lambda*B*x,
+*  and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H)
+*
+*  If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
+*  B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L.
+*
+*  B must have been previously factorized as U**H*U or L*L**H by CPOTRF.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          = 1: compute inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H);
+*          = 2 or 3: compute U*A*U**H or L**H*A*L.
+*
+*  UPLO    (input) CHARACTER
+*          = 'U':  Upper triangle of A is stored and B is factored as
+*                  U**H*U;
+*          = 'L':  Lower triangle of A is stored and B is factored as
+*                  L*L**H.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the transformed matrix, stored in the
+*          same format as A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input) COMPLEX array, dimension (LDB,N)
+*          The triangular factor from the Cholesky factorization of B,
+*          as returned by CPOTRF.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+      COMPLEX            CONE, HALF
+      PARAMETER          ( CONE = ( 1.0E+0, 0.0E+0 ),
+     $                   HALF = ( 0.5E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            K, KB, NB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CHEGS2, CHEMM, CHER2K, CTRMM, CTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHEGST', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'CHEGST', UPLO, N, -1, -1, -1 )
+*
+      IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+*        Use unblocked code
+*
+         CALL CHEGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( ITYPE.EQ.1 ) THEN
+            IF( UPPER ) THEN
+*
+*              Compute inv(U')*A*inv(U)
+*
+               DO 10 K = 1, N, NB
+                  KB = MIN( N-K+1, NB )
+*
+*                 Update the upper triangle of A(k:n,k:n)
+*
+                  CALL CHEGS2( ITYPE, UPLO, KB, A( K, K ), LDA,
+     $                         B( K, K ), LDB, INFO )
+                  IF( K+KB.LE.N ) THEN
+                     CALL CTRSM( 'Left', UPLO, 'Conjugate transpose',
+     $                           'Non-unit', KB, N-K-KB+1, CONE,
+     $                           B( K, K ), LDB, A( K, K+KB ), LDA )
+                     CALL CHEMM( 'Left', UPLO, KB, N-K-KB+1, -HALF,
+     $                           A( K, K ), LDA, B( K, K+KB ), LDB,
+     $                           CONE, A( K, K+KB ), LDA )
+                     CALL CHER2K( UPLO, 'Conjugate transpose', N-K-KB+1,
+     $                            KB, -CONE, A( K, K+KB ), LDA,
+     $                            B( K, K+KB ), LDB, ONE,
+     $                            A( K+KB, K+KB ), LDA )
+                     CALL CHEMM( 'Left', UPLO, KB, N-K-KB+1, -HALF,
+     $                           A( K, K ), LDA, B( K, K+KB ), LDB,
+     $                           CONE, A( K, K+KB ), LDA )
+                     CALL CTRSM( 'Right', UPLO, 'No transpose',
+     $                           'Non-unit', KB, N-K-KB+1, CONE,
+     $                           B( K+KB, K+KB ), LDB, A( K, K+KB ),
+     $                           LDA )
+                  END IF
+   10          CONTINUE
+            ELSE
+*
+*              Compute inv(L)*A*inv(L')
+*
+               DO 20 K = 1, N, NB
+                  KB = MIN( N-K+1, NB )
+*
+*                 Update the lower triangle of A(k:n,k:n)
+*
+                  CALL CHEGS2( ITYPE, UPLO, KB, A( K, K ), LDA,
+     $                         B( K, K ), LDB, INFO )
+                  IF( K+KB.LE.N ) THEN
+                     CALL CTRSM( 'Right', UPLO, 'Conjugate transpose',
+     $                           'Non-unit', N-K-KB+1, KB, CONE,
+     $                           B( K, K ), LDB, A( K+KB, K ), LDA )
+                     CALL CHEMM( 'Right', UPLO, N-K-KB+1, KB, -HALF,
+     $                           A( K, K ), LDA, B( K+KB, K ), LDB,
+     $                           CONE, A( K+KB, K ), LDA )
+                     CALL CHER2K( UPLO, 'No transpose', N-K-KB+1, KB,
+     $                            -CONE, A( K+KB, K ), LDA,
+     $                            B( K+KB, K ), LDB, ONE,
+     $                            A( K+KB, K+KB ), LDA )
+                     CALL CHEMM( 'Right', UPLO, N-K-KB+1, KB, -HALF,
+     $                           A( K, K ), LDA, B( K+KB, K ), LDB,
+     $                           CONE, A( K+KB, K ), LDA )
+                     CALL CTRSM( 'Left', UPLO, 'No transpose',
+     $                           'Non-unit', N-K-KB+1, KB, CONE,
+     $                           B( K+KB, K+KB ), LDB, A( K+KB, K ),
+     $                           LDA )
+                  END IF
+   20          CONTINUE
+            END IF
+         ELSE
+            IF( UPPER ) THEN
+*
+*              Compute U*A*U'
+*
+               DO 30 K = 1, N, NB
+                  KB = MIN( N-K+1, NB )
+*
+*                 Update the upper triangle of A(1:k+kb-1,1:k+kb-1)
+*
+                  CALL CTRMM( 'Left', UPLO, 'No transpose', 'Non-unit',
+     $                        K-1, KB, CONE, B, LDB, A( 1, K ), LDA )
+                  CALL CHEMM( 'Right', UPLO, K-1, KB, HALF, A( K, K ),
+     $                        LDA, B( 1, K ), LDB, CONE, A( 1, K ),
+     $                        LDA )
+                  CALL CHER2K( UPLO, 'No transpose', K-1, KB, CONE,
+     $                         A( 1, K ), LDA, B( 1, K ), LDB, ONE, A,
+     $                         LDA )
+                  CALL CHEMM( 'Right', UPLO, K-1, KB, HALF, A( K, K ),
+     $                        LDA, B( 1, K ), LDB, CONE, A( 1, K ),
+     $                        LDA )
+                  CALL CTRMM( 'Right', UPLO, 'Conjugate transpose',
+     $                        'Non-unit', K-1, KB, CONE, B( K, K ), LDB,
+     $                        A( 1, K ), LDA )
+                  CALL CHEGS2( ITYPE, UPLO, KB, A( K, K ), LDA,
+     $                         B( K, K ), LDB, INFO )
+   30          CONTINUE
+            ELSE
+*
+*              Compute L'*A*L
+*
+               DO 40 K = 1, N, NB
+                  KB = MIN( N-K+1, NB )
+*
+*                 Update the lower triangle of A(1:k+kb-1,1:k+kb-1)
+*
+                  CALL CTRMM( 'Right', UPLO, 'No transpose', 'Non-unit',
+     $                        KB, K-1, CONE, B, LDB, A( K, 1 ), LDA )
+                  CALL CHEMM( 'Left', UPLO, KB, K-1, HALF, A( K, K ),
+     $                        LDA, B( K, 1 ), LDB, CONE, A( K, 1 ),
+     $                        LDA )
+                  CALL CHER2K( UPLO, 'Conjugate transpose', K-1, KB,
+     $                         CONE, A( K, 1 ), LDA, B( K, 1 ), LDB,
+     $                         ONE, A, LDA )
+                  CALL CHEMM( 'Left', UPLO, KB, K-1, HALF, A( K, K ),
+     $                        LDA, B( K, 1 ), LDB, CONE, A( K, 1 ),
+     $                        LDA )
+                  CALL CTRMM( 'Left', UPLO, 'Conjugate transpose',
+     $                        'Non-unit', KB, K-1, CONE, B( K, K ), LDB,
+     $                        A( K, 1 ), LDA )
+                  CALL CHEGS2( ITYPE, UPLO, KB, A( K, K ), LDA,
+     $                         B( K, K ), LDB, INFO )
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      RETURN
+*
+*     End of CHEGST
+*
+      END
diff --git a/costa/native/external/lapack/chegv.f b/costa/native/external/lapack/chegv.f
new file mode 100644
index 000000000..2aa038b50
--- /dev/null
+++ b/costa/native/external/lapack/chegv.f
@@ -0,0 +1,229 @@
+      SUBROUTINE CHEGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK,
+     $                  LWORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, ITYPE, LDA, LDB, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      REAL               RWORK( * ), W( * )
+      COMPLEX            A( LDA, * ), B( LDB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHEGV computes all the eigenvalues, and optionally, the eigenvectors
+*  of a complex generalized Hermitian-definite eigenproblem, of the form
+*  A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.
+*  Here A and B are assumed to be Hermitian and B is also
+*  positive definite.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          Specifies the problem type to be solved:
+*          = 1:  A*x = (lambda)*B*x
+*          = 2:  A*B*x = (lambda)*x
+*          = 3:  B*A*x = (lambda)*x
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA, N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of A contains the
+*          upper triangular part of the matrix A.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of A contains
+*          the lower triangular part of the matrix A.
+*
+*          On exit, if JOBZ = 'V', then if INFO = 0, A contains the
+*          matrix Z of eigenvectors.  The eigenvectors are normalized
+*          as follows:
+*          if ITYPE = 1 or 2, Z**H*B*Z = I;
+*          if ITYPE = 3, Z**H*inv(B)*Z = I.
+*          If JOBZ = 'N', then on exit the upper triangle (if UPLO='U')
+*          or the lower triangle (if UPLO='L') of A, including the
+*          diagonal, is destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB, N)
+*          On entry, the Hermitian positive definite matrix B.
+*          If UPLO = 'U', the leading N-by-N upper triangular part of B
+*          contains the upper triangular part of the matrix B.
+*          If UPLO = 'L', the leading N-by-N lower triangular part of B
+*          contains the lower triangular part of the matrix B.
+*
+*          On exit, if INFO <= N, the part of B containing the matrix is
+*          overwritten by the triangular factor U or L from the Cholesky
+*          factorization B = U**H*U or B = L*L**H.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  W       (output) REAL array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.  LWORK >= max(1,2*N-1).
+*          For optimal efficiency, LWORK >= (NB+1)*N,
+*          where NB is the blocksize for CHETRD returned by ILAENV.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) REAL array, dimension (max(1, 3*N-2))
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  CPOTRF or CHEEV returned an error code:
+*             <= N:  if INFO = i, CHEEV failed to converge;
+*                    i off-diagonal elements of an intermediate
+*                    tridiagonal form did not converge to zero;
+*             > N:   if INFO = N + i, for 1 <= i <= N, then the leading
+*                    minor of order i of B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, UPPER, WANTZ
+      CHARACTER          TRANS
+      INTEGER            LWKOPT, NB, NEIG
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV, LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CHEEV, CHEGST, CPOTRF, CTRMM, CTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+      LQUERY = ( LWORK.EQ. -1 )
+*
+      INFO = 0
+      IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LWORK.LT.MAX( 1, 2*N-1 ) .AND. .NOT.LQUERY ) THEN
+         INFO = -11
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'CHETRD', UPLO, N, -1, -1, -1 )
+         LWKOPT = ( NB+1 )*N
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHEGV ', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form a Cholesky factorization of B.
+*
+      CALL CPOTRF( UPLO, N, B, LDB, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem and solve.
+*
+      CALL CHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
+      CALL CHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, INFO )
+*
+      IF( WANTZ ) THEN
+*
+*        Backtransform eigenvectors to the original problem.
+*
+         NEIG = N
+         IF( INFO.GT.0 )
+     $      NEIG = INFO - 1
+         IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN
+*
+*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
+*           backtransform eigenvectors: x = inv(L)'*y or inv(U)*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'N'
+            ELSE
+               TRANS = 'C'
+            END IF
+*
+            CALL CTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE,
+     $                  B, LDB, A, LDA )
+*
+         ELSE IF( ITYPE.EQ.3 ) THEN
+*
+*           For B*A*x=(lambda)*x;
+*           backtransform eigenvectors: x = L*y or U'*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'C'
+            ELSE
+               TRANS = 'N'
+            END IF
+*
+            CALL CTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE,
+     $                  B, LDB, A, LDA )
+         END IF
+      END IF
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of CHEGV
+*
+      END
diff --git a/costa/native/external/lapack/chegvd.f b/costa/native/external/lapack/chegvd.f
new file mode 100644
index 000000000..05ceab081
--- /dev/null
+++ b/costa/native/external/lapack/chegvd.f
@@ -0,0 +1,297 @@
+      SUBROUTINE CHEGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK,
+     $                   LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, ITYPE, LDA, LDB, LIWORK, LRWORK, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               RWORK( * ), W( * )
+      COMPLEX            A( LDA, * ), B( LDB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHEGVD computes all the eigenvalues, and optionally, the eigenvectors
+*  of a complex generalized Hermitian-definite eigenproblem, of the form
+*  A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.  Here A and
+*  B are assumed to be Hermitian and B is also positive definite.
+*  If eigenvectors are desired, it uses a divide and conquer algorithm.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          Specifies the problem type to be solved:
+*          = 1:  A*x = (lambda)*B*x
+*          = 2:  A*B*x = (lambda)*x
+*          = 3:  B*A*x = (lambda)*x
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA, N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of A contains the
+*          upper triangular part of the matrix A.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of A contains
+*          the lower triangular part of the matrix A.
+*
+*          On exit, if JOBZ = 'V', then if INFO = 0, A contains the
+*          matrix Z of eigenvectors.  The eigenvectors are normalized
+*          as follows:
+*          if ITYPE = 1 or 2, Z**H*B*Z = I;
+*          if ITYPE = 3, Z**H*inv(B)*Z = I.
+*          If JOBZ = 'N', then on exit the upper triangle (if UPLO='U')
+*          or the lower triangle (if UPLO='L') of A, including the
+*          diagonal, is destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB, N)
+*          On entry, the Hermitian matrix B.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of B contains the
+*          upper triangular part of the matrix B.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of B contains
+*          the lower triangular part of the matrix B.
+*
+*          On exit, if INFO <= N, the part of B containing the matrix is
+*          overwritten by the triangular factor U or L from the Cholesky
+*          factorization B = U**H*U or B = L*L**H.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  W       (output) REAL array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.
+*          If N <= 1,                LWORK >= 1.
+*          If JOBZ  = 'N' and N > 1, LWORK >= N + 1.
+*          If JOBZ  = 'V' and N > 1, LWORK >= 2*N + N**2.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace/output) REAL array, dimension (LRWORK)
+*          On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
+*
+*  LRWORK  (input) INTEGER
+*          The dimension of the array RWORK.
+*          If N <= 1,                LRWORK >= 1.
+*          If JOBZ  = 'N' and N > 1, LRWORK >= N.
+*          If JOBZ  = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2.
+*
+*          If LRWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the RWORK array,
+*          returns this value as the first entry of the RWORK array, and
+*          no error message related to LRWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.
+*          If N <= 1,                LIWORK >= 1.
+*          If JOBZ  = 'N' and N > 1, LIWORK >= 1.
+*          If JOBZ  = 'V' and N > 1, LIWORK >= 3 + 5*N.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  CPOTRF or CHEEVD returned an error code:
+*             <= N:  if INFO = i, CHEEVD failed to converge;
+*                    i off-diagonal elements of an intermediate
+*                    tridiagonal form did not converge to zero;
+*             > N:   if INFO = N + i, for 1 <= i <= N, then the leading
+*                    minor of order i of B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            CONE
+      PARAMETER          ( CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, UPPER, WANTZ
+      CHARACTER          TRANS
+      INTEGER            LIOPT, LIWMIN, LOPT, LROPT, LRWMIN, LWMIN, NEIG
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CHEEVD, CHEGST, CPOTRF, CTRMM, CTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+      LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( N.LE.1 ) THEN
+         LWMIN = 1
+         LRWMIN = 1
+         LIWMIN = 1
+         LOPT = LWMIN
+         LROPT = LRWMIN
+         LIOPT = LIWMIN
+      ELSE
+         IF( WANTZ ) THEN
+            LWMIN = 2*N + N*N
+            LRWMIN = 1 + 5*N + 2*N*N
+            LIWMIN = 3 + 5*N
+         ELSE
+            LWMIN = N + 1
+            LRWMIN = N
+            LIWMIN = 1
+         END IF
+         LOPT = LWMIN
+         LROPT = LRWMIN
+         LIOPT = LIWMIN
+      END IF
+      IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -11
+      ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -15
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LOPT
+         RWORK( 1 ) = LROPT
+         IWORK( 1 ) = LIOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHEGVD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form a Cholesky factorization of B.
+*
+      CALL CPOTRF( UPLO, N, B, LDB, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem and solve.
+*
+      CALL CHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
+      CALL CHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, LRWORK,
+     $             IWORK, LIWORK, INFO )
+      LOPT = MAX( REAL( LOPT ), REAL( WORK( 1 ) ) )
+      LROPT = MAX( REAL( LROPT ), REAL( RWORK( 1 ) ) )
+      LIOPT = MAX( REAL( LIOPT ), REAL( IWORK( 1 ) ) )
+*
+      IF( WANTZ ) THEN
+*
+*        Backtransform eigenvectors to the original problem.
+*
+         NEIG = N
+         IF( INFO.GT.0 )
+     $      NEIG = INFO - 1
+         IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN
+*
+*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
+*           backtransform eigenvectors: x = inv(L)'*y or inv(U)*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'N'
+            ELSE
+               TRANS = 'C'
+            END IF
+*
+            CALL CTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, CONE,
+     $                  B, LDB, A, LDA )
+*
+         ELSE IF( ITYPE.EQ.3 ) THEN
+*
+*           For B*A*x=(lambda)*x;
+*           backtransform eigenvectors: x = L*y or U'*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'C'
+            ELSE
+               TRANS = 'N'
+            END IF
+*
+            CALL CTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, CONE,
+     $                  B, LDB, A, LDA )
+         END IF
+      END IF
+*
+      WORK( 1 ) = LOPT
+      RWORK( 1 ) = LROPT
+      IWORK( 1 ) = LIOPT
+*
+      RETURN
+*
+*     End of CHEGVD
+*
+      END
diff --git a/costa/native/external/lapack/chegvx.f b/costa/native/external/lapack/chegvx.f
new file mode 100644
index 000000000..39020a510
--- /dev/null
+++ b/costa/native/external/lapack/chegvx.f
@@ -0,0 +1,329 @@
+      SUBROUTINE CHEGVX( ITYPE, JOBZ, RANGE, UPLO, N, A, LDA, B, LDB,
+     $                   VL, VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK,
+     $                   LWORK, RWORK, IWORK, IFAIL, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE, UPLO
+      INTEGER            IL, INFO, ITYPE, IU, LDA, LDB, LDZ, LWORK, M, N
+      REAL               ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IFAIL( * ), IWORK( * )
+      REAL               RWORK( * ), W( * )
+      COMPLEX            A( LDA, * ), B( LDB, * ), WORK( * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHEGVX computes selected eigenvalues, and optionally, eigenvectors
+*  of a complex generalized Hermitian-definite eigenproblem, of the form
+*  A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.  Here A and
+*  B are assumed to be Hermitian and B is also positive definite.
+*  Eigenvalues and eigenvectors can be selected by specifying either a
+*  range of values or a range of indices for the desired eigenvalues.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          Specifies the problem type to be solved:
+*          = 1:  A*x = (lambda)*B*x
+*          = 2:  A*B*x = (lambda)*x
+*          = 3:  B*A*x = (lambda)*x
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found.
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found.
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+**
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA, N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of A contains the
+*          upper triangular part of the matrix A.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of A contains
+*          the lower triangular part of the matrix A.
+*
+*          On exit,  the lower triangle (if UPLO='L') or the upper
+*          triangle (if UPLO='U') of A, including the diagonal, is
+*          destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB, N)
+*          On entry, the Hermitian matrix B.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of B contains the
+*          upper triangular part of the matrix B.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of B contains
+*          the lower triangular part of the matrix B.
+*
+*          On exit, if INFO <= N, the part of B containing the matrix is
+*          overwritten by the triangular factor U or L from the Cholesky
+*          factorization B = U**H*U or B = L*L**H.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  VL      (input) REAL
+*  VU      (input) REAL
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) REAL
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less than
+*          or equal to zero, then  EPS*|T|  will be used in its place,
+*          where |T| is the 1-norm of the tridiagonal matrix obtained
+*          by reducing A to tridiagonal form.
+*
+*          Eigenvalues will be computed most accurately when ABSTOL is
+*          set to twice the underflow threshold 2*SLAMCH('S'), not zero.
+*          If this routine returns with INFO>0, indicating that some
+*          eigenvectors did not converge, try setting ABSTOL to
+*          2*SLAMCH('S').
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) REAL array, dimension (N)
+*          The first M elements contain the selected
+*          eigenvalues in ascending order.
+*
+*  Z       (output) COMPLEX array, dimension (LDZ, max(1,M))
+*          If JOBZ = 'N', then Z is not referenced.
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix A
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          The eigenvectors are normalized as follows:
+*          if ITYPE = 1 or 2, Z**T*B*Z = I;
+*          if ITYPE = 3, Z**T*inv(B)*Z = I.
+*
+*          If an eigenvector fails to converge, then that column of Z
+*          contains the latest approximation to the eigenvector, and the
+*          index of the eigenvector is returned in IFAIL.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.  LWORK >= max(1,2*N-1).
+*          For optimal efficiency, LWORK >= (NB+1)*N,
+*          where NB is the blocksize for CHETRD returned by ILAENV.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) REAL array, dimension (7*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (5*N)
+*
+*  IFAIL   (output) INTEGER array, dimension (N)
+*          If JOBZ = 'V', then if INFO = 0, the first M elements of
+*          IFAIL are zero.  If INFO > 0, then IFAIL contains the
+*          indices of the eigenvectors that failed to converge.
+*          If JOBZ = 'N', then IFAIL is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  CPOTRF or CHEEVX returned an error code:
+*             <= N:  if INFO = i, CHEEVX failed to converge;
+*                    i eigenvectors failed to converge.  Their indices
+*                    are stored in array IFAIL.
+*             > N:   if INFO = N + i, for 1 <= i <= N, then the leading
+*                    minor of order i of B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            CONE
+      PARAMETER          ( CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, LQUERY, UPPER, VALEIG, WANTZ
+      CHARACTER          TRANS
+      INTEGER            LOPT, LWKOPT, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV, LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CHEEVX, CHEGST, CPOTRF, CTRMM, CTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( VALEIG .AND. N.GT.0 ) THEN
+         IF( VU.LE.VL ) INFO = -11
+      ELSE IF( INDEIG .AND. IL.LT.1 ) THEN
+         INFO = -12
+      ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN
+         INFO = -13
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -18
+      ELSE IF( LWORK.LT.MAX( 1, 2*N-1 ) .AND. .NOT.LQUERY ) THEN
+         INFO = -20
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'CHETRD', UPLO, N, -1, -1, -1 )
+         LWKOPT = ( NB+1 )*N
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHEGVX', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+*     Form a Cholesky factorization of B.
+*
+      CALL CPOTRF( UPLO, N, B, LDB, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem and solve.
+*
+      CALL CHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
+      CALL CHEEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, ABSTOL,
+     $             M, W, Z, LDZ, WORK, LWORK, RWORK, IWORK, IFAIL,
+     $             INFO )
+      LOPT = WORK( 1 )
+*
+      IF( WANTZ ) THEN
+*
+*        Backtransform eigenvectors to the original problem.
+*
+         IF( INFO.GT.0 )
+     $      M = INFO - 1
+         IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN
+*
+*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
+*           backtransform eigenvectors: x = inv(L)'*y or inv(U)*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'N'
+            ELSE
+               TRANS = 'C'
+            END IF
+*
+            CALL CTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, M, CONE, B,
+     $                  LDB, Z, LDZ )
+*
+         ELSE IF( ITYPE.EQ.3 ) THEN
+*
+*           For B*A*x=(lambda)*x;
+*           backtransform eigenvectors: x = L*y or U'*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'C'
+            ELSE
+               TRANS = 'N'
+            END IF
+*
+            CALL CTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, M, CONE, B,
+     $                  LDB, Z, LDZ )
+         END IF
+      END IF
+*
+*     Set WORK(1) to optimal complex workspace size.
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of CHEGVX
+*
+      END
diff --git a/costa/native/external/lapack/cherfs.f b/costa/native/external/lapack/cherfs.f
new file mode 100644
index 000000000..669e2ad8b
--- /dev/null
+++ b/costa/native/external/lapack/cherfs.f
@@ -0,0 +1,339 @@
+      SUBROUTINE CHERFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB,
+     $                   X, LDX, FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX            A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHERFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is Hermitian indefinite, and
+*  provides error bounds and backward error estimates for the solution.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input) COMPLEX array, dimension (LDA,N)
+*          The Hermitian matrix A.  If UPLO = 'U', the leading N-by-N
+*          upper triangular part of A contains the upper triangular part
+*          of the matrix A, and the strictly lower triangular part of A
+*          is not referenced.  If UPLO = 'L', the leading N-by-N lower
+*          triangular part of A contains the lower triangular part of
+*          the matrix A, and the strictly upper triangular part of A is
+*          not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  AF      (input) COMPLEX array, dimension (LDAF,N)
+*          The factored form of the matrix A.  AF contains the block
+*          diagonal matrix D and the multipliers used to obtain the
+*          factor U or L from the factorization A = U*D*U**H or
+*          A = L*D*L**H as computed by CHETRF.
+*
+*  LDAF    (input) INTEGER
+*          The leading dimension of the array AF.  LDAF >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by CHETRF.
+*
+*  B       (input) COMPLEX array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) COMPLEX array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by CHETRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+      REAL               TWO
+      PARAMETER          ( TWO = 2.0E+0 )
+      REAL               THREE
+      PARAMETER          ( THREE = 3.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            COUNT, I, J, K, KASE, NZ
+      REAL               EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+      COMPLEX            ZDUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CCOPY, CHEMV, CHETRS, CLACON, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MAX, REAL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHERFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = SLAMCH( 'Epsilon' )
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - A * X
+*
+         CALL CCOPY( N, B( 1, J ), 1, WORK, 1 )
+         CALL CHEMV( UPLO, N, -ONE, A, LDA, X( 1, J ), 1, ONE, WORK, 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            RWORK( I ) = CABS1( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(A)*abs(X) + abs(B).
+*
+         IF( UPPER ) THEN
+            DO 50 K = 1, N
+               S = ZERO
+               XK = CABS1( X( K, J ) )
+               DO 40 I = 1, K - 1
+                  RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK
+                  S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) )
+   40          CONTINUE
+               RWORK( K ) = RWORK( K ) + ABS( REAL( A( K, K ) ) )*XK + S
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               XK = CABS1( X( K, J ) )
+               RWORK( K ) = RWORK( K ) + ABS( REAL( A( K, K ) ) )*XK
+               DO 60 I = K + 1, N
+                  RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK
+                  S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) )
+   60          CONTINUE
+               RWORK( K ) = RWORK( K ) + S
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) )
+            ELSE
+               S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) /
+     $             ( RWORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL CHETRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO )
+            CALL CAXPY( N, ONE, WORK, 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(A))*
+*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(A) is the inverse of A
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(A)*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use CLACON to estimate the infinity-norm of the matrix
+*           inv(A) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I )
+            ELSE
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) +
+     $                      SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL CLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(A').
+*
+               CALL CHETRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO )
+               DO 110 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  110          CONTINUE
+            ELSE IF( KASE.EQ.2 ) THEN
+*
+*              Multiply by inv(A)*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  120          CONTINUE
+               CALL CHETRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of CHERFS
+*
+      END
diff --git a/costa/native/external/lapack/chesv.f b/costa/native/external/lapack/chesv.f
new file mode 100644
index 000000000..a53b36c77
--- /dev/null
+++ b/costa/native/external/lapack/chesv.f
@@ -0,0 +1,171 @@
+      SUBROUTINE CHESV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK,
+     $                  LWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LDB, LWORK, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            A( LDA, * ), B( LDB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHESV computes the solution to a complex system of linear equations
+*     A * X = B,
+*  where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS
+*  matrices.
+*
+*  The diagonal pivoting method is used to factor A as
+*     A = U * D * U**H,  if UPLO = 'U', or
+*     A = L * D * L**H,  if UPLO = 'L',
+*  where U (or L) is a product of permutation and unit upper (lower)
+*  triangular matrices, and D is Hermitian and block diagonal with 
+*  1-by-1 and 2-by-2 diagonal blocks.  The factored form of A is then
+*  used to solve the system of equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the block diagonal matrix D and the
+*          multipliers used to obtain the factor U or L from the
+*          factorization A = U*D*U**H or A = L*D*L**H as computed by
+*          CHETRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D, as
+*          determined by CHETRF.  If IPIV(k) > 0, then rows and columns
+*          k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1
+*          diagonal block.  If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0,
+*          then rows and columns k-1 and -IPIV(k) were interchanged and
+*          D(k-1:k,k-1:k) is a 2-by-2 diagonal block.  If UPLO = 'L' and
+*          IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and
+*          -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2
+*          diagonal block.
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of WORK.  LWORK >= 1, and for best performance
+*          LWORK >= N*NB, where NB is the optimal blocksize for
+*          CHETRF.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, D(i,i) is exactly zero.  The factorization
+*               has been completed, but the block diagonal matrix D is
+*               exactly singular, so the solution could not be computed.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            LWKOPT, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV, LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CHETRF, CHETRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN
+         INFO = -10
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'CHETRF', UPLO, N, -1, -1, -1 )
+         LWKOPT = N*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHESV ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Compute the factorization A = U*D*U' or A = L*D*L'.
+*
+      CALL CHETRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL CHETRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
+*
+      END IF
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of CHESV
+*
+      END
diff --git a/costa/native/external/lapack/chesvx.f b/costa/native/external/lapack/chesvx.f
new file mode 100644
index 000000000..6537575f7
--- /dev/null
+++ b/costa/native/external/lapack/chesvx.f
@@ -0,0 +1,299 @@
+      SUBROUTINE CHESVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B,
+     $                   LDB, X, LDX, RCOND, FERR, BERR, WORK, LWORK,
+     $                   RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          FACT, UPLO
+      INTEGER            INFO, LDA, LDAF, LDB, LDX, LWORK, N, NRHS
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX            A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHESVX uses the diagonal pivoting factorization to compute the
+*  solution to a complex system of linear equations A * X = B,
+*  where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS
+*  matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'N', the diagonal pivoting method is used to factor A.
+*     The form of the factorization is
+*        A = U * D * U**H,  if UPLO = 'U', or
+*        A = L * D * L**H,  if UPLO = 'L',
+*     where U (or L) is a product of permutation and unit upper (lower)
+*     triangular matrices, and D is Hermitian and block diagonal with
+*     1-by-1 and 2-by-2 diagonal blocks.
+*
+*  2. If some D(i,i)=0, so that D is exactly singular, then the routine
+*     returns with INFO = i. Otherwise, the factored form of A is used
+*     to estimate the condition number of the matrix A.  If the
+*     reciprocal of the condition number is less than machine precision,
+*     INFO = N+1 is returned as a warning, but the routine still goes on
+*     to solve for X and compute error bounds as described below.
+*
+*  3. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  4. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of A has been
+*          supplied on entry.
+*          = 'F':  On entry, AF and IPIV contain the factored form
+*                  of A.  A, AF and IPIV will not be modified.
+*          = 'N':  The matrix A will be copied to AF and factored.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input) COMPLEX array, dimension (LDA,N)
+*          The Hermitian matrix A.  If UPLO = 'U', the leading N-by-N
+*          upper triangular part of A contains the upper triangular part
+*          of the matrix A, and the strictly lower triangular part of A
+*          is not referenced.  If UPLO = 'L', the leading N-by-N lower
+*          triangular part of A contains the lower triangular part of
+*          the matrix A, and the strictly upper triangular part of A is
+*          not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  AF      (input or output) COMPLEX array, dimension (LDAF,N)
+*          If FACT = 'F', then AF is an input argument and on entry
+*          contains the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L from the factorization
+*          A = U*D*U**H or A = L*D*L**H as computed by CHETRF.
+*
+*          If FACT = 'N', then AF is an output argument and on exit
+*          returns the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L from the factorization
+*          A = U*D*U**H or A = L*D*L**H.
+*
+*  LDAF    (input) INTEGER
+*          The leading dimension of the array AF.  LDAF >= max(1,N).
+*
+*  IPIV    (input or output) INTEGER array, dimension (N)
+*          If FACT = 'F', then IPIV is an input argument and on entry
+*          contains details of the interchanges and the block structure
+*          of D, as determined by CHETRF.
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*          If FACT = 'N', then IPIV is an output argument and on exit
+*          contains details of the interchanges and the block structure
+*          of D, as determined by CHETRF.
+*
+*  B       (input) COMPLEX array, dimension (LDB,NRHS)
+*          The N-by-NRHS right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) COMPLEX array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) REAL
+*          The estimate of the reciprocal condition number of the matrix
+*          A.  If RCOND is less than the machine precision (in
+*          particular, if RCOND = 0), the matrix is singular to working
+*          precision.  This condition is indicated by a return code of
+*          INFO > 0.
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of WORK.  LWORK >= 2*N, and for best performance
+*          LWORK >= N*NB, where NB is the optimal blocksize for
+*          CHETRF.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, and i is
+*                <= N:  D(i,i) is exactly zero.  The factorization
+*                       has been completed but the factor D is exactly
+*                       singular, so the solution and error bounds could
+*                       not be computed. RCOND = 0 is returned.
+*                = N+1: D is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, NOFACT
+      INTEGER            LWKOPT, NB
+      REAL               ANORM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               CLANHE, SLAMCH
+      EXTERNAL           ILAENV, LSAME, CLANHE, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CHECON, CHERFS, CHETRF, CHETRS, CLACPY, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -13
+      ELSE IF( LWORK.LT.MAX( 1, 2*N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -18
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'CHETRF', UPLO, N, -1, -1, -1 )
+         LWKOPT = N*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHESVX', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+      IF( NOFACT ) THEN
+*
+*        Compute the factorization A = U*D*U' or A = L*D*L'.
+*
+         CALL CLACPY( UPLO, N, N, A, LDA, AF, LDAF )
+         CALL CHETRF( UPLO, N, AF, LDAF, IPIV, WORK, LWORK, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      ANORM = CLANHE( 'I', UPLO, N, A, LDA, RWORK )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL CHECON( UPLO, N, AF, LDAF, IPIV, ANORM, RCOND, WORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.SLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution vectors X.
+*
+      CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL CHETRS( UPLO, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solutions and
+*     compute error bounds and backward error estimates for them.
+*
+      CALL CHERFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, X,
+     $             LDX, FERR, BERR, WORK, RWORK, INFO )
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of CHESVX
+*
+      END
diff --git a/costa/native/external/lapack/chetd2.f b/costa/native/external/lapack/chetd2.f
new file mode 100644
index 000000000..449a29882
--- /dev/null
+++ b/costa/native/external/lapack/chetd2.f
@@ -0,0 +1,259 @@
+      SUBROUTINE CHETD2( UPLO, N, A, LDA, D, E, TAU, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * ), E( * )
+      COMPLEX            A( LDA, * ), TAU( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHETD2 reduces a complex Hermitian matrix A to real symmetric
+*  tridiagonal form T by a unitary similarity transformation:
+*  Q' * A * Q = T.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          Hermitian matrix A is stored:
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+*          n-by-n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n-by-n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*          On exit, if UPLO = 'U', the diagonal and first superdiagonal
+*          of A are overwritten by the corresponding elements of the
+*          tridiagonal matrix T, and the elements above the first
+*          superdiagonal, with the array TAU, represent the unitary
+*          matrix Q as a product of elementary reflectors; if UPLO
+*          = 'L', the diagonal and first subdiagonal of A are over-
+*          written by the corresponding elements of the tridiagonal
+*          matrix T, and the elements below the first subdiagonal, with
+*          the array TAU, represent the unitary matrix Q as a product
+*          of elementary reflectors. See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  D       (output) REAL array, dimension (N)
+*          The diagonal elements of the tridiagonal matrix T:
+*          D(i) = A(i,i).
+*
+*  E       (output) REAL array, dimension (N-1)
+*          The off-diagonal elements of the tridiagonal matrix T:
+*          E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.
+*
+*  TAU     (output) COMPLEX array, dimension (N-1)
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  If UPLO = 'U', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(n-1) . . . H(2) H(1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
+*  A(1:i-1,i+1), and tau in TAU(i).
+*
+*  If UPLO = 'L', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(1) H(2) . . . H(n-1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i),
+*  and tau in TAU(i).
+*
+*  The contents of A on exit are illustrated by the following examples
+*  with n = 5:
+*
+*  if UPLO = 'U':                       if UPLO = 'L':
+*
+*    (  d   e   v2  v3  v4 )              (  d                  )
+*    (      d   e   v3  v4 )              (  e   d              )
+*    (          d   e   v4 )              (  v1  e   d          )
+*    (              d   e  )              (  v1  v2  e   d      )
+*    (                  d  )              (  v1  v2  v3  e   d  )
+*
+*  where d and e denote diagonal and off-diagonal elements of T, and vi
+*  denotes an element of the vector defining H(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE, ZERO, HALF
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ),
+     $                   ZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   HALF = ( 0.5E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I
+      COMPLEX            ALPHA, TAUI
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CHEMV, CHER2, CLARFG, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      COMPLEX            CDOTC
+      EXTERNAL           LSAME, CDOTC
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHETD2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Reduce the upper triangle of A
+*
+         A( N, N ) = REAL( A( N, N ) )
+         DO 10 I = N - 1, 1, -1
+*
+*           Generate elementary reflector H(i) = I - tau * v * v'
+*           to annihilate A(1:i-1,i+1)
+*
+            ALPHA = A( I, I+1 )
+            CALL CLARFG( I, ALPHA, A( 1, I+1 ), 1, TAUI )
+            E( I ) = ALPHA
+*
+            IF( TAUI.NE.ZERO ) THEN
+*
+*              Apply H(i) from both sides to A(1:i,1:i)
+*
+               A( I, I+1 ) = ONE
+*
+*              Compute  x := tau * A * v  storing x in TAU(1:i)
+*
+               CALL CHEMV( UPLO, I, TAUI, A, LDA, A( 1, I+1 ), 1, ZERO,
+     $                     TAU, 1 )
+*
+*              Compute  w := x - 1/2 * tau * (x'*v) * v
+*
+               ALPHA = -HALF*TAUI*CDOTC( I, TAU, 1, A( 1, I+1 ), 1 )
+               CALL CAXPY( I, ALPHA, A( 1, I+1 ), 1, TAU, 1 )
+*
+*              Apply the transformation as a rank-2 update:
+*                 A := A - v * w' - w * v'
+*
+               CALL CHER2( UPLO, I, -ONE, A( 1, I+1 ), 1, TAU, 1, A,
+     $                     LDA )
+*
+            ELSE
+               A( I, I ) = REAL( A( I, I ) )
+            END IF
+            A( I, I+1 ) = E( I )
+            D( I+1 ) = A( I+1, I+1 )
+            TAU( I ) = TAUI
+   10    CONTINUE
+         D( 1 ) = A( 1, 1 )
+      ELSE
+*
+*        Reduce the lower triangle of A
+*
+         A( 1, 1 ) = REAL( A( 1, 1 ) )
+         DO 20 I = 1, N - 1
+*
+*           Generate elementary reflector H(i) = I - tau * v * v'
+*           to annihilate A(i+2:n,i)
+*
+            ALPHA = A( I+1, I )
+            CALL CLARFG( N-I, ALPHA, A( MIN( I+2, N ), I ), 1, TAUI )
+            E( I ) = ALPHA
+*
+            IF( TAUI.NE.ZERO ) THEN
+*
+*              Apply H(i) from both sides to A(i+1:n,i+1:n)
+*
+               A( I+1, I ) = ONE
+*
+*              Compute  x := tau * A * v  storing y in TAU(i:n-1)
+*
+               CALL CHEMV( UPLO, N-I, TAUI, A( I+1, I+1 ), LDA,
+     $                     A( I+1, I ), 1, ZERO, TAU( I ), 1 )
+*
+*              Compute  w := x - 1/2 * tau * (x'*v) * v
+*
+               ALPHA = -HALF*TAUI*CDOTC( N-I, TAU( I ), 1, A( I+1, I ),
+     $                 1 )
+               CALL CAXPY( N-I, ALPHA, A( I+1, I ), 1, TAU( I ), 1 )
+*
+*              Apply the transformation as a rank-2 update:
+*                 A := A - v * w' - w * v'
+*
+               CALL CHER2( UPLO, N-I, -ONE, A( I+1, I ), 1, TAU( I ), 1,
+     $                     A( I+1, I+1 ), LDA )
+*
+            ELSE
+               A( I+1, I+1 ) = REAL( A( I+1, I+1 ) )
+            END IF
+            A( I+1, I ) = E( I )
+            D( I ) = A( I, I )
+            TAU( I ) = TAUI
+   20    CONTINUE
+         D( N ) = A( N, N )
+      END IF
+*
+      RETURN
+*
+*     End of CHETD2
+*
+      END
diff --git a/costa/native/external/lapack/chetf2.f b/costa/native/external/lapack/chetf2.f
new file mode 100644
index 000000000..faa0a0385
--- /dev/null
+++ b/costa/native/external/lapack/chetf2.f
@@ -0,0 +1,544 @@
+      SUBROUTINE CHETF2( UPLO, N, A, LDA, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHETF2 computes the factorization of a complex Hermitian matrix A
+*  using the Bunch-Kaufman diagonal pivoting method:
+*
+*     A = U*D*U'  or  A = L*D*L'
+*
+*  where U (or L) is a product of permutation and unit upper (lower)
+*  triangular matrices, U' is the conjugate transpose of U, and D is
+*  Hermitian and block diagonal with 1-by-1 and 2-by-2 diagonal blocks.
+*
+*  This is the unblocked version of the algorithm, calling Level 2 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          Hermitian matrix A is stored:
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+*          n-by-n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n-by-n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L (see below for further details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D.
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*          > 0: if INFO = k, D(k,k) is exactly zero.  The factorization
+*               has been completed, but the block diagonal matrix D is
+*               exactly singular, and division by zero will occur if it
+*               is used to solve a system of equations.
+*
+*  Further Details
+*  ===============
+*
+*  1-96 - Based on modifications by
+*    J. Lewis, Boeing Computer Services Company
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+*  If UPLO = 'U', then A = U*D*U', where
+*     U = P(n)*U(n)* ... *P(k)U(k)* ...,
+*  i.e., U is a product of terms P(k)*U(k), where k decreases from n to
+*  1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and U(k) is a unit upper triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    v    0   )   k-s
+*     U(k) =  (   0    I    0   )   s
+*             (   0    0    I   )   n-k
+*                k-s   s   n-k
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k).
+*  If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k),
+*  and A(k,k), and v overwrites A(1:k-2,k-1:k).
+*
+*  If UPLO = 'L', then A = L*D*L', where
+*     L = P(1)*L(1)* ... *P(k)*L(k)* ...,
+*  i.e., L is a product of terms P(k)*L(k), where k increases from 1 to
+*  n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and L(k) is a unit lower triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    0     0   )  k-1
+*     L(k) =  (   0    I     0   )  s
+*             (   0    v     I   )  n-k-s+1
+*                k-1   s  n-k-s+1
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k).
+*  If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k),
+*  and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+      REAL               EIGHT, SEVTEN
+      PARAMETER          ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, IMAX, J, JMAX, K, KK, KP, KSTEP
+      REAL               ABSAKK, ALPHA, COLMAX, D, D11, D22, R1, ROWMAX,
+     $                   TT
+      COMPLEX            D12, D21, T, WK, WKM1, WKP1, ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ICAMAX
+      REAL               SLAPY2
+      EXTERNAL           LSAME, ICAMAX, SLAPY2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CHER, CSSCAL, CSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, CMPLX, CONJG, MAX, REAL, SQRT
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHETF2', -INFO )
+         RETURN
+      END IF
+*
+*     Initialize ALPHA for use in choosing pivot block size.
+*
+      ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT
+*
+      IF( UPPER ) THEN
+*
+*        Factorize A as U*D*U' using the upper triangle of A
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2
+*
+         K = N
+   10    CONTINUE
+*
+*        If K < 1, exit from loop
+*
+         IF( K.LT.1 )
+     $      GO TO 90
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = ABS( REAL( A( K, K ) ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.GT.1 ) THEN
+            IMAX = ICAMAX( K-1, A( 1, K ), 1 )
+            COLMAX = CABS1( A( IMAX, K ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+            A( K, K ) = REAL( A( K, K ) )
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               JMAX = IMAX + ICAMAX( K-IMAX, A( IMAX, IMAX+1 ), LDA )
+               ROWMAX = CABS1( A( IMAX, JMAX ) )
+               IF( IMAX.GT.1 ) THEN
+                  JMAX = ICAMAX( IMAX-1, A( 1, IMAX ), 1 )
+                  ROWMAX = MAX( ROWMAX, CABS1( A( JMAX, IMAX ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( ABS( REAL( A( IMAX, IMAX ) ) ).GE.ALPHA*ROWMAX )
+     $                   THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+               ELSE
+*
+*                 interchange rows and columns K-1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K - KSTEP + 1
+            IF( KP.NE.KK ) THEN
+*
+*              Interchange rows and columns KK and KP in the leading
+*              submatrix A(1:k,1:k)
+*
+               CALL CSWAP( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 )
+               DO 20 J = KP + 1, KK - 1
+                  T = CONJG( A( J, KK ) )
+                  A( J, KK ) = CONJG( A( KP, J ) )
+                  A( KP, J ) = T
+   20          CONTINUE
+               A( KP, KK ) = CONJG( A( KP, KK ) )
+               R1 = REAL( A( KK, KK ) )
+               A( KK, KK ) = REAL( A( KP, KP ) )
+               A( KP, KP ) = R1
+               IF( KSTEP.EQ.2 ) THEN
+                  A( K, K ) = REAL( A( K, K ) )
+                  T = A( K-1, K )
+                  A( K-1, K ) = A( KP, K )
+                  A( KP, K ) = T
+               END IF
+            ELSE
+               A( K, K ) = REAL( A( K, K ) )
+               IF( KSTEP.EQ.2 )
+     $            A( K-1, K-1 ) = REAL( A( K-1, K-1 ) )
+            END IF
+*
+*           Update the leading submatrix
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column k now holds
+*
+*              W(k) = U(k)*D(k)
+*
+*              where U(k) is the k-th column of U
+*
+*              Perform a rank-1 update of A(1:k-1,1:k-1) as
+*
+*              A := A - U(k)*D(k)*U(k)' = A - W(k)*1/D(k)*W(k)'
+*
+               R1 = ONE / REAL( A( K, K ) )
+               CALL CHER( UPLO, K-1, -R1, A( 1, K ), 1, A, LDA )
+*
+*              Store U(k) in column k
+*
+               CALL CSSCAL( K-1, R1, A( 1, K ), 1 )
+            ELSE
+*
+*              2-by-2 pivot block D(k): columns k and k-1 now hold
+*
+*              ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k)
+*
+*              where U(k) and U(k-1) are the k-th and (k-1)-th columns
+*              of U
+*
+*              Perform a rank-2 update of A(1:k-2,1:k-2) as
+*
+*              A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )'
+*                 = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )'
+*
+               IF( K.GT.2 ) THEN
+*
+                  D = SLAPY2( REAL( A( K-1, K ) ),
+     $                AIMAG( A( K-1, K ) ) )
+                  D22 = REAL( A( K-1, K-1 ) ) / D
+                  D11 = REAL( A( K, K ) ) / D
+                  TT = ONE / ( D11*D22-ONE )
+                  D12 = A( K-1, K ) / D
+                  D = TT / D
+*
+                  DO 40 J = K - 2, 1, -1
+                     WKM1 = D*( D11*A( J, K-1 )-CONJG( D12 )*A( J, K ) )
+                     WK = D*( D22*A( J, K )-D12*A( J, K-1 ) )
+                     DO 30 I = J, 1, -1
+                        A( I, J ) = A( I, J ) - A( I, K )*CONJG( WK ) -
+     $                              A( I, K-1 )*CONJG( WKM1 )
+   30                CONTINUE
+                     A( J, K ) = WK
+                     A( J, K-1 ) = WKM1
+                     A( J, J ) = CMPLX( REAL( A( J, J ) ), 0.0E+0 )
+   40             CONTINUE
+*
+               END IF
+*
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K-1 ) = -KP
+         END IF
+*
+*        Decrease K and return to the start of the main loop
+*
+         K = K - KSTEP
+         GO TO 10
+*
+      ELSE
+*
+*        Factorize A as L*D*L' using the lower triangle of A
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2
+*
+         K = 1
+   50    CONTINUE
+*
+*        If K > N, exit from loop
+*
+         IF( K.GT.N )
+     $      GO TO 90
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = ABS( REAL( A( K, K ) ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.LT.N ) THEN
+            IMAX = K + ICAMAX( N-K, A( K+1, K ), 1 )
+            COLMAX = CABS1( A( IMAX, K ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+            A( K, K ) = REAL( A( K, K ) )
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               JMAX = K - 1 + ICAMAX( IMAX-K, A( IMAX, K ), LDA )
+               ROWMAX = CABS1( A( IMAX, JMAX ) )
+               IF( IMAX.LT.N ) THEN
+                  JMAX = IMAX + ICAMAX( N-IMAX, A( IMAX+1, IMAX ), 1 )
+                  ROWMAX = MAX( ROWMAX, CABS1( A( JMAX, IMAX ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( ABS( REAL( A( IMAX, IMAX ) ) ).GE.ALPHA*ROWMAX )
+     $                   THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+               ELSE
+*
+*                 interchange rows and columns K+1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K + KSTEP - 1
+            IF( KP.NE.KK ) THEN
+*
+*              Interchange rows and columns KK and KP in the trailing
+*              submatrix A(k:n,k:n)
+*
+               IF( KP.LT.N )
+     $            CALL CSWAP( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 )
+               DO 60 J = KK + 1, KP - 1
+                  T = CONJG( A( J, KK ) )
+                  A( J, KK ) = CONJG( A( KP, J ) )
+                  A( KP, J ) = T
+   60          CONTINUE
+               A( KP, KK ) = CONJG( A( KP, KK ) )
+               R1 = REAL( A( KK, KK ) )
+               A( KK, KK ) = REAL( A( KP, KP ) )
+               A( KP, KP ) = R1
+               IF( KSTEP.EQ.2 ) THEN
+                  A( K, K ) = REAL( A( K, K ) )
+                  T = A( K+1, K )
+                  A( K+1, K ) = A( KP, K )
+                  A( KP, K ) = T
+               END IF
+            ELSE
+               A( K, K ) = REAL( A( K, K ) )
+               IF( KSTEP.EQ.2 )
+     $            A( K+1, K+1 ) = REAL( A( K+1, K+1 ) )
+            END IF
+*
+*           Update the trailing submatrix
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column k now holds
+*
+*              W(k) = L(k)*D(k)
+*
+*              where L(k) is the k-th column of L
+*
+               IF( K.LT.N ) THEN
+*
+*                 Perform a rank-1 update of A(k+1:n,k+1:n) as
+*
+*                 A := A - L(k)*D(k)*L(k)' = A - W(k)*(1/D(k))*W(k)'
+*
+                  R1 = ONE / REAL( A( K, K ) )
+                  CALL CHER( UPLO, N-K, -R1, A( K+1, K ), 1,
+     $                       A( K+1, K+1 ), LDA )
+*
+*                 Store L(k) in column K
+*
+                  CALL CSSCAL( N-K, R1, A( K+1, K ), 1 )
+               END IF
+            ELSE
+*
+*              2-by-2 pivot block D(k)
+*
+               IF( K.LT.N-1 ) THEN
+*
+*                 Perform a rank-2 update of A(k+2:n,k+2:n) as
+*
+*                 A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )'
+*                    = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )'
+*
+*                 where L(k) and L(k+1) are the k-th and (k+1)-th
+*                 columns of L
+*
+                  D = SLAPY2( REAL( A( K+1, K ) ),
+     $                        AIMAG( A( K+1, K ) ) )
+                  D11 = REAL( A( K+1, K+1 ) ) / D
+                  D22 = REAL( A( K, K ) ) / D
+                  TT = ONE / ( D11*D22-ONE )
+                  D21 = A( K+1, K ) / D
+                  D =  TT / D
+*
+                  DO 80 J = K + 2, N
+                     WK = D*( D11*A( J, K )-D21*A( J, K+1 ) )
+                     WKP1 = D*( D22*A( J, K+1 )-CONJG( D21 )*A( J, K ) )
+                     DO 70 I = J, N
+                        A( I, J ) = A( I, J ) - A( I, K )*CONJG( WK ) -
+     $                              A( I, K+1 )*CONJG( WKP1 )
+   70                CONTINUE
+                     A( J, K ) = WK
+                     A( J, K+1 ) = WKP1
+                     A( J, J ) = CMPLX( REAL( A( J, J ) ), 0.0E+0 )
+   80             CONTINUE
+               END IF
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K+1 ) = -KP
+         END IF
+*
+*        Increase K and return to the start of the main loop
+*
+         K = K + KSTEP
+         GO TO 50
+*
+      END IF
+*
+   90 CONTINUE
+      RETURN
+*
+*     End of CHETF2
+*
+      END
diff --git a/costa/native/external/lapack/chetrd.f b/costa/native/external/lapack/chetrd.f
new file mode 100644
index 000000000..6a453a076
--- /dev/null
+++ b/costa/native/external/lapack/chetrd.f
@@ -0,0 +1,297 @@
+      SUBROUTINE CHETRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * ), E( * )
+      COMPLEX            A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHETRD reduces a complex Hermitian matrix A to real symmetric
+*  tridiagonal form T by a unitary similarity transformation:
+*  Q**H * A * Q = T.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*          On exit, if UPLO = 'U', the diagonal and first superdiagonal
+*          of A are overwritten by the corresponding elements of the
+*          tridiagonal matrix T, and the elements above the first
+*          superdiagonal, with the array TAU, represent the unitary
+*          matrix Q as a product of elementary reflectors; if UPLO
+*          = 'L', the diagonal and first subdiagonal of A are over-
+*          written by the corresponding elements of the tridiagonal
+*          matrix T, and the elements below the first subdiagonal, with
+*          the array TAU, represent the unitary matrix Q as a product
+*          of elementary reflectors. See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  D       (output) REAL array, dimension (N)
+*          The diagonal elements of the tridiagonal matrix T:
+*          D(i) = A(i,i).
+*
+*  E       (output) REAL array, dimension (N-1)
+*          The off-diagonal elements of the tridiagonal matrix T:
+*          E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.
+*
+*  TAU     (output) COMPLEX array, dimension (N-1)
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= 1.
+*          For optimum performance LWORK >= N*NB, where NB is the
+*          optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  If UPLO = 'U', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(n-1) . . . H(2) H(1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
+*  A(1:i-1,i+1), and tau in TAU(i).
+*
+*  If UPLO = 'L', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(1) H(2) . . . H(n-1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i),
+*  and tau in TAU(i).
+*
+*  The contents of A on exit are illustrated by the following examples
+*  with n = 5:
+*
+*  if UPLO = 'U':                       if UPLO = 'L':
+*
+*    (  d   e   v2  v3  v4 )              (  d                  )
+*    (      d   e   v3  v4 )              (  e   d              )
+*    (          d   e   v4 )              (  v1  e   d          )
+*    (              d   e  )              (  v1  v2  e   d      )
+*    (                  d  )              (  v1  v2  v3  e   d  )
+*
+*  where d and e denote diagonal and off-diagonal elements of T, and vi
+*  denotes an element of the vector defining H(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+      COMPLEX            CONE
+      PARAMETER          ( CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, UPPER
+      INTEGER            I, IINFO, IWS, J, KK, LDWORK, LWKOPT, NB,
+     $                   NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CHER2K, CHETD2, CLATRD, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN
+         INFO = -9
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size.
+*
+         NB = ILAENV( 1, 'CHETRD', UPLO, N, -1, -1, -1 )
+         LWKOPT = N*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHETRD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NX = N
+      IWS = 1
+      IF( NB.GT.1 .AND. NB.LT.N ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code
+*        (last block is always handled by unblocked code).
+*
+         NX = MAX( NB, ILAENV( 3, 'CHETRD', UPLO, N, -1, -1, -1 ) )
+         IF( NX.LT.N ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = N
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  determine the
+*              minimum value of NB, and reduce NB or force use of
+*              unblocked code by setting NX = N.
+*
+               NB = MAX( LWORK / LDWORK, 1 )
+               NBMIN = ILAENV( 2, 'CHETRD', UPLO, N, -1, -1, -1 )
+               IF( NB.LT.NBMIN )
+     $            NX = N
+            END IF
+         ELSE
+            NX = N
+         END IF
+      ELSE
+         NB = 1
+      END IF
+*
+      IF( UPPER ) THEN
+*
+*        Reduce the upper triangle of A.
+*        Columns 1:kk are handled by the unblocked method.
+*
+         KK = N - ( ( N-NX+NB-1 ) / NB )*NB
+         DO 20 I = N - NB + 1, KK + 1, -NB
+*
+*           Reduce columns i:i+nb-1 to tridiagonal form and form the
+*           matrix W which is needed to update the unreduced part of
+*           the matrix
+*
+            CALL CLATRD( UPLO, I+NB-1, NB, A, LDA, E, TAU, WORK,
+     $                   LDWORK )
+*
+*           Update the unreduced submatrix A(1:i-1,1:i-1), using an
+*           update of the form:  A := A - V*W' - W*V'
+*
+            CALL CHER2K( UPLO, 'No transpose', I-1, NB, -CONE,
+     $                   A( 1, I ), LDA, WORK, LDWORK, ONE, A, LDA )
+*
+*           Copy superdiagonal elements back into A, and diagonal
+*           elements into D
+*
+            DO 10 J = I, I + NB - 1
+               A( J-1, J ) = E( J-1 )
+               D( J ) = A( J, J )
+   10       CONTINUE
+   20    CONTINUE
+*
+*        Use unblocked code to reduce the last or only block
+*
+         CALL CHETD2( UPLO, KK, A, LDA, D, E, TAU, IINFO )
+      ELSE
+*
+*        Reduce the lower triangle of A
+*
+         DO 40 I = 1, N - NX, NB
+*
+*           Reduce columns i:i+nb-1 to tridiagonal form and form the
+*           matrix W which is needed to update the unreduced part of
+*           the matrix
+*
+            CALL CLATRD( UPLO, N-I+1, NB, A( I, I ), LDA, E( I ),
+     $                   TAU( I ), WORK, LDWORK )
+*
+*           Update the unreduced submatrix A(i+nb:n,i+nb:n), using
+*           an update of the form:  A := A - V*W' - W*V'
+*
+            CALL CHER2K( UPLO, 'No transpose', N-I-NB+1, NB, -CONE,
+     $                   A( I+NB, I ), LDA, WORK( NB+1 ), LDWORK, ONE,
+     $                   A( I+NB, I+NB ), LDA )
+*
+*           Copy subdiagonal elements back into A, and diagonal
+*           elements into D
+*
+            DO 30 J = I, I + NB - 1
+               A( J+1, J ) = E( J )
+               D( J ) = A( J, J )
+   30       CONTINUE
+   40    CONTINUE
+*
+*        Use unblocked code to reduce the last or only block
+*
+         CALL CHETD2( UPLO, N-I+1, A( I, I ), LDA, D( I ), E( I ),
+     $                TAU( I ), IINFO )
+      END IF
+*
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of CHETRD
+*
+      END
diff --git a/costa/native/external/lapack/chetrf.f b/costa/native/external/lapack/chetrf.f
new file mode 100644
index 000000000..21c74f081
--- /dev/null
+++ b/costa/native/external/lapack/chetrf.f
@@ -0,0 +1,282 @@
+      SUBROUTINE CHETRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHETRF computes the factorization of a complex Hermitian matrix A
+*  using the Bunch-Kaufman diagonal pivoting method.  The form of the
+*  factorization is
+*
+*     A = U*D*U**H  or  A = L*D*L**H
+*
+*  where U (or L) is a product of permutation and unit upper (lower)
+*  triangular matrices, and D is Hermitian and block diagonal with 
+*  1-by-1 and 2-by-2 diagonal blocks.
+*
+*  This is the blocked version of the algorithm, calling Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L (see below for further details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D.
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of WORK.  LWORK >=1.  For best performance
+*          LWORK >= N*NB, where NB is the block size returned by ILAENV.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, D(i,i) is exactly zero.  The factorization
+*                has been completed, but the block diagonal matrix D is
+*                exactly singular, and division by zero will occur if it
+*                is used to solve a system of equations.
+*
+*  Further Details
+*  ===============
+*
+*  If UPLO = 'U', then A = U*D*U', where
+*     U = P(n)*U(n)* ... *P(k)U(k)* ...,
+*  i.e., U is a product of terms P(k)*U(k), where k decreases from n to
+*  1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and U(k) is a unit upper triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    v    0   )   k-s
+*     U(k) =  (   0    I    0   )   s
+*             (   0    0    I   )   n-k
+*                k-s   s   n-k
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k).
+*  If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k),
+*  and A(k,k), and v overwrites A(1:k-2,k-1:k).
+*
+*  If UPLO = 'L', then A = L*D*L', where
+*     L = P(1)*L(1)* ... *P(k)*L(k)* ...,
+*  i.e., L is a product of terms P(k)*L(k), where k increases from 1 to
+*  n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and L(k) is a unit lower triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    0     0   )  k-1
+*     L(k) =  (   0    I     0   )  s
+*             (   0    v     I   )  n-k-s+1
+*                k-1   s  n-k-s+1
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k).
+*  If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k),
+*  and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, UPPER
+      INTEGER            IINFO, IWS, J, K, KB, LDWORK, LWKOPT, NB, NBMIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CHETF2, CLAHEF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size
+*
+         NB = ILAENV( 1, 'CHETRF', UPLO, N, -1, -1, -1 )
+         LWKOPT = N*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHETRF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      LDWORK = N
+      IF( NB.GT.1 .AND. NB.LT.N ) THEN
+         IWS = LDWORK*NB
+         IF( LWORK.LT.IWS ) THEN
+            NB = MAX( LWORK / LDWORK, 1 )
+            NBMIN = MAX( 2, ILAENV( 2, 'CHETRF', UPLO, N, -1, -1, -1 ) )
+         END IF
+      ELSE
+         IWS = 1
+      END IF
+      IF( NB.LT.NBMIN )
+     $   NB = N
+*
+      IF( UPPER ) THEN
+*
+*        Factorize A as U*D*U' using the upper triangle of A
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        KB, where KB is the number of columns factorized by CLAHEF;
+*        KB is either NB or NB-1, or K for the last block
+*
+         K = N
+   10    CONTINUE
+*
+*        If K < 1, exit from loop
+*
+         IF( K.LT.1 )
+     $      GO TO 40
+*
+         IF( K.GT.NB ) THEN
+*
+*           Factorize columns k-kb+1:k of A and use blocked code to
+*           update columns 1:k-kb
+*
+            CALL CLAHEF( UPLO, K, NB, KB, A, LDA, IPIV, WORK, N, IINFO )
+         ELSE
+*
+*           Use unblocked code to factorize columns 1:k of A
+*
+            CALL CHETF2( UPLO, K, A, LDA, IPIV, IINFO )
+            KB = K
+         END IF
+*
+*        Set INFO on the first occurrence of a zero pivot
+*
+         IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+     $      INFO = IINFO
+*
+*        Decrease K and return to the start of the main loop
+*
+         K = K - KB
+         GO TO 10
+*
+      ELSE
+*
+*        Factorize A as L*D*L' using the lower triangle of A
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        KB, where KB is the number of columns factorized by CLAHEF;
+*        KB is either NB or NB-1, or N-K+1 for the last block
+*
+         K = 1
+   20    CONTINUE
+*
+*        If K > N, exit from loop
+*
+         IF( K.GT.N )
+     $      GO TO 40
+*
+         IF( K.LE.N-NB ) THEN
+*
+*           Factorize columns k:k+kb-1 of A and use blocked code to
+*           update columns k+kb:n
+*
+            CALL CLAHEF( UPLO, N-K+1, NB, KB, A( K, K ), LDA, IPIV( K ),
+     $                   WORK, N, IINFO )
+         ELSE
+*
+*           Use unblocked code to factorize columns k:n of A
+*
+            CALL CHETF2( UPLO, N-K+1, A( K, K ), LDA, IPIV( K ), IINFO )
+            KB = N - K + 1
+         END IF
+*
+*        Set INFO on the first occurrence of a zero pivot
+*
+         IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+     $      INFO = IINFO + K - 1
+*
+*        Adjust IPIV
+*
+         DO 30 J = K, K + KB - 1
+            IF( IPIV( J ).GT.0 ) THEN
+               IPIV( J ) = IPIV( J ) + K - 1
+            ELSE
+               IPIV( J ) = IPIV( J ) - K + 1
+            END IF
+   30    CONTINUE
+*
+*        Increase K and return to the start of the main loop
+*
+         K = K + KB
+         GO TO 20
+*
+      END IF
+*
+   40 CONTINUE
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of CHETRF
+*
+      END
diff --git a/costa/native/external/lapack/chetri.f b/costa/native/external/lapack/chetri.f
new file mode 100644
index 000000000..e72cd3730
--- /dev/null
+++ b/costa/native/external/lapack/chetri.f
@@ -0,0 +1,328 @@
+      SUBROUTINE CHETRI( UPLO, N, A, LDA, IPIV, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHETRI computes the inverse of a complex Hermitian indefinite matrix
+*  A using the factorization A = U*D*U**H or A = L*D*L**H computed by
+*  CHETRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**H;
+*          = 'L':  Lower triangular, form is A = L*D*L**H.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the block diagonal matrix D and the multipliers
+*          used to obtain the factor U or L as computed by CHETRF.
+*
+*          On exit, if INFO = 0, the (Hermitian) inverse of the original
+*          matrix.  If UPLO = 'U', the upper triangular part of the
+*          inverse is formed and the part of A below the diagonal is not
+*          referenced; if UPLO = 'L' the lower triangular part of the
+*          inverse is formed and the part of A above the diagonal is
+*          not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by CHETRF.
+*
+*  WORK    (workspace) COMPLEX array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its
+*               inverse could not be computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      COMPLEX            CONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, CONE = ( 1.0E+0, 0.0E+0 ),
+     $                   ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, K, KP, KSTEP
+      REAL               AK, AKP1, D, T
+      COMPLEX            AKKP1, TEMP
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      COMPLEX            CDOTC
+      EXTERNAL           LSAME, CDOTC
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CHEMV, CSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, CONJG, MAX, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHETRI', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Check that the diagonal matrix D is nonsingular.
+*
+      IF( UPPER ) THEN
+*
+*        Upper triangular storage: examine D from bottom to top
+*
+         DO 10 INFO = N, 1, -1
+            IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO )
+     $         RETURN
+   10    CONTINUE
+      ELSE
+*
+*        Lower triangular storage: examine D from top to bottom.
+*
+         DO 20 INFO = 1, N
+            IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO )
+     $         RETURN
+   20    CONTINUE
+      END IF
+      INFO = 0
+*
+      IF( UPPER ) THEN
+*
+*        Compute inv(A) from the factorization A = U*D*U'.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+   30    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 50
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Invert the diagonal block.
+*
+            A( K, K ) = ONE / REAL( A( K, K ) )
+*
+*           Compute column K of the inverse.
+*
+            IF( K.GT.1 ) THEN
+               CALL CCOPY( K-1, A( 1, K ), 1, WORK, 1 )
+               CALL CHEMV( UPLO, K-1, -CONE, A, LDA, WORK, 1, ZERO,
+     $                     A( 1, K ), 1 )
+               A( K, K ) = A( K, K ) - REAL( CDOTC( K-1, WORK, 1, A( 1,
+     $                     K ), 1 ) )
+            END IF
+            KSTEP = 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Invert the diagonal block.
+*
+            T = ABS( A( K, K+1 ) )
+            AK = REAL( A( K, K ) ) / T
+            AKP1 = REAL( A( K+1, K+1 ) ) / T
+            AKKP1 = A( K, K+1 ) / T
+            D = T*( AK*AKP1-ONE )
+            A( K, K ) = AKP1 / D
+            A( K+1, K+1 ) = AK / D
+            A( K, K+1 ) = -AKKP1 / D
+*
+*           Compute columns K and K+1 of the inverse.
+*
+            IF( K.GT.1 ) THEN
+               CALL CCOPY( K-1, A( 1, K ), 1, WORK, 1 )
+               CALL CHEMV( UPLO, K-1, -CONE, A, LDA, WORK, 1, ZERO,
+     $                     A( 1, K ), 1 )
+               A( K, K ) = A( K, K ) - REAL( CDOTC( K-1, WORK, 1, A( 1,
+     $                     K ), 1 ) )
+               A( K, K+1 ) = A( K, K+1 ) -
+     $                       CDOTC( K-1, A( 1, K ), 1, A( 1, K+1 ), 1 )
+               CALL CCOPY( K-1, A( 1, K+1 ), 1, WORK, 1 )
+               CALL CHEMV( UPLO, K-1, -CONE, A, LDA, WORK, 1, ZERO,
+     $                     A( 1, K+1 ), 1 )
+               A( K+1, K+1 ) = A( K+1, K+1 ) -
+     $                         REAL( CDOTC( K-1, WORK, 1, A( 1, K+1 ),
+     $                         1 ) )
+            END IF
+            KSTEP = 2
+         END IF
+*
+         KP = ABS( IPIV( K ) )
+         IF( KP.NE.K ) THEN
+*
+*           Interchange rows and columns K and KP in the leading
+*           submatrix A(1:k+1,1:k+1)
+*
+            CALL CSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 )
+            DO 40 J = KP + 1, K - 1
+               TEMP = CONJG( A( J, K ) )
+               A( J, K ) = CONJG( A( KP, J ) )
+               A( KP, J ) = TEMP
+   40       CONTINUE
+            A( KP, K ) = CONJG( A( KP, K ) )
+            TEMP = A( K, K )
+            A( K, K ) = A( KP, KP )
+            A( KP, KP ) = TEMP
+            IF( KSTEP.EQ.2 ) THEN
+               TEMP = A( K, K+1 )
+               A( K, K+1 ) = A( KP, K+1 )
+               A( KP, K+1 ) = TEMP
+            END IF
+         END IF
+*
+         K = K + KSTEP
+         GO TO 30
+   50    CONTINUE
+*
+      ELSE
+*
+*        Compute inv(A) from the factorization A = L*D*L'.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = N
+   60    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 80
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Invert the diagonal block.
+*
+            A( K, K ) = ONE / REAL( A( K, K ) )
+*
+*           Compute column K of the inverse.
+*
+            IF( K.LT.N ) THEN
+               CALL CCOPY( N-K, A( K+1, K ), 1, WORK, 1 )
+               CALL CHEMV( UPLO, N-K, -CONE, A( K+1, K+1 ), LDA, WORK,
+     $                     1, ZERO, A( K+1, K ), 1 )
+               A( K, K ) = A( K, K ) - REAL( CDOTC( N-K, WORK, 1,
+     $                     A( K+1, K ), 1 ) )
+            END IF
+            KSTEP = 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Invert the diagonal block.
+*
+            T = ABS( A( K, K-1 ) )
+            AK = REAL( A( K-1, K-1 ) ) / T
+            AKP1 = REAL( A( K, K ) ) / T
+            AKKP1 = A( K, K-1 ) / T
+            D = T*( AK*AKP1-ONE )
+            A( K-1, K-1 ) = AKP1 / D
+            A( K, K ) = AK / D
+            A( K, K-1 ) = -AKKP1 / D
+*
+*           Compute columns K-1 and K of the inverse.
+*
+            IF( K.LT.N ) THEN
+               CALL CCOPY( N-K, A( K+1, K ), 1, WORK, 1 )
+               CALL CHEMV( UPLO, N-K, -CONE, A( K+1, K+1 ), LDA, WORK,
+     $                     1, ZERO, A( K+1, K ), 1 )
+               A( K, K ) = A( K, K ) - REAL( CDOTC( N-K, WORK, 1,
+     $                     A( K+1, K ), 1 ) )
+               A( K, K-1 ) = A( K, K-1 ) -
+     $                       CDOTC( N-K, A( K+1, K ), 1, A( K+1, K-1 ),
+     $                       1 )
+               CALL CCOPY( N-K, A( K+1, K-1 ), 1, WORK, 1 )
+               CALL CHEMV( UPLO, N-K, -CONE, A( K+1, K+1 ), LDA, WORK,
+     $                     1, ZERO, A( K+1, K-1 ), 1 )
+               A( K-1, K-1 ) = A( K-1, K-1 ) -
+     $                         REAL( CDOTC( N-K, WORK, 1, A( K+1, K-1 ),
+     $                         1 ) )
+            END IF
+            KSTEP = 2
+         END IF
+*
+         KP = ABS( IPIV( K ) )
+         IF( KP.NE.K ) THEN
+*
+*           Interchange rows and columns K and KP in the trailing
+*           submatrix A(k-1:n,k-1:n)
+*
+            IF( KP.LT.N )
+     $         CALL CSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 )
+            DO 70 J = K + 1, KP - 1
+               TEMP = CONJG( A( J, K ) )
+               A( J, K ) = CONJG( A( KP, J ) )
+               A( KP, J ) = TEMP
+   70       CONTINUE
+            A( KP, K ) = CONJG( A( KP, K ) )
+            TEMP = A( K, K )
+            A( K, K ) = A( KP, KP )
+            A( KP, KP ) = TEMP
+            IF( KSTEP.EQ.2 ) THEN
+               TEMP = A( K, K-1 )
+               A( K, K-1 ) = A( KP, K-1 )
+               A( KP, K-1 ) = TEMP
+            END IF
+         END IF
+*
+         K = K - KSTEP
+         GO TO 60
+   80    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CHETRI
+*
+      END
diff --git a/costa/native/external/lapack/chetrs.f b/costa/native/external/lapack/chetrs.f
new file mode 100644
index 000000000..73a61895d
--- /dev/null
+++ b/costa/native/external/lapack/chetrs.f
@@ -0,0 +1,394 @@
+      SUBROUTINE CHETRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHETRS solves a system of linear equations A*X = B with a complex
+*  Hermitian matrix A using the factorization A = U*D*U**H or
+*  A = L*D*L**H computed by CHETRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**H;
+*          = 'L':  Lower triangular, form is A = L*D*L**H.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input) COMPLEX array, dimension (LDA,N)
+*          The block diagonal matrix D and the multipliers used to
+*          obtain the factor U or L as computed by CHETRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by CHETRF.
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, K, KP
+      REAL               S
+      COMPLEX            AK, AKM1, AKM1K, BK, BKM1, DENOM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEMV, CGERU, CLACGV, CSSCAL, CSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX, REAL
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHETRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Solve A*X = B, where A = U*D*U'.
+*
+*        First solve U*D*X = B, overwriting B with X.
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = N
+   10    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 30
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(U(K)), where U(K) is the transformation
+*           stored in column K of A.
+*
+            CALL CGERU( K-1, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB,
+     $                  B( 1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            S = REAL( ONE ) / REAL( A( K, K ) )
+            CALL CSSCAL( NRHS, S, B( K, 1 ), LDB )
+            K = K - 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Interchange rows K-1 and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K-1 )
+     $         CALL CSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(U(K)), where U(K) is the transformation
+*           stored in columns K-1 and K of A.
+*
+            CALL CGERU( K-2, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB,
+     $                  B( 1, 1 ), LDB )
+            CALL CGERU( K-2, NRHS, -ONE, A( 1, K-1 ), 1, B( K-1, 1 ),
+     $                  LDB, B( 1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            AKM1K = A( K-1, K )
+            AKM1 = A( K-1, K-1 ) / AKM1K
+            AK = A( K, K ) / CONJG( AKM1K )
+            DENOM = AKM1*AK - ONE
+            DO 20 J = 1, NRHS
+               BKM1 = B( K-1, J ) / AKM1K
+               BK = B( K, J ) / CONJG( AKM1K )
+               B( K-1, J ) = ( AK*BKM1-BK ) / DENOM
+               B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM
+   20       CONTINUE
+            K = K - 2
+         END IF
+*
+         GO TO 10
+   30    CONTINUE
+*
+*        Next solve U'*X = B, overwriting B with X.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+   40    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 50
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Multiply by inv(U'(K)), where U(K) is the transformation
+*           stored in column K of A.
+*
+            IF( K.GT.1 ) THEN
+               CALL CLACGV( NRHS, B( K, 1 ), LDB )
+               CALL CGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B,
+     $                     LDB, A( 1, K ), 1, ONE, B( K, 1 ), LDB )
+               CALL CLACGV( NRHS, B( K, 1 ), LDB )
+            END IF
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            K = K + 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Multiply by inv(U'(K+1)), where U(K+1) is the transformation
+*           stored in columns K and K+1 of A.
+*
+            IF( K.GT.1 ) THEN
+               CALL CLACGV( NRHS, B( K, 1 ), LDB )
+               CALL CGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B,
+     $                     LDB, A( 1, K ), 1, ONE, B( K, 1 ), LDB )
+               CALL CLACGV( NRHS, B( K, 1 ), LDB )
+*
+               CALL CLACGV( NRHS, B( K+1, 1 ), LDB )
+               CALL CGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B,
+     $                     LDB, A( 1, K+1 ), 1, ONE, B( K+1, 1 ), LDB )
+               CALL CLACGV( NRHS, B( K+1, 1 ), LDB )
+            END IF
+*
+*           Interchange rows K and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K )
+     $         CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            K = K + 2
+         END IF
+*
+         GO TO 40
+   50    CONTINUE
+*
+      ELSE
+*
+*        Solve A*X = B, where A = L*D*L'.
+*
+*        First solve L*D*X = B, overwriting B with X.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+   60    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 80
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(L(K)), where L(K) is the transformation
+*           stored in column K of A.
+*
+            IF( K.LT.N )
+     $         CALL CGERU( N-K, NRHS, -ONE, A( K+1, K ), 1, B( K, 1 ),
+     $                     LDB, B( K+1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            S = REAL( ONE ) / REAL( A( K, K ) )
+            CALL CSSCAL( NRHS, S, B( K, 1 ), LDB )
+            K = K + 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Interchange rows K+1 and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K+1 )
+     $         CALL CSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(L(K)), where L(K) is the transformation
+*           stored in columns K and K+1 of A.
+*
+            IF( K.LT.N-1 ) THEN
+               CALL CGERU( N-K-1, NRHS, -ONE, A( K+2, K ), 1, B( K, 1 ),
+     $                     LDB, B( K+2, 1 ), LDB )
+               CALL CGERU( N-K-1, NRHS, -ONE, A( K+2, K+1 ), 1,
+     $                     B( K+1, 1 ), LDB, B( K+2, 1 ), LDB )
+            END IF
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            AKM1K = A( K+1, K )
+            AKM1 = A( K, K ) / CONJG( AKM1K )
+            AK = A( K+1, K+1 ) / AKM1K
+            DENOM = AKM1*AK - ONE
+            DO 70 J = 1, NRHS
+               BKM1 = B( K, J ) / CONJG( AKM1K )
+               BK = B( K+1, J ) / AKM1K
+               B( K, J ) = ( AK*BKM1-BK ) / DENOM
+               B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM
+   70       CONTINUE
+            K = K + 2
+         END IF
+*
+         GO TO 60
+   80    CONTINUE
+*
+*        Next solve L'*X = B, overwriting B with X.
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = N
+   90    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 100
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Multiply by inv(L'(K)), where L(K) is the transformation
+*           stored in column K of A.
+*
+            IF( K.LT.N ) THEN
+               CALL CLACGV( NRHS, B( K, 1 ), LDB )
+               CALL CGEMV( 'Conjugate transpose', N-K, NRHS, -ONE,
+     $                     B( K+1, 1 ), LDB, A( K+1, K ), 1, ONE,
+     $                     B( K, 1 ), LDB )
+               CALL CLACGV( NRHS, B( K, 1 ), LDB )
+            END IF
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            K = K - 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Multiply by inv(L'(K-1)), where L(K-1) is the transformation
+*           stored in columns K-1 and K of A.
+*
+            IF( K.LT.N ) THEN
+               CALL CLACGV( NRHS, B( K, 1 ), LDB )
+               CALL CGEMV( 'Conjugate transpose', N-K, NRHS, -ONE,
+     $                     B( K+1, 1 ), LDB, A( K+1, K ), 1, ONE,
+     $                     B( K, 1 ), LDB )
+               CALL CLACGV( NRHS, B( K, 1 ), LDB )
+*
+               CALL CLACGV( NRHS, B( K-1, 1 ), LDB )
+               CALL CGEMV( 'Conjugate transpose', N-K, NRHS, -ONE,
+     $                     B( K+1, 1 ), LDB, A( K+1, K-1 ), 1, ONE,
+     $                     B( K-1, 1 ), LDB )
+               CALL CLACGV( NRHS, B( K-1, 1 ), LDB )
+            END IF
+*
+*           Interchange rows K and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K )
+     $         CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            K = K - 2
+         END IF
+*
+         GO TO 90
+  100    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CHETRS
+*
+      END
diff --git a/costa/native/external/lapack/chgeqz.f b/costa/native/external/lapack/chgeqz.f
new file mode 100644
index 000000000..5a9b4af83
--- /dev/null
+++ b/costa/native/external/lapack/chgeqz.f
@@ -0,0 +1,734 @@
+      SUBROUTINE CHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB,
+     $                   ALPHA, BETA, Q, LDQ, Z, LDZ, WORK, LWORK,
+     $                   RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPQ, COMPZ, JOB
+      INTEGER            IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      REAL               RWORK( * )
+      COMPLEX            A( LDA, * ), ALPHA( * ), B( LDB, * ),
+     $                   BETA( * ), Q( LDQ, * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHGEQZ implements a single-shift version of the QZ
+*  method for finding the generalized eigenvalues w(i)=ALPHA(i)/BETA(i)
+*  of the equation
+*
+*       det( A - w(i) B ) = 0
+*
+*  If JOB='S', then the pair (A,B) is simultaneously
+*  reduced to Schur form (i.e., A and B are both upper triangular) by
+*  applying one unitary tranformation (usually called Q) on the left and
+*  another (usually called Z) on the right.  The diagonal elements of
+*  A are then ALPHA(1),...,ALPHA(N), and of B are BETA(1),...,BETA(N).
+*
+*  If JOB='S' and COMPQ and COMPZ are 'V' or 'I', then the unitary
+*  transformations used to reduce (A,B) are accumulated into the arrays
+*  Q and Z s.t.:
+*
+*       Q(in) A(in) Z(in)* = Q(out) A(out) Z(out)*
+*       Q(in) B(in) Z(in)* = Q(out) B(out) Z(out)*
+*
+*  Ref: C.B. Moler & G.W. Stewart, "An Algorithm for Generalized Matrix
+*       Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973),
+*       pp. 241--256.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          = 'E': compute only ALPHA and BETA.  A and B will not
+*                 necessarily be put into generalized Schur form.
+*          = 'S': put A and B into generalized Schur form, as well
+*                 as computing ALPHA and BETA.
+*
+*  COMPQ   (input) CHARACTER*1
+*          = 'N': do not modify Q.
+*          = 'V': multiply the array Q on the right by the conjugate
+*                 transpose of the unitary tranformation that is
+*                 applied to the left side of A and B to reduce them
+*                 to Schur form.
+*          = 'I': like COMPQ='V', except that Q will be initialized to
+*                 the identity first.
+*
+*  COMPZ   (input) CHARACTER*1
+*          = 'N': do not modify Z.
+*          = 'V': multiply the array Z on the right by the unitary
+*                 tranformation that is applied to the right side of
+*                 A and B to reduce them to Schur form.
+*          = 'I': like COMPZ='V', except that Z will be initialized to
+*                 the identity first.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A, B, Q, and Z.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          It is assumed that A is already upper triangular in rows and
+*          columns 1:ILO-1 and IHI+1:N.
+*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA, N)
+*          On entry, the N-by-N upper Hessenberg matrix A.  Elements
+*          below the subdiagonal must be zero.
+*          If JOB='S', then on exit A and B will have been
+*             simultaneously reduced to upper triangular form.
+*          If JOB='E', then on exit A will have been destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max( 1, N ).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB, N)
+*          On entry, the N-by-N upper triangular matrix B.  Elements
+*          below the diagonal must be zero.
+*          If JOB='S', then on exit A and B will have been
+*             simultaneously reduced to upper triangular form.
+*          If JOB='E', then on exit B will have been destroyed.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max( 1, N ).
+*
+*  ALPHA   (output) COMPLEX array, dimension (N)
+*          The diagonal elements of A when the pair (A,B) has been
+*          reduced to Schur form.  ALPHA(i)/BETA(i) i=1,...,N
+*          are the generalized eigenvalues.
+*
+*  BETA    (output) COMPLEX array, dimension (N)
+*          The diagonal elements of B when the pair (A,B) has been
+*          reduced to Schur form.  ALPHA(i)/BETA(i) i=1,...,N
+*          are the generalized eigenvalues.  A and B are normalized
+*          so that BETA(1),...,BETA(N) are non-negative real numbers.
+*
+*  Q       (input/output) COMPLEX array, dimension (LDQ, N)
+*          If COMPQ='N', then Q will not be referenced.
+*          If COMPQ='V' or 'I', then the conjugate transpose of the
+*             unitary transformations which are applied to A and B on
+*             the left will be applied to the array Q on the right.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.  LDQ >= 1.
+*          If COMPQ='V' or 'I', then LDQ >= N.
+*
+*  Z       (input/output) COMPLEX array, dimension (LDZ, N)
+*          If COMPZ='N', then Z will not be referenced.
+*          If COMPZ='V' or 'I', then the unitary transformations which
+*             are applied to A and B on the right will be applied to the
+*             array Z on the right.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1.
+*          If COMPZ='V' or 'I', then LDZ >= N.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO >= 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,N).
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          = 1,...,N: the QZ iteration did not converge.  (A,B) is not
+*                     in Schur form, but ALPHA(i) and BETA(i),
+*                     i=INFO+1,...,N should be correct.
+*          = N+1,...,2*N: the shift calculation failed.  (A,B) is not
+*                     in Schur form, but ALPHA(i) and BETA(i),
+*                     i=INFO-N+1,...,N should be correct.
+*          > 2*N:     various "impossible" errors.
+*
+*  Further Details
+*  ===============
+*
+*  We assume that complex ABS works as long as its value is less than
+*  overflow.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   CONE = ( 1.0E+0, 0.0E+0 ) )
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+      REAL               HALF
+      PARAMETER          ( HALF = 0.5E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ILAZR2, ILAZRO, ILQ, ILSCHR, ILZ, LQUERY
+      INTEGER            ICOMPQ, ICOMPZ, IFIRST, IFRSTM, IITER, ILAST,
+     $                   ILASTM, IN, ISCHUR, ISTART, J, JC, JCH, JITER,
+     $                   JR, MAXIT
+      REAL               ABSB, ANORM, ASCALE, ATOL, BNORM, BSCALE, BTOL,
+     $                   C, SAFMIN, TEMP, TEMP2, TEMPR, ULP
+      COMPLEX            ABI22, AD11, AD12, AD21, AD22, CTEMP, CTEMP2,
+     $                   CTEMP3, ESHIFT, RTDISC, S, SHIFT, SIGNBC, T,
+     $                   U12, X
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               CLANHS, SLAMCH
+      EXTERNAL           LSAME, CLANHS, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLARTG, CLASET, CROT, CSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, CMPLX, CONJG, MAX, MIN, REAL, SQRT
+*     ..
+*     .. Statement Functions ..
+      REAL               ABS1
+*     ..
+*     .. Statement Function definitions ..
+      ABS1( X ) = ABS( REAL( X ) ) + ABS( AIMAG( X ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode JOB, COMPQ, COMPZ
+*
+      IF( LSAME( JOB, 'E' ) ) THEN
+         ILSCHR = .FALSE.
+         ISCHUR = 1
+      ELSE IF( LSAME( JOB, 'S' ) ) THEN
+         ILSCHR = .TRUE.
+         ISCHUR = 2
+      ELSE
+         ISCHUR = 0
+      END IF
+*
+      IF( LSAME( COMPQ, 'N' ) ) THEN
+         ILQ = .FALSE.
+         ICOMPQ = 1
+      ELSE IF( LSAME( COMPQ, 'V' ) ) THEN
+         ILQ = .TRUE.
+         ICOMPQ = 2
+      ELSE IF( LSAME( COMPQ, 'I' ) ) THEN
+         ILQ = .TRUE.
+         ICOMPQ = 3
+      ELSE
+         ICOMPQ = 0
+      END IF
+*
+      IF( LSAME( COMPZ, 'N' ) ) THEN
+         ILZ = .FALSE.
+         ICOMPZ = 1
+      ELSE IF( LSAME( COMPZ, 'V' ) ) THEN
+         ILZ = .TRUE.
+         ICOMPZ = 2
+      ELSE IF( LSAME( COMPZ, 'I' ) ) THEN
+         ILZ = .TRUE.
+         ICOMPZ = 3
+      ELSE
+         ICOMPZ = 0
+      END IF
+*
+*     Check Argument Values
+*
+      INFO = 0
+      WORK( 1 ) = MAX( 1, N )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( ISCHUR.EQ.0 ) THEN
+         INFO = -1
+      ELSE IF( ICOMPQ.EQ.0 ) THEN
+         INFO = -2
+      ELSE IF( ICOMPZ.EQ.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( ILO.LT.1 ) THEN
+         INFO = -5
+      ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.N ) THEN
+         INFO = -8
+      ELSE IF( LDB.LT.N ) THEN
+         INFO = -10
+      ELSE IF( LDQ.LT.1 .OR. ( ILQ .AND. LDQ.LT.N ) ) THEN
+         INFO = -14
+      ELSE IF( LDZ.LT.1 .OR. ( ILZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -16
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -18
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHGEQZ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+c     WORK( 1 ) = CMPLX( 1 )
+      IF( N.LE.0 ) THEN
+         WORK( 1 ) = CMPLX( 1 )
+         RETURN
+      END IF
+*
+*     Initialize Q and Z
+*
+      IF( ICOMPQ.EQ.3 )
+     $   CALL CLASET( 'Full', N, N, CZERO, CONE, Q, LDQ )
+      IF( ICOMPZ.EQ.3 )
+     $   CALL CLASET( 'Full', N, N, CZERO, CONE, Z, LDZ )
+*
+*     Machine Constants
+*
+      IN = IHI + 1 - ILO
+      SAFMIN = SLAMCH( 'S' )
+      ULP = SLAMCH( 'E' )*SLAMCH( 'B' )
+      ANORM = CLANHS( 'F', IN, A( ILO, ILO ), LDA, RWORK )
+      BNORM = CLANHS( 'F', IN, B( ILO, ILO ), LDB, RWORK )
+      ATOL = MAX( SAFMIN, ULP*ANORM )
+      BTOL = MAX( SAFMIN, ULP*BNORM )
+      ASCALE = ONE / MAX( SAFMIN, ANORM )
+      BSCALE = ONE / MAX( SAFMIN, BNORM )
+*
+*
+*     Set Eigenvalues IHI+1:N
+*
+      DO 10 J = IHI + 1, N
+         ABSB = ABS( B( J, J ) )
+         IF( ABSB.GT.SAFMIN ) THEN
+            SIGNBC = CONJG( B( J, J ) / ABSB )
+            B( J, J ) = ABSB
+            IF( ILSCHR ) THEN
+               CALL CSCAL( J-1, SIGNBC, B( 1, J ), 1 )
+               CALL CSCAL( J, SIGNBC, A( 1, J ), 1 )
+            ELSE
+               A( J, J ) = A( J, J )*SIGNBC
+            END IF
+            IF( ILZ )
+     $         CALL CSCAL( N, SIGNBC, Z( 1, J ), 1 )
+         ELSE
+            B( J, J ) = CZERO
+         END IF
+         ALPHA( J ) = A( J, J )
+         BETA( J ) = B( J, J )
+   10 CONTINUE
+*
+*     If IHI < ILO, skip QZ steps
+*
+      IF( IHI.LT.ILO )
+     $   GO TO 190
+*
+*     MAIN QZ ITERATION LOOP
+*
+*     Initialize dynamic indices
+*
+*     Eigenvalues ILAST+1:N have been found.
+*        Column operations modify rows IFRSTM:whatever
+*        Row operations modify columns whatever:ILASTM
+*
+*     If only eigenvalues are being computed, then
+*        IFRSTM is the row of the last splitting row above row ILAST;
+*        this is always at least ILO.
+*     IITER counts iterations since the last eigenvalue was found,
+*        to tell when to use an extraordinary shift.
+*     MAXIT is the maximum number of QZ sweeps allowed.
+*
+      ILAST = IHI
+      IF( ILSCHR ) THEN
+         IFRSTM = 1
+         ILASTM = N
+      ELSE
+         IFRSTM = ILO
+         ILASTM = IHI
+      END IF
+      IITER = 0
+      ESHIFT = CZERO
+      MAXIT = 30*( IHI-ILO+1 )
+*
+      DO 170 JITER = 1, MAXIT
+*
+*        Check for too many iterations.
+*
+         IF( JITER.GT.MAXIT )
+     $      GO TO 180
+*
+*        Split the matrix if possible.
+*
+*        Two tests:
+*           1: A(j,j-1)=0  or  j=ILO
+*           2: B(j,j)=0
+*
+*        Special case: j=ILAST
+*
+         IF( ILAST.EQ.ILO ) THEN
+            GO TO 60
+         ELSE
+            IF( ABS1( A( ILAST, ILAST-1 ) ).LE.ATOL ) THEN
+               A( ILAST, ILAST-1 ) = CZERO
+               GO TO 60
+            END IF
+         END IF
+*
+         IF( ABS( B( ILAST, ILAST ) ).LE.BTOL ) THEN
+            B( ILAST, ILAST ) = CZERO
+            GO TO 50
+         END IF
+*
+*        General case: j<ILAST
+*
+         DO 40 J = ILAST - 1, ILO, -1
+*
+*           Test 1: for A(j,j-1)=0 or j=ILO
+*
+            IF( J.EQ.ILO ) THEN
+               ILAZRO = .TRUE.
+            ELSE
+               IF( ABS1( A( J, J-1 ) ).LE.ATOL ) THEN
+                  A( J, J-1 ) = CZERO
+                  ILAZRO = .TRUE.
+               ELSE
+                  ILAZRO = .FALSE.
+               END IF
+            END IF
+*
+*           Test 2: for B(j,j)=0
+*
+            IF( ABS( B( J, J ) ).LT.BTOL ) THEN
+               B( J, J ) = CZERO
+*
+*              Test 1a: Check for 2 consecutive small subdiagonals in A
+*
+               ILAZR2 = .FALSE.
+               IF( .NOT.ILAZRO ) THEN
+                  IF( ABS1( A( J, J-1 ) )*( ASCALE*ABS1( A( J+1,
+     $                J ) ) ).LE.ABS1( A( J, J ) )*( ASCALE*ATOL ) )
+     $                ILAZR2 = .TRUE.
+               END IF
+*
+*              If both tests pass (1 & 2), i.e., the leading diagonal
+*              element of B in the block is zero, split a 1x1 block off
+*              at the top. (I.e., at the J-th row/column) The leading
+*              diagonal element of the remainder can also be zero, so
+*              this may have to be done repeatedly.
+*
+               IF( ILAZRO .OR. ILAZR2 ) THEN
+                  DO 20 JCH = J, ILAST - 1
+                     CTEMP = A( JCH, JCH )
+                     CALL CLARTG( CTEMP, A( JCH+1, JCH ), C, S,
+     $                            A( JCH, JCH ) )
+                     A( JCH+1, JCH ) = CZERO
+                     CALL CROT( ILASTM-JCH, A( JCH, JCH+1 ), LDA,
+     $                          A( JCH+1, JCH+1 ), LDA, C, S )
+                     CALL CROT( ILASTM-JCH, B( JCH, JCH+1 ), LDB,
+     $                          B( JCH+1, JCH+1 ), LDB, C, S )
+                     IF( ILQ )
+     $                  CALL CROT( N, Q( 1, JCH ), 1, Q( 1, JCH+1 ), 1,
+     $                             C, CONJG( S ) )
+                     IF( ILAZR2 )
+     $                  A( JCH, JCH-1 ) = A( JCH, JCH-1 )*C
+                     ILAZR2 = .FALSE.
+                     IF( ABS1( B( JCH+1, JCH+1 ) ).GE.BTOL ) THEN
+                        IF( JCH+1.GE.ILAST ) THEN
+                           GO TO 60
+                        ELSE
+                           IFIRST = JCH + 1
+                           GO TO 70
+                        END IF
+                     END IF
+                     B( JCH+1, JCH+1 ) = CZERO
+   20             CONTINUE
+                  GO TO 50
+               ELSE
+*
+*                 Only test 2 passed -- chase the zero to B(ILAST,ILAST)
+*                 Then process as in the case B(ILAST,ILAST)=0
+*
+                  DO 30 JCH = J, ILAST - 1
+                     CTEMP = B( JCH, JCH+1 )
+                     CALL CLARTG( CTEMP, B( JCH+1, JCH+1 ), C, S,
+     $                            B( JCH, JCH+1 ) )
+                     B( JCH+1, JCH+1 ) = CZERO
+                     IF( JCH.LT.ILASTM-1 )
+     $                  CALL CROT( ILASTM-JCH-1, B( JCH, JCH+2 ), LDB,
+     $                             B( JCH+1, JCH+2 ), LDB, C, S )
+                     CALL CROT( ILASTM-JCH+2, A( JCH, JCH-1 ), LDA,
+     $                          A( JCH+1, JCH-1 ), LDA, C, S )
+                     IF( ILQ )
+     $                  CALL CROT( N, Q( 1, JCH ), 1, Q( 1, JCH+1 ), 1,
+     $                             C, CONJG( S ) )
+                     CTEMP = A( JCH+1, JCH )
+                     CALL CLARTG( CTEMP, A( JCH+1, JCH-1 ), C, S,
+     $                            A( JCH+1, JCH ) )
+                     A( JCH+1, JCH-1 ) = CZERO
+                     CALL CROT( JCH+1-IFRSTM, A( IFRSTM, JCH ), 1,
+     $                          A( IFRSTM, JCH-1 ), 1, C, S )
+                     CALL CROT( JCH-IFRSTM, B( IFRSTM, JCH ), 1,
+     $                          B( IFRSTM, JCH-1 ), 1, C, S )
+                     IF( ILZ )
+     $                  CALL CROT( N, Z( 1, JCH ), 1, Z( 1, JCH-1 ), 1,
+     $                             C, S )
+   30             CONTINUE
+                  GO TO 50
+               END IF
+            ELSE IF( ILAZRO ) THEN
+*
+*              Only test 1 passed -- work on J:ILAST
+*
+               IFIRST = J
+               GO TO 70
+            END IF
+*
+*           Neither test passed -- try next J
+*
+   40    CONTINUE
+*
+*        (Drop-through is "impossible")
+*
+         INFO = 2*N + 1
+         GO TO 210
+*
+*        B(ILAST,ILAST)=0 -- clear A(ILAST,ILAST-1) to split off a
+*        1x1 block.
+*
+   50    CONTINUE
+         CTEMP = A( ILAST, ILAST )
+         CALL CLARTG( CTEMP, A( ILAST, ILAST-1 ), C, S,
+     $                A( ILAST, ILAST ) )
+         A( ILAST, ILAST-1 ) = CZERO
+         CALL CROT( ILAST-IFRSTM, A( IFRSTM, ILAST ), 1,
+     $              A( IFRSTM, ILAST-1 ), 1, C, S )
+         CALL CROT( ILAST-IFRSTM, B( IFRSTM, ILAST ), 1,
+     $              B( IFRSTM, ILAST-1 ), 1, C, S )
+         IF( ILZ )
+     $      CALL CROT( N, Z( 1, ILAST ), 1, Z( 1, ILAST-1 ), 1, C, S )
+*
+*        A(ILAST,ILAST-1)=0 -- Standardize B, set ALPHA and BETA
+*
+   60    CONTINUE
+         ABSB = ABS( B( ILAST, ILAST ) )
+         IF( ABSB.GT.SAFMIN ) THEN
+            SIGNBC = CONJG( B( ILAST, ILAST ) / ABSB )
+            B( ILAST, ILAST ) = ABSB
+            IF( ILSCHR ) THEN
+               CALL CSCAL( ILAST-IFRSTM, SIGNBC, B( IFRSTM, ILAST ), 1 )
+               CALL CSCAL( ILAST+1-IFRSTM, SIGNBC, A( IFRSTM, ILAST ),
+     $                     1 )
+            ELSE
+               A( ILAST, ILAST ) = A( ILAST, ILAST )*SIGNBC
+            END IF
+            IF( ILZ )
+     $         CALL CSCAL( N, SIGNBC, Z( 1, ILAST ), 1 )
+         ELSE
+            B( ILAST, ILAST ) = CZERO
+         END IF
+         ALPHA( ILAST ) = A( ILAST, ILAST )
+         BETA( ILAST ) = B( ILAST, ILAST )
+*
+*        Go to next block -- exit if finished.
+*
+         ILAST = ILAST - 1
+         IF( ILAST.LT.ILO )
+     $      GO TO 190
+*
+*        Reset counters
+*
+         IITER = 0
+         ESHIFT = CZERO
+         IF( .NOT.ILSCHR ) THEN
+            ILASTM = ILAST
+            IF( IFRSTM.GT.ILAST )
+     $         IFRSTM = ILO
+         END IF
+         GO TO 160
+*
+*        QZ step
+*
+*        This iteration only involves rows/columns IFIRST:ILAST.  We
+*        assume IFIRST < ILAST, and that the diagonal of B is non-zero.
+*
+   70    CONTINUE
+         IITER = IITER + 1
+         IF( .NOT.ILSCHR ) THEN
+            IFRSTM = IFIRST
+         END IF
+*
+*        Compute the Shift.
+*
+*        At this point, IFIRST < ILAST, and the diagonal elements of
+*        B(IFIRST:ILAST,IFIRST,ILAST) are larger than BTOL (in
+*        magnitude)
+*
+         IF( ( IITER / 10 )*10.NE.IITER ) THEN
+*
+*           The Wilkinson shift (AEP p.512), i.e., the eigenvalue of
+*           the bottom-right 2x2 block of A inv(B) which is nearest to
+*           the bottom-right element.
+*
+*           We factor B as U*D, where U has unit diagonals, and
+*           compute (A*inv(D))*inv(U).
+*
+            U12 = ( BSCALE*B( ILAST-1, ILAST ) ) /
+     $            ( BSCALE*B( ILAST, ILAST ) )
+            AD11 = ( ASCALE*A( ILAST-1, ILAST-1 ) ) /
+     $             ( BSCALE*B( ILAST-1, ILAST-1 ) )
+            AD21 = ( ASCALE*A( ILAST, ILAST-1 ) ) /
+     $             ( BSCALE*B( ILAST-1, ILAST-1 ) )
+            AD12 = ( ASCALE*A( ILAST-1, ILAST ) ) /
+     $             ( BSCALE*B( ILAST, ILAST ) )
+            AD22 = ( ASCALE*A( ILAST, ILAST ) ) /
+     $             ( BSCALE*B( ILAST, ILAST ) )
+            ABI22 = AD22 - U12*AD21
+*
+            T = HALF*( AD11+ABI22 )
+            RTDISC = SQRT( T**2+AD12*AD21-AD11*AD22 )
+            TEMP = REAL( T-ABI22 )*REAL( RTDISC ) +
+     $             AIMAG( T-ABI22 )*AIMAG( RTDISC )
+            IF( TEMP.LE.ZERO ) THEN
+               SHIFT = T + RTDISC
+            ELSE
+               SHIFT = T - RTDISC
+            END IF
+         ELSE
+*
+*           Exceptional shift.  Chosen for no particularly good reason.
+*
+            ESHIFT = ESHIFT + CONJG( ( ASCALE*A( ILAST-1, ILAST ) ) /
+     $               ( BSCALE*B( ILAST-1, ILAST-1 ) ) )
+            SHIFT = ESHIFT
+         END IF
+*
+*        Now check for two consecutive small subdiagonals.
+*
+         DO 80 J = ILAST - 1, IFIRST + 1, -1
+            ISTART = J
+            CTEMP = ASCALE*A( J, J ) - SHIFT*( BSCALE*B( J, J ) )
+            TEMP = ABS1( CTEMP )
+            TEMP2 = ASCALE*ABS1( A( J+1, J ) )
+            TEMPR = MAX( TEMP, TEMP2 )
+            IF( TEMPR.LT.ONE .AND. TEMPR.NE.ZERO ) THEN
+               TEMP = TEMP / TEMPR
+               TEMP2 = TEMP2 / TEMPR
+            END IF
+            IF( ABS1( A( J, J-1 ) )*TEMP2.LE.TEMP*ATOL )
+     $         GO TO 90
+   80    CONTINUE
+*
+         ISTART = IFIRST
+         CTEMP = ASCALE*A( IFIRST, IFIRST ) -
+     $           SHIFT*( BSCALE*B( IFIRST, IFIRST ) )
+   90    CONTINUE
+*
+*        Do an implicit-shift QZ sweep.
+*
+*        Initial Q
+*
+         CTEMP2 = ASCALE*A( ISTART+1, ISTART )
+         CALL CLARTG( CTEMP, CTEMP2, C, S, CTEMP3 )
+*
+*        Sweep
+*
+         DO 150 J = ISTART, ILAST - 1
+            IF( J.GT.ISTART ) THEN
+               CTEMP = A( J, J-1 )
+               CALL CLARTG( CTEMP, A( J+1, J-1 ), C, S, A( J, J-1 ) )
+               A( J+1, J-1 ) = CZERO
+            END IF
+*
+            DO 100 JC = J, ILASTM
+               CTEMP = C*A( J, JC ) + S*A( J+1, JC )
+               A( J+1, JC ) = -CONJG( S )*A( J, JC ) + C*A( J+1, JC )
+               A( J, JC ) = CTEMP
+               CTEMP2 = C*B( J, JC ) + S*B( J+1, JC )
+               B( J+1, JC ) = -CONJG( S )*B( J, JC ) + C*B( J+1, JC )
+               B( J, JC ) = CTEMP2
+  100       CONTINUE
+            IF( ILQ ) THEN
+               DO 110 JR = 1, N
+                  CTEMP = C*Q( JR, J ) + CONJG( S )*Q( JR, J+1 )
+                  Q( JR, J+1 ) = -S*Q( JR, J ) + C*Q( JR, J+1 )
+                  Q( JR, J ) = CTEMP
+  110          CONTINUE
+            END IF
+*
+            CTEMP = B( J+1, J+1 )
+            CALL CLARTG( CTEMP, B( J+1, J ), C, S, B( J+1, J+1 ) )
+            B( J+1, J ) = CZERO
+*
+            DO 120 JR = IFRSTM, MIN( J+2, ILAST )
+               CTEMP = C*A( JR, J+1 ) + S*A( JR, J )
+               A( JR, J ) = -CONJG( S )*A( JR, J+1 ) + C*A( JR, J )
+               A( JR, J+1 ) = CTEMP
+  120       CONTINUE
+            DO 130 JR = IFRSTM, J
+               CTEMP = C*B( JR, J+1 ) + S*B( JR, J )
+               B( JR, J ) = -CONJG( S )*B( JR, J+1 ) + C*B( JR, J )
+               B( JR, J+1 ) = CTEMP
+  130       CONTINUE
+            IF( ILZ ) THEN
+               DO 140 JR = 1, N
+                  CTEMP = C*Z( JR, J+1 ) + S*Z( JR, J )
+                  Z( JR, J ) = -CONJG( S )*Z( JR, J+1 ) + C*Z( JR, J )
+                  Z( JR, J+1 ) = CTEMP
+  140          CONTINUE
+            END IF
+  150    CONTINUE
+*
+  160    CONTINUE
+*
+  170 CONTINUE
+*
+*     Drop-through = non-convergence
+*
+  180 CONTINUE
+      INFO = ILAST
+      GO TO 210
+*
+*     Successful completion of all QZ steps
+*
+  190 CONTINUE
+*
+*     Set Eigenvalues 1:ILO-1
+*
+      DO 200 J = 1, ILO - 1
+         ABSB = ABS( B( J, J ) )
+         IF( ABSB.GT.SAFMIN ) THEN
+            SIGNBC = CONJG( B( J, J ) / ABSB )
+            B( J, J ) = ABSB
+            IF( ILSCHR ) THEN
+               CALL CSCAL( J-1, SIGNBC, B( 1, J ), 1 )
+               CALL CSCAL( J, SIGNBC, A( 1, J ), 1 )
+            ELSE
+               A( J, J ) = A( J, J )*SIGNBC
+            END IF
+            IF( ILZ )
+     $         CALL CSCAL( N, SIGNBC, Z( 1, J ), 1 )
+         ELSE
+            B( J, J ) = CZERO
+         END IF
+         ALPHA( J ) = A( J, J )
+         BETA( J ) = B( J, J )
+  200 CONTINUE
+*
+*     Normal Termination
+*
+      INFO = 0
+*
+*     Exit (other than argument error) -- return optimal workspace size
+*
+  210 CONTINUE
+      WORK( 1 ) = CMPLX( N )
+      RETURN
+*
+*     End of CHGEQZ
+*
+      END
diff --git a/costa/native/external/lapack/chpcon.f b/costa/native/external/lapack/chpcon.f
new file mode 100644
index 000000000..13029ab66
--- /dev/null
+++ b/costa/native/external/lapack/chpcon.f
@@ -0,0 +1,155 @@
+      SUBROUTINE CHPCON( UPLO, N, AP, IPIV, ANORM, RCOND, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+      REAL               ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            AP( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHPCON estimates the reciprocal of the condition number of a complex
+*  Hermitian packed matrix A using the factorization A = U*D*U**H or
+*  A = L*D*L**H computed by CHPTRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**H;
+*          = 'L':  Lower triangular, form is A = L*D*L**H.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input) COMPLEX array, dimension (N*(N+1)/2)
+*          The block diagonal matrix D and the multipliers used to
+*          obtain the factor U or L as computed by CHPTRF, stored as a
+*          packed triangular matrix.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by CHPTRF.
+*
+*  ANORM   (input) REAL
+*          The 1-norm of the original matrix A.
+*
+*  RCOND   (output) REAL
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an
+*          estimate of the 1-norm of inv(A) computed in this routine.
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, IP, KASE
+      REAL               AINVNM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CHPTRS, CLACON, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHPCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.LE.ZERO ) THEN
+         RETURN
+      END IF
+*
+*     Check that the diagonal matrix D is nonsingular.
+*
+      IF( UPPER ) THEN
+*
+*        Upper triangular storage: examine D from bottom to top
+*
+         IP = N*( N+1 ) / 2
+         DO 10 I = N, 1, -1
+            IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO )
+     $         RETURN
+            IP = IP - I
+   10    CONTINUE
+      ELSE
+*
+*        Lower triangular storage: examine D from top to bottom.
+*
+         IP = 1
+         DO 20 I = 1, N
+            IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO )
+     $         RETURN
+            IP = IP + N - I + 1
+   20    CONTINUE
+      END IF
+*
+*     Estimate the 1-norm of the inverse.
+*
+      KASE = 0
+   30 CONTINUE
+      CALL CLACON( N, WORK( N+1 ), WORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+*
+*        Multiply by inv(L*D*L') or inv(U*D*U').
+*
+         CALL CHPTRS( UPLO, N, 1, AP, IPIV, WORK, N, INFO )
+         GO TO 30
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+      RETURN
+*
+*     End of CHPCON
+*
+      END
diff --git a/costa/native/external/lapack/chpev.f b/costa/native/external/lapack/chpev.f
new file mode 100644
index 000000000..f118d7bc2
--- /dev/null
+++ b/costa/native/external/lapack/chpev.f
@@ -0,0 +1,197 @@
+      SUBROUTINE CHPEV( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, RWORK,
+     $                  INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      REAL               RWORK( * ), W( * )
+      COMPLEX            AP( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHPEV computes all the eigenvalues and, optionally, eigenvectors of a
+*  complex Hermitian matrix in packed storage.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) COMPLEX array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, AP is overwritten by values generated during the
+*          reduction to tridiagonal form.  If UPLO = 'U', the diagonal
+*          and first superdiagonal of the tridiagonal matrix T overwrite
+*          the corresponding elements of A, and if UPLO = 'L', the
+*          diagonal and first subdiagonal of T overwrite the
+*          corresponding elements of A.
+*
+*  W       (output) REAL array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) COMPLEX array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal
+*          eigenvectors of the matrix A, with the i-th column of Z
+*          holding the eigenvector associated with W(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace) COMPLEX array, dimension (max(1, 2*N-1))
+*
+*  RWORK   (workspace) REAL array, dimension (max(1, 3*N-2))
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = i, the algorithm failed to converge; i
+*                off-diagonal elements of an intermediate tridiagonal
+*                form did not converge to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            WANTZ
+      INTEGER            IINFO, IMAX, INDE, INDRWK, INDTAU, INDWRK,
+     $                   ISCALE
+      REAL               ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
+     $                   SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               CLANHP, SLAMCH
+      EXTERNAL           LSAME, CLANHP, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CHPTRD, CSSCAL, CSTEQR, CUPGTR, SSCAL, SSTERF,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( LSAME( UPLO, 'L' ) .OR. LSAME( UPLO, 'U' ) ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -7
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHPEV ', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         W( 1 ) = AP( 1 )
+         RWORK( 1 ) = 1
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      EPS = SLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = SQRT( BIGNUM )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ANRM = CLANHP( 'M', UPLO, N, AP, RWORK )
+      ISCALE = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         CALL CSSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 )
+      END IF
+*
+*     Call CHPTRD to reduce Hermitian packed matrix to tridiagonal form.
+*
+      INDE = 1
+      INDTAU = 1
+      CALL CHPTRD( UPLO, N, AP, W, RWORK( INDE ), WORK( INDTAU ),
+     $             IINFO )
+*
+*     For eigenvalues only, call SSTERF.  For eigenvectors, first call
+*     CUPGTR to generate the orthogonal matrix, then call CSTEQR.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL SSTERF( N, W, RWORK( INDE ), INFO )
+      ELSE
+         INDWRK = INDTAU + N
+         CALL CUPGTR( UPLO, N, AP, WORK( INDTAU ), Z, LDZ,
+     $                WORK( INDWRK ), IINFO )
+         INDRWK = INDE + N
+         CALL CSTEQR( JOBZ, N, W, RWORK( INDE ), Z, LDZ,
+     $                RWORK( INDRWK ), INFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = N
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL SSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+      RETURN
+*
+*     End of CHPEV
+*
+      END
diff --git a/costa/native/external/lapack/chpevd.f b/costa/native/external/lapack/chpevd.f
new file mode 100644
index 000000000..6783119d6
--- /dev/null
+++ b/costa/native/external/lapack/chpevd.f
@@ -0,0 +1,280 @@
+      SUBROUTINE CHPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK,
+     $                   RWORK, LRWORK, IWORK, LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, LDZ, LIWORK, LRWORK, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               RWORK( * ), W( * )
+      COMPLEX            AP( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHPEVD computes all the eigenvalues and, optionally, eigenvectors of
+*  a complex Hermitian matrix A in packed storage.  If eigenvectors are
+*  desired, it uses a divide and conquer algorithm.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) COMPLEX array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, AP is overwritten by values generated during the
+*          reduction to tridiagonal form.  If UPLO = 'U', the diagonal
+*          and first superdiagonal of the tridiagonal matrix T overwrite
+*          the corresponding elements of A, and if UPLO = 'L', the
+*          diagonal and first subdiagonal of T overwrite the
+*          corresponding elements of A.
+*
+*  W       (output) REAL array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) COMPLEX array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal
+*          eigenvectors of the matrix A, with the i-th column of Z
+*          holding the eigenvector associated with W(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of array WORK.
+*          If N <= 1,               LWORK must be at least 1.
+*          If JOBZ = 'N' and N > 1, LWORK must be at least N.
+*          If JOBZ = 'V' and N > 1, LWORK must be at least 2*N.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace/output) REAL array,
+*                                         dimension (LRWORK)
+*          On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
+*
+*  LRWORK  (input) INTEGER
+*          The dimension of array RWORK.
+*          If N <= 1,               LRWORK must be at least 1.
+*          If JOBZ = 'N' and N > 1, LRWORK must be at least N.
+*          If JOBZ = 'V' and N > 1, LRWORK must be at least
+*                    1 + 5*N + 2*N**2.
+*
+*          If LRWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the RWORK array,
+*          returns this value as the first entry of the RWORK array, and
+*          no error message related to LRWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of array IWORK.
+*          If JOBZ  = 'N' or N <= 1, LIWORK must be at least 1.
+*          If JOBZ  = 'V' and N > 1, LIWORK must be at least 3 + 5*N.
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = i, the algorithm failed to converge; i
+*                off-diagonal elements of an intermediate tridiagonal
+*                form did not converge to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+      COMPLEX            CONE
+      PARAMETER          ( CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, WANTZ
+      INTEGER            IINFO, IMAX, INDE, INDRWK, INDTAU, INDWRK,
+     $                   ISCALE, LIWMIN, LLRWK, LLWRK, LRWMIN, LWMIN
+      REAL               ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
+     $                   SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               CLANHP, SLAMCH
+      EXTERNAL           LSAME, CLANHP, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CHPTRD, CSSCAL, CSTEDC, CUPMTR, SSCAL, SSTERF,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( N.LE.1 ) THEN
+         LWMIN = 1
+         LIWMIN = 1
+         LRWMIN = 1
+      ELSE
+         IF( WANTZ ) THEN
+            LWMIN = 2*N
+            LRWMIN = 1 + 5*N + 2*N**2
+            LIWMIN = 3 + 5*N
+         ELSE
+            LWMIN = N
+            LRWMIN = N
+            LIWMIN = 1
+         END IF
+      END IF
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( LSAME( UPLO, 'L' ) .OR. LSAME( UPLO, 'U' ) ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -7
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -9
+      ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -11
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+         RWORK( 1 ) = LRWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHPEVD', -INFO )
+         RETURN 
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         W( 1 ) = AP( 1 )
+         IF( WANTZ )
+     $      Z( 1, 1 ) = CONE
+         RETURN 
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      EPS = SLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = SQRT( BIGNUM )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ANRM = CLANHP( 'M', UPLO, N, AP, RWORK )
+      ISCALE = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         CALL CSSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 )
+      END IF
+*
+*     Call CHPTRD to reduce Hermitian packed matrix to tridiagonal form.
+*
+      INDE = 1
+      INDTAU = 1
+      INDRWK = INDE + N
+      INDWRK = INDTAU + N
+      LLWRK = LWORK - INDWRK + 1
+      LLRWK = LRWORK - INDRWK + 1
+      CALL CHPTRD( UPLO, N, AP, W, RWORK( INDE ), WORK( INDTAU ),
+     $             IINFO )
+*
+*     For eigenvalues only, call SSTERF.  For eigenvectors, first call
+*     CUPGTR to generate the orthogonal matrix, then call CSTEDC.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL SSTERF( N, W, RWORK( INDE ), INFO )
+      ELSE
+         CALL CSTEDC( 'I', N, W, RWORK( INDE ), Z, LDZ, WORK( INDWRK ),
+     $                LLWRK, RWORK( INDRWK ), LLRWK, IWORK, LIWORK,
+     $                INFO )
+         CALL CUPMTR( 'L', UPLO, 'N', N, N, AP, WORK( INDTAU ), Z, LDZ,
+     $                WORK( INDWRK ), IINFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = N
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL SSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+      WORK( 1 ) = LWMIN
+      RWORK( 1 ) = LRWMIN
+      IWORK( 1 ) = LIWMIN
+      RETURN
+*
+*     End of CHPEVD
+*
+      END
diff --git a/costa/native/external/lapack/chpevx.f b/costa/native/external/lapack/chpevx.f
new file mode 100644
index 000000000..3e7694904
--- /dev/null
+++ b/costa/native/external/lapack/chpevx.f
@@ -0,0 +1,384 @@
+      SUBROUTINE CHPEVX( JOBZ, RANGE, UPLO, N, AP, VL, VU, IL, IU,
+     $                   ABSTOL, M, W, Z, LDZ, WORK, RWORK, IWORK,
+     $                   IFAIL, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE, UPLO
+      INTEGER            IL, INFO, IU, LDZ, M, N
+      REAL               ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IFAIL( * ), IWORK( * )
+      REAL               RWORK( * ), W( * )
+      COMPLEX            AP( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHPEVX computes selected eigenvalues and, optionally, eigenvectors
+*  of a complex Hermitian matrix A in packed storage.
+*  Eigenvalues/vectors can be selected by specifying either a range of
+*  values or a range of indices for the desired eigenvalues.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found;
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found;
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) COMPLEX array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, AP is overwritten by values generated during the
+*          reduction to tridiagonal form.  If UPLO = 'U', the diagonal
+*          and first superdiagonal of the tridiagonal matrix T overwrite
+*          the corresponding elements of A, and if UPLO = 'L', the
+*          diagonal and first subdiagonal of T overwrite the
+*          corresponding elements of A.
+*
+*  VL      (input) REAL
+*  VU      (input) REAL
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) REAL
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less than
+*          or equal to zero, then  EPS*|T|  will be used in its place,
+*          where |T| is the 1-norm of the tridiagonal matrix obtained
+*          by reducing AP to tridiagonal form.
+*
+*          Eigenvalues will be computed most accurately when ABSTOL is
+*          set to twice the underflow threshold 2*SLAMCH('S'), not zero.
+*          If this routine returns with INFO>0, indicating that some
+*          eigenvectors did not converge, try setting ABSTOL to
+*          2*SLAMCH('S').
+*
+*          See "Computing Small Singular Values of Bidiagonal Matrices
+*          with Guaranteed High Relative Accuracy," by Demmel and
+*          Kahan, LAPACK Working Note #3.
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) REAL array, dimension (N)
+*          If INFO = 0, the selected eigenvalues in ascending order.
+*
+*  Z       (output) COMPLEX array, dimension (LDZ, max(1,M))
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix A
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          If an eigenvector fails to converge, then that column of Z
+*          contains the latest approximation to the eigenvector, and
+*          the index of the eigenvector is returned in IFAIL.
+*          If JOBZ = 'N', then Z is not referenced.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (7*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (5*N)
+*
+*  IFAIL   (output) INTEGER array, dimension (N)
+*          If JOBZ = 'V', then if INFO = 0, the first M elements of
+*          IFAIL are zero.  If INFO > 0, then IFAIL contains the
+*          indices of the eigenvectors that failed to converge.
+*          If JOBZ = 'N', then IFAIL is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, then i eigenvectors failed to converge.
+*                Their indices are stored in array IFAIL.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+      COMPLEX            CONE
+      PARAMETER          ( CONE = ( 1.0E0, 0.0E0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, VALEIG, WANTZ
+      CHARACTER          ORDER
+      INTEGER            I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL,
+     $                   INDISP, INDIWK, INDRWK, INDTAU, INDWRK, ISCALE,
+     $                   ITMP1, J, JJ, NSPLIT
+      REAL               ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN,
+     $                   SIGMA, SMLNUM, TMP1, VLL, VUU
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               CLANHP, SLAMCH
+      EXTERNAL           LSAME, CLANHP, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CHPTRD, CSSCAL, CSTEIN, CSTEQR, CSWAP, CUPGTR,
+     $                   CUPMTR, SCOPY, SSCAL, SSTEBZ, SSTERF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( LSAME( UPLO, 'L' ) .OR. LSAME( UPLO, 'U' ) ) )
+     $          THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE
+         IF( VALEIG ) THEN
+            IF( N.GT.0 .AND. VU.LE.VL )
+     $         INFO = -7
+         ELSE IF( INDEIG ) THEN
+            IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN
+               INFO = -8
+            ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN
+               INFO = -9
+            END IF
+         END IF
+      END IF
+      IF( INFO.EQ.0 ) THEN
+         IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) )
+     $      INFO = -14
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHPEVX', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( ALLEIG .OR. INDEIG ) THEN
+            M = 1
+            W( 1 ) = AP( 1 )
+         ELSE
+            IF( VL.LT.REAL( AP( 1 ) ) .AND. VU.GE.REAL( AP( 1 ) ) ) THEN
+               M = 1
+               W( 1 ) = AP( 1 )
+            END IF
+         END IF
+         IF( WANTZ )
+     $      Z( 1, 1 ) = CONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      EPS = SLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ISCALE = 0
+      ABSTLL = ABSTOL
+      IF ( VALEIG ) THEN
+         VLL = VL
+         VUU = VU
+      ELSE
+         VLL = ZERO
+         VUU = ZERO
+      ENDIF
+      ANRM = CLANHP( 'M', UPLO, N, AP, RWORK )
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         CALL CSSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 )
+         IF( ABSTOL.GT.0 )
+     $      ABSTLL = ABSTOL*SIGMA
+         IF( VALEIG ) THEN
+            VLL = VL*SIGMA
+            VUU = VU*SIGMA
+         END IF
+      END IF
+*
+*     Call CHPTRD to reduce Hermitian packed matrix to tridiagonal form.
+*
+      INDD = 1
+      INDE = INDD + N
+      INDRWK = INDE + N
+      INDTAU = 1
+      INDWRK = INDTAU + N
+      CALL CHPTRD( UPLO, N, AP, RWORK( INDD ), RWORK( INDE ),
+     $             WORK( INDTAU ), IINFO )
+*
+*     If all eigenvalues are desired and ABSTOL is less than or equal
+*     to zero, then call SSTERF or CUPGTR and CSTEQR.  If this fails
+*     for some eigenvalue, then try SSTEBZ.
+*
+      IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND.
+     $    ( ABSTOL.LE.ZERO ) ) THEN
+         CALL SCOPY( N, RWORK( INDD ), 1, W, 1 )
+         INDEE = INDRWK + 2*N
+         IF( .NOT.WANTZ ) THEN
+            CALL SCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 )
+            CALL SSTERF( N, W, RWORK( INDEE ), INFO )
+         ELSE
+            CALL CUPGTR( UPLO, N, AP, WORK( INDTAU ), Z, LDZ,
+     $                   WORK( INDWRK ), IINFO )
+            CALL SCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 )
+            CALL CSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ,
+     $                   RWORK( INDRWK ), INFO )
+            IF( INFO.EQ.0 ) THEN
+               DO 10 I = 1, N
+                  IFAIL( I ) = 0
+   10          CONTINUE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            M = N
+            GO TO 20
+         END IF
+         INFO = 0
+      END IF
+*
+*     Otherwise, call SSTEBZ and, if eigenvectors are desired, CSTEIN.
+*
+      IF( WANTZ ) THEN
+         ORDER = 'B'
+      ELSE
+         ORDER = 'E'
+      END IF
+      INDIBL = 1
+      INDISP = INDIBL + N
+      INDIWK = INDISP + N
+      CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL,
+     $             RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W,
+     $             IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ),
+     $             IWORK( INDIWK ), INFO )
+*
+      IF( WANTZ ) THEN
+         CALL CSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W,
+     $                IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ,
+     $                RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO )
+*
+*        Apply unitary matrix used in reduction to tridiagonal
+*        form to eigenvectors returned by CSTEIN.
+*
+         INDWRK = INDTAU + N
+         CALL CUPMTR( 'L', UPLO, 'N', N, M, AP, WORK( INDTAU ), Z, LDZ,
+     $                WORK( INDWRK ), INFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+   20 CONTINUE
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = M
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL SSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+*     If eigenvalues are not in order, then sort them, along with
+*     eigenvectors.
+*
+      IF( WANTZ ) THEN
+         DO 40 J = 1, M - 1
+            I = 0
+            TMP1 = W( J )
+            DO 30 JJ = J + 1, M
+               IF( W( JJ ).LT.TMP1 ) THEN
+                  I = JJ
+                  TMP1 = W( JJ )
+               END IF
+   30       CONTINUE
+*
+            IF( I.NE.0 ) THEN
+               ITMP1 = IWORK( INDIBL+I-1 )
+               W( I ) = W( J )
+               IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 )
+               W( J ) = TMP1
+               IWORK( INDIBL+J-1 ) = ITMP1
+               CALL CSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 )
+               IF( INFO.NE.0 ) THEN
+                  ITMP1 = IFAIL( I )
+                  IFAIL( I ) = IFAIL( J )
+                  IFAIL( J ) = ITMP1
+               END IF
+            END IF
+   40    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CHPEVX
+*
+      END
diff --git a/costa/native/external/lapack/chpgst.f b/costa/native/external/lapack/chpgst.f
new file mode 100644
index 000000000..64a158ae5
--- /dev/null
+++ b/costa/native/external/lapack/chpgst.f
@@ -0,0 +1,216 @@
+      SUBROUTINE CHPGST( ITYPE, UPLO, N, AP, BP, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, ITYPE, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            AP( * ), BP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHPGST reduces a complex Hermitian-definite generalized
+*  eigenproblem to standard form, using packed storage.
+*
+*  If ITYPE = 1, the problem is A*x = lambda*B*x,
+*  and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H)
+*
+*  If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
+*  B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L.
+*
+*  B must have been previously factorized as U**H*U or L*L**H by CPPTRF.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          = 1: compute inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H);
+*          = 2 or 3: compute U*A*U**H or L**H*A*L.
+*
+*  UPLO    (input) CHARACTER
+*          = 'U':  Upper triangle of A is stored and B is factored as
+*                  U**H*U;
+*          = 'L':  Lower triangle of A is stored and B is factored as
+*                  L*L**H.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  AP      (input/output) COMPLEX array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, if INFO = 0, the transformed matrix, stored in the
+*          same format as A.
+*
+*  BP      (input) COMPLEX array, dimension (N*(N+1)/2)
+*          The triangular factor from the Cholesky factorization of B,
+*          stored in the same format as A, as returned by CPPTRF.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, HALF
+      PARAMETER          ( ONE = 1.0E+0, HALF = 0.5E+0 )
+      COMPLEX            CONE
+      PARAMETER          ( CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, J1, J1J1, JJ, K, K1, K1K1, KK
+      REAL               AJJ, AKK, BJJ, BKK
+      COMPLEX            CT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CHPMV, CHPR2, CSSCAL, CTPMV, CTPSV,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          REAL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      COMPLEX            CDOTC
+      EXTERNAL           LSAME, CDOTC
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHPGST', -INFO )
+         RETURN
+      END IF
+*
+      IF( ITYPE.EQ.1 ) THEN
+         IF( UPPER ) THEN
+*
+*           Compute inv(U')*A*inv(U)
+*
+*           J1 and JJ are the indices of A(1,j) and A(j,j)
+*
+            JJ = 0
+            DO 10 J = 1, N
+               J1 = JJ + 1
+               JJ = JJ + J
+*
+*              Compute the j-th column of the upper triangle of A
+*
+               AP( JJ ) = REAL( AP( JJ ) )
+               BJJ = BP( JJ )
+               CALL CTPSV( UPLO, 'Conjugate transpose', 'Non-unit', J,
+     $                     BP, AP( J1 ), 1 )
+               CALL CHPMV( UPLO, J-1, -CONE, AP, BP( J1 ), 1, CONE,
+     $                     AP( J1 ), 1 )
+               CALL CSSCAL( J-1, ONE / BJJ, AP( J1 ), 1 )
+               AP( JJ ) = ( AP( JJ )-CDOTC( J-1, AP( J1 ), 1, BP( J1 ),
+     $                    1 ) ) / BJJ
+   10       CONTINUE
+         ELSE
+*
+*           Compute inv(L)*A*inv(L')
+*
+*           KK and K1K1 are the indices of A(k,k) and A(k+1,k+1)
+*
+            KK = 1
+            DO 20 K = 1, N
+               K1K1 = KK + N - K + 1
+*
+*              Update the lower triangle of A(k:n,k:n)
+*
+               AKK = AP( KK )
+               BKK = BP( KK )
+               AKK = AKK / BKK**2
+               AP( KK ) = AKK
+               IF( K.LT.N ) THEN
+                  CALL CSSCAL( N-K, ONE / BKK, AP( KK+1 ), 1 )
+                  CT = -HALF*AKK
+                  CALL CAXPY( N-K, CT, BP( KK+1 ), 1, AP( KK+1 ), 1 )
+                  CALL CHPR2( UPLO, N-K, -CONE, AP( KK+1 ), 1,
+     $                        BP( KK+1 ), 1, AP( K1K1 ) )
+                  CALL CAXPY( N-K, CT, BP( KK+1 ), 1, AP( KK+1 ), 1 )
+                  CALL CTPSV( UPLO, 'No transpose', 'Non-unit', N-K,
+     $                        BP( K1K1 ), AP( KK+1 ), 1 )
+               END IF
+               KK = K1K1
+   20       CONTINUE
+         END IF
+      ELSE
+         IF( UPPER ) THEN
+*
+*           Compute U*A*U'
+*
+*           K1 and KK are the indices of A(1,k) and A(k,k)
+*
+            KK = 0
+            DO 30 K = 1, N
+               K1 = KK + 1
+               KK = KK + K
+*
+*              Update the upper triangle of A(1:k,1:k)
+*
+               AKK = AP( KK )
+               BKK = BP( KK )
+               CALL CTPMV( UPLO, 'No transpose', 'Non-unit', K-1, BP,
+     $                     AP( K1 ), 1 )
+               CT = HALF*AKK
+               CALL CAXPY( K-1, CT, BP( K1 ), 1, AP( K1 ), 1 )
+               CALL CHPR2( UPLO, K-1, CONE, AP( K1 ), 1, BP( K1 ), 1,
+     $                     AP )
+               CALL CAXPY( K-1, CT, BP( K1 ), 1, AP( K1 ), 1 )
+               CALL CSSCAL( K-1, BKK, AP( K1 ), 1 )
+               AP( KK ) = AKK*BKK**2
+   30       CONTINUE
+         ELSE
+*
+*           Compute L'*A*L
+*
+*           JJ and J1J1 are the indices of A(j,j) and A(j+1,j+1)
+*
+            JJ = 1
+            DO 40 J = 1, N
+               J1J1 = JJ + N - J + 1
+*
+*              Compute the j-th column of the lower triangle of A
+*
+               AJJ = AP( JJ )
+               BJJ = BP( JJ )
+               AP( JJ ) = AJJ*BJJ + CDOTC( N-J, AP( JJ+1 ), 1,
+     $                    BP( JJ+1 ), 1 )
+               CALL CSSCAL( N-J, BJJ, AP( JJ+1 ), 1 )
+               CALL CHPMV( UPLO, N-J, CONE, AP( J1J1 ), BP( JJ+1 ), 1,
+     $                     CONE, AP( JJ+1 ), 1 )
+               CALL CTPMV( UPLO, 'Conjugate transpose', 'Non-unit',
+     $                     N-J+1, BP( JJ ), AP( JJ ), 1 )
+               JJ = J1J1
+   40       CONTINUE
+         END IF
+      END IF
+      RETURN
+*
+*     End of CHPGST
+*
+      END
diff --git a/costa/native/external/lapack/chpgv.f b/costa/native/external/lapack/chpgv.f
new file mode 100644
index 000000000..b3de68ef8
--- /dev/null
+++ b/costa/native/external/lapack/chpgv.f
@@ -0,0 +1,197 @@
+      SUBROUTINE CHPGV( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK,
+     $                  RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, ITYPE, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      REAL               RWORK( * ), W( * )
+      COMPLEX            AP( * ), BP( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHPGV computes all the eigenvalues and, optionally, the eigenvectors
+*  of a complex generalized Hermitian-definite eigenproblem, of the form
+*  A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.
+*  Here A and B are assumed to be Hermitian, stored in packed format,
+*  and B is also positive definite.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          Specifies the problem type to be solved:
+*          = 1:  A*x = (lambda)*B*x
+*          = 2:  A*B*x = (lambda)*x
+*          = 3:  B*A*x = (lambda)*x
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  AP      (input/output) COMPLEX array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, the contents of AP are destroyed.
+*
+*  BP      (input/output) COMPLEX array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          B, packed columnwise in a linear array.  The j-th column of B
+*          is stored in the array BP as follows:
+*          if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j;
+*          if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n.
+*
+*          On exit, the triangular factor U or L from the Cholesky
+*          factorization B = U**H*U or B = L*L**H, in the same storage
+*          format as B.
+*
+*  W       (output) REAL array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) COMPLEX array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of
+*          eigenvectors.  The eigenvectors are normalized as follows:
+*          if ITYPE = 1 or 2, Z**H*B*Z = I;
+*          if ITYPE = 3, Z**H*inv(B)*Z = I.
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace) COMPLEX array, dimension (max(1, 2*N-1))
+*
+*  RWORK   (workspace) REAL array, dimension (max(1, 3*N-2))
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  CPPTRF or CHPEV returned an error code:
+*             <= N:  if INFO = i, CHPEV failed to converge;
+*                    i off-diagonal elements of an intermediate
+*                    tridiagonal form did not convergeto zero;
+*             > N:   if INFO = N + i, for 1 <= i <= n, then the leading
+*                    minor of order i of B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            UPPER, WANTZ
+      CHARACTER          TRANS
+      INTEGER            J, NEIG
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CHPEV, CHPGST, CPPTRF, CTPMV, CTPSV, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+*
+      INFO = 0
+      IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHPGV ', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form a Cholesky factorization of B.
+*
+      CALL CPPTRF( UPLO, N, BP, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem and solve.
+*
+      CALL CHPGST( ITYPE, UPLO, N, AP, BP, INFO )
+      CALL CHPEV( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, RWORK, INFO )
+*
+      IF( WANTZ ) THEN
+*
+*        Backtransform eigenvectors to the original problem.
+*
+         NEIG = N
+         IF( INFO.GT.0 )
+     $      NEIG = INFO - 1
+         IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN
+*
+*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
+*           backtransform eigenvectors: x = inv(L)'*y or inv(U)*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'N'
+            ELSE
+               TRANS = 'C'
+            END IF
+*
+            DO 10 J = 1, NEIG
+               CALL CTPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ),
+     $                     1 )
+   10       CONTINUE
+*
+         ELSE IF( ITYPE.EQ.3 ) THEN
+*
+*           For B*A*x=(lambda)*x;
+*           backtransform eigenvectors: x = L*y or U'*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'C'
+            ELSE
+               TRANS = 'N'
+            END IF
+*
+            DO 20 J = 1, NEIG
+               CALL CTPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ),
+     $                     1 )
+   20       CONTINUE
+         END IF
+      END IF
+      RETURN
+*
+*     End of CHPGV
+*
+      END
diff --git a/costa/native/external/lapack/chpgvd.f b/costa/native/external/lapack/chpgvd.f
new file mode 100644
index 000000000..a2dcad50a
--- /dev/null
+++ b/costa/native/external/lapack/chpgvd.f
@@ -0,0 +1,291 @@
+      SUBROUTINE CHPGVD( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK,
+     $                   LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, ITYPE, LDZ, LIWORK, LRWORK, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               RWORK( * ), W( * )
+      COMPLEX            AP( * ), BP( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHPGVD computes all the eigenvalues and, optionally, the eigenvectors
+*  of a complex generalized Hermitian-definite eigenproblem, of the form
+*  A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.  Here A and
+*  B are assumed to be Hermitian, stored in packed format, and B is also
+*  positive definite.
+*  If eigenvectors are desired, it uses a divide and conquer algorithm.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          Specifies the problem type to be solved:
+*          = 1:  A*x = (lambda)*B*x
+*          = 2:  A*B*x = (lambda)*x
+*          = 3:  B*A*x = (lambda)*x
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  AP      (input/output) COMPLEX array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, the contents of AP are destroyed.
+*
+*  BP      (input/output) COMPLEX array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          B, packed columnwise in a linear array.  The j-th column of B
+*          is stored in the array BP as follows:
+*          if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j;
+*          if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n.
+*
+*          On exit, the triangular factor U or L from the Cholesky
+*          factorization B = U**H*U or B = L*L**H, in the same storage
+*          format as B.
+*
+*  W       (output) REAL array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) COMPLEX array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of
+*          eigenvectors.  The eigenvectors are normalized as follows:
+*          if ITYPE = 1 or 2, Z**H*B*Z = I;
+*          if ITYPE = 3, Z**H*inv(B)*Z = I.
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of array WORK.
+*          If N <= 1,               LWORK >= 1.
+*          If JOBZ = 'N' and N > 1, LWORK >= N.
+*          If JOBZ = 'V' and N > 1, LWORK >= 2*N.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) REAL array, dimension (LRWORK)
+*          On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
+*
+*  LRWORK  (input) INTEGER
+*          The dimension of array RWORK.
+*          If N <= 1,               LRWORK >= 1.
+*          If JOBZ = 'N' and N > 1, LRWORK >= N.
+*          If JOBZ = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2.
+*
+*          If LRWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the RWORK array,
+*          returns this value as the first entry of the RWORK array, and
+*          no error message related to LRWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of array IWORK.
+*          If JOBZ  = 'N' or N <= 1, LIWORK >= 1.
+*          If JOBZ  = 'V' and N > 1, LIWORK >= 3 + 5*N.
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  CPPTRF or CHPEVD returned an error code:
+*             <= N:  if INFO = i, CHPEVD failed to converge;
+*                    i off-diagonal elements of an intermediate
+*                    tridiagonal form did not convergeto zero;
+*             > N:   if INFO = N + i, for 1 <= i <= n, then the leading
+*                    minor of order i of B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, UPPER, WANTZ
+      CHARACTER          TRANS
+      INTEGER            J, LIWMIN, LRWMIN, LWMIN, NEIG
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CHPEVD, CHPGST, CPPTRF, CTPMV, CTPSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+      LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( N.LE.1 ) THEN
+         LWMIN = 1
+         LIWMIN = 1
+         LRWMIN = 1
+      ELSE
+         IF( WANTZ ) THEN
+            LWMIN = 2*N
+            LRWMIN = 1 + 5*N + 2*N**2
+            LIWMIN = 3 + 5*N
+         ELSE
+            LWMIN = N
+            LRWMIN = N
+            LIWMIN = 1
+         END IF
+      END IF
+      IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -9
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -11
+      ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -15
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+         RWORK( 1 ) = LRWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHPGVD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form a Cholesky factorization of B.
+*
+      CALL CPPTRF( UPLO, N, BP, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem and solve.
+*
+      CALL CHPGST( ITYPE, UPLO, N, AP, BP, INFO )
+      CALL CHPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, RWORK,
+     $             LRWORK, IWORK, LIWORK, INFO )
+      LWMIN = MAX( REAL( LWMIN ), REAL( WORK( 1 ) ) )
+      LRWMIN = MAX( REAL( LRWMIN ), REAL( RWORK( 1 ) ) )
+      LIWMIN = MAX( REAL( LIWMIN ), REAL( IWORK( 1 ) ) )
+*
+      IF( WANTZ ) THEN
+*
+*        Backtransform eigenvectors to the original problem.
+*
+         NEIG = N
+         IF( INFO.GT.0 )
+     $      NEIG = INFO - 1
+         IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN
+*
+*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
+*           backtransform eigenvectors: x = inv(L)'*y or inv(U)*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'N'
+            ELSE
+               TRANS = 'C'
+            END IF
+*
+            DO 10 J = 1, NEIG
+               CALL CTPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ),
+     $                     1 )
+   10       CONTINUE
+*
+         ELSE IF( ITYPE.EQ.3 ) THEN
+*
+*           For B*A*x=(lambda)*x;
+*           backtransform eigenvectors: x = L*y or U'*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'C'
+            ELSE
+               TRANS = 'N'
+            END IF
+*
+            DO 20 J = 1, NEIG
+               CALL CTPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ),
+     $                     1 )
+   20       CONTINUE
+         END IF
+      END IF
+*
+      WORK( 1 ) = LWMIN
+      RWORK( 1 ) = LRWMIN
+      IWORK( 1 ) = LIWMIN
+      RETURN
+*
+*     End of CHPGVD
+*
+      END
diff --git a/costa/native/external/lapack/chpgvx.f b/costa/native/external/lapack/chpgvx.f
new file mode 100644
index 000000000..8b0b97f9b
--- /dev/null
+++ b/costa/native/external/lapack/chpgvx.f
@@ -0,0 +1,284 @@
+      SUBROUTINE CHPGVX( ITYPE, JOBZ, RANGE, UPLO, N, AP, BP, VL, VU,
+     $                   IL, IU, ABSTOL, M, W, Z, LDZ, WORK, RWORK,
+     $                   IWORK, IFAIL, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE, UPLO
+      INTEGER            IL, INFO, ITYPE, IU, LDZ, M, N
+      REAL               ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IFAIL( * ), IWORK( * )
+      REAL               RWORK( * ), W( * )
+      COMPLEX            AP( * ), BP( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHPGVX computes selected eigenvalues and, optionally, eigenvectors
+*  of a complex generalized Hermitian-definite eigenproblem, of the form
+*  A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.  Here A and
+*  B are assumed to be Hermitian, stored in packed format, and B is also
+*  positive definite.  Eigenvalues and eigenvectors can be selected by
+*  specifying either a range of values or a range of indices for the
+*  desired eigenvalues.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          Specifies the problem type to be solved:
+*          = 1:  A*x = (lambda)*B*x
+*          = 2:  A*B*x = (lambda)*x
+*          = 3:  B*A*x = (lambda)*x
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found;
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found;
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  AP      (input/output) COMPLEX array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, the contents of AP are destroyed.
+*
+*  BP      (input/output) COMPLEX array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          B, packed columnwise in a linear array.  The j-th column of B
+*          is stored in the array BP as follows:
+*          if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j;
+*          if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n.
+*
+*          On exit, the triangular factor U or L from the Cholesky
+*          factorization B = U**H*U or B = L*L**H, in the same storage
+*          format as B.
+*
+*  VL      (input) REAL
+*  VU      (input) REAL
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) REAL
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less than
+*          or equal to zero, then  EPS*|T|  will be used in its place,
+*          where |T| is the 1-norm of the tridiagonal matrix obtained
+*          by reducing AP to tridiagonal form.
+*
+*          Eigenvalues will be computed most accurately when ABSTOL is
+*          set to twice the underflow threshold 2*SLAMCH('S'), not zero.
+*          If this routine returns with INFO>0, indicating that some
+*          eigenvectors did not converge, try setting ABSTOL to
+*          2*SLAMCH('S').
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) REAL array, dimension (N)
+*          On normal exit, the first M elements contain the selected
+*          eigenvalues in ascending order.
+*
+*  Z       (output) COMPLEX array, dimension (LDZ, N)
+*          If JOBZ = 'N', then Z is not referenced.
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix A
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          The eigenvectors are normalized as follows:
+*          if ITYPE = 1 or 2, Z**H*B*Z = I;
+*          if ITYPE = 3, Z**H*inv(B)*Z = I.
+*
+*          If an eigenvector fails to converge, then that column of Z
+*          contains the latest approximation to the eigenvector, and the
+*          index of the eigenvector is returned in IFAIL.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (7*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (5*N)
+*
+*  IFAIL   (output) INTEGER array, dimension (N)
+*          If JOBZ = 'V', then if INFO = 0, the first M elements of
+*          IFAIL are zero.  If INFO > 0, then IFAIL contains the
+*          indices of the eigenvectors that failed to converge.
+*          If JOBZ = 'N', then IFAIL is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  CPPTRF or CHPEVX returned an error code:
+*             <= N:  if INFO = i, CHPEVX failed to converge;
+*                    i eigenvectors failed to converge.  Their indices
+*                    are stored in array IFAIL.
+*             > N:   if INFO = N + i, for 1 <= i <= n, then the leading
+*                    minor of order i of B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, UPPER, VALEIG, WANTZ
+      CHARACTER          TRANS
+      INTEGER            J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CHPEVX, CHPGST, CPPTRF, CTPMV, CTPSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+*
+      INFO = 0
+      IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN
+         INFO = -9
+      ELSE IF( INDEIG .AND. IL.LT.1 ) THEN
+         INFO = -10
+      ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN
+         INFO = -11
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -16
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHPGVX', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form a Cholesky factorization of B.
+*
+      CALL CPPTRF( UPLO, N, BP, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem and solve.
+*
+      CALL CHPGST( ITYPE, UPLO, N, AP, BP, INFO )
+      CALL CHPEVX( JOBZ, RANGE, UPLO, N, AP, VL, VU, IL, IU, ABSTOL, M,
+     $             W, Z, LDZ, WORK, RWORK, IWORK, IFAIL, INFO )
+*
+      IF( WANTZ ) THEN
+*
+*        Backtransform eigenvectors to the original problem.
+*
+         IF( INFO.GT.0 )
+     $      M = INFO - 1
+         IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN
+*
+*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
+*           backtransform eigenvectors: x = inv(L)'*y or inv(U)*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'N'
+            ELSE
+               TRANS = 'C'
+            END IF
+*
+            DO 10 J = 1, M
+               CALL CTPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ),
+     $                     1 )
+   10       CONTINUE
+*
+         ELSE IF( ITYPE.EQ.3 ) THEN
+*
+*           For B*A*x=(lambda)*x;
+*           backtransform eigenvectors: x = L*y or U'*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'C'
+            ELSE
+               TRANS = 'N'
+            END IF
+*
+            DO 20 J = 1, M
+               CALL CTPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ),
+     $                     1 )
+   20       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CHPGVX
+*
+      END
diff --git a/costa/native/external/lapack/chprfs.f b/costa/native/external/lapack/chprfs.f
new file mode 100644
index 000000000..c24e6a8cd
--- /dev/null
+++ b/costa/native/external/lapack/chprfs.f
@@ -0,0 +1,337 @@
+      SUBROUTINE CHPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX,
+     $                   FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX            AFP( * ), AP( * ), B( LDB, * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHPRFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is Hermitian indefinite
+*  and packed, and provides error bounds and backward error estimates
+*  for the solution.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AP      (input) COMPLEX array, dimension (N*(N+1)/2)
+*          The upper or lower triangle of the Hermitian matrix A, packed
+*          columnwise in a linear array.  The j-th column of A is stored
+*          in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*  AFP     (input) COMPLEX array, dimension (N*(N+1)/2)
+*          The factored form of the matrix A.  AFP contains the block
+*          diagonal matrix D and the multipliers used to obtain the
+*          factor U or L from the factorization A = U*D*U**H or
+*          A = L*D*L**H as computed by CHPTRF, stored as a packed
+*          triangular matrix.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by CHPTRF.
+*
+*  B       (input) COMPLEX array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) COMPLEX array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by CHPTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+      REAL               TWO
+      PARAMETER          ( TWO = 2.0E+0 )
+      REAL               THREE
+      PARAMETER          ( THREE = 3.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            COUNT, I, IK, J, K, KASE, KK, NZ
+      REAL               EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+      COMPLEX            ZDUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CCOPY, CHPMV, CHPTRS, CLACON, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MAX, REAL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHPRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = SLAMCH( 'Epsilon' )
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - A * X
+*
+         CALL CCOPY( N, B( 1, J ), 1, WORK, 1 )
+         CALL CHPMV( UPLO, N, -ONE, AP, X( 1, J ), 1, ONE, WORK, 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            RWORK( I ) = CABS1( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(A)*abs(X) + abs(B).
+*
+         KK = 1
+         IF( UPPER ) THEN
+            DO 50 K = 1, N
+               S = ZERO
+               XK = CABS1( X( K, J ) )
+               IK = KK
+               DO 40 I = 1, K - 1
+                  RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK
+                  S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) )
+                  IK = IK + 1
+   40          CONTINUE
+               RWORK( K ) = RWORK( K ) + ABS( REAL( AP( KK+K-1 ) ) )*
+     $                      XK + S
+               KK = KK + K
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               XK = CABS1( X( K, J ) )
+               RWORK( K ) = RWORK( K ) + ABS( REAL( AP( KK ) ) )*XK
+               IK = KK + 1
+               DO 60 I = K + 1, N
+                  RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK
+                  S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) )
+                  IK = IK + 1
+   60          CONTINUE
+               RWORK( K ) = RWORK( K ) + S
+               KK = KK + ( N-K+1 )
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) )
+            ELSE
+               S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) /
+     $             ( RWORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL CHPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO )
+            CALL CAXPY( N, ONE, WORK, 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(A))*
+*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(A) is the inverse of A
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(A)*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use CLACON to estimate the infinity-norm of the matrix
+*           inv(A) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I )
+            ELSE
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) +
+     $                      SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL CLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(A').
+*
+               CALL CHPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO )
+               DO 110 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  110          CONTINUE
+            ELSE IF( KASE.EQ.2 ) THEN
+*
+*              Multiply by inv(A)*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  120          CONTINUE
+               CALL CHPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of CHPRFS
+*
+      END
diff --git a/costa/native/external/lapack/chpsv.f b/costa/native/external/lapack/chpsv.f
new file mode 100644
index 000000000..6b2cd907d
--- /dev/null
+++ b/costa/native/external/lapack/chpsv.f
@@ -0,0 +1,149 @@
+      SUBROUTINE CHPSV( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            AP( * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHPSV computes the solution to a complex system of linear equations
+*     A * X = B,
+*  where A is an N-by-N Hermitian matrix stored in packed format and X
+*  and B are N-by-NRHS matrices.
+*
+*  The diagonal pivoting method is used to factor A as
+*     A = U * D * U**H,  if UPLO = 'U', or
+*     A = L * D * L**H,  if UPLO = 'L',
+*  where U (or L) is a product of permutation and unit upper (lower)
+*  triangular matrices, D is Hermitian and block diagonal with 1-by-1
+*  and 2-by-2 diagonal blocks.  The factored form of A is then used to
+*  solve the system of equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AP      (input/output) COMPLEX array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*          See below for further details.
+*
+*          On exit, the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L from the factorization
+*          A = U*D*U**H or A = L*D*L**H as computed by CHPTRF, stored as
+*          a packed triangular matrix in the same storage format as A.
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D, as
+*          determined by CHPTRF.  If IPIV(k) > 0, then rows and columns
+*          k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1
+*          diagonal block.  If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0,
+*          then rows and columns k-1 and -IPIV(k) were interchanged and
+*          D(k-1:k,k-1:k) is a 2-by-2 diagonal block.  If UPLO = 'L' and
+*          IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and
+*          -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2
+*          diagonal block.
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, D(i,i) is exactly zero.  The factorization
+*                has been completed, but the block diagonal matrix D is
+*                exactly singular, so the solution could not be
+*                computed.
+*
+*  Further Details
+*  ===============
+*
+*  The packed storage scheme is illustrated by the following example
+*  when N = 4, UPLO = 'U':
+*
+*  Two-dimensional storage of the Hermitian matrix A:
+*
+*     a11 a12 a13 a14
+*         a22 a23 a24
+*             a33 a34     (aij = conjg(aji))
+*                 a44
+*
+*  Packed storage of the upper triangle of A:
+*
+*  AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ]
+*
+*  =====================================================================
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CHPTRF, CHPTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHPSV ', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the factorization A = U*D*U' or A = L*D*L'.
+*
+      CALL CHPTRF( UPLO, N, AP, IPIV, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL CHPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO )
+*
+      END IF
+      RETURN
+*
+*     End of CHPSV
+*
+      END
diff --git a/costa/native/external/lapack/chpsvx.f b/costa/native/external/lapack/chpsvx.f
new file mode 100644
index 000000000..6694cfa96
--- /dev/null
+++ b/costa/native/external/lapack/chpsvx.f
@@ -0,0 +1,279 @@
+      SUBROUTINE CHPSVX( FACT, UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X,
+     $                   LDX, RCOND, FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          FACT, UPLO
+      INTEGER            INFO, LDB, LDX, N, NRHS
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX            AFP( * ), AP( * ), B( LDB, * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHPSVX uses the diagonal pivoting factorization A = U*D*U**H or
+*  A = L*D*L**H to compute the solution to a complex system of linear
+*  equations A * X = B, where A is an N-by-N Hermitian matrix stored
+*  in packed format and X and B are N-by-NRHS matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'N', the diagonal pivoting method is used to factor A as
+*        A = U * D * U**H,  if UPLO = 'U', or
+*        A = L * D * L**H,  if UPLO = 'L',
+*     where U (or L) is a product of permutation and unit upper (lower)
+*     triangular matrices and D is Hermitian and block diagonal with
+*     1-by-1 and 2-by-2 diagonal blocks.
+*
+*  2. If some D(i,i)=0, so that D is exactly singular, then the routine
+*     returns with INFO = i. Otherwise, the factored form of A is used
+*     to estimate the condition number of the matrix A.  If the
+*     reciprocal of the condition number is less than machine precision,
+*     INFO = N+1 is returned as a warning, but the routine still goes on
+*     to solve for X and compute error bounds as described below.
+*
+*  3. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  4. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of A has been
+*          supplied on entry.
+*          = 'F':  On entry, AFP and IPIV contain the factored form of
+*                  A.  AFP and IPIV will not be modified.
+*          = 'N':  The matrix A will be copied to AFP and factored.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AP      (input) COMPLEX array, dimension (N*(N+1)/2)
+*          The upper or lower triangle of the Hermitian matrix A, packed
+*          columnwise in a linear array.  The j-th column of A is stored
+*          in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*          See below for further details.
+*
+*  AFP     (input or output) COMPLEX array, dimension (N*(N+1)/2)
+*          If FACT = 'F', then AFP is an input argument and on entry
+*          contains the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L from the factorization
+*          A = U*D*U**H or A = L*D*L**H as computed by CHPTRF, stored as
+*          a packed triangular matrix in the same storage format as A.
+*
+*          If FACT = 'N', then AFP is an output argument and on exit
+*          contains the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L from the factorization
+*          A = U*D*U**H or A = L*D*L**H as computed by CHPTRF, stored as
+*          a packed triangular matrix in the same storage format as A.
+*
+*  IPIV    (input or output) INTEGER array, dimension (N)
+*          If FACT = 'F', then IPIV is an input argument and on entry
+*          contains details of the interchanges and the block structure
+*          of D, as determined by CHPTRF.
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*          If FACT = 'N', then IPIV is an output argument and on exit
+*          contains details of the interchanges and the block structure
+*          of D, as determined by CHPTRF.
+*
+*  B       (input) COMPLEX array, dimension (LDB,NRHS)
+*          The N-by-NRHS right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) COMPLEX array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) REAL
+*          The estimate of the reciprocal condition number of the matrix
+*          A.  If RCOND is less than the machine precision (in
+*          particular, if RCOND = 0), the matrix is singular to working
+*          precision.  This condition is indicated by a return code of
+*          INFO > 0.
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= N:  D(i,i) is exactly zero.  The factorization
+*                       has been completed but the factor D is exactly
+*                       singular, so the solution and error bounds could
+*                       not be computed. RCOND = 0 is returned.
+*                = N+1: D is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  Further Details
+*  ===============
+*
+*  The packed storage scheme is illustrated by the following example
+*  when N = 4, UPLO = 'U':
+*
+*  Two-dimensional storage of the Hermitian matrix A:
+*
+*     a11 a12 a13 a14
+*         a22 a23 a24
+*             a33 a34     (aij = conjg(aji))
+*                 a44
+*
+*  Packed storage of the upper triangle of A:
+*
+*  AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ]
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOFACT
+      REAL               ANORM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               CLANHP, SLAMCH
+      EXTERNAL           LSAME, CLANHP, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CHPCON, CHPRFS, CHPTRF, CHPTRS, CLACPY,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHPSVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( NOFACT ) THEN
+*
+*        Compute the factorization A = U*D*U' or A = L*D*L'.
+*
+         CALL CCOPY( N*( N+1 ) / 2, AP, 1, AFP, 1 )
+         CALL CHPTRF( UPLO, N, AFP, IPIV, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      ANORM = CLANHP( 'I', UPLO, N, AP, RWORK )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL CHPCON( UPLO, N, AFP, IPIV, ANORM, RCOND, WORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.SLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution vectors X.
+*
+      CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL CHPTRS( UPLO, N, NRHS, AFP, IPIV, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solutions and
+*     compute error bounds and backward error estimates for them.
+*
+      CALL CHPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, FERR,
+     $             BERR, WORK, RWORK, INFO )
+*
+      RETURN
+*
+*     End of CHPSVX
+*
+      END
diff --git a/costa/native/external/lapack/chptrd.f b/costa/native/external/lapack/chptrd.f
new file mode 100644
index 000000000..1a37238c8
--- /dev/null
+++ b/costa/native/external/lapack/chptrd.f
@@ -0,0 +1,238 @@
+      SUBROUTINE CHPTRD( UPLO, N, AP, D, E, TAU, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * ), E( * )
+      COMPLEX            AP( * ), TAU( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHPTRD reduces a complex Hermitian matrix A stored in packed form to
+*  real symmetric tridiagonal form T by a unitary similarity
+*  transformation: Q**H * A * Q = T.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) COMPLEX array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*          On exit, if UPLO = 'U', the diagonal and first superdiagonal
+*          of A are overwritten by the corresponding elements of the
+*          tridiagonal matrix T, and the elements above the first
+*          superdiagonal, with the array TAU, represent the unitary
+*          matrix Q as a product of elementary reflectors; if UPLO
+*          = 'L', the diagonal and first subdiagonal of A are over-
+*          written by the corresponding elements of the tridiagonal
+*          matrix T, and the elements below the first subdiagonal, with
+*          the array TAU, represent the unitary matrix Q as a product
+*          of elementary reflectors. See Further Details.
+*
+*  D       (output) REAL array, dimension (N)
+*          The diagonal elements of the tridiagonal matrix T:
+*          D(i) = A(i,i).
+*
+*  E       (output) REAL array, dimension (N-1)
+*          The off-diagonal elements of the tridiagonal matrix T:
+*          E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.
+*
+*  TAU     (output) COMPLEX array, dimension (N-1)
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  If UPLO = 'U', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(n-1) . . . H(2) H(1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in AP,
+*  overwriting A(1:i-1,i+1), and tau is stored in TAU(i).
+*
+*  If UPLO = 'L', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(1) H(2) . . . H(n-1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in AP,
+*  overwriting A(i+2:n,i), and tau is stored in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE, ZERO, HALF
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ),
+     $                   ZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   HALF = ( 0.5E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, I1, I1I1, II
+      COMPLEX            ALPHA, TAUI
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CHPMV, CHPR2, CLARFG, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      COMPLEX            CDOTC
+      EXTERNAL           LSAME, CDOTC
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHPTRD', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Reduce the upper triangle of A.
+*        I1 is the index in AP of A(1,I+1).
+*
+         I1 = N*( N-1 ) / 2 + 1
+         AP( I1+N-1 ) = REAL( AP( I1+N-1 ) )
+         DO 10 I = N - 1, 1, -1
+*
+*           Generate elementary reflector H(i) = I - tau * v * v'
+*           to annihilate A(1:i-1,i+1)
+*
+            ALPHA = AP( I1+I-1 )
+            CALL CLARFG( I, ALPHA, AP( I1 ), 1, TAUI )
+            E( I ) = ALPHA
+*
+            IF( TAUI.NE.ZERO ) THEN
+*
+*              Apply H(i) from both sides to A(1:i,1:i)
+*
+               AP( I1+I-1 ) = ONE
+*
+*              Compute  y := tau * A * v  storing y in TAU(1:i)
+*
+               CALL CHPMV( UPLO, I, TAUI, AP, AP( I1 ), 1, ZERO, TAU,
+     $                     1 )
+*
+*              Compute  w := y - 1/2 * tau * (y'*v) * v
+*
+               ALPHA = -HALF*TAUI*CDOTC( I, TAU, 1, AP( I1 ), 1 )
+               CALL CAXPY( I, ALPHA, AP( I1 ), 1, TAU, 1 )
+*
+*              Apply the transformation as a rank-2 update:
+*                 A := A - v * w' - w * v'
+*
+               CALL CHPR2( UPLO, I, -ONE, AP( I1 ), 1, TAU, 1, AP )
+*
+            END IF
+            AP( I1+I-1 ) = E( I )
+            D( I+1 ) = AP( I1+I )
+            TAU( I ) = TAUI
+            I1 = I1 - I
+   10    CONTINUE
+         D( 1 ) = AP( 1 )
+      ELSE
+*
+*        Reduce the lower triangle of A. II is the index in AP of
+*        A(i,i) and I1I1 is the index of A(i+1,i+1).
+*
+         II = 1
+         AP( 1 ) = REAL( AP( 1 ) )
+         DO 20 I = 1, N - 1
+            I1I1 = II + N - I + 1
+*
+*           Generate elementary reflector H(i) = I - tau * v * v'
+*           to annihilate A(i+2:n,i)
+*
+            ALPHA = AP( II+1 )
+            CALL CLARFG( N-I, ALPHA, AP( II+2 ), 1, TAUI )
+            E( I ) = ALPHA
+*
+            IF( TAUI.NE.ZERO ) THEN
+*
+*              Apply H(i) from both sides to A(i+1:n,i+1:n)
+*
+               AP( II+1 ) = ONE
+*
+*              Compute  y := tau * A * v  storing y in TAU(i:n-1)
+*
+               CALL CHPMV( UPLO, N-I, TAUI, AP( I1I1 ), AP( II+1 ), 1,
+     $                     ZERO, TAU( I ), 1 )
+*
+*              Compute  w := y - 1/2 * tau * (y'*v) * v
+*
+               ALPHA = -HALF*TAUI*CDOTC( N-I, TAU( I ), 1, AP( II+1 ),
+     $                 1 )
+               CALL CAXPY( N-I, ALPHA, AP( II+1 ), 1, TAU( I ), 1 )
+*
+*              Apply the transformation as a rank-2 update:
+*                 A := A - v * w' - w * v'
+*
+               CALL CHPR2( UPLO, N-I, -ONE, AP( II+1 ), 1, TAU( I ), 1,
+     $                     AP( I1I1 ) )
+*
+            END IF
+            AP( II+1 ) = E( I )
+            D( I ) = AP( II )
+            TAU( I ) = TAUI
+            II = I1I1
+   20    CONTINUE
+         D( N ) = AP( II )
+      END IF
+*
+      RETURN
+*
+*     End of CHPTRD
+*
+      END
diff --git a/costa/native/external/lapack/chptrf.f b/costa/native/external/lapack/chptrf.f
new file mode 100644
index 000000000..632c23935
--- /dev/null
+++ b/costa/native/external/lapack/chptrf.f
@@ -0,0 +1,581 @@
+      SUBROUTINE CHPTRF( UPLO, N, AP, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            AP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHPTRF computes the factorization of a complex Hermitian packed
+*  matrix A using the Bunch-Kaufman diagonal pivoting method:
+*
+*     A = U*D*U**H  or  A = L*D*L**H
+*
+*  where U (or L) is a product of permutation and unit upper (lower)
+*  triangular matrices, and D is Hermitian and block diagonal with
+*  1-by-1 and 2-by-2 diagonal blocks.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) COMPLEX array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L, stored as a packed triangular
+*          matrix overwriting A (see below for further details).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D.
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, D(i,i) is exactly zero.  The factorization
+*               has been completed, but the block diagonal matrix D is
+*               exactly singular, and division by zero will occur if it
+*               is used to solve a system of equations.
+*
+*  Further Details
+*  ===============
+*
+*  5-96 - Based on modifications by J. Lewis, Boeing Computer Services
+*         Company
+*
+*  If UPLO = 'U', then A = U*D*U', where
+*     U = P(n)*U(n)* ... *P(k)U(k)* ...,
+*  i.e., U is a product of terms P(k)*U(k), where k decreases from n to
+*  1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and U(k) is a unit upper triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    v    0   )   k-s
+*     U(k) =  (   0    I    0   )   s
+*             (   0    0    I   )   n-k
+*                k-s   s   n-k
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k).
+*  If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k),
+*  and A(k,k), and v overwrites A(1:k-2,k-1:k).
+*
+*  If UPLO = 'L', then A = L*D*L', where
+*     L = P(1)*L(1)* ... *P(k)*L(k)* ...,
+*  i.e., L is a product of terms P(k)*L(k), where k increases from 1 to
+*  n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and L(k) is a unit lower triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    0     0   )  k-1
+*     L(k) =  (   0    I     0   )  s
+*             (   0    v     I   )  n-k-s+1
+*                k-1   s  n-k-s+1
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k).
+*  If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k),
+*  and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+      REAL               EIGHT, SEVTEN
+      PARAMETER          ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, IMAX, J, JMAX, K, KC, KK, KNC, KP, KPC,
+     $                   KSTEP, KX, NPP
+      REAL               ABSAKK, ALPHA, COLMAX, D, D11, D22, R1, ROWMAX,
+     $                   TT
+      COMPLEX            D12, D21, T, WK, WKM1, WKP1, ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ICAMAX
+      REAL               SLAPY2
+      EXTERNAL           LSAME, ICAMAX, SLAPY2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CHPR, CSSCAL, CSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, CMPLX, CONJG, MAX, REAL, SQRT
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHPTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Initialize ALPHA for use in choosing pivot block size.
+*
+      ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT
+*
+      IF( UPPER ) THEN
+*
+*        Factorize A as U*D*U' using the upper triangle of A
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2
+*
+         K = N
+         KC = ( N-1 )*N / 2 + 1
+   10    CONTINUE
+         KNC = KC
+*
+*        If K < 1, exit from loop
+*
+         IF( K.LT.1 )
+     $      GO TO 110
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = ABS( REAL( AP( KC+K-1 ) ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.GT.1 ) THEN
+            IMAX = ICAMAX( K-1, AP( KC ), 1 )
+            COLMAX = CABS1( AP( KC+IMAX-1 ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+            AP( KC+K-1 ) = REAL( AP( KC+K-1 ) )
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               ROWMAX = ZERO
+               JMAX = IMAX
+               KX = IMAX*( IMAX+1 ) / 2 + IMAX
+               DO 20 J = IMAX + 1, K
+                  IF( CABS1( AP( KX ) ).GT.ROWMAX ) THEN
+                     ROWMAX = CABS1( AP( KX ) )
+                     JMAX = J
+                  END IF
+                  KX = KX + J
+   20          CONTINUE
+               KPC = ( IMAX-1 )*IMAX / 2 + 1
+               IF( IMAX.GT.1 ) THEN
+                  JMAX = ICAMAX( IMAX-1, AP( KPC ), 1 )
+                  ROWMAX = MAX( ROWMAX, CABS1( AP( KPC+JMAX-1 ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( ABS( REAL( AP( KPC+IMAX-1 ) ) ).GE.ALPHA*
+     $                  ROWMAX ) THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+               ELSE
+*
+*                 interchange rows and columns K-1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K - KSTEP + 1
+            IF( KSTEP.EQ.2 )
+     $         KNC = KNC - K + 1
+            IF( KP.NE.KK ) THEN
+*
+*              Interchange rows and columns KK and KP in the leading
+*              submatrix A(1:k,1:k)
+*
+               CALL CSWAP( KP-1, AP( KNC ), 1, AP( KPC ), 1 )
+               KX = KPC + KP - 1
+               DO 30 J = KP + 1, KK - 1
+                  KX = KX + J - 1
+                  T = CONJG( AP( KNC+J-1 ) )
+                  AP( KNC+J-1 ) = CONJG( AP( KX ) )
+                  AP( KX ) = T
+   30          CONTINUE
+               AP( KX+KK-1 ) = CONJG( AP( KX+KK-1 ) )
+               R1 = REAL( AP( KNC+KK-1 ) )
+               AP( KNC+KK-1 ) = REAL( AP( KPC+KP-1 ) )
+               AP( KPC+KP-1 ) = R1
+               IF( KSTEP.EQ.2 ) THEN
+                  AP( KC+K-1 ) = REAL( AP( KC+K-1 ) )
+                  T = AP( KC+K-2 )
+                  AP( KC+K-2 ) = AP( KC+KP-1 )
+                  AP( KC+KP-1 ) = T
+               END IF
+            ELSE
+               AP( KC+K-1 ) = REAL( AP( KC+K-1 ) )
+               IF( KSTEP.EQ.2 )
+     $            AP( KC-1 ) = REAL( AP( KC-1 ) )
+            END IF
+*
+*           Update the leading submatrix
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column k now holds
+*
+*              W(k) = U(k)*D(k)
+*
+*              where U(k) is the k-th column of U
+*
+*              Perform a rank-1 update of A(1:k-1,1:k-1) as
+*
+*              A := A - U(k)*D(k)*U(k)' = A - W(k)*1/D(k)*W(k)'
+*
+               R1 = ONE / REAL( AP( KC+K-1 ) )
+               CALL CHPR( UPLO, K-1, -R1, AP( KC ), 1, AP )
+*
+*              Store U(k) in column k
+*
+               CALL CSSCAL( K-1, R1, AP( KC ), 1 )
+            ELSE
+*
+*              2-by-2 pivot block D(k): columns k and k-1 now hold
+*
+*              ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k)
+*
+*              where U(k) and U(k-1) are the k-th and (k-1)-th columns
+*              of U
+*
+*              Perform a rank-2 update of A(1:k-2,1:k-2) as
+*
+*              A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )'
+*                 = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )'
+*
+               IF( K.GT.2 ) THEN
+*
+                  D = SLAPY2( REAL( AP( K-1+( K-1 )*K / 2 ) ),
+     $                AIMAG( AP( K-1+( K-1 )*K / 2 ) ) )
+                  D22 = REAL( AP( K-1+( K-2 )*( K-1 ) / 2 ) ) / D
+                  D11 = REAL( AP( K+( K-1 )*K / 2 ) ) / D
+                  TT = ONE / ( D11*D22-ONE )
+                  D12 = AP( K-1+( K-1 )*K / 2 ) / D
+                  D = TT / D
+*
+                  DO 50 J = K - 2, 1, -1
+                     WKM1 = D*( D11*AP( J+( K-2 )*( K-1 ) / 2 )-
+     $                      CONJG( D12 )*AP( J+( K-1 )*K / 2 ) )
+                     WK = D*( D22*AP( J+( K-1 )*K / 2 )-D12*
+     $                    AP( J+( K-2 )*( K-1 ) / 2 ) )
+                     DO 40 I = J, 1, -1
+                        AP( I+( J-1 )*J / 2 ) = AP( I+( J-1 )*J / 2 ) -
+     $                     AP( I+( K-1 )*K / 2 )*CONJG( WK ) -
+     $                     AP( I+( K-2 )*( K-1 ) / 2 )*CONJG( WKM1 )
+   40                CONTINUE
+                     AP( J+( K-1 )*K / 2 ) = WK
+                     AP( J+( K-2 )*( K-1 ) / 2 ) = WKM1
+                     AP( J+( J-1 )*J / 2 ) = CMPLX( REAL( AP( J+( J-1 )*
+     $                                       J / 2 ) ), 0.0E+0 )
+   50             CONTINUE
+*
+               END IF
+*
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K-1 ) = -KP
+         END IF
+*
+*        Decrease K and return to the start of the main loop
+*
+         K = K - KSTEP
+         KC = KNC - K
+         GO TO 10
+*
+      ELSE
+*
+*        Factorize A as L*D*L' using the lower triangle of A
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2
+*
+         K = 1
+         KC = 1
+         NPP = N*( N+1 ) / 2
+   60    CONTINUE
+         KNC = KC
+*
+*        If K > N, exit from loop
+*
+         IF( K.GT.N )
+     $      GO TO 110
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = ABS( REAL( AP( KC ) ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.LT.N ) THEN
+            IMAX = K + ICAMAX( N-K, AP( KC+1 ), 1 )
+            COLMAX = CABS1( AP( KC+IMAX-K ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+            AP( KC ) = REAL( AP( KC ) )
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               ROWMAX = ZERO
+               KX = KC + IMAX - K
+               DO 70 J = K, IMAX - 1
+                  IF( CABS1( AP( KX ) ).GT.ROWMAX ) THEN
+                     ROWMAX = CABS1( AP( KX ) )
+                     JMAX = J
+                  END IF
+                  KX = KX + N - J
+   70          CONTINUE
+               KPC = NPP - ( N-IMAX+1 )*( N-IMAX+2 ) / 2 + 1
+               IF( IMAX.LT.N ) THEN
+                  JMAX = IMAX + ICAMAX( N-IMAX, AP( KPC+1 ), 1 )
+                  ROWMAX = MAX( ROWMAX, CABS1( AP( KPC+JMAX-IMAX ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( ABS( REAL( AP( KPC ) ) ).GE.ALPHA*ROWMAX ) THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+               ELSE
+*
+*                 interchange rows and columns K+1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K + KSTEP - 1
+            IF( KSTEP.EQ.2 )
+     $         KNC = KNC + N - K + 1
+            IF( KP.NE.KK ) THEN
+*
+*              Interchange rows and columns KK and KP in the trailing
+*              submatrix A(k:n,k:n)
+*
+               IF( KP.LT.N )
+     $            CALL CSWAP( N-KP, AP( KNC+KP-KK+1 ), 1, AP( KPC+1 ),
+     $                        1 )
+               KX = KNC + KP - KK
+               DO 80 J = KK + 1, KP - 1
+                  KX = KX + N - J + 1
+                  T = CONJG( AP( KNC+J-KK ) )
+                  AP( KNC+J-KK ) = CONJG( AP( KX ) )
+                  AP( KX ) = T
+   80          CONTINUE
+               AP( KNC+KP-KK ) = CONJG( AP( KNC+KP-KK ) )
+               R1 = REAL( AP( KNC ) )
+               AP( KNC ) = REAL( AP( KPC ) )
+               AP( KPC ) = R1
+               IF( KSTEP.EQ.2 ) THEN
+                  AP( KC ) = REAL( AP( KC ) )
+                  T = AP( KC+1 )
+                  AP( KC+1 ) = AP( KC+KP-K )
+                  AP( KC+KP-K ) = T
+               END IF
+            ELSE
+               AP( KC ) = REAL( AP( KC ) )
+               IF( KSTEP.EQ.2 )
+     $            AP( KNC ) = REAL( AP( KNC ) )
+            END IF
+*
+*           Update the trailing submatrix
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column k now holds
+*
+*              W(k) = L(k)*D(k)
+*
+*              where L(k) is the k-th column of L
+*
+               IF( K.LT.N ) THEN
+*
+*                 Perform a rank-1 update of A(k+1:n,k+1:n) as
+*
+*                 A := A - L(k)*D(k)*L(k)' = A - W(k)*(1/D(k))*W(k)'
+*
+                  R1 = ONE / REAL( AP( KC ) )
+                  CALL CHPR( UPLO, N-K, -R1, AP( KC+1 ), 1,
+     $                       AP( KC+N-K+1 ) )
+*
+*                 Store L(k) in column K
+*
+                  CALL CSSCAL( N-K, R1, AP( KC+1 ), 1 )
+               END IF
+            ELSE
+*
+*              2-by-2 pivot block D(k): columns K and K+1 now hold
+*
+*              ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k)
+*
+*              where L(k) and L(k+1) are the k-th and (k+1)-th columns
+*              of L
+*
+               IF( K.LT.N-1 ) THEN
+*
+*                 Perform a rank-2 update of A(k+2:n,k+2:n) as
+*
+*                 A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )'
+*                    = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )'
+*
+*                 where L(k) and L(k+1) are the k-th and (k+1)-th
+*                 columns of L
+*
+                  D = SLAPY2( REAL( AP( K+1+( K-1 )*( 2*N-K ) / 2 ) ),
+     $                AIMAG( AP( K+1+( K-1 )*( 2*N-K ) / 2 ) ) )
+                  D11 = REAL( AP( K+1+K*( 2*N-K-1 ) / 2 ) ) / D
+                  D22 = REAL( AP( K+( K-1 )*( 2*N-K ) / 2 ) ) / D
+                  TT = ONE / ( D11*D22-ONE )
+                  D21 = AP( K+1+( K-1 )*( 2*N-K ) / 2 ) / D
+                  D = TT / D
+*
+                  DO 100 J = K + 2, N
+                     WK = D*( D11*AP( J+( K-1 )*( 2*N-K ) / 2 )-D21*
+     $                    AP( J+K*( 2*N-K-1 ) / 2 ) )
+                     WKP1 = D*( D22*AP( J+K*( 2*N-K-1 ) / 2 )-
+     $                      CONJG( D21 )*AP( J+( K-1 )*( 2*N-K ) / 2 ) )
+                     DO 90 I = J, N
+                        AP( I+( J-1 )*( 2*N-J ) / 2 ) = AP( I+( J-1 )*
+     $                     ( 2*N-J ) / 2 ) - AP( I+( K-1 )*( 2*N-K ) /
+     $                     2 )*CONJG( WK ) - AP( I+K*( 2*N-K-1 ) / 2 )*
+     $                     CONJG( WKP1 )
+   90                CONTINUE
+                     AP( J+( K-1 )*( 2*N-K ) / 2 ) = WK
+                     AP( J+K*( 2*N-K-1 ) / 2 ) = WKP1
+                     AP( J+( J-1 )*( 2*N-J ) / 2 )
+     $                  = CMPLX( REAL( AP( J+( J-1 )*( 2*N-J ) / 2 ) ),
+     $                  0.0E+0 )
+  100             CONTINUE
+               END IF
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K+1 ) = -KP
+         END IF
+*
+*        Increase K and return to the start of the main loop
+*
+         K = K + KSTEP
+         KC = KNC + N - K + 2
+         GO TO 60
+*
+      END IF
+*
+  110 CONTINUE
+      RETURN
+*
+*     End of CHPTRF
+*
+      END
diff --git a/costa/native/external/lapack/chptri.f b/costa/native/external/lapack/chptri.f
new file mode 100644
index 000000000..323fb2cfe
--- /dev/null
+++ b/costa/native/external/lapack/chptri.f
@@ -0,0 +1,344 @@
+      SUBROUTINE CHPTRI( UPLO, N, AP, IPIV, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            AP( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHPTRI computes the inverse of a complex Hermitian indefinite matrix
+*  A in packed storage using the factorization A = U*D*U**H or
+*  A = L*D*L**H computed by CHPTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**H;
+*          = 'L':  Lower triangular, form is A = L*D*L**H.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) COMPLEX array, dimension (N*(N+1)/2)
+*          On entry, the block diagonal matrix D and the multipliers
+*          used to obtain the factor U or L as computed by CHPTRF,
+*          stored as a packed triangular matrix.
+*
+*          On exit, if INFO = 0, the (Hermitian) inverse of the original
+*          matrix, stored as a packed triangular matrix. The j-th column
+*          of inv(A) is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = inv(A)(i,j) for 1<=i<=j;
+*          if UPLO = 'L',
+*             AP(i + (j-1)*(2n-j)/2) = inv(A)(i,j) for j<=i<=n.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by CHPTRF.
+*
+*  WORK    (workspace) COMPLEX array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its
+*               inverse could not be computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      COMPLEX            CONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, CONE = ( 1.0E+0, 0.0E+0 ),
+     $                   ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, K, KC, KCNEXT, KP, KPC, KSTEP, KX, NPP
+      REAL               AK, AKP1, D, T
+      COMPLEX            AKKP1, TEMP
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      COMPLEX            CDOTC
+      EXTERNAL           LSAME, CDOTC
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CHPMV, CSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, CONJG, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHPTRI', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Check that the diagonal matrix D is nonsingular.
+*
+      IF( UPPER ) THEN
+*
+*        Upper triangular storage: examine D from bottom to top
+*
+         KP = N*( N+1 ) / 2
+         DO 10 INFO = N, 1, -1
+            IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO )
+     $         RETURN
+            KP = KP - INFO
+   10    CONTINUE
+      ELSE
+*
+*        Lower triangular storage: examine D from top to bottom.
+*
+         KP = 1
+         DO 20 INFO = 1, N
+            IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO )
+     $         RETURN
+            KP = KP + N - INFO + 1
+   20    CONTINUE
+      END IF
+      INFO = 0
+*
+      IF( UPPER ) THEN
+*
+*        Compute inv(A) from the factorization A = U*D*U'.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+         KC = 1
+   30    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 50
+*
+         KCNEXT = KC + K
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Invert the diagonal block.
+*
+            AP( KC+K-1 ) = ONE / REAL( AP( KC+K-1 ) )
+*
+*           Compute column K of the inverse.
+*
+            IF( K.GT.1 ) THEN
+               CALL CCOPY( K-1, AP( KC ), 1, WORK, 1 )
+               CALL CHPMV( UPLO, K-1, -CONE, AP, WORK, 1, ZERO,
+     $                     AP( KC ), 1 )
+               AP( KC+K-1 ) = AP( KC+K-1 ) -
+     $                        REAL( CDOTC( K-1, WORK, 1, AP( KC ), 1 ) )
+            END IF
+            KSTEP = 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Invert the diagonal block.
+*
+            T = ABS( AP( KCNEXT+K-1 ) )
+            AK = REAL( AP( KC+K-1 ) ) / T
+            AKP1 = REAL( AP( KCNEXT+K ) ) / T
+            AKKP1 = AP( KCNEXT+K-1 ) / T
+            D = T*( AK*AKP1-ONE )
+            AP( KC+K-1 ) = AKP1 / D
+            AP( KCNEXT+K ) = AK / D
+            AP( KCNEXT+K-1 ) = -AKKP1 / D
+*
+*           Compute columns K and K+1 of the inverse.
+*
+            IF( K.GT.1 ) THEN
+               CALL CCOPY( K-1, AP( KC ), 1, WORK, 1 )
+               CALL CHPMV( UPLO, K-1, -CONE, AP, WORK, 1, ZERO,
+     $                     AP( KC ), 1 )
+               AP( KC+K-1 ) = AP( KC+K-1 ) -
+     $                        REAL( CDOTC( K-1, WORK, 1, AP( KC ), 1 ) )
+               AP( KCNEXT+K-1 ) = AP( KCNEXT+K-1 ) -
+     $                            CDOTC( K-1, AP( KC ), 1, AP( KCNEXT ),
+     $                            1 )
+               CALL CCOPY( K-1, AP( KCNEXT ), 1, WORK, 1 )
+               CALL CHPMV( UPLO, K-1, -CONE, AP, WORK, 1, ZERO,
+     $                     AP( KCNEXT ), 1 )
+               AP( KCNEXT+K ) = AP( KCNEXT+K ) -
+     $                          REAL( CDOTC( K-1, WORK, 1, AP( KCNEXT ),
+     $                          1 ) )
+            END IF
+            KSTEP = 2
+            KCNEXT = KCNEXT + K + 1
+         END IF
+*
+         KP = ABS( IPIV( K ) )
+         IF( KP.NE.K ) THEN
+*
+*           Interchange rows and columns K and KP in the leading
+*           submatrix A(1:k+1,1:k+1)
+*
+            KPC = ( KP-1 )*KP / 2 + 1
+            CALL CSWAP( KP-1, AP( KC ), 1, AP( KPC ), 1 )
+            KX = KPC + KP - 1
+            DO 40 J = KP + 1, K - 1
+               KX = KX + J - 1
+               TEMP = CONJG( AP( KC+J-1 ) )
+               AP( KC+J-1 ) = CONJG( AP( KX ) )
+               AP( KX ) = TEMP
+   40       CONTINUE
+            AP( KC+KP-1 ) = CONJG( AP( KC+KP-1 ) )
+            TEMP = AP( KC+K-1 )
+            AP( KC+K-1 ) = AP( KPC+KP-1 )
+            AP( KPC+KP-1 ) = TEMP
+            IF( KSTEP.EQ.2 ) THEN
+               TEMP = AP( KC+K+K-1 )
+               AP( KC+K+K-1 ) = AP( KC+K+KP-1 )
+               AP( KC+K+KP-1 ) = TEMP
+            END IF
+         END IF
+*
+         K = K + KSTEP
+         KC = KCNEXT
+         GO TO 30
+   50    CONTINUE
+*
+      ELSE
+*
+*        Compute inv(A) from the factorization A = L*D*L'.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         NPP = N*( N+1 ) / 2
+         K = N
+         KC = NPP
+   60    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 80
+*
+         KCNEXT = KC - ( N-K+2 )
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Invert the diagonal block.
+*
+            AP( KC ) = ONE / REAL( AP( KC ) )
+*
+*           Compute column K of the inverse.
+*
+            IF( K.LT.N ) THEN
+               CALL CCOPY( N-K, AP( KC+1 ), 1, WORK, 1 )
+               CALL CHPMV( UPLO, N-K, -CONE, AP( KC+N-K+1 ), WORK, 1,
+     $                     ZERO, AP( KC+1 ), 1 )
+               AP( KC ) = AP( KC ) - REAL( CDOTC( N-K, WORK, 1,
+     $                    AP( KC+1 ), 1 ) )
+            END IF
+            KSTEP = 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Invert the diagonal block.
+*
+            T = ABS( AP( KCNEXT+1 ) )
+            AK = REAL( AP( KCNEXT ) ) / T
+            AKP1 = REAL( AP( KC ) ) / T
+            AKKP1 = AP( KCNEXT+1 ) / T
+            D = T*( AK*AKP1-ONE )
+            AP( KCNEXT ) = AKP1 / D
+            AP( KC ) = AK / D
+            AP( KCNEXT+1 ) = -AKKP1 / D
+*
+*           Compute columns K-1 and K of the inverse.
+*
+            IF( K.LT.N ) THEN
+               CALL CCOPY( N-K, AP( KC+1 ), 1, WORK, 1 )
+               CALL CHPMV( UPLO, N-K, -CONE, AP( KC+( N-K+1 ) ), WORK,
+     $                     1, ZERO, AP( KC+1 ), 1 )
+               AP( KC ) = AP( KC ) - REAL( CDOTC( N-K, WORK, 1,
+     $                    AP( KC+1 ), 1 ) )
+               AP( KCNEXT+1 ) = AP( KCNEXT+1 ) -
+     $                          CDOTC( N-K, AP( KC+1 ), 1,
+     $                          AP( KCNEXT+2 ), 1 )
+               CALL CCOPY( N-K, AP( KCNEXT+2 ), 1, WORK, 1 )
+               CALL CHPMV( UPLO, N-K, -CONE, AP( KC+( N-K+1 ) ), WORK,
+     $                     1, ZERO, AP( KCNEXT+2 ), 1 )
+               AP( KCNEXT ) = AP( KCNEXT ) -
+     $                        REAL( CDOTC( N-K, WORK, 1, AP( KCNEXT+2 ),
+     $                        1 ) )
+            END IF
+            KSTEP = 2
+            KCNEXT = KCNEXT - ( N-K+3 )
+         END IF
+*
+         KP = ABS( IPIV( K ) )
+         IF( KP.NE.K ) THEN
+*
+*           Interchange rows and columns K and KP in the trailing
+*           submatrix A(k-1:n,k-1:n)
+*
+            KPC = NPP - ( N-KP+1 )*( N-KP+2 ) / 2 + 1
+            IF( KP.LT.N )
+     $         CALL CSWAP( N-KP, AP( KC+KP-K+1 ), 1, AP( KPC+1 ), 1 )
+            KX = KC + KP - K
+            DO 70 J = K + 1, KP - 1
+               KX = KX + N - J + 1
+               TEMP = CONJG( AP( KC+J-K ) )
+               AP( KC+J-K ) = CONJG( AP( KX ) )
+               AP( KX ) = TEMP
+   70       CONTINUE
+            AP( KC+KP-K ) = CONJG( AP( KC+KP-K ) )
+            TEMP = AP( KC )
+            AP( KC ) = AP( KPC )
+            AP( KPC ) = TEMP
+            IF( KSTEP.EQ.2 ) THEN
+               TEMP = AP( KC-N+K-1 )
+               AP( KC-N+K-1 ) = AP( KC-N+KP-1 )
+               AP( KC-N+KP-1 ) = TEMP
+            END IF
+         END IF
+*
+         K = K - KSTEP
+         KC = KCNEXT
+         GO TO 60
+   80    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CHPTRI
+*
+      END
diff --git a/costa/native/external/lapack/chptrs.f b/costa/native/external/lapack/chptrs.f
new file mode 100644
index 000000000..885551cdf
--- /dev/null
+++ b/costa/native/external/lapack/chptrs.f
@@ -0,0 +1,402 @@
+      SUBROUTINE CHPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            AP( * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHPTRS solves a system of linear equations A*X = B with a complex
+*  Hermitian matrix A stored in packed format using the factorization
+*  A = U*D*U**H or A = L*D*L**H computed by CHPTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**H;
+*          = 'L':  Lower triangular, form is A = L*D*L**H.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AP      (input) COMPLEX array, dimension (N*(N+1)/2)
+*          The block diagonal matrix D and the multipliers used to
+*          obtain the factor U or L as computed by CHPTRF, stored as a
+*          packed triangular matrix.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by CHPTRF.
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, K, KC, KP
+      REAL               S
+      COMPLEX            AK, AKM1, AKM1K, BK, BKM1, DENOM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEMV, CGERU, CLACGV, CSSCAL, CSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX, REAL
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHPTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Solve A*X = B, where A = U*D*U'.
+*
+*        First solve U*D*X = B, overwriting B with X.
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = N
+         KC = N*( N+1 ) / 2 + 1
+   10    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 30
+*
+         KC = KC - K
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(U(K)), where U(K) is the transformation
+*           stored in column K of A.
+*
+            CALL CGERU( K-1, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB,
+     $                  B( 1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            S = REAL( ONE ) / REAL( AP( KC+K-1 ) )
+            CALL CSSCAL( NRHS, S, B( K, 1 ), LDB )
+            K = K - 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Interchange rows K-1 and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K-1 )
+     $         CALL CSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(U(K)), where U(K) is the transformation
+*           stored in columns K-1 and K of A.
+*
+            CALL CGERU( K-2, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB,
+     $                  B( 1, 1 ), LDB )
+            CALL CGERU( K-2, NRHS, -ONE, AP( KC-( K-1 ) ), 1,
+     $                  B( K-1, 1 ), LDB, B( 1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            AKM1K = AP( KC+K-2 )
+            AKM1 = AP( KC-1 ) / AKM1K
+            AK = AP( KC+K-1 ) / CONJG( AKM1K )
+            DENOM = AKM1*AK - ONE
+            DO 20 J = 1, NRHS
+               BKM1 = B( K-1, J ) / AKM1K
+               BK = B( K, J ) / CONJG( AKM1K )
+               B( K-1, J ) = ( AK*BKM1-BK ) / DENOM
+               B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM
+   20       CONTINUE
+            KC = KC - K + 1
+            K = K - 2
+         END IF
+*
+         GO TO 10
+   30    CONTINUE
+*
+*        Next solve U'*X = B, overwriting B with X.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+         KC = 1
+   40    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 50
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Multiply by inv(U'(K)), where U(K) is the transformation
+*           stored in column K of A.
+*
+            IF( K.GT.1 ) THEN
+               CALL CLACGV( NRHS, B( K, 1 ), LDB )
+               CALL CGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B,
+     $                     LDB, AP( KC ), 1, ONE, B( K, 1 ), LDB )
+               CALL CLACGV( NRHS, B( K, 1 ), LDB )
+            END IF
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            KC = KC + K
+            K = K + 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Multiply by inv(U'(K+1)), where U(K+1) is the transformation
+*           stored in columns K and K+1 of A.
+*
+            IF( K.GT.1 ) THEN
+               CALL CLACGV( NRHS, B( K, 1 ), LDB )
+               CALL CGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B,
+     $                     LDB, AP( KC ), 1, ONE, B( K, 1 ), LDB )
+               CALL CLACGV( NRHS, B( K, 1 ), LDB )
+*
+               CALL CLACGV( NRHS, B( K+1, 1 ), LDB )
+               CALL CGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B,
+     $                     LDB, AP( KC+K ), 1, ONE, B( K+1, 1 ), LDB )
+               CALL CLACGV( NRHS, B( K+1, 1 ), LDB )
+            END IF
+*
+*           Interchange rows K and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K )
+     $         CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            KC = KC + 2*K + 1
+            K = K + 2
+         END IF
+*
+         GO TO 40
+   50    CONTINUE
+*
+      ELSE
+*
+*        Solve A*X = B, where A = L*D*L'.
+*
+*        First solve L*D*X = B, overwriting B with X.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+         KC = 1
+   60    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 80
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(L(K)), where L(K) is the transformation
+*           stored in column K of A.
+*
+            IF( K.LT.N )
+     $         CALL CGERU( N-K, NRHS, -ONE, AP( KC+1 ), 1, B( K, 1 ),
+     $                     LDB, B( K+1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            S = REAL( ONE ) / REAL( AP( KC ) )
+            CALL CSSCAL( NRHS, S, B( K, 1 ), LDB )
+            KC = KC + N - K + 1
+            K = K + 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Interchange rows K+1 and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K+1 )
+     $         CALL CSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(L(K)), where L(K) is the transformation
+*           stored in columns K and K+1 of A.
+*
+            IF( K.LT.N-1 ) THEN
+               CALL CGERU( N-K-1, NRHS, -ONE, AP( KC+2 ), 1, B( K, 1 ),
+     $                     LDB, B( K+2, 1 ), LDB )
+               CALL CGERU( N-K-1, NRHS, -ONE, AP( KC+N-K+2 ), 1,
+     $                     B( K+1, 1 ), LDB, B( K+2, 1 ), LDB )
+            END IF
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            AKM1K = AP( KC+1 )
+            AKM1 = AP( KC ) / CONJG( AKM1K )
+            AK = AP( KC+N-K+1 ) / AKM1K
+            DENOM = AKM1*AK - ONE
+            DO 70 J = 1, NRHS
+               BKM1 = B( K, J ) / CONJG( AKM1K )
+               BK = B( K+1, J ) / AKM1K
+               B( K, J ) = ( AK*BKM1-BK ) / DENOM
+               B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM
+   70       CONTINUE
+            KC = KC + 2*( N-K ) + 1
+            K = K + 2
+         END IF
+*
+         GO TO 60
+   80    CONTINUE
+*
+*        Next solve L'*X = B, overwriting B with X.
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = N
+         KC = N*( N+1 ) / 2 + 1
+   90    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 100
+*
+         KC = KC - ( N-K+1 )
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Multiply by inv(L'(K)), where L(K) is the transformation
+*           stored in column K of A.
+*
+            IF( K.LT.N ) THEN
+               CALL CLACGV( NRHS, B( K, 1 ), LDB )
+               CALL CGEMV( 'Conjugate transpose', N-K, NRHS, -ONE,
+     $                     B( K+1, 1 ), LDB, AP( KC+1 ), 1, ONE,
+     $                     B( K, 1 ), LDB )
+               CALL CLACGV( NRHS, B( K, 1 ), LDB )
+            END IF
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            K = K - 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Multiply by inv(L'(K-1)), where L(K-1) is the transformation
+*           stored in columns K-1 and K of A.
+*
+            IF( K.LT.N ) THEN
+               CALL CLACGV( NRHS, B( K, 1 ), LDB )
+               CALL CGEMV( 'Conjugate transpose', N-K, NRHS, -ONE,
+     $                     B( K+1, 1 ), LDB, AP( KC+1 ), 1, ONE,
+     $                     B( K, 1 ), LDB )
+               CALL CLACGV( NRHS, B( K, 1 ), LDB )
+*
+               CALL CLACGV( NRHS, B( K-1, 1 ), LDB )
+               CALL CGEMV( 'Conjugate transpose', N-K, NRHS, -ONE,
+     $                     B( K+1, 1 ), LDB, AP( KC-( N-K ) ), 1, ONE,
+     $                     B( K-1, 1 ), LDB )
+               CALL CLACGV( NRHS, B( K-1, 1 ), LDB )
+            END IF
+*
+*           Interchange rows K and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K )
+     $         CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            KC = KC - ( N-K+2 )
+            K = K - 2
+         END IF
+*
+         GO TO 90
+  100    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CHPTRS
+*
+      END
diff --git a/costa/native/external/lapack/chsein.f b/costa/native/external/lapack/chsein.f
new file mode 100644
index 000000000..7434efeb4
--- /dev/null
+++ b/costa/native/external/lapack/chsein.f
@@ -0,0 +1,351 @@
+      SUBROUTINE CHSEIN( SIDE, EIGSRC, INITV, SELECT, N, H, LDH, W, VL,
+     $                   LDVL, VR, LDVR, MM, M, WORK, RWORK, IFAILL,
+     $                   IFAILR, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EIGSRC, INITV, SIDE
+      INTEGER            INFO, LDH, LDVL, LDVR, M, MM, N
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            SELECT( * )
+      INTEGER            IFAILL( * ), IFAILR( * )
+      REAL               RWORK( * )
+      COMPLEX            H( LDH, * ), VL( LDVL, * ), VR( LDVR, * ),
+     $                   W( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHSEIN uses inverse iteration to find specified right and/or left
+*  eigenvectors of a complex upper Hessenberg matrix H.
+*
+*  The right eigenvector x and the left eigenvector y of the matrix H
+*  corresponding to an eigenvalue w are defined by:
+*
+*               H * x = w * x,     y**h * H = w * y**h
+*
+*  where y**h denotes the conjugate transpose of the vector y.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'R': compute right eigenvectors only;
+*          = 'L': compute left eigenvectors only;
+*          = 'B': compute both right and left eigenvectors.
+*
+*  EIGSRC  (input) CHARACTER*1
+*          Specifies the source of eigenvalues supplied in W:
+*          = 'Q': the eigenvalues were found using CHSEQR; thus, if
+*                 H has zero subdiagonal elements, and so is
+*                 block-triangular, then the j-th eigenvalue can be
+*                 assumed to be an eigenvalue of the block containing
+*                 the j-th row/column.  This property allows CHSEIN to
+*                 perform inverse iteration on just one diagonal block.
+*          = 'N': no assumptions are made on the correspondence
+*                 between eigenvalues and diagonal blocks.  In this
+*                 case, CHSEIN must always perform inverse iteration
+*                 using the whole matrix H.
+*
+*  INITV   (input) CHARACTER*1
+*          = 'N': no initial vectors are supplied;
+*          = 'U': user-supplied initial vectors are stored in the arrays
+*                 VL and/or VR.
+*
+*  SELECT  (input) LOGICAL array, dimension (N)
+*          Specifies the eigenvectors to be computed. To select the
+*          eigenvector corresponding to the eigenvalue W(j),
+*          SELECT(j) must be set to .TRUE..
+*
+*  N       (input) INTEGER
+*          The order of the matrix H.  N >= 0.
+*
+*  H       (input) COMPLEX array, dimension (LDH,N)
+*          The upper Hessenberg matrix H.
+*
+*  LDH     (input) INTEGER
+*          The leading dimension of the array H.  LDH >= max(1,N).
+*
+*  W       (input/output) COMPLEX array, dimension (N)
+*          On entry, the eigenvalues of H.
+*          On exit, the real parts of W may have been altered since
+*          close eigenvalues are perturbed slightly in searching for
+*          independent eigenvectors.
+*
+*  VL      (input/output) COMPLEX array, dimension (LDVL,MM)
+*          On entry, if INITV = 'U' and SIDE = 'L' or 'B', VL must
+*          contain starting vectors for the inverse iteration for the
+*          left eigenvectors; the starting vector for each eigenvector
+*          must be in the same column in which the eigenvector will be
+*          stored.
+*          On exit, if SIDE = 'L' or 'B', the left eigenvectors
+*          specified by SELECT will be stored consecutively in the
+*          columns of VL, in the same order as their eigenvalues.
+*          If SIDE = 'R', VL is not referenced.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the array VL.
+*          LDVL >= max(1,N) if SIDE = 'L' or 'B'; LDVL >= 1 otherwise.
+*
+*  VR      (input/output) COMPLEX array, dimension (LDVR,MM)
+*          On entry, if INITV = 'U' and SIDE = 'R' or 'B', VR must
+*          contain starting vectors for the inverse iteration for the
+*          right eigenvectors; the starting vector for each eigenvector
+*          must be in the same column in which the eigenvector will be
+*          stored.
+*          On exit, if SIDE = 'R' or 'B', the right eigenvectors
+*          specified by SELECT will be stored consecutively in the
+*          columns of VR, in the same order as their eigenvalues.
+*          If SIDE = 'L', VR is not referenced.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the array VR.
+*          LDVR >= max(1,N) if SIDE = 'R' or 'B'; LDVR >= 1 otherwise.
+*
+*  MM      (input) INTEGER
+*          The number of columns in the arrays VL and/or VR. MM >= M.
+*
+*  M       (output) INTEGER
+*          The number of columns in the arrays VL and/or VR required to
+*          store the eigenvectors (= the number of .TRUE. elements in
+*          SELECT).
+*
+*  WORK    (workspace) COMPLEX array, dimension (N*N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  IFAILL  (output) INTEGER array, dimension (MM)
+*          If SIDE = 'L' or 'B', IFAILL(i) = j > 0 if the left
+*          eigenvector in the i-th column of VL (corresponding to the
+*          eigenvalue w(j)) failed to converge; IFAILL(i) = 0 if the
+*          eigenvector converged satisfactorily.
+*          If SIDE = 'R', IFAILL is not referenced.
+*
+*  IFAILR  (output) INTEGER array, dimension (MM)
+*          If SIDE = 'R' or 'B', IFAILR(i) = j > 0 if the right
+*          eigenvector in the i-th column of VR (corresponding to the
+*          eigenvalue w(j)) failed to converge; IFAILR(i) = 0 if the
+*          eigenvector converged satisfactorily.
+*          If SIDE = 'L', IFAILR is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, i is the number of eigenvectors which
+*                failed to converge; see IFAILL and IFAILR for further
+*                details.
+*
+*  Further Details
+*  ===============
+*
+*  Each eigenvector is normalized so that the element of largest
+*  magnitude has magnitude 1; here the magnitude of a complex number
+*  (x,y) is taken to be |x|+|y|.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+      REAL               RZERO
+      PARAMETER          ( RZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            BOTHV, FROMQR, LEFTV, NOINIT, RIGHTV
+      INTEGER            I, IINFO, K, KL, KLN, KR, KS, LDWORK
+      REAL               EPS3, HNORM, SMLNUM, ULP, UNFL
+      COMPLEX            CDUM, WK
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               CLANHS, SLAMCH
+      EXTERNAL           LSAME, CLANHS, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLAEIN, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MAX, REAL
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters.
+*
+      BOTHV = LSAME( SIDE, 'B' )
+      RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV
+      LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV
+*
+      FROMQR = LSAME( EIGSRC, 'Q' )
+*
+      NOINIT = LSAME( INITV, 'N' )
+*
+*     Set M to the number of columns required to store the selected
+*     eigenvectors.
+*
+      M = 0
+      DO 10 K = 1, N
+         IF( SELECT( K ) )
+     $      M = M + 1
+   10 CONTINUE
+*
+      INFO = 0
+      IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN
+         INFO = -1
+      ELSE IF( .NOT.FROMQR .AND. .NOT.LSAME( EIGSRC, 'N' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOINIT .AND. .NOT.LSAME( INITV, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDH.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDVL.LT.1 .OR. ( LEFTV .AND. LDVL.LT.N ) ) THEN
+         INFO = -10
+      ELSE IF( LDVR.LT.1 .OR. ( RIGHTV .AND. LDVR.LT.N ) ) THEN
+         INFO = -12
+      ELSE IF( MM.LT.M ) THEN
+         INFO = -13
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHSEIN', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Set machine-dependent constants.
+*
+      UNFL = SLAMCH( 'Safe minimum' )
+      ULP = SLAMCH( 'Precision' )
+      SMLNUM = UNFL*( N / ULP )
+*
+      LDWORK = N
+*
+      KL = 1
+      KLN = 0
+      IF( FROMQR ) THEN
+         KR = 0
+      ELSE
+         KR = N
+      END IF
+      KS = 1
+*
+      DO 100 K = 1, N
+         IF( SELECT( K ) ) THEN
+*
+*           Compute eigenvector(s) corresponding to W(K).
+*
+            IF( FROMQR ) THEN
+*
+*              If affiliation of eigenvalues is known, check whether
+*              the matrix splits.
+*
+*              Determine KL and KR such that 1 <= KL <= K <= KR <= N
+*              and H(KL,KL-1) and H(KR+1,KR) are zero (or KL = 1 or
+*              KR = N).
+*
+*              Then inverse iteration can be performed with the
+*              submatrix H(KL:N,KL:N) for a left eigenvector, and with
+*              the submatrix H(1:KR,1:KR) for a right eigenvector.
+*
+               DO 20 I = K, KL + 1, -1
+                  IF( H( I, I-1 ).EQ.ZERO )
+     $               GO TO 30
+   20          CONTINUE
+   30          CONTINUE
+               KL = I
+               IF( K.GT.KR ) THEN
+                  DO 40 I = K, N - 1
+                     IF( H( I+1, I ).EQ.ZERO )
+     $                  GO TO 50
+   40             CONTINUE
+   50             CONTINUE
+                  KR = I
+               END IF
+            END IF
+*
+            IF( KL.NE.KLN ) THEN
+               KLN = KL
+*
+*              Compute infinity-norm of submatrix H(KL:KR,KL:KR) if it
+*              has not ben computed before.
+*
+               HNORM = CLANHS( 'I', KR-KL+1, H( KL, KL ), LDH, RWORK )
+               IF( HNORM.GT.RZERO ) THEN
+                  EPS3 = HNORM*ULP
+               ELSE
+                  EPS3 = SMLNUM
+               END IF
+            END IF
+*
+*           Perturb eigenvalue if it is close to any previous
+*           selected eigenvalues affiliated to the submatrix
+*           H(KL:KR,KL:KR). Close roots are modified by EPS3.
+*
+            WK = W( K )
+   60       CONTINUE
+            DO 70 I = K - 1, KL, -1
+               IF( SELECT( I ) .AND. CABS1( W( I )-WK ).LT.EPS3 ) THEN
+                  WK = WK + EPS3
+                  GO TO 60
+               END IF
+   70       CONTINUE
+            W( K ) = WK
+*
+            IF( LEFTV ) THEN
+*
+*              Compute left eigenvector.
+*
+               CALL CLAEIN( .FALSE., NOINIT, N-KL+1, H( KL, KL ), LDH,
+     $                      WK, VL( KL, KS ), WORK, LDWORK, RWORK, EPS3,
+     $                      SMLNUM, IINFO )
+               IF( IINFO.GT.0 ) THEN
+                  INFO = INFO + 1
+                  IFAILL( KS ) = K
+               ELSE
+                  IFAILL( KS ) = 0
+               END IF
+               DO 80 I = 1, KL - 1
+                  VL( I, KS ) = ZERO
+   80          CONTINUE
+            END IF
+            IF( RIGHTV ) THEN
+*
+*              Compute right eigenvector.
+*
+               CALL CLAEIN( .TRUE., NOINIT, KR, H, LDH, WK, VR( 1, KS ),
+     $                      WORK, LDWORK, RWORK, EPS3, SMLNUM, IINFO )
+               IF( IINFO.GT.0 ) THEN
+                  INFO = INFO + 1
+                  IFAILR( KS ) = K
+               ELSE
+                  IFAILR( KS ) = 0
+               END IF
+               DO 90 I = KR + 1, N
+                  VR( I, KS ) = ZERO
+   90          CONTINUE
+            END IF
+            KS = KS + 1
+         END IF
+  100 CONTINUE
+*
+      RETURN
+*
+*     End of CHSEIN
+*
+      END
diff --git a/costa/native/external/lapack/chseqr.f b/costa/native/external/lapack/chseqr.f
new file mode 100644
index 000000000..d26ff16e1
--- /dev/null
+++ b/costa/native/external/lapack/chseqr.f
@@ -0,0 +1,474 @@
+      SUBROUTINE CHSEQR( JOB, COMPZ, N, ILO, IHI, H, LDH, W, Z, LDZ,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPZ, JOB
+      INTEGER            IHI, ILO, INFO, LDH, LDZ, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            H( LDH, * ), W( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CHSEQR computes the eigenvalues of a complex upper Hessenberg
+*  matrix H, and, optionally, the matrices T and Z from the Schur
+*  decomposition H = Z T Z**H, where T is an upper triangular matrix
+*  (the Schur form), and Z is the unitary matrix of Schur vectors.
+*
+*  Optionally Z may be postmultiplied into an input unitary matrix Q,
+*  so that this routine can give the Schur factorization of a matrix A
+*  which has been reduced to the Hessenberg form H by the unitary
+*  matrix Q:  A = Q*H*Q**H = (QZ)*T*(QZ)**H.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          = 'E': compute eigenvalues only;
+*          = 'S': compute eigenvalues and the Schur form T.
+*
+*  COMPZ   (input) CHARACTER*1
+*          = 'N': no Schur vectors are computed;
+*          = 'I': Z is initialized to the unit matrix and the matrix Z
+*                 of Schur vectors of H is returned;
+*          = 'V': Z must contain an unitary matrix Q on entry, and
+*                 the product Q*Z is returned.
+*
+*  N       (input) INTEGER
+*          The order of the matrix H.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          It is assumed that H is already upper triangular in rows
+*          and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
+*          set by a previous call to CGEBAL, and then passed to CGEHRD
+*          when the matrix output by CGEBAL is reduced to Hessenberg
+*          form. Otherwise ILO and IHI should be set to 1 and N
+*          respectively.
+*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+*
+*  H       (input/output) COMPLEX array, dimension (LDH,N)
+*          On entry, the upper Hessenberg matrix H.
+*          On exit, if JOB = 'S', H contains the upper triangular matrix
+*          T from the Schur decomposition (the Schur form). If
+*          JOB = 'E', the contents of H are unspecified on exit.
+*
+*  LDH     (input) INTEGER
+*          The leading dimension of the array H. LDH >= max(1,N).
+*
+*  W       (output) COMPLEX array, dimension (N)
+*          The computed eigenvalues. If JOB = 'S', the eigenvalues are
+*          stored in the same order as on the diagonal of the Schur form
+*          returned in H, with W(i) = H(i,i).
+*
+*  Z       (input/output) COMPLEX array, dimension (LDZ,N)
+*          If COMPZ = 'N': Z is not referenced.
+*          If COMPZ = 'I': on entry, Z need not be set, and on exit, Z
+*          contains the unitary matrix Z of the Schur vectors of H.
+*          If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q,
+*          which is assumed to be equal to the unit matrix except for
+*          the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z.
+*          Normally Q is the unitary matrix generated by CUNGHR after
+*          the call to CGEHRD which formed the Hessenberg matrix H.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.
+*          LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,N).
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, CHSEQR failed to compute all the
+*                eigenvalues in a total of 30*(IHI-ILO+1) iterations;
+*                elements 1:ilo-1 and i+1:n of W contain those
+*                eigenvalues which have been successfully computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ZERO, ONE
+      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   ONE = ( 1.0E+0, 0.0E+0 ) )
+      REAL               RZERO, RONE, CONST
+      PARAMETER          ( RZERO = 0.0E+0, RONE = 1.0E+0,
+     $                   CONST = 1.5E+0 )
+      INTEGER            NSMAX, LDS
+      PARAMETER          ( NSMAX = 15, LDS = NSMAX )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            INITZ, LQUERY, WANTT, WANTZ
+      INTEGER            I, I1, I2, IERR, II, ITEMP, ITN, ITS, J, K, L,
+     $                   MAXB, NH, NR, NS, NV
+      REAL               OVFL, RTEMP, SMLNUM, TST1, ULP, UNFL
+      COMPLEX            CDUM, TAU, TEMP
+*     ..
+*     .. Local Arrays ..
+      REAL               RWORK( 1 )
+      COMPLEX            S( LDS, NSMAX ), V( NSMAX+1 ), VV( NSMAX+1 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ICAMAX, ILAENV
+      REAL               CLANHS, SLAMCH, SLAPY2
+      EXTERNAL           LSAME, ICAMAX, ILAENV, CLANHS, SLAMCH, SLAPY2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CGEMV, CLACPY, CLAHQR, CLARFG, CLARFX,
+     $                   CLASET, CSCAL, CSSCAL, SLABAD, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, CONJG, MAX, MIN, REAL
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      WANTT = LSAME( JOB, 'S' )
+      INITZ = LSAME( COMPZ, 'I' )
+      WANTZ = INITZ .OR. LSAME( COMPZ, 'V' )
+*
+      INFO = 0
+      WORK( 1 ) = MAX( 1, N )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.LSAME( JOB, 'E' ) .AND. .NOT.WANTT ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( COMPZ, 'N' ) .AND. .NOT.WANTZ ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN
+         INFO = -5
+      ELSE IF( LDH.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDZ.LT.1 .OR. WANTZ .AND. LDZ.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CHSEQR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Initialize Z, if necessary
+*
+      IF( INITZ )
+     $   CALL CLASET( 'Full', N, N, ZERO, ONE, Z, LDZ )
+*
+*     Store the eigenvalues isolated by CGEBAL.
+*
+      DO 10 I = 1, ILO - 1
+         W( I ) = H( I, I )
+   10 CONTINUE
+      DO 20 I = IHI + 1, N
+         W( I ) = H( I, I )
+   20 CONTINUE
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+      IF( ILO.EQ.IHI ) THEN
+         W( ILO ) = H( ILO, ILO )
+         RETURN
+      END IF
+*
+*     Set rows and columns ILO to IHI to zero below the first
+*     subdiagonal.
+*
+      DO 40 J = ILO, IHI - 2
+         DO 30 I = J + 2, N
+            H( I, J ) = ZERO
+   30    CONTINUE
+   40 CONTINUE
+      NH = IHI - ILO + 1
+*
+*     I1 and I2 are the indices of the first row and last column of H
+*     to which transformations must be applied. If eigenvalues only are
+*     being computed, I1 and I2 are re-set inside the main loop.
+*
+      IF( WANTT ) THEN
+         I1 = 1
+         I2 = N
+      ELSE
+         I1 = ILO
+         I2 = IHI
+      END IF
+*
+*     Ensure that the subdiagonal elements are real.
+*
+      DO 50 I = ILO + 1, IHI
+         TEMP = H( I, I-1 )
+         IF( AIMAG( TEMP ).NE.RZERO ) THEN
+            RTEMP = SLAPY2( REAL( TEMP ), AIMAG( TEMP ) )
+            H( I, I-1 ) = RTEMP
+            TEMP = TEMP / RTEMP
+            IF( I2.GT.I )
+     $         CALL CSCAL( I2-I, CONJG( TEMP ), H( I, I+1 ), LDH )
+            CALL CSCAL( I-I1, TEMP, H( I1, I ), 1 )
+            IF( I.LT.IHI )
+     $         H( I+1, I ) = TEMP*H( I+1, I )
+            IF( WANTZ )
+     $         CALL CSCAL( NH, TEMP, Z( ILO, I ), 1 )
+         END IF
+   50 CONTINUE
+*
+*     Determine the order of the multi-shift QR algorithm to be used.
+*
+      NS = ILAENV( 4, 'CHSEQR', JOB // COMPZ, N, ILO, IHI, -1 )
+      MAXB = ILAENV( 8, 'CHSEQR', JOB // COMPZ, N, ILO, IHI, -1 )
+      IF( NS.LE.1 .OR. NS.GT.NH .OR. MAXB.GE.NH ) THEN
+*
+*        Use the standard double-shift algorithm
+*
+         CALL CLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILO, IHI, Z,
+     $                LDZ, INFO )
+         RETURN
+      END IF
+      MAXB = MAX( 2, MAXB )
+      NS = MIN( NS, MAXB, NSMAX )
+*
+*     Now 1 < NS <= MAXB < NH.
+*
+*     Set machine-dependent constants for the stopping criterion.
+*     If norm(H) <= sqrt(OVFL), overflow should not occur.
+*
+      UNFL = SLAMCH( 'Safe minimum' )
+      OVFL = RONE / UNFL
+      CALL SLABAD( UNFL, OVFL )
+      ULP = SLAMCH( 'Precision' )
+      SMLNUM = UNFL*( NH / ULP )
+*
+*     ITN is the total number of multiple-shift QR iterations allowed.
+*
+      ITN = 30*NH
+*
+*     The main loop begins here. I is the loop index and decreases from
+*     IHI to ILO in steps of at most MAXB. Each iteration of the loop
+*     works with the active submatrix in rows and columns L to I.
+*     Eigenvalues I+1 to IHI have already converged. Either L = ILO, or
+*     H(L,L-1) is negligible so that the matrix splits.
+*
+      I = IHI
+   60 CONTINUE
+      IF( I.LT.ILO )
+     $   GO TO 180
+*
+*     Perform multiple-shift QR iterations on rows and columns ILO to I
+*     until a submatrix of order at most MAXB splits off at the bottom
+*     because a subdiagonal element has become negligible.
+*
+      L = ILO
+      DO 160 ITS = 0, ITN
+*
+*        Look for a single small subdiagonal element.
+*
+         DO 70 K = I, L + 1, -1
+            TST1 = CABS1( H( K-1, K-1 ) ) + CABS1( H( K, K ) )
+            IF( TST1.EQ.RZERO )
+     $         TST1 = CLANHS( '1', I-L+1, H( L, L ), LDH, RWORK )
+            IF( ABS( REAL( H( K, K-1 ) ) ).LE.MAX( ULP*TST1, SMLNUM ) )
+     $         GO TO 80
+   70    CONTINUE
+   80    CONTINUE
+         L = K
+         IF( L.GT.ILO ) THEN
+*
+*           H(L,L-1) is negligible.
+*
+            H( L, L-1 ) = ZERO
+         END IF
+*
+*        Exit from loop if a submatrix of order <= MAXB has split off.
+*
+         IF( L.GE.I-MAXB+1 )
+     $      GO TO 170
+*
+*        Now the active submatrix is in rows and columns L to I. If
+*        eigenvalues only are being computed, only the active submatrix
+*        need be transformed.
+*
+         IF( .NOT.WANTT ) THEN
+            I1 = L
+            I2 = I
+         END IF
+*
+         IF( ITS.EQ.20 .OR. ITS.EQ.30 ) THEN
+*
+*           Exceptional shifts.
+*
+            DO 90 II = I - NS + 1, I
+               W( II ) = CONST*( ABS( REAL( H( II, II-1 ) ) )+
+     $                   ABS( REAL( H( II, II ) ) ) )
+   90       CONTINUE
+         ELSE
+*
+*           Use eigenvalues of trailing submatrix of order NS as shifts.
+*
+            CALL CLACPY( 'Full', NS, NS, H( I-NS+1, I-NS+1 ), LDH, S,
+     $                   LDS )
+            CALL CLAHQR( .FALSE., .FALSE., NS, 1, NS, S, LDS,
+     $                   W( I-NS+1 ), 1, NS, Z, LDZ, IERR )
+            IF( IERR.GT.0 ) THEN
+*
+*              If CLAHQR failed to compute all NS eigenvalues, use the
+*              unconverged diagonal elements as the remaining shifts.
+*
+               DO 100 II = 1, IERR
+                  W( I-NS+II ) = S( II, II )
+  100          CONTINUE
+            END IF
+         END IF
+*
+*        Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns))
+*        where G is the Hessenberg submatrix H(L:I,L:I) and w is
+*        the vector of shifts (stored in W). The result is
+*        stored in the local array V.
+*
+         V( 1 ) = ONE
+         DO 110 II = 2, NS + 1
+            V( II ) = ZERO
+  110    CONTINUE
+         NV = 1
+         DO 130 J = I - NS + 1, I
+            CALL CCOPY( NV+1, V, 1, VV, 1 )
+            CALL CGEMV( 'No transpose', NV+1, NV, ONE, H( L, L ), LDH,
+     $                  VV, 1, -W( J ), V, 1 )
+            NV = NV + 1
+*
+*           Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero,
+*           reset it to the unit vector.
+*
+            ITEMP = ICAMAX( NV, V, 1 )
+            RTEMP = CABS1( V( ITEMP ) )
+            IF( RTEMP.EQ.RZERO ) THEN
+               V( 1 ) = ONE
+               DO 120 II = 2, NV
+                  V( II ) = ZERO
+  120          CONTINUE
+            ELSE
+               RTEMP = MAX( RTEMP, SMLNUM )
+               CALL CSSCAL( NV, RONE / RTEMP, V, 1 )
+            END IF
+  130    CONTINUE
+*
+*        Multiple-shift QR step
+*
+         DO 150 K = L, I - 1
+*
+*           The first iteration of this loop determines a reflection G
+*           from the vector V and applies it from left and right to H,
+*           thus creating a nonzero bulge below the subdiagonal.
+*
+*           Each subsequent iteration determines a reflection G to
+*           restore the Hessenberg form in the (K-1)th column, and thus
+*           chases the bulge one step toward the bottom of the active
+*           submatrix. NR is the order of G.
+*
+            NR = MIN( NS+1, I-K+1 )
+            IF( K.GT.L )
+     $         CALL CCOPY( NR, H( K, K-1 ), 1, V, 1 )
+            CALL CLARFG( NR, V( 1 ), V( 2 ), 1, TAU )
+            IF( K.GT.L ) THEN
+               H( K, K-1 ) = V( 1 )
+               DO 140 II = K + 1, I
+                  H( II, K-1 ) = ZERO
+  140          CONTINUE
+            END IF
+            V( 1 ) = ONE
+*
+*           Apply G' from the left to transform the rows of the matrix
+*           in columns K to I2.
+*
+            CALL CLARFX( 'Left', NR, I2-K+1, V, CONJG( TAU ), H( K, K ),
+     $                   LDH, WORK )
+*
+*           Apply G from the right to transform the columns of the
+*           matrix in rows I1 to min(K+NR,I).
+*
+            CALL CLARFX( 'Right', MIN( K+NR, I )-I1+1, NR, V, TAU,
+     $                   H( I1, K ), LDH, WORK )
+*
+            IF( WANTZ ) THEN
+*
+*              Accumulate transformations in the matrix Z
+*
+               CALL CLARFX( 'Right', NH, NR, V, TAU, Z( ILO, K ), LDZ,
+     $                      WORK )
+            END IF
+  150    CONTINUE
+*
+*        Ensure that H(I,I-1) is real.
+*
+         TEMP = H( I, I-1 )
+         IF( AIMAG( TEMP ).NE.RZERO ) THEN
+            RTEMP = SLAPY2( REAL( TEMP ), AIMAG( TEMP ) )
+            H( I, I-1 ) = RTEMP
+            TEMP = TEMP / RTEMP
+            IF( I2.GT.I )
+     $         CALL CSCAL( I2-I, CONJG( TEMP ), H( I, I+1 ), LDH )
+            CALL CSCAL( I-I1, TEMP, H( I1, I ), 1 )
+            IF( WANTZ ) THEN
+               CALL CSCAL( NH, TEMP, Z( ILO, I ), 1 )
+            END IF
+         END IF
+*
+  160 CONTINUE
+*
+*     Failure to converge in remaining number of iterations
+*
+      INFO = I
+      RETURN
+*
+  170 CONTINUE
+*
+*     A submatrix of order <= MAXB in rows and columns L to I has split
+*     off. Use the double-shift QR algorithm to handle it.
+*
+      CALL CLAHQR( WANTT, WANTZ, N, L, I, H, LDH, W, ILO, IHI, Z, LDZ,
+     $             INFO )
+      IF( INFO.GT.0 )
+     $   RETURN
+*
+*     Decrement number of remaining iterations, and return to start of
+*     the main loop with a new value of I.
+*
+      ITN = ITN - ITS
+      I = L - 1
+      GO TO 60
+*
+  180 CONTINUE
+      WORK( 1 ) = MAX( 1, N )
+      RETURN
+*
+*     End of CHSEQR
+*
+      END
diff --git a/costa/native/external/lapack/clabrd.f b/costa/native/external/lapack/clabrd.f
new file mode 100644
index 000000000..aba02056c
--- /dev/null
+++ b/costa/native/external/lapack/clabrd.f
@@ -0,0 +1,329 @@
+      SUBROUTINE CLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y,
+     $                   LDY )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            LDA, LDX, LDY, M, N, NB
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * ), E( * )
+      COMPLEX            A( LDA, * ), TAUP( * ), TAUQ( * ), X( LDX, * ),
+     $                   Y( LDY, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLABRD reduces the first NB rows and columns of a complex general
+*  m by n matrix A to upper or lower real bidiagonal form by a unitary
+*  transformation Q' * A * P, and returns the matrices X and Y which
+*  are needed to apply the transformation to the unreduced part of A.
+*
+*  If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower
+*  bidiagonal form.
+*
+*  This is an auxiliary routine called by CGEBRD
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows in the matrix A.
+*
+*  N       (input) INTEGER
+*          The number of columns in the matrix A.
+*
+*  NB      (input) INTEGER
+*          The number of leading rows and columns of A to be reduced.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the m by n general matrix to be reduced.
+*          On exit, the first NB rows and columns of the matrix are
+*          overwritten; the rest of the array is unchanged.
+*          If m >= n, elements on and below the diagonal in the first NB
+*            columns, with the array TAUQ, represent the unitary
+*            matrix Q as a product of elementary reflectors; and
+*            elements above the diagonal in the first NB rows, with the
+*            array TAUP, represent the unitary matrix P as a product
+*            of elementary reflectors.
+*          If m < n, elements below the diagonal in the first NB
+*            columns, with the array TAUQ, represent the unitary
+*            matrix Q as a product of elementary reflectors, and
+*            elements on and above the diagonal in the first NB rows,
+*            with the array TAUP, represent the unitary matrix P as
+*            a product of elementary reflectors.
+*          See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  D       (output) REAL array, dimension (NB)
+*          The diagonal elements of the first NB rows and columns of
+*          the reduced matrix.  D(i) = A(i,i).
+*
+*  E       (output) REAL array, dimension (NB)
+*          The off-diagonal elements of the first NB rows and columns of
+*          the reduced matrix.
+*
+*  TAUQ    (output) COMPLEX array dimension (NB)
+*          The scalar factors of the elementary reflectors which
+*          represent the unitary matrix Q. See Further Details.
+*
+*  TAUP    (output) COMPLEX array, dimension (NB)
+*          The scalar factors of the elementary reflectors which
+*          represent the unitary matrix P. See Further Details.
+*
+*  X       (output) COMPLEX array, dimension (LDX,NB)
+*          The m-by-nb matrix X required to update the unreduced part
+*          of A.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X. LDX >= max(1,M).
+*
+*  Y       (output) COMPLEX array, dimension (LDY,NB)
+*          The n-by-nb matrix Y required to update the unreduced part
+*          of A.
+*
+*  LDY     (output) INTEGER
+*          The leading dimension of the array Y. LDY >= max(1,N).
+*
+*  Further Details
+*  ===============
+*
+*  The matrices Q and P are represented as products of elementary
+*  reflectors:
+*
+*     Q = H(1) H(2) . . . H(nb)  and  P = G(1) G(2) . . . G(nb)
+*
+*  Each H(i) and G(i) has the form:
+*
+*     H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+*
+*  where tauq and taup are complex scalars, and v and u are complex
+*  vectors.
+*
+*  If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in
+*  A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in
+*  A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
+*
+*  If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in
+*  A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in
+*  A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
+*
+*  The elements of the vectors v and u together form the m-by-nb matrix
+*  V and the nb-by-n matrix U' which are needed, with X and Y, to apply
+*  the transformation to the unreduced part of the matrix, using a block
+*  update of the form:  A := A - V*Y' - X*U'.
+*
+*  The contents of A on exit are illustrated by the following examples
+*  with nb = 2:
+*
+*  m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
+*
+*    (  1   1   u1  u1  u1 )           (  1   u1  u1  u1  u1  u1 )
+*    (  v1  1   1   u2  u2 )           (  1   1   u2  u2  u2  u2 )
+*    (  v1  v2  a   a   a  )           (  v1  1   a   a   a   a  )
+*    (  v1  v2  a   a   a  )           (  v1  v2  a   a   a   a  )
+*    (  v1  v2  a   a   a  )           (  v1  v2  a   a   a   a  )
+*    (  v1  v2  a   a   a  )
+*
+*  where a denotes an element of the original matrix which is unchanged,
+*  vi denotes an element of the vector defining H(i), and ui an element
+*  of the vector defining G(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ZERO, ONE
+      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      COMPLEX            ALPHA
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEMV, CLACGV, CLARFG, CSCAL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 .OR. N.LE.0 )
+     $   RETURN
+*
+      IF( M.GE.N ) THEN
+*
+*        Reduce to upper bidiagonal form
+*
+         DO 10 I = 1, NB
+*
+*           Update A(i:m,i)
+*
+            CALL CLACGV( I-1, Y( I, 1 ), LDY )
+            CALL CGEMV( 'No transpose', M-I+1, I-1, -ONE, A( I, 1 ),
+     $                  LDA, Y( I, 1 ), LDY, ONE, A( I, I ), 1 )
+            CALL CLACGV( I-1, Y( I, 1 ), LDY )
+            CALL CGEMV( 'No transpose', M-I+1, I-1, -ONE, X( I, 1 ),
+     $                  LDX, A( 1, I ), 1, ONE, A( I, I ), 1 )
+*
+*           Generate reflection Q(i) to annihilate A(i+1:m,i)
+*
+            ALPHA = A( I, I )
+            CALL CLARFG( M-I+1, ALPHA, A( MIN( I+1, M ), I ), 1,
+     $                   TAUQ( I ) )
+            D( I ) = ALPHA
+            IF( I.LT.N ) THEN
+               A( I, I ) = ONE
+*
+*              Compute Y(i+1:n,i)
+*
+               CALL CGEMV( 'Conjugate transpose', M-I+1, N-I, ONE,
+     $                     A( I, I+1 ), LDA, A( I, I ), 1, ZERO,
+     $                     Y( I+1, I ), 1 )
+               CALL CGEMV( 'Conjugate transpose', M-I+1, I-1, ONE,
+     $                     A( I, 1 ), LDA, A( I, I ), 1, ZERO,
+     $                     Y( 1, I ), 1 )
+               CALL CGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ),
+     $                     LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
+               CALL CGEMV( 'Conjugate transpose', M-I+1, I-1, ONE,
+     $                     X( I, 1 ), LDX, A( I, I ), 1, ZERO,
+     $                     Y( 1, I ), 1 )
+               CALL CGEMV( 'Conjugate transpose', I-1, N-I, -ONE,
+     $                     A( 1, I+1 ), LDA, Y( 1, I ), 1, ONE,
+     $                     Y( I+1, I ), 1 )
+               CALL CSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 )
+*
+*              Update A(i,i+1:n)
+*
+               CALL CLACGV( N-I, A( I, I+1 ), LDA )
+               CALL CLACGV( I, A( I, 1 ), LDA )
+               CALL CGEMV( 'No transpose', N-I, I, -ONE, Y( I+1, 1 ),
+     $                     LDY, A( I, 1 ), LDA, ONE, A( I, I+1 ), LDA )
+               CALL CLACGV( I, A( I, 1 ), LDA )
+               CALL CLACGV( I-1, X( I, 1 ), LDX )
+               CALL CGEMV( 'Conjugate transpose', I-1, N-I, -ONE,
+     $                     A( 1, I+1 ), LDA, X( I, 1 ), LDX, ONE,
+     $                     A( I, I+1 ), LDA )
+               CALL CLACGV( I-1, X( I, 1 ), LDX )
+*
+*              Generate reflection P(i) to annihilate A(i,i+2:n)
+*
+               ALPHA = A( I, I+1 )
+               CALL CLARFG( N-I, ALPHA, A( I, MIN( I+2, N ) ),
+     $                      LDA, TAUP( I ) )
+               E( I ) = ALPHA
+               A( I, I+1 ) = ONE
+*
+*              Compute X(i+1:m,i)
+*
+               CALL CGEMV( 'No transpose', M-I, N-I, ONE, A( I+1, I+1 ),
+     $                     LDA, A( I, I+1 ), LDA, ZERO, X( I+1, I ), 1 )
+               CALL CGEMV( 'Conjugate transpose', N-I, I, ONE,
+     $                     Y( I+1, 1 ), LDY, A( I, I+1 ), LDA, ZERO,
+     $                     X( 1, I ), 1 )
+               CALL CGEMV( 'No transpose', M-I, I, -ONE, A( I+1, 1 ),
+     $                     LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
+               CALL CGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ),
+     $                     LDA, A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 )
+               CALL CGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ),
+     $                     LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
+               CALL CSCAL( M-I, TAUP( I ), X( I+1, I ), 1 )
+               CALL CLACGV( N-I, A( I, I+1 ), LDA )
+            END IF
+   10    CONTINUE
+      ELSE
+*
+*        Reduce to lower bidiagonal form
+*
+         DO 20 I = 1, NB
+*
+*           Update A(i,i:n)
+*
+            CALL CLACGV( N-I+1, A( I, I ), LDA )
+            CALL CLACGV( I-1, A( I, 1 ), LDA )
+            CALL CGEMV( 'No transpose', N-I+1, I-1, -ONE, Y( I, 1 ),
+     $                  LDY, A( I, 1 ), LDA, ONE, A( I, I ), LDA )
+            CALL CLACGV( I-1, A( I, 1 ), LDA )
+            CALL CLACGV( I-1, X( I, 1 ), LDX )
+            CALL CGEMV( 'Conjugate transpose', I-1, N-I+1, -ONE,
+     $                  A( 1, I ), LDA, X( I, 1 ), LDX, ONE, A( I, I ),
+     $                  LDA )
+            CALL CLACGV( I-1, X( I, 1 ), LDX )
+*
+*           Generate reflection P(i) to annihilate A(i,i+1:n)
+*
+            ALPHA = A( I, I )
+            CALL CLARFG( N-I+1, ALPHA, A( I, MIN( I+1, N ) ), LDA,
+     $                   TAUP( I ) )
+            D( I ) = ALPHA
+            IF( I.LT.M ) THEN
+               A( I, I ) = ONE
+*
+*              Compute X(i+1:m,i)
+*
+               CALL CGEMV( 'No transpose', M-I, N-I+1, ONE, A( I+1, I ),
+     $                     LDA, A( I, I ), LDA, ZERO, X( I+1, I ), 1 )
+               CALL CGEMV( 'Conjugate transpose', N-I+1, I-1, ONE,
+     $                     Y( I, 1 ), LDY, A( I, I ), LDA, ZERO,
+     $                     X( 1, I ), 1 )
+               CALL CGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ),
+     $                     LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
+               CALL CGEMV( 'No transpose', I-1, N-I+1, ONE, A( 1, I ),
+     $                     LDA, A( I, I ), LDA, ZERO, X( 1, I ), 1 )
+               CALL CGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ),
+     $                     LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
+               CALL CSCAL( M-I, TAUP( I ), X( I+1, I ), 1 )
+               CALL CLACGV( N-I+1, A( I, I ), LDA )
+*
+*              Update A(i+1:m,i)
+*
+               CALL CLACGV( I-1, Y( I, 1 ), LDY )
+               CALL CGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ),
+     $                     LDA, Y( I, 1 ), LDY, ONE, A( I+1, I ), 1 )
+               CALL CLACGV( I-1, Y( I, 1 ), LDY )
+               CALL CGEMV( 'No transpose', M-I, I, -ONE, X( I+1, 1 ),
+     $                     LDX, A( 1, I ), 1, ONE, A( I+1, I ), 1 )
+*
+*              Generate reflection Q(i) to annihilate A(i+2:m,i)
+*
+               ALPHA = A( I+1, I )
+               CALL CLARFG( M-I, ALPHA, A( MIN( I+2, M ), I ), 1,
+     $                      TAUQ( I ) )
+               E( I ) = ALPHA
+               A( I+1, I ) = ONE
+*
+*              Compute Y(i+1:n,i)
+*
+               CALL CGEMV( 'Conjugate transpose', M-I, N-I, ONE,
+     $                     A( I+1, I+1 ), LDA, A( I+1, I ), 1, ZERO,
+     $                     Y( I+1, I ), 1 )
+               CALL CGEMV( 'Conjugate transpose', M-I, I-1, ONE,
+     $                     A( I+1, 1 ), LDA, A( I+1, I ), 1, ZERO,
+     $                     Y( 1, I ), 1 )
+               CALL CGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ),
+     $                     LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
+               CALL CGEMV( 'Conjugate transpose', M-I, I, ONE,
+     $                     X( I+1, 1 ), LDX, A( I+1, I ), 1, ZERO,
+     $                     Y( 1, I ), 1 )
+               CALL CGEMV( 'Conjugate transpose', I, N-I, -ONE,
+     $                     A( 1, I+1 ), LDA, Y( 1, I ), 1, ONE,
+     $                     Y( I+1, I ), 1 )
+               CALL CSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 )
+            ELSE
+               CALL CLACGV( N-I+1, A( I, I ), LDA )
+            END IF
+   20    CONTINUE
+      END IF
+      RETURN
+*
+*     End of CLABRD
+*
+      END
diff --git a/costa/native/external/lapack/clacgv.f b/costa/native/external/lapack/clacgv.f
new file mode 100644
index 000000000..a1c40dfc2
--- /dev/null
+++ b/costa/native/external/lapack/clacgv.f
@@ -0,0 +1,61 @@
+      SUBROUTINE CLACGV( N, X, INCX )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLACGV conjugates a complex vector of length N.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The length of the vector X.  N >= 0.
+*
+*  X       (input/output) COMPLEX array, dimension
+*                         (1+(N-1)*abs(INCX))
+*          On entry, the vector of length N to be conjugated.
+*          On exit, X is overwritten with conjg(X).
+*
+*  INCX    (input) INTEGER
+*          The spacing between successive elements of X.
+*
+* =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, IOFF
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG
+*     ..
+*     .. Executable Statements ..
+*
+      IF( INCX.EQ.1 ) THEN
+         DO 10 I = 1, N
+            X( I ) = CONJG( X( I ) )
+   10    CONTINUE
+      ELSE
+         IOFF = 1
+         IF( INCX.LT.0 )
+     $      IOFF = 1 - ( N-1 )*INCX
+         DO 20 I = 1, N
+            X( IOFF ) = CONJG( X( IOFF ) )
+            IOFF = IOFF + INCX
+   20    CONTINUE
+      END IF
+      RETURN
+*
+*     End of CLACGV
+*
+      END
diff --git a/costa/native/external/lapack/clacon.f b/costa/native/external/lapack/clacon.f
new file mode 100644
index 000000000..034930abe
--- /dev/null
+++ b/costa/native/external/lapack/clacon.f
@@ -0,0 +1,211 @@
+      SUBROUTINE CLACON( N, V, X, EST, KASE )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            KASE, N
+      REAL               EST
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            V( N ), X( N )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLACON estimates the 1-norm of a square, complex matrix A.
+*  Reverse communication is used for evaluating matrix-vector products.
+*
+*  Arguments
+*  =========
+*
+*  N      (input) INTEGER
+*         The order of the matrix.  N >= 1.
+*
+*  V      (workspace) COMPLEX array, dimension (N)
+*         On the final return, V = A*W,  where  EST = norm(V)/norm(W)
+*         (W is not returned).
+*
+*  X      (input/output) COMPLEX array, dimension (N)
+*         On an intermediate return, X should be overwritten by
+*               A * X,   if KASE=1,
+*               A' * X,  if KASE=2,
+*         where A' is the conjugate transpose of A, and CLACON must be
+*         re-called with all the other parameters unchanged.
+*
+*  EST    (output) REAL
+*         An estimate (a lower bound) for norm(A).
+*
+*  KASE   (input/output) INTEGER
+*         On the initial call to CLACON, KASE should be 0.
+*         On an intermediate return, KASE will be 1 or 2, indicating
+*         whether X should be overwritten by A * X  or A' * X.
+*         On the final return from CLACON, KASE will again be 0.
+*
+*  Further Details
+*  ======= =======
+*
+*  Contributed by Nick Higham, University of Manchester.
+*  Originally named CONEST, dated March 16, 1988.
+*
+*  Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of
+*  a real or complex matrix, with applications to condition estimation",
+*  ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988.
+*
+*  Last modified:  April, 1999
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      REAL               ONE, TWO
+      PARAMETER          ( ONE = 1.0E0, TWO = 2.0E0 )
+      COMPLEX            CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0E0, 0.0E0 ),
+     $                   CONE = ( 1.0E0, 0.0E0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, ITER, J, JLAST, JUMP
+      REAL               ABSXI, ALTSGN, ESTOLD, SAFMIN, TEMP
+*     ..
+*     .. External Functions ..
+      INTEGER            ICMAX1
+      REAL               SCSUM1, SLAMCH
+      EXTERNAL           ICMAX1, SCSUM1, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, CMPLX, REAL
+*     ..
+*     .. Save statement ..
+      SAVE
+*     ..
+*     .. Executable Statements ..
+*
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      IF( KASE.EQ.0 ) THEN
+         DO 10 I = 1, N
+            X( I ) = CMPLX( ONE / REAL( N ) )
+   10    CONTINUE
+         KASE = 1
+         JUMP = 1
+         RETURN
+      END IF
+*
+      GO TO ( 20, 40, 70, 90, 120 )JUMP
+*
+*     ................ ENTRY   (JUMP = 1)
+*     FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY A*X.
+*
+   20 CONTINUE
+      IF( N.EQ.1 ) THEN
+         V( 1 ) = X( 1 )
+         EST = ABS( V( 1 ) )
+*        ... QUIT
+         GO TO 130
+      END IF
+      EST = SCSUM1( N, X, 1 )
+*
+      DO 30 I = 1, N
+         ABSXI = ABS( X( I ) )
+         IF( ABSXI.GT.SAFMIN ) THEN
+            X( I ) = CMPLX( REAL( X( I ) ) / ABSXI,
+     $               AIMAG( X( I ) ) / ABSXI )
+         ELSE
+            X( I ) = CONE
+         END IF
+   30 CONTINUE
+      KASE = 2
+      JUMP = 2
+      RETURN
+*
+*     ................ ENTRY   (JUMP = 2)
+*     FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY CTRANS(A)*X.
+*
+   40 CONTINUE
+      J = ICMAX1( N, X, 1 )
+      ITER = 2
+*
+*     MAIN LOOP - ITERATIONS 2,3,...,ITMAX.
+*
+   50 CONTINUE
+      DO 60 I = 1, N
+         X( I ) = CZERO
+   60 CONTINUE
+      X( J ) = CONE
+      KASE = 1
+      JUMP = 3
+      RETURN
+*
+*     ................ ENTRY   (JUMP = 3)
+*     X HAS BEEN OVERWRITTEN BY A*X.
+*
+   70 CONTINUE
+      CALL CCOPY( N, X, 1, V, 1 )
+      ESTOLD = EST
+      EST = SCSUM1( N, V, 1 )
+*
+*     TEST FOR CYCLING.
+      IF( EST.LE.ESTOLD )
+     $   GO TO 100
+*
+      DO 80 I = 1, N
+         ABSXI = ABS( X( I ) )
+         IF( ABSXI.GT.SAFMIN ) THEN
+            X( I ) = CMPLX( REAL( X( I ) ) / ABSXI,
+     $               AIMAG( X( I ) ) / ABSXI )
+         ELSE
+            X( I ) = CONE
+         END IF
+   80 CONTINUE
+      KASE = 2
+      JUMP = 4
+      RETURN
+*
+*     ................ ENTRY   (JUMP = 4)
+*     X HAS BEEN OVERWRITTEN BY CTRANS(A)*X.
+*
+   90 CONTINUE
+      JLAST = J
+      J = ICMAX1( N, X, 1 )
+      IF( ( ABS( X( JLAST ) ).NE.ABS( X( J ) ) ) .AND.
+     $    ( ITER.LT.ITMAX ) ) THEN
+         ITER = ITER + 1
+         GO TO 50
+      END IF
+*
+*     ITERATION COMPLETE.  FINAL STAGE.
+*
+  100 CONTINUE
+      ALTSGN = ONE
+      DO 110 I = 1, N
+         X( I ) = CMPLX( ALTSGN*( ONE+REAL( I-1 ) / REAL( N-1 ) ) )
+         ALTSGN = -ALTSGN
+  110 CONTINUE
+      KASE = 1
+      JUMP = 5
+      RETURN
+*
+*     ................ ENTRY   (JUMP = 5)
+*     X HAS BEEN OVERWRITTEN BY A*X.
+*
+  120 CONTINUE
+      TEMP = TWO*( SCSUM1( N, X, 1 ) / REAL( 3*N ) )
+      IF( TEMP.GT.EST ) THEN
+         CALL CCOPY( N, X, 1, V, 1 )
+         EST = TEMP
+      END IF
+*
+  130 CONTINUE
+      KASE = 0
+      RETURN
+*
+*     End of CLACON
+*
+      END
diff --git a/costa/native/external/lapack/clacp2.f b/costa/native/external/lapack/clacp2.f
new file mode 100644
index 000000000..ca7805ae6
--- /dev/null
+++ b/costa/native/external/lapack/clacp2.f
@@ -0,0 +1,92 @@
+      SUBROUTINE CLACP2( UPLO, M, N, A, LDA, B, LDB )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            LDA, LDB, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * )
+      COMPLEX            B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLACP2 copies all or part of a real two-dimensional matrix A to a
+*  complex matrix B.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies the part of the matrix A to be copied to B.
+*          = 'U':      Upper triangular part
+*          = 'L':      Lower triangular part
+*          Otherwise:  All of the matrix A
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input) REAL array, dimension (LDA,N)
+*          The m by n matrix A.  If UPLO = 'U', only the upper trapezium
+*          is accessed; if UPLO = 'L', only the lower trapezium is
+*          accessed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  B       (output) COMPLEX array, dimension (LDB,N)
+*          On exit, B = A in the locations specified by UPLO.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,M).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+      IF( LSAME( UPLO, 'U' ) ) THEN
+         DO 20 J = 1, N
+            DO 10 I = 1, MIN( J, M )
+               B( I, J ) = A( I, J )
+   10       CONTINUE
+   20    CONTINUE
+*
+      ELSE IF( LSAME( UPLO, 'L' ) ) THEN
+         DO 40 J = 1, N
+            DO 30 I = J, M
+               B( I, J ) = A( I, J )
+   30       CONTINUE
+   40    CONTINUE
+*
+      ELSE
+         DO 60 J = 1, N
+            DO 50 I = 1, M
+               B( I, J ) = A( I, J )
+   50       CONTINUE
+   60    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CLACP2
+*
+      END
diff --git a/costa/native/external/lapack/clacpy.f b/costa/native/external/lapack/clacpy.f
new file mode 100644
index 000000000..ca983e1c4
--- /dev/null
+++ b/costa/native/external/lapack/clacpy.f
@@ -0,0 +1,91 @@
+      SUBROUTINE CLACPY( UPLO, M, N, A, LDA, B, LDB )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            LDA, LDB, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLACPY copies all or part of a two-dimensional matrix A to another
+*  matrix B.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies the part of the matrix A to be copied to B.
+*          = 'U':      Upper triangular part
+*          = 'L':      Lower triangular part
+*          Otherwise:  All of the matrix A
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input) COMPLEX array, dimension (LDA,N)
+*          The m by n matrix A.  If UPLO = 'U', only the upper trapezium
+*          is accessed; if UPLO = 'L', only the lower trapezium is
+*          accessed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  B       (output) COMPLEX array, dimension (LDB,N)
+*          On exit, B = A in the locations specified by UPLO.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,M).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+      IF( LSAME( UPLO, 'U' ) ) THEN
+         DO 20 J = 1, N
+            DO 10 I = 1, MIN( J, M )
+               B( I, J ) = A( I, J )
+   10       CONTINUE
+   20    CONTINUE
+*
+      ELSE IF( LSAME( UPLO, 'L' ) ) THEN
+         DO 40 J = 1, N
+            DO 30 I = J, M
+               B( I, J ) = A( I, J )
+   30       CONTINUE
+   40    CONTINUE
+*
+      ELSE
+         DO 60 J = 1, N
+            DO 50 I = 1, M
+               B( I, J ) = A( I, J )
+   50       CONTINUE
+   60    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CLACPY
+*
+      END
diff --git a/costa/native/external/lapack/clacrm.f b/costa/native/external/lapack/clacrm.f
new file mode 100644
index 000000000..15647c1f9
--- /dev/null
+++ b/costa/native/external/lapack/clacrm.f
@@ -0,0 +1,111 @@
+      SUBROUTINE CLACRM( M, N, A, LDA, B, LDB, C, LDC, RWORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            LDA, LDB, LDC, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               B( LDB, * ), RWORK( * )
+      COMPLEX            A( LDA, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLACRM performs a very simple matrix-matrix multiplication:
+*           C := A * B,
+*  where A is M by N and complex; B is N by N and real;
+*  C is M by N and complex.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A and of the matrix C.
+*          M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns and rows of the matrix B and
+*          the number of columns of the matrix C.
+*          N >= 0.
+*
+*  A       (input) COMPLEX array, dimension (LDA, N)
+*          A contains the M by N matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >=max(1,M).
+*
+*  B       (input) REAL array, dimension (LDB, N)
+*          B contains the N by N matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >=max(1,N).
+*
+*  C       (input) COMPLEX array, dimension (LDC, N)
+*          C contains the M by N matrix C.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >=max(1,N).
+*
+*  RWORK   (workspace) REAL array, dimension (2*M*N)
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E0, ZERO = 0.0E0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, L
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          AIMAG, CMPLX, REAL
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMM
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible.
+*
+      IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) )
+     $   RETURN
+*
+      DO 20 J = 1, N
+         DO 10 I = 1, M
+            RWORK( ( J-1 )*M+I ) = REAL( A( I, J ) )
+   10    CONTINUE
+   20 CONTINUE
+*
+      L = M*N + 1
+      CALL SGEMM( 'N', 'N', M, N, N, ONE, RWORK, M, B, LDB, ZERO,
+     $            RWORK( L ), M )
+      DO 40 J = 1, N
+         DO 30 I = 1, M
+            C( I, J ) = RWORK( L+( J-1 )*M+I-1 )
+   30    CONTINUE
+   40 CONTINUE
+*
+      DO 60 J = 1, N
+         DO 50 I = 1, M
+            RWORK( ( J-1 )*M+I ) = AIMAG( A( I, J ) )
+   50    CONTINUE
+   60 CONTINUE
+      CALL SGEMM( 'N', 'N', M, N, N, ONE, RWORK, M, B, LDB, ZERO,
+     $            RWORK( L ), M )
+      DO 80 J = 1, N
+         DO 70 I = 1, M
+            C( I, J ) = CMPLX( REAL( C( I, J ) ),
+     $                  RWORK( L+( J-1 )*M+I-1 ) )
+   70    CONTINUE
+   80 CONTINUE
+*
+      RETURN
+*
+*     End of CLACRM
+*
+      END
diff --git a/costa/native/external/lapack/clacrt.f b/costa/native/external/lapack/clacrt.f
new file mode 100644
index 000000000..5211de83a
--- /dev/null
+++ b/costa/native/external/lapack/clacrt.f
@@ -0,0 +1,91 @@
+      SUBROUTINE CLACRT( N, CX, INCX, CY, INCY, C, S )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, INCY, N
+      COMPLEX            C, S
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            CX( * ), CY( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLACRT performs the operation
+*
+*     (  c  s )( x )  ==> ( x )
+*     ( -s  c )( y )      ( y )
+*
+*  where c and s are complex and the vectors x and y are complex.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of elements in the vectors CX and CY.
+*
+*  CX      (input/output) COMPLEX array, dimension (N)
+*          On input, the vector x.
+*          On output, CX is overwritten with c*x + s*y.
+*
+*  INCX    (input) INTEGER
+*          The increment between successive values of CX.  INCX <> 0.
+*
+*  CY      (input/output) COMPLEX array, dimension (N)
+*          On input, the vector y.
+*          On output, CY is overwritten with -s*x + c*y.
+*
+*  INCY    (input) INTEGER
+*          The increment between successive values of CY.  INCY <> 0.
+*
+*  C       (input) COMPLEX
+*  S       (input) COMPLEX
+*          C and S define the matrix
+*             [  C   S  ].
+*             [ -S   C  ]
+*
+* =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, IX, IY
+      COMPLEX            CTEMP
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.LE.0 )
+     $   RETURN
+      IF( INCX.EQ.1 .AND. INCY.EQ.1 )
+     $   GO TO 20
+*
+*     Code for unequal increments or equal increments not equal to 1
+*
+      IX = 1
+      IY = 1
+      IF( INCX.LT.0 )
+     $   IX = ( -N+1 )*INCX + 1
+      IF( INCY.LT.0 )
+     $   IY = ( -N+1 )*INCY + 1
+      DO 10 I = 1, N
+         CTEMP = C*CX( IX ) + S*CY( IY )
+         CY( IY ) = C*CY( IY ) - S*CX( IX )
+         CX( IX ) = CTEMP
+         IX = IX + INCX
+         IY = IY + INCY
+   10 CONTINUE
+      RETURN
+*
+*     Code for both increments equal to 1
+*
+   20 CONTINUE
+      DO 30 I = 1, N
+         CTEMP = C*CX( I ) + S*CY( I )
+         CY( I ) = C*CY( I ) - S*CX( I )
+         CX( I ) = CTEMP
+   30 CONTINUE
+      RETURN
+      END
diff --git a/costa/native/external/lapack/cladiv.f b/costa/native/external/lapack/cladiv.f
new file mode 100644
index 000000000..64d22f83f
--- /dev/null
+++ b/costa/native/external/lapack/cladiv.f
@@ -0,0 +1,47 @@
+      COMPLEX FUNCTION CLADIV( X, Y )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      COMPLEX            X, Y
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLADIV := X / Y, where X and Y are complex.  The computation of X / Y
+*  will not overflow on an intermediary step unless the results
+*  overflows.
+*
+*  Arguments
+*  =========
+*
+*  X       (input) COMPLEX
+*  Y       (input) COMPLEX
+*          The complex scalars X and Y.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      REAL               ZI, ZR
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLADIV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          AIMAG, CMPLX, REAL
+*     ..
+*     .. Executable Statements ..
+*
+      CALL SLADIV( REAL( X ), AIMAG( X ), REAL( Y ), AIMAG( Y ), ZR,
+     $             ZI )
+      CLADIV = CMPLX( ZR, ZI )
+*
+      RETURN
+*
+*     End of CLADIV
+*
+      END
diff --git a/costa/native/external/lapack/claed0.f b/costa/native/external/lapack/claed0.f
new file mode 100644
index 000000000..6695249cc
--- /dev/null
+++ b/costa/native/external/lapack/claed0.f
@@ -0,0 +1,289 @@
+      SUBROUTINE CLAED0( QSIZ, N, D, E, Q, LDQ, QSTORE, LDQS, RWORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDQ, LDQS, N, QSIZ
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               D( * ), E( * ), RWORK( * )
+      COMPLEX            Q( LDQ, * ), QSTORE( LDQS, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  Using the divide and conquer method, CLAED0 computes all eigenvalues
+*  of a symmetric tridiagonal matrix which is one diagonal block of
+*  those from reducing a dense or band Hermitian matrix and
+*  corresponding eigenvectors of the dense or band matrix.
+*
+*  Arguments
+*  =========
+*
+*  QSIZ   (input) INTEGER
+*         The dimension of the unitary matrix used to reduce
+*         the full matrix to tridiagonal form.  QSIZ >= N if ICOMPQ = 1.
+*
+*  N      (input) INTEGER
+*         The dimension of the symmetric tridiagonal matrix.  N >= 0.
+*
+*  D      (input/output) REAL array, dimension (N)
+*         On entry, the diagonal elements of the tridiagonal matrix.
+*         On exit, the eigenvalues in ascending order.
+*
+*  E      (input/output) REAL array, dimension (N-1)
+*         On entry, the off-diagonal elements of the tridiagonal matrix.
+*         On exit, E has been destroyed.
+*
+*  Q      (input/output) COMPLEX array, dimension (LDQ,N)
+*         On entry, Q must contain an QSIZ x N matrix whose columns
+*         unitarily orthonormal. It is a part of the unitary matrix
+*         that reduces the full dense Hermitian matrix to a
+*         (reducible) symmetric tridiagonal matrix.
+*
+*  LDQ    (input) INTEGER
+*         The leading dimension of the array Q.  LDQ >= max(1,N).
+*
+*  IWORK  (workspace) INTEGER array,
+*         the dimension of IWORK must be at least
+*                      6 + 6*N + 5*N*lg N
+*                      ( lg( N ) = smallest integer k
+*                                  such that 2^k >= N )
+*
+*  RWORK  (workspace) REAL array,
+*                               dimension (1 + 3*N + 2*N*lg N + 3*N**2)
+*                        ( lg( N ) = smallest integer k
+*                                    such that 2^k >= N )
+*
+*  QSTORE (workspace) COMPLEX array, dimension (LDQS, N)
+*         Used to store parts of
+*         the eigenvector matrix when the updating matrix multiplies
+*         take place.
+*
+*  LDQS   (input) INTEGER
+*         The leading dimension of the array QSTORE.
+*         LDQS >= max(1,N).
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  The algorithm failed to compute an eigenvalue while
+*                working on the submatrix lying in rows and columns
+*                INFO/(N+1) through mod(INFO,N+1).
+*
+*  =====================================================================
+*
+*  Warning:      N could be as big as QSIZ!
+*
+*     .. Parameters ..
+      REAL               TWO
+      PARAMETER          ( TWO = 2.E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            CURLVL, CURPRB, CURR, I, IGIVCL, IGIVNM,
+     $                   IGIVPT, INDXQ, IPERM, IPRMPT, IQ, IQPTR, IWREM,
+     $                   J, K, LGN, LL, MATSIZ, MSD2, SMLSIZ, SMM1,
+     $                   SPM1, SPM2, SUBMAT, SUBPBS, TLVLS
+      REAL               TEMP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CLACRM, CLAED7, SCOPY, SSTEQR, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, INT, LOG, MAX, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+*     IF( ICOMPQ .LT. 0 .OR. ICOMPQ .GT. 2 ) THEN
+*        INFO = -1
+*     ELSE IF( ( ICOMPQ .EQ. 1 ) .AND. ( QSIZ .LT. MAX( 0, N ) ) )
+*    $        THEN
+      IF( QSIZ.LT.MAX( 0, N ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDQS.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CLAED0', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      SMLSIZ = ILAENV( 9, 'CLAED0', ' ', 0, 0, 0, 0 )
+*
+*     Determine the size and placement of the submatrices, and save in
+*     the leading elements of IWORK.
+*
+      IWORK( 1 ) = N
+      SUBPBS = 1
+      TLVLS = 0
+   10 CONTINUE
+      IF( IWORK( SUBPBS ).GT.SMLSIZ ) THEN
+         DO 20 J = SUBPBS, 1, -1
+            IWORK( 2*J ) = ( IWORK( J )+1 ) / 2
+            IWORK( 2*J-1 ) = IWORK( J ) / 2
+   20    CONTINUE
+         TLVLS = TLVLS + 1
+         SUBPBS = 2*SUBPBS
+         GO TO 10
+      END IF
+      DO 30 J = 2, SUBPBS
+         IWORK( J ) = IWORK( J ) + IWORK( J-1 )
+   30 CONTINUE
+*
+*     Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1
+*     using rank-1 modifications (cuts).
+*
+      SPM1 = SUBPBS - 1
+      DO 40 I = 1, SPM1
+         SUBMAT = IWORK( I ) + 1
+         SMM1 = SUBMAT - 1
+         D( SMM1 ) = D( SMM1 ) - ABS( E( SMM1 ) )
+         D( SUBMAT ) = D( SUBMAT ) - ABS( E( SMM1 ) )
+   40 CONTINUE
+*
+      INDXQ = 4*N + 3
+*
+*     Set up workspaces for eigenvalues only/accumulate new vectors
+*     routine
+*
+      TEMP = LOG( REAL( N ) ) / LOG( TWO )
+      LGN = INT( TEMP )
+      IF( 2**LGN.LT.N )
+     $   LGN = LGN + 1
+      IF( 2**LGN.LT.N )
+     $   LGN = LGN + 1
+      IPRMPT = INDXQ + N + 1
+      IPERM = IPRMPT + N*LGN
+      IQPTR = IPERM + N*LGN
+      IGIVPT = IQPTR + N + 2
+      IGIVCL = IGIVPT + N*LGN
+*
+      IGIVNM = 1
+      IQ = IGIVNM + 2*N*LGN
+      IWREM = IQ + N**2 + 1
+*     Initialize pointers
+      DO 50 I = 0, SUBPBS
+         IWORK( IPRMPT+I ) = 1
+         IWORK( IGIVPT+I ) = 1
+   50 CONTINUE
+      IWORK( IQPTR ) = 1
+*
+*     Solve each submatrix eigenproblem at the bottom of the divide and
+*     conquer tree.
+*
+      CURR = 0
+      DO 70 I = 0, SPM1
+         IF( I.EQ.0 ) THEN
+            SUBMAT = 1
+            MATSIZ = IWORK( 1 )
+         ELSE
+            SUBMAT = IWORK( I ) + 1
+            MATSIZ = IWORK( I+1 ) - IWORK( I )
+         END IF
+         LL = IQ - 1 + IWORK( IQPTR+CURR )
+         CALL SSTEQR( 'I', MATSIZ, D( SUBMAT ), E( SUBMAT ),
+     $                RWORK( LL ), MATSIZ, RWORK, INFO )
+         CALL CLACRM( QSIZ, MATSIZ, Q( 1, SUBMAT ), LDQ, RWORK( LL ),
+     $                MATSIZ, QSTORE( 1, SUBMAT ), LDQS,
+     $                RWORK( IWREM ) )
+         IWORK( IQPTR+CURR+1 ) = IWORK( IQPTR+CURR ) + MATSIZ**2
+         CURR = CURR + 1
+         IF( INFO.GT.0 ) THEN
+            INFO = SUBMAT*( N+1 ) + SUBMAT + MATSIZ - 1
+            RETURN
+         END IF
+         K = 1
+         DO 60 J = SUBMAT, IWORK( I+1 )
+            IWORK( INDXQ+J ) = K
+            K = K + 1
+   60    CONTINUE
+   70 CONTINUE
+*
+*     Successively merge eigensystems of adjacent submatrices
+*     into eigensystem for the corresponding larger matrix.
+*
+*     while ( SUBPBS > 1 )
+*
+      CURLVL = 1
+   80 CONTINUE
+      IF( SUBPBS.GT.1 ) THEN
+         SPM2 = SUBPBS - 2
+         DO 90 I = 0, SPM2, 2
+            IF( I.EQ.0 ) THEN
+               SUBMAT = 1
+               MATSIZ = IWORK( 2 )
+               MSD2 = IWORK( 1 )
+               CURPRB = 0
+            ELSE
+               SUBMAT = IWORK( I ) + 1
+               MATSIZ = IWORK( I+2 ) - IWORK( I )
+               MSD2 = MATSIZ / 2
+               CURPRB = CURPRB + 1
+            END IF
+*
+*     Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2)
+*     into an eigensystem of size MATSIZ.  CLAED7 handles the case
+*     when the eigenvectors of a full or band Hermitian matrix (which
+*     was reduced to tridiagonal form) are desired.
+*
+*     I am free to use Q as a valuable working space until Loop 150.
+*
+            CALL CLAED7( MATSIZ, MSD2, QSIZ, TLVLS, CURLVL, CURPRB,
+     $                   D( SUBMAT ), QSTORE( 1, SUBMAT ), LDQS,
+     $                   E( SUBMAT+MSD2-1 ), IWORK( INDXQ+SUBMAT ),
+     $                   RWORK( IQ ), IWORK( IQPTR ), IWORK( IPRMPT ),
+     $                   IWORK( IPERM ), IWORK( IGIVPT ),
+     $                   IWORK( IGIVCL ), RWORK( IGIVNM ),
+     $                   Q( 1, SUBMAT ), RWORK( IWREM ),
+     $                   IWORK( SUBPBS+1 ), INFO )
+            IF( INFO.GT.0 ) THEN
+               INFO = SUBMAT*( N+1 ) + SUBMAT + MATSIZ - 1
+               RETURN
+            END IF
+            IWORK( I / 2+1 ) = IWORK( I+2 )
+   90    CONTINUE
+         SUBPBS = SUBPBS / 2
+         CURLVL = CURLVL + 1
+         GO TO 80
+      END IF
+*
+*     end while
+*
+*     Re-merge the eigenvalues/vectors which were deflated at the final
+*     merge step.
+*
+      DO 100 I = 1, N
+         J = IWORK( INDXQ+I )
+         RWORK( I ) = D( J )
+         CALL CCOPY( QSIZ, QSTORE( 1, J ), 1, Q( 1, I ), 1 )
+  100 CONTINUE
+      CALL SCOPY( N, RWORK, 1, D, 1 )
+*
+      RETURN
+*
+*     End of CLAED0
+*
+      END
diff --git a/costa/native/external/lapack/claed7.f b/costa/native/external/lapack/claed7.f
new file mode 100644
index 000000000..83af549ed
--- /dev/null
+++ b/costa/native/external/lapack/claed7.f
@@ -0,0 +1,267 @@
+      SUBROUTINE CLAED7( N, CUTPNT, QSIZ, TLVLS, CURLVL, CURPBM, D, Q,
+     $                   LDQ, RHO, INDXQ, QSTORE, QPTR, PRMPTR, PERM,
+     $                   GIVPTR, GIVCOL, GIVNUM, WORK, RWORK, IWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            CURLVL, CURPBM, CUTPNT, INFO, LDQ, N, QSIZ,
+     $                   TLVLS
+      REAL               RHO
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( 2, * ), GIVPTR( * ), INDXQ( * ),
+     $                   IWORK( * ), PERM( * ), PRMPTR( * ), QPTR( * )
+      REAL               D( * ), GIVNUM( 2, * ), QSTORE( * ), RWORK( * )
+      COMPLEX            Q( LDQ, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLAED7 computes the updated eigensystem of a diagonal
+*  matrix after modification by a rank-one symmetric matrix. This
+*  routine is used only for the eigenproblem which requires all
+*  eigenvalues and optionally eigenvectors of a dense or banded
+*  Hermitian matrix that has been reduced to tridiagonal form.
+*
+*    T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out)
+*
+*    where Z = Q'u, u is a vector of length N with ones in the
+*    CUTPNT and CUTPNT + 1 th elements and zeros elsewhere.
+*
+*     The eigenvectors of the original matrix are stored in Q, and the
+*     eigenvalues are in D.  The algorithm consists of three stages:
+*
+*        The first stage consists of deflating the size of the problem
+*        when there are multiple eigenvalues or if there is a zero in
+*        the Z vector.  For each such occurence the dimension of the
+*        secular equation problem is reduced by one.  This stage is
+*        performed by the routine SLAED2.
+*
+*        The second stage consists of calculating the updated
+*        eigenvalues. This is done by finding the roots of the secular
+*        equation via the routine SLAED4 (as called by SLAED3).
+*        This routine also calculates the eigenvectors of the current
+*        problem.
+*
+*        The final stage consists of computing the updated eigenvectors
+*        directly using the updated eigenvalues.  The eigenvectors for
+*        the current problem are multiplied with the eigenvectors from
+*        the overall problem.
+*
+*  Arguments
+*  =========
+*
+*  N      (input) INTEGER
+*         The dimension of the symmetric tridiagonal matrix.  N >= 0.
+*
+*  CUTPNT (input) INTEGER
+*         Contains the location of the last eigenvalue in the leading
+*         sub-matrix.  min(1,N) <= CUTPNT <= N.
+*
+*  QSIZ   (input) INTEGER
+*         The dimension of the unitary matrix used to reduce
+*         the full matrix to tridiagonal form.  QSIZ >= N.
+*
+*  TLVLS  (input) INTEGER
+*         The total number of merging levels in the overall divide and
+*         conquer tree.
+*
+*  CURLVL (input) INTEGER
+*         The current level in the overall merge routine,
+*         0 <= curlvl <= tlvls.
+*
+*  CURPBM (input) INTEGER
+*         The current problem in the current level in the overall
+*         merge routine (counting from upper left to lower right).
+*
+*  D      (input/output) REAL array, dimension (N)
+*         On entry, the eigenvalues of the rank-1-perturbed matrix.
+*         On exit, the eigenvalues of the repaired matrix.
+*
+*  Q      (input/output) COMPLEX array, dimension (LDQ,N)
+*         On entry, the eigenvectors of the rank-1-perturbed matrix.
+*         On exit, the eigenvectors of the repaired tridiagonal matrix.
+*
+*  LDQ    (input) INTEGER
+*         The leading dimension of the array Q.  LDQ >= max(1,N).
+*
+*  RHO    (input) REAL
+*         Contains the subdiagonal element used to create the rank-1
+*         modification.
+*
+*  INDXQ  (output) INTEGER array, dimension (N)
+*         This contains the permutation which will reintegrate the
+*         subproblem just solved back into sorted order,
+*         ie. D( INDXQ( I = 1, N ) ) will be in ascending order.
+*
+*  IWORK  (workspace) INTEGER array, dimension (4*N)
+*
+*  RWORK  (workspace) REAL array,
+*                                 dimension (3*N+2*QSIZ*N)
+*
+*  WORK   (workspace) COMPLEX array, dimension (QSIZ*N)
+*
+*  QSTORE (input/output) REAL array, dimension (N**2+1)
+*         Stores eigenvectors of submatrices encountered during
+*         divide and conquer, packed together. QPTR points to
+*         beginning of the submatrices.
+*
+*  QPTR   (input/output) INTEGER array, dimension (N+2)
+*         List of indices pointing to beginning of submatrices stored
+*         in QSTORE. The submatrices are numbered starting at the
+*         bottom left of the divide and conquer tree, from left to
+*         right and bottom to top.
+*
+*  PRMPTR (input) INTEGER array, dimension (N lg N)
+*         Contains a list of pointers which indicate where in PERM a
+*         level's permutation is stored.  PRMPTR(i+1) - PRMPTR(i)
+*         indicates the size of the permutation and also the size of
+*         the full, non-deflated problem.
+*
+*  PERM   (input) INTEGER array, dimension (N lg N)
+*         Contains the permutations (from deflation and sorting) to be
+*         applied to each eigenblock.
+*
+*  GIVPTR (input) INTEGER array, dimension (N lg N)
+*         Contains a list of pointers which indicate where in GIVCOL a
+*         level's Givens rotations are stored.  GIVPTR(i+1) - GIVPTR(i)
+*         indicates the number of Givens rotations.
+*
+*  GIVCOL (input) INTEGER array, dimension (2, N lg N)
+*         Each pair of numbers indicates a pair of columns to take place
+*         in a Givens rotation.
+*
+*  GIVNUM (input) REAL array, dimension (2, N lg N)
+*         Each number indicates the S value to be used in the
+*         corresponding Givens rotation.
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = 1, an eigenvalue did not converge
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            COLTYP, CURR, I, IDLMDA, IND1, IND2, INDX,
+     $                   INDXC, INDXP, IQ, IW, IZ, K, N1, N2, PTR
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLACRM, CLAED8, SLAED9, SLAEDA, SLAMRG, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+*     IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.1 ) THEN
+*        INFO = -1
+*     ELSE IF( N.LT.0 ) THEN
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( MIN( 1, N ).GT.CUTPNT .OR. N.LT.CUTPNT ) THEN
+         INFO = -2
+      ELSE IF( QSIZ.LT.N ) THEN
+         INFO = -3
+      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CLAED7', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     The following values are for bookkeeping purposes only.  They are
+*     integer pointers which indicate the portion of the workspace
+*     used by a particular array in SLAED2 and SLAED3.
+*
+      IZ = 1
+      IDLMDA = IZ + N
+      IW = IDLMDA + N
+      IQ = IW + N
+*
+      INDX = 1
+      INDXC = INDX + N
+      COLTYP = INDXC + N
+      INDXP = COLTYP + N
+*
+*     Form the z-vector which consists of the last row of Q_1 and the
+*     first row of Q_2.
+*
+      PTR = 1 + 2**TLVLS
+      DO 10 I = 1, CURLVL - 1
+         PTR = PTR + 2**( TLVLS-I )
+   10 CONTINUE
+      CURR = PTR + CURPBM
+      CALL SLAEDA( N, TLVLS, CURLVL, CURPBM, PRMPTR, PERM, GIVPTR,
+     $             GIVCOL, GIVNUM, QSTORE, QPTR, RWORK( IZ ),
+     $             RWORK( IZ+N ), INFO )
+*
+*     When solving the final problem, we no longer need the stored data,
+*     so we will overwrite the data from this level onto the previously
+*     used storage space.
+*
+      IF( CURLVL.EQ.TLVLS ) THEN
+         QPTR( CURR ) = 1
+         PRMPTR( CURR ) = 1
+         GIVPTR( CURR ) = 1
+      END IF
+*
+*     Sort and Deflate eigenvalues.
+*
+      CALL CLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, RWORK( IZ ),
+     $             RWORK( IDLMDA ), WORK, QSIZ, RWORK( IW ),
+     $             IWORK( INDXP ), IWORK( INDX ), INDXQ,
+     $             PERM( PRMPTR( CURR ) ), GIVPTR( CURR+1 ),
+     $             GIVCOL( 1, GIVPTR( CURR ) ),
+     $             GIVNUM( 1, GIVPTR( CURR ) ), INFO )
+      PRMPTR( CURR+1 ) = PRMPTR( CURR ) + N
+      GIVPTR( CURR+1 ) = GIVPTR( CURR+1 ) + GIVPTR( CURR )
+*
+*     Solve Secular Equation.
+*
+      IF( K.NE.0 ) THEN
+         CALL SLAED9( K, 1, K, N, D, RWORK( IQ ), K, RHO,
+     $                RWORK( IDLMDA ), RWORK( IW ),
+     $                QSTORE( QPTR( CURR ) ), K, INFO )
+         CALL CLACRM( QSIZ, K, WORK, QSIZ, QSTORE( QPTR( CURR ) ), K, Q,
+     $                LDQ, RWORK( IQ ) )
+         QPTR( CURR+1 ) = QPTR( CURR ) + K**2
+         IF( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+*
+*     Prepare the INDXQ sorting premutation.
+*
+         N1 = K
+         N2 = N - K
+         IND1 = 1
+         IND2 = N
+         CALL SLAMRG( N1, N2, D, 1, -1, INDXQ )
+      ELSE
+         QPTR( CURR+1 ) = QPTR( CURR )
+         DO 20 I = 1, N
+            INDXQ( I ) = I
+   20    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CLAED7
+*
+      END
diff --git a/costa/native/external/lapack/claed8.f b/costa/native/external/lapack/claed8.f
new file mode 100644
index 000000000..0b3c222dd
--- /dev/null
+++ b/costa/native/external/lapack/claed8.f
@@ -0,0 +1,364 @@
+      SUBROUTINE CLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, Z, DLAMDA,
+     $                   Q2, LDQ2, W, INDXP, INDX, INDXQ, PERM, GIVPTR,
+     $                   GIVCOL, GIVNUM, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+*     Courant Institute, NAG Ltd., and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            CUTPNT, GIVPTR, INFO, K, LDQ, LDQ2, N, QSIZ
+      REAL               RHO
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( 2, * ), INDX( * ), INDXP( * ),
+     $                   INDXQ( * ), PERM( * )
+      REAL               D( * ), DLAMDA( * ), GIVNUM( 2, * ), W( * ),
+     $                   Z( * )
+      COMPLEX            Q( LDQ, * ), Q2( LDQ2, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLAED8 merges the two sets of eigenvalues together into a single
+*  sorted set.  Then it tries to deflate the size of the problem.
+*  There are two ways in which deflation can occur:  when two or more
+*  eigenvalues are close together or if there is a tiny element in the
+*  Z vector.  For each such occurrence the order of the related secular
+*  equation problem is reduced by one.
+*
+*  Arguments
+*  =========
+*
+*  K      (output) INTEGER
+*         Contains the number of non-deflated eigenvalues.
+*         This is the order of the related secular equation.
+*
+*  N      (input) INTEGER
+*         The dimension of the symmetric tridiagonal matrix.  N >= 0.
+*
+*  QSIZ   (input) INTEGER
+*         The dimension of the unitary matrix used to reduce
+*         the dense or band matrix to tridiagonal form.
+*         QSIZ >= N if ICOMPQ = 1.
+*
+*  Q      (input/output) COMPLEX array, dimension (LDQ,N)
+*         On entry, Q contains the eigenvectors of the partially solved
+*         system which has been previously updated in matrix
+*         multiplies with other partially solved eigensystems.
+*         On exit, Q contains the trailing (N-K) updated eigenvectors
+*         (those which were deflated) in its last N-K columns.
+*
+*  LDQ    (input) INTEGER
+*         The leading dimension of the array Q.  LDQ >= max( 1, N ).
+*
+*  D      (input/output) REAL array, dimension (N)
+*         On entry, D contains the eigenvalues of the two submatrices to
+*         be combined.  On exit, D contains the trailing (N-K) updated
+*         eigenvalues (those which were deflated) sorted into increasing
+*         order.
+*
+*  RHO    (input/output) REAL
+*         Contains the off diagonal element associated with the rank-1
+*         cut which originally split the two submatrices which are now
+*         being recombined. RHO is modified during the computation to
+*         the value required by SLAED3.
+*
+*  CUTPNT (input) INTEGER
+*         Contains the location of the last eigenvalue in the leading
+*         sub-matrix.  MIN(1,N) <= CUTPNT <= N.
+*
+*  Z      (input) REAL array, dimension (N)
+*         On input this vector contains the updating vector (the last
+*         row of the first sub-eigenvector matrix and the first row of
+*         the second sub-eigenvector matrix).  The contents of Z are
+*         destroyed during the updating process.
+*
+*  DLAMDA (output) REAL array, dimension (N)
+*         Contains a copy of the first K eigenvalues which will be used
+*         by SLAED3 to form the secular equation.
+*
+*  Q2     (output) COMPLEX array, dimension (LDQ2,N)
+*         If ICOMPQ = 0, Q2 is not referenced.  Otherwise,
+*         Contains a copy of the first K eigenvectors which will be used
+*         by SLAED7 in a matrix multiply (SGEMM) to update the new
+*         eigenvectors.
+*
+*  LDQ2   (input) INTEGER
+*         The leading dimension of the array Q2.  LDQ2 >= max( 1, N ).
+*
+*  W      (output) REAL array, dimension (N)
+*         This will hold the first k values of the final
+*         deflation-altered z-vector and will be passed to SLAED3.
+*
+*  INDXP  (workspace) INTEGER array, dimension (N)
+*         This will contain the permutation used to place deflated
+*         values of D at the end of the array. On output INDXP(1:K)
+*         points to the nondeflated D-values and INDXP(K+1:N)
+*         points to the deflated eigenvalues.
+*
+*  INDX   (workspace) INTEGER array, dimension (N)
+*         This will contain the permutation used to sort the contents of
+*         D into ascending order.
+*
+*  INDXQ  (input) INTEGER array, dimension (N)
+*         This contains the permutation which separately sorts the two
+*         sub-problems in D into ascending order.  Note that elements in
+*         the second half of this permutation must first have CUTPNT
+*         added to their values in order to be accurate.
+*
+*  PERM   (output) INTEGER array, dimension (N)
+*         Contains the permutations (from deflation and sorting) to be
+*         applied to each eigenblock.
+*
+*  GIVPTR (output) INTEGER
+*         Contains the number of Givens rotations which took place in
+*         this subproblem.
+*
+*  GIVCOL (output) INTEGER array, dimension (2, N)
+*         Each pair of numbers indicates a pair of columns to take place
+*         in a Givens rotation.
+*
+*  GIVNUM (output) REAL array, dimension (2, N)
+*         Each number indicates the S value to be used in the
+*         corresponding Givens rotation.
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               MONE, ZERO, ONE, TWO, EIGHT
+      PARAMETER          ( MONE = -1.0E0, ZERO = 0.0E0, ONE = 1.0E0,
+     $                   TWO = 2.0E0, EIGHT = 8.0E0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IMAX, J, JLAM, JMAX, JP, K2, N1, N1P1, N2
+      REAL               C, EPS, S, T, TAU, TOL
+*     ..
+*     .. External Functions ..
+      INTEGER            ISAMAX
+      REAL               SLAMCH, SLAPY2
+      EXTERNAL           ISAMAX, SLAMCH, SLAPY2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CLACPY, CSROT, SCOPY, SLAMRG, SSCAL,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( QSIZ.LT.N ) THEN
+         INFO = -3
+      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( CUTPNT.LT.MIN( 1, N ) .OR. CUTPNT.GT.N ) THEN
+         INFO = -8
+      ELSE IF( LDQ2.LT.MAX( 1, N ) ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CLAED8', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      N1 = CUTPNT
+      N2 = N - N1
+      N1P1 = N1 + 1
+*
+      IF( RHO.LT.ZERO ) THEN
+         CALL SSCAL( N2, MONE, Z( N1P1 ), 1 )
+      END IF
+*
+*     Normalize z so that norm(z) = 1
+*
+      T = ONE / SQRT( TWO )
+      DO 10 J = 1, N
+         INDX( J ) = J
+   10 CONTINUE
+      CALL SSCAL( N, T, Z, 1 )
+      RHO = ABS( TWO*RHO )
+*
+*     Sort the eigenvalues into increasing order
+*
+      DO 20 I = CUTPNT + 1, N
+         INDXQ( I ) = INDXQ( I ) + CUTPNT
+   20 CONTINUE
+      DO 30 I = 1, N
+         DLAMDA( I ) = D( INDXQ( I ) )
+         W( I ) = Z( INDXQ( I ) )
+   30 CONTINUE
+      I = 1
+      J = CUTPNT + 1
+      CALL SLAMRG( N1, N2, DLAMDA, 1, 1, INDX )
+      DO 40 I = 1, N
+         D( I ) = DLAMDA( INDX( I ) )
+         Z( I ) = W( INDX( I ) )
+   40 CONTINUE
+*
+*     Calculate the allowable deflation tolerance
+*
+      IMAX = ISAMAX( N, Z, 1 )
+      JMAX = ISAMAX( N, D, 1 )
+      EPS = SLAMCH( 'Epsilon' )
+      TOL = EIGHT*EPS*ABS( D( JMAX ) )
+*
+*     If the rank-1 modifier is small enough, no more needs to be done
+*     -- except to reorganize Q so that its columns correspond with the
+*     elements in D.
+*
+      IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN
+         K = 0
+         DO 50 J = 1, N
+            PERM( J ) = INDXQ( INDX( J ) )
+            CALL CCOPY( QSIZ, Q( 1, PERM( J ) ), 1, Q2( 1, J ), 1 )
+   50    CONTINUE
+         CALL CLACPY( 'A', QSIZ, N, Q2( 1, 1 ), LDQ2, Q( 1, 1 ), LDQ )
+         RETURN
+      END IF
+*
+*     If there are multiple eigenvalues then the problem deflates.  Here
+*     the number of equal eigenvalues are found.  As each equal
+*     eigenvalue is found, an elementary reflector is computed to rotate
+*     the corresponding eigensubspace so that the corresponding
+*     components of Z are zero in this new basis.
+*
+      K = 0
+      GIVPTR = 0
+      K2 = N + 1
+      DO 60 J = 1, N
+         IF( RHO*ABS( Z( J ) ).LE.TOL ) THEN
+*
+*           Deflate due to small z component.
+*
+            K2 = K2 - 1
+            INDXP( K2 ) = J
+            IF( J.EQ.N )
+     $         GO TO 100
+         ELSE
+            JLAM = J
+            GO TO 70
+         END IF
+   60 CONTINUE
+   70 CONTINUE
+      J = J + 1
+      IF( J.GT.N )
+     $   GO TO 90
+      IF( RHO*ABS( Z( J ) ).LE.TOL ) THEN
+*
+*        Deflate due to small z component.
+*
+         K2 = K2 - 1
+         INDXP( K2 ) = J
+      ELSE
+*
+*        Check if eigenvalues are close enough to allow deflation.
+*
+         S = Z( JLAM )
+         C = Z( J )
+*
+*        Find sqrt(a**2+b**2) without overflow or
+*        destructive underflow.
+*
+         TAU = SLAPY2( C, S )
+         T = D( J ) - D( JLAM )
+         C = C / TAU
+         S = -S / TAU
+         IF( ABS( T*C*S ).LE.TOL ) THEN
+*
+*           Deflation is possible.
+*
+            Z( J ) = TAU
+            Z( JLAM ) = ZERO
+*
+*           Record the appropriate Givens rotation
+*
+            GIVPTR = GIVPTR + 1
+            GIVCOL( 1, GIVPTR ) = INDXQ( INDX( JLAM ) )
+            GIVCOL( 2, GIVPTR ) = INDXQ( INDX( J ) )
+            GIVNUM( 1, GIVPTR ) = C
+            GIVNUM( 2, GIVPTR ) = S
+            CALL CSROT( QSIZ, Q( 1, INDXQ( INDX( JLAM ) ) ), 1,
+     $                  Q( 1, INDXQ( INDX( J ) ) ), 1, C, S )
+            T = D( JLAM )*C*C + D( J )*S*S
+            D( J ) = D( JLAM )*S*S + D( J )*C*C
+            D( JLAM ) = T
+            K2 = K2 - 1
+            I = 1
+   80       CONTINUE
+            IF( K2+I.LE.N ) THEN
+               IF( D( JLAM ).LT.D( INDXP( K2+I ) ) ) THEN
+                  INDXP( K2+I-1 ) = INDXP( K2+I )
+                  INDXP( K2+I ) = JLAM
+                  I = I + 1
+                  GO TO 80
+               ELSE
+                  INDXP( K2+I-1 ) = JLAM
+               END IF
+            ELSE
+               INDXP( K2+I-1 ) = JLAM
+            END IF
+            JLAM = J
+         ELSE
+            K = K + 1
+            W( K ) = Z( JLAM )
+            DLAMDA( K ) = D( JLAM )
+            INDXP( K ) = JLAM
+            JLAM = J
+         END IF
+      END IF
+      GO TO 70
+   90 CONTINUE
+*
+*     Record the last eigenvalue.
+*
+      K = K + 1
+      W( K ) = Z( JLAM )
+      DLAMDA( K ) = D( JLAM )
+      INDXP( K ) = JLAM
+*
+  100 CONTINUE
+*
+*     Sort the eigenvalues and corresponding eigenvectors into DLAMDA
+*     and Q2 respectively.  The eigenvalues/vectors which were not
+*     deflated go into the first K slots of DLAMDA and Q2 respectively,
+*     while those which were deflated go into the last N - K slots.
+*
+      DO 110 J = 1, N
+         JP = INDXP( J )
+         DLAMDA( J ) = D( JP )
+         PERM( J ) = INDXQ( INDX( JP ) )
+         CALL CCOPY( QSIZ, Q( 1, PERM( J ) ), 1, Q2( 1, J ), 1 )
+  110 CONTINUE
+*
+*     The deflated eigenvalues and their corresponding vectors go back
+*     into the last N - K slots of D and Q respectively.
+*
+      IF( K.LT.N ) THEN
+         CALL SCOPY( N-K, DLAMDA( K+1 ), 1, D( K+1 ), 1 )
+         CALL CLACPY( 'A', QSIZ, N-K, Q2( 1, K+1 ), LDQ2, Q( 1, K+1 ),
+     $                LDQ )
+      END IF
+*
+      RETURN
+*
+*     End of CLAED8
+*
+      END
diff --git a/costa/native/external/lapack/claein.f b/costa/native/external/lapack/claein.f
new file mode 100644
index 000000000..85e6c7e72
--- /dev/null
+++ b/costa/native/external/lapack/claein.f
@@ -0,0 +1,264 @@
+      SUBROUTINE CLAEIN( RIGHTV, NOINIT, N, H, LDH, W, V, B, LDB, RWORK,
+     $                   EPS3, SMLNUM, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      LOGICAL            NOINIT, RIGHTV
+      INTEGER            INFO, LDB, LDH, N
+      REAL               EPS3, SMLNUM
+      COMPLEX            W
+*     ..
+*     .. Array Arguments ..
+      REAL               RWORK( * )
+      COMPLEX            B( LDB, * ), H( LDH, * ), V( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLAEIN uses inverse iteration to find a right or left eigenvector
+*  corresponding to the eigenvalue W of a complex upper Hessenberg
+*  matrix H.
+*
+*  Arguments
+*  =========
+*
+*  RIGHTV   (input) LOGICAL
+*          = .TRUE. : compute right eigenvector;
+*          = .FALSE.: compute left eigenvector.
+*
+*  NOINIT   (input) LOGICAL
+*          = .TRUE. : no initial vector supplied in V
+*          = .FALSE.: initial vector supplied in V.
+*
+*  N       (input) INTEGER
+*          The order of the matrix H.  N >= 0.
+*
+*  H       (input) COMPLEX array, dimension (LDH,N)
+*          The upper Hessenberg matrix H.
+*
+*  LDH     (input) INTEGER
+*          The leading dimension of the array H.  LDH >= max(1,N).
+*
+*  W       (input) COMPLEX
+*          The eigenvalue of H whose corresponding right or left
+*          eigenvector is to be computed.
+*
+*  V       (input/output) COMPLEX array, dimension (N)
+*          On entry, if NOINIT = .FALSE., V must contain a starting
+*          vector for inverse iteration; otherwise V need not be set.
+*          On exit, V contains the computed eigenvector, normalized so
+*          that the component of largest magnitude has magnitude 1; here
+*          the magnitude of a complex number (x,y) is taken to be
+*          |x| + |y|.
+*
+*  B       (workspace) COMPLEX array, dimension (LDB,N)
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  EPS3    (input) REAL
+*          A small machine-dependent value which is used to perturb
+*          close eigenvalues, and to replace zero pivots.
+*
+*  SMLNUM  (input) REAL
+*          A machine-dependent value close to the underflow threshold.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          = 1:  inverse iteration did not converge; V is set to the
+*                last iterate.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, TENTH
+      PARAMETER          ( ONE = 1.0E+0, TENTH = 1.0E-1 )
+      COMPLEX            ZERO
+      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      CHARACTER          NORMIN, TRANS
+      INTEGER            I, IERR, ITS, J
+      REAL               GROWTO, NRMSML, ROOTN, RTEMP, SCALE, VNORM
+      COMPLEX            CDUM, EI, EJ, TEMP, X
+*     ..
+*     .. External Functions ..
+      INTEGER            ICAMAX
+      REAL               SCASUM, SCNRM2
+      COMPLEX            CLADIV
+      EXTERNAL           ICAMAX, SCASUM, SCNRM2, CLADIV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLATRS, CSSCAL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MAX, REAL, SQRT
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+*
+*     GROWTO is the threshold used in the acceptance test for an
+*     eigenvector.
+*
+      ROOTN = SQRT( REAL( N ) )
+      GROWTO = TENTH / ROOTN
+      NRMSML = MAX( ONE, EPS3*ROOTN )*SMLNUM
+*
+*     Form B = H - W*I (except that the subdiagonal elements are not
+*     stored).
+*
+      DO 20 J = 1, N
+         DO 10 I = 1, J - 1
+            B( I, J ) = H( I, J )
+   10    CONTINUE
+         B( J, J ) = H( J, J ) - W
+   20 CONTINUE
+*
+      IF( NOINIT ) THEN
+*
+*        Initialize V.
+*
+         DO 30 I = 1, N
+            V( I ) = EPS3
+   30    CONTINUE
+      ELSE
+*
+*        Scale supplied initial vector.
+*
+         VNORM = SCNRM2( N, V, 1 )
+         CALL CSSCAL( N, ( EPS3*ROOTN ) / MAX( VNORM, NRMSML ), V, 1 )
+      END IF
+*
+      IF( RIGHTV ) THEN
+*
+*        LU decomposition with partial pivoting of B, replacing zero
+*        pivots by EPS3.
+*
+         DO 60 I = 1, N - 1
+            EI = H( I+1, I )
+            IF( CABS1( B( I, I ) ).LT.CABS1( EI ) ) THEN
+*
+*              Interchange rows and eliminate.
+*
+               X = CLADIV( B( I, I ), EI )
+               B( I, I ) = EI
+               DO 40 J = I + 1, N
+                  TEMP = B( I+1, J )
+                  B( I+1, J ) = B( I, J ) - X*TEMP
+                  B( I, J ) = TEMP
+   40          CONTINUE
+            ELSE
+*
+*              Eliminate without interchange.
+*
+               IF( B( I, I ).EQ.ZERO )
+     $            B( I, I ) = EPS3
+               X = CLADIV( EI, B( I, I ) )
+               IF( X.NE.ZERO ) THEN
+                  DO 50 J = I + 1, N
+                     B( I+1, J ) = B( I+1, J ) - X*B( I, J )
+   50             CONTINUE
+               END IF
+            END IF
+   60    CONTINUE
+         IF( B( N, N ).EQ.ZERO )
+     $      B( N, N ) = EPS3
+*
+         TRANS = 'N'
+*
+      ELSE
+*
+*        UL decomposition with partial pivoting of B, replacing zero
+*        pivots by EPS3.
+*
+         DO 90 J = N, 2, -1
+            EJ = H( J, J-1 )
+            IF( CABS1( B( J, J ) ).LT.CABS1( EJ ) ) THEN
+*
+*              Interchange columns and eliminate.
+*
+               X = CLADIV( B( J, J ), EJ )
+               B( J, J ) = EJ
+               DO 70 I = 1, J - 1
+                  TEMP = B( I, J-1 )
+                  B( I, J-1 ) = B( I, J ) - X*TEMP
+                  B( I, J ) = TEMP
+   70          CONTINUE
+            ELSE
+*
+*              Eliminate without interchange.
+*
+               IF( B( J, J ).EQ.ZERO )
+     $            B( J, J ) = EPS3
+               X = CLADIV( EJ, B( J, J ) )
+               IF( X.NE.ZERO ) THEN
+                  DO 80 I = 1, J - 1
+                     B( I, J-1 ) = B( I, J-1 ) - X*B( I, J )
+   80             CONTINUE
+               END IF
+            END IF
+   90    CONTINUE
+         IF( B( 1, 1 ).EQ.ZERO )
+     $      B( 1, 1 ) = EPS3
+*
+         TRANS = 'C'
+*
+      END IF
+*
+      NORMIN = 'N'
+      DO 110 ITS = 1, N
+*
+*        Solve U*x = scale*v for a right eigenvector
+*          or U'*x = scale*v for a left eigenvector,
+*        overwriting x on v.
+*
+         CALL CLATRS( 'Upper', TRANS, 'Nonunit', NORMIN, N, B, LDB, V,
+     $                SCALE, RWORK, IERR )
+         NORMIN = 'Y'
+*
+*        Test for sufficient growth in the norm of v.
+*
+         VNORM = SCASUM( N, V, 1 )
+         IF( VNORM.GE.GROWTO*SCALE )
+     $      GO TO 120
+*
+*        Choose new orthogonal starting vector and try again.
+*
+         RTEMP = EPS3 / ( ROOTN+ONE )
+         V( 1 ) = EPS3
+         DO 100 I = 2, N
+            V( I ) = RTEMP
+  100    CONTINUE
+         V( N-ITS+1 ) = V( N-ITS+1 ) - EPS3*ROOTN
+  110 CONTINUE
+*
+*     Failure to find eigenvector in N iterations.
+*
+      INFO = 1
+*
+  120 CONTINUE
+*
+*     Normalize eigenvector.
+*
+      I = ICAMAX( N, V, 1 )
+      CALL CSSCAL( N, ONE / CABS1( V( I ) ), V, 1 )
+*
+      RETURN
+*
+*     End of CLAEIN
+*
+      END
diff --git a/costa/native/external/lapack/claesy.f b/costa/native/external/lapack/claesy.f
new file mode 100644
index 000000000..15dc18685
--- /dev/null
+++ b/costa/native/external/lapack/claesy.f
@@ -0,0 +1,153 @@
+      SUBROUTINE CLAESY( A, B, C, RT1, RT2, EVSCAL, CS1, SN1 )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      COMPLEX            A, B, C, CS1, EVSCAL, RT1, RT2, SN1
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLAESY computes the eigendecomposition of a 2-by-2 symmetric matrix
+*     ( ( A, B );( B, C ) )
+*  provided the norm of the matrix of eigenvectors is larger than
+*  some threshold value.
+*
+*  RT1 is the eigenvalue of larger absolute value, and RT2 of
+*  smaller absolute value.  If the eigenvectors are computed, then
+*  on return ( CS1, SN1 ) is the unit eigenvector for RT1, hence
+*
+*  [  CS1     SN1   ] . [ A  B ] . [ CS1    -SN1   ] = [ RT1  0  ]
+*  [ -SN1     CS1   ]   [ B  C ]   [ SN1     CS1   ]   [  0  RT2 ]
+*
+*  Arguments
+*  =========
+*
+*  A       (input) COMPLEX
+*          The ( 1, 1 ) element of input matrix.
+*
+*  B       (input) COMPLEX
+*          The ( 1, 2 ) element of input matrix.  The ( 2, 1 ) element
+*          is also given by B, since the 2-by-2 matrix is symmetric.
+*
+*  C       (input) COMPLEX
+*          The ( 2, 2 ) element of input matrix.
+*
+*  RT1     (output) COMPLEX
+*          The eigenvalue of larger modulus.
+*
+*  RT2     (output) COMPLEX
+*          The eigenvalue of smaller modulus.
+*
+*  EVSCAL  (output) COMPLEX
+*          The complex value by which the eigenvector matrix was scaled
+*          to make it orthonormal.  If EVSCAL is zero, the eigenvectors
+*          were not computed.  This means one of two things:  the 2-by-2
+*          matrix could not be diagonalized, or the norm of the matrix
+*          of eigenvectors before scaling was larger than the threshold
+*          value THRESH (set below).
+*
+*  CS1     (output) COMPLEX
+*  SN1     (output) COMPLEX
+*          If EVSCAL .NE. 0,  ( CS1, SN1 ) is the unit right eigenvector
+*          for RT1.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E0 )
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E0 )
+      COMPLEX            CONE
+      PARAMETER          ( CONE = ( 1.0E0, 0.0E0 ) )
+      REAL               HALF
+      PARAMETER          ( HALF = 0.5E0 )
+      REAL               THRESH
+      PARAMETER          ( THRESH = 0.1E0 )
+*     ..
+*     .. Local Scalars ..
+      REAL               BABS, EVNORM, TABS, Z
+      COMPLEX            S, T, TMP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*
+*     Special case:  The matrix is actually diagonal.
+*     To avoid divide by zero later, we treat this case separately.
+*
+      IF( ABS( B ).EQ.ZERO ) THEN
+         RT1 = A
+         RT2 = C
+         IF( ABS( RT1 ).LT.ABS( RT2 ) ) THEN
+            TMP = RT1
+            RT1 = RT2
+            RT2 = TMP
+            CS1 = ZERO
+            SN1 = ONE
+         ELSE
+            CS1 = ONE
+            SN1 = ZERO
+         END IF
+      ELSE
+*
+*        Compute the eigenvalues and eigenvectors.
+*        The characteristic equation is
+*           lambda **2 - (A+C) lambda + (A*C - B*B)
+*        and we solve it using the quadratic formula.
+*
+         S = ( A+C )*HALF
+         T = ( A-C )*HALF
+*
+*        Take the square root carefully to avoid over/under flow.
+*
+         BABS = ABS( B )
+         TABS = ABS( T )
+         Z = MAX( BABS, TABS )
+         IF( Z.GT.ZERO )
+     $      T = Z*SQRT( ( T / Z )**2+( B / Z )**2 )
+*
+*        Compute the two eigenvalues.  RT1 and RT2 are exchanged
+*        if necessary so that RT1 will have the greater magnitude.
+*
+         RT1 = S + T
+         RT2 = S - T
+         IF( ABS( RT1 ).LT.ABS( RT2 ) ) THEN
+            TMP = RT1
+            RT1 = RT2
+            RT2 = TMP
+         END IF
+*
+*        Choose CS1 = 1 and SN1 to satisfy the first equation, then
+*        scale the components of this eigenvector so that the matrix
+*        of eigenvectors X satisfies  X * X' = I .  (No scaling is
+*        done if the norm of the eigenvalue matrix is less than THRESH.)
+*
+         SN1 = ( RT1-A ) / B
+         TABS = ABS( SN1 )
+         IF( TABS.GT.ONE ) THEN
+            T = TABS*SQRT( ( ONE / TABS )**2+( SN1 / TABS )**2 )
+         ELSE
+            T = SQRT( CONE+SN1*SN1 )
+         END IF
+         EVNORM = ABS( T )
+         IF( EVNORM.GE.THRESH ) THEN
+            EVSCAL = CONE / T
+            CS1 = EVSCAL
+            SN1 = SN1*EVSCAL
+         ELSE
+            EVSCAL = ZERO
+         END IF
+      END IF
+      RETURN
+*
+*     End of CLAESY
+*
+      END
diff --git a/costa/native/external/lapack/claev2.f b/costa/native/external/lapack/claev2.f
new file mode 100644
index 000000000..a97529270
--- /dev/null
+++ b/costa/native/external/lapack/claev2.f
@@ -0,0 +1,96 @@
+      SUBROUTINE CLAEV2( A, B, C, RT1, RT2, CS1, SN1 )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      REAL               CS1, RT1, RT2
+      COMPLEX            A, B, C, SN1
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLAEV2 computes the eigendecomposition of a 2-by-2 Hermitian matrix
+*     [  A         B  ]
+*     [  CONJG(B)  C  ].
+*  On return, RT1 is the eigenvalue of larger absolute value, RT2 is the
+*  eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right
+*  eigenvector for RT1, giving the decomposition
+*
+*  [ CS1  CONJG(SN1) ] [    A     B ] [ CS1 -CONJG(SN1) ] = [ RT1  0  ]
+*  [-SN1     CS1     ] [ CONJG(B) C ] [ SN1     CS1     ]   [  0  RT2 ].
+*
+*  Arguments
+*  =========
+*
+*  A      (input) COMPLEX
+*         The (1,1) element of the 2-by-2 matrix.
+*
+*  B      (input) COMPLEX
+*         The (1,2) element and the conjugate of the (2,1) element of
+*         the 2-by-2 matrix.
+*
+*  C      (input) COMPLEX
+*         The (2,2) element of the 2-by-2 matrix.
+*
+*  RT1    (output) REAL
+*         The eigenvalue of larger absolute value.
+*
+*  RT2    (output) REAL
+*         The eigenvalue of smaller absolute value.
+*
+*  CS1    (output) REAL
+*  SN1    (output) COMPLEX
+*         The vector (CS1, SN1) is a unit right eigenvector for RT1.
+*
+*  Further Details
+*  ===============
+*
+*  RT1 is accurate to a few ulps barring over/underflow.
+*
+*  RT2 may be inaccurate if there is massive cancellation in the
+*  determinant A*C-B*B; higher precision or correctly rounded or
+*  correctly truncated arithmetic would be needed to compute RT2
+*  accurately in all cases.
+*
+*  CS1 and SN1 are accurate to a few ulps barring over/underflow.
+*
+*  Overflow is possible only if RT1 is within a factor of 5 of overflow.
+*  Underflow is harmless if the input data is 0 or exceeds
+*     underflow_threshold / macheps.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E0 )
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      REAL               T
+      COMPLEX            W
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLAEV2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, CONJG, REAL
+*     ..
+*     .. Executable Statements ..
+*
+      IF( ABS( B ).EQ.ZERO ) THEN
+         W = ONE
+      ELSE
+         W = CONJG( B ) / ABS( B )
+      END IF
+      CALL SLAEV2( REAL( A ), ABS( B ), REAL( C ), RT1, RT2, CS1, T )
+      SN1 = W*T
+      RETURN
+*
+*     End of CLAEV2
+*
+      END
diff --git a/costa/native/external/lapack/clags2.f b/costa/native/external/lapack/clags2.f
new file mode 100644
index 000000000..6d01a6811
--- /dev/null
+++ b/costa/native/external/lapack/clags2.f
@@ -0,0 +1,305 @@
+      SUBROUTINE CLAGS2( UPPER, A1, A2, A3, B1, B2, B3, CSU, SNU, CSV,
+     $                   SNV, CSQ, SNQ )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994 
+*
+*     .. Scalar Arguments ..
+      LOGICAL            UPPER
+      REAL               A1, A3, B1, B3, CSQ, CSU, CSV
+      COMPLEX            A2, B2, SNQ, SNU, SNV
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLAGS2 computes 2-by-2 unitary matrices U, V and Q, such
+*  that if ( UPPER ) then
+*
+*            U'*A*Q = U'*( A1 A2 )*Q = ( x  0  )
+*                        ( 0  A3 )     ( x  x  )
+*  and
+*            V'*B*Q = V'*( B1 B2 )*Q = ( x  0  )
+*                        ( 0  B3 )     ( x  x  )
+*
+*  or if ( .NOT.UPPER ) then
+*
+*            U'*A*Q = U'*( A1 0  )*Q = ( x  x  )
+*                        ( A2 A3 )     ( 0  x  )
+*  and
+*            V'*B*Q = V'*( B1 0  )*Q = ( x  x  )
+*                        ( B2 B3 )     ( 0  x  )
+*  where
+*
+*    U = (     CSU      SNU ), V = (     CSV     SNV ),
+*        ( -CONJG(SNU)  CSU )      ( -CONJG(SNV) CSV )
+*
+*    Q = (     CSQ      SNQ )
+*        ( -CONJG(SNQ)  CSQ )
+*
+*  Z' denotes the conjugate transpose of Z.
+*
+*  The rows of the transformed A and B are parallel. Moreover, if the
+*  input 2-by-2 matrix A is not zero, then the transformed (1,1) entry
+*  of A is not zero. If the input matrices A and B are both not zero,
+*  then the transformed (2,2) element of B is not zero, except when the
+*  first rows of input A and B are parallel and the second rows are
+*  zero.
+*
+*  Arguments
+*  =========
+*
+*  UPPER   (input) LOGICAL
+*          = .TRUE.: the input matrices A and B are upper triangular.
+*          = .FALSE.: the input matrices A and B are lower triangular.
+*
+*  A1      (input) REAL
+*  A2      (input) COMPLEX
+*  A3      (input) REAL
+*          On entry, A1, A2 and A3 are elements of the input 2-by-2
+*          upper (lower) triangular matrix A.
+*
+*  B1      (input) REAL
+*  B2      (input) COMPLEX
+*  B3      (input) REAL
+*          On entry, B1, B2 and B3 are elements of the input 2-by-2
+*          upper (lower) triangular matrix B.
+*
+*  CSU     (output) REAL
+*  SNU     (output) COMPLEX
+*          The desired unitary matrix U.
+*
+*  CSV     (output) REAL
+*  SNV     (output) COMPLEX
+*          The desired unitary matrix V.
+*
+*  CSQ     (output) REAL
+*  SNQ     (output) COMPLEX
+*          The desired unitary matrix Q.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      REAL               A, AUA11, AUA12, AUA21, AUA22, AVB11, AVB12,
+     $                   AVB21, AVB22, CSL, CSR, D, FB, FC, S1, S2, SNL,
+     $                   SNR, UA11R, UA22R, VB11R, VB22R
+      COMPLEX            B, C, D1, R, T, UA11, UA12, UA21, UA22, VB11,
+     $                   VB12, VB21, VB22
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLARTG, SLASV2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, CMPLX, CONJG, REAL
+*     ..
+*     .. Statement Functions ..
+      REAL               ABS1
+*     ..
+*     .. Statement Function definitions ..
+      ABS1( T ) = ABS( REAL( T ) ) + ABS( AIMAG( T ) )
+*     ..
+*     .. Executable Statements ..
+*
+      IF( UPPER ) THEN
+*
+*        Input matrices A and B are upper triangular matrices
+*
+*        Form matrix C = A*adj(B) = ( a b )
+*                                   ( 0 d )
+*
+         A = A1*B3
+         D = A3*B1
+         B = A2*B1 - A1*B2
+         FB = ABS( B )
+*
+*        Transform complex 2-by-2 matrix C to real matrix by unitary
+*        diagonal matrix diag(1,D1).
+*
+         D1 = ONE
+         IF( FB.NE.ZERO )
+     $      D1 = B / FB
+*
+*        The SVD of real 2 by 2 triangular C
+*
+*         ( CSL -SNL )*( A B )*(  CSR  SNR ) = ( R 0 )
+*         ( SNL  CSL ) ( 0 D ) ( -SNR  CSR )   ( 0 T )
+*
+         CALL SLASV2( A, FB, D, S1, S2, SNR, CSR, SNL, CSL )
+*
+         IF( ABS( CSL ).GE.ABS( SNL ) .OR. ABS( CSR ).GE.ABS( SNR ) )
+     $        THEN
+*
+*           Compute the (1,1) and (1,2) elements of U'*A and V'*B,
+*           and (1,2) element of |U|'*|A| and |V|'*|B|.
+*
+            UA11R = CSL*A1
+            UA12 = CSL*A2 + D1*SNL*A3
+*
+            VB11R = CSR*B1
+            VB12 = CSR*B2 + D1*SNR*B3
+*
+            AUA12 = ABS( CSL )*ABS1( A2 ) + ABS( SNL )*ABS( A3 )
+            AVB12 = ABS( CSR )*ABS1( B2 ) + ABS( SNR )*ABS( B3 )
+*
+*           zero (1,2) elements of U'*A and V'*B
+*
+            IF( ( ABS( UA11R )+ABS1( UA12 ) ).EQ.ZERO ) THEN
+               CALL CLARTG( -CMPLX( VB11R ), CONJG( VB12 ), CSQ, SNQ,
+     $                      R )
+            ELSE IF( ( ABS( VB11R )+ABS1( VB12 ) ).EQ.ZERO ) THEN
+               CALL CLARTG( -CMPLX( UA11R ), CONJG( UA12 ), CSQ, SNQ,
+     $                      R )
+            ELSE IF( AUA12 / ( ABS( UA11R )+ABS1( UA12 ) ).LE.AVB12 /
+     $               ( ABS( VB11R )+ABS1( VB12 ) ) ) THEN
+               CALL CLARTG( -CMPLX( UA11R ), CONJG( UA12 ), CSQ, SNQ,
+     $                      R )
+            ELSE
+               CALL CLARTG( -CMPLX( VB11R ), CONJG( VB12 ), CSQ, SNQ,
+     $                      R )
+            END IF
+*
+            CSU = CSL
+            SNU = -D1*SNL
+            CSV = CSR
+            SNV = -D1*SNR
+*
+         ELSE
+*
+*           Compute the (2,1) and (2,2) elements of U'*A and V'*B,
+*           and (2,2) element of |U|'*|A| and |V|'*|B|.
+*
+            UA21 = -CONJG( D1 )*SNL*A1
+            UA22 = -CONJG( D1 )*SNL*A2 + CSL*A3
+*
+            VB21 = -CONJG( D1 )*SNR*B1
+            VB22 = -CONJG( D1 )*SNR*B2 + CSR*B3
+*
+            AUA22 = ABS( SNL )*ABS1( A2 ) + ABS( CSL )*ABS( A3 )
+            AVB22 = ABS( SNR )*ABS1( B2 ) + ABS( CSR )*ABS( B3 )
+*
+*           zero (2,2) elements of U'*A and V'*B, and then swap.
+*
+            IF( ( ABS1( UA21 )+ABS1( UA22 ) ).EQ.ZERO ) THEN
+               CALL CLARTG( -CONJG( VB21 ), CONJG( VB22 ), CSQ, SNQ, R )
+            ELSE IF( ( ABS1( VB21 )+ABS( VB22 ) ).EQ.ZERO ) THEN
+               CALL CLARTG( -CONJG( UA21 ), CONJG( UA22 ), CSQ, SNQ, R )
+            ELSE IF( AUA22 / ( ABS1( UA21 )+ABS1( UA22 ) ).LE.AVB22 /
+     $               ( ABS1( VB21 )+ABS1( VB22 ) ) ) THEN
+               CALL CLARTG( -CONJG( UA21 ), CONJG( UA22 ), CSQ, SNQ, R )
+            ELSE
+               CALL CLARTG( -CONJG( VB21 ), CONJG( VB22 ), CSQ, SNQ, R )
+            END IF
+*
+            CSU = SNL
+            SNU = D1*CSL
+            CSV = SNR
+            SNV = D1*CSR
+*
+         END IF
+*
+      ELSE
+*
+*        Input matrices A and B are lower triangular matrices
+*
+*        Form matrix C = A*adj(B) = ( a 0 )
+*                                   ( c d )
+*
+         A = A1*B3
+         D = A3*B1
+         C = A2*B3 - A3*B2
+         FC = ABS( C )
+*
+*        Transform complex 2-by-2 matrix C to real matrix by unitary
+*        diagonal matrix diag(d1,1).
+*
+         D1 = ONE
+         IF( FC.NE.ZERO )
+     $      D1 = C / FC
+*
+*        The SVD of real 2 by 2 triangular C
+*
+*         ( CSL -SNL )*( A 0 )*(  CSR  SNR ) = ( R 0 )
+*         ( SNL  CSL ) ( C D ) ( -SNR  CSR )   ( 0 T )
+*
+         CALL SLASV2( A, FC, D, S1, S2, SNR, CSR, SNL, CSL )
+*
+         IF( ABS( CSR ).GE.ABS( SNR ) .OR. ABS( CSL ).GE.ABS( SNL ) )
+     $        THEN
+*
+*           Compute the (2,1) and (2,2) elements of U'*A and V'*B,
+*           and (2,1) element of |U|'*|A| and |V|'*|B|.
+*
+            UA21 = -D1*SNR*A1 + CSR*A2
+            UA22R = CSR*A3
+*
+            VB21 = -D1*SNL*B1 + CSL*B2
+            VB22R = CSL*B3
+*
+            AUA21 = ABS( SNR )*ABS( A1 ) + ABS( CSR )*ABS1( A2 )
+            AVB21 = ABS( SNL )*ABS( B1 ) + ABS( CSL )*ABS1( B2 )
+*
+*           zero (2,1) elements of U'*A and V'*B.
+*
+            IF( ( ABS1( UA21 )+ABS( UA22R ) ).EQ.ZERO ) THEN
+               CALL CLARTG( CMPLX( VB22R ), VB21, CSQ, SNQ, R )
+            ELSE IF( ( ABS1( VB21 )+ABS( VB22R ) ).EQ.ZERO ) THEN
+               CALL CLARTG( CMPLX( UA22R ), UA21, CSQ, SNQ, R )
+            ELSE IF( AUA21 / ( ABS1( UA21 )+ABS( UA22R ) ).LE.AVB21 /
+     $               ( ABS1( VB21 )+ABS( VB22R ) ) ) THEN
+               CALL CLARTG( CMPLX( UA22R ), UA21, CSQ, SNQ, R )
+            ELSE
+               CALL CLARTG( CMPLX( VB22R ), VB21, CSQ, SNQ, R )
+            END IF
+*
+            CSU = CSR
+            SNU = -CONJG( D1 )*SNR
+            CSV = CSL
+            SNV = -CONJG( D1 )*SNL
+*
+         ELSE
+*
+*           Compute the (1,1) and (1,2) elements of U'*A and V'*B,
+*           and (1,1) element of |U|'*|A| and |V|'*|B|.
+*
+            UA11 = CSR*A1 + CONJG( D1 )*SNR*A2
+            UA12 = CONJG( D1 )*SNR*A3
+*
+            VB11 = CSL*B1 + CONJG( D1 )*SNL*B2
+            VB12 = CONJG( D1 )*SNL*B3
+*
+            AUA11 = ABS( CSR )*ABS( A1 ) + ABS( SNR )*ABS1( A2 )
+            AVB11 = ABS( CSL )*ABS( B1 ) + ABS( SNL )*ABS1( B2 )
+*
+*           zero (1,1) elements of U'*A and V'*B, and then swap.
+*
+            IF( ( ABS1( UA11 )+ABS1( UA12 ) ).EQ.ZERO ) THEN
+               CALL CLARTG( VB12, VB11, CSQ, SNQ, R )
+            ELSE IF( ( ABS1( VB11 )+ABS1( VB12 ) ).EQ.ZERO ) THEN
+               CALL CLARTG( UA12, UA11, CSQ, SNQ, R )
+            ELSE IF( AUA11 / ( ABS1( UA11 )+ABS1( UA12 ) ).LE.AVB11 /
+     $               ( ABS1( VB11 )+ABS1( VB12 ) ) ) THEN
+               CALL CLARTG( UA12, UA11, CSQ, SNQ, R )
+            ELSE
+               CALL CLARTG( VB12, VB11, CSQ, SNQ, R )
+            END IF
+*
+            CSU = SNR
+            SNU = CONJG( D1 )*CSR
+            CSV = SNL
+            SNV = CONJG( D1 )*CSL
+*
+         END IF
+*
+      END IF
+*
+      RETURN
+*
+*     End of CLAGS2
+*
+      END
diff --git a/costa/native/external/lapack/clagtm.f b/costa/native/external/lapack/clagtm.f
new file mode 100644
index 000000000..7145c0625
--- /dev/null
+++ b/costa/native/external/lapack/clagtm.f
@@ -0,0 +1,234 @@
+      SUBROUTINE CLAGTM( TRANS, N, NRHS, ALPHA, DL, D, DU, X, LDX, BETA,
+     $                   B, LDB )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            LDB, LDX, N, NRHS
+      REAL               ALPHA, BETA
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            B( LDB, * ), D( * ), DL( * ), DU( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLAGTM performs a matrix-vector product of the form
+*
+*     B := alpha * A * X + beta * B
+*
+*  where A is a tridiagonal matrix of order N, B and X are N by NRHS
+*  matrices, and alpha and beta are real scalars, each of which may be
+*  0., 1., or -1.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER
+*          Specifies the operation applied to A.
+*          = 'N':  No transpose, B := alpha * A * X + beta * B
+*          = 'T':  Transpose,    B := alpha * A**T * X + beta * B
+*          = 'C':  Conjugate transpose, B := alpha * A**H * X + beta * B
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices X and B.
+*
+*  ALPHA   (input) REAL
+*          The scalar alpha.  ALPHA must be 0., 1., or -1.; otherwise,
+*          it is assumed to be 0.
+*
+*  DL      (input) COMPLEX array, dimension (N-1)
+*          The (n-1) sub-diagonal elements of T.
+*
+*  D       (input) COMPLEX array, dimension (N)
+*          The diagonal elements of T.
+*
+*  DU      (input) COMPLEX array, dimension (N-1)
+*          The (n-1) super-diagonal elements of T.
+*
+*  X       (input) COMPLEX array, dimension (LDX,NRHS)
+*          The N by NRHS matrix X.
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(N,1).
+*
+*  BETA    (input) REAL
+*          The scalar beta.  BETA must be 0., 1., or -1.; otherwise,
+*          it is assumed to be 1.
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the N by NRHS matrix B.
+*          On exit, B is overwritten by the matrix expression
+*          B := alpha * A * X + beta * B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(N,1).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Multiply B by BETA if BETA.NE.1.
+*
+      IF( BETA.EQ.ZERO ) THEN
+         DO 20 J = 1, NRHS
+            DO 10 I = 1, N
+               B( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+      ELSE IF( BETA.EQ.-ONE ) THEN
+         DO 40 J = 1, NRHS
+            DO 30 I = 1, N
+               B( I, J ) = -B( I, J )
+   30       CONTINUE
+   40    CONTINUE
+      END IF
+*
+      IF( ALPHA.EQ.ONE ) THEN
+         IF( LSAME( TRANS, 'N' ) ) THEN
+*
+*           Compute B := B + A*X
+*
+            DO 60 J = 1, NRHS
+               IF( N.EQ.1 ) THEN
+                  B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J )
+               ELSE
+                  B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) +
+     $                        DU( 1 )*X( 2, J )
+                  B( N, J ) = B( N, J ) + DL( N-1 )*X( N-1, J ) +
+     $                        D( N )*X( N, J )
+                  DO 50 I = 2, N - 1
+                     B( I, J ) = B( I, J ) + DL( I-1 )*X( I-1, J ) +
+     $                           D( I )*X( I, J ) + DU( I )*X( I+1, J )
+   50             CONTINUE
+               END IF
+   60       CONTINUE
+         ELSE IF( LSAME( TRANS, 'T' ) ) THEN
+*
+*           Compute B := B + A**T * X
+*
+            DO 80 J = 1, NRHS
+               IF( N.EQ.1 ) THEN
+                  B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J )
+               ELSE
+                  B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) +
+     $                        DL( 1 )*X( 2, J )
+                  B( N, J ) = B( N, J ) + DU( N-1 )*X( N-1, J ) +
+     $                        D( N )*X( N, J )
+                  DO 70 I = 2, N - 1
+                     B( I, J ) = B( I, J ) + DU( I-1 )*X( I-1, J ) +
+     $                           D( I )*X( I, J ) + DL( I )*X( I+1, J )
+   70             CONTINUE
+               END IF
+   80       CONTINUE
+         ELSE IF( LSAME( TRANS, 'C' ) ) THEN
+*
+*           Compute B := B + A**H * X
+*
+            DO 100 J = 1, NRHS
+               IF( N.EQ.1 ) THEN
+                  B( 1, J ) = B( 1, J ) + CONJG( D( 1 ) )*X( 1, J )
+               ELSE
+                  B( 1, J ) = B( 1, J ) + CONJG( D( 1 ) )*X( 1, J ) +
+     $                        CONJG( DL( 1 ) )*X( 2, J )
+                  B( N, J ) = B( N, J ) + CONJG( DU( N-1 ) )*
+     $                        X( N-1, J ) + CONJG( D( N ) )*X( N, J )
+                  DO 90 I = 2, N - 1
+                     B( I, J ) = B( I, J ) + CONJG( DU( I-1 ) )*
+     $                           X( I-1, J ) + CONJG( D( I ) )*
+     $                           X( I, J ) + CONJG( DL( I ) )*
+     $                           X( I+1, J )
+   90             CONTINUE
+               END IF
+  100       CONTINUE
+         END IF
+      ELSE IF( ALPHA.EQ.-ONE ) THEN
+         IF( LSAME( TRANS, 'N' ) ) THEN
+*
+*           Compute B := B - A*X
+*
+            DO 120 J = 1, NRHS
+               IF( N.EQ.1 ) THEN
+                  B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J )
+               ELSE
+                  B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) -
+     $                        DU( 1 )*X( 2, J )
+                  B( N, J ) = B( N, J ) - DL( N-1 )*X( N-1, J ) -
+     $                        D( N )*X( N, J )
+                  DO 110 I = 2, N - 1
+                     B( I, J ) = B( I, J ) - DL( I-1 )*X( I-1, J ) -
+     $                           D( I )*X( I, J ) - DU( I )*X( I+1, J )
+  110             CONTINUE
+               END IF
+  120       CONTINUE
+         ELSE IF( LSAME( TRANS, 'T' ) ) THEN
+*
+*           Compute B := B - A'*X
+*
+            DO 140 J = 1, NRHS
+               IF( N.EQ.1 ) THEN
+                  B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J )
+               ELSE
+                  B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) -
+     $                        DL( 1 )*X( 2, J )
+                  B( N, J ) = B( N, J ) - DU( N-1 )*X( N-1, J ) -
+     $                        D( N )*X( N, J )
+                  DO 130 I = 2, N - 1
+                     B( I, J ) = B( I, J ) - DU( I-1 )*X( I-1, J ) -
+     $                           D( I )*X( I, J ) - DL( I )*X( I+1, J )
+  130             CONTINUE
+               END IF
+  140       CONTINUE
+         ELSE IF( LSAME( TRANS, 'C' ) ) THEN
+*
+*           Compute B := B - A'*X
+*
+            DO 160 J = 1, NRHS
+               IF( N.EQ.1 ) THEN
+                  B( 1, J ) = B( 1, J ) - CONJG( D( 1 ) )*X( 1, J )
+               ELSE
+                  B( 1, J ) = B( 1, J ) - CONJG( D( 1 ) )*X( 1, J ) -
+     $                        CONJG( DL( 1 ) )*X( 2, J )
+                  B( N, J ) = B( N, J ) - CONJG( DU( N-1 ) )*
+     $                        X( N-1, J ) - CONJG( D( N ) )*X( N, J )
+                  DO 150 I = 2, N - 1
+                     B( I, J ) = B( I, J ) - CONJG( DU( I-1 ) )*
+     $                           X( I-1, J ) - CONJG( D( I ) )*
+     $                           X( I, J ) - CONJG( DL( I ) )*
+     $                           X( I+1, J )
+  150             CONTINUE
+               END IF
+  160       CONTINUE
+         END IF
+      END IF
+      RETURN
+*
+*     End of CLAGTM
+*
+      END
diff --git a/costa/native/external/lapack/clahef.f b/costa/native/external/lapack/clahef.f
new file mode 100644
index 000000000..d5988bd4e
--- /dev/null
+++ b/costa/native/external/lapack/clahef.f
@@ -0,0 +1,648 @@
+      SUBROUTINE CLAHEF( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KB, LDA, LDW, N, NB
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            A( LDA, * ), W( LDW, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLAHEF computes a partial factorization of a complex Hermitian
+*  matrix A using the Bunch-Kaufman diagonal pivoting method. The
+*  partial factorization has the form:
+*
+*  A  =  ( I  U12 ) ( A11  0  ) (  I    0   )  if UPLO = 'U', or:
+*        ( 0  U22 ) (  0   D  ) ( U12' U22' )
+*
+*  A  =  ( L11  0 ) (  D   0  ) ( L11' L21' )  if UPLO = 'L'
+*        ( L21  I ) (  0  A22 ) (  0    I   )
+*
+*  where the order of D is at most NB. The actual order is returned in
+*  the argument KB, and is either NB or NB-1, or N if N <= NB.
+*  Note that U' denotes the conjugate transpose of U.
+*
+*  CLAHEF is an auxiliary routine called by CHETRF. It uses blocked code
+*  (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or
+*  A22 (if UPLO = 'L').
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          Hermitian matrix A is stored:
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NB      (input) INTEGER
+*          The maximum number of columns of the matrix A that should be
+*          factored.  NB should be at least 2 to allow for 2-by-2 pivot
+*          blocks.
+*
+*  KB      (output) INTEGER
+*          The number of columns of A that were actually factored.
+*          KB is either NB-1 or NB, or N if N <= NB.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+*          n-by-n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n-by-n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*          On exit, A contains details of the partial factorization.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D.
+*          If UPLO = 'U', only the last KB elements of IPIV are set;
+*          if UPLO = 'L', only the first KB elements are set.
+*
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*  W       (workspace) COMPLEX array, dimension (LDW,NB)
+*
+*  LDW     (input) INTEGER
+*          The leading dimension of the array W.  LDW >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          > 0: if INFO = k, D(k,k) is exactly zero.  The factorization
+*               has been completed, but the block diagonal matrix D is
+*               exactly singular.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+      COMPLEX            CONE
+      PARAMETER          ( CONE = ( 1.0E+0, 0.0E+0 ) )
+      REAL               EIGHT, SEVTEN
+      PARAMETER          ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            IMAX, J, JB, JJ, JMAX, JP, K, KK, KKW, KP,
+     $                   KSTEP, KW
+      REAL               ABSAKK, ALPHA, COLMAX, R1, ROWMAX, T
+      COMPLEX            D11, D21, D22, Z
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ICAMAX
+      EXTERNAL           LSAME, ICAMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CGEMM, CGEMV, CLACGV, CSSCAL, CSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, CONJG, MAX, MIN, REAL, SQRT
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( Z ) = ABS( REAL( Z ) ) + ABS( AIMAG( Z ) )
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+*
+*     Initialize ALPHA for use in choosing pivot block size.
+*
+      ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT
+*
+      IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*        Factorize the trailing columns of A using the upper triangle
+*        of A and working backwards, and compute the matrix W = U12*D
+*        for use in updating A11 (note that conjg(W) is actually stored)
+*
+*        K is the main loop index, decreasing from N in steps of 1 or 2
+*
+*        KW is the column of W which corresponds to column K of A
+*
+         K = N
+   10    CONTINUE
+         KW = NB + K - N
+*
+*        Exit from loop
+*
+         IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 )
+     $      GO TO 30
+*
+*        Copy column K of A to column KW of W and update it
+*
+         CALL CCOPY( K-1, A( 1, K ), 1, W( 1, KW ), 1 )
+         W( K, KW ) = REAL( A( K, K ) )
+         IF( K.LT.N ) THEN
+            CALL CGEMV( 'No transpose', K, N-K, -CONE, A( 1, K+1 ), LDA,
+     $                  W( K, KW+1 ), LDW, CONE, W( 1, KW ), 1 )
+            W( K, KW ) = REAL( W( K, KW ) )
+         END IF
+*
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = ABS( REAL( W( K, KW ) ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.GT.1 ) THEN
+            IMAX = ICAMAX( K-1, W( 1, KW ), 1 )
+            COLMAX = CABS1( W( IMAX, KW ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+            A( K, K ) = REAL( A( K, K ) )
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              Copy column IMAX to column KW-1 of W and update it
+*
+               CALL CCOPY( IMAX-1, A( 1, IMAX ), 1, W( 1, KW-1 ), 1 )
+               W( IMAX, KW-1 ) = REAL( A( IMAX, IMAX ) )
+               CALL CCOPY( K-IMAX, A( IMAX, IMAX+1 ), LDA,
+     $                     W( IMAX+1, KW-1 ), 1 )
+               CALL CLACGV( K-IMAX, W( IMAX+1, KW-1 ), 1 )
+               IF( K.LT.N ) THEN
+                  CALL CGEMV( 'No transpose', K, N-K, -CONE,
+     $                        A( 1, K+1 ), LDA, W( IMAX, KW+1 ), LDW,
+     $                        CONE, W( 1, KW-1 ), 1 )
+                  W( IMAX, KW-1 ) = REAL( W( IMAX, KW-1 ) )
+               END IF
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               JMAX = IMAX + ICAMAX( K-IMAX, W( IMAX+1, KW-1 ), 1 )
+               ROWMAX = CABS1( W( JMAX, KW-1 ) )
+               IF( IMAX.GT.1 ) THEN
+                  JMAX = ICAMAX( IMAX-1, W( 1, KW-1 ), 1 )
+                  ROWMAX = MAX( ROWMAX, CABS1( W( JMAX, KW-1 ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( ABS( REAL( W( IMAX, KW-1 ) ) ).GE.ALPHA*ROWMAX )
+     $                   THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+*
+*                 copy column KW-1 of W to column KW
+*
+                  CALL CCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 )
+               ELSE
+*
+*                 interchange rows and columns K-1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K - KSTEP + 1
+            KKW = NB + KK - N
+*
+*           Updated column KP is already stored in column KKW of W
+*
+            IF( KP.NE.KK ) THEN
+*
+*              Copy non-updated column KK to column KP
+*
+               A( KP, KP ) = REAL( A( KK, KK ) )
+               CALL CCOPY( KK-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ),
+     $                     LDA )
+               CALL CLACGV( KK-1-KP, A( KP, KP+1 ), LDA )
+               CALL CCOPY( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 )
+*
+*              Interchange rows KK and KP in last KK columns of A and W
+*
+               IF( KK.LT.N )
+     $            CALL CSWAP( N-KK, A( KK, KK+1 ), LDA, A( KP, KK+1 ),
+     $                        LDA )
+               CALL CSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ),
+     $                     LDW )
+            END IF
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column KW of W now holds
+*
+*              W(k) = U(k)*D(k)
+*
+*              where U(k) is the k-th column of U
+*
+*              Store U(k) in column k of A
+*
+               CALL CCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 )
+               R1 = ONE / REAL( A( K, K ) )
+               CALL CSSCAL( K-1, R1, A( 1, K ), 1 )
+*
+*              Conjugate W(k)
+*
+               CALL CLACGV( K-1, W( 1, KW ), 1 )
+            ELSE
+*
+*              2-by-2 pivot block D(k): columns KW and KW-1 of W now
+*              hold
+*
+*              ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k)
+*
+*              where U(k) and U(k-1) are the k-th and (k-1)-th columns
+*              of U
+*
+               IF( K.GT.2 ) THEN
+*
+*                 Store U(k) and U(k-1) in columns k and k-1 of A
+*
+                  D21 = W( K-1, KW )
+                  D11 = W( K, KW ) / CONJG( D21 )
+                  D22 = W( K-1, KW-1 ) / D21
+                  T = ONE / ( REAL( D11*D22 )-ONE )
+                  D21 = T / D21
+                  DO 20 J = 1, K - 2
+                     A( J, K-1 ) = D21*( D11*W( J, KW-1 )-W( J, KW ) )
+                     A( J, K ) = CONJG( D21 )*
+     $                           ( D22*W( J, KW )-W( J, KW-1 ) )
+   20             CONTINUE
+               END IF
+*
+*              Copy D(k) to A
+*
+               A( K-1, K-1 ) = W( K-1, KW-1 )
+               A( K-1, K ) = W( K-1, KW )
+               A( K, K ) = W( K, KW )
+*
+*              Conjugate W(k) and W(k-1)
+*
+               CALL CLACGV( K-1, W( 1, KW ), 1 )
+               CALL CLACGV( K-2, W( 1, KW-1 ), 1 )
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K-1 ) = -KP
+         END IF
+*
+*        Decrease K and return to the start of the main loop
+*
+         K = K - KSTEP
+         GO TO 10
+*
+   30    CONTINUE
+*
+*        Update the upper triangle of A11 (= A(1:k,1:k)) as
+*
+*        A11 := A11 - U12*D*U12' = A11 - U12*W'
+*
+*        computing blocks of NB columns at a time (note that conjg(W) is
+*        actually stored)
+*
+         DO 50 J = ( ( K-1 ) / NB )*NB + 1, 1, -NB
+            JB = MIN( NB, K-J+1 )
+*
+*           Update the upper triangle of the diagonal block
+*
+            DO 40 JJ = J, J + JB - 1
+               A( JJ, JJ ) = REAL( A( JJ, JJ ) )
+               CALL CGEMV( 'No transpose', JJ-J+1, N-K, -CONE,
+     $                     A( J, K+1 ), LDA, W( JJ, KW+1 ), LDW, CONE,
+     $                     A( J, JJ ), 1 )
+               A( JJ, JJ ) = REAL( A( JJ, JJ ) )
+   40       CONTINUE
+*
+*           Update the rectangular superdiagonal block
+*
+            CALL CGEMM( 'No transpose', 'Transpose', J-1, JB, N-K,
+     $                  -CONE, A( 1, K+1 ), LDA, W( J, KW+1 ), LDW,
+     $                  CONE, A( 1, J ), LDA )
+   50    CONTINUE
+*
+*        Put U12 in standard form by partially undoing the interchanges
+*        in columns k+1:n
+*
+         J = K + 1
+   60    CONTINUE
+         JJ = J
+         JP = IPIV( J )
+         IF( JP.LT.0 ) THEN
+            JP = -JP
+            J = J + 1
+         END IF
+         J = J + 1
+         IF( JP.NE.JJ .AND. J.LE.N )
+     $      CALL CSWAP( N-J+1, A( JP, J ), LDA, A( JJ, J ), LDA )
+         IF( J.LE.N )
+     $      GO TO 60
+*
+*        Set KB to the number of columns factorized
+*
+         KB = N - K
+*
+      ELSE
+*
+*        Factorize the leading columns of A using the lower triangle
+*        of A and working forwards, and compute the matrix W = L21*D
+*        for use in updating A22 (note that conjg(W) is actually stored)
+*
+*        K is the main loop index, increasing from 1 in steps of 1 or 2
+*
+         K = 1
+   70    CONTINUE
+*
+*        Exit from loop
+*
+         IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N )
+     $      GO TO 90
+*
+*        Copy column K of A to column K of W and update it
+*
+         W( K, K ) = REAL( A( K, K ) )
+         IF( K.LT.N )
+     $      CALL CCOPY( N-K, A( K+1, K ), 1, W( K+1, K ), 1 )
+         CALL CGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ), LDA,
+     $               W( K, 1 ), LDW, CONE, W( K, K ), 1 )
+         W( K, K ) = REAL( W( K, K ) )
+*
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = ABS( REAL( W( K, K ) ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.LT.N ) THEN
+            IMAX = K + ICAMAX( N-K, W( K+1, K ), 1 )
+            COLMAX = CABS1( W( IMAX, K ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+            A( K, K ) = REAL( A( K, K ) )
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              Copy column IMAX to column K+1 of W and update it
+*
+               CALL CCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1 )
+               CALL CLACGV( IMAX-K, W( K, K+1 ), 1 )
+               W( IMAX, K+1 ) = REAL( A( IMAX, IMAX ) )
+               IF( IMAX.LT.N )
+     $            CALL CCOPY( N-IMAX, A( IMAX+1, IMAX ), 1,
+     $                        W( IMAX+1, K+1 ), 1 )
+               CALL CGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ),
+     $                     LDA, W( IMAX, 1 ), LDW, CONE, W( K, K+1 ),
+     $                     1 )
+               W( IMAX, K+1 ) = REAL( W( IMAX, K+1 ) )
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               JMAX = K - 1 + ICAMAX( IMAX-K, W( K, K+1 ), 1 )
+               ROWMAX = CABS1( W( JMAX, K+1 ) )
+               IF( IMAX.LT.N ) THEN
+                  JMAX = IMAX + ICAMAX( N-IMAX, W( IMAX+1, K+1 ), 1 )
+                  ROWMAX = MAX( ROWMAX, CABS1( W( JMAX, K+1 ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( ABS( REAL( W( IMAX, K+1 ) ) ).GE.ALPHA*ROWMAX )
+     $                   THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+*
+*                 copy column K+1 of W to column K
+*
+                  CALL CCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 )
+               ELSE
+*
+*                 interchange rows and columns K+1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K + KSTEP - 1
+*
+*           Updated column KP is already stored in column KK of W
+*
+            IF( KP.NE.KK ) THEN
+*
+*              Copy non-updated column KK to column KP
+*
+               A( KP, KP ) = REAL( A( KK, KK ) )
+               CALL CCOPY( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ),
+     $                     LDA )
+               CALL CLACGV( KP-KK-1, A( KP, KK+1 ), LDA )
+               IF( KP.LT.N )
+     $            CALL CCOPY( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 )
+*
+*              Interchange rows KK and KP in first KK columns of A and W
+*
+               CALL CSWAP( KK-1, A( KK, 1 ), LDA, A( KP, 1 ), LDA )
+               CALL CSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW )
+            END IF
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column k of W now holds
+*
+*              W(k) = L(k)*D(k)
+*
+*              where L(k) is the k-th column of L
+*
+*              Store L(k) in column k of A
+*
+               CALL CCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 )
+               IF( K.LT.N ) THEN
+                  R1 = ONE / REAL( A( K, K ) )
+                  CALL CSSCAL( N-K, R1, A( K+1, K ), 1 )
+*
+*                 Conjugate W(k)
+*
+                  CALL CLACGV( N-K, W( K+1, K ), 1 )
+               END IF
+            ELSE
+*
+*              2-by-2 pivot block D(k): columns k and k+1 of W now hold
+*
+*              ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k)
+*
+*              where L(k) and L(k+1) are the k-th and (k+1)-th columns
+*              of L
+*
+               IF( K.LT.N-1 ) THEN
+*
+*                 Store L(k) and L(k+1) in columns k and k+1 of A
+*
+                  D21 = W( K+1, K )
+                  D11 = W( K+1, K+1 ) / D21
+                  D22 = W( K, K ) / CONJG( D21 )
+                  T = ONE / ( REAL( D11*D22 )-ONE )
+                  D21 = T / D21
+                  DO 80 J = K + 2, N
+                     A( J, K ) = CONJG( D21 )*
+     $                           ( D11*W( J, K )-W( J, K+1 ) )
+                     A( J, K+1 ) = D21*( D22*W( J, K+1 )-W( J, K ) )
+   80             CONTINUE
+               END IF
+*
+*              Copy D(k) to A
+*
+               A( K, K ) = W( K, K )
+               A( K+1, K ) = W( K+1, K )
+               A( K+1, K+1 ) = W( K+1, K+1 )
+*
+*              Conjugate W(k) and W(k+1)
+*
+               CALL CLACGV( N-K, W( K+1, K ), 1 )
+               CALL CLACGV( N-K-1, W( K+2, K+1 ), 1 )
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K+1 ) = -KP
+         END IF
+*
+*        Increase K and return to the start of the main loop
+*
+         K = K + KSTEP
+         GO TO 70
+*
+   90    CONTINUE
+*
+*        Update the lower triangle of A22 (= A(k:n,k:n)) as
+*
+*        A22 := A22 - L21*D*L21' = A22 - L21*W'
+*
+*        computing blocks of NB columns at a time (note that conjg(W) is
+*        actually stored)
+*
+         DO 110 J = K, N, NB
+            JB = MIN( NB, N-J+1 )
+*
+*           Update the lower triangle of the diagonal block
+*
+            DO 100 JJ = J, J + JB - 1
+               A( JJ, JJ ) = REAL( A( JJ, JJ ) )
+               CALL CGEMV( 'No transpose', J+JB-JJ, K-1, -CONE,
+     $                     A( JJ, 1 ), LDA, W( JJ, 1 ), LDW, CONE,
+     $                     A( JJ, JJ ), 1 )
+               A( JJ, JJ ) = REAL( A( JJ, JJ ) )
+  100       CONTINUE
+*
+*           Update the rectangular subdiagonal block
+*
+            IF( J+JB.LE.N )
+     $         CALL CGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB,
+     $                     K-1, -CONE, A( J+JB, 1 ), LDA, W( J, 1 ),
+     $                     LDW, CONE, A( J+JB, J ), LDA )
+  110    CONTINUE
+*
+*        Put L21 in standard form by partially undoing the interchanges
+*        in columns 1:k-1
+*
+         J = K - 1
+  120    CONTINUE
+         JJ = J
+         JP = IPIV( J )
+         IF( JP.LT.0 ) THEN
+            JP = -JP
+            J = J - 1
+         END IF
+         J = J - 1
+         IF( JP.NE.JJ .AND. J.GE.1 )
+     $      CALL CSWAP( J, A( JP, 1 ), LDA, A( JJ, 1 ), LDA )
+         IF( J.GE.1 )
+     $      GO TO 120
+*
+*        Set KB to the number of columns factorized
+*
+         KB = K - 1
+*
+      END IF
+      RETURN
+*
+*     End of CLAHEF
+*
+      END
diff --git a/costa/native/external/lapack/clahqr.f b/costa/native/external/lapack/clahqr.f
new file mode 100644
index 000000000..d21dec643
--- /dev/null
+++ b/costa/native/external/lapack/clahqr.f
@@ -0,0 +1,383 @@
+      SUBROUTINE CLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ,
+     $                   IHIZ, Z, LDZ, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      LOGICAL            WANTT, WANTZ
+      INTEGER            IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            H( LDH, * ), W( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLAHQR is an auxiliary routine called by CHSEQR to update the
+*  eigenvalues and Schur decomposition already computed by CHSEQR, by
+*  dealing with the Hessenberg submatrix in rows and columns ILO to IHI.
+*
+*  Arguments
+*  =========
+*
+*  WANTT   (input) LOGICAL
+*          = .TRUE. : the full Schur form T is required;
+*          = .FALSE.: only eigenvalues are required.
+*
+*  WANTZ   (input) LOGICAL
+*          = .TRUE. : the matrix of Schur vectors Z is required;
+*          = .FALSE.: Schur vectors are not required.
+*
+*  N       (input) INTEGER
+*          The order of the matrix H.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          It is assumed that H is already upper triangular in rows and
+*          columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless ILO = 1).
+*          CLAHQR works primarily with the Hessenberg submatrix in rows
+*          and columns ILO to IHI, but applies transformations to all of
+*          H if WANTT is .TRUE..
+*          1 <= ILO <= max(1,IHI); IHI <= N.
+*
+*  H       (input/output) COMPLEX array, dimension (LDH,N)
+*          On entry, the upper Hessenberg matrix H.
+*          On exit, if WANTT is .TRUE., H is upper triangular in rows
+*          and columns ILO:IHI, with any 2-by-2 diagonal blocks in
+*          standard form. If WANTT is .FALSE., the contents of H are
+*          unspecified on exit.
+*
+*  LDH     (input) INTEGER
+*          The leading dimension of the array H. LDH >= max(1,N).
+*
+*  W       (output) COMPLEX array, dimension (N)
+*          The computed eigenvalues ILO to IHI are stored in the
+*          corresponding elements of W. If WANTT is .TRUE., the
+*          eigenvalues are stored in the same order as on the diagonal
+*          of the Schur form returned in H, with W(i) = H(i,i).
+*
+*  ILOZ    (input) INTEGER
+*  IHIZ    (input) INTEGER
+*          Specify the rows of Z to which transformations must be
+*          applied if WANTZ is .TRUE..
+*          1 <= ILOZ <= ILO; IHI <= IHIZ <= N.
+*
+*  Z       (input/output) COMPLEX array, dimension (LDZ,N)
+*          If WANTZ is .TRUE., on entry Z must contain the current
+*          matrix Z of transformations accumulated by CHSEQR, and on
+*          exit Z has been updated; transformations are applied only to
+*          the submatrix Z(ILOZ:IHIZ,ILO:IHI).
+*          If WANTZ is .FALSE., Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z. LDZ >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          > 0: if INFO = i, CLAHQR failed to compute all the
+*               eigenvalues ILO to IHI in a total of 30*(IHI-ILO+1)
+*               iterations; elements i+1:ihi of W contain those
+*               eigenvalues which have been successfully computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ZERO, ONE
+      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   ONE = ( 1.0E+0, 0.0E+0 ) )
+      REAL               RZERO, HALF
+      PARAMETER          ( RZERO = 0.0E+0, HALF = 0.5E+0 )
+      REAL               DAT1
+      PARAMETER          ( DAT1 = 0.75E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, I1, I2, ITN, ITS, J, K, L, M, NH, NZ
+      REAL               H10, H21, RTEMP, S, SMLNUM, T2, TST1, ULP
+      COMPLEX            CDUM, H11, H11S, H22, SUM, T, T1, TEMP, U, V2,
+     $                   X, Y
+*     ..
+*     .. Local Arrays ..
+      REAL               RWORK( 1 )
+      COMPLEX            V( 2 )
+*     ..
+*     .. External Functions ..
+      REAL               CLANHS, SLAMCH
+      COMPLEX            CLADIV
+      EXTERNAL           CLANHS, SLAMCH, CLADIV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CLARFG, CSCAL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, CONJG, MAX, MIN, REAL, SQRT
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+      IF( ILO.EQ.IHI ) THEN
+         W( ILO ) = H( ILO, ILO )
+         RETURN
+      END IF
+*
+      NH = IHI - ILO + 1
+      NZ = IHIZ - ILOZ + 1
+*
+*     Set machine-dependent constants for the stopping criterion.
+*     If norm(H) <= sqrt(OVFL), overflow should not occur.
+*
+      ULP = SLAMCH( 'Precision' )
+      SMLNUM = SLAMCH( 'Safe minimum' ) / ULP
+*
+*     I1 and I2 are the indices of the first row and last column of H
+*     to which transformations must be applied. If eigenvalues only are
+*     being computed, I1 and I2 are set inside the main loop.
+*
+      IF( WANTT ) THEN
+         I1 = 1
+         I2 = N
+      END IF
+*
+*     ITN is the total number of QR iterations allowed.
+*
+      ITN = 30*NH
+*
+*     The main loop begins here. I is the loop index and decreases from
+*     IHI to ILO in steps of 1. Each iteration of the loop works
+*     with the active submatrix in rows and columns L to I.
+*     Eigenvalues I+1 to IHI have already converged. Either L = ILO, or
+*     H(L,L-1) is negligible so that the matrix splits.
+*
+      I = IHI
+   10 CONTINUE
+      IF( I.LT.ILO )
+     $   GO TO 130
+*
+*     Perform QR iterations on rows and columns ILO to I until a
+*     submatrix of order 1 splits off at the bottom because a
+*     subdiagonal element has become negligible.
+*
+      L = ILO
+      DO 110 ITS = 0, ITN
+*
+*        Look for a single small subdiagonal element.
+*
+         DO 20 K = I, L + 1, -1
+            TST1 = CABS1( H( K-1, K-1 ) ) + CABS1( H( K, K ) )
+            IF( TST1.EQ.RZERO )
+     $         TST1 = CLANHS( '1', I-L+1, H( L, L ), LDH, RWORK )
+            IF( ABS( REAL( H( K, K-1 ) ) ).LE.MAX( ULP*TST1, SMLNUM ) )
+     $         GO TO 30
+   20    CONTINUE
+   30    CONTINUE
+         L = K
+         IF( L.GT.ILO ) THEN
+*
+*           H(L,L-1) is negligible
+*
+            H( L, L-1 ) = ZERO
+         END IF
+*
+*        Exit from loop if a submatrix of order 1 has split off.
+*
+         IF( L.GE.I )
+     $      GO TO 120
+*
+*        Now the active submatrix is in rows and columns L to I. If
+*        eigenvalues only are being computed, only the active submatrix
+*        need be transformed.
+*
+         IF( .NOT.WANTT ) THEN
+            I1 = L
+            I2 = I
+         END IF
+*
+         IF( ITS.EQ.10 .OR. ITS.EQ.20 ) THEN
+*
+*           Exceptional shift.
+*
+            S = DAT1*ABS( REAL( H( I, I-1 ) ) )
+            T = S + H( I, I ) 
+         ELSE
+*
+*           Wilkinson's shift.
+*
+            T = H( I, I )
+            U = H( I-1, I )*REAL( H( I, I-1 ) )
+            IF( U.NE.ZERO ) THEN
+               X = HALF*( H( I-1, I-1 )-T )
+               Y = SQRT( X*X+U )
+               IF( REAL( X )*REAL( Y )+AIMAG( X )*AIMAG( Y ).LT.RZERO )
+     $            Y = -Y
+               T = T - CLADIV( U, ( X+Y ) )
+            END IF
+         END IF
+*
+*        Look for two consecutive small subdiagonal elements.
+*
+         DO 40 M = I - 1, L + 1, -1
+*
+*           Determine the effect of starting the single-shift QR
+*           iteration at row M, and see if this would make H(M,M-1)
+*           negligible.
+*
+            H11 = H( M, M )
+            H22 = H( M+1, M+1 )
+            H11S = H11 - T
+            H21 = H( M+1, M )
+            S = CABS1( H11S ) + ABS( H21 )
+            H11S = H11S / S
+            H21 = H21 / S
+            V( 1 ) = H11S
+            V( 2 ) = H21
+            H10 = H( M, M-1 )
+            TST1 = CABS1( H11S )*( CABS1( H11 )+CABS1( H22 ) )
+            IF( ABS( H10*H21 ).LE.ULP*TST1 )
+     $         GO TO 50
+   40    CONTINUE
+         H11 = H( L, L )
+         H22 = H( L+1, L+1 )
+         H11S = H11 - T
+         H21 = H( L+1, L )
+         S = CABS1( H11S ) + ABS( H21 )
+         H11S = H11S / S
+         H21 = H21 / S
+         V( 1 ) = H11S
+         V( 2 ) = H21
+   50    CONTINUE
+*
+*        Single-shift QR step
+*
+         DO 100 K = M, I - 1
+*
+*           The first iteration of this loop determines a reflection G
+*           from the vector V and applies it from left and right to H,
+*           thus creating a nonzero bulge below the subdiagonal.
+*
+*           Each subsequent iteration determines a reflection G to
+*           restore the Hessenberg form in the (K-1)th column, and thus
+*           chases the bulge one step toward the bottom of the active
+*           submatrix.
+*
+*           V(2) is always real before the call to CLARFG, and hence
+*           after the call T2 ( = T1*V(2) ) is also real.
+*
+            IF( K.GT.M )
+     $         CALL CCOPY( 2, H( K, K-1 ), 1, V, 1 )
+            CALL CLARFG( 2, V( 1 ), V( 2 ), 1, T1 )
+            IF( K.GT.M ) THEN
+               H( K, K-1 ) = V( 1 )
+               H( K+1, K-1 ) = ZERO
+            END IF
+            V2 = V( 2 )
+            T2 = REAL( T1*V2 )
+*
+*           Apply G from the left to transform the rows of the matrix
+*           in columns K to I2.
+*
+            DO 60 J = K, I2
+               SUM = CONJG( T1 )*H( K, J ) + T2*H( K+1, J )
+               H( K, J ) = H( K, J ) - SUM
+               H( K+1, J ) = H( K+1, J ) - SUM*V2
+   60       CONTINUE
+*
+*           Apply G from the right to transform the columns of the
+*           matrix in rows I1 to min(K+2,I).
+*
+            DO 70 J = I1, MIN( K+2, I )
+               SUM = T1*H( J, K ) + T2*H( J, K+1 )
+               H( J, K ) = H( J, K ) - SUM
+               H( J, K+1 ) = H( J, K+1 ) - SUM*CONJG( V2 )
+   70       CONTINUE
+*
+            IF( WANTZ ) THEN
+*
+*              Accumulate transformations in the matrix Z
+*
+               DO 80 J = ILOZ, IHIZ
+                  SUM = T1*Z( J, K ) + T2*Z( J, K+1 )
+                  Z( J, K ) = Z( J, K ) - SUM
+                  Z( J, K+1 ) = Z( J, K+1 ) - SUM*CONJG( V2 )
+   80          CONTINUE
+            END IF
+*
+            IF( K.EQ.M .AND. M.GT.L ) THEN
+*
+*              If the QR step was started at row M > L because two
+*              consecutive small subdiagonals were found, then extra
+*              scaling must be performed to ensure that H(M,M-1) remains
+*              real.
+*
+               TEMP = ONE - T1
+               TEMP = TEMP / ABS( TEMP )
+               H( M+1, M ) = H( M+1, M )*CONJG( TEMP )
+               IF( M+2.LE.I )
+     $            H( M+2, M+1 ) = H( M+2, M+1 )*TEMP
+               DO 90 J = M, I
+                  IF( J.NE.M+1 ) THEN
+                     IF( I2.GT.J )
+     $                  CALL CSCAL( I2-J, TEMP, H( J, J+1 ), LDH )
+                     CALL CSCAL( J-I1, CONJG( TEMP ), H( I1, J ), 1 )
+                     IF( WANTZ ) THEN
+                        CALL CSCAL( NZ, CONJG( TEMP ), Z( ILOZ, J ), 1 )
+                     END IF
+                  END IF
+   90          CONTINUE
+            END IF
+  100    CONTINUE
+*
+*        Ensure that H(I,I-1) is real.
+*
+         TEMP = H( I, I-1 )
+         IF( AIMAG( TEMP ).NE.RZERO ) THEN
+            RTEMP = ABS( TEMP )
+            H( I, I-1 ) = RTEMP
+            TEMP = TEMP / RTEMP
+            IF( I2.GT.I )
+     $         CALL CSCAL( I2-I, CONJG( TEMP ), H( I, I+1 ), LDH )
+            CALL CSCAL( I-I1, TEMP, H( I1, I ), 1 )
+            IF( WANTZ ) THEN
+               CALL CSCAL( NZ, TEMP, Z( ILOZ, I ), 1 )
+            END IF
+         END IF
+*
+  110 CONTINUE
+*
+*     Failure to converge in remaining number of iterations
+*
+      INFO = I
+      RETURN
+*
+  120 CONTINUE
+*
+*     H(I,I-1) is negligible: one eigenvalue has converged.
+*
+      W( I ) = H( I, I )
+*
+*     Decrement number of remaining iterations, and return to start of
+*     the main loop with new value of I.
+*
+      ITN = ITN - ITS
+      I = L - 1
+      GO TO 10
+*
+  130 CONTINUE
+      RETURN
+*
+*     End of CLAHQR
+*
+      END
diff --git a/costa/native/external/lapack/clahrd.f b/costa/native/external/lapack/clahrd.f
new file mode 100644
index 000000000..2d21b6968
--- /dev/null
+++ b/costa/native/external/lapack/clahrd.f
@@ -0,0 +1,212 @@
+      SUBROUTINE CLAHRD( N, K, NB, A, LDA, TAU, T, LDT, Y, LDY )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            K, LDA, LDT, LDY, N, NB
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), T( LDT, NB ), TAU( NB ),
+     $                   Y( LDY, NB )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLAHRD reduces the first NB columns of a complex general n-by-(n-k+1)
+*  matrix A so that elements below the k-th subdiagonal are zero. The
+*  reduction is performed by a unitary similarity transformation
+*  Q' * A * Q. The routine returns the matrices V and T which determine
+*  Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T.
+*
+*  This is an auxiliary routine called by CGEHRD.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.
+*
+*  K       (input) INTEGER
+*          The offset for the reduction. Elements below the k-th
+*          subdiagonal in the first NB columns are reduced to zero.
+*
+*  NB      (input) INTEGER
+*          The number of columns to be reduced.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N-K+1)
+*          On entry, the n-by-(n-k+1) general matrix A.
+*          On exit, the elements on and above the k-th subdiagonal in
+*          the first NB columns are overwritten with the corresponding
+*          elements of the reduced matrix; the elements below the k-th
+*          subdiagonal, with the array TAU, represent the matrix Q as a
+*          product of elementary reflectors. The other columns of A are
+*          unchanged. See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  TAU     (output) COMPLEX array, dimension (NB)
+*          The scalar factors of the elementary reflectors. See Further
+*          Details.
+*
+*  T       (output) COMPLEX array, dimension (LDT,NB)
+*          The upper triangular matrix T.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T.  LDT >= NB.
+*
+*  Y       (output) COMPLEX array, dimension (LDY,NB)
+*          The n-by-nb matrix Y.
+*
+*  LDY     (input) INTEGER
+*          The leading dimension of the array Y. LDY >= max(1,N).
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of nb elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(nb).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in
+*  A(i+k+1:n,i), and tau in TAU(i).
+*
+*  The elements of the vectors v together form the (n-k+1)-by-nb matrix
+*  V which is needed, with T and Y, to apply the transformation to the
+*  unreduced part of the matrix, using an update of the form:
+*  A := (I - V*T*V') * (A - Y*V').
+*
+*  The contents of A on exit are illustrated by the following example
+*  with n = 7, k = 3 and nb = 2:
+*
+*     ( a   h   a   a   a )
+*     ( a   h   a   a   a )
+*     ( a   h   a   a   a )
+*     ( h   h   a   a   a )
+*     ( v1  h   a   a   a )
+*     ( v1  v2  a   a   a )
+*     ( v1  v2  a   a   a )
+*
+*  where a denotes an element of the original matrix A, h denotes a
+*  modified element of the upper Hessenberg matrix H, and vi denotes an
+*  element of the vector defining H(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ZERO, ONE
+      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      COMPLEX            EI
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CCOPY, CGEMV, CLACGV, CLARFG, CSCAL,
+     $                   CTRMV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.1 )
+     $   RETURN
+*
+      DO 10 I = 1, NB
+         IF( I.GT.1 ) THEN
+*
+*           Update A(1:n,i)
+*
+*           Compute i-th column of A - Y * V'
+*
+            CALL CLACGV( I-1, A( K+I-1, 1 ), LDA )
+            CALL CGEMV( 'No transpose', N, I-1, -ONE, Y, LDY,
+     $                  A( K+I-1, 1 ), LDA, ONE, A( 1, I ), 1 )
+            CALL CLACGV( I-1, A( K+I-1, 1 ), LDA )
+*
+*           Apply I - V * T' * V' to this column (call it b) from the
+*           left, using the last column of T as workspace
+*
+*           Let  V = ( V1 )   and   b = ( b1 )   (first I-1 rows)
+*                    ( V2 )             ( b2 )
+*
+*           where V1 is unit lower triangular
+*
+*           w := V1' * b1
+*
+            CALL CCOPY( I-1, A( K+1, I ), 1, T( 1, NB ), 1 )
+            CALL CTRMV( 'Lower', 'Conjugate transpose', 'Unit', I-1,
+     $                  A( K+1, 1 ), LDA, T( 1, NB ), 1 )
+*
+*           w := w + V2'*b2
+*
+            CALL CGEMV( 'Conjugate transpose', N-K-I+1, I-1, ONE,
+     $                  A( K+I, 1 ), LDA, A( K+I, I ), 1, ONE,
+     $                  T( 1, NB ), 1 )
+*
+*           w := T'*w
+*
+            CALL CTRMV( 'Upper', 'Conjugate transpose', 'Non-unit', I-1,
+     $                  T, LDT, T( 1, NB ), 1 )
+*
+*           b2 := b2 - V2*w
+*
+            CALL CGEMV( 'No transpose', N-K-I+1, I-1, -ONE, A( K+I, 1 ),
+     $                  LDA, T( 1, NB ), 1, ONE, A( K+I, I ), 1 )
+*
+*           b1 := b1 - V1*w
+*
+            CALL CTRMV( 'Lower', 'No transpose', 'Unit', I-1,
+     $                  A( K+1, 1 ), LDA, T( 1, NB ), 1 )
+            CALL CAXPY( I-1, -ONE, T( 1, NB ), 1, A( K+1, I ), 1 )
+*
+            A( K+I-1, I-1 ) = EI
+         END IF
+*
+*        Generate the elementary reflector H(i) to annihilate
+*        A(k+i+1:n,i)
+*
+         EI = A( K+I, I )
+         CALL CLARFG( N-K-I+1, EI, A( MIN( K+I+1, N ), I ), 1,
+     $                TAU( I ) )
+         A( K+I, I ) = ONE
+*
+*        Compute  Y(1:n,i)
+*
+         CALL CGEMV( 'No transpose', N, N-K-I+1, ONE, A( 1, I+1 ), LDA,
+     $               A( K+I, I ), 1, ZERO, Y( 1, I ), 1 )
+         CALL CGEMV( 'Conjugate transpose', N-K-I+1, I-1, ONE,
+     $               A( K+I, 1 ), LDA, A( K+I, I ), 1, ZERO, T( 1, I ),
+     $               1 )
+         CALL CGEMV( 'No transpose', N, I-1, -ONE, Y, LDY, T( 1, I ), 1,
+     $               ONE, Y( 1, I ), 1 )
+         CALL CSCAL( N, TAU( I ), Y( 1, I ), 1 )
+*
+*        Compute T(1:i,i)
+*
+         CALL CSCAL( I-1, -TAU( I ), T( 1, I ), 1 )
+         CALL CTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, LDT,
+     $               T( 1, I ), 1 )
+         T( I, I ) = TAU( I )
+*
+   10 CONTINUE
+      A( K+NB, NB ) = EI
+*
+      RETURN
+*
+*     End of CLAHRD
+*
+      END
diff --git a/costa/native/external/lapack/claic1.f b/costa/native/external/lapack/claic1.f
new file mode 100644
index 000000000..2dc74add6
--- /dev/null
+++ b/costa/native/external/lapack/claic1.f
@@ -0,0 +1,296 @@
+      SUBROUTINE CLAIC1( JOB, J, X, SEST, W, GAMMA, SESTPR, S, C )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            J, JOB
+      REAL               SEST, SESTPR
+      COMPLEX            C, GAMMA, S
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            W( J ), X( J )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLAIC1 applies one step of incremental condition estimation in
+*  its simplest version:
+*
+*  Let x, twonorm(x) = 1, be an approximate singular vector of an j-by-j
+*  lower triangular matrix L, such that
+*           twonorm(L*x) = sest
+*  Then CLAIC1 computes sestpr, s, c such that
+*  the vector
+*                  [ s*x ]
+*           xhat = [  c  ]
+*  is an approximate singular vector of
+*                  [ L     0  ]
+*           Lhat = [ w' gamma ]
+*  in the sense that
+*           twonorm(Lhat*xhat) = sestpr.
+*
+*  Depending on JOB, an estimate for the largest or smallest singular
+*  value is computed.
+*
+*  Note that [s c]' and sestpr**2 is an eigenpair of the system
+*
+*      diag(sest*sest, 0) + [alpha  gamma] * [ conjg(alpha) ]
+*                                            [ conjg(gamma) ]
+*
+*  where  alpha =  conjg(x)'*w.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) INTEGER
+*          = 1: an estimate for the largest singular value is computed.
+*          = 2: an estimate for the smallest singular value is computed.
+*
+*  J       (input) INTEGER
+*          Length of X and W
+*
+*  X       (input) COMPLEX array, dimension (J)
+*          The j-vector x.
+*
+*  SEST    (input) REAL
+*          Estimated singular value of j by j matrix L
+*
+*  W       (input) COMPLEX array, dimension (J)
+*          The j-vector w.
+*
+*  GAMMA   (input) COMPLEX
+*          The diagonal element gamma.
+*
+*  SESTPR  (output) REAL
+*          Estimated singular value of (j+1) by (j+1) matrix Lhat.
+*
+*  S       (output) COMPLEX
+*          Sine needed in forming xhat.
+*
+*  C       (output) COMPLEX
+*          Cosine needed in forming xhat.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0 )
+      REAL               HALF, FOUR
+      PARAMETER          ( HALF = 0.5E0, FOUR = 4.0E0 )
+*     ..
+*     .. Local Scalars ..
+      REAL               ABSALP, ABSEST, ABSGAM, B, EPS, NORMA, S1, S2,
+     $                   SCL, T, TEST, TMP, ZETA1, ZETA2
+      COMPLEX            ALPHA, COSINE, SINE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, CONJG, MAX, SQRT
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      COMPLEX            CDOTC
+      EXTERNAL           SLAMCH, CDOTC
+*     ..
+*     .. Executable Statements ..
+*
+      EPS = SLAMCH( 'Epsilon' )
+      ALPHA = CDOTC( J, X, 1, W, 1 )
+*
+      ABSALP = ABS( ALPHA )
+      ABSGAM = ABS( GAMMA )
+      ABSEST = ABS( SEST )
+*
+      IF( JOB.EQ.1 ) THEN
+*
+*        Estimating largest singular value
+*
+*        special cases
+*
+         IF( SEST.EQ.ZERO ) THEN
+            S1 = MAX( ABSGAM, ABSALP )
+            IF( S1.EQ.ZERO ) THEN
+               S = ZERO
+               C = ONE
+               SESTPR = ZERO
+            ELSE
+               S = ALPHA / S1
+               C = GAMMA / S1
+               TMP = SQRT( S*CONJG( S )+C*CONJG( C ) )
+               S = S / TMP
+               C = C / TMP
+               SESTPR = S1*TMP
+            END IF
+            RETURN
+         ELSE IF( ABSGAM.LE.EPS*ABSEST ) THEN
+            S = ONE
+            C = ZERO
+            TMP = MAX( ABSEST, ABSALP )
+            S1 = ABSEST / TMP
+            S2 = ABSALP / TMP
+            SESTPR = TMP*SQRT( S1*S1+S2*S2 )
+            RETURN
+         ELSE IF( ABSALP.LE.EPS*ABSEST ) THEN
+            S1 = ABSGAM
+            S2 = ABSEST
+            IF( S1.LE.S2 ) THEN
+               S = ONE
+               C = ZERO
+               SESTPR = S2
+            ELSE
+               S = ZERO
+               C = ONE
+               SESTPR = S1
+            END IF
+            RETURN
+         ELSE IF( ABSEST.LE.EPS*ABSALP .OR. ABSEST.LE.EPS*ABSGAM ) THEN
+            S1 = ABSGAM
+            S2 = ABSALP
+            IF( S1.LE.S2 ) THEN
+               TMP = S1 / S2
+               SCL = SQRT( ONE+TMP*TMP )
+               SESTPR = S2*SCL
+               S = ( ALPHA / S2 ) / SCL
+               C = ( GAMMA / S2 ) / SCL
+            ELSE
+               TMP = S2 / S1
+               SCL = SQRT( ONE+TMP*TMP )
+               SESTPR = S1*SCL
+               S = ( ALPHA / S1 ) / SCL
+               C = ( GAMMA / S1 ) / SCL
+            END IF
+            RETURN
+         ELSE
+*
+*           normal case
+*
+            ZETA1 = ABSALP / ABSEST
+            ZETA2 = ABSGAM / ABSEST
+*
+            B = ( ONE-ZETA1*ZETA1-ZETA2*ZETA2 )*HALF
+            C = ZETA1*ZETA1
+            IF( B.GT.ZERO ) THEN
+               T = C / ( B+SQRT( B*B+C ) )
+            ELSE
+               T = SQRT( B*B+C ) - B
+            END IF
+*
+            SINE = -( ALPHA / ABSEST ) / T
+            COSINE = -( GAMMA / ABSEST ) / ( ONE+T )
+            TMP = SQRT( SINE*CONJG( SINE )+COSINE*CONJG( COSINE ) )
+            S = SINE / TMP
+            C = COSINE / TMP
+            SESTPR = SQRT( T+ONE )*ABSEST
+            RETURN
+         END IF
+*
+      ELSE IF( JOB.EQ.2 ) THEN
+*
+*        Estimating smallest singular value
+*
+*        special cases
+*
+         IF( SEST.EQ.ZERO ) THEN
+            SESTPR = ZERO
+            IF( MAX( ABSGAM, ABSALP ).EQ.ZERO ) THEN
+               SINE = ONE
+               COSINE = ZERO
+            ELSE
+               SINE = -CONJG( GAMMA )
+               COSINE = CONJG( ALPHA )
+            END IF
+            S1 = MAX( ABS( SINE ), ABS( COSINE ) )
+            S = SINE / S1
+            C = COSINE / S1
+            TMP = SQRT( S*CONJG( S )+C*CONJG( C ) )
+            S = S / TMP
+            C = C / TMP
+            RETURN
+         ELSE IF( ABSGAM.LE.EPS*ABSEST ) THEN
+            S = ZERO
+            C = ONE
+            SESTPR = ABSGAM
+            RETURN
+         ELSE IF( ABSALP.LE.EPS*ABSEST ) THEN
+            S1 = ABSGAM
+            S2 = ABSEST
+            IF( S1.LE.S2 ) THEN
+               S = ZERO
+               C = ONE
+               SESTPR = S1
+            ELSE
+               S = ONE
+               C = ZERO
+               SESTPR = S2
+            END IF
+            RETURN
+         ELSE IF( ABSEST.LE.EPS*ABSALP .OR. ABSEST.LE.EPS*ABSGAM ) THEN
+            S1 = ABSGAM
+            S2 = ABSALP
+            IF( S1.LE.S2 ) THEN
+               TMP = S1 / S2
+               SCL = SQRT( ONE+TMP*TMP )
+               SESTPR = ABSEST*( TMP / SCL )
+               S = -( CONJG( GAMMA ) / S2 ) / SCL
+               C = ( CONJG( ALPHA ) / S2 ) / SCL
+            ELSE
+               TMP = S2 / S1
+               SCL = SQRT( ONE+TMP*TMP )
+               SESTPR = ABSEST / SCL
+               S = -( CONJG( GAMMA ) / S1 ) / SCL
+               C = ( CONJG( ALPHA ) / S1 ) / SCL
+            END IF
+            RETURN
+         ELSE
+*
+*           normal case
+*
+            ZETA1 = ABSALP / ABSEST
+            ZETA2 = ABSGAM / ABSEST
+*
+            NORMA = MAX( ONE+ZETA1*ZETA1+ZETA1*ZETA2,
+     $              ZETA1*ZETA2+ZETA2*ZETA2 )
+*
+*           See if root is closer to zero or to ONE
+*
+            TEST = ONE + TWO*( ZETA1-ZETA2 )*( ZETA1+ZETA2 )
+            IF( TEST.GE.ZERO ) THEN
+*
+*              root is close to zero, compute directly
+*
+               B = ( ZETA1*ZETA1+ZETA2*ZETA2+ONE )*HALF
+               C = ZETA2*ZETA2
+               T = C / ( B+SQRT( ABS( B*B-C ) ) )
+               SINE = ( ALPHA / ABSEST ) / ( ONE-T )
+               COSINE = -( GAMMA / ABSEST ) / T
+               SESTPR = SQRT( T+FOUR*EPS*EPS*NORMA )*ABSEST
+            ELSE
+*
+*              root is closer to ONE, shift by that amount
+*
+               B = ( ZETA2*ZETA2+ZETA1*ZETA1-ONE )*HALF
+               C = ZETA1*ZETA1
+               IF( B.GE.ZERO ) THEN
+                  T = -C / ( B+SQRT( B*B+C ) )
+               ELSE
+                  T = B - SQRT( B*B+C )
+               END IF
+               SINE = -( ALPHA / ABSEST ) / T
+               COSINE = -( GAMMA / ABSEST ) / ( ONE+T )
+               SESTPR = SQRT( ONE+T+FOUR*EPS*EPS*NORMA )*ABSEST
+            END IF
+            TMP = SQRT( SINE*CONJG( SINE )+COSINE*CONJG( COSINE ) )
+            S = SINE / TMP
+            C = COSINE / TMP
+            RETURN
+*
+         END IF
+      END IF
+      RETURN
+*
+*     End of CLAIC1
+*
+      END
diff --git a/costa/native/external/lapack/clals0.f b/costa/native/external/lapack/clals0.f
new file mode 100644
index 000000000..9434b8206
--- /dev/null
+++ b/costa/native/external/lapack/clals0.f
@@ -0,0 +1,434 @@
+      SUBROUTINE CLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B, LDB, BX, LDBX,
+     $                   PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM,
+     $                   POLES, DIFL, DIFR, Z, K, C, S, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     December 1, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            GIVPTR, ICOMPQ, INFO, K, LDB, LDBX, LDGCOL,
+     $                   LDGNUM, NL, NR, NRHS, SQRE
+      REAL               C, S
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( LDGCOL, * ), PERM( * )
+      REAL               DIFL( * ), DIFR( LDGNUM, * ),
+     $                   GIVNUM( LDGNUM, * ), POLES( LDGNUM, * ),
+     $                   RWORK( * ), Z( * )
+      COMPLEX            B( LDB, * ), BX( LDBX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLALS0 applies back the multiplying factors of either the left or the
+*  right singular vector matrix of a diagonal matrix appended by a row
+*  to the right hand side matrix B in solving the least squares problem
+*  using the divide-and-conquer SVD approach.
+*
+*  For the left singular vector matrix, three types of orthogonal
+*  matrices are involved:
+*
+*  (1L) Givens rotations: the number of such rotations is GIVPTR; the
+*       pairs of columns/rows they were applied to are stored in GIVCOL;
+*       and the C- and S-values of these rotations are stored in GIVNUM.
+*
+*  (2L) Permutation. The (NL+1)-st row of B is to be moved to the first
+*       row, and for J=2:N, PERM(J)-th row of B is to be moved to the
+*       J-th row.
+*
+*  (3L) The left singular vector matrix of the remaining matrix.
+*
+*  For the right singular vector matrix, four types of orthogonal
+*  matrices are involved:
+*
+*  (1R) The right singular vector matrix of the remaining matrix.
+*
+*  (2R) If SQRE = 1, one extra Givens rotation to generate the right
+*       null space.
+*
+*  (3R) The inverse transformation of (2L).
+*
+*  (4R) The inverse transformation of (1L).
+*
+*  Arguments
+*  =========
+*
+*  ICOMPQ (input) INTEGER
+*         Specifies whether singular vectors are to be computed in
+*         factored form:
+*         = 0: Left singular vector matrix.
+*         = 1: Right singular vector matrix.
+*
+*  NL     (input) INTEGER
+*         The row dimension of the upper block. NL >= 1.
+*
+*  NR     (input) INTEGER
+*         The row dimension of the lower block. NR >= 1.
+*
+*  SQRE   (input) INTEGER
+*         = 0: the lower block is an NR-by-NR square matrix.
+*         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+*
+*         The bidiagonal matrix has row dimension N = NL + NR + 1,
+*         and column dimension M = N + SQRE.
+*
+*  NRHS   (input) INTEGER
+*         The number of columns of B and BX. NRHS must be at least 1.
+*
+*  B      (input/output) COMPLEX array, dimension ( LDB, NRHS )
+*         On input, B contains the right hand sides of the least
+*         squares problem in rows 1 through M. On output, B contains
+*         the solution X in rows 1 through N.
+*
+*  LDB    (input) INTEGER
+*         The leading dimension of B. LDB must be at least
+*         max(1,MAX( M, N ) ).
+*
+*  BX     (workspace) COMPLEX array, dimension ( LDBX, NRHS )
+*
+*  LDBX   (input) INTEGER
+*         The leading dimension of BX.
+*
+*  PERM   (input) INTEGER array, dimension ( N )
+*         The permutations (from deflation and sorting) applied
+*         to the two blocks.
+*
+*  GIVPTR (input) INTEGER
+*         The number of Givens rotations which took place in this
+*         subproblem.
+*
+*  GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 )
+*         Each pair of numbers indicates a pair of rows/columns
+*         involved in a Givens rotation.
+*
+*  LDGCOL (input) INTEGER
+*         The leading dimension of GIVCOL, must be at least N.
+*
+*  GIVNUM (input) REAL array, dimension ( LDGNUM, 2 )
+*         Each number indicates the C or S value used in the
+*         corresponding Givens rotation.
+*
+*  LDGNUM (input) INTEGER
+*         The leading dimension of arrays DIFR, POLES and
+*         GIVNUM, must be at least K.
+*
+*  POLES  (input) REAL array, dimension ( LDGNUM, 2 )
+*         On entry, POLES(1:K, 1) contains the new singular
+*         values obtained from solving the secular equation, and
+*         POLES(1:K, 2) is an array containing the poles in the secular
+*         equation.
+*
+*  DIFL   (input) REAL array, dimension ( K ).
+*         On entry, DIFL(I) is the distance between I-th updated
+*         (undeflated) singular value and the I-th (undeflated) old
+*         singular value.
+*
+*  DIFR   (input) REAL array, dimension ( LDGNUM, 2 ).
+*         On entry, DIFR(I, 1) contains the distances between I-th
+*         updated (undeflated) singular value and the I+1-th
+*         (undeflated) old singular value. And DIFR(I, 2) is the
+*         normalizing factor for the I-th right singular vector.
+*
+*  Z      (input) REAL array, dimension ( K )
+*         Contain the components of the deflation-adjusted updating row
+*         vector.
+*
+*  K      (input) INTEGER
+*         Contains the dimension of the non-deflated matrix,
+*         This is the order of the related secular equation. 1 <= K <=N.
+*
+*  C      (input) REAL
+*         C contains garbage if SQRE =0 and the C-value of a Givens
+*         rotation related to the right null space if SQRE = 1.
+*
+*  S      (input) REAL
+*         S contains garbage if SQRE =0 and the S-value of a Givens
+*         rotation related to the right null space if SQRE = 1.
+*
+*  RWORK  (workspace) REAL array, dimension
+*         ( K*(1+NRHS) + 2*NRHS )
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Ren-Cang Li, Computer Science Division, University of
+*       California at Berkeley, USA
+*     Osni Marques, LBNL/NERSC, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO, NEGONE
+      PARAMETER          ( ONE = 1.0E0, ZERO = 0.0E0, NEGONE = -1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, JCOL, JROW, M, N, NLP1
+      REAL               DIFLJ, DIFRJ, DJ, DSIGJ, DSIGJP, TEMP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CLACPY, CLASCL, CSROT, CSSCAL, SGEMV,
+     $                   XERBLA
+*     ..
+*     .. External Functions ..
+      REAL               SLAMC3, SNRM2
+      EXTERNAL           SLAMC3, SNRM2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          AIMAG, CMPLX, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( NL.LT.1 ) THEN
+         INFO = -2
+      ELSE IF( NR.LT.1 ) THEN
+         INFO = -3
+      ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN
+         INFO = -4
+      END IF
+*
+      N = NL + NR + 1
+*
+      IF( NRHS.LT.1 ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.N ) THEN
+         INFO = -7
+      ELSE IF( LDBX.LT.N ) THEN
+         INFO = -9
+      ELSE IF( GIVPTR.LT.0 ) THEN
+         INFO = -11
+      ELSE IF( LDGCOL.LT.N ) THEN
+         INFO = -13
+      ELSE IF( LDGNUM.LT.N ) THEN
+         INFO = -15
+      ELSE IF( K.LT.1 ) THEN
+         INFO = -20
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CLALS0', -INFO )
+         RETURN
+      END IF
+*
+      M = N + SQRE
+      NLP1 = NL + 1
+*
+      IF( ICOMPQ.EQ.0 ) THEN
+*
+*        Apply back orthogonal transformations from the left.
+*
+*        Step (1L): apply back the Givens rotations performed.
+*
+         DO 10 I = 1, GIVPTR
+            CALL CSROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB,
+     $                  B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ),
+     $                  GIVNUM( I, 1 ) )
+   10    CONTINUE
+*
+*        Step (2L): permute rows of B.
+*
+         CALL CCOPY( NRHS, B( NLP1, 1 ), LDB, BX( 1, 1 ), LDBX )
+         DO 20 I = 2, N
+            CALL CCOPY( NRHS, B( PERM( I ), 1 ), LDB, BX( I, 1 ), LDBX )
+   20    CONTINUE
+*
+*        Step (3L): apply the inverse of the left singular vector
+*        matrix to BX.
+*
+         IF( K.EQ.1 ) THEN
+            CALL CCOPY( NRHS, BX, LDBX, B, LDB )
+            IF( Z( 1 ).LT.ZERO ) THEN
+               CALL CSSCAL( NRHS, NEGONE, B, LDB )
+            END IF
+         ELSE
+            DO 100 J = 1, K
+               DIFLJ = DIFL( J )
+               DJ = POLES( J, 1 )
+               DSIGJ = -POLES( J, 2 )
+               IF( J.LT.K ) THEN
+                  DIFRJ = -DIFR( J, 1 )
+                  DSIGJP = -POLES( J+1, 2 )
+               END IF
+               IF( ( Z( J ).EQ.ZERO ) .OR. ( POLES( J, 2 ).EQ.ZERO ) )
+     $              THEN
+                  RWORK( J ) = ZERO
+               ELSE
+                  RWORK( J ) = -POLES( J, 2 )*Z( J ) / DIFLJ /
+     $                         ( POLES( J, 2 )+DJ )
+               END IF
+               DO 30 I = 1, J - 1
+                  IF( ( Z( I ).EQ.ZERO ) .OR.
+     $                ( POLES( I, 2 ).EQ.ZERO ) ) THEN
+                     RWORK( I ) = ZERO
+                  ELSE
+                     RWORK( I ) = POLES( I, 2 )*Z( I ) /
+     $                            ( SLAMC3( POLES( I, 2 ), DSIGJ )-
+     $                            DIFLJ ) / ( POLES( I, 2 )+DJ )
+                  END IF
+   30          CONTINUE
+               DO 40 I = J + 1, K
+                  IF( ( Z( I ).EQ.ZERO ) .OR.
+     $                ( POLES( I, 2 ).EQ.ZERO ) ) THEN
+                     RWORK( I ) = ZERO
+                  ELSE
+                     RWORK( I ) = POLES( I, 2 )*Z( I ) /
+     $                            ( SLAMC3( POLES( I, 2 ), DSIGJP )+
+     $                            DIFRJ ) / ( POLES( I, 2 )+DJ )
+                  END IF
+   40          CONTINUE
+               RWORK( 1 ) = NEGONE
+               TEMP = SNRM2( K, RWORK, 1 )
+*
+*              Since B and BX are complex, the following call to SGEMV
+*              is performed in two steps (real and imaginary parts).
+*
+*              CALL SGEMV( 'T', K, NRHS, ONE, BX, LDBX, WORK, 1, ZERO,
+*    $                     B( J, 1 ), LDB )
+*
+               I = K + NRHS*2
+               DO 60 JCOL = 1, NRHS
+                  DO 50 JROW = 1, K
+                     I = I + 1
+                     RWORK( I ) = REAL( BX( JROW, JCOL ) )
+   50             CONTINUE
+   60          CONTINUE
+               CALL SGEMV( 'T', K, NRHS, ONE, RWORK( 1+K+NRHS*2 ), K,
+     $                     RWORK( 1 ), 1, ZERO, RWORK( 1+K ), 1 )
+               I = K + NRHS*2
+               DO 80 JCOL = 1, NRHS
+                  DO 70 JROW = 1, K
+                     I = I + 1
+                     RWORK( I ) = AIMAG( BX( JROW, JCOL ) )
+   70             CONTINUE
+   80          CONTINUE
+               CALL SGEMV( 'T', K, NRHS, ONE, RWORK( 1+K+NRHS*2 ), K,
+     $                     RWORK( 1 ), 1, ZERO, RWORK( 1+K+NRHS ), 1 )
+               DO 90 JCOL = 1, NRHS
+                  B( J, JCOL ) = CMPLX( RWORK( JCOL+K ),
+     $                           RWORK( JCOL+K+NRHS ) )
+   90          CONTINUE
+               CALL CLASCL( 'G', 0, 0, TEMP, ONE, 1, NRHS, B( J, 1 ),
+     $                      LDB, INFO )
+  100       CONTINUE
+         END IF
+*
+*        Move the deflated rows of BX to B also.
+*
+         IF( K.LT.MAX( M, N ) )
+     $      CALL CLACPY( 'A', N-K, NRHS, BX( K+1, 1 ), LDBX,
+     $                   B( K+1, 1 ), LDB )
+      ELSE
+*
+*        Apply back the right orthogonal transformations.
+*
+*        Step (1R): apply back the new right singular vector matrix
+*        to B.
+*
+         IF( K.EQ.1 ) THEN
+            CALL CCOPY( NRHS, B, LDB, BX, LDBX )
+         ELSE
+            DO 180 J = 1, K
+               DSIGJ = POLES( J, 2 )
+               IF( Z( J ).EQ.ZERO ) THEN
+                  RWORK( J ) = ZERO
+               ELSE
+                  RWORK( J ) = -Z( J ) / DIFL( J ) /
+     $                         ( DSIGJ+POLES( J, 1 ) ) / DIFR( J, 2 )
+               END IF
+               DO 110 I = 1, J - 1
+                  IF( Z( J ).EQ.ZERO ) THEN
+                     RWORK( I ) = ZERO
+                  ELSE
+                     RWORK( I ) = Z( J ) / ( SLAMC3( DSIGJ, -POLES( I+1,
+     $                            2 ) )-DIFR( I, 1 ) ) /
+     $                            ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 )
+                  END IF
+  110          CONTINUE
+               DO 120 I = J + 1, K
+                  IF( Z( J ).EQ.ZERO ) THEN
+                     RWORK( I ) = ZERO
+                  ELSE
+                     RWORK( I ) = Z( J ) / ( SLAMC3( DSIGJ, -POLES( I,
+     $                            2 ) )-DIFL( I ) ) /
+     $                            ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 )
+                  END IF
+  120          CONTINUE
+*
+*              Since B and BX are complex, the following call to SGEMV
+*              is performed in two steps (real and imaginary parts).
+*
+*              CALL SGEMV( 'T', K, NRHS, ONE, B, LDB, WORK, 1, ZERO,
+*    $                     BX( J, 1 ), LDBX )
+*
+               I = K + NRHS*2
+               DO 140 JCOL = 1, NRHS
+                  DO 130 JROW = 1, K
+                     I = I + 1
+                     RWORK( I ) = REAL( B( JROW, JCOL ) )
+  130             CONTINUE
+  140          CONTINUE
+               CALL SGEMV( 'T', K, NRHS, ONE, RWORK( 1+K+NRHS*2 ), K,
+     $                     RWORK( 1 ), 1, ZERO, RWORK( 1+K ), 1 )
+               I = K + NRHS*2
+               DO 160 JCOL = 1, NRHS
+                  DO 150 JROW = 1, K
+                     I = I + 1
+                     RWORK( I ) = AIMAG( B( JROW, JCOL ) )
+  150             CONTINUE
+  160          CONTINUE
+               CALL SGEMV( 'T', K, NRHS, ONE, RWORK( 1+K+NRHS*2 ), K,
+     $                     RWORK( 1 ), 1, ZERO, RWORK( 1+K+NRHS ), 1 )
+               DO 170 JCOL = 1, NRHS
+                  BX( J, JCOL ) = CMPLX( RWORK( JCOL+K ),
+     $                            RWORK( JCOL+K+NRHS ) )
+  170          CONTINUE
+  180       CONTINUE
+         END IF
+*
+*        Step (2R): if SQRE = 1, apply back the rotation that is
+*        related to the right null space of the subproblem.
+*
+         IF( SQRE.EQ.1 ) THEN
+            CALL CCOPY( NRHS, B( M, 1 ), LDB, BX( M, 1 ), LDBX )
+            CALL CSROT( NRHS, BX( 1, 1 ), LDBX, BX( M, 1 ), LDBX, C, S )
+         END IF
+         IF( K.LT.MAX( M, N ) )
+     $      CALL CLACPY( 'A', N-K, NRHS, B( K+1, 1 ), LDB,
+     $                   BX( K+1, 1 ), LDBX )
+*
+*        Step (3R): permute rows of B.
+*
+         CALL CCOPY( NRHS, BX( 1, 1 ), LDBX, B( NLP1, 1 ), LDB )
+         IF( SQRE.EQ.1 ) THEN
+            CALL CCOPY( NRHS, BX( M, 1 ), LDBX, B( M, 1 ), LDB )
+         END IF
+         DO 190 I = 2, N
+            CALL CCOPY( NRHS, BX( I, 1 ), LDBX, B( PERM( I ), 1 ), LDB )
+  190    CONTINUE
+*
+*        Step (4R): apply back the Givens rotations performed.
+*
+         DO 200 I = GIVPTR, 1, -1
+            CALL CSROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB,
+     $                  B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ),
+     $                  -GIVNUM( I, 1 ) )
+  200    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CLALS0
+*
+      END
diff --git a/costa/native/external/lapack/clalsa.f b/costa/native/external/lapack/clalsa.f
new file mode 100644
index 000000000..496572ade
--- /dev/null
+++ b/costa/native/external/lapack/clalsa.f
@@ -0,0 +1,504 @@
+      SUBROUTINE CLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U,
+     $                   LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR,
+     $                   GIVCOL, LDGCOL, PERM, GIVNUM, C, S, RWORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS,
+     $                   SMLSIZ
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ),
+     $                   K( * ), PERM( LDGCOL, * )
+      REAL               C( * ), DIFL( LDU, * ), DIFR( LDU, * ),
+     $                   GIVNUM( LDU, * ), POLES( LDU, * ), RWORK( * ),
+     $                   S( * ), U( LDU, * ), VT( LDU, * ), Z( LDU, * )
+      COMPLEX            B( LDB, * ), BX( LDBX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLALSA is an itermediate step in solving the least squares problem
+*  by computing the SVD of the coefficient matrix in compact form (The
+*  singular vectors are computed as products of simple orthorgonal
+*  matrices.).
+*
+*  If ICOMPQ = 0, CLALSA applies the inverse of the left singular vector
+*  matrix of an upper bidiagonal matrix to the right hand side; and if
+*  ICOMPQ = 1, CLALSA applies the right singular vector matrix to the
+*  right hand side. The singular vector matrices were generated in
+*  compact form by CLALSA.
+*
+*  Arguments
+*  =========
+*
+*  ICOMPQ (input) INTEGER
+*         Specifies whether the left or the right singular vector
+*         matrix is involved.
+*         = 0: Left singular vector matrix
+*         = 1: Right singular vector matrix
+*
+*  SMLSIZ (input) INTEGER
+*         The maximum size of the subproblems at the bottom of the
+*         computation tree.
+*
+*  N      (input) INTEGER
+*         The row and column dimensions of the upper bidiagonal matrix.
+*
+*  NRHS   (input) INTEGER
+*         The number of columns of B and BX. NRHS must be at least 1.
+*
+*  B      (input) COMPLEX array, dimension ( LDB, NRHS )
+*         On input, B contains the right hand sides of the least
+*         squares problem in rows 1 through M. On output, B contains
+*         the solution X in rows 1 through N.
+*
+*  LDB    (input) INTEGER
+*         The leading dimension of B in the calling subprogram.
+*         LDB must be at least max(1,MAX( M, N ) ).
+*
+*  BX     (output) COMPLEX array, dimension ( LDBX, NRHS )
+*         On exit, the result of applying the left or right singular
+*         vector matrix to B.
+*
+*  LDBX   (input) INTEGER
+*         The leading dimension of BX.
+*
+*  U      (input) REAL array, dimension ( LDU, SMLSIZ ).
+*         On entry, U contains the left singular vector matrices of all
+*         subproblems at the bottom level.
+*
+*  LDU    (input) INTEGER, LDU = > N.
+*         The leading dimension of arrays U, VT, DIFL, DIFR,
+*         POLES, GIVNUM, and Z.
+*
+*  VT     (input) REAL array, dimension ( LDU, SMLSIZ+1 ).
+*         On entry, VT' contains the right singular vector matrices of
+*         all subproblems at the bottom level.
+*
+*  K      (input) INTEGER array, dimension ( N ).
+*
+*  DIFL   (input) REAL array, dimension ( LDU, NLVL ).
+*         where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1.
+*
+*  DIFR   (input) REAL array, dimension ( LDU, 2 * NLVL ).
+*         On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record
+*         distances between singular values on the I-th level and
+*         singular values on the (I -1)-th level, and DIFR(*, 2 * I)
+*         record the normalizing factors of the right singular vectors
+*         matrices of subproblems on I-th level.
+*
+*  Z      (input) REAL array, dimension ( LDU, NLVL ).
+*         On entry, Z(1, I) contains the components of the deflation-
+*         adjusted updating row vector for subproblems on the I-th
+*         level.
+*
+*  POLES  (input) REAL array, dimension ( LDU, 2 * NLVL ).
+*         On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old
+*         singular values involved in the secular equations on the I-th
+*         level.
+*
+*  GIVPTR (input) INTEGER array, dimension ( N ).
+*         On entry, GIVPTR( I ) records the number of Givens
+*         rotations performed on the I-th problem on the computation
+*         tree.
+*
+*  GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ).
+*         On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the
+*         locations of Givens rotations performed on the I-th level on
+*         the computation tree.
+*
+*  LDGCOL (input) INTEGER, LDGCOL = > N.
+*         The leading dimension of arrays GIVCOL and PERM.
+*
+*  PERM   (input) INTEGER array, dimension ( LDGCOL, NLVL ).
+*         On entry, PERM(*, I) records permutations done on the I-th
+*         level of the computation tree.
+*
+*  GIVNUM (input) REAL array, dimension ( LDU, 2 * NLVL ).
+*         On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S-
+*         values of Givens rotations performed on the I-th level on the
+*         computation tree.
+*
+*  C      (input) REAL array, dimension ( N ).
+*         On entry, if the I-th subproblem is not square,
+*         C( I ) contains the C-value of a Givens rotation related to
+*         the right null space of the I-th subproblem.
+*
+*  S      (input) REAL array, dimension ( N ).
+*         On entry, if the I-th subproblem is not square,
+*         S( I ) contains the S-value of a Givens rotation related to
+*         the right null space of the I-th subproblem.
+*
+*  RWORK  (workspace) REAL array, dimension at least
+*         max ( N, (SMLSZ+1)*NRHS*3 ).
+*
+*  IWORK  (workspace) INTEGER array.
+*         The dimension must be at least 3 * N
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Ren-Cang Li, Computer Science Division, University of
+*       California at Berkeley, USA
+*     Osni Marques, LBNL/NERSC, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, I1, IC, IM1, INODE, J, JCOL, JIMAG, JREAL,
+     $                   JROW, LF, LL, LVL, LVL2, ND, NDB1, NDIML,
+     $                   NDIMR, NL, NLF, NLP1, NLVL, NR, NRF, NRP1, SQRE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CLALS0, SGEMM, SLASDT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          AIMAG, CMPLX, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( SMLSIZ.LT.3 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.SMLSIZ ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.1 ) THEN
+         INFO = -4
+      ELSE IF( LDB.LT.N ) THEN
+         INFO = -6
+      ELSE IF( LDBX.LT.N ) THEN
+         INFO = -8
+      ELSE IF( LDU.LT.N ) THEN
+         INFO = -10
+      ELSE IF( LDGCOL.LT.N ) THEN
+         INFO = -19
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CLALSA', -INFO )
+         RETURN
+      END IF
+*
+*     Book-keeping and  setting up the computation tree.
+*
+      INODE = 1
+      NDIML = INODE + N
+      NDIMR = NDIML + N
+*
+      CALL SLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ),
+     $             IWORK( NDIMR ), SMLSIZ )
+*
+*     The following code applies back the left singular vector factors.
+*     For applying back the right singular vector factors, go to 170.
+*
+      IF( ICOMPQ.EQ.1 ) THEN
+         GO TO 170
+      END IF
+*
+*     The nodes on the bottom level of the tree were solved
+*     by SLASDQ. The corresponding left and right singular vector
+*     matrices are in explicit form. First apply back the left
+*     singular vector matrices.
+*
+      NDB1 = ( ND+1 ) / 2
+      DO 130 I = NDB1, ND
+*
+*        IC : center row of each node
+*        NL : number of rows of left  subproblem
+*        NR : number of rows of right subproblem
+*        NLF: starting row of the left   subproblem
+*        NRF: starting row of the right  subproblem
+*
+         I1 = I - 1
+         IC = IWORK( INODE+I1 )
+         NL = IWORK( NDIML+I1 )
+         NR = IWORK( NDIMR+I1 )
+         NLF = IC - NL
+         NRF = IC + 1
+*
+*        Since B and BX are complex, the following call to SGEMM
+*        is performed in two steps (real and imaginary parts).
+*
+*        CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
+*     $               B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
+*
+         J = NL*NRHS*2
+         DO 20 JCOL = 1, NRHS
+            DO 10 JROW = NLF, NLF + NL - 1
+               J = J + 1
+               RWORK( J ) = REAL( B( JROW, JCOL ) )
+   10       CONTINUE
+   20    CONTINUE
+         CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
+     $               RWORK( 1+NL*NRHS*2 ), NL, ZERO, RWORK( 1 ), NL )
+         J = NL*NRHS*2
+         DO 40 JCOL = 1, NRHS
+            DO 30 JROW = NLF, NLF + NL - 1
+               J = J + 1
+               RWORK( J ) = AIMAG( B( JROW, JCOL ) )
+   30       CONTINUE
+   40    CONTINUE
+         CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
+     $               RWORK( 1+NL*NRHS*2 ), NL, ZERO, RWORK( 1+NL*NRHS ),
+     $               NL )
+         JREAL = 0
+         JIMAG = NL*NRHS
+         DO 60 JCOL = 1, NRHS
+            DO 50 JROW = NLF, NLF + NL - 1
+               JREAL = JREAL + 1
+               JIMAG = JIMAG + 1
+               BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ),
+     $                            RWORK( JIMAG ) )
+   50       CONTINUE
+   60    CONTINUE
+*
+*        Since B and BX are complex, the following call to SGEMM
+*        is performed in two steps (real and imaginary parts).
+*
+*        CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
+*    $               B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
+*
+         J = NR*NRHS*2
+         DO 80 JCOL = 1, NRHS
+            DO 70 JROW = NRF, NRF + NR - 1
+               J = J + 1
+               RWORK( J ) = REAL( B( JROW, JCOL ) )
+   70       CONTINUE
+   80    CONTINUE
+         CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
+     $               RWORK( 1+NR*NRHS*2 ), NR, ZERO, RWORK( 1 ), NR )
+         J = NR*NRHS*2
+         DO 100 JCOL = 1, NRHS
+            DO 90 JROW = NRF, NRF + NR - 1
+               J = J + 1
+               RWORK( J ) = AIMAG( B( JROW, JCOL ) )
+   90       CONTINUE
+  100    CONTINUE
+         CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
+     $               RWORK( 1+NR*NRHS*2 ), NR, ZERO, RWORK( 1+NR*NRHS ),
+     $               NR )
+         JREAL = 0
+         JIMAG = NR*NRHS
+         DO 120 JCOL = 1, NRHS
+            DO 110 JROW = NRF, NRF + NR - 1
+               JREAL = JREAL + 1
+               JIMAG = JIMAG + 1
+               BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ),
+     $                            RWORK( JIMAG ) )
+  110       CONTINUE
+  120    CONTINUE
+*
+  130 CONTINUE
+*
+*     Next copy the rows of B that correspond to unchanged rows
+*     in the bidiagonal matrix to BX.
+*
+      DO 140 I = 1, ND
+         IC = IWORK( INODE+I-1 )
+         CALL CCOPY( NRHS, B( IC, 1 ), LDB, BX( IC, 1 ), LDBX )
+  140 CONTINUE
+*
+*     Finally go through the left singular vector matrices of all
+*     the other subproblems bottom-up on the tree.
+*
+      J = 2**NLVL
+      SQRE = 0
+*
+      DO 160 LVL = NLVL, 1, -1
+         LVL2 = 2*LVL - 1
+*
+*        find the first node LF and last node LL on
+*        the current level LVL
+*
+         IF( LVL.EQ.1 ) THEN
+            LF = 1
+            LL = 1
+         ELSE
+            LF = 2**( LVL-1 )
+            LL = 2*LF - 1
+         END IF
+         DO 150 I = LF, LL
+            IM1 = I - 1
+            IC = IWORK( INODE+IM1 )
+            NL = IWORK( NDIML+IM1 )
+            NR = IWORK( NDIMR+IM1 )
+            NLF = IC - NL
+            NRF = IC + 1
+            J = J - 1
+            CALL CLALS0( ICOMPQ, NL, NR, SQRE, NRHS, BX( NLF, 1 ), LDBX,
+     $                   B( NLF, 1 ), LDB, PERM( NLF, LVL ),
+     $                   GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
+     $                   GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ),
+     $                   DIFL( NLF, LVL ), DIFR( NLF, LVL2 ),
+     $                   Z( NLF, LVL ), K( J ), C( J ), S( J ), RWORK,
+     $                   INFO )
+  150    CONTINUE
+  160 CONTINUE
+      GO TO 330
+*
+*     ICOMPQ = 1: applying back the right singular vector factors.
+*
+  170 CONTINUE
+*
+*     First now go through the right singular vector matrices of all
+*     the tree nodes top-down.
+*
+      J = 0
+      DO 190 LVL = 1, NLVL
+         LVL2 = 2*LVL - 1
+*
+*        Find the first node LF and last node LL on
+*        the current level LVL.
+*
+         IF( LVL.EQ.1 ) THEN
+            LF = 1
+            LL = 1
+         ELSE
+            LF = 2**( LVL-1 )
+            LL = 2*LF - 1
+         END IF
+         DO 180 I = LL, LF, -1
+            IM1 = I - 1
+            IC = IWORK( INODE+IM1 )
+            NL = IWORK( NDIML+IM1 )
+            NR = IWORK( NDIMR+IM1 )
+            NLF = IC - NL
+            NRF = IC + 1
+            IF( I.EQ.LL ) THEN
+               SQRE = 0
+            ELSE
+               SQRE = 1
+            END IF
+            J = J + 1
+            CALL CLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B( NLF, 1 ), LDB,
+     $                   BX( NLF, 1 ), LDBX, PERM( NLF, LVL ),
+     $                   GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
+     $                   GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ),
+     $                   DIFL( NLF, LVL ), DIFR( NLF, LVL2 ),
+     $                   Z( NLF, LVL ), K( J ), C( J ), S( J ), RWORK,
+     $                   INFO )
+  180    CONTINUE
+  190 CONTINUE
+*
+*     The nodes on the bottom level of the tree were solved
+*     by SLASDQ. The corresponding right singular vector
+*     matrices are in explicit form. Apply them back.
+*
+      NDB1 = ( ND+1 ) / 2
+      DO 320 I = NDB1, ND
+         I1 = I - 1
+         IC = IWORK( INODE+I1 )
+         NL = IWORK( NDIML+I1 )
+         NR = IWORK( NDIMR+I1 )
+         NLP1 = NL + 1
+         IF( I.EQ.ND ) THEN
+            NRP1 = NR
+         ELSE
+            NRP1 = NR + 1
+         END IF
+         NLF = IC - NL
+         NRF = IC + 1
+*
+*        Since B and BX are complex, the following call to SGEMM is
+*        performed in two steps (real and imaginary parts).
+*
+*        CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
+*    $               B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
+*
+         J = NLP1*NRHS*2
+         DO 210 JCOL = 1, NRHS
+            DO 200 JROW = NLF, NLF + NLP1 - 1
+               J = J + 1
+               RWORK( J ) = REAL( B( JROW, JCOL ) )
+  200       CONTINUE
+  210    CONTINUE
+         CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
+     $               RWORK( 1+NLP1*NRHS*2 ), NLP1, ZERO, RWORK( 1 ),
+     $               NLP1 )
+         J = NLP1*NRHS*2
+         DO 230 JCOL = 1, NRHS
+            DO 220 JROW = NLF, NLF + NLP1 - 1
+               J = J + 1
+               RWORK( J ) = AIMAG( B( JROW, JCOL ) )
+  220       CONTINUE
+  230    CONTINUE
+         CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
+     $               RWORK( 1+NLP1*NRHS*2 ), NLP1, ZERO,
+     $               RWORK( 1+NLP1*NRHS ), NLP1 )
+         JREAL = 0
+         JIMAG = NLP1*NRHS
+         DO 250 JCOL = 1, NRHS
+            DO 240 JROW = NLF, NLF + NLP1 - 1
+               JREAL = JREAL + 1
+               JIMAG = JIMAG + 1
+               BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ),
+     $                            RWORK( JIMAG ) )
+  240       CONTINUE
+  250    CONTINUE
+*
+*        Since B and BX are complex, the following call to SGEMM is
+*        performed in two steps (real and imaginary parts).
+*
+*        CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
+*    $               B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
+*
+         J = NRP1*NRHS*2
+         DO 270 JCOL = 1, NRHS
+            DO 260 JROW = NRF, NRF + NRP1 - 1
+               J = J + 1
+               RWORK( J ) = REAL( B( JROW, JCOL ) )
+  260       CONTINUE
+  270    CONTINUE
+         CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
+     $               RWORK( 1+NRP1*NRHS*2 ), NRP1, ZERO, RWORK( 1 ),
+     $               NRP1 )
+         J = NRP1*NRHS*2
+         DO 290 JCOL = 1, NRHS
+            DO 280 JROW = NRF, NRF + NRP1 - 1
+               J = J + 1
+               RWORK( J ) = AIMAG( B( JROW, JCOL ) )
+  280       CONTINUE
+  290    CONTINUE
+         CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
+     $               RWORK( 1+NRP1*NRHS*2 ), NRP1, ZERO,
+     $               RWORK( 1+NRP1*NRHS ), NRP1 )
+         JREAL = 0
+         JIMAG = NRP1*NRHS
+         DO 310 JCOL = 1, NRHS
+            DO 300 JROW = NRF, NRF + NRP1 - 1
+               JREAL = JREAL + 1
+               JIMAG = JIMAG + 1
+               BX( JROW, JCOL ) = CMPLX( RWORK( JREAL ),
+     $                            RWORK( JIMAG ) )
+  300       CONTINUE
+  310    CONTINUE
+*
+  320 CONTINUE
+*
+  330 CONTINUE
+*
+      RETURN
+*
+*     End of CLALSA
+*
+      END
diff --git a/costa/native/external/lapack/clalsd.f b/costa/native/external/lapack/clalsd.f
new file mode 100644
index 000000000..21896d84c
--- /dev/null
+++ b/costa/native/external/lapack/clalsd.f
@@ -0,0 +1,597 @@
+      SUBROUTINE CLALSD( UPLO, SMLSIZ, N, NRHS, D, E, B, LDB, RCOND,
+     $                   RANK, WORK, RWORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, N, NRHS, RANK, SMLSIZ
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               D( * ), E( * ), RWORK( * )
+      COMPLEX            B( LDB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLALSD uses the singular value decomposition of A to solve the least
+*  squares problem of finding X to minimize the Euclidean norm of each
+*  column of A*X-B, where A is N-by-N upper bidiagonal, and X and B
+*  are N-by-NRHS. The solution X overwrites B.
+*
+*  The singular values of A smaller than RCOND times the largest
+*  singular value are treated as zero in solving the least squares
+*  problem; in this case a minimum norm solution is returned.
+*  The actual singular values are returned in D in ascending order.
+*
+*  This code makes very mild assumptions about floating point
+*  arithmetic. It will work on machines with a guard digit in
+*  add/subtract, or on those binary machines without guard digits
+*  which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2.
+*  It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  UPLO   (input) CHARACTER*1
+*         = 'U': D and E define an upper bidiagonal matrix.
+*         = 'L': D and E define a  lower bidiagonal matrix.
+*
+*  SMLSIZ (input) INTEGER
+*         The maximum size of the subproblems at the bottom of the
+*         computation tree.
+*
+*  N      (input) INTEGER
+*         The dimension of the  bidiagonal matrix.  N >= 0.
+*
+*  NRHS   (input) INTEGER
+*         The number of columns of B. NRHS must be at least 1.
+*
+*  D      (input/output) REAL array, dimension (N)
+*         On entry D contains the main diagonal of the bidiagonal
+*         matrix. On exit, if INFO = 0, D contains its singular values.
+*
+*  E      (input) REAL array, dimension (N-1)
+*         Contains the super-diagonal entries of the bidiagonal matrix.
+*         On exit, E has been destroyed.
+*
+*  B      (input/output) COMPLEX array, dimension (LDB,NRHS)
+*         On input, B contains the right hand sides of the least
+*         squares problem. On output, B contains the solution X.
+*
+*  LDB    (input) INTEGER
+*         The leading dimension of B in the calling subprogram.
+*         LDB must be at least max(1,N).
+*
+*  RCOND  (input) REAL
+*         The singular values of A less than or equal to RCOND times
+*         the largest singular value are treated as zero in solving
+*         the least squares problem. If RCOND is negative,
+*         machine precision is used instead.
+*         For example, if diag(S)*X=B were the least squares problem,
+*         where diag(S) is a diagonal matrix of singular values, the
+*         solution would be X(i) = B(i) / S(i) if S(i) is greater than
+*         RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to
+*         RCOND*max(S).
+*
+*  RANK   (output) INTEGER
+*         The number of singular values of A greater than RCOND times
+*         the largest singular value.
+*
+*  WORK   (workspace) COMPLEX array, dimension at least
+*         (N * NRHS).
+*
+*  RWORK  (workspace) REAL array, dimension at least
+*         (9*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + (SMLSIZ+1)**2),
+*         where
+*         NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 )
+*
+*  IWORK  (workspace) INTEGER array, dimension at least
+*         (3*N*NLVL + 11*N).
+*
+*  INFO   (output) INTEGER
+*         = 0:  successful exit.
+*         < 0:  if INFO = -i, the i-th argument had an illegal value.
+*         > 0:  The algorithm failed to compute an singular value while
+*               working on the submatrix lying in rows and columns
+*               INFO/(N+1) through MOD(INFO,N+1).
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Ren-Cang Li, Computer Science Division, University of
+*       California at Berkeley, USA
+*     Osni Marques, LBNL/NERSC, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0 )
+      COMPLEX            CZERO
+      PARAMETER          ( CZERO = ( 0.0E0, 0.0E0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            BX, BXST, C, DIFL, DIFR, GIVCOL, GIVNUM,
+     $                   GIVPTR, I, ICMPQ1, ICMPQ2, IRWB, IRWIB, IRWRB,
+     $                   IRWU, IRWVT, IRWWRK, IWK, J, JCOL, JIMAG,
+     $                   JREAL, JROW, K, NLVL, NM1, NRWORK, NSIZE, NSUB,
+     $                   PERM, POLES, S, SIZEI, SMLSZP, SQRE, ST, ST1,
+     $                   U, VT, Z
+      REAL               CS, EPS, ORGNRM, R, SN, TOL
+*     ..
+*     .. External Functions ..
+      INTEGER            ISAMAX
+      REAL               SLAMCH, SLANST
+      EXTERNAL           ISAMAX, SLAMCH, SLANST
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CLACPY, CLALSA, CLASCL, CLASET, CSROT,
+     $                   SGEMM, SLARTG, SLASCL, SLASDA, SLASDQ, SLASET,
+     $                   SLASRT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, CMPLX, INT, LOG, REAL, SIGN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.1 ) THEN
+         INFO = -4
+      ELSE IF( ( LDB.LT.1 ) .OR. ( LDB.LT.N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CLALSD', -INFO )
+         RETURN
+      END IF
+*
+      EPS = SLAMCH( 'Epsilon' )
+*
+*     Set up the tolerance.
+*
+      IF( ( RCOND.LE.ZERO ) .OR. ( RCOND.GE.ONE ) ) THEN
+         RCOND = EPS
+      END IF
+*
+      RANK = 0
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 ) THEN
+         RETURN
+      ELSE IF( N.EQ.1 ) THEN
+         IF( D( 1 ).EQ.ZERO ) THEN
+            CALL CLASET( 'A', 1, NRHS, CZERO, CZERO, B, LDB )
+         ELSE
+            RANK = 1
+            CALL CLASCL( 'G', 0, 0, D( 1 ), ONE, 1, NRHS, B, LDB, INFO )
+            D( 1 ) = ABS( D( 1 ) )
+         END IF
+         RETURN
+      END IF
+*
+*     Rotate the matrix if it is lower bidiagonal.
+*
+      IF( UPLO.EQ.'L' ) THEN
+         DO 10 I = 1, N - 1
+            CALL SLARTG( D( I ), E( I ), CS, SN, R )
+            D( I ) = R
+            E( I ) = SN*D( I+1 )
+            D( I+1 ) = CS*D( I+1 )
+            IF( NRHS.EQ.1 ) THEN
+               CALL CSROT( 1, B( I, 1 ), 1, B( I+1, 1 ), 1, CS, SN )
+            ELSE
+               RWORK( I*2-1 ) = CS
+               RWORK( I*2 ) = SN
+            END IF
+   10    CONTINUE
+         IF( NRHS.GT.1 ) THEN
+            DO 30 I = 1, NRHS
+               DO 20 J = 1, N - 1
+                  CS = RWORK( J*2-1 )
+                  SN = RWORK( J*2 )
+                  CALL CSROT( 1, B( J, I ), 1, B( J+1, I ), 1, CS, SN )
+   20          CONTINUE
+   30       CONTINUE
+         END IF
+      END IF
+*
+*     Scale.
+*
+      NM1 = N - 1
+      ORGNRM = SLANST( 'M', N, D, E )
+      IF( ORGNRM.EQ.ZERO ) THEN
+         CALL CLASET( 'A', N, NRHS, CZERO, CZERO, B, LDB )
+         RETURN
+      END IF
+*
+      CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO )
+      CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, NM1, 1, E, NM1, INFO )
+*
+*     If N is smaller than the minimum divide size SMLSIZ, then solve
+*     the problem with another solver.
+*
+      IF( N.LE.SMLSIZ ) THEN
+         IRWU = 1
+         IRWVT = IRWU + N*N
+         IRWWRK = IRWVT + N*N
+         IRWRB = IRWWRK
+         IRWIB = IRWRB + N*NRHS
+         IRWB = IRWIB + N*NRHS
+         CALL SLASET( 'A', N, N, ZERO, ONE, RWORK( IRWU ), N )
+         CALL SLASET( 'A', N, N, ZERO, ONE, RWORK( IRWVT ), N )
+         CALL SLASDQ( 'U', 0, N, N, N, 0, D, E, RWORK( IRWVT ), N,
+     $                RWORK( IRWU ), N, RWORK( IRWWRK ), 1,
+     $                RWORK( IRWWRK ), INFO )
+         IF( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+*
+*        In the real version, B is passed to SLASDQ and multiplied
+*        internally by Q'. Here B is complex and that product is
+*        computed below in two steps (real and imaginary parts).
+*
+         J = IRWB - 1
+         DO 50 JCOL = 1, NRHS
+            DO 40 JROW = 1, N
+               J = J + 1
+               RWORK( J ) = REAL( B( JROW, JCOL ) )
+   40       CONTINUE
+   50    CONTINUE
+         CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWU ), N,
+     $               RWORK( IRWB ), N, ZERO, RWORK( IRWRB ), N )
+         J = IRWB - 1
+         DO 70 JCOL = 1, NRHS
+            DO 60 JROW = 1, N
+               J = J + 1
+               RWORK( J ) = AIMAG( B( JROW, JCOL ) )
+   60       CONTINUE
+   70    CONTINUE
+         CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWU ), N,
+     $               RWORK( IRWB ), N, ZERO, RWORK( IRWIB ), N )
+         JREAL = IRWRB - 1
+         JIMAG = IRWIB - 1
+         DO 90 JCOL = 1, NRHS
+            DO 80 JROW = 1, N
+               JREAL = JREAL + 1
+               JIMAG = JIMAG + 1
+               B( JROW, JCOL ) = CMPLX( RWORK( JREAL ), RWORK( JIMAG ) )
+   80       CONTINUE
+   90    CONTINUE
+*
+         TOL = RCOND*ABS( D( ISAMAX( N, D, 1 ) ) )
+         DO 100 I = 1, N
+            IF( D( I ).LE.TOL ) THEN
+               CALL CLASET( 'A', 1, NRHS, CZERO, CZERO, B( I, 1 ), LDB )
+            ELSE
+               CALL CLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS, B( I, 1 ),
+     $                      LDB, INFO )
+               RANK = RANK + 1
+            END IF
+  100    CONTINUE
+*
+*        Since B is complex, the following call to SGEMM is performed
+*        in two steps (real and imaginary parts). That is for V * B
+*        (in the real version of the code V' is stored in WORK).
+*
+*        CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, WORK, N, B, LDB, ZERO,
+*    $               WORK( NWORK ), N )
+*
+         J = IRWB - 1
+         DO 120 JCOL = 1, NRHS
+            DO 110 JROW = 1, N
+               J = J + 1
+               RWORK( J ) = REAL( B( JROW, JCOL ) )
+  110       CONTINUE
+  120    CONTINUE
+         CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWVT ), N,
+     $               RWORK( IRWB ), N, ZERO, RWORK( IRWRB ), N )
+         J = IRWB - 1
+         DO 140 JCOL = 1, NRHS
+            DO 130 JROW = 1, N
+               J = J + 1
+               RWORK( J ) = AIMAG( B( JROW, JCOL ) )
+  130       CONTINUE
+  140    CONTINUE
+         CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWVT ), N,
+     $               RWORK( IRWB ), N, ZERO, RWORK( IRWIB ), N )
+         JREAL = IRWRB - 1
+         JIMAG = IRWIB - 1
+         DO 160 JCOL = 1, NRHS
+            DO 150 JROW = 1, N
+               JREAL = JREAL + 1
+               JIMAG = JIMAG + 1
+               B( JROW, JCOL ) = CMPLX( RWORK( JREAL ), RWORK( JIMAG ) )
+  150       CONTINUE
+  160    CONTINUE
+*
+*        Unscale.
+*
+         CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO )
+         CALL SLASRT( 'D', N, D, INFO )
+         CALL CLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO )
+*
+         RETURN
+      END IF
+*
+*     Book-keeping and setting up some constants.
+*
+      NLVL = INT( LOG( REAL( N ) / REAL( SMLSIZ+1 ) ) / LOG( TWO ) ) + 1
+*
+      SMLSZP = SMLSIZ + 1
+*
+      U = 1
+      VT = 1 + SMLSIZ*N
+      DIFL = VT + SMLSZP*N
+      DIFR = DIFL + NLVL*N
+      Z = DIFR + NLVL*N*2
+      C = Z + NLVL*N
+      S = C + N
+      POLES = S + N
+      GIVNUM = POLES + 2*NLVL*N
+      NRWORK = GIVNUM + 2*NLVL*N
+      BX = 1
+*
+      IRWRB = NRWORK
+      IRWIB = IRWRB + SMLSIZ*NRHS
+      IRWB = IRWIB + SMLSIZ*NRHS
+*
+      SIZEI = 1 + N
+      K = SIZEI + N
+      GIVPTR = K + N
+      PERM = GIVPTR + N
+      GIVCOL = PERM + NLVL*N
+      IWK = GIVCOL + NLVL*N*2
+*
+      ST = 1
+      SQRE = 0
+      ICMPQ1 = 1
+      ICMPQ2 = 0
+      NSUB = 0
+*
+      DO 170 I = 1, N
+         IF( ABS( D( I ) ).LT.EPS ) THEN
+            D( I ) = SIGN( EPS, D( I ) )
+         END IF
+  170 CONTINUE
+*
+      DO 240 I = 1, NM1
+         IF( ( ABS( E( I ) ).LT.EPS ) .OR. ( I.EQ.NM1 ) ) THEN
+            NSUB = NSUB + 1
+            IWORK( NSUB ) = ST
+*
+*           Subproblem found. First determine its size and then
+*           apply divide and conquer on it.
+*
+            IF( I.LT.NM1 ) THEN
+*
+*              A subproblem with E(I) small for I < NM1.
+*
+               NSIZE = I - ST + 1
+               IWORK( SIZEI+NSUB-1 ) = NSIZE
+            ELSE IF( ABS( E( I ) ).GE.EPS ) THEN
+*
+*              A subproblem with E(NM1) not too small but I = NM1.
+*
+               NSIZE = N - ST + 1
+               IWORK( SIZEI+NSUB-1 ) = NSIZE
+            ELSE
+*
+*              A subproblem with E(NM1) small. This implies an
+*              1-by-1 subproblem at D(N), which is not solved
+*              explicitly.
+*
+               NSIZE = I - ST + 1
+               IWORK( SIZEI+NSUB-1 ) = NSIZE
+               NSUB = NSUB + 1
+               IWORK( NSUB ) = N
+               IWORK( SIZEI+NSUB-1 ) = 1
+               CALL CCOPY( NRHS, B( N, 1 ), LDB, WORK( BX+NM1 ), N )
+            END IF
+            ST1 = ST - 1
+            IF( NSIZE.EQ.1 ) THEN
+*
+*              This is a 1-by-1 subproblem and is not solved
+*              explicitly.
+*
+               CALL CCOPY( NRHS, B( ST, 1 ), LDB, WORK( BX+ST1 ), N )
+            ELSE IF( NSIZE.LE.SMLSIZ ) THEN
+*
+*              This is a small subproblem and is solved by SLASDQ.
+*
+               CALL SLASET( 'A', NSIZE, NSIZE, ZERO, ONE,
+     $                      RWORK( VT+ST1 ), N )
+               CALL SLASET( 'A', NSIZE, NSIZE, ZERO, ONE,
+     $                      RWORK( U+ST1 ), N )
+               CALL SLASDQ( 'U', 0, NSIZE, NSIZE, NSIZE, 0, D( ST ),
+     $                      E( ST ), RWORK( VT+ST1 ), N, RWORK( U+ST1 ),
+     $                      N, RWORK( NRWORK ), 1, RWORK( NRWORK ),
+     $                      INFO )
+               IF( INFO.NE.0 ) THEN
+                  RETURN
+               END IF
+*
+*              In the real version, B is passed to SLASDQ and multiplied
+*              internally by Q'. Here B is complex and that product is
+*              computed below in two steps (real and imaginary parts).
+*
+               J = IRWB - 1
+               DO 190 JCOL = 1, NRHS
+                  DO 180 JROW = ST, ST + NSIZE - 1
+                     J = J + 1
+                     RWORK( J ) = REAL( B( JROW, JCOL ) )
+  180             CONTINUE
+  190          CONTINUE
+               CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE,
+     $                     RWORK( U+ST1 ), N, RWORK( IRWB ), NSIZE,
+     $                     ZERO, RWORK( IRWRB ), NSIZE )
+               J = IRWB - 1
+               DO 210 JCOL = 1, NRHS
+                  DO 200 JROW = ST, ST + NSIZE - 1
+                     J = J + 1
+                     RWORK( J ) = AIMAG( B( JROW, JCOL ) )
+  200             CONTINUE
+  210          CONTINUE
+               CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE,
+     $                     RWORK( U+ST1 ), N, RWORK( IRWB ), NSIZE,
+     $                     ZERO, RWORK( IRWIB ), NSIZE )
+               JREAL = IRWRB - 1
+               JIMAG = IRWIB - 1
+               DO 230 JCOL = 1, NRHS
+                  DO 220 JROW = ST, ST + NSIZE - 1
+                     JREAL = JREAL + 1
+                     JIMAG = JIMAG + 1
+                     B( JROW, JCOL ) = CMPLX( RWORK( JREAL ),
+     $                                 RWORK( JIMAG ) )
+  220             CONTINUE
+  230          CONTINUE
+*
+               CALL CLACPY( 'A', NSIZE, NRHS, B( ST, 1 ), LDB,
+     $                      WORK( BX+ST1 ), N )
+            ELSE
+*
+*              A large problem. Solve it using divide and conquer.
+*
+               CALL SLASDA( ICMPQ1, SMLSIZ, NSIZE, SQRE, D( ST ),
+     $                      E( ST ), RWORK( U+ST1 ), N, RWORK( VT+ST1 ),
+     $                      IWORK( K+ST1 ), RWORK( DIFL+ST1 ),
+     $                      RWORK( DIFR+ST1 ), RWORK( Z+ST1 ),
+     $                      RWORK( POLES+ST1 ), IWORK( GIVPTR+ST1 ),
+     $                      IWORK( GIVCOL+ST1 ), N, IWORK( PERM+ST1 ),
+     $                      RWORK( GIVNUM+ST1 ), RWORK( C+ST1 ),
+     $                      RWORK( S+ST1 ), RWORK( NRWORK ),
+     $                      IWORK( IWK ), INFO )
+               IF( INFO.NE.0 ) THEN
+                  RETURN
+               END IF
+               BXST = BX + ST1
+               CALL CLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, B( ST, 1 ),
+     $                      LDB, WORK( BXST ), N, RWORK( U+ST1 ), N,
+     $                      RWORK( VT+ST1 ), IWORK( K+ST1 ),
+     $                      RWORK( DIFL+ST1 ), RWORK( DIFR+ST1 ),
+     $                      RWORK( Z+ST1 ), RWORK( POLES+ST1 ),
+     $                      IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N,
+     $                      IWORK( PERM+ST1 ), RWORK( GIVNUM+ST1 ),
+     $                      RWORK( C+ST1 ), RWORK( S+ST1 ),
+     $                      RWORK( NRWORK ), IWORK( IWK ), INFO )
+               IF( INFO.NE.0 ) THEN
+                  RETURN
+               END IF
+            END IF
+            ST = I + 1
+         END IF
+  240 CONTINUE
+*
+*     Apply the singular values and treat the tiny ones as zero.
+*
+      TOL = RCOND*ABS( D( ISAMAX( N, D, 1 ) ) )
+*
+      DO 250 I = 1, N
+*
+*        Some of the elements in D can be negative because 1-by-1
+*        subproblems were not solved explicitly.
+*
+         IF( ABS( D( I ) ).LE.TOL ) THEN
+            CALL CLASET( 'A', 1, NRHS, CZERO, CZERO, WORK( BX+I-1 ), N )
+         ELSE
+            RANK = RANK + 1
+            CALL CLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS,
+     $                   WORK( BX+I-1 ), N, INFO )
+         END IF
+         D( I ) = ABS( D( I ) )
+  250 CONTINUE
+*
+*     Now apply back the right singular vectors.
+*
+      ICMPQ2 = 1
+      DO 320 I = 1, NSUB
+         ST = IWORK( I )
+         ST1 = ST - 1
+         NSIZE = IWORK( SIZEI+I-1 )
+         BXST = BX + ST1
+         IF( NSIZE.EQ.1 ) THEN
+            CALL CCOPY( NRHS, WORK( BXST ), N, B( ST, 1 ), LDB )
+         ELSE IF( NSIZE.LE.SMLSIZ ) THEN
+*
+*           Since B and BX are complex, the following call to SGEMM
+*           is performed in two steps (real and imaginary parts).
+*
+*           CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE,
+*    $                  RWORK( VT+ST1 ), N, RWORK( BXST ), N, ZERO,
+*    $                  B( ST, 1 ), LDB )
+*
+            J = BXST - N - 1
+            JREAL = IRWB - 1
+            DO 270 JCOL = 1, NRHS
+               J = J + N
+               DO 260 JROW = 1, NSIZE
+                  JREAL = JREAL + 1
+                  RWORK( JREAL ) = REAL( WORK( J+JROW ) )
+  260          CONTINUE
+  270       CONTINUE
+            CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE,
+     $                  RWORK( VT+ST1 ), N, RWORK( IRWB ), NSIZE, ZERO,
+     $                  RWORK( IRWRB ), NSIZE )
+            J = BXST - N - 1
+            JIMAG = IRWB - 1
+            DO 290 JCOL = 1, NRHS
+               J = J + N
+               DO 280 JROW = 1, NSIZE
+                  JIMAG = JIMAG + 1
+                  RWORK( JIMAG ) = AIMAG( WORK( J+JROW ) )
+  280          CONTINUE
+  290       CONTINUE
+            CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE,
+     $                  RWORK( VT+ST1 ), N, RWORK( IRWB ), NSIZE, ZERO,
+     $                  RWORK( IRWIB ), NSIZE )
+            JREAL = IRWRB - 1
+            JIMAG = IRWIB - 1
+            DO 310 JCOL = 1, NRHS
+               DO 300 JROW = ST, ST + NSIZE - 1
+                  JREAL = JREAL + 1
+                  JIMAG = JIMAG + 1
+                  B( JROW, JCOL ) = CMPLX( RWORK( JREAL ),
+     $                              RWORK( JIMAG ) )
+  300          CONTINUE
+  310       CONTINUE
+         ELSE
+            CALL CLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, WORK( BXST ), N,
+     $                   B( ST, 1 ), LDB, RWORK( U+ST1 ), N,
+     $                   RWORK( VT+ST1 ), IWORK( K+ST1 ),
+     $                   RWORK( DIFL+ST1 ), RWORK( DIFR+ST1 ),
+     $                   RWORK( Z+ST1 ), RWORK( POLES+ST1 ),
+     $                   IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N,
+     $                   IWORK( PERM+ST1 ), RWORK( GIVNUM+ST1 ),
+     $                   RWORK( C+ST1 ), RWORK( S+ST1 ),
+     $                   RWORK( NRWORK ), IWORK( IWK ), INFO )
+            IF( INFO.NE.0 ) THEN
+               RETURN
+            END IF
+         END IF
+  320 CONTINUE
+*
+*     Unscale and sort the singular values.
+*
+      CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO )
+      CALL SLASRT( 'D', N, D, INFO )
+      CALL CLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO )
+*
+      RETURN
+*
+*     End of CLALSD
+*
+      END
diff --git a/costa/native/external/lapack/clangb.f b/costa/native/external/lapack/clangb.f
new file mode 100644
index 000000000..3288d307a
--- /dev/null
+++ b/costa/native/external/lapack/clangb.f
@@ -0,0 +1,155 @@
+      REAL             FUNCTION CLANGB( NORM, N, KL, KU, AB, LDAB,
+     $                 WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            KL, KU, LDAB, N
+*     ..
+*     .. Array Arguments ..
+      REAL               WORK( * )
+      COMPLEX            AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLANGB  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the element of  largest absolute value  of an
+*  n by n band matrix  A,  with kl sub-diagonals and ku super-diagonals.
+*
+*  Description
+*  ===========
+*
+*  CLANGB returns the value
+*
+*     CLANGB = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in CLANGB as described
+*          above.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, CLANGB is
+*          set to zero.
+*
+*  KL      (input) INTEGER
+*          The number of sub-diagonals of the matrix A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of super-diagonals of the matrix A.  KU >= 0.
+*
+*  AB      (input) COMPLEX array, dimension (LDAB,N)
+*          The band matrix A, stored in rows 1 to KL+KU+1.  The j-th
+*          column of A is stored in the j-th column of the array AB as
+*          follows:
+*          AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KL+KU+1.
+*
+*  WORK    (workspace) REAL array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I'; otherwise, WORK is not
+*          referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, K, L
+      REAL               SCALE, SUM, VALUE
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         VALUE = ZERO
+         DO 20 J = 1, N
+            DO 10 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 )
+               VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   10       CONTINUE
+   20    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
+*
+*        Find norm1(A).
+*
+         VALUE = ZERO
+         DO 40 J = 1, N
+            SUM = ZERO
+            DO 30 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 )
+               SUM = SUM + ABS( AB( I, J ) )
+   30       CONTINUE
+            VALUE = MAX( VALUE, SUM )
+   40    CONTINUE
+      ELSE IF( LSAME( NORM, 'I' ) ) THEN
+*
+*        Find normI(A).
+*
+         DO 50 I = 1, N
+            WORK( I ) = ZERO
+   50    CONTINUE
+         DO 70 J = 1, N
+            K = KU + 1 - J
+            DO 60 I = MAX( 1, J-KU ), MIN( N, J+KL )
+               WORK( I ) = WORK( I ) + ABS( AB( K+I, J ) )
+   60       CONTINUE
+   70    CONTINUE
+         VALUE = ZERO
+         DO 80 I = 1, N
+            VALUE = MAX( VALUE, WORK( I ) )
+   80    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         DO 90 J = 1, N
+            L = MAX( 1, J-KU )
+            K = KU + 1 - J + L
+            CALL CLASSQ( MIN( N, J+KL )-L+1, AB( K, J ), 1, SCALE, SUM )
+   90    CONTINUE
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      CLANGB = VALUE
+      RETURN
+*
+*     End of CLANGB
+*
+      END
diff --git a/costa/native/external/lapack/clange.f b/costa/native/external/lapack/clange.f
new file mode 100644
index 000000000..99eb75cd6
--- /dev/null
+++ b/costa/native/external/lapack/clange.f
@@ -0,0 +1,146 @@
+      REAL             FUNCTION CLANGE( NORM, M, N, A, LDA, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               WORK( * )
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLANGE  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  complex matrix A.
+*
+*  Description
+*  ===========
+*
+*  CLANGE returns the value
+*
+*     CLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in CLANGE as described
+*          above.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.  When M = 0,
+*          CLANGE is set to zero.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.  When N = 0,
+*          CLANGE is set to zero.
+*
+*  A       (input) COMPLEX array, dimension (LDA,N)
+*          The m by n matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(M,1).
+*
+*  WORK    (workspace) REAL array, dimension (LWORK),
+*          where LWORK >= M when NORM = 'I'; otherwise, WORK is not
+*          referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      REAL               SCALE, SUM, VALUE
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( MIN( M, N ).EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         VALUE = ZERO
+         DO 20 J = 1, N
+            DO 10 I = 1, M
+               VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   10       CONTINUE
+   20    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
+*
+*        Find norm1(A).
+*
+         VALUE = ZERO
+         DO 40 J = 1, N
+            SUM = ZERO
+            DO 30 I = 1, M
+               SUM = SUM + ABS( A( I, J ) )
+   30       CONTINUE
+            VALUE = MAX( VALUE, SUM )
+   40    CONTINUE
+      ELSE IF( LSAME( NORM, 'I' ) ) THEN
+*
+*        Find normI(A).
+*
+         DO 50 I = 1, M
+            WORK( I ) = ZERO
+   50    CONTINUE
+         DO 70 J = 1, N
+            DO 60 I = 1, M
+               WORK( I ) = WORK( I ) + ABS( A( I, J ) )
+   60       CONTINUE
+   70    CONTINUE
+         VALUE = ZERO
+         DO 80 I = 1, M
+            VALUE = MAX( VALUE, WORK( I ) )
+   80    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         DO 90 J = 1, N
+            CALL CLASSQ( M, A( 1, J ), 1, SCALE, SUM )
+   90    CONTINUE
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      CLANGE = VALUE
+      RETURN
+*
+*     End of CLANGE
+*
+      END
diff --git a/costa/native/external/lapack/clangt.f b/costa/native/external/lapack/clangt.f
new file mode 100644
index 000000000..b147fdda1
--- /dev/null
+++ b/costa/native/external/lapack/clangt.f
@@ -0,0 +1,142 @@
+      REAL             FUNCTION CLANGT( NORM, N, DL, D, DU )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            D( * ), DL( * ), DU( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLANGT  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  complex tridiagonal matrix A.
+*
+*  Description
+*  ===========
+*
+*  CLANGT returns the value
+*
+*     CLANGT = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in CLANGT as described
+*          above.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, CLANGT is
+*          set to zero.
+*
+*  DL      (input) COMPLEX array, dimension (N-1)
+*          The (n-1) sub-diagonal elements of A.
+*
+*  D       (input) COMPLEX array, dimension (N)
+*          The diagonal elements of A.
+*
+*  DU      (input) COMPLEX array, dimension (N-1)
+*          The (n-1) super-diagonal elements of A.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      REAL               ANORM, SCALE, SUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.LE.0 ) THEN
+         ANORM = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         ANORM = ABS( D( N ) )
+         DO 10 I = 1, N - 1
+            ANORM = MAX( ANORM, ABS( DL( I ) ) )
+            ANORM = MAX( ANORM, ABS( D( I ) ) )
+            ANORM = MAX( ANORM, ABS( DU( I ) ) )
+   10    CONTINUE
+      ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' ) THEN
+*
+*        Find norm1(A).
+*
+         IF( N.EQ.1 ) THEN
+            ANORM = ABS( D( 1 ) )
+         ELSE
+            ANORM = MAX( ABS( D( 1 ) )+ABS( DL( 1 ) ),
+     $              ABS( D( N ) )+ABS( DU( N-1 ) ) )
+            DO 20 I = 2, N - 1
+               ANORM = MAX( ANORM, ABS( D( I ) )+ABS( DL( I ) )+
+     $                 ABS( DU( I-1 ) ) )
+   20       CONTINUE
+         END IF
+      ELSE IF( LSAME( NORM, 'I' ) ) THEN
+*
+*        Find normI(A).
+*
+         IF( N.EQ.1 ) THEN
+            ANORM = ABS( D( 1 ) )
+         ELSE
+            ANORM = MAX( ABS( D( 1 ) )+ABS( DU( 1 ) ),
+     $              ABS( D( N ) )+ABS( DL( N-1 ) ) )
+            DO 30 I = 2, N - 1
+               ANORM = MAX( ANORM, ABS( D( I ) )+ABS( DU( I ) )+
+     $                 ABS( DL( I-1 ) ) )
+   30       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         CALL CLASSQ( N, D, 1, SCALE, SUM )
+         IF( N.GT.1 ) THEN
+            CALL CLASSQ( N-1, DL, 1, SCALE, SUM )
+            CALL CLASSQ( N-1, DU, 1, SCALE, SUM )
+         END IF
+         ANORM = SCALE*SQRT( SUM )
+      END IF
+*
+      CLANGT = ANORM
+      RETURN
+*
+*     End of CLANGT
+*
+      END
diff --git a/costa/native/external/lapack/clanhb.f b/costa/native/external/lapack/clanhb.f
new file mode 100644
index 000000000..48946c606
--- /dev/null
+++ b/costa/native/external/lapack/clanhb.f
@@ -0,0 +1,202 @@
+      REAL             FUNCTION CLANHB( NORM, UPLO, N, K, AB, LDAB,
+     $                 WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM, UPLO
+      INTEGER            K, LDAB, N
+*     ..
+*     .. Array Arguments ..
+      REAL               WORK( * )
+      COMPLEX            AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLANHB  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the element of  largest absolute value  of an
+*  n by n hermitian band matrix A,  with k super-diagonals.
+*
+*  Description
+*  ===========
+*
+*  CLANHB returns the value
+*
+*     CLANHB = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in CLANHB as described
+*          above.
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          band matrix A is supplied.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, CLANHB is
+*          set to zero.
+*
+*  K       (input) INTEGER
+*          The number of super-diagonals or sub-diagonals of the
+*          band matrix A.  K >= 0.
+*
+*  AB      (input) COMPLEX array, dimension (LDAB,N)
+*          The upper or lower triangle of the hermitian band matrix A,
+*          stored in the first K+1 rows of AB.  The j-th column of A is
+*          stored in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)   = A(i,j) for j<=i<=min(n,j+k).
+*          Note that the imaginary parts of the diagonal elements need
+*          not be set and are assumed to be zero.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= K+1.
+*
+*  WORK    (workspace) REAL array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
+*          WORK is not referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, L
+      REAL               ABSA, SCALE, SUM, VALUE
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 20 J = 1, N
+               DO 10 I = MAX( K+2-J, 1 ), K
+                  VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   10          CONTINUE
+               VALUE = MAX( VALUE, ABS( REAL( AB( K+1, J ) ) ) )
+   20       CONTINUE
+         ELSE
+            DO 40 J = 1, N
+               VALUE = MAX( VALUE, ABS( REAL( AB( 1, J ) ) ) )
+               DO 30 I = 2, MIN( N+1-J, K+1 )
+                  VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR.
+     $         ( NORM.EQ.'1' ) ) THEN
+*
+*        Find normI(A) ( = norm1(A), since A is hermitian).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 60 J = 1, N
+               SUM = ZERO
+               L = K + 1 - J
+               DO 50 I = MAX( 1, J-K ), J - 1
+                  ABSA = ABS( AB( L+I, J ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+   50          CONTINUE
+               WORK( J ) = SUM + ABS( REAL( AB( K+1, J ) ) )
+   60       CONTINUE
+            DO 70 I = 1, N
+               VALUE = MAX( VALUE, WORK( I ) )
+   70       CONTINUE
+         ELSE
+            DO 80 I = 1, N
+               WORK( I ) = ZERO
+   80       CONTINUE
+            DO 100 J = 1, N
+               SUM = WORK( J ) + ABS( REAL( AB( 1, J ) ) )
+               L = 1 - J
+               DO 90 I = J + 1, MIN( N, J+K )
+                  ABSA = ABS( AB( L+I, J ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+   90          CONTINUE
+               VALUE = MAX( VALUE, SUM )
+  100       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         IF( K.GT.0 ) THEN
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               DO 110 J = 2, N
+                  CALL CLASSQ( MIN( J-1, K ), AB( MAX( K+2-J, 1 ), J ),
+     $                         1, SCALE, SUM )
+  110          CONTINUE
+               L = K + 1
+            ELSE
+               DO 120 J = 1, N - 1
+                  CALL CLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE,
+     $                         SUM )
+  120          CONTINUE
+               L = 1
+            END IF
+            SUM = 2*SUM
+         ELSE
+            L = 1
+         END IF
+         DO 130 J = 1, N
+            IF( REAL( AB( L, J ) ).NE.ZERO ) THEN
+               ABSA = ABS( REAL( AB( L, J ) ) )
+               IF( SCALE.LT.ABSA ) THEN
+                  SUM = ONE + SUM*( SCALE / ABSA )**2
+                  SCALE = ABSA
+               ELSE
+                  SUM = SUM + ( ABSA / SCALE )**2
+               END IF
+            END IF
+  130    CONTINUE
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      CLANHB = VALUE
+      RETURN
+*
+*     End of CLANHB
+*
+      END
diff --git a/costa/native/external/lapack/clanhe.f b/costa/native/external/lapack/clanhe.f
new file mode 100644
index 000000000..49de6e746
--- /dev/null
+++ b/costa/native/external/lapack/clanhe.f
@@ -0,0 +1,188 @@
+      REAL             FUNCTION CLANHE( NORM, UPLO, N, A, LDA, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM, UPLO
+      INTEGER            LDA, N
+*     ..
+*     .. Array Arguments ..
+      REAL               WORK( * )
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLANHE  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  complex hermitian matrix A.
+*
+*  Description
+*  ===========
+*
+*  CLANHE returns the value
+*
+*     CLANHE = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in CLANHE as described
+*          above.
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          hermitian matrix A is to be referenced.
+*          = 'U':  Upper triangular part of A is referenced
+*          = 'L':  Lower triangular part of A is referenced
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, CLANHE is
+*          set to zero.
+*
+*  A       (input) COMPLEX array, dimension (LDA,N)
+*          The hermitian matrix A.  If UPLO = 'U', the leading n by n
+*          upper triangular part of A contains the upper triangular part
+*          of the matrix A, and the strictly lower triangular part of A
+*          is not referenced.  If UPLO = 'L', the leading n by n lower
+*          triangular part of A contains the lower triangular part of
+*          the matrix A, and the strictly upper triangular part of A is
+*          not referenced. Note that the imaginary parts of the diagonal
+*          elements need not be set and are assumed to be zero.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(N,1).
+*
+*  WORK    (workspace) REAL array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
+*          WORK is not referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      REAL               ABSA, SCALE, SUM, VALUE
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 20 J = 1, N
+               DO 10 I = 1, J - 1
+                  VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   10          CONTINUE
+               VALUE = MAX( VALUE, ABS( REAL( A( J, J ) ) ) )
+   20       CONTINUE
+         ELSE
+            DO 40 J = 1, N
+               VALUE = MAX( VALUE, ABS( REAL( A( J, J ) ) ) )
+               DO 30 I = J + 1, N
+                  VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR.
+     $         ( NORM.EQ.'1' ) ) THEN
+*
+*        Find normI(A) ( = norm1(A), since A is hermitian).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 60 J = 1, N
+               SUM = ZERO
+               DO 50 I = 1, J - 1
+                  ABSA = ABS( A( I, J ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+   50          CONTINUE
+               WORK( J ) = SUM + ABS( REAL( A( J, J ) ) )
+   60       CONTINUE
+            DO 70 I = 1, N
+               VALUE = MAX( VALUE, WORK( I ) )
+   70       CONTINUE
+         ELSE
+            DO 80 I = 1, N
+               WORK( I ) = ZERO
+   80       CONTINUE
+            DO 100 J = 1, N
+               SUM = WORK( J ) + ABS( REAL( A( J, J ) ) )
+               DO 90 I = J + 1, N
+                  ABSA = ABS( A( I, J ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+   90          CONTINUE
+               VALUE = MAX( VALUE, SUM )
+  100       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 110 J = 2, N
+               CALL CLASSQ( J-1, A( 1, J ), 1, SCALE, SUM )
+  110       CONTINUE
+         ELSE
+            DO 120 J = 1, N - 1
+               CALL CLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM )
+  120       CONTINUE
+         END IF
+         SUM = 2*SUM
+         DO 130 I = 1, N
+            IF( REAL( A( I, I ) ).NE.ZERO ) THEN
+               ABSA = ABS( REAL( A( I, I ) ) )
+               IF( SCALE.LT.ABSA ) THEN
+                  SUM = ONE + SUM*( SCALE / ABSA )**2
+                  SCALE = ABSA
+               ELSE
+                  SUM = SUM + ( ABSA / SCALE )**2
+               END IF
+            END IF
+  130    CONTINUE
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      CLANHE = VALUE
+      RETURN
+*
+*     End of CLANHE
+*
+      END
diff --git a/costa/native/external/lapack/clanhp.f b/costa/native/external/lapack/clanhp.f
new file mode 100644
index 000000000..666821d14
--- /dev/null
+++ b/costa/native/external/lapack/clanhp.f
@@ -0,0 +1,202 @@
+      REAL             FUNCTION CLANHP( NORM, UPLO, N, AP, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM, UPLO
+      INTEGER            N
+*     ..
+*     .. Array Arguments ..
+      REAL               WORK( * )
+      COMPLEX            AP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLANHP  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  complex hermitian matrix A,  supplied in packed form.
+*
+*  Description
+*  ===========
+*
+*  CLANHP returns the value
+*
+*     CLANHP = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in CLANHP as described
+*          above.
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          hermitian matrix A is supplied.
+*          = 'U':  Upper triangular part of A is supplied
+*          = 'L':  Lower triangular part of A is supplied
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, CLANHP is
+*          set to zero.
+*
+*  AP      (input) COMPLEX array, dimension (N*(N+1)/2)
+*          The upper or lower triangle of the hermitian matrix A, packed
+*          columnwise in a linear array.  The j-th column of A is stored
+*          in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*          Note that the  imaginary parts of the diagonal elements need
+*          not be set and are assumed to be zero.
+*
+*  WORK    (workspace) REAL array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
+*          WORK is not referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, K
+      REAL               ABSA, SCALE, SUM, VALUE
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            K = 0
+            DO 20 J = 1, N
+               DO 10 I = K + 1, K + J - 1
+                  VALUE = MAX( VALUE, ABS( AP( I ) ) )
+   10          CONTINUE
+               K = K + J
+               VALUE = MAX( VALUE, ABS( REAL( AP( K ) ) ) )
+   20       CONTINUE
+         ELSE
+            K = 1
+            DO 40 J = 1, N
+               VALUE = MAX( VALUE, ABS( REAL( AP( K ) ) ) )
+               DO 30 I = K + 1, K + N - J
+                  VALUE = MAX( VALUE, ABS( AP( I ) ) )
+   30          CONTINUE
+               K = K + N - J + 1
+   40       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR.
+     $         ( NORM.EQ.'1' ) ) THEN
+*
+*        Find normI(A) ( = norm1(A), since A is hermitian).
+*
+         VALUE = ZERO
+         K = 1
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 60 J = 1, N
+               SUM = ZERO
+               DO 50 I = 1, J - 1
+                  ABSA = ABS( AP( K ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+                  K = K + 1
+   50          CONTINUE
+               WORK( J ) = SUM + ABS( REAL( AP( K ) ) )
+               K = K + 1
+   60       CONTINUE
+            DO 70 I = 1, N
+               VALUE = MAX( VALUE, WORK( I ) )
+   70       CONTINUE
+         ELSE
+            DO 80 I = 1, N
+               WORK( I ) = ZERO
+   80       CONTINUE
+            DO 100 J = 1, N
+               SUM = WORK( J ) + ABS( REAL( AP( K ) ) )
+               K = K + 1
+               DO 90 I = J + 1, N
+                  ABSA = ABS( AP( K ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+                  K = K + 1
+   90          CONTINUE
+               VALUE = MAX( VALUE, SUM )
+  100       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         K = 2
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 110 J = 2, N
+               CALL CLASSQ( J-1, AP( K ), 1, SCALE, SUM )
+               K = K + J
+  110       CONTINUE
+         ELSE
+            DO 120 J = 1, N - 1
+               CALL CLASSQ( N-J, AP( K ), 1, SCALE, SUM )
+               K = K + N - J + 1
+  120       CONTINUE
+         END IF
+         SUM = 2*SUM
+         K = 1
+         DO 130 I = 1, N
+            IF( REAL( AP( K ) ).NE.ZERO ) THEN
+               ABSA = ABS( REAL( AP( K ) ) )
+               IF( SCALE.LT.ABSA ) THEN
+                  SUM = ONE + SUM*( SCALE / ABSA )**2
+                  SCALE = ABSA
+               ELSE
+                  SUM = SUM + ( ABSA / SCALE )**2
+               END IF
+            END IF
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               K = K + I + 1
+            ELSE
+               K = K + N - I + 1
+            END IF
+  130    CONTINUE
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      CLANHP = VALUE
+      RETURN
+*
+*     End of CLANHP
+*
+      END
diff --git a/costa/native/external/lapack/clanhs.f b/costa/native/external/lapack/clanhs.f
new file mode 100644
index 000000000..98d1062e7
--- /dev/null
+++ b/costa/native/external/lapack/clanhs.f
@@ -0,0 +1,143 @@
+      REAL             FUNCTION CLANHS( NORM, N, A, LDA, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            LDA, N
+*     ..
+*     .. Array Arguments ..
+      REAL               WORK( * )
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLANHS  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  Hessenberg matrix A.
+*
+*  Description
+*  ===========
+*
+*  CLANHS returns the value
+*
+*     CLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in CLANHS as described
+*          above.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, CLANHS is
+*          set to zero.
+*
+*  A       (input) COMPLEX array, dimension (LDA,N)
+*          The n by n upper Hessenberg matrix A; the part of A below the
+*          first sub-diagonal is not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(N,1).
+*
+*  WORK    (workspace) REAL array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I'; otherwise, WORK is not
+*          referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      REAL               SCALE, SUM, VALUE
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         VALUE = ZERO
+         DO 20 J = 1, N
+            DO 10 I = 1, MIN( N, J+1 )
+               VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   10       CONTINUE
+   20    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
+*
+*        Find norm1(A).
+*
+         VALUE = ZERO
+         DO 40 J = 1, N
+            SUM = ZERO
+            DO 30 I = 1, MIN( N, J+1 )
+               SUM = SUM + ABS( A( I, J ) )
+   30       CONTINUE
+            VALUE = MAX( VALUE, SUM )
+   40    CONTINUE
+      ELSE IF( LSAME( NORM, 'I' ) ) THEN
+*
+*        Find normI(A).
+*
+         DO 50 I = 1, N
+            WORK( I ) = ZERO
+   50    CONTINUE
+         DO 70 J = 1, N
+            DO 60 I = 1, MIN( N, J+1 )
+               WORK( I ) = WORK( I ) + ABS( A( I, J ) )
+   60       CONTINUE
+   70    CONTINUE
+         VALUE = ZERO
+         DO 80 I = 1, N
+            VALUE = MAX( VALUE, WORK( I ) )
+   80    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         DO 90 J = 1, N
+            CALL CLASSQ( MIN( N, J+1 ), A( 1, J ), 1, SCALE, SUM )
+   90    CONTINUE
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      CLANHS = VALUE
+      RETURN
+*
+*     End of CLANHS
+*
+      END
diff --git a/costa/native/external/lapack/clanht.f b/costa/native/external/lapack/clanht.f
new file mode 100644
index 000000000..16b4ac910
--- /dev/null
+++ b/costa/native/external/lapack/clanht.f
@@ -0,0 +1,126 @@
+      REAL             FUNCTION CLANHT( NORM, N, D, E )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            N
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * )
+      COMPLEX            E( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLANHT  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  complex Hermitian tridiagonal matrix A.
+*
+*  Description
+*  ===========
+*
+*  CLANHT returns the value
+*
+*     CLANHT = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in CLANHT as described
+*          above.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, CLANHT is
+*          set to zero.
+*
+*  D       (input) REAL array, dimension (N)
+*          The diagonal elements of A.
+*
+*  E       (input) COMPLEX array, dimension (N-1)
+*          The (n-1) sub-diagonal or super-diagonal elements of A.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      REAL               ANORM, SCALE, SUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLASSQ, SLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.LE.0 ) THEN
+         ANORM = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         ANORM = ABS( D( N ) )
+         DO 10 I = 1, N - 1
+            ANORM = MAX( ANORM, ABS( D( I ) ) )
+            ANORM = MAX( ANORM, ABS( E( I ) ) )
+   10    CONTINUE
+      ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' .OR.
+     $         LSAME( NORM, 'I' ) ) THEN
+*
+*        Find norm1(A).
+*
+         IF( N.EQ.1 ) THEN
+            ANORM = ABS( D( 1 ) )
+         ELSE
+            ANORM = MAX( ABS( D( 1 ) )+ABS( E( 1 ) ),
+     $              ABS( E( N-1 ) )+ABS( D( N ) ) )
+            DO 20 I = 2, N - 1
+               ANORM = MAX( ANORM, ABS( D( I ) )+ABS( E( I ) )+
+     $                 ABS( E( I-1 ) ) )
+   20       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         IF( N.GT.1 ) THEN
+            CALL CLASSQ( N-1, E, 1, SCALE, SUM )
+            SUM = 2*SUM
+         END IF
+         CALL SLASSQ( N, D, 1, SCALE, SUM )
+         ANORM = SCALE*SQRT( SUM )
+      END IF
+*
+      CLANHT = ANORM
+      RETURN
+*
+*     End of CLANHT
+*
+      END
diff --git a/costa/native/external/lapack/clansb.f b/costa/native/external/lapack/clansb.f
new file mode 100644
index 000000000..5b6fd116d
--- /dev/null
+++ b/costa/native/external/lapack/clansb.f
@@ -0,0 +1,188 @@
+      REAL             FUNCTION CLANSB( NORM, UPLO, N, K, AB, LDAB,
+     $                 WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM, UPLO
+      INTEGER            K, LDAB, N
+*     ..
+*     .. Array Arguments ..
+      REAL               WORK( * )
+      COMPLEX            AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLANSB  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the element of  largest absolute value  of an
+*  n by n symmetric band matrix A,  with k super-diagonals.
+*
+*  Description
+*  ===========
+*
+*  CLANSB returns the value
+*
+*     CLANSB = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in CLANSB as described
+*          above.
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          band matrix A is supplied.
+*          = 'U':  Upper triangular part is supplied
+*          = 'L':  Lower triangular part is supplied
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, CLANSB is
+*          set to zero.
+*
+*  K       (input) INTEGER
+*          The number of super-diagonals or sub-diagonals of the
+*          band matrix A.  K >= 0.
+*
+*  AB      (input) COMPLEX array, dimension (LDAB,N)
+*          The upper or lower triangle of the symmetric band matrix A,
+*          stored in the first K+1 rows of AB.  The j-th column of A is
+*          stored in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)   = A(i,j) for j<=i<=min(n,j+k).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= K+1.
+*
+*  WORK    (workspace) REAL array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
+*          WORK is not referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, L
+      REAL               ABSA, SCALE, SUM, VALUE
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 20 J = 1, N
+               DO 10 I = MAX( K+2-J, 1 ), K + 1
+                  VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+            DO 40 J = 1, N
+               DO 30 I = 1, MIN( N+1-J, K+1 )
+                  VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR.
+     $         ( NORM.EQ.'1' ) ) THEN
+*
+*        Find normI(A) ( = norm1(A), since A is symmetric).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 60 J = 1, N
+               SUM = ZERO
+               L = K + 1 - J
+               DO 50 I = MAX( 1, J-K ), J - 1
+                  ABSA = ABS( AB( L+I, J ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+   50          CONTINUE
+               WORK( J ) = SUM + ABS( AB( K+1, J ) )
+   60       CONTINUE
+            DO 70 I = 1, N
+               VALUE = MAX( VALUE, WORK( I ) )
+   70       CONTINUE
+         ELSE
+            DO 80 I = 1, N
+               WORK( I ) = ZERO
+   80       CONTINUE
+            DO 100 J = 1, N
+               SUM = WORK( J ) + ABS( AB( 1, J ) )
+               L = 1 - J
+               DO 90 I = J + 1, MIN( N, J+K )
+                  ABSA = ABS( AB( L+I, J ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+   90          CONTINUE
+               VALUE = MAX( VALUE, SUM )
+  100       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         IF( K.GT.0 ) THEN
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               DO 110 J = 2, N
+                  CALL CLASSQ( MIN( J-1, K ), AB( MAX( K+2-J, 1 ), J ),
+     $                         1, SCALE, SUM )
+  110          CONTINUE
+               L = K + 1
+            ELSE
+               DO 120 J = 1, N - 1
+                  CALL CLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE,
+     $                         SUM )
+  120          CONTINUE
+               L = 1
+            END IF
+            SUM = 2*SUM
+         ELSE
+            L = 1
+         END IF
+         CALL CLASSQ( N, AB( L, 1 ), LDAB, SCALE, SUM )
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      CLANSB = VALUE
+      RETURN
+*
+*     End of CLANSB
+*
+      END
diff --git a/costa/native/external/lapack/clansp.f b/costa/native/external/lapack/clansp.f
new file mode 100644
index 000000000..b57471cf0
--- /dev/null
+++ b/costa/native/external/lapack/clansp.f
@@ -0,0 +1,207 @@
+      REAL             FUNCTION CLANSP( NORM, UPLO, N, AP, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM, UPLO
+      INTEGER            N
+*     ..
+*     .. Array Arguments ..
+      REAL               WORK( * )
+      COMPLEX            AP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLANSP  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  complex symmetric matrix A,  supplied in packed form.
+*
+*  Description
+*  ===========
+*
+*  CLANSP returns the value
+*
+*     CLANSP = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in CLANSP as described
+*          above.
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is supplied.
+*          = 'U':  Upper triangular part of A is supplied
+*          = 'L':  Lower triangular part of A is supplied
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, CLANSP is
+*          set to zero.
+*
+*  AP      (input) COMPLEX array, dimension (N*(N+1)/2)
+*          The upper or lower triangle of the symmetric matrix A, packed
+*          columnwise in a linear array.  The j-th column of A is stored
+*          in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*  WORK    (workspace) REAL array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
+*          WORK is not referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, K
+      REAL               ABSA, SCALE, SUM, VALUE
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MAX, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            K = 1
+            DO 20 J = 1, N
+               DO 10 I = K, K + J - 1
+                  VALUE = MAX( VALUE, ABS( AP( I ) ) )
+   10          CONTINUE
+               K = K + J
+   20       CONTINUE
+         ELSE
+            K = 1
+            DO 40 J = 1, N
+               DO 30 I = K, K + N - J
+                  VALUE = MAX( VALUE, ABS( AP( I ) ) )
+   30          CONTINUE
+               K = K + N - J + 1
+   40       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR.
+     $         ( NORM.EQ.'1' ) ) THEN
+*
+*        Find normI(A) ( = norm1(A), since A is symmetric).
+*
+         VALUE = ZERO
+         K = 1
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 60 J = 1, N
+               SUM = ZERO
+               DO 50 I = 1, J - 1
+                  ABSA = ABS( AP( K ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+                  K = K + 1
+   50          CONTINUE
+               WORK( J ) = SUM + ABS( AP( K ) )
+               K = K + 1
+   60       CONTINUE
+            DO 70 I = 1, N
+               VALUE = MAX( VALUE, WORK( I ) )
+   70       CONTINUE
+         ELSE
+            DO 80 I = 1, N
+               WORK( I ) = ZERO
+   80       CONTINUE
+            DO 100 J = 1, N
+               SUM = WORK( J ) + ABS( AP( K ) )
+               K = K + 1
+               DO 90 I = J + 1, N
+                  ABSA = ABS( AP( K ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+                  K = K + 1
+   90          CONTINUE
+               VALUE = MAX( VALUE, SUM )
+  100       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         K = 2
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 110 J = 2, N
+               CALL CLASSQ( J-1, AP( K ), 1, SCALE, SUM )
+               K = K + J
+  110       CONTINUE
+         ELSE
+            DO 120 J = 1, N - 1
+               CALL CLASSQ( N-J, AP( K ), 1, SCALE, SUM )
+               K = K + N - J + 1
+  120       CONTINUE
+         END IF
+         SUM = 2*SUM
+         K = 1
+         DO 130 I = 1, N
+            IF( REAL( AP( K ) ).NE.ZERO ) THEN
+               ABSA = ABS( REAL( AP( K ) ) )
+               IF( SCALE.LT.ABSA ) THEN
+                  SUM = ONE + SUM*( SCALE / ABSA )**2
+                  SCALE = ABSA
+               ELSE
+                  SUM = SUM + ( ABSA / SCALE )**2
+               END IF
+            END IF
+            IF( AIMAG( AP( K ) ).NE.ZERO ) THEN
+               ABSA = ABS( AIMAG( AP( K ) ) )
+               IF( SCALE.LT.ABSA ) THEN
+                  SUM = ONE + SUM*( SCALE / ABSA )**2
+                  SCALE = ABSA
+               ELSE
+                  SUM = SUM + ( ABSA / SCALE )**2
+               END IF
+            END IF
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               K = K + I + 1
+            ELSE
+               K = K + N - I + 1
+            END IF
+  130    CONTINUE
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      CLANSP = VALUE
+      RETURN
+*
+*     End of CLANSP
+*
+      END
diff --git a/costa/native/external/lapack/clansy.f b/costa/native/external/lapack/clansy.f
new file mode 100644
index 000000000..eb0523cb5
--- /dev/null
+++ b/costa/native/external/lapack/clansy.f
@@ -0,0 +1,175 @@
+      REAL             FUNCTION CLANSY( NORM, UPLO, N, A, LDA, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM, UPLO
+      INTEGER            LDA, N
+*     ..
+*     .. Array Arguments ..
+      REAL               WORK( * )
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLANSY  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  complex symmetric matrix A.
+*
+*  Description
+*  ===========
+*
+*  CLANSY returns the value
+*
+*     CLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in CLANSY as described
+*          above.
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is to be referenced.
+*          = 'U':  Upper triangular part of A is referenced
+*          = 'L':  Lower triangular part of A is referenced
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, CLANSY is
+*          set to zero.
+*
+*  A       (input) COMPLEX array, dimension (LDA,N)
+*          The symmetric matrix A.  If UPLO = 'U', the leading n by n
+*          upper triangular part of A contains the upper triangular part
+*          of the matrix A, and the strictly lower triangular part of A
+*          is not referenced.  If UPLO = 'L', the leading n by n lower
+*          triangular part of A contains the lower triangular part of
+*          the matrix A, and the strictly upper triangular part of A is
+*          not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(N,1).
+*
+*  WORK    (workspace) REAL array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
+*          WORK is not referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      REAL               ABSA, SCALE, SUM, VALUE
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 20 J = 1, N
+               DO 10 I = 1, J
+                  VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+            DO 40 J = 1, N
+               DO 30 I = J, N
+                  VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR.
+     $         ( NORM.EQ.'1' ) ) THEN
+*
+*        Find normI(A) ( = norm1(A), since A is symmetric).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 60 J = 1, N
+               SUM = ZERO
+               DO 50 I = 1, J - 1
+                  ABSA = ABS( A( I, J ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+   50          CONTINUE
+               WORK( J ) = SUM + ABS( A( J, J ) )
+   60       CONTINUE
+            DO 70 I = 1, N
+               VALUE = MAX( VALUE, WORK( I ) )
+   70       CONTINUE
+         ELSE
+            DO 80 I = 1, N
+               WORK( I ) = ZERO
+   80       CONTINUE
+            DO 100 J = 1, N
+               SUM = WORK( J ) + ABS( A( J, J ) )
+               DO 90 I = J + 1, N
+                  ABSA = ABS( A( I, J ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+   90          CONTINUE
+               VALUE = MAX( VALUE, SUM )
+  100       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 110 J = 2, N
+               CALL CLASSQ( J-1, A( 1, J ), 1, SCALE, SUM )
+  110       CONTINUE
+         ELSE
+            DO 120 J = 1, N - 1
+               CALL CLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM )
+  120       CONTINUE
+         END IF
+         SUM = 2*SUM
+         CALL CLASSQ( N, A, LDA+1, SCALE, SUM )
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      CLANSY = VALUE
+      RETURN
+*
+*     End of CLANSY
+*
+      END
diff --git a/costa/native/external/lapack/clantb.f b/costa/native/external/lapack/clantb.f
new file mode 100644
index 000000000..883634ee0
--- /dev/null
+++ b/costa/native/external/lapack/clantb.f
@@ -0,0 +1,286 @@
+      REAL             FUNCTION CLANTB( NORM, UPLO, DIAG, N, K, AB,
+     $                 LDAB, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORM, UPLO
+      INTEGER            K, LDAB, N
+*     ..
+*     .. Array Arguments ..
+      REAL               WORK( * )
+      COMPLEX            AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLANTB  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the element of  largest absolute value  of an
+*  n by n triangular band matrix A,  with ( k + 1 ) diagonals.
+*
+*  Description
+*  ===========
+*
+*  CLANTB returns the value
+*
+*     CLANTB = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in CLANTB as described
+*          above.
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the matrix A is upper or lower triangular.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  DIAG    (input) CHARACTER*1
+*          Specifies whether or not the matrix A is unit triangular.
+*          = 'N':  Non-unit triangular
+*          = 'U':  Unit triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, CLANTB is
+*          set to zero.
+*
+*  K       (input) INTEGER
+*          The number of super-diagonals of the matrix A if UPLO = 'U',
+*          or the number of sub-diagonals of the matrix A if UPLO = 'L'.
+*          K >= 0.
+*
+*  AB      (input) COMPLEX array, dimension (LDAB,N)
+*          The upper or lower triangular band matrix A, stored in the
+*          first k+1 rows of AB.  The j-th column of A is stored
+*          in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)   = A(i,j) for j<=i<=min(n,j+k).
+*          Note that when DIAG = 'U', the elements of the array AB
+*          corresponding to the diagonal elements of the matrix A are
+*          not referenced, but are assumed to be one.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= K+1.
+*
+*  WORK    (workspace) REAL array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I'; otherwise, WORK is not
+*          referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UDIAG
+      INTEGER            I, J, L
+      REAL               SCALE, SUM, VALUE
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         IF( LSAME( DIAG, 'U' ) ) THEN
+            VALUE = ONE
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               DO 20 J = 1, N
+                  DO 10 I = MAX( K+2-J, 1 ), K
+                     VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40 J = 1, N
+                  DO 30 I = 2, MIN( N+1-J, K+1 )
+                     VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   30             CONTINUE
+   40          CONTINUE
+            END IF
+         ELSE
+            VALUE = ZERO
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               DO 60 J = 1, N
+                  DO 50 I = MAX( K+2-J, 1 ), K + 1
+                     VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80 J = 1, N
+                  DO 70 I = 1, MIN( N+1-J, K+1 )
+                     VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
+*
+*        Find norm1(A).
+*
+         VALUE = ZERO
+         UDIAG = LSAME( DIAG, 'U' )
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 110 J = 1, N
+               IF( UDIAG ) THEN
+                  SUM = ONE
+                  DO 90 I = MAX( K+2-J, 1 ), K
+                     SUM = SUM + ABS( AB( I, J ) )
+   90             CONTINUE
+               ELSE
+                  SUM = ZERO
+                  DO 100 I = MAX( K+2-J, 1 ), K + 1
+                     SUM = SUM + ABS( AB( I, J ) )
+  100             CONTINUE
+               END IF
+               VALUE = MAX( VALUE, SUM )
+  110       CONTINUE
+         ELSE
+            DO 140 J = 1, N
+               IF( UDIAG ) THEN
+                  SUM = ONE
+                  DO 120 I = 2, MIN( N+1-J, K+1 )
+                     SUM = SUM + ABS( AB( I, J ) )
+  120             CONTINUE
+               ELSE
+                  SUM = ZERO
+                  DO 130 I = 1, MIN( N+1-J, K+1 )
+                     SUM = SUM + ABS( AB( I, J ) )
+  130             CONTINUE
+               END IF
+               VALUE = MAX( VALUE, SUM )
+  140       CONTINUE
+         END IF
+      ELSE IF( LSAME( NORM, 'I' ) ) THEN
+*
+*        Find normI(A).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               DO 150 I = 1, N
+                  WORK( I ) = ONE
+  150          CONTINUE
+               DO 170 J = 1, N
+                  L = K + 1 - J
+                  DO 160 I = MAX( 1, J-K ), J - 1
+                     WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) )
+  160             CONTINUE
+  170          CONTINUE
+            ELSE
+               DO 180 I = 1, N
+                  WORK( I ) = ZERO
+  180          CONTINUE
+               DO 200 J = 1, N
+                  L = K + 1 - J
+                  DO 190 I = MAX( 1, J-K ), J
+                     WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) )
+  190             CONTINUE
+  200          CONTINUE
+            END IF
+         ELSE
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               DO 210 I = 1, N
+                  WORK( I ) = ONE
+  210          CONTINUE
+               DO 230 J = 1, N
+                  L = 1 - J
+                  DO 220 I = J + 1, MIN( N, J+K )
+                     WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) )
+  220             CONTINUE
+  230          CONTINUE
+            ELSE
+               DO 240 I = 1, N
+                  WORK( I ) = ZERO
+  240          CONTINUE
+               DO 260 J = 1, N
+                  L = 1 - J
+                  DO 250 I = J, MIN( N, J+K )
+                     WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) )
+  250             CONTINUE
+  260          CONTINUE
+            END IF
+         END IF
+         DO 270 I = 1, N
+            VALUE = MAX( VALUE, WORK( I ) )
+  270    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               SCALE = ONE
+               SUM = N
+               IF( K.GT.0 ) THEN
+                  DO 280 J = 2, N
+                     CALL CLASSQ( MIN( J-1, K ),
+     $                            AB( MAX( K+2-J, 1 ), J ), 1, SCALE,
+     $                            SUM )
+  280             CONTINUE
+               END IF
+            ELSE
+               SCALE = ZERO
+               SUM = ONE
+               DO 290 J = 1, N
+                  CALL CLASSQ( MIN( J, K+1 ), AB( MAX( K+2-J, 1 ), J ),
+     $                         1, SCALE, SUM )
+  290          CONTINUE
+            END IF
+         ELSE
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               SCALE = ONE
+               SUM = N
+               IF( K.GT.0 ) THEN
+                  DO 300 J = 1, N - 1
+                     CALL CLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE,
+     $                            SUM )
+  300             CONTINUE
+               END IF
+            ELSE
+               SCALE = ZERO
+               SUM = ONE
+               DO 310 J = 1, N
+                  CALL CLASSQ( MIN( N-J+1, K+1 ), AB( 1, J ), 1, SCALE,
+     $                         SUM )
+  310          CONTINUE
+            END IF
+         END IF
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      CLANTB = VALUE
+      RETURN
+*
+*     End of CLANTB
+*
+      END
diff --git a/costa/native/external/lapack/clantp.f b/costa/native/external/lapack/clantp.f
new file mode 100644
index 000000000..8f27a3619
--- /dev/null
+++ b/costa/native/external/lapack/clantp.f
@@ -0,0 +1,287 @@
+      REAL             FUNCTION CLANTP( NORM, UPLO, DIAG, N, AP, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORM, UPLO
+      INTEGER            N
+*     ..
+*     .. Array Arguments ..
+      REAL               WORK( * )
+      COMPLEX            AP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLANTP  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  triangular matrix A, supplied in packed form.
+*
+*  Description
+*  ===========
+*
+*  CLANTP returns the value
+*
+*     CLANTP = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in CLANTP as described
+*          above.
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the matrix A is upper or lower triangular.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  DIAG    (input) CHARACTER*1
+*          Specifies whether or not the matrix A is unit triangular.
+*          = 'N':  Non-unit triangular
+*          = 'U':  Unit triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, CLANTP is
+*          set to zero.
+*
+*  AP      (input) COMPLEX array, dimension (N*(N+1)/2)
+*          The upper or lower triangular matrix A, packed columnwise in
+*          a linear array.  The j-th column of A is stored in the array
+*          AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*          Note that when DIAG = 'U', the elements of the array AP
+*          corresponding to the diagonal elements of the matrix A are
+*          not referenced, but are assumed to be one.
+*
+*  WORK    (workspace) REAL array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I'; otherwise, WORK is not
+*          referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UDIAG
+      INTEGER            I, J, K
+      REAL               SCALE, SUM, VALUE
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         K = 1
+         IF( LSAME( DIAG, 'U' ) ) THEN
+            VALUE = ONE
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               DO 20 J = 1, N
+                  DO 10 I = K, K + J - 2
+                     VALUE = MAX( VALUE, ABS( AP( I ) ) )
+   10             CONTINUE
+                  K = K + J
+   20          CONTINUE
+            ELSE
+               DO 40 J = 1, N
+                  DO 30 I = K + 1, K + N - J
+                     VALUE = MAX( VALUE, ABS( AP( I ) ) )
+   30             CONTINUE
+                  K = K + N - J + 1
+   40          CONTINUE
+            END IF
+         ELSE
+            VALUE = ZERO
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               DO 60 J = 1, N
+                  DO 50 I = K, K + J - 1
+                     VALUE = MAX( VALUE, ABS( AP( I ) ) )
+   50             CONTINUE
+                  K = K + J
+   60          CONTINUE
+            ELSE
+               DO 80 J = 1, N
+                  DO 70 I = K, K + N - J
+                     VALUE = MAX( VALUE, ABS( AP( I ) ) )
+   70             CONTINUE
+                  K = K + N - J + 1
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
+*
+*        Find norm1(A).
+*
+         VALUE = ZERO
+         K = 1
+         UDIAG = LSAME( DIAG, 'U' )
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 110 J = 1, N
+               IF( UDIAG ) THEN
+                  SUM = ONE
+                  DO 90 I = K, K + J - 2
+                     SUM = SUM + ABS( AP( I ) )
+   90             CONTINUE
+               ELSE
+                  SUM = ZERO
+                  DO 100 I = K, K + J - 1
+                     SUM = SUM + ABS( AP( I ) )
+  100             CONTINUE
+               END IF
+               K = K + J
+               VALUE = MAX( VALUE, SUM )
+  110       CONTINUE
+         ELSE
+            DO 140 J = 1, N
+               IF( UDIAG ) THEN
+                  SUM = ONE
+                  DO 120 I = K + 1, K + N - J
+                     SUM = SUM + ABS( AP( I ) )
+  120             CONTINUE
+               ELSE
+                  SUM = ZERO
+                  DO 130 I = K, K + N - J
+                     SUM = SUM + ABS( AP( I ) )
+  130             CONTINUE
+               END IF
+               K = K + N - J + 1
+               VALUE = MAX( VALUE, SUM )
+  140       CONTINUE
+         END IF
+      ELSE IF( LSAME( NORM, 'I' ) ) THEN
+*
+*        Find normI(A).
+*
+         K = 1
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               DO 150 I = 1, N
+                  WORK( I ) = ONE
+  150          CONTINUE
+               DO 170 J = 1, N
+                  DO 160 I = 1, J - 1
+                     WORK( I ) = WORK( I ) + ABS( AP( K ) )
+                     K = K + 1
+  160             CONTINUE
+                  K = K + 1
+  170          CONTINUE
+            ELSE
+               DO 180 I = 1, N
+                  WORK( I ) = ZERO
+  180          CONTINUE
+               DO 200 J = 1, N
+                  DO 190 I = 1, J
+                     WORK( I ) = WORK( I ) + ABS( AP( K ) )
+                     K = K + 1
+  190             CONTINUE
+  200          CONTINUE
+            END IF
+         ELSE
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               DO 210 I = 1, N
+                  WORK( I ) = ONE
+  210          CONTINUE
+               DO 230 J = 1, N
+                  K = K + 1
+                  DO 220 I = J + 1, N
+                     WORK( I ) = WORK( I ) + ABS( AP( K ) )
+                     K = K + 1
+  220             CONTINUE
+  230          CONTINUE
+            ELSE
+               DO 240 I = 1, N
+                  WORK( I ) = ZERO
+  240          CONTINUE
+               DO 260 J = 1, N
+                  DO 250 I = J, N
+                     WORK( I ) = WORK( I ) + ABS( AP( K ) )
+                     K = K + 1
+  250             CONTINUE
+  260          CONTINUE
+            END IF
+         END IF
+         VALUE = ZERO
+         DO 270 I = 1, N
+            VALUE = MAX( VALUE, WORK( I ) )
+  270    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               SCALE = ONE
+               SUM = N
+               K = 2
+               DO 280 J = 2, N
+                  CALL CLASSQ( J-1, AP( K ), 1, SCALE, SUM )
+                  K = K + J
+  280          CONTINUE
+            ELSE
+               SCALE = ZERO
+               SUM = ONE
+               K = 1
+               DO 290 J = 1, N
+                  CALL CLASSQ( J, AP( K ), 1, SCALE, SUM )
+                  K = K + J
+  290          CONTINUE
+            END IF
+         ELSE
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               SCALE = ONE
+               SUM = N
+               K = 2
+               DO 300 J = 1, N - 1
+                  CALL CLASSQ( N-J, AP( K ), 1, SCALE, SUM )
+                  K = K + N - J + 1
+  300          CONTINUE
+            ELSE
+               SCALE = ZERO
+               SUM = ONE
+               K = 1
+               DO 310 J = 1, N
+                  CALL CLASSQ( N-J+1, AP( K ), 1, SCALE, SUM )
+                  K = K + N - J + 1
+  310          CONTINUE
+            END IF
+         END IF
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      CLANTP = VALUE
+      RETURN
+*
+*     End of CLANTP
+*
+      END
diff --git a/costa/native/external/lapack/clantr.f b/costa/native/external/lapack/clantr.f
new file mode 100644
index 000000000..4cd8b5268
--- /dev/null
+++ b/costa/native/external/lapack/clantr.f
@@ -0,0 +1,278 @@
+      REAL             FUNCTION CLANTR( NORM, UPLO, DIAG, M, N, A, LDA,
+     $                 WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORM, UPLO
+      INTEGER            LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               WORK( * )
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLANTR  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  trapezoidal or triangular matrix A.
+*
+*  Description
+*  ===========
+*
+*  CLANTR returns the value
+*
+*     CLANTR = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in CLANTR as described
+*          above.
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the matrix A is upper or lower trapezoidal.
+*          = 'U':  Upper trapezoidal
+*          = 'L':  Lower trapezoidal
+*          Note that A is triangular instead of trapezoidal if M = N.
+*
+*  DIAG    (input) CHARACTER*1
+*          Specifies whether or not the matrix A has unit diagonal.
+*          = 'N':  Non-unit diagonal
+*          = 'U':  Unit diagonal
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0, and if
+*          UPLO = 'U', M <= N.  When M = 0, CLANTR is set to zero.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0, and if
+*          UPLO = 'L', N <= M.  When N = 0, CLANTR is set to zero.
+*
+*  A       (input) COMPLEX array, dimension (LDA,N)
+*          The trapezoidal matrix A (A is triangular if M = N).
+*          If UPLO = 'U', the leading m by n upper trapezoidal part of
+*          the array A contains the upper trapezoidal matrix, and the
+*          strictly lower triangular part of A is not referenced.
+*          If UPLO = 'L', the leading m by n lower trapezoidal part of
+*          the array A contains the lower trapezoidal matrix, and the
+*          strictly upper triangular part of A is not referenced.  Note
+*          that when DIAG = 'U', the diagonal elements of A are not
+*          referenced and are assumed to be one.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(M,1).
+*
+*  WORK    (workspace) REAL array, dimension (LWORK),
+*          where LWORK >= M when NORM = 'I'; otherwise, WORK is not
+*          referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UDIAG
+      INTEGER            I, J
+      REAL               SCALE, SUM, VALUE
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( MIN( M, N ).EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         IF( LSAME( DIAG, 'U' ) ) THEN
+            VALUE = ONE
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               DO 20 J = 1, N
+                  DO 10 I = 1, MIN( M, J-1 )
+                     VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40 J = 1, N
+                  DO 30 I = J + 1, M
+                     VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   30             CONTINUE
+   40          CONTINUE
+            END IF
+         ELSE
+            VALUE = ZERO
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               DO 60 J = 1, N
+                  DO 50 I = 1, MIN( M, J )
+                     VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80 J = 1, N
+                  DO 70 I = J, M
+                     VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
+*
+*        Find norm1(A).
+*
+         VALUE = ZERO
+         UDIAG = LSAME( DIAG, 'U' )
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 110 J = 1, N
+               IF( ( UDIAG ) .AND. ( J.LE.M ) ) THEN
+                  SUM = ONE
+                  DO 90 I = 1, J - 1
+                     SUM = SUM + ABS( A( I, J ) )
+   90             CONTINUE
+               ELSE
+                  SUM = ZERO
+                  DO 100 I = 1, MIN( M, J )
+                     SUM = SUM + ABS( A( I, J ) )
+  100             CONTINUE
+               END IF
+               VALUE = MAX( VALUE, SUM )
+  110       CONTINUE
+         ELSE
+            DO 140 J = 1, N
+               IF( UDIAG ) THEN
+                  SUM = ONE
+                  DO 120 I = J + 1, M
+                     SUM = SUM + ABS( A( I, J ) )
+  120             CONTINUE
+               ELSE
+                  SUM = ZERO
+                  DO 130 I = J, M
+                     SUM = SUM + ABS( A( I, J ) )
+  130             CONTINUE
+               END IF
+               VALUE = MAX( VALUE, SUM )
+  140       CONTINUE
+         END IF
+      ELSE IF( LSAME( NORM, 'I' ) ) THEN
+*
+*        Find normI(A).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               DO 150 I = 1, M
+                  WORK( I ) = ONE
+  150          CONTINUE
+               DO 170 J = 1, N
+                  DO 160 I = 1, MIN( M, J-1 )
+                     WORK( I ) = WORK( I ) + ABS( A( I, J ) )
+  160             CONTINUE
+  170          CONTINUE
+            ELSE
+               DO 180 I = 1, M
+                  WORK( I ) = ZERO
+  180          CONTINUE
+               DO 200 J = 1, N
+                  DO 190 I = 1, MIN( M, J )
+                     WORK( I ) = WORK( I ) + ABS( A( I, J ) )
+  190             CONTINUE
+  200          CONTINUE
+            END IF
+         ELSE
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               DO 210 I = 1, N
+                  WORK( I ) = ONE
+  210          CONTINUE
+               DO 220 I = N + 1, M
+                  WORK( I ) = ZERO
+  220          CONTINUE
+               DO 240 J = 1, N
+                  DO 230 I = J + 1, M
+                     WORK( I ) = WORK( I ) + ABS( A( I, J ) )
+  230             CONTINUE
+  240          CONTINUE
+            ELSE
+               DO 250 I = 1, M
+                  WORK( I ) = ZERO
+  250          CONTINUE
+               DO 270 J = 1, N
+                  DO 260 I = J, M
+                     WORK( I ) = WORK( I ) + ABS( A( I, J ) )
+  260             CONTINUE
+  270          CONTINUE
+            END IF
+         END IF
+         VALUE = ZERO
+         DO 280 I = 1, M
+            VALUE = MAX( VALUE, WORK( I ) )
+  280    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               SCALE = ONE
+               SUM = MIN( M, N )
+               DO 290 J = 2, N
+                  CALL CLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE, SUM )
+  290          CONTINUE
+            ELSE
+               SCALE = ZERO
+               SUM = ONE
+               DO 300 J = 1, N
+                  CALL CLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE, SUM )
+  300          CONTINUE
+            END IF
+         ELSE
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               SCALE = ONE
+               SUM = MIN( M, N )
+               DO 310 J = 1, N
+                  CALL CLASSQ( M-J, A( MIN( M, J+1 ), J ), 1, SCALE,
+     $                         SUM )
+  310          CONTINUE
+            ELSE
+               SCALE = ZERO
+               SUM = ONE
+               DO 320 J = 1, N
+                  CALL CLASSQ( M-J+1, A( J, J ), 1, SCALE, SUM )
+  320          CONTINUE
+            END IF
+         END IF
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      CLANTR = VALUE
+      RETURN
+*
+*     End of CLANTR
+*
+      END
diff --git a/costa/native/external/lapack/clapll.f b/costa/native/external/lapack/clapll.f
new file mode 100644
index 000000000..faf36ae46
--- /dev/null
+++ b/costa/native/external/lapack/clapll.f
@@ -0,0 +1,104 @@
+      SUBROUTINE CLAPLL( N, X, INCX, Y, INCY, SSMIN )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, INCY, N
+      REAL               SSMIN
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  Given two column vectors X and Y, let
+*
+*                       A = ( X Y ).
+*
+*  The subroutine first computes the QR factorization of A = Q*R,
+*  and then computes the SVD of the 2-by-2 upper triangular matrix R.
+*  The smaller singular value of R is returned in SSMIN, which is used
+*  as the measurement of the linear dependency of the vectors X and Y.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The length of the vectors X and Y.
+*
+*  X       (input/output) COMPLEX array, dimension (1+(N-1)*INCX)
+*          On entry, X contains the N-vector X.
+*          On exit, X is overwritten.
+*
+*  INCX    (input) INTEGER
+*          The increment between successive elements of X. INCX > 0.
+*
+*  Y       (input/output) COMPLEX array, dimension (1+(N-1)*INCY)
+*          On entry, Y contains the N-vector Y.
+*          On exit, Y is overwritten.
+*
+*  INCY    (input) INTEGER
+*          The increment between successive elements of Y. INCY > 0.
+*
+*  SSMIN   (output) REAL
+*          The smallest singular value of the N-by-2 matrix A = ( X Y ).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+      COMPLEX            CONE
+      PARAMETER          ( CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      REAL               SSMAX
+      COMPLEX            A11, A12, A22, C, TAU
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, CONJG
+*     ..
+*     .. External Functions ..
+      COMPLEX            CDOTC
+      EXTERNAL           CDOTC
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CLARFG, SLAS2
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.1 ) THEN
+         SSMIN = ZERO
+         RETURN
+      END IF
+*
+*     Compute the QR factorization of the N-by-2 matrix ( X Y )
+*
+      CALL CLARFG( N, X( 1 ), X( 1+INCX ), INCX, TAU )
+      A11 = X( 1 )
+      X( 1 ) = CONE
+*
+      C = -CONJG( TAU )*CDOTC( N, X, INCX, Y, INCY )
+      CALL CAXPY( N, C, X, INCX, Y, INCY )
+*
+      CALL CLARFG( N-1, Y( 1+INCY ), Y( 1+2*INCY ), INCY, TAU )
+*
+      A12 = Y( 1 )
+      A22 = Y( 1+INCY )
+*
+*     Compute the SVD of 2-by-2 Upper triangular matrix.
+*
+      CALL SLAS2( ABS( A11 ), ABS( A12 ), ABS( A22 ), SSMIN, SSMAX )
+*
+      RETURN
+*
+*     End of CLAPLL
+*
+      END
diff --git a/costa/native/external/lapack/clapmt.f b/costa/native/external/lapack/clapmt.f
new file mode 100644
index 000000000..58d43ce44
--- /dev/null
+++ b/costa/native/external/lapack/clapmt.f
@@ -0,0 +1,135 @@
+      SUBROUTINE CLAPMT( FORWRD, M, N, X, LDX, K )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      LOGICAL            FORWRD
+      INTEGER            LDX, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            K( * )
+      COMPLEX            X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLAPMT rearranges the columns of the M by N matrix X as specified
+*  by the permutation K(1),K(2),...,K(N) of the integers 1,...,N.
+*  If FORWRD = .TRUE.,  forward permutation:
+*
+*       X(*,K(J)) is moved X(*,J) for J = 1,2,...,N.
+*
+*  If FORWRD = .FALSE., backward permutation:
+*
+*       X(*,J) is moved to X(*,K(J)) for J = 1,2,...,N.
+*
+*  Arguments
+*  =========
+*
+*  FORWRD  (input) LOGICAL
+*          = .TRUE., forward permutation
+*          = .FALSE., backward permutation
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix X. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix X. N >= 0.
+*
+*  X       (input/output) COMPLEX array, dimension (LDX,N)
+*          On entry, the M by N matrix X.
+*          On exit, X contains the permuted matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X, LDX >= MAX(1,M).
+*
+*  K       (input) INTEGER array, dimension (N)
+*          On entry, K contains the permutation vector.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, II, J, IN
+      COMPLEX            TEMP
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.LE.1 )
+     $   RETURN
+*
+      DO 10 I = 1, N
+         K( I ) = -K( I )
+   10 CONTINUE
+*
+      IF( FORWRD ) THEN
+*
+*        Forward permutation
+*
+         DO 60 I = 1, N
+*
+            IF( K( I ).GT.0 )
+     $         GO TO 40
+*
+            J = I
+            K( J ) = -K( J )
+            IN = K( J )
+*
+   20       CONTINUE
+            IF( K( IN ).GT.0 )
+     $         GO TO 40
+*
+            DO 30 II = 1, M
+               TEMP = X( II, J )
+               X( II, J ) = X( II, IN )
+               X( II, IN ) = TEMP
+   30       CONTINUE
+*
+            K( IN ) = -K( IN )
+            J = IN
+            IN = K( IN )
+            GO TO 20
+*
+   40       CONTINUE
+*
+   60    CONTINUE
+*
+      ELSE
+*
+*        Backward permutation
+*
+         DO 110 I = 1, N
+*
+            IF( K( I ).GT.0 )
+     $         GO TO 100
+*
+            K( I ) = -K( I )
+            J = K( I )
+   80       CONTINUE
+            IF( J.EQ.I )
+     $         GO TO 100
+*
+            DO 90 II = 1, M
+               TEMP = X( II, I )
+               X( II, I ) = X( II, J )
+               X( II, J ) = TEMP
+   90       CONTINUE
+*
+            K( J ) = -K( J )
+            J = K( J )
+            GO TO 80
+*
+  100       CONTINUE
+
+  110    CONTINUE
+*
+      END IF
+*
+      RETURN
+*
+*     End of CLAPMT
+*
+      END
diff --git a/costa/native/external/lapack/claqgb.f b/costa/native/external/lapack/claqgb.f
new file mode 100644
index 000000000..9580d6292
--- /dev/null
+++ b/costa/native/external/lapack/claqgb.f
@@ -0,0 +1,170 @@
+      SUBROUTINE CLAQGB( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND,
+     $                   AMAX, EQUED )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED
+      INTEGER            KL, KU, LDAB, M, N
+      REAL               AMAX, COLCND, ROWCND
+*     ..
+*     .. Array Arguments ..
+      REAL               C( * ), R( * )
+      COMPLEX            AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLAQGB equilibrates a general M by N band matrix A with KL
+*  subdiagonals and KU superdiagonals using the row and scaling factors
+*  in the vectors R and C.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  AB      (input/output) COMPLEX array, dimension (LDAB,N)
+*          On entry, the matrix A in band storage, in rows 1 to KL+KU+1.
+*          The j-th column of A is stored in the j-th column of the
+*          array AB as follows:
+*          AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
+*
+*          On exit, the equilibrated matrix, in the same storage format
+*          as A.  See EQUED for the form of the equilibrated matrix.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDA >= KL+KU+1.
+*
+*  R       (output) REAL array, dimension (M)
+*          The row scale factors for A.
+*
+*  C       (output) REAL array, dimension (N)
+*          The column scale factors for A.
+*
+*  ROWCND  (output) REAL
+*          Ratio of the smallest R(i) to the largest R(i).
+*
+*  COLCND  (output) REAL
+*          Ratio of the smallest C(i) to the largest C(i).
+*
+*  AMAX    (input) REAL
+*          Absolute value of largest matrix entry.
+*
+*  EQUED   (output) CHARACTER*1
+*          Specifies the form of equilibration that was done.
+*          = 'N':  No equilibration
+*          = 'R':  Row equilibration, i.e., A has been premultiplied by
+*                  diag(R).
+*          = 'C':  Column equilibration, i.e., A has been postmultiplied
+*                  by diag(C).
+*          = 'B':  Both row and column equilibration, i.e., A has been
+*                  replaced by diag(R) * A * diag(C).
+*
+*  Internal Parameters
+*  ===================
+*
+*  THRESH is a threshold value used to decide if row or column scaling
+*  should be done based on the ratio of the row or column scaling
+*  factors.  If ROWCND < THRESH, row scaling is done, and if
+*  COLCND < THRESH, column scaling is done.
+*
+*  LARGE and SMALL are threshold values used to decide if row scaling
+*  should be done based on the absolute size of the largest matrix
+*  element.  If AMAX > LARGE or AMAX < SMALL, row scaling is done.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, THRESH
+      PARAMETER          ( ONE = 1.0E+0, THRESH = 0.1E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      REAL               CJ, LARGE, SMALL
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 .OR. N.LE.0 ) THEN
+         EQUED = 'N'
+         RETURN
+      END IF
+*
+*     Initialize LARGE and SMALL.
+*
+      SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' )
+      LARGE = ONE / SMALL
+*
+      IF( ROWCND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE )
+     $     THEN
+*
+*        No row scaling
+*
+         IF( COLCND.GE.THRESH ) THEN
+*
+*           No column scaling
+*
+            EQUED = 'N'
+         ELSE
+*
+*           Column scaling
+*
+            DO 20 J = 1, N
+               CJ = C( J )
+               DO 10 I = MAX( 1, J-KU ), MIN( M, J+KL )
+                  AB( KU+1+I-J, J ) = CJ*AB( KU+1+I-J, J )
+   10          CONTINUE
+   20       CONTINUE
+            EQUED = 'C'
+         END IF
+      ELSE IF( COLCND.GE.THRESH ) THEN
+*
+*        Row scaling, no column scaling
+*
+         DO 40 J = 1, N
+            DO 30 I = MAX( 1, J-KU ), MIN( M, J+KL )
+               AB( KU+1+I-J, J ) = R( I )*AB( KU+1+I-J, J )
+   30       CONTINUE
+   40    CONTINUE
+         EQUED = 'R'
+      ELSE
+*
+*        Row and column scaling
+*
+         DO 60 J = 1, N
+            CJ = C( J )
+            DO 50 I = MAX( 1, J-KU ), MIN( M, J+KL )
+               AB( KU+1+I-J, J ) = CJ*R( I )*AB( KU+1+I-J, J )
+   50       CONTINUE
+   60    CONTINUE
+         EQUED = 'B'
+      END IF
+*
+      RETURN
+*
+*     End of CLAQGB
+*
+      END
diff --git a/costa/native/external/lapack/claqge.f b/costa/native/external/lapack/claqge.f
new file mode 100644
index 000000000..750e9a192
--- /dev/null
+++ b/costa/native/external/lapack/claqge.f
@@ -0,0 +1,156 @@
+      SUBROUTINE CLAQGE( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX,
+     $                   EQUED )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED
+      INTEGER            LDA, M, N
+      REAL               AMAX, COLCND, ROWCND
+*     ..
+*     .. Array Arguments ..
+      REAL               C( * ), R( * )
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLAQGE equilibrates a general M by N matrix A using the row and
+*  scaling factors in the vectors R and C.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the M by N matrix A.
+*          On exit, the equilibrated matrix.  See EQUED for the form of
+*          the equilibrated matrix.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(M,1).
+*
+*  R       (input) REAL array, dimension (M)
+*          The row scale factors for A.
+*
+*  C       (input) REAL array, dimension (N)
+*          The column scale factors for A.
+*
+*  ROWCND  (input) REAL
+*          Ratio of the smallest R(i) to the largest R(i).
+*
+*  COLCND  (input) REAL
+*          Ratio of the smallest C(i) to the largest C(i).
+*
+*  AMAX    (input) REAL
+*          Absolute value of largest matrix entry.
+*
+*  EQUED   (output) CHARACTER*1
+*          Specifies the form of equilibration that was done.
+*          = 'N':  No equilibration
+*          = 'R':  Row equilibration, i.e., A has been premultiplied by
+*                  diag(R).
+*          = 'C':  Column equilibration, i.e., A has been postmultiplied
+*                  by diag(C).
+*          = 'B':  Both row and column equilibration, i.e., A has been
+*                  replaced by diag(R) * A * diag(C).
+*
+*  Internal Parameters
+*  ===================
+*
+*  THRESH is a threshold value used to decide if row or column scaling
+*  should be done based on the ratio of the row or column scaling
+*  factors.  If ROWCND < THRESH, row scaling is done, and if
+*  COLCND < THRESH, column scaling is done.
+*
+*  LARGE and SMALL are threshold values used to decide if row scaling
+*  should be done based on the absolute size of the largest matrix
+*  element.  If AMAX > LARGE or AMAX < SMALL, row scaling is done.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, THRESH
+      PARAMETER          ( ONE = 1.0E+0, THRESH = 0.1E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      REAL               CJ, LARGE, SMALL
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 .OR. N.LE.0 ) THEN
+         EQUED = 'N'
+         RETURN
+      END IF
+*
+*     Initialize LARGE and SMALL.
+*
+      SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' )
+      LARGE = ONE / SMALL
+*
+      IF( ROWCND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE )
+     $     THEN
+*
+*        No row scaling
+*
+         IF( COLCND.GE.THRESH ) THEN
+*
+*           No column scaling
+*
+            EQUED = 'N'
+         ELSE
+*
+*           Column scaling
+*
+            DO 20 J = 1, N
+               CJ = C( J )
+               DO 10 I = 1, M
+                  A( I, J ) = CJ*A( I, J )
+   10          CONTINUE
+   20       CONTINUE
+            EQUED = 'C'
+         END IF
+      ELSE IF( COLCND.GE.THRESH ) THEN
+*
+*        Row scaling, no column scaling
+*
+         DO 40 J = 1, N
+            DO 30 I = 1, M
+               A( I, J ) = R( I )*A( I, J )
+   30       CONTINUE
+   40    CONTINUE
+         EQUED = 'R'
+      ELSE
+*
+*        Row and column scaling
+*
+         DO 60 J = 1, N
+            CJ = C( J )
+            DO 50 I = 1, M
+               A( I, J ) = CJ*R( I )*A( I, J )
+   50       CONTINUE
+   60    CONTINUE
+         EQUED = 'B'
+      END IF
+*
+      RETURN
+*
+*     End of CLAQGE
+*
+      END
diff --git a/costa/native/external/lapack/claqhb.f b/costa/native/external/lapack/claqhb.f
new file mode 100644
index 000000000..eb43e2b5e
--- /dev/null
+++ b/costa/native/external/lapack/claqhb.f
@@ -0,0 +1,152 @@
+      SUBROUTINE CLAQHB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, UPLO
+      INTEGER            KD, LDAB, N
+      REAL               AMAX, SCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               S( * )
+      COMPLEX            AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLAQHB equilibrates a symmetric band matrix A using the scaling
+*  factors in the vector S.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of super-diagonals of the matrix A if UPLO = 'U',
+*          or the number of sub-diagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) COMPLEX array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*          On exit, if INFO = 0, the triangular factor U or L from the
+*          Cholesky factorization A = U'*U or A = L*L' of the band
+*          matrix A, in the same storage format as A.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  S       (output) REAL array, dimension (N)
+*          The scale factors for A.
+*
+*  SCOND   (input) REAL
+*          Ratio of the smallest S(i) to the largest S(i).
+*
+*  AMAX    (input) REAL
+*          Absolute value of largest matrix entry.
+*
+*  EQUED   (output) CHARACTER*1
+*          Specifies whether or not equilibration was done.
+*          = 'N':  No equilibration.
+*          = 'Y':  Equilibration was done, i.e., A has been replaced by
+*                  diag(S) * A * diag(S).
+*
+*  Internal Parameters
+*  ===================
+*
+*  THRESH is a threshold value used to decide if scaling should be done
+*  based on the ratio of the scaling factors.  If SCOND < THRESH,
+*  scaling is done.
+*
+*  LARGE and SMALL are threshold values used to decide if scaling should
+*  be done based on the absolute size of the largest matrix element.
+*  If AMAX > LARGE or AMAX < SMALL, scaling is done.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, THRESH
+      PARAMETER          ( ONE = 1.0E+0, THRESH = 0.1E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      REAL               CJ, LARGE, SMALL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 ) THEN
+         EQUED = 'N'
+         RETURN
+      END IF
+*
+*     Initialize LARGE and SMALL.
+*
+      SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' )
+      LARGE = ONE / SMALL
+*
+      IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN
+*
+*        No equilibration
+*
+         EQUED = 'N'
+      ELSE
+*
+*        Replace A by diag(S) * A * diag(S).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*           Upper triangle of A is stored in band format.
+*
+            DO 20 J = 1, N
+               CJ = S( J )
+               DO 10 I = MAX( 1, J-KD ), J - 1
+                  AB( KD+1+I-J, J ) = CJ*S( I )*AB( KD+1+I-J, J )
+   10          CONTINUE
+               AB( KD+1, J ) = CJ*CJ*REAL( AB( KD+1, J ) )
+   20       CONTINUE
+         ELSE
+*
+*           Lower triangle of A is stored.
+*
+            DO 40 J = 1, N
+               CJ = S( J )
+               AB( 1, J ) = CJ*CJ*REAL( AB( 1, J ) )
+               DO 30 I = J + 1, MIN( N, J+KD )
+                  AB( 1+I-J, J ) = CJ*S( I )*AB( 1+I-J, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         EQUED = 'Y'
+      END IF
+*
+      RETURN
+*
+*     End of CLAQHB
+*
+      END
diff --git a/costa/native/external/lapack/claqhe.f b/costa/native/external/lapack/claqhe.f
new file mode 100644
index 000000000..3234b8dc4
--- /dev/null
+++ b/costa/native/external/lapack/claqhe.f
@@ -0,0 +1,148 @@
+      SUBROUTINE CLAQHE( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, UPLO
+      INTEGER            LDA, N
+      REAL               AMAX, SCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               S( * )
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLAQHE equilibrates a Hermitian matrix A using the scaling factors
+*  in the vector S.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          Hermitian matrix A is stored.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+*          n by n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n by n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if EQUED = 'Y', the equilibrated matrix:
+*          diag(S) * A * diag(S).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(N,1).
+*
+*  S       (input) REAL array, dimension (N)
+*          The scale factors for A.
+*
+*  SCOND   (input) REAL
+*          Ratio of the smallest S(i) to the largest S(i).
+*
+*  AMAX    (input) REAL
+*          Absolute value of largest matrix entry.
+*
+*  EQUED   (output) CHARACTER*1
+*          Specifies whether or not equilibration was done.
+*          = 'N':  No equilibration.
+*          = 'Y':  Equilibration was done, i.e., A has been replaced by
+*                  diag(S) * A * diag(S).
+*
+*  Internal Parameters
+*  ===================
+*
+*  THRESH is a threshold value used to decide if scaling should be done
+*  based on the ratio of the scaling factors.  If SCOND < THRESH,
+*  scaling is done.
+*
+*  LARGE and SMALL are threshold values used to decide if scaling should
+*  be done based on the absolute size of the largest matrix element.
+*  If AMAX > LARGE or AMAX < SMALL, scaling is done.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, THRESH
+      PARAMETER          ( ONE = 1.0E+0, THRESH = 0.1E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      REAL               CJ, LARGE, SMALL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 ) THEN
+         EQUED = 'N'
+         RETURN
+      END IF
+*
+*     Initialize LARGE and SMALL.
+*
+      SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' )
+      LARGE = ONE / SMALL
+*
+      IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN
+*
+*        No equilibration
+*
+         EQUED = 'N'
+      ELSE
+*
+*        Replace A by diag(S) * A * diag(S).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*           Upper triangle of A is stored.
+*
+            DO 20 J = 1, N
+               CJ = S( J )
+               DO 10 I = 1, J - 1
+                  A( I, J ) = CJ*S( I )*A( I, J )
+   10          CONTINUE
+               A( J, J ) = CJ*CJ*REAL( A( J, J ) )
+   20       CONTINUE
+         ELSE
+*
+*           Lower triangle of A is stored.
+*
+            DO 40 J = 1, N
+               CJ = S( J )
+               A( J, J ) = CJ*CJ*REAL( A( J, J ) )
+               DO 30 I = J + 1, N
+                  A( I, J ) = CJ*S( I )*A( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         EQUED = 'Y'
+      END IF
+*
+      RETURN
+*
+*     End of CLAQHE
+*
+      END
diff --git a/costa/native/external/lapack/claqhp.f b/costa/native/external/lapack/claqhp.f
new file mode 100644
index 000000000..dab8e8dae
--- /dev/null
+++ b/costa/native/external/lapack/claqhp.f
@@ -0,0 +1,147 @@
+      SUBROUTINE CLAQHP( UPLO, N, AP, S, SCOND, AMAX, EQUED )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, UPLO
+      INTEGER            N
+      REAL               AMAX, SCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               S( * )
+      COMPLEX            AP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLAQHP equilibrates a Hermitian matrix A using the scaling factors
+*  in the vector S.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          Hermitian matrix A is stored.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) COMPLEX array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, the equilibrated matrix:  diag(S) * A * diag(S), in
+*          the same storage format as A.
+*
+*  S       (input) REAL array, dimension (N)
+*          The scale factors for A.
+*
+*  SCOND   (input) REAL
+*          Ratio of the smallest S(i) to the largest S(i).
+*
+*  AMAX    (input) REAL
+*          Absolute value of largest matrix entry.
+*
+*  EQUED   (output) CHARACTER*1
+*          Specifies whether or not equilibration was done.
+*          = 'N':  No equilibration.
+*          = 'Y':  Equilibration was done, i.e., A has been replaced by
+*                  diag(S) * A * diag(S).
+*
+*  Internal Parameters
+*  ===================
+*
+*  THRESH is a threshold value used to decide if scaling should be done
+*  based on the ratio of the scaling factors.  If SCOND < THRESH,
+*  scaling is done.
+*
+*  LARGE and SMALL are threshold values used to decide if scaling should
+*  be done based on the absolute size of the largest matrix element.
+*  If AMAX > LARGE or AMAX < SMALL, scaling is done.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, THRESH
+      PARAMETER          ( ONE = 1.0E+0, THRESH = 0.1E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, JC
+      REAL               CJ, LARGE, SMALL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 ) THEN
+         EQUED = 'N'
+         RETURN
+      END IF
+*
+*     Initialize LARGE and SMALL.
+*
+      SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' )
+      LARGE = ONE / SMALL
+*
+      IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN
+*
+*        No equilibration
+*
+         EQUED = 'N'
+      ELSE
+*
+*        Replace A by diag(S) * A * diag(S).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*           Upper triangle of A is stored.
+*
+            JC = 1
+            DO 20 J = 1, N
+               CJ = S( J )
+               DO 10 I = 1, J - 1
+                  AP( JC+I-1 ) = CJ*S( I )*AP( JC+I-1 )
+   10          CONTINUE
+               AP( JC+J-1 ) = CJ*CJ*REAL( AP( JC+J-1 ) )
+               JC = JC + J
+   20       CONTINUE
+         ELSE
+*
+*           Lower triangle of A is stored.
+*
+            JC = 1
+            DO 40 J = 1, N
+               CJ = S( J )
+               AP( JC ) = CJ*CJ*REAL( AP( JC ) )
+               DO 30 I = J + 1, N
+                  AP( JC+I-J ) = CJ*S( I )*AP( JC+I-J )
+   30          CONTINUE
+               JC = JC + N - J + 1
+   40       CONTINUE
+         END IF
+         EQUED = 'Y'
+      END IF
+*
+      RETURN
+*
+*     End of CLAQHP
+*
+      END
diff --git a/costa/native/external/lapack/claqp2.f b/costa/native/external/lapack/claqp2.f
new file mode 100644
index 000000000..28bbe6414
--- /dev/null
+++ b/costa/native/external/lapack/claqp2.f
@@ -0,0 +1,170 @@
+      SUBROUTINE CLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2,
+     $                   WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            LDA, M, N, OFFSET
+*     ..
+*     .. Array Arguments ..
+      INTEGER            JPVT( * )
+      REAL               VN1( * ), VN2( * )
+      COMPLEX            A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLAQP2 computes a QR factorization with column pivoting of
+*  the block A(OFFSET+1:M,1:N).
+*  The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A. N >= 0.
+*
+*  OFFSET  (input) INTEGER
+*          The number of rows of the matrix A that must be pivoted
+*          but no factorized. OFFSET >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, the upper triangle of block A(OFFSET+1:M,1:N) is 
+*          the triangular factor obtained; the elements in block 
+*          A(OFFSET+1:M,1:N) below the diagonal, together with the 
+*          array TAU, represent the orthogonal matrix Q as a product of
+*          elementary reflectors. Block A(1:OFFSET,1:N) has been 
+*          accordingly pivoted, but no factorized.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  JPVT    (input/output) INTEGER array, dimension (N)
+*          On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted
+*          to the front of A*P (a leading column); if JPVT(i) = 0,
+*          the i-th column of A is a free column.
+*          On exit, if JPVT(i) = k, then the i-th column of A*P
+*          was the k-th column of A.
+*
+*  TAU     (output) COMPLEX array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors.
+*
+*  VN1     (input/output) REAL array, dimension (N)
+*          The vector with the partial column norms.
+*
+*  VN2     (input/output) REAL array, dimension (N)
+*          The vector with the exact column norms.
+*
+*  WORK    (workspace) COMPLEX array, dimension (N)
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain
+*    X. Sun, Computer Science Dept., Duke University, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      COMPLEX            CONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0,
+     $                   CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, ITEMP, J, MN, OFFPI, PVT
+      REAL               TEMP, TEMP2
+      COMPLEX            AII
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLARF, CLARFG, CSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, CONJG, MAX, MIN, SQRT
+*     ..
+*     .. External Functions ..
+      INTEGER            ISAMAX
+      REAL               SCNRM2
+      EXTERNAL           ISAMAX, SCNRM2
+*     ..
+*     .. Executable Statements ..
+*
+      MN = MIN( M-OFFSET, N )
+*
+*     Compute factorization.
+*
+      DO 20 I = 1, MN
+*
+         OFFPI = OFFSET + I
+*
+*        Determine ith pivot column and swap if necessary.
+*
+         PVT = ( I-1 ) + ISAMAX( N-I+1, VN1( I ), 1 )
+*
+         IF( PVT.NE.I ) THEN
+            CALL CSWAP( M, A( 1, PVT ), 1, A( 1, I ), 1 )
+            ITEMP = JPVT( PVT )
+            JPVT( PVT ) = JPVT( I )
+            JPVT( I ) = ITEMP
+            VN1( PVT ) = VN1( I )
+            VN2( PVT ) = VN2( I )
+         END IF
+*
+*        Generate elementary reflector H(i).
+*
+         IF( OFFPI.LT.M ) THEN
+            CALL CLARFG( M-OFFPI+1, A( OFFPI, I ), A( OFFPI+1, I ), 1,
+     $                   TAU( I ) )
+         ELSE
+            CALL CLARFG( 1, A( M, I ), A( M, I ), 1, TAU( I ) )
+         END IF
+*
+         IF( I.LT.N ) THEN
+*
+*           Apply H(i)' to A(offset+i:m,i+1:n) from the left.
+*
+            AII = A( OFFPI, I )
+            A( OFFPI, I ) = CONE
+            CALL CLARF( 'Left', M-OFFPI+1, N-I, A( OFFPI, I ), 1,
+     $                  CONJG( TAU( I ) ), A( OFFPI, I+1 ), LDA,
+     $                  WORK( 1 ) )
+            A( OFFPI, I ) = AII
+         END IF
+*
+*        Update partial column norms.
+*
+         DO 10 J = I + 1, N
+            IF( VN1( J ).NE.ZERO ) THEN
+               TEMP = ONE - ( ABS( A( OFFPI, J ) ) / VN1( J ) )**2
+               TEMP = MAX( TEMP, ZERO )
+               TEMP2 = ONE + 0.05*TEMP*( VN1( J ) / VN2( J ) )**2
+               IF( TEMP2.EQ.ONE ) THEN
+                  IF( OFFPI.LT.M ) THEN
+                     VN1( J ) = SCNRM2( M-OFFPI, A( OFFPI+1, J ), 1 )
+                     VN2( J ) = VN1( J )
+                  ELSE
+                     VN1( J ) = ZERO
+                     VN2( J ) = ZERO
+                  END IF
+               ELSE
+                  VN1( J ) = VN1( J )*SQRT( TEMP )
+               END IF
+            END IF
+   10    CONTINUE
+*
+   20 CONTINUE
+*
+      RETURN
+*
+*     End of CLAQP2
+*
+      END
diff --git a/costa/native/external/lapack/claqps.f b/costa/native/external/lapack/claqps.f
new file mode 100644
index 000000000..ebfc09ea8
--- /dev/null
+++ b/costa/native/external/lapack/claqps.f
@@ -0,0 +1,260 @@
+      SUBROUTINE CLAQPS( M, N, OFFSET, NB, KB, A, LDA, JPVT, TAU, VN1,
+     $                   VN2, AUXV, F, LDF )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            KB, LDA, LDF, M, N, NB, OFFSET
+*     ..
+*     .. Array Arguments ..
+      INTEGER            JPVT( * )
+      REAL               VN1( * ), VN2( * )
+      COMPLEX            A( LDA, * ), AUXV( * ), F( LDF, * ), TAU( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLAQPS computes a step of QR factorization with column pivoting
+*  of a complex M-by-N matrix A by using Blas-3.  It tries to factorize
+*  NB columns from A starting from the row OFFSET+1, and updates all
+*  of the matrix with Blas-3 xGEMM.
+*
+*  In some cases, due to catastrophic cancellations, it cannot
+*  factorize NB columns.  Hence, the actual number of factorized
+*  columns is returned in KB.
+*
+*  Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A. N >= 0
+*
+*  OFFSET  (input) INTEGER
+*          The number of rows of A that have been factorized in
+*          previous steps.
+*
+*  NB      (input) INTEGER
+*          The number of columns to factorize.
+*
+*  KB      (output) INTEGER
+*          The number of columns actually factorized.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, block A(OFFSET+1:M,1:KB) is the triangular
+*          factor obtained and block A(1:OFFSET,1:N) has been
+*          accordingly pivoted, but no factorized.
+*          The rest of the matrix, block A(OFFSET+1:M,KB+1:N) has
+*          been updated.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  JPVT    (input/output) INTEGER array, dimension (N)
+*          JPVT(I) = K <==> Column K of the full matrix A has been
+*          permuted into position I in AP.
+*
+*  TAU     (output) COMPLEX array, dimension (KB)
+*          The scalar factors of the elementary reflectors.
+*
+*  VN1     (input/output) REAL array, dimension (N)
+*          The vector with the partial column norms.
+*
+*  VN2     (input/output) REAL array, dimension (N)
+*          The vector with the exact column norms.
+*
+*  AUXV    (input/output) COMPLEX array, dimension (NB)
+*          Auxiliar vector.
+*
+*  F       (input/output) COMPLEX array, dimension (LDF,NB)
+*          Matrix F' = L*Y'*A.
+*
+*  LDF     (input) INTEGER
+*          The leading dimension of the array F. LDF >= max(1,N).
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain
+*    X. Sun, Computer Science Dept., Duke University, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      COMPLEX            CZERO, CONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0,
+     $                   CZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            ITEMP, J, K, LASTRK, LSTICC, PVT, RK
+      REAL               TEMP, TEMP2
+      COMPLEX            AKK
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEMM, CGEMV, CLARFG, CSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, CONJG, MAX, MIN, NINT, REAL, SQRT
+*     ..
+*     .. External Functions ..
+      INTEGER            ISAMAX
+      REAL               SCNRM2
+      EXTERNAL           ISAMAX, SCNRM2
+*     ..
+*     .. Executable Statements ..
+*
+      LASTRK = MIN( M, N+OFFSET )
+      LSTICC = 0
+      K = 0
+*
+*     Beginning of while loop.
+*
+   10 CONTINUE
+      IF( ( K.LT.NB ) .AND. ( LSTICC.EQ.0 ) ) THEN
+         K = K + 1
+         RK = OFFSET + K
+*
+*        Determine ith pivot column and swap if necessary
+*
+         PVT = ( K-1 ) + ISAMAX( N-K+1, VN1( K ), 1 )
+         IF( PVT.NE.K ) THEN
+            CALL CSWAP( M, A( 1, PVT ), 1, A( 1, K ), 1 )
+            CALL CSWAP( K-1, F( PVT, 1 ), LDF, F( K, 1 ), LDF )
+            ITEMP = JPVT( PVT )
+            JPVT( PVT ) = JPVT( K )
+            JPVT( K ) = ITEMP
+            VN1( PVT ) = VN1( K )
+            VN2( PVT ) = VN2( K )
+         END IF
+*
+*        Apply previous Householder reflectors to column K:
+*        A(RK:M,K) := A(RK:M,K) - A(RK:M,1:K-1)*F(K,1:K-1)'.
+*
+         IF( K.GT.1 ) THEN
+*CC            CALL CGEMM( 'No transpose', 'Conjugate transpose',
+*CC     $                  M-RK+1, 1, K-1, -CONE, A( RK, 1 ), LDA,
+*CC     $                  F( K, 1 ), LDF, CONE, A( RK, K ), LDA )
+            DO 20 J = 1, K - 1
+               F( K, J ) = CONJG( F( K, J ) )
+   20       CONTINUE
+            CALL CGEMV( 'No transpose', M-RK+1, K-1, -CONE, A( RK, 1 ),
+     $                  LDA, F( K, 1 ), LDF, CONE, A( RK, K ), 1 )
+            DO 30 J = 1, K - 1
+               F( K, J ) = CONJG( F( K, J ) )
+   30       CONTINUE
+         END IF
+*
+*        Generate elementary reflector H(k).
+*
+         IF( RK.LT.M ) THEN
+            CALL CLARFG( M-RK+1, A( RK, K ), A( RK+1, K ), 1, TAU( K ) )
+         ELSE
+            CALL CLARFG( 1, A( RK, K ), A( RK, K ), 1, TAU( K ) )
+         END IF
+*
+         AKK = A( RK, K )
+         A( RK, K ) = CONE
+*
+*        Compute Kth column of F:
+*
+*        Compute  F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)'*A(RK:M,K).
+*
+         IF( K.LT.N ) THEN
+            CALL CGEMV( 'Conjugate transpose', M-RK+1, N-K, TAU( K ),
+     $                  A( RK, K+1 ), LDA, A( RK, K ), 1, CZERO,
+     $                  F( K+1, K ), 1 )
+         END IF
+*
+*        Padding F(1:K,K) with zeros.
+*
+         DO 40 J = 1, K
+            F( J, K ) = CZERO
+   40    CONTINUE
+*
+*        Incremental updating of F:
+*        F(1:N,K) := F(1:N,K) - tau(K)*F(1:N,1:K-1)*A(RK:M,1:K-1)'
+*                    *A(RK:M,K).
+*
+         IF( K.GT.1 ) THEN
+            CALL CGEMV( 'Conjugate transpose', M-RK+1, K-1, -TAU( K ),
+     $                  A( RK, 1 ), LDA, A( RK, K ), 1, CZERO,
+     $                  AUXV( 1 ), 1 )
+*
+            CALL CGEMV( 'No transpose', N, K-1, CONE, F( 1, 1 ), LDF,
+     $                  AUXV( 1 ), 1, CONE, F( 1, K ), 1 )
+         END IF
+*
+*        Update the current row of A:
+*        A(RK,K+1:N) := A(RK,K+1:N) - A(RK,1:K)*F(K+1:N,1:K)'.
+*
+         IF( K.LT.N ) THEN
+            CALL CGEMM( 'No transpose', 'Conjugate transpose', 1, N-K,
+     $                  K, -CONE, A( RK, 1 ), LDA, F( K+1, 1 ), LDF,
+     $                  CONE, A( RK, K+1 ), LDA )
+         END IF
+*
+*        Update partial column norms.
+*
+         IF( RK.LT.LASTRK ) THEN
+            DO 50 J = K + 1, N
+               IF( VN1( J ).NE.ZERO ) THEN
+                  TEMP = ABS( A( RK, J ) ) / VN1( J )
+                  TEMP = MAX( ZERO, ( ONE+TEMP )*( ONE-TEMP ) )
+                  TEMP2 = ONE + 0.05*TEMP*( VN1( J ) / VN2( J ) )**2
+                  IF( TEMP2.EQ.ONE ) THEN
+                     VN2( J ) = REAL( LSTICC )
+                     LSTICC = J
+                  ELSE
+                     VN1( J ) = VN1( J )*SQRT( TEMP )
+                  END IF
+               END IF
+   50       CONTINUE
+         END IF
+*
+         A( RK, K ) = AKK
+*
+*        End of while loop.
+*
+         GO TO 10
+      END IF
+      KB = K
+      RK = OFFSET + KB
+*
+*     Apply the block reflector to the rest of the matrix:
+*     A(OFFSET+KB+1:M,KB+1:N) := A(OFFSET+KB+1:M,KB+1:N) -
+*                         A(OFFSET+KB+1:M,1:KB)*F(KB+1:N,1:KB)'.
+*
+      IF( KB.LT.MIN( N, M-OFFSET ) ) THEN
+         CALL CGEMM( 'No transpose', 'Conjugate transpose', M-RK, N-KB,
+     $               KB, -CONE, A( RK+1, 1 ), LDA, F( KB+1, 1 ), LDF,
+     $               CONE, A( RK+1, KB+1 ), LDA )
+      END IF
+*
+*     Recomputation of difficult columns.
+*
+   60 CONTINUE
+      IF( LSTICC.GT.0 ) THEN
+         ITEMP = NINT( VN2( LSTICC ) )
+         VN1( LSTICC ) = SCNRM2( M-RK, A( RK+1, LSTICC ), 1 )
+         VN2( LSTICC ) = VN1( LSTICC )
+         LSTICC = ITEMP
+         GO TO 60
+      END IF
+*
+      RETURN
+*
+*     End of CLAQPS
+*
+      END
diff --git a/costa/native/external/lapack/claqsb.f b/costa/native/external/lapack/claqsb.f
new file mode 100644
index 000000000..8522262dc
--- /dev/null
+++ b/costa/native/external/lapack/claqsb.f
@@ -0,0 +1,150 @@
+      SUBROUTINE CLAQSB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, UPLO
+      INTEGER            KD, LDAB, N
+      REAL               AMAX, SCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               S( * )
+      COMPLEX            AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLAQSB equilibrates a symmetric band matrix A using the scaling
+*  factors in the vector S.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of super-diagonals of the matrix A if UPLO = 'U',
+*          or the number of sub-diagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) COMPLEX array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*          On exit, if INFO = 0, the triangular factor U or L from the
+*          Cholesky factorization A = U'*U or A = L*L' of the band
+*          matrix A, in the same storage format as A.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  S       (output) REAL array, dimension (N)
+*          The scale factors for A.
+*
+*  SCOND   (input) REAL
+*          Ratio of the smallest S(i) to the largest S(i).
+*
+*  AMAX    (input) REAL
+*          Absolute value of largest matrix entry.
+*
+*  EQUED   (output) CHARACTER*1
+*          Specifies whether or not equilibration was done.
+*          = 'N':  No equilibration.
+*          = 'Y':  Equilibration was done, i.e., A has been replaced by
+*                  diag(S) * A * diag(S).
+*
+*  Internal Parameters
+*  ===================
+*
+*  THRESH is a threshold value used to decide if scaling should be done
+*  based on the ratio of the scaling factors.  If SCOND < THRESH,
+*  scaling is done.
+*
+*  LARGE and SMALL are threshold values used to decide if scaling should
+*  be done based on the absolute size of the largest matrix element.
+*  If AMAX > LARGE or AMAX < SMALL, scaling is done.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, THRESH
+      PARAMETER          ( ONE = 1.0E+0, THRESH = 0.1E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      REAL               CJ, LARGE, SMALL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 ) THEN
+         EQUED = 'N'
+         RETURN
+      END IF
+*
+*     Initialize LARGE and SMALL.
+*
+      SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' )
+      LARGE = ONE / SMALL
+*
+      IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN
+*
+*        No equilibration
+*
+         EQUED = 'N'
+      ELSE
+*
+*        Replace A by diag(S) * A * diag(S).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*           Upper triangle of A is stored in band format.
+*
+            DO 20 J = 1, N
+               CJ = S( J )
+               DO 10 I = MAX( 1, J-KD ), J
+                  AB( KD+1+I-J, J ) = CJ*S( I )*AB( KD+1+I-J, J )
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+*
+*           Lower triangle of A is stored.
+*
+            DO 40 J = 1, N
+               CJ = S( J )
+               DO 30 I = J, MIN( N, J+KD )
+                  AB( 1+I-J, J ) = CJ*S( I )*AB( 1+I-J, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         EQUED = 'Y'
+      END IF
+*
+      RETURN
+*
+*     End of CLAQSB
+*
+      END
diff --git a/costa/native/external/lapack/claqsp.f b/costa/native/external/lapack/claqsp.f
new file mode 100644
index 000000000..591d0ef44
--- /dev/null
+++ b/costa/native/external/lapack/claqsp.f
@@ -0,0 +1,142 @@
+      SUBROUTINE CLAQSP( UPLO, N, AP, S, SCOND, AMAX, EQUED )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, UPLO
+      INTEGER            N
+      REAL               AMAX, SCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               S( * )
+      COMPLEX            AP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLAQSP equilibrates a symmetric matrix A using the scaling factors
+*  in the vector S.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) COMPLEX array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, the equilibrated matrix:  diag(S) * A * diag(S), in
+*          the same storage format as A.
+*
+*  S       (input) REAL array, dimension (N)
+*          The scale factors for A.
+*
+*  SCOND   (input) REAL
+*          Ratio of the smallest S(i) to the largest S(i).
+*
+*  AMAX    (input) REAL
+*          Absolute value of largest matrix entry.
+*
+*  EQUED   (output) CHARACTER*1
+*          Specifies whether or not equilibration was done.
+*          = 'N':  No equilibration.
+*          = 'Y':  Equilibration was done, i.e., A has been replaced by
+*                  diag(S) * A * diag(S).
+*
+*  Internal Parameters
+*  ===================
+*
+*  THRESH is a threshold value used to decide if scaling should be done
+*  based on the ratio of the scaling factors.  If SCOND < THRESH,
+*  scaling is done.
+*
+*  LARGE and SMALL are threshold values used to decide if scaling should
+*  be done based on the absolute size of the largest matrix element.
+*  If AMAX > LARGE or AMAX < SMALL, scaling is done.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, THRESH
+      PARAMETER          ( ONE = 1.0E+0, THRESH = 0.1E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, JC
+      REAL               CJ, LARGE, SMALL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 ) THEN
+         EQUED = 'N'
+         RETURN
+      END IF
+*
+*     Initialize LARGE and SMALL.
+*
+      SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' )
+      LARGE = ONE / SMALL
+*
+      IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN
+*
+*        No equilibration
+*
+         EQUED = 'N'
+      ELSE
+*
+*        Replace A by diag(S) * A * diag(S).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*           Upper triangle of A is stored.
+*
+            JC = 1
+            DO 20 J = 1, N
+               CJ = S( J )
+               DO 10 I = 1, J
+                  AP( JC+I-1 ) = CJ*S( I )*AP( JC+I-1 )
+   10          CONTINUE
+               JC = JC + J
+   20       CONTINUE
+         ELSE
+*
+*           Lower triangle of A is stored.
+*
+            JC = 1
+            DO 40 J = 1, N
+               CJ = S( J )
+               DO 30 I = J, N
+                  AP( JC+I-J ) = CJ*S( I )*AP( JC+I-J )
+   30          CONTINUE
+               JC = JC + N - J + 1
+   40       CONTINUE
+         END IF
+         EQUED = 'Y'
+      END IF
+*
+      RETURN
+*
+*     End of CLAQSP
+*
+      END
diff --git a/costa/native/external/lapack/claqsy.f b/costa/native/external/lapack/claqsy.f
new file mode 100644
index 000000000..e920873c5
--- /dev/null
+++ b/costa/native/external/lapack/claqsy.f
@@ -0,0 +1,143 @@
+      SUBROUTINE CLAQSY( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, UPLO
+      INTEGER            LDA, N
+      REAL               AMAX, SCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               S( * )
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLAQSY equilibrates a symmetric matrix A using the scaling factors
+*  in the vector S.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          n by n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n by n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if EQUED = 'Y', the equilibrated matrix:
+*          diag(S) * A * diag(S).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(N,1).
+*
+*  S       (input) REAL array, dimension (N)
+*          The scale factors for A.
+*
+*  SCOND   (input) REAL
+*          Ratio of the smallest S(i) to the largest S(i).
+*
+*  AMAX    (input) REAL
+*          Absolute value of largest matrix entry.
+*
+*  EQUED   (output) CHARACTER*1
+*          Specifies whether or not equilibration was done.
+*          = 'N':  No equilibration.
+*          = 'Y':  Equilibration was done, i.e., A has been replaced by
+*                  diag(S) * A * diag(S).
+*
+*  Internal Parameters
+*  ===================
+*
+*  THRESH is a threshold value used to decide if scaling should be done
+*  based on the ratio of the scaling factors.  If SCOND < THRESH,
+*  scaling is done.
+*
+*  LARGE and SMALL are threshold values used to decide if scaling should
+*  be done based on the absolute size of the largest matrix element.
+*  If AMAX > LARGE or AMAX < SMALL, scaling is done.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, THRESH
+      PARAMETER          ( ONE = 1.0E+0, THRESH = 0.1E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      REAL               CJ, LARGE, SMALL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 ) THEN
+         EQUED = 'N'
+         RETURN
+      END IF
+*
+*     Initialize LARGE and SMALL.
+*
+      SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' )
+      LARGE = ONE / SMALL
+*
+      IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN
+*
+*        No equilibration
+*
+         EQUED = 'N'
+      ELSE
+*
+*        Replace A by diag(S) * A * diag(S).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*           Upper triangle of A is stored.
+*
+            DO 20 J = 1, N
+               CJ = S( J )
+               DO 10 I = 1, J
+                  A( I, J ) = CJ*S( I )*A( I, J )
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+*
+*           Lower triangle of A is stored.
+*
+            DO 40 J = 1, N
+               CJ = S( J )
+               DO 30 I = J, N
+                  A( I, J ) = CJ*S( I )*A( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         EQUED = 'Y'
+      END IF
+*
+      RETURN
+*
+*     End of CLAQSY
+*
+      END
diff --git a/costa/native/external/lapack/clar1v.f b/costa/native/external/lapack/clar1v.f
new file mode 100644
index 000000000..558a4dade
--- /dev/null
+++ b/costa/native/external/lapack/clar1v.f
@@ -0,0 +1,328 @@
+      SUBROUTINE CLAR1V( N, B1, BN, SIGMA, D, L, LD, LLD, GERSCH, Z,
+     $                   ZTZ, MINGMA, R, ISUPPZ, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            B1, BN, N, R
+      REAL               MINGMA, SIGMA, ZTZ
+*     ..
+*     .. Array Arguments ..
+      INTEGER            ISUPPZ( * )
+      REAL               D( * ), GERSCH( * ), L( * ), LD( * ), LLD( * ),
+     $                   WORK( * )
+      COMPLEX            Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLAR1V computes the (scaled) r-th column of the inverse of
+*  the sumbmatrix in rows B1 through BN of the tridiagonal matrix
+*  L D L^T - sigma I. The following steps accomplish this computation :
+*  (a) Stationary qd transform,  L D L^T - sigma I = L(+) D(+) L(+)^T,
+*  (b) Progressive qd transform, L D L^T - sigma I = U(-) D(-) U(-)^T,
+*  (c) Computation of the diagonal elements of the inverse of
+*      L D L^T - sigma I by combining the above transforms, and choosing
+*      r as the index where the diagonal of the inverse is (one of the)
+*      largest in magnitude.
+*  (d) Computation of the (scaled) r-th column of the inverse using the
+*      twisted factorization obtained by combining the top part of the
+*      the stationary and the bottom part of the progressive transform.
+*
+*  Arguments
+*  =========
+*
+*  N        (input) INTEGER
+*           The order of the matrix L D L^T.
+*
+*  B1       (input) INTEGER
+*           First index of the submatrix of L D L^T.
+*
+*  BN       (input) INTEGER
+*           Last index of the submatrix of L D L^T.
+*
+*  SIGMA    (input) REAL
+*           The shift. Initially, when R = 0, SIGMA should be a good
+*           approximation to an eigenvalue of L D L^T.
+*
+*  L        (input) REAL array, dimension (N-1)
+*           The (n-1) subdiagonal elements of the unit bidiagonal matrix
+*           L, in elements 1 to N-1.
+*
+*  D        (input) REAL array, dimension (N)
+*           The n diagonal elements of the diagonal matrix D.
+*
+*  LD       (input) REAL array, dimension (N-1)
+*           The n-1 elements L(i)*D(i).
+*
+*  LLD      (input) REAL array, dimension (N-1)
+*           The n-1 elements L(i)*L(i)*D(i).
+*
+*  GERSCH   (input) REAL array, dimension (2*N)
+*           The n Gerschgorin intervals. These are used to restrict
+*           the initial search for R, when R is input as 0.
+*
+*  Z        (output) COMPLEX array, dimension (N)
+*           The (scaled) r-th column of the inverse. Z(R) is returned
+*           to be 1.
+*
+*  ZTZ      (output) REAL
+*           The square of the norm of Z.
+*
+*  MINGMA   (output) REAL
+*           The reciprocal of the largest (in magnitude) diagonal
+*           element of the inverse of L D L^T - sigma I.
+*
+*  R        (input/output) INTEGER
+*           Initially, R should be input to be 0 and is then output as
+*           the index where the diagonal element of the inverse is
+*           largest in magnitude. In later iterations, this same value
+*           of R should be input.
+*
+*  ISUPPZ   (output) INTEGER array, dimension (2)
+*           The support of the vector in Z, i.e., the vector Z is
+*           nonzero only in elements ISUPPZ(1) through ISUPPZ( 2 ).
+*
+*  WORK     (workspace) REAL array, dimension (4*N)
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Inderjit Dhillon, IBM Almaden, USA
+*     Osni Marques, LBNL/NERSC, USA
+*     Ken Stanley, Computer Science Division, University of
+*       California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            BLKSIZ
+      PARAMETER          ( BLKSIZ = 32 )
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+      COMPLEX            CONE
+      PARAMETER          ( CONE = ( 1.0E0, 0.0E0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            SAWNAN
+      INTEGER            FROM, I, INDP, INDS, INDUMN, J, R1, R2, TO
+      REAL               DMINUS, DPLUS, EPS, S, TMP
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, REAL
+*     ..
+*     .. Executable Statements ..
+*
+      EPS = SLAMCH( 'Precision' )
+      IF( R.EQ.0 ) THEN
+*
+*        Eliminate the top and bottom indices from the possible values
+*        of R where the desired eigenvector is largest in magnitude.
+*
+         R1 = B1
+         DO 10 I = B1, BN
+            IF( SIGMA.GE.GERSCH( 2*I-1 ) .OR. SIGMA.LE.GERSCH( 2*I ) )
+     $           THEN
+               R1 = I
+               GO TO 20
+            END IF
+   10    CONTINUE
+   20    CONTINUE
+         R2 = BN
+         DO 30 I = BN, B1, -1
+            IF( SIGMA.GE.GERSCH( 2*I-1 ) .OR. SIGMA.LE.GERSCH( 2*I ) )
+     $           THEN
+               R2 = I
+               GO TO 40
+            END IF
+   30    CONTINUE
+   40    CONTINUE
+      ELSE
+         R1 = R
+         R2 = R
+      END IF
+*
+      INDUMN = N
+      INDS = 2*N + 1
+      INDP = 3*N + 1
+      SAWNAN = .FALSE.
+*
+*     Compute the stationary transform (using the differential form)
+*     untill the index R2
+*
+      IF( B1.EQ.1 ) THEN
+         WORK( INDS ) = ZERO
+      ELSE
+         WORK( INDS ) = LLD( B1-1 )
+      END IF
+      S = WORK( INDS ) - SIGMA
+      DO 50 I = B1, R2 - 1
+         DPLUS = D( I ) + S
+         WORK( I ) = LD( I ) / DPLUS
+         WORK( INDS+I ) = S*WORK( I )*L( I )
+         S = WORK( INDS+I ) - SIGMA
+   50 CONTINUE
+*
+      IF( .NOT.( S.GT.ZERO .OR. S.LT.ONE ) ) THEN
+*
+*        Run a slower version of the above loop if a NaN is detected
+*
+         SAWNAN = .TRUE.
+         J = B1 + 1
+   60    CONTINUE
+         IF( WORK( INDS+J ).GT.ZERO .OR. WORK( INDS+J ).LT.ONE ) THEN
+            J = J + 1
+            GO TO 60
+         END IF
+         WORK( INDS+J ) = LLD( J )
+         S = WORK( INDS+J ) - SIGMA
+         DO 70 I = J + 1, R2 - 1
+            DPLUS = D( I ) + S
+            WORK( I ) = LD( I ) / DPLUS
+            IF( WORK( I ).EQ.ZERO ) THEN
+               WORK( INDS+I ) = LLD( I )
+            ELSE
+               WORK( INDS+I ) = S*WORK( I )*L( I )
+            END IF
+            S = WORK( INDS+I ) - SIGMA
+   70    CONTINUE
+      END IF
+      WORK( INDP+BN-1 ) = D( BN ) - SIGMA
+      DO 80 I = BN - 1, R1, -1
+         DMINUS = LLD( I ) + WORK( INDP+I )
+         TMP = D( I ) / DMINUS
+         WORK( INDUMN+I ) = L( I )*TMP
+         WORK( INDP+I-1 ) = WORK( INDP+I )*TMP - SIGMA
+   80 CONTINUE
+      TMP = WORK( INDP+R1-1 )
+      IF( .NOT.( TMP.GT.ZERO .OR. TMP.LT.ONE ) ) THEN
+*
+*        Run a slower version of the above loop if a NaN is detected
+*
+         SAWNAN = .TRUE.
+         J = BN - 3
+   90    CONTINUE
+         IF( WORK( INDP+J ).GT.ZERO .OR. WORK( INDP+J ).LT.ONE ) THEN
+            J = J - 1
+            GO TO 90
+         END IF
+         WORK( INDP+J ) = D( J+1 ) - SIGMA
+         DO 100 I = J, R1, -1
+            DMINUS = LLD( I ) + WORK( INDP+I )
+            TMP = D( I ) / DMINUS
+            WORK( INDUMN+I ) = L( I )*TMP
+            IF( TMP.EQ.ZERO ) THEN
+               WORK( INDP+I-1 ) = D( I ) - SIGMA
+            ELSE
+               WORK( INDP+I-1 ) = WORK( INDP+I )*TMP - SIGMA
+            END IF
+  100    CONTINUE
+      END IF
+*
+*     Find the index (from R1 to R2) of the largest (in magnitude)
+*     diagonal element of the inverse
+*
+      MINGMA = WORK( INDS+R1-1 ) + WORK( INDP+R1-1 )
+      IF( MINGMA.EQ.ZERO )
+     $   MINGMA = EPS*WORK( INDS+R1-1 )
+      R = R1
+      DO 110 I = R1, R2 - 1
+         TMP = WORK( INDS+I ) + WORK( INDP+I )
+         IF( TMP.EQ.ZERO )
+     $      TMP = EPS*WORK( INDS+I )
+         IF( ABS( TMP ).LT.ABS( MINGMA ) ) THEN
+            MINGMA = TMP
+            R = I + 1
+         END IF
+  110 CONTINUE
+*
+*     Compute the (scaled) r-th column of the inverse
+*
+      ISUPPZ( 1 ) = B1
+      ISUPPZ( 2 ) = BN
+      Z( R ) = CONE
+      ZTZ = ONE
+      IF( .NOT.SAWNAN ) THEN
+         FROM = R - 1
+         TO = MAX( R-BLKSIZ, B1 )
+  120    CONTINUE
+         IF( FROM.GE.B1 ) THEN
+            DO 130 I = FROM, TO, -1
+               Z( I ) = -( WORK( I )*Z( I+1 ) )
+               ZTZ = ZTZ + REAL( Z( I )*Z( I ) )
+  130       CONTINUE
+            IF( ABS( Z( TO ) ).LE.EPS .AND. ABS( Z( TO+1 ) ).LE.EPS )
+     $           THEN
+               ISUPPZ( 1 ) = TO + 2
+            ELSE
+               FROM = TO - 1
+               TO = MAX( TO-BLKSIZ, B1 )
+               GO TO 120
+            END IF
+         END IF
+         FROM = R + 1
+         TO = MIN( R+BLKSIZ, BN )
+  140    CONTINUE
+         IF( FROM.LE.BN ) THEN
+            DO 150 I = FROM, TO
+               Z( I ) = -( WORK( INDUMN+I-1 )*Z( I-1 ) )
+               ZTZ = ZTZ + REAL( Z( I )*Z( I ) )
+  150       CONTINUE
+            IF( ABS( Z( TO ) ).LE.EPS .AND. ABS( Z( TO-1 ) ).LE.EPS )
+     $           THEN
+               ISUPPZ( 2 ) = TO - 2
+            ELSE
+               FROM = TO + 1
+               TO = MIN( TO+BLKSIZ, BN )
+               GO TO 140
+            END IF
+         END IF
+      ELSE
+         DO 160 I = R - 1, B1, -1
+            IF( Z( I+1 ).EQ.ZERO ) THEN
+               Z( I ) = -( LD( I+1 ) / LD( I ) )*Z( I+2 )
+            ELSE IF( ABS( Z( I+1 ) ).LE.EPS .AND. ABS( Z( I+2 ) ).LE.
+     $               EPS ) THEN
+               ISUPPZ( 1 ) = I + 3
+               GO TO 170
+            ELSE
+               Z( I ) = -( WORK( I )*Z( I+1 ) )
+            END IF
+            ZTZ = ZTZ + REAL( Z( I )*Z( I ) )
+  160    CONTINUE
+  170    CONTINUE
+         DO 180 I = R, BN - 1
+            IF( Z( I ).EQ.ZERO ) THEN
+               Z( I+1 ) = -( LD( I-1 ) / LD( I ) )*Z( I-1 )
+            ELSE IF( ABS( Z( I ) ).LE.EPS .AND. ABS( Z( I-1 ) ).LE.EPS )
+     $                THEN
+               ISUPPZ( 2 ) = I - 2
+               GO TO 190
+            ELSE
+               Z( I+1 ) = -( WORK( INDUMN+I )*Z( I ) )
+            END IF
+            ZTZ = ZTZ + REAL( Z( I+1 )*Z( I+1 ) )
+  180    CONTINUE
+  190    CONTINUE
+      END IF
+      DO 200 I = B1, ISUPPZ( 1 ) - 3
+         Z( I ) = ZERO
+  200 CONTINUE
+      DO 210 I = ISUPPZ( 2 ) + 3, BN
+         Z( I ) = ZERO
+  210 CONTINUE
+*
+      RETURN
+*
+*     End of CLAR1V
+*
+      END
diff --git a/costa/native/external/lapack/clar2v.f b/costa/native/external/lapack/clar2v.f
new file mode 100644
index 000000000..e883e37ce
--- /dev/null
+++ b/costa/native/external/lapack/clar2v.f
@@ -0,0 +1,98 @@
+      SUBROUTINE CLAR2V( N, X, Y, Z, INCX, C, S, INCC )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCC, INCX, N
+*     ..
+*     .. Array Arguments ..
+      REAL               C( * )
+      COMPLEX            S( * ), X( * ), Y( * ), Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLAR2V applies a vector of complex plane rotations with real cosines
+*  from both sides to a sequence of 2-by-2 complex Hermitian matrices,
+*  defined by the elements of the vectors x, y and z. For i = 1,2,...,n
+*
+*     (       x(i)  z(i) ) :=
+*     ( conjg(z(i)) y(i) )
+*
+*       (  c(i) conjg(s(i)) ) (       x(i)  z(i) ) ( c(i) -conjg(s(i)) )
+*       ( -s(i)       c(i)  ) ( conjg(z(i)) y(i) ) ( s(i)        c(i)  )
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of plane rotations to be applied.
+*
+*  X       (input/output) COMPLEX array, dimension (1+(N-1)*INCX)
+*          The vector x; the elements of x are assumed to be real.
+*
+*  Y       (input/output) COMPLEX array, dimension (1+(N-1)*INCX)
+*          The vector y; the elements of y are assumed to be real.
+*
+*  Z       (input/output) COMPLEX array, dimension (1+(N-1)*INCX)
+*          The vector z.
+*
+*  INCX    (input) INTEGER
+*          The increment between elements of X, Y and Z. INCX > 0.
+*
+*  C       (input) REAL array, dimension (1+(N-1)*INCC)
+*          The cosines of the plane rotations.
+*
+*  S       (input) COMPLEX array, dimension (1+(N-1)*INCC)
+*          The sines of the plane rotations.
+*
+*  INCC    (input) INTEGER
+*          The increment between elements of C and S. INCC > 0.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, IC, IX
+      REAL               CI, SII, SIR, T1I, T1R, T5, T6, XI, YI, ZII,
+     $                   ZIR
+      COMPLEX            SI, T2, T3, T4, ZI
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          AIMAG, CMPLX, CONJG, REAL
+*     ..
+*     .. Executable Statements ..
+*
+      IX = 1
+      IC = 1
+      DO 10 I = 1, N
+         XI = REAL( X( IX ) )
+         YI = REAL( Y( IX ) )
+         ZI = Z( IX )
+         ZIR = REAL( ZI )
+         ZII = AIMAG( ZI )
+         CI = C( IC )
+         SI = S( IC )
+         SIR = REAL( SI )
+         SII = AIMAG( SI )
+         T1R = SIR*ZIR - SII*ZII
+         T1I = SIR*ZII + SII*ZIR
+         T2 = CI*ZI
+         T3 = T2 - CONJG( SI )*XI
+         T4 = CONJG( T2 ) + SI*YI
+         T5 = CI*XI + T1R
+         T6 = CI*YI - T1R
+         X( IX ) = CI*T5 + ( SIR*REAL( T4 )+SII*AIMAG( T4 ) )
+         Y( IX ) = CI*T6 - ( SIR*REAL( T3 )-SII*AIMAG( T3 ) )
+         Z( IX ) = CI*T3 + CONJG( SI )*CMPLX( T6, T1I )
+         IX = IX + INCX
+         IC = IC + INCC
+   10 CONTINUE
+      RETURN
+*
+*     End of CLAR2V
+*
+      END
diff --git a/costa/native/external/lapack/clarcm.f b/costa/native/external/lapack/clarcm.f
new file mode 100644
index 000000000..d8b197856
--- /dev/null
+++ b/costa/native/external/lapack/clarcm.f
@@ -0,0 +1,111 @@
+      SUBROUTINE CLARCM( M, N, A, LDA, B, LDB, C, LDC, RWORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            LDA, LDB, LDC, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), RWORK( * )
+      COMPLEX            B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLARCM performs a very simple matrix-matrix multiplication:
+*           C := A * B,
+*  where A is M by M and real; B is M by N and complex;
+*  C is M by N and complex.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A and of the matrix C.
+*          M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns and rows of the matrix B and
+*          the number of columns of the matrix C.
+*          N >= 0.
+*
+*  A       (input) REAL array, dimension (LDA, M)
+*          A contains the M by M matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >=max(1,M).
+*
+*  B       (input) REAL array, dimension (LDB, N)
+*          B contains the M by N matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >=max(1,M).
+*
+*  C       (input) COMPLEX array, dimension (LDC, N)
+*          C contains the M by N matrix C.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >=max(1,M).
+*
+*  RWORK   (workspace) REAL array, dimension (2*M*N)
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E0, ZERO = 0.0E0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, L
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          AIMAG, CMPLX, REAL
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMM
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible.
+*
+      IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) )
+     $   RETURN
+*
+      DO 20 J = 1, N
+         DO 10 I = 1, M
+            RWORK( ( J-1 )*M+I ) = REAL( B( I, J ) )
+   10    CONTINUE
+   20 CONTINUE
+*
+      L = M*N + 1
+      CALL SGEMM( 'N', 'N', M, N, M, ONE, A, LDA, RWORK, M, ZERO,
+     $            RWORK( L ), M )
+      DO 40 J = 1, N
+         DO 30 I = 1, M
+            C( I, J ) = RWORK( L+( J-1 )*M+I-1 )
+   30    CONTINUE
+   40 CONTINUE
+*
+      DO 60 J = 1, N
+         DO 50 I = 1, M
+            RWORK( ( J-1 )*M+I ) = AIMAG( B( I, J ) )
+   50    CONTINUE
+   60 CONTINUE
+      CALL SGEMM( 'N', 'N', M, N, M, ONE, A, LDA, RWORK, M, ZERO,
+     $            RWORK( L ), M )
+      DO 80 J = 1, N
+         DO 70 I = 1, M
+            C( I, J ) = CMPLX( REAL( C( I, J ) ),
+     $                  RWORK( L+( J-1 )*M+I-1 ) )
+   70    CONTINUE
+   80 CONTINUE
+*
+      RETURN
+*
+*     End of CLARCM
+*
+      END
diff --git a/costa/native/external/lapack/clarf.f b/costa/native/external/lapack/clarf.f
new file mode 100644
index 000000000..560d00867
--- /dev/null
+++ b/costa/native/external/lapack/clarf.f
@@ -0,0 +1,121 @@
+      SUBROUTINE CLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE
+      INTEGER            INCV, LDC, M, N
+      COMPLEX            TAU
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            C( LDC, * ), V( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLARF applies a complex elementary reflector H to a complex M-by-N
+*  matrix C, from either the left or the right. H is represented in the
+*  form
+*
+*        H = I - tau * v * v'
+*
+*  where tau is a complex scalar and v is a complex vector.
+*
+*  If tau = 0, then H is taken to be the unit matrix.
+*
+*  To apply H' (the conjugate transpose of H), supply conjg(tau) instead
+*  tau.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': form  H * C
+*          = 'R': form  C * H
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C.
+*
+*  V       (input) COMPLEX array, dimension
+*                     (1 + (M-1)*abs(INCV)) if SIDE = 'L'
+*                  or (1 + (N-1)*abs(INCV)) if SIDE = 'R'
+*          The vector v in the representation of H. V is not used if
+*          TAU = 0.
+*
+*  INCV    (input) INTEGER
+*          The increment between elements of v. INCV <> 0.
+*
+*  TAU     (input) COMPLEX
+*          The value tau in the representation of H.
+*
+*  C       (input/output) COMPLEX array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by the matrix H * C if SIDE = 'L',
+*          or C * H if SIDE = 'R'.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) COMPLEX array, dimension
+*                         (N) if SIDE = 'L'
+*                      or (M) if SIDE = 'R'
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ),
+     $                   ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEMV, CGERC
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+      IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*        Form  H * C
+*
+         IF( TAU.NE.ZERO ) THEN
+*
+*           w := C' * v
+*
+            CALL CGEMV( 'Conjugate transpose', M, N, ONE, C, LDC, V,
+     $                  INCV, ZERO, WORK, 1 )
+*
+*           C := C - v * w'
+*
+            CALL CGERC( M, N, -TAU, V, INCV, WORK, 1, C, LDC )
+         END IF
+      ELSE
+*
+*        Form  C * H
+*
+         IF( TAU.NE.ZERO ) THEN
+*
+*           w := C * v
+*
+            CALL CGEMV( 'No transpose', M, N, ONE, C, LDC, V, INCV,
+     $                  ZERO, WORK, 1 )
+*
+*           C := C - w * v'
+*
+            CALL CGERC( M, N, -TAU, WORK, 1, V, INCV, C, LDC )
+         END IF
+      END IF
+      RETURN
+*
+*     End of CLARF
+*
+      END
diff --git a/costa/native/external/lapack/clarfb.f b/costa/native/external/lapack/clarfb.f
new file mode 100644
index 000000000..0f219a17c
--- /dev/null
+++ b/costa/native/external/lapack/clarfb.f
@@ -0,0 +1,609 @@
+      SUBROUTINE CLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV,
+     $                   T, LDT, C, LDC, WORK, LDWORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIRECT, SIDE, STOREV, TRANS
+      INTEGER            K, LDC, LDT, LDV, LDWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            C( LDC, * ), T( LDT, * ), V( LDV, * ),
+     $                   WORK( LDWORK, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLARFB applies a complex block reflector H or its transpose H' to a
+*  complex M-by-N matrix C, from either the left or the right.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply H or H' from the Left
+*          = 'R': apply H or H' from the Right
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': apply H (No transpose)
+*          = 'C': apply H' (Conjugate transpose)
+*
+*  DIRECT  (input) CHARACTER*1
+*          Indicates how H is formed from a product of elementary
+*          reflectors
+*          = 'F': H = H(1) H(2) . . . H(k) (Forward)
+*          = 'B': H = H(k) . . . H(2) H(1) (Backward)
+*
+*  STOREV  (input) CHARACTER*1
+*          Indicates how the vectors which define the elementary
+*          reflectors are stored:
+*          = 'C': Columnwise
+*          = 'R': Rowwise
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C.
+*
+*  K       (input) INTEGER
+*          The order of the matrix T (= the number of elementary
+*          reflectors whose product defines the block reflector).
+*
+*  V       (input) COMPLEX array, dimension
+*                                (LDV,K) if STOREV = 'C'
+*                                (LDV,M) if STOREV = 'R' and SIDE = 'L'
+*                                (LDV,N) if STOREV = 'R' and SIDE = 'R'
+*          The matrix V. See further details.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V.
+*          If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M);
+*          if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N);
+*          if STOREV = 'R', LDV >= K.
+*
+*  T       (input) COMPLEX array, dimension (LDT,K)
+*          The triangular K-by-K matrix T in the representation of the
+*          block reflector.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= K.
+*
+*  C       (input/output) COMPLEX array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by H*C or H'*C or C*H or C*H'.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) COMPLEX array, dimension (LDWORK,K)
+*
+*  LDWORK  (input) INTEGER
+*          The leading dimension of the array WORK.
+*          If SIDE = 'L', LDWORK >= max(1,N);
+*          if SIDE = 'R', LDWORK >= max(1,M).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      CHARACTER          TRANST
+      INTEGER            I, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CGEMM, CLACGV, CTRMM
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 .OR. N.LE.0 )
+     $   RETURN
+*
+      IF( LSAME( TRANS, 'N' ) ) THEN
+         TRANST = 'C'
+      ELSE
+         TRANST = 'N'
+      END IF
+*
+      IF( LSAME( STOREV, 'C' ) ) THEN
+*
+         IF( LSAME( DIRECT, 'F' ) ) THEN
+*
+*           Let  V =  ( V1 )    (first K rows)
+*                     ( V2 )
+*           where  V1  is unit lower triangular.
+*
+            IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*              Form  H * C  or  H' * C  where  C = ( C1 )
+*                                                  ( C2 )
+*
+*              W := C' * V  =  (C1'*V1 + C2'*V2)  (stored in WORK)
+*
+*              W := C1'
+*
+               DO 10 J = 1, K
+                  CALL CCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 )
+                  CALL CLACGV( N, WORK( 1, J ), 1 )
+   10          CONTINUE
+*
+*              W := W * V1
+*
+               CALL CTRMM( 'Right', 'Lower', 'No transpose', 'Unit', N,
+     $                     K, ONE, V, LDV, WORK, LDWORK )
+               IF( M.GT.K ) THEN
+*
+*                 W := W + C2'*V2
+*
+                  CALL CGEMM( 'Conjugate transpose', 'No transpose', N,
+     $                        K, M-K, ONE, C( K+1, 1 ), LDC,
+     $                        V( K+1, 1 ), LDV, ONE, WORK, LDWORK )
+               END IF
+*
+*              W := W * T'  or  W * T
+*
+               CALL CTRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - V * W'
+*
+               IF( M.GT.K ) THEN
+*
+*                 C2 := C2 - V2 * W'
+*
+                  CALL CGEMM( 'No transpose', 'Conjugate transpose',
+     $                        M-K, N, K, -ONE, V( K+1, 1 ), LDV, WORK,
+     $                        LDWORK, ONE, C( K+1, 1 ), LDC )
+               END IF
+*
+*              W := W * V1'
+*
+               CALL CTRMM( 'Right', 'Lower', 'Conjugate transpose',
+     $                     'Unit', N, K, ONE, V, LDV, WORK, LDWORK )
+*
+*              C1 := C1 - W'
+*
+               DO 30 J = 1, K
+                  DO 20 I = 1, N
+                     C( J, I ) = C( J, I ) - CONJG( WORK( I, J ) )
+   20             CONTINUE
+   30          CONTINUE
+*
+            ELSE IF( LSAME( SIDE, 'R' ) ) THEN
+*
+*              Form  C * H  or  C * H'  where  C = ( C1  C2 )
+*
+*              W := C * V  =  (C1*V1 + C2*V2)  (stored in WORK)
+*
+*              W := C1
+*
+               DO 40 J = 1, K
+                  CALL CCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 )
+   40          CONTINUE
+*
+*              W := W * V1
+*
+               CALL CTRMM( 'Right', 'Lower', 'No transpose', 'Unit', M,
+     $                     K, ONE, V, LDV, WORK, LDWORK )
+               IF( N.GT.K ) THEN
+*
+*                 W := W + C2 * V2
+*
+                  CALL CGEMM( 'No transpose', 'No transpose', M, K, N-K,
+     $                        ONE, C( 1, K+1 ), LDC, V( K+1, 1 ), LDV,
+     $                        ONE, WORK, LDWORK )
+               END IF
+*
+*              W := W * T  or  W * T'
+*
+               CALL CTRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - W * V'
+*
+               IF( N.GT.K ) THEN
+*
+*                 C2 := C2 - W * V2'
+*
+                  CALL CGEMM( 'No transpose', 'Conjugate transpose', M,
+     $                        N-K, K, -ONE, WORK, LDWORK, V( K+1, 1 ),
+     $                        LDV, ONE, C( 1, K+1 ), LDC )
+               END IF
+*
+*              W := W * V1'
+*
+               CALL CTRMM( 'Right', 'Lower', 'Conjugate transpose',
+     $                     'Unit', M, K, ONE, V, LDV, WORK, LDWORK )
+*
+*              C1 := C1 - W
+*
+               DO 60 J = 1, K
+                  DO 50 I = 1, M
+                     C( I, J ) = C( I, J ) - WORK( I, J )
+   50             CONTINUE
+   60          CONTINUE
+            END IF
+*
+         ELSE
+*
+*           Let  V =  ( V1 )
+*                     ( V2 )    (last K rows)
+*           where  V2  is unit upper triangular.
+*
+            IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*              Form  H * C  or  H' * C  where  C = ( C1 )
+*                                                  ( C2 )
+*
+*              W := C' * V  =  (C1'*V1 + C2'*V2)  (stored in WORK)
+*
+*              W := C2'
+*
+               DO 70 J = 1, K
+                  CALL CCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 )
+                  CALL CLACGV( N, WORK( 1, J ), 1 )
+   70          CONTINUE
+*
+*              W := W * V2
+*
+               CALL CTRMM( 'Right', 'Upper', 'No transpose', 'Unit', N,
+     $                     K, ONE, V( M-K+1, 1 ), LDV, WORK, LDWORK )
+               IF( M.GT.K ) THEN
+*
+*                 W := W + C1'*V1
+*
+                  CALL CGEMM( 'Conjugate transpose', 'No transpose', N,
+     $                        K, M-K, ONE, C, LDC, V, LDV, ONE, WORK,
+     $                        LDWORK )
+               END IF
+*
+*              W := W * T'  or  W * T
+*
+               CALL CTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - V * W'
+*
+               IF( M.GT.K ) THEN
+*
+*                 C1 := C1 - V1 * W'
+*
+                  CALL CGEMM( 'No transpose', 'Conjugate transpose',
+     $                        M-K, N, K, -ONE, V, LDV, WORK, LDWORK,
+     $                        ONE, C, LDC )
+               END IF
+*
+*              W := W * V2'
+*
+               CALL CTRMM( 'Right', 'Upper', 'Conjugate transpose',
+     $                     'Unit', N, K, ONE, V( M-K+1, 1 ), LDV, WORK,
+     $                     LDWORK )
+*
+*              C2 := C2 - W'
+*
+               DO 90 J = 1, K
+                  DO 80 I = 1, N
+                     C( M-K+J, I ) = C( M-K+J, I ) -
+     $                               CONJG( WORK( I, J ) )
+   80             CONTINUE
+   90          CONTINUE
+*
+            ELSE IF( LSAME( SIDE, 'R' ) ) THEN
+*
+*              Form  C * H  or  C * H'  where  C = ( C1  C2 )
+*
+*              W := C * V  =  (C1*V1 + C2*V2)  (stored in WORK)
+*
+*              W := C2
+*
+               DO 100 J = 1, K
+                  CALL CCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 )
+  100          CONTINUE
+*
+*              W := W * V2
+*
+               CALL CTRMM( 'Right', 'Upper', 'No transpose', 'Unit', M,
+     $                     K, ONE, V( N-K+1, 1 ), LDV, WORK, LDWORK )
+               IF( N.GT.K ) THEN
+*
+*                 W := W + C1 * V1
+*
+                  CALL CGEMM( 'No transpose', 'No transpose', M, K, N-K,
+     $                        ONE, C, LDC, V, LDV, ONE, WORK, LDWORK )
+               END IF
+*
+*              W := W * T  or  W * T'
+*
+               CALL CTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - W * V'
+*
+               IF( N.GT.K ) THEN
+*
+*                 C1 := C1 - W * V1'
+*
+                  CALL CGEMM( 'No transpose', 'Conjugate transpose', M,
+     $                        N-K, K, -ONE, WORK, LDWORK, V, LDV, ONE,
+     $                        C, LDC )
+               END IF
+*
+*              W := W * V2'
+*
+               CALL CTRMM( 'Right', 'Upper', 'Conjugate transpose',
+     $                     'Unit', M, K, ONE, V( N-K+1, 1 ), LDV, WORK,
+     $                     LDWORK )
+*
+*              C2 := C2 - W
+*
+               DO 120 J = 1, K
+                  DO 110 I = 1, M
+                     C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J )
+  110             CONTINUE
+  120          CONTINUE
+            END IF
+         END IF
+*
+      ELSE IF( LSAME( STOREV, 'R' ) ) THEN
+*
+         IF( LSAME( DIRECT, 'F' ) ) THEN
+*
+*           Let  V =  ( V1  V2 )    (V1: first K columns)
+*           where  V1  is unit upper triangular.
+*
+            IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*              Form  H * C  or  H' * C  where  C = ( C1 )
+*                                                  ( C2 )
+*
+*              W := C' * V'  =  (C1'*V1' + C2'*V2') (stored in WORK)
+*
+*              W := C1'
+*
+               DO 130 J = 1, K
+                  CALL CCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 )
+                  CALL CLACGV( N, WORK( 1, J ), 1 )
+  130          CONTINUE
+*
+*              W := W * V1'
+*
+               CALL CTRMM( 'Right', 'Upper', 'Conjugate transpose',
+     $                     'Unit', N, K, ONE, V, LDV, WORK, LDWORK )
+               IF( M.GT.K ) THEN
+*
+*                 W := W + C2'*V2'
+*
+                  CALL CGEMM( 'Conjugate transpose',
+     $                        'Conjugate transpose', N, K, M-K, ONE,
+     $                        C( K+1, 1 ), LDC, V( 1, K+1 ), LDV, ONE,
+     $                        WORK, LDWORK )
+               END IF
+*
+*              W := W * T'  or  W * T
+*
+               CALL CTRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - V' * W'
+*
+               IF( M.GT.K ) THEN
+*
+*                 C2 := C2 - V2' * W'
+*
+                  CALL CGEMM( 'Conjugate transpose',
+     $                        'Conjugate transpose', M-K, N, K, -ONE,
+     $                        V( 1, K+1 ), LDV, WORK, LDWORK, ONE,
+     $                        C( K+1, 1 ), LDC )
+               END IF
+*
+*              W := W * V1
+*
+               CALL CTRMM( 'Right', 'Upper', 'No transpose', 'Unit', N,
+     $                     K, ONE, V, LDV, WORK, LDWORK )
+*
+*              C1 := C1 - W'
+*
+               DO 150 J = 1, K
+                  DO 140 I = 1, N
+                     C( J, I ) = C( J, I ) - CONJG( WORK( I, J ) )
+  140             CONTINUE
+  150          CONTINUE
+*
+            ELSE IF( LSAME( SIDE, 'R' ) ) THEN
+*
+*              Form  C * H  or  C * H'  where  C = ( C1  C2 )
+*
+*              W := C * V'  =  (C1*V1' + C2*V2')  (stored in WORK)
+*
+*              W := C1
+*
+               DO 160 J = 1, K
+                  CALL CCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 )
+  160          CONTINUE
+*
+*              W := W * V1'
+*
+               CALL CTRMM( 'Right', 'Upper', 'Conjugate transpose',
+     $                     'Unit', M, K, ONE, V, LDV, WORK, LDWORK )
+               IF( N.GT.K ) THEN
+*
+*                 W := W + C2 * V2'
+*
+                  CALL CGEMM( 'No transpose', 'Conjugate transpose', M,
+     $                        K, N-K, ONE, C( 1, K+1 ), LDC,
+     $                        V( 1, K+1 ), LDV, ONE, WORK, LDWORK )
+               END IF
+*
+*              W := W * T  or  W * T'
+*
+               CALL CTRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - W * V
+*
+               IF( N.GT.K ) THEN
+*
+*                 C2 := C2 - W * V2
+*
+                  CALL CGEMM( 'No transpose', 'No transpose', M, N-K, K,
+     $                        -ONE, WORK, LDWORK, V( 1, K+1 ), LDV, ONE,
+     $                        C( 1, K+1 ), LDC )
+               END IF
+*
+*              W := W * V1
+*
+               CALL CTRMM( 'Right', 'Upper', 'No transpose', 'Unit', M,
+     $                     K, ONE, V, LDV, WORK, LDWORK )
+*
+*              C1 := C1 - W
+*
+               DO 180 J = 1, K
+                  DO 170 I = 1, M
+                     C( I, J ) = C( I, J ) - WORK( I, J )
+  170             CONTINUE
+  180          CONTINUE
+*
+            END IF
+*
+         ELSE
+*
+*           Let  V =  ( V1  V2 )    (V2: last K columns)
+*           where  V2  is unit lower triangular.
+*
+            IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*              Form  H * C  or  H' * C  where  C = ( C1 )
+*                                                  ( C2 )
+*
+*              W := C' * V'  =  (C1'*V1' + C2'*V2') (stored in WORK)
+*
+*              W := C2'
+*
+               DO 190 J = 1, K
+                  CALL CCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 )
+                  CALL CLACGV( N, WORK( 1, J ), 1 )
+  190          CONTINUE
+*
+*              W := W * V2'
+*
+               CALL CTRMM( 'Right', 'Lower', 'Conjugate transpose',
+     $                     'Unit', N, K, ONE, V( 1, M-K+1 ), LDV, WORK,
+     $                     LDWORK )
+               IF( M.GT.K ) THEN
+*
+*                 W := W + C1'*V1'
+*
+                  CALL CGEMM( 'Conjugate transpose',
+     $                        'Conjugate transpose', N, K, M-K, ONE, C,
+     $                        LDC, V, LDV, ONE, WORK, LDWORK )
+               END IF
+*
+*              W := W * T'  or  W * T
+*
+               CALL CTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - V' * W'
+*
+               IF( M.GT.K ) THEN
+*
+*                 C1 := C1 - V1' * W'
+*
+                  CALL CGEMM( 'Conjugate transpose',
+     $                        'Conjugate transpose', M-K, N, K, -ONE, V,
+     $                        LDV, WORK, LDWORK, ONE, C, LDC )
+               END IF
+*
+*              W := W * V2
+*
+               CALL CTRMM( 'Right', 'Lower', 'No transpose', 'Unit', N,
+     $                     K, ONE, V( 1, M-K+1 ), LDV, WORK, LDWORK )
+*
+*              C2 := C2 - W'
+*
+               DO 210 J = 1, K
+                  DO 200 I = 1, N
+                     C( M-K+J, I ) = C( M-K+J, I ) -
+     $                               CONJG( WORK( I, J ) )
+  200             CONTINUE
+  210          CONTINUE
+*
+            ELSE IF( LSAME( SIDE, 'R' ) ) THEN
+*
+*              Form  C * H  or  C * H'  where  C = ( C1  C2 )
+*
+*              W := C * V'  =  (C1*V1' + C2*V2')  (stored in WORK)
+*
+*              W := C2
+*
+               DO 220 J = 1, K
+                  CALL CCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 )
+  220          CONTINUE
+*
+*              W := W * V2'
+*
+               CALL CTRMM( 'Right', 'Lower', 'Conjugate transpose',
+     $                     'Unit', M, K, ONE, V( 1, N-K+1 ), LDV, WORK,
+     $                     LDWORK )
+               IF( N.GT.K ) THEN
+*
+*                 W := W + C1 * V1'
+*
+                  CALL CGEMM( 'No transpose', 'Conjugate transpose', M,
+     $                        K, N-K, ONE, C, LDC, V, LDV, ONE, WORK,
+     $                        LDWORK )
+               END IF
+*
+*              W := W * T  or  W * T'
+*
+               CALL CTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - W * V
+*
+               IF( N.GT.K ) THEN
+*
+*                 C1 := C1 - W * V1
+*
+                  CALL CGEMM( 'No transpose', 'No transpose', M, N-K, K,
+     $                        -ONE, WORK, LDWORK, V, LDV, ONE, C, LDC )
+               END IF
+*
+*              W := W * V2
+*
+               CALL CTRMM( 'Right', 'Lower', 'No transpose', 'Unit', M,
+     $                     K, ONE, V( 1, N-K+1 ), LDV, WORK, LDWORK )
+*
+*              C1 := C1 - W
+*
+               DO 240 J = 1, K
+                  DO 230 I = 1, M
+                     C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J )
+  230             CONTINUE
+  240          CONTINUE
+*
+            END IF
+*
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CLARFB
+*
+      END
diff --git a/costa/native/external/lapack/clarfg.f b/costa/native/external/lapack/clarfg.f
new file mode 100644
index 000000000..5b2568f2a
--- /dev/null
+++ b/costa/native/external/lapack/clarfg.f
@@ -0,0 +1,146 @@
+      SUBROUTINE CLARFG( N, ALPHA, X, INCX, TAU )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+      COMPLEX            ALPHA, TAU
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLARFG generates a complex elementary reflector H of order n, such
+*  that
+*
+*        H' * ( alpha ) = ( beta ),   H' * H = I.
+*             (   x   )   (   0  )
+*
+*  where alpha and beta are scalars, with beta real, and x is an
+*  (n-1)-element complex vector. H is represented in the form
+*
+*        H = I - tau * ( 1 ) * ( 1 v' ) ,
+*                      ( v )
+*
+*  where tau is a complex scalar and v is a complex (n-1)-element
+*  vector. Note that H is not hermitian.
+*
+*  If the elements of x are all zero and alpha is real, then tau = 0
+*  and H is taken to be the unit matrix.
+*
+*  Otherwise  1 <= real(tau) <= 2  and  abs(tau-1) <= 1 .
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the elementary reflector.
+*
+*  ALPHA   (input/output) COMPLEX
+*          On entry, the value alpha.
+*          On exit, it is overwritten with the value beta.
+*
+*  X       (input/output) COMPLEX array, dimension
+*                         (1+(N-2)*abs(INCX))
+*          On entry, the vector x.
+*          On exit, it is overwritten with the vector v.
+*
+*  INCX    (input) INTEGER
+*          The increment between elements of X. INCX > 0.
+*
+*  TAU     (output) COMPLEX
+*          The value tau.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            J, KNT
+      REAL               ALPHI, ALPHR, BETA, RSAFMN, SAFMIN, XNORM
+*     ..
+*     .. External Functions ..
+      REAL               SCNRM2, SLAMCH, SLAPY3
+      COMPLEX            CLADIV
+      EXTERNAL           SCNRM2, SLAMCH, SLAPY3, CLADIV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, CMPLX, REAL, SIGN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CSCAL, CSSCAL
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.LE.0 ) THEN
+         TAU = ZERO
+         RETURN
+      END IF
+*
+      XNORM = SCNRM2( N-1, X, INCX )
+      ALPHR = REAL( ALPHA )
+      ALPHI = AIMAG( ALPHA )
+*
+      IF( XNORM.EQ.ZERO .AND. ALPHI.EQ.ZERO ) THEN
+*
+*        H  =  I
+*
+         TAU = ZERO
+      ELSE
+*
+*        general case
+*
+         BETA = -SIGN( SLAPY3( ALPHR, ALPHI, XNORM ), ALPHR )
+         SAFMIN = SLAMCH( 'S' ) / SLAMCH( 'E' )
+         RSAFMN = ONE / SAFMIN
+*
+         IF( ABS( BETA ).LT.SAFMIN ) THEN
+*
+*           XNORM, BETA may be inaccurate; scale X and recompute them
+*
+            KNT = 0
+   10       CONTINUE
+            KNT = KNT + 1
+            CALL CSSCAL( N-1, RSAFMN, X, INCX )
+            BETA = BETA*RSAFMN
+            ALPHI = ALPHI*RSAFMN
+            ALPHR = ALPHR*RSAFMN
+            IF( ABS( BETA ).LT.SAFMIN )
+     $         GO TO 10
+*
+*           New BETA is at most 1, at least SAFMIN
+*
+            XNORM = SCNRM2( N-1, X, INCX )
+            ALPHA = CMPLX( ALPHR, ALPHI )
+            BETA = -SIGN( SLAPY3( ALPHR, ALPHI, XNORM ), ALPHR )
+            TAU = CMPLX( ( BETA-ALPHR ) / BETA, -ALPHI / BETA )
+            ALPHA = CLADIV( CMPLX( ONE ), ALPHA-BETA )
+            CALL CSCAL( N-1, ALPHA, X, INCX )
+*
+*           If ALPHA is subnormal, it may lose relative accuracy
+*
+            ALPHA = BETA
+            DO 20 J = 1, KNT
+               ALPHA = ALPHA*SAFMIN
+   20       CONTINUE
+         ELSE
+            TAU = CMPLX( ( BETA-ALPHR ) / BETA, -ALPHI / BETA )
+            ALPHA = CLADIV( CMPLX( ONE ), ALPHA-BETA )
+            CALL CSCAL( N-1, ALPHA, X, INCX )
+            ALPHA = BETA
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CLARFG
+*
+      END
diff --git a/costa/native/external/lapack/clarft.f b/costa/native/external/lapack/clarft.f
new file mode 100644
index 000000000..ff8483b17
--- /dev/null
+++ b/costa/native/external/lapack/clarft.f
@@ -0,0 +1,225 @@
+      SUBROUTINE CLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIRECT, STOREV
+      INTEGER            K, LDT, LDV, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            T( LDT, * ), TAU( * ), V( LDV, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLARFT forms the triangular factor T of a complex block reflector H
+*  of order n, which is defined as a product of k elementary reflectors.
+*
+*  If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
+*
+*  If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
+*
+*  If STOREV = 'C', the vector which defines the elementary reflector
+*  H(i) is stored in the i-th column of the array V, and
+*
+*     H  =  I - V * T * V'
+*
+*  If STOREV = 'R', the vector which defines the elementary reflector
+*  H(i) is stored in the i-th row of the array V, and
+*
+*     H  =  I - V' * T * V
+*
+*  Arguments
+*  =========
+*
+*  DIRECT  (input) CHARACTER*1
+*          Specifies the order in which the elementary reflectors are
+*          multiplied to form the block reflector:
+*          = 'F': H = H(1) H(2) . . . H(k) (Forward)
+*          = 'B': H = H(k) . . . H(2) H(1) (Backward)
+*
+*  STOREV  (input) CHARACTER*1
+*          Specifies how the vectors which define the elementary
+*          reflectors are stored (see also Further Details):
+*          = 'C': columnwise
+*          = 'R': rowwise
+*
+*  N       (input) INTEGER
+*          The order of the block reflector H. N >= 0.
+*
+*  K       (input) INTEGER
+*          The order of the triangular factor T (= the number of
+*          elementary reflectors). K >= 1.
+*
+*  V       (input/output) COMPLEX array, dimension
+*                               (LDV,K) if STOREV = 'C'
+*                               (LDV,N) if STOREV = 'R'
+*          The matrix V. See further details.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V.
+*          If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
+*
+*  TAU     (input) COMPLEX array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i).
+*
+*  T       (output) COMPLEX array, dimension (LDT,K)
+*          The k by k triangular factor T of the block reflector.
+*          If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
+*          lower triangular. The rest of the array is not used.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= K.
+*
+*  Further Details
+*  ===============
+*
+*  The shape of the matrix V and the storage of the vectors which define
+*  the H(i) is best illustrated by the following example with n = 5 and
+*  k = 3. The elements equal to 1 are not stored; the corresponding
+*  array elements are modified but restored on exit. The rest of the
+*  array is not used.
+*
+*  DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R':
+*
+*               V = (  1       )                 V = (  1 v1 v1 v1 v1 )
+*                   ( v1  1    )                     (     1 v2 v2 v2 )
+*                   ( v1 v2  1 )                     (        1 v3 v3 )
+*                   ( v1 v2 v3 )
+*                   ( v1 v2 v3 )
+*
+*  DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R':
+*
+*               V = ( v1 v2 v3 )                 V = ( v1 v1  1       )
+*                   ( v1 v2 v3 )                     ( v2 v2 v2  1    )
+*                   (  1 v2 v3 )                     ( v3 v3 v3 v3  1 )
+*                   (     1 v3 )
+*                   (        1 )
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ),
+     $                   ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      COMPLEX            VII
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEMV, CLACGV, CTRMV
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( LSAME( DIRECT, 'F' ) ) THEN
+         DO 20 I = 1, K
+            IF( TAU( I ).EQ.ZERO ) THEN
+*
+*              H(i)  =  I
+*
+               DO 10 J = 1, I
+                  T( J, I ) = ZERO
+   10          CONTINUE
+            ELSE
+*
+*              general case
+*
+               VII = V( I, I )
+               V( I, I ) = ONE
+               IF( LSAME( STOREV, 'C' ) ) THEN
+*
+*                 T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i)
+*
+                  CALL CGEMV( 'Conjugate transpose', N-I+1, I-1,
+     $                        -TAU( I ), V( I, 1 ), LDV, V( I, I ), 1,
+     $                        ZERO, T( 1, I ), 1 )
+               ELSE
+*
+*                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)'
+*
+                  IF( I.LT.N )
+     $               CALL CLACGV( N-I, V( I, I+1 ), LDV )
+                  CALL CGEMV( 'No transpose', I-1, N-I+1, -TAU( I ),
+     $                        V( 1, I ), LDV, V( I, I ), LDV, ZERO,
+     $                        T( 1, I ), 1 )
+                  IF( I.LT.N )
+     $               CALL CLACGV( N-I, V( I, I+1 ), LDV )
+               END IF
+               V( I, I ) = VII
+*
+*              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i)
+*
+               CALL CTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T,
+     $                     LDT, T( 1, I ), 1 )
+               T( I, I ) = TAU( I )
+            END IF
+   20    CONTINUE
+      ELSE
+         DO 40 I = K, 1, -1
+            IF( TAU( I ).EQ.ZERO ) THEN
+*
+*              H(i)  =  I
+*
+               DO 30 J = I, K
+                  T( J, I ) = ZERO
+   30          CONTINUE
+            ELSE
+*
+*              general case
+*
+               IF( I.LT.K ) THEN
+                  IF( LSAME( STOREV, 'C' ) ) THEN
+                     VII = V( N-K+I, I )
+                     V( N-K+I, I ) = ONE
+*
+*                    T(i+1:k,i) :=
+*                            - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i)
+*
+                     CALL CGEMV( 'Conjugate transpose', N-K+I, K-I,
+     $                           -TAU( I ), V( 1, I+1 ), LDV, V( 1, I ),
+     $                           1, ZERO, T( I+1, I ), 1 )
+                     V( N-K+I, I ) = VII
+                  ELSE
+                     VII = V( I, N-K+I )
+                     V( I, N-K+I ) = ONE
+*
+*                    T(i+1:k,i) :=
+*                            - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)'
+*
+                     CALL CLACGV( N-K+I-1, V( I, 1 ), LDV )
+                     CALL CGEMV( 'No transpose', K-I, N-K+I, -TAU( I ),
+     $                           V( I+1, 1 ), LDV, V( I, 1 ), LDV, ZERO,
+     $                           T( I+1, I ), 1 )
+                     CALL CLACGV( N-K+I-1, V( I, 1 ), LDV )
+                     V( I, N-K+I ) = VII
+                  END IF
+*
+*                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i)
+*
+                  CALL CTRMV( 'Lower', 'No transpose', 'Non-unit', K-I,
+     $                        T( I+1, I+1 ), LDT, T( I+1, I ), 1 )
+               END IF
+               T( I, I ) = TAU( I )
+            END IF
+   40    CONTINUE
+      END IF
+      RETURN
+*
+*     End of CLARFT
+*
+      END
diff --git a/costa/native/external/lapack/clarfx.f b/costa/native/external/lapack/clarfx.f
new file mode 100644
index 000000000..c2dc07986
--- /dev/null
+++ b/costa/native/external/lapack/clarfx.f
@@ -0,0 +1,641 @@
+      SUBROUTINE CLARFX( SIDE, M, N, V, TAU, C, LDC, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE
+      INTEGER            LDC, M, N
+      COMPLEX            TAU
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            C( LDC, * ), V( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLARFX applies a complex elementary reflector H to a complex m by n
+*  matrix C, from either the left or the right. H is represented in the
+*  form
+*
+*        H = I - tau * v * v'
+*
+*  where tau is a complex scalar and v is a complex vector.
+*
+*  If tau = 0, then H is taken to be the unit matrix
+*
+*  This version uses inline code if H has order < 11.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': form  H * C
+*          = 'R': form  C * H
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C.
+*
+*  V       (input) COMPLEX array, dimension (M) if SIDE = 'L'
+*                                        or (N) if SIDE = 'R'
+*          The vector v in the representation of H.
+*
+*  TAU     (input) COMPLEX
+*          The value tau in the representation of H.
+*
+*  C       (input/output) COMPLEX array, dimension (LDC,N)
+*          On entry, the m by n matrix C.
+*          On exit, C is overwritten by the matrix H * C if SIDE = 'L',
+*          or C * H if SIDE = 'R'.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDA >= max(1,M).
+*
+*  WORK    (workspace) COMPLEX array, dimension (N) if SIDE = 'L'
+*                                            or (M) if SIDE = 'R'
+*          WORK is not referenced if H has order < 11.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ZERO, ONE
+      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            J
+      COMPLEX            SUM, T1, T10, T2, T3, T4, T5, T6, T7, T8, T9,
+     $                   V1, V10, V2, V3, V4, V5, V6, V7, V8, V9
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEMV, CGERC
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG
+*     ..
+*     .. Executable Statements ..
+*
+      IF( TAU.EQ.ZERO )
+     $   RETURN
+      IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*        Form  H * C, where H has order m.
+*
+         GO TO ( 10, 30, 50, 70, 90, 110, 130, 150,
+     $           170, 190 )M
+*
+*        Code for general M
+*
+*        w := C'*v
+*
+         CALL CGEMV( 'Conjugate transpose', M, N, ONE, C, LDC, V, 1,
+     $               ZERO, WORK, 1 )
+*
+*        C := C - tau * v * w'
+*
+         CALL CGERC( M, N, -TAU, V, 1, WORK, 1, C, LDC )
+         GO TO 410
+   10    CONTINUE
+*
+*        Special code for 1 x 1 Householder
+*
+         T1 = ONE - TAU*V( 1 )*CONJG( V( 1 ) )
+         DO 20 J = 1, N
+            C( 1, J ) = T1*C( 1, J )
+   20    CONTINUE
+         GO TO 410
+   30    CONTINUE
+*
+*        Special code for 2 x 2 Householder
+*
+         V1 = CONJG( V( 1 ) )
+         T1 = TAU*CONJG( V1 )
+         V2 = CONJG( V( 2 ) )
+         T2 = TAU*CONJG( V2 )
+         DO 40 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+   40    CONTINUE
+         GO TO 410
+   50    CONTINUE
+*
+*        Special code for 3 x 3 Householder
+*
+         V1 = CONJG( V( 1 ) )
+         T1 = TAU*CONJG( V1 )
+         V2 = CONJG( V( 2 ) )
+         T2 = TAU*CONJG( V2 )
+         V3 = CONJG( V( 3 ) )
+         T3 = TAU*CONJG( V3 )
+         DO 60 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+   60    CONTINUE
+         GO TO 410
+   70    CONTINUE
+*
+*        Special code for 4 x 4 Householder
+*
+         V1 = CONJG( V( 1 ) )
+         T1 = TAU*CONJG( V1 )
+         V2 = CONJG( V( 2 ) )
+         T2 = TAU*CONJG( V2 )
+         V3 = CONJG( V( 3 ) )
+         T3 = TAU*CONJG( V3 )
+         V4 = CONJG( V( 4 ) )
+         T4 = TAU*CONJG( V4 )
+         DO 80 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) +
+     $            V4*C( 4, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+            C( 4, J ) = C( 4, J ) - SUM*T4
+   80    CONTINUE
+         GO TO 410
+   90    CONTINUE
+*
+*        Special code for 5 x 5 Householder
+*
+         V1 = CONJG( V( 1 ) )
+         T1 = TAU*CONJG( V1 )
+         V2 = CONJG( V( 2 ) )
+         T2 = TAU*CONJG( V2 )
+         V3 = CONJG( V( 3 ) )
+         T3 = TAU*CONJG( V3 )
+         V4 = CONJG( V( 4 ) )
+         T4 = TAU*CONJG( V4 )
+         V5 = CONJG( V( 5 ) )
+         T5 = TAU*CONJG( V5 )
+         DO 100 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) +
+     $            V4*C( 4, J ) + V5*C( 5, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+            C( 4, J ) = C( 4, J ) - SUM*T4
+            C( 5, J ) = C( 5, J ) - SUM*T5
+  100    CONTINUE
+         GO TO 410
+  110    CONTINUE
+*
+*        Special code for 6 x 6 Householder
+*
+         V1 = CONJG( V( 1 ) )
+         T1 = TAU*CONJG( V1 )
+         V2 = CONJG( V( 2 ) )
+         T2 = TAU*CONJG( V2 )
+         V3 = CONJG( V( 3 ) )
+         T3 = TAU*CONJG( V3 )
+         V4 = CONJG( V( 4 ) )
+         T4 = TAU*CONJG( V4 )
+         V5 = CONJG( V( 5 ) )
+         T5 = TAU*CONJG( V5 )
+         V6 = CONJG( V( 6 ) )
+         T6 = TAU*CONJG( V6 )
+         DO 120 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) +
+     $            V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+            C( 4, J ) = C( 4, J ) - SUM*T4
+            C( 5, J ) = C( 5, J ) - SUM*T5
+            C( 6, J ) = C( 6, J ) - SUM*T6
+  120    CONTINUE
+         GO TO 410
+  130    CONTINUE
+*
+*        Special code for 7 x 7 Householder
+*
+         V1 = CONJG( V( 1 ) )
+         T1 = TAU*CONJG( V1 )
+         V2 = CONJG( V( 2 ) )
+         T2 = TAU*CONJG( V2 )
+         V3 = CONJG( V( 3 ) )
+         T3 = TAU*CONJG( V3 )
+         V4 = CONJG( V( 4 ) )
+         T4 = TAU*CONJG( V4 )
+         V5 = CONJG( V( 5 ) )
+         T5 = TAU*CONJG( V5 )
+         V6 = CONJG( V( 6 ) )
+         T6 = TAU*CONJG( V6 )
+         V7 = CONJG( V( 7 ) )
+         T7 = TAU*CONJG( V7 )
+         DO 140 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) +
+     $            V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) +
+     $            V7*C( 7, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+            C( 4, J ) = C( 4, J ) - SUM*T4
+            C( 5, J ) = C( 5, J ) - SUM*T5
+            C( 6, J ) = C( 6, J ) - SUM*T6
+            C( 7, J ) = C( 7, J ) - SUM*T7
+  140    CONTINUE
+         GO TO 410
+  150    CONTINUE
+*
+*        Special code for 8 x 8 Householder
+*
+         V1 = CONJG( V( 1 ) )
+         T1 = TAU*CONJG( V1 )
+         V2 = CONJG( V( 2 ) )
+         T2 = TAU*CONJG( V2 )
+         V3 = CONJG( V( 3 ) )
+         T3 = TAU*CONJG( V3 )
+         V4 = CONJG( V( 4 ) )
+         T4 = TAU*CONJG( V4 )
+         V5 = CONJG( V( 5 ) )
+         T5 = TAU*CONJG( V5 )
+         V6 = CONJG( V( 6 ) )
+         T6 = TAU*CONJG( V6 )
+         V7 = CONJG( V( 7 ) )
+         T7 = TAU*CONJG( V7 )
+         V8 = CONJG( V( 8 ) )
+         T8 = TAU*CONJG( V8 )
+         DO 160 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) +
+     $            V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) +
+     $            V7*C( 7, J ) + V8*C( 8, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+            C( 4, J ) = C( 4, J ) - SUM*T4
+            C( 5, J ) = C( 5, J ) - SUM*T5
+            C( 6, J ) = C( 6, J ) - SUM*T6
+            C( 7, J ) = C( 7, J ) - SUM*T7
+            C( 8, J ) = C( 8, J ) - SUM*T8
+  160    CONTINUE
+         GO TO 410
+  170    CONTINUE
+*
+*        Special code for 9 x 9 Householder
+*
+         V1 = CONJG( V( 1 ) )
+         T1 = TAU*CONJG( V1 )
+         V2 = CONJG( V( 2 ) )
+         T2 = TAU*CONJG( V2 )
+         V3 = CONJG( V( 3 ) )
+         T3 = TAU*CONJG( V3 )
+         V4 = CONJG( V( 4 ) )
+         T4 = TAU*CONJG( V4 )
+         V5 = CONJG( V( 5 ) )
+         T5 = TAU*CONJG( V5 )
+         V6 = CONJG( V( 6 ) )
+         T6 = TAU*CONJG( V6 )
+         V7 = CONJG( V( 7 ) )
+         T7 = TAU*CONJG( V7 )
+         V8 = CONJG( V( 8 ) )
+         T8 = TAU*CONJG( V8 )
+         V9 = CONJG( V( 9 ) )
+         T9 = TAU*CONJG( V9 )
+         DO 180 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) +
+     $            V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) +
+     $            V7*C( 7, J ) + V8*C( 8, J ) + V9*C( 9, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+            C( 4, J ) = C( 4, J ) - SUM*T4
+            C( 5, J ) = C( 5, J ) - SUM*T5
+            C( 6, J ) = C( 6, J ) - SUM*T6
+            C( 7, J ) = C( 7, J ) - SUM*T7
+            C( 8, J ) = C( 8, J ) - SUM*T8
+            C( 9, J ) = C( 9, J ) - SUM*T9
+  180    CONTINUE
+         GO TO 410
+  190    CONTINUE
+*
+*        Special code for 10 x 10 Householder
+*
+         V1 = CONJG( V( 1 ) )
+         T1 = TAU*CONJG( V1 )
+         V2 = CONJG( V( 2 ) )
+         T2 = TAU*CONJG( V2 )
+         V3 = CONJG( V( 3 ) )
+         T3 = TAU*CONJG( V3 )
+         V4 = CONJG( V( 4 ) )
+         T4 = TAU*CONJG( V4 )
+         V5 = CONJG( V( 5 ) )
+         T5 = TAU*CONJG( V5 )
+         V6 = CONJG( V( 6 ) )
+         T6 = TAU*CONJG( V6 )
+         V7 = CONJG( V( 7 ) )
+         T7 = TAU*CONJG( V7 )
+         V8 = CONJG( V( 8 ) )
+         T8 = TAU*CONJG( V8 )
+         V9 = CONJG( V( 9 ) )
+         T9 = TAU*CONJG( V9 )
+         V10 = CONJG( V( 10 ) )
+         T10 = TAU*CONJG( V10 )
+         DO 200 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) +
+     $            V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) +
+     $            V7*C( 7, J ) + V8*C( 8, J ) + V9*C( 9, J ) +
+     $            V10*C( 10, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+            C( 4, J ) = C( 4, J ) - SUM*T4
+            C( 5, J ) = C( 5, J ) - SUM*T5
+            C( 6, J ) = C( 6, J ) - SUM*T6
+            C( 7, J ) = C( 7, J ) - SUM*T7
+            C( 8, J ) = C( 8, J ) - SUM*T8
+            C( 9, J ) = C( 9, J ) - SUM*T9
+            C( 10, J ) = C( 10, J ) - SUM*T10
+  200    CONTINUE
+         GO TO 410
+      ELSE
+*
+*        Form  C * H, where H has order n.
+*
+         GO TO ( 210, 230, 250, 270, 290, 310, 330, 350,
+     $           370, 390 )N
+*
+*        Code for general N
+*
+*        w := C * v
+*
+         CALL CGEMV( 'No transpose', M, N, ONE, C, LDC, V, 1, ZERO,
+     $               WORK, 1 )
+*
+*        C := C - tau * w * v'
+*
+         CALL CGERC( M, N, -TAU, WORK, 1, V, 1, C, LDC )
+         GO TO 410
+  210    CONTINUE
+*
+*        Special code for 1 x 1 Householder
+*
+         T1 = ONE - TAU*V( 1 )*CONJG( V( 1 ) )
+         DO 220 J = 1, M
+            C( J, 1 ) = T1*C( J, 1 )
+  220    CONTINUE
+         GO TO 410
+  230    CONTINUE
+*
+*        Special code for 2 x 2 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*CONJG( V1 )
+         V2 = V( 2 )
+         T2 = TAU*CONJG( V2 )
+         DO 240 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+  240    CONTINUE
+         GO TO 410
+  250    CONTINUE
+*
+*        Special code for 3 x 3 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*CONJG( V1 )
+         V2 = V( 2 )
+         T2 = TAU*CONJG( V2 )
+         V3 = V( 3 )
+         T3 = TAU*CONJG( V3 )
+         DO 260 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+  260    CONTINUE
+         GO TO 410
+  270    CONTINUE
+*
+*        Special code for 4 x 4 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*CONJG( V1 )
+         V2 = V( 2 )
+         T2 = TAU*CONJG( V2 )
+         V3 = V( 3 )
+         T3 = TAU*CONJG( V3 )
+         V4 = V( 4 )
+         T4 = TAU*CONJG( V4 )
+         DO 280 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) +
+     $            V4*C( J, 4 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+            C( J, 4 ) = C( J, 4 ) - SUM*T4
+  280    CONTINUE
+         GO TO 410
+  290    CONTINUE
+*
+*        Special code for 5 x 5 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*CONJG( V1 )
+         V2 = V( 2 )
+         T2 = TAU*CONJG( V2 )
+         V3 = V( 3 )
+         T3 = TAU*CONJG( V3 )
+         V4 = V( 4 )
+         T4 = TAU*CONJG( V4 )
+         V5 = V( 5 )
+         T5 = TAU*CONJG( V5 )
+         DO 300 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) +
+     $            V4*C( J, 4 ) + V5*C( J, 5 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+            C( J, 4 ) = C( J, 4 ) - SUM*T4
+            C( J, 5 ) = C( J, 5 ) - SUM*T5
+  300    CONTINUE
+         GO TO 410
+  310    CONTINUE
+*
+*        Special code for 6 x 6 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*CONJG( V1 )
+         V2 = V( 2 )
+         T2 = TAU*CONJG( V2 )
+         V3 = V( 3 )
+         T3 = TAU*CONJG( V3 )
+         V4 = V( 4 )
+         T4 = TAU*CONJG( V4 )
+         V5 = V( 5 )
+         T5 = TAU*CONJG( V5 )
+         V6 = V( 6 )
+         T6 = TAU*CONJG( V6 )
+         DO 320 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) +
+     $            V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+            C( J, 4 ) = C( J, 4 ) - SUM*T4
+            C( J, 5 ) = C( J, 5 ) - SUM*T5
+            C( J, 6 ) = C( J, 6 ) - SUM*T6
+  320    CONTINUE
+         GO TO 410
+  330    CONTINUE
+*
+*        Special code for 7 x 7 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*CONJG( V1 )
+         V2 = V( 2 )
+         T2 = TAU*CONJG( V2 )
+         V3 = V( 3 )
+         T3 = TAU*CONJG( V3 )
+         V4 = V( 4 )
+         T4 = TAU*CONJG( V4 )
+         V5 = V( 5 )
+         T5 = TAU*CONJG( V5 )
+         V6 = V( 6 )
+         T6 = TAU*CONJG( V6 )
+         V7 = V( 7 )
+         T7 = TAU*CONJG( V7 )
+         DO 340 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) +
+     $            V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) +
+     $            V7*C( J, 7 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+            C( J, 4 ) = C( J, 4 ) - SUM*T4
+            C( J, 5 ) = C( J, 5 ) - SUM*T5
+            C( J, 6 ) = C( J, 6 ) - SUM*T6
+            C( J, 7 ) = C( J, 7 ) - SUM*T7
+  340    CONTINUE
+         GO TO 410
+  350    CONTINUE
+*
+*        Special code for 8 x 8 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*CONJG( V1 )
+         V2 = V( 2 )
+         T2 = TAU*CONJG( V2 )
+         V3 = V( 3 )
+         T3 = TAU*CONJG( V3 )
+         V4 = V( 4 )
+         T4 = TAU*CONJG( V4 )
+         V5 = V( 5 )
+         T5 = TAU*CONJG( V5 )
+         V6 = V( 6 )
+         T6 = TAU*CONJG( V6 )
+         V7 = V( 7 )
+         T7 = TAU*CONJG( V7 )
+         V8 = V( 8 )
+         T8 = TAU*CONJG( V8 )
+         DO 360 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) +
+     $            V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) +
+     $            V7*C( J, 7 ) + V8*C( J, 8 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+            C( J, 4 ) = C( J, 4 ) - SUM*T4
+            C( J, 5 ) = C( J, 5 ) - SUM*T5
+            C( J, 6 ) = C( J, 6 ) - SUM*T6
+            C( J, 7 ) = C( J, 7 ) - SUM*T7
+            C( J, 8 ) = C( J, 8 ) - SUM*T8
+  360    CONTINUE
+         GO TO 410
+  370    CONTINUE
+*
+*        Special code for 9 x 9 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*CONJG( V1 )
+         V2 = V( 2 )
+         T2 = TAU*CONJG( V2 )
+         V3 = V( 3 )
+         T3 = TAU*CONJG( V3 )
+         V4 = V( 4 )
+         T4 = TAU*CONJG( V4 )
+         V5 = V( 5 )
+         T5 = TAU*CONJG( V5 )
+         V6 = V( 6 )
+         T6 = TAU*CONJG( V6 )
+         V7 = V( 7 )
+         T7 = TAU*CONJG( V7 )
+         V8 = V( 8 )
+         T8 = TAU*CONJG( V8 )
+         V9 = V( 9 )
+         T9 = TAU*CONJG( V9 )
+         DO 380 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) +
+     $            V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) +
+     $            V7*C( J, 7 ) + V8*C( J, 8 ) + V9*C( J, 9 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+            C( J, 4 ) = C( J, 4 ) - SUM*T4
+            C( J, 5 ) = C( J, 5 ) - SUM*T5
+            C( J, 6 ) = C( J, 6 ) - SUM*T6
+            C( J, 7 ) = C( J, 7 ) - SUM*T7
+            C( J, 8 ) = C( J, 8 ) - SUM*T8
+            C( J, 9 ) = C( J, 9 ) - SUM*T9
+  380    CONTINUE
+         GO TO 410
+  390    CONTINUE
+*
+*        Special code for 10 x 10 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*CONJG( V1 )
+         V2 = V( 2 )
+         T2 = TAU*CONJG( V2 )
+         V3 = V( 3 )
+         T3 = TAU*CONJG( V3 )
+         V4 = V( 4 )
+         T4 = TAU*CONJG( V4 )
+         V5 = V( 5 )
+         T5 = TAU*CONJG( V5 )
+         V6 = V( 6 )
+         T6 = TAU*CONJG( V6 )
+         V7 = V( 7 )
+         T7 = TAU*CONJG( V7 )
+         V8 = V( 8 )
+         T8 = TAU*CONJG( V8 )
+         V9 = V( 9 )
+         T9 = TAU*CONJG( V9 )
+         V10 = V( 10 )
+         T10 = TAU*CONJG( V10 )
+         DO 400 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) +
+     $            V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) +
+     $            V7*C( J, 7 ) + V8*C( J, 8 ) + V9*C( J, 9 ) +
+     $            V10*C( J, 10 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+            C( J, 4 ) = C( J, 4 ) - SUM*T4
+            C( J, 5 ) = C( J, 5 ) - SUM*T5
+            C( J, 6 ) = C( J, 6 ) - SUM*T6
+            C( J, 7 ) = C( J, 7 ) - SUM*T7
+            C( J, 8 ) = C( J, 8 ) - SUM*T8
+            C( J, 9 ) = C( J, 9 ) - SUM*T9
+            C( J, 10 ) = C( J, 10 ) - SUM*T10
+  400    CONTINUE
+         GO TO 410
+      END IF
+  410 RETURN
+*
+*     End of CLARFX
+*
+      END
diff --git a/costa/native/external/lapack/clargv.f b/costa/native/external/lapack/clargv.f
new file mode 100644
index 000000000..478e6e328
--- /dev/null
+++ b/costa/native/external/lapack/clargv.f
@@ -0,0 +1,225 @@
+      SUBROUTINE CLARGV( N, X, INCX, Y, INCY, C, INCC )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCC, INCX, INCY, N
+*     ..
+*     .. Array Arguments ..
+      REAL               C( * )
+      COMPLEX            X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLARGV generates a vector of complex plane rotations with real
+*  cosines, determined by elements of the complex vectors x and y.
+*  For i = 1,2,...,n
+*
+*     (        c(i)   s(i) ) ( x(i) ) = ( r(i) )
+*     ( -conjg(s(i))  c(i) ) ( y(i) ) = (   0  )
+*
+*     where c(i)**2 + ABS(s(i))**2 = 1
+*
+*  The following conventions are used (these are the same as in CLARTG,
+*  but differ from the BLAS1 routine CROTG):
+*     If y(i)=0, then c(i)=1 and s(i)=0.
+*     If x(i)=0, then c(i)=0 and s(i) is chosen so that r(i) is real.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of plane rotations to be generated.
+*
+*  X       (input/output) COMPLEX array, dimension (1+(N-1)*INCX)
+*          On entry, the vector x.
+*          On exit, x(i) is overwritten by r(i), for i = 1,...,n.
+*
+*  INCX    (input) INTEGER
+*          The increment between elements of X. INCX > 0.
+*
+*  Y       (input/output) COMPLEX array, dimension (1+(N-1)*INCY)
+*          On entry, the vector y.
+*          On exit, the sines of the plane rotations.
+*
+*  INCY    (input) INTEGER
+*          The increment between elements of Y. INCY > 0.
+*
+*  C       (output) REAL array, dimension (1+(N-1)*INCC)
+*          The cosines of the plane rotations.
+*
+*  INCC    (input) INTEGER
+*          The increment between elements of C. INCC > 0.
+*
+*  Further Details
+*  ======= =======
+*
+*  6-6-96 - Modified with a new algorithm by W. Kahan and J. Demmel
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               TWO, ONE, ZERO
+      PARAMETER          ( TWO = 2.0E+0, ONE = 1.0E+0, ZERO = 0.0E+0 )
+      COMPLEX            CZERO
+      PARAMETER          ( CZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            FIRST
+      INTEGER            COUNT, I, IC, IX, IY, J
+      REAL               CS, D, DI, DR, EPS, F2, F2S, G2, G2S, SAFMIN,
+     $                   SAFMN2, SAFMX2, SCALE
+      COMPLEX            F, FF, FS, G, GS, R, SN
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH, SLAPY2
+      EXTERNAL           SLAMCH, SLAPY2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, CMPLX, CONJG, INT, LOG, MAX, REAL,
+     $                   SQRT
+*     ..
+*     .. Statement Functions ..
+      REAL               ABS1, ABSSQ
+*     ..
+*     .. Save statement ..
+      SAVE               FIRST, SAFMX2, SAFMIN, SAFMN2
+*     ..
+*     .. Data statements ..
+      DATA               FIRST / .TRUE. /
+*     ..
+*     .. Statement Function definitions ..
+      ABS1( FF ) = MAX( ABS( REAL( FF ) ), ABS( AIMAG( FF ) ) )
+      ABSSQ( FF ) = REAL( FF )**2 + AIMAG( FF )**2
+*     ..
+*     .. Executable Statements ..
+*
+      IF( FIRST ) THEN
+         FIRST = .FALSE.
+         SAFMIN = SLAMCH( 'S' )
+         EPS = SLAMCH( 'E' )
+         SAFMN2 = SLAMCH( 'B' )**INT( LOG( SAFMIN / EPS ) /
+     $            LOG( SLAMCH( 'B' ) ) / TWO )
+         SAFMX2 = ONE / SAFMN2
+      END IF
+      IX = 1
+      IY = 1
+      IC = 1
+      DO 60 I = 1, N
+         F = X( IX )
+         G = Y( IY )
+*
+*        Use identical algorithm as in CLARTG
+*
+         SCALE = MAX( ABS1( F ), ABS1( G ) )
+         FS = F
+         GS = G
+         COUNT = 0
+         IF( SCALE.GE.SAFMX2 ) THEN
+   10       CONTINUE
+            COUNT = COUNT + 1
+            FS = FS*SAFMN2
+            GS = GS*SAFMN2
+            SCALE = SCALE*SAFMN2
+            IF( SCALE.GE.SAFMX2 )
+     $         GO TO 10
+         ELSE IF( SCALE.LE.SAFMN2 ) THEN
+            IF( G.EQ.CZERO ) THEN
+               CS = ONE
+               SN = CZERO
+               R = F
+               GO TO 50
+            END IF
+   20       CONTINUE
+            COUNT = COUNT - 1
+            FS = FS*SAFMX2
+            GS = GS*SAFMX2
+            SCALE = SCALE*SAFMX2
+            IF( SCALE.LE.SAFMN2 )
+     $         GO TO 20
+         END IF
+         F2 = ABSSQ( FS )
+         G2 = ABSSQ( GS )
+         IF( F2.LE.MAX( G2, ONE )*SAFMIN ) THEN
+*
+*           This is a rare case: F is very small.
+*
+            IF( F.EQ.CZERO ) THEN
+               CS = ZERO
+               R = SLAPY2( REAL( G ), AIMAG( G ) )
+*              Do complex/real division explicitly with two real
+*              divisions
+               D = SLAPY2( REAL( GS ), AIMAG( GS ) )
+               SN = CMPLX( REAL( GS ) / D, -AIMAG( GS ) / D )
+               GO TO 50
+            END IF
+            F2S = SLAPY2( REAL( FS ), AIMAG( FS ) )
+*           G2 and G2S are accurate
+*           G2 is at least SAFMIN, and G2S is at least SAFMN2
+            G2S = SQRT( G2 )
+*           Error in CS from underflow in F2S is at most
+*           UNFL / SAFMN2 .lt. sqrt(UNFL*EPS) .lt. EPS
+*           If MAX(G2,ONE)=G2, then F2 .lt. G2*SAFMIN,
+*           and so CS .lt. sqrt(SAFMIN)
+*           If MAX(G2,ONE)=ONE, then F2 .lt. SAFMIN
+*           and so CS .lt. sqrt(SAFMIN)/SAFMN2 = sqrt(EPS)
+*           Therefore, CS = F2S/G2S / sqrt( 1 + (F2S/G2S)**2 ) = F2S/G2S
+            CS = F2S / G2S
+*           Make sure abs(FF) = 1
+*           Do complex/real division explicitly with 2 real divisions
+            IF( ABS1( F ).GT.ONE ) THEN
+               D = SLAPY2( REAL( F ), AIMAG( F ) )
+               FF = CMPLX( REAL( F ) / D, AIMAG( F ) / D )
+            ELSE
+               DR = SAFMX2*REAL( F )
+               DI = SAFMX2*AIMAG( F )
+               D = SLAPY2( DR, DI )
+               FF = CMPLX( DR / D, DI / D )
+            END IF
+            SN = FF*CMPLX( REAL( GS ) / G2S, -AIMAG( GS ) / G2S )
+            R = CS*F + SN*G
+         ELSE
+*
+*           This is the most common case.
+*           Neither F2 nor F2/G2 are less than SAFMIN
+*           F2S cannot overflow, and it is accurate
+*
+            F2S = SQRT( ONE+G2 / F2 )
+*           Do the F2S(real)*FS(complex) multiply with two real
+*           multiplies
+            R = CMPLX( F2S*REAL( FS ), F2S*AIMAG( FS ) )
+            CS = ONE / F2S
+            D = F2 + G2
+*           Do complex/real division explicitly with two real divisions
+            SN = CMPLX( REAL( R ) / D, AIMAG( R ) / D )
+            SN = SN*CONJG( GS )
+            IF( COUNT.NE.0 ) THEN
+               IF( COUNT.GT.0 ) THEN
+                  DO 30 J = 1, COUNT
+                     R = R*SAFMX2
+   30             CONTINUE
+               ELSE
+                  DO 40 J = 1, -COUNT
+                     R = R*SAFMN2
+   40             CONTINUE
+               END IF
+            END IF
+         END IF
+   50    CONTINUE
+         C( IC ) = CS
+         Y( IY ) = SN
+         X( IX ) = R
+         IC = IC + INCC
+         IY = IY + INCY
+         IX = IX + INCX
+   60 CONTINUE
+      RETURN
+*
+*     End of CLARGV
+*
+      END
diff --git a/costa/native/external/lapack/clarnv.f b/costa/native/external/lapack/clarnv.f
new file mode 100644
index 000000000..4ea0a86a4
--- /dev/null
+++ b/costa/native/external/lapack/clarnv.f
@@ -0,0 +1,131 @@
+      SUBROUTINE CLARNV( IDIST, ISEED, N, X )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            IDIST, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            ISEED( 4 )
+      COMPLEX            X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLARNV returns a vector of n random complex numbers from a uniform or
+*  normal distribution.
+*
+*  Arguments
+*  =========
+*
+*  IDIST   (input) INTEGER
+*          Specifies the distribution of the random numbers:
+*          = 1:  real and imaginary parts each uniform (0,1)
+*          = 2:  real and imaginary parts each uniform (-1,1)
+*          = 3:  real and imaginary parts each normal (0,1)
+*          = 4:  uniformly distributed on the disc abs(z) < 1
+*          = 5:  uniformly distributed on the circle abs(z) = 1
+*
+*  ISEED   (input/output) INTEGER array, dimension (4)
+*          On entry, the seed of the random number generator; the array
+*          elements must be between 0 and 4095, and ISEED(4) must be
+*          odd.
+*          On exit, the seed is updated.
+*
+*  N       (input) INTEGER
+*          The number of random numbers to be generated.
+*
+*  X       (output) COMPLEX array, dimension (N)
+*          The generated random numbers.
+*
+*  Further Details
+*  ===============
+*
+*  This routine calls the auxiliary routine SLARUV to generate random
+*  real numbers from a uniform (0,1) distribution, in batches of up to
+*  128 using vectorisable code. The Box-Muller method is used to
+*  transform numbers from a uniform to a normal distribution.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 )
+      INTEGER            LV
+      PARAMETER          ( LV = 128 )
+      REAL               TWOPI
+      PARAMETER          ( TWOPI = 6.2831853071795864769252867663E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IL, IV
+*     ..
+*     .. Local Arrays ..
+      REAL               U( LV )
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CMPLX, EXP, LOG, MIN, SQRT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARUV
+*     ..
+*     .. Executable Statements ..
+*
+      DO 60 IV = 1, N, LV / 2
+         IL = MIN( LV / 2, N-IV+1 )
+*
+*        Call SLARUV to generate 2*IL real numbers from a uniform (0,1)
+*        distribution (2*IL <= LV)
+*
+         CALL SLARUV( ISEED, 2*IL, U )
+*
+         IF( IDIST.EQ.1 ) THEN
+*
+*           Copy generated numbers
+*
+            DO 10 I = 1, IL
+               X( IV+I-1 ) = CMPLX( U( 2*I-1 ), U( 2*I ) )
+   10       CONTINUE
+         ELSE IF( IDIST.EQ.2 ) THEN
+*
+*           Convert generated numbers to uniform (-1,1) distribution
+*
+            DO 20 I = 1, IL
+               X( IV+I-1 ) = CMPLX( TWO*U( 2*I-1 )-ONE,
+     $                       TWO*U( 2*I )-ONE )
+   20       CONTINUE
+         ELSE IF( IDIST.EQ.3 ) THEN
+*
+*           Convert generated numbers to normal (0,1) distribution
+*
+            DO 30 I = 1, IL
+               X( IV+I-1 ) = SQRT( -TWO*LOG( U( 2*I-1 ) ) )*
+     $                       EXP( CMPLX( ZERO, TWOPI*U( 2*I ) ) )
+   30       CONTINUE
+         ELSE IF( IDIST.EQ.4 ) THEN
+*
+*           Convert generated numbers to complex numbers uniformly
+*           distributed on the unit disk
+*
+            DO 40 I = 1, IL
+               X( IV+I-1 ) = SQRT( U( 2*I-1 ) )*
+     $                       EXP( CMPLX( ZERO, TWOPI*U( 2*I ) ) )
+   40       CONTINUE
+         ELSE IF( IDIST.EQ.5 ) THEN
+*
+*           Convert generated numbers to complex numbers uniformly
+*           distributed on the unit circle
+*
+            DO 50 I = 1, IL
+               X( IV+I-1 ) = EXP( CMPLX( ZERO, TWOPI*U( 2*I ) ) )
+   50       CONTINUE
+         END IF
+   60 CONTINUE
+      RETURN
+*
+*     End of CLARNV
+*
+      END
diff --git a/costa/native/external/lapack/clarrv.f b/costa/native/external/lapack/clarrv.f
new file mode 100644
index 000000000..77001bc04
--- /dev/null
+++ b/costa/native/external/lapack/clarrv.f
@@ -0,0 +1,433 @@
+      SUBROUTINE CLARRV( N, D, L, ISPLIT, M, W, IBLOCK, GERSCH, TOL, Z,
+     $                   LDZ, ISUPPZ, WORK, IWORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDZ, M, N
+      REAL               TOL
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IBLOCK( * ), ISPLIT( * ), ISUPPZ( * ),
+     $                   IWORK( * )
+      REAL               D( * ), GERSCH( * ), L( * ), W( * ), WORK( * )
+      COMPLEX            Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLARRV computes the eigenvectors of the tridiagonal matrix
+*  T = L D L^T given L, D and the eigenvalues of L D L^T.
+*  The input eigenvalues should have high relative accuracy with
+*  respect to the entries of L and D. The desired accuracy of the
+*  output can be specified by the input parameter TOL.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix.  N >= 0.
+*
+*  D       (input/output) REAL array, dimension (N)
+*          On entry, the n diagonal elements of the diagonal matrix D.
+*          On exit, D may be overwritten.
+*
+*  L       (input/output) REAL array, dimension (N-1)
+*          On entry, the (n-1) subdiagonal elements of the unit
+*          bidiagonal matrix L in elements 1 to N-1 of L. L(N) need
+*          not be set. On exit, L is overwritten.
+*
+*  ISPLIT  (input) INTEGER array, dimension (N)
+*          The splitting points, at which T breaks up into submatrices.
+*          The first submatrix consists of rows/columns 1 to
+*          ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1
+*          through ISPLIT( 2 ), etc.
+*
+*  TOL     (input) REAL
+*          The absolute error tolerance for the
+*          eigenvalues/eigenvectors.
+*          Errors in the input eigenvalues must be bounded by TOL.
+*          The eigenvectors output have residual norms
+*          bounded by TOL, and the dot products between different
+*          eigenvectors are bounded by TOL. TOL must be at least
+*          N*EPS*|T|, where EPS is the machine precision and |T| is
+*          the 1-norm of the tridiagonal matrix.
+*
+*  M       (input) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (input) REAL array, dimension (N)
+*          The first M elements of W contain the eigenvalues for
+*          which eigenvectors are to be computed.  The eigenvalues
+*          should be grouped by split-off block and ordered from
+*          smallest to largest within the block ( The output array
+*          W from SLARRE is expected here ).
+*          Errors in W must be bounded by TOL (see above).
+*
+*  IBLOCK  (input) INTEGER array, dimension (N)
+*          The submatrix indices associated with the corresponding
+*          eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to
+*          the first submatrix from the top, =2 if W(i) belongs to
+*          the second submatrix, etc. 
+*
+*  Z       (output) COMPLEX array, dimension (LDZ, max(1,M) )
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix T
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  ISUPPZ  (output) INTEGER ARRAY, dimension ( 2*max(1,M) )
+*          The support of the eigenvectors in Z, i.e., the indices
+*          indicating the nonzero elements in Z. The i-th eigenvector
+*          is nonzero only in elements ISUPPZ( 2*i-1 ) through
+*          ISUPPZ( 2*i ).
+*
+*  WORK    (workspace) REAL array, dimension (13*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (6*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = 1, internal error in SLARRB
+*                if INFO = 2, internal error in CSTEIN
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Inderjit Dhillon, IBM Almaden, USA
+*     Osni Marques, LBNL/NERSC, USA
+*     Ken Stanley, Computer Science Division, University of
+*       California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            MGSSIZ
+      PARAMETER          ( MGSSIZ = 20 )
+      REAL               ZERO, ONE, FOUR
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0, FOUR = 4.0E0 )
+      COMPLEX            CZERO
+      PARAMETER          ( CZERO = ( 0.0E0, 0.0E0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            MGSCLS
+      INTEGER            I, IBEGIN, IEND, IINDC1, IINDC2, IINDR, IINDWK, 
+     $                   IINFO, IM, IN, INDERR, INDGAP, INDIN1, INDIN2, 
+     $                   INDLD, INDLLD, INDWRK, ITER, ITMP1, ITMP2, J, 
+     $                   JBLK, K, KTOT, LSBDPT, MAXITR, NCLUS, NDEPTH, 
+     $                   NDONE, NEWCLS, NEWFRS, NEWFTT, NEWLST, NEWSIZ, 
+     $                   NSPLIT, OLDCLS, OLDFST, OLDIEN, OLDLST, OLDNCL, 
+     $                   P, Q, TEMP( 1 )
+      REAL               EPS, GAP, LAMBDA, MGSTOL, MINGMA, MINRGP, 
+     $                   NRMINV, RELGAP, RELTOL, RESID, RQCORR, SIGMA, 
+     $                   TMP1, ZTZ
+*     ..
+*     .. External Functions ..
+      REAL               SCNRM2, SLAMCH
+      COMPLEX            CDOTU
+      EXTERNAL           CDOTU, SCNRM2, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CLAR1V, CLASET, CSTEIN, SCOPY, SLARRB 
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, CMPLX, MAX, MIN, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INDERR = N + 1
+      INDLD = 2*N
+      INDLLD = 3*N
+      INDGAP = 4*N
+      INDIN1 = 5*N + 1
+      INDIN2 = 6*N + 1
+      INDWRK = 7*N + 1
+*
+      IINDR = N
+      IINDC1 = 2*N
+      IINDC2 = 3*N
+      IINDWK = 4*N + 1
+*
+      EPS = SLAMCH( 'Precision' )
+*
+      DO 10 I = 1, 2*N
+         IWORK( I ) = 0
+   10 CONTINUE
+      DO 20 I = 1, M
+         WORK( INDERR+I-1 ) = EPS*ABS( W( I ) )
+   20 CONTINUE
+      CALL CLASET( 'Full', N, N, CZERO, CZERO, Z, LDZ )
+      MGSTOL = 5.0E0*EPS
+*
+      NSPLIT = IBLOCK( M )
+      IBEGIN = 1
+      DO 170 JBLK = 1, NSPLIT
+         IEND = ISPLIT( JBLK )
+*
+*        Find the eigenvectors of the submatrix indexed IBEGIN
+*        through IEND.
+*
+         IF( IBEGIN.EQ.IEND ) THEN
+            Z( IBEGIN, IBEGIN ) = ONE
+            ISUPPZ( 2*IBEGIN-1 ) = IBEGIN
+            ISUPPZ( 2*IBEGIN ) = IBEGIN
+            IBEGIN = IEND + 1
+            GO TO 170
+         END IF
+         OLDIEN = IBEGIN - 1
+         IN = IEND - OLDIEN
+         RELTOL = MIN( 1.0E-2, ONE / REAL( IN ) )
+         IM = IN
+         CALL SCOPY( IM, W( IBEGIN ), 1, WORK, 1 )
+         DO 30 I = 1, IN - 1
+            WORK( INDGAP+I ) = WORK( I+1 ) - WORK( I )
+   30    CONTINUE
+         WORK( INDGAP+IN ) = MAX( ABS( WORK( IN ) ), EPS )
+         NDONE = 0
+*
+         NDEPTH = 0
+         LSBDPT = 1
+         NCLUS = 1
+         IWORK( IINDC1+1 ) = 1
+         IWORK( IINDC1+2 ) = IN
+*
+*        While( NDONE.LT.IM ) do
+*
+   40    CONTINUE
+         IF( NDONE.LT.IM ) THEN
+            OLDNCL = NCLUS
+            NCLUS = 0
+            LSBDPT = 1 - LSBDPT
+            DO 150 I = 1, OLDNCL
+               IF( LSBDPT.EQ.0 ) THEN
+                  OLDCLS = IINDC1
+                  NEWCLS = IINDC2
+               ELSE
+                  OLDCLS = IINDC2
+                  NEWCLS = IINDC1
+               END IF
+*
+*              If NDEPTH > 1, retrieve the relatively robust
+*              representation (RRR) and perform limited bisection
+*              (if necessary) to get approximate eigenvalues.
+*
+               J = OLDCLS + 2*I
+               OLDFST = IWORK( J-1 )
+               OLDLST = IWORK( J )
+               IF( NDEPTH.GT.0 ) THEN
+                  J = OLDIEN + OLDFST
+                  DO 45 K = 1, IN
+                     D( IBEGIN+K-1 ) = REAL( Z( IBEGIN+K-1,
+     $                                 OLDIEN+OLDFST ) )
+                     L( IBEGIN+K-1 ) = REAL( Z( IBEGIN+K-1,
+     $                                 OLDIEN+OLDFST+1 ) )
+   45             CONTINUE
+                  SIGMA = L( IEND )
+               END IF
+               K = IBEGIN
+               DO 50 J = 1, IN - 1
+                  WORK( INDLD+J ) = D( K )*L( K )
+                  WORK( INDLLD+J ) = WORK( INDLD+J )*L( K )
+                  K = K + 1
+   50          CONTINUE
+               IF( NDEPTH.GT.0 ) THEN
+                  CALL SLARRB( IN, D( IBEGIN ), L( IBEGIN ),
+     $                         WORK( INDLD+1 ), WORK( INDLLD+1 ),
+     $                         OLDFST, OLDLST, SIGMA, RELTOL, WORK,
+     $                         WORK( INDGAP+1 ), WORK( INDERR ),
+     $                         WORK( INDWRK ), IWORK( IINDWK ), IINFO )
+                  IF( IINFO.NE.0 ) THEN
+                     INFO = 1
+                     RETURN
+                  END IF
+               END IF
+*
+*              Classify eigenvalues of the current representation (RRR)
+*              as (i) isolated, (ii) loosely clustered or (iii) tightly
+*              clustered
+*
+               NEWFRS = OLDFST
+               DO 140 J = OLDFST, OLDLST
+                  IF( J.EQ.OLDLST .OR. WORK( INDGAP+J ).GE.RELTOL*
+     $                ABS( WORK( J ) ) ) THEN
+                     NEWLST = J
+                  ELSE
+*
+*                    continue (to the next loop)
+*
+                     RELGAP = WORK( INDGAP+J ) / ABS( WORK( J ) )
+                     IF( J.EQ.NEWFRS ) THEN
+                        MINRGP = RELGAP
+                     ELSE
+                        MINRGP = MIN( MINRGP, RELGAP )
+                     END IF
+                     GO TO 140
+                  END IF
+                  NEWSIZ = NEWLST - NEWFRS + 1
+                  MAXITR = 10
+                  NEWFTT = OLDIEN + NEWFRS
+                  IF( NEWSIZ.GT.1 ) THEN
+                     MGSCLS = NEWSIZ.LE.MGSSIZ .AND. MINRGP.GE.MGSTOL
+                     IF( .NOT.MGSCLS ) THEN
+                        DO 55 K = 1, IN
+                           WORK( INDIN1+K-1 ) = REAL( Z( IBEGIN+K-1,
+     $                                          NEWFTT ) )
+                           WORK( INDIN2+K-1 ) = REAL( Z( IBEGIN+K-1,
+     $                                          NEWFTT+1 ) )
+   55                   CONTINUE
+                        CALL SLARRF( IN, D( IBEGIN ), L( IBEGIN ),
+     $                               WORK( INDLD+1 ), WORK( INDLLD+1 ),
+     $                               NEWFRS, NEWLST, WORK,
+     $                               WORK( INDIN1 ), WORK( INDIN2 ),
+     $                               WORK( INDWRK ), IWORK( IINDWK ),
+     $                               INFO )
+                        IF( INFO.EQ.0 ) THEN
+                           NCLUS = NCLUS + 1
+                           K = NEWCLS + 2*NCLUS
+                           IWORK( K-1 ) = NEWFRS
+                           IWORK( K ) = NEWLST
+                        ELSE
+                           INFO = 0
+                           IF( MINRGP.GE.MGSTOL ) THEN
+                              MGSCLS = .TRUE.
+                           ELSE
+*                           
+*                             Call CSTEIN to process this tight cluster.
+*                             This happens only if MINRGP <= MGSTOL
+*                             and SLARRF returns INFO = 1. The latter
+*                             means that a new RRR to "break" the
+*                             cluster could not be found.
+*
+                              WORK( INDWRK ) = D( IBEGIN )
+                              DO 60 K = 1, IN - 1
+                                 WORK( INDWRK+K ) = D( IBEGIN+K ) +
+     $                                              WORK( INDLLD+K )
+   60                         CONTINUE
+                              DO 70 K = 1, NEWSIZ
+                                 IWORK( IINDWK+K-1 ) = 1
+   70                         CONTINUE
+                              DO 80 K = NEWFRS, NEWLST
+                                 ISUPPZ( 2*( IBEGIN+K )-3 ) = 1
+                                 ISUPPZ( 2*( IBEGIN+K )-2 ) = IN
+   80                         CONTINUE
+                              TEMP( 1 ) = IN
+                              CALL CSTEIN( IN, WORK( INDWRK ),
+     $                                     WORK( INDLD+1 ), NEWSIZ,
+     $                                     WORK( NEWFRS ),
+     $                                     IWORK( IINDWK ), TEMP( 1 ),
+     $                                     Z( IBEGIN, NEWFTT ), LDZ,
+     $                                     WORK( INDWRK+IN ),
+     $                                     IWORK( IINDWK+IN ),
+     $                                     IWORK( IINDWK+2*IN ), IINFO )
+                              IF( IINFO.NE.0 ) THEN
+                                 INFO = 2
+                                 RETURN
+                              END IF
+                              NDONE = NDONE + NEWSIZ
+                           END IF
+                        END IF
+                     END IF
+                  ELSE
+                     MGSCLS = .FALSE.
+                  END IF
+                  IF( NEWSIZ.EQ.1 .OR. MGSCLS ) THEN
+                     KTOT = NEWFTT
+                     DO 100 K = NEWFRS, NEWLST
+                        ITER = 0
+   90                   CONTINUE
+                        LAMBDA = WORK( K )
+                        CALL CLAR1V( IN, 1, IN, LAMBDA, D( IBEGIN ),
+     $                               L( IBEGIN ), WORK( INDLD+1 ),
+     $                               WORK( INDLLD+1 ),
+     $                               GERSCH( 2*OLDIEN+1 ),
+     $                               Z( IBEGIN, KTOT ), ZTZ, MINGMA,
+     $                               IWORK( IINDR+KTOT ),
+     $                               ISUPPZ( 2*KTOT-1 ),
+     $                               WORK( INDWRK ) )
+                        TMP1 = ONE / ZTZ
+                        NRMINV = SQRT( TMP1 )
+                        RESID = ABS( MINGMA )*NRMINV
+                        RQCORR = MINGMA*TMP1
+                        IF( K.EQ.IN ) THEN
+                           GAP = WORK( INDGAP+K-1 )
+                        ELSE IF( K.EQ.1 ) THEN
+                           GAP = WORK( INDGAP+K )
+                        ELSE
+                           GAP = MIN( WORK( INDGAP+K-1 ),
+     $                           WORK( INDGAP+K ) )
+                        END IF
+                        ITER = ITER + 1
+                        IF( RESID.GT.TOL*GAP .AND. ABS( RQCORR ).GT.
+     $                      FOUR*EPS*ABS( LAMBDA ) ) THEN
+                           WORK( K ) = LAMBDA + RQCORR
+                           IF( ITER.LT.MAXITR ) THEN
+                              GO TO 90
+                           END IF
+                        END IF
+                        IWORK( KTOT ) = 1
+                        IF( NEWSIZ.EQ.1 )
+     $                     NDONE = NDONE + 1
+                        CALL CSSCAL( IN, NRMINV, Z( IBEGIN, KTOT ), 1 )
+                        KTOT = KTOT + 1
+  100                CONTINUE
+                     IF( NEWSIZ.GT.1 ) THEN
+                        ITMP1 = ISUPPZ( 2*NEWFTT-1 )
+                        ITMP2 = ISUPPZ( 2*NEWFTT )
+                        KTOT = OLDIEN + NEWLST
+                        DO 120 P = NEWFTT + 1, KTOT
+                           DO 110 Q = NEWFTT, P - 1
+                              TMP1 = -CDOTU( IN, Z( IBEGIN, P ), 1,
+     $                               Z( IBEGIN, Q ), 1 )
+                              CALL CAXPY( IN, CMPLX( TMP1, ZERO ),
+     $                                    Z( IBEGIN, Q ), 1,
+     $                                    Z( IBEGIN, P ), 1 )
+  110                      CONTINUE
+                           TMP1 = ONE / SCNRM2( IN, Z( IBEGIN, P ), 1 )
+                           CALL CSSCAL( IN, TMP1, Z( IBEGIN, P ), 1 )
+                           ITMP1 = MIN( ITMP1, ISUPPZ( 2*P-1 ) )
+                           ITMP2 = MAX( ITMP2, ISUPPZ( 2*P ) )
+  120                   CONTINUE
+                        DO 130 P = NEWFTT, KTOT
+                           ISUPPZ( 2*P-1 ) = ITMP1
+                           ISUPPZ( 2*P ) = ITMP2
+  130                   CONTINUE
+                        NDONE = NDONE + NEWSIZ
+                     END IF
+                  END IF
+                  NEWFRS = J + 1
+  140          CONTINUE
+  150       CONTINUE
+            NDEPTH = NDEPTH + 1
+            GO TO 40
+         END IF
+         J = 2*IBEGIN
+         DO 160 I = IBEGIN, IEND
+            ISUPPZ( J-1 ) = ISUPPZ( J-1 ) + OLDIEN
+            ISUPPZ( J ) = ISUPPZ( J ) + OLDIEN
+            J = J + 2
+  160    CONTINUE
+         IBEGIN = IEND + 1
+  170 CONTINUE
+*
+      RETURN
+*
+*     End of CLARRV
+*
+      END
diff --git a/costa/native/external/lapack/clartg.f b/costa/native/external/lapack/clartg.f
new file mode 100644
index 000000000..49b7b99b9
--- /dev/null
+++ b/costa/native/external/lapack/clartg.f
@@ -0,0 +1,193 @@
+      SUBROUTINE CLARTG( F, G, CS, SN, R )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      REAL               CS
+      COMPLEX            F, G, R, SN
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLARTG generates a plane rotation so that
+*
+*     [  CS  SN  ]     [ F ]     [ R ]
+*     [  __      ]  .  [   ]  =  [   ]   where CS**2 + |SN|**2 = 1.
+*     [ -SN  CS  ]     [ G ]     [ 0 ]
+*
+*  This is a faster version of the BLAS1 routine CROTG, except for
+*  the following differences:
+*     F and G are unchanged on return.
+*     If G=0, then CS=1 and SN=0.
+*     If F=0, then CS=0 and SN is chosen so that R is real.
+*
+*  Arguments
+*  =========
+*
+*  F       (input) COMPLEX
+*          The first component of vector to be rotated.
+*
+*  G       (input) COMPLEX
+*          The second component of vector to be rotated.
+*
+*  CS      (output) REAL
+*          The cosine of the rotation.
+*
+*  SN      (output) COMPLEX
+*          The sine of the rotation.
+*
+*  R       (output) COMPLEX
+*          The nonzero component of the rotated vector.
+*
+*  Further Details
+*  ======= =======
+*
+*  3-5-96 - Modified with a new algorithm by W. Kahan and J. Demmel
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               TWO, ONE, ZERO
+      PARAMETER          ( TWO = 2.0E+0, ONE = 1.0E+0, ZERO = 0.0E+0 )
+      COMPLEX            CZERO
+      PARAMETER          ( CZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            FIRST
+      INTEGER            COUNT, I
+      REAL               D, DI, DR, EPS, F2, F2S, G2, G2S, SAFMIN,
+     $                   SAFMN2, SAFMX2, SCALE
+      COMPLEX            FF, FS, GS
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH, SLAPY2
+      EXTERNAL           SLAMCH, SLAPY2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, CMPLX, CONJG, INT, LOG, MAX, REAL,
+     $                   SQRT
+*     ..
+*     .. Statement Functions ..
+      REAL               ABS1, ABSSQ
+*     ..
+*     .. Save statement ..
+      SAVE               FIRST, SAFMX2, SAFMIN, SAFMN2
+*     ..
+*     .. Data statements ..
+      DATA               FIRST / .TRUE. /
+*     ..
+*     .. Statement Function definitions ..
+      ABS1( FF ) = MAX( ABS( REAL( FF ) ), ABS( AIMAG( FF ) ) )
+      ABSSQ( FF ) = REAL( FF )**2 + AIMAG( FF )**2
+*     ..
+*     .. Executable Statements ..
+*
+      IF( FIRST ) THEN
+         FIRST = .FALSE.
+         SAFMIN = SLAMCH( 'S' )
+         EPS = SLAMCH( 'E' )
+         SAFMN2 = SLAMCH( 'B' )**INT( LOG( SAFMIN / EPS ) /
+     $            LOG( SLAMCH( 'B' ) ) / TWO )
+         SAFMX2 = ONE / SAFMN2
+      END IF
+      SCALE = MAX( ABS1( F ), ABS1( G ) )
+      FS = F
+      GS = G
+      COUNT = 0
+      IF( SCALE.GE.SAFMX2 ) THEN
+   10    CONTINUE
+         COUNT = COUNT + 1
+         FS = FS*SAFMN2
+         GS = GS*SAFMN2
+         SCALE = SCALE*SAFMN2
+         IF( SCALE.GE.SAFMX2 )
+     $      GO TO 10
+      ELSE IF( SCALE.LE.SAFMN2 ) THEN
+         IF( G.EQ.CZERO ) THEN
+            CS = ONE
+            SN = CZERO
+            R = F
+            RETURN
+         END IF
+   20    CONTINUE
+         COUNT = COUNT - 1
+         FS = FS*SAFMX2
+         GS = GS*SAFMX2
+         SCALE = SCALE*SAFMX2
+         IF( SCALE.LE.SAFMN2 )
+     $      GO TO 20
+      END IF
+      F2 = ABSSQ( FS )
+      G2 = ABSSQ( GS )
+      IF( F2.LE.MAX( G2, ONE )*SAFMIN ) THEN
+*
+*        This is a rare case: F is very small.
+*
+         IF( F.EQ.CZERO ) THEN
+            CS = ZERO
+            R = SLAPY2( REAL( G ), AIMAG( G ) )
+*           Do complex/real division explicitly with two real divisions
+            D = SLAPY2( REAL( GS ), AIMAG( GS ) )
+            SN = CMPLX( REAL( GS ) / D, -AIMAG( GS ) / D )
+            RETURN
+         END IF
+         F2S = SLAPY2( REAL( FS ), AIMAG( FS ) )
+*        G2 and G2S are accurate
+*        G2 is at least SAFMIN, and G2S is at least SAFMN2
+         G2S = SQRT( G2 )
+*        Error in CS from underflow in F2S is at most
+*        UNFL / SAFMN2 .lt. sqrt(UNFL*EPS) .lt. EPS
+*        If MAX(G2,ONE)=G2, then F2 .lt. G2*SAFMIN,
+*        and so CS .lt. sqrt(SAFMIN)
+*        If MAX(G2,ONE)=ONE, then F2 .lt. SAFMIN
+*        and so CS .lt. sqrt(SAFMIN)/SAFMN2 = sqrt(EPS)
+*        Therefore, CS = F2S/G2S / sqrt( 1 + (F2S/G2S)**2 ) = F2S/G2S
+         CS = F2S / G2S
+*        Make sure abs(FF) = 1
+*        Do complex/real division explicitly with 2 real divisions
+         IF( ABS1( F ).GT.ONE ) THEN
+            D = SLAPY2( REAL( F ), AIMAG( F ) )
+            FF = CMPLX( REAL( F ) / D, AIMAG( F ) / D )
+         ELSE
+            DR = SAFMX2*REAL( F )
+            DI = SAFMX2*AIMAG( F )
+            D = SLAPY2( DR, DI )
+            FF = CMPLX( DR / D, DI / D )
+         END IF
+         SN = FF*CMPLX( REAL( GS ) / G2S, -AIMAG( GS ) / G2S )
+         R = CS*F + SN*G
+      ELSE
+*
+*        This is the most common case.
+*        Neither F2 nor F2/G2 are less than SAFMIN
+*        F2S cannot overflow, and it is accurate
+*
+         F2S = SQRT( ONE+G2 / F2 )
+*        Do the F2S(real)*FS(complex) multiply with two real multiplies
+         R = CMPLX( F2S*REAL( FS ), F2S*AIMAG( FS ) )
+         CS = ONE / F2S
+         D = F2 + G2
+*        Do complex/real division explicitly with two real divisions
+         SN = CMPLX( REAL( R ) / D, AIMAG( R ) / D )
+         SN = SN*CONJG( GS )
+         IF( COUNT.NE.0 ) THEN
+            IF( COUNT.GT.0 ) THEN
+               DO 30 I = 1, COUNT
+                  R = R*SAFMX2
+   30          CONTINUE
+            ELSE
+               DO 40 I = 1, -COUNT
+                  R = R*SAFMN2
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      RETURN
+*
+*     End of CLARTG
+*
+      END
diff --git a/costa/native/external/lapack/clartv.f b/costa/native/external/lapack/clartv.f
new file mode 100644
index 000000000..e653f763d
--- /dev/null
+++ b/costa/native/external/lapack/clartv.f
@@ -0,0 +1,79 @@
+      SUBROUTINE CLARTV( N, X, INCX, Y, INCY, C, S, INCC )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCC, INCX, INCY, N
+*     ..
+*     .. Array Arguments ..
+      REAL               C( * )
+      COMPLEX            S( * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLARTV applies a vector of complex plane rotations with real cosines
+*  to elements of the complex vectors x and y. For i = 1,2,...,n
+*
+*     ( x(i) ) := (        c(i)   s(i) ) ( x(i) )
+*     ( y(i) )    ( -conjg(s(i))  c(i) ) ( y(i) )
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of plane rotations to be applied.
+*
+*  X       (input/output) COMPLEX array, dimension (1+(N-1)*INCX)
+*          The vector x.
+*
+*  INCX    (input) INTEGER
+*          The increment between elements of X. INCX > 0.
+*
+*  Y       (input/output) COMPLEX array, dimension (1+(N-1)*INCY)
+*          The vector y.
+*
+*  INCY    (input) INTEGER
+*          The increment between elements of Y. INCY > 0.
+*
+*  C       (input) REAL array, dimension (1+(N-1)*INCC)
+*          The cosines of the plane rotations.
+*
+*  S       (input) COMPLEX array, dimension (1+(N-1)*INCC)
+*          The sines of the plane rotations.
+*
+*  INCC    (input) INTEGER
+*          The increment between elements of C and S. INCC > 0.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, IC, IX, IY
+      COMPLEX            XI, YI
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG
+*     ..
+*     .. Executable Statements ..
+*
+      IX = 1
+      IY = 1
+      IC = 1
+      DO 10 I = 1, N
+         XI = X( IX )
+         YI = Y( IY )
+         X( IX ) = C( IC )*XI + S( IC )*YI
+         Y( IY ) = C( IC )*YI - CONJG( S( IC ) )*XI
+         IX = IX + INCX
+         IY = IY + INCY
+         IC = IC + INCC
+   10 CONTINUE
+      RETURN
+*
+*     End of CLARTV
+*
+      END
diff --git a/costa/native/external/lapack/clarz.f b/costa/native/external/lapack/clarz.f
new file mode 100644
index 000000000..9c16f2ce2
--- /dev/null
+++ b/costa/native/external/lapack/clarz.f
@@ -0,0 +1,158 @@
+      SUBROUTINE CLARZ( SIDE, M, N, L, V, INCV, TAU, C, LDC, WORK )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE
+      INTEGER            INCV, L, LDC, M, N
+      COMPLEX            TAU
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            C( LDC, * ), V( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLARZ applies a complex elementary reflector H to a complex
+*  M-by-N matrix C, from either the left or the right. H is represented
+*  in the form
+*
+*        H = I - tau * v * v'
+*
+*  where tau is a complex scalar and v is a complex vector.
+*
+*  If tau = 0, then H is taken to be the unit matrix.
+*
+*  To apply H' (the conjugate transpose of H), supply conjg(tau) instead
+*  tau.
+*
+*  H is a product of k elementary reflectors as returned by CTZRZF.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': form  H * C
+*          = 'R': form  C * H
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C.
+*
+*  L       (input) INTEGER
+*          The number of entries of the vector V containing
+*          the meaningful part of the Householder vectors.
+*          If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0.
+*
+*  V       (input) COMPLEX array, dimension (1+(L-1)*abs(INCV))
+*          The vector v in the representation of H as returned by
+*          CTZRZF. V is not used if TAU = 0.
+*
+*  INCV    (input) INTEGER
+*          The increment between elements of v. INCV <> 0.
+*
+*  TAU     (input) COMPLEX
+*          The value tau in the representation of H.
+*
+*  C       (input/output) COMPLEX array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by the matrix H * C if SIDE = 'L',
+*          or C * H if SIDE = 'R'.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) COMPLEX array, dimension
+*                         (N) if SIDE = 'L'
+*                      or (M) if SIDE = 'R'
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ),
+     $                   ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CCOPY, CGEMV, CGERC, CGERU, CLACGV
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+      IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*        Form  H * C
+*
+         IF( TAU.NE.ZERO ) THEN
+*
+*           w( 1:n ) = conjg( C( 1, 1:n ) )
+*
+            CALL CCOPY( N, C, LDC, WORK, 1 )
+            CALL CLACGV( N, WORK, 1 )
+*
+*           w( 1:n ) = conjg( w( 1:n ) + C( m-l+1:m, 1:n )' * v( 1:l ) )
+*
+            CALL CGEMV( 'Conjugate transpose', L, N, ONE, C( M-L+1, 1 ),
+     $                  LDC, V, INCV, ONE, WORK, 1 )
+            CALL CLACGV( N, WORK, 1 )
+*
+*           C( 1, 1:n ) = C( 1, 1:n ) - tau * w( 1:n )
+*
+            CALL CAXPY( N, -TAU, WORK, 1, C, LDC )
+*
+*           C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ...
+*                               tau * v( 1:l ) * conjg( w( 1:n )' )
+*
+            CALL CGERU( L, N, -TAU, V, INCV, WORK, 1, C( M-L+1, 1 ),
+     $                  LDC )
+         END IF
+*
+      ELSE
+*
+*        Form  C * H
+*
+         IF( TAU.NE.ZERO ) THEN
+*
+*           w( 1:m ) = C( 1:m, 1 )
+*
+            CALL CCOPY( M, C, 1, WORK, 1 )
+*
+*           w( 1:m ) = w( 1:m ) + C( 1:m, n-l+1:n, 1:n ) * v( 1:l )
+*
+            CALL CGEMV( 'No transpose', M, L, ONE, C( 1, N-L+1 ), LDC,
+     $                  V, INCV, ONE, WORK, 1 )
+*
+*           C( 1:m, 1 ) = C( 1:m, 1 ) - tau * w( 1:m )
+*
+            CALL CAXPY( M, -TAU, WORK, 1, C, 1 )
+*
+*           C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ...
+*                               tau * w( 1:m ) * v( 1:l )'
+*
+            CALL CGERC( M, L, -TAU, WORK, 1, V, INCV, C( 1, N-L+1 ),
+     $                  LDC )
+*
+         END IF
+*
+      END IF
+*
+      RETURN
+*
+*     End of CLARZ
+*
+      END
diff --git a/costa/native/external/lapack/clarzb.f b/costa/native/external/lapack/clarzb.f
new file mode 100644
index 000000000..aa691a2f1
--- /dev/null
+++ b/costa/native/external/lapack/clarzb.f
@@ -0,0 +1,235 @@
+      SUBROUTINE CLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V,
+     $                   LDV, T, LDT, C, LDC, WORK, LDWORK )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     December 1, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIRECT, SIDE, STOREV, TRANS
+      INTEGER            K, L, LDC, LDT, LDV, LDWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            C( LDC, * ), T( LDT, * ), V( LDV, * ),
+     $                   WORK( LDWORK, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLARZB applies a complex block reflector H or its transpose H**H
+*  to a complex distributed M-by-N  C from the left or the right.
+*
+*  Currently, only STOREV = 'R' and DIRECT = 'B' are supported.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply H or H' from the Left
+*          = 'R': apply H or H' from the Right
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': apply H (No transpose)
+*          = 'C': apply H' (Conjugate transpose)
+*
+*  DIRECT  (input) CHARACTER*1
+*          Indicates how H is formed from a product of elementary
+*          reflectors
+*          = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet)
+*          = 'B': H = H(k) . . . H(2) H(1) (Backward)
+*
+*  STOREV  (input) CHARACTER*1
+*          Indicates how the vectors which define the elementary
+*          reflectors are stored:
+*          = 'C': Columnwise                        (not supported yet)
+*          = 'R': Rowwise
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C.
+*
+*  K       (input) INTEGER
+*          The order of the matrix T (= the number of elementary
+*          reflectors whose product defines the block reflector).
+*
+*  L       (input) INTEGER
+*          The number of columns of the matrix V containing the
+*          meaningful part of the Householder reflectors.
+*          If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0.
+*
+*  V       (input) COMPLEX array, dimension (LDV,NV).
+*          If STOREV = 'C', NV = K; if STOREV = 'R', NV = L.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V.
+*          If STOREV = 'C', LDV >= L; if STOREV = 'R', LDV >= K.
+*
+*  T       (input) COMPLEX array, dimension (LDT,K)
+*          The triangular K-by-K matrix T in the representation of the
+*          block reflector.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= K.
+*
+*  C       (input/output) COMPLEX array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by H*C or H'*C or C*H or C*H'.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) COMPLEX array, dimension (LDWORK,K)
+*
+*  LDWORK  (input) INTEGER
+*          The leading dimension of the array WORK.
+*          If SIDE = 'L', LDWORK >= max(1,N);
+*          if SIDE = 'R', LDWORK >= max(1,M).
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      CHARACTER          TRANST
+      INTEGER            I, INFO, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CGEMM, CLACGV, CTRMM, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 .OR. N.LE.0 )
+     $   RETURN
+*
+*     Check for currently supported options
+*
+      INFO = 0
+      IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CLARZB', -INFO )
+         RETURN
+      END IF
+*
+      IF( LSAME( TRANS, 'N' ) ) THEN
+         TRANST = 'C'
+      ELSE
+         TRANST = 'N'
+      END IF
+*
+      IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*        Form  H * C  or  H' * C
+*
+*        W( 1:n, 1:k ) = conjg( C( 1:k, 1:n )' )
+*
+         DO 10 J = 1, K
+            CALL CCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 )
+   10    CONTINUE
+*
+*        W( 1:n, 1:k ) = W( 1:n, 1:k ) + ...
+*                        conjg( C( m-l+1:m, 1:n )' ) * V( 1:k, 1:l )'
+*
+         IF( L.GT.0 )
+     $      CALL CGEMM( 'Transpose', 'Conjugate transpose', N, K, L,
+     $                  ONE, C( M-L+1, 1 ), LDC, V, LDV, ONE, WORK,
+     $                  LDWORK )
+*
+*        W( 1:n, 1:k ) = W( 1:n, 1:k ) * T'  or  W( 1:m, 1:k ) * T
+*
+         CALL CTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, ONE, T,
+     $               LDT, WORK, LDWORK )
+*
+*        C( 1:k, 1:n ) = C( 1:k, 1:n ) - conjg( W( 1:n, 1:k )' )
+*
+         DO 30 J = 1, N
+            DO 20 I = 1, K
+               C( I, J ) = C( I, J ) - WORK( J, I )
+   20       CONTINUE
+   30    CONTINUE
+*
+*        C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ...
+*                    conjg( V( 1:k, 1:l )' ) * conjg( W( 1:n, 1:k )' )
+*
+         IF( L.GT.0 )
+     $      CALL CGEMM( 'Transpose', 'Transpose', L, N, K, -ONE, V, LDV,
+     $                  WORK, LDWORK, ONE, C( M-L+1, 1 ), LDC )
+*
+      ELSE IF( LSAME( SIDE, 'R' ) ) THEN
+*
+*        Form  C * H  or  C * H'
+*
+*        W( 1:m, 1:k ) = C( 1:m, 1:k )
+*
+         DO 40 J = 1, K
+            CALL CCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 )
+   40    CONTINUE
+*
+*        W( 1:m, 1:k ) = W( 1:m, 1:k ) + ...
+*                        C( 1:m, n-l+1:n ) * conjg( V( 1:k, 1:l )' )
+*
+         IF( L.GT.0 )
+     $      CALL CGEMM( 'No transpose', 'Transpose', M, K, L, ONE,
+     $                  C( 1, N-L+1 ), LDC, V, LDV, ONE, WORK, LDWORK )
+*
+*        W( 1:m, 1:k ) = W( 1:m, 1:k ) * conjg( T )  or
+*                        W( 1:m, 1:k ) * conjg( T' )
+*
+         DO 50 J = 1, K
+            CALL CLACGV( K-J+1, T( J, J ), 1 )
+   50    CONTINUE
+         CALL CTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, ONE, T,
+     $               LDT, WORK, LDWORK )
+         DO 60 J = 1, K
+            CALL CLACGV( K-J+1, T( J, J ), 1 )
+   60    CONTINUE
+*
+*        C( 1:m, 1:k ) = C( 1:m, 1:k ) - W( 1:m, 1:k )
+*
+         DO 80 J = 1, K
+            DO 70 I = 1, M
+               C( I, J ) = C( I, J ) - WORK( I, J )
+   70       CONTINUE
+   80    CONTINUE
+*
+*        C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ...
+*                            W( 1:m, 1:k ) * conjg( V( 1:k, 1:l ) )
+*
+         DO 90 J = 1, L
+            CALL CLACGV( K, V( 1, J ), 1 )
+   90    CONTINUE
+         IF( L.GT.0 )
+     $      CALL CGEMM( 'No transpose', 'No transpose', M, L, K, -ONE,
+     $                  WORK, LDWORK, V, LDV, ONE, C( 1, N-L+1 ), LDC )
+         DO 100 J = 1, L
+            CALL CLACGV( K, V( 1, J ), 1 )
+  100    CONTINUE
+*
+      END IF
+*
+      RETURN
+*
+*     End of CLARZB
+*
+      END
diff --git a/costa/native/external/lapack/clarzt.f b/costa/native/external/lapack/clarzt.f
new file mode 100644
index 000000000..0a35c6f99
--- /dev/null
+++ b/costa/native/external/lapack/clarzt.f
@@ -0,0 +1,187 @@
+      SUBROUTINE CLARZT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIRECT, STOREV
+      INTEGER            K, LDT, LDV, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            T( LDT, * ), TAU( * ), V( LDV, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLARZT forms the triangular factor T of a complex block reflector
+*  H of order > n, which is defined as a product of k elementary
+*  reflectors.
+*
+*  If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
+*
+*  If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
+*
+*  If STOREV = 'C', the vector which defines the elementary reflector
+*  H(i) is stored in the i-th column of the array V, and
+*
+*     H  =  I - V * T * V'
+*
+*  If STOREV = 'R', the vector which defines the elementary reflector
+*  H(i) is stored in the i-th row of the array V, and
+*
+*     H  =  I - V' * T * V
+*
+*  Currently, only STOREV = 'R' and DIRECT = 'B' are supported.
+*
+*  Arguments
+*  =========
+*
+*  DIRECT  (input) CHARACTER*1
+*          Specifies the order in which the elementary reflectors are
+*          multiplied to form the block reflector:
+*          = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet)
+*          = 'B': H = H(k) . . . H(2) H(1) (Backward)
+*
+*  STOREV  (input) CHARACTER*1
+*          Specifies how the vectors which define the elementary
+*          reflectors are stored (see also Further Details):
+*          = 'C': columnwise                        (not supported yet)
+*          = 'R': rowwise
+*
+*  N       (input) INTEGER
+*          The order of the block reflector H. N >= 0.
+*
+*  K       (input) INTEGER
+*          The order of the triangular factor T (= the number of
+*          elementary reflectors). K >= 1.
+*
+*  V       (input/output) COMPLEX array, dimension
+*                               (LDV,K) if STOREV = 'C'
+*                               (LDV,N) if STOREV = 'R'
+*          The matrix V. See further details.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V.
+*          If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
+*
+*  TAU     (input) COMPLEX array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i).
+*
+*  T       (output) COMPLEX array, dimension (LDT,K)
+*          The k by k triangular factor T of the block reflector.
+*          If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
+*          lower triangular. The rest of the array is not used.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= K.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+*  The shape of the matrix V and the storage of the vectors which define
+*  the H(i) is best illustrated by the following example with n = 5 and
+*  k = 3. The elements equal to 1 are not stored; the corresponding
+*  array elements are modified but restored on exit. The rest of the
+*  array is not used.
+*
+*  DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R':
+*
+*                                              ______V_____
+*         ( v1 v2 v3 )                        /            \
+*         ( v1 v2 v3 )                      ( v1 v1 v1 v1 v1 . . . . 1 )
+*     V = ( v1 v2 v3 )                      ( v2 v2 v2 v2 v2 . . . 1   )
+*         ( v1 v2 v3 )                      ( v3 v3 v3 v3 v3 . . 1     )
+*         ( v1 v2 v3 )
+*            .  .  .
+*            .  .  .
+*            1  .  .
+*               1  .
+*                  1
+*
+*  DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R':
+*
+*                                                        ______V_____
+*            1                                          /            \
+*            .  1                           ( 1 . . . . v1 v1 v1 v1 v1 )
+*            .  .  1                        ( . 1 . . . v2 v2 v2 v2 v2 )
+*            .  .  .                        ( . . 1 . . v3 v3 v3 v3 v3 )
+*            .  .  .
+*         ( v1 v2 v3 )
+*         ( v1 v2 v3 )
+*     V = ( v1 v2 v3 )
+*         ( v1 v2 v3 )
+*         ( v1 v2 v3 )
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, INFO, J
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEMV, CLACGV, CTRMV, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+*     Check for currently supported options
+*
+      INFO = 0
+      IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CLARZT', -INFO )
+         RETURN
+      END IF
+*
+      DO 20 I = K, 1, -1
+         IF( TAU( I ).EQ.ZERO ) THEN
+*
+*           H(i)  =  I
+*
+            DO 10 J = I, K
+               T( J, I ) = ZERO
+   10       CONTINUE
+         ELSE
+*
+*           general case
+*
+            IF( I.LT.K ) THEN
+*
+*              T(i+1:k,i) = - tau(i) * V(i+1:k,1:n) * V(i,1:n)'
+*
+               CALL CLACGV( N, V( I, 1 ), LDV )
+               CALL CGEMV( 'No transpose', K-I, N, -TAU( I ),
+     $                     V( I+1, 1 ), LDV, V( I, 1 ), LDV, ZERO,
+     $                     T( I+1, I ), 1 )
+               CALL CLACGV( N, V( I, 1 ), LDV )
+*
+*              T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i)
+*
+               CALL CTRMV( 'Lower', 'No transpose', 'Non-unit', K-I,
+     $                     T( I+1, I+1 ), LDT, T( I+1, I ), 1 )
+            END IF
+            T( I, I ) = TAU( I )
+         END IF
+   20 CONTINUE
+      RETURN
+*
+*     End of CLARZT
+*
+      END
diff --git a/costa/native/external/lapack/clascl.f b/costa/native/external/lapack/clascl.f
new file mode 100644
index 000000000..2fc056376
--- /dev/null
+++ b/costa/native/external/lapack/clascl.f
@@ -0,0 +1,268 @@
+      SUBROUTINE CLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TYPE
+      INTEGER            INFO, KL, KU, LDA, M, N
+      REAL               CFROM, CTO
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLASCL multiplies the M by N complex matrix A by the real scalar
+*  CTO/CFROM.  This is done without over/underflow as long as the final
+*  result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that
+*  A may be full, upper triangular, lower triangular, upper Hessenberg,
+*  or banded.
+*
+*  Arguments
+*  =========
+*
+*  TYPE    (input) CHARACTER*1
+*          TYPE indices the storage type of the input matrix.
+*          = 'G':  A is a full matrix.
+*          = 'L':  A is a lower triangular matrix.
+*          = 'U':  A is an upper triangular matrix.
+*          = 'H':  A is an upper Hessenberg matrix.
+*          = 'B':  A is a symmetric band matrix with lower bandwidth KL
+*                  and upper bandwidth KU and with the only the lower
+*                  half stored.
+*          = 'Q':  A is a symmetric band matrix with lower bandwidth KL
+*                  and upper bandwidth KU and with the only the upper
+*                  half stored.
+*          = 'Z':  A is a band matrix with lower bandwidth KL and upper
+*                  bandwidth KU.
+*
+*  KL      (input) INTEGER
+*          The lower bandwidth of A.  Referenced only if TYPE = 'B',
+*          'Q' or 'Z'.
+*
+*  KU      (input) INTEGER
+*          The upper bandwidth of A.  Referenced only if TYPE = 'B',
+*          'Q' or 'Z'.
+*
+*  CFROM   (input) REAL
+*  CTO     (input) REAL
+*          The matrix A is multiplied by CTO/CFROM. A(I,J) is computed
+*          without over/underflow if the final result CTO*A(I,J)/CFROM
+*          can be represented without over/underflow.  CFROM must be
+*          nonzero.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,M)
+*          The matrix to be multiplied by CTO/CFROM.  See TYPE for the
+*          storage type.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  INFO    (output) INTEGER
+*          0  - successful exit
+*          <0 - if INFO = -i, the i-th argument had an illegal value.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            DONE
+      INTEGER            I, ITYPE, J, K1, K2, K3, K4
+      REAL               BIGNUM, CFROM1, CFROMC, CTO1, CTOC, MUL, SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+*
+      IF( LSAME( TYPE, 'G' ) ) THEN
+         ITYPE = 0
+      ELSE IF( LSAME( TYPE, 'L' ) ) THEN
+         ITYPE = 1
+      ELSE IF( LSAME( TYPE, 'U' ) ) THEN
+         ITYPE = 2
+      ELSE IF( LSAME( TYPE, 'H' ) ) THEN
+         ITYPE = 3
+      ELSE IF( LSAME( TYPE, 'B' ) ) THEN
+         ITYPE = 4
+      ELSE IF( LSAME( TYPE, 'Q' ) ) THEN
+         ITYPE = 5
+      ELSE IF( LSAME( TYPE, 'Z' ) ) THEN
+         ITYPE = 6
+      ELSE
+         ITYPE = -1
+      END IF
+*
+      IF( ITYPE.EQ.-1 ) THEN
+         INFO = -1
+      ELSE IF( CFROM.EQ.ZERO ) THEN
+         INFO = -4
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( N.LT.0 .OR. ( ITYPE.EQ.4 .AND. N.NE.M ) .OR.
+     $         ( ITYPE.EQ.5 .AND. N.NE.M ) ) THEN
+         INFO = -7
+      ELSE IF( ITYPE.LE.3 .AND. LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -9
+      ELSE IF( ITYPE.GE.4 ) THEN
+         IF( KL.LT.0 .OR. KL.GT.MAX( M-1, 0 ) ) THEN
+            INFO = -2
+         ELSE IF( KU.LT.0 .OR. KU.GT.MAX( N-1, 0 ) .OR.
+     $            ( ( ITYPE.EQ.4 .OR. ITYPE.EQ.5 ) .AND. KL.NE.KU ) )
+     $             THEN
+            INFO = -3
+         ELSE IF( ( ITYPE.EQ.4 .AND. LDA.LT.KL+1 ) .OR.
+     $            ( ITYPE.EQ.5 .AND. LDA.LT.KU+1 ) .OR.
+     $            ( ITYPE.EQ.6 .AND. LDA.LT.2*KL+KU+1 ) ) THEN
+            INFO = -9
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CLASCL', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. M.EQ.0 )
+     $   RETURN
+*
+*     Get machine parameters
+*
+      SMLNUM = SLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+*
+      CFROMC = CFROM
+      CTOC = CTO
+*
+   10 CONTINUE
+      CFROM1 = CFROMC*SMLNUM
+      CTO1 = CTOC / BIGNUM
+      IF( ABS( CFROM1 ).GT.ABS( CTOC ) .AND. CTOC.NE.ZERO ) THEN
+         MUL = SMLNUM
+         DONE = .FALSE.
+         CFROMC = CFROM1
+      ELSE IF( ABS( CTO1 ).GT.ABS( CFROMC ) ) THEN
+         MUL = BIGNUM
+         DONE = .FALSE.
+         CTOC = CTO1
+      ELSE
+         MUL = CTOC / CFROMC
+         DONE = .TRUE.
+      END IF
+*
+      IF( ITYPE.EQ.0 ) THEN
+*
+*        Full matrix
+*
+         DO 30 J = 1, N
+            DO 20 I = 1, M
+               A( I, J ) = A( I, J )*MUL
+   20       CONTINUE
+   30    CONTINUE
+*
+      ELSE IF( ITYPE.EQ.1 ) THEN
+*
+*        Lower triangular matrix
+*
+         DO 50 J = 1, N
+            DO 40 I = J, M
+               A( I, J ) = A( I, J )*MUL
+   40       CONTINUE
+   50    CONTINUE
+*
+      ELSE IF( ITYPE.EQ.2 ) THEN
+*
+*        Upper triangular matrix
+*
+         DO 70 J = 1, N
+            DO 60 I = 1, MIN( J, M )
+               A( I, J ) = A( I, J )*MUL
+   60       CONTINUE
+   70    CONTINUE
+*
+      ELSE IF( ITYPE.EQ.3 ) THEN
+*
+*        Upper Hessenberg matrix
+*
+         DO 90 J = 1, N
+            DO 80 I = 1, MIN( J+1, M )
+               A( I, J ) = A( I, J )*MUL
+   80       CONTINUE
+   90    CONTINUE
+*
+      ELSE IF( ITYPE.EQ.4 ) THEN
+*
+*        Lower half of a symmetric band matrix
+*
+         K3 = KL + 1
+         K4 = N + 1
+         DO 110 J = 1, N
+            DO 100 I = 1, MIN( K3, K4-J )
+               A( I, J ) = A( I, J )*MUL
+  100       CONTINUE
+  110    CONTINUE
+*
+      ELSE IF( ITYPE.EQ.5 ) THEN
+*
+*        Upper half of a symmetric band matrix
+*
+         K1 = KU + 2
+         K3 = KU + 1
+         DO 130 J = 1, N
+            DO 120 I = MAX( K1-J, 1 ), K3
+               A( I, J ) = A( I, J )*MUL
+  120       CONTINUE
+  130    CONTINUE
+*
+      ELSE IF( ITYPE.EQ.6 ) THEN
+*
+*        Band matrix
+*
+         K1 = KL + KU + 2
+         K2 = KL + 1
+         K3 = 2*KL + KU + 1
+         K4 = KL + KU + 1 + M
+         DO 150 J = 1, N
+            DO 140 I = MAX( K1-J, K2 ), MIN( K3, K4-J )
+               A( I, J ) = A( I, J )*MUL
+  140       CONTINUE
+  150    CONTINUE
+*
+      END IF
+*
+      IF( .NOT.DONE )
+     $   GO TO 10
+*
+      RETURN
+*
+*     End of CLASCL
+*
+      END
diff --git a/costa/native/external/lapack/claset.f b/costa/native/external/lapack/claset.f
new file mode 100644
index 000000000..f75a800a4
--- /dev/null
+++ b/costa/native/external/lapack/claset.f
@@ -0,0 +1,115 @@
+      SUBROUTINE CLASET( UPLO, M, N, ALPHA, BETA, A, LDA )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            LDA, M, N
+      COMPLEX            ALPHA, BETA
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLASET initializes a 2-D array A to BETA on the diagonal and
+*  ALPHA on the offdiagonals.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies the part of the matrix A to be set.
+*          = 'U':      Upper triangular part is set. The lower triangle
+*                      is unchanged.
+*          = 'L':      Lower triangular part is set. The upper triangle
+*                      is unchanged.
+*          Otherwise:  All of the matrix A is set.
+*
+*  M       (input) INTEGER
+*          On entry, M specifies the number of rows of A.
+*
+*  N       (input) INTEGER
+*          On entry, N specifies the number of columns of A.
+*
+*  ALPHA   (input) COMPLEX
+*          All the offdiagonal array elements are set to ALPHA.
+*
+*  BETA    (input) COMPLEX
+*          All the diagonal array elements are set to BETA.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the m by n matrix A.
+*          On exit, A(i,j) = ALPHA, 1 <= i <= m, 1 <= j <= n, i.ne.j;
+*                   A(i,i) = BETA , 1 <= i <= min(m,n)
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+      IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*        Set the diagonal to BETA and the strictly upper triangular
+*        part of the array to ALPHA.
+*
+         DO 20 J = 2, N
+            DO 10 I = 1, MIN( J-1, M )
+               A( I, J ) = ALPHA
+   10       CONTINUE
+   20    CONTINUE
+         DO 30 I = 1, MIN( N, M )
+            A( I, I ) = BETA
+   30    CONTINUE
+*
+      ELSE IF( LSAME( UPLO, 'L' ) ) THEN
+*
+*        Set the diagonal to BETA and the strictly lower triangular
+*        part of the array to ALPHA.
+*
+         DO 50 J = 1, MIN( M, N )
+            DO 40 I = J + 1, M
+               A( I, J ) = ALPHA
+   40       CONTINUE
+   50    CONTINUE
+         DO 60 I = 1, MIN( N, M )
+            A( I, I ) = BETA
+   60    CONTINUE
+*
+      ELSE
+*
+*        Set the array to BETA on the diagonal and ALPHA on the
+*        offdiagonal.
+*
+         DO 80 J = 1, N
+            DO 70 I = 1, M
+               A( I, J ) = ALPHA
+   70       CONTINUE
+   80    CONTINUE
+         DO 90 I = 1, MIN( M, N )
+            A( I, I ) = BETA
+   90    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CLASET
+*
+      END
diff --git a/costa/native/external/lapack/clasr.f b/costa/native/external/lapack/clasr.f
new file mode 100644
index 000000000..eb2103eb0
--- /dev/null
+++ b/costa/native/external/lapack/clasr.f
@@ -0,0 +1,325 @@
+      SUBROUTINE CLASR( SIDE, PIVOT, DIRECT, M, N, C, S, A, LDA )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIRECT, PIVOT, SIDE
+      INTEGER            LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               C( * ), S( * )
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLASR   performs the transformation
+*
+*     A := P*A,   when SIDE = 'L' or 'l'  (  Left-hand side )
+*
+*     A := A*P',  when SIDE = 'R' or 'r'  ( Right-hand side )
+*
+*  where A is an m by n complex matrix and P is an orthogonal matrix,
+*  consisting of a sequence of plane rotations determined by the
+*  parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l'
+*  and z = n when SIDE = 'R' or 'r' ):
+*
+*  When  DIRECT = 'F' or 'f'  ( Forward sequence ) then
+*
+*     P = P( z - 1 )*...*P( 2 )*P( 1 ),
+*
+*  and when DIRECT = 'B' or 'b'  ( Backward sequence ) then
+*
+*     P = P( 1 )*P( 2 )*...*P( z - 1 ),
+*
+*  where  P( k ) is a plane rotation matrix for the following planes:
+*
+*     when  PIVOT = 'V' or 'v'  ( Variable pivot ),
+*        the plane ( k, k + 1 )
+*
+*     when  PIVOT = 'T' or 't'  ( Top pivot ),
+*        the plane ( 1, k + 1 )
+*
+*     when  PIVOT = 'B' or 'b'  ( Bottom pivot ),
+*        the plane ( k, z )
+*
+*  c( k ) and s( k )  must contain the  cosine and sine that define the
+*  matrix  P( k ).  The two by two plane rotation part of the matrix
+*  P( k ), R( k ), is assumed to be of the form
+*
+*     R( k ) = (  c( k )  s( k ) ).
+*              ( -s( k )  c( k ) )
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          Specifies whether the plane rotation matrix P is applied to
+*          A on the left or the right.
+*          = 'L':  Left, compute A := P*A
+*          = 'R':  Right, compute A:= A*P'
+*
+*  DIRECT  (input) CHARACTER*1
+*          Specifies whether P is a forward or backward sequence of
+*          plane rotations.
+*          = 'F':  Forward, P = P( z - 1 )*...*P( 2 )*P( 1 )
+*          = 'B':  Backward, P = P( 1 )*P( 2 )*...*P( z - 1 )
+*
+*  PIVOT   (input) CHARACTER*1
+*          Specifies the plane for which P(k) is a plane rotation
+*          matrix.
+*          = 'V':  Variable pivot, the plane (k,k+1)
+*          = 'T':  Top pivot, the plane (1,k+1)
+*          = 'B':  Bottom pivot, the plane (k,z)
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  If m <= 1, an immediate
+*          return is effected.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  If n <= 1, an
+*          immediate return is effected.
+*
+*  C, S    (input) REAL arrays, dimension
+*                  (M-1) if SIDE = 'L'
+*                  (N-1) if SIDE = 'R'
+*          c(k) and s(k) contain the cosine and sine that define the
+*          matrix P(k).  The two by two plane rotation part of the
+*          matrix P(k), R(k), is assumed to be of the form
+*          R( k ) = (  c( k )  s( k ) ).
+*                   ( -s( k )  c( k ) )
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          The m by n matrix A.  On exit, A is overwritten by P*A if
+*          SIDE = 'R' or by A*P' if SIDE = 'L'.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, INFO, J
+      REAL               CTEMP, STEMP
+      COMPLEX            TEMP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      IF( .NOT.( LSAME( SIDE, 'L' ) .OR. LSAME( SIDE, 'R' ) ) ) THEN
+         INFO = 1
+      ELSE IF( .NOT.( LSAME( PIVOT, 'V' ) .OR. LSAME( PIVOT,
+     $         'T' ) .OR. LSAME( PIVOT, 'B' ) ) ) THEN
+         INFO = 2
+      ELSE IF( .NOT.( LSAME( DIRECT, 'F' ) .OR. LSAME( DIRECT, 'B' ) ) )
+     $          THEN
+         INFO = 3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = 4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = 5
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CLASR ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) )
+     $   RETURN
+      IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*        Form  P * A
+*
+         IF( LSAME( PIVOT, 'V' ) ) THEN
+            IF( LSAME( DIRECT, 'F' ) ) THEN
+               DO 20 J = 1, M - 1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 10 I = 1, N
+                        TEMP = A( J+1, I )
+                        A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I )
+                        A( J, I ) = STEMP*TEMP + CTEMP*A( J, I )
+   10                CONTINUE
+                  END IF
+   20          CONTINUE
+            ELSE IF( LSAME( DIRECT, 'B' ) ) THEN
+               DO 40 J = M - 1, 1, -1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 30 I = 1, N
+                        TEMP = A( J+1, I )
+                        A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I )
+                        A( J, I ) = STEMP*TEMP + CTEMP*A( J, I )
+   30                CONTINUE
+                  END IF
+   40          CONTINUE
+            END IF
+         ELSE IF( LSAME( PIVOT, 'T' ) ) THEN
+            IF( LSAME( DIRECT, 'F' ) ) THEN
+               DO 60 J = 2, M
+                  CTEMP = C( J-1 )
+                  STEMP = S( J-1 )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 50 I = 1, N
+                        TEMP = A( J, I )
+                        A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I )
+                        A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I )
+   50                CONTINUE
+                  END IF
+   60          CONTINUE
+            ELSE IF( LSAME( DIRECT, 'B' ) ) THEN
+               DO 80 J = M, 2, -1
+                  CTEMP = C( J-1 )
+                  STEMP = S( J-1 )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 70 I = 1, N
+                        TEMP = A( J, I )
+                        A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I )
+                        A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I )
+   70                CONTINUE
+                  END IF
+   80          CONTINUE
+            END IF
+         ELSE IF( LSAME( PIVOT, 'B' ) ) THEN
+            IF( LSAME( DIRECT, 'F' ) ) THEN
+               DO 100 J = 1, M - 1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 90 I = 1, N
+                        TEMP = A( J, I )
+                        A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP
+                        A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP
+   90                CONTINUE
+                  END IF
+  100          CONTINUE
+            ELSE IF( LSAME( DIRECT, 'B' ) ) THEN
+               DO 120 J = M - 1, 1, -1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 110 I = 1, N
+                        TEMP = A( J, I )
+                        A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP
+                        A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP
+  110                CONTINUE
+                  END IF
+  120          CONTINUE
+            END IF
+         END IF
+      ELSE IF( LSAME( SIDE, 'R' ) ) THEN
+*
+*        Form A * P'
+*
+         IF( LSAME( PIVOT, 'V' ) ) THEN
+            IF( LSAME( DIRECT, 'F' ) ) THEN
+               DO 140 J = 1, N - 1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 130 I = 1, M
+                        TEMP = A( I, J+1 )
+                        A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J )
+                        A( I, J ) = STEMP*TEMP + CTEMP*A( I, J )
+  130                CONTINUE
+                  END IF
+  140          CONTINUE
+            ELSE IF( LSAME( DIRECT, 'B' ) ) THEN
+               DO 160 J = N - 1, 1, -1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 150 I = 1, M
+                        TEMP = A( I, J+1 )
+                        A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J )
+                        A( I, J ) = STEMP*TEMP + CTEMP*A( I, J )
+  150                CONTINUE
+                  END IF
+  160          CONTINUE
+            END IF
+         ELSE IF( LSAME( PIVOT, 'T' ) ) THEN
+            IF( LSAME( DIRECT, 'F' ) ) THEN
+               DO 180 J = 2, N
+                  CTEMP = C( J-1 )
+                  STEMP = S( J-1 )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 170 I = 1, M
+                        TEMP = A( I, J )
+                        A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 )
+                        A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 )
+  170                CONTINUE
+                  END IF
+  180          CONTINUE
+            ELSE IF( LSAME( DIRECT, 'B' ) ) THEN
+               DO 200 J = N, 2, -1
+                  CTEMP = C( J-1 )
+                  STEMP = S( J-1 )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 190 I = 1, M
+                        TEMP = A( I, J )
+                        A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 )
+                        A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 )
+  190                CONTINUE
+                  END IF
+  200          CONTINUE
+            END IF
+         ELSE IF( LSAME( PIVOT, 'B' ) ) THEN
+            IF( LSAME( DIRECT, 'F' ) ) THEN
+               DO 220 J = 1, N - 1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 210 I = 1, M
+                        TEMP = A( I, J )
+                        A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP
+                        A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP
+  210                CONTINUE
+                  END IF
+  220          CONTINUE
+            ELSE IF( LSAME( DIRECT, 'B' ) ) THEN
+               DO 240 J = N - 1, 1, -1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 230 I = 1, M
+                        TEMP = A( I, J )
+                        A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP
+                        A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP
+  230                CONTINUE
+                  END IF
+  240          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CLASR
+*
+      END
diff --git a/costa/native/external/lapack/classq.f b/costa/native/external/lapack/classq.f
new file mode 100644
index 000000000..7adcd74b5
--- /dev/null
+++ b/costa/native/external/lapack/classq.f
@@ -0,0 +1,102 @@
+      SUBROUTINE CLASSQ( N, X, INCX, SCALE, SUMSQ )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+      REAL               SCALE, SUMSQ
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLASSQ returns the values scl and ssq such that
+*
+*     ( scl**2 )*ssq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq,
+*
+*  where x( i ) = abs( X( 1 + ( i - 1 )*INCX ) ). The value of sumsq is
+*  assumed to be at least unity and the value of ssq will then satisfy
+*
+*     1.0 .le. ssq .le. ( sumsq + 2*n ).
+*
+*  scale is assumed to be non-negative and scl returns the value
+*
+*     scl = max( scale, abs( real( x( i ) ) ), abs( aimag( x( i ) ) ) ),
+*            i
+*
+*  scale and sumsq must be supplied in SCALE and SUMSQ respectively.
+*  SCALE and SUMSQ are overwritten by scl and ssq respectively.
+*
+*  The routine makes only one pass through the vector X.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of elements to be used from the vector X.
+*
+*  X       (input) COMPLEX array, dimension (N)
+*          The vector x as described above.
+*             x( i )  = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n.
+*
+*  INCX    (input) INTEGER
+*          The increment between successive values of the vector X.
+*          INCX > 0.
+*
+*  SCALE   (input/output) REAL
+*          On entry, the value  scale  in the equation above.
+*          On exit, SCALE is overwritten with the value  scl .
+*
+*  SUMSQ   (input/output) REAL
+*          On entry, the value  sumsq  in the equation above.
+*          On exit, SUMSQ is overwritten with the value  ssq .
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            IX
+      REAL               TEMP1
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, REAL
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.GT.0 ) THEN
+         DO 10 IX = 1, 1 + ( N-1 )*INCX, INCX
+            IF( REAL( X( IX ) ).NE.ZERO ) THEN
+               TEMP1 = ABS( REAL( X( IX ) ) )
+               IF( SCALE.LT.TEMP1 ) THEN
+                  SUMSQ = 1 + SUMSQ*( SCALE / TEMP1 )**2
+                  SCALE = TEMP1
+               ELSE
+                  SUMSQ = SUMSQ + ( TEMP1 / SCALE )**2
+               END IF
+            END IF
+            IF( AIMAG( X( IX ) ).NE.ZERO ) THEN
+               TEMP1 = ABS( AIMAG( X( IX ) ) )
+               IF( SCALE.LT.TEMP1 ) THEN
+                  SUMSQ = 1 + SUMSQ*( SCALE / TEMP1 )**2
+                  SCALE = TEMP1
+               ELSE
+                  SUMSQ = SUMSQ + ( TEMP1 / SCALE )**2
+               END IF
+            END IF
+   10    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CLASSQ
+*
+      END
diff --git a/costa/native/external/lapack/claswp.f b/costa/native/external/lapack/claswp.f
new file mode 100644
index 000000000..f993179bd
--- /dev/null
+++ b/costa/native/external/lapack/claswp.f
@@ -0,0 +1,120 @@
+      SUBROUTINE CLASWP( N, A, LDA, K1, K2, IPIV, INCX )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, K1, K2, LDA, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLASWP performs a series of row interchanges on the matrix A.
+*  One row interchange is initiated for each of rows K1 through K2 of A.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the matrix of column dimension N to which the row
+*          interchanges will be applied.
+*          On exit, the permuted matrix.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*
+*  K1      (input) INTEGER
+*          The first element of IPIV for which a row interchange will
+*          be done.
+*
+*  K2      (input) INTEGER
+*          The last element of IPIV for which a row interchange will
+*          be done.
+*
+*  IPIV    (input) INTEGER array, dimension (M*abs(INCX))
+*          The vector of pivot indices.  Only the elements in positions
+*          K1 through K2 of IPIV are accessed.
+*          IPIV(K) = L implies rows K and L are to be interchanged.
+*
+*  INCX    (input) INTEGER
+*          The increment between successive values of IPIV.  If IPIV
+*          is negative, the pivots are applied in reverse order.
+*
+*  Further Details
+*  ===============
+*
+*  Modified by
+*   R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+* =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, I1, I2, INC, IP, IX, IX0, J, K, N32
+      COMPLEX            TEMP
+*     ..
+*     .. Executable Statements ..
+*
+*     Interchange row I with row IPIV(I) for each of rows K1 through K2.
+*
+      IF( INCX.GT.0 ) THEN
+         IX0 = K1
+         I1 = K1
+         I2 = K2
+         INC = 1
+      ELSE IF( INCX.LT.0 ) THEN
+         IX0 = 1 + ( 1-K2 )*INCX
+         I1 = K2
+         I2 = K1
+         INC = -1
+      ELSE
+         RETURN
+      END IF
+*
+      N32 = ( N / 32 )*32
+      IF( N32.NE.0 ) THEN
+         DO 30 J = 1, N32, 32
+            IX = IX0
+            DO 20 I = I1, I2, INC
+               IP = IPIV( IX )
+               IF( IP.NE.I ) THEN
+                  DO 10 K = J, J + 31
+                     TEMP = A( I, K )
+                     A( I, K ) = A( IP, K )
+                     A( IP, K ) = TEMP
+   10             CONTINUE
+               END IF
+               IX = IX + INCX
+   20       CONTINUE
+   30    CONTINUE
+      END IF
+      IF( N32.NE.N ) THEN
+         N32 = N32 + 1
+         IX = IX0
+         DO 50 I = I1, I2, INC
+            IP = IPIV( IX )
+            IF( IP.NE.I ) THEN
+               DO 40 K = N32, N
+                  TEMP = A( I, K )
+                  A( I, K ) = A( IP, K )
+                  A( IP, K ) = TEMP
+   40          CONTINUE
+            END IF
+            IX = IX + INCX
+   50    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CLASWP
+*
+      END
diff --git a/costa/native/external/lapack/clasyf.f b/costa/native/external/lapack/clasyf.f
new file mode 100644
index 000000000..c35fa8bee
--- /dev/null
+++ b/costa/native/external/lapack/clasyf.f
@@ -0,0 +1,598 @@
+      SUBROUTINE CLASYF( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KB, LDA, LDW, N, NB
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            A( LDA, * ), W( LDW, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLASYF computes a partial factorization of a complex symmetric matrix
+*  A using the Bunch-Kaufman diagonal pivoting method. The partial
+*  factorization has the form:
+*
+*  A  =  ( I  U12 ) ( A11  0  ) (  I    0   )  if UPLO = 'U', or:
+*        ( 0  U22 ) (  0   D  ) ( U12' U22' )
+*
+*  A  =  ( L11  0 ) ( D    0  ) ( L11' L21' )  if UPLO = 'L'
+*        ( L21  I ) ( 0   A22 ) (  0    I   )
+*
+*  where the order of D is at most NB. The actual order is returned in
+*  the argument KB, and is either NB or NB-1, or N if N <= NB.
+*  Note that U' denotes the transpose of U.
+*
+*  CLASYF is an auxiliary routine called by CSYTRF. It uses blocked code
+*  (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or
+*  A22 (if UPLO = 'L').
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored:
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NB      (input) INTEGER
+*          The maximum number of columns of the matrix A that should be
+*          factored.  NB should be at least 2 to allow for 2-by-2 pivot
+*          blocks.
+*
+*  KB      (output) INTEGER
+*          The number of columns of A that were actually factored.
+*          KB is either NB-1 or NB, or N if N <= NB.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          n-by-n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n-by-n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*          On exit, A contains details of the partial factorization.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D.
+*          If UPLO = 'U', only the last KB elements of IPIV are set;
+*          if UPLO = 'L', only the first KB elements are set.
+*
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*  W       (workspace) COMPLEX array, dimension (LDW,NB)
+*
+*  LDW     (input) INTEGER
+*          The leading dimension of the array W.  LDW >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          > 0: if INFO = k, D(k,k) is exactly zero.  The factorization
+*               has been completed, but the block diagonal matrix D is
+*               exactly singular.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+      REAL               EIGHT, SEVTEN
+      PARAMETER          ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 )
+      COMPLEX            CONE
+      PARAMETER          ( CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            IMAX, J, JB, JJ, JMAX, JP, K, KK, KKW, KP,
+     $                   KSTEP, KW
+      REAL               ABSAKK, ALPHA, COLMAX, ROWMAX
+      COMPLEX            D11, D21, D22, R1, T, Z
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ICAMAX
+      EXTERNAL           LSAME, ICAMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CGEMM, CGEMV, CSCAL, CSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MAX, MIN, REAL, SQRT
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( Z ) = ABS( REAL( Z ) ) + ABS( AIMAG( Z ) )
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+*
+*     Initialize ALPHA for use in choosing pivot block size.
+*
+      ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT
+*
+      IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*        Factorize the trailing columns of A using the upper triangle
+*        of A and working backwards, and compute the matrix W = U12*D
+*        for use in updating A11
+*
+*        K is the main loop index, decreasing from N in steps of 1 or 2
+*
+*        KW is the column of W which corresponds to column K of A
+*
+         K = N
+   10    CONTINUE
+         KW = NB + K - N
+*
+*        Exit from loop
+*
+         IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 )
+     $      GO TO 30
+*
+*        Copy column K of A to column KW of W and update it
+*
+         CALL CCOPY( K, A( 1, K ), 1, W( 1, KW ), 1 )
+         IF( K.LT.N )
+     $      CALL CGEMV( 'No transpose', K, N-K, -CONE, A( 1, K+1 ), LDA,
+     $                  W( K, KW+1 ), LDW, CONE, W( 1, KW ), 1 )
+*
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = CABS1( W( K, KW ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.GT.1 ) THEN
+            IMAX = ICAMAX( K-1, W( 1, KW ), 1 )
+            COLMAX = CABS1( W( IMAX, KW ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              Copy column IMAX to column KW-1 of W and update it
+*
+               CALL CCOPY( IMAX, A( 1, IMAX ), 1, W( 1, KW-1 ), 1 )
+               CALL CCOPY( K-IMAX, A( IMAX, IMAX+1 ), LDA,
+     $                     W( IMAX+1, KW-1 ), 1 )
+               IF( K.LT.N )
+     $            CALL CGEMV( 'No transpose', K, N-K, -CONE,
+     $                        A( 1, K+1 ), LDA, W( IMAX, KW+1 ), LDW,
+     $                        CONE, W( 1, KW-1 ), 1 )
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               JMAX = IMAX + ICAMAX( K-IMAX, W( IMAX+1, KW-1 ), 1 )
+               ROWMAX = CABS1( W( JMAX, KW-1 ) )
+               IF( IMAX.GT.1 ) THEN
+                  JMAX = ICAMAX( IMAX-1, W( 1, KW-1 ), 1 )
+                  ROWMAX = MAX( ROWMAX, CABS1( W( JMAX, KW-1 ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( CABS1( W( IMAX, KW-1 ) ).GE.ALPHA*ROWMAX ) THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+*
+*                 copy column KW-1 of W to column KW
+*
+                  CALL CCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 )
+               ELSE
+*
+*                 interchange rows and columns K-1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K - KSTEP + 1
+            KKW = NB + KK - N
+*
+*           Updated column KP is already stored in column KKW of W
+*
+            IF( KP.NE.KK ) THEN
+*
+*              Copy non-updated column KK to column KP
+*
+               A( KP, K ) = A( KK, K )
+               CALL CCOPY( K-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ),
+     $                     LDA )
+               CALL CCOPY( KP, A( 1, KK ), 1, A( 1, KP ), 1 )
+*
+*              Interchange rows KK and KP in last KK columns of A and W
+*
+               CALL CSWAP( N-KK+1, A( KK, KK ), LDA, A( KP, KK ), LDA )
+               CALL CSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ),
+     $                     LDW )
+            END IF
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column KW of W now holds
+*
+*              W(k) = U(k)*D(k)
+*
+*              where U(k) is the k-th column of U
+*
+*              Store U(k) in column k of A
+*
+               CALL CCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 )
+               R1 = CONE / A( K, K )
+               CALL CSCAL( K-1, R1, A( 1, K ), 1 )
+            ELSE
+*
+*              2-by-2 pivot block D(k): columns KW and KW-1 of W now
+*              hold
+*
+*              ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k)
+*
+*              where U(k) and U(k-1) are the k-th and (k-1)-th columns
+*              of U
+*
+               IF( K.GT.2 ) THEN
+*
+*                 Store U(k) and U(k-1) in columns k and k-1 of A
+*
+                  D21 = W( K-1, KW )
+                  D11 = W( K, KW ) / D21
+                  D22 = W( K-1, KW-1 ) / D21
+                  T = CONE / ( D11*D22-CONE )
+                  D21 = T / D21
+                  DO 20 J = 1, K - 2
+                     A( J, K-1 ) = D21*( D11*W( J, KW-1 )-W( J, KW ) )
+                     A( J, K ) = D21*( D22*W( J, KW )-W( J, KW-1 ) )
+   20             CONTINUE
+               END IF
+*
+*              Copy D(k) to A
+*
+               A( K-1, K-1 ) = W( K-1, KW-1 )
+               A( K-1, K ) = W( K-1, KW )
+               A( K, K ) = W( K, KW )
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K-1 ) = -KP
+         END IF
+*
+*        Decrease K and return to the start of the main loop
+*
+         K = K - KSTEP
+         GO TO 10
+*
+   30    CONTINUE
+*
+*        Update the upper triangle of A11 (= A(1:k,1:k)) as
+*
+*        A11 := A11 - U12*D*U12' = A11 - U12*W'
+*
+*        computing blocks of NB columns at a time
+*
+         DO 50 J = ( ( K-1 ) / NB )*NB + 1, 1, -NB
+            JB = MIN( NB, K-J+1 )
+*
+*           Update the upper triangle of the diagonal block
+*
+            DO 40 JJ = J, J + JB - 1
+               CALL CGEMV( 'No transpose', JJ-J+1, N-K, -CONE,
+     $                     A( J, K+1 ), LDA, W( JJ, KW+1 ), LDW, CONE,
+     $                     A( J, JJ ), 1 )
+   40       CONTINUE
+*
+*           Update the rectangular superdiagonal block
+*
+            CALL CGEMM( 'No transpose', 'Transpose', J-1, JB, N-K,
+     $                  -CONE, A( 1, K+1 ), LDA, W( J, KW+1 ), LDW,
+     $                  CONE, A( 1, J ), LDA )
+   50    CONTINUE
+*
+*        Put U12 in standard form by partially undoing the interchanges
+*        in columns k+1:n
+*
+         J = K + 1
+   60    CONTINUE
+         JJ = J
+         JP = IPIV( J )
+         IF( JP.LT.0 ) THEN
+            JP = -JP
+            J = J + 1
+         END IF
+         J = J + 1
+         IF( JP.NE.JJ .AND. J.LE.N )
+     $      CALL CSWAP( N-J+1, A( JP, J ), LDA, A( JJ, J ), LDA )
+         IF( J.LE.N )
+     $      GO TO 60
+*
+*        Set KB to the number of columns factorized
+*
+         KB = N - K
+*
+      ELSE
+*
+*        Factorize the leading columns of A using the lower triangle
+*        of A and working forwards, and compute the matrix W = L21*D
+*        for use in updating A22
+*
+*        K is the main loop index, increasing from 1 in steps of 1 or 2
+*
+         K = 1
+   70    CONTINUE
+*
+*        Exit from loop
+*
+         IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N )
+     $      GO TO 90
+*
+*        Copy column K of A to column K of W and update it
+*
+         CALL CCOPY( N-K+1, A( K, K ), 1, W( K, K ), 1 )
+         CALL CGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ), LDA,
+     $               W( K, 1 ), LDW, CONE, W( K, K ), 1 )
+*
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = CABS1( W( K, K ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.LT.N ) THEN
+            IMAX = K + ICAMAX( N-K, W( K+1, K ), 1 )
+            COLMAX = CABS1( W( IMAX, K ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              Copy column IMAX to column K+1 of W and update it
+*
+               CALL CCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1 )
+               CALL CCOPY( N-IMAX+1, A( IMAX, IMAX ), 1, W( IMAX, K+1 ),
+     $                     1 )
+               CALL CGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ),
+     $                     LDA, W( IMAX, 1 ), LDW, CONE, W( K, K+1 ),
+     $                     1 )
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               JMAX = K - 1 + ICAMAX( IMAX-K, W( K, K+1 ), 1 )
+               ROWMAX = CABS1( W( JMAX, K+1 ) )
+               IF( IMAX.LT.N ) THEN
+                  JMAX = IMAX + ICAMAX( N-IMAX, W( IMAX+1, K+1 ), 1 )
+                  ROWMAX = MAX( ROWMAX, CABS1( W( JMAX, K+1 ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( CABS1( W( IMAX, K+1 ) ).GE.ALPHA*ROWMAX ) THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+*
+*                 copy column K+1 of W to column K
+*
+                  CALL CCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 )
+               ELSE
+*
+*                 interchange rows and columns K+1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K + KSTEP - 1
+*
+*           Updated column KP is already stored in column KK of W
+*
+            IF( KP.NE.KK ) THEN
+*
+*              Copy non-updated column KK to column KP
+*
+               A( KP, K ) = A( KK, K )
+               CALL CCOPY( KP-K-1, A( K+1, KK ), 1, A( KP, K+1 ), LDA )
+               CALL CCOPY( N-KP+1, A( KP, KK ), 1, A( KP, KP ), 1 )
+*
+*              Interchange rows KK and KP in first KK columns of A and W
+*
+               CALL CSWAP( KK, A( KK, 1 ), LDA, A( KP, 1 ), LDA )
+               CALL CSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW )
+            END IF
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column k of W now holds
+*
+*              W(k) = L(k)*D(k)
+*
+*              where L(k) is the k-th column of L
+*
+*              Store L(k) in column k of A
+*
+               CALL CCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 )
+               IF( K.LT.N ) THEN
+                  R1 = CONE / A( K, K )
+                  CALL CSCAL( N-K, R1, A( K+1, K ), 1 )
+               END IF
+            ELSE
+*
+*              2-by-2 pivot block D(k): columns k and k+1 of W now hold
+*
+*              ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k)
+*
+*              where L(k) and L(k+1) are the k-th and (k+1)-th columns
+*              of L
+*
+               IF( K.LT.N-1 ) THEN
+*
+*                 Store L(k) and L(k+1) in columns k and k+1 of A
+*
+                  D21 = W( K+1, K )
+                  D11 = W( K+1, K+1 ) / D21
+                  D22 = W( K, K ) / D21
+                  T = CONE / ( D11*D22-CONE )
+                  D21 = T / D21
+                  DO 80 J = K + 2, N
+                     A( J, K ) = D21*( D11*W( J, K )-W( J, K+1 ) )
+                     A( J, K+1 ) = D21*( D22*W( J, K+1 )-W( J, K ) )
+   80             CONTINUE
+               END IF
+*
+*              Copy D(k) to A
+*
+               A( K, K ) = W( K, K )
+               A( K+1, K ) = W( K+1, K )
+               A( K+1, K+1 ) = W( K+1, K+1 )
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K+1 ) = -KP
+         END IF
+*
+*        Increase K and return to the start of the main loop
+*
+         K = K + KSTEP
+         GO TO 70
+*
+   90    CONTINUE
+*
+*        Update the lower triangle of A22 (= A(k:n,k:n)) as
+*
+*        A22 := A22 - L21*D*L21' = A22 - L21*W'
+*
+*        computing blocks of NB columns at a time
+*
+         DO 110 J = K, N, NB
+            JB = MIN( NB, N-J+1 )
+*
+*           Update the lower triangle of the diagonal block
+*
+            DO 100 JJ = J, J + JB - 1
+               CALL CGEMV( 'No transpose', J+JB-JJ, K-1, -CONE,
+     $                     A( JJ, 1 ), LDA, W( JJ, 1 ), LDW, CONE,
+     $                     A( JJ, JJ ), 1 )
+  100       CONTINUE
+*
+*           Update the rectangular subdiagonal block
+*
+            IF( J+JB.LE.N )
+     $         CALL CGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB,
+     $                     K-1, -CONE, A( J+JB, 1 ), LDA, W( J, 1 ),
+     $                     LDW, CONE, A( J+JB, J ), LDA )
+  110    CONTINUE
+*
+*        Put L21 in standard form by partially undoing the interchanges
+*        in columns 1:k-1
+*
+         J = K - 1
+  120    CONTINUE
+         JJ = J
+         JP = IPIV( J )
+         IF( JP.LT.0 ) THEN
+            JP = -JP
+            J = J - 1
+         END IF
+         J = J - 1
+         IF( JP.NE.JJ .AND. J.GE.1 )
+     $      CALL CSWAP( J, A( JP, 1 ), LDA, A( JJ, 1 ), LDA )
+         IF( J.GE.1 )
+     $      GO TO 120
+*
+*        Set KB to the number of columns factorized
+*
+         KB = K - 1
+*
+      END IF
+      RETURN
+*
+*     End of CLASYF
+*
+      END
diff --git a/costa/native/external/lapack/clatbs.f b/costa/native/external/lapack/clatbs.f
new file mode 100644
index 000000000..dbfac7532
--- /dev/null
+++ b/costa/native/external/lapack/clatbs.f
@@ -0,0 +1,909 @@
+      SUBROUTINE CLATBS( UPLO, TRANS, DIAG, NORMIN, N, KD, AB, LDAB, X,
+     $                   SCALE, CNORM, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORMIN, TRANS, UPLO
+      INTEGER            INFO, KD, LDAB, N
+      REAL               SCALE
+*     ..
+*     .. Array Arguments ..
+      REAL               CNORM( * )
+      COMPLEX            AB( LDAB, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLATBS solves one of the triangular systems
+*
+*     A * x = s*b,  A**T * x = s*b,  or  A**H * x = s*b,
+*
+*  with scaling to prevent overflow, where A is an upper or lower
+*  triangular band matrix.  Here A' denotes the transpose of A, x and b
+*  are n-element vectors, and s is a scaling factor, usually less than
+*  or equal to 1, chosen so that the components of x will be less than
+*  the overflow threshold.  If the unscaled problem will not cause
+*  overflow, the Level 2 BLAS routine CTBSV is called.  If the matrix A
+*  is singular (A(j,j) = 0 for some j), then s is set to 0 and a
+*  non-trivial solution to A*x = 0 is returned.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the matrix A is upper or lower triangular.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the operation applied to A.
+*          = 'N':  Solve A * x = s*b     (No transpose)
+*          = 'T':  Solve A**T * x = s*b  (Transpose)
+*          = 'C':  Solve A**H * x = s*b  (Conjugate transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          Specifies whether or not the matrix A is unit triangular.
+*          = 'N':  Non-unit triangular
+*          = 'U':  Unit triangular
+*
+*  NORMIN  (input) CHARACTER*1
+*          Specifies whether CNORM has been set or not.
+*          = 'Y':  CNORM contains the column norms on entry
+*          = 'N':  CNORM is not set on entry.  On exit, the norms will
+*                  be computed and stored in CNORM.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of subdiagonals or superdiagonals in the
+*          triangular matrix A.  KD >= 0.
+*
+*  AB      (input) COMPLEX array, dimension (LDAB,N)
+*          The upper or lower triangular band matrix A, stored in the
+*          first KD+1 rows of the array. The j-th column of A is stored
+*          in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  X       (input/output) COMPLEX array, dimension (N)
+*          On entry, the right hand side b of the triangular system.
+*          On exit, X is overwritten by the solution vector x.
+*
+*  SCALE   (output) REAL
+*          The scaling factor s for the triangular system
+*             A * x = s*b,  A**T * x = s*b,  or  A**H * x = s*b.
+*          If SCALE = 0, the matrix A is singular or badly scaled, and
+*          the vector x is an exact or approximate solution to A*x = 0.
+*
+*  CNORM   (input or output) REAL array, dimension (N)
+*
+*          If NORMIN = 'Y', CNORM is an input argument and CNORM(j)
+*          contains the norm of the off-diagonal part of the j-th column
+*          of A.  If TRANS = 'N', CNORM(j) must be greater than or equal
+*          to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)
+*          must be greater than or equal to the 1-norm.
+*
+*          If NORMIN = 'N', CNORM is an output argument and CNORM(j)
+*          returns the 1-norm of the offdiagonal part of the j-th column
+*          of A.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -k, the k-th argument had an illegal value
+*
+*  Further Details
+*  ======= =======
+*
+*  A rough bound on x is computed; if that is less than overflow, CTBSV
+*  is called, otherwise, specific code is used which checks for possible
+*  overflow or divide-by-zero at every operation.
+*
+*  A columnwise scheme is used for solving A*x = b.  The basic algorithm
+*  if A is lower triangular is
+*
+*       x[1:n] := b[1:n]
+*       for j = 1, ..., n
+*            x(j) := x(j) / A(j,j)
+*            x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j]
+*       end
+*
+*  Define bounds on the components of x after j iterations of the loop:
+*     M(j) = bound on x[1:j]
+*     G(j) = bound on x[j+1:n]
+*  Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}.
+*
+*  Then for iteration j+1 we have
+*     M(j+1) <= G(j) / | A(j+1,j+1) |
+*     G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] |
+*            <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | )
+*
+*  where CNORM(j+1) is greater than or equal to the infinity-norm of
+*  column j+1 of A, not counting the diagonal.  Hence
+*
+*     G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | )
+*                  1<=i<=j
+*  and
+*
+*     |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| )
+*                                   1<=i< j
+*
+*  Since |x(j)| <= M(j), we use the Level 2 BLAS routine CTBSV if the
+*  reciprocal of the largest M(j), j=1,..,n, is larger than
+*  max(underflow, 1/overflow).
+*
+*  The bound on x(j) is also used to determine when a step in the
+*  columnwise method can be performed without fear of overflow.  If
+*  the computed bound is greater than a large constant, x is scaled to
+*  prevent overflow, but if the bound overflows, x is set to 0, x(j) to
+*  1, and scale to 0, and a non-trivial solution to A*x = 0 is found.
+*
+*  Similarly, a row-wise scheme is used to solve A**T *x = b  or
+*  A**H *x = b.  The basic algorithm for A upper triangular is
+*
+*       for j = 1, ..., n
+*            x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j)
+*       end
+*
+*  We simultaneously compute two bounds
+*       G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j
+*       M(j) = bound on x(i), 1<=i<=j
+*
+*  The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we
+*  add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1.
+*  Then the bound on x(j) is
+*
+*       M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) |
+*
+*            <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| )
+*                      1<=i<=j
+*
+*  and we can safely call CTBSV if 1/M(n) and 1/G(n) are both greater
+*  than max(underflow, 1/overflow).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, HALF, ONE, TWO
+      PARAMETER          ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0,
+     $                   TWO = 2.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN, NOUNIT, UPPER
+      INTEGER            I, IMAX, J, JFIRST, JINC, JLAST, JLEN, MAIND
+      REAL               BIGNUM, GROW, REC, SMLNUM, TJJ, TMAX, TSCAL,
+     $                   XBND, XJ, XMAX
+      COMPLEX            CSUMJ, TJJS, USCAL, ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ICAMAX, ISAMAX
+      REAL               SCASUM, SLAMCH
+      COMPLEX            CDOTC, CDOTU, CLADIV
+      EXTERNAL           LSAME, ICAMAX, ISAMAX, SCASUM, SLAMCH, CDOTC,
+     $                   CDOTU, CLADIV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CSSCAL, CTBSV, SLABAD, SSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, CMPLX, CONJG, MAX, MIN, REAL
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1, CABS2
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+      CABS2( ZDUM ) = ABS( REAL( ZDUM ) / 2. ) +
+     $                ABS( AIMAG( ZDUM ) / 2. )
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Test the input parameters.
+*
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT.
+     $         LSAME( NORMIN, 'N' ) ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CLATBS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine machine dependent parameters to control overflow.
+*
+      SMLNUM = SLAMCH( 'Safe minimum' )
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SMLNUM / SLAMCH( 'Precision' )
+      BIGNUM = ONE / SMLNUM
+      SCALE = ONE
+*
+      IF( LSAME( NORMIN, 'N' ) ) THEN
+*
+*        Compute the 1-norm of each column, not including the diagonal.
+*
+         IF( UPPER ) THEN
+*
+*           A is upper triangular.
+*
+            DO 10 J = 1, N
+               JLEN = MIN( KD, J-1 )
+               CNORM( J ) = SCASUM( JLEN, AB( KD+1-JLEN, J ), 1 )
+   10       CONTINUE
+         ELSE
+*
+*           A is lower triangular.
+*
+            DO 20 J = 1, N
+               JLEN = MIN( KD, N-J )
+               IF( JLEN.GT.0 ) THEN
+                  CNORM( J ) = SCASUM( JLEN, AB( 2, J ), 1 )
+               ELSE
+                  CNORM( J ) = ZERO
+               END IF
+   20       CONTINUE
+         END IF
+      END IF
+*
+*     Scale the column norms by TSCAL if the maximum element in CNORM is
+*     greater than BIGNUM/2.
+*
+      IMAX = ISAMAX( N, CNORM, 1 )
+      TMAX = CNORM( IMAX )
+      IF( TMAX.LE.BIGNUM*HALF ) THEN
+         TSCAL = ONE
+      ELSE
+         TSCAL = HALF / ( SMLNUM*TMAX )
+         CALL SSCAL( N, TSCAL, CNORM, 1 )
+      END IF
+*
+*     Compute a bound on the computed solution vector to see if the
+*     Level 2 BLAS routine CTBSV can be used.
+*
+      XMAX = ZERO
+      DO 30 J = 1, N
+         XMAX = MAX( XMAX, CABS2( X( J ) ) )
+   30 CONTINUE
+      XBND = XMAX
+      IF( NOTRAN ) THEN
+*
+*        Compute the growth in A * x = b.
+*
+         IF( UPPER ) THEN
+            JFIRST = N
+            JLAST = 1
+            JINC = -1
+            MAIND = KD + 1
+         ELSE
+            JFIRST = 1
+            JLAST = N
+            JINC = 1
+            MAIND = 1
+         END IF
+*
+         IF( TSCAL.NE.ONE ) THEN
+            GROW = ZERO
+            GO TO 60
+         END IF
+*
+         IF( NOUNIT ) THEN
+*
+*           A is non-unit triangular.
+*
+*           Compute GROW = 1/G(j) and XBND = 1/M(j).
+*           Initially, G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = HALF / MAX( XBND, SMLNUM )
+            XBND = GROW
+            DO 40 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 60
+*
+               TJJS = AB( MAIND, J )
+               TJJ = CABS1( TJJS )
+*
+               IF( TJJ.GE.SMLNUM ) THEN
+*
+*                 M(j) = G(j-1) / abs(A(j,j))
+*
+                  XBND = MIN( XBND, MIN( ONE, TJJ )*GROW )
+               ELSE
+*
+*                 M(j) could overflow, set XBND to 0.
+*
+                  XBND = ZERO
+               END IF
+*
+               IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN
+*
+*                 G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) )
+*
+                  GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) )
+               ELSE
+*
+*                 G(j) could overflow, set GROW to 0.
+*
+                  GROW = ZERO
+               END IF
+   40       CONTINUE
+            GROW = XBND
+         ELSE
+*
+*           A is unit triangular.
+*
+*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) )
+            DO 50 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 60
+*
+*              G(j) = G(j-1)*( 1 + CNORM(j) )
+*
+               GROW = GROW*( ONE / ( ONE+CNORM( J ) ) )
+   50       CONTINUE
+         END IF
+   60    CONTINUE
+*
+      ELSE
+*
+*        Compute the growth in A**T * x = b  or  A**H * x = b.
+*
+         IF( UPPER ) THEN
+            JFIRST = 1
+            JLAST = N
+            JINC = 1
+            MAIND = KD + 1
+         ELSE
+            JFIRST = N
+            JLAST = 1
+            JINC = -1
+            MAIND = 1
+         END IF
+*
+         IF( TSCAL.NE.ONE ) THEN
+            GROW = ZERO
+            GO TO 90
+         END IF
+*
+         IF( NOUNIT ) THEN
+*
+*           A is non-unit triangular.
+*
+*           Compute GROW = 1/G(j) and XBND = 1/M(j).
+*           Initially, M(0) = max{x(i), i=1,...,n}.
+*
+            GROW = HALF / MAX( XBND, SMLNUM )
+            XBND = GROW
+            DO 70 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 90
+*
+*              G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) )
+*
+               XJ = ONE + CNORM( J )
+               GROW = MIN( GROW, XBND / XJ )
+*
+               TJJS = AB( MAIND, J )
+               TJJ = CABS1( TJJS )
+*
+               IF( TJJ.GE.SMLNUM ) THEN
+*
+*                 M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j))
+*
+                  IF( XJ.GT.TJJ )
+     $               XBND = XBND*( TJJ / XJ )
+               ELSE
+*
+*                 M(j) could overflow, set XBND to 0.
+*
+                  XBND = ZERO
+               END IF
+   70       CONTINUE
+            GROW = MIN( GROW, XBND )
+         ELSE
+*
+*           A is unit triangular.
+*
+*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) )
+            DO 80 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 90
+*
+*              G(j) = ( 1 + CNORM(j) )*G(j-1)
+*
+               XJ = ONE + CNORM( J )
+               GROW = GROW / XJ
+   80       CONTINUE
+         END IF
+   90    CONTINUE
+      END IF
+*
+      IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN
+*
+*        Use the Level 2 BLAS solve if the reciprocal of the bound on
+*        elements of X is not too small.
+*
+         CALL CTBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, X, 1 )
+      ELSE
+*
+*        Use a Level 1 BLAS solve, scaling intermediate results.
+*
+         IF( XMAX.GT.BIGNUM*HALF ) THEN
+*
+*           Scale X so that its components are less than or equal to
+*           BIGNUM in absolute value.
+*
+            SCALE = ( BIGNUM*HALF ) / XMAX
+            CALL CSSCAL( N, SCALE, X, 1 )
+            XMAX = BIGNUM
+         ELSE
+            XMAX = XMAX*TWO
+         END IF
+*
+         IF( NOTRAN ) THEN
+*
+*           Solve A * x = b
+*
+            DO 110 J = JFIRST, JLAST, JINC
+*
+*              Compute x(j) = b(j) / A(j,j), scaling x if necessary.
+*
+               XJ = CABS1( X( J ) )
+               IF( NOUNIT ) THEN
+                  TJJS = AB( MAIND, J )*TSCAL
+               ELSE
+                  TJJS = TSCAL
+                  IF( TSCAL.EQ.ONE )
+     $               GO TO 105
+               END IF
+                  TJJ = CABS1( TJJS )
+                  IF( TJJ.GT.SMLNUM ) THEN
+*
+*                    abs(A(j,j)) > SMLNUM:
+*
+                     IF( TJJ.LT.ONE ) THEN
+                        IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                          Scale x by 1/b(j).
+*
+                           REC = ONE / XJ
+                           CALL CSSCAL( N, REC, X, 1 )
+                           SCALE = SCALE*REC
+                           XMAX = XMAX*REC
+                        END IF
+                     END IF
+                     X( J ) = CLADIV( X( J ), TJJS )
+                     XJ = CABS1( X( J ) )
+                  ELSE IF( TJJ.GT.ZERO ) THEN
+*
+*                    0 < abs(A(j,j)) <= SMLNUM:
+*
+                     IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                       Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM
+*                       to avoid overflow when dividing by A(j,j).
+*
+                        REC = ( TJJ*BIGNUM ) / XJ
+                        IF( CNORM( J ).GT.ONE ) THEN
+*
+*                          Scale by 1/CNORM(j) to avoid overflow when
+*                          multiplying x(j) times column j.
+*
+                           REC = REC / CNORM( J )
+                        END IF
+                        CALL CSSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                     X( J ) = CLADIV( X( J ), TJJS )
+                     XJ = CABS1( X( J ) )
+                  ELSE
+*
+*                    A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+*                    scale = 0, and compute a solution to A*x = 0.
+*
+                     DO 100 I = 1, N
+                        X( I ) = ZERO
+  100                CONTINUE
+                     X( J ) = ONE
+                     XJ = ONE
+                     SCALE = ZERO
+                     XMAX = ZERO
+                  END IF
+  105          CONTINUE
+*
+*              Scale x if necessary to avoid overflow when adding a
+*              multiple of column j of A.
+*
+               IF( XJ.GT.ONE ) THEN
+                  REC = ONE / XJ
+                  IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN
+*
+*                    Scale x by 1/(2*abs(x(j))).
+*
+                     REC = REC*HALF
+                     CALL CSSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                  END IF
+               ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN
+*
+*                 Scale x by 1/2.
+*
+                  CALL CSSCAL( N, HALF, X, 1 )
+                  SCALE = SCALE*HALF
+               END IF
+*
+               IF( UPPER ) THEN
+                  IF( J.GT.1 ) THEN
+*
+*                    Compute the update
+*                       x(max(1,j-kd):j-1) := x(max(1,j-kd):j-1) -
+*                                             x(j)* A(max(1,j-kd):j-1,j)
+*
+                     JLEN = MIN( KD, J-1 )
+                     CALL CAXPY( JLEN, -X( J )*TSCAL,
+     $                           AB( KD+1-JLEN, J ), 1, X( J-JLEN ), 1 )
+                     I = ICAMAX( J-1, X, 1 )
+                     XMAX = CABS1( X( I ) )
+                  END IF
+               ELSE IF( J.LT.N ) THEN
+*
+*                 Compute the update
+*                    x(j+1:min(j+kd,n)) := x(j+1:min(j+kd,n)) -
+*                                          x(j) * A(j+1:min(j+kd,n),j)
+*
+                  JLEN = MIN( KD, N-J )
+                  IF( JLEN.GT.0 )
+     $               CALL CAXPY( JLEN, -X( J )*TSCAL, AB( 2, J ), 1,
+     $                           X( J+1 ), 1 )
+                  I = J + ICAMAX( N-J, X( J+1 ), 1 )
+                  XMAX = CABS1( X( I ) )
+               END IF
+  110       CONTINUE
+*
+         ELSE IF( LSAME( TRANS, 'T' ) ) THEN
+*
+*           Solve A**T * x = b
+*
+            DO 150 J = JFIRST, JLAST, JINC
+*
+*              Compute x(j) = b(j) - sum A(k,j)*x(k).
+*                                    k<>j
+*
+               XJ = CABS1( X( J ) )
+               USCAL = TSCAL
+               REC = ONE / MAX( XMAX, ONE )
+               IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN
+*
+*                 If x(j) could overflow, scale x by 1/(2*XMAX).
+*
+                  REC = REC*HALF
+                  IF( NOUNIT ) THEN
+                     TJJS = AB( MAIND, J )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                  END IF
+                     TJJ = CABS1( TJJS )
+                     IF( TJJ.GT.ONE ) THEN
+*
+*                       Divide by A(j,j) when scaling x if A(j,j) > 1.
+*
+                        REC = MIN( ONE, REC*TJJ )
+                        USCAL = CLADIV( USCAL, TJJS )
+                     END IF
+                  IF( REC.LT.ONE ) THEN
+                     CALL CSSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                     XMAX = XMAX*REC
+                  END IF
+               END IF
+*
+               CSUMJ = ZERO
+               IF( USCAL.EQ.CMPLX( ONE ) ) THEN
+*
+*                 If the scaling needed for A in the dot product is 1,
+*                 call CDOTU to perform the dot product.
+*
+                  IF( UPPER ) THEN
+                     JLEN = MIN( KD, J-1 )
+                     CSUMJ = CDOTU( JLEN, AB( KD+1-JLEN, J ), 1,
+     $                       X( J-JLEN ), 1 )
+                  ELSE
+                     JLEN = MIN( KD, N-J )
+                     IF( JLEN.GT.1 )
+     $                  CSUMJ = CDOTU( JLEN, AB( 2, J ), 1, X( J+1 ),
+     $                          1 )
+                  END IF
+               ELSE
+*
+*                 Otherwise, use in-line code for the dot product.
+*
+                  IF( UPPER ) THEN
+                     JLEN = MIN( KD, J-1 )
+                     DO 120 I = 1, JLEN
+                        CSUMJ = CSUMJ + ( AB( KD+I-JLEN, J )*USCAL )*
+     $                          X( J-JLEN-1+I )
+  120                CONTINUE
+                  ELSE
+                     JLEN = MIN( KD, N-J )
+                     DO 130 I = 1, JLEN
+                        CSUMJ = CSUMJ + ( AB( I+1, J )*USCAL )*X( J+I )
+  130                CONTINUE
+                  END IF
+               END IF
+*
+               IF( USCAL.EQ.CMPLX( TSCAL ) ) THEN
+*
+*                 Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j)
+*                 was not used to scale the dotproduct.
+*
+                  X( J ) = X( J ) - CSUMJ
+                  XJ = CABS1( X( J ) )
+                  IF( NOUNIT ) THEN
+*
+*                    Compute x(j) = x(j) / A(j,j), scaling if necessary.
+*
+                     TJJS = AB( MAIND, J )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                     IF( TSCAL.EQ.ONE )
+     $                  GO TO 145
+                  END IF
+                     TJJ = CABS1( TJJS )
+                     IF( TJJ.GT.SMLNUM ) THEN
+*
+*                       abs(A(j,j)) > SMLNUM:
+*
+                        IF( TJJ.LT.ONE ) THEN
+                           IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                             Scale X by 1/abs(x(j)).
+*
+                              REC = ONE / XJ
+                              CALL CSSCAL( N, REC, X, 1 )
+                              SCALE = SCALE*REC
+                              XMAX = XMAX*REC
+                           END IF
+                        END IF
+                        X( J ) = CLADIV( X( J ), TJJS )
+                     ELSE IF( TJJ.GT.ZERO ) THEN
+*
+*                       0 < abs(A(j,j)) <= SMLNUM:
+*
+                        IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM.
+*
+                           REC = ( TJJ*BIGNUM ) / XJ
+                           CALL CSSCAL( N, REC, X, 1 )
+                           SCALE = SCALE*REC
+                           XMAX = XMAX*REC
+                        END IF
+                        X( J ) = CLADIV( X( J ), TJJS )
+                     ELSE
+*
+*                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+*                       scale = 0 and compute a solution to A**T *x = 0.
+*
+                        DO 140 I = 1, N
+                           X( I ) = ZERO
+  140                   CONTINUE
+                        X( J ) = ONE
+                        SCALE = ZERO
+                        XMAX = ZERO
+                     END IF
+  145             CONTINUE
+               ELSE
+*
+*                 Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot
+*                 product has already been divided by 1/A(j,j).
+*
+                  X( J ) = CLADIV( X( J ), TJJS ) - CSUMJ
+               END IF
+               XMAX = MAX( XMAX, CABS1( X( J ) ) )
+  150       CONTINUE
+*
+         ELSE
+*
+*           Solve A**H * x = b
+*
+            DO 190 J = JFIRST, JLAST, JINC
+*
+*              Compute x(j) = b(j) - sum A(k,j)*x(k).
+*                                    k<>j
+*
+               XJ = CABS1( X( J ) )
+               USCAL = TSCAL
+               REC = ONE / MAX( XMAX, ONE )
+               IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN
+*
+*                 If x(j) could overflow, scale x by 1/(2*XMAX).
+*
+                  REC = REC*HALF
+                  IF( NOUNIT ) THEN
+                     TJJS = CONJG( AB( MAIND, J ) )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                  END IF
+                     TJJ = CABS1( TJJS )
+                     IF( TJJ.GT.ONE ) THEN
+*
+*                       Divide by A(j,j) when scaling x if A(j,j) > 1.
+*
+                        REC = MIN( ONE, REC*TJJ )
+                        USCAL = CLADIV( USCAL, TJJS )
+                     END IF
+                  IF( REC.LT.ONE ) THEN
+                     CALL CSSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                     XMAX = XMAX*REC
+                  END IF
+               END IF
+*
+               CSUMJ = ZERO
+               IF( USCAL.EQ.CMPLX( ONE ) ) THEN
+*
+*                 If the scaling needed for A in the dot product is 1,
+*                 call CDOTC to perform the dot product.
+*
+                  IF( UPPER ) THEN
+                     JLEN = MIN( KD, J-1 )
+                     CSUMJ = CDOTC( JLEN, AB( KD+1-JLEN, J ), 1,
+     $                       X( J-JLEN ), 1 )
+                  ELSE
+                     JLEN = MIN( KD, N-J )
+                     IF( JLEN.GT.1 )
+     $                  CSUMJ = CDOTC( JLEN, AB( 2, J ), 1, X( J+1 ),
+     $                          1 )
+                  END IF
+               ELSE
+*
+*                 Otherwise, use in-line code for the dot product.
+*
+                  IF( UPPER ) THEN
+                     JLEN = MIN( KD, J-1 )
+                     DO 160 I = 1, JLEN
+                        CSUMJ = CSUMJ + ( CONJG( AB( KD+I-JLEN, J ) )*
+     $                          USCAL )*X( J-JLEN-1+I )
+  160                CONTINUE
+                  ELSE
+                     JLEN = MIN( KD, N-J )
+                     DO 170 I = 1, JLEN
+                        CSUMJ = CSUMJ + ( CONJG( AB( I+1, J ) )*USCAL )*
+     $                          X( J+I )
+  170                CONTINUE
+                  END IF
+               END IF
+*
+               IF( USCAL.EQ.CMPLX( TSCAL ) ) THEN
+*
+*                 Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j)
+*                 was not used to scale the dotproduct.
+*
+                  X( J ) = X( J ) - CSUMJ
+                  XJ = CABS1( X( J ) )
+                  IF( NOUNIT ) THEN
+*
+*                    Compute x(j) = x(j) / A(j,j), scaling if necessary.
+*
+                     TJJS = CONJG( AB( MAIND, J ) )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                     IF( TSCAL.EQ.ONE )
+     $                  GO TO 185
+                  END IF
+                     TJJ = CABS1( TJJS )
+                     IF( TJJ.GT.SMLNUM ) THEN
+*
+*                       abs(A(j,j)) > SMLNUM:
+*
+                        IF( TJJ.LT.ONE ) THEN
+                           IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                             Scale X by 1/abs(x(j)).
+*
+                              REC = ONE / XJ
+                              CALL CSSCAL( N, REC, X, 1 )
+                              SCALE = SCALE*REC
+                              XMAX = XMAX*REC
+                           END IF
+                        END IF
+                        X( J ) = CLADIV( X( J ), TJJS )
+                     ELSE IF( TJJ.GT.ZERO ) THEN
+*
+*                       0 < abs(A(j,j)) <= SMLNUM:
+*
+                        IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM.
+*
+                           REC = ( TJJ*BIGNUM ) / XJ
+                           CALL CSSCAL( N, REC, X, 1 )
+                           SCALE = SCALE*REC
+                           XMAX = XMAX*REC
+                        END IF
+                        X( J ) = CLADIV( X( J ), TJJS )
+                     ELSE
+*
+*                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+*                       scale = 0 and compute a solution to A**H *x = 0.
+*
+                        DO 180 I = 1, N
+                           X( I ) = ZERO
+  180                   CONTINUE
+                        X( J ) = ONE
+                        SCALE = ZERO
+                        XMAX = ZERO
+                     END IF
+  185             CONTINUE
+               ELSE
+*
+*                 Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot
+*                 product has already been divided by 1/A(j,j).
+*
+                  X( J ) = CLADIV( X( J ), TJJS ) - CSUMJ
+               END IF
+               XMAX = MAX( XMAX, CABS1( X( J ) ) )
+  190       CONTINUE
+         END IF
+         SCALE = SCALE / TSCAL
+      END IF
+*
+*     Scale the column norms by 1/TSCAL for return.
+*
+      IF( TSCAL.NE.ONE ) THEN
+         CALL SSCAL( N, ONE / TSCAL, CNORM, 1 )
+      END IF
+*
+      RETURN
+*
+*     End of CLATBS
+*
+      END
diff --git a/costa/native/external/lapack/clatdf.f b/costa/native/external/lapack/clatdf.f
new file mode 100644
index 000000000..c39102b9b
--- /dev/null
+++ b/costa/native/external/lapack/clatdf.f
@@ -0,0 +1,242 @@
+      SUBROUTINE CLATDF( IJOB, N, Z, LDZ, RHS, RDSUM, RDSCAL, IPIV,
+     $                   JPIV )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            IJOB, LDZ, N
+      REAL               RDSCAL, RDSUM
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), JPIV( * )
+      COMPLEX            RHS( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLATDF computes the contribution to the reciprocal Dif-estimate
+*  by solving for x in Z * x = b, where b is chosen such that the norm
+*  of x is as large as possible. It is assumed that LU decomposition
+*  of Z has been computed by CGETC2. On entry RHS = f holds the
+*  contribution from earlier solved sub-systems, and on return RHS = x.
+*
+*  The factorization of Z returned by CGETC2 has the form
+*  Z = P * L * U * Q, where P and Q are permutation matrices. L is lower
+*  triangular with unit diagonal elements and U is upper triangular.
+*
+*  Arguments
+*  =========
+*
+*  IJOB    (input) INTEGER
+*          IJOB = 2: First compute an approximative null-vector e
+*              of Z using CGECON, e is normalized and solve for
+*              Zx = +-e - f with the sign giving the greater value of
+*              2-norm(x).  About 5 times as expensive as Default.
+*          IJOB .ne. 2: Local look ahead strategy where
+*              all entries of the r.h.s. b is choosen as either +1 or
+*              -1.  Default.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Z.
+*
+*  Z       (input) REAL array, dimension (LDZ, N)
+*          On entry, the LU part of the factorization of the n-by-n
+*          matrix Z computed by CGETC2:  Z = P * L * U * Q
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDA >= max(1, N).
+*
+*  RHS     (input/output) REAL array, dimension (N).
+*          On entry, RHS contains contributions from other subsystems.
+*          On exit, RHS contains the solution of the subsystem with
+*          entries according to the value of IJOB (see above).
+*
+*  RDSUM   (input/output) REAL
+*          On entry, the sum of squares of computed contributions to
+*          the Dif-estimate under computation by CTGSYL, where the
+*          scaling factor RDSCAL (see below) has been factored out.
+*          On exit, the corresponding sum of squares updated with the
+*          contributions from the current sub-system.
+*          If TRANS = 'T' RDSUM is not touched.
+*          NOTE: RDSUM only makes sense when CTGSY2 is called by CTGSYL.
+*
+*  RDSCAL  (input/output) REAL
+*          On entry, scaling factor used to prevent overflow in RDSUM.
+*          On exit, RDSCAL is updated w.r.t. the current contributions
+*          in RDSUM.
+*          If TRANS = 'T', RDSCAL is not touched.
+*          NOTE: RDSCAL only makes sense when CTGSY2 is called by
+*          CTGSYL.
+*
+*  IPIV    (input) INTEGER array, dimension (N).
+*          The pivot indices; for 1 <= i <= N, row i of the
+*          matrix has been interchanged with row IPIV(i).
+*
+*  JPIV    (input) INTEGER array, dimension (N).
+*          The pivot indices; for 1 <= j <= N, column j of the
+*          matrix has been interchanged with column JPIV(j).
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  This routine is a further developed implementation of algorithm
+*  BSOLVE in [1] using complete pivoting in the LU factorization.
+*
+*   [1]   Bo Kagstrom and Lars Westin,
+*         Generalized Schur Methods with Condition Estimators for
+*         Solving the Generalized Sylvester Equation, IEEE Transactions
+*         on Automatic Control, Vol. 34, No. 7, July 1989, pp 745-751.
+*
+*   [2]   Peter Poromaa,
+*         On Efficient and Robust Estimators for the Separation
+*         between two Regular Matrix Pairs with Applications in
+*         Condition Estimation. Report UMINF-95.05, Department of
+*         Computing Science, Umea University, S-901 87 Umea, Sweden,
+*         1995.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            MAXDIM
+      PARAMETER          ( MAXDIM = 2 )
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+      COMPLEX            CONE
+      PARAMETER          ( CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, INFO, J, K
+      REAL               RTEMP, SCALE, SMINU, SPLUS
+      COMPLEX            BM, BP, PMONE, TEMP
+*     ..
+*     .. Local Arrays ..
+      REAL               RWORK( MAXDIM )
+      COMPLEX            WORK( 4*MAXDIM ), XM( MAXDIM ), XP( MAXDIM )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CCOPY, CGECON, CGESC2, CLASSQ, CLASWP,
+     $                   CSCAL
+*     ..
+*     .. External Functions ..
+      REAL               SCASUM
+      COMPLEX            CDOTC
+      EXTERNAL           SCASUM, CDOTC
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( IJOB.NE.2 ) THEN
+*
+*        Apply permutations IPIV to RHS
+*
+         CALL CLASWP( 1, RHS, LDZ, 1, N-1, IPIV, 1 )
+*
+*        Solve for L-part choosing RHS either to +1 or -1.
+*
+         PMONE = -CONE
+         DO 10 J = 1, N - 1
+            BP = RHS( J ) + CONE
+            BM = RHS( J ) - CONE
+            SPLUS = ONE
+*
+*           Lockahead for L- part RHS(1:N-1) = +-1
+*           SPLUS and SMIN computed more efficiently than in BSOLVE[1].
+*
+            SPLUS = SPLUS + REAL( CDOTC( N-J, Z( J+1, J ), 1, Z( J+1,
+     $              J ), 1 ) )
+            SMINU = REAL( CDOTC( N-J, Z( J+1, J ), 1, RHS( J+1 ), 1 ) )
+            SPLUS = SPLUS*REAL( RHS( J ) )
+            IF( SPLUS.GT.SMINU ) THEN
+               RHS( J ) = BP
+            ELSE IF( SMINU.GT.SPLUS ) THEN
+               RHS( J ) = BM
+            ELSE
+*
+*              In this case the updating sums are equal and we can
+*              choose RHS(J) +1 or -1. The first time this happens we
+*              choose -1, thereafter +1. This is a simple way to get
+*              good estimates of matrices like Byers well-known example
+*              (see [1]). (Not done in BSOLVE.)
+*
+               RHS( J ) = RHS( J ) + PMONE
+               PMONE = CONE
+            END IF
+*
+*           Compute the remaining r.h.s.
+*
+            TEMP = -RHS( J )
+            CALL CAXPY( N-J, TEMP, Z( J+1, J ), 1, RHS( J+1 ), 1 )
+   10    CONTINUE
+*
+*        Solve for U- part, lockahead for RHS(N) = +-1. This is not done
+*        In BSOLVE and will hopefully give us a better estimate because
+*        any ill-conditioning of the original matrix is transfered to U
+*        and not to L. U(N, N) is an approximation to sigma_min(LU).
+*
+         CALL CCOPY( N-1, RHS, 1, WORK, 1 )
+         WORK( N ) = RHS( N ) + CONE
+         RHS( N ) = RHS( N ) - CONE
+         SPLUS = ZERO
+         SMINU = ZERO
+         DO 30 I = N, 1, -1
+            TEMP = CONE / Z( I, I )
+            WORK( I ) = WORK( I )*TEMP
+            RHS( I ) = RHS( I )*TEMP
+            DO 20 K = I + 1, N
+               WORK( I ) = WORK( I ) - WORK( K )*( Z( I, K )*TEMP )
+               RHS( I ) = RHS( I ) - RHS( K )*( Z( I, K )*TEMP )
+   20       CONTINUE
+            SPLUS = SPLUS + ABS( WORK( I ) )
+            SMINU = SMINU + ABS( RHS( I ) )
+   30    CONTINUE
+         IF( SPLUS.GT.SMINU )
+     $      CALL CCOPY( N, WORK, 1, RHS, 1 )
+*
+*        Apply the permutations JPIV to the computed solution (RHS)
+*
+         CALL CLASWP( 1, RHS, LDZ, 1, N-1, JPIV, -1 )
+*
+*        Compute the sum of squares
+*
+         CALL CLASSQ( N, RHS, 1, RDSCAL, RDSUM )
+         RETURN
+      END IF
+*
+*     ENTRY IJOB = 2
+*
+*     Compute approximate nullvector XM of Z
+*
+      CALL CGECON( 'I', N, Z, LDZ, ONE, RTEMP, WORK, RWORK, INFO )
+      CALL CCOPY( N, WORK( N+1 ), 1, XM, 1 )
+*
+*     Compute RHS
+*
+      CALL CLASWP( 1, XM, LDZ, 1, N-1, IPIV, -1 )
+      TEMP = CONE / SQRT( CDOTC( N, XM, 1, XM, 1 ) )
+      CALL CSCAL( N, TEMP, XM, 1 )
+      CALL CCOPY( N, XM, 1, XP, 1 )
+      CALL CAXPY( N, CONE, RHS, 1, XP, 1 )
+      CALL CAXPY( N, -CONE, XM, 1, RHS, 1 )
+      CALL CGESC2( N, Z, LDZ, RHS, IPIV, JPIV, SCALE )
+      CALL CGESC2( N, Z, LDZ, XP, IPIV, JPIV, SCALE )
+      IF( SCASUM( N, XP, 1 ).GT.SCASUM( N, RHS, 1 ) )
+     $   CALL CCOPY( N, XP, 1, RHS, 1 )
+*
+*     Compute the sum of squares
+*
+      CALL CLASSQ( N, RHS, 1, RDSCAL, RDSUM )
+      RETURN
+*
+*     End of CLATDF
+*
+      END
diff --git a/costa/native/external/lapack/clatps.f b/costa/native/external/lapack/clatps.f
new file mode 100644
index 000000000..aa8274b18
--- /dev/null
+++ b/costa/native/external/lapack/clatps.f
@@ -0,0 +1,895 @@
+      SUBROUTINE CLATPS( UPLO, TRANS, DIAG, NORMIN, N, AP, X, SCALE,
+     $                   CNORM, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORMIN, TRANS, UPLO
+      INTEGER            INFO, N
+      REAL               SCALE
+*     ..
+*     .. Array Arguments ..
+      REAL               CNORM( * )
+      COMPLEX            AP( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLATPS solves one of the triangular systems
+*
+*     A * x = s*b,  A**T * x = s*b,  or  A**H * x = s*b,
+*
+*  with scaling to prevent overflow, where A is an upper or lower
+*  triangular matrix stored in packed form.  Here A**T denotes the
+*  transpose of A, A**H denotes the conjugate transpose of A, x and b
+*  are n-element vectors, and s is a scaling factor, usually less than
+*  or equal to 1, chosen so that the components of x will be less than
+*  the overflow threshold.  If the unscaled problem will not cause
+*  overflow, the Level 2 BLAS routine CTPSV is called. If the matrix A
+*  is singular (A(j,j) = 0 for some j), then s is set to 0 and a
+*  non-trivial solution to A*x = 0 is returned.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the matrix A is upper or lower triangular.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the operation applied to A.
+*          = 'N':  Solve A * x = s*b     (No transpose)
+*          = 'T':  Solve A**T * x = s*b  (Transpose)
+*          = 'C':  Solve A**H * x = s*b  (Conjugate transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          Specifies whether or not the matrix A is unit triangular.
+*          = 'N':  Non-unit triangular
+*          = 'U':  Unit triangular
+*
+*  NORMIN  (input) CHARACTER*1
+*          Specifies whether CNORM has been set or not.
+*          = 'Y':  CNORM contains the column norms on entry
+*          = 'N':  CNORM is not set on entry.  On exit, the norms will
+*                  be computed and stored in CNORM.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input) COMPLEX array, dimension (N*(N+1)/2)
+*          The upper or lower triangular matrix A, packed columnwise in
+*          a linear array.  The j-th column of A is stored in the array
+*          AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*  X       (input/output) COMPLEX array, dimension (N)
+*          On entry, the right hand side b of the triangular system.
+*          On exit, X is overwritten by the solution vector x.
+*
+*  SCALE   (output) REAL
+*          The scaling factor s for the triangular system
+*             A * x = s*b,  A**T * x = s*b,  or  A**H * x = s*b.
+*          If SCALE = 0, the matrix A is singular or badly scaled, and
+*          the vector x is an exact or approximate solution to A*x = 0.
+*
+*  CNORM   (input or output) REAL array, dimension (N)
+*
+*          If NORMIN = 'Y', CNORM is an input argument and CNORM(j)
+*          contains the norm of the off-diagonal part of the j-th column
+*          of A.  If TRANS = 'N', CNORM(j) must be greater than or equal
+*          to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)
+*          must be greater than or equal to the 1-norm.
+*
+*          If NORMIN = 'N', CNORM is an output argument and CNORM(j)
+*          returns the 1-norm of the offdiagonal part of the j-th column
+*          of A.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -k, the k-th argument had an illegal value
+*
+*  Further Details
+*  ======= =======
+*
+*  A rough bound on x is computed; if that is less than overflow, CTPSV
+*  is called, otherwise, specific code is used which checks for possible
+*  overflow or divide-by-zero at every operation.
+*
+*  A columnwise scheme is used for solving A*x = b.  The basic algorithm
+*  if A is lower triangular is
+*
+*       x[1:n] := b[1:n]
+*       for j = 1, ..., n
+*            x(j) := x(j) / A(j,j)
+*            x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j]
+*       end
+*
+*  Define bounds on the components of x after j iterations of the loop:
+*     M(j) = bound on x[1:j]
+*     G(j) = bound on x[j+1:n]
+*  Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}.
+*
+*  Then for iteration j+1 we have
+*     M(j+1) <= G(j) / | A(j+1,j+1) |
+*     G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] |
+*            <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | )
+*
+*  where CNORM(j+1) is greater than or equal to the infinity-norm of
+*  column j+1 of A, not counting the diagonal.  Hence
+*
+*     G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | )
+*                  1<=i<=j
+*  and
+*
+*     |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| )
+*                                   1<=i< j
+*
+*  Since |x(j)| <= M(j), we use the Level 2 BLAS routine CTPSV if the
+*  reciprocal of the largest M(j), j=1,..,n, is larger than
+*  max(underflow, 1/overflow).
+*
+*  The bound on x(j) is also used to determine when a step in the
+*  columnwise method can be performed without fear of overflow.  If
+*  the computed bound is greater than a large constant, x is scaled to
+*  prevent overflow, but if the bound overflows, x is set to 0, x(j) to
+*  1, and scale to 0, and a non-trivial solution to A*x = 0 is found.
+*
+*  Similarly, a row-wise scheme is used to solve A**T *x = b  or
+*  A**H *x = b.  The basic algorithm for A upper triangular is
+*
+*       for j = 1, ..., n
+*            x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j)
+*       end
+*
+*  We simultaneously compute two bounds
+*       G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j
+*       M(j) = bound on x(i), 1<=i<=j
+*
+*  The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we
+*  add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1.
+*  Then the bound on x(j) is
+*
+*       M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) |
+*
+*            <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| )
+*                      1<=i<=j
+*
+*  and we can safely call CTPSV if 1/M(n) and 1/G(n) are both greater
+*  than max(underflow, 1/overflow).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, HALF, ONE, TWO
+      PARAMETER          ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0,
+     $                   TWO = 2.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN, NOUNIT, UPPER
+      INTEGER            I, IMAX, IP, J, JFIRST, JINC, JLAST, JLEN
+      REAL               BIGNUM, GROW, REC, SMLNUM, TJJ, TMAX, TSCAL,
+     $                   XBND, XJ, XMAX
+      COMPLEX            CSUMJ, TJJS, USCAL, ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ICAMAX, ISAMAX
+      REAL               SCASUM, SLAMCH
+      COMPLEX            CDOTC, CDOTU, CLADIV
+      EXTERNAL           LSAME, ICAMAX, ISAMAX, SCASUM, SLAMCH, CDOTC,
+     $                   CDOTU, CLADIV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CSSCAL, CTPSV, SLABAD, SSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, CMPLX, CONJG, MAX, MIN, REAL
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1, CABS2
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+      CABS2( ZDUM ) = ABS( REAL( ZDUM ) / 2. ) +
+     $                ABS( AIMAG( ZDUM ) / 2. )
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Test the input parameters.
+*
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT.
+     $         LSAME( NORMIN, 'N' ) ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CLATPS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine machine dependent parameters to control overflow.
+*
+      SMLNUM = SLAMCH( 'Safe minimum' )
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SMLNUM / SLAMCH( 'Precision' )
+      BIGNUM = ONE / SMLNUM
+      SCALE = ONE
+*
+      IF( LSAME( NORMIN, 'N' ) ) THEN
+*
+*        Compute the 1-norm of each column, not including the diagonal.
+*
+         IF( UPPER ) THEN
+*
+*           A is upper triangular.
+*
+            IP = 1
+            DO 10 J = 1, N
+               CNORM( J ) = SCASUM( J-1, AP( IP ), 1 )
+               IP = IP + J
+   10       CONTINUE
+         ELSE
+*
+*           A is lower triangular.
+*
+            IP = 1
+            DO 20 J = 1, N - 1
+               CNORM( J ) = SCASUM( N-J, AP( IP+1 ), 1 )
+               IP = IP + N - J + 1
+   20       CONTINUE
+            CNORM( N ) = ZERO
+         END IF
+      END IF
+*
+*     Scale the column norms by TSCAL if the maximum element in CNORM is
+*     greater than BIGNUM/2.
+*
+      IMAX = ISAMAX( N, CNORM, 1 )
+      TMAX = CNORM( IMAX )
+      IF( TMAX.LE.BIGNUM*HALF ) THEN
+         TSCAL = ONE
+      ELSE
+         TSCAL = HALF / ( SMLNUM*TMAX )
+         CALL SSCAL( N, TSCAL, CNORM, 1 )
+      END IF
+*
+*     Compute a bound on the computed solution vector to see if the
+*     Level 2 BLAS routine CTPSV can be used.
+*
+      XMAX = ZERO
+      DO 30 J = 1, N
+         XMAX = MAX( XMAX, CABS2( X( J ) ) )
+   30 CONTINUE
+      XBND = XMAX
+      IF( NOTRAN ) THEN
+*
+*        Compute the growth in A * x = b.
+*
+         IF( UPPER ) THEN
+            JFIRST = N
+            JLAST = 1
+            JINC = -1
+         ELSE
+            JFIRST = 1
+            JLAST = N
+            JINC = 1
+         END IF
+*
+         IF( TSCAL.NE.ONE ) THEN
+            GROW = ZERO
+            GO TO 60
+         END IF
+*
+         IF( NOUNIT ) THEN
+*
+*           A is non-unit triangular.
+*
+*           Compute GROW = 1/G(j) and XBND = 1/M(j).
+*           Initially, G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = HALF / MAX( XBND, SMLNUM )
+            XBND = GROW
+            IP = JFIRST*( JFIRST+1 ) / 2
+            JLEN = N
+            DO 40 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 60
+*
+               TJJS = AP( IP )
+               TJJ = CABS1( TJJS )
+*
+               IF( TJJ.GE.SMLNUM ) THEN
+*
+*                 M(j) = G(j-1) / abs(A(j,j))
+*
+                  XBND = MIN( XBND, MIN( ONE, TJJ )*GROW )
+               ELSE
+*
+*                 M(j) could overflow, set XBND to 0.
+*
+                  XBND = ZERO
+               END IF
+*
+               IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN
+*
+*                 G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) )
+*
+                  GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) )
+               ELSE
+*
+*                 G(j) could overflow, set GROW to 0.
+*
+                  GROW = ZERO
+               END IF
+               IP = IP + JINC*JLEN
+               JLEN = JLEN - 1
+   40       CONTINUE
+            GROW = XBND
+         ELSE
+*
+*           A is unit triangular.
+*
+*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) )
+            DO 50 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 60
+*
+*              G(j) = G(j-1)*( 1 + CNORM(j) )
+*
+               GROW = GROW*( ONE / ( ONE+CNORM( J ) ) )
+   50       CONTINUE
+         END IF
+   60    CONTINUE
+*
+      ELSE
+*
+*        Compute the growth in A**T * x = b  or  A**H * x = b.
+*
+         IF( UPPER ) THEN
+            JFIRST = 1
+            JLAST = N
+            JINC = 1
+         ELSE
+            JFIRST = N
+            JLAST = 1
+            JINC = -1
+         END IF
+*
+         IF( TSCAL.NE.ONE ) THEN
+            GROW = ZERO
+            GO TO 90
+         END IF
+*
+         IF( NOUNIT ) THEN
+*
+*           A is non-unit triangular.
+*
+*           Compute GROW = 1/G(j) and XBND = 1/M(j).
+*           Initially, M(0) = max{x(i), i=1,...,n}.
+*
+            GROW = HALF / MAX( XBND, SMLNUM )
+            XBND = GROW
+            IP = JFIRST*( JFIRST+1 ) / 2
+            JLEN = 1
+            DO 70 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 90
+*
+*              G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) )
+*
+               XJ = ONE + CNORM( J )
+               GROW = MIN( GROW, XBND / XJ )
+*
+               TJJS = AP( IP )
+               TJJ = CABS1( TJJS )
+*
+               IF( TJJ.GE.SMLNUM ) THEN
+*
+*                 M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j))
+*
+                  IF( XJ.GT.TJJ )
+     $               XBND = XBND*( TJJ / XJ )
+               ELSE
+*
+*                 M(j) could overflow, set XBND to 0.
+*
+                  XBND = ZERO
+               END IF
+               JLEN = JLEN + 1
+               IP = IP + JINC*JLEN
+   70       CONTINUE
+            GROW = MIN( GROW, XBND )
+         ELSE
+*
+*           A is unit triangular.
+*
+*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) )
+            DO 80 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 90
+*
+*              G(j) = ( 1 + CNORM(j) )*G(j-1)
+*
+               XJ = ONE + CNORM( J )
+               GROW = GROW / XJ
+   80       CONTINUE
+         END IF
+   90    CONTINUE
+      END IF
+*
+      IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN
+*
+*        Use the Level 2 BLAS solve if the reciprocal of the bound on
+*        elements of X is not too small.
+*
+         CALL CTPSV( UPLO, TRANS, DIAG, N, AP, X, 1 )
+      ELSE
+*
+*        Use a Level 1 BLAS solve, scaling intermediate results.
+*
+         IF( XMAX.GT.BIGNUM*HALF ) THEN
+*
+*           Scale X so that its components are less than or equal to
+*           BIGNUM in absolute value.
+*
+            SCALE = ( BIGNUM*HALF ) / XMAX
+            CALL CSSCAL( N, SCALE, X, 1 )
+            XMAX = BIGNUM
+         ELSE
+            XMAX = XMAX*TWO
+         END IF
+*
+         IF( NOTRAN ) THEN
+*
+*           Solve A * x = b
+*
+            IP = JFIRST*( JFIRST+1 ) / 2
+            DO 110 J = JFIRST, JLAST, JINC
+*
+*              Compute x(j) = b(j) / A(j,j), scaling x if necessary.
+*
+               XJ = CABS1( X( J ) )
+               IF( NOUNIT ) THEN
+                  TJJS = AP( IP )*TSCAL
+               ELSE
+                  TJJS = TSCAL
+                  IF( TSCAL.EQ.ONE )
+     $               GO TO 105
+               END IF
+                  TJJ = CABS1( TJJS )
+                  IF( TJJ.GT.SMLNUM ) THEN
+*
+*                    abs(A(j,j)) > SMLNUM:
+*
+                     IF( TJJ.LT.ONE ) THEN
+                        IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                          Scale x by 1/b(j).
+*
+                           REC = ONE / XJ
+                           CALL CSSCAL( N, REC, X, 1 )
+                           SCALE = SCALE*REC
+                           XMAX = XMAX*REC
+                        END IF
+                     END IF
+                     X( J ) = CLADIV( X( J ), TJJS )
+                     XJ = CABS1( X( J ) )
+                  ELSE IF( TJJ.GT.ZERO ) THEN
+*
+*                    0 < abs(A(j,j)) <= SMLNUM:
+*
+                     IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                       Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM
+*                       to avoid overflow when dividing by A(j,j).
+*
+                        REC = ( TJJ*BIGNUM ) / XJ
+                        IF( CNORM( J ).GT.ONE ) THEN
+*
+*                          Scale by 1/CNORM(j) to avoid overflow when
+*                          multiplying x(j) times column j.
+*
+                           REC = REC / CNORM( J )
+                        END IF
+                        CALL CSSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                     X( J ) = CLADIV( X( J ), TJJS )
+                     XJ = CABS1( X( J ) )
+                  ELSE
+*
+*                    A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+*                    scale = 0, and compute a solution to A*x = 0.
+*
+                     DO 100 I = 1, N
+                        X( I ) = ZERO
+  100                CONTINUE
+                     X( J ) = ONE
+                     XJ = ONE
+                     SCALE = ZERO
+                     XMAX = ZERO
+                  END IF
+  105          CONTINUE
+*
+*              Scale x if necessary to avoid overflow when adding a
+*              multiple of column j of A.
+*
+               IF( XJ.GT.ONE ) THEN
+                  REC = ONE / XJ
+                  IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN
+*
+*                    Scale x by 1/(2*abs(x(j))).
+*
+                     REC = REC*HALF
+                     CALL CSSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                  END IF
+               ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN
+*
+*                 Scale x by 1/2.
+*
+                  CALL CSSCAL( N, HALF, X, 1 )
+                  SCALE = SCALE*HALF
+               END IF
+*
+               IF( UPPER ) THEN
+                  IF( J.GT.1 ) THEN
+*
+*                    Compute the update
+*                       x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j)
+*
+                     CALL CAXPY( J-1, -X( J )*TSCAL, AP( IP-J+1 ), 1, X,
+     $                           1 )
+                     I = ICAMAX( J-1, X, 1 )
+                     XMAX = CABS1( X( I ) )
+                  END IF
+                  IP = IP - J
+               ELSE
+                  IF( J.LT.N ) THEN
+*
+*                    Compute the update
+*                       x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j)
+*
+                     CALL CAXPY( N-J, -X( J )*TSCAL, AP( IP+1 ), 1,
+     $                           X( J+1 ), 1 )
+                     I = J + ICAMAX( N-J, X( J+1 ), 1 )
+                     XMAX = CABS1( X( I ) )
+                  END IF
+                  IP = IP + N - J + 1
+               END IF
+  110       CONTINUE
+*
+         ELSE IF( LSAME( TRANS, 'T' ) ) THEN
+*
+*           Solve A**T * x = b
+*
+            IP = JFIRST*( JFIRST+1 ) / 2
+            JLEN = 1
+            DO 150 J = JFIRST, JLAST, JINC
+*
+*              Compute x(j) = b(j) - sum A(k,j)*x(k).
+*                                    k<>j
+*
+               XJ = CABS1( X( J ) )
+               USCAL = TSCAL
+               REC = ONE / MAX( XMAX, ONE )
+               IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN
+*
+*                 If x(j) could overflow, scale x by 1/(2*XMAX).
+*
+                  REC = REC*HALF
+                  IF( NOUNIT ) THEN
+                     TJJS = AP( IP )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                  END IF
+                     TJJ = CABS1( TJJS )
+                     IF( TJJ.GT.ONE ) THEN
+*
+*                       Divide by A(j,j) when scaling x if A(j,j) > 1.
+*
+                        REC = MIN( ONE, REC*TJJ )
+                        USCAL = CLADIV( USCAL, TJJS )
+                     END IF
+                  IF( REC.LT.ONE ) THEN
+                     CALL CSSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                     XMAX = XMAX*REC
+                  END IF
+               END IF
+*
+               CSUMJ = ZERO
+               IF( USCAL.EQ.CMPLX( ONE ) ) THEN
+*
+*                 If the scaling needed for A in the dot product is 1,
+*                 call CDOTU to perform the dot product.
+*
+                  IF( UPPER ) THEN
+                     CSUMJ = CDOTU( J-1, AP( IP-J+1 ), 1, X, 1 )
+                  ELSE IF( J.LT.N ) THEN
+                     CSUMJ = CDOTU( N-J, AP( IP+1 ), 1, X( J+1 ), 1 )
+                  END IF
+               ELSE
+*
+*                 Otherwise, use in-line code for the dot product.
+*
+                  IF( UPPER ) THEN
+                     DO 120 I = 1, J - 1
+                        CSUMJ = CSUMJ + ( AP( IP-J+I )*USCAL )*X( I )
+  120                CONTINUE
+                  ELSE IF( J.LT.N ) THEN
+                     DO 130 I = 1, N - J
+                        CSUMJ = CSUMJ + ( AP( IP+I )*USCAL )*X( J+I )
+  130                CONTINUE
+                  END IF
+               END IF
+*
+               IF( USCAL.EQ.CMPLX( TSCAL ) ) THEN
+*
+*                 Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j)
+*                 was not used to scale the dotproduct.
+*
+                  X( J ) = X( J ) - CSUMJ
+                  XJ = CABS1( X( J ) )
+                  IF( NOUNIT ) THEN
+*
+*                    Compute x(j) = x(j) / A(j,j), scaling if necessary.
+*
+                     TJJS = AP( IP )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                     IF( TSCAL.EQ.ONE )
+     $                  GO TO 145
+                  END IF
+                     TJJ = CABS1( TJJS )
+                     IF( TJJ.GT.SMLNUM ) THEN
+*
+*                       abs(A(j,j)) > SMLNUM:
+*
+                        IF( TJJ.LT.ONE ) THEN
+                           IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                             Scale X by 1/abs(x(j)).
+*
+                              REC = ONE / XJ
+                              CALL CSSCAL( N, REC, X, 1 )
+                              SCALE = SCALE*REC
+                              XMAX = XMAX*REC
+                           END IF
+                        END IF
+                        X( J ) = CLADIV( X( J ), TJJS )
+                     ELSE IF( TJJ.GT.ZERO ) THEN
+*
+*                       0 < abs(A(j,j)) <= SMLNUM:
+*
+                        IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM.
+*
+                           REC = ( TJJ*BIGNUM ) / XJ
+                           CALL CSSCAL( N, REC, X, 1 )
+                           SCALE = SCALE*REC
+                           XMAX = XMAX*REC
+                        END IF
+                        X( J ) = CLADIV( X( J ), TJJS )
+                     ELSE
+*
+*                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+*                       scale = 0 and compute a solution to A**T *x = 0.
+*
+                        DO 140 I = 1, N
+                           X( I ) = ZERO
+  140                   CONTINUE
+                        X( J ) = ONE
+                        SCALE = ZERO
+                        XMAX = ZERO
+                     END IF
+  145             CONTINUE
+               ELSE
+*
+*                 Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot
+*                 product has already been divided by 1/A(j,j).
+*
+                  X( J ) = CLADIV( X( J ), TJJS ) - CSUMJ
+               END IF
+               XMAX = MAX( XMAX, CABS1( X( J ) ) )
+               JLEN = JLEN + 1
+               IP = IP + JINC*JLEN
+  150       CONTINUE
+*
+         ELSE
+*
+*           Solve A**H * x = b
+*
+            IP = JFIRST*( JFIRST+1 ) / 2
+            JLEN = 1
+            DO 190 J = JFIRST, JLAST, JINC
+*
+*              Compute x(j) = b(j) - sum A(k,j)*x(k).
+*                                    k<>j
+*
+               XJ = CABS1( X( J ) )
+               USCAL = TSCAL
+               REC = ONE / MAX( XMAX, ONE )
+               IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN
+*
+*                 If x(j) could overflow, scale x by 1/(2*XMAX).
+*
+                  REC = REC*HALF
+                  IF( NOUNIT ) THEN
+                     TJJS = CONJG( AP( IP ) )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                  END IF
+                     TJJ = CABS1( TJJS )
+                     IF( TJJ.GT.ONE ) THEN
+*
+*                       Divide by A(j,j) when scaling x if A(j,j) > 1.
+*
+                        REC = MIN( ONE, REC*TJJ )
+                        USCAL = CLADIV( USCAL, TJJS )
+                     END IF
+                  IF( REC.LT.ONE ) THEN
+                     CALL CSSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                     XMAX = XMAX*REC
+                  END IF
+               END IF
+*
+               CSUMJ = ZERO
+               IF( USCAL.EQ.CMPLX( ONE ) ) THEN
+*
+*                 If the scaling needed for A in the dot product is 1,
+*                 call CDOTC to perform the dot product.
+*
+                  IF( UPPER ) THEN
+                     CSUMJ = CDOTC( J-1, AP( IP-J+1 ), 1, X, 1 )
+                  ELSE IF( J.LT.N ) THEN
+                     CSUMJ = CDOTC( N-J, AP( IP+1 ), 1, X( J+1 ), 1 )
+                  END IF
+               ELSE
+*
+*                 Otherwise, use in-line code for the dot product.
+*
+                  IF( UPPER ) THEN
+                     DO 160 I = 1, J - 1
+                        CSUMJ = CSUMJ + ( CONJG( AP( IP-J+I ) )*USCAL )*
+     $                          X( I )
+  160                CONTINUE
+                  ELSE IF( J.LT.N ) THEN
+                     DO 170 I = 1, N - J
+                        CSUMJ = CSUMJ + ( CONJG( AP( IP+I ) )*USCAL )*
+     $                          X( J+I )
+  170                CONTINUE
+                  END IF
+               END IF
+*
+               IF( USCAL.EQ.CMPLX( TSCAL ) ) THEN
+*
+*                 Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j)
+*                 was not used to scale the dotproduct.
+*
+                  X( J ) = X( J ) - CSUMJ
+                  XJ = CABS1( X( J ) )
+                  IF( NOUNIT ) THEN
+*
+*                    Compute x(j) = x(j) / A(j,j), scaling if necessary.
+*
+                     TJJS = CONJG( AP( IP ) )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                     IF( TSCAL.EQ.ONE )
+     $                  GO TO 185
+                  END IF
+                     TJJ = CABS1( TJJS )
+                     IF( TJJ.GT.SMLNUM ) THEN
+*
+*                       abs(A(j,j)) > SMLNUM:
+*
+                        IF( TJJ.LT.ONE ) THEN
+                           IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                             Scale X by 1/abs(x(j)).
+*
+                              REC = ONE / XJ
+                              CALL CSSCAL( N, REC, X, 1 )
+                              SCALE = SCALE*REC
+                              XMAX = XMAX*REC
+                           END IF
+                        END IF
+                        X( J ) = CLADIV( X( J ), TJJS )
+                     ELSE IF( TJJ.GT.ZERO ) THEN
+*
+*                       0 < abs(A(j,j)) <= SMLNUM:
+*
+                        IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM.
+*
+                           REC = ( TJJ*BIGNUM ) / XJ
+                           CALL CSSCAL( N, REC, X, 1 )
+                           SCALE = SCALE*REC
+                           XMAX = XMAX*REC
+                        END IF
+                        X( J ) = CLADIV( X( J ), TJJS )
+                     ELSE
+*
+*                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+*                       scale = 0 and compute a solution to A**H *x = 0.
+*
+                        DO 180 I = 1, N
+                           X( I ) = ZERO
+  180                   CONTINUE
+                        X( J ) = ONE
+                        SCALE = ZERO
+                        XMAX = ZERO
+                     END IF
+  185             CONTINUE
+               ELSE
+*
+*                 Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot
+*                 product has already been divided by 1/A(j,j).
+*
+                  X( J ) = CLADIV( X( J ), TJJS ) - CSUMJ
+               END IF
+               XMAX = MAX( XMAX, CABS1( X( J ) ) )
+               JLEN = JLEN + 1
+               IP = IP + JINC*JLEN
+  190       CONTINUE
+         END IF
+         SCALE = SCALE / TSCAL
+      END IF
+*
+*     Scale the column norms by 1/TSCAL for return.
+*
+      IF( TSCAL.NE.ONE ) THEN
+         CALL SSCAL( N, ONE / TSCAL, CNORM, 1 )
+      END IF
+*
+      RETURN
+*
+*     End of CLATPS
+*
+      END
diff --git a/costa/native/external/lapack/clatrd.f b/costa/native/external/lapack/clatrd.f
new file mode 100644
index 000000000..fe44d842a
--- /dev/null
+++ b/costa/native/external/lapack/clatrd.f
@@ -0,0 +1,280 @@
+      SUBROUTINE CLATRD( UPLO, N, NB, A, LDA, E, TAU, W, LDW )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            LDA, LDW, N, NB
+*     ..
+*     .. Array Arguments ..
+      REAL               E( * )
+      COMPLEX            A( LDA, * ), TAU( * ), W( LDW, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLATRD reduces NB rows and columns of a complex Hermitian matrix A to
+*  Hermitian tridiagonal form by a unitary similarity
+*  transformation Q' * A * Q, and returns the matrices V and W which are
+*  needed to apply the transformation to the unreduced part of A.
+*
+*  If UPLO = 'U', CLATRD reduces the last NB rows and columns of a
+*  matrix, of which the upper triangle is supplied;
+*  if UPLO = 'L', CLATRD reduces the first NB rows and columns of a
+*  matrix, of which the lower triangle is supplied.
+*
+*  This is an auxiliary routine called by CHETRD.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER
+*          Specifies whether the upper or lower triangular part of the
+*          Hermitian matrix A is stored:
+*          = 'U': Upper triangular
+*          = 'L': Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.
+*
+*  NB      (input) INTEGER
+*          The number of rows and columns to be reduced.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+*          n-by-n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n-by-n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*          On exit:
+*          if UPLO = 'U', the last NB columns have been reduced to
+*            tridiagonal form, with the diagonal elements overwriting
+*            the diagonal elements of A; the elements above the diagonal
+*            with the array TAU, represent the unitary matrix Q as a
+*            product of elementary reflectors;
+*          if UPLO = 'L', the first NB columns have been reduced to
+*            tridiagonal form, with the diagonal elements overwriting
+*            the diagonal elements of A; the elements below the diagonal
+*            with the array TAU, represent the  unitary matrix Q as a
+*            product of elementary reflectors.
+*          See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  E       (output) REAL array, dimension (N-1)
+*          If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal
+*          elements of the last NB columns of the reduced matrix;
+*          if UPLO = 'L', E(1:nb) contains the subdiagonal elements of
+*          the first NB columns of the reduced matrix.
+*
+*  TAU     (output) COMPLEX array, dimension (N-1)
+*          The scalar factors of the elementary reflectors, stored in
+*          TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'.
+*          See Further Details.
+*
+*  W       (output) COMPLEX array, dimension (LDW,NB)
+*          The n-by-nb matrix W required to update the unreduced part
+*          of A.
+*
+*  LDW     (input) INTEGER
+*          The leading dimension of the array W. LDW >= max(1,N).
+*
+*  Further Details
+*  ===============
+*
+*  If UPLO = 'U', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(n) H(n-1) . . . H(n-nb+1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i),
+*  and tau in TAU(i-1).
+*
+*  If UPLO = 'L', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(1) H(2) . . . H(nb).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i),
+*  and tau in TAU(i).
+*
+*  The elements of the vectors v together form the n-by-nb matrix V
+*  which is needed, with W, to apply the transformation to the unreduced
+*  part of the matrix, using a Hermitian rank-2k update of the form:
+*  A := A - V*W' - W*V'.
+*
+*  The contents of A on exit are illustrated by the following examples
+*  with n = 5 and nb = 2:
+*
+*  if UPLO = 'U':                       if UPLO = 'L':
+*
+*    (  a   a   a   v4  v5 )              (  d                  )
+*    (      a   a   v4  v5 )              (  1   d              )
+*    (          a   1   v5 )              (  v1  1   a          )
+*    (              d   1  )              (  v1  v2  a   a      )
+*    (                  d  )              (  v1  v2  a   a   a  )
+*
+*  where d denotes a diagonal element of the reduced matrix, a denotes
+*  an element of the original matrix that is unchanged, and vi denotes
+*  an element of the vector defining H(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ZERO, ONE, HALF
+      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   ONE = ( 1.0E+0, 0.0E+0 ),
+     $                   HALF = ( 0.5E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IW
+      COMPLEX            ALPHA
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CGEMV, CHEMV, CLACGV, CLARFG, CSCAL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      COMPLEX            CDOTC
+      EXTERNAL           LSAME, CDOTC
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 )
+     $   RETURN
+*
+      IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*        Reduce last NB columns of upper triangle
+*
+         DO 10 I = N, N - NB + 1, -1
+            IW = I - N + NB
+            IF( I.LT.N ) THEN
+*
+*              Update A(1:i,i)
+*
+               A( I, I ) = REAL( A( I, I ) )
+               CALL CLACGV( N-I, W( I, IW+1 ), LDW )
+               CALL CGEMV( 'No transpose', I, N-I, -ONE, A( 1, I+1 ),
+     $                     LDA, W( I, IW+1 ), LDW, ONE, A( 1, I ), 1 )
+               CALL CLACGV( N-I, W( I, IW+1 ), LDW )
+               CALL CLACGV( N-I, A( I, I+1 ), LDA )
+               CALL CGEMV( 'No transpose', I, N-I, -ONE, W( 1, IW+1 ),
+     $                     LDW, A( I, I+1 ), LDA, ONE, A( 1, I ), 1 )
+               CALL CLACGV( N-I, A( I, I+1 ), LDA )
+               A( I, I ) = REAL( A( I, I ) )
+            END IF
+            IF( I.GT.1 ) THEN
+*
+*              Generate elementary reflector H(i) to annihilate
+*              A(1:i-2,i)
+*
+               ALPHA = A( I-1, I )
+               CALL CLARFG( I-1, ALPHA, A( 1, I ), 1, TAU( I-1 ) )
+               E( I-1 ) = ALPHA
+               A( I-1, I ) = ONE
+*
+*              Compute W(1:i-1,i)
+*
+               CALL CHEMV( 'Upper', I-1, ONE, A, LDA, A( 1, I ), 1,
+     $                     ZERO, W( 1, IW ), 1 )
+               IF( I.LT.N ) THEN
+                  CALL CGEMV( 'Conjugate transpose', I-1, N-I, ONE,
+     $                        W( 1, IW+1 ), LDW, A( 1, I ), 1, ZERO,
+     $                        W( I+1, IW ), 1 )
+                  CALL CGEMV( 'No transpose', I-1, N-I, -ONE,
+     $                        A( 1, I+1 ), LDA, W( I+1, IW ), 1, ONE,
+     $                        W( 1, IW ), 1 )
+                  CALL CGEMV( 'Conjugate transpose', I-1, N-I, ONE,
+     $                        A( 1, I+1 ), LDA, A( 1, I ), 1, ZERO,
+     $                        W( I+1, IW ), 1 )
+                  CALL CGEMV( 'No transpose', I-1, N-I, -ONE,
+     $                        W( 1, IW+1 ), LDW, W( I+1, IW ), 1, ONE,
+     $                        W( 1, IW ), 1 )
+               END IF
+               CALL CSCAL( I-1, TAU( I-1 ), W( 1, IW ), 1 )
+               ALPHA = -HALF*TAU( I-1 )*CDOTC( I-1, W( 1, IW ), 1,
+     $                 A( 1, I ), 1 )
+               CALL CAXPY( I-1, ALPHA, A( 1, I ), 1, W( 1, IW ), 1 )
+            END IF
+*
+   10    CONTINUE
+      ELSE
+*
+*        Reduce first NB columns of lower triangle
+*
+         DO 20 I = 1, NB
+*
+*           Update A(i:n,i)
+*
+            A( I, I ) = REAL( A( I, I ) )
+            CALL CLACGV( I-1, W( I, 1 ), LDW )
+            CALL CGEMV( 'No transpose', N-I+1, I-1, -ONE, A( I, 1 ),
+     $                  LDA, W( I, 1 ), LDW, ONE, A( I, I ), 1 )
+            CALL CLACGV( I-1, W( I, 1 ), LDW )
+            CALL CLACGV( I-1, A( I, 1 ), LDA )
+            CALL CGEMV( 'No transpose', N-I+1, I-1, -ONE, W( I, 1 ),
+     $                  LDW, A( I, 1 ), LDA, ONE, A( I, I ), 1 )
+            CALL CLACGV( I-1, A( I, 1 ), LDA )
+            A( I, I ) = REAL( A( I, I ) )
+            IF( I.LT.N ) THEN
+*
+*              Generate elementary reflector H(i) to annihilate
+*              A(i+2:n,i)
+*
+               ALPHA = A( I+1, I )
+               CALL CLARFG( N-I, ALPHA, A( MIN( I+2, N ), I ), 1,
+     $                      TAU( I ) )
+               E( I ) = ALPHA
+               A( I+1, I ) = ONE
+*
+*              Compute W(i+1:n,i)
+*
+               CALL CHEMV( 'Lower', N-I, ONE, A( I+1, I+1 ), LDA,
+     $                     A( I+1, I ), 1, ZERO, W( I+1, I ), 1 )
+               CALL CGEMV( 'Conjugate transpose', N-I, I-1, ONE,
+     $                     W( I+1, 1 ), LDW, A( I+1, I ), 1, ZERO,
+     $                     W( 1, I ), 1 )
+               CALL CGEMV( 'No transpose', N-I, I-1, -ONE, A( I+1, 1 ),
+     $                     LDA, W( 1, I ), 1, ONE, W( I+1, I ), 1 )
+               CALL CGEMV( 'Conjugate transpose', N-I, I-1, ONE,
+     $                     A( I+1, 1 ), LDA, A( I+1, I ), 1, ZERO,
+     $                     W( 1, I ), 1 )
+               CALL CGEMV( 'No transpose', N-I, I-1, -ONE, W( I+1, 1 ),
+     $                     LDW, W( 1, I ), 1, ONE, W( I+1, I ), 1 )
+               CALL CSCAL( N-I, TAU( I ), W( I+1, I ), 1 )
+               ALPHA = -HALF*TAU( I )*CDOTC( N-I, W( I+1, I ), 1,
+     $                 A( I+1, I ), 1 )
+               CALL CAXPY( N-I, ALPHA, A( I+1, I ), 1, W( I+1, I ), 1 )
+            END IF
+*
+   20    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CLATRD
+*
+      END
diff --git a/costa/native/external/lapack/clatrs.f b/costa/native/external/lapack/clatrs.f
new file mode 100644
index 000000000..cd9243b1c
--- /dev/null
+++ b/costa/native/external/lapack/clatrs.f
@@ -0,0 +1,880 @@
+      SUBROUTINE CLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X, SCALE,
+     $                   CNORM, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORMIN, TRANS, UPLO
+      INTEGER            INFO, LDA, N
+      REAL               SCALE
+*     ..
+*     .. Array Arguments ..
+      REAL               CNORM( * )
+      COMPLEX            A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLATRS solves one of the triangular systems
+*
+*     A * x = s*b,  A**T * x = s*b,  or  A**H * x = s*b,
+*
+*  with scaling to prevent overflow.  Here A is an upper or lower
+*  triangular matrix, A**T denotes the transpose of A, A**H denotes the
+*  conjugate transpose of A, x and b are n-element vectors, and s is a
+*  scaling factor, usually less than or equal to 1, chosen so that the
+*  components of x will be less than the overflow threshold.  If the
+*  unscaled problem will not cause overflow, the Level 2 BLAS routine
+*  CTRSV is called. If the matrix A is singular (A(j,j) = 0 for some j),
+*  then s is set to 0 and a non-trivial solution to A*x = 0 is returned.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the matrix A is upper or lower triangular.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the operation applied to A.
+*          = 'N':  Solve A * x = s*b     (No transpose)
+*          = 'T':  Solve A**T * x = s*b  (Transpose)
+*          = 'C':  Solve A**H * x = s*b  (Conjugate transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          Specifies whether or not the matrix A is unit triangular.
+*          = 'N':  Non-unit triangular
+*          = 'U':  Unit triangular
+*
+*  NORMIN  (input) CHARACTER*1
+*          Specifies whether CNORM has been set or not.
+*          = 'Y':  CNORM contains the column norms on entry
+*          = 'N':  CNORM is not set on entry.  On exit, the norms will
+*                  be computed and stored in CNORM.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input) COMPLEX array, dimension (LDA,N)
+*          The triangular matrix A.  If UPLO = 'U', the leading n by n
+*          upper triangular part of the array A contains the upper
+*          triangular matrix, and the strictly lower triangular part of
+*          A is not referenced.  If UPLO = 'L', the leading n by n lower
+*          triangular part of the array A contains the lower triangular
+*          matrix, and the strictly upper triangular part of A is not
+*          referenced.  If DIAG = 'U', the diagonal elements of A are
+*          also not referenced and are assumed to be 1.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max (1,N).
+*
+*  X       (input/output) COMPLEX array, dimension (N)
+*          On entry, the right hand side b of the triangular system.
+*          On exit, X is overwritten by the solution vector x.
+*
+*  SCALE   (output) REAL
+*          The scaling factor s for the triangular system
+*             A * x = s*b,  A**T * x = s*b,  or  A**H * x = s*b.
+*          If SCALE = 0, the matrix A is singular or badly scaled, and
+*          the vector x is an exact or approximate solution to A*x = 0.
+*
+*  CNORM   (input or output) REAL array, dimension (N)
+*
+*          If NORMIN = 'Y', CNORM is an input argument and CNORM(j)
+*          contains the norm of the off-diagonal part of the j-th column
+*          of A.  If TRANS = 'N', CNORM(j) must be greater than or equal
+*          to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)
+*          must be greater than or equal to the 1-norm.
+*
+*          If NORMIN = 'N', CNORM is an output argument and CNORM(j)
+*          returns the 1-norm of the offdiagonal part of the j-th column
+*          of A.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -k, the k-th argument had an illegal value
+*
+*  Further Details
+*  ======= =======
+*
+*  A rough bound on x is computed; if that is less than overflow, CTRSV
+*  is called, otherwise, specific code is used which checks for possible
+*  overflow or divide-by-zero at every operation.
+*
+*  A columnwise scheme is used for solving A*x = b.  The basic algorithm
+*  if A is lower triangular is
+*
+*       x[1:n] := b[1:n]
+*       for j = 1, ..., n
+*            x(j) := x(j) / A(j,j)
+*            x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j]
+*       end
+*
+*  Define bounds on the components of x after j iterations of the loop:
+*     M(j) = bound on x[1:j]
+*     G(j) = bound on x[j+1:n]
+*  Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}.
+*
+*  Then for iteration j+1 we have
+*     M(j+1) <= G(j) / | A(j+1,j+1) |
+*     G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] |
+*            <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | )
+*
+*  where CNORM(j+1) is greater than or equal to the infinity-norm of
+*  column j+1 of A, not counting the diagonal.  Hence
+*
+*     G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | )
+*                  1<=i<=j
+*  and
+*
+*     |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| )
+*                                   1<=i< j
+*
+*  Since |x(j)| <= M(j), we use the Level 2 BLAS routine CTRSV if the
+*  reciprocal of the largest M(j), j=1,..,n, is larger than
+*  max(underflow, 1/overflow).
+*
+*  The bound on x(j) is also used to determine when a step in the
+*  columnwise method can be performed without fear of overflow.  If
+*  the computed bound is greater than a large constant, x is scaled to
+*  prevent overflow, but if the bound overflows, x is set to 0, x(j) to
+*  1, and scale to 0, and a non-trivial solution to A*x = 0 is found.
+*
+*  Similarly, a row-wise scheme is used to solve A**T *x = b  or
+*  A**H *x = b.  The basic algorithm for A upper triangular is
+*
+*       for j = 1, ..., n
+*            x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j)
+*       end
+*
+*  We simultaneously compute two bounds
+*       G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j
+*       M(j) = bound on x(i), 1<=i<=j
+*
+*  The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we
+*  add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1.
+*  Then the bound on x(j) is
+*
+*       M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) |
+*
+*            <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| )
+*                      1<=i<=j
+*
+*  and we can safely call CTRSV if 1/M(n) and 1/G(n) are both greater
+*  than max(underflow, 1/overflow).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, HALF, ONE, TWO
+      PARAMETER          ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0,
+     $                   TWO = 2.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN, NOUNIT, UPPER
+      INTEGER            I, IMAX, J, JFIRST, JINC, JLAST
+      REAL               BIGNUM, GROW, REC, SMLNUM, TJJ, TMAX, TSCAL,
+     $                   XBND, XJ, XMAX
+      COMPLEX            CSUMJ, TJJS, USCAL, ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ICAMAX, ISAMAX
+      REAL               SCASUM, SLAMCH
+      COMPLEX            CDOTC, CDOTU, CLADIV
+      EXTERNAL           LSAME, ICAMAX, ISAMAX, SCASUM, SLAMCH, CDOTC,
+     $                   CDOTU, CLADIV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CSSCAL, CTRSV, SLABAD, SSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, CMPLX, CONJG, MAX, MIN, REAL
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1, CABS2
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+      CABS2( ZDUM ) = ABS( REAL( ZDUM ) / 2. ) +
+     $                ABS( AIMAG( ZDUM ) / 2. )
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Test the input parameters.
+*
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT.
+     $         LSAME( NORMIN, 'N' ) ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CLATRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine machine dependent parameters to control overflow.
+*
+      SMLNUM = SLAMCH( 'Safe minimum' )
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SMLNUM / SLAMCH( 'Precision' )
+      BIGNUM = ONE / SMLNUM
+      SCALE = ONE
+*
+      IF( LSAME( NORMIN, 'N' ) ) THEN
+*
+*        Compute the 1-norm of each column, not including the diagonal.
+*
+         IF( UPPER ) THEN
+*
+*           A is upper triangular.
+*
+            DO 10 J = 1, N
+               CNORM( J ) = SCASUM( J-1, A( 1, J ), 1 )
+   10       CONTINUE
+         ELSE
+*
+*           A is lower triangular.
+*
+            DO 20 J = 1, N - 1
+               CNORM( J ) = SCASUM( N-J, A( J+1, J ), 1 )
+   20       CONTINUE
+            CNORM( N ) = ZERO
+         END IF
+      END IF
+*
+*     Scale the column norms by TSCAL if the maximum element in CNORM is
+*     greater than BIGNUM/2.
+*
+      IMAX = ISAMAX( N, CNORM, 1 )
+      TMAX = CNORM( IMAX )
+      IF( TMAX.LE.BIGNUM*HALF ) THEN
+         TSCAL = ONE
+      ELSE
+         TSCAL = HALF / ( SMLNUM*TMAX )
+         CALL SSCAL( N, TSCAL, CNORM, 1 )
+      END IF
+*
+*     Compute a bound on the computed solution vector to see if the
+*     Level 2 BLAS routine CTRSV can be used.
+*
+      XMAX = ZERO
+      DO 30 J = 1, N
+         XMAX = MAX( XMAX, CABS2( X( J ) ) )
+   30 CONTINUE
+      XBND = XMAX
+*
+      IF( NOTRAN ) THEN
+*
+*        Compute the growth in A * x = b.
+*
+         IF( UPPER ) THEN
+            JFIRST = N
+            JLAST = 1
+            JINC = -1
+         ELSE
+            JFIRST = 1
+            JLAST = N
+            JINC = 1
+         END IF
+*
+         IF( TSCAL.NE.ONE ) THEN
+            GROW = ZERO
+            GO TO 60
+         END IF
+*
+         IF( NOUNIT ) THEN
+*
+*           A is non-unit triangular.
+*
+*           Compute GROW = 1/G(j) and XBND = 1/M(j).
+*           Initially, G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = HALF / MAX( XBND, SMLNUM )
+            XBND = GROW
+            DO 40 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 60
+*
+               TJJS = A( J, J )
+               TJJ = CABS1( TJJS )
+*
+               IF( TJJ.GE.SMLNUM ) THEN
+*
+*                 M(j) = G(j-1) / abs(A(j,j))
+*
+                  XBND = MIN( XBND, MIN( ONE, TJJ )*GROW )
+               ELSE
+*
+*                 M(j) could overflow, set XBND to 0.
+*
+                  XBND = ZERO
+               END IF
+*
+               IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN
+*
+*                 G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) )
+*
+                  GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) )
+               ELSE
+*
+*                 G(j) could overflow, set GROW to 0.
+*
+                  GROW = ZERO
+               END IF
+   40       CONTINUE
+            GROW = XBND
+         ELSE
+*
+*           A is unit triangular.
+*
+*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) )
+            DO 50 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 60
+*
+*              G(j) = G(j-1)*( 1 + CNORM(j) )
+*
+               GROW = GROW*( ONE / ( ONE+CNORM( J ) ) )
+   50       CONTINUE
+         END IF
+   60    CONTINUE
+*
+      ELSE
+*
+*        Compute the growth in A**T * x = b  or  A**H * x = b.
+*
+         IF( UPPER ) THEN
+            JFIRST = 1
+            JLAST = N
+            JINC = 1
+         ELSE
+            JFIRST = N
+            JLAST = 1
+            JINC = -1
+         END IF
+*
+         IF( TSCAL.NE.ONE ) THEN
+            GROW = ZERO
+            GO TO 90
+         END IF
+*
+         IF( NOUNIT ) THEN
+*
+*           A is non-unit triangular.
+*
+*           Compute GROW = 1/G(j) and XBND = 1/M(j).
+*           Initially, M(0) = max{x(i), i=1,...,n}.
+*
+            GROW = HALF / MAX( XBND, SMLNUM )
+            XBND = GROW
+            DO 70 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 90
+*
+*              G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) )
+*
+               XJ = ONE + CNORM( J )
+               GROW = MIN( GROW, XBND / XJ )
+*
+               TJJS = A( J, J )
+               TJJ = CABS1( TJJS )
+*
+               IF( TJJ.GE.SMLNUM ) THEN
+*
+*                 M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j))
+*
+                  IF( XJ.GT.TJJ )
+     $               XBND = XBND*( TJJ / XJ )
+               ELSE
+*
+*                 M(j) could overflow, set XBND to 0.
+*
+                  XBND = ZERO
+               END IF
+   70       CONTINUE
+            GROW = MIN( GROW, XBND )
+         ELSE
+*
+*           A is unit triangular.
+*
+*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) )
+            DO 80 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 90
+*
+*              G(j) = ( 1 + CNORM(j) )*G(j-1)
+*
+               XJ = ONE + CNORM( J )
+               GROW = GROW / XJ
+   80       CONTINUE
+         END IF
+   90    CONTINUE
+      END IF
+*
+      IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN
+*
+*        Use the Level 2 BLAS solve if the reciprocal of the bound on
+*        elements of X is not too small.
+*
+         CALL CTRSV( UPLO, TRANS, DIAG, N, A, LDA, X, 1 )
+      ELSE
+*
+*        Use a Level 1 BLAS solve, scaling intermediate results.
+*
+         IF( XMAX.GT.BIGNUM*HALF ) THEN
+*
+*           Scale X so that its components are less than or equal to
+*           BIGNUM in absolute value.
+*
+            SCALE = ( BIGNUM*HALF ) / XMAX
+            CALL CSSCAL( N, SCALE, X, 1 )
+            XMAX = BIGNUM
+         ELSE
+            XMAX = XMAX*TWO
+         END IF
+*
+         IF( NOTRAN ) THEN
+*
+*           Solve A * x = b
+*
+            DO 110 J = JFIRST, JLAST, JINC
+*
+*              Compute x(j) = b(j) / A(j,j), scaling x if necessary.
+*
+               XJ = CABS1( X( J ) )
+               IF( NOUNIT ) THEN
+                  TJJS = A( J, J )*TSCAL
+               ELSE
+                  TJJS = TSCAL
+                  IF( TSCAL.EQ.ONE )
+     $               GO TO 105
+               END IF
+                  TJJ = CABS1( TJJS )
+                  IF( TJJ.GT.SMLNUM ) THEN
+*
+*                    abs(A(j,j)) > SMLNUM:
+*
+                     IF( TJJ.LT.ONE ) THEN
+                        IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                          Scale x by 1/b(j).
+*
+                           REC = ONE / XJ
+                           CALL CSSCAL( N, REC, X, 1 )
+                           SCALE = SCALE*REC
+                           XMAX = XMAX*REC
+                        END IF
+                     END IF
+                     X( J ) = CLADIV( X( J ), TJJS )
+                     XJ = CABS1( X( J ) )
+                  ELSE IF( TJJ.GT.ZERO ) THEN
+*
+*                    0 < abs(A(j,j)) <= SMLNUM:
+*
+                     IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                       Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM
+*                       to avoid overflow when dividing by A(j,j).
+*
+                        REC = ( TJJ*BIGNUM ) / XJ
+                        IF( CNORM( J ).GT.ONE ) THEN
+*
+*                          Scale by 1/CNORM(j) to avoid overflow when
+*                          multiplying x(j) times column j.
+*
+                           REC = REC / CNORM( J )
+                        END IF
+                        CALL CSSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                     X( J ) = CLADIV( X( J ), TJJS )
+                     XJ = CABS1( X( J ) )
+                  ELSE
+*
+*                    A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+*                    scale = 0, and compute a solution to A*x = 0.
+*
+                     DO 100 I = 1, N
+                        X( I ) = ZERO
+  100                CONTINUE
+                     X( J ) = ONE
+                     XJ = ONE
+                     SCALE = ZERO
+                     XMAX = ZERO
+                  END IF
+  105          CONTINUE
+*
+*              Scale x if necessary to avoid overflow when adding a
+*              multiple of column j of A.
+*
+               IF( XJ.GT.ONE ) THEN
+                  REC = ONE / XJ
+                  IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN
+*
+*                    Scale x by 1/(2*abs(x(j))).
+*
+                     REC = REC*HALF
+                     CALL CSSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                  END IF
+               ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN
+*
+*                 Scale x by 1/2.
+*
+                  CALL CSSCAL( N, HALF, X, 1 )
+                  SCALE = SCALE*HALF
+               END IF
+*
+               IF( UPPER ) THEN
+                  IF( J.GT.1 ) THEN
+*
+*                    Compute the update
+*                       x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j)
+*
+                     CALL CAXPY( J-1, -X( J )*TSCAL, A( 1, J ), 1, X,
+     $                           1 )
+                     I = ICAMAX( J-1, X, 1 )
+                     XMAX = CABS1( X( I ) )
+                  END IF
+               ELSE
+                  IF( J.LT.N ) THEN
+*
+*                    Compute the update
+*                       x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j)
+*
+                     CALL CAXPY( N-J, -X( J )*TSCAL, A( J+1, J ), 1,
+     $                           X( J+1 ), 1 )
+                     I = J + ICAMAX( N-J, X( J+1 ), 1 )
+                     XMAX = CABS1( X( I ) )
+                  END IF
+               END IF
+  110       CONTINUE
+*
+         ELSE IF( LSAME( TRANS, 'T' ) ) THEN
+*
+*           Solve A**T * x = b
+*
+            DO 150 J = JFIRST, JLAST, JINC
+*
+*              Compute x(j) = b(j) - sum A(k,j)*x(k).
+*                                    k<>j
+*
+               XJ = CABS1( X( J ) )
+               USCAL = TSCAL
+               REC = ONE / MAX( XMAX, ONE )
+               IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN
+*
+*                 If x(j) could overflow, scale x by 1/(2*XMAX).
+*
+                  REC = REC*HALF
+                  IF( NOUNIT ) THEN
+                     TJJS = A( J, J )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                  END IF
+                     TJJ = CABS1( TJJS )
+                     IF( TJJ.GT.ONE ) THEN
+*
+*                       Divide by A(j,j) when scaling x if A(j,j) > 1.
+*
+                        REC = MIN( ONE, REC*TJJ )
+                        USCAL = CLADIV( USCAL, TJJS )
+                     END IF
+                  IF( REC.LT.ONE ) THEN
+                     CALL CSSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                     XMAX = XMAX*REC
+                  END IF
+               END IF
+*
+               CSUMJ = ZERO
+               IF( USCAL.EQ.CMPLX( ONE ) ) THEN
+*
+*                 If the scaling needed for A in the dot product is 1,
+*                 call CDOTU to perform the dot product.
+*
+                  IF( UPPER ) THEN
+                     CSUMJ = CDOTU( J-1, A( 1, J ), 1, X, 1 )
+                  ELSE IF( J.LT.N ) THEN
+                     CSUMJ = CDOTU( N-J, A( J+1, J ), 1, X( J+1 ), 1 )
+                  END IF
+               ELSE
+*
+*                 Otherwise, use in-line code for the dot product.
+*
+                  IF( UPPER ) THEN
+                     DO 120 I = 1, J - 1
+                        CSUMJ = CSUMJ + ( A( I, J )*USCAL )*X( I )
+  120                CONTINUE
+                  ELSE IF( J.LT.N ) THEN
+                     DO 130 I = J + 1, N
+                        CSUMJ = CSUMJ + ( A( I, J )*USCAL )*X( I )
+  130                CONTINUE
+                  END IF
+               END IF
+*
+               IF( USCAL.EQ.CMPLX( TSCAL ) ) THEN
+*
+*                 Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j)
+*                 was not used to scale the dotproduct.
+*
+                  X( J ) = X( J ) - CSUMJ
+                  XJ = CABS1( X( J ) )
+                  IF( NOUNIT ) THEN
+                     TJJS = A( J, J )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                     IF( TSCAL.EQ.ONE )
+     $                  GO TO 145
+                  END IF
+*
+*                    Compute x(j) = x(j) / A(j,j), scaling if necessary.
+*
+                     TJJ = CABS1( TJJS )
+                     IF( TJJ.GT.SMLNUM ) THEN
+*
+*                       abs(A(j,j)) > SMLNUM:
+*
+                        IF( TJJ.LT.ONE ) THEN
+                           IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                             Scale X by 1/abs(x(j)).
+*
+                              REC = ONE / XJ
+                              CALL CSSCAL( N, REC, X, 1 )
+                              SCALE = SCALE*REC
+                              XMAX = XMAX*REC
+                           END IF
+                        END IF
+                        X( J ) = CLADIV( X( J ), TJJS )
+                     ELSE IF( TJJ.GT.ZERO ) THEN
+*
+*                       0 < abs(A(j,j)) <= SMLNUM:
+*
+                        IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM.
+*
+                           REC = ( TJJ*BIGNUM ) / XJ
+                           CALL CSSCAL( N, REC, X, 1 )
+                           SCALE = SCALE*REC
+                           XMAX = XMAX*REC
+                        END IF
+                        X( J ) = CLADIV( X( J ), TJJS )
+                     ELSE
+*
+*                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+*                       scale = 0 and compute a solution to A**T *x = 0.
+*
+                        DO 140 I = 1, N
+                           X( I ) = ZERO
+  140                   CONTINUE
+                        X( J ) = ONE
+                        SCALE = ZERO
+                        XMAX = ZERO
+                     END IF
+  145             CONTINUE
+               ELSE
+*
+*                 Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot
+*                 product has already been divided by 1/A(j,j).
+*
+                  X( J ) = CLADIV( X( J ), TJJS ) - CSUMJ
+               END IF
+               XMAX = MAX( XMAX, CABS1( X( J ) ) )
+  150       CONTINUE
+*
+         ELSE
+*
+*           Solve A**H * x = b
+*
+            DO 190 J = JFIRST, JLAST, JINC
+*
+*              Compute x(j) = b(j) - sum A(k,j)*x(k).
+*                                    k<>j
+*
+               XJ = CABS1( X( J ) )
+               USCAL = TSCAL
+               REC = ONE / MAX( XMAX, ONE )
+               IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN
+*
+*                 If x(j) could overflow, scale x by 1/(2*XMAX).
+*
+                  REC = REC*HALF
+                  IF( NOUNIT ) THEN
+                     TJJS = CONJG( A( J, J ) )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                  END IF
+                     TJJ = CABS1( TJJS )
+                     IF( TJJ.GT.ONE ) THEN
+*
+*                       Divide by A(j,j) when scaling x if A(j,j) > 1.
+*
+                        REC = MIN( ONE, REC*TJJ )
+                        USCAL = CLADIV( USCAL, TJJS )
+                     END IF
+                  IF( REC.LT.ONE ) THEN
+                     CALL CSSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                     XMAX = XMAX*REC
+                  END IF
+               END IF
+*
+               CSUMJ = ZERO
+               IF( USCAL.EQ.CMPLX( ONE ) ) THEN
+*
+*                 If the scaling needed for A in the dot product is 1,
+*                 call CDOTC to perform the dot product.
+*
+                  IF( UPPER ) THEN
+                     CSUMJ = CDOTC( J-1, A( 1, J ), 1, X, 1 )
+                  ELSE IF( J.LT.N ) THEN
+                     CSUMJ = CDOTC( N-J, A( J+1, J ), 1, X( J+1 ), 1 )
+                  END IF
+               ELSE
+*
+*                 Otherwise, use in-line code for the dot product.
+*
+                  IF( UPPER ) THEN
+                     DO 160 I = 1, J - 1
+                        CSUMJ = CSUMJ + ( CONJG( A( I, J ) )*USCAL )*
+     $                          X( I )
+  160                CONTINUE
+                  ELSE IF( J.LT.N ) THEN
+                     DO 170 I = J + 1, N
+                        CSUMJ = CSUMJ + ( CONJG( A( I, J ) )*USCAL )*
+     $                          X( I )
+  170                CONTINUE
+                  END IF
+               END IF
+*
+               IF( USCAL.EQ.CMPLX( TSCAL ) ) THEN
+*
+*                 Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j)
+*                 was not used to scale the dotproduct.
+*
+                  X( J ) = X( J ) - CSUMJ
+                  XJ = CABS1( X( J ) )
+                  IF( NOUNIT ) THEN
+                     TJJS = CONJG( A( J, J ) )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                     IF( TSCAL.EQ.ONE )
+     $                  GO TO 185
+                  END IF
+*
+*                    Compute x(j) = x(j) / A(j,j), scaling if necessary.
+*
+                     TJJ = CABS1( TJJS )
+                     IF( TJJ.GT.SMLNUM ) THEN
+*
+*                       abs(A(j,j)) > SMLNUM:
+*
+                        IF( TJJ.LT.ONE ) THEN
+                           IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                             Scale X by 1/abs(x(j)).
+*
+                              REC = ONE / XJ
+                              CALL CSSCAL( N, REC, X, 1 )
+                              SCALE = SCALE*REC
+                              XMAX = XMAX*REC
+                           END IF
+                        END IF
+                        X( J ) = CLADIV( X( J ), TJJS )
+                     ELSE IF( TJJ.GT.ZERO ) THEN
+*
+*                       0 < abs(A(j,j)) <= SMLNUM:
+*
+                        IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM.
+*
+                           REC = ( TJJ*BIGNUM ) / XJ
+                           CALL CSSCAL( N, REC, X, 1 )
+                           SCALE = SCALE*REC
+                           XMAX = XMAX*REC
+                        END IF
+                        X( J ) = CLADIV( X( J ), TJJS )
+                     ELSE
+*
+*                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+*                       scale = 0 and compute a solution to A**H *x = 0.
+*
+                        DO 180 I = 1, N
+                           X( I ) = ZERO
+  180                   CONTINUE
+                        X( J ) = ONE
+                        SCALE = ZERO
+                        XMAX = ZERO
+                     END IF
+  185             CONTINUE
+               ELSE
+*
+*                 Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot
+*                 product has already been divided by 1/A(j,j).
+*
+                  X( J ) = CLADIV( X( J ), TJJS ) - CSUMJ
+               END IF
+               XMAX = MAX( XMAX, CABS1( X( J ) ) )
+  190       CONTINUE
+         END IF
+         SCALE = SCALE / TSCAL
+      END IF
+*
+*     Scale the column norms by 1/TSCAL for return.
+*
+      IF( TSCAL.NE.ONE ) THEN
+         CALL SSCAL( N, ONE / TSCAL, CNORM, 1 )
+      END IF
+*
+      RETURN
+*
+*     End of CLATRS
+*
+      END
diff --git a/costa/native/external/lapack/clatrz.f b/costa/native/external/lapack/clatrz.f
new file mode 100644
index 000000000..fa51d60cd
--- /dev/null
+++ b/costa/native/external/lapack/clatrz.f
@@ -0,0 +1,134 @@
+      SUBROUTINE CLATRZ( M, N, L, A, LDA, TAU, WORK )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            L, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLATRZ factors the M-by-(M+L) complex upper trapezoidal matrix
+*  [ A1 A2 ] = [ A(1:M,1:M) A(1:M,N-L+1:N) ] as ( R  0 ) * Z by means
+*  of unitary transformations, where  Z is an (M+L)-by-(M+L) unitary
+*  matrix and, R and A1 are M-by-M upper triangular matrices.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  L       (input) INTEGER
+*          The number of columns of the matrix A containing the
+*          meaningful part of the Householder vectors. N-M >= L >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the leading M-by-N upper trapezoidal part of the
+*          array A must contain the matrix to be factorized.
+*          On exit, the leading M-by-M upper triangular part of A
+*          contains the upper triangular matrix R, and elements N-L+1 to
+*          N of the first M rows of A, with the array TAU, represent the
+*          unitary matrix Z as a product of M elementary reflectors.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) COMPLEX array, dimension (M)
+*          The scalar factors of the elementary reflectors.
+*
+*  WORK    (workspace) COMPLEX array, dimension (M)
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+*  The factorization is obtained by Householder's method.  The kth
+*  transformation matrix, Z( k ), which is used to introduce zeros into
+*  the ( m - k + 1 )th row of A, is given in the form
+*
+*     Z( k ) = ( I     0   ),
+*              ( 0  T( k ) )
+*
+*  where
+*
+*     T( k ) = I - tau*u( k )*u( k )',   u( k ) = (   1    ),
+*                                                 (   0    )
+*                                                 ( z( k ) )
+*
+*  tau is a scalar and z( k ) is an l element vector. tau and z( k )
+*  are chosen to annihilate the elements of the kth row of A2.
+*
+*  The scalar tau is returned in the kth element of TAU and the vector
+*  u( k ) in the kth row of A2, such that the elements of z( k ) are
+*  in  a( k, l + 1 ), ..., a( k, n ). The elements of R are returned in
+*  the upper triangular part of A1.
+*
+*  Z is given by
+*
+*     Z =  Z( 1 ) * Z( 2 ) * ... * Z( m ).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      COMPLEX            ALPHA
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLACGV, CLARFG, CLARZ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 ) THEN
+         RETURN
+      ELSE IF( M.EQ.N ) THEN
+         DO 10 I = 1, N
+            TAU( I ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      DO 20 I = M, 1, -1
+*
+*        Generate elementary reflector H(i) to annihilate
+*        [ A(i,i) A(i,n-l+1:n) ]
+*
+         CALL CLACGV( L, A( I, N-L+1 ), LDA )
+         ALPHA = CONJG( A( I, I ) )
+         CALL CLARFG( L+1, ALPHA, A( I, N-L+1 ), LDA, TAU( I ) )
+         TAU( I ) = CONJG( TAU( I ) )
+*
+*        Apply H(i) to A(1:i-1,i:n) from the right
+*
+         CALL CLARZ( 'Right', I-1, N-I+1, L, A( I, N-L+1 ), LDA,
+     $               CONJG( TAU( I ) ), A( 1, I ), LDA, WORK )
+         A( I, I ) = CONJG( ALPHA )
+*
+   20 CONTINUE
+*
+      RETURN
+*
+*     End of CLATRZ
+*
+      END
diff --git a/costa/native/external/lapack/clatzm.f b/costa/native/external/lapack/clatzm.f
new file mode 100644
index 000000000..1c2507ffc
--- /dev/null
+++ b/costa/native/external/lapack/clatzm.f
@@ -0,0 +1,147 @@
+      SUBROUTINE CLATZM( SIDE, M, N, V, INCV, TAU, C1, C2, LDC, WORK )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE
+      INTEGER            INCV, LDC, M, N
+      COMPLEX            TAU
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            C1( LDC, * ), C2( LDC, * ), V( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This routine is deprecated and has been replaced by routine CUNMRZ.
+*
+*  CLATZM applies a Householder matrix generated by CTZRQF to a matrix.
+*
+*  Let P = I - tau*u*u',   u = ( 1 ),
+*                              ( v )
+*  where v is an (m-1) vector if SIDE = 'L', or a (n-1) vector if
+*  SIDE = 'R'.
+*
+*  If SIDE equals 'L', let
+*         C = [ C1 ] 1
+*             [ C2 ] m-1
+*               n
+*  Then C is overwritten by P*C.
+*
+*  If SIDE equals 'R', let
+*         C = [ C1, C2 ] m
+*                1  n-1
+*  Then C is overwritten by C*P.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': form P * C
+*          = 'R': form C * P
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C.
+*
+*  V       (input) COMPLEX array, dimension
+*                  (1 + (M-1)*abs(INCV)) if SIDE = 'L'
+*                  (1 + (N-1)*abs(INCV)) if SIDE = 'R'
+*          The vector v in the representation of P. V is not used
+*          if TAU = 0.
+*
+*  INCV    (input) INTEGER
+*          The increment between elements of v. INCV <> 0
+*
+*  TAU     (input) COMPLEX
+*          The value tau in the representation of P.
+*
+*  C1      (input/output) COMPLEX array, dimension
+*                         (LDC,N) if SIDE = 'L'
+*                         (M,1)   if SIDE = 'R'
+*          On entry, the n-vector C1 if SIDE = 'L', or the m-vector C1
+*          if SIDE = 'R'.
+*
+*          On exit, the first row of P*C if SIDE = 'L', or the first
+*          column of C*P if SIDE = 'R'.
+*
+*  C2      (input/output) COMPLEX array, dimension
+*                         (LDC, N)   if SIDE = 'L'
+*                         (LDC, N-1) if SIDE = 'R'
+*          On entry, the (m - 1) x n matrix C2 if SIDE = 'L', or the
+*          m x (n - 1) matrix C2 if SIDE = 'R'.
+*
+*          On exit, rows 2:m of P*C if SIDE = 'L', or columns 2:m of C*P
+*          if SIDE = 'R'.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the arrays C1 and C2.
+*          LDC >= max(1,M).
+*
+*  WORK    (workspace) COMPLEX array, dimension
+*                      (N) if SIDE = 'L'
+*                      (M) if SIDE = 'R'
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ),
+     $                   ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CCOPY, CGEMV, CGERC, CGERU, CLACGV
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+      IF( ( MIN( M, N ).EQ.0 ) .OR. ( TAU.EQ.ZERO ) )
+     $   RETURN
+*
+      IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*        w :=  conjg( C1 + v' * C2 )
+*
+         CALL CCOPY( N, C1, LDC, WORK, 1 )
+         CALL CLACGV( N, WORK, 1 )
+         CALL CGEMV( 'Conjugate transpose', M-1, N, ONE, C2, LDC, V,
+     $               INCV, ONE, WORK, 1 )
+*
+*        [ C1 ] := [ C1 ] - tau* [ 1 ] * w'
+*        [ C2 ]    [ C2 ]        [ v ]
+*
+         CALL CLACGV( N, WORK, 1 )
+         CALL CAXPY( N, -TAU, WORK, 1, C1, LDC )
+         CALL CGERU( M-1, N, -TAU, V, INCV, WORK, 1, C2, LDC )
+*
+      ELSE IF( LSAME( SIDE, 'R' ) ) THEN
+*
+*        w := C1 + C2 * v
+*
+         CALL CCOPY( M, C1, 1, WORK, 1 )
+         CALL CGEMV( 'No transpose', M, N-1, ONE, C2, LDC, V, INCV, ONE,
+     $               WORK, 1 )
+*
+*        [ C1, C2 ] := [ C1, C2 ] - tau* w * [ 1 , v']
+*
+         CALL CAXPY( M, -TAU, WORK, 1, C1, 1 )
+         CALL CGERC( M, N-1, -TAU, WORK, 1, V, INCV, C2, LDC )
+      END IF
+*
+      RETURN
+*
+*     End of CLATZM
+*
+      END
diff --git a/costa/native/external/lapack/clauu2.f b/costa/native/external/lapack/clauu2.f
new file mode 100644
index 000000000..e1054964b
--- /dev/null
+++ b/costa/native/external/lapack/clauu2.f
@@ -0,0 +1,144 @@
+      SUBROUTINE CLAUU2( UPLO, N, A, LDA, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLAUU2 computes the product U * U' or L' * L, where the triangular
+*  factor U or L is stored in the upper or lower triangular part of
+*  the array A.
+*
+*  If UPLO = 'U' or 'u' then the upper triangle of the result is stored,
+*  overwriting the factor U in A.
+*  If UPLO = 'L' or 'l' then the lower triangle of the result is stored,
+*  overwriting the factor L in A.
+*
+*  This is the unblocked form of the algorithm, calling Level 2 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the triangular factor stored in the array A
+*          is upper or lower triangular:
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the triangular factor U or L.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the triangular factor U or L.
+*          On exit, if UPLO = 'U', the upper triangle of A is
+*          overwritten with the upper triangle of the product U * U';
+*          if UPLO = 'L', the lower triangle of A is overwritten with
+*          the lower triangle of the product L' * L.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I
+      REAL               AII
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      COMPLEX            CDOTC
+      EXTERNAL           LSAME, CDOTC
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEMV, CLACGV, CSSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CMPLX, MAX, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CLAUU2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Compute the product U * U'.
+*
+         DO 10 I = 1, N
+            AII = A( I, I )
+            IF( I.LT.N ) THEN
+               A( I, I ) = AII*AII + REAL( CDOTC( N-I, A( I, I+1 ), LDA,
+     $                     A( I, I+1 ), LDA ) )
+               CALL CLACGV( N-I, A( I, I+1 ), LDA )
+               CALL CGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ),
+     $                     LDA, A( I, I+1 ), LDA, CMPLX( AII ),
+     $                     A( 1, I ), 1 )
+               CALL CLACGV( N-I, A( I, I+1 ), LDA )
+            ELSE
+               CALL CSSCAL( I, AII, A( 1, I ), 1 )
+            END IF
+   10    CONTINUE
+*
+      ELSE
+*
+*        Compute the product L' * L.
+*
+         DO 20 I = 1, N
+            AII = A( I, I )
+            IF( I.LT.N ) THEN
+               A( I, I ) = AII*AII + REAL( CDOTC( N-I, A( I+1, I ), 1,
+     $                     A( I+1, I ), 1 ) )
+               CALL CLACGV( I-1, A( I, 1 ), LDA )
+               CALL CGEMV( 'Conjugate transpose', N-I, I-1, ONE,
+     $                     A( I+1, 1 ), LDA, A( I+1, I ), 1,
+     $                     CMPLX( AII ), A( I, 1 ), LDA )
+               CALL CLACGV( I-1, A( I, 1 ), LDA )
+            ELSE
+               CALL CSSCAL( I, AII, A( I, 1 ), LDA )
+            END IF
+   20    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CLAUU2
+*
+      END
diff --git a/costa/native/external/lapack/clauum.f b/costa/native/external/lapack/clauum.f
new file mode 100644
index 000000000..01276e40e
--- /dev/null
+++ b/costa/native/external/lapack/clauum.f
@@ -0,0 +1,161 @@
+      SUBROUTINE CLAUUM( UPLO, N, A, LDA, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CLAUUM computes the product U * U' or L' * L, where the triangular
+*  factor U or L is stored in the upper or lower triangular part of
+*  the array A.
+*
+*  If UPLO = 'U' or 'u' then the upper triangle of the result is stored,
+*  overwriting the factor U in A.
+*  If UPLO = 'L' or 'l' then the lower triangle of the result is stored,
+*  overwriting the factor L in A.
+*
+*  This is the blocked form of the algorithm, calling Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the triangular factor stored in the array A
+*          is upper or lower triangular:
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the triangular factor U or L.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the triangular factor U or L.
+*          On exit, if UPLO = 'U', the upper triangle of A is
+*          overwritten with the upper triangle of the product U * U';
+*          if UPLO = 'L', the lower triangle of A is overwritten with
+*          the lower triangle of the product L' * L.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+      COMPLEX            CONE
+      PARAMETER          ( CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, IB, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEMM, CHERK, CLAUU2, CTRMM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CLAUUM', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'CLAUUM', UPLO, N, -1, -1, -1 )
+*
+      IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+*        Use unblocked code
+*
+         CALL CLAUU2( UPLO, N, A, LDA, INFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( UPPER ) THEN
+*
+*           Compute the product U * U'.
+*
+            DO 10 I = 1, N, NB
+               IB = MIN( NB, N-I+1 )
+               CALL CTRMM( 'Right', 'Upper', 'Conjugate transpose',
+     $                     'Non-unit', I-1, IB, CONE, A( I, I ), LDA,
+     $                     A( 1, I ), LDA )
+               CALL CLAUU2( 'Upper', IB, A( I, I ), LDA, INFO )
+               IF( I+IB.LE.N ) THEN
+                  CALL CGEMM( 'No transpose', 'Conjugate transpose',
+     $                        I-1, IB, N-I-IB+1, CONE, A( 1, I+IB ),
+     $                        LDA, A( I, I+IB ), LDA, CONE, A( 1, I ),
+     $                        LDA )
+                  CALL CHERK( 'Upper', 'No transpose', IB, N-I-IB+1,
+     $                        ONE, A( I, I+IB ), LDA, ONE, A( I, I ),
+     $                        LDA )
+               END IF
+   10       CONTINUE
+         ELSE
+*
+*           Compute the product L' * L.
+*
+            DO 20 I = 1, N, NB
+               IB = MIN( NB, N-I+1 )
+               CALL CTRMM( 'Left', 'Lower', 'Conjugate transpose',
+     $                     'Non-unit', IB, I-1, CONE, A( I, I ), LDA,
+     $                     A( I, 1 ), LDA )
+               CALL CLAUU2( 'Lower', IB, A( I, I ), LDA, INFO )
+               IF( I+IB.LE.N ) THEN
+                  CALL CGEMM( 'Conjugate transpose', 'No transpose', IB,
+     $                        I-1, N-I-IB+1, CONE, A( I+IB, I ), LDA,
+     $                        A( I+IB, 1 ), LDA, CONE, A( I, 1 ), LDA )
+                  CALL CHERK( 'Lower', 'Conjugate transpose', IB,
+     $                        N-I-IB+1, ONE, A( I+IB, I ), LDA, ONE,
+     $                        A( I, I ), LDA )
+               END IF
+   20       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CLAUUM
+*
+      END
diff --git a/costa/native/external/lapack/cpbcon.f b/costa/native/external/lapack/cpbcon.f
new file mode 100644
index 000000000..a9ba6b058
--- /dev/null
+++ b/costa/native/external/lapack/cpbcon.f
@@ -0,0 +1,194 @@
+      SUBROUTINE CPBCON( UPLO, N, KD, AB, LDAB, ANORM, RCOND, WORK,
+     $                   RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, N
+      REAL               ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               RWORK( * )
+      COMPLEX            AB( LDAB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPBCON estimates the reciprocal of the condition number (in the
+*  1-norm) of a complex Hermitian positive definite band matrix using
+*  the Cholesky factorization A = U**H*U or A = L*L**H computed by
+*  CPBTRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangular factor stored in AB;
+*          = 'L':  Lower triangular factor stored in AB.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of sub-diagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input) COMPLEX array, dimension (LDAB,N)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**H*U or A = L*L**H of the band matrix A, stored in the
+*          first KD+1 rows of the array.  The j-th column of U or L is
+*          stored in the j-th column of the array AB as follows:
+*          if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO ='L', AB(1+i-j,j)    = L(i,j) for j<=i<=min(n,j+kd).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  ANORM   (input) REAL
+*          The 1-norm (or infinity-norm) of the Hermitian band matrix A.
+*
+*  RCOND   (output) REAL
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an
+*          estimate of the 1-norm of inv(A) computed in this routine.
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      CHARACTER          NORMIN
+      INTEGER            IX, KASE
+      REAL               AINVNM, SCALE, SCALEL, SCALEU, SMLNUM
+      COMPLEX            ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ICAMAX
+      REAL               SLAMCH
+      EXTERNAL           LSAME, ICAMAX, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLACON, CLATBS, CSRSCL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, REAL
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -5
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPBCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.EQ.ZERO ) THEN
+         RETURN
+      END IF
+*
+      SMLNUM = SLAMCH( 'Safe minimum' )
+*
+*     Estimate the 1-norm of the inverse.
+*
+      KASE = 0
+      NORMIN = 'N'
+   10 CONTINUE
+      CALL CLACON( N, WORK( N+1 ), WORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+         IF( UPPER ) THEN
+*
+*           Multiply by inv(U').
+*
+            CALL CLATBS( 'Upper', 'Conjugate transpose', 'Non-unit',
+     $                   NORMIN, N, KD, AB, LDAB, WORK, SCALEL, RWORK,
+     $                   INFO )
+            NORMIN = 'Y'
+*
+*           Multiply by inv(U).
+*
+            CALL CLATBS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   KD, AB, LDAB, WORK, SCALEU, RWORK, INFO )
+         ELSE
+*
+*           Multiply by inv(L).
+*
+            CALL CLATBS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   KD, AB, LDAB, WORK, SCALEL, RWORK, INFO )
+            NORMIN = 'Y'
+*
+*           Multiply by inv(L').
+*
+            CALL CLATBS( 'Lower', 'Conjugate transpose', 'Non-unit',
+     $                   NORMIN, N, KD, AB, LDAB, WORK, SCALEU, RWORK,
+     $                   INFO )
+         END IF
+*
+*        Multiply by 1/SCALE if doing so will not cause overflow.
+*
+         SCALE = SCALEL*SCALEU
+         IF( SCALE.NE.ONE ) THEN
+            IX = ICAMAX( N, WORK, 1 )
+            IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO )
+     $         GO TO 20
+            CALL CSRSCL( N, SCALE, WORK, 1 )
+         END IF
+         GO TO 10
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+   20 CONTINUE
+*
+      RETURN
+*
+*     End of CPBCON
+*
+      END
diff --git a/costa/native/external/lapack/cpbequ.f b/costa/native/external/lapack/cpbequ.f
new file mode 100644
index 000000000..0f177ae45
--- /dev/null
+++ b/costa/native/external/lapack/cpbequ.f
@@ -0,0 +1,168 @@
+      SUBROUTINE CPBEQU( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, N
+      REAL               AMAX, SCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               S( * )
+      COMPLEX            AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPBEQU computes row and column scalings intended to equilibrate a
+*  Hermitian positive definite band matrix A and reduce its condition
+*  number (with respect to the two-norm).  S contains the scale factors,
+*  S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with
+*  elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal.  This
+*  choice of S puts the condition number of B within a factor N of the
+*  smallest possible condition number over all possible diagonal
+*  scalings.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangular of A is stored;
+*          = 'L':  Lower triangular of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input) COMPLEX array, dimension (LDAB,N)
+*          The upper or lower triangle of the Hermitian band matrix A,
+*          stored in the first KD+1 rows of the array.  The j-th column
+*          of A is stored in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*  LDAB     (input) INTEGER
+*          The leading dimension of the array A.  LDAB >= KD+1.
+*
+*  S       (output) REAL array, dimension (N)
+*          If INFO = 0, S contains the scale factors for A.
+*
+*  SCOND   (output) REAL
+*          If INFO = 0, S contains the ratio of the smallest S(i) to
+*          the largest S(i).  If SCOND >= 0.1 and AMAX is neither too
+*          large nor too small, it is not worth scaling by S.
+*
+*  AMAX    (output) REAL
+*          Absolute value of largest matrix element.  If AMAX is very
+*          close to overflow or very close to underflow, the matrix
+*          should be scaled.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = i, the i-th diagonal element is nonpositive.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, J
+      REAL               SMIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPBEQU', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         SCOND = ONE
+         AMAX = ZERO
+         RETURN
+      END IF
+*
+      IF( UPPER ) THEN
+         J = KD + 1
+      ELSE
+         J = 1
+      END IF
+*
+*     Initialize SMIN and AMAX.
+*
+      S( 1 ) = REAL( AB( J, 1 ) )
+      SMIN = S( 1 )
+      AMAX = S( 1 )
+*
+*     Find the minimum and maximum diagonal elements.
+*
+      DO 10 I = 2, N
+         S( I ) = REAL( AB( J, I ) )
+         SMIN = MIN( SMIN, S( I ) )
+         AMAX = MAX( AMAX, S( I ) )
+   10 CONTINUE
+*
+      IF( SMIN.LE.ZERO ) THEN
+*
+*        Find the first non-positive diagonal element and return.
+*
+         DO 20 I = 1, N
+            IF( S( I ).LE.ZERO ) THEN
+               INFO = I
+               RETURN
+            END IF
+   20    CONTINUE
+      ELSE
+*
+*        Set the scale factors to the reciprocals
+*        of the diagonal elements.
+*
+         DO 30 I = 1, N
+            S( I ) = ONE / SQRT( S( I ) )
+   30    CONTINUE
+*
+*        Compute SCOND = min(S(I)) / max(S(I))
+*
+         SCOND = SQRT( SMIN ) / SQRT( AMAX )
+      END IF
+      RETURN
+*
+*     End of CPBEQU
+*
+      END
diff --git a/costa/native/external/lapack/cpbrfs.f b/costa/native/external/lapack/cpbrfs.f
new file mode 100644
index 000000000..8acd89bd8
--- /dev/null
+++ b/costa/native/external/lapack/cpbrfs.f
@@ -0,0 +1,342 @@
+      SUBROUTINE CPBRFS( UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, B,
+     $                   LDB, X, LDX, FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      REAL               BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX            AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPBRFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is Hermitian positive definite
+*  and banded, and provides error bounds and backward error estimates
+*  for the solution.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AB      (input) COMPLEX array, dimension (LDAB,N)
+*          The upper or lower triangle of the Hermitian band matrix A,
+*          stored in the first KD+1 rows of the array.  The j-th column
+*          of A is stored in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  AFB     (input) COMPLEX array, dimension (LDAFB,N)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**H*U or A = L*L**H of the band matrix A as computed by
+*          CPBTRF, in the same storage format as A (see AB).
+*
+*  LDAFB   (input) INTEGER
+*          The leading dimension of the array AFB.  LDAFB >= KD+1.
+*
+*  B       (input) COMPLEX array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) COMPLEX array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by CPBTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+      REAL               TWO
+      PARAMETER          ( TWO = 2.0E+0 )
+      REAL               THREE
+      PARAMETER          ( THREE = 3.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            COUNT, I, J, K, KASE, L, NZ
+      REAL               EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+      COMPLEX            ZDUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CCOPY, CHBMV, CLACON, CPBTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MAX, MIN, REAL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -6
+      ELSE IF( LDAFB.LT.KD+1 ) THEN
+         INFO = -8
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPBRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = MIN( N+1, 2*KD+2 )
+      EPS = SLAMCH( 'Epsilon' )
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - A * X
+*
+         CALL CCOPY( N, B( 1, J ), 1, WORK, 1 )
+         CALL CHBMV( UPLO, N, KD, -ONE, AB, LDAB, X( 1, J ), 1, ONE,
+     $               WORK, 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            RWORK( I ) = CABS1( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(A)*abs(X) + abs(B).
+*
+         IF( UPPER ) THEN
+            DO 50 K = 1, N
+               S = ZERO
+               XK = CABS1( X( K, J ) )
+               L = KD + 1 - K
+               DO 40 I = MAX( 1, K-KD ), K - 1
+                  RWORK( I ) = RWORK( I ) + CABS1( AB( L+I, K ) )*XK
+                  S = S + CABS1( AB( L+I, K ) )*CABS1( X( I, J ) )
+   40          CONTINUE
+               RWORK( K ) = RWORK( K ) + ABS( REAL( AB( KD+1, K ) ) )*
+     $                      XK + S
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               XK = CABS1( X( K, J ) )
+               RWORK( K ) = RWORK( K ) + ABS( REAL( AB( 1, K ) ) )*XK
+               L = 1 - K
+               DO 60 I = K + 1, MIN( N, K+KD )
+                  RWORK( I ) = RWORK( I ) + CABS1( AB( L+I, K ) )*XK
+                  S = S + CABS1( AB( L+I, K ) )*CABS1( X( I, J ) )
+   60          CONTINUE
+               RWORK( K ) = RWORK( K ) + S
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) )
+            ELSE
+               S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) /
+     $             ( RWORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL CPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK, N, INFO )
+            CALL CAXPY( N, ONE, WORK, 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(A))*
+*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(A) is the inverse of A
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(A)*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use CLACON to estimate the infinity-norm of the matrix
+*           inv(A) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I )
+            ELSE
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) +
+     $                      SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL CLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(A').
+*
+               CALL CPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK, N, INFO )
+               DO 110 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  110          CONTINUE
+            ELSE IF( KASE.EQ.2 ) THEN
+*
+*              Multiply by inv(A)*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  120          CONTINUE
+               CALL CPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK, N, INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of CPBRFS
+*
+      END
diff --git a/costa/native/external/lapack/cpbstf.f b/costa/native/external/lapack/cpbstf.f
new file mode 100644
index 000000000..07463d4ce
--- /dev/null
+++ b/costa/native/external/lapack/cpbstf.f
@@ -0,0 +1,264 @@
+      SUBROUTINE CPBSTF( UPLO, N, KD, AB, LDAB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPBSTF computes a split Cholesky factorization of a complex
+*  Hermitian positive definite band matrix A.
+*
+*  This routine is designed to be used in conjunction with CHBGST.
+*
+*  The factorization has the form  A = S**H*S  where S is a band matrix
+*  of the same bandwidth as A and the following structure:
+*
+*    S = ( U    )
+*        ( M  L )
+*
+*  where U is upper triangular of order m = (n+kd)/2, and L is lower
+*  triangular of order n-m.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) COMPLEX array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix A, stored in the first kd+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*          On exit, if INFO = 0, the factor S from the split Cholesky
+*          factorization A = S**H*S. See Further Details.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, the factorization could not be completed,
+*               because the updated element a(i,i) was negative; the
+*               matrix A is not positive definite.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  N = 7, KD = 2:
+*
+*  S = ( s11  s12  s13                     )
+*      (      s22  s23  s24                )
+*      (           s33  s34                )
+*      (                s44                )
+*      (           s53  s54  s55           )
+*      (                s64  s65  s66      )
+*      (                     s75  s76  s77 )
+*
+*  If UPLO = 'U', the array AB holds:
+*
+*  on entry:                          on exit:
+*
+*   *    *   a13  a24  a35  a46  a57   *    *   s13  s24  s53' s64' s75'
+*   *   a12  a23  a34  a45  a56  a67   *   s12  s23  s34  s54' s65' s76'
+*  a11  a22  a33  a44  a55  a66  a77  s11  s22  s33  s44  s55  s66  s77
+*
+*  If UPLO = 'L', the array AB holds:
+*
+*  on entry:                          on exit:
+*
+*  a11  a22  a33  a44  a55  a66  a77  s11  s22  s33  s44  s55  s66  s77
+*  a21  a32  a43  a54  a65  a76   *   s12' s23' s34' s54  s65  s76   *
+*  a31  a42  a53  a64  a64   *    *   s13' s24' s53  s64  s75   *    *
+*
+*  Array elements marked * are not used by the routine; s12' denotes
+*  conjg(s12); the diagonal elements of S are real.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, KLD, KM, M
+      REAL               AJJ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CHER, CLACGV, CSSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPBSTF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      KLD = MAX( 1, LDAB-1 )
+*
+*     Set the splitting point m.
+*
+      M = ( N+KD ) / 2
+*
+      IF( UPPER ) THEN
+*
+*        Factorize A(m+1:n,m+1:n) as L**H*L, and update A(1:m,1:m).
+*
+         DO 10 J = N, M + 1, -1
+*
+*           Compute s(j,j) and test for non-positive-definiteness.
+*
+            AJJ = REAL( AB( KD+1, J ) )
+            IF( AJJ.LE.ZERO ) THEN
+               AB( KD+1, J ) = AJJ
+               GO TO 50
+            END IF
+            AJJ = SQRT( AJJ )
+            AB( KD+1, J ) = AJJ
+            KM = MIN( J-1, KD )
+*
+*           Compute elements j-km:j-1 of the j-th column and update the
+*           the leading submatrix within the band.
+*
+            CALL CSSCAL( KM, ONE / AJJ, AB( KD+1-KM, J ), 1 )
+            CALL CHER( 'Upper', KM, -ONE, AB( KD+1-KM, J ), 1,
+     $                 AB( KD+1, J-KM ), KLD )
+   10    CONTINUE
+*
+*        Factorize the updated submatrix A(1:m,1:m) as U**H*U.
+*
+         DO 20 J = 1, M
+*
+*           Compute s(j,j) and test for non-positive-definiteness.
+*
+            AJJ = REAL( AB( KD+1, J ) )
+            IF( AJJ.LE.ZERO ) THEN
+               AB( KD+1, J ) = AJJ
+               GO TO 50
+            END IF
+            AJJ = SQRT( AJJ )
+            AB( KD+1, J ) = AJJ
+            KM = MIN( KD, M-J )
+*
+*           Compute elements j+1:j+km of the j-th row and update the
+*           trailing submatrix within the band.
+*
+            IF( KM.GT.0 ) THEN
+               CALL CSSCAL( KM, ONE / AJJ, AB( KD, J+1 ), KLD )
+               CALL CLACGV( KM, AB( KD, J+1 ), KLD )
+               CALL CHER( 'Upper', KM, -ONE, AB( KD, J+1 ), KLD,
+     $                    AB( KD+1, J+1 ), KLD )
+               CALL CLACGV( KM, AB( KD, J+1 ), KLD )
+            END IF
+   20    CONTINUE
+      ELSE
+*
+*        Factorize A(m+1:n,m+1:n) as L**H*L, and update A(1:m,1:m).
+*
+         DO 30 J = N, M + 1, -1
+*
+*           Compute s(j,j) and test for non-positive-definiteness.
+*
+            AJJ = REAL( AB( 1, J ) )
+            IF( AJJ.LE.ZERO ) THEN
+               AB( 1, J ) = AJJ
+               GO TO 50
+            END IF
+            AJJ = SQRT( AJJ )
+            AB( 1, J ) = AJJ
+            KM = MIN( J-1, KD )
+*
+*           Compute elements j-km:j-1 of the j-th row and update the
+*           trailing submatrix within the band.
+*
+            CALL CSSCAL( KM, ONE / AJJ, AB( KM+1, J-KM ), KLD )
+            CALL CLACGV( KM, AB( KM+1, J-KM ), KLD )
+            CALL CHER( 'Lower', KM, -ONE, AB( KM+1, J-KM ), KLD,
+     $                 AB( 1, J-KM ), KLD )
+            CALL CLACGV( KM, AB( KM+1, J-KM ), KLD )
+   30    CONTINUE
+*
+*        Factorize the updated submatrix A(1:m,1:m) as U**H*U.
+*
+         DO 40 J = 1, M
+*
+*           Compute s(j,j) and test for non-positive-definiteness.
+*
+            AJJ = REAL( AB( 1, J ) )
+            IF( AJJ.LE.ZERO ) THEN
+               AB( 1, J ) = AJJ
+               GO TO 50
+            END IF
+            AJJ = SQRT( AJJ )
+            AB( 1, J ) = AJJ
+            KM = MIN( KD, M-J )
+*
+*           Compute elements j+1:j+km of the j-th column and update the
+*           trailing submatrix within the band.
+*
+            IF( KM.GT.0 ) THEN
+               CALL CSSCAL( KM, ONE / AJJ, AB( 2, J ), 1 )
+               CALL CHER( 'Lower', KM, -ONE, AB( 2, J ), 1,
+     $                    AB( 1, J+1 ), KLD )
+            END IF
+   40    CONTINUE
+      END IF
+      RETURN
+*
+   50 CONTINUE
+      INFO = J
+      RETURN
+*
+*     End of CPBSTF
+*
+      END
diff --git a/costa/native/external/lapack/cpbsv.f b/costa/native/external/lapack/cpbsv.f
new file mode 100644
index 000000000..61d7f164e
--- /dev/null
+++ b/costa/native/external/lapack/cpbsv.f
@@ -0,0 +1,152 @@
+      SUBROUTINE CPBSV( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            AB( LDAB, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPBSV computes the solution to a complex system of linear equations
+*     A * X = B,
+*  where A is an N-by-N Hermitian positive definite band matrix and X
+*  and B are N-by-NRHS matrices.
+*
+*  The Cholesky decomposition is used to factor A as
+*     A = U**H * U,  if UPLO = 'U', or
+*     A = L * L**H,  if UPLO = 'L',
+*  where U is an upper triangular band matrix, and L is a lower
+*  triangular band matrix, with the same number of superdiagonals or
+*  subdiagonals as A.  The factored form of A is then used to solve the
+*  system of equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AB      (input/output) COMPLEX array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(KD+1+i-j,j) = A(i,j) for max(1,j-KD)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(N,j+KD).
+*          See below for further details.
+*
+*          On exit, if INFO = 0, the triangular factor U or L from the
+*          Cholesky factorization A = U**H*U or A = L*L**H of the band
+*          matrix A, in the same storage format as A.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i of A is not
+*                positive definite, so the factorization could not be
+*                completed, and the solution has not been computed.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  N = 6, KD = 2, and UPLO = 'U':
+*
+*  On entry:                       On exit:
+*
+*      *    *   a13  a24  a35  a46      *    *   u13  u24  u35  u46
+*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
+*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
+*
+*  Similarly, if UPLO = 'L' the format of A is as follows:
+*
+*  On entry:                       On exit:
+*
+*     a11  a22  a33  a44  a55  a66     l11  l22  l33  l44  l55  l66
+*     a21  a32  a43  a54  a65   *      l21  l32  l43  l54  l65   *
+*     a31  a42  a53  a64   *    *      l31  l42  l53  l64   *    *
+*
+*  Array elements marked * are not used by the routine.
+*
+*  =====================================================================
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CPBTRF, CPBTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPBSV ', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the Cholesky factorization A = U'*U or A = L*L'.
+*
+      CALL CPBTRF( UPLO, N, KD, AB, LDAB, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL CPBTRS( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO )
+*
+      END IF
+      RETURN
+*
+*     End of CPBSV
+*
+      END
diff --git a/costa/native/external/lapack/cpbsvx.f b/costa/native/external/lapack/cpbsvx.f
new file mode 100644
index 000000000..c43403437
--- /dev/null
+++ b/costa/native/external/lapack/cpbsvx.f
@@ -0,0 +1,423 @@
+      SUBROUTINE CPBSVX( FACT, UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB,
+     $                   EQUED, S, B, LDB, X, LDX, RCOND, FERR, BERR,
+     $                   WORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, FACT, UPLO
+      INTEGER            INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               BERR( * ), FERR( * ), RWORK( * ), S( * )
+      COMPLEX            AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPBSVX uses the Cholesky factorization A = U**H*U or A = L*L**H to
+*  compute the solution to a complex system of linear equations
+*     A * X = B,
+*  where A is an N-by-N Hermitian positive definite band matrix and X
+*  and B are N-by-NRHS matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'E', real scaling factors are computed to equilibrate
+*     the system:
+*        diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B
+*     Whether or not the system will be equilibrated depends on the
+*     scaling of the matrix A, but if equilibration is used, A is
+*     overwritten by diag(S)*A*diag(S) and B by diag(S)*B.
+*
+*  2. If FACT = 'N' or 'E', the Cholesky decomposition is used to
+*     factor the matrix A (after equilibration if FACT = 'E') as
+*        A = U**H * U,  if UPLO = 'U', or
+*        A = L * L**H,  if UPLO = 'L',
+*     where U is an upper triangular band matrix, and L is a lower
+*     triangular band matrix.
+*
+*  3. If the leading i-by-i principal minor is not positive definite,
+*     then the routine returns with INFO = i. Otherwise, the factored
+*     form of A is used to estimate the condition number of the matrix
+*     A.  If the reciprocal of the condition number is less than machine
+*     precision, INFO = N+1 is returned as a warning, but the routine
+*     still goes on to solve for X and compute error bounds as
+*     described below.
+*
+*  4. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  5. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  6. If equilibration was used, the matrix X is premultiplied by
+*     diag(S) so that it solves the original system before
+*     equilibration.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of the matrix A is
+*          supplied on entry, and if not, whether the matrix A should be
+*          equilibrated before it is factored.
+*          = 'F':  On entry, AFB contains the factored form of A.
+*                  If EQUED = 'Y', the matrix A has been equilibrated
+*                  with scaling factors given by S.  AB and AFB will not
+*                  be modified.
+*          = 'N':  The matrix A will be copied to AFB and factored.
+*          = 'E':  The matrix A will be equilibrated if necessary, then
+*                  copied to AFB and factored.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right-hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AB      (input/output) COMPLEX array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix A, stored in the first KD+1 rows of the array, except
+*          if FACT = 'F' and EQUED = 'Y', then A must contain the
+*          equilibrated matrix diag(S)*A*diag(S).  The j-th column of A
+*          is stored in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(KD+1+i-j,j) = A(i,j) for max(1,j-KD)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(N,j+KD).
+*          See below for further details.
+*
+*          On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by
+*          diag(S)*A*diag(S).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array A.  LDAB >= KD+1.
+*
+*  AFB     (input or output) COMPLEX array, dimension (LDAFB,N)
+*          If FACT = 'F', then AFB is an input argument and on entry
+*          contains the triangular factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H of the band matrix
+*          A, in the same storage format as A (see AB).  If EQUED = 'Y',
+*          then AFB is the factored form of the equilibrated matrix A.
+*
+*          If FACT = 'N', then AFB is an output argument and on exit
+*          returns the triangular factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H.
+*
+*          If FACT = 'E', then AFB is an output argument and on exit
+*          returns the triangular factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H of the equilibrated
+*          matrix A (see the description of A for the form of the
+*          equilibrated matrix).
+*
+*  LDAFB   (input) INTEGER
+*          The leading dimension of the array AFB.  LDAFB >= KD+1.
+*
+*  EQUED   (input or output) CHARACTER*1
+*          Specifies the form of equilibration that was done.
+*          = 'N':  No equilibration (always true if FACT = 'N').
+*          = 'Y':  Equilibration was done, i.e., A has been replaced by
+*                  diag(S) * A * diag(S).
+*          EQUED is an input argument if FACT = 'F'; otherwise, it is an
+*          output argument.
+*
+*  S       (input or output) REAL array, dimension (N)
+*          The scale factors for A; not accessed if EQUED = 'N'.  S is
+*          an input argument if FACT = 'F'; otherwise, S is an output
+*          argument.  If FACT = 'F' and EQUED = 'Y', each element of S
+*          must be positive.
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y',
+*          B is overwritten by diag(S) * B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) COMPLEX array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to
+*          the original system of equations.  Note that if EQUED = 'Y',
+*          A and B are modified on exit, and the solution to the
+*          equilibrated system is inv(diag(S))*X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) REAL
+*          The estimate of the reciprocal condition number of the matrix
+*          A after equilibration (if done).  If RCOND is less than the
+*          machine precision (in particular, if RCOND = 0), the matrix
+*          is singular to working precision.  This condition is
+*          indicated by a return code of INFO > 0.
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, and i is
+*                <= N:  the leading minor of order i of A is
+*                       not positive definite, so the factorization
+*                       could not be completed, and the solution has not
+*                       been computed. RCOND = 0 is returned.
+*                = N+1: U is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  N = 6, KD = 2, and UPLO = 'U':
+*
+*  Two-dimensional storage of the Hermitian matrix A:
+*
+*     a11  a12  a13
+*          a22  a23  a24
+*               a33  a34  a35
+*                    a44  a45  a46
+*                         a55  a56
+*     (aij=conjg(aji))         a66
+*
+*  Band storage of the upper triangle of A:
+*
+*      *    *   a13  a24  a35  a46
+*      *   a12  a23  a34  a45  a56
+*     a11  a22  a33  a44  a55  a66
+*
+*  Similarly, if UPLO = 'L' the format of A is as follows:
+*
+*     a11  a22  a33  a44  a55  a66
+*     a21  a32  a43  a54  a65   *
+*     a31  a42  a53  a64   *    *
+*
+*  Array elements marked * are not used by the routine.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            EQUIL, NOFACT, RCEQU, UPPER
+      INTEGER            I, INFEQU, J, J1, J2
+      REAL               AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               CLANHB, SLAMCH
+      EXTERNAL           LSAME, CLANHB, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CLACPY, CLAQHB, CPBCON, CPBEQU, CPBRFS,
+     $                   CPBTRF, CPBTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      EQUIL = LSAME( FACT, 'E' )
+      UPPER = LSAME( UPLO, 'U' )
+      IF( NOFACT .OR. EQUIL ) THEN
+         EQUED = 'N'
+         RCEQU = .FALSE.
+      ELSE
+         RCEQU = LSAME( EQUED, 'Y' )
+         SMLNUM = SLAMCH( 'Safe minimum' )
+         BIGNUM = ONE / SMLNUM
+      END IF
+*
+*     Test the input parameters.
+*
+      IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) )
+     $     THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -7
+      ELSE IF( LDAFB.LT.KD+1 ) THEN
+         INFO = -9
+      ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT.
+     $         ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN
+         INFO = -10
+      ELSE
+         IF( RCEQU ) THEN
+            SMIN = BIGNUM
+            SMAX = ZERO
+            DO 10 J = 1, N
+               SMIN = MIN( SMIN, S( J ) )
+               SMAX = MAX( SMAX, S( J ) )
+   10       CONTINUE
+            IF( SMIN.LE.ZERO ) THEN
+               INFO = -11
+            ELSE IF( N.GT.0 ) THEN
+               SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM )
+            ELSE
+               SCOND = ONE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            IF( LDB.LT.MAX( 1, N ) ) THEN
+               INFO = -13
+            ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+               INFO = -15
+            END IF
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPBSVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( EQUIL ) THEN
+*
+*        Compute row and column scalings to equilibrate the matrix A.
+*
+         CALL CPBEQU( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, INFEQU )
+         IF( INFEQU.EQ.0 ) THEN
+*
+*           Equilibrate the matrix.
+*
+            CALL CLAQHB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED )
+            RCEQU = LSAME( EQUED, 'Y' )
+         END IF
+      END IF
+*
+*     Scale the right-hand side.
+*
+      IF( RCEQU ) THEN
+         DO 30 J = 1, NRHS
+            DO 20 I = 1, N
+               B( I, J ) = S( I )*B( I, J )
+   20       CONTINUE
+   30    CONTINUE
+      END IF
+*
+      IF( NOFACT .OR. EQUIL ) THEN
+*
+*        Compute the Cholesky factorization A = U'*U or A = L*L'.
+*
+         IF( UPPER ) THEN
+            DO 40 J = 1, N
+               J1 = MAX( J-KD, 1 )
+               CALL CCOPY( J-J1+1, AB( KD+1-J+J1, J ), 1,
+     $                     AFB( KD+1-J+J1, J ), 1 )
+   40       CONTINUE
+         ELSE
+            DO 50 J = 1, N
+               J2 = MIN( J+KD, N )
+               CALL CCOPY( J2-J+1, AB( 1, J ), 1, AFB( 1, J ), 1 )
+   50       CONTINUE
+         END IF
+*
+         CALL CPBTRF( UPLO, N, KD, AFB, LDAFB, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      ANORM = CLANHB( '1', UPLO, N, KD, AB, LDAB, RWORK )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL CPBCON( UPLO, N, KD, AFB, LDAFB, ANORM, RCOND, WORK, RWORK,
+     $             INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.SLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution matrix X.
+*
+      CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL CPBTRS( UPLO, N, KD, NRHS, AFB, LDAFB, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solution and
+*     compute error bounds and backward error estimates for it.
+*
+      CALL CPBRFS( UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, B, LDB, X,
+     $             LDX, FERR, BERR, WORK, RWORK, INFO )
+*
+*     Transform the solution matrix X to a solution of the original
+*     system.
+*
+      IF( RCEQU ) THEN
+         DO 70 J = 1, NRHS
+            DO 60 I = 1, N
+               X( I, J ) = S( I )*X( I, J )
+   60       CONTINUE
+   70    CONTINUE
+         DO 80 J = 1, NRHS
+            FERR( J ) = FERR( J ) / SCOND
+   80    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CPBSVX
+*
+      END
diff --git a/costa/native/external/lapack/cpbtf2.f b/costa/native/external/lapack/cpbtf2.f
new file mode 100644
index 000000000..c6edd2637
--- /dev/null
+++ b/costa/native/external/lapack/cpbtf2.f
@@ -0,0 +1,201 @@
+      SUBROUTINE CPBTF2( UPLO, N, KD, AB, LDAB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPBTF2 computes the Cholesky factorization of a complex Hermitian
+*  positive definite band matrix A.
+*
+*  The factorization has the form
+*     A = U' * U ,  if UPLO = 'U', or
+*     A = L  * L',  if UPLO = 'L',
+*  where U is an upper triangular matrix, U' is the conjugate transpose
+*  of U, and L is lower triangular.
+*
+*  This is the unblocked version of the algorithm, calling Level 2 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          Hermitian matrix A is stored:
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of super-diagonals of the matrix A if UPLO = 'U',
+*          or the number of sub-diagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) COMPLEX array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*          On exit, if INFO = 0, the triangular factor U or L from the
+*          Cholesky factorization A = U'*U or A = L*L' of the band
+*          matrix A, in the same storage format as A.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*          > 0: if INFO = k, the leading minor of order k is not
+*               positive definite, and the factorization could not be
+*               completed.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  N = 6, KD = 2, and UPLO = 'U':
+*
+*  On entry:                       On exit:
+*
+*      *    *   a13  a24  a35  a46      *    *   u13  u24  u35  u46
+*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
+*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
+*
+*  Similarly, if UPLO = 'L' the format of A is as follows:
+*
+*  On entry:                       On exit:
+*
+*     a11  a22  a33  a44  a55  a66     l11  l22  l33  l44  l55  l66
+*     a21  a32  a43  a54  a65   *      l21  l32  l43  l54  l65   *
+*     a31  a42  a53  a64   *    *      l31  l42  l53  l64   *    *
+*
+*  Array elements marked * are not used by the routine.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, KLD, KN
+      REAL               AJJ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CHER, CLACGV, CSSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPBTF2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      KLD = MAX( 1, LDAB-1 )
+*
+      IF( UPPER ) THEN
+*
+*        Compute the Cholesky factorization A = U'*U.
+*
+         DO 10 J = 1, N
+*
+*           Compute U(J,J) and test for non-positive-definiteness.
+*
+            AJJ = REAL( AB( KD+1, J ) )
+            IF( AJJ.LE.ZERO ) THEN
+               AB( KD+1, J ) = AJJ
+               GO TO 30
+            END IF
+            AJJ = SQRT( AJJ )
+            AB( KD+1, J ) = AJJ
+*
+*           Compute elements J+1:J+KN of row J and update the
+*           trailing submatrix within the band.
+*
+            KN = MIN( KD, N-J )
+            IF( KN.GT.0 ) THEN
+               CALL CSSCAL( KN, ONE / AJJ, AB( KD, J+1 ), KLD )
+               CALL CLACGV( KN, AB( KD, J+1 ), KLD )
+               CALL CHER( 'Upper', KN, -ONE, AB( KD, J+1 ), KLD,
+     $                    AB( KD+1, J+1 ), KLD )
+               CALL CLACGV( KN, AB( KD, J+1 ), KLD )
+            END IF
+   10    CONTINUE
+      ELSE
+*
+*        Compute the Cholesky factorization A = L*L'.
+*
+         DO 20 J = 1, N
+*
+*           Compute L(J,J) and test for non-positive-definiteness.
+*
+            AJJ = REAL( AB( 1, J ) )
+            IF( AJJ.LE.ZERO ) THEN
+               AB( 1, J ) = AJJ
+               GO TO 30
+            END IF
+            AJJ = SQRT( AJJ )
+            AB( 1, J ) = AJJ
+*
+*           Compute elements J+1:J+KN of column J and update the
+*           trailing submatrix within the band.
+*
+            KN = MIN( KD, N-J )
+            IF( KN.GT.0 ) THEN
+               CALL CSSCAL( KN, ONE / AJJ, AB( 2, J ), 1 )
+               CALL CHER( 'Lower', KN, -ONE, AB( 2, J ), 1,
+     $                    AB( 1, J+1 ), KLD )
+            END IF
+   20    CONTINUE
+      END IF
+      RETURN
+*
+   30 CONTINUE
+      INFO = J
+      RETURN
+*
+*     End of CPBTF2
+*
+      END
diff --git a/costa/native/external/lapack/cpbtrf.f b/costa/native/external/lapack/cpbtrf.f
new file mode 100644
index 000000000..102257dd9
--- /dev/null
+++ b/costa/native/external/lapack/cpbtrf.f
@@ -0,0 +1,372 @@
+      SUBROUTINE CPBTRF( UPLO, N, KD, AB, LDAB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPBTRF computes the Cholesky factorization of a complex Hermitian
+*  positive definite band matrix A.
+*
+*  The factorization has the form
+*     A = U**H * U,  if UPLO = 'U', or
+*     A = L  * L**H,  if UPLO = 'L',
+*  where U is an upper triangular matrix and L is lower triangular.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) COMPLEX array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*          On exit, if INFO = 0, the triangular factor U or L from the
+*          Cholesky factorization A = U**H*U or A = L*L**H of the band
+*          matrix A, in the same storage format as A.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i is not
+*                positive definite, and the factorization could not be
+*                completed.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  N = 6, KD = 2, and UPLO = 'U':
+*
+*  On entry:                       On exit:
+*
+*      *    *   a13  a24  a35  a46      *    *   u13  u24  u35  u46
+*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
+*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
+*
+*  Similarly, if UPLO = 'L' the format of A is as follows:
+*
+*  On entry:                       On exit:
+*
+*     a11  a22  a33  a44  a55  a66     l11  l22  l33  l44  l55  l66
+*     a21  a32  a43  a54  a65   *      l21  l32  l43  l54  l65   *
+*     a31  a42  a53  a64   *    *      l31  l42  l53  l64   *    *
+*
+*  Array elements marked * are not used by the routine.
+*
+*  Contributed by
+*  Peter Mayes and Giuseppe Radicati, IBM ECSEC, Rome, March 23, 1989
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+      COMPLEX            CONE
+      PARAMETER          ( CONE = ( 1.0E+0, 0.0E+0 ) )
+      INTEGER            NBMAX, LDWORK
+      PARAMETER          ( NBMAX = 32, LDWORK = NBMAX+1 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, I2, I3, IB, II, J, JJ, NB
+*     ..
+*     .. Local Arrays ..
+      COMPLEX            WORK( LDWORK, NBMAX )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEMM, CHERK, CPBTF2, CPOTF2, CTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( ( .NOT.LSAME( UPLO, 'U' ) ) .AND.
+     $    ( .NOT.LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPBTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment
+*
+      NB = ILAENV( 1, 'CPBTRF', UPLO, N, KD, -1, -1 )
+*
+*     The block size must not exceed the semi-bandwidth KD, and must not
+*     exceed the limit set by the size of the local array WORK.
+*
+      NB = MIN( NB, NBMAX )
+*
+      IF( NB.LE.1 .OR. NB.GT.KD ) THEN
+*
+*        Use unblocked code
+*
+         CALL CPBTF2( UPLO, N, KD, AB, LDAB, INFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*           Compute the Cholesky factorization of a Hermitian band
+*           matrix, given the upper triangle of the matrix in band
+*           storage.
+*
+*           Zero the upper triangle of the work array.
+*
+            DO 20 J = 1, NB
+               DO 10 I = 1, J - 1
+                  WORK( I, J ) = ZERO
+   10          CONTINUE
+   20       CONTINUE
+*
+*           Process the band matrix one diagonal block at a time.
+*
+            DO 70 I = 1, N, NB
+               IB = MIN( NB, N-I+1 )
+*
+*              Factorize the diagonal block
+*
+               CALL CPOTF2( UPLO, IB, AB( KD+1, I ), LDAB-1, II )
+               IF( II.NE.0 ) THEN
+                  INFO = I + II - 1
+                  GO TO 150
+               END IF
+               IF( I+IB.LE.N ) THEN
+*
+*                 Update the relevant part of the trailing submatrix.
+*                 If A11 denotes the diagonal block which has just been
+*                 factorized, then we need to update the remaining
+*                 blocks in the diagram:
+*
+*                    A11   A12   A13
+*                          A22   A23
+*                                A33
+*
+*                 The numbers of rows and columns in the partitioning
+*                 are IB, I2, I3 respectively. The blocks A12, A22 and
+*                 A23 are empty if IB = KD. The upper triangle of A13
+*                 lies outside the band.
+*
+                  I2 = MIN( KD-IB, N-I-IB+1 )
+                  I3 = MIN( IB, N-I-KD+1 )
+*
+                  IF( I2.GT.0 ) THEN
+*
+*                    Update A12
+*
+                     CALL CTRSM( 'Left', 'Upper', 'Conjugate transpose',
+     $                           'Non-unit', IB, I2, CONE,
+     $                           AB( KD+1, I ), LDAB-1,
+     $                           AB( KD+1-IB, I+IB ), LDAB-1 )
+*
+*                    Update A22
+*
+                     CALL CHERK( 'Upper', 'Conjugate transpose', I2, IB,
+     $                           -ONE, AB( KD+1-IB, I+IB ), LDAB-1, ONE,
+     $                           AB( KD+1, I+IB ), LDAB-1 )
+                  END IF
+*
+                  IF( I3.GT.0 ) THEN
+*
+*                    Copy the lower triangle of A13 into the work array.
+*
+                     DO 40 JJ = 1, I3
+                        DO 30 II = JJ, IB
+                           WORK( II, JJ ) = AB( II-JJ+1, JJ+I+KD-1 )
+   30                   CONTINUE
+   40                CONTINUE
+*
+*                    Update A13 (in the work array).
+*
+                     CALL CTRSM( 'Left', 'Upper', 'Conjugate transpose',
+     $                           'Non-unit', IB, I3, CONE,
+     $                           AB( KD+1, I ), LDAB-1, WORK, LDWORK )
+*
+*                    Update A23
+*
+                     IF( I2.GT.0 )
+     $                  CALL CGEMM( 'Conjugate transpose',
+     $                              'No transpose', I2, I3, IB, -CONE,
+     $                              AB( KD+1-IB, I+IB ), LDAB-1, WORK,
+     $                              LDWORK, CONE, AB( 1+IB, I+KD ),
+     $                              LDAB-1 )
+*
+*                    Update A33
+*
+                     CALL CHERK( 'Upper', 'Conjugate transpose', I3, IB,
+     $                           -ONE, WORK, LDWORK, ONE,
+     $                           AB( KD+1, I+KD ), LDAB-1 )
+*
+*                    Copy the lower triangle of A13 back into place.
+*
+                     DO 60 JJ = 1, I3
+                        DO 50 II = JJ, IB
+                           AB( II-JJ+1, JJ+I+KD-1 ) = WORK( II, JJ )
+   50                   CONTINUE
+   60                CONTINUE
+                  END IF
+               END IF
+   70       CONTINUE
+         ELSE
+*
+*           Compute the Cholesky factorization of a Hermitian band
+*           matrix, given the lower triangle of the matrix in band
+*           storage.
+*
+*           Zero the lower triangle of the work array.
+*
+            DO 90 J = 1, NB
+               DO 80 I = J + 1, NB
+                  WORK( I, J ) = ZERO
+   80          CONTINUE
+   90       CONTINUE
+*
+*           Process the band matrix one diagonal block at a time.
+*
+            DO 140 I = 1, N, NB
+               IB = MIN( NB, N-I+1 )
+*
+*              Factorize the diagonal block
+*
+               CALL CPOTF2( UPLO, IB, AB( 1, I ), LDAB-1, II )
+               IF( II.NE.0 ) THEN
+                  INFO = I + II - 1
+                  GO TO 150
+               END IF
+               IF( I+IB.LE.N ) THEN
+*
+*                 Update the relevant part of the trailing submatrix.
+*                 If A11 denotes the diagonal block which has just been
+*                 factorized, then we need to update the remaining
+*                 blocks in the diagram:
+*
+*                    A11
+*                    A21   A22
+*                    A31   A32   A33
+*
+*                 The numbers of rows and columns in the partitioning
+*                 are IB, I2, I3 respectively. The blocks A21, A22 and
+*                 A32 are empty if IB = KD. The lower triangle of A31
+*                 lies outside the band.
+*
+                  I2 = MIN( KD-IB, N-I-IB+1 )
+                  I3 = MIN( IB, N-I-KD+1 )
+*
+                  IF( I2.GT.0 ) THEN
+*
+*                    Update A21
+*
+                     CALL CTRSM( 'Right', 'Lower',
+     $                           'Conjugate transpose', 'Non-unit', I2,
+     $                           IB, CONE, AB( 1, I ), LDAB-1,
+     $                           AB( 1+IB, I ), LDAB-1 )
+*
+*                    Update A22
+*
+                     CALL CHERK( 'Lower', 'No transpose', I2, IB, -ONE,
+     $                           AB( 1+IB, I ), LDAB-1, ONE,
+     $                           AB( 1, I+IB ), LDAB-1 )
+                  END IF
+*
+                  IF( I3.GT.0 ) THEN
+*
+*                    Copy the upper triangle of A31 into the work array.
+*
+                     DO 110 JJ = 1, IB
+                        DO 100 II = 1, MIN( JJ, I3 )
+                           WORK( II, JJ ) = AB( KD+1-JJ+II, JJ+I-1 )
+  100                   CONTINUE
+  110                CONTINUE
+*
+*                    Update A31 (in the work array).
+*
+                     CALL CTRSM( 'Right', 'Lower',
+     $                           'Conjugate transpose', 'Non-unit', I3,
+     $                           IB, CONE, AB( 1, I ), LDAB-1, WORK,
+     $                           LDWORK )
+*
+*                    Update A32
+*
+                     IF( I2.GT.0 )
+     $                  CALL CGEMM( 'No transpose',
+     $                              'Conjugate transpose', I3, I2, IB,
+     $                              -CONE, WORK, LDWORK, AB( 1+IB, I ),
+     $                              LDAB-1, CONE, AB( 1+KD-IB, I+IB ),
+     $                              LDAB-1 )
+*
+*                    Update A33
+*
+                     CALL CHERK( 'Lower', 'No transpose', I3, IB, -ONE,
+     $                           WORK, LDWORK, ONE, AB( 1, I+KD ),
+     $                           LDAB-1 )
+*
+*                    Copy the upper triangle of A31 back into place.
+*
+                     DO 130 JJ = 1, IB
+                        DO 120 II = 1, MIN( JJ, I3 )
+                           AB( KD+1-JJ+II, JJ+I-1 ) = WORK( II, JJ )
+  120                   CONTINUE
+  130                CONTINUE
+                  END IF
+               END IF
+  140       CONTINUE
+         END IF
+      END IF
+      RETURN
+*
+  150 CONTINUE
+      RETURN
+*
+*     End of CPBTRF
+*
+      END
diff --git a/costa/native/external/lapack/cpbtrs.f b/costa/native/external/lapack/cpbtrs.f
new file mode 100644
index 000000000..e86b69565
--- /dev/null
+++ b/costa/native/external/lapack/cpbtrs.f
@@ -0,0 +1,146 @@
+      SUBROUTINE CPBTRS( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            AB( LDAB, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPBTRS solves a system of linear equations A*X = B with a Hermitian
+*  positive definite band matrix A using the Cholesky factorization
+*  A = U**H*U or A = L*L**H computed by CPBTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangular factor stored in AB;
+*          = 'L':  Lower triangular factor stored in AB.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AB      (input) COMPLEX array, dimension (LDAB,N)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**H*U or A = L*L**H of the band matrix A, stored in the
+*          first KD+1 rows of the array.  The j-th column of U or L is
+*          stored in the j-th column of the array AB as follows:
+*          if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO ='L', AB(1+i-j,j)    = L(i,j) for j<=i<=min(n,j+kd).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CTBSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPBTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Solve A*X = B where A = U'*U.
+*
+         DO 10 J = 1, NRHS
+*
+*           Solve U'*X = B, overwriting B with X.
+*
+            CALL CTBSV( 'Upper', 'Conjugate transpose', 'Non-unit', N,
+     $                  KD, AB, LDAB, B( 1, J ), 1 )
+*
+*           Solve U*X = B, overwriting B with X.
+*
+            CALL CTBSV( 'Upper', 'No transpose', 'Non-unit', N, KD, AB,
+     $                  LDAB, B( 1, J ), 1 )
+   10    CONTINUE
+      ELSE
+*
+*        Solve A*X = B where A = L*L'.
+*
+         DO 20 J = 1, NRHS
+*
+*           Solve L*X = B, overwriting B with X.
+*
+            CALL CTBSV( 'Lower', 'No transpose', 'Non-unit', N, KD, AB,
+     $                  LDAB, B( 1, J ), 1 )
+*
+*           Solve L'*X = B, overwriting B with X.
+*
+            CALL CTBSV( 'Lower', 'Conjugate transpose', 'Non-unit', N,
+     $                  KD, AB, LDAB, B( 1, J ), 1 )
+   20    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CPBTRS
+*
+      END
diff --git a/costa/native/external/lapack/cpocon.f b/costa/native/external/lapack/cpocon.f
new file mode 100644
index 000000000..90eeb4ae6
--- /dev/null
+++ b/costa/native/external/lapack/cpocon.f
@@ -0,0 +1,180 @@
+      SUBROUTINE CPOCON( UPLO, N, A, LDA, ANORM, RCOND, WORK, RWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+      REAL               ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               RWORK( * )
+      COMPLEX            A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPOCON estimates the reciprocal of the condition number (in the
+*  1-norm) of a complex Hermitian positive definite matrix using the
+*  Cholesky factorization A = U**H*U or A = L*L**H computed by CPOTRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input) COMPLEX array, dimension (LDA,N)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**H*U or A = L*L**H, as computed by CPOTRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  ANORM   (input) REAL
+*          The 1-norm (or infinity-norm) of the Hermitian matrix A.
+*
+*  RCOND   (output) REAL
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an
+*          estimate of the 1-norm of inv(A) computed in this routine.
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      CHARACTER          NORMIN
+      INTEGER            IX, KASE
+      REAL               AINVNM, SCALE, SCALEL, SCALEU, SMLNUM
+      COMPLEX            ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ICAMAX
+      REAL               SLAMCH
+      EXTERNAL           LSAME, ICAMAX, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLACON, CLATRS, CSRSCL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MAX, REAL
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPOCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.EQ.ZERO ) THEN
+         RETURN
+      END IF
+*
+      SMLNUM = SLAMCH( 'Safe minimum' )
+*
+*     Estimate the 1-norm of inv(A).
+*
+      KASE = 0
+      NORMIN = 'N'
+   10 CONTINUE
+      CALL CLACON( N, WORK( N+1 ), WORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+         IF( UPPER ) THEN
+*
+*           Multiply by inv(U').
+*
+            CALL CLATRS( 'Upper', 'Conjugate transpose', 'Non-unit',
+     $                   NORMIN, N, A, LDA, WORK, SCALEL, RWORK, INFO )
+            NORMIN = 'Y'
+*
+*           Multiply by inv(U).
+*
+            CALL CLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   A, LDA, WORK, SCALEU, RWORK, INFO )
+         ELSE
+*
+*           Multiply by inv(L).
+*
+            CALL CLATRS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   A, LDA, WORK, SCALEL, RWORK, INFO )
+            NORMIN = 'Y'
+*
+*           Multiply by inv(L').
+*
+            CALL CLATRS( 'Lower', 'Conjugate transpose', 'Non-unit',
+     $                   NORMIN, N, A, LDA, WORK, SCALEU, RWORK, INFO )
+         END IF
+*
+*        Multiply by 1/SCALE if doing so will not cause overflow.
+*
+         SCALE = SCALEL*SCALEU
+         IF( SCALE.NE.ONE ) THEN
+            IX = ICAMAX( N, WORK, 1 )
+            IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO )
+     $         GO TO 20
+            CALL CSRSCL( N, SCALE, WORK, 1 )
+         END IF
+         GO TO 10
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+   20 CONTINUE
+      RETURN
+*
+*     End of CPOCON
+*
+      END
diff --git a/costa/native/external/lapack/cpoequ.f b/costa/native/external/lapack/cpoequ.f
new file mode 100644
index 000000000..353efbd0b
--- /dev/null
+++ b/costa/native/external/lapack/cpoequ.f
@@ -0,0 +1,138 @@
+      SUBROUTINE CPOEQU( N, A, LDA, S, SCOND, AMAX, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, N
+      REAL               AMAX, SCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               S( * )
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPOEQU computes row and column scalings intended to equilibrate a
+*  Hermitian positive definite matrix A and reduce its condition number
+*  (with respect to the two-norm).  S contains the scale factors,
+*  S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with
+*  elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal.  This
+*  choice of S puts the condition number of B within a factor N of the
+*  smallest possible condition number over all possible diagonal
+*  scalings.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input) COMPLEX array, dimension (LDA,N)
+*          The N-by-N Hermitian positive definite matrix whose scaling
+*          factors are to be computed.  Only the diagonal elements of A
+*          are referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  S       (output) REAL array, dimension (N)
+*          If INFO = 0, S contains the scale factors for A.
+*
+*  SCOND   (output) REAL
+*          If INFO = 0, S contains the ratio of the smallest S(i) to
+*          the largest S(i).  If SCOND >= 0.1 and AMAX is neither too
+*          large nor too small, it is not worth scaling by S.
+*
+*  AMAX    (output) REAL
+*          Absolute value of largest matrix element.  If AMAX is very
+*          close to overflow or very close to underflow, the matrix
+*          should be scaled.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the i-th diagonal element is nonpositive.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      REAL               SMIN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -3
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPOEQU', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         SCOND = ONE
+         AMAX = ZERO
+         RETURN
+      END IF
+*
+*     Find the minimum and maximum diagonal elements.
+*
+      S( 1 ) = REAL( A( 1, 1 ) )
+      SMIN = S( 1 )
+      AMAX = S( 1 )
+      DO 10 I = 2, N
+         S( I ) = REAL( A( I, I ) )
+         SMIN = MIN( SMIN, S( I ) )
+         AMAX = MAX( AMAX, S( I ) )
+   10 CONTINUE
+*
+      IF( SMIN.LE.ZERO ) THEN
+*
+*        Find the first non-positive diagonal element and return.
+*
+         DO 20 I = 1, N
+            IF( S( I ).LE.ZERO ) THEN
+               INFO = I
+               RETURN
+            END IF
+   20    CONTINUE
+      ELSE
+*
+*        Set the scale factors to the reciprocals
+*        of the diagonal elements.
+*
+         DO 30 I = 1, N
+            S( I ) = ONE / SQRT( S( I ) )
+   30    CONTINUE
+*
+*        Compute SCOND = min(S(I)) / max(S(I))
+*
+         SCOND = SQRT( SMIN ) / SQRT( AMAX )
+      END IF
+      RETURN
+*
+*     End of CPOEQU
+*
+      END
diff --git a/costa/native/external/lapack/cporfs.f b/costa/native/external/lapack/cporfs.f
new file mode 100644
index 000000000..d0c18012c
--- /dev/null
+++ b/costa/native/external/lapack/cporfs.f
@@ -0,0 +1,333 @@
+      SUBROUTINE CPORFS( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X,
+     $                   LDX, FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      REAL               BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX            A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPORFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is Hermitian positive definite,
+*  and provides error bounds and backward error estimates for the
+*  solution.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input) COMPLEX array, dimension (LDA,N)
+*          The Hermitian matrix A.  If UPLO = 'U', the leading N-by-N
+*          upper triangular part of A contains the upper triangular part
+*          of the matrix A, and the strictly lower triangular part of A
+*          is not referenced.  If UPLO = 'L', the leading N-by-N lower
+*          triangular part of A contains the lower triangular part of
+*          the matrix A, and the strictly upper triangular part of A is
+*          not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  AF      (input) COMPLEX array, dimension (LDAF,N)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**H*U or A = L*L**H, as computed by CPOTRF.
+*
+*  LDAF    (input) INTEGER
+*          The leading dimension of the array AF.  LDAF >= max(1,N).
+*
+*  B       (input) COMPLEX array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) COMPLEX array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by CPOTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  ====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+      REAL               TWO
+      PARAMETER          ( TWO = 2.0E+0 )
+      REAL               THREE
+      PARAMETER          ( THREE = 3.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            COUNT, I, J, K, KASE, NZ
+      REAL               EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+      COMPLEX            ZDUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CCOPY, CHEMV, CLACON, CPOTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MAX, REAL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPORFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = SLAMCH( 'Epsilon' )
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - A * X
+*
+         CALL CCOPY( N, B( 1, J ), 1, WORK, 1 )
+         CALL CHEMV( UPLO, N, -ONE, A, LDA, X( 1, J ), 1, ONE, WORK, 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            RWORK( I ) = CABS1( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(A)*abs(X) + abs(B).
+*
+         IF( UPPER ) THEN
+            DO 50 K = 1, N
+               S = ZERO
+               XK = CABS1( X( K, J ) )
+               DO 40 I = 1, K - 1
+                  RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK
+                  S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) )
+   40          CONTINUE
+               RWORK( K ) = RWORK( K ) + ABS( REAL( A( K, K ) ) )*XK + S
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               XK = CABS1( X( K, J ) )
+               RWORK( K ) = RWORK( K ) + ABS( REAL( A( K, K ) ) )*XK
+               DO 60 I = K + 1, N
+                  RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK
+                  S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) )
+   60          CONTINUE
+               RWORK( K ) = RWORK( K ) + S
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) )
+            ELSE
+               S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) /
+     $             ( RWORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL CPOTRS( UPLO, N, 1, AF, LDAF, WORK, N, INFO )
+            CALL CAXPY( N, ONE, WORK, 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(A))*
+*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(A) is the inverse of A
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(A)*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use CLACON to estimate the infinity-norm of the matrix
+*           inv(A) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I )
+            ELSE
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) +
+     $                      SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL CLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(A').
+*
+               CALL CPOTRS( UPLO, N, 1, AF, LDAF, WORK, N, INFO )
+               DO 110 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  110          CONTINUE
+            ELSE IF( KASE.EQ.2 ) THEN
+*
+*              Multiply by inv(A)*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  120          CONTINUE
+               CALL CPOTRS( UPLO, N, 1, AF, LDAF, WORK, N, INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of CPORFS
+*
+      END
diff --git a/costa/native/external/lapack/cposv.f b/costa/native/external/lapack/cposv.f
new file mode 100644
index 000000000..5c150d958
--- /dev/null
+++ b/costa/native/external/lapack/cposv.f
@@ -0,0 +1,122 @@
+      SUBROUTINE CPOSV( UPLO, N, NRHS, A, LDA, B, LDB, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPOSV computes the solution to a complex system of linear equations
+*     A * X = B,
+*  where A is an N-by-N Hermitian positive definite matrix and X and B
+*  are N-by-NRHS matrices.
+*
+*  The Cholesky decomposition is used to factor A as
+*     A = U**H* U,  if UPLO = 'U', or
+*     A = L * L**H,  if UPLO = 'L',
+*  where U is an upper triangular matrix and  L is a lower triangular
+*  matrix.  The factored form of A is then used to solve the system of
+*  equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i of A is not
+*                positive definite, so the factorization could not be
+*                completed, and the solution has not been computed.
+*
+*  =====================================================================
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CPOTRF, CPOTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPOSV ', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the Cholesky factorization A = U'*U or A = L*L'.
+*
+      CALL CPOTRF( UPLO, N, A, LDA, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL CPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO )
+*
+      END IF
+      RETURN
+*
+*     End of CPOSV
+*
+      END
diff --git a/costa/native/external/lapack/cposvx.f b/costa/native/external/lapack/cposvx.f
new file mode 100644
index 000000000..2ae696ca8
--- /dev/null
+++ b/costa/native/external/lapack/cposvx.f
@@ -0,0 +1,378 @@
+      SUBROUTINE CPOSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, EQUED,
+     $                   S, B, LDB, X, LDX, RCOND, FERR, BERR, WORK,
+     $                   RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, FACT, UPLO
+      INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               BERR( * ), FERR( * ), RWORK( * ), S( * )
+      COMPLEX            A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPOSVX uses the Cholesky factorization A = U**H*U or A = L*L**H to
+*  compute the solution to a complex system of linear equations
+*     A * X = B,
+*  where A is an N-by-N Hermitian positive definite matrix and X and B
+*  are N-by-NRHS matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'E', real scaling factors are computed to equilibrate
+*     the system:
+*        diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B
+*     Whether or not the system will be equilibrated depends on the
+*     scaling of the matrix A, but if equilibration is used, A is
+*     overwritten by diag(S)*A*diag(S) and B by diag(S)*B.
+*
+*  2. If FACT = 'N' or 'E', the Cholesky decomposition is used to
+*     factor the matrix A (after equilibration if FACT = 'E') as
+*        A = U**H* U,  if UPLO = 'U', or
+*        A = L * L**H,  if UPLO = 'L',
+*     where U is an upper triangular matrix and L is a lower triangular
+*     matrix.
+*
+*  3. If the leading i-by-i principal minor is not positive definite,
+*     then the routine returns with INFO = i. Otherwise, the factored
+*     form of A is used to estimate the condition number of the matrix
+*     A.  If the reciprocal of the condition number is less than machine
+*     precision, INFO = N+1 is returned as a warning, but the routine
+*     still goes on to solve for X and compute error bounds as
+*     described below.
+*
+*  4. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  5. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  6. If equilibration was used, the matrix X is premultiplied by
+*     diag(S) so that it solves the original system before
+*     equilibration.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of the matrix A is
+*          supplied on entry, and if not, whether the matrix A should be
+*          equilibrated before it is factored.
+*          = 'F':  On entry, AF contains the factored form of A.
+*                  If EQUED = 'Y', the matrix A has been equilibrated
+*                  with scaling factors given by S.  A and AF will not
+*                  be modified.
+*          = 'N':  The matrix A will be copied to AF and factored.
+*          = 'E':  The matrix A will be equilibrated if necessary, then
+*                  copied to AF and factored.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the Hermitian matrix A, except if FACT = 'F' and
+*          EQUED = 'Y', then A must contain the equilibrated matrix
+*          diag(S)*A*diag(S).  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.  A is not modified if
+*          FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit.
+*
+*          On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by
+*          diag(S)*A*diag(S).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  AF      (input or output) COMPLEX array, dimension (LDAF,N)
+*          If FACT = 'F', then AF is an input argument and on entry
+*          contains the triangular factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H, in the same storage
+*          format as A.  If EQUED .ne. 'N', then AF is the factored form
+*          of the equilibrated matrix diag(S)*A*diag(S).
+*
+*          If FACT = 'N', then AF is an output argument and on exit
+*          returns the triangular factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H of the original
+*          matrix A.
+*
+*          If FACT = 'E', then AF is an output argument and on exit
+*          returns the triangular factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H of the equilibrated
+*          matrix A (see the description of A for the form of the
+*          equilibrated matrix).
+*
+*  LDAF    (input) INTEGER
+*          The leading dimension of the array AF.  LDAF >= max(1,N).
+*
+*  EQUED   (input or output) CHARACTER*1
+*          Specifies the form of equilibration that was done.
+*          = 'N':  No equilibration (always true if FACT = 'N').
+*          = 'Y':  Equilibration was done, i.e., A has been replaced by
+*                  diag(S) * A * diag(S).
+*          EQUED is an input argument if FACT = 'F'; otherwise, it is an
+*          output argument.
+*
+*  S       (input or output) REAL array, dimension (N)
+*          The scale factors for A; not accessed if EQUED = 'N'.  S is
+*          an input argument if FACT = 'F'; otherwise, S is an output
+*          argument.  If FACT = 'F' and EQUED = 'Y', each element of S
+*          must be positive.
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS righthand side matrix B.
+*          On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y',
+*          B is overwritten by diag(S) * B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) COMPLEX array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to
+*          the original system of equations.  Note that if EQUED = 'Y',
+*          A and B are modified on exit, and the solution to the
+*          equilibrated system is inv(diag(S))*X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) REAL
+*          The estimate of the reciprocal condition number of the matrix
+*          A after equilibration (if done).  If RCOND is less than the
+*          machine precision (in particular, if RCOND = 0), the matrix
+*          is singular to working precision.  This condition is
+*          indicated by a return code of INFO > 0.
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, and i is
+*                <= N:  the leading minor of order i of A is
+*                       not positive definite, so the factorization
+*                       could not be completed, and the solution has not
+*                       been computed. RCOND = 0 is returned.
+*                = N+1: U is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            EQUIL, NOFACT, RCEQU
+      INTEGER            I, INFEQU, J
+      REAL               AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               CLANHE, SLAMCH
+      EXTERNAL           LSAME, CLANHE, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLACPY, CLAQHE, CPOCON, CPOEQU, CPORFS, CPOTRF,
+     $                   CPOTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      EQUIL = LSAME( FACT, 'E' )
+      IF( NOFACT .OR. EQUIL ) THEN
+         EQUED = 'N'
+         RCEQU = .FALSE.
+      ELSE
+         RCEQU = LSAME( EQUED, 'Y' )
+         SMLNUM = SLAMCH( 'Safe minimum' )
+         BIGNUM = ONE / SMLNUM
+      END IF
+*
+*     Test the input parameters.
+*
+      IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) )
+     $     THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT.
+     $         ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN
+         INFO = -9
+      ELSE
+         IF( RCEQU ) THEN
+            SMIN = BIGNUM
+            SMAX = ZERO
+            DO 10 J = 1, N
+               SMIN = MIN( SMIN, S( J ) )
+               SMAX = MAX( SMAX, S( J ) )
+   10       CONTINUE
+            IF( SMIN.LE.ZERO ) THEN
+               INFO = -10
+            ELSE IF( N.GT.0 ) THEN
+               SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM )
+            ELSE
+               SCOND = ONE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            IF( LDB.LT.MAX( 1, N ) ) THEN
+               INFO = -12
+            ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+               INFO = -14
+            END IF
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPOSVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( EQUIL ) THEN
+*
+*        Compute row and column scalings to equilibrate the matrix A.
+*
+         CALL CPOEQU( N, A, LDA, S, SCOND, AMAX, INFEQU )
+         IF( INFEQU.EQ.0 ) THEN
+*
+*           Equilibrate the matrix.
+*
+            CALL CLAQHE( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED )
+            RCEQU = LSAME( EQUED, 'Y' )
+         END IF
+      END IF
+*
+*     Scale the right hand side.
+*
+      IF( RCEQU ) THEN
+         DO 30 J = 1, NRHS
+            DO 20 I = 1, N
+               B( I, J ) = S( I )*B( I, J )
+   20       CONTINUE
+   30    CONTINUE
+      END IF
+*
+      IF( NOFACT .OR. EQUIL ) THEN
+*
+*        Compute the Cholesky factorization A = U'*U or A = L*L'.
+*
+         CALL CLACPY( UPLO, N, N, A, LDA, AF, LDAF )
+         CALL CPOTRF( UPLO, N, AF, LDAF, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      ANORM = CLANHE( '1', UPLO, N, A, LDA, RWORK )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL CPOCON( UPLO, N, AF, LDAF, ANORM, RCOND, WORK, RWORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.SLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution matrix X.
+*
+      CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL CPOTRS( UPLO, N, NRHS, AF, LDAF, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solution and
+*     compute error bounds and backward error estimates for it.
+*
+      CALL CPORFS( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X, LDX,
+     $             FERR, BERR, WORK, RWORK, INFO )
+*
+*     Transform the solution matrix X to a solution of the original
+*     system.
+*
+      IF( RCEQU ) THEN
+         DO 50 J = 1, NRHS
+            DO 40 I = 1, N
+               X( I, J ) = S( I )*X( I, J )
+   40       CONTINUE
+   50    CONTINUE
+         DO 60 J = 1, NRHS
+            FERR( J ) = FERR( J ) / SCOND
+   60    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CPOSVX
+*
+      END
diff --git a/costa/native/external/lapack/cpotf2.f b/costa/native/external/lapack/cpotf2.f
new file mode 100644
index 000000000..73cb0b3ea
--- /dev/null
+++ b/costa/native/external/lapack/cpotf2.f
@@ -0,0 +1,175 @@
+      SUBROUTINE CPOTF2( UPLO, N, A, LDA, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPOTF2 computes the Cholesky factorization of a complex Hermitian
+*  positive definite matrix A.
+*
+*  The factorization has the form
+*     A = U' * U ,  if UPLO = 'U', or
+*     A = L  * L',  if UPLO = 'L',
+*  where U is an upper triangular matrix and L is lower triangular.
+*
+*  This is the unblocked version of the algorithm, calling Level 2 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          Hermitian matrix A is stored.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+*          n by n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n by n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the factor U or L from the Cholesky
+*          factorization A = U'*U  or A = L*L'.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*          > 0: if INFO = k, the leading minor of order k is not
+*               positive definite, and the factorization could not be
+*               completed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+      COMPLEX            CONE
+      PARAMETER          ( CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J
+      REAL               AJJ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      COMPLEX            CDOTC
+      EXTERNAL           LSAME, CDOTC
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEMV, CLACGV, CSSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPOTF2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Compute the Cholesky factorization A = U'*U.
+*
+         DO 10 J = 1, N
+*
+*           Compute U(J,J) and test for non-positive-definiteness.
+*
+            AJJ = REAL( A( J, J ) ) - CDOTC( J-1, A( 1, J ), 1,
+     $            A( 1, J ), 1 )
+            IF( AJJ.LE.ZERO ) THEN
+               A( J, J ) = AJJ
+               GO TO 30
+            END IF
+            AJJ = SQRT( AJJ )
+            A( J, J ) = AJJ
+*
+*           Compute elements J+1:N of row J.
+*
+            IF( J.LT.N ) THEN
+               CALL CLACGV( J-1, A( 1, J ), 1 )
+               CALL CGEMV( 'Transpose', J-1, N-J, -CONE, A( 1, J+1 ),
+     $                     LDA, A( 1, J ), 1, CONE, A( J, J+1 ), LDA )
+               CALL CLACGV( J-1, A( 1, J ), 1 )
+               CALL CSSCAL( N-J, ONE / AJJ, A( J, J+1 ), LDA )
+            END IF
+   10    CONTINUE
+      ELSE
+*
+*        Compute the Cholesky factorization A = L*L'.
+*
+         DO 20 J = 1, N
+*
+*           Compute L(J,J) and test for non-positive-definiteness.
+*
+            AJJ = REAL( A( J, J ) ) - CDOTC( J-1, A( J, 1 ), LDA,
+     $            A( J, 1 ), LDA )
+            IF( AJJ.LE.ZERO ) THEN
+               A( J, J ) = AJJ
+               GO TO 30
+            END IF
+            AJJ = SQRT( AJJ )
+            A( J, J ) = AJJ
+*
+*           Compute elements J+1:N of column J.
+*
+            IF( J.LT.N ) THEN
+               CALL CLACGV( J-1, A( J, 1 ), LDA )
+               CALL CGEMV( 'No transpose', N-J, J-1, -CONE, A( J+1, 1 ),
+     $                     LDA, A( J, 1 ), LDA, CONE, A( J+1, J ), 1 )
+               CALL CLACGV( J-1, A( J, 1 ), LDA )
+               CALL CSSCAL( N-J, ONE / AJJ, A( J+1, J ), 1 )
+            END IF
+   20    CONTINUE
+      END IF
+      GO TO 40
+*
+   30 CONTINUE
+      INFO = J
+*
+   40 CONTINUE
+      RETURN
+*
+*     End of CPOTF2
+*
+      END
diff --git a/costa/native/external/lapack/cpotrf.f b/costa/native/external/lapack/cpotrf.f
new file mode 100644
index 000000000..bbbc6dff5
--- /dev/null
+++ b/costa/native/external/lapack/cpotrf.f
@@ -0,0 +1,187 @@
+      SUBROUTINE CPOTRF( UPLO, N, A, LDA, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPOTRF computes the Cholesky factorization of a complex Hermitian
+*  positive definite matrix A.
+*
+*  The factorization has the form
+*     A = U**H * U,  if UPLO = 'U', or
+*     A = L  * L**H,  if UPLO = 'L',
+*  where U is an upper triangular matrix and L is lower triangular.
+*
+*  This is the block version of the algorithm, calling Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i is not
+*                positive definite, and the factorization could not be
+*                completed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      COMPLEX            CONE
+      PARAMETER          ( ONE = 1.0E+0, CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, JB, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEMM, CHERK, CPOTF2, CTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPOTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'CPOTRF', UPLO, N, -1, -1, -1 )
+      IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+*        Use unblocked code.
+*
+         CALL CPOTF2( UPLO, N, A, LDA, INFO )
+      ELSE
+*
+*        Use blocked code.
+*
+         IF( UPPER ) THEN
+*
+*           Compute the Cholesky factorization A = U'*U.
+*
+            DO 10 J = 1, N, NB
+*
+*              Update and factorize the current diagonal block and test
+*              for non-positive-definiteness.
+*
+               JB = MIN( NB, N-J+1 )
+               CALL CHERK( 'Upper', 'Conjugate transpose', JB, J-1,
+     $                     -ONE, A( 1, J ), LDA, ONE, A( J, J ), LDA )
+               CALL CPOTF2( 'Upper', JB, A( J, J ), LDA, INFO )
+               IF( INFO.NE.0 )
+     $            GO TO 30
+               IF( J+JB.LE.N ) THEN
+*
+*                 Compute the current block row.
+*
+                  CALL CGEMM( 'Conjugate transpose', 'No transpose', JB,
+     $                        N-J-JB+1, J-1, -CONE, A( 1, J ), LDA,
+     $                        A( 1, J+JB ), LDA, CONE, A( J, J+JB ),
+     $                        LDA )
+                  CALL CTRSM( 'Left', 'Upper', 'Conjugate transpose',
+     $                        'Non-unit', JB, N-J-JB+1, CONE, A( J, J ),
+     $                        LDA, A( J, J+JB ), LDA )
+               END IF
+   10       CONTINUE
+*
+         ELSE
+*
+*           Compute the Cholesky factorization A = L*L'.
+*
+            DO 20 J = 1, N, NB
+*
+*              Update and factorize the current diagonal block and test
+*              for non-positive-definiteness.
+*
+               JB = MIN( NB, N-J+1 )
+               CALL CHERK( 'Lower', 'No transpose', JB, J-1, -ONE,
+     $                     A( J, 1 ), LDA, ONE, A( J, J ), LDA )
+               CALL CPOTF2( 'Lower', JB, A( J, J ), LDA, INFO )
+               IF( INFO.NE.0 )
+     $            GO TO 30
+               IF( J+JB.LE.N ) THEN
+*
+*                 Compute the current block column.
+*
+                  CALL CGEMM( 'No transpose', 'Conjugate transpose',
+     $                        N-J-JB+1, JB, J-1, -CONE, A( J+JB, 1 ),
+     $                        LDA, A( J, 1 ), LDA, CONE, A( J+JB, J ),
+     $                        LDA )
+                  CALL CTRSM( 'Right', 'Lower', 'Conjugate transpose',
+     $                        'Non-unit', N-J-JB+1, JB, CONE, A( J, J ),
+     $                        LDA, A( J+JB, J ), LDA )
+               END IF
+   20       CONTINUE
+         END IF
+      END IF
+      GO TO 40
+*
+   30 CONTINUE
+      INFO = INFO + J - 1
+*
+   40 CONTINUE
+      RETURN
+*
+*     End of CPOTRF
+*
+      END
diff --git a/costa/native/external/lapack/cpotri.f b/costa/native/external/lapack/cpotri.f
new file mode 100644
index 000000000..d1dc0291e
--- /dev/null
+++ b/costa/native/external/lapack/cpotri.f
@@ -0,0 +1,97 @@
+      SUBROUTINE CPOTRI( UPLO, N, A, LDA, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPOTRI computes the inverse of a complex Hermitian positive definite
+*  matrix A using the Cholesky factorization A = U**H*U or A = L*L**H
+*  computed by CPOTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the triangular factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H, as computed by
+*          CPOTRF.
+*          On exit, the upper or lower triangle of the (Hermitian)
+*          inverse of A, overwriting the input factor U or L.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the (i,i) element of the factor U or L is
+*                zero, and the inverse could not be computed.
+*
+*  =====================================================================
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLAUUM, CTRTRI, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPOTRI', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Invert the triangular Cholesky factor U or L.
+*
+      CALL CTRTRI( UPLO, 'Non-unit', N, A, LDA, INFO )
+      IF( INFO.GT.0 )
+     $   RETURN
+*
+*     Form inv(U)*inv(U)' or inv(L)'*inv(L).
+*
+      CALL CLAUUM( UPLO, N, A, LDA, INFO )
+*
+      RETURN
+*
+*     End of CPOTRI
+*
+      END
diff --git a/costa/native/external/lapack/cpotrs.f b/costa/native/external/lapack/cpotrs.f
new file mode 100644
index 000000000..d779e2977
--- /dev/null
+++ b/costa/native/external/lapack/cpotrs.f
@@ -0,0 +1,133 @@
+      SUBROUTINE CPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPOTRS solves a system of linear equations A*X = B with a Hermitian
+*  positive definite matrix A using the Cholesky factorization 
+*  A = U**H*U or A = L*L**H computed by CPOTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input) COMPLEX array, dimension (LDA,N)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**H*U or A = L*L**H, as computed by CPOTRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPOTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Solve A*X = B where A = U'*U.
+*
+*        Solve U'*X = B, overwriting B with X.
+*
+         CALL CTRSM( 'Left', 'Upper', 'Conjugate transpose', 'Non-unit',
+     $               N, NRHS, ONE, A, LDA, B, LDB )
+*
+*        Solve U*X = B, overwriting B with X.
+*
+         CALL CTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N,
+     $               NRHS, ONE, A, LDA, B, LDB )
+      ELSE
+*
+*        Solve A*X = B where A = L*L'.
+*
+*        Solve L*X = B, overwriting B with X.
+*
+         CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', N,
+     $               NRHS, ONE, A, LDA, B, LDB )
+*
+*        Solve L'*X = B, overwriting B with X.
+*
+         CALL CTRSM( 'Left', 'Lower', 'Conjugate transpose', 'Non-unit',
+     $               N, NRHS, ONE, A, LDA, B, LDB )
+      END IF
+*
+      RETURN
+*
+*     End of CPOTRS
+*
+      END
diff --git a/costa/native/external/lapack/cppcon.f b/costa/native/external/lapack/cppcon.f
new file mode 100644
index 000000000..8fdddfe17
--- /dev/null
+++ b/costa/native/external/lapack/cppcon.f
@@ -0,0 +1,179 @@
+      SUBROUTINE CPPCON( UPLO, N, AP, ANORM, RCOND, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+      REAL               ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               RWORK( * )
+      COMPLEX            AP( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPPCON estimates the reciprocal of the condition number (in the 
+*  1-norm) of a complex Hermitian positive definite packed matrix using
+*  the Cholesky factorization A = U**H*U or A = L*L**H computed by
+*  CPPTRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input) COMPLEX array, dimension (N*(N+1)/2)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**H*U or A = L*L**H, packed columnwise in a linear
+*          array.  The j-th column of U or L is stored in the array AP
+*          as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n.
+*
+*  ANORM   (input) REAL
+*          The 1-norm (or infinity-norm) of the Hermitian matrix A.
+*
+*  RCOND   (output) REAL
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an
+*          estimate of the 1-norm of inv(A) computed in this routine.
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      CHARACTER          NORMIN
+      INTEGER            IX, KASE
+      REAL               AINVNM, SCALE, SCALEL, SCALEU, SMLNUM
+      COMPLEX            ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ICAMAX
+      REAL               SLAMCH
+      EXTERNAL           LSAME, ICAMAX, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLACON, CLATPS, CSRSCL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, REAL
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPPCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.EQ.ZERO ) THEN
+         RETURN
+      END IF
+*
+      SMLNUM = SLAMCH( 'Safe minimum' )
+*
+*     Estimate the 1-norm of the inverse.
+*
+      KASE = 0
+      NORMIN = 'N'
+   10 CONTINUE
+      CALL CLACON( N, WORK( N+1 ), WORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+         IF( UPPER ) THEN
+*
+*           Multiply by inv(U').
+*
+            CALL CLATPS( 'Upper', 'Conjugate transpose', 'Non-unit',
+     $                   NORMIN, N, AP, WORK, SCALEL, RWORK, INFO )
+            NORMIN = 'Y'
+*
+*           Multiply by inv(U).
+*
+            CALL CLATPS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   AP, WORK, SCALEU, RWORK, INFO )
+         ELSE
+*
+*           Multiply by inv(L).
+*
+            CALL CLATPS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   AP, WORK, SCALEL, RWORK, INFO )
+            NORMIN = 'Y'
+*
+*           Multiply by inv(L').
+*
+            CALL CLATPS( 'Lower', 'Conjugate transpose', 'Non-unit',
+     $                   NORMIN, N, AP, WORK, SCALEU, RWORK, INFO )
+         END IF
+*
+*        Multiply by 1/SCALE if doing so will not cause overflow.
+*
+         SCALE = SCALEL*SCALEU
+         IF( SCALE.NE.ONE ) THEN
+            IX = ICAMAX( N, WORK, 1 )
+            IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO )
+     $         GO TO 20
+            CALL CSRSCL( N, SCALE, WORK, 1 )
+         END IF
+         GO TO 10
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+   20 CONTINUE
+      RETURN
+*
+*     End of CPPCON
+*
+      END
diff --git a/costa/native/external/lapack/cppequ.f b/costa/native/external/lapack/cppequ.f
new file mode 100644
index 000000000..1001cc6e8
--- /dev/null
+++ b/costa/native/external/lapack/cppequ.f
@@ -0,0 +1,170 @@
+      SUBROUTINE CPPEQU( UPLO, N, AP, S, SCOND, AMAX, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+      REAL               AMAX, SCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               S( * )
+      COMPLEX            AP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPPEQU computes row and column scalings intended to equilibrate a
+*  Hermitian positive definite matrix A in packed storage and reduce
+*  its condition number (with respect to the two-norm).  S contains the
+*  scale factors, S(i)=1/sqrt(A(i,i)), chosen so that the scaled matrix
+*  B with elements B(i,j)=S(i)*A(i,j)*S(j) has ones on the diagonal.
+*  This choice of S puts the condition number of B within a factor N of
+*  the smallest possible condition number over all possible diagonal
+*  scalings.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input) COMPLEX array, dimension (N*(N+1)/2)
+*          The upper or lower triangle of the Hermitian matrix A, packed
+*          columnwise in a linear array.  The j-th column of A is stored
+*          in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*  S       (output) REAL array, dimension (N)
+*          If INFO = 0, S contains the scale factors for A.
+*
+*  SCOND   (output) REAL
+*          If INFO = 0, S contains the ratio of the smallest S(i) to
+*          the largest S(i).  If SCOND >= 0.1 and AMAX is neither too
+*          large nor too small, it is not worth scaling by S.
+*
+*  AMAX    (output) REAL
+*          Absolute value of largest matrix element.  If AMAX is very
+*          close to overflow or very close to underflow, the matrix
+*          should be scaled.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the i-th diagonal element is nonpositive.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, JJ
+      REAL               SMIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPPEQU', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         SCOND = ONE
+         AMAX = ZERO
+         RETURN
+      END IF
+*
+*     Initialize SMIN and AMAX.
+*
+      S( 1 ) = REAL( AP( 1 ) )
+      SMIN = S( 1 )
+      AMAX = S( 1 )
+*
+      IF( UPPER ) THEN
+*
+*        UPLO = 'U':  Upper triangle of A is stored.
+*        Find the minimum and maximum diagonal elements.
+*
+         JJ = 1
+         DO 10 I = 2, N
+            JJ = JJ + I
+            S( I ) = REAL( AP( JJ ) )
+            SMIN = MIN( SMIN, S( I ) )
+            AMAX = MAX( AMAX, S( I ) )
+   10    CONTINUE
+*
+      ELSE
+*
+*        UPLO = 'L':  Lower triangle of A is stored.
+*        Find the minimum and maximum diagonal elements.
+*
+         JJ = 1
+         DO 20 I = 2, N
+            JJ = JJ + N - I + 2
+            S( I ) = REAL( AP( JJ ) )
+            SMIN = MIN( SMIN, S( I ) )
+            AMAX = MAX( AMAX, S( I ) )
+   20    CONTINUE
+      END IF
+*
+      IF( SMIN.LE.ZERO ) THEN
+*
+*        Find the first non-positive diagonal element and return.
+*
+         DO 30 I = 1, N
+            IF( S( I ).LE.ZERO ) THEN
+               INFO = I
+               RETURN
+            END IF
+   30    CONTINUE
+      ELSE
+*
+*        Set the scale factors to the reciprocals
+*        of the diagonal elements.
+*
+         DO 40 I = 1, N
+            S( I ) = ONE / SQRT( S( I ) )
+   40    CONTINUE
+*
+*        Compute SCOND = min(S(I)) / max(S(I))
+*
+         SCOND = SQRT( SMIN ) / SQRT( AMAX )
+      END IF
+      RETURN
+*
+*     End of CPPEQU
+*
+      END
diff --git a/costa/native/external/lapack/cpprfs.f b/costa/native/external/lapack/cpprfs.f
new file mode 100644
index 000000000..53e6ff3db
--- /dev/null
+++ b/costa/native/external/lapack/cpprfs.f
@@ -0,0 +1,331 @@
+      SUBROUTINE CPPRFS( UPLO, N, NRHS, AP, AFP, B, LDB, X, LDX, FERR,
+     $                   BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      REAL               BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX            AFP( * ), AP( * ), B( LDB, * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPPRFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is Hermitian positive definite
+*  and packed, and provides error bounds and backward error estimates
+*  for the solution.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AP      (input) COMPLEX array, dimension (N*(N+1)/2)
+*          The upper or lower triangle of the Hermitian matrix A, packed
+*          columnwise in a linear array.  The j-th column of A is stored
+*          in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*  AFP     (input) COMPLEX array, dimension (N*(N+1)/2)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**H*U or A = L*L**H, as computed by SPPTRF/CPPTRF,
+*          packed columnwise in a linear array in the same format as A
+*          (see AP).
+*
+*  B       (input) COMPLEX array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) COMPLEX array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by CPPTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  ====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+      COMPLEX            CONE
+      PARAMETER          ( CONE = ( 1.0E+0, 0.0E+0 ) )
+      REAL               TWO
+      PARAMETER          ( TWO = 2.0E+0 )
+      REAL               THREE
+      PARAMETER          ( THREE = 3.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            COUNT, I, IK, J, K, KASE, KK, NZ
+      REAL               EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+      COMPLEX            ZDUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CCOPY, CHPMV, CLACON, CPPTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MAX, REAL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPPRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = SLAMCH( 'Epsilon' )
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - A * X
+*
+         CALL CCOPY( N, B( 1, J ), 1, WORK, 1 )
+         CALL CHPMV( UPLO, N, -CONE, AP, X( 1, J ), 1, CONE, WORK, 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            RWORK( I ) = CABS1( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(A)*abs(X) + abs(B).
+*
+         KK = 1
+         IF( UPPER ) THEN
+            DO 50 K = 1, N
+               S = ZERO
+               XK = CABS1( X( K, J ) )
+               IK = KK
+               DO 40 I = 1, K - 1
+                  RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK
+                  S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) )
+                  IK = IK + 1
+   40          CONTINUE
+               RWORK( K ) = RWORK( K ) + ABS( REAL( AP( KK+K-1 ) ) )*
+     $                      XK + S
+               KK = KK + K
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               XK = CABS1( X( K, J ) )
+               RWORK( K ) = RWORK( K ) + ABS( REAL( AP( KK ) ) )*XK
+               IK = KK + 1
+               DO 60 I = K + 1, N
+                  RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK
+                  S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) )
+                  IK = IK + 1
+   60          CONTINUE
+               RWORK( K ) = RWORK( K ) + S
+               KK = KK + ( N-K+1 )
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) )
+            ELSE
+               S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) /
+     $             ( RWORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL CPPTRS( UPLO, N, 1, AFP, WORK, N, INFO )
+            CALL CAXPY( N, CONE, WORK, 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(A))*
+*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(A) is the inverse of A
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(A)*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use CLACON to estimate the infinity-norm of the matrix
+*           inv(A) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I )
+            ELSE
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) +
+     $                      SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL CLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(A').
+*
+               CALL CPPTRS( UPLO, N, 1, AFP, WORK, N, INFO )
+               DO 110 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  110          CONTINUE
+            ELSE IF( KASE.EQ.2 ) THEN
+*
+*              Multiply by inv(A)*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  120          CONTINUE
+               CALL CPPTRS( UPLO, N, 1, AFP, WORK, N, INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of CPPRFS
+*
+      END
diff --git a/costa/native/external/lapack/cppsv.f b/costa/native/external/lapack/cppsv.f
new file mode 100644
index 000000000..73187dfad
--- /dev/null
+++ b/costa/native/external/lapack/cppsv.f
@@ -0,0 +1,134 @@
+      SUBROUTINE CPPSV( UPLO, N, NRHS, AP, B, LDB, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            AP( * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPPSV computes the solution to a complex system of linear equations
+*     A * X = B,
+*  where A is an N-by-N Hermitian positive definite matrix stored in
+*  packed format and X and B are N-by-NRHS matrices.
+*
+*  The Cholesky decomposition is used to factor A as
+*     A = U**H* U,  if UPLO = 'U', or
+*     A = L * L**H,  if UPLO = 'L',
+*  where U is an upper triangular matrix and L is a lower triangular
+*  matrix.  The factored form of A is then used to solve the system of
+*  equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AP      (input/output) COMPLEX array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*          See below for further details.  
+*
+*          On exit, if INFO = 0, the factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H, in the same storage
+*          format as A.
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i of A is not
+*                positive definite, so the factorization could not be
+*                completed, and the solution has not been computed.
+*
+*  Further Details
+*  ===============
+*
+*  The packed storage scheme is illustrated by the following example
+*  when N = 4, UPLO = 'U':
+*
+*  Two-dimensional storage of the Hermitian matrix A:
+*
+*     a11 a12 a13 a14
+*         a22 a23 a24
+*             a33 a34     (aij = conjg(aji))
+*                 a44
+*
+*  Packed storage of the upper triangle of A:
+*
+*  AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ]
+*
+*  =====================================================================
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CPPTRF, CPPTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPPSV ', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the Cholesky factorization A = U'*U or A = L*L'.
+*
+      CALL CPPTRF( UPLO, N, AP, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL CPPTRS( UPLO, N, NRHS, AP, B, LDB, INFO )
+*
+      END IF
+      RETURN
+*
+*     End of CPPSV
+*
+      END
diff --git a/costa/native/external/lapack/cppsvx.f b/costa/native/external/lapack/cppsvx.f
new file mode 100644
index 000000000..07c7047ec
--- /dev/null
+++ b/costa/native/external/lapack/cppsvx.f
@@ -0,0 +1,383 @@
+      SUBROUTINE CPPSVX( FACT, UPLO, N, NRHS, AP, AFP, EQUED, S, B, LDB,
+     $                   X, LDX, RCOND, FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, FACT, UPLO
+      INTEGER            INFO, LDB, LDX, N, NRHS
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               BERR( * ), FERR( * ), RWORK( * ), S( * )
+      COMPLEX            AFP( * ), AP( * ), B( LDB, * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPPSVX uses the Cholesky factorization A = U**H*U or A = L*L**H to
+*  compute the solution to a complex system of linear equations
+*     A * X = B,
+*  where A is an N-by-N Hermitian positive definite matrix stored in
+*  packed format and X and B are N-by-NRHS matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'E', real scaling factors are computed to equilibrate
+*     the system:
+*        diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B
+*     Whether or not the system will be equilibrated depends on the
+*     scaling of the matrix A, but if equilibration is used, A is
+*     overwritten by diag(S)*A*diag(S) and B by diag(S)*B.
+*
+*  2. If FACT = 'N' or 'E', the Cholesky decomposition is used to
+*     factor the matrix A (after equilibration if FACT = 'E') as
+*        A = U'* U ,  if UPLO = 'U', or
+*        A = L * L',  if UPLO = 'L',
+*     where U is an upper triangular matrix, L is a lower triangular
+*     matrix, and ' indicates conjugate transpose.
+*
+*  3. If the leading i-by-i principal minor is not positive definite,
+*     then the routine returns with INFO = i. Otherwise, the factored
+*     form of A is used to estimate the condition number of the matrix
+*     A.  If the reciprocal of the condition number is less than machine
+*     precision, INFO = N+1 is returned as a warning, but the routine
+*     still goes on to solve for X and compute error bounds as
+*     described below.
+*
+*  4. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  5. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  6. If equilibration was used, the matrix X is premultiplied by
+*     diag(S) so that it solves the original system before
+*     equilibration.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of the matrix A is
+*          supplied on entry, and if not, whether the matrix A should be
+*          equilibrated before it is factored.
+*          = 'F':  On entry, AFP contains the factored form of A.
+*                  If EQUED = 'Y', the matrix A has been equilibrated
+*                  with scaling factors given by S.  AP and AFP will not
+*                  be modified.
+*          = 'N':  The matrix A will be copied to AFP and factored.
+*          = 'E':  The matrix A will be equilibrated if necessary, then
+*                  copied to AFP and factored.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AP      (input/output) COMPLEX array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          A, packed columnwise in a linear array, except if FACT = 'F'
+*          and EQUED = 'Y', then A must contain the equilibrated matrix
+*          diag(S)*A*diag(S).  The j-th column of A is stored in the
+*          array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*          See below for further details.  A is not modified if
+*          FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit.
+*
+*          On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by
+*          diag(S)*A*diag(S).
+*
+*  AFP     (input or output) COMPLEX array, dimension (N*(N+1)/2)
+*          If FACT = 'F', then AFP is an input argument and on entry
+*          contains the triangular factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H, in the same storage
+*          format as A.  If EQUED .ne. 'N', then AFP is the factored
+*          form of the equilibrated matrix A.
+*
+*          If FACT = 'N', then AFP is an output argument and on exit
+*          returns the triangular factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H of the original
+*          matrix A.
+*
+*          If FACT = 'E', then AFP is an output argument and on exit
+*          returns the triangular factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H of the equilibrated
+*          matrix A (see the description of AP for the form of the
+*          equilibrated matrix).
+*
+*  EQUED   (input or output) CHARACTER*1
+*          Specifies the form of equilibration that was done.
+*          = 'N':  No equilibration (always true if FACT = 'N').
+*          = 'Y':  Equilibration was done, i.e., A has been replaced by
+*                  diag(S) * A * diag(S).
+*          EQUED is an input argument if FACT = 'F'; otherwise, it is an
+*          output argument.
+*
+*  S       (input or output) REAL array, dimension (N)
+*          The scale factors for A; not accessed if EQUED = 'N'.  S is
+*          an input argument if FACT = 'F'; otherwise, S is an output
+*          argument.  If FACT = 'F' and EQUED = 'Y', each element of S
+*          must be positive.
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y',
+*          B is overwritten by diag(S) * B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) COMPLEX array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to
+*          the original system of equations.  Note that if EQUED = 'Y',
+*          A and B are modified on exit, and the solution to the
+*          equilibrated system is inv(diag(S))*X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) REAL
+*          The estimate of the reciprocal condition number of the matrix
+*          A after equilibration (if done).  If RCOND is less than the
+*          machine precision (in particular, if RCOND = 0), the matrix
+*          is singular to working precision.  This condition is
+*          indicated by a return code of INFO > 0.
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= N:  the leading minor of order i of A is
+*                       not positive definite, so the factorization
+*                       could not be completed, and the solution has not
+*                       been computed. RCOND = 0 is returned.
+*                = N+1: U is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  Further Details
+*  ===============
+*
+*  The packed storage scheme is illustrated by the following example
+*  when N = 4, UPLO = 'U':
+*
+*  Two-dimensional storage of the Hermitian matrix A:
+*
+*     a11 a12 a13 a14
+*         a22 a23 a24
+*             a33 a34     (aij = conjg(aji))
+*                 a44
+*
+*  Packed storage of the upper triangle of A:
+*
+*  AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ]
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            EQUIL, NOFACT, RCEQU
+      INTEGER            I, INFEQU, J
+      REAL               AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               CLANHP, SLAMCH
+      EXTERNAL           LSAME, CLANHP, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CLACPY, CLAQHP, CPPCON, CPPEQU, CPPRFS,
+     $                   CPPTRF, CPPTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      EQUIL = LSAME( FACT, 'E' )
+      IF( NOFACT .OR. EQUIL ) THEN
+         EQUED = 'N'
+         RCEQU = .FALSE.
+      ELSE
+         RCEQU = LSAME( EQUED, 'Y' )
+         SMLNUM = SLAMCH( 'Safe minimum' )
+         BIGNUM = ONE / SMLNUM
+      END IF
+*
+*     Test the input parameters.
+*
+      IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) )
+     $     THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT.
+     $         ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN
+         INFO = -7
+      ELSE
+         IF( RCEQU ) THEN
+            SMIN = BIGNUM
+            SMAX = ZERO
+            DO 10 J = 1, N
+               SMIN = MIN( SMIN, S( J ) )
+               SMAX = MAX( SMAX, S( J ) )
+   10       CONTINUE
+            IF( SMIN.LE.ZERO ) THEN
+               INFO = -8
+            ELSE IF( N.GT.0 ) THEN
+               SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM )
+            ELSE
+               SCOND = ONE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            IF( LDB.LT.MAX( 1, N ) ) THEN
+               INFO = -10
+            ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+               INFO = -12
+            END IF
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPPSVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( EQUIL ) THEN
+*
+*        Compute row and column scalings to equilibrate the matrix A.
+*
+         CALL CPPEQU( UPLO, N, AP, S, SCOND, AMAX, INFEQU )
+         IF( INFEQU.EQ.0 ) THEN
+*
+*           Equilibrate the matrix.
+*
+            CALL CLAQHP( UPLO, N, AP, S, SCOND, AMAX, EQUED )
+            RCEQU = LSAME( EQUED, 'Y' )
+         END IF
+      END IF
+*
+*     Scale the right-hand side.
+*
+      IF( RCEQU ) THEN
+         DO 30 J = 1, NRHS
+            DO 20 I = 1, N
+               B( I, J ) = S( I )*B( I, J )
+   20       CONTINUE
+   30    CONTINUE
+      END IF
+*
+      IF( NOFACT .OR. EQUIL ) THEN
+*
+*        Compute the Cholesky factorization A = U'*U or A = L*L'.
+*
+         CALL CCOPY( N*( N+1 ) / 2, AP, 1, AFP, 1 )
+         CALL CPPTRF( UPLO, N, AFP, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      ANORM = CLANHP( 'I', UPLO, N, AP, RWORK )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL CPPCON( UPLO, N, AFP, ANORM, RCOND, WORK, RWORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.SLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution matrix X.
+*
+      CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL CPPTRS( UPLO, N, NRHS, AFP, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solution and
+*     compute error bounds and backward error estimates for it.
+*
+      CALL CPPRFS( UPLO, N, NRHS, AP, AFP, B, LDB, X, LDX, FERR, BERR,
+     $             WORK, RWORK, INFO )
+*
+*     Transform the solution matrix X to a solution of the original
+*     system.
+*
+      IF( RCEQU ) THEN
+         DO 50 J = 1, NRHS
+            DO 40 I = 1, N
+               X( I, J ) = S( I )*X( I, J )
+   40       CONTINUE
+   50    CONTINUE
+         DO 60 J = 1, NRHS
+            FERR( J ) = FERR( J ) / SCOND
+   60    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CPPSVX
+*
+      END
diff --git a/costa/native/external/lapack/cpptrf.f b/costa/native/external/lapack/cpptrf.f
new file mode 100644
index 000000000..a7ced5934
--- /dev/null
+++ b/costa/native/external/lapack/cpptrf.f
@@ -0,0 +1,179 @@
+      SUBROUTINE CPPTRF( UPLO, N, AP, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            AP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPPTRF computes the Cholesky factorization of a complex Hermitian
+*  positive definite matrix A stored in packed format.
+*
+*  The factorization has the form
+*     A = U**H * U,  if UPLO = 'U', or
+*     A = L  * L**H,  if UPLO = 'L',
+*  where U is an upper triangular matrix and L is lower triangular.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) COMPLEX array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*          See below for further details.
+*
+*          On exit, if INFO = 0, the triangular factor U or L from the
+*          Cholesky factorization A = U**H*U or A = L*L**H, in the same
+*          storage format as A.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i is not
+*                positive definite, and the factorization could not be
+*                completed.
+*
+*  Further Details
+*  ===============
+*
+*  The packed storage scheme is illustrated by the following example
+*  when N = 4, UPLO = 'U':
+*
+*  Two-dimensional storage of the Hermitian matrix A:
+*
+*     a11 a12 a13 a14
+*         a22 a23 a24
+*             a33 a34     (aij = conjg(aji))
+*                 a44
+*
+*  Packed storage of the upper triangle of A:
+*
+*  AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ]
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, JC, JJ
+      REAL               AJJ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      COMPLEX            CDOTC
+      EXTERNAL           LSAME, CDOTC
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CHPR, CSSCAL, CTPSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPPTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Compute the Cholesky factorization A = U'*U.
+*
+         JJ = 0
+         DO 10 J = 1, N
+            JC = JJ + 1
+            JJ = JJ + J
+*
+*           Compute elements 1:J-1 of column J.
+*
+            IF( J.GT.1 )
+     $         CALL CTPSV( 'Upper', 'Conjugate transpose', 'Non-unit',
+     $                     J-1, AP, AP( JC ), 1 )
+*
+*           Compute U(J,J) and test for non-positive-definiteness.
+*
+            AJJ = REAL( AP( JJ ) ) - CDOTC( J-1, AP( JC ), 1, AP( JC ),
+     $            1 )
+            IF( AJJ.LE.ZERO ) THEN
+               AP( JJ ) = AJJ
+               GO TO 30
+            END IF
+            AP( JJ ) = SQRT( AJJ )
+   10    CONTINUE
+      ELSE
+*
+*        Compute the Cholesky factorization A = L*L'.
+*
+         JJ = 1
+         DO 20 J = 1, N
+*
+*           Compute L(J,J) and test for non-positive-definiteness.
+*
+            AJJ = REAL( AP( JJ ) )
+            IF( AJJ.LE.ZERO ) THEN
+               AP( JJ ) = AJJ
+               GO TO 30
+            END IF
+            AJJ = SQRT( AJJ )
+            AP( JJ ) = AJJ
+*
+*           Compute elements J+1:N of column J and update the trailing
+*           submatrix.
+*
+            IF( J.LT.N ) THEN
+               CALL CSSCAL( N-J, ONE / AJJ, AP( JJ+1 ), 1 )
+               CALL CHPR( 'Lower', N-J, -ONE, AP( JJ+1 ), 1,
+     $                    AP( JJ+N-J+1 ) )
+               JJ = JJ + N - J + 1
+            END IF
+   20    CONTINUE
+      END IF
+      GO TO 40
+*
+   30 CONTINUE
+      INFO = J
+*
+   40 CONTINUE
+      RETURN
+*
+*     End of CPPTRF
+*
+      END
diff --git a/costa/native/external/lapack/cpptri.f b/costa/native/external/lapack/cpptri.f
new file mode 100644
index 000000000..f0e1d6151
--- /dev/null
+++ b/costa/native/external/lapack/cpptri.f
@@ -0,0 +1,131 @@
+      SUBROUTINE CPPTRI( UPLO, N, AP, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            AP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPPTRI computes the inverse of a complex Hermitian positive definite
+*  matrix A using the Cholesky factorization A = U**H*U or A = L*L**H
+*  computed by CPPTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangular factor is stored in AP;
+*          = 'L':  Lower triangular factor is stored in AP.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) COMPLEX array, dimension (N*(N+1)/2)
+*          On entry, the triangular factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H, packed columnwise as
+*          a linear array.  The j-th column of U or L is stored in the
+*          array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n.
+*
+*          On exit, the upper or lower triangle of the (Hermitian)
+*          inverse of A, overwriting the input factor U or L.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the (i,i) element of the factor U or L is
+*                zero, and the inverse could not be computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, JC, JJ, JJN
+      REAL               AJJ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      COMPLEX            CDOTC
+      EXTERNAL           LSAME, CDOTC
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CHPR, CSSCAL, CTPMV, CTPTRI, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPPTRI', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Invert the triangular Cholesky factor U or L.
+*
+      CALL CTPTRI( UPLO, 'Non-unit', N, AP, INFO )
+      IF( INFO.GT.0 )
+     $   RETURN
+      IF( UPPER ) THEN
+*
+*        Compute the product inv(U) * inv(U)'.
+*
+         JJ = 0
+         DO 10 J = 1, N
+            JC = JJ + 1
+            JJ = JJ + J
+            IF( J.GT.1 )
+     $         CALL CHPR( 'Upper', J-1, ONE, AP( JC ), 1, AP )
+            AJJ = AP( JJ )
+            CALL CSSCAL( J, AJJ, AP( JC ), 1 )
+   10    CONTINUE
+*
+      ELSE
+*
+*        Compute the product inv(L)' * inv(L).
+*
+         JJ = 1
+         DO 20 J = 1, N
+            JJN = JJ + N - J + 1
+            AP( JJ ) = REAL( CDOTC( N-J+1, AP( JJ ), 1, AP( JJ ), 1 ) )
+            IF( J.LT.N )
+     $         CALL CTPMV( 'Lower', 'Conjugate transpose', 'Non-unit',
+     $                     N-J, AP( JJN ), AP( JJ+1 ), 1 )
+            JJ = JJN
+   20    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CPPTRI
+*
+      END
diff --git a/costa/native/external/lapack/cpptrs.f b/costa/native/external/lapack/cpptrs.f
new file mode 100644
index 000000000..d82999c27
--- /dev/null
+++ b/costa/native/external/lapack/cpptrs.f
@@ -0,0 +1,135 @@
+      SUBROUTINE CPPTRS( UPLO, N, NRHS, AP, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            AP( * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPPTRS solves a system of linear equations A*X = B with a Hermitian
+*  positive definite matrix A in packed storage using the Cholesky
+*  factorization A = U**H*U or A = L*L**H computed by CPPTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AP      (input) COMPLEX array, dimension (N*(N+1)/2)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**H*U or A = L*L**H, packed columnwise in a linear
+*          array.  The j-th column of U or L is stored in the array AP
+*          as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n.
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CTPSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPPTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Solve A*X = B where A = U'*U.
+*
+         DO 10 I = 1, NRHS
+*
+*           Solve U'*X = B, overwriting B with X.
+*
+            CALL CTPSV( 'Upper', 'Conjugate transpose', 'Non-unit', N,
+     $                  AP, B( 1, I ), 1 )
+*
+*           Solve U*X = B, overwriting B with X.
+*
+            CALL CTPSV( 'Upper', 'No transpose', 'Non-unit', N, AP,
+     $                  B( 1, I ), 1 )
+   10    CONTINUE
+      ELSE
+*
+*        Solve A*X = B where A = L*L'.
+*
+         DO 20 I = 1, NRHS
+*
+*           Solve L*Y = B, overwriting B with X.
+*
+            CALL CTPSV( 'Lower', 'No transpose', 'Non-unit', N, AP,
+     $                  B( 1, I ), 1 )
+*
+*           Solve L'*X = Y, overwriting B with X.
+*
+            CALL CTPSV( 'Lower', 'Conjugate transpose', 'Non-unit', N,
+     $                  AP, B( 1, I ), 1 )
+   20    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CPPTRS
+*
+      END
diff --git a/costa/native/external/lapack/cptcon.f b/costa/native/external/lapack/cptcon.f
new file mode 100644
index 000000000..1b2cd5095
--- /dev/null
+++ b/costa/native/external/lapack/cptcon.f
@@ -0,0 +1,151 @@
+      SUBROUTINE CPTCON( N, D, E, ANORM, RCOND, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, N
+      REAL               ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * ), RWORK( * )
+      COMPLEX            E( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPTCON computes the reciprocal of the condition number (in the
+*  1-norm) of a complex Hermitian positive definite tridiagonal matrix
+*  using the factorization A = L*D*L**H or A = U**H*D*U computed by
+*  CPTTRF.
+*
+*  Norm(inv(A)) is computed by a direct method, and the reciprocal of
+*  the condition number is computed as
+*                   RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  D       (input) REAL array, dimension (N)
+*          The n diagonal elements of the diagonal matrix D from the
+*          factorization of A, as computed by CPTTRF.
+*
+*  E       (input) COMPLEX array, dimension (N-1)
+*          The (n-1) off-diagonal elements of the unit bidiagonal factor
+*          U or L from the factorization of A, as computed by CPTTRF.
+*
+*  ANORM   (input) REAL
+*          The 1-norm of the original matrix A.
+*
+*  RCOND   (output) REAL
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is the
+*          1-norm of inv(A) computed in this routine.
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The method used is described in Nicholas J. Higham, "Efficient
+*  Algorithms for Computing the Condition Number of a Tridiagonal
+*  Matrix", SIAM J. Sci. Stat. Comput., Vol. 7, No. 1, January 1986.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IX
+      REAL               AINVNM
+*     ..
+*     .. External Functions ..
+      INTEGER            ISAMAX
+      EXTERNAL           ISAMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments.
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPTCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.EQ.ZERO ) THEN
+         RETURN
+      END IF
+*
+*     Check that D(1:N) is positive.
+*
+      DO 10 I = 1, N
+         IF( D( I ).LE.ZERO )
+     $      RETURN
+   10 CONTINUE
+*
+*     Solve M(A) * x = e, where M(A) = (m(i,j)) is given by
+*
+*        m(i,j) =  abs(A(i,j)), i = j,
+*        m(i,j) = -abs(A(i,j)), i .ne. j,
+*
+*     and e = [ 1, 1, ..., 1 ]'.  Note M(A) = M(L)*D*M(L)'.
+*
+*     Solve M(L) * x = e.
+*
+      RWORK( 1 ) = ONE
+      DO 20 I = 2, N
+         RWORK( I ) = ONE + RWORK( I-1 )*ABS( E( I-1 ) )
+   20 CONTINUE
+*
+*     Solve D * M(L)' * x = b.
+*
+      RWORK( N ) = RWORK( N ) / D( N )
+      DO 30 I = N - 1, 1, -1
+         RWORK( I ) = RWORK( I ) / D( I ) + RWORK( I+1 )*ABS( E( I ) )
+   30 CONTINUE
+*
+*     Compute AINVNM = max(x(i)), 1<=i<=n.
+*
+      IX = ISAMAX( N, RWORK, 1 )
+      AINVNM = ABS( RWORK( IX ) )
+*
+*     Compute the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+      RETURN
+*
+*     End of CPTCON
+*
+      END
diff --git a/costa/native/external/lapack/cpteqr.f b/costa/native/external/lapack/cpteqr.f
new file mode 100644
index 000000000..ff28815a1
--- /dev/null
+++ b/costa/native/external/lapack/cpteqr.f
@@ -0,0 +1,191 @@
+      SUBROUTINE CPTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPZ
+      INTEGER            INFO, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * ), E( * ), WORK( * )
+      COMPLEX            Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPTEQR computes all eigenvalues and, optionally, eigenvectors of a
+*  symmetric positive definite tridiagonal matrix by first factoring the
+*  matrix using SPTTRF and then calling CBDSQR to compute the singular
+*  values of the bidiagonal factor.
+*
+*  This routine computes the eigenvalues of the positive definite
+*  tridiagonal matrix to high relative accuracy.  This means that if the
+*  eigenvalues range over many orders of magnitude in size, then the
+*  small eigenvalues and corresponding eigenvectors will be computed
+*  more accurately than, for example, with the standard QR method.
+*
+*  The eigenvectors of a full or band positive definite Hermitian matrix
+*  can also be found if CHETRD, CHPTRD, or CHBTRD has been used to
+*  reduce this matrix to tridiagonal form.  (The reduction to
+*  tridiagonal form, however, may preclude the possibility of obtaining
+*  high relative accuracy in the small eigenvalues of the original
+*  matrix, if these eigenvalues range over many orders of magnitude.)
+*
+*  Arguments
+*  =========
+*
+*  COMPZ   (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only.
+*          = 'V':  Compute eigenvectors of original Hermitian
+*                  matrix also.  Array Z contains the unitary matrix
+*                  used to reduce the original matrix to tridiagonal
+*                  form.
+*          = 'I':  Compute eigenvectors of tridiagonal matrix also.
+*
+*  N       (input) INTEGER
+*          The order of the matrix.  N >= 0.
+*
+*  D       (input/output) REAL array, dimension (N)
+*          On entry, the n diagonal elements of the tridiagonal matrix.
+*          On normal exit, D contains the eigenvalues, in descending
+*          order.
+*
+*  E       (input/output) REAL array, dimension (N-1)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix.
+*          On exit, E has been destroyed.
+*
+*  Z       (input/output) COMPLEX array, dimension (LDZ, N)
+*          On entry, if COMPZ = 'V', the unitary matrix used in the
+*          reduction to tridiagonal form.
+*          On exit, if COMPZ = 'V', the orthonormal eigenvectors of the
+*          original Hermitian matrix;
+*          if COMPZ = 'I', the orthonormal eigenvectors of the
+*          tridiagonal matrix.
+*          If INFO > 0 on exit, Z contains the eigenvectors associated
+*          with only the stored eigenvalues.
+*          If  COMPZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          COMPZ = 'V' or 'I', LDZ >= max(1,N).
+*
+*  WORK    (workspace) REAL array, dimension (4*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = i, and i is:
+*                <= N  the Cholesky factorization of the matrix could
+*                      not be performed because the i-th principal minor
+*                      was not positive definite.
+*                > N   the SVD algorithm failed to converge;
+*                      if INFO = N+i, i off-diagonal elements of the
+*                      bidiagonal factor did not converge to zero.
+*
+*  ====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CBDSQR, CLASET, SPTTRF, XERBLA
+*     ..
+*     .. Local Arrays ..
+      COMPLEX            C( 1, 1 ), VT( 1, 1 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, ICOMPZ, NRU
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( LSAME( COMPZ, 'N' ) ) THEN
+         ICOMPZ = 0
+      ELSE IF( LSAME( COMPZ, 'V' ) ) THEN
+         ICOMPZ = 1
+      ELSE IF( LSAME( COMPZ, 'I' ) ) THEN
+         ICOMPZ = 2
+      ELSE
+         ICOMPZ = -1
+      END IF
+      IF( ICOMPZ.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1,
+     $         N ) ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPTEQR', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( ICOMPZ.GT.0 )
+     $      Z( 1, 1 ) = CONE
+         RETURN
+      END IF
+      IF( ICOMPZ.EQ.2 )
+     $   CALL CLASET( 'Full', N, N, CZERO, CONE, Z, LDZ )
+*
+*     Call SPTTRF to factor the matrix.
+*
+      CALL SPTTRF( N, D, E, INFO )
+      IF( INFO.NE.0 )
+     $   RETURN
+      DO 10 I = 1, N
+         D( I ) = SQRT( D( I ) )
+   10 CONTINUE
+      DO 20 I = 1, N - 1
+         E( I ) = E( I )*D( I )
+   20 CONTINUE
+*
+*     Call CBDSQR to compute the singular values/vectors of the
+*     bidiagonal factor.
+*
+      IF( ICOMPZ.GT.0 ) THEN
+         NRU = N
+      ELSE
+         NRU = 0
+      END IF
+      CALL CBDSQR( 'Lower', N, 0, NRU, 0, D, E, VT, 1, Z, LDZ, C, 1,
+     $             WORK, INFO )
+*
+*     Square the singular values.
+*
+      IF( INFO.EQ.0 ) THEN
+         DO 30 I = 1, N
+            D( I ) = D( I )*D( I )
+   30    CONTINUE
+      ELSE
+         INFO = N + INFO
+      END IF
+*
+      RETURN
+*
+*     End of CPTEQR
+*
+      END
diff --git a/costa/native/external/lapack/cptrfs.f b/costa/native/external/lapack/cptrfs.f
new file mode 100644
index 000000000..8dc5df10c
--- /dev/null
+++ b/costa/native/external/lapack/cptrfs.f
@@ -0,0 +1,367 @@
+      SUBROUTINE CPTRFS( UPLO, N, NRHS, D, E, DF, EF, B, LDB, X, LDX,
+     $                   FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      REAL               BERR( * ), D( * ), DF( * ), FERR( * ),
+     $                   RWORK( * )
+      COMPLEX            B( LDB, * ), E( * ), EF( * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPTRFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is Hermitian positive definite
+*  and tridiagonal, and provides error bounds and backward error
+*  estimates for the solution.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the superdiagonal or the subdiagonal of the
+*          tridiagonal matrix A is stored and the form of the
+*          factorization:
+*          = 'U':  E is the superdiagonal of A, and A = U**H*D*U;
+*          = 'L':  E is the subdiagonal of A, and A = L*D*L**H.
+*          (The two forms are equivalent if A is real.)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  D       (input) REAL array, dimension (N)
+*          The n real diagonal elements of the tridiagonal matrix A.
+*
+*  E       (input) COMPLEX array, dimension (N-1)
+*          The (n-1) off-diagonal elements of the tridiagonal matrix A
+*          (see UPLO).
+*
+*  DF      (input) REAL array, dimension (N)
+*          The n diagonal elements of the diagonal matrix D from
+*          the factorization computed by CPTTRF.
+*
+*  EF      (input) COMPLEX array, dimension (N-1)
+*          The (n-1) off-diagonal elements of the unit bidiagonal
+*          factor U or L from the factorization computed by CPTTRF
+*          (see UPLO).
+*
+*  B       (input) COMPLEX array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) COMPLEX array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by CPTTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX array, dimension (N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+      REAL               TWO
+      PARAMETER          ( TWO = 2.0E+0 )
+      REAL               THREE
+      PARAMETER          ( THREE = 3.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            COUNT, I, IX, J, NZ
+      REAL               EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN
+      COMPLEX            BI, CX, DX, EX, ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ISAMAX
+      REAL               SLAMCH
+      EXTERNAL           LSAME, ISAMAX, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CPTTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, CMPLX, CONJG, MAX, REAL
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPTRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = 4
+      EPS = SLAMCH( 'Epsilon' )
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 100 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - A * X.  Also compute
+*        abs(A)*abs(x) + abs(b) for use in the backward error bound.
+*
+         IF( UPPER ) THEN
+            IF( N.EQ.1 ) THEN
+               BI = B( 1, J )
+               DX = D( 1 )*X( 1, J )
+               WORK( 1 ) = BI - DX
+               RWORK( 1 ) = CABS1( BI ) + CABS1( DX )
+            ELSE
+               BI = B( 1, J )
+               DX = D( 1 )*X( 1, J )
+               EX = E( 1 )*X( 2, J )
+               WORK( 1 ) = BI - DX - EX
+               RWORK( 1 ) = CABS1( BI ) + CABS1( DX ) +
+     $                      CABS1( E( 1 ) )*CABS1( X( 2, J ) )
+               DO 30 I = 2, N - 1
+                  BI = B( I, J )
+                  CX = CONJG( E( I-1 ) )*X( I-1, J )
+                  DX = D( I )*X( I, J )
+                  EX = E( I )*X( I+1, J )
+                  WORK( I ) = BI - CX - DX - EX
+                  RWORK( I ) = CABS1( BI ) +
+     $                         CABS1( E( I-1 ) )*CABS1( X( I-1, J ) ) +
+     $                         CABS1( DX ) + CABS1( E( I ) )*
+     $                         CABS1( X( I+1, J ) )
+   30          CONTINUE
+               BI = B( N, J )
+               CX = CONJG( E( N-1 ) )*X( N-1, J )
+               DX = D( N )*X( N, J )
+               WORK( N ) = BI - CX - DX
+               RWORK( N ) = CABS1( BI ) + CABS1( E( N-1 ) )*
+     $                      CABS1( X( N-1, J ) ) + CABS1( DX )
+            END IF
+         ELSE
+            IF( N.EQ.1 ) THEN
+               BI = B( 1, J )
+               DX = D( 1 )*X( 1, J )
+               WORK( 1 ) = BI - DX
+               RWORK( 1 ) = CABS1( BI ) + CABS1( DX )
+            ELSE
+               BI = B( 1, J )
+               DX = D( 1 )*X( 1, J )
+               EX = CONJG( E( 1 ) )*X( 2, J )
+               WORK( 1 ) = BI - DX - EX
+               RWORK( 1 ) = CABS1( BI ) + CABS1( DX ) +
+     $                      CABS1( E( 1 ) )*CABS1( X( 2, J ) )
+               DO 40 I = 2, N - 1
+                  BI = B( I, J )
+                  CX = E( I-1 )*X( I-1, J )
+                  DX = D( I )*X( I, J )
+                  EX = CONJG( E( I ) )*X( I+1, J )
+                  WORK( I ) = BI - CX - DX - EX
+                  RWORK( I ) = CABS1( BI ) +
+     $                         CABS1( E( I-1 ) )*CABS1( X( I-1, J ) ) +
+     $                         CABS1( DX ) + CABS1( E( I ) )*
+     $                         CABS1( X( I+1, J ) )
+   40          CONTINUE
+               BI = B( N, J )
+               CX = E( N-1 )*X( N-1, J )
+               DX = D( N )*X( N, J )
+               WORK( N ) = BI - CX - DX
+               RWORK( N ) = CABS1( BI ) + CABS1( E( N-1 ) )*
+     $                      CABS1( X( N-1, J ) ) + CABS1( DX )
+            END IF
+         END IF
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         S = ZERO
+         DO 50 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) )
+            ELSE
+               S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) /
+     $             ( RWORK( I )+SAFE1 ) )
+            END IF
+   50    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL CPTTRS( UPLO, N, 1, DF, EF, WORK, N, INFO )
+            CALL CAXPY( N, CMPLX( ONE ), WORK, 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(A))*
+*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(A) is the inverse of A
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(A)*abs(X) + abs(B) is less than SAFE2.
+*
+         DO 60 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I )
+            ELSE
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) +
+     $                      SAFE1
+            END IF
+   60    CONTINUE
+         IX = ISAMAX( N, RWORK, 1 )
+         FERR( J ) = RWORK( IX )
+*
+*        Estimate the norm of inv(A).
+*
+*        Solve M(A) * x = e, where M(A) = (m(i,j)) is given by
+*
+*           m(i,j) =  abs(A(i,j)), i = j,
+*           m(i,j) = -abs(A(i,j)), i .ne. j,
+*
+*        and e = [ 1, 1, ..., 1 ]'.  Note M(A) = M(L)*D*M(L)'.
+*
+*        Solve M(L) * x = e.
+*
+         RWORK( 1 ) = ONE
+         DO 70 I = 2, N
+            RWORK( I ) = ONE + RWORK( I-1 )*ABS( EF( I-1 ) )
+   70    CONTINUE
+*
+*        Solve D * M(L)' * x = b.
+*
+         RWORK( N ) = RWORK( N ) / DF( N )
+         DO 80 I = N - 1, 1, -1
+            RWORK( I ) = RWORK( I ) / DF( I ) +
+     $                   RWORK( I+1 )*ABS( EF( I ) )
+   80    CONTINUE
+*
+*        Compute norm(inv(A)) = max(x(i)), 1<=i<=n.
+*
+         IX = ISAMAX( N, RWORK, 1 )
+         FERR( J ) = FERR( J )*ABS( RWORK( IX ) )
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 90 I = 1, N
+            LSTRES = MAX( LSTRES, ABS( X( I, J ) ) )
+   90    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  100 CONTINUE
+*
+      RETURN
+*
+*     End of CPTRFS
+*
+      END
diff --git a/costa/native/external/lapack/cptsv.f b/costa/native/external/lapack/cptsv.f
new file mode 100644
index 000000000..16172d0ea
--- /dev/null
+++ b/costa/native/external/lapack/cptsv.f
@@ -0,0 +1,101 @@
+      SUBROUTINE CPTSV( N, NRHS, D, E, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 25, 1997
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * )
+      COMPLEX            B( LDB, * ), E( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPTSV computes the solution to a complex system of linear equations
+*  A*X = B, where A is an N-by-N Hermitian positive definite tridiagonal
+*  matrix, and X and B are N-by-NRHS matrices.
+*
+*  A is factored as A = L*D*L**H, and the factored form of A is then
+*  used to solve the system of equations.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  D       (input/output) REAL array, dimension (N)
+*          On entry, the n diagonal elements of the tridiagonal matrix
+*          A.  On exit, the n diagonal elements of the diagonal matrix
+*          D from the factorization A = L*D*L**H.
+*
+*  E       (input/output) COMPLEX array, dimension (N-1)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix A.  On exit, the (n-1) subdiagonal elements of the
+*          unit bidiagonal factor L from the L*D*L**H factorization of
+*          A.  E can also be regarded as the superdiagonal of the unit
+*          bidiagonal factor U from the U**H*D*U factorization of A.
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i is not
+*                positive definite, and the solution has not been
+*                computed.  The factorization has not been completed
+*                unless i = N.
+*
+*  =====================================================================
+*
+*     .. External Subroutines ..
+      EXTERNAL           CPTTRF, CPTTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPTSV ', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the L*D*L' (or U'*D*U) factorization of A.
+*
+      CALL CPTTRF( N, D, E, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL CPTTRS( 'Lower', N, NRHS, D, E, B, LDB, INFO )
+      END IF
+      RETURN
+*
+*     End of CPTSV
+*
+      END
diff --git a/costa/native/external/lapack/cptsvx.f b/costa/native/external/lapack/cptsvx.f
new file mode 100644
index 000000000..7ae483786
--- /dev/null
+++ b/costa/native/external/lapack/cptsvx.f
@@ -0,0 +1,238 @@
+      SUBROUTINE CPTSVX( FACT, N, NRHS, D, E, DF, EF, B, LDB, X, LDX,
+     $                   RCOND, FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          FACT
+      INTEGER            INFO, LDB, LDX, N, NRHS
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               BERR( * ), D( * ), DF( * ), FERR( * ),
+     $                   RWORK( * )
+      COMPLEX            B( LDB, * ), E( * ), EF( * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPTSVX uses the factorization A = L*D*L**H to compute the solution
+*  to a complex system of linear equations A*X = B, where A is an
+*  N-by-N Hermitian positive definite tridiagonal matrix and X and B
+*  are N-by-NRHS matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'N', the matrix A is factored as A = L*D*L**H, where L
+*     is a unit lower bidiagonal matrix and D is diagonal.  The
+*     factorization can also be regarded as having the form
+*     A = U**H*D*U.
+*
+*  2. If the leading i-by-i principal minor is not positive definite,
+*     then the routine returns with INFO = i. Otherwise, the factored
+*     form of A is used to estimate the condition number of the matrix
+*     A.  If the reciprocal of the condition number is less than machine
+*     precision, INFO = N+1 is returned as a warning, but the routine
+*     still goes on to solve for X and compute error bounds as
+*     described below.
+*
+*  3. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  4. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of the matrix
+*          A is supplied on entry.
+*          = 'F':  On entry, DF and EF contain the factored form of A.
+*                  D, E, DF, and EF will not be modified.
+*          = 'N':  The matrix A will be copied to DF and EF and
+*                  factored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  D       (input) REAL array, dimension (N)
+*          The n diagonal elements of the tridiagonal matrix A.
+*
+*  E       (input) COMPLEX array, dimension (N-1)
+*          The (n-1) subdiagonal elements of the tridiagonal matrix A.
+*
+*  DF      (input or output) REAL array, dimension (N)
+*          If FACT = 'F', then DF is an input argument and on entry
+*          contains the n diagonal elements of the diagonal matrix D
+*          from the L*D*L**H factorization of A.
+*          If FACT = 'N', then DF is an output argument and on exit
+*          contains the n diagonal elements of the diagonal matrix D
+*          from the L*D*L**H factorization of A.
+*
+*  EF      (input or output) COMPLEX array, dimension (N-1)
+*          If FACT = 'F', then EF is an input argument and on entry
+*          contains the (n-1) subdiagonal elements of the unit
+*          bidiagonal factor L from the L*D*L**H factorization of A.
+*          If FACT = 'N', then EF is an output argument and on exit
+*          contains the (n-1) subdiagonal elements of the unit
+*          bidiagonal factor L from the L*D*L**H factorization of A.
+*
+*  B       (input) COMPLEX array, dimension (LDB,NRHS)
+*          The N-by-NRHS right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) COMPLEX array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) REAL
+*          The reciprocal condition number of the matrix A.  If RCOND
+*          is less than the machine precision (in particular, if
+*          RCOND = 0), the matrix is singular to working precision.
+*          This condition is indicated by a return code of INFO > 0.
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in any
+*          element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX array, dimension (N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= N:  the leading minor of order i of A is
+*                       not positive definite, so the factorization
+*                       could not be completed, and the solution has not
+*                       been computed. RCOND = 0 is returned.
+*                = N+1: U is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOFACT
+      REAL               ANORM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               CLANHT, SLAMCH
+      EXTERNAL           LSAME, CLANHT, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CLACPY, CPTCON, CPTRFS, CPTTRF, CPTTRS,
+     $                   SCOPY, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPTSVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( NOFACT ) THEN
+*
+*        Compute the L*D*L' (or U'*D*U) factorization of A.
+*
+         CALL SCOPY( N, D, 1, DF, 1 )
+         IF( N.GT.1 )
+     $      CALL CCOPY( N-1, E, 1, EF, 1 )
+         CALL CPTTRF( N, DF, EF, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      ANORM = CLANHT( '1', N, D, E )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL CPTCON( N, DF, EF, ANORM, RCOND, RWORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.SLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution vectors X.
+*
+      CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL CPTTRS( 'Lower', N, NRHS, DF, EF, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solutions and
+*     compute error bounds and backward error estimates for them.
+*
+      CALL CPTRFS( 'Lower', N, NRHS, D, E, DF, EF, B, LDB, X, LDX, FERR,
+     $             BERR, WORK, RWORK, INFO )
+*
+      RETURN
+*
+*     End of CPTSVX
+*
+      END
diff --git a/costa/native/external/lapack/cpttrf.f b/costa/native/external/lapack/cpttrf.f
new file mode 100644
index 000000000..ac451bd70
--- /dev/null
+++ b/costa/native/external/lapack/cpttrf.f
@@ -0,0 +1,169 @@
+      SUBROUTINE CPTTRF( N, D, E, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * )
+      COMPLEX            E( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPTTRF computes the L*D*L' factorization of a complex Hermitian
+*  positive definite tridiagonal matrix A.  The factorization may also
+*  be regarded as having the form A = U'*D*U.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  D       (input/output) REAL array, dimension (N)
+*          On entry, the n diagonal elements of the tridiagonal matrix
+*          A.  On exit, the n diagonal elements of the diagonal matrix
+*          D from the L*D*L' factorization of A.
+*
+*  E       (input/output) COMPLEX array, dimension (N-1)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix A.  On exit, the (n-1) subdiagonal elements of the
+*          unit bidiagonal factor L from the L*D*L' factorization of A.
+*          E can also be regarded as the superdiagonal of the unit
+*          bidiagonal factor U from the U'*D*U factorization of A.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*          > 0: if INFO = k, the leading minor of order k is not
+*               positive definite; if k < N, the factorization could not
+*               be completed, while if k = N, the factorization was
+*               completed, but D(N) = 0.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, I4
+      REAL               EII, EIR, F, G
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          AIMAG, CMPLX, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+         CALL XERBLA( 'CPTTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Compute the L*D*L' (or U'*D*U) factorization of A.
+*
+      I4 = MOD( N-1, 4 )
+      DO 10 I = 1, I4
+         IF( D( I ).LE.ZERO ) THEN
+            INFO = I
+            GO TO 20
+         END IF
+         EIR = REAL( E( I ) )
+         EII = AIMAG( E( I ) )
+         F = EIR / D( I )
+         G = EII / D( I )
+         E( I ) = CMPLX( F, G )
+         D( I+1 ) = D( I+1 ) - F*EIR - G*EII
+   10 CONTINUE
+*
+      DO 110 I = I4+1, N - 4, 4
+*
+*        Drop out of the loop if d(i) <= 0: the matrix is not positive
+*        definite.
+*
+         IF( D( I ).LE.ZERO ) THEN
+            INFO = I
+            GO TO 20
+         END IF
+*
+*        Solve for e(i) and d(i+1).
+*
+         EIR = REAL( E( I ) )
+         EII = AIMAG( E( I ) )
+         F = EIR / D( I )
+         G = EII / D( I )
+         E( I ) = CMPLX( F, G )
+         D( I+1 ) = D( I+1 ) - F*EIR - G*EII
+*
+         IF( D( I+1 ).LE.ZERO ) THEN
+            INFO = I+1
+            GO TO 20
+         END IF
+*
+*        Solve for e(i+1) and d(i+2).
+*
+         EIR = REAL( E( I+1 ) )
+         EII = AIMAG( E( I+1 ) )
+         F = EIR / D( I+1 )
+         G = EII / D( I+1 )
+         E( I+1 ) = CMPLX( F, G )
+         D( I+2 ) = D( I+2 ) - F*EIR - G*EII
+*
+         IF( D( I+2 ).LE.ZERO ) THEN
+            INFO = I+2
+            GO TO 20
+         END IF
+*
+*        Solve for e(i+2) and d(i+3).
+*
+         EIR = REAL( E( I+2 ) )
+         EII = AIMAG( E( I+2 ) )
+         F = EIR / D( I+2 )
+         G = EII / D( I+2 )
+         E( I+2 ) = CMPLX( F, G )
+         D( I+3 ) = D( I+3 ) - F*EIR - G*EII
+*
+         IF( D( I+3 ).LE.ZERO ) THEN
+            INFO = I+3
+            GO TO 20
+         END IF
+*
+*        Solve for e(i+3) and d(i+4).
+*
+         EIR = REAL( E( I+3 ) )
+         EII = AIMAG( E( I+3 ) )
+         F = EIR / D( I+3 )
+         G = EII / D( I+3 )
+         E( I+3 ) = CMPLX( F, G )
+         D( I+4 ) = D( I+4 ) - F*EIR - G*EII
+  110 CONTINUE
+*
+*     Check d(n) for positive definiteness.
+*
+      IF( D( N ).LE.ZERO )
+     $   INFO = N
+*
+   20 CONTINUE
+      RETURN
+*
+*     End of CPTTRF
+*
+      END
diff --git a/costa/native/external/lapack/cpttrs.f b/costa/native/external/lapack/cpttrs.f
new file mode 100644
index 000000000..2438caf1c
--- /dev/null
+++ b/costa/native/external/lapack/cpttrs.f
@@ -0,0 +1,136 @@
+      SUBROUTINE CPTTRS( UPLO, N, NRHS, D, E, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * )
+      COMPLEX            B( LDB, * ), E( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPTTRS solves a tridiagonal system of the form
+*     A * X = B
+*  using the factorization A = U'*D*U or A = L*D*L' computed by CPTTRF.
+*  D is a diagonal matrix specified in the vector D, U (or L) is a unit
+*  bidiagonal matrix whose superdiagonal (subdiagonal) is specified in
+*  the vector E, and X and B are N by NRHS matrices.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies the form of the factorization and whether the
+*          vector E is the superdiagonal of the upper bidiagonal factor
+*          U or the subdiagonal of the lower bidiagonal factor L.
+*          = 'U':  A = U'*D*U, E is the superdiagonal of U
+*          = 'L':  A = L*D*L', E is the subdiagonal of L
+*
+*  N       (input) INTEGER
+*          The order of the tridiagonal matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  D       (input) REAL array, dimension (N)
+*          The n diagonal elements of the diagonal matrix D from the
+*          factorization A = U'*D*U or A = L*D*L'.
+*
+*  E       (input) COMPLEX array, dimension (N-1)
+*          If UPLO = 'U', the (n-1) superdiagonal elements of the unit
+*          bidiagonal factor U from the factorization A = U'*D*U.
+*          If UPLO = 'L', the (n-1) subdiagonal elements of the unit
+*          bidiagonal factor L from the factorization A = L*D*L'.
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the right hand side vectors B for the system of
+*          linear equations.
+*          On exit, the solution vectors, X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            IUPLO, J, JB, NB
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CPTTS2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments.
+*
+      INFO = 0
+      UPPER = ( UPLO.EQ.'U' .OR. UPLO.EQ.'u' )
+      IF( .NOT.UPPER .AND. .NOT.( UPLO.EQ.'L' .OR. UPLO.EQ.'l' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CPTTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+*     Determine the number of right-hand sides to solve at a time.
+*
+      IF( NRHS.EQ.1 ) THEN
+         NB = 1
+      ELSE
+         NB = MAX( 1, ILAENV( 1, 'CPTTRS', UPLO, N, NRHS, -1, -1 ) )
+      END IF
+*
+*     Decode UPLO
+*
+      IF( UPPER ) THEN
+         IUPLO = 1
+      ELSE
+         IUPLO = 0
+      END IF
+*
+      IF( NB.GE.NRHS ) THEN
+         CALL CPTTS2( IUPLO, N, NRHS, D, E, B, LDB )
+      ELSE
+         DO 10 J = 1, NRHS, NB
+            JB = MIN( NRHS-J+1, NB )
+            CALL CPTTS2( IUPLO, N, JB, D, E, B( 1, J ), LDB )
+   10    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CPTTRS
+*
+      END
diff --git a/costa/native/external/lapack/cptts2.f b/costa/native/external/lapack/cptts2.f
new file mode 100644
index 000000000..00e731bdd
--- /dev/null
+++ b/costa/native/external/lapack/cptts2.f
@@ -0,0 +1,177 @@
+      SUBROUTINE CPTTS2( IUPLO, N, NRHS, D, E, B, LDB )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            IUPLO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * )
+      COMPLEX            B( LDB, * ), E( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CPTTS2 solves a tridiagonal system of the form
+*     A * X = B
+*  using the factorization A = U'*D*U or A = L*D*L' computed by CPTTRF.
+*  D is a diagonal matrix specified in the vector D, U (or L) is a unit
+*  bidiagonal matrix whose superdiagonal (subdiagonal) is specified in
+*  the vector E, and X and B are N by NRHS matrices.
+*
+*  Arguments
+*  =========
+*
+*  IUPLO   (input) INTEGER
+*          Specifies the form of the factorization and whether the
+*          vector E is the superdiagonal of the upper bidiagonal factor
+*          U or the subdiagonal of the lower bidiagonal factor L.
+*          = 1:  A = U'*D*U, E is the superdiagonal of U
+*          = 0:  A = L*D*L', E is the subdiagonal of L
+*
+*  N       (input) INTEGER
+*          The order of the tridiagonal matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  D       (input) REAL array, dimension (N)
+*          The n diagonal elements of the diagonal matrix D from the
+*          factorization A = U'*D*U or A = L*D*L'.
+*
+*  E       (input) COMPLEX array, dimension (N-1)
+*          If IUPLO = 1, the (n-1) superdiagonal elements of the unit
+*          bidiagonal factor U from the factorization A = U'*D*U.
+*          If IUPLO = 0, the (n-1) subdiagonal elements of the unit
+*          bidiagonal factor L from the factorization A = L*D*L'.
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the right hand side vectors B for the system of
+*          linear equations.
+*          On exit, the solution vectors, X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, J
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CSSCAL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.1 ) THEN
+         IF( N.EQ.1 )
+     $      CALL CSSCAL( NRHS, 1. / D( 1 ), B, LDB )
+         RETURN
+      END IF
+*
+      IF( IUPLO.EQ.1 ) THEN
+*
+*        Solve A * X = B using the factorization A = U'*D*U,
+*        overwriting each right hand side vector with its solution.
+*
+         IF( NRHS.LE.2 ) THEN
+            J = 1
+    5       CONTINUE
+*
+*           Solve U' * x = b.
+*
+            DO 10 I = 2, N
+               B( I, J ) = B( I, J ) - B( I-1, J )*CONJG( E( I-1 ) )
+   10       CONTINUE
+*
+*           Solve D * U * x = b.
+*
+            DO 20 I = 1, N
+               B( I, J ) = B( I, J ) / D( I )
+   20       CONTINUE
+            DO 30 I = N - 1, 1, -1
+               B( I, J ) = B( I, J ) - B( I+1, J )*E( I )
+   30       CONTINUE
+            IF( J.LT.NRHS ) THEN
+               J = J + 1
+               GO TO 5
+            END IF
+         ELSE
+            DO 60 J = 1, NRHS
+*
+*              Solve U' * x = b.
+*
+               DO 40 I = 2, N
+                  B( I, J ) = B( I, J ) - B( I-1, J )*CONJG( E( I-1 ) )
+   40          CONTINUE
+*
+*              Solve D * U * x = b.
+*
+               B( N, J ) = B( N, J ) / D( N )
+               DO 50 I = N - 1, 1, -1
+                  B( I, J ) = B( I, J ) / D( I ) - B( I+1, J )*E( I )
+   50          CONTINUE
+   60       CONTINUE
+         END IF
+      ELSE
+*
+*        Solve A * X = B using the factorization A = L*D*L',
+*        overwriting each right hand side vector with its solution.
+*
+         IF( NRHS.LE.2 ) THEN
+            J = 1
+   65       CONTINUE
+*
+*           Solve L * x = b.
+*
+            DO 70 I = 2, N
+               B( I, J ) = B( I, J ) - B( I-1, J )*E( I-1 )
+   70       CONTINUE
+*
+*           Solve D * L' * x = b.
+*
+            DO 80 I = 1, N
+               B( I, J ) = B( I, J ) / D( I )
+   80       CONTINUE
+            DO 90 I = N - 1, 1, -1
+               B( I, J ) = B( I, J ) - B( I+1, J )*CONJG( E( I ) )
+   90       CONTINUE
+            IF( J.LT.NRHS ) THEN
+               J = J + 1
+               GO TO 65
+            END IF
+         ELSE
+            DO 120 J = 1, NRHS
+*
+*              Solve L * x = b.
+*
+               DO 100 I = 2, N
+                  B( I, J ) = B( I, J ) - B( I-1, J )*E( I-1 )
+  100          CONTINUE
+*
+*              Solve D * L' * x = b.
+*
+               B( N, J ) = B( N, J ) / D( N )
+               DO 110 I = N - 1, 1, -1
+                  B( I, J ) = B( I, J ) / D( I ) -
+     $                        B( I+1, J )*CONJG( E( I ) )
+  110          CONTINUE
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CPTTS2
+*
+      END
diff --git a/costa/native/external/lapack/crot.f b/costa/native/external/lapack/crot.f
new file mode 100644
index 000000000..4c3b26dc4
--- /dev/null
+++ b/costa/native/external/lapack/crot.f
@@ -0,0 +1,92 @@
+      SUBROUTINE CROT( N, CX, INCX, CY, INCY, C, S )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, INCY, N
+      REAL               C
+      COMPLEX            S
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            CX( * ), CY( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CROT   applies a plane rotation, where the cos (C) is real and the
+*  sin (S) is complex, and the vectors CX and CY are complex.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of elements in the vectors CX and CY.
+*
+*  CX      (input/output) COMPLEX array, dimension (N)
+*          On input, the vector X.
+*          On output, CX is overwritten with C*X + S*Y.
+*
+*  INCX    (input) INTEGER
+*          The increment between successive values of CY.  INCX <> 0.
+*
+*  CY      (input/output) COMPLEX array, dimension (N)
+*          On input, the vector Y.
+*          On output, CY is overwritten with -CONJG(S)*X + C*Y.
+*
+*  INCY    (input) INTEGER
+*          The increment between successive values of CY.  INCX <> 0.
+*
+*  C       (input) REAL
+*  S       (input) COMPLEX
+*          C and S define a rotation
+*             [  C          S  ]
+*             [ -conjg(S)   C  ]
+*          where C*C + S*CONJG(S) = 1.0.
+*
+* =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, IX, IY
+      COMPLEX            STEMP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.LE.0 )
+     $   RETURN
+      IF( INCX.EQ.1 .AND. INCY.EQ.1 )
+     $   GO TO 20
+*
+*     Code for unequal increments or equal increments not equal to 1
+*
+      IX = 1
+      IY = 1
+      IF( INCX.LT.0 )
+     $   IX = ( -N+1 )*INCX + 1
+      IF( INCY.LT.0 )
+     $   IY = ( -N+1 )*INCY + 1
+      DO 10 I = 1, N
+         STEMP = C*CX( IX ) + S*CY( IY )
+         CY( IY ) = C*CY( IY ) - CONJG( S )*CX( IX )
+         CX( IX ) = STEMP
+         IX = IX + INCX
+         IY = IY + INCY
+   10 CONTINUE
+      RETURN
+*
+*     Code for both increments equal to 1
+*
+   20 CONTINUE
+      DO 30 I = 1, N
+         STEMP = C*CX( I ) + S*CY( I )
+         CY( I ) = C*CY( I ) - CONJG( S )*CX( I )
+         CX( I ) = STEMP
+   30 CONTINUE
+      RETURN
+      END
diff --git a/costa/native/external/lapack/cspcon.f b/costa/native/external/lapack/cspcon.f
new file mode 100644
index 000000000..a9df7975a
--- /dev/null
+++ b/costa/native/external/lapack/cspcon.f
@@ -0,0 +1,155 @@
+      SUBROUTINE CSPCON( UPLO, N, AP, IPIV, ANORM, RCOND, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+      REAL               ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            AP( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CSPCON estimates the reciprocal of the condition number (in the
+*  1-norm) of a complex symmetric packed matrix A using the
+*  factorization A = U*D*U**T or A = L*D*L**T computed by CSPTRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**T;
+*          = 'L':  Lower triangular, form is A = L*D*L**T.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input) COMPLEX array, dimension (N*(N+1)/2)
+*          The block diagonal matrix D and the multipliers used to
+*          obtain the factor U or L as computed by CSPTRF, stored as a
+*          packed triangular matrix.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by CSPTRF.
+*
+*  ANORM   (input) REAL
+*          The 1-norm of the original matrix A.
+*
+*  RCOND   (output) REAL
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an
+*          estimate of the 1-norm of inv(A) computed in this routine.
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, IP, KASE
+      REAL               AINVNM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLACON, CSPTRS, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CSPCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.LE.ZERO ) THEN
+         RETURN
+      END IF
+*
+*     Check that the diagonal matrix D is nonsingular.
+*
+      IF( UPPER ) THEN
+*
+*        Upper triangular storage: examine D from bottom to top
+*
+         IP = N*( N+1 ) / 2
+         DO 10 I = N, 1, -1
+            IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO )
+     $         RETURN
+            IP = IP - I
+   10    CONTINUE
+      ELSE
+*
+*        Lower triangular storage: examine D from top to bottom.
+*
+         IP = 1
+         DO 20 I = 1, N
+            IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO )
+     $         RETURN
+            IP = IP + N - I + 1
+   20    CONTINUE
+      END IF
+*
+*     Estimate the 1-norm of the inverse.
+*
+      KASE = 0
+   30 CONTINUE
+      CALL CLACON( N, WORK( N+1 ), WORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+*
+*        Multiply by inv(L*D*L') or inv(U*D*U').
+*
+         CALL CSPTRS( UPLO, N, 1, AP, IPIV, WORK, N, INFO )
+         GO TO 30
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+      RETURN
+*
+*     End of CSPCON
+*
+      END
diff --git a/costa/native/external/lapack/cspmv.f b/costa/native/external/lapack/cspmv.f
new file mode 100644
index 000000000..cf8d38865
--- /dev/null
+++ b/costa/native/external/lapack/cspmv.f
@@ -0,0 +1,265 @@
+      SUBROUTINE CSPMV( UPLO, N, ALPHA, AP, X, INCX, BETA, Y, INCY )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INCX, INCY, N
+      COMPLEX            ALPHA, BETA
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            AP( * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CSPMV  performs the matrix-vector operation
+*
+*     y := alpha*A*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are n element vectors and
+*  A is an n by n symmetric matrix, supplied in packed form.
+*
+*  Arguments
+*  ==========
+*
+*  UPLO   - CHARACTER*1
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the matrix A is supplied in the packed
+*           array AP as follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  supplied in AP.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  supplied in AP.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  AP     - COMPLEX array, dimension at least
+*           ( ( N*( N + 1 ) )/2 ).
+*           Before entry, with UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
+*           and a( 2, 2 ) respectively, and so on.
+*           Before entry, with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
+*           and a( 3, 1 ) respectively, and so on.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX array, dimension at least
+*           ( 1 + ( N - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the N-
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX array, dimension at least
+*           ( 1 + ( N - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y. On exit, Y is overwritten by the updated
+*           vector y.
+*
+*  INCY   - INTEGER
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+      COMPLEX            ZERO
+      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY
+      COMPLEX            TEMP1, TEMP2
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = 1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 ) THEN
+         INFO = 6
+      ELSE IF( INCY.EQ.0 ) THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CSPMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ) .OR. ( ( ALPHA.EQ.ZERO ) .AND. ( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set up the start points in  X  and  Y.
+*
+      IF( INCX.GT.0 ) THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( N-1 )*INCX
+      END IF
+      IF( INCY.GT.0 ) THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( N-1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of the array AP
+*     are accessed sequentially with one pass through AP.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE ) THEN
+         IF( INCY.EQ.1 ) THEN
+            IF( BETA.EQ.ZERO ) THEN
+               DO 10 I = 1, N
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20 I = 1, N
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO ) THEN
+               DO 30 I = 1, N
+                  Y( IY ) = ZERO
+                  IY = IY + INCY
+   30          CONTINUE
+            ELSE
+               DO 40 I = 1, N
+                  Y( IY ) = BETA*Y( IY )
+                  IY = IY + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      KK = 1
+      IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*        Form  y  when AP contains the upper triangle.
+*
+         IF( ( INCX.EQ.1 ) .AND. ( INCY.EQ.1 ) ) THEN
+            DO 60 J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               K = KK
+               DO 50 I = 1, J - 1
+                  Y( I ) = Y( I ) + TEMP1*AP( K )
+                  TEMP2 = TEMP2 + AP( K )*X( I )
+                  K = K + 1
+   50          CONTINUE
+               Y( J ) = Y( J ) + TEMP1*AP( KK+J-1 ) + ALPHA*TEMP2
+               KK = KK + J
+   60       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 80 J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               IX = KX
+               IY = KY
+               DO 70 K = KK, KK + J - 2
+                  Y( IY ) = Y( IY ) + TEMP1*AP( K )
+                  TEMP2 = TEMP2 + AP( K )*X( IX )
+                  IX = IX + INCX
+                  IY = IY + INCY
+   70          CONTINUE
+               Y( JY ) = Y( JY ) + TEMP1*AP( KK+J-1 ) + ALPHA*TEMP2
+               JX = JX + INCX
+               JY = JY + INCY
+               KK = KK + J
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y  when AP contains the lower triangle.
+*
+         IF( ( INCX.EQ.1 ) .AND. ( INCY.EQ.1 ) ) THEN
+            DO 100 J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               Y( J ) = Y( J ) + TEMP1*AP( KK )
+               K = KK + 1
+               DO 90 I = J + 1, N
+                  Y( I ) = Y( I ) + TEMP1*AP( K )
+                  TEMP2 = TEMP2 + AP( K )*X( I )
+                  K = K + 1
+   90          CONTINUE
+               Y( J ) = Y( J ) + ALPHA*TEMP2
+               KK = KK + ( N-J+1 )
+  100       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 120 J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               Y( JY ) = Y( JY ) + TEMP1*AP( KK )
+               IX = JX
+               IY = JY
+               DO 110 K = KK + 1, KK + N - J
+                  IX = IX + INCX
+                  IY = IY + INCY
+                  Y( IY ) = Y( IY ) + TEMP1*AP( K )
+                  TEMP2 = TEMP2 + AP( K )*X( IX )
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP2
+               JX = JX + INCX
+               JY = JY + INCY
+               KK = KK + ( N-J+1 )
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CSPMV
+*
+      END
diff --git a/costa/native/external/lapack/cspr.f b/costa/native/external/lapack/cspr.f
new file mode 100644
index 000000000..45c505171
--- /dev/null
+++ b/costa/native/external/lapack/cspr.f
@@ -0,0 +1,214 @@
+      SUBROUTINE CSPR( UPLO, N, ALPHA, X, INCX, AP )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INCX, N
+      COMPLEX            ALPHA
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            AP( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CSPR    performs the symmetric rank 1 operation
+*
+*     A := alpha*x*conjg( x' ) + A,
+*
+*  where alpha is a complex scalar, x is an n element vector and A is an
+*  n by n symmetric matrix, supplied in packed form.
+*
+*  Arguments
+*  ==========
+*
+*  UPLO   - CHARACTER*1
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the matrix A is supplied in the packed
+*           array AP as follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  supplied in AP.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  supplied in AP.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX array, dimension at least
+*           ( 1 + ( N - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the N-
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  AP     - COMPLEX array, dimension at least
+*           ( ( N*( N + 1 ) )/2 ).
+*           Before entry, with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
+*           and a( 2, 2 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the upper triangular part of the
+*           updated matrix.
+*           Before entry, with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
+*           and a( 3, 1 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the lower triangular part of the
+*           updated matrix.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set, they are assumed to be zero, and on exit they
+*           are set to zero.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, INFO, IX, J, JX, K, KK, KX
+      COMPLEX            TEMP
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = 1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 ) THEN
+         INFO = 5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CSPR  ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ) .OR. ( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Set the start point in X if the increment is not unity.
+*
+      IF( INCX.LE.0 ) THEN
+         KX = 1 - ( N-1 )*INCX
+      ELSE IF( INCX.NE.1 ) THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of the array AP
+*     are accessed sequentially with one pass through AP.
+*
+      KK = 1
+      IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*        Form  A  when upper triangle is stored in AP.
+*
+         IF( INCX.EQ.1 ) THEN
+            DO 20 J = 1, N
+               IF( X( J ).NE.ZERO ) THEN
+                  TEMP = ALPHA*X( J )
+                  K = KK
+                  DO 10 I = 1, J - 1
+                     AP( K ) = AP( K ) + X( I )*TEMP
+                     K = K + 1
+   10             CONTINUE
+                  AP( KK+J-1 ) = AP( KK+J-1 ) + X( J )*TEMP
+               ELSE
+                  AP( KK+J-1 ) = AP( KK+J-1 )
+               END IF
+               KK = KK + J
+   20       CONTINUE
+         ELSE
+            JX = KX
+            DO 40 J = 1, N
+               IF( X( JX ).NE.ZERO ) THEN
+                  TEMP = ALPHA*X( JX )
+                  IX = KX
+                  DO 30 K = KK, KK + J - 2
+                     AP( K ) = AP( K ) + X( IX )*TEMP
+                     IX = IX + INCX
+   30             CONTINUE
+                  AP( KK+J-1 ) = AP( KK+J-1 ) + X( JX )*TEMP
+               ELSE
+                  AP( KK+J-1 ) = AP( KK+J-1 )
+               END IF
+               JX = JX + INCX
+               KK = KK + J
+   40       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  A  when lower triangle is stored in AP.
+*
+         IF( INCX.EQ.1 ) THEN
+            DO 60 J = 1, N
+               IF( X( J ).NE.ZERO ) THEN
+                  TEMP = ALPHA*X( J )
+                  AP( KK ) = AP( KK ) + TEMP*X( J )
+                  K = KK + 1
+                  DO 50 I = J + 1, N
+                     AP( K ) = AP( K ) + X( I )*TEMP
+                     K = K + 1
+   50             CONTINUE
+               ELSE
+                  AP( KK ) = AP( KK )
+               END IF
+               KK = KK + N - J + 1
+   60       CONTINUE
+         ELSE
+            JX = KX
+            DO 80 J = 1, N
+               IF( X( JX ).NE.ZERO ) THEN
+                  TEMP = ALPHA*X( JX )
+                  AP( KK ) = AP( KK ) + TEMP*X( JX )
+                  IX = JX
+                  DO 70 K = KK + 1, KK + N - J
+                     IX = IX + INCX
+                     AP( K ) = AP( K ) + X( IX )*TEMP
+   70             CONTINUE
+               ELSE
+                  AP( KK ) = AP( KK )
+               END IF
+               JX = JX + INCX
+               KK = KK + N - J + 1
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CSPR
+*
+      END
diff --git a/costa/native/external/lapack/csprfs.f b/costa/native/external/lapack/csprfs.f
new file mode 100644
index 000000000..ae5c18e34
--- /dev/null
+++ b/costa/native/external/lapack/csprfs.f
@@ -0,0 +1,336 @@
+      SUBROUTINE CSPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX,
+     $                   FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX            AFP( * ), AP( * ), B( LDB, * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CSPRFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is symmetric indefinite
+*  and packed, and provides error bounds and backward error estimates
+*  for the solution.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AP      (input) COMPLEX array, dimension (N*(N+1)/2)
+*          The upper or lower triangle of the symmetric matrix A, packed
+*          columnwise in a linear array.  The j-th column of A is stored
+*          in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*  AFP     (input) COMPLEX array, dimension (N*(N+1)/2)
+*          The factored form of the matrix A.  AFP contains the block
+*          diagonal matrix D and the multipliers used to obtain the
+*          factor U or L from the factorization A = U*D*U**T or
+*          A = L*D*L**T as computed by CSPTRF, stored as a packed
+*          triangular matrix.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by CSPTRF.
+*
+*  B       (input) COMPLEX array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) COMPLEX array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by CSPTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+      REAL               TWO
+      PARAMETER          ( TWO = 2.0E+0 )
+      REAL               THREE
+      PARAMETER          ( THREE = 3.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            COUNT, I, IK, J, K, KASE, KK, NZ
+      REAL               EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+      COMPLEX            ZDUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CCOPY, CLACON, CSPMV, CSPTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MAX, REAL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CSPRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = SLAMCH( 'Epsilon' )
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - A * X
+*
+         CALL CCOPY( N, B( 1, J ), 1, WORK, 1 )
+         CALL CSPMV( UPLO, N, -ONE, AP, X( 1, J ), 1, ONE, WORK, 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            RWORK( I ) = CABS1( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(A)*abs(X) + abs(B).
+*
+         KK = 1
+         IF( UPPER ) THEN
+            DO 50 K = 1, N
+               S = ZERO
+               XK = CABS1( X( K, J ) )
+               IK = KK
+               DO 40 I = 1, K - 1
+                  RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK
+                  S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) )
+                  IK = IK + 1
+   40          CONTINUE
+               RWORK( K ) = RWORK( K ) + CABS1( AP( KK+K-1 ) )*XK + S
+               KK = KK + K
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               XK = CABS1( X( K, J ) )
+               RWORK( K ) = RWORK( K ) + CABS1( AP( KK ) )*XK
+               IK = KK + 1
+               DO 60 I = K + 1, N
+                  RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK
+                  S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) )
+                  IK = IK + 1
+   60          CONTINUE
+               RWORK( K ) = RWORK( K ) + S
+               KK = KK + ( N-K+1 )
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) )
+            ELSE
+               S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) /
+     $             ( RWORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL CSPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO )
+            CALL CAXPY( N, ONE, WORK, 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(A))*
+*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(A) is the inverse of A
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(A)*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use CLACON to estimate the infinity-norm of the matrix
+*           inv(A) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I )
+            ELSE
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) +
+     $                      SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL CLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(A').
+*
+               CALL CSPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO )
+               DO 110 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  110          CONTINUE
+            ELSE IF( KASE.EQ.2 ) THEN
+*
+*              Multiply by inv(A)*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  120          CONTINUE
+               CALL CSPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of CSPRFS
+*
+      END
diff --git a/costa/native/external/lapack/cspsv.f b/costa/native/external/lapack/cspsv.f
new file mode 100644
index 000000000..7c9c86be8
--- /dev/null
+++ b/costa/native/external/lapack/cspsv.f
@@ -0,0 +1,149 @@
+      SUBROUTINE CSPSV( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            AP( * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CSPSV computes the solution to a complex system of linear equations
+*     A * X = B,
+*  where A is an N-by-N symmetric matrix stored in packed format and X
+*  and B are N-by-NRHS matrices.
+*
+*  The diagonal pivoting method is used to factor A as
+*     A = U * D * U**T,  if UPLO = 'U', or
+*     A = L * D * L**T,  if UPLO = 'L',
+*  where U (or L) is a product of permutation and unit upper (lower)
+*  triangular matrices, D is symmetric and block diagonal with 1-by-1
+*  and 2-by-2 diagonal blocks.  The factored form of A is then used to
+*  solve the system of equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AP      (input/output) COMPLEX array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*          See below for further details.
+*
+*          On exit, the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L from the factorization
+*          A = U*D*U**T or A = L*D*L**T as computed by CSPTRF, stored as
+*          a packed triangular matrix in the same storage format as A.
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D, as
+*          determined by CSPTRF.  If IPIV(k) > 0, then rows and columns
+*          k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1
+*          diagonal block.  If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0,
+*          then rows and columns k-1 and -IPIV(k) were interchanged and
+*          D(k-1:k,k-1:k) is a 2-by-2 diagonal block.  If UPLO = 'L' and
+*          IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and
+*          -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2
+*          diagonal block.
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, D(i,i) is exactly zero.  The factorization
+*                has been completed, but the block diagonal matrix D is
+*                exactly singular, so the solution could not be
+*                computed.
+*
+*  Further Details
+*  ===============
+*
+*  The packed storage scheme is illustrated by the following example
+*  when N = 4, UPLO = 'U':
+*
+*  Two-dimensional storage of the symmetric matrix A:
+*
+*     a11 a12 a13 a14
+*         a22 a23 a24
+*             a33 a34     (aij = aji)
+*                 a44
+*
+*  Packed storage of the upper triangle of A:
+*
+*  AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ]
+*
+*  =====================================================================
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CSPTRF, CSPTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CSPSV ', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the factorization A = U*D*U' or A = L*D*L'.
+*
+      CALL CSPTRF( UPLO, N, AP, IPIV, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL CSPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO )
+*
+      END IF
+      RETURN
+*
+*     End of CSPSV
+*
+      END
diff --git a/costa/native/external/lapack/cspsvx.f b/costa/native/external/lapack/cspsvx.f
new file mode 100644
index 000000000..ea7b53177
--- /dev/null
+++ b/costa/native/external/lapack/cspsvx.f
@@ -0,0 +1,279 @@
+      SUBROUTINE CSPSVX( FACT, UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X,
+     $                   LDX, RCOND, FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          FACT, UPLO
+      INTEGER            INFO, LDB, LDX, N, NRHS
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX            AFP( * ), AP( * ), B( LDB, * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CSPSVX uses the diagonal pivoting factorization A = U*D*U**T or
+*  A = L*D*L**T to compute the solution to a complex system of linear
+*  equations A * X = B, where A is an N-by-N symmetric matrix stored
+*  in packed format and X and B are N-by-NRHS matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'N', the diagonal pivoting method is used to factor A as
+*        A = U * D * U**T,  if UPLO = 'U', or
+*        A = L * D * L**T,  if UPLO = 'L',
+*     where U (or L) is a product of permutation and unit upper (lower)
+*     triangular matrices and D is symmetric and block diagonal with
+*     1-by-1 and 2-by-2 diagonal blocks.
+*
+*  2. If some D(i,i)=0, so that D is exactly singular, then the routine
+*     returns with INFO = i. Otherwise, the factored form of A is used
+*     to estimate the condition number of the matrix A.  If the
+*     reciprocal of the condition number is less than machine precision,
+*     INFO = N+1 is returned as a warning, but the routine still goes on
+*     to solve for X and compute error bounds as described below.
+*
+*  3. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  4. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of A has been
+*          supplied on entry.
+*          = 'F':  On entry, AFP and IPIV contain the factored form
+*                  of A.  AP, AFP and IPIV will not be modified.
+*          = 'N':  The matrix A will be copied to AFP and factored.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AP      (input) COMPLEX array, dimension (N*(N+1)/2)
+*          The upper or lower triangle of the symmetric matrix A, packed
+*          columnwise in a linear array.  The j-th column of A is stored
+*          in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*          See below for further details.
+*
+*  AFP     (input or output) COMPLEX array, dimension (N*(N+1)/2)
+*          If FACT = 'F', then AFP is an input argument and on entry
+*          contains the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L from the factorization
+*          A = U*D*U**T or A = L*D*L**T as computed by CSPTRF, stored as
+*          a packed triangular matrix in the same storage format as A.
+*
+*          If FACT = 'N', then AFP is an output argument and on exit
+*          contains the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L from the factorization
+*          A = U*D*U**T or A = L*D*L**T as computed by CSPTRF, stored as
+*          a packed triangular matrix in the same storage format as A.
+*
+*  IPIV    (input or output) INTEGER array, dimension (N)
+*          If FACT = 'F', then IPIV is an input argument and on entry
+*          contains details of the interchanges and the block structure
+*          of D, as determined by CSPTRF.
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*          If FACT = 'N', then IPIV is an output argument and on exit
+*          contains details of the interchanges and the block structure
+*          of D, as determined by CSPTRF.
+*
+*  B       (input) COMPLEX array, dimension (LDB,NRHS)
+*          The N-by-NRHS right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) COMPLEX array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) REAL
+*          The estimate of the reciprocal condition number of the matrix
+*          A.  If RCOND is less than the machine precision (in
+*          particular, if RCOND = 0), the matrix is singular to working
+*          precision.  This condition is indicated by a return code of
+*          INFO > 0.
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= N:  D(i,i) is exactly zero.  The factorization
+*                       has been completed but the factor D is exactly
+*                       singular, so the solution and error bounds could
+*                       not be computed. RCOND = 0 is returned.
+*                = N+1: D is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  Further Details
+*  ===============
+*
+*  The packed storage scheme is illustrated by the following example
+*  when N = 4, UPLO = 'U':
+*
+*  Two-dimensional storage of the symmetric matrix A:
+*
+*     a11 a12 a13 a14
+*         a22 a23 a24
+*             a33 a34     (aij = aji)
+*                 a44
+*
+*  Packed storage of the upper triangle of A:
+*
+*  AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ]
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOFACT
+      REAL               ANORM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               CLANSP, SLAMCH
+      EXTERNAL           LSAME, CLANSP, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CLACPY, CSPCON, CSPRFS, CSPTRF, CSPTRS,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CSPSVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( NOFACT ) THEN
+*
+*        Compute the factorization A = U*D*U' or A = L*D*L'.
+*
+         CALL CCOPY( N*( N+1 ) / 2, AP, 1, AFP, 1 )
+         CALL CSPTRF( UPLO, N, AFP, IPIV, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      ANORM = CLANSP( 'I', UPLO, N, AP, RWORK )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL CSPCON( UPLO, N, AFP, IPIV, ANORM, RCOND, WORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.SLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution vectors X.
+*
+      CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL CSPTRS( UPLO, N, NRHS, AFP, IPIV, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solutions and
+*     compute error bounds and backward error estimates for them.
+*
+      CALL CSPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, FERR,
+     $             BERR, WORK, RWORK, INFO )
+*
+      RETURN
+*
+*     End of CSPSVX
+*
+      END
diff --git a/costa/native/external/lapack/csptrf.f b/costa/native/external/lapack/csptrf.f
new file mode 100644
index 000000000..7183cda51
--- /dev/null
+++ b/costa/native/external/lapack/csptrf.f
@@ -0,0 +1,556 @@
+      SUBROUTINE CSPTRF( UPLO, N, AP, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            AP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CSPTRF computes the factorization of a complex symmetric matrix A
+*  stored in packed format using the Bunch-Kaufman diagonal pivoting
+*  method:
+*
+*     A = U*D*U**T  or  A = L*D*L**T
+*
+*  where U (or L) is a product of permutation and unit upper (lower)
+*  triangular matrices, and D is symmetric and block diagonal with
+*  1-by-1 and 2-by-2 diagonal blocks.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) COMPLEX array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L, stored as a packed triangular
+*          matrix overwriting A (see below for further details).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D.
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, D(i,i) is exactly zero.  The factorization
+*               has been completed, but the block diagonal matrix D is
+*               exactly singular, and division by zero will occur if it
+*               is used to solve a system of equations.
+*
+*  Further Details
+*  ===============
+*
+*  5-96 - Based on modifications by J. Lewis, Boeing Computer Services
+*         Company
+*
+*  If UPLO = 'U', then A = U*D*U', where
+*     U = P(n)*U(n)* ... *P(k)U(k)* ...,
+*  i.e., U is a product of terms P(k)*U(k), where k decreases from n to
+*  1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and U(k) is a unit upper triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    v    0   )   k-s
+*     U(k) =  (   0    I    0   )   s
+*             (   0    0    I   )   n-k
+*                k-s   s   n-k
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k).
+*  If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k),
+*  and A(k,k), and v overwrites A(1:k-2,k-1:k).
+*
+*  If UPLO = 'L', then A = L*D*L', where
+*     L = P(1)*L(1)* ... *P(k)*L(k)* ...,
+*  i.e., L is a product of terms P(k)*L(k), where k increases from 1 to
+*  n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and L(k) is a unit lower triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    0     0   )  k-1
+*     L(k) =  (   0    I     0   )  s
+*             (   0    v     I   )  n-k-s+1
+*                k-1   s  n-k-s+1
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k).
+*  If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k),
+*  and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+      REAL               EIGHT, SEVTEN
+      PARAMETER          ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 )
+      COMPLEX            CONE
+      PARAMETER          ( CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, IMAX, J, JMAX, K, KC, KK, KNC, KP, KPC,
+     $                   KSTEP, KX, NPP
+      REAL               ABSAKK, ALPHA, COLMAX, ROWMAX
+      COMPLEX            D11, D12, D21, D22, R1, T, WK, WKM1, WKP1, ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ICAMAX
+      EXTERNAL           LSAME, ICAMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CSCAL, CSPR, CSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MAX, REAL, SQRT
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CSPTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Initialize ALPHA for use in choosing pivot block size.
+*
+      ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT
+*
+      IF( UPPER ) THEN
+*
+*        Factorize A as U*D*U' using the upper triangle of A
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2
+*
+         K = N
+         KC = ( N-1 )*N / 2 + 1
+   10    CONTINUE
+         KNC = KC
+*
+*        If K < 1, exit from loop
+*
+         IF( K.LT.1 )
+     $      GO TO 110
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = CABS1( AP( KC+K-1 ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.GT.1 ) THEN
+            IMAX = ICAMAX( K-1, AP( KC ), 1 )
+            COLMAX = CABS1( AP( KC+IMAX-1 ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               ROWMAX = ZERO
+               JMAX = IMAX
+               KX = IMAX*( IMAX+1 ) / 2 + IMAX
+               DO 20 J = IMAX + 1, K
+                  IF( CABS1( AP( KX ) ).GT.ROWMAX ) THEN
+                     ROWMAX = CABS1( AP( KX ) )
+                     JMAX = J
+                  END IF
+                  KX = KX + J
+   20          CONTINUE
+               KPC = ( IMAX-1 )*IMAX / 2 + 1
+               IF( IMAX.GT.1 ) THEN
+                  JMAX = ICAMAX( IMAX-1, AP( KPC ), 1 )
+                  ROWMAX = MAX( ROWMAX, CABS1( AP( KPC+JMAX-1 ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( CABS1( AP( KPC+IMAX-1 ) ).GE.ALPHA*ROWMAX ) THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+               ELSE
+*
+*                 interchange rows and columns K-1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K - KSTEP + 1
+            IF( KSTEP.EQ.2 )
+     $         KNC = KNC - K + 1
+            IF( KP.NE.KK ) THEN
+*
+*              Interchange rows and columns KK and KP in the leading
+*              submatrix A(1:k,1:k)
+*
+               CALL CSWAP( KP-1, AP( KNC ), 1, AP( KPC ), 1 )
+               KX = KPC + KP - 1
+               DO 30 J = KP + 1, KK - 1
+                  KX = KX + J - 1
+                  T = AP( KNC+J-1 )
+                  AP( KNC+J-1 ) = AP( KX )
+                  AP( KX ) = T
+   30          CONTINUE
+               T = AP( KNC+KK-1 )
+               AP( KNC+KK-1 ) = AP( KPC+KP-1 )
+               AP( KPC+KP-1 ) = T
+               IF( KSTEP.EQ.2 ) THEN
+                  T = AP( KC+K-2 )
+                  AP( KC+K-2 ) = AP( KC+KP-1 )
+                  AP( KC+KP-1 ) = T
+               END IF
+            END IF
+*
+*           Update the leading submatrix
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column k now holds
+*
+*              W(k) = U(k)*D(k)
+*
+*              where U(k) is the k-th column of U
+*
+*              Perform a rank-1 update of A(1:k-1,1:k-1) as
+*
+*              A := A - U(k)*D(k)*U(k)' = A - W(k)*1/D(k)*W(k)'
+*
+               R1 = CONE / AP( KC+K-1 )
+               CALL CSPR( UPLO, K-1, -R1, AP( KC ), 1, AP )
+*
+*              Store U(k) in column k
+*
+               CALL CSCAL( K-1, R1, AP( KC ), 1 )
+            ELSE
+*
+*              2-by-2 pivot block D(k): columns k and k-1 now hold
+*
+*              ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k)
+*
+*              where U(k) and U(k-1) are the k-th and (k-1)-th columns
+*              of U
+*
+*              Perform a rank-2 update of A(1:k-2,1:k-2) as
+*
+*              A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )'
+*                 = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )'
+*
+               IF( K.GT.2 ) THEN
+*
+                  D12 = AP( K-1+( K-1 )*K / 2 )
+                  D22 = AP( K-1+( K-2 )*( K-1 ) / 2 ) / D12
+                  D11 = AP( K+( K-1 )*K / 2 ) / D12
+                  T = CONE / ( D11*D22-CONE )
+                  D12 = T / D12
+*
+                  DO 50 J = K - 2, 1, -1
+                     WKM1 = D12*( D11*AP( J+( K-2 )*( K-1 ) / 2 )-
+     $                      AP( J+( K-1 )*K / 2 ) )
+                     WK = D12*( D22*AP( J+( K-1 )*K / 2 )-
+     $                    AP( J+( K-2 )*( K-1 ) / 2 ) )
+                     DO 40 I = J, 1, -1
+                        AP( I+( J-1 )*J / 2 ) = AP( I+( J-1 )*J / 2 ) -
+     $                     AP( I+( K-1 )*K / 2 )*WK -
+     $                     AP( I+( K-2 )*( K-1 ) / 2 )*WKM1
+   40                CONTINUE
+                     AP( J+( K-1 )*K / 2 ) = WK
+                     AP( J+( K-2 )*( K-1 ) / 2 ) = WKM1
+   50             CONTINUE
+*
+               END IF
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K-1 ) = -KP
+         END IF
+*
+*        Decrease K and return to the start of the main loop
+*
+         K = K - KSTEP
+         KC = KNC - K
+         GO TO 10
+*
+      ELSE
+*
+*        Factorize A as L*D*L' using the lower triangle of A
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2
+*
+         K = 1
+         KC = 1
+         NPP = N*( N+1 ) / 2
+   60    CONTINUE
+         KNC = KC
+*
+*        If K > N, exit from loop
+*
+         IF( K.GT.N )
+     $      GO TO 110
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = CABS1( AP( KC ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.LT.N ) THEN
+            IMAX = K + ICAMAX( N-K, AP( KC+1 ), 1 )
+            COLMAX = CABS1( AP( KC+IMAX-K ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               ROWMAX = ZERO
+               KX = KC + IMAX - K
+               DO 70 J = K, IMAX - 1
+                  IF( CABS1( AP( KX ) ).GT.ROWMAX ) THEN
+                     ROWMAX = CABS1( AP( KX ) )
+                     JMAX = J
+                  END IF
+                  KX = KX + N - J
+   70          CONTINUE
+               KPC = NPP - ( N-IMAX+1 )*( N-IMAX+2 ) / 2 + 1
+               IF( IMAX.LT.N ) THEN
+                  JMAX = IMAX + ICAMAX( N-IMAX, AP( KPC+1 ), 1 )
+                  ROWMAX = MAX( ROWMAX, CABS1( AP( KPC+JMAX-IMAX ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( CABS1( AP( KPC ) ).GE.ALPHA*ROWMAX ) THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+               ELSE
+*
+*                 interchange rows and columns K+1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K + KSTEP - 1
+            IF( KSTEP.EQ.2 )
+     $         KNC = KNC + N - K + 1
+            IF( KP.NE.KK ) THEN
+*
+*              Interchange rows and columns KK and KP in the trailing
+*              submatrix A(k:n,k:n)
+*
+               IF( KP.LT.N )
+     $            CALL CSWAP( N-KP, AP( KNC+KP-KK+1 ), 1, AP( KPC+1 ),
+     $                        1 )
+               KX = KNC + KP - KK
+               DO 80 J = KK + 1, KP - 1
+                  KX = KX + N - J + 1
+                  T = AP( KNC+J-KK )
+                  AP( KNC+J-KK ) = AP( KX )
+                  AP( KX ) = T
+   80          CONTINUE
+               T = AP( KNC )
+               AP( KNC ) = AP( KPC )
+               AP( KPC ) = T
+               IF( KSTEP.EQ.2 ) THEN
+                  T = AP( KC+1 )
+                  AP( KC+1 ) = AP( KC+KP-K )
+                  AP( KC+KP-K ) = T
+               END IF
+            END IF
+*
+*           Update the trailing submatrix
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column k now holds
+*
+*              W(k) = L(k)*D(k)
+*
+*              where L(k) is the k-th column of L
+*
+               IF( K.LT.N ) THEN
+*
+*                 Perform a rank-1 update of A(k+1:n,k+1:n) as
+*
+*                 A := A - L(k)*D(k)*L(k)' = A - W(k)*(1/D(k))*W(k)'
+*
+                  R1 = CONE / AP( KC )
+                  CALL CSPR( UPLO, N-K, -R1, AP( KC+1 ), 1,
+     $                       AP( KC+N-K+1 ) )
+*
+*                 Store L(k) in column K
+*
+                  CALL CSCAL( N-K, R1, AP( KC+1 ), 1 )
+               END IF
+            ELSE
+*
+*              2-by-2 pivot block D(k): columns K and K+1 now hold
+*
+*              ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k)
+*
+*              where L(k) and L(k+1) are the k-th and (k+1)-th columns
+*              of L
+*
+               IF( K.LT.N-1 ) THEN
+*
+*                 Perform a rank-2 update of A(k+2:n,k+2:n) as
+*
+*                 A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )'
+*                    = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )'
+*
+*                 where L(k) and L(k+1) are the k-th and (k+1)-th
+*                 columns of L
+*
+                  D21 = AP( K+1+( K-1 )*( 2*N-K ) / 2 )
+                  D11 = AP( K+1+K*( 2*N-K-1 ) / 2 ) / D21
+                  D22 = AP( K+( K-1 )*( 2*N-K ) / 2 ) / D21
+                  T = CONE / ( D11*D22-CONE )
+                  D21 = T / D21
+*
+                  DO 100 J = K + 2, N
+                     WK = D21*( D11*AP( J+( K-1 )*( 2*N-K ) / 2 )-
+     $                    AP( J+K*( 2*N-K-1 ) / 2 ) )
+                     WKP1 = D21*( D22*AP( J+K*( 2*N-K-1 ) / 2 )-
+     $                      AP( J+( K-1 )*( 2*N-K ) / 2 ) )
+                     DO 90 I = J, N
+                        AP( I+( J-1 )*( 2*N-J ) / 2 ) = AP( I+( J-1 )*
+     $                     ( 2*N-J ) / 2 ) - AP( I+( K-1 )*( 2*N-K ) /
+     $                     2 )*WK - AP( I+K*( 2*N-K-1 ) / 2 )*WKP1
+   90                CONTINUE
+                     AP( J+( K-1 )*( 2*N-K ) / 2 ) = WK
+                     AP( J+K*( 2*N-K-1 ) / 2 ) = WKP1
+  100             CONTINUE
+               END IF
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K+1 ) = -KP
+         END IF
+*
+*        Increase K and return to the start of the main loop
+*
+         K = K + KSTEP
+         KC = KNC + N - K + 2
+         GO TO 60
+*
+      END IF
+*
+  110 CONTINUE
+      RETURN
+*
+*     End of CSPTRF
+*
+      END
diff --git a/costa/native/external/lapack/csptri.f b/costa/native/external/lapack/csptri.f
new file mode 100644
index 000000000..ace977f01
--- /dev/null
+++ b/costa/native/external/lapack/csptri.f
@@ -0,0 +1,338 @@
+      SUBROUTINE CSPTRI( UPLO, N, AP, IPIV, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            AP( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CSPTRI computes the inverse of a complex symmetric indefinite matrix
+*  A in packed storage using the factorization A = U*D*U**T or
+*  A = L*D*L**T computed by CSPTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**T;
+*          = 'L':  Lower triangular, form is A = L*D*L**T.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) COMPLEX array, dimension (N*(N+1)/2)
+*          On entry, the block diagonal matrix D and the multipliers
+*          used to obtain the factor U or L as computed by CSPTRF,
+*          stored as a packed triangular matrix.
+*
+*          On exit, if INFO = 0, the (symmetric) inverse of the original
+*          matrix, stored as a packed triangular matrix. The j-th column
+*          of inv(A) is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = inv(A)(i,j) for 1<=i<=j;
+*          if UPLO = 'L',
+*             AP(i + (j-1)*(2n-j)/2) = inv(A)(i,j) for j<=i<=n.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by CSPTRF.
+*
+*  WORK    (workspace) COMPLEX array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its
+*               inverse could not be computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ),
+     $                   ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, K, KC, KCNEXT, KP, KPC, KSTEP, KX, NPP
+      COMPLEX            AK, AKKP1, AKP1, D, T, TEMP
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      COMPLEX            CDOTU
+      EXTERNAL           LSAME, CDOTU
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CSPMV, CSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CSPTRI', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Check that the diagonal matrix D is nonsingular.
+*
+      IF( UPPER ) THEN
+*
+*        Upper triangular storage: examine D from bottom to top
+*
+         KP = N*( N+1 ) / 2
+         DO 10 INFO = N, 1, -1
+            IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO )
+     $         RETURN
+            KP = KP - INFO
+   10    CONTINUE
+      ELSE
+*
+*        Lower triangular storage: examine D from top to bottom.
+*
+         KP = 1
+         DO 20 INFO = 1, N
+            IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO )
+     $         RETURN
+            KP = KP + N - INFO + 1
+   20    CONTINUE
+      END IF
+      INFO = 0
+*
+      IF( UPPER ) THEN
+*
+*        Compute inv(A) from the factorization A = U*D*U'.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+         KC = 1
+   30    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 50
+*
+         KCNEXT = KC + K
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Invert the diagonal block.
+*
+            AP( KC+K-1 ) = ONE / AP( KC+K-1 )
+*
+*           Compute column K of the inverse.
+*
+            IF( K.GT.1 ) THEN
+               CALL CCOPY( K-1, AP( KC ), 1, WORK, 1 )
+               CALL CSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, AP( KC ),
+     $                     1 )
+               AP( KC+K-1 ) = AP( KC+K-1 ) -
+     $                        CDOTU( K-1, WORK, 1, AP( KC ), 1 )
+            END IF
+            KSTEP = 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Invert the diagonal block.
+*
+            T = AP( KCNEXT+K-1 )
+            AK = AP( KC+K-1 ) / T
+            AKP1 = AP( KCNEXT+K ) / T
+            AKKP1 = AP( KCNEXT+K-1 ) / T
+            D = T*( AK*AKP1-ONE )
+            AP( KC+K-1 ) = AKP1 / D
+            AP( KCNEXT+K ) = AK / D
+            AP( KCNEXT+K-1 ) = -AKKP1 / D
+*
+*           Compute columns K and K+1 of the inverse.
+*
+            IF( K.GT.1 ) THEN
+               CALL CCOPY( K-1, AP( KC ), 1, WORK, 1 )
+               CALL CSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, AP( KC ),
+     $                     1 )
+               AP( KC+K-1 ) = AP( KC+K-1 ) -
+     $                        CDOTU( K-1, WORK, 1, AP( KC ), 1 )
+               AP( KCNEXT+K-1 ) = AP( KCNEXT+K-1 ) -
+     $                            CDOTU( K-1, AP( KC ), 1, AP( KCNEXT ),
+     $                            1 )
+               CALL CCOPY( K-1, AP( KCNEXT ), 1, WORK, 1 )
+               CALL CSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO,
+     $                     AP( KCNEXT ), 1 )
+               AP( KCNEXT+K ) = AP( KCNEXT+K ) -
+     $                          CDOTU( K-1, WORK, 1, AP( KCNEXT ), 1 )
+            END IF
+            KSTEP = 2
+            KCNEXT = KCNEXT + K + 1
+         END IF
+*
+         KP = ABS( IPIV( K ) )
+         IF( KP.NE.K ) THEN
+*
+*           Interchange rows and columns K and KP in the leading
+*           submatrix A(1:k+1,1:k+1)
+*
+            KPC = ( KP-1 )*KP / 2 + 1
+            CALL CSWAP( KP-1, AP( KC ), 1, AP( KPC ), 1 )
+            KX = KPC + KP - 1
+            DO 40 J = KP + 1, K - 1
+               KX = KX + J - 1
+               TEMP = AP( KC+J-1 )
+               AP( KC+J-1 ) = AP( KX )
+               AP( KX ) = TEMP
+   40       CONTINUE
+            TEMP = AP( KC+K-1 )
+            AP( KC+K-1 ) = AP( KPC+KP-1 )
+            AP( KPC+KP-1 ) = TEMP
+            IF( KSTEP.EQ.2 ) THEN
+               TEMP = AP( KC+K+K-1 )
+               AP( KC+K+K-1 ) = AP( KC+K+KP-1 )
+               AP( KC+K+KP-1 ) = TEMP
+            END IF
+         END IF
+*
+         K = K + KSTEP
+         KC = KCNEXT
+         GO TO 30
+   50    CONTINUE
+*
+      ELSE
+*
+*        Compute inv(A) from the factorization A = L*D*L'.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         NPP = N*( N+1 ) / 2
+         K = N
+         KC = NPP
+   60    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 80
+*
+         KCNEXT = KC - ( N-K+2 )
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Invert the diagonal block.
+*
+            AP( KC ) = ONE / AP( KC )
+*
+*           Compute column K of the inverse.
+*
+            IF( K.LT.N ) THEN
+               CALL CCOPY( N-K, AP( KC+1 ), 1, WORK, 1 )
+               CALL CSPMV( UPLO, N-K, -ONE, AP( KC+N-K+1 ), WORK, 1,
+     $                     ZERO, AP( KC+1 ), 1 )
+               AP( KC ) = AP( KC ) - CDOTU( N-K, WORK, 1, AP( KC+1 ),
+     $                    1 )
+            END IF
+            KSTEP = 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Invert the diagonal block.
+*
+            T = AP( KCNEXT+1 )
+            AK = AP( KCNEXT ) / T
+            AKP1 = AP( KC ) / T
+            AKKP1 = AP( KCNEXT+1 ) / T
+            D = T*( AK*AKP1-ONE )
+            AP( KCNEXT ) = AKP1 / D
+            AP( KC ) = AK / D
+            AP( KCNEXT+1 ) = -AKKP1 / D
+*
+*           Compute columns K-1 and K of the inverse.
+*
+            IF( K.LT.N ) THEN
+               CALL CCOPY( N-K, AP( KC+1 ), 1, WORK, 1 )
+               CALL CSPMV( UPLO, N-K, -ONE, AP( KC+( N-K+1 ) ), WORK, 1,
+     $                     ZERO, AP( KC+1 ), 1 )
+               AP( KC ) = AP( KC ) - CDOTU( N-K, WORK, 1, AP( KC+1 ),
+     $                    1 )
+               AP( KCNEXT+1 ) = AP( KCNEXT+1 ) -
+     $                          CDOTU( N-K, AP( KC+1 ), 1,
+     $                          AP( KCNEXT+2 ), 1 )
+               CALL CCOPY( N-K, AP( KCNEXT+2 ), 1, WORK, 1 )
+               CALL CSPMV( UPLO, N-K, -ONE, AP( KC+( N-K+1 ) ), WORK, 1,
+     $                     ZERO, AP( KCNEXT+2 ), 1 )
+               AP( KCNEXT ) = AP( KCNEXT ) -
+     $                        CDOTU( N-K, WORK, 1, AP( KCNEXT+2 ), 1 )
+            END IF
+            KSTEP = 2
+            KCNEXT = KCNEXT - ( N-K+3 )
+         END IF
+*
+         KP = ABS( IPIV( K ) )
+         IF( KP.NE.K ) THEN
+*
+*           Interchange rows and columns K and KP in the trailing
+*           submatrix A(k-1:n,k-1:n)
+*
+            KPC = NPP - ( N-KP+1 )*( N-KP+2 ) / 2 + 1
+            IF( KP.LT.N )
+     $         CALL CSWAP( N-KP, AP( KC+KP-K+1 ), 1, AP( KPC+1 ), 1 )
+            KX = KC + KP - K
+            DO 70 J = K + 1, KP - 1
+               KX = KX + N - J + 1
+               TEMP = AP( KC+J-K )
+               AP( KC+J-K ) = AP( KX )
+               AP( KX ) = TEMP
+   70       CONTINUE
+            TEMP = AP( KC )
+            AP( KC ) = AP( KPC )
+            AP( KPC ) = TEMP
+            IF( KSTEP.EQ.2 ) THEN
+               TEMP = AP( KC-N+K-1 )
+               AP( KC-N+K-1 ) = AP( KC-N+KP-1 )
+               AP( KC-N+KP-1 ) = TEMP
+            END IF
+         END IF
+*
+         K = K - KSTEP
+         KC = KCNEXT
+         GO TO 60
+   80    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CSPTRI
+*
+      END
diff --git a/costa/native/external/lapack/csptrs.f b/costa/native/external/lapack/csptrs.f
new file mode 100644
index 000000000..0d5219091
--- /dev/null
+++ b/costa/native/external/lapack/csptrs.f
@@ -0,0 +1,378 @@
+      SUBROUTINE CSPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            AP( * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CSPTRS solves a system of linear equations A*X = B with a complex
+*  symmetric matrix A stored in packed format using the factorization
+*  A = U*D*U**T or A = L*D*L**T computed by CSPTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**T;
+*          = 'L':  Lower triangular, form is A = L*D*L**T.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AP      (input) COMPLEX array, dimension (N*(N+1)/2)
+*          The block diagonal matrix D and the multipliers used to
+*          obtain the factor U or L as computed by CSPTRF, stored as a
+*          packed triangular matrix.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by CSPTRF.
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, K, KC, KP
+      COMPLEX            AK, AKM1, AKM1K, BK, BKM1, DENOM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEMV, CGERU, CSCAL, CSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CSPTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Solve A*X = B, where A = U*D*U'.
+*
+*        First solve U*D*X = B, overwriting B with X.
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = N
+         KC = N*( N+1 ) / 2 + 1
+   10    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 30
+*
+         KC = KC - K
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(U(K)), where U(K) is the transformation
+*           stored in column K of A.
+*
+            CALL CGERU( K-1, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB,
+     $                  B( 1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            CALL CSCAL( NRHS, ONE / AP( KC+K-1 ), B( K, 1 ), LDB )
+            K = K - 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Interchange rows K-1 and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K-1 )
+     $         CALL CSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(U(K)), where U(K) is the transformation
+*           stored in columns K-1 and K of A.
+*
+            CALL CGERU( K-2, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB,
+     $                  B( 1, 1 ), LDB )
+            CALL CGERU( K-2, NRHS, -ONE, AP( KC-( K-1 ) ), 1,
+     $                  B( K-1, 1 ), LDB, B( 1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            AKM1K = AP( KC+K-2 )
+            AKM1 = AP( KC-1 ) / AKM1K
+            AK = AP( KC+K-1 ) / AKM1K
+            DENOM = AKM1*AK - ONE
+            DO 20 J = 1, NRHS
+               BKM1 = B( K-1, J ) / AKM1K
+               BK = B( K, J ) / AKM1K
+               B( K-1, J ) = ( AK*BKM1-BK ) / DENOM
+               B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM
+   20       CONTINUE
+            KC = KC - K + 1
+            K = K - 2
+         END IF
+*
+         GO TO 10
+   30    CONTINUE
+*
+*        Next solve U'*X = B, overwriting B with X.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+         KC = 1
+   40    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 50
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Multiply by inv(U'(K)), where U(K) is the transformation
+*           stored in column K of A.
+*
+            CALL CGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, AP( KC ),
+     $                  1, ONE, B( K, 1 ), LDB )
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            KC = KC + K
+            K = K + 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Multiply by inv(U'(K+1)), where U(K+1) is the transformation
+*           stored in columns K and K+1 of A.
+*
+            CALL CGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, AP( KC ),
+     $                  1, ONE, B( K, 1 ), LDB )
+            CALL CGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB,
+     $                  AP( KC+K ), 1, ONE, B( K+1, 1 ), LDB )
+*
+*           Interchange rows K and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K )
+     $         CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            KC = KC + 2*K + 1
+            K = K + 2
+         END IF
+*
+         GO TO 40
+   50    CONTINUE
+*
+      ELSE
+*
+*        Solve A*X = B, where A = L*D*L'.
+*
+*        First solve L*D*X = B, overwriting B with X.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+         KC = 1
+   60    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 80
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(L(K)), where L(K) is the transformation
+*           stored in column K of A.
+*
+            IF( K.LT.N )
+     $         CALL CGERU( N-K, NRHS, -ONE, AP( KC+1 ), 1, B( K, 1 ),
+     $                     LDB, B( K+1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            CALL CSCAL( NRHS, ONE / AP( KC ), B( K, 1 ), LDB )
+            KC = KC + N - K + 1
+            K = K + 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Interchange rows K+1 and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K+1 )
+     $         CALL CSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(L(K)), where L(K) is the transformation
+*           stored in columns K and K+1 of A.
+*
+            IF( K.LT.N-1 ) THEN
+               CALL CGERU( N-K-1, NRHS, -ONE, AP( KC+2 ), 1, B( K, 1 ),
+     $                     LDB, B( K+2, 1 ), LDB )
+               CALL CGERU( N-K-1, NRHS, -ONE, AP( KC+N-K+2 ), 1,
+     $                     B( K+1, 1 ), LDB, B( K+2, 1 ), LDB )
+            END IF
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            AKM1K = AP( KC+1 )
+            AKM1 = AP( KC ) / AKM1K
+            AK = AP( KC+N-K+1 ) / AKM1K
+            DENOM = AKM1*AK - ONE
+            DO 70 J = 1, NRHS
+               BKM1 = B( K, J ) / AKM1K
+               BK = B( K+1, J ) / AKM1K
+               B( K, J ) = ( AK*BKM1-BK ) / DENOM
+               B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM
+   70       CONTINUE
+            KC = KC + 2*( N-K ) + 1
+            K = K + 2
+         END IF
+*
+         GO TO 60
+   80    CONTINUE
+*
+*        Next solve L'*X = B, overwriting B with X.
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = N
+         KC = N*( N+1 ) / 2 + 1
+   90    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 100
+*
+         KC = KC - ( N-K+1 )
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Multiply by inv(L'(K)), where L(K) is the transformation
+*           stored in column K of A.
+*
+            IF( K.LT.N )
+     $         CALL CGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ),
+     $                     LDB, AP( KC+1 ), 1, ONE, B( K, 1 ), LDB )
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            K = K - 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Multiply by inv(L'(K-1)), where L(K-1) is the transformation
+*           stored in columns K-1 and K of A.
+*
+            IF( K.LT.N ) THEN
+               CALL CGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ),
+     $                     LDB, AP( KC+1 ), 1, ONE, B( K, 1 ), LDB )
+               CALL CGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ),
+     $                     LDB, AP( KC-( N-K ) ), 1, ONE, B( K-1, 1 ),
+     $                     LDB )
+            END IF
+*
+*           Interchange rows K and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K )
+     $         CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            KC = KC - ( N-K+2 )
+            K = K - 2
+         END IF
+*
+         GO TO 90
+  100    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CSPTRS
+*
+      END
diff --git a/costa/native/external/lapack/csrot.f b/costa/native/external/lapack/csrot.f
new file mode 100644
index 000000000..f0a53ed7f
--- /dev/null
+++ b/costa/native/external/lapack/csrot.f
@@ -0,0 +1,53 @@
+      SUBROUTINE CSROT( N, CX, INCX, CY, INCY, C, S )
+*
+*     applies a plane rotation, where the cos and sin (c and s) are real
+*     and the vectors cx and cy are complex.
+*     jack dongarra, linpack, 3/11/78.
+*
+*  =====================================================================
+*
+*     .. Scalar Arguments ..
+      INTEGER           INCX, INCY, N
+      REAL              C, S
+*     ..
+*     .. Array Arguments ..
+      COMPLEX           CX( * ), CY( * )
+*     ..
+*     .. Local Scalars ..
+      INTEGER           I, IX, IY
+      COMPLEX           CTEMP
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.LE.0 )
+     $   RETURN
+      IF( INCX.EQ.1 .AND. INCY.EQ.1 )
+     $   GO TO 20
+*
+*        code for unequal increments or equal increments not equal
+*          to 1
+*
+      IX = 1
+      IY = 1
+      IF( INCX.LT.0 )
+     $   IX = ( -N+1 )*INCX + 1
+      IF( INCY.LT.0 )
+     $   IY = ( -N+1 )*INCY + 1
+      DO 10 I = 1, N
+         CTEMP = C*CX( IX ) + S*CY( IY )
+         CY( IY ) = C*CY( IY ) - S*CX( IX )
+         CX( IX ) = CTEMP
+         IX = IX + INCX
+         IY = IY + INCY
+   10 CONTINUE
+      RETURN
+*
+*        code for both increments equal to 1
+*
+   20 DO 30 I = 1, N
+         CTEMP = C*CX( I ) + S*CY( I )
+         CY( I ) = C*CY( I ) - S*CX( I )
+         CX( I ) = CTEMP
+   30 CONTINUE
+      RETURN
+      END
diff --git a/costa/native/external/lapack/csrscl.f b/costa/native/external/lapack/csrscl.f
new file mode 100644
index 000000000..9789e0000
--- /dev/null
+++ b/costa/native/external/lapack/csrscl.f
@@ -0,0 +1,115 @@
+      SUBROUTINE CSRSCL( N, SA, SX, INCX )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+      REAL               SA
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            SX( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CSRSCL multiplies an n-element complex vector x by the real scalar
+*  1/a.  This is done without overflow or underflow as long as
+*  the final result x/a does not overflow or underflow.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of components of the vector x.
+*
+*  SA      (input) REAL
+*          The scalar a which is used to divide each component of x.
+*          SA must be >= 0, or the subroutine will divide by zero.
+*
+*  SX      (input/output) COMPLEX array, dimension
+*                         (1+(N-1)*abs(INCX))
+*          The n-element vector x.
+*
+*  INCX    (input) INTEGER
+*          The increment between successive values of the vector SX.
+*          > 0:  SX(1) = X(1) and SX(1+(i-1)*INCX) = x(i),     1< i<= n
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            DONE
+      REAL               BIGNUM, CDEN, CDEN1, CNUM, CNUM1, MUL, SMLNUM
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CSSCAL, SLABAD
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 )
+     $   RETURN
+*
+*     Get machine parameters
+*
+      SMLNUM = SLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+*
+*     Initialize the denominator to SA and the numerator to 1.
+*
+      CDEN = SA
+      CNUM = ONE
+*
+   10 CONTINUE
+      CDEN1 = CDEN*SMLNUM
+      CNUM1 = CNUM / BIGNUM
+      IF( ABS( CDEN1 ).GT.ABS( CNUM ) .AND. CNUM.NE.ZERO ) THEN
+*
+*        Pre-multiply X by SMLNUM if CDEN is large compared to CNUM.
+*
+         MUL = SMLNUM
+         DONE = .FALSE.
+         CDEN = CDEN1
+      ELSE IF( ABS( CNUM1 ).GT.ABS( CDEN ) ) THEN
+*
+*        Pre-multiply X by BIGNUM if CDEN is small compared to CNUM.
+*
+         MUL = BIGNUM
+         DONE = .FALSE.
+         CNUM = CNUM1
+      ELSE
+*
+*        Multiply X by CNUM / CDEN and return.
+*
+         MUL = CNUM / CDEN
+         DONE = .TRUE.
+      END IF
+*
+*     Scale the vector X by MUL
+*
+      CALL CSSCAL( N, MUL, SX, INCX )
+*
+      IF( .NOT.DONE )
+     $   GO TO 10
+*
+      RETURN
+*
+*     End of CSRSCL
+*
+      END
diff --git a/costa/native/external/lapack/cstedc.f b/costa/native/external/lapack/cstedc.f
new file mode 100644
index 000000000..e0b166542
--- /dev/null
+++ b/costa/native/external/lapack/cstedc.f
@@ -0,0 +1,390 @@
+      SUBROUTINE CSTEDC( COMPZ, N, D, E, Z, LDZ, WORK, LWORK, RWORK,
+     $                   LRWORK, IWORK, LIWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPZ
+      INTEGER            INFO, LDZ, LIWORK, LRWORK, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               D( * ), E( * ), RWORK( * )
+      COMPLEX            WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CSTEDC computes all eigenvalues and, optionally, eigenvectors of a
+*  symmetric tridiagonal matrix using the divide and conquer method.
+*  The eigenvectors of a full or band complex Hermitian matrix can also
+*  be found if CHETRD or CHPTRD or CHBTRD has been used to reduce this
+*  matrix to tridiagonal form.
+*
+*  This code makes very mild assumptions about floating point
+*  arithmetic. It will work on machines with a guard digit in
+*  add/subtract, or on those binary machines without guard digits
+*  which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
+*  It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.  See SLAED3 for details.
+*
+*  Arguments
+*  =========
+*
+*  COMPZ   (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only.
+*          = 'I':  Compute eigenvectors of tridiagonal matrix also.
+*          = 'V':  Compute eigenvectors of original Hermitian matrix
+*                  also.  On entry, Z contains the unitary matrix used
+*                  to reduce the original matrix to tridiagonal form.
+*
+*  N       (input) INTEGER
+*          The dimension of the symmetric tridiagonal matrix.  N >= 0.
+*
+*  D       (input/output) REAL array, dimension (N)
+*          On entry, the diagonal elements of the tridiagonal matrix.
+*          On exit, if INFO = 0, the eigenvalues in ascending order.
+*
+*  E       (input/output) REAL array, dimension (N-1)
+*          On entry, the subdiagonal elements of the tridiagonal matrix.
+*          On exit, E has been destroyed.
+*
+*  Z       (input/output) COMPLEX array, dimension (LDZ,N)
+*          On entry, if COMPZ = 'V', then Z contains the unitary
+*          matrix used in the reduction to tridiagonal form.
+*          On exit, if INFO = 0, then if COMPZ = 'V', Z contains the
+*          orthonormal eigenvectors of the original Hermitian matrix,
+*          and if COMPZ = 'I', Z contains the orthonormal eigenvectors
+*          of the symmetric tridiagonal matrix.
+*          If  COMPZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1.
+*          If eigenvectors are desired, then LDZ >= max(1,N).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If COMPZ = 'N' or 'I', or N <= 1, LWORK must be at least 1.
+*          If COMPZ = 'V' and N > 1, LWORK must be at least N*N.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace/output) REAL array,
+*                                         dimension (LRWORK)
+*          On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
+*
+*  LRWORK  (input) INTEGER
+*          The dimension of the array RWORK.
+*          If COMPZ = 'N' or N <= 1, LRWORK must be at least 1.
+*          If COMPZ = 'V' and N > 1, LRWORK must be at least
+*                         1 + 3*N + 2*N*lg N + 3*N**2 ,
+*                         where lg( N ) = smallest integer k such
+*                         that 2**k >= N.
+*          If COMPZ = 'I' and N > 1, LRWORK must be at least
+*                         1 + 4*N + 2*N**2 .
+*
+*          If LRWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the RWORK array,
+*          returns this value as the first entry of the RWORK array, and
+*          no error message related to LRWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.
+*          If COMPZ = 'N' or N <= 1, LIWORK must be at least 1.
+*          If COMPZ = 'V' or N > 1,  LIWORK must be at least
+*                                    6 + 6*N + 5*N*lg N.
+*          If COMPZ = 'I' or N > 1,  LIWORK must be at least
+*                                    3 + 5*N .
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  The algorithm failed to compute an eigenvalue while
+*                working on the submatrix lying in rows and columns
+*                INFO/(N+1) through mod(INFO,N+1).
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Jeff Rutter, Computer Science Division, University of California
+*     at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            END, I, ICOMPZ, II, J, K, LGN, LIWMIN, LL,
+     $                   LRWMIN, LWMIN, M, SMLSIZ, START
+      REAL               EPS, ORGNRM, P, TINY
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               SLAMCH, SLANST
+      EXTERNAL           ILAENV, LSAME, SLAMCH, SLANST
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLACPY, CLACRM, CLAED0, CSTEQR, CSWAP, SLASCL,
+     $                   SLASET, SSTEDC, SSTEQR, SSTERF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, INT, LOG, MAX, MOD, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      IF( LSAME( COMPZ, 'N' ) ) THEN
+         ICOMPZ = 0
+      ELSE IF( LSAME( COMPZ, 'V' ) ) THEN
+         ICOMPZ = 1
+      ELSE IF( LSAME( COMPZ, 'I' ) ) THEN
+         ICOMPZ = 2
+      ELSE
+         ICOMPZ = -1
+      END IF
+      IF( N.LE.1 .OR. ICOMPZ.LE.0 ) THEN
+         LWMIN = 1
+         LIWMIN = 1
+         LRWMIN = 1
+      ELSE
+         LGN = INT( LOG( REAL( N ) ) / LOG( TWO ) )
+         IF( 2**LGN.LT.N )
+     $      LGN = LGN + 1
+         IF( 2**LGN.LT.N )
+     $      LGN = LGN + 1
+         IF( ICOMPZ.EQ.1 ) THEN
+            LWMIN = N*N
+            LRWMIN = 1 + 3*N + 2*N*LGN + 3*N**2
+            LIWMIN = 6 + 6*N + 5*N*LGN
+         ELSE IF( ICOMPZ.EQ.2 ) THEN
+            LWMIN = 1
+            LRWMIN = 1 + 4*N + 2*N**2
+            LIWMIN = 3 + 5*N
+         END IF
+      END IF
+      IF( ICOMPZ.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1,
+     $         N ) ) ) THEN
+         INFO = -6
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -10
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+         RWORK( 1 ) = LRWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CSTEDC', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN 
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN 
+      IF( N.EQ.1 ) THEN
+         IF( ICOMPZ.NE.0 )
+     $      Z( 1, 1 ) = ONE
+         RETURN 
+      END IF
+*
+      SMLSIZ = ILAENV( 9, 'CSTEDC', ' ', 0, 0, 0, 0 )
+*
+*     If the following conditional clause is removed, then the routine
+*     will use the Divide and Conquer routine to compute only the
+*     eigenvalues, which requires (3N + 3N**2) real workspace and
+*     (2 + 5N + 2N lg(N)) integer workspace.
+*     Since on many architectures SSTERF is much faster than any other
+*     algorithm for finding eigenvalues only, it is used here
+*     as the default.
+*
+*     If COMPZ = 'N', use SSTERF to compute the eigenvalues.
+*
+      IF( ICOMPZ.EQ.0 ) THEN
+         CALL SSTERF( N, D, E, INFO )
+         RETURN 
+      END IF
+*
+*     If N is smaller than the minimum divide size (SMLSIZ+1), then
+*     solve the problem with another solver.
+*
+      IF( N.LE.SMLSIZ ) THEN
+         IF( ICOMPZ.EQ.0 ) THEN
+            CALL SSTERF( N, D, E, INFO )
+            RETURN 
+         ELSE IF( ICOMPZ.EQ.2 ) THEN
+            CALL CSTEQR( 'I', N, D, E, Z, LDZ, RWORK, INFO )
+            RETURN 
+         ELSE
+            CALL CSTEQR( 'V', N, D, E, Z, LDZ, RWORK, INFO )
+            RETURN 
+         END IF
+      END IF
+*
+*     If COMPZ = 'I', we simply call SSTEDC instead.
+*
+      IF( ICOMPZ.EQ.2 ) THEN
+         CALL SLASET( 'Full', N, N, ZERO, ONE, RWORK, N )
+         LL = N*N + 1
+         CALL SSTEDC( 'I', N, D, E, RWORK, N, RWORK( LL ), LRWORK-LL+1,
+     $                IWORK, LIWORK, INFO )
+         DO 20 J = 1, N
+            DO 10 I = 1, N
+               Z( I, J ) = RWORK( ( J-1 )*N+I )
+   10       CONTINUE
+   20    CONTINUE
+         RETURN 
+      END IF
+*
+*     From now on, only option left to be handled is COMPZ = 'V',
+*     i.e. ICOMPZ = 1.
+*
+*     Scale.
+*
+      ORGNRM = SLANST( 'M', N, D, E )
+      IF( ORGNRM.EQ.ZERO )
+     $   RETURN
+*
+      EPS = SLAMCH( 'Epsilon' )
+*
+      START = 1
+*
+*     while ( START <= N )
+*
+   30 CONTINUE
+      IF( START.LE.N ) THEN
+*
+*     Let END be the position of the next subdiagonal entry such that
+*     E( END ) <= TINY or END = N if no such subdiagonal exists.  The
+*     matrix identified by the elements between START and END
+*     constitutes an independent sub-problem.
+*
+         END = START
+   40    CONTINUE
+         IF( END.LT.N ) THEN
+            TINY = EPS*SQRT( ABS( D( END ) ) )*SQRT( ABS( D( END+1 ) ) )
+            IF( ABS( E( END ) ).GT.TINY ) THEN
+               END = END + 1
+               GO TO 40
+            END IF
+         END IF
+*
+*        (Sub) Problem determined.  Compute its size and solve it.
+*
+         M = END - START + 1
+         IF( M.GT.SMLSIZ ) THEN
+            INFO = SMLSIZ
+*
+*           Scale.
+*
+            ORGNRM = SLANST( 'M', M, D( START ), E( START ) )
+            CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, M, 1, D( START ), M,
+     $                   INFO )
+            CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, M-1, 1, E( START ),
+     $                   M-1, INFO )
+*
+            CALL CLAED0( N, M, D( START ), E( START ), Z( 1, START ),
+     $                   LDZ, WORK, N, RWORK, IWORK, INFO )
+            IF( INFO.GT.0 ) THEN
+               INFO = ( INFO / ( M+1 )+START-1 )*( N+1 ) +
+     $                MOD( INFO, ( M+1 ) ) + START - 1
+               RETURN 
+            END IF
+*
+*           Scale back.
+*
+            CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, M, 1, D( START ), M,
+     $                   INFO )
+*
+         ELSE
+            CALL SSTEQR( 'I', M, D( START ), E( START ), RWORK, M,
+     $                   RWORK( M*M+1 ), INFO )
+            CALL CLACRM( N, M, Z( 1, START ), LDZ, RWORK, M, WORK, N,
+     $                   RWORK( M*M+1 ) )
+            CALL CLACPY( 'A', N, M, WORK, N, Z( 1, START ), LDZ )
+            IF( INFO.GT.0 ) THEN
+               INFO = START*( N+1 ) + END
+               RETURN 
+            END IF
+         END IF
+*
+         START = END + 1
+         GO TO 30
+      END IF
+*
+*     endwhile
+*
+*     If the problem split any number of times, then the eigenvalues
+*     will not be properly ordered.  Here we permute the eigenvalues
+*     (and the associated eigenvectors) into ascending order.
+*
+      IF( M.NE.N ) THEN
+*
+*        Use Selection Sort to minimize swaps of eigenvectors
+*
+         DO 60 II = 2, N
+            I = II - 1
+            K = I
+            P = D( I )
+            DO 50 J = II, N
+               IF( D( J ).LT.P ) THEN
+                  K = J
+                  P = D( J )
+               END IF
+   50       CONTINUE
+            IF( K.NE.I ) THEN
+               D( K ) = D( I )
+               D( I ) = P
+               CALL CSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 )
+            END IF
+   60    CONTINUE
+      END IF
+*
+      WORK( 1 ) = LWMIN
+      RWORK( 1 ) = LRWMIN
+      IWORK( 1 ) = LIWMIN
+*
+      RETURN
+*
+*     End of CSTEDC
+*
+      END
diff --git a/costa/native/external/lapack/cstegr.f b/costa/native/external/lapack/cstegr.f
new file mode 100644
index 000000000..da04d0c35
--- /dev/null
+++ b/costa/native/external/lapack/cstegr.f
@@ -0,0 +1,405 @@
+      SUBROUTINE CSTEGR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, ABSTOL,
+     $                   M, W, Z, LDZ, ISUPPZ, WORK, LWORK, IWORK,
+     $                   LIWORK, INFO )
+*
+*  -- LAPACK computational routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE
+      INTEGER            IL, INFO, IU, LDZ, LIWORK, LWORK, M, N
+      REAL               ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            ISUPPZ( * ), IWORK( * )
+      REAL               D( * ), E( * ), W( * ), WORK( * )
+      COMPLEX            Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+* CSTEGR computes selected eigenvalues and, optionally, eigenvectors
+* of a real symmetric tridiagonal matrix T.  Eigenvalues and
+* eigenvectors can be selected by specifying either a range of values
+* or a range of indices for the desired eigenvalues. The eigenvalues
+* are computed by the dqds algorithm, while orthogonal eigenvectors are
+* computed from various ``good'' L D L^T representations (also known as
+* Relatively Robust Representations). Gram-Schmidt orthogonalization is
+* avoided as far as possible. More specifically, the various steps of
+* the algorithm are as follows. For the i-th unreduced block of T,
+*     (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T
+*         is a relatively robust representation,
+*     (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high
+*         relative accuracy by the dqds algorithm,
+*     (c) If there is a cluster of close eigenvalues, "choose" sigma_i
+*         close to the cluster, and go to step (a),
+*     (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T,
+*         compute the corresponding eigenvector by forming a
+*         rank-revealing twisted factorization.
+*  The desired accuracy of the output can be specified by the input
+*  parameter ABSTOL.
+*
+*  For more details, see "A new O(n^2) algorithm for the symmetric
+*  tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon,
+*  Computer Science Division Technical Report No. UCB/CSD-97-971,
+*  UC Berkeley, May 1997.
+*
+*  Note 1 : Currently CSTEGR is only set up to find ALL the n
+*  eigenvalues and eigenvectors of T in O(n^2) time
+*  Note 2 : Currently the routine CSTEIN is called when an appropriate
+*  sigma_i cannot be chosen in step (c) above. CSTEIN invokes modified
+*  Gram-Schmidt when eigenvalues are close.
+*  Note 3 : CSTEGR works only on machines which follow ieee-754
+*  floating-point standard in their handling of infinities and NaNs.
+*  Normal execution of CSTEGR may create NaNs and infinities and hence
+*  may abort due to a floating point exception in environments which
+*  do not conform to the ieee standard.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found.
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found.
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+********** Only RANGE = 'A' is currently supported *********************
+*
+*  N       (input) INTEGER
+*          The order of the matrix.  N >= 0.
+*
+*  D       (input/output) REAL array, dimension (N)
+*          On entry, the n diagonal elements of the tridiagonal matrix
+*          T. On exit, D is overwritten.
+*
+*  E       (input/output) REAL array, dimension (N)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix T in elements 1 to N-1 of E; E(N) need not be set.
+*          On exit, E is overwritten.
+*
+*  VL      (input) REAL
+*  VU      (input) REAL
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) REAL
+*          The absolute error tolerance for the
+*          eigenvalues/eigenvectors. IF JOBZ = 'V', the eigenvalues and
+*          eigenvectors output have residual norms bounded by ABSTOL,
+*          and the dot products between different eigenvectors are
+*          bounded by ABSTOL. If ABSTOL is less than N*EPS*|T|, then
+*          N*EPS*|T| will be used in its place, where EPS is the
+*          machine precision and |T| is the 1-norm of the tridiagonal
+*          matrix. The eigenvalues are computed to an accuracy of
+*          EPS*|T| irrespective of ABSTOL. If high relative accuracy
+*          is important, set ABSTOL to DLAMCH( 'Safe minimum' ).
+*          See Barlow and Demmel "Computing Accurate Eigensystems of
+*          Scaled Diagonally Dominant Matrices", LAPACK Working Note #7
+*          for a discussion of which matrices define their eigenvalues
+*          to high relative accuracy.
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) REAL array, dimension (N)
+*          The first M elements contain the selected eigenvalues in
+*          ascending order.
+*
+*  Z       (output) COMPLEX array, dimension (LDZ, max(1,M) )
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix T
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  ISUPPZ  (output) INTEGER ARRAY, dimension ( 2*max(1,M) )
+*          The support of the eigenvectors in Z, i.e., the indices
+*          indicating the nonzero elements in Z. The i-th eigenvector
+*          is nonzero only in elements ISUPPZ( 2*i-1 ) through
+*          ISUPPZ( 2*i ).
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal
+*          (and minimal) LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,18*N)
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.  LIWORK >= max(1,10*N)
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = 1, internal error in SLARRE,
+*                if INFO = 2, internal error in CLARRV.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Inderjit Dhillon, IBM Almaden, USA
+*     Osni Marques, LBNL/NERSC, USA
+*     Ken Stanley, Computer Science Division, University of
+*       California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+      COMPLEX            CZERO
+      PARAMETER          ( CZERO = ( 0.0E0, 0.0E0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ
+      INTEGER            I, IBEGIN, IEND, IINDBL, IINDWK, IINFO, IINSPL, 
+     $                   INDGRS, INDWOF, INDWRK, ITMP, J, JJ, LIWMIN, 
+     $                   LWMIN, NSPLIT
+      REAL               BIGNUM, EPS, RMAX, RMIN, SAFMIN, SCALE, SMLNUM,
+     $                   THRESH, TMP, TNRM, TOL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH, SLANST
+      EXTERNAL           LSAME, SLAMCH, SLANST
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLARRV, CLASET, CSWAP, SLARRE, SSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+*
+      LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LIWORK.EQ.-1 ) )
+      LWMIN = 18*N
+      LIWMIN = 10*N
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -2
+*
+*     The following two lines need to be removed once the 
+*     RANGE = 'V' and RANGE = 'I' options are provided.
+*
+      ELSE IF( VALEIG .OR. INDEIG ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN
+         INFO = -7
+      ELSE IF( INDEIG .AND. IL.LT.1 ) THEN
+         INFO = -8
+*     The following change should be made in DSTEVX also, otherwise
+*     IL can be specified as N+1 and IU as N.
+*     ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN
+      ELSE IF( INDEIG .AND. ( IU.LT.IL .OR. IU.GT.N ) ) THEN
+         INFO = -9
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -14
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -17
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -19
+      END IF
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CSTEGR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( ALLEIG .OR. INDEIG ) THEN
+            M = 1
+            W( 1 ) = D( 1 )
+         ELSE
+            IF( VL.LT.D( 1 ) .AND. VU.GE.D( 1 ) ) THEN
+               M = 1
+               W( 1 ) = D( 1 )
+            END IF
+         END IF
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      EPS = SLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      SCALE = ONE
+      TNRM = SLANST( 'M', N, D, E )
+      IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN
+         SCALE = RMIN / TNRM
+      ELSE IF( TNRM.GT.RMAX ) THEN
+         SCALE = RMAX / TNRM
+      END IF
+      IF( SCALE.NE.ONE ) THEN
+         CALL SSCAL( N, SCALE, D, 1 )
+         CALL SSCAL( N-1, SCALE, E, 1 )
+         TNRM = TNRM*SCALE
+      END IF
+      INDGRS = 1
+      INDWOF = 2*N + 1
+      INDWRK = 3*N + 1
+*
+      IINSPL = 1
+      IINDBL = N + 1
+      IINDWK = 2*N + 1
+*
+      CALL CLASET( 'Full', N, N, CZERO, CZERO, Z, LDZ )
+*
+*     Compute the desired eigenvalues of the tridiagonal after splitting
+*     into smaller subblocks if the corresponding of-diagonal elements
+*     are small
+*
+      THRESH = EPS*TNRM
+      CALL SLARRE( N, D, E, THRESH, NSPLIT, IWORK( IINSPL ), M, W,
+     $             WORK( INDWOF ), WORK( INDGRS ), WORK( INDWRK ),
+     $             IINFO )
+      IF( IINFO.NE.0 ) THEN
+         INFO = 1
+         RETURN
+      END IF
+*
+      IF( WANTZ ) THEN
+*
+*        Compute the desired eigenvectors corresponding to the computed
+*        eigenvalues
+*
+         TOL = MAX( ABSTOL, REAL( N )*THRESH )
+         IBEGIN = 1
+         DO 20 I = 1, NSPLIT
+            IEND = IWORK( IINSPL+I-1 )
+            DO 10 J = IBEGIN, IEND
+               IWORK( IINDBL+J-1 ) = I
+   10       CONTINUE
+            IBEGIN = IEND + 1
+   20    CONTINUE
+*
+         CALL CLARRV( N, D, E, IWORK( IINSPL ), M, W, IWORK( IINDBL ),
+     $                WORK( INDGRS ), TOL, Z, LDZ, ISUPPZ,
+     $                WORK( INDWRK ), IWORK( IINDWK ), IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = 2
+            RETURN
+         END IF
+*
+      END IF
+*
+      IBEGIN = 1
+      DO 40 I = 1, NSPLIT
+         IEND = IWORK( IINSPL+I-1 )
+         DO 30 J = IBEGIN, IEND
+            W( J ) = W( J ) + WORK( INDWOF+I-1 )
+   30    CONTINUE
+         IBEGIN = IEND + 1
+   40 CONTINUE
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( SCALE.NE.ONE ) THEN
+         CALL SSCAL( M, ONE / SCALE, W, 1 )
+      END IF
+*
+*     If eigenvalues are not in order, then sort them, along with
+*     eigenvectors.
+*
+      IF( NSPLIT.GT.1 ) THEN
+         DO 60 J = 1, M - 1
+            I = 0
+            TMP = W( J )
+            DO 50 JJ = J + 1, M
+               IF( W( JJ ).LT.TMP ) THEN
+                  I = JJ
+                  TMP = W( JJ )
+               END IF
+   50       CONTINUE
+            IF( I.NE.0 ) THEN
+               W( I ) = W( J )
+               W( J ) = TMP
+               IF( WANTZ ) THEN
+                  CALL CSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 )
+                  ITMP = ISUPPZ( 2*I-1 )
+                  ISUPPZ( 2*I-1 ) = ISUPPZ( 2*J-1 )
+                  ISUPPZ( 2*J-1 ) = ITMP
+                  ITMP = ISUPPZ( 2*I )
+                  ISUPPZ( 2*I ) = ISUPPZ( 2*J )
+                  ISUPPZ( 2*J ) = ITMP
+               END IF
+            END IF
+   60    CONTINUE
+      END IF
+*
+      WORK( 1 ) = LWMIN
+      IWORK( 1 ) = LIWMIN
+      RETURN
+*
+*     End of CSTEGR
+*
+      END
diff --git a/costa/native/external/lapack/cstein.f b/costa/native/external/lapack/cstein.f
new file mode 100644
index 000000000..39bdc4f86
--- /dev/null
+++ b/costa/native/external/lapack/cstein.f
@@ -0,0 +1,377 @@
+      SUBROUTINE CSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, Z, LDZ, WORK,
+     $                   IWORK, IFAIL, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDZ, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IBLOCK( * ), IFAIL( * ), ISPLIT( * ),
+     $                   IWORK( * )
+      REAL               D( * ), E( * ), W( * ), WORK( * )
+      COMPLEX            Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CSTEIN computes the eigenvectors of a real symmetric tridiagonal
+*  matrix T corresponding to specified eigenvalues, using inverse
+*  iteration.
+*
+*  The maximum number of iterations allowed for each eigenvector is
+*  specified by an internal parameter MAXITS (currently set to 5).
+*
+*  Although the eigenvectors are real, they are stored in a complex
+*  array, which may be passed to CUNMTR or CUPMTR for back
+*  transformation to the eigenvectors of a complex Hermitian matrix
+*  which was reduced to tridiagonal form.
+*
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix.  N >= 0.
+*
+*  D       (input) REAL array, dimension (N)
+*          The n diagonal elements of the tridiagonal matrix T.
+*
+*  E       (input) REAL array, dimension (N)
+*          The (n-1) subdiagonal elements of the tridiagonal matrix
+*          T, stored in elements 1 to N-1; E(N) need not be set.
+*
+*  M       (input) INTEGER
+*          The number of eigenvectors to be found.  0 <= M <= N.
+*
+*  W       (input) REAL array, dimension (N)
+*          The first M elements of W contain the eigenvalues for
+*          which eigenvectors are to be computed.  The eigenvalues
+*          should be grouped by split-off block and ordered from
+*          smallest to largest within the block.  ( The output array
+*          W from SSTEBZ with ORDER = 'B' is expected here. )
+*
+*  IBLOCK  (input) INTEGER array, dimension (N)
+*          The submatrix indices associated with the corresponding
+*          eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to
+*          the first submatrix from the top, =2 if W(i) belongs to
+*          the second submatrix, etc.  ( The output array IBLOCK
+*          from SSTEBZ is expected here. )
+*
+*  ISPLIT  (input) INTEGER array, dimension (N)
+*          The splitting points, at which T breaks up into submatrices.
+*          The first submatrix consists of rows/columns 1 to
+*          ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1
+*          through ISPLIT( 2 ), etc.
+*          ( The output array ISPLIT from SSTEBZ is expected here. )
+*
+*  Z       (output) COMPLEX array, dimension (LDZ, M)
+*          The computed eigenvectors.  The eigenvector associated
+*          with the eigenvalue W(i) is stored in the i-th column of
+*          Z.  Any vector which fails to converge is set to its current
+*          iterate after MAXITS iterations.
+*          The imaginary parts of the eigenvectors are set to zero.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= max(1,N).
+*
+*  WORK    (workspace) REAL array, dimension (5*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  IFAIL   (output) INTEGER array, dimension (M)
+*          On normal exit, all elements of IFAIL are zero.
+*          If one or more eigenvectors fail to converge after
+*          MAXITS iterations, then their indices are stored in
+*          array IFAIL.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, then i eigenvectors failed to converge
+*               in MAXITS iterations.  Their indices are stored in
+*               array IFAIL.
+*
+*  Internal Parameters
+*  ===================
+*
+*  MAXITS  INTEGER, default = 5
+*          The maximum number of iterations performed.
+*
+*  EXTRA   INTEGER, default = 2
+*          The number of iterations performed after norm growth
+*          criterion is satisfied, should be at least 1.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   CONE = ( 1.0E+0, 0.0E+0 ) )
+      REAL               ZERO, ONE, TEN, ODM3, ODM1
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0, TEN = 1.0E+1,
+     $                   ODM3 = 1.0E-3, ODM1 = 1.0E-1 )
+      INTEGER            MAXITS, EXTRA
+      PARAMETER          ( MAXITS = 5, EXTRA = 2 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            B1, BLKSIZ, BN, GPIND, I, IINFO, INDRV1,
+     $                   INDRV2, INDRV3, INDRV4, INDRV5, ITS, J, J1,
+     $                   JBLK, JMAX, JR, NBLK, NRMCHK
+      REAL               CTR, EPS, EPS1, NRM, ONENRM, ORTOL, PERTOL,
+     $                   SCL, SEP, STPCRT, TOL, XJ, XJM
+*     ..
+*     .. Local Arrays ..
+      INTEGER            ISEED( 4 )
+*     ..
+*     .. External Functions ..
+      INTEGER            ISAMAX
+      REAL               SASUM, SLAMCH, SNRM2
+      EXTERNAL           ISAMAX, SASUM, SLAMCH, SNRM2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SLAGTF, SLAGTS, SLARNV, SSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, CMPLX, MAX, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      DO 10 I = 1, M
+         IFAIL( I ) = 0
+   10 CONTINUE
+*
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( M.LT.0 .OR. M.GT.N ) THEN
+         INFO = -4
+      ELSE IF( LDZ.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE
+         DO 20 J = 2, M
+            IF( IBLOCK( J ).LT.IBLOCK( J-1 ) ) THEN
+               INFO = -6
+               GO TO 30
+            END IF
+            IF( IBLOCK( J ).EQ.IBLOCK( J-1 ) .AND. W( J ).LT.W( J-1 ) )
+     $           THEN
+               INFO = -5
+               GO TO 30
+            END IF
+   20    CONTINUE
+   30    CONTINUE
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CSTEIN', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. M.EQ.0 ) THEN
+         RETURN
+      ELSE IF( N.EQ.1 ) THEN
+         Z( 1, 1 ) = CONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      EPS = SLAMCH( 'Precision' )
+*
+*     Initialize seed for random number generator SLARNV.
+*
+      DO 40 I = 1, 4
+         ISEED( I ) = 1
+   40 CONTINUE
+*
+*     Initialize pointers.
+*
+      INDRV1 = 0
+      INDRV2 = INDRV1 + N
+      INDRV3 = INDRV2 + N
+      INDRV4 = INDRV3 + N
+      INDRV5 = INDRV4 + N
+*
+*     Compute eigenvectors of matrix blocks.
+*
+      J1 = 1
+      DO 180 NBLK = 1, IBLOCK( M )
+*
+*        Find starting and ending indices of block nblk.
+*
+         IF( NBLK.EQ.1 ) THEN
+            B1 = 1
+         ELSE
+            B1 = ISPLIT( NBLK-1 ) + 1
+         END IF
+         BN = ISPLIT( NBLK )
+         BLKSIZ = BN - B1 + 1
+         IF( BLKSIZ.EQ.1 )
+     $      GO TO 60
+         GPIND = B1
+*
+*        Compute reorthogonalization criterion and stopping criterion.
+*
+         ONENRM = ABS( D( B1 ) ) + ABS( E( B1 ) )
+         ONENRM = MAX( ONENRM, ABS( D( BN ) )+ABS( E( BN-1 ) ) )
+         DO 50 I = B1 + 1, BN - 1
+            ONENRM = MAX( ONENRM, ABS( D( I ) )+ABS( E( I-1 ) )+
+     $               ABS( E( I ) ) )
+   50    CONTINUE
+         ORTOL = ODM3*ONENRM
+*
+         STPCRT = SQRT( ODM1 / BLKSIZ )
+*
+*        Loop through eigenvalues of block nblk.
+*
+   60    CONTINUE
+         JBLK = 0
+         DO 170 J = J1, M
+            IF( IBLOCK( J ).NE.NBLK ) THEN
+               J1 = J
+               GO TO 180
+            END IF
+            JBLK = JBLK + 1
+            XJ = W( J )
+*
+*           Skip all the work if the block size is one.
+*
+            IF( BLKSIZ.EQ.1 ) THEN
+               WORK( INDRV1+1 ) = ONE
+               GO TO 140
+            END IF
+*
+*           If eigenvalues j and j-1 are too close, add a relatively
+*           small perturbation.
+*
+            IF( JBLK.GT.1 ) THEN
+               EPS1 = ABS( EPS*XJ )
+               PERTOL = TEN*EPS1
+               SEP = XJ - XJM
+               IF( SEP.LT.PERTOL )
+     $            XJ = XJM + PERTOL
+            END IF
+*
+            ITS = 0
+            NRMCHK = 0
+*
+*           Get random starting vector.
+*
+            CALL SLARNV( 2, ISEED, BLKSIZ, WORK( INDRV1+1 ) )
+*
+*           Copy the matrix T so it won't be destroyed in factorization.
+*
+            CALL SCOPY( BLKSIZ, D( B1 ), 1, WORK( INDRV4+1 ), 1 )
+            CALL SCOPY( BLKSIZ-1, E( B1 ), 1, WORK( INDRV2+2 ), 1 )
+            CALL SCOPY( BLKSIZ-1, E( B1 ), 1, WORK( INDRV3+1 ), 1 )
+*
+*           Compute LU factors with partial pivoting  ( PT = LU )
+*
+            TOL = ZERO
+            CALL SLAGTF( BLKSIZ, WORK( INDRV4+1 ), XJ, WORK( INDRV2+2 ),
+     $                   WORK( INDRV3+1 ), TOL, WORK( INDRV5+1 ), IWORK,
+     $                   IINFO )
+*
+*           Update iteration count.
+*
+   70       CONTINUE
+            ITS = ITS + 1
+            IF( ITS.GT.MAXITS )
+     $         GO TO 120
+*
+*           Normalize and scale the righthand side vector Pb.
+*
+            SCL = BLKSIZ*ONENRM*MAX( EPS,
+     $            ABS( WORK( INDRV4+BLKSIZ ) ) ) /
+     $            SASUM( BLKSIZ, WORK( INDRV1+1 ), 1 )
+            CALL SSCAL( BLKSIZ, SCL, WORK( INDRV1+1 ), 1 )
+*
+*           Solve the system LU = Pb.
+*
+            CALL SLAGTS( -1, BLKSIZ, WORK( INDRV4+1 ), WORK( INDRV2+2 ),
+     $                   WORK( INDRV3+1 ), WORK( INDRV5+1 ), IWORK,
+     $                   WORK( INDRV1+1 ), TOL, IINFO )
+*
+*           Reorthogonalize by modified Gram-Schmidt if eigenvalues are
+*           close enough.
+*
+            IF( JBLK.EQ.1 )
+     $         GO TO 110
+            IF( ABS( XJ-XJM ).GT.ORTOL )
+     $         GPIND = J
+            IF( GPIND.NE.J ) THEN
+               DO 100 I = GPIND, J - 1
+                  CTR = ZERO
+                  DO 80 JR = 1, BLKSIZ
+                     CTR = CTR + WORK( INDRV1+JR )*
+     $                     REAL( Z( B1-1+JR, I ) )
+   80             CONTINUE
+                  DO 90 JR = 1, BLKSIZ
+                     WORK( INDRV1+JR ) = WORK( INDRV1+JR ) -
+     $                                   CTR*REAL( Z( B1-1+JR, I ) )
+   90             CONTINUE
+  100          CONTINUE
+            END IF
+*
+*           Check the infinity norm of the iterate.
+*
+  110       CONTINUE
+            JMAX = ISAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 )
+            NRM = ABS( WORK( INDRV1+JMAX ) )
+*
+*           Continue for additional iterations after norm reaches
+*           stopping criterion.
+*
+            IF( NRM.LT.STPCRT )
+     $         GO TO 70
+            NRMCHK = NRMCHK + 1
+            IF( NRMCHK.LT.EXTRA+1 )
+     $         GO TO 70
+*
+            GO TO 130
+*
+*           If stopping criterion was not satisfied, update info and
+*           store eigenvector number in array ifail.
+*
+  120       CONTINUE
+            INFO = INFO + 1
+            IFAIL( INFO ) = J
+*
+*           Accept iterate as jth eigenvector.
+*
+  130       CONTINUE
+            SCL = ONE / SNRM2( BLKSIZ, WORK( INDRV1+1 ), 1 )
+            JMAX = ISAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 )
+            IF( WORK( INDRV1+JMAX ).LT.ZERO )
+     $         SCL = -SCL
+            CALL SSCAL( BLKSIZ, SCL, WORK( INDRV1+1 ), 1 )
+  140       CONTINUE
+            DO 150 I = 1, N
+               Z( I, J ) = CZERO
+  150       CONTINUE
+            DO 160 I = 1, BLKSIZ
+               Z( B1+I-1, J ) = CMPLX( WORK( INDRV1+I ), ZERO )
+  160       CONTINUE
+*
+*           Save the shift to check eigenvalue spacing at next
+*           iteration.
+*
+            XJM = XJ
+*
+  170    CONTINUE
+  180 CONTINUE
+*
+      RETURN
+*
+*     End of CSTEIN
+*
+      END
diff --git a/costa/native/external/lapack/csteqr.f b/costa/native/external/lapack/csteqr.f
new file mode 100644
index 000000000..337e919ce
--- /dev/null
+++ b/costa/native/external/lapack/csteqr.f
@@ -0,0 +1,504 @@
+      SUBROUTINE CSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPZ
+      INTEGER            INFO, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * ), E( * ), WORK( * )
+      COMPLEX            Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CSTEQR computes all eigenvalues and, optionally, eigenvectors of a
+*  symmetric tridiagonal matrix using the implicit QL or QR method.
+*  The eigenvectors of a full or band complex Hermitian matrix can also
+*  be found if CHETRD or CHPTRD or CHBTRD has been used to reduce this
+*  matrix to tridiagonal form.
+*
+*  Arguments
+*  =========
+*
+*  COMPZ   (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only.
+*          = 'V':  Compute eigenvalues and eigenvectors of the original
+*                  Hermitian matrix.  On entry, Z must contain the
+*                  unitary matrix used to reduce the original matrix
+*                  to tridiagonal form.
+*          = 'I':  Compute eigenvalues and eigenvectors of the
+*                  tridiagonal matrix.  Z is initialized to the identity
+*                  matrix.
+*
+*  N       (input) INTEGER
+*          The order of the matrix.  N >= 0.
+*
+*  D       (input/output) REAL array, dimension (N)
+*          On entry, the diagonal elements of the tridiagonal matrix.
+*          On exit, if INFO = 0, the eigenvalues in ascending order.
+*
+*  E       (input/output) REAL array, dimension (N-1)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix.
+*          On exit, E has been destroyed.
+*
+*  Z       (input/output) COMPLEX array, dimension (LDZ, N)
+*          On entry, if  COMPZ = 'V', then Z contains the unitary
+*          matrix used in the reduction to tridiagonal form.
+*          On exit, if INFO = 0, then if COMPZ = 'V', Z contains the
+*          orthonormal eigenvectors of the original Hermitian matrix,
+*          and if COMPZ = 'I', Z contains the orthonormal eigenvectors
+*          of the symmetric tridiagonal matrix.
+*          If COMPZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          eigenvectors are desired, then  LDZ >= max(1,N).
+*
+*  WORK    (workspace) REAL array, dimension (max(1,2*N-2))
+*          If COMPZ = 'N', then WORK is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  the algorithm has failed to find all the eigenvalues in
+*                a total of 30*N iterations; if INFO = i, then i
+*                elements of E have not converged to zero; on exit, D
+*                and E contain the elements of a symmetric tridiagonal
+*                matrix which is unitarily similar to the original
+*                matrix.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE, TWO, THREE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0,
+     $                   THREE = 3.0E0 )
+      COMPLEX            CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0E0, 0.0E0 ),
+     $                   CONE = ( 1.0E0, 0.0E0 ) )
+      INTEGER            MAXIT
+      PARAMETER          ( MAXIT = 30 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, ICOMPZ, II, ISCALE, J, JTOT, K, L, L1, LEND,
+     $                   LENDM1, LENDP1, LENDSV, LM1, LSV, M, MM, MM1,
+     $                   NM1, NMAXIT
+      REAL               ANORM, B, C, EPS, EPS2, F, G, P, R, RT1, RT2,
+     $                   S, SAFMAX, SAFMIN, SSFMAX, SSFMIN, TST
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH, SLANST, SLAPY2
+      EXTERNAL           LSAME, SLAMCH, SLANST, SLAPY2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLASET, CLASR, CSWAP, SLAE2, SLAEV2, SLARTG,
+     $                   SLASCL, SLASRT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SIGN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( LSAME( COMPZ, 'N' ) ) THEN
+         ICOMPZ = 0
+      ELSE IF( LSAME( COMPZ, 'V' ) ) THEN
+         ICOMPZ = 1
+      ELSE IF( LSAME( COMPZ, 'I' ) ) THEN
+         ICOMPZ = 2
+      ELSE
+         ICOMPZ = -1
+      END IF
+      IF( ICOMPZ.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1,
+     $         N ) ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CSTEQR', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( ICOMPZ.EQ.2 )
+     $      Z( 1, 1 ) = CONE
+         RETURN
+      END IF
+*
+*     Determine the unit roundoff and over/underflow thresholds.
+*
+      EPS = SLAMCH( 'E' )
+      EPS2 = EPS**2
+      SAFMIN = SLAMCH( 'S' )
+      SAFMAX = ONE / SAFMIN
+      SSFMAX = SQRT( SAFMAX ) / THREE
+      SSFMIN = SQRT( SAFMIN ) / EPS2
+*
+*     Compute the eigenvalues and eigenvectors of the tridiagonal
+*     matrix.
+*
+      IF( ICOMPZ.EQ.2 )
+     $   CALL CLASET( 'Full', N, N, CZERO, CONE, Z, LDZ )
+*
+      NMAXIT = N*MAXIT
+      JTOT = 0
+*
+*     Determine where the matrix splits and choose QL or QR iteration
+*     for each block, according to whether top or bottom diagonal
+*     element is smaller.
+*
+      L1 = 1
+      NM1 = N - 1
+*
+   10 CONTINUE
+      IF( L1.GT.N )
+     $   GO TO 160
+      IF( L1.GT.1 )
+     $   E( L1-1 ) = ZERO
+      IF( L1.LE.NM1 ) THEN
+         DO 20 M = L1, NM1
+            TST = ABS( E( M ) )
+            IF( TST.EQ.ZERO )
+     $         GO TO 30
+            IF( TST.LE.( SQRT( ABS( D( M ) ) )*SQRT( ABS( D( M+
+     $          1 ) ) ) )*EPS ) THEN
+               E( M ) = ZERO
+               GO TO 30
+            END IF
+   20    CONTINUE
+      END IF
+      M = N
+*
+   30 CONTINUE
+      L = L1
+      LSV = L
+      LEND = M
+      LENDSV = LEND
+      L1 = M + 1
+      IF( LEND.EQ.L )
+     $   GO TO 10
+*
+*     Scale submatrix in rows and columns L to LEND
+*
+      ANORM = SLANST( 'I', LEND-L+1, D( L ), E( L ) )
+      ISCALE = 0
+      IF( ANORM.EQ.ZERO )
+     $   GO TO 10
+      IF( ANORM.GT.SSFMAX ) THEN
+         ISCALE = 1
+         CALL SLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N,
+     $                INFO )
+         CALL SLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L, 1, E( L ), N,
+     $                INFO )
+      ELSE IF( ANORM.LT.SSFMIN ) THEN
+         ISCALE = 2
+         CALL SLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L+1, 1, D( L ), N,
+     $                INFO )
+         CALL SLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L, 1, E( L ), N,
+     $                INFO )
+      END IF
+*
+*     Choose between QL and QR iteration
+*
+      IF( ABS( D( LEND ) ).LT.ABS( D( L ) ) ) THEN
+         LEND = LSV
+         L = LENDSV
+      END IF
+*
+      IF( LEND.GT.L ) THEN
+*
+*        QL Iteration
+*
+*        Look for small subdiagonal element.
+*
+   40    CONTINUE
+         IF( L.NE.LEND ) THEN
+            LENDM1 = LEND - 1
+            DO 50 M = L, LENDM1
+               TST = ABS( E( M ) )**2
+               IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M+1 ) )+
+     $             SAFMIN )GO TO 60
+   50       CONTINUE
+         END IF
+*
+         M = LEND
+*
+   60    CONTINUE
+         IF( M.LT.LEND )
+     $      E( M ) = ZERO
+         P = D( L )
+         IF( M.EQ.L )
+     $      GO TO 80
+*
+*        If remaining matrix is 2-by-2, use SLAE2 or SLAEV2
+*        to compute its eigensystem.
+*
+         IF( M.EQ.L+1 ) THEN
+            IF( ICOMPZ.GT.0 ) THEN
+               CALL SLAEV2( D( L ), E( L ), D( L+1 ), RT1, RT2, C, S )
+               WORK( L ) = C
+               WORK( N-1+L ) = S
+               CALL CLASR( 'R', 'V', 'B', N, 2, WORK( L ),
+     $                     WORK( N-1+L ), Z( 1, L ), LDZ )
+            ELSE
+               CALL SLAE2( D( L ), E( L ), D( L+1 ), RT1, RT2 )
+            END IF
+            D( L ) = RT1
+            D( L+1 ) = RT2
+            E( L ) = ZERO
+            L = L + 2
+            IF( L.LE.LEND )
+     $         GO TO 40
+            GO TO 140
+         END IF
+*
+         IF( JTOT.EQ.NMAXIT )
+     $      GO TO 140
+         JTOT = JTOT + 1
+*
+*        Form shift.
+*
+         G = ( D( L+1 )-P ) / ( TWO*E( L ) )
+         R = SLAPY2( G, ONE )
+         G = D( M ) - P + ( E( L ) / ( G+SIGN( R, G ) ) )
+*
+         S = ONE
+         C = ONE
+         P = ZERO
+*
+*        Inner loop
+*
+         MM1 = M - 1
+         DO 70 I = MM1, L, -1
+            F = S*E( I )
+            B = C*E( I )
+            CALL SLARTG( G, F, C, S, R )
+            IF( I.NE.M-1 )
+     $         E( I+1 ) = R
+            G = D( I+1 ) - P
+            R = ( D( I )-G )*S + TWO*C*B
+            P = S*R
+            D( I+1 ) = G + P
+            G = C*R - B
+*
+*           If eigenvectors are desired, then save rotations.
+*
+            IF( ICOMPZ.GT.0 ) THEN
+               WORK( I ) = C
+               WORK( N-1+I ) = -S
+            END IF
+*
+   70    CONTINUE
+*
+*        If eigenvectors are desired, then apply saved rotations.
+*
+         IF( ICOMPZ.GT.0 ) THEN
+            MM = M - L + 1
+            CALL CLASR( 'R', 'V', 'B', N, MM, WORK( L ), WORK( N-1+L ),
+     $                  Z( 1, L ), LDZ )
+         END IF
+*
+         D( L ) = D( L ) - P
+         E( L ) = G
+         GO TO 40
+*
+*        Eigenvalue found.
+*
+   80    CONTINUE
+         D( L ) = P
+*
+         L = L + 1
+         IF( L.LE.LEND )
+     $      GO TO 40
+         GO TO 140
+*
+      ELSE
+*
+*        QR Iteration
+*
+*        Look for small superdiagonal element.
+*
+   90    CONTINUE
+         IF( L.NE.LEND ) THEN
+            LENDP1 = LEND + 1
+            DO 100 M = L, LENDP1, -1
+               TST = ABS( E( M-1 ) )**2
+               IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M-1 ) )+
+     $             SAFMIN )GO TO 110
+  100       CONTINUE
+         END IF
+*
+         M = LEND
+*
+  110    CONTINUE
+         IF( M.GT.LEND )
+     $      E( M-1 ) = ZERO
+         P = D( L )
+         IF( M.EQ.L )
+     $      GO TO 130
+*
+*        If remaining matrix is 2-by-2, use SLAE2 or SLAEV2
+*        to compute its eigensystem.
+*
+         IF( M.EQ.L-1 ) THEN
+            IF( ICOMPZ.GT.0 ) THEN
+               CALL SLAEV2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2, C, S )
+               WORK( M ) = C
+               WORK( N-1+M ) = S
+               CALL CLASR( 'R', 'V', 'F', N, 2, WORK( M ),
+     $                     WORK( N-1+M ), Z( 1, L-1 ), LDZ )
+            ELSE
+               CALL SLAE2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2 )
+            END IF
+            D( L-1 ) = RT1
+            D( L ) = RT2
+            E( L-1 ) = ZERO
+            L = L - 2
+            IF( L.GE.LEND )
+     $         GO TO 90
+            GO TO 140
+         END IF
+*
+         IF( JTOT.EQ.NMAXIT )
+     $      GO TO 140
+         JTOT = JTOT + 1
+*
+*        Form shift.
+*
+         G = ( D( L-1 )-P ) / ( TWO*E( L-1 ) )
+         R = SLAPY2( G, ONE )
+         G = D( M ) - P + ( E( L-1 ) / ( G+SIGN( R, G ) ) )
+*
+         S = ONE
+         C = ONE
+         P = ZERO
+*
+*        Inner loop
+*
+         LM1 = L - 1
+         DO 120 I = M, LM1
+            F = S*E( I )
+            B = C*E( I )
+            CALL SLARTG( G, F, C, S, R )
+            IF( I.NE.M )
+     $         E( I-1 ) = R
+            G = D( I ) - P
+            R = ( D( I+1 )-G )*S + TWO*C*B
+            P = S*R
+            D( I ) = G + P
+            G = C*R - B
+*
+*           If eigenvectors are desired, then save rotations.
+*
+            IF( ICOMPZ.GT.0 ) THEN
+               WORK( I ) = C
+               WORK( N-1+I ) = S
+            END IF
+*
+  120    CONTINUE
+*
+*        If eigenvectors are desired, then apply saved rotations.
+*
+         IF( ICOMPZ.GT.0 ) THEN
+            MM = L - M + 1
+            CALL CLASR( 'R', 'V', 'F', N, MM, WORK( M ), WORK( N-1+M ),
+     $                  Z( 1, M ), LDZ )
+         END IF
+*
+         D( L ) = D( L ) - P
+         E( LM1 ) = G
+         GO TO 90
+*
+*        Eigenvalue found.
+*
+  130    CONTINUE
+         D( L ) = P
+*
+         L = L - 1
+         IF( L.GE.LEND )
+     $      GO TO 90
+         GO TO 140
+*
+      END IF
+*
+*     Undo scaling if necessary
+*
+  140 CONTINUE
+      IF( ISCALE.EQ.1 ) THEN
+         CALL SLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV+1, 1,
+     $                D( LSV ), N, INFO )
+         CALL SLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV, 1, E( LSV ),
+     $                N, INFO )
+      ELSE IF( ISCALE.EQ.2 ) THEN
+         CALL SLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV+1, 1,
+     $                D( LSV ), N, INFO )
+         CALL SLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV, 1, E( LSV ),
+     $                N, INFO )
+      END IF
+*
+*     Check for no convergence to an eigenvalue after a total
+*     of N*MAXIT iterations.
+*
+      IF( JTOT.EQ.NMAXIT ) THEN
+         DO 150 I = 1, N - 1
+            IF( E( I ).NE.ZERO )
+     $         INFO = INFO + 1
+  150    CONTINUE
+         RETURN
+      END IF
+      GO TO 10
+*
+*     Order eigenvalues and eigenvectors.
+*
+  160 CONTINUE
+      IF( ICOMPZ.EQ.0 ) THEN
+*
+*        Use Quick Sort
+*
+         CALL SLASRT( 'I', N, D, INFO )
+*
+      ELSE
+*
+*        Use Selection Sort to minimize swaps of eigenvectors
+*
+         DO 180 II = 2, N
+            I = II - 1
+            K = I
+            P = D( I )
+            DO 170 J = II, N
+               IF( D( J ).LT.P ) THEN
+                  K = J
+                  P = D( J )
+               END IF
+  170       CONTINUE
+            IF( K.NE.I ) THEN
+               D( K ) = D( I )
+               D( I ) = P
+               CALL CSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 )
+            END IF
+  180    CONTINUE
+      END IF
+      RETURN
+*
+*     End of CSTEQR
+*
+      END
diff --git a/costa/native/external/lapack/csycon.f b/costa/native/external/lapack/csycon.f
new file mode 100644
index 000000000..252ad585f
--- /dev/null
+++ b/costa/native/external/lapack/csycon.f
@@ -0,0 +1,159 @@
+      SUBROUTINE CSYCON( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+      REAL               ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CSYCON estimates the reciprocal of the condition number (in the
+*  1-norm) of a complex symmetric matrix A using the factorization
+*  A = U*D*U**T or A = L*D*L**T computed by CSYTRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**T;
+*          = 'L':  Lower triangular, form is A = L*D*L**T.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input) COMPLEX array, dimension (LDA,N)
+*          The block diagonal matrix D and the multipliers used to
+*          obtain the factor U or L as computed by CSYTRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by CSYTRF.
+*
+*  ANORM   (input) REAL
+*          The 1-norm of the original matrix A.
+*
+*  RCOND   (output) REAL
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an
+*          estimate of the 1-norm of inv(A) computed in this routine.
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, KASE
+      REAL               AINVNM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLACON, CSYTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CSYCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.LE.ZERO ) THEN
+         RETURN
+      END IF
+*
+*     Check that the diagonal matrix D is nonsingular.
+*
+      IF( UPPER ) THEN
+*
+*        Upper triangular storage: examine D from bottom to top
+*
+         DO 10 I = N, 1, -1
+            IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO )
+     $         RETURN
+   10    CONTINUE
+      ELSE
+*
+*        Lower triangular storage: examine D from top to bottom.
+*
+         DO 20 I = 1, N
+            IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO )
+     $         RETURN
+   20    CONTINUE
+      END IF
+*
+*     Estimate the 1-norm of the inverse.
+*
+      KASE = 0
+   30 CONTINUE
+      CALL CLACON( N, WORK( N+1 ), WORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+*
+*        Multiply by inv(L*D*L') or inv(U*D*U').
+*
+         CALL CSYTRS( UPLO, N, 1, A, LDA, IPIV, WORK, N, INFO )
+         GO TO 30
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+      RETURN
+*
+*     End of CSYCON
+*
+      END
diff --git a/costa/native/external/lapack/csymv.f b/costa/native/external/lapack/csymv.f
new file mode 100644
index 000000000..6ff7e760c
--- /dev/null
+++ b/costa/native/external/lapack/csymv.f
@@ -0,0 +1,265 @@
+      SUBROUTINE CSYMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INCX, INCY, LDA, N
+      COMPLEX            ALPHA, BETA
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CSYMV  performs the matrix-vector  operation
+*
+*     y := alpha*A*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are n element vectors and
+*  A is an n by n symmetric matrix.
+*
+*  Arguments
+*  ==========
+*
+*  UPLO   - CHARACTER*1
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the array A is to be referenced as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of A
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of A
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX array, dimension ( LDA, N )
+*           Before entry, with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular part of the symmetric matrix and the strictly
+*           lower triangular part of A is not referenced.
+*           Before entry, with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular part of the symmetric matrix and the strictly
+*           upper triangular part of A is not referenced.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, N ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX array, dimension at least
+*           ( 1 + ( N - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the N-
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX array, dimension at least
+*           ( 1 + ( N - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y. On exit, Y is overwritten by the updated
+*           vector y.
+*
+*  INCY   - INTEGER
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+      COMPLEX            ZERO
+      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KX, KY
+      COMPLEX            TEMP1, TEMP2
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = 1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = 2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = 5
+      ELSE IF( INCX.EQ.0 ) THEN
+         INFO = 7
+      ELSE IF( INCY.EQ.0 ) THEN
+         INFO = 10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CSYMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ) .OR. ( ( ALPHA.EQ.ZERO ) .AND. ( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set up the start points in  X  and  Y.
+*
+      IF( INCX.GT.0 ) THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( N-1 )*INCX
+      END IF
+      IF( INCY.GT.0 ) THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( N-1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the triangular part
+*     of A.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE ) THEN
+         IF( INCY.EQ.1 ) THEN
+            IF( BETA.EQ.ZERO ) THEN
+               DO 10 I = 1, N
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20 I = 1, N
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO ) THEN
+               DO 30 I = 1, N
+                  Y( IY ) = ZERO
+                  IY = IY + INCY
+   30          CONTINUE
+            ELSE
+               DO 40 I = 1, N
+                  Y( IY ) = BETA*Y( IY )
+                  IY = IY + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*        Form  y  when A is stored in upper triangle.
+*
+         IF( ( INCX.EQ.1 ) .AND. ( INCY.EQ.1 ) ) THEN
+            DO 60 J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               DO 50 I = 1, J - 1
+                  Y( I ) = Y( I ) + TEMP1*A( I, J )
+                  TEMP2 = TEMP2 + A( I, J )*X( I )
+   50          CONTINUE
+               Y( J ) = Y( J ) + TEMP1*A( J, J ) + ALPHA*TEMP2
+   60       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 80 J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               IX = KX
+               IY = KY
+               DO 70 I = 1, J - 1
+                  Y( IY ) = Y( IY ) + TEMP1*A( I, J )
+                  TEMP2 = TEMP2 + A( I, J )*X( IX )
+                  IX = IX + INCX
+                  IY = IY + INCY
+   70          CONTINUE
+               Y( JY ) = Y( JY ) + TEMP1*A( J, J ) + ALPHA*TEMP2
+               JX = JX + INCX
+               JY = JY + INCY
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y  when A is stored in lower triangle.
+*
+         IF( ( INCX.EQ.1 ) .AND. ( INCY.EQ.1 ) ) THEN
+            DO 100 J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               Y( J ) = Y( J ) + TEMP1*A( J, J )
+               DO 90 I = J + 1, N
+                  Y( I ) = Y( I ) + TEMP1*A( I, J )
+                  TEMP2 = TEMP2 + A( I, J )*X( I )
+   90          CONTINUE
+               Y( J ) = Y( J ) + ALPHA*TEMP2
+  100       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 120 J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               Y( JY ) = Y( JY ) + TEMP1*A( J, J )
+               IX = JX
+               IY = JY
+               DO 110 I = J + 1, N
+                  IX = IX + INCX
+                  IY = IY + INCY
+                  Y( IY ) = Y( IY ) + TEMP1*A( I, J )
+                  TEMP2 = TEMP2 + A( I, J )*X( IX )
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP2
+               JX = JX + INCX
+               JY = JY + INCY
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CSYMV
+*
+      END
diff --git a/costa/native/external/lapack/csyr.f b/costa/native/external/lapack/csyr.f
new file mode 100644
index 000000000..94d69db8b
--- /dev/null
+++ b/costa/native/external/lapack/csyr.f
@@ -0,0 +1,199 @@
+      SUBROUTINE CSYR( UPLO, N, ALPHA, X, INCX, A, LDA )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INCX, LDA, N
+      COMPLEX            ALPHA
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CSYR   performs the symmetric rank 1 operation
+*
+*     A := alpha*x*( x' ) + A,
+*
+*  where alpha is a complex scalar, x is an n element vector and A is an
+*  n by n symmetric matrix.
+*
+*  Arguments
+*  ==========
+*
+*  UPLO   - CHARACTER*1
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the array A is to be referenced as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of A
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of A
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX array, dimension at least
+*           ( 1 + ( N - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the N-
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX array, dimension ( LDA, N )
+*           Before entry, with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular part of the symmetric matrix and the strictly
+*           lower triangular part of A is not referenced. On exit, the
+*           upper triangular part of the array A is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry, with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular part of the symmetric matrix and the strictly
+*           upper triangular part of A is not referenced. On exit, the
+*           lower triangular part of the array A is overwritten by the
+*           lower triangular part of the updated matrix.
+*
+*  LDA    - INTEGER
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, N ).
+*           Unchanged on exit.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, INFO, IX, J, JX, KX
+      COMPLEX            TEMP
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = 1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 ) THEN
+         INFO = 5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CSYR  ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ) .OR. ( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Set the start point in X if the increment is not unity.
+*
+      IF( INCX.LE.0 ) THEN
+         KX = 1 - ( N-1 )*INCX
+      ELSE IF( INCX.NE.1 ) THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the triangular part
+*     of A.
+*
+      IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*        Form  A  when A is stored in upper triangle.
+*
+         IF( INCX.EQ.1 ) THEN
+            DO 20 J = 1, N
+               IF( X( J ).NE.ZERO ) THEN
+                  TEMP = ALPHA*X( J )
+                  DO 10 I = 1, J
+                     A( I, J ) = A( I, J ) + X( I )*TEMP
+   10             CONTINUE
+               END IF
+   20       CONTINUE
+         ELSE
+            JX = KX
+            DO 40 J = 1, N
+               IF( X( JX ).NE.ZERO ) THEN
+                  TEMP = ALPHA*X( JX )
+                  IX = KX
+                  DO 30 I = 1, J
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP
+                     IX = IX + INCX
+   30             CONTINUE
+               END IF
+               JX = JX + INCX
+   40       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  A  when A is stored in lower triangle.
+*
+         IF( INCX.EQ.1 ) THEN
+            DO 60 J = 1, N
+               IF( X( J ).NE.ZERO ) THEN
+                  TEMP = ALPHA*X( J )
+                  DO 50 I = J, N
+                     A( I, J ) = A( I, J ) + X( I )*TEMP
+   50             CONTINUE
+               END IF
+   60       CONTINUE
+         ELSE
+            JX = KX
+            DO 80 J = 1, N
+               IF( X( JX ).NE.ZERO ) THEN
+                  TEMP = ALPHA*X( JX )
+                  IX = JX
+                  DO 70 I = J, N
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP
+                     IX = IX + INCX
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CSYR
+*
+      END
diff --git a/costa/native/external/lapack/csyrfs.f b/costa/native/external/lapack/csyrfs.f
new file mode 100644
index 000000000..42a25aa8e
--- /dev/null
+++ b/costa/native/external/lapack/csyrfs.f
@@ -0,0 +1,339 @@
+      SUBROUTINE CSYRFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB,
+     $                   X, LDX, FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX            A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CSYRFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is symmetric indefinite, and
+*  provides error bounds and backward error estimates for the solution.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input) COMPLEX array, dimension (LDA,N)
+*          The symmetric matrix A.  If UPLO = 'U', the leading N-by-N
+*          upper triangular part of A contains the upper triangular part
+*          of the matrix A, and the strictly lower triangular part of A
+*          is not referenced.  If UPLO = 'L', the leading N-by-N lower
+*          triangular part of A contains the lower triangular part of
+*          the matrix A, and the strictly upper triangular part of A is
+*          not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  AF      (input) COMPLEX array, dimension (LDAF,N)
+*          The factored form of the matrix A.  AF contains the block
+*          diagonal matrix D and the multipliers used to obtain the
+*          factor U or L from the factorization A = U*D*U**T or
+*          A = L*D*L**T as computed by CSYTRF.
+*
+*  LDAF    (input) INTEGER
+*          The leading dimension of the array AF.  LDAF >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by CSYTRF.
+*
+*  B       (input) COMPLEX array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) COMPLEX array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by CSYTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+      REAL               TWO
+      PARAMETER          ( TWO = 2.0E+0 )
+      REAL               THREE
+      PARAMETER          ( THREE = 3.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            COUNT, I, J, K, KASE, NZ
+      REAL               EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+      COMPLEX            ZDUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CCOPY, CLACON, CSYMV, CSYTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MAX, REAL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CSYRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = SLAMCH( 'Epsilon' )
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - A * X
+*
+         CALL CCOPY( N, B( 1, J ), 1, WORK, 1 )
+         CALL CSYMV( UPLO, N, -ONE, A, LDA, X( 1, J ), 1, ONE, WORK, 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            RWORK( I ) = CABS1( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(A)*abs(X) + abs(B).
+*
+         IF( UPPER ) THEN
+            DO 50 K = 1, N
+               S = ZERO
+               XK = CABS1( X( K, J ) )
+               DO 40 I = 1, K - 1
+                  RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK
+                  S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) )
+   40          CONTINUE
+               RWORK( K ) = RWORK( K ) + CABS1( A( K, K ) )*XK + S
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               XK = CABS1( X( K, J ) )
+               RWORK( K ) = RWORK( K ) + CABS1( A( K, K ) )*XK
+               DO 60 I = K + 1, N
+                  RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK
+                  S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) )
+   60          CONTINUE
+               RWORK( K ) = RWORK( K ) + S
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) )
+            ELSE
+               S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) /
+     $             ( RWORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL CSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO )
+            CALL CAXPY( N, ONE, WORK, 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(A))*
+*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(A) is the inverse of A
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(A)*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use CLACON to estimate the infinity-norm of the matrix
+*           inv(A) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I )
+            ELSE
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) +
+     $                      SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL CLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(A').
+*
+               CALL CSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO )
+               DO 110 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  110          CONTINUE
+            ELSE IF( KASE.EQ.2 ) THEN
+*
+*              Multiply by inv(A)*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  120          CONTINUE
+               CALL CSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of CSYRFS
+*
+      END
diff --git a/costa/native/external/lapack/csysv.f b/costa/native/external/lapack/csysv.f
new file mode 100644
index 000000000..7c331a643
--- /dev/null
+++ b/costa/native/external/lapack/csysv.f
@@ -0,0 +1,171 @@
+      SUBROUTINE CSYSV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK,
+     $                  LWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LDB, LWORK, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            A( LDA, * ), B( LDB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CSYSV computes the solution to a complex system of linear equations
+*     A * X = B,
+*  where A is an N-by-N symmetric matrix and X and B are N-by-NRHS
+*  matrices.
+*
+*  The diagonal pivoting method is used to factor A as
+*     A = U * D * U**T,  if UPLO = 'U', or
+*     A = L * D * L**T,  if UPLO = 'L',
+*  where U (or L) is a product of permutation and unit upper (lower)
+*  triangular matrices, and D is symmetric and block diagonal with 
+*  1-by-1 and 2-by-2 diagonal blocks.  The factored form of A is then
+*  used to solve the system of equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the block diagonal matrix D and the
+*          multipliers used to obtain the factor U or L from the
+*          factorization A = U*D*U**T or A = L*D*L**T as computed by
+*          CSYTRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D, as
+*          determined by CSYTRF.  If IPIV(k) > 0, then rows and columns
+*          k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1
+*          diagonal block.  If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0,
+*          then rows and columns k-1 and -IPIV(k) were interchanged and
+*          D(k-1:k,k-1:k) is a 2-by-2 diagonal block.  If UPLO = 'L' and
+*          IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and
+*          -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2
+*          diagonal block.
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of WORK.  LWORK >= 1, and for best performance
+*          LWORK >= N*NB, where NB is the optimal blocksize for
+*          CSYTRF.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, D(i,i) is exactly zero.  The factorization
+*               has been completed, but the block diagonal matrix D is
+*               exactly singular, so the solution could not be computed.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            LWKOPT, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV, LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CSYTRF, CSYTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN
+         INFO = -10
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'CSYTRF', UPLO, N, -1, -1, -1 )
+         LWKOPT = N*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CSYSV ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Compute the factorization A = U*D*U' or A = L*D*L'.
+*
+      CALL CSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL CSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
+*
+      END IF
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of CSYSV
+*
+      END
diff --git a/costa/native/external/lapack/csysvx.f b/costa/native/external/lapack/csysvx.f
new file mode 100644
index 000000000..1612dda3f
--- /dev/null
+++ b/costa/native/external/lapack/csysvx.f
@@ -0,0 +1,299 @@
+      SUBROUTINE CSYSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B,
+     $                   LDB, X, LDX, RCOND, FERR, BERR, WORK, LWORK,
+     $                   RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          FACT, UPLO
+      INTEGER            INFO, LDA, LDAF, LDB, LDX, LWORK, N, NRHS
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX            A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CSYSVX uses the diagonal pivoting factorization to compute the
+*  solution to a complex system of linear equations A * X = B,
+*  where A is an N-by-N symmetric matrix and X and B are N-by-NRHS
+*  matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'N', the diagonal pivoting method is used to factor A.
+*     The form of the factorization is
+*        A = U * D * U**T,  if UPLO = 'U', or
+*        A = L * D * L**T,  if UPLO = 'L',
+*     where U (or L) is a product of permutation and unit upper (lower)
+*     triangular matrices, and D is symmetric and block diagonal with
+*     1-by-1 and 2-by-2 diagonal blocks.
+*
+*  2. If some D(i,i)=0, so that D is exactly singular, then the routine
+*     returns with INFO = i. Otherwise, the factored form of A is used
+*     to estimate the condition number of the matrix A.  If the
+*     reciprocal of the condition number is less than machine precision,
+*     INFO = N+1 is returned as a warning, but the routine still goes on
+*     to solve for X and compute error bounds as described below.
+*
+*  3. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  4. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of A has been
+*          supplied on entry.
+*          = 'F':  On entry, AF and IPIV contain the factored form
+*                  of A.  A, AF and IPIV will not be modified.
+*          = 'N':  The matrix A will be copied to AF and factored.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input) COMPLEX array, dimension (LDA,N)
+*          The symmetric matrix A.  If UPLO = 'U', the leading N-by-N
+*          upper triangular part of A contains the upper triangular part
+*          of the matrix A, and the strictly lower triangular part of A
+*          is not referenced.  If UPLO = 'L', the leading N-by-N lower
+*          triangular part of A contains the lower triangular part of
+*          the matrix A, and the strictly upper triangular part of A is
+*          not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  AF      (input or output) COMPLEX array, dimension (LDAF,N)
+*          If FACT = 'F', then AF is an input argument and on entry
+*          contains the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L from the factorization
+*          A = U*D*U**T or A = L*D*L**T as computed by CSYTRF.
+*
+*          If FACT = 'N', then AF is an output argument and on exit
+*          returns the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L from the factorization
+*          A = U*D*U**T or A = L*D*L**T.
+*
+*  LDAF    (input) INTEGER
+*          The leading dimension of the array AF.  LDAF >= max(1,N).
+*
+*  IPIV    (input or output) INTEGER array, dimension (N)
+*          If FACT = 'F', then IPIV is an input argument and on entry
+*          contains details of the interchanges and the block structure
+*          of D, as determined by CSYTRF.
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*          If FACT = 'N', then IPIV is an output argument and on exit
+*          contains details of the interchanges and the block structure
+*          of D, as determined by CSYTRF.
+*
+*  B       (input) COMPLEX array, dimension (LDB,NRHS)
+*          The N-by-NRHS right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) COMPLEX array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) REAL
+*          The estimate of the reciprocal condition number of the matrix
+*          A.  If RCOND is less than the machine precision (in
+*          particular, if RCOND = 0), the matrix is singular to working
+*          precision.  This condition is indicated by a return code of
+*          INFO > 0.
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of WORK.  LWORK >= 2*N, and for best performance
+*          LWORK >= N*NB, where NB is the optimal blocksize for
+*          CSYTRF.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, and i is
+*                <= N:  D(i,i) is exactly zero.  The factorization
+*                       has been completed but the factor D is exactly
+*                       singular, so the solution and error bounds could
+*                       not be computed. RCOND = 0 is returned.
+*                = N+1: D is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, NOFACT
+      INTEGER            LWKOPT, NB
+      REAL               ANORM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               CLANSY, SLAMCH
+      EXTERNAL           ILAENV, LSAME, CLANSY, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLACPY, CSYCON, CSYRFS, CSYTRF, CSYTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -13
+      ELSE IF( LWORK.LT.MAX( 1, 2*N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -18
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'CSYTRF', UPLO, N, -1, -1, -1 )
+         LWKOPT = N*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CSYSVX', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+      IF( NOFACT ) THEN
+*
+*        Compute the factorization A = U*D*U' or A = L*D*L'.
+*
+         CALL CLACPY( UPLO, N, N, A, LDA, AF, LDAF )
+         CALL CSYTRF( UPLO, N, AF, LDAF, IPIV, WORK, LWORK, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      ANORM = CLANSY( 'I', UPLO, N, A, LDA, RWORK )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL CSYCON( UPLO, N, AF, LDAF, IPIV, ANORM, RCOND, WORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.SLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution vectors X.
+*
+      CALL CLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL CSYTRS( UPLO, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solutions and
+*     compute error bounds and backward error estimates for them.
+*
+      CALL CSYRFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, X,
+     $             LDX, FERR, BERR, WORK, RWORK, INFO )
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of CSYSVX
+*
+      END
diff --git a/costa/native/external/lapack/csytf2.f b/costa/native/external/lapack/csytf2.f
new file mode 100644
index 000000000..1366519ab
--- /dev/null
+++ b/costa/native/external/lapack/csytf2.f
@@ -0,0 +1,515 @@
+      SUBROUTINE CSYTF2( UPLO, N, A, LDA, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CSYTF2 computes the factorization of a complex symmetric matrix A
+*  using the Bunch-Kaufman diagonal pivoting method:
+*
+*     A = U*D*U'  or  A = L*D*L'
+*
+*  where U (or L) is a product of permutation and unit upper (lower)
+*  triangular matrices, U' is the transpose of U, and D is symmetric and
+*  block diagonal with 1-by-1 and 2-by-2 diagonal blocks.
+*
+*  This is the unblocked version of the algorithm, calling Level 2 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored:
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          n-by-n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n-by-n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L (see below for further details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D.
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*          > 0: if INFO = k, D(k,k) is exactly zero.  The factorization
+*               has been completed, but the block diagonal matrix D is
+*               exactly singular, and division by zero will occur if it
+*               is used to solve a system of equations.
+*
+*  Further Details
+*  ===============
+*
+*  1-96 - Based on modifications by J. Lewis, Boeing Computer Services
+*         Company
+*
+*  If UPLO = 'U', then A = U*D*U', where
+*     U = P(n)*U(n)* ... *P(k)U(k)* ...,
+*  i.e., U is a product of terms P(k)*U(k), where k decreases from n to
+*  1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and U(k) is a unit upper triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    v    0   )   k-s
+*     U(k) =  (   0    I    0   )   s
+*             (   0    0    I   )   n-k
+*                k-s   s   n-k
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k).
+*  If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k),
+*  and A(k,k), and v overwrites A(1:k-2,k-1:k).
+*
+*  If UPLO = 'L', then A = L*D*L', where
+*     L = P(1)*L(1)* ... *P(k)*L(k)* ...,
+*  i.e., L is a product of terms P(k)*L(k), where k increases from 1 to
+*  n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and L(k) is a unit lower triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    0     0   )  k-1
+*     L(k) =  (   0    I     0   )  s
+*             (   0    v     I   )  n-k-s+1
+*                k-1   s  n-k-s+1
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k).
+*  If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k),
+*  and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+      REAL               EIGHT, SEVTEN
+      PARAMETER          ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 )
+      COMPLEX            CONE
+      PARAMETER          ( CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, IMAX, J, JMAX, K, KK, KP, KSTEP
+      REAL               ABSAKK, ALPHA, COLMAX, ROWMAX
+      COMPLEX            D11, D12, D21, D22, R1, T, WK, WKM1, WKP1, Z
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ICAMAX
+      EXTERNAL           LSAME, ICAMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CSCAL, CSWAP, CSYR, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MAX, REAL, SQRT
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( Z ) = ABS( REAL( Z ) ) + ABS( AIMAG( Z ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CSYTF2', -INFO )
+         RETURN
+      END IF
+*
+*     Initialize ALPHA for use in choosing pivot block size.
+*
+      ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT
+*
+      IF( UPPER ) THEN
+*
+*        Factorize A as U*D*U' using the upper triangle of A
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2
+*
+         K = N
+   10    CONTINUE
+*
+*        If K < 1, exit from loop
+*
+         IF( K.LT.1 )
+     $      GO TO 70
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = CABS1( A( K, K ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.GT.1 ) THEN
+            IMAX = ICAMAX( K-1, A( 1, K ), 1 )
+            COLMAX = CABS1( A( IMAX, K ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               JMAX = IMAX + ICAMAX( K-IMAX, A( IMAX, IMAX+1 ), LDA )
+               ROWMAX = CABS1( A( IMAX, JMAX ) )
+               IF( IMAX.GT.1 ) THEN
+                  JMAX = ICAMAX( IMAX-1, A( 1, IMAX ), 1 )
+                  ROWMAX = MAX( ROWMAX, CABS1( A( JMAX, IMAX ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( CABS1( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX ) THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+               ELSE
+*
+*                 interchange rows and columns K-1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K - KSTEP + 1
+            IF( KP.NE.KK ) THEN
+*
+*              Interchange rows and columns KK and KP in the leading
+*              submatrix A(1:k,1:k)
+*
+               CALL CSWAP( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 )
+               CALL CSWAP( KK-KP-1, A( KP+1, KK ), 1, A( KP, KP+1 ),
+     $                     LDA )
+               T = A( KK, KK )
+               A( KK, KK ) = A( KP, KP )
+               A( KP, KP ) = T
+               IF( KSTEP.EQ.2 ) THEN
+                  T = A( K-1, K )
+                  A( K-1, K ) = A( KP, K )
+                  A( KP, K ) = T
+               END IF
+            END IF
+*
+*           Update the leading submatrix
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column k now holds
+*
+*              W(k) = U(k)*D(k)
+*
+*              where U(k) is the k-th column of U
+*
+*              Perform a rank-1 update of A(1:k-1,1:k-1) as
+*
+*              A := A - U(k)*D(k)*U(k)' = A - W(k)*1/D(k)*W(k)'
+*
+               R1 = CONE / A( K, K )
+               CALL CSYR( UPLO, K-1, -R1, A( 1, K ), 1, A, LDA )
+*
+*              Store U(k) in column k
+*
+               CALL CSCAL( K-1, R1, A( 1, K ), 1 )
+            ELSE
+*
+*              2-by-2 pivot block D(k): columns k and k-1 now hold
+*
+*              ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k)
+*
+*              where U(k) and U(k-1) are the k-th and (k-1)-th columns
+*              of U
+*
+*              Perform a rank-2 update of A(1:k-2,1:k-2) as
+*
+*              A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )'
+*                 = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )'
+*
+               IF( K.GT.2 ) THEN
+*
+                  D12 = A( K-1, K )
+                  D22 = A( K-1, K-1 ) / D12
+                  D11 = A( K, K ) / D12
+                  T = CONE / ( D11*D22-CONE )
+                  D12 = T / D12
+*
+                  DO 30 J = K - 2, 1, -1
+                     WKM1 = D12*( D11*A( J, K-1 )-A( J, K ) )
+                     WK = D12*( D22*A( J, K )-A( J, K-1 ) )
+                     DO 20 I = J, 1, -1
+                        A( I, J ) = A( I, J ) - A( I, K )*WK -
+     $                              A( I, K-1 )*WKM1
+   20                CONTINUE
+                     A( J, K ) = WK
+                     A( J, K-1 ) = WKM1
+   30             CONTINUE
+*
+               END IF
+*
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K-1 ) = -KP
+         END IF
+*
+*        Decrease K and return to the start of the main loop
+*
+         K = K - KSTEP
+         GO TO 10
+*
+      ELSE
+*
+*        Factorize A as L*D*L' using the lower triangle of A
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2
+*
+         K = 1
+   40    CONTINUE
+*
+*        If K > N, exit from loop
+*
+         IF( K.GT.N )
+     $      GO TO 70
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = CABS1( A( K, K ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.LT.N ) THEN
+            IMAX = K + ICAMAX( N-K, A( K+1, K ), 1 )
+            COLMAX = CABS1( A( IMAX, K ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               JMAX = K - 1 + ICAMAX( IMAX-K, A( IMAX, K ), LDA )
+               ROWMAX = CABS1( A( IMAX, JMAX ) )
+               IF( IMAX.LT.N ) THEN
+                  JMAX = IMAX + ICAMAX( N-IMAX, A( IMAX+1, IMAX ), 1 )
+                  ROWMAX = MAX( ROWMAX, CABS1( A( JMAX, IMAX ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( CABS1( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX ) THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+               ELSE
+*
+*                 interchange rows and columns K+1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K + KSTEP - 1
+            IF( KP.NE.KK ) THEN
+*
+*              Interchange rows and columns KK and KP in the trailing
+*              submatrix A(k:n,k:n)
+*
+               IF( KP.LT.N )
+     $            CALL CSWAP( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 )
+               CALL CSWAP( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ),
+     $                     LDA )
+               T = A( KK, KK )
+               A( KK, KK ) = A( KP, KP )
+               A( KP, KP ) = T
+               IF( KSTEP.EQ.2 ) THEN
+                  T = A( K+1, K )
+                  A( K+1, K ) = A( KP, K )
+                  A( KP, K ) = T
+               END IF
+            END IF
+*
+*           Update the trailing submatrix
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column k now holds
+*
+*              W(k) = L(k)*D(k)
+*
+*              where L(k) is the k-th column of L
+*
+               IF( K.LT.N ) THEN
+*
+*                 Perform a rank-1 update of A(k+1:n,k+1:n) as
+*
+*                 A := A - L(k)*D(k)*L(k)' = A - W(k)*(1/D(k))*W(k)'
+*
+                  R1 = CONE / A( K, K )
+                  CALL CSYR( UPLO, N-K, -R1, A( K+1, K ), 1,
+     $                       A( K+1, K+1 ), LDA )
+*
+*                 Store L(k) in column K
+*
+                  CALL CSCAL( N-K, R1, A( K+1, K ), 1 )
+               END IF
+            ELSE
+*
+*              2-by-2 pivot block D(k)
+*
+               IF( K.LT.N-1 ) THEN
+*
+*                 Perform a rank-2 update of A(k+2:n,k+2:n) as
+*
+*                 A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )'
+*                    = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )'
+*
+*                 where L(k) and L(k+1) are the k-th and (k+1)-th
+*                 columns of L
+*
+                  D21 = A( K+1, K )
+                  D11 = A( K+1, K+1 ) / D21
+                  D22 = A( K, K ) / D21
+                  T = CONE / ( D11*D22-CONE )
+                  D21 = T / D21
+*
+                  DO 60 J = K + 2, N
+                     WK = D21*( D11*A( J, K )-A( J, K+1 ) )
+                     WKP1 = D21*( D22*A( J, K+1 )-A( J, K ) )
+                     DO 50 I = J, N
+                        A( I, J ) = A( I, J ) - A( I, K )*WK -
+     $                              A( I, K+1 )*WKP1
+   50                CONTINUE
+                     A( J, K ) = WK
+                     A( J, K+1 ) = WKP1
+   60             CONTINUE
+               END IF
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K+1 ) = -KP
+         END IF
+*
+*        Increase K and return to the start of the main loop
+*
+         K = K + KSTEP
+         GO TO 40
+*
+      END IF
+*
+   70 CONTINUE
+      RETURN
+*
+*     End of CSYTF2
+*
+      END
diff --git a/costa/native/external/lapack/csytrf.f b/costa/native/external/lapack/csytrf.f
new file mode 100644
index 000000000..30a20f502
--- /dev/null
+++ b/costa/native/external/lapack/csytrf.f
@@ -0,0 +1,287 @@
+      SUBROUTINE CSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CSYTRF computes the factorization of a complex symmetric matrix A
+*  using the Bunch-Kaufman diagonal pivoting method.  The form of the
+*  factorization is
+*
+*     A = U*D*U**T  or  A = L*D*L**T
+*
+*  where U (or L) is a product of permutation and unit upper (lower)
+*  triangular matrices, and D is symmetric and block diagonal with
+*  with 1-by-1 and 2-by-2 diagonal blocks.
+*
+*  This is the blocked version of the algorithm, calling Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L (see below for further details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D.
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of WORK.  LWORK >=1.  For best performance
+*          LWORK >= N*NB, where NB is the block size returned by ILAENV.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, D(i,i) is exactly zero.  The factorization
+*                has been completed, but the block diagonal matrix D is
+*                exactly singular, and division by zero will occur if it
+*                is used to solve a system of equations.
+*
+*  Further Details
+*  ===============
+*
+*  If UPLO = 'U', then A = U*D*U', where
+*     U = P(n)*U(n)* ... *P(k)U(k)* ...,
+*  i.e., U is a product of terms P(k)*U(k), where k decreases from n to
+*  1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and U(k) is a unit upper triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    v    0   )   k-s
+*     U(k) =  (   0    I    0   )   s
+*             (   0    0    I   )   n-k
+*                k-s   s   n-k
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k).
+*  If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k),
+*  and A(k,k), and v overwrites A(1:k-2,k-1:k).
+*
+*  If UPLO = 'L', then A = L*D*L', where
+*     L = P(1)*L(1)* ... *P(k)*L(k)* ...,
+*  i.e., L is a product of terms P(k)*L(k), where k increases from 1 to
+*  n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and L(k) is a unit lower triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    0     0   )  k-1
+*     L(k) =  (   0    I     0   )  s
+*             (   0    v     I   )  n-k-s+1
+*                k-1   s  n-k-s+1
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k).
+*  If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k),
+*  and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, UPPER
+      INTEGER            IINFO, IWS, J, K, KB, LDWORK, LWKOPT, NB, NBMIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLASYF, CSYTF2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size
+*
+         NB = ILAENV( 1, 'CSYTRF', UPLO, N, -1, -1, -1 )
+         LWKOPT = N*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CSYTRF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      LDWORK = N
+      IF( NB.GT.1 .AND. NB.LT.N ) THEN
+         IWS = LDWORK*NB
+         IF( LWORK.LT.IWS ) THEN
+            NB = MAX( LWORK / LDWORK, 1 )
+            NBMIN = MAX( 2, ILAENV( 2, 'CSYTRF', UPLO, N, -1, -1, -1 ) )
+         END IF
+      ELSE
+         IWS = 1
+      END IF
+      IF( NB.LT.NBMIN )
+     $   NB = N
+*
+      IF( UPPER ) THEN
+*
+*        Factorize A as U*D*U' using the upper triangle of A
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        KB, where KB is the number of columns factorized by CLASYF;
+*        KB is either NB or NB-1, or K for the last block
+*
+         K = N
+   10    CONTINUE
+*
+*        If K < 1, exit from loop
+*
+         IF( K.LT.1 )
+     $      GO TO 40
+*
+         IF( K.GT.NB ) THEN
+*
+*           Factorize columns k-kb+1:k of A and use blocked code to
+*           update columns 1:k-kb
+*
+            CALL CLASYF( UPLO, K, NB, KB, A, LDA, IPIV, WORK, N, IINFO )
+         ELSE
+*
+*           Use unblocked code to factorize columns 1:k of A
+*
+            CALL CSYTF2( UPLO, K, A, LDA, IPIV, IINFO )
+            KB = K
+         END IF
+*
+*        Set INFO on the first occurrence of a zero pivot
+*
+         IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+     $      INFO = IINFO
+*
+*        Decrease K and return to the start of the main loop
+*
+         K = K - KB
+         GO TO 10
+*
+      ELSE
+*
+*        Factorize A as L*D*L' using the lower triangle of A
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        KB, where KB is the number of columns factorized by CLASYF;
+*        KB is either NB or NB-1, or N-K+1 for the last block
+*
+         K = 1
+   20    CONTINUE
+*
+*        If K > N, exit from loop
+*
+         IF( K.GT.N )
+     $      GO TO 40
+*
+         IF( K.LE.N-NB ) THEN
+*
+*           Factorize columns k:k+kb-1 of A and use blocked code to
+*           update columns k+kb:n
+*
+            CALL CLASYF( UPLO, N-K+1, NB, KB, A( K, K ), LDA, IPIV( K ),
+     $                   WORK, N, IINFO )
+         ELSE
+*
+*           Use unblocked code to factorize columns k:n of A
+*
+            CALL CSYTF2( UPLO, N-K+1, A( K, K ), LDA, IPIV( K ), IINFO )
+            KB = N - K + 1
+         END IF
+*
+*        Set INFO on the first occurrence of a zero pivot
+*
+         IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+     $      INFO = IINFO + K - 1
+*
+*        Adjust IPIV
+*
+         DO 30 J = K, K + KB - 1
+            IF( IPIV( J ).GT.0 ) THEN
+               IPIV( J ) = IPIV( J ) + K - 1
+            ELSE
+               IPIV( J ) = IPIV( J ) - K + 1
+            END IF
+   30    CONTINUE
+*
+*        Increase K and return to the start of the main loop
+*
+         K = K + KB
+         GO TO 20
+*
+      END IF
+*
+   40 CONTINUE
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of CSYTRF
+*
+      END
diff --git a/costa/native/external/lapack/csytri.f b/costa/native/external/lapack/csytri.f
new file mode 100644
index 000000000..451176426
--- /dev/null
+++ b/costa/native/external/lapack/csytri.f
@@ -0,0 +1,314 @@
+      SUBROUTINE CSYTRI( UPLO, N, A, LDA, IPIV, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CSYTRI computes the inverse of a complex symmetric indefinite matrix
+*  A using the factorization A = U*D*U**T or A = L*D*L**T computed by
+*  CSYTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**T;
+*          = 'L':  Lower triangular, form is A = L*D*L**T.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the block diagonal matrix D and the multipliers
+*          used to obtain the factor U or L as computed by CSYTRF.
+*
+*          On exit, if INFO = 0, the (symmetric) inverse of the original
+*          matrix.  If UPLO = 'U', the upper triangular part of the
+*          inverse is formed and the part of A below the diagonal is not
+*          referenced; if UPLO = 'L' the lower triangular part of the
+*          inverse is formed and the part of A above the diagonal is
+*          not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by CSYTRF.
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its
+*               inverse could not be computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ),
+     $                   ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            K, KP, KSTEP
+      COMPLEX            AK, AKKP1, AKP1, D, T, TEMP
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      COMPLEX            CDOTU
+      EXTERNAL           LSAME, CDOTU
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CSWAP, CSYMV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CSYTRI', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Check that the diagonal matrix D is nonsingular.
+*
+      IF( UPPER ) THEN
+*
+*        Upper triangular storage: examine D from bottom to top
+*
+         DO 10 INFO = N, 1, -1
+            IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO )
+     $         RETURN
+   10    CONTINUE
+      ELSE
+*
+*        Lower triangular storage: examine D from top to bottom.
+*
+         DO 20 INFO = 1, N
+            IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO )
+     $         RETURN
+   20    CONTINUE
+      END IF
+      INFO = 0
+*
+      IF( UPPER ) THEN
+*
+*        Compute inv(A) from the factorization A = U*D*U'.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+   30    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 40
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Invert the diagonal block.
+*
+            A( K, K ) = ONE / A( K, K )
+*
+*           Compute column K of the inverse.
+*
+            IF( K.GT.1 ) THEN
+               CALL CCOPY( K-1, A( 1, K ), 1, WORK, 1 )
+               CALL CSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO,
+     $                     A( 1, K ), 1 )
+               A( K, K ) = A( K, K ) - CDOTU( K-1, WORK, 1, A( 1, K ),
+     $                     1 )
+            END IF
+            KSTEP = 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Invert the diagonal block.
+*
+            T = A( K, K+1 )
+            AK = A( K, K ) / T
+            AKP1 = A( K+1, K+1 ) / T
+            AKKP1 = A( K, K+1 ) / T
+            D = T*( AK*AKP1-ONE )
+            A( K, K ) = AKP1 / D
+            A( K+1, K+1 ) = AK / D
+            A( K, K+1 ) = -AKKP1 / D
+*
+*           Compute columns K and K+1 of the inverse.
+*
+            IF( K.GT.1 ) THEN
+               CALL CCOPY( K-1, A( 1, K ), 1, WORK, 1 )
+               CALL CSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO,
+     $                     A( 1, K ), 1 )
+               A( K, K ) = A( K, K ) - CDOTU( K-1, WORK, 1, A( 1, K ),
+     $                     1 )
+               A( K, K+1 ) = A( K, K+1 ) -
+     $                       CDOTU( K-1, A( 1, K ), 1, A( 1, K+1 ), 1 )
+               CALL CCOPY( K-1, A( 1, K+1 ), 1, WORK, 1 )
+               CALL CSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO,
+     $                     A( 1, K+1 ), 1 )
+               A( K+1, K+1 ) = A( K+1, K+1 ) -
+     $                         CDOTU( K-1, WORK, 1, A( 1, K+1 ), 1 )
+            END IF
+            KSTEP = 2
+         END IF
+*
+         KP = ABS( IPIV( K ) )
+         IF( KP.NE.K ) THEN
+*
+*           Interchange rows and columns K and KP in the leading
+*           submatrix A(1:k+1,1:k+1)
+*
+            CALL CSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 )
+            CALL CSWAP( K-KP-1, A( KP+1, K ), 1, A( KP, KP+1 ), LDA )
+            TEMP = A( K, K )
+            A( K, K ) = A( KP, KP )
+            A( KP, KP ) = TEMP
+            IF( KSTEP.EQ.2 ) THEN
+               TEMP = A( K, K+1 )
+               A( K, K+1 ) = A( KP, K+1 )
+               A( KP, K+1 ) = TEMP
+            END IF
+         END IF
+*
+         K = K + KSTEP
+         GO TO 30
+   40    CONTINUE
+*
+      ELSE
+*
+*        Compute inv(A) from the factorization A = L*D*L'.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = N
+   50    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 60
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Invert the diagonal block.
+*
+            A( K, K ) = ONE / A( K, K )
+*
+*           Compute column K of the inverse.
+*
+            IF( K.LT.N ) THEN
+               CALL CCOPY( N-K, A( K+1, K ), 1, WORK, 1 )
+               CALL CSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1,
+     $                     ZERO, A( K+1, K ), 1 )
+               A( K, K ) = A( K, K ) - CDOTU( N-K, WORK, 1, A( K+1, K ),
+     $                     1 )
+            END IF
+            KSTEP = 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Invert the diagonal block.
+*
+            T = A( K, K-1 )
+            AK = A( K-1, K-1 ) / T
+            AKP1 = A( K, K ) / T
+            AKKP1 = A( K, K-1 ) / T
+            D = T*( AK*AKP1-ONE )
+            A( K-1, K-1 ) = AKP1 / D
+            A( K, K ) = AK / D
+            A( K, K-1 ) = -AKKP1 / D
+*
+*           Compute columns K-1 and K of the inverse.
+*
+            IF( K.LT.N ) THEN
+               CALL CCOPY( N-K, A( K+1, K ), 1, WORK, 1 )
+               CALL CSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1,
+     $                     ZERO, A( K+1, K ), 1 )
+               A( K, K ) = A( K, K ) - CDOTU( N-K, WORK, 1, A( K+1, K ),
+     $                     1 )
+               A( K, K-1 ) = A( K, K-1 ) -
+     $                       CDOTU( N-K, A( K+1, K ), 1, A( K+1, K-1 ),
+     $                       1 )
+               CALL CCOPY( N-K, A( K+1, K-1 ), 1, WORK, 1 )
+               CALL CSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1,
+     $                     ZERO, A( K+1, K-1 ), 1 )
+               A( K-1, K-1 ) = A( K-1, K-1 ) -
+     $                         CDOTU( N-K, WORK, 1, A( K+1, K-1 ), 1 )
+            END IF
+            KSTEP = 2
+         END IF
+*
+         KP = ABS( IPIV( K ) )
+         IF( KP.NE.K ) THEN
+*
+*           Interchange rows and columns K and KP in the trailing
+*           submatrix A(k-1:n,k-1:n)
+*
+            IF( KP.LT.N )
+     $         CALL CSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 )
+            CALL CSWAP( KP-K-1, A( K+1, K ), 1, A( KP, K+1 ), LDA )
+            TEMP = A( K, K )
+            A( K, K ) = A( KP, KP )
+            A( KP, KP ) = TEMP
+            IF( KSTEP.EQ.2 ) THEN
+               TEMP = A( K, K-1 )
+               A( K, K-1 ) = A( KP, K-1 )
+               A( KP, K-1 ) = TEMP
+            END IF
+         END IF
+*
+         K = K - KSTEP
+         GO TO 50
+   60    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CSYTRI
+*
+      END
diff --git a/costa/native/external/lapack/csytrs.f b/costa/native/external/lapack/csytrs.f
new file mode 100644
index 000000000..e392dcf65
--- /dev/null
+++ b/costa/native/external/lapack/csytrs.f
@@ -0,0 +1,370 @@
+      SUBROUTINE CSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX            A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CSYTRS solves a system of linear equations A*X = B with a complex
+*  symmetric matrix A using the factorization A = U*D*U**T or
+*  A = L*D*L**T computed by CSYTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**T;
+*          = 'L':  Lower triangular, form is A = L*D*L**T.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input) COMPLEX array, dimension (LDA,N)
+*          The block diagonal matrix D and the multipliers used to
+*          obtain the factor U or L as computed by CSYTRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by CSYTRF.
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, K, KP
+      COMPLEX            AK, AKM1, AKM1K, BK, BKM1, DENOM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEMV, CGERU, CSCAL, CSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CSYTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Solve A*X = B, where A = U*D*U'.
+*
+*        First solve U*D*X = B, overwriting B with X.
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = N
+   10    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 30
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(U(K)), where U(K) is the transformation
+*           stored in column K of A.
+*
+            CALL CGERU( K-1, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB,
+     $                  B( 1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            CALL CSCAL( NRHS, ONE / A( K, K ), B( K, 1 ), LDB )
+            K = K - 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Interchange rows K-1 and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K-1 )
+     $         CALL CSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(U(K)), where U(K) is the transformation
+*           stored in columns K-1 and K of A.
+*
+            CALL CGERU( K-2, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB,
+     $                  B( 1, 1 ), LDB )
+            CALL CGERU( K-2, NRHS, -ONE, A( 1, K-1 ), 1, B( K-1, 1 ),
+     $                  LDB, B( 1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            AKM1K = A( K-1, K )
+            AKM1 = A( K-1, K-1 ) / AKM1K
+            AK = A( K, K ) / AKM1K
+            DENOM = AKM1*AK - ONE
+            DO 20 J = 1, NRHS
+               BKM1 = B( K-1, J ) / AKM1K
+               BK = B( K, J ) / AKM1K
+               B( K-1, J ) = ( AK*BKM1-BK ) / DENOM
+               B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM
+   20       CONTINUE
+            K = K - 2
+         END IF
+*
+         GO TO 10
+   30    CONTINUE
+*
+*        Next solve U'*X = B, overwriting B with X.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+   40    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 50
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Multiply by inv(U'(K)), where U(K) is the transformation
+*           stored in column K of A.
+*
+            CALL CGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, A( 1, K ),
+     $                  1, ONE, B( K, 1 ), LDB )
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            K = K + 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Multiply by inv(U'(K+1)), where U(K+1) is the transformation
+*           stored in columns K and K+1 of A.
+*
+            CALL CGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, A( 1, K ),
+     $                  1, ONE, B( K, 1 ), LDB )
+            CALL CGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB,
+     $                  A( 1, K+1 ), 1, ONE, B( K+1, 1 ), LDB )
+*
+*           Interchange rows K and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K )
+     $         CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            K = K + 2
+         END IF
+*
+         GO TO 40
+   50    CONTINUE
+*
+      ELSE
+*
+*        Solve A*X = B, where A = L*D*L'.
+*
+*        First solve L*D*X = B, overwriting B with X.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+   60    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 80
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(L(K)), where L(K) is the transformation
+*           stored in column K of A.
+*
+            IF( K.LT.N )
+     $         CALL CGERU( N-K, NRHS, -ONE, A( K+1, K ), 1, B( K, 1 ),
+     $                     LDB, B( K+1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            CALL CSCAL( NRHS, ONE / A( K, K ), B( K, 1 ), LDB )
+            K = K + 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Interchange rows K+1 and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K+1 )
+     $         CALL CSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(L(K)), where L(K) is the transformation
+*           stored in columns K and K+1 of A.
+*
+            IF( K.LT.N-1 ) THEN
+               CALL CGERU( N-K-1, NRHS, -ONE, A( K+2, K ), 1, B( K, 1 ),
+     $                     LDB, B( K+2, 1 ), LDB )
+               CALL CGERU( N-K-1, NRHS, -ONE, A( K+2, K+1 ), 1,
+     $                     B( K+1, 1 ), LDB, B( K+2, 1 ), LDB )
+            END IF
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            AKM1K = A( K+1, K )
+            AKM1 = A( K, K ) / AKM1K
+            AK = A( K+1, K+1 ) / AKM1K
+            DENOM = AKM1*AK - ONE
+            DO 70 J = 1, NRHS
+               BKM1 = B( K, J ) / AKM1K
+               BK = B( K+1, J ) / AKM1K
+               B( K, J ) = ( AK*BKM1-BK ) / DENOM
+               B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM
+   70       CONTINUE
+            K = K + 2
+         END IF
+*
+         GO TO 60
+   80    CONTINUE
+*
+*        Next solve L'*X = B, overwriting B with X.
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = N
+   90    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 100
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Multiply by inv(L'(K)), where L(K) is the transformation
+*           stored in column K of A.
+*
+            IF( K.LT.N )
+     $         CALL CGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ),
+     $                     LDB, A( K+1, K ), 1, ONE, B( K, 1 ), LDB )
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            K = K - 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Multiply by inv(L'(K-1)), where L(K-1) is the transformation
+*           stored in columns K-1 and K of A.
+*
+            IF( K.LT.N ) THEN
+               CALL CGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ),
+     $                     LDB, A( K+1, K ), 1, ONE, B( K, 1 ), LDB )
+               CALL CGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ),
+     $                     LDB, A( K+1, K-1 ), 1, ONE, B( K-1, 1 ),
+     $                     LDB )
+            END IF
+*
+*           Interchange rows K and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K )
+     $         CALL CSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            K = K - 2
+         END IF
+*
+         GO TO 90
+  100    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CSYTRS
+*
+      END
diff --git a/costa/native/external/lapack/ctbcon.f b/costa/native/external/lapack/ctbcon.f
new file mode 100644
index 000000000..84beed289
--- /dev/null
+++ b/costa/native/external/lapack/ctbcon.f
@@ -0,0 +1,205 @@
+      SUBROUTINE CTBCON( NORM, UPLO, DIAG, N, KD, AB, LDAB, RCOND, WORK,
+     $                   RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORM, UPLO
+      INTEGER            INFO, KD, LDAB, N
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               RWORK( * )
+      COMPLEX            AB( LDAB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTBCON estimates the reciprocal of the condition number of a
+*  triangular band matrix A, in either the 1-norm or the infinity-norm.
+*
+*  The norm of A is computed and an estimate is obtained for
+*  norm(inv(A)), then the reciprocal of the condition number is
+*  computed as
+*     RCOND = 1 / ( norm(A) * norm(inv(A)) ).
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies whether the 1-norm condition number or the
+*          infinity-norm condition number is required:
+*          = '1' or 'O':  1-norm;
+*          = 'I':         Infinity-norm.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals or subdiagonals of the
+*          triangular band matrix A.  KD >= 0.
+*
+*  AB      (input) COMPLEX array, dimension (LDAB,N)
+*          The upper or lower triangular band matrix A, stored in the
+*          first kd+1 rows of the array. The j-th column of A is stored
+*          in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*          If DIAG = 'U', the diagonal elements of A are not referenced
+*          and are assumed to be 1.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  RCOND   (output) REAL
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(norm(A) * norm(inv(A))).
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, ONENRM, UPPER
+      CHARACTER          NORMIN
+      INTEGER            IX, KASE, KASE1
+      REAL               AINVNM, ANORM, SCALE, SMLNUM, XNORM
+      COMPLEX            ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ICAMAX
+      REAL               CLANTB, SLAMCH
+      EXTERNAL           LSAME, ICAMAX, CLANTB, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLACON, CLATBS, CSRSCL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MAX, REAL
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+      IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CTBCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      END IF
+*
+      RCOND = ZERO
+      SMLNUM = SLAMCH( 'Safe minimum' )*REAL( MAX( N, 1 ) )
+*
+*     Compute the 1-norm of the triangular matrix A or A'.
+*
+      ANORM = CLANTB( NORM, UPLO, DIAG, N, KD, AB, LDAB, RWORK )
+*
+*     Continue only if ANORM > 0.
+*
+      IF( ANORM.GT.ZERO ) THEN
+*
+*        Estimate the 1-norm of the inverse of A.
+*
+         AINVNM = ZERO
+         NORMIN = 'N'
+         IF( ONENRM ) THEN
+            KASE1 = 1
+         ELSE
+            KASE1 = 2
+         END IF
+         KASE = 0
+   10    CONTINUE
+         CALL CLACON( N, WORK( N+1 ), WORK, AINVNM, KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.KASE1 ) THEN
+*
+*              Multiply by inv(A).
+*
+               CALL CLATBS( UPLO, 'No transpose', DIAG, NORMIN, N, KD,
+     $                      AB, LDAB, WORK, SCALE, RWORK, INFO )
+            ELSE
+*
+*              Multiply by inv(A').
+*
+               CALL CLATBS( UPLO, 'Conjugate transpose', DIAG, NORMIN,
+     $                      N, KD, AB, LDAB, WORK, SCALE, RWORK, INFO )
+            END IF
+            NORMIN = 'Y'
+*
+*           Multiply by 1/SCALE if doing so will not cause overflow.
+*
+            IF( SCALE.NE.ONE ) THEN
+               IX = ICAMAX( N, WORK, 1 )
+               XNORM = CABS1( WORK( IX ) )
+               IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO )
+     $            GO TO 20
+               CALL CSRSCL( N, SCALE, WORK, 1 )
+            END IF
+            GO TO 10
+         END IF
+*
+*        Compute the estimate of the reciprocal condition number.
+*
+         IF( AINVNM.NE.ZERO )
+     $      RCOND = ( ONE / ANORM ) / AINVNM
+      END IF
+*
+   20 CONTINUE
+      RETURN
+*
+*     End of CTBCON
+*
+      END
diff --git a/costa/native/external/lapack/ctbrfs.f b/costa/native/external/lapack/ctbrfs.f
new file mode 100644
index 000000000..2b6736a33
--- /dev/null
+++ b/costa/native/external/lapack/ctbrfs.f
@@ -0,0 +1,393 @@
+      SUBROUTINE CTBRFS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B,
+     $                   LDB, X, LDX, FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, TRANS, UPLO
+      INTEGER            INFO, KD, LDAB, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      REAL               BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX            AB( LDAB, * ), B( LDB, * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTBRFS provides error bounds and backward error estimates for the
+*  solution to a system of linear equations with a triangular band
+*  coefficient matrix.
+*
+*  The solution matrix X must be computed by CTBTRS or some other
+*  means before entering this routine.  CTBRFS does not do iterative
+*  refinement because doing so cannot improve the backward error.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals or subdiagonals of the
+*          triangular band matrix A.  KD >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AB      (input) COMPLEX array, dimension (LDAB,N)
+*          The upper or lower triangular band matrix A, stored in the
+*          first kd+1 rows of the array. The j-th column of A is stored
+*          in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*          If DIAG = 'U', the diagonal elements of A are not referenced
+*          and are assumed to be 1.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  B       (input) COMPLEX array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input) COMPLEX array, dimension (LDX,NRHS)
+*          The solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN, NOUNIT, UPPER
+      CHARACTER          TRANSN, TRANST
+      INTEGER            I, J, K, KASE, NZ
+      REAL               EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+      COMPLEX            ZDUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CCOPY, CLACON, CTBMV, CTBSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MAX, MIN, REAL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -8
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CTBRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      IF( NOTRAN ) THEN
+         TRANSN = 'N'
+         TRANST = 'C'
+      ELSE
+         TRANSN = 'C'
+         TRANST = 'N'
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = KD + 2
+      EPS = SLAMCH( 'Epsilon' )
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 250 J = 1, NRHS
+*
+*        Compute residual R = B - op(A) * X,
+*        where op(A) = A, A**T, or A**H, depending on TRANS.
+*
+         CALL CCOPY( N, X( 1, J ), 1, WORK, 1 )
+         CALL CTBMV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, WORK, 1 )
+         CALL CAXPY( N, -ONE, B( 1, J ), 1, WORK, 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 20 I = 1, N
+            RWORK( I ) = CABS1( B( I, J ) )
+   20    CONTINUE
+*
+         IF( NOTRAN ) THEN
+*
+*           Compute abs(A)*abs(X) + abs(B).
+*
+            IF( UPPER ) THEN
+               IF( NOUNIT ) THEN
+                  DO 40 K = 1, N
+                     XK = CABS1( X( K, J ) )
+                     DO 30 I = MAX( 1, K-KD ), K
+                        RWORK( I ) = RWORK( I ) +
+     $                               CABS1( AB( KD+1+I-K, K ) )*XK
+   30                CONTINUE
+   40             CONTINUE
+               ELSE
+                  DO 60 K = 1, N
+                     XK = CABS1( X( K, J ) )
+                     DO 50 I = MAX( 1, K-KD ), K - 1
+                        RWORK( I ) = RWORK( I ) +
+     $                               CABS1( AB( KD+1+I-K, K ) )*XK
+   50                CONTINUE
+                     RWORK( K ) = RWORK( K ) + XK
+   60             CONTINUE
+               END IF
+            ELSE
+               IF( NOUNIT ) THEN
+                  DO 80 K = 1, N
+                     XK = CABS1( X( K, J ) )
+                     DO 70 I = K, MIN( N, K+KD )
+                        RWORK( I ) = RWORK( I ) +
+     $                               CABS1( AB( 1+I-K, K ) )*XK
+   70                CONTINUE
+   80             CONTINUE
+               ELSE
+                  DO 100 K = 1, N
+                     XK = CABS1( X( K, J ) )
+                     DO 90 I = K + 1, MIN( N, K+KD )
+                        RWORK( I ) = RWORK( I ) +
+     $                               CABS1( AB( 1+I-K, K ) )*XK
+   90                CONTINUE
+                     RWORK( K ) = RWORK( K ) + XK
+  100             CONTINUE
+               END IF
+            END IF
+         ELSE
+*
+*           Compute abs(A**H)*abs(X) + abs(B).
+*
+            IF( UPPER ) THEN
+               IF( NOUNIT ) THEN
+                  DO 120 K = 1, N
+                     S = ZERO
+                     DO 110 I = MAX( 1, K-KD ), K
+                        S = S + CABS1( AB( KD+1+I-K, K ) )*
+     $                      CABS1( X( I, J ) )
+  110                CONTINUE
+                     RWORK( K ) = RWORK( K ) + S
+  120             CONTINUE
+               ELSE
+                  DO 140 K = 1, N
+                     S = CABS1( X( K, J ) )
+                     DO 130 I = MAX( 1, K-KD ), K - 1
+                        S = S + CABS1( AB( KD+1+I-K, K ) )*
+     $                      CABS1( X( I, J ) )
+  130                CONTINUE
+                     RWORK( K ) = RWORK( K ) + S
+  140             CONTINUE
+               END IF
+            ELSE
+               IF( NOUNIT ) THEN
+                  DO 160 K = 1, N
+                     S = ZERO
+                     DO 150 I = K, MIN( N, K+KD )
+                        S = S + CABS1( AB( 1+I-K, K ) )*
+     $                      CABS1( X( I, J ) )
+  150                CONTINUE
+                     RWORK( K ) = RWORK( K ) + S
+  160             CONTINUE
+               ELSE
+                  DO 180 K = 1, N
+                     S = CABS1( X( K, J ) )
+                     DO 170 I = K + 1, MIN( N, K+KD )
+                        S = S + CABS1( AB( 1+I-K, K ) )*
+     $                      CABS1( X( I, J ) )
+  170                CONTINUE
+                     RWORK( K ) = RWORK( K ) + S
+  180             CONTINUE
+               END IF
+            END IF
+         END IF
+         S = ZERO
+         DO 190 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) )
+            ELSE
+               S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) /
+     $             ( RWORK( I )+SAFE1 ) )
+            END IF
+  190    CONTINUE
+         BERR( J ) = S
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(op(A)))*
+*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(op(A)) is the inverse of op(A)
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(op(A))*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use CLACON to estimate the infinity-norm of the matrix
+*           inv(op(A)) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) )))
+*
+         DO 200 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I )
+            ELSE
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) +
+     $                      SAFE1
+            END IF
+  200    CONTINUE
+*
+         KASE = 0
+  210    CONTINUE
+         CALL CLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(op(A)**H).
+*
+               CALL CTBSV( UPLO, TRANST, DIAG, N, KD, AB, LDAB, WORK,
+     $                     1 )
+               DO 220 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  220          CONTINUE
+            ELSE
+*
+*              Multiply by inv(op(A))*diag(W).
+*
+               DO 230 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  230          CONTINUE
+               CALL CTBSV( UPLO, TRANSN, DIAG, N, KD, AB, LDAB, WORK,
+     $                     1 )
+            END IF
+            GO TO 210
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 240 I = 1, N
+            LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) )
+  240    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  250 CONTINUE
+*
+      RETURN
+*
+*     End of CTBRFS
+*
+      END
diff --git a/costa/native/external/lapack/ctbtrs.f b/costa/native/external/lapack/ctbtrs.f
new file mode 100644
index 000000000..67865b341
--- /dev/null
+++ b/costa/native/external/lapack/ctbtrs.f
@@ -0,0 +1,163 @@
+      SUBROUTINE CTBTRS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B,
+     $                   LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, TRANS, UPLO
+      INTEGER            INFO, KD, LDAB, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            AB( LDAB, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTBTRS solves a triangular system of the form
+*
+*     A * X = B,  A**T * X = B,  or  A**H * X = B,
+*
+*  where A is a triangular band matrix of order N, and B is an
+*  N-by-NRHS matrix.  A check is made to verify that A is nonsingular.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals or subdiagonals of the
+*          triangular band matrix A.  KD >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AB      (input) COMPLEX array, dimension (LDAB,N)
+*          The upper or lower triangular band matrix A, stored in the
+*          first kd+1 rows of AB.  The j-th column of A is stored
+*          in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*          If DIAG = 'U', the diagonal elements of A are not referenced
+*          and are assumed to be 1.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, if INFO = 0, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the i-th diagonal element of A is zero,
+*                indicating that the matrix is singular and the
+*                solutions X have not been computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, UPPER
+      INTEGER            J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CTBSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOUNIT = LSAME( DIAG, 'N' )
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.
+     $         LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -8
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CTBTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Check for singularity.
+*
+      IF( NOUNIT ) THEN
+         IF( UPPER ) THEN
+            DO 10 INFO = 1, N
+               IF( AB( KD+1, INFO ).EQ.ZERO )
+     $            RETURN
+   10       CONTINUE
+         ELSE
+            DO 20 INFO = 1, N
+               IF( AB( 1, INFO ).EQ.ZERO )
+     $            RETURN
+   20       CONTINUE
+         END IF
+      END IF
+      INFO = 0
+*
+*     Solve A * X = B,  A**T * X = B,  or  A**H * X = B.
+*
+      DO 30 J = 1, NRHS
+         CALL CTBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, B( 1, J ), 1 )
+   30 CONTINUE
+*
+      RETURN
+*
+*     End of CTBTRS
+*
+      END
diff --git a/costa/native/external/lapack/ctgevc.f b/costa/native/external/lapack/ctgevc.f
new file mode 100644
index 000000000..faec3da87
--- /dev/null
+++ b/costa/native/external/lapack/ctgevc.f
@@ -0,0 +1,632 @@
+      SUBROUTINE CTGEVC( SIDE, HOWMNY, SELECT, N, A, LDA, B, LDB, VL,
+     $                   LDVL, VR, LDVR, MM, M, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          HOWMNY, SIDE
+      INTEGER            INFO, LDA, LDB, LDVL, LDVR, M, MM, N
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            SELECT( * )
+      REAL               RWORK( * )
+      COMPLEX            A( LDA, * ), B( LDB, * ), VL( LDVL, * ),
+     $                   VR( LDVR, * ), WORK( * )
+*     ..
+*
+*
+*  Purpose
+*  =======
+*
+*  CTGEVC computes some or all of the right and/or left generalized
+*  eigenvectors of a pair of complex upper triangular matrices (A,B).
+*
+*  The right generalized eigenvector x and the left generalized
+*  eigenvector y of (A,B) corresponding to a generalized eigenvalue
+*  w are defined by:
+*
+*          (A - wB) * x = 0  and  y**H * (A - wB) = 0
+*
+*  where y**H denotes the conjugate tranpose of y.
+*
+*  If an eigenvalue w is determined by zero diagonal elements of both A
+*  and B, a unit vector is returned as the corresponding eigenvector.
+*
+*  If all eigenvectors are requested, the routine may either return
+*  the matrices X and/or Y of right or left eigenvectors of (A,B), or
+*  the products Z*X and/or Q*Y, where Z and Q are input unitary
+*  matrices.  If (A,B) was obtained from the generalized Schur
+*  factorization of an original pair of matrices
+*     (A0,B0) = (Q*A*Z**H,Q*B*Z**H),
+*  then Z*X and Q*Y are the matrices of right or left eigenvectors of
+*  A.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'R': compute right eigenvectors only;
+*          = 'L': compute left eigenvectors only;
+*          = 'B': compute both right and left eigenvectors.
+*
+*  HOWMNY  (input) CHARACTER*1
+*          = 'A': compute all right and/or left eigenvectors;
+*          = 'B': compute all right and/or left eigenvectors, and
+*                 backtransform them using the input matrices supplied
+*                 in VR and/or VL;
+*          = 'S': compute selected right and/or left eigenvectors,
+*                 specified by the logical array SELECT.
+*
+*  SELECT  (input) LOGICAL array, dimension (N)
+*          If HOWMNY='S', SELECT specifies the eigenvectors to be
+*          computed.
+*          If HOWMNY='A' or 'B', SELECT is not referenced.
+*          To select the eigenvector corresponding to the j-th
+*          eigenvalue, SELECT(j) must be set to .TRUE..
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  A       (input) COMPLEX array, dimension (LDA,N)
+*          The upper triangular matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of array A.  LDA >= max(1,N).
+*
+*  B       (input) COMPLEX array, dimension (LDB,N)
+*          The upper triangular matrix B.  B must have real diagonal
+*          elements.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of array B.  LDB >= max(1,N).
+*
+*  VL      (input/output) COMPLEX array, dimension (LDVL,MM)
+*          On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must
+*          contain an N-by-N matrix Q (usually the unitary matrix Q
+*          of left Schur vectors returned by CHGEQZ).
+*          On exit, if SIDE = 'L' or 'B', VL contains:
+*          if HOWMNY = 'A', the matrix Y of left eigenvectors of (A,B);
+*          if HOWMNY = 'B', the matrix Q*Y;
+*          if HOWMNY = 'S', the left eigenvectors of (A,B) specified by
+*                      SELECT, stored consecutively in the columns of
+*                      VL, in the same order as their eigenvalues.
+*          If SIDE = 'R', VL is not referenced.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of array VL.
+*          LDVL >= max(1,N) if SIDE = 'L' or 'B'; LDVL >= 1 otherwise.
+*
+*  VR      (input/output) COMPLEX array, dimension (LDVR,MM)
+*          On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must
+*          contain an N-by-N matrix Q (usually the unitary matrix Z
+*          of right Schur vectors returned by CHGEQZ).
+*          On exit, if SIDE = 'R' or 'B', VR contains:
+*          if HOWMNY = 'A', the matrix X of right eigenvectors of (A,B);
+*          if HOWMNY = 'B', the matrix Z*X;
+*          if HOWMNY = 'S', the right eigenvectors of (A,B) specified by
+*                      SELECT, stored consecutively in the columns of
+*                      VR, in the same order as their eigenvalues.
+*          If SIDE = 'L', VR is not referenced.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the array VR.
+*          LDVR >= max(1,N) if SIDE = 'R' or 'B'; LDVR >= 1 otherwise.
+*
+*  MM      (input) INTEGER
+*          The number of columns in the arrays VL and/or VR. MM >= M.
+*
+*  M       (output) INTEGER
+*          The number of columns in the arrays VL and/or VR actually
+*          used to store the eigenvectors.  If HOWMNY = 'A' or 'B', M
+*          is set to N.  Each selected eigenvector occupies one column.
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+      COMPLEX            CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            COMPL, COMPR, ILALL, ILBACK, ILBBAD, ILCOMP,
+     $                   LSA, LSB
+      INTEGER            I, IBEG, IEIG, IEND, IHWMNY, IM, ISIDE, ISRC,
+     $                   J, JE, JR
+      REAL               ACOEFA, ACOEFF, ANORM, ASCALE, BCOEFA, BIG,
+     $                   BIGNUM, BNORM, BSCALE, DMIN, SAFMIN, SBETA,
+     $                   SCALE, SMALL, TEMP, ULP, XMAX
+      COMPLEX            BCOEFF, CA, CB, D, SALPHA, SUM, SUMA, SUMB, X
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      COMPLEX            CLADIV
+      EXTERNAL           LSAME, SLAMCH, CLADIV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEMV, SLABAD, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, CMPLX, CONJG, MAX, MIN, REAL
+*     ..
+*     .. Statement Functions ..
+      REAL               ABS1
+*     ..
+*     .. Statement Function definitions ..
+      ABS1( X ) = ABS( REAL( X ) ) + ABS( AIMAG( X ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and Test the input parameters
+*
+      IF( LSAME( HOWMNY, 'A' ) ) THEN
+         IHWMNY = 1
+         ILALL = .TRUE.
+         ILBACK = .FALSE.
+      ELSE IF( LSAME( HOWMNY, 'S' ) ) THEN
+         IHWMNY = 2
+         ILALL = .FALSE.
+         ILBACK = .FALSE.
+      ELSE IF( LSAME( HOWMNY, 'B' ) .OR. LSAME( HOWMNY, 'T' ) ) THEN
+         IHWMNY = 3
+         ILALL = .TRUE.
+         ILBACK = .TRUE.
+      ELSE
+         IHWMNY = -1
+      END IF
+*
+      IF( LSAME( SIDE, 'R' ) ) THEN
+         ISIDE = 1
+         COMPL = .FALSE.
+         COMPR = .TRUE.
+      ELSE IF( LSAME( SIDE, 'L' ) ) THEN
+         ISIDE = 2
+         COMPL = .TRUE.
+         COMPR = .FALSE.
+      ELSE IF( LSAME( SIDE, 'B' ) ) THEN
+         ISIDE = 3
+         COMPL = .TRUE.
+         COMPR = .TRUE.
+      ELSE
+         ISIDE = -1
+      END IF
+*
+      INFO = 0
+      IF( ISIDE.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( IHWMNY.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CTGEVC', -INFO )
+         RETURN
+      END IF
+*
+*     Count the number of eigenvectors
+*
+      IF( .NOT.ILALL ) THEN
+         IM = 0
+         DO 10 J = 1, N
+            IF( SELECT( J ) )
+     $         IM = IM + 1
+   10    CONTINUE
+      ELSE
+         IM = N
+      END IF
+*
+*     Check diagonal of B
+*
+      ILBBAD = .FALSE.
+      DO 20 J = 1, N
+         IF( AIMAG( B( J, J ) ).NE.ZERO )
+     $      ILBBAD = .TRUE.
+   20 CONTINUE
+*
+      IF( ILBBAD ) THEN
+         INFO = -7
+      ELSE IF( COMPL .AND. LDVL.LT.N .OR. LDVL.LT.1 ) THEN
+         INFO = -10
+      ELSE IF( COMPR .AND. LDVR.LT.N .OR. LDVR.LT.1 ) THEN
+         INFO = -12
+      ELSE IF( MM.LT.IM ) THEN
+         INFO = -13
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CTGEVC', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = IM
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Machine Constants
+*
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      BIG = ONE / SAFMIN
+      CALL SLABAD( SAFMIN, BIG )
+      ULP = SLAMCH( 'Epsilon' )*SLAMCH( 'Base' )
+      SMALL = SAFMIN*N / ULP
+      BIG = ONE / SMALL
+      BIGNUM = ONE / ( SAFMIN*N )
+*
+*     Compute the 1-norm of each column of the strictly upper triangular
+*     part of A and B to check for possible overflow in the triangular
+*     solver.
+*
+      ANORM = ABS1( A( 1, 1 ) )
+      BNORM = ABS1( B( 1, 1 ) )
+      RWORK( 1 ) = ZERO
+      RWORK( N+1 ) = ZERO
+      DO 40 J = 2, N
+         RWORK( J ) = ZERO
+         RWORK( N+J ) = ZERO
+         DO 30 I = 1, J - 1
+            RWORK( J ) = RWORK( J ) + ABS1( A( I, J ) )
+            RWORK( N+J ) = RWORK( N+J ) + ABS1( B( I, J ) )
+   30    CONTINUE
+         ANORM = MAX( ANORM, RWORK( J )+ABS1( A( J, J ) ) )
+         BNORM = MAX( BNORM, RWORK( N+J )+ABS1( B( J, J ) ) )
+   40 CONTINUE
+*
+      ASCALE = ONE / MAX( ANORM, SAFMIN )
+      BSCALE = ONE / MAX( BNORM, SAFMIN )
+*
+*     Left eigenvectors
+*
+      IF( COMPL ) THEN
+         IEIG = 0
+*
+*        Main loop over eigenvalues
+*
+         DO 140 JE = 1, N
+            IF( ILALL ) THEN
+               ILCOMP = .TRUE.
+            ELSE
+               ILCOMP = SELECT( JE )
+            END IF
+            IF( ILCOMP ) THEN
+               IEIG = IEIG + 1
+*
+               IF( ABS1( A( JE, JE ) ).LE.SAFMIN .AND.
+     $             ABS( REAL( B( JE, JE ) ) ).LE.SAFMIN ) THEN
+*
+*                 Singular matrix pencil -- return unit eigenvector
+*
+                  DO 50 JR = 1, N
+                     VL( JR, IEIG ) = CZERO
+   50             CONTINUE
+                  VL( IEIG, IEIG ) = CONE
+                  GO TO 140
+               END IF
+*
+*              Non-singular eigenvalue:
+*              Compute coefficients  a  and  b  in
+*                   H
+*                 y  ( a A - b B ) = 0
+*
+               TEMP = ONE / MAX( ABS1( A( JE, JE ) )*ASCALE,
+     $                ABS( REAL( B( JE, JE ) ) )*BSCALE, SAFMIN )
+               SALPHA = ( TEMP*A( JE, JE ) )*ASCALE
+               SBETA = ( TEMP*REAL( B( JE, JE ) ) )*BSCALE
+               ACOEFF = SBETA*ASCALE
+               BCOEFF = SALPHA*BSCALE
+*
+*              Scale to avoid underflow
+*
+               LSA = ABS( SBETA ).GE.SAFMIN .AND. ABS( ACOEFF ).LT.SMALL
+               LSB = ABS1( SALPHA ).GE.SAFMIN .AND. ABS1( BCOEFF ).LT.
+     $               SMALL
+*
+               SCALE = ONE
+               IF( LSA )
+     $            SCALE = ( SMALL / ABS( SBETA ) )*MIN( ANORM, BIG )
+               IF( LSB )
+     $            SCALE = MAX( SCALE, ( SMALL / ABS1( SALPHA ) )*
+     $                    MIN( BNORM, BIG ) )
+               IF( LSA .OR. LSB ) THEN
+                  SCALE = MIN( SCALE, ONE /
+     $                    ( SAFMIN*MAX( ONE, ABS( ACOEFF ),
+     $                    ABS1( BCOEFF ) ) ) )
+                  IF( LSA ) THEN
+                     ACOEFF = ASCALE*( SCALE*SBETA )
+                  ELSE
+                     ACOEFF = SCALE*ACOEFF
+                  END IF
+                  IF( LSB ) THEN
+                     BCOEFF = BSCALE*( SCALE*SALPHA )
+                  ELSE
+                     BCOEFF = SCALE*BCOEFF
+                  END IF
+               END IF
+*
+               ACOEFA = ABS( ACOEFF )
+               BCOEFA = ABS1( BCOEFF )
+               XMAX = ONE
+               DO 60 JR = 1, N
+                  WORK( JR ) = CZERO
+   60          CONTINUE
+               WORK( JE ) = CONE
+               DMIN = MAX( ULP*ACOEFA*ANORM, ULP*BCOEFA*BNORM, SAFMIN )
+*
+*                                              H
+*              Triangular solve of  (a A - b B)  y = 0
+*
+*                                      H
+*              (rowwise in  (a A - b B) , or columnwise in a A - b B)
+*
+               DO 100 J = JE + 1, N
+*
+*                 Compute
+*                       j-1
+*                 SUM = sum  conjg( a*A(k,j) - b*B(k,j) )*x(k)
+*                       k=je
+*                 (Scale if necessary)
+*
+                  TEMP = ONE / XMAX
+                  IF( ACOEFA*RWORK( J )+BCOEFA*RWORK( N+J ).GT.BIGNUM*
+     $                TEMP ) THEN
+                     DO 70 JR = JE, J - 1
+                        WORK( JR ) = TEMP*WORK( JR )
+   70                CONTINUE
+                     XMAX = ONE
+                  END IF
+                  SUMA = CZERO
+                  SUMB = CZERO
+*
+                  DO 80 JR = JE, J - 1
+                     SUMA = SUMA + CONJG( A( JR, J ) )*WORK( JR )
+                     SUMB = SUMB + CONJG( B( JR, J ) )*WORK( JR )
+   80             CONTINUE
+                  SUM = ACOEFF*SUMA - CONJG( BCOEFF )*SUMB
+*
+*                 Form x(j) = - SUM / conjg( a*A(j,j) - b*B(j,j) )
+*
+*                 with scaling and perturbation of the denominator
+*
+                  D = CONJG( ACOEFF*A( J, J )-BCOEFF*B( J, J ) )
+                  IF( ABS1( D ).LE.DMIN )
+     $               D = CMPLX( DMIN )
+*
+                  IF( ABS1( D ).LT.ONE ) THEN
+                     IF( ABS1( SUM ).GE.BIGNUM*ABS1( D ) ) THEN
+                        TEMP = ONE / ABS1( SUM )
+                        DO 90 JR = JE, J - 1
+                           WORK( JR ) = TEMP*WORK( JR )
+   90                   CONTINUE
+                        XMAX = TEMP*XMAX
+                        SUM = TEMP*SUM
+                     END IF
+                  END IF
+                  WORK( J ) = CLADIV( -SUM, D )
+                  XMAX = MAX( XMAX, ABS1( WORK( J ) ) )
+  100          CONTINUE
+*
+*              Back transform eigenvector if HOWMNY='B'.
+*
+               IF( ILBACK ) THEN
+                  CALL CGEMV( 'N', N, N+1-JE, CONE, VL( 1, JE ), LDVL,
+     $                        WORK( JE ), 1, CZERO, WORK( N+1 ), 1 )
+                  ISRC = 2
+                  IBEG = 1
+               ELSE
+                  ISRC = 1
+                  IBEG = JE
+               END IF
+*
+*              Copy and scale eigenvector into column of VL
+*
+               XMAX = ZERO
+               DO 110 JR = IBEG, N
+                  XMAX = MAX( XMAX, ABS1( WORK( ( ISRC-1 )*N+JR ) ) )
+  110          CONTINUE
+*
+               IF( XMAX.GT.SAFMIN ) THEN
+                  TEMP = ONE / XMAX
+                  DO 120 JR = IBEG, N
+                     VL( JR, IEIG ) = TEMP*WORK( ( ISRC-1 )*N+JR )
+  120             CONTINUE
+               ELSE
+                  IBEG = N + 1
+               END IF
+*
+               DO 130 JR = 1, IBEG - 1
+                  VL( JR, IEIG ) = CZERO
+  130          CONTINUE
+*
+            END IF
+  140    CONTINUE
+      END IF
+*
+*     Right eigenvectors
+*
+      IF( COMPR ) THEN
+         IEIG = IM + 1
+*
+*        Main loop over eigenvalues
+*
+         DO 250 JE = N, 1, -1
+            IF( ILALL ) THEN
+               ILCOMP = .TRUE.
+            ELSE
+               ILCOMP = SELECT( JE )
+            END IF
+            IF( ILCOMP ) THEN
+               IEIG = IEIG - 1
+*
+               IF( ABS1( A( JE, JE ) ).LE.SAFMIN .AND.
+     $             ABS( REAL( B( JE, JE ) ) ).LE.SAFMIN ) THEN
+*
+*                 Singular matrix pencil -- return unit eigenvector
+*
+                  DO 150 JR = 1, N
+                     VR( JR, IEIG ) = CZERO
+  150             CONTINUE
+                  VR( IEIG, IEIG ) = CONE
+                  GO TO 250
+               END IF
+*
+*              Non-singular eigenvalue:
+*              Compute coefficients  a  and  b  in
+*
+*              ( a A - b B ) x  = 0
+*
+               TEMP = ONE / MAX( ABS1( A( JE, JE ) )*ASCALE,
+     $                ABS( REAL( B( JE, JE ) ) )*BSCALE, SAFMIN )
+               SALPHA = ( TEMP*A( JE, JE ) )*ASCALE
+               SBETA = ( TEMP*REAL( B( JE, JE ) ) )*BSCALE
+               ACOEFF = SBETA*ASCALE
+               BCOEFF = SALPHA*BSCALE
+*
+*              Scale to avoid underflow
+*
+               LSA = ABS( SBETA ).GE.SAFMIN .AND. ABS( ACOEFF ).LT.SMALL
+               LSB = ABS1( SALPHA ).GE.SAFMIN .AND. ABS1( BCOEFF ).LT.
+     $               SMALL
+*
+               SCALE = ONE
+               IF( LSA )
+     $            SCALE = ( SMALL / ABS( SBETA ) )*MIN( ANORM, BIG )
+               IF( LSB )
+     $            SCALE = MAX( SCALE, ( SMALL / ABS1( SALPHA ) )*
+     $                    MIN( BNORM, BIG ) )
+               IF( LSA .OR. LSB ) THEN
+                  SCALE = MIN( SCALE, ONE /
+     $                    ( SAFMIN*MAX( ONE, ABS( ACOEFF ),
+     $                    ABS1( BCOEFF ) ) ) )
+                  IF( LSA ) THEN
+                     ACOEFF = ASCALE*( SCALE*SBETA )
+                  ELSE
+                     ACOEFF = SCALE*ACOEFF
+                  END IF
+                  IF( LSB ) THEN
+                     BCOEFF = BSCALE*( SCALE*SALPHA )
+                  ELSE
+                     BCOEFF = SCALE*BCOEFF
+                  END IF
+               END IF
+*
+               ACOEFA = ABS( ACOEFF )
+               BCOEFA = ABS1( BCOEFF )
+               XMAX = ONE
+               DO 160 JR = 1, N
+                  WORK( JR ) = CZERO
+  160          CONTINUE
+               WORK( JE ) = CONE
+               DMIN = MAX( ULP*ACOEFA*ANORM, ULP*BCOEFA*BNORM, SAFMIN )
+*
+*              Triangular solve of  (a A - b B) x = 0  (columnwise)
+*
+*              WORK(1:j-1) contains sums w,
+*              WORK(j+1:JE) contains x
+*
+               DO 170 JR = 1, JE - 1
+                  WORK( JR ) = ACOEFF*A( JR, JE ) - BCOEFF*B( JR, JE )
+  170          CONTINUE
+               WORK( JE ) = CONE
+*
+               DO 210 J = JE - 1, 1, -1
+*
+*                 Form x(j) := - w(j) / d
+*                 with scaling and perturbation of the denominator
+*
+                  D = ACOEFF*A( J, J ) - BCOEFF*B( J, J )
+                  IF( ABS1( D ).LE.DMIN )
+     $               D = CMPLX( DMIN )
+*
+                  IF( ABS1( D ).LT.ONE ) THEN
+                     IF( ABS1( WORK( J ) ).GE.BIGNUM*ABS1( D ) ) THEN
+                        TEMP = ONE / ABS1( WORK( J ) )
+                        DO 180 JR = 1, JE
+                           WORK( JR ) = TEMP*WORK( JR )
+  180                   CONTINUE
+                     END IF
+                  END IF
+*
+                  WORK( J ) = CLADIV( -WORK( J ), D )
+*
+                  IF( J.GT.1 ) THEN
+*
+*                    w = w + x(j)*(a A(*,j) - b B(*,j) ) with scaling
+*
+                     IF( ABS1( WORK( J ) ).GT.ONE ) THEN
+                        TEMP = ONE / ABS1( WORK( J ) )
+                        IF( ACOEFA*RWORK( J )+BCOEFA*RWORK( N+J ).GE.
+     $                      BIGNUM*TEMP ) THEN
+                           DO 190 JR = 1, JE
+                              WORK( JR ) = TEMP*WORK( JR )
+  190                      CONTINUE
+                        END IF
+                     END IF
+*
+                     CA = ACOEFF*WORK( J )
+                     CB = BCOEFF*WORK( J )
+                     DO 200 JR = 1, J - 1
+                        WORK( JR ) = WORK( JR ) + CA*A( JR, J ) -
+     $                               CB*B( JR, J )
+  200                CONTINUE
+                  END IF
+  210          CONTINUE
+*
+*              Back transform eigenvector if HOWMNY='B'.
+*
+               IF( ILBACK ) THEN
+                  CALL CGEMV( 'N', N, JE, CONE, VR, LDVR, WORK, 1,
+     $                        CZERO, WORK( N+1 ), 1 )
+                  ISRC = 2
+                  IEND = N
+               ELSE
+                  ISRC = 1
+                  IEND = JE
+               END IF
+*
+*              Copy and scale eigenvector into column of VR
+*
+               XMAX = ZERO
+               DO 220 JR = 1, IEND
+                  XMAX = MAX( XMAX, ABS1( WORK( ( ISRC-1 )*N+JR ) ) )
+  220          CONTINUE
+*
+               IF( XMAX.GT.SAFMIN ) THEN
+                  TEMP = ONE / XMAX
+                  DO 230 JR = 1, IEND
+                     VR( JR, IEIG ) = TEMP*WORK( ( ISRC-1 )*N+JR )
+  230             CONTINUE
+               ELSE
+                  IEND = 0
+               END IF
+*
+               DO 240 JR = IEND + 1, N
+                  VR( JR, IEIG ) = CZERO
+  240          CONTINUE
+*
+            END IF
+  250    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CTGEVC
+*
+      END
diff --git a/costa/native/external/lapack/ctgex2.f b/costa/native/external/lapack/ctgex2.f
new file mode 100644
index 000000000..ef3913a7d
--- /dev/null
+++ b/costa/native/external/lapack/ctgex2.f
@@ -0,0 +1,264 @@
+      SUBROUTINE CTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
+     $                   LDZ, J1, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      LOGICAL            WANTQ, WANTZ
+      INTEGER            INFO, J1, LDA, LDB, LDQ, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTGEX2 swaps adjacent diagonal 1 by 1 blocks (A11,B11) and (A22,B22)
+*  in an upper triangular matrix pair (A, B) by an unitary equivalence
+*  transformation.
+*
+*  (A, B) must be in generalized Schur canonical form, that is, A and
+*  B are both upper triangular.
+*
+*  Optionally, the matrices Q and Z of generalized Schur vectors are
+*  updated.
+*
+*         Q(in) * A(in) * Z(in)' = Q(out) * A(out) * Z(out)'
+*         Q(in) * B(in) * Z(in)' = Q(out) * B(out) * Z(out)'
+*
+*
+*  Arguments
+*  =========
+*
+*  WANTQ   (input) LOGICAL
+*          .TRUE. : update the left transformation matrix Q;
+*          .FALSE.: do not update Q.
+*
+*  WANTZ   (input) LOGICAL
+*          .TRUE. : update the right transformation matrix Z;
+*          .FALSE.: do not update Z.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B. N >= 0.
+*
+*  A       (input/output) COMPLEX arrays, dimensions (LDA,N)
+*          On entry, the matrix A in the pair (A, B).
+*          On exit, the updated matrix A.
+*
+*  LDA     (input)  INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX arrays, dimensions (LDB,N)
+*          On entry, the matrix B in the pair (A, B).
+*          On exit, the updated matrix B.
+*
+*  LDB     (input)  INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  Q       (input/output) COMPLEX array, dimension (LDZ,N)
+*          If WANTQ = .TRUE, on entry, the unitary matrix Q. On exit,
+*          the updated matrix Q.
+*          Not referenced if WANTQ = .FALSE..
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q. LDQ >= 1;
+*          If WANTQ = .TRUE., LDQ >= N.
+*
+*  Z       (input/output) COMPLEX array, dimension (LDZ,N)
+*          If WANTZ = .TRUE, on entry, the unitary matrix Z. On exit,
+*          the updated matrix Z.
+*          Not referenced if WANTZ = .FALSE..
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z. LDZ >= 1;
+*          If WANTZ = .TRUE., LDZ >= N.
+*
+*  J1      (input) INTEGER
+*          The index to the first block (A11, B11).
+*
+*  INFO    (output) INTEGER
+*           =0:  Successful exit.
+*           =1:  The transformed matrix pair (A, B) would be too far
+*                from generalized Schur form; the problem is ill-
+*                conditioned. (A, B) may have been partially reordered,
+*                and ILST points to the first row of the current
+*                position of the block being moved.
+*
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  In the current code both weak and strong stability tests are
+*  performed. The user can omit the strong stability test by changing
+*  the internal logical parameter WANDS to .FALSE.. See ref. [2] for
+*  details.
+*
+*  [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the
+*      Generalized Real Schur Form of a Regular Matrix Pair (A, B), in
+*      M.S. Moonen et al (eds), Linear Algebra for Large Scale and
+*      Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218.
+*
+*  [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified
+*      Eigenvalues of a Regular Matrix Pair (A, B) and Condition
+*      Estimation: Theory, Algorithms and Software, Report UMINF-94.04,
+*      Department of Computing Science, Umea University, S-901 87 Umea,
+*      Sweden, 1994. Also as LAPACK Working Note 87. To appear in
+*      Numerical Algorithms, 1996.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   CONE = ( 1.0E+0, 0.0E+0 ) )
+      REAL               TEN
+      PARAMETER          ( TEN = 10.0E+0 )
+      INTEGER            LDST
+      PARAMETER          ( LDST = 2 )
+      LOGICAL            WANDS
+      PARAMETER          ( WANDS = .TRUE. )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            STRONG, WEAK
+      INTEGER            I, M
+      REAL               CQ, CZ, EPS, SA, SB, SCALE, SMLNUM, SS, SUM,
+     $                   THRESH, WS
+      COMPLEX            CDUM, F, G, SQ, SZ
+*     ..
+*     .. Local Arrays ..
+      COMPLEX            S( LDST, LDST ), T( LDST, LDST ), WORK( 8 )
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLACPY, CLARTG, CLASSQ, CROT
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, CONJG, MAX, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+*
+*     Quick return if possible
+*
+      IF( N.LE.1 )
+     $   RETURN
+*
+      M = LDST
+      WEAK = .FALSE.
+      STRONG = .FALSE.
+*
+*     Make a local copy of selected block in (A, B)
+*
+      CALL CLACPY( 'Full', M, M, A( J1, J1 ), LDA, S, LDST )
+      CALL CLACPY( 'Full', M, M, B( J1, J1 ), LDB, T, LDST )
+*
+*     Compute the threshold for testing the acceptance of swapping.
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SLAMCH( 'S' ) / EPS
+      SCALE = REAL( CZERO )
+      SUM = REAL( CONE )
+      CALL CLACPY( 'Full', M, M, S, LDST, WORK, M )
+      CALL CLACPY( 'Full', M, M, T, LDST, WORK( M*M+1 ), M )
+      CALL CLASSQ( 2*M*M, WORK, 1, SCALE, SUM )
+      SA = SCALE*SQRT( SUM )
+      THRESH = MAX( TEN*EPS*SA, SMLNUM )
+*
+*     Compute unitary QL and RQ that swap 1-by-1 and 1-by-1 blocks
+*     using Givens rotations and perform the swap tentatively.
+*
+      F = S( 2, 2 )*T( 1, 1 ) - T( 2, 2 )*S( 1, 1 )
+      G = S( 2, 2 )*T( 1, 2 ) - T( 2, 2 )*S( 1, 2 )
+      SA = ABS( S( 2, 2 ) )
+      SB = ABS( T( 2, 2 ) )
+      CALL CLARTG( G, F, CZ, SZ, CDUM )
+      SZ = -SZ
+      CALL CROT( 2, S( 1, 1 ), 1, S( 1, 2 ), 1, CZ, CONJG( SZ ) )
+      CALL CROT( 2, T( 1, 1 ), 1, T( 1, 2 ), 1, CZ, CONJG( SZ ) )
+      IF( SA.GE.SB ) THEN
+         CALL CLARTG( S( 1, 1 ), S( 2, 1 ), CQ, SQ, CDUM )
+      ELSE
+         CALL CLARTG( T( 1, 1 ), T( 2, 1 ), CQ, SQ, CDUM )
+      END IF
+      CALL CROT( 2, S( 1, 1 ), LDST, S( 2, 1 ), LDST, CQ, SQ )
+      CALL CROT( 2, T( 1, 1 ), LDST, T( 2, 1 ), LDST, CQ, SQ )
+*
+*     Weak stability test: |S21| + |T21| <= O(EPS F-norm((S, T)))
+*
+      WS = ABS( S( 2, 1 ) ) + ABS( T( 2, 1 ) )
+      WEAK = WS.LE.THRESH
+      IF( .NOT.WEAK )
+     $   GO TO 20
+*
+      IF( WANDS ) THEN
+*
+*        Strong stability test:
+*           F-norm((A-QL'*S*QR, B-QL'*T*QR)) <= O(EPS*F-norm((A, B)))
+*
+         CALL CLACPY( 'Full', M, M, S, LDST, WORK, M )
+         CALL CLACPY( 'Full', M, M, T, LDST, WORK( M*M+1 ), M )
+         CALL CROT( 2, WORK, 1, WORK( 3 ), 1, CZ, -CONJG( SZ ) )
+         CALL CROT( 2, WORK( 5 ), 1, WORK( 7 ), 1, CZ, -CONJG( SZ ) )
+         CALL CROT( 2, WORK, 2, WORK( 2 ), 2, CQ, -SQ )
+         CALL CROT( 2, WORK( 5 ), 2, WORK( 6 ), 2, CQ, -SQ )
+         DO 10 I = 1, 2
+            WORK( I ) = WORK( I ) - A( J1+I-1, J1 )
+            WORK( I+2 ) = WORK( I+2 ) - A( J1+I-1, J1+1 )
+            WORK( I+4 ) = WORK( I+4 ) - B( J1+I-1, J1 )
+            WORK( I+6 ) = WORK( I+6 ) - B( J1+I-1, J1+1 )
+   10    CONTINUE
+         SCALE = REAL( CZERO )
+         SUM = REAL( CONE )
+         CALL CLASSQ( 2*M*M, WORK, 1, SCALE, SUM )
+         SS = SCALE*SQRT( SUM )
+         STRONG = SS.LE.THRESH
+         IF( .NOT.STRONG )
+     $      GO TO 20
+      END IF
+*
+*     If the swap is accepted ("weakly" and "strongly"), apply the
+*     equivalence transformations to the original matrix pair (A,B)
+*
+      CALL CROT( J1+1, A( 1, J1 ), 1, A( 1, J1+1 ), 1, CZ, CONJG( SZ ) )
+      CALL CROT( J1+1, B( 1, J1 ), 1, B( 1, J1+1 ), 1, CZ, CONJG( SZ ) )
+      CALL CROT( N-J1+1, A( J1, J1 ), LDA, A( J1+1, J1 ), LDA, CQ, SQ )
+      CALL CROT( N-J1+1, B( J1, J1 ), LDB, B( J1+1, J1 ), LDB, CQ, SQ )
+*
+*     Set  N1 by N2 (2,1) blocks to 0
+*
+      A( J1+1, J1 ) = CZERO
+      B( J1+1, J1 ) = CZERO
+*
+*     Accumulate transformations into Q and Z if requested.
+*
+      IF( WANTZ )
+     $   CALL CROT( N, Z( 1, J1 ), 1, Z( 1, J1+1 ), 1, CZ, CONJG( SZ ) )
+      IF( WANTQ )
+     $   CALL CROT( N, Q( 1, J1 ), 1, Q( 1, J1+1 ), 1, CQ, CONJG( SQ ) )
+*
+*     Exit with INFO = 0 if swap was successfully performed.
+*
+      RETURN
+*
+*     Exit with INFO = 1 if swap was rejected.
+*
+   20 CONTINUE
+      INFO = 1
+      RETURN
+*
+*     End of CTGEX2
+*
+      END
diff --git a/costa/native/external/lapack/ctgexc.f b/costa/native/external/lapack/ctgexc.f
new file mode 100644
index 000000000..cbc02ba52
--- /dev/null
+++ b/costa/native/external/lapack/ctgexc.f
@@ -0,0 +1,207 @@
+      SUBROUTINE CTGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
+     $                   LDZ, IFST, ILST, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      LOGICAL            WANTQ, WANTZ
+      INTEGER            IFST, ILST, INFO, LDA, LDB, LDQ, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTGEXC reorders the generalized Schur decomposition of a complex
+*  matrix pair (A,B), using an unitary equivalence transformation
+*  (A, B) := Q * (A, B) * Z', so that the diagonal block of (A, B) with
+*  row index IFST is moved to row ILST.
+*
+*  (A, B) must be in generalized Schur canonical form, that is, A and
+*  B are both upper triangular.
+*
+*  Optionally, the matrices Q and Z of generalized Schur vectors are
+*  updated.
+*
+*         Q(in) * A(in) * Z(in)' = Q(out) * A(out) * Z(out)'
+*         Q(in) * B(in) * Z(in)' = Q(out) * B(out) * Z(out)'
+*
+*  Arguments
+*  =========
+*
+*  WANTQ   (input) LOGICAL
+*          .TRUE. : update the left transformation matrix Q;
+*          .FALSE.: do not update Q.
+*
+*  WANTZ   (input) LOGICAL
+*          .TRUE. : update the right transformation matrix Z;
+*          .FALSE.: do not update Z.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B. N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the upper triangular matrix A in the pair (A, B).
+*          On exit, the updated matrix A.
+*
+*  LDA     (input)  INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,N)
+*          On entry, the upper triangular matrix B in the pair (A, B).
+*          On exit, the updated matrix B.
+*
+*  LDB     (input)  INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  Q       (input/output) COMPLEX array, dimension (LDZ,N)
+*          On entry, if WANTQ = .TRUE., the unitary matrix Q.
+*          On exit, the updated matrix Q.
+*          If WANTQ = .FALSE., Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q. LDQ >= 1;
+*          If WANTQ = .TRUE., LDQ >= N.
+*
+*  Z       (input/output) COMPLEX array, dimension (LDZ,N)
+*          On entry, if WANTZ = .TRUE., the unitary matrix Z.
+*          On exit, the updated matrix Z.
+*          If WANTZ = .FALSE., Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z. LDZ >= 1;
+*          If WANTZ = .TRUE., LDZ >= N.
+*
+*  IFST    (input/output) INTEGER
+*  ILST    (input/output) INTEGER
+*          Specify the reordering of the diagonal blocks of (A, B).
+*          The block with row index IFST is moved to row ILST, by a
+*          sequence of swapping between adjacent blocks.
+*
+*  INFO    (output) INTEGER
+*           =0:  Successful exit.
+*           <0:  if INFO = -i, the i-th argument had an illegal value.
+*           =1:  The transformed matrix pair (A, B) would be too far
+*                from generalized Schur form; the problem is ill-
+*                conditioned. (A, B) may have been partially reordered,
+*                and ILST points to the first row of the current
+*                position of the block being moved.
+*
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the
+*      Generalized Real Schur Form of a Regular Matrix Pair (A, B), in
+*      M.S. Moonen et al (eds), Linear Algebra for Large Scale and
+*      Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218.
+*
+*  [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified
+*      Eigenvalues of a Regular Matrix Pair (A, B) and Condition
+*      Estimation: Theory, Algorithms and Software, Report
+*      UMINF - 94.04, Department of Computing Science, Umea University,
+*      S-901 87 Umea, Sweden, 1994. Also as LAPACK Working Note 87.
+*      To appear in Numerical Algorithms, 1996.
+*
+*  [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software
+*      for Solving the Generalized Sylvester Equation and Estimating the
+*      Separation between Regular Matrix Pairs, Report UMINF - 93.23,
+*      Department of Computing Science, Umea University, S-901 87 Umea,
+*      Sweden, December 1993, Revised April 1994, Also as LAPACK working
+*      Note 75. To appear in ACM Trans. on Math. Software, Vol 22, No 1,
+*      1996.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            HERE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CTGEX2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test input arguments.
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDQ.LT.1 .OR. WANTQ .AND. ( LDQ.LT.MAX( 1, N ) ) ) THEN
+         INFO = -9
+      ELSE IF( LDZ.LT.1 .OR. WANTZ .AND. ( LDZ.LT.MAX( 1, N ) ) ) THEN
+         INFO = -11
+      ELSE IF( IFST.LT.1 .OR. IFST.GT.N ) THEN
+         INFO = -12
+      ELSE IF( ILST.LT.1 .OR. ILST.GT.N ) THEN
+         INFO = -13
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CTGEXC', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.1 )
+     $   RETURN
+      IF( IFST.EQ.ILST )
+     $   RETURN
+*
+      IF( IFST.LT.ILST ) THEN
+*
+         HERE = IFST
+*
+   10    CONTINUE
+*
+*        Swap with next one below
+*
+         CALL CTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, LDZ,
+     $                HERE, INFO )
+         IF( INFO.NE.0 ) THEN
+            ILST = HERE
+            RETURN
+         END IF
+         HERE = HERE + 1
+         IF( HERE.LT.ILST )
+     $      GO TO 10
+         HERE = HERE - 1
+      ELSE
+         HERE = IFST - 1
+*
+   20    CONTINUE
+*
+*        Swap with next one above
+*
+         CALL CTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, LDZ,
+     $                HERE, INFO )
+         IF( INFO.NE.0 ) THEN
+            ILST = HERE
+            RETURN
+         END IF
+         HERE = HERE - 1
+         IF( HERE.GE.ILST )
+     $      GO TO 20
+         HERE = HERE + 1
+      END IF
+      ILST = HERE
+      RETURN
+*
+*     End of CTGEXC
+*
+      END
diff --git a/costa/native/external/lapack/ctgsen.f b/costa/native/external/lapack/ctgsen.f
new file mode 100644
index 000000000..7fe2cff0e
--- /dev/null
+++ b/costa/native/external/lapack/ctgsen.f
@@ -0,0 +1,642 @@
+      SUBROUTINE CTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB,
+     $                   ALPHA, BETA, Q, LDQ, Z, LDZ, M, PL, PR, DIF,
+     $                   WORK, LWORK, IWORK, LIWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      LOGICAL            WANTQ, WANTZ
+      INTEGER            IJOB, INFO, LDA, LDB, LDQ, LDZ, LIWORK, LWORK,
+     $                   M, N
+      REAL               PL, PR
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            SELECT( * )
+      INTEGER            IWORK( * )
+      REAL               DIF( * )
+      COMPLEX            A( LDA, * ), ALPHA( * ), B( LDB, * ),
+     $                   BETA( * ), Q( LDQ, * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTGSEN reorders the generalized Schur decomposition of a complex
+*  matrix pair (A, B) (in terms of an unitary equivalence trans-
+*  formation Q' * (A, B) * Z), so that a selected cluster of eigenvalues
+*  appears in the leading diagonal blocks of the pair (A,B). The leading
+*  columns of Q and Z form unitary bases of the corresponding left and
+*  right eigenspaces (deflating subspaces). (A, B) must be in
+*  generalized Schur canonical form, that is, A and B are both upper
+*  triangular.
+*
+*  CTGSEN also computes the generalized eigenvalues
+*
+*           w(j)= ALPHA(j) / BETA(j)
+*
+*  of the reordered matrix pair (A, B).
+*
+*  Optionally, the routine computes estimates of reciprocal condition
+*  numbers for eigenvalues and eigenspaces. These are Difu[(A11,B11),
+*  (A22,B22)] and Difl[(A11,B11), (A22,B22)], i.e. the separation(s)
+*  between the matrix pairs (A11, B11) and (A22,B22) that correspond to
+*  the selected cluster and the eigenvalues outside the cluster, resp.,
+*  and norms of "projections" onto left and right eigenspaces w.r.t.
+*  the selected cluster in the (1,1)-block.
+*
+*
+*  Arguments
+*  =========
+*
+*  IJOB    (input) integer
+*          Specifies whether condition numbers are required for the
+*          cluster of eigenvalues (PL and PR) or the deflating subspaces
+*          (Difu and Difl):
+*           =0: Only reorder w.r.t. SELECT. No extras.
+*           =1: Reciprocal of norms of "projections" onto left and right
+*               eigenspaces w.r.t. the selected cluster (PL and PR).
+*           =2: Upper bounds on Difu and Difl. F-norm-based estimate
+*               (DIF(1:2)).
+*           =3: Estimate of Difu and Difl. 1-norm-based estimate
+*               (DIF(1:2)).
+*               About 5 times as expensive as IJOB = 2.
+*           =4: Compute PL, PR and DIF (i.e. 0, 1 and 2 above): Economic
+*               version to get it all.
+*           =5: Compute PL, PR and DIF (i.e. 0, 1 and 3 above)
+*
+*  WANTQ   (input) LOGICAL
+*          .TRUE. : update the left transformation matrix Q;
+*          .FALSE.: do not update Q.
+*
+*  WANTZ   (input) LOGICAL
+*          .TRUE. : update the right transformation matrix Z;
+*          .FALSE.: do not update Z.
+*
+*  SELECT  (input) LOGICAL array, dimension (N)
+*          SELECT specifies the eigenvalues in the selected cluster. To
+*          select an eigenvalue w(j), SELECT(j) must be set to
+*          .TRUE..
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B. N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension(LDA,N)
+*          On entry, the upper triangular matrix A, in generalized
+*          Schur canonical form.
+*          On exit, A is overwritten by the reordered matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX array, dimension(LDB,N)
+*          On entry, the upper triangular matrix B, in generalized
+*          Schur canonical form.
+*          On exit, B is overwritten by the reordered matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  ALPHA   (output) COMPLEX array, dimension (N)
+*  BETA    (output) COMPLEX array, dimension (N)
+*          The diagonal elements of A and B, respectively,
+*          when the pair (A,B) has been reduced to generalized Schur
+*          form.  ALPHA(i)/BETA(i) i=1,...,N are the generalized
+*          eigenvalues.
+*
+*  Q       (input/output) COMPLEX array, dimension (LDQ,N)
+*          On entry, if WANTQ = .TRUE., Q is an N-by-N matrix.
+*          On exit, Q has been postmultiplied by the left unitary
+*          transformation matrix which reorder (A, B); The leading M
+*          columns of Q form orthonormal bases for the specified pair of
+*          left eigenspaces (deflating subspaces).
+*          If WANTQ = .FALSE., Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q. LDQ >= 1.
+*          If WANTQ = .TRUE., LDQ >= N.
+*
+*  Z       (input/output) COMPLEX array, dimension (LDZ,N)
+*          On entry, if WANTZ = .TRUE., Z is an N-by-N matrix.
+*          On exit, Z has been postmultiplied by the left unitary
+*          transformation matrix which reorder (A, B); The leading M
+*          columns of Z form orthonormal bases for the specified pair of
+*          left eigenspaces (deflating subspaces).
+*          If WANTZ = .FALSE., Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z. LDZ >= 1.
+*          If WANTZ = .TRUE., LDZ >= N.
+*
+*  M       (output) INTEGER
+*          The dimension of the specified pair of left and right
+*          eigenspaces, (deflating subspaces) 0 <= M <= N.
+*
+*  PL, PR  (output) REAL
+*          If IJOB = 1, 4 or 5, PL, PR are lower bounds on the
+*          reciprocal  of the norm of "projections" onto left and right
+*          eigenspace with respect to the selected cluster.
+*          0 < PL, PR <= 1.
+*          If M = 0 or M = N, PL = PR  = 1.
+*          If IJOB = 0, 2 or 3 PL, PR are not referenced.
+*
+*  DIF     (output) REAL array, dimension (2).
+*          If IJOB >= 2, DIF(1:2) store the estimates of Difu and Difl.
+*          If IJOB = 2 or 4, DIF(1:2) are F-norm-based upper bounds on
+*          Difu and Difl. If IJOB = 3 or 5, DIF(1:2) are 1-norm-based
+*          estimates of Difu and Difl, computed using reversed
+*          communication with CLACON.
+*          If M = 0 or N, DIF(1:2) = F-norm([A, B]).
+*          If IJOB = 0 or 1, DIF is not referenced.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          IF IJOB = 0, WORK is not referenced.  Otherwise,
+*          on exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >=  1
+*          If IJOB = 1, 2 or 4, LWORK >=  2*M*(N-M)
+*          If IJOB = 3 or 5, LWORK >=  4*M*(N-M)
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER, dimension (LIWORK)
+*          IF IJOB = 0, IWORK is not referenced.  Otherwise,
+*          on exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK. LIWORK >= 1.
+*          If IJOB = 1, 2 or 4, LIWORK >=  N+2;
+*          If IJOB = 3 or 5, LIWORK >= MAX(N+2, 2*M*(N-M));
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*            =0: Successful exit.
+*            <0: If INFO = -i, the i-th argument had an illegal value.
+*            =1: Reordering of (A, B) failed because the transformed
+*                matrix pair (A, B) would be too far from generalized
+*                Schur form; the problem is very ill-conditioned.
+*                (A, B) may have been partially reordered.
+*                If requested, 0 is returned in DIF(*), PL and PR.
+*
+*
+*  Further Details
+*  ===============
+*
+*  CTGSEN first collects the selected eigenvalues by computing unitary
+*  U and W that move them to the top left corner of (A, B). In other
+*  words, the selected eigenvalues are the eigenvalues of (A11, B11) in
+*
+*                U'*(A, B)*W = (A11 A12) (B11 B12) n1
+*                              ( 0  A22),( 0  B22) n2
+*                                n1  n2    n1  n2
+*
+*  where N = n1+n2 and U' means the conjugate transpose of U. The first
+*  n1 columns of U and W span the specified pair of left and right
+*  eigenspaces (deflating subspaces) of (A, B).
+*
+*  If (A, B) has been obtained from the generalized real Schur
+*  decomposition of a matrix pair (C, D) = Q*(A, B)*Z', then the
+*  reordered generalized Schur form of (C, D) is given by
+*
+*           (C, D) = (Q*U)*(U'*(A, B)*W)*(Z*W)',
+*
+*  and the first n1 columns of Q*U and Z*W span the corresponding
+*  deflating subspaces of (C, D) (Q and Z store Q*U and Z*W, resp.).
+*
+*  Note that if the selected eigenvalue is sufficiently ill-conditioned,
+*  then its value may differ significantly from its value before
+*  reordering.
+*
+*  The reciprocal condition numbers of the left and right eigenspaces
+*  spanned by the first n1 columns of U and W (or Q*U and Z*W) may
+*  be returned in DIF(1:2), corresponding to Difu and Difl, resp.
+*
+*  The Difu and Difl are defined as:
+*
+*       Difu[(A11, B11), (A22, B22)] = sigma-min( Zu )
+*  and
+*       Difl[(A11, B11), (A22, B22)] = Difu[(A22, B22), (A11, B11)],
+*
+*  where sigma-min(Zu) is the smallest singular value of the
+*  (2*n1*n2)-by-(2*n1*n2) matrix
+*
+*       Zu = [ kron(In2, A11)  -kron(A22', In1) ]
+*            [ kron(In2, B11)  -kron(B22', In1) ].
+*
+*  Here, Inx is the identity matrix of size nx and A22' is the
+*  transpose of A22. kron(X, Y) is the Kronecker product between
+*  the matrices X and Y.
+*
+*  When DIF(2) is small, small changes in (A, B) can cause large changes
+*  in the deflating subspace. An approximate (asymptotic) bound on the
+*  maximum angular error in the computed deflating subspaces is
+*
+*       EPS * norm((A, B)) / DIF(2),
+*
+*  where EPS is the machine precision.
+*
+*  The reciprocal norm of the projectors on the left and right
+*  eigenspaces associated with (A11, B11) may be returned in PL and PR.
+*  They are computed as follows. First we compute L and R so that
+*  P*(A, B)*Q is block diagonal, where
+*
+*       P = ( I -L ) n1           Q = ( I R ) n1
+*           ( 0  I ) n2    and        ( 0 I ) n2
+*             n1 n2                    n1 n2
+*
+*  and (L, R) is the solution to the generalized Sylvester equation
+*
+*       A11*R - L*A22 = -A12
+*       B11*R - L*B22 = -B12
+*
+*  Then PL = (F-norm(L)**2+1)**(-1/2) and PR = (F-norm(R)**2+1)**(-1/2).
+*  An approximate (asymptotic) bound on the average absolute error of
+*  the selected eigenvalues is
+*
+*       EPS * norm((A, B)) / PL.
+*
+*  There are also global error bounds which valid for perturbations up
+*  to a certain restriction:  A lower bound (x) on the smallest
+*  F-norm(E,F) for which an eigenvalue of (A11, B11) may move and
+*  coalesce with an eigenvalue of (A22, B22) under perturbation (E,F),
+*  (i.e. (A + E, B + F), is
+*
+*   x = min(Difu,Difl)/((1/(PL*PL)+1/(PR*PR))**(1/2)+2*max(1/PL,1/PR)).
+*
+*  An approximate bound on x can be computed from DIF(1:2), PL and PR.
+*
+*  If y = ( F-norm(E,F) / x) <= 1, the angles between the perturbed
+*  (L', R') and unperturbed (L, R) left and right deflating subspaces
+*  associated with the selected cluster in the (1,1)-blocks can be
+*  bounded as
+*
+*   max-angle(L, L') <= arctan( y * PL / (1 - y * (1 - PL * PL)**(1/2))
+*   max-angle(R, R') <= arctan( y * PR / (1 - y * (1 - PR * PR)**(1/2))
+*
+*  See LAPACK User's Guide section 4.11 or the following references
+*  for more information.
+*
+*  Note that if the default method for computing the Frobenius-norm-
+*  based estimate DIF is not wanted (see CLATDF), then the parameter
+*  IDIFJB (see below) should be changed from 3 to 4 (routine CLATDF
+*  (IJOB = 2 will be used)). See CTGSYL for more details.
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  References
+*  ==========
+*
+*  [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the
+*      Generalized Real Schur Form of a Regular Matrix Pair (A, B), in
+*      M.S. Moonen et al (eds), Linear Algebra for Large Scale and
+*      Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218.
+*
+*  [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified
+*      Eigenvalues of a Regular Matrix Pair (A, B) and Condition
+*      Estimation: Theory, Algorithms and Software, Report
+*      UMINF - 94.04, Department of Computing Science, Umea University,
+*      S-901 87 Umea, Sweden, 1994. Also as LAPACK Working Note 87.
+*      To appear in Numerical Algorithms, 1996.
+*
+*  [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software
+*      for Solving the Generalized Sylvester Equation and Estimating the
+*      Separation between Regular Matrix Pairs, Report UMINF - 93.23,
+*      Department of Computing Science, Umea University, S-901 87 Umea,
+*      Sweden, December 1993, Revised April 1994, Also as LAPACK working
+*      Note 75. To appear in ACM Trans. on Math. Software, Vol 22, No 1,
+*      1996.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            IDIFJB
+      PARAMETER          ( IDIFJB = 3 )
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, SWAP, WANTD, WANTD1, WANTD2, WANTP
+      INTEGER            I, IERR, IJB, K, KASE, KS, LIWMIN, LWMIN, MN2,
+     $                   N1, N2
+      REAL               DSCALE, DSUM, RDSCAL, SAFMIN
+*     ..
+*     .. External Subroutines ..
+      REAL               SLAMCH 
+      EXTERNAL           CLACON, CLACPY, CLASSQ, CSCAL, CTGEXC, CTGSYL,
+     $                   SLAMCH, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, CMPLX, CONJG, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      IF( IJOB.LT.0 .OR. IJOB.GT.5 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN
+         INFO = -13
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -15
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CTGSEN', -INFO )
+         RETURN
+      END IF
+*
+      IERR = 0
+*
+      WANTP = IJOB.EQ.1 .OR. IJOB.GE.4
+      WANTD1 = IJOB.EQ.2 .OR. IJOB.EQ.4
+      WANTD2 = IJOB.EQ.3 .OR. IJOB.EQ.5
+      WANTD = WANTD1 .OR. WANTD2
+*
+*     Set M to the dimension of the specified pair of deflating
+*     subspaces.
+*
+      M = 0
+      DO 10 K = 1, N
+         ALPHA( K ) = A( K, K )
+         BETA( K ) = B( K, K )
+         IF( K.LT.N ) THEN
+            IF( SELECT( K ) )
+     $         M = M + 1
+         ELSE
+            IF( SELECT( N ) )
+     $         M = M + 1
+         END IF
+   10 CONTINUE
+*
+      IF( IJOB.EQ.1 .OR. IJOB.EQ.2 .OR. IJOB.EQ.4 ) THEN
+         LWMIN = MAX( 1, 2*M*(N-M) )
+         LIWMIN = MAX( 1, N+2 )
+      ELSE IF( IJOB.EQ.3 .OR. IJOB.EQ.5 ) THEN
+         LWMIN = MAX( 1, 4*M*(N-M) )
+         LIWMIN = MAX( 1, 2*M*(N-M), N+2 )
+      ELSE
+         LWMIN = 1
+         LIWMIN = 1
+      END IF
+*
+      WORK( 1 ) = LWMIN
+      IWORK( 1 ) = LIWMIN
+*
+      IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -21
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -23
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CTGSEN', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( M.EQ.N .OR. M.EQ.0 ) THEN
+         IF( WANTP ) THEN
+            PL = ONE
+            PR = ONE
+         END IF
+         IF( WANTD ) THEN
+            DSCALE = ZERO
+            DSUM = ONE
+            DO 20 I = 1, N
+               CALL CLASSQ( N, A( 1, I ), 1, DSCALE, DSUM )
+               CALL CLASSQ( N, B( 1, I ), 1, DSCALE, DSUM )
+   20       CONTINUE
+            DIF( 1 ) = DSCALE*SQRT( DSUM )
+            DIF( 2 ) = DIF( 1 )
+         END IF
+         GO TO 70
+      END IF
+*
+*     Get machine constant
+*
+      SAFMIN = SLAMCH( 'S' )
+*
+*     Collect the selected blocks at the top-left corner of (A, B).
+*
+      KS = 0
+      DO 30 K = 1, N
+         SWAP = SELECT( K )
+         IF( SWAP ) THEN
+            KS = KS + 1
+*
+*           Swap the K-th block to position KS. Compute unitary Q
+*           and Z that will swap adjacent diagonal blocks in (A, B).
+*
+            IF( K.NE.KS )
+     $         CALL CTGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
+     $                      LDZ, K, KS, IERR )
+*
+            IF( IERR.GT.0 ) THEN
+*
+*              Swap is rejected: exit.
+*
+               INFO = 1
+               IF( WANTP ) THEN
+                  PL = ZERO
+                  PR = ZERO
+               END IF
+               IF( WANTD ) THEN
+                  DIF( 1 ) = ZERO
+                  DIF( 2 ) = ZERO
+               END IF
+               GO TO 70
+            END IF
+         END IF
+   30 CONTINUE
+      IF( WANTP ) THEN
+*
+*        Solve generalized Sylvester equation for R and L:
+*                   A11 * R - L * A22 = A12
+*                   B11 * R - L * B22 = B12
+*
+         N1 = M
+         N2 = N - M
+         I = N1 + 1
+         CALL CLACPY( 'Full', N1, N2, A( 1, I ), LDA, WORK, N1 )
+         CALL CLACPY( 'Full', N1, N2, B( 1, I ), LDB, WORK( N1*N2+1 ),
+     $                N1 )
+         IJB = 0
+         CALL CTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK,
+     $                N1, B, LDB, B( I, I ), LDB, WORK( N1*N2+1 ), N1,
+     $                DSCALE, DIF( 1 ), WORK( N1*N2*2+1 ),
+     $                LWORK-2*N1*N2, IWORK, IERR )
+*
+*        Estimate the reciprocal of norms of "projections" onto
+*        left and right eigenspaces
+*
+         RDSCAL = ZERO
+         DSUM = ONE
+         CALL CLASSQ( N1*N2, WORK, 1, RDSCAL, DSUM )
+         PL = RDSCAL*SQRT( DSUM )
+         IF( PL.EQ.ZERO ) THEN
+            PL = ONE
+         ELSE
+            PL = DSCALE / ( SQRT( DSCALE*DSCALE / PL+PL )*SQRT( PL ) )
+         END IF
+         RDSCAL = ZERO
+         DSUM = ONE
+         CALL CLASSQ( N1*N2, WORK( N1*N2+1 ), 1, RDSCAL, DSUM )
+         PR = RDSCAL*SQRT( DSUM )
+         IF( PR.EQ.ZERO ) THEN
+            PR = ONE
+         ELSE
+            PR = DSCALE / ( SQRT( DSCALE*DSCALE / PR+PR )*SQRT( PR ) )
+         END IF
+      END IF
+      IF( WANTD ) THEN
+*
+*        Compute estimates Difu and Difl.
+*
+         IF( WANTD1 ) THEN
+            N1 = M
+            N2 = N - M
+            I = N1 + 1
+            IJB = IDIFJB
+*
+*           Frobenius norm-based Difu estimate.
+*
+            CALL CTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK,
+     $                   N1, B, LDB, B( I, I ), LDB, WORK( N1*N2+1 ),
+     $                   N1, DSCALE, DIF( 1 ), WORK( N1*N2*2+1 ),
+     $                   LWORK-2*N1*N2, IWORK, IERR )
+*
+*           Frobenius norm-based Difl estimate.
+*
+            CALL CTGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, WORK,
+     $                   N2, B( I, I ), LDB, B, LDB, WORK( N1*N2+1 ),
+     $                   N2, DSCALE, DIF( 2 ), WORK( N1*N2*2+1 ),
+     $                   LWORK-2*N1*N2, IWORK, IERR )
+         ELSE
+*
+*           Compute 1-norm-based estimates of Difu and Difl using
+*           reversed communication with CLACON. In each step a
+*           generalized Sylvester equation or a transposed variant
+*           is solved.
+*
+            KASE = 0
+            N1 = M
+            N2 = N - M
+            I = N1 + 1
+            IJB = 0
+            MN2 = 2*N1*N2
+*
+*           1-norm-based estimate of Difu.
+*
+   40       CONTINUE
+            CALL CLACON( MN2, WORK( MN2+1 ), WORK, DIF( 1 ), KASE )
+            IF( KASE.NE.0 ) THEN
+               IF( KASE.EQ.1 ) THEN
+*
+*                 Solve generalized Sylvester equation
+*
+                  CALL CTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA,
+     $                         WORK, N1, B, LDB, B( I, I ), LDB,
+     $                         WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ),
+     $                         WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK,
+     $                         IERR )
+               ELSE
+*
+*                 Solve the transposed variant.
+*
+                  CALL CTGSYL( 'C', IJB, N1, N2, A, LDA, A( I, I ), LDA,
+     $                         WORK, N1, B, LDB, B( I, I ), LDB,
+     $                         WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ),
+     $                         WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK,
+     $                         IERR )
+               END IF
+               GO TO 40
+            END IF
+            DIF( 1 ) = DSCALE / DIF( 1 )
+*
+*           1-norm-based estimate of Difl.
+*
+   50       CONTINUE
+            CALL CLACON( MN2, WORK( MN2+1 ), WORK, DIF( 2 ), KASE )
+            IF( KASE.NE.0 ) THEN
+               IF( KASE.EQ.1 ) THEN
+*
+*                 Solve generalized Sylvester equation
+*
+                  CALL CTGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA,
+     $                         WORK, N2, B( I, I ), LDB, B, LDB,
+     $                         WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ),
+     $                         WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK,
+     $                         IERR )
+               ELSE
+*
+*                 Solve the transposed variant.
+*
+                  CALL CTGSYL( 'C', IJB, N2, N1, A( I, I ), LDA, A, LDA,
+     $                         WORK, N2, B, LDB, B( I, I ), LDB,
+     $                         WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ),
+     $                         WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK,
+     $                         IERR )
+               END IF
+               GO TO 50
+            END IF
+            DIF( 2 ) = DSCALE / DIF( 2 )
+         END IF
+      END IF
+*
+*     If B(K,K) is complex, make it real and positive (normalization
+*     of the generalized Schur form) and Store the generalized 
+*     eigenvalues of reordered pair (A, B)
+*
+      DO 60 K = 1, N
+         DSCALE = ABS( B( K, K ) )
+         IF( DSCALE.GT.SAFMIN ) THEN
+            WORK( 1 ) = CONJG( B( K, K ) / DSCALE )
+            WORK( 2 ) = B( K, K ) / DSCALE
+            B( K, K ) = DSCALE
+            CALL CSCAL( N-K, WORK( 1 ), B( K, K+1 ), LDB )
+            CALL CSCAL( N-K+1, WORK( 1 ), A( K, K ), LDA )
+            IF( WANTQ )
+     $         CALL CSCAL( N, WORK( 2 ), Q( 1, K ), 1 )
+         ELSE
+            B( K, K ) = CMPLX( ZERO, ZERO )
+         END IF
+*
+         ALPHA( K ) = A( K, K )
+         BETA( K ) = B( K, K )
+*
+   60 CONTINUE
+*
+   70 CONTINUE
+*
+      WORK( 1 ) = LWMIN
+      IWORK( 1 ) = LIWMIN
+*
+      RETURN
+*
+*     End of CTGSEN
+*
+      END
diff --git a/costa/native/external/lapack/ctgsja.f b/costa/native/external/lapack/ctgsja.f
new file mode 100644
index 000000000..e9a54241c
--- /dev/null
+++ b/costa/native/external/lapack/ctgsja.f
@@ -0,0 +1,526 @@
+      SUBROUTINE CTGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B,
+     $                   LDB, TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV,
+     $                   Q, LDQ, WORK, NCYCLE, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBQ, JOBU, JOBV
+      INTEGER            INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N,
+     $                   NCYCLE, P
+      REAL               TOLA, TOLB
+*     ..
+*     .. Array Arguments ..
+      REAL               ALPHA( * ), BETA( * )
+      COMPLEX            A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
+     $                   U( LDU, * ), V( LDV, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTGSJA computes the generalized singular value decomposition (GSVD)
+*  of two complex upper triangular (or trapezoidal) matrices A and B.
+*
+*  On entry, it is assumed that matrices A and B have the following
+*  forms, which may be obtained by the preprocessing subroutine CGGSVP
+*  from a general M-by-N matrix A and P-by-N matrix B:
+*
+*               N-K-L  K    L
+*     A =    K ( 0    A12  A13 ) if M-K-L >= 0;
+*            L ( 0     0   A23 )
+*        M-K-L ( 0     0    0  )
+*
+*             N-K-L  K    L
+*     A =  K ( 0    A12  A13 ) if M-K-L < 0;
+*        M-K ( 0     0   A23 )
+*
+*             N-K-L  K    L
+*     B =  L ( 0     0   B13 )
+*        P-L ( 0     0    0  )
+*
+*  where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular
+*  upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0,
+*  otherwise A23 is (M-K)-by-L upper trapezoidal.
+*
+*  On exit,
+*
+*         U'*A*Q = D1*( 0 R ),    V'*B*Q = D2*( 0 R ),
+*
+*  where U, V and Q are unitary matrices, Z' denotes the conjugate
+*  transpose of Z, R is a nonsingular upper triangular matrix, and D1
+*  and D2 are ``diagonal'' matrices, which are of the following
+*  structures:
+*
+*  If M-K-L >= 0,
+*
+*                      K  L
+*         D1 =     K ( I  0 )
+*                  L ( 0  C )
+*              M-K-L ( 0  0 )
+*
+*                     K  L
+*         D2 = L   ( 0  S )
+*              P-L ( 0  0 )
+*
+*                 N-K-L  K    L
+*    ( 0 R ) = K (  0   R11  R12 ) K
+*              L (  0    0   R22 ) L
+*
+*  where
+*
+*    C = diag( ALPHA(K+1), ... , ALPHA(K+L) ),
+*    S = diag( BETA(K+1),  ... , BETA(K+L) ),
+*    C**2 + S**2 = I.
+*
+*    R is stored in A(1:K+L,N-K-L+1:N) on exit.
+*
+*  If M-K-L < 0,
+*
+*                 K M-K K+L-M
+*      D1 =   K ( I  0    0   )
+*           M-K ( 0  C    0   )
+*
+*                   K M-K K+L-M
+*      D2 =   M-K ( 0  S    0   )
+*           K+L-M ( 0  0    I   )
+*             P-L ( 0  0    0   )
+*
+*                 N-K-L  K   M-K  K+L-M
+* ( 0 R ) =    K ( 0    R11  R12  R13  )
+*            M-K ( 0     0   R22  R23  )
+*          K+L-M ( 0     0    0   R33  )
+*
+*  where
+*  C = diag( ALPHA(K+1), ... , ALPHA(M) ),
+*  S = diag( BETA(K+1),  ... , BETA(M) ),
+*  C**2 + S**2 = I.
+*
+*  R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored
+*      (  0  R22 R23 )
+*  in B(M-K+1:L,N+M-K-L+1:N) on exit.
+*
+*  The computation of the unitary transformation matrices U, V or Q
+*  is optional.  These matrices may either be formed explicitly, or they
+*  may be postmultiplied into input matrices U1, V1, or Q1.
+*
+*  Arguments
+*  =========
+*
+*  JOBU    (input) CHARACTER*1
+*          = 'U':  U must contain a unitary matrix U1 on entry, and
+*                  the product U1*U is returned;
+*          = 'I':  U is initialized to the unit matrix, and the
+*                  unitary matrix U is returned;
+*          = 'N':  U is not computed.
+*
+*  JOBV    (input) CHARACTER*1
+*          = 'V':  V must contain a unitary matrix V1 on entry, and
+*                  the product V1*V is returned;
+*          = 'I':  V is initialized to the unit matrix, and the
+*                  unitary matrix V is returned;
+*          = 'N':  V is not computed.
+*
+*  JOBQ    (input) CHARACTER*1
+*          = 'Q':  Q must contain a unitary matrix Q1 on entry, and
+*                  the product Q1*Q is returned;
+*          = 'I':  Q is initialized to the unit matrix, and the
+*                  unitary matrix Q is returned;
+*          = 'N':  Q is not computed.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  P       (input) INTEGER
+*          The number of rows of the matrix B.  P >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrices A and B.  N >= 0.
+*
+*  K       (input) INTEGER
+*  L       (input) INTEGER
+*          K and L specify the subblocks in the input matrices A and B:
+*          A23 = A(K+1:MIN(K+L,M),N-L+1:N) and B13 = B(1:L,,N-L+1:N)
+*          of A and B, whose GSVD is going to be computed by CTGSJA.
+*          See Further details.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, A(N-K+1:N,1:MIN(K+L,M) ) contains the triangular
+*          matrix R or part of R.  See Purpose for details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,N)
+*          On entry, the P-by-N matrix B.
+*          On exit, if necessary, B(M-K+1:L,N+M-K-L+1:N) contains
+*          a part of R.  See Purpose for details.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,P).
+*
+*  TOLA    (input) REAL
+*  TOLB    (input) REAL
+*          TOLA and TOLB are the convergence criteria for the Jacobi-
+*          Kogbetliantz iteration procedure. Generally, they are the
+*          same as used in the preprocessing step, say
+*              TOLA = MAX(M,N)*norm(A)*MACHEPS,
+*              TOLB = MAX(P,N)*norm(B)*MACHEPS.
+*
+*  ALPHA   (output) REAL array, dimension (N)
+*  BETA    (output) REAL array, dimension (N)
+*          On exit, ALPHA and BETA contain the generalized singular
+*          value pairs of A and B;
+*            ALPHA(1:K) = 1,
+*            BETA(1:K)  = 0,
+*          and if M-K-L >= 0,
+*            ALPHA(K+1:K+L) = diag(C),
+*            BETA(K+1:K+L)  = diag(S),
+*          or if M-K-L < 0,
+*            ALPHA(K+1:M)= C, ALPHA(M+1:K+L)= 0
+*            BETA(K+1:M) = S, BETA(M+1:K+L) = 1.
+*          Furthermore, if K+L < N,
+*            ALPHA(K+L+1:N) = 0
+*            BETA(K+L+1:N)  = 0.
+*
+*  U       (input/output) COMPLEX array, dimension (LDU,M)
+*          On entry, if JOBU = 'U', U must contain a matrix U1 (usually
+*          the unitary matrix returned by CGGSVP).
+*          On exit,
+*          if JOBU = 'I', U contains the unitary matrix U;
+*          if JOBU = 'U', U contains the product U1*U.
+*          If JOBU = 'N', U is not referenced.
+*
+*  LDU     (input) INTEGER
+*          The leading dimension of the array U. LDU >= max(1,M) if
+*          JOBU = 'U'; LDU >= 1 otherwise.
+*
+*  V       (input/output) COMPLEX array, dimension (LDV,P)
+*          On entry, if JOBV = 'V', V must contain a matrix V1 (usually
+*          the unitary matrix returned by CGGSVP).
+*          On exit,
+*          if JOBV = 'I', V contains the unitary matrix V;
+*          if JOBV = 'V', V contains the product V1*V.
+*          If JOBV = 'N', V is not referenced.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V. LDV >= max(1,P) if
+*          JOBV = 'V'; LDV >= 1 otherwise.
+*
+*  Q       (input/output) COMPLEX array, dimension (LDQ,N)
+*          On entry, if JOBQ = 'Q', Q must contain a matrix Q1 (usually
+*          the unitary matrix returned by CGGSVP).
+*          On exit,
+*          if JOBQ = 'I', Q contains the unitary matrix Q;
+*          if JOBQ = 'Q', Q contains the product Q1*Q.
+*          If JOBQ = 'N', Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q. LDQ >= max(1,N) if
+*          JOBQ = 'Q'; LDQ >= 1 otherwise.
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  NCYCLE  (output) INTEGER
+*          The number of cycles required for convergence.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          = 1:  the procedure does not converge after MAXIT cycles.
+*
+*  Internal Parameters
+*  ===================
+*
+*  MAXIT   INTEGER
+*          MAXIT specifies the total loops that the iterative procedure
+*          may take. If after MAXIT cycles, the routine fails to
+*          converge, we return INFO = 1.
+*
+*  Further Details
+*  ===============
+*
+*  CTGSJA essentially uses a variant of Kogbetliantz algorithm to reduce
+*  min(L,M-K)-by-L triangular (or trapezoidal) matrix A23 and L-by-L
+*  matrix B13 to the form:
+*
+*           U1'*A13*Q1 = C1*R1; V1'*B13*Q1 = S1*R1,
+*
+*  where U1, V1 and Q1 are unitary matrix, and Z' is the conjugate
+*  transpose of Z.  C1 and S1 are diagonal matrices satisfying
+*
+*                C1**2 + S1**2 = I,
+*
+*  and R1 is an L-by-L nonsingular upper triangular matrix.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            MAXIT
+      PARAMETER          ( MAXIT = 40 )
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+      COMPLEX            CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+*
+      LOGICAL            INITQ, INITU, INITV, UPPER, WANTQ, WANTU, WANTV
+      INTEGER            I, J, KCYCLE
+      REAL               A1, A3, B1, B3, CSQ, CSU, CSV, ERROR, GAMMA,
+     $                   RWK, SSMIN
+      COMPLEX            A2, B2, SNQ, SNU, SNV
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CLAGS2, CLAPLL, CLASET, CROT, CSSCAL,
+     $                   SLARTG, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, CONJG, MAX, MIN, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      INITU = LSAME( JOBU, 'I' )
+      WANTU = INITU .OR. LSAME( JOBU, 'U' )
+*
+      INITV = LSAME( JOBV, 'I' )
+      WANTV = INITV .OR. LSAME( JOBV, 'V' )
+*
+      INITQ = LSAME( JOBQ, 'I' )
+      WANTQ = INITQ .OR. LSAME( JOBQ, 'Q' )
+*
+      INFO = 0
+      IF( .NOT.( INITU .OR. WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( INITV .OR. WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( INITQ .OR. WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( P.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LDB.LT.MAX( 1, P ) ) THEN
+         INFO = -12
+      ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN
+         INFO = -18
+      ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN
+         INFO = -20
+      ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN
+         INFO = -22
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CTGSJA', -INFO )
+         RETURN
+      END IF
+*
+*     Initialize U, V and Q, if necessary
+*
+      IF( INITU )
+     $   CALL CLASET( 'Full', M, M, CZERO, CONE, U, LDU )
+      IF( INITV )
+     $   CALL CLASET( 'Full', P, P, CZERO, CONE, V, LDV )
+      IF( INITQ )
+     $   CALL CLASET( 'Full', N, N, CZERO, CONE, Q, LDQ )
+*
+*     Loop until convergence
+*
+      UPPER = .FALSE.
+      DO 40 KCYCLE = 1, MAXIT
+*
+         UPPER = .NOT.UPPER
+*
+         DO 20 I = 1, L - 1
+            DO 10 J = I + 1, L
+*
+               A1 = ZERO
+               A2 = CZERO
+               A3 = ZERO
+               IF( K+I.LE.M )
+     $            A1 = REAL( A( K+I, N-L+I ) )
+               IF( K+J.LE.M )
+     $            A3 = REAL( A( K+J, N-L+J ) )
+*
+               B1 = REAL( B( I, N-L+I ) )
+               B3 = REAL( B( J, N-L+J ) )
+*
+               IF( UPPER ) THEN
+                  IF( K+I.LE.M )
+     $               A2 = A( K+I, N-L+J )
+                  B2 = B( I, N-L+J )
+               ELSE
+                  IF( K+J.LE.M )
+     $               A2 = A( K+J, N-L+I )
+                  B2 = B( J, N-L+I )
+               END IF
+*
+               CALL CLAGS2( UPPER, A1, A2, A3, B1, B2, B3, CSU, SNU,
+     $                      CSV, SNV, CSQ, SNQ )
+*
+*              Update (K+I)-th and (K+J)-th rows of matrix A: U'*A
+*
+               IF( K+J.LE.M )
+     $            CALL CROT( L, A( K+J, N-L+1 ), LDA, A( K+I, N-L+1 ),
+     $                       LDA, CSU, CONJG( SNU ) )
+*
+*              Update I-th and J-th rows of matrix B: V'*B
+*
+               CALL CROT( L, B( J, N-L+1 ), LDB, B( I, N-L+1 ), LDB,
+     $                    CSV, CONJG( SNV ) )
+*
+*              Update (N-L+I)-th and (N-L+J)-th columns of matrices
+*              A and B: A*Q and B*Q
+*
+               CALL CROT( MIN( K+L, M ), A( 1, N-L+J ), 1,
+     $                    A( 1, N-L+I ), 1, CSQ, SNQ )
+*
+               CALL CROT( L, B( 1, N-L+J ), 1, B( 1, N-L+I ), 1, CSQ,
+     $                    SNQ )
+*
+               IF( UPPER ) THEN
+                  IF( K+I.LE.M )
+     $               A( K+I, N-L+J ) = CZERO
+                  B( I, N-L+J ) = CZERO
+               ELSE
+                  IF( K+J.LE.M )
+     $               A( K+J, N-L+I ) = CZERO
+                  B( J, N-L+I ) = CZERO
+               END IF
+*
+*              Ensure that the diagonal elements of A and B are real.
+*
+               IF( K+I.LE.M )
+     $            A( K+I, N-L+I ) = REAL( A( K+I, N-L+I ) )
+               IF( K+J.LE.M )
+     $            A( K+J, N-L+J ) = REAL( A( K+J, N-L+J ) )
+               B( I, N-L+I ) = REAL( B( I, N-L+I ) )
+               B( J, N-L+J ) = REAL( B( J, N-L+J ) )
+*
+*              Update unitary matrices U, V, Q, if desired.
+*
+               IF( WANTU .AND. K+J.LE.M )
+     $            CALL CROT( M, U( 1, K+J ), 1, U( 1, K+I ), 1, CSU,
+     $                       SNU )
+*
+               IF( WANTV )
+     $            CALL CROT( P, V( 1, J ), 1, V( 1, I ), 1, CSV, SNV )
+*
+               IF( WANTQ )
+     $            CALL CROT( N, Q( 1, N-L+J ), 1, Q( 1, N-L+I ), 1, CSQ,
+     $                       SNQ )
+*
+   10       CONTINUE
+   20    CONTINUE
+*
+         IF( .NOT.UPPER ) THEN
+*
+*           The matrices A13 and B13 were lower triangular at the start
+*           of the cycle, and are now upper triangular.
+*
+*           Convergence test: test the parallelism of the corresponding
+*           rows of A and B.
+*
+            ERROR = ZERO
+            DO 30 I = 1, MIN( L, M-K )
+               CALL CCOPY( L-I+1, A( K+I, N-L+I ), LDA, WORK, 1 )
+               CALL CCOPY( L-I+1, B( I, N-L+I ), LDB, WORK( L+1 ), 1 )
+               CALL CLAPLL( L-I+1, WORK, 1, WORK( L+1 ), 1, SSMIN )
+               ERROR = MAX( ERROR, SSMIN )
+   30       CONTINUE
+*
+            IF( ABS( ERROR ).LE.MIN( TOLA, TOLB ) )
+     $         GO TO 50
+         END IF
+*
+*        End of cycle loop
+*
+   40 CONTINUE
+*
+*     The algorithm has not converged after MAXIT cycles.
+*
+      INFO = 1
+      GO TO 100
+*
+   50 CONTINUE
+*
+*     If ERROR <= MIN(TOLA,TOLB), then the algorithm has converged.
+*     Compute the generalized singular value pairs (ALPHA, BETA), and
+*     set the triangular matrix R to array A.
+*
+      DO 60 I = 1, K
+         ALPHA( I ) = ONE
+         BETA( I ) = ZERO
+   60 CONTINUE
+*
+      DO 70 I = 1, MIN( L, M-K )
+*
+         A1 = REAL( A( K+I, N-L+I ) )
+         B1 = REAL( B( I, N-L+I ) )
+*
+         IF( A1.NE.ZERO ) THEN
+            GAMMA = B1 / A1
+*
+            IF( GAMMA.LT.ZERO ) THEN
+               CALL CSSCAL( L-I+1, -ONE, B( I, N-L+I ), LDB )
+               IF( WANTV )
+     $            CALL CSSCAL( P, -ONE, V( 1, I ), 1 )
+            END IF
+*
+            CALL SLARTG( ABS( GAMMA ), ONE, BETA( K+I ), ALPHA( K+I ),
+     $                   RWK )
+*
+            IF( ALPHA( K+I ).GE.BETA( K+I ) ) THEN
+               CALL CSSCAL( L-I+1, ONE / ALPHA( K+I ), A( K+I, N-L+I ),
+     $                      LDA )
+            ELSE
+               CALL CSSCAL( L-I+1, ONE / BETA( K+I ), B( I, N-L+I ),
+     $                      LDB )
+               CALL CCOPY( L-I+1, B( I, N-L+I ), LDB, A( K+I, N-L+I ),
+     $                     LDA )
+            END IF
+*
+         ELSE
+            ALPHA( K+I ) = ZERO
+            BETA( K+I ) = ONE
+            CALL CCOPY( L-I+1, B( I, N-L+I ), LDB, A( K+I, N-L+I ),
+     $                  LDA )
+         END IF
+   70 CONTINUE
+*
+*     Post-assignment
+*
+      DO 80 I = M + 1, K + L
+         ALPHA( I ) = ZERO
+         BETA( I ) = ONE
+   80 CONTINUE
+*
+      IF( K+L.LT.N ) THEN
+         DO 90 I = K + L + 1, N
+            ALPHA( I ) = ZERO
+            BETA( I ) = ZERO
+   90    CONTINUE
+      END IF
+*
+  100 CONTINUE
+      NCYCLE = KCYCLE
+*
+      RETURN
+*
+*     End of CTGSJA
+*
+      END
diff --git a/costa/native/external/lapack/ctgsna.f b/costa/native/external/lapack/ctgsna.f
new file mode 100644
index 000000000..74b4acfbe
--- /dev/null
+++ b/costa/native/external/lapack/ctgsna.f
@@ -0,0 +1,402 @@
+      SUBROUTINE CTGSNA( JOB, HOWMNY, SELECT, N, A, LDA, B, LDB, VL,
+     $                   LDVL, VR, LDVR, S, DIF, MM, M, WORK, LWORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          HOWMNY, JOB
+      INTEGER            INFO, LDA, LDB, LDVL, LDVR, LWORK, M, MM, N
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            SELECT( * )
+      INTEGER            IWORK( * )
+      REAL               DIF( * ), S( * )
+      COMPLEX            A( LDA, * ), B( LDB, * ), VL( LDVL, * ),
+     $                   VR( LDVR, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTGSNA estimates reciprocal condition numbers for specified
+*  eigenvalues and/or eigenvectors of a matrix pair (A, B).
+*
+*  (A, B) must be in generalized Schur canonical form, that is, A and
+*  B are both upper triangular.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          Specifies whether condition numbers are required for
+*          eigenvalues (S) or eigenvectors (DIF):
+*          = 'E': for eigenvalues only (S);
+*          = 'V': for eigenvectors only (DIF);
+*          = 'B': for both eigenvalues and eigenvectors (S and DIF).
+*
+*  HOWMNY  (input) CHARACTER*1
+*          = 'A': compute condition numbers for all eigenpairs;
+*          = 'S': compute condition numbers for selected eigenpairs
+*                 specified by the array SELECT.
+*
+*  SELECT  (input) LOGICAL array, dimension (N)
+*          If HOWMNY = 'S', SELECT specifies the eigenpairs for which
+*          condition numbers are required. To select condition numbers
+*          for the corresponding j-th eigenvalue and/or eigenvector,
+*          SELECT(j) must be set to .TRUE..
+*          If HOWMNY = 'A', SELECT is not referenced.
+*
+*  N       (input) INTEGER
+*          The order of the square matrix pair (A, B). N >= 0.
+*
+*  A       (input) COMPLEX array, dimension (LDA,N)
+*          The upper triangular matrix A in the pair (A,B).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  B       (input) COMPLEX array, dimension (LDB,N)
+*          The upper triangular matrix B in the pair (A, B).
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  VL      (input) COMPLEX array, dimension (LDVL,M)
+*          IF JOB = 'E' or 'B', VL must contain left eigenvectors of
+*          (A, B), corresponding to the eigenpairs specified by HOWMNY
+*          and SELECT.  The eigenvectors must be stored in consecutive
+*          columns of VL, as returned by CTGEVC.
+*          If JOB = 'V', VL is not referenced.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the array VL. LDVL >= 1; and
+*          If JOB = 'E' or 'B', LDVL >= N.
+*
+*  VR      (input) COMPLEX array, dimension (LDVR,M)
+*          IF JOB = 'E' or 'B', VR must contain right eigenvectors of
+*          (A, B), corresponding to the eigenpairs specified by HOWMNY
+*          and SELECT.  The eigenvectors must be stored in consecutive
+*          columns of VR, as returned by CTGEVC.
+*          If JOB = 'V', VR is not referenced.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the array VR. LDVR >= 1;
+*          If JOB = 'E' or 'B', LDVR >= N.
+*
+*  S       (output) REAL array, dimension (MM)
+*          If JOB = 'E' or 'B', the reciprocal condition numbers of the
+*          selected eigenvalues, stored in consecutive elements of the
+*          array.
+*          If JOB = 'V', S is not referenced.
+*
+*  DIF     (output) REAL array, dimension (MM)
+*          If JOB = 'V' or 'B', the estimated reciprocal condition
+*          numbers of the selected eigenvectors, stored in consecutive
+*          elements of the array.
+*          If the eigenvalues cannot be reordered to compute DIF(j),
+*          DIF(j) is set to 0; this can only occur when the true value
+*          would be very small anyway.
+*          For each eigenvalue/vector specified by SELECT, DIF stores
+*          a Frobenius norm-based estimate of Difl.
+*          If JOB = 'E', DIF is not referenced.
+*
+*  MM      (input) INTEGER
+*          The number of elements in the arrays S and DIF. MM >= M.
+*
+*  M       (output) INTEGER
+*          The number of elements of the arrays S and DIF used to store
+*          the specified condition numbers; for each selected eigenvalue
+*          one element is used. If HOWMNY = 'A', M is set to N.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          If JOB = 'E', WORK is not referenced.  Otherwise,
+*          on exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK  (input) INTEGER
+*          The dimension of the array WORK. LWORK >= 1.
+*          If JOB = 'V' or 'B', LWORK >= 2*N*N.
+*
+*  IWORK   (workspace) INTEGER array, dimension (N+2)
+*          If JOB = 'E', IWORK is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0: Successful exit
+*          < 0: If INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The reciprocal of the condition number of the i-th generalized
+*  eigenvalue w = (a, b) is defined as
+*
+*          S(I) = (|v'Au|**2 + |v'Bu|**2)**(1/2) / (norm(u)*norm(v))
+*
+*  where u and v are the right and left eigenvectors of (A, B)
+*  corresponding to w; |z| denotes the absolute value of the complex
+*  number, and norm(u) denotes the 2-norm of the vector u. The pair
+*  (a, b) corresponds to an eigenvalue w = a/b (= v'Au/v'Bu) of the
+*  matrix pair (A, B). If both a and b equal zero, then (A,B) is
+*  singular and S(I) = -1 is returned.
+*
+*  An approximate error bound on the chordal distance between the i-th
+*  computed generalized eigenvalue w and the corresponding exact
+*  eigenvalue lambda is
+*
+*          chord(w, lambda) <=   EPS * norm(A, B) / S(I),
+*
+*  where EPS is the machine precision.
+*
+*  The reciprocal of the condition number of the right eigenvector u
+*  and left eigenvector v corresponding to the generalized eigenvalue w
+*  is defined as follows. Suppose
+*
+*                   (A, B) = ( a   *  ) ( b  *  )  1
+*                            ( 0  A22 ),( 0 B22 )  n-1
+*                              1  n-1     1 n-1
+*
+*  Then the reciprocal condition number DIF(I) is
+*
+*          Difl[(a, b), (A22, B22)]  = sigma-min( Zl )
+*
+*  where sigma-min(Zl) denotes the smallest singular value of
+*
+*         Zl = [ kron(a, In-1) -kron(1, A22) ]
+*              [ kron(b, In-1) -kron(1, B22) ].
+*
+*  Here In-1 is the identity matrix of size n-1 and X' is the conjugate
+*  transpose of X. kron(X, Y) is the Kronecker product between the
+*  matrices X and Y.
+*
+*  We approximate the smallest singular value of Zl with an upper
+*  bound. This is done by CLATDF.
+*
+*  An approximate error bound for a computed eigenvector VL(i) or
+*  VR(i) is given by
+*
+*                      EPS * norm(A, B) / DIF(i).
+*
+*  See ref. [2-3] for more details and further references.
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  References
+*  ==========
+*
+*  [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the
+*      Generalized Real Schur Form of a Regular Matrix Pair (A, B), in
+*      M.S. Moonen et al (eds), Linear Algebra for Large Scale and
+*      Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218.
+*
+*  [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified
+*      Eigenvalues of a Regular Matrix Pair (A, B) and Condition
+*      Estimation: Theory, Algorithms and Software, Report
+*      UMINF - 94.04, Department of Computing Science, Umea University,
+*      S-901 87 Umea, Sweden, 1994. Also as LAPACK Working Note 87.
+*      To appear in Numerical Algorithms, 1996.
+*
+*  [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software
+*      for Solving the Generalized Sylvester Equation and Estimating the
+*      Separation between Regular Matrix Pairs, Report UMINF - 93.23,
+*      Department of Computing Science, Umea University, S-901 87 Umea,
+*      Sweden, December 1993, Revised April 1994, Also as LAPACK Working
+*      Note 75.
+*      To appear in ACM Trans. on Math. Software, Vol 22, No 1, 1996.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      INTEGER            IDIFJB
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0, IDIFJB = 3 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, SOMCON, WANTBH, WANTDF, WANTS
+      INTEGER            I, IERR, IFST, ILST, K, KS, LLWRK, LWMIN,
+     $                   N1, N2
+      REAL               BIGNUM, COND, EPS, LNRM, RNRM, SCALE, SMLNUM
+      COMPLEX            YHAX, YHBX
+*     ..
+*     .. Local Arrays ..
+      COMPLEX            DUMMY( 1 ), DUMMY1( 1 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SCNRM2, SLAMCH, SLAPY2
+      COMPLEX            CDOTC
+      EXTERNAL           LSAME, SCNRM2, SLAMCH, SLAPY2, CDOTC
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEMV, CLACPY, CTGEXC, CTGSYL, SLABAD, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, CMPLX, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      WANTBH = LSAME( JOB, 'B' )
+      WANTS = LSAME( JOB, 'E' ) .OR. WANTBH
+      WANTDF = LSAME( JOB, 'V' ) .OR. WANTBH
+*
+      SOMCON = LSAME( HOWMNY, 'S' )
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      IF( LSAME( JOB, 'V' ) .OR. LSAME( JOB, 'B' ) ) THEN
+         LWMIN = MAX( 1, 2*N*N )
+      ELSE
+         LWMIN = 1
+      END IF
+*
+      IF( .NOT.WANTS .AND. .NOT.WANTDF ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( HOWMNY, 'A' ) .AND. .NOT.SOMCON ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( WANTS .AND. LDVL.LT.N ) THEN
+         INFO = -10
+      ELSE IF( WANTS .AND. LDVR.LT.N ) THEN
+         INFO = -12
+      ELSE
+*
+*        Set M to the number of eigenpairs for which condition numbers
+*        are required, and test MM.
+*
+         IF( SOMCON ) THEN
+            M = 0
+            DO 10 K = 1, N
+               IF( SELECT( K ) )
+     $            M = M + 1
+   10       CONTINUE
+         ELSE
+            M = N
+         END IF
+*
+         IF( MM.LT.M ) THEN
+            INFO = -15
+         ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+            INFO = -18
+         END IF
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CTGSNA', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SLAMCH( 'S' ) / EPS
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+      LLWRK = LWORK - 2*N*N
+      KS = 0
+      DO 20 K = 1, N
+*
+*        Determine whether condition numbers are required for the k-th
+*        eigenpair.
+*
+         IF( SOMCON ) THEN
+            IF( .NOT.SELECT( K ) )
+     $         GO TO 20
+         END IF
+*
+         KS = KS + 1
+*
+         IF( WANTS ) THEN
+*
+*           Compute the reciprocal condition number of the k-th
+*           eigenvalue.
+*
+            RNRM = SCNRM2( N, VR( 1, KS ), 1 )
+            LNRM = SCNRM2( N, VL( 1, KS ), 1 )
+            CALL CGEMV( 'N', N, N, CMPLX( ONE, ZERO ), A, LDA,
+     $                  VR( 1, KS ), 1, CMPLX( ZERO, ZERO ), WORK, 1 )
+            YHAX = CDOTC( N, WORK, 1, VL( 1, KS ), 1 )
+            CALL CGEMV( 'N', N, N, CMPLX( ONE, ZERO ), B, LDB,
+     $                  VR( 1, KS ), 1, CMPLX( ZERO, ZERO ), WORK, 1 )
+            YHBX = CDOTC( N, WORK, 1, VL( 1, KS ), 1 )
+            COND = SLAPY2( ABS( YHAX ), ABS( YHBX ) )
+            IF( COND.EQ.ZERO ) THEN
+               S( KS ) = -ONE
+            ELSE
+               S( KS ) = COND / ( RNRM*LNRM )
+            END IF
+         END IF
+*
+         IF( WANTDF ) THEN
+            IF( N.EQ.1 ) THEN
+               DIF( KS ) = SLAPY2( ABS( A( 1, 1 ) ), ABS( B( 1, 1 ) ) )
+               GO TO 20
+            END IF
+*
+*           Estimate the reciprocal condition number of the k-th
+*           eigenvectors.
+*
+*           Copy the matrix (A, B) to the array WORK and move the
+*           (k,k)th pair to the (1,1) position.
+*
+            CALL CLACPY( 'Full', N, N, A, LDA, WORK, N )
+            CALL CLACPY( 'Full', N, N, B, LDB, WORK( N*N+1 ), N )
+            IFST = K
+            ILST = 1
+*
+            CALL CTGEXC( .FALSE., .FALSE., N, WORK, N, WORK( N*N+1 ), N,
+     $                   DUMMY, 1, DUMMY1, 1, IFST, ILST, IERR )
+*
+            IF( IERR.GT.0 ) THEN
+*
+*              Ill-conditioned problem - swap rejected.
+*
+               DIF( KS ) = ZERO
+            ELSE
+*
+*              Reordering successful, solve generalized Sylvester
+*              equation for R and L,
+*                         A22 * R - L * A11 = A12
+*                         B22 * R - L * B11 = B12,
+*              and compute estimate of Difl[(A11,B11), (A22, B22)].
+*
+               N1 = 1
+               N2 = N - N1
+               I = N*N + 1
+               CALL CTGSYL( 'N', IDIFJB, N2, N1, WORK( N*N1+N1+1 ), N,
+     $                      WORK, N, WORK( N1+1 ), N, WORK( N*N1+N1+I ),
+     $                      N, WORK( I ), N, WORK( N1+I ), N, SCALE,
+     $                      DIF( KS ), WORK( N*N*2+1 ), LLWRK, IWORK,
+     $                      IERR )
+            END IF
+         END IF
+*
+   20 CONTINUE
+      WORK( 1 ) = LWMIN
+      RETURN
+*
+*     End of CTGSNA
+*
+      END
diff --git a/costa/native/external/lapack/ctgsy2.f b/costa/native/external/lapack/ctgsy2.f
new file mode 100644
index 000000000..575df2494
--- /dev/null
+++ b/costa/native/external/lapack/ctgsy2.f
@@ -0,0 +1,357 @@
+      SUBROUTINE CTGSY2( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D,
+     $                   LDD, E, LDE, F, LDF, SCALE, RDSUM, RDSCAL,
+     $                   INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF, M, N
+      REAL               RDSCAL, RDSUM, SCALE
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), B( LDB, * ), C( LDC, * ),
+     $                   D( LDD, * ), E( LDE, * ), F( LDF, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTGSY2 solves the generalized Sylvester equation
+*
+*              A * R - L * B = scale *   C               (1)
+*              D * R - L * E = scale * F
+*
+*  using Level 1 and 2 BLAS, where R and L are unknown M-by-N matrices,
+*  (A, D), (B, E) and (C, F) are given matrix pairs of size M-by-M,
+*  N-by-N and M-by-N, respectively. A, B, D and E are upper triangular
+*  (i.e., (A,D) and (B,E) in generalized Schur form).
+*
+*  The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 is an output
+*  scaling factor chosen to avoid overflow.
+*
+*  In matrix notation solving equation (1) corresponds to solve
+*  Zx = scale * b, where Z is defined as
+*
+*         Z = [ kron(In, A)  -kron(B', Im) ]             (2)
+*             [ kron(In, D)  -kron(E', Im) ],
+*
+*  Ik is the identity matrix of size k and X' is the transpose of X.
+*  kron(X, Y) is the Kronecker product between the matrices X and Y.
+*
+*  If TRANS = 'C', y in the conjugate transposed system Z'y = scale*b
+*  is solved for, which is equivalent to solve for R and L in
+*
+*              A' * R  + D' * L   = scale *  C           (3)
+*              R  * B' + L  * E'  = scale * -F
+*
+*  This case is used to compute an estimate of Dif[(A, D), (B, E)] =
+*  = sigma_min(Z) using reverse communicaton with CLACON.
+*
+*  CTGSY2 also (IJOB >= 1) contributes to the computation in CTGSYL
+*  of an upper bound on the separation between to matrix pairs. Then
+*  the input (A, D), (B, E) are sub-pencils of two matrix pairs in
+*  CTGSYL.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER
+*          = 'N', solve the generalized Sylvester equation (1).
+*          = 'T': solve the 'transposed' system (3).
+*
+*  IJOB    (input) INTEGER
+*          Specifies what kind of functionality to be performed.
+*          =0: solve (1) only.
+*          =1: A contribution from this subsystem to a Frobenius
+*              norm-based estimate of the separation between two matrix
+*              pairs is computed. (look ahead strategy is used).
+*          =2: A contribution from this subsystem to a Frobenius
+*              norm-based estimate of the separation between two matrix
+*              pairs is computed. (SGECON on sub-systems is used.)
+*          Not referenced if TRANS = 'T'.
+*
+*  M       (input) INTEGER
+*          On entry, M specifies the order of A and D, and the row
+*          dimension of C, F, R and L.
+*
+*  N       (input) INTEGER
+*          On entry, N specifies the order of B and E, and the column
+*          dimension of C, F, R and L.
+*
+*  A       (input) COMPLEX array, dimension (LDA, M)
+*          On entry, A contains an upper triangular matrix.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the matrix A. LDA >= max(1, M).
+*
+*  B       (input) COMPLEX array, dimension (LDB, N)
+*          On entry, B contains an upper triangular matrix.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the matrix B. LDB >= max(1, N).
+*
+*  C       (input/ output) COMPLEX array, dimension (LDC, N)
+*          On entry, C contains the right-hand-side of the first matrix
+*          equation in (1).
+*          On exit, if IJOB = 0, C has been overwritten by the solution
+*          R.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the matrix C. LDC >= max(1, M).
+*
+*  D       (input) COMPLEX array, dimension (LDD, M)
+*          On entry, D contains an upper triangular matrix.
+*
+*  LDD     (input) INTEGER
+*          The leading dimension of the matrix D. LDD >= max(1, M).
+*
+*  E       (input) COMPLEX array, dimension (LDE, N)
+*          On entry, E contains an upper triangular matrix.
+*
+*  LDE     (input) INTEGER
+*          The leading dimension of the matrix E. LDE >= max(1, N).
+*
+*  F       (input/ output) COMPLEX array, dimension (LDF, N)
+*          On entry, F contains the right-hand-side of the second matrix
+*          equation in (1).
+*          On exit, if IJOB = 0, F has been overwritten by the solution
+*          L.
+*
+*  LDF     (input) INTEGER
+*          The leading dimension of the matrix F. LDF >= max(1, M).
+*
+*  SCALE   (output) REAL
+*          On exit, 0 <= SCALE <= 1. If 0 < SCALE < 1, the solutions
+*          R and L (C and F on entry) will hold the solutions to a
+*          slightly perturbed system but the input matrices A, B, D and
+*          E have not been changed. If SCALE = 0, R and L will hold the
+*          solutions to the homogeneous system with C = F = 0.
+*          Normally, SCALE = 1.
+*
+*  RDSUM   (input/output) REAL
+*          On entry, the sum of squares of computed contributions to
+*          the Dif-estimate under computation by CTGSYL, where the
+*          scaling factor RDSCAL (see below) has been factored out.
+*          On exit, the corresponding sum of squares updated with the
+*          contributions from the current sub-system.
+*          If TRANS = 'T' RDSUM is not touched.
+*          NOTE: RDSUM only makes sense when CTGSY2 is called by
+*          CTGSYL.
+*
+*  RDSCAL  (input/output) REAL
+*          On entry, scaling factor used to prevent overflow in RDSUM.
+*          On exit, RDSCAL is updated w.r.t. the current contributions
+*          in RDSUM.
+*          If TRANS = 'T', RDSCAL is not touched.
+*          NOTE: RDSCAL only makes sense when CTGSY2 is called by
+*          CTGSYL.
+*
+*  INFO    (output) INTEGER
+*          On exit, if INFO is set to
+*            =0: Successful exit
+*            <0: If INFO = -i, input argument number i is illegal.
+*            >0: The matrix pairs (A, D) and (B, E) have common or very
+*                close eigenvalues.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      INTEGER            LDZ
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0, LDZ = 2 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN
+      INTEGER            I, IERR, J, K
+      REAL               SCALOC
+      COMPLEX            ALPHA
+*     ..
+*     .. Local Arrays ..
+      INTEGER            IPIV( LDZ ), JPIV( LDZ )
+      COMPLEX            RHS( LDZ ), Z( LDZ, LDZ )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CGESC2, CGETC2, CSCAL, CLATDF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CMPLX, CONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test input parameters
+*
+      INFO = 0
+      IERR = 0
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -1
+      ELSE IF( ( IJOB.LT.0 ) .OR. ( IJOB.GT.2 ) ) THEN
+         INFO = -2
+      ELSE IF( M.LE.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LE.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LDD.LT.MAX( 1, M ) ) THEN
+         INFO = -12
+      ELSE IF( LDE.LT.MAX( 1, N ) ) THEN
+         INFO = -14
+      ELSE IF( LDF.LT.MAX( 1, M ) ) THEN
+         INFO = -16
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CTGSY2', -INFO )
+         RETURN
+      END IF
+*
+      IF( NOTRAN ) THEN
+*
+*        Solve (I, J) - system
+*           A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J)
+*           D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J)
+*        for I = M, M - 1, ..., 1; J = 1, 2, ..., N
+*
+         SCALE = ONE
+         SCALOC = ONE
+         DO 30 J = 1, N
+            DO 20 I = M, 1, -1
+*
+*              Build 2 by 2 system
+*
+               Z( 1, 1 ) = A( I, I )
+               Z( 2, 1 ) = D( I, I )
+               Z( 1, 2 ) = -B( J, J )
+               Z( 2, 2 ) = -E( J, J )
+*
+*              Set up right hand side(s)
+*
+               RHS( 1 ) = C( I, J )
+               RHS( 2 ) = F( I, J )
+*
+*              Solve Z * x = RHS
+*
+               CALL CGETC2( LDZ, Z, LDZ, IPIV, JPIV, IERR )
+               IF( IERR.GT.0 )
+     $            INFO = IERR
+               IF( IJOB.EQ.0 ) THEN
+                  CALL CGESC2( LDZ, Z, LDZ, RHS, IPIV, JPIV, SCALOC )
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 10 K = 1, N
+                        CALL CSCAL( M, CMPLX( SCALOC, ZERO ), C( 1, K ),
+     $                              1 )
+                        CALL CSCAL( M, CMPLX( SCALOC, ZERO ), F( 1, K ),
+     $                              1 )
+   10                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+               ELSE
+                  CALL CLATDF( IJOB, LDZ, Z, LDZ, RHS, RDSUM, RDSCAL,
+     $                         IPIV, JPIV )
+               END IF
+*
+*              Unpack solution vector(s)
+*
+               C( I, J ) = RHS( 1 )
+               F( I, J ) = RHS( 2 )
+*
+*              Substitute R(I, J) and L(I, J) into remaining equation.
+*
+               IF( I.GT.1 ) THEN
+                  ALPHA = -RHS( 1 )
+                  CALL CAXPY( I-1, ALPHA, A( 1, I ), 1, C( 1, J ), 1 )
+                  CALL CAXPY( I-1, ALPHA, D( 1, I ), 1, F( 1, J ), 1 )
+               END IF
+               IF( J.LT.N ) THEN
+                  CALL CAXPY( N-J, RHS( 2 ), B( J, J+1 ), LDB,
+     $                        C( I, J+1 ), LDC )
+                  CALL CAXPY( N-J, RHS( 2 ), E( J, J+1 ), LDE,
+     $                        F( I, J+1 ), LDF )
+               END IF
+*
+   20       CONTINUE
+   30    CONTINUE
+      ELSE
+*
+*        Solve transposed (I, J) - system:
+*           A(I, I)' * R(I, J) + D(I, I)' * L(J, J) = C(I, J)
+*           R(I, I) * B(J, J) + L(I, J) * E(J, J)   = -F(I, J)
+*        for I = 1, 2, ..., M, J = N, N - 1, ..., 1
+*
+         SCALE = ONE
+         SCALOC = ONE
+         DO 80 I = 1, M
+            DO 70 J = N, 1, -1
+*
+*              Build 2 by 2 system Z'
+*
+               Z( 1, 1 ) = CONJG( A( I, I ) )
+               Z( 2, 1 ) = -CONJG( B( J, J ) )
+               Z( 1, 2 ) = CONJG( D( I, I ) )
+               Z( 2, 2 ) = -CONJG( E( J, J ) )
+*
+*
+*              Set up right hand side(s)
+*
+               RHS( 1 ) = C( I, J )
+               RHS( 2 ) = F( I, J )
+*
+*              Solve Z' * x = RHS
+*
+               CALL CGETC2( LDZ, Z, LDZ, IPIV, JPIV, IERR )
+               IF( IERR.GT.0 )
+     $            INFO = IERR
+               CALL CGESC2( LDZ, Z, LDZ, RHS, IPIV, JPIV, SCALOC )
+               IF( SCALOC.NE.ONE ) THEN
+                  DO 40 K = 1, N
+                     CALL CSCAL( M, CMPLX( SCALOC, ZERO ), C( 1, K ),
+     $                           1 )
+                     CALL CSCAL( M, CMPLX( SCALOC, ZERO ), F( 1, K ),
+     $                           1 )
+   40             CONTINUE
+                  SCALE = SCALE*SCALOC
+               END IF
+*
+*              Unpack solution vector(s)
+*
+               C( I, J ) = RHS( 1 )
+               F( I, J ) = RHS( 2 )
+*
+*              Substitute R(I, J) and L(I, J) into remaining equation.
+*
+               DO 50 K = 1, J - 1
+                  F( I, K ) = F( I, K ) + RHS( 1 )*CONJG( B( K, J ) ) +
+     $                        RHS( 2 )*CONJG( E( K, J ) )
+   50          CONTINUE
+               DO 60 K = I + 1, M
+                  C( K, J ) = C( K, J ) - CONJG( A( I, K ) )*RHS( 1 ) -
+     $                        CONJG( D( I, K ) )*RHS( 2 )
+   60          CONTINUE
+*
+   70       CONTINUE
+   80    CONTINUE
+      END IF
+      RETURN
+*
+*     End of CTGSY2
+*
+      END
diff --git a/costa/native/external/lapack/ctgsyl.f b/costa/native/external/lapack/ctgsyl.f
new file mode 100644
index 000000000..f7cdb98be
--- /dev/null
+++ b/costa/native/external/lapack/ctgsyl.f
@@ -0,0 +1,548 @@
+      SUBROUTINE CTGSYL( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D,
+     $                   LDD, E, LDE, F, LDF, SCALE, DIF, WORK, LWORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF,
+     $                   LWORK, M, N
+      REAL               DIF, SCALE
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      COMPLEX            A( LDA, * ), B( LDB, * ), C( LDC, * ),
+     $                   D( LDD, * ), E( LDE, * ), F( LDF, * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTGSYL solves the generalized Sylvester equation:
+*
+*              A * R - L * B = scale * C            (1)
+*              D * R - L * E = scale * F
+*
+*  where R and L are unknown m-by-n matrices, (A, D), (B, E) and
+*  (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n,
+*  respectively, with complex entries. A, B, D and E are upper
+*  triangular (i.e., (A,D) and (B,E) in generalized Schur form).
+*
+*  The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1
+*  is an output scaling factor chosen to avoid overflow.
+*
+*  In matrix notation (1) is equivalent to solve Zx = scale*b, where Z
+*  is defined as
+*
+*         Z = [ kron(In, A)  -kron(B', Im) ]        (2)
+*             [ kron(In, D)  -kron(E', Im) ],
+*
+*  Here Ix is the identity matrix of size x and X' is the conjugate
+*  transpose of X. Kron(X, Y) is the Kronecker product between the
+*  matrices X and Y.
+*
+*  If TRANS = 'C', y in the conjugate transposed system Z'*y = scale*b
+*  is solved for, which is equivalent to solve for R and L in
+*
+*              A' * R + D' * L = scale * C           (3)
+*              R * B' + L * E' = scale * -F
+*
+*  This case (TRANS = 'C') is used to compute an one-norm-based estimate
+*  of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D)
+*  and (B,E), using CLACON.
+*
+*  If IJOB >= 1, CTGSYL computes a Frobenius norm-based estimate of
+*  Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the
+*  reciprocal of the smallest singular value of Z.
+*
+*  This is a level-3 BLAS algorithm.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': solve the generalized sylvester equation (1).
+*          = 'C': solve the "conjugate transposed" system (3).
+*
+*  IJOB    (input) INTEGER
+*          Specifies what kind of functionality to be performed.
+*          =0: solve (1) only.
+*          =1: The functionality of 0 and 3.
+*          =2: The functionality of 0 and 4.
+*          =3: Only an estimate of Dif[(A,D), (B,E)] is computed.
+*              (look ahead strategy is used).
+*          =4: Only an estimate of Dif[(A,D), (B,E)] is computed.
+*              (CGECON on sub-systems is used).
+*          Not referenced if TRANS = 'C'.
+*
+*  M       (input) INTEGER
+*          The order of the matrices A and D, and the row dimension of
+*          the matrices C, F, R and L.
+*
+*  N       (input) INTEGER
+*          The order of the matrices B and E, and the column dimension
+*          of the matrices C, F, R and L.
+*
+*  A       (input) COMPLEX array, dimension (LDA, M)
+*          The upper triangular matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1, M).
+*
+*  B       (input) COMPLEX array, dimension (LDB, N)
+*          The upper triangular matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1, N).
+*
+*  C       (input/output) COMPLEX array, dimension (LDC, N)
+*          On entry, C contains the right-hand-side of the first matrix
+*          equation in (1) or (3).
+*          On exit, if IJOB = 0, 1 or 2, C has been overwritten by
+*          the solution R. If IJOB = 3 or 4 and TRANS = 'N', C holds R,
+*          the solution achieved during the computation of the
+*          Dif-estimate.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1, M).
+*
+*  D       (input) COMPLEX array, dimension (LDD, M)
+*          The upper triangular matrix D.
+*
+*  LDD     (input) INTEGER
+*          The leading dimension of the array D. LDD >= max(1, M).
+*
+*  E       (input) COMPLEX array, dimension (LDE, N)
+*          The upper triangular matrix E.
+*
+*  LDE     (input) INTEGER
+*          The leading dimension of the array E. LDE >= max(1, N).
+*
+*  F       (input/output) COMPLEX array, dimension (LDF, N)
+*          On entry, F contains the right-hand-side of the second matrix
+*          equation in (1) or (3).
+*          On exit, if IJOB = 0, 1 or 2, F has been overwritten by
+*          the solution L. If IJOB = 3 or 4 and TRANS = 'N', F holds L,
+*          the solution achieved during the computation of the
+*          Dif-estimate.
+*
+*  LDF     (input) INTEGER
+*          The leading dimension of the array F. LDF >= max(1, M).
+*
+*  DIF     (output) REAL
+*          On exit DIF is the reciprocal of a lower bound of the
+*          reciprocal of the Dif-function, i.e. DIF is an upper bound of
+*          Dif[(A,D), (B,E)] = sigma-min(Z), where Z as in (2).
+*          IF IJOB = 0 or TRANS = 'C', DIF is not referenced.
+*
+*  SCALE   (output) REAL
+*          On exit SCALE is the scaling factor in (1) or (3).
+*          If 0 < SCALE < 1, C and F hold the solutions R and L, resp.,
+*          to a slightly perturbed system but the input matrices A, B,
+*          D and E have not been changed. If SCALE = 0, R and L will
+*          hold the solutions to the homogenious system with C = F = 0.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          IF IJOB = 0, WORK is not referenced.  Otherwise,
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK > = 1.
+*          If IJOB = 1 or 2 and TRANS = 'N', LWORK >= 2*M*N.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace) INTEGER array, dimension (M+N+2)
+*          If IJOB = 0, IWORK is not referenced.
+*
+*  INFO    (output) INTEGER
+*            =0: successful exit
+*            <0: If INFO = -i, the i-th argument had an illegal value.
+*            >0: (A, D) and (B, E) have common or very close
+*                eigenvalues.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  [1] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software
+*      for Solving the Generalized Sylvester Equation and Estimating the
+*      Separation between Regular Matrix Pairs, Report UMINF - 93.23,
+*      Department of Computing Science, Umea University, S-901 87 Umea,
+*      Sweden, December 1993, Revised April 1994, Also as LAPACK Working
+*      Note 75.  To appear in ACM Trans. on Math. Software, Vol 22,
+*      No 1, 1996.
+*
+*  [2] B. Kagstrom, A Perturbation Analysis of the Generalized Sylvester
+*      Equation (AR - LB, DR - LE ) = (C, F), SIAM J. Matrix Anal.
+*      Appl., 15(4):1045-1060, 1994.
+*
+*  [3] B. Kagstrom and L. Westin, Generalized Schur Methods with
+*      Condition Estimators for Solving the Generalized Sylvester
+*      Equation, IEEE Transactions on Automatic Control, Vol. 34, No. 7,
+*      July 1989, pp 745-751.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, NOTRAN
+      INTEGER            I, IE, IFUNC, IROUND, IS, ISOLVE, J, JE, JS, K,
+     $                   LINFO, LWMIN, MB, NB, P, PQ, Q
+      REAL               DSCALE, DSUM, SCALE2, SCALOC
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CGEMM, CLACPY, CSCAL, CTGSY2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CMPLX, MAX, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test input parameters
+*
+      INFO = 0
+      NOTRAN = LSAME( TRANS, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      IF( ( IJOB.EQ.1 .OR. IJOB.EQ.2 ) .AND. NOTRAN ) THEN
+         LWMIN = MAX( 1, 2*M*N )
+      ELSE
+         LWMIN = 1
+      END IF
+*
+      IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -1
+      ELSE IF( ( IJOB.LT.0 ) .OR. ( IJOB.GT.4 ) ) THEN
+         INFO = -2
+      ELSE IF( M.LE.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LE.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LDD.LT.MAX( 1, M ) ) THEN
+         INFO = -12
+      ELSE IF( LDE.LT.MAX( 1, N ) ) THEN
+         INFO = -14
+      ELSE IF( LDF.LT.MAX( 1, M ) ) THEN
+         INFO = -16
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -20
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CTGSYL', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Determine  optimal block sizes MB and NB
+*
+      MB = ILAENV( 2, 'CTGSYL', TRANS, M, N, -1, -1 )
+      NB = ILAENV( 5, 'CTGSYL', TRANS, M, N, -1, -1 )
+*
+      ISOLVE = 1
+      IFUNC = 0
+      IF( IJOB.GE.3 .AND. NOTRAN ) THEN
+         IFUNC = IJOB - 2
+         DO 10 J = 1, N
+            CALL CCOPY( M, CMPLX( ZERO, ZERO ), 0, C( 1, J ), 1 )
+            CALL CCOPY( M, CMPLX( ZERO, ZERO ), 0, F( 1, J ), 1 )
+   10    CONTINUE
+      ELSE IF( IJOB.GE.1 .AND. NOTRAN ) THEN
+         ISOLVE = 2
+      END IF
+*
+      IF( ( MB.LE.1 .AND. NB.LE.1 ) .OR. ( MB.GE.M .AND. NB.GE.N ) )
+     $     THEN
+*
+*        Use unblocked Level 2 solver
+*
+         DO 30 IROUND = 1, ISOLVE
+*
+            SCALE = ONE
+            DSCALE = ZERO
+            DSUM = ONE
+            PQ = M*N
+            CALL CTGSY2( TRANS, IFUNC, M, N, A, LDA, B, LDB, C, LDC, D,
+     $                   LDD, E, LDE, F, LDF, SCALE, DSUM, DSCALE,
+     $                   INFO )
+            IF( DSCALE.NE.ZERO ) THEN
+               IF( IJOB.EQ.1 .OR. IJOB.EQ.3 ) THEN
+                  DIF = SQRT( REAL( 2*M*N ) ) / ( DSCALE*SQRT( DSUM ) )
+               ELSE
+                  DIF = SQRT( REAL( PQ ) ) / ( DSCALE*SQRT( DSUM ) )
+               END IF
+            END IF
+            IF( ISOLVE.EQ.2 .AND. IROUND.EQ.1 ) THEN
+               IFUNC = IJOB
+               SCALE2 = SCALE
+               CALL CLACPY( 'F', M, N, C, LDC, WORK, M )
+               CALL CLACPY( 'F', M, N, F, LDF, WORK( M*N+1 ), M )
+               DO 20 J = 1, N
+                  CALL CCOPY( M, CMPLX( ZERO, ZERO ), 0, C( 1, J ), 1 )
+                  CALL CCOPY( M, CMPLX( ZERO, ZERO ), 0, F( 1, J ), 1 )
+   20          CONTINUE
+            ELSE IF( ISOLVE.EQ.2 .AND. IROUND.EQ.2 ) THEN
+               CALL CLACPY( 'F', M, N, WORK, M, C, LDC )
+               CALL CLACPY( 'F', M, N, WORK( M*N+1 ), M, F, LDF )
+               SCALE = SCALE2
+            END IF
+   30    CONTINUE
+*
+         RETURN
+*
+      END IF
+*
+*     Determine block structure of A
+*
+      P = 0
+      I = 1
+   40 CONTINUE
+      IF( I.GT.M )
+     $   GO TO 50
+      P = P + 1
+      IWORK( P ) = I
+      I = I + MB
+      IF( I.GE.M )
+     $   GO TO 50
+      GO TO 40
+   50 CONTINUE
+      IWORK( P+1 ) = M + 1
+      IF( IWORK( P ).EQ.IWORK( P+1 ) )
+     $   P = P - 1
+*
+*     Determine block structure of B
+*
+      Q = P + 1
+      J = 1
+   60 CONTINUE
+      IF( J.GT.N )
+     $   GO TO 70
+*
+      Q = Q + 1
+      IWORK( Q ) = J
+      J = J + NB
+      IF( J.GE.N )
+     $   GO TO 70
+      GO TO 60
+*
+   70 CONTINUE
+      IWORK( Q+1 ) = N + 1
+      IF( IWORK( Q ).EQ.IWORK( Q+1 ) )
+     $   Q = Q - 1
+*
+      IF( NOTRAN ) THEN
+         DO 150 IROUND = 1, ISOLVE
+*
+*           Solve (I, J) - subsystem
+*               A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J)
+*               D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J)
+*           for I = P, P - 1, ..., 1; J = 1, 2, ..., Q
+*
+            PQ = 0
+            SCALE = ONE
+            DSCALE = ZERO
+            DSUM = ONE
+            DO 130 J = P + 2, Q
+               JS = IWORK( J )
+               JE = IWORK( J+1 ) - 1
+               NB = JE - JS + 1
+               DO 120 I = P, 1, -1
+                  IS = IWORK( I )
+                  IE = IWORK( I+1 ) - 1
+                  MB = IE - IS + 1
+                  CALL CTGSY2( TRANS, IFUNC, MB, NB, A( IS, IS ), LDA,
+     $                         B( JS, JS ), LDB, C( IS, JS ), LDC,
+     $                         D( IS, IS ), LDD, E( JS, JS ), LDE,
+     $                         F( IS, JS ), LDF, SCALOC, DSUM, DSCALE,
+     $                         LINFO )
+                  IF( LINFO.GT.0 )
+     $               INFO = LINFO
+                  PQ = PQ + MB*NB
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 80 K = 1, JS - 1
+                        CALL CSCAL( M, CMPLX( SCALOC, ZERO ), C( 1, K ),
+     $                              1 )
+                        CALL CSCAL( M, CMPLX( SCALOC, ZERO ), F( 1, K ),
+     $                              1 )
+   80                CONTINUE
+                     DO 90 K = JS, JE
+                        CALL CSCAL( IS-1, CMPLX( SCALOC, ZERO ),
+     $                              C( 1, K ), 1 )
+                        CALL CSCAL( IS-1, CMPLX( SCALOC, ZERO ),
+     $                              F( 1, K ), 1 )
+   90                CONTINUE
+                     DO 100 K = JS, JE
+                        CALL CSCAL( M-IE, CMPLX( SCALOC, ZERO ),
+     $                              C( IE+1, K ), 1 )
+                        CALL CSCAL( M-IE, CMPLX( SCALOC, ZERO ),
+     $                              F( IE+1, K ), 1 )
+  100                CONTINUE
+                     DO 110 K = JE + 1, N
+                        CALL CSCAL( M, CMPLX( SCALOC, ZERO ), C( 1, K ),
+     $                              1 )
+                        CALL CSCAL( M, CMPLX( SCALOC, ZERO ), F( 1, K ),
+     $                              1 )
+  110                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+*
+*                 Substitute R(I,J) and L(I,J) into remaining equation.
+*
+                  IF( I.GT.1 ) THEN
+                     CALL CGEMM( 'N', 'N', IS-1, NB, MB,
+     $                           CMPLX( -ONE, ZERO ), A( 1, IS ), LDA,
+     $                           C( IS, JS ), LDC, CMPLX( ONE, ZERO ),
+     $                           C( 1, JS ), LDC )
+                     CALL CGEMM( 'N', 'N', IS-1, NB, MB,
+     $                           CMPLX( -ONE, ZERO ), D( 1, IS ), LDD,
+     $                           C( IS, JS ), LDC, CMPLX( ONE, ZERO ),
+     $                           F( 1, JS ), LDF )
+                  END IF
+                  IF( J.LT.Q ) THEN
+                     CALL CGEMM( 'N', 'N', MB, N-JE, NB,
+     $                           CMPLX( ONE, ZERO ), F( IS, JS ), LDF,
+     $                           B( JS, JE+1 ), LDB, CMPLX( ONE, ZERO ),
+     $                           C( IS, JE+1 ), LDC )
+                     CALL CGEMM( 'N', 'N', MB, N-JE, NB,
+     $                           CMPLX( ONE, ZERO ), F( IS, JS ), LDF,
+     $                           E( JS, JE+1 ), LDE, CMPLX( ONE, ZERO ),
+     $                           F( IS, JE+1 ), LDF )
+                  END IF
+  120          CONTINUE
+  130       CONTINUE
+            IF( DSCALE.NE.ZERO ) THEN
+               IF( IJOB.EQ.1 .OR. IJOB.EQ.3 ) THEN
+                  DIF = SQRT( REAL( 2*M*N ) ) / ( DSCALE*SQRT( DSUM ) )
+               ELSE
+                  DIF = SQRT( REAL( PQ ) ) / ( DSCALE*SQRT( DSUM ) )
+               END IF
+            END IF
+            IF( ISOLVE.EQ.2 .AND. IROUND.EQ.1 ) THEN
+               IFUNC = IJOB
+               SCALE2 = SCALE
+               CALL CLACPY( 'F', M, N, C, LDC, WORK, M )
+               CALL CLACPY( 'F', M, N, F, LDF, WORK( M*N+1 ), M )
+               DO 140 J = 1, N
+                  CALL CCOPY( M, CMPLX( ZERO, ZERO ), 0, C( 1, J ), 1 )
+                  CALL CCOPY( M, CMPLX( ZERO, ZERO ), 0, F( 1, J ), 1 )
+  140          CONTINUE
+            ELSE IF( ISOLVE.EQ.2 .AND. IROUND.EQ.2 ) THEN
+               CALL CLACPY( 'F', M, N, WORK, M, C, LDC )
+               CALL CLACPY( 'F', M, N, WORK( M*N+1 ), M, F, LDF )
+               SCALE = SCALE2
+            END IF
+  150    CONTINUE
+      ELSE
+*
+*        Solve transposed (I, J)-subsystem
+*            A(I, I)' * R(I, J) + D(I, I)' * L(I, J) = C(I, J)
+*            R(I, J) * B(J, J)  + L(I, J) * E(J, J) = -F(I, J)
+*        for I = 1,2,..., P; J = Q, Q-1,..., 1
+*
+         SCALE = ONE
+         DO 210 I = 1, P
+            IS = IWORK( I )
+            IE = IWORK( I+1 ) - 1
+            MB = IE - IS + 1
+            DO 200 J = Q, P + 2, -1
+               JS = IWORK( J )
+               JE = IWORK( J+1 ) - 1
+               NB = JE - JS + 1
+               CALL CTGSY2( TRANS, IFUNC, MB, NB, A( IS, IS ), LDA,
+     $                      B( JS, JS ), LDB, C( IS, JS ), LDC,
+     $                      D( IS, IS ), LDD, E( JS, JS ), LDE,
+     $                      F( IS, JS ), LDF, SCALOC, DSUM, DSCALE,
+     $                      LINFO )
+               IF( LINFO.GT.0 )
+     $            INFO = LINFO
+               IF( SCALOC.NE.ONE ) THEN
+                  DO 160 K = 1, JS - 1
+                     CALL CSCAL( M, CMPLX( SCALOC, ZERO ), C( 1, K ),
+     $                           1 )
+                     CALL CSCAL( M, CMPLX( SCALOC, ZERO ), F( 1, K ),
+     $                           1 )
+  160             CONTINUE
+                  DO 170 K = JS, JE
+                     CALL CSCAL( IS-1, CMPLX( SCALOC, ZERO ), C( 1, K ),
+     $                           1 )
+                     CALL CSCAL( IS-1, CMPLX( SCALOC, ZERO ), F( 1, K ),
+     $                           1 )
+  170             CONTINUE
+                  DO 180 K = JS, JE
+                     CALL CSCAL( M-IE, CMPLX( SCALOC, ZERO ),
+     $                           C( IE+1, K ), 1 )
+                     CALL CSCAL( M-IE, CMPLX( SCALOC, ZERO ),
+     $                           F( IE+1, K ), 1 )
+  180             CONTINUE
+                  DO 190 K = JE + 1, N
+                     CALL CSCAL( M, CMPLX( SCALOC, ZERO ), C( 1, K ),
+     $                           1 )
+                     CALL CSCAL( M, CMPLX( SCALOC, ZERO ), F( 1, K ),
+     $                           1 )
+  190             CONTINUE
+                  SCALE = SCALE*SCALOC
+               END IF
+*
+*              Substitute R(I,J) and L(I,J) into remaining equation.
+*
+               IF( J.GT.P+2 ) THEN
+                  CALL CGEMM( 'N', 'C', MB, JS-1, NB,
+     $                        CMPLX( ONE, ZERO ), C( IS, JS ), LDC,
+     $                        B( 1, JS ), LDB, CMPLX( ONE, ZERO ),
+     $                        F( IS, 1 ), LDF )
+                  CALL CGEMM( 'N', 'C', MB, JS-1, NB,
+     $                        CMPLX( ONE, ZERO ), F( IS, JS ), LDF,
+     $                        E( 1, JS ), LDE, CMPLX( ONE, ZERO ),
+     $                        F( IS, 1 ), LDF )
+               END IF
+               IF( I.LT.P ) THEN
+                  CALL CGEMM( 'C', 'N', M-IE, NB, MB,
+     $                        CMPLX( -ONE, ZERO ), A( IS, IE+1 ), LDA,
+     $                        C( IS, JS ), LDC, CMPLX( ONE, ZERO ),
+     $                        C( IE+1, JS ), LDC )
+                  CALL CGEMM( 'C', 'N', M-IE, NB, MB,
+     $                        CMPLX( -ONE, ZERO ), D( IS, IE+1 ), LDD,
+     $                        F( IS, JS ), LDF, CMPLX( ONE, ZERO ),
+     $                        C( IE+1, JS ), LDC )
+               END IF
+  200       CONTINUE
+  210    CONTINUE
+      END IF
+*
+      WORK( 1 ) = LWMIN
+*
+      RETURN
+*
+*     End of CTGSYL
+*
+      END
diff --git a/costa/native/external/lapack/ctpcon.f b/costa/native/external/lapack/ctpcon.f
new file mode 100644
index 000000000..46982d397
--- /dev/null
+++ b/costa/native/external/lapack/ctpcon.f
@@ -0,0 +1,194 @@
+      SUBROUTINE CTPCON( NORM, UPLO, DIAG, N, AP, RCOND, WORK, RWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORM, UPLO
+      INTEGER            INFO, N
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               RWORK( * )
+      COMPLEX            AP( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTPCON estimates the reciprocal of the condition number of a packed
+*  triangular matrix A, in either the 1-norm or the infinity-norm.
+*
+*  The norm of A is computed and an estimate is obtained for
+*  norm(inv(A)), then the reciprocal of the condition number is
+*  computed as
+*     RCOND = 1 / ( norm(A) * norm(inv(A)) ).
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies whether the 1-norm condition number or the
+*          infinity-norm condition number is required:
+*          = '1' or 'O':  1-norm;
+*          = 'I':         Infinity-norm.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input) COMPLEX array, dimension (N*(N+1)/2)
+*          The upper or lower triangular matrix A, packed columnwise in
+*          a linear array.  The j-th column of A is stored in the array
+*          AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*          If DIAG = 'U', the diagonal elements of A are not referenced
+*          and are assumed to be 1.
+*
+*  RCOND   (output) REAL
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(norm(A) * norm(inv(A))).
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, ONENRM, UPPER
+      CHARACTER          NORMIN
+      INTEGER            IX, KASE, KASE1
+      REAL               AINVNM, ANORM, SCALE, SMLNUM, XNORM
+      COMPLEX            ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ICAMAX
+      REAL               CLANTP, SLAMCH
+      EXTERNAL           LSAME, ICAMAX, CLANTP, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLACON, CLATPS, CSRSCL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MAX, REAL
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+      IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CTPCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      END IF
+*
+      RCOND = ZERO
+      SMLNUM = SLAMCH( 'Safe minimum' )*REAL( MAX( 1, N ) )
+*
+*     Compute the norm of the triangular matrix A.
+*
+      ANORM = CLANTP( NORM, UPLO, DIAG, N, AP, RWORK )
+*
+*     Continue only if ANORM > 0.
+*
+      IF( ANORM.GT.ZERO ) THEN
+*
+*        Estimate the norm of the inverse of A.
+*
+         AINVNM = ZERO
+         NORMIN = 'N'
+         IF( ONENRM ) THEN
+            KASE1 = 1
+         ELSE
+            KASE1 = 2
+         END IF
+         KASE = 0
+   10    CONTINUE
+         CALL CLACON( N, WORK( N+1 ), WORK, AINVNM, KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.KASE1 ) THEN
+*
+*              Multiply by inv(A).
+*
+               CALL CLATPS( UPLO, 'No transpose', DIAG, NORMIN, N, AP,
+     $                      WORK, SCALE, RWORK, INFO )
+            ELSE
+*
+*              Multiply by inv(A').
+*
+               CALL CLATPS( UPLO, 'Conjugate transpose', DIAG, NORMIN,
+     $                      N, AP, WORK, SCALE, RWORK, INFO )
+            END IF
+            NORMIN = 'Y'
+*
+*           Multiply by 1/SCALE if doing so will not cause overflow.
+*
+            IF( SCALE.NE.ONE ) THEN
+               IX = ICAMAX( N, WORK, 1 )
+               XNORM = CABS1( WORK( IX ) )
+               IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO )
+     $            GO TO 20
+               CALL CSRSCL( N, SCALE, WORK, 1 )
+            END IF
+            GO TO 10
+         END IF
+*
+*        Compute the estimate of the reciprocal condition number.
+*
+         IF( AINVNM.NE.ZERO )
+     $      RCOND = ( ONE / ANORM ) / AINVNM
+      END IF
+*
+   20 CONTINUE
+      RETURN
+*
+*     End of CTPCON
+*
+      END
diff --git a/costa/native/external/lapack/ctprfs.f b/costa/native/external/lapack/ctprfs.f
new file mode 100644
index 000000000..b448f2b9c
--- /dev/null
+++ b/costa/native/external/lapack/ctprfs.f
@@ -0,0 +1,387 @@
+      SUBROUTINE CTPRFS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, X, LDX,
+     $                   FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, TRANS, UPLO
+      INTEGER            INFO, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      REAL               BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX            AP( * ), B( LDB, * ), WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTPRFS provides error bounds and backward error estimates for the
+*  solution to a system of linear equations with a triangular packed
+*  coefficient matrix.
+*
+*  The solution matrix X must be computed by CTPTRS or some other
+*  means before entering this routine.  CTPRFS does not do iterative
+*  refinement because doing so cannot improve the backward error.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AP      (input) COMPLEX array, dimension (N*(N+1)/2)
+*          The upper or lower triangular matrix A, packed columnwise in
+*          a linear array.  The j-th column of A is stored in the array
+*          AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*          If DIAG = 'U', the diagonal elements of A are not referenced
+*          and are assumed to be 1.
+*
+*  B       (input) COMPLEX array, dimension (LDB,NRHS)
+*          The right hand side matrix B. 
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input) COMPLEX array, dimension (LDX,NRHS)
+*          The solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN, NOUNIT, UPPER
+      CHARACTER          TRANSN, TRANST
+      INTEGER            I, J, K, KASE, KC, NZ
+      REAL               EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+      COMPLEX            ZDUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CCOPY, CLACON, CTPMV, CTPSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MAX, REAL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CTPRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      IF( NOTRAN ) THEN
+         TRANSN = 'N'
+         TRANST = 'C'
+      ELSE
+         TRANSN = 'C'
+         TRANST = 'N'
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = SLAMCH( 'Epsilon' )
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 250 J = 1, NRHS
+*
+*        Compute residual R = B - op(A) * X,
+*        where op(A) = A, A**T, or A**H, depending on TRANS.
+*
+         CALL CCOPY( N, X( 1, J ), 1, WORK, 1 )
+         CALL CTPMV( UPLO, TRANS, DIAG, N, AP, WORK, 1 )
+         CALL CAXPY( N, -ONE, B( 1, J ), 1, WORK, 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 20 I = 1, N
+            RWORK( I ) = CABS1( B( I, J ) )
+   20    CONTINUE
+*
+         IF( NOTRAN ) THEN
+*
+*           Compute abs(A)*abs(X) + abs(B).
+*
+            IF( UPPER ) THEN
+               KC = 1
+               IF( NOUNIT ) THEN
+                  DO 40 K = 1, N
+                     XK = CABS1( X( K, J ) )
+                     DO 30 I = 1, K
+                        RWORK( I ) = RWORK( I ) +
+     $                               CABS1( AP( KC+I-1 ) )*XK
+   30                CONTINUE
+                     KC = KC + K
+   40             CONTINUE
+               ELSE
+                  DO 60 K = 1, N
+                     XK = CABS1( X( K, J ) )
+                     DO 50 I = 1, K - 1
+                        RWORK( I ) = RWORK( I ) +
+     $                               CABS1( AP( KC+I-1 ) )*XK
+   50                CONTINUE
+                     RWORK( K ) = RWORK( K ) + XK
+                     KC = KC + K
+   60             CONTINUE
+               END IF
+            ELSE
+               KC = 1
+               IF( NOUNIT ) THEN
+                  DO 80 K = 1, N
+                     XK = CABS1( X( K, J ) )
+                     DO 70 I = K, N
+                        RWORK( I ) = RWORK( I ) +
+     $                               CABS1( AP( KC+I-K ) )*XK
+   70                CONTINUE
+                     KC = KC + N - K + 1
+   80             CONTINUE
+               ELSE
+                  DO 100 K = 1, N
+                     XK = CABS1( X( K, J ) )
+                     DO 90 I = K + 1, N
+                        RWORK( I ) = RWORK( I ) +
+     $                               CABS1( AP( KC+I-K ) )*XK
+   90                CONTINUE
+                     RWORK( K ) = RWORK( K ) + XK
+                     KC = KC + N - K + 1
+  100             CONTINUE
+               END IF
+            END IF
+         ELSE
+*
+*           Compute abs(A**H)*abs(X) + abs(B).
+*
+            IF( UPPER ) THEN
+               KC = 1
+               IF( NOUNIT ) THEN
+                  DO 120 K = 1, N
+                     S = ZERO
+                     DO 110 I = 1, K
+                        S = S + CABS1( AP( KC+I-1 ) )*CABS1( X( I, J ) )
+  110                CONTINUE
+                     RWORK( K ) = RWORK( K ) + S
+                     KC = KC + K
+  120             CONTINUE
+               ELSE
+                  DO 140 K = 1, N
+                     S = CABS1( X( K, J ) )
+                     DO 130 I = 1, K - 1
+                        S = S + CABS1( AP( KC+I-1 ) )*CABS1( X( I, J ) )
+  130                CONTINUE
+                     RWORK( K ) = RWORK( K ) + S
+                     KC = KC + K
+  140             CONTINUE
+               END IF
+            ELSE
+               KC = 1
+               IF( NOUNIT ) THEN
+                  DO 160 K = 1, N
+                     S = ZERO
+                     DO 150 I = K, N
+                        S = S + CABS1( AP( KC+I-K ) )*CABS1( X( I, J ) )
+  150                CONTINUE
+                     RWORK( K ) = RWORK( K ) + S
+                     KC = KC + N - K + 1
+  160             CONTINUE
+               ELSE
+                  DO 180 K = 1, N
+                     S = CABS1( X( K, J ) )
+                     DO 170 I = K + 1, N
+                        S = S + CABS1( AP( KC+I-K ) )*CABS1( X( I, J ) )
+  170                CONTINUE
+                     RWORK( K ) = RWORK( K ) + S
+                     KC = KC + N - K + 1
+  180             CONTINUE
+               END IF
+            END IF
+         END IF
+         S = ZERO
+         DO 190 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) )
+            ELSE
+               S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) /
+     $             ( RWORK( I )+SAFE1 ) )
+            END IF
+  190    CONTINUE
+         BERR( J ) = S
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(op(A)))*
+*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(op(A)) is the inverse of op(A)
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(op(A))*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use CLACON to estimate the infinity-norm of the matrix
+*           inv(op(A)) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) )))
+*
+         DO 200 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I )
+            ELSE
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) +
+     $                      SAFE1
+            END IF
+  200    CONTINUE
+*
+         KASE = 0
+  210    CONTINUE
+         CALL CLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(op(A)**H).
+*
+               CALL CTPSV( UPLO, TRANST, DIAG, N, AP, WORK, 1 )
+               DO 220 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  220          CONTINUE
+            ELSE
+*
+*              Multiply by inv(op(A))*diag(W).
+*
+               DO 230 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  230          CONTINUE
+               CALL CTPSV( UPLO, TRANSN, DIAG, N, AP, WORK, 1 )
+            END IF
+            GO TO 210
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 240 I = 1, N
+            LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) )
+  240    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  250 CONTINUE
+*
+      RETURN
+*
+*     End of CTPRFS
+*
+      END
diff --git a/costa/native/external/lapack/ctptri.f b/costa/native/external/lapack/ctptri.f
new file mode 100644
index 000000000..baddb5f71
--- /dev/null
+++ b/costa/native/external/lapack/ctptri.f
@@ -0,0 +1,177 @@
+      SUBROUTINE CTPTRI( UPLO, DIAG, N, AP, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, UPLO
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            AP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTPTRI computes the inverse of a complex upper or lower triangular
+*  matrix A stored in packed format.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) COMPLEX array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangular matrix A, stored
+*          columnwise in a linear array.  The j-th column of A is stored
+*          in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*((2*n-j)/2) = A(i,j) for j<=i<=n.
+*          See below for further details.
+*          On exit, the (triangular) inverse of the original matrix, in
+*          the same packed storage format.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, A(i,i) is exactly zero.  The triangular
+*                matrix is singular and its inverse can not be computed.
+*
+*  Further Details
+*  ===============
+*
+*  A triangular matrix A can be transferred to packed storage using one
+*  of the following program segments:
+*
+*  UPLO = 'U':                      UPLO = 'L':
+*
+*        JC = 1                           JC = 1
+*        DO 2 J = 1, N                    DO 2 J = 1, N
+*           DO 1 I = 1, J                    DO 1 I = J, N
+*              AP(JC+I-1) = A(I,J)              AP(JC+I-J) = A(I,J)
+*      1    CONTINUE                    1    CONTINUE
+*           JC = JC + J                      JC = JC + N - J + 1
+*      2 CONTINUE                       2 CONTINUE
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ),
+     $                   ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, UPPER
+      INTEGER            J, JC, JCLAST, JJ
+      COMPLEX            AJJ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CSCAL, CTPMV, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOUNIT = LSAME( DIAG, 'N' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CTPTRI', -INFO )
+         RETURN
+      END IF
+*
+*     Check for singularity if non-unit.
+*
+      IF( NOUNIT ) THEN
+         IF( UPPER ) THEN
+            JJ = 0
+            DO 10 INFO = 1, N
+               JJ = JJ + INFO
+               IF( AP( JJ ).EQ.ZERO )
+     $            RETURN
+   10       CONTINUE
+         ELSE
+            JJ = 1
+            DO 20 INFO = 1, N
+               IF( AP( JJ ).EQ.ZERO )
+     $            RETURN
+               JJ = JJ + N - INFO + 1
+   20       CONTINUE
+         END IF
+         INFO = 0
+      END IF
+*
+      IF( UPPER ) THEN
+*
+*        Compute inverse of upper triangular matrix.
+*
+         JC = 1
+         DO 30 J = 1, N
+            IF( NOUNIT ) THEN
+               AP( JC+J-1 ) = ONE / AP( JC+J-1 )
+               AJJ = -AP( JC+J-1 )
+            ELSE
+               AJJ = -ONE
+            END IF
+*
+*           Compute elements 1:j-1 of j-th column.
+*
+            CALL CTPMV( 'Upper', 'No transpose', DIAG, J-1, AP,
+     $                  AP( JC ), 1 )
+            CALL CSCAL( J-1, AJJ, AP( JC ), 1 )
+            JC = JC + J
+   30    CONTINUE
+*
+      ELSE
+*
+*        Compute inverse of lower triangular matrix.
+*
+         JC = N*( N+1 ) / 2
+         DO 40 J = N, 1, -1
+            IF( NOUNIT ) THEN
+               AP( JC ) = ONE / AP( JC )
+               AJJ = -AP( JC )
+            ELSE
+               AJJ = -ONE
+            END IF
+            IF( J.LT.N ) THEN
+*
+*              Compute elements j+1:n of j-th column.
+*
+               CALL CTPMV( 'Lower', 'No transpose', DIAG, N-J,
+     $                     AP( JCLAST ), AP( JC+1 ), 1 )
+               CALL CSCAL( N-J, AJJ, AP( JC+1 ), 1 )
+            END IF
+            JCLAST = JC
+            JC = JC - N + J - 2
+   40    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CTPTRI
+*
+      END
diff --git a/costa/native/external/lapack/ctptrs.f b/costa/native/external/lapack/ctptrs.f
new file mode 100644
index 000000000..c3e481bd2
--- /dev/null
+++ b/costa/native/external/lapack/ctptrs.f
@@ -0,0 +1,154 @@
+      SUBROUTINE CTPTRS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, TRANS, UPLO
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            AP( * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTPTRS solves a triangular system of the form
+*
+*     A * X = B,  A**T * X = B,  or  A**H * X = B,
+*
+*  where A is a triangular matrix of order N stored in packed format,
+*  and B is an N-by-NRHS matrix.  A check is made to verify that A is
+*  nonsingular.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AP      (input) COMPLEX array, dimension (N*(N+1)/2)
+*          The upper or lower triangular matrix A, packed columnwise in
+*          a linear array.  The j-th column of A is stored in the array
+*          AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, if INFO = 0, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the i-th diagonal element of A is zero,
+*                indicating that the matrix is singular and the
+*                solutions X have not been computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, UPPER
+      INTEGER            J, JC
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CTPSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOUNIT = LSAME( DIAG, 'N' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.
+     $         LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CTPTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Check for singularity.
+*
+      IF( NOUNIT ) THEN
+         IF( UPPER ) THEN
+            JC = 1
+            DO 10 INFO = 1, N
+               IF( AP( JC+INFO-1 ).EQ.ZERO )
+     $            RETURN
+               JC = JC + INFO
+   10       CONTINUE
+         ELSE
+            JC = 1
+            DO 20 INFO = 1, N
+               IF( AP( JC ).EQ.ZERO )
+     $            RETURN
+               JC = JC + N - INFO + 1
+   20       CONTINUE
+         END IF
+      END IF
+      INFO = 0
+*
+*     Solve  A * x = b,  A**T * x = b,  or  A**H * x = b.
+*
+      DO 30 J = 1, NRHS
+         CALL CTPSV( UPLO, TRANS, DIAG, N, AP, B( 1, J ), 1 )
+   30 CONTINUE
+*
+      RETURN
+*
+*     End of CTPTRS
+*
+      END
diff --git a/costa/native/external/lapack/ctrcon.f b/costa/native/external/lapack/ctrcon.f
new file mode 100644
index 000000000..21f9fa3dd
--- /dev/null
+++ b/costa/native/external/lapack/ctrcon.f
@@ -0,0 +1,200 @@
+      SUBROUTINE CTRCON( NORM, UPLO, DIAG, N, A, LDA, RCOND, WORK,
+     $                   RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORM, UPLO
+      INTEGER            INFO, LDA, N
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               RWORK( * )
+      COMPLEX            A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTRCON estimates the reciprocal of the condition number of a
+*  triangular matrix A, in either the 1-norm or the infinity-norm.
+*
+*  The norm of A is computed and an estimate is obtained for
+*  norm(inv(A)), then the reciprocal of the condition number is
+*  computed as
+*     RCOND = 1 / ( norm(A) * norm(inv(A)) ).
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies whether the 1-norm condition number or the
+*          infinity-norm condition number is required:
+*          = '1' or 'O':  1-norm;
+*          = 'I':         Infinity-norm.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input) COMPLEX array, dimension (LDA,N)
+*          The triangular matrix A.  If UPLO = 'U', the leading N-by-N
+*          upper triangular part of the array A contains the upper
+*          triangular matrix, and the strictly lower triangular part of
+*          A is not referenced.  If UPLO = 'L', the leading N-by-N lower
+*          triangular part of the array A contains the lower triangular
+*          matrix, and the strictly upper triangular part of A is not
+*          referenced.  If DIAG = 'U', the diagonal elements of A are
+*          also not referenced and are assumed to be 1.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  RCOND   (output) REAL
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(norm(A) * norm(inv(A))).
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, ONENRM, UPPER
+      CHARACTER          NORMIN
+      INTEGER            IX, KASE, KASE1
+      REAL               AINVNM, ANORM, SCALE, SMLNUM, XNORM
+      COMPLEX            ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ICAMAX
+      REAL               CLANTR, SLAMCH
+      EXTERNAL           LSAME, ICAMAX, CLANTR, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLACON, CLATRS, CSRSCL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MAX, REAL
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+      IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CTRCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      END IF
+*
+      RCOND = ZERO
+      SMLNUM = SLAMCH( 'Safe minimum' )*REAL( MAX( 1, N ) )
+*
+*     Compute the norm of the triangular matrix A.
+*
+      ANORM = CLANTR( NORM, UPLO, DIAG, N, N, A, LDA, RWORK )
+*
+*     Continue only if ANORM > 0.
+*
+      IF( ANORM.GT.ZERO ) THEN
+*
+*        Estimate the norm of the inverse of A.
+*
+         AINVNM = ZERO
+         NORMIN = 'N'
+         IF( ONENRM ) THEN
+            KASE1 = 1
+         ELSE
+            KASE1 = 2
+         END IF
+         KASE = 0
+   10    CONTINUE
+         CALL CLACON( N, WORK( N+1 ), WORK, AINVNM, KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.KASE1 ) THEN
+*
+*              Multiply by inv(A).
+*
+               CALL CLATRS( UPLO, 'No transpose', DIAG, NORMIN, N, A,
+     $                      LDA, WORK, SCALE, RWORK, INFO )
+            ELSE
+*
+*              Multiply by inv(A').
+*
+               CALL CLATRS( UPLO, 'Conjugate transpose', DIAG, NORMIN,
+     $                      N, A, LDA, WORK, SCALE, RWORK, INFO )
+            END IF
+            NORMIN = 'Y'
+*
+*           Multiply by 1/SCALE if doing so will not cause overflow.
+*
+            IF( SCALE.NE.ONE ) THEN
+               IX = ICAMAX( N, WORK, 1 )
+               XNORM = CABS1( WORK( IX ) )
+               IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO )
+     $            GO TO 20
+               CALL CSRSCL( N, SCALE, WORK, 1 )
+            END IF
+            GO TO 10
+         END IF
+*
+*        Compute the estimate of the reciprocal condition number.
+*
+         IF( AINVNM.NE.ZERO )
+     $      RCOND = ( ONE / ANORM ) / AINVNM
+      END IF
+*
+   20 CONTINUE
+      RETURN
+*
+*     End of CTRCON
+*
+      END
diff --git a/costa/native/external/lapack/ctrevc.f b/costa/native/external/lapack/ctrevc.f
new file mode 100644
index 000000000..b352c4003
--- /dev/null
+++ b/costa/native/external/lapack/ctrevc.f
@@ -0,0 +1,390 @@
+      SUBROUTINE CTREVC( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR,
+     $                   LDVR, MM, M, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          HOWMNY, SIDE
+      INTEGER            INFO, LDT, LDVL, LDVR, M, MM, N
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            SELECT( * )
+      REAL               RWORK( * )
+      COMPLEX            T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTREVC computes some or all of the right and/or left eigenvectors of
+*  a complex upper triangular matrix T.
+*
+*  The right eigenvector x and the left eigenvector y of T corresponding
+*  to an eigenvalue w are defined by:
+*
+*               T*x = w*x,     y'*T = w*y'
+*
+*  where y' denotes the conjugate transpose of the vector y.
+*
+*  If all eigenvectors are requested, the routine may either return the
+*  matrices X and/or Y of right or left eigenvectors of T, or the
+*  products Q*X and/or Q*Y, where Q is an input unitary
+*  matrix. If T was obtained from the Schur factorization of an
+*  original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of
+*  right or left eigenvectors of A.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'R':  compute right eigenvectors only;
+*          = 'L':  compute left eigenvectors only;
+*          = 'B':  compute both right and left eigenvectors.
+*
+*  HOWMNY  (input) CHARACTER*1
+*          = 'A':  compute all right and/or left eigenvectors;
+*          = 'B':  compute all right and/or left eigenvectors,
+*                  and backtransform them using the input matrices
+*                  supplied in VR and/or VL;
+*          = 'S':  compute selected right and/or left eigenvectors,
+*                  specified by the logical array SELECT.
+*
+*  SELECT  (input) LOGICAL array, dimension (N)
+*          If HOWMNY = 'S', SELECT specifies the eigenvectors to be
+*          computed.
+*          If HOWMNY = 'A' or 'B', SELECT is not referenced.
+*          To select the eigenvector corresponding to the j-th
+*          eigenvalue, SELECT(j) must be set to .TRUE..
+*
+*  N       (input) INTEGER
+*          The order of the matrix T. N >= 0.
+*
+*  T       (input/output) COMPLEX array, dimension (LDT,N)
+*          The upper triangular matrix T.  T is modified, but restored
+*          on exit.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= max(1,N).
+*
+*  VL      (input/output) COMPLEX array, dimension (LDVL,MM)
+*          On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must
+*          contain an N-by-N matrix Q (usually the unitary matrix Q of
+*          Schur vectors returned by CHSEQR).
+*          On exit, if SIDE = 'L' or 'B', VL contains:
+*          if HOWMNY = 'A', the matrix Y of left eigenvectors of T;
+*                           VL is lower triangular. The i-th column
+*                           VL(i) of VL is the eigenvector corresponding
+*                           to T(i,i).
+*          if HOWMNY = 'B', the matrix Q*Y;
+*          if HOWMNY = 'S', the left eigenvectors of T specified by
+*                           SELECT, stored consecutively in the columns
+*                           of VL, in the same order as their
+*                           eigenvalues.
+*          If SIDE = 'R', VL is not referenced.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the array VL.  LDVL >= max(1,N) if
+*          SIDE = 'L' or 'B'; LDVL >= 1 otherwise.
+*
+*  VR      (input/output) COMPLEX array, dimension (LDVR,MM)
+*          On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must
+*          contain an N-by-N matrix Q (usually the unitary matrix Q of
+*          Schur vectors returned by CHSEQR).
+*          On exit, if SIDE = 'R' or 'B', VR contains:
+*          if HOWMNY = 'A', the matrix X of right eigenvectors of T;
+*                           VR is upper triangular. The i-th column
+*                           VR(i) of VR is the eigenvector corresponding
+*                           to T(i,i).
+*          if HOWMNY = 'B', the matrix Q*X;
+*          if HOWMNY = 'S', the right eigenvectors of T specified by
+*                           SELECT, stored consecutively in the columns
+*                           of VR, in the same order as their
+*                           eigenvalues.
+*          If SIDE = 'L', VR is not referenced.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the array VR.  LDVR >= max(1,N) if
+*           SIDE = 'R' or 'B'; LDVR >= 1 otherwise.
+*
+*  MM      (input) INTEGER
+*          The number of columns in the arrays VL and/or VR. MM >= M.
+*
+*  M       (output) INTEGER
+*          The number of columns in the arrays VL and/or VR actually
+*          used to store the eigenvectors.  If HOWMNY = 'A' or 'B', M
+*          is set to N.  Each selected eigenvector occupies one
+*          column.
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The algorithm used in this program is basically backward (forward)
+*  substitution, with scaling to make the the code robust against
+*  possible overflow.
+*
+*  Each eigenvector is normalized so that the element of largest
+*  magnitude has magnitude 1; here the magnitude of a complex number
+*  (x,y) is taken to be |x| + |y|.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+      COMPLEX            CMZERO, CMONE
+      PARAMETER          ( CMZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   CMONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLV, BOTHV, LEFTV, OVER, RIGHTV, SOMEV
+      INTEGER            I, II, IS, J, K, KI
+      REAL               OVFL, REMAX, SCALE, SMIN, SMLNUM, ULP, UNFL
+      COMPLEX            CDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ICAMAX
+      REAL               SCASUM, SLAMCH
+      EXTERNAL           LSAME, ICAMAX, SCASUM, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CCOPY, CGEMV, CLATRS, CSSCAL, SLABAD, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, CMPLX, CONJG, MAX, REAL
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      BOTHV = LSAME( SIDE, 'B' )
+      RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV
+      LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV
+*
+      ALLV = LSAME( HOWMNY, 'A' )
+      OVER = LSAME( HOWMNY, 'B' )
+      SOMEV = LSAME( HOWMNY, 'S' )
+*
+*     Set M to the number of columns required to store the selected
+*     eigenvectors.
+*
+      IF( SOMEV ) THEN
+         M = 0
+         DO 10 J = 1, N
+            IF( SELECT( J ) )
+     $         M = M + 1
+   10    CONTINUE
+      ELSE
+         M = N
+      END IF
+*
+      INFO = 0
+      IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN
+         INFO = -1
+      ELSE IF( .NOT.ALLV .AND. .NOT.OVER .AND. .NOT.SOMEV ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDT.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDVL.LT.1 .OR. ( LEFTV .AND. LDVL.LT.N ) ) THEN
+         INFO = -8
+      ELSE IF( LDVR.LT.1 .OR. ( RIGHTV .AND. LDVR.LT.N ) ) THEN
+         INFO = -10
+      ELSE IF( MM.LT.M ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CTREVC', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Set the constants to control overflow.
+*
+      UNFL = SLAMCH( 'Safe minimum' )
+      OVFL = ONE / UNFL
+      CALL SLABAD( UNFL, OVFL )
+      ULP = SLAMCH( 'Precision' )
+      SMLNUM = UNFL*( N / ULP )
+*
+*     Store the diagonal elements of T in working array WORK.
+*
+      DO 20 I = 1, N
+         WORK( I+N ) = T( I, I )
+   20 CONTINUE
+*
+*     Compute 1-norm of each column of strictly upper triangular
+*     part of T to control overflow in triangular solver.
+*
+      RWORK( 1 ) = ZERO
+      DO 30 J = 2, N
+         RWORK( J ) = SCASUM( J-1, T( 1, J ), 1 )
+   30 CONTINUE
+*
+      IF( RIGHTV ) THEN
+*
+*        Compute right eigenvectors.
+*
+         IS = M
+         DO 80 KI = N, 1, -1
+*
+            IF( SOMEV ) THEN
+               IF( .NOT.SELECT( KI ) )
+     $            GO TO 80
+            END IF
+            SMIN = MAX( ULP*( CABS1( T( KI, KI ) ) ), SMLNUM )
+*
+            WORK( 1 ) = CMONE
+*
+*           Form right-hand side.
+*
+            DO 40 K = 1, KI - 1
+               WORK( K ) = -T( K, KI )
+   40       CONTINUE
+*
+*           Solve the triangular system:
+*              (T(1:KI-1,1:KI-1) - T(KI,KI))*X = SCALE*WORK.
+*
+            DO 50 K = 1, KI - 1
+               T( K, K ) = T( K, K ) - T( KI, KI )
+               IF( CABS1( T( K, K ) ).LT.SMIN )
+     $            T( K, K ) = SMIN
+   50       CONTINUE
+*
+            IF( KI.GT.1 ) THEN
+               CALL CLATRS( 'Upper', 'No transpose', 'Non-unit', 'Y',
+     $                      KI-1, T, LDT, WORK( 1 ), SCALE, RWORK,
+     $                      INFO )
+               WORK( KI ) = SCALE
+            END IF
+*
+*           Copy the vector x or Q*x to VR and normalize.
+*
+            IF( .NOT.OVER ) THEN
+               CALL CCOPY( KI, WORK( 1 ), 1, VR( 1, IS ), 1 )
+*
+               II = ICAMAX( KI, VR( 1, IS ), 1 )
+               REMAX = ONE / CABS1( VR( II, IS ) )
+               CALL CSSCAL( KI, REMAX, VR( 1, IS ), 1 )
+*
+               DO 60 K = KI + 1, N
+                  VR( K, IS ) = CMZERO
+   60          CONTINUE
+            ELSE
+               IF( KI.GT.1 )
+     $            CALL CGEMV( 'N', N, KI-1, CMONE, VR, LDVR, WORK( 1 ),
+     $                        1, CMPLX( SCALE ), VR( 1, KI ), 1 )
+*
+               II = ICAMAX( N, VR( 1, KI ), 1 )
+               REMAX = ONE / CABS1( VR( II, KI ) )
+               CALL CSSCAL( N, REMAX, VR( 1, KI ), 1 )
+            END IF
+*
+*           Set back the original diagonal elements of T.
+*
+            DO 70 K = 1, KI - 1
+               T( K, K ) = WORK( K+N )
+   70       CONTINUE
+*
+            IS = IS - 1
+   80    CONTINUE
+      END IF
+*
+      IF( LEFTV ) THEN
+*
+*        Compute left eigenvectors.
+*
+         IS = 1
+         DO 130 KI = 1, N
+*
+            IF( SOMEV ) THEN
+               IF( .NOT.SELECT( KI ) )
+     $            GO TO 130
+            END IF
+            SMIN = MAX( ULP*( CABS1( T( KI, KI ) ) ), SMLNUM )
+*
+            WORK( N ) = CMONE
+*
+*           Form right-hand side.
+*
+            DO 90 K = KI + 1, N
+               WORK( K ) = -CONJG( T( KI, K ) )
+   90       CONTINUE
+*
+*           Solve the triangular system:
+*              (T(KI+1:N,KI+1:N) - T(KI,KI))'*X = SCALE*WORK.
+*
+            DO 100 K = KI + 1, N
+               T( K, K ) = T( K, K ) - T( KI, KI )
+               IF( CABS1( T( K, K ) ).LT.SMIN )
+     $            T( K, K ) = SMIN
+  100       CONTINUE
+*
+            IF( KI.LT.N ) THEN
+               CALL CLATRS( 'Upper', 'Conjugate transpose', 'Non-unit',
+     $                      'Y', N-KI, T( KI+1, KI+1 ), LDT,
+     $                      WORK( KI+1 ), SCALE, RWORK, INFO )
+               WORK( KI ) = SCALE
+            END IF
+*
+*           Copy the vector x or Q*x to VL and normalize.
+*
+            IF( .NOT.OVER ) THEN
+               CALL CCOPY( N-KI+1, WORK( KI ), 1, VL( KI, IS ), 1 )
+*
+               II = ICAMAX( N-KI+1, VL( KI, IS ), 1 ) + KI - 1
+               REMAX = ONE / CABS1( VL( II, IS ) )
+               CALL CSSCAL( N-KI+1, REMAX, VL( KI, IS ), 1 )
+*
+               DO 110 K = 1, KI - 1
+                  VL( K, IS ) = CMZERO
+  110          CONTINUE
+            ELSE
+               IF( KI.LT.N )
+     $            CALL CGEMV( 'N', N, N-KI, CMONE, VL( 1, KI+1 ), LDVL,
+     $                        WORK( KI+1 ), 1, CMPLX( SCALE ),
+     $                        VL( 1, KI ), 1 )
+*
+               II = ICAMAX( N, VL( 1, KI ), 1 )
+               REMAX = ONE / CABS1( VL( II, KI ) )
+               CALL CSSCAL( N, REMAX, VL( 1, KI ), 1 )
+            END IF
+*
+*           Set back the original diagonal elements of T.
+*
+            DO 120 K = KI + 1, N
+               T( K, K ) = WORK( K+N )
+  120       CONTINUE
+*
+            IS = IS + 1
+  130    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CTREVC
+*
+      END
diff --git a/costa/native/external/lapack/ctrexc.f b/costa/native/external/lapack/ctrexc.f
new file mode 100644
index 000000000..a9de47b69
--- /dev/null
+++ b/costa/native/external/lapack/ctrexc.f
@@ -0,0 +1,162 @@
+      SUBROUTINE CTREXC( COMPQ, N, T, LDT, Q, LDQ, IFST, ILST, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPQ
+      INTEGER            IFST, ILST, INFO, LDQ, LDT, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            Q( LDQ, * ), T( LDT, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTREXC reorders the Schur factorization of a complex matrix
+*  A = Q*T*Q**H, so that the diagonal element of T with row index IFST
+*  is moved to row ILST.
+*
+*  The Schur form T is reordered by a unitary similarity transformation
+*  Z**H*T*Z, and optionally the matrix Q of Schur vectors is updated by
+*  postmultplying it with Z.
+*
+*  Arguments
+*  =========
+*
+*  COMPQ   (input) CHARACTER*1
+*          = 'V':  update the matrix Q of Schur vectors;
+*          = 'N':  do not update Q.
+*
+*  N       (input) INTEGER
+*          The order of the matrix T. N >= 0.
+*
+*  T       (input/output) COMPLEX array, dimension (LDT,N)
+*          On entry, the upper triangular matrix T.
+*          On exit, the reordered upper triangular matrix.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= max(1,N).
+*
+*  Q       (input/output) COMPLEX array, dimension (LDQ,N)
+*          On entry, if COMPQ = 'V', the matrix Q of Schur vectors.
+*          On exit, if COMPQ = 'V', Q has been postmultiplied by the
+*          unitary transformation matrix Z which reorders T.
+*          If COMPQ = 'N', Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.  LDQ >= max(1,N).
+*
+*  IFST    (input) INTEGER
+*  ILST    (input) INTEGER
+*          Specify the reordering of the diagonal elements of T:
+*          The element with row index IFST is moved to row ILST by a
+*          sequence of transpositions between adjacent elements.
+*          1 <= IFST <= N; 1 <= ILST <= N.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            WANTQ
+      INTEGER            K, M1, M2, M3
+      REAL               CS
+      COMPLEX            SN, T11, T22, TEMP
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLARTG, CROT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters.
+*
+      INFO = 0
+      WANTQ = LSAME( COMPQ, 'V' )
+      IF( .NOT.LSAME( COMPQ, 'N' ) .AND. .NOT.WANTQ ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDT.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.MAX( 1, N ) ) ) THEN
+         INFO = -6
+      ELSE IF( IFST.LT.1 .OR. IFST.GT.N ) THEN
+         INFO = -7
+      ELSE IF( ILST.LT.1 .OR. ILST.GT.N ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CTREXC', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.1 .OR. IFST.EQ.ILST )
+     $   RETURN
+*
+      IF( IFST.LT.ILST ) THEN
+*
+*        Move the IFST-th diagonal element forward down the diagonal.
+*
+         M1 = 0
+         M2 = -1
+         M3 = 1
+      ELSE
+*
+*        Move the IFST-th diagonal element backward up the diagonal.
+*
+         M1 = -1
+         M2 = 0
+         M3 = -1
+      END IF
+*
+      DO 10 K = IFST + M1, ILST + M2, M3
+*
+*        Interchange the k-th and (k+1)-th diagonal elements.
+*
+         T11 = T( K, K )
+         T22 = T( K+1, K+1 )
+*
+*        Determine the transformation to perform the interchange.
+*
+         CALL CLARTG( T( K, K+1 ), T22-T11, CS, SN, TEMP )
+*
+*        Apply transformation to the matrix T.
+*
+         IF( K+2.LE.N )
+     $      CALL CROT( N-K-1, T( K, K+2 ), LDT, T( K+1, K+2 ), LDT, CS,
+     $                 SN )
+         CALL CROT( K-1, T( 1, K ), 1, T( 1, K+1 ), 1, CS, CONJG( SN ) )
+*
+         T( K, K ) = T22
+         T( K+1, K+1 ) = T11
+*
+         IF( WANTQ ) THEN
+*
+*           Accumulate transformation in the matrix Q.
+*
+            CALL CROT( N, Q( 1, K ), 1, Q( 1, K+1 ), 1, CS,
+     $                 CONJG( SN ) )
+         END IF
+*
+   10 CONTINUE
+*
+      RETURN
+*
+*     End of CTREXC
+*
+      END
diff --git a/costa/native/external/lapack/ctrrfs.f b/costa/native/external/lapack/ctrrfs.f
new file mode 100644
index 000000000..c9e84c3b3
--- /dev/null
+++ b/costa/native/external/lapack/ctrrfs.f
@@ -0,0 +1,378 @@
+      SUBROUTINE CTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, X,
+     $                   LDX, FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, TRANS, UPLO
+      INTEGER            INFO, LDA, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      REAL               BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX            A( LDA, * ), B( LDB, * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTRRFS provides error bounds and backward error estimates for the
+*  solution to a system of linear equations with a triangular
+*  coefficient matrix.
+*
+*  The solution matrix X must be computed by CTRTRS or some other
+*  means before entering this routine.  CTRRFS does not do iterative
+*  refinement because doing so cannot improve the backward error.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input) COMPLEX array, dimension (LDA,N)
+*          The triangular matrix A.  If UPLO = 'U', the leading N-by-N
+*          upper triangular part of the array A contains the upper
+*          triangular matrix, and the strictly lower triangular part of
+*          A is not referenced.  If UPLO = 'L', the leading N-by-N lower
+*          triangular part of the array A contains the lower triangular
+*          matrix, and the strictly upper triangular part of A is not
+*          referenced.  If DIAG = 'U', the diagonal elements of A are
+*          also not referenced and are assumed to be 1.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input) COMPLEX array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input) COMPLEX array, dimension (LDX,NRHS)
+*          The solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX array, dimension (2*N)
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN, NOUNIT, UPPER
+      CHARACTER          TRANSN, TRANST
+      INTEGER            I, J, K, KASE, NZ
+      REAL               EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+      COMPLEX            ZDUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CCOPY, CLACON, CTRMV, CTRSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MAX, REAL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( REAL( ZDUM ) ) + ABS( AIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CTRRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      IF( NOTRAN ) THEN
+         TRANSN = 'N'
+         TRANST = 'C'
+      ELSE
+         TRANSN = 'C'
+         TRANST = 'N'
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = SLAMCH( 'Epsilon' )
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 250 J = 1, NRHS
+*
+*        Compute residual R = B - op(A) * X,
+*        where op(A) = A, A**T, or A**H, depending on TRANS.
+*
+         CALL CCOPY( N, X( 1, J ), 1, WORK, 1 )
+         CALL CTRMV( UPLO, TRANS, DIAG, N, A, LDA, WORK, 1 )
+         CALL CAXPY( N, -ONE, B( 1, J ), 1, WORK, 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 20 I = 1, N
+            RWORK( I ) = CABS1( B( I, J ) )
+   20    CONTINUE
+*
+         IF( NOTRAN ) THEN
+*
+*           Compute abs(A)*abs(X) + abs(B).
+*
+            IF( UPPER ) THEN
+               IF( NOUNIT ) THEN
+                  DO 40 K = 1, N
+                     XK = CABS1( X( K, J ) )
+                     DO 30 I = 1, K
+                        RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK
+   30                CONTINUE
+   40             CONTINUE
+               ELSE
+                  DO 60 K = 1, N
+                     XK = CABS1( X( K, J ) )
+                     DO 50 I = 1, K - 1
+                        RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK
+   50                CONTINUE
+                     RWORK( K ) = RWORK( K ) + XK
+   60             CONTINUE
+               END IF
+            ELSE
+               IF( NOUNIT ) THEN
+                  DO 80 K = 1, N
+                     XK = CABS1( X( K, J ) )
+                     DO 70 I = K, N
+                        RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK
+   70                CONTINUE
+   80             CONTINUE
+               ELSE
+                  DO 100 K = 1, N
+                     XK = CABS1( X( K, J ) )
+                     DO 90 I = K + 1, N
+                        RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK
+   90                CONTINUE
+                     RWORK( K ) = RWORK( K ) + XK
+  100             CONTINUE
+               END IF
+            END IF
+         ELSE
+*
+*           Compute abs(A**H)*abs(X) + abs(B).
+*
+            IF( UPPER ) THEN
+               IF( NOUNIT ) THEN
+                  DO 120 K = 1, N
+                     S = ZERO
+                     DO 110 I = 1, K
+                        S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) )
+  110                CONTINUE
+                     RWORK( K ) = RWORK( K ) + S
+  120             CONTINUE
+               ELSE
+                  DO 140 K = 1, N
+                     S = CABS1( X( K, J ) )
+                     DO 130 I = 1, K - 1
+                        S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) )
+  130                CONTINUE
+                     RWORK( K ) = RWORK( K ) + S
+  140             CONTINUE
+               END IF
+            ELSE
+               IF( NOUNIT ) THEN
+                  DO 160 K = 1, N
+                     S = ZERO
+                     DO 150 I = K, N
+                        S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) )
+  150                CONTINUE
+                     RWORK( K ) = RWORK( K ) + S
+  160             CONTINUE
+               ELSE
+                  DO 180 K = 1, N
+                     S = CABS1( X( K, J ) )
+                     DO 170 I = K + 1, N
+                        S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) )
+  170                CONTINUE
+                     RWORK( K ) = RWORK( K ) + S
+  180             CONTINUE
+               END IF
+            END IF
+         END IF
+         S = ZERO
+         DO 190 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) )
+            ELSE
+               S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) /
+     $             ( RWORK( I )+SAFE1 ) )
+            END IF
+  190    CONTINUE
+         BERR( J ) = S
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(op(A)))*
+*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(op(A)) is the inverse of op(A)
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(op(A))*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use CLACON to estimate the infinity-norm of the matrix
+*           inv(op(A)) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) )))
+*
+         DO 200 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I )
+            ELSE
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) +
+     $                      SAFE1
+            END IF
+  200    CONTINUE
+*
+         KASE = 0
+  210    CONTINUE
+         CALL CLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(op(A)**H).
+*
+               CALL CTRSV( UPLO, TRANST, DIAG, N, A, LDA, WORK, 1 )
+               DO 220 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  220          CONTINUE
+            ELSE
+*
+*              Multiply by inv(op(A))*diag(W).
+*
+               DO 230 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  230          CONTINUE
+               CALL CTRSV( UPLO, TRANSN, DIAG, N, A, LDA, WORK, 1 )
+            END IF
+            GO TO 210
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 240 I = 1, N
+            LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) )
+  240    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  250 CONTINUE
+*
+      RETURN
+*
+*     End of CTRRFS
+*
+      END
diff --git a/costa/native/external/lapack/ctrsen.f b/costa/native/external/lapack/ctrsen.f
new file mode 100644
index 000000000..9b622d687
--- /dev/null
+++ b/costa/native/external/lapack/ctrsen.f
@@ -0,0 +1,358 @@
+      SUBROUTINE CTRSEN( JOB, COMPQ, SELECT, N, T, LDT, Q, LDQ, W, M, S,
+     $                   SEP, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPQ, JOB
+      INTEGER            INFO, LDQ, LDT, LWORK, M, N
+      REAL               S, SEP
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            SELECT( * )
+      COMPLEX            Q( LDQ, * ), T( LDT, * ), W( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTRSEN reorders the Schur factorization of a complex matrix
+*  A = Q*T*Q**H, so that a selected cluster of eigenvalues appears in
+*  the leading positions on the diagonal of the upper triangular matrix
+*  T, and the leading columns of Q form an orthonormal basis of the
+*  corresponding right invariant subspace.
+*
+*  Optionally the routine computes the reciprocal condition numbers of
+*  the cluster of eigenvalues and/or the invariant subspace.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          Specifies whether condition numbers are required for the
+*          cluster of eigenvalues (S) or the invariant subspace (SEP):
+*          = 'N': none;
+*          = 'E': for eigenvalues only (S);
+*          = 'V': for invariant subspace only (SEP);
+*          = 'B': for both eigenvalues and invariant subspace (S and
+*                 SEP).
+*
+*  COMPQ   (input) CHARACTER*1
+*          = 'V': update the matrix Q of Schur vectors;
+*          = 'N': do not update Q.
+*
+*  SELECT  (input) LOGICAL array, dimension (N)
+*          SELECT specifies the eigenvalues in the selected cluster. To
+*          select the j-th eigenvalue, SELECT(j) must be set to .TRUE..
+*
+*  N       (input) INTEGER
+*          The order of the matrix T. N >= 0.
+*
+*  T       (input/output) COMPLEX array, dimension (LDT,N)
+*          On entry, the upper triangular matrix T.
+*          On exit, T is overwritten by the reordered matrix T, with the
+*          selected eigenvalues as the leading diagonal elements.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= max(1,N).
+*
+*  Q       (input/output) COMPLEX array, dimension (LDQ,N)
+*          On entry, if COMPQ = 'V', the matrix Q of Schur vectors.
+*          On exit, if COMPQ = 'V', Q has been postmultiplied by the
+*          unitary transformation matrix which reorders T; the leading M
+*          columns of Q form an orthonormal basis for the specified
+*          invariant subspace.
+*          If COMPQ = 'N', Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.
+*          LDQ >= 1; and if COMPQ = 'V', LDQ >= N.
+*
+*  W       (output) COMPLEX array, dimension (N)
+*          The reordered eigenvalues of T, in the same order as they
+*          appear on the diagonal of T.
+*
+*  M       (output) INTEGER
+*          The dimension of the specified invariant subspace.
+*          0 <= M <= N.
+*
+*  S       (output) REAL
+*          If JOB = 'E' or 'B', S is a lower bound on the reciprocal
+*          condition number for the selected cluster of eigenvalues.
+*          S cannot underestimate the true reciprocal condition number
+*          by more than a factor of sqrt(N). If M = 0 or N, S = 1.
+*          If JOB = 'N' or 'V', S is not referenced.
+*
+*  SEP     (output) REAL
+*          If JOB = 'V' or 'B', SEP is the estimated reciprocal
+*          condition number of the specified invariant subspace. If
+*          M = 0 or N, SEP = norm(T).
+*          If JOB = 'N' or 'E', SEP is not referenced.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          If JOB = 'N', WORK is not referenced.  Otherwise,
+*          on exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If JOB = 'N', LWORK >= 1;
+*          if JOB = 'E', LWORK = M*(N-M);
+*          if JOB = 'V' or 'B', LWORK >= 2*M*(N-M).
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  CTRSEN first collects the selected eigenvalues by computing a unitary
+*  transformation Z to move them to the top left corner of T. In other
+*  words, the selected eigenvalues are the eigenvalues of T11 in:
+*
+*                Z'*T*Z = ( T11 T12 ) n1
+*                         (  0  T22 ) n2
+*                            n1  n2
+*
+*  where N = n1+n2 and Z' means the conjugate transpose of Z. The first
+*  n1 columns of Z span the specified invariant subspace of T.
+*
+*  If T has been obtained from the Schur factorization of a matrix
+*  A = Q*T*Q', then the reordered Schur factorization of A is given by
+*  A = (Q*Z)*(Z'*T*Z)*(Q*Z)', and the first n1 columns of Q*Z span the
+*  corresponding invariant subspace of A.
+*
+*  The reciprocal condition number of the average of the eigenvalues of
+*  T11 may be returned in S. S lies between 0 (very badly conditioned)
+*  and 1 (very well conditioned). It is computed as follows. First we
+*  compute R so that
+*
+*                         P = ( I  R ) n1
+*                             ( 0  0 ) n2
+*                               n1 n2
+*
+*  is the projector on the invariant subspace associated with T11.
+*  R is the solution of the Sylvester equation:
+*
+*                        T11*R - R*T22 = T12.
+*
+*  Let F-norm(M) denote the Frobenius-norm of M and 2-norm(M) denote
+*  the two-norm of M. Then S is computed as the lower bound
+*
+*                      (1 + F-norm(R)**2)**(-1/2)
+*
+*  on the reciprocal of 2-norm(P), the true reciprocal condition number.
+*  S cannot underestimate 1 / 2-norm(P) by more than a factor of
+*  sqrt(N).
+*
+*  An approximate error bound for the computed average of the
+*  eigenvalues of T11 is
+*
+*                         EPS * norm(T) / S
+*
+*  where EPS is the machine precision.
+*
+*  The reciprocal condition number of the right invariant subspace
+*  spanned by the first n1 columns of Z (or of Q*Z) is returned in SEP.
+*  SEP is defined as the separation of T11 and T22:
+*
+*                     sep( T11, T22 ) = sigma-min( C )
+*
+*  where sigma-min(C) is the smallest singular value of the
+*  n1*n2-by-n1*n2 matrix
+*
+*     C  = kprod( I(n2), T11 ) - kprod( transpose(T22), I(n1) )
+*
+*  I(m) is an m by m identity matrix, and kprod denotes the Kronecker
+*  product. We estimate sigma-min(C) by the reciprocal of an estimate of
+*  the 1-norm of inverse(C). The true reciprocal 1-norm of inverse(C)
+*  cannot differ from sigma-min(C) by more than a factor of sqrt(n1*n2).
+*
+*  When SEP is small, small changes in T can cause large changes in
+*  the invariant subspace. An approximate bound on the maximum angular
+*  error in the computed right invariant subspace is
+*
+*                      EPS * norm(T) / SEP
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, WANTBH, WANTQ, WANTS, WANTSP
+      INTEGER            IERR, K, KASE, KS, LWMIN, N1, N2, NN
+      REAL               EST, RNORM, SCALE
+*     ..
+*     .. Local Arrays ..
+      REAL               RWORK( 1 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               CLANGE
+      EXTERNAL           LSAME, CLANGE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLACON, CLACPY, CTREXC, CTRSYL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters.
+*
+      WANTBH = LSAME( JOB, 'B' )
+      WANTS = LSAME( JOB, 'E' ) .OR. WANTBH
+      WANTSP = LSAME( JOB, 'V' ) .OR. WANTBH
+      WANTQ = LSAME( COMPQ, 'V' )
+*
+*     Set M to the number of selected eigenvalues.
+*
+      M = 0
+      DO 10 K = 1, N
+         IF( SELECT( K ) )
+     $      M = M + 1
+   10 CONTINUE
+*
+      N1 = M
+      N2 = N - M
+      NN = N1*N2
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      IF( WANTSP ) THEN
+         LWMIN = MAX( 1, 2*NN )
+      ELSE IF( LSAME( JOB, 'N' ) ) THEN
+         LWMIN = 1
+      ELSE IF( LSAME( JOB, 'E' ) ) THEN
+         LWMIN = MAX( 1, NN )
+      END IF
+*
+      IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.WANTS .AND. .NOT.WANTSP )
+     $     THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( COMPQ, 'N' ) .AND. .NOT.WANTQ ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDT.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN
+         INFO = -8
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -14
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CTRSEN', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.N .OR. M.EQ.0 ) THEN
+         IF( WANTS )
+     $      S = ONE
+         IF( WANTSP )
+     $      SEP = CLANGE( '1', N, N, T, LDT, RWORK )
+         GO TO 40
+      END IF
+*
+*     Collect the selected eigenvalues at the top left corner of T.
+*
+      KS = 0
+      DO 20 K = 1, N
+         IF( SELECT( K ) ) THEN
+            KS = KS + 1
+*
+*           Swap the K-th eigenvalue to position KS.
+*
+            IF( K.NE.KS )
+     $         CALL CTREXC( COMPQ, N, T, LDT, Q, LDQ, K, KS, IERR )
+         END IF
+   20 CONTINUE
+*
+      IF( WANTS ) THEN
+*
+*        Solve the Sylvester equation for R:
+*
+*           T11*R - R*T22 = scale*T12
+*
+         CALL CLACPY( 'F', N1, N2, T( 1, N1+1 ), LDT, WORK, N1 )
+         CALL CTRSYL( 'N', 'N', -1, N1, N2, T, LDT, T( N1+1, N1+1 ),
+     $                LDT, WORK, N1, SCALE, IERR )
+*
+*        Estimate the reciprocal of the condition number of the cluster
+*        of eigenvalues.
+*
+         RNORM = CLANGE( 'F', N1, N2, WORK, N1, RWORK )
+         IF( RNORM.EQ.ZERO ) THEN
+            S = ONE
+         ELSE
+            S = SCALE / ( SQRT( SCALE*SCALE / RNORM+RNORM )*
+     $          SQRT( RNORM ) )
+         END IF
+      END IF
+*
+      IF( WANTSP ) THEN
+*
+*        Estimate sep(T11,T22).
+*
+         EST = ZERO
+         KASE = 0
+   30    CONTINUE
+         CALL CLACON( NN, WORK( NN+1 ), WORK, EST, KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Solve T11*R - R*T22 = scale*X.
+*
+               CALL CTRSYL( 'N', 'N', -1, N1, N2, T, LDT,
+     $                      T( N1+1, N1+1 ), LDT, WORK, N1, SCALE,
+     $                      IERR )
+            ELSE
+*
+*              Solve T11'*R - R*T22' = scale*X.
+*
+               CALL CTRSYL( 'C', 'C', -1, N1, N2, T, LDT,
+     $                      T( N1+1, N1+1 ), LDT, WORK, N1, SCALE,
+     $                      IERR )
+            END IF
+            GO TO 30
+         END IF
+*
+         SEP = SCALE / EST
+      END IF
+*
+   40 CONTINUE
+*
+*     Copy reordered eigenvalues to W.
+*
+      DO 50 K = 1, N
+         W( K ) = T( K, K )
+   50 CONTINUE
+*
+      WORK( 1 ) = LWMIN
+*
+      RETURN
+*
+*     End of CTRSEN
+*
+      END
diff --git a/costa/native/external/lapack/ctrsna.f b/costa/native/external/lapack/ctrsna.f
new file mode 100644
index 000000000..c4b3079ab
--- /dev/null
+++ b/costa/native/external/lapack/ctrsna.f
@@ -0,0 +1,354 @@
+      SUBROUTINE CTRSNA( JOB, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR,
+     $                   LDVR, S, SEP, MM, M, WORK, LDWORK, RWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          HOWMNY, JOB
+      INTEGER            INFO, LDT, LDVL, LDVR, LDWORK, M, MM, N
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            SELECT( * )
+      REAL               RWORK( * ), S( * ), SEP( * )
+      COMPLEX            T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ),
+     $                   WORK( LDWORK, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTRSNA estimates reciprocal condition numbers for specified
+*  eigenvalues and/or right eigenvectors of a complex upper triangular
+*  matrix T (or of any matrix Q*T*Q**H with Q unitary).
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          Specifies whether condition numbers are required for
+*          eigenvalues (S) or eigenvectors (SEP):
+*          = 'E': for eigenvalues only (S);
+*          = 'V': for eigenvectors only (SEP);
+*          = 'B': for both eigenvalues and eigenvectors (S and SEP).
+*
+*  HOWMNY  (input) CHARACTER*1
+*          = 'A': compute condition numbers for all eigenpairs;
+*          = 'S': compute condition numbers for selected eigenpairs
+*                 specified by the array SELECT.
+*
+*  SELECT  (input) LOGICAL array, dimension (N)
+*          If HOWMNY = 'S', SELECT specifies the eigenpairs for which
+*          condition numbers are required. To select condition numbers
+*          for the j-th eigenpair, SELECT(j) must be set to .TRUE..
+*          If HOWMNY = 'A', SELECT is not referenced.
+*
+*  N       (input) INTEGER
+*          The order of the matrix T. N >= 0.
+*
+*  T       (input) COMPLEX array, dimension (LDT,N)
+*          The upper triangular matrix T.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= max(1,N).
+*
+*  VL      (input) COMPLEX array, dimension (LDVL,M)
+*          If JOB = 'E' or 'B', VL must contain left eigenvectors of T
+*          (or of any Q*T*Q**H with Q unitary), corresponding to the
+*          eigenpairs specified by HOWMNY and SELECT. The eigenvectors
+*          must be stored in consecutive columns of VL, as returned by
+*          CHSEIN or CTREVC.
+*          If JOB = 'V', VL is not referenced.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the array VL.
+*          LDVL >= 1; and if JOB = 'E' or 'B', LDVL >= N.
+*
+*  VR      (input) COMPLEX array, dimension (LDVR,M)
+*          If JOB = 'E' or 'B', VR must contain right eigenvectors of T
+*          (or of any Q*T*Q**H with Q unitary), corresponding to the
+*          eigenpairs specified by HOWMNY and SELECT. The eigenvectors
+*          must be stored in consecutive columns of VR, as returned by
+*          CHSEIN or CTREVC.
+*          If JOB = 'V', VR is not referenced.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the array VR.
+*          LDVR >= 1; and if JOB = 'E' or 'B', LDVR >= N.
+*
+*  S       (output) REAL array, dimension (MM)
+*          If JOB = 'E' or 'B', the reciprocal condition numbers of the
+*          selected eigenvalues, stored in consecutive elements of the
+*          array. Thus S(j), SEP(j), and the j-th columns of VL and VR
+*          all correspond to the same eigenpair (but not in general the
+*          j-th eigenpair, unless all eigenpairs are selected).
+*          If JOB = 'V', S is not referenced.
+*
+*  SEP     (output) REAL array, dimension (MM)
+*          If JOB = 'V' or 'B', the estimated reciprocal condition
+*          numbers of the selected eigenvectors, stored in consecutive
+*          elements of the array.
+*          If JOB = 'E', SEP is not referenced.
+*
+*  MM      (input) INTEGER
+*          The number of elements in the arrays S (if JOB = 'E' or 'B')
+*           and/or SEP (if JOB = 'V' or 'B'). MM >= M.
+*
+*  M       (output) INTEGER
+*          The number of elements of the arrays S and/or SEP actually
+*          used to store the estimated condition numbers.
+*          If HOWMNY = 'A', M is set to N.
+*
+*  WORK    (workspace) COMPLEX array, dimension (LDWORK,N+1)
+*          If JOB = 'E', WORK is not referenced.
+*
+*  LDWORK  (input) INTEGER
+*          The leading dimension of the array WORK.
+*          LDWORK >= 1; and if JOB = 'V' or 'B', LDWORK >= N.
+*
+*  RWORK   (workspace) REAL array, dimension (N)
+*          If JOB = 'E', RWORK is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The reciprocal of the condition number of an eigenvalue lambda is
+*  defined as
+*
+*          S(lambda) = |v'*u| / (norm(u)*norm(v))
+*
+*  where u and v are the right and left eigenvectors of T corresponding
+*  to lambda; v' denotes the conjugate transpose of v, and norm(u)
+*  denotes the Euclidean norm. These reciprocal condition numbers always
+*  lie between zero (very badly conditioned) and one (very well
+*  conditioned). If n = 1, S(lambda) is defined to be 1.
+*
+*  An approximate error bound for a computed eigenvalue W(i) is given by
+*
+*                      EPS * norm(T) / S(i)
+*
+*  where EPS is the machine precision.
+*
+*  The reciprocal of the condition number of the right eigenvector u
+*  corresponding to lambda is defined as follows. Suppose
+*
+*              T = ( lambda  c  )
+*                  (   0    T22 )
+*
+*  Then the reciprocal condition number is
+*
+*          SEP( lambda, T22 ) = sigma-min( T22 - lambda*I )
+*
+*  where sigma-min denotes the smallest singular value. We approximate
+*  the smallest singular value by the reciprocal of an estimate of the
+*  one-norm of the inverse of T22 - lambda*I. If n = 1, SEP(1) is
+*  defined to be abs(T(1,1)).
+*
+*  An approximate error bound for a computed right eigenvector VR(i)
+*  is given by
+*
+*                      EPS * norm(T) / SEP(i)
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            SOMCON, WANTBH, WANTS, WANTSP
+      CHARACTER          NORMIN
+      INTEGER            I, IERR, IX, J, K, KASE, KS
+      REAL               BIGNUM, EPS, EST, LNRM, RNRM, SCALE, SMLNUM,
+     $                   XNORM
+      COMPLEX            CDUM, PROD
+*     ..
+*     .. Local Arrays ..
+      COMPLEX            DUMMY( 1 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ICAMAX
+      REAL               SCNRM2, SLAMCH
+      COMPLEX            CDOTC
+      EXTERNAL           LSAME, ICAMAX, SCNRM2, SLAMCH, CDOTC
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLACON, CLACPY, CLATRS, CSRSCL, CTREXC, SLABAD,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, MAX, REAL
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      WANTBH = LSAME( JOB, 'B' )
+      WANTS = LSAME( JOB, 'E' ) .OR. WANTBH
+      WANTSP = LSAME( JOB, 'V' ) .OR. WANTBH
+*
+      SOMCON = LSAME( HOWMNY, 'S' )
+*
+*     Set M to the number of eigenpairs for which condition numbers are
+*     to be computed.
+*
+      IF( SOMCON ) THEN
+         M = 0
+         DO 10 J = 1, N
+            IF( SELECT( J ) )
+     $         M = M + 1
+   10    CONTINUE
+      ELSE
+         M = N
+      END IF
+*
+      INFO = 0
+      IF( .NOT.WANTS .AND. .NOT.WANTSP ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( HOWMNY, 'A' ) .AND. .NOT.SOMCON ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDT.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDVL.LT.1 .OR. ( WANTS .AND. LDVL.LT.N ) ) THEN
+         INFO = -8
+      ELSE IF( LDVR.LT.1 .OR. ( WANTS .AND. LDVR.LT.N ) ) THEN
+         INFO = -10
+      ELSE IF( MM.LT.M ) THEN
+         INFO = -13
+      ELSE IF( LDWORK.LT.1 .OR. ( WANTSP .AND. LDWORK.LT.N ) ) THEN
+         INFO = -16
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CTRSNA', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( SOMCON ) THEN
+            IF( .NOT.SELECT( 1 ) )
+     $         RETURN
+         END IF
+         IF( WANTS )
+     $      S( 1 ) = ONE
+         IF( WANTSP )
+     $      SEP( 1 ) = ABS( T( 1, 1 ) )
+         RETURN
+      END IF
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SLAMCH( 'S' ) / EPS
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+*
+      KS = 1
+      DO 50 K = 1, N
+*
+         IF( SOMCON ) THEN
+            IF( .NOT.SELECT( K ) )
+     $         GO TO 50
+         END IF
+*
+         IF( WANTS ) THEN
+*
+*           Compute the reciprocal condition number of the k-th
+*           eigenvalue.
+*
+            PROD = CDOTC( N, VR( 1, KS ), 1, VL( 1, KS ), 1 )
+            RNRM = SCNRM2( N, VR( 1, KS ), 1 )
+            LNRM = SCNRM2( N, VL( 1, KS ), 1 )
+            S( KS ) = ABS( PROD ) / ( RNRM*LNRM )
+*
+         END IF
+*
+         IF( WANTSP ) THEN
+*
+*           Estimate the reciprocal condition number of the k-th
+*           eigenvector.
+*
+*           Copy the matrix T to the array WORK and swap the k-th
+*           diagonal element to the (1,1) position.
+*
+            CALL CLACPY( 'Full', N, N, T, LDT, WORK, LDWORK )
+            CALL CTREXC( 'No Q', N, WORK, LDWORK, DUMMY, 1, K, 1, IERR )
+*
+*           Form  C = T22 - lambda*I in WORK(2:N,2:N).
+*
+            DO 20 I = 2, N
+               WORK( I, I ) = WORK( I, I ) - WORK( 1, 1 )
+   20       CONTINUE
+*
+*           Estimate a lower bound for the 1-norm of inv(C'). The 1st
+*           and (N+1)th columns of WORK are used to store work vectors.
+*
+            SEP( KS ) = ZERO
+            EST = ZERO
+            KASE = 0
+            NORMIN = 'N'
+   30       CONTINUE
+            CALL CLACON( N-1, WORK( 1, N+1 ), WORK, EST, KASE )
+*
+            IF( KASE.NE.0 ) THEN
+               IF( KASE.EQ.1 ) THEN
+*
+*                 Solve C'*x = scale*b
+*
+                  CALL CLATRS( 'Upper', 'Conjugate transpose',
+     $                         'Nonunit', NORMIN, N-1, WORK( 2, 2 ),
+     $                         LDWORK, WORK, SCALE, RWORK, IERR )
+               ELSE
+*
+*                 Solve C*x = scale*b
+*
+                  CALL CLATRS( 'Upper', 'No transpose', 'Nonunit',
+     $                         NORMIN, N-1, WORK( 2, 2 ), LDWORK, WORK,
+     $                         SCALE, RWORK, IERR )
+               END IF
+               NORMIN = 'Y'
+               IF( SCALE.NE.ONE ) THEN
+*
+*                 Multiply by 1/SCALE if doing so will not cause
+*                 overflow.
+*
+                  IX = ICAMAX( N-1, WORK, 1 )
+                  XNORM = CABS1( WORK( IX, 1 ) )
+                  IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO )
+     $               GO TO 40
+                  CALL CSRSCL( N, SCALE, WORK, 1 )
+               END IF
+               GO TO 30
+            END IF
+*
+            SEP( KS ) = ONE / MAX( EST, SMLNUM )
+         END IF
+*
+   40    CONTINUE
+         KS = KS + 1
+   50 CONTINUE
+      RETURN
+*
+*     End of CTRSNA
+*
+      END
diff --git a/costa/native/external/lapack/ctrsyl.f b/costa/native/external/lapack/ctrsyl.f
new file mode 100644
index 000000000..aa0faa5aa
--- /dev/null
+++ b/costa/native/external/lapack/ctrsyl.f
@@ -0,0 +1,368 @@
+      SUBROUTINE CTRSYL( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C,
+     $                   LDC, SCALE, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANA, TRANB
+      INTEGER            INFO, ISGN, LDA, LDB, LDC, M, N
+      REAL               SCALE
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTRSYL solves the complex Sylvester matrix equation:
+*
+*     op(A)*X + X*op(B) = scale*C or
+*     op(A)*X - X*op(B) = scale*C,
+*
+*  where op(A) = A or A**H, and A and B are both upper triangular. A is
+*  M-by-M and B is N-by-N; the right hand side C and the solution X are
+*  M-by-N; and scale is an output scale factor, set <= 1 to avoid
+*  overflow in X.
+*
+*  Arguments
+*  =========
+*
+*  TRANA   (input) CHARACTER*1
+*          Specifies the option op(A):
+*          = 'N': op(A) = A    (No transpose)
+*          = 'C': op(A) = A**H (Conjugate transpose)
+*
+*  TRANB   (input) CHARACTER*1
+*          Specifies the option op(B):
+*          = 'N': op(B) = B    (No transpose)
+*          = 'C': op(B) = B**H (Conjugate transpose)
+*
+*  ISGN    (input) INTEGER
+*          Specifies the sign in the equation:
+*          = +1: solve op(A)*X + X*op(B) = scale*C
+*          = -1: solve op(A)*X - X*op(B) = scale*C
+*
+*  M       (input) INTEGER
+*          The order of the matrix A, and the number of rows in the
+*          matrices X and C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The order of the matrix B, and the number of columns in the
+*          matrices X and C. N >= 0.
+*
+*  A       (input) COMPLEX array, dimension (LDA,M)
+*          The upper triangular matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  B       (input) COMPLEX array, dimension (LDB,N)
+*          The upper triangular matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  C       (input/output) COMPLEX array, dimension (LDC,N)
+*          On entry, the M-by-N right hand side matrix C.
+*          On exit, C is overwritten by the solution matrix X.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M)
+*
+*  SCALE   (output) REAL
+*          The scale factor, scale, set <= 1 to avoid overflow in X.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          = 1: A and B have common or very close eigenvalues; perturbed
+*               values were used to solve the equation (but the matrices
+*               A and B are unchanged).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRNA, NOTRNB
+      INTEGER            J, K, L
+      REAL               BIGNUM, DA11, DB, EPS, SCALOC, SGN, SMIN,
+     $                   SMLNUM
+      COMPLEX            A11, SUML, SUMR, VEC, X11
+*     ..
+*     .. Local Arrays ..
+      REAL               DUM( 1 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               CLANGE, SLAMCH
+      COMPLEX            CDOTC, CDOTU, CLADIV
+      EXTERNAL           LSAME, CLANGE, SLAMCH, CDOTC, CDOTU, CLADIV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CSSCAL, SLABAD, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, AIMAG, CMPLX, CONJG, MAX, MIN, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and Test input parameters
+*
+      NOTRNA = LSAME( TRANA, 'N' )
+      NOTRNB = LSAME( TRANB, 'N' )
+*
+      INFO = 0
+      IF( .NOT.NOTRNA .AND. .NOT.LSAME( TRANA, 'T' ) .AND. .NOT.
+     $    LSAME( TRANA, 'C' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRNB .AND. .NOT.LSAME( TRANB, 'T' ) .AND. .NOT.
+     $         LSAME( TRANB, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( ISGN.NE.1 .AND. ISGN.NE.-1 ) THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CTRSYL', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     Set constants to control overflow
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SMLNUM*REAL( M*N ) / EPS
+      BIGNUM = ONE / SMLNUM
+      SMIN = MAX( SMLNUM, EPS*CLANGE( 'M', M, M, A, LDA, DUM ),
+     $       EPS*CLANGE( 'M', N, N, B, LDB, DUM ) )
+      SCALE = ONE
+      SGN = ISGN
+*
+      IF( NOTRNA .AND. NOTRNB ) THEN
+*
+*        Solve    A*X + ISGN*X*B = scale*C.
+*
+*        The (K,L)th block of X is determined starting from
+*        bottom-left corner column by column by
+*
+*            A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L)
+*
+*        Where
+*                    M                        L-1
+*          R(K,L) = SUM [A(K,I)*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)].
+*                  I=K+1                      J=1
+*
+         DO 30 L = 1, N
+            DO 20 K = M, 1, -1
+*
+               SUML = CDOTU( M-K, A( K, MIN( K+1, M ) ), LDA,
+     $                C( MIN( K+1, M ), L ), 1 )
+               SUMR = CDOTU( L-1, C( K, 1 ), LDC, B( 1, L ), 1 )
+               VEC = C( K, L ) - ( SUML+SGN*SUMR )
+*
+               SCALOC = ONE
+               A11 = A( K, K ) + SGN*B( L, L )
+               DA11 = ABS( REAL( A11 ) ) + ABS( AIMAG( A11 ) )
+               IF( DA11.LE.SMIN ) THEN
+                  A11 = SMIN
+                  DA11 = SMIN
+                  INFO = 1
+               END IF
+               DB = ABS( REAL( VEC ) ) + ABS( AIMAG( VEC ) )
+               IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN
+                  IF( DB.GT.BIGNUM*DA11 )
+     $               SCALOC = ONE / DB
+               END IF
+               X11 = CLADIV( VEC*CMPLX( SCALOC ), A11 )
+*
+               IF( SCALOC.NE.ONE ) THEN
+                  DO 10 J = 1, N
+                     CALL CSSCAL( M, SCALOC, C( 1, J ), 1 )
+   10             CONTINUE
+                  SCALE = SCALE*SCALOC
+               END IF
+               C( K, L ) = X11
+*
+   20       CONTINUE
+   30    CONTINUE
+*
+      ELSE IF( .NOT.NOTRNA .AND. NOTRNB ) THEN
+*
+*        Solve    A' *X + ISGN*X*B = scale*C.
+*
+*        The (K,L)th block of X is determined starting from
+*        upper-left corner column by column by
+*
+*            A'(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L)
+*
+*        Where
+*                   K-1                         L-1
+*          R(K,L) = SUM [A'(I,K)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)]
+*                   I=1                         J=1
+*
+         DO 60 L = 1, N
+            DO 50 K = 1, M
+*
+               SUML = CDOTC( K-1, A( 1, K ), 1, C( 1, L ), 1 )
+               SUMR = CDOTU( L-1, C( K, 1 ), LDC, B( 1, L ), 1 )
+               VEC = C( K, L ) - ( SUML+SGN*SUMR )
+*
+               SCALOC = ONE
+               A11 = CONJG( A( K, K ) ) + SGN*B( L, L )
+               DA11 = ABS( REAL( A11 ) ) + ABS( AIMAG( A11 ) )
+               IF( DA11.LE.SMIN ) THEN
+                  A11 = SMIN
+                  DA11 = SMIN
+                  INFO = 1
+               END IF
+               DB = ABS( REAL( VEC ) ) + ABS( AIMAG( VEC ) )
+               IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN
+                  IF( DB.GT.BIGNUM*DA11 )
+     $               SCALOC = ONE / DB
+               END IF
+*
+               X11 = CLADIV( VEC*CMPLX( SCALOC ), A11 )
+*
+               IF( SCALOC.NE.ONE ) THEN
+                  DO 40 J = 1, N
+                     CALL CSSCAL( M, SCALOC, C( 1, J ), 1 )
+   40             CONTINUE
+                  SCALE = SCALE*SCALOC
+               END IF
+               C( K, L ) = X11
+*
+   50       CONTINUE
+   60    CONTINUE
+*
+      ELSE IF( .NOT.NOTRNA .AND. .NOT.NOTRNB ) THEN
+*
+*        Solve    A'*X + ISGN*X*B' = C.
+*
+*        The (K,L)th block of X is determined starting from
+*        upper-right corner column by column by
+*
+*            A'(K,K)*X(K,L) + ISGN*X(K,L)*B'(L,L) = C(K,L) - R(K,L)
+*
+*        Where
+*                    K-1
+*           R(K,L) = SUM [A'(I,K)*X(I,L)] +
+*                    I=1
+*                           N
+*                     ISGN*SUM [X(K,J)*B'(L,J)].
+*                          J=L+1
+*
+         DO 90 L = N, 1, -1
+            DO 80 K = 1, M
+*
+               SUML = CDOTC( K-1, A( 1, K ), 1, C( 1, L ), 1 )
+               SUMR = CDOTC( N-L, C( K, MIN( L+1, N ) ), LDC,
+     $                B( L, MIN( L+1, N ) ), LDB )
+               VEC = C( K, L ) - ( SUML+SGN*CONJG( SUMR ) )
+*
+               SCALOC = ONE
+               A11 = CONJG( A( K, K )+SGN*B( L, L ) )
+               DA11 = ABS( REAL( A11 ) ) + ABS( AIMAG( A11 ) )
+               IF( DA11.LE.SMIN ) THEN
+                  A11 = SMIN
+                  DA11 = SMIN
+                  INFO = 1
+               END IF
+               DB = ABS( REAL( VEC ) ) + ABS( AIMAG( VEC ) )
+               IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN
+                  IF( DB.GT.BIGNUM*DA11 )
+     $               SCALOC = ONE / DB
+               END IF
+*
+               X11 = CLADIV( VEC*CMPLX( SCALOC ), A11 )
+*
+               IF( SCALOC.NE.ONE ) THEN
+                  DO 70 J = 1, N
+                     CALL CSSCAL( M, SCALOC, C( 1, J ), 1 )
+   70             CONTINUE
+                  SCALE = SCALE*SCALOC
+               END IF
+               C( K, L ) = X11
+*
+   80       CONTINUE
+   90    CONTINUE
+*
+      ELSE IF( NOTRNA .AND. .NOT.NOTRNB ) THEN
+*
+*        Solve    A*X + ISGN*X*B' = C.
+*
+*        The (K,L)th block of X is determined starting from
+*        bottom-left corner column by column by
+*
+*           A(K,K)*X(K,L) + ISGN*X(K,L)*B'(L,L) = C(K,L) - R(K,L)
+*
+*        Where
+*                    M                          N
+*          R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B'(L,J)]
+*                  I=K+1                      J=L+1
+*
+         DO 120 L = N, 1, -1
+            DO 110 K = M, 1, -1
+*
+               SUML = CDOTU( M-K, A( K, MIN( K+1, M ) ), LDA,
+     $                C( MIN( K+1, M ), L ), 1 )
+               SUMR = CDOTC( N-L, C( K, MIN( L+1, N ) ), LDC,
+     $                B( L, MIN( L+1, N ) ), LDB )
+               VEC = C( K, L ) - ( SUML+SGN*CONJG( SUMR ) )
+*
+               SCALOC = ONE
+               A11 = A( K, K ) + SGN*CONJG( B( L, L ) )
+               DA11 = ABS( REAL( A11 ) ) + ABS( AIMAG( A11 ) )
+               IF( DA11.LE.SMIN ) THEN
+                  A11 = SMIN
+                  DA11 = SMIN
+                  INFO = 1
+               END IF
+               DB = ABS( REAL( VEC ) ) + ABS( AIMAG( VEC ) )
+               IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN
+                  IF( DB.GT.BIGNUM*DA11 )
+     $               SCALOC = ONE / DB
+               END IF
+*
+               X11 = CLADIV( VEC*CMPLX( SCALOC ), A11 )
+*
+               IF( SCALOC.NE.ONE ) THEN
+                  DO 100 J = 1, N
+                     CALL CSSCAL( M, SCALOC, C( 1, J ), 1 )
+  100             CONTINUE
+                  SCALE = SCALE*SCALOC
+               END IF
+               C( K, L ) = X11
+*
+  110       CONTINUE
+  120    CONTINUE
+*
+      END IF
+*
+      RETURN
+*
+*     End of CTRSYL
+*
+      END
diff --git a/costa/native/external/lapack/ctrti2.f b/costa/native/external/lapack/ctrti2.f
new file mode 100644
index 000000000..a8a379bfe
--- /dev/null
+++ b/costa/native/external/lapack/ctrti2.f
@@ -0,0 +1,147 @@
+      SUBROUTINE CTRTI2( UPLO, DIAG, N, A, LDA, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTRTI2 computes the inverse of a complex upper or lower triangular
+*  matrix.
+*
+*  This is the Level 2 BLAS version of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the matrix A is upper or lower triangular.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  DIAG    (input) CHARACTER*1
+*          Specifies whether or not the matrix A is unit triangular.
+*          = 'N':  Non-unit triangular
+*          = 'U':  Unit triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the triangular matrix A.  If UPLO = 'U', the
+*          leading n by n upper triangular part of the array A contains
+*          the upper triangular matrix, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n by n lower triangular part of the array A contains
+*          the lower triangular matrix, and the strictly upper
+*          triangular part of A is not referenced.  If DIAG = 'U', the
+*          diagonal elements of A are also not referenced and are
+*          assumed to be 1.
+*
+*          On exit, the (triangular) inverse of the original matrix, in
+*          the same storage format.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, UPPER
+      INTEGER            J
+      COMPLEX            AJJ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CSCAL, CTRMV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOUNIT = LSAME( DIAG, 'N' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CTRTI2', -INFO )
+         RETURN
+      END IF
+*
+      IF( UPPER ) THEN
+*
+*        Compute inverse of upper triangular matrix.
+*
+         DO 10 J = 1, N
+            IF( NOUNIT ) THEN
+               A( J, J ) = ONE / A( J, J )
+               AJJ = -A( J, J )
+            ELSE
+               AJJ = -ONE
+            END IF
+*
+*           Compute elements 1:j-1 of j-th column.
+*
+            CALL CTRMV( 'Upper', 'No transpose', DIAG, J-1, A, LDA,
+     $                  A( 1, J ), 1 )
+            CALL CSCAL( J-1, AJJ, A( 1, J ), 1 )
+   10    CONTINUE
+      ELSE
+*
+*        Compute inverse of lower triangular matrix.
+*
+         DO 20 J = N, 1, -1
+            IF( NOUNIT ) THEN
+               A( J, J ) = ONE / A( J, J )
+               AJJ = -A( J, J )
+            ELSE
+               AJJ = -ONE
+            END IF
+            IF( J.LT.N ) THEN
+*
+*              Compute elements j+1:n of j-th column.
+*
+               CALL CTRMV( 'Lower', 'No transpose', DIAG, N-J,
+     $                     A( J+1, J+1 ), LDA, A( J+1, J ), 1 )
+               CALL CSCAL( N-J, AJJ, A( J+1, J ), 1 )
+            END IF
+   20    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CTRTI2
+*
+      END
diff --git a/costa/native/external/lapack/ctrtri.f b/costa/native/external/lapack/ctrtri.f
new file mode 100644
index 000000000..ffd79b870
--- /dev/null
+++ b/costa/native/external/lapack/ctrtri.f
@@ -0,0 +1,178 @@
+      SUBROUTINE CTRTRI( UPLO, DIAG, N, A, LDA, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTRTRI computes the inverse of a complex upper or lower triangular
+*  matrix A.
+*
+*  This is the Level 3 BLAS version of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the triangular matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of the array A contains
+*          the upper triangular matrix, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of the array A contains
+*          the lower triangular matrix, and the strictly upper
+*          triangular part of A is not referenced.  If DIAG = 'U', the
+*          diagonal elements of A are also not referenced and are
+*          assumed to be 1.
+*          On exit, the (triangular) inverse of the original matrix, in
+*          the same storage format.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, A(i,i) is exactly zero.  The triangular
+*               matrix is singular and its inverse can not be computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ),
+     $                   ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, UPPER
+      INTEGER            J, JB, NB, NN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CTRMM, CTRSM, CTRTI2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOUNIT = LSAME( DIAG, 'N' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CTRTRI', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Check for singularity if non-unit.
+*
+      IF( NOUNIT ) THEN
+         DO 10 INFO = 1, N
+            IF( A( INFO, INFO ).EQ.ZERO )
+     $         RETURN
+   10    CONTINUE
+         INFO = 0
+      END IF
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'CTRTRI', UPLO // DIAG, N, -1, -1, -1 )
+      IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+*        Use unblocked code
+*
+         CALL CTRTI2( UPLO, DIAG, N, A, LDA, INFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( UPPER ) THEN
+*
+*           Compute inverse of upper triangular matrix
+*
+            DO 20 J = 1, N, NB
+               JB = MIN( NB, N-J+1 )
+*
+*              Compute rows 1:j-1 of current block column
+*
+               CALL CTRMM( 'Left', 'Upper', 'No transpose', DIAG, J-1,
+     $                     JB, ONE, A, LDA, A( 1, J ), LDA )
+               CALL CTRSM( 'Right', 'Upper', 'No transpose', DIAG, J-1,
+     $                     JB, -ONE, A( J, J ), LDA, A( 1, J ), LDA )
+*
+*              Compute inverse of current diagonal block
+*
+               CALL CTRTI2( 'Upper', DIAG, JB, A( J, J ), LDA, INFO )
+   20       CONTINUE
+         ELSE
+*
+*           Compute inverse of lower triangular matrix
+*
+            NN = ( ( N-1 ) / NB )*NB + 1
+            DO 30 J = NN, 1, -NB
+               JB = MIN( NB, N-J+1 )
+               IF( J+JB.LE.N ) THEN
+*
+*                 Compute rows j+jb:n of current block column
+*
+                  CALL CTRMM( 'Left', 'Lower', 'No transpose', DIAG,
+     $                        N-J-JB+1, JB, ONE, A( J+JB, J+JB ), LDA,
+     $                        A( J+JB, J ), LDA )
+                  CALL CTRSM( 'Right', 'Lower', 'No transpose', DIAG,
+     $                        N-J-JB+1, JB, -ONE, A( J, J ), LDA,
+     $                        A( J+JB, J ), LDA )
+               END IF
+*
+*              Compute inverse of current diagonal block
+*
+               CALL CTRTI2( 'Lower', DIAG, JB, A( J, J ), LDA, INFO )
+   30       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of CTRTRI
+*
+      END
diff --git a/costa/native/external/lapack/ctrtrs.f b/costa/native/external/lapack/ctrtrs.f
new file mode 100644
index 000000000..9ec56c4ab
--- /dev/null
+++ b/costa/native/external/lapack/ctrtrs.f
@@ -0,0 +1,149 @@
+      SUBROUTINE CTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, TRANS, UPLO
+      INTEGER            INFO, LDA, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTRTRS solves a triangular system of the form
+*
+*     A * X = B,  A**T * X = B,  or  A**H * X = B,
+*
+*  where A is a triangular matrix of order N, and B is an N-by-NRHS
+*  matrix.  A check is made to verify that A is nonsingular.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input) COMPLEX array, dimension (LDA,N)
+*          The triangular matrix A.  If UPLO = 'U', the leading N-by-N
+*          upper triangular part of the array A contains the upper
+*          triangular matrix, and the strictly lower triangular part of
+*          A is not referenced.  If UPLO = 'L', the leading N-by-N lower
+*          triangular part of the array A contains the lower triangular
+*          matrix, and the strictly upper triangular part of A is not
+*          referenced.  If DIAG = 'U', the diagonal elements of A are
+*          also not referenced and are assumed to be 1.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, if INFO = 0, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, the i-th diagonal element of A is zero,
+*               indicating that the matrix is singular and the solutions
+*               X have not been computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ZERO, ONE
+      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOUNIT = LSAME( DIAG, 'N' )
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.
+     $         LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CTRTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Check for singularity.
+*
+      IF( NOUNIT ) THEN
+         DO 10 INFO = 1, N
+            IF( A( INFO, INFO ).EQ.ZERO )
+     $         RETURN
+   10    CONTINUE
+      END IF
+      INFO = 0
+*
+*     Solve A * x = b,  A**T * x = b,  or  A**H * x = b.
+*
+      CALL CTRSM( 'Left', UPLO, TRANS, DIAG, N, NRHS, ONE, A, LDA, B,
+     $            LDB )
+*
+      RETURN
+*
+*     End of CTRTRS
+*
+      END
diff --git a/costa/native/external/lapack/ctzrqf.f b/costa/native/external/lapack/ctzrqf.f
new file mode 100644
index 000000000..1da25fbd4
--- /dev/null
+++ b/costa/native/external/lapack/ctzrqf.f
@@ -0,0 +1,174 @@
+      SUBROUTINE CTZRQF( M, N, A, LDA, TAU, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), TAU( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This routine is deprecated and has been replaced by routine CTZRZF.
+*
+*  CTZRQF reduces the M-by-N ( M<=N ) complex upper trapezoidal matrix A
+*  to upper triangular form by means of unitary transformations.
+*
+*  The upper trapezoidal matrix A is factored as
+*
+*     A = ( R  0 ) * Z,
+*
+*  where Z is an N-by-N unitary matrix and R is an M-by-M upper
+*  triangular matrix.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= M.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the leading M-by-N upper trapezoidal part of the
+*          array A must contain the matrix to be factorized.
+*          On exit, the leading M-by-M upper triangular part of A
+*          contains the upper triangular matrix R, and elements M+1 to
+*          N of the first M rows of A, with the array TAU, represent the
+*          unitary matrix Z as a product of M elementary reflectors.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) COMPLEX array, dimension (M)
+*          The scalar factors of the elementary reflectors.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The  factorization is obtained by Householder's method.  The kth
+*  transformation matrix, Z( k ), whose conjugate transpose is used to
+*  introduce zeros into the (m - k + 1)th row of A, is given in the form
+*
+*     Z( k ) = ( I     0   ),
+*              ( 0  T( k ) )
+*
+*  where
+*
+*     T( k ) = I - tau*u( k )*u( k )',   u( k ) = (   1    ),
+*                                                 (   0    )
+*                                                 ( z( k ) )
+*
+*  tau is a scalar and z( k ) is an ( n - m ) element vector.
+*  tau and z( k ) are chosen to annihilate the elements of the kth row
+*  of X.
+*
+*  The scalar tau is returned in the kth element of TAU and the vector
+*  u( k ) in the kth row of A, such that the elements of z( k ) are
+*  in  a( k, m + 1 ), ..., a( k, n ). The elements of R are returned in
+*  the upper triangular part of A.
+*
+*  Z is given by
+*
+*     Z =  Z( 1 ) * Z( 2 ) * ... * Z( m ).
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            CONE, CZERO
+      PARAMETER          ( CONE = ( 1.0E+0, 0.0E+0 ),
+     $                   CZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, K, M1
+      COMPLEX            ALPHA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX, MIN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CAXPY, CCOPY, CGEMV, CGERC, CLACGV, CLARFG,
+     $                   XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.M ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CTZRQF', -INFO )
+         RETURN
+      END IF
+*
+*     Perform the factorization.
+*
+      IF( M.EQ.0 )
+     $   RETURN
+      IF( M.EQ.N ) THEN
+         DO 10 I = 1, N
+            TAU( I ) = CZERO
+   10    CONTINUE
+      ELSE
+         M1 = MIN( M+1, N )
+         DO 20 K = M, 1, -1
+*
+*           Use a Householder reflection to zero the kth row of A.
+*           First set up the reflection.
+*
+            A( K, K ) = CONJG( A( K, K ) )
+            CALL CLACGV( N-M, A( K, M1 ), LDA )
+            ALPHA = A( K, K )
+            CALL CLARFG( N-M+1, ALPHA, A( K, M1 ), LDA, TAU( K ) )
+            A( K, K ) = ALPHA
+            TAU( K ) = CONJG( TAU( K ) )
+*
+            IF( TAU( K ).NE.CZERO .AND. K.GT.1 ) THEN
+*
+*              We now perform the operation  A := A*P( k )'.
+*
+*              Use the first ( k - 1 ) elements of TAU to store  a( k ),
+*              where  a( k ) consists of the first ( k - 1 ) elements of
+*              the  kth column  of  A.  Also  let  B  denote  the  first
+*              ( k - 1 ) rows of the last ( n - m ) columns of A.
+*
+               CALL CCOPY( K-1, A( 1, K ), 1, TAU, 1 )
+*
+*              Form   w = a( k ) + B*z( k )  in TAU.
+*
+               CALL CGEMV( 'No transpose', K-1, N-M, CONE, A( 1, M1 ),
+     $                     LDA, A( K, M1 ), LDA, CONE, TAU, 1 )
+*
+*              Now form  a( k ) := a( k ) - conjg(tau)*w
+*              and       B      := B      - conjg(tau)*w*z( k )'.
+*
+               CALL CAXPY( K-1, -CONJG( TAU( K ) ), TAU, 1, A( 1, K ),
+     $                     1 )
+               CALL CGERC( K-1, N-M, -CONJG( TAU( K ) ), TAU, 1,
+     $                     A( K, M1 ), LDA, A( 1, M1 ), LDA )
+            END IF
+   20    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of CTZRQF
+*
+      END
diff --git a/costa/native/external/lapack/ctzrzf.f b/costa/native/external/lapack/ctzrzf.f
new file mode 100644
index 000000000..f380dfba1
--- /dev/null
+++ b/costa/native/external/lapack/ctzrzf.f
@@ -0,0 +1,241 @@
+      SUBROUTINE CTZRZF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CTZRZF reduces the M-by-N ( M<=N ) complex upper trapezoidal matrix A
+*  to upper triangular form by means of unitary transformations.
+*
+*  The upper trapezoidal matrix A is factored as
+*
+*     A = ( R  0 ) * Z,
+*
+*  where Z is an N-by-N unitary matrix and R is an M-by-M upper
+*  triangular matrix.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the leading M-by-N upper trapezoidal part of the
+*          array A must contain the matrix to be factorized.
+*          On exit, the leading M-by-M upper triangular part of A
+*          contains the upper triangular matrix R, and elements M+1 to
+*          N of the first M rows of A, with the array TAU, represent the
+*          unitary matrix Z as a product of M elementary reflectors.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) COMPLEX array, dimension (M)
+*          The scalar factors of the elementary reflectors.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,M).
+*          For optimum performance LWORK >= M*NB, where NB is
+*          the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+*  The factorization is obtained by Householder's method.  The kth
+*  transformation matrix, Z( k ), which is used to introduce zeros into
+*  the ( m - k + 1 )th row of A, is given in the form
+*
+*     Z( k ) = ( I     0   ),
+*              ( 0  T( k ) )
+*
+*  where
+*
+*     T( k ) = I - tau*u( k )*u( k )',   u( k ) = (   1    ),
+*                                                 (   0    )
+*                                                 ( z( k ) )
+*
+*  tau is a scalar and z( k ) is an ( n - m ) element vector.
+*  tau and z( k ) are chosen to annihilate the elements of the kth row
+*  of X.
+*
+*  The scalar tau is returned in the kth element of TAU and the vector
+*  u( k ) in the kth row of A, such that the elements of z( k ) are
+*  in  a( k, m + 1 ), ..., a( k, n ). The elements of R are returned in
+*  the upper triangular part of A.
+*
+*  Z is given by
+*
+*     Z =  Z( 1 ) * Z( 2 ) * ... * Z( m ).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IWS, KI, KK, LDWORK, LWKOPT, M1, MU, NB,
+     $                   NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLARZB, CLARZT, CLATRZ, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.M ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size.
+*
+         NB = ILAENV( 1, 'CGERQF', ' ', M, N, -1, -1 )
+         LWKOPT = M*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CTZRZF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      ELSE IF( M.EQ.N ) THEN
+         DO 10 I = 1, N
+            TAU( I ) = ZERO
+   10    CONTINUE
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 1
+      IWS = M
+      IF( NB.GT.1 .AND. NB.LT.M ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'CGERQF', ' ', M, N, -1, -1 ) )
+         IF( NX.LT.M ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = M
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'CGERQF', ' ', M, N, -1,
+     $                 -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.M .AND. NX.LT.M ) THEN
+*
+*        Use blocked code initially.
+*        The last kk rows are handled by the block method.
+*
+         M1 = MIN( M+1, N )
+         KI = ( ( M-NX-1 ) / NB )*NB
+         KK = MIN( M, KI+NB )
+*
+         DO 20 I = M - KK + KI + 1, M - KK + 1, -NB
+            IB = MIN( M-I+1, NB )
+*
+*           Compute the TZ factorization of the current block
+*           A(i:i+ib-1,i:n)
+*
+            CALL CLATRZ( IB, N-I+1, N-M, A( I, I ), LDA, TAU( I ),
+     $                   WORK )
+            IF( I.GT.1 ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i+ib-1) . . . H(i+1) H(i)
+*
+               CALL CLARZT( 'Backward', 'Rowwise', N-M, IB, A( I, M1 ),
+     $                      LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H to A(1:i-1,i:n) from the right
+*
+               CALL CLARZB( 'Right', 'No transpose', 'Backward',
+     $                      'Rowwise', I-1, N-I+1, IB, N-M, A( I, M1 ),
+     $                      LDA, WORK, LDWORK, A( 1, I ), LDA,
+     $                      WORK( IB+1 ), LDWORK )
+            END IF
+   20    CONTINUE
+         MU = I + NB - 1
+      ELSE
+         MU = M
+      END IF
+*
+*     Use unblocked code to factor the last or only block
+*
+      IF( MU.GT.0 )
+     $   CALL CLATRZ( MU, N, N-M, A, LDA, TAU, WORK )
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of CTZRZF
+*
+      END
diff --git a/costa/native/external/lapack/cung2l.f b/costa/native/external/lapack/cung2l.f
new file mode 100644
index 000000000..bc8c56abd
--- /dev/null
+++ b/costa/native/external/lapack/cung2l.f
@@ -0,0 +1,129 @@
+      SUBROUTINE CUNG2L( M, N, K, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CUNG2L generates an m by n complex matrix Q with orthonormal columns,
+*  which is defined as the last n columns of a product of k elementary
+*  reflectors of order m
+*
+*        Q  =  H(k) . . . H(2) H(1)
+*
+*  as returned by CGEQLF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. M >= N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. N >= K >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the (n-k+i)-th column must contain the vector which
+*          defines the elementary reflector H(i), for i = 1,2,...,k, as
+*          returned by CGEQLF in the last k columns of its array
+*          argument A.
+*          On exit, the m-by-n matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) COMPLEX array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by CGEQLF.
+*
+*  WORK    (workspace) COMPLEX array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ),
+     $                   ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, II, J, L
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLARF, CSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 .OR. N.GT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.N ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CUNG2L', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 )
+     $   RETURN
+*
+*     Initialise columns 1:n-k to columns of the unit matrix
+*
+      DO 20 J = 1, N - K
+         DO 10 L = 1, M
+            A( L, J ) = ZERO
+   10    CONTINUE
+         A( M-N+J, J ) = ONE
+   20 CONTINUE
+*
+      DO 40 I = 1, K
+         II = N - K + I
+*
+*        Apply H(i) to A(1:m-k+i,1:n-k+i) from the left
+*
+         A( M-N+II, II ) = ONE
+         CALL CLARF( 'Left', M-N+II, II-1, A( 1, II ), 1, TAU( I ), A,
+     $               LDA, WORK )
+         CALL CSCAL( M-N+II-1, -TAU( I ), A( 1, II ), 1 )
+         A( M-N+II, II ) = ONE - TAU( I )
+*
+*        Set A(m-k+i+1:m,n-k+i) to zero
+*
+         DO 30 L = M - N + II + 1, M
+            A( L, II ) = ZERO
+   30    CONTINUE
+   40 CONTINUE
+      RETURN
+*
+*     End of CUNG2L
+*
+      END
diff --git a/costa/native/external/lapack/cung2r.f b/costa/native/external/lapack/cung2r.f
new file mode 100644
index 000000000..290c96d4c
--- /dev/null
+++ b/costa/native/external/lapack/cung2r.f
@@ -0,0 +1,131 @@
+      SUBROUTINE CUNG2R( M, N, K, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CUNG2R generates an m by n complex matrix Q with orthonormal columns,
+*  which is defined as the first n columns of a product of k elementary
+*  reflectors of order m
+*
+*        Q  =  H(1) H(2) . . . H(k)
+*
+*  as returned by CGEQRF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. M >= N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. N >= K >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the i-th column must contain the vector which
+*          defines the elementary reflector H(i), for i = 1,2,...,k, as
+*          returned by CGEQRF in the first k columns of its array
+*          argument A.
+*          On exit, the m by n matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) COMPLEX array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by CGEQRF.
+*
+*  WORK    (workspace) COMPLEX array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ),
+     $                   ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, L
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLARF, CSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 .OR. N.GT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.N ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CUNG2R', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 )
+     $   RETURN
+*
+*     Initialise columns k+1:n to columns of the unit matrix
+*
+      DO 20 J = K + 1, N
+         DO 10 L = 1, M
+            A( L, J ) = ZERO
+   10    CONTINUE
+         A( J, J ) = ONE
+   20 CONTINUE
+*
+      DO 40 I = K, 1, -1
+*
+*        Apply H(i) to A(i:m,i:n) from the left
+*
+         IF( I.LT.N ) THEN
+            A( I, I ) = ONE
+            CALL CLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ),
+     $                  A( I, I+1 ), LDA, WORK )
+         END IF
+         IF( I.LT.M )
+     $      CALL CSCAL( M-I, -TAU( I ), A( I+1, I ), 1 )
+         A( I, I ) = ONE - TAU( I )
+*
+*        Set A(1:i-1,i) to zero
+*
+         DO 30 L = 1, I - 1
+            A( L, I ) = ZERO
+   30    CONTINUE
+   40 CONTINUE
+      RETURN
+*
+*     End of CUNG2R
+*
+      END
diff --git a/costa/native/external/lapack/cungbr.f b/costa/native/external/lapack/cungbr.f
new file mode 100644
index 000000000..8894670e7
--- /dev/null
+++ b/costa/native/external/lapack/cungbr.f
@@ -0,0 +1,246 @@
+      SUBROUTINE CUNGBR( VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          VECT
+      INTEGER            INFO, K, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CUNGBR generates one of the complex unitary matrices Q or P**H
+*  determined by CGEBRD when reducing a complex matrix A to bidiagonal
+*  form: A = Q * B * P**H.  Q and P**H are defined as products of
+*  elementary reflectors H(i) or G(i) respectively.
+*
+*  If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q
+*  is of order M:
+*  if m >= k, Q = H(1) H(2) . . . H(k) and CUNGBR returns the first n
+*  columns of Q, where m >= n >= k;
+*  if m < k, Q = H(1) H(2) . . . H(m-1) and CUNGBR returns Q as an
+*  M-by-M matrix.
+*
+*  If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H
+*  is of order N:
+*  if k < n, P**H = G(k) . . . G(2) G(1) and CUNGBR returns the first m
+*  rows of P**H, where n >= m >= k;
+*  if k >= n, P**H = G(n-1) . . . G(2) G(1) and CUNGBR returns P**H as
+*  an N-by-N matrix.
+*
+*  Arguments
+*  =========
+*
+*  VECT    (input) CHARACTER*1
+*          Specifies whether the matrix Q or the matrix P**H is
+*          required, as defined in the transformation applied by CGEBRD:
+*          = 'Q':  generate Q;
+*          = 'P':  generate P**H.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q or P**H to be returned.
+*          M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q or P**H to be returned.
+*          N >= 0.
+*          If VECT = 'Q', M >= N >= min(M,K);
+*          if VECT = 'P', N >= M >= min(N,K).
+*
+*  K       (input) INTEGER
+*          If VECT = 'Q', the number of columns in the original M-by-K
+*          matrix reduced by CGEBRD.
+*          If VECT = 'P', the number of rows in the original K-by-N
+*          matrix reduced by CGEBRD.
+*          K >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the vectors which define the elementary reflectors,
+*          as returned by CGEBRD.
+*          On exit, the M-by-N matrix Q or P**H.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= M.
+*
+*  TAU     (input) COMPLEX array, dimension
+*                                (min(M,K)) if VECT = 'Q'
+*                                (min(N,K)) if VECT = 'P'
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i) or G(i), which determines Q or P**H, as
+*          returned by CGEBRD in its array argument TAUQ or TAUP.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,min(M,N)).
+*          For optimum performance LWORK >= min(M,N)*NB, where NB
+*          is the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ZERO, ONE
+      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, WANTQ
+      INTEGER            I, IINFO, J, LWKOPT, MN, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV, LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CUNGLQ, CUNGQR, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      WANTQ = LSAME( VECT, 'Q' )
+      MN = MIN( M, N )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.WANTQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN
+         INFO = -1
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 .OR. ( WANTQ .AND. ( N.GT.M .OR. N.LT.MIN( M,
+     $         K ) ) ) .OR. ( .NOT.WANTQ .AND. ( M.GT.N .OR. M.LT.
+     $         MIN( N, K ) ) ) ) THEN
+         INFO = -3
+      ELSE IF( K.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -6
+      ELSE IF( LWORK.LT.MAX( 1, MN ) .AND. .NOT.LQUERY ) THEN
+         INFO = -9
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         IF( WANTQ ) THEN
+            NB = ILAENV( 1, 'CUNGQR', ' ', M, N, K, -1 )
+         ELSE
+            NB = ILAENV( 1, 'CUNGLQ', ' ', M, N, K, -1 )
+         END IF
+         LWKOPT = MAX( 1, MN )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CUNGBR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( WANTQ ) THEN
+*
+*        Form Q, determined by a call to CGEBRD to reduce an m-by-k
+*        matrix
+*
+         IF( M.GE.K ) THEN
+*
+*           If m >= k, assume m >= n >= k
+*
+            CALL CUNGQR( M, N, K, A, LDA, TAU, WORK, LWORK, IINFO )
+*
+         ELSE
+*
+*           If m < k, assume m = n
+*
+*           Shift the vectors which define the elementary reflectors one
+*           column to the right, and set the first row and column of Q
+*           to those of the unit matrix
+*
+            DO 20 J = M, 2, -1
+               A( 1, J ) = ZERO
+               DO 10 I = J + 1, M
+                  A( I, J ) = A( I, J-1 )
+   10          CONTINUE
+   20       CONTINUE
+            A( 1, 1 ) = ONE
+            DO 30 I = 2, M
+               A( I, 1 ) = ZERO
+   30       CONTINUE
+            IF( M.GT.1 ) THEN
+*
+*              Form Q(2:m,2:m)
+*
+               CALL CUNGQR( M-1, M-1, M-1, A( 2, 2 ), LDA, TAU, WORK,
+     $                      LWORK, IINFO )
+            END IF
+         END IF
+      ELSE
+*
+*        Form P', determined by a call to CGEBRD to reduce a k-by-n
+*        matrix
+*
+         IF( K.LT.N ) THEN
+*
+*           If k < n, assume k <= m <= n
+*
+            CALL CUNGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, IINFO )
+*
+         ELSE
+*
+*           If k >= n, assume m = n
+*
+*           Shift the vectors which define the elementary reflectors one
+*           row downward, and set the first row and column of P' to
+*           those of the unit matrix
+*
+            A( 1, 1 ) = ONE
+            DO 40 I = 2, N
+               A( I, 1 ) = ZERO
+   40       CONTINUE
+            DO 60 J = 2, N
+               DO 50 I = J - 1, 2, -1
+                  A( I, J ) = A( I-1, J )
+   50          CONTINUE
+               A( 1, J ) = ZERO
+   60       CONTINUE
+            IF( N.GT.1 ) THEN
+*
+*              Form P'(2:n,2:n)
+*
+               CALL CUNGLQ( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK,
+     $                      LWORK, IINFO )
+            END IF
+         END IF
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of CUNGBR
+*
+      END
diff --git a/costa/native/external/lapack/cunghr.f b/costa/native/external/lapack/cunghr.f
new file mode 100644
index 000000000..704acdc25
--- /dev/null
+++ b/costa/native/external/lapack/cunghr.f
@@ -0,0 +1,166 @@
+      SUBROUTINE CUNGHR( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            IHI, ILO, INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CUNGHR generates a complex unitary matrix Q which is defined as the
+*  product of IHI-ILO elementary reflectors of order N, as returned by
+*  CGEHRD:
+*
+*  Q = H(ilo) H(ilo+1) . . . H(ihi-1).
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix Q. N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          ILO and IHI must have the same values as in the previous call
+*          of CGEHRD. Q is equal to the unit matrix except in the
+*          submatrix Q(ilo+1:ihi,ilo+1:ihi).
+*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the vectors which define the elementary reflectors,
+*          as returned by CGEHRD.
+*          On exit, the N-by-N unitary matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  TAU     (input) COMPLEX array, dimension (N-1)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by CGEHRD.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= IHI-ILO.
+*          For optimum performance LWORK >= (IHI-ILO)*NB, where NB is
+*          the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ZERO, ONE
+      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IINFO, J, LWKOPT, NB, NH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CUNGQR, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NH = IHI - ILO
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN
+         INFO = -2
+      ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.MAX( 1, NH ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'CUNGQR', ' ', NH, NH, NH, -1 )
+         LWKOPT = MAX( 1, NH )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CUNGHR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+*     Shift the vectors which define the elementary reflectors one
+*     column to the right, and set the first ilo and the last n-ihi
+*     rows and columns to those of the unit matrix
+*
+      DO 40 J = IHI, ILO + 1, -1
+         DO 10 I = 1, J - 1
+            A( I, J ) = ZERO
+   10    CONTINUE
+         DO 20 I = J + 1, IHI
+            A( I, J ) = A( I, J-1 )
+   20    CONTINUE
+         DO 30 I = IHI + 1, N
+            A( I, J ) = ZERO
+   30    CONTINUE
+   40 CONTINUE
+      DO 60 J = 1, ILO
+         DO 50 I = 1, N
+            A( I, J ) = ZERO
+   50    CONTINUE
+         A( J, J ) = ONE
+   60 CONTINUE
+      DO 80 J = IHI + 1, N
+         DO 70 I = 1, N
+            A( I, J ) = ZERO
+   70    CONTINUE
+         A( J, J ) = ONE
+   80 CONTINUE
+*
+      IF( NH.GT.0 ) THEN
+*
+*        Generate Q(ilo+1:ihi,ilo+1:ihi)
+*
+         CALL CUNGQR( NH, NH, NH, A( ILO+1, ILO+1 ), LDA, TAU( ILO ),
+     $                WORK, LWORK, IINFO )
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of CUNGHR
+*
+      END
diff --git a/costa/native/external/lapack/cungl2.f b/costa/native/external/lapack/cungl2.f
new file mode 100644
index 000000000..68d0f7a73
--- /dev/null
+++ b/costa/native/external/lapack/cungl2.f
@@ -0,0 +1,137 @@
+      SUBROUTINE CUNGL2( M, N, K, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CUNGL2 generates an m-by-n complex matrix Q with orthonormal rows,
+*  which is defined as the first m rows of a product of k elementary
+*  reflectors of order n
+*
+*        Q  =  H(k)' . . . H(2)' H(1)'
+*
+*  as returned by CGELQF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. N >= M.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. M >= K >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the i-th row must contain the vector which defines
+*          the elementary reflector H(i), for i = 1,2,...,k, as returned
+*          by CGELQF in the first k rows of its array argument A.
+*          On exit, the m by n matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) COMPLEX array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by CGELQF.
+*
+*  WORK    (workspace) COMPLEX array, dimension (M)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ),
+     $                   ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, L
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLACGV, CLARF, CSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.M ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CUNGL2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 )
+     $   RETURN
+*
+      IF( K.LT.M ) THEN
+*
+*        Initialise rows k+1:m to rows of the unit matrix
+*
+         DO 20 J = 1, N
+            DO 10 L = K + 1, M
+               A( L, J ) = ZERO
+   10       CONTINUE
+            IF( J.GT.K .AND. J.LE.M )
+     $         A( J, J ) = ONE
+   20    CONTINUE
+      END IF
+*
+      DO 40 I = K, 1, -1
+*
+*        Apply H(i)' to A(i:m,i:n) from the right
+*
+         IF( I.LT.N ) THEN
+            CALL CLACGV( N-I, A( I, I+1 ), LDA )
+            IF( I.LT.M ) THEN
+               A( I, I ) = ONE
+               CALL CLARF( 'Right', M-I, N-I+1, A( I, I ), LDA,
+     $                     CONJG( TAU( I ) ), A( I+1, I ), LDA, WORK )
+            END IF
+            CALL CSCAL( N-I, -TAU( I ), A( I, I+1 ), LDA )
+            CALL CLACGV( N-I, A( I, I+1 ), LDA )
+         END IF
+         A( I, I ) = ONE - CONJG( TAU( I ) )
+*
+*        Set A(i,1:i-1,i) to zero
+*
+         DO 30 L = 1, I - 1
+            A( I, L ) = ZERO
+   30    CONTINUE
+   40 CONTINUE
+      RETURN
+*
+*     End of CUNGL2
+*
+      END
diff --git a/costa/native/external/lapack/cunglq.f b/costa/native/external/lapack/cunglq.f
new file mode 100644
index 000000000..494649482
--- /dev/null
+++ b/costa/native/external/lapack/cunglq.f
@@ -0,0 +1,216 @@
+      SUBROUTINE CUNGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CUNGLQ generates an M-by-N complex matrix Q with orthonormal rows,
+*  which is defined as the first M rows of a product of K elementary
+*  reflectors of order N
+*
+*        Q  =  H(k)' . . . H(2)' H(1)'
+*
+*  as returned by CGELQF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. N >= M.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. M >= K >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the i-th row must contain the vector which defines
+*          the elementary reflector H(i), for i = 1,2,...,k, as returned
+*          by CGELQF in the first k rows of its array argument A.
+*          On exit, the M-by-N matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) COMPLEX array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by CGELQF.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,M).
+*          For optimum performance LWORK >= M*NB, where NB is
+*          the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit;
+*          < 0:  if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, J, KI, KK, L, LDWORK,
+     $                   LWKOPT, NB, NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLARFB, CLARFT, CUNGL2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'CUNGLQ', ' ', M, N, K, -1 )
+      LWKOPT = MAX( 1, M )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.M ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CUNGLQ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 0
+      IWS = M
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'CUNGLQ', ' ', M, N, K, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = M
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'CUNGLQ', ' ', M, N, K, -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code after the last block.
+*        The first kk rows are handled by the block method.
+*
+         KI = ( ( K-NX-1 ) / NB )*NB
+         KK = MIN( K, KI+NB )
+*
+*        Set A(kk+1:m,1:kk) to zero.
+*
+         DO 20 J = 1, KK
+            DO 10 I = KK + 1, M
+               A( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+      ELSE
+         KK = 0
+      END IF
+*
+*     Use unblocked code for the last or only block.
+*
+      IF( KK.LT.M )
+     $   CALL CUNGL2( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA,
+     $                TAU( KK+1 ), WORK, IINFO )
+*
+      IF( KK.GT.0 ) THEN
+*
+*        Use blocked code
+*
+         DO 50 I = KI + 1, 1, -NB
+            IB = MIN( NB, K-I+1 )
+            IF( I+IB.LE.M ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i) H(i+1) . . . H(i+ib-1)
+*
+               CALL CLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ),
+     $                      LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H' to A(i+ib:m,i:n) from the right
+*
+               CALL CLARFB( 'Right', 'Conjugate transpose', 'Forward',
+     $                      'Rowwise', M-I-IB+1, N-I+1, IB, A( I, I ),
+     $                      LDA, WORK, LDWORK, A( I+IB, I ), LDA,
+     $                      WORK( IB+1 ), LDWORK )
+            END IF
+*
+*           Apply H' to columns i:n of current block
+*
+            CALL CUNGL2( IB, N-I+1, IB, A( I, I ), LDA, TAU( I ), WORK,
+     $                   IINFO )
+*
+*           Set columns 1:i-1 of current block to zero
+*
+            DO 40 J = 1, I - 1
+               DO 30 L = I, I + IB - 1
+                  A( L, J ) = ZERO
+   30          CONTINUE
+   40       CONTINUE
+   50    CONTINUE
+      END IF
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of CUNGLQ
+*
+      END
diff --git a/costa/native/external/lapack/cungql.f b/costa/native/external/lapack/cungql.f
new file mode 100644
index 000000000..1f5e4c954
--- /dev/null
+++ b/costa/native/external/lapack/cungql.f
@@ -0,0 +1,214 @@
+      SUBROUTINE CUNGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CUNGQL generates an M-by-N complex matrix Q with orthonormal columns,
+*  which is defined as the last N columns of a product of K elementary
+*  reflectors of order M
+*
+*        Q  =  H(k) . . . H(2) H(1)
+*
+*  as returned by CGEQLF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. M >= N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. N >= K >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the (n-k+i)-th column must contain the vector which
+*          defines the elementary reflector H(i), for i = 1,2,...,k, as
+*          returned by CGEQLF in the last k columns of its array
+*          argument A.
+*          On exit, the M-by-N matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) COMPLEX array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by CGEQLF.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,N).
+*          For optimum performance LWORK >= N*NB, where NB is the
+*          optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, J, KK, L, LDWORK, LWKOPT,
+     $                   NB, NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLARFB, CLARFT, CUNG2L, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'CUNGQL', ' ', M, N, K, -1 )
+      LWKOPT = MAX( 1, N )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 .OR. N.GT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.N ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CUNGQL', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 0
+      IWS = N
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'CUNGQL', ' ', M, N, K, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = N
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'CUNGQL', ' ', M, N, K, -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code after the first block.
+*        The last kk columns are handled by the block method.
+*
+         KK = MIN( K, ( ( K-NX+NB-1 ) / NB )*NB )
+*
+*        Set A(m-kk+1:m,1:n-kk) to zero.
+*
+         DO 20 J = 1, N - KK
+            DO 10 I = M - KK + 1, M
+               A( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+      ELSE
+         KK = 0
+      END IF
+*
+*     Use unblocked code for the first or only block.
+*
+      CALL CUNG2L( M-KK, N-KK, K-KK, A, LDA, TAU, WORK, IINFO )
+*
+      IF( KK.GT.0 ) THEN
+*
+*        Use blocked code
+*
+         DO 50 I = K - KK + 1, K, NB
+            IB = MIN( NB, K-I+1 )
+            IF( N-K+I.GT.1 ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i+ib-1) . . . H(i+1) H(i)
+*
+               CALL CLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB,
+     $                      A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left
+*
+               CALL CLARFB( 'Left', 'No transpose', 'Backward',
+     $                      'Columnwise', M-K+I+IB-1, N-K+I-1, IB,
+     $                      A( 1, N-K+I ), LDA, WORK, LDWORK, A, LDA,
+     $                      WORK( IB+1 ), LDWORK )
+            END IF
+*
+*           Apply H to rows 1:m-k+i+ib-1 of current block
+*
+            CALL CUNG2L( M-K+I+IB-1, IB, IB, A( 1, N-K+I ), LDA,
+     $                   TAU( I ), WORK, IINFO )
+*
+*           Set rows m-k+i+ib:m of current block to zero
+*
+            DO 40 J = N - K + I, N - K + I + IB - 1
+               DO 30 L = M - K + I + IB, M
+                  A( L, J ) = ZERO
+   30          CONTINUE
+   40       CONTINUE
+   50    CONTINUE
+      END IF
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of CUNGQL
+*
+      END
diff --git a/costa/native/external/lapack/cungqr.f b/costa/native/external/lapack/cungqr.f
new file mode 100644
index 000000000..4dc4035ca
--- /dev/null
+++ b/costa/native/external/lapack/cungqr.f
@@ -0,0 +1,217 @@
+      SUBROUTINE CUNGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CUNGQR generates an M-by-N complex matrix Q with orthonormal columns,
+*  which is defined as the first N columns of a product of K elementary
+*  reflectors of order M
+*
+*        Q  =  H(1) H(2) . . . H(k)
+*
+*  as returned by CGEQRF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. M >= N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. N >= K >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the i-th column must contain the vector which
+*          defines the elementary reflector H(i), for i = 1,2,...,k, as
+*          returned by CGEQRF in the first k columns of its array
+*          argument A.
+*          On exit, the M-by-N matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) COMPLEX array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by CGEQRF.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,N).
+*          For optimum performance LWORK >= N*NB, where NB is the
+*          optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, J, KI, KK, L, LDWORK,
+     $                   LWKOPT, NB, NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLARFB, CLARFT, CUNG2R, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'CUNGQR', ' ', M, N, K, -1 )
+      LWKOPT = MAX( 1, N )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 .OR. N.GT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.N ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CUNGQR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 0
+      IWS = N
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'CUNGQR', ' ', M, N, K, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = N
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'CUNGQR', ' ', M, N, K, -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code after the last block.
+*        The first kk columns are handled by the block method.
+*
+         KI = ( ( K-NX-1 ) / NB )*NB
+         KK = MIN( K, KI+NB )
+*
+*        Set A(1:kk,kk+1:n) to zero.
+*
+         DO 20 J = KK + 1, N
+            DO 10 I = 1, KK
+               A( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+      ELSE
+         KK = 0
+      END IF
+*
+*     Use unblocked code for the last or only block.
+*
+      IF( KK.LT.N )
+     $   CALL CUNG2R( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA,
+     $                TAU( KK+1 ), WORK, IINFO )
+*
+      IF( KK.GT.0 ) THEN
+*
+*        Use blocked code
+*
+         DO 50 I = KI + 1, 1, -NB
+            IB = MIN( NB, K-I+1 )
+            IF( I+IB.LE.N ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i) H(i+1) . . . H(i+ib-1)
+*
+               CALL CLARFT( 'Forward', 'Columnwise', M-I+1, IB,
+     $                      A( I, I ), LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H to A(i:m,i+ib:n) from the left
+*
+               CALL CLARFB( 'Left', 'No transpose', 'Forward',
+     $                      'Columnwise', M-I+1, N-I-IB+1, IB,
+     $                      A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ),
+     $                      LDA, WORK( IB+1 ), LDWORK )
+            END IF
+*
+*           Apply H to rows i:m of current block
+*
+            CALL CUNG2R( M-I+1, IB, IB, A( I, I ), LDA, TAU( I ), WORK,
+     $                   IINFO )
+*
+*           Set rows 1:i-1 of current block to zero
+*
+            DO 40 J = I, I + IB - 1
+               DO 30 L = 1, I - 1
+                  A( L, J ) = ZERO
+   30          CONTINUE
+   40       CONTINUE
+   50    CONTINUE
+      END IF
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of CUNGQR
+*
+      END
diff --git a/costa/native/external/lapack/cungr2.f b/costa/native/external/lapack/cungr2.f
new file mode 100644
index 000000000..8cf1cd309
--- /dev/null
+++ b/costa/native/external/lapack/cungr2.f
@@ -0,0 +1,135 @@
+      SUBROUTINE CUNGR2( M, N, K, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CUNGR2 generates an m by n complex matrix Q with orthonormal rows,
+*  which is defined as the last m rows of a product of k elementary
+*  reflectors of order n
+*
+*        Q  =  H(1)' H(2)' . . . H(k)'
+*
+*  as returned by CGERQF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. N >= M.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. M >= K >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the (m-k+i)-th row must contain the vector which
+*          defines the elementary reflector H(i), for i = 1,2,...,k, as
+*          returned by CGERQF in the last k rows of its array argument
+*          A.
+*          On exit, the m-by-n matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) COMPLEX array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by CGERQF.
+*
+*  WORK    (workspace) COMPLEX array, dimension (M)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ),
+     $                   ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, II, J, L
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLACGV, CLARF, CSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.M ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CUNGR2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 )
+     $   RETURN
+*
+      IF( K.LT.M ) THEN
+*
+*        Initialise rows 1:m-k to rows of the unit matrix
+*
+         DO 20 J = 1, N
+            DO 10 L = 1, M - K
+               A( L, J ) = ZERO
+   10       CONTINUE
+            IF( J.GT.N-M .AND. J.LE.N-K )
+     $         A( M-N+J, J ) = ONE
+   20    CONTINUE
+      END IF
+*
+      DO 40 I = 1, K
+         II = M - K + I
+*
+*        Apply H(i)' to A(1:m-k+i,1:n-k+i) from the right
+*
+         CALL CLACGV( N-M+II-1, A( II, 1 ), LDA )
+         A( II, N-M+II ) = ONE
+         CALL CLARF( 'Right', II-1, N-M+II, A( II, 1 ), LDA,
+     $               CONJG( TAU( I ) ), A, LDA, WORK )
+         CALL CSCAL( N-M+II-1, -TAU( I ), A( II, 1 ), LDA )
+         CALL CLACGV( N-M+II-1, A( II, 1 ), LDA )
+         A( II, N-M+II ) = ONE - CONJG( TAU( I ) )
+*
+*        Set A(m-k+i,n-k+i+1:n) to zero
+*
+         DO 30 L = N - M + II + 1, N
+            A( II, L ) = ZERO
+   30    CONTINUE
+   40 CONTINUE
+      RETURN
+*
+*     End of CUNGR2
+*
+      END
diff --git a/costa/native/external/lapack/cungrq.f b/costa/native/external/lapack/cungrq.f
new file mode 100644
index 000000000..a9e07e114
--- /dev/null
+++ b/costa/native/external/lapack/cungrq.f
@@ -0,0 +1,215 @@
+      SUBROUTINE CUNGRQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CUNGRQ generates an M-by-N complex matrix Q with orthonormal rows,
+*  which is defined as the last M rows of a product of K elementary
+*  reflectors of order N
+*
+*        Q  =  H(1)' H(2)' . . . H(k)'
+*
+*  as returned by CGERQF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. N >= M.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. M >= K >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the (m-k+i)-th row must contain the vector which
+*          defines the elementary reflector H(i), for i = 1,2,...,k, as
+*          returned by CGERQF in the last k rows of its array argument
+*          A.
+*          On exit, the M-by-N matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) COMPLEX array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by CGERQF.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,M).
+*          For optimum performance LWORK >= M*NB, where NB is the
+*          optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ZERO
+      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, II, IINFO, IWS, J, KK, L, LDWORK,
+     $                   LWKOPT, NB, NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLARFB, CLARFT, CUNGR2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'CUNGRQ', ' ', M, N, K, -1 )
+      LWKOPT = MAX( 1, M )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.M ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CUNGRQ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 0
+      IWS = M
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'CUNGRQ', ' ', M, N, K, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = M
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'CUNGRQ', ' ', M, N, K, -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code after the first block.
+*        The last kk rows are handled by the block method.
+*
+         KK = MIN( K, ( ( K-NX+NB-1 ) / NB )*NB )
+*
+*        Set A(1:m-kk,n-kk+1:n) to zero.
+*
+         DO 20 J = N - KK + 1, N
+            DO 10 I = 1, M - KK
+               A( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+      ELSE
+         KK = 0
+      END IF
+*
+*     Use unblocked code for the first or only block.
+*
+      CALL CUNGR2( M-KK, N-KK, K-KK, A, LDA, TAU, WORK, IINFO )
+*
+      IF( KK.GT.0 ) THEN
+*
+*        Use blocked code
+*
+         DO 50 I = K - KK + 1, K, NB
+            IB = MIN( NB, K-I+1 )
+            II = M - K + I
+            IF( II.GT.1 ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i+ib-1) . . . H(i+1) H(i)
+*
+               CALL CLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB,
+     $                      A( II, 1 ), LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H' to A(1:m-k+i-1,1:n-k+i+ib-1) from the right
+*
+               CALL CLARFB( 'Right', 'Conjugate transpose', 'Backward',
+     $                      'Rowwise', II-1, N-K+I+IB-1, IB, A( II, 1 ),
+     $                      LDA, WORK, LDWORK, A, LDA, WORK( IB+1 ),
+     $                      LDWORK )
+            END IF
+*
+*           Apply H' to columns 1:n-k+i+ib-1 of current block
+*
+            CALL CUNGR2( IB, N-K+I+IB-1, IB, A( II, 1 ), LDA, TAU( I ),
+     $                   WORK, IINFO )
+*
+*           Set columns n-k+i+ib:n of current block to zero
+*
+            DO 40 L = N - K + I + IB, N
+               DO 30 J = II, II + IB - 1
+                  A( J, L ) = ZERO
+   30          CONTINUE
+   40       CONTINUE
+   50    CONTINUE
+      END IF
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of CUNGRQ
+*
+      END
diff --git a/costa/native/external/lapack/cungtr.f b/costa/native/external/lapack/cungtr.f
new file mode 100644
index 000000000..a2ea7a680
--- /dev/null
+++ b/costa/native/external/lapack/cungtr.f
@@ -0,0 +1,185 @@
+      SUBROUTINE CUNGTR( UPLO, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CUNGTR generates a complex unitary matrix Q which is defined as the
+*  product of n-1 elementary reflectors of order N, as returned by
+*  CHETRD:
+*
+*  if UPLO = 'U', Q = H(n-1) . . . H(2) H(1),
+*
+*  if UPLO = 'L', Q = H(1) H(2) . . . H(n-1).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U': Upper triangle of A contains elementary reflectors
+*                 from CHETRD;
+*          = 'L': Lower triangle of A contains elementary reflectors
+*                 from CHETRD.
+*
+*  N       (input) INTEGER
+*          The order of the matrix Q. N >= 0.
+*
+*  A       (input/output) COMPLEX array, dimension (LDA,N)
+*          On entry, the vectors which define the elementary reflectors,
+*          as returned by CHETRD.
+*          On exit, the N-by-N unitary matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= N.
+*
+*  TAU     (input) COMPLEX array, dimension (N-1)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by CHETRD.
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= N-1.
+*          For optimum performance LWORK >= (N-1)*NB, where NB is
+*          the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ZERO, ONE
+      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, UPPER
+      INTEGER            I, IINFO, J, LWKOPT, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV, LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CUNGQL, CUNGQR, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, N-1 ) .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         IF ( UPPER ) THEN
+           NB = ILAENV( 1, 'CUNGQL', ' ', N-1, N-1, N-1, -1 )
+         ELSE
+           NB = ILAENV( 1, 'CUNGQR', ' ', N-1, N-1, N-1, -1 )
+         END IF
+         LWKOPT = MAX( 1, N-1 )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CUNGTR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( UPPER ) THEN
+*
+*        Q was determined by a call to CHETRD with UPLO = 'U'
+*
+*        Shift the vectors which define the elementary reflectors one
+*        column to the left, and set the last row and column of Q to
+*        those of the unit matrix
+*
+         DO 20 J = 1, N - 1
+            DO 10 I = 1, J - 1
+               A( I, J ) = A( I, J+1 )
+   10       CONTINUE
+            A( N, J ) = ZERO
+   20    CONTINUE
+         DO 30 I = 1, N - 1
+            A( I, N ) = ZERO
+   30    CONTINUE
+         A( N, N ) = ONE
+*
+*        Generate Q(1:n-1,1:n-1)
+*
+         CALL CUNGQL( N-1, N-1, N-1, A, LDA, TAU, WORK, LWORK, IINFO )
+*
+      ELSE
+*
+*        Q was determined by a call to CHETRD with UPLO = 'L'.
+*
+*        Shift the vectors which define the elementary reflectors one
+*        column to the right, and set the first row and column of Q to
+*        those of the unit matrix
+*
+         DO 50 J = N, 2, -1
+            A( 1, J ) = ZERO
+            DO 40 I = J + 1, N
+               A( I, J ) = A( I, J-1 )
+   40       CONTINUE
+   50    CONTINUE
+         A( 1, 1 ) = ONE
+         DO 60 I = 2, N
+            A( I, 1 ) = ZERO
+   60    CONTINUE
+         IF( N.GT.1 ) THEN
+*
+*           Generate Q(2:n,2:n)
+*
+            CALL CUNGQR( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK,
+     $                   LWORK, IINFO )
+         END IF
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of CUNGTR
+*
+      END
diff --git a/costa/native/external/lapack/cunm2l.f b/costa/native/external/lapack/cunm2l.f
new file mode 100644
index 000000000..dbf85c57e
--- /dev/null
+++ b/costa/native/external/lapack/cunm2l.f
@@ -0,0 +1,197 @@
+      SUBROUTINE CUNM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CUNM2L overwrites the general complex m-by-n matrix C with
+*
+*        Q * C  if SIDE = 'L' and TRANS = 'N', or
+*
+*        Q'* C  if SIDE = 'L' and TRANS = 'C', or
+*
+*        C * Q  if SIDE = 'R' and TRANS = 'N', or
+*
+*        C * Q' if SIDE = 'R' and TRANS = 'C',
+*
+*  where Q is a complex unitary matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(k) . . . H(2) H(1)
+*
+*  as returned by CGEQLF. Q is of order m if SIDE = 'L' and of order n
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q' from the Left
+*          = 'R': apply Q or Q' from the Right
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': apply Q  (No transpose)
+*          = 'C': apply Q' (Conjugate transpose)
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) COMPLEX array, dimension (LDA,K)
+*          The i-th column must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          CGEQLF in the last k columns of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*          If SIDE = 'L', LDA >= max(1,M);
+*          if SIDE = 'R', LDA >= max(1,N).
+*
+*  TAU     (input) COMPLEX array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by CGEQLF.
+*
+*  C       (input/output) COMPLEX array, dimension (LDC,N)
+*          On entry, the m-by-n matrix C.
+*          On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) COMPLEX array, dimension
+*                                   (N) if SIDE = 'L',
+*                                   (M) if SIDE = 'R'
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, NOTRAN
+      INTEGER            I, I1, I2, I3, MI, NI, NQ
+      COMPLEX            AII, TAUI
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLARF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+*
+*     NQ is the order of Q
+*
+      IF( LEFT ) THEN
+         NQ = M
+      ELSE
+         NQ = N
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CUNM2L', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 )
+     $   RETURN
+*
+      IF( ( LEFT .AND. NOTRAN .OR. .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN
+         I1 = 1
+         I2 = K
+         I3 = 1
+      ELSE
+         I1 = K
+         I2 = 1
+         I3 = -1
+      END IF
+*
+      IF( LEFT ) THEN
+         NI = N
+      ELSE
+         MI = M
+      END IF
+*
+      DO 10 I = I1, I2, I3
+         IF( LEFT ) THEN
+*
+*           H(i) or H(i)' is applied to C(1:m-k+i,1:n)
+*
+            MI = M - K + I
+         ELSE
+*
+*           H(i) or H(i)' is applied to C(1:m,1:n-k+i)
+*
+            NI = N - K + I
+         END IF
+*
+*        Apply H(i) or H(i)'
+*
+         IF( NOTRAN ) THEN
+            TAUI = TAU( I )
+         ELSE
+            TAUI = CONJG( TAU( I ) )
+         END IF
+         AII = A( NQ-K+I, I )
+         A( NQ-K+I, I ) = ONE
+         CALL CLARF( SIDE, MI, NI, A( 1, I ), 1, TAUI, C, LDC, WORK )
+         A( NQ-K+I, I ) = AII
+   10 CONTINUE
+      RETURN
+*
+*     End of CUNM2L
+*
+      END
diff --git a/costa/native/external/lapack/cunm2r.f b/costa/native/external/lapack/cunm2r.f
new file mode 100644
index 000000000..b05d13546
--- /dev/null
+++ b/costa/native/external/lapack/cunm2r.f
@@ -0,0 +1,202 @@
+      SUBROUTINE CUNM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CUNM2R overwrites the general complex m-by-n matrix C with
+*
+*        Q * C  if SIDE = 'L' and TRANS = 'N', or
+*
+*        Q'* C  if SIDE = 'L' and TRANS = 'C', or
+*
+*        C * Q  if SIDE = 'R' and TRANS = 'N', or
+*
+*        C * Q' if SIDE = 'R' and TRANS = 'C',
+*
+*  where Q is a complex unitary matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(1) H(2) . . . H(k)
+*
+*  as returned by CGEQRF. Q is of order m if SIDE = 'L' and of order n
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q' from the Left
+*          = 'R': apply Q or Q' from the Right
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': apply Q  (No transpose)
+*          = 'C': apply Q' (Conjugate transpose)
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) COMPLEX array, dimension (LDA,K)
+*          The i-th column must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          CGEQRF in the first k columns of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*          If SIDE = 'L', LDA >= max(1,M);
+*          if SIDE = 'R', LDA >= max(1,N).
+*
+*  TAU     (input) COMPLEX array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by CGEQRF.
+*
+*  C       (input/output) COMPLEX array, dimension (LDC,N)
+*          On entry, the m-by-n matrix C.
+*          On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) COMPLEX array, dimension
+*                                   (N) if SIDE = 'L',
+*                                   (M) if SIDE = 'R'
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, NOTRAN
+      INTEGER            I, I1, I2, I3, IC, JC, MI, NI, NQ
+      COMPLEX            AII, TAUI
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLARF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+*
+*     NQ is the order of Q
+*
+      IF( LEFT ) THEN
+         NQ = M
+      ELSE
+         NQ = N
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CUNM2R', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 )
+     $   RETURN
+*
+      IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN
+         I1 = 1
+         I2 = K
+         I3 = 1
+      ELSE
+         I1 = K
+         I2 = 1
+         I3 = -1
+      END IF
+*
+      IF( LEFT ) THEN
+         NI = N
+         JC = 1
+      ELSE
+         MI = M
+         IC = 1
+      END IF
+*
+      DO 10 I = I1, I2, I3
+         IF( LEFT ) THEN
+*
+*           H(i) or H(i)' is applied to C(i:m,1:n)
+*
+            MI = M - I + 1
+            IC = I
+         ELSE
+*
+*           H(i) or H(i)' is applied to C(1:m,i:n)
+*
+            NI = N - I + 1
+            JC = I
+         END IF
+*
+*        Apply H(i) or H(i)'
+*
+         IF( NOTRAN ) THEN
+            TAUI = TAU( I )
+         ELSE
+            TAUI = CONJG( TAU( I ) )
+         END IF
+         AII = A( I, I )
+         A( I, I ) = ONE
+         CALL CLARF( SIDE, MI, NI, A( I, I ), 1, TAUI, C( IC, JC ), LDC,
+     $               WORK )
+         A( I, I ) = AII
+   10 CONTINUE
+      RETURN
+*
+*     End of CUNM2R
+*
+      END
diff --git a/costa/native/external/lapack/cunmbr.f b/costa/native/external/lapack/cunmbr.f
new file mode 100644
index 000000000..2f93df663
--- /dev/null
+++ b/costa/native/external/lapack/cunmbr.f
@@ -0,0 +1,282 @@
+      SUBROUTINE CUNMBR( VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C,
+     $                   LDC, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS, VECT
+      INTEGER            INFO, K, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), C( LDC, * ), TAU( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  If VECT = 'Q', CUNMBR overwrites the general complex M-by-N matrix C
+*  with
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'C':      Q**H * C       C * Q**H
+*
+*  If VECT = 'P', CUNMBR overwrites the general complex M-by-N matrix C
+*  with
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      P * C          C * P
+*  TRANS = 'C':      P**H * C       C * P**H
+*
+*  Here Q and P**H are the unitary matrices determined by CGEBRD when
+*  reducing a complex matrix A to bidiagonal form: A = Q * B * P**H. Q
+*  and P**H are defined as products of elementary reflectors H(i) and
+*  G(i) respectively.
+*
+*  Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the
+*  order of the unitary matrix Q or P**H that is applied.
+*
+*  If VECT = 'Q', A is assumed to have been an NQ-by-K matrix:
+*  if nq >= k, Q = H(1) H(2) . . . H(k);
+*  if nq < k, Q = H(1) H(2) . . . H(nq-1).
+*
+*  If VECT = 'P', A is assumed to have been a K-by-NQ matrix:
+*  if k < nq, P = G(1) G(2) . . . G(k);
+*  if k >= nq, P = G(1) G(2) . . . G(nq-1).
+*
+*  Arguments
+*  =========
+*
+*  VECT    (input) CHARACTER*1
+*          = 'Q': apply Q or Q**H;
+*          = 'P': apply P or P**H.
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q, Q**H, P or P**H from the Left;
+*          = 'R': apply Q, Q**H, P or P**H from the Right.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q or P;
+*          = 'C':  Conjugate transpose, apply Q**H or P**H.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          If VECT = 'Q', the number of columns in the original
+*          matrix reduced by CGEBRD.
+*          If VECT = 'P', the number of rows in the original
+*          matrix reduced by CGEBRD.
+*          K >= 0.
+*
+*  A       (input) COMPLEX array, dimension
+*                                (LDA,min(nq,K)) if VECT = 'Q'
+*                                (LDA,nq)        if VECT = 'P'
+*          The vectors which define the elementary reflectors H(i) and
+*          G(i), whose products determine the matrices Q and P, as
+*          returned by CGEBRD.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*          If VECT = 'Q', LDA >= max(1,nq);
+*          if VECT = 'P', LDA >= max(1,min(nq,K)).
+*
+*  TAU     (input) COMPLEX array, dimension (min(nq,K))
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i) or G(i) which determines Q or P, as returned
+*          by CGEBRD in the array argument TAUQ or TAUP.
+*
+*  C       (input/output) COMPLEX array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q
+*          or P*C or P**H*C or C*P or C*P**H.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE = 'L', and
+*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            APPLYQ, LEFT, LQUERY, NOTRAN
+      CHARACTER          TRANST
+      INTEGER            I1, I2, IINFO, LWKOPT, MI, NB, NI, NQ, NW
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV, LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CUNMLQ, CUNMQR, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      APPLYQ = LSAME( VECT, 'Q' )
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q or P and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.APPLYQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( K.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( ( APPLYQ .AND. LDA.LT.MAX( 1, NQ ) ) .OR.
+     $         ( .NOT.APPLYQ .AND. LDA.LT.MAX( 1, MIN( NQ, K ) ) ) )
+     $          THEN
+         INFO = -8
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -11
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         IF( APPLYQ ) THEN
+            IF( LEFT ) THEN
+               NB = ILAENV( 1, 'CUNMQR', SIDE // TRANS, M-1, N, M-1,
+     $                      -1 )
+            ELSE
+               NB = ILAENV( 1, 'CUNMQR', SIDE // TRANS, M, N-1, N-1,
+     $                      -1 )
+            END IF
+         ELSE
+            IF( LEFT ) THEN
+               NB = ILAENV( 1, 'CUNMLQ', SIDE // TRANS, M-1, N, M-1,
+     $                      -1 )
+            ELSE
+               NB = ILAENV( 1, 'CUNMLQ', SIDE // TRANS, M, N-1, N-1,
+     $                      -1 )
+            END IF
+         END IF
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CUNMBR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+      END IF
+*
+*     Quick return if possible
+*
+      WORK( 1 ) = 1
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+      IF( APPLYQ ) THEN
+*
+*        Apply Q
+*
+         IF( NQ.GE.K ) THEN
+*
+*           Q was determined by a call to CGEBRD with nq >= k
+*
+            CALL CUNMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, IINFO )
+         ELSE IF( NQ.GT.1 ) THEN
+*
+*           Q was determined by a call to CGEBRD with nq < k
+*
+            IF( LEFT ) THEN
+               MI = M - 1
+               NI = N
+               I1 = 2
+               I2 = 1
+            ELSE
+               MI = M
+               NI = N - 1
+               I1 = 1
+               I2 = 2
+            END IF
+            CALL CUNMQR( SIDE, TRANS, MI, NI, NQ-1, A( 2, 1 ), LDA, TAU,
+     $                   C( I1, I2 ), LDC, WORK, LWORK, IINFO )
+         END IF
+      ELSE
+*
+*        Apply P
+*
+         IF( NOTRAN ) THEN
+            TRANST = 'C'
+         ELSE
+            TRANST = 'N'
+         END IF
+         IF( NQ.GT.K ) THEN
+*
+*           P was determined by a call to CGEBRD with nq > k
+*
+            CALL CUNMLQ( SIDE, TRANST, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, IINFO )
+         ELSE IF( NQ.GT.1 ) THEN
+*
+*           P was determined by a call to CGEBRD with nq <= k
+*
+            IF( LEFT ) THEN
+               MI = M - 1
+               NI = N
+               I1 = 2
+               I2 = 1
+            ELSE
+               MI = M
+               NI = N - 1
+               I1 = 1
+               I2 = 2
+            END IF
+            CALL CUNMLQ( SIDE, TRANST, MI, NI, NQ-1, A( 1, 2 ), LDA,
+     $                   TAU, C( I1, I2 ), LDC, WORK, LWORK, IINFO )
+         END IF
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of CUNMBR
+*
+      END
diff --git a/costa/native/external/lapack/cunmhr.f b/costa/native/external/lapack/cunmhr.f
new file mode 100644
index 000000000..8bbfb57ea
--- /dev/null
+++ b/costa/native/external/lapack/cunmhr.f
@@ -0,0 +1,203 @@
+      SUBROUTINE CUNMHR( SIDE, TRANS, M, N, ILO, IHI, A, LDA, TAU, C,
+     $                   LDC, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            IHI, ILO, INFO, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), C( LDC, * ), TAU( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CUNMHR overwrites the general complex M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'C':      Q**H * C       C * Q**H
+*
+*  where Q is a complex unitary matrix of order nq, with nq = m if
+*  SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of
+*  IHI-ILO elementary reflectors, as returned by CGEHRD:
+*
+*  Q = H(ilo) H(ilo+1) . . . H(ihi-1).
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**H from the Left;
+*          = 'R': apply Q or Q**H from the Right.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': apply Q  (No transpose)
+*          = 'C': apply Q**H (Conjugate transpose)
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          ILO and IHI must have the same values as in the previous call
+*          of CGEHRD. Q is equal to the unit matrix except in the
+*          submatrix Q(ilo+1:ihi,ilo+1:ihi).
+*          If SIDE = 'L', then 1 <= ILO <= IHI <= M, if M > 0, and
+*          ILO = 1 and IHI = 0, if M = 0;
+*          if SIDE = 'R', then 1 <= ILO <= IHI <= N, if N > 0, and
+*          ILO = 1 and IHI = 0, if N = 0.
+*
+*  A       (input) COMPLEX array, dimension
+*                               (LDA,M) if SIDE = 'L'
+*                               (LDA,N) if SIDE = 'R'
+*          The vectors which define the elementary reflectors, as
+*          returned by CGEHRD.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*          LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'.
+*
+*  TAU     (input) COMPLEX array, dimension
+*                               (M-1) if SIDE = 'L'
+*                               (N-1) if SIDE = 'R'
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by CGEHRD.
+*
+*  C       (input/output) COMPLEX array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE = 'L', and
+*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LEFT, LQUERY
+      INTEGER            I1, I2, IINFO, LWKOPT, MI, NB, NH, NI, NQ, NW
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV, LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CUNMQR, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NH = IHI - ILO
+      LEFT = LSAME( SIDE, 'L' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.LSAME( TRANS, 'C' ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, NQ ) ) THEN
+         INFO = -5
+      ELSE IF( IHI.LT.MIN( ILO, NQ ) .OR. IHI.GT.NQ ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN
+         INFO = -8
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -11
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         IF( LEFT ) THEN
+            NB = ILAENV( 1, 'CUNMQR', SIDE // TRANS, NH, N, NH, -1 )
+         ELSE
+            NB = ILAENV( 1, 'CUNMQR', SIDE // TRANS, M, NH, NH, -1 )
+         END IF
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CUNMHR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. NH.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( LEFT ) THEN
+         MI = NH
+         NI = N
+         I1 = ILO + 1
+         I2 = 1
+      ELSE
+         MI = M
+         NI = NH
+         I1 = 1
+         I2 = ILO + 1
+      END IF
+*
+      CALL CUNMQR( SIDE, TRANS, MI, NI, NH, A( ILO+1, ILO ), LDA,
+     $             TAU( ILO ), C( I1, I2 ), LDC, WORK, LWORK, IINFO )
+*
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of CUNMHR
+*
+      END
diff --git a/costa/native/external/lapack/cunml2.f b/costa/native/external/lapack/cunml2.f
new file mode 100644
index 000000000..7aca75b94
--- /dev/null
+++ b/costa/native/external/lapack/cunml2.f
@@ -0,0 +1,206 @@
+      SUBROUTINE CUNML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CUNML2 overwrites the general complex m-by-n matrix C with
+*
+*        Q * C  if SIDE = 'L' and TRANS = 'N', or
+*
+*        Q'* C  if SIDE = 'L' and TRANS = 'C', or
+*
+*        C * Q  if SIDE = 'R' and TRANS = 'N', or
+*
+*        C * Q' if SIDE = 'R' and TRANS = 'C',
+*
+*  where Q is a complex unitary matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(k)' . . . H(2)' H(1)'
+*
+*  as returned by CGELQF. Q is of order m if SIDE = 'L' and of order n
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q' from the Left
+*          = 'R': apply Q or Q' from the Right
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': apply Q  (No transpose)
+*          = 'C': apply Q' (Conjugate transpose)
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) COMPLEX array, dimension
+*                               (LDA,M) if SIDE = 'L',
+*                               (LDA,N) if SIDE = 'R'
+*          The i-th row must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          CGELQF in the first k rows of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,K).
+*
+*  TAU     (input) COMPLEX array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by CGELQF.
+*
+*  C       (input/output) COMPLEX array, dimension (LDC,N)
+*          On entry, the m-by-n matrix C.
+*          On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) COMPLEX array, dimension
+*                                   (N) if SIDE = 'L',
+*                                   (M) if SIDE = 'R'
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, NOTRAN
+      INTEGER            I, I1, I2, I3, IC, JC, MI, NI, NQ
+      COMPLEX            AII, TAUI
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLACGV, CLARF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+*
+*     NQ is the order of Q
+*
+      IF( LEFT ) THEN
+         NQ = M
+      ELSE
+         NQ = N
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, K ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CUNML2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 )
+     $   RETURN
+*
+      IF( ( LEFT .AND. NOTRAN .OR. .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN
+         I1 = 1
+         I2 = K
+         I3 = 1
+      ELSE
+         I1 = K
+         I2 = 1
+         I3 = -1
+      END IF
+*
+      IF( LEFT ) THEN
+         NI = N
+         JC = 1
+      ELSE
+         MI = M
+         IC = 1
+      END IF
+*
+      DO 10 I = I1, I2, I3
+         IF( LEFT ) THEN
+*
+*           H(i) or H(i)' is applied to C(i:m,1:n)
+*
+            MI = M - I + 1
+            IC = I
+         ELSE
+*
+*           H(i) or H(i)' is applied to C(1:m,i:n)
+*
+            NI = N - I + 1
+            JC = I
+         END IF
+*
+*        Apply H(i) or H(i)'
+*
+         IF( NOTRAN ) THEN
+            TAUI = CONJG( TAU( I ) )
+         ELSE
+            TAUI = TAU( I )
+         END IF
+         IF( I.LT.NQ )
+     $      CALL CLACGV( NQ-I, A( I, I+1 ), LDA )
+         AII = A( I, I )
+         A( I, I ) = ONE
+         CALL CLARF( SIDE, MI, NI, A( I, I ), LDA, TAUI, C( IC, JC ),
+     $               LDC, WORK )
+         A( I, I ) = AII
+         IF( I.LT.NQ )
+     $      CALL CLACGV( NQ-I, A( I, I+1 ), LDA )
+   10 CONTINUE
+      RETURN
+*
+*     End of CUNML2
+*
+      END
diff --git a/costa/native/external/lapack/cunmlq.f b/costa/native/external/lapack/cunmlq.f
new file mode 100644
index 000000000..1d420ea0a
--- /dev/null
+++ b/costa/native/external/lapack/cunmlq.f
@@ -0,0 +1,269 @@
+      SUBROUTINE CUNMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), C( LDC, * ), TAU( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CUNMLQ overwrites the general complex M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'C':      Q**H * C       C * Q**H
+*
+*  where Q is a complex unitary matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(k)' . . . H(2)' H(1)'
+*
+*  as returned by CGELQF. Q is of order M if SIDE = 'L' and of order N
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**H from the Left;
+*          = 'R': apply Q or Q**H from the Right.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q;
+*          = 'C':  Conjugate transpose, apply Q**H.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) COMPLEX array, dimension
+*                               (LDA,M) if SIDE = 'L',
+*                               (LDA,N) if SIDE = 'R'
+*          The i-th row must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          CGELQF in the first k rows of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,K).
+*
+*  TAU     (input) COMPLEX array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by CGELQF.
+*
+*  C       (input/output) COMPLEX array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE 'L', and
+*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            NBMAX, LDT
+      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, LQUERY, NOTRAN
+      CHARACTER          TRANST
+      INTEGER            I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK,
+     $                   LWKOPT, MI, NB, NBMIN, NI, NQ, NW
+*     ..
+*     .. Local Arrays ..
+      COMPLEX            T( LDT, NBMAX )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLARFB, CLARFT, CUNML2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, K ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size.  NB may be at most NBMAX, where NBMAX
+*        is used to define the local array T.
+*
+         NB = MIN( NBMAX, ILAENV( 1, 'CUNMLQ', SIDE // TRANS, M, N, K,
+     $             -1 ) )
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CUNMLQ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      LDWORK = NW
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+         IWS = NW*NB
+         IF( LWORK.LT.IWS ) THEN
+            NB = LWORK / LDWORK
+            NBMIN = MAX( 2, ILAENV( 2, 'CUNMLQ', SIDE // TRANS, M, N, K,
+     $              -1 ) )
+         END IF
+      ELSE
+         IWS = NW
+      END IF
+*
+      IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
+*
+*        Use unblocked code
+*
+         CALL CUNML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK,
+     $                IINFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( ( LEFT .AND. NOTRAN ) .OR.
+     $       ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN
+            I1 = 1
+            I2 = K
+            I3 = NB
+         ELSE
+            I1 = ( ( K-1 ) / NB )*NB + 1
+            I2 = 1
+            I3 = -NB
+         END IF
+*
+         IF( LEFT ) THEN
+            NI = N
+            JC = 1
+         ELSE
+            MI = M
+            IC = 1
+         END IF
+*
+         IF( NOTRAN ) THEN
+            TRANST = 'C'
+         ELSE
+            TRANST = 'N'
+         END IF
+*
+         DO 10 I = I1, I2, I3
+            IB = MIN( NB, K-I+1 )
+*
+*           Form the triangular factor of the block reflector
+*           H = H(i) H(i+1) . . . H(i+ib-1)
+*
+            CALL CLARFT( 'Forward', 'Rowwise', NQ-I+1, IB, A( I, I ),
+     $                   LDA, TAU( I ), T, LDT )
+            IF( LEFT ) THEN
+*
+*              H or H' is applied to C(i:m,1:n)
+*
+               MI = M - I + 1
+               IC = I
+            ELSE
+*
+*              H or H' is applied to C(1:m,i:n)
+*
+               NI = N - I + 1
+               JC = I
+            END IF
+*
+*           Apply H or H'
+*
+            CALL CLARFB( SIDE, TRANST, 'Forward', 'Rowwise', MI, NI, IB,
+     $                   A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, WORK,
+     $                   LDWORK )
+   10    CONTINUE
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of CUNMLQ
+*
+      END
diff --git a/costa/native/external/lapack/cunmql.f b/costa/native/external/lapack/cunmql.f
new file mode 100644
index 000000000..cbaca868f
--- /dev/null
+++ b/costa/native/external/lapack/cunmql.f
@@ -0,0 +1,258 @@
+      SUBROUTINE CUNMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), C( LDC, * ), TAU( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CUNMQL overwrites the general complex M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'C':      Q**H * C       C * Q**H
+*
+*  where Q is a complex unitary matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(k) . . . H(2) H(1)
+*
+*  as returned by CGEQLF. Q is of order M if SIDE = 'L' and of order N
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**H from the Left;
+*          = 'R': apply Q or Q**H from the Right.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q;
+*          = 'C':  Transpose, apply Q**H.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) COMPLEX array, dimension (LDA,K)
+*          The i-th column must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          CGEQLF in the last k columns of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*          If SIDE = 'L', LDA >= max(1,M);
+*          if SIDE = 'R', LDA >= max(1,N).
+*
+*  TAU     (input) COMPLEX array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by CGEQLF.
+*
+*  C       (input/output) COMPLEX array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE = 'L', and
+*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            NBMAX, LDT
+      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, LQUERY, NOTRAN
+      INTEGER            I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT,
+     $                   MI, NB, NBMIN, NI, NQ, NW
+*     ..
+*     .. Local Arrays ..
+      COMPLEX            T( LDT, NBMAX )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLARFB, CLARFT, CUNM2L, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size.  NB may be at most NBMAX, where NBMAX
+*        is used to define the local array T.
+*
+         NB = MIN( NBMAX, ILAENV( 1, 'CUNMQL', SIDE // TRANS, M, N, K,
+     $             -1 ) )
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CUNMQL', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      LDWORK = NW
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+         IWS = NW*NB
+         IF( LWORK.LT.IWS ) THEN
+            NB = LWORK / LDWORK
+            NBMIN = MAX( 2, ILAENV( 2, 'CUNMQL', SIDE // TRANS, M, N, K,
+     $              -1 ) )
+         END IF
+      ELSE
+         IWS = NW
+      END IF
+*
+      IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
+*
+*        Use unblocked code
+*
+         CALL CUNM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK,
+     $                IINFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( ( LEFT .AND. NOTRAN ) .OR.
+     $       ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN
+            I1 = 1
+            I2 = K
+            I3 = NB
+         ELSE
+            I1 = ( ( K-1 ) / NB )*NB + 1
+            I2 = 1
+            I3 = -NB
+         END IF
+*
+         IF( LEFT ) THEN
+            NI = N
+         ELSE
+            MI = M
+         END IF
+*
+         DO 10 I = I1, I2, I3
+            IB = MIN( NB, K-I+1 )
+*
+*           Form the triangular factor of the block reflector
+*           H = H(i+ib-1) . . . H(i+1) H(i)
+*
+            CALL CLARFT( 'Backward', 'Columnwise', NQ-K+I+IB-1, IB,
+     $                   A( 1, I ), LDA, TAU( I ), T, LDT )
+            IF( LEFT ) THEN
+*
+*              H or H' is applied to C(1:m-k+i+ib-1,1:n)
+*
+               MI = M - K + I + IB - 1
+            ELSE
+*
+*              H or H' is applied to C(1:m,1:n-k+i+ib-1)
+*
+               NI = N - K + I + IB - 1
+            END IF
+*
+*           Apply H or H'
+*
+            CALL CLARFB( SIDE, TRANS, 'Backward', 'Columnwise', MI, NI,
+     $                   IB, A( 1, I ), LDA, T, LDT, C, LDC, WORK,
+     $                   LDWORK )
+   10    CONTINUE
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of CUNMQL
+*
+      END
diff --git a/costa/native/external/lapack/cunmqr.f b/costa/native/external/lapack/cunmqr.f
new file mode 100644
index 000000000..0c2aa4563
--- /dev/null
+++ b/costa/native/external/lapack/cunmqr.f
@@ -0,0 +1,262 @@
+      SUBROUTINE CUNMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), C( LDC, * ), TAU( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CUNMQR overwrites the general complex M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'C':      Q**H * C       C * Q**H
+*
+*  where Q is a complex unitary matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(1) H(2) . . . H(k)
+*
+*  as returned by CGEQRF. Q is of order M if SIDE = 'L' and of order N
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**H from the Left;
+*          = 'R': apply Q or Q**H from the Right.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q;
+*          = 'C':  Conjugate transpose, apply Q**H.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) COMPLEX array, dimension (LDA,K)
+*          The i-th column must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          CGEQRF in the first k columns of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*          If SIDE = 'L', LDA >= max(1,M);
+*          if SIDE = 'R', LDA >= max(1,N).
+*
+*  TAU     (input) COMPLEX array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by CGEQRF.
+*
+*  C       (input/output) COMPLEX array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE = 'L', and
+*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            NBMAX, LDT
+      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, LQUERY, NOTRAN
+      INTEGER            I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK,
+     $                   LWKOPT, MI, NB, NBMIN, NI, NQ, NW
+*     ..
+*     .. Local Arrays ..
+      COMPLEX            T( LDT, NBMAX )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLARFB, CLARFT, CUNM2R, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size.  NB may be at most NBMAX, where NBMAX
+*        is used to define the local array T.
+*
+         NB = MIN( NBMAX, ILAENV( 1, 'CUNMQR', SIDE // TRANS, M, N, K,
+     $        -1 ) )
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CUNMQR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      LDWORK = NW
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+         IWS = NW*NB
+         IF( LWORK.LT.IWS ) THEN
+            NB = LWORK / LDWORK
+            NBMIN = MAX( 2, ILAENV( 2, 'CUNMQR', SIDE // TRANS, M, N, K,
+     $              -1 ) )
+         END IF
+      ELSE
+         IWS = NW
+      END IF
+*
+      IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
+*
+*        Use unblocked code
+*
+         CALL CUNM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK,
+     $                IINFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( ( LEFT .AND. .NOT.NOTRAN ) .OR.
+     $       ( .NOT.LEFT .AND. NOTRAN ) ) THEN
+            I1 = 1
+            I2 = K
+            I3 = NB
+         ELSE
+            I1 = ( ( K-1 ) / NB )*NB + 1
+            I2 = 1
+            I3 = -NB
+         END IF
+*
+         IF( LEFT ) THEN
+            NI = N
+            JC = 1
+         ELSE
+            MI = M
+            IC = 1
+         END IF
+*
+         DO 10 I = I1, I2, I3
+            IB = MIN( NB, K-I+1 )
+*
+*           Form the triangular factor of the block reflector
+*           H = H(i) H(i+1) . . . H(i+ib-1)
+*
+            CALL CLARFT( 'Forward', 'Columnwise', NQ-I+1, IB, A( I, I ),
+     $                   LDA, TAU( I ), T, LDT )
+            IF( LEFT ) THEN
+*
+*              H or H' is applied to C(i:m,1:n)
+*
+               MI = M - I + 1
+               IC = I
+            ELSE
+*
+*              H or H' is applied to C(1:m,i:n)
+*
+               NI = N - I + 1
+               JC = I
+            END IF
+*
+*           Apply H or H'
+*
+            CALL CLARFB( SIDE, TRANS, 'Forward', 'Columnwise', MI, NI,
+     $                   IB, A( I, I ), LDA, T, LDT, C( IC, JC ), LDC,
+     $                   WORK, LDWORK )
+   10    CONTINUE
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of CUNMQR
+*
+      END
diff --git a/costa/native/external/lapack/cunmr2.f b/costa/native/external/lapack/cunmr2.f
new file mode 100644
index 000000000..7fc45626e
--- /dev/null
+++ b/costa/native/external/lapack/cunmr2.f
@@ -0,0 +1,199 @@
+      SUBROUTINE CUNMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CUNMR2 overwrites the general complex m-by-n matrix C with
+*
+*        Q * C  if SIDE = 'L' and TRANS = 'N', or
+*
+*        Q'* C  if SIDE = 'L' and TRANS = 'C', or
+*
+*        C * Q  if SIDE = 'R' and TRANS = 'N', or
+*
+*        C * Q' if SIDE = 'R' and TRANS = 'C',
+*
+*  where Q is a complex unitary matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(1)' H(2)' . . . H(k)'
+*
+*  as returned by CGERQF. Q is of order m if SIDE = 'L' and of order n
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q' from the Left
+*          = 'R': apply Q or Q' from the Right
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': apply Q  (No transpose)
+*          = 'C': apply Q' (Conjugate transpose)
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) COMPLEX array, dimension
+*                               (LDA,M) if SIDE = 'L',
+*                               (LDA,N) if SIDE = 'R'
+*          The i-th row must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          CGERQF in the last k rows of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,K).
+*
+*  TAU     (input) COMPLEX array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by CGERQF.
+*
+*  C       (input/output) COMPLEX array, dimension (LDC,N)
+*          On entry, the m-by-n matrix C.
+*          On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) COMPLEX array, dimension
+*                                   (N) if SIDE = 'L',
+*                                   (M) if SIDE = 'R'
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, NOTRAN
+      INTEGER            I, I1, I2, I3, MI, NI, NQ
+      COMPLEX            AII, TAUI
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLACGV, CLARF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+*
+*     NQ is the order of Q
+*
+      IF( LEFT ) THEN
+         NQ = M
+      ELSE
+         NQ = N
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, K ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CUNMR2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 )
+     $   RETURN
+*
+      IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN
+         I1 = 1
+         I2 = K
+         I3 = 1
+      ELSE
+         I1 = K
+         I2 = 1
+         I3 = -1
+      END IF
+*
+      IF( LEFT ) THEN
+         NI = N
+      ELSE
+         MI = M
+      END IF
+*
+      DO 10 I = I1, I2, I3
+         IF( LEFT ) THEN
+*
+*           H(i) or H(i)' is applied to C(1:m-k+i,1:n)
+*
+            MI = M - K + I
+         ELSE
+*
+*           H(i) or H(i)' is applied to C(1:m,1:n-k+i)
+*
+            NI = N - K + I
+         END IF
+*
+*        Apply H(i) or H(i)'
+*
+         IF( NOTRAN ) THEN
+            TAUI = CONJG( TAU( I ) )
+         ELSE
+            TAUI = TAU( I )
+         END IF
+         CALL CLACGV( NQ-K+I-1, A( I, 1 ), LDA )
+         AII = A( I, NQ-K+I )
+         A( I, NQ-K+I ) = ONE
+         CALL CLARF( SIDE, MI, NI, A( I, 1 ), LDA, TAUI, C, LDC, WORK )
+         A( I, NQ-K+I ) = AII
+         CALL CLACGV( NQ-K+I-1, A( I, 1 ), LDA )
+   10 CONTINUE
+      RETURN
+*
+*     End of CUNMR2
+*
+      END
diff --git a/costa/native/external/lapack/cunmr3.f b/costa/native/external/lapack/cunmr3.f
new file mode 100644
index 000000000..38c8eca46
--- /dev/null
+++ b/costa/native/external/lapack/cunmr3.f
@@ -0,0 +1,213 @@
+      SUBROUTINE CUNMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC,
+     $                   WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, L, LDA, LDC, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CUNMR3 overwrites the general complex m by n matrix C with
+*
+*        Q * C  if SIDE = 'L' and TRANS = 'N', or
+*
+*        Q'* C  if SIDE = 'L' and TRANS = 'C', or
+*
+*        C * Q  if SIDE = 'R' and TRANS = 'N', or
+*
+*        C * Q' if SIDE = 'R' and TRANS = 'C',
+*
+*  where Q is a complex unitary matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(1) H(2) . . . H(k)
+*
+*  as returned by CTZRZF. Q is of order m if SIDE = 'L' and of order n
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q' from the Left
+*          = 'R': apply Q or Q' from the Right
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': apply Q  (No transpose)
+*          = 'C': apply Q' (Conjugate transpose)
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  L       (input) INTEGER
+*          The number of columns of the matrix A containing
+*          the meaningful part of the Householder reflectors.
+*          If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0.
+*
+*  A       (input) COMPLEX array, dimension
+*                               (LDA,M) if SIDE = 'L',
+*                               (LDA,N) if SIDE = 'R'
+*          The i-th row must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          CTZRZF in the last k rows of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,K).
+*
+*  TAU     (input) COMPLEX array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by CTZRZF.
+*
+*  C       (input/output) COMPLEX array, dimension (LDC,N)
+*          On entry, the m-by-n matrix C.
+*          On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) COMPLEX array, dimension
+*                                   (N) if SIDE = 'L',
+*                                   (M) if SIDE = 'R'
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LEFT, NOTRAN
+      INTEGER            I, I1, I2, I3, IC, JA, JC, MI, NI, NQ
+      COMPLEX            TAUI
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLARZ, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+*
+*     NQ is the order of Q
+*
+      IF( LEFT ) THEN
+         NQ = M
+      ELSE
+         NQ = N
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( L.LT.0 .OR. ( LEFT .AND. ( L.GT.M ) ) .OR.
+     $         ( .NOT.LEFT .AND. ( L.GT.N ) ) ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, K ) ) THEN
+         INFO = -8
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CUNMR3', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 )
+     $   RETURN
+*
+      IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN
+         I1 = 1
+         I2 = K
+         I3 = 1
+      ELSE
+         I1 = K
+         I2 = 1
+         I3 = -1
+      END IF
+*
+      IF( LEFT ) THEN
+         NI = N
+         JA = M - L + 1
+         JC = 1
+      ELSE
+         MI = M
+         JA = N - L + 1
+         IC = 1
+      END IF
+*
+      DO 10 I = I1, I2, I3
+         IF( LEFT ) THEN
+*
+*           H(i) or H(i)' is applied to C(i:m,1:n)
+*
+            MI = M - I + 1
+            IC = I
+         ELSE
+*
+*           H(i) or H(i)' is applied to C(1:m,i:n)
+*
+            NI = N - I + 1
+            JC = I
+         END IF
+*
+*        Apply H(i) or H(i)'
+*
+         IF( NOTRAN ) THEN
+            TAUI = TAU( I )
+         ELSE
+            TAUI = CONJG( TAU( I ) )
+         END IF
+         CALL CLARZ( SIDE, MI, NI, L, A( I, JA ), LDA, TAUI,
+     $               C( IC, JC ), LDC, WORK )
+*
+   10 CONTINUE
+*
+      RETURN
+*
+*     End of CUNMR3
+*
+      END
diff --git a/costa/native/external/lapack/cunmrq.f b/costa/native/external/lapack/cunmrq.f
new file mode 100644
index 000000000..89f9b6c37
--- /dev/null
+++ b/costa/native/external/lapack/cunmrq.f
@@ -0,0 +1,265 @@
+      SUBROUTINE CUNMRQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), C( LDC, * ), TAU( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CUNMRQ overwrites the general complex M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'C':      Q**H * C       C * Q**H
+*
+*  where Q is a complex unitary matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(1)' H(2)' . . . H(k)'
+*
+*  as returned by CGERQF. Q is of order M if SIDE = 'L' and of order N
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**H from the Left;
+*          = 'R': apply Q or Q**H from the Right.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q;
+*          = 'C':  Transpose, apply Q**H.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) COMPLEX array, dimension
+*                               (LDA,M) if SIDE = 'L',
+*                               (LDA,N) if SIDE = 'R'
+*          The i-th row must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          CGERQF in the last k rows of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,K).
+*
+*  TAU     (input) COMPLEX array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by CGERQF.
+*
+*  C       (input/output) COMPLEX array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE = 'L', and
+*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            NBMAX, LDT
+      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, LQUERY, NOTRAN
+      CHARACTER          TRANST
+      INTEGER            I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT,
+     $                   MI, NB, NBMIN, NI, NQ, NW
+*     ..
+*     .. Local Arrays ..
+      COMPLEX            T( LDT, NBMAX )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLARFB, CLARFT, CUNMR2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, K ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size.  NB may be at most NBMAX, where NBMAX
+*        is used to define the local array T.
+*
+         NB = MIN( NBMAX, ILAENV( 1, 'CUNMRQ', SIDE // TRANS, M, N, K,
+     $        -1 ) )
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CUNMRQ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      LDWORK = NW
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+         IWS = NW*NB
+         IF( LWORK.LT.IWS ) THEN
+            NB = LWORK / LDWORK
+            NBMIN = MAX( 2, ILAENV( 2, 'CUNMRQ', SIDE // TRANS, M, N, K,
+     $              -1 ) )
+         END IF
+      ELSE
+         IWS = NW
+      END IF
+*
+      IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
+*
+*        Use unblocked code
+*
+         CALL CUNMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK,
+     $                IINFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( ( LEFT .AND. .NOT.NOTRAN ) .OR.
+     $       ( .NOT.LEFT .AND. NOTRAN ) ) THEN
+            I1 = 1
+            I2 = K
+            I3 = NB
+         ELSE
+            I1 = ( ( K-1 ) / NB )*NB + 1
+            I2 = 1
+            I3 = -NB
+         END IF
+*
+         IF( LEFT ) THEN
+            NI = N
+         ELSE
+            MI = M
+         END IF
+*
+         IF( NOTRAN ) THEN
+            TRANST = 'C'
+         ELSE
+            TRANST = 'N'
+         END IF
+*
+         DO 10 I = I1, I2, I3
+            IB = MIN( NB, K-I+1 )
+*
+*           Form the triangular factor of the block reflector
+*           H = H(i+ib-1) . . . H(i+1) H(i)
+*
+            CALL CLARFT( 'Backward', 'Rowwise', NQ-K+I+IB-1, IB,
+     $                   A( I, 1 ), LDA, TAU( I ), T, LDT )
+            IF( LEFT ) THEN
+*
+*              H or H' is applied to C(1:m-k+i+ib-1,1:n)
+*
+               MI = M - K + I + IB - 1
+            ELSE
+*
+*              H or H' is applied to C(1:m,1:n-k+i+ib-1)
+*
+               NI = N - K + I + IB - 1
+            END IF
+*
+*           Apply H or H'
+*
+            CALL CLARFB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI,
+     $                   IB, A( I, 1 ), LDA, T, LDT, C, LDC, WORK,
+     $                   LDWORK )
+   10    CONTINUE
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of CUNMRQ
+*
+      END
diff --git a/costa/native/external/lapack/cunmrz.f b/costa/native/external/lapack/cunmrz.f
new file mode 100644
index 000000000..b157af671
--- /dev/null
+++ b/costa/native/external/lapack/cunmrz.f
@@ -0,0 +1,294 @@
+      SUBROUTINE CUNMRZ( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, L, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), C( LDC, * ), TAU( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CUNMRZ overwrites the general complex M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'C':      Q**H * C       C * Q**H
+*
+*  where Q is a complex unitary matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(1) H(2) . . . H(k)
+*
+*  as returned by CTZRZF. Q is of order M if SIDE = 'L' and of order N
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**H from the Left;
+*          = 'R': apply Q or Q**H from the Right.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q;
+*          = 'C':  Conjugate transpose, apply Q**H.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  L       (input) INTEGER
+*          The number of columns of the matrix A containing
+*          the meaningful part of the Householder reflectors.
+*          If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0.
+*
+*  A       (input) COMPLEX array, dimension
+*                               (LDA,M) if SIDE = 'L',
+*                               (LDA,N) if SIDE = 'R'
+*          The i-th row must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          CTZRZF in the last k rows of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,K).
+*
+*  TAU     (input) COMPLEX array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by CTZRZF.
+*
+*  C       (input/output) COMPLEX array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE = 'L', and
+*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            NBMAX, LDT
+      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, LQUERY, NOTRAN
+      CHARACTER          TRANST
+      INTEGER            I, I1, I2, I3, IB, IC, IINFO, IWS, JA, JC,
+     $                   LDWORK, LWKOPT, MI, NB, NBMIN, NI, NQ, NW
+*     ..
+*     .. Local Arrays ..
+      COMPLEX            T( LDT, NBMAX )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLARZB, CLARZT, CUNMR3, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( L.LT.0 .OR. ( LEFT .AND. ( L.GT.M ) ) .OR.
+     $         ( .NOT.LEFT .AND. ( L.GT.N ) ) ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, K ) ) THEN
+         INFO = -8
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -11
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size.  NB may be at most NBMAX, where NBMAX
+*        is used to define the local array T.
+*
+         NB = MIN( NBMAX, ILAENV( 1, 'CUNMRQ', SIDE // TRANS, M, N, K,
+     $        -1 ) )
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CUNMRZ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+*     Determine the block size.  NB may be at most NBMAX, where NBMAX
+*     is used to define the local array T.
+*
+      NB = MIN( NBMAX, ILAENV( 1, 'CUNMRQ', SIDE // TRANS, M, N, K,
+     $     -1 ) )
+      NBMIN = 2
+      LDWORK = NW
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+         IWS = NW*NB
+         IF( LWORK.LT.IWS ) THEN
+            NB = LWORK / LDWORK
+            NBMIN = MAX( 2, ILAENV( 2, 'CUNMRQ', SIDE // TRANS, M, N, K,
+     $              -1 ) )
+         END IF
+      ELSE
+         IWS = NW
+      END IF
+*
+      IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
+*
+*        Use unblocked code
+*
+         CALL CUNMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC,
+     $                WORK, IINFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( ( LEFT .AND. .NOT.NOTRAN ) .OR.
+     $       ( .NOT.LEFT .AND. NOTRAN ) ) THEN
+            I1 = 1
+            I2 = K
+            I3 = NB
+         ELSE
+            I1 = ( ( K-1 ) / NB )*NB + 1
+            I2 = 1
+            I3 = -NB
+         END IF
+*
+         IF( LEFT ) THEN
+            NI = N
+            JC = 1
+            JA = M - L + 1
+         ELSE
+            MI = M
+            IC = 1
+            JA = N - L + 1
+         END IF
+*
+         IF( NOTRAN ) THEN
+            TRANST = 'C'
+         ELSE
+            TRANST = 'N'
+         END IF
+*
+         DO 10 I = I1, I2, I3
+            IB = MIN( NB, K-I+1 )
+*
+*           Form the triangular factor of the block reflector
+*           H = H(i+ib-1) . . . H(i+1) H(i)
+*
+            CALL CLARZT( 'Backward', 'Rowwise', L, IB, A( I, JA ), LDA,
+     $                   TAU( I ), T, LDT )
+*
+            IF( LEFT ) THEN
+*
+*              H or H' is applied to C(i:m,1:n)
+*
+               MI = M - I + 1
+               IC = I
+            ELSE
+*
+*              H or H' is applied to C(1:m,i:n)
+*
+               NI = N - I + 1
+               JC = I
+            END IF
+*
+*           Apply H or H'
+*
+            CALL CLARZB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI,
+     $                   IB, L, A( I, JA ), LDA, T, LDT, C( IC, JC ),
+     $                   LDC, WORK, LDWORK )
+   10    CONTINUE
+*
+      END IF
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of CUNMRZ
+*
+      END
diff --git a/costa/native/external/lapack/cunmtr.f b/costa/native/external/lapack/cunmtr.f
new file mode 100644
index 000000000..aa2992474
--- /dev/null
+++ b/costa/native/external/lapack/cunmtr.f
@@ -0,0 +1,224 @@
+      SUBROUTINE CUNMTR( SIDE, UPLO, TRANS, M, N, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS, UPLO
+      INTEGER            INFO, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * ), C( LDC, * ), TAU( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CUNMTR overwrites the general complex M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'C':      Q**H * C       C * Q**H
+*
+*  where Q is a complex unitary matrix of order nq, with nq = m if
+*  SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of
+*  nq-1 elementary reflectors, as returned by CHETRD:
+*
+*  if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1);
+*
+*  if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1).
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**H from the Left;
+*          = 'R': apply Q or Q**H from the Right.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U': Upper triangle of A contains elementary reflectors
+*                 from CHETRD;
+*          = 'L': Lower triangle of A contains elementary reflectors
+*                 from CHETRD.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q;
+*          = 'C':  Conjugate transpose, apply Q**H.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  A       (input) COMPLEX array, dimension
+*                               (LDA,M) if SIDE = 'L'
+*                               (LDA,N) if SIDE = 'R'
+*          The vectors which define the elementary reflectors, as
+*          returned by CHETRD.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*          LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'.
+*
+*  TAU     (input) COMPLEX array, dimension
+*                               (M-1) if SIDE = 'L'
+*                               (N-1) if SIDE = 'R'
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by CHETRD.
+*
+*  C       (input/output) COMPLEX array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE = 'L', and
+*          LWORK >=M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LEFT, LQUERY, UPPER
+      INTEGER            I1, I2, IINFO, LWKOPT, MI, NB, NI, NQ, NW
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV, LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CUNMQL, CUNMQR, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      UPPER = LSAME( UPLO, 'U' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.LSAME( TRANS, 'C' ) )
+     $          THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         IF( UPPER ) THEN
+            IF( LEFT ) THEN
+               NB = ILAENV( 1, 'CUNMQL', SIDE // TRANS, M-1, N, M-1,
+     $                      -1 )
+            ELSE
+               NB = ILAENV( 1, 'CUNMQL', SIDE // TRANS, M, N-1, N-1,
+     $                      -1 )
+            END IF
+         ELSE
+            IF( LEFT ) THEN
+               NB = ILAENV( 1, 'CUNMQR', SIDE // TRANS, M-1, N, M-1,
+     $                      -1 )
+            ELSE
+               NB = ILAENV( 1, 'CUNMQR', SIDE // TRANS, M, N-1, N-1,
+     $                      -1 )
+            END IF
+         END IF
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CUNMTR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. NQ.EQ.1 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( LEFT ) THEN
+         MI = M - 1
+         NI = N
+      ELSE
+         MI = M
+         NI = N - 1
+      END IF
+*
+      IF( UPPER ) THEN
+*
+*        Q was determined by a call to CHETRD with UPLO = 'U'
+*
+         CALL CUNMQL( SIDE, TRANS, MI, NI, NQ-1, A( 1, 2 ), LDA, TAU, C,
+     $                LDC, WORK, LWORK, IINFO )
+      ELSE
+*
+*        Q was determined by a call to CHETRD with UPLO = 'L'
+*
+         IF( LEFT ) THEN
+            I1 = 2
+            I2 = 1
+         ELSE
+            I1 = 1
+            I2 = 2
+         END IF
+         CALL CUNMQR( SIDE, TRANS, MI, NI, NQ-1, A( 2, 1 ), LDA, TAU,
+     $                C( I1, I2 ), LDC, WORK, LWORK, IINFO )
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of CUNMTR
+*
+      END
diff --git a/costa/native/external/lapack/cupgtr.f b/costa/native/external/lapack/cupgtr.f
new file mode 100644
index 000000000..82e84b611
--- /dev/null
+++ b/costa/native/external/lapack/cupgtr.f
@@ -0,0 +1,162 @@
+      SUBROUTINE CUPGTR( UPLO, N, AP, TAU, Q, LDQ, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDQ, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            AP( * ), Q( LDQ, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CUPGTR generates a complex unitary matrix Q which is defined as the
+*  product of n-1 elementary reflectors H(i) of order n, as returned by
+*  CHPTRD using packed storage:
+*
+*  if UPLO = 'U', Q = H(n-1) . . . H(2) H(1),
+*
+*  if UPLO = 'L', Q = H(1) H(2) . . . H(n-1).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U': Upper triangular packed storage used in previous
+*                 call to CHPTRD;
+*          = 'L': Lower triangular packed storage used in previous
+*                 call to CHPTRD.
+*
+*  N       (input) INTEGER
+*          The order of the matrix Q. N >= 0.
+*
+*  AP      (input) COMPLEX array, dimension (N*(N+1)/2)
+*          The vectors which define the elementary reflectors, as
+*          returned by CHPTRD.
+*
+*  TAU     (input) COMPLEX array, dimension (N-1)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by CHPTRD.
+*
+*  Q       (output) COMPLEX array, dimension (LDQ,N)
+*          The N-by-N unitary matrix Q.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q. LDQ >= max(1,N).
+*
+*  WORK    (workspace) COMPLEX array, dimension (N-1)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0E+0, 0.0E+0 ),
+     $                   CONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, IINFO, IJ, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CUNG2L, CUNG2R, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CUPGTR', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Q was determined by a call to CHPTRD with UPLO = 'U'
+*
+*        Unpack the vectors which define the elementary reflectors and
+*        set the last row and column of Q equal to those of the unit
+*        matrix
+*
+         IJ = 2
+         DO 20 J = 1, N - 1
+            DO 10 I = 1, J - 1
+               Q( I, J ) = AP( IJ )
+               IJ = IJ + 1
+   10       CONTINUE
+            IJ = IJ + 2
+            Q( N, J ) = CZERO
+   20    CONTINUE
+         DO 30 I = 1, N - 1
+            Q( I, N ) = CZERO
+   30    CONTINUE
+         Q( N, N ) = CONE
+*
+*        Generate Q(1:n-1,1:n-1)
+*
+         CALL CUNG2L( N-1, N-1, N-1, Q, LDQ, TAU, WORK, IINFO )
+*
+      ELSE
+*
+*        Q was determined by a call to CHPTRD with UPLO = 'L'.
+*
+*        Unpack the vectors which define the elementary reflectors and
+*        set the first row and column of Q equal to those of the unit
+*        matrix
+*
+         Q( 1, 1 ) = CONE
+         DO 40 I = 2, N
+            Q( I, 1 ) = CZERO
+   40    CONTINUE
+         IJ = 3
+         DO 60 J = 2, N
+            Q( 1, J ) = CZERO
+            DO 50 I = J + 1, N
+               Q( I, J ) = AP( IJ )
+               IJ = IJ + 1
+   50       CONTINUE
+            IJ = IJ + 2
+   60    CONTINUE
+         IF( N.GT.1 ) THEN
+*
+*           Generate Q(2:n,2:n)
+*
+            CALL CUNG2R( N-1, N-1, N-1, Q( 2, 2 ), LDQ, TAU, WORK,
+     $                   IINFO )
+         END IF
+      END IF
+      RETURN
+*
+*     End of CUPGTR
+*
+      END
diff --git a/costa/native/external/lapack/cupmtr.f b/costa/native/external/lapack/cupmtr.f
new file mode 100644
index 000000000..d9013f769
--- /dev/null
+++ b/costa/native/external/lapack/cupmtr.f
@@ -0,0 +1,268 @@
+      SUBROUTINE CUPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS, UPLO
+      INTEGER            INFO, LDC, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            AP( * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  CUPMTR overwrites the general complex M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'C':      Q**H * C       C * Q**H
+*
+*  where Q is a complex unitary matrix of order nq, with nq = m if
+*  SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of
+*  nq-1 elementary reflectors, as returned by CHPTRD using packed
+*  storage:
+*
+*  if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1);
+*
+*  if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1).
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**H from the Left;
+*          = 'R': apply Q or Q**H from the Right.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U': Upper triangular packed storage used in previous
+*                 call to CHPTRD;
+*          = 'L': Lower triangular packed storage used in previous
+*                 call to CHPTRD.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q;
+*          = 'C':  Conjugate transpose, apply Q**H.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  AP      (input) COMPLEX array, dimension
+*                               (M*(M+1)/2) if SIDE = 'L'
+*                               (N*(N+1)/2) if SIDE = 'R'
+*          The vectors which define the elementary reflectors, as
+*          returned by CHPTRD.  AP is modified by the routine but
+*          restored on exit.
+*
+*  TAU     (input) COMPLEX array, dimension (M-1) if SIDE = 'L'
+*                                     or (N-1) if SIDE = 'R'
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by CHPTRD.
+*
+*  C       (input/output) COMPLEX array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) COMPLEX array, dimension
+*                                   (N) if SIDE = 'L'
+*                                   (M) if SIDE = 'R'
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            ONE
+      PARAMETER          ( ONE = ( 1.0E+0, 0.0E+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            FORWRD, LEFT, NOTRAN, UPPER
+      INTEGER            I, I1, I2, I3, IC, II, JC, MI, NI, NQ
+      COMPLEX            AII, TAUI
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CLARF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      UPPER = LSAME( UPLO, 'U' )
+*
+*     NQ is the order of Q
+*
+      IF( LEFT ) THEN
+         NQ = M
+      ELSE
+         NQ = N
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CUPMTR', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Q was determined by a call to CHPTRD with UPLO = 'U'
+*
+         FORWRD = ( LEFT .AND. NOTRAN ) .OR.
+     $            ( .NOT.LEFT .AND. .NOT.NOTRAN )
+*
+         IF( FORWRD ) THEN
+            I1 = 1
+            I2 = NQ - 1
+            I3 = 1
+            II = 2
+         ELSE
+            I1 = NQ - 1
+            I2 = 1
+            I3 = -1
+            II = NQ*( NQ+1 ) / 2 - 1
+         END IF
+*
+         IF( LEFT ) THEN
+            NI = N
+         ELSE
+            MI = M
+         END IF
+*
+         DO 10 I = I1, I2, I3
+            IF( LEFT ) THEN
+*
+*              H(i) or H(i)' is applied to C(1:i,1:n)
+*
+               MI = I
+            ELSE
+*
+*              H(i) or H(i)' is applied to C(1:m,1:i)
+*
+               NI = I
+            END IF
+*
+*           Apply H(i) or H(i)'
+*
+            IF( NOTRAN ) THEN
+               TAUI = TAU( I )
+            ELSE
+               TAUI = CONJG( TAU( I ) )
+            END IF
+            AII = AP( II )
+            AP( II ) = ONE
+            CALL CLARF( SIDE, MI, NI, AP( II-I+1 ), 1, TAUI, C, LDC,
+     $                  WORK )
+            AP( II ) = AII
+*
+            IF( FORWRD ) THEN
+               II = II + I + 2
+            ELSE
+               II = II - I - 1
+            END IF
+   10    CONTINUE
+      ELSE
+*
+*        Q was determined by a call to CHPTRD with UPLO = 'L'.
+*
+         FORWRD = ( LEFT .AND. .NOT.NOTRAN ) .OR.
+     $            ( .NOT.LEFT .AND. NOTRAN )
+*
+         IF( FORWRD ) THEN
+            I1 = 1
+            I2 = NQ - 1
+            I3 = 1
+            II = 2
+         ELSE
+            I1 = NQ - 1
+            I2 = 1
+            I3 = -1
+            II = NQ*( NQ+1 ) / 2 - 1
+         END IF
+*
+         IF( LEFT ) THEN
+            NI = N
+            JC = 1
+         ELSE
+            MI = M
+            IC = 1
+         END IF
+*
+         DO 20 I = I1, I2, I3
+            AII = AP( II )
+            AP( II ) = ONE
+            IF( LEFT ) THEN
+*
+*              H(i) or H(i)' is applied to C(i+1:m,1:n)
+*
+               MI = M - I
+               IC = I + 1
+            ELSE
+*
+*              H(i) or H(i)' is applied to C(1:m,i+1:n)
+*
+               NI = N - I
+               JC = I + 1
+            END IF
+*
+*           Apply H(i) or H(i)'
+*
+            IF( NOTRAN ) THEN
+               TAUI = TAU( I )
+            ELSE
+               TAUI = CONJG( TAU( I ) )
+            END IF
+            CALL CLARF( SIDE, MI, NI, AP( II ), 1, TAUI, C( IC, JC ),
+     $                  LDC, WORK )
+            AP( II ) = AII
+*
+            IF( FORWRD ) THEN
+               II = II + NQ - I + 1
+            ELSE
+               II = II - NQ + I - 2
+            END IF
+   20    CONTINUE
+      END IF
+      RETURN
+*
+*     End of CUPMTR
+*
+      END
diff --git a/costa/native/external/lapack/dbdsdc.f b/costa/native/external/lapack/dbdsdc.f
new file mode 100644
index 000000000..17828d161
--- /dev/null
+++ b/costa/native/external/lapack/dbdsdc.f
@@ -0,0 +1,427 @@
+      SUBROUTINE DBDSDC( UPLO, COMPQ, N, D, E, U, LDU, VT, LDVT, Q, IQ,
+     $                   WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     December 1, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPQ, UPLO
+      INTEGER            INFO, LDU, LDVT, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IQ( * ), IWORK( * )
+      DOUBLE PRECISION   D( * ), E( * ), Q( * ), U( LDU, * ),
+     $                   VT( LDVT, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DBDSDC computes the singular value decomposition (SVD) of a real
+*  N-by-N (upper or lower) bidiagonal matrix B:  B = U * S * VT,
+*  using a divide and conquer method, where S is a diagonal matrix
+*  with non-negative diagonal elements (the singular values of B), and
+*  U and VT are orthogonal matrices of left and right singular vectors,
+*  respectively. DBDSDC can be used to compute all singular values,
+*  and optionally, singular vectors or singular vectors in compact form.
+*
+*  This code makes very mild assumptions about floating point
+*  arithmetic. It will work on machines with a guard digit in
+*  add/subtract, or on those binary machines without guard digits
+*  which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
+*  It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.  See DLASD3 for details.
+*
+*  The code currently call DLASDQ if singular values only are desired.
+*  However, it can be slightly modified to compute singular values
+*  using the divide and conquer method.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  B is upper bidiagonal.
+*          = 'L':  B is lower bidiagonal.
+*
+*  COMPQ   (input) CHARACTER*1
+*          Specifies whether singular vectors are to be computed
+*          as follows:
+*          = 'N':  Compute singular values only;
+*          = 'P':  Compute singular values and compute singular
+*                  vectors in compact form;
+*          = 'I':  Compute singular values and singular vectors.
+*
+*  N       (input) INTEGER
+*          The order of the matrix B.  N >= 0.
+*
+*  D       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the n diagonal elements of the bidiagonal matrix B.
+*          On exit, if INFO=0, the singular values of B.
+*
+*  E       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the elements of E contain the offdiagonal
+*          elements of the bidiagonal matrix whose SVD is desired.
+*          On exit, E has been destroyed.
+*
+*  U       (output) DOUBLE PRECISION array, dimension (LDU,N)
+*          If  COMPQ = 'I', then:
+*             On exit, if INFO = 0, U contains the left singular vectors
+*             of the bidiagonal matrix.
+*          For other values of COMPQ, U is not referenced.
+*
+*  LDU     (input) INTEGER
+*          The leading dimension of the array U.  LDU >= 1.
+*          If singular vectors are desired, then LDU >= max( 1, N ).
+*
+*  VT      (output) DOUBLE PRECISION array, dimension (LDVT,N)
+*          If  COMPQ = 'I', then:
+*             On exit, if INFO = 0, VT' contains the right singular
+*             vectors of the bidiagonal matrix.
+*          For other values of COMPQ, VT is not referenced.
+*
+*  LDVT    (input) INTEGER
+*          The leading dimension of the array VT.  LDVT >= 1.
+*          If singular vectors are desired, then LDVT >= max( 1, N ).
+*
+*  Q       (output) DOUBLE PRECISION array, dimension (LDQ)
+*          If  COMPQ = 'P', then:
+*             On exit, if INFO = 0, Q and IQ contain the left
+*             and right singular vectors in a compact form,
+*             requiring O(N log N) space instead of 2*N**2.
+*             In particular, Q contains all the DOUBLE PRECISION data in
+*             LDQ >= N*(11 + 2*SMLSIZ + 8*INT(LOG_2(N/(SMLSIZ+1))))
+*             words of memory, where SMLSIZ is returned by ILAENV and
+*             is equal to the maximum size of the subproblems at the
+*             bottom of the computation tree (usually about 25).
+*          For other values of COMPQ, Q is not referenced.
+*
+*  IQ      (output) INTEGER array, dimension (LDIQ)
+*          If  COMPQ = 'P', then:
+*             On exit, if INFO = 0, Q and IQ contain the left
+*             and right singular vectors in a compact form,
+*             requiring O(N log N) space instead of 2*N**2.
+*             In particular, IQ contains all INTEGER data in
+*             LDIQ >= N*(3 + 3*INT(LOG_2(N/(SMLSIZ+1))))
+*             words of memory, where SMLSIZ is returned by ILAENV and
+*             is equal to the maximum size of the subproblems at the
+*             bottom of the computation tree (usually about 25).
+*          For other values of COMPQ, IQ is not referenced.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK)
+*          If COMPQ = 'N' then LWORK >= (4 * N).
+*          If COMPQ = 'P' then LWORK >= (6 * N).
+*          If COMPQ = 'I' then LWORK >= (3 * N**2 + 4 * N).
+*
+*  IWORK   (workspace) INTEGER array, dimension (8*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  The algorithm failed to compute an singular value.
+*                The update process of divide and conquer failed.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            DIFL, DIFR, GIVCOL, GIVNUM, GIVPTR, I, IC,
+     $                   ICOMPQ, IERR, II, IS, IU, IUPLO, IVT, J, K, KK,
+     $                   MLVL, NM1, NSIZE, PERM, POLES, QSTART, SMLSIZ,
+     $                   SMLSZP, SQRE, START, WSTART, Z
+      DOUBLE PRECISION   CS, EPS, ORGNRM, P, R, SN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, DLANST
+      EXTERNAL           LSAME, ILAENV, DLAMCH, DLANST
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLARTG, DLASCL, DLASD0, DLASDA, DLASDQ,
+     $                   DLASET, DLASR, DSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, INT, LOG, SIGN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IUPLO = 0
+      IF( LSAME( UPLO, 'U' ) )
+     $   IUPLO = 1
+      IF( LSAME( UPLO, 'L' ) )
+     $   IUPLO = 2
+      IF( LSAME( COMPQ, 'N' ) ) THEN
+         ICOMPQ = 0
+      ELSE IF( LSAME( COMPQ, 'P' ) ) THEN
+         ICOMPQ = 1
+      ELSE IF( LSAME( COMPQ, 'I' ) ) THEN
+         ICOMPQ = 2
+      ELSE
+         ICOMPQ = -1
+      END IF
+      IF( IUPLO.EQ.0 ) THEN
+         INFO = -1
+      ELSE IF( ICOMPQ.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( ( LDU.LT.1 ) .OR. ( ( ICOMPQ.EQ.2 ) .AND. ( LDU.LT.
+     $         N ) ) ) THEN
+         INFO = -7
+      ELSE IF( ( LDVT.LT.1 ) .OR. ( ( ICOMPQ.EQ.2 ) .AND. ( LDVT.LT.
+     $         N ) ) ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DBDSDC', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+      SMLSIZ = ILAENV( 9, 'DBDSDC', ' ', 0, 0, 0, 0 )
+      IF( N.EQ.1 ) THEN
+         IF( ICOMPQ.EQ.1 ) THEN
+            Q( 1 ) = SIGN( ONE, D( 1 ) )
+            Q( 1+SMLSIZ*N ) = ONE
+         ELSE IF( ICOMPQ.EQ.2 ) THEN
+            U( 1, 1 ) = SIGN( ONE, D( 1 ) )
+            VT( 1, 1 ) = ONE
+         END IF
+         D( 1 ) = ABS( D( 1 ) )
+         RETURN
+      END IF
+      NM1 = N - 1
+*
+*     If matrix lower bidiagonal, rotate to be upper bidiagonal
+*     by applying Givens rotations on the left
+*
+      WSTART = 1
+      QSTART = 3
+      IF( ICOMPQ.EQ.1 ) THEN
+         CALL DCOPY( N, D, 1, Q( 1 ), 1 )
+         CALL DCOPY( N-1, E, 1, Q( N+1 ), 1 )
+      END IF
+      IF( IUPLO.EQ.2 ) THEN
+         QSTART = 5
+         WSTART = 2*N - 1
+         DO 10 I = 1, N - 1
+            CALL DLARTG( D( I ), E( I ), CS, SN, R )
+            D( I ) = R
+            E( I ) = SN*D( I+1 )
+            D( I+1 ) = CS*D( I+1 )
+            IF( ICOMPQ.EQ.1 ) THEN
+               Q( I+2*N ) = CS
+               Q( I+3*N ) = SN
+            ELSE IF( ICOMPQ.EQ.2 ) THEN
+               WORK( I ) = CS
+               WORK( NM1+I ) = -SN
+            END IF
+   10    CONTINUE
+      END IF
+*
+*     If ICOMPQ = 0, use DLASDQ to compute the singular values.
+*
+      IF( ICOMPQ.EQ.0 ) THEN
+         CALL DLASDQ( 'U', 0, N, 0, 0, 0, D, E, VT, LDVT, U, LDU, U,
+     $                LDU, WORK( WSTART ), INFO )
+         GO TO 40
+      END IF
+*
+*     If N is smaller than the minimum divide size SMLSIZ, then solve
+*     the problem with another solver.
+*
+      IF( N.LE.SMLSIZ ) THEN
+         IF( ICOMPQ.EQ.2 ) THEN
+            CALL DLASET( 'A', N, N, ZERO, ONE, U, LDU )
+            CALL DLASET( 'A', N, N, ZERO, ONE, VT, LDVT )
+            CALL DLASDQ( 'U', 0, N, N, N, 0, D, E, VT, LDVT, U, LDU, U,
+     $                   LDU, WORK( WSTART ), INFO )
+         ELSE IF( ICOMPQ.EQ.1 ) THEN
+            IU = 1
+            IVT = IU + N
+            CALL DLASET( 'A', N, N, ZERO, ONE, Q( IU+( QSTART-1 )*N ),
+     $                   N )
+            CALL DLASET( 'A', N, N, ZERO, ONE, Q( IVT+( QSTART-1 )*N ),
+     $                   N )
+            CALL DLASDQ( 'U', 0, N, N, N, 0, D, E,
+     $                   Q( IVT+( QSTART-1 )*N ), N,
+     $                   Q( IU+( QSTART-1 )*N ), N,
+     $                   Q( IU+( QSTART-1 )*N ), N, WORK( WSTART ),
+     $                   INFO )
+         END IF
+         GO TO 40
+      END IF
+*
+      IF( ICOMPQ.EQ.2 ) THEN
+         CALL DLASET( 'A', N, N, ZERO, ONE, U, LDU )
+         CALL DLASET( 'A', N, N, ZERO, ONE, VT, LDVT )
+      END IF
+*
+*     Scale.
+*
+      ORGNRM = DLANST( 'M', N, D, E )
+      IF( ORGNRM.EQ.ZERO )
+     $   RETURN
+      CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, IERR )
+      CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, NM1, 1, E, NM1, IERR )
+*
+      EPS = DLAMCH( 'Epsilon' )
+*
+      MLVL = INT( LOG( DBLE( N ) / DBLE( SMLSIZ+1 ) ) / LOG( TWO ) ) + 1
+      SMLSZP = SMLSIZ + 1
+*
+      IF( ICOMPQ.EQ.1 ) THEN
+         IU = 1
+         IVT = 1 + SMLSIZ
+         DIFL = IVT + SMLSZP
+         DIFR = DIFL + MLVL
+         Z = DIFR + MLVL*2
+         IC = Z + MLVL
+         IS = IC + 1
+         POLES = IS + 1
+         GIVNUM = POLES + 2*MLVL
+*
+         K = 1
+         GIVPTR = 2
+         PERM = 3
+         GIVCOL = PERM + MLVL
+      END IF
+*
+      DO 20 I = 1, N
+         IF( ABS( D( I ) ).LT.EPS ) THEN
+            D( I ) = SIGN( EPS, D( I ) )
+         END IF
+   20 CONTINUE
+*
+      START = 1
+      SQRE = 0
+*
+      DO 30 I = 1, NM1
+         IF( ( ABS( E( I ) ).LT.EPS ) .OR. ( I.EQ.NM1 ) ) THEN
+*
+*        Subproblem found. First determine its size and then
+*        apply divide and conquer on it.
+*
+            IF( I.LT.NM1 ) THEN
+*
+*        A subproblem with E(I) small for I < NM1.
+*
+               NSIZE = I - START + 1
+            ELSE IF( ABS( E( I ) ).GE.EPS ) THEN
+*
+*        A subproblem with E(NM1) not too small but I = NM1.
+*
+               NSIZE = N - START + 1
+            ELSE
+*
+*        A subproblem with E(NM1) small. This implies an
+*        1-by-1 subproblem at D(N). Solve this 1-by-1 problem
+*        first.
+*
+               NSIZE = I - START + 1
+               IF( ICOMPQ.EQ.2 ) THEN
+                  U( N, N ) = SIGN( ONE, D( N ) )
+                  VT( N, N ) = ONE
+               ELSE IF( ICOMPQ.EQ.1 ) THEN
+                  Q( N+( QSTART-1 )*N ) = SIGN( ONE, D( N ) )
+                  Q( N+( SMLSIZ+QSTART-1 )*N ) = ONE
+               END IF
+               D( N ) = ABS( D( N ) )
+            END IF
+            IF( ICOMPQ.EQ.2 ) THEN
+               CALL DLASD0( NSIZE, SQRE, D( START ), E( START ),
+     $                      U( START, START ), LDU, VT( START, START ),
+     $                      LDVT, SMLSIZ, IWORK, WORK( WSTART ), INFO )
+            ELSE
+               CALL DLASDA( ICOMPQ, SMLSIZ, NSIZE, SQRE, D( START ),
+     $                      E( START ), Q( START+( IU+QSTART-2 )*N ), N,
+     $                      Q( START+( IVT+QSTART-2 )*N ),
+     $                      IQ( START+K*N ), Q( START+( DIFL+QSTART-2 )*
+     $                      N ), Q( START+( DIFR+QSTART-2 )*N ),
+     $                      Q( START+( Z+QSTART-2 )*N ),
+     $                      Q( START+( POLES+QSTART-2 )*N ),
+     $                      IQ( START+GIVPTR*N ), IQ( START+GIVCOL*N ),
+     $                      N, IQ( START+PERM*N ),
+     $                      Q( START+( GIVNUM+QSTART-2 )*N ),
+     $                      Q( START+( IC+QSTART-2 )*N ),
+     $                      Q( START+( IS+QSTART-2 )*N ),
+     $                      WORK( WSTART ), IWORK, INFO )
+               IF( INFO.NE.0 ) THEN
+                  RETURN
+               END IF
+            END IF
+            START = I + 1
+         END IF
+   30 CONTINUE
+*
+*     Unscale
+*
+      CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, IERR )
+   40 CONTINUE
+*
+*     Use Selection Sort to minimize swaps of singular vectors
+*
+      DO 60 II = 2, N
+         I = II - 1
+         KK = I
+         P = D( I )
+         DO 50 J = II, N
+            IF( D( J ).GT.P ) THEN
+               KK = J
+               P = D( J )
+            END IF
+   50    CONTINUE
+         IF( KK.NE.I ) THEN
+            D( KK ) = D( I )
+            D( I ) = P
+            IF( ICOMPQ.EQ.1 ) THEN
+               IQ( I ) = KK
+            ELSE IF( ICOMPQ.EQ.2 ) THEN
+               CALL DSWAP( N, U( 1, I ), 1, U( 1, KK ), 1 )
+               CALL DSWAP( N, VT( I, 1 ), LDVT, VT( KK, 1 ), LDVT )
+            END IF
+         ELSE IF( ICOMPQ.EQ.1 ) THEN
+            IQ( I ) = I
+         END IF
+   60 CONTINUE
+*
+*     If ICOMPQ = 1, use IQ(N,1) as the indicator for UPLO
+*
+      IF( ICOMPQ.EQ.1 ) THEN
+         IF( IUPLO.EQ.1 ) THEN
+            IQ( N ) = 1
+         ELSE
+            IQ( N ) = 0
+         END IF
+      END IF
+*
+*     If B is lower bidiagonal, update U by those Givens rotations
+*     which rotated B to be upper bidiagonal
+*
+      IF( ( IUPLO.EQ.2 ) .AND. ( ICOMPQ.EQ.2 ) )
+     $   CALL DLASR( 'L', 'V', 'B', N, N, WORK( 1 ), WORK( N ), U, LDU )
+*
+      RETURN
+*
+*     End of DBDSDC
+*
+      END
diff --git a/costa/native/external/lapack/dbdsqr.f b/costa/native/external/lapack/dbdsqr.f
new file mode 100644
index 000000000..678a24408
--- /dev/null
+++ b/costa/native/external/lapack/dbdsqr.f
@@ -0,0 +1,733 @@
+      SUBROUTINE DBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U,
+     $                   LDU, C, LDC, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   C( LDC, * ), D( * ), E( * ), U( LDU, * ),
+     $                   VT( LDVT, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DBDSQR computes the singular value decomposition (SVD) of a real
+*  N-by-N (upper or lower) bidiagonal matrix B:  B = Q * S * P' (P'
+*  denotes the transpose of P), where S is a diagonal matrix with
+*  non-negative diagonal elements (the singular values of B), and Q
+*  and P are orthogonal matrices.
+*
+*  The routine computes S, and optionally computes U * Q, P' * VT,
+*  or Q' * C, for given real input matrices U, VT, and C.
+*
+*  See "Computing  Small Singular Values of Bidiagonal Matrices With
+*  Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan,
+*  LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11,
+*  no. 5, pp. 873-912, Sept 1990) and
+*  "Accurate singular values and differential qd algorithms," by
+*  B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics
+*  Department, University of California at Berkeley, July 1992
+*  for a detailed description of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  B is upper bidiagonal;
+*          = 'L':  B is lower bidiagonal.
+*
+*  N       (input) INTEGER
+*          The order of the matrix B.  N >= 0.
+*
+*  NCVT    (input) INTEGER
+*          The number of columns of the matrix VT. NCVT >= 0.
+*
+*  NRU     (input) INTEGER
+*          The number of rows of the matrix U. NRU >= 0.
+*
+*  NCC     (input) INTEGER
+*          The number of columns of the matrix C. NCC >= 0.
+*
+*  D       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the n diagonal elements of the bidiagonal matrix B.
+*          On exit, if INFO=0, the singular values of B in decreasing
+*          order.
+*
+*  E       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the elements of E contain the
+*          offdiagonal elements of the bidiagonal matrix whose SVD
+*          is desired. On normal exit (INFO = 0), E is destroyed.
+*          If the algorithm does not converge (INFO > 0), D and E
+*          will contain the diagonal and superdiagonal elements of a
+*          bidiagonal matrix orthogonally equivalent to the one given
+*          as input. E(N) is used for workspace.
+*
+*  VT      (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT)
+*          On entry, an N-by-NCVT matrix VT.
+*          On exit, VT is overwritten by P' * VT.
+*          VT is not referenced if NCVT = 0.
+*
+*  LDVT    (input) INTEGER
+*          The leading dimension of the array VT.
+*          LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0.
+*
+*  U       (input/output) DOUBLE PRECISION array, dimension (LDU, N)
+*          On entry, an NRU-by-N matrix U.
+*          On exit, U is overwritten by U * Q.
+*          U is not referenced if NRU = 0.
+*
+*  LDU     (input) INTEGER
+*          The leading dimension of the array U.  LDU >= max(1,NRU).
+*
+*  C       (input/output) DOUBLE PRECISION array, dimension (LDC, NCC)
+*          On entry, an N-by-NCC matrix C.
+*          On exit, C is overwritten by Q' * C.
+*          C is not referenced if NCC = 0.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C.
+*          LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (4*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  If INFO = -i, the i-th argument had an illegal value
+*          > 0:  the algorithm did not converge; D and E contain the
+*                elements of a bidiagonal matrix which is orthogonally
+*                similar to the input matrix B;  if INFO = i, i
+*                elements of E have not converged to zero.
+*
+*  Internal Parameters
+*  ===================
+*
+*  TOLMUL  DOUBLE PRECISION, default = max(10,min(100,EPS**(-1/8)))
+*          TOLMUL controls the convergence criterion of the QR loop.
+*          If it is positive, TOLMUL*EPS is the desired relative
+*             precision in the computed singular values.
+*          If it is negative, abs(TOLMUL*EPS*sigma_max) is the
+*             desired absolute accuracy in the computed singular
+*             values (corresponds to relative accuracy
+*             abs(TOLMUL*EPS) in the largest singular value.
+*          abs(TOLMUL) should be between 1 and 1/EPS, and preferably
+*             between 10 (for fast convergence) and .1/EPS
+*             (for there to be some accuracy in the results).
+*          Default is to lose at either one eighth or 2 of the
+*             available decimal digits in each computed singular value
+*             (whichever is smaller).
+*
+*  MAXITR  INTEGER, default = 6
+*          MAXITR controls the maximum number of passes of the
+*          algorithm through its inner loop. The algorithms stops
+*          (and so fails to converge) if the number of passes
+*          through the inner loop exceeds MAXITR*N**2.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D0 )
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D0 )
+      DOUBLE PRECISION   NEGONE
+      PARAMETER          ( NEGONE = -1.0D0 )
+      DOUBLE PRECISION   HNDRTH
+      PARAMETER          ( HNDRTH = 0.01D0 )
+      DOUBLE PRECISION   TEN
+      PARAMETER          ( TEN = 10.0D0 )
+      DOUBLE PRECISION   HNDRD
+      PARAMETER          ( HNDRD = 100.0D0 )
+      DOUBLE PRECISION   MEIGTH
+      PARAMETER          ( MEIGTH = -0.125D0 )
+      INTEGER            MAXITR
+      PARAMETER          ( MAXITR = 6 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LOWER, ROTATE
+      INTEGER            I, IDIR, ISUB, ITER, J, LL, LLL, M, MAXIT, NM1,
+     $                   NM12, NM13, OLDLL, OLDM
+      DOUBLE PRECISION   ABSE, ABSS, COSL, COSR, CS, EPS, F, G, H, MU,
+     $                   OLDCS, OLDSN, R, SHIFT, SIGMN, SIGMX, SINL,
+     $                   SINR, SLL, SMAX, SMIN, SMINL, SMINLO, SMINOA,
+     $                   SN, THRESH, TOL, TOLMUL, UNFL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARTG, DLAS2, DLASQ1, DLASR, DLASV2, DROT,
+     $                   DSCAL, DSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, MAX, MIN, SIGN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      LOWER = LSAME( UPLO, 'L' )
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LOWER ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NCVT.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRU.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NCC.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( ( NCVT.EQ.0 .AND. LDVT.LT.1 ) .OR.
+     $         ( NCVT.GT.0 .AND. LDVT.LT.MAX( 1, N ) ) ) THEN
+         INFO = -9
+      ELSE IF( LDU.LT.MAX( 1, NRU ) ) THEN
+         INFO = -11
+      ELSE IF( ( NCC.EQ.0 .AND. LDC.LT.1 ) .OR.
+     $         ( NCC.GT.0 .AND. LDC.LT.MAX( 1, N ) ) ) THEN
+         INFO = -13
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DBDSQR', -INFO )
+         RETURN
+      END IF
+      IF( N.EQ.0 )
+     $   RETURN
+      IF( N.EQ.1 )
+     $   GO TO 160
+*
+*     ROTATE is true if any singular vectors desired, false otherwise
+*
+      ROTATE = ( NCVT.GT.0 ) .OR. ( NRU.GT.0 ) .OR. ( NCC.GT.0 )
+*
+*     If no singular vectors desired, use qd algorithm
+*
+      IF( .NOT.ROTATE ) THEN
+         CALL DLASQ1( N, D, E, WORK, INFO )
+         RETURN
+      END IF
+*
+      NM1 = N - 1
+      NM12 = NM1 + NM1
+      NM13 = NM12 + NM1
+      IDIR = 0
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'Epsilon' )
+      UNFL = DLAMCH( 'Safe minimum' )
+*
+*     If matrix lower bidiagonal, rotate to be upper bidiagonal
+*     by applying Givens rotations on the left
+*
+      IF( LOWER ) THEN
+         DO 10 I = 1, N - 1
+            CALL DLARTG( D( I ), E( I ), CS, SN, R )
+            D( I ) = R
+            E( I ) = SN*D( I+1 )
+            D( I+1 ) = CS*D( I+1 )
+            WORK( I ) = CS
+            WORK( NM1+I ) = SN
+   10    CONTINUE
+*
+*        Update singular vectors if desired
+*
+         IF( NRU.GT.0 )
+     $      CALL DLASR( 'R', 'V', 'F', NRU, N, WORK( 1 ), WORK( N ), U,
+     $                  LDU )
+         IF( NCC.GT.0 )
+     $      CALL DLASR( 'L', 'V', 'F', N, NCC, WORK( 1 ), WORK( N ), C,
+     $                  LDC )
+      END IF
+*
+*     Compute singular values to relative accuracy TOL
+*     (By setting TOL to be negative, algorithm will compute
+*     singular values to absolute accuracy ABS(TOL)*norm(input matrix))
+*
+      TOLMUL = MAX( TEN, MIN( HNDRD, EPS**MEIGTH ) )
+      TOL = TOLMUL*EPS
+*
+*     Compute approximate maximum, minimum singular values
+*
+      SMAX = ZERO
+      DO 20 I = 1, N
+         SMAX = MAX( SMAX, ABS( D( I ) ) )
+   20 CONTINUE
+      DO 30 I = 1, N - 1
+         SMAX = MAX( SMAX, ABS( E( I ) ) )
+   30 CONTINUE
+      SMINL = ZERO
+      IF( TOL.GE.ZERO ) THEN
+*
+*        Relative accuracy desired
+*
+         SMINOA = ABS( D( 1 ) )
+         IF( SMINOA.EQ.ZERO )
+     $      GO TO 50
+         MU = SMINOA
+         DO 40 I = 2, N
+            MU = ABS( D( I ) )*( MU / ( MU+ABS( E( I-1 ) ) ) )
+            SMINOA = MIN( SMINOA, MU )
+            IF( SMINOA.EQ.ZERO )
+     $         GO TO 50
+   40    CONTINUE
+   50    CONTINUE
+         SMINOA = SMINOA / SQRT( DBLE( N ) )
+         THRESH = MAX( TOL*SMINOA, MAXITR*N*N*UNFL )
+      ELSE
+*
+*        Absolute accuracy desired
+*
+         THRESH = MAX( ABS( TOL )*SMAX, MAXITR*N*N*UNFL )
+      END IF
+*
+*     Prepare for main iteration loop for the singular values
+*     (MAXIT is the maximum number of passes through the inner
+*     loop permitted before nonconvergence signalled.)
+*
+      MAXIT = MAXITR*N*N
+      ITER = 0
+      OLDLL = -1
+      OLDM = -1
+*
+*     M points to last element of unconverged part of matrix
+*
+      M = N
+*
+*     Begin main iteration loop
+*
+   60 CONTINUE
+*
+*     Check for convergence or exceeding iteration count
+*
+      IF( M.LE.1 )
+     $   GO TO 160
+      IF( ITER.GT.MAXIT )
+     $   GO TO 200
+*
+*     Find diagonal block of matrix to work on
+*
+      IF( TOL.LT.ZERO .AND. ABS( D( M ) ).LE.THRESH )
+     $   D( M ) = ZERO
+      SMAX = ABS( D( M ) )
+      SMIN = SMAX
+      DO 70 LLL = 1, M - 1
+         LL = M - LLL
+         ABSS = ABS( D( LL ) )
+         ABSE = ABS( E( LL ) )
+         IF( TOL.LT.ZERO .AND. ABSS.LE.THRESH )
+     $      D( LL ) = ZERO
+         IF( ABSE.LE.THRESH )
+     $      GO TO 80
+         SMIN = MIN( SMIN, ABSS )
+         SMAX = MAX( SMAX, ABSS, ABSE )
+   70 CONTINUE
+      LL = 0
+      GO TO 90
+   80 CONTINUE
+      E( LL ) = ZERO
+*
+*     Matrix splits since E(LL) = 0
+*
+      IF( LL.EQ.M-1 ) THEN
+*
+*        Convergence of bottom singular value, return to top of loop
+*
+         M = M - 1
+         GO TO 60
+      END IF
+   90 CONTINUE
+      LL = LL + 1
+*
+*     E(LL) through E(M-1) are nonzero, E(LL-1) is zero
+*
+      IF( LL.EQ.M-1 ) THEN
+*
+*        2 by 2 block, handle separately
+*
+         CALL DLASV2( D( M-1 ), E( M-1 ), D( M ), SIGMN, SIGMX, SINR,
+     $                COSR, SINL, COSL )
+         D( M-1 ) = SIGMX
+         E( M-1 ) = ZERO
+         D( M ) = SIGMN
+*
+*        Compute singular vectors, if desired
+*
+         IF( NCVT.GT.0 )
+     $      CALL DROT( NCVT, VT( M-1, 1 ), LDVT, VT( M, 1 ), LDVT, COSR,
+     $                 SINR )
+         IF( NRU.GT.0 )
+     $      CALL DROT( NRU, U( 1, M-1 ), 1, U( 1, M ), 1, COSL, SINL )
+         IF( NCC.GT.0 )
+     $      CALL DROT( NCC, C( M-1, 1 ), LDC, C( M, 1 ), LDC, COSL,
+     $                 SINL )
+         M = M - 2
+         GO TO 60
+      END IF
+*
+*     If working on new submatrix, choose shift direction
+*     (from larger end diagonal element towards smaller)
+*
+      IF( LL.GT.OLDM .OR. M.LT.OLDLL ) THEN
+         IF( ABS( D( LL ) ).GE.ABS( D( M ) ) ) THEN
+*
+*           Chase bulge from top (big end) to bottom (small end)
+*
+            IDIR = 1
+         ELSE
+*
+*           Chase bulge from bottom (big end) to top (small end)
+*
+            IDIR = 2
+         END IF
+      END IF
+*
+*     Apply convergence tests
+*
+      IF( IDIR.EQ.1 ) THEN
+*
+*        Run convergence test in forward direction
+*        First apply standard test to bottom of matrix
+*
+         IF( ABS( E( M-1 ) ).LE.ABS( TOL )*ABS( D( M ) ) .OR.
+     $       ( TOL.LT.ZERO .AND. ABS( E( M-1 ) ).LE.THRESH ) ) THEN
+            E( M-1 ) = ZERO
+            GO TO 60
+         END IF
+*
+         IF( TOL.GE.ZERO ) THEN
+*
+*           If relative accuracy desired,
+*           apply convergence criterion forward
+*
+            MU = ABS( D( LL ) )
+            SMINL = MU
+            DO 100 LLL = LL, M - 1
+               IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN
+                  E( LLL ) = ZERO
+                  GO TO 60
+               END IF
+               SMINLO = SMINL
+               MU = ABS( D( LLL+1 ) )*( MU / ( MU+ABS( E( LLL ) ) ) )
+               SMINL = MIN( SMINL, MU )
+  100       CONTINUE
+         END IF
+*
+      ELSE
+*
+*        Run convergence test in backward direction
+*        First apply standard test to top of matrix
+*
+         IF( ABS( E( LL ) ).LE.ABS( TOL )*ABS( D( LL ) ) .OR.
+     $       ( TOL.LT.ZERO .AND. ABS( E( LL ) ).LE.THRESH ) ) THEN
+            E( LL ) = ZERO
+            GO TO 60
+         END IF
+*
+         IF( TOL.GE.ZERO ) THEN
+*
+*           If relative accuracy desired,
+*           apply convergence criterion backward
+*
+            MU = ABS( D( M ) )
+            SMINL = MU
+            DO 110 LLL = M - 1, LL, -1
+               IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN
+                  E( LLL ) = ZERO
+                  GO TO 60
+               END IF
+               SMINLO = SMINL
+               MU = ABS( D( LLL ) )*( MU / ( MU+ABS( E( LLL ) ) ) )
+               SMINL = MIN( SMINL, MU )
+  110       CONTINUE
+         END IF
+      END IF
+      OLDLL = LL
+      OLDM = M
+*
+*     Compute shift.  First, test if shifting would ruin relative
+*     accuracy, and if so set the shift to zero.
+*
+      IF( TOL.GE.ZERO .AND. N*TOL*( SMINL / SMAX ).LE.
+     $    MAX( EPS, HNDRTH*TOL ) ) THEN
+*
+*        Use a zero shift to avoid loss of relative accuracy
+*
+         SHIFT = ZERO
+      ELSE
+*
+*        Compute the shift from 2-by-2 block at end of matrix
+*
+         IF( IDIR.EQ.1 ) THEN
+            SLL = ABS( D( LL ) )
+            CALL DLAS2( D( M-1 ), E( M-1 ), D( M ), SHIFT, R )
+         ELSE
+            SLL = ABS( D( M ) )
+            CALL DLAS2( D( LL ), E( LL ), D( LL+1 ), SHIFT, R )
+         END IF
+*
+*        Test if shift negligible, and if so set to zero
+*
+         IF( SLL.GT.ZERO ) THEN
+            IF( ( SHIFT / SLL )**2.LT.EPS )
+     $         SHIFT = ZERO
+         END IF
+      END IF
+*
+*     Increment iteration count
+*
+      ITER = ITER + M - LL
+*
+*     If SHIFT = 0, do simplified QR iteration
+*
+      IF( SHIFT.EQ.ZERO ) THEN
+         IF( IDIR.EQ.1 ) THEN
+*
+*           Chase bulge from top to bottom
+*           Save cosines and sines for later singular vector updates
+*
+            CS = ONE
+            OLDCS = ONE
+            DO 120 I = LL, M - 1
+               CALL DLARTG( D( I )*CS, E( I ), CS, SN, R )
+               IF( I.GT.LL )
+     $            E( I-1 ) = OLDSN*R
+               CALL DLARTG( OLDCS*R, D( I+1 )*SN, OLDCS, OLDSN, D( I ) )
+               WORK( I-LL+1 ) = CS
+               WORK( I-LL+1+NM1 ) = SN
+               WORK( I-LL+1+NM12 ) = OLDCS
+               WORK( I-LL+1+NM13 ) = OLDSN
+  120       CONTINUE
+            H = D( M )*CS
+            D( M ) = H*OLDCS
+            E( M-1 ) = H*OLDSN
+*
+*           Update singular vectors
+*
+            IF( NCVT.GT.0 )
+     $         CALL DLASR( 'L', 'V', 'F', M-LL+1, NCVT, WORK( 1 ),
+     $                     WORK( N ), VT( LL, 1 ), LDVT )
+            IF( NRU.GT.0 )
+     $         CALL DLASR( 'R', 'V', 'F', NRU, M-LL+1, WORK( NM12+1 ),
+     $                     WORK( NM13+1 ), U( 1, LL ), LDU )
+            IF( NCC.GT.0 )
+     $         CALL DLASR( 'L', 'V', 'F', M-LL+1, NCC, WORK( NM12+1 ),
+     $                     WORK( NM13+1 ), C( LL, 1 ), LDC )
+*
+*           Test convergence
+*
+            IF( ABS( E( M-1 ) ).LE.THRESH )
+     $         E( M-1 ) = ZERO
+*
+         ELSE
+*
+*           Chase bulge from bottom to top
+*           Save cosines and sines for later singular vector updates
+*
+            CS = ONE
+            OLDCS = ONE
+            DO 130 I = M, LL + 1, -1
+               CALL DLARTG( D( I )*CS, E( I-1 ), CS, SN, R )
+               IF( I.LT.M )
+     $            E( I ) = OLDSN*R
+               CALL DLARTG( OLDCS*R, D( I-1 )*SN, OLDCS, OLDSN, D( I ) )
+               WORK( I-LL ) = CS
+               WORK( I-LL+NM1 ) = -SN
+               WORK( I-LL+NM12 ) = OLDCS
+               WORK( I-LL+NM13 ) = -OLDSN
+  130       CONTINUE
+            H = D( LL )*CS
+            D( LL ) = H*OLDCS
+            E( LL ) = H*OLDSN
+*
+*           Update singular vectors
+*
+            IF( NCVT.GT.0 )
+     $         CALL DLASR( 'L', 'V', 'B', M-LL+1, NCVT, WORK( NM12+1 ),
+     $                     WORK( NM13+1 ), VT( LL, 1 ), LDVT )
+            IF( NRU.GT.0 )
+     $         CALL DLASR( 'R', 'V', 'B', NRU, M-LL+1, WORK( 1 ),
+     $                     WORK( N ), U( 1, LL ), LDU )
+            IF( NCC.GT.0 )
+     $         CALL DLASR( 'L', 'V', 'B', M-LL+1, NCC, WORK( 1 ),
+     $                     WORK( N ), C( LL, 1 ), LDC )
+*
+*           Test convergence
+*
+            IF( ABS( E( LL ) ).LE.THRESH )
+     $         E( LL ) = ZERO
+         END IF
+      ELSE
+*
+*        Use nonzero shift
+*
+         IF( IDIR.EQ.1 ) THEN
+*
+*           Chase bulge from top to bottom
+*           Save cosines and sines for later singular vector updates
+*
+            F = ( ABS( D( LL ) )-SHIFT )*
+     $          ( SIGN( ONE, D( LL ) )+SHIFT / D( LL ) )
+            G = E( LL )
+            DO 140 I = LL, M - 1
+               CALL DLARTG( F, G, COSR, SINR, R )
+               IF( I.GT.LL )
+     $            E( I-1 ) = R
+               F = COSR*D( I ) + SINR*E( I )
+               E( I ) = COSR*E( I ) - SINR*D( I )
+               G = SINR*D( I+1 )
+               D( I+1 ) = COSR*D( I+1 )
+               CALL DLARTG( F, G, COSL, SINL, R )
+               D( I ) = R
+               F = COSL*E( I ) + SINL*D( I+1 )
+               D( I+1 ) = COSL*D( I+1 ) - SINL*E( I )
+               IF( I.LT.M-1 ) THEN
+                  G = SINL*E( I+1 )
+                  E( I+1 ) = COSL*E( I+1 )
+               END IF
+               WORK( I-LL+1 ) = COSR
+               WORK( I-LL+1+NM1 ) = SINR
+               WORK( I-LL+1+NM12 ) = COSL
+               WORK( I-LL+1+NM13 ) = SINL
+  140       CONTINUE
+            E( M-1 ) = F
+*
+*           Update singular vectors
+*
+            IF( NCVT.GT.0 )
+     $         CALL DLASR( 'L', 'V', 'F', M-LL+1, NCVT, WORK( 1 ),
+     $                     WORK( N ), VT( LL, 1 ), LDVT )
+            IF( NRU.GT.0 )
+     $         CALL DLASR( 'R', 'V', 'F', NRU, M-LL+1, WORK( NM12+1 ),
+     $                     WORK( NM13+1 ), U( 1, LL ), LDU )
+            IF( NCC.GT.0 )
+     $         CALL DLASR( 'L', 'V', 'F', M-LL+1, NCC, WORK( NM12+1 ),
+     $                     WORK( NM13+1 ), C( LL, 1 ), LDC )
+*
+*           Test convergence
+*
+            IF( ABS( E( M-1 ) ).LE.THRESH )
+     $         E( M-1 ) = ZERO
+*
+         ELSE
+*
+*           Chase bulge from bottom to top
+*           Save cosines and sines for later singular vector updates
+*
+            F = ( ABS( D( M ) )-SHIFT )*( SIGN( ONE, D( M ) )+SHIFT /
+     $          D( M ) )
+            G = E( M-1 )
+            DO 150 I = M, LL + 1, -1
+               CALL DLARTG( F, G, COSR, SINR, R )
+               IF( I.LT.M )
+     $            E( I ) = R
+               F = COSR*D( I ) + SINR*E( I-1 )
+               E( I-1 ) = COSR*E( I-1 ) - SINR*D( I )
+               G = SINR*D( I-1 )
+               D( I-1 ) = COSR*D( I-1 )
+               CALL DLARTG( F, G, COSL, SINL, R )
+               D( I ) = R
+               F = COSL*E( I-1 ) + SINL*D( I-1 )
+               D( I-1 ) = COSL*D( I-1 ) - SINL*E( I-1 )
+               IF( I.GT.LL+1 ) THEN
+                  G = SINL*E( I-2 )
+                  E( I-2 ) = COSL*E( I-2 )
+               END IF
+               WORK( I-LL ) = COSR
+               WORK( I-LL+NM1 ) = -SINR
+               WORK( I-LL+NM12 ) = COSL
+               WORK( I-LL+NM13 ) = -SINL
+  150       CONTINUE
+            E( LL ) = F
+*
+*           Test convergence
+*
+            IF( ABS( E( LL ) ).LE.THRESH )
+     $         E( LL ) = ZERO
+*
+*           Update singular vectors if desired
+*
+            IF( NCVT.GT.0 )
+     $         CALL DLASR( 'L', 'V', 'B', M-LL+1, NCVT, WORK( NM12+1 ),
+     $                     WORK( NM13+1 ), VT( LL, 1 ), LDVT )
+            IF( NRU.GT.0 )
+     $         CALL DLASR( 'R', 'V', 'B', NRU, M-LL+1, WORK( 1 ),
+     $                     WORK( N ), U( 1, LL ), LDU )
+            IF( NCC.GT.0 )
+     $         CALL DLASR( 'L', 'V', 'B', M-LL+1, NCC, WORK( 1 ),
+     $                     WORK( N ), C( LL, 1 ), LDC )
+         END IF
+      END IF
+*
+*     QR iteration finished, go back and check convergence
+*
+      GO TO 60
+*
+*     All singular values converged, so make them positive
+*
+  160 CONTINUE
+      DO 170 I = 1, N
+         IF( D( I ).LT.ZERO ) THEN
+            D( I ) = -D( I )
+*
+*           Change sign of singular vectors, if desired
+*
+            IF( NCVT.GT.0 )
+     $         CALL DSCAL( NCVT, NEGONE, VT( I, 1 ), LDVT )
+         END IF
+  170 CONTINUE
+*
+*     Sort the singular values into decreasing order (insertion sort on
+*     singular values, but only one transposition per singular vector)
+*
+      DO 190 I = 1, N - 1
+*
+*        Scan for smallest D(I)
+*
+         ISUB = 1
+         SMIN = D( 1 )
+         DO 180 J = 2, N + 1 - I
+            IF( D( J ).LE.SMIN ) THEN
+               ISUB = J
+               SMIN = D( J )
+            END IF
+  180    CONTINUE
+         IF( ISUB.NE.N+1-I ) THEN
+*
+*           Swap singular values and vectors
+*
+            D( ISUB ) = D( N+1-I )
+            D( N+1-I ) = SMIN
+            IF( NCVT.GT.0 )
+     $         CALL DSWAP( NCVT, VT( ISUB, 1 ), LDVT, VT( N+1-I, 1 ),
+     $                     LDVT )
+            IF( NRU.GT.0 )
+     $         CALL DSWAP( NRU, U( 1, ISUB ), 1, U( 1, N+1-I ), 1 )
+            IF( NCC.GT.0 )
+     $         CALL DSWAP( NCC, C( ISUB, 1 ), LDC, C( N+1-I, 1 ), LDC )
+         END IF
+  190 CONTINUE
+      GO TO 220
+*
+*     Maximum number of iterations exceeded, failure to converge
+*
+  200 CONTINUE
+      INFO = 0
+      DO 210 I = 1, N - 1
+         IF( E( I ).NE.ZERO )
+     $      INFO = INFO + 1
+  210 CONTINUE
+  220 CONTINUE
+      RETURN
+*
+*     End of DBDSQR
+*
+      END
diff --git a/costa/native/external/lapack/ddisna.f b/costa/native/external/lapack/ddisna.f
new file mode 100644
index 000000000..b17771154
--- /dev/null
+++ b/costa/native/external/lapack/ddisna.f
@@ -0,0 +1,180 @@
+      SUBROUTINE DDISNA( JOB, M, N, D, SEP, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOB
+      INTEGER            INFO, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * ), SEP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DDISNA computes the reciprocal condition numbers for the eigenvectors
+*  of a real symmetric or complex Hermitian matrix or for the left or
+*  right singular vectors of a general m-by-n matrix. The reciprocal
+*  condition number is the 'gap' between the corresponding eigenvalue or
+*  singular value and the nearest other one.
+*
+*  The bound on the error, measured by angle in radians, in the I-th
+*  computed vector is given by
+*
+*         DLAMCH( 'E' ) * ( ANORM / SEP( I ) )
+*
+*  where ANORM = 2-norm(A) = max( abs( D(j) ) ).  SEP(I) is not allowed
+*  to be smaller than DLAMCH( 'E' )*ANORM in order to limit the size of
+*  the error bound.
+*
+*  DDISNA may also be used to compute error bounds for eigenvectors of
+*  the generalized symmetric definite eigenproblem.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          Specifies for which problem the reciprocal condition numbers
+*          should be computed:
+*          = 'E':  the eigenvectors of a symmetric/Hermitian matrix;
+*          = 'L':  the left singular vectors of a general matrix;
+*          = 'R':  the right singular vectors of a general matrix.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix. M >= 0.
+*
+*  N       (input) INTEGER
+*          If JOB = 'L' or 'R', the number of columns of the matrix,
+*          in which case N >= 0. Ignored if JOB = 'E'.
+*
+*  D       (input) DOUBLE PRECISION array, dimension (M) if JOB = 'E'
+*                              dimension (min(M,N)) if JOB = 'L' or 'R'
+*          The eigenvalues (if JOB = 'E') or singular values (if JOB =
+*          'L' or 'R') of the matrix, in either increasing or decreasing
+*          order. If singular values, they must be non-negative.
+*
+*  SEP     (output) DOUBLE PRECISION array, dimension (M) if JOB = 'E'
+*                               dimension (min(M,N)) if JOB = 'L' or 'R'
+*          The reciprocal condition numbers of the vectors.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            DECR, EIGEN, INCR, LEFT, RIGHT, SING
+      INTEGER            I, K
+      DOUBLE PRECISION   ANORM, EPS, NEWGAP, OLDGAP, SAFMIN, THRESH
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      EIGEN = LSAME( JOB, 'E' )
+      LEFT = LSAME( JOB, 'L' )
+      RIGHT = LSAME( JOB, 'R' )
+      SING = LEFT .OR. RIGHT
+      IF( EIGEN ) THEN
+         K = M
+      ELSE IF( SING ) THEN
+         K = MIN( M, N )
+      END IF
+      IF( .NOT.EIGEN .AND. .NOT.SING ) THEN
+         INFO = -1
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 ) THEN
+         INFO = -3
+      ELSE
+         INCR = .TRUE.
+         DECR = .TRUE.
+         DO 10 I = 1, K - 1
+            IF( INCR )
+     $         INCR = INCR .AND. D( I ).LE.D( I+1 )
+            IF( DECR )
+     $         DECR = DECR .AND. D( I ).GE.D( I+1 )
+   10    CONTINUE
+         IF( SING .AND. K.GT.0 ) THEN
+            IF( INCR )
+     $         INCR = INCR .AND. ZERO.LE.D( 1 )
+            IF( DECR )
+     $         DECR = DECR .AND. D( K ).GE.ZERO
+         END IF
+         IF( .NOT.( INCR .OR. DECR ) )
+     $      INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DDISNA', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( K.EQ.0 )
+     $   RETURN
+*
+*     Compute reciprocal condition numbers
+*
+      IF( K.EQ.1 ) THEN
+         SEP( 1 ) = DLAMCH( 'O' )
+      ELSE
+         OLDGAP = ABS( D( 2 )-D( 1 ) )
+         SEP( 1 ) = OLDGAP
+         DO 20 I = 2, K - 1
+            NEWGAP = ABS( D( I+1 )-D( I ) )
+            SEP( I ) = MIN( OLDGAP, NEWGAP )
+            OLDGAP = NEWGAP
+   20    CONTINUE
+         SEP( K ) = OLDGAP
+      END IF
+      IF( SING ) THEN
+         IF( ( LEFT .AND. M.GT.N ) .OR. ( RIGHT .AND. M.LT.N ) ) THEN
+            IF( INCR )
+     $         SEP( 1 ) = MIN( SEP( 1 ), D( 1 ) )
+            IF( DECR )
+     $         SEP( K ) = MIN( SEP( K ), D( K ) )
+         END IF
+      END IF
+*
+*     Ensure that reciprocal condition numbers are not less than
+*     threshold, in order to limit the size of the error bound
+*
+      EPS = DLAMCH( 'E' )
+      SAFMIN = DLAMCH( 'S' )
+      ANORM = MAX( ABS( D( 1 ) ), ABS( D( K ) ) )
+      IF( ANORM.EQ.ZERO ) THEN
+         THRESH = EPS
+      ELSE
+         THRESH = MAX( EPS*ANORM, SAFMIN )
+      END IF
+      DO 30 I = 1, K
+         SEP( I ) = MAX( SEP( I ), THRESH )
+   30 CONTINUE
+*
+      RETURN
+*
+*     End of DDISNA
+*
+      END
diff --git a/costa/native/external/lapack/dgbbrd.f b/costa/native/external/lapack/dgbbrd.f
new file mode 100644
index 000000000..a04475259
--- /dev/null
+++ b/costa/native/external/lapack/dgbbrd.f
@@ -0,0 +1,444 @@
+      SUBROUTINE DGBBRD( VECT, M, N, NCC, KL, KU, AB, LDAB, D, E, Q,
+     $                   LDQ, PT, LDPT, C, LDC, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          VECT
+      INTEGER            INFO, KL, KU, LDAB, LDC, LDPT, LDQ, M, N, NCC
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AB( LDAB, * ), C( LDC, * ), D( * ), E( * ),
+     $                   PT( LDPT, * ), Q( LDQ, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGBBRD reduces a real general m-by-n band matrix A to upper
+*  bidiagonal form B by an orthogonal transformation: Q' * A * P = B.
+*
+*  The routine computes B, and optionally forms Q or P', or computes
+*  Q'*C for a given matrix C.
+*
+*  Arguments
+*  =========
+*
+*  VECT    (input) CHARACTER*1
+*          Specifies whether or not the matrices Q and P' are to be
+*          formed.
+*          = 'N': do not form Q or P';
+*          = 'Q': form Q only;
+*          = 'P': form P' only;
+*          = 'B': form both.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  NCC     (input) INTEGER
+*          The number of columns of the matrix C.  NCC >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals of the matrix A. KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals of the matrix A. KU >= 0.
+*
+*  AB      (input/output) DOUBLE PRECISION array, dimension (LDAB,N)
+*          On entry, the m-by-n band matrix A, stored in rows 1 to
+*          KL+KU+1. The j-th column of A is stored in the j-th column of
+*          the array AB as follows:
+*          AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl).
+*          On exit, A is overwritten by values generated during the
+*          reduction.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array A. LDAB >= KL+KU+1.
+*
+*  D       (output) DOUBLE PRECISION array, dimension (min(M,N))
+*          The diagonal elements of the bidiagonal matrix B.
+*
+*  E       (output) DOUBLE PRECISION array, dimension (min(M,N)-1)
+*          The superdiagonal elements of the bidiagonal matrix B.
+*
+*  Q       (output) DOUBLE PRECISION array, dimension (LDQ,M)
+*          If VECT = 'Q' or 'B', the m-by-m orthogonal matrix Q.
+*          If VECT = 'N' or 'P', the array Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.
+*          LDQ >= max(1,M) if VECT = 'Q' or 'B'; LDQ >= 1 otherwise.
+*
+*  PT      (output) DOUBLE PRECISION array, dimension (LDPT,N)
+*          If VECT = 'P' or 'B', the n-by-n orthogonal matrix P'.
+*          If VECT = 'N' or 'Q', the array PT is not referenced.
+*
+*  LDPT    (input) INTEGER
+*          The leading dimension of the array PT.
+*          LDPT >= max(1,N) if VECT = 'P' or 'B'; LDPT >= 1 otherwise.
+*
+*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,NCC)
+*          On entry, an m-by-ncc matrix C.
+*          On exit, C is overwritten by Q'*C.
+*          C is not referenced if NCC = 0.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C.
+*          LDC >= max(1,M) if NCC > 0; LDC >= 1 if NCC = 0.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (2*max(M,N))
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            WANTB, WANTC, WANTPT, WANTQ
+      INTEGER            I, INCA, J, J1, J2, KB, KB1, KK, KLM, KLU1,
+     $                   KUN, L, MINMN, ML, ML0, MN, MU, MU0, NR, NRT
+      DOUBLE PRECISION   RA, RB, RC, RS
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARGV, DLARTG, DLARTV, DLASET, DROT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      WANTB = LSAME( VECT, 'B' )
+      WANTQ = LSAME( VECT, 'Q' ) .OR. WANTB
+      WANTPT = LSAME( VECT, 'P' ) .OR. WANTB
+      WANTC = NCC.GT.0
+      KLU1 = KL + KU + 1
+      INFO = 0
+      IF( .NOT.WANTQ .AND. .NOT.WANTPT .AND. .NOT.LSAME( VECT, 'N' ) )
+     $     THEN
+         INFO = -1
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NCC.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDAB.LT.KLU1 ) THEN
+         INFO = -8
+      ELSE IF( LDQ.LT.1 .OR. WANTQ .AND. LDQ.LT.MAX( 1, M ) ) THEN
+         INFO = -12
+      ELSE IF( LDPT.LT.1 .OR. WANTPT .AND. LDPT.LT.MAX( 1, N ) ) THEN
+         INFO = -14
+      ELSE IF( LDC.LT.1 .OR. WANTC .AND. LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -16
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGBBRD', -INFO )
+         RETURN
+      END IF
+*
+*     Initialize Q and P' to the unit matrix, if needed
+*
+      IF( WANTQ )
+     $   CALL DLASET( 'Full', M, M, ZERO, ONE, Q, LDQ )
+      IF( WANTPT )
+     $   CALL DLASET( 'Full', N, N, ZERO, ONE, PT, LDPT )
+*
+*     Quick return if possible.
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+      MINMN = MIN( M, N )
+*
+      IF( KL+KU.GT.1 ) THEN
+*
+*        Reduce to upper bidiagonal form if KU > 0; if KU = 0, reduce
+*        first to lower bidiagonal form and then transform to upper
+*        bidiagonal
+*
+         IF( KU.GT.0 ) THEN
+            ML0 = 1
+            MU0 = 2
+         ELSE
+            ML0 = 2
+            MU0 = 1
+         END IF
+*
+*        Wherever possible, plane rotations are generated and applied in
+*        vector operations of length NR over the index set J1:J2:KLU1.
+*
+*        The sines of the plane rotations are stored in WORK(1:max(m,n))
+*        and the cosines in WORK(max(m,n)+1:2*max(m,n)).
+*
+         MN = MAX( M, N )
+         KLM = MIN( M-1, KL )
+         KUN = MIN( N-1, KU )
+         KB = KLM + KUN
+         KB1 = KB + 1
+         INCA = KB1*LDAB
+         NR = 0
+         J1 = KLM + 2
+         J2 = 1 - KUN
+*
+         DO 90 I = 1, MINMN
+*
+*           Reduce i-th column and i-th row of matrix to bidiagonal form
+*
+            ML = KLM + 1
+            MU = KUN + 1
+            DO 80 KK = 1, KB
+               J1 = J1 + KB
+               J2 = J2 + KB
+*
+*              generate plane rotations to annihilate nonzero elements
+*              which have been created below the band
+*
+               IF( NR.GT.0 )
+     $            CALL DLARGV( NR, AB( KLU1, J1-KLM-1 ), INCA,
+     $                         WORK( J1 ), KB1, WORK( MN+J1 ), KB1 )
+*
+*              apply plane rotations from the left
+*
+               DO 10 L = 1, KB
+                  IF( J2-KLM+L-1.GT.N ) THEN
+                     NRT = NR - 1
+                  ELSE
+                     NRT = NR
+                  END IF
+                  IF( NRT.GT.0 )
+     $               CALL DLARTV( NRT, AB( KLU1-L, J1-KLM+L-1 ), INCA,
+     $                            AB( KLU1-L+1, J1-KLM+L-1 ), INCA,
+     $                            WORK( MN+J1 ), WORK( J1 ), KB1 )
+   10          CONTINUE
+*
+               IF( ML.GT.ML0 ) THEN
+                  IF( ML.LE.M-I+1 ) THEN
+*
+*                    generate plane rotation to annihilate a(i+ml-1,i)
+*                    within the band, and apply rotation from the left
+*
+                     CALL DLARTG( AB( KU+ML-1, I ), AB( KU+ML, I ),
+     $                            WORK( MN+I+ML-1 ), WORK( I+ML-1 ),
+     $                            RA )
+                     AB( KU+ML-1, I ) = RA
+                     IF( I.LT.N )
+     $                  CALL DROT( MIN( KU+ML-2, N-I ),
+     $                             AB( KU+ML-2, I+1 ), LDAB-1,
+     $                             AB( KU+ML-1, I+1 ), LDAB-1,
+     $                             WORK( MN+I+ML-1 ), WORK( I+ML-1 ) )
+                  END IF
+                  NR = NR + 1
+                  J1 = J1 - KB1
+               END IF
+*
+               IF( WANTQ ) THEN
+*
+*                 accumulate product of plane rotations in Q
+*
+                  DO 20 J = J1, J2, KB1
+                     CALL DROT( M, Q( 1, J-1 ), 1, Q( 1, J ), 1,
+     $                          WORK( MN+J ), WORK( J ) )
+   20             CONTINUE
+               END IF
+*
+               IF( WANTC ) THEN
+*
+*                 apply plane rotations to C
+*
+                  DO 30 J = J1, J2, KB1
+                     CALL DROT( NCC, C( J-1, 1 ), LDC, C( J, 1 ), LDC,
+     $                          WORK( MN+J ), WORK( J ) )
+   30             CONTINUE
+               END IF
+*
+               IF( J2+KUN.GT.N ) THEN
+*
+*                 adjust J2 to keep within the bounds of the matrix
+*
+                  NR = NR - 1
+                  J2 = J2 - KB1
+               END IF
+*
+               DO 40 J = J1, J2, KB1
+*
+*                 create nonzero element a(j-1,j+ku) above the band
+*                 and store it in WORK(n+1:2*n)
+*
+                  WORK( J+KUN ) = WORK( J )*AB( 1, J+KUN )
+                  AB( 1, J+KUN ) = WORK( MN+J )*AB( 1, J+KUN )
+   40          CONTINUE
+*
+*              generate plane rotations to annihilate nonzero elements
+*              which have been generated above the band
+*
+               IF( NR.GT.0 )
+     $            CALL DLARGV( NR, AB( 1, J1+KUN-1 ), INCA,
+     $                         WORK( J1+KUN ), KB1, WORK( MN+J1+KUN ),
+     $                         KB1 )
+*
+*              apply plane rotations from the right
+*
+               DO 50 L = 1, KB
+                  IF( J2+L-1.GT.M ) THEN
+                     NRT = NR - 1
+                  ELSE
+                     NRT = NR
+                  END IF
+                  IF( NRT.GT.0 )
+     $               CALL DLARTV( NRT, AB( L+1, J1+KUN-1 ), INCA,
+     $                            AB( L, J1+KUN ), INCA,
+     $                            WORK( MN+J1+KUN ), WORK( J1+KUN ),
+     $                            KB1 )
+   50          CONTINUE
+*
+               IF( ML.EQ.ML0 .AND. MU.GT.MU0 ) THEN
+                  IF( MU.LE.N-I+1 ) THEN
+*
+*                    generate plane rotation to annihilate a(i,i+mu-1)
+*                    within the band, and apply rotation from the right
+*
+                     CALL DLARTG( AB( KU-MU+3, I+MU-2 ),
+     $                            AB( KU-MU+2, I+MU-1 ),
+     $                            WORK( MN+I+MU-1 ), WORK( I+MU-1 ),
+     $                            RA )
+                     AB( KU-MU+3, I+MU-2 ) = RA
+                     CALL DROT( MIN( KL+MU-2, M-I ),
+     $                          AB( KU-MU+4, I+MU-2 ), 1,
+     $                          AB( KU-MU+3, I+MU-1 ), 1,
+     $                          WORK( MN+I+MU-1 ), WORK( I+MU-1 ) )
+                  END IF
+                  NR = NR + 1
+                  J1 = J1 - KB1
+               END IF
+*
+               IF( WANTPT ) THEN
+*
+*                 accumulate product of plane rotations in P'
+*
+                  DO 60 J = J1, J2, KB1
+                     CALL DROT( N, PT( J+KUN-1, 1 ), LDPT,
+     $                          PT( J+KUN, 1 ), LDPT, WORK( MN+J+KUN ),
+     $                          WORK( J+KUN ) )
+   60             CONTINUE
+               END IF
+*
+               IF( J2+KB.GT.M ) THEN
+*
+*                 adjust J2 to keep within the bounds of the matrix
+*
+                  NR = NR - 1
+                  J2 = J2 - KB1
+               END IF
+*
+               DO 70 J = J1, J2, KB1
+*
+*                 create nonzero element a(j+kl+ku,j+ku-1) below the
+*                 band and store it in WORK(1:n)
+*
+                  WORK( J+KB ) = WORK( J+KUN )*AB( KLU1, J+KUN )
+                  AB( KLU1, J+KUN ) = WORK( MN+J+KUN )*AB( KLU1, J+KUN )
+   70          CONTINUE
+*
+               IF( ML.GT.ML0 ) THEN
+                  ML = ML - 1
+               ELSE
+                  MU = MU - 1
+               END IF
+   80       CONTINUE
+   90    CONTINUE
+      END IF
+*
+      IF( KU.EQ.0 .AND. KL.GT.0 ) THEN
+*
+*        A has been reduced to lower bidiagonal form
+*
+*        Transform lower bidiagonal form to upper bidiagonal by applying
+*        plane rotations from the left, storing diagonal elements in D
+*        and off-diagonal elements in E
+*
+         DO 100 I = 1, MIN( M-1, N )
+            CALL DLARTG( AB( 1, I ), AB( 2, I ), RC, RS, RA )
+            D( I ) = RA
+            IF( I.LT.N ) THEN
+               E( I ) = RS*AB( 1, I+1 )
+               AB( 1, I+1 ) = RC*AB( 1, I+1 )
+            END IF
+            IF( WANTQ )
+     $         CALL DROT( M, Q( 1, I ), 1, Q( 1, I+1 ), 1, RC, RS )
+            IF( WANTC )
+     $         CALL DROT( NCC, C( I, 1 ), LDC, C( I+1, 1 ), LDC, RC,
+     $                    RS )
+  100    CONTINUE
+         IF( M.LE.N )
+     $      D( M ) = AB( 1, M )
+      ELSE IF( KU.GT.0 ) THEN
+*
+*        A has been reduced to upper bidiagonal form
+*
+         IF( M.LT.N ) THEN
+*
+*           Annihilate a(m,m+1) by applying plane rotations from the
+*           right, storing diagonal elements in D and off-diagonal
+*           elements in E
+*
+            RB = AB( KU, M+1 )
+            DO 110 I = M, 1, -1
+               CALL DLARTG( AB( KU+1, I ), RB, RC, RS, RA )
+               D( I ) = RA
+               IF( I.GT.1 ) THEN
+                  RB = -RS*AB( KU, I )
+                  E( I-1 ) = RC*AB( KU, I )
+               END IF
+               IF( WANTPT )
+     $            CALL DROT( N, PT( I, 1 ), LDPT, PT( M+1, 1 ), LDPT,
+     $                       RC, RS )
+  110       CONTINUE
+         ELSE
+*
+*           Copy off-diagonal elements to E and diagonal elements to D
+*
+            DO 120 I = 1, MINMN - 1
+               E( I ) = AB( KU, I+1 )
+  120       CONTINUE
+            DO 130 I = 1, MINMN
+               D( I ) = AB( KU+1, I )
+  130       CONTINUE
+         END IF
+      ELSE
+*
+*        A is diagonal. Set elements of E to zero and copy diagonal
+*        elements to D.
+*
+         DO 140 I = 1, MINMN - 1
+            E( I ) = ZERO
+  140    CONTINUE
+         DO 150 I = 1, MINMN
+            D( I ) = AB( 1, I )
+  150    CONTINUE
+      END IF
+      RETURN
+*
+*     End of DGBBRD
+*
+      END
diff --git a/costa/native/external/lapack/dgbcon.f b/costa/native/external/lapack/dgbcon.f
new file mode 100644
index 000000000..ba613b735
--- /dev/null
+++ b/costa/native/external/lapack/dgbcon.f
@@ -0,0 +1,222 @@
+      SUBROUTINE DGBCON( NORM, N, KL, KU, AB, LDAB, IPIV, ANORM, RCOND,
+     $                   WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            INFO, KL, KU, LDAB, N
+      DOUBLE PRECISION   ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), IWORK( * )
+      DOUBLE PRECISION   AB( LDAB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGBCON estimates the reciprocal of the condition number of a real
+*  general band matrix A, in either the 1-norm or the infinity-norm,
+*  using the LU factorization computed by DGBTRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as
+*     RCOND = 1 / ( norm(A) * norm(inv(A)) ).
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies whether the 1-norm condition number or the
+*          infinity-norm condition number is required:
+*          = '1' or 'O':  1-norm;
+*          = 'I':         Infinity-norm.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  AB      (input) DOUBLE PRECISION array, dimension (LDAB,N)
+*          Details of the LU factorization of the band matrix A, as
+*          computed by DGBTRF.  U is stored as an upper triangular band
+*          matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and
+*          the multipliers used during the factorization are stored in
+*          rows KL+KU+2 to 2*KL+KU+1.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices; for 1 <= i <= N, row i of the matrix was
+*          interchanged with row IPIV(i).
+*
+*  ANORM   (input) DOUBLE PRECISION
+*          If NORM = '1' or 'O', the 1-norm of the original matrix A.
+*          If NORM = 'I', the infinity-norm of the original matrix A.
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(norm(A) * norm(inv(A))).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LNOTI, ONENRM
+      CHARACTER          NORMIN
+      INTEGER            IX, J, JP, KASE, KASE1, KD, LM
+      DOUBLE PRECISION   AINVNM, SCALE, SMLNUM, T
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DDOT, DLAMCH
+      EXTERNAL           LSAME, IDAMAX, DDOT, DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DLACON, DLATBS, DRSCL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' )
+      IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.2*KL+KU+1 ) THEN
+         INFO = -6
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGBCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.EQ.ZERO ) THEN
+         RETURN
+      END IF
+*
+      SMLNUM = DLAMCH( 'Safe minimum' )
+*
+*     Estimate the norm of inv(A).
+*
+      AINVNM = ZERO
+      NORMIN = 'N'
+      IF( ONENRM ) THEN
+         KASE1 = 1
+      ELSE
+         KASE1 = 2
+      END IF
+      KD = KL + KU + 1
+      LNOTI = KL.GT.0
+      KASE = 0
+   10 CONTINUE
+      CALL DLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+         IF( KASE.EQ.KASE1 ) THEN
+*
+*           Multiply by inv(L).
+*
+            IF( LNOTI ) THEN
+               DO 20 J = 1, N - 1
+                  LM = MIN( KL, N-J )
+                  JP = IPIV( J )
+                  T = WORK( JP )
+                  IF( JP.NE.J ) THEN
+                     WORK( JP ) = WORK( J )
+                     WORK( J ) = T
+                  END IF
+                  CALL DAXPY( LM, -T, AB( KD+1, J ), 1, WORK( J+1 ), 1 )
+   20          CONTINUE
+            END IF
+*
+*           Multiply by inv(U).
+*
+            CALL DLATBS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   KL+KU, AB, LDAB, WORK, SCALE, WORK( 2*N+1 ),
+     $                   INFO )
+         ELSE
+*
+*           Multiply by inv(U').
+*
+            CALL DLATBS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N,
+     $                   KL+KU, AB, LDAB, WORK, SCALE, WORK( 2*N+1 ),
+     $                   INFO )
+*
+*           Multiply by inv(L').
+*
+            IF( LNOTI ) THEN
+               DO 30 J = N - 1, 1, -1
+                  LM = MIN( KL, N-J )
+                  WORK( J ) = WORK( J ) - DDOT( LM, AB( KD+1, J ), 1,
+     $                        WORK( J+1 ), 1 )
+                  JP = IPIV( J )
+                  IF( JP.NE.J ) THEN
+                     T = WORK( JP )
+                     WORK( JP ) = WORK( J )
+                     WORK( J ) = T
+                  END IF
+   30          CONTINUE
+            END IF
+         END IF
+*
+*        Divide X by 1/SCALE if doing so will not cause overflow.
+*
+         NORMIN = 'Y'
+         IF( SCALE.NE.ONE ) THEN
+            IX = IDAMAX( N, WORK, 1 )
+            IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO )
+     $         GO TO 40
+            CALL DRSCL( N, SCALE, WORK, 1 )
+         END IF
+         GO TO 10
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+   40 CONTINUE
+      RETURN
+*
+*     End of DGBCON
+*
+      END
diff --git a/costa/native/external/lapack/dgbequ.f b/costa/native/external/lapack/dgbequ.f
new file mode 100644
index 000000000..309798c98
--- /dev/null
+++ b/costa/native/external/lapack/dgbequ.f
@@ -0,0 +1,240 @@
+      SUBROUTINE DGBEQU( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND,
+     $                   AMAX, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, KL, KU, LDAB, M, N
+      DOUBLE PRECISION   AMAX, COLCND, ROWCND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AB( LDAB, * ), C( * ), R( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGBEQU computes row and column scalings intended to equilibrate an
+*  M-by-N band matrix A and reduce its condition number.  R returns the
+*  row scale factors and C the column scale factors, chosen to try to
+*  make the largest element in each row and column of the matrix B with
+*  elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1.
+*
+*  R(i) and C(j) are restricted to be between SMLNUM = smallest safe
+*  number and BIGNUM = largest safe number.  Use of these scaling
+*  factors is not guaranteed to reduce the condition number of A but
+*  works well in practice.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  AB      (input) DOUBLE PRECISION array, dimension (LDAB,N)
+*          The band matrix A, stored in rows 1 to KL+KU+1.  The j-th
+*          column of A is stored in the j-th column of the array AB as
+*          follows:
+*          AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KL+KU+1.
+*
+*  R       (output) DOUBLE PRECISION array, dimension (M)
+*          If INFO = 0, or INFO > M, R contains the row scale factors
+*          for A.
+*
+*  C       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, C contains the column scale factors for A.
+*
+*  ROWCND  (output) DOUBLE PRECISION
+*          If INFO = 0 or INFO > M, ROWCND contains the ratio of the
+*          smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and
+*          AMAX is neither too large nor too small, it is not worth
+*          scaling by R.
+*
+*  COLCND  (output) DOUBLE PRECISION
+*          If INFO = 0, COLCND contains the ratio of the smallest
+*          C(i) to the largest C(i).  If COLCND >= 0.1, it is not
+*          worth scaling by C.
+*
+*  AMAX    (output) DOUBLE PRECISION
+*          Absolute value of largest matrix element.  If AMAX is very
+*          close to overflow or very close to underflow, the matrix
+*          should be scaled.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= M:  the i-th row of A is exactly zero
+*                >  M:  the (i-M)-th column of A is exactly zero
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, KD
+      DOUBLE PRECISION   BIGNUM, RCMAX, RCMIN, SMLNUM
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KL+KU+1 ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGBEQU', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         ROWCND = ONE
+         COLCND = ONE
+         AMAX = ZERO
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SMLNUM = DLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+*
+*     Compute row scale factors.
+*
+      DO 10 I = 1, M
+         R( I ) = ZERO
+   10 CONTINUE
+*
+*     Find the maximum element in each row.
+*
+      KD = KU + 1
+      DO 30 J = 1, N
+         DO 20 I = MAX( J-KU, 1 ), MIN( J+KL, M )
+            R( I ) = MAX( R( I ), ABS( AB( KD+I-J, J ) ) )
+   20    CONTINUE
+   30 CONTINUE
+*
+*     Find the maximum and minimum scale factors.
+*
+      RCMIN = BIGNUM
+      RCMAX = ZERO
+      DO 40 I = 1, M
+         RCMAX = MAX( RCMAX, R( I ) )
+         RCMIN = MIN( RCMIN, R( I ) )
+   40 CONTINUE
+      AMAX = RCMAX
+*
+      IF( RCMIN.EQ.ZERO ) THEN
+*
+*        Find the first zero scale factor and return an error code.
+*
+         DO 50 I = 1, M
+            IF( R( I ).EQ.ZERO ) THEN
+               INFO = I
+               RETURN
+            END IF
+   50    CONTINUE
+      ELSE
+*
+*        Invert the scale factors.
+*
+         DO 60 I = 1, M
+            R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM )
+   60    CONTINUE
+*
+*        Compute ROWCND = min(R(I)) / max(R(I))
+*
+         ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+      END IF
+*
+*     Compute column scale factors
+*
+      DO 70 J = 1, N
+         C( J ) = ZERO
+   70 CONTINUE
+*
+*     Find the maximum element in each column,
+*     assuming the row scaling computed above.
+*
+      KD = KU + 1
+      DO 90 J = 1, N
+         DO 80 I = MAX( J-KU, 1 ), MIN( J+KL, M )
+            C( J ) = MAX( C( J ), ABS( AB( KD+I-J, J ) )*R( I ) )
+   80    CONTINUE
+   90 CONTINUE
+*
+*     Find the maximum and minimum scale factors.
+*
+      RCMIN = BIGNUM
+      RCMAX = ZERO
+      DO 100 J = 1, N
+         RCMIN = MIN( RCMIN, C( J ) )
+         RCMAX = MAX( RCMAX, C( J ) )
+  100 CONTINUE
+*
+      IF( RCMIN.EQ.ZERO ) THEN
+*
+*        Find the first zero scale factor and return an error code.
+*
+         DO 110 J = 1, N
+            IF( C( J ).EQ.ZERO ) THEN
+               INFO = M + J
+               RETURN
+            END IF
+  110    CONTINUE
+      ELSE
+*
+*        Invert the scale factors.
+*
+         DO 120 J = 1, N
+            C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM )
+  120    CONTINUE
+*
+*        Compute COLCND = min(C(J)) / max(C(J))
+*
+         COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+      END IF
+*
+      RETURN
+*
+*     End of DGBEQU
+*
+      END
diff --git a/costa/native/external/lapack/dgbrfs.f b/costa/native/external/lapack/dgbrfs.f
new file mode 100644
index 000000000..226fd7736
--- /dev/null
+++ b/costa/native/external/lapack/dgbrfs.f
@@ -0,0 +1,351 @@
+      SUBROUTINE DGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB,
+     $                   IPIV, B, LDB, X, LDX, FERR, BERR, WORK, IWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), IWORK( * )
+      DOUBLE PRECISION   AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ),
+     $                   BERR( * ), FERR( * ), WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGBRFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is banded, and provides
+*  error bounds and backward error estimates for the solution.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose = Transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AB      (input) DOUBLE PRECISION array, dimension (LDAB,N)
+*          The original band matrix A, stored in rows 1 to KL+KU+1.
+*          The j-th column of A is stored in the j-th column of the
+*          array AB as follows:
+*          AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KL+KU+1.
+*
+*  AFB     (input) DOUBLE PRECISION array, dimension (LDAFB,N)
+*          Details of the LU factorization of the band matrix A, as
+*          computed by DGBTRF.  U is stored as an upper triangular band
+*          matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and
+*          the multipliers used during the factorization are stored in
+*          rows KL+KU+2 to 2*KL+KU+1.
+*
+*  LDAFB   (input) INTEGER
+*          The leading dimension of the array AFB.  LDAFB >= 2*KL*KU+1.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices from DGBTRF; for 1<=i<=N, row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  B       (input) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by DGBTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D+0 )
+      DOUBLE PRECISION   THREE
+      PARAMETER          ( THREE = 3.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN
+      CHARACTER          TRANST
+      INTEGER            COUNT, I, J, K, KASE, KK, NZ
+      DOUBLE PRECISION   EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DCOPY, DGBMV, DGBTRS, DLACON, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $    LSAME( TRANS, 'C' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.KL+KU+1 ) THEN
+         INFO = -7
+      ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN
+         INFO = -9
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -12
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -14
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGBRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      IF( NOTRAN ) THEN
+         TRANST = 'T'
+      ELSE
+         TRANST = 'N'
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = MIN( KL+KU+2, N+1 )
+      EPS = DLAMCH( 'Epsilon' )
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - op(A) * X,
+*        where op(A) = A, A**T, or A**H, depending on TRANS.
+*
+         CALL DCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 )
+         CALL DGBMV( TRANS, N, N, KL, KU, -ONE, AB, LDAB, X( 1, J ), 1,
+     $               ONE, WORK( N+1 ), 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            WORK( I ) = ABS( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(op(A))*abs(X) + abs(B).
+*
+         IF( NOTRAN ) THEN
+            DO 50 K = 1, N
+               KK = KU + 1 - K
+               XK = ABS( X( K, J ) )
+               DO 40 I = MAX( 1, K-KU ), MIN( N, K+KL )
+                  WORK( I ) = WORK( I ) + ABS( AB( KK+I, K ) )*XK
+   40          CONTINUE
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               KK = KU + 1 - K
+               DO 60 I = MAX( 1, K-KU ), MIN( N, K+KL )
+                  S = S + ABS( AB( KK+I, K ) )*ABS( X( I, J ) )
+   60          CONTINUE
+               WORK( K ) = WORK( K ) + S
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) )
+            ELSE
+               S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) /
+     $             ( WORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL DGBTRS( TRANS, N, KL, KU, 1, AFB, LDAFB, IPIV,
+     $                   WORK( N+1 ), N, INFO )
+            CALL DAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(op(A)))*
+*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(op(A)) is the inverse of op(A)
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(op(A))*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use DLACON to estimate the infinity-norm of the matrix
+*           inv(op(A)) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I )
+            ELSE
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL DLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ),
+     $                KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(op(A)**T).
+*
+               CALL DGBTRS( TRANST, N, KL, KU, 1, AFB, LDAFB, IPIV,
+     $                      WORK( N+1 ), N, INFO )
+               DO 110 I = 1, N
+                  WORK( N+I ) = WORK( N+I )*WORK( I )
+  110          CONTINUE
+            ELSE
+*
+*              Multiply by inv(op(A))*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( N+I ) = WORK( N+I )*WORK( I )
+  120          CONTINUE
+               CALL DGBTRS( TRANS, N, KL, KU, 1, AFB, LDAFB, IPIV,
+     $                      WORK( N+1 ), N, INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, ABS( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of DGBRFS
+*
+      END
diff --git a/costa/native/external/lapack/dgbsv.f b/costa/native/external/lapack/dgbsv.f
new file mode 100644
index 000000000..0d35fe88b
--- /dev/null
+++ b/costa/native/external/lapack/dgbsv.f
@@ -0,0 +1,143 @@
+      SUBROUTINE DGBSV( N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, KL, KU, LDAB, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   AB( LDAB, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGBSV computes the solution to a real system of linear equations
+*  A * X = B, where A is a band matrix of order N with KL subdiagonals
+*  and KU superdiagonals, and X and B are N-by-NRHS matrices.
+*
+*  The LU decomposition with partial pivoting and row interchanges is
+*  used to factor A as A = L * U, where L is a product of permutation
+*  and unit lower triangular matrices with KL subdiagonals, and U is
+*  upper triangular with KL+KU superdiagonals.  The factored form of A
+*  is then used to solve the system of equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AB      (input/output) DOUBLE PRECISION array, dimension (LDAB,N)
+*          On entry, the matrix A in band storage, in rows KL+1 to
+*          2*KL+KU+1; rows 1 to KL of the array need not be set.
+*          The j-th column of A is stored in the j-th column of the
+*          array AB as follows:
+*          AB(KL+KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+KL)
+*          On exit, details of the factorization: U is stored as an
+*          upper triangular band matrix with KL+KU superdiagonals in
+*          rows 1 to KL+KU+1, and the multipliers used during the
+*          factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
+*          See below for further details.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          The pivot indices that define the permutation matrix P;
+*          row i of the matrix was interchanged with row IPIV(i).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, U(i,i) is exactly zero.  The factorization
+*                has been completed, but the factor U is exactly
+*                singular, and the solution has not been computed.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  M = N = 6, KL = 2, KU = 1:
+*
+*  On entry:                       On exit:
+*
+*      *    *    *    +    +    +       *    *    *   u14  u25  u36
+*      *    *    +    +    +    +       *    *   u13  u24  u35  u46
+*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
+*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
+*     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
+*     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *
+*
+*  Array elements marked * are not used by the routine; elements marked
+*  + need not be set on entry, but are required by the routine to store
+*  elements of U because of fill-in resulting from the row interchanges.
+*
+*  =====================================================================
+*
+*     .. External Subroutines ..
+      EXTERNAL           DGBTRF, DGBTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.2*KL+KU+1 ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( N, 1 ) ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGBSV ', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the LU factorization of the band matrix A.
+*
+      CALL DGBTRF( N, N, KL, KU, AB, LDAB, IPIV, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL DGBTRS( 'No transpose', N, KL, KU, NRHS, AB, LDAB, IPIV,
+     $                B, LDB, INFO )
+      END IF
+      RETURN
+*
+*     End of DGBSV
+*
+      END
diff --git a/costa/native/external/lapack/dgbsvx.f b/costa/native/external/lapack/dgbsvx.f
new file mode 100644
index 000000000..3e4961404
--- /dev/null
+++ b/costa/native/external/lapack/dgbsvx.f
@@ -0,0 +1,517 @@
+      SUBROUTINE DGBSVX( FACT, TRANS, N, KL, KU, NRHS, AB, LDAB, AFB,
+     $                   LDAFB, IPIV, EQUED, R, C, B, LDB, X, LDX,
+     $                   RCOND, FERR, BERR, WORK, IWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, FACT, TRANS
+      INTEGER            INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), IWORK( * )
+      DOUBLE PRECISION   AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ),
+     $                   BERR( * ), C( * ), FERR( * ), R( * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGBSVX uses the LU factorization to compute the solution to a real
+*  system of linear equations A * X = B, A**T * X = B, or A**H * X = B,
+*  where A is a band matrix of order N with KL subdiagonals and KU
+*  superdiagonals, and X and B are N-by-NRHS matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed by this subroutine:
+*
+*  1. If FACT = 'E', real scaling factors are computed to equilibrate
+*     the system:
+*        TRANS = 'N':  diag(R)*A*diag(C)     *inv(diag(C))*X = diag(R)*B
+*        TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
+*        TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
+*     Whether or not the system will be equilibrated depends on the
+*     scaling of the matrix A, but if equilibration is used, A is
+*     overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N')
+*     or diag(C)*B (if TRANS = 'T' or 'C').
+*
+*  2. If FACT = 'N' or 'E', the LU decomposition is used to factor the
+*     matrix A (after equilibration if FACT = 'E') as
+*        A = L * U,
+*     where L is a product of permutation and unit lower triangular
+*     matrices with KL subdiagonals, and U is upper triangular with
+*     KL+KU superdiagonals.
+*
+*  3. If some U(i,i)=0, so that U is exactly singular, then the routine
+*     returns with INFO = i. Otherwise, the factored form of A is used
+*     to estimate the condition number of the matrix A.  If the
+*     reciprocal of the condition number is less than machine precision,
+*     INFO = N+1 is returned as a warning, but the routine still goes on
+*     to solve for X and compute error bounds as described below.
+*
+*  4. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  5. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  6. If equilibration was used, the matrix X is premultiplied by
+*     diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so
+*     that it solves the original system before equilibration.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of the matrix A is
+*          supplied on entry, and if not, whether the matrix A should be
+*          equilibrated before it is factored.
+*          = 'F':  On entry, AFB and IPIV contain the factored form of
+*                  A.  If EQUED is not 'N', the matrix A has been
+*                  equilibrated with scaling factors given by R and C.
+*                  AB, AFB, and IPIV are not modified.
+*          = 'N':  The matrix A will be copied to AFB and factored.
+*          = 'E':  The matrix A will be equilibrated if necessary, then
+*                  copied to AFB and factored.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations.
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Transpose)
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AB      (input/output) DOUBLE PRECISION array, dimension (LDAB,N)
+*          On entry, the matrix A in band storage, in rows 1 to KL+KU+1.
+*          The j-th column of A is stored in the j-th column of the
+*          array AB as follows:
+*          AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl)
+*
+*          If FACT = 'F' and EQUED is not 'N', then A must have been
+*          equilibrated by the scaling factors in R and/or C.  AB is not
+*          modified if FACT = 'F' or 'N', or if FACT = 'E' and
+*          EQUED = 'N' on exit.
+*
+*          On exit, if EQUED .ne. 'N', A is scaled as follows:
+*          EQUED = 'R':  A := diag(R) * A
+*          EQUED = 'C':  A := A * diag(C)
+*          EQUED = 'B':  A := diag(R) * A * diag(C).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KL+KU+1.
+*
+*  AFB     (input or output) DOUBLE PRECISION array, dimension (LDAFB,N)
+*          If FACT = 'F', then AFB is an input argument and on entry
+*          contains details of the LU factorization of the band matrix
+*          A, as computed by DGBTRF.  U is stored as an upper triangular
+*          band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1,
+*          and the multipliers used during the factorization are stored
+*          in rows KL+KU+2 to 2*KL+KU+1.  If EQUED .ne. 'N', then AFB is
+*          the factored form of the equilibrated matrix A.
+*
+*          If FACT = 'N', then AFB is an output argument and on exit
+*          returns details of the LU factorization of A.
+*
+*          If FACT = 'E', then AFB is an output argument and on exit
+*          returns details of the LU factorization of the equilibrated
+*          matrix A (see the description of AB for the form of the
+*          equilibrated matrix).
+*
+*  LDAFB   (input) INTEGER
+*          The leading dimension of the array AFB.  LDAFB >= 2*KL+KU+1.
+*
+*  IPIV    (input or output) INTEGER array, dimension (N)
+*          If FACT = 'F', then IPIV is an input argument and on entry
+*          contains the pivot indices from the factorization A = L*U
+*          as computed by DGBTRF; row i of the matrix was interchanged
+*          with row IPIV(i).
+*
+*          If FACT = 'N', then IPIV is an output argument and on exit
+*          contains the pivot indices from the factorization A = L*U
+*          of the original matrix A.
+*
+*          If FACT = 'E', then IPIV is an output argument and on exit
+*          contains the pivot indices from the factorization A = L*U
+*          of the equilibrated matrix A.
+*
+*  EQUED   (input or output) CHARACTER*1
+*          Specifies the form of equilibration that was done.
+*          = 'N':  No equilibration (always true if FACT = 'N').
+*          = 'R':  Row equilibration, i.e., A has been premultiplied by
+*                  diag(R).
+*          = 'C':  Column equilibration, i.e., A has been postmultiplied
+*                  by diag(C).
+*          = 'B':  Both row and column equilibration, i.e., A has been
+*                  replaced by diag(R) * A * diag(C).
+*          EQUED is an input argument if FACT = 'F'; otherwise, it is an
+*          output argument.
+*
+*  R       (input or output) DOUBLE PRECISION array, dimension (N)
+*          The row scale factors for A.  If EQUED = 'R' or 'B', A is
+*          multiplied on the left by diag(R); if EQUED = 'N' or 'C', R
+*          is not accessed.  R is an input argument if FACT = 'F';
+*          otherwise, R is an output argument.  If FACT = 'F' and
+*          EQUED = 'R' or 'B', each element of R must be positive.
+*
+*  C       (input or output) DOUBLE PRECISION array, dimension (N)
+*          The column scale factors for A.  If EQUED = 'C' or 'B', A is
+*          multiplied on the right by diag(C); if EQUED = 'N' or 'R', C
+*          is not accessed.  C is an input argument if FACT = 'F';
+*          otherwise, C is an output argument.  If FACT = 'F' and
+*          EQUED = 'C' or 'B', each element of C must be positive.
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit,
+*          if EQUED = 'N', B is not modified;
+*          if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by
+*          diag(R)*B;
+*          if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is
+*          overwritten by diag(C)*B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) DOUBLE PRECISION array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X
+*          to the original system of equations.  Note that A and B are
+*          modified on exit if EQUED .ne. 'N', and the solution to the
+*          equilibrated system is inv(diag(C))*X if TRANS = 'N' and
+*          EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C'
+*          and EQUED = 'R' or 'B'.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The estimate of the reciprocal condition number of the matrix
+*          A after equilibration (if done).  If RCOND is less than the
+*          machine precision (in particular, if RCOND = 0), the matrix
+*          is singular to working precision.  This condition is
+*          indicated by a return code of INFO > 0.
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (3*N)
+*          On exit, WORK(1) contains the reciprocal pivot growth
+*          factor norm(A)/norm(U). The "max absolute element" norm is
+*          used. If WORK(1) is much less than 1, then the stability
+*          of the LU factorization of the (equilibrated) matrix A
+*          could be poor. This also means that the solution X, condition
+*          estimator RCOND, and forward error bound FERR could be
+*          unreliable. If factorization fails with 0<INFO<=N, then
+*          WORK(1) contains the reciprocal pivot growth factor for the
+*          leading INFO columns of A.
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= N:  U(i,i) is exactly zero.  The factorization
+*                       has been completed, but the factor U is exactly
+*                       singular, so the solution and error bounds
+*                       could not be computed. RCOND = 0 is returned.
+*                = N+1: U is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU
+      CHARACTER          NORM
+      INTEGER            I, INFEQU, J, J1, J2
+      DOUBLE PRECISION   AMAX, ANORM, BIGNUM, COLCND, RCMAX, RCMIN,
+     $                   ROWCND, RPVGRW, SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, DLANGB, DLANTB
+      EXTERNAL           LSAME, DLAMCH, DLANGB, DLANTB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DGBCON, DGBEQU, DGBRFS, DGBTRF, DGBTRS,
+     $                   DLACPY, DLAQGB, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      EQUIL = LSAME( FACT, 'E' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( NOFACT .OR. EQUIL ) THEN
+         EQUED = 'N'
+         ROWEQU = .FALSE.
+         COLEQU = .FALSE.
+      ELSE
+         ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' )
+         COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' )
+         SMLNUM = DLAMCH( 'Safe minimum' )
+         BIGNUM = ONE / SMLNUM
+      END IF
+*
+*     Test the input parameters.
+*
+      IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) )
+     $     THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDAB.LT.KL+KU+1 ) THEN
+         INFO = -8
+      ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN
+         INFO = -10
+      ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT.
+     $         ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN
+         INFO = -12
+      ELSE
+         IF( ROWEQU ) THEN
+            RCMIN = BIGNUM
+            RCMAX = ZERO
+            DO 10 J = 1, N
+               RCMIN = MIN( RCMIN, R( J ) )
+               RCMAX = MAX( RCMAX, R( J ) )
+   10       CONTINUE
+            IF( RCMIN.LE.ZERO ) THEN
+               INFO = -13
+            ELSE IF( N.GT.0 ) THEN
+               ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+            ELSE
+               ROWCND = ONE
+            END IF
+         END IF
+         IF( COLEQU .AND. INFO.EQ.0 ) THEN
+            RCMIN = BIGNUM
+            RCMAX = ZERO
+            DO 20 J = 1, N
+               RCMIN = MIN( RCMIN, C( J ) )
+               RCMAX = MAX( RCMAX, C( J ) )
+   20       CONTINUE
+            IF( RCMIN.LE.ZERO ) THEN
+               INFO = -14
+            ELSE IF( N.GT.0 ) THEN
+               COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+            ELSE
+               COLCND = ONE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            IF( LDB.LT.MAX( 1, N ) ) THEN
+               INFO = -16
+            ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+               INFO = -18
+            END IF
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGBSVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( EQUIL ) THEN
+*
+*        Compute row and column scalings to equilibrate the matrix A.
+*
+         CALL DGBEQU( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND,
+     $                AMAX, INFEQU )
+         IF( INFEQU.EQ.0 ) THEN
+*
+*           Equilibrate the matrix.
+*
+            CALL DLAQGB( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND,
+     $                   AMAX, EQUED )
+            ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' )
+            COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' )
+         END IF
+      END IF
+*
+*     Scale the right hand side.
+*
+      IF( NOTRAN ) THEN
+         IF( ROWEQU ) THEN
+            DO 40 J = 1, NRHS
+               DO 30 I = 1, N
+                  B( I, J ) = R( I )*B( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+      ELSE IF( COLEQU ) THEN
+         DO 60 J = 1, NRHS
+            DO 50 I = 1, N
+               B( I, J ) = C( I )*B( I, J )
+   50       CONTINUE
+   60    CONTINUE
+      END IF
+*
+      IF( NOFACT .OR. EQUIL ) THEN
+*
+*        Compute the LU factorization of the band matrix A.
+*
+         DO 70 J = 1, N
+            J1 = MAX( J-KU, 1 )
+            J2 = MIN( J+KL, N )
+            CALL DCOPY( J2-J1+1, AB( KU+1-J+J1, J ), 1,
+     $                  AFB( KL+KU+1-J+J1, J ), 1 )
+   70    CONTINUE
+*
+         CALL DGBTRF( N, N, KL, KU, AFB, LDAFB, IPIV, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 ) THEN
+*
+*              Compute the reciprocal pivot growth factor of the
+*              leading rank-deficient INFO columns of A.
+*
+               ANORM = ZERO
+               DO 90 J = 1, INFO
+                  DO 80 I = MAX( KU+2-J, 1 ),
+     $                    MIN( N+KU+1-J, KL+KU+1 )
+                     ANORM = MAX( ANORM, ABS( AB( I, J ) ) )
+   80             CONTINUE
+   90          CONTINUE
+               RPVGRW = DLANTB( 'M', 'U', 'N', INFO,
+     $                  MIN( INFO-1, KL+KU ), AFB( MAX( 1,
+     $                  KL+KU+2-INFO ), 1 ), LDAFB, WORK )
+               IF( RPVGRW.EQ.ZERO ) THEN
+                  RPVGRW = ONE
+               ELSE
+                  RPVGRW = ANORM / RPVGRW
+               END IF
+               WORK( 1 ) = RPVGRW
+               RCOND = ZERO
+            END IF
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A and the
+*     reciprocal pivot growth factor RPVGRW.
+*
+      IF( NOTRAN ) THEN
+         NORM = '1'
+      ELSE
+         NORM = 'I'
+      END IF
+      ANORM = DLANGB( NORM, N, KL, KU, AB, LDAB, WORK )
+      RPVGRW = DLANTB( 'M', 'U', 'N', N, KL+KU, AFB, LDAFB, WORK )
+      IF( RPVGRW.EQ.ZERO ) THEN
+         RPVGRW = ONE
+      ELSE
+         RPVGRW = DLANGB( 'M', N, KL, KU, AB, LDAB, WORK ) / RPVGRW
+      END IF
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL DGBCON( NORM, N, KL, KU, AFB, LDAFB, IPIV, ANORM, RCOND,
+     $             WORK, IWORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.DLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution matrix X.
+*
+      CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL DGBTRS( TRANS, N, KL, KU, NRHS, AFB, LDAFB, IPIV, X, LDX,
+     $             INFO )
+*
+*     Use iterative refinement to improve the computed solution and
+*     compute error bounds and backward error estimates for it.
+*
+      CALL DGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, IPIV,
+     $             B, LDB, X, LDX, FERR, BERR, WORK, IWORK, INFO )
+*
+*     Transform the solution matrix X to a solution of the original
+*     system.
+*
+      IF( NOTRAN ) THEN
+         IF( COLEQU ) THEN
+            DO 110 J = 1, NRHS
+               DO 100 I = 1, N
+                  X( I, J ) = C( I )*X( I, J )
+  100          CONTINUE
+  110       CONTINUE
+            DO 120 J = 1, NRHS
+               FERR( J ) = FERR( J ) / COLCND
+  120       CONTINUE
+         END IF
+      ELSE IF( ROWEQU ) THEN
+         DO 140 J = 1, NRHS
+            DO 130 I = 1, N
+               X( I, J ) = R( I )*X( I, J )
+  130       CONTINUE
+  140    CONTINUE
+         DO 150 J = 1, NRHS
+            FERR( J ) = FERR( J ) / ROWCND
+  150    CONTINUE
+      END IF
+*
+      WORK( 1 ) = RPVGRW
+      RETURN
+*
+*     End of DGBSVX
+*
+      END
diff --git a/costa/native/external/lapack/dgbtf2.f b/costa/native/external/lapack/dgbtf2.f
new file mode 100644
index 000000000..5e25629fc
--- /dev/null
+++ b/costa/native/external/lapack/dgbtf2.f
@@ -0,0 +1,203 @@
+      SUBROUTINE DGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, KL, KU, LDAB, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGBTF2 computes an LU factorization of a real m-by-n band matrix A
+*  using partial pivoting with row interchanges.
+*
+*  This is the unblocked version of the algorithm, calling Level 2 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  AB      (input/output) DOUBLE PRECISION array, dimension (LDAB,N)
+*          On entry, the matrix A in band storage, in rows KL+1 to
+*          2*KL+KU+1; rows 1 to KL of the array need not be set.
+*          The j-th column of A is stored in the j-th column of the
+*          array AB as follows:
+*          AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
+*
+*          On exit, details of the factorization: U is stored as an
+*          upper triangular band matrix with KL+KU superdiagonals in
+*          rows 1 to KL+KU+1, and the multipliers used during the
+*          factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
+*          See below for further details.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
+*
+*  IPIV    (output) INTEGER array, dimension (min(M,N))
+*          The pivot indices; for 1 <= i <= min(M,N), row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
+*               has been completed, but the factor U is exactly
+*               singular, and division by zero will occur if it is used
+*               to solve a system of equations.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  M = N = 6, KL = 2, KU = 1:
+*
+*  On entry:                       On exit:
+*
+*      *    *    *    +    +    +       *    *    *   u14  u25  u36
+*      *    *    +    +    +    +       *    *   u13  u24  u35  u46
+*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
+*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
+*     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
+*     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *
+*
+*  Array elements marked * are not used by the routine; elements marked
+*  + need not be set on entry, but are required by the routine to store
+*  elements of U, because of fill-in resulting from the row
+*  interchanges.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, JP, JU, KM, KV
+*     ..
+*     .. External Functions ..
+      INTEGER            IDAMAX
+      EXTERNAL           IDAMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGER, DSCAL, DSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     KV is the number of superdiagonals in the factor U, allowing for
+*     fill-in.
+*
+      KV = KU + KL
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KL+KV+1 ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGBTF2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     Gaussian elimination with partial pivoting
+*
+*     Set fill-in elements in columns KU+2 to KV to zero.
+*
+      DO 20 J = KU + 2, MIN( KV, N )
+         DO 10 I = KV - J + 2, KL
+            AB( I, J ) = ZERO
+   10    CONTINUE
+   20 CONTINUE
+*
+*     JU is the index of the last column affected by the current stage
+*     of the factorization.
+*
+      JU = 1
+*
+      DO 40 J = 1, MIN( M, N )
+*
+*        Set fill-in elements in column J+KV to zero.
+*
+         IF( J+KV.LE.N ) THEN
+            DO 30 I = 1, KL
+               AB( I, J+KV ) = ZERO
+   30       CONTINUE
+         END IF
+*
+*        Find pivot and test for singularity. KM is the number of
+*        subdiagonal elements in the current column.
+*
+         KM = MIN( KL, M-J )
+         JP = IDAMAX( KM+1, AB( KV+1, J ), 1 )
+         IPIV( J ) = JP + J - 1
+         IF( AB( KV+JP, J ).NE.ZERO ) THEN
+            JU = MAX( JU, MIN( J+KU+JP-1, N ) )
+*
+*           Apply interchange to columns J to JU.
+*
+            IF( JP.NE.1 )
+     $         CALL DSWAP( JU-J+1, AB( KV+JP, J ), LDAB-1,
+     $                     AB( KV+1, J ), LDAB-1 )
+*
+            IF( KM.GT.0 ) THEN
+*
+*              Compute multipliers.
+*
+               CALL DSCAL( KM, ONE / AB( KV+1, J ), AB( KV+2, J ), 1 )
+*
+*              Update trailing submatrix within the band.
+*
+               IF( JU.GT.J )
+     $            CALL DGER( KM, JU-J, -ONE, AB( KV+2, J ), 1,
+     $                       AB( KV, J+1 ), LDAB-1, AB( KV+1, J+1 ),
+     $                       LDAB-1 )
+            END IF
+         ELSE
+*
+*           If pivot is zero, set INFO to the index of the pivot
+*           unless a zero pivot has already been found.
+*
+            IF( INFO.EQ.0 )
+     $         INFO = J
+         END IF
+   40 CONTINUE
+      RETURN
+*
+*     End of DGBTF2
+*
+      END
diff --git a/costa/native/external/lapack/dgbtrf.f b/costa/native/external/lapack/dgbtrf.f
new file mode 100644
index 000000000..c6e3d0a9c
--- /dev/null
+++ b/costa/native/external/lapack/dgbtrf.f
@@ -0,0 +1,442 @@
+      SUBROUTINE DGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, KL, KU, LDAB, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGBTRF computes an LU factorization of a real m-by-n band matrix A
+*  using partial pivoting with row interchanges.
+*
+*  This is the blocked version of the algorithm, calling Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  AB      (input/output) DOUBLE PRECISION array, dimension (LDAB,N)
+*          On entry, the matrix A in band storage, in rows KL+1 to
+*          2*KL+KU+1; rows 1 to KL of the array need not be set.
+*          The j-th column of A is stored in the j-th column of the
+*          array AB as follows:
+*          AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
+*
+*          On exit, details of the factorization: U is stored as an
+*          upper triangular band matrix with KL+KU superdiagonals in
+*          rows 1 to KL+KU+1, and the multipliers used during the
+*          factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
+*          See below for further details.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
+*
+*  IPIV    (output) INTEGER array, dimension (min(M,N))
+*          The pivot indices; for 1 <= i <= min(M,N), row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
+*               has been completed, but the factor U is exactly
+*               singular, and division by zero will occur if it is used
+*               to solve a system of equations.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  M = N = 6, KL = 2, KU = 1:
+*
+*  On entry:                       On exit:
+*
+*      *    *    *    +    +    +       *    *    *   u14  u25  u36
+*      *    *    +    +    +    +       *    *   u13  u24  u35  u46
+*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
+*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
+*     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
+*     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *
+*
+*  Array elements marked * are not used by the routine; elements marked
+*  + need not be set on entry, but are required by the routine to store
+*  elements of U because of fill-in resulting from the row interchanges.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+      INTEGER            NBMAX, LDWORK
+      PARAMETER          ( NBMAX = 64, LDWORK = NBMAX+1 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
+     $                   JU, K2, KM, KV, NB, NW
+      DOUBLE PRECISION   TEMP
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   WORK13( LDWORK, NBMAX ),
+     $                   WORK31( LDWORK, NBMAX )
+*     ..
+*     .. External Functions ..
+      INTEGER            IDAMAX, ILAENV
+      EXTERNAL           IDAMAX, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DGBTF2, DGEMM, DGER, DLASWP, DSCAL,
+     $                   DSWAP, DTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     KV is the number of superdiagonals in the factor U, allowing for
+*     fill-in
+*
+      KV = KU + KL
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KL+KV+1 ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGBTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment
+*
+      NB = ILAENV( 1, 'DGBTRF', ' ', M, N, KL, KU )
+*
+*     The block size must not exceed the limit set by the size of the
+*     local arrays WORK13 and WORK31.
+*
+      NB = MIN( NB, NBMAX )
+*
+      IF( NB.LE.1 .OR. NB.GT.KL ) THEN
+*
+*        Use unblocked code
+*
+         CALL DGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
+      ELSE
+*
+*        Use blocked code
+*
+*        Zero the superdiagonal elements of the work array WORK13
+*
+         DO 20 J = 1, NB
+            DO 10 I = 1, J - 1
+               WORK13( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+*
+*        Zero the subdiagonal elements of the work array WORK31
+*
+         DO 40 J = 1, NB
+            DO 30 I = J + 1, NB
+               WORK31( I, J ) = ZERO
+   30       CONTINUE
+   40    CONTINUE
+*
+*        Gaussian elimination with partial pivoting
+*
+*        Set fill-in elements in columns KU+2 to KV to zero
+*
+         DO 60 J = KU + 2, MIN( KV, N )
+            DO 50 I = KV - J + 2, KL
+               AB( I, J ) = ZERO
+   50       CONTINUE
+   60    CONTINUE
+*
+*        JU is the index of the last column affected by the current
+*        stage of the factorization
+*
+         JU = 1
+*
+         DO 180 J = 1, MIN( M, N ), NB
+            JB = MIN( NB, MIN( M, N )-J+1 )
+*
+*           The active part of the matrix is partitioned
+*
+*              A11   A12   A13
+*              A21   A22   A23
+*              A31   A32   A33
+*
+*           Here A11, A21 and A31 denote the current block of JB columns
+*           which is about to be factorized. The number of rows in the
+*           partitioning are JB, I2, I3 respectively, and the numbers
+*           of columns are JB, J2, J3. The superdiagonal elements of A13
+*           and the subdiagonal elements of A31 lie outside the band.
+*
+            I2 = MIN( KL-JB, M-J-JB+1 )
+            I3 = MIN( JB, M-J-KL+1 )
+*
+*           J2 and J3 are computed after JU has been updated.
+*
+*           Factorize the current block of JB columns
+*
+            DO 80 JJ = J, J + JB - 1
+*
+*              Set fill-in elements in column JJ+KV to zero
+*
+               IF( JJ+KV.LE.N ) THEN
+                  DO 70 I = 1, KL
+                     AB( I, JJ+KV ) = ZERO
+   70             CONTINUE
+               END IF
+*
+*              Find pivot and test for singularity. KM is the number of
+*              subdiagonal elements in the current column.
+*
+               KM = MIN( KL, M-JJ )
+               JP = IDAMAX( KM+1, AB( KV+1, JJ ), 1 )
+               IPIV( JJ ) = JP + JJ - J
+               IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
+                  JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
+                  IF( JP.NE.1 ) THEN
+*
+*                    Apply interchange to columns J to J+JB-1
+*
+                     IF( JP+JJ-1.LT.J+KL ) THEN
+*
+                        CALL DSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
+     $                              AB( KV+JP+JJ-J, J ), LDAB-1 )
+                     ELSE
+*
+*                       The interchange affects columns J to JJ-1 of A31
+*                       which are stored in the work array WORK31
+*
+                        CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
+     $                              WORK31( JP+JJ-J-KL, 1 ), LDWORK )
+                        CALL DSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
+     $                              AB( KV+JP, JJ ), LDAB-1 )
+                     END IF
+                  END IF
+*
+*                 Compute multipliers
+*
+                  CALL DSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
+     $                        1 )
+*
+*                 Update trailing submatrix within the band and within
+*                 the current block. JM is the index of the last column
+*                 which needs to be updated.
+*
+                  JM = MIN( JU, J+JB-1 )
+                  IF( JM.GT.JJ )
+     $               CALL DGER( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
+     $                          AB( KV, JJ+1 ), LDAB-1,
+     $                          AB( KV+1, JJ+1 ), LDAB-1 )
+               ELSE
+*
+*                 If pivot is zero, set INFO to the index of the pivot
+*                 unless a zero pivot has already been found.
+*
+                  IF( INFO.EQ.0 )
+     $               INFO = JJ
+               END IF
+*
+*              Copy current column of A31 into the work array WORK31
+*
+               NW = MIN( JJ-J+1, I3 )
+               IF( NW.GT.0 )
+     $            CALL DCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
+     $                        WORK31( 1, JJ-J+1 ), 1 )
+   80       CONTINUE
+            IF( J+JB.LE.N ) THEN
+*
+*              Apply the row interchanges to the other blocks.
+*
+               J2 = MIN( JU-J+1, KV ) - JB
+               J3 = MAX( 0, JU-J-KV+1 )
+*
+*              Use DLASWP to apply the row interchanges to A12, A22, and
+*              A32.
+*
+               CALL DLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB,
+     $                      IPIV( J ), 1 )
+*
+*              Adjust the pivot indices.
+*
+               DO 90 I = J, J + JB - 1
+                  IPIV( I ) = IPIV( I ) + J - 1
+   90          CONTINUE
+*
+*              Apply the row interchanges to A13, A23, and A33
+*              columnwise.
+*
+               K2 = J - 1 + JB + J2
+               DO 110 I = 1, J3
+                  JJ = K2 + I
+                  DO 100 II = J + I - 1, J + JB - 1
+                     IP = IPIV( II )
+                     IF( IP.NE.II ) THEN
+                        TEMP = AB( KV+1+II-JJ, JJ )
+                        AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
+                        AB( KV+1+IP-JJ, JJ ) = TEMP
+                     END IF
+  100             CONTINUE
+  110          CONTINUE
+*
+*              Update the relevant part of the trailing submatrix
+*
+               IF( J2.GT.0 ) THEN
+*
+*                 Update A12
+*
+                  CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+     $                        JB, J2, ONE, AB( KV+1, J ), LDAB-1,
+     $                        AB( KV+1-JB, J+JB ), LDAB-1 )
+*
+                  IF( I2.GT.0 ) THEN
+*
+*                    Update A22
+*
+                     CALL DGEMM( 'No transpose', 'No transpose', I2, J2,
+     $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
+     $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
+     $                           AB( KV+1, J+JB ), LDAB-1 )
+                  END IF
+*
+                  IF( I3.GT.0 ) THEN
+*
+*                    Update A32
+*
+                     CALL DGEMM( 'No transpose', 'No transpose', I3, J2,
+     $                           JB, -ONE, WORK31, LDWORK,
+     $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
+     $                           AB( KV+KL+1-JB, J+JB ), LDAB-1 )
+                  END IF
+               END IF
+*
+               IF( J3.GT.0 ) THEN
+*
+*                 Copy the lower triangle of A13 into the work array
+*                 WORK13
+*
+                  DO 130 JJ = 1, J3
+                     DO 120 II = JJ, JB
+                        WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
+  120                CONTINUE
+  130             CONTINUE
+*
+*                 Update A13 in the work array
+*
+                  CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+     $                        JB, J3, ONE, AB( KV+1, J ), LDAB-1,
+     $                        WORK13, LDWORK )
+*
+                  IF( I2.GT.0 ) THEN
+*
+*                    Update A23
+*
+                     CALL DGEMM( 'No transpose', 'No transpose', I2, J3,
+     $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
+     $                           WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
+     $                           LDAB-1 )
+                  END IF
+*
+                  IF( I3.GT.0 ) THEN
+*
+*                    Update A33
+*
+                     CALL DGEMM( 'No transpose', 'No transpose', I3, J3,
+     $                           JB, -ONE, WORK31, LDWORK, WORK13,
+     $                           LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
+                  END IF
+*
+*                 Copy the lower triangle of A13 back into place
+*
+                  DO 150 JJ = 1, J3
+                     DO 140 II = JJ, JB
+                        AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
+  140                CONTINUE
+  150             CONTINUE
+               END IF
+            ELSE
+*
+*              Adjust the pivot indices.
+*
+               DO 160 I = J, J + JB - 1
+                  IPIV( I ) = IPIV( I ) + J - 1
+  160          CONTINUE
+            END IF
+*
+*           Partially undo the interchanges in the current block to
+*           restore the upper triangular form of A31 and copy the upper
+*           triangle of A31 back into place
+*
+            DO 170 JJ = J + JB - 1, J, -1
+               JP = IPIV( JJ ) - JJ + 1
+               IF( JP.NE.1 ) THEN
+*
+*                 Apply interchange to columns J to JJ-1
+*
+                  IF( JP+JJ-1.LT.J+KL ) THEN
+*
+*                    The interchange does not affect A31
+*
+                     CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
+     $                           AB( KV+JP+JJ-J, J ), LDAB-1 )
+                  ELSE
+*
+*                    The interchange does affect A31
+*
+                     CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
+     $                           WORK31( JP+JJ-J-KL, 1 ), LDWORK )
+                  END IF
+               END IF
+*
+*              Copy the current column of A31 back into place
+*
+               NW = MIN( I3, JJ-J+1 )
+               IF( NW.GT.0 )
+     $            CALL DCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
+     $                        AB( KV+KL+1-JJ+J, JJ ), 1 )
+  170       CONTINUE
+  180    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DGBTRF
+*
+      END
diff --git a/costa/native/external/lapack/dgbtrs.f b/costa/native/external/lapack/dgbtrs.f
new file mode 100644
index 000000000..26fdf91eb
--- /dev/null
+++ b/costa/native/external/lapack/dgbtrs.f
@@ -0,0 +1,187 @@
+      SUBROUTINE DGBTRS( TRANS, N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            INFO, KL, KU, LDAB, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   AB( LDAB, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGBTRS solves a system of linear equations
+*     A * X = B  or  A' * X = B
+*  with a general band matrix A using the LU factorization computed
+*  by DGBTRF.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations.
+*          = 'N':  A * X = B  (No transpose)
+*          = 'T':  A'* X = B  (Transpose)
+*          = 'C':  A'* X = B  (Conjugate transpose = Transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AB      (input) DOUBLE PRECISION array, dimension (LDAB,N)
+*          Details of the LU factorization of the band matrix A, as
+*          computed by DGBTRF.  U is stored as an upper triangular band
+*          matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and
+*          the multipliers used during the factorization are stored in
+*          rows KL+KU+2 to 2*KL+KU+1.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices; for 1 <= i <= N, row i of the matrix was
+*          interchanged with row IPIV(i).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LNOTI, NOTRAN
+      INTEGER            I, J, KD, L, LM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMV, DGER, DSWAP, DTBSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $    LSAME( TRANS, 'C' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.( 2*KL+KU+1 ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGBTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      KD = KU + KL + 1
+      LNOTI = KL.GT.0
+*
+      IF( NOTRAN ) THEN
+*
+*        Solve  A*X = B.
+*
+*        Solve L*X = B, overwriting B with X.
+*
+*        L is represented as a product of permutations and unit lower
+*        triangular matrices L = P(1) * L(1) * ... * P(n-1) * L(n-1),
+*        where each transformation L(i) is a rank-one modification of
+*        the identity matrix.
+*
+         IF( LNOTI ) THEN
+            DO 10 J = 1, N - 1
+               LM = MIN( KL, N-J )
+               L = IPIV( J )
+               IF( L.NE.J )
+     $            CALL DSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB )
+               CALL DGER( LM, NRHS, -ONE, AB( KD+1, J ), 1, B( J, 1 ),
+     $                    LDB, B( J+1, 1 ), LDB )
+   10       CONTINUE
+         END IF
+*
+         DO 20 I = 1, NRHS
+*
+*           Solve U*X = B, overwriting B with X.
+*
+            CALL DTBSV( 'Upper', 'No transpose', 'Non-unit', N, KL+KU,
+     $                  AB, LDAB, B( 1, I ), 1 )
+   20    CONTINUE
+*
+      ELSE
+*
+*        Solve A'*X = B.
+*
+         DO 30 I = 1, NRHS
+*
+*           Solve U'*X = B, overwriting B with X.
+*
+            CALL DTBSV( 'Upper', 'Transpose', 'Non-unit', N, KL+KU, AB,
+     $                  LDAB, B( 1, I ), 1 )
+   30    CONTINUE
+*
+*        Solve L'*X = B, overwriting B with X.
+*
+         IF( LNOTI ) THEN
+            DO 40 J = N - 1, 1, -1
+               LM = MIN( KL, N-J )
+               CALL DGEMV( 'Transpose', LM, NRHS, -ONE, B( J+1, 1 ),
+     $                     LDB, AB( KD+1, J ), 1, ONE, B( J, 1 ), LDB )
+               L = IPIV( J )
+               IF( L.NE.J )
+     $            CALL DSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB )
+   40       CONTINUE
+         END IF
+      END IF
+      RETURN
+*
+*     End of DGBTRS
+*
+      END
diff --git a/costa/native/external/lapack/dgebak.f b/costa/native/external/lapack/dgebak.f
new file mode 100644
index 000000000..7604639e2
--- /dev/null
+++ b/costa/native/external/lapack/dgebak.f
@@ -0,0 +1,189 @@
+      SUBROUTINE DGEBAK( JOB, SIDE, N, ILO, IHI, SCALE, M, V, LDV,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOB, SIDE
+      INTEGER            IHI, ILO, INFO, LDV, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   SCALE( * ), V( LDV, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGEBAK forms the right or left eigenvectors of a real general matrix
+*  by backward transformation on the computed eigenvectors of the
+*  balanced matrix output by DGEBAL.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          Specifies the type of backward transformation required:
+*          = 'N', do nothing, return immediately;
+*          = 'P', do backward transformation for permutation only;
+*          = 'S', do backward transformation for scaling only;
+*          = 'B', do backward transformations for both permutation and
+*                 scaling.
+*          JOB must be the same as the argument JOB supplied to DGEBAL.
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'R':  V contains right eigenvectors;
+*          = 'L':  V contains left eigenvectors.
+*
+*  N       (input) INTEGER
+*          The number of rows of the matrix V.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          The integers ILO and IHI determined by DGEBAL.
+*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+*
+*  SCALE   (input) DOUBLE PRECISION array, dimension (N)
+*          Details of the permutation and scaling factors, as returned
+*          by DGEBAL.
+*
+*  M       (input) INTEGER
+*          The number of columns of the matrix V.  M >= 0.
+*
+*  V       (input/output) DOUBLE PRECISION array, dimension (LDV,M)
+*          On entry, the matrix of right or left eigenvectors to be
+*          transformed, as returned by DHSEIN or DTREVC.
+*          On exit, V is overwritten by the transformed eigenvectors.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V. LDV >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFTV, RIGHTV
+      INTEGER            I, II, K
+      DOUBLE PRECISION   S
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSCAL, DSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and Test the input parameters
+*
+      RIGHTV = LSAME( SIDE, 'R' )
+      LEFTV = LSAME( SIDE, 'L' )
+*
+      INFO = 0
+      IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND.
+     $    .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN
+         INFO = -5
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -7
+      ELSE IF( LDV.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGEBAK', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+      IF( M.EQ.0 )
+     $   RETURN
+      IF( LSAME( JOB, 'N' ) )
+     $   RETURN
+*
+      IF( ILO.EQ.IHI )
+     $   GO TO 30
+*
+*     Backward balance
+*
+      IF( LSAME( JOB, 'S' ) .OR. LSAME( JOB, 'B' ) ) THEN
+*
+         IF( RIGHTV ) THEN
+            DO 10 I = ILO, IHI
+               S = SCALE( I )
+               CALL DSCAL( M, S, V( I, 1 ), LDV )
+   10       CONTINUE
+         END IF
+*
+         IF( LEFTV ) THEN
+            DO 20 I = ILO, IHI
+               S = ONE / SCALE( I )
+               CALL DSCAL( M, S, V( I, 1 ), LDV )
+   20       CONTINUE
+         END IF
+*
+      END IF
+*
+*     Backward permutation
+*
+*     For  I = ILO-1 step -1 until 1,
+*              IHI+1 step 1 until N do --
+*
+   30 CONTINUE
+      IF( LSAME( JOB, 'P' ) .OR. LSAME( JOB, 'B' ) ) THEN
+         IF( RIGHTV ) THEN
+            DO 40 II = 1, N
+               I = II
+               IF( I.GE.ILO .AND. I.LE.IHI )
+     $            GO TO 40
+               IF( I.LT.ILO )
+     $            I = ILO - II
+               K = SCALE( I )
+               IF( K.EQ.I )
+     $            GO TO 40
+               CALL DSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV )
+   40       CONTINUE
+         END IF
+*
+         IF( LEFTV ) THEN
+            DO 50 II = 1, N
+               I = II
+               IF( I.GE.ILO .AND. I.LE.IHI )
+     $            GO TO 50
+               IF( I.LT.ILO )
+     $            I = ILO - II
+               K = SCALE( I )
+               IF( K.EQ.I )
+     $            GO TO 50
+               CALL DSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV )
+   50       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DGEBAK
+*
+      END
diff --git a/costa/native/external/lapack/dgebal.f b/costa/native/external/lapack/dgebal.f
new file mode 100644
index 000000000..63e0d1dfe
--- /dev/null
+++ b/costa/native/external/lapack/dgebal.f
@@ -0,0 +1,323 @@
+      SUBROUTINE DGEBAL( JOB, N, A, LDA, ILO, IHI, SCALE, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOB
+      INTEGER            IHI, ILO, INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), SCALE( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGEBAL balances a general real matrix A.  This involves, first,
+*  permuting A by a similarity transformation to isolate eigenvalues
+*  in the first 1 to ILO-1 and last IHI+1 to N elements on the
+*  diagonal; and second, applying a diagonal similarity transformation
+*  to rows and columns ILO to IHI to make the rows and columns as
+*  close in norm as possible.  Both steps are optional.
+*
+*  Balancing may reduce the 1-norm of the matrix, and improve the
+*  accuracy of the computed eigenvalues and/or eigenvectors.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          Specifies the operations to be performed on A:
+*          = 'N':  none:  simply set ILO = 1, IHI = N, SCALE(I) = 1.0
+*                  for i = 1,...,N;
+*          = 'P':  permute only;
+*          = 'S':  scale only;
+*          = 'B':  both permute and scale.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the input matrix A.
+*          On exit,  A is overwritten by the balanced matrix.
+*          If JOB = 'N', A is not referenced.
+*          See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  ILO     (output) INTEGER
+*  IHI     (output) INTEGER
+*          ILO and IHI are set to integers such that on exit
+*          A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N.
+*          If JOB = 'N' or 'S', ILO = 1 and IHI = N.
+*
+*  SCALE   (output) DOUBLE PRECISION array, dimension (N)
+*          Details of the permutations and scaling factors applied to
+*          A.  If P(j) is the index of the row and column interchanged
+*          with row and column j and D(j) is the scaling factor
+*          applied to row and column j, then
+*          SCALE(j) = P(j)    for j = 1,...,ILO-1
+*                   = D(j)    for j = ILO,...,IHI
+*                   = P(j)    for j = IHI+1,...,N.
+*          The order in which the interchanges are made is N to IHI+1,
+*          then 1 to ILO-1.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The permutations consist of row and column interchanges which put
+*  the matrix in the form
+*
+*             ( T1   X   Y  )
+*     P A P = (  0   B   Z  )
+*             (  0   0   T2 )
+*
+*  where T1 and T2 are upper triangular matrices whose eigenvalues lie
+*  along the diagonal.  The column indices ILO and IHI mark the starting
+*  and ending columns of the submatrix B. Balancing consists of applying
+*  a diagonal similarity transformation inv(D) * B * D to make the
+*  1-norms of each row of B and its corresponding column nearly equal.
+*  The output matrix is
+*
+*     ( T1     X*D          Y    )
+*     (  0  inv(D)*B*D  inv(D)*Z ).
+*     (  0      0           T2   )
+*
+*  Information about the permutations P and the diagonal matrix D is
+*  returned in the vector SCALE.
+*
+*  This subroutine is based on the EISPACK routine BALANC.
+*
+*  Modified by Tzu-Yi Chen, Computer Science Division, University of
+*    California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+      DOUBLE PRECISION   SCLFAC
+      PARAMETER          ( SCLFAC = 0.8D+1 )
+      DOUBLE PRECISION   FACTOR
+      PARAMETER          ( FACTOR = 0.95D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOCONV
+      INTEGER            I, ICA, IEXC, IRA, J, K, L, M
+      DOUBLE PRECISION   C, CA, F, G, R, RA, S, SFMAX1, SFMAX2, SFMIN1,
+     $                   SFMIN2
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, IDAMAX, DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSCAL, DSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND.
+     $    .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGEBAL', -INFO )
+         RETURN
+      END IF
+*
+      K = 1
+      L = N
+*
+      IF( N.EQ.0 )
+     $   GO TO 210
+*
+      IF( LSAME( JOB, 'N' ) ) THEN
+         DO 10 I = 1, N
+            SCALE( I ) = ONE
+   10    CONTINUE
+         GO TO 210
+      END IF
+*
+      IF( LSAME( JOB, 'S' ) )
+     $   GO TO 120
+*
+*     Permutation to isolate eigenvalues if possible
+*
+      GO TO 50
+*
+*     Row and column exchange.
+*
+   20 CONTINUE
+      SCALE( M ) = J
+      IF( J.EQ.M )
+     $   GO TO 30
+*
+      CALL DSWAP( L, A( 1, J ), 1, A( 1, M ), 1 )
+      CALL DSWAP( N-K+1, A( J, K ), LDA, A( M, K ), LDA )
+*
+   30 CONTINUE
+      GO TO ( 40, 80 )IEXC
+*
+*     Search for rows isolating an eigenvalue and push them down.
+*
+   40 CONTINUE
+      IF( L.EQ.1 )
+     $   GO TO 210
+      L = L - 1
+*
+   50 CONTINUE
+      DO 70 J = L, 1, -1
+*
+         DO 60 I = 1, L
+            IF( I.EQ.J )
+     $         GO TO 60
+            IF( A( J, I ).NE.ZERO )
+     $         GO TO 70
+   60    CONTINUE
+*
+         M = L
+         IEXC = 1
+         GO TO 20
+   70 CONTINUE
+*
+      GO TO 90
+*
+*     Search for columns isolating an eigenvalue and push them left.
+*
+   80 CONTINUE
+      K = K + 1
+*
+   90 CONTINUE
+      DO 110 J = K, L
+*
+         DO 100 I = K, L
+            IF( I.EQ.J )
+     $         GO TO 100
+            IF( A( I, J ).NE.ZERO )
+     $         GO TO 110
+  100    CONTINUE
+*
+         M = K
+         IEXC = 2
+         GO TO 20
+  110 CONTINUE
+*
+  120 CONTINUE
+      DO 130 I = K, L
+         SCALE( I ) = ONE
+  130 CONTINUE
+*
+      IF( LSAME( JOB, 'P' ) )
+     $   GO TO 210
+*
+*     Balance the submatrix in rows K to L.
+*
+*     Iterative loop for norm reduction
+*
+      SFMIN1 = DLAMCH( 'S' ) / DLAMCH( 'P' )
+      SFMAX1 = ONE / SFMIN1
+      SFMIN2 = SFMIN1*SCLFAC
+      SFMAX2 = ONE / SFMIN2
+  140 CONTINUE
+      NOCONV = .FALSE.
+*
+      DO 200 I = K, L
+         C = ZERO
+         R = ZERO
+*
+         DO 150 J = K, L
+            IF( J.EQ.I )
+     $         GO TO 150
+            C = C + ABS( A( J, I ) )
+            R = R + ABS( A( I, J ) )
+  150    CONTINUE
+         ICA = IDAMAX( L, A( 1, I ), 1 )
+         CA = ABS( A( ICA, I ) )
+         IRA = IDAMAX( N-K+1, A( I, K ), LDA )
+         RA = ABS( A( I, IRA+K-1 ) )
+*
+*        Guard against zero C or R due to underflow.
+*
+         IF( C.EQ.ZERO .OR. R.EQ.ZERO )
+     $      GO TO 200
+         G = R / SCLFAC
+         F = ONE
+         S = C + R
+  160    CONTINUE
+         IF( C.GE.G .OR. MAX( F, C, CA ).GE.SFMAX2 .OR.
+     $       MIN( R, G, RA ).LE.SFMIN2 )GO TO 170
+         F = F*SCLFAC
+         C = C*SCLFAC
+         CA = CA*SCLFAC
+         R = R / SCLFAC
+         G = G / SCLFAC
+         RA = RA / SCLFAC
+         GO TO 160
+*
+  170    CONTINUE
+         G = C / SCLFAC
+  180    CONTINUE
+         IF( G.LT.R .OR. MAX( R, RA ).GE.SFMAX2 .OR.
+     $       MIN( F, C, G, CA ).LE.SFMIN2 )GO TO 190
+         F = F / SCLFAC
+         C = C / SCLFAC
+         G = G / SCLFAC
+         CA = CA / SCLFAC
+         R = R*SCLFAC
+         RA = RA*SCLFAC
+         GO TO 180
+*
+*        Now balance.
+*
+  190    CONTINUE
+         IF( ( C+R ).GE.FACTOR*S )
+     $      GO TO 200
+         IF( F.LT.ONE .AND. SCALE( I ).LT.ONE ) THEN
+            IF( F*SCALE( I ).LE.SFMIN1 )
+     $         GO TO 200
+         END IF
+         IF( F.GT.ONE .AND. SCALE( I ).GT.ONE ) THEN
+            IF( SCALE( I ).GE.SFMAX1 / F )
+     $         GO TO 200
+         END IF
+         G = ONE / F
+         SCALE( I ) = SCALE( I )*F
+         NOCONV = .TRUE.
+*
+         CALL DSCAL( N-K+1, G, A( I, K ), LDA )
+         CALL DSCAL( L, F, A( 1, I ), 1 )
+*
+  200 CONTINUE
+*
+      IF( NOCONV )
+     $   GO TO 140
+*
+  210 CONTINUE
+      ILO = K
+      IHI = L
+*
+      RETURN
+*
+*     End of DGEBAL
+*
+      END
diff --git a/costa/native/external/lapack/dgebd2.f b/costa/native/external/lapack/dgebd2.f
new file mode 100644
index 000000000..d4ed81f8b
--- /dev/null
+++ b/costa/native/external/lapack/dgebd2.f
@@ -0,0 +1,238 @@
+      SUBROUTINE DGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), D( * ), E( * ), TAUP( * ),
+     $                   TAUQ( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGEBD2 reduces a real general m by n matrix A to upper or lower
+*  bidiagonal form B by an orthogonal transformation: Q' * A * P = B.
+*
+*  If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows in the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns in the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the m by n general matrix to be reduced.
+*          On exit,
+*          if m >= n, the diagonal and the first superdiagonal are
+*            overwritten with the upper bidiagonal matrix B; the
+*            elements below the diagonal, with the array TAUQ, represent
+*            the orthogonal matrix Q as a product of elementary
+*            reflectors, and the elements above the first superdiagonal,
+*            with the array TAUP, represent the orthogonal matrix P as
+*            a product of elementary reflectors;
+*          if m < n, the diagonal and the first subdiagonal are
+*            overwritten with the lower bidiagonal matrix B; the
+*            elements below the first subdiagonal, with the array TAUQ,
+*            represent the orthogonal matrix Q as a product of
+*            elementary reflectors, and the elements above the diagonal,
+*            with the array TAUP, represent the orthogonal matrix P as
+*            a product of elementary reflectors.
+*          See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  D       (output) DOUBLE PRECISION array, dimension (min(M,N))
+*          The diagonal elements of the bidiagonal matrix B:
+*          D(i) = A(i,i).
+*
+*  E       (output) DOUBLE PRECISION array, dimension (min(M,N)-1)
+*          The off-diagonal elements of the bidiagonal matrix B:
+*          if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
+*          if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
+*
+*  TAUQ    (output) DOUBLE PRECISION array dimension (min(M,N))
+*          The scalar factors of the elementary reflectors which
+*          represent the orthogonal matrix Q. See Further Details.
+*
+*  TAUP    (output) DOUBLE PRECISION array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors which
+*          represent the orthogonal matrix P. See Further Details.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (max(M,N))
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit.
+*          < 0: if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The matrices Q and P are represented as products of elementary
+*  reflectors:
+*
+*  If m >= n,
+*
+*     Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1)
+*
+*  Each H(i) and G(i) has the form:
+*
+*     H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+*
+*  where tauq and taup are real scalars, and v and u are real vectors;
+*  v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);
+*  u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);
+*  tauq is stored in TAUQ(i) and taup in TAUP(i).
+*
+*  If m < n,
+*
+*     Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m)
+*
+*  Each H(i) and G(i) has the form:
+*
+*     H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+*
+*  where tauq and taup are real scalars, and v and u are real vectors;
+*  v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
+*  u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
+*  tauq is stored in TAUQ(i) and taup in TAUP(i).
+*
+*  The contents of A on exit are illustrated by the following examples:
+*
+*  m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
+*
+*    (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 )
+*    (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 )
+*    (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 )
+*    (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 )
+*    (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 )
+*    (  v1  v2  v3  v4  v5 )
+*
+*  where d and e denote diagonal and off-diagonal elements of B, vi
+*  denotes an element of the vector defining H(i), and ui an element of
+*  the vector defining G(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARF, DLARFG, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.LT.0 ) THEN
+         CALL XERBLA( 'DGEBD2', -INFO )
+         RETURN
+      END IF
+*
+      IF( M.GE.N ) THEN
+*
+*        Reduce to upper bidiagonal form
+*
+         DO 10 I = 1, N
+*
+*           Generate elementary reflector H(i) to annihilate A(i+1:m,i)
+*
+            CALL DLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1,
+     $                   TAUQ( I ) )
+            D( I ) = A( I, I )
+            A( I, I ) = ONE
+*
+*           Apply H(i) to A(i:m,i+1:n) from the left
+*
+            CALL DLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAUQ( I ),
+     $                  A( I, I+1 ), LDA, WORK )
+            A( I, I ) = D( I )
+*
+            IF( I.LT.N ) THEN
+*
+*              Generate elementary reflector G(i) to annihilate
+*              A(i,i+2:n)
+*
+               CALL DLARFG( N-I, A( I, I+1 ), A( I, MIN( I+2, N ) ),
+     $                      LDA, TAUP( I ) )
+               E( I ) = A( I, I+1 )
+               A( I, I+1 ) = ONE
+*
+*              Apply G(i) to A(i+1:m,i+1:n) from the right
+*
+               CALL DLARF( 'Right', M-I, N-I, A( I, I+1 ), LDA,
+     $                     TAUP( I ), A( I+1, I+1 ), LDA, WORK )
+               A( I, I+1 ) = E( I )
+            ELSE
+               TAUP( I ) = ZERO
+            END IF
+   10    CONTINUE
+      ELSE
+*
+*        Reduce to lower bidiagonal form
+*
+         DO 20 I = 1, M
+*
+*           Generate elementary reflector G(i) to annihilate A(i,i+1:n)
+*
+            CALL DLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA,
+     $                   TAUP( I ) )
+            D( I ) = A( I, I )
+            A( I, I ) = ONE
+*
+*           Apply G(i) to A(i+1:m,i:n) from the right
+*
+            CALL DLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAUP( I ),
+     $                  A( MIN( I+1, M ), I ), LDA, WORK )
+            A( I, I ) = D( I )
+*
+            IF( I.LT.M ) THEN
+*
+*              Generate elementary reflector H(i) to annihilate
+*              A(i+2:m,i)
+*
+               CALL DLARFG( M-I, A( I+1, I ), A( MIN( I+2, M ), I ), 1,
+     $                      TAUQ( I ) )
+               E( I ) = A( I+1, I )
+               A( I+1, I ) = ONE
+*
+*              Apply H(i) to A(i+1:m,i+1:n) from the left
+*
+               CALL DLARF( 'Left', M-I, N-I, A( I+1, I ), 1, TAUQ( I ),
+     $                     A( I+1, I+1 ), LDA, WORK )
+               A( I+1, I ) = E( I )
+            ELSE
+               TAUQ( I ) = ZERO
+            END IF
+   20    CONTINUE
+      END IF
+      RETURN
+*
+*     End of DGEBD2
+*
+      END
diff --git a/costa/native/external/lapack/dgebrd.f b/costa/native/external/lapack/dgebrd.f
new file mode 100644
index 000000000..75184b846
--- /dev/null
+++ b/costa/native/external/lapack/dgebrd.f
@@ -0,0 +1,269 @@
+      SUBROUTINE DGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), D( * ), E( * ), TAUP( * ),
+     $                   TAUQ( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGEBRD reduces a general real M-by-N matrix A to upper or lower
+*  bidiagonal form B by an orthogonal transformation: Q**T * A * P = B.
+*
+*  If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows in the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns in the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the M-by-N general matrix to be reduced.
+*          On exit,
+*          if m >= n, the diagonal and the first superdiagonal are
+*            overwritten with the upper bidiagonal matrix B; the
+*            elements below the diagonal, with the array TAUQ, represent
+*            the orthogonal matrix Q as a product of elementary
+*            reflectors, and the elements above the first superdiagonal,
+*            with the array TAUP, represent the orthogonal matrix P as
+*            a product of elementary reflectors;
+*          if m < n, the diagonal and the first subdiagonal are
+*            overwritten with the lower bidiagonal matrix B; the
+*            elements below the first subdiagonal, with the array TAUQ,
+*            represent the orthogonal matrix Q as a product of
+*            elementary reflectors, and the elements above the diagonal,
+*            with the array TAUP, represent the orthogonal matrix P as
+*            a product of elementary reflectors.
+*          See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  D       (output) DOUBLE PRECISION array, dimension (min(M,N))
+*          The diagonal elements of the bidiagonal matrix B:
+*          D(i) = A(i,i).
+*
+*  E       (output) DOUBLE PRECISION array, dimension (min(M,N)-1)
+*          The off-diagonal elements of the bidiagonal matrix B:
+*          if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
+*          if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
+*
+*  TAUQ    (output) DOUBLE PRECISION array dimension (min(M,N))
+*          The scalar factors of the elementary reflectors which
+*          represent the orthogonal matrix Q. See Further Details.
+*
+*  TAUP    (output) DOUBLE PRECISION array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors which
+*          represent the orthogonal matrix P. See Further Details.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.  LWORK >= max(1,M,N).
+*          For optimum performance LWORK >= (M+N)*NB, where NB
+*          is the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The matrices Q and P are represented as products of elementary
+*  reflectors:
+*
+*  If m >= n,
+*
+*     Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1)
+*
+*  Each H(i) and G(i) has the form:
+*
+*     H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+*
+*  where tauq and taup are real scalars, and v and u are real vectors;
+*  v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);
+*  u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);
+*  tauq is stored in TAUQ(i) and taup in TAUP(i).
+*
+*  If m < n,
+*
+*     Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m)
+*
+*  Each H(i) and G(i) has the form:
+*
+*     H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+*
+*  where tauq and taup are real scalars, and v and u are real vectors;
+*  v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
+*  u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
+*  tauq is stored in TAUQ(i) and taup in TAUP(i).
+*
+*  The contents of A on exit are illustrated by the following examples:
+*
+*  m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
+*
+*    (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 )
+*    (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 )
+*    (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 )
+*    (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 )
+*    (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 )
+*    (  v1  v2  v3  v4  v5 )
+*
+*  where d and e denote diagonal and off-diagonal elements of B, vi
+*  denotes an element of the vector defining H(i), and ui an element of
+*  the vector defining G(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IINFO, J, LDWRKX, LDWRKY, LWKOPT, MINMN, NB,
+     $                   NBMIN, NX
+      DOUBLE PRECISION   WS
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEBD2, DGEMM, DLABRD, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      NB = MAX( 1, ILAENV( 1, 'DGEBRD', ' ', M, N, -1, -1 ) )
+      LWKOPT = ( M+N )*NB
+      WORK( 1 ) = DBLE( LWKOPT )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, M, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.LT.0 ) THEN
+         CALL XERBLA( 'DGEBRD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      MINMN = MIN( M, N )
+      IF( MINMN.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      WS = MAX( M, N )
+      LDWRKX = M
+      LDWRKY = N
+*
+      IF( NB.GT.1 .AND. NB.LT.MINMN ) THEN
+*
+*        Set the crossover point NX.
+*
+         NX = MAX( NB, ILAENV( 3, 'DGEBRD', ' ', M, N, -1, -1 ) )
+*
+*        Determine when to switch from blocked to unblocked code.
+*
+         IF( NX.LT.MINMN ) THEN
+            WS = ( M+N )*NB
+            IF( LWORK.LT.WS ) THEN
+*
+*              Not enough work space for the optimal NB, consider using
+*              a smaller block size.
+*
+               NBMIN = ILAENV( 2, 'DGEBRD', ' ', M, N, -1, -1 )
+               IF( LWORK.GE.( M+N )*NBMIN ) THEN
+                  NB = LWORK / ( M+N )
+               ELSE
+                  NB = 1
+                  NX = MINMN
+               END IF
+            END IF
+         END IF
+      ELSE
+         NX = MINMN
+      END IF
+*
+      DO 30 I = 1, MINMN - NX, NB
+*
+*        Reduce rows and columns i:i+nb-1 to bidiagonal form and return
+*        the matrices X and Y which are needed to update the unreduced
+*        part of the matrix
+*
+         CALL DLABRD( M-I+1, N-I+1, NB, A( I, I ), LDA, D( I ), E( I ),
+     $                TAUQ( I ), TAUP( I ), WORK, LDWRKX,
+     $                WORK( LDWRKX*NB+1 ), LDWRKY )
+*
+*        Update the trailing submatrix A(i+nb:m,i+nb:n), using an update
+*        of the form  A := A - V*Y' - X*U'
+*
+         CALL DGEMM( 'No transpose', 'Transpose', M-I-NB+1, N-I-NB+1,
+     $               NB, -ONE, A( I+NB, I ), LDA,
+     $               WORK( LDWRKX*NB+NB+1 ), LDWRKY, ONE,
+     $               A( I+NB, I+NB ), LDA )
+         CALL DGEMM( 'No transpose', 'No transpose', M-I-NB+1, N-I-NB+1,
+     $               NB, -ONE, WORK( NB+1 ), LDWRKX, A( I, I+NB ), LDA,
+     $               ONE, A( I+NB, I+NB ), LDA )
+*
+*        Copy diagonal and off-diagonal elements of B back into A
+*
+         IF( M.GE.N ) THEN
+            DO 10 J = I, I + NB - 1
+               A( J, J ) = D( J )
+               A( J, J+1 ) = E( J )
+   10       CONTINUE
+         ELSE
+            DO 20 J = I, I + NB - 1
+               A( J, J ) = D( J )
+               A( J+1, J ) = E( J )
+   20       CONTINUE
+         END IF
+   30 CONTINUE
+*
+*     Use unblocked code to reduce the remainder of the matrix
+*
+      CALL DGEBD2( M-I+1, N-I+1, A( I, I ), LDA, D( I ), E( I ),
+     $             TAUQ( I ), TAUP( I ), WORK, IINFO )
+      WORK( 1 ) = WS
+      RETURN
+*
+*     End of DGEBRD
+*
+      END
diff --git a/costa/native/external/lapack/dgecon.f b/costa/native/external/lapack/dgecon.f
new file mode 100644
index 000000000..f6bd485f4
--- /dev/null
+++ b/costa/native/external/lapack/dgecon.f
@@ -0,0 +1,181 @@
+      SUBROUTINE DGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            INFO, LDA, N
+      DOUBLE PRECISION   ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGECON estimates the reciprocal of the condition number of a general
+*  real matrix A, in either the 1-norm or the infinity-norm, using
+*  the LU factorization computed by DGETRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as
+*     RCOND = 1 / ( norm(A) * norm(inv(A)) ).
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies whether the 1-norm condition number or the
+*          infinity-norm condition number is required:
+*          = '1' or 'O':  1-norm;
+*          = 'I':         Infinity-norm.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+*          The factors L and U from the factorization A = P*L*U
+*          as computed by DGETRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  ANORM   (input) DOUBLE PRECISION
+*          If NORM = '1' or 'O', the 1-norm of the original matrix A.
+*          If NORM = 'I', the infinity-norm of the original matrix A.
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(norm(A) * norm(inv(A))).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (4*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ONENRM
+      CHARACTER          NORMIN
+      INTEGER            IX, KASE, KASE1
+      DOUBLE PRECISION   AINVNM, SCALE, SL, SMLNUM, SU
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, IDAMAX, DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLACON, DLATRS, DRSCL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' )
+      IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGECON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.EQ.ZERO ) THEN
+         RETURN
+      END IF
+*
+      SMLNUM = DLAMCH( 'Safe minimum' )
+*
+*     Estimate the norm of inv(A).
+*
+      AINVNM = ZERO
+      NORMIN = 'N'
+      IF( ONENRM ) THEN
+         KASE1 = 1
+      ELSE
+         KASE1 = 2
+      END IF
+      KASE = 0
+   10 CONTINUE
+      CALL DLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+         IF( KASE.EQ.KASE1 ) THEN
+*
+*           Multiply by inv(L).
+*
+            CALL DLATRS( 'Lower', 'No transpose', 'Unit', NORMIN, N, A,
+     $                   LDA, WORK, SL, WORK( 2*N+1 ), INFO )
+*
+*           Multiply by inv(U).
+*
+            CALL DLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   A, LDA, WORK, SU, WORK( 3*N+1 ), INFO )
+         ELSE
+*
+*           Multiply by inv(U').
+*
+            CALL DLATRS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, A,
+     $                   LDA, WORK, SU, WORK( 3*N+1 ), INFO )
+*
+*           Multiply by inv(L').
+*
+            CALL DLATRS( 'Lower', 'Transpose', 'Unit', NORMIN, N, A,
+     $                   LDA, WORK, SL, WORK( 2*N+1 ), INFO )
+         END IF
+*
+*        Divide X by 1/(SL*SU) if doing so will not cause overflow.
+*
+         SCALE = SL*SU
+         NORMIN = 'Y'
+         IF( SCALE.NE.ONE ) THEN
+            IX = IDAMAX( N, WORK, 1 )
+            IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO )
+     $         GO TO 20
+            CALL DRSCL( N, SCALE, WORK, 1 )
+         END IF
+         GO TO 10
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+   20 CONTINUE
+      RETURN
+*
+*     End of DGECON
+*
+      END
diff --git a/costa/native/external/lapack/dgeequ.f b/costa/native/external/lapack/dgeequ.f
new file mode 100644
index 000000000..a1be3c169
--- /dev/null
+++ b/costa/native/external/lapack/dgeequ.f
@@ -0,0 +1,226 @@
+      SUBROUTINE DGEEQU( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+      DOUBLE PRECISION   AMAX, COLCND, ROWCND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), C( * ), R( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGEEQU computes row and column scalings intended to equilibrate an
+*  M-by-N matrix A and reduce its condition number.  R returns the row
+*  scale factors and C the column scale factors, chosen to try to make
+*  the largest element in each row and column of the matrix B with
+*  elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1.
+*
+*  R(i) and C(j) are restricted to be between SMLNUM = smallest safe
+*  number and BIGNUM = largest safe number.  Use of these scaling
+*  factors is not guaranteed to reduce the condition number of A but
+*  works well in practice.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+*          The M-by-N matrix whose equilibration factors are
+*          to be computed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  R       (output) DOUBLE PRECISION array, dimension (M)
+*          If INFO = 0 or INFO > M, R contains the row scale factors
+*          for A.
+*
+*  C       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0,  C contains the column scale factors for A.
+*
+*  ROWCND  (output) DOUBLE PRECISION
+*          If INFO = 0 or INFO > M, ROWCND contains the ratio of the
+*          smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and
+*          AMAX is neither too large nor too small, it is not worth
+*          scaling by R.
+*
+*  COLCND  (output) DOUBLE PRECISION
+*          If INFO = 0, COLCND contains the ratio of the smallest
+*          C(i) to the largest C(i).  If COLCND >= 0.1, it is not
+*          worth scaling by C.
+*
+*  AMAX    (output) DOUBLE PRECISION
+*          Absolute value of largest matrix element.  If AMAX is very
+*          close to overflow or very close to underflow, the matrix
+*          should be scaled.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i,  and i is
+*                <= M:  the i-th row of A is exactly zero
+*                >  M:  the (i-M)-th column of A is exactly zero
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      DOUBLE PRECISION   BIGNUM, RCMAX, RCMIN, SMLNUM
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGEEQU', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         ROWCND = ONE
+         COLCND = ONE
+         AMAX = ZERO
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SMLNUM = DLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+*
+*     Compute row scale factors.
+*
+      DO 10 I = 1, M
+         R( I ) = ZERO
+   10 CONTINUE
+*
+*     Find the maximum element in each row.
+*
+      DO 30 J = 1, N
+         DO 20 I = 1, M
+            R( I ) = MAX( R( I ), ABS( A( I, J ) ) )
+   20    CONTINUE
+   30 CONTINUE
+*
+*     Find the maximum and minimum scale factors.
+*
+      RCMIN = BIGNUM
+      RCMAX = ZERO
+      DO 40 I = 1, M
+         RCMAX = MAX( RCMAX, R( I ) )
+         RCMIN = MIN( RCMIN, R( I ) )
+   40 CONTINUE
+      AMAX = RCMAX
+*
+      IF( RCMIN.EQ.ZERO ) THEN
+*
+*        Find the first zero scale factor and return an error code.
+*
+         DO 50 I = 1, M
+            IF( R( I ).EQ.ZERO ) THEN
+               INFO = I
+               RETURN
+            END IF
+   50    CONTINUE
+      ELSE
+*
+*        Invert the scale factors.
+*
+         DO 60 I = 1, M
+            R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM )
+   60    CONTINUE
+*
+*        Compute ROWCND = min(R(I)) / max(R(I))
+*
+         ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+      END IF
+*
+*     Compute column scale factors
+*
+      DO 70 J = 1, N
+         C( J ) = ZERO
+   70 CONTINUE
+*
+*     Find the maximum element in each column,
+*     assuming the row scaling computed above.
+*
+      DO 90 J = 1, N
+         DO 80 I = 1, M
+            C( J ) = MAX( C( J ), ABS( A( I, J ) )*R( I ) )
+   80    CONTINUE
+   90 CONTINUE
+*
+*     Find the maximum and minimum scale factors.
+*
+      RCMIN = BIGNUM
+      RCMAX = ZERO
+      DO 100 J = 1, N
+         RCMIN = MIN( RCMIN, C( J ) )
+         RCMAX = MAX( RCMAX, C( J ) )
+  100 CONTINUE
+*
+      IF( RCMIN.EQ.ZERO ) THEN
+*
+*        Find the first zero scale factor and return an error code.
+*
+         DO 110 J = 1, N
+            IF( C( J ).EQ.ZERO ) THEN
+               INFO = M + J
+               RETURN
+            END IF
+  110    CONTINUE
+      ELSE
+*
+*        Invert the scale factors.
+*
+         DO 120 J = 1, N
+            C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM )
+  120    CONTINUE
+*
+*        Compute COLCND = min(C(J)) / max(C(J))
+*
+         COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+      END IF
+*
+      RETURN
+*
+*     End of DGEEQU
+*
+      END
diff --git a/costa/native/external/lapack/dgees.f b/costa/native/external/lapack/dgees.f
new file mode 100644
index 000000000..18f097041
--- /dev/null
+++ b/costa/native/external/lapack/dgees.f
@@ -0,0 +1,431 @@
+      SUBROUTINE DGEES( JOBVS, SORT, SELECT, N, A, LDA, SDIM, WR, WI,
+     $                  VS, LDVS, WORK, LWORK, BWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVS, SORT
+      INTEGER            INFO, LDA, LDVS, LWORK, N, SDIM
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            BWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), VS( LDVS, * ), WI( * ), WORK( * ),
+     $                   WR( * )
+*     ..
+*     .. Function Arguments ..
+      LOGICAL            SELECT
+      EXTERNAL           SELECT
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGEES computes for an N-by-N real nonsymmetric matrix A, the
+*  eigenvalues, the real Schur form T, and, optionally, the matrix of
+*  Schur vectors Z.  This gives the Schur factorization A = Z*T*(Z**T).
+*
+*  Optionally, it also orders the eigenvalues on the diagonal of the
+*  real Schur form so that selected eigenvalues are at the top left.
+*  The leading columns of Z then form an orthonormal basis for the
+*  invariant subspace corresponding to the selected eigenvalues.
+*
+*  A matrix is in real Schur form if it is upper quasi-triangular with
+*  1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in the
+*  form
+*          [  a  b  ]
+*          [  c  a  ]
+*
+*  where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc).
+*
+*  Arguments
+*  =========
+*
+*  JOBVS   (input) CHARACTER*1
+*          = 'N': Schur vectors are not computed;
+*          = 'V': Schur vectors are computed.
+*
+*  SORT    (input) CHARACTER*1
+*          Specifies whether or not to order the eigenvalues on the
+*          diagonal of the Schur form.
+*          = 'N': Eigenvalues are not ordered;
+*          = 'S': Eigenvalues are ordered (see SELECT).
+*
+*  SELECT  (input) LOGICAL FUNCTION of two DOUBLE PRECISION arguments
+*          SELECT must be declared EXTERNAL in the calling subroutine.
+*          If SORT = 'S', SELECT is used to select eigenvalues to sort
+*          to the top left of the Schur form.
+*          If SORT = 'N', SELECT is not referenced.
+*          An eigenvalue WR(j)+sqrt(-1)*WI(j) is selected if
+*          SELECT(WR(j),WI(j)) is true; i.e., if either one of a complex
+*          conjugate pair of eigenvalues is selected, then both complex
+*          eigenvalues are selected.
+*          Note that a selected complex eigenvalue may no longer
+*          satisfy SELECT(WR(j),WI(j)) = .TRUE. after ordering, since
+*          ordering may change the value of complex eigenvalues
+*          (especially if the eigenvalue is ill-conditioned); in this
+*          case INFO is set to N+2 (see INFO below).
+*
+*  N       (input) INTEGER
+*          The order of the matrix A. N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the N-by-N matrix A.
+*          On exit, A has been overwritten by its real Schur form T.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  SDIM    (output) INTEGER
+*          If SORT = 'N', SDIM = 0.
+*          If SORT = 'S', SDIM = number of eigenvalues (after sorting)
+*                         for which SELECT is true. (Complex conjugate
+*                         pairs for which SELECT is true for either
+*                         eigenvalue count as 2.)
+*
+*  WR      (output) DOUBLE PRECISION array, dimension (N)
+*  WI      (output) DOUBLE PRECISION array, dimension (N)
+*          WR and WI contain the real and imaginary parts,
+*          respectively, of the computed eigenvalues in the same order
+*          that they appear on the diagonal of the output Schur form T.
+*          Complex conjugate pairs of eigenvalues will appear
+*          consecutively with the eigenvalue having the positive
+*          imaginary part first.
+*
+*  VS      (output) DOUBLE PRECISION array, dimension (LDVS,N)
+*          If JOBVS = 'V', VS contains the orthogonal matrix Z of Schur
+*          vectors.
+*          If JOBVS = 'N', VS is not referenced.
+*
+*  LDVS    (input) INTEGER
+*          The leading dimension of the array VS.  LDVS >= 1; if
+*          JOBVS = 'V', LDVS >= N.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) contains the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,3*N).
+*          For good performance, LWORK must generally be larger.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  BWORK   (workspace) LOGICAL array, dimension (N)
+*          Not referenced if SORT = 'N'.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value.
+*          > 0: if INFO = i, and i is
+*             <= N: the QR algorithm failed to compute all the
+*                   eigenvalues; elements 1:ILO-1 and i+1:N of WR and WI
+*                   contain those eigenvalues which have converged; if
+*                   JOBVS = 'V', VS contains the matrix which reduces A
+*                   to its partially converged Schur form.
+*             = N+1: the eigenvalues could not be reordered because some
+*                   eigenvalues were too close to separate (the problem
+*                   is very ill-conditioned);
+*             = N+2: after reordering, roundoff changed values of some
+*                   complex eigenvalues so that leading eigenvalues in
+*                   the Schur form no longer satisfy SELECT=.TRUE.  This
+*                   could also be caused by underflow due to scaling.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            CURSL, LASTSL, LQUERY, LST2SL, SCALEA, WANTST,
+     $                   WANTVS
+      INTEGER            HSWORK, I, I1, I2, IBAL, ICOND, IERR, IEVAL,
+     $                   IHI, ILO, INXT, IP, ITAU, IWRK, K, MAXB,
+     $                   MAXWRK, MINWRK
+      DOUBLE PRECISION   ANRM, BIGNUM, CSCALE, EPS, S, SEP, SMLNUM
+*     ..
+*     .. Local Arrays ..
+      INTEGER            IDUM( 1 )
+      DOUBLE PRECISION   DUM( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DGEBAK, DGEBAL, DGEHRD, DHSEQR, DLACPY,
+     $                   DLASCL, DORGHR, DSWAP, DTRSEN, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, DLANGE
+      EXTERNAL           LSAME, ILAENV, DLAMCH, DLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      WANTVS = LSAME( JOBVS, 'V' )
+      WANTST = LSAME( SORT, 'S' )
+      IF( ( .NOT.WANTVS ) .AND. ( .NOT.LSAME( JOBVS, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDVS.LT.1 .OR. ( WANTVS .AND. LDVS.LT.N ) ) THEN
+         INFO = -11
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV.
+*       HSWORK refers to the workspace preferred by DHSEQR, as
+*       calculated below. HSWORK is computed assuming ILO=1 and IHI=N,
+*       the worst case.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN
+         MAXWRK = 2*N + N*ILAENV( 1, 'DGEHRD', ' ', N, 1, N, 0 )
+         MINWRK = MAX( 1, 3*N )
+         IF( .NOT.WANTVS ) THEN
+            MAXB = MAX( ILAENV( 8, 'DHSEQR', 'SN', N, 1, N, -1 ), 2 )
+            K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'DHSEQR', 'SN', N, 1,
+     $          N, -1 ) ) )
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, N+HSWORK, 1 )
+         ELSE
+            MAXWRK = MAX( MAXWRK, 2*N+( N-1 )*
+     $               ILAENV( 1, 'DORGHR', ' ', N, 1, N, -1 ) )
+            MAXB = MAX( ILAENV( 8, 'DHSEQR', 'EN', N, 1, N, -1 ), 2 )
+            K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'DHSEQR', 'EN', N, 1,
+     $          N, -1 ) ) )
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, N+HSWORK, 1 )
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGEES ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         SDIM = 0
+         RETURN
+      END IF
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'P' )
+      SMLNUM = DLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SQRT( SMLNUM ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = DLANGE( 'M', N, N, A, LDA, DUM )
+      SCALEA = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = SMLNUM
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = BIGNUM
+      END IF
+      IF( SCALEA )
+     $   CALL DLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR )
+*
+*     Permute the matrix to make it more nearly triangular
+*     (Workspace: need N)
+*
+      IBAL = 1
+      CALL DGEBAL( 'P', N, A, LDA, ILO, IHI, WORK( IBAL ), IERR )
+*
+*     Reduce to upper Hessenberg form
+*     (Workspace: need 3*N, prefer 2*N+N*NB)
+*
+      ITAU = N + IBAL
+      IWRK = N + ITAU
+      CALL DGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ),
+     $             LWORK-IWRK+1, IERR )
+*
+      IF( WANTVS ) THEN
+*
+*        Copy Householder vectors to VS
+*
+         CALL DLACPY( 'L', N, N, A, LDA, VS, LDVS )
+*
+*        Generate orthogonal matrix in VS
+*        (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB)
+*
+         CALL DORGHR( N, ILO, IHI, VS, LDVS, WORK( ITAU ), WORK( IWRK ),
+     $                LWORK-IWRK+1, IERR )
+      END IF
+*
+      SDIM = 0
+*
+*     Perform QR iteration, accumulating Schur vectors in VS if desired
+*     (Workspace: need N+1, prefer N+HSWORK (see comments) )
+*
+      IWRK = ITAU
+      CALL DHSEQR( 'S', JOBVS, N, ILO, IHI, A, LDA, WR, WI, VS, LDVS,
+     $             WORK( IWRK ), LWORK-IWRK+1, IEVAL )
+      IF( IEVAL.GT.0 )
+     $   INFO = IEVAL
+*
+*     Sort eigenvalues if desired
+*
+      IF( WANTST .AND. INFO.EQ.0 ) THEN
+         IF( SCALEA ) THEN
+            CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, WR, N, IERR )
+            CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, WI, N, IERR )
+         END IF
+         DO 10 I = 1, N
+            BWORK( I ) = SELECT( WR( I ), WI( I ) )
+   10    CONTINUE
+*
+*        Reorder eigenvalues and transform Schur vectors
+*        (Workspace: none needed)
+*
+         CALL DTRSEN( 'N', JOBVS, BWORK, N, A, LDA, VS, LDVS, WR, WI,
+     $                SDIM, S, SEP, WORK( IWRK ), LWORK-IWRK+1, IDUM, 1,
+     $                ICOND )
+         IF( ICOND.GT.0 )
+     $      INFO = N + ICOND
+      END IF
+*
+      IF( WANTVS ) THEN
+*
+*        Undo balancing
+*        (Workspace: need N)
+*
+         CALL DGEBAK( 'P', 'R', N, ILO, IHI, WORK( IBAL ), N, VS, LDVS,
+     $                IERR )
+      END IF
+*
+      IF( SCALEA ) THEN
+*
+*        Undo scaling for the Schur form of A
+*
+         CALL DLASCL( 'H', 0, 0, CSCALE, ANRM, N, N, A, LDA, IERR )
+         CALL DCOPY( N, A, LDA+1, WR, 1 )
+         IF( CSCALE.EQ.SMLNUM ) THEN
+*
+*           If scaling back towards underflow, adjust WI if an
+*           offdiagonal element of a 2-by-2 block in the Schur form
+*           underflows.
+*
+            IF( IEVAL.GT.0 ) THEN
+               I1 = IEVAL + 1
+               I2 = IHI - 1
+               CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WI,
+     $                      MAX( ILO-1, 1 ), IERR )
+            ELSE IF( WANTST ) THEN
+               I1 = 1
+               I2 = N - 1
+            ELSE
+               I1 = ILO
+               I2 = IHI - 1
+            END IF
+            INXT = I1 - 1
+            DO 20 I = I1, I2
+               IF( I.LT.INXT )
+     $            GO TO 20
+               IF( WI( I ).EQ.ZERO ) THEN
+                  INXT = I + 1
+               ELSE
+                  IF( A( I+1, I ).EQ.ZERO ) THEN
+                     WI( I ) = ZERO
+                     WI( I+1 ) = ZERO
+                  ELSE IF( A( I+1, I ).NE.ZERO .AND. A( I, I+1 ).EQ.
+     $                     ZERO ) THEN
+                     WI( I ) = ZERO
+                     WI( I+1 ) = ZERO
+                     IF( I.GT.1 )
+     $                  CALL DSWAP( I-1, A( 1, I ), 1, A( 1, I+1 ), 1 )
+                     IF( N.GT.I+1 )
+     $                  CALL DSWAP( N-I-1, A( I, I+2 ), LDA,
+     $                              A( I+1, I+2 ), LDA )
+                     CALL DSWAP( N, VS( 1, I ), 1, VS( 1, I+1 ), 1 )
+                     A( I, I+1 ) = A( I+1, I )
+                     A( I+1, I ) = ZERO
+                  END IF
+                  INXT = I + 2
+               END IF
+   20       CONTINUE
+         END IF
+*
+*        Undo scaling for the imaginary part of the eigenvalues
+*
+         CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N-IEVAL, 1,
+     $                WI( IEVAL+1 ), MAX( N-IEVAL, 1 ), IERR )
+      END IF
+*
+      IF( WANTST .AND. INFO.EQ.0 ) THEN
+*
+*        Check if reordering successful
+*
+         LASTSL = .TRUE.
+         LST2SL = .TRUE.
+         SDIM = 0
+         IP = 0
+         DO 30 I = 1, N
+            CURSL = SELECT( WR( I ), WI( I ) )
+            IF( WI( I ).EQ.ZERO ) THEN
+               IF( CURSL )
+     $            SDIM = SDIM + 1
+               IP = 0
+               IF( CURSL .AND. .NOT.LASTSL )
+     $            INFO = N + 2
+            ELSE
+               IF( IP.EQ.1 ) THEN
+*
+*                 Last eigenvalue of conjugate pair
+*
+                  CURSL = CURSL .OR. LASTSL
+                  LASTSL = CURSL
+                  IF( CURSL )
+     $               SDIM = SDIM + 2
+                  IP = -1
+                  IF( CURSL .AND. .NOT.LST2SL )
+     $               INFO = N + 2
+               ELSE
+*
+*                 First eigenvalue of conjugate pair
+*
+                  IP = 1
+               END IF
+            END IF
+            LST2SL = LASTSL
+            LASTSL = CURSL
+   30    CONTINUE
+      END IF
+*
+      WORK( 1 ) = MAXWRK
+      RETURN
+*
+*     End of DGEES
+*
+      END
diff --git a/costa/native/external/lapack/dgeesx.f b/costa/native/external/lapack/dgeesx.f
new file mode 100644
index 000000000..bda013487
--- /dev/null
+++ b/costa/native/external/lapack/dgeesx.f
@@ -0,0 +1,502 @@
+      SUBROUTINE DGEESX( JOBVS, SORT, SELECT, SENSE, N, A, LDA, SDIM,
+     $                   WR, WI, VS, LDVS, RCONDE, RCONDV, WORK, LWORK,
+     $                   IWORK, LIWORK, BWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVS, SENSE, SORT
+      INTEGER            INFO, LDA, LDVS, LIWORK, LWORK, N, SDIM
+      DOUBLE PRECISION   RCONDE, RCONDV
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            BWORK( * )
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), VS( LDVS, * ), WI( * ), WORK( * ),
+     $                   WR( * )
+*     ..
+*     .. Function Arguments ..
+      LOGICAL            SELECT
+      EXTERNAL           SELECT
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGEESX computes for an N-by-N real nonsymmetric matrix A, the
+*  eigenvalues, the real Schur form T, and, optionally, the matrix of
+*  Schur vectors Z.  This gives the Schur factorization A = Z*T*(Z**T).
+*
+*  Optionally, it also orders the eigenvalues on the diagonal of the
+*  real Schur form so that selected eigenvalues are at the top left;
+*  computes a reciprocal condition number for the average of the
+*  selected eigenvalues (RCONDE); and computes a reciprocal condition
+*  number for the right invariant subspace corresponding to the
+*  selected eigenvalues (RCONDV).  The leading columns of Z form an
+*  orthonormal basis for this invariant subspace.
+*
+*  For further explanation of the reciprocal condition numbers RCONDE
+*  and RCONDV, see Section 4.10 of the LAPACK Users' Guide (where
+*  these quantities are called s and sep respectively).
+*
+*  A real matrix is in real Schur form if it is upper quasi-triangular
+*  with 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in
+*  the form
+*            [  a  b  ]
+*            [  c  a  ]
+*
+*  where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc).
+*
+*  Arguments
+*  =========
+*
+*  JOBVS   (input) CHARACTER*1
+*          = 'N': Schur vectors are not computed;
+*          = 'V': Schur vectors are computed.
+*
+*  SORT    (input) CHARACTER*1
+*          Specifies whether or not to order the eigenvalues on the
+*          diagonal of the Schur form.
+*          = 'N': Eigenvalues are not ordered;
+*          = 'S': Eigenvalues are ordered (see SELECT).
+*
+*  SELECT  (input) LOGICAL FUNCTION of two DOUBLE PRECISION arguments
+*          SELECT must be declared EXTERNAL in the calling subroutine.
+*          If SORT = 'S', SELECT is used to select eigenvalues to sort
+*          to the top left of the Schur form.
+*          If SORT = 'N', SELECT is not referenced.
+*          An eigenvalue WR(j)+sqrt(-1)*WI(j) is selected if
+*          SELECT(WR(j),WI(j)) is true; i.e., if either one of a
+*          complex conjugate pair of eigenvalues is selected, then both
+*          are.  Note that a selected complex eigenvalue may no longer
+*          satisfy SELECT(WR(j),WI(j)) = .TRUE. after ordering, since
+*          ordering may change the value of complex eigenvalues
+*          (especially if the eigenvalue is ill-conditioned); in this
+*          case INFO may be set to N+3 (see INFO below).
+*
+*  SENSE   (input) CHARACTER*1
+*          Determines which reciprocal condition numbers are computed.
+*          = 'N': None are computed;
+*          = 'E': Computed for average of selected eigenvalues only;
+*          = 'V': Computed for selected right invariant subspace only;
+*          = 'B': Computed for both.
+*          If SENSE = 'E', 'V' or 'B', SORT must equal 'S'.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A. N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA, N)
+*          On entry, the N-by-N matrix A.
+*          On exit, A is overwritten by its real Schur form T.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  SDIM    (output) INTEGER
+*          If SORT = 'N', SDIM = 0.
+*          If SORT = 'S', SDIM = number of eigenvalues (after sorting)
+*                         for which SELECT is true. (Complex conjugate
+*                         pairs for which SELECT is true for either
+*                         eigenvalue count as 2.)
+*
+*  WR      (output) DOUBLE PRECISION array, dimension (N)
+*  WI      (output) DOUBLE PRECISION array, dimension (N)
+*          WR and WI contain the real and imaginary parts, respectively,
+*          of the computed eigenvalues, in the same order that they
+*          appear on the diagonal of the output Schur form T.  Complex
+*          conjugate pairs of eigenvalues appear consecutively with the
+*          eigenvalue having the positive imaginary part first.
+*
+*  VS      (output) DOUBLE PRECISION array, dimension (LDVS,N)
+*          If JOBVS = 'V', VS contains the orthogonal matrix Z of Schur
+*          vectors.
+*          If JOBVS = 'N', VS is not referenced.
+*
+*  LDVS    (input) INTEGER
+*          The leading dimension of the array VS.  LDVS >= 1, and if
+*          JOBVS = 'V', LDVS >= N.
+*
+*  RCONDE  (output) DOUBLE PRECISION
+*          If SENSE = 'E' or 'B', RCONDE contains the reciprocal
+*          condition number for the average of the selected eigenvalues.
+*          Not referenced if SENSE = 'N' or 'V'.
+*
+*  RCONDV  (output) DOUBLE PRECISION
+*          If SENSE = 'V' or 'B', RCONDV contains the reciprocal
+*          condition number for the selected right invariant subspace.
+*          Not referenced if SENSE = 'N' or 'E'.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,3*N).
+*          Also, if SENSE = 'E' or 'V' or 'B',
+*          LWORK >= N+2*SDIM*(N-SDIM), where SDIM is the number of
+*          selected eigenvalues computed by this routine.  Note that
+*          N+2*SDIM*(N-SDIM) <= N+N*N/2.
+*          For good performance, LWORK must generally be larger.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          Not referenced if SENSE = 'N' or 'E'.
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.
+*          LIWORK >= 1; if SENSE = 'V' or 'B', LIWORK >= SDIM*(N-SDIM).
+*
+*  BWORK   (workspace) LOGICAL array, dimension (N)
+*          Not referenced if SORT = 'N'.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value.
+*          > 0: if INFO = i, and i is
+*             <= N: the QR algorithm failed to compute all the
+*                   eigenvalues; elements 1:ILO-1 and i+1:N of WR and WI
+*                   contain those eigenvalues which have converged; if
+*                   JOBVS = 'V', VS contains the transformation which
+*                   reduces A to its partially converged Schur form.
+*             = N+1: the eigenvalues could not be reordered because some
+*                   eigenvalues were too close to separate (the problem
+*                   is very ill-conditioned);
+*             = N+2: after reordering, roundoff changed values of some
+*                   complex eigenvalues so that leading eigenvalues in
+*                   the Schur form no longer satisfy SELECT=.TRUE.  This
+*                   could also be caused by underflow due to scaling.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            CURSL, LASTSL, LST2SL, SCALEA, WANTSB, WANTSE,
+     $                   WANTSN, WANTST, WANTSV, WANTVS
+      INTEGER            HSWORK, I, I1, I2, IBAL, ICOND, IERR, IEVAL,
+     $                   IHI, ILO, INXT, IP, ITAU, IWRK, K, MAXB,
+     $                   MAXWRK, MINWRK
+      DOUBLE PRECISION   ANRM, BIGNUM, CSCALE, EPS, SMLNUM
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   DUM( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DGEBAK, DGEBAL, DGEHRD, DHSEQR, DLACPY,
+     $                   DLASCL, DORGHR, DSWAP, DTRSEN, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, DLANGE
+      EXTERNAL           LSAME, ILAENV, DLAMCH, DLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      WANTVS = LSAME( JOBVS, 'V' )
+      WANTST = LSAME( SORT, 'S' )
+      WANTSN = LSAME( SENSE, 'N' )
+      WANTSE = LSAME( SENSE, 'E' )
+      WANTSV = LSAME( SENSE, 'V' )
+      WANTSB = LSAME( SENSE, 'B' )
+      IF( ( .NOT.WANTVS ) .AND. ( .NOT.LSAME( JOBVS, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSV .OR. WANTSB ) .OR.
+     $         ( .NOT.WANTST .AND. .NOT.WANTSN ) ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDVS.LT.1 .OR. ( WANTVS .AND. LDVS.LT.N ) ) THEN
+         INFO = -12
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "RWorkspace:" describe the
+*       minimal amount of real workspace needed at that point in the
+*       code, as well as the preferred amount for good performance.
+*       IWorkspace refers to integer workspace.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV.
+*       HSWORK refers to the workspace preferred by DHSEQR, as
+*       calculated below. HSWORK is computed assuming ILO=1 and IHI=N,
+*       the worst case.
+*       If SENSE = 'E', 'V' or 'B', then the amount of workspace needed
+*       depends on SDIM, which is computed by the routine DTRSEN later
+*       in the code.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. LWORK.GE.1 ) THEN
+         MAXWRK = 2*N + N*ILAENV( 1, 'DGEHRD', ' ', N, 1, N, 0 )
+         MINWRK = MAX( 1, 3*N )
+         IF( .NOT.WANTVS ) THEN
+            MAXB = MAX( ILAENV( 8, 'DHSEQR', 'SN', N, 1, N, -1 ), 2 )
+            K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'DHSEQR', 'SN', N, 1,
+     $          N, -1 ) ) )
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, N+HSWORK, 1 )
+         ELSE
+            MAXWRK = MAX( MAXWRK, 2*N+( N-1 )*
+     $               ILAENV( 1, 'DORGHR', ' ', N, 1, N, -1 ) )
+            MAXB = MAX( ILAENV( 8, 'DHSEQR', 'SV', N, 1, N, -1 ), 2 )
+            K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'DHSEQR', 'SV', N, 1,
+     $          N, -1 ) ) )
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, N+HSWORK, 1 )
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+      IF( LWORK.LT.MINWRK ) THEN
+         INFO = -16
+      END IF
+      IF( LIWORK.LT.1 ) THEN
+         INFO = -18
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGEESX', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         SDIM = 0
+         RETURN
+      END IF
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'P' )
+      SMLNUM = DLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SQRT( SMLNUM ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = DLANGE( 'M', N, N, A, LDA, DUM )
+      SCALEA = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = SMLNUM
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = BIGNUM
+      END IF
+      IF( SCALEA )
+     $   CALL DLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR )
+*
+*     Permute the matrix to make it more nearly triangular
+*     (RWorkspace: need N)
+*
+      IBAL = 1
+      CALL DGEBAL( 'P', N, A, LDA, ILO, IHI, WORK( IBAL ), IERR )
+*
+*     Reduce to upper Hessenberg form
+*     (RWorkspace: need 3*N, prefer 2*N+N*NB)
+*
+      ITAU = N + IBAL
+      IWRK = N + ITAU
+      CALL DGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ),
+     $             LWORK-IWRK+1, IERR )
+*
+      IF( WANTVS ) THEN
+*
+*        Copy Householder vectors to VS
+*
+         CALL DLACPY( 'L', N, N, A, LDA, VS, LDVS )
+*
+*        Generate orthogonal matrix in VS
+*        (RWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
+*
+         CALL DORGHR( N, ILO, IHI, VS, LDVS, WORK( ITAU ), WORK( IWRK ),
+     $                LWORK-IWRK+1, IERR )
+      END IF
+*
+      SDIM = 0
+*
+*     Perform QR iteration, accumulating Schur vectors in VS if desired
+*     (RWorkspace: need N+1, prefer N+HSWORK (see comments) )
+*
+      IWRK = ITAU
+      CALL DHSEQR( 'S', JOBVS, N, ILO, IHI, A, LDA, WR, WI, VS, LDVS,
+     $             WORK( IWRK ), LWORK-IWRK+1, IEVAL )
+      IF( IEVAL.GT.0 )
+     $   INFO = IEVAL
+*
+*     Sort eigenvalues if desired
+*
+      IF( WANTST .AND. INFO.EQ.0 ) THEN
+         IF( SCALEA ) THEN
+            CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, WR, N, IERR )
+            CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, WI, N, IERR )
+         END IF
+         DO 10 I = 1, N
+            BWORK( I ) = SELECT( WR( I ), WI( I ) )
+   10    CONTINUE
+*
+*        Reorder eigenvalues, transform Schur vectors, and compute
+*        reciprocal condition numbers
+*        (RWorkspace: if SENSE is not 'N', need N+2*SDIM*(N-SDIM)
+*                     otherwise, need N )
+*        (IWorkspace: if SENSE is 'V' or 'B', need SDIM*(N-SDIM)
+*                     otherwise, need 0 )
+*
+         CALL DTRSEN( SENSE, JOBVS, BWORK, N, A, LDA, VS, LDVS, WR, WI,
+     $                SDIM, RCONDE, RCONDV, WORK( IWRK ), LWORK-IWRK+1,
+     $                IWORK, LIWORK, ICOND )
+         IF( .NOT.WANTSN )
+     $      MAXWRK = MAX( MAXWRK, N+2*SDIM*( N-SDIM ) )
+         IF( ICOND.EQ.-15 ) THEN
+*
+*           Not enough real workspace
+*
+            INFO = -16
+         ELSE IF( ICOND.EQ.-17 ) THEN
+*
+*           Not enough integer workspace
+*
+            INFO = -18
+         ELSE IF( ICOND.GT.0 ) THEN
+*
+*           DTRSEN failed to reorder or to restore standard Schur form
+*
+            INFO = ICOND + N
+         END IF
+      END IF
+*
+      IF( WANTVS ) THEN
+*
+*        Undo balancing
+*        (RWorkspace: need N)
+*
+         CALL DGEBAK( 'P', 'R', N, ILO, IHI, WORK( IBAL ), N, VS, LDVS,
+     $                IERR )
+      END IF
+*
+      IF( SCALEA ) THEN
+*
+*        Undo scaling for the Schur form of A
+*
+         CALL DLASCL( 'H', 0, 0, CSCALE, ANRM, N, N, A, LDA, IERR )
+         CALL DCOPY( N, A, LDA+1, WR, 1 )
+         IF( ( WANTSV .OR. WANTSB ) .AND. INFO.EQ.0 ) THEN
+            DUM( 1 ) = RCONDV
+            CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, 1, 1, DUM, 1, IERR )
+            RCONDV = DUM( 1 )
+         END IF
+         IF( CSCALE.EQ.SMLNUM ) THEN
+*
+*           If scaling back towards underflow, adjust WI if an
+*           offdiagonal element of a 2-by-2 block in the Schur form
+*           underflows.
+*
+            IF( IEVAL.GT.0 ) THEN
+               I1 = IEVAL + 1
+               I2 = IHI - 1
+               CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WI, N,
+     $                      IERR )
+            ELSE IF( WANTST ) THEN
+               I1 = 1
+               I2 = N - 1
+            ELSE
+               I1 = ILO
+               I2 = IHI - 1
+            END IF
+            INXT = I1 - 1
+            DO 20 I = I1, I2
+               IF( I.LT.INXT )
+     $            GO TO 20
+               IF( WI( I ).EQ.ZERO ) THEN
+                  INXT = I + 1
+               ELSE
+                  IF( A( I+1, I ).EQ.ZERO ) THEN
+                     WI( I ) = ZERO
+                     WI( I+1 ) = ZERO
+                  ELSE IF( A( I+1, I ).NE.ZERO .AND. A( I, I+1 ).EQ.
+     $                     ZERO ) THEN
+                     WI( I ) = ZERO
+                     WI( I+1 ) = ZERO
+                     IF( I.GT.1 )
+     $                  CALL DSWAP( I-1, A( 1, I ), 1, A( 1, I+1 ), 1 )
+                     IF( N.GT.I+1 )
+     $                  CALL DSWAP( N-I-1, A( I, I+2 ), LDA,
+     $                              A( I+1, I+2 ), LDA )
+                     CALL DSWAP( N, VS( 1, I ), 1, VS( 1, I+1 ), 1 )
+                     A( I, I+1 ) = A( I+1, I )
+                     A( I+1, I ) = ZERO
+                  END IF
+                  INXT = I + 2
+               END IF
+   20       CONTINUE
+         END IF
+         CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N-IEVAL, 1,
+     $                WI( IEVAL+1 ), MAX( N-IEVAL, 1 ), IERR )
+      END IF
+*
+      IF( WANTST .AND. INFO.EQ.0 ) THEN
+*
+*        Check if reordering successful
+*
+         LASTSL = .TRUE.
+         LST2SL = .TRUE.
+         SDIM = 0
+         IP = 0
+         DO 30 I = 1, N
+            CURSL = SELECT( WR( I ), WI( I ) )
+            IF( WI( I ).EQ.ZERO ) THEN
+               IF( CURSL )
+     $            SDIM = SDIM + 1
+               IP = 0
+               IF( CURSL .AND. .NOT.LASTSL )
+     $            INFO = N + 2
+            ELSE
+               IF( IP.EQ.1 ) THEN
+*
+*                 Last eigenvalue of conjugate pair
+*
+                  CURSL = CURSL .OR. LASTSL
+                  LASTSL = CURSL
+                  IF( CURSL )
+     $               SDIM = SDIM + 2
+                  IP = -1
+                  IF( CURSL .AND. .NOT.LST2SL )
+     $               INFO = N + 2
+               ELSE
+*
+*                 First eigenvalue of conjugate pair
+*
+                  IP = 1
+               END IF
+            END IF
+            LST2SL = LASTSL
+            LASTSL = CURSL
+   30    CONTINUE
+      END IF
+*
+      WORK( 1 ) = MAXWRK
+      IF( WANTSV .OR. WANTSB ) THEN
+         IWORK( 1 ) = SDIM*( N-SDIM )
+      ELSE
+         IWORK( 1 ) = 1
+      END IF
+*
+      RETURN
+*
+*     End of DGEESX
+*
+      END
diff --git a/costa/native/external/lapack/dgeev.f b/costa/native/external/lapack/dgeev.f
new file mode 100644
index 000000000..cdd3d2d6e
--- /dev/null
+++ b/costa/native/external/lapack/dgeev.f
@@ -0,0 +1,409 @@
+      SUBROUTINE DGEEV( JOBVL, JOBVR, N, A, LDA, WR, WI, VL, LDVL, VR,
+     $                  LDVR, WORK, LWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     December 8, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVL, JOBVR
+      INTEGER            INFO, LDA, LDVL, LDVR, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ),
+     $                   WI( * ), WORK( * ), WR( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGEEV computes for an N-by-N real nonsymmetric matrix A, the
+*  eigenvalues and, optionally, the left and/or right eigenvectors.
+*
+*  The right eigenvector v(j) of A satisfies
+*                   A * v(j) = lambda(j) * v(j)
+*  where lambda(j) is its eigenvalue.
+*  The left eigenvector u(j) of A satisfies
+*                u(j)**H * A = lambda(j) * u(j)**H
+*  where u(j)**H denotes the conjugate transpose of u(j).
+*
+*  The computed eigenvectors are normalized to have Euclidean norm
+*  equal to 1 and largest component real.
+*
+*  Arguments
+*  =========
+*
+*  JOBVL   (input) CHARACTER*1
+*          = 'N': left eigenvectors of A are not computed;
+*          = 'V': left eigenvectors of A are computed.
+*
+*  JOBVR   (input) CHARACTER*1
+*          = 'N': right eigenvectors of A are not computed;
+*          = 'V': right eigenvectors of A are computed.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A. N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the N-by-N matrix A.
+*          On exit, A has been overwritten.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  WR      (output) DOUBLE PRECISION array, dimension (N)
+*  WI      (output) DOUBLE PRECISION array, dimension (N)
+*          WR and WI contain the real and imaginary parts,
+*          respectively, of the computed eigenvalues.  Complex
+*          conjugate pairs of eigenvalues appear consecutively
+*          with the eigenvalue having the positive imaginary part
+*          first.
+*
+*  VL      (output) DOUBLE PRECISION array, dimension (LDVL,N)
+*          If JOBVL = 'V', the left eigenvectors u(j) are stored one
+*          after another in the columns of VL, in the same order
+*          as their eigenvalues.
+*          If JOBVL = 'N', VL is not referenced.
+*          If the j-th eigenvalue is real, then u(j) = VL(:,j),
+*          the j-th column of VL.
+*          If the j-th and (j+1)-st eigenvalues form a complex
+*          conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and
+*          u(j+1) = VL(:,j) - i*VL(:,j+1).
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the array VL.  LDVL >= 1; if
+*          JOBVL = 'V', LDVL >= N.
+*
+*  VR      (output) DOUBLE PRECISION array, dimension (LDVR,N)
+*          If JOBVR = 'V', the right eigenvectors v(j) are stored one
+*          after another in the columns of VR, in the same order
+*          as their eigenvalues.
+*          If JOBVR = 'N', VR is not referenced.
+*          If the j-th eigenvalue is real, then v(j) = VR(:,j),
+*          the j-th column of VR.
+*          If the j-th and (j+1)-st eigenvalues form a complex
+*          conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and
+*          v(j+1) = VR(:,j) - i*VR(:,j+1).
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the array VR.  LDVR >= 1; if
+*          JOBVR = 'V', LDVR >= N.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,3*N), and
+*          if JOBVL = 'V' or JOBVR = 'V', LWORK >= 4*N.  For good
+*          performance, LWORK must generally be larger.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = i, the QR algorithm failed to compute all the
+*                eigenvalues, and no eigenvectors have been computed;
+*                elements i+1:N of WR and WI contain eigenvalues which
+*                have converged.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, SCALEA, WANTVL, WANTVR
+      CHARACTER          SIDE
+      INTEGER            HSWORK, I, IBAL, IERR, IHI, ILO, ITAU, IWRK, K,
+     $                   MAXB, MAXWRK, MINWRK, NOUT
+      DOUBLE PRECISION   ANRM, BIGNUM, CS, CSCALE, EPS, R, SCL, SMLNUM,
+     $                   SN
+*     ..
+*     .. Local Arrays ..
+      LOGICAL            SELECT( 1 )
+      DOUBLE PRECISION   DUM( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEBAK, DGEBAL, DGEHRD, DHSEQR, DLACPY, DLARTG,
+     $                   DLASCL, DORGHR, DROT, DSCAL, DTREVC, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IDAMAX, ILAENV
+      DOUBLE PRECISION   DLAMCH, DLANGE, DLAPY2, DNRM2
+      EXTERNAL           LSAME, IDAMAX, ILAENV, DLAMCH, DLANGE, DLAPY2,
+     $                   DNRM2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      WANTVL = LSAME( JOBVL, 'V' )
+      WANTVR = LSAME( JOBVR, 'V' )
+      IF( ( .NOT.WANTVL ) .AND. ( .NOT.LSAME( JOBVL, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( ( .NOT.WANTVR ) .AND. ( .NOT.LSAME( JOBVR, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDVL.LT.1 .OR. ( WANTVL .AND. LDVL.LT.N ) ) THEN
+         INFO = -9
+      ELSE IF( LDVR.LT.1 .OR. ( WANTVR .AND. LDVR.LT.N ) ) THEN
+         INFO = -11
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV.
+*       HSWORK refers to the workspace preferred by DHSEQR, as
+*       calculated below. HSWORK is computed assuming ILO=1 and IHI=N,
+*       the worst case.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN
+         MAXWRK = 2*N + N*ILAENV( 1, 'DGEHRD', ' ', N, 1, N, 0 )
+         IF( ( .NOT.WANTVL ) .AND. ( .NOT.WANTVR ) ) THEN
+            MINWRK = MAX( 1, 3*N )
+            MAXB = MAX( ILAENV( 8, 'DHSEQR', 'EN', N, 1, N, -1 ), 2 )
+            K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'DHSEQR', 'EN', N, 1,
+     $          N, -1 ) ) )
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, N+1, N+HSWORK )
+         ELSE
+            MINWRK = MAX( 1, 4*N )
+            MAXWRK = MAX( MAXWRK, 2*N+( N-1 )*
+     $               ILAENV( 1, 'DORGHR', ' ', N, 1, N, -1 ) )
+            MAXB = MAX( ILAENV( 8, 'DHSEQR', 'SV', N, 1, N, -1 ), 2 )
+            K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'DHSEQR', 'SV', N, 1,
+     $          N, -1 ) ) )
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, N+1, N+HSWORK )
+            MAXWRK = MAX( MAXWRK, 4*N )
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGEEV ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'P' )
+      SMLNUM = DLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SQRT( SMLNUM ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = DLANGE( 'M', N, N, A, LDA, DUM )
+      SCALEA = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = SMLNUM
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = BIGNUM
+      END IF
+      IF( SCALEA )
+     $   CALL DLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR )
+*
+*     Balance the matrix
+*     (Workspace: need N)
+*
+      IBAL = 1
+      CALL DGEBAL( 'B', N, A, LDA, ILO, IHI, WORK( IBAL ), IERR )
+*
+*     Reduce to upper Hessenberg form
+*     (Workspace: need 3*N, prefer 2*N+N*NB)
+*
+      ITAU = IBAL + N
+      IWRK = ITAU + N
+      CALL DGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ),
+     $             LWORK-IWRK+1, IERR )
+*
+      IF( WANTVL ) THEN
+*
+*        Want left eigenvectors
+*        Copy Householder vectors to VL
+*
+         SIDE = 'L'
+         CALL DLACPY( 'L', N, N, A, LDA, VL, LDVL )
+*
+*        Generate orthogonal matrix in VL
+*        (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB)
+*
+         CALL DORGHR( N, ILO, IHI, VL, LDVL, WORK( ITAU ), WORK( IWRK ),
+     $                LWORK-IWRK+1, IERR )
+*
+*        Perform QR iteration, accumulating Schur vectors in VL
+*        (Workspace: need N+1, prefer N+HSWORK (see comments) )
+*
+         IWRK = ITAU
+         CALL DHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, WR, WI, VL, LDVL,
+     $                WORK( IWRK ), LWORK-IWRK+1, INFO )
+*
+         IF( WANTVR ) THEN
+*
+*           Want left and right eigenvectors
+*           Copy Schur vectors to VR
+*
+            SIDE = 'B'
+            CALL DLACPY( 'F', N, N, VL, LDVL, VR, LDVR )
+         END IF
+*
+      ELSE IF( WANTVR ) THEN
+*
+*        Want right eigenvectors
+*        Copy Householder vectors to VR
+*
+         SIDE = 'R'
+         CALL DLACPY( 'L', N, N, A, LDA, VR, LDVR )
+*
+*        Generate orthogonal matrix in VR
+*        (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB)
+*
+         CALL DORGHR( N, ILO, IHI, VR, LDVR, WORK( ITAU ), WORK( IWRK ),
+     $                LWORK-IWRK+1, IERR )
+*
+*        Perform QR iteration, accumulating Schur vectors in VR
+*        (Workspace: need N+1, prefer N+HSWORK (see comments) )
+*
+         IWRK = ITAU
+         CALL DHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, WR, WI, VR, LDVR,
+     $                WORK( IWRK ), LWORK-IWRK+1, INFO )
+*
+      ELSE
+*
+*        Compute eigenvalues only
+*        (Workspace: need N+1, prefer N+HSWORK (see comments) )
+*
+         IWRK = ITAU
+         CALL DHSEQR( 'E', 'N', N, ILO, IHI, A, LDA, WR, WI, VR, LDVR,
+     $                WORK( IWRK ), LWORK-IWRK+1, INFO )
+      END IF
+*
+*     If INFO > 0 from DHSEQR, then quit
+*
+      IF( INFO.GT.0 )
+     $   GO TO 50
+*
+      IF( WANTVL .OR. WANTVR ) THEN
+*
+*        Compute left and/or right eigenvectors
+*        (Workspace: need 4*N)
+*
+         CALL DTREVC( SIDE, 'B', SELECT, N, A, LDA, VL, LDVL, VR, LDVR,
+     $                N, NOUT, WORK( IWRK ), IERR )
+      END IF
+*
+      IF( WANTVL ) THEN
+*
+*        Undo balancing of left eigenvectors
+*        (Workspace: need N)
+*
+         CALL DGEBAK( 'B', 'L', N, ILO, IHI, WORK( IBAL ), N, VL, LDVL,
+     $                IERR )
+*
+*        Normalize left eigenvectors and make largest component real
+*
+         DO 20 I = 1, N
+            IF( WI( I ).EQ.ZERO ) THEN
+               SCL = ONE / DNRM2( N, VL( 1, I ), 1 )
+               CALL DSCAL( N, SCL, VL( 1, I ), 1 )
+            ELSE IF( WI( I ).GT.ZERO ) THEN
+               SCL = ONE / DLAPY2( DNRM2( N, VL( 1, I ), 1 ),
+     $               DNRM2( N, VL( 1, I+1 ), 1 ) )
+               CALL DSCAL( N, SCL, VL( 1, I ), 1 )
+               CALL DSCAL( N, SCL, VL( 1, I+1 ), 1 )
+               DO 10 K = 1, N
+                  WORK( IWRK+K-1 ) = VL( K, I )**2 + VL( K, I+1 )**2
+   10          CONTINUE
+               K = IDAMAX( N, WORK( IWRK ), 1 )
+               CALL DLARTG( VL( K, I ), VL( K, I+1 ), CS, SN, R )
+               CALL DROT( N, VL( 1, I ), 1, VL( 1, I+1 ), 1, CS, SN )
+               VL( K, I+1 ) = ZERO
+            END IF
+   20    CONTINUE
+      END IF
+*
+      IF( WANTVR ) THEN
+*
+*        Undo balancing of right eigenvectors
+*        (Workspace: need N)
+*
+         CALL DGEBAK( 'B', 'R', N, ILO, IHI, WORK( IBAL ), N, VR, LDVR,
+     $                IERR )
+*
+*        Normalize right eigenvectors and make largest component real
+*
+         DO 40 I = 1, N
+            IF( WI( I ).EQ.ZERO ) THEN
+               SCL = ONE / DNRM2( N, VR( 1, I ), 1 )
+               CALL DSCAL( N, SCL, VR( 1, I ), 1 )
+            ELSE IF( WI( I ).GT.ZERO ) THEN
+               SCL = ONE / DLAPY2( DNRM2( N, VR( 1, I ), 1 ),
+     $               DNRM2( N, VR( 1, I+1 ), 1 ) )
+               CALL DSCAL( N, SCL, VR( 1, I ), 1 )
+               CALL DSCAL( N, SCL, VR( 1, I+1 ), 1 )
+               DO 30 K = 1, N
+                  WORK( IWRK+K-1 ) = VR( K, I )**2 + VR( K, I+1 )**2
+   30          CONTINUE
+               K = IDAMAX( N, WORK( IWRK ), 1 )
+               CALL DLARTG( VR( K, I ), VR( K, I+1 ), CS, SN, R )
+               CALL DROT( N, VR( 1, I ), 1, VR( 1, I+1 ), 1, CS, SN )
+               VR( K, I+1 ) = ZERO
+            END IF
+   40    CONTINUE
+      END IF
+*
+*     Undo scaling if necessary
+*
+   50 CONTINUE
+      IF( SCALEA ) THEN
+         CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, WR( INFO+1 ),
+     $                MAX( N-INFO, 1 ), IERR )
+         CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, WI( INFO+1 ),
+     $                MAX( N-INFO, 1 ), IERR )
+         IF( INFO.GT.0 ) THEN
+            CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WR, N,
+     $                   IERR )
+            CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WI, N,
+     $                   IERR )
+         END IF
+      END IF
+*
+      WORK( 1 ) = MAXWRK
+      RETURN
+*
+*     End of DGEEV
+*
+      END
diff --git a/costa/native/external/lapack/dgeevx.f b/costa/native/external/lapack/dgeevx.f
new file mode 100644
index 000000000..f6f173b23
--- /dev/null
+++ b/costa/native/external/lapack/dgeevx.f
@@ -0,0 +1,544 @@
+      SUBROUTINE DGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, WR, WI,
+     $                   VL, LDVL, VR, LDVR, ILO, IHI, SCALE, ABNRM,
+     $                   RCONDE, RCONDV, WORK, LWORK, IWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          BALANC, JOBVL, JOBVR, SENSE
+      INTEGER            IHI, ILO, INFO, LDA, LDVL, LDVR, LWORK, N
+      DOUBLE PRECISION   ABNRM
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), RCONDE( * ), RCONDV( * ),
+     $                   SCALE( * ), VL( LDVL, * ), VR( LDVR, * ),
+     $                   WI( * ), WORK( * ), WR( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGEEVX computes for an N-by-N real nonsymmetric matrix A, the
+*  eigenvalues and, optionally, the left and/or right eigenvectors.
+*
+*  Optionally also, it computes a balancing transformation to improve
+*  the conditioning of the eigenvalues and eigenvectors (ILO, IHI,
+*  SCALE, and ABNRM), reciprocal condition numbers for the eigenvalues
+*  (RCONDE), and reciprocal condition numbers for the right
+*  eigenvectors (RCONDV).
+*
+*  The right eigenvector v(j) of A satisfies
+*                   A * v(j) = lambda(j) * v(j)
+*  where lambda(j) is its eigenvalue.
+*  The left eigenvector u(j) of A satisfies
+*                u(j)**H * A = lambda(j) * u(j)**H
+*  where u(j)**H denotes the conjugate transpose of u(j).
+*
+*  The computed eigenvectors are normalized to have Euclidean norm
+*  equal to 1 and largest component real.
+*
+*  Balancing a matrix means permuting the rows and columns to make it
+*  more nearly upper triangular, and applying a diagonal similarity
+*  transformation D * A * D**(-1), where D is a diagonal matrix, to
+*  make its rows and columns closer in norm and the condition numbers
+*  of its eigenvalues and eigenvectors smaller.  The computed
+*  reciprocal condition numbers correspond to the balanced matrix.
+*  Permuting rows and columns will not change the condition numbers
+*  (in exact arithmetic) but diagonal scaling will.  For further
+*  explanation of balancing, see section 4.10.2 of the LAPACK
+*  Users' Guide.
+*
+*  Arguments
+*  =========
+*
+*  BALANC  (input) CHARACTER*1
+*          Indicates how the input matrix should be diagonally scaled
+*          and/or permuted to improve the conditioning of its
+*          eigenvalues.
+*          = 'N': Do not diagonally scale or permute;
+*          = 'P': Perform permutations to make the matrix more nearly
+*                 upper triangular. Do not diagonally scale;
+*          = 'S': Diagonally scale the matrix, i.e. replace A by
+*                 D*A*D**(-1), where D is a diagonal matrix chosen
+*                 to make the rows and columns of A more equal in
+*                 norm. Do not permute;
+*          = 'B': Both diagonally scale and permute A.
+*
+*          Computed reciprocal condition numbers will be for the matrix
+*          after balancing and/or permuting. Permuting does not change
+*          condition numbers (in exact arithmetic), but balancing does.
+*
+*  JOBVL   (input) CHARACTER*1
+*          = 'N': left eigenvectors of A are not computed;
+*          = 'V': left eigenvectors of A are computed.
+*          If SENSE = 'E' or 'B', JOBVL must = 'V'.
+*
+*  JOBVR   (input) CHARACTER*1
+*          = 'N': right eigenvectors of A are not computed;
+*          = 'V': right eigenvectors of A are computed.
+*          If SENSE = 'E' or 'B', JOBVR must = 'V'.
+*
+*  SENSE   (input) CHARACTER*1
+*          Determines which reciprocal condition numbers are computed.
+*          = 'N': None are computed;
+*          = 'E': Computed for eigenvalues only;
+*          = 'V': Computed for right eigenvectors only;
+*          = 'B': Computed for eigenvalues and right eigenvectors.
+*
+*          If SENSE = 'E' or 'B', both left and right eigenvectors
+*          must also be computed (JOBVL = 'V' and JOBVR = 'V').
+*
+*  N       (input) INTEGER
+*          The order of the matrix A. N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the N-by-N matrix A.
+*          On exit, A has been overwritten.  If JOBVL = 'V' or
+*          JOBVR = 'V', A contains the real Schur form of the balanced
+*          version of the input matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  WR      (output) DOUBLE PRECISION array, dimension (N)
+*  WI      (output) DOUBLE PRECISION array, dimension (N)
+*          WR and WI contain the real and imaginary parts,
+*          respectively, of the computed eigenvalues.  Complex
+*          conjugate pairs of eigenvalues will appear consecutively
+*          with the eigenvalue having the positive imaginary part
+*          first.
+*
+*  VL      (output) DOUBLE PRECISION array, dimension (LDVL,N)
+*          If JOBVL = 'V', the left eigenvectors u(j) are stored one
+*          after another in the columns of VL, in the same order
+*          as their eigenvalues.
+*          If JOBVL = 'N', VL is not referenced.
+*          If the j-th eigenvalue is real, then u(j) = VL(:,j),
+*          the j-th column of VL.
+*          If the j-th and (j+1)-st eigenvalues form a complex
+*          conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and
+*          u(j+1) = VL(:,j) - i*VL(:,j+1).
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the array VL.  LDVL >= 1; if
+*          JOBVL = 'V', LDVL >= N.
+*
+*  VR      (output) DOUBLE PRECISION array, dimension (LDVR,N)
+*          If JOBVR = 'V', the right eigenvectors v(j) are stored one
+*          after another in the columns of VR, in the same order
+*          as their eigenvalues.
+*          If JOBVR = 'N', VR is not referenced.
+*          If the j-th eigenvalue is real, then v(j) = VR(:,j),
+*          the j-th column of VR.
+*          If the j-th and (j+1)-st eigenvalues form a complex
+*          conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and
+*          v(j+1) = VR(:,j) - i*VR(:,j+1).
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the array VR.  LDVR >= 1, and if
+*          JOBVR = 'V', LDVR >= N.
+*
+*  ILO,IHI (output) INTEGER
+*          ILO and IHI are integer values determined when A was
+*          balanced.  The balanced A(i,j) = 0 if I > J and
+*          J = 1,...,ILO-1 or I = IHI+1,...,N.
+*
+*  SCALE   (output) DOUBLE PRECISION array, dimension (N)
+*          Details of the permutations and scaling factors applied
+*          when balancing A.  If P(j) is the index of the row and column
+*          interchanged with row and column j, and D(j) is the scaling
+*          factor applied to row and column j, then
+*          SCALE(J) = P(J),    for J = 1,...,ILO-1
+*                   = D(J),    for J = ILO,...,IHI
+*                   = P(J)     for J = IHI+1,...,N.
+*          The order in which the interchanges are made is N to IHI+1,
+*          then 1 to ILO-1.
+*
+*  ABNRM   (output) DOUBLE PRECISION
+*          The one-norm of the balanced matrix (the maximum
+*          of the sum of absolute values of elements of any column).
+*
+*  RCONDE  (output) DOUBLE PRECISION array, dimension (N)
+*          RCONDE(j) is the reciprocal condition number of the j-th
+*          eigenvalue.
+*
+*  RCONDV  (output) DOUBLE PRECISION array, dimension (N)
+*          RCONDV(j) is the reciprocal condition number of the j-th
+*          right eigenvector.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.   If SENSE = 'N' or 'E',
+*          LWORK >= max(1,2*N), and if JOBVL = 'V' or JOBVR = 'V',
+*          LWORK >= 3*N.  If SENSE = 'V' or 'B', LWORK >= N*(N+6).
+*          For good performance, LWORK must generally be larger.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace) INTEGER array, dimension (2*N-2)
+*          If SENSE = 'N' or 'E', not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = i, the QR algorithm failed to compute all the
+*                eigenvalues, and no eigenvectors or condition numbers
+*                have been computed; elements 1:ILO-1 and i+1:N of WR
+*                and WI contain eigenvalues which have converged.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, SCALEA, WANTVL, WANTVR, WNTSNB, WNTSNE,
+     $                   WNTSNN, WNTSNV
+      CHARACTER          JOB, SIDE
+      INTEGER            HSWORK, I, ICOND, IERR, ITAU, IWRK, K, MAXB,
+     $                   MAXWRK, MINWRK, NOUT
+      DOUBLE PRECISION   ANRM, BIGNUM, CS, CSCALE, EPS, R, SCL, SMLNUM,
+     $                   SN
+*     ..
+*     .. Local Arrays ..
+      LOGICAL            SELECT( 1 )
+      DOUBLE PRECISION   DUM( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEBAK, DGEBAL, DGEHRD, DHSEQR, DLACPY, DLARTG,
+     $                   DLASCL, DORGHR, DROT, DSCAL, DTREVC, DTRSNA,
+     $                   XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IDAMAX, ILAENV
+      DOUBLE PRECISION   DLAMCH, DLANGE, DLAPY2, DNRM2
+      EXTERNAL           LSAME, IDAMAX, ILAENV, DLAMCH, DLANGE, DLAPY2,
+     $                   DNRM2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      WANTVL = LSAME( JOBVL, 'V' )
+      WANTVR = LSAME( JOBVR, 'V' )
+      WNTSNN = LSAME( SENSE, 'N' )
+      WNTSNE = LSAME( SENSE, 'E' )
+      WNTSNV = LSAME( SENSE, 'V' )
+      WNTSNB = LSAME( SENSE, 'B' )
+      IF( .NOT.( LSAME( BALANC, 'N' ) .OR. LSAME( BALANC,
+     $    'S' ) .OR. LSAME( BALANC, 'P' ) .OR. LSAME( BALANC, 'B' ) ) )
+     $     THEN
+         INFO = -1
+      ELSE IF( ( .NOT.WANTVL ) .AND. ( .NOT.LSAME( JOBVL, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( ( .NOT.WANTVR ) .AND. ( .NOT.LSAME( JOBVR, 'N' ) ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.( WNTSNN .OR. WNTSNE .OR. WNTSNB .OR. WNTSNV ) .OR.
+     $         ( ( WNTSNE .OR. WNTSNB ) .AND. .NOT.( WANTVL .AND.
+     $         WANTVR ) ) ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDVL.LT.1 .OR. ( WANTVL .AND. LDVL.LT.N ) ) THEN
+         INFO = -11
+      ELSE IF( LDVR.LT.1 .OR. ( WANTVR .AND. LDVR.LT.N ) ) THEN
+         INFO = -13
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV.
+*       HSWORK refers to the workspace preferred by DHSEQR, as
+*       calculated below. HSWORK is computed assuming ILO=1 and IHI=N,
+*       the worst case.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN
+         MAXWRK = N + N*ILAENV( 1, 'DGEHRD', ' ', N, 1, N, 0 )
+         IF( ( .NOT.WANTVL ) .AND. ( .NOT.WANTVR ) ) THEN
+            MINWRK = MAX( 1, 2*N )
+            IF( .NOT.WNTSNN )
+     $         MINWRK = MAX( MINWRK, N*N+6*N )
+            MAXB = MAX( ILAENV( 8, 'DHSEQR', 'SN', N, 1, N, -1 ), 2 )
+            IF( WNTSNN ) THEN
+               K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'DHSEQR', 'EN', N,
+     $             1, N, -1 ) ) )
+            ELSE
+               K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'DHSEQR', 'SN', N,
+     $             1, N, -1 ) ) )
+            END IF
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, 1, HSWORK )
+            IF( .NOT.WNTSNN )
+     $         MAXWRK = MAX( MAXWRK, N*N+6*N )
+         ELSE
+            MINWRK = MAX( 1, 3*N )
+            IF( ( .NOT.WNTSNN ) .AND. ( .NOT.WNTSNE ) )
+     $         MINWRK = MAX( MINWRK, N*N+6*N )
+            MAXB = MAX( ILAENV( 8, 'DHSEQR', 'SN', N, 1, N, -1 ), 2 )
+            K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'DHSEQR', 'EN', N, 1,
+     $          N, -1 ) ) )
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, 1, HSWORK )
+            MAXWRK = MAX( MAXWRK, N+( N-1 )*
+     $               ILAENV( 1, 'DORGHR', ' ', N, 1, N, -1 ) )
+            IF( ( .NOT.WNTSNN ) .AND. ( .NOT.WNTSNE ) )
+     $         MAXWRK = MAX( MAXWRK, N*N+6*N )
+            MAXWRK = MAX( MAXWRK, 3*N, 1 )
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
+         INFO = -21
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGEEVX', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'P' )
+      SMLNUM = DLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SQRT( SMLNUM ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ICOND = 0
+      ANRM = DLANGE( 'M', N, N, A, LDA, DUM )
+      SCALEA = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = SMLNUM
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = BIGNUM
+      END IF
+      IF( SCALEA )
+     $   CALL DLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR )
+*
+*     Balance the matrix and compute ABNRM
+*
+      CALL DGEBAL( BALANC, N, A, LDA, ILO, IHI, SCALE, IERR )
+      ABNRM = DLANGE( '1', N, N, A, LDA, DUM )
+      IF( SCALEA ) THEN
+         DUM( 1 ) = ABNRM
+         CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, 1, 1, DUM, 1, IERR )
+         ABNRM = DUM( 1 )
+      END IF
+*
+*     Reduce to upper Hessenberg form
+*     (Workspace: need 2*N, prefer N+N*NB)
+*
+      ITAU = 1
+      IWRK = ITAU + N
+      CALL DGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ),
+     $             LWORK-IWRK+1, IERR )
+*
+      IF( WANTVL ) THEN
+*
+*        Want left eigenvectors
+*        Copy Householder vectors to VL
+*
+         SIDE = 'L'
+         CALL DLACPY( 'L', N, N, A, LDA, VL, LDVL )
+*
+*        Generate orthogonal matrix in VL
+*        (Workspace: need 2*N-1, prefer N+(N-1)*NB)
+*
+         CALL DORGHR( N, ILO, IHI, VL, LDVL, WORK( ITAU ), WORK( IWRK ),
+     $                LWORK-IWRK+1, IERR )
+*
+*        Perform QR iteration, accumulating Schur vectors in VL
+*        (Workspace: need 1, prefer HSWORK (see comments) )
+*
+         IWRK = ITAU
+         CALL DHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, WR, WI, VL, LDVL,
+     $                WORK( IWRK ), LWORK-IWRK+1, INFO )
+*
+         IF( WANTVR ) THEN
+*
+*           Want left and right eigenvectors
+*           Copy Schur vectors to VR
+*
+            SIDE = 'B'
+            CALL DLACPY( 'F', N, N, VL, LDVL, VR, LDVR )
+         END IF
+*
+      ELSE IF( WANTVR ) THEN
+*
+*        Want right eigenvectors
+*        Copy Householder vectors to VR
+*
+         SIDE = 'R'
+         CALL DLACPY( 'L', N, N, A, LDA, VR, LDVR )
+*
+*        Generate orthogonal matrix in VR
+*        (Workspace: need 2*N-1, prefer N+(N-1)*NB)
+*
+         CALL DORGHR( N, ILO, IHI, VR, LDVR, WORK( ITAU ), WORK( IWRK ),
+     $                LWORK-IWRK+1, IERR )
+*
+*        Perform QR iteration, accumulating Schur vectors in VR
+*        (Workspace: need 1, prefer HSWORK (see comments) )
+*
+         IWRK = ITAU
+         CALL DHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, WR, WI, VR, LDVR,
+     $                WORK( IWRK ), LWORK-IWRK+1, INFO )
+*
+      ELSE
+*
+*        Compute eigenvalues only
+*        If condition numbers desired, compute Schur form
+*
+         IF( WNTSNN ) THEN
+            JOB = 'E'
+         ELSE
+            JOB = 'S'
+         END IF
+*
+*        (Workspace: need 1, prefer HSWORK (see comments) )
+*
+         IWRK = ITAU
+         CALL DHSEQR( JOB, 'N', N, ILO, IHI, A, LDA, WR, WI, VR, LDVR,
+     $                WORK( IWRK ), LWORK-IWRK+1, INFO )
+      END IF
+*
+*     If INFO > 0 from DHSEQR, then quit
+*
+      IF( INFO.GT.0 )
+     $   GO TO 50
+*
+      IF( WANTVL .OR. WANTVR ) THEN
+*
+*        Compute left and/or right eigenvectors
+*        (Workspace: need 3*N)
+*
+         CALL DTREVC( SIDE, 'B', SELECT, N, A, LDA, VL, LDVL, VR, LDVR,
+     $                N, NOUT, WORK( IWRK ), IERR )
+      END IF
+*
+*     Compute condition numbers if desired
+*     (Workspace: need N*N+6*N unless SENSE = 'E')
+*
+      IF( .NOT.WNTSNN ) THEN
+         CALL DTRSNA( SENSE, 'A', SELECT, N, A, LDA, VL, LDVL, VR, LDVR,
+     $                RCONDE, RCONDV, N, NOUT, WORK( IWRK ), N, IWORK,
+     $                ICOND )
+      END IF
+*
+      IF( WANTVL ) THEN
+*
+*        Undo balancing of left eigenvectors
+*
+         CALL DGEBAK( BALANC, 'L', N, ILO, IHI, SCALE, N, VL, LDVL,
+     $                IERR )
+*
+*        Normalize left eigenvectors and make largest component real
+*
+         DO 20 I = 1, N
+            IF( WI( I ).EQ.ZERO ) THEN
+               SCL = ONE / DNRM2( N, VL( 1, I ), 1 )
+               CALL DSCAL( N, SCL, VL( 1, I ), 1 )
+            ELSE IF( WI( I ).GT.ZERO ) THEN
+               SCL = ONE / DLAPY2( DNRM2( N, VL( 1, I ), 1 ),
+     $               DNRM2( N, VL( 1, I+1 ), 1 ) )
+               CALL DSCAL( N, SCL, VL( 1, I ), 1 )
+               CALL DSCAL( N, SCL, VL( 1, I+1 ), 1 )
+               DO 10 K = 1, N
+                  WORK( K ) = VL( K, I )**2 + VL( K, I+1 )**2
+   10          CONTINUE
+               K = IDAMAX( N, WORK, 1 )
+               CALL DLARTG( VL( K, I ), VL( K, I+1 ), CS, SN, R )
+               CALL DROT( N, VL( 1, I ), 1, VL( 1, I+1 ), 1, CS, SN )
+               VL( K, I+1 ) = ZERO
+            END IF
+   20    CONTINUE
+      END IF
+*
+      IF( WANTVR ) THEN
+*
+*        Undo balancing of right eigenvectors
+*
+         CALL DGEBAK( BALANC, 'R', N, ILO, IHI, SCALE, N, VR, LDVR,
+     $                IERR )
+*
+*        Normalize right eigenvectors and make largest component real
+*
+         DO 40 I = 1, N
+            IF( WI( I ).EQ.ZERO ) THEN
+               SCL = ONE / DNRM2( N, VR( 1, I ), 1 )
+               CALL DSCAL( N, SCL, VR( 1, I ), 1 )
+            ELSE IF( WI( I ).GT.ZERO ) THEN
+               SCL = ONE / DLAPY2( DNRM2( N, VR( 1, I ), 1 ),
+     $               DNRM2( N, VR( 1, I+1 ), 1 ) )
+               CALL DSCAL( N, SCL, VR( 1, I ), 1 )
+               CALL DSCAL( N, SCL, VR( 1, I+1 ), 1 )
+               DO 30 K = 1, N
+                  WORK( K ) = VR( K, I )**2 + VR( K, I+1 )**2
+   30          CONTINUE
+               K = IDAMAX( N, WORK, 1 )
+               CALL DLARTG( VR( K, I ), VR( K, I+1 ), CS, SN, R )
+               CALL DROT( N, VR( 1, I ), 1, VR( 1, I+1 ), 1, CS, SN )
+               VR( K, I+1 ) = ZERO
+            END IF
+   40    CONTINUE
+      END IF
+*
+*     Undo scaling if necessary
+*
+   50 CONTINUE
+      IF( SCALEA ) THEN
+         CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, WR( INFO+1 ),
+     $                MAX( N-INFO, 1 ), IERR )
+         CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, WI( INFO+1 ),
+     $                MAX( N-INFO, 1 ), IERR )
+         IF( INFO.EQ.0 ) THEN
+            IF( ( WNTSNV .OR. WNTSNB ) .AND. ICOND.EQ.0 )
+     $         CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, RCONDV, N,
+     $                      IERR )
+         ELSE
+            CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WR, N,
+     $                   IERR )
+            CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WI, N,
+     $                   IERR )
+         END IF
+      END IF
+*
+      WORK( 1 ) = MAXWRK
+      RETURN
+*
+*     End of DGEEVX
+*
+      END
diff --git a/costa/native/external/lapack/dgegs.f b/costa/native/external/lapack/dgegs.f
new file mode 100644
index 000000000..25a6c4b50
--- /dev/null
+++ b/costa/native/external/lapack/dgegs.f
@@ -0,0 +1,470 @@
+      SUBROUTINE DGEGS( JOBVSL, JOBVSR, N, A, LDA, B, LDB, ALPHAR,
+     $                  ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, WORK,
+     $                  LWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVSL, JOBVSR
+      INTEGER            INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), ALPHAI( * ), ALPHAR( * ),
+     $                   B( LDB, * ), BETA( * ), VSL( LDVSL, * ),
+     $                   VSR( LDVSR, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This routine is deprecated and has been replaced by routine DGGES.
+*
+*  DGEGS computes for a pair of N-by-N real nonsymmetric matrices A, B:
+*  the generalized eigenvalues (alphar +/- alphai*i, beta), the real
+*  Schur form (A, B), and optionally left and/or right Schur vectors
+*  (VSL and VSR).
+*
+*  (If only the generalized eigenvalues are needed, use the driver DGEGV
+*  instead.)
+*
+*  A generalized eigenvalue for a pair of matrices (A,B) is, roughly
+*  speaking, a scalar w or a ratio  alpha/beta = w, such that  A - w*B
+*  is singular.  It is usually represented as the pair (alpha,beta),
+*  as there is a reasonable interpretation for beta=0, and even for
+*  both being zero.  A good beginning reference is the book, "Matrix
+*  Computations", by G. Golub & C. van Loan (Johns Hopkins U. Press)
+*
+*  The (generalized) Schur form of a pair of matrices is the result of
+*  multiplying both matrices on the left by one orthogonal matrix and
+*  both on the right by another orthogonal matrix, these two orthogonal
+*  matrices being chosen so as to bring the pair of matrices into
+*  (real) Schur form.
+*
+*  A pair of matrices A, B is in generalized real Schur form if B is
+*  upper triangular with non-negative diagonal and A is block upper
+*  triangular with 1-by-1 and 2-by-2 blocks.  1-by-1 blocks correspond
+*  to real generalized eigenvalues, while 2-by-2 blocks of A will be
+*  "standardized" by making the corresponding elements of B have the
+*  form:
+*          [  a  0  ]
+*          [  0  b  ]
+*
+*  and the pair of corresponding 2-by-2 blocks in A and B will
+*  have a complex conjugate pair of generalized eigenvalues.
+*
+*  The left and right Schur vectors are the columns of VSL and VSR,
+*  respectively, where VSL and VSR are the orthogonal matrices
+*  which reduce A and B to Schur form:
+*
+*  Schur form of (A,B) = ( (VSL)**T A (VSR), (VSL)**T B (VSR) )
+*
+*  Arguments
+*  =========
+*
+*  JOBVSL  (input) CHARACTER*1
+*          = 'N':  do not compute the left Schur vectors;
+*          = 'V':  compute the left Schur vectors.
+*
+*  JOBVSR  (input) CHARACTER*1
+*          = 'N':  do not compute the right Schur vectors;
+*          = 'V':  compute the right Schur vectors.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A, B, VSL, and VSR.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA, N)
+*          On entry, the first of the pair of matrices whose generalized
+*          eigenvalues and (optionally) Schur vectors are to be
+*          computed.
+*          On exit, the generalized Schur form of A.
+*          Note: to avoid overflow, the Frobenius norm of the matrix
+*          A should be less than the overflow threshold.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of A.  LDA >= max(1,N).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB, N)
+*          On entry, the second of the pair of matrices whose
+*          generalized eigenvalues and (optionally) Schur vectors are
+*          to be computed.
+*          On exit, the generalized Schur form of B.
+*          Note: to avoid overflow, the Frobenius norm of the matrix
+*          B should be less than the overflow threshold.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of B.  LDB >= max(1,N).
+*
+*  ALPHAR  (output) DOUBLE PRECISION array, dimension (N)
+*  ALPHAI  (output) DOUBLE PRECISION array, dimension (N)
+*  BETA    (output) DOUBLE PRECISION array, dimension (N)
+*          On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will
+*          be the generalized eigenvalues.  ALPHAR(j) + ALPHAI(j)*i,
+*          j=1,...,N  and  BETA(j),j=1,...,N  are the diagonals of the
+*          complex Schur form (A,B) that would result if the 2-by-2
+*          diagonal blocks of the real Schur form of (A,B) were further
+*          reduced to triangular form using 2-by-2 complex unitary
+*          transformations.  If ALPHAI(j) is zero, then the j-th
+*          eigenvalue is real; if positive, then the j-th and (j+1)-st
+*          eigenvalues are a complex conjugate pair, with ALPHAI(j+1)
+*          negative.
+*
+*          Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j)
+*          may easily over- or underflow, and BETA(j) may even be zero.
+*          Thus, the user should avoid naively computing the ratio
+*          alpha/beta.  However, ALPHAR and ALPHAI will be always less
+*          than and usually comparable with norm(A) in magnitude, and
+*          BETA always less than and usually comparable with norm(B).
+*
+*  VSL     (output) DOUBLE PRECISION array, dimension (LDVSL,N)
+*          If JOBVSL = 'V', VSL will contain the left Schur vectors.
+*          (See "Purpose", above.)
+*          Not referenced if JOBVSL = 'N'.
+*
+*  LDVSL   (input) INTEGER
+*          The leading dimension of the matrix VSL. LDVSL >=1, and
+*          if JOBVSL = 'V', LDVSL >= N.
+*
+*  VSR     (output) DOUBLE PRECISION array, dimension (LDVSR,N)
+*          If JOBVSR = 'V', VSR will contain the right Schur vectors.
+*          (See "Purpose", above.)
+*          Not referenced if JOBVSR = 'N'.
+*
+*  LDVSR   (input) INTEGER
+*          The leading dimension of the matrix VSR. LDVSR >= 1, and
+*          if JOBVSR = 'V', LDVSR >= N.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,4*N).
+*          For good performance, LWORK must generally be larger.
+*          To compute the optimal value of LWORK, call ILAENV to get
+*          blocksizes (for DGEQRF, DORMQR, and DORGQR.)  Then compute:
+*          NB  -- MAX of the blocksizes for DGEQRF, DORMQR, and DORGQR
+*          The optimal LWORK is  2*N + N*(NB+1).
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          = 1,...,N:
+*                The QZ iteration failed.  (A,B) are not in Schur
+*                form, but ALPHAR(j), ALPHAI(j), and BETA(j) should
+*                be correct for j=INFO+1,...,N.
+*          > N:  errors that usually indicate LAPACK problems:
+*                =N+1: error return from DGGBAL
+*                =N+2: error return from DGEQRF
+*                =N+3: error return from DORMQR
+*                =N+4: error return from DORGQR
+*                =N+5: error return from DGGHRD
+*                =N+6: error return from DHGEQZ (other than failed
+*                                                iteration)
+*                =N+7: error return from DGGBAK (computing VSL)
+*                =N+8: error return from DGGBAK (computing VSR)
+*                =N+9: error return from DLASCL (various places)
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ILASCL, ILBSCL, ILVSL, ILVSR, LQUERY
+      INTEGER            ICOLS, IHI, IINFO, IJOBVL, IJOBVR, ILEFT, ILO,
+     $                   IRIGHT, IROWS, ITAU, IWORK, LOPT, LWKMIN,
+     $                   LWKOPT, NB, NB1, NB2, NB3
+      DOUBLE PRECISION   ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS,
+     $                   SAFMIN, SMLNUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEQRF, DGGBAK, DGGBAL, DGGHRD, DHGEQZ, DLACPY,
+     $                   DLASCL, DLASET, DORGQR, DORMQR, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, DLANGE
+      EXTERNAL           LSAME, ILAENV, DLAMCH, DLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode the input arguments
+*
+      IF( LSAME( JOBVSL, 'N' ) ) THEN
+         IJOBVL = 1
+         ILVSL = .FALSE.
+      ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN
+         IJOBVL = 2
+         ILVSL = .TRUE.
+      ELSE
+         IJOBVL = -1
+         ILVSL = .FALSE.
+      END IF
+*
+      IF( LSAME( JOBVSR, 'N' ) ) THEN
+         IJOBVR = 1
+         ILVSR = .FALSE.
+      ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN
+         IJOBVR = 2
+         ILVSR = .TRUE.
+      ELSE
+         IJOBVR = -1
+         ILVSR = .FALSE.
+      END IF
+*
+*     Test the input arguments
+*
+      LWKMIN = MAX( 4*N, 1 )
+      LWKOPT = LWKMIN
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      INFO = 0
+      IF( IJOBVL.LE.0 ) THEN
+         INFO = -1
+      ELSE IF( IJOBVR.LE.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN
+         INFO = -12
+      ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN
+         INFO = -14
+      ELSE IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -16
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB1 = ILAENV( 1, 'DGEQRF', ' ', N, N, -1, -1 )
+         NB2 = ILAENV( 1, 'DORMQR', ' ', N, N, N, -1 )
+         NB3 = ILAENV( 1, 'DORGQR', ' ', N, N, N, -1 )
+         NB = MAX( NB1, NB2, NB3 )
+         LOPT = 2*N + N*( NB+1 )
+         WORK( 1 ) = LOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGEGS ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'E' )*DLAMCH( 'B' )
+      SAFMIN = DLAMCH( 'S' )
+      SMLNUM = N*SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = DLANGE( 'M', N, N, A, LDA, WORK )
+      ILASCL = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         ANRMTO = SMLNUM
+         ILASCL = .TRUE.
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         ANRMTO = BIGNUM
+         ILASCL = .TRUE.
+      END IF
+*
+      IF( ILASCL ) THEN
+         CALL DLASCL( 'G', -1, -1, ANRM, ANRMTO, N, N, A, LDA, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 9
+            RETURN
+         END IF
+      END IF
+*
+*     Scale B if max element outside range [SMLNUM,BIGNUM]
+*
+      BNRM = DLANGE( 'M', N, N, B, LDB, WORK )
+      ILBSCL = .FALSE.
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+         BNRMTO = SMLNUM
+         ILBSCL = .TRUE.
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+         BNRMTO = BIGNUM
+         ILBSCL = .TRUE.
+      END IF
+*
+      IF( ILBSCL ) THEN
+         CALL DLASCL( 'G', -1, -1, BNRM, BNRMTO, N, N, B, LDB, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 9
+            RETURN
+         END IF
+      END IF
+*
+*     Permute the matrix to make it more nearly triangular
+*     Workspace layout:  (2*N words -- "work..." not actually used)
+*        left_permutation, right_permutation, work...
+*
+      ILEFT = 1
+      IRIGHT = N + 1
+      IWORK = IRIGHT + N
+      CALL DGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ),
+     $             WORK( IRIGHT ), WORK( IWORK ), IINFO )
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 1
+         GO TO 10
+      END IF
+*
+*     Reduce B to triangular form, and initialize VSL and/or VSR
+*     Workspace layout:  ("work..." must have at least N words)
+*        left_permutation, right_permutation, tau, work...
+*
+      IROWS = IHI + 1 - ILO
+      ICOLS = N + 1 - ILO
+      ITAU = IWORK
+      IWORK = ITAU + IROWS
+      CALL DGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ),
+     $             WORK( IWORK ), LWORK+1-IWORK, IINFO )
+      IF( IINFO.GE.0 )
+     $   LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 2
+         GO TO 10
+      END IF
+*
+      CALL DORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB,
+     $             WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWORK ),
+     $             LWORK+1-IWORK, IINFO )
+      IF( IINFO.GE.0 )
+     $   LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 3
+         GO TO 10
+      END IF
+*
+      IF( ILVSL ) THEN
+         CALL DLASET( 'Full', N, N, ZERO, ONE, VSL, LDVSL )
+         CALL DLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB,
+     $                VSL( ILO+1, ILO ), LDVSL )
+         CALL DORGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL,
+     $                WORK( ITAU ), WORK( IWORK ), LWORK+1-IWORK,
+     $                IINFO )
+         IF( IINFO.GE.0 )
+     $      LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 4
+            GO TO 10
+         END IF
+      END IF
+*
+      IF( ILVSR )
+     $   CALL DLASET( 'Full', N, N, ZERO, ONE, VSR, LDVSR )
+*
+*     Reduce to generalized Hessenberg form
+*
+      CALL DGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL,
+     $             LDVSL, VSR, LDVSR, IINFO )
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 5
+         GO TO 10
+      END IF
+*
+*     Perform QZ algorithm, computing Schur vectors if desired
+*     Workspace layout:  ("work..." must have at least 1 word)
+*        left_permutation, right_permutation, work...
+*
+      IWORK = ITAU
+      CALL DHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB,
+     $             ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR,
+     $             WORK( IWORK ), LWORK+1-IWORK, IINFO )
+      IF( IINFO.GE.0 )
+     $   LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+      IF( IINFO.NE.0 ) THEN
+         IF( IINFO.GT.0 .AND. IINFO.LE.N ) THEN
+            INFO = IINFO
+         ELSE IF( IINFO.GT.N .AND. IINFO.LE.2*N ) THEN
+            INFO = IINFO - N
+         ELSE
+            INFO = N + 6
+         END IF
+         GO TO 10
+      END IF
+*
+*     Apply permutation to VSL and VSR
+*
+      IF( ILVSL ) THEN
+         CALL DGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ),
+     $                WORK( IRIGHT ), N, VSL, LDVSL, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 7
+            GO TO 10
+         END IF
+      END IF
+      IF( ILVSR ) THEN
+         CALL DGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ),
+     $                WORK( IRIGHT ), N, VSR, LDVSR, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 8
+            GO TO 10
+         END IF
+      END IF
+*
+*     Undo scaling
+*
+      IF( ILASCL ) THEN
+         CALL DLASCL( 'H', -1, -1, ANRMTO, ANRM, N, N, A, LDA, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 9
+            RETURN
+         END IF
+         CALL DLASCL( 'G', -1, -1, ANRMTO, ANRM, N, 1, ALPHAR, N,
+     $                IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 9
+            RETURN
+         END IF
+         CALL DLASCL( 'G', -1, -1, ANRMTO, ANRM, N, 1, ALPHAI, N,
+     $                IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 9
+            RETURN
+         END IF
+      END IF
+*
+      IF( ILBSCL ) THEN
+         CALL DLASCL( 'U', -1, -1, BNRMTO, BNRM, N, N, B, LDB, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 9
+            RETURN
+         END IF
+         CALL DLASCL( 'G', -1, -1, BNRMTO, BNRM, N, 1, BETA, N, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 9
+            RETURN
+         END IF
+      END IF
+*
+   10 CONTINUE
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of DGEGS
+*
+      END
diff --git a/costa/native/external/lapack/dgegv.f b/costa/native/external/lapack/dgegv.f
new file mode 100644
index 000000000..67e1bba94
--- /dev/null
+++ b/costa/native/external/lapack/dgegv.f
@@ -0,0 +1,641 @@
+      SUBROUTINE DGEGV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, ALPHAI,
+     $                  BETA, VL, LDVL, VR, LDVR, WORK, LWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVL, JOBVR
+      INTEGER            INFO, LDA, LDB, LDVL, LDVR, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), ALPHAI( * ), ALPHAR( * ),
+     $                   B( LDB, * ), BETA( * ), VL( LDVL, * ),
+     $                   VR( LDVR, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This routine is deprecated and has been replaced by routine DGGEV.
+*
+*  DGEGV computes for a pair of n-by-n real nonsymmetric matrices A and
+*  B, the generalized eigenvalues (alphar +/- alphai*i, beta), and
+*  optionally, the left and/or right generalized eigenvectors (VL and
+*  VR).
+*
+*  A generalized eigenvalue for a pair of matrices (A,B) is, roughly
+*  speaking, a scalar w or a ratio  alpha/beta = w, such that  A - w*B
+*  is singular.  It is usually represented as the pair (alpha,beta),
+*  as there is a reasonable interpretation for beta=0, and even for
+*  both being zero.  A good beginning reference is the book, "Matrix
+*  Computations", by G. Golub & C. van Loan (Johns Hopkins U. Press)
+*
+*  A right generalized eigenvector corresponding to a generalized
+*  eigenvalue  w  for a pair of matrices (A,B) is a vector  r  such
+*  that  (A - w B) r = 0 .  A left generalized eigenvector is a vector
+*  l such that l**H * (A - w B) = 0, where l**H is the
+*  conjugate-transpose of l.
+*
+*  Note: this routine performs "full balancing" on A and B -- see
+*  "Further Details", below.
+*
+*  Arguments
+*  =========
+*
+*  JOBVL   (input) CHARACTER*1
+*          = 'N':  do not compute the left generalized eigenvectors;
+*          = 'V':  compute the left generalized eigenvectors.
+*
+*  JOBVR   (input) CHARACTER*1
+*          = 'N':  do not compute the right generalized eigenvectors;
+*          = 'V':  compute the right generalized eigenvectors.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A, B, VL, and VR.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA, N)
+*          On entry, the first of the pair of matrices whose
+*          generalized eigenvalues and (optionally) generalized
+*          eigenvectors are to be computed.
+*          On exit, the contents will have been destroyed.  (For a
+*          description of the contents of A on exit, see "Further
+*          Details", below.)
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of A.  LDA >= max(1,N).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB, N)
+*          On entry, the second of the pair of matrices whose
+*          generalized eigenvalues and (optionally) generalized
+*          eigenvectors are to be computed.
+*          On exit, the contents will have been destroyed.  (For a
+*          description of the contents of B on exit, see "Further
+*          Details", below.)
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of B.  LDB >= max(1,N).
+*
+*  ALPHAR  (output) DOUBLE PRECISION array, dimension (N)
+*  ALPHAI  (output) DOUBLE PRECISION array, dimension (N)
+*  BETA    (output) DOUBLE PRECISION array, dimension (N)
+*          On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will
+*          be the generalized eigenvalues.  If ALPHAI(j) is zero, then
+*          the j-th eigenvalue is real; if positive, then the j-th and
+*          (j+1)-st eigenvalues are a complex conjugate pair, with
+*          ALPHAI(j+1) negative.
+*
+*          Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j)
+*          may easily over- or underflow, and BETA(j) may even be zero.
+*          Thus, the user should avoid naively computing the ratio
+*          alpha/beta.  However, ALPHAR and ALPHAI will be always less
+*          than and usually comparable with norm(A) in magnitude, and
+*          BETA always less than and usually comparable with norm(B).
+*
+*  VL      (output) DOUBLE PRECISION array, dimension (LDVL,N)
+*          If JOBVL = 'V', the left generalized eigenvectors.  (See
+*          "Purpose", above.)  Real eigenvectors take one column,
+*          complex take two columns, the first for the real part and
+*          the second for the imaginary part.  Complex eigenvectors
+*          correspond to an eigenvalue with positive imaginary part.
+*          Each eigenvector will be scaled so the largest component
+*          will have abs(real part) + abs(imag. part) = 1, *except*
+*          that for eigenvalues with alpha=beta=0, a zero vector will
+*          be returned as the corresponding eigenvector.
+*          Not referenced if JOBVL = 'N'.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the matrix VL. LDVL >= 1, and
+*          if JOBVL = 'V', LDVL >= N.
+*
+*  VR      (output) DOUBLE PRECISION array, dimension (LDVR,N)
+*          If JOBVR = 'V', the right generalized eigenvectors.  (See
+*          "Purpose", above.)  Real eigenvectors take one column,
+*          complex take two columns, the first for the real part and
+*          the second for the imaginary part.  Complex eigenvectors
+*          correspond to an eigenvalue with positive imaginary part.
+*          Each eigenvector will be scaled so the largest component
+*          will have abs(real part) + abs(imag. part) = 1, *except*
+*          that for eigenvalues with alpha=beta=0, a zero vector will
+*          be returned as the corresponding eigenvector.
+*          Not referenced if JOBVR = 'N'.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the matrix VR. LDVR >= 1, and
+*          if JOBVR = 'V', LDVR >= N.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,8*N).
+*          For good performance, LWORK must generally be larger.
+*          To compute the optimal value of LWORK, call ILAENV to get
+*          blocksizes (for DGEQRF, DORMQR, and DORGQR.)  Then compute:
+*          NB  -- MAX of the blocksizes for DGEQRF, DORMQR, and DORGQR;
+*          The optimal LWORK is:
+*              2*N + MAX( 6*N, N*(NB+1) ).
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          = 1,...,N:
+*                The QZ iteration failed.  No eigenvectors have been
+*                calculated, but ALPHAR(j), ALPHAI(j), and BETA(j)
+*                should be correct for j=INFO+1,...,N.
+*          > N:  errors that usually indicate LAPACK problems:
+*                =N+1: error return from DGGBAL
+*                =N+2: error return from DGEQRF
+*                =N+3: error return from DORMQR
+*                =N+4: error return from DORGQR
+*                =N+5: error return from DGGHRD
+*                =N+6: error return from DHGEQZ (other than failed
+*                                                iteration)
+*                =N+7: error return from DTGEVC
+*                =N+8: error return from DGGBAK (computing VL)
+*                =N+9: error return from DGGBAK (computing VR)
+*                =N+10: error return from DLASCL (various calls)
+*
+*  Further Details
+*  ===============
+*
+*  Balancing
+*  ---------
+*
+*  This driver calls DGGBAL to both permute and scale rows and columns
+*  of A and B.  The permutations PL and PR are chosen so that PL*A*PR
+*  and PL*B*R will be upper triangular except for the diagonal blocks
+*  A(i:j,i:j) and B(i:j,i:j), with i and j as close together as
+*  possible.  The diagonal scaling matrices DL and DR are chosen so
+*  that the pair  DL*PL*A*PR*DR, DL*PL*B*PR*DR have elements close to
+*  one (except for the elements that start out zero.)
+*
+*  After the eigenvalues and eigenvectors of the balanced matrices
+*  have been computed, DGGBAK transforms the eigenvectors back to what
+*  they would have been (in perfect arithmetic) if they had not been
+*  balanced.
+*
+*  Contents of A and B on Exit
+*  -------- -- - --- - -- ----
+*
+*  If any eigenvectors are computed (either JOBVL='V' or JOBVR='V' or
+*  both), then on exit the arrays A and B will contain the real Schur
+*  form[*] of the "balanced" versions of A and B.  If no eigenvectors
+*  are computed, then only the diagonal blocks will be correct.
+*
+*  [*] See DHGEQZ, DGEGS, or read the book "Matrix Computations",
+*      by Golub & van Loan, pub. by Johns Hopkins U. Press.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ILIMIT, ILV, ILVL, ILVR, LQUERY
+      CHARACTER          CHTEMP
+      INTEGER            ICOLS, IHI, IINFO, IJOBVL, IJOBVR, ILEFT, ILO,
+     $                   IN, IRIGHT, IROWS, ITAU, IWORK, JC, JR, LOPT,
+     $                   LWKMIN, LWKOPT, NB, NB1, NB2, NB3
+      DOUBLE PRECISION   ABSAI, ABSAR, ABSB, ANRM, ANRM1, ANRM2, BNRM,
+     $                   BNRM1, BNRM2, EPS, ONEPLS, SAFMAX, SAFMIN,
+     $                   SALFAI, SALFAR, SBETA, SCALE, TEMP
+*     ..
+*     .. Local Arrays ..
+      LOGICAL            LDUMMA( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEQRF, DGGBAK, DGGBAL, DGGHRD, DHGEQZ, DLACPY,
+     $                   DLASCL, DLASET, DORGQR, DORMQR, DTGEVC, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, DLANGE
+      EXTERNAL           LSAME, ILAENV, DLAMCH, DLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, INT, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode the input arguments
+*
+      IF( LSAME( JOBVL, 'N' ) ) THEN
+         IJOBVL = 1
+         ILVL = .FALSE.
+      ELSE IF( LSAME( JOBVL, 'V' ) ) THEN
+         IJOBVL = 2
+         ILVL = .TRUE.
+      ELSE
+         IJOBVL = -1
+         ILVL = .FALSE.
+      END IF
+*
+      IF( LSAME( JOBVR, 'N' ) ) THEN
+         IJOBVR = 1
+         ILVR = .FALSE.
+      ELSE IF( LSAME( JOBVR, 'V' ) ) THEN
+         IJOBVR = 2
+         ILVR = .TRUE.
+      ELSE
+         IJOBVR = -1
+         ILVR = .FALSE.
+      END IF
+      ILV = ILVL .OR. ILVR
+*
+*     Test the input arguments
+*
+      LWKMIN = MAX( 8*N, 1 )
+      LWKOPT = LWKMIN
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      INFO = 0
+      IF( IJOBVL.LE.0 ) THEN
+         INFO = -1
+      ELSE IF( IJOBVR.LE.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN
+         INFO = -12
+      ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN
+         INFO = -14
+      ELSE IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -16
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB1 = ILAENV( 1, 'DGEQRF', ' ', N, N, -1, -1 )
+         NB2 = ILAENV( 1, 'DORMQR', ' ', N, N, N, -1 )
+         NB3 = ILAENV( 1, 'DORGQR', ' ', N, N, N, -1 )
+         NB = MAX( NB1, NB2, NB3 )
+         LOPT = 2*N + MAX( 6*N, N*( NB+1 ) )
+         WORK( 1 ) = LOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGEGV ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'E' )*DLAMCH( 'B' )
+      SAFMIN = DLAMCH( 'S' )
+      SAFMIN = SAFMIN + SAFMIN
+      SAFMAX = ONE / SAFMIN
+      ONEPLS = ONE + ( 4*EPS )
+*
+*     Scale A
+*
+      ANRM = DLANGE( 'M', N, N, A, LDA, WORK )
+      ANRM1 = ANRM
+      ANRM2 = ONE
+      IF( ANRM.LT.ONE ) THEN
+         IF( SAFMAX*ANRM.LT.ONE ) THEN
+            ANRM1 = SAFMIN
+            ANRM2 = SAFMAX*ANRM
+         END IF
+      END IF
+*
+      IF( ANRM.GT.ZERO ) THEN
+         CALL DLASCL( 'G', -1, -1, ANRM, ONE, N, N, A, LDA, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 10
+            RETURN
+         END IF
+      END IF
+*
+*     Scale B
+*
+      BNRM = DLANGE( 'M', N, N, B, LDB, WORK )
+      BNRM1 = BNRM
+      BNRM2 = ONE
+      IF( BNRM.LT.ONE ) THEN
+         IF( SAFMAX*BNRM.LT.ONE ) THEN
+            BNRM1 = SAFMIN
+            BNRM2 = SAFMAX*BNRM
+         END IF
+      END IF
+*
+      IF( BNRM.GT.ZERO ) THEN
+         CALL DLASCL( 'G', -1, -1, BNRM, ONE, N, N, B, LDB, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 10
+            RETURN
+         END IF
+      END IF
+*
+*     Permute the matrix to make it more nearly triangular
+*     Workspace layout:  (8*N words -- "work" requires 6*N words)
+*        left_permutation, right_permutation, work...
+*
+      ILEFT = 1
+      IRIGHT = N + 1
+      IWORK = IRIGHT + N
+      CALL DGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ),
+     $             WORK( IRIGHT ), WORK( IWORK ), IINFO )
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 1
+         GO TO 120
+      END IF
+*
+*     Reduce B to triangular form, and initialize VL and/or VR
+*     Workspace layout:  ("work..." must have at least N words)
+*        left_permutation, right_permutation, tau, work...
+*
+      IROWS = IHI + 1 - ILO
+      IF( ILV ) THEN
+         ICOLS = N + 1 - ILO
+      ELSE
+         ICOLS = IROWS
+      END IF
+      ITAU = IWORK
+      IWORK = ITAU + IROWS
+      CALL DGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ),
+     $             WORK( IWORK ), LWORK+1-IWORK, IINFO )
+      IF( IINFO.GE.0 )
+     $   LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 2
+         GO TO 120
+      END IF
+*
+      CALL DORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB,
+     $             WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWORK ),
+     $             LWORK+1-IWORK, IINFO )
+      IF( IINFO.GE.0 )
+     $   LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 3
+         GO TO 120
+      END IF
+*
+      IF( ILVL ) THEN
+         CALL DLASET( 'Full', N, N, ZERO, ONE, VL, LDVL )
+         CALL DLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB,
+     $                VL( ILO+1, ILO ), LDVL )
+         CALL DORGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL,
+     $                WORK( ITAU ), WORK( IWORK ), LWORK+1-IWORK,
+     $                IINFO )
+         IF( IINFO.GE.0 )
+     $      LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 4
+            GO TO 120
+         END IF
+      END IF
+*
+      IF( ILVR )
+     $   CALL DLASET( 'Full', N, N, ZERO, ONE, VR, LDVR )
+*
+*     Reduce to generalized Hessenberg form
+*
+      IF( ILV ) THEN
+*
+*        Eigenvectors requested -- work on whole matrix.
+*
+         CALL DGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL,
+     $                LDVL, VR, LDVR, IINFO )
+      ELSE
+         CALL DGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA,
+     $                B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IINFO )
+      END IF
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 5
+         GO TO 120
+      END IF
+*
+*     Perform QZ algorithm
+*     Workspace layout:  ("work..." must have at least 1 word)
+*        left_permutation, right_permutation, work...
+*
+      IWORK = ITAU
+      IF( ILV ) THEN
+         CHTEMP = 'S'
+      ELSE
+         CHTEMP = 'E'
+      END IF
+      CALL DHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB,
+     $             ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR,
+     $             WORK( IWORK ), LWORK+1-IWORK, IINFO )
+      IF( IINFO.GE.0 )
+     $   LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+      IF( IINFO.NE.0 ) THEN
+         IF( IINFO.GT.0 .AND. IINFO.LE.N ) THEN
+            INFO = IINFO
+         ELSE IF( IINFO.GT.N .AND. IINFO.LE.2*N ) THEN
+            INFO = IINFO - N
+         ELSE
+            INFO = N + 6
+         END IF
+         GO TO 120
+      END IF
+*
+      IF( ILV ) THEN
+*
+*        Compute Eigenvectors  (DTGEVC requires 6*N words of workspace)
+*
+         IF( ILVL ) THEN
+            IF( ILVR ) THEN
+               CHTEMP = 'B'
+            ELSE
+               CHTEMP = 'L'
+            END IF
+         ELSE
+            CHTEMP = 'R'
+         END IF
+*
+         CALL DTGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, LDVL,
+     $                VR, LDVR, N, IN, WORK( IWORK ), IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 7
+            GO TO 120
+         END IF
+*
+*        Undo balancing on VL and VR, rescale
+*
+         IF( ILVL ) THEN
+            CALL DGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ),
+     $                   WORK( IRIGHT ), N, VL, LDVL, IINFO )
+            IF( IINFO.NE.0 ) THEN
+               INFO = N + 8
+               GO TO 120
+            END IF
+            DO 50 JC = 1, N
+               IF( ALPHAI( JC ).LT.ZERO )
+     $            GO TO 50
+               TEMP = ZERO
+               IF( ALPHAI( JC ).EQ.ZERO ) THEN
+                  DO 10 JR = 1, N
+                     TEMP = MAX( TEMP, ABS( VL( JR, JC ) ) )
+   10             CONTINUE
+               ELSE
+                  DO 20 JR = 1, N
+                     TEMP = MAX( TEMP, ABS( VL( JR, JC ) )+
+     $                      ABS( VL( JR, JC+1 ) ) )
+   20             CONTINUE
+               END IF
+               IF( TEMP.LT.SAFMIN )
+     $            GO TO 50
+               TEMP = ONE / TEMP
+               IF( ALPHAI( JC ).EQ.ZERO ) THEN
+                  DO 30 JR = 1, N
+                     VL( JR, JC ) = VL( JR, JC )*TEMP
+   30             CONTINUE
+               ELSE
+                  DO 40 JR = 1, N
+                     VL( JR, JC ) = VL( JR, JC )*TEMP
+                     VL( JR, JC+1 ) = VL( JR, JC+1 )*TEMP
+   40             CONTINUE
+               END IF
+   50       CONTINUE
+         END IF
+         IF( ILVR ) THEN
+            CALL DGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ),
+     $                   WORK( IRIGHT ), N, VR, LDVR, IINFO )
+            IF( IINFO.NE.0 ) THEN
+               INFO = N + 9
+               GO TO 120
+            END IF
+            DO 100 JC = 1, N
+               IF( ALPHAI( JC ).LT.ZERO )
+     $            GO TO 100
+               TEMP = ZERO
+               IF( ALPHAI( JC ).EQ.ZERO ) THEN
+                  DO 60 JR = 1, N
+                     TEMP = MAX( TEMP, ABS( VR( JR, JC ) ) )
+   60             CONTINUE
+               ELSE
+                  DO 70 JR = 1, N
+                     TEMP = MAX( TEMP, ABS( VR( JR, JC ) )+
+     $                      ABS( VR( JR, JC+1 ) ) )
+   70             CONTINUE
+               END IF
+               IF( TEMP.LT.SAFMIN )
+     $            GO TO 100
+               TEMP = ONE / TEMP
+               IF( ALPHAI( JC ).EQ.ZERO ) THEN
+                  DO 80 JR = 1, N
+                     VR( JR, JC ) = VR( JR, JC )*TEMP
+   80             CONTINUE
+               ELSE
+                  DO 90 JR = 1, N
+                     VR( JR, JC ) = VR( JR, JC )*TEMP
+                     VR( JR, JC+1 ) = VR( JR, JC+1 )*TEMP
+   90             CONTINUE
+               END IF
+  100       CONTINUE
+         END IF
+*
+*        End of eigenvector calculation
+*
+      END IF
+*
+*     Undo scaling in alpha, beta
+*
+*     Note: this does not give the alpha and beta for the unscaled
+*     problem.
+*
+*     Un-scaling is limited to avoid underflow in alpha and beta
+*     if they are significant.
+*
+      DO 110 JC = 1, N
+         ABSAR = ABS( ALPHAR( JC ) )
+         ABSAI = ABS( ALPHAI( JC ) )
+         ABSB = ABS( BETA( JC ) )
+         SALFAR = ANRM*ALPHAR( JC )
+         SALFAI = ANRM*ALPHAI( JC )
+         SBETA = BNRM*BETA( JC )
+         ILIMIT = .FALSE.
+         SCALE = ONE
+*
+*        Check for significant underflow in ALPHAI
+*
+         IF( ABS( SALFAI ).LT.SAFMIN .AND. ABSAI.GE.
+     $       MAX( SAFMIN, EPS*ABSAR, EPS*ABSB ) ) THEN
+            ILIMIT = .TRUE.
+            SCALE = ( ONEPLS*SAFMIN / ANRM1 ) /
+     $              MAX( ONEPLS*SAFMIN, ANRM2*ABSAI )
+*
+         ELSE IF( SALFAI.EQ.ZERO ) THEN
+*
+*           If insignificant underflow in ALPHAI, then make the
+*           conjugate eigenvalue real.
+*
+            IF( ALPHAI( JC ).LT.ZERO .AND. JC.GT.1 ) THEN
+               ALPHAI( JC-1 ) = ZERO
+            ELSE IF( ALPHAI( JC ).GT.ZERO .AND. JC.LT.N ) THEN
+               ALPHAI( JC+1 ) = ZERO
+            END IF
+         END IF
+*
+*        Check for significant underflow in ALPHAR
+*
+         IF( ABS( SALFAR ).LT.SAFMIN .AND. ABSAR.GE.
+     $       MAX( SAFMIN, EPS*ABSAI, EPS*ABSB ) ) THEN
+            ILIMIT = .TRUE.
+            SCALE = MAX( SCALE, ( ONEPLS*SAFMIN / ANRM1 ) /
+     $              MAX( ONEPLS*SAFMIN, ANRM2*ABSAR ) )
+         END IF
+*
+*        Check for significant underflow in BETA
+*
+         IF( ABS( SBETA ).LT.SAFMIN .AND. ABSB.GE.
+     $       MAX( SAFMIN, EPS*ABSAR, EPS*ABSAI ) ) THEN
+            ILIMIT = .TRUE.
+            SCALE = MAX( SCALE, ( ONEPLS*SAFMIN / BNRM1 ) /
+     $              MAX( ONEPLS*SAFMIN, BNRM2*ABSB ) )
+         END IF
+*
+*        Check for possible overflow when limiting scaling
+*
+         IF( ILIMIT ) THEN
+            TEMP = ( SCALE*SAFMIN )*MAX( ABS( SALFAR ), ABS( SALFAI ),
+     $             ABS( SBETA ) )
+            IF( TEMP.GT.ONE )
+     $         SCALE = SCALE / TEMP
+            IF( SCALE.LT.ONE )
+     $         ILIMIT = .FALSE.
+         END IF
+*
+*        Recompute un-scaled ALPHAR, ALPHAI, BETA if necessary.
+*
+         IF( ILIMIT ) THEN
+            SALFAR = ( SCALE*ALPHAR( JC ) )*ANRM
+            SALFAI = ( SCALE*ALPHAI( JC ) )*ANRM
+            SBETA = ( SCALE*BETA( JC ) )*BNRM
+         END IF
+         ALPHAR( JC ) = SALFAR
+         ALPHAI( JC ) = SALFAI
+         BETA( JC ) = SBETA
+  110 CONTINUE
+*
+  120 CONTINUE
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of DGEGV
+*
+      END
diff --git a/costa/native/external/lapack/dgehd2.f b/costa/native/external/lapack/dgehd2.f
new file mode 100644
index 000000000..b2345717c
--- /dev/null
+++ b/costa/native/external/lapack/dgehd2.f
@@ -0,0 +1,150 @@
+      SUBROUTINE DGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            IHI, ILO, INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGEHD2 reduces a real general matrix A to upper Hessenberg form H by
+*  an orthogonal similarity transformation:  Q' * A * Q = H .
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          It is assumed that A is already upper triangular in rows
+*          and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
+*          set by a previous call to DGEBAL; otherwise they should be
+*          set to 1 and N respectively. See Further Details.
+*          1 <= ILO <= IHI <= max(1,N).
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the n by n general matrix to be reduced.
+*          On exit, the upper triangle and the first subdiagonal of A
+*          are overwritten with the upper Hessenberg matrix H, and the
+*          elements below the first subdiagonal, with the array TAU,
+*          represent the orthogonal matrix Q as a product of elementary
+*          reflectors. See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  TAU     (output) DOUBLE PRECISION array, dimension (N-1)
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of (ihi-ilo) elementary
+*  reflectors
+*
+*     Q = H(ilo) H(ilo+1) . . . H(ihi-1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
+*  exit in A(i+2:ihi,i), and tau in TAU(i).
+*
+*  The contents of A are illustrated by the following example, with
+*  n = 7, ilo = 2 and ihi = 6:
+*
+*  on entry,                        on exit,
+*
+*  ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a )
+*  (     a   a   a   a   a   a )    (      a   h   h   h   h   a )
+*  (     a   a   a   a   a   a )    (      h   h   h   h   h   h )
+*  (     a   a   a   a   a   a )    (      v2  h   h   h   h   h )
+*  (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h )
+*  (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h )
+*  (                         a )    (                          a )
+*
+*  where a denotes an element of the original matrix A, h denotes a
+*  modified element of the upper Hessenberg matrix H, and vi denotes an
+*  element of the vector defining H(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      DOUBLE PRECISION   AII
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARF, DLARFG, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN
+         INFO = -2
+      ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGEHD2', -INFO )
+         RETURN
+      END IF
+*
+      DO 10 I = ILO, IHI - 1
+*
+*        Compute elementary reflector H(i) to annihilate A(i+2:ihi,i)
+*
+         CALL DLARFG( IHI-I, A( I+1, I ), A( MIN( I+2, N ), I ), 1,
+     $                TAU( I ) )
+         AII = A( I+1, I )
+         A( I+1, I ) = ONE
+*
+*        Apply H(i) to A(1:ihi,i+1:ihi) from the right
+*
+         CALL DLARF( 'Right', IHI, IHI-I, A( I+1, I ), 1, TAU( I ),
+     $               A( 1, I+1 ), LDA, WORK )
+*
+*        Apply H(i) to A(i+1:ihi,i+1:n) from the left
+*
+         CALL DLARF( 'Left', IHI-I, N-I, A( I+1, I ), 1, TAU( I ),
+     $               A( I+1, I+1 ), LDA, WORK )
+*
+         A( I+1, I ) = AII
+   10 CONTINUE
+*
+      RETURN
+*
+*     End of DGEHD2
+*
+      END
diff --git a/costa/native/external/lapack/dgehrd.f b/costa/native/external/lapack/dgehrd.f
new file mode 100644
index 000000000..e9ffcfad2
--- /dev/null
+++ b/costa/native/external/lapack/dgehrd.f
@@ -0,0 +1,255 @@
+      SUBROUTINE DGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            IHI, ILO, INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGEHRD reduces a real general matrix A to upper Hessenberg form H by
+*  an orthogonal similarity transformation:  Q' * A * Q = H .
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          It is assumed that A is already upper triangular in rows
+*          and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
+*          set by a previous call to DGEBAL; otherwise they should be
+*          set to 1 and N respectively. See Further Details.
+*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the N-by-N general matrix to be reduced.
+*          On exit, the upper triangle and the first subdiagonal of A
+*          are overwritten with the upper Hessenberg matrix H, and the
+*          elements below the first subdiagonal, with the array TAU,
+*          represent the orthogonal matrix Q as a product of elementary
+*          reflectors. See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  TAU     (output) DOUBLE PRECISION array, dimension (N-1)
+*          The scalar factors of the elementary reflectors (see Further
+*          Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to
+*          zero.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.  LWORK >= max(1,N).
+*          For optimum performance LWORK >= N*NB, where NB is the
+*          optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of (ihi-ilo) elementary
+*  reflectors
+*
+*     Q = H(ilo) H(ilo+1) . . . H(ihi-1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
+*  exit in A(i+2:ihi,i), and tau in TAU(i).
+*
+*  The contents of A are illustrated by the following example, with
+*  n = 7, ilo = 2 and ihi = 6:
+*
+*  on entry,                        on exit,
+*
+*  ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a )
+*  (     a   a   a   a   a   a )    (      a   h   h   h   h   a )
+*  (     a   a   a   a   a   a )    (      h   h   h   h   h   h )
+*  (     a   a   a   a   a   a )    (      v2  h   h   h   h   h )
+*  (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h )
+*  (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h )
+*  (                         a )    (                          a )
+*
+*  where a denotes an element of the original matrix A, h denotes a
+*  modified element of the upper Hessenberg matrix H, and vi denotes an
+*  element of the vector defining H(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            NBMAX, LDT
+      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, LDWORK, LWKOPT, NB, NBMIN,
+     $                   NH, NX
+      DOUBLE PRECISION   EI
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   T( LDT, NBMAX )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEHD2, DGEMM, DLAHRD, DLARFB, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      NB = MIN( NBMAX, ILAENV( 1, 'DGEHRD', ' ', N, ILO, IHI, -1 ) )
+      LWKOPT = N*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN
+         INFO = -2
+      ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGEHRD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Set elements 1:ILO-1 and IHI:N-1 of TAU to zero
+*
+      DO 10 I = 1, ILO - 1
+         TAU( I ) = ZERO
+   10 CONTINUE
+      DO 20 I = MAX( 1, IHI ), N - 1
+         TAU( I ) = ZERO
+   20 CONTINUE
+*
+*     Quick return if possible
+*
+      NH = IHI - ILO + 1
+      IF( NH.LE.1 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+*     Determine the block size.
+*
+      NB = MIN( NBMAX, ILAENV( 1, 'DGEHRD', ' ', N, ILO, IHI, -1 ) )
+      NBMIN = 2
+      IWS = 1
+      IF( NB.GT.1 .AND. NB.LT.NH ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code
+*        (last block is always handled by unblocked code).
+*
+         NX = MAX( NB, ILAENV( 3, 'DGEHRD', ' ', N, ILO, IHI, -1 ) )
+         IF( NX.LT.NH ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            IWS = N*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  determine the
+*              minimum value of NB, and reduce NB or force use of
+*              unblocked code.
+*
+               NBMIN = MAX( 2, ILAENV( 2, 'DGEHRD', ' ', N, ILO, IHI,
+     $                 -1 ) )
+               IF( LWORK.GE.N*NBMIN ) THEN
+                  NB = LWORK / N
+               ELSE
+                  NB = 1
+               END IF
+            END IF
+         END IF
+      END IF
+      LDWORK = N
+*
+      IF( NB.LT.NBMIN .OR. NB.GE.NH ) THEN
+*
+*        Use unblocked code below
+*
+         I = ILO
+*
+      ELSE
+*
+*        Use blocked code
+*
+         DO 30 I = ILO, IHI - 1 - NX, NB
+            IB = MIN( NB, IHI-I )
+*
+*           Reduce columns i:i+ib-1 to Hessenberg form, returning the
+*           matrices V and T of the block reflector H = I - V*T*V'
+*           which performs the reduction, and also the matrix Y = A*V*T
+*
+            CALL DLAHRD( IHI, I, IB, A( 1, I ), LDA, TAU( I ), T, LDT,
+     $                   WORK, LDWORK )
+*
+*           Apply the block reflector H to A(1:ihi,i+ib:ihi) from the
+*           right, computing  A := A - Y * V'. V(i+ib,ib-1) must be set
+*           to 1.
+*
+            EI = A( I+IB, I+IB-1 )
+            A( I+IB, I+IB-1 ) = ONE
+            CALL DGEMM( 'No transpose', 'Transpose', IHI, IHI-I-IB+1,
+     $                  IB, -ONE, WORK, LDWORK, A( I+IB, I ), LDA, ONE,
+     $                  A( 1, I+IB ), LDA )
+            A( I+IB, I+IB-1 ) = EI
+*
+*           Apply the block reflector H to A(i+1:ihi,i+ib:n) from the
+*           left
+*
+            CALL DLARFB( 'Left', 'Transpose', 'Forward', 'Columnwise',
+     $                   IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, T, LDT,
+     $                   A( I+1, I+IB ), LDA, WORK, LDWORK )
+   30    CONTINUE
+      END IF
+*
+*     Use unblocked code to reduce the rest of the matrix
+*
+      CALL DGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO )
+      WORK( 1 ) = IWS
+*
+      RETURN
+*
+*     End of DGEHRD
+*
+      END
diff --git a/costa/native/external/lapack/dgelq2.f b/costa/native/external/lapack/dgelq2.f
new file mode 100644
index 000000000..a12048779
--- /dev/null
+++ b/costa/native/external/lapack/dgelq2.f
@@ -0,0 +1,122 @@
+      SUBROUTINE DGELQ2( M, N, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGELQ2 computes an LQ factorization of a real m by n matrix A:
+*  A = L * Q.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the m by n matrix A.
+*          On exit, the elements on and below the diagonal of the array
+*          contain the m by min(m,n) lower trapezoidal matrix L (L is
+*          lower triangular if m <= n); the elements above the diagonal,
+*          with the array TAU, represent the orthogonal matrix Q as a
+*          product of elementary reflectors (see Further Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) DOUBLE PRECISION array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (M)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(k) . . . H(2) H(1), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),
+*  and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, K
+      DOUBLE PRECISION   AII
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARF, DLARFG, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGELQ2', -INFO )
+         RETURN
+      END IF
+*
+      K = MIN( M, N )
+*
+      DO 10 I = 1, K
+*
+*        Generate elementary reflector H(i) to annihilate A(i,i+1:n)
+*
+         CALL DLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA,
+     $                TAU( I ) )
+         IF( I.LT.M ) THEN
+*
+*           Apply H(i) to A(i+1:m,i:n) from the right
+*
+            AII = A( I, I )
+            A( I, I ) = ONE
+            CALL DLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAU( I ),
+     $                  A( I+1, I ), LDA, WORK )
+            A( I, I ) = AII
+         END IF
+   10 CONTINUE
+      RETURN
+*
+*     End of DGELQ2
+*
+      END
diff --git a/costa/native/external/lapack/dgelqf.f b/costa/native/external/lapack/dgelqf.f
new file mode 100644
index 000000000..13d2d2f9f
--- /dev/null
+++ b/costa/native/external/lapack/dgelqf.f
@@ -0,0 +1,196 @@
+      SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGELQF computes an LQ factorization of a real M-by-N matrix A:
+*  A = L * Q.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, the elements on and below the diagonal of the array
+*          contain the m-by-min(m,n) lower trapezoidal matrix L (L is
+*          lower triangular if m <= n); the elements above the diagonal,
+*          with the array TAU, represent the orthogonal matrix Q as a
+*          product of elementary reflectors (see Further Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) DOUBLE PRECISION array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,M).
+*          For optimum performance LWORK >= M*NB, where NB is the
+*          optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(k) . . . H(2) H(1), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),
+*  and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB,
+     $                   NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGELQ2, DLARFB, DLARFT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
+      LWKOPT = M*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGELQF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      K = MIN( M, N )
+      IF( K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 0
+      IWS = M
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'DGELQF', ' ', M, N, -1, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = M
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'DGELQF', ' ', M, N, -1,
+     $                 -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code initially
+*
+         DO 10 I = 1, K - NX, NB
+            IB = MIN( K-I+1, NB )
+*
+*           Compute the LQ factorization of the current block
+*           A(i:i+ib-1,i:n)
+*
+            CALL DGELQ2( IB, N-I+1, A( I, I ), LDA, TAU( I ), WORK,
+     $                   IINFO )
+            IF( I+IB.LE.M ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i) H(i+1) . . . H(i+ib-1)
+*
+               CALL DLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ),
+     $                      LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H to A(i+ib:m,i:n) from the right
+*
+               CALL DLARFB( 'Right', 'No transpose', 'Forward',
+     $                      'Rowwise', M-I-IB+1, N-I+1, IB, A( I, I ),
+     $                      LDA, WORK, LDWORK, A( I+IB, I ), LDA,
+     $                      WORK( IB+1 ), LDWORK )
+            END IF
+   10    CONTINUE
+      ELSE
+         I = 1
+      END IF
+*
+*     Use unblocked code to factor the last or only block.
+*
+      IF( I.LE.K )
+     $   CALL DGELQ2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK,
+     $                IINFO )
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of DGELQF
+*
+      END
diff --git a/costa/native/external/lapack/dgels.f b/costa/native/external/lapack/dgels.f
new file mode 100644
index 000000000..a7d6519fa
--- /dev/null
+++ b/costa/native/external/lapack/dgels.f
@@ -0,0 +1,403 @@
+      SUBROUTINE DGELS( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK,
+     $                  INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGELS solves overdetermined or underdetermined real linear systems
+*  involving an M-by-N matrix A, or its transpose, using a QR or LQ
+*  factorization of A.  It is assumed that A has full rank.
+*
+*  The following options are provided:
+*
+*  1. If TRANS = 'N' and m >= n:  find the least squares solution of
+*     an overdetermined system, i.e., solve the least squares problem
+*                  minimize || B - A*X ||.
+*
+*  2. If TRANS = 'N' and m < n:  find the minimum norm solution of
+*     an underdetermined system A * X = B.
+*
+*  3. If TRANS = 'T' and m >= n:  find the minimum norm solution of
+*     an undetermined system A**T * X = B.
+*
+*  4. If TRANS = 'T' and m < n:  find the least squares solution of
+*     an overdetermined system, i.e., solve the least squares problem
+*                  minimize || B - A**T * X ||.
+*
+*  Several right hand side vectors b and solution vectors x can be
+*  handled in a single call; they are stored as the columns of the
+*  M-by-NRHS right hand side matrix B and the N-by-NRHS solution
+*  matrix X.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER
+*          = 'N': the linear system involves A;
+*          = 'T': the linear system involves A**T.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of
+*          columns of the matrices B and X. NRHS >=0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit,
+*            if M >= N, A is overwritten by details of its QR
+*                       factorization as returned by DGEQRF;
+*            if M <  N, A is overwritten by details of its LQ
+*                       factorization as returned by DGELQF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the matrix B of right hand side vectors, stored
+*          columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS
+*          if TRANS = 'T'.
+*          On exit, B is overwritten by the solution vectors, stored
+*          columnwise:
+*          if TRANS = 'N' and m >= n, rows 1 to n of B contain the least
+*          squares solution vectors; the residual sum of squares for the
+*          solution in each column is given by the sum of squares of
+*          elements N+1 to M in that column;
+*          if TRANS = 'N' and m < n, rows 1 to N of B contain the
+*          minimum norm solution vectors;
+*          if TRANS = 'T' and m >= n, rows 1 to M of B contain the
+*          minimum norm solution vectors;
+*          if TRANS = 'T' and m < n, rows 1 to M of B contain the
+*          least squares solution vectors; the residual sum of squares
+*          for the solution in each column is given by the sum of
+*          squares of elements M+1 to N in that column.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= MAX(1,M,N).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          LWORK >= max( 1, MN + max( MN, NRHS ) ).
+*          For optimal performance,
+*          LWORK >= max( 1, MN + max( MN, NRHS )*NB ).
+*          where MN = min(M,N) and NB is the optimum block size.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, TPSD
+      INTEGER            BROW, I, IASCL, IBSCL, J, MN, NB, SCLLEN, WSIZE
+      DOUBLE PRECISION   ANRM, BIGNUM, BNRM, SMLNUM
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   RWORK( 1 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, DLANGE
+      EXTERNAL           LSAME, ILAENV, DLAMCH, DLANGE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGELQF, DGEQRF, DLASCL, DLASET, DORMLQ, DORMQR,
+     $                   DTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments.
+*
+      INFO = 0
+      MN = MIN( M, N )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.( LSAME( TRANS, 'N' ) .OR. LSAME( TRANS, 'T' ) ) ) THEN
+         INFO = -1
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN
+         INFO = -8
+      ELSE IF( LWORK.LT.MAX( 1, MN+MAX( MN, NRHS ) ) .AND. .NOT.LQUERY )
+     $          THEN
+         INFO = -10
+      END IF
+*
+*     Figure out optimal block size
+*
+      IF( INFO.EQ.0 .OR. INFO.EQ.-10 ) THEN
+*
+         TPSD = .TRUE.
+         IF( LSAME( TRANS, 'N' ) )
+     $      TPSD = .FALSE.
+*
+         IF( M.GE.N ) THEN
+            NB = ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
+            IF( TPSD ) THEN
+               NB = MAX( NB, ILAENV( 1, 'DORMQR', 'LN', M, NRHS, N,
+     $              -1 ) )
+            ELSE
+               NB = MAX( NB, ILAENV( 1, 'DORMQR', 'LT', M, NRHS, N,
+     $              -1 ) )
+            END IF
+         ELSE
+            NB = ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
+            IF( TPSD ) THEN
+               NB = MAX( NB, ILAENV( 1, 'DORMLQ', 'LT', N, NRHS, M,
+     $              -1 ) )
+            ELSE
+               NB = MAX( NB, ILAENV( 1, 'DORMLQ', 'LN', N, NRHS, M,
+     $              -1 ) )
+            END IF
+         END IF
+*
+         WSIZE = MAX( 1, MN+MAX( MN, NRHS )*NB )
+         WORK( 1 ) = DBLE( WSIZE )
+*
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGELS ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( MIN( M, N, NRHS ).EQ.0 ) THEN
+         CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
+         RETURN
+      END IF
+*
+*     Get machine parameters
+*
+      SMLNUM = DLAMCH( 'S' ) / DLAMCH( 'P' )
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+*
+*     Scale A, B if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = DLANGE( 'M', M, N, A, LDA, RWORK )
+      IASCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
+         IASCL = 1
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
+         IASCL = 2
+      ELSE IF( ANRM.EQ.ZERO ) THEN
+*
+*        Matrix all zero. Return zero solution.
+*
+         CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
+         GO TO 50
+      END IF
+*
+      BROW = M
+      IF( TPSD )
+     $   BROW = N
+      BNRM = DLANGE( 'M', BROW, NRHS, B, LDB, RWORK )
+      IBSCL = 0
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, BROW, NRHS, B, LDB,
+     $                INFO )
+         IBSCL = 1
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, BROW, NRHS, B, LDB,
+     $                INFO )
+         IBSCL = 2
+      END IF
+*
+      IF( M.GE.N ) THEN
+*
+*        compute QR factorization of A
+*
+         CALL DGEQRF( M, N, A, LDA, WORK( 1 ), WORK( MN+1 ), LWORK-MN,
+     $                INFO )
+*
+*        workspace at least N, optimally N*NB
+*
+         IF( .NOT.TPSD ) THEN
+*
+*           Least-Squares Problem min || A * X - B ||
+*
+*           B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS)
+*
+            CALL DORMQR( 'Left', 'Transpose', M, NRHS, N, A, LDA,
+     $                   WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN,
+     $                   INFO )
+*
+*           workspace at least NRHS, optimally NRHS*NB
+*
+*           B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS)
+*
+            CALL DTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N,
+     $                  NRHS, ONE, A, LDA, B, LDB )
+*
+            SCLLEN = N
+*
+         ELSE
+*
+*           Overdetermined system of equations A' * X = B
+*
+*           B(1:N,1:NRHS) := inv(R') * B(1:N,1:NRHS)
+*
+            CALL DTRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', N,
+     $                  NRHS, ONE, A, LDA, B, LDB )
+*
+*           B(N+1:M,1:NRHS) = ZERO
+*
+            DO 20 J = 1, NRHS
+               DO 10 I = N + 1, M
+                  B( I, J ) = ZERO
+   10          CONTINUE
+   20       CONTINUE
+*
+*           B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS)
+*
+            CALL DORMQR( 'Left', 'No transpose', M, NRHS, N, A, LDA,
+     $                   WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN,
+     $                   INFO )
+*
+*           workspace at least NRHS, optimally NRHS*NB
+*
+            SCLLEN = M
+*
+         END IF
+*
+      ELSE
+*
+*        Compute LQ factorization of A
+*
+         CALL DGELQF( M, N, A, LDA, WORK( 1 ), WORK( MN+1 ), LWORK-MN,
+     $                INFO )
+*
+*        workspace at least M, optimally M*NB.
+*
+         IF( .NOT.TPSD ) THEN
+*
+*           underdetermined system of equations A * X = B
+*
+*           B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS)
+*
+            CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', M,
+     $                  NRHS, ONE, A, LDA, B, LDB )
+*
+*           B(M+1:N,1:NRHS) = 0
+*
+            DO 40 J = 1, NRHS
+               DO 30 I = M + 1, N
+                  B( I, J ) = ZERO
+   30          CONTINUE
+   40       CONTINUE
+*
+*           B(1:N,1:NRHS) := Q(1:N,:)' * B(1:M,1:NRHS)
+*
+            CALL DORMLQ( 'Left', 'Transpose', N, NRHS, M, A, LDA,
+     $                   WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN,
+     $                   INFO )
+*
+*           workspace at least NRHS, optimally NRHS*NB
+*
+            SCLLEN = N
+*
+         ELSE
+*
+*           overdetermined system min || A' * X - B ||
+*
+*           B(1:N,1:NRHS) := Q * B(1:N,1:NRHS)
+*
+            CALL DORMLQ( 'Left', 'No transpose', N, NRHS, M, A, LDA,
+     $                   WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN,
+     $                   INFO )
+*
+*           workspace at least NRHS, optimally NRHS*NB
+*
+*           B(1:M,1:NRHS) := inv(L') * B(1:M,1:NRHS)
+*
+            CALL DTRSM( 'Left', 'Lower', 'Transpose', 'Non-unit', M,
+     $                  NRHS, ONE, A, LDA, B, LDB )
+*
+            SCLLEN = M
+*
+         END IF
+*
+      END IF
+*
+*     Undo scaling
+*
+      IF( IASCL.EQ.1 ) THEN
+         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, SCLLEN, NRHS, B, LDB,
+     $                INFO )
+      ELSE IF( IASCL.EQ.2 ) THEN
+         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, SCLLEN, NRHS, B, LDB,
+     $                INFO )
+      END IF
+      IF( IBSCL.EQ.1 ) THEN
+         CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, SCLLEN, NRHS, B, LDB,
+     $                INFO )
+      ELSE IF( IBSCL.EQ.2 ) THEN
+         CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, SCLLEN, NRHS, B, LDB,
+     $                INFO )
+      END IF
+*
+   50 CONTINUE
+      WORK( 1 ) = DBLE( WSIZE )
+*
+      RETURN
+*
+*     End of DGELS
+*
+      END
diff --git a/costa/native/external/lapack/dgelsd.f b/costa/native/external/lapack/dgelsd.f
new file mode 100644
index 000000000..0d4de0fdd
--- /dev/null
+++ b/costa/native/external/lapack/dgelsd.f
@@ -0,0 +1,529 @@
+      SUBROUTINE DGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
+     $                   WORK, LWORK, IWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGELSD computes the minimum-norm solution to a real linear least
+*  squares problem:
+*      minimize 2-norm(| b - A*x |)
+*  using the singular value decomposition (SVD) of A. A is an M-by-N
+*  matrix which may be rank-deficient.
+*
+*  Several right hand side vectors b and solution vectors x can be
+*  handled in a single call; they are stored as the columns of the
+*  M-by-NRHS right hand side matrix B and the N-by-NRHS solution
+*  matrix X.
+*
+*  The problem is solved in three steps:
+*  (1) Reduce the coefficient matrix A to bidiagonal form with
+*      Householder transformations, reducing the original problem
+*      into a "bidiagonal least squares problem" (BLS)
+*  (2) Solve the BLS using a divide and conquer approach.
+*  (3) Apply back all the Householder tranformations to solve
+*      the original least squares problem.
+*
+*  The effective rank of A is determined by treating as zero those
+*  singular values which are less than RCOND times the largest singular
+*  value.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of A. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of A. N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X. NRHS >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, A has been destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the M-by-NRHS right hand side matrix B.
+*          On exit, B is overwritten by the N-by-NRHS solution
+*          matrix X.  If m >= n and RANK = n, the residual
+*          sum-of-squares for the solution in the i-th column is given
+*          by the sum of squares of elements n+1:m in that column.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,max(M,N)).
+*
+*  S       (output) DOUBLE PRECISION array, dimension (min(M,N))
+*          The singular values of A in decreasing order.
+*          The condition number of A in the 2-norm = S(1)/S(min(m,n)).
+*
+*  RCOND   (input) DOUBLE PRECISION
+*          RCOND is used to determine the effective rank of A.
+*          Singular values S(i) <= RCOND*S(1) are treated as zero.
+*          If RCOND < 0, machine precision is used instead.
+*
+*  RANK    (output) INTEGER
+*          The effective rank of A, i.e., the number of singular values
+*          which are greater than RCOND*S(1).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK must be at least 1.
+*          The exact minimum amount of workspace needed depends on M,
+*          N and NRHS. As long as LWORK is at least
+*              12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2,
+*          if M is greater than or equal to N or
+*              12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2,
+*          if M is less than N, the code will execute correctly.
+*          SMLSIZ is returned by ILAENV and is equal to the maximum
+*          size of the subproblems at the bottom of the computation
+*          tree (usually about 25), and
+*             NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 )
+*          For good performance, LWORK should generally be larger.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace) INTEGER array, dimension (LIWORK)
+*          LIWORK >= 3 * MINMN * NLVL + 11 * MINMN,
+*          where MINMN = MIN( M,N ).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  the algorithm for computing the SVD failed to converge;
+*                if INFO = i, i off-diagonal elements of an intermediate
+*                bidiagonal form did not converge to zero.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Ren-Cang Li, Computer Science Division, University of
+*       California at Berkeley, USA
+*     Osni Marques, LBNL/NERSC, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            IASCL, IBSCL, IE, IL, ITAU, ITAUP, ITAUQ,
+     $                   LDWORK, MAXMN, MAXWRK, MINMN, MINWRK, MM,
+     $                   MNTHR, NLVL, NWORK, SMLSIZ, WLALSD
+      DOUBLE PRECISION   ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEBRD, DGELQF, DGEQRF, DLABAD, DLACPY, DLALSD,
+     $                   DLASCL, DLASET, DORMBR, DORMLQ, DORMQR, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, DLANGE
+      EXTERNAL           ILAENV, DLAMCH, DLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, INT, LOG, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments.
+*
+      INFO = 0
+      MINMN = MIN( M, N )
+      MAXMN = MAX( M, N )
+      MNTHR = ILAENV( 6, 'DGELSD', ' ', M, N, NRHS, -1 )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN
+         INFO = -7
+      END IF
+*
+      SMLSIZ = ILAENV( 9, 'DGELSD', ' ', 0, 0, 0, 0 )
+*
+*     Compute workspace.
+*     (Note: Comments in the code beginning "Workspace:" describe the
+*     minimal amount of workspace needed at that point in the code,
+*     as well as the preferred amount for good performance.
+*     NB refers to the optimal block size for the immediately
+*     following subroutine, as returned by ILAENV.)
+*
+      MINWRK = 1
+      MINMN = MAX( 1, MINMN )
+      NLVL = MAX( INT( LOG( DBLE( MINMN ) / DBLE( SMLSIZ+1 ) ) /
+     $       LOG( TWO ) ) + 1, 0 )
+*
+      IF( INFO.EQ.0 ) THEN
+         MAXWRK = 0
+         MM = M
+         IF( M.GE.N .AND. M.GE.MNTHR ) THEN
+*
+*           Path 1a - overdetermined, with many more rows than columns.
+*
+            MM = N
+            MAXWRK = MAX( MAXWRK, N+N*ILAENV( 1, 'DGEQRF', ' ', M, N,
+     $               -1, -1 ) )
+            MAXWRK = MAX( MAXWRK, N+NRHS*
+     $               ILAENV( 1, 'DORMQR', 'LT', M, NRHS, N, -1 ) )
+         END IF
+         IF( M.GE.N ) THEN
+*
+*           Path 1 - overdetermined or exactly determined.
+*
+            MAXWRK = MAX( MAXWRK, 3*N+( MM+N )*
+     $               ILAENV( 1, 'DGEBRD', ' ', MM, N, -1, -1 ) )
+            MAXWRK = MAX( MAXWRK, 3*N+NRHS*
+     $               ILAENV( 1, 'DORMBR', 'QLT', MM, NRHS, N, -1 ) )
+            MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
+     $               ILAENV( 1, 'DORMBR', 'PLN', N, NRHS, N, -1 ) )
+            WLALSD = 9*N+2*N*SMLSIZ+8*N*NLVL+N*NRHS+(SMLSIZ+1)**2
+            MAXWRK = MAX( MAXWRK, 3*N+WLALSD )
+            MINWRK = MAX( 3*N+MM, 3*N+NRHS, 3*N+WLALSD )
+         END IF
+         IF( N.GT.M ) THEN
+            WLALSD = 9*M+2*M*SMLSIZ+8*M*NLVL+M*NRHS+(SMLSIZ+1)**2
+            IF( N.GE.MNTHR ) THEN
+*
+*              Path 2a - underdetermined, with many more columns
+*              than rows.
+*
+               MAXWRK = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
+               MAXWRK = MAX( MAXWRK, M*M+4*M+2*M*
+     $                  ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
+               MAXWRK = MAX( MAXWRK, M*M+4*M+NRHS*
+     $                  ILAENV( 1, 'DORMBR', 'QLT', M, NRHS, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, M*M+4*M+( M-1 )*
+     $                  ILAENV( 1, 'DORMBR', 'PLN', M, NRHS, M, -1 ) )
+               IF( NRHS.GT.1 ) THEN
+                  MAXWRK = MAX( MAXWRK, M*M+M+M*NRHS )
+               ELSE
+                  MAXWRK = MAX( MAXWRK, M*M+2*M )
+               END IF
+               MAXWRK = MAX( MAXWRK, M+NRHS*
+     $                  ILAENV( 1, 'DORMLQ', 'LT', N, NRHS, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, M*M+4*M+WLALSD )
+            ELSE
+*
+*              Path 2 - remaining underdetermined cases.
+*
+               MAXWRK = 3*M + ( N+M )*ILAENV( 1, 'DGEBRD', ' ', M, N,
+     $                  -1, -1 )
+               MAXWRK = MAX( MAXWRK, 3*M+NRHS*
+     $                  ILAENV( 1, 'DORMBR', 'QLT', M, NRHS, N, -1 ) )
+               MAXWRK = MAX( MAXWRK, 3*M+M*
+     $                  ILAENV( 1, 'DORMBR', 'PLN', N, NRHS, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, 3*M+WLALSD )
+            END IF
+            MINWRK = MAX( 3*M+NRHS, 3*M+M, 3*M+WLALSD )
+         END IF
+         MINWRK = MIN( MINWRK, MAXWRK )
+         WORK( 1 ) = MAXWRK
+         IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
+            INFO = -12
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGELSD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         GO TO 10
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         RANK = 0
+         RETURN
+      END IF
+*
+*     Get machine parameters.
+*
+      EPS = DLAMCH( 'P' )
+      SFMIN = DLAMCH( 'S' )
+      SMLNUM = SFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+*
+*     Scale A if max entry outside range [SMLNUM,BIGNUM].
+*
+      ANRM = DLANGE( 'M', M, N, A, LDA, WORK )
+      IASCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM.
+*
+         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
+         IASCL = 1
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM.
+*
+         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
+         IASCL = 2
+      ELSE IF( ANRM.EQ.ZERO ) THEN
+*
+*        Matrix all zero. Return zero solution.
+*
+         CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
+         CALL DLASET( 'F', MINMN, 1, ZERO, ZERO, S, 1 )
+         RANK = 0
+         GO TO 10
+      END IF
+*
+*     Scale B if max entry outside range [SMLNUM,BIGNUM].
+*
+      BNRM = DLANGE( 'M', M, NRHS, B, LDB, WORK )
+      IBSCL = 0
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM.
+*
+         CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 1
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM.
+*
+         CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 2
+      END IF
+*
+*     If M < N make sure certain entries of B are zero.
+*
+      IF( M.LT.N )
+     $   CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB )
+*
+*     Overdetermined case.
+*
+      IF( M.GE.N ) THEN
+*
+*        Path 1 - overdetermined or exactly determined.
+*
+         MM = M
+         IF( M.GE.MNTHR ) THEN
+*
+*           Path 1a - overdetermined, with many more rows than columns.
+*
+            MM = N
+            ITAU = 1
+            NWORK = ITAU + N
+*
+*           Compute A=Q*R.
+*           (Workspace: need 2*N, prefer N+N*NB)
+*
+            CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                   LWORK-NWORK+1, INFO )
+*
+*           Multiply B by transpose(Q).
+*           (Workspace: need N+NRHS, prefer N+NRHS*NB)
+*
+            CALL DORMQR( 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAU ), B,
+     $                   LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+*           Zero out below R.
+*
+            IF( N.GT.1 ) THEN
+               CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
+            END IF
+         END IF
+*
+         IE = 1
+         ITAUQ = IE + N
+         ITAUP = ITAUQ + N
+         NWORK = ITAUP + N
+*
+*        Bidiagonalize R in A.
+*        (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB)
+*
+         CALL DGEBRD( MM, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors of R.
+*        (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB)
+*
+         CALL DORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ),
+     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+*        Solve the bidiagonal least squares problem.
+*
+         CALL DLALSD( 'U', SMLSIZ, N, NRHS, S, WORK( IE ), B, LDB,
+     $                RCOND, RANK, WORK( NWORK ), IWORK, INFO )
+         IF( INFO.NE.0 ) THEN
+            GO TO 10
+         END IF
+*
+*        Multiply B by right bidiagonalizing vectors of R.
+*
+         CALL DORMBR( 'P', 'L', 'N', N, NRHS, N, A, LDA, WORK( ITAUP ),
+     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+      ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+
+     $         MAX( M, 2*M-4, NRHS, N-3*M ) ) THEN
+*
+*        Path 2a - underdetermined, with many more columns than rows
+*        and sufficient workspace for an efficient algorithm.
+*
+         LDWORK = M
+         IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ),
+     $       M*LDA+M+M*NRHS ) )LDWORK = LDA
+         ITAU = 1
+         NWORK = M + 1
+*
+*        Compute A=L*Q.
+*        (Workspace: need 2*M, prefer M+M*NB)
+*
+         CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                LWORK-NWORK+1, INFO )
+         IL = NWORK
+*
+*        Copy L to WORK(IL), zeroing out above its diagonal.
+*
+         CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK )
+         CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ),
+     $                LDWORK )
+         IE = IL + LDWORK*M
+         ITAUQ = IE + M
+         ITAUP = ITAUQ + M
+         NWORK = ITAUP + M
+*
+*        Bidiagonalize L in WORK(IL).
+*        (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB)
+*
+         CALL DGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ),
+     $                WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
+     $                LWORK-NWORK+1, INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors of L.
+*        (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB)
+*
+         CALL DORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK,
+     $                WORK( ITAUQ ), B, LDB, WORK( NWORK ),
+     $                LWORK-NWORK+1, INFO )
+*
+*        Solve the bidiagonal least squares problem.
+*
+         CALL DLALSD( 'U', SMLSIZ, M, NRHS, S, WORK( IE ), B, LDB,
+     $                RCOND, RANK, WORK( NWORK ), IWORK, INFO )
+         IF( INFO.NE.0 ) THEN
+            GO TO 10
+         END IF
+*
+*        Multiply B by right bidiagonalizing vectors of L.
+*
+         CALL DORMBR( 'P', 'L', 'N', M, NRHS, M, WORK( IL ), LDWORK,
+     $                WORK( ITAUP ), B, LDB, WORK( NWORK ),
+     $                LWORK-NWORK+1, INFO )
+*
+*        Zero out below first M rows of B.
+*
+         CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB )
+         NWORK = ITAU + M
+*
+*        Multiply transpose(Q) by B.
+*        (Workspace: need M+NRHS, prefer M+NRHS*NB)
+*
+         CALL DORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B,
+     $                LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+      ELSE
+*
+*        Path 2 - remaining underdetermined cases.
+*
+         IE = 1
+         ITAUQ = IE + M
+         ITAUP = ITAUQ + M
+         NWORK = ITAUP + M
+*
+*        Bidiagonalize A.
+*        (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
+*
+         CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors.
+*        (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB)
+*
+         CALL DORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ),
+     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+*        Solve the bidiagonal least squares problem.
+*
+         CALL DLALSD( 'L', SMLSIZ, M, NRHS, S, WORK( IE ), B, LDB,
+     $                RCOND, RANK, WORK( NWORK ), IWORK, INFO )
+         IF( INFO.NE.0 ) THEN
+            GO TO 10
+         END IF
+*
+*        Multiply B by right bidiagonalizing vectors of A.
+*
+         CALL DORMBR( 'P', 'L', 'N', N, NRHS, M, A, LDA, WORK( ITAUP ),
+     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+      END IF
+*
+*     Undo scaling.
+*
+      IF( IASCL.EQ.1 ) THEN
+         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
+         CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
+     $                INFO )
+      ELSE IF( IASCL.EQ.2 ) THEN
+         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
+         CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
+     $                INFO )
+      END IF
+      IF( IBSCL.EQ.1 ) THEN
+         CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
+      ELSE IF( IBSCL.EQ.2 ) THEN
+         CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
+      END IF
+*
+   10 CONTINUE
+      WORK( 1 ) = MAXWRK
+      RETURN
+*
+*     End of DGELSD
+*
+      END
diff --git a/costa/native/external/lapack/dgelss.f b/costa/native/external/lapack/dgelss.f
new file mode 100644
index 000000000..a437767db
--- /dev/null
+++ b/costa/native/external/lapack/dgelss.f
@@ -0,0 +1,613 @@
+      SUBROUTINE DGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGELSS computes the minimum norm solution to a real linear least
+*  squares problem:
+*
+*  Minimize 2-norm(| b - A*x |).
+*
+*  using the singular value decomposition (SVD) of A. A is an M-by-N
+*  matrix which may be rank-deficient.
+*
+*  Several right hand side vectors b and solution vectors x can be
+*  handled in a single call; they are stored as the columns of the
+*  M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix
+*  X.
+*
+*  The effective rank of A is determined by treating as zero those
+*  singular values which are less than RCOND times the largest singular
+*  value.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A. N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X. NRHS >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, the first min(m,n) rows of A are overwritten with
+*          its right singular vectors, stored rowwise.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the M-by-NRHS right hand side matrix B.
+*          On exit, B is overwritten by the N-by-NRHS solution
+*          matrix X.  If m >= n and RANK = n, the residual
+*          sum-of-squares for the solution in the i-th column is given
+*          by the sum of squares of elements n+1:m in that column.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,max(M,N)).
+*
+*  S       (output) DOUBLE PRECISION array, dimension (min(M,N))
+*          The singular values of A in decreasing order.
+*          The condition number of A in the 2-norm = S(1)/S(min(m,n)).
+*
+*  RCOND   (input) DOUBLE PRECISION
+*          RCOND is used to determine the effective rank of A.
+*          Singular values S(i) <= RCOND*S(1) are treated as zero.
+*          If RCOND < 0, machine precision is used instead.
+*
+*  RANK    (output) INTEGER
+*          The effective rank of A, i.e., the number of singular values
+*          which are greater than RCOND*S(1).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= 1, and also:
+*          LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS )
+*          For good performance, LWORK should generally be larger.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  the algorithm for computing the SVD failed to converge;
+*                if INFO = i, i off-diagonal elements of an intermediate
+*                bidiagonal form did not converge to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            BDSPAC, BL, CHUNK, I, IASCL, IBSCL, IE, IL,
+     $                   ITAU, ITAUP, ITAUQ, IWORK, LDWORK, MAXMN,
+     $                   MAXWRK, MINMN, MINWRK, MM, MNTHR
+      DOUBLE PRECISION   ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM, THR
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   VDUM( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DBDSQR, DCOPY, DGEBRD, DGELQF, DGEMM, DGEMV,
+     $                   DGEQRF, DLABAD, DLACPY, DLASCL, DLASET, DORGBR,
+     $                   DORMBR, DORMLQ, DORMQR, DRSCL, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, DLANGE
+      EXTERNAL           ILAENV, DLAMCH, DLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      MINMN = MIN( M, N )
+      MAXMN = MAX( M, N )
+      MNTHR = ILAENV( 6, 'DGELSS', ' ', M, N, NRHS, -1 )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN
+         INFO = -7
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN
+         MAXWRK = 0
+         MM = M
+         IF( M.GE.N .AND. M.GE.MNTHR ) THEN
+*
+*           Path 1a - overdetermined, with many more rows than columns
+*
+            MM = N
+            MAXWRK = MAX( MAXWRK, N+N*ILAENV( 1, 'DGEQRF', ' ', M, N,
+     $               -1, -1 ) )
+            MAXWRK = MAX( MAXWRK, N+NRHS*
+     $               ILAENV( 1, 'DORMQR', 'LT', M, NRHS, N, -1 ) )
+         END IF
+         IF( M.GE.N ) THEN
+*
+*           Path 1 - overdetermined or exactly determined
+*
+*           Compute workspace needed for DBDSQR
+*
+            BDSPAC = MAX( 1, 5*N )
+            MAXWRK = MAX( MAXWRK, 3*N+( MM+N )*
+     $               ILAENV( 1, 'DGEBRD', ' ', MM, N, -1, -1 ) )
+            MAXWRK = MAX( MAXWRK, 3*N+NRHS*
+     $               ILAENV( 1, 'DORMBR', 'QLT', MM, NRHS, N, -1 ) )
+            MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
+     $               ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
+            MAXWRK = MAX( MAXWRK, BDSPAC )
+            MAXWRK = MAX( MAXWRK, N*NRHS )
+            MINWRK = MAX( 3*N+MM, 3*N+NRHS, BDSPAC )
+            MAXWRK = MAX( MINWRK, MAXWRK )
+         END IF
+         IF( N.GT.M ) THEN
+*
+*           Compute workspace needed for DBDSQR
+*
+            BDSPAC = MAX( 1, 5*M )
+            MINWRK = MAX( 3*M+NRHS, 3*M+N, BDSPAC )
+            IF( N.GE.MNTHR ) THEN
+*
+*              Path 2a - underdetermined, with many more columns
+*              than rows
+*
+               MAXWRK = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
+               MAXWRK = MAX( MAXWRK, M*M+4*M+2*M*
+     $                  ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
+               MAXWRK = MAX( MAXWRK, M*M+4*M+NRHS*
+     $                  ILAENV( 1, 'DORMBR', 'QLT', M, NRHS, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, M*M+4*M+( M-1 )*
+     $                  ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, M*M+M+BDSPAC )
+               IF( NRHS.GT.1 ) THEN
+                  MAXWRK = MAX( MAXWRK, M*M+M+M*NRHS )
+               ELSE
+                  MAXWRK = MAX( MAXWRK, M*M+2*M )
+               END IF
+               MAXWRK = MAX( MAXWRK, M+NRHS*
+     $                  ILAENV( 1, 'DORMLQ', 'LT', N, NRHS, M, -1 ) )
+            ELSE
+*
+*              Path 2 - underdetermined
+*
+               MAXWRK = 3*M + ( N+M )*ILAENV( 1, 'DGEBRD', ' ', M, N,
+     $                  -1, -1 )
+               MAXWRK = MAX( MAXWRK, 3*M+NRHS*
+     $                  ILAENV( 1, 'DORMBR', 'QLT', M, NRHS, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, 3*M+M*
+     $                  ILAENV( 1, 'DORGBR', 'P', M, N, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, BDSPAC )
+               MAXWRK = MAX( MAXWRK, N*NRHS )
+            END IF
+         END IF
+         MAXWRK = MAX( MINWRK, MAXWRK )
+         WORK( 1 ) = MAXWRK
+      END IF
+*
+      MINWRK = MAX( MINWRK, 1 )
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY )
+     $   INFO = -12
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGELSS', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         RANK = 0
+         RETURN
+      END IF
+*
+*     Get machine parameters
+*
+      EPS = DLAMCH( 'P' )
+      SFMIN = DLAMCH( 'S' )
+      SMLNUM = SFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = DLANGE( 'M', M, N, A, LDA, WORK )
+      IASCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
+         IASCL = 1
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
+         IASCL = 2
+      ELSE IF( ANRM.EQ.ZERO ) THEN
+*
+*        Matrix all zero. Return zero solution.
+*
+         CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
+         CALL DLASET( 'F', MINMN, 1, ZERO, ZERO, S, 1 )
+         RANK = 0
+         GO TO 70
+      END IF
+*
+*     Scale B if max element outside range [SMLNUM,BIGNUM]
+*
+      BNRM = DLANGE( 'M', M, NRHS, B, LDB, WORK )
+      IBSCL = 0
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 1
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 2
+      END IF
+*
+*     Overdetermined case
+*
+      IF( M.GE.N ) THEN
+*
+*        Path 1 - overdetermined or exactly determined
+*
+         MM = M
+         IF( M.GE.MNTHR ) THEN
+*
+*           Path 1a - overdetermined, with many more rows than columns
+*
+            MM = N
+            ITAU = 1
+            IWORK = ITAU + N
+*
+*           Compute A=Q*R
+*           (Workspace: need 2*N, prefer N+N*NB)
+*
+            CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
+     $                   LWORK-IWORK+1, INFO )
+*
+*           Multiply B by transpose(Q)
+*           (Workspace: need N+NRHS, prefer N+NRHS*NB)
+*
+            CALL DORMQR( 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAU ), B,
+     $                   LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
+*
+*           Zero out below R
+*
+            IF( N.GT.1 )
+     $         CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
+         END IF
+*
+         IE = 1
+         ITAUQ = IE + N
+         ITAUP = ITAUQ + N
+         IWORK = ITAUP + N
+*
+*        Bidiagonalize R in A
+*        (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB)
+*
+         CALL DGEBRD( MM, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
+     $                INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors of R
+*        (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB)
+*
+         CALL DORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ),
+     $                B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
+*
+*        Generate right bidiagonalizing vectors of R in A
+*        (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
+*
+         CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
+     $                WORK( IWORK ), LWORK-IWORK+1, INFO )
+         IWORK = IE + N
+*
+*        Perform bidiagonal QR iteration
+*          multiply B by transpose of left singular vectors
+*          compute right singular vectors in A
+*        (Workspace: need BDSPAC)
+*
+         CALL DBDSQR( 'U', N, N, 0, NRHS, S, WORK( IE ), A, LDA, VDUM,
+     $                1, B, LDB, WORK( IWORK ), INFO )
+         IF( INFO.NE.0 )
+     $      GO TO 70
+*
+*        Multiply B by reciprocals of singular values
+*
+         THR = MAX( RCOND*S( 1 ), SFMIN )
+         IF( RCOND.LT.ZERO )
+     $      THR = MAX( EPS*S( 1 ), SFMIN )
+         RANK = 0
+         DO 10 I = 1, N
+            IF( S( I ).GT.THR ) THEN
+               CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB )
+               RANK = RANK + 1
+            ELSE
+               CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
+            END IF
+   10    CONTINUE
+*
+*        Multiply B by right singular vectors
+*        (Workspace: need N, prefer N*NRHS)
+*
+         IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN
+            CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, A, LDA, B, LDB, ZERO,
+     $                  WORK, LDB )
+            CALL DLACPY( 'G', N, NRHS, WORK, LDB, B, LDB )
+         ELSE IF( NRHS.GT.1 ) THEN
+            CHUNK = LWORK / N
+            DO 20 I = 1, NRHS, CHUNK
+               BL = MIN( NRHS-I+1, CHUNK )
+               CALL DGEMM( 'T', 'N', N, BL, N, ONE, A, LDA, B( 1, I ),
+     $                     LDB, ZERO, WORK, N )
+               CALL DLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB )
+   20       CONTINUE
+         ELSE
+            CALL DGEMV( 'T', N, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 )
+            CALL DCOPY( N, WORK, 1, B, 1 )
+         END IF
+*
+      ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+
+     $         MAX( M, 2*M-4, NRHS, N-3*M ) ) THEN
+*
+*        Path 2a - underdetermined, with many more columns than rows
+*        and sufficient workspace for an efficient algorithm
+*
+         LDWORK = M
+         IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ),
+     $       M*LDA+M+M*NRHS ) )LDWORK = LDA
+         ITAU = 1
+         IWORK = M + 1
+*
+*        Compute A=L*Q
+*        (Workspace: need 2*M, prefer M+M*NB)
+*
+         CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
+     $                LWORK-IWORK+1, INFO )
+         IL = IWORK
+*
+*        Copy L to WORK(IL), zeroing out above it
+*
+         CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK )
+         CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ),
+     $                LDWORK )
+         IE = IL + LDWORK*M
+         ITAUQ = IE + M
+         ITAUP = ITAUQ + M
+         IWORK = ITAUP + M
+*
+*        Bidiagonalize L in WORK(IL)
+*        (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB)
+*
+         CALL DGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ),
+     $                WORK( ITAUQ ), WORK( ITAUP ), WORK( IWORK ),
+     $                LWORK-IWORK+1, INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors of L
+*        (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB)
+*
+         CALL DORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK,
+     $                WORK( ITAUQ ), B, LDB, WORK( IWORK ),
+     $                LWORK-IWORK+1, INFO )
+*
+*        Generate right bidiagonalizing vectors of R in WORK(IL)
+*        (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB)
+*
+         CALL DORGBR( 'P', M, M, M, WORK( IL ), LDWORK, WORK( ITAUP ),
+     $                WORK( IWORK ), LWORK-IWORK+1, INFO )
+         IWORK = IE + M
+*
+*        Perform bidiagonal QR iteration,
+*           computing right singular vectors of L in WORK(IL) and
+*           multiplying B by transpose of left singular vectors
+*        (Workspace: need M*M+M+BDSPAC)
+*
+         CALL DBDSQR( 'U', M, M, 0, NRHS, S, WORK( IE ), WORK( IL ),
+     $                LDWORK, A, LDA, B, LDB, WORK( IWORK ), INFO )
+         IF( INFO.NE.0 )
+     $      GO TO 70
+*
+*        Multiply B by reciprocals of singular values
+*
+         THR = MAX( RCOND*S( 1 ), SFMIN )
+         IF( RCOND.LT.ZERO )
+     $      THR = MAX( EPS*S( 1 ), SFMIN )
+         RANK = 0
+         DO 30 I = 1, M
+            IF( S( I ).GT.THR ) THEN
+               CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB )
+               RANK = RANK + 1
+            ELSE
+               CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
+            END IF
+   30    CONTINUE
+         IWORK = IE
+*
+*        Multiply B by right singular vectors of L in WORK(IL)
+*        (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS)
+*
+         IF( LWORK.GE.LDB*NRHS+IWORK-1 .AND. NRHS.GT.1 ) THEN
+            CALL DGEMM( 'T', 'N', M, NRHS, M, ONE, WORK( IL ), LDWORK,
+     $                  B, LDB, ZERO, WORK( IWORK ), LDB )
+            CALL DLACPY( 'G', M, NRHS, WORK( IWORK ), LDB, B, LDB )
+         ELSE IF( NRHS.GT.1 ) THEN
+            CHUNK = ( LWORK-IWORK+1 ) / M
+            DO 40 I = 1, NRHS, CHUNK
+               BL = MIN( NRHS-I+1, CHUNK )
+               CALL DGEMM( 'T', 'N', M, BL, M, ONE, WORK( IL ), LDWORK,
+     $                     B( 1, I ), LDB, ZERO, WORK( IWORK ), N )
+               CALL DLACPY( 'G', M, BL, WORK( IWORK ), N, B( 1, I ),
+     $                      LDB )
+   40       CONTINUE
+         ELSE
+            CALL DGEMV( 'T', M, M, ONE, WORK( IL ), LDWORK, B( 1, 1 ),
+     $                  1, ZERO, WORK( IWORK ), 1 )
+            CALL DCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 )
+         END IF
+*
+*        Zero out below first M rows of B
+*
+         CALL DLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB )
+         IWORK = ITAU + M
+*
+*        Multiply transpose(Q) by B
+*        (Workspace: need M+NRHS, prefer M+NRHS*NB)
+*
+         CALL DORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B,
+     $                LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
+*
+      ELSE
+*
+*        Path 2 - remaining underdetermined cases
+*
+         IE = 1
+         ITAUQ = IE + M
+         ITAUP = ITAUQ + M
+         IWORK = ITAUP + M
+*
+*        Bidiagonalize A
+*        (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
+*
+         CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
+     $                INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors
+*        (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB)
+*
+         CALL DORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ),
+     $                B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
+*
+*        Generate right bidiagonalizing vectors in A
+*        (Workspace: need 4*M, prefer 3*M+M*NB)
+*
+         CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
+     $                WORK( IWORK ), LWORK-IWORK+1, INFO )
+         IWORK = IE + M
+*
+*        Perform bidiagonal QR iteration,
+*           computing right singular vectors of A in A and
+*           multiplying B by transpose of left singular vectors
+*        (Workspace: need BDSPAC)
+*
+         CALL DBDSQR( 'L', M, N, 0, NRHS, S, WORK( IE ), A, LDA, VDUM,
+     $                1, B, LDB, WORK( IWORK ), INFO )
+         IF( INFO.NE.0 )
+     $      GO TO 70
+*
+*        Multiply B by reciprocals of singular values
+*
+         THR = MAX( RCOND*S( 1 ), SFMIN )
+         IF( RCOND.LT.ZERO )
+     $      THR = MAX( EPS*S( 1 ), SFMIN )
+         RANK = 0
+         DO 50 I = 1, M
+            IF( S( I ).GT.THR ) THEN
+               CALL DRSCL( NRHS, S( I ), B( I, 1 ), LDB )
+               RANK = RANK + 1
+            ELSE
+               CALL DLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
+            END IF
+   50    CONTINUE
+*
+*        Multiply B by right singular vectors of A
+*        (Workspace: need N, prefer N*NRHS)
+*
+         IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN
+            CALL DGEMM( 'T', 'N', N, NRHS, M, ONE, A, LDA, B, LDB, ZERO,
+     $                  WORK, LDB )
+            CALL DLACPY( 'F', N, NRHS, WORK, LDB, B, LDB )
+         ELSE IF( NRHS.GT.1 ) THEN
+            CHUNK = LWORK / N
+            DO 60 I = 1, NRHS, CHUNK
+               BL = MIN( NRHS-I+1, CHUNK )
+               CALL DGEMM( 'T', 'N', N, BL, M, ONE, A, LDA, B( 1, I ),
+     $                     LDB, ZERO, WORK, N )
+               CALL DLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB )
+   60       CONTINUE
+         ELSE
+            CALL DGEMV( 'T', M, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 )
+            CALL DCOPY( N, WORK, 1, B, 1 )
+         END IF
+      END IF
+*
+*     Undo scaling
+*
+      IF( IASCL.EQ.1 ) THEN
+         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
+         CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
+     $                INFO )
+      ELSE IF( IASCL.EQ.2 ) THEN
+         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
+         CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
+     $                INFO )
+      END IF
+      IF( IBSCL.EQ.1 ) THEN
+         CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
+      ELSE IF( IBSCL.EQ.2 ) THEN
+         CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
+      END IF
+*
+   70 CONTINUE
+      WORK( 1 ) = MAXWRK
+      RETURN
+*
+*     End of DGELSS
+*
+      END
diff --git a/costa/native/external/lapack/dgelsx.f b/costa/native/external/lapack/dgelsx.f
new file mode 100644
index 000000000..2a009a4c3
--- /dev/null
+++ b/costa/native/external/lapack/dgelsx.f
@@ -0,0 +1,350 @@
+      SUBROUTINE DGELSX( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK,
+     $                   WORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, M, N, NRHS, RANK
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            JPVT( * )
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This routine is deprecated and has been replaced by routine DGELSY.
+*
+*  DGELSX computes the minimum-norm solution to a real linear least
+*  squares problem:
+*      minimize || A * X - B ||
+*  using a complete orthogonal factorization of A.  A is an M-by-N
+*  matrix which may be rank-deficient.
+*
+*  Several right hand side vectors b and solution vectors x can be
+*  handled in a single call; they are stored as the columns of the
+*  M-by-NRHS right hand side matrix B and the N-by-NRHS solution
+*  matrix X.
+*
+*  The routine first computes a QR factorization with column pivoting:
+*      A * P = Q * [ R11 R12 ]
+*                  [  0  R22 ]
+*  with R11 defined as the largest leading submatrix whose estimated
+*  condition number is less than 1/RCOND.  The order of R11, RANK,
+*  is the effective rank of A.
+*
+*  Then, R22 is considered to be negligible, and R12 is annihilated
+*  by orthogonal transformations from the right, arriving at the
+*  complete orthogonal factorization:
+*     A * P = Q * [ T11 0 ] * Z
+*                 [  0  0 ]
+*  The minimum-norm solution is then
+*     X = P * Z' [ inv(T11)*Q1'*B ]
+*                [        0       ]
+*  where Q1 consists of the first RANK columns of Q.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of
+*          columns of matrices B and X. NRHS >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, A has been overwritten by details of its
+*          complete orthogonal factorization.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the M-by-NRHS right hand side matrix B.
+*          On exit, the N-by-NRHS solution matrix X.
+*          If m >= n and RANK = n, the residual sum-of-squares for
+*          the solution in the i-th column is given by the sum of
+*          squares of elements N+1:M in that column.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,M,N).
+*
+*  JPVT    (input/output) INTEGER array, dimension (N)
+*          On entry, if JPVT(i) .ne. 0, the i-th column of A is an
+*          initial column, otherwise it is a free column.  Before
+*          the QR factorization of A, all initial columns are
+*          permuted to the leading positions; only the remaining
+*          free columns are moved as a result of column pivoting
+*          during the factorization.
+*          On exit, if JPVT(i) = k, then the i-th column of A*P
+*          was the k-th column of A.
+*
+*  RCOND   (input) DOUBLE PRECISION
+*          RCOND is used to determine the effective rank of A, which
+*          is defined as the order of the largest leading triangular
+*          submatrix R11 in the QR factorization with pivoting of A,
+*          whose estimated condition number < 1/RCOND.
+*
+*  RANK    (output) INTEGER
+*          The effective rank of A, i.e., the order of the submatrix
+*          R11.  This is the same as the order of the submatrix T11
+*          in the complete orthogonal factorization of A.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension
+*                      (max( min(M,N)+3*N, 2*min(M,N)+NRHS )),
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            IMAX, IMIN
+      PARAMETER          ( IMAX = 1, IMIN = 2 )
+      DOUBLE PRECISION   ZERO, ONE, DONE, NTDONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, DONE = ZERO,
+     $                   NTDONE = ONE )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IASCL, IBSCL, ISMAX, ISMIN, J, K, MN
+      DOUBLE PRECISION   ANRM, BIGNUM, BNRM, C1, C2, S1, S2, SMAX,
+     $                   SMAXPR, SMIN, SMINPR, SMLNUM, T1, T2
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH, DLANGE
+      EXTERNAL           DLAMCH, DLANGE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEQPF, DLAIC1, DLASCL, DLASET, DLATZM, DORM2R,
+     $                   DTRSM, DTZRQF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      MN = MIN( M, N )
+      ISMIN = MN + 1
+      ISMAX = 2*MN + 1
+*
+*     Test the input arguments.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN
+         INFO = -7
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGELSX', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( MIN( M, N, NRHS ).EQ.0 ) THEN
+         RANK = 0
+         RETURN
+      END IF
+*
+*     Get machine parameters
+*
+      SMLNUM = DLAMCH( 'S' ) / DLAMCH( 'P' )
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+*
+*     Scale A, B if max elements outside range [SMLNUM,BIGNUM]
+*
+      ANRM = DLANGE( 'M', M, N, A, LDA, WORK )
+      IASCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
+         IASCL = 1
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
+         IASCL = 2
+      ELSE IF( ANRM.EQ.ZERO ) THEN
+*
+*        Matrix all zero. Return zero solution.
+*
+         CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
+         RANK = 0
+         GO TO 100
+      END IF
+*
+      BNRM = DLANGE( 'M', M, NRHS, B, LDB, WORK )
+      IBSCL = 0
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 1
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 2
+      END IF
+*
+*     Compute QR factorization with column pivoting of A:
+*        A * P = Q * R
+*
+      CALL DGEQPF( M, N, A, LDA, JPVT, WORK( 1 ), WORK( MN+1 ), INFO )
+*
+*     workspace 3*N. Details of Householder rotations stored
+*     in WORK(1:MN).
+*
+*     Determine RANK using incremental condition estimation
+*
+      WORK( ISMIN ) = ONE
+      WORK( ISMAX ) = ONE
+      SMAX = ABS( A( 1, 1 ) )
+      SMIN = SMAX
+      IF( ABS( A( 1, 1 ) ).EQ.ZERO ) THEN
+         RANK = 0
+         CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
+         GO TO 100
+      ELSE
+         RANK = 1
+      END IF
+*
+   10 CONTINUE
+      IF( RANK.LT.MN ) THEN
+         I = RANK + 1
+         CALL DLAIC1( IMIN, RANK, WORK( ISMIN ), SMIN, A( 1, I ),
+     $                A( I, I ), SMINPR, S1, C1 )
+         CALL DLAIC1( IMAX, RANK, WORK( ISMAX ), SMAX, A( 1, I ),
+     $                A( I, I ), SMAXPR, S2, C2 )
+*
+         IF( SMAXPR*RCOND.LE.SMINPR ) THEN
+            DO 20 I = 1, RANK
+               WORK( ISMIN+I-1 ) = S1*WORK( ISMIN+I-1 )
+               WORK( ISMAX+I-1 ) = S2*WORK( ISMAX+I-1 )
+   20       CONTINUE
+            WORK( ISMIN+RANK ) = C1
+            WORK( ISMAX+RANK ) = C2
+            SMIN = SMINPR
+            SMAX = SMAXPR
+            RANK = RANK + 1
+            GO TO 10
+         END IF
+      END IF
+*
+*     Logically partition R = [ R11 R12 ]
+*                             [  0  R22 ]
+*     where R11 = R(1:RANK,1:RANK)
+*
+*     [R11,R12] = [ T11, 0 ] * Y
+*
+      IF( RANK.LT.N )
+     $   CALL DTZRQF( RANK, N, A, LDA, WORK( MN+1 ), INFO )
+*
+*     Details of Householder rotations stored in WORK(MN+1:2*MN)
+*
+*     B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS)
+*
+      CALL DORM2R( 'Left', 'Transpose', M, NRHS, MN, A, LDA, WORK( 1 ),
+     $             B, LDB, WORK( 2*MN+1 ), INFO )
+*
+*     workspace NRHS
+*
+*     B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS)
+*
+      CALL DTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', RANK,
+     $            NRHS, ONE, A, LDA, B, LDB )
+*
+      DO 40 I = RANK + 1, N
+         DO 30 J = 1, NRHS
+            B( I, J ) = ZERO
+   30    CONTINUE
+   40 CONTINUE
+*
+*     B(1:N,1:NRHS) := Y' * B(1:N,1:NRHS)
+*
+      IF( RANK.LT.N ) THEN
+         DO 50 I = 1, RANK
+            CALL DLATZM( 'Left', N-RANK+1, NRHS, A( I, RANK+1 ), LDA,
+     $                   WORK( MN+I ), B( I, 1 ), B( RANK+1, 1 ), LDB,
+     $                   WORK( 2*MN+1 ) )
+   50    CONTINUE
+      END IF
+*
+*     workspace NRHS
+*
+*     B(1:N,1:NRHS) := P * B(1:N,1:NRHS)
+*
+      DO 90 J = 1, NRHS
+         DO 60 I = 1, N
+            WORK( 2*MN+I ) = NTDONE
+   60    CONTINUE
+         DO 80 I = 1, N
+            IF( WORK( 2*MN+I ).EQ.NTDONE ) THEN
+               IF( JPVT( I ).NE.I ) THEN
+                  K = I
+                  T1 = B( K, J )
+                  T2 = B( JPVT( K ), J )
+   70             CONTINUE
+                  B( JPVT( K ), J ) = T1
+                  WORK( 2*MN+K ) = DONE
+                  T1 = T2
+                  K = JPVT( K )
+                  T2 = B( JPVT( K ), J )
+                  IF( JPVT( K ).NE.I )
+     $               GO TO 70
+                  B( I, J ) = T1
+                  WORK( 2*MN+K ) = DONE
+               END IF
+            END IF
+   80    CONTINUE
+   90 CONTINUE
+*
+*     Undo scaling
+*
+      IF( IASCL.EQ.1 ) THEN
+         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
+         CALL DLASCL( 'U', 0, 0, SMLNUM, ANRM, RANK, RANK, A, LDA,
+     $                INFO )
+      ELSE IF( IASCL.EQ.2 ) THEN
+         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
+         CALL DLASCL( 'U', 0, 0, BIGNUM, ANRM, RANK, RANK, A, LDA,
+     $                INFO )
+      END IF
+      IF( IBSCL.EQ.1 ) THEN
+         CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
+      ELSE IF( IBSCL.EQ.2 ) THEN
+         CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
+      END IF
+*
+  100 CONTINUE
+*
+      RETURN
+*
+*     End of DGELSX
+*
+      END
diff --git a/costa/native/external/lapack/dgelsy.f b/costa/native/external/lapack/dgelsy.f
new file mode 100644
index 000000000..5690c5403
--- /dev/null
+++ b/costa/native/external/lapack/dgelsy.f
@@ -0,0 +1,379 @@
+      SUBROUTINE DGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            JPVT( * )
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGELSY computes the minimum-norm solution to a real linear least
+*  squares problem:
+*      minimize || A * X - B ||
+*  using a complete orthogonal factorization of A.  A is an M-by-N
+*  matrix which may be rank-deficient.
+*
+*  Several right hand side vectors b and solution vectors x can be
+*  handled in a single call; they are stored as the columns of the
+*  M-by-NRHS right hand side matrix B and the N-by-NRHS solution
+*  matrix X.
+*
+*  The routine first computes a QR factorization with column pivoting:
+*      A * P = Q * [ R11 R12 ]
+*                  [  0  R22 ]
+*  with R11 defined as the largest leading submatrix whose estimated
+*  condition number is less than 1/RCOND.  The order of R11, RANK,
+*  is the effective rank of A.
+*
+*  Then, R22 is considered to be negligible, and R12 is annihilated
+*  by orthogonal transformations from the right, arriving at the
+*  complete orthogonal factorization:
+*     A * P = Q * [ T11 0 ] * Z
+*                 [  0  0 ]
+*  The minimum-norm solution is then
+*     X = P * Z' [ inv(T11)*Q1'*B ]
+*                [        0       ]
+*  where Q1 consists of the first RANK columns of Q.
+*
+*  This routine is basically identical to the original xGELSX except
+*  three differences:
+*    o The call to the subroutine xGEQPF has been substituted by the
+*      the call to the subroutine xGEQP3. This subroutine is a Blas-3
+*      version of the QR factorization with column pivoting.
+*    o Matrix B (the right hand side) is updated with Blas-3.
+*    o The permutation of matrix B (the right hand side) is faster and
+*      more simple.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of
+*          columns of matrices B and X. NRHS >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, A has been overwritten by details of its
+*          complete orthogonal factorization.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the M-by-NRHS right hand side matrix B.
+*          On exit, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,M,N).
+*
+*  JPVT    (input/output) INTEGER array, dimension (N)
+*          On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted
+*          to the front of AP, otherwise column i is a free column.
+*          On exit, if JPVT(i) = k, then the i-th column of AP
+*          was the k-th column of A.
+*
+*  RCOND   (input) DOUBLE PRECISION
+*          RCOND is used to determine the effective rank of A, which
+*          is defined as the order of the largest leading triangular
+*          submatrix R11 in the QR factorization with pivoting of A,
+*          whose estimated condition number < 1/RCOND.
+*
+*  RANK    (output) INTEGER
+*          The effective rank of A, i.e., the order of the submatrix
+*          R11.  This is the same as the order of the submatrix T11
+*          in the complete orthogonal factorization of A.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          The unblocked strategy requires that:
+*             LWORK >= MAX( MN+3*N+1, 2*MN+NRHS ),
+*          where MN = min( M, N ).
+*          The block algorithm requires that:
+*             LWORK >= MAX( MN+2*N+NB*(N+1), 2*MN+NB*NRHS ),
+*          where NB is an upper bound on the blocksize returned
+*          by ILAENV for the routines DGEQP3, DTZRZF, STZRQF, DORMQR,
+*          and DORMRZ.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: If INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*    E. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain
+*    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            IMAX, IMIN
+      PARAMETER          ( IMAX = 1, IMIN = 2 )
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IASCL, IBSCL, ISMAX, ISMIN, J, LWKOPT, MN,
+     $                   NB, NB1, NB2, NB3, NB4
+      DOUBLE PRECISION   ANRM, BIGNUM, BNRM, C1, C2, S1, S2, SMAX,
+     $                   SMAXPR, SMIN, SMINPR, SMLNUM, WSIZE
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, DLANGE
+      EXTERNAL           ILAENV, DLAMCH, DLANGE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DGEQP3, DLABAD, DLAIC1, DLASCL, DLASET,
+     $                   DORMQR, DORMRZ, DTRSM, DTZRZF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      MN = MIN( M, N )
+      ISMIN = MN + 1
+      ISMAX = 2*MN + 1
+*
+*     Test the input arguments.
+*
+      INFO = 0
+      NB1 = ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
+      NB2 = ILAENV( 1, 'DGERQF', ' ', M, N, -1, -1 )
+      NB3 = ILAENV( 1, 'DORMQR', ' ', M, N, NRHS, -1 )
+      NB4 = ILAENV( 1, 'DORMRQ', ' ', M, N, NRHS, -1 )
+      NB = MAX( NB1, NB2, NB3, NB4 )
+      LWKOPT = MAX( 1, MN+2*N+NB*( N+1 ), 2*MN+NB*NRHS )
+      WORK( 1 ) = DBLE( LWKOPT )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN
+         INFO = -7
+      ELSE IF( LWORK.LT.MAX( 1, MN+3*N+1, 2*MN+NRHS ) .AND. .NOT.
+     $         LQUERY ) THEN
+         INFO = -12
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGELSY', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( MIN( M, N, NRHS ).EQ.0 ) THEN
+         RANK = 0
+         RETURN
+      END IF
+*
+*     Get machine parameters
+*
+      SMLNUM = DLAMCH( 'S' ) / DLAMCH( 'P' )
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+*
+*     Scale A, B if max entries outside range [SMLNUM,BIGNUM]
+*
+      ANRM = DLANGE( 'M', M, N, A, LDA, WORK )
+      IASCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
+         IASCL = 1
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
+         IASCL = 2
+      ELSE IF( ANRM.EQ.ZERO ) THEN
+*
+*        Matrix all zero. Return zero solution.
+*
+         CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
+         RANK = 0
+         GO TO 70
+      END IF
+*
+      BNRM = DLANGE( 'M', M, NRHS, B, LDB, WORK )
+      IBSCL = 0
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL DLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 1
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL DLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 2
+      END IF
+*
+*     Compute QR factorization with column pivoting of A:
+*        A * P = Q * R
+*
+      CALL DGEQP3( M, N, A, LDA, JPVT, WORK( 1 ), WORK( MN+1 ),
+     $             LWORK-MN, INFO )
+      WSIZE = MN + WORK( MN+1 )
+*
+*     workspace: MN+2*N+NB*(N+1).
+*     Details of Householder rotations stored in WORK(1:MN).
+*
+*     Determine RANK using incremental condition estimation
+*
+      WORK( ISMIN ) = ONE
+      WORK( ISMAX ) = ONE
+      SMAX = ABS( A( 1, 1 ) )
+      SMIN = SMAX
+      IF( ABS( A( 1, 1 ) ).EQ.ZERO ) THEN
+         RANK = 0
+         CALL DLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
+         GO TO 70
+      ELSE
+         RANK = 1
+      END IF
+*
+   10 CONTINUE
+      IF( RANK.LT.MN ) THEN
+         I = RANK + 1
+         CALL DLAIC1( IMIN, RANK, WORK( ISMIN ), SMIN, A( 1, I ),
+     $                A( I, I ), SMINPR, S1, C1 )
+         CALL DLAIC1( IMAX, RANK, WORK( ISMAX ), SMAX, A( 1, I ),
+     $                A( I, I ), SMAXPR, S2, C2 )
+*
+         IF( SMAXPR*RCOND.LE.SMINPR ) THEN
+            DO 20 I = 1, RANK
+               WORK( ISMIN+I-1 ) = S1*WORK( ISMIN+I-1 )
+               WORK( ISMAX+I-1 ) = S2*WORK( ISMAX+I-1 )
+   20       CONTINUE
+            WORK( ISMIN+RANK ) = C1
+            WORK( ISMAX+RANK ) = C2
+            SMIN = SMINPR
+            SMAX = SMAXPR
+            RANK = RANK + 1
+            GO TO 10
+         END IF
+      END IF
+*
+*     workspace: 3*MN.
+*
+*     Logically partition R = [ R11 R12 ]
+*                             [  0  R22 ]
+*     where R11 = R(1:RANK,1:RANK)
+*
+*     [R11,R12] = [ T11, 0 ] * Y
+*
+      IF( RANK.LT.N )
+     $   CALL DTZRZF( RANK, N, A, LDA, WORK( MN+1 ), WORK( 2*MN+1 ),
+     $                LWORK-2*MN, INFO )
+*
+*     workspace: 2*MN.
+*     Details of Householder rotations stored in WORK(MN+1:2*MN)
+*
+*     B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS)
+*
+      CALL DORMQR( 'Left', 'Transpose', M, NRHS, MN, A, LDA, WORK( 1 ),
+     $             B, LDB, WORK( 2*MN+1 ), LWORK-2*MN, INFO )
+      WSIZE = MAX( WSIZE, 2*MN+WORK( 2*MN+1 ) )
+*
+*     workspace: 2*MN+NB*NRHS.
+*
+*     B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS)
+*
+      CALL DTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', RANK,
+     $            NRHS, ONE, A, LDA, B, LDB )
+*
+      DO 40 J = 1, NRHS
+         DO 30 I = RANK + 1, N
+            B( I, J ) = ZERO
+   30    CONTINUE
+   40 CONTINUE
+*
+*     B(1:N,1:NRHS) := Y' * B(1:N,1:NRHS)
+*
+      IF( RANK.LT.N ) THEN
+         CALL DORMRZ( 'Left', 'Transpose', N, NRHS, RANK, N-RANK, A,
+     $                LDA, WORK( MN+1 ), B, LDB, WORK( 2*MN+1 ),
+     $                LWORK-2*MN, INFO )
+      END IF
+*
+*     workspace: 2*MN+NRHS.
+*
+*     B(1:N,1:NRHS) := P * B(1:N,1:NRHS)
+*
+      DO 60 J = 1, NRHS
+         DO 50 I = 1, N
+            WORK( JPVT( I ) ) = B( I, J )
+   50    CONTINUE
+         CALL DCOPY( N, WORK( 1 ), 1, B( 1, J ), 1 )
+   60 CONTINUE
+*
+*     workspace: N.
+*
+*     Undo scaling
+*
+      IF( IASCL.EQ.1 ) THEN
+         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
+         CALL DLASCL( 'U', 0, 0, SMLNUM, ANRM, RANK, RANK, A, LDA,
+     $                INFO )
+      ELSE IF( IASCL.EQ.2 ) THEN
+         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
+         CALL DLASCL( 'U', 0, 0, BIGNUM, ANRM, RANK, RANK, A, LDA,
+     $                INFO )
+      END IF
+      IF( IBSCL.EQ.1 ) THEN
+         CALL DLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
+      ELSE IF( IBSCL.EQ.2 ) THEN
+         CALL DLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
+      END IF
+*
+   70 CONTINUE
+      WORK( 1 ) = DBLE( LWKOPT )
+*
+      RETURN
+*
+*     End of DGELSY
+*
+      END
diff --git a/costa/native/external/lapack/dgeql2.f b/costa/native/external/lapack/dgeql2.f
new file mode 100644
index 000000000..3df1b0f14
--- /dev/null
+++ b/costa/native/external/lapack/dgeql2.f
@@ -0,0 +1,123 @@
+      SUBROUTINE DGEQL2( M, N, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGEQL2 computes a QL factorization of a real m by n matrix A:
+*  A = Q * L.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the m by n matrix A.
+*          On exit, if m >= n, the lower triangle of the subarray
+*          A(m-n+1:m,1:n) contains the n by n lower triangular matrix L;
+*          if m <= n, the elements on and below the (n-m)-th
+*          superdiagonal contain the m by n lower trapezoidal matrix L;
+*          the remaining elements, with the array TAU, represent the
+*          orthogonal matrix Q as a product of elementary reflectors
+*          (see Further Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) DOUBLE PRECISION array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(k) . . . H(2) H(1), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in
+*  A(1:m-k+i-1,n-k+i), and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, K
+      DOUBLE PRECISION   AII
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARF, DLARFG, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGEQL2', -INFO )
+         RETURN
+      END IF
+*
+      K = MIN( M, N )
+*
+      DO 10 I = K, 1, -1
+*
+*        Generate elementary reflector H(i) to annihilate
+*        A(1:m-k+i-1,n-k+i)
+*
+         CALL DLARFG( M-K+I, A( M-K+I, N-K+I ), A( 1, N-K+I ), 1,
+     $                TAU( I ) )
+*
+*        Apply H(i) to A(1:m-k+i,1:n-k+i-1) from the left
+*
+         AII = A( M-K+I, N-K+I )
+         A( M-K+I, N-K+I ) = ONE
+         CALL DLARF( 'Left', M-K+I, N-K+I-1, A( 1, N-K+I ), 1, TAU( I ),
+     $               A, LDA, WORK )
+         A( M-K+I, N-K+I ) = AII
+   10 CONTINUE
+      RETURN
+*
+*     End of DGEQL2
+*
+      END
diff --git a/costa/native/external/lapack/dgeqlf.f b/costa/native/external/lapack/dgeqlf.f
new file mode 100644
index 000000000..3bfd1d766
--- /dev/null
+++ b/costa/native/external/lapack/dgeqlf.f
@@ -0,0 +1,205 @@
+      SUBROUTINE DGEQLF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGEQLF computes a QL factorization of a real M-by-N matrix A:
+*  A = Q * L.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit,
+*          if m >= n, the lower triangle of the subarray
+*          A(m-n+1:m,1:n) contains the N-by-N lower triangular matrix L;
+*          if m <= n, the elements on and below the (n-m)-th
+*          superdiagonal contain the M-by-N lower trapezoidal matrix L;
+*          the remaining elements, with the array TAU, represent the
+*          orthogonal matrix Q as a product of elementary reflectors
+*          (see Further Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) DOUBLE PRECISION array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,N).
+*          For optimum performance LWORK >= N*NB, where NB is the
+*          optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(k) . . . H(2) H(1), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in
+*  A(1:m-k+i-1,n-k+i), and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, K, KI, KK, LDWORK, LWKOPT,
+     $                   MU, NB, NBMIN, NU, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEQL2, DLARFB, DLARFT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'DGEQLF', ' ', M, N, -1, -1 )
+      LWKOPT = N*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGEQLF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      K = MIN( M, N )
+      IF( K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 1
+      IWS = N
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'DGEQLF', ' ', M, N, -1, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = N
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'DGEQLF', ' ', M, N, -1,
+     $                 -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code initially.
+*        The last kk columns are handled by the block method.
+*
+         KI = ( ( K-NX-1 ) / NB )*NB
+         KK = MIN( K, KI+NB )
+*
+         DO 10 I = K - KK + KI + 1, K - KK + 1, -NB
+            IB = MIN( K-I+1, NB )
+*
+*           Compute the QL factorization of the current block
+*           A(1:m-k+i+ib-1,n-k+i:n-k+i+ib-1)
+*
+            CALL DGEQL2( M-K+I+IB-1, IB, A( 1, N-K+I ), LDA, TAU( I ),
+     $                   WORK, IINFO )
+            IF( N-K+I.GT.1 ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i+ib-1) . . . H(i+1) H(i)
+*
+               CALL DLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB,
+     $                      A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H' to A(1:m-k+i+ib-1,1:n-k+i-1) from the left
+*
+               CALL DLARFB( 'Left', 'Transpose', 'Backward',
+     $                      'Columnwise', M-K+I+IB-1, N-K+I-1, IB,
+     $                      A( 1, N-K+I ), LDA, WORK, LDWORK, A, LDA,
+     $                      WORK( IB+1 ), LDWORK )
+            END IF
+   10    CONTINUE
+         MU = M - K + I + NB - 1
+         NU = N - K + I + NB - 1
+      ELSE
+         MU = M
+         NU = N
+      END IF
+*
+*     Use unblocked code to factor the last or only block
+*
+      IF( MU.GT.0 .AND. NU.GT.0 )
+     $   CALL DGEQL2( MU, NU, A, LDA, TAU, WORK, IINFO )
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of DGEQLF
+*
+      END
diff --git a/costa/native/external/lapack/dgeqp3.f b/costa/native/external/lapack/dgeqp3.f
new file mode 100644
index 000000000..8ccde4506
--- /dev/null
+++ b/costa/native/external/lapack/dgeqp3.f
@@ -0,0 +1,279 @@
+      SUBROUTINE DGEQP3( M, N, A, LDA, JPVT, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            JPVT( * )
+      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGEQP3 computes a QR factorization with column pivoting of a
+*  matrix A:  A*P = Q*R  using Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, the upper triangle of the array contains the
+*          min(M,N)-by-N upper trapezoidal matrix R; the elements below
+*          the diagonal, together with the array TAU, represent the
+*          orthogonal matrix Q as a product of min(M,N) elementary
+*          reflectors.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  JPVT    (input/output) INTEGER array, dimension (N)
+*          On entry, if JPVT(J).ne.0, the J-th column of A is permuted
+*          to the front of A*P (a leading column); if JPVT(J)=0,
+*          the J-th column of A is a free column.
+*          On exit, if JPVT(J)=K, then the J-th column of A*P was the
+*          the K-th column of A.
+*
+*  TAU     (output) DOUBLE PRECISION array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO=0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= 3*N+1.
+*          For optimal performance LWORK >= 2*N+( N+1 )*NB, where NB
+*          is the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit.
+*          < 0: if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(k), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real/complex scalar, and v is a real/complex vector
+*  with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in
+*  A(i+1:m,i), and tau in TAU(i).
+*
+*  Based on contributions by
+*    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain
+*    X. Sun, Computer Science Dept., Duke University, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            INB, INBMIN, IXOVER
+      PARAMETER          ( INB = 1, INBMIN = 2, IXOVER = 3 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            FJB, IWS, J, JB, LWKOPT, MINMN, MINWS, NA, NB,
+     $                   NBMIN, NFXD, NX, SM, SMINMN, SN, TOPBMN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEQRF, DLAQP2, DLAQPS, DORMQR, DSWAP, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DNRM2
+      EXTERNAL           ILAENV, DNRM2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      IWS = 3*N + 1
+      MINMN = MIN( M, N )
+*
+*     Test input arguments
+*     ====================
+*
+      INFO = 0
+      NB = ILAENV( INB, 'DGEQRF', ' ', M, N, -1, -1 )
+      LWKOPT = 2*N + ( N+1 )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( ( LWORK.LT.IWS ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGEQP3', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( MINMN.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+*     Move initial columns up front.
+*
+      NFXD = 1
+      DO 10 J = 1, N
+         IF( JPVT( J ).NE.0 ) THEN
+            IF( J.NE.NFXD ) THEN
+               CALL DSWAP( M, A( 1, J ), 1, A( 1, NFXD ), 1 )
+               JPVT( J ) = JPVT( NFXD )
+               JPVT( NFXD ) = J
+            ELSE
+               JPVT( J ) = J
+            END IF
+            NFXD = NFXD + 1
+         ELSE
+            JPVT( J ) = J
+         END IF
+   10 CONTINUE
+      NFXD = NFXD - 1
+*
+*     Factorize fixed columns
+*     =======================
+*
+*     Compute the QR factorization of fixed columns and update
+*     remaining columns.
+*
+      IF( NFXD.GT.0 ) THEN
+         NA = MIN( M, NFXD )
+*CC      CALL DGEQR2( M, NA, A, LDA, TAU, WORK, INFO )
+         CALL DGEQRF( M, NA, A, LDA, TAU, WORK, LWORK, INFO )
+         IWS = MAX( IWS, INT( WORK( 1 ) ) )
+         IF( NA.LT.N ) THEN
+*CC         CALL DORM2R( 'Left', 'Transpose', M, N-NA, NA, A, LDA,
+*CC  $                   TAU, A( 1, NA+1 ), LDA, WORK, INFO )
+            CALL DORMQR( 'Left', 'Transpose', M, N-NA, NA, A, LDA, TAU,
+     $                   A( 1, NA+1 ), LDA, WORK, LWORK, INFO )
+            IWS = MAX( IWS, INT( WORK( 1 ) ) )
+         END IF
+      END IF
+*
+*     Factorize free columns
+*     ======================
+*
+      IF( NFXD.LT.MINMN ) THEN
+*
+         SM = M - NFXD
+         SN = N - NFXD
+         SMINMN = MINMN - NFXD
+*
+*        Determine the block size.
+*
+         NB = ILAENV( INB, 'DGEQRF', ' ', SM, SN, -1, -1 )
+         NBMIN = 2
+         NX = 0
+*
+         IF( ( NB.GT.1 ) .AND. ( NB.LT.SMINMN ) ) THEN
+*
+*           Determine when to cross over from blocked to unblocked code.
+*
+            NX = MAX( 0, ILAENV( IXOVER, 'DGEQRF', ' ', SM, SN, -1,
+     $           -1 ) )
+*
+*
+            IF( NX.LT.SMINMN ) THEN
+*
+*              Determine if workspace is large enough for blocked code.
+*
+               MINWS = 2*SN + ( SN+1 )*NB
+               IWS = MAX( IWS, MINWS )
+               IF( LWORK.LT.MINWS ) THEN
+*
+*                 Not enough workspace to use optimal NB: Reduce NB and
+*                 determine the minimum value of NB.
+*
+                  NB = ( LWORK-2*SN ) / ( SN+1 )
+                  NBMIN = MAX( 2, ILAENV( INBMIN, 'DGEQRF', ' ', SM, SN,
+     $                    -1, -1 ) )
+*
+*
+               END IF
+            END IF
+         END IF
+*
+*        Initialize partial column norms. The first N elements of work
+*        store the exact column norms.
+*
+         DO 20 J = NFXD + 1, N
+            WORK( J ) = DNRM2( SM, A( NFXD+1, J ), 1 )
+            WORK( N+J ) = WORK( J )
+   20    CONTINUE
+*
+         IF( ( NB.GE.NBMIN ) .AND. ( NB.LT.SMINMN ) .AND.
+     $       ( NX.LT.SMINMN ) ) THEN
+*
+*           Use blocked code initially.
+*
+            J = NFXD + 1
+*
+*           Compute factorization: while loop.
+*
+*
+            TOPBMN = MINMN - NX
+   30       CONTINUE
+            IF( J.LE.TOPBMN ) THEN
+               JB = MIN( NB, TOPBMN-J+1 )
+*
+*              Factorize JB columns among columns J:N.
+*
+               CALL DLAQPS( M, N-J+1, J-1, JB, FJB, A( 1, J ), LDA,
+     $                      JPVT( J ), TAU( J ), WORK( J ), WORK( N+J ),
+     $                      WORK( 2*N+1 ), WORK( 2*N+JB+1 ), N-J+1 )
+*
+               J = J + FJB
+               GO TO 30
+            END IF
+         ELSE
+            J = NFXD + 1
+         END IF
+*
+*        Use unblocked code to factor the last or only block.
+*
+*
+         IF( J.LE.MINMN )
+     $      CALL DLAQP2( M, N-J+1, J-1, A( 1, J ), LDA, JPVT( J ),
+     $                   TAU( J ), WORK( J ), WORK( N+J ),
+     $                   WORK( 2*N+1 ) )
+*
+      END IF
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of DGEQP3
+*
+      END
diff --git a/costa/native/external/lapack/dgeqpf.f b/costa/native/external/lapack/dgeqpf.f
new file mode 100644
index 000000000..a33bee9d2
--- /dev/null
+++ b/costa/native/external/lapack/dgeqpf.f
@@ -0,0 +1,222 @@
+      SUBROUTINE DGEQPF( M, N, A, LDA, JPVT, TAU, WORK, INFO )
+*
+*  -- LAPACK test routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            JPVT( * )
+      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This routine is deprecated and has been replaced by routine DGEQP3.
+*
+*  DGEQPF computes a QR factorization with column pivoting of a
+*  real M-by-N matrix A: A*P = Q*R.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A. N >= 0
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, the upper triangle of the array contains the
+*          min(M,N)-by-N upper triangular matrix R; the elements
+*          below the diagonal, together with the array TAU,
+*          represent the orthogonal matrix Q as a product of
+*          min(m,n) elementary reflectors.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  JPVT    (input/output) INTEGER array, dimension (N)
+*          On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted
+*          to the front of A*P (a leading column); if JPVT(i) = 0,
+*          the i-th column of A is a free column.
+*          On exit, if JPVT(i) = k, then the i-th column of A*P
+*          was the k-th column of A.
+*
+*  TAU     (output) DOUBLE PRECISION array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(n)
+*
+*  Each H(i) has the form
+*
+*     H = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i).
+*
+*  The matrix P is represented in jpvt as follows: If
+*     jpvt(j) = i
+*  then the jth column of P is the ith canonical unit vector.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, ITEMP, J, MA, MN, PVT
+      DOUBLE PRECISION   AII, TEMP, TEMP2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEQR2, DLARF, DLARFG, DORM2R, DSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. External Functions ..
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DNRM2
+      EXTERNAL           IDAMAX, DNRM2
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGEQPF', -INFO )
+         RETURN
+      END IF
+*
+      MN = MIN( M, N )
+*
+*     Move initial columns up front
+*
+      ITEMP = 1
+      DO 10 I = 1, N
+         IF( JPVT( I ).NE.0 ) THEN
+            IF( I.NE.ITEMP ) THEN
+               CALL DSWAP( M, A( 1, I ), 1, A( 1, ITEMP ), 1 )
+               JPVT( I ) = JPVT( ITEMP )
+               JPVT( ITEMP ) = I
+            ELSE
+               JPVT( I ) = I
+            END IF
+            ITEMP = ITEMP + 1
+         ELSE
+            JPVT( I ) = I
+         END IF
+   10 CONTINUE
+      ITEMP = ITEMP - 1
+*
+*     Compute the QR factorization and update remaining columns
+*
+      IF( ITEMP.GT.0 ) THEN
+         MA = MIN( ITEMP, M )
+         CALL DGEQR2( M, MA, A, LDA, TAU, WORK, INFO )
+         IF( MA.LT.N ) THEN
+            CALL DORM2R( 'Left', 'Transpose', M, N-MA, MA, A, LDA, TAU,
+     $                   A( 1, MA+1 ), LDA, WORK, INFO )
+         END IF
+      END IF
+*
+      IF( ITEMP.LT.MN ) THEN
+*
+*        Initialize partial column norms. The first n elements of
+*        work store the exact column norms.
+*
+         DO 20 I = ITEMP + 1, N
+            WORK( I ) = DNRM2( M-ITEMP, A( ITEMP+1, I ), 1 )
+            WORK( N+I ) = WORK( I )
+   20    CONTINUE
+*
+*        Compute factorization
+*
+         DO 40 I = ITEMP + 1, MN
+*
+*           Determine ith pivot column and swap if necessary
+*
+            PVT = ( I-1 ) + IDAMAX( N-I+1, WORK( I ), 1 )
+*
+            IF( PVT.NE.I ) THEN
+               CALL DSWAP( M, A( 1, PVT ), 1, A( 1, I ), 1 )
+               ITEMP = JPVT( PVT )
+               JPVT( PVT ) = JPVT( I )
+               JPVT( I ) = ITEMP
+               WORK( PVT ) = WORK( I )
+               WORK( N+PVT ) = WORK( N+I )
+            END IF
+*
+*           Generate elementary reflector H(i)
+*
+            IF( I.LT.M ) THEN
+               CALL DLARFG( M-I+1, A( I, I ), A( I+1, I ), 1, TAU( I ) )
+            ELSE
+               CALL DLARFG( 1, A( M, M ), A( M, M ), 1, TAU( M ) )
+            END IF
+*
+            IF( I.LT.N ) THEN
+*
+*              Apply H(i) to A(i:m,i+1:n) from the left
+*
+               AII = A( I, I )
+               A( I, I ) = ONE
+               CALL DLARF( 'LEFT', M-I+1, N-I, A( I, I ), 1, TAU( I ),
+     $                     A( I, I+1 ), LDA, WORK( 2*N+1 ) )
+               A( I, I ) = AII
+            END IF
+*
+*           Update partial column norms
+*
+            DO 30 J = I + 1, N
+               IF( WORK( J ).NE.ZERO ) THEN
+                  TEMP = ONE - ( ABS( A( I, J ) ) / WORK( J ) )**2
+                  TEMP = MAX( TEMP, ZERO )
+                  TEMP2 = ONE + 0.05D0*TEMP*
+     $                    ( WORK( J ) / WORK( N+J ) )**2
+                  IF( TEMP2.EQ.ONE ) THEN
+                     IF( M-I.GT.0 ) THEN
+                        WORK( J ) = DNRM2( M-I, A( I+1, J ), 1 )
+                        WORK( N+J ) = WORK( J )
+                     ELSE
+                        WORK( J ) = ZERO
+                        WORK( N+J ) = ZERO
+                     END IF
+                  ELSE
+                     WORK( J ) = WORK( J )*SQRT( TEMP )
+                  END IF
+               END IF
+   30       CONTINUE
+*
+   40    CONTINUE
+      END IF
+      RETURN
+*
+*     End of DGEQPF
+*
+      END
diff --git a/costa/native/external/lapack/dgeqr2.f b/costa/native/external/lapack/dgeqr2.f
new file mode 100644
index 000000000..b1ea5152e
--- /dev/null
+++ b/costa/native/external/lapack/dgeqr2.f
@@ -0,0 +1,122 @@
+      SUBROUTINE DGEQR2( M, N, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGEQR2 computes a QR factorization of a real m by n matrix A:
+*  A = Q * R.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the m by n matrix A.
+*          On exit, the elements on and above the diagonal of the array
+*          contain the min(m,n) by n upper trapezoidal matrix R (R is
+*          upper triangular if m >= n); the elements below the diagonal,
+*          with the array TAU, represent the orthogonal matrix Q as a
+*          product of elementary reflectors (see Further Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) DOUBLE PRECISION array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(k), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
+*  and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, K
+      DOUBLE PRECISION   AII
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARF, DLARFG, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGEQR2', -INFO )
+         RETURN
+      END IF
+*
+      K = MIN( M, N )
+*
+      DO 10 I = 1, K
+*
+*        Generate elementary reflector H(i) to annihilate A(i+1:m,i)
+*
+         CALL DLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1,
+     $                TAU( I ) )
+         IF( I.LT.N ) THEN
+*
+*           Apply H(i) to A(i:m,i+1:n) from the left
+*
+            AII = A( I, I )
+            A( I, I ) = ONE
+            CALL DLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ),
+     $                  A( I, I+1 ), LDA, WORK )
+            A( I, I ) = AII
+         END IF
+   10 CONTINUE
+      RETURN
+*
+*     End of DGEQR2
+*
+      END
diff --git a/costa/native/external/lapack/dgeqrf.f b/costa/native/external/lapack/dgeqrf.f
new file mode 100644
index 000000000..7a40b537f
--- /dev/null
+++ b/costa/native/external/lapack/dgeqrf.f
@@ -0,0 +1,197 @@
+      SUBROUTINE DGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGEQRF computes a QR factorization of a real M-by-N matrix A:
+*  A = Q * R.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, the elements on and above the diagonal of the array
+*          contain the min(M,N)-by-N upper trapezoidal matrix R (R is
+*          upper triangular if m >= n); the elements below the diagonal,
+*          with the array TAU, represent the orthogonal matrix Q as a
+*          product of min(m,n) elementary reflectors (see Further
+*          Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) DOUBLE PRECISION array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,N).
+*          For optimum performance LWORK >= N*NB, where NB is
+*          the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(k), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
+*  and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB,
+     $                   NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEQR2, DLARFB, DLARFT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
+      LWKOPT = N*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGEQRF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      K = MIN( M, N )
+      IF( K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 0
+      IWS = N
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'DGEQRF', ' ', M, N, -1, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = N
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'DGEQRF', ' ', M, N, -1,
+     $                 -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code initially
+*
+         DO 10 I = 1, K - NX, NB
+            IB = MIN( K-I+1, NB )
+*
+*           Compute the QR factorization of the current block
+*           A(i:m,i:i+ib-1)
+*
+            CALL DGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ), WORK,
+     $                   IINFO )
+            IF( I+IB.LE.N ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i) H(i+1) . . . H(i+ib-1)
+*
+               CALL DLARFT( 'Forward', 'Columnwise', M-I+1, IB,
+     $                      A( I, I ), LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H' to A(i:m,i+ib:n) from the left
+*
+               CALL DLARFB( 'Left', 'Transpose', 'Forward',
+     $                      'Columnwise', M-I+1, N-I-IB+1, IB,
+     $                      A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ),
+     $                      LDA, WORK( IB+1 ), LDWORK )
+            END IF
+   10    CONTINUE
+      ELSE
+         I = 1
+      END IF
+*
+*     Use unblocked code to factor the last or only block.
+*
+      IF( I.LE.K )
+     $   CALL DGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK,
+     $                IINFO )
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of DGEQRF
+*
+      END
diff --git a/costa/native/external/lapack/dgerfs.f b/costa/native/external/lapack/dgerfs.f
new file mode 100644
index 000000000..aa6c13415
--- /dev/null
+++ b/costa/native/external/lapack/dgerfs.f
@@ -0,0 +1,332 @@
+      SUBROUTINE DGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB,
+     $                   X, LDX, FERR, BERR, WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
+     $                   BERR( * ), FERR( * ), WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGERFS improves the computed solution to a system of linear
+*  equations and provides error bounds and backward error estimates for
+*  the solution.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose = Transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+*          The original N-by-N matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  AF      (input) DOUBLE PRECISION array, dimension (LDAF,N)
+*          The factors L and U from the factorization A = P*L*U
+*          as computed by DGETRF.
+*
+*  LDAF    (input) INTEGER
+*          The leading dimension of the array AF.  LDAF >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices from DGETRF; for 1<=i<=N, row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  B       (input) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by DGETRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D+0 )
+      DOUBLE PRECISION   THREE
+      PARAMETER          ( THREE = 3.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN
+      CHARACTER          TRANST
+      INTEGER            COUNT, I, J, K, KASE, NZ
+      DOUBLE PRECISION   EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DCOPY, DGEMV, DGETRS, DLACON, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $    LSAME( TRANS, 'C' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGERFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      IF( NOTRAN ) THEN
+         TRANST = 'T'
+      ELSE
+         TRANST = 'N'
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = DLAMCH( 'Epsilon' )
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - op(A) * X,
+*        where op(A) = A, A**T, or A**H, depending on TRANS.
+*
+         CALL DCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 )
+         CALL DGEMV( TRANS, N, N, -ONE, A, LDA, X( 1, J ), 1, ONE,
+     $               WORK( N+1 ), 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            WORK( I ) = ABS( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(op(A))*abs(X) + abs(B).
+*
+         IF( NOTRAN ) THEN
+            DO 50 K = 1, N
+               XK = ABS( X( K, J ) )
+               DO 40 I = 1, N
+                  WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK
+   40          CONTINUE
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               DO 60 I = 1, N
+                  S = S + ABS( A( I, K ) )*ABS( X( I, J ) )
+   60          CONTINUE
+               WORK( K ) = WORK( K ) + S
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) )
+            ELSE
+               S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) /
+     $             ( WORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL DGETRS( TRANS, N, 1, AF, LDAF, IPIV, WORK( N+1 ), N,
+     $                   INFO )
+            CALL DAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(op(A)))*
+*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(op(A)) is the inverse of op(A)
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(op(A))*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use DLACON to estimate the infinity-norm of the matrix
+*           inv(op(A)) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I )
+            ELSE
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL DLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ),
+     $                KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(op(A)**T).
+*
+               CALL DGETRS( TRANST, N, 1, AF, LDAF, IPIV, WORK( N+1 ),
+     $                      N, INFO )
+               DO 110 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  110          CONTINUE
+            ELSE
+*
+*              Multiply by inv(op(A))*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  120          CONTINUE
+               CALL DGETRS( TRANS, N, 1, AF, LDAF, IPIV, WORK( N+1 ), N,
+     $                      INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, ABS( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of DGERFS
+*
+      END
diff --git a/costa/native/external/lapack/dgerq2.f b/costa/native/external/lapack/dgerq2.f
new file mode 100644
index 000000000..cdba313da
--- /dev/null
+++ b/costa/native/external/lapack/dgerq2.f
@@ -0,0 +1,123 @@
+      SUBROUTINE DGERQ2( M, N, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGERQ2 computes an RQ factorization of a real m by n matrix A:
+*  A = R * Q.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the m by n matrix A.
+*          On exit, if m <= n, the upper triangle of the subarray
+*          A(1:m,n-m+1:n) contains the m by m upper triangular matrix R;
+*          if m >= n, the elements on and above the (m-n)-th subdiagonal
+*          contain the m by n upper trapezoidal matrix R; the remaining
+*          elements, with the array TAU, represent the orthogonal matrix
+*          Q as a product of elementary reflectors (see Further
+*          Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) DOUBLE PRECISION array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (M)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(k), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in
+*  A(m-k+i,1:n-k+i-1), and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, K
+      DOUBLE PRECISION   AII
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARF, DLARFG, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGERQ2', -INFO )
+         RETURN
+      END IF
+*
+      K = MIN( M, N )
+*
+      DO 10 I = K, 1, -1
+*
+*        Generate elementary reflector H(i) to annihilate
+*        A(m-k+i,1:n-k+i-1)
+*
+         CALL DLARFG( N-K+I, A( M-K+I, N-K+I ), A( M-K+I, 1 ), LDA,
+     $                TAU( I ) )
+*
+*        Apply H(i) to A(1:m-k+i-1,1:n-k+i) from the right
+*
+         AII = A( M-K+I, N-K+I )
+         A( M-K+I, N-K+I ) = ONE
+         CALL DLARF( 'Right', M-K+I-1, N-K+I, A( M-K+I, 1 ), LDA,
+     $               TAU( I ), A, LDA, WORK )
+         A( M-K+I, N-K+I ) = AII
+   10 CONTINUE
+      RETURN
+*
+*     End of DGERQ2
+*
+      END
diff --git a/costa/native/external/lapack/dgerqf.f b/costa/native/external/lapack/dgerqf.f
new file mode 100644
index 000000000..f5940f232
--- /dev/null
+++ b/costa/native/external/lapack/dgerqf.f
@@ -0,0 +1,205 @@
+      SUBROUTINE DGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGERQF computes an RQ factorization of a real M-by-N matrix A:
+*  A = R * Q.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit,
+*          if m <= n, the upper triangle of the subarray
+*          A(1:m,n-m+1:n) contains the M-by-M upper triangular matrix R;
+*          if m >= n, the elements on and above the (m-n)-th subdiagonal
+*          contain the M-by-N upper trapezoidal matrix R;
+*          the remaining elements, with the array TAU, represent the
+*          orthogonal matrix Q as a product of min(m,n) elementary
+*          reflectors (see Further Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) DOUBLE PRECISION array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,M).
+*          For optimum performance LWORK >= M*NB, where NB is
+*          the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(k), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in
+*  A(m-k+i,1:n-k+i-1), and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, K, KI, KK, LDWORK, LWKOPT,
+     $                   MU, NB, NBMIN, NU, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGERQ2, DLARFB, DLARFT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'DGERQF', ' ', M, N, -1, -1 )
+      LWKOPT = M*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGERQF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      K = MIN( M, N )
+      IF( K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 1
+      IWS = M
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'DGERQF', ' ', M, N, -1, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = M
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'DGERQF', ' ', M, N, -1,
+     $                 -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code initially.
+*        The last kk rows are handled by the block method.
+*
+         KI = ( ( K-NX-1 ) / NB )*NB
+         KK = MIN( K, KI+NB )
+*
+         DO 10 I = K - KK + KI + 1, K - KK + 1, -NB
+            IB = MIN( K-I+1, NB )
+*
+*           Compute the RQ factorization of the current block
+*           A(m-k+i:m-k+i+ib-1,1:n-k+i+ib-1)
+*
+            CALL DGERQ2( IB, N-K+I+IB-1, A( M-K+I, 1 ), LDA, TAU( I ),
+     $                   WORK, IINFO )
+            IF( M-K+I.GT.1 ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i+ib-1) . . . H(i+1) H(i)
+*
+               CALL DLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB,
+     $                      A( M-K+I, 1 ), LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H to A(1:m-k+i-1,1:n-k+i+ib-1) from the right
+*
+               CALL DLARFB( 'Right', 'No transpose', 'Backward',
+     $                      'Rowwise', M-K+I-1, N-K+I+IB-1, IB,
+     $                      A( M-K+I, 1 ), LDA, WORK, LDWORK, A, LDA,
+     $                      WORK( IB+1 ), LDWORK )
+            END IF
+   10    CONTINUE
+         MU = M - K + I + NB - 1
+         NU = N - K + I + NB - 1
+      ELSE
+         MU = M
+         NU = N
+      END IF
+*
+*     Use unblocked code to factor the last or only block
+*
+      IF( MU.GT.0 .AND. NU.GT.0 )
+     $   CALL DGERQ2( MU, NU, A, LDA, TAU, WORK, IINFO )
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of DGERQF
+*
+      END
diff --git a/costa/native/external/lapack/dgesc2.f b/costa/native/external/lapack/dgesc2.f
new file mode 100644
index 000000000..33ac0e603
--- /dev/null
+++ b/costa/native/external/lapack/dgesc2.f
@@ -0,0 +1,133 @@
+      SUBROUTINE DGESC2( N, A, LDA, RHS, IPIV, JPIV, SCALE )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            LDA, N
+      DOUBLE PRECISION   SCALE
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), JPIV( * )
+      DOUBLE PRECISION   A( LDA, * ), RHS( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGESC2 solves a system of linear equations
+*
+*            A * X = scale* RHS
+*
+*  with a general N-by-N matrix A using the LU factorization with
+*  complete pivoting computed by DGETC2.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the  LU part of the factorization of the n-by-n
+*          matrix A computed by DGETC2:  A = P * L * U * Q
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1, N).
+*
+*  RHS     (input/output) DOUBLE PRECISION array, dimension (N).
+*          On entry, the right hand side vector b.
+*          On exit, the solution vector X.
+*
+*  IPIV    (iput) INTEGER array, dimension (N).
+*          The pivot indices; for 1 <= i <= N, row i of the
+*          matrix has been interchanged with row IPIV(i).
+*
+*  JPIV    (iput) INTEGER array, dimension (N).
+*          The pivot indices; for 1 <= j <= N, column j of the
+*          matrix has been interchanged with column JPIV(j).
+*
+*  SCALE    (output) DOUBLE PRECISION
+*           On exit, SCALE contains the scale factor. SCALE is chosen
+*           0 <= SCALE <= 1 to prevent owerflow in the solution.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, TWO
+      PARAMETER          ( ONE = 1.0D+0, TWO = 2.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      DOUBLE PRECISION   BIGNUM, EPS, SMLNUM, TEMP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASWP, DSCAL
+*     ..
+*     .. External Functions ..
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           IDAMAX, DLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS
+*     ..
+*     .. Executable Statements ..
+*
+*      Set constant to control owerflow
+*
+      EPS = DLAMCH( 'P' )
+      SMLNUM = DLAMCH( 'S' ) / EPS
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+*
+*     Apply permutations IPIV to RHS
+*
+      CALL DLASWP( 1, RHS, LDA, 1, N-1, IPIV, 1 )
+*
+*     Solve for L part
+*
+      DO 20 I = 1, N - 1
+         DO 10 J = I + 1, N
+            RHS( J ) = RHS( J ) - A( J, I )*RHS( I )
+   10    CONTINUE
+   20 CONTINUE
+*
+*     Solve for U part
+*
+      SCALE = ONE
+*
+*     Check for scaling
+*
+      I = IDAMAX( N, RHS, 1 )
+      IF( TWO*SMLNUM*ABS( RHS( I ) ).GT.ABS( A( N, N ) ) ) THEN
+         TEMP = ( ONE / TWO ) / ABS( RHS( I ) )
+         CALL DSCAL( N, TEMP, RHS( 1 ), 1 )
+         SCALE = SCALE*TEMP
+      END IF
+*
+      DO 40 I = N, 1, -1
+         TEMP = ONE / A( I, I )
+         RHS( I ) = RHS( I )*TEMP
+         DO 30 J = I + 1, N
+            RHS( I ) = RHS( I ) - RHS( J )*( A( I, J )*TEMP )
+   30    CONTINUE
+   40 CONTINUE
+*
+*     Apply permutations JPIV to the solution (RHS)
+*
+      CALL DLASWP( 1, RHS, LDA, 1, N-1, JPIV, -1 )
+      RETURN
+*
+*     End of DGESC2
+*
+      END
diff --git a/costa/native/external/lapack/dgesdd.f b/costa/native/external/lapack/dgesdd.f
new file mode 100644
index 000000000..51e55e60b
--- /dev/null
+++ b/costa/native/external/lapack/dgesdd.f
@@ -0,0 +1,1335 @@
+      SUBROUTINE DGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK,
+     $                   LWORK, IWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ
+      INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), S( * ), U( LDU, * ),
+     $                   VT( LDVT, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGESDD computes the singular value decomposition (SVD) of a real
+*  M-by-N matrix A, optionally computing the left and right singular
+*  vectors.  If singular vectors are desired, it uses a
+*  divide-and-conquer algorithm.
+*
+*  The SVD is written
+*
+*       A = U * SIGMA * transpose(V)
+*
+*  where SIGMA is an M-by-N matrix which is zero except for its
+*  min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
+*  V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
+*  are the singular values of A; they are real and non-negative, and
+*  are returned in descending order.  The first min(m,n) columns of
+*  U and V are the left and right singular vectors of A.
+*
+*  Note that the routine returns VT = V**T, not V.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          Specifies options for computing all or part of the matrix U:
+*          = 'A':  all M columns of U and all N rows of V**T are
+*                  returned in the arrays U and VT;
+*          = 'S':  the first min(M,N) columns of U and the first
+*                  min(M,N) rows of V**T are returned in the arrays U
+*                  and VT;
+*          = 'O':  If M >= N, the first N columns of U are overwritten
+*                  on the array A and all rows of V**T are returned in
+*                  the array VT;
+*                  otherwise, all columns of U are returned in the
+*                  array U and the first M rows of V**T are overwritten
+*                  in the array VT;
+*          = 'N':  no columns of U or rows of V**T are computed.
+*
+*  M       (input) INTEGER
+*          The number of rows of the input matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the input matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit,
+*          if JOBZ = 'O',  A is overwritten with the first N columns
+*                          of U (the left singular vectors, stored
+*                          columnwise) if M >= N;
+*                          A is overwritten with the first M rows
+*                          of V**T (the right singular vectors, stored
+*                          rowwise) otherwise.
+*          if JOBZ .ne. 'O', the contents of A are destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  S       (output) DOUBLE PRECISION array, dimension (min(M,N))
+*          The singular values of A, sorted so that S(i) >= S(i+1).
+*
+*  U       (output) DOUBLE PRECISION array, dimension (LDU,UCOL)
+*          UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N;
+*          UCOL = min(M,N) if JOBZ = 'S'.
+*          If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M
+*          orthogonal matrix U;
+*          if JOBZ = 'S', U contains the first min(M,N) columns of U
+*          (the left singular vectors, stored columnwise);
+*          if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced.
+*
+*  LDU     (input) INTEGER
+*          The leading dimension of the array U.  LDU >= 1; if
+*          JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M.
+*
+*  VT      (output) DOUBLE PRECISION array, dimension (LDVT,N)
+*          If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the
+*          N-by-N orthogonal matrix V**T;
+*          if JOBZ = 'S', VT contains the first min(M,N) rows of
+*          V**T (the right singular vectors, stored rowwise);
+*          if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced.
+*
+*  LDVT    (input) INTEGER
+*          The leading dimension of the array VT.  LDVT >= 1; if
+*          JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N;
+*          if JOBZ = 'S', LDVT >= min(M,N).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= 1.
+*          If JOBZ = 'N',
+*            LWORK >= 3*min(M,N) + max(max(M,N),6*min(M,N)).
+*          If JOBZ = 'O',
+*            LWORK >= 3*min(M,N)*min(M,N) + 
+*                     max(max(M,N),5*min(M,N)*min(M,N)+4*min(M,N)).
+*          If JOBZ = 'S' or 'A'
+*            LWORK >= 3*min(M,N)*min(M,N) +
+*                     max(max(M,N),4*min(M,N)*min(M,N)+4*min(M,N)).
+*          For good performance, LWORK should generally be larger.
+*          If LWORK < 0 but other input arguments are legal, WORK(1)
+*          returns the optimal LWORK.
+*
+*  IWORK   (workspace) INTEGER array, dimension (8*min(M,N))
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  DBDSDC did not converge, updating process failed.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, WNTQA, WNTQAS, WNTQN, WNTQO, WNTQS
+      INTEGER            BDSPAC, BLK, CHUNK, I, IE, IERR, IL,
+     $                   IR, ISCL, ITAU, ITAUP, ITAUQ, IU, IVT, LDWKVT,
+     $                   LDWRKL, LDWRKR, LDWRKU, MAXWRK, MINMN, MINWRK,
+     $                   MNTHR, NWORK, WRKBL
+      DOUBLE PRECISION   ANRM, BIGNUM, EPS, SMLNUM
+*     ..
+*     .. Local Arrays ..
+      INTEGER            IDUM( 1 )
+      DOUBLE PRECISION   DUM( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DBDSDC, DGEBRD, DGELQF, DGEMM, DGEQRF, DLACPY,
+     $                   DLASCL, DLASET, DORGBR, DORGLQ, DORGQR, DORMBR,
+     $                   XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, DLANGE
+      EXTERNAL           DLAMCH, DLANGE, ILAENV, LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, INT, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      MINMN = MIN( M, N )
+      MNTHR = INT( MINMN*11.0D0 / 6.0D0 )
+      WNTQA = LSAME( JOBZ, 'A' )
+      WNTQS = LSAME( JOBZ, 'S' )
+      WNTQAS = WNTQA .OR. WNTQS
+      WNTQO = LSAME( JOBZ, 'O' )
+      WNTQN = LSAME( JOBZ, 'N' )
+      MINWRK = 1
+      MAXWRK = 1
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      IF( .NOT.( WNTQA .OR. WNTQS .OR. WNTQO .OR. WNTQN ) ) THEN
+         INFO = -1
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDU.LT.1 .OR. ( WNTQAS .AND. LDU.LT.M ) .OR.
+     $         ( WNTQO .AND. M.LT.N .AND. LDU.LT.M ) ) THEN
+         INFO = -8
+      ELSE IF( LDVT.LT.1 .OR. ( WNTQA .AND. LDVT.LT.N ) .OR.
+     $         ( WNTQS .AND. LDVT.LT.MINMN ) .OR.
+     $         ( WNTQO .AND. M.GE.N .AND. LDVT.LT.N ) ) THEN
+         INFO = -10
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV.)
+*
+      IF( INFO.EQ.0 .AND. M.GT.0 .AND. N.GT.0 ) THEN
+         IF( M.GE.N ) THEN
+*
+*           Compute space needed for DBDSDC
+*
+            IF( WNTQN ) THEN
+               BDSPAC = 7*N
+            ELSE
+               BDSPAC = 3*N*N + 4*N
+            END IF
+            IF( M.GE.MNTHR ) THEN
+               IF( WNTQN ) THEN
+*
+*                 Path 1 (M much larger than N, JOBZ='N')
+*
+                  WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1,
+     $                    -1 )
+                  WRKBL = MAX( WRKBL, 3*N+2*N*
+     $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
+                  MAXWRK = MAX( WRKBL, BDSPAC+N )
+                  MINWRK = BDSPAC + N
+               ELSE IF( WNTQO ) THEN
+*
+*                 Path 2 (M much larger than N, JOBZ='O')
+*
+                  WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
+     $                    N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+2*N*
+     $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'DORMBR', 'QLN', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'DORMBR', 'PRT', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC+3*N )
+                  MAXWRK = WRKBL + 2*N*N
+                  MINWRK = BDSPAC + 2*N*N + 3*N
+               ELSE IF( WNTQS ) THEN
+*
+*                 Path 3 (M much larger than N, JOBZ='S')
+*
+                  WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
+     $                    N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+2*N*
+     $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'DORMBR', 'QLN', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'DORMBR', 'PRT', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC+3*N )
+                  MAXWRK = WRKBL + N*N
+                  MINWRK = BDSPAC + N*N + 3*N
+               ELSE IF( WNTQA ) THEN
+*
+*                 Path 4 (M much larger than N, JOBZ='A')
+*
+                  WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'DORGQR', ' ', M,
+     $                    M, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+2*N*
+     $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'DORMBR', 'QLN', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'DORMBR', 'PRT', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC+3*N )
+                  MAXWRK = WRKBL + N*N
+                  MINWRK = BDSPAC + N*N + 3*N
+               END IF
+            ELSE
+*
+*              Path 5 (M at least N, but not much larger)
+*
+               WRKBL = 3*N + ( M+N )*ILAENV( 1, 'DGEBRD', ' ', M, N, -1,
+     $                 -1 )
+               IF( WNTQN ) THEN
+                  MAXWRK = MAX( WRKBL, BDSPAC+3*N )
+                  MINWRK = 3*N + MAX( M, BDSPAC )
+               ELSE IF( WNTQO ) THEN
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'DORMBR', 'QLN', M, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'DORMBR', 'PRT', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC+3*N )
+                  MAXWRK = WRKBL + M*N
+                  MINWRK = 3*N + MAX( M, N*N+BDSPAC )
+               ELSE IF( WNTQS ) THEN
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'DORMBR', 'QLN', M, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'DORMBR', 'PRT', N, N, N, -1 ) )
+                  MAXWRK = MAX( WRKBL, BDSPAC+3*N )
+                  MINWRK = 3*N + MAX( M, BDSPAC )
+               ELSE IF( WNTQA ) THEN
+                  WRKBL = MAX( WRKBL, 3*N+M*
+     $                    ILAENV( 1, 'DORMBR', 'QLN', M, M, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'DORMBR', 'PRT', N, N, N, -1 ) )
+                  MAXWRK = MAX( MAXWRK, BDSPAC+3*N )
+                  MINWRK = 3*N + MAX( M, BDSPAC )
+               END IF
+            END IF
+         ELSE
+*
+*           Compute space needed for DBDSDC
+*
+            IF( WNTQN ) THEN
+               BDSPAC = 7*M
+            ELSE
+               BDSPAC = 3*M*M + 4*M
+            END IF
+            IF( N.GE.MNTHR ) THEN
+               IF( WNTQN ) THEN
+*
+*                 Path 1t (N much larger than M, JOBZ='N')
+*
+                  WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1,
+     $                    -1 )
+                  WRKBL = MAX( WRKBL, 3*M+2*M*
+     $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
+                  MAXWRK = MAX( WRKBL, BDSPAC+M )
+                  MINWRK = BDSPAC + M
+               ELSE IF( WNTQO ) THEN
+*
+*                 Path 2t (N much larger than M, JOBZ='O')
+*
+                  WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+2*M*
+     $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'DORMBR', 'QLN', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'DORMBR', 'PRT', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC+3*M )
+                  MAXWRK = WRKBL + 2*M*M
+                  MINWRK = BDSPAC + 2*M*M + 3*M
+               ELSE IF( WNTQS ) THEN
+*
+*                 Path 3t (N much larger than M, JOBZ='S')
+*
+                  WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+2*M*
+     $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'DORMBR', 'QLN', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'DORMBR', 'PRT', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC+3*M )
+                  MAXWRK = WRKBL + M*M
+                  MINWRK = BDSPAC + M*M + 3*M
+               ELSE IF( WNTQA ) THEN
+*
+*                 Path 4t (N much larger than M, JOBZ='A')
+*
+                  WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'DORGLQ', ' ', N,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+2*M*
+     $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'DORMBR', 'QLN', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'DORMBR', 'PRT', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC+3*M )
+                  MAXWRK = WRKBL + M*M
+                  MINWRK = BDSPAC + M*M + 3*M
+               END IF
+            ELSE
+*
+*              Path 5t (N greater than M, but not much larger)
+*
+               WRKBL = 3*M + ( M+N )*ILAENV( 1, 'DGEBRD', ' ', M, N, -1,
+     $                 -1 )
+               IF( WNTQN ) THEN
+                  MAXWRK = MAX( WRKBL, BDSPAC+3*M )
+                  MINWRK = 3*M + MAX( N, BDSPAC )
+               ELSE IF( WNTQO ) THEN
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'DORMBR', 'QLN', M, M, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'DORMBR', 'PRT', M, N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC+3*M )
+                  MAXWRK = WRKBL + M*N
+                  MINWRK = 3*M + MAX( N, M*M+BDSPAC )
+               ELSE IF( WNTQS ) THEN
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'DORMBR', 'QLN', M, M, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'DORMBR', 'PRT', M, N, M, -1 ) )
+                  MAXWRK = MAX( WRKBL, BDSPAC+3*M )
+                  MINWRK = 3*M + MAX( N, BDSPAC )
+               ELSE IF( WNTQA ) THEN
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'DORMBR', 'QLN', M, M, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'DORMBR', 'PRT', N, N, M, -1 ) )
+                  MAXWRK = MAX( WRKBL, BDSPAC+3*M )
+                  MINWRK = 3*M + MAX( N, BDSPAC )
+               END IF
+            END IF
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+*
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGESDD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         IF( LWORK.GE.1 )
+     $      WORK( 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'P' )
+      SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = DLANGE( 'M', M, N, A, LDA, DUM )
+      ISCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         ISCL = 1
+         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         ISCL = 1
+         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
+      END IF
+*
+      IF( M.GE.N ) THEN
+*
+*        A has at least as many rows as columns. If A has sufficiently
+*        more rows than columns, first reduce using the QR
+*        decomposition (if sufficient workspace available)
+*
+         IF( M.GE.MNTHR ) THEN
+*
+            IF( WNTQN ) THEN
+*
+*              Path 1 (M much larger than N, JOBZ='N')
+*              No singular vectors to be computed
+*
+               ITAU = 1
+               NWORK = ITAU + N
+*
+*              Compute A=Q*R
+*              (Workspace: need 2*N, prefer N+N*NB)
+*
+               CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Zero out below R
+*
+               CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
+               IE = 1
+               ITAUQ = IE + N
+               ITAUP = ITAUQ + N
+               NWORK = ITAUP + N
+*
+*              Bidiagonalize R in A
+*              (Workspace: need 4*N, prefer 3*N+2*N*NB)
+*
+               CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                      IERR )
+               NWORK = IE + N
+*
+*              Perform bidiagonal SVD, computing singular values only
+*              (Workspace: need N+BDSPAC)
+*
+               CALL DBDSDC( 'U', 'N', N, S, WORK( IE ), DUM, 1, DUM, 1,
+     $                      DUM, IDUM, WORK( NWORK ), IWORK, INFO )
+*
+            ELSE IF( WNTQO ) THEN
+*
+*              Path 2 (M much larger than N, JOBZ = 'O')
+*              N left singular vectors to be overwritten on A and
+*              N right singular vectors to be computed in VT
+*
+               IR = 1
+*
+*              WORK(IR) is LDWRKR by N
+*
+               IF( LWORK.GE.LDA*N+N*N+3*N+BDSPAC ) THEN
+                  LDWRKR = LDA
+               ELSE
+                  LDWRKR = ( LWORK-N*N-3*N-BDSPAC ) / N
+               END IF
+               ITAU = IR + LDWRKR*N
+               NWORK = ITAU + N
+*
+*              Compute A=Q*R
+*              (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+               CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy R to WORK(IR), zeroing out below it
+*
+               CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
+               CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ),
+     $                      LDWRKR )
+*
+*              Generate Q in A
+*              (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+               CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+               IE = ITAU
+               ITAUQ = IE + N
+               ITAUP = ITAUQ + N
+               NWORK = ITAUP + N
+*
+*              Bidiagonalize R in VT, copying result to WORK(IR)
+*              (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*
+               CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
+     $                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              WORK(IU) is N by N
+*
+               IU = NWORK
+               NWORK = IU + N*N
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in WORK(IU) and computing right
+*              singular vectors of bidiagonal matrix in VT
+*              (Workspace: need N+N*N+BDSPAC)
+*
+               CALL DBDSDC( 'U', 'I', N, S, WORK( IE ), WORK( IU ), N,
+     $                      VT, LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
+     $                      INFO )
+*
+*              Overwrite WORK(IU) by left singular vectors of R
+*              and VT by right singular vectors of R
+*              (Workspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
+*
+               CALL DORMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR,
+     $                      WORK( ITAUQ ), WORK( IU ), N, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+               CALL DORMBR( 'P', 'R', 'T', N, N, N, WORK( IR ), LDWRKR,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Multiply Q in A by left singular vectors of R in
+*              WORK(IU), storing result in WORK(IR) and copying to A
+*              (Workspace: need 2*N*N, prefer N*N+M*N)
+*
+               DO 10 I = 1, M, LDWRKR
+                  CHUNK = MIN( M-I+1, LDWRKR )
+                  CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
+     $                        LDA, WORK( IU ), N, ZERO, WORK( IR ),
+     $                        LDWRKR )
+                  CALL DLACPY( 'F', CHUNK, N, WORK( IR ), LDWRKR,
+     $                         A( I, 1 ), LDA )
+   10          CONTINUE
+*
+            ELSE IF( WNTQS ) THEN
+*
+*              Path 3 (M much larger than N, JOBZ='S')
+*              N left singular vectors to be computed in U and
+*              N right singular vectors to be computed in VT
+*
+               IR = 1
+*
+*              WORK(IR) is N by N
+*
+               LDWRKR = N
+               ITAU = IR + LDWRKR*N
+               NWORK = ITAU + N
+*
+*              Compute A=Q*R
+*              (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+               CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy R to WORK(IR), zeroing out below it
+*
+               CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
+               CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ),
+     $                      LDWRKR )
+*
+*              Generate Q in A
+*              (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+               CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+               IE = ITAU
+               ITAUQ = IE + N
+               ITAUP = ITAUQ + N
+               NWORK = ITAUP + N
+*
+*              Bidiagonalize R in WORK(IR)
+*              (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*
+               CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
+     $                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagoal matrix in U and computing right singular
+*              vectors of bidiagonal matrix in VT
+*              (Workspace: need N+BDSPAC)
+*
+               CALL DBDSDC( 'U', 'I', N, S, WORK( IE ), U, LDU, VT,
+     $                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
+     $                      INFO )
+*
+*              Overwrite U by left singular vectors of R and VT
+*              by right singular vectors of R
+*              (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+*
+               CALL DORMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+               CALL DORMBR( 'P', 'R', 'T', N, N, N, WORK( IR ), LDWRKR,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Multiply Q in A by left singular vectors of R in
+*              WORK(IR), storing result in U
+*              (Workspace: need N*N)
+*
+               CALL DLACPY( 'F', N, N, U, LDU, WORK( IR ), LDWRKR )
+               CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA, WORK( IR ),
+     $                     LDWRKR, ZERO, U, LDU )
+*
+            ELSE IF( WNTQA ) THEN
+*
+*              Path 4 (M much larger than N, JOBZ='A')
+*              M left singular vectors to be computed in U and
+*              N right singular vectors to be computed in VT
+*
+               IU = 1
+*
+*              WORK(IU) is N by N
+*
+               LDWRKU = N
+               ITAU = IU + LDWRKU*N
+               NWORK = ITAU + N
+*
+*              Compute A=Q*R, copying result to U
+*              (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+               CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+               CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*              Generate Q in U
+*              (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+               CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Produce R in A, zeroing out other entries
+*
+               CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
+               IE = ITAU
+               ITAUQ = IE + N
+               ITAUP = ITAUQ + N
+               NWORK = ITAUP + N
+*
+*              Bidiagonalize R in A
+*              (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*
+               CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                      IERR )
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in WORK(IU) and computing right
+*              singular vectors of bidiagonal matrix in VT
+*              (Workspace: need N+N*N+BDSPAC)
+*
+               CALL DBDSDC( 'U', 'I', N, S, WORK( IE ), WORK( IU ), N,
+     $                      VT, LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
+     $                      INFO )
+*
+*              Overwrite WORK(IU) by left singular vectors of R and VT
+*              by right singular vectors of R
+*              (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+*
+               CALL DORMBR( 'Q', 'L', 'N', N, N, N, A, LDA,
+     $                      WORK( ITAUQ ), WORK( IU ), LDWRKU,
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+               CALL DORMBR( 'P', 'R', 'T', N, N, N, A, LDA,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Multiply Q in U by left singular vectors of R in
+*              WORK(IU), storing result in A
+*              (Workspace: need N*N)
+*
+               CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU, WORK( IU ),
+     $                     LDWRKU, ZERO, A, LDA )
+*
+*              Copy left singular vectors of A from A to U
+*
+               CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
+*
+            END IF
+*
+         ELSE
+*
+*           M .LT. MNTHR
+*
+*           Path 5 (M at least N, but not much larger)
+*           Reduce to bidiagonal form without QR decomposition
+*
+            IE = 1
+            ITAUQ = IE + N
+            ITAUP = ITAUQ + N
+            NWORK = ITAUP + N
+*
+*           Bidiagonalize A
+*           (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
+*
+            CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                   WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                   IERR )
+            IF( WNTQN ) THEN
+*
+*              Perform bidiagonal SVD, only computing singular values
+*              (Workspace: need N+BDSPAC)
+*
+               CALL DBDSDC( 'U', 'N', N, S, WORK( IE ), DUM, 1, DUM, 1,
+     $                      DUM, IDUM, WORK( NWORK ), IWORK, INFO )
+            ELSE IF( WNTQO ) THEN
+               IU = NWORK
+               IF( LWORK.GE.M*N+3*N+BDSPAC ) THEN
+*
+*                 WORK( IU ) is M by N
+*
+                  LDWRKU = M
+                  NWORK = IU + LDWRKU*N
+                  CALL DLASET( 'F', M, N, ZERO, ZERO, WORK( IU ),
+     $                         LDWRKU )
+               ELSE
+*
+*                 WORK( IU ) is N by N
+*
+                  LDWRKU = N
+                  NWORK = IU + LDWRKU*N
+*
+*                 WORK(IR) is LDWRKR by N
+*
+                  IR = NWORK
+                  LDWRKR = ( LWORK-N*N-3*N ) / N
+               END IF
+               NWORK = IU + LDWRKU*N
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in WORK(IU) and computing right
+*              singular vectors of bidiagonal matrix in VT
+*              (Workspace: need N+N*N+BDSPAC)
+*
+               CALL DBDSDC( 'U', 'I', N, S, WORK( IE ), WORK( IU ),
+     $                      LDWRKU, VT, LDVT, DUM, IDUM, WORK( NWORK ),
+     $                      IWORK, INFO )
+*
+*              Overwrite VT by right singular vectors of A
+*              (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+               CALL DORMBR( 'P', 'R', 'T', N, N, N, A, LDA,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+               IF( LWORK.GE.M*N+3*N+BDSPAC ) THEN
+*
+*                 Overwrite WORK(IU) by left singular vectors of A
+*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+                  CALL DORMBR( 'Q', 'L', 'N', M, N, N, A, LDA,
+     $                         WORK( ITAUQ ), WORK( IU ), LDWRKU,
+     $                         WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*                 Copy left singular vectors of A from WORK(IU) to A
+*
+                  CALL DLACPY( 'F', M, N, WORK( IU ), LDWRKU, A, LDA )
+               ELSE
+*
+*                 Generate Q in A
+*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+                  CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
+     $                         WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*                 Multiply Q in A by left singular vectors of
+*                 bidiagonal matrix in WORK(IU), storing result in
+*                 WORK(IR) and copying to A
+*                 (Workspace: need 2*N*N, prefer N*N+M*N)
+*
+                  DO 20 I = 1, M, LDWRKR
+                     CHUNK = MIN( M-I+1, LDWRKR )
+                     CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
+     $                           LDA, WORK( IU ), LDWRKU, ZERO,
+     $                           WORK( IR ), LDWRKR )
+                     CALL DLACPY( 'F', CHUNK, N, WORK( IR ), LDWRKR,
+     $                            A( I, 1 ), LDA )
+   20             CONTINUE
+               END IF
+*
+            ELSE IF( WNTQS ) THEN
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in U and computing right singular
+*              vectors of bidiagonal matrix in VT
+*              (Workspace: need N+BDSPAC)
+*
+               CALL DLASET( 'F', M, N, ZERO, ZERO, U, LDU )
+               CALL DBDSDC( 'U', 'I', N, S, WORK( IE ), U, LDU, VT,
+     $                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
+     $                      INFO )
+*
+*              Overwrite U by left singular vectors of A and VT
+*              by right singular vectors of A
+*              (Workspace: need 3*N, prefer 2*N+N*NB)
+*
+               CALL DORMBR( 'Q', 'L', 'N', M, N, N, A, LDA,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+               CALL DORMBR( 'P', 'R', 'T', N, N, N, A, LDA,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+            ELSE IF( WNTQA ) THEN
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in U and computing right singular
+*              vectors of bidiagonal matrix in VT
+*              (Workspace: need N+BDSPAC)
+*
+               CALL DLASET( 'F', M, M, ZERO, ZERO, U, LDU )
+               CALL DBDSDC( 'U', 'I', N, S, WORK( IE ), U, LDU, VT,
+     $                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
+     $                      INFO )
+*
+*              Set the right corner of U to identity matrix
+*
+               CALL DLASET( 'F', M-N, M-N, ZERO, ONE, U( N+1, N+1 ),
+     $                      LDU )
+*
+*              Overwrite U by left singular vectors of A and VT
+*              by right singular vectors of A
+*              (Workspace: need N*N+2*N+M, prefer N*N+2*N+M*NB)
+*
+               CALL DORMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+               CALL DORMBR( 'P', 'R', 'T', N, N, M, A, LDA,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+            END IF
+*
+         END IF
+*
+      ELSE
+*
+*        A has more columns than rows. If A has sufficiently more
+*        columns than rows, first reduce using the LQ decomposition (if
+*        sufficient workspace available)
+*
+         IF( N.GE.MNTHR ) THEN
+*
+            IF( WNTQN ) THEN
+*
+*              Path 1t (N much larger than M, JOBZ='N')
+*              No singular vectors to be computed
+*
+               ITAU = 1
+               NWORK = ITAU + M
+*
+*              Compute A=L*Q
+*              (Workspace: need 2*M, prefer M+M*NB)
+*
+               CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Zero out above L
+*
+               CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )
+               IE = 1
+               ITAUQ = IE + M
+               ITAUP = ITAUQ + M
+               NWORK = ITAUP + M
+*
+*              Bidiagonalize L in A
+*              (Workspace: need 4*M, prefer 3*M+2*M*NB)
+*
+               CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                      IERR )
+               NWORK = IE + M
+*
+*              Perform bidiagonal SVD, computing singular values only
+*              (Workspace: need M+BDSPAC)
+*
+               CALL DBDSDC( 'U', 'N', M, S, WORK( IE ), DUM, 1, DUM, 1,
+     $                      DUM, IDUM, WORK( NWORK ), IWORK, INFO )
+*
+            ELSE IF( WNTQO ) THEN
+*
+*              Path 2t (N much larger than M, JOBZ='O')
+*              M right singular vectors to be overwritten on A and
+*              M left singular vectors to be computed in U
+*
+               IVT = 1
+*
+*              IVT is M by M
+*
+               IL = IVT + M*M
+               IF( LWORK.GE.M*N+M*M+3*M+BDSPAC ) THEN
+*
+*                 WORK(IL) is M by N
+*
+                  LDWRKL = M
+                  CHUNK = N
+               ELSE
+                  LDWRKL = M
+                  CHUNK = ( LWORK-M*M ) / M
+               END IF
+               ITAU = IL + LDWRKL*M
+               NWORK = ITAU + M
+*
+*              Compute A=L*Q
+*              (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+               CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy L to WORK(IL), zeroing about above it
+*
+               CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL )
+               CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
+     $                      WORK( IL+LDWRKL ), LDWRKL )
+*
+*              Generate Q in A
+*              (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+               CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+               IE = ITAU
+               ITAUQ = IE + M
+               ITAUP = ITAUQ + M
+               NWORK = ITAUP + M
+*
+*              Bidiagonalize L in WORK(IL)
+*              (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*
+               CALL DGEBRD( M, M, WORK( IL ), LDWRKL, S, WORK( IE ),
+     $                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in U, and computing right singular
+*              vectors of bidiagonal matrix in WORK(IVT)
+*              (Workspace: need M+M*M+BDSPAC)
+*
+               CALL DBDSDC( 'U', 'I', M, S, WORK( IE ), U, LDU,
+     $                      WORK( IVT ), M, DUM, IDUM, WORK( NWORK ),
+     $                      IWORK, INFO )
+*
+*              Overwrite U by left singular vectors of L and WORK(IVT)
+*              by right singular vectors of L
+*              (Workspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
+*
+               CALL DORMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+               CALL DORMBR( 'P', 'R', 'T', M, M, M, WORK( IL ), LDWRKL,
+     $                      WORK( ITAUP ), WORK( IVT ), M,
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Multiply right singular vectors of L in WORK(IVT) by Q
+*              in A, storing result in WORK(IL) and copying to A
+*              (Workspace: need 2*M*M, prefer M*M+M*N)
+*
+               DO 30 I = 1, N, CHUNK
+                  BLK = MIN( N-I+1, CHUNK )
+                  CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IVT ), M,
+     $                        A( 1, I ), LDA, ZERO, WORK( IL ), LDWRKL )
+                  CALL DLACPY( 'F', M, BLK, WORK( IL ), LDWRKL,
+     $                         A( 1, I ), LDA )
+   30          CONTINUE
+*
+            ELSE IF( WNTQS ) THEN
+*
+*              Path 3t (N much larger than M, JOBZ='S')
+*              M right singular vectors to be computed in VT and
+*              M left singular vectors to be computed in U
+*
+               IL = 1
+*
+*              WORK(IL) is M by M
+*
+               LDWRKL = M
+               ITAU = IL + LDWRKL*M
+               NWORK = ITAU + M
+*
+*              Compute A=L*Q
+*              (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+               CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy L to WORK(IL), zeroing out above it
+*
+               CALL DLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL )
+               CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
+     $                      WORK( IL+LDWRKL ), LDWRKL )
+*
+*              Generate Q in A
+*              (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+               CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+               IE = ITAU
+               ITAUQ = IE + M
+               ITAUP = ITAUQ + M
+               NWORK = ITAUP + M
+*
+*              Bidiagonalize L in WORK(IU), copying result to U
+*              (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*
+               CALL DGEBRD( M, M, WORK( IL ), LDWRKL, S, WORK( IE ),
+     $                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in U and computing right singular
+*              vectors of bidiagonal matrix in VT
+*              (Workspace: need M+BDSPAC)
+*
+               CALL DBDSDC( 'U', 'I', M, S, WORK( IE ), U, LDU, VT,
+     $                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
+     $                      INFO )
+*
+*              Overwrite U by left singular vectors of L and VT
+*              by right singular vectors of L
+*              (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB)
+*
+               CALL DORMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+               CALL DORMBR( 'P', 'R', 'T', M, M, M, WORK( IL ), LDWRKL,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Multiply right singular vectors of L in WORK(IL) by
+*              Q in A, storing result in VT
+*              (Workspace: need M*M)
+*
+               CALL DLACPY( 'F', M, M, VT, LDVT, WORK( IL ), LDWRKL )
+               CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IL ), LDWRKL,
+     $                     A, LDA, ZERO, VT, LDVT )
+*
+            ELSE IF( WNTQA ) THEN
+*
+*              Path 4t (N much larger than M, JOBZ='A')
+*              N right singular vectors to be computed in VT and
+*              M left singular vectors to be computed in U
+*
+               IVT = 1
+*
+*              WORK(IVT) is M by M
+*
+               LDWKVT = M
+               ITAU = IVT + LDWKVT*M
+               NWORK = ITAU + M
+*
+*              Compute A=L*Q, copying result to VT
+*              (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+               CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+               CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*              Generate Q in VT
+*              (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+               CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Produce L in A, zeroing out other entries
+*
+               CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )
+               IE = ITAU
+               ITAUQ = IE + M
+               ITAUP = ITAUQ + M
+               NWORK = ITAUP + M
+*
+*              Bidiagonalize L in A
+*              (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*
+               CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                      IERR )
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in U and computing right singular
+*              vectors of bidiagonal matrix in WORK(IVT)
+*              (Workspace: need M+M*M+BDSPAC)
+*
+               CALL DBDSDC( 'U', 'I', M, S, WORK( IE ), U, LDU,
+     $                      WORK( IVT ), LDWKVT, DUM, IDUM,
+     $                      WORK( NWORK ), IWORK, INFO )
+*
+*              Overwrite U by left singular vectors of L and WORK(IVT)
+*              by right singular vectors of L
+*              (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB)
+*
+               CALL DORMBR( 'Q', 'L', 'N', M, M, M, A, LDA,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+               CALL DORMBR( 'P', 'R', 'T', M, M, M, A, LDA,
+     $                      WORK( ITAUP ), WORK( IVT ), LDWKVT,
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Multiply right singular vectors of L in WORK(IVT) by
+*              Q in VT, storing result in A
+*              (Workspace: need M*M)
+*
+               CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IVT ), LDWKVT,
+     $                     VT, LDVT, ZERO, A, LDA )
+*
+*              Copy right singular vectors of A from A to VT
+*
+               CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
+*
+            END IF
+*
+         ELSE
+*
+*           N .LT. MNTHR
+*
+*           Path 5t (N greater than M, but not much larger)
+*           Reduce to bidiagonal form without LQ decomposition
+*
+            IE = 1
+            ITAUQ = IE + M
+            ITAUP = ITAUQ + M
+            NWORK = ITAUP + M
+*
+*           Bidiagonalize A
+*           (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
+*
+            CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                   WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                   IERR )
+            IF( WNTQN ) THEN
+*
+*              Perform bidiagonal SVD, only computing singular values
+*              (Workspace: need M+BDSPAC)
+*
+               CALL DBDSDC( 'L', 'N', M, S, WORK( IE ), DUM, 1, DUM, 1,
+     $                      DUM, IDUM, WORK( NWORK ), IWORK, INFO )
+            ELSE IF( WNTQO ) THEN
+               LDWKVT = M
+               IVT = NWORK
+               IF( LWORK.GE.M*N+3*M+BDSPAC ) THEN
+*
+*                 WORK( IVT ) is M by N
+*
+                  CALL DLASET( 'F', M, N, ZERO, ZERO, WORK( IVT ),
+     $                         LDWKVT )
+                  NWORK = IVT + LDWKVT*N
+               ELSE
+*
+*                 WORK( IVT ) is M by M
+*
+                  NWORK = IVT + LDWKVT*M
+                  IL = NWORK
+*
+*                 WORK(IL) is M by CHUNK
+*
+                  CHUNK = ( LWORK-M*M-3*M ) / M
+               END IF
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in U and computing right singular
+*              vectors of bidiagonal matrix in WORK(IVT)
+*              (Workspace: need M*M+BDSPAC)
+*
+               CALL DBDSDC( 'L', 'I', M, S, WORK( IE ), U, LDU,
+     $                      WORK( IVT ), LDWKVT, DUM, IDUM,
+     $                      WORK( NWORK ), IWORK, INFO )
+*
+*              Overwrite U by left singular vectors of A
+*              (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+               CALL DORMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+               IF( LWORK.GE.M*N+3*M+BDSPAC ) THEN
+*
+*                 Overwrite WORK(IVT) by left singular vectors of A
+*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+                  CALL DORMBR( 'P', 'R', 'T', M, N, M, A, LDA,
+     $                         WORK( ITAUP ), WORK( IVT ), LDWKVT,
+     $                         WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*                 Copy right singular vectors of A from WORK(IVT) to A
+*
+                  CALL DLACPY( 'F', M, N, WORK( IVT ), LDWKVT, A, LDA )
+               ELSE
+*
+*                 Generate P**T in A
+*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+                  CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
+     $                         WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*                 Multiply Q in A by right singular vectors of
+*                 bidiagonal matrix in WORK(IVT), storing result in
+*                 WORK(IL) and copying to A
+*                 (Workspace: need 2*M*M, prefer M*M+M*N)
+*
+                  DO 40 I = 1, N, CHUNK
+                     BLK = MIN( N-I+1, CHUNK )
+                     CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IVT ),
+     $                           LDWKVT, A( 1, I ), LDA, ZERO,
+     $                           WORK( IL ), M )
+                     CALL DLACPY( 'F', M, BLK, WORK( IL ), M, A( 1, I ),
+     $                            LDA )
+   40             CONTINUE
+               END IF
+            ELSE IF( WNTQS ) THEN
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in U and computing right singular
+*              vectors of bidiagonal matrix in VT
+*              (Workspace: need M+BDSPAC)
+*
+               CALL DLASET( 'F', M, N, ZERO, ZERO, VT, LDVT )
+               CALL DBDSDC( 'L', 'I', M, S, WORK( IE ), U, LDU, VT,
+     $                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
+     $                      INFO )
+*
+*              Overwrite U by left singular vectors of A and VT
+*              by right singular vectors of A
+*              (Workspace: need 3*M, prefer 2*M+M*NB)
+*
+               CALL DORMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+               CALL DORMBR( 'P', 'R', 'T', M, N, M, A, LDA,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+            ELSE IF( WNTQA ) THEN
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in U and computing right singular
+*              vectors of bidiagonal matrix in VT
+*              (Workspace: need M+BDSPAC)
+*
+               CALL DLASET( 'F', N, N, ZERO, ZERO, VT, LDVT )
+               CALL DBDSDC( 'L', 'I', M, S, WORK( IE ), U, LDU, VT,
+     $                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
+     $                      INFO )
+*
+*              Set the right corner of VT to identity matrix
+*
+               CALL DLASET( 'F', N-M, N-M, ZERO, ONE, VT( M+1, M+1 ),
+     $                      LDVT )
+*
+*              Overwrite U by left singular vectors of A and VT
+*              by right singular vectors of A
+*              (Workspace: need 2*M+N, prefer 2*M+N*NB)
+*
+               CALL DORMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+               CALL DORMBR( 'P', 'R', 'T', N, N, M, A, LDA,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+            END IF
+*
+         END IF
+*
+      END IF
+*
+*     Undo scaling if necessary
+*
+      IF( ISCL.EQ.1 ) THEN
+         IF( ANRM.GT.BIGNUM )
+     $      CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
+     $                   IERR )
+         IF( ANRM.LT.SMLNUM )
+     $      CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
+     $                   IERR )
+      END IF
+*
+*     Return optimal workspace in WORK(1)
+*
+      WORK( 1 ) = DBLE( MAXWRK )
+*
+      RETURN
+*
+*     End of DGESDD
+*
+      END
diff --git a/costa/native/external/lapack/dgesv.f b/costa/native/external/lapack/dgesv.f
new file mode 100644
index 000000000..3dbd27ff0
--- /dev/null
+++ b/costa/native/external/lapack/dgesv.f
@@ -0,0 +1,108 @@
+      SUBROUTINE DGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGESV computes the solution to a real system of linear equations
+*     A * X = B,
+*  where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
+*
+*  The LU decomposition with partial pivoting and row interchanges is
+*  used to factor A as
+*     A = P * L * U,
+*  where P is a permutation matrix, L is unit lower triangular, and U is
+*  upper triangular.  The factored form of A is then used to solve the
+*  system of equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the N-by-N coefficient matrix A.
+*          On exit, the factors L and U from the factorization
+*          A = P*L*U; the unit diagonal elements of L are not stored.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          The pivot indices that define the permutation matrix P;
+*          row i of the matrix was interchanged with row IPIV(i).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS matrix of right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, U(i,i) is exactly zero.  The factorization
+*                has been completed, but the factor U is exactly
+*                singular, so the solution could not be computed.
+*
+*  =====================================================================
+*
+*     .. External Subroutines ..
+      EXTERNAL           DGETRF, DGETRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGESV ', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the LU factorization of A.
+*
+      CALL DGETRF( N, N, A, LDA, IPIV, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL DGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, B, LDB,
+     $                INFO )
+      END IF
+      RETURN
+*
+*     End of DGESV
+*
+      END
diff --git a/costa/native/external/lapack/dgesvd.f b/costa/native/external/lapack/dgesvd.f
new file mode 100644
index 000000000..95b9b3e87
--- /dev/null
+++ b/costa/native/external/lapack/dgesvd.f
@@ -0,0 +1,3417 @@
+      SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBU, JOBVT
+      INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), S( * ), U( LDU, * ),
+     $                   VT( LDVT, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGESVD computes the singular value decomposition (SVD) of a real
+*  M-by-N matrix A, optionally computing the left and/or right singular
+*  vectors. The SVD is written
+*
+*       A = U * SIGMA * transpose(V)
+*
+*  where SIGMA is an M-by-N matrix which is zero except for its
+*  min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
+*  V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
+*  are the singular values of A; they are real and non-negative, and
+*  are returned in descending order.  The first min(m,n) columns of
+*  U and V are the left and right singular vectors of A.
+*
+*  Note that the routine returns V**T, not V.
+*
+*  Arguments
+*  =========
+*
+*  JOBU    (input) CHARACTER*1
+*          Specifies options for computing all or part of the matrix U:
+*          = 'A':  all M columns of U are returned in array U:
+*          = 'S':  the first min(m,n) columns of U (the left singular
+*                  vectors) are returned in the array U;
+*          = 'O':  the first min(m,n) columns of U (the left singular
+*                  vectors) are overwritten on the array A;
+*          = 'N':  no columns of U (no left singular vectors) are
+*                  computed.
+*
+*  JOBVT   (input) CHARACTER*1
+*          Specifies options for computing all or part of the matrix
+*          V**T:
+*          = 'A':  all N rows of V**T are returned in the array VT;
+*          = 'S':  the first min(m,n) rows of V**T (the right singular
+*                  vectors) are returned in the array VT;
+*          = 'O':  the first min(m,n) rows of V**T (the right singular
+*                  vectors) are overwritten on the array A;
+*          = 'N':  no rows of V**T (no right singular vectors) are
+*                  computed.
+*
+*          JOBVT and JOBU cannot both be 'O'.
+*
+*  M       (input) INTEGER
+*          The number of rows of the input matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the input matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit,
+*          if JOBU = 'O',  A is overwritten with the first min(m,n)
+*                          columns of U (the left singular vectors,
+*                          stored columnwise);
+*          if JOBVT = 'O', A is overwritten with the first min(m,n)
+*                          rows of V**T (the right singular vectors,
+*                          stored rowwise);
+*          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
+*                          are destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  S       (output) DOUBLE PRECISION array, dimension (min(M,N))
+*          The singular values of A, sorted so that S(i) >= S(i+1).
+*
+*  U       (output) DOUBLE PRECISION array, dimension (LDU,UCOL)
+*          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
+*          If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
+*          if JOBU = 'S', U contains the first min(m,n) columns of U
+*          (the left singular vectors, stored columnwise);
+*          if JOBU = 'N' or 'O', U is not referenced.
+*
+*  LDU     (input) INTEGER
+*          The leading dimension of the array U.  LDU >= 1; if
+*          JOBU = 'S' or 'A', LDU >= M.
+*
+*  VT      (output) DOUBLE PRECISION array, dimension (LDVT,N)
+*          If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
+*          V**T;
+*          if JOBVT = 'S', VT contains the first min(m,n) rows of
+*          V**T (the right singular vectors, stored rowwise);
+*          if JOBVT = 'N' or 'O', VT is not referenced.
+*
+*  LDVT    (input) INTEGER
+*          The leading dimension of the array VT.  LDVT >= 1; if
+*          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
+*          if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
+*          superdiagonal elements of an upper bidiagonal matrix B
+*          whose diagonal is in S (not necessarily sorted). B
+*          satisfies A = U * B * VT, so it has the same singular values
+*          as A, and singular vectors related by U and VT.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= 1.
+*          LWORK >= MAX(3*MIN(M,N)+MAX(M,N),5*MIN(M,N)).
+*          For good performance, LWORK should generally be larger.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if DBDSQR did not converge, INFO specifies how many
+*                superdiagonals of an intermediate bidiagonal form B
+*                did not converge to zero. See the description of WORK
+*                above for details.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
+     $                   WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
+      INTEGER            BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
+     $                   ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
+     $                   MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
+     $                   NRVT, WRKBL
+      DOUBLE PRECISION   ANRM, BIGNUM, EPS, SMLNUM
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   DUM( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DBDSQR, DGEBRD, DGELQF, DGEMM, DGEQRF, DLACPY,
+     $                   DLASCL, DLASET, DORGBR, DORGLQ, DORGQR, DORMBR,
+     $                   XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, DLANGE
+      EXTERNAL           LSAME, ILAENV, DLAMCH, DLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      MINMN = MIN( M, N )
+      MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 )
+      WNTUA = LSAME( JOBU, 'A' )
+      WNTUS = LSAME( JOBU, 'S' )
+      WNTUAS = WNTUA .OR. WNTUS
+      WNTUO = LSAME( JOBU, 'O' )
+      WNTUN = LSAME( JOBU, 'N' )
+      WNTVA = LSAME( JOBVT, 'A' )
+      WNTVS = LSAME( JOBVT, 'S' )
+      WNTVAS = WNTVA .OR. WNTVS
+      WNTVO = LSAME( JOBVT, 'O' )
+      WNTVN = LSAME( JOBVT, 'N' )
+      MINWRK = 1
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR.
+     $         ( WNTVO .AND. WNTUO ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -6
+      ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN
+         INFO = -9
+      ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR.
+     $         ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN
+         INFO = -11
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV.)
+*
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) .AND. M.GT.0 .AND.
+     $    N.GT.0 ) THEN
+         IF( M.GE.N ) THEN
+*
+*           Compute space needed for DBDSQR
+*
+            BDSPAC = 5*N
+            IF( M.GE.MNTHR ) THEN
+               IF( WNTUN ) THEN
+*
+*                 Path 1 (M much larger than N, JOBU='N')
+*
+                  MAXWRK = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1,
+     $                     -1 )
+                  MAXWRK = MAX( MAXWRK, 3*N+2*N*
+     $                     ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
+                  IF( WNTVO .OR. WNTVAS )
+     $               MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
+     $                        ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
+                  MAXWRK = MAX( MAXWRK, BDSPAC )
+                  MINWRK = MAX( 4*N, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTUO .AND. WNTVN ) THEN
+*
+*                 Path 2 (M much larger than N, JOBU='O', JOBVT='N')
+*
+                  WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
+     $                    N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+2*N*
+     $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
+                  MINWRK = MAX( 3*N+M, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTUO .AND. WNTVAS ) THEN
+*
+*                 Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
+*                 'A')
+*
+                  WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
+     $                    N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+2*N*
+     $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+( N-1 )*
+     $                    ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
+                  MINWRK = MAX( 3*N+M, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTUS .AND. WNTVN ) THEN
+*
+*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
+*
+                  WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
+     $                    N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+2*N*
+     $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = N*N + WRKBL
+                  MINWRK = MAX( 3*N+M, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTUS .AND. WNTVO ) THEN
+*
+*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
+*
+                  WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
+     $                    N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+2*N*
+     $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+( N-1 )*
+     $                    ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = 2*N*N + WRKBL
+                  MINWRK = MAX( 3*N+M, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTUS .AND. WNTVAS ) THEN
+*
+*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
+*                 'A')
+*
+                  WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'DORGQR', ' ', M,
+     $                    N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+2*N*
+     $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+( N-1 )*
+     $                    ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = N*N + WRKBL
+                  MINWRK = MAX( 3*N+M, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTUA .AND. WNTVN ) THEN
+*
+*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
+*
+                  WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'DORGQR', ' ', M,
+     $                    M, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+2*N*
+     $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = N*N + WRKBL
+                  MINWRK = MAX( 3*N+M, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTUA .AND. WNTVO ) THEN
+*
+*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
+*
+                  WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'DORGQR', ' ', M,
+     $                    M, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+2*N*
+     $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+( N-1 )*
+     $                    ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = 2*N*N + WRKBL
+                  MINWRK = MAX( 3*N+M, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTUA .AND. WNTVAS ) THEN
+*
+*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
+*                 'A')
+*
+                  WRKBL = N + N*ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'DORGQR', ' ', M,
+     $                    M, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+2*N*
+     $                    ILAENV( 1, 'DGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'DORGBR', 'Q', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+( N-1 )*
+     $                    ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = N*N + WRKBL
+                  MINWRK = MAX( 3*N+M, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               END IF
+            ELSE
+*
+*              Path 10 (M at least N, but not much larger)
+*
+               MAXWRK = 3*N + ( M+N )*ILAENV( 1, 'DGEBRD', ' ', M, N,
+     $                  -1, -1 )
+               IF( WNTUS .OR. WNTUO )
+     $            MAXWRK = MAX( MAXWRK, 3*N+N*
+     $                     ILAENV( 1, 'DORGBR', 'Q', M, N, N, -1 ) )
+               IF( WNTUA )
+     $            MAXWRK = MAX( MAXWRK, 3*N+M*
+     $                     ILAENV( 1, 'DORGBR', 'Q', M, M, N, -1 ) )
+               IF( .NOT.WNTVN )
+     $            MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
+     $                     ILAENV( 1, 'DORGBR', 'P', N, N, N, -1 ) )
+               MAXWRK = MAX( MAXWRK, BDSPAC )
+               MINWRK = MAX( 3*N+M, BDSPAC )
+               MAXWRK = MAX( MAXWRK, MINWRK )
+            END IF
+         ELSE
+*
+*           Compute space needed for DBDSQR
+*
+            BDSPAC = 5*M
+            IF( N.GE.MNTHR ) THEN
+               IF( WNTVN ) THEN
+*
+*                 Path 1t(N much larger than M, JOBVT='N')
+*
+                  MAXWRK = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1,
+     $                     -1 )
+                  MAXWRK = MAX( MAXWRK, 3*M+2*M*
+     $                     ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
+                  IF( WNTUO .OR. WNTUAS )
+     $               MAXWRK = MAX( MAXWRK, 3*M+M*
+     $                        ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
+                  MAXWRK = MAX( MAXWRK, BDSPAC )
+                  MINWRK = MAX( 4*M, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTVO .AND. WNTUN ) THEN
+*
+*                 Path 2t(N much larger than M, JOBU='N', JOBVT='O')
+*
+                  WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+2*M*
+     $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+( M-1 )*
+     $                    ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
+                  MINWRK = MAX( 3*M+N, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTVO .AND. WNTUAS ) THEN
+*
+*                 Path 3t(N much larger than M, JOBU='S' or 'A',
+*                 JOBVT='O')
+*
+                  WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+2*M*
+     $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+( M-1 )*
+     $                    ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
+                  MINWRK = MAX( 3*M+N, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTVS .AND. WNTUN ) THEN
+*
+*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
+*
+                  WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+2*M*
+     $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+( M-1 )*
+     $                    ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = M*M + WRKBL
+                  MINWRK = MAX( 3*M+N, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTVS .AND. WNTUO ) THEN
+*
+*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
+*
+                  WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+2*M*
+     $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+( M-1 )*
+     $                    ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = 2*M*M + WRKBL
+                  MINWRK = MAX( 3*M+N, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTVS .AND. WNTUAS ) THEN
+*
+*                 Path 6t(N much larger than M, JOBU='S' or 'A',
+*                 JOBVT='S')
+*
+                  WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'DORGLQ', ' ', M,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+2*M*
+     $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+( M-1 )*
+     $                    ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = M*M + WRKBL
+                  MINWRK = MAX( 3*M+N, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTVA .AND. WNTUN ) THEN
+*
+*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
+*
+                  WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'DORGLQ', ' ', N,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+2*M*
+     $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+( M-1 )*
+     $                    ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = M*M + WRKBL
+                  MINWRK = MAX( 3*M+N, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTVA .AND. WNTUO ) THEN
+*
+*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
+*
+                  WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'DORGLQ', ' ', N,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+2*M*
+     $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+( M-1 )*
+     $                    ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = 2*M*M + WRKBL
+                  MINWRK = MAX( 3*M+N, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTVA .AND. WNTUAS ) THEN
+*
+*                 Path 9t(N much larger than M, JOBU='S' or 'A',
+*                 JOBVT='A')
+*
+                  WRKBL = M + M*ILAENV( 1, 'DGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'DORGLQ', ' ', N,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+2*M*
+     $                    ILAENV( 1, 'DGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+( M-1 )*
+     $                    ILAENV( 1, 'DORGBR', 'P', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = M*M + WRKBL
+                  MINWRK = MAX( 3*M+N, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               END IF
+            ELSE
+*
+*              Path 10t(N greater than M, but not much larger)
+*
+               MAXWRK = 3*M + ( M+N )*ILAENV( 1, 'DGEBRD', ' ', M, N,
+     $                  -1, -1 )
+               IF( WNTVS .OR. WNTVO )
+     $            MAXWRK = MAX( MAXWRK, 3*M+M*
+     $                     ILAENV( 1, 'DORGBR', 'P', M, N, M, -1 ) )
+               IF( WNTVA )
+     $            MAXWRK = MAX( MAXWRK, 3*M+N*
+     $                     ILAENV( 1, 'DORGBR', 'P', N, N, M, -1 ) )
+               IF( .NOT.WNTUN )
+     $            MAXWRK = MAX( MAXWRK, 3*M+( M-1 )*
+     $                     ILAENV( 1, 'DORGBR', 'Q', M, M, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, BDSPAC )
+               MINWRK = MAX( 3*M+N, BDSPAC )
+               MAXWRK = MAX( MAXWRK, MINWRK )
+            END IF
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+*
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGESVD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         IF( LWORK.GE.1 )
+     $      WORK( 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'P' )
+      SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = DLANGE( 'M', M, N, A, LDA, DUM )
+      ISCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         ISCL = 1
+         CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         ISCL = 1
+         CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
+      END IF
+*
+      IF( M.GE.N ) THEN
+*
+*        A has at least as many rows as columns. If A has sufficiently
+*        more rows than columns, first reduce using the QR
+*        decomposition (if sufficient workspace available)
+*
+         IF( M.GE.MNTHR ) THEN
+*
+            IF( WNTUN ) THEN
+*
+*              Path 1 (M much larger than N, JOBU='N')
+*              No left singular vectors to be computed
+*
+               ITAU = 1
+               IWORK = ITAU + N
+*
+*              Compute A=Q*R
+*              (Workspace: need 2*N, prefer N+N*NB)
+*
+               CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
+     $                      LWORK-IWORK+1, IERR )
+*
+*              Zero out below R
+*
+               CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
+               IE = 1
+               ITAUQ = IE + N
+               ITAUP = ITAUQ + N
+               IWORK = ITAUP + N
+*
+*              Bidiagonalize R in A
+*              (Workspace: need 4*N, prefer 3*N+2*N*NB)
+*
+               CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
+     $                      IERR )
+               NCVT = 0
+               IF( WNTVO .OR. WNTVAS ) THEN
+*
+*                 If right singular vectors desired, generate P'.
+*                 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
+*
+                  CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  NCVT = N
+               END IF
+               IWORK = IE + N
+*
+*              Perform bidiagonal QR iteration, computing right
+*              singular vectors of A in A if desired
+*              (Workspace: need BDSPAC)
+*
+               CALL DBDSQR( 'U', N, NCVT, 0, 0, S, WORK( IE ), A, LDA,
+     $                      DUM, 1, DUM, 1, WORK( IWORK ), INFO )
+*
+*              If right singular vectors desired in VT, copy them there
+*
+               IF( WNTVAS )
+     $            CALL DLACPY( 'F', N, N, A, LDA, VT, LDVT )
+*
+            ELSE IF( WNTUO .AND. WNTVN ) THEN
+*
+*              Path 2 (M much larger than N, JOBU='O', JOBVT='N')
+*              N left singular vectors to be overwritten on A and
+*              no right singular vectors to be computed
+*
+               IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
+*
+*                 Sufficient workspace for a fast algorithm
+*
+                  IR = 1
+                  IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
+*
+*                    WORK(IU) is LDA by N, WORK(IR) is LDA by N
+*
+                     LDWRKU = LDA
+                     LDWRKR = LDA
+                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
+*
+*                    WORK(IU) is LDA by N, WORK(IR) is N by N
+*
+                     LDWRKU = LDA
+                     LDWRKR = N
+                  ELSE
+*
+*                    WORK(IU) is LDWRKU by N, WORK(IR) is N by N
+*
+                     LDWRKU = ( LWORK-N*N-N ) / N
+                     LDWRKR = N
+                  END IF
+                  ITAU = IR + LDWRKR*N
+                  IWORK = ITAU + N
+*
+*                 Compute A=Q*R
+*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+                  CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Copy R to WORK(IR) and zero out below it
+*
+                  CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
+                  CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ),
+     $                         LDWRKR )
+*
+*                 Generate Q in A
+*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+                  CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IE = ITAU
+                  ITAUQ = IE + N
+                  ITAUP = ITAUQ + N
+                  IWORK = ITAUP + N
+*
+*                 Bidiagonalize R in WORK(IR)
+*                 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*
+                  CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Generate left vectors bidiagonalizing R
+*                 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
+*
+                  CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
+     $                         WORK( ITAUQ ), WORK( IWORK ),
+     $                         LWORK-IWORK+1, IERR )
+                  IWORK = IE + N
+*
+*                 Perform bidiagonal QR iteration, computing left
+*                 singular vectors of R in WORK(IR)
+*                 (Workspace: need N*N+BDSPAC)
+*
+                  CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, 1,
+     $                         WORK( IR ), LDWRKR, DUM, 1,
+     $                         WORK( IWORK ), INFO )
+                  IU = IE + N
+*
+*                 Multiply Q in A by left singular vectors of R in
+*                 WORK(IR), storing result in WORK(IU) and copying to A
+*                 (Workspace: need N*N+2*N, prefer N*N+M*N+N)
+*
+                  DO 10 I = 1, M, LDWRKU
+                     CHUNK = MIN( M-I+1, LDWRKU )
+                     CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
+     $                           LDA, WORK( IR ), LDWRKR, ZERO,
+     $                           WORK( IU ), LDWRKU )
+                     CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
+     $                            A( I, 1 ), LDA )
+   10             CONTINUE
+*
+               ELSE
+*
+*                 Insufficient workspace for a fast algorithm
+*
+                  IE = 1
+                  ITAUQ = IE + N
+                  ITAUP = ITAUQ + N
+                  IWORK = ITAUP + N
+*
+*                 Bidiagonalize A
+*                 (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
+*
+                  CALL DGEBRD( M, N, A, LDA, S, WORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Generate left vectors bidiagonalizing A
+*                 (Workspace: need 4*N, prefer 3*N+N*NB)
+*
+                  CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IWORK = IE + N
+*
+*                 Perform bidiagonal QR iteration, computing left
+*                 singular vectors of A in A
+*                 (Workspace: need BDSPAC)
+*
+                  CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, 1,
+     $                         A, LDA, DUM, 1, WORK( IWORK ), INFO )
+*
+               END IF
+*
+            ELSE IF( WNTUO .AND. WNTVAS ) THEN
+*
+*              Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
+*              N left singular vectors to be overwritten on A and
+*              N right singular vectors to be computed in VT
+*
+               IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
+*
+*                 Sufficient workspace for a fast algorithm
+*
+                  IR = 1
+                  IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
+*
+*                    WORK(IU) is LDA by N and WORK(IR) is LDA by N
+*
+                     LDWRKU = LDA
+                     LDWRKR = LDA
+                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
+*
+*                    WORK(IU) is LDA by N and WORK(IR) is N by N
+*
+                     LDWRKU = LDA
+                     LDWRKR = N
+                  ELSE
+*
+*                    WORK(IU) is LDWRKU by N and WORK(IR) is N by N
+*
+                     LDWRKU = ( LWORK-N*N-N ) / N
+                     LDWRKR = N
+                  END IF
+                  ITAU = IR + LDWRKR*N
+                  IWORK = ITAU + N
+*
+*                 Compute A=Q*R
+*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+                  CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Copy R to VT, zeroing out below it
+*
+                  CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
+                  CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, VT( 2, 1 ),
+     $                         LDVT )
+*
+*                 Generate Q in A
+*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+                  CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IE = ITAU
+                  ITAUQ = IE + N
+                  ITAUP = ITAUQ + N
+                  IWORK = ITAUP + N
+*
+*                 Bidiagonalize R in VT, copying result to WORK(IR)
+*                 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*
+                  CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  CALL DLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR )
+*
+*                 Generate left vectors bidiagonalizing R in WORK(IR)
+*                 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
+*
+                  CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
+     $                         WORK( ITAUQ ), WORK( IWORK ),
+     $                         LWORK-IWORK+1, IERR )
+*
+*                 Generate right vectors bidiagonalizing R in VT
+*                 (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB)
+*
+                  CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IWORK = IE + N
+*
+*                 Perform bidiagonal QR iteration, computing left
+*                 singular vectors of R in WORK(IR) and computing right
+*                 singular vectors of R in VT
+*                 (Workspace: need N*N+BDSPAC)
+*
+                  CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, LDVT,
+     $                         WORK( IR ), LDWRKR, DUM, 1,
+     $                         WORK( IWORK ), INFO )
+                  IU = IE + N
+*
+*                 Multiply Q in A by left singular vectors of R in
+*                 WORK(IR), storing result in WORK(IU) and copying to A
+*                 (Workspace: need N*N+2*N, prefer N*N+M*N+N)
+*
+                  DO 20 I = 1, M, LDWRKU
+                     CHUNK = MIN( M-I+1, LDWRKU )
+                     CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
+     $                           LDA, WORK( IR ), LDWRKR, ZERO,
+     $                           WORK( IU ), LDWRKU )
+                     CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
+     $                            A( I, 1 ), LDA )
+   20             CONTINUE
+*
+               ELSE
+*
+*                 Insufficient workspace for a fast algorithm
+*
+                  ITAU = 1
+                  IWORK = ITAU + N
+*
+*                 Compute A=Q*R
+*                 (Workspace: need 2*N, prefer N+N*NB)
+*
+                  CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Copy R to VT, zeroing out below it
+*
+                  CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
+                  CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, VT( 2, 1 ),
+     $                         LDVT )
+*
+*                 Generate Q in A
+*                 (Workspace: need 2*N, prefer N+N*NB)
+*
+                  CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IE = ITAU
+                  ITAUQ = IE + N
+                  ITAUP = ITAUQ + N
+                  IWORK = ITAUP + N
+*
+*                 Bidiagonalize R in VT
+*                 (Workspace: need 4*N, prefer 3*N+2*N*NB)
+*
+                  CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Multiply Q in A by left vectors bidiagonalizing R
+*                 (Workspace: need 3*N+M, prefer 3*N+M*NB)
+*
+                  CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
+     $                         WORK( ITAUQ ), A, LDA, WORK( IWORK ),
+     $                         LWORK-IWORK+1, IERR )
+*
+*                 Generate right vectors bidiagonalizing R in VT
+*                 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
+*
+                  CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IWORK = IE + N
+*
+*                 Perform bidiagonal QR iteration, computing left
+*                 singular vectors of A in A and computing right
+*                 singular vectors of A in VT
+*                 (Workspace: need BDSPAC)
+*
+                  CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, LDVT,
+     $                         A, LDA, DUM, 1, WORK( IWORK ), INFO )
+*
+               END IF
+*
+            ELSE IF( WNTUS ) THEN
+*
+               IF( WNTVN ) THEN
+*
+*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
+*                 N left singular vectors to be computed in U and
+*                 no right singular vectors to be computed
+*
+                  IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IR = 1
+                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
+*
+*                       WORK(IR) is LDA by N
+*
+                        LDWRKR = LDA
+                     ELSE
+*
+*                       WORK(IR) is N by N
+*
+                        LDWRKR = N
+                     END IF
+                     ITAU = IR + LDWRKR*N
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R
+*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy R to WORK(IR), zeroing out below it
+*
+                     CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ),
+     $                            LDWRKR )
+                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
+     $                            WORK( IR+1 ), LDWRKR )
+*
+*                    Generate Q in A
+*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+                     CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + N
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in WORK(IR)
+*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*
+                     CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S,
+     $                            WORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate left vectors bidiagonalizing R in WORK(IR)
+*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
+*
+                     CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of R in WORK(IR)
+*                    (Workspace: need N*N+BDSPAC)
+*
+                     CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
+     $                            1, WORK( IR ), LDWRKR, DUM, 1,
+     $                            WORK( IWORK ), INFO )
+*
+*                    Multiply Q in A by left singular vectors of R in
+*                    WORK(IR), storing result in U
+*                    (Workspace: need N*N)
+*
+                     CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
+     $                           WORK( IR ), LDWRKR, ZERO, U, LDU )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (Workspace: need 2*N, prefer N+N*NB)
+*
+                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (Workspace: need 2*N, prefer N+N*NB)
+*
+                     CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + N
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Zero out below R in A
+*
+                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
+     $                            LDA )
+*
+*                    Bidiagonalize R in A
+*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
+*
+                     CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply Q in U by left vectors bidiagonalizing R
+*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
+*
+                     CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
+     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U
+*                    (Workspace: need BDSPAC)
+*
+                     CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
+     $                            1, U, LDU, DUM, 1, WORK( IWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTVO ) THEN
+*
+*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
+*                 N left singular vectors to be computed in U and
+*                 N right singular vectors to be overwritten on A
+*
+                  IF( LWORK.GE.2*N*N+MAX( 4*N, BDSPAC ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
+*
+*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*N
+                        LDWRKR = LDA
+                     ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
+*
+*                       WORK(IU) is LDA by N and WORK(IR) is N by N
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*N
+                        LDWRKR = N
+                     ELSE
+*
+*                       WORK(IU) is N by N and WORK(IR) is N by N
+*
+                        LDWRKU = N
+                        IR = IU + LDWRKU*N
+                        LDWRKR = N
+                     END IF
+                     ITAU = IR + LDWRKR*N
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R
+*                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
+*
+                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy R to WORK(IU), zeroing out below it
+*
+                     CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
+     $                            WORK( IU+1 ), LDWRKU )
+*
+*                    Generate Q in A
+*                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
+*
+                     CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + N
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in WORK(IU), copying result to
+*                    WORK(IR)
+*                    (Workspace: need 2*N*N+4*N,
+*                                prefer 2*N*N+3*N+2*N*NB)
+*
+                     CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
+     $                            WORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU,
+     $                            WORK( IR ), LDWRKR )
+*
+*                    Generate left bidiagonalizing vectors in WORK(IU)
+*                    (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
+*
+                     CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in WORK(IR)
+*                    (Workspace: need 2*N*N+4*N-1,
+*                                prefer 2*N*N+3*N+(N-1)*NB)
+*
+                     CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of R in WORK(IU) and computing
+*                    right singular vectors of R in WORK(IR)
+*                    (Workspace: need 2*N*N+BDSPAC)
+*
+                     CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
+     $                            WORK( IR ), LDWRKR, WORK( IU ),
+     $                            LDWRKU, DUM, 1, WORK( IWORK ), INFO )
+*
+*                    Multiply Q in A by left singular vectors of R in
+*                    WORK(IU), storing result in U
+*                    (Workspace: need N*N)
+*
+                     CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
+     $                           WORK( IU ), LDWRKU, ZERO, U, LDU )
+*
+*                    Copy right singular vectors of R to A
+*                    (Workspace: need N*N)
+*
+                     CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
+     $                            LDA )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (Workspace: need 2*N, prefer N+N*NB)
+*
+                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (Workspace: need 2*N, prefer N+N*NB)
+*
+                     CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + N
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Zero out below R in A
+*
+                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
+     $                            LDA )
+*
+*                    Bidiagonalize R in A
+*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
+*
+                     CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply Q in U by left vectors bidiagonalizing R
+*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
+*
+                     CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
+     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right vectors bidiagonalizing R in A
+*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
+*
+                     CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U and computing right
+*                    singular vectors of A in A
+*                    (Workspace: need BDSPAC)
+*
+                     CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
+     $                            LDA, U, LDU, DUM, 1, WORK( IWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTVAS ) THEN
+*
+*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S'
+*                         or 'A')
+*                 N left singular vectors to be computed in U and
+*                 N right singular vectors to be computed in VT
+*
+                  IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
+*
+*                       WORK(IU) is LDA by N
+*
+                        LDWRKU = LDA
+                     ELSE
+*
+*                       WORK(IU) is N by N
+*
+                        LDWRKU = N
+                     END IF
+                     ITAU = IU + LDWRKU*N
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R
+*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy R to WORK(IU), zeroing out below it
+*
+                     CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
+     $                            WORK( IU+1 ), LDWRKU )
+*
+*                    Generate Q in A
+*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+                     CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + N
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in WORK(IU), copying result to VT
+*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*
+                     CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
+     $                            WORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
+     $                            LDVT )
+*
+*                    Generate left bidiagonalizing vectors in WORK(IU)
+*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
+*
+                     CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in VT
+*                    (Workspace: need N*N+4*N-1,
+*                                prefer N*N+3*N+(N-1)*NB)
+*
+                     CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of R in WORK(IU) and computing
+*                    right singular vectors of R in VT
+*                    (Workspace: need N*N+BDSPAC)
+*
+                     CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
+     $                            LDVT, WORK( IU ), LDWRKU, DUM, 1,
+     $                            WORK( IWORK ), INFO )
+*
+*                    Multiply Q in A by left singular vectors of R in
+*                    WORK(IU), storing result in U
+*                    (Workspace: need N*N)
+*
+                     CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
+     $                           WORK( IU ), LDWRKU, ZERO, U, LDU )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (Workspace: need 2*N, prefer N+N*NB)
+*
+                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (Workspace: need 2*N, prefer N+N*NB)
+*
+                     CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy R to VT, zeroing out below it
+*
+                     CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
+                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, VT( 2, 1 ),
+     $                            LDVT )
+                     IE = ITAU
+                     ITAUQ = IE + N
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in VT
+*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
+*
+                     CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply Q in U by left bidiagonalizing vectors
+*                    in VT
+*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
+*
+                     CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
+     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in VT
+*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
+*
+                     CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U and computing right
+*                    singular vectors of A in VT
+*                    (Workspace: need BDSPAC)
+*
+                     CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
+     $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               END IF
+*
+            ELSE IF( WNTUA ) THEN
+*
+               IF( WNTVN ) THEN
+*
+*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
+*                 M left singular vectors to be computed in U and
+*                 no right singular vectors to be computed
+*
+                  IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IR = 1
+                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
+*
+*                       WORK(IR) is LDA by N
+*
+                        LDWRKR = LDA
+                     ELSE
+*
+*                       WORK(IR) is N by N
+*
+                        LDWRKR = N
+                     END IF
+                     ITAU = IR + LDWRKR*N
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Copy R to WORK(IR), zeroing out below it
+*
+                     CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ),
+     $                            LDWRKR )
+                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
+     $                            WORK( IR+1 ), LDWRKR )
+*
+*                    Generate Q in U
+*                    (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
+*
+                     CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + N
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in WORK(IR)
+*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*
+                     CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S,
+     $                            WORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in WORK(IR)
+*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
+*
+                     CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of R in WORK(IR)
+*                    (Workspace: need N*N+BDSPAC)
+*
+                     CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
+     $                            1, WORK( IR ), LDWRKR, DUM, 1,
+     $                            WORK( IWORK ), INFO )
+*
+*                    Multiply Q in U by left singular vectors of R in
+*                    WORK(IR), storing result in A
+*                    (Workspace: need N*N)
+*
+                     CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
+     $                           WORK( IR ), LDWRKR, ZERO, A, LDA )
+*
+*                    Copy left singular vectors of A from A to U
+*
+                     CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (Workspace: need 2*N, prefer N+N*NB)
+*
+                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (Workspace: need N+M, prefer N+M*NB)
+*
+                     CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + N
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Zero out below R in A
+*
+                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
+     $                            LDA )
+*
+*                    Bidiagonalize R in A
+*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
+*
+                     CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply Q in U by left bidiagonalizing vectors
+*                    in A
+*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
+*
+                     CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
+     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U
+*                    (Workspace: need BDSPAC)
+*
+                     CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
+     $                            1, U, LDU, DUM, 1, WORK( IWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTVO ) THEN
+*
+*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
+*                 M left singular vectors to be computed in U and
+*                 N right singular vectors to be overwritten on A
+*
+                  IF( LWORK.GE.2*N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
+*
+*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*N
+                        LDWRKR = LDA
+                     ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
+*
+*                       WORK(IU) is LDA by N and WORK(IR) is N by N
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*N
+                        LDWRKR = N
+                     ELSE
+*
+*                       WORK(IU) is N by N and WORK(IR) is N by N
+*
+                        LDWRKU = N
+                        IR = IU + LDWRKU*N
+                        LDWRKR = N
+                     END IF
+                     ITAU = IR + LDWRKR*N
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
+*
+                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
+*
+                     CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy R to WORK(IU), zeroing out below it
+*
+                     CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
+     $                            WORK( IU+1 ), LDWRKU )
+                     IE = ITAU
+                     ITAUQ = IE + N
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in WORK(IU), copying result to
+*                    WORK(IR)
+*                    (Workspace: need 2*N*N+4*N,
+*                                prefer 2*N*N+3*N+2*N*NB)
+*
+                     CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
+     $                            WORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU,
+     $                            WORK( IR ), LDWRKR )
+*
+*                    Generate left bidiagonalizing vectors in WORK(IU)
+*                    (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
+*
+                     CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in WORK(IR)
+*                    (Workspace: need 2*N*N+4*N-1,
+*                                prefer 2*N*N+3*N+(N-1)*NB)
+*
+                     CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of R in WORK(IU) and computing
+*                    right singular vectors of R in WORK(IR)
+*                    (Workspace: need 2*N*N+BDSPAC)
+*
+                     CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
+     $                            WORK( IR ), LDWRKR, WORK( IU ),
+     $                            LDWRKU, DUM, 1, WORK( IWORK ), INFO )
+*
+*                    Multiply Q in U by left singular vectors of R in
+*                    WORK(IU), storing result in A
+*                    (Workspace: need N*N)
+*
+                     CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
+     $                           WORK( IU ), LDWRKU, ZERO, A, LDA )
+*
+*                    Copy left singular vectors of A from A to U
+*
+                     CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
+*
+*                    Copy right singular vectors of R from WORK(IR) to A
+*
+                     CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
+     $                            LDA )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (Workspace: need 2*N, prefer N+N*NB)
+*
+                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (Workspace: need N+M, prefer N+M*NB)
+*
+                     CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + N
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Zero out below R in A
+*
+                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
+     $                            LDA )
+*
+*                    Bidiagonalize R in A
+*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
+*
+                     CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply Q in U by left bidiagonalizing vectors
+*                    in A
+*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
+*
+                     CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
+     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in A
+*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
+*
+                     CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U and computing right
+*                    singular vectors of A in A
+*                    (Workspace: need BDSPAC)
+*
+                     CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
+     $                            LDA, U, LDU, DUM, 1, WORK( IWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTVAS ) THEN
+*
+*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S'
+*                         or 'A')
+*                 M left singular vectors to be computed in U and
+*                 N right singular vectors to be computed in VT
+*
+                  IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
+*
+*                       WORK(IU) is LDA by N
+*
+                        LDWRKU = LDA
+                     ELSE
+*
+*                       WORK(IU) is N by N
+*
+                        LDWRKU = N
+                     END IF
+                     ITAU = IU + LDWRKU*N
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
+*
+                     CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy R to WORK(IU), zeroing out below it
+*
+                     CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
+     $                            WORK( IU+1 ), LDWRKU )
+                     IE = ITAU
+                     ITAUQ = IE + N
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in WORK(IU), copying result to VT
+*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*
+                     CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
+     $                            WORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
+     $                            LDVT )
+*
+*                    Generate left bidiagonalizing vectors in WORK(IU)
+*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
+*
+                     CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in VT
+*                    (Workspace: need N*N+4*N-1,
+*                                prefer N*N+3*N+(N-1)*NB)
+*
+                     CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of R in WORK(IU) and computing
+*                    right singular vectors of R in VT
+*                    (Workspace: need N*N+BDSPAC)
+*
+                     CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
+     $                            LDVT, WORK( IU ), LDWRKU, DUM, 1,
+     $                            WORK( IWORK ), INFO )
+*
+*                    Multiply Q in U by left singular vectors of R in
+*                    WORK(IU), storing result in A
+*                    (Workspace: need N*N)
+*
+                     CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
+     $                           WORK( IU ), LDWRKU, ZERO, A, LDA )
+*
+*                    Copy left singular vectors of A from A to U
+*
+                     CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (Workspace: need 2*N, prefer N+N*NB)
+*
+                     CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (Workspace: need N+M, prefer N+M*NB)
+*
+                     CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy R from A to VT, zeroing out below it
+*
+                     CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
+                     CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, VT( 2, 1 ),
+     $                            LDVT )
+                     IE = ITAU
+                     ITAUQ = IE + N
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in VT
+*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
+*
+                     CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply Q in U by left bidiagonalizing vectors
+*                    in VT
+*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
+*
+                     CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
+     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in VT
+*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
+*
+                     CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U and computing right
+*                    singular vectors of A in VT
+*                    (Workspace: need BDSPAC)
+*
+                     CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
+     $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               END IF
+*
+            END IF
+*
+         ELSE
+*
+*           M .LT. MNTHR
+*
+*           Path 10 (M at least N, but not much larger)
+*           Reduce to bidiagonal form without QR decomposition
+*
+            IE = 1
+            ITAUQ = IE + N
+            ITAUP = ITAUQ + N
+            IWORK = ITAUP + N
+*
+*           Bidiagonalize A
+*           (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
+*
+            CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
+     $                   IERR )
+            IF( WNTUAS ) THEN
+*
+*              If left singular vectors desired in U, copy result to U
+*              and generate left bidiagonalizing vectors in U
+*              (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB)
+*
+               CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
+               IF( WNTUS )
+     $            NCU = N
+               IF( WNTUA )
+     $            NCU = M
+               CALL DORGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IF( WNTVAS ) THEN
+*
+*              If right singular vectors desired in VT, copy result to
+*              VT and generate right bidiagonalizing vectors in VT
+*              (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
+*
+               CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
+               CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IF( WNTUO ) THEN
+*
+*              If left singular vectors desired in A, generate left
+*              bidiagonalizing vectors in A
+*              (Workspace: need 4*N, prefer 3*N+N*NB)
+*
+               CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IF( WNTVO ) THEN
+*
+*              If right singular vectors desired in A, generate right
+*              bidiagonalizing vectors in A
+*              (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
+*
+               CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IWORK = IE + N
+            IF( WNTUAS .OR. WNTUO )
+     $         NRU = M
+            IF( WNTUN )
+     $         NRU = 0
+            IF( WNTVAS .OR. WNTVO )
+     $         NCVT = N
+            IF( WNTVN )
+     $         NCVT = 0
+            IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
+*
+*              Perform bidiagonal QR iteration, if desired, computing
+*              left singular vectors in U and computing right singular
+*              vectors in VT
+*              (Workspace: need BDSPAC)
+*
+               CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
+     $                      LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
+            ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
+*
+*              Perform bidiagonal QR iteration, if desired, computing
+*              left singular vectors in U and computing right singular
+*              vectors in A
+*              (Workspace: need BDSPAC)
+*
+               CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
+     $                      U, LDU, DUM, 1, WORK( IWORK ), INFO )
+            ELSE
+*
+*              Perform bidiagonal QR iteration, if desired, computing
+*              left singular vectors in A and computing right singular
+*              vectors in VT
+*              (Workspace: need BDSPAC)
+*
+               CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
+     $                      LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
+            END IF
+*
+         END IF
+*
+      ELSE
+*
+*        A has more columns than rows. If A has sufficiently more
+*        columns than rows, first reduce using the LQ decomposition (if
+*        sufficient workspace available)
+*
+         IF( N.GE.MNTHR ) THEN
+*
+            IF( WNTVN ) THEN
+*
+*              Path 1t(N much larger than M, JOBVT='N')
+*              No right singular vectors to be computed
+*
+               ITAU = 1
+               IWORK = ITAU + M
+*
+*              Compute A=L*Q
+*              (Workspace: need 2*M, prefer M+M*NB)
+*
+               CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
+     $                      LWORK-IWORK+1, IERR )
+*
+*              Zero out above L
+*
+               CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )
+               IE = 1
+               ITAUQ = IE + M
+               ITAUP = ITAUQ + M
+               IWORK = ITAUP + M
+*
+*              Bidiagonalize L in A
+*              (Workspace: need 4*M, prefer 3*M+2*M*NB)
+*
+               CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
+     $                      IERR )
+               IF( WNTUO .OR. WNTUAS ) THEN
+*
+*                 If left singular vectors desired, generate Q
+*                 (Workspace: need 4*M, prefer 3*M+M*NB)
+*
+                  CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+               END IF
+               IWORK = IE + M
+               NRU = 0
+               IF( WNTUO .OR. WNTUAS )
+     $            NRU = M
+*
+*              Perform bidiagonal QR iteration, computing left singular
+*              vectors of A in A if desired
+*              (Workspace: need BDSPAC)
+*
+               CALL DBDSQR( 'U', M, 0, NRU, 0, S, WORK( IE ), DUM, 1, A,
+     $                      LDA, DUM, 1, WORK( IWORK ), INFO )
+*
+*              If left singular vectors desired in U, copy them there
+*
+               IF( WNTUAS )
+     $            CALL DLACPY( 'F', M, M, A, LDA, U, LDU )
+*
+            ELSE IF( WNTVO .AND. WNTUN ) THEN
+*
+*              Path 2t(N much larger than M, JOBU='N', JOBVT='O')
+*              M right singular vectors to be overwritten on A and
+*              no left singular vectors to be computed
+*
+               IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
+*
+*                 Sufficient workspace for a fast algorithm
+*
+                  IR = 1
+                  IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
+*
+*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
+*
+                     LDWRKU = LDA
+                     CHUNK = N
+                     LDWRKR = LDA
+                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
+*
+*                    WORK(IU) is LDA by N and WORK(IR) is M by M
+*
+                     LDWRKU = LDA
+                     CHUNK = N
+                     LDWRKR = M
+                  ELSE
+*
+*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
+*
+                     LDWRKU = M
+                     CHUNK = ( LWORK-M*M-M ) / M
+                     LDWRKR = M
+                  END IF
+                  ITAU = IR + LDWRKR*M
+                  IWORK = ITAU + M
+*
+*                 Compute A=L*Q
+*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+                  CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Copy L to WORK(IR) and zero out above it
+*
+                  CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR )
+                  CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
+     $                         WORK( IR+LDWRKR ), LDWRKR )
+*
+*                 Generate Q in A
+*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+                  CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IE = ITAU
+                  ITAUQ = IE + M
+                  ITAUP = ITAUQ + M
+                  IWORK = ITAUP + M
+*
+*                 Bidiagonalize L in WORK(IR)
+*                 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*
+                  CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S, WORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Generate right vectors bidiagonalizing L
+*                 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
+*
+                  CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
+     $                         WORK( ITAUP ), WORK( IWORK ),
+     $                         LWORK-IWORK+1, IERR )
+                  IWORK = IE + M
+*
+*                 Perform bidiagonal QR iteration, computing right
+*                 singular vectors of L in WORK(IR)
+*                 (Workspace: need M*M+BDSPAC)
+*
+                  CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
+     $                         WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
+     $                         WORK( IWORK ), INFO )
+                  IU = IE + M
+*
+*                 Multiply right singular vectors of L in WORK(IR) by Q
+*                 in A, storing result in WORK(IU) and copying to A
+*                 (Workspace: need M*M+2*M, prefer M*M+M*N+M)
+*
+                  DO 30 I = 1, N, CHUNK
+                     BLK = MIN( N-I+1, CHUNK )
+                     CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
+     $                           LDWRKR, A( 1, I ), LDA, ZERO,
+     $                           WORK( IU ), LDWRKU )
+                     CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
+     $                            A( 1, I ), LDA )
+   30             CONTINUE
+*
+               ELSE
+*
+*                 Insufficient workspace for a fast algorithm
+*
+                  IE = 1
+                  ITAUQ = IE + M
+                  ITAUP = ITAUQ + M
+                  IWORK = ITAUP + M
+*
+*                 Bidiagonalize A
+*                 (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
+*
+                  CALL DGEBRD( M, N, A, LDA, S, WORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Generate right vectors bidiagonalizing A
+*                 (Workspace: need 4*M, prefer 3*M+M*NB)
+*
+                  CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IWORK = IE + M
+*
+*                 Perform bidiagonal QR iteration, computing right
+*                 singular vectors of A in A
+*                 (Workspace: need BDSPAC)
+*
+                  CALL DBDSQR( 'L', M, N, 0, 0, S, WORK( IE ), A, LDA,
+     $                         DUM, 1, DUM, 1, WORK( IWORK ), INFO )
+*
+               END IF
+*
+            ELSE IF( WNTVO .AND. WNTUAS ) THEN
+*
+*              Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
+*              M right singular vectors to be overwritten on A and
+*              M left singular vectors to be computed in U
+*
+               IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
+*
+*                 Sufficient workspace for a fast algorithm
+*
+                  IR = 1
+                  IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
+*
+*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
+*
+                     LDWRKU = LDA
+                     CHUNK = N
+                     LDWRKR = LDA
+                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
+*
+*                    WORK(IU) is LDA by N and WORK(IR) is M by M
+*
+                     LDWRKU = LDA
+                     CHUNK = N
+                     LDWRKR = M
+                  ELSE
+*
+*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
+*
+                     LDWRKU = M
+                     CHUNK = ( LWORK-M*M-M ) / M
+                     LDWRKR = M
+                  END IF
+                  ITAU = IR + LDWRKR*M
+                  IWORK = ITAU + M
+*
+*                 Compute A=L*Q
+*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+                  CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Copy L to U, zeroing about above it
+*
+                  CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
+                  CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
+     $                         LDU )
+*
+*                 Generate Q in A
+*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+                  CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IE = ITAU
+                  ITAUQ = IE + M
+                  ITAUP = ITAUQ + M
+                  IWORK = ITAUP + M
+*
+*                 Bidiagonalize L in U, copying result to WORK(IR)
+*                 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*
+                  CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  CALL DLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR )
+*
+*                 Generate right vectors bidiagonalizing L in WORK(IR)
+*                 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
+*
+                  CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
+     $                         WORK( ITAUP ), WORK( IWORK ),
+     $                         LWORK-IWORK+1, IERR )
+*
+*                 Generate left vectors bidiagonalizing L in U
+*                 (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
+*
+                  CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IWORK = IE + M
+*
+*                 Perform bidiagonal QR iteration, computing left
+*                 singular vectors of L in U, and computing right
+*                 singular vectors of L in WORK(IR)
+*                 (Workspace: need M*M+BDSPAC)
+*
+                  CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
+     $                         WORK( IR ), LDWRKR, U, LDU, DUM, 1,
+     $                         WORK( IWORK ), INFO )
+                  IU = IE + M
+*
+*                 Multiply right singular vectors of L in WORK(IR) by Q
+*                 in A, storing result in WORK(IU) and copying to A
+*                 (Workspace: need M*M+2*M, prefer M*M+M*N+M))
+*
+                  DO 40 I = 1, N, CHUNK
+                     BLK = MIN( N-I+1, CHUNK )
+                     CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
+     $                           LDWRKR, A( 1, I ), LDA, ZERO,
+     $                           WORK( IU ), LDWRKU )
+                     CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
+     $                            A( 1, I ), LDA )
+   40             CONTINUE
+*
+               ELSE
+*
+*                 Insufficient workspace for a fast algorithm
+*
+                  ITAU = 1
+                  IWORK = ITAU + M
+*
+*                 Compute A=L*Q
+*                 (Workspace: need 2*M, prefer M+M*NB)
+*
+                  CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Copy L to U, zeroing out above it
+*
+                  CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
+                  CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
+     $                         LDU )
+*
+*                 Generate Q in A
+*                 (Workspace: need 2*M, prefer M+M*NB)
+*
+                  CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IE = ITAU
+                  ITAUQ = IE + M
+                  ITAUP = ITAUQ + M
+                  IWORK = ITAUP + M
+*
+*                 Bidiagonalize L in U
+*                 (Workspace: need 4*M, prefer 3*M+2*M*NB)
+*
+                  CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Multiply right vectors bidiagonalizing L by Q in A
+*                 (Workspace: need 3*M+N, prefer 3*M+N*NB)
+*
+                  CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
+     $                         WORK( ITAUP ), A, LDA, WORK( IWORK ),
+     $                         LWORK-IWORK+1, IERR )
+*
+*                 Generate left vectors bidiagonalizing L in U
+*                 (Workspace: need 4*M, prefer 3*M+M*NB)
+*
+                  CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IWORK = IE + M
+*
+*                 Perform bidiagonal QR iteration, computing left
+*                 singular vectors of A in U and computing right
+*                 singular vectors of A in A
+*                 (Workspace: need BDSPAC)
+*
+                  CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), A, LDA,
+     $                         U, LDU, DUM, 1, WORK( IWORK ), INFO )
+*
+               END IF
+*
+            ELSE IF( WNTVS ) THEN
+*
+               IF( WNTUN ) THEN
+*
+*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
+*                 M right singular vectors to be computed in VT and
+*                 no left singular vectors to be computed
+*
+                  IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IR = 1
+                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
+*
+*                       WORK(IR) is LDA by M
+*
+                        LDWRKR = LDA
+                     ELSE
+*
+*                       WORK(IR) is M by M
+*
+                        LDWRKR = M
+                     END IF
+                     ITAU = IR + LDWRKR*M
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q
+*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy L to WORK(IR), zeroing out above it
+*
+                     CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ),
+     $                            LDWRKR )
+                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
+     $                            WORK( IR+LDWRKR ), LDWRKR )
+*
+*                    Generate Q in A
+*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+                     CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + M
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in WORK(IR)
+*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*
+                     CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S,
+     $                            WORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right vectors bidiagonalizing L in
+*                    WORK(IR)
+*                    (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
+*
+                     CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing right
+*                    singular vectors of L in WORK(IR)
+*                    (Workspace: need M*M+BDSPAC)
+*
+                     CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
+     $                            WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
+     $                            WORK( IWORK ), INFO )
+*
+*                    Multiply right singular vectors of L in WORK(IR) by
+*                    Q in A, storing result in VT
+*                    (Workspace: need M*M)
+*
+                     CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
+     $                           LDWRKR, A, LDA, ZERO, VT, LDVT )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q
+*                    (Workspace: need 2*M, prefer M+M*NB)
+*
+                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy result to VT
+*
+                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (Workspace: need 2*M, prefer M+M*NB)
+*
+                     CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + M
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Zero out above L in A
+*
+                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
+     $                            LDA )
+*
+*                    Bidiagonalize L in A
+*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
+*
+                     CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply right vectors bidiagonalizing L by Q in VT
+*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
+*
+                     CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
+     $                            WORK( ITAUP ), VT, LDVT,
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing right
+*                    singular vectors of A in VT
+*                    (Workspace: need BDSPAC)
+*
+                     CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
+     $                            LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTUO ) THEN
+*
+*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
+*                 M right singular vectors to be computed in VT and
+*                 M left singular vectors to be overwritten on A
+*
+                  IF( LWORK.GE.2*M*M+MAX( 4*M, BDSPAC ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
+*
+*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*M
+                        LDWRKR = LDA
+                     ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
+*
+*                       WORK(IU) is LDA by M and WORK(IR) is M by M
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*M
+                        LDWRKR = M
+                     ELSE
+*
+*                       WORK(IU) is M by M and WORK(IR) is M by M
+*
+                        LDWRKU = M
+                        IR = IU + LDWRKU*M
+                        LDWRKR = M
+                     END IF
+                     ITAU = IR + LDWRKR*M
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q
+*                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
+*
+                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy L to WORK(IU), zeroing out below it
+*
+                     CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
+     $                            WORK( IU+LDWRKU ), LDWRKU )
+*
+*                    Generate Q in A
+*                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
+*
+                     CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + M
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in WORK(IU), copying result to
+*                    WORK(IR)
+*                    (Workspace: need 2*M*M+4*M,
+*                                prefer 2*M*M+3*M+2*M*NB)
+*
+                     CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
+     $                            WORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU,
+     $                            WORK( IR ), LDWRKR )
+*
+*                    Generate right bidiagonalizing vectors in WORK(IU)
+*                    (Workspace: need 2*M*M+4*M-1,
+*                                prefer 2*M*M+3*M+(M-1)*NB)
+*
+                     CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in WORK(IR)
+*                    (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
+*
+                     CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of L in WORK(IR) and computing
+*                    right singular vectors of L in WORK(IU)
+*                    (Workspace: need 2*M*M+BDSPAC)
+*
+                     CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
+     $                            WORK( IU ), LDWRKU, WORK( IR ),
+     $                            LDWRKR, DUM, 1, WORK( IWORK ), INFO )
+*
+*                    Multiply right singular vectors of L in WORK(IU) by
+*                    Q in A, storing result in VT
+*                    (Workspace: need M*M)
+*
+                     CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
+     $                           LDWRKU, A, LDA, ZERO, VT, LDVT )
+*
+*                    Copy left singular vectors of L to A
+*                    (Workspace: need M*M)
+*
+                     CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
+     $                            LDA )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (Workspace: need 2*M, prefer M+M*NB)
+*
+                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (Workspace: need 2*M, prefer M+M*NB)
+*
+                     CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + M
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Zero out above L in A
+*
+                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
+     $                            LDA )
+*
+*                    Bidiagonalize L in A
+*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
+*
+                     CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply right vectors bidiagonalizing L by Q in VT
+*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
+*
+                     CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
+     $                            WORK( ITAUP ), VT, LDVT,
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors of L in A
+*                    (Workspace: need 4*M, prefer 3*M+M*NB)
+*
+                     CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, compute left
+*                    singular vectors of A in A and compute right
+*                    singular vectors of A in VT
+*                    (Workspace: need BDSPAC)
+*
+                     CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
+     $                            LDVT, A, LDA, DUM, 1, WORK( IWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTUAS ) THEN
+*
+*                 Path 6t(N much larger than M, JOBU='S' or 'A',
+*                         JOBVT='S')
+*                 M right singular vectors to be computed in VT and
+*                 M left singular vectors to be computed in U
+*
+                  IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
+*
+*                       WORK(IU) is LDA by N
+*
+                        LDWRKU = LDA
+                     ELSE
+*
+*                       WORK(IU) is LDA by M
+*
+                        LDWRKU = M
+                     END IF
+                     ITAU = IU + LDWRKU*M
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q
+*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy L to WORK(IU), zeroing out above it
+*
+                     CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
+     $                            WORK( IU+LDWRKU ), LDWRKU )
+*
+*                    Generate Q in A
+*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+                     CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + M
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in WORK(IU), copying result to U
+*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*
+                     CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
+     $                            WORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
+     $                            LDU )
+*
+*                    Generate right bidiagonalizing vectors in WORK(IU)
+*                    (Workspace: need M*M+4*M-1,
+*                                prefer M*M+3*M+(M-1)*NB)
+*
+                     CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in U
+*                    (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
+*
+                     CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of L in U and computing right
+*                    singular vectors of L in WORK(IU)
+*                    (Workspace: need M*M+BDSPAC)
+*
+                     CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
+     $                            WORK( IU ), LDWRKU, U, LDU, DUM, 1,
+     $                            WORK( IWORK ), INFO )
+*
+*                    Multiply right singular vectors of L in WORK(IU) by
+*                    Q in A, storing result in VT
+*                    (Workspace: need M*M)
+*
+                     CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
+     $                           LDWRKU, A, LDA, ZERO, VT, LDVT )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (Workspace: need 2*M, prefer M+M*NB)
+*
+                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (Workspace: need 2*M, prefer M+M*NB)
+*
+                     CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy L to U, zeroing out above it
+*
+                     CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
+                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
+     $                            LDU )
+                     IE = ITAU
+                     ITAUQ = IE + M
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in U
+*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
+*
+                     CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply right bidiagonalizing vectors in U by Q
+*                    in VT
+*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
+*
+                     CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
+     $                            WORK( ITAUP ), VT, LDVT,
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in U
+*                    (Workspace: need 4*M, prefer 3*M+M*NB)
+*
+                     CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U and computing right
+*                    singular vectors of A in VT
+*                    (Workspace: need BDSPAC)
+*
+                     CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
+     $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               END IF
+*
+            ELSE IF( WNTVA ) THEN
+*
+               IF( WNTUN ) THEN
+*
+*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
+*                 N right singular vectors to be computed in VT and
+*                 no left singular vectors to be computed
+*
+                  IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IR = 1
+                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
+*
+*                       WORK(IR) is LDA by M
+*
+                        LDWRKR = LDA
+                     ELSE
+*
+*                       WORK(IR) is M by M
+*
+                        LDWRKR = M
+                     END IF
+                     ITAU = IR + LDWRKR*M
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Copy L to WORK(IR), zeroing out above it
+*
+                     CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ),
+     $                            LDWRKR )
+                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
+     $                            WORK( IR+LDWRKR ), LDWRKR )
+*
+*                    Generate Q in VT
+*                    (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
+*
+                     CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + M
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in WORK(IR)
+*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*
+                     CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S,
+     $                            WORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in WORK(IR)
+*                    (Workspace: need M*M+4*M-1,
+*                                prefer M*M+3*M+(M-1)*NB)
+*
+                     CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing right
+*                    singular vectors of L in WORK(IR)
+*                    (Workspace: need M*M+BDSPAC)
+*
+                     CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
+     $                            WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
+     $                            WORK( IWORK ), INFO )
+*
+*                    Multiply right singular vectors of L in WORK(IR) by
+*                    Q in VT, storing result in A
+*                    (Workspace: need M*M)
+*
+                     CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
+     $                           LDWRKR, VT, LDVT, ZERO, A, LDA )
+*
+*                    Copy right singular vectors of A from A to VT
+*
+                     CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (Workspace: need 2*M, prefer M+M*NB)
+*
+                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (Workspace: need M+N, prefer M+N*NB)
+*
+                     CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + M
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Zero out above L in A
+*
+                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
+     $                            LDA )
+*
+*                    Bidiagonalize L in A
+*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
+*
+                     CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply right bidiagonalizing vectors in A by Q
+*                    in VT
+*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
+*
+                     CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
+     $                            WORK( ITAUP ), VT, LDVT,
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing right
+*                    singular vectors of A in VT
+*                    (Workspace: need BDSPAC)
+*
+                     CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
+     $                            LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTUO ) THEN
+*
+*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
+*                 N right singular vectors to be computed in VT and
+*                 M left singular vectors to be overwritten on A
+*
+                  IF( LWORK.GE.2*M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
+*
+*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*M
+                        LDWRKR = LDA
+                     ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
+*
+*                       WORK(IU) is LDA by M and WORK(IR) is M by M
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*M
+                        LDWRKR = M
+                     ELSE
+*
+*                       WORK(IU) is M by M and WORK(IR) is M by M
+*
+                        LDWRKU = M
+                        IR = IU + LDWRKU*M
+                        LDWRKR = M
+                     END IF
+                     ITAU = IR + LDWRKR*M
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
+*
+                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
+*
+                     CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy L to WORK(IU), zeroing out above it
+*
+                     CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
+     $                            WORK( IU+LDWRKU ), LDWRKU )
+                     IE = ITAU
+                     ITAUQ = IE + M
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in WORK(IU), copying result to
+*                    WORK(IR)
+*                    (Workspace: need 2*M*M+4*M,
+*                                prefer 2*M*M+3*M+2*M*NB)
+*
+                     CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
+     $                            WORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU,
+     $                            WORK( IR ), LDWRKR )
+*
+*                    Generate right bidiagonalizing vectors in WORK(IU)
+*                    (Workspace: need 2*M*M+4*M-1,
+*                                prefer 2*M*M+3*M+(M-1)*NB)
+*
+                     CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in WORK(IR)
+*                    (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
+*
+                     CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of L in WORK(IR) and computing
+*                    right singular vectors of L in WORK(IU)
+*                    (Workspace: need 2*M*M+BDSPAC)
+*
+                     CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
+     $                            WORK( IU ), LDWRKU, WORK( IR ),
+     $                            LDWRKR, DUM, 1, WORK( IWORK ), INFO )
+*
+*                    Multiply right singular vectors of L in WORK(IU) by
+*                    Q in VT, storing result in A
+*                    (Workspace: need M*M)
+*
+                     CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
+     $                           LDWRKU, VT, LDVT, ZERO, A, LDA )
+*
+*                    Copy right singular vectors of A from A to VT
+*
+                     CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
+*
+*                    Copy left singular vectors of A from WORK(IR) to A
+*
+                     CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
+     $                            LDA )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (Workspace: need 2*M, prefer M+M*NB)
+*
+                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (Workspace: need M+N, prefer M+N*NB)
+*
+                     CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + M
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Zero out above L in A
+*
+                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
+     $                            LDA )
+*
+*                    Bidiagonalize L in A
+*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
+*
+                     CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply right bidiagonalizing vectors in A by Q
+*                    in VT
+*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
+*
+                     CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
+     $                            WORK( ITAUP ), VT, LDVT,
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in A
+*                    (Workspace: need 4*M, prefer 3*M+M*NB)
+*
+                     CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in A and computing right
+*                    singular vectors of A in VT
+*                    (Workspace: need BDSPAC)
+*
+                     CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
+     $                            LDVT, A, LDA, DUM, 1, WORK( IWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTUAS ) THEN
+*
+*                 Path 9t(N much larger than M, JOBU='S' or 'A',
+*                         JOBVT='A')
+*                 N right singular vectors to be computed in VT and
+*                 M left singular vectors to be computed in U
+*
+                  IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
+*
+*                       WORK(IU) is LDA by M
+*
+                        LDWRKU = LDA
+                     ELSE
+*
+*                       WORK(IU) is M by M
+*
+                        LDWRKU = M
+                     END IF
+                     ITAU = IU + LDWRKU*M
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
+*
+                     CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy L to WORK(IU), zeroing out above it
+*
+                     CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
+     $                            WORK( IU+LDWRKU ), LDWRKU )
+                     IE = ITAU
+                     ITAUQ = IE + M
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in WORK(IU), copying result to U
+*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*
+                     CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
+     $                            WORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
+     $                            LDU )
+*
+*                    Generate right bidiagonalizing vectors in WORK(IU)
+*                    (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
+*
+                     CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in U
+*                    (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
+*
+                     CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of L in U and computing right
+*                    singular vectors of L in WORK(IU)
+*                    (Workspace: need M*M+BDSPAC)
+*
+                     CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
+     $                            WORK( IU ), LDWRKU, U, LDU, DUM, 1,
+     $                            WORK( IWORK ), INFO )
+*
+*                    Multiply right singular vectors of L in WORK(IU) by
+*                    Q in VT, storing result in A
+*                    (Workspace: need M*M)
+*
+                     CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
+     $                           LDWRKU, VT, LDVT, ZERO, A, LDA )
+*
+*                    Copy right singular vectors of A from A to VT
+*
+                     CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (Workspace: need 2*M, prefer M+M*NB)
+*
+                     CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (Workspace: need M+N, prefer M+N*NB)
+*
+                     CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy L to U, zeroing out above it
+*
+                     CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
+                     CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
+     $                            LDU )
+                     IE = ITAU
+                     ITAUQ = IE + M
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in U
+*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
+*
+                     CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply right bidiagonalizing vectors in U by Q
+*                    in VT
+*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
+*
+                     CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
+     $                            WORK( ITAUP ), VT, LDVT,
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in U
+*                    (Workspace: need 4*M, prefer 3*M+M*NB)
+*
+                     CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U and computing right
+*                    singular vectors of A in VT
+*                    (Workspace: need BDSPAC)
+*
+                     CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
+     $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               END IF
+*
+            END IF
+*
+         ELSE
+*
+*           N .LT. MNTHR
+*
+*           Path 10t(N greater than M, but not much larger)
+*           Reduce to bidiagonal form without LQ decomposition
+*
+            IE = 1
+            ITAUQ = IE + M
+            ITAUP = ITAUQ + M
+            IWORK = ITAUP + M
+*
+*           Bidiagonalize A
+*           (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
+*
+            CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
+     $                   IERR )
+            IF( WNTUAS ) THEN
+*
+*              If left singular vectors desired in U, copy result to U
+*              and generate left bidiagonalizing vectors in U
+*              (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
+*
+               CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
+               CALL DORGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IF( WNTVAS ) THEN
+*
+*              If right singular vectors desired in VT, copy result to
+*              VT and generate right bidiagonalizing vectors in VT
+*              (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB)
+*
+               CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
+               IF( WNTVA )
+     $            NRVT = N
+               IF( WNTVS )
+     $            NRVT = M
+               CALL DORGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IF( WNTUO ) THEN
+*
+*              If left singular vectors desired in A, generate left
+*              bidiagonalizing vectors in A
+*              (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
+*
+               CALL DORGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IF( WNTVO ) THEN
+*
+*              If right singular vectors desired in A, generate right
+*              bidiagonalizing vectors in A
+*              (Workspace: need 4*M, prefer 3*M+M*NB)
+*
+               CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IWORK = IE + M
+            IF( WNTUAS .OR. WNTUO )
+     $         NRU = M
+            IF( WNTUN )
+     $         NRU = 0
+            IF( WNTVAS .OR. WNTVO )
+     $         NCVT = N
+            IF( WNTVN )
+     $         NCVT = 0
+            IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
+*
+*              Perform bidiagonal QR iteration, if desired, computing
+*              left singular vectors in U and computing right singular
+*              vectors in VT
+*              (Workspace: need BDSPAC)
+*
+               CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
+     $                      LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
+            ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
+*
+*              Perform bidiagonal QR iteration, if desired, computing
+*              left singular vectors in U and computing right singular
+*              vectors in A
+*              (Workspace: need BDSPAC)
+*
+               CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
+     $                      U, LDU, DUM, 1, WORK( IWORK ), INFO )
+            ELSE
+*
+*              Perform bidiagonal QR iteration, if desired, computing
+*              left singular vectors in A and computing right singular
+*              vectors in VT
+*              (Workspace: need BDSPAC)
+*
+               CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
+     $                      LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
+            END IF
+*
+         END IF
+*
+      END IF
+*
+*     If DBDSQR failed to converge, copy unconverged superdiagonals
+*     to WORK( 2:MINMN )
+*
+      IF( INFO.NE.0 ) THEN
+         IF( IE.GT.2 ) THEN
+            DO 50 I = 1, MINMN - 1
+               WORK( I+1 ) = WORK( I+IE-1 )
+   50       CONTINUE
+         END IF
+         IF( IE.LT.2 ) THEN
+            DO 60 I = MINMN - 1, 1, -1
+               WORK( I+1 ) = WORK( I+IE-1 )
+   60       CONTINUE
+         END IF
+      END IF
+*
+*     Undo scaling if necessary
+*
+      IF( ISCL.EQ.1 ) THEN
+         IF( ANRM.GT.BIGNUM )
+     $      CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
+     $                   IERR )
+         IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM )
+     $      CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, WORK( 2 ),
+     $                   MINMN, IERR )
+         IF( ANRM.LT.SMLNUM )
+     $      CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
+     $                   IERR )
+         IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM )
+     $      CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, WORK( 2 ),
+     $                   MINMN, IERR )
+      END IF
+*
+*     Return optimal workspace in WORK(1)
+*
+      WORK( 1 ) = MAXWRK
+*
+      RETURN
+*
+*     End of DGESVD
+*
+      END
diff --git a/costa/native/external/lapack/dgesvx.f b/costa/native/external/lapack/dgesvx.f
new file mode 100644
index 000000000..0019e5bbd
--- /dev/null
+++ b/costa/native/external/lapack/dgesvx.f
@@ -0,0 +1,482 @@
+      SUBROUTINE DGESVX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV,
+     $                   EQUED, R, C, B, LDB, X, LDX, RCOND, FERR, BERR,
+     $                   WORK, IWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, FACT, TRANS
+      INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
+     $                   BERR( * ), C( * ), FERR( * ), R( * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGESVX uses the LU factorization to compute the solution to a real
+*  system of linear equations
+*     A * X = B,
+*  where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'E', real scaling factors are computed to equilibrate
+*     the system:
+*        TRANS = 'N':  diag(R)*A*diag(C)     *inv(diag(C))*X = diag(R)*B
+*        TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
+*        TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
+*     Whether or not the system will be equilibrated depends on the
+*     scaling of the matrix A, but if equilibration is used, A is
+*     overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N')
+*     or diag(C)*B (if TRANS = 'T' or 'C').
+*
+*  2. If FACT = 'N' or 'E', the LU decomposition is used to factor the
+*     matrix A (after equilibration if FACT = 'E') as
+*        A = P * L * U,
+*     where P is a permutation matrix, L is a unit lower triangular
+*     matrix, and U is upper triangular.
+*
+*  3. If some U(i,i)=0, so that U is exactly singular, then the routine
+*     returns with INFO = i. Otherwise, the factored form of A is used
+*     to estimate the condition number of the matrix A.  If the
+*     reciprocal of the condition number is less than machine precision,
+*     INFO = N+1 is returned as a warning, but the routine still goes on
+*     to solve for X and compute error bounds as described below.
+*
+*  4. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  5. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  6. If equilibration was used, the matrix X is premultiplied by
+*     diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so
+*     that it solves the original system before equilibration.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of the matrix A is
+*          supplied on entry, and if not, whether the matrix A should be
+*          equilibrated before it is factored.
+*          = 'F':  On entry, AF and IPIV contain the factored form of A.
+*                  If EQUED is not 'N', the matrix A has been
+*                  equilibrated with scaling factors given by R and C.
+*                  A, AF, and IPIV are not modified.
+*          = 'N':  The matrix A will be copied to AF and factored.
+*          = 'E':  The matrix A will be equilibrated if necessary, then
+*                  copied to AF and factored.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Transpose)
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the N-by-N matrix A.  If FACT = 'F' and EQUED is
+*          not 'N', then A must have been equilibrated by the scaling
+*          factors in R and/or C.  A is not modified if FACT = 'F' or
+*          'N', or if FACT = 'E' and EQUED = 'N' on exit.
+*
+*          On exit, if EQUED .ne. 'N', A is scaled as follows:
+*          EQUED = 'R':  A := diag(R) * A
+*          EQUED = 'C':  A := A * diag(C)
+*          EQUED = 'B':  A := diag(R) * A * diag(C).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  AF      (input or output) DOUBLE PRECISION array, dimension (LDAF,N)
+*          If FACT = 'F', then AF is an input argument and on entry
+*          contains the factors L and U from the factorization
+*          A = P*L*U as computed by DGETRF.  If EQUED .ne. 'N', then
+*          AF is the factored form of the equilibrated matrix A.
+*
+*          If FACT = 'N', then AF is an output argument and on exit
+*          returns the factors L and U from the factorization A = P*L*U
+*          of the original matrix A.
+*
+*          If FACT = 'E', then AF is an output argument and on exit
+*          returns the factors L and U from the factorization A = P*L*U
+*          of the equilibrated matrix A (see the description of A for
+*          the form of the equilibrated matrix).
+*
+*  LDAF    (input) INTEGER
+*          The leading dimension of the array AF.  LDAF >= max(1,N).
+*
+*  IPIV    (input or output) INTEGER array, dimension (N)
+*          If FACT = 'F', then IPIV is an input argument and on entry
+*          contains the pivot indices from the factorization A = P*L*U
+*          as computed by DGETRF; row i of the matrix was interchanged
+*          with row IPIV(i).
+*
+*          If FACT = 'N', then IPIV is an output argument and on exit
+*          contains the pivot indices from the factorization A = P*L*U
+*          of the original matrix A.
+*
+*          If FACT = 'E', then IPIV is an output argument and on exit
+*          contains the pivot indices from the factorization A = P*L*U
+*          of the equilibrated matrix A.
+*
+*  EQUED   (input or output) CHARACTER*1
+*          Specifies the form of equilibration that was done.
+*          = 'N':  No equilibration (always true if FACT = 'N').
+*          = 'R':  Row equilibration, i.e., A has been premultiplied by
+*                  diag(R).
+*          = 'C':  Column equilibration, i.e., A has been postmultiplied
+*                  by diag(C).
+*          = 'B':  Both row and column equilibration, i.e., A has been
+*                  replaced by diag(R) * A * diag(C).
+*          EQUED is an input argument if FACT = 'F'; otherwise, it is an
+*          output argument.
+*
+*  R       (input or output) DOUBLE PRECISION array, dimension (N)
+*          The row scale factors for A.  If EQUED = 'R' or 'B', A is
+*          multiplied on the left by diag(R); if EQUED = 'N' or 'C', R
+*          is not accessed.  R is an input argument if FACT = 'F';
+*          otherwise, R is an output argument.  If FACT = 'F' and
+*          EQUED = 'R' or 'B', each element of R must be positive.
+*
+*  C       (input or output) DOUBLE PRECISION array, dimension (N)
+*          The column scale factors for A.  If EQUED = 'C' or 'B', A is
+*          multiplied on the right by diag(C); if EQUED = 'N' or 'R', C
+*          is not accessed.  C is an input argument if FACT = 'F';
+*          otherwise, C is an output argument.  If FACT = 'F' and
+*          EQUED = 'C' or 'B', each element of C must be positive.
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit,
+*          if EQUED = 'N', B is not modified;
+*          if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by
+*          diag(R)*B;
+*          if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is
+*          overwritten by diag(C)*B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) DOUBLE PRECISION array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X
+*          to the original system of equations.  Note that A and B are
+*          modified on exit if EQUED .ne. 'N', and the solution to the
+*          equilibrated system is inv(diag(C))*X if TRANS = 'N' and
+*          EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C'
+*          and EQUED = 'R' or 'B'.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The estimate of the reciprocal condition number of the matrix
+*          A after equilibration (if done).  If RCOND is less than the
+*          machine precision (in particular, if RCOND = 0), the matrix
+*          is singular to working precision.  This condition is
+*          indicated by a return code of INFO > 0.
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (4*N)
+*          On exit, WORK(1) contains the reciprocal pivot growth
+*          factor norm(A)/norm(U). The "max absolute element" norm is
+*          used. If WORK(1) is much less than 1, then the stability
+*          of the LU factorization of the (equilibrated) matrix A
+*          could be poor. This also means that the solution X, condition
+*          estimator RCOND, and forward error bound FERR could be
+*          unreliable. If factorization fails with 0<INFO<=N, then
+*          WORK(1) contains the reciprocal pivot growth factor for the
+*          leading INFO columns of A.
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= N:  U(i,i) is exactly zero.  The factorization has
+*                       been completed, but the factor U is exactly
+*                       singular, so the solution and error bounds
+*                       could not be computed. RCOND = 0 is returned.
+*                = N+1: U is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU
+      CHARACTER          NORM
+      INTEGER            I, INFEQU, J
+      DOUBLE PRECISION   AMAX, ANORM, BIGNUM, COLCND, RCMAX, RCMIN,
+     $                   ROWCND, RPVGRW, SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, DLANGE, DLANTR
+      EXTERNAL           LSAME, DLAMCH, DLANGE, DLANTR
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGECON, DGEEQU, DGERFS, DGETRF, DGETRS, DLACPY,
+     $                   DLAQGE, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      EQUIL = LSAME( FACT, 'E' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( NOFACT .OR. EQUIL ) THEN
+         EQUED = 'N'
+         ROWEQU = .FALSE.
+         COLEQU = .FALSE.
+      ELSE
+         ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' )
+         COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' )
+         SMLNUM = DLAMCH( 'Safe minimum' )
+         BIGNUM = ONE / SMLNUM
+      END IF
+*
+*     Test the input parameters.
+*
+      IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) )
+     $     THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT.
+     $         ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN
+         INFO = -10
+      ELSE
+         IF( ROWEQU ) THEN
+            RCMIN = BIGNUM
+            RCMAX = ZERO
+            DO 10 J = 1, N
+               RCMIN = MIN( RCMIN, R( J ) )
+               RCMAX = MAX( RCMAX, R( J ) )
+   10       CONTINUE
+            IF( RCMIN.LE.ZERO ) THEN
+               INFO = -11
+            ELSE IF( N.GT.0 ) THEN
+               ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+            ELSE
+               ROWCND = ONE
+            END IF
+         END IF
+         IF( COLEQU .AND. INFO.EQ.0 ) THEN
+            RCMIN = BIGNUM
+            RCMAX = ZERO
+            DO 20 J = 1, N
+               RCMIN = MIN( RCMIN, C( J ) )
+               RCMAX = MAX( RCMAX, C( J ) )
+   20       CONTINUE
+            IF( RCMIN.LE.ZERO ) THEN
+               INFO = -12
+            ELSE IF( N.GT.0 ) THEN
+               COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+            ELSE
+               COLCND = ONE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            IF( LDB.LT.MAX( 1, N ) ) THEN
+               INFO = -14
+            ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+               INFO = -16
+            END IF
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGESVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( EQUIL ) THEN
+*
+*        Compute row and column scalings to equilibrate the matrix A.
+*
+         CALL DGEEQU( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX, INFEQU )
+         IF( INFEQU.EQ.0 ) THEN
+*
+*           Equilibrate the matrix.
+*
+            CALL DLAQGE( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX,
+     $                   EQUED )
+            ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' )
+            COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' )
+         END IF
+      END IF
+*
+*     Scale the right hand side.
+*
+      IF( NOTRAN ) THEN
+         IF( ROWEQU ) THEN
+            DO 40 J = 1, NRHS
+               DO 30 I = 1, N
+                  B( I, J ) = R( I )*B( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+      ELSE IF( COLEQU ) THEN
+         DO 60 J = 1, NRHS
+            DO 50 I = 1, N
+               B( I, J ) = C( I )*B( I, J )
+   50       CONTINUE
+   60    CONTINUE
+      END IF
+*
+      IF( NOFACT .OR. EQUIL ) THEN
+*
+*        Compute the LU factorization of A.
+*
+         CALL DLACPY( 'Full', N, N, A, LDA, AF, LDAF )
+         CALL DGETRF( N, N, AF, LDAF, IPIV, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 ) THEN
+*
+*              Compute the reciprocal pivot growth factor of the
+*              leading rank-deficient INFO columns of A.
+*
+               RPVGRW = DLANTR( 'M', 'U', 'N', INFO, INFO, AF, LDAF,
+     $                  WORK )
+               IF( RPVGRW.EQ.ZERO ) THEN
+                  RPVGRW = ONE
+               ELSE
+                  RPVGRW = DLANGE( 'M', N, INFO, A, LDA, WORK ) / RPVGRW
+               END IF
+               WORK( 1 ) = RPVGRW
+               RCOND = ZERO
+            END IF
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A and the
+*     reciprocal pivot growth factor RPVGRW.
+*
+      IF( NOTRAN ) THEN
+         NORM = '1'
+      ELSE
+         NORM = 'I'
+      END IF
+      ANORM = DLANGE( NORM, N, N, A, LDA, WORK )
+      RPVGRW = DLANTR( 'M', 'U', 'N', N, N, AF, LDAF, WORK )
+      IF( RPVGRW.EQ.ZERO ) THEN
+         RPVGRW = ONE
+      ELSE
+         RPVGRW = DLANGE( 'M', N, N, A, LDA, WORK ) / RPVGRW
+      END IF
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL DGECON( NORM, N, AF, LDAF, ANORM, RCOND, WORK, IWORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.DLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution matrix X.
+*
+      CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL DGETRS( TRANS, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solution and
+*     compute error bounds and backward error estimates for it.
+*
+      CALL DGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, X,
+     $             LDX, FERR, BERR, WORK, IWORK, INFO )
+*
+*     Transform the solution matrix X to a solution of the original
+*     system.
+*
+      IF( NOTRAN ) THEN
+         IF( COLEQU ) THEN
+            DO 80 J = 1, NRHS
+               DO 70 I = 1, N
+                  X( I, J ) = C( I )*X( I, J )
+   70          CONTINUE
+   80       CONTINUE
+            DO 90 J = 1, NRHS
+               FERR( J ) = FERR( J ) / COLCND
+   90       CONTINUE
+         END IF
+      ELSE IF( ROWEQU ) THEN
+         DO 110 J = 1, NRHS
+            DO 100 I = 1, N
+               X( I, J ) = R( I )*X( I, J )
+  100       CONTINUE
+  110    CONTINUE
+         DO 120 J = 1, NRHS
+            FERR( J ) = FERR( J ) / ROWCND
+  120    CONTINUE
+      END IF
+*
+      WORK( 1 ) = RPVGRW
+      RETURN
+*
+*     End of DGESVX
+*
+      END
diff --git a/costa/native/external/lapack/dgetc2.f b/costa/native/external/lapack/dgetc2.f
new file mode 100644
index 000000000..d7ac5868a
--- /dev/null
+++ b/costa/native/external/lapack/dgetc2.f
@@ -0,0 +1,147 @@
+      SUBROUTINE DGETC2( N, A, LDA, IPIV, JPIV, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), JPIV( * )
+      DOUBLE PRECISION   A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGETC2 computes an LU factorization with complete pivoting of the
+*  n-by-n matrix A. The factorization has the form A = P * L * U * Q,
+*  where P and Q are permutation matrices, L is lower triangular with
+*  unit diagonal elements and U is upper triangular.
+*
+*  This is the Level 2 BLAS algorithm.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A. N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA, N)
+*          On entry, the n-by-n matrix A to be factored.
+*          On exit, the factors L and U from the factorization
+*          A = P*L*U*Q; the unit diagonal elements of L are not stored.
+*          If U(k, k) appears to be less than SMIN, U(k, k) is given the
+*          value of SMIN, i.e., giving a nonsingular perturbed system.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (output) INTEGER array, dimension(N).
+*          The pivot indices; for 1 <= i <= N, row i of the
+*          matrix has been interchanged with row IPIV(i).
+*
+*  JPIV    (output) INTEGER array, dimension(N).
+*          The pivot indices; for 1 <= j <= N, column j of the
+*          matrix has been interchanged with column JPIV(j).
+*
+*  INFO    (output) INTEGER
+*           = 0: successful exit
+*           > 0: if INFO = k, U(k, k) is likely to produce owerflow if
+*                we try to solve for x in Ax = b. So U is perturbed to
+*                avoid the overflow.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IP, IPV, J, JP, JPV
+      DOUBLE PRECISION   BIGNUM, EPS, SMIN, SMLNUM, XMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGER, DSWAP
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Set constants to control overflow
+*
+      INFO = 0
+      EPS = DLAMCH( 'P' )
+      SMLNUM = DLAMCH( 'S' ) / EPS
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+*
+*     Factorize A using complete pivoting.
+*     Set pivots less than SMIN to SMIN.
+*
+      DO 40 I = 1, N - 1
+*
+*        Find max element in matrix A
+*
+         XMAX = ZERO
+         DO 20 IP = I, N
+            DO 10 JP = I, N
+               IF( ABS( A( IP, JP ) ).GE.XMAX ) THEN
+                  XMAX = ABS( A( IP, JP ) )
+                  IPV = IP
+                  JPV = JP
+               END IF
+   10       CONTINUE
+   20    CONTINUE
+         IF( I.EQ.1 )
+     $      SMIN = MAX( EPS*XMAX, SMLNUM )
+*
+*        Swap rows
+*
+         IF( IPV.NE.I )
+     $      CALL DSWAP( N, A( IPV, 1 ), LDA, A( I, 1 ), LDA )
+         IPIV( I ) = IPV
+*
+*        Swap columns
+*
+         IF( JPV.NE.I )
+     $      CALL DSWAP( N, A( 1, JPV ), 1, A( 1, I ), 1 )
+         JPIV( I ) = JPV
+*
+*        Check for singularity
+*
+         IF( ABS( A( I, I ) ).LT.SMIN ) THEN
+            INFO = I
+            A( I, I ) = SMIN
+         END IF
+         DO 30 J = I + 1, N
+            A( J, I ) = A( J, I ) / A( I, I )
+   30    CONTINUE
+         CALL DGER( N-I, N-I, -ONE, A( I+1, I ), 1, A( I, I+1 ), LDA,
+     $              A( I+1, I+1 ), LDA )
+   40 CONTINUE
+*
+      IF( ABS( A( N, N ) ).LT.SMIN ) THEN
+         INFO = N
+         A( N, N ) = SMIN
+      END IF
+*
+      RETURN
+*
+*     End of DGETC2
+*
+      END
diff --git a/costa/native/external/lapack/dgetf2.f b/costa/native/external/lapack/dgetf2.f
new file mode 100644
index 000000000..e7d9b03de
--- /dev/null
+++ b/costa/native/external/lapack/dgetf2.f
@@ -0,0 +1,135 @@
+      SUBROUTINE DGETF2( M, N, A, LDA, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGETF2 computes an LU factorization of a general m-by-n matrix A
+*  using partial pivoting with row interchanges.
+*
+*  The factorization has the form
+*     A = P * L * U
+*  where P is a permutation matrix, L is lower triangular with unit
+*  diagonal elements (lower trapezoidal if m > n), and U is upper
+*  triangular (upper trapezoidal if m < n).
+*
+*  This is the right-looking Level 2 BLAS version of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the m by n matrix to be factored.
+*          On exit, the factors L and U from the factorization
+*          A = P*L*U; the unit diagonal elements of L are not stored.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  IPIV    (output) INTEGER array, dimension (min(M,N))
+*          The pivot indices; for 1 <= i <= min(M,N), row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*          > 0: if INFO = k, U(k,k) is exactly zero. The factorization
+*               has been completed, but the factor U is exactly
+*               singular, and division by zero will occur if it is used
+*               to solve a system of equations.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            J, JP
+*     ..
+*     .. External Functions ..
+      INTEGER            IDAMAX
+      EXTERNAL           IDAMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGER, DSCAL, DSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGETF2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+      DO 10 J = 1, MIN( M, N )
+*
+*        Find pivot and test for singularity.
+*
+         JP = J - 1 + IDAMAX( M-J+1, A( J, J ), 1 )
+         IPIV( J ) = JP
+         IF( A( JP, J ).NE.ZERO ) THEN
+*
+*           Apply the interchange to columns 1:N.
+*
+            IF( JP.NE.J )
+     $         CALL DSWAP( N, A( J, 1 ), LDA, A( JP, 1 ), LDA )
+*
+*           Compute elements J+1:M of J-th column.
+*
+            IF( J.LT.M )
+     $         CALL DSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 )
+*
+         ELSE IF( INFO.EQ.0 ) THEN
+*
+            INFO = J
+         END IF
+*
+         IF( J.LT.MIN( M, N ) ) THEN
+*
+*           Update trailing submatrix.
+*
+            CALL DGER( M-J, N-J, -ONE, A( J+1, J ), 1, A( J, J+1 ), LDA,
+     $                 A( J+1, J+1 ), LDA )
+         END IF
+   10 CONTINUE
+      RETURN
+*
+*     End of DGETF2
+*
+      END
diff --git a/costa/native/external/lapack/dgetrf.f b/costa/native/external/lapack/dgetrf.f
new file mode 100644
index 000000000..2e0a4c4e7
--- /dev/null
+++ b/costa/native/external/lapack/dgetrf.f
@@ -0,0 +1,160 @@
+      SUBROUTINE DGETRF( M, N, A, LDA, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGETRF computes an LU factorization of a general M-by-N matrix A
+*  using partial pivoting with row interchanges.
+*
+*  The factorization has the form
+*     A = P * L * U
+*  where P is a permutation matrix, L is lower triangular with unit
+*  diagonal elements (lower trapezoidal if m > n), and U is upper
+*  triangular (upper trapezoidal if m < n).
+*
+*  This is the right-looking Level 3 BLAS version of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the M-by-N matrix to be factored.
+*          On exit, the factors L and U from the factorization
+*          A = P*L*U; the unit diagonal elements of L are not stored.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  IPIV    (output) INTEGER array, dimension (min(M,N))
+*          The pivot indices; for 1 <= i <= min(M,N), row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
+*                has been completed, but the factor U is exactly
+*                singular, and division by zero will occur if it is used
+*                to solve a system of equations.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IINFO, J, JB, NB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMM, DGETF2, DLASWP, DTRSM, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGETRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'DGETRF', ' ', M, N, -1, -1 )
+      IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN
+*
+*        Use unblocked code.
+*
+         CALL DGETF2( M, N, A, LDA, IPIV, INFO )
+      ELSE
+*
+*        Use blocked code.
+*
+         DO 20 J = 1, MIN( M, N ), NB
+            JB = MIN( MIN( M, N )-J+1, NB )
+*
+*           Factor diagonal and subdiagonal blocks and test for exact
+*           singularity.
+*
+            CALL DGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO )
+*
+*           Adjust INFO and the pivot indices.
+*
+            IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+     $         INFO = IINFO + J - 1
+            DO 10 I = J, MIN( M, J+JB-1 )
+               IPIV( I ) = J - 1 + IPIV( I )
+   10       CONTINUE
+*
+*           Apply interchanges to columns 1:J-1.
+*
+            CALL DLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 )
+*
+            IF( J+JB.LE.N ) THEN
+*
+*              Apply interchanges to columns J+JB:N.
+*
+               CALL DLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1,
+     $                      IPIV, 1 )
+*
+*              Compute block row of U.
+*
+               CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', JB,
+     $                     N-J-JB+1, ONE, A( J, J ), LDA, A( J, J+JB ),
+     $                     LDA )
+               IF( J+JB.LE.M ) THEN
+*
+*                 Update trailing submatrix.
+*
+                  CALL DGEMM( 'No transpose', 'No transpose', M-J-JB+1,
+     $                        N-J-JB+1, JB, -ONE, A( J+JB, J ), LDA,
+     $                        A( J, J+JB ), LDA, ONE, A( J+JB, J+JB ),
+     $                        LDA )
+               END IF
+            END IF
+   20    CONTINUE
+      END IF
+      RETURN
+*
+*     End of DGETRF
+*
+      END
diff --git a/costa/native/external/lapack/dgetri.f b/costa/native/external/lapack/dgetri.f
new file mode 100644
index 000000000..c67a34803
--- /dev/null
+++ b/costa/native/external/lapack/dgetri.f
@@ -0,0 +1,193 @@
+      SUBROUTINE DGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGETRI computes the inverse of a matrix using the LU factorization
+*  computed by DGETRF.
+*
+*  This method inverts U and then computes inv(A) by solving the system
+*  inv(A)*L = inv(U) for inv(A).
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the factors L and U from the factorization
+*          A = P*L*U as computed by DGETRF.
+*          On exit, if INFO = 0, the inverse of the original matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices from DGETRF; for 1<=i<=N, row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO=0, then WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,N).
+*          For optimal performance LWORK >= N*NB, where NB is
+*          the optimal blocksize returned by ILAENV.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, U(i,i) is exactly zero; the matrix is
+*                singular and its inverse could not be computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB,
+     $                   NBMIN, NN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMM, DGEMV, DSWAP, DTRSM, DTRTRI, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NB = ILAENV( 1, 'DGETRI', ' ', N, -1, -1, -1 )
+      LWKOPT = N*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -3
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGETRI', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form inv(U).  If INFO > 0 from DTRTRI, then U is singular,
+*     and the inverse is not computed.
+*
+      CALL DTRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO )
+      IF( INFO.GT.0 )
+     $   RETURN
+*
+      NBMIN = 2
+      LDWORK = N
+      IF( NB.GT.1 .AND. NB.LT.N ) THEN
+         IWS = MAX( LDWORK*NB, 1 )
+         IF( LWORK.LT.IWS ) THEN
+            NB = LWORK / LDWORK
+            NBMIN = MAX( 2, ILAENV( 2, 'DGETRI', ' ', N, -1, -1, -1 ) )
+         END IF
+      ELSE
+         IWS = N
+      END IF
+*
+*     Solve the equation inv(A)*L = inv(U) for inv(A).
+*
+      IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN
+*
+*        Use unblocked code.
+*
+         DO 20 J = N, 1, -1
+*
+*           Copy current column of L to WORK and replace with zeros.
+*
+            DO 10 I = J + 1, N
+               WORK( I ) = A( I, J )
+               A( I, J ) = ZERO
+   10       CONTINUE
+*
+*           Compute current column of inv(A).
+*
+            IF( J.LT.N )
+     $         CALL DGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ),
+     $                     LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 )
+   20    CONTINUE
+      ELSE
+*
+*        Use blocked code.
+*
+         NN = ( ( N-1 ) / NB )*NB + 1
+         DO 50 J = NN, 1, -NB
+            JB = MIN( NB, N-J+1 )
+*
+*           Copy current block column of L to WORK and replace with
+*           zeros.
+*
+            DO 40 JJ = J, J + JB - 1
+               DO 30 I = JJ + 1, N
+                  WORK( I+( JJ-J )*LDWORK ) = A( I, JJ )
+                  A( I, JJ ) = ZERO
+   30          CONTINUE
+   40       CONTINUE
+*
+*           Compute current block column of inv(A).
+*
+            IF( J+JB.LE.N )
+     $         CALL DGEMM( 'No transpose', 'No transpose', N, JB,
+     $                     N-J-JB+1, -ONE, A( 1, J+JB ), LDA,
+     $                     WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA )
+            CALL DTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB,
+     $                  ONE, WORK( J ), LDWORK, A( 1, J ), LDA )
+   50    CONTINUE
+      END IF
+*
+*     Apply column interchanges.
+*
+      DO 60 J = N - 1, 1, -1
+         JP = IPIV( J )
+         IF( JP.NE.J )
+     $      CALL DSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 )
+   60 CONTINUE
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of DGETRI
+*
+      END
diff --git a/costa/native/external/lapack/dgetrs.f b/costa/native/external/lapack/dgetrs.f
new file mode 100644
index 000000000..652257634
--- /dev/null
+++ b/costa/native/external/lapack/dgetrs.f
@@ -0,0 +1,150 @@
+      SUBROUTINE DGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            INFO, LDA, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGETRS solves a system of linear equations
+*     A * X = B  or  A' * X = B
+*  with a general N-by-N matrix A using the LU factorization computed
+*  by DGETRF.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B  (No transpose)
+*          = 'T':  A'* X = B  (Transpose)
+*          = 'C':  A'* X = B  (Conjugate transpose = Transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+*          The factors L and U from the factorization A = P*L*U
+*          as computed by DGETRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices from DGETRF; for 1<=i<=N, row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASWP, DTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $    LSAME( TRANS, 'C' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGETRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( NOTRAN ) THEN
+*
+*        Solve A * X = B.
+*
+*        Apply row interchanges to the right hand sides.
+*
+         CALL DLASWP( NRHS, B, LDB, 1, N, IPIV, 1 )
+*
+*        Solve L*X = B, overwriting B with X.
+*
+         CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit', N, NRHS,
+     $               ONE, A, LDA, B, LDB )
+*
+*        Solve U*X = B, overwriting B with X.
+*
+         CALL DTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N,
+     $               NRHS, ONE, A, LDA, B, LDB )
+      ELSE
+*
+*        Solve A' * X = B.
+*
+*        Solve U'*X = B, overwriting B with X.
+*
+         CALL DTRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', N, NRHS,
+     $               ONE, A, LDA, B, LDB )
+*
+*        Solve L'*X = B, overwriting B with X.
+*
+         CALL DTRSM( 'Left', 'Lower', 'Transpose', 'Unit', N, NRHS, ONE,
+     $               A, LDA, B, LDB )
+*
+*        Apply row interchanges to the solution vectors.
+*
+         CALL DLASWP( NRHS, B, LDB, 1, N, IPIV, -1 )
+      END IF
+*
+      RETURN
+*
+*     End of DGETRS
+*
+      END
diff --git a/costa/native/external/lapack/dggbak.f b/costa/native/external/lapack/dggbak.f
new file mode 100644
index 000000000..75ffd51e3
--- /dev/null
+++ b/costa/native/external/lapack/dggbak.f
@@ -0,0 +1,216 @@
+      SUBROUTINE DGGBAK( JOB, SIDE, N, ILO, IHI, LSCALE, RSCALE, M, V,
+     $                   LDV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOB, SIDE
+      INTEGER            IHI, ILO, INFO, LDV, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   LSCALE( * ), RSCALE( * ), V( LDV, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGGBAK forms the right or left eigenvectors of a real generalized
+*  eigenvalue problem A*x = lambda*B*x, by backward transformation on
+*  the computed eigenvectors of the balanced pair of matrices output by
+*  DGGBAL.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          Specifies the type of backward transformation required:
+*          = 'N':  do nothing, return immediately;
+*          = 'P':  do backward transformation for permutation only;
+*          = 'S':  do backward transformation for scaling only;
+*          = 'B':  do backward transformations for both permutation and
+*                  scaling.
+*          JOB must be the same as the argument JOB supplied to DGGBAL.
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'R':  V contains right eigenvectors;
+*          = 'L':  V contains left eigenvectors.
+*
+*  N       (input) INTEGER
+*          The number of rows of the matrix V.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          The integers ILO and IHI determined by DGGBAL.
+*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+*
+*  LSCALE  (input) DOUBLE PRECISION array, dimension (N)
+*          Details of the permutations and/or scaling factors applied
+*          to the left side of A and B, as returned by DGGBAL.
+*
+*  RSCALE  (input) DOUBLE PRECISION array, dimension (N)
+*          Details of the permutations and/or scaling factors applied
+*          to the right side of A and B, as returned by DGGBAL.
+*
+*  M       (input) INTEGER
+*          The number of columns of the matrix V.  M >= 0.
+*
+*  V       (input/output) DOUBLE PRECISION array, dimension (LDV,M)
+*          On entry, the matrix of right or left eigenvectors to be
+*          transformed, as returned by DTGEVC.
+*          On exit, V is overwritten by the transformed eigenvectors.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the matrix V. LDV >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  See R.C. Ward, Balancing the generalized eigenvalue problem,
+*                 SIAM J. Sci. Stat. Comp. 2 (1981), 141-152.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LEFTV, RIGHTV
+      INTEGER            I, K
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSCAL, DSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      RIGHTV = LSAME( SIDE, 'R' )
+      LEFTV = LSAME( SIDE, 'L' )
+*
+      INFO = 0
+      IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND.
+     $    .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( ILO.LT.1 ) THEN
+         INFO = -4
+      ELSE IF( IHI.LT.ILO .OR. IHI.GT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDV.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGGBAK', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+      IF( M.EQ.0 )
+     $   RETURN
+      IF( LSAME( JOB, 'N' ) )
+     $   RETURN
+*
+      IF( ILO.EQ.IHI )
+     $   GO TO 30
+*
+*     Backward balance
+*
+      IF( LSAME( JOB, 'S' ) .OR. LSAME( JOB, 'B' ) ) THEN
+*
+*        Backward transformation on right eigenvectors
+*
+         IF( RIGHTV ) THEN
+            DO 10 I = ILO, IHI
+               CALL DSCAL( M, RSCALE( I ), V( I, 1 ), LDV )
+   10       CONTINUE
+         END IF
+*
+*        Backward transformation on left eigenvectors
+*
+         IF( LEFTV ) THEN
+            DO 20 I = ILO, IHI
+               CALL DSCAL( M, LSCALE( I ), V( I, 1 ), LDV )
+   20       CONTINUE
+         END IF
+      END IF
+*
+*     Backward permutation
+*
+   30 CONTINUE
+      IF( LSAME( JOB, 'P' ) .OR. LSAME( JOB, 'B' ) ) THEN
+*
+*        Backward permutation on right eigenvectors
+*
+         IF( RIGHTV ) THEN
+            IF( ILO.EQ.1 )
+     $         GO TO 50
+*
+            DO 40 I = ILO - 1, 1, -1
+               K = RSCALE( I )
+               IF( K.EQ.I )
+     $            GO TO 40
+               CALL DSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV )
+   40       CONTINUE
+*
+   50       CONTINUE
+            IF( IHI.EQ.N )
+     $         GO TO 70
+            DO 60 I = IHI + 1, N
+               K = RSCALE( I )
+               IF( K.EQ.I )
+     $            GO TO 60
+               CALL DSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV )
+   60       CONTINUE
+         END IF
+*
+*        Backward permutation on left eigenvectors
+*
+   70    CONTINUE
+         IF( LEFTV ) THEN
+            IF( ILO.EQ.1 )
+     $         GO TO 90
+            DO 80 I = ILO - 1, 1, -1
+               K = LSCALE( I )
+               IF( K.EQ.I )
+     $            GO TO 80
+               CALL DSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV )
+   80       CONTINUE
+*
+   90       CONTINUE
+            IF( IHI.EQ.N )
+     $         GO TO 110
+            DO 100 I = IHI + 1, N
+               K = LSCALE( I )
+               IF( K.EQ.I )
+     $            GO TO 100
+               CALL DSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV )
+  100       CONTINUE
+         END IF
+      END IF
+*
+  110 CONTINUE
+*
+      RETURN
+*
+*     End of DGGBAK
+*
+      END
diff --git a/costa/native/external/lapack/dggbal.f b/costa/native/external/lapack/dggbal.f
new file mode 100644
index 000000000..03fd54506
--- /dev/null
+++ b/costa/native/external/lapack/dggbal.f
@@ -0,0 +1,461 @@
+      SUBROUTINE DGGBAL( JOB, N, A, LDA, B, LDB, ILO, IHI, LSCALE,
+     $                   RSCALE, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOB
+      INTEGER            IHI, ILO, INFO, LDA, LDB, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), LSCALE( * ),
+     $                   RSCALE( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGGBAL balances a pair of general real matrices (A,B).  This
+*  involves, first, permuting A and B by similarity transformations to
+*  isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N
+*  elements on the diagonal; and second, applying a diagonal similarity
+*  transformation to rows and columns ILO to IHI to make the rows
+*  and columns as close in norm as possible. Both steps are optional.
+*
+*  Balancing may reduce the 1-norm of the matrices, and improve the
+*  accuracy of the computed eigenvalues and/or eigenvectors in the
+*  generalized eigenvalue problem A*x = lambda*B*x.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          Specifies the operations to be performed on A and B:
+*          = 'N':  none:  simply set ILO = 1, IHI = N, LSCALE(I) = 1.0
+*                  and RSCALE(I) = 1.0 for i = 1,...,N.
+*          = 'P':  permute only;
+*          = 'S':  scale only;
+*          = 'B':  both permute and scale.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the input matrix A.
+*          On exit,  A is overwritten by the balanced matrix.
+*          If JOB = 'N', A is not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,N)
+*          On entry, the input matrix B.
+*          On exit,  B is overwritten by the balanced matrix.
+*          If JOB = 'N', B is not referenced.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  ILO     (output) INTEGER
+*  IHI     (output) INTEGER
+*          ILO and IHI are set to integers such that on exit
+*          A(i,j) = 0 and B(i,j) = 0 if i > j and
+*          j = 1,...,ILO-1 or i = IHI+1,...,N.
+*          If JOB = 'N' or 'S', ILO = 1 and IHI = N.
+*
+*  LSCALE  (output) DOUBLE PRECISION array, dimension (N)
+*          Details of the permutations and scaling factors applied
+*          to the left side of A and B.  If P(j) is the index of the
+*          row interchanged with row j, and D(j)
+*          is the scaling factor applied to row j, then
+*            LSCALE(j) = P(j)    for J = 1,...,ILO-1
+*                      = D(j)    for J = ILO,...,IHI
+*                      = P(j)    for J = IHI+1,...,N.
+*          The order in which the interchanges are made is N to IHI+1,
+*          then 1 to ILO-1.
+*
+*  RSCALE  (output) DOUBLE PRECISION array, dimension (N)
+*          Details of the permutations and scaling factors applied
+*          to the right side of A and B.  If P(j) is the index of the
+*          column interchanged with column j, and D(j)
+*          is the scaling factor applied to column j, then
+*            LSCALE(j) = P(j)    for J = 1,...,ILO-1
+*                      = D(j)    for J = ILO,...,IHI
+*                      = P(j)    for J = IHI+1,...,N.
+*          The order in which the interchanges are made is N to IHI+1,
+*          then 1 to ILO-1.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (6*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  See R.C. WARD, Balancing the generalized eigenvalue problem,
+*                 SIAM J. Sci. Stat. Comp. 2 (1981), 141-152.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, HALF, ONE
+      PARAMETER          ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0 )
+      DOUBLE PRECISION   THREE, SCLFAC
+      PARAMETER          ( THREE = 3.0D+0, SCLFAC = 1.0D+1 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, ICAB, IFLOW, IP1, IR, IRAB, IT, J, JC, JP1,
+     $                   K, KOUNT, L, LCAB, LM1, LRAB, LSFMAX, LSFMIN,
+     $                   M, NR, NRP2
+      DOUBLE PRECISION   ALPHA, BASL, BETA, CAB, CMAX, COEF, COEF2,
+     $                   COEF5, COR, EW, EWC, GAMMA, PGAMMA, RAB, SFMAX,
+     $                   SFMIN, SUM, T, TA, TB, TC
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DDOT, DLAMCH
+      EXTERNAL           LSAME, IDAMAX, DDOT, DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DSCAL, DSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, INT, LOG10, MAX, MIN, SIGN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND.
+     $    .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGGBAL', -INFO )
+         RETURN
+      END IF
+*
+      K = 1
+      L = N
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( LSAME( JOB, 'N' ) ) THEN
+         ILO = 1
+         IHI = N
+         DO 10 I = 1, N
+            LSCALE( I ) = ONE
+            RSCALE( I ) = ONE
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      IF( K.EQ.L ) THEN
+         ILO = 1
+         IHI = 1
+         LSCALE( 1 ) = ONE
+         RSCALE( 1 ) = ONE
+         RETURN
+      END IF
+*
+      IF( LSAME( JOB, 'S' ) )
+     $   GO TO 190
+*
+      GO TO 30
+*
+*     Permute the matrices A and B to isolate the eigenvalues.
+*
+*     Find row with one nonzero in columns 1 through L
+*
+   20 CONTINUE
+      L = LM1
+      IF( L.NE.1 )
+     $   GO TO 30
+*
+      RSCALE( 1 ) = 1
+      LSCALE( 1 ) = 1
+      GO TO 190
+*
+   30 CONTINUE
+      LM1 = L - 1
+      DO 80 I = L, 1, -1
+         DO 40 J = 1, LM1
+            JP1 = J + 1
+            IF( A( I, J ).NE.ZERO .OR. B( I, J ).NE.ZERO )
+     $         GO TO 50
+   40    CONTINUE
+         J = L
+         GO TO 70
+*
+   50    CONTINUE
+         DO 60 J = JP1, L
+            IF( A( I, J ).NE.ZERO .OR. B( I, J ).NE.ZERO )
+     $         GO TO 80
+   60    CONTINUE
+         J = JP1 - 1
+*
+   70    CONTINUE
+         M = L
+         IFLOW = 1
+         GO TO 160
+   80 CONTINUE
+      GO TO 100
+*
+*     Find column with one nonzero in rows K through N
+*
+   90 CONTINUE
+      K = K + 1
+*
+  100 CONTINUE
+      DO 150 J = K, L
+         DO 110 I = K, LM1
+            IP1 = I + 1
+            IF( A( I, J ).NE.ZERO .OR. B( I, J ).NE.ZERO )
+     $         GO TO 120
+  110    CONTINUE
+         I = L
+         GO TO 140
+  120    CONTINUE
+         DO 130 I = IP1, L
+            IF( A( I, J ).NE.ZERO .OR. B( I, J ).NE.ZERO )
+     $         GO TO 150
+  130    CONTINUE
+         I = IP1 - 1
+  140    CONTINUE
+         M = K
+         IFLOW = 2
+         GO TO 160
+  150 CONTINUE
+      GO TO 190
+*
+*     Permute rows M and I
+*
+  160 CONTINUE
+      LSCALE( M ) = I
+      IF( I.EQ.M )
+     $   GO TO 170
+      CALL DSWAP( N-K+1, A( I, K ), LDA, A( M, K ), LDA )
+      CALL DSWAP( N-K+1, B( I, K ), LDB, B( M, K ), LDB )
+*
+*     Permute columns M and J
+*
+  170 CONTINUE
+      RSCALE( M ) = J
+      IF( J.EQ.M )
+     $   GO TO 180
+      CALL DSWAP( L, A( 1, J ), 1, A( 1, M ), 1 )
+      CALL DSWAP( L, B( 1, J ), 1, B( 1, M ), 1 )
+*
+  180 CONTINUE
+      GO TO ( 20, 90 )IFLOW
+*
+  190 CONTINUE
+      ILO = K
+      IHI = L
+*
+      IF( ILO.EQ.IHI )
+     $   RETURN
+*
+      IF( LSAME( JOB, 'P' ) )
+     $   RETURN
+*
+*     Balance the submatrix in rows ILO to IHI.
+*
+      NR = IHI - ILO + 1
+      DO 200 I = ILO, IHI
+         RSCALE( I ) = ZERO
+         LSCALE( I ) = ZERO
+*
+         WORK( I ) = ZERO
+         WORK( I+N ) = ZERO
+         WORK( I+2*N ) = ZERO
+         WORK( I+3*N ) = ZERO
+         WORK( I+4*N ) = ZERO
+         WORK( I+5*N ) = ZERO
+  200 CONTINUE
+*
+*     Compute right side vector in resulting linear equations
+*
+      BASL = LOG10( SCLFAC )
+      DO 240 I = ILO, IHI
+         DO 230 J = ILO, IHI
+            TB = B( I, J )
+            TA = A( I, J )
+            IF( TA.EQ.ZERO )
+     $         GO TO 210
+            TA = LOG10( ABS( TA ) ) / BASL
+  210       CONTINUE
+            IF( TB.EQ.ZERO )
+     $         GO TO 220
+            TB = LOG10( ABS( TB ) ) / BASL
+  220       CONTINUE
+            WORK( I+4*N ) = WORK( I+4*N ) - TA - TB
+            WORK( J+5*N ) = WORK( J+5*N ) - TA - TB
+  230    CONTINUE
+  240 CONTINUE
+*
+      COEF = ONE / DBLE( 2*NR )
+      COEF2 = COEF*COEF
+      COEF5 = HALF*COEF2
+      NRP2 = NR + 2
+      BETA = ZERO
+      IT = 1
+*
+*     Start generalized conjugate gradient iteration
+*
+  250 CONTINUE
+*
+      GAMMA = DDOT( NR, WORK( ILO+4*N ), 1, WORK( ILO+4*N ), 1 ) +
+     $        DDOT( NR, WORK( ILO+5*N ), 1, WORK( ILO+5*N ), 1 )
+*
+      EW = ZERO
+      EWC = ZERO
+      DO 260 I = ILO, IHI
+         EW = EW + WORK( I+4*N )
+         EWC = EWC + WORK( I+5*N )
+  260 CONTINUE
+*
+      GAMMA = COEF*GAMMA - COEF2*( EW**2+EWC**2 ) - COEF5*( EW-EWC )**2
+      IF( GAMMA.EQ.ZERO )
+     $   GO TO 350
+      IF( IT.NE.1 )
+     $   BETA = GAMMA / PGAMMA
+      T = COEF5*( EWC-THREE*EW )
+      TC = COEF5*( EW-THREE*EWC )
+*
+      CALL DSCAL( NR, BETA, WORK( ILO ), 1 )
+      CALL DSCAL( NR, BETA, WORK( ILO+N ), 1 )
+*
+      CALL DAXPY( NR, COEF, WORK( ILO+4*N ), 1, WORK( ILO+N ), 1 )
+      CALL DAXPY( NR, COEF, WORK( ILO+5*N ), 1, WORK( ILO ), 1 )
+*
+      DO 270 I = ILO, IHI
+         WORK( I ) = WORK( I ) + TC
+         WORK( I+N ) = WORK( I+N ) + T
+  270 CONTINUE
+*
+*     Apply matrix to vector
+*
+      DO 300 I = ILO, IHI
+         KOUNT = 0
+         SUM = ZERO
+         DO 290 J = ILO, IHI
+            IF( A( I, J ).EQ.ZERO )
+     $         GO TO 280
+            KOUNT = KOUNT + 1
+            SUM = SUM + WORK( J )
+  280       CONTINUE
+            IF( B( I, J ).EQ.ZERO )
+     $         GO TO 290
+            KOUNT = KOUNT + 1
+            SUM = SUM + WORK( J )
+  290    CONTINUE
+         WORK( I+2*N ) = DBLE( KOUNT )*WORK( I+N ) + SUM
+  300 CONTINUE
+*
+      DO 330 J = ILO, IHI
+         KOUNT = 0
+         SUM = ZERO
+         DO 320 I = ILO, IHI
+            IF( A( I, J ).EQ.ZERO )
+     $         GO TO 310
+            KOUNT = KOUNT + 1
+            SUM = SUM + WORK( I+N )
+  310       CONTINUE
+            IF( B( I, J ).EQ.ZERO )
+     $         GO TO 320
+            KOUNT = KOUNT + 1
+            SUM = SUM + WORK( I+N )
+  320    CONTINUE
+         WORK( J+3*N ) = DBLE( KOUNT )*WORK( J ) + SUM
+  330 CONTINUE
+*
+      SUM = DDOT( NR, WORK( ILO+N ), 1, WORK( ILO+2*N ), 1 ) +
+     $      DDOT( NR, WORK( ILO ), 1, WORK( ILO+3*N ), 1 )
+      ALPHA = GAMMA / SUM
+*
+*     Determine correction to current iteration
+*
+      CMAX = ZERO
+      DO 340 I = ILO, IHI
+         COR = ALPHA*WORK( I+N )
+         IF( ABS( COR ).GT.CMAX )
+     $      CMAX = ABS( COR )
+         LSCALE( I ) = LSCALE( I ) + COR
+         COR = ALPHA*WORK( I )
+         IF( ABS( COR ).GT.CMAX )
+     $      CMAX = ABS( COR )
+         RSCALE( I ) = RSCALE( I ) + COR
+  340 CONTINUE
+      IF( CMAX.LT.HALF )
+     $   GO TO 350
+*
+      CALL DAXPY( NR, -ALPHA, WORK( ILO+2*N ), 1, WORK( ILO+4*N ), 1 )
+      CALL DAXPY( NR, -ALPHA, WORK( ILO+3*N ), 1, WORK( ILO+5*N ), 1 )
+*
+      PGAMMA = GAMMA
+      IT = IT + 1
+      IF( IT.LE.NRP2 )
+     $   GO TO 250
+*
+*     End generalized conjugate gradient iteration
+*
+  350 CONTINUE
+      SFMIN = DLAMCH( 'S' )
+      SFMAX = ONE / SFMIN
+      LSFMIN = INT( LOG10( SFMIN ) / BASL+ONE )
+      LSFMAX = INT( LOG10( SFMAX ) / BASL )
+      DO 360 I = ILO, IHI
+         IRAB = IDAMAX( N-ILO+1, A( I, ILO ), LDA )
+         RAB = ABS( A( I, IRAB+ILO-1 ) )
+         IRAB = IDAMAX( N-ILO+1, B( I, ILO ), LDA )
+         RAB = MAX( RAB, ABS( B( I, IRAB+ILO-1 ) ) )
+         LRAB = INT( LOG10( RAB+SFMIN ) / BASL+ONE )
+         IR = LSCALE( I ) + SIGN( HALF, LSCALE( I ) )
+         IR = MIN( MAX( IR, LSFMIN ), LSFMAX, LSFMAX-LRAB )
+         LSCALE( I ) = SCLFAC**IR
+         ICAB = IDAMAX( IHI, A( 1, I ), 1 )
+         CAB = ABS( A( ICAB, I ) )
+         ICAB = IDAMAX( IHI, B( 1, I ), 1 )
+         CAB = MAX( CAB, ABS( B( ICAB, I ) ) )
+         LCAB = INT( LOG10( CAB+SFMIN ) / BASL+ONE )
+         JC = RSCALE( I ) + SIGN( HALF, RSCALE( I ) )
+         JC = MIN( MAX( JC, LSFMIN ), LSFMAX, LSFMAX-LCAB )
+         RSCALE( I ) = SCLFAC**JC
+  360 CONTINUE
+*
+*     Row scaling of matrices A and B
+*
+      DO 370 I = ILO, IHI
+         CALL DSCAL( N-ILO+1, LSCALE( I ), A( I, ILO ), LDA )
+         CALL DSCAL( N-ILO+1, LSCALE( I ), B( I, ILO ), LDB )
+  370 CONTINUE
+*
+*     Column scaling of matrices A and B
+*
+      DO 380 J = ILO, IHI
+         CALL DSCAL( IHI, RSCALE( J ), A( 1, J ), 1 )
+         CALL DSCAL( IHI, RSCALE( J ), B( 1, J ), 1 )
+  380 CONTINUE
+*
+      RETURN
+*
+*     End of DGGBAL
+*
+      END
diff --git a/costa/native/external/lapack/dgges.f b/costa/native/external/lapack/dgges.f
new file mode 100644
index 000000000..ce29aa527
--- /dev/null
+++ b/costa/native/external/lapack/dgges.f
@@ -0,0 +1,550 @@
+      SUBROUTINE DGGES( JOBVSL, JOBVSR, SORT, DELCTG, N, A, LDA, B, LDB,
+     $                  SDIM, ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR,
+     $                  LDVSR, WORK, LWORK, BWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVSL, JOBVSR, SORT
+      INTEGER            INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            BWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), ALPHAI( * ), ALPHAR( * ),
+     $                   B( LDB, * ), BETA( * ), VSL( LDVSL, * ),
+     $                   VSR( LDVSR, * ), WORK( * )
+*     ..
+*     .. Function Arguments ..
+      LOGICAL            DELCTG
+      EXTERNAL           DELCTG
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGGES computes for a pair of N-by-N real nonsymmetric matrices (A,B),
+*  the generalized eigenvalues, the generalized real Schur form (S,T),
+*  optionally, the left and/or right matrices of Schur vectors (VSL and
+*  VSR). This gives the generalized Schur factorization
+*
+*           (A,B) = ( (VSL)*S*(VSR)**T, (VSL)*T*(VSR)**T )
+*
+*  Optionally, it also orders the eigenvalues so that a selected cluster
+*  of eigenvalues appears in the leading diagonal blocks of the upper
+*  quasi-triangular matrix S and the upper triangular matrix T.The
+*  leading columns of VSL and VSR then form an orthonormal basis for the
+*  corresponding left and right eigenspaces (deflating subspaces).
+*
+*  (If only the generalized eigenvalues are needed, use the driver
+*  DGGEV instead, which is faster.)
+*
+*  A generalized eigenvalue for a pair of matrices (A,B) is a scalar w
+*  or a ratio alpha/beta = w, such that  A - w*B is singular.  It is
+*  usually represented as the pair (alpha,beta), as there is a
+*  reasonable interpretation for beta=0 or both being zero.
+*
+*  A pair of matrices (S,T) is in generalized real Schur form if T is
+*  upper triangular with non-negative diagonal and S is block upper
+*  triangular with 1-by-1 and 2-by-2 blocks.  1-by-1 blocks correspond
+*  to real generalized eigenvalues, while 2-by-2 blocks of S will be
+*  "standardized" by making the corresponding elements of T have the
+*  form:
+*          [  a  0  ]
+*          [  0  b  ]
+*
+*  and the pair of corresponding 2-by-2 blocks in S and T will have a
+*  complex conjugate pair of generalized eigenvalues.
+*
+*
+*  Arguments
+*  =========
+*
+*  JOBVSL  (input) CHARACTER*1
+*          = 'N':  do not compute the left Schur vectors;
+*          = 'V':  compute the left Schur vectors.
+*
+*  JOBVSR  (input) CHARACTER*1
+*          = 'N':  do not compute the right Schur vectors;
+*          = 'V':  compute the right Schur vectors.
+*
+*  SORT    (input) CHARACTER*1
+*          Specifies whether or not to order the eigenvalues on the
+*          diagonal of the generalized Schur form.
+*          = 'N':  Eigenvalues are not ordered;
+*          = 'S':  Eigenvalues are ordered (see DELZTG);
+*
+*  DELZTG  (input) LOGICAL FUNCTION of three DOUBLE PRECISION arguments
+*          DELZTG must be declared EXTERNAL in the calling subroutine.
+*          If SORT = 'N', DELZTG is not referenced.
+*          If SORT = 'S', DELZTG is used to select eigenvalues to sort
+*          to the top left of the Schur form.
+*          An eigenvalue (ALPHAR(j)+ALPHAI(j))/BETA(j) is selected if
+*          DELZTG(ALPHAR(j),ALPHAI(j),BETA(j)) is true; i.e. if either
+*          one of a complex conjugate pair of eigenvalues is selected,
+*          then both complex eigenvalues are selected.
+*
+*          Note that in the ill-conditioned case, a selected complex
+*          eigenvalue may no longer satisfy DELZTG(ALPHAR(j),ALPHAI(j),
+*          BETA(j)) = .TRUE. after ordering. INFO is to be set to N+2
+*          in this case.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A, B, VSL, and VSR.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA, N)
+*          On entry, the first of the pair of matrices.
+*          On exit, A has been overwritten by its generalized Schur
+*          form S.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of A.  LDA >= max(1,N).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB, N)
+*          On entry, the second of the pair of matrices.
+*          On exit, B has been overwritten by its generalized Schur
+*          form T.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of B.  LDB >= max(1,N).
+*
+*  SDIM    (output) INTEGER
+*          If SORT = 'N', SDIM = 0.
+*          If SORT = 'S', SDIM = number of eigenvalues (after sorting)
+*          for which DELZTG is true.  (Complex conjugate pairs for which
+*          DELZTG is true for either eigenvalue count as 2.)
+*
+*  ALPHAR  (output) DOUBLE PRECISION array, dimension (N)
+*  ALPHAI  (output) DOUBLE PRECISION array, dimension (N)
+*  BETA    (output) DOUBLE PRECISION array, dimension (N)
+*          On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will
+*          be the generalized eigenvalues.  ALPHAR(j) + ALPHAI(j)*i,
+*          and  BETA(j),j=1,...,N are the diagonals of the complex Schur
+*          form (S,T) that would result if the 2-by-2 diagonal blocks of
+*          the real Schur form of (A,B) were further reduced to
+*          triangular form using 2-by-2 complex unitary transformations.
+*          If ALPHAI(j) is zero, then the j-th eigenvalue is real; if
+*          positive, then the j-th and (j+1)-st eigenvalues are a
+*          complex conjugate pair, with ALPHAI(j+1) negative.
+*
+*          Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j)
+*          may easily over- or underflow, and BETA(j) may even be zero.
+*          Thus, the user should avoid naively computing the ratio.
+*          However, ALPHAR and ALPHAI will be always less than and
+*          usually comparable with norm(A) in magnitude, and BETA always
+*          less than and usually comparable with norm(B).
+*
+*  VSL     (output) DOUBLE PRECISION array, dimension (LDVSL,N)
+*          If JOBVSL = 'V', VSL will contain the left Schur vectors.
+*          Not referenced if JOBVSL = 'N'.
+*
+*  LDVSL   (input) INTEGER
+*          The leading dimension of the matrix VSL. LDVSL >=1, and
+*          if JOBVSL = 'V', LDVSL >= N.
+*
+*  VSR     (output) DOUBLE PRECISION array, dimension (LDVSR,N)
+*          If JOBVSR = 'V', VSR will contain the right Schur vectors.
+*          Not referenced if JOBVSR = 'N'.
+*
+*  LDVSR   (input) INTEGER
+*          The leading dimension of the matrix VSR. LDVSR >= 1, and
+*          if JOBVSR = 'V', LDVSR >= N.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= 8*N+16.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  BWORK   (workspace) LOGICAL array, dimension (N)
+*          Not referenced if SORT = 'N'.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          = 1,...,N:
+*                The QZ iteration failed.  (A,B) are not in Schur
+*                form, but ALPHAR(j), ALPHAI(j), and BETA(j) should
+*                be correct for j=INFO+1,...,N.
+*          > N:  =N+1: other than QZ iteration failed in DHGEQZ.
+*                =N+2: after reordering, roundoff changed values of
+*                      some complex eigenvalues so that leading
+*                      eigenvalues in the Generalized Schur form no
+*                      longer satisfy DELZTG=.TRUE.  This could also
+*                      be caused due to scaling.
+*                =N+3: reordering failed in DTGSEN.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            CURSL, ILASCL, ILBSCL, ILVSL, ILVSR, LASTSL,
+     $                   LQUERY, LST2SL, WANTST
+      INTEGER            I, ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT,
+     $                   ILO, IP, IRIGHT, IROWS, ITAU, IWRK, MAXWRK,
+     $                   MINWRK
+      DOUBLE PRECISION   ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, PVSL,
+     $                   PVSR, SAFMAX, SAFMIN, SMLNUM
+*     ..
+*     .. Local Arrays ..
+      INTEGER            IDUM( 1 )
+      DOUBLE PRECISION   DIF( 2 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEQRF, DGGBAK, DGGBAL, DGGHRD, DHGEQZ, DLABAD,
+     $                   DLACPY, DLASCL, DLASET, DORGQR, DORMQR, DTGSEN,
+     $                   XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, DLANGE
+      EXTERNAL           LSAME, ILAENV, DLAMCH, DLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode the input arguments
+*
+      IF( LSAME( JOBVSL, 'N' ) ) THEN
+         IJOBVL = 1
+         ILVSL = .FALSE.
+      ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN
+         IJOBVL = 2
+         ILVSL = .TRUE.
+      ELSE
+         IJOBVL = -1
+         ILVSL = .FALSE.
+      END IF
+*
+      IF( LSAME( JOBVSR, 'N' ) ) THEN
+         IJOBVR = 1
+         ILVSR = .FALSE.
+      ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN
+         IJOBVR = 2
+         ILVSR = .TRUE.
+      ELSE
+         IJOBVR = -1
+         ILVSR = .FALSE.
+      END IF
+*
+      WANTST = LSAME( SORT, 'S' )
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( IJOBVL.LE.0 ) THEN
+         INFO = -1
+      ELSE IF( IJOBVR.LE.0 ) THEN
+         INFO = -2
+      ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN
+         INFO = -15
+      ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN
+         INFO = -17
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN
+         MINWRK = 7*( N+1 ) + 16
+         MAXWRK = 7*( N+1 ) + N*ILAENV( 1, 'DGEQRF', ' ', N, 1, N, 0 ) +
+     $            16
+         IF( ILVSL ) THEN
+            MAXWRK = MAX( MAXWRK, 7*( N+1 )+N*
+     $               ILAENV( 1, 'DORGQR', ' ', N, 1, N, -1 ) )
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+*
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY )
+     $   INFO = -19
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGGES ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         SDIM = 0
+         RETURN
+      END IF
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'P' )
+      SAFMIN = DLAMCH( 'S' )
+      SAFMAX = ONE / SAFMIN
+      CALL DLABAD( SAFMIN, SAFMAX )
+      SMLNUM = SQRT( SAFMIN ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = DLANGE( 'M', N, N, A, LDA, WORK )
+      ILASCL = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         ANRMTO = SMLNUM
+         ILASCL = .TRUE.
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         ANRMTO = BIGNUM
+         ILASCL = .TRUE.
+      END IF
+      IF( ILASCL )
+     $   CALL DLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR )
+*
+*     Scale B if max element outside range [SMLNUM,BIGNUM]
+*
+      BNRM = DLANGE( 'M', N, N, B, LDB, WORK )
+      ILBSCL = .FALSE.
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+         BNRMTO = SMLNUM
+         ILBSCL = .TRUE.
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+         BNRMTO = BIGNUM
+         ILBSCL = .TRUE.
+      END IF
+      IF( ILBSCL )
+     $   CALL DLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR )
+*
+*     Permute the matrix to make it more nearly triangular
+*     (Workspace: need 6*N + 2*N space for storing balancing factors)
+*
+      ILEFT = 1
+      IRIGHT = N + 1
+      IWRK = IRIGHT + N
+      CALL DGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ),
+     $             WORK( IRIGHT ), WORK( IWRK ), IERR )
+*
+*     Reduce B to triangular form (QR decomposition of B)
+*     (Workspace: need N, prefer N*NB)
+*
+      IROWS = IHI + 1 - ILO
+      ICOLS = N + 1 - ILO
+      ITAU = IWRK
+      IWRK = ITAU + IROWS
+      CALL DGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ),
+     $             WORK( IWRK ), LWORK+1-IWRK, IERR )
+*
+*     Apply the orthogonal transformation to matrix A
+*     (Workspace: need N, prefer N*NB)
+*
+      CALL DORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB,
+     $             WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ),
+     $             LWORK+1-IWRK, IERR )
+*
+*     Initialize VSL
+*     (Workspace: need N, prefer N*NB)
+*
+      IF( ILVSL ) THEN
+         CALL DLASET( 'Full', N, N, ZERO, ONE, VSL, LDVSL )
+         CALL DLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB,
+     $                VSL( ILO+1, ILO ), LDVSL )
+         CALL DORGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL,
+     $                WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR )
+      END IF
+*
+*     Initialize VSR
+*
+      IF( ILVSR )
+     $   CALL DLASET( 'Full', N, N, ZERO, ONE, VSR, LDVSR )
+*
+*     Reduce to generalized Hessenberg form
+*     (Workspace: none needed)
+*
+      CALL DGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL,
+     $             LDVSL, VSR, LDVSR, IERR )
+*
+*     Perform QZ algorithm, computing Schur vectors if desired
+*     (Workspace: need N)
+*
+      IWRK = ITAU
+      CALL DHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB,
+     $             ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR,
+     $             WORK( IWRK ), LWORK+1-IWRK, IERR )
+      IF( IERR.NE.0 ) THEN
+         IF( IERR.GT.0 .AND. IERR.LE.N ) THEN
+            INFO = IERR
+         ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN
+            INFO = IERR - N
+         ELSE
+            INFO = N + 1
+         END IF
+         GO TO 50
+      END IF
+*
+*     Sort eigenvalues ALPHA/BETA if desired
+*     (Workspace: need 4*N+16 )
+*
+      SDIM = 0
+      IF( WANTST ) THEN
+*
+*        Undo scaling on eigenvalues before DELZTGing
+*
+         IF( ILASCL ) THEN
+            CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N,
+     $                   IERR )
+            CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N,
+     $                   IERR )
+         END IF
+         IF( ILBSCL )
+     $      CALL DLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR )
+*
+*        Select eigenvalues
+*
+         DO 10 I = 1, N
+            BWORK( I ) = DELCTG( ALPHAR( I ), ALPHAI( I ), BETA( I ) )
+   10    CONTINUE
+*
+         CALL DTGSEN( 0, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB, ALPHAR,
+     $                ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, SDIM, PVSL,
+     $                PVSR, DIF, WORK( IWRK ), LWORK-IWRK+1, IDUM, 1,
+     $                IERR )
+         IF( IERR.EQ.1 )
+     $      INFO = N + 3
+*
+      END IF
+*
+*     Apply back-permutation to VSL and VSR
+*     (Workspace: none needed)
+*
+      IF( ILVSL )
+     $   CALL DGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ),
+     $                WORK( IRIGHT ), N, VSL, LDVSL, IERR )
+*
+      IF( ILVSR )
+     $   CALL DGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ),
+     $                WORK( IRIGHT ), N, VSR, LDVSR, IERR )
+*
+*     Check if unscaling would cause over/underflow, if so, rescale
+*     (ALPHAR(I),ALPHAI(I),BETA(I)) so BETA(I) is on the order of
+*     B(I,I) and ALPHAR(I) and ALPHAI(I) are on the order of A(I,I)
+*
+      IF( ILASCL ) THEN
+         DO 20 I = 1, N
+            IF( ALPHAI( I ).NE.ZERO ) THEN
+               IF( ( ALPHAR( I ) / SAFMAX ).GT.( ANRMTO / ANRM ) .OR.
+     $             ( SAFMIN / ALPHAR( I ) ).GT.( ANRM / ANRMTO ) ) THEN
+                  WORK( 1 ) = ABS( A( I, I ) / ALPHAR( I ) )
+                  BETA( I ) = BETA( I )*WORK( 1 )
+                  ALPHAR( I ) = ALPHAR( I )*WORK( 1 )
+                  ALPHAI( I ) = ALPHAI( I )*WORK( 1 )
+               ELSE IF( ( ALPHAI( I ) / SAFMAX ).GT.
+     $                  ( ANRMTO / ANRM ) .OR.
+     $                  ( SAFMIN / ALPHAI( I ) ).GT.( ANRM / ANRMTO ) )
+     $                   THEN
+                  WORK( 1 ) = ABS( A( I, I+1 ) / ALPHAI( I ) )
+                  BETA( I ) = BETA( I )*WORK( 1 )
+                  ALPHAR( I ) = ALPHAR( I )*WORK( 1 )
+                  ALPHAI( I ) = ALPHAI( I )*WORK( 1 )
+               END IF
+            END IF
+   20    CONTINUE
+      END IF
+*
+      IF( ILBSCL ) THEN
+         DO 30 I = 1, N
+            IF( ALPHAI( I ).NE.ZERO ) THEN
+               IF( ( BETA( I ) / SAFMAX ).GT.( BNRMTO / BNRM ) .OR.
+     $             ( SAFMIN / BETA( I ) ).GT.( BNRM / BNRMTO ) ) THEN
+                  WORK( 1 ) = ABS( B( I, I ) / BETA( I ) )
+                  BETA( I ) = BETA( I )*WORK( 1 )
+                  ALPHAR( I ) = ALPHAR( I )*WORK( 1 )
+                  ALPHAI( I ) = ALPHAI( I )*WORK( 1 )
+               END IF
+            END IF
+   30    CONTINUE
+      END IF
+*
+*     Undo scaling
+*
+      IF( ILASCL ) THEN
+         CALL DLASCL( 'H', 0, 0, ANRMTO, ANRM, N, N, A, LDA, IERR )
+         CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, IERR )
+         CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, IERR )
+      END IF
+*
+      IF( ILBSCL ) THEN
+         CALL DLASCL( 'U', 0, 0, BNRMTO, BNRM, N, N, B, LDB, IERR )
+         CALL DLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR )
+      END IF
+*
+      IF( WANTST ) THEN
+*
+*        Check if reordering is correct
+*
+         LASTSL = .TRUE.
+         LST2SL = .TRUE.
+         SDIM = 0
+         IP = 0
+         DO 40 I = 1, N
+            CURSL = DELCTG( ALPHAR( I ), ALPHAI( I ), BETA( I ) )
+            IF( ALPHAI( I ).EQ.ZERO ) THEN
+               IF( CURSL )
+     $            SDIM = SDIM + 1
+               IP = 0
+               IF( CURSL .AND. .NOT.LASTSL )
+     $            INFO = N + 2
+            ELSE
+               IF( IP.EQ.1 ) THEN
+*
+*                 Last eigenvalue of conjugate pair
+*
+                  CURSL = CURSL .OR. LASTSL
+                  LASTSL = CURSL
+                  IF( CURSL )
+     $               SDIM = SDIM + 2
+                  IP = -1
+                  IF( CURSL .AND. .NOT.LST2SL )
+     $               INFO = N + 2
+               ELSE
+*
+*                 First eigenvalue of conjugate pair
+*
+                  IP = 1
+               END IF
+            END IF
+            LST2SL = LASTSL
+            LASTSL = CURSL
+   40    CONTINUE
+*
+      END IF
+*
+   50 CONTINUE
+*
+      WORK( 1 ) = MAXWRK
+*
+      RETURN
+*
+*     End of DGGES
+*
+      END
diff --git a/costa/native/external/lapack/dggesx.f b/costa/native/external/lapack/dggesx.f
new file mode 100644
index 000000000..17688320d
--- /dev/null
+++ b/costa/native/external/lapack/dggesx.f
@@ -0,0 +1,640 @@
+      SUBROUTINE DGGESX( JOBVSL, JOBVSR, SORT, DELCTG, SENSE, N, A, LDA,
+     $                   B, LDB, SDIM, ALPHAR, ALPHAI, BETA, VSL, LDVSL,
+     $                   VSR, LDVSR, RCONDE, RCONDV, WORK, LWORK, IWORK,
+     $                   LIWORK, BWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVSL, JOBVSR, SENSE, SORT
+      INTEGER            INFO, LDA, LDB, LDVSL, LDVSR, LIWORK, LWORK, N,
+     $                   SDIM
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            BWORK( * )
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), ALPHAI( * ), ALPHAR( * ),
+     $                   B( LDB, * ), BETA( * ), RCONDE( 2 ),
+     $                   RCONDV( 2 ), VSL( LDVSL, * ), VSR( LDVSR, * ),
+     $                   WORK( * )
+*     ..
+*     .. Function Arguments ..
+      LOGICAL            DELCTG
+      EXTERNAL           DELCTG
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGGESX computes for a pair of N-by-N real nonsymmetric matrices
+*  (A,B), the generalized eigenvalues, the real Schur form (S,T), and,
+*  optionally, the left and/or right matrices of Schur vectors (VSL and
+*  VSR).  This gives the generalized Schur factorization
+*
+*       (A,B) = ( (VSL) S (VSR)**T, (VSL) T (VSR)**T )
+*
+*  Optionally, it also orders the eigenvalues so that a selected cluster
+*  of eigenvalues appears in the leading diagonal blocks of the upper
+*  quasi-triangular matrix S and the upper triangular matrix T; computes
+*  a reciprocal condition number for the average of the selected
+*  eigenvalues (RCONDE); and computes a reciprocal condition number for
+*  the right and left deflating subspaces corresponding to the selected
+*  eigenvalues (RCONDV). The leading columns of VSL and VSR then form
+*  an orthonormal basis for the corresponding left and right eigenspaces
+*  (deflating subspaces).
+*
+*  A generalized eigenvalue for a pair of matrices (A,B) is a scalar w
+*  or a ratio alpha/beta = w, such that  A - w*B is singular.  It is
+*  usually represented as the pair (alpha,beta), as there is a
+*  reasonable interpretation for beta=0 or for both being zero.
+*
+*  A pair of matrices (S,T) is in generalized real Schur form if T is
+*  upper triangular with non-negative diagonal and S is block upper
+*  triangular with 1-by-1 and 2-by-2 blocks.  1-by-1 blocks correspond
+*  to real generalized eigenvalues, while 2-by-2 blocks of S will be
+*  "standardized" by making the corresponding elements of T have the
+*  form:
+*          [  a  0  ]
+*          [  0  b  ]
+*
+*  and the pair of corresponding 2-by-2 blocks in S and T will have a
+*  complex conjugate pair of generalized eigenvalues.
+*
+*
+*  Arguments
+*  =========
+*
+*  JOBVSL  (input) CHARACTER*1
+*          = 'N':  do not compute the left Schur vectors;
+*          = 'V':  compute the left Schur vectors.
+*
+*  JOBVSR  (input) CHARACTER*1
+*          = 'N':  do not compute the right Schur vectors;
+*          = 'V':  compute the right Schur vectors.
+*
+*  SORT    (input) CHARACTER*1
+*          Specifies whether or not to order the eigenvalues on the
+*          diagonal of the generalized Schur form.
+*          = 'N':  Eigenvalues are not ordered;
+*          = 'S':  Eigenvalues are ordered (see DELZTG).
+*
+*  DELZTG  (input) LOGICAL FUNCTION of three DOUBLE PRECISION arguments
+*          DELZTG must be declared EXTERNAL in the calling subroutine.
+*          If SORT = 'N', DELZTG is not referenced.
+*          If SORT = 'S', DELZTG is used to select eigenvalues to sort
+*          to the top left of the Schur form.
+*          An eigenvalue (ALPHAR(j)+ALPHAI(j))/BETA(j) is selected if
+*          DELZTG(ALPHAR(j),ALPHAI(j),BETA(j)) is true; i.e. if either
+*          one of a complex conjugate pair of eigenvalues is selected,
+*          then both complex eigenvalues are selected.
+*          Note that a selected complex eigenvalue may no longer satisfy
+*          DELZTG(ALPHAR(j),ALPHAI(j),BETA(j)) = .TRUE. after ordering,
+*          since ordering may change the value of complex eigenvalues
+*          (especially if the eigenvalue is ill-conditioned), in this
+*          case INFO is set to N+3.
+*
+*  SENSE   (input) CHARACTER
+*          Determines which reciprocal condition numbers are computed.
+*          = 'N' : None are computed;
+*          = 'E' : Computed for average of selected eigenvalues only;
+*          = 'V' : Computed for selected deflating subspaces only;
+*          = 'B' : Computed for both.
+*          If SENSE = 'E', 'V', or 'B', SORT must equal 'S'.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A, B, VSL, and VSR.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA, N)
+*          On entry, the first of the pair of matrices.
+*          On exit, A has been overwritten by its generalized Schur
+*          form S.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of A.  LDA >= max(1,N).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB, N)
+*          On entry, the second of the pair of matrices.
+*          On exit, B has been overwritten by its generalized Schur
+*          form T.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of B.  LDB >= max(1,N).
+*
+*  SDIM    (output) INTEGER
+*          If SORT = 'N', SDIM = 0.
+*          If SORT = 'S', SDIM = number of eigenvalues (after sorting)
+*          for which DELZTG is true.  (Complex conjugate pairs for which
+*          DELZTG is true for either eigenvalue count as 2.)
+*
+*  ALPHAR  (output) DOUBLE PRECISION array, dimension (N)
+*  ALPHAI  (output) DOUBLE PRECISION array, dimension (N)
+*  BETA    (output) DOUBLE PRECISION array, dimension (N)
+*          On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will
+*          be the generalized eigenvalues.  ALPHAR(j) + ALPHAI(j)*i
+*          and BETA(j),j=1,...,N  are the diagonals of the complex Schur
+*          form (S,T) that would result if the 2-by-2 diagonal blocks of
+*          the real Schur form of (A,B) were further reduced to
+*          triangular form using 2-by-2 complex unitary transformations.
+*          If ALPHAI(j) is zero, then the j-th eigenvalue is real; if
+*          positive, then the j-th and (j+1)-st eigenvalues are a
+*          complex conjugate pair, with ALPHAI(j+1) negative.
+*
+*          Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j)
+*          may easily over- or underflow, and BETA(j) may even be zero.
+*          Thus, the user should avoid naively computing the ratio.
+*          However, ALPHAR and ALPHAI will be always less than and
+*          usually comparable with norm(A) in magnitude, and BETA always
+*          less than and usually comparable with norm(B).
+*
+*  VSL     (output) DOUBLE PRECISION array, dimension (LDVSL,N)
+*          If JOBVSL = 'V', VSL will contain the left Schur vectors.
+*          Not referenced if JOBVSL = 'N'.
+*
+*  LDVSL   (input) INTEGER
+*          The leading dimension of the matrix VSL. LDVSL >=1, and
+*          if JOBVSL = 'V', LDVSL >= N.
+*
+*  VSR     (output) DOUBLE PRECISION array, dimension (LDVSR,N)
+*          If JOBVSR = 'V', VSR will contain the right Schur vectors.
+*          Not referenced if JOBVSR = 'N'.
+*
+*  LDVSR   (input) INTEGER
+*          The leading dimension of the matrix VSR. LDVSR >= 1, and
+*          if JOBVSR = 'V', LDVSR >= N.
+*
+*  RCONDE  (output) DOUBLE PRECISION array, dimension ( 2 )
+*          If SENSE = 'E' or 'B', RCONDE(1) and RCONDE(2) contain the
+*          reciprocal condition numbers for the average of the selected
+*          eigenvalues.
+*          Not referenced if SENSE = 'N' or 'V'.
+*
+*  RCONDV  (output) DOUBLE PRECISION array, dimension ( 2 )
+*          If SENSE = 'V' or 'B', RCONDV(1) and RCONDV(2) contain the
+*          reciprocal condition numbers for the selected deflating
+*          subspaces.
+*          Not referenced if SENSE = 'N' or 'E'.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= 8*(N+1)+16.
+*          If SENSE = 'E', 'V', or 'B',
+*          LWORK >= MAX( 8*(N+1)+16, 2*SDIM*(N-SDIM) ).
+*
+*  IWORK   (workspace) INTEGER array, dimension (LIWORK)
+*          Not referenced if SENSE = 'N'.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array WORK.  LIWORK >= N+6.
+*
+*  BWORK   (workspace) LOGICAL array, dimension (N)
+*          Not referenced if SORT = 'N'.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          = 1,...,N:
+*                The QZ iteration failed.  (A,B) are not in Schur
+*                form, but ALPHAR(j), ALPHAI(j), and BETA(j) should
+*                be correct for j=INFO+1,...,N.
+*          > N:  =N+1: other than QZ iteration failed in DHGEQZ
+*                =N+2: after reordering, roundoff changed values of
+*                      some complex eigenvalues so that leading
+*                      eigenvalues in the Generalized Schur form no
+*                      longer satisfy DELZTG=.TRUE.  This could also
+*                      be caused due to scaling.
+*                =N+3: reordering failed in DTGSEN.
+*
+*  Further details
+*  ===============
+*
+*  An approximate (asymptotic) bound on the average absolute error of
+*  the selected eigenvalues is
+*
+*       EPS * norm((A, B)) / RCONDE( 1 ).
+*
+*  An approximate (asymptotic) bound on the maximum angular error in
+*  the computed deflating subspaces is
+*
+*       EPS * norm((A, B)) / RCONDV( 2 ).
+*
+*  See LAPACK User's Guide, section 4.11 for more information.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            CURSL, ILASCL, ILBSCL, ILVSL, ILVSR, LASTSL,
+     $                   LST2SL, WANTSB, WANTSE, WANTSN, WANTST, WANTSV
+      INTEGER            I, ICOLS, IERR, IHI, IJOB, IJOBVL, IJOBVR,
+     $                   ILEFT, ILO, IP, IRIGHT, IROWS, ITAU, IWRK,
+     $                   LIWMIN, MAXWRK, MINWRK
+      DOUBLE PRECISION   ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, PL,
+     $                   PR, SAFMAX, SAFMIN, SMLNUM
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   DIF( 2 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEQRF, DGGBAK, DGGBAL, DGGHRD, DHGEQZ, DLABAD,
+     $                   DLACPY, DLASCL, DLASET, DORGQR, DORMQR, DTGSEN,
+     $                   XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, DLANGE
+      EXTERNAL           LSAME, ILAENV, DLAMCH, DLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode the input arguments
+*
+      IF( LSAME( JOBVSL, 'N' ) ) THEN
+         IJOBVL = 1
+         ILVSL = .FALSE.
+      ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN
+         IJOBVL = 2
+         ILVSL = .TRUE.
+      ELSE
+         IJOBVL = -1
+         ILVSL = .FALSE.
+      END IF
+*
+      IF( LSAME( JOBVSR, 'N' ) ) THEN
+         IJOBVR = 1
+         ILVSR = .FALSE.
+      ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN
+         IJOBVR = 2
+         ILVSR = .TRUE.
+      ELSE
+         IJOBVR = -1
+         ILVSR = .FALSE.
+      END IF
+*
+      WANTST = LSAME( SORT, 'S' )
+      WANTSN = LSAME( SENSE, 'N' )
+      WANTSE = LSAME( SENSE, 'E' )
+      WANTSV = LSAME( SENSE, 'V' )
+      WANTSB = LSAME( SENSE, 'B' )
+      IF( WANTSN ) THEN
+         IJOB = 0
+         IWORK( 1 ) = 1
+      ELSE IF( WANTSE ) THEN
+         IJOB = 1
+      ELSE IF( WANTSV ) THEN
+         IJOB = 2
+      ELSE IF( WANTSB ) THEN
+         IJOB = 4
+      END IF
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( IJOBVL.LE.0 ) THEN
+         INFO = -1
+      ELSE IF( IJOBVR.LE.0 ) THEN
+         INFO = -2
+      ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSV .OR. WANTSB ) .OR.
+     $         ( .NOT.WANTST .AND. .NOT.WANTSN ) ) THEN
+         INFO = -5
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN
+         INFO = -16
+      ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN
+         INFO = -18
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. LWORK.GE.1 ) THEN
+         MINWRK = 8*( N+1 ) + 16
+         MAXWRK = 7*( N+1 ) + N*ILAENV( 1, 'DGEQRF', ' ', N, 1, N, 0 ) +
+     $            16
+         IF( ILVSL ) THEN
+            MAXWRK = MAX( MAXWRK, 8*( N+1 )+N*
+     $               ILAENV( 1, 'DORGQR', ' ', N, 1, N, -1 )+16 )
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+      IF( .NOT.WANTSN ) THEN
+         LIWMIN = 1
+      ELSE
+         LIWMIN = N + 6
+      END IF
+      IWORK( 1 ) = LIWMIN
+*
+      IF( INFO.EQ.0 .AND. LWORK.LT.MINWRK ) THEN
+         INFO = -22
+      ELSE IF( INFO.EQ.0 .AND. IJOB.GE.1 ) THEN
+         IF( LIWORK.LT.LIWMIN )
+     $      INFO = -24
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGGESX', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         SDIM = 0
+         RETURN
+      END IF
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'P' )
+      SAFMIN = DLAMCH( 'S' )
+      SAFMAX = ONE / SAFMIN
+      CALL DLABAD( SAFMIN, SAFMAX )
+      SMLNUM = SQRT( SAFMIN ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = DLANGE( 'M', N, N, A, LDA, WORK )
+      ILASCL = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         ANRMTO = SMLNUM
+         ILASCL = .TRUE.
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         ANRMTO = BIGNUM
+         ILASCL = .TRUE.
+      END IF
+      IF( ILASCL )
+     $   CALL DLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR )
+*
+*     Scale B if max element outside range [SMLNUM,BIGNUM]
+*
+      BNRM = DLANGE( 'M', N, N, B, LDB, WORK )
+      ILBSCL = .FALSE.
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+         BNRMTO = SMLNUM
+         ILBSCL = .TRUE.
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+         BNRMTO = BIGNUM
+         ILBSCL = .TRUE.
+      END IF
+      IF( ILBSCL )
+     $   CALL DLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR )
+*
+*     Permute the matrix to make it more nearly triangular
+*     (Workspace: need 6*N + 2*N for permutation parameters)
+*
+      ILEFT = 1
+      IRIGHT = N + 1
+      IWRK = IRIGHT + N
+      CALL DGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ),
+     $             WORK( IRIGHT ), WORK( IWRK ), IERR )
+*
+*     Reduce B to triangular form (QR decomposition of B)
+*     (Workspace: need N, prefer N*NB)
+*
+      IROWS = IHI + 1 - ILO
+      ICOLS = N + 1 - ILO
+      ITAU = IWRK
+      IWRK = ITAU + IROWS
+      CALL DGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ),
+     $             WORK( IWRK ), LWORK+1-IWRK, IERR )
+*
+*     Apply the orthogonal transformation to matrix A
+*     (Workspace: need N, prefer N*NB)
+*
+      CALL DORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB,
+     $             WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ),
+     $             LWORK+1-IWRK, IERR )
+*
+*     Initialize VSL
+*     (Workspace: need N, prefer N*NB)
+*
+      IF( ILVSL ) THEN
+         CALL DLASET( 'Full', N, N, ZERO, ONE, VSL, LDVSL )
+         CALL DLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB,
+     $                VSL( ILO+1, ILO ), LDVSL )
+         CALL DORGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL,
+     $                WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR )
+      END IF
+*
+*     Initialize VSR
+*
+      IF( ILVSR )
+     $   CALL DLASET( 'Full', N, N, ZERO, ONE, VSR, LDVSR )
+*
+*     Reduce to generalized Hessenberg form
+*     (Workspace: none needed)
+*
+      CALL DGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL,
+     $             LDVSL, VSR, LDVSR, IERR )
+*
+      SDIM = 0
+*
+*     Perform QZ algorithm, computing Schur vectors if desired
+*     (Workspace: need N)
+*
+      IWRK = ITAU
+      CALL DHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB,
+     $             ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR,
+     $             WORK( IWRK ), LWORK+1-IWRK, IERR )
+      IF( IERR.NE.0 ) THEN
+         IF( IERR.GT.0 .AND. IERR.LE.N ) THEN
+            INFO = IERR
+         ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN
+            INFO = IERR - N
+         ELSE
+            INFO = N + 1
+         END IF
+         GO TO 60
+      END IF
+*
+*     Sort eigenvalues ALPHA/BETA and compute the reciprocal of
+*     condition number(s)
+*     (Workspace: If IJOB >= 1, need MAX( 8*(N+1), 2*SDIM*(N-SDIM) )
+*                 otherwise, need 8*(N+1) )
+*
+      IF( WANTST ) THEN
+*
+*        Undo scaling on eigenvalues before DELZTGing
+*
+         IF( ILASCL ) THEN
+            CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N,
+     $                   IERR )
+            CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N,
+     $                   IERR )
+         END IF
+         IF( ILBSCL )
+     $      CALL DLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR )
+*
+*        Select eigenvalues
+*
+         DO 10 I = 1, N
+            BWORK( I ) = DELCTG( ALPHAR( I ), ALPHAI( I ), BETA( I ) )
+   10    CONTINUE
+*
+*        Reorder eigenvalues, transform Generalized Schur vectors, and
+*        compute reciprocal condition numbers
+*
+         CALL DTGSEN( IJOB, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB,
+     $                ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR,
+     $                SDIM, PL, PR, DIF, WORK( IWRK ), LWORK-IWRK+1,
+     $                IWORK, LIWORK, IERR )
+*
+         IF( IJOB.GE.1 )
+     $      MAXWRK = MAX( MAXWRK, 2*SDIM*( N-SDIM ) )
+         IF( IERR.EQ.-22 ) THEN
+*
+*            not enough real workspace
+*
+            INFO = -22
+         ELSE
+            RCONDE( 1 ) = PL
+            RCONDE( 2 ) = PR
+            RCONDV( 1 ) = DIF( 1 )
+            RCONDV( 2 ) = DIF( 2 )
+            IF( IERR.EQ.1 )
+     $         INFO = N + 3
+         END IF
+*
+      END IF
+*
+*     Apply permutation to VSL and VSR
+*     (Workspace: none needed)
+*
+      IF( ILVSL )
+     $   CALL DGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ),
+     $                WORK( IRIGHT ), N, VSL, LDVSL, IERR )
+*
+      IF( ILVSR )
+     $   CALL DGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ),
+     $                WORK( IRIGHT ), N, VSR, LDVSR, IERR )
+*
+*     Check if unscaling would cause over/underflow, if so, rescale
+*     (ALPHAR(I),ALPHAI(I),BETA(I)) so BETA(I) is on the order of
+*     B(I,I) and ALPHAR(I) and ALPHAI(I) are on the order of A(I,I)
+*
+      IF( ILASCL ) THEN
+         DO 20 I = 1, N
+            IF( ALPHAI( I ).NE.ZERO ) THEN
+               IF( ( ALPHAR( I ) / SAFMAX ).GT.( ANRMTO / ANRM ) .OR.
+     $             ( SAFMIN / ALPHAR( I ) ).GT.( ANRM / ANRMTO ) ) THEN
+                  WORK( 1 ) = ABS( A( I, I ) / ALPHAR( I ) )
+                  BETA( I ) = BETA( I )*WORK( 1 )
+                  ALPHAR( I ) = ALPHAR( I )*WORK( 1 )
+                  ALPHAI( I ) = ALPHAI( I )*WORK( 1 )
+               ELSE IF( ( ALPHAI( I ) / SAFMAX ).GT.
+     $                  ( ANRMTO / ANRM ) .OR.
+     $                  ( SAFMIN / ALPHAI( I ) ).GT.( ANRM / ANRMTO ) )
+     $                   THEN
+                  WORK( 1 ) = ABS( A( I, I+1 ) / ALPHAI( I ) )
+                  BETA( I ) = BETA( I )*WORK( 1 )
+                  ALPHAR( I ) = ALPHAR( I )*WORK( 1 )
+                  ALPHAI( I ) = ALPHAI( I )*WORK( 1 )
+               END IF
+            END IF
+   20    CONTINUE
+      END IF
+*
+      IF( ILBSCL ) THEN
+         DO 30 I = 1, N
+            IF( ALPHAI( I ).NE.ZERO ) THEN
+               IF( ( BETA( I ) / SAFMAX ).GT.( BNRMTO / BNRM ) .OR.
+     $             ( SAFMIN / BETA( I ) ).GT.( BNRM / BNRMTO ) ) THEN
+                  WORK( 1 ) = ABS( B( I, I ) / BETA( I ) )
+                  BETA( I ) = BETA( I )*WORK( 1 )
+                  ALPHAR( I ) = ALPHAR( I )*WORK( 1 )
+                  ALPHAI( I ) = ALPHAI( I )*WORK( 1 )
+               END IF
+            END IF
+   30    CONTINUE
+      END IF
+*
+*     Undo scaling
+*
+      IF( ILASCL ) THEN
+         CALL DLASCL( 'H', 0, 0, ANRMTO, ANRM, N, N, A, LDA, IERR )
+         CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, IERR )
+         CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, IERR )
+      END IF
+*
+      IF( ILBSCL ) THEN
+         CALL DLASCL( 'U', 0, 0, BNRMTO, BNRM, N, N, B, LDB, IERR )
+         CALL DLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR )
+      END IF
+*
+   40 CONTINUE
+*
+      IF( WANTST ) THEN
+*
+*        Check if reordering is correct
+*
+         LASTSL = .TRUE.
+         LST2SL = .TRUE.
+         SDIM = 0
+         IP = 0
+         DO 50 I = 1, N
+            CURSL = DELCTG( ALPHAR( I ), ALPHAI( I ), BETA( I ) )
+            IF( ALPHAI( I ).EQ.ZERO ) THEN
+               IF( CURSL )
+     $            SDIM = SDIM + 1
+               IP = 0
+               IF( CURSL .AND. .NOT.LASTSL )
+     $            INFO = N + 2
+            ELSE
+               IF( IP.EQ.1 ) THEN
+*
+*                 Last eigenvalue of conjugate pair
+*
+                  CURSL = CURSL .OR. LASTSL
+                  LASTSL = CURSL
+                  IF( CURSL )
+     $               SDIM = SDIM + 2
+                  IP = -1
+                  IF( CURSL .AND. .NOT.LST2SL )
+     $               INFO = N + 2
+               ELSE
+*
+*                 First eigenvalue of conjugate pair
+*
+                  IP = 1
+               END IF
+            END IF
+            LST2SL = LASTSL
+            LASTSL = CURSL
+   50    CONTINUE
+*
+      END IF
+*
+   60 CONTINUE
+*
+      WORK( 1 ) = MAXWRK
+      IWORK( 1 ) = LIWMIN
+*
+      RETURN
+*
+*     End of DGGESX
+*
+      END
diff --git a/costa/native/external/lapack/dggev.f b/costa/native/external/lapack/dggev.f
new file mode 100644
index 000000000..38b3a8bbe
--- /dev/null
+++ b/costa/native/external/lapack/dggev.f
@@ -0,0 +1,481 @@
+      SUBROUTINE DGGEV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, ALPHAI,
+     $                  BETA, VL, LDVL, VR, LDVR, WORK, LWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVL, JOBVR
+      INTEGER            INFO, LDA, LDB, LDVL, LDVR, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), ALPHAI( * ), ALPHAR( * ),
+     $                   B( LDB, * ), BETA( * ), VL( LDVL, * ),
+     $                   VR( LDVR, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGGEV computes for a pair of N-by-N real nonsymmetric matrices (A,B)
+*  the generalized eigenvalues, and optionally, the left and/or right
+*  generalized eigenvectors.
+*
+*  A generalized eigenvalue for a pair of matrices (A,B) is a scalar
+*  lambda or a ratio alpha/beta = lambda, such that A - lambda*B is
+*  singular. It is usually represented as the pair (alpha,beta), as
+*  there is a reasonable interpretation for beta=0, and even for both
+*  being zero.
+*
+*  The right eigenvector v(j) corresponding to the eigenvalue lambda(j)
+*  of (A,B) satisfies
+*
+*                   A * v(j) = lambda(j) * B * v(j).
+*
+*  The left eigenvector u(j) corresponding to the eigenvalue lambda(j)
+*  of (A,B) satisfies
+*
+*                   u(j)**H * A  = lambda(j) * u(j)**H * B .
+*
+*  where u(j)**H is the conjugate-transpose of u(j).
+*
+*
+*  Arguments
+*  =========
+*
+*  JOBVL   (input) CHARACTER*1
+*          = 'N':  do not compute the left generalized eigenvectors;
+*          = 'V':  compute the left generalized eigenvectors.
+*
+*  JOBVR   (input) CHARACTER*1
+*          = 'N':  do not compute the right generalized eigenvectors;
+*          = 'V':  compute the right generalized eigenvectors.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A, B, VL, and VR.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA, N)
+*          On entry, the matrix A in the pair (A,B).
+*          On exit, A has been overwritten.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of A.  LDA >= max(1,N).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB, N)
+*          On entry, the matrix B in the pair (A,B).
+*          On exit, B has been overwritten.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of B.  LDB >= max(1,N).
+*
+*  ALPHAR  (output) DOUBLE PRECISION array, dimension (N)
+*  ALPHAI  (output) DOUBLE PRECISION array, dimension (N)
+*  BETA    (output) DOUBLE PRECISION array, dimension (N)
+*          On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will
+*          be the generalized eigenvalues.  If ALPHAI(j) is zero, then
+*          the j-th eigenvalue is real; if positive, then the j-th and
+*          (j+1)-st eigenvalues are a complex conjugate pair, with
+*          ALPHAI(j+1) negative.
+*
+*          Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j)
+*          may easily over- or underflow, and BETA(j) may even be zero.
+*          Thus, the user should avoid naively computing the ratio
+*          alpha/beta.  However, ALPHAR and ALPHAI will be always less
+*          than and usually comparable with norm(A) in magnitude, and
+*          BETA always less than and usually comparable with norm(B).
+*
+*  VL      (output) DOUBLE PRECISION array, dimension (LDVL,N)
+*          If JOBVL = 'V', the left eigenvectors u(j) are stored one
+*          after another in the columns of VL, in the same order as
+*          their eigenvalues. If the j-th eigenvalue is real, then
+*          u(j) = VL(:,j), the j-th column of VL. If the j-th and
+*          (j+1)-th eigenvalues form a complex conjugate pair, then
+*          u(j) = VL(:,j)+i*VL(:,j+1) and u(j+1) = VL(:,j)-i*VL(:,j+1).
+*          Each eigenvector will be scaled so the largest component have
+*          abs(real part)+abs(imag. part)=1.
+*          Not referenced if JOBVL = 'N'.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the matrix VL. LDVL >= 1, and
+*          if JOBVL = 'V', LDVL >= N.
+*
+*  VR      (output) DOUBLE PRECISION array, dimension (LDVR,N)
+*          If JOBVR = 'V', the right eigenvectors v(j) are stored one
+*          after another in the columns of VR, in the same order as
+*          their eigenvalues. If the j-th eigenvalue is real, then
+*          v(j) = VR(:,j), the j-th column of VR. If the j-th and
+*          (j+1)-th eigenvalues form a complex conjugate pair, then
+*          v(j) = VR(:,j)+i*VR(:,j+1) and v(j+1) = VR(:,j)-i*VR(:,j+1).
+*          Each eigenvector will be scaled so the largest component have
+*          abs(real part)+abs(imag. part)=1.
+*          Not referenced if JOBVR = 'N'.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the matrix VR. LDVR >= 1, and
+*          if JOBVR = 'V', LDVR >= N.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,8*N).
+*          For good performance, LWORK must generally be larger.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          = 1,...,N:
+*                The QZ iteration failed.  No eigenvectors have been
+*                calculated, but ALPHAR(j), ALPHAI(j), and BETA(j)
+*                should be correct for j=INFO+1,...,N.
+*          > N:  =N+1: other than QZ iteration failed in DHGEQZ.
+*                =N+2: error return from DTGEVC.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ILASCL, ILBSCL, ILV, ILVL, ILVR, LQUERY
+      CHARACTER          CHTEMP
+      INTEGER            ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT, ILO,
+     $                   IN, IRIGHT, IROWS, ITAU, IWRK, JC, JR, MAXWRK,
+     $                   MINWRK
+      DOUBLE PRECISION   ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS,
+     $                   SMLNUM, TEMP
+*     ..
+*     .. Local Arrays ..
+      LOGICAL            LDUMMA( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEQRF, DGGBAK, DGGBAL, DGGHRD, DHGEQZ, DLACPY,
+     $                   DLASCL, DLASET, DORGQR, DORMQR, DTGEVC, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, DLANGE
+      EXTERNAL           LSAME, ILAENV, DLAMCH, DLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode the input arguments
+*
+      IF( LSAME( JOBVL, 'N' ) ) THEN
+         IJOBVL = 1
+         ILVL = .FALSE.
+      ELSE IF( LSAME( JOBVL, 'V' ) ) THEN
+         IJOBVL = 2
+         ILVL = .TRUE.
+      ELSE
+         IJOBVL = -1
+         ILVL = .FALSE.
+      END IF
+*
+      IF( LSAME( JOBVR, 'N' ) ) THEN
+         IJOBVR = 1
+         ILVR = .FALSE.
+      ELSE IF( LSAME( JOBVR, 'V' ) ) THEN
+         IJOBVR = 2
+         ILVR = .TRUE.
+      ELSE
+         IJOBVR = -1
+         ILVR = .FALSE.
+      END IF
+      ILV = ILVL .OR. ILVR
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( IJOBVL.LE.0 ) THEN
+         INFO = -1
+      ELSE IF( IJOBVR.LE.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN
+         INFO = -12
+      ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN
+         INFO = -14
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV. The workspace is
+*       computed assuming ILO = 1 and IHI = N, the worst case.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN
+         MAXWRK = 7*N + N*ILAENV( 1, 'DGEQRF', ' ', N, 1, N, 0 )
+         MINWRK = MAX( 1, 8*N )
+         WORK( 1 ) = MAXWRK
+      END IF
+*
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY )
+     $   INFO = -16
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGGEV ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'P' )
+      SMLNUM = DLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SQRT( SMLNUM ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = DLANGE( 'M', N, N, A, LDA, WORK )
+      ILASCL = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         ANRMTO = SMLNUM
+         ILASCL = .TRUE.
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         ANRMTO = BIGNUM
+         ILASCL = .TRUE.
+      END IF
+      IF( ILASCL )
+     $   CALL DLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR )
+*
+*     Scale B if max element outside range [SMLNUM,BIGNUM]
+*
+      BNRM = DLANGE( 'M', N, N, B, LDB, WORK )
+      ILBSCL = .FALSE.
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+         BNRMTO = SMLNUM
+         ILBSCL = .TRUE.
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+         BNRMTO = BIGNUM
+         ILBSCL = .TRUE.
+      END IF
+      IF( ILBSCL )
+     $   CALL DLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR )
+*
+*     Permute the matrices A, B to isolate eigenvalues if possible
+*     (Workspace: need 6*N)
+*
+      ILEFT = 1
+      IRIGHT = N + 1
+      IWRK = IRIGHT + N
+      CALL DGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ),
+     $             WORK( IRIGHT ), WORK( IWRK ), IERR )
+*
+*     Reduce B to triangular form (QR decomposition of B)
+*     (Workspace: need N, prefer N*NB)
+*
+      IROWS = IHI + 1 - ILO
+      IF( ILV ) THEN
+         ICOLS = N + 1 - ILO
+      ELSE
+         ICOLS = IROWS
+      END IF
+      ITAU = IWRK
+      IWRK = ITAU + IROWS
+      CALL DGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ),
+     $             WORK( IWRK ), LWORK+1-IWRK, IERR )
+*
+*     Apply the orthogonal transformation to matrix A
+*     (Workspace: need N, prefer N*NB)
+*
+      CALL DORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB,
+     $             WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ),
+     $             LWORK+1-IWRK, IERR )
+*
+*     Initialize VL
+*     (Workspace: need N, prefer N*NB)
+*
+      IF( ILVL ) THEN
+         CALL DLASET( 'Full', N, N, ZERO, ONE, VL, LDVL )
+         CALL DLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB,
+     $                VL( ILO+1, ILO ), LDVL )
+         CALL DORGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL,
+     $                WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR )
+      END IF
+*
+*     Initialize VR
+*
+      IF( ILVR )
+     $   CALL DLASET( 'Full', N, N, ZERO, ONE, VR, LDVR )
+*
+*     Reduce to generalized Hessenberg form
+*     (Workspace: none needed)
+*
+      IF( ILV ) THEN
+*
+*        Eigenvectors requested -- work on whole matrix.
+*
+         CALL DGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL,
+     $                LDVL, VR, LDVR, IERR )
+      ELSE
+         CALL DGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA,
+     $                B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IERR )
+      END IF
+*
+*     Perform QZ algorithm (Compute eigenvalues, and optionally, the
+*     Schur forms and Schur vectors)
+*     (Workspace: need N)
+*
+      IWRK = ITAU
+      IF( ILV ) THEN
+         CHTEMP = 'S'
+      ELSE
+         CHTEMP = 'E'
+      END IF
+      CALL DHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB,
+     $             ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR,
+     $             WORK( IWRK ), LWORK+1-IWRK, IERR )
+      IF( IERR.NE.0 ) THEN
+         IF( IERR.GT.0 .AND. IERR.LE.N ) THEN
+            INFO = IERR
+         ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN
+            INFO = IERR - N
+         ELSE
+            INFO = N + 1
+         END IF
+         GO TO 110
+      END IF
+*
+*     Compute Eigenvectors
+*     (Workspace: need 6*N)
+*
+      IF( ILV ) THEN
+         IF( ILVL ) THEN
+            IF( ILVR ) THEN
+               CHTEMP = 'B'
+            ELSE
+               CHTEMP = 'L'
+            END IF
+         ELSE
+            CHTEMP = 'R'
+         END IF
+         CALL DTGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, LDVL,
+     $                VR, LDVR, N, IN, WORK( IWRK ), IERR )
+         IF( IERR.NE.0 ) THEN
+            INFO = N + 2
+            GO TO 110
+         END IF
+*
+*        Undo balancing on VL and VR and normalization
+*        (Workspace: none needed)
+*
+         IF( ILVL ) THEN
+            CALL DGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ),
+     $                   WORK( IRIGHT ), N, VL, LDVL, IERR )
+            DO 50 JC = 1, N
+               IF( ALPHAI( JC ).LT.ZERO )
+     $            GO TO 50
+               TEMP = ZERO
+               IF( ALPHAI( JC ).EQ.ZERO ) THEN
+                  DO 10 JR = 1, N
+                     TEMP = MAX( TEMP, ABS( VL( JR, JC ) ) )
+   10             CONTINUE
+               ELSE
+                  DO 20 JR = 1, N
+                     TEMP = MAX( TEMP, ABS( VL( JR, JC ) )+
+     $                      ABS( VL( JR, JC+1 ) ) )
+   20             CONTINUE
+               END IF
+               IF( TEMP.LT.SMLNUM )
+     $            GO TO 50
+               TEMP = ONE / TEMP
+               IF( ALPHAI( JC ).EQ.ZERO ) THEN
+                  DO 30 JR = 1, N
+                     VL( JR, JC ) = VL( JR, JC )*TEMP
+   30             CONTINUE
+               ELSE
+                  DO 40 JR = 1, N
+                     VL( JR, JC ) = VL( JR, JC )*TEMP
+                     VL( JR, JC+1 ) = VL( JR, JC+1 )*TEMP
+   40             CONTINUE
+               END IF
+   50       CONTINUE
+         END IF
+         IF( ILVR ) THEN
+            CALL DGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ),
+     $                   WORK( IRIGHT ), N, VR, LDVR, IERR )
+            DO 100 JC = 1, N
+               IF( ALPHAI( JC ).LT.ZERO )
+     $            GO TO 100
+               TEMP = ZERO
+               IF( ALPHAI( JC ).EQ.ZERO ) THEN
+                  DO 60 JR = 1, N
+                     TEMP = MAX( TEMP, ABS( VR( JR, JC ) ) )
+   60             CONTINUE
+               ELSE
+                  DO 70 JR = 1, N
+                     TEMP = MAX( TEMP, ABS( VR( JR, JC ) )+
+     $                      ABS( VR( JR, JC+1 ) ) )
+   70             CONTINUE
+               END IF
+               IF( TEMP.LT.SMLNUM )
+     $            GO TO 100
+               TEMP = ONE / TEMP
+               IF( ALPHAI( JC ).EQ.ZERO ) THEN
+                  DO 80 JR = 1, N
+                     VR( JR, JC ) = VR( JR, JC )*TEMP
+   80             CONTINUE
+               ELSE
+                  DO 90 JR = 1, N
+                     VR( JR, JC ) = VR( JR, JC )*TEMP
+                     VR( JR, JC+1 ) = VR( JR, JC+1 )*TEMP
+   90             CONTINUE
+               END IF
+  100       CONTINUE
+         END IF
+*
+*        End of eigenvector calculation
+*
+      END IF
+*
+*     Undo scaling if necessary
+*
+      IF( ILASCL ) THEN
+         CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, IERR )
+         CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, IERR )
+      END IF
+*
+      IF( ILBSCL ) THEN
+         CALL DLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR )
+      END IF
+*
+  110 CONTINUE
+*
+      WORK( 1 ) = MAXWRK
+*
+      RETURN
+*
+*     End of DGGEV
+*
+      END
diff --git a/costa/native/external/lapack/dggevx.f b/costa/native/external/lapack/dggevx.f
new file mode 100644
index 000000000..0676e1b99
--- /dev/null
+++ b/costa/native/external/lapack/dggevx.f
@@ -0,0 +1,698 @@
+      SUBROUTINE DGGEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, B, LDB,
+     $                   ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, ILO,
+     $                   IHI, LSCALE, RSCALE, ABNRM, BBNRM, RCONDE,
+     $                   RCONDV, WORK, LWORK, IWORK, BWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          BALANC, JOBVL, JOBVR, SENSE
+      INTEGER            IHI, ILO, INFO, LDA, LDB, LDVL, LDVR, LWORK, N
+      DOUBLE PRECISION   ABNRM, BBNRM
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            BWORK( * )
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), ALPHAI( * ), ALPHAR( * ),
+     $                   B( LDB, * ), BETA( * ), LSCALE( * ),
+     $                   RCONDE( * ), RCONDV( * ), RSCALE( * ),
+     $                   VL( LDVL, * ), VR( LDVR, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGGEVX computes for a pair of N-by-N real nonsymmetric matrices (A,B)
+*  the generalized eigenvalues, and optionally, the left and/or right
+*  generalized eigenvectors.
+*
+*  Optionally also, it computes a balancing transformation to improve
+*  the conditioning of the eigenvalues and eigenvectors (ILO, IHI,
+*  LSCALE, RSCALE, ABNRM, and BBNRM), reciprocal condition numbers for
+*  the eigenvalues (RCONDE), and reciprocal condition numbers for the
+*  right eigenvectors (RCONDV).
+*
+*  A generalized eigenvalue for a pair of matrices (A,B) is a scalar
+*  lambda or a ratio alpha/beta = lambda, such that A - lambda*B is
+*  singular. It is usually represented as the pair (alpha,beta), as
+*  there is a reasonable interpretation for beta=0, and even for both
+*  being zero.
+*
+*  The right eigenvector v(j) corresponding to the eigenvalue lambda(j)
+*  of (A,B) satisfies
+*
+*                   A * v(j) = lambda(j) * B * v(j) .
+*
+*  The left eigenvector u(j) corresponding to the eigenvalue lambda(j)
+*  of (A,B) satisfies
+*
+*                   u(j)**H * A  = lambda(j) * u(j)**H * B.
+*
+*  where u(j)**H is the conjugate-transpose of u(j).
+*
+*
+*  Arguments
+*  =========
+*
+*  BALANC  (input) CHARACTER*1
+*          Specifies the balance option to be performed.
+*          = 'N':  do not diagonally scale or permute;
+*          = 'P':  permute only;
+*          = 'S':  scale only;
+*          = 'B':  both permute and scale.
+*          Computed reciprocal condition numbers will be for the
+*          matrices after permuting and/or balancing. Permuting does
+*          not change condition numbers (in exact arithmetic), but
+*          balancing does.
+*
+*  JOBVL   (input) CHARACTER*1
+*          = 'N':  do not compute the left generalized eigenvectors;
+*          = 'V':  compute the left generalized eigenvectors.
+*
+*  JOBVR   (input) CHARACTER*1
+*          = 'N':  do not compute the right generalized eigenvectors;
+*          = 'V':  compute the right generalized eigenvectors.
+*
+*  SENSE   (input) CHARACTER*1
+*          Determines which reciprocal condition numbers are computed.
+*          = 'N': none are computed;
+*          = 'E': computed for eigenvalues only;
+*          = 'V': computed for eigenvectors only;
+*          = 'B': computed for eigenvalues and eigenvectors.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A, B, VL, and VR.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA, N)
+*          On entry, the matrix A in the pair (A,B).
+*          On exit, A has been overwritten. If JOBVL='V' or JOBVR='V'
+*          or both, then A contains the first part of the real Schur
+*          form of the "balanced" versions of the input A and B.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of A.  LDA >= max(1,N).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB, N)
+*          On entry, the matrix B in the pair (A,B).
+*          On exit, B has been overwritten. If JOBVL='V' or JOBVR='V'
+*          or both, then B contains the second part of the real Schur
+*          form of the "balanced" versions of the input A and B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of B.  LDB >= max(1,N).
+*
+*  ALPHAR  (output) DOUBLE PRECISION array, dimension (N)
+*  ALPHAI  (output) DOUBLE PRECISION array, dimension (N)
+*  BETA    (output) DOUBLE PRECISION array, dimension (N)
+*          On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will
+*          be the generalized eigenvalues.  If ALPHAI(j) is zero, then
+*          the j-th eigenvalue is real; if positive, then the j-th and
+*          (j+1)-st eigenvalues are a complex conjugate pair, with
+*          ALPHAI(j+1) negative.
+*
+*          Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j)
+*          may easily over- or underflow, and BETA(j) may even be zero.
+*          Thus, the user should avoid naively computing the ratio
+*          ALPHA/BETA. However, ALPHAR and ALPHAI will be always less
+*          than and usually comparable with norm(A) in magnitude, and
+*          BETA always less than and usually comparable with norm(B).
+*
+*  VL      (output) DOUBLE PRECISION array, dimension (LDVL,N)
+*          If JOBVL = 'V', the left eigenvectors u(j) are stored one
+*          after another in the columns of VL, in the same order as
+*          their eigenvalues. If the j-th eigenvalue is real, then
+*          u(j) = VL(:,j), the j-th column of VL. If the j-th and
+*          (j+1)-th eigenvalues form a complex conjugate pair, then
+*          u(j) = VL(:,j)+i*VL(:,j+1) and u(j+1) = VL(:,j)-i*VL(:,j+1).
+*          Each eigenvector will be scaled so the largest component have
+*          abs(real part) + abs(imag. part) = 1.
+*          Not referenced if JOBVL = 'N'.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the matrix VL. LDVL >= 1, and
+*          if JOBVL = 'V', LDVL >= N.
+*
+*  VR      (output) DOUBLE PRECISION array, dimension (LDVR,N)
+*          If JOBVR = 'V', the right eigenvectors v(j) are stored one
+*          after another in the columns of VR, in the same order as
+*          their eigenvalues. If the j-th eigenvalue is real, then
+*          v(j) = VR(:,j), the j-th column of VR. If the j-th and
+*          (j+1)-th eigenvalues form a complex conjugate pair, then
+*          v(j) = VR(:,j)+i*VR(:,j+1) and v(j+1) = VR(:,j)-i*VR(:,j+1).
+*          Each eigenvector will be scaled so the largest component have
+*          abs(real part) + abs(imag. part) = 1.
+*          Not referenced if JOBVR = 'N'.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the matrix VR. LDVR >= 1, and
+*          if JOBVR = 'V', LDVR >= N.
+*
+*  ILO,IHI (output) INTEGER
+*          ILO and IHI are integer values such that on exit
+*          A(i,j) = 0 and B(i,j) = 0 if i > j and
+*          j = 1,...,ILO-1 or i = IHI+1,...,N.
+*          If BALANC = 'N' or 'S', ILO = 1 and IHI = N.
+*
+*  LSCALE  (output) DOUBLE PRECISION array, dimension (N)
+*          Details of the permutations and scaling factors applied
+*          to the left side of A and B.  If PL(j) is the index of the
+*          row interchanged with row j, and DL(j) is the scaling
+*          factor applied to row j, then
+*            LSCALE(j) = PL(j)  for j = 1,...,ILO-1
+*                      = DL(j)  for j = ILO,...,IHI
+*                      = PL(j)  for j = IHI+1,...,N.
+*          The order in which the interchanges are made is N to IHI+1,
+*          then 1 to ILO-1.
+*
+*  RSCALE  (output) DOUBLE PRECISION array, dimension (N)
+*          Details of the permutations and scaling factors applied
+*          to the right side of A and B.  If PR(j) is the index of the
+*          column interchanged with column j, and DR(j) is the scaling
+*          factor applied to column j, then
+*            RSCALE(j) = PR(j)  for j = 1,...,ILO-1
+*                      = DR(j)  for j = ILO,...,IHI
+*                      = PR(j)  for j = IHI+1,...,N
+*          The order in which the interchanges are made is N to IHI+1,
+*          then 1 to ILO-1.
+*
+*  ABNRM   (output) DOUBLE PRECISION
+*          The one-norm of the balanced matrix A.
+*
+*  BBNRM   (output) DOUBLE PRECISION
+*          The one-norm of the balanced matrix B.
+*
+*  RCONDE  (output) DOUBLE PRECISION array, dimension (N)
+*          If SENSE = 'E' or 'B', the reciprocal condition numbers of
+*          the selected eigenvalues, stored in consecutive elements of
+*          the array. For a complex conjugate pair of eigenvalues two
+*          consecutive elements of RCONDE are set to the same value.
+*          Thus RCONDE(j), RCONDV(j), and the j-th columns of VL and VR
+*          all correspond to the same eigenpair (but not in general the
+*          j-th eigenpair, unless all eigenpairs are selected).
+*          If SENSE = 'V', RCONDE is not referenced.
+*
+*  RCONDV  (output) DOUBLE PRECISION array, dimension (N)
+*          If SENSE = 'V' or 'B', the estimated reciprocal condition
+*          numbers of the selected eigenvectors, stored in consecutive
+*          elements of the array. For a complex eigenvector two
+*          consecutive elements of RCONDV are set to the same value. If
+*          the eigenvalues cannot be reordered to compute RCONDV(j),
+*          RCONDV(j) is set to 0; this can only occur when the true
+*          value would be very small anyway.
+*          If SENSE = 'E', RCONDV is not referenced.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,6*N).
+*          If SENSE = 'E', LWORK >= 12*N.
+*          If SENSE = 'V' or 'B', LWORK >= 2*N*N+12*N+16.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace) INTEGER array, dimension (N+6)
+*          If SENSE = 'E', IWORK is not referenced.
+*
+*  BWORK   (workspace) LOGICAL array, dimension (N)
+*          If SENSE = 'N', BWORK is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          = 1,...,N:
+*                The QZ iteration failed.  No eigenvectors have been
+*                calculated, but ALPHAR(j), ALPHAI(j), and BETA(j)
+*                should be correct for j=INFO+1,...,N.
+*          > N:  =N+1: other than QZ iteration failed in DHGEQZ.
+*                =N+2: error return from DTGEVC.
+*
+*  Further Details
+*  ===============
+*
+*  Balancing a matrix pair (A,B) includes, first, permuting rows and
+*  columns to isolate eigenvalues, second, applying diagonal similarity
+*  transformation to the rows and columns to make the rows and columns
+*  as close in norm as possible. The computed reciprocal condition
+*  numbers correspond to the balanced matrix. Permuting rows and columns
+*  will not change the condition numbers (in exact arithmetic) but
+*  diagonal scaling will.  For further explanation of balancing, see
+*  section 4.11.1.2 of LAPACK Users' Guide.
+*
+*  An approximate error bound on the chordal distance between the i-th
+*  computed generalized eigenvalue w and the corresponding exact
+*  eigenvalue lambda is
+*
+*       chord(w, lambda) <= EPS * norm(ABNRM, BBNRM) / RCONDE(I)
+*
+*  An approximate error bound for the angle between the i-th computed
+*  eigenvector VL(i) or VR(i) is given by
+*
+*       EPS * norm(ABNRM, BBNRM) / DIF(i).
+*
+*  For further explanation of the reciprocal condition numbers RCONDE
+*  and RCONDV, see section 4.11 of LAPACK User's Guide.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ILASCL, ILBSCL, ILV, ILVL, ILVR, LQUERY, PAIR,
+     $                   WANTSB, WANTSE, WANTSN, WANTSV
+      CHARACTER          CHTEMP
+      INTEGER            I, ICOLS, IERR, IJOBVL, IJOBVR, IN, IROWS,
+     $                   ITAU, IWRK, IWRK1, J, JC, JR, M, MAXWRK,
+     $                   MINWRK, MM
+      DOUBLE PRECISION   ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS,
+     $                   SMLNUM, TEMP
+*     ..
+*     .. Local Arrays ..
+      LOGICAL            LDUMMA( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEQRF, DGGBAK, DGGBAL, DGGHRD, DHGEQZ, DLACPY,
+     $                   DLASCL, DLASET, DORGQR, DORMQR, DTGEVC, DTGSNA,
+     $                   XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, DLANGE
+      EXTERNAL           LSAME, ILAENV, DLAMCH, DLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode the input arguments
+*
+      IF( LSAME( JOBVL, 'N' ) ) THEN
+         IJOBVL = 1
+         ILVL = .FALSE.
+      ELSE IF( LSAME( JOBVL, 'V' ) ) THEN
+         IJOBVL = 2
+         ILVL = .TRUE.
+      ELSE
+         IJOBVL = -1
+         ILVL = .FALSE.
+      END IF
+*
+      IF( LSAME( JOBVR, 'N' ) ) THEN
+         IJOBVR = 1
+         ILVR = .FALSE.
+      ELSE IF( LSAME( JOBVR, 'V' ) ) THEN
+         IJOBVR = 2
+         ILVR = .TRUE.
+      ELSE
+         IJOBVR = -1
+         ILVR = .FALSE.
+      END IF
+      ILV = ILVL .OR. ILVR
+*
+      WANTSN = LSAME( SENSE, 'N' )
+      WANTSE = LSAME( SENSE, 'E' )
+      WANTSV = LSAME( SENSE, 'V' )
+      WANTSB = LSAME( SENSE, 'B' )
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.( LSAME( BALANC, 'N' ) .OR. LSAME( BALANC,
+     $    'S' ) .OR. LSAME( BALANC, 'P' ) .OR. LSAME( BALANC, 'B' ) ) )
+     $     THEN
+         INFO = -1
+      ELSE IF( IJOBVL.LE.0 ) THEN
+         INFO = -2
+      ELSE IF( IJOBVR.LE.0 ) THEN
+         INFO = -3
+      ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSB .OR. WANTSV ) )
+     $          THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN
+         INFO = -14
+      ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN
+         INFO = -16
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV. The workspace is
+*       computed assuming ILO = 1 and IHI = N, the worst case.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN
+         MAXWRK = 5*N + N*ILAENV( 1, 'DGEQRF', ' ', N, 1, N, 0 )
+         MINWRK = MAX( 1, 6*N )
+         IF( WANTSE ) THEN
+            MINWRK = MAX( 1, 12*N )
+         ELSE IF( WANTSV .OR. WANTSB ) THEN
+            MINWRK = 2*N*N + 12*N + 16
+            MAXWRK = MAX( MAXWRK, 2*N*N+12*N+16 )
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+*
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
+         INFO = -26
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGGEVX', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'P' )
+      SMLNUM = DLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SQRT( SMLNUM ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = DLANGE( 'M', N, N, A, LDA, WORK )
+      ILASCL = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         ANRMTO = SMLNUM
+         ILASCL = .TRUE.
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         ANRMTO = BIGNUM
+         ILASCL = .TRUE.
+      END IF
+      IF( ILASCL )
+     $   CALL DLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR )
+*
+*     Scale B if max element outside range [SMLNUM,BIGNUM]
+*
+      BNRM = DLANGE( 'M', N, N, B, LDB, WORK )
+      ILBSCL = .FALSE.
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+         BNRMTO = SMLNUM
+         ILBSCL = .TRUE.
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+         BNRMTO = BIGNUM
+         ILBSCL = .TRUE.
+      END IF
+      IF( ILBSCL )
+     $   CALL DLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR )
+*
+*     Permute and/or balance the matrix pair (A,B)
+*     (Workspace: need 6*N)
+*
+      CALL DGGBAL( BALANC, N, A, LDA, B, LDB, ILO, IHI, LSCALE, RSCALE,
+     $             WORK, IERR )
+*
+*     Compute ABNRM and BBNRM
+*
+      ABNRM = DLANGE( '1', N, N, A, LDA, WORK( 1 ) )
+      IF( ILASCL ) THEN
+         WORK( 1 ) = ABNRM
+         CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, 1, 1, WORK( 1 ), 1,
+     $                IERR )
+         ABNRM = WORK( 1 )
+      END IF
+*
+      BBNRM = DLANGE( '1', N, N, B, LDB, WORK( 1 ) )
+      IF( ILBSCL ) THEN
+         WORK( 1 ) = BBNRM
+         CALL DLASCL( 'G', 0, 0, BNRMTO, BNRM, 1, 1, WORK( 1 ), 1,
+     $                IERR )
+         BBNRM = WORK( 1 )
+      END IF
+*
+*     Reduce B to triangular form (QR decomposition of B)
+*     (Workspace: need N, prefer N*NB )
+*
+      IROWS = IHI + 1 - ILO
+      IF( ILV .OR. .NOT.WANTSN ) THEN
+         ICOLS = N + 1 - ILO
+      ELSE
+         ICOLS = IROWS
+      END IF
+      ITAU = 1
+      IWRK = ITAU + IROWS
+      CALL DGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ),
+     $             WORK( IWRK ), LWORK+1-IWRK, IERR )
+*
+*     Apply the orthogonal transformation to A
+*     (Workspace: need N, prefer N*NB)
+*
+      CALL DORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB,
+     $             WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ),
+     $             LWORK+1-IWRK, IERR )
+*
+*     Initialize VL and/or VR
+*     (Workspace: need N, prefer N*NB)
+*
+      IF( ILVL ) THEN
+         CALL DLASET( 'Full', N, N, ZERO, ONE, VL, LDVL )
+         CALL DLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB,
+     $                VL( ILO+1, ILO ), LDVL )
+         CALL DORGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL,
+     $                WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR )
+      END IF
+*
+      IF( ILVR )
+     $   CALL DLASET( 'Full', N, N, ZERO, ONE, VR, LDVR )
+*
+*     Reduce to generalized Hessenberg form
+*     (Workspace: none needed)
+*
+      IF( ILV .OR. .NOT.WANTSN ) THEN
+*
+*        Eigenvectors requested -- work on whole matrix.
+*
+         CALL DGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL,
+     $                LDVL, VR, LDVR, IERR )
+      ELSE
+         CALL DGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA,
+     $                B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IERR )
+      END IF
+*
+*     Perform QZ algorithm (Compute eigenvalues, and optionally, the
+*     Schur forms and Schur vectors)
+*     (Workspace: need N)
+*
+      IF( ILV .OR. .NOT.WANTSN ) THEN
+         CHTEMP = 'S'
+      ELSE
+         CHTEMP = 'E'
+      END IF
+*
+      CALL DHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB,
+     $             ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, WORK,
+     $             LWORK, IERR )
+      IF( IERR.NE.0 ) THEN
+         IF( IERR.GT.0 .AND. IERR.LE.N ) THEN
+            INFO = IERR
+         ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN
+            INFO = IERR - N
+         ELSE
+            INFO = N + 1
+         END IF
+         GO TO 130
+      END IF
+*
+*     Compute Eigenvectors and estimate condition numbers if desired
+*     (Workspace: DTGEVC: need 6*N
+*                 DTGSNA: need 2*N*(N+2)+16 if SENSE = 'V' or 'B',
+*                         need N otherwise )
+*
+      IF( ILV .OR. .NOT.WANTSN ) THEN
+         IF( ILV ) THEN
+            IF( ILVL ) THEN
+               IF( ILVR ) THEN
+                  CHTEMP = 'B'
+               ELSE
+                  CHTEMP = 'L'
+               END IF
+            ELSE
+               CHTEMP = 'R'
+            END IF
+*
+            CALL DTGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL,
+     $                   LDVL, VR, LDVR, N, IN, WORK, IERR )
+            IF( IERR.NE.0 ) THEN
+               INFO = N + 2
+               GO TO 130
+            END IF
+         END IF
+*
+         IF( .NOT.WANTSN ) THEN
+*
+*           compute eigenvectors (DTGEVC) and estimate condition
+*           numbers (DTGSNA). Note that the definition of the condition
+*           number is not invariant under transformation (u,v) to
+*           (Q*u, Z*v), where (u,v) are eigenvectors of the generalized
+*           Schur form (S,T), Q and Z are orthogonal matrices. In order
+*           to avoid using extra 2*N*N workspace, we have to recalculate
+*           eigenvectors and estimate one condition numbers at a time.
+*
+            PAIR = .FALSE.
+            DO 20 I = 1, N
+*
+               IF( PAIR ) THEN
+                  PAIR = .FALSE.
+                  GO TO 20
+               END IF
+               MM = 1
+               IF( I.LT.N ) THEN
+                  IF( A( I+1, I ).NE.ZERO ) THEN
+                     PAIR = .TRUE.
+                     MM = 2
+                  END IF
+               END IF
+*
+               DO 10 J = 1, N
+                  BWORK( J ) = .FALSE.
+   10          CONTINUE
+               IF( MM.EQ.1 ) THEN
+                  BWORK( I ) = .TRUE.
+               ELSE IF( MM.EQ.2 ) THEN
+                  BWORK( I ) = .TRUE.
+                  BWORK( I+1 ) = .TRUE.
+               END IF
+*
+               IWRK = MM*N + 1
+               IWRK1 = IWRK + MM*N
+*
+*              Compute a pair of left and right eigenvectors.
+*              (compute workspace: need up to 4*N + 6*N)
+*
+               IF( WANTSE .OR. WANTSB ) THEN
+                  CALL DTGEVC( 'B', 'S', BWORK, N, A, LDA, B, LDB,
+     $                         WORK( 1 ), N, WORK( IWRK ), N, MM, M,
+     $                         WORK( IWRK1 ), IERR )
+                  IF( IERR.NE.0 ) THEN
+                     INFO = N + 2
+                     GO TO 130
+                  END IF
+               END IF
+*
+               CALL DTGSNA( SENSE, 'S', BWORK, N, A, LDA, B, LDB,
+     $                      WORK( 1 ), N, WORK( IWRK ), N, RCONDE( I ),
+     $                      RCONDV( I ), MM, M, WORK( IWRK1 ),
+     $                      LWORK-IWRK1+1, IWORK, IERR )
+*
+   20       CONTINUE
+         END IF
+      END IF
+*
+*     Undo balancing on VL and VR and normalization
+*     (Workspace: none needed)
+*
+      IF( ILVL ) THEN
+         CALL DGGBAK( BALANC, 'L', N, ILO, IHI, LSCALE, RSCALE, N, VL,
+     $                LDVL, IERR )
+*
+         DO 70 JC = 1, N
+            IF( ALPHAI( JC ).LT.ZERO )
+     $         GO TO 70
+            TEMP = ZERO
+            IF( ALPHAI( JC ).EQ.ZERO ) THEN
+               DO 30 JR = 1, N
+                  TEMP = MAX( TEMP, ABS( VL( JR, JC ) ) )
+   30          CONTINUE
+            ELSE
+               DO 40 JR = 1, N
+                  TEMP = MAX( TEMP, ABS( VL( JR, JC ) )+
+     $                   ABS( VL( JR, JC+1 ) ) )
+   40          CONTINUE
+            END IF
+            IF( TEMP.LT.SMLNUM )
+     $         GO TO 70
+            TEMP = ONE / TEMP
+            IF( ALPHAI( JC ).EQ.ZERO ) THEN
+               DO 50 JR = 1, N
+                  VL( JR, JC ) = VL( JR, JC )*TEMP
+   50          CONTINUE
+            ELSE
+               DO 60 JR = 1, N
+                  VL( JR, JC ) = VL( JR, JC )*TEMP
+                  VL( JR, JC+1 ) = VL( JR, JC+1 )*TEMP
+   60          CONTINUE
+            END IF
+   70    CONTINUE
+      END IF
+      IF( ILVR ) THEN
+         CALL DGGBAK( BALANC, 'R', N, ILO, IHI, LSCALE, RSCALE, N, VR,
+     $                LDVR, IERR )
+         DO 120 JC = 1, N
+            IF( ALPHAI( JC ).LT.ZERO )
+     $         GO TO 120
+            TEMP = ZERO
+            IF( ALPHAI( JC ).EQ.ZERO ) THEN
+               DO 80 JR = 1, N
+                  TEMP = MAX( TEMP, ABS( VR( JR, JC ) ) )
+   80          CONTINUE
+            ELSE
+               DO 90 JR = 1, N
+                  TEMP = MAX( TEMP, ABS( VR( JR, JC ) )+
+     $                   ABS( VR( JR, JC+1 ) ) )
+   90          CONTINUE
+            END IF
+            IF( TEMP.LT.SMLNUM )
+     $         GO TO 120
+            TEMP = ONE / TEMP
+            IF( ALPHAI( JC ).EQ.ZERO ) THEN
+               DO 100 JR = 1, N
+                  VR( JR, JC ) = VR( JR, JC )*TEMP
+  100          CONTINUE
+            ELSE
+               DO 110 JR = 1, N
+                  VR( JR, JC ) = VR( JR, JC )*TEMP
+                  VR( JR, JC+1 ) = VR( JR, JC+1 )*TEMP
+  110          CONTINUE
+            END IF
+  120    CONTINUE
+      END IF
+*
+*     Undo scaling if necessary
+*
+      IF( ILASCL ) THEN
+         CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, IERR )
+         CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, IERR )
+      END IF
+*
+      IF( ILBSCL ) THEN
+         CALL DLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR )
+      END IF
+*
+  130 CONTINUE
+      WORK( 1 ) = MAXWRK
+*
+      RETURN
+*
+*     End of DGGEVX
+*
+      END
diff --git a/costa/native/external/lapack/dggglm.f b/costa/native/external/lapack/dggglm.f
new file mode 100644
index 000000000..6504276cf
--- /dev/null
+++ b/costa/native/external/lapack/dggglm.f
@@ -0,0 +1,212 @@
+      SUBROUTINE DGGGLM( N, M, P, A, LDA, B, LDB, D, X, Y, WORK, LWORK,
+     $                   INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, P
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), D( * ), WORK( * ),
+     $                   X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGGGLM solves a general Gauss-Markov linear model (GLM) problem:
+*
+*          minimize || y ||_2   subject to   d = A*x + B*y
+*              x
+*
+*  where A is an N-by-M matrix, B is an N-by-P matrix, and d is a
+*  given N-vector. It is assumed that M <= N <= M+P, and
+*
+*             rank(A) = M    and    rank( A B ) = N.
+*
+*  Under these assumptions, the constrained equation is always
+*  consistent, and there is a unique solution x and a minimal 2-norm
+*  solution y, which is obtained using a generalized QR factorization
+*  of A and B.
+*
+*  In particular, if matrix B is square nonsingular, then the problem
+*  GLM is equivalent to the following weighted linear least squares
+*  problem
+*
+*               minimize || inv(B)*(d-A*x) ||_2
+*                   x
+*
+*  where inv(B) denotes the inverse of B.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of rows of the matrices A and B.  N >= 0.
+*
+*  M       (input) INTEGER
+*          The number of columns of the matrix A.  0 <= M <= N.
+*
+*  P       (input) INTEGER
+*          The number of columns of the matrix B.  P >= N-M.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,M)
+*          On entry, the N-by-M matrix A.
+*          On exit, A is destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,P)
+*          On entry, the N-by-P matrix B.
+*          On exit, B is destroyed.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  D       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, D is the left hand side of the GLM equation.
+*          On exit, D is destroyed.
+*
+*  X       (output) DOUBLE PRECISION array, dimension (M)
+*  Y       (output) DOUBLE PRECISION array, dimension (P)
+*          On exit, X and Y are the solutions of the GLM problem.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,N+M+P).
+*          For optimum performance, LWORK >= M+min(N,P)+max(N,P)*NB,
+*          where NB is an upper bound for the optimal blocksizes for
+*          DGEQRF, SGERQF, DORMQR and SORMRQ.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  ===================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, LOPT, LWKOPT, NB, NB1, NB2, NB3, NB4, NP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DGEMV, DGGQRF, DORMQR, DORMRQ, DTRSV,
+     $                   XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      NP = MIN( N, P )
+      NB1 = ILAENV( 1, 'DGEQRF', ' ', N, M, -1, -1 )
+      NB2 = ILAENV( 1, 'DGERQF', ' ', N, M, -1, -1 )
+      NB3 = ILAENV( 1, 'DORMQR', ' ', N, M, P, -1 )
+      NB4 = ILAENV( 1, 'DORMRQ', ' ', N, M, P, -1 )
+      NB = MAX( NB1, NB2, NB3, NB4 )
+      LWKOPT = M + NP + MAX( N, P )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( M.LT.0 .OR. M.GT.N ) THEN
+         INFO = -2
+      ELSE IF( P.LT.0 .OR. P.LT.N-M ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LWORK.LT.MAX( 1, N+M+P ) .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGGGLM', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Compute the GQR factorization of matrices A and B:
+*
+*            Q'*A = ( R11 ) M,    Q'*B*Z' = ( T11   T12 ) M
+*                   (  0  ) N-M             (  0    T22 ) N-M
+*                      M                     M+P-N  N-M
+*
+*     where R11 and T22 are upper triangular, and Q and Z are
+*     orthogonal.
+*
+      CALL DGGQRF( N, M, P, A, LDA, WORK, B, LDB, WORK( M+1 ),
+     $             WORK( M+NP+1 ), LWORK-M-NP, INFO )
+      LOPT = WORK( M+NP+1 )
+*
+*     Update left-hand-side vector d = Q'*d = ( d1 ) M
+*                                             ( d2 ) N-M
+*
+      CALL DORMQR( 'Left', 'Transpose', N, 1, M, A, LDA, WORK, D,
+     $             MAX( 1, N ), WORK( M+NP+1 ), LWORK-M-NP, INFO )
+      LOPT = MAX( LOPT, INT( WORK( M+NP+1 ) ) )
+*
+*     Solve T22*y2 = d2 for y2
+*
+      CALL DTRSV( 'Upper', 'No transpose', 'Non unit', N-M,
+     $            B( M+1, M+P-N+1 ), LDB, D( M+1 ), 1 )
+      CALL DCOPY( N-M, D( M+1 ), 1, Y( M+P-N+1 ), 1 )
+*
+*     Set y1 = 0
+*
+      DO 10 I = 1, M + P - N
+         Y( I ) = ZERO
+   10 CONTINUE
+*
+*     Update d1 = d1 - T12*y2
+*
+      CALL DGEMV( 'No transpose', M, N-M, -ONE, B( 1, M+P-N+1 ), LDB,
+     $            Y( M+P-N+1 ), 1, ONE, D, 1 )
+*
+*     Solve triangular system: R11*x = d1
+*
+      CALL DTRSV( 'Upper', 'No Transpose', 'Non unit', M, A, LDA, D, 1 )
+*
+*     Copy D to X
+*
+      CALL DCOPY( M, D, 1, X, 1 )
+*
+*     Backward transformation y = Z'*y
+*
+      CALL DORMRQ( 'Left', 'Transpose', P, 1, NP,
+     $             B( MAX( 1, N-P+1 ), 1 ), LDB, WORK( M+1 ), Y,
+     $             MAX( 1, P ), WORK( M+NP+1 ), LWORK-M-NP, INFO )
+      WORK( 1 ) = M + NP + MAX( LOPT, INT( WORK( M+NP+1 ) ) )
+*
+      RETURN
+*
+*     End of DGGGLM
+*
+      END
diff --git a/costa/native/external/lapack/dgghrd.f b/costa/native/external/lapack/dgghrd.f
new file mode 100644
index 000000000..9473fb580
--- /dev/null
+++ b/costa/native/external/lapack/dgghrd.f
@@ -0,0 +1,253 @@
+      SUBROUTINE DGGHRD( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q,
+     $                   LDQ, Z, LDZ, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPQ, COMPZ
+      INTEGER            IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGGHRD reduces a pair of real matrices (A,B) to generalized upper
+*  Hessenberg form using orthogonal transformations, where A is a
+*  general matrix and B is upper triangular:  Q' * A * Z = H and
+*  Q' * B * Z = T, where H is upper Hessenberg, T is upper triangular,
+*  and Q and Z are orthogonal, and ' means transpose.
+*
+*  The orthogonal matrices Q and Z are determined as products of Givens
+*  rotations.  They may either be formed explicitly, or they may be
+*  postmultiplied into input matrices Q1 and Z1, so that
+*
+*       Q1 * A * Z1' = (Q1*Q) * H * (Z1*Z)'
+*       Q1 * B * Z1' = (Q1*Q) * T * (Z1*Z)'
+*
+*  Arguments
+*  =========
+*
+*  COMPQ   (input) CHARACTER*1
+*          = 'N': do not compute Q;
+*          = 'I': Q is initialized to the unit matrix, and the
+*                 orthogonal matrix Q is returned;
+*          = 'V': Q must contain an orthogonal matrix Q1 on entry,
+*                 and the product Q1*Q is returned.
+*
+*  COMPZ   (input) CHARACTER*1
+*          = 'N': do not compute Z;
+*          = 'I': Z is initialized to the unit matrix, and the
+*                 orthogonal matrix Z is returned;
+*          = 'V': Z must contain an orthogonal matrix Z1 on entry,
+*                 and the product Z1*Z is returned.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          It is assumed that A is already upper triangular in rows and
+*          columns 1:ILO-1 and IHI+1:N.  ILO and IHI are normally set
+*          by a previous call to DGGBAL; otherwise they should be set
+*          to 1 and N respectively.
+*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA, N)
+*          On entry, the N-by-N general matrix to be reduced.
+*          On exit, the upper triangle and the first subdiagonal of A
+*          are overwritten with the upper Hessenberg matrix H, and the
+*          rest is set to zero.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB, N)
+*          On entry, the N-by-N upper triangular matrix B.
+*          On exit, the upper triangular matrix T = Q' B Z.  The
+*          elements below the diagonal are set to zero.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  Q       (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
+*          If COMPQ='N':  Q is not referenced.
+*          If COMPQ='I':  on entry, Q need not be set, and on exit it
+*                         contains the orthogonal matrix Q, where Q'
+*                         is the product of the Givens transformations
+*                         which are applied to A and B on the left.
+*          If COMPQ='V':  on entry, Q must contain an orthogonal matrix
+*                         Q1, and on exit this is overwritten by Q1*Q.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.
+*          LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise.
+*
+*  Z       (input/output) DOUBLE PRECISION array, dimension (LDZ, N)
+*          If COMPZ='N':  Z is not referenced.
+*          If COMPZ='I':  on entry, Z need not be set, and on exit it
+*                         contains the orthogonal matrix Z, which is
+*                         the product of the Givens transformations
+*                         which are applied to A and B on the right.
+*          If COMPZ='V':  on entry, Z must contain an orthogonal matrix
+*                         Z1, and on exit this is overwritten by Z1*Z.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.
+*          LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  This routine reduces A to Hessenberg and B to triangular form by
+*  an unblocked reduction, as described in _Matrix_Computations_,
+*  by Golub and Van Loan (Johns Hopkins Press.)
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ILQ, ILZ
+      INTEGER            ICOMPQ, ICOMPZ, JCOL, JROW
+      DOUBLE PRECISION   C, S, TEMP
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARTG, DLASET, DROT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode COMPQ
+*
+      IF( LSAME( COMPQ, 'N' ) ) THEN
+         ILQ = .FALSE.
+         ICOMPQ = 1
+      ELSE IF( LSAME( COMPQ, 'V' ) ) THEN
+         ILQ = .TRUE.
+         ICOMPQ = 2
+      ELSE IF( LSAME( COMPQ, 'I' ) ) THEN
+         ILQ = .TRUE.
+         ICOMPQ = 3
+      ELSE
+         ICOMPQ = 0
+      END IF
+*
+*     Decode COMPZ
+*
+      IF( LSAME( COMPZ, 'N' ) ) THEN
+         ILZ = .FALSE.
+         ICOMPZ = 1
+      ELSE IF( LSAME( COMPZ, 'V' ) ) THEN
+         ILZ = .TRUE.
+         ICOMPZ = 2
+      ELSE IF( LSAME( COMPZ, 'I' ) ) THEN
+         ILZ = .TRUE.
+         ICOMPZ = 3
+      ELSE
+         ICOMPZ = 0
+      END IF
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( ICOMPQ.LE.0 ) THEN
+         INFO = -1
+      ELSE IF( ICOMPZ.LE.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( ILO.LT.1 ) THEN
+         INFO = -4
+      ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( ( ILQ .AND. LDQ.LT.N ) .OR. LDQ.LT.1 ) THEN
+         INFO = -11
+      ELSE IF( ( ILZ .AND. LDZ.LT.N ) .OR. LDZ.LT.1 ) THEN
+         INFO = -13
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGGHRD', -INFO )
+         RETURN
+      END IF
+*
+*     Initialize Q and Z if desired.
+*
+      IF( ICOMPQ.EQ.3 )
+     $   CALL DLASET( 'Full', N, N, ZERO, ONE, Q, LDQ )
+      IF( ICOMPZ.EQ.3 )
+     $   CALL DLASET( 'Full', N, N, ZERO, ONE, Z, LDZ )
+*
+*     Quick return if possible
+*
+      IF( N.LE.1 )
+     $   RETURN
+*
+*     Zero out lower triangle of B
+*
+      DO 20 JCOL = 1, N - 1
+         DO 10 JROW = JCOL + 1, N
+            B( JROW, JCOL ) = ZERO
+   10    CONTINUE
+   20 CONTINUE
+*
+*     Reduce A and B
+*
+      DO 40 JCOL = ILO, IHI - 2
+*
+         DO 30 JROW = IHI, JCOL + 2, -1
+*
+*           Step 1: rotate rows JROW-1, JROW to kill A(JROW,JCOL)
+*
+            TEMP = A( JROW-1, JCOL )
+            CALL DLARTG( TEMP, A( JROW, JCOL ), C, S,
+     $                   A( JROW-1, JCOL ) )
+            A( JROW, JCOL ) = ZERO
+            CALL DROT( N-JCOL, A( JROW-1, JCOL+1 ), LDA,
+     $                 A( JROW, JCOL+1 ), LDA, C, S )
+            CALL DROT( N+2-JROW, B( JROW-1, JROW-1 ), LDB,
+     $                 B( JROW, JROW-1 ), LDB, C, S )
+            IF( ILQ )
+     $         CALL DROT( N, Q( 1, JROW-1 ), 1, Q( 1, JROW ), 1, C, S )
+*
+*           Step 2: rotate columns JROW, JROW-1 to kill B(JROW,JROW-1)
+*
+            TEMP = B( JROW, JROW )
+            CALL DLARTG( TEMP, B( JROW, JROW-1 ), C, S,
+     $                   B( JROW, JROW ) )
+            B( JROW, JROW-1 ) = ZERO
+            CALL DROT( IHI, A( 1, JROW ), 1, A( 1, JROW-1 ), 1, C, S )
+            CALL DROT( JROW-1, B( 1, JROW ), 1, B( 1, JROW-1 ), 1, C,
+     $                 S )
+            IF( ILZ )
+     $         CALL DROT( N, Z( 1, JROW ), 1, Z( 1, JROW-1 ), 1, C, S )
+   30    CONTINUE
+   40 CONTINUE
+*
+      RETURN
+*
+*     End of DGGHRD
+*
+      END
diff --git a/costa/native/external/lapack/dgglse.f b/costa/native/external/lapack/dgglse.f
new file mode 100644
index 000000000..f83bae247
--- /dev/null
+++ b/costa/native/external/lapack/dgglse.f
@@ -0,0 +1,217 @@
+      SUBROUTINE DGGLSE( M, N, P, A, LDA, B, LDB, C, D, X, WORK, LWORK,
+     $                   INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, P
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), C( * ), D( * ),
+     $                   WORK( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGGLSE solves the linear equality-constrained least squares (LSE)
+*  problem:
+*
+*          minimize || c - A*x ||_2   subject to   B*x = d
+*
+*  where A is an M-by-N matrix, B is a P-by-N matrix, c is a given
+*  M-vector, and d is a given P-vector. It is assumed that
+*  P <= N <= M+P, and
+*
+*           rank(B) = P and  rank( ( A ) ) = N.
+*                                ( ( B ) )
+*
+*  These conditions ensure that the LSE problem has a unique solution,
+*  which is obtained using a GRQ factorization of the matrices B and A.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrices A and B. N >= 0.
+*
+*  P       (input) INTEGER
+*          The number of rows of the matrix B. 0 <= P <= N <= M+P.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, A is destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,N)
+*          On entry, the P-by-N matrix B.
+*          On exit, B is destroyed.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,P).
+*
+*  C       (input/output) DOUBLE PRECISION array, dimension (M)
+*          On entry, C contains the right hand side vector for the
+*          least squares part of the LSE problem.
+*          On exit, the residual sum of squares for the solution
+*          is given by the sum of squares of elements N-P+1 to M of
+*          vector C.
+*
+*  D       (input/output) DOUBLE PRECISION array, dimension (P)
+*          On entry, D contains the right hand side vector for the
+*          constrained equation.
+*          On exit, D is destroyed.
+*
+*  X       (output) DOUBLE PRECISION array, dimension (N)
+*          On exit, X is the solution of the LSE problem.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,M+N+P).
+*          For optimum performance LWORK >= P+min(M,N)+max(M,N)*NB,
+*          where NB is an upper bound for the optimal blocksizes for
+*          DGEQRF, SGERQF, DORMQR and SORMRQ.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            LOPT, LWKOPT, MN, NB, NB1, NB2, NB3, NB4, NR
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DCOPY, DGEMV, DGGRQF, DORMQR, DORMRQ,
+     $                   DTRMV, DTRSV, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      MN = MIN( M, N )
+      NB1 = ILAENV( 1, 'DGEQRF', ' ', M, N, -1, -1 )
+      NB2 = ILAENV( 1, 'DGERQF', ' ', M, N, -1, -1 )
+      NB3 = ILAENV( 1, 'DORMQR', ' ', M, N, P, -1 )
+      NB4 = ILAENV( 1, 'DORMRQ', ' ', M, N, P, -1 )
+      NB = MAX( NB1, NB2, NB3, NB4 )
+      LWKOPT = P + MN + MAX( M, N )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( P.LT.0 .OR. P.GT.N .OR. P.LT.N-M ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, P ) ) THEN
+         INFO = -7
+      ELSE IF( LWORK.LT.MAX( 1, M+N+P ) .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGGLSE', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Compute the GRQ factorization of matrices B and A:
+*
+*            B*Q' = (  0  T12 ) P   Z'*A*Q' = ( R11 R12 ) N-P
+*                     N-P  P                  (  0  R22 ) M+P-N
+*                                               N-P  P
+*
+*     where T12 and R11 are upper triangular, and Q and Z are
+*     orthogonal.
+*
+      CALL DGGRQF( P, M, N, B, LDB, WORK, A, LDA, WORK( P+1 ),
+     $             WORK( P+MN+1 ), LWORK-P-MN, INFO )
+      LOPT = WORK( P+MN+1 )
+*
+*     Update c = Z'*c = ( c1 ) N-P
+*                       ( c2 ) M+P-N
+*
+      CALL DORMQR( 'Left', 'Transpose', M, 1, MN, A, LDA, WORK( P+1 ),
+     $             C, MAX( 1, M ), WORK( P+MN+1 ), LWORK-P-MN, INFO )
+      LOPT = MAX( LOPT, INT( WORK( P+MN+1 ) ) )
+*
+*     Solve T12*x2 = d for x2
+*
+      CALL DTRSV( 'Upper', 'No transpose', 'Non unit', P, B( 1, N-P+1 ),
+     $            LDB, D, 1 )
+*
+*     Update c1
+*
+      CALL DGEMV( 'No transpose', N-P, P, -ONE, A( 1, N-P+1 ), LDA, D,
+     $            1, ONE, C, 1 )
+*
+*     Sovle R11*x1 = c1 for x1
+*
+      CALL DTRSV( 'Upper', 'No transpose', 'Non unit', N-P, A, LDA, C,
+     $            1 )
+*
+*     Put the solutions in X
+*
+      CALL DCOPY( N-P, C, 1, X, 1 )
+      CALL DCOPY( P, D, 1, X( N-P+1 ), 1 )
+*
+*     Compute the residual vector:
+*
+      IF( M.LT.N ) THEN
+         NR = M + P - N
+         CALL DGEMV( 'No transpose', NR, N-M, -ONE, A( N-P+1, M+1 ),
+     $               LDA, D( NR+1 ), 1, ONE, C( N-P+1 ), 1 )
+      ELSE
+         NR = P
+      END IF
+      CALL DTRMV( 'Upper', 'No transpose', 'Non unit', NR,
+     $            A( N-P+1, N-P+1 ), LDA, D, 1 )
+      CALL DAXPY( NR, -ONE, D, 1, C( N-P+1 ), 1 )
+*
+*     Backward transformation x = Q'*x
+*
+      CALL DORMRQ( 'Left', 'Transpose', N, 1, P, B, LDB, WORK( 1 ), X,
+     $             N, WORK( P+MN+1 ), LWORK-P-MN, INFO )
+      WORK( 1 ) = P + MN + MAX( LOPT, INT( WORK( P+MN+1 ) ) )
+*
+      RETURN
+*
+*     End of DGGLSE
+*
+      END
diff --git a/costa/native/external/lapack/dggqrf.f b/costa/native/external/lapack/dggqrf.f
new file mode 100644
index 000000000..44c6954b1
--- /dev/null
+++ b/costa/native/external/lapack/dggqrf.f
@@ -0,0 +1,212 @@
+      SUBROUTINE DGGQRF( N, M, P, A, LDA, TAUA, B, LDB, TAUB, WORK,
+     $                   LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, P
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGGQRF computes a generalized QR factorization of an N-by-M matrix A
+*  and an N-by-P matrix B:
+*
+*              A = Q*R,        B = Q*T*Z,
+*
+*  where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal
+*  matrix, and R and T assume one of the forms:
+*
+*  if N >= M,  R = ( R11 ) M  ,   or if N < M,  R = ( R11  R12 ) N,
+*                  (  0  ) N-M                         N   M-N
+*                     M
+*
+*  where R11 is upper triangular, and
+*
+*  if N <= P,  T = ( 0  T12 ) N,   or if N > P,  T = ( T11 ) N-P,
+*                   P-N  N                           ( T21 ) P
+*                                                       P
+*
+*  where T12 or T21 is upper triangular.
+*
+*  In particular, if B is square and nonsingular, the GQR factorization
+*  of A and B implicitly gives the QR factorization of inv(B)*A:
+*
+*               inv(B)*A = Z'*(inv(T)*R)
+*
+*  where inv(B) denotes the inverse of the matrix B, and Z' denotes the
+*  transpose of the matrix Z.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of rows of the matrices A and B. N >= 0.
+*
+*  M       (input) INTEGER
+*          The number of columns of the matrix A.  M >= 0.
+*
+*  P       (input) INTEGER
+*          The number of columns of the matrix B.  P >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,M)
+*          On entry, the N-by-M matrix A.
+*          On exit, the elements on and above the diagonal of the array
+*          contain the min(N,M)-by-M upper trapezoidal matrix R (R is
+*          upper triangular if N >= M); the elements below the diagonal,
+*          with the array TAUA, represent the orthogonal matrix Q as a
+*          product of min(N,M) elementary reflectors (see Further
+*          Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  TAUA    (output) DOUBLE PRECISION array, dimension (min(N,M))
+*          The scalar factors of the elementary reflectors which
+*          represent the orthogonal matrix Q (see Further Details).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,P)
+*          On entry, the N-by-P matrix B.
+*          On exit, if N <= P, the upper triangle of the subarray
+*          B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T;
+*          if N > P, the elements on and above the (N-P)-th subdiagonal
+*          contain the N-by-P upper trapezoidal matrix T; the remaining
+*          elements, with the array TAUB, represent the orthogonal
+*          matrix Z as a product of elementary reflectors (see Further
+*          Details).
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  TAUB    (output) DOUBLE PRECISION array, dimension (min(N,P))
+*          The scalar factors of the elementary reflectors which
+*          represent the orthogonal matrix Z (see Further Details).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,N,M,P).
+*          For optimum performance LWORK >= max(N,M,P)*max(NB1,NB2,NB3),
+*          where NB1 is the optimal blocksize for the QR factorization
+*          of an N-by-M matrix, NB2 is the optimal blocksize for the
+*          RQ factorization of an N-by-P matrix, and NB3 is the optimal
+*          blocksize for a call of DORMQR.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(k), where k = min(n,m).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - taua * v * v'
+*
+*  where taua is a real scalar, and v is a real vector with
+*  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i+1:n,i),
+*  and taua in TAUA(i).
+*  To form Q explicitly, use LAPACK subroutine DORGQR.
+*  To use Q to update another matrix, use LAPACK subroutine DORMQR.
+*
+*  The matrix Z is represented as a product of elementary reflectors
+*
+*     Z = H(1) H(2) . . . H(k), where k = min(n,p).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - taub * v * v'
+*
+*  where taub is a real scalar, and v is a real vector with
+*  v(p-k+i+1:p) = 0 and v(p-k+i) = 1; v(1:p-k+i-1) is stored on exit in
+*  B(n-k+i,1:p-k+i-1), and taub in TAUB(i).
+*  To form Z explicitly, use LAPACK subroutine DORGRQ.
+*  To use Z to update another matrix, use LAPACK subroutine DORMRQ.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            LOPT, LWKOPT, NB, NB1, NB2, NB3
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEQRF, DGERQF, DORMQR, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      NB1 = ILAENV( 1, 'DGEQRF', ' ', N, M, -1, -1 )
+      NB2 = ILAENV( 1, 'DGERQF', ' ', N, P, -1, -1 )
+      NB3 = ILAENV( 1, 'DORMQR', ' ', N, M, P, -1 )
+      NB = MAX( NB1, NB2, NB3 )
+      LWKOPT = MAX( N, M, P )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( P.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LWORK.LT.MAX( 1, N, M, P ) .AND. .NOT.LQUERY ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGGQRF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     QR factorization of N-by-M matrix A: A = Q*R
+*
+      CALL DGEQRF( N, M, A, LDA, TAUA, WORK, LWORK, INFO )
+      LOPT = WORK( 1 )
+*
+*     Update B := Q'*B.
+*
+      CALL DORMQR( 'Left', 'Transpose', N, P, MIN( N, M ), A, LDA, TAUA,
+     $             B, LDB, WORK, LWORK, INFO )
+      LOPT = MAX( LOPT, INT( WORK( 1 ) ) )
+*
+*     RQ factorization of N-by-P matrix B: B = T*Z.
+*
+      CALL DGERQF( N, P, B, LDB, TAUB, WORK, LWORK, INFO )
+      WORK( 1 ) = MAX( LOPT, INT( WORK( 1 ) ) )
+*
+      RETURN
+*
+*     End of DGGQRF
+*
+      END
diff --git a/costa/native/external/lapack/dggrqf.f b/costa/native/external/lapack/dggrqf.f
new file mode 100644
index 000000000..349f8d460
--- /dev/null
+++ b/costa/native/external/lapack/dggrqf.f
@@ -0,0 +1,212 @@
+      SUBROUTINE DGGRQF( M, P, N, A, LDA, TAUA, B, LDB, TAUB, WORK,
+     $                   LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, P
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGGRQF computes a generalized RQ factorization of an M-by-N matrix A
+*  and a P-by-N matrix B:
+*
+*              A = R*Q,        B = Z*T*Q,
+*
+*  where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal
+*  matrix, and R and T assume one of the forms:
+*
+*  if M <= N,  R = ( 0  R12 ) M,   or if M > N,  R = ( R11 ) M-N,
+*                   N-M  M                           ( R21 ) N
+*                                                       N
+*
+*  where R12 or R21 is upper triangular, and
+*
+*  if P >= N,  T = ( T11 ) N  ,   or if P < N,  T = ( T11  T12 ) P,
+*                  (  0  ) P-N                         P   N-P
+*                     N
+*
+*  where T11 is upper triangular.
+*
+*  In particular, if B is square and nonsingular, the GRQ factorization
+*  of A and B implicitly gives the RQ factorization of A*inv(B):
+*
+*               A*inv(B) = (R*inv(T))*Z'
+*
+*  where inv(B) denotes the inverse of the matrix B, and Z' denotes the
+*  transpose of the matrix Z.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  P       (input) INTEGER
+*          The number of rows of the matrix B.  P >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrices A and B. N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, if M <= N, the upper triangle of the subarray
+*          A(1:M,N-M+1:N) contains the M-by-M upper triangular matrix R;
+*          if M > N, the elements on and above the (M-N)-th subdiagonal
+*          contain the M-by-N upper trapezoidal matrix R; the remaining
+*          elements, with the array TAUA, represent the orthogonal
+*          matrix Q as a product of elementary reflectors (see Further
+*          Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  TAUA    (output) DOUBLE PRECISION array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors which
+*          represent the orthogonal matrix Q (see Further Details).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,N)
+*          On entry, the P-by-N matrix B.
+*          On exit, the elements on and above the diagonal of the array
+*          contain the min(P,N)-by-N upper trapezoidal matrix T (T is
+*          upper triangular if P >= N); the elements below the diagonal,
+*          with the array TAUB, represent the orthogonal matrix Z as a
+*          product of elementary reflectors (see Further Details).
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,P).
+*
+*  TAUB    (output) DOUBLE PRECISION array, dimension (min(P,N))
+*          The scalar factors of the elementary reflectors which
+*          represent the orthogonal matrix Z (see Further Details).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,N,M,P).
+*          For optimum performance LWORK >= max(N,M,P)*max(NB1,NB2,NB3),
+*          where NB1 is the optimal blocksize for the RQ factorization
+*          of an M-by-N matrix, NB2 is the optimal blocksize for the
+*          QR factorization of a P-by-N matrix, and NB3 is the optimal
+*          blocksize for a call of DORMRQ.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INF0= -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(k), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - taua * v * v'
+*
+*  where taua is a real scalar, and v is a real vector with
+*  v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in
+*  A(m-k+i,1:n-k+i-1), and taua in TAUA(i).
+*  To form Q explicitly, use LAPACK subroutine DORGRQ.
+*  To use Q to update another matrix, use LAPACK subroutine DORMRQ.
+*
+*  The matrix Z is represented as a product of elementary reflectors
+*
+*     Z = H(1) H(2) . . . H(k), where k = min(p,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - taub * v * v'
+*
+*  where taub is a real scalar, and v is a real vector with
+*  v(1:i-1) = 0 and v(i) = 1; v(i+1:p) is stored on exit in B(i+1:p,i),
+*  and taub in TAUB(i).
+*  To form Z explicitly, use LAPACK subroutine DORGQR.
+*  To use Z to update another matrix, use LAPACK subroutine DORMQR.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            LOPT, LWKOPT, NB, NB1, NB2, NB3
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEQRF, DGERQF, DORMRQ, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      NB1 = ILAENV( 1, 'DGERQF', ' ', M, N, -1, -1 )
+      NB2 = ILAENV( 1, 'DGEQRF', ' ', P, N, -1, -1 )
+      NB3 = ILAENV( 1, 'DORMRQ', ' ', M, N, P, -1 )
+      NB = MAX( NB1, NB2, NB3 )
+      LWKOPT = MAX( N, M, P )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( P.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, P ) ) THEN
+         INFO = -8
+      ELSE IF( LWORK.LT.MAX( 1, M, P, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGGRQF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     RQ factorization of M-by-N matrix A: A = R*Q
+*
+      CALL DGERQF( M, N, A, LDA, TAUA, WORK, LWORK, INFO )
+      LOPT = WORK( 1 )
+*
+*     Update B := B*Q'
+*
+      CALL DORMRQ( 'Right', 'Transpose', P, N, MIN( M, N ),
+     $             A( MAX( 1, M-N+1 ), 1 ), LDA, TAUA, B, LDB, WORK,
+     $             LWORK, INFO )
+      LOPT = MAX( LOPT, INT( WORK( 1 ) ) )
+*
+*     QR factorization of P-by-N matrix B: B = Z*T
+*
+      CALL DGEQRF( P, N, B, LDB, TAUB, WORK, LWORK, INFO )
+      WORK( 1 ) = MAX( LOPT, INT( WORK( 1 ) ) )
+*
+      RETURN
+*
+*     End of DGGRQF
+*
+      END
diff --git a/costa/native/external/lapack/dggsvd.f b/costa/native/external/lapack/dggsvd.f
new file mode 100644
index 000000000..dd148186f
--- /dev/null
+++ b/costa/native/external/lapack/dggsvd.f
@@ -0,0 +1,336 @@
+      SUBROUTINE DGGSVD( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B,
+     $                   LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, WORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBQ, JOBU, JOBV
+      INTEGER            INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), ALPHA( * ), B( LDB, * ),
+     $                   BETA( * ), Q( LDQ, * ), U( LDU, * ),
+     $                   V( LDV, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGGSVD computes the generalized singular value decomposition (GSVD)
+*  of an M-by-N real matrix A and P-by-N real matrix B:
+*
+*      U'*A*Q = D1*( 0 R ),    V'*B*Q = D2*( 0 R )
+*
+*  where U, V and Q are orthogonal matrices, and Z' is the transpose
+*  of Z.  Let K+L = the effective numerical rank of the matrix (A',B')',
+*  then R is a K+L-by-K+L nonsingular upper triangular matrix, D1 and
+*  D2 are M-by-(K+L) and P-by-(K+L) "diagonal" matrices and of the
+*  following structures, respectively:
+*
+*  If M-K-L >= 0,
+*
+*                      K  L
+*         D1 =     K ( I  0 )
+*                  L ( 0  C )
+*              M-K-L ( 0  0 )
+*
+*                    K  L
+*         D2 =   L ( 0  S )
+*              P-L ( 0  0 )
+*
+*                  N-K-L  K    L
+*    ( 0 R ) = K (  0   R11  R12 )
+*              L (  0    0   R22 )
+*
+*  where
+*
+*    C = diag( ALPHA(K+1), ... , ALPHA(K+L) ),
+*    S = diag( BETA(K+1),  ... , BETA(K+L) ),
+*    C**2 + S**2 = I.
+*
+*    R is stored in A(1:K+L,N-K-L+1:N) on exit.
+*
+*  If M-K-L < 0,
+*
+*                    K M-K K+L-M
+*         D1 =   K ( I  0    0   )
+*              M-K ( 0  C    0   )
+*
+*                      K M-K K+L-M
+*         D2 =   M-K ( 0  S    0  )
+*              K+L-M ( 0  0    I  )
+*                P-L ( 0  0    0  )
+*
+*                     N-K-L  K   M-K  K+L-M
+*    ( 0 R ) =     K ( 0    R11  R12  R13  )
+*                M-K ( 0     0   R22  R23  )
+*              K+L-M ( 0     0    0   R33  )
+*
+*  where
+*
+*    C = diag( ALPHA(K+1), ... , ALPHA(M) ),
+*    S = diag( BETA(K+1),  ... , BETA(M) ),
+*    C**2 + S**2 = I.
+*
+*    (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored
+*    ( 0  R22 R23 )
+*    in B(M-K+1:L,N+M-K-L+1:N) on exit.
+*
+*  The routine computes C, S, R, and optionally the orthogonal
+*  transformation matrices U, V and Q.
+*
+*  In particular, if B is an N-by-N nonsingular matrix, then the GSVD of
+*  A and B implicitly gives the SVD of A*inv(B):
+*                       A*inv(B) = U*(D1*inv(D2))*V'.
+*  If ( A',B')' has orthonormal columns, then the GSVD of A and B is
+*  also equal to the CS decomposition of A and B. Furthermore, the GSVD
+*  can be used to derive the solution of the eigenvalue problem:
+*                       A'*A x = lambda* B'*B x.
+*  In some literature, the GSVD of A and B is presented in the form
+*                   U'*A*X = ( 0 D1 ),   V'*B*X = ( 0 D2 )
+*  where U and V are orthogonal and X is nonsingular, D1 and D2 are
+*  ``diagonal''.  The former GSVD form can be converted to the latter
+*  form by taking the nonsingular matrix X as
+*
+*                       X = Q*( I   0    )
+*                             ( 0 inv(R) ).
+*
+*  Arguments
+*  =========
+*
+*  JOBU    (input) CHARACTER*1
+*          = 'U':  Orthogonal matrix U is computed;
+*          = 'N':  U is not computed.
+*
+*  JOBV    (input) CHARACTER*1
+*          = 'V':  Orthogonal matrix V is computed;
+*          = 'N':  V is not computed.
+*
+*  JOBQ    (input) CHARACTER*1
+*          = 'Q':  Orthogonal matrix Q is computed;
+*          = 'N':  Q is not computed.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrices A and B.  N >= 0.
+*
+*  P       (input) INTEGER
+*          The number of rows of the matrix B.  P >= 0.
+*
+*  K       (output) INTEGER
+*  L       (output) INTEGER
+*          On exit, K and L specify the dimension of the subblocks
+*          described in the Purpose section.
+*          K + L = effective numerical rank of (A',B')'.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, A contains the triangular matrix R, or part of R.
+*          See Purpose for details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,N)
+*          On entry, the P-by-N matrix B.
+*          On exit, B contains the triangular matrix R if M-K-L < 0.
+*          See Purpose for details.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDA >= max(1,P).
+*
+*  ALPHA   (output) DOUBLE PRECISION array, dimension (N)
+*  BETA    (output) DOUBLE PRECISION array, dimension (N)
+*          On exit, ALPHA and BETA contain the generalized singular
+*          value pairs of A and B;
+*            ALPHA(1:K) = 1,
+*            BETA(1:K)  = 0,
+*          and if M-K-L >= 0,
+*            ALPHA(K+1:K+L) = C,
+*            BETA(K+1:K+L)  = S,
+*          or if M-K-L < 0,
+*            ALPHA(K+1:M)=C, ALPHA(M+1:K+L)=0
+*            BETA(K+1:M) =S, BETA(M+1:K+L) =1
+*          and
+*            ALPHA(K+L+1:N) = 0
+*            BETA(K+L+1:N)  = 0
+*
+*  U       (output) DOUBLE PRECISION array, dimension (LDU,M)
+*          If JOBU = 'U', U contains the M-by-M orthogonal matrix U.
+*          If JOBU = 'N', U is not referenced.
+*
+*  LDU     (input) INTEGER
+*          The leading dimension of the array U. LDU >= max(1,M) if
+*          JOBU = 'U'; LDU >= 1 otherwise.
+*
+*  V       (output) DOUBLE PRECISION array, dimension (LDV,P)
+*          If JOBV = 'V', V contains the P-by-P orthogonal matrix V.
+*          If JOBV = 'N', V is not referenced.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V. LDV >= max(1,P) if
+*          JOBV = 'V'; LDV >= 1 otherwise.
+*
+*  Q       (output) DOUBLE PRECISION array, dimension (LDQ,N)
+*          If JOBQ = 'Q', Q contains the N-by-N orthogonal matrix Q.
+*          If JOBQ = 'N', Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q. LDQ >= max(1,N) if
+*          JOBQ = 'Q'; LDQ >= 1 otherwise.
+*
+*  WORK    (workspace) DOUBLE PRECISION array,
+*                      dimension (max(3*N,M,P)+N)
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (N)
+*          On exit, IWORK stores the sorting information. More
+*          precisely, the following loop will sort ALPHA
+*             for I = K+1, min(M,K+L)
+*                 swap ALPHA(I) and ALPHA(IWORK(I))
+*             endfor
+*          such that ALPHA(1) >= ALPHA(2) >= ... >= ALPHA(N).
+*
+*  INFO    (output)INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = 1, the Jacobi-type procedure failed to
+*                converge.  For further details, see subroutine DTGSJA.
+*
+*  Internal Parameters
+*  ===================
+*
+*  TOLA    DOUBLE PRECISION
+*  TOLB    DOUBLE PRECISION
+*          TOLA and TOLB are the thresholds to determine the effective
+*          rank of (A',B')'. Generally, they are set to
+*                   TOLA = MAX(M,N)*norm(A)*MAZHEPS,
+*                   TOLB = MAX(P,N)*norm(B)*MAZHEPS.
+*          The size of TOLA and TOLB may affect the size of backward
+*          errors of the decomposition.
+*
+*  Further Details
+*  ===============
+*
+*  2-96 Based on modifications by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            WANTQ, WANTU, WANTV
+      INTEGER            I, IBND, ISUB, J, NCYCLE
+      DOUBLE PRECISION   ANORM, BNORM, SMAX, TEMP, TOLA, TOLB, ULP, UNFL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, DLANGE
+      EXTERNAL           LSAME, DLAMCH, DLANGE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DGGSVP, DTGSJA, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      WANTU = LSAME( JOBU, 'U' )
+      WANTV = LSAME( JOBV, 'V' )
+      WANTQ = LSAME( JOBQ, 'Q' )
+*
+      INFO = 0
+      IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( P.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LDB.LT.MAX( 1, P ) ) THEN
+         INFO = -12
+      ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN
+         INFO = -16
+      ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN
+         INFO = -18
+      ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN
+         INFO = -20
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGGSVD', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the Frobenius norm of matrices A and B
+*
+      ANORM = DLANGE( '1', M, N, A, LDA, WORK )
+      BNORM = DLANGE( '1', P, N, B, LDB, WORK )
+*
+*     Get machine precision and set up threshold for determining
+*     the effective numerical rank of the matrices A and B.
+*
+      ULP = DLAMCH( 'Precision' )
+      UNFL = DLAMCH( 'Safe Minimum' )
+      TOLA = MAX( M, N )*MAX( ANORM, UNFL )*ULP
+      TOLB = MAX( P, N )*MAX( BNORM, UNFL )*ULP
+*
+*     Preprocessing
+*
+      CALL DGGSVP( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, TOLA,
+     $             TOLB, K, L, U, LDU, V, LDV, Q, LDQ, IWORK, WORK,
+     $             WORK( N+1 ), INFO )
+*
+*     Compute the GSVD of two upper "triangular" matrices
+*
+      CALL DTGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B, LDB,
+     $             TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ,
+     $             WORK, NCYCLE, INFO )
+*
+*     Sort the singular values and store the pivot indices in IWORK
+*     Copy ALPHA to WORK, then sort ALPHA in WORK
+*
+      CALL DCOPY( N, ALPHA, 1, WORK, 1 )
+      IBND = MIN( L, M-K )
+      DO 20 I = 1, IBND
+*
+*        Scan for largest ALPHA(K+I)
+*
+         ISUB = I
+         SMAX = WORK( K+I )
+         DO 10 J = I + 1, IBND
+            TEMP = WORK( K+J )
+            IF( TEMP.GT.SMAX ) THEN
+               ISUB = J
+               SMAX = TEMP
+            END IF
+   10    CONTINUE
+         IF( ISUB.NE.I ) THEN
+            WORK( K+ISUB ) = WORK( K+I )
+            WORK( K+I ) = SMAX
+            IWORK( K+I ) = K + ISUB
+         ELSE
+            IWORK( K+I ) = K + I
+         END IF
+   20 CONTINUE
+*
+      RETURN
+*
+*     End of DGGSVD
+*
+      END
diff --git a/costa/native/external/lapack/dggsvp.f b/costa/native/external/lapack/dggsvp.f
new file mode 100644
index 000000000..c6475734a
--- /dev/null
+++ b/costa/native/external/lapack/dggsvp.f
@@ -0,0 +1,394 @@
+      SUBROUTINE DGGSVP( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB,
+     $                   TOLA, TOLB, K, L, U, LDU, V, LDV, Q, LDQ,
+     $                   IWORK, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBQ, JOBU, JOBV
+      INTEGER            INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P
+      DOUBLE PRECISION   TOLA, TOLB
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
+     $                   TAU( * ), U( LDU, * ), V( LDV, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGGSVP computes orthogonal matrices U, V and Q such that
+*
+*                   N-K-L  K    L
+*   U'*A*Q =     K ( 0    A12  A13 )  if M-K-L >= 0;
+*                L ( 0     0   A23 )
+*            M-K-L ( 0     0    0  )
+*
+*                   N-K-L  K    L
+*          =     K ( 0    A12  A13 )  if M-K-L < 0;
+*              M-K ( 0     0   A23 )
+*
+*                 N-K-L  K    L
+*   V'*B*Q =   L ( 0     0   B13 )
+*            P-L ( 0     0    0  )
+*
+*  where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular
+*  upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0,
+*  otherwise A23 is (M-K)-by-L upper trapezoidal.  K+L = the effective
+*  numerical rank of the (M+P)-by-N matrix (A',B')'.  Z' denotes the
+*  transpose of Z.
+*
+*  This decomposition is the preprocessing step for computing the
+*  Generalized Singular Value Decomposition (GSVD), see subroutine
+*  DGGSVD.
+*
+*  Arguments
+*  =========
+*
+*  JOBU    (input) CHARACTER*1
+*          = 'U':  Orthogonal matrix U is computed;
+*          = 'N':  U is not computed.
+*
+*  JOBV    (input) CHARACTER*1
+*          = 'V':  Orthogonal matrix V is computed;
+*          = 'N':  V is not computed.
+*
+*  JOBQ    (input) CHARACTER*1
+*          = 'Q':  Orthogonal matrix Q is computed;
+*          = 'N':  Q is not computed.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  P       (input) INTEGER
+*          The number of rows of the matrix B.  P >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrices A and B.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, A contains the triangular (or trapezoidal) matrix
+*          described in the Purpose section.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,N)
+*          On entry, the P-by-N matrix B.
+*          On exit, B contains the triangular matrix described in
+*          the Purpose section.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,P).
+*
+*  TOLA    (input) DOUBLE PRECISION
+*  TOLB    (input) DOUBLE PRECISION
+*          TOLA and TOLB are the thresholds to determine the effective
+*          numerical rank of matrix B and a subblock of A. Generally,
+*          they are set to
+*             TOLA = MAX(M,N)*norm(A)*MAZHEPS,
+*             TOLB = MAX(P,N)*norm(B)*MAZHEPS.
+*          The size of TOLA and TOLB may affect the size of backward
+*          errors of the decomposition.
+*
+*  K       (output) INTEGER
+*  L       (output) INTEGER
+*          On exit, K and L specify the dimension of the subblocks
+*          described in Purpose.
+*          K + L = effective numerical rank of (A',B')'.
+*
+*  U       (output) DOUBLE PRECISION array, dimension (LDU,M)
+*          If JOBU = 'U', U contains the orthogonal matrix U.
+*          If JOBU = 'N', U is not referenced.
+*
+*  LDU     (input) INTEGER
+*          The leading dimension of the array U. LDU >= max(1,M) if
+*          JOBU = 'U'; LDU >= 1 otherwise.
+*
+*  V       (output) DOUBLE PRECISION array, dimension (LDV,M)
+*          If JOBV = 'V', V contains the orthogonal matrix V.
+*          If JOBV = 'N', V is not referenced.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V. LDV >= max(1,P) if
+*          JOBV = 'V'; LDV >= 1 otherwise.
+*
+*  Q       (output) DOUBLE PRECISION array, dimension (LDQ,N)
+*          If JOBQ = 'Q', Q contains the orthogonal matrix Q.
+*          If JOBQ = 'N', Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q. LDQ >= max(1,N) if
+*          JOBQ = 'Q'; LDQ >= 1 otherwise.
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  TAU     (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (max(3*N,M,P))
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*
+*  Further Details
+*  ===============
+*
+*  The subroutine uses LAPACK subroutine DGEQPF for the QR factorization
+*  with column pivoting to detect the effective numerical rank of the
+*  a matrix. It may be replaced by a better rank determination strategy.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            FORWRD, WANTQ, WANTU, WANTV
+      INTEGER            I, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEQPF, DGEQR2, DGERQ2, DLACPY, DLAPMT, DLASET,
+     $                   DORG2R, DORM2R, DORMR2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      WANTU = LSAME( JOBU, 'U' )
+      WANTV = LSAME( JOBV, 'V' )
+      WANTQ = LSAME( JOBQ, 'Q' )
+      FORWRD = .TRUE.
+*
+      INFO = 0
+      IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( P.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -8
+      ELSE IF( LDB.LT.MAX( 1, P ) ) THEN
+         INFO = -10
+      ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN
+         INFO = -16
+      ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN
+         INFO = -18
+      ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN
+         INFO = -20
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGGSVP', -INFO )
+         RETURN
+      END IF
+*
+*     QR with column pivoting of B: B*P = V*( S11 S12 )
+*                                           (  0   0  )
+*
+      DO 10 I = 1, N
+         IWORK( I ) = 0
+   10 CONTINUE
+      CALL DGEQPF( P, N, B, LDB, IWORK, TAU, WORK, INFO )
+*
+*     Update A := A*P
+*
+      CALL DLAPMT( FORWRD, M, N, A, LDA, IWORK )
+*
+*     Determine the effective rank of matrix B.
+*
+      L = 0
+      DO 20 I = 1, MIN( P, N )
+         IF( ABS( B( I, I ) ).GT.TOLB )
+     $      L = L + 1
+   20 CONTINUE
+*
+      IF( WANTV ) THEN
+*
+*        Copy the details of V, and form V.
+*
+         CALL DLASET( 'Full', P, P, ZERO, ZERO, V, LDV )
+         IF( P.GT.1 )
+     $      CALL DLACPY( 'Lower', P-1, N, B( 2, 1 ), LDB, V( 2, 1 ),
+     $                   LDV )
+         CALL DORG2R( P, P, MIN( P, N ), V, LDV, TAU, WORK, INFO )
+      END IF
+*
+*     Clean up B
+*
+      DO 40 J = 1, L - 1
+         DO 30 I = J + 1, L
+            B( I, J ) = ZERO
+   30    CONTINUE
+   40 CONTINUE
+      IF( P.GT.L )
+     $   CALL DLASET( 'Full', P-L, N, ZERO, ZERO, B( L+1, 1 ), LDB )
+*
+      IF( WANTQ ) THEN
+*
+*        Set Q = I and Update Q := Q*P
+*
+         CALL DLASET( 'Full', N, N, ZERO, ONE, Q, LDQ )
+         CALL DLAPMT( FORWRD, N, N, Q, LDQ, IWORK )
+      END IF
+*
+      IF( P.GE.L .AND. N.NE.L ) THEN
+*
+*        RQ factorization of (S11 S12): ( S11 S12 ) = ( 0 S12 )*Z
+*
+         CALL DGERQ2( L, N, B, LDB, TAU, WORK, INFO )
+*
+*        Update A := A*Z'
+*
+         CALL DORMR2( 'Right', 'Transpose', M, N, L, B, LDB, TAU, A,
+     $                LDA, WORK, INFO )
+*
+         IF( WANTQ ) THEN
+*
+*           Update Q := Q*Z'
+*
+            CALL DORMR2( 'Right', 'Transpose', N, N, L, B, LDB, TAU, Q,
+     $                   LDQ, WORK, INFO )
+         END IF
+*
+*        Clean up B
+*
+         CALL DLASET( 'Full', L, N-L, ZERO, ZERO, B, LDB )
+         DO 60 J = N - L + 1, N
+            DO 50 I = J - N + L + 1, L
+               B( I, J ) = ZERO
+   50       CONTINUE
+   60    CONTINUE
+*
+      END IF
+*
+*     Let              N-L     L
+*                A = ( A11    A12 ) M,
+*
+*     then the following does the complete QR decomposition of A11:
+*
+*              A11 = U*(  0  T12 )*P1'
+*                      (  0   0  )
+*
+      DO 70 I = 1, N - L
+         IWORK( I ) = 0
+   70 CONTINUE
+      CALL DGEQPF( M, N-L, A, LDA, IWORK, TAU, WORK, INFO )
+*
+*     Determine the effective rank of A11
+*
+      K = 0
+      DO 80 I = 1, MIN( M, N-L )
+         IF( ABS( A( I, I ) ).GT.TOLA )
+     $      K = K + 1
+   80 CONTINUE
+*
+*     Update A12 := U'*A12, where A12 = A( 1:M, N-L+1:N )
+*
+      CALL DORM2R( 'Left', 'Transpose', M, L, MIN( M, N-L ), A, LDA,
+     $             TAU, A( 1, N-L+1 ), LDA, WORK, INFO )
+*
+      IF( WANTU ) THEN
+*
+*        Copy the details of U, and form U
+*
+         CALL DLASET( 'Full', M, M, ZERO, ZERO, U, LDU )
+         IF( M.GT.1 )
+     $      CALL DLACPY( 'Lower', M-1, N-L, A( 2, 1 ), LDA, U( 2, 1 ),
+     $                   LDU )
+         CALL DORG2R( M, M, MIN( M, N-L ), U, LDU, TAU, WORK, INFO )
+      END IF
+*
+      IF( WANTQ ) THEN
+*
+*        Update Q( 1:N, 1:N-L )  = Q( 1:N, 1:N-L )*P1
+*
+         CALL DLAPMT( FORWRD, N, N-L, Q, LDQ, IWORK )
+      END IF
+*
+*     Clean up A: set the strictly lower triangular part of
+*     A(1:K, 1:K) = 0, and A( K+1:M, 1:N-L ) = 0.
+*
+      DO 100 J = 1, K - 1
+         DO 90 I = J + 1, K
+            A( I, J ) = ZERO
+   90    CONTINUE
+  100 CONTINUE
+      IF( M.GT.K )
+     $   CALL DLASET( 'Full', M-K, N-L, ZERO, ZERO, A( K+1, 1 ), LDA )
+*
+      IF( N-L.GT.K ) THEN
+*
+*        RQ factorization of ( T11 T12 ) = ( 0 T12 )*Z1
+*
+         CALL DGERQ2( K, N-L, A, LDA, TAU, WORK, INFO )
+*
+         IF( WANTQ ) THEN
+*
+*           Update Q( 1:N,1:N-L ) = Q( 1:N,1:N-L )*Z1'
+*
+            CALL DORMR2( 'Right', 'Transpose', N, N-L, K, A, LDA, TAU,
+     $                   Q, LDQ, WORK, INFO )
+         END IF
+*
+*        Clean up A
+*
+         CALL DLASET( 'Full', K, N-L-K, ZERO, ZERO, A, LDA )
+         DO 120 J = N - L - K + 1, N - L
+            DO 110 I = J - N + L + K + 1, K
+               A( I, J ) = ZERO
+  110       CONTINUE
+  120    CONTINUE
+*
+      END IF
+*
+      IF( M.GT.K ) THEN
+*
+*        QR factorization of A( K+1:M,N-L+1:N )
+*
+         CALL DGEQR2( M-K, L, A( K+1, N-L+1 ), LDA, TAU, WORK, INFO )
+*
+         IF( WANTU ) THEN
+*
+*           Update U(:,K+1:M) := U(:,K+1:M)*U1
+*
+            CALL DORM2R( 'Right', 'No transpose', M, M-K, MIN( M-K, L ),
+     $                   A( K+1, N-L+1 ), LDA, TAU, U( 1, K+1 ), LDU,
+     $                   WORK, INFO )
+         END IF
+*
+*        Clean up
+*
+         DO 140 J = N - L + 1, N
+            DO 130 I = J - N + K + L + 1, M
+               A( I, J ) = ZERO
+  130       CONTINUE
+  140    CONTINUE
+*
+      END IF
+*
+      RETURN
+*
+*     End of DGGSVP
+*
+      END
diff --git a/costa/native/external/lapack/dgtcon.f b/costa/native/external/lapack/dgtcon.f
new file mode 100644
index 000000000..4b5c0a2c2
--- /dev/null
+++ b/costa/native/external/lapack/dgtcon.f
@@ -0,0 +1,166 @@
+      SUBROUTINE DGTCON( NORM, N, DL, D, DU, DU2, IPIV, ANORM, RCOND,
+     $                   WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            INFO, N
+      DOUBLE PRECISION   ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), IWORK( * )
+      DOUBLE PRECISION   D( * ), DL( * ), DU( * ), DU2( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGTCON estimates the reciprocal of the condition number of a real
+*  tridiagonal matrix A using the LU factorization as computed by
+*  DGTTRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies whether the 1-norm condition number or the
+*          infinity-norm condition number is required:
+*          = '1' or 'O':  1-norm;
+*          = 'I':         Infinity-norm.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  DL      (input) DOUBLE PRECISION array, dimension (N-1)
+*          The (n-1) multipliers that define the matrix L from the
+*          LU factorization of A as computed by DGTTRF.
+*
+*  D       (input) DOUBLE PRECISION array, dimension (N)
+*          The n diagonal elements of the upper triangular matrix U from
+*          the LU factorization of A.
+*
+*  DU      (input) DOUBLE PRECISION array, dimension (N-1)
+*          The (n-1) elements of the first superdiagonal of U.
+*
+*  DU2     (input) DOUBLE PRECISION array, dimension (N-2)
+*          The (n-2) elements of the second superdiagonal of U.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices; for 1 <= i <= n, row i of the matrix was
+*          interchanged with row IPIV(i).  IPIV(i) will always be either
+*          i or i+1; IPIV(i) = i indicates a row interchange was not
+*          required.
+*
+*  ANORM   (input) DOUBLE PRECISION
+*          If NORM = '1' or 'O', the 1-norm of the original matrix A.
+*          If NORM = 'I', the infinity-norm of the original matrix A.
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an
+*          estimate of the 1-norm of inv(A) computed in this routine.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (2*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ONENRM
+      INTEGER            I, KASE, KASE1
+      DOUBLE PRECISION   AINVNM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGTTRS, DLACON, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments.
+*
+      INFO = 0
+      ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' )
+      IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGTCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.EQ.ZERO ) THEN
+         RETURN
+      END IF
+*
+*     Check that D(1:N) is non-zero.
+*
+      DO 10 I = 1, N
+         IF( D( I ).EQ.ZERO )
+     $      RETURN
+   10 CONTINUE
+*
+      AINVNM = ZERO
+      IF( ONENRM ) THEN
+         KASE1 = 1
+      ELSE
+         KASE1 = 2
+      END IF
+      KASE = 0
+   20 CONTINUE
+      CALL DLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+         IF( KASE.EQ.KASE1 ) THEN
+*
+*           Multiply by inv(U)*inv(L).
+*
+            CALL DGTTRS( 'No transpose', N, 1, DL, D, DU, DU2, IPIV,
+     $                   WORK, N, INFO )
+         ELSE
+*
+*           Multiply by inv(L')*inv(U').
+*
+            CALL DGTTRS( 'Transpose', N, 1, DL, D, DU, DU2, IPIV, WORK,
+     $                   N, INFO )
+         END IF
+         GO TO 20
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+      RETURN
+*
+*     End of DGTCON
+*
+      END
diff --git a/costa/native/external/lapack/dgtrfs.f b/costa/native/external/lapack/dgtrfs.f
new file mode 100644
index 000000000..96915ec9d
--- /dev/null
+++ b/costa/native/external/lapack/dgtrfs.f
@@ -0,0 +1,357 @@
+      SUBROUTINE DGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2,
+     $                   IPIV, B, LDB, X, LDX, FERR, BERR, WORK, IWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            INFO, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), IWORK( * )
+      DOUBLE PRECISION   B( LDB, * ), BERR( * ), D( * ), DF( * ),
+     $                   DL( * ), DLF( * ), DU( * ), DU2( * ), DUF( * ),
+     $                   FERR( * ), WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGTRFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is tridiagonal, and provides
+*  error bounds and backward error estimates for the solution.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose = Transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  DL      (input) DOUBLE PRECISION array, dimension (N-1)
+*          The (n-1) subdiagonal elements of A.
+*
+*  D       (input) DOUBLE PRECISION array, dimension (N)
+*          The diagonal elements of A.
+*
+*  DU      (input) DOUBLE PRECISION array, dimension (N-1)
+*          The (n-1) superdiagonal elements of A.
+*
+*  DLF     (input) DOUBLE PRECISION array, dimension (N-1)
+*          The (n-1) multipliers that define the matrix L from the
+*          LU factorization of A as computed by DGTTRF.
+*
+*  DF      (input) DOUBLE PRECISION array, dimension (N)
+*          The n diagonal elements of the upper triangular matrix U from
+*          the LU factorization of A.
+*
+*  DUF     (input) DOUBLE PRECISION array, dimension (N-1)
+*          The (n-1) elements of the first superdiagonal of U.
+*
+*  DU2     (input) DOUBLE PRECISION array, dimension (N-2)
+*          The (n-2) elements of the second superdiagonal of U.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices; for 1 <= i <= n, row i of the matrix was
+*          interchanged with row IPIV(i).  IPIV(i) will always be either
+*          i or i+1; IPIV(i) = i indicates a row interchange was not
+*          required.
+*
+*  B       (input) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by DGTTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D+0 )
+      DOUBLE PRECISION   THREE
+      PARAMETER          ( THREE = 3.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN
+      CHARACTER          TRANSN, TRANST
+      INTEGER            COUNT, I, J, KASE, NZ
+      DOUBLE PRECISION   EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DCOPY, DGTTRS, DLACON, DLAGTM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $    LSAME( TRANS, 'C' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -13
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -15
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGTRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      IF( NOTRAN ) THEN
+         TRANSN = 'N'
+         TRANST = 'T'
+      ELSE
+         TRANSN = 'T'
+         TRANST = 'N'
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = 4
+      EPS = DLAMCH( 'Epsilon' )
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 110 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - op(A) * X,
+*        where op(A) = A, A**T, or A**H, depending on TRANS.
+*
+         CALL DCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 )
+         CALL DLAGTM( TRANS, N, 1, -ONE, DL, D, DU, X( 1, J ), LDX, ONE,
+     $                WORK( N+1 ), N )
+*
+*        Compute abs(op(A))*abs(x) + abs(b) for use in the backward
+*        error bound.
+*
+         IF( NOTRAN ) THEN
+            IF( N.EQ.1 ) THEN
+               WORK( 1 ) = ABS( B( 1, J ) ) + ABS( D( 1 )*X( 1, J ) )
+            ELSE
+               WORK( 1 ) = ABS( B( 1, J ) ) + ABS( D( 1 )*X( 1, J ) ) +
+     $                     ABS( DU( 1 )*X( 2, J ) )
+               DO 30 I = 2, N - 1
+                  WORK( I ) = ABS( B( I, J ) ) +
+     $                        ABS( DL( I-1 )*X( I-1, J ) ) +
+     $                        ABS( D( I )*X( I, J ) ) +
+     $                        ABS( DU( I )*X( I+1, J ) )
+   30          CONTINUE
+               WORK( N ) = ABS( B( N, J ) ) +
+     $                     ABS( DL( N-1 )*X( N-1, J ) ) +
+     $                     ABS( D( N )*X( N, J ) )
+            END IF
+         ELSE
+            IF( N.EQ.1 ) THEN
+               WORK( 1 ) = ABS( B( 1, J ) ) + ABS( D( 1 )*X( 1, J ) )
+            ELSE
+               WORK( 1 ) = ABS( B( 1, J ) ) + ABS( D( 1 )*X( 1, J ) ) +
+     $                     ABS( DL( 1 )*X( 2, J ) )
+               DO 40 I = 2, N - 1
+                  WORK( I ) = ABS( B( I, J ) ) +
+     $                        ABS( DU( I-1 )*X( I-1, J ) ) +
+     $                        ABS( D( I )*X( I, J ) ) +
+     $                        ABS( DL( I )*X( I+1, J ) )
+   40          CONTINUE
+               WORK( N ) = ABS( B( N, J ) ) +
+     $                     ABS( DU( N-1 )*X( N-1, J ) ) +
+     $                     ABS( D( N )*X( N, J ) )
+            END IF
+         END IF
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         S = ZERO
+         DO 50 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) )
+            ELSE
+               S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) /
+     $             ( WORK( I )+SAFE1 ) )
+            END IF
+   50    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL DGTTRS( TRANS, N, 1, DLF, DF, DUF, DU2, IPIV,
+     $                   WORK( N+1 ), N, INFO )
+            CALL DAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(op(A)))*
+*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(op(A)) is the inverse of op(A)
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(op(A))*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use DLACON to estimate the infinity-norm of the matrix
+*           inv(op(A)) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) )))
+*
+         DO 60 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I )
+            ELSE
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1
+            END IF
+   60    CONTINUE
+*
+         KASE = 0
+   70    CONTINUE
+         CALL DLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ),
+     $                KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(op(A)**T).
+*
+               CALL DGTTRS( TRANST, N, 1, DLF, DF, DUF, DU2, IPIV,
+     $                      WORK( N+1 ), N, INFO )
+               DO 80 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+   80          CONTINUE
+            ELSE
+*
+*              Multiply by inv(op(A))*diag(W).
+*
+               DO 90 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+   90          CONTINUE
+               CALL DGTTRS( TRANSN, N, 1, DLF, DF, DUF, DU2, IPIV,
+     $                      WORK( N+1 ), N, INFO )
+            END IF
+            GO TO 70
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 100 I = 1, N
+            LSTRES = MAX( LSTRES, ABS( X( I, J ) ) )
+  100    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  110 CONTINUE
+*
+      RETURN
+*
+*     End of DGTRFS
+*
+      END
diff --git a/costa/native/external/lapack/dgtsv.f b/costa/native/external/lapack/dgtsv.f
new file mode 100644
index 000000000..dcf1c1970
--- /dev/null
+++ b/costa/native/external/lapack/dgtsv.f
@@ -0,0 +1,263 @@
+      SUBROUTINE DGTSV( N, NRHS, DL, D, DU, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   B( LDB, * ), D( * ), DL( * ), DU( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGTSV  solves the equation
+*
+*     A*X = B,
+*
+*  where A is an n by n tridiagonal matrix, by Gaussian elimination with
+*  partial pivoting.
+*
+*  Note that the equation  A'*X = B  may be solved by interchanging the
+*  order of the arguments DU and DL.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  DL      (input/output) DOUBLE PRECISION array, dimension (N-1)
+*          On entry, DL must contain the (n-1) sub-diagonal elements of
+*          A.
+*
+*          On exit, DL is overwritten by the (n-2) elements of the
+*          second super-diagonal of the upper triangular matrix U from
+*          the LU factorization of A, in DL(1), ..., DL(n-2).
+*
+*  D       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, D must contain the diagonal elements of A.
+*
+*          On exit, D is overwritten by the n diagonal elements of U.
+*
+*  DU      (input/output) DOUBLE PRECISION array, dimension (N-1)
+*          On entry, DU must contain the (n-1) super-diagonal elements
+*          of A.
+*
+*          On exit, DU is overwritten by the (n-1) elements of the first
+*          super-diagonal of U.
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the N by NRHS matrix of right hand side matrix B.
+*          On exit, if INFO = 0, the N by NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, U(i,i) is exactly zero, and the solution
+*               has not been computed.  The factorization has not been
+*               completed unless i = N.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      DOUBLE PRECISION   FACT, TEMP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGTSV ', -INFO )
+         RETURN
+      END IF
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( NRHS.EQ.1 ) THEN
+         DO 10 I = 1, N - 2
+            IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN
+*
+*              No row interchange required
+*
+               IF( D( I ).NE.ZERO ) THEN
+                  FACT = DL( I ) / D( I )
+                  D( I+1 ) = D( I+1 ) - FACT*DU( I )
+                  B( I+1, 1 ) = B( I+1, 1 ) - FACT*B( I, 1 )
+               ELSE
+                  INFO = I
+                  RETURN
+               END IF
+               DL( I ) = ZERO
+            ELSE
+*
+*              Interchange rows I and I+1
+*
+               FACT = D( I ) / DL( I )
+               D( I ) = DL( I )
+               TEMP = D( I+1 )
+               D( I+1 ) = DU( I ) - FACT*TEMP
+               DL( I ) = DU( I+1 )
+               DU( I+1 ) = -FACT*DL( I )
+               DU( I ) = TEMP
+               TEMP = B( I, 1 )
+               B( I, 1 ) = B( I+1, 1 )
+               B( I+1, 1 ) = TEMP - FACT*B( I+1, 1 )
+            END IF
+   10    CONTINUE
+         IF( N.GT.1 ) THEN
+            I = N - 1
+            IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN
+               IF( D( I ).NE.ZERO ) THEN
+                  FACT = DL( I ) / D( I )
+                  D( I+1 ) = D( I+1 ) - FACT*DU( I )
+                  B( I+1, 1 ) = B( I+1, 1 ) - FACT*B( I, 1 )
+               ELSE
+                  INFO = I
+                  RETURN
+               END IF
+            ELSE
+               FACT = D( I ) / DL( I )
+               D( I ) = DL( I )
+               TEMP = D( I+1 )
+               D( I+1 ) = DU( I ) - FACT*TEMP
+               DU( I ) = TEMP
+               TEMP = B( I, 1 )
+               B( I, 1 ) = B( I+1, 1 )
+               B( I+1, 1 ) = TEMP - FACT*B( I+1, 1 )
+            END IF
+         END IF
+         IF( D( N ).EQ.ZERO ) THEN
+            INFO = N
+            RETURN
+         END IF
+      ELSE
+         DO 40 I = 1, N - 2
+            IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN
+*
+*              No row interchange required
+*
+               IF( D( I ).NE.ZERO ) THEN
+                  FACT = DL( I ) / D( I )
+                  D( I+1 ) = D( I+1 ) - FACT*DU( I )
+                  DO 20 J = 1, NRHS
+                     B( I+1, J ) = B( I+1, J ) - FACT*B( I, J )
+   20             CONTINUE
+               ELSE
+                  INFO = I
+                  RETURN
+               END IF
+               DL( I ) = ZERO
+            ELSE
+*
+*              Interchange rows I and I+1
+*
+               FACT = D( I ) / DL( I )
+               D( I ) = DL( I )
+               TEMP = D( I+1 )
+               D( I+1 ) = DU( I ) - FACT*TEMP
+               DL( I ) = DU( I+1 )
+               DU( I+1 ) = -FACT*DL( I )
+               DU( I ) = TEMP
+               DO 30 J = 1, NRHS
+                  TEMP = B( I, J )
+                  B( I, J ) = B( I+1, J )
+                  B( I+1, J ) = TEMP - FACT*B( I+1, J )
+   30          CONTINUE
+            END IF
+   40    CONTINUE
+         IF( N.GT.1 ) THEN
+            I = N - 1
+            IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN
+               IF( D( I ).NE.ZERO ) THEN
+                  FACT = DL( I ) / D( I )
+                  D( I+1 ) = D( I+1 ) - FACT*DU( I )
+                  DO 50 J = 1, NRHS
+                     B( I+1, J ) = B( I+1, J ) - FACT*B( I, J )
+   50             CONTINUE
+               ELSE
+                  INFO = I
+                  RETURN
+               END IF
+            ELSE
+               FACT = D( I ) / DL( I )
+               D( I ) = DL( I )
+               TEMP = D( I+1 )
+               D( I+1 ) = DU( I ) - FACT*TEMP
+               DU( I ) = TEMP
+               DO 60 J = 1, NRHS
+                  TEMP = B( I, J )
+                  B( I, J ) = B( I+1, J )
+                  B( I+1, J ) = TEMP - FACT*B( I+1, J )
+   60          CONTINUE
+            END IF
+         END IF
+         IF( D( N ).EQ.ZERO ) THEN
+            INFO = N
+            RETURN
+         END IF
+      END IF
+*
+*     Back solve with the matrix U from the factorization.
+*
+      IF( NRHS.LE.2 ) THEN
+         J = 1
+   70    CONTINUE
+         B( N, J ) = B( N, J ) / D( N )
+         IF( N.GT.1 )
+     $      B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / D( N-1 )
+         DO 80 I = N - 2, 1, -1
+            B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DL( I )*
+     $                  B( I+2, J ) ) / D( I )
+   80    CONTINUE
+         IF( J.LT.NRHS ) THEN
+            J = J + 1
+            GO TO 70
+         END IF
+      ELSE
+         DO 100 J = 1, NRHS
+            B( N, J ) = B( N, J ) / D( N )
+            IF( N.GT.1 )
+     $         B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) /
+     $                       D( N-1 )
+            DO 90 I = N - 2, 1, -1
+               B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DL( I )*
+     $                     B( I+2, J ) ) / D( I )
+   90       CONTINUE
+  100    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DGTSV
+*
+      END
diff --git a/costa/native/external/lapack/dgtsvx.f b/costa/native/external/lapack/dgtsvx.f
new file mode 100644
index 000000000..f6d0abe68
--- /dev/null
+++ b/costa/native/external/lapack/dgtsvx.f
@@ -0,0 +1,293 @@
+      SUBROUTINE DGTSVX( FACT, TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF,
+     $                   DU2, IPIV, B, LDB, X, LDX, RCOND, FERR, BERR,
+     $                   WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          FACT, TRANS
+      INTEGER            INFO, LDB, LDX, N, NRHS
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), IWORK( * )
+      DOUBLE PRECISION   B( LDB, * ), BERR( * ), D( * ), DF( * ),
+     $                   DL( * ), DLF( * ), DU( * ), DU2( * ), DUF( * ),
+     $                   FERR( * ), WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGTSVX uses the LU factorization to compute the solution to a real
+*  system of linear equations A * X = B or A**T * X = B,
+*  where A is a tridiagonal matrix of order N and X and B are N-by-NRHS
+*  matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'N', the LU decomposition is used to factor the matrix A
+*     as A = L * U, where L is a product of permutation and unit lower
+*     bidiagonal matrices and U is upper triangular with nonzeros in
+*     only the main diagonal and first two superdiagonals.
+*
+*  2. If some U(i,i)=0, so that U is exactly singular, then the routine
+*     returns with INFO = i. Otherwise, the factored form of A is used
+*     to estimate the condition number of the matrix A.  If the
+*     reciprocal of the condition number is less than machine precision,
+*     INFO = N+1 is returned as a warning, but the routine still goes on
+*     to solve for X and compute error bounds as described below.
+*
+*  3. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  4. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of A has been
+*          supplied on entry.
+*          = 'F':  DLF, DF, DUF, DU2, and IPIV contain the factored
+*                  form of A; DL, D, DU, DLF, DF, DUF, DU2 and IPIV
+*                  will not be modified.
+*          = 'N':  The matrix will be copied to DLF, DF, and DUF
+*                  and factored.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose = Transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  DL      (input) DOUBLE PRECISION array, dimension (N-1)
+*          The (n-1) subdiagonal elements of A.
+*
+*  D       (input) DOUBLE PRECISION array, dimension (N)
+*          The n diagonal elements of A.
+*
+*  DU      (input) DOUBLE PRECISION array, dimension (N-1)
+*          The (n-1) superdiagonal elements of A.
+*
+*  DLF     (input or output) DOUBLE PRECISION array, dimension (N-1)
+*          If FACT = 'F', then DLF is an input argument and on entry
+*          contains the (n-1) multipliers that define the matrix L from
+*          the LU factorization of A as computed by DGTTRF.
+*
+*          If FACT = 'N', then DLF is an output argument and on exit
+*          contains the (n-1) multipliers that define the matrix L from
+*          the LU factorization of A.
+*
+*  DF      (input or output) DOUBLE PRECISION array, dimension (N)
+*          If FACT = 'F', then DF is an input argument and on entry
+*          contains the n diagonal elements of the upper triangular
+*          matrix U from the LU factorization of A.
+*
+*          If FACT = 'N', then DF is an output argument and on exit
+*          contains the n diagonal elements of the upper triangular
+*          matrix U from the LU factorization of A.
+*
+*  DUF     (input or output) DOUBLE PRECISION array, dimension (N-1)
+*          If FACT = 'F', then DUF is an input argument and on entry
+*          contains the (n-1) elements of the first superdiagonal of U.
+*
+*          If FACT = 'N', then DUF is an output argument and on exit
+*          contains the (n-1) elements of the first superdiagonal of U.
+*
+*  DU2     (input or output) DOUBLE PRECISION array, dimension (N-2)
+*          If FACT = 'F', then DU2 is an input argument and on entry
+*          contains the (n-2) elements of the second superdiagonal of
+*          U.
+*
+*          If FACT = 'N', then DU2 is an output argument and on exit
+*          contains the (n-2) elements of the second superdiagonal of
+*          U.
+*
+*  IPIV    (input or output) INTEGER array, dimension (N)
+*          If FACT = 'F', then IPIV is an input argument and on entry
+*          contains the pivot indices from the LU factorization of A as
+*          computed by DGTTRF.
+*
+*          If FACT = 'N', then IPIV is an output argument and on exit
+*          contains the pivot indices from the LU factorization of A;
+*          row i of the matrix was interchanged with row IPIV(i).
+*          IPIV(i) will always be either i or i+1; IPIV(i) = i indicates
+*          a row interchange was not required.
+*
+*  B       (input) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          The N-by-NRHS right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) DOUBLE PRECISION array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The estimate of the reciprocal condition number of the matrix
+*          A.  If RCOND is less than the machine precision (in
+*          particular, if RCOND = 0), the matrix is singular to working
+*          precision.  This condition is indicated by a return code of
+*          INFO > 0.
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= N:  U(i,i) is exactly zero.  The factorization
+*                       has not been completed unless i = N, but the
+*                       factor U is exactly singular, so the solution
+*                       and error bounds could not be computed.
+*                       RCOND = 0 is returned.
+*                = N+1: U is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOFACT, NOTRAN
+      CHARACTER          NORM
+      DOUBLE PRECISION   ANORM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, DLANGT
+      EXTERNAL           LSAME, DLAMCH, DLANGT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DGTCON, DGTRFS, DGTTRF, DGTTRS, DLACPY,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -14
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -16
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGTSVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( NOFACT ) THEN
+*
+*        Compute the LU factorization of A.
+*
+         CALL DCOPY( N, D, 1, DF, 1 )
+         IF( N.GT.1 ) THEN
+            CALL DCOPY( N-1, DL, 1, DLF, 1 )
+            CALL DCOPY( N-1, DU, 1, DUF, 1 )
+         END IF
+         CALL DGTTRF( N, DLF, DF, DUF, DU2, IPIV, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      IF( NOTRAN ) THEN
+         NORM = '1'
+      ELSE
+         NORM = 'I'
+      END IF
+      ANORM = DLANGT( NORM, N, DL, D, DU )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL DGTCON( NORM, N, DLF, DF, DUF, DU2, IPIV, ANORM, RCOND, WORK,
+     $             IWORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.DLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution vectors X.
+*
+      CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL DGTTRS( TRANS, N, NRHS, DLF, DF, DUF, DU2, IPIV, X, LDX,
+     $             INFO )
+*
+*     Use iterative refinement to improve the computed solutions and
+*     compute error bounds and backward error estimates for them.
+*
+      CALL DGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2, IPIV,
+     $             B, LDB, X, LDX, FERR, BERR, WORK, IWORK, INFO )
+*
+      RETURN
+*
+*     End of DGTSVX
+*
+      END
diff --git a/costa/native/external/lapack/dgttrf.f b/costa/native/external/lapack/dgttrf.f
new file mode 100644
index 000000000..2aaa8fd61
--- /dev/null
+++ b/costa/native/external/lapack/dgttrf.f
@@ -0,0 +1,169 @@
+      SUBROUTINE DGTTRF( N, DL, D, DU, DU2, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   D( * ), DL( * ), DU( * ), DU2( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGTTRF computes an LU factorization of a real tridiagonal matrix A
+*  using elimination with partial pivoting and row interchanges.
+*
+*  The factorization has the form
+*     A = L * U
+*  where L is a product of permutation and unit lower bidiagonal
+*  matrices and U is upper triangular with nonzeros in only the main
+*  diagonal and first two superdiagonals.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.
+*
+*  DL      (input/output) DOUBLE PRECISION array, dimension (N-1)
+*          On entry, DL must contain the (n-1) sub-diagonal elements of
+*          A.
+*
+*          On exit, DL is overwritten by the (n-1) multipliers that
+*          define the matrix L from the LU factorization of A.
+*
+*  D       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, D must contain the diagonal elements of A.
+*
+*          On exit, D is overwritten by the n diagonal elements of the
+*          upper triangular matrix U from the LU factorization of A.
+*
+*  DU      (input/output) DOUBLE PRECISION array, dimension (N-1)
+*          On entry, DU must contain the (n-1) super-diagonal elements
+*          of A.
+*
+*          On exit, DU is overwritten by the (n-1) elements of the first
+*          super-diagonal of U.
+*
+*  DU2     (output) DOUBLE PRECISION array, dimension (N-2)
+*          On exit, DU2 is overwritten by the (n-2) elements of the
+*          second super-diagonal of U.
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          The pivot indices; for 1 <= i <= n, row i of the matrix was
+*          interchanged with row IPIV(i).  IPIV(i) will always be either
+*          i or i+1; IPIV(i) = i indicates a row interchange was not
+*          required.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -k, the k-th argument had an illegal value
+*          > 0:  if INFO = k, U(k,k) is exactly zero. The factorization
+*                has been completed, but the factor U is exactly
+*                singular, and division by zero will occur if it is used
+*                to solve a system of equations.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      DOUBLE PRECISION   FACT, TEMP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+         CALL XERBLA( 'DGTTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Initialize IPIV(i) = i and DU2(I) = 0
+*
+      DO 10 I = 1, N
+         IPIV( I ) = I
+   10 CONTINUE
+      DO 20 I = 1, N - 2
+         DU2( I ) = ZERO
+   20 CONTINUE
+*
+      DO 30 I = 1, N - 2
+         IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN
+*
+*           No row interchange required, eliminate DL(I)
+*
+            IF( D( I ).NE.ZERO ) THEN
+               FACT = DL( I ) / D( I )
+               DL( I ) = FACT
+               D( I+1 ) = D( I+1 ) - FACT*DU( I )
+            END IF
+         ELSE
+*
+*           Interchange rows I and I+1, eliminate DL(I)
+*
+            FACT = D( I ) / DL( I )
+            D( I ) = DL( I )
+            DL( I ) = FACT
+            TEMP = DU( I )
+            DU( I ) = D( I+1 )
+            D( I+1 ) = TEMP - FACT*D( I+1 )
+            DU2( I ) = DU( I+1 )
+            DU( I+1 ) = -FACT*DU( I+1 )
+            IPIV( I ) = I + 1
+         END IF
+   30 CONTINUE
+      IF( N.GT.1 ) THEN
+         I = N - 1
+         IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN
+            IF( D( I ).NE.ZERO ) THEN
+               FACT = DL( I ) / D( I )
+               DL( I ) = FACT
+               D( I+1 ) = D( I+1 ) - FACT*DU( I )
+            END IF
+         ELSE
+            FACT = D( I ) / DL( I )
+            D( I ) = DL( I )
+            DL( I ) = FACT
+            TEMP = DU( I )
+            DU( I ) = D( I+1 )
+            D( I+1 ) = TEMP - FACT*D( I+1 )
+            IPIV( I ) = I + 1
+         END IF
+      END IF
+*
+*     Check for a zero on the diagonal of U.
+*
+      DO 40 I = 1, N
+         IF( D( I ).EQ.ZERO ) THEN
+            INFO = I
+            GO TO 50
+         END IF
+   40 CONTINUE
+   50 CONTINUE
+*
+      RETURN
+*
+*     End of DGTTRF
+*
+      END
diff --git a/costa/native/external/lapack/dgttrs.f b/costa/native/external/lapack/dgttrs.f
new file mode 100644
index 000000000..2f5ad1694
--- /dev/null
+++ b/costa/native/external/lapack/dgttrs.f
@@ -0,0 +1,141 @@
+      SUBROUTINE DGTTRS( TRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGTTRS solves one of the systems of equations
+*     A*X = B  or  A'*X = B,
+*  with a tridiagonal matrix A using the LU factorization computed
+*  by DGTTRF.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER
+*          Specifies the form of the system of equations.
+*          = 'N':  A * X = B  (No transpose)
+*          = 'T':  A'* X = B  (Transpose)
+*          = 'C':  A'* X = B  (Conjugate transpose = Transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  DL      (input) DOUBLE PRECISION array, dimension (N-1)
+*          The (n-1) multipliers that define the matrix L from the
+*          LU factorization of A.
+*
+*  D       (input) DOUBLE PRECISION array, dimension (N)
+*          The n diagonal elements of the upper triangular matrix U from
+*          the LU factorization of A.
+*
+*  DU      (input) DOUBLE PRECISION array, dimension (N-1)
+*          The (n-1) elements of the first super-diagonal of U.
+*
+*  DU2     (input) DOUBLE PRECISION array, dimension (N-2)
+*          The (n-2) elements of the second super-diagonal of U.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices; for 1 <= i <= n, row i of the matrix was
+*          interchanged with row IPIV(i).  IPIV(i) will always be either
+*          i or i+1; IPIV(i) = i indicates a row interchange was not
+*          required.
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the matrix of right hand side vectors B.
+*          On exit, B is overwritten by the solution vectors X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN
+      INTEGER            ITRANS, J, JB, NB
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGTTS2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      NOTRAN = ( TRANS.EQ.'N' .OR. TRANS.EQ.'n' )
+      IF( .NOT.NOTRAN .AND. .NOT.( TRANS.EQ.'T' .OR. TRANS.EQ.
+     $    't' ) .AND. .NOT.( TRANS.EQ.'C' .OR. TRANS.EQ.'c' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( N, 1 ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGTTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+*     Decode TRANS
+*
+      IF( NOTRAN ) THEN
+         ITRANS = 0
+      ELSE
+         ITRANS = 1
+      END IF
+*
+*     Determine the number of right-hand sides to solve at a time.
+*
+      IF( NRHS.EQ.1 ) THEN
+         NB = 1
+      ELSE
+         NB = MAX( 1, ILAENV( 1, 'DGTTRS', TRANS, N, NRHS, -1, -1 ) )
+      END IF
+*
+      IF( NB.GE.NRHS ) THEN
+         CALL DGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB )
+      ELSE
+         DO 10 J = 1, NRHS, NB
+            JB = MIN( NRHS-J+1, NB )
+            CALL DGTTS2( ITRANS, N, JB, DL, D, DU, DU2, IPIV, B( 1, J ),
+     $                   LDB )
+   10    CONTINUE
+      END IF
+*
+*     End of DGTTRS
+*
+      END
diff --git a/costa/native/external/lapack/dgtts2.f b/costa/native/external/lapack/dgtts2.f
new file mode 100644
index 000000000..67d9f9e68
--- /dev/null
+++ b/costa/native/external/lapack/dgtts2.f
@@ -0,0 +1,197 @@
+      SUBROUTINE DGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            ITRANS, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DGTTS2 solves one of the systems of equations
+*     A*X = B  or  A'*X = B,
+*  with a tridiagonal matrix A using the LU factorization computed
+*  by DGTTRF.
+*
+*  Arguments
+*  =========
+*
+*  ITRANS  (input) INTEGER
+*          Specifies the form of the system of equations.
+*          = 0:  A * X = B  (No transpose)
+*          = 1:  A'* X = B  (Transpose)
+*          = 2:  A'* X = B  (Conjugate transpose = Transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  DL      (input) DOUBLE PRECISION array, dimension (N-1)
+*          The (n-1) multipliers that define the matrix L from the
+*          LU factorization of A.
+*
+*  D       (input) DOUBLE PRECISION array, dimension (N)
+*          The n diagonal elements of the upper triangular matrix U from
+*          the LU factorization of A.
+*
+*  DU      (input) DOUBLE PRECISION array, dimension (N-1)
+*          The (n-1) elements of the first super-diagonal of U.
+*
+*  DU2     (input) DOUBLE PRECISION array, dimension (N-2)
+*          The (n-2) elements of the second super-diagonal of U.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices; for 1 <= i <= n, row i of the matrix was
+*          interchanged with row IPIV(i).  IPIV(i) will always be either
+*          i or i+1; IPIV(i) = i indicates a row interchange was not
+*          required.
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the matrix of right hand side vectors B.
+*          On exit, B is overwritten by the solution vectors X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, IP, J
+      DOUBLE PRECISION   TEMP
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( ITRANS.EQ.0 ) THEN
+*
+*        Solve A*X = B using the LU factorization of A,
+*        overwriting each right hand side vector with its solution.
+*
+         IF( NRHS.LE.1 ) THEN
+            J = 1
+   10       CONTINUE
+*
+*           Solve L*x = b.
+*
+            DO 20 I = 1, N - 1
+               IP = IPIV( I )
+               TEMP = B( I+1-IP+I, J ) - DL( I )*B( IP, J )
+               B( I, J ) = B( IP, J )
+               B( I+1, J ) = TEMP
+   20       CONTINUE
+*
+*           Solve U*x = b.
+*
+            B( N, J ) = B( N, J ) / D( N )
+            IF( N.GT.1 )
+     $         B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) /
+     $                       D( N-1 )
+            DO 30 I = N - 2, 1, -1
+               B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DU2( I )*
+     $                     B( I+2, J ) ) / D( I )
+   30       CONTINUE
+            IF( J.LT.NRHS ) THEN
+               J = J + 1
+               GO TO 10
+            END IF
+         ELSE
+            DO 60 J = 1, NRHS
+*
+*              Solve L*x = b.
+*
+               DO 40 I = 1, N - 1
+                  IF( IPIV( I ).EQ.I ) THEN
+                     B( I+1, J ) = B( I+1, J ) - DL( I )*B( I, J )
+                  ELSE
+                     TEMP = B( I, J )
+                     B( I, J ) = B( I+1, J )
+                     B( I+1, J ) = TEMP - DL( I )*B( I, J )
+                  END IF
+   40          CONTINUE
+*
+*              Solve U*x = b.
+*
+               B( N, J ) = B( N, J ) / D( N )
+               IF( N.GT.1 )
+     $            B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) /
+     $                          D( N-1 )
+               DO 50 I = N - 2, 1, -1
+                  B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DU2( I )*
+     $                        B( I+2, J ) ) / D( I )
+   50          CONTINUE
+   60       CONTINUE
+         END IF
+      ELSE
+*
+*        Solve A' * X = B.
+*
+         IF( NRHS.LE.1 ) THEN
+*
+*           Solve U'*x = b.
+*
+            J = 1
+   70       CONTINUE
+            B( 1, J ) = B( 1, J ) / D( 1 )
+            IF( N.GT.1 )
+     $         B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 )
+            DO 80 I = 3, N
+               B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J )-DU2( I-2 )*
+     $                     B( I-2, J ) ) / D( I )
+   80       CONTINUE
+*
+*           Solve L'*x = b.
+*
+            DO 90 I = N - 1, 1, -1
+               IP = IPIV( I )
+               TEMP = B( I, J ) - DL( I )*B( I+1, J )
+               B( I, J ) = B( IP, J )
+               B( IP, J ) = TEMP
+   90       CONTINUE
+            IF( J.LT.NRHS ) THEN
+               J = J + 1
+               GO TO 70
+            END IF
+*
+         ELSE
+            DO 120 J = 1, NRHS
+*
+*              Solve U'*x = b.
+*
+               B( 1, J ) = B( 1, J ) / D( 1 )
+               IF( N.GT.1 )
+     $            B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 )
+               DO 100 I = 3, N
+                  B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J )-
+     $                        DU2( I-2 )*B( I-2, J ) ) / D( I )
+  100          CONTINUE
+               DO 110 I = N - 1, 1, -1
+                  IF( IPIV( I ).EQ.I ) THEN
+                     B( I, J ) = B( I, J ) - DL( I )*B( I+1, J )
+                  ELSE
+                     TEMP = B( I+1, J )
+                     B( I+1, J ) = B( I, J ) - DL( I )*TEMP
+                     B( I, J ) = TEMP
+                  END IF
+  110          CONTINUE
+  120       CONTINUE
+         END IF
+      END IF
+*
+*     End of DGTTS2
+*
+      END
diff --git a/costa/native/external/lapack/dhgeqz.f b/costa/native/external/lapack/dhgeqz.f
new file mode 100644
index 000000000..0e8af2e5e
--- /dev/null
+++ b/costa/native/external/lapack/dhgeqz.f
@@ -0,0 +1,1243 @@
+      SUBROUTINE DHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB,
+     $                   ALPHAR, ALPHAI, BETA, Q, LDQ, Z, LDZ, WORK,
+     $                   LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPQ, COMPZ, JOB
+      INTEGER            IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), ALPHAI( * ), ALPHAR( * ),
+     $                   B( LDB, * ), BETA( * ), Q( LDQ, * ), WORK( * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DHGEQZ implements a single-/double-shift version of the QZ method for
+*  finding the generalized eigenvalues
+*
+*  w(j)=(ALPHAR(j) + i*ALPHAI(j))/BETAR(j)   of the equation
+*
+*       det( A - w(i) B ) = 0
+*
+*  In addition, the pair A,B may be reduced to generalized Schur form:
+*  B is upper triangular, and A is block upper triangular, where the
+*  diagonal blocks are either 1-by-1 or 2-by-2, the 2-by-2 blocks having
+*  complex generalized eigenvalues (see the description of the argument
+*  JOB.)
+*
+*  If JOB='S', then the pair (A,B) is simultaneously reduced to Schur
+*  form by applying one orthogonal tranformation (usually called Q) on
+*  the left and another (usually called Z) on the right.  The 2-by-2
+*  upper-triangular diagonal blocks of B corresponding to 2-by-2 blocks
+*  of A will be reduced to positive diagonal matrices.  (I.e.,
+*  if A(j+1,j) is non-zero, then B(j+1,j)=B(j,j+1)=0 and B(j,j) and
+*  B(j+1,j+1) will be positive.)
+*
+*  If JOB='E', then at each iteration, the same transformations
+*  are computed, but they are only applied to those parts of A and B
+*  which are needed to compute ALPHAR, ALPHAI, and BETAR.
+*
+*  If JOB='S' and COMPQ and COMPZ are 'V' or 'I', then the orthogonal
+*  transformations used to reduce (A,B) are accumulated into the arrays
+*  Q and Z s.t.:
+*
+*       Q(in) A(in) Z(in)* = Q(out) A(out) Z(out)*
+*       Q(in) B(in) Z(in)* = Q(out) B(out) Z(out)*
+*
+*  Ref: C.B. Moler & G.W. Stewart, "An Algorithm for Generalized Matrix
+*       Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973),
+*       pp. 241--256.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          = 'E': compute only ALPHAR, ALPHAI, and BETA.  A and B will
+*                 not necessarily be put into generalized Schur form.
+*          = 'S': put A and B into generalized Schur form, as well
+*                 as computing ALPHAR, ALPHAI, and BETA.
+*
+*  COMPQ   (input) CHARACTER*1
+*          = 'N': do not modify Q.
+*          = 'V': multiply the array Q on the right by the transpose of
+*                 the orthogonal tranformation that is applied to the
+*                 left side of A and B to reduce them to Schur form.
+*          = 'I': like COMPQ='V', except that Q will be initialized to
+*                 the identity first.
+*
+*  COMPZ   (input) CHARACTER*1
+*          = 'N': do not modify Z.
+*          = 'V': multiply the array Z on the right by the orthogonal
+*                 tranformation that is applied to the right side of
+*                 A and B to reduce them to Schur form.
+*          = 'I': like COMPZ='V', except that Z will be initialized to
+*                 the identity first.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A, B, Q, and Z.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          It is assumed that A is already upper triangular in rows and
+*          columns 1:ILO-1 and IHI+1:N.
+*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA, N)
+*          On entry, the N-by-N upper Hessenberg matrix A.  Elements
+*          below the subdiagonal must be zero.
+*          If JOB='S', then on exit A and B will have been
+*             simultaneously reduced to generalized Schur form.
+*          If JOB='E', then on exit A will have been destroyed.
+*             The diagonal blocks will be correct, but the off-diagonal
+*             portion will be meaningless.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max( 1, N ).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB, N)
+*          On entry, the N-by-N upper triangular matrix B.  Elements
+*          below the diagonal must be zero.  2-by-2 blocks in B
+*          corresponding to 2-by-2 blocks in A will be reduced to
+*          positive diagonal form.  (I.e., if A(j+1,j) is non-zero,
+*          then B(j+1,j)=B(j,j+1)=0 and B(j,j) and B(j+1,j+1) will be
+*          positive.)
+*          If JOB='S', then on exit A and B will have been
+*             simultaneously reduced to Schur form.
+*          If JOB='E', then on exit B will have been destroyed.
+*             Elements corresponding to diagonal blocks of A will be
+*             correct, but the off-diagonal portion will be meaningless.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max( 1, N ).
+*
+*  ALPHAR  (output) DOUBLE PRECISION array, dimension (N)
+*          ALPHAR(1:N) will be set to real parts of the diagonal
+*          elements of A that would result from reducing A and B to
+*          Schur form and then further reducing them both to triangular
+*          form using unitary transformations s.t. the diagonal of B
+*          was non-negative real.  Thus, if A(j,j) is in a 1-by-1 block
+*          (i.e., A(j+1,j)=A(j,j+1)=0), then ALPHAR(j)=A(j,j).
+*          Note that the (real or complex) values
+*          (ALPHAR(j) + i*ALPHAI(j))/BETA(j), j=1,...,N, are the
+*          generalized eigenvalues of the matrix pencil A - wB.
+*
+*  ALPHAI  (output) DOUBLE PRECISION array, dimension (N)
+*          ALPHAI(1:N) will be set to imaginary parts of the diagonal
+*          elements of A that would result from reducing A and B to
+*          Schur form and then further reducing them both to triangular
+*          form using unitary transformations s.t. the diagonal of B
+*          was non-negative real.  Thus, if A(j,j) is in a 1-by-1 block
+*          (i.e., A(j+1,j)=A(j,j+1)=0), then ALPHAR(j)=0.
+*          Note that the (real or complex) values
+*          (ALPHAR(j) + i*ALPHAI(j))/BETA(j), j=1,...,N, are the
+*          generalized eigenvalues of the matrix pencil A - wB.
+*
+*  BETA    (output) DOUBLE PRECISION array, dimension (N)
+*          BETA(1:N) will be set to the (real) diagonal elements of B
+*          that would result from reducing A and B to Schur form and
+*          then further reducing them both to triangular form using
+*          unitary transformations s.t. the diagonal of B was
+*          non-negative real.  Thus, if A(j,j) is in a 1-by-1 block
+*          (i.e., A(j+1,j)=A(j,j+1)=0), then BETA(j)=B(j,j).
+*          Note that the (real or complex) values
+*          (ALPHAR(j) + i*ALPHAI(j))/BETA(j), j=1,...,N, are the
+*          generalized eigenvalues of the matrix pencil A - wB.
+*          (Note that BETA(1:N) will always be non-negative, and no
+*          BETAI is necessary.)
+*
+*  Q       (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
+*          If COMPQ='N', then Q will not be referenced.
+*          If COMPQ='V' or 'I', then the transpose of the orthogonal
+*             transformations which are applied to A and B on the left
+*             will be applied to the array Q on the right.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.  LDQ >= 1.
+*          If COMPQ='V' or 'I', then LDQ >= N.
+*
+*  Z       (input/output) DOUBLE PRECISION array, dimension (LDZ, N)
+*          If COMPZ='N', then Z will not be referenced.
+*          If COMPZ='V' or 'I', then the orthogonal transformations
+*             which are applied to A and B on the right will be applied
+*             to the array Z on the right.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1.
+*          If COMPZ='V' or 'I', then LDZ >= N.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO >= 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,N).
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          = 1,...,N: the QZ iteration did not converge.  (A,B) is not
+*                     in Schur form, but ALPHAR(i), ALPHAI(i), and
+*                     BETA(i), i=INFO+1,...,N should be correct.
+*          = N+1,...,2*N: the shift calculation failed.  (A,B) is not
+*                     in Schur form, but ALPHAR(i), ALPHAI(i), and
+*                     BETA(i), i=INFO-N+1,...,N should be correct.
+*          > 2*N:     various "impossible" errors.
+*
+*  Further Details
+*  ===============
+*
+*  Iteration counters:
+*
+*  JITER  -- counts iterations.
+*  IITER  -- counts iterations run since ILAST was last
+*            changed.  This is therefore reset only when a 1-by-1 or
+*            2-by-2 block deflates off the bottom.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+*    $                     SAFETY = 1.0E+0 )
+      DOUBLE PRECISION   HALF, ZERO, ONE, SAFETY
+      PARAMETER          ( HALF = 0.5D+0, ZERO = 0.0D+0, ONE = 1.0D+0,
+     $                   SAFETY = 1.0D+2 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ILAZR2, ILAZRO, ILPIVT, ILQ, ILSCHR, ILZ,
+     $                   LQUERY
+      INTEGER            ICOMPQ, ICOMPZ, IFIRST, IFRSTM, IITER, ILAST,
+     $                   ILASTM, IN, ISCHUR, ISTART, J, JC, JCH, JITER,
+     $                   JR, MAXIT
+      DOUBLE PRECISION   A11, A12, A1I, A1R, A21, A22, A2I, A2R, AD11,
+     $                   AD11L, AD12, AD12L, AD21, AD21L, AD22, AD22L,
+     $                   AD32L, AN, ANORM, ASCALE, ATOL, B11, B1A, B1I,
+     $                   B1R, B22, B2A, B2I, B2R, BN, BNORM, BSCALE,
+     $                   BTOL, C, C11I, C11R, C12, C21, C22I, C22R, CL,
+     $                   CQ, CR, CZ, ESHIFT, S, S1, S1INV, S2, SAFMAX,
+     $                   SAFMIN, SCALE, SL, SQI, SQR, SR, SZI, SZR, T,
+     $                   TAU, TEMP, TEMP2, TEMPI, TEMPR, U1, U12, U12L,
+     $                   U2, ULP, VS, W11, W12, W21, W22, WABS, WI, WR,
+     $                   WR2
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   V( 3 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, DLANHS, DLAPY2, DLAPY3
+      EXTERNAL           LSAME, DLAMCH, DLANHS, DLAPY2, DLAPY3
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLAG2, DLARFG, DLARTG, DLASET, DLASV2, DROT,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode JOB, COMPQ, COMPZ
+*
+      IF( LSAME( JOB, 'E' ) ) THEN
+         ILSCHR = .FALSE.
+         ISCHUR = 1
+      ELSE IF( LSAME( JOB, 'S' ) ) THEN
+         ILSCHR = .TRUE.
+         ISCHUR = 2
+      ELSE
+         ISCHUR = 0
+      END IF
+*
+      IF( LSAME( COMPQ, 'N' ) ) THEN
+         ILQ = .FALSE.
+         ICOMPQ = 1
+      ELSE IF( LSAME( COMPQ, 'V' ) ) THEN
+         ILQ = .TRUE.
+         ICOMPQ = 2
+      ELSE IF( LSAME( COMPQ, 'I' ) ) THEN
+         ILQ = .TRUE.
+         ICOMPQ = 3
+      ELSE
+         ICOMPQ = 0
+      END IF
+*
+      IF( LSAME( COMPZ, 'N' ) ) THEN
+         ILZ = .FALSE.
+         ICOMPZ = 1
+      ELSE IF( LSAME( COMPZ, 'V' ) ) THEN
+         ILZ = .TRUE.
+         ICOMPZ = 2
+      ELSE IF( LSAME( COMPZ, 'I' ) ) THEN
+         ILZ = .TRUE.
+         ICOMPZ = 3
+      ELSE
+         ICOMPZ = 0
+      END IF
+*
+*     Check Argument Values
+*
+      INFO = 0
+      WORK( 1 ) = MAX( 1, N )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( ISCHUR.EQ.0 ) THEN
+         INFO = -1
+      ELSE IF( ICOMPQ.EQ.0 ) THEN
+         INFO = -2
+      ELSE IF( ICOMPZ.EQ.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( ILO.LT.1 ) THEN
+         INFO = -5
+      ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.N ) THEN
+         INFO = -8
+      ELSE IF( LDB.LT.N ) THEN
+         INFO = -10
+      ELSE IF( LDQ.LT.1 .OR. ( ILQ .AND. LDQ.LT.N ) ) THEN
+         INFO = -15
+      ELSE IF( LDZ.LT.1 .OR. ( ILZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -17
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -19
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DHGEQZ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 ) THEN
+         WORK( 1 ) = DBLE( 1 )
+         RETURN
+      END IF
+*
+*     Initialize Q and Z
+*
+      IF( ICOMPQ.EQ.3 )
+     $   CALL DLASET( 'Full', N, N, ZERO, ONE, Q, LDQ )
+      IF( ICOMPZ.EQ.3 )
+     $   CALL DLASET( 'Full', N, N, ZERO, ONE, Z, LDZ )
+*
+*     Machine Constants
+*
+      IN = IHI + 1 - ILO
+      SAFMIN = DLAMCH( 'S' )
+      SAFMAX = ONE / SAFMIN
+      ULP = DLAMCH( 'E' )*DLAMCH( 'B' )
+      ANORM = DLANHS( 'F', IN, A( ILO, ILO ), LDA, WORK )
+      BNORM = DLANHS( 'F', IN, B( ILO, ILO ), LDB, WORK )
+      ATOL = MAX( SAFMIN, ULP*ANORM )
+      BTOL = MAX( SAFMIN, ULP*BNORM )
+      ASCALE = ONE / MAX( SAFMIN, ANORM )
+      BSCALE = ONE / MAX( SAFMIN, BNORM )
+*
+*     Set Eigenvalues IHI+1:N
+*
+      DO 30 J = IHI + 1, N
+         IF( B( J, J ).LT.ZERO ) THEN
+            IF( ILSCHR ) THEN
+               DO 10 JR = 1, J
+                  A( JR, J ) = -A( JR, J )
+                  B( JR, J ) = -B( JR, J )
+   10          CONTINUE
+            ELSE
+               A( J, J ) = -A( J, J )
+               B( J, J ) = -B( J, J )
+            END IF
+            IF( ILZ ) THEN
+               DO 20 JR = 1, N
+                  Z( JR, J ) = -Z( JR, J )
+   20          CONTINUE
+            END IF
+         END IF
+         ALPHAR( J ) = A( J, J )
+         ALPHAI( J ) = ZERO
+         BETA( J ) = B( J, J )
+   30 CONTINUE
+*
+*     If IHI < ILO, skip QZ steps
+*
+      IF( IHI.LT.ILO )
+     $   GO TO 380
+*
+*     MAIN QZ ITERATION LOOP
+*
+*     Initialize dynamic indices
+*
+*     Eigenvalues ILAST+1:N have been found.
+*        Column operations modify rows IFRSTM:whatever.
+*        Row operations modify columns whatever:ILASTM.
+*
+*     If only eigenvalues are being computed, then
+*        IFRSTM is the row of the last splitting row above row ILAST;
+*        this is always at least ILO.
+*     IITER counts iterations since the last eigenvalue was found,
+*        to tell when to use an extraordinary shift.
+*     MAXIT is the maximum number of QZ sweeps allowed.
+*
+      ILAST = IHI
+      IF( ILSCHR ) THEN
+         IFRSTM = 1
+         ILASTM = N
+      ELSE
+         IFRSTM = ILO
+         ILASTM = IHI
+      END IF
+      IITER = 0
+      ESHIFT = ZERO
+      MAXIT = 30*( IHI-ILO+1 )
+*
+      DO 360 JITER = 1, MAXIT
+*
+*        Split the matrix if possible.
+*
+*        Two tests:
+*           1: A(j,j-1)=0  or  j=ILO
+*           2: B(j,j)=0
+*
+         IF( ILAST.EQ.ILO ) THEN
+*
+*           Special case: j=ILAST
+*
+            GO TO 80
+         ELSE
+            IF( ABS( A( ILAST, ILAST-1 ) ).LE.ATOL ) THEN
+               A( ILAST, ILAST-1 ) = ZERO
+               GO TO 80
+            END IF
+         END IF
+*
+         IF( ABS( B( ILAST, ILAST ) ).LE.BTOL ) THEN
+            B( ILAST, ILAST ) = ZERO
+            GO TO 70
+         END IF
+*
+*        General case: j<ILAST
+*
+         DO 60 J = ILAST - 1, ILO, -1
+*
+*           Test 1: for A(j,j-1)=0 or j=ILO
+*
+            IF( J.EQ.ILO ) THEN
+               ILAZRO = .TRUE.
+            ELSE
+               IF( ABS( A( J, J-1 ) ).LE.ATOL ) THEN
+                  A( J, J-1 ) = ZERO
+                  ILAZRO = .TRUE.
+               ELSE
+                  ILAZRO = .FALSE.
+               END IF
+            END IF
+*
+*           Test 2: for B(j,j)=0
+*
+            IF( ABS( B( J, J ) ).LT.BTOL ) THEN
+               B( J, J ) = ZERO
+*
+*              Test 1a: Check for 2 consecutive small subdiagonals in A
+*
+               ILAZR2 = .FALSE.
+               IF( .NOT.ILAZRO ) THEN
+                  TEMP = ABS( A( J, J-1 ) )
+                  TEMP2 = ABS( A( J, J ) )
+                  TEMPR = MAX( TEMP, TEMP2 )
+                  IF( TEMPR.LT.ONE .AND. TEMPR.NE.ZERO ) THEN
+                     TEMP = TEMP / TEMPR
+                     TEMP2 = TEMP2 / TEMPR
+                  END IF
+                  IF( TEMP*( ASCALE*ABS( A( J+1, J ) ) ).LE.TEMP2*
+     $                ( ASCALE*ATOL ) )ILAZR2 = .TRUE.
+               END IF
+*
+*              If both tests pass (1 & 2), i.e., the leading diagonal
+*              element of B in the block is zero, split a 1x1 block off
+*              at the top. (I.e., at the J-th row/column) The leading
+*              diagonal element of the remainder can also be zero, so
+*              this may have to be done repeatedly.
+*
+               IF( ILAZRO .OR. ILAZR2 ) THEN
+                  DO 40 JCH = J, ILAST - 1
+                     TEMP = A( JCH, JCH )
+                     CALL DLARTG( TEMP, A( JCH+1, JCH ), C, S,
+     $                            A( JCH, JCH ) )
+                     A( JCH+1, JCH ) = ZERO
+                     CALL DROT( ILASTM-JCH, A( JCH, JCH+1 ), LDA,
+     $                          A( JCH+1, JCH+1 ), LDA, C, S )
+                     CALL DROT( ILASTM-JCH, B( JCH, JCH+1 ), LDB,
+     $                          B( JCH+1, JCH+1 ), LDB, C, S )
+                     IF( ILQ )
+     $                  CALL DROT( N, Q( 1, JCH ), 1, Q( 1, JCH+1 ), 1,
+     $                             C, S )
+                     IF( ILAZR2 )
+     $                  A( JCH, JCH-1 ) = A( JCH, JCH-1 )*C
+                     ILAZR2 = .FALSE.
+                     IF( ABS( B( JCH+1, JCH+1 ) ).GE.BTOL ) THEN
+                        IF( JCH+1.GE.ILAST ) THEN
+                           GO TO 80
+                        ELSE
+                           IFIRST = JCH + 1
+                           GO TO 110
+                        END IF
+                     END IF
+                     B( JCH+1, JCH+1 ) = ZERO
+   40             CONTINUE
+                  GO TO 70
+               ELSE
+*
+*                 Only test 2 passed -- chase the zero to B(ILAST,ILAST)
+*                 Then process as in the case B(ILAST,ILAST)=0
+*
+                  DO 50 JCH = J, ILAST - 1
+                     TEMP = B( JCH, JCH+1 )
+                     CALL DLARTG( TEMP, B( JCH+1, JCH+1 ), C, S,
+     $                            B( JCH, JCH+1 ) )
+                     B( JCH+1, JCH+1 ) = ZERO
+                     IF( JCH.LT.ILASTM-1 )
+     $                  CALL DROT( ILASTM-JCH-1, B( JCH, JCH+2 ), LDB,
+     $                             B( JCH+1, JCH+2 ), LDB, C, S )
+                     CALL DROT( ILASTM-JCH+2, A( JCH, JCH-1 ), LDA,
+     $                          A( JCH+1, JCH-1 ), LDA, C, S )
+                     IF( ILQ )
+     $                  CALL DROT( N, Q( 1, JCH ), 1, Q( 1, JCH+1 ), 1,
+     $                             C, S )
+                     TEMP = A( JCH+1, JCH )
+                     CALL DLARTG( TEMP, A( JCH+1, JCH-1 ), C, S,
+     $                            A( JCH+1, JCH ) )
+                     A( JCH+1, JCH-1 ) = ZERO
+                     CALL DROT( JCH+1-IFRSTM, A( IFRSTM, JCH ), 1,
+     $                          A( IFRSTM, JCH-1 ), 1, C, S )
+                     CALL DROT( JCH-IFRSTM, B( IFRSTM, JCH ), 1,
+     $                          B( IFRSTM, JCH-1 ), 1, C, S )
+                     IF( ILZ )
+     $                  CALL DROT( N, Z( 1, JCH ), 1, Z( 1, JCH-1 ), 1,
+     $                             C, S )
+   50             CONTINUE
+                  GO TO 70
+               END IF
+            ELSE IF( ILAZRO ) THEN
+*
+*              Only test 1 passed -- work on J:ILAST
+*
+               IFIRST = J
+               GO TO 110
+            END IF
+*
+*           Neither test passed -- try next J
+*
+   60    CONTINUE
+*
+*        (Drop-through is "impossible")
+*
+         INFO = N + 1
+         GO TO 420
+*
+*        B(ILAST,ILAST)=0 -- clear A(ILAST,ILAST-1) to split off a
+*        1x1 block.
+*
+   70    CONTINUE
+         TEMP = A( ILAST, ILAST )
+         CALL DLARTG( TEMP, A( ILAST, ILAST-1 ), C, S,
+     $                A( ILAST, ILAST ) )
+         A( ILAST, ILAST-1 ) = ZERO
+         CALL DROT( ILAST-IFRSTM, A( IFRSTM, ILAST ), 1,
+     $              A( IFRSTM, ILAST-1 ), 1, C, S )
+         CALL DROT( ILAST-IFRSTM, B( IFRSTM, ILAST ), 1,
+     $              B( IFRSTM, ILAST-1 ), 1, C, S )
+         IF( ILZ )
+     $      CALL DROT( N, Z( 1, ILAST ), 1, Z( 1, ILAST-1 ), 1, C, S )
+*
+*        A(ILAST,ILAST-1)=0 -- Standardize B, set ALPHAR, ALPHAI,
+*                              and BETA
+*
+   80    CONTINUE
+         IF( B( ILAST, ILAST ).LT.ZERO ) THEN
+            IF( ILSCHR ) THEN
+               DO 90 J = IFRSTM, ILAST
+                  A( J, ILAST ) = -A( J, ILAST )
+                  B( J, ILAST ) = -B( J, ILAST )
+   90          CONTINUE
+            ELSE
+               A( ILAST, ILAST ) = -A( ILAST, ILAST )
+               B( ILAST, ILAST ) = -B( ILAST, ILAST )
+            END IF
+            IF( ILZ ) THEN
+               DO 100 J = 1, N
+                  Z( J, ILAST ) = -Z( J, ILAST )
+  100          CONTINUE
+            END IF
+         END IF
+         ALPHAR( ILAST ) = A( ILAST, ILAST )
+         ALPHAI( ILAST ) = ZERO
+         BETA( ILAST ) = B( ILAST, ILAST )
+*
+*        Go to next block -- exit if finished.
+*
+         ILAST = ILAST - 1
+         IF( ILAST.LT.ILO )
+     $      GO TO 380
+*
+*        Reset counters
+*
+         IITER = 0
+         ESHIFT = ZERO
+         IF( .NOT.ILSCHR ) THEN
+            ILASTM = ILAST
+            IF( IFRSTM.GT.ILAST )
+     $         IFRSTM = ILO
+         END IF
+         GO TO 350
+*
+*        QZ step
+*
+*        This iteration only involves rows/columns IFIRST:ILAST. We
+*        assume IFIRST < ILAST, and that the diagonal of B is non-zero.
+*
+  110    CONTINUE
+         IITER = IITER + 1
+         IF( .NOT.ILSCHR ) THEN
+            IFRSTM = IFIRST
+         END IF
+*
+*        Compute single shifts.
+*
+*        At this point, IFIRST < ILAST, and the diagonal elements of
+*        B(IFIRST:ILAST,IFIRST,ILAST) are larger than BTOL (in
+*        magnitude)
+*
+         IF( ( IITER / 10 )*10.EQ.IITER ) THEN
+*
+*           Exceptional shift.  Chosen for no particularly good reason.
+*           (Single shift only.)
+*
+            IF( ( DBLE( MAXIT )*SAFMIN )*ABS( A( ILAST-1, ILAST ) ).LT.
+     $          ABS( B( ILAST-1, ILAST-1 ) ) ) THEN
+               ESHIFT = ESHIFT + A( ILAST-1, ILAST ) /
+     $                  B( ILAST-1, ILAST-1 )
+            ELSE
+               ESHIFT = ESHIFT + ONE / ( SAFMIN*DBLE( MAXIT ) )
+            END IF
+            S1 = ONE
+            WR = ESHIFT
+*
+         ELSE
+*
+*           Shifts based on the generalized eigenvalues of the
+*           bottom-right 2x2 block of A and B. The first eigenvalue
+*           returned by DLAG2 is the Wilkinson shift (AEP p.512),
+*
+            CALL DLAG2( A( ILAST-1, ILAST-1 ), LDA,
+     $                  B( ILAST-1, ILAST-1 ), LDB, SAFMIN*SAFETY, S1,
+     $                  S2, WR, WR2, WI )
+*
+            TEMP = MAX( S1, SAFMIN*MAX( ONE, ABS( WR ), ABS( WI ) ) )
+            IF( WI.NE.ZERO )
+     $         GO TO 200
+         END IF
+*
+*        Fiddle with shift to avoid overflow
+*
+         TEMP = MIN( ASCALE, ONE )*( HALF*SAFMAX )
+         IF( S1.GT.TEMP ) THEN
+            SCALE = TEMP / S1
+         ELSE
+            SCALE = ONE
+         END IF
+*
+         TEMP = MIN( BSCALE, ONE )*( HALF*SAFMAX )
+         IF( ABS( WR ).GT.TEMP )
+     $      SCALE = MIN( SCALE, TEMP / ABS( WR ) )
+         S1 = SCALE*S1
+         WR = SCALE*WR
+*
+*        Now check for two consecutive small subdiagonals.
+*
+         DO 120 J = ILAST - 1, IFIRST + 1, -1
+            ISTART = J
+            TEMP = ABS( S1*A( J, J-1 ) )
+            TEMP2 = ABS( S1*A( J, J )-WR*B( J, J ) )
+            TEMPR = MAX( TEMP, TEMP2 )
+            IF( TEMPR.LT.ONE .AND. TEMPR.NE.ZERO ) THEN
+               TEMP = TEMP / TEMPR
+               TEMP2 = TEMP2 / TEMPR
+            END IF
+            IF( ABS( ( ASCALE*A( J+1, J ) )*TEMP ).LE.( ASCALE*ATOL )*
+     $          TEMP2 )GO TO 130
+  120    CONTINUE
+*
+         ISTART = IFIRST
+  130    CONTINUE
+*
+*        Do an implicit single-shift QZ sweep.
+*
+*        Initial Q
+*
+         TEMP = S1*A( ISTART, ISTART ) - WR*B( ISTART, ISTART )
+         TEMP2 = S1*A( ISTART+1, ISTART )
+         CALL DLARTG( TEMP, TEMP2, C, S, TEMPR )
+*
+*        Sweep
+*
+         DO 190 J = ISTART, ILAST - 1
+            IF( J.GT.ISTART ) THEN
+               TEMP = A( J, J-1 )
+               CALL DLARTG( TEMP, A( J+1, J-1 ), C, S, A( J, J-1 ) )
+               A( J+1, J-1 ) = ZERO
+            END IF
+*
+            DO 140 JC = J, ILASTM
+               TEMP = C*A( J, JC ) + S*A( J+1, JC )
+               A( J+1, JC ) = -S*A( J, JC ) + C*A( J+1, JC )
+               A( J, JC ) = TEMP
+               TEMP2 = C*B( J, JC ) + S*B( J+1, JC )
+               B( J+1, JC ) = -S*B( J, JC ) + C*B( J+1, JC )
+               B( J, JC ) = TEMP2
+  140       CONTINUE
+            IF( ILQ ) THEN
+               DO 150 JR = 1, N
+                  TEMP = C*Q( JR, J ) + S*Q( JR, J+1 )
+                  Q( JR, J+1 ) = -S*Q( JR, J ) + C*Q( JR, J+1 )
+                  Q( JR, J ) = TEMP
+  150          CONTINUE
+            END IF
+*
+            TEMP = B( J+1, J+1 )
+            CALL DLARTG( TEMP, B( J+1, J ), C, S, B( J+1, J+1 ) )
+            B( J+1, J ) = ZERO
+*
+            DO 160 JR = IFRSTM, MIN( J+2, ILAST )
+               TEMP = C*A( JR, J+1 ) + S*A( JR, J )
+               A( JR, J ) = -S*A( JR, J+1 ) + C*A( JR, J )
+               A( JR, J+1 ) = TEMP
+  160       CONTINUE
+            DO 170 JR = IFRSTM, J
+               TEMP = C*B( JR, J+1 ) + S*B( JR, J )
+               B( JR, J ) = -S*B( JR, J+1 ) + C*B( JR, J )
+               B( JR, J+1 ) = TEMP
+  170       CONTINUE
+            IF( ILZ ) THEN
+               DO 180 JR = 1, N
+                  TEMP = C*Z( JR, J+1 ) + S*Z( JR, J )
+                  Z( JR, J ) = -S*Z( JR, J+1 ) + C*Z( JR, J )
+                  Z( JR, J+1 ) = TEMP
+  180          CONTINUE
+            END IF
+  190    CONTINUE
+*
+         GO TO 350
+*
+*        Use Francis double-shift
+*
+*        Note: the Francis double-shift should work with real shifts,
+*              but only if the block is at least 3x3.
+*              This code may break if this point is reached with
+*              a 2x2 block with real eigenvalues.
+*
+  200    CONTINUE
+         IF( IFIRST+1.EQ.ILAST ) THEN
+*
+*           Special case -- 2x2 block with complex eigenvectors
+*
+*           Step 1: Standardize, that is, rotate so that
+*
+*                       ( B11  0  )
+*                   B = (         )  with B11 non-negative.
+*                       (  0  B22 )
+*
+            CALL DLASV2( B( ILAST-1, ILAST-1 ), B( ILAST-1, ILAST ),
+     $                   B( ILAST, ILAST ), B22, B11, SR, CR, SL, CL )
+*
+            IF( B11.LT.ZERO ) THEN
+               CR = -CR
+               SR = -SR
+               B11 = -B11
+               B22 = -B22
+            END IF
+*
+            CALL DROT( ILASTM+1-IFIRST, A( ILAST-1, ILAST-1 ), LDA,
+     $                 A( ILAST, ILAST-1 ), LDA, CL, SL )
+            CALL DROT( ILAST+1-IFRSTM, A( IFRSTM, ILAST-1 ), 1,
+     $                 A( IFRSTM, ILAST ), 1, CR, SR )
+*
+            IF( ILAST.LT.ILASTM )
+     $         CALL DROT( ILASTM-ILAST, B( ILAST-1, ILAST+1 ), LDB,
+     $                    B( ILAST, ILAST+1 ), LDA, CL, SL )
+            IF( IFRSTM.LT.ILAST-1 )
+     $         CALL DROT( IFIRST-IFRSTM, B( IFRSTM, ILAST-1 ), 1,
+     $                    B( IFRSTM, ILAST ), 1, CR, SR )
+*
+            IF( ILQ )
+     $         CALL DROT( N, Q( 1, ILAST-1 ), 1, Q( 1, ILAST ), 1, CL,
+     $                    SL )
+            IF( ILZ )
+     $         CALL DROT( N, Z( 1, ILAST-1 ), 1, Z( 1, ILAST ), 1, CR,
+     $                    SR )
+*
+            B( ILAST-1, ILAST-1 ) = B11
+            B( ILAST-1, ILAST ) = ZERO
+            B( ILAST, ILAST-1 ) = ZERO
+            B( ILAST, ILAST ) = B22
+*
+*           If B22 is negative, negate column ILAST
+*
+            IF( B22.LT.ZERO ) THEN
+               DO 210 J = IFRSTM, ILAST
+                  A( J, ILAST ) = -A( J, ILAST )
+                  B( J, ILAST ) = -B( J, ILAST )
+  210          CONTINUE
+*
+               IF( ILZ ) THEN
+                  DO 220 J = 1, N
+                     Z( J, ILAST ) = -Z( J, ILAST )
+  220             CONTINUE
+               END IF
+            END IF
+*
+*           Step 2: Compute ALPHAR, ALPHAI, and BETA (see refs.)
+*
+*           Recompute shift
+*
+            CALL DLAG2( A( ILAST-1, ILAST-1 ), LDA,
+     $                  B( ILAST-1, ILAST-1 ), LDB, SAFMIN*SAFETY, S1,
+     $                  TEMP, WR, TEMP2, WI )
+*
+*           If standardization has perturbed the shift onto real line,
+*           do another (real single-shift) QR step.
+*
+            IF( WI.EQ.ZERO )
+     $         GO TO 350
+            S1INV = ONE / S1
+*
+*           Do EISPACK (QZVAL) computation of alpha and beta
+*
+            A11 = A( ILAST-1, ILAST-1 )
+            A21 = A( ILAST, ILAST-1 )
+            A12 = A( ILAST-1, ILAST )
+            A22 = A( ILAST, ILAST )
+*
+*           Compute complex Givens rotation on right
+*           (Assume some element of C = (sA - wB) > unfl )
+*                            __
+*           (sA - wB) ( CZ   -SZ )
+*                     ( SZ    CZ )
+*
+            C11R = S1*A11 - WR*B11
+            C11I = -WI*B11
+            C12 = S1*A12
+            C21 = S1*A21
+            C22R = S1*A22 - WR*B22
+            C22I = -WI*B22
+*
+            IF( ABS( C11R )+ABS( C11I )+ABS( C12 ).GT.ABS( C21 )+
+     $          ABS( C22R )+ABS( C22I ) ) THEN
+               T = DLAPY3( C12, C11R, C11I )
+               CZ = C12 / T
+               SZR = -C11R / T
+               SZI = -C11I / T
+            ELSE
+               CZ = DLAPY2( C22R, C22I )
+               IF( CZ.LE.SAFMIN ) THEN
+                  CZ = ZERO
+                  SZR = ONE
+                  SZI = ZERO
+               ELSE
+                  TEMPR = C22R / CZ
+                  TEMPI = C22I / CZ
+                  T = DLAPY2( CZ, C21 )
+                  CZ = CZ / T
+                  SZR = -C21*TEMPR / T
+                  SZI = C21*TEMPI / T
+               END IF
+            END IF
+*
+*           Compute Givens rotation on left
+*
+*           (  CQ   SQ )
+*           (  __      )  A or B
+*           ( -SQ   CQ )
+*
+            AN = ABS( A11 ) + ABS( A12 ) + ABS( A21 ) + ABS( A22 )
+            BN = ABS( B11 ) + ABS( B22 )
+            WABS = ABS( WR ) + ABS( WI )
+            IF( S1*AN.GT.WABS*BN ) THEN
+               CQ = CZ*B11
+               SQR = SZR*B22
+               SQI = -SZI*B22
+            ELSE
+               A1R = CZ*A11 + SZR*A12
+               A1I = SZI*A12
+               A2R = CZ*A21 + SZR*A22
+               A2I = SZI*A22
+               CQ = DLAPY2( A1R, A1I )
+               IF( CQ.LE.SAFMIN ) THEN
+                  CQ = ZERO
+                  SQR = ONE
+                  SQI = ZERO
+               ELSE
+                  TEMPR = A1R / CQ
+                  TEMPI = A1I / CQ
+                  SQR = TEMPR*A2R + TEMPI*A2I
+                  SQI = TEMPI*A2R - TEMPR*A2I
+               END IF
+            END IF
+            T = DLAPY3( CQ, SQR, SQI )
+            CQ = CQ / T
+            SQR = SQR / T
+            SQI = SQI / T
+*
+*           Compute diagonal elements of QBZ
+*
+            TEMPR = SQR*SZR - SQI*SZI
+            TEMPI = SQR*SZI + SQI*SZR
+            B1R = CQ*CZ*B11 + TEMPR*B22
+            B1I = TEMPI*B22
+            B1A = DLAPY2( B1R, B1I )
+            B2R = CQ*CZ*B22 + TEMPR*B11
+            B2I = -TEMPI*B11
+            B2A = DLAPY2( B2R, B2I )
+*
+*           Normalize so beta > 0, and Im( alpha1 ) > 0
+*
+            BETA( ILAST-1 ) = B1A
+            BETA( ILAST ) = B2A
+            ALPHAR( ILAST-1 ) = ( WR*B1A )*S1INV
+            ALPHAI( ILAST-1 ) = ( WI*B1A )*S1INV
+            ALPHAR( ILAST ) = ( WR*B2A )*S1INV
+            ALPHAI( ILAST ) = -( WI*B2A )*S1INV
+*
+*           Step 3: Go to next block -- exit if finished.
+*
+            ILAST = IFIRST - 1
+            IF( ILAST.LT.ILO )
+     $         GO TO 380
+*
+*           Reset counters
+*
+            IITER = 0
+            ESHIFT = ZERO
+            IF( .NOT.ILSCHR ) THEN
+               ILASTM = ILAST
+               IF( IFRSTM.GT.ILAST )
+     $            IFRSTM = ILO
+            END IF
+            GO TO 350
+         ELSE
+*
+*           Usual case: 3x3 or larger block, using Francis implicit
+*                       double-shift
+*
+*                                    2
+*           Eigenvalue equation is  w  - c w + d = 0,
+*
+*                                         -1 2        -1
+*           so compute 1st column of  (A B  )  - c A B   + d
+*           using the formula in QZIT (from EISPACK)
+*
+*           We assume that the block is at least 3x3
+*
+            AD11 = ( ASCALE*A( ILAST-1, ILAST-1 ) ) /
+     $             ( BSCALE*B( ILAST-1, ILAST-1 ) )
+            AD21 = ( ASCALE*A( ILAST, ILAST-1 ) ) /
+     $             ( BSCALE*B( ILAST-1, ILAST-1 ) )
+            AD12 = ( ASCALE*A( ILAST-1, ILAST ) ) /
+     $             ( BSCALE*B( ILAST, ILAST ) )
+            AD22 = ( ASCALE*A( ILAST, ILAST ) ) /
+     $             ( BSCALE*B( ILAST, ILAST ) )
+            U12 = B( ILAST-1, ILAST ) / B( ILAST, ILAST )
+            AD11L = ( ASCALE*A( IFIRST, IFIRST ) ) /
+     $              ( BSCALE*B( IFIRST, IFIRST ) )
+            AD21L = ( ASCALE*A( IFIRST+1, IFIRST ) ) /
+     $              ( BSCALE*B( IFIRST, IFIRST ) )
+            AD12L = ( ASCALE*A( IFIRST, IFIRST+1 ) ) /
+     $              ( BSCALE*B( IFIRST+1, IFIRST+1 ) )
+            AD22L = ( ASCALE*A( IFIRST+1, IFIRST+1 ) ) /
+     $              ( BSCALE*B( IFIRST+1, IFIRST+1 ) )
+            AD32L = ( ASCALE*A( IFIRST+2, IFIRST+1 ) ) /
+     $              ( BSCALE*B( IFIRST+1, IFIRST+1 ) )
+            U12L = B( IFIRST, IFIRST+1 ) / B( IFIRST+1, IFIRST+1 )
+*
+            V( 1 ) = ( AD11-AD11L )*( AD22-AD11L ) - AD12*AD21 +
+     $               AD21*U12*AD11L + ( AD12L-AD11L*U12L )*AD21L
+            V( 2 ) = ( ( AD22L-AD11L )-AD21L*U12L-( AD11-AD11L )-
+     $               ( AD22-AD11L )+AD21*U12 )*AD21L
+            V( 3 ) = AD32L*AD21L
+*
+            ISTART = IFIRST
+*
+            CALL DLARFG( 3, V( 1 ), V( 2 ), 1, TAU )
+            V( 1 ) = ONE
+*
+*           Sweep
+*
+            DO 290 J = ISTART, ILAST - 2
+*
+*              All but last elements: use 3x3 Householder transforms.
+*
+*              Zero (j-1)st column of A
+*
+               IF( J.GT.ISTART ) THEN
+                  V( 1 ) = A( J, J-1 )
+                  V( 2 ) = A( J+1, J-1 )
+                  V( 3 ) = A( J+2, J-1 )
+*
+                  CALL DLARFG( 3, A( J, J-1 ), V( 2 ), 1, TAU )
+                  V( 1 ) = ONE
+                  A( J+1, J-1 ) = ZERO
+                  A( J+2, J-1 ) = ZERO
+               END IF
+*
+               DO 230 JC = J, ILASTM
+                  TEMP = TAU*( A( J, JC )+V( 2 )*A( J+1, JC )+V( 3 )*
+     $                   A( J+2, JC ) )
+                  A( J, JC ) = A( J, JC ) - TEMP
+                  A( J+1, JC ) = A( J+1, JC ) - TEMP*V( 2 )
+                  A( J+2, JC ) = A( J+2, JC ) - TEMP*V( 3 )
+                  TEMP2 = TAU*( B( J, JC )+V( 2 )*B( J+1, JC )+V( 3 )*
+     $                    B( J+2, JC ) )
+                  B( J, JC ) = B( J, JC ) - TEMP2
+                  B( J+1, JC ) = B( J+1, JC ) - TEMP2*V( 2 )
+                  B( J+2, JC ) = B( J+2, JC ) - TEMP2*V( 3 )
+  230          CONTINUE
+               IF( ILQ ) THEN
+                  DO 240 JR = 1, N
+                     TEMP = TAU*( Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )*
+     $                      Q( JR, J+2 ) )
+                     Q( JR, J ) = Q( JR, J ) - TEMP
+                     Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*V( 2 )
+                     Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*V( 3 )
+  240             CONTINUE
+               END IF
+*
+*              Zero j-th column of B (see DLAGBC for details)
+*
+*              Swap rows to pivot
+*
+               ILPIVT = .FALSE.
+               TEMP = MAX( ABS( B( J+1, J+1 ) ), ABS( B( J+1, J+2 ) ) )
+               TEMP2 = MAX( ABS( B( J+2, J+1 ) ), ABS( B( J+2, J+2 ) ) )
+               IF( MAX( TEMP, TEMP2 ).LT.SAFMIN ) THEN
+                  SCALE = ZERO
+                  U1 = ONE
+                  U2 = ZERO
+                  GO TO 250
+               ELSE IF( TEMP.GE.TEMP2 ) THEN
+                  W11 = B( J+1, J+1 )
+                  W21 = B( J+2, J+1 )
+                  W12 = B( J+1, J+2 )
+                  W22 = B( J+2, J+2 )
+                  U1 = B( J+1, J )
+                  U2 = B( J+2, J )
+               ELSE
+                  W21 = B( J+1, J+1 )
+                  W11 = B( J+2, J+1 )
+                  W22 = B( J+1, J+2 )
+                  W12 = B( J+2, J+2 )
+                  U2 = B( J+1, J )
+                  U1 = B( J+2, J )
+               END IF
+*
+*              Swap columns if nec.
+*
+               IF( ABS( W12 ).GT.ABS( W11 ) ) THEN
+                  ILPIVT = .TRUE.
+                  TEMP = W12
+                  TEMP2 = W22
+                  W12 = W11
+                  W22 = W21
+                  W11 = TEMP
+                  W21 = TEMP2
+               END IF
+*
+*              LU-factor
+*
+               TEMP = W21 / W11
+               U2 = U2 - TEMP*U1
+               W22 = W22 - TEMP*W12
+               W21 = ZERO
+*
+*              Compute SCALE
+*
+               SCALE = ONE
+               IF( ABS( W22 ).LT.SAFMIN ) THEN
+                  SCALE = ZERO
+                  U2 = ONE
+                  U1 = -W12 / W11
+                  GO TO 250
+               END IF
+               IF( ABS( W22 ).LT.ABS( U2 ) )
+     $            SCALE = ABS( W22 / U2 )
+               IF( ABS( W11 ).LT.ABS( U1 ) )
+     $            SCALE = MIN( SCALE, ABS( W11 / U1 ) )
+*
+*              Solve
+*
+               U2 = ( SCALE*U2 ) / W22
+               U1 = ( SCALE*U1-W12*U2 ) / W11
+*
+  250          CONTINUE
+               IF( ILPIVT ) THEN
+                  TEMP = U2
+                  U2 = U1
+                  U1 = TEMP
+               END IF
+*
+*              Compute Householder Vector
+*
+               T = SQRT( SCALE**2+U1**2+U2**2 )
+               TAU = ONE + SCALE / T
+               VS = -ONE / ( SCALE+T )
+               V( 1 ) = ONE
+               V( 2 ) = VS*U1
+               V( 3 ) = VS*U2
+*
+*              Apply transformations from the right.
+*
+               DO 260 JR = IFRSTM, MIN( J+3, ILAST )
+                  TEMP = TAU*( A( JR, J )+V( 2 )*A( JR, J+1 )+V( 3 )*
+     $                   A( JR, J+2 ) )
+                  A( JR, J ) = A( JR, J ) - TEMP
+                  A( JR, J+1 ) = A( JR, J+1 ) - TEMP*V( 2 )
+                  A( JR, J+2 ) = A( JR, J+2 ) - TEMP*V( 3 )
+  260          CONTINUE
+               DO 270 JR = IFRSTM, J + 2
+                  TEMP = TAU*( B( JR, J )+V( 2 )*B( JR, J+1 )+V( 3 )*
+     $                   B( JR, J+2 ) )
+                  B( JR, J ) = B( JR, J ) - TEMP
+                  B( JR, J+1 ) = B( JR, J+1 ) - TEMP*V( 2 )
+                  B( JR, J+2 ) = B( JR, J+2 ) - TEMP*V( 3 )
+  270          CONTINUE
+               IF( ILZ ) THEN
+                  DO 280 JR = 1, N
+                     TEMP = TAU*( Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )*
+     $                      Z( JR, J+2 ) )
+                     Z( JR, J ) = Z( JR, J ) - TEMP
+                     Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*V( 2 )
+                     Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*V( 3 )
+  280             CONTINUE
+               END IF
+               B( J+1, J ) = ZERO
+               B( J+2, J ) = ZERO
+  290       CONTINUE
+*
+*           Last elements: Use Givens rotations
+*
+*           Rotations from the left
+*
+            J = ILAST - 1
+            TEMP = A( J, J-1 )
+            CALL DLARTG( TEMP, A( J+1, J-1 ), C, S, A( J, J-1 ) )
+            A( J+1, J-1 ) = ZERO
+*
+            DO 300 JC = J, ILASTM
+               TEMP = C*A( J, JC ) + S*A( J+1, JC )
+               A( J+1, JC ) = -S*A( J, JC ) + C*A( J+1, JC )
+               A( J, JC ) = TEMP
+               TEMP2 = C*B( J, JC ) + S*B( J+1, JC )
+               B( J+1, JC ) = -S*B( J, JC ) + C*B( J+1, JC )
+               B( J, JC ) = TEMP2
+  300       CONTINUE
+            IF( ILQ ) THEN
+               DO 310 JR = 1, N
+                  TEMP = C*Q( JR, J ) + S*Q( JR, J+1 )
+                  Q( JR, J+1 ) = -S*Q( JR, J ) + C*Q( JR, J+1 )
+                  Q( JR, J ) = TEMP
+  310          CONTINUE
+            END IF
+*
+*           Rotations from the right.
+*
+            TEMP = B( J+1, J+1 )
+            CALL DLARTG( TEMP, B( J+1, J ), C, S, B( J+1, J+1 ) )
+            B( J+1, J ) = ZERO
+*
+            DO 320 JR = IFRSTM, ILAST
+               TEMP = C*A( JR, J+1 ) + S*A( JR, J )
+               A( JR, J ) = -S*A( JR, J+1 ) + C*A( JR, J )
+               A( JR, J+1 ) = TEMP
+  320       CONTINUE
+            DO 330 JR = IFRSTM, ILAST - 1
+               TEMP = C*B( JR, J+1 ) + S*B( JR, J )
+               B( JR, J ) = -S*B( JR, J+1 ) + C*B( JR, J )
+               B( JR, J+1 ) = TEMP
+  330       CONTINUE
+            IF( ILZ ) THEN
+               DO 340 JR = 1, N
+                  TEMP = C*Z( JR, J+1 ) + S*Z( JR, J )
+                  Z( JR, J ) = -S*Z( JR, J+1 ) + C*Z( JR, J )
+                  Z( JR, J+1 ) = TEMP
+  340          CONTINUE
+            END IF
+*
+*           End of Double-Shift code
+*
+         END IF
+*
+         GO TO 350
+*
+*        End of iteration loop
+*
+  350    CONTINUE
+  360 CONTINUE
+*
+*     Drop-through = non-convergence
+*
+  370 CONTINUE
+      INFO = ILAST
+      GO TO 420
+*
+*     Successful completion of all QZ steps
+*
+  380 CONTINUE
+*
+*     Set Eigenvalues 1:ILO-1
+*
+      DO 410 J = 1, ILO - 1
+         IF( B( J, J ).LT.ZERO ) THEN
+            IF( ILSCHR ) THEN
+               DO 390 JR = 1, J
+                  A( JR, J ) = -A( JR, J )
+                  B( JR, J ) = -B( JR, J )
+  390          CONTINUE
+            ELSE
+               A( J, J ) = -A( J, J )
+               B( J, J ) = -B( J, J )
+            END IF
+            IF( ILZ ) THEN
+               DO 400 JR = 1, N
+                  Z( JR, J ) = -Z( JR, J )
+  400          CONTINUE
+            END IF
+         END IF
+         ALPHAR( J ) = A( J, J )
+         ALPHAI( J ) = ZERO
+         BETA( J ) = B( J, J )
+  410 CONTINUE
+*
+*     Normal Termination
+*
+      INFO = 0
+*
+*     Exit (other than argument error) -- return optimal workspace size
+*
+  420 CONTINUE
+      WORK( 1 ) = DBLE( N )
+      RETURN
+*
+*     End of DHGEQZ
+*
+      END
diff --git a/costa/native/external/lapack/dhsein.f b/costa/native/external/lapack/dhsein.f
new file mode 100644
index 000000000..e68f7a464
--- /dev/null
+++ b/costa/native/external/lapack/dhsein.f
@@ -0,0 +1,412 @@
+      SUBROUTINE DHSEIN( SIDE, EIGSRC, INITV, SELECT, N, H, LDH, WR, WI,
+     $                   VL, LDVL, VR, LDVR, MM, M, WORK, IFAILL,
+     $                   IFAILR, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EIGSRC, INITV, SIDE
+      INTEGER            INFO, LDH, LDVL, LDVR, M, MM, N
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            SELECT( * )
+      INTEGER            IFAILL( * ), IFAILR( * )
+      DOUBLE PRECISION   H( LDH, * ), VL( LDVL, * ), VR( LDVR, * ),
+     $                   WI( * ), WORK( * ), WR( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DHSEIN uses inverse iteration to find specified right and/or left
+*  eigenvectors of a real upper Hessenberg matrix H.
+*
+*  The right eigenvector x and the left eigenvector y of the matrix H
+*  corresponding to an eigenvalue w are defined by:
+*
+*               H * x = w * x,     y**h * H = w * y**h
+*
+*  where y**h denotes the conjugate transpose of the vector y.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'R': compute right eigenvectors only;
+*          = 'L': compute left eigenvectors only;
+*          = 'B': compute both right and left eigenvectors.
+*
+*  EIGSRC  (input) CHARACTER*1
+*          Specifies the source of eigenvalues supplied in (WR,WI):
+*          = 'Q': the eigenvalues were found using DHSEQR; thus, if
+*                 H has zero subdiagonal elements, and so is
+*                 block-triangular, then the j-th eigenvalue can be
+*                 assumed to be an eigenvalue of the block containing
+*                 the j-th row/column.  This property allows DHSEIN to
+*                 perform inverse iteration on just one diagonal block.
+*          = 'N': no assumptions are made on the correspondence
+*                 between eigenvalues and diagonal blocks.  In this
+*                 case, DHSEIN must always perform inverse iteration
+*                 using the whole matrix H.
+*
+*  INITV   (input) CHARACTER*1
+*          = 'N': no initial vectors are supplied;
+*          = 'U': user-supplied initial vectors are stored in the arrays
+*                 VL and/or VR.
+*
+*  SELECT  (input/output) LOGICAL array, dimension (N)
+*          Specifies the eigenvectors to be computed. To select the
+*          real eigenvector corresponding to a real eigenvalue WR(j),
+*          SELECT(j) must be set to .TRUE.. To select the complex
+*          eigenvector corresponding to a complex eigenvalue
+*          (WR(j),WI(j)), with complex conjugate (WR(j+1),WI(j+1)),
+*          either SELECT(j) or SELECT(j+1) or both must be set to
+*          .TRUE.; then on exit SELECT(j) is .TRUE. and SELECT(j+1) is
+*          .FALSE..
+*
+*  N       (input) INTEGER
+*          The order of the matrix H.  N >= 0.
+*
+*  H       (input) DOUBLE PRECISION array, dimension (LDH,N)
+*          The upper Hessenberg matrix H.
+*
+*  LDH     (input) INTEGER
+*          The leading dimension of the array H.  LDH >= max(1,N).
+*
+*  WR      (input/output) DOUBLE PRECISION array, dimension (N)
+*  WI      (input) DOUBLE PRECISION array, dimension (N)
+*          On entry, the real and imaginary parts of the eigenvalues of
+*          H; a complex conjugate pair of eigenvalues must be stored in
+*          consecutive elements of WR and WI.
+*          On exit, WR may have been altered since close eigenvalues
+*          are perturbed slightly in searching for independent
+*          eigenvectors.
+*
+*  VL      (input/output) DOUBLE PRECISION array, dimension (LDVL,MM)
+*          On entry, if INITV = 'U' and SIDE = 'L' or 'B', VL must
+*          contain starting vectors for the inverse iteration for the
+*          left eigenvectors; the starting vector for each eigenvector
+*          must be in the same column(s) in which the eigenvector will
+*          be stored.
+*          On exit, if SIDE = 'L' or 'B', the left eigenvectors
+*          specified by SELECT will be stored consecutively in the
+*          columns of VL, in the same order as their eigenvalues. A
+*          complex eigenvector corresponding to a complex eigenvalue is
+*          stored in two consecutive columns, the first holding the real
+*          part and the second the imaginary part.
+*          If SIDE = 'R', VL is not referenced.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the array VL.
+*          LDVL >= max(1,N) if SIDE = 'L' or 'B'; LDVL >= 1 otherwise.
+*
+*  VR      (input/output) DOUBLE PRECISION array, dimension (LDVR,MM)
+*          On entry, if INITV = 'U' and SIDE = 'R' or 'B', VR must
+*          contain starting vectors for the inverse iteration for the
+*          right eigenvectors; the starting vector for each eigenvector
+*          must be in the same column(s) in which the eigenvector will
+*          be stored.
+*          On exit, if SIDE = 'R' or 'B', the right eigenvectors
+*          specified by SELECT will be stored consecutively in the
+*          columns of VR, in the same order as their eigenvalues. A
+*          complex eigenvector corresponding to a complex eigenvalue is
+*          stored in two consecutive columns, the first holding the real
+*          part and the second the imaginary part.
+*          If SIDE = 'L', VR is not referenced.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the array VR.
+*          LDVR >= max(1,N) if SIDE = 'R' or 'B'; LDVR >= 1 otherwise.
+*
+*  MM      (input) INTEGER
+*          The number of columns in the arrays VL and/or VR. MM >= M.
+*
+*  M       (output) INTEGER
+*          The number of columns in the arrays VL and/or VR required to
+*          store the eigenvectors; each selected real eigenvector
+*          occupies one column and each selected complex eigenvector
+*          occupies two columns.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension ((N+2)*N)
+*
+*  IFAILL  (output) INTEGER array, dimension (MM)
+*          If SIDE = 'L' or 'B', IFAILL(i) = j > 0 if the left
+*          eigenvector in the i-th column of VL (corresponding to the
+*          eigenvalue w(j)) failed to converge; IFAILL(i) = 0 if the
+*          eigenvector converged satisfactorily. If the i-th and (i+1)th
+*          columns of VL hold a complex eigenvector, then IFAILL(i) and
+*          IFAILL(i+1) are set to the same value.
+*          If SIDE = 'R', IFAILL is not referenced.
+*
+*  IFAILR  (output) INTEGER array, dimension (MM)
+*          If SIDE = 'R' or 'B', IFAILR(i) = j > 0 if the right
+*          eigenvector in the i-th column of VR (corresponding to the
+*          eigenvalue w(j)) failed to converge; IFAILR(i) = 0 if the
+*          eigenvector converged satisfactorily. If the i-th and (i+1)th
+*          columns of VR hold a complex eigenvector, then IFAILR(i) and
+*          IFAILR(i+1) are set to the same value.
+*          If SIDE = 'L', IFAILR is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, i is the number of eigenvectors which
+*                failed to converge; see IFAILL and IFAILR for further
+*                details.
+*
+*  Further Details
+*  ===============
+*
+*  Each eigenvector is normalized so that the element of largest
+*  magnitude has magnitude 1; here the magnitude of a complex number
+*  (x,y) is taken to be |x|+|y|.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            BOTHV, FROMQR, LEFTV, NOINIT, PAIR, RIGHTV
+      INTEGER            I, IINFO, K, KL, KLN, KR, KSI, KSR, LDWORK
+      DOUBLE PRECISION   BIGNUM, EPS3, HNORM, SMLNUM, ULP, UNFL, WKI,
+     $                   WKR
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, DLANHS
+      EXTERNAL           LSAME, DLAMCH, DLANHS
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLAEIN, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters.
+*
+      BOTHV = LSAME( SIDE, 'B' )
+      RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV
+      LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV
+*
+      FROMQR = LSAME( EIGSRC, 'Q' )
+*
+      NOINIT = LSAME( INITV, 'N' )
+*
+*     Set M to the number of columns required to store the selected
+*     eigenvectors, and standardize the array SELECT.
+*
+      M = 0
+      PAIR = .FALSE.
+      DO 10 K = 1, N
+         IF( PAIR ) THEN
+            PAIR = .FALSE.
+            SELECT( K ) = .FALSE.
+         ELSE
+            IF( WI( K ).EQ.ZERO ) THEN
+               IF( SELECT( K ) )
+     $            M = M + 1
+            ELSE
+               PAIR = .TRUE.
+               IF( SELECT( K ) .OR. SELECT( K+1 ) ) THEN
+                  SELECT( K ) = .TRUE.
+                  M = M + 2
+               END IF
+            END IF
+         END IF
+   10 CONTINUE
+*
+      INFO = 0
+      IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN
+         INFO = -1
+      ELSE IF( .NOT.FROMQR .AND. .NOT.LSAME( EIGSRC, 'N' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOINIT .AND. .NOT.LSAME( INITV, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDH.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDVL.LT.1 .OR. ( LEFTV .AND. LDVL.LT.N ) ) THEN
+         INFO = -11
+      ELSE IF( LDVR.LT.1 .OR. ( RIGHTV .AND. LDVR.LT.N ) ) THEN
+         INFO = -13
+      ELSE IF( MM.LT.M ) THEN
+         INFO = -14
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DHSEIN', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Set machine-dependent constants.
+*
+      UNFL = DLAMCH( 'Safe minimum' )
+      ULP = DLAMCH( 'Precision' )
+      SMLNUM = UNFL*( N / ULP )
+      BIGNUM = ( ONE-ULP ) / SMLNUM
+*
+      LDWORK = N + 1
+*
+      KL = 1
+      KLN = 0
+      IF( FROMQR ) THEN
+         KR = 0
+      ELSE
+         KR = N
+      END IF
+      KSR = 1
+*
+      DO 120 K = 1, N
+         IF( SELECT( K ) ) THEN
+*
+*           Compute eigenvector(s) corresponding to W(K).
+*
+            IF( FROMQR ) THEN
+*
+*              If affiliation of eigenvalues is known, check whether
+*              the matrix splits.
+*
+*              Determine KL and KR such that 1 <= KL <= K <= KR <= N
+*              and H(KL,KL-1) and H(KR+1,KR) are zero (or KL = 1 or
+*              KR = N).
+*
+*              Then inverse iteration can be performed with the
+*              submatrix H(KL:N,KL:N) for a left eigenvector, and with
+*              the submatrix H(1:KR,1:KR) for a right eigenvector.
+*
+               DO 20 I = K, KL + 1, -1
+                  IF( H( I, I-1 ).EQ.ZERO )
+     $               GO TO 30
+   20          CONTINUE
+   30          CONTINUE
+               KL = I
+               IF( K.GT.KR ) THEN
+                  DO 40 I = K, N - 1
+                     IF( H( I+1, I ).EQ.ZERO )
+     $                  GO TO 50
+   40             CONTINUE
+   50             CONTINUE
+                  KR = I
+               END IF
+            END IF
+*
+            IF( KL.NE.KLN ) THEN
+               KLN = KL
+*
+*              Compute infinity-norm of submatrix H(KL:KR,KL:KR) if it
+*              has not ben computed before.
+*
+               HNORM = DLANHS( 'I', KR-KL+1, H( KL, KL ), LDH, WORK )
+               IF( HNORM.GT.ZERO ) THEN
+                  EPS3 = HNORM*ULP
+               ELSE
+                  EPS3 = SMLNUM
+               END IF
+            END IF
+*
+*           Perturb eigenvalue if it is close to any previous
+*           selected eigenvalues affiliated to the submatrix
+*           H(KL:KR,KL:KR). Close roots are modified by EPS3.
+*
+            WKR = WR( K )
+            WKI = WI( K )
+   60       CONTINUE
+            DO 70 I = K - 1, KL, -1
+               IF( SELECT( I ) .AND. ABS( WR( I )-WKR )+
+     $             ABS( WI( I )-WKI ).LT.EPS3 ) THEN
+                  WKR = WKR + EPS3
+                  GO TO 60
+               END IF
+   70       CONTINUE
+            WR( K ) = WKR
+*
+            PAIR = WKI.NE.ZERO
+            IF( PAIR ) THEN
+               KSI = KSR + 1
+            ELSE
+               KSI = KSR
+            END IF
+            IF( LEFTV ) THEN
+*
+*              Compute left eigenvector.
+*
+               CALL DLAEIN( .FALSE., NOINIT, N-KL+1, H( KL, KL ), LDH,
+     $                      WKR, WKI, VL( KL, KSR ), VL( KL, KSI ),
+     $                      WORK, LDWORK, WORK( N*N+N+1 ), EPS3, SMLNUM,
+     $                      BIGNUM, IINFO )
+               IF( IINFO.GT.0 ) THEN
+                  IF( PAIR ) THEN
+                     INFO = INFO + 2
+                  ELSE
+                     INFO = INFO + 1
+                  END IF
+                  IFAILL( KSR ) = K
+                  IFAILL( KSI ) = K
+               ELSE
+                  IFAILL( KSR ) = 0
+                  IFAILL( KSI ) = 0
+               END IF
+               DO 80 I = 1, KL - 1
+                  VL( I, KSR ) = ZERO
+   80          CONTINUE
+               IF( PAIR ) THEN
+                  DO 90 I = 1, KL - 1
+                     VL( I, KSI ) = ZERO
+   90             CONTINUE
+               END IF
+            END IF
+            IF( RIGHTV ) THEN
+*
+*              Compute right eigenvector.
+*
+               CALL DLAEIN( .TRUE., NOINIT, KR, H, LDH, WKR, WKI,
+     $                      VR( 1, KSR ), VR( 1, KSI ), WORK, LDWORK,
+     $                      WORK( N*N+N+1 ), EPS3, SMLNUM, BIGNUM,
+     $                      IINFO )
+               IF( IINFO.GT.0 ) THEN
+                  IF( PAIR ) THEN
+                     INFO = INFO + 2
+                  ELSE
+                     INFO = INFO + 1
+                  END IF
+                  IFAILR( KSR ) = K
+                  IFAILR( KSI ) = K
+               ELSE
+                  IFAILR( KSR ) = 0
+                  IFAILR( KSI ) = 0
+               END IF
+               DO 100 I = KR + 1, N
+                  VR( I, KSR ) = ZERO
+  100          CONTINUE
+               IF( PAIR ) THEN
+                  DO 110 I = KR + 1, N
+                     VR( I, KSI ) = ZERO
+  110             CONTINUE
+               END IF
+            END IF
+*
+            IF( PAIR ) THEN
+               KSR = KSR + 2
+            ELSE
+               KSR = KSR + 1
+            END IF
+         END IF
+  120 CONTINUE
+*
+      RETURN
+*
+*     End of DHSEIN
+*
+      END
diff --git a/costa/native/external/lapack/dhseqr.f b/costa/native/external/lapack/dhseqr.f
new file mode 100644
index 000000000..48dbadac7
--- /dev/null
+++ b/costa/native/external/lapack/dhseqr.f
@@ -0,0 +1,467 @@
+      SUBROUTINE DHSEQR( JOB, COMPZ, N, ILO, IHI, H, LDH, WR, WI, Z,
+     $                   LDZ, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPZ, JOB
+      INTEGER            IHI, ILO, INFO, LDH, LDZ, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   H( LDH, * ), WI( * ), WORK( * ), WR( * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DHSEQR computes the eigenvalues of a real upper Hessenberg matrix H
+*  and, optionally, the matrices T and Z from the Schur decomposition
+*  H = Z T Z**T, where T is an upper quasi-triangular matrix (the Schur
+*  form), and Z is the orthogonal matrix of Schur vectors.
+*
+*  Optionally Z may be postmultiplied into an input orthogonal matrix Q,
+*  so that this routine can give the Schur factorization of a matrix A
+*  which has been reduced to the Hessenberg form H by the orthogonal
+*  matrix Q:  A = Q*H*Q**T = (QZ)*T*(QZ)**T.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          = 'E':  compute eigenvalues only;
+*          = 'S':  compute eigenvalues and the Schur form T.
+*
+*  COMPZ   (input) CHARACTER*1
+*          = 'N':  no Schur vectors are computed;
+*          = 'I':  Z is initialized to the unit matrix and the matrix Z
+*                  of Schur vectors of H is returned;
+*          = 'V':  Z must contain an orthogonal matrix Q on entry, and
+*                  the product Q*Z is returned.
+*
+*  N       (input) INTEGER
+*          The order of the matrix H.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          It is assumed that H is already upper triangular in rows
+*          and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
+*          set by a previous call to DGEBAL, and then passed to SGEHRD
+*          when the matrix output by DGEBAL is reduced to Hessenberg
+*          form. Otherwise ILO and IHI should be set to 1 and N
+*          respectively.
+*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+*
+*  H       (input/output) DOUBLE PRECISION array, dimension (LDH,N)
+*          On entry, the upper Hessenberg matrix H.
+*          On exit, if JOB = 'S', H contains the upper quasi-triangular
+*          matrix T from the Schur decomposition (the Schur form);
+*          2-by-2 diagonal blocks (corresponding to complex conjugate
+*          pairs of eigenvalues) are returned in standard form, with
+*          H(i,i) = H(i+1,i+1) and H(i+1,i)*H(i,i+1) < 0. If JOB = 'E',
+*          the contents of H are unspecified on exit.
+*
+*  LDH     (input) INTEGER
+*          The leading dimension of the array H. LDH >= max(1,N).
+*
+*  WR      (output) DOUBLE PRECISION array, dimension (N)
+*  WI      (output) DOUBLE PRECISION array, dimension (N)
+*          The real and imaginary parts, respectively, of the computed
+*          eigenvalues. If two eigenvalues are computed as a complex
+*          conjugate pair, they are stored in consecutive elements of
+*          WR and WI, say the i-th and (i+1)th, with WI(i) > 0 and
+*          WI(i+1) < 0. If JOB = 'S', the eigenvalues are stored in the
+*          same order as on the diagonal of the Schur form returned in
+*          H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2
+*          diagonal block, WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and
+*          WI(i+1) = -WI(i).
+*
+*  Z       (input/output) DOUBLE PRECISION array, dimension (LDZ,N)
+*          If COMPZ = 'N': Z is not referenced.
+*          If COMPZ = 'I': on entry, Z need not be set, and on exit, Z
+*          contains the orthogonal matrix Z of the Schur vectors of H.
+*          If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q,
+*          which is assumed to be equal to the unit matrix except for
+*          the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z.
+*          Normally Q is the orthogonal matrix generated by DORGHR after
+*          the call to DGEHRD which formed the Hessenberg matrix H.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.
+*          LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,N).
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, DHSEQR failed to compute all of the
+*                eigenvalues in a total of 30*(IHI-ILO+1) iterations;
+*                elements 1:ilo-1 and i+1:n of WR and WI contain those
+*                eigenvalues which have been successfully computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 )
+      DOUBLE PRECISION   CONST
+      PARAMETER          ( CONST = 1.5D+0 )
+      INTEGER            NSMAX, LDS
+      PARAMETER          ( NSMAX = 15, LDS = NSMAX )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            INITZ, LQUERY, WANTT, WANTZ
+      INTEGER            I, I1, I2, IERR, II, ITEMP, ITN, ITS, J, K, L,
+     $                   MAXB, NH, NR, NS, NV
+      DOUBLE PRECISION   ABSW, OVFL, SMLNUM, TAU, TEMP, TST1, ULP, UNFL
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   S( LDS, NSMAX ), V( NSMAX+1 ), VV( NSMAX+1 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IDAMAX, ILAENV
+      DOUBLE PRECISION   DLAMCH, DLANHS, DLAPY2
+      EXTERNAL           LSAME, IDAMAX, ILAENV, DLAMCH, DLANHS, DLAPY2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DGEMV, DLACPY, DLAHQR, DLARFG, DLARFX,
+     $                   DLASET, DSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      WANTT = LSAME( JOB, 'S' )
+      INITZ = LSAME( COMPZ, 'I' )
+      WANTZ = INITZ .OR. LSAME( COMPZ, 'V' )
+*
+      INFO = 0
+      WORK( 1 ) = MAX( 1, N )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.LSAME( JOB, 'E' ) .AND. .NOT.WANTT ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( COMPZ, 'N' ) .AND. .NOT.WANTZ ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN
+         INFO = -5
+      ELSE IF( LDH.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDZ.LT.1 .OR. WANTZ .AND. LDZ.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DHSEQR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Initialize Z, if necessary
+*
+      IF( INITZ )
+     $   CALL DLASET( 'Full', N, N, ZERO, ONE, Z, LDZ )
+*
+*     Store the eigenvalues isolated by DGEBAL.
+*
+      DO 10 I = 1, ILO - 1
+         WR( I ) = H( I, I )
+         WI( I ) = ZERO
+   10 CONTINUE
+      DO 20 I = IHI + 1, N
+         WR( I ) = H( I, I )
+         WI( I ) = ZERO
+   20 CONTINUE
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+      IF( ILO.EQ.IHI ) THEN
+         WR( ILO ) = H( ILO, ILO )
+         WI( ILO ) = ZERO
+         RETURN
+      END IF
+*
+*     Set rows and columns ILO to IHI to zero below the first
+*     subdiagonal.
+*
+      DO 40 J = ILO, IHI - 2
+         DO 30 I = J + 2, N
+            H( I, J ) = ZERO
+   30    CONTINUE
+   40 CONTINUE
+      NH = IHI - ILO + 1
+*
+*     Determine the order of the multi-shift QR algorithm to be used.
+*
+      NS = ILAENV( 4, 'DHSEQR', JOB // COMPZ, N, ILO, IHI, -1 )
+      MAXB = ILAENV( 8, 'DHSEQR', JOB // COMPZ, N, ILO, IHI, -1 )
+      IF( NS.LE.2 .OR. NS.GT.NH .OR. MAXB.GE.NH ) THEN
+*
+*        Use the standard double-shift algorithm
+*
+         CALL DLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, ILO,
+     $                IHI, Z, LDZ, INFO )
+         RETURN
+      END IF
+      MAXB = MAX( 3, MAXB )
+      NS = MIN( NS, MAXB, NSMAX )
+*
+*     Now 2 < NS <= MAXB < NH.
+*
+*     Set machine-dependent constants for the stopping criterion.
+*     If norm(H) <= sqrt(OVFL), overflow should not occur.
+*
+      UNFL = DLAMCH( 'Safe minimum' )
+      OVFL = ONE / UNFL
+      CALL DLABAD( UNFL, OVFL )
+      ULP = DLAMCH( 'Precision' )
+      SMLNUM = UNFL*( NH / ULP )
+*
+*     I1 and I2 are the indices of the first row and last column of H
+*     to which transformations must be applied. If eigenvalues only are
+*     being computed, I1 and I2 are set inside the main loop.
+*
+      IF( WANTT ) THEN
+         I1 = 1
+         I2 = N
+      END IF
+*
+*     ITN is the total number of multiple-shift QR iterations allowed.
+*
+      ITN = 30*NH
+*
+*     The main loop begins here. I is the loop index and decreases from
+*     IHI to ILO in steps of at most MAXB. Each iteration of the loop
+*     works with the active submatrix in rows and columns L to I.
+*     Eigenvalues I+1 to IHI have already converged. Either L = ILO or
+*     H(L,L-1) is negligible so that the matrix splits.
+*
+      I = IHI
+   50 CONTINUE
+      L = ILO
+      IF( I.LT.ILO )
+     $   GO TO 170
+*
+*     Perform multiple-shift QR iterations on rows and columns ILO to I
+*     until a submatrix of order at most MAXB splits off at the bottom
+*     because a subdiagonal element has become negligible.
+*
+      DO 150 ITS = 0, ITN
+*
+*        Look for a single small subdiagonal element.
+*
+         DO 60 K = I, L + 1, -1
+            TST1 = ABS( H( K-1, K-1 ) ) + ABS( H( K, K ) )
+            IF( TST1.EQ.ZERO )
+     $         TST1 = DLANHS( '1', I-L+1, H( L, L ), LDH, WORK )
+            IF( ABS( H( K, K-1 ) ).LE.MAX( ULP*TST1, SMLNUM ) )
+     $         GO TO 70
+   60    CONTINUE
+   70    CONTINUE
+         L = K
+         IF( L.GT.ILO ) THEN
+*
+*           H(L,L-1) is negligible.
+*
+            H( L, L-1 ) = ZERO
+         END IF
+*
+*        Exit from loop if a submatrix of order <= MAXB has split off.
+*
+         IF( L.GE.I-MAXB+1 )
+     $      GO TO 160
+*
+*        Now the active submatrix is in rows and columns L to I. If
+*        eigenvalues only are being computed, only the active submatrix
+*        need be transformed.
+*
+         IF( .NOT.WANTT ) THEN
+            I1 = L
+            I2 = I
+         END IF
+*
+         IF( ITS.EQ.20 .OR. ITS.EQ.30 ) THEN
+*
+*           Exceptional shifts.
+*
+            DO 80 II = I - NS + 1, I
+               WR( II ) = CONST*( ABS( H( II, II-1 ) )+
+     $                    ABS( H( II, II ) ) )
+               WI( II ) = ZERO
+   80       CONTINUE
+         ELSE
+*
+*           Use eigenvalues of trailing submatrix of order NS as shifts.
+*
+            CALL DLACPY( 'Full', NS, NS, H( I-NS+1, I-NS+1 ), LDH, S,
+     $                   LDS )
+            CALL DLAHQR( .FALSE., .FALSE., NS, 1, NS, S, LDS,
+     $                   WR( I-NS+1 ), WI( I-NS+1 ), 1, NS, Z, LDZ,
+     $                   IERR )
+            IF( IERR.GT.0 ) THEN
+*
+*              If DLAHQR failed to compute all NS eigenvalues, use the
+*              unconverged diagonal elements as the remaining shifts.
+*
+               DO 90 II = 1, IERR
+                  WR( I-NS+II ) = S( II, II )
+                  WI( I-NS+II ) = ZERO
+   90          CONTINUE
+            END IF
+         END IF
+*
+*        Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns))
+*        where G is the Hessenberg submatrix H(L:I,L:I) and w is
+*        the vector of shifts (stored in WR and WI). The result is
+*        stored in the local array V.
+*
+         V( 1 ) = ONE
+         DO 100 II = 2, NS + 1
+            V( II ) = ZERO
+  100    CONTINUE
+         NV = 1
+         DO 120 J = I - NS + 1, I
+            IF( WI( J ).GE.ZERO ) THEN
+               IF( WI( J ).EQ.ZERO ) THEN
+*
+*                 real shift
+*
+                  CALL DCOPY( NV+1, V, 1, VV, 1 )
+                  CALL DGEMV( 'No transpose', NV+1, NV, ONE, H( L, L ),
+     $                        LDH, VV, 1, -WR( J ), V, 1 )
+                  NV = NV + 1
+               ELSE IF( WI( J ).GT.ZERO ) THEN
+*
+*                 complex conjugate pair of shifts
+*
+                  CALL DCOPY( NV+1, V, 1, VV, 1 )
+                  CALL DGEMV( 'No transpose', NV+1, NV, ONE, H( L, L ),
+     $                        LDH, V, 1, -TWO*WR( J ), VV, 1 )
+                  ITEMP = IDAMAX( NV+1, VV, 1 )
+                  TEMP = ONE / MAX( ABS( VV( ITEMP ) ), SMLNUM )
+                  CALL DSCAL( NV+1, TEMP, VV, 1 )
+                  ABSW = DLAPY2( WR( J ), WI( J ) )
+                  TEMP = ( TEMP*ABSW )*ABSW
+                  CALL DGEMV( 'No transpose', NV+2, NV+1, ONE,
+     $                        H( L, L ), LDH, VV, 1, TEMP, V, 1 )
+                  NV = NV + 2
+               END IF
+*
+*              Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero,
+*              reset it to the unit vector.
+*
+               ITEMP = IDAMAX( NV, V, 1 )
+               TEMP = ABS( V( ITEMP ) )
+               IF( TEMP.EQ.ZERO ) THEN
+                  V( 1 ) = ONE
+                  DO 110 II = 2, NV
+                     V( II ) = ZERO
+  110             CONTINUE
+               ELSE
+                  TEMP = MAX( TEMP, SMLNUM )
+                  CALL DSCAL( NV, ONE / TEMP, V, 1 )
+               END IF
+            END IF
+  120    CONTINUE
+*
+*        Multiple-shift QR step
+*
+         DO 140 K = L, I - 1
+*
+*           The first iteration of this loop determines a reflection G
+*           from the vector V and applies it from left and right to H,
+*           thus creating a nonzero bulge below the subdiagonal.
+*
+*           Each subsequent iteration determines a reflection G to
+*           restore the Hessenberg form in the (K-1)th column, and thus
+*           chases the bulge one step toward the bottom of the active
+*           submatrix. NR is the order of G.
+*
+            NR = MIN( NS+1, I-K+1 )
+            IF( K.GT.L )
+     $         CALL DCOPY( NR, H( K, K-1 ), 1, V, 1 )
+            CALL DLARFG( NR, V( 1 ), V( 2 ), 1, TAU )
+            IF( K.GT.L ) THEN
+               H( K, K-1 ) = V( 1 )
+               DO 130 II = K + 1, I
+                  H( II, K-1 ) = ZERO
+  130          CONTINUE
+            END IF
+            V( 1 ) = ONE
+*
+*           Apply G from the left to transform the rows of the matrix in
+*           columns K to I2.
+*
+            CALL DLARFX( 'Left', NR, I2-K+1, V, TAU, H( K, K ), LDH,
+     $                   WORK )
+*
+*           Apply G from the right to transform the columns of the
+*           matrix in rows I1 to min(K+NR,I).
+*
+            CALL DLARFX( 'Right', MIN( K+NR, I )-I1+1, NR, V, TAU,
+     $                   H( I1, K ), LDH, WORK )
+*
+            IF( WANTZ ) THEN
+*
+*              Accumulate transformations in the matrix Z
+*
+               CALL DLARFX( 'Right', NH, NR, V, TAU, Z( ILO, K ), LDZ,
+     $                      WORK )
+            END IF
+  140    CONTINUE
+*
+  150 CONTINUE
+*
+*     Failure to converge in remaining number of iterations
+*
+      INFO = I
+      RETURN
+*
+  160 CONTINUE
+*
+*     A submatrix of order <= MAXB in rows and columns L to I has split
+*     off. Use the double-shift QR algorithm to handle it.
+*
+      CALL DLAHQR( WANTT, WANTZ, N, L, I, H, LDH, WR, WI, ILO, IHI, Z,
+     $             LDZ, INFO )
+      IF( INFO.GT.0 )
+     $   RETURN
+*
+*     Decrement number of remaining iterations, and return to start of
+*     the main loop with a new value of I.
+*
+      ITN = ITN - ITS
+      I = L - 1
+      GO TO 50
+*
+  170 CONTINUE
+      WORK( 1 ) = MAX( 1, N )
+      RETURN
+*
+*     End of DHSEQR
+*
+      END
diff --git a/costa/native/external/lapack/dlabad.f b/costa/native/external/lapack/dlabad.f
new file mode 100644
index 000000000..2a80925be
--- /dev/null
+++ b/costa/native/external/lapack/dlabad.f
@@ -0,0 +1,56 @@
+      SUBROUTINE DLABAD( SMALL, LARGE )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   LARGE, SMALL
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLABAD takes as input the values computed by DLAMCH for underflow and
+*  overflow, and returns the square root of each of these values if the
+*  log of LARGE is sufficiently large.  This subroutine is intended to
+*  identify machines with a large exponent range, such as the Crays, and
+*  redefine the underflow and overflow limits to be the square roots of
+*  the values computed by DLAMCH.  This subroutine is needed because
+*  DLAMCH does not compensate for poor arithmetic in the upper half of
+*  the exponent range, as is found on a Cray.
+*
+*  Arguments
+*  =========
+*
+*  SMALL   (input/output) DOUBLE PRECISION
+*          On entry, the underflow threshold as computed by DLAMCH.
+*          On exit, if LOG10(LARGE) is sufficiently large, the square
+*          root of SMALL, otherwise unchanged.
+*
+*  LARGE   (input/output) DOUBLE PRECISION
+*          On entry, the overflow threshold as computed by DLAMCH.
+*          On exit, if LOG10(LARGE) is sufficiently large, the square
+*          root of LARGE, otherwise unchanged.
+*
+*  =====================================================================
+*
+*     .. Intrinsic Functions ..
+      INTRINSIC          LOG10, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     If it looks like we're on a Cray, take the square root of
+*     SMALL and LARGE to avoid overflow and underflow problems.
+*
+      IF( LOG10( LARGE ).GT.2000.D0 ) THEN
+         SMALL = SQRT( SMALL )
+         LARGE = SQRT( LARGE )
+      END IF
+*
+      RETURN
+*
+*     End of DLABAD
+*
+      END
diff --git a/costa/native/external/lapack/dlabrd.f b/costa/native/external/lapack/dlabrd.f
new file mode 100644
index 000000000..bea251b58
--- /dev/null
+++ b/costa/native/external/lapack/dlabrd.f
@@ -0,0 +1,291 @@
+      SUBROUTINE DLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y,
+     $                   LDY )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            LDA, LDX, LDY, M, N, NB
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), D( * ), E( * ), TAUP( * ),
+     $                   TAUQ( * ), X( LDX, * ), Y( LDY, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLABRD reduces the first NB rows and columns of a real general
+*  m by n matrix A to upper or lower bidiagonal form by an orthogonal
+*  transformation Q' * A * P, and returns the matrices X and Y which
+*  are needed to apply the transformation to the unreduced part of A.
+*
+*  If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower
+*  bidiagonal form.
+*
+*  This is an auxiliary routine called by DGEBRD
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows in the matrix A.
+*
+*  N       (input) INTEGER
+*          The number of columns in the matrix A.
+*
+*  NB      (input) INTEGER
+*          The number of leading rows and columns of A to be reduced.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the m by n general matrix to be reduced.
+*          On exit, the first NB rows and columns of the matrix are
+*          overwritten; the rest of the array is unchanged.
+*          If m >= n, elements on and below the diagonal in the first NB
+*            columns, with the array TAUQ, represent the orthogonal
+*            matrix Q as a product of elementary reflectors; and
+*            elements above the diagonal in the first NB rows, with the
+*            array TAUP, represent the orthogonal matrix P as a product
+*            of elementary reflectors.
+*          If m < n, elements below the diagonal in the first NB
+*            columns, with the array TAUQ, represent the orthogonal
+*            matrix Q as a product of elementary reflectors, and
+*            elements on and above the diagonal in the first NB rows,
+*            with the array TAUP, represent the orthogonal matrix P as
+*            a product of elementary reflectors.
+*          See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  D       (output) DOUBLE PRECISION array, dimension (NB)
+*          The diagonal elements of the first NB rows and columns of
+*          the reduced matrix.  D(i) = A(i,i).
+*
+*  E       (output) DOUBLE PRECISION array, dimension (NB)
+*          The off-diagonal elements of the first NB rows and columns of
+*          the reduced matrix.
+*
+*  TAUQ    (output) DOUBLE PRECISION array dimension (NB)
+*          The scalar factors of the elementary reflectors which
+*          represent the orthogonal matrix Q. See Further Details.
+*
+*  TAUP    (output) DOUBLE PRECISION array, dimension (NB)
+*          The scalar factors of the elementary reflectors which
+*          represent the orthogonal matrix P. See Further Details.
+*
+*  X       (output) DOUBLE PRECISION array, dimension (LDX,NB)
+*          The m-by-nb matrix X required to update the unreduced part
+*          of A.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X. LDX >= M.
+*
+*  Y       (output) DOUBLE PRECISION array, dimension (LDY,NB)
+*          The n-by-nb matrix Y required to update the unreduced part
+*          of A.
+*
+*  LDY     (output) INTEGER
+*          The leading dimension of the array Y. LDY >= N.
+*
+*  Further Details
+*  ===============
+*
+*  The matrices Q and P are represented as products of elementary
+*  reflectors:
+*
+*     Q = H(1) H(2) . . . H(nb)  and  P = G(1) G(2) . . . G(nb)
+*
+*  Each H(i) and G(i) has the form:
+*
+*     H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+*
+*  where tauq and taup are real scalars, and v and u are real vectors.
+*
+*  If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in
+*  A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in
+*  A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
+*
+*  If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in
+*  A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in
+*  A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
+*
+*  The elements of the vectors v and u together form the m-by-nb matrix
+*  V and the nb-by-n matrix U' which are needed, with X and Y, to apply
+*  the transformation to the unreduced part of the matrix, using a block
+*  update of the form:  A := A - V*Y' - X*U'.
+*
+*  The contents of A on exit are illustrated by the following examples
+*  with nb = 2:
+*
+*  m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
+*
+*    (  1   1   u1  u1  u1 )           (  1   u1  u1  u1  u1  u1 )
+*    (  v1  1   1   u2  u2 )           (  1   1   u2  u2  u2  u2 )
+*    (  v1  v2  a   a   a  )           (  v1  1   a   a   a   a  )
+*    (  v1  v2  a   a   a  )           (  v1  v2  a   a   a   a  )
+*    (  v1  v2  a   a   a  )           (  v1  v2  a   a   a   a  )
+*    (  v1  v2  a   a   a  )
+*
+*  where a denotes an element of the original matrix which is unchanged,
+*  vi denotes an element of the vector defining H(i), and ui an element
+*  of the vector defining G(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMV, DLARFG, DSCAL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 .OR. N.LE.0 )
+     $   RETURN
+*
+      IF( M.GE.N ) THEN
+*
+*        Reduce to upper bidiagonal form
+*
+         DO 10 I = 1, NB
+*
+*           Update A(i:m,i)
+*
+            CALL DGEMV( 'No transpose', M-I+1, I-1, -ONE, A( I, 1 ),
+     $                  LDA, Y( I, 1 ), LDY, ONE, A( I, I ), 1 )
+            CALL DGEMV( 'No transpose', M-I+1, I-1, -ONE, X( I, 1 ),
+     $                  LDX, A( 1, I ), 1, ONE, A( I, I ), 1 )
+*
+*           Generate reflection Q(i) to annihilate A(i+1:m,i)
+*
+            CALL DLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1,
+     $                   TAUQ( I ) )
+            D( I ) = A( I, I )
+            IF( I.LT.N ) THEN
+               A( I, I ) = ONE
+*
+*              Compute Y(i+1:n,i)
+*
+               CALL DGEMV( 'Transpose', M-I+1, N-I, ONE, A( I, I+1 ),
+     $                     LDA, A( I, I ), 1, ZERO, Y( I+1, I ), 1 )
+               CALL DGEMV( 'Transpose', M-I+1, I-1, ONE, A( I, 1 ), LDA,
+     $                     A( I, I ), 1, ZERO, Y( 1, I ), 1 )
+               CALL DGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ),
+     $                     LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
+               CALL DGEMV( 'Transpose', M-I+1, I-1, ONE, X( I, 1 ), LDX,
+     $                     A( I, I ), 1, ZERO, Y( 1, I ), 1 )
+               CALL DGEMV( 'Transpose', I-1, N-I, -ONE, A( 1, I+1 ),
+     $                     LDA, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
+               CALL DSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 )
+*
+*              Update A(i,i+1:n)
+*
+               CALL DGEMV( 'No transpose', N-I, I, -ONE, Y( I+1, 1 ),
+     $                     LDY, A( I, 1 ), LDA, ONE, A( I, I+1 ), LDA )
+               CALL DGEMV( 'Transpose', I-1, N-I, -ONE, A( 1, I+1 ),
+     $                     LDA, X( I, 1 ), LDX, ONE, A( I, I+1 ), LDA )
+*
+*              Generate reflection P(i) to annihilate A(i,i+2:n)
+*
+               CALL DLARFG( N-I, A( I, I+1 ), A( I, MIN( I+2, N ) ),
+     $                      LDA, TAUP( I ) )
+               E( I ) = A( I, I+1 )
+               A( I, I+1 ) = ONE
+*
+*              Compute X(i+1:m,i)
+*
+               CALL DGEMV( 'No transpose', M-I, N-I, ONE, A( I+1, I+1 ),
+     $                     LDA, A( I, I+1 ), LDA, ZERO, X( I+1, I ), 1 )
+               CALL DGEMV( 'Transpose', N-I, I, ONE, Y( I+1, 1 ), LDY,
+     $                     A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 )
+               CALL DGEMV( 'No transpose', M-I, I, -ONE, A( I+1, 1 ),
+     $                     LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
+               CALL DGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ),
+     $                     LDA, A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 )
+               CALL DGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ),
+     $                     LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
+               CALL DSCAL( M-I, TAUP( I ), X( I+1, I ), 1 )
+            END IF
+   10    CONTINUE
+      ELSE
+*
+*        Reduce to lower bidiagonal form
+*
+         DO 20 I = 1, NB
+*
+*           Update A(i,i:n)
+*
+            CALL DGEMV( 'No transpose', N-I+1, I-1, -ONE, Y( I, 1 ),
+     $                  LDY, A( I, 1 ), LDA, ONE, A( I, I ), LDA )
+            CALL DGEMV( 'Transpose', I-1, N-I+1, -ONE, A( 1, I ), LDA,
+     $                  X( I, 1 ), LDX, ONE, A( I, I ), LDA )
+*
+*           Generate reflection P(i) to annihilate A(i,i+1:n)
+*
+            CALL DLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA,
+     $                   TAUP( I ) )
+            D( I ) = A( I, I )
+            IF( I.LT.M ) THEN
+               A( I, I ) = ONE
+*
+*              Compute X(i+1:m,i)
+*
+               CALL DGEMV( 'No transpose', M-I, N-I+1, ONE, A( I+1, I ),
+     $                     LDA, A( I, I ), LDA, ZERO, X( I+1, I ), 1 )
+               CALL DGEMV( 'Transpose', N-I+1, I-1, ONE, Y( I, 1 ), LDY,
+     $                     A( I, I ), LDA, ZERO, X( 1, I ), 1 )
+               CALL DGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ),
+     $                     LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
+               CALL DGEMV( 'No transpose', I-1, N-I+1, ONE, A( 1, I ),
+     $                     LDA, A( I, I ), LDA, ZERO, X( 1, I ), 1 )
+               CALL DGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ),
+     $                     LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
+               CALL DSCAL( M-I, TAUP( I ), X( I+1, I ), 1 )
+*
+*              Update A(i+1:m,i)
+*
+               CALL DGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ),
+     $                     LDA, Y( I, 1 ), LDY, ONE, A( I+1, I ), 1 )
+               CALL DGEMV( 'No transpose', M-I, I, -ONE, X( I+1, 1 ),
+     $                     LDX, A( 1, I ), 1, ONE, A( I+1, I ), 1 )
+*
+*              Generate reflection Q(i) to annihilate A(i+2:m,i)
+*
+               CALL DLARFG( M-I, A( I+1, I ), A( MIN( I+2, M ), I ), 1,
+     $                      TAUQ( I ) )
+               E( I ) = A( I+1, I )
+               A( I+1, I ) = ONE
+*
+*              Compute Y(i+1:n,i)
+*
+               CALL DGEMV( 'Transpose', M-I, N-I, ONE, A( I+1, I+1 ),
+     $                     LDA, A( I+1, I ), 1, ZERO, Y( I+1, I ), 1 )
+               CALL DGEMV( 'Transpose', M-I, I-1, ONE, A( I+1, 1 ), LDA,
+     $                     A( I+1, I ), 1, ZERO, Y( 1, I ), 1 )
+               CALL DGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ),
+     $                     LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
+               CALL DGEMV( 'Transpose', M-I, I, ONE, X( I+1, 1 ), LDX,
+     $                     A( I+1, I ), 1, ZERO, Y( 1, I ), 1 )
+               CALL DGEMV( 'Transpose', I, N-I, -ONE, A( 1, I+1 ), LDA,
+     $                     Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
+               CALL DSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 )
+            END IF
+   20    CONTINUE
+      END IF
+      RETURN
+*
+*     End of DLABRD
+*
+      END
diff --git a/costa/native/external/lapack/dlacon.f b/costa/native/external/lapack/dlacon.f
new file mode 100644
index 000000000..4efa0e816
--- /dev/null
+++ b/costa/native/external/lapack/dlacon.f
@@ -0,0 +1,204 @@
+      SUBROUTINE DLACON( N, V, X, ISGN, EST, KASE )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            KASE, N
+      DOUBLE PRECISION   EST
+*     ..
+*     .. Array Arguments ..
+      INTEGER            ISGN( * )
+      DOUBLE PRECISION   V( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLACON estimates the 1-norm of a square, real matrix A.
+*  Reverse communication is used for evaluating matrix-vector products.
+*
+*  Arguments
+*  =========
+*
+*  N      (input) INTEGER
+*         The order of the matrix.  N >= 1.
+*
+*  V      (workspace) DOUBLE PRECISION array, dimension (N)
+*         On the final return, V = A*W,  where  EST = norm(V)/norm(W)
+*         (W is not returned).
+*
+*  X      (input/output) DOUBLE PRECISION array, dimension (N)
+*         On an intermediate return, X should be overwritten by
+*               A * X,   if KASE=1,
+*               A' * X,  if KASE=2,
+*         and DLACON must be re-called with all the other parameters
+*         unchanged.
+*
+*  ISGN   (workspace) INTEGER array, dimension (N)
+*
+*  EST    (output) DOUBLE PRECISION
+*         An estimate (a lower bound) for norm(A).
+*
+*  KASE   (input/output) INTEGER
+*         On the initial call to DLACON, KASE should be 0.
+*         On an intermediate return, KASE will be 1 or 2, indicating
+*         whether X should be overwritten by A * X  or A' * X.
+*         On the final return from DLACON, KASE will again be 0.
+*
+*  Further Details
+*  ======= =======
+*
+*  Contributed by Nick Higham, University of Manchester.
+*  Originally named SONEST, dated March 16, 1988.
+*
+*  Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of
+*  a real or complex matrix, with applications to condition estimation",
+*  ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      DOUBLE PRECISION   ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, ITER, J, JLAST, JUMP
+      DOUBLE PRECISION   ALTSGN, ESTOLD, TEMP
+*     ..
+*     .. External Functions ..
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DASUM
+      EXTERNAL           IDAMAX, DASUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, NINT, SIGN
+*     ..
+*     .. Save statement ..
+      SAVE
+*     ..
+*     .. Executable Statements ..
+*
+      IF( KASE.EQ.0 ) THEN
+         DO 10 I = 1, N
+            X( I ) = ONE / DBLE( N )
+   10    CONTINUE
+         KASE = 1
+         JUMP = 1
+         RETURN
+      END IF
+*
+      GO TO ( 20, 40, 70, 110, 140 )JUMP
+*
+*     ................ ENTRY   (JUMP = 1)
+*     FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY A*X.
+*
+   20 CONTINUE
+      IF( N.EQ.1 ) THEN
+         V( 1 ) = X( 1 )
+         EST = ABS( V( 1 ) )
+*        ... QUIT
+         GO TO 150
+      END IF
+      EST = DASUM( N, X, 1 )
+*
+      DO 30 I = 1, N
+         X( I ) = SIGN( ONE, X( I ) )
+         ISGN( I ) = NINT( X( I ) )
+   30 CONTINUE
+      KASE = 2
+      JUMP = 2
+      RETURN
+*
+*     ................ ENTRY   (JUMP = 2)
+*     FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY TRANDPOSE(A)*X.
+*
+   40 CONTINUE
+      J = IDAMAX( N, X, 1 )
+      ITER = 2
+*
+*     MAIN LOOP - ITERATIONS 2,3,...,ITMAX.
+*
+   50 CONTINUE
+      DO 60 I = 1, N
+         X( I ) = ZERO
+   60 CONTINUE
+      X( J ) = ONE
+      KASE = 1
+      JUMP = 3
+      RETURN
+*
+*     ................ ENTRY   (JUMP = 3)
+*     X HAS BEEN OVERWRITTEN BY A*X.
+*
+   70 CONTINUE
+      CALL DCOPY( N, X, 1, V, 1 )
+      ESTOLD = EST
+      EST = DASUM( N, V, 1 )
+      DO 80 I = 1, N
+         IF( NINT( SIGN( ONE, X( I ) ) ).NE.ISGN( I ) )
+     $      GO TO 90
+   80 CONTINUE
+*     REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED.
+      GO TO 120
+*
+   90 CONTINUE
+*     TEST FOR CYCLING.
+      IF( EST.LE.ESTOLD )
+     $   GO TO 120
+*
+      DO 100 I = 1, N
+         X( I ) = SIGN( ONE, X( I ) )
+         ISGN( I ) = NINT( X( I ) )
+  100 CONTINUE
+      KASE = 2
+      JUMP = 4
+      RETURN
+*
+*     ................ ENTRY   (JUMP = 4)
+*     X HAS BEEN OVERWRITTEN BY TRANDPOSE(A)*X.
+*
+  110 CONTINUE
+      JLAST = J
+      J = IDAMAX( N, X, 1 )
+      IF( ( X( JLAST ).NE.ABS( X( J ) ) ) .AND. ( ITER.LT.ITMAX ) ) THEN
+         ITER = ITER + 1
+         GO TO 50
+      END IF
+*
+*     ITERATION COMPLETE.  FINAL STAGE.
+*
+  120 CONTINUE
+      ALTSGN = ONE
+      DO 130 I = 1, N
+         X( I ) = ALTSGN*( ONE+DBLE( I-1 ) / DBLE( N-1 ) )
+         ALTSGN = -ALTSGN
+  130 CONTINUE
+      KASE = 1
+      JUMP = 5
+      RETURN
+*
+*     ................ ENTRY   (JUMP = 5)
+*     X HAS BEEN OVERWRITTEN BY A*X.
+*
+  140 CONTINUE
+      TEMP = TWO*( DASUM( N, X, 1 ) / DBLE( 3*N ) )
+      IF( TEMP.GT.EST ) THEN
+         CALL DCOPY( N, X, 1, V, 1 )
+         EST = TEMP
+      END IF
+*
+  150 CONTINUE
+      KASE = 0
+      RETURN
+*
+*     End of DLACON
+*
+      END
diff --git a/costa/native/external/lapack/dlacpy.f b/costa/native/external/lapack/dlacpy.f
new file mode 100644
index 000000000..afec37171
--- /dev/null
+++ b/costa/native/external/lapack/dlacpy.f
@@ -0,0 +1,88 @@
+      SUBROUTINE DLACPY( UPLO, M, N, A, LDA, B, LDB )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            LDA, LDB, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLACPY copies all or part of a two-dimensional matrix A to another
+*  matrix B.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies the part of the matrix A to be copied to B.
+*          = 'U':      Upper triangular part
+*          = 'L':      Lower triangular part
+*          Otherwise:  All of the matrix A
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+*          The m by n matrix A.  If UPLO = 'U', only the upper triangle
+*          or trapezoid is accessed; if UPLO = 'L', only the lower
+*          triangle or trapezoid is accessed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  B       (output) DOUBLE PRECISION array, dimension (LDB,N)
+*          On exit, B = A in the locations specified by UPLO.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,M).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+      IF( LSAME( UPLO, 'U' ) ) THEN
+         DO 20 J = 1, N
+            DO 10 I = 1, MIN( J, M )
+               B( I, J ) = A( I, J )
+   10       CONTINUE
+   20    CONTINUE
+      ELSE IF( LSAME( UPLO, 'L' ) ) THEN
+         DO 40 J = 1, N
+            DO 30 I = J, M
+               B( I, J ) = A( I, J )
+   30       CONTINUE
+   40    CONTINUE
+      ELSE
+         DO 60 J = 1, N
+            DO 50 I = 1, M
+               B( I, J ) = A( I, J )
+   50       CONTINUE
+   60    CONTINUE
+      END IF
+      RETURN
+*
+*     End of DLACPY
+*
+      END
diff --git a/costa/native/external/lapack/dladiv.f b/costa/native/external/lapack/dladiv.f
new file mode 100644
index 000000000..205716257
--- /dev/null
+++ b/costa/native/external/lapack/dladiv.f
@@ -0,0 +1,63 @@
+      SUBROUTINE DLADIV( A, B, C, D, P, Q )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   A, B, C, D, P, Q
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLADIV performs complex division in  real arithmetic
+*
+*                        a + i*b
+*             p + i*q = ---------
+*                        c + i*d
+*
+*  The algorithm is due to Robert L. Smith and can be found
+*  in D. Knuth, The art of Computer Programming, Vol.2, p.195
+*
+*  Arguments
+*  =========
+*
+*  A       (input) DOUBLE PRECISION
+*  B       (input) DOUBLE PRECISION
+*  C       (input) DOUBLE PRECISION
+*  D       (input) DOUBLE PRECISION
+*          The scalars a, b, c, and d in the above expression.
+*
+*  P       (output) DOUBLE PRECISION
+*  Q       (output) DOUBLE PRECISION
+*          The scalars p and q in the above expression.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      DOUBLE PRECISION   E, F
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS
+*     ..
+*     .. Executable Statements ..
+*
+      IF( ABS( D ).LT.ABS( C ) ) THEN
+         E = D / C
+         F = C + D*E
+         P = ( A+B*E ) / F
+         Q = ( B-A*E ) / F
+      ELSE
+         E = C / D
+         F = D + C*E
+         P = ( B+A*E ) / F
+         Q = ( -A+B*E ) / F
+      END IF
+*
+      RETURN
+*
+*     End of DLADIV
+*
+      END
diff --git a/costa/native/external/lapack/dlae2.f b/costa/native/external/lapack/dlae2.f
new file mode 100644
index 000000000..57c69b6dd
--- /dev/null
+++ b/costa/native/external/lapack/dlae2.f
@@ -0,0 +1,124 @@
+      SUBROUTINE DLAE2( A, B, C, RT1, RT2 )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   A, B, C, RT1, RT2
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAE2  computes the eigenvalues of a 2-by-2 symmetric matrix
+*     [  A   B  ]
+*     [  B   C  ].
+*  On return, RT1 is the eigenvalue of larger absolute value, and RT2
+*  is the eigenvalue of smaller absolute value.
+*
+*  Arguments
+*  =========
+*
+*  A       (input) DOUBLE PRECISION
+*          The (1,1) element of the 2-by-2 matrix.
+*
+*  B       (input) DOUBLE PRECISION
+*          The (1,2) and (2,1) elements of the 2-by-2 matrix.
+*
+*  C       (input) DOUBLE PRECISION
+*          The (2,2) element of the 2-by-2 matrix.
+*
+*  RT1     (output) DOUBLE PRECISION
+*          The eigenvalue of larger absolute value.
+*
+*  RT2     (output) DOUBLE PRECISION
+*          The eigenvalue of smaller absolute value.
+*
+*  Further Details
+*  ===============
+*
+*  RT1 is accurate to a few ulps barring over/underflow.
+*
+*  RT2 may be inaccurate if there is massive cancellation in the
+*  determinant A*C-B*B; higher precision or correctly rounded or
+*  correctly truncated arithmetic would be needed to compute RT2
+*  accurately in all cases.
+*
+*  Overflow is possible only if RT1 is within a factor of 5 of overflow.
+*  Underflow is harmless if the input data is 0 or exceeds
+*     underflow_threshold / macheps.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D0 )
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D0 )
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D0 )
+      DOUBLE PRECISION   HALF
+      PARAMETER          ( HALF = 0.5D0 )
+*     ..
+*     .. Local Scalars ..
+      DOUBLE PRECISION   AB, ACMN, ACMX, ADF, DF, RT, SM, TB
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Compute the eigenvalues
+*
+      SM = A + C
+      DF = A - C
+      ADF = ABS( DF )
+      TB = B + B
+      AB = ABS( TB )
+      IF( ABS( A ).GT.ABS( C ) ) THEN
+         ACMX = A
+         ACMN = C
+      ELSE
+         ACMX = C
+         ACMN = A
+      END IF
+      IF( ADF.GT.AB ) THEN
+         RT = ADF*SQRT( ONE+( AB / ADF )**2 )
+      ELSE IF( ADF.LT.AB ) THEN
+         RT = AB*SQRT( ONE+( ADF / AB )**2 )
+      ELSE
+*
+*        Includes case AB=ADF=0
+*
+         RT = AB*SQRT( TWO )
+      END IF
+      IF( SM.LT.ZERO ) THEN
+         RT1 = HALF*( SM-RT )
+*
+*        Order of execution important.
+*        To get fully accurate smaller eigenvalue,
+*        next line needs to be executed in higher precision.
+*
+         RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B
+      ELSE IF( SM.GT.ZERO ) THEN
+         RT1 = HALF*( SM+RT )
+*
+*        Order of execution important.
+*        To get fully accurate smaller eigenvalue,
+*        next line needs to be executed in higher precision.
+*
+         RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B
+      ELSE
+*
+*        Includes case RT1 = RT2 = 0
+*
+         RT1 = HALF*RT
+         RT2 = -HALF*RT
+      END IF
+      RETURN
+*
+*     End of DLAE2
+*
+      END
diff --git a/costa/native/external/lapack/dlaebz.f b/costa/native/external/lapack/dlaebz.f
new file mode 100644
index 000000000..c2c06c66e
--- /dev/null
+++ b/costa/native/external/lapack/dlaebz.f
@@ -0,0 +1,552 @@
+      SUBROUTINE DLAEBZ( IJOB, NITMAX, N, MMAX, MINP, NBMIN, ABSTOL,
+     $                   RELTOL, PIVMIN, D, E, E2, NVAL, AB, C, MOUT,
+     $                   NAB, WORK, IWORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            IJOB, INFO, MINP, MMAX, MOUT, N, NBMIN, NITMAX
+      DOUBLE PRECISION   ABSTOL, PIVMIN, RELTOL
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * ), NAB( MMAX, * ), NVAL( * )
+      DOUBLE PRECISION   AB( MMAX, * ), C( * ), D( * ), E( * ), E2( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAEBZ contains the iteration loops which compute and use the
+*  function N(w), which is the count of eigenvalues of a symmetric
+*  tridiagonal matrix T less than or equal to its argument  w.  It
+*  performs a choice of two types of loops:
+*
+*  IJOB=1, followed by
+*  IJOB=2: It takes as input a list of intervals and returns a list of
+*          sufficiently small intervals whose union contains the same
+*          eigenvalues as the union of the original intervals.
+*          The input intervals are (AB(j,1),AB(j,2)], j=1,...,MINP.
+*          The output interval (AB(j,1),AB(j,2)] will contain
+*          eigenvalues NAB(j,1)+1,...,NAB(j,2), where 1 <= j <= MOUT.
+*
+*  IJOB=3: It performs a binary search in each input interval
+*          (AB(j,1),AB(j,2)] for a point  w(j)  such that
+*          N(w(j))=NVAL(j), and uses  C(j)  as the starting point of
+*          the search.  If such a w(j) is found, then on output
+*          AB(j,1)=AB(j,2)=w.  If no such w(j) is found, then on output
+*          (AB(j,1),AB(j,2)] will be a small interval containing the
+*          point where N(w) jumps through NVAL(j), unless that point
+*          lies outside the initial interval.
+*
+*  Note that the intervals are in all cases half-open intervals,
+*  i.e., of the form  (a,b] , which includes  b  but not  a .
+*
+*  To avoid underflow, the matrix should be scaled so that its largest
+*  element is no greater than  overflow**(1/2) * underflow**(1/4)
+*  in absolute value.  To assure the most accurate computation
+*  of small eigenvalues, the matrix should be scaled to be
+*  not much smaller than that, either.
+*
+*  See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal
+*  Matrix", Report CS41, Computer Science Dept., Stanford
+*  University, July 21, 1966
+*
+*  Note: the arguments are, in general, *not* checked for unreasonable
+*  values.
+*
+*  Arguments
+*  =========
+*
+*  IJOB    (input) INTEGER
+*          Specifies what is to be done:
+*          = 1:  Compute NAB for the initial intervals.
+*          = 2:  Perform bisection iteration to find eigenvalues of T.
+*          = 3:  Perform bisection iteration to invert N(w), i.e.,
+*                to find a point which has a specified number of
+*                eigenvalues of T to its left.
+*          Other values will cause DLAEBZ to return with INFO=-1.
+*
+*  NITMAX  (input) INTEGER
+*          The maximum number of "levels" of bisection to be
+*          performed, i.e., an interval of width W will not be made
+*          smaller than 2^(-NITMAX) * W.  If not all intervals
+*          have converged after NITMAX iterations, then INFO is set
+*          to the number of non-converged intervals.
+*
+*  N       (input) INTEGER
+*          The dimension n of the tridiagonal matrix T.  It must be at
+*          least 1.
+*
+*  MMAX    (input) INTEGER
+*          The maximum number of intervals.  If more than MMAX intervals
+*          are generated, then DLAEBZ will quit with INFO=MMAX+1.
+*
+*  MINP    (input) INTEGER
+*          The initial number of intervals.  It may not be greater than
+*          MMAX.
+*
+*  NBMIN   (input) INTEGER
+*          The smallest number of intervals that should be processed
+*          using a vector loop.  If zero, then only the scalar loop
+*          will be used.
+*
+*  ABSTOL  (input) DOUBLE PRECISION
+*          The minimum (absolute) width of an interval.  When an
+*          interval is narrower than ABSTOL, or than RELTOL times the
+*          larger (in magnitude) endpoint, then it is considered to be
+*          sufficiently small, i.e., converged.  This must be at least
+*          zero.
+*
+*  RELTOL  (input) DOUBLE PRECISION
+*          The minimum relative width of an interval.  When an interval
+*          is narrower than ABSTOL, or than RELTOL times the larger (in
+*          magnitude) endpoint, then it is considered to be
+*          sufficiently small, i.e., converged.  Note: this should
+*          always be at least radix*machine epsilon.
+*
+*  PIVMIN  (input) DOUBLE PRECISION
+*          The minimum absolute value of a "pivot" in the Sturm
+*          sequence loop.  This *must* be at least  max |e(j)**2| *
+*          safe_min  and at least safe_min, where safe_min is at least
+*          the smallest number that can divide one without overflow.
+*
+*  D       (input) DOUBLE PRECISION array, dimension (N)
+*          The diagonal elements of the tridiagonal matrix T.
+*
+*  E       (input) DOUBLE PRECISION array, dimension (N)
+*          The offdiagonal elements of the tridiagonal matrix T in
+*          positions 1 through N-1.  E(N) is arbitrary.
+*
+*  E2      (input) DOUBLE PRECISION array, dimension (N)
+*          The squares of the offdiagonal elements of the tridiagonal
+*          matrix T.  E2(N) is ignored.
+*
+*  NVAL    (input/output) INTEGER array, dimension (MINP)
+*          If IJOB=1 or 2, not referenced.
+*          If IJOB=3, the desired values of N(w).  The elements of NVAL
+*          will be reordered to correspond with the intervals in AB.
+*          Thus, NVAL(j) on output will not, in general be the same as
+*          NVAL(j) on input, but it will correspond with the interval
+*          (AB(j,1),AB(j,2)] on output.
+*
+*  AB      (input/output) DOUBLE PRECISION array, dimension (MMAX,2)
+*          The endpoints of the intervals.  AB(j,1) is  a(j), the left
+*          endpoint of the j-th interval, and AB(j,2) is b(j), the
+*          right endpoint of the j-th interval.  The input intervals
+*          will, in general, be modified, split, and reordered by the
+*          calculation.
+*
+*  C       (input/output) DOUBLE PRECISION array, dimension (MMAX)
+*          If IJOB=1, ignored.
+*          If IJOB=2, workspace.
+*          If IJOB=3, then on input C(j) should be initialized to the
+*          first search point in the binary search.
+*
+*  MOUT    (output) INTEGER
+*          If IJOB=1, the number of eigenvalues in the intervals.
+*          If IJOB=2 or 3, the number of intervals output.
+*          If IJOB=3, MOUT will equal MINP.
+*
+*  NAB     (input/output) INTEGER array, dimension (MMAX,2)
+*          If IJOB=1, then on output NAB(i,j) will be set to N(AB(i,j)).
+*          If IJOB=2, then on input, NAB(i,j) should be set.  It must
+*             satisfy the condition:
+*             N(AB(i,1)) <= NAB(i,1) <= NAB(i,2) <= N(AB(i,2)),
+*             which means that in interval i only eigenvalues
+*             NAB(i,1)+1,...,NAB(i,2) will be considered.  Usually,
+*             NAB(i,j)=N(AB(i,j)), from a previous call to DLAEBZ with
+*             IJOB=1.
+*             On output, NAB(i,j) will contain
+*             max(na(k),min(nb(k),N(AB(i,j)))), where k is the index of
+*             the input interval that the output interval
+*             (AB(j,1),AB(j,2)] came from, and na(k) and nb(k) are the
+*             the input values of NAB(k,1) and NAB(k,2).
+*          If IJOB=3, then on output, NAB(i,j) contains N(AB(i,j)),
+*             unless N(w) > NVAL(i) for all search points  w , in which
+*             case NAB(i,1) will not be modified, i.e., the output
+*             value will be the same as the input value (modulo
+*             reorderings -- see NVAL and AB), or unless N(w) < NVAL(i)
+*             for all search points  w , in which case NAB(i,2) will
+*             not be modified.  Normally, NAB should be set to some
+*             distinctive value(s) before DLAEBZ is called.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (MMAX)
+*          Workspace.
+*
+*  IWORK   (workspace) INTEGER array, dimension (MMAX)
+*          Workspace.
+*
+*  INFO    (output) INTEGER
+*          = 0:       All intervals converged.
+*          = 1--MMAX: The last INFO intervals did not converge.
+*          = MMAX+1:  More than MMAX intervals were generated.
+*
+*  Further Details
+*  ===============
+*
+*      This routine is intended to be called only by other LAPACK
+*  routines, thus the interface is less user-friendly.  It is intended
+*  for two purposes:
+*
+*  (a) finding eigenvalues.  In this case, DLAEBZ should have one or
+*      more initial intervals set up in AB, and DLAEBZ should be called
+*      with IJOB=1.  This sets up NAB, and also counts the eigenvalues.
+*      Intervals with no eigenvalues would usually be thrown out at
+*      this point.  Also, if not all the eigenvalues in an interval i
+*      are desired, NAB(i,1) can be increased or NAB(i,2) decreased.
+*      For example, set NAB(i,1)=NAB(i,2)-1 to get the largest
+*      eigenvalue.  DLAEBZ is then called with IJOB=2 and MMAX
+*      no smaller than the value of MOUT returned by the call with
+*      IJOB=1.  After this (IJOB=2) call, eigenvalues NAB(i,1)+1
+*      through NAB(i,2) are approximately AB(i,1) (or AB(i,2)) to the
+*      tolerance specified by ABSTOL and RELTOL.
+*
+*  (b) finding an interval (a',b'] containing eigenvalues w(f),...,w(l).
+*      In this case, start with a Gershgorin interval  (a,b).  Set up
+*      AB to contain 2 search intervals, both initially (a,b).  One
+*      NVAL element should contain  f-1  and the other should contain  l
+*      , while C should contain a and b, resp.  NAB(i,1) should be -1
+*      and NAB(i,2) should be N+1, to flag an error if the desired
+*      interval does not lie in (a,b).  DLAEBZ is then called with
+*      IJOB=3.  On exit, if w(f-1) < w(f), then one of the intervals --
+*      j -- will have AB(j,1)=AB(j,2) and NAB(j,1)=NAB(j,2)=f-1, while
+*      if, to the specified tolerance, w(f-k)=...=w(f+r), k > 0 and r
+*      >= 0, then the interval will have  N(AB(j,1))=NAB(j,1)=f-k and
+*      N(AB(j,2))=NAB(j,2)=f+r.  The cases w(l) < w(l+1) and
+*      w(l-r)=...=w(l+k) are handled similarly.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, TWO, HALF
+      PARAMETER          ( ZERO = 0.0D0, TWO = 2.0D0,
+     $                   HALF = 1.0D0 / TWO )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            ITMP1, ITMP2, J, JI, JIT, JP, KF, KFNEW, KL,
+     $                   KLNEW
+      DOUBLE PRECISION   TMP1, TMP2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Check for Errors
+*
+      INFO = 0
+      IF( IJOB.LT.1 .OR. IJOB.GT.3 ) THEN
+         INFO = -1
+         RETURN
+      END IF
+*
+*     Initialize NAB
+*
+      IF( IJOB.EQ.1 ) THEN
+*
+*        Compute the number of eigenvalues in the initial intervals.
+*
+         MOUT = 0
+*DIR$ NOVECTOR
+         DO 30 JI = 1, MINP
+            DO 20 JP = 1, 2
+               TMP1 = D( 1 ) - AB( JI, JP )
+               IF( ABS( TMP1 ).LT.PIVMIN )
+     $            TMP1 = -PIVMIN
+               NAB( JI, JP ) = 0
+               IF( TMP1.LE.ZERO )
+     $            NAB( JI, JP ) = 1
+*
+               DO 10 J = 2, N
+                  TMP1 = D( J ) - E2( J-1 ) / TMP1 - AB( JI, JP )
+                  IF( ABS( TMP1 ).LT.PIVMIN )
+     $               TMP1 = -PIVMIN
+                  IF( TMP1.LE.ZERO )
+     $               NAB( JI, JP ) = NAB( JI, JP ) + 1
+   10          CONTINUE
+   20       CONTINUE
+            MOUT = MOUT + NAB( JI, 2 ) - NAB( JI, 1 )
+   30    CONTINUE
+         RETURN
+      END IF
+*
+*     Initialize for loop
+*
+*     KF and KL have the following meaning:
+*        Intervals 1,...,KF-1 have converged.
+*        Intervals KF,...,KL  still need to be refined.
+*
+      KF = 1
+      KL = MINP
+*
+*     If IJOB=2, initialize C.
+*     If IJOB=3, use the user-supplied starting point.
+*
+      IF( IJOB.EQ.2 ) THEN
+         DO 40 JI = 1, MINP
+            C( JI ) = HALF*( AB( JI, 1 )+AB( JI, 2 ) )
+   40    CONTINUE
+      END IF
+*
+*     Iteration loop
+*
+      DO 130 JIT = 1, NITMAX
+*
+*        Loop over intervals
+*
+         IF( KL-KF+1.GE.NBMIN .AND. NBMIN.GT.0 ) THEN
+*
+*           Begin of Parallel Version of the loop
+*
+            DO 60 JI = KF, KL
+*
+*              Compute N(c), the number of eigenvalues less than c
+*
+               WORK( JI ) = D( 1 ) - C( JI )
+               IWORK( JI ) = 0
+               IF( WORK( JI ).LE.PIVMIN ) THEN
+                  IWORK( JI ) = 1
+                  WORK( JI ) = MIN( WORK( JI ), -PIVMIN )
+               END IF
+*
+               DO 50 J = 2, N
+                  WORK( JI ) = D( J ) - E2( J-1 ) / WORK( JI ) - C( JI )
+                  IF( WORK( JI ).LE.PIVMIN ) THEN
+                     IWORK( JI ) = IWORK( JI ) + 1
+                     WORK( JI ) = MIN( WORK( JI ), -PIVMIN )
+                  END IF
+   50          CONTINUE
+   60       CONTINUE
+*
+            IF( IJOB.LE.2 ) THEN
+*
+*              IJOB=2: Choose all intervals containing eigenvalues.
+*
+               KLNEW = KL
+               DO 70 JI = KF, KL
+*
+*                 Insure that N(w) is monotone
+*
+                  IWORK( JI ) = MIN( NAB( JI, 2 ),
+     $                          MAX( NAB( JI, 1 ), IWORK( JI ) ) )
+*
+*                 Update the Queue -- add intervals if both halves
+*                 contain eigenvalues.
+*
+                  IF( IWORK( JI ).EQ.NAB( JI, 2 ) ) THEN
+*
+*                    No eigenvalue in the upper interval:
+*                    just use the lower interval.
+*
+                     AB( JI, 2 ) = C( JI )
+*
+                  ELSE IF( IWORK( JI ).EQ.NAB( JI, 1 ) ) THEN
+*
+*                    No eigenvalue in the lower interval:
+*                    just use the upper interval.
+*
+                     AB( JI, 1 ) = C( JI )
+                  ELSE
+                     KLNEW = KLNEW + 1
+                     IF( KLNEW.LE.MMAX ) THEN
+*
+*                       Eigenvalue in both intervals -- add upper to
+*                       queue.
+*
+                        AB( KLNEW, 2 ) = AB( JI, 2 )
+                        NAB( KLNEW, 2 ) = NAB( JI, 2 )
+                        AB( KLNEW, 1 ) = C( JI )
+                        NAB( KLNEW, 1 ) = IWORK( JI )
+                        AB( JI, 2 ) = C( JI )
+                        NAB( JI, 2 ) = IWORK( JI )
+                     ELSE
+                        INFO = MMAX + 1
+                     END IF
+                  END IF
+   70          CONTINUE
+               IF( INFO.NE.0 )
+     $            RETURN
+               KL = KLNEW
+            ELSE
+*
+*              IJOB=3: Binary search.  Keep only the interval containing
+*                      w   s.t. N(w) = NVAL
+*
+               DO 80 JI = KF, KL
+                  IF( IWORK( JI ).LE.NVAL( JI ) ) THEN
+                     AB( JI, 1 ) = C( JI )
+                     NAB( JI, 1 ) = IWORK( JI )
+                  END IF
+                  IF( IWORK( JI ).GE.NVAL( JI ) ) THEN
+                     AB( JI, 2 ) = C( JI )
+                     NAB( JI, 2 ) = IWORK( JI )
+                  END IF
+   80          CONTINUE
+            END IF
+*
+         ELSE
+*
+*           End of Parallel Version of the loop
+*
+*           Begin of Serial Version of the loop
+*
+            KLNEW = KL
+            DO 100 JI = KF, KL
+*
+*              Compute N(w), the number of eigenvalues less than w
+*
+               TMP1 = C( JI )
+               TMP2 = D( 1 ) - TMP1
+               ITMP1 = 0
+               IF( TMP2.LE.PIVMIN ) THEN
+                  ITMP1 = 1
+                  TMP2 = MIN( TMP2, -PIVMIN )
+               END IF
+*
+*              A series of compiler directives to defeat vectorization
+*              for the next loop
+*
+*$PL$ CMCHAR=' '
+CDIR$          NEXTSCALAR
+C$DIR          SCALAR
+CDIR$          NEXT SCALAR
+CVD$L          NOVECTOR
+CDEC$          NOVECTOR
+CVD$           NOVECTOR
+*VDIR          NOVECTOR
+*VOCL          LOOP,SCALAR
+CIBM           PREFER SCALAR
+*$PL$ CMCHAR='*'
+*
+               DO 90 J = 2, N
+                  TMP2 = D( J ) - E2( J-1 ) / TMP2 - TMP1
+                  IF( TMP2.LE.PIVMIN ) THEN
+                     ITMP1 = ITMP1 + 1
+                     TMP2 = MIN( TMP2, -PIVMIN )
+                  END IF
+   90          CONTINUE
+*
+               IF( IJOB.LE.2 ) THEN
+*
+*                 IJOB=2: Choose all intervals containing eigenvalues.
+*
+*                 Insure that N(w) is monotone
+*
+                  ITMP1 = MIN( NAB( JI, 2 ),
+     $                    MAX( NAB( JI, 1 ), ITMP1 ) )
+*
+*                 Update the Queue -- add intervals if both halves
+*                 contain eigenvalues.
+*
+                  IF( ITMP1.EQ.NAB( JI, 2 ) ) THEN
+*
+*                    No eigenvalue in the upper interval:
+*                    just use the lower interval.
+*
+                     AB( JI, 2 ) = TMP1
+*
+                  ELSE IF( ITMP1.EQ.NAB( JI, 1 ) ) THEN
+*
+*                    No eigenvalue in the lower interval:
+*                    just use the upper interval.
+*
+                     AB( JI, 1 ) = TMP1
+                  ELSE IF( KLNEW.LT.MMAX ) THEN
+*
+*                    Eigenvalue in both intervals -- add upper to queue.
+*
+                     KLNEW = KLNEW + 1
+                     AB( KLNEW, 2 ) = AB( JI, 2 )
+                     NAB( KLNEW, 2 ) = NAB( JI, 2 )
+                     AB( KLNEW, 1 ) = TMP1
+                     NAB( KLNEW, 1 ) = ITMP1
+                     AB( JI, 2 ) = TMP1
+                     NAB( JI, 2 ) = ITMP1
+                  ELSE
+                     INFO = MMAX + 1
+                     RETURN
+                  END IF
+               ELSE
+*
+*                 IJOB=3: Binary search.  Keep only the interval
+*                         containing  w  s.t. N(w) = NVAL
+*
+                  IF( ITMP1.LE.NVAL( JI ) ) THEN
+                     AB( JI, 1 ) = TMP1
+                     NAB( JI, 1 ) = ITMP1
+                  END IF
+                  IF( ITMP1.GE.NVAL( JI ) ) THEN
+                     AB( JI, 2 ) = TMP1
+                     NAB( JI, 2 ) = ITMP1
+                  END IF
+               END IF
+  100       CONTINUE
+            KL = KLNEW
+*
+*           End of Serial Version of the loop
+*
+         END IF
+*
+*        Check for convergence
+*
+         KFNEW = KF
+         DO 110 JI = KF, KL
+            TMP1 = ABS( AB( JI, 2 )-AB( JI, 1 ) )
+            TMP2 = MAX( ABS( AB( JI, 2 ) ), ABS( AB( JI, 1 ) ) )
+            IF( TMP1.LT.MAX( ABSTOL, PIVMIN, RELTOL*TMP2 ) .OR.
+     $          NAB( JI, 1 ).GE.NAB( JI, 2 ) ) THEN
+*
+*              Converged -- Swap with position KFNEW,
+*                           then increment KFNEW
+*
+               IF( JI.GT.KFNEW ) THEN
+                  TMP1 = AB( JI, 1 )
+                  TMP2 = AB( JI, 2 )
+                  ITMP1 = NAB( JI, 1 )
+                  ITMP2 = NAB( JI, 2 )
+                  AB( JI, 1 ) = AB( KFNEW, 1 )
+                  AB( JI, 2 ) = AB( KFNEW, 2 )
+                  NAB( JI, 1 ) = NAB( KFNEW, 1 )
+                  NAB( JI, 2 ) = NAB( KFNEW, 2 )
+                  AB( KFNEW, 1 ) = TMP1
+                  AB( KFNEW, 2 ) = TMP2
+                  NAB( KFNEW, 1 ) = ITMP1
+                  NAB( KFNEW, 2 ) = ITMP2
+                  IF( IJOB.EQ.3 ) THEN
+                     ITMP1 = NVAL( JI )
+                     NVAL( JI ) = NVAL( KFNEW )
+                     NVAL( KFNEW ) = ITMP1
+                  END IF
+               END IF
+               KFNEW = KFNEW + 1
+            END IF
+  110    CONTINUE
+         KF = KFNEW
+*
+*        Choose Midpoints
+*
+         DO 120 JI = KF, KL
+            C( JI ) = HALF*( AB( JI, 1 )+AB( JI, 2 ) )
+  120    CONTINUE
+*
+*        If no more intervals to refine, quit.
+*
+         IF( KF.GT.KL )
+     $      GO TO 140
+  130 CONTINUE
+*
+*     Converged
+*
+  140 CONTINUE
+      INFO = MAX( KL+1-KF, 0 )
+      MOUT = KL
+*
+      RETURN
+*
+*     End of DLAEBZ
+*
+      END
diff --git a/costa/native/external/lapack/dlaed0.f b/costa/native/external/lapack/dlaed0.f
new file mode 100644
index 000000000..6259d61d3
--- /dev/null
+++ b/costa/native/external/lapack/dlaed0.f
@@ -0,0 +1,350 @@
+      SUBROUTINE DLAED0( ICOMPQ, QSIZ, N, D, E, Q, LDQ, QSTORE, LDQS,
+     $                   WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            ICOMPQ, INFO, LDQ, LDQS, N, QSIZ
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   D( * ), E( * ), Q( LDQ, * ), QSTORE( LDQS, * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAED0 computes all eigenvalues and corresponding eigenvectors of a
+*  symmetric tridiagonal matrix using the divide and conquer method.
+*
+*  Arguments
+*  =========
+*
+*  ICOMPQ  (input) INTEGER
+*          = 0:  Compute eigenvalues only.
+*          = 1:  Compute eigenvectors of original dense symmetric matrix
+*                also.  On entry, Q contains the orthogonal matrix used
+*                to reduce the original matrix to tridiagonal form.
+*          = 2:  Compute eigenvalues and eigenvectors of tridiagonal
+*                matrix.
+*
+*  QSIZ   (input) INTEGER
+*         The dimension of the orthogonal matrix used to reduce
+*         the full matrix to tridiagonal form.  QSIZ >= N if ICOMPQ = 1.
+*
+*  N      (input) INTEGER
+*         The dimension of the symmetric tridiagonal matrix.  N >= 0.
+*
+*  D      (input/output) DOUBLE PRECISION array, dimension (N)
+*         On entry, the main diagonal of the tridiagonal matrix.
+*         On exit, its eigenvalues.
+*
+*  E      (input) DOUBLE PRECISION array, dimension (N-1)
+*         The off-diagonal elements of the tridiagonal matrix.
+*         On exit, E has been destroyed.
+*
+*  Q      (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
+*         On entry, Q must contain an N-by-N orthogonal matrix.
+*         If ICOMPQ = 0    Q is not referenced.
+*         If ICOMPQ = 1    On entry, Q is a subset of the columns of the
+*                          orthogonal matrix used to reduce the full
+*                          matrix to tridiagonal form corresponding to
+*                          the subset of the full matrix which is being
+*                          decomposed at this time.
+*         If ICOMPQ = 2    On entry, Q will be the identity matrix.
+*                          On exit, Q contains the eigenvectors of the
+*                          tridiagonal matrix.
+*
+*  LDQ    (input) INTEGER
+*         The leading dimension of the array Q.  If eigenvectors are
+*         desired, then  LDQ >= max(1,N).  In any case,  LDQ >= 1.
+*
+*  QSTORE (workspace) DOUBLE PRECISION array, dimension (LDQS, N)
+*         Referenced only when ICOMPQ = 1.  Used to store parts of
+*         the eigenvector matrix when the updating matrix multiplies
+*         take place.
+*
+*  LDQS   (input) INTEGER
+*         The leading dimension of the array QSTORE.  If ICOMPQ = 1,
+*         then  LDQS >= max(1,N).  In any case,  LDQS >= 1.
+*
+*  WORK   (workspace) DOUBLE PRECISION array,
+*         If ICOMPQ = 0 or 1, the dimension of WORK must be at least
+*                     1 + 3*N + 2*N*lg N + 2*N**2
+*                     ( lg( N ) = smallest integer k
+*                                 such that 2^k >= N )
+*         If ICOMPQ = 2, the dimension of WORK must be at least
+*                     4*N + N**2.
+*
+*  IWORK  (workspace) INTEGER array,
+*         If ICOMPQ = 0 or 1, the dimension of IWORK must be at least
+*                        6 + 6*N + 5*N*lg N.
+*                        ( lg( N ) = smallest integer k
+*                                    such that 2^k >= N )
+*         If ICOMPQ = 2, the dimension of IWORK must be at least
+*                        3 + 5*N.
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  The algorithm failed to compute an eigenvalue while
+*                working on the submatrix lying in rows and columns
+*                INFO/(N+1) through mod(INFO,N+1).
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Jeff Rutter, Computer Science Division, University of California
+*     at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.D0, ONE = 1.D0, TWO = 2.D0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            CURLVL, CURPRB, CURR, I, IGIVCL, IGIVNM,
+     $                   IGIVPT, INDXQ, IPERM, IPRMPT, IQ, IQPTR, IWREM,
+     $                   J, K, LGN, MATSIZ, MSD2, SMLSIZ, SMM1, SPM1,
+     $                   SPM2, SUBMAT, SUBPBS, TLVLS
+      DOUBLE PRECISION   TEMP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DGEMM, DLACPY, DLAED1, DLAED7, DSTEQR,
+     $                   XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, INT, LOG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.2 ) THEN
+         INFO = -1
+      ELSE IF( ( ICOMPQ.EQ.1 ) .AND. ( QSIZ.LT.MAX( 0, N ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDQS.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLAED0', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      SMLSIZ = ILAENV( 9, 'DLAED0', ' ', 0, 0, 0, 0 )
+*
+*     Determine the size and placement of the submatrices, and save in
+*     the leading elements of IWORK.
+*
+      IWORK( 1 ) = N
+      SUBPBS = 1
+      TLVLS = 0
+   10 CONTINUE
+      IF( IWORK( SUBPBS ).GT.SMLSIZ ) THEN
+         DO 20 J = SUBPBS, 1, -1
+            IWORK( 2*J ) = ( IWORK( J )+1 ) / 2
+            IWORK( 2*J-1 ) = IWORK( J ) / 2
+   20    CONTINUE
+         TLVLS = TLVLS + 1
+         SUBPBS = 2*SUBPBS
+         GO TO 10
+      END IF
+      DO 30 J = 2, SUBPBS
+         IWORK( J ) = IWORK( J ) + IWORK( J-1 )
+   30 CONTINUE
+*
+*     Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1
+*     using rank-1 modifications (cuts).
+*
+      SPM1 = SUBPBS - 1
+      DO 40 I = 1, SPM1
+         SUBMAT = IWORK( I ) + 1
+         SMM1 = SUBMAT - 1
+         D( SMM1 ) = D( SMM1 ) - ABS( E( SMM1 ) )
+         D( SUBMAT ) = D( SUBMAT ) - ABS( E( SMM1 ) )
+   40 CONTINUE
+*
+      INDXQ = 4*N + 3
+      IF( ICOMPQ.NE.2 ) THEN
+*
+*        Set up workspaces for eigenvalues only/accumulate new vectors
+*        routine
+*
+         TEMP = LOG( DBLE( N ) ) / LOG( TWO )
+         LGN = INT( TEMP )
+         IF( 2**LGN.LT.N )
+     $      LGN = LGN + 1
+         IF( 2**LGN.LT.N )
+     $      LGN = LGN + 1
+         IPRMPT = INDXQ + N + 1
+         IPERM = IPRMPT + N*LGN
+         IQPTR = IPERM + N*LGN
+         IGIVPT = IQPTR + N + 2
+         IGIVCL = IGIVPT + N*LGN
+*
+         IGIVNM = 1
+         IQ = IGIVNM + 2*N*LGN
+         IWREM = IQ + N**2 + 1
+*
+*        Initialize pointers
+*
+         DO 50 I = 0, SUBPBS
+            IWORK( IPRMPT+I ) = 1
+            IWORK( IGIVPT+I ) = 1
+   50    CONTINUE
+         IWORK( IQPTR ) = 1
+      END IF
+*
+*     Solve each submatrix eigenproblem at the bottom of the divide and
+*     conquer tree.
+*
+      CURR = 0
+      DO 70 I = 0, SPM1
+         IF( I.EQ.0 ) THEN
+            SUBMAT = 1
+            MATSIZ = IWORK( 1 )
+         ELSE
+            SUBMAT = IWORK( I ) + 1
+            MATSIZ = IWORK( I+1 ) - IWORK( I )
+         END IF
+         IF( ICOMPQ.EQ.2 ) THEN
+            CALL DSTEQR( 'I', MATSIZ, D( SUBMAT ), E( SUBMAT ),
+     $                   Q( SUBMAT, SUBMAT ), LDQ, WORK, INFO )
+            IF( INFO.NE.0 )
+     $         GO TO 130
+         ELSE
+            CALL DSTEQR( 'I', MATSIZ, D( SUBMAT ), E( SUBMAT ),
+     $                   WORK( IQ-1+IWORK( IQPTR+CURR ) ), MATSIZ, WORK,
+     $                   INFO )
+            IF( INFO.NE.0 )
+     $         GO TO 130
+            IF( ICOMPQ.EQ.1 ) THEN
+               CALL DGEMM( 'N', 'N', QSIZ, MATSIZ, MATSIZ, ONE,
+     $                     Q( 1, SUBMAT ), LDQ, WORK( IQ-1+IWORK( IQPTR+
+     $                     CURR ) ), MATSIZ, ZERO, QSTORE( 1, SUBMAT ),
+     $                     LDQS )
+            END IF
+            IWORK( IQPTR+CURR+1 ) = IWORK( IQPTR+CURR ) + MATSIZ**2
+            CURR = CURR + 1
+         END IF
+         K = 1
+         DO 60 J = SUBMAT, IWORK( I+1 )
+            IWORK( INDXQ+J ) = K
+            K = K + 1
+   60    CONTINUE
+   70 CONTINUE
+*
+*     Successively merge eigensystems of adjacent submatrices
+*     into eigensystem for the corresponding larger matrix.
+*
+*     while ( SUBPBS > 1 )
+*
+      CURLVL = 1
+   80 CONTINUE
+      IF( SUBPBS.GT.1 ) THEN
+         SPM2 = SUBPBS - 2
+         DO 90 I = 0, SPM2, 2
+            IF( I.EQ.0 ) THEN
+               SUBMAT = 1
+               MATSIZ = IWORK( 2 )
+               MSD2 = IWORK( 1 )
+               CURPRB = 0
+            ELSE
+               SUBMAT = IWORK( I ) + 1
+               MATSIZ = IWORK( I+2 ) - IWORK( I )
+               MSD2 = MATSIZ / 2
+               CURPRB = CURPRB + 1
+            END IF
+*
+*     Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2)
+*     into an eigensystem of size MATSIZ.
+*     DLAED1 is used only for the full eigensystem of a tridiagonal
+*     matrix.
+*     DLAED7 handles the cases in which eigenvalues only or eigenvalues
+*     and eigenvectors of a full symmetric matrix (which was reduced to
+*     tridiagonal form) are desired.
+*
+            IF( ICOMPQ.EQ.2 ) THEN
+               CALL DLAED1( MATSIZ, D( SUBMAT ), Q( SUBMAT, SUBMAT ),
+     $                      LDQ, IWORK( INDXQ+SUBMAT ),
+     $                      E( SUBMAT+MSD2-1 ), MSD2, WORK,
+     $                      IWORK( SUBPBS+1 ), INFO )
+            ELSE
+               CALL DLAED7( ICOMPQ, MATSIZ, QSIZ, TLVLS, CURLVL, CURPRB,
+     $                      D( SUBMAT ), QSTORE( 1, SUBMAT ), LDQS,
+     $                      IWORK( INDXQ+SUBMAT ), E( SUBMAT+MSD2-1 ),
+     $                      MSD2, WORK( IQ ), IWORK( IQPTR ),
+     $                      IWORK( IPRMPT ), IWORK( IPERM ),
+     $                      IWORK( IGIVPT ), IWORK( IGIVCL ),
+     $                      WORK( IGIVNM ), WORK( IWREM ),
+     $                      IWORK( SUBPBS+1 ), INFO )
+            END IF
+            IF( INFO.NE.0 )
+     $         GO TO 130
+            IWORK( I / 2+1 ) = IWORK( I+2 )
+   90    CONTINUE
+         SUBPBS = SUBPBS / 2
+         CURLVL = CURLVL + 1
+         GO TO 80
+      END IF
+*
+*     end while
+*
+*     Re-merge the eigenvalues/vectors which were deflated at the final
+*     merge step.
+*
+      IF( ICOMPQ.EQ.1 ) THEN
+         DO 100 I = 1, N
+            J = IWORK( INDXQ+I )
+            WORK( I ) = D( J )
+            CALL DCOPY( QSIZ, QSTORE( 1, J ), 1, Q( 1, I ), 1 )
+  100    CONTINUE
+         CALL DCOPY( N, WORK, 1, D, 1 )
+      ELSE IF( ICOMPQ.EQ.2 ) THEN
+         DO 110 I = 1, N
+            J = IWORK( INDXQ+I )
+            WORK( I ) = D( J )
+            CALL DCOPY( N, Q( 1, J ), 1, WORK( N*I+1 ), 1 )
+  110    CONTINUE
+         CALL DCOPY( N, WORK, 1, D, 1 )
+         CALL DLACPY( 'A', N, N, WORK( N+1 ), N, Q, LDQ )
+      ELSE
+         DO 120 I = 1, N
+            J = IWORK( INDXQ+I )
+            WORK( I ) = D( J )
+  120    CONTINUE
+         CALL DCOPY( N, WORK, 1, D, 1 )
+      END IF
+      GO TO 140
+*
+  130 CONTINUE
+      INFO = SUBMAT*( N+1 ) + SUBMAT + MATSIZ - 1
+*
+  140 CONTINUE
+      RETURN
+*
+*     End of DLAED0
+*
+      END
diff --git a/costa/native/external/lapack/dlaed1.f b/costa/native/external/lapack/dlaed1.f
new file mode 100644
index 000000000..674ee80c5
--- /dev/null
+++ b/costa/native/external/lapack/dlaed1.f
@@ -0,0 +1,196 @@
+      SUBROUTINE DLAED1( N, D, Q, LDQ, INDXQ, RHO, CUTPNT, WORK, IWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            CUTPNT, INFO, LDQ, N
+      DOUBLE PRECISION   RHO
+*     ..
+*     .. Array Arguments ..
+      INTEGER            INDXQ( * ), IWORK( * )
+      DOUBLE PRECISION   D( * ), Q( LDQ, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAED1 computes the updated eigensystem of a diagonal
+*  matrix after modification by a rank-one symmetric matrix.  This
+*  routine is used only for the eigenproblem which requires all
+*  eigenvalues and eigenvectors of a tridiagonal matrix.  DLAED7 handles
+*  the case in which eigenvalues only or eigenvalues and eigenvectors
+*  of a full symmetric matrix (which was reduced to tridiagonal form)
+*  are desired.
+*
+*    T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out)
+*
+*     where Z = Q'u, u is a vector of length N with ones in the
+*     CUTPNT and CUTPNT + 1 th elements and zeros elsewhere.
+*
+*     The eigenvectors of the original matrix are stored in Q, and the
+*     eigenvalues are in D.  The algorithm consists of three stages:
+*
+*        The first stage consists of deflating the size of the problem
+*        when there are multiple eigenvalues or if there is a zero in
+*        the Z vector.  For each such occurence the dimension of the
+*        secular equation problem is reduced by one.  This stage is
+*        performed by the routine DLAED2.
+*
+*        The second stage consists of calculating the updated
+*        eigenvalues. This is done by finding the roots of the secular
+*        equation via the routine DLAED4 (as called by DLAED3).
+*        This routine also calculates the eigenvectors of the current
+*        problem.
+*
+*        The final stage consists of computing the updated eigenvectors
+*        directly using the updated eigenvalues.  The eigenvectors for
+*        the current problem are multiplied with the eigenvectors from
+*        the overall problem.
+*
+*  Arguments
+*  =========
+*
+*  N      (input) INTEGER
+*         The dimension of the symmetric tridiagonal matrix.  N >= 0.
+*
+*  D      (input/output) DOUBLE PRECISION array, dimension (N)
+*         On entry, the eigenvalues of the rank-1-perturbed matrix.
+*         On exit, the eigenvalues of the repaired matrix.
+*
+*  Q      (input/output) DOUBLE PRECISION array, dimension (LDQ,N)
+*         On entry, the eigenvectors of the rank-1-perturbed matrix.
+*         On exit, the eigenvectors of the repaired tridiagonal matrix.
+*
+*  LDQ    (input) INTEGER
+*         The leading dimension of the array Q.  LDQ >= max(1,N).
+*
+*  INDXQ  (input/output) INTEGER array, dimension (N)
+*         On entry, the permutation which separately sorts the two
+*         subproblems in D into ascending order.
+*         On exit, the permutation which will reintegrate the
+*         subproblems back into sorted order,
+*         i.e. D( INDXQ( I = 1, N ) ) will be in ascending order.
+*
+*  RHO    (input) DOUBLE PRECISION
+*         The subdiagonal entry used to create the rank-1 modification.
+*
+*  CUTPNT (input) INTEGER
+*         The location of the last eigenvalue in the leading sub-matrix.
+*         min(1,N) <= CUTPNT <= N/2.
+*
+*  WORK   (workspace) DOUBLE PRECISION array, dimension (4*N + N**2)
+*
+*  IWORK  (workspace) INTEGER array, dimension (4*N)
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = 1, an eigenvalue did not converge
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Jeff Rutter, Computer Science Division, University of California
+*     at Berkeley, USA
+*  Modified by Francoise Tisseur, University of Tennessee.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            COLTYP, I, IDLMDA, INDX, INDXC, INDXP, IQ2, IS,
+     $                   IW, IZ, K, N1, N2, ZPP1
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLAED2, DLAED3, DLAMRG, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( MIN( 1, N / 2 ).GT.CUTPNT .OR. ( N / 2 ).LT.CUTPNT ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLAED1', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     The following values are integer pointers which indicate
+*     the portion of the workspace
+*     used by a particular array in DLAED2 and DLAED3.
+*
+      IZ = 1
+      IDLMDA = IZ + N
+      IW = IDLMDA + N
+      IQ2 = IW + N
+*
+      INDX = 1
+      INDXC = INDX + N
+      COLTYP = INDXC + N
+      INDXP = COLTYP + N
+*
+*
+*     Form the z-vector which consists of the last row of Q_1 and the
+*     first row of Q_2.
+*
+      CALL DCOPY( CUTPNT, Q( CUTPNT, 1 ), LDQ, WORK( IZ ), 1 )
+      ZPP1 = CUTPNT + 1
+      CALL DCOPY( N-CUTPNT, Q( ZPP1, ZPP1 ), LDQ, WORK( IZ+CUTPNT ), 1 )
+*
+*     Deflate eigenvalues.
+*
+      CALL DLAED2( K, N, CUTPNT, D, Q, LDQ, INDXQ, RHO, WORK( IZ ),
+     $             WORK( IDLMDA ), WORK( IW ), WORK( IQ2 ),
+     $             IWORK( INDX ), IWORK( INDXC ), IWORK( INDXP ),
+     $             IWORK( COLTYP ), INFO )
+*
+      IF( INFO.NE.0 )
+     $   GO TO 20
+*
+*     Solve Secular Equation.
+*
+      IF( K.NE.0 ) THEN
+         IS = ( IWORK( COLTYP )+IWORK( COLTYP+1 ) )*CUTPNT +
+     $        ( IWORK( COLTYP+1 )+IWORK( COLTYP+2 ) )*( N-CUTPNT ) + IQ2
+         CALL DLAED3( K, N, CUTPNT, D, Q, LDQ, RHO, WORK( IDLMDA ),
+     $                WORK( IQ2 ), IWORK( INDXC ), IWORK( COLTYP ),
+     $                WORK( IW ), WORK( IS ), INFO )
+         IF( INFO.NE.0 )
+     $      GO TO 20
+*
+*     Prepare the INDXQ sorting permutation.
+*
+         N1 = K
+         N2 = N - K
+         CALL DLAMRG( N1, N2, D, 1, -1, INDXQ )
+      ELSE
+         DO 10 I = 1, N
+            INDXQ( I ) = I
+   10    CONTINUE
+      END IF
+*
+   20 CONTINUE
+      RETURN
+*
+*     End of DLAED1
+*
+      END
diff --git a/costa/native/external/lapack/dlaed2.f b/costa/native/external/lapack/dlaed2.f
new file mode 100644
index 000000000..17e13505e
--- /dev/null
+++ b/costa/native/external/lapack/dlaed2.f
@@ -0,0 +1,435 @@
+      SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
+     $                   Q2, INDX, INDXC, INDXP, COLTYP, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDQ, N, N1
+      DOUBLE PRECISION   RHO
+*     ..
+*     .. Array Arguments ..
+      INTEGER            COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ),
+     $                   INDXQ( * )
+      DOUBLE PRECISION   D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
+     $                   W( * ), Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAED2 merges the two sets of eigenvalues together into a single
+*  sorted set.  Then it tries to deflate the size of the problem.
+*  There are two ways in which deflation can occur:  when two or more
+*  eigenvalues are close together or if there is a tiny entry in the
+*  Z vector.  For each such occurrence the order of the related secular
+*  equation problem is reduced by one.
+*
+*  Arguments
+*  =========
+*
+*  K      (output) INTEGER
+*         The number of non-deflated eigenvalues, and the order of the
+*         related secular equation. 0 <= K <=N.
+*
+*  N      (input) INTEGER
+*         The dimension of the symmetric tridiagonal matrix.  N >= 0.
+*
+*  N1     (input) INTEGER
+*         The location of the last eigenvalue in the leading sub-matrix.
+*         min(1,N) <= N1 <= N/2.
+*
+*  D      (input/output) DOUBLE PRECISION array, dimension (N)
+*         On entry, D contains the eigenvalues of the two submatrices to
+*         be combined.
+*         On exit, D contains the trailing (N-K) updated eigenvalues
+*         (those which were deflated) sorted into increasing order.
+*
+*  Q      (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
+*         On entry, Q contains the eigenvectors of two submatrices in
+*         the two square blocks with corners at (1,1), (N1,N1)
+*         and (N1+1, N1+1), (N,N).
+*         On exit, Q contains the trailing (N-K) updated eigenvectors
+*         (those which were deflated) in its last N-K columns.
+*
+*  LDQ    (input) INTEGER
+*         The leading dimension of the array Q.  LDQ >= max(1,N).
+*
+*  INDXQ  (input/output) INTEGER array, dimension (N)
+*         The permutation which separately sorts the two sub-problems
+*         in D into ascending order.  Note that elements in the second
+*         half of this permutation must first have N1 added to their
+*         values. Destroyed on exit.
+*
+*  RHO    (input/output) DOUBLE PRECISION
+*         On entry, the off-diagonal element associated with the rank-1
+*         cut which originally split the two submatrices which are now
+*         being recombined.
+*         On exit, RHO has been modified to the value required by
+*         DLAED3.
+*
+*  Z      (input) DOUBLE PRECISION array, dimension (N)
+*         On entry, Z contains the updating vector (the last
+*         row of the first sub-eigenvector matrix and the first row of
+*         the second sub-eigenvector matrix).
+*         On exit, the contents of Z have been destroyed by the updating
+*         process.
+*
+*  DLAMDA (output) DOUBLE PRECISION array, dimension (N)
+*         A copy of the first K eigenvalues which will be used by
+*         DLAED3 to form the secular equation.
+*
+*  W      (output) DOUBLE PRECISION array, dimension (N)
+*         The first k values of the final deflation-altered z-vector
+*         which will be passed to DLAED3.
+*
+*  Q2     (output) DOUBLE PRECISION array, dimension (N1**2+(N-N1)**2)
+*         A copy of the first K eigenvectors which will be used by
+*         DLAED3 in a matrix multiply (DGEMM) to solve for the new
+*         eigenvectors.
+*
+*  INDX   (workspace) INTEGER array, dimension (N)
+*         The permutation used to sort the contents of DLAMDA into
+*         ascending order.
+*
+*  INDXC  (output) INTEGER array, dimension (N)
+*         The permutation used to arrange the columns of the deflated
+*         Q matrix into three groups:  the first group contains non-zero
+*         elements only at and above N1, the second contains
+*         non-zero elements only below N1, and the third is dense.
+*
+*  INDXP  (workspace) INTEGER array, dimension (N)
+*         The permutation used to place deflated values of D at the end
+*         of the array.  INDXP(1:K) points to the nondeflated D-values
+*         and INDXP(K+1:N) points to the deflated eigenvalues.
+*
+*  COLTYP (workspace/output) INTEGER array, dimension (N)
+*         During execution, a label which will indicate which of the
+*         following types a column in the Q2 matrix is:
+*         1 : non-zero in the upper half only;
+*         2 : dense;
+*         3 : non-zero in the lower half only;
+*         4 : deflated.
+*         On exit, COLTYP(i) is the number of columns of type i,
+*         for i=1 to 4 only.
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Jeff Rutter, Computer Science Division, University of California
+*     at Berkeley, USA
+*  Modified by Francoise Tisseur, University of Tennessee.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   MONE, ZERO, ONE, TWO, EIGHT
+      PARAMETER          ( MONE = -1.0D0, ZERO = 0.0D0, ONE = 1.0D0,
+     $                   TWO = 2.0D0, EIGHT = 8.0D0 )
+*     ..
+*     .. Local Arrays ..
+      INTEGER            CTOT( 4 ), PSM( 4 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            CT, I, IMAX, IQ1, IQ2, J, JMAX, JS, K2, N1P1,
+     $                   N2, NJ, PJ
+      DOUBLE PRECISION   C, EPS, S, T, TAU, TOL
+*     ..
+*     .. External Functions ..
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DLAMCH, DLAPY2
+      EXTERNAL           IDAMAX, DLAMCH, DLAPY2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLACPY, DLAMRG, DROT, DSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( MIN( 1, ( N / 2 ) ).GT.N1 .OR. ( N / 2 ).LT.N1 ) THEN
+         INFO = -3
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLAED2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      N2 = N - N1
+      N1P1 = N1 + 1
+*
+      IF( RHO.LT.ZERO ) THEN
+         CALL DSCAL( N2, MONE, Z( N1P1 ), 1 )
+      END IF
+*
+*     Normalize z so that norm(z) = 1.  Since z is the concatenation of
+*     two normalized vectors, norm2(z) = sqrt(2).
+*
+      T = ONE / SQRT( TWO )
+      CALL DSCAL( N, T, Z, 1 )
+*
+*     RHO = ABS( norm(z)**2 * RHO )
+*
+      RHO = ABS( TWO*RHO )
+*
+*     Sort the eigenvalues into increasing order
+*
+      DO 10 I = N1P1, N
+         INDXQ( I ) = INDXQ( I ) + N1
+   10 CONTINUE
+*
+*     re-integrate the deflated parts from the last pass
+*
+      DO 20 I = 1, N
+         DLAMDA( I ) = D( INDXQ( I ) )
+   20 CONTINUE
+      CALL DLAMRG( N1, N2, DLAMDA, 1, 1, INDXC )
+      DO 30 I = 1, N
+         INDX( I ) = INDXQ( INDXC( I ) )
+   30 CONTINUE
+*
+*     Calculate the allowable deflation tolerance
+*
+      IMAX = IDAMAX( N, Z, 1 )
+      JMAX = IDAMAX( N, D, 1 )
+      EPS = DLAMCH( 'Epsilon' )
+      TOL = EIGHT*EPS*MAX( ABS( D( JMAX ) ), ABS( Z( IMAX ) ) )
+*
+*     If the rank-1 modifier is small enough, no more needs to be done
+*     except to reorganize Q so that its columns correspond with the
+*     elements in D.
+*
+      IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN
+         K = 0
+         IQ2 = 1
+         DO 40 J = 1, N
+            I = INDX( J )
+            CALL DCOPY( N, Q( 1, I ), 1, Q2( IQ2 ), 1 )
+            DLAMDA( J ) = D( I )
+            IQ2 = IQ2 + N
+   40    CONTINUE
+         CALL DLACPY( 'A', N, N, Q2, N, Q, LDQ )
+         CALL DCOPY( N, DLAMDA, 1, D, 1 )
+         GO TO 190
+      END IF
+*
+*     If there are multiple eigenvalues then the problem deflates.  Here
+*     the number of equal eigenvalues are found.  As each equal
+*     eigenvalue is found, an elementary reflector is computed to rotate
+*     the corresponding eigensubspace so that the corresponding
+*     components of Z are zero in this new basis.
+*
+      DO 50 I = 1, N1
+         COLTYP( I ) = 1
+   50 CONTINUE
+      DO 60 I = N1P1, N
+         COLTYP( I ) = 3
+   60 CONTINUE
+*
+*
+      K = 0
+      K2 = N + 1
+      DO 70 J = 1, N
+         NJ = INDX( J )
+         IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN
+*
+*           Deflate due to small z component.
+*
+            K2 = K2 - 1
+            COLTYP( NJ ) = 4
+            INDXP( K2 ) = NJ
+            IF( J.EQ.N )
+     $         GO TO 100
+         ELSE
+            PJ = NJ
+            GO TO 80
+         END IF
+   70 CONTINUE
+   80 CONTINUE
+      J = J + 1
+      NJ = INDX( J )
+      IF( J.GT.N )
+     $   GO TO 100
+      IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN
+*
+*        Deflate due to small z component.
+*
+         K2 = K2 - 1
+         COLTYP( NJ ) = 4
+         INDXP( K2 ) = NJ
+      ELSE
+*
+*        Check if eigenvalues are close enough to allow deflation.
+*
+         S = Z( PJ )
+         C = Z( NJ )
+*
+*        Find sqrt(a**2+b**2) without overflow or
+*        destructive underflow.
+*
+         TAU = DLAPY2( C, S )
+         T = D( NJ ) - D( PJ )
+         C = C / TAU
+         S = -S / TAU
+         IF( ABS( T*C*S ).LE.TOL ) THEN
+*
+*           Deflation is possible.
+*
+            Z( NJ ) = TAU
+            Z( PJ ) = ZERO
+            IF( COLTYP( NJ ).NE.COLTYP( PJ ) )
+     $         COLTYP( NJ ) = 2
+            COLTYP( PJ ) = 4
+            CALL DROT( N, Q( 1, PJ ), 1, Q( 1, NJ ), 1, C, S )
+            T = D( PJ )*C**2 + D( NJ )*S**2
+            D( NJ ) = D( PJ )*S**2 + D( NJ )*C**2
+            D( PJ ) = T
+            K2 = K2 - 1
+            I = 1
+   90       CONTINUE
+            IF( K2+I.LE.N ) THEN
+               IF( D( PJ ).LT.D( INDXP( K2+I ) ) ) THEN
+                  INDXP( K2+I-1 ) = INDXP( K2+I )
+                  INDXP( K2+I ) = PJ
+                  I = I + 1
+                  GO TO 90
+               ELSE
+                  INDXP( K2+I-1 ) = PJ
+               END IF
+            ELSE
+               INDXP( K2+I-1 ) = PJ
+            END IF
+            PJ = NJ
+         ELSE
+            K = K + 1
+            DLAMDA( K ) = D( PJ )
+            W( K ) = Z( PJ )
+            INDXP( K ) = PJ
+            PJ = NJ
+         END IF
+      END IF
+      GO TO 80
+  100 CONTINUE
+*
+*     Record the last eigenvalue.
+*
+      K = K + 1
+      DLAMDA( K ) = D( PJ )
+      W( K ) = Z( PJ )
+      INDXP( K ) = PJ
+*
+*     Count up the total number of the various types of columns, then
+*     form a permutation which positions the four column types into
+*     four uniform groups (although one or more of these groups may be
+*     empty).
+*
+      DO 110 J = 1, 4
+         CTOT( J ) = 0
+  110 CONTINUE
+      DO 120 J = 1, N
+         CT = COLTYP( J )
+         CTOT( CT ) = CTOT( CT ) + 1
+  120 CONTINUE
+*
+*     PSM(*) = Position in SubMatrix (of types 1 through 4)
+*
+      PSM( 1 ) = 1
+      PSM( 2 ) = 1 + CTOT( 1 )
+      PSM( 3 ) = PSM( 2 ) + CTOT( 2 )
+      PSM( 4 ) = PSM( 3 ) + CTOT( 3 )
+      K = N - CTOT( 4 )
+*
+*     Fill out the INDXC array so that the permutation which it induces
+*     will place all type-1 columns first, all type-2 columns next,
+*     then all type-3's, and finally all type-4's.
+*
+      DO 130 J = 1, N
+         JS = INDXP( J )
+         CT = COLTYP( JS )
+         INDX( PSM( CT ) ) = JS
+         INDXC( PSM( CT ) ) = J
+         PSM( CT ) = PSM( CT ) + 1
+  130 CONTINUE
+*
+*     Sort the eigenvalues and corresponding eigenvectors into DLAMDA
+*     and Q2 respectively.  The eigenvalues/vectors which were not
+*     deflated go into the first K slots of DLAMDA and Q2 respectively,
+*     while those which were deflated go into the last N - K slots.
+*
+      I = 1
+      IQ1 = 1
+      IQ2 = 1 + ( CTOT( 1 )+CTOT( 2 ) )*N1
+      DO 140 J = 1, CTOT( 1 )
+         JS = INDX( I )
+         CALL DCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 )
+         Z( I ) = D( JS )
+         I = I + 1
+         IQ1 = IQ1 + N1
+  140 CONTINUE
+*
+      DO 150 J = 1, CTOT( 2 )
+         JS = INDX( I )
+         CALL DCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 )
+         CALL DCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 )
+         Z( I ) = D( JS )
+         I = I + 1
+         IQ1 = IQ1 + N1
+         IQ2 = IQ2 + N2
+  150 CONTINUE
+*
+      DO 160 J = 1, CTOT( 3 )
+         JS = INDX( I )
+         CALL DCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 )
+         Z( I ) = D( JS )
+         I = I + 1
+         IQ2 = IQ2 + N2
+  160 CONTINUE
+*
+      IQ1 = IQ2
+      DO 170 J = 1, CTOT( 4 )
+         JS = INDX( I )
+         CALL DCOPY( N, Q( 1, JS ), 1, Q2( IQ2 ), 1 )
+         IQ2 = IQ2 + N
+         Z( I ) = D( JS )
+         I = I + 1
+  170 CONTINUE
+*
+*     The deflated eigenvalues and their corresponding vectors go back
+*     into the last N - K slots of D and Q respectively.
+*
+      CALL DLACPY( 'A', N, CTOT( 4 ), Q2( IQ1 ), N, Q( 1, K+1 ), LDQ )
+      CALL DCOPY( N-K, Z( K+1 ), 1, D( K+1 ), 1 )
+*
+*     Copy CTOT into COLTYP for referencing in DLAED3.
+*
+      DO 180 J = 1, 4
+         COLTYP( J ) = CTOT( J )
+  180 CONTINUE
+*
+  190 CONTINUE
+      RETURN
+*
+*     End of DLAED2
+*
+      END
diff --git a/costa/native/external/lapack/dlaed3.f b/costa/native/external/lapack/dlaed3.f
new file mode 100644
index 000000000..5f14ab958
--- /dev/null
+++ b/costa/native/external/lapack/dlaed3.f
@@ -0,0 +1,265 @@
+      SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX,
+     $                   CTOT, W, S, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+*     Courant Institute, NAG Ltd., and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDQ, N, N1
+      DOUBLE PRECISION   RHO
+*     ..
+*     .. Array Arguments ..
+      INTEGER            CTOT( * ), INDX( * )
+      DOUBLE PRECISION   D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
+     $                   S( * ), W( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAED3 finds the roots of the secular equation, as defined by the
+*  values in D, W, and RHO, between 1 and K.  It makes the
+*  appropriate calls to DLAED4 and then updates the eigenvectors by
+*  multiplying the matrix of eigenvectors of the pair of eigensystems
+*  being combined by the matrix of eigenvectors of the K-by-K system
+*  which is solved here.
+*
+*  This code makes very mild assumptions about floating point
+*  arithmetic. It will work on machines with a guard digit in
+*  add/subtract, or on those binary machines without guard digits
+*  which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
+*  It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  K       (input) INTEGER
+*          The number of terms in the rational function to be solved by
+*          DLAED4.  K >= 0.
+*
+*  N       (input) INTEGER
+*          The number of rows and columns in the Q matrix.
+*          N >= K (deflation may result in N>K).
+*
+*  N1      (input) INTEGER
+*          The location of the last eigenvalue in the leading submatrix.
+*          min(1,N) <= N1 <= N/2.
+*
+*  D       (output) DOUBLE PRECISION array, dimension (N)
+*          D(I) contains the updated eigenvalues for
+*          1 <= I <= K.
+*
+*  Q       (output) DOUBLE PRECISION array, dimension (LDQ,N)
+*          Initially the first K columns are used as workspace.
+*          On output the columns 1 to K contain
+*          the updated eigenvectors.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.  LDQ >= max(1,N).
+*
+*  RHO     (input) DOUBLE PRECISION
+*          The value of the parameter in the rank one update equation.
+*          RHO >= 0 required.
+*
+*  DLAMDA  (input/output) DOUBLE PRECISION array, dimension (K)
+*          The first K elements of this array contain the old roots
+*          of the deflated updating problem.  These are the poles
+*          of the secular equation. May be changed on output by
+*          having lowest order bit set to zero on Cray X-MP, Cray Y-MP,
+*          Cray-2, or Cray C-90, as described above.
+*
+*  Q2      (input) DOUBLE PRECISION array, dimension (LDQ2, N)
+*          The first K columns of this matrix contain the non-deflated
+*          eigenvectors for the split problem.
+*
+*  INDX    (input) INTEGER array, dimension (N)
+*          The permutation used to arrange the columns of the deflated
+*          Q matrix into three groups (see DLAED2).
+*          The rows of the eigenvectors found by DLAED4 must be likewise
+*          permuted before the matrix multiply can take place.
+*
+*  CTOT    (input) INTEGER array, dimension (4)
+*          A count of the total number of the various types of columns
+*          in Q, as described in INDX.  The fourth column type is any
+*          column which has been deflated.
+*
+*  W       (input/output) DOUBLE PRECISION array, dimension (K)
+*          The first K elements of this array contain the components
+*          of the deflation-adjusted updating vector. Destroyed on
+*          output.
+*
+*  S       (workspace) DOUBLE PRECISION array, dimension (N1 + 1)*K
+*          Will contain the eigenvectors of the repaired matrix which
+*          will be multiplied by the previously accumulated eigenvectors
+*          to update the system.
+*
+*  LDS     (input) INTEGER
+*          The leading dimension of S.  LDS >= max(1,K).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = 1, an eigenvalue did not converge
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Jeff Rutter, Computer Science Division, University of California
+*     at Berkeley, USA
+*  Modified by Francoise Tisseur, University of Tennessee.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D0, ZERO = 0.0D0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, II, IQ2, J, N12, N2, N23
+      DOUBLE PRECISION   TEMP
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMC3, DNRM2
+      EXTERNAL           DLAMC3, DNRM2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DGEMM, DLACPY, DLAED4, DLASET, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, SIGN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( K.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.K ) THEN
+         INFO = -2
+      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLAED3', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( K.EQ.0 )
+     $   RETURN
+*
+*     Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can
+*     be computed with high relative accuracy (barring over/underflow).
+*     This is a problem on machines without a guard digit in
+*     add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
+*     The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I),
+*     which on any of these machines zeros out the bottommost
+*     bit of DLAMDA(I) if it is 1; this makes the subsequent
+*     subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation
+*     occurs. On binary machines with a guard digit (almost all
+*     machines) it does not change DLAMDA(I) at all. On hexadecimal
+*     and decimal machines with a guard digit, it slightly
+*     changes the bottommost bits of DLAMDA(I). It does not account
+*     for hexadecimal or decimal machines without guard digits
+*     (we know of none). We use a subroutine call to compute
+*     2*DLAMBDA(I) to prevent optimizing compilers from eliminating
+*     this code.
+*
+      DO 10 I = 1, K
+         DLAMDA( I ) = DLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I )
+   10 CONTINUE
+*
+      DO 20 J = 1, K
+         CALL DLAED4( K, J, DLAMDA, W, Q( 1, J ), RHO, D( J ), INFO )
+*
+*        If the zero finder fails, the computation is terminated.
+*
+         IF( INFO.NE.0 )
+     $      GO TO 120
+   20 CONTINUE
+*
+      IF( K.EQ.1 )
+     $   GO TO 110
+      IF( K.EQ.2 ) THEN
+         DO 30 J = 1, K
+            W( 1 ) = Q( 1, J )
+            W( 2 ) = Q( 2, J )
+            II = INDX( 1 )
+            Q( 1, J ) = W( II )
+            II = INDX( 2 )
+            Q( 2, J ) = W( II )
+   30    CONTINUE
+         GO TO 110
+      END IF
+*
+*     Compute updated W.
+*
+      CALL DCOPY( K, W, 1, S, 1 )
+*
+*     Initialize W(I) = Q(I,I)
+*
+      CALL DCOPY( K, Q, LDQ+1, W, 1 )
+      DO 60 J = 1, K
+         DO 40 I = 1, J - 1
+            W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) )
+   40    CONTINUE
+         DO 50 I = J + 1, K
+            W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) )
+   50    CONTINUE
+   60 CONTINUE
+      DO 70 I = 1, K
+         W( I ) = SIGN( SQRT( -W( I ) ), S( I ) )
+   70 CONTINUE
+*
+*     Compute eigenvectors of the modified rank-1 modification.
+*
+      DO 100 J = 1, K
+         DO 80 I = 1, K
+            S( I ) = W( I ) / Q( I, J )
+   80    CONTINUE
+         TEMP = DNRM2( K, S, 1 )
+         DO 90 I = 1, K
+            II = INDX( I )
+            Q( I, J ) = S( II ) / TEMP
+   90    CONTINUE
+  100 CONTINUE
+*
+*     Compute the updated eigenvectors.
+*
+  110 CONTINUE
+*
+      N2 = N - N1
+      N12 = CTOT( 1 ) + CTOT( 2 )
+      N23 = CTOT( 2 ) + CTOT( 3 )
+*
+      CALL DLACPY( 'A', N23, K, Q( CTOT( 1 )+1, 1 ), LDQ, S, N23 )
+      IQ2 = N1*N12 + 1
+      IF( N23.NE.0 ) THEN
+         CALL DGEMM( 'N', 'N', N2, K, N23, ONE, Q2( IQ2 ), N2, S, N23,
+     $               ZERO, Q( N1+1, 1 ), LDQ )
+      ELSE
+         CALL DLASET( 'A', N2, K, ZERO, ZERO, Q( N1+1, 1 ), LDQ )
+      END IF
+*
+      CALL DLACPY( 'A', N12, K, Q, LDQ, S, N12 )
+      IF( N12.NE.0 ) THEN
+         CALL DGEMM( 'N', 'N', N1, K, N12, ONE, Q2, N1, S, N12, ZERO, Q,
+     $               LDQ )
+      ELSE
+         CALL DLASET( 'A', N1, K, ZERO, ZERO, Q( 1, 1 ), LDQ )
+      END IF
+*
+*
+  120 CONTINUE
+      RETURN
+*
+*     End of DLAED3
+*
+      END
diff --git a/costa/native/external/lapack/dlaed4.f b/costa/native/external/lapack/dlaed4.f
new file mode 100644
index 000000000..b82cb1146
--- /dev/null
+++ b/costa/native/external/lapack/dlaed4.f
@@ -0,0 +1,846 @@
+      SUBROUTINE DLAED4( N, I, D, Z, DELTA, RHO, DLAM, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+*     Courant Institute, NAG Ltd., and Rice University
+*     December 23, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            I, INFO, N
+      DOUBLE PRECISION   DLAM, RHO
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * ), DELTA( * ), Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This subroutine computes the I-th updated eigenvalue of a symmetric
+*  rank-one modification to a diagonal matrix whose elements are
+*  given in the array d, and that
+*
+*             D(i) < D(j)  for  i < j
+*
+*  and that RHO > 0.  This is arranged by the calling routine, and is
+*  no loss in generality.  The rank-one modified system is thus
+*
+*             diag( D )  +  RHO *  Z * Z_transpose.
+*
+*  where we assume the Euclidean norm of Z is 1.
+*
+*  The method consists of approximating the rational functions in the
+*  secular equation by simpler interpolating rational functions.
+*
+*  Arguments
+*  =========
+*
+*  N      (input) INTEGER
+*         The length of all arrays.
+*
+*  I      (input) INTEGER
+*         The index of the eigenvalue to be computed.  1 <= I <= N.
+*
+*  D      (input) DOUBLE PRECISION array, dimension (N)
+*         The original eigenvalues.  It is assumed that they are in
+*         order, D(I) < D(J)  for I < J.
+*
+*  Z      (input) DOUBLE PRECISION array, dimension (N)
+*         The components of the updating vector.
+*
+*  DELTA  (output) DOUBLE PRECISION array, dimension (N)
+*         If N .ne. 1, DELTA contains (D(j) - lambda_I) in its  j-th
+*         component.  If N = 1, then DELTA(1) = 1.  The vector DELTA
+*         contains the information necessary to construct the
+*         eigenvectors.
+*
+*  RHO    (input) DOUBLE PRECISION
+*         The scalar in the symmetric updating formula.
+*
+*  DLAM   (output) DOUBLE PRECISION
+*         The computed lambda_I, the I-th updated eigenvalue.
+*
+*  INFO   (output) INTEGER
+*         = 0:  successful exit
+*         > 0:  if INFO = 1, the updating process failed.
+*
+*  Internal Parameters
+*  ===================
+*
+*  Logical variable ORGATI (origin-at-i?) is used for distinguishing
+*  whether D(i) or D(i+1) is treated as the origin.
+*
+*            ORGATI = .true.    origin at i
+*            ORGATI = .false.   origin at i+1
+*
+*   Logical variable SWTCH3 (switch-for-3-poles?) is for noting
+*   if we are working with THREE poles!
+*
+*   MAXIT is the maximum number of iterations allowed for each
+*   eigenvalue.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ren-Cang Li, Computer Science Division, University of California
+*     at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            MAXIT
+      PARAMETER          ( MAXIT = 30 )
+      DOUBLE PRECISION   ZERO, ONE, TWO, THREE, FOUR, EIGHT, TEN
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0,
+     $                   THREE = 3.0D0, FOUR = 4.0D0, EIGHT = 8.0D0,
+     $                   TEN = 10.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ORGATI, SWTCH, SWTCH3
+      INTEGER            II, IIM1, IIP1, IP1, ITER, J, NITER
+      DOUBLE PRECISION   A, B, C, DEL, DLTLB, DLTUB, DPHI, DPSI, DW,
+     $                   EPS, ERRETM, ETA, MIDPT, PHI, PREW, PSI,
+     $                   RHOINV, TAU, TEMP, TEMP1, W
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   ZZ( 3 )
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLAED5, DLAED6
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Since this routine is called in an inner loop, we do no argument
+*     checking.
+*
+*     Quick return for N=1 and 2.
+*
+      INFO = 0
+      IF( N.EQ.1 ) THEN
+*
+*         Presumably, I=1 upon entry
+*
+         DLAM = D( 1 ) + RHO*Z( 1 )*Z( 1 )
+         DELTA( 1 ) = ONE
+         RETURN
+      END IF
+      IF( N.EQ.2 ) THEN
+         CALL DLAED5( I, D, Z, DELTA, RHO, DLAM )
+         RETURN
+      END IF
+*
+*     Compute machine epsilon
+*
+      EPS = DLAMCH( 'Epsilon' )
+      RHOINV = ONE / RHO
+*
+*     The case I = N
+*
+      IF( I.EQ.N ) THEN
+*
+*        Initialize some basic variables
+*
+         II = N - 1
+         NITER = 1
+*
+*        Calculate initial guess
+*
+         MIDPT = RHO / TWO
+*
+*        If ||Z||_2 is not one, then TEMP should be set to
+*        RHO * ||Z||_2^2 / TWO
+*
+         DO 10 J = 1, N
+            DELTA( J ) = ( D( J )-D( I ) ) - MIDPT
+   10    CONTINUE
+*
+         PSI = ZERO
+         DO 20 J = 1, N - 2
+            PSI = PSI + Z( J )*Z( J ) / DELTA( J )
+   20    CONTINUE
+*
+         C = RHOINV + PSI
+         W = C + Z( II )*Z( II ) / DELTA( II ) +
+     $       Z( N )*Z( N ) / DELTA( N )
+*
+         IF( W.LE.ZERO ) THEN
+            TEMP = Z( N-1 )*Z( N-1 ) / ( D( N )-D( N-1 )+RHO ) +
+     $             Z( N )*Z( N ) / RHO
+            IF( C.LE.TEMP ) THEN
+               TAU = RHO
+            ELSE
+               DEL = D( N ) - D( N-1 )
+               A = -C*DEL + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N )
+               B = Z( N )*Z( N )*DEL
+               IF( A.LT.ZERO ) THEN
+                  TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A )
+               ELSE
+                  TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C )
+               END IF
+            END IF
+*
+*           It can be proved that
+*               D(N)+RHO/2 <= LAMBDA(N) < D(N)+TAU <= D(N)+RHO
+*
+            DLTLB = MIDPT
+            DLTUB = RHO
+         ELSE
+            DEL = D( N ) - D( N-1 )
+            A = -C*DEL + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N )
+            B = Z( N )*Z( N )*DEL
+            IF( A.LT.ZERO ) THEN
+               TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A )
+            ELSE
+               TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C )
+            END IF
+*
+*           It can be proved that
+*               D(N) < D(N)+TAU < LAMBDA(N) < D(N)+RHO/2
+*
+            DLTLB = ZERO
+            DLTUB = MIDPT
+         END IF
+*
+         DO 30 J = 1, N
+            DELTA( J ) = ( D( J )-D( I ) ) - TAU
+   30    CONTINUE
+*
+*        Evaluate PSI and the derivative DPSI
+*
+         DPSI = ZERO
+         PSI = ZERO
+         ERRETM = ZERO
+         DO 40 J = 1, II
+            TEMP = Z( J ) / DELTA( J )
+            PSI = PSI + Z( J )*TEMP
+            DPSI = DPSI + TEMP*TEMP
+            ERRETM = ERRETM + PSI
+   40    CONTINUE
+         ERRETM = ABS( ERRETM )
+*
+*        Evaluate PHI and the derivative DPHI
+*
+         TEMP = Z( N ) / DELTA( N )
+         PHI = Z( N )*TEMP
+         DPHI = TEMP*TEMP
+         ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV +
+     $            ABS( TAU )*( DPSI+DPHI )
+*
+         W = RHOINV + PHI + PSI
+*
+*        Test for convergence
+*
+         IF( ABS( W ).LE.EPS*ERRETM ) THEN
+            DLAM = D( I ) + TAU
+            GO TO 250
+         END IF
+*
+         IF( W.LE.ZERO ) THEN
+            DLTLB = MAX( DLTLB, TAU )
+         ELSE
+            DLTUB = MIN( DLTUB, TAU )
+         END IF
+*
+*        Calculate the new step
+*
+         NITER = NITER + 1
+         C = W - DELTA( N-1 )*DPSI - DELTA( N )*DPHI
+         A = ( DELTA( N-1 )+DELTA( N ) )*W -
+     $       DELTA( N-1 )*DELTA( N )*( DPSI+DPHI )
+         B = DELTA( N-1 )*DELTA( N )*W
+         IF( C.LT.ZERO )
+     $      C = ABS( C )
+         IF( C.EQ.ZERO ) THEN
+*          ETA = B/A
+*           ETA = RHO - TAU
+            ETA = DLTUB - TAU
+         ELSE IF( A.GE.ZERO ) THEN
+            ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+         ELSE
+            ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) )
+         END IF
+*
+*        Note, eta should be positive if w is negative, and
+*        eta should be negative otherwise. However,
+*        if for some reason caused by roundoff, eta*w > 0,
+*        we simply use one Newton step instead. This way
+*        will guarantee eta*w < 0.
+*
+         IF( W*ETA.GT.ZERO )
+     $      ETA = -W / ( DPSI+DPHI )
+         TEMP = TAU + ETA
+         IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN
+            IF( W.LT.ZERO ) THEN
+               ETA = ( DLTUB-TAU ) / TWO
+            ELSE
+               ETA = ( DLTLB-TAU ) / TWO
+            END IF
+         END IF
+         DO 50 J = 1, N
+            DELTA( J ) = DELTA( J ) - ETA
+   50    CONTINUE
+*
+         TAU = TAU + ETA
+*
+*        Evaluate PSI and the derivative DPSI
+*
+         DPSI = ZERO
+         PSI = ZERO
+         ERRETM = ZERO
+         DO 60 J = 1, II
+            TEMP = Z( J ) / DELTA( J )
+            PSI = PSI + Z( J )*TEMP
+            DPSI = DPSI + TEMP*TEMP
+            ERRETM = ERRETM + PSI
+   60    CONTINUE
+         ERRETM = ABS( ERRETM )
+*
+*        Evaluate PHI and the derivative DPHI
+*
+         TEMP = Z( N ) / DELTA( N )
+         PHI = Z( N )*TEMP
+         DPHI = TEMP*TEMP
+         ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV +
+     $            ABS( TAU )*( DPSI+DPHI )
+*
+         W = RHOINV + PHI + PSI
+*
+*        Main loop to update the values of the array   DELTA
+*
+         ITER = NITER + 1
+*
+         DO 90 NITER = ITER, MAXIT
+*
+*           Test for convergence
+*
+            IF( ABS( W ).LE.EPS*ERRETM ) THEN
+               DLAM = D( I ) + TAU
+               GO TO 250
+            END IF
+*
+            IF( W.LE.ZERO ) THEN
+               DLTLB = MAX( DLTLB, TAU )
+            ELSE
+               DLTUB = MIN( DLTUB, TAU )
+            END IF
+*
+*           Calculate the new step
+*
+            C = W - DELTA( N-1 )*DPSI - DELTA( N )*DPHI
+            A = ( DELTA( N-1 )+DELTA( N ) )*W -
+     $          DELTA( N-1 )*DELTA( N )*( DPSI+DPHI )
+            B = DELTA( N-1 )*DELTA( N )*W
+            IF( A.GE.ZERO ) THEN
+               ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+            ELSE
+               ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) )
+            END IF
+*
+*           Note, eta should be positive if w is negative, and
+*           eta should be negative otherwise. However,
+*           if for some reason caused by roundoff, eta*w > 0,
+*           we simply use one Newton step instead. This way
+*           will guarantee eta*w < 0.
+*
+            IF( W*ETA.GT.ZERO )
+     $         ETA = -W / ( DPSI+DPHI )
+            TEMP = TAU + ETA
+            IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN
+               IF( W.LT.ZERO ) THEN
+                  ETA = ( DLTUB-TAU ) / TWO
+               ELSE
+                  ETA = ( DLTLB-TAU ) / TWO
+               END IF
+            END IF
+            DO 70 J = 1, N
+               DELTA( J ) = DELTA( J ) - ETA
+   70       CONTINUE
+*
+            TAU = TAU + ETA
+*
+*           Evaluate PSI and the derivative DPSI
+*
+            DPSI = ZERO
+            PSI = ZERO
+            ERRETM = ZERO
+            DO 80 J = 1, II
+               TEMP = Z( J ) / DELTA( J )
+               PSI = PSI + Z( J )*TEMP
+               DPSI = DPSI + TEMP*TEMP
+               ERRETM = ERRETM + PSI
+   80       CONTINUE
+            ERRETM = ABS( ERRETM )
+*
+*           Evaluate PHI and the derivative DPHI
+*
+            TEMP = Z( N ) / DELTA( N )
+            PHI = Z( N )*TEMP
+            DPHI = TEMP*TEMP
+            ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV +
+     $               ABS( TAU )*( DPSI+DPHI )
+*
+            W = RHOINV + PHI + PSI
+   90    CONTINUE
+*
+*        Return with INFO = 1, NITER = MAXIT and not converged
+*
+         INFO = 1
+         DLAM = D( I ) + TAU
+         GO TO 250
+*
+*        End for the case I = N
+*
+      ELSE
+*
+*        The case for I < N
+*
+         NITER = 1
+         IP1 = I + 1
+*
+*        Calculate initial guess
+*
+         DEL = D( IP1 ) - D( I )
+         MIDPT = DEL / TWO
+         DO 100 J = 1, N
+            DELTA( J ) = ( D( J )-D( I ) ) - MIDPT
+  100    CONTINUE
+*
+         PSI = ZERO
+         DO 110 J = 1, I - 1
+            PSI = PSI + Z( J )*Z( J ) / DELTA( J )
+  110    CONTINUE
+*
+         PHI = ZERO
+         DO 120 J = N, I + 2, -1
+            PHI = PHI + Z( J )*Z( J ) / DELTA( J )
+  120    CONTINUE
+         C = RHOINV + PSI + PHI
+         W = C + Z( I )*Z( I ) / DELTA( I ) +
+     $       Z( IP1 )*Z( IP1 ) / DELTA( IP1 )
+*
+         IF( W.GT.ZERO ) THEN
+*
+*           d(i)< the ith eigenvalue < (d(i)+d(i+1))/2
+*
+*           We choose d(i) as origin.
+*
+            ORGATI = .TRUE.
+            A = C*DEL + Z( I )*Z( I ) + Z( IP1 )*Z( IP1 )
+            B = Z( I )*Z( I )*DEL
+            IF( A.GT.ZERO ) THEN
+               TAU = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
+            ELSE
+               TAU = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+            END IF
+            DLTLB = ZERO
+            DLTUB = MIDPT
+         ELSE
+*
+*           (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1)
+*
+*           We choose d(i+1) as origin.
+*
+            ORGATI = .FALSE.
+            A = C*DEL - Z( I )*Z( I ) - Z( IP1 )*Z( IP1 )
+            B = Z( IP1 )*Z( IP1 )*DEL
+            IF( A.LT.ZERO ) THEN
+               TAU = TWO*B / ( A-SQRT( ABS( A*A+FOUR*B*C ) ) )
+            ELSE
+               TAU = -( A+SQRT( ABS( A*A+FOUR*B*C ) ) ) / ( TWO*C )
+            END IF
+            DLTLB = -MIDPT
+            DLTUB = ZERO
+         END IF
+*
+         IF( ORGATI ) THEN
+            DO 130 J = 1, N
+               DELTA( J ) = ( D( J )-D( I ) ) - TAU
+  130       CONTINUE
+         ELSE
+            DO 140 J = 1, N
+               DELTA( J ) = ( D( J )-D( IP1 ) ) - TAU
+  140       CONTINUE
+         END IF
+         IF( ORGATI ) THEN
+            II = I
+         ELSE
+            II = I + 1
+         END IF
+         IIM1 = II - 1
+         IIP1 = II + 1
+*
+*        Evaluate PSI and the derivative DPSI
+*
+         DPSI = ZERO
+         PSI = ZERO
+         ERRETM = ZERO
+         DO 150 J = 1, IIM1
+            TEMP = Z( J ) / DELTA( J )
+            PSI = PSI + Z( J )*TEMP
+            DPSI = DPSI + TEMP*TEMP
+            ERRETM = ERRETM + PSI
+  150    CONTINUE
+         ERRETM = ABS( ERRETM )
+*
+*        Evaluate PHI and the derivative DPHI
+*
+         DPHI = ZERO
+         PHI = ZERO
+         DO 160 J = N, IIP1, -1
+            TEMP = Z( J ) / DELTA( J )
+            PHI = PHI + Z( J )*TEMP
+            DPHI = DPHI + TEMP*TEMP
+            ERRETM = ERRETM + PHI
+  160    CONTINUE
+*
+         W = RHOINV + PHI + PSI
+*
+*        W is the value of the secular function with
+*        its ii-th element removed.
+*
+         SWTCH3 = .FALSE.
+         IF( ORGATI ) THEN
+            IF( W.LT.ZERO )
+     $         SWTCH3 = .TRUE.
+         ELSE
+            IF( W.GT.ZERO )
+     $         SWTCH3 = .TRUE.
+         END IF
+         IF( II.EQ.1 .OR. II.EQ.N )
+     $      SWTCH3 = .FALSE.
+*
+         TEMP = Z( II ) / DELTA( II )
+         DW = DPSI + DPHI + TEMP*TEMP
+         TEMP = Z( II )*TEMP
+         W = W + TEMP
+         ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV +
+     $            THREE*ABS( TEMP ) + ABS( TAU )*DW
+*
+*        Test for convergence
+*
+         IF( ABS( W ).LE.EPS*ERRETM ) THEN
+            IF( ORGATI ) THEN
+               DLAM = D( I ) + TAU
+            ELSE
+               DLAM = D( IP1 ) + TAU
+            END IF
+            GO TO 250
+         END IF
+*
+         IF( W.LE.ZERO ) THEN
+            DLTLB = MAX( DLTLB, TAU )
+         ELSE
+            DLTUB = MIN( DLTUB, TAU )
+         END IF
+*
+*        Calculate the new step
+*
+         NITER = NITER + 1
+         IF( .NOT.SWTCH3 ) THEN
+            IF( ORGATI ) THEN
+               C = W - DELTA( IP1 )*DW - ( D( I )-D( IP1 ) )*
+     $             ( Z( I ) / DELTA( I ) )**2
+            ELSE
+               C = W - DELTA( I )*DW - ( D( IP1 )-D( I ) )*
+     $             ( Z( IP1 ) / DELTA( IP1 ) )**2
+            END IF
+            A = ( DELTA( I )+DELTA( IP1 ) )*W -
+     $          DELTA( I )*DELTA( IP1 )*DW
+            B = DELTA( I )*DELTA( IP1 )*W
+            IF( C.EQ.ZERO ) THEN
+               IF( A.EQ.ZERO ) THEN
+                  IF( ORGATI ) THEN
+                     A = Z( I )*Z( I ) + DELTA( IP1 )*DELTA( IP1 )*
+     $                   ( DPSI+DPHI )
+                  ELSE
+                     A = Z( IP1 )*Z( IP1 ) + DELTA( I )*DELTA( I )*
+     $                   ( DPSI+DPHI )
+                  END IF
+               END IF
+               ETA = B / A
+            ELSE IF( A.LE.ZERO ) THEN
+               ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+            ELSE
+               ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
+            END IF
+         ELSE
+*
+*           Interpolation using THREE most relevant poles
+*
+            TEMP = RHOINV + PSI + PHI
+            IF( ORGATI ) THEN
+               TEMP1 = Z( IIM1 ) / DELTA( IIM1 )
+               TEMP1 = TEMP1*TEMP1
+               C = TEMP - DELTA( IIP1 )*( DPSI+DPHI ) -
+     $             ( D( IIM1 )-D( IIP1 ) )*TEMP1
+               ZZ( 1 ) = Z( IIM1 )*Z( IIM1 )
+               ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )*
+     $                   ( ( DPSI-TEMP1 )+DPHI )
+            ELSE
+               TEMP1 = Z( IIP1 ) / DELTA( IIP1 )
+               TEMP1 = TEMP1*TEMP1
+               C = TEMP - DELTA( IIM1 )*( DPSI+DPHI ) -
+     $             ( D( IIP1 )-D( IIM1 ) )*TEMP1
+               ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )*
+     $                   ( DPSI+( DPHI-TEMP1 ) )
+               ZZ( 3 ) = Z( IIP1 )*Z( IIP1 )
+            END IF
+            ZZ( 2 ) = Z( II )*Z( II )
+            CALL DLAED6( NITER, ORGATI, C, DELTA( IIM1 ), ZZ, W, ETA,
+     $                   INFO )
+            IF( INFO.NE.0 )
+     $         GO TO 250
+         END IF
+*
+*        Note, eta should be positive if w is negative, and
+*        eta should be negative otherwise. However,
+*        if for some reason caused by roundoff, eta*w > 0,
+*        we simply use one Newton step instead. This way
+*        will guarantee eta*w < 0.
+*
+         IF( W*ETA.GE.ZERO )
+     $      ETA = -W / DW
+         TEMP = TAU + ETA
+         IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN
+            IF( W.LT.ZERO ) THEN
+               ETA = ( DLTUB-TAU ) / TWO
+            ELSE
+               ETA = ( DLTLB-TAU ) / TWO
+            END IF
+         END IF
+*
+         PREW = W
+*
+  170    CONTINUE
+         DO 180 J = 1, N
+            DELTA( J ) = DELTA( J ) - ETA
+  180    CONTINUE
+*
+*        Evaluate PSI and the derivative DPSI
+*
+         DPSI = ZERO
+         PSI = ZERO
+         ERRETM = ZERO
+         DO 190 J = 1, IIM1
+            TEMP = Z( J ) / DELTA( J )
+            PSI = PSI + Z( J )*TEMP
+            DPSI = DPSI + TEMP*TEMP
+            ERRETM = ERRETM + PSI
+  190    CONTINUE
+         ERRETM = ABS( ERRETM )
+*
+*        Evaluate PHI and the derivative DPHI
+*
+         DPHI = ZERO
+         PHI = ZERO
+         DO 200 J = N, IIP1, -1
+            TEMP = Z( J ) / DELTA( J )
+            PHI = PHI + Z( J )*TEMP
+            DPHI = DPHI + TEMP*TEMP
+            ERRETM = ERRETM + PHI
+  200    CONTINUE
+*
+         TEMP = Z( II ) / DELTA( II )
+         DW = DPSI + DPHI + TEMP*TEMP
+         TEMP = Z( II )*TEMP
+         W = RHOINV + PHI + PSI + TEMP
+         ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV +
+     $            THREE*ABS( TEMP ) + ABS( TAU+ETA )*DW
+*
+         SWTCH = .FALSE.
+         IF( ORGATI ) THEN
+            IF( -W.GT.ABS( PREW ) / TEN )
+     $         SWTCH = .TRUE.
+         ELSE
+            IF( W.GT.ABS( PREW ) / TEN )
+     $         SWTCH = .TRUE.
+         END IF
+*
+         TAU = TAU + ETA
+*
+*        Main loop to update the values of the array   DELTA
+*
+         ITER = NITER + 1
+*
+         DO 240 NITER = ITER, MAXIT
+*
+*           Test for convergence
+*
+            IF( ABS( W ).LE.EPS*ERRETM ) THEN
+               IF( ORGATI ) THEN
+                  DLAM = D( I ) + TAU
+               ELSE
+                  DLAM = D( IP1 ) + TAU
+               END IF
+               GO TO 250
+            END IF
+*
+            IF( W.LE.ZERO ) THEN
+               DLTLB = MAX( DLTLB, TAU )
+            ELSE
+               DLTUB = MIN( DLTUB, TAU )
+            END IF
+*
+*           Calculate the new step
+*
+            IF( .NOT.SWTCH3 ) THEN
+               IF( .NOT.SWTCH ) THEN
+                  IF( ORGATI ) THEN
+                     C = W - DELTA( IP1 )*DW -
+     $                   ( D( I )-D( IP1 ) )*( Z( I ) / DELTA( I ) )**2
+                  ELSE
+                     C = W - DELTA( I )*DW - ( D( IP1 )-D( I ) )*
+     $                   ( Z( IP1 ) / DELTA( IP1 ) )**2
+                  END IF
+               ELSE
+                  TEMP = Z( II ) / DELTA( II )
+                  IF( ORGATI ) THEN
+                     DPSI = DPSI + TEMP*TEMP
+                  ELSE
+                     DPHI = DPHI + TEMP*TEMP
+                  END IF
+                  C = W - DELTA( I )*DPSI - DELTA( IP1 )*DPHI
+               END IF
+               A = ( DELTA( I )+DELTA( IP1 ) )*W -
+     $             DELTA( I )*DELTA( IP1 )*DW
+               B = DELTA( I )*DELTA( IP1 )*W
+               IF( C.EQ.ZERO ) THEN
+                  IF( A.EQ.ZERO ) THEN
+                     IF( .NOT.SWTCH ) THEN
+                        IF( ORGATI ) THEN
+                           A = Z( I )*Z( I ) + DELTA( IP1 )*
+     $                         DELTA( IP1 )*( DPSI+DPHI )
+                        ELSE
+                           A = Z( IP1 )*Z( IP1 ) +
+     $                         DELTA( I )*DELTA( I )*( DPSI+DPHI )
+                        END IF
+                     ELSE
+                        A = DELTA( I )*DELTA( I )*DPSI +
+     $                      DELTA( IP1 )*DELTA( IP1 )*DPHI
+                     END IF
+                  END IF
+                  ETA = B / A
+               ELSE IF( A.LE.ZERO ) THEN
+                  ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+               ELSE
+                  ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
+               END IF
+            ELSE
+*
+*              Interpolation using THREE most relevant poles
+*
+               TEMP = RHOINV + PSI + PHI
+               IF( SWTCH ) THEN
+                  C = TEMP - DELTA( IIM1 )*DPSI - DELTA( IIP1 )*DPHI
+                  ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )*DPSI
+                  ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )*DPHI
+               ELSE
+                  IF( ORGATI ) THEN
+                     TEMP1 = Z( IIM1 ) / DELTA( IIM1 )
+                     TEMP1 = TEMP1*TEMP1
+                     C = TEMP - DELTA( IIP1 )*( DPSI+DPHI ) -
+     $                   ( D( IIM1 )-D( IIP1 ) )*TEMP1
+                     ZZ( 1 ) = Z( IIM1 )*Z( IIM1 )
+                     ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )*
+     $                         ( ( DPSI-TEMP1 )+DPHI )
+                  ELSE
+                     TEMP1 = Z( IIP1 ) / DELTA( IIP1 )
+                     TEMP1 = TEMP1*TEMP1
+                     C = TEMP - DELTA( IIM1 )*( DPSI+DPHI ) -
+     $                   ( D( IIP1 )-D( IIM1 ) )*TEMP1
+                     ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )*
+     $                         ( DPSI+( DPHI-TEMP1 ) )
+                     ZZ( 3 ) = Z( IIP1 )*Z( IIP1 )
+                  END IF
+               END IF
+               CALL DLAED6( NITER, ORGATI, C, DELTA( IIM1 ), ZZ, W, ETA,
+     $                      INFO )
+               IF( INFO.NE.0 )
+     $            GO TO 250
+            END IF
+*
+*           Note, eta should be positive if w is negative, and
+*           eta should be negative otherwise. However,
+*           if for some reason caused by roundoff, eta*w > 0,
+*           we simply use one Newton step instead. This way
+*           will guarantee eta*w < 0.
+*
+            IF( W*ETA.GE.ZERO )
+     $         ETA = -W / DW
+            TEMP = TAU + ETA
+            IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN
+               IF( W.LT.ZERO ) THEN
+                  ETA = ( DLTUB-TAU ) / TWO
+               ELSE
+                  ETA = ( DLTLB-TAU ) / TWO
+               END IF
+            END IF
+*
+            DO 210 J = 1, N
+               DELTA( J ) = DELTA( J ) - ETA
+  210       CONTINUE
+*
+            TAU = TAU + ETA
+            PREW = W
+*
+*           Evaluate PSI and the derivative DPSI
+*
+            DPSI = ZERO
+            PSI = ZERO
+            ERRETM = ZERO
+            DO 220 J = 1, IIM1
+               TEMP = Z( J ) / DELTA( J )
+               PSI = PSI + Z( J )*TEMP
+               DPSI = DPSI + TEMP*TEMP
+               ERRETM = ERRETM + PSI
+  220       CONTINUE
+            ERRETM = ABS( ERRETM )
+*
+*           Evaluate PHI and the derivative DPHI
+*
+            DPHI = ZERO
+            PHI = ZERO
+            DO 230 J = N, IIP1, -1
+               TEMP = Z( J ) / DELTA( J )
+               PHI = PHI + Z( J )*TEMP
+               DPHI = DPHI + TEMP*TEMP
+               ERRETM = ERRETM + PHI
+  230       CONTINUE
+*
+            TEMP = Z( II ) / DELTA( II )
+            DW = DPSI + DPHI + TEMP*TEMP
+            TEMP = Z( II )*TEMP
+            W = RHOINV + PHI + PSI + TEMP
+            ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV +
+     $               THREE*ABS( TEMP ) + ABS( TAU )*DW
+            IF( W*PREW.GT.ZERO .AND. ABS( W ).GT.ABS( PREW ) / TEN )
+     $         SWTCH = .NOT.SWTCH
+*
+  240    CONTINUE
+*
+*        Return with INFO = 1, NITER = MAXIT and not converged
+*
+         INFO = 1
+         IF( ORGATI ) THEN
+            DLAM = D( I ) + TAU
+         ELSE
+            DLAM = D( IP1 ) + TAU
+         END IF
+*
+      END IF
+*
+  250 CONTINUE
+*
+      RETURN
+*
+*     End of DLAED4
+*
+      END
diff --git a/costa/native/external/lapack/dlaed5.f b/costa/native/external/lapack/dlaed5.f
new file mode 100644
index 000000000..8b8f9447b
--- /dev/null
+++ b/costa/native/external/lapack/dlaed5.f
@@ -0,0 +1,125 @@
+      SUBROUTINE DLAED5( I, D, Z, DELTA, RHO, DLAM )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+*     Courant Institute, NAG Ltd., and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            I
+      DOUBLE PRECISION   DLAM, RHO
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( 2 ), DELTA( 2 ), Z( 2 )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This subroutine computes the I-th eigenvalue of a symmetric rank-one
+*  modification of a 2-by-2 diagonal matrix
+*
+*             diag( D )  +  RHO *  Z * transpose(Z) .
+*
+*  The diagonal elements in the array D are assumed to satisfy
+*
+*             D(i) < D(j)  for  i < j .
+*
+*  We also assume RHO > 0 and that the Euclidean norm of the vector
+*  Z is one.
+*
+*  Arguments
+*  =========
+*
+*  I      (input) INTEGER
+*         The index of the eigenvalue to be computed.  I = 1 or I = 2.
+*
+*  D      (input) DOUBLE PRECISION array, dimension (2)
+*         The original eigenvalues.  We assume D(1) < D(2).
+*
+*  Z      (input) DOUBLE PRECISION array, dimension (2)
+*         The components of the updating vector.
+*
+*  DELTA  (output) DOUBLE PRECISION array, dimension (2)
+*         The vector DELTA contains the information necessary
+*         to construct the eigenvectors.
+*
+*  RHO    (input) DOUBLE PRECISION
+*         The scalar in the symmetric updating formula.
+*
+*  DLAM   (output) DOUBLE PRECISION
+*         The computed lambda_I, the I-th updated eigenvalue.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ren-Cang Li, Computer Science Division, University of California
+*     at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO, FOUR
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0,
+     $                   FOUR = 4.0D0 )
+*     ..
+*     .. Local Scalars ..
+      DOUBLE PRECISION   B, C, DEL, TAU, TEMP, W
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      DEL = D( 2 ) - D( 1 )
+      IF( I.EQ.1 ) THEN
+         W = ONE + TWO*RHO*( Z( 2 )*Z( 2 )-Z( 1 )*Z( 1 ) ) / DEL
+         IF( W.GT.ZERO ) THEN
+            B = DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) )
+            C = RHO*Z( 1 )*Z( 1 )*DEL
+*
+*           B > ZERO, always
+*
+            TAU = TWO*C / ( B+SQRT( ABS( B*B-FOUR*C ) ) )
+            DLAM = D( 1 ) + TAU
+            DELTA( 1 ) = -Z( 1 ) / TAU
+            DELTA( 2 ) = Z( 2 ) / ( DEL-TAU )
+         ELSE
+            B = -DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) )
+            C = RHO*Z( 2 )*Z( 2 )*DEL
+            IF( B.GT.ZERO ) THEN
+               TAU = -TWO*C / ( B+SQRT( B*B+FOUR*C ) )
+            ELSE
+               TAU = ( B-SQRT( B*B+FOUR*C ) ) / TWO
+            END IF
+            DLAM = D( 2 ) + TAU
+            DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU )
+            DELTA( 2 ) = -Z( 2 ) / TAU
+         END IF
+         TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) )
+         DELTA( 1 ) = DELTA( 1 ) / TEMP
+         DELTA( 2 ) = DELTA( 2 ) / TEMP
+      ELSE
+*
+*     Now I=2
+*
+         B = -DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) )
+         C = RHO*Z( 2 )*Z( 2 )*DEL
+         IF( B.GT.ZERO ) THEN
+            TAU = ( B+SQRT( B*B+FOUR*C ) ) / TWO
+         ELSE
+            TAU = TWO*C / ( -B+SQRT( B*B+FOUR*C ) )
+         END IF
+         DLAM = D( 2 ) + TAU
+         DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU )
+         DELTA( 2 ) = -Z( 2 ) / TAU
+         TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) )
+         DELTA( 1 ) = DELTA( 1 ) / TEMP
+         DELTA( 2 ) = DELTA( 2 ) / TEMP
+      END IF
+      RETURN
+*
+*     End OF DLAED5
+*
+      END
diff --git a/costa/native/external/lapack/dlaed6.f b/costa/native/external/lapack/dlaed6.f
new file mode 100644
index 000000000..8beb171a2
--- /dev/null
+++ b/costa/native/external/lapack/dlaed6.f
@@ -0,0 +1,299 @@
+      SUBROUTINE DLAED6( KNITER, ORGATI, RHO, D, Z, FINIT, TAU, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+*     Courant Institute, NAG Ltd., and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      LOGICAL            ORGATI
+      INTEGER            INFO, KNITER
+      DOUBLE PRECISION   FINIT, RHO, TAU
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( 3 ), Z( 3 )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAED6 computes the positive or negative root (closest to the origin)
+*  of
+*                   z(1)        z(2)        z(3)
+*  f(x) =   rho + --------- + ---------- + ---------
+*                  d(1)-x      d(2)-x      d(3)-x
+*
+*  It is assumed that
+*
+*        if ORGATI = .true. the root is between d(2) and d(3);
+*        otherwise it is between d(1) and d(2)
+*
+*  This routine will be called by DLAED4 when necessary. In most cases,
+*  the root sought is the smallest in magnitude, though it might not be
+*  in some extremely rare situations.
+*
+*  Arguments
+*  =========
+*
+*  KNITER       (input) INTEGER
+*               Refer to DLAED4 for its significance.
+*
+*  ORGATI       (input) LOGICAL
+*               If ORGATI is true, the needed root is between d(2) and
+*               d(3); otherwise it is between d(1) and d(2).  See
+*               DLAED4 for further details.
+*
+*  RHO          (input) DOUBLE PRECISION
+*               Refer to the equation f(x) above.
+*
+*  D            (input) DOUBLE PRECISION array, dimension (3)
+*               D satisfies d(1) < d(2) < d(3).
+*
+*  Z            (input) DOUBLE PRECISION array, dimension (3)
+*               Each of the elements in z must be positive.
+*
+*  FINIT        (input) DOUBLE PRECISION
+*               The value of f at 0. It is more accurate than the one
+*               evaluated inside this routine (if someone wants to do
+*               so).
+*
+*  TAU          (output) DOUBLE PRECISION
+*               The root of the equation f(x).
+*
+*  INFO         (output) INTEGER
+*               = 0: successful exit
+*               > 0: if INFO = 1, failure to converge
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ren-Cang Li, Computer Science Division, University of California
+*     at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            MAXIT
+      PARAMETER          ( MAXIT = 20 )
+      DOUBLE PRECISION   ZERO, ONE, TWO, THREE, FOUR, EIGHT
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0,
+     $                   THREE = 3.0D0, FOUR = 4.0D0, EIGHT = 8.0D0 )
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   DSCALE( 3 ), ZSCALE( 3 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            FIRST, SCALE
+      INTEGER            I, ITER, NITER
+      DOUBLE PRECISION   A, B, BASE, C, DDF, DF, EPS, ERRETM, ETA, F,
+     $                   FC, SCLFAC, SCLINV, SMALL1, SMALL2, SMINV1,
+     $                   SMINV2, TEMP, TEMP1, TEMP2, TEMP3, TEMP4
+*     ..
+*     .. Save statement ..
+      SAVE               FIRST, SMALL1, SMINV1, SMALL2, SMINV2, EPS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, INT, LOG, MAX, MIN, SQRT
+*     ..
+*     .. Data statements ..
+      DATA               FIRST / .TRUE. /
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+*
+      NITER = 1
+      TAU = ZERO
+      IF( KNITER.EQ.2 ) THEN
+         IF( ORGATI ) THEN
+            TEMP = ( D( 3 )-D( 2 ) ) / TWO
+            C = RHO + Z( 1 ) / ( ( D( 1 )-D( 2 ) )-TEMP )
+            A = C*( D( 2 )+D( 3 ) ) + Z( 2 ) + Z( 3 )
+            B = C*D( 2 )*D( 3 ) + Z( 2 )*D( 3 ) + Z( 3 )*D( 2 )
+         ELSE
+            TEMP = ( D( 1 )-D( 2 ) ) / TWO
+            C = RHO + Z( 3 ) / ( ( D( 3 )-D( 2 ) )-TEMP )
+            A = C*( D( 1 )+D( 2 ) ) + Z( 1 ) + Z( 2 )
+            B = C*D( 1 )*D( 2 ) + Z( 1 )*D( 2 ) + Z( 2 )*D( 1 )
+         END IF
+         TEMP = MAX( ABS( A ), ABS( B ), ABS( C ) )
+         A = A / TEMP
+         B = B / TEMP
+         C = C / TEMP
+         IF( C.EQ.ZERO ) THEN
+            TAU = B / A
+         ELSE IF( A.LE.ZERO ) THEN
+            TAU = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+         ELSE
+            TAU = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
+         END IF
+         TEMP = RHO + Z( 1 ) / ( D( 1 )-TAU ) +
+     $          Z( 2 ) / ( D( 2 )-TAU ) + Z( 3 ) / ( D( 3 )-TAU )
+         IF( ABS( FINIT ).LE.ABS( TEMP ) )
+     $      TAU = ZERO
+      END IF
+*
+*     On first call to routine, get machine parameters for
+*     possible scaling to avoid overflow
+*
+      IF( FIRST ) THEN
+         EPS = DLAMCH( 'Epsilon' )
+         BASE = DLAMCH( 'Base' )
+         SMALL1 = BASE**( INT( LOG( DLAMCH( 'SafMin' ) ) / LOG( BASE ) /
+     $            THREE ) )
+         SMINV1 = ONE / SMALL1
+         SMALL2 = SMALL1*SMALL1
+         SMINV2 = SMINV1*SMINV1
+         FIRST = .FALSE.
+      END IF
+*
+*     Determine if scaling of inputs necessary to avoid overflow
+*     when computing 1/TEMP**3
+*
+      IF( ORGATI ) THEN
+         TEMP = MIN( ABS( D( 2 )-TAU ), ABS( D( 3 )-TAU ) )
+      ELSE
+         TEMP = MIN( ABS( D( 1 )-TAU ), ABS( D( 2 )-TAU ) )
+      END IF
+      SCALE = .FALSE.
+      IF( TEMP.LE.SMALL1 ) THEN
+         SCALE = .TRUE.
+         IF( TEMP.LE.SMALL2 ) THEN
+*
+*        Scale up by power of radix nearest 1/SAFMIN**(2/3)
+*
+            SCLFAC = SMINV2
+            SCLINV = SMALL2
+         ELSE
+*
+*        Scale up by power of radix nearest 1/SAFMIN**(1/3)
+*
+            SCLFAC = SMINV1
+            SCLINV = SMALL1
+         END IF
+*
+*        Scaling up safe because D, Z, TAU scaled elsewhere to be O(1)
+*
+         DO 10 I = 1, 3
+            DSCALE( I ) = D( I )*SCLFAC
+            ZSCALE( I ) = Z( I )*SCLFAC
+   10    CONTINUE
+         TAU = TAU*SCLFAC
+      ELSE
+*
+*        Copy D and Z to DSCALE and ZSCALE
+*
+         DO 20 I = 1, 3
+            DSCALE( I ) = D( I )
+            ZSCALE( I ) = Z( I )
+   20    CONTINUE
+      END IF
+*
+      FC = ZERO
+      DF = ZERO
+      DDF = ZERO
+      DO 30 I = 1, 3
+         TEMP = ONE / ( DSCALE( I )-TAU )
+         TEMP1 = ZSCALE( I )*TEMP
+         TEMP2 = TEMP1*TEMP
+         TEMP3 = TEMP2*TEMP
+         FC = FC + TEMP1 / DSCALE( I )
+         DF = DF + TEMP2
+         DDF = DDF + TEMP3
+   30 CONTINUE
+      F = FINIT + TAU*FC
+*
+      IF( ABS( F ).LE.ZERO )
+     $   GO TO 60
+*
+*        Iteration begins
+*
+*     It is not hard to see that
+*
+*           1) Iterations will go up monotonically
+*              if FINIT < 0;
+*
+*           2) Iterations will go down monotonically
+*              if FINIT > 0.
+*
+      ITER = NITER + 1
+*
+      DO 50 NITER = ITER, MAXIT
+*
+         IF( ORGATI ) THEN
+            TEMP1 = DSCALE( 2 ) - TAU
+            TEMP2 = DSCALE( 3 ) - TAU
+         ELSE
+            TEMP1 = DSCALE( 1 ) - TAU
+            TEMP2 = DSCALE( 2 ) - TAU
+         END IF
+         A = ( TEMP1+TEMP2 )*F - TEMP1*TEMP2*DF
+         B = TEMP1*TEMP2*F
+         C = F - ( TEMP1+TEMP2 )*DF + TEMP1*TEMP2*DDF
+         TEMP = MAX( ABS( A ), ABS( B ), ABS( C ) )
+         A = A / TEMP
+         B = B / TEMP
+         C = C / TEMP
+         IF( C.EQ.ZERO ) THEN
+            ETA = B / A
+         ELSE IF( A.LE.ZERO ) THEN
+            ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+         ELSE
+            ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
+         END IF
+         IF( F*ETA.GE.ZERO ) THEN
+            ETA = -F / DF
+         END IF
+*
+         TEMP = ETA + TAU
+         IF( ORGATI ) THEN
+            IF( ETA.GT.ZERO .AND. TEMP.GE.DSCALE( 3 ) )
+     $         ETA = ( DSCALE( 3 )-TAU ) / TWO
+            IF( ETA.LT.ZERO .AND. TEMP.LE.DSCALE( 2 ) )
+     $         ETA = ( DSCALE( 2 )-TAU ) / TWO
+         ELSE
+            IF( ETA.GT.ZERO .AND. TEMP.GE.DSCALE( 2 ) )
+     $         ETA = ( DSCALE( 2 )-TAU ) / TWO
+            IF( ETA.LT.ZERO .AND. TEMP.LE.DSCALE( 1 ) )
+     $         ETA = ( DSCALE( 1 )-TAU ) / TWO
+         END IF
+         TAU = TAU + ETA
+*
+         FC = ZERO
+         ERRETM = ZERO
+         DF = ZERO
+         DDF = ZERO
+         DO 40 I = 1, 3
+            TEMP = ONE / ( DSCALE( I )-TAU )
+            TEMP1 = ZSCALE( I )*TEMP
+            TEMP2 = TEMP1*TEMP
+            TEMP3 = TEMP2*TEMP
+            TEMP4 = TEMP1 / DSCALE( I )
+            FC = FC + TEMP4
+            ERRETM = ERRETM + ABS( TEMP4 )
+            DF = DF + TEMP2
+            DDF = DDF + TEMP3
+   40    CONTINUE
+         F = FINIT + TAU*FC
+         ERRETM = EIGHT*( ABS( FINIT )+ABS( TAU )*ERRETM ) +
+     $            ABS( TAU )*DF
+         IF( ABS( F ).LE.EPS*ERRETM )
+     $      GO TO 60
+   50 CONTINUE
+      INFO = 1
+   60 CONTINUE
+*
+*     Undo scaling
+*
+      IF( SCALE )
+     $   TAU = TAU*SCLINV
+      RETURN
+*
+*     End of DLAED6
+*
+      END
diff --git a/costa/native/external/lapack/dlaed7.f b/costa/native/external/lapack/dlaed7.f
new file mode 100644
index 000000000..cfa84b609
--- /dev/null
+++ b/costa/native/external/lapack/dlaed7.f
@@ -0,0 +1,288 @@
+      SUBROUTINE DLAED7( ICOMPQ, N, QSIZ, TLVLS, CURLVL, CURPBM, D, Q,
+     $                   LDQ, INDXQ, RHO, CUTPNT, QSTORE, QPTR, PRMPTR,
+     $                   PERM, GIVPTR, GIVCOL, GIVNUM, WORK, IWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            CURLVL, CURPBM, CUTPNT, ICOMPQ, INFO, LDQ, N,
+     $                   QSIZ, TLVLS
+      DOUBLE PRECISION   RHO
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( 2, * ), GIVPTR( * ), INDXQ( * ),
+     $                   IWORK( * ), PERM( * ), PRMPTR( * ), QPTR( * )
+      DOUBLE PRECISION   D( * ), GIVNUM( 2, * ), Q( LDQ, * ),
+     $                   QSTORE( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAED7 computes the updated eigensystem of a diagonal
+*  matrix after modification by a rank-one symmetric matrix. This
+*  routine is used only for the eigenproblem which requires all
+*  eigenvalues and optionally eigenvectors of a dense symmetric matrix
+*  that has been reduced to tridiagonal form.  DLAED1 handles
+*  the case in which all eigenvalues and eigenvectors of a symmetric
+*  tridiagonal matrix are desired.
+*
+*    T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out)
+*
+*     where Z = Q'u, u is a vector of length N with ones in the
+*     CUTPNT and CUTPNT + 1 th elements and zeros elsewhere.
+*
+*     The eigenvectors of the original matrix are stored in Q, and the
+*     eigenvalues are in D.  The algorithm consists of three stages:
+*
+*        The first stage consists of deflating the size of the problem
+*        when there are multiple eigenvalues or if there is a zero in
+*        the Z vector.  For each such occurence the dimension of the
+*        secular equation problem is reduced by one.  This stage is
+*        performed by the routine DLAED8.
+*
+*        The second stage consists of calculating the updated
+*        eigenvalues. This is done by finding the roots of the secular
+*        equation via the routine DLAED4 (as called by DLAED9).
+*        This routine also calculates the eigenvectors of the current
+*        problem.
+*
+*        The final stage consists of computing the updated eigenvectors
+*        directly using the updated eigenvalues.  The eigenvectors for
+*        the current problem are multiplied with the eigenvectors from
+*        the overall problem.
+*
+*  Arguments
+*  =========
+*
+*  ICOMPQ  (input) INTEGER
+*          = 0:  Compute eigenvalues only.
+*          = 1:  Compute eigenvectors of original dense symmetric matrix
+*                also.  On entry, Q contains the orthogonal matrix used
+*                to reduce the original matrix to tridiagonal form.
+*
+*  N      (input) INTEGER
+*         The dimension of the symmetric tridiagonal matrix.  N >= 0.
+*
+*  QSIZ   (input) INTEGER
+*         The dimension of the orthogonal matrix used to reduce
+*         the full matrix to tridiagonal form.  QSIZ >= N if ICOMPQ = 1.
+*
+*  TLVLS  (input) INTEGER
+*         The total number of merging levels in the overall divide and
+*         conquer tree.
+*
+*  CURLVL (input) INTEGER
+*         The current level in the overall merge routine,
+*         0 <= CURLVL <= TLVLS.
+*
+*  CURPBM (input) INTEGER
+*         The current problem in the current level in the overall
+*         merge routine (counting from upper left to lower right).
+*
+*  D      (input/output) DOUBLE PRECISION array, dimension (N)
+*         On entry, the eigenvalues of the rank-1-perturbed matrix.
+*         On exit, the eigenvalues of the repaired matrix.
+*
+*  Q      (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
+*         On entry, the eigenvectors of the rank-1-perturbed matrix.
+*         On exit, the eigenvectors of the repaired tridiagonal matrix.
+*
+*  LDQ    (input) INTEGER
+*         The leading dimension of the array Q.  LDQ >= max(1,N).
+*
+*  INDXQ  (output) INTEGER array, dimension (N)
+*         The permutation which will reintegrate the subproblem just
+*         solved back into sorted order, i.e., D( INDXQ( I = 1, N ) )
+*         will be in ascending order.
+*
+*  RHO    (input) DOUBLE PRECISION
+*         The subdiagonal element used to create the rank-1
+*         modification.
+*
+*  CUTPNT (input) INTEGER
+*         Contains the location of the last eigenvalue in the leading
+*         sub-matrix.  min(1,N) <= CUTPNT <= N.
+*
+*  QSTORE (input/output) DOUBLE PRECISION array, dimension (N**2+1)
+*         Stores eigenvectors of submatrices encountered during
+*         divide and conquer, packed together. QPTR points to
+*         beginning of the submatrices.
+*
+*  QPTR   (input/output) INTEGER array, dimension (N+2)
+*         List of indices pointing to beginning of submatrices stored
+*         in QSTORE. The submatrices are numbered starting at the
+*         bottom left of the divide and conquer tree, from left to
+*         right and bottom to top.
+*
+*  PRMPTR (input) INTEGER array, dimension (N lg N)
+*         Contains a list of pointers which indicate where in PERM a
+*         level's permutation is stored.  PRMPTR(i+1) - PRMPTR(i)
+*         indicates the size of the permutation and also the size of
+*         the full, non-deflated problem.
+*
+*  PERM   (input) INTEGER array, dimension (N lg N)
+*         Contains the permutations (from deflation and sorting) to be
+*         applied to each eigenblock.
+*
+*  GIVPTR (input) INTEGER array, dimension (N lg N)
+*         Contains a list of pointers which indicate where in GIVCOL a
+*         level's Givens rotations are stored.  GIVPTR(i+1) - GIVPTR(i)
+*         indicates the number of Givens rotations.
+*
+*  GIVCOL (input) INTEGER array, dimension (2, N lg N)
+*         Each pair of numbers indicates a pair of columns to take place
+*         in a Givens rotation.
+*
+*  GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N)
+*         Each number indicates the S value to be used in the
+*         corresponding Givens rotation.
+*
+*  WORK   (workspace) DOUBLE PRECISION array, dimension (3*N+QSIZ*N)
+*
+*  IWORK  (workspace) INTEGER array, dimension (4*N)
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = 1, an eigenvalue did not converge
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Jeff Rutter, Computer Science Division, University of California
+*     at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D0, ZERO = 0.0D0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            COLTYP, CURR, I, IDLMDA, INDX, INDXC, INDXP,
+     $                   IQ2, IS, IW, IZ, K, LDQ2, N1, N2, PTR
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMM, DLAED8, DLAED9, DLAEDA, DLAMRG, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.1 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( ICOMPQ.EQ.1 .AND. QSIZ.LT.N ) THEN
+         INFO = -4
+      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( MIN( 1, N ).GT.CUTPNT .OR. N.LT.CUTPNT ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLAED7', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     The following values are for bookkeeping purposes only.  They are
+*     integer pointers which indicate the portion of the workspace
+*     used by a particular array in DLAED8 and DLAED9.
+*
+      IF( ICOMPQ.EQ.1 ) THEN
+         LDQ2 = QSIZ
+      ELSE
+         LDQ2 = N
+      END IF
+*
+      IZ = 1
+      IDLMDA = IZ + N
+      IW = IDLMDA + N
+      IQ2 = IW + N
+      IS = IQ2 + N*LDQ2
+*
+      INDX = 1
+      INDXC = INDX + N
+      COLTYP = INDXC + N
+      INDXP = COLTYP + N
+*
+*     Form the z-vector which consists of the last row of Q_1 and the
+*     first row of Q_2.
+*
+      PTR = 1 + 2**TLVLS
+      DO 10 I = 1, CURLVL - 1
+         PTR = PTR + 2**( TLVLS-I )
+   10 CONTINUE
+      CURR = PTR + CURPBM
+      CALL DLAEDA( N, TLVLS, CURLVL, CURPBM, PRMPTR, PERM, GIVPTR,
+     $             GIVCOL, GIVNUM, QSTORE, QPTR, WORK( IZ ),
+     $             WORK( IZ+N ), INFO )
+*
+*     When solving the final problem, we no longer need the stored data,
+*     so we will overwrite the data from this level onto the previously
+*     used storage space.
+*
+      IF( CURLVL.EQ.TLVLS ) THEN
+         QPTR( CURR ) = 1
+         PRMPTR( CURR ) = 1
+         GIVPTR( CURR ) = 1
+      END IF
+*
+*     Sort and Deflate eigenvalues.
+*
+      CALL DLAED8( ICOMPQ, K, N, QSIZ, D, Q, LDQ, INDXQ, RHO, CUTPNT,
+     $             WORK( IZ ), WORK( IDLMDA ), WORK( IQ2 ), LDQ2,
+     $             WORK( IW ), PERM( PRMPTR( CURR ) ), GIVPTR( CURR+1 ),
+     $             GIVCOL( 1, GIVPTR( CURR ) ),
+     $             GIVNUM( 1, GIVPTR( CURR ) ), IWORK( INDXP ),
+     $             IWORK( INDX ), INFO )
+      PRMPTR( CURR+1 ) = PRMPTR( CURR ) + N
+      GIVPTR( CURR+1 ) = GIVPTR( CURR+1 ) + GIVPTR( CURR )
+*
+*     Solve Secular Equation.
+*
+      IF( K.NE.0 ) THEN
+         CALL DLAED9( K, 1, K, N, D, WORK( IS ), K, RHO, WORK( IDLMDA ),
+     $                WORK( IW ), QSTORE( QPTR( CURR ) ), K, INFO )
+         IF( INFO.NE.0 )
+     $      GO TO 30
+         IF( ICOMPQ.EQ.1 ) THEN
+            CALL DGEMM( 'N', 'N', QSIZ, K, K, ONE, WORK( IQ2 ), LDQ2,
+     $                  QSTORE( QPTR( CURR ) ), K, ZERO, Q, LDQ )
+         END IF
+         QPTR( CURR+1 ) = QPTR( CURR ) + K**2
+*
+*     Prepare the INDXQ sorting permutation.
+*
+         N1 = K
+         N2 = N - K
+         CALL DLAMRG( N1, N2, D, 1, -1, INDXQ )
+      ELSE
+         QPTR( CURR+1 ) = QPTR( CURR )
+         DO 20 I = 1, N
+            INDXQ( I ) = I
+   20    CONTINUE
+      END IF
+*
+   30 CONTINUE
+      RETURN
+*
+*     End of DLAED7
+*
+      END
diff --git a/costa/native/external/lapack/dlaed8.f b/costa/native/external/lapack/dlaed8.f
new file mode 100644
index 000000000..58404b8f5
--- /dev/null
+++ b/costa/native/external/lapack/dlaed8.f
@@ -0,0 +1,400 @@
+      SUBROUTINE DLAED8( ICOMPQ, K, N, QSIZ, D, Q, LDQ, INDXQ, RHO,
+     $                   CUTPNT, Z, DLAMDA, Q2, LDQ2, W, PERM, GIVPTR,
+     $                   GIVCOL, GIVNUM, INDXP, INDX, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+*     Courant Institute, NAG Ltd., and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            CUTPNT, GIVPTR, ICOMPQ, INFO, K, LDQ, LDQ2, N,
+     $                   QSIZ
+      DOUBLE PRECISION   RHO
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( 2, * ), INDX( * ), INDXP( * ),
+     $                   INDXQ( * ), PERM( * )
+      DOUBLE PRECISION   D( * ), DLAMDA( * ), GIVNUM( 2, * ),
+     $                   Q( LDQ, * ), Q2( LDQ2, * ), W( * ), Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAED8 merges the two sets of eigenvalues together into a single
+*  sorted set.  Then it tries to deflate the size of the problem.
+*  There are two ways in which deflation can occur:  when two or more
+*  eigenvalues are close together or if there is a tiny element in the
+*  Z vector.  For each such occurrence the order of the related secular
+*  equation problem is reduced by one.
+*
+*  Arguments
+*  =========
+*
+*  ICOMPQ  (input) INTEGER
+*          = 0:  Compute eigenvalues only.
+*          = 1:  Compute eigenvectors of original dense symmetric matrix
+*                also.  On entry, Q contains the orthogonal matrix used
+*                to reduce the original matrix to tridiagonal form.
+*
+*  K      (output) INTEGER
+*         The number of non-deflated eigenvalues, and the order of the
+*         related secular equation.
+*
+*  N      (input) INTEGER
+*         The dimension of the symmetric tridiagonal matrix.  N >= 0.
+*
+*  QSIZ   (input) INTEGER
+*         The dimension of the orthogonal matrix used to reduce
+*         the full matrix to tridiagonal form.  QSIZ >= N if ICOMPQ = 1.
+*
+*  D      (input/output) DOUBLE PRECISION array, dimension (N)
+*         On entry, the eigenvalues of the two submatrices to be
+*         combined.  On exit, the trailing (N-K) updated eigenvalues
+*         (those which were deflated) sorted into increasing order.
+*
+*  Q      (input/output) DOUBLE PRECISION array, dimension (LDQ,N)
+*         If ICOMPQ = 0, Q is not referenced.  Otherwise,
+*         on entry, Q contains the eigenvectors of the partially solved
+*         system which has been previously updated in matrix
+*         multiplies with other partially solved eigensystems.
+*         On exit, Q contains the trailing (N-K) updated eigenvectors
+*         (those which were deflated) in its last N-K columns.
+*
+*  LDQ    (input) INTEGER
+*         The leading dimension of the array Q.  LDQ >= max(1,N).
+*
+*  INDXQ  (input) INTEGER array, dimension (N)
+*         The permutation which separately sorts the two sub-problems
+*         in D into ascending order.  Note that elements in the second
+*         half of this permutation must first have CUTPNT added to
+*         their values in order to be accurate.
+*
+*  RHO    (input/output) DOUBLE PRECISION
+*         On entry, the off-diagonal element associated with the rank-1
+*         cut which originally split the two submatrices which are now
+*         being recombined.
+*         On exit, RHO has been modified to the value required by
+*         DLAED3.
+*
+*  CUTPNT (input) INTEGER
+*         The location of the last eigenvalue in the leading
+*         sub-matrix.  min(1,N) <= CUTPNT <= N.
+*
+*  Z      (input) DOUBLE PRECISION array, dimension (N)
+*         On entry, Z contains the updating vector (the last row of
+*         the first sub-eigenvector matrix and the first row of the
+*         second sub-eigenvector matrix).
+*         On exit, the contents of Z are destroyed by the updating
+*         process.
+*
+*  DLAMDA (output) DOUBLE PRECISION array, dimension (N)
+*         A copy of the first K eigenvalues which will be used by
+*         DLAED3 to form the secular equation.
+*
+*  Q2     (output) DOUBLE PRECISION array, dimension (LDQ2,N)
+*         If ICOMPQ = 0, Q2 is not referenced.  Otherwise,
+*         a copy of the first K eigenvectors which will be used by
+*         DLAED7 in a matrix multiply (DGEMM) to update the new
+*         eigenvectors.
+*
+*  LDQ2   (input) INTEGER
+*         The leading dimension of the array Q2.  LDQ2 >= max(1,N).
+*
+*  W      (output) DOUBLE PRECISION array, dimension (N)
+*         The first k values of the final deflation-altered z-vector and
+*         will be passed to DLAED3.
+*
+*  PERM   (output) INTEGER array, dimension (N)
+*         The permutations (from deflation and sorting) to be applied
+*         to each eigenblock.
+*
+*  GIVPTR (output) INTEGER
+*         The number of Givens rotations which took place in this
+*         subproblem.
+*
+*  GIVCOL (output) INTEGER array, dimension (2, N)
+*         Each pair of numbers indicates a pair of columns to take place
+*         in a Givens rotation.
+*
+*  GIVNUM (output) DOUBLE PRECISION array, dimension (2, N)
+*         Each number indicates the S value to be used in the
+*         corresponding Givens rotation.
+*
+*  INDXP  (workspace) INTEGER array, dimension (N)
+*         The permutation used to place deflated values of D at the end
+*         of the array.  INDXP(1:K) points to the nondeflated D-values
+*         and INDXP(K+1:N) points to the deflated eigenvalues.
+*
+*  INDX   (workspace) INTEGER array, dimension (N)
+*         The permutation used to sort the contents of D into ascending
+*         order.
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Jeff Rutter, Computer Science Division, University of California
+*     at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   MONE, ZERO, ONE, TWO, EIGHT
+      PARAMETER          ( MONE = -1.0D0, ZERO = 0.0D0, ONE = 1.0D0,
+     $                   TWO = 2.0D0, EIGHT = 8.0D0 )
+*     ..
+*     .. Local Scalars ..
+*
+      INTEGER            I, IMAX, J, JLAM, JMAX, JP, K2, N1, N1P1, N2
+      DOUBLE PRECISION   C, EPS, S, T, TAU, TOL
+*     ..
+*     .. External Functions ..
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DLAMCH, DLAPY2
+      EXTERNAL           IDAMAX, DLAMCH, DLAPY2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLACPY, DLAMRG, DROT, DSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.1 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( ICOMPQ.EQ.1 .AND. QSIZ.LT.N ) THEN
+         INFO = -4
+      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( CUTPNT.LT.MIN( 1, N ) .OR. CUTPNT.GT.N ) THEN
+         INFO = -10
+      ELSE IF( LDQ2.LT.MAX( 1, N ) ) THEN
+         INFO = -14
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLAED8', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      N1 = CUTPNT
+      N2 = N - N1
+      N1P1 = N1 + 1
+*
+      IF( RHO.LT.ZERO ) THEN
+         CALL DSCAL( N2, MONE, Z( N1P1 ), 1 )
+      END IF
+*
+*     Normalize z so that norm(z) = 1
+*
+      T = ONE / SQRT( TWO )
+      DO 10 J = 1, N
+         INDX( J ) = J
+   10 CONTINUE
+      CALL DSCAL( N, T, Z, 1 )
+      RHO = ABS( TWO*RHO )
+*
+*     Sort the eigenvalues into increasing order
+*
+      DO 20 I = CUTPNT + 1, N
+         INDXQ( I ) = INDXQ( I ) + CUTPNT
+   20 CONTINUE
+      DO 30 I = 1, N
+         DLAMDA( I ) = D( INDXQ( I ) )
+         W( I ) = Z( INDXQ( I ) )
+   30 CONTINUE
+      I = 1
+      J = CUTPNT + 1
+      CALL DLAMRG( N1, N2, DLAMDA, 1, 1, INDX )
+      DO 40 I = 1, N
+         D( I ) = DLAMDA( INDX( I ) )
+         Z( I ) = W( INDX( I ) )
+   40 CONTINUE
+*
+*     Calculate the allowable deflation tolerence
+*
+      IMAX = IDAMAX( N, Z, 1 )
+      JMAX = IDAMAX( N, D, 1 )
+      EPS = DLAMCH( 'Epsilon' )
+      TOL = EIGHT*EPS*ABS( D( JMAX ) )
+*
+*     If the rank-1 modifier is small enough, no more needs to be done
+*     except to reorganize Q so that its columns correspond with the
+*     elements in D.
+*
+      IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN
+         K = 0
+         IF( ICOMPQ.EQ.0 ) THEN
+            DO 50 J = 1, N
+               PERM( J ) = INDXQ( INDX( J ) )
+   50       CONTINUE
+         ELSE
+            DO 60 J = 1, N
+               PERM( J ) = INDXQ( INDX( J ) )
+               CALL DCOPY( QSIZ, Q( 1, PERM( J ) ), 1, Q2( 1, J ), 1 )
+   60       CONTINUE
+            CALL DLACPY( 'A', QSIZ, N, Q2( 1, 1 ), LDQ2, Q( 1, 1 ),
+     $                   LDQ )
+         END IF
+         RETURN
+      END IF
+*
+*     If there are multiple eigenvalues then the problem deflates.  Here
+*     the number of equal eigenvalues are found.  As each equal
+*     eigenvalue is found, an elementary reflector is computed to rotate
+*     the corresponding eigensubspace so that the corresponding
+*     components of Z are zero in this new basis.
+*
+      K = 0
+      GIVPTR = 0
+      K2 = N + 1
+      DO 70 J = 1, N
+         IF( RHO*ABS( Z( J ) ).LE.TOL ) THEN
+*
+*           Deflate due to small z component.
+*
+            K2 = K2 - 1
+            INDXP( K2 ) = J
+            IF( J.EQ.N )
+     $         GO TO 110
+         ELSE
+            JLAM = J
+            GO TO 80
+         END IF
+   70 CONTINUE
+   80 CONTINUE
+      J = J + 1
+      IF( J.GT.N )
+     $   GO TO 100
+      IF( RHO*ABS( Z( J ) ).LE.TOL ) THEN
+*
+*        Deflate due to small z component.
+*
+         K2 = K2 - 1
+         INDXP( K2 ) = J
+      ELSE
+*
+*        Check if eigenvalues are close enough to allow deflation.
+*
+         S = Z( JLAM )
+         C = Z( J )
+*
+*        Find sqrt(a**2+b**2) without overflow or
+*        destructive underflow.
+*
+         TAU = DLAPY2( C, S )
+         T = D( J ) - D( JLAM )
+         C = C / TAU
+         S = -S / TAU
+         IF( ABS( T*C*S ).LE.TOL ) THEN
+*
+*           Deflation is possible.
+*
+            Z( J ) = TAU
+            Z( JLAM ) = ZERO
+*
+*           Record the appropriate Givens rotation
+*
+            GIVPTR = GIVPTR + 1
+            GIVCOL( 1, GIVPTR ) = INDXQ( INDX( JLAM ) )
+            GIVCOL( 2, GIVPTR ) = INDXQ( INDX( J ) )
+            GIVNUM( 1, GIVPTR ) = C
+            GIVNUM( 2, GIVPTR ) = S
+            IF( ICOMPQ.EQ.1 ) THEN
+               CALL DROT( QSIZ, Q( 1, INDXQ( INDX( JLAM ) ) ), 1,
+     $                    Q( 1, INDXQ( INDX( J ) ) ), 1, C, S )
+            END IF
+            T = D( JLAM )*C*C + D( J )*S*S
+            D( J ) = D( JLAM )*S*S + D( J )*C*C
+            D( JLAM ) = T
+            K2 = K2 - 1
+            I = 1
+   90       CONTINUE
+            IF( K2+I.LE.N ) THEN
+               IF( D( JLAM ).LT.D( INDXP( K2+I ) ) ) THEN
+                  INDXP( K2+I-1 ) = INDXP( K2+I )
+                  INDXP( K2+I ) = JLAM
+                  I = I + 1
+                  GO TO 90
+               ELSE
+                  INDXP( K2+I-1 ) = JLAM
+               END IF
+            ELSE
+               INDXP( K2+I-1 ) = JLAM
+            END IF
+            JLAM = J
+         ELSE
+            K = K + 1
+            W( K ) = Z( JLAM )
+            DLAMDA( K ) = D( JLAM )
+            INDXP( K ) = JLAM
+            JLAM = J
+         END IF
+      END IF
+      GO TO 80
+  100 CONTINUE
+*
+*     Record the last eigenvalue.
+*
+      K = K + 1
+      W( K ) = Z( JLAM )
+      DLAMDA( K ) = D( JLAM )
+      INDXP( K ) = JLAM
+*
+  110 CONTINUE
+*
+*     Sort the eigenvalues and corresponding eigenvectors into DLAMDA
+*     and Q2 respectively.  The eigenvalues/vectors which were not
+*     deflated go into the first K slots of DLAMDA and Q2 respectively,
+*     while those which were deflated go into the last N - K slots.
+*
+      IF( ICOMPQ.EQ.0 ) THEN
+         DO 120 J = 1, N
+            JP = INDXP( J )
+            DLAMDA( J ) = D( JP )
+            PERM( J ) = INDXQ( INDX( JP ) )
+  120    CONTINUE
+      ELSE
+         DO 130 J = 1, N
+            JP = INDXP( J )
+            DLAMDA( J ) = D( JP )
+            PERM( J ) = INDXQ( INDX( JP ) )
+            CALL DCOPY( QSIZ, Q( 1, PERM( J ) ), 1, Q2( 1, J ), 1 )
+  130    CONTINUE
+      END IF
+*
+*     The deflated eigenvalues and their corresponding vectors go back
+*     into the last N - K slots of D and Q respectively.
+*
+      IF( K.LT.N ) THEN
+         IF( ICOMPQ.EQ.0 ) THEN
+            CALL DCOPY( N-K, DLAMDA( K+1 ), 1, D( K+1 ), 1 )
+         ELSE
+            CALL DCOPY( N-K, DLAMDA( K+1 ), 1, D( K+1 ), 1 )
+            CALL DLACPY( 'A', QSIZ, N-K, Q2( 1, K+1 ), LDQ2,
+     $                   Q( 1, K+1 ), LDQ )
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DLAED8
+*
+      END
diff --git a/costa/native/external/lapack/dlaed9.f b/costa/native/external/lapack/dlaed9.f
new file mode 100644
index 000000000..e18630f33
--- /dev/null
+++ b/costa/native/external/lapack/dlaed9.f
@@ -0,0 +1,206 @@
+      SUBROUTINE DLAED9( K, KSTART, KSTOP, N, D, Q, LDQ, RHO, DLAMDA, W,
+     $                   S, LDS, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+*     Courant Institute, NAG Ltd., and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, KSTART, KSTOP, LDQ, LDS, N
+      DOUBLE PRECISION   RHO
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * ), DLAMDA( * ), Q( LDQ, * ), S( LDS, * ),
+     $                   W( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAED9 finds the roots of the secular equation, as defined by the
+*  values in D, Z, and RHO, between KSTART and KSTOP.  It makes the
+*  appropriate calls to DLAED4 and then stores the new matrix of
+*  eigenvectors for use in calculating the next level of Z vectors.
+*
+*  Arguments
+*  =========
+*
+*  K       (input) INTEGER
+*          The number of terms in the rational function to be solved by
+*          DLAED4.  K >= 0.
+*
+*  KSTART  (input) INTEGER
+*  KSTOP   (input) INTEGER
+*          The updated eigenvalues Lambda(I), KSTART <= I <= KSTOP
+*          are to be computed.  1 <= KSTART <= KSTOP <= K.
+*
+*  N       (input) INTEGER
+*          The number of rows and columns in the Q matrix.
+*          N >= K (delation may result in N > K).
+*
+*  D       (output) DOUBLE PRECISION array, dimension (N)
+*          D(I) contains the updated eigenvalues
+*          for KSTART <= I <= KSTOP.
+*
+*  Q       (workspace) DOUBLE PRECISION array, dimension (LDQ,N)
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.  LDQ >= max( 1, N ).
+*
+*  RHO     (input) DOUBLE PRECISION
+*          The value of the parameter in the rank one update equation.
+*          RHO >= 0 required.
+*
+*  DLAMDA  (input) DOUBLE PRECISION array, dimension (K)
+*          The first K elements of this array contain the old roots
+*          of the deflated updating problem.  These are the poles
+*          of the secular equation.
+*
+*  W       (input) DOUBLE PRECISION array, dimension (K)
+*          The first K elements of this array contain the components
+*          of the deflation-adjusted updating vector.
+*
+*  S       (output) DOUBLE PRECISION array, dimension (LDS, K)
+*          Will contain the eigenvectors of the repaired matrix which
+*          will be stored for subsequent Z vector calculation and
+*          multiplied by the previously accumulated eigenvectors
+*          to update the system.
+*
+*  LDS     (input) INTEGER
+*          The leading dimension of S.  LDS >= max( 1, K ).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = 1, an eigenvalue did not converge
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Jeff Rutter, Computer Science Division, University of California
+*     at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, J
+      DOUBLE PRECISION   TEMP
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMC3, DNRM2
+      EXTERNAL           DLAMC3, DNRM2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLAED4, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, SIGN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( K.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( KSTART.LT.1 .OR. KSTART.GT.MAX( 1, K ) ) THEN
+         INFO = -2
+      ELSE IF( MAX( 1, KSTOP ).LT.KSTART .OR. KSTOP.GT.MAX( 1, K ) )
+     $          THEN
+         INFO = -3
+      ELSE IF( N.LT.K ) THEN
+         INFO = -4
+      ELSE IF( LDQ.LT.MAX( 1, K ) ) THEN
+         INFO = -7
+      ELSE IF( LDS.LT.MAX( 1, K ) ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLAED9', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( K.EQ.0 )
+     $   RETURN
+*
+*     Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can
+*     be computed with high relative accuracy (barring over/underflow).
+*     This is a problem on machines without a guard digit in
+*     add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
+*     The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I),
+*     which on any of these machines zeros out the bottommost
+*     bit of DLAMDA(I) if it is 1; this makes the subsequent
+*     subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation
+*     occurs. On binary machines with a guard digit (almost all
+*     machines) it does not change DLAMDA(I) at all. On hexadecimal
+*     and decimal machines with a guard digit, it slightly
+*     changes the bottommost bits of DLAMDA(I). It does not account
+*     for hexadecimal or decimal machines without guard digits
+*     (we know of none). We use a subroutine call to compute
+*     2*DLAMBDA(I) to prevent optimizing compilers from eliminating
+*     this code.
+*
+      DO 10 I = 1, N
+         DLAMDA( I ) = DLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I )
+   10 CONTINUE
+*
+      DO 20 J = KSTART, KSTOP
+         CALL DLAED4( K, J, DLAMDA, W, Q( 1, J ), RHO, D( J ), INFO )
+*
+*        If the zero finder fails, the computation is terminated.
+*
+         IF( INFO.NE.0 )
+     $      GO TO 120
+   20 CONTINUE
+*
+      IF( K.EQ.1 .OR. K.EQ.2 ) THEN
+         DO 40 I = 1, K
+            DO 30 J = 1, K
+               S( J, I ) = Q( J, I )
+   30       CONTINUE
+   40    CONTINUE
+         GO TO 120
+      END IF
+*
+*     Compute updated W.
+*
+      CALL DCOPY( K, W, 1, S, 1 )
+*
+*     Initialize W(I) = Q(I,I)
+*
+      CALL DCOPY( K, Q, LDQ+1, W, 1 )
+      DO 70 J = 1, K
+         DO 50 I = 1, J - 1
+            W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) )
+   50    CONTINUE
+         DO 60 I = J + 1, K
+            W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) )
+   60    CONTINUE
+   70 CONTINUE
+      DO 80 I = 1, K
+         W( I ) = SIGN( SQRT( -W( I ) ), S( I, 1 ) )
+   80 CONTINUE
+*
+*     Compute eigenvectors of the modified rank-1 modification.
+*
+      DO 110 J = 1, K
+         DO 90 I = 1, K
+            Q( I, J ) = W( I ) / Q( I, J )
+   90    CONTINUE
+         TEMP = DNRM2( K, Q( 1, J ), 1 )
+         DO 100 I = 1, K
+            S( I, J ) = Q( I, J ) / TEMP
+  100    CONTINUE
+  110 CONTINUE
+*
+  120 CONTINUE
+      RETURN
+*
+*     End of DLAED9
+*
+      END
diff --git a/costa/native/external/lapack/dlaeda.f b/costa/native/external/lapack/dlaeda.f
new file mode 100644
index 000000000..96bcbb867
--- /dev/null
+++ b/costa/native/external/lapack/dlaeda.f
@@ -0,0 +1,218 @@
+      SUBROUTINE DLAEDA( N, TLVLS, CURLVL, CURPBM, PRMPTR, PERM, GIVPTR,
+     $                   GIVCOL, GIVNUM, Q, QPTR, Z, ZTEMP, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            CURLVL, CURPBM, INFO, N, TLVLS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( 2, * ), GIVPTR( * ), PERM( * ),
+     $                   PRMPTR( * ), QPTR( * )
+      DOUBLE PRECISION   GIVNUM( 2, * ), Q( * ), Z( * ), ZTEMP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAEDA computes the Z vector corresponding to the merge step in the
+*  CURLVLth step of the merge process with TLVLS steps for the CURPBMth
+*  problem.
+*
+*  Arguments
+*  =========
+*
+*  N      (input) INTEGER
+*         The dimension of the symmetric tridiagonal matrix.  N >= 0.
+*
+*  TLVLS  (input) INTEGER
+*         The total number of merging levels in the overall divide and
+*         conquer tree.
+*
+*  CURLVL (input) INTEGER
+*         The current level in the overall merge routine,
+*         0 <= curlvl <= tlvls.
+*
+*  CURPBM (input) INTEGER
+*         The current problem in the current level in the overall
+*         merge routine (counting from upper left to lower right).
+*
+*  PRMPTR (input) INTEGER array, dimension (N lg N)
+*         Contains a list of pointers which indicate where in PERM a
+*         level's permutation is stored.  PRMPTR(i+1) - PRMPTR(i)
+*         indicates the size of the permutation and incidentally the
+*         size of the full, non-deflated problem.
+*
+*  PERM   (input) INTEGER array, dimension (N lg N)
+*         Contains the permutations (from deflation and sorting) to be
+*         applied to each eigenblock.
+*
+*  GIVPTR (input) INTEGER array, dimension (N lg N)
+*         Contains a list of pointers which indicate where in GIVCOL a
+*         level's Givens rotations are stored.  GIVPTR(i+1) - GIVPTR(i)
+*         indicates the number of Givens rotations.
+*
+*  GIVCOL (input) INTEGER array, dimension (2, N lg N)
+*         Each pair of numbers indicates a pair of columns to take place
+*         in a Givens rotation.
+*
+*  GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N)
+*         Each number indicates the S value to be used in the
+*         corresponding Givens rotation.
+*
+*  Q      (input) DOUBLE PRECISION array, dimension (N**2)
+*         Contains the square eigenblocks from previous levels, the
+*         starting positions for blocks are given by QPTR.
+*
+*  QPTR   (input) INTEGER array, dimension (N+2)
+*         Contains a list of pointers which indicate where in Q an
+*         eigenblock is stored.  SQRT( QPTR(i+1) - QPTR(i) ) indicates
+*         the size of the block.
+*
+*  Z      (output) DOUBLE PRECISION array, dimension (N)
+*         On output this vector contains the updating vector (the last
+*         row of the first sub-eigenvector matrix and the first row of
+*         the second sub-eigenvector matrix).
+*
+*  ZTEMP  (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Jeff Rutter, Computer Science Division, University of California
+*     at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, HALF, ONE
+      PARAMETER          ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            BSIZ1, BSIZ2, CURR, I, K, MID, PSIZ1, PSIZ2,
+     $                   PTR, ZPTR1
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DGEMV, DROT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, INT, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLAEDA', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine location of first number in second half.
+*
+      MID = N / 2 + 1
+*
+*     Gather last/first rows of appropriate eigenblocks into center of Z
+*
+      PTR = 1
+*
+*     Determine location of lowest level subproblem in the full storage
+*     scheme
+*
+      CURR = PTR + CURPBM*2**CURLVL + 2**( CURLVL-1 ) - 1
+*
+*     Determine size of these matrices.  We add HALF to the value of
+*     the SQRT in case the machine underestimates one of these square
+*     roots.
+*
+      BSIZ1 = INT( HALF+SQRT( DBLE( QPTR( CURR+1 )-QPTR( CURR ) ) ) )
+      BSIZ2 = INT( HALF+SQRT( DBLE( QPTR( CURR+2 )-QPTR( CURR+1 ) ) ) )
+      DO 10 K = 1, MID - BSIZ1 - 1
+         Z( K ) = ZERO
+   10 CONTINUE
+      CALL DCOPY( BSIZ1, Q( QPTR( CURR )+BSIZ1-1 ), BSIZ1,
+     $            Z( MID-BSIZ1 ), 1 )
+      CALL DCOPY( BSIZ2, Q( QPTR( CURR+1 ) ), BSIZ2, Z( MID ), 1 )
+      DO 20 K = MID + BSIZ2, N
+         Z( K ) = ZERO
+   20 CONTINUE
+*
+*     Loop thru remaining levels 1 -> CURLVL applying the Givens
+*     rotations and permutation and then multiplying the center matrices
+*     against the current Z.
+*
+      PTR = 2**TLVLS + 1
+      DO 70 K = 1, CURLVL - 1
+         CURR = PTR + CURPBM*2**( CURLVL-K ) + 2**( CURLVL-K-1 ) - 1
+         PSIZ1 = PRMPTR( CURR+1 ) - PRMPTR( CURR )
+         PSIZ2 = PRMPTR( CURR+2 ) - PRMPTR( CURR+1 )
+         ZPTR1 = MID - PSIZ1
+*
+*       Apply Givens at CURR and CURR+1
+*
+         DO 30 I = GIVPTR( CURR ), GIVPTR( CURR+1 ) - 1
+            CALL DROT( 1, Z( ZPTR1+GIVCOL( 1, I )-1 ), 1,
+     $                 Z( ZPTR1+GIVCOL( 2, I )-1 ), 1, GIVNUM( 1, I ),
+     $                 GIVNUM( 2, I ) )
+   30    CONTINUE
+         DO 40 I = GIVPTR( CURR+1 ), GIVPTR( CURR+2 ) - 1
+            CALL DROT( 1, Z( MID-1+GIVCOL( 1, I ) ), 1,
+     $                 Z( MID-1+GIVCOL( 2, I ) ), 1, GIVNUM( 1, I ),
+     $                 GIVNUM( 2, I ) )
+   40    CONTINUE
+         PSIZ1 = PRMPTR( CURR+1 ) - PRMPTR( CURR )
+         PSIZ2 = PRMPTR( CURR+2 ) - PRMPTR( CURR+1 )
+         DO 50 I = 0, PSIZ1 - 1
+            ZTEMP( I+1 ) = Z( ZPTR1+PERM( PRMPTR( CURR )+I )-1 )
+   50    CONTINUE
+         DO 60 I = 0, PSIZ2 - 1
+            ZTEMP( PSIZ1+I+1 ) = Z( MID+PERM( PRMPTR( CURR+1 )+I )-1 )
+   60    CONTINUE
+*
+*        Multiply Blocks at CURR and CURR+1
+*
+*        Determine size of these matrices.  We add HALF to the value of
+*        the SQRT in case the machine underestimates one of these
+*        square roots.
+*
+         BSIZ1 = INT( HALF+SQRT( DBLE( QPTR( CURR+1 )-QPTR( CURR ) ) ) )
+         BSIZ2 = INT( HALF+SQRT( DBLE( QPTR( CURR+2 )-QPTR( CURR+
+     $           1 ) ) ) )
+         IF( BSIZ1.GT.0 ) THEN
+            CALL DGEMV( 'T', BSIZ1, BSIZ1, ONE, Q( QPTR( CURR ) ),
+     $                  BSIZ1, ZTEMP( 1 ), 1, ZERO, Z( ZPTR1 ), 1 )
+         END IF
+         CALL DCOPY( PSIZ1-BSIZ1, ZTEMP( BSIZ1+1 ), 1, Z( ZPTR1+BSIZ1 ),
+     $               1 )
+         IF( BSIZ2.GT.0 ) THEN
+            CALL DGEMV( 'T', BSIZ2, BSIZ2, ONE, Q( QPTR( CURR+1 ) ),
+     $                  BSIZ2, ZTEMP( PSIZ1+1 ), 1, ZERO, Z( MID ), 1 )
+         END IF
+         CALL DCOPY( PSIZ2-BSIZ2, ZTEMP( PSIZ1+BSIZ2+1 ), 1,
+     $               Z( MID+BSIZ2 ), 1 )
+*
+         PTR = PTR + 2**( TLVLS-K )
+   70 CONTINUE
+*
+      RETURN
+*
+*     End of DLAEDA
+*
+      END
diff --git a/costa/native/external/lapack/dlaein.f b/costa/native/external/lapack/dlaein.f
new file mode 100644
index 000000000..4186b82be
--- /dev/null
+++ b/costa/native/external/lapack/dlaein.f
@@ -0,0 +1,532 @@
+      SUBROUTINE DLAEIN( RIGHTV, NOINIT, N, H, LDH, WR, WI, VR, VI, B,
+     $                   LDB, WORK, EPS3, SMLNUM, BIGNUM, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      LOGICAL            NOINIT, RIGHTV
+      INTEGER            INFO, LDB, LDH, N
+      DOUBLE PRECISION   BIGNUM, EPS3, SMLNUM, WI, WR
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   B( LDB, * ), H( LDH, * ), VI( * ), VR( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAEIN uses inverse iteration to find a right or left eigenvector
+*  corresponding to the eigenvalue (WR,WI) of a real upper Hessenberg
+*  matrix H.
+*
+*  Arguments
+*  =========
+*
+*  RIGHTV   (input) LOGICAL
+*          = .TRUE. : compute right eigenvector;
+*          = .FALSE.: compute left eigenvector.
+*
+*  NOINIT   (input) LOGICAL
+*          = .TRUE. : no initial vector supplied in (VR,VI).
+*          = .FALSE.: initial vector supplied in (VR,VI).
+*
+*  N       (input) INTEGER
+*          The order of the matrix H.  N >= 0.
+*
+*  H       (input) DOUBLE PRECISION array, dimension (LDH,N)
+*          The upper Hessenberg matrix H.
+*
+*  LDH     (input) INTEGER
+*          The leading dimension of the array H.  LDH >= max(1,N).
+*
+*  WR      (input) DOUBLE PRECISION
+*  WI      (input) DOUBLE PRECISION
+*          The real and imaginary parts of the eigenvalue of H whose
+*          corresponding right or left eigenvector is to be computed.
+*
+*  VR      (input/output) DOUBLE PRECISION array, dimension (N)
+*  VI      (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, if NOINIT = .FALSE. and WI = 0.0, VR must contain
+*          a real starting vector for inverse iteration using the real
+*          eigenvalue WR; if NOINIT = .FALSE. and WI.ne.0.0, VR and VI
+*          must contain the real and imaginary parts of a complex
+*          starting vector for inverse iteration using the complex
+*          eigenvalue (WR,WI); otherwise VR and VI need not be set.
+*          On exit, if WI = 0.0 (real eigenvalue), VR contains the
+*          computed real eigenvector; if WI.ne.0.0 (complex eigenvalue),
+*          VR and VI contain the real and imaginary parts of the
+*          computed complex eigenvector. The eigenvector is normalized
+*          so that the component of largest magnitude has magnitude 1;
+*          here the magnitude of a complex number (x,y) is taken to be
+*          |x| + |y|.
+*          VI is not referenced if WI = 0.0.
+*
+*  B       (workspace) DOUBLE PRECISION array, dimension (LDB,N)
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= N+1.
+*
+*  WORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  EPS3    (input) DOUBLE PRECISION
+*          A small machine-dependent value which is used to perturb
+*          close eigenvalues, and to replace zero pivots.
+*
+*  SMLNUM  (input) DOUBLE PRECISION
+*          A machine-dependent value close to the underflow threshold.
+*
+*  BIGNUM  (input) DOUBLE PRECISION
+*          A machine-dependent value close to the overflow threshold.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          = 1:  inverse iteration did not converge; VR is set to the
+*                last iterate, and so is VI if WI.ne.0.0.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TENTH
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, TENTH = 1.0D-1 )
+*     ..
+*     .. Local Scalars ..
+      CHARACTER          NORMIN, TRANS
+      INTEGER            I, I1, I2, I3, IERR, ITS, J
+      DOUBLE PRECISION   ABSBII, ABSBJJ, EI, EJ, GROWTO, NORM, NRMSML,
+     $                   REC, ROOTN, SCALE, TEMP, VCRIT, VMAX, VNORM, W,
+     $                   W1, X, XI, XR, Y
+*     ..
+*     .. External Functions ..
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DASUM, DLAPY2, DNRM2
+      EXTERNAL           IDAMAX, DASUM, DLAPY2, DNRM2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLADIV, DLATRS, DSCAL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+*
+*     GROWTO is the threshold used in the acceptance test for an
+*     eigenvector.
+*
+      ROOTN = SQRT( DBLE( N ) )
+      GROWTO = TENTH / ROOTN
+      NRMSML = MAX( ONE, EPS3*ROOTN )*SMLNUM
+*
+*     Form B = H - (WR,WI)*I (except that the subdiagonal elements and
+*     the imaginary parts of the diagonal elements are not stored).
+*
+      DO 20 J = 1, N
+         DO 10 I = 1, J - 1
+            B( I, J ) = H( I, J )
+   10    CONTINUE
+         B( J, J ) = H( J, J ) - WR
+   20 CONTINUE
+*
+      IF( WI.EQ.ZERO ) THEN
+*
+*        Real eigenvalue.
+*
+         IF( NOINIT ) THEN
+*
+*           Set initial vector.
+*
+            DO 30 I = 1, N
+               VR( I ) = EPS3
+   30       CONTINUE
+         ELSE
+*
+*           Scale supplied initial vector.
+*
+            VNORM = DNRM2( N, VR, 1 )
+            CALL DSCAL( N, ( EPS3*ROOTN ) / MAX( VNORM, NRMSML ), VR,
+     $                  1 )
+         END IF
+*
+         IF( RIGHTV ) THEN
+*
+*           LU decomposition with partial pivoting of B, replacing zero
+*           pivots by EPS3.
+*
+            DO 60 I = 1, N - 1
+               EI = H( I+1, I )
+               IF( ABS( B( I, I ) ).LT.ABS( EI ) ) THEN
+*
+*                 Interchange rows and eliminate.
+*
+                  X = B( I, I ) / EI
+                  B( I, I ) = EI
+                  DO 40 J = I + 1, N
+                     TEMP = B( I+1, J )
+                     B( I+1, J ) = B( I, J ) - X*TEMP
+                     B( I, J ) = TEMP
+   40             CONTINUE
+               ELSE
+*
+*                 Eliminate without interchange.
+*
+                  IF( B( I, I ).EQ.ZERO )
+     $               B( I, I ) = EPS3
+                  X = EI / B( I, I )
+                  IF( X.NE.ZERO ) THEN
+                     DO 50 J = I + 1, N
+                        B( I+1, J ) = B( I+1, J ) - X*B( I, J )
+   50                CONTINUE
+                  END IF
+               END IF
+   60       CONTINUE
+            IF( B( N, N ).EQ.ZERO )
+     $         B( N, N ) = EPS3
+*
+            TRANS = 'N'
+*
+         ELSE
+*
+*           UL decomposition with partial pivoting of B, replacing zero
+*           pivots by EPS3.
+*
+            DO 90 J = N, 2, -1
+               EJ = H( J, J-1 )
+               IF( ABS( B( J, J ) ).LT.ABS( EJ ) ) THEN
+*
+*                 Interchange columns and eliminate.
+*
+                  X = B( J, J ) / EJ
+                  B( J, J ) = EJ
+                  DO 70 I = 1, J - 1
+                     TEMP = B( I, J-1 )
+                     B( I, J-1 ) = B( I, J ) - X*TEMP
+                     B( I, J ) = TEMP
+   70             CONTINUE
+               ELSE
+*
+*                 Eliminate without interchange.
+*
+                  IF( B( J, J ).EQ.ZERO )
+     $               B( J, J ) = EPS3
+                  X = EJ / B( J, J )
+                  IF( X.NE.ZERO ) THEN
+                     DO 80 I = 1, J - 1
+                        B( I, J-1 ) = B( I, J-1 ) - X*B( I, J )
+   80                CONTINUE
+                  END IF
+               END IF
+   90       CONTINUE
+            IF( B( 1, 1 ).EQ.ZERO )
+     $         B( 1, 1 ) = EPS3
+*
+            TRANS = 'T'
+*
+         END IF
+*
+         NORMIN = 'N'
+         DO 110 ITS = 1, N
+*
+*           Solve U*x = scale*v for a right eigenvector
+*             or U'*x = scale*v for a left eigenvector,
+*           overwriting x on v.
+*
+            CALL DLATRS( 'Upper', TRANS, 'Nonunit', NORMIN, N, B, LDB,
+     $                   VR, SCALE, WORK, IERR )
+            NORMIN = 'Y'
+*
+*           Test for sufficient growth in the norm of v.
+*
+            VNORM = DASUM( N, VR, 1 )
+            IF( VNORM.GE.GROWTO*SCALE )
+     $         GO TO 120
+*
+*           Choose new orthogonal starting vector and try again.
+*
+            TEMP = EPS3 / ( ROOTN+ONE )
+            VR( 1 ) = EPS3
+            DO 100 I = 2, N
+               VR( I ) = TEMP
+  100       CONTINUE
+            VR( N-ITS+1 ) = VR( N-ITS+1 ) - EPS3*ROOTN
+  110    CONTINUE
+*
+*        Failure to find eigenvector in N iterations.
+*
+         INFO = 1
+*
+  120    CONTINUE
+*
+*        Normalize eigenvector.
+*
+         I = IDAMAX( N, VR, 1 )
+         CALL DSCAL( N, ONE / ABS( VR( I ) ), VR, 1 )
+      ELSE
+*
+*        Complex eigenvalue.
+*
+         IF( NOINIT ) THEN
+*
+*           Set initial vector.
+*
+            DO 130 I = 1, N
+               VR( I ) = EPS3
+               VI( I ) = ZERO
+  130       CONTINUE
+         ELSE
+*
+*           Scale supplied initial vector.
+*
+            NORM = DLAPY2( DNRM2( N, VR, 1 ), DNRM2( N, VI, 1 ) )
+            REC = ( EPS3*ROOTN ) / MAX( NORM, NRMSML )
+            CALL DSCAL( N, REC, VR, 1 )
+            CALL DSCAL( N, REC, VI, 1 )
+         END IF
+*
+         IF( RIGHTV ) THEN
+*
+*           LU decomposition with partial pivoting of B, replacing zero
+*           pivots by EPS3.
+*
+*           The imaginary part of the (i,j)-th element of U is stored in
+*           B(j+1,i).
+*
+            B( 2, 1 ) = -WI
+            DO 140 I = 2, N
+               B( I+1, 1 ) = ZERO
+  140       CONTINUE
+*
+            DO 170 I = 1, N - 1
+               ABSBII = DLAPY2( B( I, I ), B( I+1, I ) )
+               EI = H( I+1, I )
+               IF( ABSBII.LT.ABS( EI ) ) THEN
+*
+*                 Interchange rows and eliminate.
+*
+                  XR = B( I, I ) / EI
+                  XI = B( I+1, I ) / EI
+                  B( I, I ) = EI
+                  B( I+1, I ) = ZERO
+                  DO 150 J = I + 1, N
+                     TEMP = B( I+1, J )
+                     B( I+1, J ) = B( I, J ) - XR*TEMP
+                     B( J+1, I+1 ) = B( J+1, I ) - XI*TEMP
+                     B( I, J ) = TEMP
+                     B( J+1, I ) = ZERO
+  150             CONTINUE
+                  B( I+2, I ) = -WI
+                  B( I+1, I+1 ) = B( I+1, I+1 ) - XI*WI
+                  B( I+2, I+1 ) = B( I+2, I+1 ) + XR*WI
+               ELSE
+*
+*                 Eliminate without interchanging rows.
+*
+                  IF( ABSBII.EQ.ZERO ) THEN
+                     B( I, I ) = EPS3
+                     B( I+1, I ) = ZERO
+                     ABSBII = EPS3
+                  END IF
+                  EI = ( EI / ABSBII ) / ABSBII
+                  XR = B( I, I )*EI
+                  XI = -B( I+1, I )*EI
+                  DO 160 J = I + 1, N
+                     B( I+1, J ) = B( I+1, J ) - XR*B( I, J ) +
+     $                             XI*B( J+1, I )
+                     B( J+1, I+1 ) = -XR*B( J+1, I ) - XI*B( I, J )
+  160             CONTINUE
+                  B( I+2, I+1 ) = B( I+2, I+1 ) - WI
+               END IF
+*
+*              Compute 1-norm of offdiagonal elements of i-th row.
+*
+               WORK( I ) = DASUM( N-I, B( I, I+1 ), LDB ) +
+     $                     DASUM( N-I, B( I+2, I ), 1 )
+  170       CONTINUE
+            IF( B( N, N ).EQ.ZERO .AND. B( N+1, N ).EQ.ZERO )
+     $         B( N, N ) = EPS3
+            WORK( N ) = ZERO
+*
+            I1 = N
+            I2 = 1
+            I3 = -1
+         ELSE
+*
+*           UL decomposition with partial pivoting of conjg(B),
+*           replacing zero pivots by EPS3.
+*
+*           The imaginary part of the (i,j)-th element of U is stored in
+*           B(j+1,i).
+*
+            B( N+1, N ) = WI
+            DO 180 J = 1, N - 1
+               B( N+1, J ) = ZERO
+  180       CONTINUE
+*
+            DO 210 J = N, 2, -1
+               EJ = H( J, J-1 )
+               ABSBJJ = DLAPY2( B( J, J ), B( J+1, J ) )
+               IF( ABSBJJ.LT.ABS( EJ ) ) THEN
+*
+*                 Interchange columns and eliminate
+*
+                  XR = B( J, J ) / EJ
+                  XI = B( J+1, J ) / EJ
+                  B( J, J ) = EJ
+                  B( J+1, J ) = ZERO
+                  DO 190 I = 1, J - 1
+                     TEMP = B( I, J-1 )
+                     B( I, J-1 ) = B( I, J ) - XR*TEMP
+                     B( J, I ) = B( J+1, I ) - XI*TEMP
+                     B( I, J ) = TEMP
+                     B( J+1, I ) = ZERO
+  190             CONTINUE
+                  B( J+1, J-1 ) = WI
+                  B( J-1, J-1 ) = B( J-1, J-1 ) + XI*WI
+                  B( J, J-1 ) = B( J, J-1 ) - XR*WI
+               ELSE
+*
+*                 Eliminate without interchange.
+*
+                  IF( ABSBJJ.EQ.ZERO ) THEN
+                     B( J, J ) = EPS3
+                     B( J+1, J ) = ZERO
+                     ABSBJJ = EPS3
+                  END IF
+                  EJ = ( EJ / ABSBJJ ) / ABSBJJ
+                  XR = B( J, J )*EJ
+                  XI = -B( J+1, J )*EJ
+                  DO 200 I = 1, J - 1
+                     B( I, J-1 ) = B( I, J-1 ) - XR*B( I, J ) +
+     $                             XI*B( J+1, I )
+                     B( J, I ) = -XR*B( J+1, I ) - XI*B( I, J )
+  200             CONTINUE
+                  B( J, J-1 ) = B( J, J-1 ) + WI
+               END IF
+*
+*              Compute 1-norm of offdiagonal elements of j-th column.
+*
+               WORK( J ) = DASUM( J-1, B( 1, J ), 1 ) +
+     $                     DASUM( J-1, B( J+1, 1 ), LDB )
+  210       CONTINUE
+            IF( B( 1, 1 ).EQ.ZERO .AND. B( 2, 1 ).EQ.ZERO )
+     $         B( 1, 1 ) = EPS3
+            WORK( 1 ) = ZERO
+*
+            I1 = 1
+            I2 = N
+            I3 = 1
+         END IF
+*
+         DO 270 ITS = 1, N
+            SCALE = ONE
+            VMAX = ONE
+            VCRIT = BIGNUM
+*
+*           Solve U*(xr,xi) = scale*(vr,vi) for a right eigenvector,
+*             or U'*(xr,xi) = scale*(vr,vi) for a left eigenvector,
+*           overwriting (xr,xi) on (vr,vi).
+*
+            DO 250 I = I1, I2, I3
+*
+               IF( WORK( I ).GT.VCRIT ) THEN
+                  REC = ONE / VMAX
+                  CALL DSCAL( N, REC, VR, 1 )
+                  CALL DSCAL( N, REC, VI, 1 )
+                  SCALE = SCALE*REC
+                  VMAX = ONE
+                  VCRIT = BIGNUM
+               END IF
+*
+               XR = VR( I )
+               XI = VI( I )
+               IF( RIGHTV ) THEN
+                  DO 220 J = I + 1, N
+                     XR = XR - B( I, J )*VR( J ) + B( J+1, I )*VI( J )
+                     XI = XI - B( I, J )*VI( J ) - B( J+1, I )*VR( J )
+  220             CONTINUE
+               ELSE
+                  DO 230 J = 1, I - 1
+                     XR = XR - B( J, I )*VR( J ) + B( I+1, J )*VI( J )
+                     XI = XI - B( J, I )*VI( J ) - B( I+1, J )*VR( J )
+  230             CONTINUE
+               END IF
+*
+               W = ABS( B( I, I ) ) + ABS( B( I+1, I ) )
+               IF( W.GT.SMLNUM ) THEN
+                  IF( W.LT.ONE ) THEN
+                     W1 = ABS( XR ) + ABS( XI )
+                     IF( W1.GT.W*BIGNUM ) THEN
+                        REC = ONE / W1
+                        CALL DSCAL( N, REC, VR, 1 )
+                        CALL DSCAL( N, REC, VI, 1 )
+                        XR = VR( I )
+                        XI = VI( I )
+                        SCALE = SCALE*REC
+                        VMAX = VMAX*REC
+                     END IF
+                  END IF
+*
+*                 Divide by diagonal element of B.
+*
+                  CALL DLADIV( XR, XI, B( I, I ), B( I+1, I ), VR( I ),
+     $                         VI( I ) )
+                  VMAX = MAX( ABS( VR( I ) )+ABS( VI( I ) ), VMAX )
+                  VCRIT = BIGNUM / VMAX
+               ELSE
+                  DO 240 J = 1, N
+                     VR( J ) = ZERO
+                     VI( J ) = ZERO
+  240             CONTINUE
+                  VR( I ) = ONE
+                  VI( I ) = ONE
+                  SCALE = ZERO
+                  VMAX = ONE
+                  VCRIT = BIGNUM
+               END IF
+  250       CONTINUE
+*
+*           Test for sufficient growth in the norm of (VR,VI).
+*
+            VNORM = DASUM( N, VR, 1 ) + DASUM( N, VI, 1 )
+            IF( VNORM.GE.GROWTO*SCALE )
+     $         GO TO 280
+*
+*           Choose a new orthogonal starting vector and try again.
+*
+            Y = EPS3 / ( ROOTN+ONE )
+            VR( 1 ) = EPS3
+            VI( 1 ) = ZERO
+*
+            DO 260 I = 2, N
+               VR( I ) = Y
+               VI( I ) = ZERO
+  260       CONTINUE
+            VR( N-ITS+1 ) = VR( N-ITS+1 ) - EPS3*ROOTN
+  270    CONTINUE
+*
+*        Failure to find eigenvector in N iterations
+*
+         INFO = 1
+*
+  280    CONTINUE
+*
+*        Normalize eigenvector.
+*
+         VNORM = ZERO
+         DO 290 I = 1, N
+            VNORM = MAX( VNORM, ABS( VR( I ) )+ABS( VI( I ) ) )
+  290    CONTINUE
+         CALL DSCAL( N, ONE / VNORM, VR, 1 )
+         CALL DSCAL( N, ONE / VNORM, VI, 1 )
+*
+      END IF
+*
+      RETURN
+*
+*     End of DLAEIN
+*
+      END
diff --git a/costa/native/external/lapack/dlaev2.f b/costa/native/external/lapack/dlaev2.f
new file mode 100644
index 000000000..7d7219de4
--- /dev/null
+++ b/costa/native/external/lapack/dlaev2.f
@@ -0,0 +1,170 @@
+      SUBROUTINE DLAEV2( A, B, C, RT1, RT2, CS1, SN1 )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   A, B, C, CS1, RT1, RT2, SN1
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix
+*     [  A   B  ]
+*     [  B   C  ].
+*  On return, RT1 is the eigenvalue of larger absolute value, RT2 is the
+*  eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right
+*  eigenvector for RT1, giving the decomposition
+*
+*     [ CS1  SN1 ] [  A   B  ] [ CS1 -SN1 ]  =  [ RT1  0  ]
+*     [-SN1  CS1 ] [  B   C  ] [ SN1  CS1 ]     [  0  RT2 ].
+*
+*  Arguments
+*  =========
+*
+*  A       (input) DOUBLE PRECISION
+*          The (1,1) element of the 2-by-2 matrix.
+*
+*  B       (input) DOUBLE PRECISION
+*          The (1,2) element and the conjugate of the (2,1) element of
+*          the 2-by-2 matrix.
+*
+*  C       (input) DOUBLE PRECISION
+*          The (2,2) element of the 2-by-2 matrix.
+*
+*  RT1     (output) DOUBLE PRECISION
+*          The eigenvalue of larger absolute value.
+*
+*  RT2     (output) DOUBLE PRECISION
+*          The eigenvalue of smaller absolute value.
+*
+*  CS1     (output) DOUBLE PRECISION
+*  SN1     (output) DOUBLE PRECISION
+*          The vector (CS1, SN1) is a unit right eigenvector for RT1.
+*
+*  Further Details
+*  ===============
+*
+*  RT1 is accurate to a few ulps barring over/underflow.
+*
+*  RT2 may be inaccurate if there is massive cancellation in the
+*  determinant A*C-B*B; higher precision or correctly rounded or
+*  correctly truncated arithmetic would be needed to compute RT2
+*  accurately in all cases.
+*
+*  CS1 and SN1 are accurate to a few ulps barring over/underflow.
+*
+*  Overflow is possible only if RT1 is within a factor of 5 of overflow.
+*  Underflow is harmless if the input data is 0 or exceeds
+*     underflow_threshold / macheps.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D0 )
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D0 )
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D0 )
+      DOUBLE PRECISION   HALF
+      PARAMETER          ( HALF = 0.5D0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            SGN1, SGN2
+      DOUBLE PRECISION   AB, ACMN, ACMX, ACS, ADF, CS, CT, DF, RT, SM,
+     $                   TB, TN
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Compute the eigenvalues
+*
+      SM = A + C
+      DF = A - C
+      ADF = ABS( DF )
+      TB = B + B
+      AB = ABS( TB )
+      IF( ABS( A ).GT.ABS( C ) ) THEN
+         ACMX = A
+         ACMN = C
+      ELSE
+         ACMX = C
+         ACMN = A
+      END IF
+      IF( ADF.GT.AB ) THEN
+         RT = ADF*SQRT( ONE+( AB / ADF )**2 )
+      ELSE IF( ADF.LT.AB ) THEN
+         RT = AB*SQRT( ONE+( ADF / AB )**2 )
+      ELSE
+*
+*        Includes case AB=ADF=0
+*
+         RT = AB*SQRT( TWO )
+      END IF
+      IF( SM.LT.ZERO ) THEN
+         RT1 = HALF*( SM-RT )
+         SGN1 = -1
+*
+*        Order of execution important.
+*        To get fully accurate smaller eigenvalue,
+*        next line needs to be executed in higher precision.
+*
+         RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B
+      ELSE IF( SM.GT.ZERO ) THEN
+         RT1 = HALF*( SM+RT )
+         SGN1 = 1
+*
+*        Order of execution important.
+*        To get fully accurate smaller eigenvalue,
+*        next line needs to be executed in higher precision.
+*
+         RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B
+      ELSE
+*
+*        Includes case RT1 = RT2 = 0
+*
+         RT1 = HALF*RT
+         RT2 = -HALF*RT
+         SGN1 = 1
+      END IF
+*
+*     Compute the eigenvector
+*
+      IF( DF.GE.ZERO ) THEN
+         CS = DF + RT
+         SGN2 = 1
+      ELSE
+         CS = DF - RT
+         SGN2 = -1
+      END IF
+      ACS = ABS( CS )
+      IF( ACS.GT.AB ) THEN
+         CT = -TB / CS
+         SN1 = ONE / SQRT( ONE+CT*CT )
+         CS1 = CT*SN1
+      ELSE
+         IF( AB.EQ.ZERO ) THEN
+            CS1 = ONE
+            SN1 = ZERO
+         ELSE
+            TN = -CS / TB
+            CS1 = ONE / SQRT( ONE+TN*TN )
+            SN1 = TN*CS1
+         END IF
+      END IF
+      IF( SGN1.EQ.SGN2 ) THEN
+         TN = CS1
+         CS1 = -SN1
+         SN1 = TN
+      END IF
+      RETURN
+*
+*     End of DLAEV2
+*
+      END
diff --git a/costa/native/external/lapack/dlaexc.f b/costa/native/external/lapack/dlaexc.f
new file mode 100644
index 000000000..05f80dcc6
--- /dev/null
+++ b/costa/native/external/lapack/dlaexc.f
@@ -0,0 +1,355 @@
+      SUBROUTINE DLAEXC( WANTQ, N, T, LDT, Q, LDQ, J1, N1, N2, WORK,
+     $                   INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      LOGICAL            WANTQ
+      INTEGER            INFO, J1, LDQ, LDT, N, N1, N2
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   Q( LDQ, * ), T( LDT, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAEXC swaps adjacent diagonal blocks T11 and T22 of order 1 or 2 in
+*  an upper quasi-triangular matrix T by an orthogonal similarity
+*  transformation.
+*
+*  T must be in Schur canonical form, that is, block upper triangular
+*  with 1-by-1 and 2-by-2 diagonal blocks; each 2-by-2 diagonal block
+*  has its diagonal elemnts equal and its off-diagonal elements of
+*  opposite sign.
+*
+*  Arguments
+*  =========
+*
+*  WANTQ   (input) LOGICAL
+*          = .TRUE. : accumulate the transformation in the matrix Q;
+*          = .FALSE.: do not accumulate the transformation.
+*
+*  N       (input) INTEGER
+*          The order of the matrix T. N >= 0.
+*
+*  T       (input/output) DOUBLE PRECISION array, dimension (LDT,N)
+*          On entry, the upper quasi-triangular matrix T, in Schur
+*          canonical form.
+*          On exit, the updated matrix T, again in Schur canonical form.
+*
+*  LDT     (input)  INTEGER
+*          The leading dimension of the array T. LDT >= max(1,N).
+*
+*  Q       (input/output) DOUBLE PRECISION array, dimension (LDQ,N)
+*          On entry, if WANTQ is .TRUE., the orthogonal matrix Q.
+*          On exit, if WANTQ is .TRUE., the updated matrix Q.
+*          If WANTQ is .FALSE., Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.
+*          LDQ >= 1; and if WANTQ is .TRUE., LDQ >= N.
+*
+*  J1      (input) INTEGER
+*          The index of the first row of the first block T11.
+*
+*  N1      (input) INTEGER
+*          The order of the first block T11. N1 = 0, 1 or 2.
+*
+*  N2      (input) INTEGER
+*          The order of the second block T22. N2 = 0, 1 or 2.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          = 1: the transformed matrix T would be too far from Schur
+*               form; the blocks are not swapped and T and Q are
+*               unchanged.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+      DOUBLE PRECISION   TEN
+      PARAMETER          ( TEN = 1.0D+1 )
+      INTEGER            LDD, LDX
+      PARAMETER          ( LDD = 4, LDX = 2 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            IERR, J2, J3, J4, K, ND
+      DOUBLE PRECISION   CS, DNORM, EPS, SCALE, SMLNUM, SN, T11, T22,
+     $                   T33, TAU, TAU1, TAU2, TEMP, THRESH, WI1, WI2,
+     $                   WR1, WR2, XNORM
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   D( LDD, 4 ), U( 3 ), U1( 3 ), U2( 3 ),
+     $                   X( LDX, 2 )
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH, DLANGE
+      EXTERNAL           DLAMCH, DLANGE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLACPY, DLANV2, DLARFG, DLARFX, DLARTG, DLASY2,
+     $                   DROT
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. N1.EQ.0 .OR. N2.EQ.0 )
+     $   RETURN
+      IF( J1+N1.GT.N )
+     $   RETURN
+*
+      J2 = J1 + 1
+      J3 = J1 + 2
+      J4 = J1 + 3
+*
+      IF( N1.EQ.1 .AND. N2.EQ.1 ) THEN
+*
+*        Swap two 1-by-1 blocks.
+*
+         T11 = T( J1, J1 )
+         T22 = T( J2, J2 )
+*
+*        Determine the transformation to perform the interchange.
+*
+         CALL DLARTG( T( J1, J2 ), T22-T11, CS, SN, TEMP )
+*
+*        Apply transformation to the matrix T.
+*
+         IF( J3.LE.N )
+     $      CALL DROT( N-J1-1, T( J1, J3 ), LDT, T( J2, J3 ), LDT, CS,
+     $                 SN )
+         CALL DROT( J1-1, T( 1, J1 ), 1, T( 1, J2 ), 1, CS, SN )
+*
+         T( J1, J1 ) = T22
+         T( J2, J2 ) = T11
+*
+         IF( WANTQ ) THEN
+*
+*           Accumulate transformation in the matrix Q.
+*
+            CALL DROT( N, Q( 1, J1 ), 1, Q( 1, J2 ), 1, CS, SN )
+         END IF
+*
+      ELSE
+*
+*        Swapping involves at least one 2-by-2 block.
+*
+*        Copy the diagonal block of order N1+N2 to the local array D
+*        and compute its norm.
+*
+         ND = N1 + N2
+         CALL DLACPY( 'Full', ND, ND, T( J1, J1 ), LDT, D, LDD )
+         DNORM = DLANGE( 'Max', ND, ND, D, LDD, WORK )
+*
+*        Compute machine-dependent threshold for test for accepting
+*        swap.
+*
+         EPS = DLAMCH( 'P' )
+         SMLNUM = DLAMCH( 'S' ) / EPS
+         THRESH = MAX( TEN*EPS*DNORM, SMLNUM )
+*
+*        Solve T11*X - X*T22 = scale*T12 for X.
+*
+         CALL DLASY2( .FALSE., .FALSE., -1, N1, N2, D, LDD,
+     $                D( N1+1, N1+1 ), LDD, D( 1, N1+1 ), LDD, SCALE, X,
+     $                LDX, XNORM, IERR )
+*
+*        Swap the adjacent diagonal blocks.
+*
+         K = N1 + N1 + N2 - 3
+         GO TO ( 10, 20, 30 )K
+*
+   10    CONTINUE
+*
+*        N1 = 1, N2 = 2: generate elementary reflector H so that:
+*
+*        ( scale, X11, X12 ) H = ( 0, 0, * )
+*
+         U( 1 ) = SCALE
+         U( 2 ) = X( 1, 1 )
+         U( 3 ) = X( 1, 2 )
+         CALL DLARFG( 3, U( 3 ), U, 1, TAU )
+         U( 3 ) = ONE
+         T11 = T( J1, J1 )
+*
+*        Perform swap provisionally on diagonal block in D.
+*
+         CALL DLARFX( 'L', 3, 3, U, TAU, D, LDD, WORK )
+         CALL DLARFX( 'R', 3, 3, U, TAU, D, LDD, WORK )
+*
+*        Test whether to reject swap.
+*
+         IF( MAX( ABS( D( 3, 1 ) ), ABS( D( 3, 2 ) ), ABS( D( 3,
+     $       3 )-T11 ) ).GT.THRESH )GO TO 50
+*
+*        Accept swap: apply transformation to the entire matrix T.
+*
+         CALL DLARFX( 'L', 3, N-J1+1, U, TAU, T( J1, J1 ), LDT, WORK )
+         CALL DLARFX( 'R', J2, 3, U, TAU, T( 1, J1 ), LDT, WORK )
+*
+         T( J3, J1 ) = ZERO
+         T( J3, J2 ) = ZERO
+         T( J3, J3 ) = T11
+*
+         IF( WANTQ ) THEN
+*
+*           Accumulate transformation in the matrix Q.
+*
+            CALL DLARFX( 'R', N, 3, U, TAU, Q( 1, J1 ), LDQ, WORK )
+         END IF
+         GO TO 40
+*
+   20    CONTINUE
+*
+*        N1 = 2, N2 = 1: generate elementary reflector H so that:
+*
+*        H (  -X11 ) = ( * )
+*          (  -X21 ) = ( 0 )
+*          ( scale ) = ( 0 )
+*
+         U( 1 ) = -X( 1, 1 )
+         U( 2 ) = -X( 2, 1 )
+         U( 3 ) = SCALE
+         CALL DLARFG( 3, U( 1 ), U( 2 ), 1, TAU )
+         U( 1 ) = ONE
+         T33 = T( J3, J3 )
+*
+*        Perform swap provisionally on diagonal block in D.
+*
+         CALL DLARFX( 'L', 3, 3, U, TAU, D, LDD, WORK )
+         CALL DLARFX( 'R', 3, 3, U, TAU, D, LDD, WORK )
+*
+*        Test whether to reject swap.
+*
+         IF( MAX( ABS( D( 2, 1 ) ), ABS( D( 3, 1 ) ), ABS( D( 1,
+     $       1 )-T33 ) ).GT.THRESH )GO TO 50
+*
+*        Accept swap: apply transformation to the entire matrix T.
+*
+         CALL DLARFX( 'R', J3, 3, U, TAU, T( 1, J1 ), LDT, WORK )
+         CALL DLARFX( 'L', 3, N-J1, U, TAU, T( J1, J2 ), LDT, WORK )
+*
+         T( J1, J1 ) = T33
+         T( J2, J1 ) = ZERO
+         T( J3, J1 ) = ZERO
+*
+         IF( WANTQ ) THEN
+*
+*           Accumulate transformation in the matrix Q.
+*
+            CALL DLARFX( 'R', N, 3, U, TAU, Q( 1, J1 ), LDQ, WORK )
+         END IF
+         GO TO 40
+*
+   30    CONTINUE
+*
+*        N1 = 2, N2 = 2: generate elementary reflectors H(1) and H(2) so
+*        that:
+*
+*        H(2) H(1) (  -X11  -X12 ) = (  *  * )
+*                  (  -X21  -X22 )   (  0  * )
+*                  ( scale    0  )   (  0  0 )
+*                  (    0  scale )   (  0  0 )
+*
+         U1( 1 ) = -X( 1, 1 )
+         U1( 2 ) = -X( 2, 1 )
+         U1( 3 ) = SCALE
+         CALL DLARFG( 3, U1( 1 ), U1( 2 ), 1, TAU1 )
+         U1( 1 ) = ONE
+*
+         TEMP = -TAU1*( X( 1, 2 )+U1( 2 )*X( 2, 2 ) )
+         U2( 1 ) = -TEMP*U1( 2 ) - X( 2, 2 )
+         U2( 2 ) = -TEMP*U1( 3 )
+         U2( 3 ) = SCALE
+         CALL DLARFG( 3, U2( 1 ), U2( 2 ), 1, TAU2 )
+         U2( 1 ) = ONE
+*
+*        Perform swap provisionally on diagonal block in D.
+*
+         CALL DLARFX( 'L', 3, 4, U1, TAU1, D, LDD, WORK )
+         CALL DLARFX( 'R', 4, 3, U1, TAU1, D, LDD, WORK )
+         CALL DLARFX( 'L', 3, 4, U2, TAU2, D( 2, 1 ), LDD, WORK )
+         CALL DLARFX( 'R', 4, 3, U2, TAU2, D( 1, 2 ), LDD, WORK )
+*
+*        Test whether to reject swap.
+*
+         IF( MAX( ABS( D( 3, 1 ) ), ABS( D( 3, 2 ) ), ABS( D( 4, 1 ) ),
+     $       ABS( D( 4, 2 ) ) ).GT.THRESH )GO TO 50
+*
+*        Accept swap: apply transformation to the entire matrix T.
+*
+         CALL DLARFX( 'L', 3, N-J1+1, U1, TAU1, T( J1, J1 ), LDT, WORK )
+         CALL DLARFX( 'R', J4, 3, U1, TAU1, T( 1, J1 ), LDT, WORK )
+         CALL DLARFX( 'L', 3, N-J1+1, U2, TAU2, T( J2, J1 ), LDT, WORK )
+         CALL DLARFX( 'R', J4, 3, U2, TAU2, T( 1, J2 ), LDT, WORK )
+*
+         T( J3, J1 ) = ZERO
+         T( J3, J2 ) = ZERO
+         T( J4, J1 ) = ZERO
+         T( J4, J2 ) = ZERO
+*
+         IF( WANTQ ) THEN
+*
+*           Accumulate transformation in the matrix Q.
+*
+            CALL DLARFX( 'R', N, 3, U1, TAU1, Q( 1, J1 ), LDQ, WORK )
+            CALL DLARFX( 'R', N, 3, U2, TAU2, Q( 1, J2 ), LDQ, WORK )
+         END IF
+*
+   40    CONTINUE
+*
+         IF( N2.EQ.2 ) THEN
+*
+*           Standardize new 2-by-2 block T11
+*
+            CALL DLANV2( T( J1, J1 ), T( J1, J2 ), T( J2, J1 ),
+     $                   T( J2, J2 ), WR1, WI1, WR2, WI2, CS, SN )
+            CALL DROT( N-J1-1, T( J1, J1+2 ), LDT, T( J2, J1+2 ), LDT,
+     $                 CS, SN )
+            CALL DROT( J1-1, T( 1, J1 ), 1, T( 1, J2 ), 1, CS, SN )
+            IF( WANTQ )
+     $         CALL DROT( N, Q( 1, J1 ), 1, Q( 1, J2 ), 1, CS, SN )
+         END IF
+*
+         IF( N1.EQ.2 ) THEN
+*
+*           Standardize new 2-by-2 block T22
+*
+            J3 = J1 + N2
+            J4 = J3 + 1
+            CALL DLANV2( T( J3, J3 ), T( J3, J4 ), T( J4, J3 ),
+     $                   T( J4, J4 ), WR1, WI1, WR2, WI2, CS, SN )
+            IF( J3+2.LE.N )
+     $         CALL DROT( N-J3-1, T( J3, J3+2 ), LDT, T( J4, J3+2 ),
+     $                    LDT, CS, SN )
+            CALL DROT( J3-1, T( 1, J3 ), 1, T( 1, J4 ), 1, CS, SN )
+            IF( WANTQ )
+     $         CALL DROT( N, Q( 1, J3 ), 1, Q( 1, J4 ), 1, CS, SN )
+         END IF
+*
+      END IF
+      RETURN
+*
+*     Exit with INFO = 1 if swap was rejected.
+*
+   50 CONTINUE
+      INFO = 1
+      RETURN
+*
+*     End of DLAEXC
+*
+      END
diff --git a/costa/native/external/lapack/dlag2.f b/costa/native/external/lapack/dlag2.f
new file mode 100644
index 000000000..3cce61bb1
--- /dev/null
+++ b/costa/native/external/lapack/dlag2.f
@@ -0,0 +1,301 @@
+      SUBROUTINE DLAG2( A, LDA, B, LDB, SAFMIN, SCALE1, SCALE2, WR1,
+     $                  WR2, WI )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      INTEGER            LDA, LDB
+      DOUBLE PRECISION   SAFMIN, SCALE1, SCALE2, WI, WR1, WR2
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAG2 computes the eigenvalues of a 2 x 2 generalized eigenvalue
+*  problem  A - w B, with scaling as necessary to avoid over-/underflow.
+*
+*  The scaling factor "s" results in a modified eigenvalue equation
+*
+*      s A - w B
+*
+*  where  s  is a non-negative scaling factor chosen so that  w,  w B,
+*  and  s A  do not overflow and, if possible, do not underflow, either.
+*
+*  Arguments
+*  =========
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA, 2)
+*          On entry, the 2 x 2 matrix A.  It is assumed that its 1-norm
+*          is less than 1/SAFMIN.  Entries less than
+*          sqrt(SAFMIN)*norm(A) are subject to being treated as zero.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= 2.
+*
+*  B       (input) DOUBLE PRECISION array, dimension (LDB, 2)
+*          On entry, the 2 x 2 upper triangular matrix B.  It is
+*          assumed that the one-norm of B is less than 1/SAFMIN.  The
+*          diagonals should be at least sqrt(SAFMIN) times the largest
+*          element of B (in absolute value); if a diagonal is smaller
+*          than that, then  +/- sqrt(SAFMIN) will be used instead of
+*          that diagonal.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= 2.
+*
+*  SAFMIN  (input) DOUBLE PRECISION
+*          The smallest positive number s.t. 1/SAFMIN does not
+*          overflow.  (This should always be DLAMCH('S') -- it is an
+*          argument in order to avoid having to call DLAMCH frequently.)
+*
+*  SCALE1  (output) DOUBLE PRECISION
+*          A scaling factor used to avoid over-/underflow in the
+*          eigenvalue equation which defines the first eigenvalue.  If
+*          the eigenvalues are complex, then the eigenvalues are
+*          ( WR1  +/-  WI i ) / SCALE1  (which may lie outside the
+*          exponent range of the machine), SCALE1=SCALE2, and SCALE1
+*          will always be positive.  If the eigenvalues are real, then
+*          the first (real) eigenvalue is  WR1 / SCALE1 , but this may
+*          overflow or underflow, and in fact, SCALE1 may be zero or
+*          less than the underflow threshhold if the exact eigenvalue
+*          is sufficiently large.
+*
+*  SCALE2  (output) DOUBLE PRECISION
+*          A scaling factor used to avoid over-/underflow in the
+*          eigenvalue equation which defines the second eigenvalue.  If
+*          the eigenvalues are complex, then SCALE2=SCALE1.  If the
+*          eigenvalues are real, then the second (real) eigenvalue is
+*          WR2 / SCALE2 , but this may overflow or underflow, and in
+*          fact, SCALE2 may be zero or less than the underflow
+*          threshhold if the exact eigenvalue is sufficiently large.
+*
+*  WR1     (output) DOUBLE PRECISION
+*          If the eigenvalue is real, then WR1 is SCALE1 times the
+*          eigenvalue closest to the (2,2) element of A B**(-1).  If the
+*          eigenvalue is complex, then WR1=WR2 is SCALE1 times the real
+*          part of the eigenvalues.
+*
+*  WR2     (output) DOUBLE PRECISION
+*          If the eigenvalue is real, then WR2 is SCALE2 times the
+*          other eigenvalue.  If the eigenvalue is complex, then
+*          WR1=WR2 is SCALE1 times the real part of the eigenvalues.
+*
+*  WI      (output) DOUBLE PRECISION
+*          If the eigenvalue is real, then WI is zero.  If the
+*          eigenvalue is complex, then WI is SCALE1 times the imaginary
+*          part of the eigenvalues.  WI will always be non-negative.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 )
+      DOUBLE PRECISION   HALF
+      PARAMETER          ( HALF = ONE / TWO )
+      DOUBLE PRECISION   FUZZY1
+      PARAMETER          ( FUZZY1 = ONE+1.0D-5 )
+*     ..
+*     .. Local Scalars ..
+      DOUBLE PRECISION   A11, A12, A21, A22, ABI22, ANORM, AS11, AS12,
+     $                   AS22, ASCALE, B11, B12, B22, BINV11, BINV22,
+     $                   BMIN, BNORM, BSCALE, BSIZE, C1, C2, C3, C4, C5,
+     $                   DIFF, DISCR, PP, QQ, R, RTMAX, RTMIN, S1, S2,
+     $                   SAFMAX, SHIFT, SS, SUM, WABS, WBIG, WDET,
+     $                   WSCALE, WSIZE, WSMALL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SIGN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      RTMIN = SQRT( SAFMIN )
+      RTMAX = ONE / RTMIN
+      SAFMAX = ONE / SAFMIN
+*
+*     Scale A
+*
+      ANORM = MAX( ABS( A( 1, 1 ) )+ABS( A( 2, 1 ) ),
+     $        ABS( A( 1, 2 ) )+ABS( A( 2, 2 ) ), SAFMIN )
+      ASCALE = ONE / ANORM
+      A11 = ASCALE*A( 1, 1 )
+      A21 = ASCALE*A( 2, 1 )
+      A12 = ASCALE*A( 1, 2 )
+      A22 = ASCALE*A( 2, 2 )
+*
+*     Perturb B if necessary to insure non-singularity
+*
+      B11 = B( 1, 1 )
+      B12 = B( 1, 2 )
+      B22 = B( 2, 2 )
+      BMIN = RTMIN*MAX( ABS( B11 ), ABS( B12 ), ABS( B22 ), RTMIN )
+      IF( ABS( B11 ).LT.BMIN )
+     $   B11 = SIGN( BMIN, B11 )
+      IF( ABS( B22 ).LT.BMIN )
+     $   B22 = SIGN( BMIN, B22 )
+*
+*     Scale B
+*
+      BNORM = MAX( ABS( B11 ), ABS( B12 )+ABS( B22 ), SAFMIN )
+      BSIZE = MAX( ABS( B11 ), ABS( B22 ) )
+      BSCALE = ONE / BSIZE
+      B11 = B11*BSCALE
+      B12 = B12*BSCALE
+      B22 = B22*BSCALE
+*
+*     Compute larger eigenvalue by method described by C. van Loan
+*
+*     ( AS is A shifted by -SHIFT*B )
+*
+      BINV11 = ONE / B11
+      BINV22 = ONE / B22
+      S1 = A11*BINV11
+      S2 = A22*BINV22
+      IF( ABS( S1 ).LE.ABS( S2 ) ) THEN
+         AS12 = A12 - S1*B12
+         AS22 = A22 - S1*B22
+         SS = A21*( BINV11*BINV22 )
+         ABI22 = AS22*BINV22 - SS*B12
+         PP = HALF*ABI22
+         SHIFT = S1
+      ELSE
+         AS12 = A12 - S2*B12
+         AS11 = A11 - S2*B11
+         SS = A21*( BINV11*BINV22 )
+         ABI22 = -SS*B12
+         PP = HALF*( AS11*BINV11+ABI22 )
+         SHIFT = S2
+      END IF
+      QQ = SS*AS12
+      IF( ABS( PP*RTMIN ).GE.ONE ) THEN
+         DISCR = ( RTMIN*PP )**2 + QQ*SAFMIN
+         R = SQRT( ABS( DISCR ) )*RTMAX
+      ELSE
+         IF( PP**2+ABS( QQ ).LE.SAFMIN ) THEN
+            DISCR = ( RTMAX*PP )**2 + QQ*SAFMAX
+            R = SQRT( ABS( DISCR ) )*RTMIN
+         ELSE
+            DISCR = PP**2 + QQ
+            R = SQRT( ABS( DISCR ) )
+         END IF
+      END IF
+*
+*     Note: the test of R in the following IF is to cover the case when
+*           DISCR is small and negative and is flushed to zero during
+*           the calculation of R.  On machines which have a consistent
+*           flush-to-zero threshhold and handle numbers above that
+*           threshhold correctly, it would not be necessary.
+*
+      IF( DISCR.GE.ZERO .OR. R.EQ.ZERO ) THEN
+         SUM = PP + SIGN( R, PP )
+         DIFF = PP - SIGN( R, PP )
+         WBIG = SHIFT + SUM
+*
+*        Compute smaller eigenvalue
+*
+         WSMALL = SHIFT + DIFF
+         IF( HALF*ABS( WBIG ).GT.MAX( ABS( WSMALL ), SAFMIN ) ) THEN
+            WDET = ( A11*A22-A12*A21 )*( BINV11*BINV22 )
+            WSMALL = WDET / WBIG
+         END IF
+*
+*        Choose (real) eigenvalue closest to 2,2 element of A*B**(-1)
+*        for WR1.
+*
+         IF( PP.GT.ABI22 ) THEN
+            WR1 = MIN( WBIG, WSMALL )
+            WR2 = MAX( WBIG, WSMALL )
+         ELSE
+            WR1 = MAX( WBIG, WSMALL )
+            WR2 = MIN( WBIG, WSMALL )
+         END IF
+         WI = ZERO
+      ELSE
+*
+*        Complex eigenvalues
+*
+         WR1 = SHIFT + PP
+         WR2 = WR1
+         WI = R
+      END IF
+*
+*     Further scaling to avoid underflow and overflow in computing
+*     SCALE1 and overflow in computing w*B.
+*
+*     This scale factor (WSCALE) is bounded from above using C1 and C2,
+*     and from below using C3 and C4.
+*        C1 implements the condition  s A  must never overflow.
+*        C2 implements the condition  w B  must never overflow.
+*        C3, with C2,
+*           implement the condition that s A - w B must never overflow.
+*        C4 implements the condition  s    should not underflow.
+*        C5 implements the condition  max(s,|w|) should be at least 2.
+*
+      C1 = BSIZE*( SAFMIN*MAX( ONE, ASCALE ) )
+      C2 = SAFMIN*MAX( ONE, BNORM )
+      C3 = BSIZE*SAFMIN
+      IF( ASCALE.LE.ONE .AND. BSIZE.LE.ONE ) THEN
+         C4 = MIN( ONE, ( ASCALE / SAFMIN )*BSIZE )
+      ELSE
+         C4 = ONE
+      END IF
+      IF( ASCALE.LE.ONE .OR. BSIZE.LE.ONE ) THEN
+         C5 = MIN( ONE, ASCALE*BSIZE )
+      ELSE
+         C5 = ONE
+      END IF
+*
+*     Scale first eigenvalue
+*
+      WABS = ABS( WR1 ) + ABS( WI )
+      WSIZE = MAX( SAFMIN, C1, FUZZY1*( WABS*C2+C3 ),
+     $        MIN( C4, HALF*MAX( WABS, C5 ) ) )
+      IF( WSIZE.NE.ONE ) THEN
+         WSCALE = ONE / WSIZE
+         IF( WSIZE.GT.ONE ) THEN
+            SCALE1 = ( MAX( ASCALE, BSIZE )*WSCALE )*
+     $               MIN( ASCALE, BSIZE )
+         ELSE
+            SCALE1 = ( MIN( ASCALE, BSIZE )*WSCALE )*
+     $               MAX( ASCALE, BSIZE )
+         END IF
+         WR1 = WR1*WSCALE
+         IF( WI.NE.ZERO ) THEN
+            WI = WI*WSCALE
+            WR2 = WR1
+            SCALE2 = SCALE1
+         END IF
+      ELSE
+         SCALE1 = ASCALE*BSIZE
+         SCALE2 = SCALE1
+      END IF
+*
+*     Scale second eigenvalue (if real)
+*
+      IF( WI.EQ.ZERO ) THEN
+         WSIZE = MAX( SAFMIN, C1, FUZZY1*( ABS( WR2 )*C2+C3 ),
+     $           MIN( C4, HALF*MAX( ABS( WR2 ), C5 ) ) )
+         IF( WSIZE.NE.ONE ) THEN
+            WSCALE = ONE / WSIZE
+            IF( WSIZE.GT.ONE ) THEN
+               SCALE2 = ( MAX( ASCALE, BSIZE )*WSCALE )*
+     $                  MIN( ASCALE, BSIZE )
+            ELSE
+               SCALE2 = ( MIN( ASCALE, BSIZE )*WSCALE )*
+     $                  MAX( ASCALE, BSIZE )
+            END IF
+            WR2 = WR2*WSCALE
+         ELSE
+            SCALE2 = ASCALE*BSIZE
+         END IF
+      END IF
+*
+*     End of DLAG2
+*
+      RETURN
+      END
diff --git a/costa/native/external/lapack/dlags2.f b/costa/native/external/lapack/dlags2.f
new file mode 100644
index 000000000..bf2acdefb
--- /dev/null
+++ b/costa/native/external/lapack/dlags2.f
@@ -0,0 +1,270 @@
+      SUBROUTINE DLAGS2( UPPER, A1, A2, A3, B1, B2, B3, CSU, SNU, CSV,
+     $                   SNV, CSQ, SNQ )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      LOGICAL            UPPER
+      DOUBLE PRECISION   A1, A2, A3, B1, B2, B3, CSQ, CSU, CSV, SNQ,
+     $                   SNU, SNV
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAGS2 computes 2-by-2 orthogonal matrices U, V and Q, such
+*  that if ( UPPER ) then
+*
+*            U'*A*Q = U'*( A1 A2 )*Q = ( x  0  )
+*                        ( 0  A3 )     ( x  x  )
+*  and
+*            V'*B*Q = V'*( B1 B2 )*Q = ( x  0  )
+*                        ( 0  B3 )     ( x  x  )
+*
+*  or if ( .NOT.UPPER ) then
+*
+*            U'*A*Q = U'*( A1 0  )*Q = ( x  x  )
+*                        ( A2 A3 )     ( 0  x  )
+*  and
+*            V'*B*Q = V'*( B1 0  )*Q = ( x  x  )
+*                        ( B2 B3 )     ( 0  x  )
+*
+*  The rows of the transformed A and B are parallel, where
+*
+*    U = (  CSU  SNU ), V = (  CSV SNV ), Q = (  CSQ   SNQ )
+*        ( -SNU  CSU )      ( -SNV CSV )      ( -SNQ   CSQ )
+*
+*  Z' denotes the transpose of Z.
+*
+*
+*  Arguments
+*  =========
+*
+*  UPPER   (input) LOGICAL
+*          = .TRUE.: the input matrices A and B are upper triangular.
+*          = .FALSE.: the input matrices A and B are lower triangular.
+*
+*  A1      (input) DOUBLE PRECISION
+*  A2      (input) DOUBLE PRECISION
+*  A3      (input) DOUBLE PRECISION
+*          On entry, A1, A2 and A3 are elements of the input 2-by-2
+*          upper (lower) triangular matrix A.
+*
+*  B1      (input) DOUBLE PRECISION
+*  B2      (input) DOUBLE PRECISION
+*  B3      (input) DOUBLE PRECISION
+*          On entry, B1, B2 and B3 are elements of the input 2-by-2
+*          upper (lower) triangular matrix B.
+*
+*  CSU     (output) DOUBLE PRECISION
+*  SNU     (output) DOUBLE PRECISION
+*          The desired orthogonal matrix U.
+*
+*  CSV     (output) DOUBLE PRECISION
+*  SNV     (output) DOUBLE PRECISION
+*          The desired orthogonal matrix V.
+*
+*  CSQ     (output) DOUBLE PRECISION
+*  SNQ     (output) DOUBLE PRECISION
+*          The desired orthogonal matrix Q.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      DOUBLE PRECISION   A, AUA11, AUA12, AUA21, AUA22, AVB11, AVB12,
+     $                   AVB21, AVB22, B, C, CSL, CSR, D, R, S1, S2,
+     $                   SNL, SNR, UA11, UA11R, UA12, UA21, UA22, UA22R,
+     $                   VB11, VB11R, VB12, VB21, VB22, VB22R
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARTG, DLASV2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS
+*     ..
+*     .. Executable Statements ..
+*
+      IF( UPPER ) THEN
+*
+*        Input matrices A and B are upper triangular matrices
+*
+*        Form matrix C = A*adj(B) = ( a b )
+*                                   ( 0 d )
+*
+         A = A1*B3
+         D = A3*B1
+         B = A2*B1 - A1*B2
+*
+*        The SVD of real 2-by-2 triangular C
+*
+*         ( CSL -SNL )*( A B )*(  CSR  SNR ) = ( R 0 )
+*         ( SNL  CSL ) ( 0 D ) ( -SNR  CSR )   ( 0 T )
+*
+         CALL DLASV2( A, B, D, S1, S2, SNR, CSR, SNL, CSL )
+*
+         IF( ABS( CSL ).GE.ABS( SNL ) .OR. ABS( CSR ).GE.ABS( SNR ) )
+     $        THEN
+*
+*           Compute the (1,1) and (1,2) elements of U'*A and V'*B,
+*           and (1,2) element of |U|'*|A| and |V|'*|B|.
+*
+            UA11R = CSL*A1
+            UA12 = CSL*A2 + SNL*A3
+*
+            VB11R = CSR*B1
+            VB12 = CSR*B2 + SNR*B3
+*
+            AUA12 = ABS( CSL )*ABS( A2 ) + ABS( SNL )*ABS( A3 )
+            AVB12 = ABS( CSR )*ABS( B2 ) + ABS( SNR )*ABS( B3 )
+*
+*           zero (1,2) elements of U'*A and V'*B
+*
+            IF( ( ABS( UA11R )+ABS( UA12 ) ).NE.ZERO ) THEN
+               IF( AUA12 / ( ABS( UA11R )+ABS( UA12 ) ).LE.AVB12 /
+     $             ( ABS( VB11R )+ABS( VB12 ) ) ) THEN
+                  CALL DLARTG( -UA11R, UA12, CSQ, SNQ, R )
+               ELSE
+                  CALL DLARTG( -VB11R, VB12, CSQ, SNQ, R )
+               END IF
+            ELSE
+               CALL DLARTG( -VB11R, VB12, CSQ, SNQ, R )
+            END IF
+*
+            CSU = CSL
+            SNU = -SNL
+            CSV = CSR
+            SNV = -SNR
+*
+         ELSE
+*
+*           Compute the (2,1) and (2,2) elements of U'*A and V'*B,
+*           and (2,2) element of |U|'*|A| and |V|'*|B|.
+*
+            UA21 = -SNL*A1
+            UA22 = -SNL*A2 + CSL*A3
+*
+            VB21 = -SNR*B1
+            VB22 = -SNR*B2 + CSR*B3
+*
+            AUA22 = ABS( SNL )*ABS( A2 ) + ABS( CSL )*ABS( A3 )
+            AVB22 = ABS( SNR )*ABS( B2 ) + ABS( CSR )*ABS( B3 )
+*
+*           zero (2,2) elements of U'*A and V'*B, and then swap.
+*
+            IF( ( ABS( UA21 )+ABS( UA22 ) ).NE.ZERO ) THEN
+               IF( AUA22 / ( ABS( UA21 )+ABS( UA22 ) ).LE.AVB22 /
+     $             ( ABS( VB21 )+ABS( VB22 ) ) ) THEN
+                  CALL DLARTG( -UA21, UA22, CSQ, SNQ, R )
+               ELSE
+                  CALL DLARTG( -VB21, VB22, CSQ, SNQ, R )
+               END IF
+            ELSE
+               CALL DLARTG( -VB21, VB22, CSQ, SNQ, R )
+            END IF
+*
+            CSU = SNL
+            SNU = CSL
+            CSV = SNR
+            SNV = CSR
+*
+         END IF
+*
+      ELSE
+*
+*        Input matrices A and B are lower triangular matrices
+*
+*        Form matrix C = A*adj(B) = ( a 0 )
+*                                   ( c d )
+*
+         A = A1*B3
+         D = A3*B1
+         C = A2*B3 - A3*B2
+*
+*        The SVD of real 2-by-2 triangular C
+*
+*         ( CSL -SNL )*( A 0 )*(  CSR  SNR ) = ( R 0 )
+*         ( SNL  CSL ) ( C D ) ( -SNR  CSR )   ( 0 T )
+*
+         CALL DLASV2( A, C, D, S1, S2, SNR, CSR, SNL, CSL )
+*
+         IF( ABS( CSR ).GE.ABS( SNR ) .OR. ABS( CSL ).GE.ABS( SNL ) )
+     $        THEN
+*
+*           Compute the (2,1) and (2,2) elements of U'*A and V'*B,
+*           and (2,1) element of |U|'*|A| and |V|'*|B|.
+*
+            UA21 = -SNR*A1 + CSR*A2
+            UA22R = CSR*A3
+*
+            VB21 = -SNL*B1 + CSL*B2
+            VB22R = CSL*B3
+*
+            AUA21 = ABS( SNR )*ABS( A1 ) + ABS( CSR )*ABS( A2 )
+            AVB21 = ABS( SNL )*ABS( B1 ) + ABS( CSL )*ABS( B2 )
+*
+*           zero (2,1) elements of U'*A and V'*B.
+*
+            IF( ( ABS( UA21 )+ABS( UA22R ) ).NE.ZERO ) THEN
+               IF( AUA21 / ( ABS( UA21 )+ABS( UA22R ) ).LE.AVB21 /
+     $             ( ABS( VB21 )+ABS( VB22R ) ) ) THEN
+                  CALL DLARTG( UA22R, UA21, CSQ, SNQ, R )
+               ELSE
+                  CALL DLARTG( VB22R, VB21, CSQ, SNQ, R )
+               END IF
+            ELSE
+               CALL DLARTG( VB22R, VB21, CSQ, SNQ, R )
+            END IF
+*
+            CSU = CSR
+            SNU = -SNR
+            CSV = CSL
+            SNV = -SNL
+*
+         ELSE
+*
+*           Compute the (1,1) and (1,2) elements of U'*A and V'*B,
+*           and (1,1) element of |U|'*|A| and |V|'*|B|.
+*
+            UA11 = CSR*A1 + SNR*A2
+            UA12 = SNR*A3
+*
+            VB11 = CSL*B1 + SNL*B2
+            VB12 = SNL*B3
+*
+            AUA11 = ABS( CSR )*ABS( A1 ) + ABS( SNR )*ABS( A2 )
+            AVB11 = ABS( CSL )*ABS( B1 ) + ABS( SNL )*ABS( B2 )
+*
+*           zero (1,1) elements of U'*A and V'*B, and then swap.
+*
+            IF( ( ABS( UA11 )+ABS( UA12 ) ).NE.ZERO ) THEN
+               IF( AUA11 / ( ABS( UA11 )+ABS( UA12 ) ).LE.AVB11 /
+     $             ( ABS( VB11 )+ABS( VB12 ) ) ) THEN
+                  CALL DLARTG( UA12, UA11, CSQ, SNQ, R )
+               ELSE
+                  CALL DLARTG( VB12, VB11, CSQ, SNQ, R )
+               END IF
+            ELSE
+               CALL DLARTG( VB12, VB11, CSQ, SNQ, R )
+            END IF
+*
+            CSU = SNR
+            SNU = CSR
+            CSV = SNL
+            SNV = CSL
+*
+         END IF
+*
+      END IF
+*
+      RETURN
+*
+*     End of DLAGS2
+*
+      END
diff --git a/costa/native/external/lapack/dlagtf.f b/costa/native/external/lapack/dlagtf.f
new file mode 100644
index 000000000..b127f6b4c
--- /dev/null
+++ b/costa/native/external/lapack/dlagtf.f
@@ -0,0 +1,191 @@
+      SUBROUTINE DLAGTF( N, A, LAMBDA, B, C, TOL, D, IN, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, N
+      DOUBLE PRECISION   LAMBDA, TOL
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IN( * )
+      DOUBLE PRECISION   A( * ), B( * ), C( * ), D( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAGTF factorizes the matrix (T - lambda*I), where T is an n by n
+*  tridiagonal matrix and lambda is a scalar, as
+*
+*     T - lambda*I = PLU,
+*
+*  where P is a permutation matrix, L is a unit lower tridiagonal matrix
+*  with at most one non-zero sub-diagonal elements per column and U is
+*  an upper triangular matrix with at most two non-zero super-diagonal
+*  elements per column.
+*
+*  The factorization is obtained by Gaussian elimination with partial
+*  pivoting and implicit row scaling.
+*
+*  The parameter LAMBDA is included in the routine so that DLAGTF may
+*  be used, in conjunction with DLAGTS, to obtain eigenvectors of T by
+*  inverse iteration.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix T.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, A must contain the diagonal elements of T.
+*
+*          On exit, A is overwritten by the n diagonal elements of the
+*          upper triangular matrix U of the factorization of T.
+*
+*  LAMBDA  (input) DOUBLE PRECISION
+*          On entry, the scalar lambda.
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (N-1)
+*          On entry, B must contain the (n-1) super-diagonal elements of
+*          T.
+*
+*          On exit, B is overwritten by the (n-1) super-diagonal
+*          elements of the matrix U of the factorization of T.
+*
+*  C       (input/output) DOUBLE PRECISION array, dimension (N-1)
+*          On entry, C must contain the (n-1) sub-diagonal elements of
+*          T.
+*
+*          On exit, C is overwritten by the (n-1) sub-diagonal elements
+*          of the matrix L of the factorization of T.
+*
+*  TOL     (input) DOUBLE PRECISION
+*          On entry, a relative tolerance used to indicate whether or
+*          not the matrix (T - lambda*I) is nearly singular. TOL should
+*          normally be chose as approximately the largest relative error
+*          in the elements of T. For example, if the elements of T are
+*          correct to about 4 significant figures, then TOL should be
+*          set to about 5*10**(-4). If TOL is supplied as less than eps,
+*          where eps is the relative machine precision, then the value
+*          eps is used in place of TOL.
+*
+*  D       (output) DOUBLE PRECISION array, dimension (N-2)
+*          On exit, D is overwritten by the (n-2) second super-diagonal
+*          elements of the matrix U of the factorization of T.
+*
+*  IN      (output) INTEGER array, dimension (N)
+*          On exit, IN contains details of the permutation matrix P. If
+*          an interchange occurred at the kth step of the elimination,
+*          then IN(k) = 1, otherwise IN(k) = 0. The element IN(n)
+*          returns the smallest positive integer j such that
+*
+*             abs( u(j,j) ).le. norm( (T - lambda*I)(j) )*TOL,
+*
+*          where norm( A(j) ) denotes the sum of the absolute values of
+*          the jth row of the matrix A. If no such j exists then IN(n)
+*          is returned as zero. If IN(n) is returned as positive, then a
+*          diagonal element of U is small, indicating that
+*          (T - lambda*I) is singular or nearly singular,
+*
+*  INFO    (output) INTEGER
+*          = 0   : successful exit
+*          .lt. 0: if INFO = -k, the kth argument had an illegal value
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            K
+      DOUBLE PRECISION   EPS, MULT, PIV1, PIV2, SCALE1, SCALE2, TEMP, TL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+         CALL XERBLA( 'DLAGTF', -INFO )
+         RETURN
+      END IF
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      A( 1 ) = A( 1 ) - LAMBDA
+      IN( N ) = 0
+      IF( N.EQ.1 ) THEN
+         IF( A( 1 ).EQ.ZERO )
+     $      IN( 1 ) = 1
+         RETURN
+      END IF
+*
+      EPS = DLAMCH( 'Epsilon' )
+*
+      TL = MAX( TOL, EPS )
+      SCALE1 = ABS( A( 1 ) ) + ABS( B( 1 ) )
+      DO 10 K = 1, N - 1
+         A( K+1 ) = A( K+1 ) - LAMBDA
+         SCALE2 = ABS( C( K ) ) + ABS( A( K+1 ) )
+         IF( K.LT.( N-1 ) )
+     $      SCALE2 = SCALE2 + ABS( B( K+1 ) )
+         IF( A( K ).EQ.ZERO ) THEN
+            PIV1 = ZERO
+         ELSE
+            PIV1 = ABS( A( K ) ) / SCALE1
+         END IF
+         IF( C( K ).EQ.ZERO ) THEN
+            IN( K ) = 0
+            PIV2 = ZERO
+            SCALE1 = SCALE2
+            IF( K.LT.( N-1 ) )
+     $         D( K ) = ZERO
+         ELSE
+            PIV2 = ABS( C( K ) ) / SCALE2
+            IF( PIV2.LE.PIV1 ) THEN
+               IN( K ) = 0
+               SCALE1 = SCALE2
+               C( K ) = C( K ) / A( K )
+               A( K+1 ) = A( K+1 ) - C( K )*B( K )
+               IF( K.LT.( N-1 ) )
+     $            D( K ) = ZERO
+            ELSE
+               IN( K ) = 1
+               MULT = A( K ) / C( K )
+               A( K ) = C( K )
+               TEMP = A( K+1 )
+               A( K+1 ) = B( K ) - MULT*TEMP
+               IF( K.LT.( N-1 ) ) THEN
+                  D( K ) = B( K+1 )
+                  B( K+1 ) = -MULT*D( K )
+               END IF
+               B( K ) = TEMP
+               C( K ) = MULT
+            END IF
+         END IF
+         IF( ( MAX( PIV1, PIV2 ).LE.TL ) .AND. ( IN( N ).EQ.0 ) )
+     $      IN( N ) = K
+   10 CONTINUE
+      IF( ( ABS( A( N ) ).LE.SCALE1*TL ) .AND. ( IN( N ).EQ.0 ) )
+     $   IN( N ) = N
+*
+      RETURN
+*
+*     End of DLAGTF
+*
+      END
diff --git a/costa/native/external/lapack/dlagtm.f b/costa/native/external/lapack/dlagtm.f
new file mode 100644
index 000000000..6ffc9ea21
--- /dev/null
+++ b/costa/native/external/lapack/dlagtm.f
@@ -0,0 +1,191 @@
+      SUBROUTINE DLAGTM( TRANS, N, NRHS, ALPHA, DL, D, DU, X, LDX, BETA,
+     $                   B, LDB )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            LDB, LDX, N, NRHS
+      DOUBLE PRECISION   ALPHA, BETA
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   B( LDB, * ), D( * ), DL( * ), DU( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAGTM performs a matrix-vector product of the form
+*
+*     B := alpha * A * X + beta * B
+*
+*  where A is a tridiagonal matrix of order N, B and X are N by NRHS
+*  matrices, and alpha and beta are real scalars, each of which may be
+*  0., 1., or -1.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER
+*          Specifies the operation applied to A.
+*          = 'N':  No transpose, B := alpha * A * X + beta * B
+*          = 'T':  Transpose,    B := alpha * A'* X + beta * B
+*          = 'C':  Conjugate transpose = Transpose
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices X and B.
+*
+*  ALPHA   (input) DOUBLE PRECISION
+*          The scalar alpha.  ALPHA must be 0., 1., or -1.; otherwise,
+*          it is assumed to be 0.
+*
+*  DL      (input) DOUBLE PRECISION array, dimension (N-1)
+*          The (n-1) sub-diagonal elements of T.
+*
+*  D       (input) DOUBLE PRECISION array, dimension (N)
+*          The diagonal elements of T.
+*
+*  DU      (input) DOUBLE PRECISION array, dimension (N-1)
+*          The (n-1) super-diagonal elements of T.
+*
+*  X       (input) DOUBLE PRECISION array, dimension (LDX,NRHS)
+*          The N by NRHS matrix X.
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(N,1).
+*
+*  BETA    (input) DOUBLE PRECISION
+*          The scalar beta.  BETA must be 0., 1., or -1.; otherwise,
+*          it is assumed to be 1.
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the N by NRHS matrix B.
+*          On exit, B is overwritten by the matrix expression
+*          B := alpha * A * X + beta * B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(N,1).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Multiply B by BETA if BETA.NE.1.
+*
+      IF( BETA.EQ.ZERO ) THEN
+         DO 20 J = 1, NRHS
+            DO 10 I = 1, N
+               B( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+      ELSE IF( BETA.EQ.-ONE ) THEN
+         DO 40 J = 1, NRHS
+            DO 30 I = 1, N
+               B( I, J ) = -B( I, J )
+   30       CONTINUE
+   40    CONTINUE
+      END IF
+*
+      IF( ALPHA.EQ.ONE ) THEN
+         IF( LSAME( TRANS, 'N' ) ) THEN
+*
+*           Compute B := B + A*X
+*
+            DO 60 J = 1, NRHS
+               IF( N.EQ.1 ) THEN
+                  B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J )
+               ELSE
+                  B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) +
+     $                        DU( 1 )*X( 2, J )
+                  B( N, J ) = B( N, J ) + DL( N-1 )*X( N-1, J ) +
+     $                        D( N )*X( N, J )
+                  DO 50 I = 2, N - 1
+                     B( I, J ) = B( I, J ) + DL( I-1 )*X( I-1, J ) +
+     $                           D( I )*X( I, J ) + DU( I )*X( I+1, J )
+   50             CONTINUE
+               END IF
+   60       CONTINUE
+         ELSE
+*
+*           Compute B := B + A'*X
+*
+            DO 80 J = 1, NRHS
+               IF( N.EQ.1 ) THEN
+                  B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J )
+               ELSE
+                  B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) +
+     $                        DL( 1 )*X( 2, J )
+                  B( N, J ) = B( N, J ) + DU( N-1 )*X( N-1, J ) +
+     $                        D( N )*X( N, J )
+                  DO 70 I = 2, N - 1
+                     B( I, J ) = B( I, J ) + DU( I-1 )*X( I-1, J ) +
+     $                           D( I )*X( I, J ) + DL( I )*X( I+1, J )
+   70             CONTINUE
+               END IF
+   80       CONTINUE
+         END IF
+      ELSE IF( ALPHA.EQ.-ONE ) THEN
+         IF( LSAME( TRANS, 'N' ) ) THEN
+*
+*           Compute B := B - A*X
+*
+            DO 100 J = 1, NRHS
+               IF( N.EQ.1 ) THEN
+                  B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J )
+               ELSE
+                  B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) -
+     $                        DU( 1 )*X( 2, J )
+                  B( N, J ) = B( N, J ) - DL( N-1 )*X( N-1, J ) -
+     $                        D( N )*X( N, J )
+                  DO 90 I = 2, N - 1
+                     B( I, J ) = B( I, J ) - DL( I-1 )*X( I-1, J ) -
+     $                           D( I )*X( I, J ) - DU( I )*X( I+1, J )
+   90             CONTINUE
+               END IF
+  100       CONTINUE
+         ELSE
+*
+*           Compute B := B - A'*X
+*
+            DO 120 J = 1, NRHS
+               IF( N.EQ.1 ) THEN
+                  B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J )
+               ELSE
+                  B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) -
+     $                        DL( 1 )*X( 2, J )
+                  B( N, J ) = B( N, J ) - DU( N-1 )*X( N-1, J ) -
+     $                        D( N )*X( N, J )
+                  DO 110 I = 2, N - 1
+                     B( I, J ) = B( I, J ) - DU( I-1 )*X( I-1, J ) -
+     $                           D( I )*X( I, J ) - DL( I )*X( I+1, J )
+  110             CONTINUE
+               END IF
+  120       CONTINUE
+         END IF
+      END IF
+      RETURN
+*
+*     End of DLAGTM
+*
+      END
diff --git a/costa/native/external/lapack/dlagts.f b/costa/native/external/lapack/dlagts.f
new file mode 100644
index 000000000..3bd41ff38
--- /dev/null
+++ b/costa/native/external/lapack/dlagts.f
@@ -0,0 +1,305 @@
+      SUBROUTINE DLAGTS( JOB, N, A, B, C, D, IN, Y, TOL, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, JOB, N
+      DOUBLE PRECISION   TOL
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IN( * )
+      DOUBLE PRECISION   A( * ), B( * ), C( * ), D( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAGTS may be used to solve one of the systems of equations
+*
+*     (T - lambda*I)*x = y   or   (T - lambda*I)'*x = y,
+*
+*  where T is an n by n tridiagonal matrix, for x, following the
+*  factorization of (T - lambda*I) as
+*
+*     (T - lambda*I) = P*L*U ,
+*
+*  by routine DLAGTF. The choice of equation to be solved is
+*  controlled by the argument JOB, and in each case there is an option
+*  to perturb zero or very small diagonal elements of U, this option
+*  being intended for use in applications such as inverse iteration.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) INTEGER
+*          Specifies the job to be performed by DLAGTS as follows:
+*          =  1: The equations  (T - lambda*I)x = y  are to be solved,
+*                but diagonal elements of U are not to be perturbed.
+*          = -1: The equations  (T - lambda*I)x = y  are to be solved
+*                and, if overflow would otherwise occur, the diagonal
+*                elements of U are to be perturbed. See argument TOL
+*                below.
+*          =  2: The equations  (T - lambda*I)'x = y  are to be solved,
+*                but diagonal elements of U are not to be perturbed.
+*          = -2: The equations  (T - lambda*I)'x = y  are to be solved
+*                and, if overflow would otherwise occur, the diagonal
+*                elements of U are to be perturbed. See argument TOL
+*                below.
+*
+*  N       (input) INTEGER
+*          The order of the matrix T.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (N)
+*          On entry, A must contain the diagonal elements of U as
+*          returned from DLAGTF.
+*
+*  B       (input) DOUBLE PRECISION array, dimension (N-1)
+*          On entry, B must contain the first super-diagonal elements of
+*          U as returned from DLAGTF.
+*
+*  C       (input) DOUBLE PRECISION array, dimension (N-1)
+*          On entry, C must contain the sub-diagonal elements of L as
+*          returned from DLAGTF.
+*
+*  D       (input) DOUBLE PRECISION array, dimension (N-2)
+*          On entry, D must contain the second super-diagonal elements
+*          of U as returned from DLAGTF.
+*
+*  IN      (input) INTEGER array, dimension (N)
+*          On entry, IN must contain details of the matrix P as returned
+*          from DLAGTF.
+*
+*  Y       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the right hand side vector y.
+*          On exit, Y is overwritten by the solution vector x.
+*
+*  TOL     (input/output) DOUBLE PRECISION
+*          On entry, with  JOB .lt. 0, TOL should be the minimum
+*          perturbation to be made to very small diagonal elements of U.
+*          TOL should normally be chosen as about eps*norm(U), where eps
+*          is the relative machine precision, but if TOL is supplied as
+*          non-positive, then it is reset to eps*max( abs( u(i,j) ) ).
+*          If  JOB .gt. 0  then TOL is not referenced.
+*
+*          On exit, TOL is changed as described above, only if TOL is
+*          non-positive on entry. Otherwise TOL is unchanged.
+*
+*  INFO    (output) INTEGER
+*          = 0   : successful exit
+*          .lt. 0: if INFO = -i, the i-th argument had an illegal value
+*          .gt. 0: overflow would occur when computing the INFO(th)
+*                  element of the solution vector x. This can only occur
+*                  when JOB is supplied as positive and either means
+*                  that a diagonal element of U is very small, or that
+*                  the elements of the right-hand side vector y are very
+*                  large.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            K
+      DOUBLE PRECISION   ABSAK, AK, BIGNUM, EPS, PERT, SFMIN, TEMP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SIGN
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      IF( ( ABS( JOB ).GT.2 ) .OR. ( JOB.EQ.0 ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLAGTS', -INFO )
+         RETURN
+      END IF
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      EPS = DLAMCH( 'Epsilon' )
+      SFMIN = DLAMCH( 'Safe minimum' )
+      BIGNUM = ONE / SFMIN
+*
+      IF( JOB.LT.0 ) THEN
+         IF( TOL.LE.ZERO ) THEN
+            TOL = ABS( A( 1 ) )
+            IF( N.GT.1 )
+     $         TOL = MAX( TOL, ABS( A( 2 ) ), ABS( B( 1 ) ) )
+            DO 10 K = 3, N
+               TOL = MAX( TOL, ABS( A( K ) ), ABS( B( K-1 ) ),
+     $               ABS( D( K-2 ) ) )
+   10       CONTINUE
+            TOL = TOL*EPS
+            IF( TOL.EQ.ZERO )
+     $         TOL = EPS
+         END IF
+      END IF
+*
+      IF( ABS( JOB ).EQ.1 ) THEN
+         DO 20 K = 2, N
+            IF( IN( K-1 ).EQ.0 ) THEN
+               Y( K ) = Y( K ) - C( K-1 )*Y( K-1 )
+            ELSE
+               TEMP = Y( K-1 )
+               Y( K-1 ) = Y( K )
+               Y( K ) = TEMP - C( K-1 )*Y( K )
+            END IF
+   20    CONTINUE
+         IF( JOB.EQ.1 ) THEN
+            DO 30 K = N, 1, -1
+               IF( K.LE.N-2 ) THEN
+                  TEMP = Y( K ) - B( K )*Y( K+1 ) - D( K )*Y( K+2 )
+               ELSE IF( K.EQ.N-1 ) THEN
+                  TEMP = Y( K ) - B( K )*Y( K+1 )
+               ELSE
+                  TEMP = Y( K )
+               END IF
+               AK = A( K )
+               ABSAK = ABS( AK )
+               IF( ABSAK.LT.ONE ) THEN
+                  IF( ABSAK.LT.SFMIN ) THEN
+                     IF( ABSAK.EQ.ZERO .OR. ABS( TEMP )*SFMIN.GT.ABSAK )
+     $                    THEN
+                        INFO = K
+                        RETURN
+                     ELSE
+                        TEMP = TEMP*BIGNUM
+                        AK = AK*BIGNUM
+                     END IF
+                  ELSE IF( ABS( TEMP ).GT.ABSAK*BIGNUM ) THEN
+                     INFO = K
+                     RETURN
+                  END IF
+               END IF
+               Y( K ) = TEMP / AK
+   30       CONTINUE
+         ELSE
+            DO 50 K = N, 1, -1
+               IF( K.LE.N-2 ) THEN
+                  TEMP = Y( K ) - B( K )*Y( K+1 ) - D( K )*Y( K+2 )
+               ELSE IF( K.EQ.N-1 ) THEN
+                  TEMP = Y( K ) - B( K )*Y( K+1 )
+               ELSE
+                  TEMP = Y( K )
+               END IF
+               AK = A( K )
+               PERT = SIGN( TOL, AK )
+   40          CONTINUE
+               ABSAK = ABS( AK )
+               IF( ABSAK.LT.ONE ) THEN
+                  IF( ABSAK.LT.SFMIN ) THEN
+                     IF( ABSAK.EQ.ZERO .OR. ABS( TEMP )*SFMIN.GT.ABSAK )
+     $                    THEN
+                        AK = AK + PERT
+                        PERT = 2*PERT
+                        GO TO 40
+                     ELSE
+                        TEMP = TEMP*BIGNUM
+                        AK = AK*BIGNUM
+                     END IF
+                  ELSE IF( ABS( TEMP ).GT.ABSAK*BIGNUM ) THEN
+                     AK = AK + PERT
+                     PERT = 2*PERT
+                     GO TO 40
+                  END IF
+               END IF
+               Y( K ) = TEMP / AK
+   50       CONTINUE
+         END IF
+      ELSE
+*
+*        Come to here if  JOB = 2 or -2
+*
+         IF( JOB.EQ.2 ) THEN
+            DO 60 K = 1, N
+               IF( K.GE.3 ) THEN
+                  TEMP = Y( K ) - B( K-1 )*Y( K-1 ) - D( K-2 )*Y( K-2 )
+               ELSE IF( K.EQ.2 ) THEN
+                  TEMP = Y( K ) - B( K-1 )*Y( K-1 )
+               ELSE
+                  TEMP = Y( K )
+               END IF
+               AK = A( K )
+               ABSAK = ABS( AK )
+               IF( ABSAK.LT.ONE ) THEN
+                  IF( ABSAK.LT.SFMIN ) THEN
+                     IF( ABSAK.EQ.ZERO .OR. ABS( TEMP )*SFMIN.GT.ABSAK )
+     $                    THEN
+                        INFO = K
+                        RETURN
+                     ELSE
+                        TEMP = TEMP*BIGNUM
+                        AK = AK*BIGNUM
+                     END IF
+                  ELSE IF( ABS( TEMP ).GT.ABSAK*BIGNUM ) THEN
+                     INFO = K
+                     RETURN
+                  END IF
+               END IF
+               Y( K ) = TEMP / AK
+   60       CONTINUE
+         ELSE
+            DO 80 K = 1, N
+               IF( K.GE.3 ) THEN
+                  TEMP = Y( K ) - B( K-1 )*Y( K-1 ) - D( K-2 )*Y( K-2 )
+               ELSE IF( K.EQ.2 ) THEN
+                  TEMP = Y( K ) - B( K-1 )*Y( K-1 )
+               ELSE
+                  TEMP = Y( K )
+               END IF
+               AK = A( K )
+               PERT = SIGN( TOL, AK )
+   70          CONTINUE
+               ABSAK = ABS( AK )
+               IF( ABSAK.LT.ONE ) THEN
+                  IF( ABSAK.LT.SFMIN ) THEN
+                     IF( ABSAK.EQ.ZERO .OR. ABS( TEMP )*SFMIN.GT.ABSAK )
+     $                    THEN
+                        AK = AK + PERT
+                        PERT = 2*PERT
+                        GO TO 70
+                     ELSE
+                        TEMP = TEMP*BIGNUM
+                        AK = AK*BIGNUM
+                     END IF
+                  ELSE IF( ABS( TEMP ).GT.ABSAK*BIGNUM ) THEN
+                     AK = AK + PERT
+                     PERT = 2*PERT
+                     GO TO 70
+                  END IF
+               END IF
+               Y( K ) = TEMP / AK
+   80       CONTINUE
+         END IF
+*
+         DO 90 K = N, 2, -1
+            IF( IN( K-1 ).EQ.0 ) THEN
+               Y( K-1 ) = Y( K-1 ) - C( K-1 )*Y( K )
+            ELSE
+               TEMP = Y( K-1 )
+               Y( K-1 ) = Y( K )
+               Y( K ) = TEMP - C( K-1 )*Y( K )
+            END IF
+   90    CONTINUE
+      END IF
+*
+*     End of DLAGTS
+*
+      END
diff --git a/costa/native/external/lapack/dlagv2.f b/costa/native/external/lapack/dlagv2.f
new file mode 100644
index 000000000..2ed2473b0
--- /dev/null
+++ b/costa/native/external/lapack/dlagv2.f
@@ -0,0 +1,290 @@
+      SUBROUTINE DLAGV2( A, LDA, B, LDB, ALPHAR, ALPHAI, BETA, CSL, SNL,
+     $                   CSR, SNR )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            LDA, LDB
+      DOUBLE PRECISION   CSL, CSR, SNL, SNR
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), ALPHAI( 2 ), ALPHAR( 2 ),
+     $                   B( LDB, * ), BETA( 2 )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAGV2 computes the Generalized Schur factorization of a real 2-by-2
+*  matrix pencil (A,B) where B is upper triangular. This routine
+*  computes orthogonal (rotation) matrices given by CSL, SNL and CSR,
+*  SNR such that
+*
+*  1) if the pencil (A,B) has two real eigenvalues (include 0/0 or 1/0
+*     types), then
+*
+*     [ a11 a12 ] := [  CSL  SNL ] [ a11 a12 ] [  CSR -SNR ]
+*     [  0  a22 ]    [ -SNL  CSL ] [ a21 a22 ] [  SNR  CSR ]
+*
+*     [ b11 b12 ] := [  CSL  SNL ] [ b11 b12 ] [  CSR -SNR ]
+*     [  0  b22 ]    [ -SNL  CSL ] [  0  b22 ] [  SNR  CSR ],
+*
+*  2) if the pencil (A,B) has a pair of complex conjugate eigenvalues,
+*     then
+*
+*     [ a11 a12 ] := [  CSL  SNL ] [ a11 a12 ] [  CSR -SNR ]
+*     [ a21 a22 ]    [ -SNL  CSL ] [ a21 a22 ] [  SNR  CSR ]
+*
+*     [ b11  0  ] := [  CSL  SNL ] [ b11 b12 ] [  CSR -SNR ]
+*     [  0  b22 ]    [ -SNL  CSL ] [  0  b22 ] [  SNR  CSR ]
+*
+*     where b11 >= b22 > 0.
+*
+*
+*  Arguments
+*  =========
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA, 2)
+*          On entry, the 2 x 2 matrix A.
+*          On exit, A is overwritten by the ``A-part'' of the
+*          generalized Schur form.
+*
+*  LDA     (input) INTEGER
+*          THe leading dimension of the array A.  LDA >= 2.
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB, 2)
+*          On entry, the upper triangular 2 x 2 matrix B.
+*          On exit, B is overwritten by the ``B-part'' of the
+*          generalized Schur form.
+*
+*  LDB     (input) INTEGER
+*          THe leading dimension of the array B.  LDB >= 2.
+*
+*  ALPHAR  (output) DOUBLE PRECISION array, dimension (2)
+*  ALPHAI  (output) DOUBLE PRECISION array, dimension (2)
+*  BETA    (output) DOUBLE PRECISION array, dimension (2)
+*          (ALPHAR(k)+i*ALPHAI(k))/BETA(k) are the eigenvalues of the
+*          pencil (A,B), k=1,2, i = sqrt(-1).  Note that BETA(k) may
+*          be zero.
+*
+*  CSL     (output) DOUBLE PRECISION
+*          The cosine of the left rotation matrix.
+*
+*  SNL     (output) DOUBLE PRECISION
+*          The sine of the left rotation matrix.
+*
+*  CSR     (output) DOUBLE PRECISION
+*          The cosine of the right rotation matrix.
+*
+*  SNR     (output) DOUBLE PRECISION
+*          The sine of the right rotation matrix.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      DOUBLE PRECISION   ANORM, ASCALE, BNORM, BSCALE, H1, H2, H3, QQ,
+     $                   R, RR, SAFMIN, SCALE1, SCALE2, T, ULP, WI, WR1,
+     $                   WR2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLAG2, DLARTG, DLASV2, DROT
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH, DLAPY2
+      EXTERNAL           DLAMCH, DLAPY2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+      SAFMIN = DLAMCH( 'S' )
+      ULP = DLAMCH( 'P' )
+*
+*     Scale A
+*
+      ANORM = MAX( ABS( A( 1, 1 ) )+ABS( A( 2, 1 ) ),
+     $        ABS( A( 1, 2 ) )+ABS( A( 2, 2 ) ), SAFMIN )
+      ASCALE = ONE / ANORM
+      A( 1, 1 ) = ASCALE*A( 1, 1 )
+      A( 1, 2 ) = ASCALE*A( 1, 2 )
+      A( 2, 1 ) = ASCALE*A( 2, 1 )
+      A( 2, 2 ) = ASCALE*A( 2, 2 )
+*
+*     Scale B
+*
+      BNORM = MAX( ABS( B( 1, 1 ) ), ABS( B( 1, 2 ) )+ABS( B( 2, 2 ) ),
+     $        SAFMIN )
+      BSCALE = ONE / BNORM
+      B( 1, 1 ) = BSCALE*B( 1, 1 )
+      B( 1, 2 ) = BSCALE*B( 1, 2 )
+      B( 2, 2 ) = BSCALE*B( 2, 2 )
+*
+*     Check if A can be deflated
+*
+      IF( ABS( A( 2, 1 ) ).LE.ULP ) THEN
+         CSL = ONE
+         SNL = ZERO
+         CSR = ONE
+         SNR = ZERO
+         A( 2, 1 ) = ZERO
+         B( 2, 1 ) = ZERO
+*
+*     Check if B is singular
+*
+      ELSE IF( ABS( B( 1, 1 ) ).LE.ULP ) THEN
+         CALL DLARTG( A( 1, 1 ), A( 2, 1 ), CSL, SNL, R )
+         CSR = ONE
+         SNR = ZERO
+         CALL DROT( 2, A( 1, 1 ), LDA, A( 2, 1 ), LDA, CSL, SNL )
+         CALL DROT( 2, B( 1, 1 ), LDB, B( 2, 1 ), LDB, CSL, SNL )
+         A( 2, 1 ) = ZERO
+         B( 1, 1 ) = ZERO
+         B( 2, 1 ) = ZERO
+*
+      ELSE IF( ABS( B( 2, 2 ) ).LE.ULP ) THEN
+         CALL DLARTG( A( 2, 2 ), A( 2, 1 ), CSR, SNR, T )
+         SNR = -SNR
+         CALL DROT( 2, A( 1, 1 ), 1, A( 1, 2 ), 1, CSR, SNR )
+         CALL DROT( 2, B( 1, 1 ), 1, B( 1, 2 ), 1, CSR, SNR )
+         CSL = ONE
+         SNL = ZERO
+         A( 2, 1 ) = ZERO
+         B( 2, 1 ) = ZERO
+         B( 2, 2 ) = ZERO
+*
+      ELSE
+*
+*        B is nonsingular, first compute the eigenvalues of (A,B)
+*
+         CALL DLAG2( A, LDA, B, LDB, SAFMIN, SCALE1, SCALE2, WR1, WR2,
+     $               WI )
+*
+         IF( WI.EQ.ZERO ) THEN
+*
+*           two real eigenvalues, compute s*A-w*B
+*
+            H1 = SCALE1*A( 1, 1 ) - WR1*B( 1, 1 )
+            H2 = SCALE1*A( 1, 2 ) - WR1*B( 1, 2 )
+            H3 = SCALE1*A( 2, 2 ) - WR1*B( 2, 2 )
+*
+            RR = DLAPY2( H1, H2 )
+            QQ = DLAPY2( SCALE1*A( 2, 1 ), H3 )
+*
+            IF( RR.GT.QQ ) THEN
+*
+*              find right rotation matrix to zero 1,1 element of
+*              (sA - wB)
+*
+               CALL DLARTG( H2, H1, CSR, SNR, T )
+*
+            ELSE
+*
+*              find right rotation matrix to zero 2,1 element of
+*              (sA - wB)
+*
+               CALL DLARTG( H3, SCALE1*A( 2, 1 ), CSR, SNR, T )
+*
+            END IF
+*
+            SNR = -SNR
+            CALL DROT( 2, A( 1, 1 ), 1, A( 1, 2 ), 1, CSR, SNR )
+            CALL DROT( 2, B( 1, 1 ), 1, B( 1, 2 ), 1, CSR, SNR )
+*
+*           compute inf norms of A and B
+*
+            H1 = MAX( ABS( A( 1, 1 ) )+ABS( A( 1, 2 ) ),
+     $           ABS( A( 2, 1 ) )+ABS( A( 2, 2 ) ) )
+            H2 = MAX( ABS( B( 1, 1 ) )+ABS( B( 1, 2 ) ),
+     $           ABS( B( 2, 1 ) )+ABS( B( 2, 2 ) ) )
+*
+            IF( ( SCALE1*H1 ).GE.ABS( WR1 )*H2 ) THEN
+*
+*              find left rotation matrix Q to zero out B(2,1)
+*
+               CALL DLARTG( B( 1, 1 ), B( 2, 1 ), CSL, SNL, R )
+*
+            ELSE
+*
+*              find left rotation matrix Q to zero out A(2,1)
+*
+               CALL DLARTG( A( 1, 1 ), A( 2, 1 ), CSL, SNL, R )
+*
+            END IF
+*
+            CALL DROT( 2, A( 1, 1 ), LDA, A( 2, 1 ), LDA, CSL, SNL )
+            CALL DROT( 2, B( 1, 1 ), LDB, B( 2, 1 ), LDB, CSL, SNL )
+*
+            A( 2, 1 ) = ZERO
+            B( 2, 1 ) = ZERO
+*
+         ELSE
+*
+*           a pair of complex conjugate eigenvalues
+*           first compute the SVD of the matrix B
+*
+            CALL DLASV2( B( 1, 1 ), B( 1, 2 ), B( 2, 2 ), R, T, SNR,
+     $                   CSR, SNL, CSL )
+*
+*           Form (A,B) := Q(A,B)Z' where Q is left rotation matrix and
+*           Z is right rotation matrix computed from DLASV2
+*
+            CALL DROT( 2, A( 1, 1 ), LDA, A( 2, 1 ), LDA, CSL, SNL )
+            CALL DROT( 2, B( 1, 1 ), LDB, B( 2, 1 ), LDB, CSL, SNL )
+            CALL DROT( 2, A( 1, 1 ), 1, A( 1, 2 ), 1, CSR, SNR )
+            CALL DROT( 2, B( 1, 1 ), 1, B( 1, 2 ), 1, CSR, SNR )
+*
+            B( 2, 1 ) = ZERO
+            B( 1, 2 ) = ZERO
+*
+         END IF
+*
+      END IF
+*
+*     Unscaling
+*
+      A( 1, 1 ) = ANORM*A( 1, 1 )
+      A( 2, 1 ) = ANORM*A( 2, 1 )
+      A( 1, 2 ) = ANORM*A( 1, 2 )
+      A( 2, 2 ) = ANORM*A( 2, 2 )
+      B( 1, 1 ) = BNORM*B( 1, 1 )
+      B( 2, 1 ) = BNORM*B( 2, 1 )
+      B( 1, 2 ) = BNORM*B( 1, 2 )
+      B( 2, 2 ) = BNORM*B( 2, 2 )
+*
+      IF( WI.EQ.ZERO ) THEN
+         ALPHAR( 1 ) = A( 1, 1 )
+         ALPHAR( 2 ) = A( 2, 2 )
+         ALPHAI( 1 ) = ZERO
+         ALPHAI( 2 ) = ZERO
+         BETA( 1 ) = B( 1, 1 )
+         BETA( 2 ) = B( 2, 2 )
+      ELSE
+         ALPHAR( 1 ) = ANORM*WR1 / SCALE1 / BNORM
+         ALPHAI( 1 ) = ANORM*WI / SCALE1 / BNORM
+         ALPHAR( 2 ) = ALPHAR( 1 )
+         ALPHAI( 2 ) = -ALPHAI( 1 )
+         BETA( 1 ) = ONE
+         BETA( 2 ) = ONE
+      END IF
+*
+   10 CONTINUE
+*
+      RETURN
+*
+*     End of DLAGV2
+*
+      END
diff --git a/costa/native/external/lapack/dlahqr.f b/costa/native/external/lapack/dlahqr.f
new file mode 100644
index 000000000..2e2f25b6d
--- /dev/null
+++ b/costa/native/external/lapack/dlahqr.f
@@ -0,0 +1,435 @@
+      SUBROUTINE DLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI,
+     $                   ILOZ, IHIZ, Z, LDZ, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      LOGICAL            WANTT, WANTZ
+      INTEGER            IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   H( LDH, * ), WI( * ), WR( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAHQR is an auxiliary routine called by DHSEQR to update the
+*  eigenvalues and Schur decomposition already computed by DHSEQR, by
+*  dealing with the Hessenberg submatrix in rows and columns ILO to IHI.
+*
+*  Arguments
+*  =========
+*
+*  WANTT   (input) LOGICAL
+*          = .TRUE. : the full Schur form T is required;
+*          = .FALSE.: only eigenvalues are required.
+*
+*  WANTZ   (input) LOGICAL
+*          = .TRUE. : the matrix of Schur vectors Z is required;
+*          = .FALSE.: Schur vectors are not required.
+*
+*  N       (input) INTEGER
+*          The order of the matrix H.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          It is assumed that H is already upper quasi-triangular in
+*          rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless
+*          ILO = 1). DLAHQR works primarily with the Hessenberg
+*          submatrix in rows and columns ILO to IHI, but applies
+*          transformations to all of H if WANTT is .TRUE..
+*          1 <= ILO <= max(1,IHI); IHI <= N.
+*
+*  H       (input/output) DOUBLE PRECISION array, dimension (LDH,N)
+*          On entry, the upper Hessenberg matrix H.
+*          On exit, if WANTT is .TRUE., H is upper quasi-triangular in
+*          rows and columns ILO:IHI, with any 2-by-2 diagonal blocks in
+*          standard form. If WANTT is .FALSE., the contents of H are
+*          unspecified on exit.
+*
+*  LDH     (input) INTEGER
+*          The leading dimension of the array H. LDH >= max(1,N).
+*
+*  WR      (output) DOUBLE PRECISION array, dimension (N)
+*  WI      (output) DOUBLE PRECISION array, dimension (N)
+*          The real and imaginary parts, respectively, of the computed
+*          eigenvalues ILO to IHI are stored in the corresponding
+*          elements of WR and WI. If two eigenvalues are computed as a
+*          complex conjugate pair, they are stored in consecutive
+*          elements of WR and WI, say the i-th and (i+1)th, with
+*          WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the
+*          eigenvalues are stored in the same order as on the diagonal
+*          of the Schur form returned in H, with WR(i) = H(i,i), and, if
+*          H(i:i+1,i:i+1) is a 2-by-2 diagonal block,
+*          WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i).
+*
+*  ILOZ    (input) INTEGER
+*  IHIZ    (input) INTEGER
+*          Specify the rows of Z to which transformations must be
+*          applied if WANTZ is .TRUE..
+*          1 <= ILOZ <= ILO; IHI <= IHIZ <= N.
+*
+*  Z       (input/output) DOUBLE PRECISION array, dimension (LDZ,N)
+*          If WANTZ is .TRUE., on entry Z must contain the current
+*          matrix Z of transformations accumulated by DHSEQR, and on
+*          exit Z has been updated; transformations are applied only to
+*          the submatrix Z(ILOZ:IHIZ,ILO:IHI).
+*          If WANTZ is .FALSE., Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z. LDZ >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          > 0: DLAHQR failed to compute all the eigenvalues ILO to IHI
+*               in a total of 30*(IHI-ILO+1) iterations; if INFO = i,
+*               elements i+1:ihi of WR and WI contain those eigenvalues
+*               which have been successfully computed.
+*
+*  Further Details
+*  ===============
+*
+*  2-96 Based on modifications by
+*     David Day, Sandia National Laboratory, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, HALF
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, HALF = 0.5D0 )
+      DOUBLE PRECISION   DAT1, DAT2
+      PARAMETER          ( DAT1 = 0.75D+0, DAT2 = -0.4375D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, I1, I2, ITN, ITS, J, K, L, M, NH, NR, NZ
+      DOUBLE PRECISION   AVE, CS, DISC, H00, H10, H11, H12, H21, H22,
+     $                   H33, H33S, H43H34, H44, H44S, OVFL, S, SMLNUM,
+     $                   SN, SUM, T1, T2, T3, TST1, ULP, UNFL, V1, V2,
+     $                   V3
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   V( 3 ), WORK( 1 )
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH, DLANHS
+      EXTERNAL           DLAMCH, DLANHS
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLANV2, DLARFG, DROT
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SIGN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+      IF( ILO.EQ.IHI ) THEN
+         WR( ILO ) = H( ILO, ILO )
+         WI( ILO ) = ZERO
+         RETURN
+      END IF
+*
+      NH = IHI - ILO + 1
+      NZ = IHIZ - ILOZ + 1
+*
+*     Set machine-dependent constants for the stopping criterion.
+*     If norm(H) <= sqrt(OVFL), overflow should not occur.
+*
+      UNFL = DLAMCH( 'Safe minimum' )
+      OVFL = ONE / UNFL
+      CALL DLABAD( UNFL, OVFL )
+      ULP = DLAMCH( 'Precision' )
+      SMLNUM = UNFL*( NH / ULP )
+*
+*     I1 and I2 are the indices of the first row and last column of H
+*     to which transformations must be applied. If eigenvalues only are
+*     being computed, I1 and I2 are set inside the main loop.
+*
+      IF( WANTT ) THEN
+         I1 = 1
+         I2 = N
+      END IF
+*
+*     ITN is the total number of QR iterations allowed.
+*
+      ITN = 30*NH
+*
+*     The main loop begins here. I is the loop index and decreases from
+*     IHI to ILO in steps of 1 or 2. Each iteration of the loop works
+*     with the active submatrix in rows and columns L to I.
+*     Eigenvalues I+1 to IHI have already converged. Either L = ILO or
+*     H(L,L-1) is negligible so that the matrix splits.
+*
+      I = IHI
+   10 CONTINUE
+      L = ILO
+      IF( I.LT.ILO )
+     $   GO TO 150
+*
+*     Perform QR iterations on rows and columns ILO to I until a
+*     submatrix of order 1 or 2 splits off at the bottom because a
+*     subdiagonal element has become negligible.
+*
+      DO 130 ITS = 0, ITN
+*
+*        Look for a single small subdiagonal element.
+*
+         DO 20 K = I, L + 1, -1
+            TST1 = ABS( H( K-1, K-1 ) ) + ABS( H( K, K ) )
+            IF( TST1.EQ.ZERO )
+     $         TST1 = DLANHS( '1', I-L+1, H( L, L ), LDH, WORK )
+            IF( ABS( H( K, K-1 ) ).LE.MAX( ULP*TST1, SMLNUM ) )
+     $         GO TO 30
+   20    CONTINUE
+   30    CONTINUE
+         L = K
+         IF( L.GT.ILO ) THEN
+*
+*           H(L,L-1) is negligible
+*
+            H( L, L-1 ) = ZERO
+         END IF
+*
+*        Exit from loop if a submatrix of order 1 or 2 has split off.
+*
+         IF( L.GE.I-1 )
+     $      GO TO 140
+*
+*        Now the active submatrix is in rows and columns L to I. If
+*        eigenvalues only are being computed, only the active submatrix
+*        need be transformed.
+*
+         IF( .NOT.WANTT ) THEN
+            I1 = L
+            I2 = I
+         END IF
+*
+         IF( ITS.EQ.10 .OR. ITS.EQ.20 ) THEN
+*
+*           Exceptional shift.
+*
+            S = ABS( H( I, I-1 ) ) + ABS( H( I-1, I-2 ) )
+            H44 = DAT1*S + H( I, I )
+            H33 = H44
+            H43H34 = DAT2*S*S
+         ELSE
+*
+*           Prepare to use Francis' double shift
+*           (i.e. 2nd degree generalized Rayleigh quotient)
+*
+            H44 = H( I, I )
+            H33 = H( I-1, I-1 )
+            H43H34 = H( I, I-1 )*H( I-1, I )
+            S = H( I-1, I-2 )*H( I-1, I-2 )
+            DISC = ( H33-H44 )*HALF
+            DISC = DISC*DISC + H43H34
+            IF( DISC.GT.ZERO ) THEN
+*
+*              Real roots: use Wilkinson's shift twice
+*
+               DISC = SQRT( DISC )
+               AVE = HALF*( H33+H44 )
+               IF( ABS( H33 )-ABS( H44 ).GT.ZERO ) THEN
+                  H33 = H33*H44 - H43H34
+                  H44 = H33 / ( SIGN( DISC, AVE )+AVE )
+               ELSE
+                  H44 = SIGN( DISC, AVE ) + AVE
+               END IF
+               H33 = H44
+               H43H34 = ZERO
+            END IF
+         END IF
+*
+*        Look for two consecutive small subdiagonal elements.
+*
+         DO 40 M = I - 2, L, -1
+*           Determine the effect of starting the double-shift QR
+*           iteration at row M, and see if this would make H(M,M-1)
+*           negligible.
+*
+            H11 = H( M, M )
+            H22 = H( M+1, M+1 )
+            H21 = H( M+1, M )
+            H12 = H( M, M+1 )
+            H44S = H44 - H11
+            H33S = H33 - H11
+            V1 = ( H33S*H44S-H43H34 ) / H21 + H12
+            V2 = H22 - H11 - H33S - H44S
+            V3 = H( M+2, M+1 )
+            S = ABS( V1 ) + ABS( V2 ) + ABS( V3 )
+            V1 = V1 / S
+            V2 = V2 / S
+            V3 = V3 / S
+            V( 1 ) = V1
+            V( 2 ) = V2
+            V( 3 ) = V3
+            IF( M.EQ.L )
+     $         GO TO 50
+            H00 = H( M-1, M-1 )
+            H10 = H( M, M-1 )
+            TST1 = ABS( V1 )*( ABS( H00 )+ABS( H11 )+ABS( H22 ) )
+            IF( ABS( H10 )*( ABS( V2 )+ABS( V3 ) ).LE.ULP*TST1 )
+     $         GO TO 50
+   40    CONTINUE
+   50    CONTINUE
+*
+*        Double-shift QR step
+*
+         DO 120 K = M, I - 1
+*
+*           The first iteration of this loop determines a reflection G
+*           from the vector V and applies it from left and right to H,
+*           thus creating a nonzero bulge below the subdiagonal.
+*
+*           Each subsequent iteration determines a reflection G to
+*           restore the Hessenberg form in the (K-1)th column, and thus
+*           chases the bulge one step toward the bottom of the active
+*           submatrix. NR is the order of G.
+*
+            NR = MIN( 3, I-K+1 )
+            IF( K.GT.M )
+     $         CALL DCOPY( NR, H( K, K-1 ), 1, V, 1 )
+            CALL DLARFG( NR, V( 1 ), V( 2 ), 1, T1 )
+            IF( K.GT.M ) THEN
+               H( K, K-1 ) = V( 1 )
+               H( K+1, K-1 ) = ZERO
+               IF( K.LT.I-1 )
+     $            H( K+2, K-1 ) = ZERO
+            ELSE IF( M.GT.L ) THEN
+               H( K, K-1 ) = -H( K, K-1 )
+            END IF
+            V2 = V( 2 )
+            T2 = T1*V2
+            IF( NR.EQ.3 ) THEN
+               V3 = V( 3 )
+               T3 = T1*V3
+*
+*              Apply G from the left to transform the rows of the matrix
+*              in columns K to I2.
+*
+               DO 60 J = K, I2
+                  SUM = H( K, J ) + V2*H( K+1, J ) + V3*H( K+2, J )
+                  H( K, J ) = H( K, J ) - SUM*T1
+                  H( K+1, J ) = H( K+1, J ) - SUM*T2
+                  H( K+2, J ) = H( K+2, J ) - SUM*T3
+   60          CONTINUE
+*
+*              Apply G from the right to transform the columns of the
+*              matrix in rows I1 to min(K+3,I).
+*
+               DO 70 J = I1, MIN( K+3, I )
+                  SUM = H( J, K ) + V2*H( J, K+1 ) + V3*H( J, K+2 )
+                  H( J, K ) = H( J, K ) - SUM*T1
+                  H( J, K+1 ) = H( J, K+1 ) - SUM*T2
+                  H( J, K+2 ) = H( J, K+2 ) - SUM*T3
+   70          CONTINUE
+*
+               IF( WANTZ ) THEN
+*
+*                 Accumulate transformations in the matrix Z
+*
+                  DO 80 J = ILOZ, IHIZ
+                     SUM = Z( J, K ) + V2*Z( J, K+1 ) + V3*Z( J, K+2 )
+                     Z( J, K ) = Z( J, K ) - SUM*T1
+                     Z( J, K+1 ) = Z( J, K+1 ) - SUM*T2
+                     Z( J, K+2 ) = Z( J, K+2 ) - SUM*T3
+   80             CONTINUE
+               END IF
+            ELSE IF( NR.EQ.2 ) THEN
+*
+*              Apply G from the left to transform the rows of the matrix
+*              in columns K to I2.
+*
+               DO 90 J = K, I2
+                  SUM = H( K, J ) + V2*H( K+1, J )
+                  H( K, J ) = H( K, J ) - SUM*T1
+                  H( K+1, J ) = H( K+1, J ) - SUM*T2
+   90          CONTINUE
+*
+*              Apply G from the right to transform the columns of the
+*              matrix in rows I1 to min(K+3,I).
+*
+               DO 100 J = I1, I
+                  SUM = H( J, K ) + V2*H( J, K+1 )
+                  H( J, K ) = H( J, K ) - SUM*T1
+                  H( J, K+1 ) = H( J, K+1 ) - SUM*T2
+  100          CONTINUE
+*
+               IF( WANTZ ) THEN
+*
+*                 Accumulate transformations in the matrix Z
+*
+                  DO 110 J = ILOZ, IHIZ
+                     SUM = Z( J, K ) + V2*Z( J, K+1 )
+                     Z( J, K ) = Z( J, K ) - SUM*T1
+                     Z( J, K+1 ) = Z( J, K+1 ) - SUM*T2
+  110             CONTINUE
+               END IF
+            END IF
+  120    CONTINUE
+*
+  130 CONTINUE
+*
+*     Failure to converge in remaining number of iterations
+*
+      INFO = I
+      RETURN
+*
+  140 CONTINUE
+*
+      IF( L.EQ.I ) THEN
+*
+*        H(I,I-1) is negligible: one eigenvalue has converged.
+*
+         WR( I ) = H( I, I )
+         WI( I ) = ZERO
+      ELSE IF( L.EQ.I-1 ) THEN
+*
+*        H(I-1,I-2) is negligible: a pair of eigenvalues have converged.
+*
+*        Transform the 2-by-2 submatrix to standard Schur form,
+*        and compute and store the eigenvalues.
+*
+         CALL DLANV2( H( I-1, I-1 ), H( I-1, I ), H( I, I-1 ),
+     $                H( I, I ), WR( I-1 ), WI( I-1 ), WR( I ), WI( I ),
+     $                CS, SN )
+*
+         IF( WANTT ) THEN
+*
+*           Apply the transformation to the rest of H.
+*
+            IF( I2.GT.I )
+     $         CALL DROT( I2-I, H( I-1, I+1 ), LDH, H( I, I+1 ), LDH,
+     $                    CS, SN )
+            CALL DROT( I-I1-1, H( I1, I-1 ), 1, H( I1, I ), 1, CS, SN )
+         END IF
+         IF( WANTZ ) THEN
+*
+*           Apply the transformation to Z.
+*
+            CALL DROT( NZ, Z( ILOZ, I-1 ), 1, Z( ILOZ, I ), 1, CS, SN )
+         END IF
+      END IF
+*
+*     Decrement number of remaining iterations, and return to start of
+*     the main loop with new value of I.
+*
+      ITN = ITN - ITS
+      I = L - 1
+      GO TO 10
+*
+  150 CONTINUE
+      RETURN
+*
+*     End of DLAHQR
+*
+      END
diff --git a/costa/native/external/lapack/dlahrd.f b/costa/native/external/lapack/dlahrd.f
new file mode 100644
index 000000000..9d473350d
--- /dev/null
+++ b/costa/native/external/lapack/dlahrd.f
@@ -0,0 +1,206 @@
+      SUBROUTINE DLAHRD( N, K, NB, A, LDA, TAU, T, LDT, Y, LDY )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            K, LDA, LDT, LDY, N, NB
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), T( LDT, NB ), TAU( NB ),
+     $                   Y( LDY, NB )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAHRD reduces the first NB columns of a real general n-by-(n-k+1)
+*  matrix A so that elements below the k-th subdiagonal are zero. The
+*  reduction is performed by an orthogonal similarity transformation
+*  Q' * A * Q. The routine returns the matrices V and T which determine
+*  Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T.
+*
+*  This is an auxiliary routine called by DGEHRD.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.
+*
+*  K       (input) INTEGER
+*          The offset for the reduction. Elements below the k-th
+*          subdiagonal in the first NB columns are reduced to zero.
+*
+*  NB      (input) INTEGER
+*          The number of columns to be reduced.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N-K+1)
+*          On entry, the n-by-(n-k+1) general matrix A.
+*          On exit, the elements on and above the k-th subdiagonal in
+*          the first NB columns are overwritten with the corresponding
+*          elements of the reduced matrix; the elements below the k-th
+*          subdiagonal, with the array TAU, represent the matrix Q as a
+*          product of elementary reflectors. The other columns of A are
+*          unchanged. See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  TAU     (output) DOUBLE PRECISION array, dimension (NB)
+*          The scalar factors of the elementary reflectors. See Further
+*          Details.
+*
+*  T       (output) DOUBLE PRECISION array, dimension (LDT,NB)
+*          The upper triangular matrix T.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T.  LDT >= NB.
+*
+*  Y       (output) DOUBLE PRECISION array, dimension (LDY,NB)
+*          The n-by-nb matrix Y.
+*
+*  LDY     (input) INTEGER
+*          The leading dimension of the array Y. LDY >= N.
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of nb elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(nb).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in
+*  A(i+k+1:n,i), and tau in TAU(i).
+*
+*  The elements of the vectors v together form the (n-k+1)-by-nb matrix
+*  V which is needed, with T and Y, to apply the transformation to the
+*  unreduced part of the matrix, using an update of the form:
+*  A := (I - V*T*V') * (A - Y*V').
+*
+*  The contents of A on exit are illustrated by the following example
+*  with n = 7, k = 3 and nb = 2:
+*
+*     ( a   h   a   a   a )
+*     ( a   h   a   a   a )
+*     ( a   h   a   a   a )
+*     ( h   h   a   a   a )
+*     ( v1  h   a   a   a )
+*     ( v1  v2  a   a   a )
+*     ( v1  v2  a   a   a )
+*
+*  where a denotes an element of the original matrix A, h denotes a
+*  modified element of the upper Hessenberg matrix H, and vi denotes an
+*  element of the vector defining H(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      DOUBLE PRECISION   EI
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DCOPY, DGEMV, DLARFG, DSCAL, DTRMV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.1 )
+     $   RETURN
+*
+      DO 10 I = 1, NB
+         IF( I.GT.1 ) THEN
+*
+*           Update A(1:n,i)
+*
+*           Compute i-th column of A - Y * V'
+*
+            CALL DGEMV( 'No transpose', N, I-1, -ONE, Y, LDY,
+     $                  A( K+I-1, 1 ), LDA, ONE, A( 1, I ), 1 )
+*
+*           Apply I - V * T' * V' to this column (call it b) from the
+*           left, using the last column of T as workspace
+*
+*           Let  V = ( V1 )   and   b = ( b1 )   (first I-1 rows)
+*                    ( V2 )             ( b2 )
+*
+*           where V1 is unit lower triangular
+*
+*           w := V1' * b1
+*
+            CALL DCOPY( I-1, A( K+1, I ), 1, T( 1, NB ), 1 )
+            CALL DTRMV( 'Lower', 'Transpose', 'Unit', I-1, A( K+1, 1 ),
+     $                  LDA, T( 1, NB ), 1 )
+*
+*           w := w + V2'*b2
+*
+            CALL DGEMV( 'Transpose', N-K-I+1, I-1, ONE, A( K+I, 1 ),
+     $                  LDA, A( K+I, I ), 1, ONE, T( 1, NB ), 1 )
+*
+*           w := T'*w
+*
+            CALL DTRMV( 'Upper', 'Transpose', 'Non-unit', I-1, T, LDT,
+     $                  T( 1, NB ), 1 )
+*
+*           b2 := b2 - V2*w
+*
+            CALL DGEMV( 'No transpose', N-K-I+1, I-1, -ONE, A( K+I, 1 ),
+     $                  LDA, T( 1, NB ), 1, ONE, A( K+I, I ), 1 )
+*
+*           b1 := b1 - V1*w
+*
+            CALL DTRMV( 'Lower', 'No transpose', 'Unit', I-1,
+     $                  A( K+1, 1 ), LDA, T( 1, NB ), 1 )
+            CALL DAXPY( I-1, -ONE, T( 1, NB ), 1, A( K+1, I ), 1 )
+*
+            A( K+I-1, I-1 ) = EI
+         END IF
+*
+*        Generate the elementary reflector H(i) to annihilate
+*        A(k+i+1:n,i)
+*
+         CALL DLARFG( N-K-I+1, A( K+I, I ), A( MIN( K+I+1, N ), I ), 1,
+     $                TAU( I ) )
+         EI = A( K+I, I )
+         A( K+I, I ) = ONE
+*
+*        Compute  Y(1:n,i)
+*
+         CALL DGEMV( 'No transpose', N, N-K-I+1, ONE, A( 1, I+1 ), LDA,
+     $               A( K+I, I ), 1, ZERO, Y( 1, I ), 1 )
+         CALL DGEMV( 'Transpose', N-K-I+1, I-1, ONE, A( K+I, 1 ), LDA,
+     $               A( K+I, I ), 1, ZERO, T( 1, I ), 1 )
+         CALL DGEMV( 'No transpose', N, I-1, -ONE, Y, LDY, T( 1, I ), 1,
+     $               ONE, Y( 1, I ), 1 )
+         CALL DSCAL( N, TAU( I ), Y( 1, I ), 1 )
+*
+*        Compute T(1:i,i)
+*
+         CALL DSCAL( I-1, -TAU( I ), T( 1, I ), 1 )
+         CALL DTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, LDT,
+     $               T( 1, I ), 1 )
+         T( I, I ) = TAU( I )
+*
+   10 CONTINUE
+      A( K+NB, NB ) = EI
+*
+      RETURN
+*
+*     End of DLAHRD
+*
+      END
diff --git a/costa/native/external/lapack/dlaic1.f b/costa/native/external/lapack/dlaic1.f
new file mode 100644
index 000000000..7a6bf93f7
--- /dev/null
+++ b/costa/native/external/lapack/dlaic1.f
@@ -0,0 +1,293 @@
+      SUBROUTINE DLAIC1( JOB, J, X, SEST, W, GAMMA, SESTPR, S, C )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            J, JOB
+      DOUBLE PRECISION   C, GAMMA, S, SEST, SESTPR
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   W( J ), X( J )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAIC1 applies one step of incremental condition estimation in
+*  its simplest version:
+*
+*  Let x, twonorm(x) = 1, be an approximate singular vector of an j-by-j
+*  lower triangular matrix L, such that
+*           twonorm(L*x) = sest
+*  Then DLAIC1 computes sestpr, s, c such that
+*  the vector
+*                  [ s*x ]
+*           xhat = [  c  ]
+*  is an approximate singular vector of
+*                  [ L     0  ]
+*           Lhat = [ w' gamma ]
+*  in the sense that
+*           twonorm(Lhat*xhat) = sestpr.
+*
+*  Depending on JOB, an estimate for the largest or smallest singular
+*  value is computed.
+*
+*  Note that [s c]' and sestpr**2 is an eigenpair of the system
+*
+*      diag(sest*sest, 0) + [alpha  gamma] * [ alpha ]
+*                                            [ gamma ]
+*
+*  where  alpha =  x'*w.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) INTEGER
+*          = 1: an estimate for the largest singular value is computed.
+*          = 2: an estimate for the smallest singular value is computed.
+*
+*  J       (input) INTEGER
+*          Length of X and W
+*
+*  X       (input) DOUBLE PRECISION array, dimension (J)
+*          The j-vector x.
+*
+*  SEST    (input) DOUBLE PRECISION
+*          Estimated singular value of j by j matrix L
+*
+*  W       (input) DOUBLE PRECISION array, dimension (J)
+*          The j-vector w.
+*
+*  GAMMA   (input) DOUBLE PRECISION
+*          The diagonal element gamma.
+*
+*  SEDTPR  (output) DOUBLE PRECISION
+*          Estimated singular value of (j+1) by (j+1) matrix Lhat.
+*
+*  S       (output) DOUBLE PRECISION
+*          Sine needed in forming xhat.
+*
+*  C       (output) DOUBLE PRECISION
+*          Cosine needed in forming xhat.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 )
+      DOUBLE PRECISION   HALF, FOUR
+      PARAMETER          ( HALF = 0.5D0, FOUR = 4.0D0 )
+*     ..
+*     .. Local Scalars ..
+      DOUBLE PRECISION   ABSALP, ABSEST, ABSGAM, ALPHA, B, COSINE, EPS,
+     $                   NORMA, S1, S2, SINE, T, TEST, TMP, ZETA1, ZETA2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SIGN, SQRT
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DDOT, DLAMCH
+      EXTERNAL           DDOT, DLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+      EPS = DLAMCH( 'Epsilon' )
+      ALPHA = DDOT( J, X, 1, W, 1 )
+*
+      ABSALP = ABS( ALPHA )
+      ABSGAM = ABS( GAMMA )
+      ABSEST = ABS( SEST )
+*
+      IF( JOB.EQ.1 ) THEN
+*
+*        Estimating largest singular value
+*
+*        special cases
+*
+         IF( SEST.EQ.ZERO ) THEN
+            S1 = MAX( ABSGAM, ABSALP )
+            IF( S1.EQ.ZERO ) THEN
+               S = ZERO
+               C = ONE
+               SESTPR = ZERO
+            ELSE
+               S = ALPHA / S1
+               C = GAMMA / S1
+               TMP = SQRT( S*S+C*C )
+               S = S / TMP
+               C = C / TMP
+               SESTPR = S1*TMP
+            END IF
+            RETURN
+         ELSE IF( ABSGAM.LE.EPS*ABSEST ) THEN
+            S = ONE
+            C = ZERO
+            TMP = MAX( ABSEST, ABSALP )
+            S1 = ABSEST / TMP
+            S2 = ABSALP / TMP
+            SESTPR = TMP*SQRT( S1*S1+S2*S2 )
+            RETURN
+         ELSE IF( ABSALP.LE.EPS*ABSEST ) THEN
+            S1 = ABSGAM
+            S2 = ABSEST
+            IF( S1.LE.S2 ) THEN
+               S = ONE
+               C = ZERO
+               SESTPR = S2
+            ELSE
+               S = ZERO
+               C = ONE
+               SESTPR = S1
+            END IF
+            RETURN
+         ELSE IF( ABSEST.LE.EPS*ABSALP .OR. ABSEST.LE.EPS*ABSGAM ) THEN
+            S1 = ABSGAM
+            S2 = ABSALP
+            IF( S1.LE.S2 ) THEN
+               TMP = S1 / S2
+               S = SQRT( ONE+TMP*TMP )
+               SESTPR = S2*S
+               C = ( GAMMA / S2 ) / S
+               S = SIGN( ONE, ALPHA ) / S
+            ELSE
+               TMP = S2 / S1
+               C = SQRT( ONE+TMP*TMP )
+               SESTPR = S1*C
+               S = ( ALPHA / S1 ) / C
+               C = SIGN( ONE, GAMMA ) / C
+            END IF
+            RETURN
+         ELSE
+*
+*           normal case
+*
+            ZETA1 = ALPHA / ABSEST
+            ZETA2 = GAMMA / ABSEST
+*
+            B = ( ONE-ZETA1*ZETA1-ZETA2*ZETA2 )*HALF
+            C = ZETA1*ZETA1
+            IF( B.GT.ZERO ) THEN
+               T = C / ( B+SQRT( B*B+C ) )
+            ELSE
+               T = SQRT( B*B+C ) - B
+            END IF
+*
+            SINE = -ZETA1 / T
+            COSINE = -ZETA2 / ( ONE+T )
+            TMP = SQRT( SINE*SINE+COSINE*COSINE )
+            S = SINE / TMP
+            C = COSINE / TMP
+            SESTPR = SQRT( T+ONE )*ABSEST
+            RETURN
+         END IF
+*
+      ELSE IF( JOB.EQ.2 ) THEN
+*
+*        Estimating smallest singular value
+*
+*        special cases
+*
+         IF( SEST.EQ.ZERO ) THEN
+            SESTPR = ZERO
+            IF( MAX( ABSGAM, ABSALP ).EQ.ZERO ) THEN
+               SINE = ONE
+               COSINE = ZERO
+            ELSE
+               SINE = -GAMMA
+               COSINE = ALPHA
+            END IF
+            S1 = MAX( ABS( SINE ), ABS( COSINE ) )
+            S = SINE / S1
+            C = COSINE / S1
+            TMP = SQRT( S*S+C*C )
+            S = S / TMP
+            C = C / TMP
+            RETURN
+         ELSE IF( ABSGAM.LE.EPS*ABSEST ) THEN
+            S = ZERO
+            C = ONE
+            SESTPR = ABSGAM
+            RETURN
+         ELSE IF( ABSALP.LE.EPS*ABSEST ) THEN
+            S1 = ABSGAM
+            S2 = ABSEST
+            IF( S1.LE.S2 ) THEN
+               S = ZERO
+               C = ONE
+               SESTPR = S1
+            ELSE
+               S = ONE
+               C = ZERO
+               SESTPR = S2
+            END IF
+            RETURN
+         ELSE IF( ABSEST.LE.EPS*ABSALP .OR. ABSEST.LE.EPS*ABSGAM ) THEN
+            S1 = ABSGAM
+            S2 = ABSALP
+            IF( S1.LE.S2 ) THEN
+               TMP = S1 / S2
+               C = SQRT( ONE+TMP*TMP )
+               SESTPR = ABSEST*( TMP / C )
+               S = -( GAMMA / S2 ) / C
+               C = SIGN( ONE, ALPHA ) / C
+            ELSE
+               TMP = S2 / S1
+               S = SQRT( ONE+TMP*TMP )
+               SESTPR = ABSEST / S
+               C = ( ALPHA / S1 ) / S
+               S = -SIGN( ONE, GAMMA ) / S
+            END IF
+            RETURN
+         ELSE
+*
+*           normal case
+*
+            ZETA1 = ALPHA / ABSEST
+            ZETA2 = GAMMA / ABSEST
+*
+            NORMA = MAX( ONE+ZETA1*ZETA1+ABS( ZETA1*ZETA2 ),
+     $              ABS( ZETA1*ZETA2 )+ZETA2*ZETA2 )
+*
+*           See if root is closer to zero or to ONE
+*
+            TEST = ONE + TWO*( ZETA1-ZETA2 )*( ZETA1+ZETA2 )
+            IF( TEST.GE.ZERO ) THEN
+*
+*              root is close to zero, compute directly
+*
+               B = ( ZETA1*ZETA1+ZETA2*ZETA2+ONE )*HALF
+               C = ZETA2*ZETA2
+               T = C / ( B+SQRT( ABS( B*B-C ) ) )
+               SINE = ZETA1 / ( ONE-T )
+               COSINE = -ZETA2 / T
+               SESTPR = SQRT( T+FOUR*EPS*EPS*NORMA )*ABSEST
+            ELSE
+*
+*              root is closer to ONE, shift by that amount
+*
+               B = ( ZETA2*ZETA2+ZETA1*ZETA1-ONE )*HALF
+               C = ZETA1*ZETA1
+               IF( B.GE.ZERO ) THEN
+                  T = -C / ( B+SQRT( B*B+C ) )
+               ELSE
+                  T = B - SQRT( B*B+C )
+               END IF
+               SINE = -ZETA1 / T
+               COSINE = -ZETA2 / ( ONE+T )
+               SESTPR = SQRT( ONE+T+FOUR*EPS*EPS*NORMA )*ABSEST
+            END IF
+            TMP = SQRT( SINE*SINE+COSINE*COSINE )
+            S = SINE / TMP
+            C = COSINE / TMP
+            RETURN
+*
+         END IF
+      END IF
+      RETURN
+*
+*     End of DLAIC1
+*
+      END
diff --git a/costa/native/external/lapack/dlaln2.f b/costa/native/external/lapack/dlaln2.f
new file mode 100644
index 000000000..e594b5a5b
--- /dev/null
+++ b/costa/native/external/lapack/dlaln2.f
@@ -0,0 +1,508 @@
+      SUBROUTINE DLALN2( LTRANS, NA, NW, SMIN, CA, A, LDA, D1, D2, B,
+     $                   LDB, WR, WI, X, LDX, SCALE, XNORM, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      LOGICAL            LTRANS
+      INTEGER            INFO, LDA, LDB, LDX, NA, NW
+      DOUBLE PRECISION   CA, D1, D2, SCALE, SMIN, WI, WR, XNORM
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLALN2 solves a system of the form  (ca A - w D ) X = s B
+*  or (ca A' - w D) X = s B   with possible scaling ("s") and
+*  perturbation of A.  (A' means A-transpose.)
+*
+*  A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA
+*  real diagonal matrix, w is a real or complex value, and X and B are
+*  NA x 1 matrices -- real if w is real, complex if w is complex.  NA
+*  may be 1 or 2.
+*
+*  If w is complex, X and B are represented as NA x 2 matrices,
+*  the first column of each being the real part and the second
+*  being the imaginary part.
+*
+*  "s" is a scaling factor (.LE. 1), computed by DLALN2, which is
+*  so chosen that X can be computed without overflow.  X is further
+*  scaled if necessary to assure that norm(ca A - w D)*norm(X) is less
+*  than overflow.
+*
+*  If both singular values of (ca A - w D) are less than SMIN,
+*  SMIN*identity will be used instead of (ca A - w D).  If only one
+*  singular value is less than SMIN, one element of (ca A - w D) will be
+*  perturbed enough to make the smallest singular value roughly SMIN.
+*  If both singular values are at least SMIN, (ca A - w D) will not be
+*  perturbed.  In any case, the perturbation will be at most some small
+*  multiple of max( SMIN, ulp*norm(ca A - w D) ).  The singular values
+*  are computed by infinity-norm approximations, and thus will only be
+*  correct to a factor of 2 or so.
+*
+*  Note: all input quantities are assumed to be smaller than overflow
+*  by a reasonable factor.  (See BIGNUM.)
+*
+*  Arguments
+*  ==========
+*
+*  LTRANS  (input) LOGICAL
+*          =.TRUE.:  A-transpose will be used.
+*          =.FALSE.: A will be used (not transposed.)
+*
+*  NA      (input) INTEGER
+*          The size of the matrix A.  It may (only) be 1 or 2.
+*
+*  NW      (input) INTEGER
+*          1 if "w" is real, 2 if "w" is complex.  It may only be 1
+*          or 2.
+*
+*  SMIN    (input) DOUBLE PRECISION
+*          The desired lower bound on the singular values of A.  This
+*          should be a safe distance away from underflow or overflow,
+*          say, between (underflow/machine precision) and  (machine
+*          precision * overflow ).  (See BIGNUM and ULP.)
+*
+*  CA      (input) DOUBLE PRECISION
+*          The coefficient c, which A is multiplied by.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,NA)
+*          The NA x NA matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of A.  It must be at least NA.
+*
+*  D1      (input) DOUBLE PRECISION
+*          The 1,1 element in the diagonal matrix D.
+*
+*  D2      (input) DOUBLE PRECISION
+*          The 2,2 element in the diagonal matrix D.  Not used if NW=1.
+*
+*  B       (input) DOUBLE PRECISION array, dimension (LDB,NW)
+*          The NA x NW matrix B (right-hand side).  If NW=2 ("w" is
+*          complex), column 1 contains the real part of B and column 2
+*          contains the imaginary part.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of B.  It must be at least NA.
+*
+*  WR      (input) DOUBLE PRECISION
+*          The real part of the scalar "w".
+*
+*  WI      (input) DOUBLE PRECISION
+*          The imaginary part of the scalar "w".  Not used if NW=1.
+*
+*  X       (output) DOUBLE PRECISION array, dimension (LDX,NW)
+*          The NA x NW matrix X (unknowns), as computed by DLALN2.
+*          If NW=2 ("w" is complex), on exit, column 1 will contain
+*          the real part of X and column 2 will contain the imaginary
+*          part.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of X.  It must be at least NA.
+*
+*  SCALE   (output) DOUBLE PRECISION
+*          The scale factor that B must be multiplied by to insure
+*          that overflow does not occur when computing X.  Thus,
+*          (ca A - w D) X  will be SCALE*B, not B (ignoring
+*          perturbations of A.)  It will be at most 1.
+*
+*  XNORM   (output) DOUBLE PRECISION
+*          The infinity-norm of X, when X is regarded as an NA x NW
+*          real matrix.
+*
+*  INFO    (output) INTEGER
+*          An error flag.  It will be set to zero if no error occurs,
+*          a negative number if an argument is in error, or a positive
+*          number if  ca A - w D  had to be perturbed.
+*          The possible values are:
+*          = 0: No error occurred, and (ca A - w D) did not have to be
+*                 perturbed.
+*          = 1: (ca A - w D) had to be perturbed to make its smallest
+*               (or only) singular value greater than SMIN.
+*          NOTE: In the interests of speed, this routine does not
+*                check the inputs for errors.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            ICMAX, J
+      DOUBLE PRECISION   BBND, BI1, BI2, BIGNUM, BNORM, BR1, BR2, CI21,
+     $                   CI22, CMAX, CNORM, CR21, CR22, CSI, CSR, LI21,
+     $                   LR21, SMINI, SMLNUM, TEMP, U22ABS, UI11, UI11R,
+     $                   UI12, UI12S, UI22, UR11, UR11R, UR12, UR12S,
+     $                   UR22, XI1, XI2, XR1, XR2
+*     ..
+*     .. Local Arrays ..
+      LOGICAL            RSWAP( 4 ), ZSWAP( 4 )
+      INTEGER            IPIVOT( 4, 4 )
+      DOUBLE PRECISION   CI( 2, 2 ), CIV( 4 ), CR( 2, 2 ), CRV( 4 )
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLADIV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Equivalences ..
+      EQUIVALENCE        ( CI( 1, 1 ), CIV( 1 ) ),
+     $                   ( CR( 1, 1 ), CRV( 1 ) )
+*     ..
+*     .. Data statements ..
+      DATA               ZSWAP / .FALSE., .FALSE., .TRUE., .TRUE. /
+      DATA               RSWAP / .FALSE., .TRUE., .FALSE., .TRUE. /
+      DATA               IPIVOT / 1, 2, 3, 4, 2, 1, 4, 3, 3, 4, 1, 2, 4,
+     $                   3, 2, 1 /
+*     ..
+*     .. Executable Statements ..
+*
+*     Compute BIGNUM
+*
+      SMLNUM = TWO*DLAMCH( 'Safe minimum' )
+      BIGNUM = ONE / SMLNUM
+      SMINI = MAX( SMIN, SMLNUM )
+*
+*     Don't check for input errors
+*
+      INFO = 0
+*
+*     Standard Initializations
+*
+      SCALE = ONE
+*
+      IF( NA.EQ.1 ) THEN
+*
+*        1 x 1  (i.e., scalar) system   C X = B
+*
+         IF( NW.EQ.1 ) THEN
+*
+*           Real 1x1 system.
+*
+*           C = ca A - w D
+*
+            CSR = CA*A( 1, 1 ) - WR*D1
+            CNORM = ABS( CSR )
+*
+*           If | C | < SMINI, use C = SMINI
+*
+            IF( CNORM.LT.SMINI ) THEN
+               CSR = SMINI
+               CNORM = SMINI
+               INFO = 1
+            END IF
+*
+*           Check scaling for  X = B / C
+*
+            BNORM = ABS( B( 1, 1 ) )
+            IF( CNORM.LT.ONE .AND. BNORM.GT.ONE ) THEN
+               IF( BNORM.GT.BIGNUM*CNORM )
+     $            SCALE = ONE / BNORM
+            END IF
+*
+*           Compute X
+*
+            X( 1, 1 ) = ( B( 1, 1 )*SCALE ) / CSR
+            XNORM = ABS( X( 1, 1 ) )
+         ELSE
+*
+*           Complex 1x1 system (w is complex)
+*
+*           C = ca A - w D
+*
+            CSR = CA*A( 1, 1 ) - WR*D1
+            CSI = -WI*D1
+            CNORM = ABS( CSR ) + ABS( CSI )
+*
+*           If | C | < SMINI, use C = SMINI
+*
+            IF( CNORM.LT.SMINI ) THEN
+               CSR = SMINI
+               CSI = ZERO
+               CNORM = SMINI
+               INFO = 1
+            END IF
+*
+*           Check scaling for  X = B / C
+*
+            BNORM = ABS( B( 1, 1 ) ) + ABS( B( 1, 2 ) )
+            IF( CNORM.LT.ONE .AND. BNORM.GT.ONE ) THEN
+               IF( BNORM.GT.BIGNUM*CNORM )
+     $            SCALE = ONE / BNORM
+            END IF
+*
+*           Compute X
+*
+            CALL DLADIV( SCALE*B( 1, 1 ), SCALE*B( 1, 2 ), CSR, CSI,
+     $                   X( 1, 1 ), X( 1, 2 ) )
+            XNORM = ABS( X( 1, 1 ) ) + ABS( X( 1, 2 ) )
+         END IF
+*
+      ELSE
+*
+*        2x2 System
+*
+*        Compute the real part of  C = ca A - w D  (or  ca A' - w D )
+*
+         CR( 1, 1 ) = CA*A( 1, 1 ) - WR*D1
+         CR( 2, 2 ) = CA*A( 2, 2 ) - WR*D2
+         IF( LTRANS ) THEN
+            CR( 1, 2 ) = CA*A( 2, 1 )
+            CR( 2, 1 ) = CA*A( 1, 2 )
+         ELSE
+            CR( 2, 1 ) = CA*A( 2, 1 )
+            CR( 1, 2 ) = CA*A( 1, 2 )
+         END IF
+*
+         IF( NW.EQ.1 ) THEN
+*
+*           Real 2x2 system  (w is real)
+*
+*           Find the largest element in C
+*
+            CMAX = ZERO
+            ICMAX = 0
+*
+            DO 10 J = 1, 4
+               IF( ABS( CRV( J ) ).GT.CMAX ) THEN
+                  CMAX = ABS( CRV( J ) )
+                  ICMAX = J
+               END IF
+   10       CONTINUE
+*
+*           If norm(C) < SMINI, use SMINI*identity.
+*
+            IF( CMAX.LT.SMINI ) THEN
+               BNORM = MAX( ABS( B( 1, 1 ) ), ABS( B( 2, 1 ) ) )
+               IF( SMINI.LT.ONE .AND. BNORM.GT.ONE ) THEN
+                  IF( BNORM.GT.BIGNUM*SMINI )
+     $               SCALE = ONE / BNORM
+               END IF
+               TEMP = SCALE / SMINI
+               X( 1, 1 ) = TEMP*B( 1, 1 )
+               X( 2, 1 ) = TEMP*B( 2, 1 )
+               XNORM = TEMP*BNORM
+               INFO = 1
+               RETURN
+            END IF
+*
+*           Gaussian elimination with complete pivoting.
+*
+            UR11 = CRV( ICMAX )
+            CR21 = CRV( IPIVOT( 2, ICMAX ) )
+            UR12 = CRV( IPIVOT( 3, ICMAX ) )
+            CR22 = CRV( IPIVOT( 4, ICMAX ) )
+            UR11R = ONE / UR11
+            LR21 = UR11R*CR21
+            UR22 = CR22 - UR12*LR21
+*
+*           If smaller pivot < SMINI, use SMINI
+*
+            IF( ABS( UR22 ).LT.SMINI ) THEN
+               UR22 = SMINI
+               INFO = 1
+            END IF
+            IF( RSWAP( ICMAX ) ) THEN
+               BR1 = B( 2, 1 )
+               BR2 = B( 1, 1 )
+            ELSE
+               BR1 = B( 1, 1 )
+               BR2 = B( 2, 1 )
+            END IF
+            BR2 = BR2 - LR21*BR1
+            BBND = MAX( ABS( BR1*( UR22*UR11R ) ), ABS( BR2 ) )
+            IF( BBND.GT.ONE .AND. ABS( UR22 ).LT.ONE ) THEN
+               IF( BBND.GE.BIGNUM*ABS( UR22 ) )
+     $            SCALE = ONE / BBND
+            END IF
+*
+            XR2 = ( BR2*SCALE ) / UR22
+            XR1 = ( SCALE*BR1 )*UR11R - XR2*( UR11R*UR12 )
+            IF( ZSWAP( ICMAX ) ) THEN
+               X( 1, 1 ) = XR2
+               X( 2, 1 ) = XR1
+            ELSE
+               X( 1, 1 ) = XR1
+               X( 2, 1 ) = XR2
+            END IF
+            XNORM = MAX( ABS( XR1 ), ABS( XR2 ) )
+*
+*           Further scaling if  norm(A) norm(X) > overflow
+*
+            IF( XNORM.GT.ONE .AND. CMAX.GT.ONE ) THEN
+               IF( XNORM.GT.BIGNUM / CMAX ) THEN
+                  TEMP = CMAX / BIGNUM
+                  X( 1, 1 ) = TEMP*X( 1, 1 )
+                  X( 2, 1 ) = TEMP*X( 2, 1 )
+                  XNORM = TEMP*XNORM
+                  SCALE = TEMP*SCALE
+               END IF
+            END IF
+         ELSE
+*
+*           Complex 2x2 system  (w is complex)
+*
+*           Find the largest element in C
+*
+            CI( 1, 1 ) = -WI*D1
+            CI( 2, 1 ) = ZERO
+            CI( 1, 2 ) = ZERO
+            CI( 2, 2 ) = -WI*D2
+            CMAX = ZERO
+            ICMAX = 0
+*
+            DO 20 J = 1, 4
+               IF( ABS( CRV( J ) )+ABS( CIV( J ) ).GT.CMAX ) THEN
+                  CMAX = ABS( CRV( J ) ) + ABS( CIV( J ) )
+                  ICMAX = J
+               END IF
+   20       CONTINUE
+*
+*           If norm(C) < SMINI, use SMINI*identity.
+*
+            IF( CMAX.LT.SMINI ) THEN
+               BNORM = MAX( ABS( B( 1, 1 ) )+ABS( B( 1, 2 ) ),
+     $                 ABS( B( 2, 1 ) )+ABS( B( 2, 2 ) ) )
+               IF( SMINI.LT.ONE .AND. BNORM.GT.ONE ) THEN
+                  IF( BNORM.GT.BIGNUM*SMINI )
+     $               SCALE = ONE / BNORM
+               END IF
+               TEMP = SCALE / SMINI
+               X( 1, 1 ) = TEMP*B( 1, 1 )
+               X( 2, 1 ) = TEMP*B( 2, 1 )
+               X( 1, 2 ) = TEMP*B( 1, 2 )
+               X( 2, 2 ) = TEMP*B( 2, 2 )
+               XNORM = TEMP*BNORM
+               INFO = 1
+               RETURN
+            END IF
+*
+*           Gaussian elimination with complete pivoting.
+*
+            UR11 = CRV( ICMAX )
+            UI11 = CIV( ICMAX )
+            CR21 = CRV( IPIVOT( 2, ICMAX ) )
+            CI21 = CIV( IPIVOT( 2, ICMAX ) )
+            UR12 = CRV( IPIVOT( 3, ICMAX ) )
+            UI12 = CIV( IPIVOT( 3, ICMAX ) )
+            CR22 = CRV( IPIVOT( 4, ICMAX ) )
+            CI22 = CIV( IPIVOT( 4, ICMAX ) )
+            IF( ICMAX.EQ.1 .OR. ICMAX.EQ.4 ) THEN
+*
+*              Code when off-diagonals of pivoted C are real
+*
+               IF( ABS( UR11 ).GT.ABS( UI11 ) ) THEN
+                  TEMP = UI11 / UR11
+                  UR11R = ONE / ( UR11*( ONE+TEMP**2 ) )
+                  UI11R = -TEMP*UR11R
+               ELSE
+                  TEMP = UR11 / UI11
+                  UI11R = -ONE / ( UI11*( ONE+TEMP**2 ) )
+                  UR11R = -TEMP*UI11R
+               END IF
+               LR21 = CR21*UR11R
+               LI21 = CR21*UI11R
+               UR12S = UR12*UR11R
+               UI12S = UR12*UI11R
+               UR22 = CR22 - UR12*LR21
+               UI22 = CI22 - UR12*LI21
+            ELSE
+*
+*              Code when diagonals of pivoted C are real
+*
+               UR11R = ONE / UR11
+               UI11R = ZERO
+               LR21 = CR21*UR11R
+               LI21 = CI21*UR11R
+               UR12S = UR12*UR11R
+               UI12S = UI12*UR11R
+               UR22 = CR22 - UR12*LR21 + UI12*LI21
+               UI22 = -UR12*LI21 - UI12*LR21
+            END IF
+            U22ABS = ABS( UR22 ) + ABS( UI22 )
+*
+*           If smaller pivot < SMINI, use SMINI
+*
+            IF( U22ABS.LT.SMINI ) THEN
+               UR22 = SMINI
+               UI22 = ZERO
+               INFO = 1
+            END IF
+            IF( RSWAP( ICMAX ) ) THEN
+               BR2 = B( 1, 1 )
+               BR1 = B( 2, 1 )
+               BI2 = B( 1, 2 )
+               BI1 = B( 2, 2 )
+            ELSE
+               BR1 = B( 1, 1 )
+               BR2 = B( 2, 1 )
+               BI1 = B( 1, 2 )
+               BI2 = B( 2, 2 )
+            END IF
+            BR2 = BR2 - LR21*BR1 + LI21*BI1
+            BI2 = BI2 - LI21*BR1 - LR21*BI1
+            BBND = MAX( ( ABS( BR1 )+ABS( BI1 ) )*
+     $             ( U22ABS*( ABS( UR11R )+ABS( UI11R ) ) ),
+     $             ABS( BR2 )+ABS( BI2 ) )
+            IF( BBND.GT.ONE .AND. U22ABS.LT.ONE ) THEN
+               IF( BBND.GE.BIGNUM*U22ABS ) THEN
+                  SCALE = ONE / BBND
+                  BR1 = SCALE*BR1
+                  BI1 = SCALE*BI1
+                  BR2 = SCALE*BR2
+                  BI2 = SCALE*BI2
+               END IF
+            END IF
+*
+            CALL DLADIV( BR2, BI2, UR22, UI22, XR2, XI2 )
+            XR1 = UR11R*BR1 - UI11R*BI1 - UR12S*XR2 + UI12S*XI2
+            XI1 = UI11R*BR1 + UR11R*BI1 - UI12S*XR2 - UR12S*XI2
+            IF( ZSWAP( ICMAX ) ) THEN
+               X( 1, 1 ) = XR2
+               X( 2, 1 ) = XR1
+               X( 1, 2 ) = XI2
+               X( 2, 2 ) = XI1
+            ELSE
+               X( 1, 1 ) = XR1
+               X( 2, 1 ) = XR2
+               X( 1, 2 ) = XI1
+               X( 2, 2 ) = XI2
+            END IF
+            XNORM = MAX( ABS( XR1 )+ABS( XI1 ), ABS( XR2 )+ABS( XI2 ) )
+*
+*           Further scaling if  norm(A) norm(X) > overflow
+*
+            IF( XNORM.GT.ONE .AND. CMAX.GT.ONE ) THEN
+               IF( XNORM.GT.BIGNUM / CMAX ) THEN
+                  TEMP = CMAX / BIGNUM
+                  X( 1, 1 ) = TEMP*X( 1, 1 )
+                  X( 2, 1 ) = TEMP*X( 2, 1 )
+                  X( 1, 2 ) = TEMP*X( 1, 2 )
+                  X( 2, 2 ) = TEMP*X( 2, 2 )
+                  XNORM = TEMP*XNORM
+                  SCALE = TEMP*SCALE
+               END IF
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DLALN2
+*
+      END
diff --git a/costa/native/external/lapack/dlals0.f b/costa/native/external/lapack/dlals0.f
new file mode 100644
index 000000000..a60ada8d9
--- /dev/null
+++ b/costa/native/external/lapack/dlals0.f
@@ -0,0 +1,375 @@
+      SUBROUTINE DLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B, LDB, BX, LDBX,
+     $                   PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM,
+     $                   POLES, DIFL, DIFR, Z, K, C, S, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     December 1, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            GIVPTR, ICOMPQ, INFO, K, LDB, LDBX, LDGCOL,
+     $                   LDGNUM, NL, NR, NRHS, SQRE
+      DOUBLE PRECISION   C, S
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( LDGCOL, * ), PERM( * )
+      DOUBLE PRECISION   B( LDB, * ), BX( LDBX, * ), DIFL( * ),
+     $                   DIFR( LDGNUM, * ), GIVNUM( LDGNUM, * ),
+     $                   POLES( LDGNUM, * ), WORK( * ), Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLALS0 applies back the multiplying factors of either the left or the
+*  right singular vector matrix of a diagonal matrix appended by a row
+*  to the right hand side matrix B in solving the least squares problem
+*  using the divide-and-conquer SVD approach.
+*
+*  For the left singular vector matrix, three types of orthogonal
+*  matrices are involved:
+*
+*  (1L) Givens rotations: the number of such rotations is GIVPTR; the
+*       pairs of columns/rows they were applied to are stored in GIVCOL;
+*       and the C- and S-values of these rotations are stored in GIVNUM.
+*
+*  (2L) Permutation. The (NL+1)-st row of B is to be moved to the first
+*       row, and for J=2:N, PERM(J)-th row of B is to be moved to the
+*       J-th row.
+*
+*  (3L) The left singular vector matrix of the remaining matrix.
+*
+*  For the right singular vector matrix, four types of orthogonal
+*  matrices are involved:
+*
+*  (1R) The right singular vector matrix of the remaining matrix.
+*
+*  (2R) If SQRE = 1, one extra Givens rotation to generate the right
+*       null space.
+*
+*  (3R) The inverse transformation of (2L).
+*
+*  (4R) The inverse transformation of (1L).
+*
+*  Arguments
+*  =========
+*
+*  ICOMPQ (input) INTEGER
+*         Specifies whether singular vectors are to be computed in
+*         factored form:
+*         = 0: Left singular vector matrix.
+*         = 1: Right singular vector matrix.
+*
+*  NL     (input) INTEGER
+*         The row dimension of the upper block. NL >= 1.
+*
+*  NR     (input) INTEGER
+*         The row dimension of the lower block. NR >= 1.
+*
+*  SQRE   (input) INTEGER
+*         = 0: the lower block is an NR-by-NR square matrix.
+*         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+*
+*         The bidiagonal matrix has row dimension N = NL + NR + 1,
+*         and column dimension M = N + SQRE.
+*
+*  NRHS   (input) INTEGER
+*         The number of columns of B and BX. NRHS must be at least 1.
+*
+*  B      (input/output) DOUBLE PRECISION array, dimension ( LDB, NRHS )
+*         On input, B contains the right hand sides of the least
+*         squares problem in rows 1 through M. On output, B contains
+*         the solution X in rows 1 through N.
+*
+*  LDB    (input) INTEGER
+*         The leading dimension of B. LDB must be at least
+*         max(1,MAX( M, N ) ).
+*
+*  BX     (workspace) DOUBLE PRECISION array, dimension ( LDBX, NRHS )
+*
+*  LDBX   (input) INTEGER
+*         The leading dimension of BX.
+*
+*  PERM   (input) INTEGER array, dimension ( N )
+*         The permutations (from deflation and sorting) applied
+*         to the two blocks.
+*
+*  GIVPTR (input) INTEGER
+*         The number of Givens rotations which took place in this
+*         subproblem.
+*
+*  GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 )
+*         Each pair of numbers indicates a pair of rows/columns
+*         involved in a Givens rotation.
+*
+*  LDGCOL (input) INTEGER
+*         The leading dimension of GIVCOL, must be at least N.
+*
+*  GIVNUM (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 )
+*         Each number indicates the C or S value used in the
+*         corresponding Givens rotation.
+*
+*  LDGNUM (input) INTEGER
+*         The leading dimension of arrays DIFR, POLES and
+*         GIVNUM, must be at least K.
+*
+*  POLES  (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 )
+*         On entry, POLES(1:K, 1) contains the new singular
+*         values obtained from solving the secular equation, and
+*         POLES(1:K, 2) is an array containing the poles in the secular
+*         equation.
+*
+*  DIFL   (input) DOUBLE PRECISION array, dimension ( K ).
+*         On entry, DIFL(I) is the distance between I-th updated
+*         (undeflated) singular value and the I-th (undeflated) old
+*         singular value.
+*
+*  DIFR   (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ).
+*         On entry, DIFR(I, 1) contains the distances between I-th
+*         updated (undeflated) singular value and the I+1-th
+*         (undeflated) old singular value. And DIFR(I, 2) is the
+*         normalizing factor for the I-th right singular vector.
+*
+*  Z      (input) DOUBLE PRECISION array, dimension ( K )
+*         Contain the components of the deflation-adjusted updating row
+*         vector.
+*
+*  K      (input) INTEGER
+*         Contains the dimension of the non-deflated matrix,
+*         This is the order of the related secular equation. 1 <= K <=N.
+*
+*  C      (input) DOUBLE PRECISION
+*         C contains garbage if SQRE =0 and the C-value of a Givens
+*         rotation related to the right null space if SQRE = 1.
+*
+*  S      (input) DOUBLE PRECISION
+*         S contains garbage if SQRE =0 and the S-value of a Givens
+*         rotation related to the right null space if SQRE = 1.
+*
+*  WORK   (workspace) DOUBLE PRECISION array, dimension ( K )
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Ren-Cang Li, Computer Science Division, University of
+*       California at Berkeley, USA
+*     Osni Marques, LBNL/NERSC, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO, NEGONE
+      PARAMETER          ( ONE = 1.0D0, ZERO = 0.0D0, NEGONE = -1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, M, N, NLP1
+      DOUBLE PRECISION   DIFLJ, DIFRJ, DJ, DSIGJ, DSIGJP, TEMP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DGEMV, DLACPY, DLASCL, DROT, DSCAL,
+     $                   XERBLA
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMC3, DNRM2
+      EXTERNAL           DLAMC3, DNRM2
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( NL.LT.1 ) THEN
+         INFO = -2
+      ELSE IF( NR.LT.1 ) THEN
+         INFO = -3
+      ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN
+         INFO = -4
+      END IF
+*
+      N = NL + NR + 1
+*
+      IF( NRHS.LT.1 ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.N ) THEN
+         INFO = -7
+      ELSE IF( LDBX.LT.N ) THEN
+         INFO = -9
+      ELSE IF( GIVPTR.LT.0 ) THEN
+         INFO = -11
+      ELSE IF( LDGCOL.LT.N ) THEN
+         INFO = -13
+      ELSE IF( LDGNUM.LT.N ) THEN
+         INFO = -15
+      ELSE IF( K.LT.1 ) THEN
+         INFO = -20
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLALS0', -INFO )
+         RETURN
+      END IF
+*
+      M = N + SQRE
+      NLP1 = NL + 1
+*
+      IF( ICOMPQ.EQ.0 ) THEN
+*
+*        Apply back orthogonal transformations from the left.
+*
+*        Step (1L): apply back the Givens rotations performed.
+*
+         DO 10 I = 1, GIVPTR
+            CALL DROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB,
+     $                 B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ),
+     $                 GIVNUM( I, 1 ) )
+   10    CONTINUE
+*
+*        Step (2L): permute rows of B.
+*
+         CALL DCOPY( NRHS, B( NLP1, 1 ), LDB, BX( 1, 1 ), LDBX )
+         DO 20 I = 2, N
+            CALL DCOPY( NRHS, B( PERM( I ), 1 ), LDB, BX( I, 1 ), LDBX )
+   20    CONTINUE
+*
+*        Step (3L): apply the inverse of the left singular vector
+*        matrix to BX.
+*
+         IF( K.EQ.1 ) THEN
+            CALL DCOPY( NRHS, BX, LDBX, B, LDB )
+            IF( Z( 1 ).LT.ZERO ) THEN
+               CALL DSCAL( NRHS, NEGONE, B, LDB )
+            END IF
+         ELSE
+            DO 50 J = 1, K
+               DIFLJ = DIFL( J )
+               DJ = POLES( J, 1 )
+               DSIGJ = -POLES( J, 2 )
+               IF( J.LT.K ) THEN
+                  DIFRJ = -DIFR( J, 1 )
+                  DSIGJP = -POLES( J+1, 2 )
+               END IF
+               IF( ( Z( J ).EQ.ZERO ) .OR. ( POLES( J, 2 ).EQ.ZERO ) )
+     $              THEN
+                  WORK( J ) = ZERO
+               ELSE
+                  WORK( J ) = -POLES( J, 2 )*Z( J ) / DIFLJ /
+     $                        ( POLES( J, 2 )+DJ )
+               END IF
+               DO 30 I = 1, J - 1
+                  IF( ( Z( I ).EQ.ZERO ) .OR.
+     $                ( POLES( I, 2 ).EQ.ZERO ) ) THEN
+                     WORK( I ) = ZERO
+                  ELSE
+                     WORK( I ) = POLES( I, 2 )*Z( I ) /
+     $                           ( DLAMC3( POLES( I, 2 ), DSIGJ )-
+     $                           DIFLJ ) / ( POLES( I, 2 )+DJ )
+                  END IF
+   30          CONTINUE
+               DO 40 I = J + 1, K
+                  IF( ( Z( I ).EQ.ZERO ) .OR.
+     $                ( POLES( I, 2 ).EQ.ZERO ) ) THEN
+                     WORK( I ) = ZERO
+                  ELSE
+                     WORK( I ) = POLES( I, 2 )*Z( I ) /
+     $                           ( DLAMC3( POLES( I, 2 ), DSIGJP )+
+     $                           DIFRJ ) / ( POLES( I, 2 )+DJ )
+                  END IF
+   40          CONTINUE
+               WORK( 1 ) = NEGONE
+               TEMP = DNRM2( K, WORK, 1 )
+               CALL DGEMV( 'T', K, NRHS, ONE, BX, LDBX, WORK, 1, ZERO,
+     $                     B( J, 1 ), LDB )
+               CALL DLASCL( 'G', 0, 0, TEMP, ONE, 1, NRHS, B( J, 1 ),
+     $                      LDB, INFO )
+   50       CONTINUE
+         END IF
+*
+*        Move the deflated rows of BX to B also.
+*
+         IF( K.LT.MAX( M, N ) )
+     $      CALL DLACPY( 'A', N-K, NRHS, BX( K+1, 1 ), LDBX,
+     $                   B( K+1, 1 ), LDB )
+      ELSE
+*
+*        Apply back the right orthogonal transformations.
+*
+*        Step (1R): apply back the new right singular vector matrix
+*        to B.
+*
+         IF( K.EQ.1 ) THEN
+            CALL DCOPY( NRHS, B, LDB, BX, LDBX )
+         ELSE
+            DO 80 J = 1, K
+               DSIGJ = POLES( J, 2 )
+               IF( Z( J ).EQ.ZERO ) THEN
+                  WORK( J ) = ZERO
+               ELSE
+                  WORK( J ) = -Z( J ) / DIFL( J ) /
+     $                        ( DSIGJ+POLES( J, 1 ) ) / DIFR( J, 2 )
+               END IF
+               DO 60 I = 1, J - 1
+                  IF( Z( J ).EQ.ZERO ) THEN
+                     WORK( I ) = ZERO
+                  ELSE
+                     WORK( I ) = Z( J ) / ( DLAMC3( DSIGJ, -POLES( I+1,
+     $                           2 ) )-DIFR( I, 1 ) ) /
+     $                           ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 )
+                  END IF
+   60          CONTINUE
+               DO 70 I = J + 1, K
+                  IF( Z( J ).EQ.ZERO ) THEN
+                     WORK( I ) = ZERO
+                  ELSE
+                     WORK( I ) = Z( J ) / ( DLAMC3( DSIGJ, -POLES( I,
+     $                           2 ) )-DIFL( I ) ) /
+     $                           ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 )
+                  END IF
+   70          CONTINUE
+               CALL DGEMV( 'T', K, NRHS, ONE, B, LDB, WORK, 1, ZERO,
+     $                     BX( J, 1 ), LDBX )
+   80       CONTINUE
+         END IF
+*
+*        Step (2R): if SQRE = 1, apply back the rotation that is
+*        related to the right null space of the subproblem.
+*
+         IF( SQRE.EQ.1 ) THEN
+            CALL DCOPY( NRHS, B( M, 1 ), LDB, BX( M, 1 ), LDBX )
+            CALL DROT( NRHS, BX( 1, 1 ), LDBX, BX( M, 1 ), LDBX, C, S )
+         END IF
+         IF( K.LT.MAX( M, N ) )
+     $      CALL DLACPY( 'A', N-K, NRHS, B( K+1, 1 ), LDB, BX( K+1, 1 ),
+     $                   LDBX )
+*
+*        Step (3R): permute rows of B.
+*
+         CALL DCOPY( NRHS, BX( 1, 1 ), LDBX, B( NLP1, 1 ), LDB )
+         IF( SQRE.EQ.1 ) THEN
+            CALL DCOPY( NRHS, BX( M, 1 ), LDBX, B( M, 1 ), LDB )
+         END IF
+         DO 90 I = 2, N
+            CALL DCOPY( NRHS, BX( I, 1 ), LDBX, B( PERM( I ), 1 ), LDB )
+   90    CONTINUE
+*
+*        Step (4R): apply back the Givens rotations performed.
+*
+         DO 100 I = GIVPTR, 1, -1
+            CALL DROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB,
+     $                 B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ),
+     $                 -GIVNUM( I, 1 ) )
+  100    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DLALS0
+*
+      END
diff --git a/costa/native/external/lapack/dlalsa.f b/costa/native/external/lapack/dlalsa.f
new file mode 100644
index 000000000..d7ecb6037
--- /dev/null
+++ b/costa/native/external/lapack/dlalsa.f
@@ -0,0 +1,363 @@
+      SUBROUTINE DLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U,
+     $                   LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR,
+     $                   GIVCOL, LDGCOL, PERM, GIVNUM, C, S, WORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS,
+     $                   SMLSIZ
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ),
+     $                   K( * ), PERM( LDGCOL, * )
+      DOUBLE PRECISION   B( LDB, * ), BX( LDBX, * ), C( * ),
+     $                   DIFL( LDU, * ), DIFR( LDU, * ),
+     $                   GIVNUM( LDU, * ), POLES( LDU, * ), S( * ),
+     $                   U( LDU, * ), VT( LDU, * ), WORK( * ),
+     $                   Z( LDU, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLALSA is an itermediate step in solving the least squares problem
+*  by computing the SVD of the coefficient matrix in compact form (The
+*  singular vectors are computed as products of simple orthorgonal
+*  matrices.).
+*
+*  If ICOMPQ = 0, DLALSA applies the inverse of the left singular vector
+*  matrix of an upper bidiagonal matrix to the right hand side; and if
+*  ICOMPQ = 1, DLALSA applies the right singular vector matrix to the
+*  right hand side. The singular vector matrices were generated in
+*  compact form by DLALSA.
+*
+*  Arguments
+*  =========
+*
+*
+*  ICOMPQ (input) INTEGER
+*         Specifies whether the left or the right singular vector
+*         matrix is involved.
+*         = 0: Left singular vector matrix
+*         = 1: Right singular vector matrix
+*
+*  SMLSIZ (input) INTEGER
+*         The maximum size of the subproblems at the bottom of the
+*         computation tree.
+*
+*  N      (input) INTEGER
+*         The row and column dimensions of the upper bidiagonal matrix.
+*
+*  NRHS   (input) INTEGER
+*         The number of columns of B and BX. NRHS must be at least 1.
+*
+*  B      (input) DOUBLE PRECISION array, dimension ( LDB, NRHS )
+*         On input, B contains the right hand sides of the least
+*         squares problem in rows 1 through M. On output, B contains
+*         the solution X in rows 1 through N.
+*
+*  LDB    (input) INTEGER
+*         The leading dimension of B in the calling subprogram.
+*         LDB must be at least max(1,MAX( M, N ) ).
+*
+*  BX     (output) DOUBLE PRECISION array, dimension ( LDBX, NRHS )
+*         On exit, the result of applying the left or right singular
+*         vector matrix to B.
+*
+*  LDBX   (input) INTEGER
+*         The leading dimension of BX.
+*
+*  U      (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ).
+*         On entry, U contains the left singular vector matrices of all
+*         subproblems at the bottom level.
+*
+*  LDU    (input) INTEGER, LDU = > N.
+*         The leading dimension of arrays U, VT, DIFL, DIFR,
+*         POLES, GIVNUM, and Z.
+*
+*  VT     (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ).
+*         On entry, VT' contains the right singular vector matrices of
+*         all subproblems at the bottom level.
+*
+*  K      (input) INTEGER array, dimension ( N ).
+*
+*  DIFL   (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ).
+*         where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1.
+*
+*  DIFR   (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).
+*         On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record
+*         distances between singular values on the I-th level and
+*         singular values on the (I -1)-th level, and DIFR(*, 2 * I)
+*         record the normalizing factors of the right singular vectors
+*         matrices of subproblems on I-th level.
+*
+*  Z      (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ).
+*         On entry, Z(1, I) contains the components of the deflation-
+*         adjusted updating row vector for subproblems on the I-th
+*         level.
+*
+*  POLES  (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).
+*         On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old
+*         singular values involved in the secular equations on the I-th
+*         level.
+*
+*  GIVPTR (input) INTEGER array, dimension ( N ).
+*         On entry, GIVPTR( I ) records the number of Givens
+*         rotations performed on the I-th problem on the computation
+*         tree.
+*
+*  GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ).
+*         On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the
+*         locations of Givens rotations performed on the I-th level on
+*         the computation tree.
+*
+*  LDGCOL (input) INTEGER, LDGCOL = > N.
+*         The leading dimension of arrays GIVCOL and PERM.
+*
+*  PERM   (input) INTEGER array, dimension ( LDGCOL, NLVL ).
+*         On entry, PERM(*, I) records permutations done on the I-th
+*         level of the computation tree.
+*
+*  GIVNUM (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).
+*         On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S-
+*         values of Givens rotations performed on the I-th level on the
+*         computation tree.
+*
+*  C      (input) DOUBLE PRECISION array, dimension ( N ).
+*         On entry, if the I-th subproblem is not square,
+*         C( I ) contains the C-value of a Givens rotation related to
+*         the right null space of the I-th subproblem.
+*
+*  S      (input) DOUBLE PRECISION array, dimension ( N ).
+*         On entry, if the I-th subproblem is not square,
+*         S( I ) contains the S-value of a Givens rotation related to
+*         the right null space of the I-th subproblem.
+*
+*  WORK   (workspace) DOUBLE PRECISION array.
+*         The dimension must be at least N.
+*
+*  IWORK  (workspace) INTEGER array.
+*         The dimension must be at least 3 * N
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Ren-Cang Li, Computer Science Division, University of
+*       California at Berkeley, USA
+*     Osni Marques, LBNL/NERSC, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, I1, IC, IM1, INODE, J, LF, LL, LVL, LVL2,
+     $                   ND, NDB1, NDIML, NDIMR, NL, NLF, NLP1, NLVL,
+     $                   NR, NRF, NRP1, SQRE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DGEMM, DLALS0, DLASDT, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( SMLSIZ.LT.3 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.SMLSIZ ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.1 ) THEN
+         INFO = -4
+      ELSE IF( LDB.LT.N ) THEN
+         INFO = -6
+      ELSE IF( LDBX.LT.N ) THEN
+         INFO = -8
+      ELSE IF( LDU.LT.N ) THEN
+         INFO = -10
+      ELSE IF( LDGCOL.LT.N ) THEN
+         INFO = -19
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLALSA', -INFO )
+         RETURN
+      END IF
+*
+*     Book-keeping and  setting up the computation tree.
+*
+      INODE = 1
+      NDIML = INODE + N
+      NDIMR = NDIML + N
+*
+      CALL DLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ),
+     $             IWORK( NDIMR ), SMLSIZ )
+*
+*     The following code applies back the left singular vector factors.
+*     For applying back the right singular vector factors, go to 50.
+*
+      IF( ICOMPQ.EQ.1 ) THEN
+         GO TO 50
+      END IF
+*
+*     The nodes on the bottom level of the tree were solved
+*     by DLASDQ. The corresponding left and right singular vector
+*     matrices are in explicit form. First apply back the left
+*     singular vector matrices.
+*
+      NDB1 = ( ND+1 ) / 2
+      DO 10 I = NDB1, ND
+*
+*        IC : center row of each node
+*        NL : number of rows of left  subproblem
+*        NR : number of rows of right subproblem
+*        NLF: starting row of the left   subproblem
+*        NRF: starting row of the right  subproblem
+*
+         I1 = I - 1
+         IC = IWORK( INODE+I1 )
+         NL = IWORK( NDIML+I1 )
+         NR = IWORK( NDIMR+I1 )
+         NLF = IC - NL
+         NRF = IC + 1
+         CALL DGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
+     $               B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
+         CALL DGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
+     $               B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
+   10 CONTINUE
+*
+*     Next copy the rows of B that correspond to unchanged rows
+*     in the bidiagonal matrix to BX.
+*
+      DO 20 I = 1, ND
+         IC = IWORK( INODE+I-1 )
+         CALL DCOPY( NRHS, B( IC, 1 ), LDB, BX( IC, 1 ), LDBX )
+   20 CONTINUE
+*
+*     Finally go through the left singular vector matrices of all
+*     the other subproblems bottom-up on the tree.
+*
+      J = 2**NLVL
+      SQRE = 0
+*
+      DO 40 LVL = NLVL, 1, -1
+         LVL2 = 2*LVL - 1
+*
+*        find the first node LF and last node LL on
+*        the current level LVL
+*
+         IF( LVL.EQ.1 ) THEN
+            LF = 1
+            LL = 1
+         ELSE
+            LF = 2**( LVL-1 )
+            LL = 2*LF - 1
+         END IF
+         DO 30 I = LF, LL
+            IM1 = I - 1
+            IC = IWORK( INODE+IM1 )
+            NL = IWORK( NDIML+IM1 )
+            NR = IWORK( NDIMR+IM1 )
+            NLF = IC - NL
+            NRF = IC + 1
+            J = J - 1
+            CALL DLALS0( ICOMPQ, NL, NR, SQRE, NRHS, BX( NLF, 1 ), LDBX,
+     $                   B( NLF, 1 ), LDB, PERM( NLF, LVL ),
+     $                   GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
+     $                   GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ),
+     $                   DIFL( NLF, LVL ), DIFR( NLF, LVL2 ),
+     $                   Z( NLF, LVL ), K( J ), C( J ), S( J ), WORK,
+     $                   INFO )
+   30    CONTINUE
+   40 CONTINUE
+      GO TO 90
+*
+*     ICOMPQ = 1: applying back the right singular vector factors.
+*
+   50 CONTINUE
+*
+*     First now go through the right singular vector matrices of all
+*     the tree nodes top-down.
+*
+      J = 0
+      DO 70 LVL = 1, NLVL
+         LVL2 = 2*LVL - 1
+*
+*        Find the first node LF and last node LL on
+*        the current level LVL.
+*
+         IF( LVL.EQ.1 ) THEN
+            LF = 1
+            LL = 1
+         ELSE
+            LF = 2**( LVL-1 )
+            LL = 2*LF - 1
+         END IF
+         DO 60 I = LL, LF, -1
+            IM1 = I - 1
+            IC = IWORK( INODE+IM1 )
+            NL = IWORK( NDIML+IM1 )
+            NR = IWORK( NDIMR+IM1 )
+            NLF = IC - NL
+            NRF = IC + 1
+            IF( I.EQ.LL ) THEN
+               SQRE = 0
+            ELSE
+               SQRE = 1
+            END IF
+            J = J + 1
+            CALL DLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B( NLF, 1 ), LDB,
+     $                   BX( NLF, 1 ), LDBX, PERM( NLF, LVL ),
+     $                   GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
+     $                   GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ),
+     $                   DIFL( NLF, LVL ), DIFR( NLF, LVL2 ),
+     $                   Z( NLF, LVL ), K( J ), C( J ), S( J ), WORK,
+     $                   INFO )
+   60    CONTINUE
+   70 CONTINUE
+*
+*     The nodes on the bottom level of the tree were solved
+*     by DLASDQ. The corresponding right singular vector
+*     matrices are in explicit form. Apply them back.
+*
+      NDB1 = ( ND+1 ) / 2
+      DO 80 I = NDB1, ND
+         I1 = I - 1
+         IC = IWORK( INODE+I1 )
+         NL = IWORK( NDIML+I1 )
+         NR = IWORK( NDIMR+I1 )
+         NLP1 = NL + 1
+         IF( I.EQ.ND ) THEN
+            NRP1 = NR
+         ELSE
+            NRP1 = NR + 1
+         END IF
+         NLF = IC - NL
+         NRF = IC + 1
+         CALL DGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
+     $               B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
+         CALL DGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
+     $               B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
+   80 CONTINUE
+*
+   90 CONTINUE
+*
+      RETURN
+*
+*     End of DLALSA
+*
+      END
diff --git a/costa/native/external/lapack/dlalsd.f b/costa/native/external/lapack/dlalsd.f
new file mode 100644
index 000000000..188694aeb
--- /dev/null
+++ b/costa/native/external/lapack/dlalsd.f
@@ -0,0 +1,433 @@
+      SUBROUTINE DLALSD( UPLO, SMLSIZ, N, NRHS, D, E, B, LDB, RCOND,
+     $                   RANK, WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, N, NRHS, RANK, SMLSIZ
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   B( LDB, * ), D( * ), E( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLALSD uses the singular value decomposition of A to solve the least
+*  squares problem of finding X to minimize the Euclidean norm of each
+*  column of A*X-B, where A is N-by-N upper bidiagonal, and X and B
+*  are N-by-NRHS. The solution X overwrites B.
+*
+*  The singular values of A smaller than RCOND times the largest
+*  singular value are treated as zero in solving the least squares
+*  problem; in this case a minimum norm solution is returned.
+*  The actual singular values are returned in D in ascending order.
+*
+*  This code makes very mild assumptions about floating point
+*  arithmetic. It will work on machines with a guard digit in
+*  add/subtract, or on those binary machines without guard digits
+*  which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2.
+*  It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  UPLO   (input) CHARACTER*1
+*         = 'U': D and E define an upper bidiagonal matrix.
+*         = 'L': D and E define a  lower bidiagonal matrix.
+*
+*  SMLSIZ (input) INTEGER
+*         The maximum size of the subproblems at the bottom of the
+*         computation tree.
+*
+*  N      (input) INTEGER
+*         The dimension of the  bidiagonal matrix.  N >= 0.
+*
+*  NRHS   (input) INTEGER
+*         The number of columns of B. NRHS must be at least 1.
+*
+*  D      (input/output) DOUBLE PRECISION array, dimension (N)
+*         On entry D contains the main diagonal of the bidiagonal
+*         matrix. On exit, if INFO = 0, D contains its singular values.
+*
+*  E      (input) DOUBLE PRECISION array, dimension (N-1)
+*         Contains the super-diagonal entries of the bidiagonal matrix.
+*         On exit, E has been destroyed.
+*
+*  B      (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*         On input, B contains the right hand sides of the least
+*         squares problem. On output, B contains the solution X.
+*
+*  LDB    (input) INTEGER
+*         The leading dimension of B in the calling subprogram.
+*         LDB must be at least max(1,N).
+*
+*  RCOND  (input) DOUBLE PRECISION
+*         The singular values of A less than or equal to RCOND times
+*         the largest singular value are treated as zero in solving
+*         the least squares problem. If RCOND is negative,
+*         machine precision is used instead.
+*         For example, if diag(S)*X=B were the least squares problem,
+*         where diag(S) is a diagonal matrix of singular values, the
+*         solution would be X(i) = B(i) / S(i) if S(i) is greater than
+*         RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to
+*         RCOND*max(S).
+*
+*  RANK   (output) INTEGER
+*         The number of singular values of A greater than RCOND times
+*         the largest singular value.
+*
+*  WORK   (workspace) DOUBLE PRECISION array, dimension at least
+*         (9*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2),
+*         where NLVL = max(0, INT(log_2 (N/(SMLSIZ+1))) + 1).
+*
+*  IWORK  (workspace) INTEGER array, dimension at least
+*         (3*N*NLVL + 11*N)
+*
+*  INFO   (output) INTEGER
+*         = 0:  successful exit.
+*         < 0:  if INFO = -i, the i-th argument had an illegal value.
+*         > 0:  The algorithm failed to compute an singular value while
+*               working on the submatrix lying in rows and columns
+*               INFO/(N+1) through MOD(INFO,N+1).
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Ren-Cang Li, Computer Science Division, University of
+*       California at Berkeley, USA
+*     Osni Marques, LBNL/NERSC, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            BX, BXST, C, DIFL, DIFR, GIVCOL, GIVNUM,
+     $                   GIVPTR, I, ICMPQ1, ICMPQ2, IWK, J, K, NLVL,
+     $                   NM1, NSIZE, NSUB, NWORK, PERM, POLES, S, SIZEI,
+     $                   SMLSZP, SQRE, ST, ST1, U, VT, Z
+      DOUBLE PRECISION   CS, EPS, ORGNRM, R, SN, TOL
+*     ..
+*     .. External Functions ..
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DLAMCH, DLANST
+      EXTERNAL           IDAMAX, DLAMCH, DLANST
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DGEMM, DLACPY, DLALSA, DLARTG, DLASCL,
+     $                   DLASDA, DLASDQ, DLASET, DLASRT, DROT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, INT, LOG, SIGN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.1 ) THEN
+         INFO = -4
+      ELSE IF( ( LDB.LT.1 ) .OR. ( LDB.LT.N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLALSD', -INFO )
+         RETURN
+      END IF
+*
+      EPS = DLAMCH( 'Epsilon' )
+*
+*     Set up the tolerance.
+*
+      IF( ( RCOND.LE.ZERO ) .OR. ( RCOND.GE.ONE ) ) THEN
+         RCOND = EPS
+      END IF
+*
+      RANK = 0
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 ) THEN
+         RETURN
+      ELSE IF( N.EQ.1 ) THEN
+         IF( D( 1 ).EQ.ZERO ) THEN
+            CALL DLASET( 'A', 1, NRHS, ZERO, ZERO, B, LDB )
+         ELSE
+            RANK = 1
+            CALL DLASCL( 'G', 0, 0, D( 1 ), ONE, 1, NRHS, B, LDB, INFO )
+            D( 1 ) = ABS( D( 1 ) )
+         END IF
+         RETURN
+      END IF
+*
+*     Rotate the matrix if it is lower bidiagonal.
+*
+      IF( UPLO.EQ.'L' ) THEN
+         DO 10 I = 1, N - 1
+            CALL DLARTG( D( I ), E( I ), CS, SN, R )
+            D( I ) = R
+            E( I ) = SN*D( I+1 )
+            D( I+1 ) = CS*D( I+1 )
+            IF( NRHS.EQ.1 ) THEN
+               CALL DROT( 1, B( I, 1 ), 1, B( I+1, 1 ), 1, CS, SN )
+            ELSE
+               WORK( I*2-1 ) = CS
+               WORK( I*2 ) = SN
+            END IF
+   10    CONTINUE
+         IF( NRHS.GT.1 ) THEN
+            DO 30 I = 1, NRHS
+               DO 20 J = 1, N - 1
+                  CS = WORK( J*2-1 )
+                  SN = WORK( J*2 )
+                  CALL DROT( 1, B( J, I ), 1, B( J+1, I ), 1, CS, SN )
+   20          CONTINUE
+   30       CONTINUE
+         END IF
+      END IF
+*
+*     Scale.
+*
+      NM1 = N - 1
+      ORGNRM = DLANST( 'M', N, D, E )
+      IF( ORGNRM.EQ.ZERO ) THEN
+         CALL DLASET( 'A', N, NRHS, ZERO, ZERO, B, LDB )
+         RETURN
+      END IF
+*
+      CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO )
+      CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, NM1, 1, E, NM1, INFO )
+*
+*     If N is smaller than the minimum divide size SMLSIZ, then solve
+*     the problem with another solver.
+*
+      IF( N.LE.SMLSIZ ) THEN
+         NWORK = 1 + N*N
+         CALL DLASET( 'A', N, N, ZERO, ONE, WORK, N )
+         CALL DLASDQ( 'U', 0, N, N, 0, NRHS, D, E, WORK, N, WORK, N, B,
+     $                LDB, WORK( NWORK ), INFO )
+         IF( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+         TOL = RCOND*ABS( D( IDAMAX( N, D, 1 ) ) )
+         DO 40 I = 1, N
+            IF( D( I ).LE.TOL ) THEN
+               CALL DLASET( 'A', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
+            ELSE
+               CALL DLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS, B( I, 1 ),
+     $                      LDB, INFO )
+               RANK = RANK + 1
+            END IF
+   40    CONTINUE
+         CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, WORK, N, B, LDB, ZERO,
+     $               WORK( NWORK ), N )
+         CALL DLACPY( 'A', N, NRHS, WORK( NWORK ), N, B, LDB )
+*
+*        Unscale.
+*
+         CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO )
+         CALL DLASRT( 'D', N, D, INFO )
+         CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO )
+*
+         RETURN
+      END IF
+*
+*     Book-keeping and setting up some constants.
+*
+      NLVL = INT( LOG( DBLE( N ) / DBLE( SMLSIZ+1 ) ) / LOG( TWO ) ) + 1
+*
+      SMLSZP = SMLSIZ + 1
+*
+      U = 1
+      VT = 1 + SMLSIZ*N
+      DIFL = VT + SMLSZP*N
+      DIFR = DIFL + NLVL*N
+      Z = DIFR + NLVL*N*2
+      C = Z + NLVL*N
+      S = C + N
+      POLES = S + N
+      GIVNUM = POLES + 2*NLVL*N
+      BX = GIVNUM + 2*NLVL*N
+      NWORK = BX + N*NRHS
+*
+      SIZEI = 1 + N
+      K = SIZEI + N
+      GIVPTR = K + N
+      PERM = GIVPTR + N
+      GIVCOL = PERM + NLVL*N
+      IWK = GIVCOL + NLVL*N*2
+*
+      ST = 1
+      SQRE = 0
+      ICMPQ1 = 1
+      ICMPQ2 = 0
+      NSUB = 0
+*
+      DO 50 I = 1, N
+         IF( ABS( D( I ) ).LT.EPS ) THEN
+            D( I ) = SIGN( EPS, D( I ) )
+         END IF
+   50 CONTINUE
+*
+      DO 60 I = 1, NM1
+         IF( ( ABS( E( I ) ).LT.EPS ) .OR. ( I.EQ.NM1 ) ) THEN
+            NSUB = NSUB + 1
+            IWORK( NSUB ) = ST
+*
+*           Subproblem found. First determine its size and then
+*           apply divide and conquer on it.
+*
+            IF( I.LT.NM1 ) THEN
+*
+*              A subproblem with E(I) small for I < NM1.
+*
+               NSIZE = I - ST + 1
+               IWORK( SIZEI+NSUB-1 ) = NSIZE
+            ELSE IF( ABS( E( I ) ).GE.EPS ) THEN
+*
+*              A subproblem with E(NM1) not too small but I = NM1.
+*
+               NSIZE = N - ST + 1
+               IWORK( SIZEI+NSUB-1 ) = NSIZE
+            ELSE
+*
+*              A subproblem with E(NM1) small. This implies an
+*              1-by-1 subproblem at D(N), which is not solved
+*              explicitly.
+*
+               NSIZE = I - ST + 1
+               IWORK( SIZEI+NSUB-1 ) = NSIZE
+               NSUB = NSUB + 1
+               IWORK( NSUB ) = N
+               IWORK( SIZEI+NSUB-1 ) = 1
+               CALL DCOPY( NRHS, B( N, 1 ), LDB, WORK( BX+NM1 ), N )
+            END IF
+            ST1 = ST - 1
+            IF( NSIZE.EQ.1 ) THEN
+*
+*              This is a 1-by-1 subproblem and is not solved
+*              explicitly.
+*
+               CALL DCOPY( NRHS, B( ST, 1 ), LDB, WORK( BX+ST1 ), N )
+            ELSE IF( NSIZE.LE.SMLSIZ ) THEN
+*
+*              This is a small subproblem and is solved by DLASDQ.
+*
+               CALL DLASET( 'A', NSIZE, NSIZE, ZERO, ONE,
+     $                      WORK( VT+ST1 ), N )
+               CALL DLASDQ( 'U', 0, NSIZE, NSIZE, 0, NRHS, D( ST ),
+     $                      E( ST ), WORK( VT+ST1 ), N, WORK( NWORK ),
+     $                      N, B( ST, 1 ), LDB, WORK( NWORK ), INFO )
+               IF( INFO.NE.0 ) THEN
+                  RETURN
+               END IF
+               CALL DLACPY( 'A', NSIZE, NRHS, B( ST, 1 ), LDB,
+     $                      WORK( BX+ST1 ), N )
+            ELSE
+*
+*              A large problem. Solve it using divide and conquer.
+*
+               CALL DLASDA( ICMPQ1, SMLSIZ, NSIZE, SQRE, D( ST ),
+     $                      E( ST ), WORK( U+ST1 ), N, WORK( VT+ST1 ),
+     $                      IWORK( K+ST1 ), WORK( DIFL+ST1 ),
+     $                      WORK( DIFR+ST1 ), WORK( Z+ST1 ),
+     $                      WORK( POLES+ST1 ), IWORK( GIVPTR+ST1 ),
+     $                      IWORK( GIVCOL+ST1 ), N, IWORK( PERM+ST1 ),
+     $                      WORK( GIVNUM+ST1 ), WORK( C+ST1 ),
+     $                      WORK( S+ST1 ), WORK( NWORK ), IWORK( IWK ),
+     $                      INFO )
+               IF( INFO.NE.0 ) THEN
+                  RETURN
+               END IF
+               BXST = BX + ST1
+               CALL DLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, B( ST, 1 ),
+     $                      LDB, WORK( BXST ), N, WORK( U+ST1 ), N,
+     $                      WORK( VT+ST1 ), IWORK( K+ST1 ),
+     $                      WORK( DIFL+ST1 ), WORK( DIFR+ST1 ),
+     $                      WORK( Z+ST1 ), WORK( POLES+ST1 ),
+     $                      IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N,
+     $                      IWORK( PERM+ST1 ), WORK( GIVNUM+ST1 ),
+     $                      WORK( C+ST1 ), WORK( S+ST1 ), WORK( NWORK ),
+     $                      IWORK( IWK ), INFO )
+               IF( INFO.NE.0 ) THEN
+                  RETURN
+               END IF
+            END IF
+            ST = I + 1
+         END IF
+   60 CONTINUE
+*
+*     Apply the singular values and treat the tiny ones as zero.
+*
+      TOL = RCOND*ABS( D( IDAMAX( N, D, 1 ) ) )
+*
+      DO 70 I = 1, N
+*
+*        Some of the elements in D can be negative because 1-by-1
+*        subproblems were not solved explicitly.
+*
+         IF( ABS( D( I ) ).LE.TOL ) THEN
+            CALL DLASET( 'A', 1, NRHS, ZERO, ZERO, WORK( BX+I-1 ), N )
+         ELSE
+            RANK = RANK + 1
+            CALL DLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS,
+     $                   WORK( BX+I-1 ), N, INFO )
+         END IF
+         D( I ) = ABS( D( I ) )
+   70 CONTINUE
+*
+*     Now apply back the right singular vectors.
+*
+      ICMPQ2 = 1
+      DO 80 I = 1, NSUB
+         ST = IWORK( I )
+         ST1 = ST - 1
+         NSIZE = IWORK( SIZEI+I-1 )
+         BXST = BX + ST1
+         IF( NSIZE.EQ.1 ) THEN
+            CALL DCOPY( NRHS, WORK( BXST ), N, B( ST, 1 ), LDB )
+         ELSE IF( NSIZE.LE.SMLSIZ ) THEN
+            CALL DGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE,
+     $                  WORK( VT+ST1 ), N, WORK( BXST ), N, ZERO,
+     $                  B( ST, 1 ), LDB )
+         ELSE
+            CALL DLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, WORK( BXST ), N,
+     $                   B( ST, 1 ), LDB, WORK( U+ST1 ), N,
+     $                   WORK( VT+ST1 ), IWORK( K+ST1 ),
+     $                   WORK( DIFL+ST1 ), WORK( DIFR+ST1 ),
+     $                   WORK( Z+ST1 ), WORK( POLES+ST1 ),
+     $                   IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N,
+     $                   IWORK( PERM+ST1 ), WORK( GIVNUM+ST1 ),
+     $                   WORK( C+ST1 ), WORK( S+ST1 ), WORK( NWORK ),
+     $                   IWORK( IWK ), INFO )
+            IF( INFO.NE.0 ) THEN
+               RETURN
+            END IF
+         END IF
+   80 CONTINUE
+*
+*     Unscale and sort the singular values.
+*
+      CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO )
+      CALL DLASRT( 'D', N, D, INFO )
+      CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO )
+*
+      RETURN
+*
+*     End of DLALSD
+*
+      END
diff --git a/costa/native/external/lapack/dlamch.f b/costa/native/external/lapack/dlamch.f
new file mode 100644
index 000000000..64ac3becd
--- /dev/null
+++ b/costa/native/external/lapack/dlamch.f
@@ -0,0 +1,857 @@
+      DOUBLE PRECISION FUNCTION DLAMCH( CMACH )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          CMACH
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAMCH determines double precision machine parameters.
+*
+*  Arguments
+*  =========
+*
+*  CMACH   (input) CHARACTER*1
+*          Specifies the value to be returned by DLAMCH:
+*          = 'E' or 'e',   DLAMCH := eps
+*          = 'S' or 's ,   DLAMCH := sfmin
+*          = 'B' or 'b',   DLAMCH := base
+*          = 'P' or 'p',   DLAMCH := eps*base
+*          = 'N' or 'n',   DLAMCH := t
+*          = 'R' or 'r',   DLAMCH := rnd
+*          = 'M' or 'm',   DLAMCH := emin
+*          = 'U' or 'u',   DLAMCH := rmin
+*          = 'L' or 'l',   DLAMCH := emax
+*          = 'O' or 'o',   DLAMCH := rmax
+*
+*          where
+*
+*          eps   = relative machine precision
+*          sfmin = safe minimum, such that 1/sfmin does not overflow
+*          base  = base of the machine
+*          prec  = eps*base
+*          t     = number of (base) digits in the mantissa
+*          rnd   = 1.0 when rounding occurs in addition, 0.0 otherwise
+*          emin  = minimum exponent before (gradual) underflow
+*          rmin  = underflow threshold - base**(emin-1)
+*          emax  = largest exponent before overflow
+*          rmax  = overflow threshold  - (base**emax)*(1-eps)
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            FIRST, LRND
+      INTEGER            BETA, IMAX, IMIN, IT
+      DOUBLE PRECISION   BASE, EMAX, EMIN, EPS, PREC, RMACH, RMAX, RMIN,
+     $                   RND, SFMIN, SMALL, T
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLAMC2
+*     ..
+*     .. Save statement ..
+      SAVE               FIRST, EPS, SFMIN, BASE, T, RND, EMIN, RMIN,
+     $                   EMAX, RMAX, PREC
+*     ..
+*     .. Data statements ..
+      DATA               FIRST / .TRUE. /
+*     ..
+*     .. Executable Statements ..
+*
+      IF( FIRST ) THEN
+         FIRST = .FALSE.
+         CALL DLAMC2( BETA, IT, LRND, EPS, IMIN, RMIN, IMAX, RMAX )
+         BASE = BETA
+         T = IT
+         IF( LRND ) THEN
+            RND = ONE
+            EPS = ( BASE**( 1-IT ) ) / 2
+         ELSE
+            RND = ZERO
+            EPS = BASE**( 1-IT )
+         END IF
+         PREC = EPS*BASE
+         EMIN = IMIN
+         EMAX = IMAX
+         SFMIN = RMIN
+         SMALL = ONE / RMAX
+         IF( SMALL.GE.SFMIN ) THEN
+*
+*           Use SMALL plus a bit, to avoid the possibility of rounding
+*           causing overflow when computing  1/sfmin.
+*
+            SFMIN = SMALL*( ONE+EPS )
+         END IF
+      END IF
+*
+      IF( LSAME( CMACH, 'E' ) ) THEN
+         RMACH = EPS
+      ELSE IF( LSAME( CMACH, 'S' ) ) THEN
+         RMACH = SFMIN
+      ELSE IF( LSAME( CMACH, 'B' ) ) THEN
+         RMACH = BASE
+      ELSE IF( LSAME( CMACH, 'P' ) ) THEN
+         RMACH = PREC
+      ELSE IF( LSAME( CMACH, 'N' ) ) THEN
+         RMACH = T
+      ELSE IF( LSAME( CMACH, 'R' ) ) THEN
+         RMACH = RND
+      ELSE IF( LSAME( CMACH, 'M' ) ) THEN
+         RMACH = EMIN
+      ELSE IF( LSAME( CMACH, 'U' ) ) THEN
+         RMACH = RMIN
+      ELSE IF( LSAME( CMACH, 'L' ) ) THEN
+         RMACH = EMAX
+      ELSE IF( LSAME( CMACH, 'O' ) ) THEN
+         RMACH = RMAX
+      END IF
+*
+      DLAMCH = RMACH
+      RETURN
+*
+*     End of DLAMCH
+*
+      END
+*
+************************************************************************
+*
+      SUBROUTINE DLAMC1( BETA, T, RND, IEEE1 )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      LOGICAL            IEEE1, RND
+      INTEGER            BETA, T
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAMC1 determines the machine parameters given by BETA, T, RND, and
+*  IEEE1.
+*
+*  Arguments
+*  =========
+*
+*  BETA    (output) INTEGER
+*          The base of the machine.
+*
+*  T       (output) INTEGER
+*          The number of ( BETA ) digits in the mantissa.
+*
+*  RND     (output) LOGICAL
+*          Specifies whether proper rounding  ( RND = .TRUE. )  or
+*          chopping  ( RND = .FALSE. )  occurs in addition. This may not
+*          be a reliable guide to the way in which the machine performs
+*          its arithmetic.
+*
+*  IEEE1   (output) LOGICAL
+*          Specifies whether rounding appears to be done in the IEEE
+*          'round to nearest' style.
+*
+*  Further Details
+*  ===============
+*
+*  The routine is based on the routine  ENVRON  by Malcolm and
+*  incorporates suggestions by Gentleman and Marovich. See
+*
+*     Malcolm M. A. (1972) Algorithms to reveal properties of
+*        floating-point arithmetic. Comms. of the ACM, 15, 949-951.
+*
+*     Gentleman W. M. and Marovich S. B. (1974) More on algorithms
+*        that reveal properties of floating point arithmetic units.
+*        Comms. of the ACM, 17, 276-277.
+*
+* =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            FIRST, LIEEE1, LRND
+      INTEGER            LBETA, LT
+      DOUBLE PRECISION   A, B, C, F, ONE, QTR, SAVEC, T1, T2
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMC3
+      EXTERNAL           DLAMC3
+*     ..
+*     .. Save statement ..
+      SAVE               FIRST, LIEEE1, LBETA, LRND, LT
+*     ..
+*     .. Data statements ..
+      DATA               FIRST / .TRUE. /
+*     ..
+*     .. Executable Statements ..
+*
+      IF( FIRST ) THEN
+         FIRST = .FALSE.
+         ONE = 1
+*
+*        LBETA,  LIEEE1,  LT and  LRND  are the  local values  of  BETA,
+*        IEEE1, T and RND.
+*
+*        Throughout this routine  we use the function  DLAMC3  to ensure
+*        that relevant values are  stored and not held in registers,  or
+*        are not affected by optimizers.
+*
+*        Compute  a = 2.0**m  with the  smallest positive integer m such
+*        that
+*
+*           fl( a + 1.0 ) = a.
+*
+         A = 1
+         C = 1
+*
+*+       WHILE( C.EQ.ONE )LOOP
+   10    CONTINUE
+         IF( C.EQ.ONE ) THEN
+            A = 2*A
+            C = DLAMC3( A, ONE )
+            C = DLAMC3( C, -A )
+            GO TO 10
+         END IF
+*+       END WHILE
+*
+*        Now compute  b = 2.0**m  with the smallest positive integer m
+*        such that
+*
+*           fl( a + b ) .gt. a.
+*
+         B = 1
+         C = DLAMC3( A, B )
+*
+*+       WHILE( C.EQ.A )LOOP
+   20    CONTINUE
+         IF( C.EQ.A ) THEN
+            B = 2*B
+            C = DLAMC3( A, B )
+            GO TO 20
+         END IF
+*+       END WHILE
+*
+*        Now compute the base.  a and c  are neighbouring floating point
+*        numbers  in the  interval  ( beta**t, beta**( t + 1 ) )  and so
+*        their difference is beta. Adding 0.25 to c is to ensure that it
+*        is truncated to beta and not ( beta - 1 ).
+*
+         QTR = ONE / 4
+         SAVEC = C
+         C = DLAMC3( C, -A )
+         LBETA = C + QTR
+*
+*        Now determine whether rounding or chopping occurs,  by adding a
+*        bit  less  than  beta/2  and a  bit  more  than  beta/2  to  a.
+*
+         B = LBETA
+         F = DLAMC3( B / 2, -B / 100 )
+         C = DLAMC3( F, A )
+         IF( C.EQ.A ) THEN
+            LRND = .TRUE.
+         ELSE
+            LRND = .FALSE.
+         END IF
+         F = DLAMC3( B / 2, B / 100 )
+         C = DLAMC3( F, A )
+         IF( ( LRND ) .AND. ( C.EQ.A ) )
+     $      LRND = .FALSE.
+*
+*        Try and decide whether rounding is done in the  IEEE  'round to
+*        nearest' style. B/2 is half a unit in the last place of the two
+*        numbers A and SAVEC. Furthermore, A is even, i.e. has last  bit
+*        zero, and SAVEC is odd. Thus adding B/2 to A should not  change
+*        A, but adding B/2 to SAVEC should change SAVEC.
+*
+         T1 = DLAMC3( B / 2, A )
+         T2 = DLAMC3( B / 2, SAVEC )
+         LIEEE1 = ( T1.EQ.A ) .AND. ( T2.GT.SAVEC ) .AND. LRND
+*
+*        Now find  the  mantissa, t.  It should  be the  integer part of
+*        log to the base beta of a,  however it is safer to determine  t
+*        by powering.  So we find t as the smallest positive integer for
+*        which
+*
+*           fl( beta**t + 1.0 ) = 1.0.
+*
+         LT = 0
+         A = 1
+         C = 1
+*
+*+       WHILE( C.EQ.ONE )LOOP
+   30    CONTINUE
+         IF( C.EQ.ONE ) THEN
+            LT = LT + 1
+            A = A*LBETA
+            C = DLAMC3( A, ONE )
+            C = DLAMC3( C, -A )
+            GO TO 30
+         END IF
+*+       END WHILE
+*
+      END IF
+*
+      BETA = LBETA
+      T = LT
+      RND = LRND
+      IEEE1 = LIEEE1
+      RETURN
+*
+*     End of DLAMC1
+*
+      END
+*
+************************************************************************
+*
+      SUBROUTINE DLAMC2( BETA, T, RND, EPS, EMIN, RMIN, EMAX, RMAX )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      LOGICAL            RND
+      INTEGER            BETA, EMAX, EMIN, T
+      DOUBLE PRECISION   EPS, RMAX, RMIN
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAMC2 determines the machine parameters specified in its argument
+*  list.
+*
+*  Arguments
+*  =========
+*
+*  BETA    (output) INTEGER
+*          The base of the machine.
+*
+*  T       (output) INTEGER
+*          The number of ( BETA ) digits in the mantissa.
+*
+*  RND     (output) LOGICAL
+*          Specifies whether proper rounding  ( RND = .TRUE. )  or
+*          chopping  ( RND = .FALSE. )  occurs in addition. This may not
+*          be a reliable guide to the way in which the machine performs
+*          its arithmetic.
+*
+*  EPS     (output) DOUBLE PRECISION
+*          The smallest positive number such that
+*
+*             fl( 1.0 - EPS ) .LT. 1.0,
+*
+*          where fl denotes the computed value.
+*
+*  EMIN    (output) INTEGER
+*          The minimum exponent before (gradual) underflow occurs.
+*
+*  RMIN    (output) DOUBLE PRECISION
+*          The smallest normalized number for the machine, given by
+*          BASE**( EMIN - 1 ), where  BASE  is the floating point value
+*          of BETA.
+*
+*  EMAX    (output) INTEGER
+*          The maximum exponent before overflow occurs.
+*
+*  RMAX    (output) DOUBLE PRECISION
+*          The largest positive number for the machine, given by
+*          BASE**EMAX * ( 1 - EPS ), where  BASE  is the floating point
+*          value of BETA.
+*
+*  Further Details
+*  ===============
+*
+*  The computation of  EPS  is based on a routine PARANOIA by
+*  W. Kahan of the University of California at Berkeley.
+*
+* =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            FIRST, IEEE, IWARN, LIEEE1, LRND
+      INTEGER            GNMIN, GPMIN, I, LBETA, LEMAX, LEMIN, LT,
+     $                   NGNMIN, NGPMIN
+      DOUBLE PRECISION   A, B, C, HALF, LEPS, LRMAX, LRMIN, ONE, RBASE,
+     $                   SIXTH, SMALL, THIRD, TWO, ZERO
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMC3
+      EXTERNAL           DLAMC3
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLAMC1, DLAMC4, DLAMC5
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Save statement ..
+      SAVE               FIRST, IWARN, LBETA, LEMAX, LEMIN, LEPS, LRMAX,
+     $                   LRMIN, LT
+*     ..
+*     .. Data statements ..
+      DATA               FIRST / .TRUE. / , IWARN / .FALSE. /
+*     ..
+*     .. Executable Statements ..
+*
+      IF( FIRST ) THEN
+         FIRST = .FALSE.
+         ZERO = 0
+         ONE = 1
+         TWO = 2
+*
+*        LBETA, LT, LRND, LEPS, LEMIN and LRMIN  are the local values of
+*        BETA, T, RND, EPS, EMIN and RMIN.
+*
+*        Throughout this routine  we use the function  DLAMC3  to ensure
+*        that relevant values are stored  and not held in registers,  or
+*        are not affected by optimizers.
+*
+*        DLAMC1 returns the parameters  LBETA, LT, LRND and LIEEE1.
+*
+         CALL DLAMC1( LBETA, LT, LRND, LIEEE1 )
+*
+*        Start to find EPS.
+*
+         B = LBETA
+         A = B**( -LT )
+         LEPS = A
+*
+*        Try some tricks to see whether or not this is the correct  EPS.
+*
+         B = TWO / 3
+         HALF = ONE / 2
+         SIXTH = DLAMC3( B, -HALF )
+         THIRD = DLAMC3( SIXTH, SIXTH )
+         B = DLAMC3( THIRD, -HALF )
+         B = DLAMC3( B, SIXTH )
+         B = ABS( B )
+         IF( B.LT.LEPS )
+     $      B = LEPS
+*
+         LEPS = 1
+*
+*+       WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP
+   10    CONTINUE
+         IF( ( LEPS.GT.B ) .AND. ( B.GT.ZERO ) ) THEN
+            LEPS = B
+            C = DLAMC3( HALF*LEPS, ( TWO**5 )*( LEPS**2 ) )
+            C = DLAMC3( HALF, -C )
+            B = DLAMC3( HALF, C )
+            C = DLAMC3( HALF, -B )
+            B = DLAMC3( HALF, C )
+            GO TO 10
+         END IF
+*+       END WHILE
+*
+         IF( A.LT.LEPS )
+     $      LEPS = A
+*
+*        Computation of EPS complete.
+*
+*        Now find  EMIN.  Let A = + or - 1, and + or - (1 + BASE**(-3)).
+*        Keep dividing  A by BETA until (gradual) underflow occurs. This
+*        is detected when we cannot recover the previous A.
+*
+         RBASE = ONE / LBETA
+         SMALL = ONE
+         DO 20 I = 1, 3
+            SMALL = DLAMC3( SMALL*RBASE, ZERO )
+   20    CONTINUE
+         A = DLAMC3( ONE, SMALL )
+         CALL DLAMC4( NGPMIN, ONE, LBETA )
+         CALL DLAMC4( NGNMIN, -ONE, LBETA )
+         CALL DLAMC4( GPMIN, A, LBETA )
+         CALL DLAMC4( GNMIN, -A, LBETA )
+         IEEE = .FALSE.
+*
+         IF( ( NGPMIN.EQ.NGNMIN ) .AND. ( GPMIN.EQ.GNMIN ) ) THEN
+            IF( NGPMIN.EQ.GPMIN ) THEN
+               LEMIN = NGPMIN
+*            ( Non twos-complement machines, no gradual underflow;
+*              e.g.,  VAX )
+            ELSE IF( ( GPMIN-NGPMIN ).EQ.3 ) THEN
+               LEMIN = NGPMIN - 1 + LT
+               IEEE = .TRUE.
+*            ( Non twos-complement machines, with gradual underflow;
+*              e.g., IEEE standard followers )
+            ELSE
+               LEMIN = MIN( NGPMIN, GPMIN )
+*            ( A guess; no known machine )
+               IWARN = .TRUE.
+            END IF
+*
+         ELSE IF( ( NGPMIN.EQ.GPMIN ) .AND. ( NGNMIN.EQ.GNMIN ) ) THEN
+            IF( ABS( NGPMIN-NGNMIN ).EQ.1 ) THEN
+               LEMIN = MAX( NGPMIN, NGNMIN )
+*            ( Twos-complement machines, no gradual underflow;
+*              e.g., CYBER 205 )
+            ELSE
+               LEMIN = MIN( NGPMIN, NGNMIN )
+*            ( A guess; no known machine )
+               IWARN = .TRUE.
+            END IF
+*
+         ELSE IF( ( ABS( NGPMIN-NGNMIN ).EQ.1 ) .AND.
+     $            ( GPMIN.EQ.GNMIN ) ) THEN
+            IF( ( GPMIN-MIN( NGPMIN, NGNMIN ) ).EQ.3 ) THEN
+               LEMIN = MAX( NGPMIN, NGNMIN ) - 1 + LT
+*            ( Twos-complement machines with gradual underflow;
+*              no known machine )
+            ELSE
+               LEMIN = MIN( NGPMIN, NGNMIN )
+*            ( A guess; no known machine )
+               IWARN = .TRUE.
+            END IF
+*
+         ELSE
+            LEMIN = MIN( NGPMIN, NGNMIN, GPMIN, GNMIN )
+*         ( A guess; no known machine )
+            IWARN = .TRUE.
+         END IF
+***
+* Comment out this if block if EMIN is ok
+         IF( IWARN ) THEN
+            FIRST = .TRUE.
+            WRITE( 6, FMT = 9999 )LEMIN
+         END IF
+***
+*
+*        Assume IEEE arithmetic if we found denormalised  numbers above,
+*        or if arithmetic seems to round in the  IEEE style,  determined
+*        in routine DLAMC1. A true IEEE machine should have both  things
+*        true; however, faulty machines may have one or the other.
+*
+         IEEE = IEEE .OR. LIEEE1
+*
+*        Compute  RMIN by successive division by  BETA. We could compute
+*        RMIN as BASE**( EMIN - 1 ),  but some machines underflow during
+*        this computation.
+*
+         LRMIN = 1
+         DO 30 I = 1, 1 - LEMIN
+            LRMIN = DLAMC3( LRMIN*RBASE, ZERO )
+   30    CONTINUE
+*
+*        Finally, call DLAMC5 to compute EMAX and RMAX.
+*
+         CALL DLAMC5( LBETA, LT, LEMIN, IEEE, LEMAX, LRMAX )
+      END IF
+*
+      BETA = LBETA
+      T = LT
+      RND = LRND
+      EPS = LEPS
+      EMIN = LEMIN
+      RMIN = LRMIN
+      EMAX = LEMAX
+      RMAX = LRMAX
+*
+      RETURN
+*
+ 9999 FORMAT( / / ' WARNING. The value EMIN may be incorrect:-',
+     $      '  EMIN = ', I8, /
+     $      ' If, after inspection, the value EMIN looks',
+     $      ' acceptable please comment out ',
+     $      / ' the IF block as marked within the code of routine',
+     $      ' DLAMC2,', / ' otherwise supply EMIN explicitly.', / )
+*
+*     End of DLAMC2
+*
+      END
+*
+************************************************************************
+*
+      DOUBLE PRECISION FUNCTION DLAMC3( A, B )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   A, B
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAMC3  is intended to force  A  and  B  to be stored prior to doing
+*  the addition of  A  and  B ,  for use in situations where optimizers
+*  might hold one of these in a register.
+*
+*  Arguments
+*  =========
+*
+*  A, B    (input) DOUBLE PRECISION
+*          The values A and B.
+*
+* =====================================================================
+*
+*     .. Executable Statements ..
+*
+      DLAMC3 = A + B
+*
+      RETURN
+*
+*     End of DLAMC3
+*
+      END
+*
+************************************************************************
+*
+      SUBROUTINE DLAMC4( EMIN, START, BASE )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            BASE, EMIN
+      DOUBLE PRECISION   START
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAMC4 is a service routine for DLAMC2.
+*
+*  Arguments
+*  =========
+*
+*  EMIN    (output) EMIN
+*          The minimum exponent before (gradual) underflow, computed by
+*          setting A = START and dividing by BASE until the previous A
+*          can not be recovered.
+*
+*  START   (input) DOUBLE PRECISION
+*          The starting point for determining EMIN.
+*
+*  BASE    (input) INTEGER
+*          The base of the machine.
+*
+* =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I
+      DOUBLE PRECISION   A, B1, B2, C1, C2, D1, D2, ONE, RBASE, ZERO
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMC3
+      EXTERNAL           DLAMC3
+*     ..
+*     .. Executable Statements ..
+*
+      A = START
+      ONE = 1
+      RBASE = ONE / BASE
+      ZERO = 0
+      EMIN = 1
+      B1 = DLAMC3( A*RBASE, ZERO )
+      C1 = A
+      C2 = A
+      D1 = A
+      D2 = A
+*+    WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND.
+*    $       ( D1.EQ.A ).AND.( D2.EQ.A )      )LOOP
+   10 CONTINUE
+      IF( ( C1.EQ.A ) .AND. ( C2.EQ.A ) .AND. ( D1.EQ.A ) .AND.
+     $    ( D2.EQ.A ) ) THEN
+         EMIN = EMIN - 1
+         A = B1
+         B1 = DLAMC3( A / BASE, ZERO )
+         C1 = DLAMC3( B1*BASE, ZERO )
+         D1 = ZERO
+         DO 20 I = 1, BASE
+            D1 = D1 + B1
+   20    CONTINUE
+         B2 = DLAMC3( A*RBASE, ZERO )
+         C2 = DLAMC3( B2 / RBASE, ZERO )
+         D2 = ZERO
+         DO 30 I = 1, BASE
+            D2 = D2 + B2
+   30    CONTINUE
+         GO TO 10
+      END IF
+*+    END WHILE
+*
+      RETURN
+*
+*     End of DLAMC4
+*
+      END
+*
+************************************************************************
+*
+      SUBROUTINE DLAMC5( BETA, P, EMIN, IEEE, EMAX, RMAX )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      LOGICAL            IEEE
+      INTEGER            BETA, EMAX, EMIN, P
+      DOUBLE PRECISION   RMAX
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAMC5 attempts to compute RMAX, the largest machine floating-point
+*  number, without overflow.  It assumes that EMAX + abs(EMIN) sum
+*  approximately to a power of 2.  It will fail on machines where this
+*  assumption does not hold, for example, the Cyber 205 (EMIN = -28625,
+*  EMAX = 28718).  It will also fail if the value supplied for EMIN is
+*  too large (i.e. too close to zero), probably with overflow.
+*
+*  Arguments
+*  =========
+*
+*  BETA    (input) INTEGER
+*          The base of floating-point arithmetic.
+*
+*  P       (input) INTEGER
+*          The number of base BETA digits in the mantissa of a
+*          floating-point value.
+*
+*  EMIN    (input) INTEGER
+*          The minimum exponent before (gradual) underflow.
+*
+*  IEEE    (input) LOGICAL
+*          A logical flag specifying whether or not the arithmetic
+*          system is thought to comply with the IEEE standard.
+*
+*  EMAX    (output) INTEGER
+*          The largest exponent before overflow
+*
+*  RMAX    (output) DOUBLE PRECISION
+*          The largest machine floating-point number.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            EXBITS, EXPSUM, I, LEXP, NBITS, TRY, UEXP
+      DOUBLE PRECISION   OLDY, RECBAS, Y, Z
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMC3
+      EXTERNAL           DLAMC3
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MOD
+*     ..
+*     .. Executable Statements ..
+*
+*     First compute LEXP and UEXP, two powers of 2 that bound
+*     abs(EMIN). We then assume that EMAX + abs(EMIN) will sum
+*     approximately to the bound that is closest to abs(EMIN).
+*     (EMAX is the exponent of the required number RMAX).
+*
+      LEXP = 1
+      EXBITS = 1
+   10 CONTINUE
+      TRY = LEXP*2
+      IF( TRY.LE.( -EMIN ) ) THEN
+         LEXP = TRY
+         EXBITS = EXBITS + 1
+         GO TO 10
+      END IF
+      IF( LEXP.EQ.-EMIN ) THEN
+         UEXP = LEXP
+      ELSE
+         UEXP = TRY
+         EXBITS = EXBITS + 1
+      END IF
+*
+*     Now -LEXP is less than or equal to EMIN, and -UEXP is greater
+*     than or equal to EMIN. EXBITS is the number of bits needed to
+*     store the exponent.
+*
+      IF( ( UEXP+EMIN ).GT.( -LEXP-EMIN ) ) THEN
+         EXPSUM = 2*LEXP
+      ELSE
+         EXPSUM = 2*UEXP
+      END IF
+*
+*     EXPSUM is the exponent range, approximately equal to
+*     EMAX - EMIN + 1 .
+*
+      EMAX = EXPSUM + EMIN - 1
+      NBITS = 1 + EXBITS + P
+*
+*     NBITS is the total number of bits needed to store a
+*     floating-point number.
+*
+      IF( ( MOD( NBITS, 2 ).EQ.1 ) .AND. ( BETA.EQ.2 ) ) THEN
+*
+*        Either there are an odd number of bits used to store a
+*        floating-point number, which is unlikely, or some bits are
+*        not used in the representation of numbers, which is possible,
+*        (e.g. Cray machines) or the mantissa has an implicit bit,
+*        (e.g. IEEE machines, Dec Vax machines), which is perhaps the
+*        most likely. We have to assume the last alternative.
+*        If this is true, then we need to reduce EMAX by one because
+*        there must be some way of representing zero in an implicit-bit
+*        system. On machines like Cray, we are reducing EMAX by one
+*        unnecessarily.
+*
+         EMAX = EMAX - 1
+      END IF
+*
+      IF( IEEE ) THEN
+*
+*        Assume we are on an IEEE machine which reserves one exponent
+*        for infinity and NaN.
+*
+         EMAX = EMAX - 1
+      END IF
+*
+*     Now create RMAX, the largest machine number, which should
+*     be equal to (1.0 - BETA**(-P)) * BETA**EMAX .
+*
+*     First compute 1.0 - BETA**(-P), being careful that the
+*     result is less than 1.0 .
+*
+      RECBAS = ONE / BETA
+      Z = BETA - ONE
+      Y = ZERO
+      DO 20 I = 1, P
+         Z = Z*RECBAS
+         IF( Y.LT.ONE )
+     $      OLDY = Y
+         Y = DLAMC3( Y, Z )
+   20 CONTINUE
+      IF( Y.GE.ONE )
+     $   Y = OLDY
+*
+*     Now multiply by BETA**EMAX to get RMAX.
+*
+      DO 30 I = 1, EMAX
+         Y = DLAMC3( Y*BETA, ZERO )
+   30 CONTINUE
+*
+      RMAX = Y
+      RETURN
+*
+*     End of DLAMC5
+*
+      END
diff --git a/costa/native/external/lapack/dlamrg.f b/costa/native/external/lapack/dlamrg.f
new file mode 100644
index 000000000..ac52dd4be
--- /dev/null
+++ b/costa/native/external/lapack/dlamrg.f
@@ -0,0 +1,104 @@
+      SUBROUTINE DLAMRG( N1, N2, A, DTRD1, DTRD2, INDEX )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            DTRD1, DTRD2, N1, N2
+*     ..
+*     .. Array Arguments ..
+      INTEGER            INDEX( * )
+      DOUBLE PRECISION   A( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAMRG will create a permutation list which will merge the elements
+*  of A (which is composed of two independently sorted sets) into a
+*  single set which is sorted in ascending order.
+*
+*  Arguments
+*  =========
+*
+*  N1     (input) INTEGER
+*  N2     (input) INTEGER
+*         These arguements contain the respective lengths of the two
+*         sorted lists to be merged.
+*
+*  A      (input) DOUBLE PRECISION array, dimension (N1+N2)
+*         The first N1 elements of A contain a list of numbers which
+*         are sorted in either ascending or descending order.  Likewise
+*         for the final N2 elements.
+*
+*  DTRD1  (input) INTEGER
+*  DTRD2  (input) INTEGER
+*         These are the strides to be taken through the array A.
+*         Allowable strides are 1 and -1.  They indicate whether a
+*         subset of A is sorted in ascending (DTRDx = 1) or descending
+*         (DTRDx = -1) order.
+*
+*  INDEX  (output) INTEGER array, dimension (N1+N2)
+*         On exit this array will contain a permutation such that
+*         if B( I ) = A( INDEX( I ) ) for I=1,N1+N2, then B will be
+*         sorted in ascending order.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, IND1, IND2, N1SV, N2SV
+*     ..
+*     .. Executable Statements ..
+*
+      N1SV = N1
+      N2SV = N2
+      IF( DTRD1.GT.0 ) THEN
+         IND1 = 1
+      ELSE
+         IND1 = N1
+      END IF
+      IF( DTRD2.GT.0 ) THEN
+         IND2 = 1 + N1
+      ELSE
+         IND2 = N1 + N2
+      END IF
+      I = 1
+*     while ( (N1SV > 0) & (N2SV > 0) )
+   10 CONTINUE
+      IF( N1SV.GT.0 .AND. N2SV.GT.0 ) THEN
+         IF( A( IND1 ).LE.A( IND2 ) ) THEN
+            INDEX( I ) = IND1
+            I = I + 1
+            IND1 = IND1 + DTRD1
+            N1SV = N1SV - 1
+         ELSE
+            INDEX( I ) = IND2
+            I = I + 1
+            IND2 = IND2 + DTRD2
+            N2SV = N2SV - 1
+         END IF
+         GO TO 10
+      END IF
+*     end while
+      IF( N1SV.EQ.0 ) THEN
+         DO 20 N1SV = 1, N2SV
+            INDEX( I ) = IND2
+            I = I + 1
+            IND2 = IND2 + DTRD2
+   20    CONTINUE
+      ELSE
+*     N2SV .EQ. 0
+         DO 30 N2SV = 1, N1SV
+            INDEX( I ) = IND1
+            I = I + 1
+            IND1 = IND1 + DTRD1
+   30    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DLAMRG
+*
+      END
diff --git a/costa/native/external/lapack/dlangb.f b/costa/native/external/lapack/dlangb.f
new file mode 100644
index 000000000..9aea88339
--- /dev/null
+++ b/costa/native/external/lapack/dlangb.f
@@ -0,0 +1,155 @@
+      DOUBLE PRECISION FUNCTION DLANGB( NORM, N, KL, KU, AB, LDAB,
+     $                 WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            KL, KU, LDAB, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AB( LDAB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLANGB  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the element of  largest absolute value  of an
+*  n by n band matrix  A,  with kl sub-diagonals and ku super-diagonals.
+*
+*  Description
+*  ===========
+*
+*  DLANGB returns the value
+*
+*     DLANGB = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in DLANGB as described
+*          above.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, DLANGB is
+*          set to zero.
+*
+*  KL      (input) INTEGER
+*          The number of sub-diagonals of the matrix A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of super-diagonals of the matrix A.  KU >= 0.
+*
+*  AB      (input) DOUBLE PRECISION array, dimension (LDAB,N)
+*          The band matrix A, stored in rows 1 to KL+KU+1.  The j-th
+*          column of A is stored in the j-th column of the array AB as
+*          follows:
+*          AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KL+KU+1.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I'; otherwise, WORK is not
+*          referenced.
+*
+* =====================================================================
+*
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, K, L
+      DOUBLE PRECISION   SCALE, SUM, VALUE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASSQ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         VALUE = ZERO
+         DO 20 J = 1, N
+            DO 10 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 )
+               VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   10       CONTINUE
+   20    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
+*
+*        Find norm1(A).
+*
+         VALUE = ZERO
+         DO 40 J = 1, N
+            SUM = ZERO
+            DO 30 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 )
+               SUM = SUM + ABS( AB( I, J ) )
+   30       CONTINUE
+            VALUE = MAX( VALUE, SUM )
+   40    CONTINUE
+      ELSE IF( LSAME( NORM, 'I' ) ) THEN
+*
+*        Find normI(A).
+*
+         DO 50 I = 1, N
+            WORK( I ) = ZERO
+   50    CONTINUE
+         DO 70 J = 1, N
+            K = KU + 1 - J
+            DO 60 I = MAX( 1, J-KU ), MIN( N, J+KL )
+               WORK( I ) = WORK( I ) + ABS( AB( K+I, J ) )
+   60       CONTINUE
+   70    CONTINUE
+         VALUE = ZERO
+         DO 80 I = 1, N
+            VALUE = MAX( VALUE, WORK( I ) )
+   80    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         DO 90 J = 1, N
+            L = MAX( 1, J-KU )
+            K = KU + 1 - J + L
+            CALL DLASSQ( MIN( N, J+KL )-L+1, AB( K, J ), 1, SCALE, SUM )
+   90    CONTINUE
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      DLANGB = VALUE
+      RETURN
+*
+*     End of DLANGB
+*
+      END
diff --git a/costa/native/external/lapack/dlange.f b/costa/native/external/lapack/dlange.f
new file mode 100644
index 000000000..202ba6047
--- /dev/null
+++ b/costa/native/external/lapack/dlange.f
@@ -0,0 +1,145 @@
+      DOUBLE PRECISION FUNCTION DLANGE( NORM, M, N, A, LDA, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLANGE  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  real matrix A.
+*
+*  Description
+*  ===========
+*
+*  DLANGE returns the value
+*
+*     DLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in DLANGE as described
+*          above.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.  When M = 0,
+*          DLANGE is set to zero.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.  When N = 0,
+*          DLANGE is set to zero.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+*          The m by n matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(M,1).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK),
+*          where LWORK >= M when NORM = 'I'; otherwise, WORK is not
+*          referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      DOUBLE PRECISION   SCALE, SUM, VALUE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASSQ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( MIN( M, N ).EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         VALUE = ZERO
+         DO 20 J = 1, N
+            DO 10 I = 1, M
+               VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   10       CONTINUE
+   20    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
+*
+*        Find norm1(A).
+*
+         VALUE = ZERO
+         DO 40 J = 1, N
+            SUM = ZERO
+            DO 30 I = 1, M
+               SUM = SUM + ABS( A( I, J ) )
+   30       CONTINUE
+            VALUE = MAX( VALUE, SUM )
+   40    CONTINUE
+      ELSE IF( LSAME( NORM, 'I' ) ) THEN
+*
+*        Find normI(A).
+*
+         DO 50 I = 1, M
+            WORK( I ) = ZERO
+   50    CONTINUE
+         DO 70 J = 1, N
+            DO 60 I = 1, M
+               WORK( I ) = WORK( I ) + ABS( A( I, J ) )
+   60       CONTINUE
+   70    CONTINUE
+         VALUE = ZERO
+         DO 80 I = 1, M
+            VALUE = MAX( VALUE, WORK( I ) )
+   80    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         DO 90 J = 1, N
+            CALL DLASSQ( M, A( 1, J ), 1, SCALE, SUM )
+   90    CONTINUE
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      DLANGE = VALUE
+      RETURN
+*
+*     End of DLANGE
+*
+      END
diff --git a/costa/native/external/lapack/dlangt.f b/costa/native/external/lapack/dlangt.f
new file mode 100644
index 000000000..7d507bc81
--- /dev/null
+++ b/costa/native/external/lapack/dlangt.f
@@ -0,0 +1,142 @@
+      DOUBLE PRECISION FUNCTION DLANGT( NORM, N, DL, D, DU )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * ), DL( * ), DU( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLANGT  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  real tridiagonal matrix A.
+*
+*  Description
+*  ===========
+*
+*  DLANGT returns the value
+*
+*     DLANGT = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in DLANGT as described
+*          above.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, DLANGT is
+*          set to zero.
+*
+*  DL      (input) DOUBLE PRECISION array, dimension (N-1)
+*          The (n-1) sub-diagonal elements of A.
+*
+*  D       (input) DOUBLE PRECISION array, dimension (N)
+*          The diagonal elements of A.
+*
+*  DU      (input) DOUBLE PRECISION array, dimension (N-1)
+*          The (n-1) super-diagonal elements of A.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      DOUBLE PRECISION   ANORM, SCALE, SUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.LE.0 ) THEN
+         ANORM = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         ANORM = ABS( D( N ) )
+         DO 10 I = 1, N - 1
+            ANORM = MAX( ANORM, ABS( DL( I ) ) )
+            ANORM = MAX( ANORM, ABS( D( I ) ) )
+            ANORM = MAX( ANORM, ABS( DU( I ) ) )
+   10    CONTINUE
+      ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' ) THEN
+*
+*        Find norm1(A).
+*
+         IF( N.EQ.1 ) THEN
+            ANORM = ABS( D( 1 ) )
+         ELSE
+            ANORM = MAX( ABS( D( 1 ) )+ABS( DL( 1 ) ),
+     $              ABS( D( N ) )+ABS( DU( N-1 ) ) )
+            DO 20 I = 2, N - 1
+               ANORM = MAX( ANORM, ABS( D( I ) )+ABS( DL( I ) )+
+     $                 ABS( DU( I-1 ) ) )
+   20       CONTINUE
+         END IF
+      ELSE IF( LSAME( NORM, 'I' ) ) THEN
+*
+*        Find normI(A).
+*
+         IF( N.EQ.1 ) THEN
+            ANORM = ABS( D( 1 ) )
+         ELSE
+            ANORM = MAX( ABS( D( 1 ) )+ABS( DU( 1 ) ),
+     $              ABS( D( N ) )+ABS( DL( N-1 ) ) )
+            DO 30 I = 2, N - 1
+               ANORM = MAX( ANORM, ABS( D( I ) )+ABS( DU( I ) )+
+     $                 ABS( DL( I-1 ) ) )
+   30       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         CALL DLASSQ( N, D, 1, SCALE, SUM )
+         IF( N.GT.1 ) THEN
+            CALL DLASSQ( N-1, DL, 1, SCALE, SUM )
+            CALL DLASSQ( N-1, DU, 1, SCALE, SUM )
+         END IF
+         ANORM = SCALE*SQRT( SUM )
+      END IF
+*
+      DLANGT = ANORM
+      RETURN
+*
+*     End of DLANGT
+*
+      END
diff --git a/costa/native/external/lapack/dlanhs.f b/costa/native/external/lapack/dlanhs.f
new file mode 100644
index 000000000..45939f0ab
--- /dev/null
+++ b/costa/native/external/lapack/dlanhs.f
@@ -0,0 +1,142 @@
+      DOUBLE PRECISION FUNCTION DLANHS( NORM, N, A, LDA, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            LDA, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLANHS  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  Hessenberg matrix A.
+*
+*  Description
+*  ===========
+*
+*  DLANHS returns the value
+*
+*     DLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in DLANHS as described
+*          above.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, DLANHS is
+*          set to zero.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+*          The n by n upper Hessenberg matrix A; the part of A below the
+*          first sub-diagonal is not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(N,1).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I'; otherwise, WORK is not
+*          referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      DOUBLE PRECISION   SCALE, SUM, VALUE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASSQ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         VALUE = ZERO
+         DO 20 J = 1, N
+            DO 10 I = 1, MIN( N, J+1 )
+               VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   10       CONTINUE
+   20    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
+*
+*        Find norm1(A).
+*
+         VALUE = ZERO
+         DO 40 J = 1, N
+            SUM = ZERO
+            DO 30 I = 1, MIN( N, J+1 )
+               SUM = SUM + ABS( A( I, J ) )
+   30       CONTINUE
+            VALUE = MAX( VALUE, SUM )
+   40    CONTINUE
+      ELSE IF( LSAME( NORM, 'I' ) ) THEN
+*
+*        Find normI(A).
+*
+         DO 50 I = 1, N
+            WORK( I ) = ZERO
+   50    CONTINUE
+         DO 70 J = 1, N
+            DO 60 I = 1, MIN( N, J+1 )
+               WORK( I ) = WORK( I ) + ABS( A( I, J ) )
+   60       CONTINUE
+   70    CONTINUE
+         VALUE = ZERO
+         DO 80 I = 1, N
+            VALUE = MAX( VALUE, WORK( I ) )
+   80    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         DO 90 J = 1, N
+            CALL DLASSQ( MIN( N, J+1 ), A( 1, J ), 1, SCALE, SUM )
+   90    CONTINUE
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      DLANHS = VALUE
+      RETURN
+*
+*     End of DLANHS
+*
+      END
diff --git a/costa/native/external/lapack/dlansb.f b/costa/native/external/lapack/dlansb.f
new file mode 100644
index 000000000..3a6893bc8
--- /dev/null
+++ b/costa/native/external/lapack/dlansb.f
@@ -0,0 +1,187 @@
+      DOUBLE PRECISION FUNCTION DLANSB( NORM, UPLO, N, K, AB, LDAB,
+     $                 WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM, UPLO
+      INTEGER            K, LDAB, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AB( LDAB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLANSB  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the element of  largest absolute value  of an
+*  n by n symmetric band matrix A,  with k super-diagonals.
+*
+*  Description
+*  ===========
+*
+*  DLANSB returns the value
+*
+*     DLANSB = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in DLANSB as described
+*          above.
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          band matrix A is supplied.
+*          = 'U':  Upper triangular part is supplied
+*          = 'L':  Lower triangular part is supplied
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, DLANSB is
+*          set to zero.
+*
+*  K       (input) INTEGER
+*          The number of super-diagonals or sub-diagonals of the
+*          band matrix A.  K >= 0.
+*
+*  AB      (input) DOUBLE PRECISION array, dimension (LDAB,N)
+*          The upper or lower triangle of the symmetric band matrix A,
+*          stored in the first K+1 rows of AB.  The j-th column of A is
+*          stored in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)   = A(i,j) for j<=i<=min(n,j+k).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= K+1.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
+*          WORK is not referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, L
+      DOUBLE PRECISION   ABSA, SCALE, SUM, VALUE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASSQ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 20 J = 1, N
+               DO 10 I = MAX( K+2-J, 1 ), K + 1
+                  VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+            DO 40 J = 1, N
+               DO 30 I = 1, MIN( N+1-J, K+1 )
+                  VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR.
+     $         ( NORM.EQ.'1' ) ) THEN
+*
+*        Find normI(A) ( = norm1(A), since A is symmetric).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 60 J = 1, N
+               SUM = ZERO
+               L = K + 1 - J
+               DO 50 I = MAX( 1, J-K ), J - 1
+                  ABSA = ABS( AB( L+I, J ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+   50          CONTINUE
+               WORK( J ) = SUM + ABS( AB( K+1, J ) )
+   60       CONTINUE
+            DO 70 I = 1, N
+               VALUE = MAX( VALUE, WORK( I ) )
+   70       CONTINUE
+         ELSE
+            DO 80 I = 1, N
+               WORK( I ) = ZERO
+   80       CONTINUE
+            DO 100 J = 1, N
+               SUM = WORK( J ) + ABS( AB( 1, J ) )
+               L = 1 - J
+               DO 90 I = J + 1, MIN( N, J+K )
+                  ABSA = ABS( AB( L+I, J ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+   90          CONTINUE
+               VALUE = MAX( VALUE, SUM )
+  100       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         IF( K.GT.0 ) THEN
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               DO 110 J = 2, N
+                  CALL DLASSQ( MIN( J-1, K ), AB( MAX( K+2-J, 1 ), J ),
+     $                         1, SCALE, SUM )
+  110          CONTINUE
+               L = K + 1
+            ELSE
+               DO 120 J = 1, N - 1
+                  CALL DLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE,
+     $                         SUM )
+  120          CONTINUE
+               L = 1
+            END IF
+            SUM = 2*SUM
+         ELSE
+            L = 1
+         END IF
+         CALL DLASSQ( N, AB( L, 1 ), LDAB, SCALE, SUM )
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      DLANSB = VALUE
+      RETURN
+*
+*     End of DLANSB
+*
+      END
diff --git a/costa/native/external/lapack/dlansp.f b/costa/native/external/lapack/dlansp.f
new file mode 100644
index 000000000..52c283ebb
--- /dev/null
+++ b/costa/native/external/lapack/dlansp.f
@@ -0,0 +1,197 @@
+      DOUBLE PRECISION FUNCTION DLANSP( NORM, UPLO, N, AP, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM, UPLO
+      INTEGER            N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AP( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLANSP  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  real symmetric matrix A,  supplied in packed form.
+*
+*  Description
+*  ===========
+*
+*  DLANSP returns the value
+*
+*     DLANSP = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in DLANSP as described
+*          above.
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is supplied.
+*          = 'U':  Upper triangular part of A is supplied
+*          = 'L':  Lower triangular part of A is supplied
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, DLANSP is
+*          set to zero.
+*
+*  AP      (input) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          The upper or lower triangle of the symmetric matrix A, packed
+*          columnwise in a linear array.  The j-th column of A is stored
+*          in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
+*          WORK is not referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, K
+      DOUBLE PRECISION   ABSA, SCALE, SUM, VALUE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASSQ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            K = 1
+            DO 20 J = 1, N
+               DO 10 I = K, K + J - 1
+                  VALUE = MAX( VALUE, ABS( AP( I ) ) )
+   10          CONTINUE
+               K = K + J
+   20       CONTINUE
+         ELSE
+            K = 1
+            DO 40 J = 1, N
+               DO 30 I = K, K + N - J
+                  VALUE = MAX( VALUE, ABS( AP( I ) ) )
+   30          CONTINUE
+               K = K + N - J + 1
+   40       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR.
+     $         ( NORM.EQ.'1' ) ) THEN
+*
+*        Find normI(A) ( = norm1(A), since A is symmetric).
+*
+         VALUE = ZERO
+         K = 1
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 60 J = 1, N
+               SUM = ZERO
+               DO 50 I = 1, J - 1
+                  ABSA = ABS( AP( K ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+                  K = K + 1
+   50          CONTINUE
+               WORK( J ) = SUM + ABS( AP( K ) )
+               K = K + 1
+   60       CONTINUE
+            DO 70 I = 1, N
+               VALUE = MAX( VALUE, WORK( I ) )
+   70       CONTINUE
+         ELSE
+            DO 80 I = 1, N
+               WORK( I ) = ZERO
+   80       CONTINUE
+            DO 100 J = 1, N
+               SUM = WORK( J ) + ABS( AP( K ) )
+               K = K + 1
+               DO 90 I = J + 1, N
+                  ABSA = ABS( AP( K ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+                  K = K + 1
+   90          CONTINUE
+               VALUE = MAX( VALUE, SUM )
+  100       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         K = 2
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 110 J = 2, N
+               CALL DLASSQ( J-1, AP( K ), 1, SCALE, SUM )
+               K = K + J
+  110       CONTINUE
+         ELSE
+            DO 120 J = 1, N - 1
+               CALL DLASSQ( N-J, AP( K ), 1, SCALE, SUM )
+               K = K + N - J + 1
+  120       CONTINUE
+         END IF
+         SUM = 2*SUM
+         K = 1
+         DO 130 I = 1, N
+            IF( AP( K ).NE.ZERO ) THEN
+               ABSA = ABS( AP( K ) )
+               IF( SCALE.LT.ABSA ) THEN
+                  SUM = ONE + SUM*( SCALE / ABSA )**2
+                  SCALE = ABSA
+               ELSE
+                  SUM = SUM + ( ABSA / SCALE )**2
+               END IF
+            END IF
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               K = K + I + 1
+            ELSE
+               K = K + N - I + 1
+            END IF
+  130    CONTINUE
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      DLANSP = VALUE
+      RETURN
+*
+*     End of DLANSP
+*
+      END
diff --git a/costa/native/external/lapack/dlanst.f b/costa/native/external/lapack/dlanst.f
new file mode 100644
index 000000000..e604348c3
--- /dev/null
+++ b/costa/native/external/lapack/dlanst.f
@@ -0,0 +1,125 @@
+      DOUBLE PRECISION FUNCTION DLANST( NORM, N, D, E )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * ), E( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLANST  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  real symmetric tridiagonal matrix A.
+*
+*  Description
+*  ===========
+*
+*  DLANST returns the value
+*
+*     DLANST = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in DLANST as described
+*          above.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, DLANST is
+*          set to zero.
+*
+*  D       (input) DOUBLE PRECISION array, dimension (N)
+*          The diagonal elements of A.
+*
+*  E       (input) DOUBLE PRECISION array, dimension (N-1)
+*          The (n-1) sub-diagonal or super-diagonal elements of A.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      DOUBLE PRECISION   ANORM, SCALE, SUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.LE.0 ) THEN
+         ANORM = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         ANORM = ABS( D( N ) )
+         DO 10 I = 1, N - 1
+            ANORM = MAX( ANORM, ABS( D( I ) ) )
+            ANORM = MAX( ANORM, ABS( E( I ) ) )
+   10    CONTINUE
+      ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' .OR.
+     $         LSAME( NORM, 'I' ) ) THEN
+*
+*        Find norm1(A).
+*
+         IF( N.EQ.1 ) THEN
+            ANORM = ABS( D( 1 ) )
+         ELSE
+            ANORM = MAX( ABS( D( 1 ) )+ABS( E( 1 ) ),
+     $              ABS( E( N-1 ) )+ABS( D( N ) ) )
+            DO 20 I = 2, N - 1
+               ANORM = MAX( ANORM, ABS( D( I ) )+ABS( E( I ) )+
+     $                 ABS( E( I-1 ) ) )
+   20       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         IF( N.GT.1 ) THEN
+            CALL DLASSQ( N-1, E, 1, SCALE, SUM )
+            SUM = 2*SUM
+         END IF
+         CALL DLASSQ( N, D, 1, SCALE, SUM )
+         ANORM = SCALE*SQRT( SUM )
+      END IF
+*
+      DLANST = ANORM
+      RETURN
+*
+*     End of DLANST
+*
+      END
diff --git a/costa/native/external/lapack/dlansy.f b/costa/native/external/lapack/dlansy.f
new file mode 100644
index 000000000..73d2ab71c
--- /dev/null
+++ b/costa/native/external/lapack/dlansy.f
@@ -0,0 +1,174 @@
+      DOUBLE PRECISION FUNCTION DLANSY( NORM, UPLO, N, A, LDA, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM, UPLO
+      INTEGER            LDA, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLANSY  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  real symmetric matrix A.
+*
+*  Description
+*  ===========
+*
+*  DLANSY returns the value
+*
+*     DLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in DLANSY as described
+*          above.
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is to be referenced.
+*          = 'U':  Upper triangular part of A is referenced
+*          = 'L':  Lower triangular part of A is referenced
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, DLANSY is
+*          set to zero.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+*          The symmetric matrix A.  If UPLO = 'U', the leading n by n
+*          upper triangular part of A contains the upper triangular part
+*          of the matrix A, and the strictly lower triangular part of A
+*          is not referenced.  If UPLO = 'L', the leading n by n lower
+*          triangular part of A contains the lower triangular part of
+*          the matrix A, and the strictly upper triangular part of A is
+*          not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(N,1).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
+*          WORK is not referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      DOUBLE PRECISION   ABSA, SCALE, SUM, VALUE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASSQ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 20 J = 1, N
+               DO 10 I = 1, J
+                  VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+            DO 40 J = 1, N
+               DO 30 I = J, N
+                  VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR.
+     $         ( NORM.EQ.'1' ) ) THEN
+*
+*        Find normI(A) ( = norm1(A), since A is symmetric).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 60 J = 1, N
+               SUM = ZERO
+               DO 50 I = 1, J - 1
+                  ABSA = ABS( A( I, J ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+   50          CONTINUE
+               WORK( J ) = SUM + ABS( A( J, J ) )
+   60       CONTINUE
+            DO 70 I = 1, N
+               VALUE = MAX( VALUE, WORK( I ) )
+   70       CONTINUE
+         ELSE
+            DO 80 I = 1, N
+               WORK( I ) = ZERO
+   80       CONTINUE
+            DO 100 J = 1, N
+               SUM = WORK( J ) + ABS( A( J, J ) )
+               DO 90 I = J + 1, N
+                  ABSA = ABS( A( I, J ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+   90          CONTINUE
+               VALUE = MAX( VALUE, SUM )
+  100       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 110 J = 2, N
+               CALL DLASSQ( J-1, A( 1, J ), 1, SCALE, SUM )
+  110       CONTINUE
+         ELSE
+            DO 120 J = 1, N - 1
+               CALL DLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM )
+  120       CONTINUE
+         END IF
+         SUM = 2*SUM
+         CALL DLASSQ( N, A, LDA+1, SCALE, SUM )
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      DLANSY = VALUE
+      RETURN
+*
+*     End of DLANSY
+*
+      END
diff --git a/costa/native/external/lapack/dlantb.f b/costa/native/external/lapack/dlantb.f
new file mode 100644
index 000000000..5792bd64e
--- /dev/null
+++ b/costa/native/external/lapack/dlantb.f
@@ -0,0 +1,285 @@
+      DOUBLE PRECISION FUNCTION DLANTB( NORM, UPLO, DIAG, N, K, AB,
+     $                 LDAB, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORM, UPLO
+      INTEGER            K, LDAB, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AB( LDAB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLANTB  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the element of  largest absolute value  of an
+*  n by n triangular band matrix A,  with ( k + 1 ) diagonals.
+*
+*  Description
+*  ===========
+*
+*  DLANTB returns the value
+*
+*     DLANTB = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in DLANTB as described
+*          above.
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the matrix A is upper or lower triangular.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  DIAG    (input) CHARACTER*1
+*          Specifies whether or not the matrix A is unit triangular.
+*          = 'N':  Non-unit triangular
+*          = 'U':  Unit triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, DLANTB is
+*          set to zero.
+*
+*  K       (input) INTEGER
+*          The number of super-diagonals of the matrix A if UPLO = 'U',
+*          or the number of sub-diagonals of the matrix A if UPLO = 'L'.
+*          K >= 0.
+*
+*  AB      (input) DOUBLE PRECISION array, dimension (LDAB,N)
+*          The upper or lower triangular band matrix A, stored in the
+*          first k+1 rows of AB.  The j-th column of A is stored
+*          in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)   = A(i,j) for j<=i<=min(n,j+k).
+*          Note that when DIAG = 'U', the elements of the array AB
+*          corresponding to the diagonal elements of the matrix A are
+*          not referenced, but are assumed to be one.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= K+1.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I'; otherwise, WORK is not
+*          referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UDIAG
+      INTEGER            I, J, L
+      DOUBLE PRECISION   SCALE, SUM, VALUE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASSQ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         IF( LSAME( DIAG, 'U' ) ) THEN
+            VALUE = ONE
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               DO 20 J = 1, N
+                  DO 10 I = MAX( K+2-J, 1 ), K
+                     VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40 J = 1, N
+                  DO 30 I = 2, MIN( N+1-J, K+1 )
+                     VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   30             CONTINUE
+   40          CONTINUE
+            END IF
+         ELSE
+            VALUE = ZERO
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               DO 60 J = 1, N
+                  DO 50 I = MAX( K+2-J, 1 ), K + 1
+                     VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80 J = 1, N
+                  DO 70 I = 1, MIN( N+1-J, K+1 )
+                     VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
+*
+*        Find norm1(A).
+*
+         VALUE = ZERO
+         UDIAG = LSAME( DIAG, 'U' )
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 110 J = 1, N
+               IF( UDIAG ) THEN
+                  SUM = ONE
+                  DO 90 I = MAX( K+2-J, 1 ), K
+                     SUM = SUM + ABS( AB( I, J ) )
+   90             CONTINUE
+               ELSE
+                  SUM = ZERO
+                  DO 100 I = MAX( K+2-J, 1 ), K + 1
+                     SUM = SUM + ABS( AB( I, J ) )
+  100             CONTINUE
+               END IF
+               VALUE = MAX( VALUE, SUM )
+  110       CONTINUE
+         ELSE
+            DO 140 J = 1, N
+               IF( UDIAG ) THEN
+                  SUM = ONE
+                  DO 120 I = 2, MIN( N+1-J, K+1 )
+                     SUM = SUM + ABS( AB( I, J ) )
+  120             CONTINUE
+               ELSE
+                  SUM = ZERO
+                  DO 130 I = 1, MIN( N+1-J, K+1 )
+                     SUM = SUM + ABS( AB( I, J ) )
+  130             CONTINUE
+               END IF
+               VALUE = MAX( VALUE, SUM )
+  140       CONTINUE
+         END IF
+      ELSE IF( LSAME( NORM, 'I' ) ) THEN
+*
+*        Find normI(A).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               DO 150 I = 1, N
+                  WORK( I ) = ONE
+  150          CONTINUE
+               DO 170 J = 1, N
+                  L = K + 1 - J
+                  DO 160 I = MAX( 1, J-K ), J - 1
+                     WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) )
+  160             CONTINUE
+  170          CONTINUE
+            ELSE
+               DO 180 I = 1, N
+                  WORK( I ) = ZERO
+  180          CONTINUE
+               DO 200 J = 1, N
+                  L = K + 1 - J
+                  DO 190 I = MAX( 1, J-K ), J
+                     WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) )
+  190             CONTINUE
+  200          CONTINUE
+            END IF
+         ELSE
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               DO 210 I = 1, N
+                  WORK( I ) = ONE
+  210          CONTINUE
+               DO 230 J = 1, N
+                  L = 1 - J
+                  DO 220 I = J + 1, MIN( N, J+K )
+                     WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) )
+  220             CONTINUE
+  230          CONTINUE
+            ELSE
+               DO 240 I = 1, N
+                  WORK( I ) = ZERO
+  240          CONTINUE
+               DO 260 J = 1, N
+                  L = 1 - J
+                  DO 250 I = J, MIN( N, J+K )
+                     WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) )
+  250             CONTINUE
+  260          CONTINUE
+            END IF
+         END IF
+         DO 270 I = 1, N
+            VALUE = MAX( VALUE, WORK( I ) )
+  270    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               SCALE = ONE
+               SUM = N
+               IF( K.GT.0 ) THEN
+                  DO 280 J = 2, N
+                     CALL DLASSQ( MIN( J-1, K ),
+     $                            AB( MAX( K+2-J, 1 ), J ), 1, SCALE,
+     $                            SUM )
+  280             CONTINUE
+               END IF
+            ELSE
+               SCALE = ZERO
+               SUM = ONE
+               DO 290 J = 1, N
+                  CALL DLASSQ( MIN( J, K+1 ), AB( MAX( K+2-J, 1 ), J ),
+     $                         1, SCALE, SUM )
+  290          CONTINUE
+            END IF
+         ELSE
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               SCALE = ONE
+               SUM = N
+               IF( K.GT.0 ) THEN
+                  DO 300 J = 1, N - 1
+                     CALL DLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE,
+     $                            SUM )
+  300             CONTINUE
+               END IF
+            ELSE
+               SCALE = ZERO
+               SUM = ONE
+               DO 310 J = 1, N
+                  CALL DLASSQ( MIN( N-J+1, K+1 ), AB( 1, J ), 1, SCALE,
+     $                         SUM )
+  310          CONTINUE
+            END IF
+         END IF
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      DLANTB = VALUE
+      RETURN
+*
+*     End of DLANTB
+*
+      END
diff --git a/costa/native/external/lapack/dlantp.f b/costa/native/external/lapack/dlantp.f
new file mode 100644
index 000000000..019dcfaa6
--- /dev/null
+++ b/costa/native/external/lapack/dlantp.f
@@ -0,0 +1,286 @@
+      DOUBLE PRECISION FUNCTION DLANTP( NORM, UPLO, DIAG, N, AP, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORM, UPLO
+      INTEGER            N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AP( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLANTP  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  triangular matrix A, supplied in packed form.
+*
+*  Description
+*  ===========
+*
+*  DLANTP returns the value
+*
+*     DLANTP = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in DLANTP as described
+*          above.
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the matrix A is upper or lower triangular.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  DIAG    (input) CHARACTER*1
+*          Specifies whether or not the matrix A is unit triangular.
+*          = 'N':  Non-unit triangular
+*          = 'U':  Unit triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, DLANTP is
+*          set to zero.
+*
+*  AP      (input) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          The upper or lower triangular matrix A, packed columnwise in
+*          a linear array.  The j-th column of A is stored in the array
+*          AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*          Note that when DIAG = 'U', the elements of the array AP
+*          corresponding to the diagonal elements of the matrix A are
+*          not referenced, but are assumed to be one.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I'; otherwise, WORK is not
+*          referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UDIAG
+      INTEGER            I, J, K
+      DOUBLE PRECISION   SCALE, SUM, VALUE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASSQ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         K = 1
+         IF( LSAME( DIAG, 'U' ) ) THEN
+            VALUE = ONE
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               DO 20 J = 1, N
+                  DO 10 I = K, K + J - 2
+                     VALUE = MAX( VALUE, ABS( AP( I ) ) )
+   10             CONTINUE
+                  K = K + J
+   20          CONTINUE
+            ELSE
+               DO 40 J = 1, N
+                  DO 30 I = K + 1, K + N - J
+                     VALUE = MAX( VALUE, ABS( AP( I ) ) )
+   30             CONTINUE
+                  K = K + N - J + 1
+   40          CONTINUE
+            END IF
+         ELSE
+            VALUE = ZERO
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               DO 60 J = 1, N
+                  DO 50 I = K, K + J - 1
+                     VALUE = MAX( VALUE, ABS( AP( I ) ) )
+   50             CONTINUE
+                  K = K + J
+   60          CONTINUE
+            ELSE
+               DO 80 J = 1, N
+                  DO 70 I = K, K + N - J
+                     VALUE = MAX( VALUE, ABS( AP( I ) ) )
+   70             CONTINUE
+                  K = K + N - J + 1
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
+*
+*        Find norm1(A).
+*
+         VALUE = ZERO
+         K = 1
+         UDIAG = LSAME( DIAG, 'U' )
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 110 J = 1, N
+               IF( UDIAG ) THEN
+                  SUM = ONE
+                  DO 90 I = K, K + J - 2
+                     SUM = SUM + ABS( AP( I ) )
+   90             CONTINUE
+               ELSE
+                  SUM = ZERO
+                  DO 100 I = K, K + J - 1
+                     SUM = SUM + ABS( AP( I ) )
+  100             CONTINUE
+               END IF
+               K = K + J
+               VALUE = MAX( VALUE, SUM )
+  110       CONTINUE
+         ELSE
+            DO 140 J = 1, N
+               IF( UDIAG ) THEN
+                  SUM = ONE
+                  DO 120 I = K + 1, K + N - J
+                     SUM = SUM + ABS( AP( I ) )
+  120             CONTINUE
+               ELSE
+                  SUM = ZERO
+                  DO 130 I = K, K + N - J
+                     SUM = SUM + ABS( AP( I ) )
+  130             CONTINUE
+               END IF
+               K = K + N - J + 1
+               VALUE = MAX( VALUE, SUM )
+  140       CONTINUE
+         END IF
+      ELSE IF( LSAME( NORM, 'I' ) ) THEN
+*
+*        Find normI(A).
+*
+         K = 1
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               DO 150 I = 1, N
+                  WORK( I ) = ONE
+  150          CONTINUE
+               DO 170 J = 1, N
+                  DO 160 I = 1, J - 1
+                     WORK( I ) = WORK( I ) + ABS( AP( K ) )
+                     K = K + 1
+  160             CONTINUE
+                  K = K + 1
+  170          CONTINUE
+            ELSE
+               DO 180 I = 1, N
+                  WORK( I ) = ZERO
+  180          CONTINUE
+               DO 200 J = 1, N
+                  DO 190 I = 1, J
+                     WORK( I ) = WORK( I ) + ABS( AP( K ) )
+                     K = K + 1
+  190             CONTINUE
+  200          CONTINUE
+            END IF
+         ELSE
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               DO 210 I = 1, N
+                  WORK( I ) = ONE
+  210          CONTINUE
+               DO 230 J = 1, N
+                  K = K + 1
+                  DO 220 I = J + 1, N
+                     WORK( I ) = WORK( I ) + ABS( AP( K ) )
+                     K = K + 1
+  220             CONTINUE
+  230          CONTINUE
+            ELSE
+               DO 240 I = 1, N
+                  WORK( I ) = ZERO
+  240          CONTINUE
+               DO 260 J = 1, N
+                  DO 250 I = J, N
+                     WORK( I ) = WORK( I ) + ABS( AP( K ) )
+                     K = K + 1
+  250             CONTINUE
+  260          CONTINUE
+            END IF
+         END IF
+         VALUE = ZERO
+         DO 270 I = 1, N
+            VALUE = MAX( VALUE, WORK( I ) )
+  270    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               SCALE = ONE
+               SUM = N
+               K = 2
+               DO 280 J = 2, N
+                  CALL DLASSQ( J-1, AP( K ), 1, SCALE, SUM )
+                  K = K + J
+  280          CONTINUE
+            ELSE
+               SCALE = ZERO
+               SUM = ONE
+               K = 1
+               DO 290 J = 1, N
+                  CALL DLASSQ( J, AP( K ), 1, SCALE, SUM )
+                  K = K + J
+  290          CONTINUE
+            END IF
+         ELSE
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               SCALE = ONE
+               SUM = N
+               K = 2
+               DO 300 J = 1, N - 1
+                  CALL DLASSQ( N-J, AP( K ), 1, SCALE, SUM )
+                  K = K + N - J + 1
+  300          CONTINUE
+            ELSE
+               SCALE = ZERO
+               SUM = ONE
+               K = 1
+               DO 310 J = 1, N
+                  CALL DLASSQ( N-J+1, AP( K ), 1, SCALE, SUM )
+                  K = K + N - J + 1
+  310          CONTINUE
+            END IF
+         END IF
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      DLANTP = VALUE
+      RETURN
+*
+*     End of DLANTP
+*
+      END
diff --git a/costa/native/external/lapack/dlantr.f b/costa/native/external/lapack/dlantr.f
new file mode 100644
index 000000000..19e9b5d92
--- /dev/null
+++ b/costa/native/external/lapack/dlantr.f
@@ -0,0 +1,277 @@
+      DOUBLE PRECISION FUNCTION DLANTR( NORM, UPLO, DIAG, M, N, A, LDA,
+     $                 WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORM, UPLO
+      INTEGER            LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLANTR  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  trapezoidal or triangular matrix A.
+*
+*  Description
+*  ===========
+*
+*  DLANTR returns the value
+*
+*     DLANTR = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in DLANTR as described
+*          above.
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the matrix A is upper or lower trapezoidal.
+*          = 'U':  Upper trapezoidal
+*          = 'L':  Lower trapezoidal
+*          Note that A is triangular instead of trapezoidal if M = N.
+*
+*  DIAG    (input) CHARACTER*1
+*          Specifies whether or not the matrix A has unit diagonal.
+*          = 'N':  Non-unit diagonal
+*          = 'U':  Unit diagonal
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0, and if
+*          UPLO = 'U', M <= N.  When M = 0, DLANTR is set to zero.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0, and if
+*          UPLO = 'L', N <= M.  When N = 0, DLANTR is set to zero.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+*          The trapezoidal matrix A (A is triangular if M = N).
+*          If UPLO = 'U', the leading m by n upper trapezoidal part of
+*          the array A contains the upper trapezoidal matrix, and the
+*          strictly lower triangular part of A is not referenced.
+*          If UPLO = 'L', the leading m by n lower trapezoidal part of
+*          the array A contains the lower trapezoidal matrix, and the
+*          strictly upper triangular part of A is not referenced.  Note
+*          that when DIAG = 'U', the diagonal elements of A are not
+*          referenced and are assumed to be one.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(M,1).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK),
+*          where LWORK >= M when NORM = 'I'; otherwise, WORK is not
+*          referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UDIAG
+      INTEGER            I, J
+      DOUBLE PRECISION   SCALE, SUM, VALUE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASSQ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( MIN( M, N ).EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         IF( LSAME( DIAG, 'U' ) ) THEN
+            VALUE = ONE
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               DO 20 J = 1, N
+                  DO 10 I = 1, MIN( M, J-1 )
+                     VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40 J = 1, N
+                  DO 30 I = J + 1, M
+                     VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   30             CONTINUE
+   40          CONTINUE
+            END IF
+         ELSE
+            VALUE = ZERO
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               DO 60 J = 1, N
+                  DO 50 I = 1, MIN( M, J )
+                     VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80 J = 1, N
+                  DO 70 I = J, M
+                     VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
+*
+*        Find norm1(A).
+*
+         VALUE = ZERO
+         UDIAG = LSAME( DIAG, 'U' )
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 110 J = 1, N
+               IF( ( UDIAG ) .AND. ( J.LE.M ) ) THEN
+                  SUM = ONE
+                  DO 90 I = 1, J - 1
+                     SUM = SUM + ABS( A( I, J ) )
+   90             CONTINUE
+               ELSE
+                  SUM = ZERO
+                  DO 100 I = 1, MIN( M, J )
+                     SUM = SUM + ABS( A( I, J ) )
+  100             CONTINUE
+               END IF
+               VALUE = MAX( VALUE, SUM )
+  110       CONTINUE
+         ELSE
+            DO 140 J = 1, N
+               IF( UDIAG ) THEN
+                  SUM = ONE
+                  DO 120 I = J + 1, M
+                     SUM = SUM + ABS( A( I, J ) )
+  120             CONTINUE
+               ELSE
+                  SUM = ZERO
+                  DO 130 I = J, M
+                     SUM = SUM + ABS( A( I, J ) )
+  130             CONTINUE
+               END IF
+               VALUE = MAX( VALUE, SUM )
+  140       CONTINUE
+         END IF
+      ELSE IF( LSAME( NORM, 'I' ) ) THEN
+*
+*        Find normI(A).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               DO 150 I = 1, M
+                  WORK( I ) = ONE
+  150          CONTINUE
+               DO 170 J = 1, N
+                  DO 160 I = 1, MIN( M, J-1 )
+                     WORK( I ) = WORK( I ) + ABS( A( I, J ) )
+  160             CONTINUE
+  170          CONTINUE
+            ELSE
+               DO 180 I = 1, M
+                  WORK( I ) = ZERO
+  180          CONTINUE
+               DO 200 J = 1, N
+                  DO 190 I = 1, MIN( M, J )
+                     WORK( I ) = WORK( I ) + ABS( A( I, J ) )
+  190             CONTINUE
+  200          CONTINUE
+            END IF
+         ELSE
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               DO 210 I = 1, N
+                  WORK( I ) = ONE
+  210          CONTINUE
+               DO 220 I = N + 1, M
+                  WORK( I ) = ZERO
+  220          CONTINUE
+               DO 240 J = 1, N
+                  DO 230 I = J + 1, M
+                     WORK( I ) = WORK( I ) + ABS( A( I, J ) )
+  230             CONTINUE
+  240          CONTINUE
+            ELSE
+               DO 250 I = 1, M
+                  WORK( I ) = ZERO
+  250          CONTINUE
+               DO 270 J = 1, N
+                  DO 260 I = J, M
+                     WORK( I ) = WORK( I ) + ABS( A( I, J ) )
+  260             CONTINUE
+  270          CONTINUE
+            END IF
+         END IF
+         VALUE = ZERO
+         DO 280 I = 1, M
+            VALUE = MAX( VALUE, WORK( I ) )
+  280    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               SCALE = ONE
+               SUM = MIN( M, N )
+               DO 290 J = 2, N
+                  CALL DLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE, SUM )
+  290          CONTINUE
+            ELSE
+               SCALE = ZERO
+               SUM = ONE
+               DO 300 J = 1, N
+                  CALL DLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE, SUM )
+  300          CONTINUE
+            END IF
+         ELSE
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               SCALE = ONE
+               SUM = MIN( M, N )
+               DO 310 J = 1, N
+                  CALL DLASSQ( M-J, A( MIN( M, J+1 ), J ), 1, SCALE,
+     $                         SUM )
+  310          CONTINUE
+            ELSE
+               SCALE = ZERO
+               SUM = ONE
+               DO 320 J = 1, N
+                  CALL DLASSQ( M-J+1, A( J, J ), 1, SCALE, SUM )
+  320          CONTINUE
+            END IF
+         END IF
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      DLANTR = VALUE
+      RETURN
+*
+*     End of DLANTR
+*
+      END
diff --git a/costa/native/external/lapack/dlanv2.f b/costa/native/external/lapack/dlanv2.f
new file mode 100644
index 000000000..ccc6ef62a
--- /dev/null
+++ b/costa/native/external/lapack/dlanv2.f
@@ -0,0 +1,206 @@
+      SUBROUTINE DLANV2( A, B, C, D, RT1R, RT1I, RT2R, RT2I, CS, SN )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   A, B, C, CS, D, RT1I, RT1R, RT2I, RT2R, SN
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLANV2 computes the Schur factorization of a real 2-by-2 nonsymmetric
+*  matrix in standard form:
+*
+*       [ A  B ] = [ CS -SN ] [ AA  BB ] [ CS  SN ]
+*       [ C  D ]   [ SN  CS ] [ CC  DD ] [-SN  CS ]
+*
+*  where either
+*  1) CC = 0 so that AA and DD are real eigenvalues of the matrix, or
+*  2) AA = DD and BB*CC < 0, so that AA + or - sqrt(BB*CC) are complex
+*  conjugate eigenvalues.
+*
+*  Arguments
+*  =========
+*
+*  A       (input/output) DOUBLE PRECISION
+*  B       (input/output) DOUBLE PRECISION
+*  C       (input/output) DOUBLE PRECISION
+*  D       (input/output) DOUBLE PRECISION
+*          On entry, the elements of the input matrix.
+*          On exit, they are overwritten by the elements of the
+*          standardised Schur form.
+*
+*  RT1R    (output) DOUBLE PRECISION
+*  RT1I    (output) DOUBLE PRECISION
+*  RT2R    (output) DOUBLE PRECISION
+*  RT2I    (output) DOUBLE PRECISION
+*          The real and imaginary parts of the eigenvalues. If the
+*          eigenvalues are a complex conjugate pair, RT1I > 0.
+*
+*  CS      (output) DOUBLE PRECISION
+*  SN      (output) DOUBLE PRECISION
+*          Parameters of the rotation matrix.
+*
+*  Further Details
+*  ===============
+*
+*  Modified by V. Sima, Research Institute for Informatics, Bucharest,
+*  Romania, to reduce the risk of cancellation errors,
+*  when computing real eigenvalues, and to ensure, if possible, that
+*  abs(RT1R) >= abs(RT2R).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, HALF, ONE
+      PARAMETER          ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0 )
+      DOUBLE PRECISION   MULTPL
+      PARAMETER          ( MULTPL = 4.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      DOUBLE PRECISION   AA, BB, BCMAX, BCMIS, CC, CS1, DD, EPS, P, SAB,
+     $                   SAC, SCALE, SIGMA, SN1, TAU, TEMP, Z
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH, DLAPY2
+      EXTERNAL           DLAMCH, DLAPY2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SIGN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      EPS = DLAMCH( 'P' )
+      IF( C.EQ.ZERO ) THEN
+         CS = ONE
+         SN = ZERO
+         GO TO 10
+*
+      ELSE IF( B.EQ.ZERO ) THEN
+*
+*        Swap rows and columns
+*
+         CS = ZERO
+         SN = ONE
+         TEMP = D
+         D = A
+         A = TEMP
+         B = -C
+         C = ZERO
+         GO TO 10
+      ELSE IF( ( A-D ).EQ.ZERO .AND. SIGN( ONE, B ).NE.SIGN( ONE, C ) )
+     $          THEN
+         CS = ONE
+         SN = ZERO
+         GO TO 10
+      ELSE
+*
+         TEMP = A - D
+         P = HALF*TEMP
+         BCMAX = MAX( ABS( B ), ABS( C ) )
+         BCMIS = MIN( ABS( B ), ABS( C ) )*SIGN( ONE, B )*SIGN( ONE, C )
+         SCALE = MAX( ABS( P ), BCMAX )
+         Z = ( P / SCALE )*P + ( BCMAX / SCALE )*BCMIS
+*
+*        If Z is of the order of the machine accuracy, postpone the
+*        decision on the nature of eigenvalues
+*
+         IF( Z.GE.MULTPL*EPS ) THEN
+*
+*           Real eigenvalues. Compute A and D.
+*
+            Z = P + SIGN( SQRT( SCALE )*SQRT( Z ), P )
+            A = D + Z
+            D = D - ( BCMAX / Z )*BCMIS
+*
+*           Compute B and the rotation matrix
+*
+            TAU = DLAPY2( C, Z )
+            CS = Z / TAU
+            SN = C / TAU
+            B = B - C
+            C = ZERO
+         ELSE
+*
+*           Complex eigenvalues, or real (almost) equal eigenvalues.
+*           Make diagonal elements equal.
+*
+            SIGMA = B + C
+            TAU = DLAPY2( SIGMA, TEMP )
+            CS = SQRT( HALF*( ONE+ABS( SIGMA ) / TAU ) )
+            SN = -( P / ( TAU*CS ) )*SIGN( ONE, SIGMA )
+*
+*           Compute [ AA  BB ] = [ A  B ] [ CS -SN ]
+*                   [ CC  DD ]   [ C  D ] [ SN  CS ]
+*
+            AA = A*CS + B*SN
+            BB = -A*SN + B*CS
+            CC = C*CS + D*SN
+            DD = -C*SN + D*CS
+*
+*           Compute [ A  B ] = [ CS  SN ] [ AA  BB ]
+*                   [ C  D ]   [-SN  CS ] [ CC  DD ]
+*
+            A = AA*CS + CC*SN
+            B = BB*CS + DD*SN
+            C = -AA*SN + CC*CS
+            D = -BB*SN + DD*CS
+*
+            TEMP = HALF*( A+D )
+            A = TEMP
+            D = TEMP
+*
+            IF( C.NE.ZERO ) THEN
+               IF( B.NE.ZERO ) THEN
+                  IF( SIGN( ONE, B ).EQ.SIGN( ONE, C ) ) THEN
+*
+*                    Real eigenvalues: reduce to upper triangular form
+*
+                     SAB = SQRT( ABS( B ) )
+                     SAC = SQRT( ABS( C ) )
+                     P = SIGN( SAB*SAC, C )
+                     TAU = ONE / SQRT( ABS( B+C ) )
+                     A = TEMP + P
+                     D = TEMP - P
+                     B = B - C
+                     C = ZERO
+                     CS1 = SAB*TAU
+                     SN1 = SAC*TAU
+                     TEMP = CS*CS1 - SN*SN1
+                     SN = CS*SN1 + SN*CS1
+                     CS = TEMP
+                  END IF
+               ELSE
+                  B = -C
+                  C = ZERO
+                  TEMP = CS
+                  CS = -SN
+                  SN = TEMP
+               END IF
+            END IF
+         END IF
+*
+      END IF
+*
+   10 CONTINUE
+*
+*     Store eigenvalues in (RT1R,RT1I) and (RT2R,RT2I).
+*
+      RT1R = A
+      RT2R = D
+      IF( C.EQ.ZERO ) THEN
+         RT1I = ZERO
+         RT2I = ZERO
+      ELSE
+         RT1I = SQRT( ABS( B ) )*SQRT( ABS( C ) )
+         RT2I = -RT1I
+      END IF
+      RETURN
+*
+*     End of DLANV2
+*
+      END
diff --git a/costa/native/external/lapack/dlapll.f b/costa/native/external/lapack/dlapll.f
new file mode 100644
index 000000000..9ae679831
--- /dev/null
+++ b/costa/native/external/lapack/dlapll.f
@@ -0,0 +1,100 @@
+      SUBROUTINE DLAPLL( N, X, INCX, Y, INCY, SSMIN )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, INCY, N
+      DOUBLE PRECISION   SSMIN
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  Given two column vectors X and Y, let
+*
+*                       A = ( X Y ).
+*
+*  The subroutine first computes the QR factorization of A = Q*R,
+*  and then computes the SVD of the 2-by-2 upper triangular matrix R.
+*  The smaller singular value of R is returned in SSMIN, which is used
+*  as the measurement of the linear dependency of the vectors X and Y.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The length of the vectors X and Y.
+*
+*  X       (input/output) DOUBLE PRECISION array,
+*                         dimension (1+(N-1)*INCX)
+*          On entry, X contains the N-vector X.
+*          On exit, X is overwritten.
+*
+*  INCX    (input) INTEGER
+*          The increment between successive elements of X. INCX > 0.
+*
+*  Y       (input/output) DOUBLE PRECISION array,
+*                         dimension (1+(N-1)*INCY)
+*          On entry, Y contains the N-vector Y.
+*          On exit, Y is overwritten.
+*
+*  INCY    (input) INTEGER
+*          The increment between successive elements of Y. INCY > 0.
+*
+*  SSMIN   (output) DOUBLE PRECISION
+*          The smallest singular value of the N-by-2 matrix A = ( X Y ).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      DOUBLE PRECISION   A11, A12, A22, C, SSMAX, TAU
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DDOT
+      EXTERNAL           DDOT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DLARFG, DLAS2
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.1 ) THEN
+         SSMIN = ZERO
+         RETURN
+      END IF
+*
+*     Compute the QR factorization of the N-by-2 matrix ( X Y )
+*
+      CALL DLARFG( N, X( 1 ), X( 1+INCX ), INCX, TAU )
+      A11 = X( 1 )
+      X( 1 ) = ONE
+*
+      C = -TAU*DDOT( N, X, INCX, Y, INCY )
+      CALL DAXPY( N, C, X, INCX, Y, INCY )
+*
+      CALL DLARFG( N-1, Y( 1+INCY ), Y( 1+2*INCY ), INCY, TAU )
+*
+      A12 = Y( 1 )
+      A22 = Y( 1+INCY )
+*
+*     Compute the SVD of 2-by-2 Upper triangular matrix.
+*
+      CALL DLAS2( A11, A12, A22, SSMIN, SSMAX )
+*
+      RETURN
+*
+*     End of DLAPLL
+*
+      END
diff --git a/costa/native/external/lapack/dlapmt.f b/costa/native/external/lapack/dlapmt.f
new file mode 100644
index 000000000..80b55c35d
--- /dev/null
+++ b/costa/native/external/lapack/dlapmt.f
@@ -0,0 +1,135 @@
+      SUBROUTINE DLAPMT( FORWRD, M, N, X, LDX, K )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      LOGICAL            FORWRD
+      INTEGER            LDX, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            K( * )
+      DOUBLE PRECISION   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAPMT rearranges the columns of the M by N matrix X as specified
+*  by the permutation K(1),K(2),...,K(N) of the integers 1,...,N.
+*  If FORWRD = .TRUE.,  forward permutation:
+*
+*       X(*,K(J)) is moved X(*,J) for J = 1,2,...,N.
+*
+*  If FORWRD = .FALSE., backward permutation:
+*
+*       X(*,J) is moved to X(*,K(J)) for J = 1,2,...,N.
+*
+*  Arguments
+*  =========
+*
+*  FORWRD  (input) LOGICAL
+*          = .TRUE., forward permutation
+*          = .FALSE., backward permutation
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix X. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix X. N >= 0.
+*
+*  X       (input/output) DOUBLE PRECISION array, dimension (LDX,N)
+*          On entry, the M by N matrix X.
+*          On exit, X contains the permuted matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X, LDX >= MAX(1,M).
+*
+*  K       (input) INTEGER array, dimension (N)
+*          On entry, K contains the permutation vector.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, II, IN, J
+      DOUBLE PRECISION   TEMP
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.LE.1 )
+     $   RETURN
+*
+      DO 10 I = 1, N
+         K( I ) = -K( I )
+   10 CONTINUE
+*
+      IF( FORWRD ) THEN
+*
+*        Forward permutation
+*
+         DO 50 I = 1, N
+*
+            IF( K( I ).GT.0 )
+     $         GO TO 40
+*
+            J = I
+            K( J ) = -K( J )
+            IN = K( J )
+*
+   20       CONTINUE
+            IF( K( IN ).GT.0 )
+     $         GO TO 40
+*
+            DO 30 II = 1, M
+               TEMP = X( II, J )
+               X( II, J ) = X( II, IN )
+               X( II, IN ) = TEMP
+   30       CONTINUE
+*
+            K( IN ) = -K( IN )
+            J = IN
+            IN = K( IN )
+            GO TO 20
+*
+   40       CONTINUE
+*
+   50    CONTINUE
+*
+      ELSE
+*
+*        Backward permutation
+*
+         DO 90 I = 1, N
+*
+            IF( K( I ).GT.0 )
+     $         GO TO 80
+*
+            K( I ) = -K( I )
+            J = K( I )
+   60       CONTINUE
+            IF( J.EQ.I )
+     $         GO TO 80
+*
+            DO 70 II = 1, M
+               TEMP = X( II, I )
+               X( II, I ) = X( II, J )
+               X( II, J ) = TEMP
+   70       CONTINUE
+*
+            K( J ) = -K( J )
+            J = K( J )
+            GO TO 60
+*
+   80       CONTINUE
+*
+   90    CONTINUE
+*
+      END IF
+*
+      RETURN
+*
+*     End of DLAPMT
+*
+      END
diff --git a/costa/native/external/lapack/dlapy2.f b/costa/native/external/lapack/dlapy2.f
new file mode 100644
index 000000000..f3d732da4
--- /dev/null
+++ b/costa/native/external/lapack/dlapy2.f
@@ -0,0 +1,54 @@
+      DOUBLE PRECISION FUNCTION DLAPY2( X, Y )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   X, Y
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary
+*  overflow.
+*
+*  Arguments
+*  =========
+*
+*  X       (input) DOUBLE PRECISION
+*  Y       (input) DOUBLE PRECISION
+*          X and Y specify the values x and y.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D0 )
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      DOUBLE PRECISION   W, XABS, YABS, Z
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      XABS = ABS( X )
+      YABS = ABS( Y )
+      W = MAX( XABS, YABS )
+      Z = MIN( XABS, YABS )
+      IF( Z.EQ.ZERO ) THEN
+         DLAPY2 = W
+      ELSE
+         DLAPY2 = W*SQRT( ONE+( Z / W )**2 )
+      END IF
+      RETURN
+*
+*     End of DLAPY2
+*
+      END
diff --git a/costa/native/external/lapack/dlapy3.f b/costa/native/external/lapack/dlapy3.f
new file mode 100644
index 000000000..8c1483263
--- /dev/null
+++ b/costa/native/external/lapack/dlapy3.f
@@ -0,0 +1,54 @@
+      DOUBLE PRECISION FUNCTION DLAPY3( X, Y, Z )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   X, Y, Z
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAPY3 returns sqrt(x**2+y**2+z**2), taking care not to cause
+*  unnecessary overflow.
+*
+*  Arguments
+*  =========
+*
+*  X       (input) DOUBLE PRECISION
+*  Y       (input) DOUBLE PRECISION
+*  Z       (input) DOUBLE PRECISION
+*          X, Y and Z specify the values x, y and z.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D0 )
+*     ..
+*     .. Local Scalars ..
+      DOUBLE PRECISION   W, XABS, YABS, ZABS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      XABS = ABS( X )
+      YABS = ABS( Y )
+      ZABS = ABS( Z )
+      W = MAX( XABS, YABS, ZABS )
+      IF( W.EQ.ZERO ) THEN
+         DLAPY3 = ZERO
+      ELSE
+         DLAPY3 = W*SQRT( ( XABS / W )**2+( YABS / W )**2+
+     $            ( ZABS / W )**2 )
+      END IF
+      RETURN
+*
+*     End of DLAPY3
+*
+      END
diff --git a/costa/native/external/lapack/dlaqgb.f b/costa/native/external/lapack/dlaqgb.f
new file mode 100644
index 000000000..2f47b456c
--- /dev/null
+++ b/costa/native/external/lapack/dlaqgb.f
@@ -0,0 +1,169 @@
+      SUBROUTINE DLAQGB( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND,
+     $                   AMAX, EQUED )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED
+      INTEGER            KL, KU, LDAB, M, N
+      DOUBLE PRECISION   AMAX, COLCND, ROWCND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AB( LDAB, * ), C( * ), R( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAQGB equilibrates a general M by N band matrix A with KL
+*  subdiagonals and KU superdiagonals using the row and scaling factors
+*  in the vectors R and C.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  AB      (input/output) DOUBLE PRECISION array, dimension (LDAB,N)
+*          On entry, the matrix A in band storage, in rows 1 to KL+KU+1.
+*          The j-th column of A is stored in the j-th column of the
+*          array AB as follows:
+*          AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
+*
+*          On exit, the equilibrated matrix, in the same storage format
+*          as A.  See EQUED for the form of the equilibrated matrix.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDA >= KL+KU+1.
+*
+*  R       (output) DOUBLE PRECISION array, dimension (M)
+*          The row scale factors for A.
+*
+*  C       (output) DOUBLE PRECISION array, dimension (N)
+*          The column scale factors for A.
+*
+*  ROWCND  (output) DOUBLE PRECISION
+*          Ratio of the smallest R(i) to the largest R(i).
+*
+*  COLCND  (output) DOUBLE PRECISION
+*          Ratio of the smallest C(i) to the largest C(i).
+*
+*  AMAX    (input) DOUBLE PRECISION
+*          Absolute value of largest matrix entry.
+*
+*  EQUED   (output) CHARACTER*1
+*          Specifies the form of equilibration that was done.
+*          = 'N':  No equilibration
+*          = 'R':  Row equilibration, i.e., A has been premultiplied by
+*                  diag(R).
+*          = 'C':  Column equilibration, i.e., A has been postmultiplied
+*                  by diag(C).
+*          = 'B':  Both row and column equilibration, i.e., A has been
+*                  replaced by diag(R) * A * diag(C).
+*
+*  Internal Parameters
+*  ===================
+*
+*  THRESH is a threshold value used to decide if row or column scaling
+*  should be done based on the ratio of the row or column scaling
+*  factors.  If ROWCND < THRESH, row scaling is done, and if
+*  COLCND < THRESH, column scaling is done.
+*
+*  LARGE and SMALL are threshold values used to decide if row scaling
+*  should be done based on the absolute size of the largest matrix
+*  element.  If AMAX > LARGE or AMAX < SMALL, row scaling is done.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, THRESH
+      PARAMETER          ( ONE = 1.0D+0, THRESH = 0.1D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      DOUBLE PRECISION   CJ, LARGE, SMALL
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 .OR. N.LE.0 ) THEN
+         EQUED = 'N'
+         RETURN
+      END IF
+*
+*     Initialize LARGE and SMALL.
+*
+      SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' )
+      LARGE = ONE / SMALL
+*
+      IF( ROWCND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE )
+     $     THEN
+*
+*        No row scaling
+*
+         IF( COLCND.GE.THRESH ) THEN
+*
+*           No column scaling
+*
+            EQUED = 'N'
+         ELSE
+*
+*           Column scaling
+*
+            DO 20 J = 1, N
+               CJ = C( J )
+               DO 10 I = MAX( 1, J-KU ), MIN( M, J+KL )
+                  AB( KU+1+I-J, J ) = CJ*AB( KU+1+I-J, J )
+   10          CONTINUE
+   20       CONTINUE
+            EQUED = 'C'
+         END IF
+      ELSE IF( COLCND.GE.THRESH ) THEN
+*
+*        Row scaling, no column scaling
+*
+         DO 40 J = 1, N
+            DO 30 I = MAX( 1, J-KU ), MIN( M, J+KL )
+               AB( KU+1+I-J, J ) = R( I )*AB( KU+1+I-J, J )
+   30       CONTINUE
+   40    CONTINUE
+         EQUED = 'R'
+      ELSE
+*
+*        Row and column scaling
+*
+         DO 60 J = 1, N
+            CJ = C( J )
+            DO 50 I = MAX( 1, J-KU ), MIN( M, J+KL )
+               AB( KU+1+I-J, J ) = CJ*R( I )*AB( KU+1+I-J, J )
+   50       CONTINUE
+   60    CONTINUE
+         EQUED = 'B'
+      END IF
+*
+      RETURN
+*
+*     End of DLAQGB
+*
+      END
diff --git a/costa/native/external/lapack/dlaqge.f b/costa/native/external/lapack/dlaqge.f
new file mode 100644
index 000000000..d93956fcf
--- /dev/null
+++ b/costa/native/external/lapack/dlaqge.f
@@ -0,0 +1,155 @@
+      SUBROUTINE DLAQGE( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX,
+     $                   EQUED )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED
+      INTEGER            LDA, M, N
+      DOUBLE PRECISION   AMAX, COLCND, ROWCND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), C( * ), R( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAQGE equilibrates a general M by N matrix A using the row and
+*  scaling factors in the vectors R and C.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the M by N matrix A.
+*          On exit, the equilibrated matrix.  See EQUED for the form of
+*          the equilibrated matrix.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(M,1).
+*
+*  R       (input) DOUBLE PRECISION array, dimension (M)
+*          The row scale factors for A.
+*
+*  C       (input) DOUBLE PRECISION array, dimension (N)
+*          The column scale factors for A.
+*
+*  ROWCND  (input) DOUBLE PRECISION
+*          Ratio of the smallest R(i) to the largest R(i).
+*
+*  COLCND  (input) DOUBLE PRECISION
+*          Ratio of the smallest C(i) to the largest C(i).
+*
+*  AMAX    (input) DOUBLE PRECISION
+*          Absolute value of largest matrix entry.
+*
+*  EQUED   (output) CHARACTER*1
+*          Specifies the form of equilibration that was done.
+*          = 'N':  No equilibration
+*          = 'R':  Row equilibration, i.e., A has been premultiplied by
+*                  diag(R).
+*          = 'C':  Column equilibration, i.e., A has been postmultiplied
+*                  by diag(C).
+*          = 'B':  Both row and column equilibration, i.e., A has been
+*                  replaced by diag(R) * A * diag(C).
+*
+*  Internal Parameters
+*  ===================
+*
+*  THRESH is a threshold value used to decide if row or column scaling
+*  should be done based on the ratio of the row or column scaling
+*  factors.  If ROWCND < THRESH, row scaling is done, and if
+*  COLCND < THRESH, column scaling is done.
+*
+*  LARGE and SMALL are threshold values used to decide if row scaling
+*  should be done based on the absolute size of the largest matrix
+*  element.  If AMAX > LARGE or AMAX < SMALL, row scaling is done.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, THRESH
+      PARAMETER          ( ONE = 1.0D+0, THRESH = 0.1D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      DOUBLE PRECISION   CJ, LARGE, SMALL
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 .OR. N.LE.0 ) THEN
+         EQUED = 'N'
+         RETURN
+      END IF
+*
+*     Initialize LARGE and SMALL.
+*
+      SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' )
+      LARGE = ONE / SMALL
+*
+      IF( ROWCND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE )
+     $     THEN
+*
+*        No row scaling
+*
+         IF( COLCND.GE.THRESH ) THEN
+*
+*           No column scaling
+*
+            EQUED = 'N'
+         ELSE
+*
+*           Column scaling
+*
+            DO 20 J = 1, N
+               CJ = C( J )
+               DO 10 I = 1, M
+                  A( I, J ) = CJ*A( I, J )
+   10          CONTINUE
+   20       CONTINUE
+            EQUED = 'C'
+         END IF
+      ELSE IF( COLCND.GE.THRESH ) THEN
+*
+*        Row scaling, no column scaling
+*
+         DO 40 J = 1, N
+            DO 30 I = 1, M
+               A( I, J ) = R( I )*A( I, J )
+   30       CONTINUE
+   40    CONTINUE
+         EQUED = 'R'
+      ELSE
+*
+*        Row and column scaling
+*
+         DO 60 J = 1, N
+            CJ = C( J )
+            DO 50 I = 1, M
+               A( I, J ) = CJ*R( I )*A( I, J )
+   50       CONTINUE
+   60    CONTINUE
+         EQUED = 'B'
+      END IF
+*
+      RETURN
+*
+*     End of DLAQGE
+*
+      END
diff --git a/costa/native/external/lapack/dlaqp2.f b/costa/native/external/lapack/dlaqp2.f
new file mode 100644
index 000000000..720b740dc
--- /dev/null
+++ b/costa/native/external/lapack/dlaqp2.f
@@ -0,0 +1,166 @@
+      SUBROUTINE DLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2,
+     $                   WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            LDA, M, N, OFFSET
+*     ..
+*     .. Array Arguments ..
+      INTEGER            JPVT( * )
+      DOUBLE PRECISION   A( LDA, * ), TAU( * ), VN1( * ), VN2( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAQP2 computes a QR factorization with column pivoting of
+*  the block A(OFFSET+1:M,1:N).
+*  The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A. N >= 0.
+*
+*  OFFSET  (input) INTEGER
+*          The number of rows of the matrix A that must be pivoted
+*          but no factorized. OFFSET >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, the upper triangle of block A(OFFSET+1:M,1:N) is
+*          the triangular factor obtained; the elements in block
+*          A(OFFSET+1:M,1:N) below the diagonal, together with the
+*          array TAU, represent the orthogonal matrix Q as a product of
+*          elementary reflectors. Block A(1:OFFSET,1:N) has been
+*          accordingly pivoted, but no factorized.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  JPVT    (input/output) INTEGER array, dimension (N)
+*          On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted
+*          to the front of A*P (a leading column); if JPVT(i) = 0,
+*          the i-th column of A is a free column.
+*          On exit, if JPVT(i) = k, then the i-th column of A*P
+*          was the k-th column of A.
+*
+*  TAU     (output) DOUBLE PRECISION array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors.
+*
+*  VN1     (input/output) DOUBLE PRECISION array, dimension (N)
+*          The vector with the partial column norms.
+*
+*  VN2     (input/output) DOUBLE PRECISION array, dimension (N)
+*          The vector with the exact column norms.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain
+*    X. Sun, Computer Science Dept., Duke University, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, ITEMP, J, MN, OFFPI, PVT
+      DOUBLE PRECISION   AII, TEMP, TEMP2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARF, DLARFG, DSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. External Functions ..
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DNRM2
+      EXTERNAL           IDAMAX, DNRM2
+*     ..
+*     .. Executable Statements ..
+*
+      MN = MIN( M-OFFSET, N )
+*
+*     Compute factorization.
+*
+      DO 20 I = 1, MN
+*
+         OFFPI = OFFSET + I
+*
+*        Determine ith pivot column and swap if necessary.
+*
+         PVT = ( I-1 ) + IDAMAX( N-I+1, VN1( I ), 1 )
+*
+         IF( PVT.NE.I ) THEN
+            CALL DSWAP( M, A( 1, PVT ), 1, A( 1, I ), 1 )
+            ITEMP = JPVT( PVT )
+            JPVT( PVT ) = JPVT( I )
+            JPVT( I ) = ITEMP
+            VN1( PVT ) = VN1( I )
+            VN2( PVT ) = VN2( I )
+         END IF
+*
+*        Generate elementary reflector H(i).
+*
+         IF( OFFPI.LT.M ) THEN
+            CALL DLARFG( M-OFFPI+1, A( OFFPI, I ), A( OFFPI+1, I ), 1,
+     $                   TAU( I ) )
+         ELSE
+            CALL DLARFG( 1, A( M, I ), A( M, I ), 1, TAU( I ) )
+         END IF
+*
+         IF( I.LT.N ) THEN
+*
+*           Apply H(i)' to A(offset+i:m,i+1:n) from the left.
+*
+            AII = A( OFFPI, I )
+            A( OFFPI, I ) = ONE
+            CALL DLARF( 'Left', M-OFFPI+1, N-I, A( OFFPI, I ), 1,
+     $                  TAU( I ), A( OFFPI, I+1 ), LDA, WORK( 1 ) )
+            A( OFFPI, I ) = AII
+         END IF
+*
+*        Update partial column norms.
+*
+         DO 10 J = I + 1, N
+            IF( VN1( J ).NE.ZERO ) THEN
+               TEMP = ONE - ( ABS( A( OFFPI, J ) ) / VN1( J ) )**2
+               TEMP = MAX( TEMP, ZERO )
+               TEMP2 = ONE + 0.05D0*TEMP*( VN1( J ) / VN2( J ) )**2
+               IF( TEMP2.EQ.ONE ) THEN
+                  IF( OFFPI.LT.M ) THEN
+                     VN1( J ) = DNRM2( M-OFFPI, A( OFFPI+1, J ), 1 )
+                     VN2( J ) = VN1( J )
+                  ELSE
+                     VN1( J ) = ZERO
+                     VN2( J ) = ZERO
+                  END IF
+               ELSE
+                  VN1( J ) = VN1( J )*SQRT( TEMP )
+               END IF
+            END IF
+   10    CONTINUE
+*
+   20 CONTINUE
+*
+      RETURN
+*
+*     End of DLAQP2
+*
+      END
diff --git a/costa/native/external/lapack/dlaqps.f b/costa/native/external/lapack/dlaqps.f
new file mode 100644
index 000000000..3190e3d7f
--- /dev/null
+++ b/costa/native/external/lapack/dlaqps.f
@@ -0,0 +1,245 @@
+      SUBROUTINE DLAQPS( M, N, OFFSET, NB, KB, A, LDA, JPVT, TAU, VN1,
+     $                   VN2, AUXV, F, LDF )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            KB, LDA, LDF, M, N, NB, OFFSET
+*     ..
+*     .. Array Arguments ..
+      INTEGER            JPVT( * )
+      DOUBLE PRECISION   A( LDA, * ), AUXV( * ), F( LDF, * ), TAU( * ),
+     $                   VN1( * ), VN2( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAQPS computes a step of QR factorization with column pivoting
+*  of a real M-by-N matrix A by using Blas-3.  It tries to factorize
+*  NB columns from A starting from the row OFFSET+1, and updates all
+*  of the matrix with Blas-3 xGEMM.
+*
+*  In some cases, due to catastrophic cancellations, it cannot
+*  factorize NB columns.  Hence, the actual number of factorized
+*  columns is returned in KB.
+*
+*  Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A. N >= 0
+*
+*  OFFSET  (input) INTEGER
+*          The number of rows of A that have been factorized in
+*          previous steps.
+*
+*  NB      (input) INTEGER
+*          The number of columns to factorize.
+*
+*  KB      (output) INTEGER
+*          The number of columns actually factorized.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, block A(OFFSET+1:M,1:KB) is the triangular
+*          factor obtained and block A(1:OFFSET,1:N) has been
+*          accordingly pivoted, but no factorized.
+*          The rest of the matrix, block A(OFFSET+1:M,KB+1:N) has
+*          been updated.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  JPVT    (input/output) INTEGER array, dimension (N)
+*          JPVT(I) = K <==> Column K of the full matrix A has been
+*          permuted into position I in AP.
+*
+*  TAU     (output) DOUBLE PRECISION array, dimension (KB)
+*          The scalar factors of the elementary reflectors.
+*
+*  VN1     (input/output) DOUBLE PRECISION array, dimension (N)
+*          The vector with the partial column norms.
+*
+*  VN2     (input/output) DOUBLE PRECISION array, dimension (N)
+*          The vector with the exact column norms.
+*
+*  AUXV    (input/output) DOUBLE PRECISION array, dimension (NB)
+*          Auxiliar vector.
+*
+*  F       (input/output) DOUBLE PRECISION array, dimension (LDF,NB)
+*          Matrix F' = L*Y'*A.
+*
+*  LDF     (input) INTEGER
+*          The leading dimension of the array F. LDF >= max(1,N).
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain
+*    X. Sun, Computer Science Dept., Duke University, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            ITEMP, J, K, LASTRK, LSTICC, PVT, RK
+      DOUBLE PRECISION   AKK, TEMP, TEMP2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMM, DGEMV, DLARFG, DSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, MAX, MIN, NINT, SQRT
+*     ..
+*     .. External Functions ..
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DNRM2
+      EXTERNAL           IDAMAX, DNRM2
+*     ..
+*     .. Executable Statements ..
+*
+      LASTRK = MIN( M, N+OFFSET )
+      LSTICC = 0
+      K = 0
+*
+*     Beginning of while loop.
+*
+   10 CONTINUE
+      IF( ( K.LT.NB ) .AND. ( LSTICC.EQ.0 ) ) THEN
+         K = K + 1
+         RK = OFFSET + K
+*
+*        Determine ith pivot column and swap if necessary
+*
+         PVT = ( K-1 ) + IDAMAX( N-K+1, VN1( K ), 1 )
+         IF( PVT.NE.K ) THEN
+            CALL DSWAP( M, A( 1, PVT ), 1, A( 1, K ), 1 )
+            CALL DSWAP( K-1, F( PVT, 1 ), LDF, F( K, 1 ), LDF )
+            ITEMP = JPVT( PVT )
+            JPVT( PVT ) = JPVT( K )
+            JPVT( K ) = ITEMP
+            VN1( PVT ) = VN1( K )
+            VN2( PVT ) = VN2( K )
+         END IF
+*
+*        Apply previous Householder reflectors to column K:
+*        A(RK:M,K) := A(RK:M,K) - A(RK:M,1:K-1)*F(K,1:K-1)'.
+*
+         IF( K.GT.1 ) THEN
+            CALL DGEMV( 'No transpose', M-RK+1, K-1, -ONE, A( RK, 1 ),
+     $                  LDA, F( K, 1 ), LDF, ONE, A( RK, K ), 1 )
+         END IF
+*
+*        Generate elementary reflector H(k).
+*
+         IF( RK.LT.M ) THEN
+            CALL DLARFG( M-RK+1, A( RK, K ), A( RK+1, K ), 1, TAU( K ) )
+         ELSE
+            CALL DLARFG( 1, A( RK, K ), A( RK, K ), 1, TAU( K ) )
+         END IF
+*
+         AKK = A( RK, K )
+         A( RK, K ) = ONE
+*
+*        Compute Kth column of F:
+*
+*        Compute  F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)'*A(RK:M,K).
+*
+         IF( K.LT.N ) THEN
+            CALL DGEMV( 'Transpose', M-RK+1, N-K, TAU( K ),
+     $                  A( RK, K+1 ), LDA, A( RK, K ), 1, ZERO,
+     $                  F( K+1, K ), 1 )
+         END IF
+*
+*        Padding F(1:K,K) with zeros.
+*
+         DO 20 J = 1, K
+            F( J, K ) = ZERO
+   20    CONTINUE
+*
+*        Incremental updating of F:
+*        F(1:N,K) := F(1:N,K) - tau(K)*F(1:N,1:K-1)*A(RK:M,1:K-1)'
+*                    *A(RK:M,K).
+*
+         IF( K.GT.1 ) THEN
+            CALL DGEMV( 'Transpose', M-RK+1, K-1, -TAU( K ), A( RK, 1 ),
+     $                  LDA, A( RK, K ), 1, ZERO, AUXV( 1 ), 1 )
+*
+            CALL DGEMV( 'No transpose', N, K-1, ONE, F( 1, 1 ), LDF,
+     $                  AUXV( 1 ), 1, ONE, F( 1, K ), 1 )
+         END IF
+*
+*        Update the current row of A:
+*        A(RK,K+1:N) := A(RK,K+1:N) - A(RK,1:K)*F(K+1:N,1:K)'.
+*
+         IF( K.LT.N ) THEN
+            CALL DGEMV( 'No transpose', N-K, K, -ONE, F( K+1, 1 ), LDF,
+     $                  A( RK, 1 ), LDA, ONE, A( RK, K+1 ), LDA )
+         END IF
+*
+*        Update partial column norms.
+*
+         IF( RK.LT.LASTRK ) THEN
+            DO 30 J = K + 1, N
+               IF( VN1( J ).NE.ZERO ) THEN
+                  TEMP = ABS( A( RK, J ) ) / VN1( J )
+                  TEMP = MAX( ZERO, ( ONE+TEMP )*( ONE-TEMP ) )
+                  TEMP2 = ONE + 0.05D0*TEMP*( VN1( J ) / VN2( J ) )**2
+                  IF( TEMP2.EQ.ONE ) THEN
+                     VN2( J ) = DBLE( LSTICC )
+                     LSTICC = J
+                  ELSE
+                     VN1( J ) = VN1( J )*SQRT( TEMP )
+                  END IF
+               END IF
+   30       CONTINUE
+         END IF
+*
+         A( RK, K ) = AKK
+*
+*        End of while loop.
+*
+         GO TO 10
+      END IF
+      KB = K
+      RK = OFFSET + KB
+*
+*     Apply the block reflector to the rest of the matrix:
+*     A(OFFSET+KB+1:M,KB+1:N) := A(OFFSET+KB+1:M,KB+1:N) -
+*                         A(OFFSET+KB+1:M,1:KB)*F(KB+1:N,1:KB)'.
+*
+      IF( KB.LT.MIN( N, M-OFFSET ) ) THEN
+         CALL DGEMM( 'No transpose', 'Transpose', M-RK, N-KB, KB, -ONE,
+     $               A( RK+1, 1 ), LDA, F( KB+1, 1 ), LDF, ONE,
+     $               A( RK+1, KB+1 ), LDA )
+      END IF
+*
+*     Recomputation of difficult columns.
+*
+   40 CONTINUE
+      IF( LSTICC.GT.0 ) THEN
+         ITEMP = NINT( VN2( LSTICC ) )
+         VN1( LSTICC ) = DNRM2( M-RK, A( RK+1, LSTICC ), 1 )
+         VN2( LSTICC ) = VN1( LSTICC )
+         LSTICC = ITEMP
+         GO TO 40
+      END IF
+*
+      RETURN
+*
+*     End of DLAQPS
+*
+      END
diff --git a/costa/native/external/lapack/dlaqsb.f b/costa/native/external/lapack/dlaqsb.f
new file mode 100644
index 000000000..83aaf20ee
--- /dev/null
+++ b/costa/native/external/lapack/dlaqsb.f
@@ -0,0 +1,149 @@
+      SUBROUTINE DLAQSB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, UPLO
+      INTEGER            KD, LDAB, N
+      DOUBLE PRECISION   AMAX, SCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AB( LDAB, * ), S( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAQSB equilibrates a symmetric band matrix A using the scaling
+*  factors in the vector S.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of super-diagonals of the matrix A if UPLO = 'U',
+*          or the number of sub-diagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) DOUBLE PRECISION array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*          On exit, if INFO = 0, the triangular factor U or L from the
+*          Cholesky factorization A = U'*U or A = L*L' of the band
+*          matrix A, in the same storage format as A.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  S       (output) DOUBLE PRECISION array, dimension (N)
+*          The scale factors for A.
+*
+*  SCOND   (input) DOUBLE PRECISION
+*          Ratio of the smallest S(i) to the largest S(i).
+*
+*  AMAX    (input) DOUBLE PRECISION
+*          Absolute value of largest matrix entry.
+*
+*  EQUED   (output) CHARACTER*1
+*          Specifies whether or not equilibration was done.
+*          = 'N':  No equilibration.
+*          = 'Y':  Equilibration was done, i.e., A has been replaced by
+*                  diag(S) * A * diag(S).
+*
+*  Internal Parameters
+*  ===================
+*
+*  THRESH is a threshold value used to decide if scaling should be done
+*  based on the ratio of the scaling factors.  If SCOND < THRESH,
+*  scaling is done.
+*
+*  LARGE and SMALL are threshold values used to decide if scaling should
+*  be done based on the absolute size of the largest matrix element.
+*  If AMAX > LARGE or AMAX < SMALL, scaling is done.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, THRESH
+      PARAMETER          ( ONE = 1.0D+0, THRESH = 0.1D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      DOUBLE PRECISION   CJ, LARGE, SMALL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 ) THEN
+         EQUED = 'N'
+         RETURN
+      END IF
+*
+*     Initialize LARGE and SMALL.
+*
+      SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' )
+      LARGE = ONE / SMALL
+*
+      IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN
+*
+*        No equilibration
+*
+         EQUED = 'N'
+      ELSE
+*
+*        Replace A by diag(S) * A * diag(S).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*           Upper triangle of A is stored in band format.
+*
+            DO 20 J = 1, N
+               CJ = S( J )
+               DO 10 I = MAX( 1, J-KD ), J
+                  AB( KD+1+I-J, J ) = CJ*S( I )*AB( KD+1+I-J, J )
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+*
+*           Lower triangle of A is stored.
+*
+            DO 40 J = 1, N
+               CJ = S( J )
+               DO 30 I = J, MIN( N, J+KD )
+                  AB( 1+I-J, J ) = CJ*S( I )*AB( 1+I-J, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         EQUED = 'Y'
+      END IF
+*
+      RETURN
+*
+*     End of DLAQSB
+*
+      END
diff --git a/costa/native/external/lapack/dlaqsp.f b/costa/native/external/lapack/dlaqsp.f
new file mode 100644
index 000000000..3c6db870c
--- /dev/null
+++ b/costa/native/external/lapack/dlaqsp.f
@@ -0,0 +1,141 @@
+      SUBROUTINE DLAQSP( UPLO, N, AP, S, SCOND, AMAX, EQUED )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, UPLO
+      INTEGER            N
+      DOUBLE PRECISION   AMAX, SCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AP( * ), S( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAQSP equilibrates a symmetric matrix A using the scaling factors
+*  in the vector S.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, the equilibrated matrix:  diag(S) * A * diag(S), in
+*          the same storage format as A.
+*
+*  S       (input) DOUBLE PRECISION array, dimension (N)
+*          The scale factors for A.
+*
+*  SCOND   (input) DOUBLE PRECISION
+*          Ratio of the smallest S(i) to the largest S(i).
+*
+*  AMAX    (input) DOUBLE PRECISION
+*          Absolute value of largest matrix entry.
+*
+*  EQUED   (output) CHARACTER*1
+*          Specifies whether or not equilibration was done.
+*          = 'N':  No equilibration.
+*          = 'Y':  Equilibration was done, i.e., A has been replaced by
+*                  diag(S) * A * diag(S).
+*
+*  Internal Parameters
+*  ===================
+*
+*  THRESH is a threshold value used to decide if scaling should be done
+*  based on the ratio of the scaling factors.  If SCOND < THRESH,
+*  scaling is done.
+*
+*  LARGE and SMALL are threshold values used to decide if scaling should
+*  be done based on the absolute size of the largest matrix element.
+*  If AMAX > LARGE or AMAX < SMALL, scaling is done.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, THRESH
+      PARAMETER          ( ONE = 1.0D+0, THRESH = 0.1D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, JC
+      DOUBLE PRECISION   CJ, LARGE, SMALL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 ) THEN
+         EQUED = 'N'
+         RETURN
+      END IF
+*
+*     Initialize LARGE and SMALL.
+*
+      SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' )
+      LARGE = ONE / SMALL
+*
+      IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN
+*
+*        No equilibration
+*
+         EQUED = 'N'
+      ELSE
+*
+*        Replace A by diag(S) * A * diag(S).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*           Upper triangle of A is stored.
+*
+            JC = 1
+            DO 20 J = 1, N
+               CJ = S( J )
+               DO 10 I = 1, J
+                  AP( JC+I-1 ) = CJ*S( I )*AP( JC+I-1 )
+   10          CONTINUE
+               JC = JC + J
+   20       CONTINUE
+         ELSE
+*
+*           Lower triangle of A is stored.
+*
+            JC = 1
+            DO 40 J = 1, N
+               CJ = S( J )
+               DO 30 I = J, N
+                  AP( JC+I-J ) = CJ*S( I )*AP( JC+I-J )
+   30          CONTINUE
+               JC = JC + N - J + 1
+   40       CONTINUE
+         END IF
+         EQUED = 'Y'
+      END IF
+*
+      RETURN
+*
+*     End of DLAQSP
+*
+      END
diff --git a/costa/native/external/lapack/dlaqsy.f b/costa/native/external/lapack/dlaqsy.f
new file mode 100644
index 000000000..b285f4bc4
--- /dev/null
+++ b/costa/native/external/lapack/dlaqsy.f
@@ -0,0 +1,142 @@
+      SUBROUTINE DLAQSY( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, UPLO
+      INTEGER            LDA, N
+      DOUBLE PRECISION   AMAX, SCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), S( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAQSY equilibrates a symmetric matrix A using the scaling factors
+*  in the vector S.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          n by n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n by n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if EQUED = 'Y', the equilibrated matrix:
+*          diag(S) * A * diag(S).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(N,1).
+*
+*  S       (input) DOUBLE PRECISION array, dimension (N)
+*          The scale factors for A.
+*
+*  SCOND   (input) DOUBLE PRECISION
+*          Ratio of the smallest S(i) to the largest S(i).
+*
+*  AMAX    (input) DOUBLE PRECISION
+*          Absolute value of largest matrix entry.
+*
+*  EQUED   (output) CHARACTER*1
+*          Specifies whether or not equilibration was done.
+*          = 'N':  No equilibration.
+*          = 'Y':  Equilibration was done, i.e., A has been replaced by
+*                  diag(S) * A * diag(S).
+*
+*  Internal Parameters
+*  ===================
+*
+*  THRESH is a threshold value used to decide if scaling should be done
+*  based on the ratio of the scaling factors.  If SCOND < THRESH,
+*  scaling is done.
+*
+*  LARGE and SMALL are threshold values used to decide if scaling should
+*  be done based on the absolute size of the largest matrix element.
+*  If AMAX > LARGE or AMAX < SMALL, scaling is done.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, THRESH
+      PARAMETER          ( ONE = 1.0D+0, THRESH = 0.1D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      DOUBLE PRECISION   CJ, LARGE, SMALL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 ) THEN
+         EQUED = 'N'
+         RETURN
+      END IF
+*
+*     Initialize LARGE and SMALL.
+*
+      SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' )
+      LARGE = ONE / SMALL
+*
+      IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN
+*
+*        No equilibration
+*
+         EQUED = 'N'
+      ELSE
+*
+*        Replace A by diag(S) * A * diag(S).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*           Upper triangle of A is stored.
+*
+            DO 20 J = 1, N
+               CJ = S( J )
+               DO 10 I = 1, J
+                  A( I, J ) = CJ*S( I )*A( I, J )
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+*
+*           Lower triangle of A is stored.
+*
+            DO 40 J = 1, N
+               CJ = S( J )
+               DO 30 I = J, N
+                  A( I, J ) = CJ*S( I )*A( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         EQUED = 'Y'
+      END IF
+*
+      RETURN
+*
+*     End of DLAQSY
+*
+      END
diff --git a/costa/native/external/lapack/dlaqtr.f b/costa/native/external/lapack/dlaqtr.f
new file mode 100644
index 000000000..f1dbeb7db
--- /dev/null
+++ b/costa/native/external/lapack/dlaqtr.f
@@ -0,0 +1,666 @@
+      SUBROUTINE DLAQTR( LTRAN, LREAL, N, T, LDT, B, W, SCALE, X, WORK,
+     $                   INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      LOGICAL            LREAL, LTRAN
+      INTEGER            INFO, LDT, N
+      DOUBLE PRECISION   SCALE, W
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   B( * ), T( LDT, * ), WORK( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAQTR solves the real quasi-triangular system
+*
+*               op(T)*p = scale*c,               if LREAL = .TRUE.
+*
+*  or the complex quasi-triangular systems
+*
+*             op(T + iB)*(p+iq) = scale*(c+id),  if LREAL = .FALSE.
+*
+*  in real arithmetic, where T is upper quasi-triangular.
+*  If LREAL = .FALSE., then the first diagonal block of T must be
+*  1 by 1, B is the specially structured matrix
+*
+*                 B = [ b(1) b(2) ... b(n) ]
+*                     [       w            ]
+*                     [           w        ]
+*                     [              .     ]
+*                     [                 w  ]
+*
+*  op(A) = A or A', A' denotes the conjugate transpose of
+*  matrix A.
+*
+*  On input, X = [ c ].  On output, X = [ p ].
+*                [ d ]                  [ q ]
+*
+*  This subroutine is designed for the condition number estimation
+*  in routine DTRSNA.
+*
+*  Arguments
+*  =========
+*
+*  LTRAN   (input) LOGICAL
+*          On entry, LTRAN specifies the option of conjugate transpose:
+*             = .FALSE.,    op(T+i*B) = T+i*B,
+*             = .TRUE.,     op(T+i*B) = (T+i*B)'.
+*
+*  LREAL   (input) LOGICAL
+*          On entry, LREAL specifies the input matrix structure:
+*             = .FALSE.,    the input is complex
+*             = .TRUE.,     the input is real
+*
+*  N       (input) INTEGER
+*          On entry, N specifies the order of T+i*B. N >= 0.
+*
+*  T       (input) DOUBLE PRECISION array, dimension (LDT,N)
+*          On entry, T contains a matrix in Schur canonical form.
+*          If LREAL = .FALSE., then the first diagonal block of T mu
+*          be 1 by 1.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the matrix T. LDT >= max(1,N).
+*
+*  B       (input) DOUBLE PRECISION array, dimension (N)
+*          On entry, B contains the elements to form the matrix
+*          B as described above.
+*          If LREAL = .TRUE., B is not referenced.
+*
+*  W       (input) DOUBLE PRECISION
+*          On entry, W is the diagonal element of the matrix B.
+*          If LREAL = .TRUE., W is not referenced.
+*
+*  SCALE   (output) DOUBLE PRECISION
+*          On exit, SCALE is the scale factor.
+*
+*  X       (input/output) DOUBLE PRECISION array, dimension (2*N)
+*          On entry, X contains the right hand side of the system.
+*          On exit, X is overwritten by the solution.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          On exit, INFO is set to
+*             0: successful exit.
+*               1: the some diagonal 1 by 1 block has been perturbed by
+*                  a small number SMIN to keep nonsingularity.
+*               2: the some diagonal 2 by 2 block has been perturbed by
+*                  a small number in DLALN2 to keep nonsingularity.
+*          NOTE: In the interests of speed, this routine does not
+*                check the inputs for errors.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN
+      INTEGER            I, IERR, J, J1, J2, JNEXT, K, N1, N2
+      DOUBLE PRECISION   BIGNUM, EPS, REC, SCALOC, SI, SMIN, SMINW,
+     $                   SMLNUM, SR, TJJ, TMP, XJ, XMAX, XNORM, Z
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   D( 2, 2 ), V( 2, 2 )
+*     ..
+*     .. External Functions ..
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DASUM, DDOT, DLAMCH, DLANGE
+      EXTERNAL           IDAMAX, DASUM, DDOT, DLAMCH, DLANGE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DLADIV, DLALN2, DSCAL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Do not test the input parameters for errors
+*
+      NOTRAN = .NOT.LTRAN
+      INFO = 0
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Set constants to control overflow
+*
+      EPS = DLAMCH( 'P' )
+      SMLNUM = DLAMCH( 'S' ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+      XNORM = DLANGE( 'M', N, N, T, LDT, D )
+      IF( .NOT.LREAL )
+     $   XNORM = MAX( XNORM, ABS( W ), DLANGE( 'M', N, 1, B, N, D ) )
+      SMIN = MAX( SMLNUM, EPS*XNORM )
+*
+*     Compute 1-norm of each column of strictly upper triangular
+*     part of T to control overflow in triangular solver.
+*
+      WORK( 1 ) = ZERO
+      DO 10 J = 2, N
+         WORK( J ) = DASUM( J-1, T( 1, J ), 1 )
+   10 CONTINUE
+*
+      IF( .NOT.LREAL ) THEN
+         DO 20 I = 2, N
+            WORK( I ) = WORK( I ) + ABS( B( I ) )
+   20    CONTINUE
+      END IF
+*
+      N2 = 2*N
+      N1 = N
+      IF( .NOT.LREAL )
+     $   N1 = N2
+      K = IDAMAX( N1, X, 1 )
+      XMAX = ABS( X( K ) )
+      SCALE = ONE
+*
+      IF( XMAX.GT.BIGNUM ) THEN
+         SCALE = BIGNUM / XMAX
+         CALL DSCAL( N1, SCALE, X, 1 )
+         XMAX = BIGNUM
+      END IF
+*
+      IF( LREAL ) THEN
+*
+         IF( NOTRAN ) THEN
+*
+*           Solve T*p = scale*c
+*
+            JNEXT = N
+            DO 30 J = N, 1, -1
+               IF( J.GT.JNEXT )
+     $            GO TO 30
+               J1 = J
+               J2 = J
+               JNEXT = J - 1
+               IF( J.GT.1 ) THEN
+                  IF( T( J, J-1 ).NE.ZERO ) THEN
+                     J1 = J - 1
+                     JNEXT = J - 2
+                  END IF
+               END IF
+*
+               IF( J1.EQ.J2 ) THEN
+*
+*                 Meet 1 by 1 diagonal block
+*
+*                 Scale to avoid overflow when computing
+*                     x(j) = b(j)/T(j,j)
+*
+                  XJ = ABS( X( J1 ) )
+                  TJJ = ABS( T( J1, J1 ) )
+                  TMP = T( J1, J1 )
+                  IF( TJJ.LT.SMIN ) THEN
+                     TMP = SMIN
+                     TJJ = SMIN
+                     INFO = 1
+                  END IF
+*
+                  IF( XJ.EQ.ZERO )
+     $               GO TO 30
+*
+                  IF( TJJ.LT.ONE ) THEN
+                     IF( XJ.GT.BIGNUM*TJJ ) THEN
+                        REC = ONE / XJ
+                        CALL DSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                  END IF
+                  X( J1 ) = X( J1 ) / TMP
+                  XJ = ABS( X( J1 ) )
+*
+*                 Scale x if necessary to avoid overflow when adding a
+*                 multiple of column j1 of T.
+*
+                  IF( XJ.GT.ONE ) THEN
+                     REC = ONE / XJ
+                     IF( WORK( J1 ).GT.( BIGNUM-XMAX )*REC ) THEN
+                        CALL DSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                     END IF
+                  END IF
+                  IF( J1.GT.1 ) THEN
+                     CALL DAXPY( J1-1, -X( J1 ), T( 1, J1 ), 1, X, 1 )
+                     K = IDAMAX( J1-1, X, 1 )
+                     XMAX = ABS( X( K ) )
+                  END IF
+*
+               ELSE
+*
+*                 Meet 2 by 2 diagonal block
+*
+*                 Call 2 by 2 linear system solve, to take
+*                 care of possible overflow by scaling factor.
+*
+                  D( 1, 1 ) = X( J1 )
+                  D( 2, 1 ) = X( J2 )
+                  CALL DLALN2( .FALSE., 2, 1, SMIN, ONE, T( J1, J1 ),
+     $                         LDT, ONE, ONE, D, 2, ZERO, ZERO, V, 2,
+     $                         SCALOC, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 2
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     CALL DSCAL( N, SCALOC, X, 1 )
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  X( J1 ) = V( 1, 1 )
+                  X( J2 ) = V( 2, 1 )
+*
+*                 Scale V(1,1) (= X(J1)) and/or V(2,1) (=X(J2))
+*                 to avoid overflow in updating right-hand side.
+*
+                  XJ = MAX( ABS( V( 1, 1 ) ), ABS( V( 2, 1 ) ) )
+                  IF( XJ.GT.ONE ) THEN
+                     REC = ONE / XJ
+                     IF( MAX( WORK( J1 ), WORK( J2 ) ).GT.
+     $                   ( BIGNUM-XMAX )*REC ) THEN
+                        CALL DSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                     END IF
+                  END IF
+*
+*                 Update right-hand side
+*
+                  IF( J1.GT.1 ) THEN
+                     CALL DAXPY( J1-1, -X( J1 ), T( 1, J1 ), 1, X, 1 )
+                     CALL DAXPY( J1-1, -X( J2 ), T( 1, J2 ), 1, X, 1 )
+                     K = IDAMAX( J1-1, X, 1 )
+                     XMAX = ABS( X( K ) )
+                  END IF
+*
+               END IF
+*
+   30       CONTINUE
+*
+         ELSE
+*
+*           Solve T'*p = scale*c
+*
+            JNEXT = 1
+            DO 40 J = 1, N
+               IF( J.LT.JNEXT )
+     $            GO TO 40
+               J1 = J
+               J2 = J
+               JNEXT = J + 1
+               IF( J.LT.N ) THEN
+                  IF( T( J+1, J ).NE.ZERO ) THEN
+                     J2 = J + 1
+                     JNEXT = J + 2
+                  END IF
+               END IF
+*
+               IF( J1.EQ.J2 ) THEN
+*
+*                 1 by 1 diagonal block
+*
+*                 Scale if necessary to avoid overflow in forming the
+*                 right-hand side element by inner product.
+*
+                  XJ = ABS( X( J1 ) )
+                  IF( XMAX.GT.ONE ) THEN
+                     REC = ONE / XMAX
+                     IF( WORK( J1 ).GT.( BIGNUM-XJ )*REC ) THEN
+                        CALL DSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                  END IF
+*
+                  X( J1 ) = X( J1 ) - DDOT( J1-1, T( 1, J1 ), 1, X, 1 )
+*
+                  XJ = ABS( X( J1 ) )
+                  TJJ = ABS( T( J1, J1 ) )
+                  TMP = T( J1, J1 )
+                  IF( TJJ.LT.SMIN ) THEN
+                     TMP = SMIN
+                     TJJ = SMIN
+                     INFO = 1
+                  END IF
+*
+                  IF( TJJ.LT.ONE ) THEN
+                     IF( XJ.GT.BIGNUM*TJJ ) THEN
+                        REC = ONE / XJ
+                        CALL DSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                  END IF
+                  X( J1 ) = X( J1 ) / TMP
+                  XMAX = MAX( XMAX, ABS( X( J1 ) ) )
+*
+               ELSE
+*
+*                 2 by 2 diagonal block
+*
+*                 Scale if necessary to avoid overflow in forming the
+*                 right-hand side elements by inner product.
+*
+                  XJ = MAX( ABS( X( J1 ) ), ABS( X( J2 ) ) )
+                  IF( XMAX.GT.ONE ) THEN
+                     REC = ONE / XMAX
+                     IF( MAX( WORK( J2 ), WORK( J1 ) ).GT.( BIGNUM-XJ )*
+     $                   REC ) THEN
+                        CALL DSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                  END IF
+*
+                  D( 1, 1 ) = X( J1 ) - DDOT( J1-1, T( 1, J1 ), 1, X,
+     $                        1 )
+                  D( 2, 1 ) = X( J2 ) - DDOT( J1-1, T( 1, J2 ), 1, X,
+     $                        1 )
+*
+                  CALL DLALN2( .TRUE., 2, 1, SMIN, ONE, T( J1, J1 ),
+     $                         LDT, ONE, ONE, D, 2, ZERO, ZERO, V, 2,
+     $                         SCALOC, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 2
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     CALL DSCAL( N, SCALOC, X, 1 )
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  X( J1 ) = V( 1, 1 )
+                  X( J2 ) = V( 2, 1 )
+                  XMAX = MAX( ABS( X( J1 ) ), ABS( X( J2 ) ), XMAX )
+*
+               END IF
+   40       CONTINUE
+         END IF
+*
+      ELSE
+*
+         SMINW = MAX( EPS*ABS( W ), SMIN )
+         IF( NOTRAN ) THEN
+*
+*           Solve (T + iB)*(p+iq) = c+id
+*
+            JNEXT = N
+            DO 70 J = N, 1, -1
+               IF( J.GT.JNEXT )
+     $            GO TO 70
+               J1 = J
+               J2 = J
+               JNEXT = J - 1
+               IF( J.GT.1 ) THEN
+                  IF( T( J, J-1 ).NE.ZERO ) THEN
+                     J1 = J - 1
+                     JNEXT = J - 2
+                  END IF
+               END IF
+*
+               IF( J1.EQ.J2 ) THEN
+*
+*                 1 by 1 diagonal block
+*
+*                 Scale if necessary to avoid overflow in division
+*
+                  Z = W
+                  IF( J1.EQ.1 )
+     $               Z = B( 1 )
+                  XJ = ABS( X( J1 ) ) + ABS( X( N+J1 ) )
+                  TJJ = ABS( T( J1, J1 ) ) + ABS( Z )
+                  TMP = T( J1, J1 )
+                  IF( TJJ.LT.SMINW ) THEN
+                     TMP = SMINW
+                     TJJ = SMINW
+                     INFO = 1
+                  END IF
+*
+                  IF( XJ.EQ.ZERO )
+     $               GO TO 70
+*
+                  IF( TJJ.LT.ONE ) THEN
+                     IF( XJ.GT.BIGNUM*TJJ ) THEN
+                        REC = ONE / XJ
+                        CALL DSCAL( N2, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                  END IF
+                  CALL DLADIV( X( J1 ), X( N+J1 ), TMP, Z, SR, SI )
+                  X( J1 ) = SR
+                  X( N+J1 ) = SI
+                  XJ = ABS( X( J1 ) ) + ABS( X( N+J1 ) )
+*
+*                 Scale x if necessary to avoid overflow when adding a
+*                 multiple of column j1 of T.
+*
+                  IF( XJ.GT.ONE ) THEN
+                     REC = ONE / XJ
+                     IF( WORK( J1 ).GT.( BIGNUM-XMAX )*REC ) THEN
+                        CALL DSCAL( N2, REC, X, 1 )
+                        SCALE = SCALE*REC
+                     END IF
+                  END IF
+*
+                  IF( J1.GT.1 ) THEN
+                     CALL DAXPY( J1-1, -X( J1 ), T( 1, J1 ), 1, X, 1 )
+                     CALL DAXPY( J1-1, -X( N+J1 ), T( 1, J1 ), 1,
+     $                           X( N+1 ), 1 )
+*
+                     X( 1 ) = X( 1 ) + B( J1 )*X( N+J1 )
+                     X( N+1 ) = X( N+1 ) - B( J1 )*X( J1 )
+*
+                     XMAX = ZERO
+                     DO 50 K = 1, J1 - 1
+                        XMAX = MAX( XMAX, ABS( X( K ) )+
+     $                         ABS( X( K+N ) ) )
+   50                CONTINUE
+                  END IF
+*
+               ELSE
+*
+*                 Meet 2 by 2 diagonal block
+*
+                  D( 1, 1 ) = X( J1 )
+                  D( 2, 1 ) = X( J2 )
+                  D( 1, 2 ) = X( N+J1 )
+                  D( 2, 2 ) = X( N+J2 )
+                  CALL DLALN2( .FALSE., 2, 2, SMINW, ONE, T( J1, J1 ),
+     $                         LDT, ONE, ONE, D, 2, ZERO, -W, V, 2,
+     $                         SCALOC, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 2
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     CALL DSCAL( 2*N, SCALOC, X, 1 )
+                     SCALE = SCALOC*SCALE
+                  END IF
+                  X( J1 ) = V( 1, 1 )
+                  X( J2 ) = V( 2, 1 )
+                  X( N+J1 ) = V( 1, 2 )
+                  X( N+J2 ) = V( 2, 2 )
+*
+*                 Scale X(J1), .... to avoid overflow in
+*                 updating right hand side.
+*
+                  XJ = MAX( ABS( V( 1, 1 ) )+ABS( V( 1, 2 ) ),
+     $                 ABS( V( 2, 1 ) )+ABS( V( 2, 2 ) ) )
+                  IF( XJ.GT.ONE ) THEN
+                     REC = ONE / XJ
+                     IF( MAX( WORK( J1 ), WORK( J2 ) ).GT.
+     $                   ( BIGNUM-XMAX )*REC ) THEN
+                        CALL DSCAL( N2, REC, X, 1 )
+                        SCALE = SCALE*REC
+                     END IF
+                  END IF
+*
+*                 Update the right-hand side.
+*
+                  IF( J1.GT.1 ) THEN
+                     CALL DAXPY( J1-1, -X( J1 ), T( 1, J1 ), 1, X, 1 )
+                     CALL DAXPY( J1-1, -X( J2 ), T( 1, J2 ), 1, X, 1 )
+*
+                     CALL DAXPY( J1-1, -X( N+J1 ), T( 1, J1 ), 1,
+     $                           X( N+1 ), 1 )
+                     CALL DAXPY( J1-1, -X( N+J2 ), T( 1, J2 ), 1,
+     $                           X( N+1 ), 1 )
+*
+                     X( 1 ) = X( 1 ) + B( J1 )*X( N+J1 ) +
+     $                        B( J2 )*X( N+J2 )
+                     X( N+1 ) = X( N+1 ) - B( J1 )*X( J1 ) -
+     $                          B( J2 )*X( J2 )
+*
+                     XMAX = ZERO
+                     DO 60 K = 1, J1 - 1
+                        XMAX = MAX( ABS( X( K ) )+ABS( X( K+N ) ),
+     $                         XMAX )
+   60                CONTINUE
+                  END IF
+*
+               END IF
+   70       CONTINUE
+*
+         ELSE
+*
+*           Solve (T + iB)'*(p+iq) = c+id
+*
+            JNEXT = 1
+            DO 80 J = 1, N
+               IF( J.LT.JNEXT )
+     $            GO TO 80
+               J1 = J
+               J2 = J
+               JNEXT = J + 1
+               IF( J.LT.N ) THEN
+                  IF( T( J+1, J ).NE.ZERO ) THEN
+                     J2 = J + 1
+                     JNEXT = J + 2
+                  END IF
+               END IF
+*
+               IF( J1.EQ.J2 ) THEN
+*
+*                 1 by 1 diagonal block
+*
+*                 Scale if necessary to avoid overflow in forming the
+*                 right-hand side element by inner product.
+*
+                  XJ = ABS( X( J1 ) ) + ABS( X( J1+N ) )
+                  IF( XMAX.GT.ONE ) THEN
+                     REC = ONE / XMAX
+                     IF( WORK( J1 ).GT.( BIGNUM-XJ )*REC ) THEN
+                        CALL DSCAL( N2, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                  END IF
+*
+                  X( J1 ) = X( J1 ) - DDOT( J1-1, T( 1, J1 ), 1, X, 1 )
+                  X( N+J1 ) = X( N+J1 ) - DDOT( J1-1, T( 1, J1 ), 1,
+     $                        X( N+1 ), 1 )
+                  IF( J1.GT.1 ) THEN
+                     X( J1 ) = X( J1 ) - B( J1 )*X( N+1 )
+                     X( N+J1 ) = X( N+J1 ) + B( J1 )*X( 1 )
+                  END IF
+                  XJ = ABS( X( J1 ) ) + ABS( X( J1+N ) )
+*
+                  Z = W
+                  IF( J1.EQ.1 )
+     $               Z = B( 1 )
+*
+*                 Scale if necessary to avoid overflow in
+*                 complex division
+*
+                  TJJ = ABS( T( J1, J1 ) ) + ABS( Z )
+                  TMP = T( J1, J1 )
+                  IF( TJJ.LT.SMINW ) THEN
+                     TMP = SMINW
+                     TJJ = SMINW
+                     INFO = 1
+                  END IF
+*
+                  IF( TJJ.LT.ONE ) THEN
+                     IF( XJ.GT.BIGNUM*TJJ ) THEN
+                        REC = ONE / XJ
+                        CALL DSCAL( N2, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                  END IF
+                  CALL DLADIV( X( J1 ), X( N+J1 ), TMP, -Z, SR, SI )
+                  X( J1 ) = SR
+                  X( J1+N ) = SI
+                  XMAX = MAX( ABS( X( J1 ) )+ABS( X( J1+N ) ), XMAX )
+*
+               ELSE
+*
+*                 2 by 2 diagonal block
+*
+*                 Scale if necessary to avoid overflow in forming the
+*                 right-hand side element by inner product.
+*
+                  XJ = MAX( ABS( X( J1 ) )+ABS( X( N+J1 ) ),
+     $                 ABS( X( J2 ) )+ABS( X( N+J2 ) ) )
+                  IF( XMAX.GT.ONE ) THEN
+                     REC = ONE / XMAX
+                     IF( MAX( WORK( J1 ), WORK( J2 ) ).GT.
+     $                   ( BIGNUM-XJ ) / XMAX ) THEN
+                        CALL DSCAL( N2, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                  END IF
+*
+                  D( 1, 1 ) = X( J1 ) - DDOT( J1-1, T( 1, J1 ), 1, X,
+     $                        1 )
+                  D( 2, 1 ) = X( J2 ) - DDOT( J1-1, T( 1, J2 ), 1, X,
+     $                        1 )
+                  D( 1, 2 ) = X( N+J1 ) - DDOT( J1-1, T( 1, J1 ), 1,
+     $                        X( N+1 ), 1 )
+                  D( 2, 2 ) = X( N+J2 ) - DDOT( J1-1, T( 1, J2 ), 1,
+     $                        X( N+1 ), 1 )
+                  D( 1, 1 ) = D( 1, 1 ) - B( J1 )*X( N+1 )
+                  D( 2, 1 ) = D( 2, 1 ) - B( J2 )*X( N+1 )
+                  D( 1, 2 ) = D( 1, 2 ) + B( J1 )*X( 1 )
+                  D( 2, 2 ) = D( 2, 2 ) + B( J2 )*X( 1 )
+*
+                  CALL DLALN2( .TRUE., 2, 2, SMINW, ONE, T( J1, J1 ),
+     $                         LDT, ONE, ONE, D, 2, ZERO, W, V, 2,
+     $                         SCALOC, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 2
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     CALL DSCAL( N2, SCALOC, X, 1 )
+                     SCALE = SCALOC*SCALE
+                  END IF
+                  X( J1 ) = V( 1, 1 )
+                  X( J2 ) = V( 2, 1 )
+                  X( N+J1 ) = V( 1, 2 )
+                  X( N+J2 ) = V( 2, 2 )
+                  XMAX = MAX( ABS( X( J1 ) )+ABS( X( N+J1 ) ),
+     $                   ABS( X( J2 ) )+ABS( X( N+J2 ) ), XMAX )
+*
+               END IF
+*
+   80       CONTINUE
+*
+         END IF
+*
+      END IF
+*
+      RETURN
+*
+*     End of DLAQTR
+*
+      END
diff --git a/costa/native/external/lapack/dlar1v.f b/costa/native/external/lapack/dlar1v.f
new file mode 100644
index 000000000..0e93ea5ea
--- /dev/null
+++ b/costa/native/external/lapack/dlar1v.f
@@ -0,0 +1,323 @@
+      SUBROUTINE DLAR1V( N, B1, BN, SIGMA, D, L, LD, LLD, GERSCH, Z,
+     $                   ZTZ, MINGMA, R, ISUPPZ, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            B1, BN, N, R
+      DOUBLE PRECISION   MINGMA, SIGMA, ZTZ
+*     ..
+*     .. Array Arguments ..
+      INTEGER            ISUPPZ( * )
+      DOUBLE PRECISION   D( * ), GERSCH( * ), L( * ), LD( * ), LLD( * ),
+     $                   WORK( * ), Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAR1V computes the (scaled) r-th column of the inverse of
+*  the sumbmatrix in rows B1 through BN of the tridiagonal matrix
+*  L D L^T - sigma I. The following steps accomplish this computation :
+*  (a) Stationary qd transform,  L D L^T - sigma I = L(+) D(+) L(+)^T,
+*  (b) Progressive qd transform, L D L^T - sigma I = U(-) D(-) U(-)^T,
+*  (c) Computation of the diagonal elements of the inverse of
+*      L D L^T - sigma I by combining the above transforms, and choosing
+*      r as the index where the diagonal of the inverse is (one of the)
+*      largest in magnitude.
+*  (d) Computation of the (scaled) r-th column of the inverse using the
+*      twisted factorization obtained by combining the top part of the
+*      the stationary and the bottom part of the progressive transform.
+*
+*  Arguments
+*  =========
+*
+*  N        (input) INTEGER
+*           The order of the matrix L D L^T.
+*
+*  B1       (input) INTEGER
+*           First index of the submatrix of L D L^T.
+*
+*  BN       (input) INTEGER
+*           Last index of the submatrix of L D L^T.
+*
+*  SIGMA    (input) DOUBLE PRECISION
+*           The shift. Initially, when R = 0, SIGMA should be a good
+*           approximation to an eigenvalue of L D L^T.
+*
+*  L        (input) DOUBLE PRECISION array, dimension (N-1)
+*           The (n-1) subdiagonal elements of the unit bidiagonal matrix
+*           L, in elements 1 to N-1.
+*
+*  D        (input) DOUBLE PRECISION array, dimension (N)
+*           The n diagonal elements of the diagonal matrix D.
+*
+*  LD       (input) DOUBLE PRECISION array, dimension (N-1)
+*           The n-1 elements L(i)*D(i).
+*
+*  LLD      (input) DOUBLE PRECISION array, dimension (N-1)
+*           The n-1 elements L(i)*L(i)*D(i).
+*
+*  GERSCH   (input) DOUBLE PRECISION array, dimension (2*N)
+*           The n Gerschgorin intervals. These are used to restrict
+*           the initial search for R, when R is input as 0.
+*
+*  Z        (output) DOUBLE PRECISION array, dimension (N)
+*           The (scaled) r-th column of the inverse. Z(R) is returned
+*           to be 1.
+*
+*  ZTZ      (output) DOUBLE PRECISION
+*           The square of the norm of Z.
+*
+*  MINGMA   (output) DOUBLE PRECISION
+*           The reciprocal of the largest (in magnitude) diagonal
+*           element of the inverse of L D L^T - sigma I.
+*
+*  R        (input/output) INTEGER
+*           Initially, R should be input to be 0 and is then output as
+*           the index where the diagonal element of the inverse is
+*           largest in magnitude. In later iterations, this same value
+*           of R should be input.
+*
+*  ISUPPZ   (output) INTEGER array, dimension (2)
+*           The support of the vector in Z, i.e., the vector Z is
+*           nonzero only in elements ISUPPZ(1) through ISUPPZ( 2 ).
+*
+*  WORK     (workspace) DOUBLE PRECISION array, dimension (4*N)
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Inderjit Dhillon, IBM Almaden, USA
+*     Osni Marques, LBNL/NERSC, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            BLKSIZ
+      PARAMETER          ( BLKSIZ = 32 )
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            SAWNAN
+      INTEGER            FROM, I, INDP, INDS, INDUMN, J, R1, R2, TO
+      DOUBLE PRECISION   DMINUS, DPLUS, EPS, S, TMP
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      EPS = DLAMCH( 'Precision' )
+      IF( R.EQ.0 ) THEN
+*
+*        Eliminate the top and bottom indices from the possible values
+*        of R where the desired eigenvector is largest in magnitude.
+*
+         R1 = B1
+         DO 10 I = B1, BN
+            IF( SIGMA.GE.GERSCH( 2*I-1 ) .OR. SIGMA.LE.GERSCH( 2*I ) )
+     $           THEN
+               R1 = I
+               GO TO 20
+            END IF
+   10    CONTINUE
+   20    CONTINUE
+         R2 = BN
+         DO 30 I = BN, B1, -1
+            IF( SIGMA.GE.GERSCH( 2*I-1 ) .OR. SIGMA.LE.GERSCH( 2*I ) )
+     $           THEN
+               R2 = I
+               GO TO 40
+            END IF
+   30    CONTINUE
+   40    CONTINUE
+      ELSE
+         R1 = R
+         R2 = R
+      END IF
+*
+      INDUMN = N
+      INDS = 2*N + 1
+      INDP = 3*N + 1
+      SAWNAN = .FALSE.
+*
+*     Compute the stationary transform (using the differential form)
+*     untill the index R2
+*
+      IF( B1.EQ.1 ) THEN
+         WORK( INDS ) = ZERO
+      ELSE
+         WORK( INDS ) = LLD( B1-1 )
+      END IF
+      S = WORK( INDS ) - SIGMA
+      DO 50 I = B1, R2 - 1
+         DPLUS = D( I ) + S
+         WORK( I ) = LD( I ) / DPLUS
+         WORK( INDS+I ) = S*WORK( I )*L( I )
+         S = WORK( INDS+I ) - SIGMA
+   50 CONTINUE
+*
+      IF( .NOT.( S.GT.ZERO .OR. S.LT.ONE ) ) THEN
+*
+*        Run a slower version of the above loop if a NaN is detected
+*
+         SAWNAN = .TRUE.
+         J = B1 + 1
+   60    CONTINUE
+         IF( WORK( INDS+J ).GT.ZERO .OR. WORK( INDS+J ).LT.ONE ) THEN
+            J = J + 1
+            GO TO 60
+         END IF
+         WORK( INDS+J ) = LLD( J )
+         S = WORK( INDS+J ) - SIGMA
+         DO 70 I = J + 1, R2 - 1
+            DPLUS = D( I ) + S
+            WORK( I ) = LD( I ) / DPLUS
+            IF( WORK( I ).EQ.ZERO ) THEN
+               WORK( INDS+I ) = LLD( I )
+            ELSE
+               WORK( INDS+I ) = S*WORK( I )*L( I )
+            END IF
+            S = WORK( INDS+I ) - SIGMA
+   70    CONTINUE
+      END IF
+      WORK( INDP+BN-1 ) = D( BN ) - SIGMA
+      DO 80 I = BN - 1, R1, -1
+         DMINUS = LLD( I ) + WORK( INDP+I )
+         TMP = D( I ) / DMINUS
+         WORK( INDUMN+I ) = L( I )*TMP
+         WORK( INDP+I-1 ) = WORK( INDP+I )*TMP - SIGMA
+   80 CONTINUE
+      TMP = WORK( INDP+R1-1 )
+      IF( .NOT.( TMP.GT.ZERO .OR. TMP.LT.ONE ) ) THEN
+*
+*        Run a slower version of the above loop if a NaN is detected
+*
+         SAWNAN = .TRUE.
+         J = BN - 3
+   90    CONTINUE
+         IF( WORK( INDP+J ).GT.ZERO .OR. WORK( INDP+J ).LT.ONE ) THEN
+            J = J - 1
+            GO TO 90
+         END IF
+         WORK( INDP+J ) = D( J+1 ) - SIGMA
+         DO 100 I = J, R1, -1
+            DMINUS = LLD( I ) + WORK( INDP+I )
+            TMP = D( I ) / DMINUS
+            WORK( INDUMN+I ) = L( I )*TMP
+            IF( TMP.EQ.ZERO ) THEN
+               WORK( INDP+I-1 ) = D( I ) - SIGMA
+            ELSE
+               WORK( INDP+I-1 ) = WORK( INDP+I )*TMP - SIGMA
+            END IF
+  100    CONTINUE
+      END IF
+*
+*     Find the index (from R1 to R2) of the largest (in magnitude)
+*     diagonal element of the inverse
+*
+      MINGMA = WORK( INDS+R1-1 ) + WORK( INDP+R1-1 )
+      IF( MINGMA.EQ.ZERO )
+     $   MINGMA = EPS*WORK( INDS+R1-1 )
+      R = R1
+      DO 110 I = R1, R2 - 1
+         TMP = WORK( INDS+I ) + WORK( INDP+I )
+         IF( TMP.EQ.ZERO )
+     $      TMP = EPS*WORK( INDS+I )
+         IF( ABS( TMP ).LT.ABS( MINGMA ) ) THEN
+            MINGMA = TMP
+            R = I + 1
+         END IF
+  110 CONTINUE
+*
+*     Compute the (scaled) r-th column of the inverse
+*
+      ISUPPZ( 1 ) = B1
+      ISUPPZ( 2 ) = BN
+      Z( R ) = ONE
+      ZTZ = ONE
+      IF( .NOT.SAWNAN ) THEN
+         FROM = R - 1
+         TO = MAX( R-BLKSIZ, B1 )
+  120    CONTINUE
+         IF( FROM.GE.B1 ) THEN
+            DO 130 I = FROM, TO, -1
+               Z( I ) = -( WORK( I )*Z( I+1 ) )
+               ZTZ = ZTZ + Z( I )*Z( I )
+  130       CONTINUE
+            IF( ABS( Z( TO ) ).LE.EPS .AND. ABS( Z( TO+1 ) ).LE.EPS )
+     $           THEN
+               ISUPPZ( 1 ) = TO + 2
+            ELSE
+               FROM = TO - 1
+               TO = MAX( TO-BLKSIZ, B1 )
+               GO TO 120
+            END IF
+         END IF
+         FROM = R + 1
+         TO = MIN( R+BLKSIZ, BN )
+  140    CONTINUE
+         IF( FROM.LE.BN ) THEN
+            DO 150 I = FROM, TO
+               Z( I ) = -( WORK( INDUMN+I-1 )*Z( I-1 ) )
+               ZTZ = ZTZ + Z( I )*Z( I )
+  150       CONTINUE
+            IF( ABS( Z( TO ) ).LE.EPS .AND. ABS( Z( TO-1 ) ).LE.EPS )
+     $           THEN
+               ISUPPZ( 2 ) = TO - 2
+            ELSE
+               FROM = TO + 1
+               TO = MIN( TO+BLKSIZ, BN )
+               GO TO 140
+            END IF
+         END IF
+      ELSE
+         DO 160 I = R - 1, B1, -1
+            IF( Z( I+1 ).EQ.ZERO ) THEN
+               Z( I ) = -( LD( I+1 ) / LD( I ) )*Z( I+2 )
+            ELSE IF( ABS( Z( I+1 ) ).LE.EPS .AND. ABS( Z( I+2 ) ).LE.
+     $               EPS ) THEN
+               ISUPPZ( 1 ) = I + 3
+               GO TO 170
+            ELSE
+               Z( I ) = -( WORK( I )*Z( I+1 ) )
+            END IF
+            ZTZ = ZTZ + Z( I )*Z( I )
+  160    CONTINUE
+  170    CONTINUE
+         DO 180 I = R, BN - 1
+            IF( Z( I ).EQ.ZERO ) THEN
+               Z( I+1 ) = -( LD( I-1 ) / LD( I ) )*Z( I-1 )
+            ELSE IF( ABS( Z( I ) ).LE.EPS .AND. ABS( Z( I-1 ) ).LE.EPS )
+     $                THEN
+               ISUPPZ( 2 ) = I - 2
+               GO TO 190
+            ELSE
+               Z( I+1 ) = -( WORK( INDUMN+I )*Z( I ) )
+            END IF
+            ZTZ = ZTZ + Z( I+1 )*Z( I+1 )
+  180    CONTINUE
+  190    CONTINUE
+      END IF
+      DO 200 I = B1, ISUPPZ( 1 ) - 3
+         Z( I ) = ZERO
+  200 CONTINUE
+      DO 210 I = ISUPPZ( 2 ) + 3, BN
+         Z( I ) = ZERO
+  210 CONTINUE
+*
+      RETURN
+*
+*     End of DLAR1V
+*
+      END
diff --git a/costa/native/external/lapack/dlar2v.f b/costa/native/external/lapack/dlar2v.f
new file mode 100644
index 000000000..83176732e
--- /dev/null
+++ b/costa/native/external/lapack/dlar2v.f
@@ -0,0 +1,87 @@
+      SUBROUTINE DLAR2V( N, X, Y, Z, INCX, C, S, INCC )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCC, INCX, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   C( * ), S( * ), X( * ), Y( * ), Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAR2V applies a vector of real plane rotations from both sides to
+*  a sequence of 2-by-2 real symmetric matrices, defined by the elements
+*  of the vectors x, y and z. For i = 1,2,...,n
+*
+*     ( x(i)  z(i) ) := (  c(i)  s(i) ) ( x(i)  z(i) ) ( c(i) -s(i) )
+*     ( z(i)  y(i) )    ( -s(i)  c(i) ) ( z(i)  y(i) ) ( s(i)  c(i) )
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of plane rotations to be applied.
+*
+*  X       (input/output) DOUBLE PRECISION array,
+*                         dimension (1+(N-1)*INCX)
+*          The vector x.
+*
+*  Y       (input/output) DOUBLE PRECISION array,
+*                         dimension (1+(N-1)*INCX)
+*          The vector y.
+*
+*  Z       (input/output) DOUBLE PRECISION array,
+*                         dimension (1+(N-1)*INCX)
+*          The vector z.
+*
+*  INCX    (input) INTEGER
+*          The increment between elements of X, Y and Z. INCX > 0.
+*
+*  C       (input) DOUBLE PRECISION array, dimension (1+(N-1)*INCC)
+*          The cosines of the plane rotations.
+*
+*  S       (input) DOUBLE PRECISION array, dimension (1+(N-1)*INCC)
+*          The sines of the plane rotations.
+*
+*  INCC    (input) INTEGER
+*          The increment between elements of C and S. INCC > 0.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, IC, IX
+      DOUBLE PRECISION   CI, SI, T1, T2, T3, T4, T5, T6, XI, YI, ZI
+*     ..
+*     .. Executable Statements ..
+*
+      IX = 1
+      IC = 1
+      DO 10 I = 1, N
+         XI = X( IX )
+         YI = Y( IX )
+         ZI = Z( IX )
+         CI = C( IC )
+         SI = S( IC )
+         T1 = SI*ZI
+         T2 = CI*ZI
+         T3 = T2 - SI*XI
+         T4 = T2 + SI*YI
+         T5 = CI*XI + T1
+         T6 = CI*YI - T1
+         X( IX ) = CI*T5 + SI*T4
+         Y( IX ) = CI*T6 - SI*T3
+         Z( IX ) = CI*T4 - SI*T5
+         IX = IX + INCX
+         IC = IC + INCC
+   10 CONTINUE
+*
+*     End of DLAR2V
+*
+      RETURN
+      END
diff --git a/costa/native/external/lapack/dlarf.f b/costa/native/external/lapack/dlarf.f
new file mode 100644
index 000000000..e81b6092b
--- /dev/null
+++ b/costa/native/external/lapack/dlarf.f
@@ -0,0 +1,116 @@
+      SUBROUTINE DLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE
+      INTEGER            INCV, LDC, M, N
+      DOUBLE PRECISION   TAU
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   C( LDC, * ), V( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLARF applies a real elementary reflector H to a real m by n matrix
+*  C, from either the left or the right. H is represented in the form
+*
+*        H = I - tau * v * v'
+*
+*  where tau is a real scalar and v is a real vector.
+*
+*  If tau = 0, then H is taken to be the unit matrix.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': form  H * C
+*          = 'R': form  C * H
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C.
+*
+*  V       (input) DOUBLE PRECISION array, dimension
+*                     (1 + (M-1)*abs(INCV)) if SIDE = 'L'
+*                  or (1 + (N-1)*abs(INCV)) if SIDE = 'R'
+*          The vector v in the representation of H. V is not used if
+*          TAU = 0.
+*
+*  INCV    (input) INTEGER
+*          The increment between elements of v. INCV <> 0.
+*
+*  TAU     (input) DOUBLE PRECISION
+*          The value tau in the representation of H.
+*
+*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+*          On entry, the m by n matrix C.
+*          On exit, C is overwritten by the matrix H * C if SIDE = 'L',
+*          or C * H if SIDE = 'R'.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension
+*                         (N) if SIDE = 'L'
+*                      or (M) if SIDE = 'R'
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMV, DGER
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+      IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*        Form  H * C
+*
+         IF( TAU.NE.ZERO ) THEN
+*
+*           w := C' * v
+*
+            CALL DGEMV( 'Transpose', M, N, ONE, C, LDC, V, INCV, ZERO,
+     $                  WORK, 1 )
+*
+*           C := C - v * w'
+*
+            CALL DGER( M, N, -TAU, V, INCV, WORK, 1, C, LDC )
+         END IF
+      ELSE
+*
+*        Form  C * H
+*
+         IF( TAU.NE.ZERO ) THEN
+*
+*           w := C * v
+*
+            CALL DGEMV( 'No transpose', M, N, ONE, C, LDC, V, INCV,
+     $                  ZERO, WORK, 1 )
+*
+*           C := C - w * v'
+*
+            CALL DGER( M, N, -TAU, WORK, 1, V, INCV, C, LDC )
+         END IF
+      END IF
+      RETURN
+*
+*     End of DLARF
+*
+      END
diff --git a/costa/native/external/lapack/dlarfb.f b/costa/native/external/lapack/dlarfb.f
new file mode 100644
index 000000000..e36a7fd5f
--- /dev/null
+++ b/costa/native/external/lapack/dlarfb.f
@@ -0,0 +1,588 @@
+      SUBROUTINE DLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV,
+     $                   T, LDT, C, LDC, WORK, LDWORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIRECT, SIDE, STOREV, TRANS
+      INTEGER            K, LDC, LDT, LDV, LDWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   C( LDC, * ), T( LDT, * ), V( LDV, * ),
+     $                   WORK( LDWORK, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLARFB applies a real block reflector H or its transpose H' to a
+*  real m by n matrix C, from either the left or the right.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply H or H' from the Left
+*          = 'R': apply H or H' from the Right
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': apply H (No transpose)
+*          = 'T': apply H' (Transpose)
+*
+*  DIRECT  (input) CHARACTER*1
+*          Indicates how H is formed from a product of elementary
+*          reflectors
+*          = 'F': H = H(1) H(2) . . . H(k) (Forward)
+*          = 'B': H = H(k) . . . H(2) H(1) (Backward)
+*
+*  STOREV  (input) CHARACTER*1
+*          Indicates how the vectors which define the elementary
+*          reflectors are stored:
+*          = 'C': Columnwise
+*          = 'R': Rowwise
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C.
+*
+*  K       (input) INTEGER
+*          The order of the matrix T (= the number of elementary
+*          reflectors whose product defines the block reflector).
+*
+*  V       (input) DOUBLE PRECISION array, dimension
+*                                (LDV,K) if STOREV = 'C'
+*                                (LDV,M) if STOREV = 'R' and SIDE = 'L'
+*                                (LDV,N) if STOREV = 'R' and SIDE = 'R'
+*          The matrix V. See further details.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V.
+*          If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M);
+*          if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N);
+*          if STOREV = 'R', LDV >= K.
+*
+*  T       (input) DOUBLE PRECISION array, dimension (LDT,K)
+*          The triangular k by k matrix T in the representation of the
+*          block reflector.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= K.
+*
+*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+*          On entry, the m by n matrix C.
+*          On exit, C is overwritten by H*C or H'*C or C*H or C*H'.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDA >= max(1,M).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (LDWORK,K)
+*
+*  LDWORK  (input) INTEGER
+*          The leading dimension of the array WORK.
+*          If SIDE = 'L', LDWORK >= max(1,N);
+*          if SIDE = 'R', LDWORK >= max(1,M).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      CHARACTER          TRANST
+      INTEGER            I, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DGEMM, DTRMM
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 .OR. N.LE.0 )
+     $   RETURN
+*
+      IF( LSAME( TRANS, 'N' ) ) THEN
+         TRANST = 'T'
+      ELSE
+         TRANST = 'N'
+      END IF
+*
+      IF( LSAME( STOREV, 'C' ) ) THEN
+*
+         IF( LSAME( DIRECT, 'F' ) ) THEN
+*
+*           Let  V =  ( V1 )    (first K rows)
+*                     ( V2 )
+*           where  V1  is unit lower triangular.
+*
+            IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*              Form  H * C  or  H' * C  where  C = ( C1 )
+*                                                  ( C2 )
+*
+*              W := C' * V  =  (C1'*V1 + C2'*V2)  (stored in WORK)
+*
+*              W := C1'
+*
+               DO 10 J = 1, K
+                  CALL DCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 )
+   10          CONTINUE
+*
+*              W := W * V1
+*
+               CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit', N,
+     $                     K, ONE, V, LDV, WORK, LDWORK )
+               IF( M.GT.K ) THEN
+*
+*                 W := W + C2'*V2
+*
+                  CALL DGEMM( 'Transpose', 'No transpose', N, K, M-K,
+     $                        ONE, C( K+1, 1 ), LDC, V( K+1, 1 ), LDV,
+     $                        ONE, WORK, LDWORK )
+               END IF
+*
+*              W := W * T'  or  W * T
+*
+               CALL DTRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - V * W'
+*
+               IF( M.GT.K ) THEN
+*
+*                 C2 := C2 - V2 * W'
+*
+                  CALL DGEMM( 'No transpose', 'Transpose', M-K, N, K,
+     $                        -ONE, V( K+1, 1 ), LDV, WORK, LDWORK, ONE,
+     $                        C( K+1, 1 ), LDC )
+               END IF
+*
+*              W := W * V1'
+*
+               CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit', N, K,
+     $                     ONE, V, LDV, WORK, LDWORK )
+*
+*              C1 := C1 - W'
+*
+               DO 30 J = 1, K
+                  DO 20 I = 1, N
+                     C( J, I ) = C( J, I ) - WORK( I, J )
+   20             CONTINUE
+   30          CONTINUE
+*
+            ELSE IF( LSAME( SIDE, 'R' ) ) THEN
+*
+*              Form  C * H  or  C * H'  where  C = ( C1  C2 )
+*
+*              W := C * V  =  (C1*V1 + C2*V2)  (stored in WORK)
+*
+*              W := C1
+*
+               DO 40 J = 1, K
+                  CALL DCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 )
+   40          CONTINUE
+*
+*              W := W * V1
+*
+               CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit', M,
+     $                     K, ONE, V, LDV, WORK, LDWORK )
+               IF( N.GT.K ) THEN
+*
+*                 W := W + C2 * V2
+*
+                  CALL DGEMM( 'No transpose', 'No transpose', M, K, N-K,
+     $                        ONE, C( 1, K+1 ), LDC, V( K+1, 1 ), LDV,
+     $                        ONE, WORK, LDWORK )
+               END IF
+*
+*              W := W * T  or  W * T'
+*
+               CALL DTRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - W * V'
+*
+               IF( N.GT.K ) THEN
+*
+*                 C2 := C2 - W * V2'
+*
+                  CALL DGEMM( 'No transpose', 'Transpose', M, N-K, K,
+     $                        -ONE, WORK, LDWORK, V( K+1, 1 ), LDV, ONE,
+     $                        C( 1, K+1 ), LDC )
+               END IF
+*
+*              W := W * V1'
+*
+               CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit', M, K,
+     $                     ONE, V, LDV, WORK, LDWORK )
+*
+*              C1 := C1 - W
+*
+               DO 60 J = 1, K
+                  DO 50 I = 1, M
+                     C( I, J ) = C( I, J ) - WORK( I, J )
+   50             CONTINUE
+   60          CONTINUE
+            END IF
+*
+         ELSE
+*
+*           Let  V =  ( V1 )
+*                     ( V2 )    (last K rows)
+*           where  V2  is unit upper triangular.
+*
+            IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*              Form  H * C  or  H' * C  where  C = ( C1 )
+*                                                  ( C2 )
+*
+*              W := C' * V  =  (C1'*V1 + C2'*V2)  (stored in WORK)
+*
+*              W := C2'
+*
+               DO 70 J = 1, K
+                  CALL DCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 )
+   70          CONTINUE
+*
+*              W := W * V2
+*
+               CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit', N,
+     $                     K, ONE, V( M-K+1, 1 ), LDV, WORK, LDWORK )
+               IF( M.GT.K ) THEN
+*
+*                 W := W + C1'*V1
+*
+                  CALL DGEMM( 'Transpose', 'No transpose', N, K, M-K,
+     $                        ONE, C, LDC, V, LDV, ONE, WORK, LDWORK )
+               END IF
+*
+*              W := W * T'  or  W * T
+*
+               CALL DTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - V * W'
+*
+               IF( M.GT.K ) THEN
+*
+*                 C1 := C1 - V1 * W'
+*
+                  CALL DGEMM( 'No transpose', 'Transpose', M-K, N, K,
+     $                        -ONE, V, LDV, WORK, LDWORK, ONE, C, LDC )
+               END IF
+*
+*              W := W * V2'
+*
+               CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit', N, K,
+     $                     ONE, V( M-K+1, 1 ), LDV, WORK, LDWORK )
+*
+*              C2 := C2 - W'
+*
+               DO 90 J = 1, K
+                  DO 80 I = 1, N
+                     C( M-K+J, I ) = C( M-K+J, I ) - WORK( I, J )
+   80             CONTINUE
+   90          CONTINUE
+*
+            ELSE IF( LSAME( SIDE, 'R' ) ) THEN
+*
+*              Form  C * H  or  C * H'  where  C = ( C1  C2 )
+*
+*              W := C * V  =  (C1*V1 + C2*V2)  (stored in WORK)
+*
+*              W := C2
+*
+               DO 100 J = 1, K
+                  CALL DCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 )
+  100          CONTINUE
+*
+*              W := W * V2
+*
+               CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit', M,
+     $                     K, ONE, V( N-K+1, 1 ), LDV, WORK, LDWORK )
+               IF( N.GT.K ) THEN
+*
+*                 W := W + C1 * V1
+*
+                  CALL DGEMM( 'No transpose', 'No transpose', M, K, N-K,
+     $                        ONE, C, LDC, V, LDV, ONE, WORK, LDWORK )
+               END IF
+*
+*              W := W * T  or  W * T'
+*
+               CALL DTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - W * V'
+*
+               IF( N.GT.K ) THEN
+*
+*                 C1 := C1 - W * V1'
+*
+                  CALL DGEMM( 'No transpose', 'Transpose', M, N-K, K,
+     $                        -ONE, WORK, LDWORK, V, LDV, ONE, C, LDC )
+               END IF
+*
+*              W := W * V2'
+*
+               CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit', M, K,
+     $                     ONE, V( N-K+1, 1 ), LDV, WORK, LDWORK )
+*
+*              C2 := C2 - W
+*
+               DO 120 J = 1, K
+                  DO 110 I = 1, M
+                     C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J )
+  110             CONTINUE
+  120          CONTINUE
+            END IF
+         END IF
+*
+      ELSE IF( LSAME( STOREV, 'R' ) ) THEN
+*
+         IF( LSAME( DIRECT, 'F' ) ) THEN
+*
+*           Let  V =  ( V1  V2 )    (V1: first K columns)
+*           where  V1  is unit upper triangular.
+*
+            IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*              Form  H * C  or  H' * C  where  C = ( C1 )
+*                                                  ( C2 )
+*
+*              W := C' * V'  =  (C1'*V1' + C2'*V2') (stored in WORK)
+*
+*              W := C1'
+*
+               DO 130 J = 1, K
+                  CALL DCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 )
+  130          CONTINUE
+*
+*              W := W * V1'
+*
+               CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit', N, K,
+     $                     ONE, V, LDV, WORK, LDWORK )
+               IF( M.GT.K ) THEN
+*
+*                 W := W + C2'*V2'
+*
+                  CALL DGEMM( 'Transpose', 'Transpose', N, K, M-K, ONE,
+     $                        C( K+1, 1 ), LDC, V( 1, K+1 ), LDV, ONE,
+     $                        WORK, LDWORK )
+               END IF
+*
+*              W := W * T'  or  W * T
+*
+               CALL DTRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - V' * W'
+*
+               IF( M.GT.K ) THEN
+*
+*                 C2 := C2 - V2' * W'
+*
+                  CALL DGEMM( 'Transpose', 'Transpose', M-K, N, K, -ONE,
+     $                        V( 1, K+1 ), LDV, WORK, LDWORK, ONE,
+     $                        C( K+1, 1 ), LDC )
+               END IF
+*
+*              W := W * V1
+*
+               CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit', N,
+     $                     K, ONE, V, LDV, WORK, LDWORK )
+*
+*              C1 := C1 - W'
+*
+               DO 150 J = 1, K
+                  DO 140 I = 1, N
+                     C( J, I ) = C( J, I ) - WORK( I, J )
+  140             CONTINUE
+  150          CONTINUE
+*
+            ELSE IF( LSAME( SIDE, 'R' ) ) THEN
+*
+*              Form  C * H  or  C * H'  where  C = ( C1  C2 )
+*
+*              W := C * V'  =  (C1*V1' + C2*V2')  (stored in WORK)
+*
+*              W := C1
+*
+               DO 160 J = 1, K
+                  CALL DCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 )
+  160          CONTINUE
+*
+*              W := W * V1'
+*
+               CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Unit', M, K,
+     $                     ONE, V, LDV, WORK, LDWORK )
+               IF( N.GT.K ) THEN
+*
+*                 W := W + C2 * V2'
+*
+                  CALL DGEMM( 'No transpose', 'Transpose', M, K, N-K,
+     $                        ONE, C( 1, K+1 ), LDC, V( 1, K+1 ), LDV,
+     $                        ONE, WORK, LDWORK )
+               END IF
+*
+*              W := W * T  or  W * T'
+*
+               CALL DTRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - W * V
+*
+               IF( N.GT.K ) THEN
+*
+*                 C2 := C2 - W * V2
+*
+                  CALL DGEMM( 'No transpose', 'No transpose', M, N-K, K,
+     $                        -ONE, WORK, LDWORK, V( 1, K+1 ), LDV, ONE,
+     $                        C( 1, K+1 ), LDC )
+               END IF
+*
+*              W := W * V1
+*
+               CALL DTRMM( 'Right', 'Upper', 'No transpose', 'Unit', M,
+     $                     K, ONE, V, LDV, WORK, LDWORK )
+*
+*              C1 := C1 - W
+*
+               DO 180 J = 1, K
+                  DO 170 I = 1, M
+                     C( I, J ) = C( I, J ) - WORK( I, J )
+  170             CONTINUE
+  180          CONTINUE
+*
+            END IF
+*
+         ELSE
+*
+*           Let  V =  ( V1  V2 )    (V2: last K columns)
+*           where  V2  is unit lower triangular.
+*
+            IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*              Form  H * C  or  H' * C  where  C = ( C1 )
+*                                                  ( C2 )
+*
+*              W := C' * V'  =  (C1'*V1' + C2'*V2') (stored in WORK)
+*
+*              W := C2'
+*
+               DO 190 J = 1, K
+                  CALL DCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 )
+  190          CONTINUE
+*
+*              W := W * V2'
+*
+               CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit', N, K,
+     $                     ONE, V( 1, M-K+1 ), LDV, WORK, LDWORK )
+               IF( M.GT.K ) THEN
+*
+*                 W := W + C1'*V1'
+*
+                  CALL DGEMM( 'Transpose', 'Transpose', N, K, M-K, ONE,
+     $                        C, LDC, V, LDV, ONE, WORK, LDWORK )
+               END IF
+*
+*              W := W * T'  or  W * T
+*
+               CALL DTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - V' * W'
+*
+               IF( M.GT.K ) THEN
+*
+*                 C1 := C1 - V1' * W'
+*
+                  CALL DGEMM( 'Transpose', 'Transpose', M-K, N, K, -ONE,
+     $                        V, LDV, WORK, LDWORK, ONE, C, LDC )
+               END IF
+*
+*              W := W * V2
+*
+               CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit', N,
+     $                     K, ONE, V( 1, M-K+1 ), LDV, WORK, LDWORK )
+*
+*              C2 := C2 - W'
+*
+               DO 210 J = 1, K
+                  DO 200 I = 1, N
+                     C( M-K+J, I ) = C( M-K+J, I ) - WORK( I, J )
+  200             CONTINUE
+  210          CONTINUE
+*
+            ELSE IF( LSAME( SIDE, 'R' ) ) THEN
+*
+*              Form  C * H  or  C * H'  where  C = ( C1  C2 )
+*
+*              W := C * V'  =  (C1*V1' + C2*V2')  (stored in WORK)
+*
+*              W := C2
+*
+               DO 220 J = 1, K
+                  CALL DCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 )
+  220          CONTINUE
+*
+*              W := W * V2'
+*
+               CALL DTRMM( 'Right', 'Lower', 'Transpose', 'Unit', M, K,
+     $                     ONE, V( 1, N-K+1 ), LDV, WORK, LDWORK )
+               IF( N.GT.K ) THEN
+*
+*                 W := W + C1 * V1'
+*
+                  CALL DGEMM( 'No transpose', 'Transpose', M, K, N-K,
+     $                        ONE, C, LDC, V, LDV, ONE, WORK, LDWORK )
+               END IF
+*
+*              W := W * T  or  W * T'
+*
+               CALL DTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - W * V
+*
+               IF( N.GT.K ) THEN
+*
+*                 C1 := C1 - W * V1
+*
+                  CALL DGEMM( 'No transpose', 'No transpose', M, N-K, K,
+     $                        -ONE, WORK, LDWORK, V, LDV, ONE, C, LDC )
+               END IF
+*
+*              W := W * V2
+*
+               CALL DTRMM( 'Right', 'Lower', 'No transpose', 'Unit', M,
+     $                     K, ONE, V( 1, N-K+1 ), LDV, WORK, LDWORK )
+*
+*              C1 := C1 - W
+*
+               DO 240 J = 1, K
+                  DO 230 I = 1, M
+                     C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J )
+  230             CONTINUE
+  240          CONTINUE
+*
+            END IF
+*
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DLARFB
+*
+      END
diff --git a/costa/native/external/lapack/dlarfg.f b/costa/native/external/lapack/dlarfg.f
new file mode 100644
index 000000000..0e67a7eb5
--- /dev/null
+++ b/costa/native/external/lapack/dlarfg.f
@@ -0,0 +1,138 @@
+      SUBROUTINE DLARFG( N, ALPHA, X, INCX, TAU )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+      DOUBLE PRECISION   ALPHA, TAU
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLARFG generates a real elementary reflector H of order n, such
+*  that
+*
+*        H * ( alpha ) = ( beta ),   H' * H = I.
+*            (   x   )   (   0  )
+*
+*  where alpha and beta are scalars, and x is an (n-1)-element real
+*  vector. H is represented in the form
+*
+*        H = I - tau * ( 1 ) * ( 1 v' ) ,
+*                      ( v )
+*
+*  where tau is a real scalar and v is a real (n-1)-element
+*  vector.
+*
+*  If the elements of x are all zero, then tau = 0 and H is taken to be
+*  the unit matrix.
+*
+*  Otherwise  1 <= tau <= 2.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the elementary reflector.
+*
+*  ALPHA   (input/output) DOUBLE PRECISION
+*          On entry, the value alpha.
+*          On exit, it is overwritten with the value beta.
+*
+*  X       (input/output) DOUBLE PRECISION array, dimension
+*                         (1+(N-2)*abs(INCX))
+*          On entry, the vector x.
+*          On exit, it is overwritten with the vector v.
+*
+*  INCX    (input) INTEGER
+*          The increment between elements of X. INCX > 0.
+*
+*  TAU     (output) DOUBLE PRECISION
+*          The value tau.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            J, KNT
+      DOUBLE PRECISION   BETA, RSAFMN, SAFMIN, XNORM
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH, DLAPY2, DNRM2
+      EXTERNAL           DLAMCH, DLAPY2, DNRM2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, SIGN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSCAL
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.LE.1 ) THEN
+         TAU = ZERO
+         RETURN
+      END IF
+*
+      XNORM = DNRM2( N-1, X, INCX )
+*
+      IF( XNORM.EQ.ZERO ) THEN
+*
+*        H  =  I
+*
+         TAU = ZERO
+      ELSE
+*
+*        general case
+*
+         BETA = -SIGN( DLAPY2( ALPHA, XNORM ), ALPHA )
+         SAFMIN = DLAMCH( 'S' ) / DLAMCH( 'E' )
+         IF( ABS( BETA ).LT.SAFMIN ) THEN
+*
+*           XNORM, BETA may be inaccurate; scale X and recompute them
+*
+            RSAFMN = ONE / SAFMIN
+            KNT = 0
+   10       CONTINUE
+            KNT = KNT + 1
+            CALL DSCAL( N-1, RSAFMN, X, INCX )
+            BETA = BETA*RSAFMN
+            ALPHA = ALPHA*RSAFMN
+            IF( ABS( BETA ).LT.SAFMIN )
+     $         GO TO 10
+*
+*           New BETA is at most 1, at least SAFMIN
+*
+            XNORM = DNRM2( N-1, X, INCX )
+            BETA = -SIGN( DLAPY2( ALPHA, XNORM ), ALPHA )
+            TAU = ( BETA-ALPHA ) / BETA
+            CALL DSCAL( N-1, ONE / ( ALPHA-BETA ), X, INCX )
+*
+*           If ALPHA is subnormal, it may lose relative accuracy
+*
+            ALPHA = BETA
+            DO 20 J = 1, KNT
+               ALPHA = ALPHA*SAFMIN
+   20       CONTINUE
+         ELSE
+            TAU = ( BETA-ALPHA ) / BETA
+            CALL DSCAL( N-1, ONE / ( ALPHA-BETA ), X, INCX )
+            ALPHA = BETA
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DLARFG
+*
+      END
diff --git a/costa/native/external/lapack/dlarft.f b/costa/native/external/lapack/dlarft.f
new file mode 100644
index 000000000..147b22d95
--- /dev/null
+++ b/costa/native/external/lapack/dlarft.f
@@ -0,0 +1,218 @@
+      SUBROUTINE DLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIRECT, STOREV
+      INTEGER            K, LDT, LDV, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   T( LDT, * ), TAU( * ), V( LDV, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLARFT forms the triangular factor T of a real block reflector H
+*  of order n, which is defined as a product of k elementary reflectors.
+*
+*  If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
+*
+*  If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
+*
+*  If STOREV = 'C', the vector which defines the elementary reflector
+*  H(i) is stored in the i-th column of the array V, and
+*
+*     H  =  I - V * T * V'
+*
+*  If STOREV = 'R', the vector which defines the elementary reflector
+*  H(i) is stored in the i-th row of the array V, and
+*
+*     H  =  I - V' * T * V
+*
+*  Arguments
+*  =========
+*
+*  DIRECT  (input) CHARACTER*1
+*          Specifies the order in which the elementary reflectors are
+*          multiplied to form the block reflector:
+*          = 'F': H = H(1) H(2) . . . H(k) (Forward)
+*          = 'B': H = H(k) . . . H(2) H(1) (Backward)
+*
+*  STOREV  (input) CHARACTER*1
+*          Specifies how the vectors which define the elementary
+*          reflectors are stored (see also Further Details):
+*          = 'C': columnwise
+*          = 'R': rowwise
+*
+*  N       (input) INTEGER
+*          The order of the block reflector H. N >= 0.
+*
+*  K       (input) INTEGER
+*          The order of the triangular factor T (= the number of
+*          elementary reflectors). K >= 1.
+*
+*  V       (input/output) DOUBLE PRECISION array, dimension
+*                               (LDV,K) if STOREV = 'C'
+*                               (LDV,N) if STOREV = 'R'
+*          The matrix V. See further details.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V.
+*          If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
+*
+*  TAU     (input) DOUBLE PRECISION array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i).
+*
+*  T       (output) DOUBLE PRECISION array, dimension (LDT,K)
+*          The k by k triangular factor T of the block reflector.
+*          If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
+*          lower triangular. The rest of the array is not used.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= K.
+*
+*  Further Details
+*  ===============
+*
+*  The shape of the matrix V and the storage of the vectors which define
+*  the H(i) is best illustrated by the following example with n = 5 and
+*  k = 3. The elements equal to 1 are not stored; the corresponding
+*  array elements are modified but restored on exit. The rest of the
+*  array is not used.
+*
+*  DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R':
+*
+*               V = (  1       )                 V = (  1 v1 v1 v1 v1 )
+*                   ( v1  1    )                     (     1 v2 v2 v2 )
+*                   ( v1 v2  1 )                     (        1 v3 v3 )
+*                   ( v1 v2 v3 )
+*                   ( v1 v2 v3 )
+*
+*  DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R':
+*
+*               V = ( v1 v2 v3 )                 V = ( v1 v1  1       )
+*                   ( v1 v2 v3 )                     ( v2 v2 v2  1    )
+*                   (  1 v2 v3 )                     ( v3 v3 v3 v3  1 )
+*                   (     1 v3 )
+*                   (        1 )
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      DOUBLE PRECISION   VII
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMV, DTRMV
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( LSAME( DIRECT, 'F' ) ) THEN
+         DO 20 I = 1, K
+            IF( TAU( I ).EQ.ZERO ) THEN
+*
+*              H(i)  =  I
+*
+               DO 10 J = 1, I
+                  T( J, I ) = ZERO
+   10          CONTINUE
+            ELSE
+*
+*              general case
+*
+               VII = V( I, I )
+               V( I, I ) = ONE
+               IF( LSAME( STOREV, 'C' ) ) THEN
+*
+*                 T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i)
+*
+                  CALL DGEMV( 'Transpose', N-I+1, I-1, -TAU( I ),
+     $                        V( I, 1 ), LDV, V( I, I ), 1, ZERO,
+     $                        T( 1, I ), 1 )
+               ELSE
+*
+*                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)'
+*
+                  CALL DGEMV( 'No transpose', I-1, N-I+1, -TAU( I ),
+     $                        V( 1, I ), LDV, V( I, I ), LDV, ZERO,
+     $                        T( 1, I ), 1 )
+               END IF
+               V( I, I ) = VII
+*
+*              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i)
+*
+               CALL DTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T,
+     $                     LDT, T( 1, I ), 1 )
+               T( I, I ) = TAU( I )
+            END IF
+   20    CONTINUE
+      ELSE
+         DO 40 I = K, 1, -1
+            IF( TAU( I ).EQ.ZERO ) THEN
+*
+*              H(i)  =  I
+*
+               DO 30 J = I, K
+                  T( J, I ) = ZERO
+   30          CONTINUE
+            ELSE
+*
+*              general case
+*
+               IF( I.LT.K ) THEN
+                  IF( LSAME( STOREV, 'C' ) ) THEN
+                     VII = V( N-K+I, I )
+                     V( N-K+I, I ) = ONE
+*
+*                    T(i+1:k,i) :=
+*                            - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i)
+*
+                     CALL DGEMV( 'Transpose', N-K+I, K-I, -TAU( I ),
+     $                           V( 1, I+1 ), LDV, V( 1, I ), 1, ZERO,
+     $                           T( I+1, I ), 1 )
+                     V( N-K+I, I ) = VII
+                  ELSE
+                     VII = V( I, N-K+I )
+                     V( I, N-K+I ) = ONE
+*
+*                    T(i+1:k,i) :=
+*                            - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)'
+*
+                     CALL DGEMV( 'No transpose', K-I, N-K+I, -TAU( I ),
+     $                           V( I+1, 1 ), LDV, V( I, 1 ), LDV, ZERO,
+     $                           T( I+1, I ), 1 )
+                     V( I, N-K+I ) = VII
+                  END IF
+*
+*                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i)
+*
+                  CALL DTRMV( 'Lower', 'No transpose', 'Non-unit', K-I,
+     $                        T( I+1, I+1 ), LDT, T( I+1, I ), 1 )
+               END IF
+               T( I, I ) = TAU( I )
+            END IF
+   40    CONTINUE
+      END IF
+      RETURN
+*
+*     End of DLARFT
+*
+      END
diff --git a/costa/native/external/lapack/dlarfx.f b/costa/native/external/lapack/dlarfx.f
new file mode 100644
index 000000000..178a20ae4
--- /dev/null
+++ b/costa/native/external/lapack/dlarfx.f
@@ -0,0 +1,639 @@
+      SUBROUTINE DLARFX( SIDE, M, N, V, TAU, C, LDC, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE
+      INTEGER            LDC, M, N
+      DOUBLE PRECISION   TAU
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   C( LDC, * ), V( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLARFX applies a real elementary reflector H to a real m by n
+*  matrix C, from either the left or the right. H is represented in the
+*  form
+*
+*        H = I - tau * v * v'
+*
+*  where tau is a real scalar and v is a real vector.
+*
+*  If tau = 0, then H is taken to be the unit matrix
+*
+*  This version uses inline code if H has order < 11.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': form  H * C
+*          = 'R': form  C * H
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C.
+*
+*  V       (input) DOUBLE PRECISION array, dimension (M) if SIDE = 'L'
+*                                     or (N) if SIDE = 'R'
+*          The vector v in the representation of H.
+*
+*  TAU     (input) DOUBLE PRECISION
+*          The value tau in the representation of H.
+*
+*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+*          On entry, the m by n matrix C.
+*          On exit, C is overwritten by the matrix H * C if SIDE = 'L',
+*          or C * H if SIDE = 'R'.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDA >= (1,M).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension
+*                      (N) if SIDE = 'L'
+*                      or (M) if SIDE = 'R'
+*          WORK is not referenced if H has order < 11.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            J
+      DOUBLE PRECISION   SUM, T1, T10, T2, T3, T4, T5, T6, T7, T8, T9,
+     $                   V1, V10, V2, V3, V4, V5, V6, V7, V8, V9
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMV, DGER
+*     ..
+*     .. Executable Statements ..
+*
+      IF( TAU.EQ.ZERO )
+     $   RETURN
+      IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*        Form  H * C, where H has order m.
+*
+         GO TO ( 10, 30, 50, 70, 90, 110, 130, 150,
+     $           170, 190 )M
+*
+*        Code for general M
+*
+*        w := C'*v
+*
+         CALL DGEMV( 'Transpose', M, N, ONE, C, LDC, V, 1, ZERO, WORK,
+     $               1 )
+*
+*        C := C - tau * v * w'
+*
+         CALL DGER( M, N, -TAU, V, 1, WORK, 1, C, LDC )
+         GO TO 410
+   10    CONTINUE
+*
+*        Special code for 1 x 1 Householder
+*
+         T1 = ONE - TAU*V( 1 )*V( 1 )
+         DO 20 J = 1, N
+            C( 1, J ) = T1*C( 1, J )
+   20    CONTINUE
+         GO TO 410
+   30    CONTINUE
+*
+*        Special code for 2 x 2 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         DO 40 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+   40    CONTINUE
+         GO TO 410
+   50    CONTINUE
+*
+*        Special code for 3 x 3 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         DO 60 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+   60    CONTINUE
+         GO TO 410
+   70    CONTINUE
+*
+*        Special code for 4 x 4 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         V4 = V( 4 )
+         T4 = TAU*V4
+         DO 80 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) +
+     $            V4*C( 4, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+            C( 4, J ) = C( 4, J ) - SUM*T4
+   80    CONTINUE
+         GO TO 410
+   90    CONTINUE
+*
+*        Special code for 5 x 5 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         V4 = V( 4 )
+         T4 = TAU*V4
+         V5 = V( 5 )
+         T5 = TAU*V5
+         DO 100 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) +
+     $            V4*C( 4, J ) + V5*C( 5, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+            C( 4, J ) = C( 4, J ) - SUM*T4
+            C( 5, J ) = C( 5, J ) - SUM*T5
+  100    CONTINUE
+         GO TO 410
+  110    CONTINUE
+*
+*        Special code for 6 x 6 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         V4 = V( 4 )
+         T4 = TAU*V4
+         V5 = V( 5 )
+         T5 = TAU*V5
+         V6 = V( 6 )
+         T6 = TAU*V6
+         DO 120 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) +
+     $            V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+            C( 4, J ) = C( 4, J ) - SUM*T4
+            C( 5, J ) = C( 5, J ) - SUM*T5
+            C( 6, J ) = C( 6, J ) - SUM*T6
+  120    CONTINUE
+         GO TO 410
+  130    CONTINUE
+*
+*        Special code for 7 x 7 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         V4 = V( 4 )
+         T4 = TAU*V4
+         V5 = V( 5 )
+         T5 = TAU*V5
+         V6 = V( 6 )
+         T6 = TAU*V6
+         V7 = V( 7 )
+         T7 = TAU*V7
+         DO 140 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) +
+     $            V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) +
+     $            V7*C( 7, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+            C( 4, J ) = C( 4, J ) - SUM*T4
+            C( 5, J ) = C( 5, J ) - SUM*T5
+            C( 6, J ) = C( 6, J ) - SUM*T6
+            C( 7, J ) = C( 7, J ) - SUM*T7
+  140    CONTINUE
+         GO TO 410
+  150    CONTINUE
+*
+*        Special code for 8 x 8 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         V4 = V( 4 )
+         T4 = TAU*V4
+         V5 = V( 5 )
+         T5 = TAU*V5
+         V6 = V( 6 )
+         T6 = TAU*V6
+         V7 = V( 7 )
+         T7 = TAU*V7
+         V8 = V( 8 )
+         T8 = TAU*V8
+         DO 160 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) +
+     $            V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) +
+     $            V7*C( 7, J ) + V8*C( 8, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+            C( 4, J ) = C( 4, J ) - SUM*T4
+            C( 5, J ) = C( 5, J ) - SUM*T5
+            C( 6, J ) = C( 6, J ) - SUM*T6
+            C( 7, J ) = C( 7, J ) - SUM*T7
+            C( 8, J ) = C( 8, J ) - SUM*T8
+  160    CONTINUE
+         GO TO 410
+  170    CONTINUE
+*
+*        Special code for 9 x 9 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         V4 = V( 4 )
+         T4 = TAU*V4
+         V5 = V( 5 )
+         T5 = TAU*V5
+         V6 = V( 6 )
+         T6 = TAU*V6
+         V7 = V( 7 )
+         T7 = TAU*V7
+         V8 = V( 8 )
+         T8 = TAU*V8
+         V9 = V( 9 )
+         T9 = TAU*V9
+         DO 180 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) +
+     $            V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) +
+     $            V7*C( 7, J ) + V8*C( 8, J ) + V9*C( 9, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+            C( 4, J ) = C( 4, J ) - SUM*T4
+            C( 5, J ) = C( 5, J ) - SUM*T5
+            C( 6, J ) = C( 6, J ) - SUM*T6
+            C( 7, J ) = C( 7, J ) - SUM*T7
+            C( 8, J ) = C( 8, J ) - SUM*T8
+            C( 9, J ) = C( 9, J ) - SUM*T9
+  180    CONTINUE
+         GO TO 410
+  190    CONTINUE
+*
+*        Special code for 10 x 10 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         V4 = V( 4 )
+         T4 = TAU*V4
+         V5 = V( 5 )
+         T5 = TAU*V5
+         V6 = V( 6 )
+         T6 = TAU*V6
+         V7 = V( 7 )
+         T7 = TAU*V7
+         V8 = V( 8 )
+         T8 = TAU*V8
+         V9 = V( 9 )
+         T9 = TAU*V9
+         V10 = V( 10 )
+         T10 = TAU*V10
+         DO 200 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) +
+     $            V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) +
+     $            V7*C( 7, J ) + V8*C( 8, J ) + V9*C( 9, J ) +
+     $            V10*C( 10, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+            C( 4, J ) = C( 4, J ) - SUM*T4
+            C( 5, J ) = C( 5, J ) - SUM*T5
+            C( 6, J ) = C( 6, J ) - SUM*T6
+            C( 7, J ) = C( 7, J ) - SUM*T7
+            C( 8, J ) = C( 8, J ) - SUM*T8
+            C( 9, J ) = C( 9, J ) - SUM*T9
+            C( 10, J ) = C( 10, J ) - SUM*T10
+  200    CONTINUE
+         GO TO 410
+      ELSE
+*
+*        Form  C * H, where H has order n.
+*
+         GO TO ( 210, 230, 250, 270, 290, 310, 330, 350,
+     $           370, 390 )N
+*
+*        Code for general N
+*
+*        w := C * v
+*
+         CALL DGEMV( 'No transpose', M, N, ONE, C, LDC, V, 1, ZERO,
+     $               WORK, 1 )
+*
+*        C := C - tau * w * v'
+*
+         CALL DGER( M, N, -TAU, WORK, 1, V, 1, C, LDC )
+         GO TO 410
+  210    CONTINUE
+*
+*        Special code for 1 x 1 Householder
+*
+         T1 = ONE - TAU*V( 1 )*V( 1 )
+         DO 220 J = 1, M
+            C( J, 1 ) = T1*C( J, 1 )
+  220    CONTINUE
+         GO TO 410
+  230    CONTINUE
+*
+*        Special code for 2 x 2 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         DO 240 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+  240    CONTINUE
+         GO TO 410
+  250    CONTINUE
+*
+*        Special code for 3 x 3 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         DO 260 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+  260    CONTINUE
+         GO TO 410
+  270    CONTINUE
+*
+*        Special code for 4 x 4 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         V4 = V( 4 )
+         T4 = TAU*V4
+         DO 280 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) +
+     $            V4*C( J, 4 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+            C( J, 4 ) = C( J, 4 ) - SUM*T4
+  280    CONTINUE
+         GO TO 410
+  290    CONTINUE
+*
+*        Special code for 5 x 5 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         V4 = V( 4 )
+         T4 = TAU*V4
+         V5 = V( 5 )
+         T5 = TAU*V5
+         DO 300 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) +
+     $            V4*C( J, 4 ) + V5*C( J, 5 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+            C( J, 4 ) = C( J, 4 ) - SUM*T4
+            C( J, 5 ) = C( J, 5 ) - SUM*T5
+  300    CONTINUE
+         GO TO 410
+  310    CONTINUE
+*
+*        Special code for 6 x 6 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         V4 = V( 4 )
+         T4 = TAU*V4
+         V5 = V( 5 )
+         T5 = TAU*V5
+         V6 = V( 6 )
+         T6 = TAU*V6
+         DO 320 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) +
+     $            V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+            C( J, 4 ) = C( J, 4 ) - SUM*T4
+            C( J, 5 ) = C( J, 5 ) - SUM*T5
+            C( J, 6 ) = C( J, 6 ) - SUM*T6
+  320    CONTINUE
+         GO TO 410
+  330    CONTINUE
+*
+*        Special code for 7 x 7 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         V4 = V( 4 )
+         T4 = TAU*V4
+         V5 = V( 5 )
+         T5 = TAU*V5
+         V6 = V( 6 )
+         T6 = TAU*V6
+         V7 = V( 7 )
+         T7 = TAU*V7
+         DO 340 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) +
+     $            V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) +
+     $            V7*C( J, 7 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+            C( J, 4 ) = C( J, 4 ) - SUM*T4
+            C( J, 5 ) = C( J, 5 ) - SUM*T5
+            C( J, 6 ) = C( J, 6 ) - SUM*T6
+            C( J, 7 ) = C( J, 7 ) - SUM*T7
+  340    CONTINUE
+         GO TO 410
+  350    CONTINUE
+*
+*        Special code for 8 x 8 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         V4 = V( 4 )
+         T4 = TAU*V4
+         V5 = V( 5 )
+         T5 = TAU*V5
+         V6 = V( 6 )
+         T6 = TAU*V6
+         V7 = V( 7 )
+         T7 = TAU*V7
+         V8 = V( 8 )
+         T8 = TAU*V8
+         DO 360 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) +
+     $            V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) +
+     $            V7*C( J, 7 ) + V8*C( J, 8 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+            C( J, 4 ) = C( J, 4 ) - SUM*T4
+            C( J, 5 ) = C( J, 5 ) - SUM*T5
+            C( J, 6 ) = C( J, 6 ) - SUM*T6
+            C( J, 7 ) = C( J, 7 ) - SUM*T7
+            C( J, 8 ) = C( J, 8 ) - SUM*T8
+  360    CONTINUE
+         GO TO 410
+  370    CONTINUE
+*
+*        Special code for 9 x 9 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         V4 = V( 4 )
+         T4 = TAU*V4
+         V5 = V( 5 )
+         T5 = TAU*V5
+         V6 = V( 6 )
+         T6 = TAU*V6
+         V7 = V( 7 )
+         T7 = TAU*V7
+         V8 = V( 8 )
+         T8 = TAU*V8
+         V9 = V( 9 )
+         T9 = TAU*V9
+         DO 380 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) +
+     $            V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) +
+     $            V7*C( J, 7 ) + V8*C( J, 8 ) + V9*C( J, 9 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+            C( J, 4 ) = C( J, 4 ) - SUM*T4
+            C( J, 5 ) = C( J, 5 ) - SUM*T5
+            C( J, 6 ) = C( J, 6 ) - SUM*T6
+            C( J, 7 ) = C( J, 7 ) - SUM*T7
+            C( J, 8 ) = C( J, 8 ) - SUM*T8
+            C( J, 9 ) = C( J, 9 ) - SUM*T9
+  380    CONTINUE
+         GO TO 410
+  390    CONTINUE
+*
+*        Special code for 10 x 10 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         V4 = V( 4 )
+         T4 = TAU*V4
+         V5 = V( 5 )
+         T5 = TAU*V5
+         V6 = V( 6 )
+         T6 = TAU*V6
+         V7 = V( 7 )
+         T7 = TAU*V7
+         V8 = V( 8 )
+         T8 = TAU*V8
+         V9 = V( 9 )
+         T9 = TAU*V9
+         V10 = V( 10 )
+         T10 = TAU*V10
+         DO 400 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) +
+     $            V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) +
+     $            V7*C( J, 7 ) + V8*C( J, 8 ) + V9*C( J, 9 ) +
+     $            V10*C( J, 10 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+            C( J, 4 ) = C( J, 4 ) - SUM*T4
+            C( J, 5 ) = C( J, 5 ) - SUM*T5
+            C( J, 6 ) = C( J, 6 ) - SUM*T6
+            C( J, 7 ) = C( J, 7 ) - SUM*T7
+            C( J, 8 ) = C( J, 8 ) - SUM*T8
+            C( J, 9 ) = C( J, 9 ) - SUM*T9
+            C( J, 10 ) = C( J, 10 ) - SUM*T10
+  400    CONTINUE
+         GO TO 410
+      END IF
+  410 CONTINUE
+      RETURN
+*
+*     End of DLARFX
+*
+      END
diff --git a/costa/native/external/lapack/dlargv.f b/costa/native/external/lapack/dlargv.f
new file mode 100644
index 000000000..6b1c10411
--- /dev/null
+++ b/costa/native/external/lapack/dlargv.f
@@ -0,0 +1,100 @@
+      SUBROUTINE DLARGV( N, X, INCX, Y, INCY, C, INCC )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCC, INCX, INCY, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   C( * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLARGV generates a vector of real plane rotations, determined by
+*  elements of the real vectors x and y. For i = 1,2,...,n
+*
+*     (  c(i)  s(i) ) ( x(i) ) = ( a(i) )
+*     ( -s(i)  c(i) ) ( y(i) ) = (   0  )
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of plane rotations to be generated.
+*
+*  X       (input/output) DOUBLE PRECISION array,
+*                         dimension (1+(N-1)*INCX)
+*          On entry, the vector x.
+*          On exit, x(i) is overwritten by a(i), for i = 1,...,n.
+*
+*  INCX    (input) INTEGER
+*          The increment between elements of X. INCX > 0.
+*
+*  Y       (input/output) DOUBLE PRECISION array,
+*                         dimension (1+(N-1)*INCY)
+*          On entry, the vector y.
+*          On exit, the sines of the plane rotations.
+*
+*  INCY    (input) INTEGER
+*          The increment between elements of Y. INCY > 0.
+*
+*  C       (output) DOUBLE PRECISION array, dimension (1+(N-1)*INCC)
+*          The cosines of the plane rotations.
+*
+*  INCC    (input) INTEGER
+*          The increment between elements of C. INCC > 0.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IC, IX, IY
+      DOUBLE PRECISION   F, G, T, TT
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IX = 1
+      IY = 1
+      IC = 1
+      DO 10 I = 1, N
+         F = X( IX )
+         G = Y( IY )
+         IF( G.EQ.ZERO ) THEN
+            C( IC ) = ONE
+         ELSE IF( F.EQ.ZERO ) THEN
+            C( IC ) = ZERO
+            Y( IY ) = ONE
+            X( IX ) = G
+         ELSE IF( ABS( F ).GT.ABS( G ) ) THEN
+            T = G / F
+            TT = SQRT( ONE+T*T )
+            C( IC ) = ONE / TT
+            Y( IY ) = T*C( IC )
+            X( IX ) = F*TT
+         ELSE
+            T = F / G
+            TT = SQRT( ONE+T*T )
+            Y( IY ) = ONE / TT
+            C( IC ) = T*Y( IY )
+            X( IX ) = G*TT
+         END IF
+         IC = IC + INCC
+         IY = IY + INCY
+         IX = IX + INCX
+   10 CONTINUE
+      RETURN
+*
+*     End of DLARGV
+*
+      END
diff --git a/costa/native/external/lapack/dlarnv.f b/costa/native/external/lapack/dlarnv.f
new file mode 100644
index 000000000..46928d5fe
--- /dev/null
+++ b/costa/native/external/lapack/dlarnv.f
@@ -0,0 +1,116 @@
+      SUBROUTINE DLARNV( IDIST, ISEED, N, X )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            IDIST, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            ISEED( 4 )
+      DOUBLE PRECISION   X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLARNV returns a vector of n random real numbers from a uniform or
+*  normal distribution.
+*
+*  Arguments
+*  =========
+*
+*  IDIST   (input) INTEGER
+*          Specifies the distribution of the random numbers:
+*          = 1:  uniform (0,1)
+*          = 2:  uniform (-1,1)
+*          = 3:  normal (0,1)
+*
+*  ISEED   (input/output) INTEGER array, dimension (4)
+*          On entry, the seed of the random number generator; the array
+*          elements must be between 0 and 4095, and ISEED(4) must be
+*          odd.
+*          On exit, the seed is updated.
+*
+*  N       (input) INTEGER
+*          The number of random numbers to be generated.
+*
+*  X       (output) DOUBLE PRECISION array, dimension (N)
+*          The generated random numbers.
+*
+*  Further Details
+*  ===============
+*
+*  This routine calls the auxiliary routine DLARUV to generate random
+*  real numbers from a uniform (0,1) distribution, in batches of up to
+*  128 using vectorisable code. The Box-Muller method is used to
+*  transform numbers from a uniform to a normal distribution.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, TWO
+      PARAMETER          ( ONE = 1.0D+0, TWO = 2.0D+0 )
+      INTEGER            LV
+      PARAMETER          ( LV = 128 )
+      DOUBLE PRECISION   TWOPI
+      PARAMETER          ( TWOPI = 6.2831853071795864769252867663D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IL, IL2, IV
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   U( LV )
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          COS, LOG, MIN, SQRT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARUV
+*     ..
+*     .. Executable Statements ..
+*
+      DO 40 IV = 1, N, LV / 2
+         IL = MIN( LV / 2, N-IV+1 )
+         IF( IDIST.EQ.3 ) THEN
+            IL2 = 2*IL
+         ELSE
+            IL2 = IL
+         END IF
+*
+*        Call DLARUV to generate IL2 numbers from a uniform (0,1)
+*        distribution (IL2 <= LV)
+*
+         CALL DLARUV( ISEED, IL2, U )
+*
+         IF( IDIST.EQ.1 ) THEN
+*
+*           Copy generated numbers
+*
+            DO 10 I = 1, IL
+               X( IV+I-1 ) = U( I )
+   10       CONTINUE
+         ELSE IF( IDIST.EQ.2 ) THEN
+*
+*           Convert generated numbers to uniform (-1,1) distribution
+*
+            DO 20 I = 1, IL
+               X( IV+I-1 ) = TWO*U( I ) - ONE
+   20       CONTINUE
+         ELSE IF( IDIST.EQ.3 ) THEN
+*
+*           Convert generated numbers to normal (0,1) distribution
+*
+            DO 30 I = 1, IL
+               X( IV+I-1 ) = SQRT( -TWO*LOG( U( 2*I-1 ) ) )*
+     $                       COS( TWOPI*U( 2*I ) )
+   30       CONTINUE
+         END IF
+   40 CONTINUE
+      RETURN
+*
+*     End of DLARNV
+*
+      END
diff --git a/costa/native/external/lapack/dlarrb.f b/costa/native/external/lapack/dlarrb.f
new file mode 100644
index 000000000..01ad7bd23
--- /dev/null
+++ b/costa/native/external/lapack/dlarrb.f
@@ -0,0 +1,282 @@
+      SUBROUTINE DLARRB( N, D, L, LD, LLD, IFIRST, ILAST, SIGMA, RELTOL,
+     $                   W, WGAP, WERR, WORK, IWORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            IFIRST, ILAST, INFO, N
+      DOUBLE PRECISION   RELTOL, SIGMA
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   D( * ), L( * ), LD( * ), LLD( * ), W( * ),
+     $                   WERR( * ), WGAP( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  Given the relatively robust representation(RRR) L D L^T, DLARRB
+*  does ``limited'' bisection to locate the eigenvalues of L D L^T,
+*  W( IFIRST ) thru' W( ILAST ), to more accuracy. Intervals
+*  [left, right] are maintained by storing their mid-points and
+*  semi-widths in the arrays W and WERR respectively.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix.
+*
+*  D       (input) DOUBLE PRECISION array, dimension (N)
+*          The n diagonal elements of the diagonal matrix D.
+*
+*  L       (input) DOUBLE PRECISION array, dimension (N-1)
+*          The n-1 subdiagonal elements of the unit bidiagonal matrix L.
+*
+*  LD      (input) DOUBLE PRECISION array, dimension (N-1)
+*          The n-1 elements L(i)*D(i).
+*
+*  LLD     (input) DOUBLE PRECISION array, dimension (N-1)
+*          The n-1 elements L(i)*L(i)*D(i).
+*
+*  IFIRST  (input) INTEGER
+*          The index of the first eigenvalue in the cluster.
+*
+*  ILAST   (input) INTEGER
+*          The index of the last eigenvalue in the cluster.
+*
+*  SIGMA   (input) DOUBLE PRECISION
+*          The shift used to form L D L^T (see DLARRF).
+*
+*  RELTOL  (input) DOUBLE PRECISION
+*          The relative tolerance.
+*
+*  W       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On input, W( IFIRST ) thru' W( ILAST ) are estimates of the
+*          corresponding eigenvalues of L D L^T.
+*          On output, these estimates are ``refined''.
+*
+*  WGAP    (input/output) DOUBLE PRECISION array, dimension (N)
+*          The gaps between the eigenvalues of L D L^T. Very small
+*          gaps are changed on output.
+*
+*  WERR    (input/output) DOUBLE PRECISION array, dimension (N)
+*          On input, WERR( IFIRST ) thru' WERR( ILAST ) are the errors
+*          in the estimates W( IFIRST ) thru' W( ILAST ).
+*          On output, these are the ``refined'' errors.
+*
+*****Reminder to Inder --- WORK is never used in this subroutine *****
+*  WORK    (input) DOUBLE PRECISION array, dimension (???)
+*          Workspace.
+*
+*  IWORK   (input) INTEGER array, dimension (2*N)
+*          Workspace.
+*
+*****Reminder to Inder --- INFO is never set in this subroutine ******
+*  INFO    (output) INTEGER
+*          Error flag.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Inderjit Dhillon, IBM Almaden, USA
+*     Osni Marques, LBNL/NERSC, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, TWO, HALF
+      PARAMETER          ( ZERO = 0.0D0, TWO = 2.0D0, HALF = 0.5D0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            CNT, I, I1, I2, INITI1, INITI2, J, K, NCNVRG,
+     $                   NEIG, NINT, NRIGHT, OLNINT
+      DOUBLE PRECISION   DELTA, EPS, GAP, LEFT, MID, PERT, RIGHT, S,
+     $                   THRESH, TMP, WIDTH
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      EPS = DLAMCH( 'Precision' )
+      I1 = IFIRST
+      I2 = IFIRST
+      NEIG = ILAST - IFIRST + 1
+      NCNVRG = 0
+      THRESH = RELTOL
+      DO 10 I = IFIRST, ILAST
+         IWORK( I ) = 0
+         PERT = EPS*( ABS( SIGMA )+ABS( W( I ) ) )
+         WERR( I ) = WERR( I ) + PERT
+         IF( WGAP( I ).LT.PERT )
+     $      WGAP( I ) = PERT
+   10 CONTINUE
+      DO 20 I = I1, ILAST
+         IF( I.EQ.1 ) THEN
+            GAP = WGAP( I )
+         ELSE IF( I.EQ.N ) THEN
+            GAP = WGAP( I-1 )
+         ELSE
+            GAP = MIN( WGAP( I-1 ), WGAP( I ) )
+         END IF
+         IF( WERR( I ).LT.THRESH*GAP ) THEN
+            NCNVRG = NCNVRG + 1
+            IWORK( I ) = 1
+            IF( I1.EQ.I )
+     $         I1 = I1 + 1
+         ELSE
+            I2 = I
+         END IF
+   20 CONTINUE
+*
+*     Initialize the unconverged intervals.
+*
+      I = I1
+      NINT = 0
+      RIGHT = ZERO
+   30 CONTINUE
+      IF( I.LE.I2 ) THEN
+         IF( IWORK( I ).EQ.0 ) THEN
+            DELTA = EPS
+            LEFT = W( I ) - WERR( I )
+*
+*           Do while( CNT(LEFT).GT.I-1 )
+*
+   40       CONTINUE
+            IF( I.GT.I1 .AND. LEFT.LE.RIGHT ) THEN
+               LEFT = RIGHT
+               CNT = I - 1
+            ELSE
+               S = -LEFT
+               CNT = 0
+               DO 50 J = 1, N - 1
+                  TMP = D( J ) + S
+                  S = S*( LD( J ) / TMP )*L( J ) - LEFT
+                  IF( TMP.LT.ZERO )
+     $               CNT = CNT + 1
+   50          CONTINUE
+               TMP = D( N ) + S
+               IF( TMP.LT.ZERO )
+     $            CNT = CNT + 1
+               IF( CNT.GT.I-1 ) THEN
+                  DELTA = TWO*DELTA
+                  LEFT = LEFT - ( ABS( SIGMA )+ABS( LEFT ) )*DELTA
+                  GO TO 40
+               END IF
+            END IF
+            DELTA = EPS
+            RIGHT = W( I ) + WERR( I )
+*
+*           Do while( CNT(RIGHT).LT.I )
+*
+   60       CONTINUE
+            S = -RIGHT
+            CNT = 0
+            DO 70 J = 1, N - 1
+               TMP = D( J ) + S
+               S = S*( LD( J ) / TMP )*L( J ) - RIGHT
+               IF( TMP.LT.ZERO )
+     $            CNT = CNT + 1
+   70       CONTINUE
+            TMP = D( N ) + S
+            IF( TMP.LT.ZERO )
+     $         CNT = CNT + 1
+            IF( CNT.LT.I ) THEN
+               DELTA = TWO*DELTA
+               RIGHT = RIGHT + ( ABS( SIGMA )+ABS( RIGHT ) )*DELTA
+               GO TO 60
+            END IF
+            WERR( I ) = LEFT
+            W( I ) = RIGHT
+            IWORK( N+I ) = CNT
+            NINT = NINT + 1
+            I = CNT + 1
+         ELSE
+            I = I + 1
+         END IF
+         GO TO 30
+      END IF
+*
+*     While( NCNVRG.LT.NEIG )
+*
+      INITI1 = I1
+      INITI2 = I2
+   80 CONTINUE
+      IF( NCNVRG.LT.NEIG ) THEN
+         OLNINT = NINT
+         I = I1
+         DO 100 K = 1, OLNINT
+            NRIGHT = IWORK( N+I )
+            IF( IWORK( I ).EQ.0 ) THEN
+               MID = HALF*( WERR( I )+W( I ) )
+               S = -MID
+               CNT = 0
+               DO 90 J = 1, N - 1
+                  TMP = D( J ) + S
+                  S = S*( LD( J ) / TMP )*L( J ) - MID
+                  IF( TMP.LT.ZERO )
+     $               CNT = CNT + 1
+   90          CONTINUE
+               TMP = D( N ) + S
+               IF( TMP.LT.ZERO )
+     $            CNT = CNT + 1
+               CNT = MAX( I-1, MIN( NRIGHT, CNT ) )
+               IF( I.EQ.NRIGHT ) THEN
+                  IF( I.EQ.IFIRST ) THEN
+                     GAP = WERR( I+1 ) - W( I )
+                  ELSE IF( I.EQ.ILAST ) THEN
+                     GAP = WERR( I ) - W( I-1 )
+                  ELSE
+                     GAP = MIN( WERR( I+1 )-W( I ), WERR( I )-W( I-1 ) )
+                  END IF
+                  WIDTH = W( I ) - MID
+                  IF( WIDTH.LT.THRESH*GAP ) THEN
+                     NCNVRG = NCNVRG + 1
+                     IWORK( I ) = 1
+                     IF( I1.EQ.I ) THEN
+                        I1 = I1 + 1
+                        NINT = NINT - 1
+                     END IF
+                  END IF
+               END IF
+               IF( IWORK( I ).EQ.0 )
+     $            I2 = K
+               IF( CNT.EQ.I-1 ) THEN
+                  WERR( I ) = MID
+               ELSE IF( CNT.EQ.NRIGHT ) THEN
+                  W( I ) = MID
+               ELSE
+                  IWORK( N+I ) = CNT
+                  NINT = NINT + 1
+                  WERR( CNT+1 ) = MID
+                  W( CNT+1 ) = W( I )
+                  W( I ) = MID
+                  I = CNT + 1
+                  IWORK( N+I ) = NRIGHT
+               END IF
+            END IF
+            I = NRIGHT + 1
+  100    CONTINUE
+         NINT = NINT - OLNINT + I2
+         GO TO 80
+      END IF
+      DO 110 I = INITI1, INITI2
+         W( I ) = HALF*( WERR( I )+W( I ) )
+         WERR( I ) = W( I ) - WERR( I )
+  110 CONTINUE
+*
+      RETURN
+*
+*     End of DLARRB
+*
+      END
diff --git a/costa/native/external/lapack/dlarre.f b/costa/native/external/lapack/dlarre.f
new file mode 100644
index 000000000..d15d5cbdb
--- /dev/null
+++ b/costa/native/external/lapack/dlarre.f
@@ -0,0 +1,318 @@
+      SUBROUTINE DLARRE( N, D, E, TOL, NSPLIT, ISPLIT, M, W, WOFF,
+     $                   GERSCH, WORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, M, N, NSPLIT
+      DOUBLE PRECISION   TOL
+*     ..
+*     .. Array Arguments ..
+      INTEGER            ISPLIT( * )
+      DOUBLE PRECISION   D( * ), E( * ), GERSCH( * ), W( * ), WOFF( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  Given the tridiagonal matrix T, DLARRE sets "small" off-diagonal
+*  elements to zero, and for each unreduced block T_i, it finds
+*  (i) the numbers sigma_i
+*  (ii) the base T_i - sigma_i I = L_i D_i L_i^T representations and
+*  (iii) eigenvalues of each L_i D_i L_i^T.
+*  The representations and eigenvalues found are then used by
+*  DSTEGR to compute the eigenvectors of a symmetric tridiagonal
+*  matrix. Currently, the base representations are limited to being
+*  positive or negative definite, and the eigenvalues of the definite
+*  matrices are found by the dqds algorithm (subroutine DLASQ2). As
+*  an added benefit, DLARRE also outputs the n Gerschgorin
+*  intervals for each L_i D_i L_i^T.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix.
+*
+*  D       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the n diagonal elements of the tridiagonal
+*          matrix T.
+*          On exit, the n diagonal elements of the diagonal
+*          matrices D_i.
+*
+*  E       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix T; E(N) need not be set.
+*          On exit, the subdiagonal elements of the unit bidiagonal
+*          matrices L_i.
+*
+*  TOL     (input) DOUBLE PRECISION
+*          The threshold for splitting. If on input |E(i)| < TOL, then
+*          the matrix T is split into smaller blocks.
+*
+*  NSPLIT  (input) INTEGER
+*          The number of blocks T splits into. 1 <= NSPLIT <= N.
+*
+*  ISPLIT  (output) INTEGER array, dimension (2*N)
+*          The splitting points, at which T breaks up into submatrices.
+*          The first submatrix consists of rows/columns 1 to ISPLIT(1),
+*          the second of rows/columns ISPLIT(1)+1 through ISPLIT(2),
+*          etc., and the NSPLIT-th consists of rows/columns
+*          ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N.
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues (of all the L_i D_i L_i^T)
+*          found.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          The first M elements contain the eigenvalues. The
+*          eigenvalues of each of the blocks, L_i D_i L_i^T, are
+*          sorted in ascending order.
+*
+*  WOFF    (output) DOUBLE PRECISION array, dimension (N)
+*          The NSPLIT base points sigma_i.
+*
+*  GERSCH  (output) DOUBLE PRECISION array, dimension (2*N)
+*          The n Gerschgorin intervals.
+*
+*  WORK    (input) DOUBLE PRECISION array, dimension (4*N???)
+*          Workspace.
+*
+*  INFO    (output) INTEGER
+*          Output error code from DLASQ2
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Inderjit Dhillon, IBM Almaden, USA
+*     Osni Marques, LBNL/NERSC, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO, FOUR, FOURTH
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0,
+     $                   FOUR = 4.0D0, FOURTH = ONE / FOUR )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            CNT, I, IBEGIN, IEND, IN, J, JBLK, MAXCNT
+      DOUBLE PRECISION   DELTA, EPS, GL, GU, NRM, OFFD, S, SGNDEF,
+     $                   SIGMA, TAU, TMP1, WIDTH
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLASQ2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      EPS = DLAMCH( 'Precision' )
+*
+*     Compute Splitting Points
+*
+      NSPLIT = 1
+      DO 10 I = 1, N - 1
+         IF( ABS( E( I ) ).LE.TOL ) THEN
+            ISPLIT( NSPLIT ) = I
+            NSPLIT = NSPLIT + 1
+         END IF
+   10 CONTINUE
+      ISPLIT( NSPLIT ) = N
+*
+      IBEGIN = 1
+      DO 170 JBLK = 1, NSPLIT
+         IEND = ISPLIT( JBLK )
+         IF( IBEGIN.EQ.IEND ) THEN
+            W( IBEGIN ) = D( IBEGIN )
+            WOFF( JBLK ) = ZERO
+            IBEGIN = IEND + 1
+            GO TO 170
+         END IF
+         IN = IEND - IBEGIN + 1
+*
+*        Form the n Gerschgorin intervals
+*
+         GL = D( IBEGIN ) - ABS( E( IBEGIN ) )
+         GU = D( IBEGIN ) + ABS( E( IBEGIN ) )
+         GERSCH( 2*IBEGIN-1 ) = GL
+         GERSCH( 2*IBEGIN ) = GU
+         GERSCH( 2*IEND-1 ) = D( IEND ) - ABS( E( IEND-1 ) )
+         GERSCH( 2*IEND ) = D( IEND ) + ABS( E( IEND-1 ) )
+         GL = MIN( GERSCH( 2*IEND-1 ), GL )
+         GU = MAX( GERSCH( 2*IEND ), GU )
+         DO 20 I = IBEGIN + 1, IEND - 1
+            OFFD = ABS( E( I-1 ) ) + ABS( E( I ) )
+            GERSCH( 2*I-1 ) = D( I ) - OFFD
+            GL = MIN( GERSCH( 2*I-1 ), GL )
+            GERSCH( 2*I ) = D( I ) + OFFD
+            GU = MAX( GERSCH( 2*I ), GU )
+   20    CONTINUE
+         NRM = MAX( ABS( GL ), ABS( GU ) )
+*
+*        Find the number SIGMA where the base representation
+*        T - sigma I = L D L^T is to be formed.
+*
+         WIDTH = GU - GL
+         DO 30 I = IBEGIN, IEND - 1
+            WORK( I ) = E( I )*E( I )
+   30    CONTINUE
+         DO 50 J = 1, 2
+            IF( J.EQ.1 ) THEN
+               TAU = GL + FOURTH*WIDTH
+            ELSE
+               TAU = GU - FOURTH*WIDTH
+            END IF
+            TMP1 = D( IBEGIN ) - TAU
+            IF( TMP1.LT.ZERO ) THEN
+               CNT = 1
+            ELSE
+               CNT = 0
+            END IF
+            DO 40 I = IBEGIN + 1, IEND
+               TMP1 = D( I ) - TAU - WORK( I-1 ) / TMP1
+               IF( TMP1.LT.ZERO )
+     $            CNT = CNT + 1
+   40       CONTINUE
+            IF( CNT.EQ.0 ) THEN
+               GL = TAU
+            ELSE IF( CNT.EQ.IN ) THEN
+               GU = TAU
+            END IF
+            IF( J.EQ.1 ) THEN
+               MAXCNT = CNT
+               SIGMA = GL
+               SGNDEF = ONE
+            ELSE
+               IF( IN-CNT.GT.MAXCNT ) THEN
+                  SIGMA = GU
+                  SGNDEF = -ONE
+               END IF
+            END IF
+   50    CONTINUE
+*
+*        Find the base L D L^T representation
+*
+         WORK( 3*IN ) = ONE
+         DELTA = EPS
+         TAU = SGNDEF*NRM
+   60    CONTINUE
+         SIGMA = SIGMA - DELTA*TAU
+         WORK( 1 ) = D( IBEGIN ) - SIGMA
+         J = IBEGIN
+         DO 70 I = 1, IN - 1
+            WORK( 2*IN+I ) = ONE / WORK( 2*I-1 )
+            TMP1 = E( J )*WORK( 2*IN+I )
+            WORK( 2*I+1 ) = ( D( J+1 )-SIGMA ) - TMP1*E( J )
+            WORK( 2*I ) = TMP1
+            J = J + 1
+   70    CONTINUE
+         DO 80 I = IN, 1, -1
+            TMP1 = SGNDEF*WORK( 2*I-1 )
+            IF( TMP1.LT.ZERO .OR. WORK( 2*IN+I ).EQ.ZERO .OR. .NOT.
+     $          ( TMP1.GT.ZERO .OR. TMP1.LT.ONE ) ) THEN
+               DELTA = TWO*DELTA
+               GO TO 60
+            END IF
+            J = J - 1
+   80    CONTINUE
+*
+         J = IBEGIN
+         D( IBEGIN ) = WORK( 1 )
+         WORK( 1 ) = ABS( WORK( 1 ) )
+         DO 90 I = 1, IN - 1
+            TMP1 = E( J )
+            E( J ) = WORK( 2*I )
+            WORK( 2*I ) = ABS( TMP1*WORK( 2*I ) )
+            J = J + 1
+            D( J ) = WORK( 2*I+1 )
+            WORK( 2*I+1 ) = ABS( WORK( 2*I+1 ) )
+   90    CONTINUE
+*
+         CALL DLASQ2( IN, WORK, INFO )
+*
+         TAU = SGNDEF*WORK( IN )
+         WORK( 3*IN ) = ONE
+         DELTA = TWO*EPS
+  100    CONTINUE
+         TAU = TAU*( ONE-DELTA )
+*
+         S = -TAU
+         J = IBEGIN
+         DO 110 I = 1, IN - 1
+            WORK( I ) = D( J ) + S
+            WORK( 2*IN+I ) = ONE / WORK( I )
+*           WORK( N+I ) = ( E( I ) * D( I ) ) / WORK( I )
+            WORK( IN+I ) = ( E( J )*D( J ) )*WORK( 2*IN+I )
+            S = S*WORK( IN+I )*E( J ) - TAU
+            J = J + 1
+  110    CONTINUE
+         WORK( IN ) = D( IEND ) + S
+*
+*        Checking to see if all the diagonal elements of the new
+*        L D L^T representation have the same sign
+*
+         DO 120 I = IN, 1, -1
+            TMP1 = SGNDEF*WORK( I )
+            IF( TMP1.LT.ZERO .OR. WORK( 2*IN+I ).EQ.ZERO .OR. .NOT.
+     $          ( TMP1.GT.ZERO .OR. TMP1.LT.ONE ) ) THEN
+               DELTA = TWO*DELTA
+               GO TO 100
+            END IF
+  120    CONTINUE
+*
+         SIGMA = SIGMA + TAU
+         CALL DCOPY( IN, WORK, 1, D( IBEGIN ), 1 )
+         CALL DCOPY( IN-1, WORK( IN+1 ), 1, E( IBEGIN ), 1 )
+         WOFF( JBLK ) = SIGMA
+*
+*        Update the n Gerschgorin intervals
+*
+         DO 130 I = IBEGIN, IEND
+            GERSCH( 2*I-1 ) = GERSCH( 2*I-1 ) - SIGMA
+            GERSCH( 2*I ) = GERSCH( 2*I ) - SIGMA
+  130    CONTINUE
+*
+*        Compute the eigenvalues of L D L^T.
+*
+         J = IBEGIN
+         DO 140 I = 1, IN - 1
+            WORK( 2*I-1 ) = ABS( D( J ) )
+            WORK( 2*I ) = E( J )*E( J )*WORK( 2*I-1 )
+            J = J + 1
+  140    CONTINUE
+         WORK( 2*IN-1 ) = ABS( D( IEND ) )
+*
+         CALL DLASQ2( IN, WORK, INFO )
+*
+         J = IBEGIN
+         IF( SGNDEF.GT.ZERO ) THEN
+            DO 150 I = 1, IN
+               W( J ) = WORK( IN-I+1 )
+               J = J + 1
+  150       CONTINUE
+         ELSE
+            DO 160 I = 1, IN
+               W( J ) = -WORK( I )
+               J = J + 1
+  160       CONTINUE
+         END IF
+         IBEGIN = IEND + 1
+  170 CONTINUE
+      M = N
+*
+      RETURN
+*
+*     End of DLARRE
+*
+      END
diff --git a/costa/native/external/lapack/dlarrf.f b/costa/native/external/lapack/dlarrf.f
new file mode 100644
index 000000000..e3994f953
--- /dev/null
+++ b/costa/native/external/lapack/dlarrf.f
@@ -0,0 +1,150 @@
+      SUBROUTINE DLARRF( N, D, L, LD, LLD, IFIRST, ILAST, W, DPLUS,
+     $                   LPLUS, WORK, IWORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            IFIRST, ILAST, INFO, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   D( * ), DPLUS( * ), L( * ), LD( * ), LLD( * ),
+     $                   LPLUS( * ), W( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  Given the initial representation L D L^T and its cluster of close
+*  eigenvalues (in a relative measure), W( IFIRST ), W( IFIRST+1 ), ...
+*  W( ILAST ), DLARRF finds a new relatively robust representation
+*  L D L^T - SIGMA I = L(+) D(+) L(+)^T such that at least one of the
+*  eigenvalues of L(+) D(+) L(+)^T is relatively isolated.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix.
+*
+*  D       (input) DOUBLE PRECISION array, dimension (N)
+*          The n diagonal elements of the diagonal matrix D.
+*
+*  L       (input) DOUBLE PRECISION array, dimension (N-1)
+*          The (n-1) subdiagonal elements of the unit bidiagonal
+*          matrix L.
+*
+*  LD      (input) DOUBLE PRECISION array, dimension (N-1)
+*          The n-1 elements L(i)*D(i).
+*
+*  LLD     (input) DOUBLE PRECISION array, dimension (N-1)
+*          The n-1 elements L(i)*L(i)*D(i).
+*
+*  IFIRST  (input) INTEGER
+*          The index of the first eigenvalue in the cluster.
+*
+*  ILAST   (input) INTEGER
+*          The index of the last eigenvalue in the cluster.
+*
+*  W       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On input, the eigenvalues of L D L^T in ascending order.
+*          W( IFIRST ) through W( ILAST ) form the cluster of relatively
+*          close eigenalues.
+*          On output, W( IFIRST ) thru' W( ILAST ) are estimates of the
+*          corresponding eigenvalues of L(+) D(+) L(+)^T.
+*
+*  SIGMA   (input) DOUBLE PRECISION
+*          The shift used to form L(+) D(+) L(+)^T.
+*
+*  DPLUS   (output) DOUBLE PRECISION array, dimension (N)
+*          The n diagonal elements of the diagonal matrix D(+).
+*
+*  LPLUS   (output) DOUBLE PRECISION array, dimension (N)
+*          The first (n-1) elements of LPLUS contain the subdiagonal
+*          elements of the unit bidiagonal matrix L(+). LPLUS( N ) is
+*          set to SIGMA.
+*
+*  WORK    (input) DOUBLE PRECISION array, dimension (???)
+*          Workspace.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Inderjit Dhillon, IBM Almaden, USA
+*     Osni Marques, LBNL/NERSC, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, TWO
+      PARAMETER          ( ZERO = 0.0D0, TWO = 2.0D0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      DOUBLE PRECISION   DELTA, EPS, S, SIGMA
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      EPS = DLAMCH( 'Precision' )
+      IF( IFIRST.EQ.1 ) THEN
+         SIGMA = W( IFIRST )
+      ELSE IF( ILAST.EQ.N ) THEN
+         SIGMA = W( ILAST )
+      ELSE
+         INFO = 1
+         RETURN
+      END IF
+*
+*     Compute the new relatively robust representation (RRR)
+*
+      DELTA = TWO*EPS
+   10 CONTINUE
+      IF( IFIRST.EQ.1 ) THEN
+         SIGMA = SIGMA - ABS( SIGMA )*DELTA
+      ELSE
+         SIGMA = SIGMA + ABS( SIGMA )*DELTA
+      END IF
+      S = -SIGMA
+      DO 20 I = 1, N - 1
+         DPLUS( I ) = D( I ) + S
+         LPLUS( I ) = LD( I ) / DPLUS( I )
+         S = S*LPLUS( I )*L( I ) - SIGMA
+   20 CONTINUE
+      DPLUS( N ) = D( N ) + S
+      IF( IFIRST.EQ.1 ) THEN
+         DO 30 I = 1, N
+            IF( DPLUS( I ).LT.ZERO ) THEN
+               DELTA = TWO*DELTA
+               GO TO 10
+            END IF
+   30    CONTINUE
+      ELSE
+         DO 40 I = 1, N
+            IF( DPLUS( I ).GT.ZERO ) THEN
+               DELTA = TWO*DELTA
+               GO TO 10
+            END IF
+   40    CONTINUE
+      END IF
+      DO 50 I = IFIRST, ILAST
+         W( I ) = W( I ) - SIGMA
+   50 CONTINUE
+      LPLUS( N ) = SIGMA
+*
+      RETURN
+*
+*     End of DLARRF
+*
+      END
diff --git a/costa/native/external/lapack/dlarrv.f b/costa/native/external/lapack/dlarrv.f
new file mode 100644
index 000000000..87eaf65b0
--- /dev/null
+++ b/costa/native/external/lapack/dlarrv.f
@@ -0,0 +1,419 @@
+      SUBROUTINE DLARRV( N, D, L, ISPLIT, M, W, IBLOCK, GERSCH, TOL, Z,
+     $                   LDZ, ISUPPZ, WORK, IWORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDZ, M, N
+      DOUBLE PRECISION   TOL
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IBLOCK( * ), ISPLIT( * ), ISUPPZ( * ),
+     $                   IWORK( * )
+      DOUBLE PRECISION   D( * ), GERSCH( * ), L( * ), W( * ), WORK( * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLARRV computes the eigenvectors of the tridiagonal matrix
+*  T = L D L^T given L, D and the eigenvalues of L D L^T.
+*  The input eigenvalues should have high relative accuracy with
+*  respect to the entries of L and D. The desired accuracy of the
+*  output can be specified by the input parameter TOL.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix.  N >= 0.
+*
+*  D       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the n diagonal elements of the diagonal matrix D.
+*          On exit, D may be overwritten.
+*
+*  L       (input/output) DOUBLE PRECISION array, dimension (N-1)
+*          On entry, the (n-1) subdiagonal elements of the unit
+*          bidiagonal matrix L in elements 1 to N-1 of L. L(N) need
+*          not be set. On exit, L is overwritten.
+*
+*  ISPLIT  (input) INTEGER array, dimension (N)
+*          The splitting points, at which T breaks up into submatrices.
+*          The first submatrix consists of rows/columns 1 to
+*          ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1
+*          through ISPLIT( 2 ), etc.
+*
+*  TOL     (input) DOUBLE PRECISION
+*          The absolute error tolerance for the
+*          eigenvalues/eigenvectors.
+*          Errors in the input eigenvalues must be bounded by TOL.
+*          The eigenvectors output have residual norms
+*          bounded by TOL, and the dot products between different
+*          eigenvectors are bounded by TOL. TOL must be at least
+*          N*EPS*|T|, where EPS is the machine precision and |T| is
+*          the 1-norm of the tridiagonal matrix.
+*
+*  M       (input) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (input) DOUBLE PRECISION array, dimension (N)
+*          The first M elements of W contain the eigenvalues for
+*          which eigenvectors are to be computed.  The eigenvalues
+*          should be grouped by split-off block and ordered from
+*          smallest to largest within the block ( The output array
+*          W from DLARRE is expected here ).
+*          Errors in W must be bounded by TOL (see above).
+*
+*  IBLOCK  (input) INTEGER array, dimension (N)
+*          The submatrix indices associated with the corresponding
+*          eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to
+*          the first submatrix from the top, =2 if W(i) belongs to
+*          the second submatrix, etc.
+*
+*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) )
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix T
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  ISUPPZ  (output) INTEGER ARRAY, dimension ( 2*max(1,M) )
+*          The support of the eigenvectors in Z, i.e., the indices
+*          indicating the nonzero elements in Z. The i-th eigenvector
+*          is nonzero only in elements ISUPPZ( 2*i-1 ) through
+*          ISUPPZ( 2*i ).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (13*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (6*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = 1, internal error in DLARRB
+*                if INFO = 2, internal error in DSTEIN
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Inderjit Dhillon, IBM Almaden, USA
+*     Osni Marques, LBNL/NERSC, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            MGSSIZ
+      PARAMETER          ( MGSSIZ = 20 )
+      DOUBLE PRECISION   ZERO, ONE, FOUR
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, FOUR = 4.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            MGSCLS
+      INTEGER            I, IBEGIN, IEND, IINDC1, IINDC2, IINDR, IINDWK,
+     $                   IINFO, IM, IN, INDERR, INDGAP, INDLD, INDLLD,
+     $                   INDWRK, ITER, ITMP1, ITMP2, J, JBLK, K, KTOT,
+     $                   LSBDPT, MAXITR, NCLUS, NDEPTH, NDONE, NEWCLS,
+     $                   NEWFRS, NEWFTT, NEWLST, NEWSIZ, NSPLIT, OLDCLS,
+     $                   OLDFST, OLDIEN, OLDLST, OLDNCL, P, Q
+      DOUBLE PRECISION   EPS, GAP, LAMBDA, MGSTOL, MINGMA, MINRGP,
+     $                   NRMINV, RELGAP, RELTOL, RESID, RQCORR, SIGMA,
+     $                   TMP1, ZTZ
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DDOT, DLAMCH, DNRM2
+      EXTERNAL           DDOT, DLAMCH, DNRM2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DCOPY, DLAR1V, DLARRB, DLARRF, DLASET,
+     $                   DSCAL, DSTEIN
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, MAX, MIN, SQRT
+*     ..
+*     .. Local Arrays ..
+      INTEGER            TEMP( 1 )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INDERR = N + 1
+      INDLD = 2*N
+      INDLLD = 3*N
+      INDGAP = 4*N
+      INDWRK = 5*N + 1
+*
+      IINDR = N
+      IINDC1 = 2*N
+      IINDC2 = 3*N
+      IINDWK = 4*N + 1
+*
+      EPS = DLAMCH( 'Precision' )
+*
+      DO 10 I = 1, 2*N
+         IWORK( I ) = 0
+   10 CONTINUE
+      DO 20 I = 1, M
+         WORK( INDERR+I-1 ) = EPS*ABS( W( I ) )
+   20 CONTINUE
+      CALL DLASET( 'Full', N, N, ZERO, ZERO, Z, LDZ )
+      MGSTOL = 5.0D0*EPS
+*
+      NSPLIT = IBLOCK( M )
+      IBEGIN = 1
+      DO 170 JBLK = 1, NSPLIT
+         IEND = ISPLIT( JBLK )
+*
+*        Find the eigenvectors of the submatrix indexed IBEGIN
+*        through IEND.
+*
+         IF( IBEGIN.EQ.IEND ) THEN
+            Z( IBEGIN, IBEGIN ) = ONE
+            ISUPPZ( 2*IBEGIN-1 ) = IBEGIN
+            ISUPPZ( 2*IBEGIN ) = IBEGIN
+            IBEGIN = IEND + 1
+            GO TO 170
+         END IF
+         OLDIEN = IBEGIN - 1
+         IN = IEND - OLDIEN
+         RELTOL = MIN( 1.0D-2, ONE / DBLE( IN ) )
+         IM = IN
+         CALL DCOPY( IM, W( IBEGIN ), 1, WORK, 1 )
+         DO 30 I = 1, IN - 1
+            WORK( INDGAP+I ) = WORK( I+1 ) - WORK( I )
+   30    CONTINUE
+         WORK( INDGAP+IN ) = MAX( ABS( WORK( IN ) ), EPS )
+         NDONE = 0
+*
+         NDEPTH = 0
+         LSBDPT = 1
+         NCLUS = 1
+         IWORK( IINDC1+1 ) = 1
+         IWORK( IINDC1+2 ) = IN
+*
+*        While( NDONE.LT.IM ) do
+*
+   40    CONTINUE
+         IF( NDONE.LT.IM ) THEN
+            OLDNCL = NCLUS
+            NCLUS = 0
+            LSBDPT = 1 - LSBDPT
+            DO 150 I = 1, OLDNCL
+               IF( LSBDPT.EQ.0 ) THEN
+                  OLDCLS = IINDC1
+                  NEWCLS = IINDC2
+               ELSE
+                  OLDCLS = IINDC2
+                  NEWCLS = IINDC1
+               END IF
+*
+*              If NDEPTH > 1, retrieve the relatively robust
+*              representation (RRR) and perform limited bisection
+*              (if necessary) to get approximate eigenvalues.
+*
+               J = OLDCLS + 2*I
+               OLDFST = IWORK( J-1 )
+               OLDLST = IWORK( J )
+               IF( NDEPTH.GT.0 ) THEN
+                  J = OLDIEN + OLDFST
+                  CALL DCOPY( IN, Z( IBEGIN, J ), 1, D( IBEGIN ), 1 )
+                  CALL DCOPY( IN, Z( IBEGIN, J+1 ), 1, L( IBEGIN ), 1 )
+                  SIGMA = L( IEND )
+               END IF
+               K = IBEGIN
+               DO 50 J = 1, IN - 1
+                  WORK( INDLD+J ) = D( K )*L( K )
+                  WORK( INDLLD+J ) = WORK( INDLD+J )*L( K )
+                  K = K + 1
+   50          CONTINUE
+               IF( NDEPTH.GT.0 ) THEN
+                  CALL DLARRB( IN, D( IBEGIN ), L( IBEGIN ),
+     $                         WORK( INDLD+1 ), WORK( INDLLD+1 ),
+     $                         OLDFST, OLDLST, SIGMA, RELTOL, WORK,
+     $                         WORK( INDGAP+1 ), WORK( INDERR ),
+     $                         WORK( INDWRK ), IWORK( IINDWK ), IINFO )
+                  IF( IINFO.NE.0 ) THEN
+                     INFO = 1
+                     RETURN
+                  END IF
+               END IF
+*
+*              Classify eigenvalues of the current representation (RRR)
+*              as (i) isolated, (ii) loosely clustered or (iii) tightly
+*              clustered
+*
+               NEWFRS = OLDFST
+               DO 140 J = OLDFST, OLDLST
+                  IF( J.EQ.OLDLST .OR. WORK( INDGAP+J ).GE.RELTOL*
+     $                ABS( WORK( J ) ) ) THEN
+                     NEWLST = J
+                  ELSE
+*
+*                    continue (to the next loop)
+*
+                     RELGAP = WORK( INDGAP+J ) / ABS( WORK( J ) )
+                     IF( J.EQ.NEWFRS ) THEN
+                        MINRGP = RELGAP
+                     ELSE
+                        MINRGP = MIN( MINRGP, RELGAP )
+                     END IF
+                     GO TO 140
+                  END IF
+                  NEWSIZ = NEWLST - NEWFRS + 1
+                  MAXITR = 10
+                  NEWFTT = OLDIEN + NEWFRS
+                  IF( NEWSIZ.GT.1 ) THEN
+                     MGSCLS = NEWSIZ.LE.MGSSIZ .AND. MINRGP.GE.MGSTOL
+                     IF( .NOT.MGSCLS ) THEN
+                        CALL DLARRF( IN, D( IBEGIN ), L( IBEGIN ),
+     $                               WORK( INDLD+1 ), WORK( INDLLD+1 ),
+     $                               NEWFRS, NEWLST, WORK,
+     $                               Z( IBEGIN, NEWFTT ),
+     $                               Z( IBEGIN, NEWFTT+1 ),
+     $                               WORK( INDWRK ), IWORK( IINDWK ),
+     $                               INFO )
+                        IF( INFO.EQ.0 ) THEN
+                           NCLUS = NCLUS + 1
+                           K = NEWCLS + 2*NCLUS
+                           IWORK( K-1 ) = NEWFRS
+                           IWORK( K ) = NEWLST
+                        ELSE
+                           INFO = 0
+                           IF( MINRGP.GE.MGSTOL ) THEN
+                              MGSCLS = .TRUE.
+                           ELSE
+*
+*                             Call DSTEIN to process this tight cluster.
+*                             This happens only if MINRGP <= MGSTOL
+*                             and DLARRF returns INFO = 1. The latter
+*                             means that a new RRR to "break" the
+*                             cluster could not be found.
+*
+                              WORK( INDWRK ) = D( IBEGIN )
+                              DO 60 K = 1, IN - 1
+                                 WORK( INDWRK+K ) = D( IBEGIN+K ) +
+     $                                              WORK( INDLLD+K )
+   60                         CONTINUE
+                              DO 70 K = 1, NEWSIZ
+                                 IWORK( IINDWK+K-1 ) = 1
+   70                         CONTINUE
+                              DO 80 K = NEWFRS, NEWLST
+                                 ISUPPZ( 2*( IBEGIN+K )-3 ) = 1
+                                 ISUPPZ( 2*( IBEGIN+K )-2 ) = IN
+   80                         CONTINUE
+                              TEMP( 1 ) = IN
+                              CALL DSTEIN( IN, WORK( INDWRK ),
+     $                                     WORK( INDLD+1 ), NEWSIZ,
+     $                                     WORK( NEWFRS ),
+     $                                     IWORK( IINDWK ), TEMP( 1 ),
+     $                                     Z( IBEGIN, NEWFTT ), LDZ,
+     $                                     WORK( INDWRK+IN ),
+     $                                     IWORK( IINDWK+IN ),
+     $                                     IWORK( IINDWK+2*IN ), IINFO )
+                              IF( IINFO.NE.0 ) THEN
+                                 INFO = 2
+                                 RETURN
+                              END IF
+                              NDONE = NDONE + NEWSIZ
+                           END IF
+                        END IF
+                     END IF
+                  ELSE
+                     MGSCLS = .FALSE.
+                  END IF
+                  IF( NEWSIZ.EQ.1 .OR. MGSCLS ) THEN
+                     KTOT = NEWFTT
+                     DO 100 K = NEWFRS, NEWLST
+                        ITER = 0
+   90                   CONTINUE
+                        LAMBDA = WORK( K )
+                        CALL DLAR1V( IN, 1, IN, LAMBDA, D( IBEGIN ),
+     $                               L( IBEGIN ), WORK( INDLD+1 ),
+     $                               WORK( INDLLD+1 ),
+     $                               GERSCH( 2*OLDIEN+1 ),
+     $                               Z( IBEGIN, KTOT ), ZTZ, MINGMA,
+     $                               IWORK( IINDR+KTOT ),
+     $                               ISUPPZ( 2*KTOT-1 ),
+     $                               WORK( INDWRK ) )
+                        TMP1 = ONE / ZTZ
+                        NRMINV = SQRT( TMP1 )
+                        RESID = ABS( MINGMA )*NRMINV
+                        RQCORR = MINGMA*TMP1
+                        IF( K.EQ.IN ) THEN
+                           GAP = WORK( INDGAP+K-1 )
+                        ELSE IF( K.EQ.1 ) THEN
+                           GAP = WORK( INDGAP+K )
+                        ELSE
+                           GAP = MIN( WORK( INDGAP+K-1 ),
+     $                           WORK( INDGAP+K ) )
+                        END IF
+                        ITER = ITER + 1
+                        IF( RESID.GT.TOL*GAP .AND. ABS( RQCORR ).GT.
+     $                      FOUR*EPS*ABS( LAMBDA ) ) THEN
+                           WORK( K ) = LAMBDA + RQCORR
+                           IF( ITER.LT.MAXITR ) THEN
+                              GO TO 90
+                           END IF
+                        END IF
+                        IWORK( KTOT ) = 1
+                        IF( NEWSIZ.EQ.1 )
+     $                     NDONE = NDONE + 1
+                        CALL DSCAL( IN, NRMINV, Z( IBEGIN, KTOT ), 1 )
+                        KTOT = KTOT + 1
+  100                CONTINUE
+                     IF( NEWSIZ.GT.1 ) THEN
+                        ITMP1 = ISUPPZ( 2*NEWFTT-1 )
+                        ITMP2 = ISUPPZ( 2*NEWFTT )
+                        KTOT = OLDIEN + NEWLST
+                        DO 120 P = NEWFTT + 1, KTOT
+                           DO 110 Q = NEWFTT, P - 1
+                              TMP1 = -DDOT( IN, Z( IBEGIN, P ), 1,
+     $                               Z( IBEGIN, Q ), 1 )
+                              CALL DAXPY( IN, TMP1, Z( IBEGIN, Q ), 1,
+     $                                    Z( IBEGIN, P ), 1 )
+  110                      CONTINUE
+                           TMP1 = ONE / DNRM2( IN, Z( IBEGIN, P ), 1 )
+                           CALL DSCAL( IN, TMP1, Z( IBEGIN, P ), 1 )
+                           ITMP1 = MIN( ITMP1, ISUPPZ( 2*P-1 ) )
+                           ITMP2 = MAX( ITMP2, ISUPPZ( 2*P ) )
+  120                   CONTINUE
+                        DO 130 P = NEWFTT, KTOT
+                           ISUPPZ( 2*P-1 ) = ITMP1
+                           ISUPPZ( 2*P ) = ITMP2
+  130                   CONTINUE
+                        NDONE = NDONE + NEWSIZ
+                     END IF
+                  END IF
+                  NEWFRS = J + 1
+  140          CONTINUE
+  150       CONTINUE
+            NDEPTH = NDEPTH + 1
+            GO TO 40
+         END IF
+         J = 2*IBEGIN
+         DO 160 I = IBEGIN, IEND
+            ISUPPZ( J-1 ) = ISUPPZ( J-1 ) + OLDIEN
+            ISUPPZ( J ) = ISUPPZ( J ) + OLDIEN
+            J = J + 2
+  160    CONTINUE
+         IBEGIN = IEND + 1
+  170 CONTINUE
+*
+      RETURN
+*
+*     End of DLARRV
+*
+      END
diff --git a/costa/native/external/lapack/dlartg.f b/costa/native/external/lapack/dlartg.f
new file mode 100644
index 000000000..c0cd3d142
--- /dev/null
+++ b/costa/native/external/lapack/dlartg.f
@@ -0,0 +1,143 @@
+      SUBROUTINE DLARTG( F, G, CS, SN, R )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   CS, F, G, R, SN
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLARTG generate a plane rotation so that
+*
+*     [  CS  SN  ]  .  [ F ]  =  [ R ]   where CS**2 + SN**2 = 1.
+*     [ -SN  CS  ]     [ G ]     [ 0 ]
+*
+*  This is a slower, more accurate version of the BLAS1 routine DROTG,
+*  with the following other differences:
+*     F and G are unchanged on return.
+*     If G=0, then CS=1 and SN=0.
+*     If F=0 and (G .ne. 0), then CS=0 and SN=1 without doing any
+*        floating point operations (saves work in DBDSQR when
+*        there are zeros on the diagonal).
+*
+*  If F exceeds G in magnitude, CS will be positive.
+*
+*  Arguments
+*  =========
+*
+*  F       (input) DOUBLE PRECISION
+*          The first component of vector to be rotated.
+*
+*  G       (input) DOUBLE PRECISION
+*          The second component of vector to be rotated.
+*
+*  CS      (output) DOUBLE PRECISION
+*          The cosine of the rotation.
+*
+*  SN      (output) DOUBLE PRECISION
+*          The sine of the rotation.
+*
+*  R       (output) DOUBLE PRECISION
+*          The nonzero component of the rotated vector.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D0 )
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D0 )
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            FIRST
+      INTEGER            COUNT, I
+      DOUBLE PRECISION   EPS, F1, G1, SAFMIN, SAFMN2, SAFMX2, SCALE
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, INT, LOG, MAX, SQRT
+*     ..
+*     .. Save statement ..
+      SAVE               FIRST, SAFMX2, SAFMIN, SAFMN2
+*     ..
+*     .. Data statements ..
+      DATA               FIRST / .TRUE. /
+*     ..
+*     .. Executable Statements ..
+*
+      IF( FIRST ) THEN
+         FIRST = .FALSE.
+         SAFMIN = DLAMCH( 'S' )
+         EPS = DLAMCH( 'E' )
+         SAFMN2 = DLAMCH( 'B' )**INT( LOG( SAFMIN / EPS ) /
+     $            LOG( DLAMCH( 'B' ) ) / TWO )
+         SAFMX2 = ONE / SAFMN2
+      END IF
+      IF( G.EQ.ZERO ) THEN
+         CS = ONE
+         SN = ZERO
+         R = F
+      ELSE IF( F.EQ.ZERO ) THEN
+         CS = ZERO
+         SN = ONE
+         R = G
+      ELSE
+         F1 = F
+         G1 = G
+         SCALE = MAX( ABS( F1 ), ABS( G1 ) )
+         IF( SCALE.GE.SAFMX2 ) THEN
+            COUNT = 0
+   10       CONTINUE
+            COUNT = COUNT + 1
+            F1 = F1*SAFMN2
+            G1 = G1*SAFMN2
+            SCALE = MAX( ABS( F1 ), ABS( G1 ) )
+            IF( SCALE.GE.SAFMX2 )
+     $         GO TO 10
+            R = SQRT( F1**2+G1**2 )
+            CS = F1 / R
+            SN = G1 / R
+            DO 20 I = 1, COUNT
+               R = R*SAFMX2
+   20       CONTINUE
+         ELSE IF( SCALE.LE.SAFMN2 ) THEN
+            COUNT = 0
+   30       CONTINUE
+            COUNT = COUNT + 1
+            F1 = F1*SAFMX2
+            G1 = G1*SAFMX2
+            SCALE = MAX( ABS( F1 ), ABS( G1 ) )
+            IF( SCALE.LE.SAFMN2 )
+     $         GO TO 30
+            R = SQRT( F1**2+G1**2 )
+            CS = F1 / R
+            SN = G1 / R
+            DO 40 I = 1, COUNT
+               R = R*SAFMN2
+   40       CONTINUE
+         ELSE
+            R = SQRT( F1**2+G1**2 )
+            CS = F1 / R
+            SN = G1 / R
+         END IF
+         IF( ABS( F ).GT.ABS( G ) .AND. CS.LT.ZERO ) THEN
+            CS = -CS
+            SN = -SN
+            R = -R
+         END IF
+      END IF
+      RETURN
+*
+*     End of DLARTG
+*
+      END
diff --git a/costa/native/external/lapack/dlartv.f b/costa/native/external/lapack/dlartv.f
new file mode 100644
index 000000000..26d15ad45
--- /dev/null
+++ b/costa/native/external/lapack/dlartv.f
@@ -0,0 +1,77 @@
+      SUBROUTINE DLARTV( N, X, INCX, Y, INCY, C, S, INCC )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCC, INCX, INCY, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   C( * ), S( * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLARTV applies a vector of real plane rotations to elements of the
+*  real vectors x and y. For i = 1,2,...,n
+*
+*     ( x(i) ) := (  c(i)  s(i) ) ( x(i) )
+*     ( y(i) )    ( -s(i)  c(i) ) ( y(i) )
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of plane rotations to be applied.
+*
+*  X       (input/output) DOUBLE PRECISION array,
+*                         dimension (1+(N-1)*INCX)
+*          The vector x.
+*
+*  INCX    (input) INTEGER
+*          The increment between elements of X. INCX > 0.
+*
+*  Y       (input/output) DOUBLE PRECISION array,
+*                         dimension (1+(N-1)*INCY)
+*          The vector y.
+*
+*  INCY    (input) INTEGER
+*          The increment between elements of Y. INCY > 0.
+*
+*  C       (input) DOUBLE PRECISION array, dimension (1+(N-1)*INCC)
+*          The cosines of the plane rotations.
+*
+*  S       (input) DOUBLE PRECISION array, dimension (1+(N-1)*INCC)
+*          The sines of the plane rotations.
+*
+*  INCC    (input) INTEGER
+*          The increment between elements of C and S. INCC > 0.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, IC, IX, IY
+      DOUBLE PRECISION   XI, YI
+*     ..
+*     .. Executable Statements ..
+*
+      IX = 1
+      IY = 1
+      IC = 1
+      DO 10 I = 1, N
+         XI = X( IX )
+         YI = Y( IY )
+         X( IX ) = C( IC )*XI + S( IC )*YI
+         Y( IY ) = C( IC )*YI - S( IC )*XI
+         IX = IX + INCX
+         IY = IY + INCY
+         IC = IC + INCC
+   10 CONTINUE
+      RETURN
+*
+*     End of DLARTV
+*
+      END
diff --git a/costa/native/external/lapack/dlaruv.f b/costa/native/external/lapack/dlaruv.f
new file mode 100644
index 000000000..003769065
--- /dev/null
+++ b/costa/native/external/lapack/dlaruv.f
@@ -0,0 +1,368 @@
+      SUBROUTINE DLARUV( ISEED, N, X )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            ISEED( 4 )
+      DOUBLE PRECISION   X( N )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLARUV returns a vector of n random real numbers from a uniform (0,1)
+*  distribution (n <= 128).
+*
+*  This is an auxiliary routine called by DLARNV and ZLARNV.
+*
+*  Arguments
+*  =========
+*
+*  ISEED   (input/output) INTEGER array, dimension (4)
+*          On entry, the seed of the random number generator; the array
+*          elements must be between 0 and 4095, and ISEED(4) must be
+*          odd.
+*          On exit, the seed is updated.
+*
+*  N       (input) INTEGER
+*          The number of random numbers to be generated. N <= 128.
+*
+*  X       (output) DOUBLE PRECISION array, dimension (N)
+*          The generated random numbers.
+*
+*  Further Details
+*  ===============
+*
+*  This routine uses a multiplicative congruential method with modulus
+*  2**48 and multiplier 33952834046453 (see G.S.Fishman,
+*  'Multiplicative congruential random number generators with modulus
+*  2**b: an exhaustive analysis for b = 32 and a partial analysis for
+*  b = 48', Math. Comp. 189, pp 331-344, 1990).
+*
+*  48-bit integers are stored in 4 integer array elements with 12 bits
+*  per element. Hence the routine is portable across machines with
+*  integers of 32 bits or more.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D0 )
+      INTEGER            LV, IPW2
+      DOUBLE PRECISION   R
+      PARAMETER          ( LV = 128, IPW2 = 4096, R = ONE / IPW2 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, I1, I2, I3, I4, IT1, IT2, IT3, IT4, J
+*     ..
+*     .. Local Arrays ..
+      INTEGER            MM( LV, 4 )
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, MIN, MOD
+*     ..
+*     .. Data statements ..
+      DATA               ( MM( 1, J ), J = 1, 4 ) / 494, 322, 2508,
+     $                   2549 /
+      DATA               ( MM( 2, J ), J = 1, 4 ) / 2637, 789, 3754,
+     $                   1145 /
+      DATA               ( MM( 3, J ), J = 1, 4 ) / 255, 1440, 1766,
+     $                   2253 /
+      DATA               ( MM( 4, J ), J = 1, 4 ) / 2008, 752, 3572,
+     $                   305 /
+      DATA               ( MM( 5, J ), J = 1, 4 ) / 1253, 2859, 2893,
+     $                   3301 /
+      DATA               ( MM( 6, J ), J = 1, 4 ) / 3344, 123, 307,
+     $                   1065 /
+      DATA               ( MM( 7, J ), J = 1, 4 ) / 4084, 1848, 1297,
+     $                   3133 /
+      DATA               ( MM( 8, J ), J = 1, 4 ) / 1739, 643, 3966,
+     $                   2913 /
+      DATA               ( MM( 9, J ), J = 1, 4 ) / 3143, 2405, 758,
+     $                   3285 /
+      DATA               ( MM( 10, J ), J = 1, 4 ) / 3468, 2638, 2598,
+     $                   1241 /
+      DATA               ( MM( 11, J ), J = 1, 4 ) / 688, 2344, 3406,
+     $                   1197 /
+      DATA               ( MM( 12, J ), J = 1, 4 ) / 1657, 46, 2922,
+     $                   3729 /
+      DATA               ( MM( 13, J ), J = 1, 4 ) / 1238, 3814, 1038,
+     $                   2501 /
+      DATA               ( MM( 14, J ), J = 1, 4 ) / 3166, 913, 2934,
+     $                   1673 /
+      DATA               ( MM( 15, J ), J = 1, 4 ) / 1292, 3649, 2091,
+     $                   541 /
+      DATA               ( MM( 16, J ), J = 1, 4 ) / 3422, 339, 2451,
+     $                   2753 /
+      DATA               ( MM( 17, J ), J = 1, 4 ) / 1270, 3808, 1580,
+     $                   949 /
+      DATA               ( MM( 18, J ), J = 1, 4 ) / 2016, 822, 1958,
+     $                   2361 /
+      DATA               ( MM( 19, J ), J = 1, 4 ) / 154, 2832, 2055,
+     $                   1165 /
+      DATA               ( MM( 20, J ), J = 1, 4 ) / 2862, 3078, 1507,
+     $                   4081 /
+      DATA               ( MM( 21, J ), J = 1, 4 ) / 697, 3633, 1078,
+     $                   2725 /
+      DATA               ( MM( 22, J ), J = 1, 4 ) / 1706, 2970, 3273,
+     $                   3305 /
+      DATA               ( MM( 23, J ), J = 1, 4 ) / 491, 637, 17,
+     $                   3069 /
+      DATA               ( MM( 24, J ), J = 1, 4 ) / 931, 2249, 854,
+     $                   3617 /
+      DATA               ( MM( 25, J ), J = 1, 4 ) / 1444, 2081, 2916,
+     $                   3733 /
+      DATA               ( MM( 26, J ), J = 1, 4 ) / 444, 4019, 3971,
+     $                   409 /
+      DATA               ( MM( 27, J ), J = 1, 4 ) / 3577, 1478, 2889,
+     $                   2157 /
+      DATA               ( MM( 28, J ), J = 1, 4 ) / 3944, 242, 3831,
+     $                   1361 /
+      DATA               ( MM( 29, J ), J = 1, 4 ) / 2184, 481, 2621,
+     $                   3973 /
+      DATA               ( MM( 30, J ), J = 1, 4 ) / 1661, 2075, 1541,
+     $                   1865 /
+      DATA               ( MM( 31, J ), J = 1, 4 ) / 3482, 4058, 893,
+     $                   2525 /
+      DATA               ( MM( 32, J ), J = 1, 4 ) / 657, 622, 736,
+     $                   1409 /
+      DATA               ( MM( 33, J ), J = 1, 4 ) / 3023, 3376, 3992,
+     $                   3445 /
+      DATA               ( MM( 34, J ), J = 1, 4 ) / 3618, 812, 787,
+     $                   3577 /
+      DATA               ( MM( 35, J ), J = 1, 4 ) / 1267, 234, 2125,
+     $                   77 /
+      DATA               ( MM( 36, J ), J = 1, 4 ) / 1828, 641, 2364,
+     $                   3761 /
+      DATA               ( MM( 37, J ), J = 1, 4 ) / 164, 4005, 2460,
+     $                   2149 /
+      DATA               ( MM( 38, J ), J = 1, 4 ) / 3798, 1122, 257,
+     $                   1449 /
+      DATA               ( MM( 39, J ), J = 1, 4 ) / 3087, 3135, 1574,
+     $                   3005 /
+      DATA               ( MM( 40, J ), J = 1, 4 ) / 2400, 2640, 3912,
+     $                   225 /
+      DATA               ( MM( 41, J ), J = 1, 4 ) / 2870, 2302, 1216,
+     $                   85 /
+      DATA               ( MM( 42, J ), J = 1, 4 ) / 3876, 40, 3248,
+     $                   3673 /
+      DATA               ( MM( 43, J ), J = 1, 4 ) / 1905, 1832, 3401,
+     $                   3117 /
+      DATA               ( MM( 44, J ), J = 1, 4 ) / 1593, 2247, 2124,
+     $                   3089 /
+      DATA               ( MM( 45, J ), J = 1, 4 ) / 1797, 2034, 2762,
+     $                   1349 /
+      DATA               ( MM( 46, J ), J = 1, 4 ) / 1234, 2637, 149,
+     $                   2057 /
+      DATA               ( MM( 47, J ), J = 1, 4 ) / 3460, 1287, 2245,
+     $                   413 /
+      DATA               ( MM( 48, J ), J = 1, 4 ) / 328, 1691, 166,
+     $                   65 /
+      DATA               ( MM( 49, J ), J = 1, 4 ) / 2861, 496, 466,
+     $                   1845 /
+      DATA               ( MM( 50, J ), J = 1, 4 ) / 1950, 1597, 4018,
+     $                   697 /
+      DATA               ( MM( 51, J ), J = 1, 4 ) / 617, 2394, 1399,
+     $                   3085 /
+      DATA               ( MM( 52, J ), J = 1, 4 ) / 2070, 2584, 190,
+     $                   3441 /
+      DATA               ( MM( 53, J ), J = 1, 4 ) / 3331, 1843, 2879,
+     $                   1573 /
+      DATA               ( MM( 54, J ), J = 1, 4 ) / 769, 336, 153,
+     $                   3689 /
+      DATA               ( MM( 55, J ), J = 1, 4 ) / 1558, 1472, 2320,
+     $                   2941 /
+      DATA               ( MM( 56, J ), J = 1, 4 ) / 2412, 2407, 18,
+     $                   929 /
+      DATA               ( MM( 57, J ), J = 1, 4 ) / 2800, 433, 712,
+     $                   533 /
+      DATA               ( MM( 58, J ), J = 1, 4 ) / 189, 2096, 2159,
+     $                   2841 /
+      DATA               ( MM( 59, J ), J = 1, 4 ) / 287, 1761, 2318,
+     $                   4077 /
+      DATA               ( MM( 60, J ), J = 1, 4 ) / 2045, 2810, 2091,
+     $                   721 /
+      DATA               ( MM( 61, J ), J = 1, 4 ) / 1227, 566, 3443,
+     $                   2821 /
+      DATA               ( MM( 62, J ), J = 1, 4 ) / 2838, 442, 1510,
+     $                   2249 /
+      DATA               ( MM( 63, J ), J = 1, 4 ) / 209, 41, 449,
+     $                   2397 /
+      DATA               ( MM( 64, J ), J = 1, 4 ) / 2770, 1238, 1956,
+     $                   2817 /
+      DATA               ( MM( 65, J ), J = 1, 4 ) / 3654, 1086, 2201,
+     $                   245 /
+      DATA               ( MM( 66, J ), J = 1, 4 ) / 3993, 603, 3137,
+     $                   1913 /
+      DATA               ( MM( 67, J ), J = 1, 4 ) / 192, 840, 3399,
+     $                   1997 /
+      DATA               ( MM( 68, J ), J = 1, 4 ) / 2253, 3168, 1321,
+     $                   3121 /
+      DATA               ( MM( 69, J ), J = 1, 4 ) / 3491, 1499, 2271,
+     $                   997 /
+      DATA               ( MM( 70, J ), J = 1, 4 ) / 2889, 1084, 3667,
+     $                   1833 /
+      DATA               ( MM( 71, J ), J = 1, 4 ) / 2857, 3438, 2703,
+     $                   2877 /
+      DATA               ( MM( 72, J ), J = 1, 4 ) / 2094, 2408, 629,
+     $                   1633 /
+      DATA               ( MM( 73, J ), J = 1, 4 ) / 1818, 1589, 2365,
+     $                   981 /
+      DATA               ( MM( 74, J ), J = 1, 4 ) / 688, 2391, 2431,
+     $                   2009 /
+      DATA               ( MM( 75, J ), J = 1, 4 ) / 1407, 288, 1113,
+     $                   941 /
+      DATA               ( MM( 76, J ), J = 1, 4 ) / 634, 26, 3922,
+     $                   2449 /
+      DATA               ( MM( 77, J ), J = 1, 4 ) / 3231, 512, 2554,
+     $                   197 /
+      DATA               ( MM( 78, J ), J = 1, 4 ) / 815, 1456, 184,
+     $                   2441 /
+      DATA               ( MM( 79, J ), J = 1, 4 ) / 3524, 171, 2099,
+     $                   285 /
+      DATA               ( MM( 80, J ), J = 1, 4 ) / 1914, 1677, 3228,
+     $                   1473 /
+      DATA               ( MM( 81, J ), J = 1, 4 ) / 516, 2657, 4012,
+     $                   2741 /
+      DATA               ( MM( 82, J ), J = 1, 4 ) / 164, 2270, 1921,
+     $                   3129 /
+      DATA               ( MM( 83, J ), J = 1, 4 ) / 303, 2587, 3452,
+     $                   909 /
+      DATA               ( MM( 84, J ), J = 1, 4 ) / 2144, 2961, 3901,
+     $                   2801 /
+      DATA               ( MM( 85, J ), J = 1, 4 ) / 3480, 1970, 572,
+     $                   421 /
+      DATA               ( MM( 86, J ), J = 1, 4 ) / 119, 1817, 3309,
+     $                   4073 /
+      DATA               ( MM( 87, J ), J = 1, 4 ) / 3357, 676, 3171,
+     $                   2813 /
+      DATA               ( MM( 88, J ), J = 1, 4 ) / 837, 1410, 817,
+     $                   2337 /
+      DATA               ( MM( 89, J ), J = 1, 4 ) / 2826, 3723, 3039,
+     $                   1429 /
+      DATA               ( MM( 90, J ), J = 1, 4 ) / 2332, 2803, 1696,
+     $                   1177 /
+      DATA               ( MM( 91, J ), J = 1, 4 ) / 2089, 3185, 1256,
+     $                   1901 /
+      DATA               ( MM( 92, J ), J = 1, 4 ) / 3780, 184, 3715,
+     $                   81 /
+      DATA               ( MM( 93, J ), J = 1, 4 ) / 1700, 663, 2077,
+     $                   1669 /
+      DATA               ( MM( 94, J ), J = 1, 4 ) / 3712, 499, 3019,
+     $                   2633 /
+      DATA               ( MM( 95, J ), J = 1, 4 ) / 150, 3784, 1497,
+     $                   2269 /
+      DATA               ( MM( 96, J ), J = 1, 4 ) / 2000, 1631, 1101,
+     $                   129 /
+      DATA               ( MM( 97, J ), J = 1, 4 ) / 3375, 1925, 717,
+     $                   1141 /
+      DATA               ( MM( 98, J ), J = 1, 4 ) / 1621, 3912, 51,
+     $                   249 /
+      DATA               ( MM( 99, J ), J = 1, 4 ) / 3090, 1398, 981,
+     $                   3917 /
+      DATA               ( MM( 100, J ), J = 1, 4 ) / 3765, 1349, 1978,
+     $                   2481 /
+      DATA               ( MM( 101, J ), J = 1, 4 ) / 1149, 1441, 1813,
+     $                   3941 /
+      DATA               ( MM( 102, J ), J = 1, 4 ) / 3146, 2224, 3881,
+     $                   2217 /
+      DATA               ( MM( 103, J ), J = 1, 4 ) / 33, 2411, 76,
+     $                   2749 /
+      DATA               ( MM( 104, J ), J = 1, 4 ) / 3082, 1907, 3846,
+     $                   3041 /
+      DATA               ( MM( 105, J ), J = 1, 4 ) / 2741, 3192, 3694,
+     $                   1877 /
+      DATA               ( MM( 106, J ), J = 1, 4 ) / 359, 2786, 1682,
+     $                   345 /
+      DATA               ( MM( 107, J ), J = 1, 4 ) / 3316, 382, 124,
+     $                   2861 /
+      DATA               ( MM( 108, J ), J = 1, 4 ) / 1749, 37, 1660,
+     $                   1809 /
+      DATA               ( MM( 109, J ), J = 1, 4 ) / 185, 759, 3997,
+     $                   3141 /
+      DATA               ( MM( 110, J ), J = 1, 4 ) / 2784, 2948, 479,
+     $                   2825 /
+      DATA               ( MM( 111, J ), J = 1, 4 ) / 2202, 1862, 1141,
+     $                   157 /
+      DATA               ( MM( 112, J ), J = 1, 4 ) / 2199, 3802, 886,
+     $                   2881 /
+      DATA               ( MM( 113, J ), J = 1, 4 ) / 1364, 2423, 3514,
+     $                   3637 /
+      DATA               ( MM( 114, J ), J = 1, 4 ) / 1244, 2051, 1301,
+     $                   1465 /
+      DATA               ( MM( 115, J ), J = 1, 4 ) / 2020, 2295, 3604,
+     $                   2829 /
+      DATA               ( MM( 116, J ), J = 1, 4 ) / 3160, 1332, 1888,
+     $                   2161 /
+      DATA               ( MM( 117, J ), J = 1, 4 ) / 2785, 1832, 1836,
+     $                   3365 /
+      DATA               ( MM( 118, J ), J = 1, 4 ) / 2772, 2405, 1990,
+     $                   361 /
+      DATA               ( MM( 119, J ), J = 1, 4 ) / 1217, 3638, 2058,
+     $                   2685 /
+      DATA               ( MM( 120, J ), J = 1, 4 ) / 1822, 3661, 692,
+     $                   3745 /
+      DATA               ( MM( 121, J ), J = 1, 4 ) / 1245, 327, 1194,
+     $                   2325 /
+      DATA               ( MM( 122, J ), J = 1, 4 ) / 2252, 3660, 20,
+     $                   3609 /
+      DATA               ( MM( 123, J ), J = 1, 4 ) / 3904, 716, 3285,
+     $                   3821 /
+      DATA               ( MM( 124, J ), J = 1, 4 ) / 2774, 1842, 2046,
+     $                   3537 /
+      DATA               ( MM( 125, J ), J = 1, 4 ) / 997, 3987, 2107,
+     $                   517 /
+      DATA               ( MM( 126, J ), J = 1, 4 ) / 2573, 1368, 3508,
+     $                   3017 /
+      DATA               ( MM( 127, J ), J = 1, 4 ) / 1148, 1848, 3525,
+     $                   2141 /
+      DATA               ( MM( 128, J ), J = 1, 4 ) / 545, 2366, 3801,
+     $                   1537 /
+*     ..
+*     .. Executable Statements ..
+*
+      I1 = ISEED( 1 )
+      I2 = ISEED( 2 )
+      I3 = ISEED( 3 )
+      I4 = ISEED( 4 )
+*
+      DO 10 I = 1, MIN( N, LV )
+*
+*        Multiply the seed by i-th power of the multiplier modulo 2**48
+*
+         IT4 = I4*MM( I, 4 )
+         IT3 = IT4 / IPW2
+         IT4 = IT4 - IPW2*IT3
+         IT3 = IT3 + I3*MM( I, 4 ) + I4*MM( I, 3 )
+         IT2 = IT3 / IPW2
+         IT3 = IT3 - IPW2*IT2
+         IT2 = IT2 + I2*MM( I, 4 ) + I3*MM( I, 3 ) + I4*MM( I, 2 )
+         IT1 = IT2 / IPW2
+         IT2 = IT2 - IPW2*IT1
+         IT1 = IT1 + I1*MM( I, 4 ) + I2*MM( I, 3 ) + I3*MM( I, 2 ) +
+     $         I4*MM( I, 1 )
+         IT1 = MOD( IT1, IPW2 )
+*
+*        Convert 48-bit integer to a real number in the interval (0,1)
+*
+         X( I ) = R*( DBLE( IT1 )+R*( DBLE( IT2 )+R*( DBLE( IT3 )+R*
+     $            DBLE( IT4 ) ) ) )
+   10 CONTINUE
+*
+*     Return final value of seed
+*
+      ISEED( 1 ) = IT1
+      ISEED( 2 ) = IT2
+      ISEED( 3 ) = IT3
+      ISEED( 4 ) = IT4
+      RETURN
+*
+*     End of DLARUV
+*
+      END
diff --git a/costa/native/external/lapack/dlarz.f b/costa/native/external/lapack/dlarz.f
new file mode 100644
index 000000000..ab0e776dd
--- /dev/null
+++ b/costa/native/external/lapack/dlarz.f
@@ -0,0 +1,153 @@
+      SUBROUTINE DLARZ( SIDE, M, N, L, V, INCV, TAU, C, LDC, WORK )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE
+      INTEGER            INCV, L, LDC, M, N
+      DOUBLE PRECISION   TAU
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   C( LDC, * ), V( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLARZ applies a real elementary reflector H to a real M-by-N
+*  matrix C, from either the left or the right. H is represented in the
+*  form
+*
+*        H = I - tau * v * v'
+*
+*  where tau is a real scalar and v is a real vector.
+*
+*  If tau = 0, then H is taken to be the unit matrix.
+*
+*
+*  H is a product of k elementary reflectors as returned by DTZRZF.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': form  H * C
+*          = 'R': form  C * H
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C.
+*
+*  L       (input) INTEGER
+*          The number of entries of the vector V containing
+*          the meaningful part of the Householder vectors.
+*          If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0.
+*
+*  V       (input) DOUBLE PRECISION array, dimension (1+(L-1)*abs(INCV))
+*          The vector v in the representation of H as returned by
+*          DTZRZF. V is not used if TAU = 0.
+*
+*  INCV    (input) INTEGER
+*          The increment between elements of v. INCV <> 0.
+*
+*  TAU     (input) DOUBLE PRECISION
+*          The value tau in the representation of H.
+*
+*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by the matrix H * C if SIDE = 'L',
+*          or C * H if SIDE = 'R'.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension
+*                         (N) if SIDE = 'L'
+*                      or (M) if SIDE = 'R'
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DCOPY, DGEMV, DGER
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+      IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*        Form  H * C
+*
+         IF( TAU.NE.ZERO ) THEN
+*
+*           w( 1:n ) = C( 1, 1:n )
+*
+            CALL DCOPY( N, C, LDC, WORK, 1 )
+*
+*           w( 1:n ) = w( 1:n ) + C( m-l+1:m, 1:n )' * v( 1:l )
+*
+            CALL DGEMV( 'Transpose', L, N, ONE, C( M-L+1, 1 ), LDC, V,
+     $                  INCV, ONE, WORK, 1 )
+*
+*           C( 1, 1:n ) = C( 1, 1:n ) - tau * w( 1:n )
+*
+            CALL DAXPY( N, -TAU, WORK, 1, C, LDC )
+*
+*           C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ...
+*                               tau * v( 1:l ) * w( 1:n )'
+*
+            CALL DGER( L, N, -TAU, V, INCV, WORK, 1, C( M-L+1, 1 ),
+     $                 LDC )
+         END IF
+*
+      ELSE
+*
+*        Form  C * H
+*
+         IF( TAU.NE.ZERO ) THEN
+*
+*           w( 1:m ) = C( 1:m, 1 )
+*
+            CALL DCOPY( M, C, 1, WORK, 1 )
+*
+*           w( 1:m ) = w( 1:m ) + C( 1:m, n-l+1:n, 1:n ) * v( 1:l )
+*
+            CALL DGEMV( 'No transpose', M, L, ONE, C( 1, N-L+1 ), LDC,
+     $                  V, INCV, ONE, WORK, 1 )
+*
+*           C( 1:m, 1 ) = C( 1:m, 1 ) - tau * w( 1:m )
+*
+            CALL DAXPY( M, -TAU, WORK, 1, C, 1 )
+*
+*           C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ...
+*                               tau * w( 1:m ) * v( 1:l )'
+*
+            CALL DGER( M, L, -TAU, WORK, 1, V, INCV, C( 1, N-L+1 ),
+     $                 LDC )
+*
+         END IF
+*
+      END IF
+*
+      RETURN
+*
+*     End of DLARZ
+*
+      END
diff --git a/costa/native/external/lapack/dlarzb.f b/costa/native/external/lapack/dlarzb.f
new file mode 100644
index 000000000..6ffd9cc1e
--- /dev/null
+++ b/costa/native/external/lapack/dlarzb.f
@@ -0,0 +1,221 @@
+      SUBROUTINE DLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V,
+     $                   LDV, T, LDT, C, LDC, WORK, LDWORK )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     December 1, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIRECT, SIDE, STOREV, TRANS
+      INTEGER            K, L, LDC, LDT, LDV, LDWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   C( LDC, * ), T( LDT, * ), V( LDV, * ),
+     $                   WORK( LDWORK, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLARZB applies a real block reflector H or its transpose H**T to
+*  a real distributed M-by-N  C from the left or the right.
+*
+*  Currently, only STOREV = 'R' and DIRECT = 'B' are supported.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply H or H' from the Left
+*          = 'R': apply H or H' from the Right
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': apply H (No transpose)
+*          = 'C': apply H' (Transpose)
+*
+*  DIRECT  (input) CHARACTER*1
+*          Indicates how H is formed from a product of elementary
+*          reflectors
+*          = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet)
+*          = 'B': H = H(k) . . . H(2) H(1) (Backward)
+*
+*  STOREV  (input) CHARACTER*1
+*          Indicates how the vectors which define the elementary
+*          reflectors are stored:
+*          = 'C': Columnwise                        (not supported yet)
+*          = 'R': Rowwise
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C.
+*
+*  K       (input) INTEGER
+*          The order of the matrix T (= the number of elementary
+*          reflectors whose product defines the block reflector).
+*
+*  L       (input) INTEGER
+*          The number of columns of the matrix V containing the
+*          meaningful part of the Householder reflectors.
+*          If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0.
+*
+*  V       (input) DOUBLE PRECISION array, dimension (LDV,NV).
+*          If STOREV = 'C', NV = K; if STOREV = 'R', NV = L.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V.
+*          If STOREV = 'C', LDV >= L; if STOREV = 'R', LDV >= K.
+*
+*  T       (input) DOUBLE PRECISION array, dimension (LDT,K)
+*          The triangular K-by-K matrix T in the representation of the
+*          block reflector.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= K.
+*
+*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by H*C or H'*C or C*H or C*H'.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (LDWORK,K)
+*
+*  LDWORK  (input) INTEGER
+*          The leading dimension of the array WORK.
+*          If SIDE = 'L', LDWORK >= max(1,N);
+*          if SIDE = 'R', LDWORK >= max(1,M).
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      CHARACTER          TRANST
+      INTEGER            I, INFO, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DGEMM, DTRMM, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 .OR. N.LE.0 )
+     $   RETURN
+*
+*     Check for currently supported options
+*
+      INFO = 0
+      IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLARZB', -INFO )
+         RETURN
+      END IF
+*
+      IF( LSAME( TRANS, 'N' ) ) THEN
+         TRANST = 'T'
+      ELSE
+         TRANST = 'N'
+      END IF
+*
+      IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*        Form  H * C  or  H' * C
+*
+*        W( 1:n, 1:k ) = C( 1:k, 1:n )'
+*
+         DO 10 J = 1, K
+            CALL DCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 )
+   10    CONTINUE
+*
+*        W( 1:n, 1:k ) = W( 1:n, 1:k ) + ...
+*                        C( m-l+1:m, 1:n )' * V( 1:k, 1:l )'
+*
+         IF( L.GT.0 )
+     $      CALL DGEMM( 'Transpose', 'Transpose', N, K, L, ONE,
+     $                  C( M-L+1, 1 ), LDC, V, LDV, ONE, WORK, LDWORK )
+*
+*        W( 1:n, 1:k ) = W( 1:n, 1:k ) * T'  or  W( 1:m, 1:k ) * T
+*
+         CALL DTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, ONE, T,
+     $               LDT, WORK, LDWORK )
+*
+*        C( 1:k, 1:n ) = C( 1:k, 1:n ) - W( 1:n, 1:k )'
+*
+         DO 30 J = 1, N
+            DO 20 I = 1, K
+               C( I, J ) = C( I, J ) - WORK( J, I )
+   20       CONTINUE
+   30    CONTINUE
+*
+*        C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ...
+*                            V( 1:k, 1:l )' * W( 1:n, 1:k )'
+*
+         IF( L.GT.0 )
+     $      CALL DGEMM( 'Transpose', 'Transpose', L, N, K, -ONE, V, LDV,
+     $                  WORK, LDWORK, ONE, C( M-L+1, 1 ), LDC )
+*
+      ELSE IF( LSAME( SIDE, 'R' ) ) THEN
+*
+*        Form  C * H  or  C * H'
+*
+*        W( 1:m, 1:k ) = C( 1:m, 1:k )
+*
+         DO 40 J = 1, K
+            CALL DCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 )
+   40    CONTINUE
+*
+*        W( 1:m, 1:k ) = W( 1:m, 1:k ) + ...
+*                        C( 1:m, n-l+1:n ) * V( 1:k, 1:l )'
+*
+         IF( L.GT.0 )
+     $      CALL DGEMM( 'No transpose', 'Transpose', M, K, L, ONE,
+     $                  C( 1, N-L+1 ), LDC, V, LDV, ONE, WORK, LDWORK )
+*
+*        W( 1:m, 1:k ) = W( 1:m, 1:k ) * T  or  W( 1:m, 1:k ) * T'
+*
+         CALL DTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, ONE, T,
+     $               LDT, WORK, LDWORK )
+*
+*        C( 1:m, 1:k ) = C( 1:m, 1:k ) - W( 1:m, 1:k )
+*
+         DO 60 J = 1, K
+            DO 50 I = 1, M
+               C( I, J ) = C( I, J ) - WORK( I, J )
+   50       CONTINUE
+   60    CONTINUE
+*
+*        C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ...
+*                            W( 1:m, 1:k ) * V( 1:k, 1:l )
+*
+         IF( L.GT.0 )
+     $      CALL DGEMM( 'No transpose', 'No transpose', M, L, K, -ONE,
+     $                  WORK, LDWORK, V, LDV, ONE, C( 1, N-L+1 ), LDC )
+*
+      END IF
+*
+      RETURN
+*
+*     End of DLARZB
+*
+      END
diff --git a/costa/native/external/lapack/dlarzt.f b/costa/native/external/lapack/dlarzt.f
new file mode 100644
index 000000000..06a12d6d0
--- /dev/null
+++ b/costa/native/external/lapack/dlarzt.f
@@ -0,0 +1,185 @@
+      SUBROUTINE DLARZT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIRECT, STOREV
+      INTEGER            K, LDT, LDV, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   T( LDT, * ), TAU( * ), V( LDV, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLARZT forms the triangular factor T of a real block reflector
+*  H of order > n, which is defined as a product of k elementary
+*  reflectors.
+*
+*  If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
+*
+*  If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
+*
+*  If STOREV = 'C', the vector which defines the elementary reflector
+*  H(i) is stored in the i-th column of the array V, and
+*
+*     H  =  I - V * T * V'
+*
+*  If STOREV = 'R', the vector which defines the elementary reflector
+*  H(i) is stored in the i-th row of the array V, and
+*
+*     H  =  I - V' * T * V
+*
+*  Currently, only STOREV = 'R' and DIRECT = 'B' are supported.
+*
+*  Arguments
+*  =========
+*
+*  DIRECT  (input) CHARACTER*1
+*          Specifies the order in which the elementary reflectors are
+*          multiplied to form the block reflector:
+*          = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet)
+*          = 'B': H = H(k) . . . H(2) H(1) (Backward)
+*
+*  STOREV  (input) CHARACTER*1
+*          Specifies how the vectors which define the elementary
+*          reflectors are stored (see also Further Details):
+*          = 'C': columnwise                        (not supported yet)
+*          = 'R': rowwise
+*
+*  N       (input) INTEGER
+*          The order of the block reflector H. N >= 0.
+*
+*  K       (input) INTEGER
+*          The order of the triangular factor T (= the number of
+*          elementary reflectors). K >= 1.
+*
+*  V       (input/output) DOUBLE PRECISION array, dimension
+*                               (LDV,K) if STOREV = 'C'
+*                               (LDV,N) if STOREV = 'R'
+*          The matrix V. See further details.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V.
+*          If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
+*
+*  TAU     (input) DOUBLE PRECISION array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i).
+*
+*  T       (output) DOUBLE PRECISION array, dimension (LDT,K)
+*          The k by k triangular factor T of the block reflector.
+*          If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
+*          lower triangular. The rest of the array is not used.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= K.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+*  The shape of the matrix V and the storage of the vectors which define
+*  the H(i) is best illustrated by the following example with n = 5 and
+*  k = 3. The elements equal to 1 are not stored; the corresponding
+*  array elements are modified but restored on exit. The rest of the
+*  array is not used.
+*
+*  DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R':
+*
+*                                              ______V_____
+*         ( v1 v2 v3 )                        /            \
+*         ( v1 v2 v3 )                      ( v1 v1 v1 v1 v1 . . . . 1 )
+*     V = ( v1 v2 v3 )                      ( v2 v2 v2 v2 v2 . . . 1   )
+*         ( v1 v2 v3 )                      ( v3 v3 v3 v3 v3 . . 1     )
+*         ( v1 v2 v3 )
+*            .  .  .
+*            .  .  .
+*            1  .  .
+*               1  .
+*                  1
+*
+*  DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R':
+*
+*                                                        ______V_____
+*            1                                          /            \
+*            .  1                           ( 1 . . . . v1 v1 v1 v1 v1 )
+*            .  .  1                        ( . 1 . . . v2 v2 v2 v2 v2 )
+*            .  .  .                        ( . . 1 . . v3 v3 v3 v3 v3 )
+*            .  .  .
+*         ( v1 v2 v3 )
+*         ( v1 v2 v3 )
+*     V = ( v1 v2 v3 )
+*         ( v1 v2 v3 )
+*         ( v1 v2 v3 )
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, INFO, J
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMV, DTRMV, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+*     Check for currently supported options
+*
+      INFO = 0
+      IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLARZT', -INFO )
+         RETURN
+      END IF
+*
+      DO 20 I = K, 1, -1
+         IF( TAU( I ).EQ.ZERO ) THEN
+*
+*           H(i)  =  I
+*
+            DO 10 J = I, K
+               T( J, I ) = ZERO
+   10       CONTINUE
+         ELSE
+*
+*           general case
+*
+            IF( I.LT.K ) THEN
+*
+*              T(i+1:k,i) = - tau(i) * V(i+1:k,1:n) * V(i,1:n)'
+*
+               CALL DGEMV( 'No transpose', K-I, N, -TAU( I ),
+     $                     V( I+1, 1 ), LDV, V( I, 1 ), LDV, ZERO,
+     $                     T( I+1, I ), 1 )
+*
+*              T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i)
+*
+               CALL DTRMV( 'Lower', 'No transpose', 'Non-unit', K-I,
+     $                     T( I+1, I+1 ), LDT, T( I+1, I ), 1 )
+            END IF
+            T( I, I ) = TAU( I )
+         END IF
+   20 CONTINUE
+      RETURN
+*
+*     End of DLARZT
+*
+      END
diff --git a/costa/native/external/lapack/dlas2.f b/costa/native/external/lapack/dlas2.f
new file mode 100644
index 000000000..55652c087
--- /dev/null
+++ b/costa/native/external/lapack/dlas2.f
@@ -0,0 +1,122 @@
+      SUBROUTINE DLAS2( F, G, H, SSMIN, SSMAX )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   F, G, H, SSMAX, SSMIN
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAS2  computes the singular values of the 2-by-2 matrix
+*     [  F   G  ]
+*     [  0   H  ].
+*  On return, SSMIN is the smaller singular value and SSMAX is the
+*  larger singular value.
+*
+*  Arguments
+*  =========
+*
+*  F       (input) DOUBLE PRECISION
+*          The (1,1) element of the 2-by-2 matrix.
+*
+*  G       (input) DOUBLE PRECISION
+*          The (1,2) element of the 2-by-2 matrix.
+*
+*  H       (input) DOUBLE PRECISION
+*          The (2,2) element of the 2-by-2 matrix.
+*
+*  SSMIN   (output) DOUBLE PRECISION
+*          The smaller singular value.
+*
+*  SSMAX   (output) DOUBLE PRECISION
+*          The larger singular value.
+*
+*  Further Details
+*  ===============
+*
+*  Barring over/underflow, all output quantities are correct to within
+*  a few units in the last place (ulps), even in the absence of a guard
+*  digit in addition/subtraction.
+*
+*  In IEEE arithmetic, the code works correctly if one matrix element is
+*  infinite.
+*
+*  Overflow will not occur unless the largest singular value itself
+*  overflows, or is within a few ulps of overflow. (On machines with
+*  partial overflow, like the Cray, overflow may occur if the largest
+*  singular value is within a factor of 2 of overflow.)
+*
+*  Underflow is harmless if underflow is gradual. Otherwise, results
+*  may correspond to a matrix modified by perturbations of size near
+*  the underflow threshold.
+*
+*  ====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D0 )
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D0 )
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D0 )
+*     ..
+*     .. Local Scalars ..
+      DOUBLE PRECISION   AS, AT, AU, C, FA, FHMN, FHMX, GA, HA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      FA = ABS( F )
+      GA = ABS( G )
+      HA = ABS( H )
+      FHMN = MIN( FA, HA )
+      FHMX = MAX( FA, HA )
+      IF( FHMN.EQ.ZERO ) THEN
+         SSMIN = ZERO
+         IF( FHMX.EQ.ZERO ) THEN
+            SSMAX = GA
+         ELSE
+            SSMAX = MAX( FHMX, GA )*SQRT( ONE+
+     $              ( MIN( FHMX, GA ) / MAX( FHMX, GA ) )**2 )
+         END IF
+      ELSE
+         IF( GA.LT.FHMX ) THEN
+            AS = ONE + FHMN / FHMX
+            AT = ( FHMX-FHMN ) / FHMX
+            AU = ( GA / FHMX )**2
+            C = TWO / ( SQRT( AS*AS+AU )+SQRT( AT*AT+AU ) )
+            SSMIN = FHMN*C
+            SSMAX = FHMX / C
+         ELSE
+            AU = FHMX / GA
+            IF( AU.EQ.ZERO ) THEN
+*
+*              Avoid possible harmful underflow if exponent range
+*              asymmetric (true SSMIN may not underflow even if
+*              AU underflows)
+*
+               SSMIN = ( FHMN*FHMX ) / GA
+               SSMAX = GA
+            ELSE
+               AS = ONE + FHMN / FHMX
+               AT = ( FHMX-FHMN ) / FHMX
+               C = ONE / ( SQRT( ONE+( AS*AU )**2 )+
+     $             SQRT( ONE+( AT*AU )**2 ) )
+               SSMIN = ( FHMN*C )*AU
+               SSMIN = SSMIN + SSMIN
+               SSMAX = GA / ( C+C )
+            END IF
+         END IF
+      END IF
+      RETURN
+*
+*     End of DLAS2
+*
+      END
diff --git a/costa/native/external/lapack/dlascl.f b/costa/native/external/lapack/dlascl.f
new file mode 100644
index 000000000..4c05d4d79
--- /dev/null
+++ b/costa/native/external/lapack/dlascl.f
@@ -0,0 +1,268 @@
+      SUBROUTINE DLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TYPE
+      INTEGER            INFO, KL, KU, LDA, M, N
+      DOUBLE PRECISION   CFROM, CTO
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLASCL multiplies the M by N real matrix A by the real scalar
+*  CTO/CFROM.  This is done without over/underflow as long as the final
+*  result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that
+*  A may be full, upper triangular, lower triangular, upper Hessenberg,
+*  or banded.
+*
+*  Arguments
+*  =========
+*
+*  TYPE    (input) CHARACTER*1
+*          TYPE indices the storage type of the input matrix.
+*          = 'G':  A is a full matrix.
+*          = 'L':  A is a lower triangular matrix.
+*          = 'U':  A is an upper triangular matrix.
+*          = 'H':  A is an upper Hessenberg matrix.
+*          = 'B':  A is a symmetric band matrix with lower bandwidth KL
+*                  and upper bandwidth KU and with the only the lower
+*                  half stored.
+*          = 'Q':  A is a symmetric band matrix with lower bandwidth KL
+*                  and upper bandwidth KU and with the only the upper
+*                  half stored.
+*          = 'Z':  A is a band matrix with lower bandwidth KL and upper
+*                  bandwidth KU.
+*
+*  KL      (input) INTEGER
+*          The lower bandwidth of A.  Referenced only if TYPE = 'B',
+*          'Q' or 'Z'.
+*
+*  KU      (input) INTEGER
+*          The upper bandwidth of A.  Referenced only if TYPE = 'B',
+*          'Q' or 'Z'.
+*
+*  CFROM   (input) DOUBLE PRECISION
+*  CTO     (input) DOUBLE PRECISION
+*          The matrix A is multiplied by CTO/CFROM. A(I,J) is computed
+*          without over/underflow if the final result CTO*A(I,J)/CFROM
+*          can be represented without over/underflow.  CFROM must be
+*          nonzero.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,M)
+*          The matrix to be multiplied by CTO/CFROM.  See TYPE for the
+*          storage type.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  INFO    (output) INTEGER
+*          0  - successful exit
+*          <0 - if INFO = -i, the i-th argument had an illegal value.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            DONE
+      INTEGER            I, ITYPE, J, K1, K2, K3, K4
+      DOUBLE PRECISION   BIGNUM, CFROM1, CFROMC, CTO1, CTOC, MUL, SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+*
+      IF( LSAME( TYPE, 'G' ) ) THEN
+         ITYPE = 0
+      ELSE IF( LSAME( TYPE, 'L' ) ) THEN
+         ITYPE = 1
+      ELSE IF( LSAME( TYPE, 'U' ) ) THEN
+         ITYPE = 2
+      ELSE IF( LSAME( TYPE, 'H' ) ) THEN
+         ITYPE = 3
+      ELSE IF( LSAME( TYPE, 'B' ) ) THEN
+         ITYPE = 4
+      ELSE IF( LSAME( TYPE, 'Q' ) ) THEN
+         ITYPE = 5
+      ELSE IF( LSAME( TYPE, 'Z' ) ) THEN
+         ITYPE = 6
+      ELSE
+         ITYPE = -1
+      END IF
+*
+      IF( ITYPE.EQ.-1 ) THEN
+         INFO = -1
+      ELSE IF( CFROM.EQ.ZERO ) THEN
+         INFO = -4
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( N.LT.0 .OR. ( ITYPE.EQ.4 .AND. N.NE.M ) .OR.
+     $         ( ITYPE.EQ.5 .AND. N.NE.M ) ) THEN
+         INFO = -7
+      ELSE IF( ITYPE.LE.3 .AND. LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -9
+      ELSE IF( ITYPE.GE.4 ) THEN
+         IF( KL.LT.0 .OR. KL.GT.MAX( M-1, 0 ) ) THEN
+            INFO = -2
+         ELSE IF( KU.LT.0 .OR. KU.GT.MAX( N-1, 0 ) .OR.
+     $            ( ( ITYPE.EQ.4 .OR. ITYPE.EQ.5 ) .AND. KL.NE.KU ) )
+     $             THEN
+            INFO = -3
+         ELSE IF( ( ITYPE.EQ.4 .AND. LDA.LT.KL+1 ) .OR.
+     $            ( ITYPE.EQ.5 .AND. LDA.LT.KU+1 ) .OR.
+     $            ( ITYPE.EQ.6 .AND. LDA.LT.2*KL+KU+1 ) ) THEN
+            INFO = -9
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLASCL', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. M.EQ.0 )
+     $   RETURN
+*
+*     Get machine parameters
+*
+      SMLNUM = DLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+*
+      CFROMC = CFROM
+      CTOC = CTO
+*
+   10 CONTINUE
+      CFROM1 = CFROMC*SMLNUM
+      CTO1 = CTOC / BIGNUM
+      IF( ABS( CFROM1 ).GT.ABS( CTOC ) .AND. CTOC.NE.ZERO ) THEN
+         MUL = SMLNUM
+         DONE = .FALSE.
+         CFROMC = CFROM1
+      ELSE IF( ABS( CTO1 ).GT.ABS( CFROMC ) ) THEN
+         MUL = BIGNUM
+         DONE = .FALSE.
+         CTOC = CTO1
+      ELSE
+         MUL = CTOC / CFROMC
+         DONE = .TRUE.
+      END IF
+*
+      IF( ITYPE.EQ.0 ) THEN
+*
+*        Full matrix
+*
+         DO 30 J = 1, N
+            DO 20 I = 1, M
+               A( I, J ) = A( I, J )*MUL
+   20       CONTINUE
+   30    CONTINUE
+*
+      ELSE IF( ITYPE.EQ.1 ) THEN
+*
+*        Lower triangular matrix
+*
+         DO 50 J = 1, N
+            DO 40 I = J, M
+               A( I, J ) = A( I, J )*MUL
+   40       CONTINUE
+   50    CONTINUE
+*
+      ELSE IF( ITYPE.EQ.2 ) THEN
+*
+*        Upper triangular matrix
+*
+         DO 70 J = 1, N
+            DO 60 I = 1, MIN( J, M )
+               A( I, J ) = A( I, J )*MUL
+   60       CONTINUE
+   70    CONTINUE
+*
+      ELSE IF( ITYPE.EQ.3 ) THEN
+*
+*        Upper Hessenberg matrix
+*
+         DO 90 J = 1, N
+            DO 80 I = 1, MIN( J+1, M )
+               A( I, J ) = A( I, J )*MUL
+   80       CONTINUE
+   90    CONTINUE
+*
+      ELSE IF( ITYPE.EQ.4 ) THEN
+*
+*        Lower half of a symmetric band matrix
+*
+         K3 = KL + 1
+         K4 = N + 1
+         DO 110 J = 1, N
+            DO 100 I = 1, MIN( K3, K4-J )
+               A( I, J ) = A( I, J )*MUL
+  100       CONTINUE
+  110    CONTINUE
+*
+      ELSE IF( ITYPE.EQ.5 ) THEN
+*
+*        Upper half of a symmetric band matrix
+*
+         K1 = KU + 2
+         K3 = KU + 1
+         DO 130 J = 1, N
+            DO 120 I = MAX( K1-J, 1 ), K3
+               A( I, J ) = A( I, J )*MUL
+  120       CONTINUE
+  130    CONTINUE
+*
+      ELSE IF( ITYPE.EQ.6 ) THEN
+*
+*        Band matrix
+*
+         K1 = KL + KU + 2
+         K2 = KL + 1
+         K3 = 2*KL + KU + 1
+         K4 = KL + KU + 1 + M
+         DO 150 J = 1, N
+            DO 140 I = MAX( K1-J, K2 ), MIN( K3, K4-J )
+               A( I, J ) = A( I, J )*MUL
+  140       CONTINUE
+  150    CONTINUE
+*
+      END IF
+*
+      IF( .NOT.DONE )
+     $   GO TO 10
+*
+      RETURN
+*
+*     End of DLASCL
+*
+      END
diff --git a/costa/native/external/lapack/dlasd0.f b/costa/native/external/lapack/dlasd0.f
new file mode 100644
index 000000000..f7de695ff
--- /dev/null
+++ b/costa/native/external/lapack/dlasd0.f
@@ -0,0 +1,231 @@
+      SUBROUTINE DLASD0( N, SQRE, D, E, U, LDU, VT, LDVT, SMLSIZ, IWORK,
+     $                   WORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDU, LDVT, N, SMLSIZ, SQRE
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   D( * ), E( * ), U( LDU, * ), VT( LDVT, * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  Using a divide and conquer approach, DLASD0 computes the singular
+*  value decomposition (SVD) of a real upper bidiagonal N-by-M
+*  matrix B with diagonal D and offdiagonal E, where M = N + SQRE.
+*  The algorithm computes orthogonal matrices U and VT such that
+*  B = U * S * VT. The singular values S are overwritten on D.
+*
+*  A related subroutine, DLASDA, computes only the singular values,
+*  and optionally, the singular vectors in compact form.
+*
+*  Arguments
+*  =========
+*
+*  N      (input) INTEGER
+*         On entry, the row dimension of the upper bidiagonal matrix.
+*         This is also the dimension of the main diagonal array D.
+*
+*  SQRE   (input) INTEGER
+*         Specifies the column dimension of the bidiagonal matrix.
+*         = 0: The bidiagonal matrix has column dimension M = N;
+*         = 1: The bidiagonal matrix has column dimension M = N+1;
+*
+*  D      (input/output) DOUBLE PRECISION array, dimension (N)
+*         On entry D contains the main diagonal of the bidiagonal
+*         matrix.
+*         On exit D, if INFO = 0, contains its singular values.
+*
+*  E      (input) DOUBLE PRECISION array, dimension (M-1)
+*         Contains the subdiagonal entries of the bidiagonal matrix.
+*         On exit, E has been destroyed.
+*
+*  U      (output) DOUBLE PRECISION array, dimension at least (LDQ, N)
+*         On exit, U contains the left singular vectors.
+*
+*  LDU    (input) INTEGER
+*         On entry, leading dimension of U.
+*
+*  VT     (output) DOUBLE PRECISION array, dimension at least (LDVT, M)
+*         On exit, VT' contains the right singular vectors.
+*
+*  LDVT   (input) INTEGER
+*         On entry, leading dimension of VT.
+*
+*  SMLSIZ (input) INTEGER
+*         On entry, maximum size of the subproblems at the
+*         bottom of the computation tree.
+*
+*  IWORK  INTEGER work array.
+*         Dimension must be at least (8 * N)
+*
+*  WORK   DOUBLE PRECISION work array.
+*         Dimension must be at least (3 * M**2 + 2 * M)
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = 1, an singular value did not converge
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, I1, IC, IDXQ, IDXQC, IM1, INODE, ITEMP, IWK,
+     $                   J, LF, LL, LVL, M, NCC, ND, NDB1, NDIML, NDIMR,
+     $                   NL, NLF, NLP1, NLVL, NR, NRF, NRP1, SQREI
+      DOUBLE PRECISION   ALPHA, BETA
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASD1, DLASDQ, DLASDT, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN
+         INFO = -2
+      END IF
+*
+      M = N + SQRE
+*
+      IF( LDU.LT.N ) THEN
+         INFO = -6
+      ELSE IF( LDVT.LT.M ) THEN
+         INFO = -8
+      ELSE IF( SMLSIZ.LT.3 ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLASD0', -INFO )
+         RETURN
+      END IF
+*
+*     If the input matrix is too small, call DLASDQ to find the SVD.
+*
+      IF( N.LE.SMLSIZ ) THEN
+         CALL DLASDQ( 'U', SQRE, N, M, N, 0, D, E, VT, LDVT, U, LDU, U,
+     $                LDU, WORK, INFO )
+         RETURN
+      END IF
+*
+*     Set up the computation tree.
+*
+      INODE = 1
+      NDIML = INODE + N
+      NDIMR = NDIML + N
+      IDXQ = NDIMR + N
+      IWK = IDXQ + N
+      CALL DLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ),
+     $             IWORK( NDIMR ), SMLSIZ )
+*
+*     For the nodes on bottom level of the tree, solve
+*     their subproblems by DLASDQ.
+*
+      NDB1 = ( ND+1 ) / 2
+      NCC = 0
+      DO 30 I = NDB1, ND
+*
+*     IC : center row of each node
+*     NL : number of rows of left  subproblem
+*     NR : number of rows of right subproblem
+*     NLF: starting row of the left   subproblem
+*     NRF: starting row of the right  subproblem
+*
+         I1 = I - 1
+         IC = IWORK( INODE+I1 )
+         NL = IWORK( NDIML+I1 )
+         NLP1 = NL + 1
+         NR = IWORK( NDIMR+I1 )
+         NRP1 = NR + 1
+         NLF = IC - NL
+         NRF = IC + 1
+         SQREI = 1
+         CALL DLASDQ( 'U', SQREI, NL, NLP1, NL, NCC, D( NLF ), E( NLF ),
+     $                VT( NLF, NLF ), LDVT, U( NLF, NLF ), LDU,
+     $                U( NLF, NLF ), LDU, WORK, INFO )
+         IF( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+         ITEMP = IDXQ + NLF - 2
+         DO 10 J = 1, NL
+            IWORK( ITEMP+J ) = J
+   10    CONTINUE
+         IF( I.EQ.ND ) THEN
+            SQREI = SQRE
+         ELSE
+            SQREI = 1
+         END IF
+         NRP1 = NR + SQREI
+         CALL DLASDQ( 'U', SQREI, NR, NRP1, NR, NCC, D( NRF ), E( NRF ),
+     $                VT( NRF, NRF ), LDVT, U( NRF, NRF ), LDU,
+     $                U( NRF, NRF ), LDU, WORK, INFO )
+         IF( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+         ITEMP = IDXQ + IC
+         DO 20 J = 1, NR
+            IWORK( ITEMP+J-1 ) = J
+   20    CONTINUE
+   30 CONTINUE
+*
+*     Now conquer each subproblem bottom-up.
+*
+      DO 50 LVL = NLVL, 1, -1
+*
+*        Find the first node LF and last node LL on the
+*        current level LVL.
+*
+         IF( LVL.EQ.1 ) THEN
+            LF = 1
+            LL = 1
+         ELSE
+            LF = 2**( LVL-1 )
+            LL = 2*LF - 1
+         END IF
+         DO 40 I = LF, LL
+            IM1 = I - 1
+            IC = IWORK( INODE+IM1 )
+            NL = IWORK( NDIML+IM1 )
+            NR = IWORK( NDIMR+IM1 )
+            NLF = IC - NL
+            IF( ( SQRE.EQ.0 ) .AND. ( I.EQ.LL ) ) THEN
+               SQREI = SQRE
+            ELSE
+               SQREI = 1
+            END IF
+            IDXQC = IDXQ + NLF - 1
+            ALPHA = D( IC )
+            BETA = E( IC )
+            CALL DLASD1( NL, NR, SQREI, D( NLF ), ALPHA, BETA,
+     $                   U( NLF, NLF ), LDU, VT( NLF, NLF ), LDVT,
+     $                   IWORK( IDXQC ), IWORK( IWK ), WORK, INFO )
+            IF( INFO.NE.0 ) THEN
+               RETURN
+            END IF
+   40    CONTINUE
+   50 CONTINUE
+*
+      RETURN
+*
+*     End of DLASD0
+*
+      END
diff --git a/costa/native/external/lapack/dlasd1.f b/costa/native/external/lapack/dlasd1.f
new file mode 100644
index 000000000..e3c338275
--- /dev/null
+++ b/costa/native/external/lapack/dlasd1.f
@@ -0,0 +1,233 @@
+      SUBROUTINE DLASD1( NL, NR, SQRE, D, ALPHA, BETA, U, LDU, VT, LDVT,
+     $                   IDXQ, IWORK, WORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDU, LDVT, NL, NR, SQRE
+      DOUBLE PRECISION   ALPHA, BETA
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IDXQ( * ), IWORK( * )
+      DOUBLE PRECISION   D( * ), U( LDU, * ), VT( LDVT, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B,
+*  where N = NL + NR + 1 and M = N + SQRE. DLASD1 is called from DLASD0.
+*
+*  A related subroutine DLASD7 handles the case in which the singular
+*  values (and the singular vectors in factored form) are desired.
+*
+*  DLASD1 computes the SVD as follows:
+*
+*                ( D1(in)  0    0     0 )
+*    B = U(in) * (   Z1'   a   Z2'    b ) * VT(in)
+*                (   0     0   D2(in) 0 )
+*
+*      = U(out) * ( D(out) 0) * VT(out)
+*
+*  where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M
+*  with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros
+*  elsewhere; and the entry b is empty if SQRE = 0.
+*
+*  The left singular vectors of the original matrix are stored in U, and
+*  the transpose of the right singular vectors are stored in VT, and the
+*  singular values are in D.  The algorithm consists of three stages:
+*
+*     The first stage consists of deflating the size of the problem
+*     when there are multiple singular values or when there are zeros in
+*     the Z vector.  For each such occurence the dimension of the
+*     secular equation problem is reduced by one.  This stage is
+*     performed by the routine DLASD2.
+*
+*     The second stage consists of calculating the updated
+*     singular values. This is done by finding the square roots of the
+*     roots of the secular equation via the routine DLASD4 (as called
+*     by DLASD3). This routine also calculates the singular vectors of
+*     the current problem.
+*
+*     The final stage consists of computing the updated singular vectors
+*     directly using the updated singular values.  The singular vectors
+*     for the current problem are multiplied with the singular vectors
+*     from the overall problem.
+*
+*  Arguments
+*  =========
+*
+*  NL     (input) INTEGER
+*         The row dimension of the upper block.  NL >= 1.
+*
+*  NR     (input) INTEGER
+*         The row dimension of the lower block.  NR >= 1.
+*
+*  SQRE   (input) INTEGER
+*         = 0: the lower block is an NR-by-NR square matrix.
+*         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+*
+*         The bidiagonal matrix has row dimension N = NL + NR + 1,
+*         and column dimension M = N + SQRE.
+*
+*  D      (input/output) DOUBLE PRECISION array,
+*                        dimension (N = NL+NR+1).
+*         On entry D(1:NL,1:NL) contains the singular values of the
+*         upper block; and D(NL+2:N) contains the singular values of
+*         the lower block. On exit D(1:N) contains the singular values
+*         of the modified matrix.
+*
+*  ALPHA  (input) DOUBLE PRECISION
+*         Contains the diagonal element associated with the added row.
+*
+*  BETA   (input) DOUBLE PRECISION
+*         Contains the off-diagonal element associated with the added
+*         row.
+*
+*  U      (input/output) DOUBLE PRECISION array, dimension(LDU,N)
+*         On entry U(1:NL, 1:NL) contains the left singular vectors of
+*         the upper block; U(NL+2:N, NL+2:N) contains the left singular
+*         vectors of the lower block. On exit U contains the left
+*         singular vectors of the bidiagonal matrix.
+*
+*  LDU    (input) INTEGER
+*         The leading dimension of the array U.  LDU >= max( 1, N ).
+*
+*  VT     (input/output) DOUBLE PRECISION array, dimension(LDVT,M)
+*         where M = N + SQRE.
+*         On entry VT(1:NL+1, 1:NL+1)' contains the right singular
+*         vectors of the upper block; VT(NL+2:M, NL+2:M)' contains
+*         the right singular vectors of the lower block. On exit
+*         VT' contains the right singular vectors of the
+*         bidiagonal matrix.
+*
+*  LDVT   (input) INTEGER
+*         The leading dimension of the array VT.  LDVT >= max( 1, M ).
+*
+*  IDXQ  (output) INTEGER array, dimension(N)
+*         This contains the permutation which will reintegrate the
+*         subproblem just solved back into sorted order, i.e.
+*         D( IDXQ( I = 1, N ) ) will be in ascending order.
+*
+*  IWORK  (workspace) INTEGER array, dimension( 4 * N )
+*
+*  WORK   (workspace) DOUBLE PRECISION array, dimension( 3*M**2 + 2*M )
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = 1, an singular value did not converge
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+*
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            COLTYP, I, IDX, IDXC, IDXP, IQ, ISIGMA, IU2,
+     $                   IVT2, IZ, K, LDQ, LDU2, LDVT2, M, N, N1, N2
+      DOUBLE PRECISION   ORGNRM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLAMRG, DLASCL, DLASD2, DLASD3, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( NL.LT.1 ) THEN
+         INFO = -1
+      ELSE IF( NR.LT.1 ) THEN
+         INFO = -2
+      ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN
+         INFO = -3
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLASD1', -INFO )
+         RETURN
+      END IF
+*
+      N = NL + NR + 1
+      M = N + SQRE
+*
+*     The following values are for bookkeeping purposes only.  They are
+*     integer pointers which indicate the portion of the workspace
+*     used by a particular array in DLASD2 and DLASD3.
+*
+      LDU2 = N
+      LDVT2 = M
+*
+      IZ = 1
+      ISIGMA = IZ + M
+      IU2 = ISIGMA + N
+      IVT2 = IU2 + LDU2*N
+      IQ = IVT2 + LDVT2*M
+*
+      IDX = 1
+      IDXC = IDX + N
+      COLTYP = IDXC + N
+      IDXP = COLTYP + N
+*
+*     Scale.
+*
+      ORGNRM = MAX( ABS( ALPHA ), ABS( BETA ) )
+      D( NL+1 ) = ZERO
+      DO 10 I = 1, N
+         IF( ABS( D( I ) ).GT.ORGNRM ) THEN
+            ORGNRM = ABS( D( I ) )
+         END IF
+   10 CONTINUE
+      CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO )
+      ALPHA = ALPHA / ORGNRM
+      BETA = BETA / ORGNRM
+*
+*     Deflate singular values.
+*
+      CALL DLASD2( NL, NR, SQRE, K, D, WORK( IZ ), ALPHA, BETA, U, LDU,
+     $             VT, LDVT, WORK( ISIGMA ), WORK( IU2 ), LDU2,
+     $             WORK( IVT2 ), LDVT2, IWORK( IDXP ), IWORK( IDX ),
+     $             IWORK( IDXC ), IDXQ, IWORK( COLTYP ), INFO )
+*
+*     Solve Secular Equation and update singular vectors.
+*
+      LDQ = K
+      CALL DLASD3( NL, NR, SQRE, K, D, WORK( IQ ), LDQ, WORK( ISIGMA ),
+     $             U, LDU, WORK( IU2 ), LDU2, VT, LDVT, WORK( IVT2 ),
+     $             LDVT2, IWORK( IDXC ), IWORK( COLTYP ), WORK( IZ ),
+     $             INFO )
+      IF( INFO.NE.0 ) THEN
+         RETURN
+      END IF
+*
+*     Unscale.
+*
+      CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO )
+*
+*     Prepare the IDXQ sorting permutation.
+*
+      N1 = K
+      N2 = N - K
+      CALL DLAMRG( N1, N2, D, 1, -1, IDXQ )
+*
+      RETURN
+*
+*     End of DLASD1
+*
+      END
diff --git a/costa/native/external/lapack/dlasd2.f b/costa/native/external/lapack/dlasd2.f
new file mode 100644
index 000000000..9109402c7
--- /dev/null
+++ b/costa/native/external/lapack/dlasd2.f
@@ -0,0 +1,513 @@
+      SUBROUTINE DLASD2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT,
+     $                   LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX,
+     $                   IDXC, IDXQ, COLTYP, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+*     Courant Institute, NAG Ltd., and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE
+      DOUBLE PRECISION   ALPHA, BETA
+*     ..
+*     .. Array Arguments ..
+      INTEGER            COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ),
+     $                   IDXQ( * )
+      DOUBLE PRECISION   D( * ), DSIGMA( * ), U( LDU, * ),
+     $                   U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ),
+     $                   Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLASD2 merges the two sets of singular values together into a single
+*  sorted set.  Then it tries to deflate the size of the problem.
+*  There are two ways in which deflation can occur:  when two or more
+*  singular values are close together or if there is a tiny entry in the
+*  Z vector.  For each such occurrence the order of the related secular
+*  equation problem is reduced by one.
+*
+*  DLASD2 is called from DLASD1.
+*
+*  Arguments
+*  =========
+*
+*  NL     (input) INTEGER
+*         The row dimension of the upper block.  NL >= 1.
+*
+*  NR     (input) INTEGER
+*         The row dimension of the lower block.  NR >= 1.
+*
+*  SQRE   (input) INTEGER
+*         = 0: the lower block is an NR-by-NR square matrix.
+*         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+*
+*         The bidiagonal matrix has N = NL + NR + 1 rows and
+*         M = N + SQRE >= N columns.
+*
+*  K      (output) INTEGER
+*         Contains the dimension of the non-deflated matrix,
+*         This is the order of the related secular equation. 1 <= K <=N.
+*
+*  D      (input/output) DOUBLE PRECISION array, dimension(N)
+*         On entry D contains the singular values of the two submatrices
+*         to be combined.  On exit D contains the trailing (N-K) updated
+*         singular values (those which were deflated) sorted into
+*         increasing order.
+*
+*  ALPHA  (input) DOUBLE PRECISION
+*         Contains the diagonal element associated with the added row.
+*
+*  BETA   (input) DOUBLE PRECISION
+*         Contains the off-diagonal element associated with the added
+*         row.
+*
+*  U      (input/output) DOUBLE PRECISION array, dimension(LDU,N)
+*         On entry U contains the left singular vectors of two
+*         submatrices in the two square blocks with corners at (1,1),
+*         (NL, NL), and (NL+2, NL+2), (N,N).
+*         On exit U contains the trailing (N-K) updated left singular
+*         vectors (those which were deflated) in its last N-K columns.
+*
+*  LDU    (input) INTEGER
+*         The leading dimension of the array U.  LDU >= N.
+*
+*  Z      (output) DOUBLE PRECISION array, dimension(N)
+*         On exit Z contains the updating row vector in the secular
+*         equation.
+*
+*  DSIGMA (output) DOUBLE PRECISION array, dimension (N)
+*         Contains a copy of the diagonal elements (K-1 singular values
+*         and one zero) in the secular equation.
+*
+*  U2     (output) DOUBLE PRECISION array, dimension(LDU2,N)
+*         Contains a copy of the first K-1 left singular vectors which
+*         will be used by DLASD3 in a matrix multiply (DGEMM) to solve
+*         for the new left singular vectors. U2 is arranged into four
+*         blocks. The first block contains a column with 1 at NL+1 and
+*         zero everywhere else; the second block contains non-zero
+*         entries only at and above NL; the third contains non-zero
+*         entries only below NL+1; and the fourth is dense.
+*
+*  LDU2   (input) INTEGER
+*         The leading dimension of the array U2.  LDU2 >= N.
+*
+*  VT     (input/output) DOUBLE PRECISION array, dimension(LDVT,M)
+*         On entry VT' contains the right singular vectors of two
+*         submatrices in the two square blocks with corners at (1,1),
+*         (NL+1, NL+1), and (NL+2, NL+2), (M,M).
+*         On exit VT' contains the trailing (N-K) updated right singular
+*         vectors (those which were deflated) in its last N-K columns.
+*         In case SQRE =1, the last row of VT spans the right null
+*         space.
+*
+*  LDVT   (input) INTEGER
+*         The leading dimension of the array VT.  LDVT >= M.
+*
+*  VT2    (output) DOUBLE PRECISION array, dimension(LDVT2,N)
+*         VT2' contains a copy of the first K right singular vectors
+*         which will be used by DLASD3 in a matrix multiply (DGEMM) to
+*         solve for the new right singular vectors. VT2 is arranged into
+*         three blocks. The first block contains a row that corresponds
+*         to the special 0 diagonal element in SIGMA; the second block
+*         contains non-zeros only at and before NL +1; the third block
+*         contains non-zeros only at and after  NL +2.
+*
+*  LDVT2  (input) INTEGER
+*         The leading dimension of the array VT2.  LDVT2 >= M.
+*
+*  IDXP   (workspace) INTEGER array, dimension(N)
+*         This will contain the permutation used to place deflated
+*         values of D at the end of the array. On output IDXP(2:K)
+*         points to the nondeflated D-values and IDXP(K+1:N)
+*         points to the deflated singular values.
+*
+*  IDX    (workspace) INTEGER array, dimension(N)
+*         This will contain the permutation used to sort the contents of
+*         D into ascending order.
+*
+*  IDXC   (output) INTEGER array, dimension(N)
+*         This will contain the permutation used to arrange the columns
+*         of the deflated U matrix into three groups:  the first group
+*         contains non-zero entries only at and above NL, the second
+*         contains non-zero entries only below NL+2, and the third is
+*         dense.
+*
+*  COLTYP (workspace/output) INTEGER array, dimension(N)
+*         As workspace, this will contain a label which will indicate
+*         which of the following types a column in the U2 matrix or a
+*         row in the VT2 matrix is:
+*         1 : non-zero in the upper half only
+*         2 : non-zero in the lower half only
+*         3 : dense
+*         4 : deflated
+*
+*         On exit, it is an array of dimension 4, with COLTYP(I) being
+*         the dimension of the I-th type columns.
+*
+*  IDXQ   (input) INTEGER array, dimension(N)
+*         This contains the permutation which separately sorts the two
+*         sub-problems in D into ascending order.  Note that entries in
+*         the first hlaf of this permutation must first be moved one
+*         position backward; and entries in the second half
+*         must first have NL+1 added to their values.
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO, EIGHT
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0,
+     $                   EIGHT = 8.0D+0 )
+*     ..
+*     .. Local Arrays ..
+      INTEGER            CTOT( 4 ), PSM( 4 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            CT, I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M,
+     $                   N, NLP1, NLP2
+      DOUBLE PRECISION   C, EPS, HLFTOL, S, TAU, TOL, Z1
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH, DLAPY2
+      EXTERNAL           DLAMCH, DLAPY2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLACPY, DLAMRG, DLASET, DROT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( NL.LT.1 ) THEN
+         INFO = -1
+      ELSE IF( NR.LT.1 ) THEN
+         INFO = -2
+      ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN
+         INFO = -3
+      END IF
+*
+      N = NL + NR + 1
+      M = N + SQRE
+*
+      IF( LDU.LT.N ) THEN
+         INFO = -10
+      ELSE IF( LDVT.LT.M ) THEN
+         INFO = -12
+      ELSE IF( LDU2.LT.N ) THEN
+         INFO = -15
+      ELSE IF( LDVT2.LT.M ) THEN
+         INFO = -17
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLASD2', -INFO )
+         RETURN
+      END IF
+*
+      NLP1 = NL + 1
+      NLP2 = NL + 2
+*
+*     Generate the first part of the vector Z; and move the singular
+*     values in the first part of D one position backward.
+*
+      Z1 = ALPHA*VT( NLP1, NLP1 )
+      Z( 1 ) = Z1
+      DO 10 I = NL, 1, -1
+         Z( I+1 ) = ALPHA*VT( I, NLP1 )
+         D( I+1 ) = D( I )
+         IDXQ( I+1 ) = IDXQ( I ) + 1
+   10 CONTINUE
+*
+*     Generate the second part of the vector Z.
+*
+      DO 20 I = NLP2, M
+         Z( I ) = BETA*VT( I, NLP2 )
+   20 CONTINUE
+*
+*     Initialize some reference arrays.
+*
+      DO 30 I = 2, NLP1
+         COLTYP( I ) = 1
+   30 CONTINUE
+      DO 40 I = NLP2, N
+         COLTYP( I ) = 2
+   40 CONTINUE
+*
+*     Sort the singular values into increasing order
+*
+      DO 50 I = NLP2, N
+         IDXQ( I ) = IDXQ( I ) + NLP1
+   50 CONTINUE
+*
+*     DSIGMA, IDXC, IDXC, and the first column of U2
+*     are used as storage space.
+*
+      DO 60 I = 2, N
+         DSIGMA( I ) = D( IDXQ( I ) )
+         U2( I, 1 ) = Z( IDXQ( I ) )
+         IDXC( I ) = COLTYP( IDXQ( I ) )
+   60 CONTINUE
+*
+      CALL DLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) )
+*
+      DO 70 I = 2, N
+         IDXI = 1 + IDX( I )
+         D( I ) = DSIGMA( IDXI )
+         Z( I ) = U2( IDXI, 1 )
+         COLTYP( I ) = IDXC( IDXI )
+   70 CONTINUE
+*
+*     Calculate the allowable deflation tolerance
+*
+      EPS = DLAMCH( 'Epsilon' )
+      TOL = MAX( ABS( ALPHA ), ABS( BETA ) )
+      TOL = EIGHT*EPS*MAX( ABS( D( N ) ), TOL )
+*
+*     There are 2 kinds of deflation -- first a value in the z-vector
+*     is small, second two (or more) singular values are very close
+*     together (their difference is small).
+*
+*     If the value in the z-vector is small, we simply permute the
+*     array so that the corresponding singular value is moved to the
+*     end.
+*
+*     If two values in the D-vector are close, we perform a two-sided
+*     rotation designed to make one of the corresponding z-vector
+*     entries zero, and then permute the array so that the deflated
+*     singular value is moved to the end.
+*
+*     If there are multiple singular values then the problem deflates.
+*     Here the number of equal singular values are found.  As each equal
+*     singular value is found, an elementary reflector is computed to
+*     rotate the corresponding singular subspace so that the
+*     corresponding components of Z are zero in this new basis.
+*
+      K = 1
+      K2 = N + 1
+      DO 80 J = 2, N
+         IF( ABS( Z( J ) ).LE.TOL ) THEN
+*
+*           Deflate due to small z component.
+*
+            K2 = K2 - 1
+            IDXP( K2 ) = J
+            COLTYP( J ) = 4
+            IF( J.EQ.N )
+     $         GO TO 120
+         ELSE
+            JPREV = J
+            GO TO 90
+         END IF
+   80 CONTINUE
+   90 CONTINUE
+      J = JPREV
+  100 CONTINUE
+      J = J + 1
+      IF( J.GT.N )
+     $   GO TO 110
+      IF( ABS( Z( J ) ).LE.TOL ) THEN
+*
+*        Deflate due to small z component.
+*
+         K2 = K2 - 1
+         IDXP( K2 ) = J
+         COLTYP( J ) = 4
+      ELSE
+*
+*        Check if singular values are close enough to allow deflation.
+*
+         IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN
+*
+*           Deflation is possible.
+*
+            S = Z( JPREV )
+            C = Z( J )
+*
+*           Find sqrt(a**2+b**2) without overflow or
+*           destructive underflow.
+*
+            TAU = DLAPY2( C, S )
+            C = C / TAU
+            S = -S / TAU
+            Z( J ) = TAU
+            Z( JPREV ) = ZERO
+*
+*           Apply back the Givens rotation to the left and right
+*           singular vector matrices.
+*
+            IDXJP = IDXQ( IDX( JPREV )+1 )
+            IDXJ = IDXQ( IDX( J )+1 )
+            IF( IDXJP.LE.NLP1 ) THEN
+               IDXJP = IDXJP - 1
+            END IF
+            IF( IDXJ.LE.NLP1 ) THEN
+               IDXJ = IDXJ - 1
+            END IF
+            CALL DROT( N, U( 1, IDXJP ), 1, U( 1, IDXJ ), 1, C, S )
+            CALL DROT( M, VT( IDXJP, 1 ), LDVT, VT( IDXJ, 1 ), LDVT, C,
+     $                 S )
+            IF( COLTYP( J ).NE.COLTYP( JPREV ) ) THEN
+               COLTYP( J ) = 3
+            END IF
+            COLTYP( JPREV ) = 4
+            K2 = K2 - 1
+            IDXP( K2 ) = JPREV
+            JPREV = J
+         ELSE
+            K = K + 1
+            U2( K, 1 ) = Z( JPREV )
+            DSIGMA( K ) = D( JPREV )
+            IDXP( K ) = JPREV
+            JPREV = J
+         END IF
+      END IF
+      GO TO 100
+  110 CONTINUE
+*
+*     Record the last singular value.
+*
+      K = K + 1
+      U2( K, 1 ) = Z( JPREV )
+      DSIGMA( K ) = D( JPREV )
+      IDXP( K ) = JPREV
+*
+  120 CONTINUE
+*
+*     Count up the total number of the various types of columns, then
+*     form a permutation which positions the four column types into
+*     four groups of uniform structure (although one or more of these
+*     groups may be empty).
+*
+      DO 130 J = 1, 4
+         CTOT( J ) = 0
+  130 CONTINUE
+      DO 140 J = 2, N
+         CT = COLTYP( J )
+         CTOT( CT ) = CTOT( CT ) + 1
+  140 CONTINUE
+*
+*     PSM(*) = Position in SubMatrix (of types 1 through 4)
+*
+      PSM( 1 ) = 2
+      PSM( 2 ) = 2 + CTOT( 1 )
+      PSM( 3 ) = PSM( 2 ) + CTOT( 2 )
+      PSM( 4 ) = PSM( 3 ) + CTOT( 3 )
+*
+*     Fill out the IDXC array so that the permutation which it induces
+*     will place all type-1 columns first, all type-2 columns next,
+*     then all type-3's, and finally all type-4's, starting from the
+*     second column. This applies similarly to the rows of VT.
+*
+      DO 150 J = 2, N
+         JP = IDXP( J )
+         CT = COLTYP( JP )
+         IDXC( PSM( CT ) ) = J
+         PSM( CT ) = PSM( CT ) + 1
+  150 CONTINUE
+*
+*     Sort the singular values and corresponding singular vectors into
+*     DSIGMA, U2, and VT2 respectively.  The singular values/vectors
+*     which were not deflated go into the first K slots of DSIGMA, U2,
+*     and VT2 respectively, while those which were deflated go into the
+*     last N - K slots, except that the first column/row will be treated
+*     separately.
+*
+      DO 160 J = 2, N
+         JP = IDXP( J )
+         DSIGMA( J ) = D( JP )
+         IDXJ = IDXQ( IDX( IDXP( IDXC( J ) ) )+1 )
+         IF( IDXJ.LE.NLP1 ) THEN
+            IDXJ = IDXJ - 1
+         END IF
+         CALL DCOPY( N, U( 1, IDXJ ), 1, U2( 1, J ), 1 )
+         CALL DCOPY( M, VT( IDXJ, 1 ), LDVT, VT2( J, 1 ), LDVT2 )
+  160 CONTINUE
+*
+*     Determine DSIGMA(1), DSIGMA(2) and Z(1)
+*
+      DSIGMA( 1 ) = ZERO
+      HLFTOL = TOL / TWO
+      IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL )
+     $   DSIGMA( 2 ) = HLFTOL
+      IF( M.GT.N ) THEN
+         Z( 1 ) = DLAPY2( Z1, Z( M ) )
+         IF( Z( 1 ).LE.TOL ) THEN
+            C = ONE
+            S = ZERO
+            Z( 1 ) = TOL
+         ELSE
+            C = Z1 / Z( 1 )
+            S = Z( M ) / Z( 1 )
+         END IF
+      ELSE
+         IF( ABS( Z1 ).LE.TOL ) THEN
+            Z( 1 ) = TOL
+         ELSE
+            Z( 1 ) = Z1
+         END IF
+      END IF
+*
+*     Move the rest of the updating row to Z.
+*
+      CALL DCOPY( K-1, U2( 2, 1 ), 1, Z( 2 ), 1 )
+*
+*     Determine the first column of U2, the first row of VT2 and the
+*     last row of VT.
+*
+      CALL DLASET( 'A', N, 1, ZERO, ZERO, U2, LDU2 )
+      U2( NLP1, 1 ) = ONE
+      IF( M.GT.N ) THEN
+         DO 170 I = 1, NLP1
+            VT( M, I ) = -S*VT( NLP1, I )
+            VT2( 1, I ) = C*VT( NLP1, I )
+  170    CONTINUE
+         DO 180 I = NLP2, M
+            VT2( 1, I ) = S*VT( M, I )
+            VT( M, I ) = C*VT( M, I )
+  180    CONTINUE
+      ELSE
+         CALL DCOPY( M, VT( NLP1, 1 ), LDVT, VT2( 1, 1 ), LDVT2 )
+      END IF
+      IF( M.GT.N ) THEN
+         CALL DCOPY( M, VT( M, 1 ), LDVT, VT2( M, 1 ), LDVT2 )
+      END IF
+*
+*     The deflated singular values and their corresponding vectors go
+*     into the back of D, U, and V respectively.
+*
+      IF( N.GT.K ) THEN
+         CALL DCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 )
+         CALL DLACPY( 'A', N, N-K, U2( 1, K+1 ), LDU2, U( 1, K+1 ),
+     $                LDU )
+         CALL DLACPY( 'A', N-K, M, VT2( K+1, 1 ), LDVT2, VT( K+1, 1 ),
+     $                LDVT )
+      END IF
+*
+*     Copy CTOT into COLTYP for referencing in DLASD3.
+*
+      DO 190 J = 1, 4
+         COLTYP( J ) = CTOT( J )
+  190 CONTINUE
+*
+      RETURN
+*
+*     End of DLASD2
+*
+      END
diff --git a/costa/native/external/lapack/dlasd3.f b/costa/native/external/lapack/dlasd3.f
new file mode 100644
index 000000000..e92b076ab
--- /dev/null
+++ b/costa/native/external/lapack/dlasd3.f
@@ -0,0 +1,359 @@
+      SUBROUTINE DLASD3( NL, NR, SQRE, K, D, Q, LDQ, DSIGMA, U, LDU, U2,
+     $                   LDU2, VT, LDVT, VT2, LDVT2, IDXC, CTOT, Z,
+     $                   INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+*     Courant Institute, NAG Ltd., and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDQ, LDU, LDU2, LDVT, LDVT2, NL, NR,
+     $                   SQRE
+*     ..
+*     .. Array Arguments ..
+      INTEGER            CTOT( * ), IDXC( * )
+      DOUBLE PRECISION   D( * ), DSIGMA( * ), Q( LDQ, * ), U( LDU, * ),
+     $                   U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ),
+     $                   Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLASD3 finds all the square roots of the roots of the secular
+*  equation, as defined by the values in D and Z.  It makes the
+*  appropriate calls to DLASD4 and then updates the singular
+*  vectors by matrix multiplication.
+*
+*  This code makes very mild assumptions about floating point
+*  arithmetic. It will work on machines with a guard digit in
+*  add/subtract, or on those binary machines without guard digits
+*  which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2.
+*  It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  DLASD3 is called from DLASD1.
+*
+*  Arguments
+*  =========
+*
+*  NL     (input) INTEGER
+*         The row dimension of the upper block.  NL >= 1.
+*
+*  NR     (input) INTEGER
+*         The row dimension of the lower block.  NR >= 1.
+*
+*  SQRE   (input) INTEGER
+*         = 0: the lower block is an NR-by-NR square matrix.
+*         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+*
+*         The bidiagonal matrix has N = NL + NR + 1 rows and
+*         M = N + SQRE >= N columns.
+*
+*  K      (input) INTEGER
+*         The size of the secular equation, 1 =< K = < N.
+*
+*  D      (output) DOUBLE PRECISION array, dimension(K)
+*         On exit the square roots of the roots of the secular equation,
+*         in ascending order.
+*
+*  Q      (workspace) DOUBLE PRECISION array,
+*                     dimension at least (LDQ,K).
+*
+*  LDQ    (input) INTEGER
+*         The leading dimension of the array Q.  LDQ >= K.
+*
+*  DSIGMA (input) DOUBLE PRECISION array, dimension(K)
+*         The first K elements of this array contain the old roots
+*         of the deflated updating problem.  These are the poles
+*         of the secular equation.
+*
+*  U      (input) DOUBLE PRECISION array, dimension (LDU, N)
+*         The last N - K columns of this matrix contain the deflated
+*         left singular vectors.
+*
+*  LDU    (input) INTEGER
+*         The leading dimension of the array U.  LDU >= N.
+*
+*  U2     (input) DOUBLE PRECISION array, dimension (LDU2, N)
+*         The first K columns of this matrix contain the non-deflated
+*         left singular vectors for the split problem.
+*
+*  LDU2   (input) INTEGER
+*         The leading dimension of the array U2.  LDU2 >= N.
+*
+*  VT     (input) DOUBLE PRECISION array, dimension (LDVT, M)
+*         The last M - K columns of VT' contain the deflated
+*         right singular vectors.
+*
+*  LDVT   (input) INTEGER
+*         The leading dimension of the array VT.  LDVT >= N.
+*
+*  VT2    (input) DOUBLE PRECISION array, dimension (LDVT2, N)
+*         The first K columns of VT2' contain the non-deflated
+*         right singular vectors for the split problem.
+*
+*  LDVT2  (input) INTEGER
+*         The leading dimension of the array VT2.  LDVT2 >= N.
+*
+*  IDXC   (input) INTEGER array, dimension ( N )
+*         The permutation used to arrange the columns of U (and rows of
+*         VT) into three groups:  the first group contains non-zero
+*         entries only at and above (or before) NL +1; the second
+*         contains non-zero entries only at and below (or after) NL+2;
+*         and the third is dense. The first column of U and the row of
+*         VT are treated separately, however.
+*
+*         The rows of the singular vectors found by DLASD4
+*         must be likewise permuted before the matrix multiplies can
+*         take place.
+*
+*  CTOT   (input) INTEGER array, dimension ( 4 )
+*         A count of the total number of the various types of columns
+*         in U (or rows in VT), as described in IDXC. The fourth column
+*         type is any column which has been deflated.
+*
+*  Z      (input) DOUBLE PRECISION array, dimension (K)
+*         The first K elements of this array contain the components
+*         of the deflation-adjusted updating row vector.
+*
+*  INFO   (output) INTEGER
+*         = 0:  successful exit.
+*         < 0:  if INFO = -i, the i-th argument had an illegal value.
+*         > 0:  if INFO = 1, an singular value did not converge
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO, NEGONE
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0,
+     $                   NEGONE = -1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            CTEMP, I, J, JC, KTEMP, M, N, NLP1, NLP2, NRP1
+      DOUBLE PRECISION   RHO, TEMP
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMC3, DNRM2
+      EXTERNAL           DLAMC3, DNRM2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DGEMM, DLACPY, DLASCL, DLASD4, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, SIGN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( NL.LT.1 ) THEN
+         INFO = -1
+      ELSE IF( NR.LT.1 ) THEN
+         INFO = -2
+      ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN
+         INFO = -3
+      END IF
+*
+      N = NL + NR + 1
+      M = N + SQRE
+      NLP1 = NL + 1
+      NLP2 = NL + 2
+*
+      IF( ( K.LT.1 ) .OR. ( K.GT.N ) ) THEN
+         INFO = -4
+      ELSE IF( LDQ.LT.K ) THEN
+         INFO = -7
+      ELSE IF( LDU.LT.N ) THEN
+         INFO = -10
+      ELSE IF( LDU2.LT.N ) THEN
+         INFO = -12
+      ELSE IF( LDVT.LT.M ) THEN
+         INFO = -14
+      ELSE IF( LDVT2.LT.M ) THEN
+         INFO = -16
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLASD3', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( K.EQ.1 ) THEN
+         D( 1 ) = ABS( Z( 1 ) )
+         CALL DCOPY( M, VT2( 1, 1 ), LDVT2, VT( 1, 1 ), LDVT )
+         IF( Z( 1 ).GT.ZERO ) THEN
+            CALL DCOPY( N, U2( 1, 1 ), 1, U( 1, 1 ), 1 )
+         ELSE
+            DO 10 I = 1, N
+               U( I, 1 ) = -U2( I, 1 )
+   10       CONTINUE
+         END IF
+         RETURN
+      END IF
+*
+*     Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can
+*     be computed with high relative accuracy (barring over/underflow).
+*     This is a problem on machines without a guard digit in
+*     add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
+*     The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I),
+*     which on any of these machines zeros out the bottommost
+*     bit of DSIGMA(I) if it is 1; this makes the subsequent
+*     subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation
+*     occurs. On binary machines with a guard digit (almost all
+*     machines) it does not change DSIGMA(I) at all. On hexadecimal
+*     and decimal machines with a guard digit, it slightly
+*     changes the bottommost bits of DSIGMA(I). It does not account
+*     for hexadecimal or decimal machines without guard digits
+*     (we know of none). We use a subroutine call to compute
+*     2*DLAMBDA(I) to prevent optimizing compilers from eliminating
+*     this code.
+*
+      DO 20 I = 1, K
+         DSIGMA( I ) = DLAMC3( DSIGMA( I ), DSIGMA( I ) ) - DSIGMA( I )
+   20 CONTINUE
+*
+*     Keep a copy of Z.
+*
+      CALL DCOPY( K, Z, 1, Q, 1 )
+*
+*     Normalize Z.
+*
+      RHO = DNRM2( K, Z, 1 )
+      CALL DLASCL( 'G', 0, 0, RHO, ONE, K, 1, Z, K, INFO )
+      RHO = RHO*RHO
+*
+*     Find the new singular values.
+*
+      DO 30 J = 1, K
+         CALL DLASD4( K, J, DSIGMA, Z, U( 1, J ), RHO, D( J ),
+     $                VT( 1, J ), INFO )
+*
+*        If the zero finder fails, the computation is terminated.
+*
+         IF( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+   30 CONTINUE
+*
+*     Compute updated Z.
+*
+      DO 60 I = 1, K
+         Z( I ) = U( I, K )*VT( I, K )
+         DO 40 J = 1, I - 1
+            Z( I ) = Z( I )*( U( I, J )*VT( I, J ) /
+     $               ( DSIGMA( I )-DSIGMA( J ) ) /
+     $               ( DSIGMA( I )+DSIGMA( J ) ) )
+   40    CONTINUE
+         DO 50 J = I, K - 1
+            Z( I ) = Z( I )*( U( I, J )*VT( I, J ) /
+     $               ( DSIGMA( I )-DSIGMA( J+1 ) ) /
+     $               ( DSIGMA( I )+DSIGMA( J+1 ) ) )
+   50    CONTINUE
+         Z( I ) = SIGN( SQRT( ABS( Z( I ) ) ), Q( I, 1 ) )
+   60 CONTINUE
+*
+*     Compute left singular vectors of the modified diagonal matrix,
+*     and store related information for the right singular vectors.
+*
+      DO 90 I = 1, K
+         VT( 1, I ) = Z( 1 ) / U( 1, I ) / VT( 1, I )
+         U( 1, I ) = NEGONE
+         DO 70 J = 2, K
+            VT( J, I ) = Z( J ) / U( J, I ) / VT( J, I )
+            U( J, I ) = DSIGMA( J )*VT( J, I )
+   70    CONTINUE
+         TEMP = DNRM2( K, U( 1, I ), 1 )
+         Q( 1, I ) = U( 1, I ) / TEMP
+         DO 80 J = 2, K
+            JC = IDXC( J )
+            Q( J, I ) = U( JC, I ) / TEMP
+   80    CONTINUE
+   90 CONTINUE
+*
+*     Update the left singular vector matrix.
+*
+      IF( K.EQ.2 ) THEN
+         CALL DGEMM( 'N', 'N', N, K, K, ONE, U2, LDU2, Q, LDQ, ZERO, U,
+     $               LDU )
+         GO TO 100
+      END IF
+      IF( CTOT( 1 ).GT.0 ) THEN
+         CALL DGEMM( 'N', 'N', NL, K, CTOT( 1 ), ONE, U2( 1, 2 ), LDU2,
+     $               Q( 2, 1 ), LDQ, ZERO, U( 1, 1 ), LDU )
+         IF( CTOT( 3 ).GT.0 ) THEN
+            KTEMP = 2 + CTOT( 1 ) + CTOT( 2 )
+            CALL DGEMM( 'N', 'N', NL, K, CTOT( 3 ), ONE, U2( 1, KTEMP ),
+     $                  LDU2, Q( KTEMP, 1 ), LDQ, ONE, U( 1, 1 ), LDU )
+         END IF
+      ELSE IF( CTOT( 3 ).GT.0 ) THEN
+         KTEMP = 2 + CTOT( 1 ) + CTOT( 2 )
+         CALL DGEMM( 'N', 'N', NL, K, CTOT( 3 ), ONE, U2( 1, KTEMP ),
+     $               LDU2, Q( KTEMP, 1 ), LDQ, ZERO, U( 1, 1 ), LDU )
+      ELSE
+         CALL DLACPY( 'F', NL, K, U2, LDU2, U, LDU )
+      END IF
+      CALL DCOPY( K, Q( 1, 1 ), LDQ, U( NLP1, 1 ), LDU )
+      KTEMP = 2 + CTOT( 1 )
+      CTEMP = CTOT( 2 ) + CTOT( 3 )
+      CALL DGEMM( 'N', 'N', NR, K, CTEMP, ONE, U2( NLP2, KTEMP ), LDU2,
+     $            Q( KTEMP, 1 ), LDQ, ZERO, U( NLP2, 1 ), LDU )
+*
+*     Generate the right singular vectors.
+*
+  100 CONTINUE
+      DO 120 I = 1, K
+         TEMP = DNRM2( K, VT( 1, I ), 1 )
+         Q( I, 1 ) = VT( 1, I ) / TEMP
+         DO 110 J = 2, K
+            JC = IDXC( J )
+            Q( I, J ) = VT( JC, I ) / TEMP
+  110    CONTINUE
+  120 CONTINUE
+*
+*     Update the right singular vector matrix.
+*
+      IF( K.EQ.2 ) THEN
+         CALL DGEMM( 'N', 'N', K, M, K, ONE, Q, LDQ, VT2, LDVT2, ZERO,
+     $               VT, LDVT )
+         RETURN
+      END IF
+      KTEMP = 1 + CTOT( 1 )
+      CALL DGEMM( 'N', 'N', K, NLP1, KTEMP, ONE, Q( 1, 1 ), LDQ,
+     $            VT2( 1, 1 ), LDVT2, ZERO, VT( 1, 1 ), LDVT )
+      KTEMP = 2 + CTOT( 1 ) + CTOT( 2 )
+      IF( KTEMP.LE.LDVT2 )
+     $   CALL DGEMM( 'N', 'N', K, NLP1, CTOT( 3 ), ONE, Q( 1, KTEMP ),
+     $               LDQ, VT2( KTEMP, 1 ), LDVT2, ONE, VT( 1, 1 ),
+     $               LDVT )
+*
+      KTEMP = CTOT( 1 ) + 1
+      NRP1 = NR + SQRE
+      IF( KTEMP.GT.1 ) THEN
+         DO 130 I = 1, K
+            Q( I, KTEMP ) = Q( I, 1 )
+  130    CONTINUE
+         DO 140 I = NLP2, M
+            VT2( KTEMP, I ) = VT2( 1, I )
+  140    CONTINUE
+      END IF
+      CTEMP = 1 + CTOT( 2 ) + CTOT( 3 )
+      CALL DGEMM( 'N', 'N', K, NRP1, CTEMP, ONE, Q( 1, KTEMP ), LDQ,
+     $            VT2( KTEMP, NLP2 ), LDVT2, ZERO, VT( 1, NLP2 ), LDVT )
+*
+      RETURN
+*
+*     End of DLASD3
+*
+      END
diff --git a/costa/native/external/lapack/dlasd4.f b/costa/native/external/lapack/dlasd4.f
new file mode 100644
index 000000000..fb10ac7ba
--- /dev/null
+++ b/costa/native/external/lapack/dlasd4.f
@@ -0,0 +1,891 @@
+      SUBROUTINE DLASD4( N, I, D, Z, DELTA, RHO, SIGMA, WORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+*     Courant Institute, NAG Ltd., and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            I, INFO, N
+      DOUBLE PRECISION   RHO, SIGMA
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * ), DELTA( * ), WORK( * ), Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This subroutine computes the square root of the I-th updated
+*  eigenvalue of a positive symmetric rank-one modification to
+*  a positive diagonal matrix whose entries are given as the squares
+*  of the corresponding entries in the array d, and that
+*
+*         0 <= D(i) < D(j)  for  i < j
+*
+*  and that RHO > 0. This is arranged by the calling routine, and is
+*  no loss in generality.  The rank-one modified system is thus
+*
+*         diag( D ) * diag( D ) +  RHO *  Z * Z_transpose.
+*
+*  where we assume the Euclidean norm of Z is 1.
+*
+*  The method consists of approximating the rational functions in the
+*  secular equation by simpler interpolating rational functions.
+*
+*  Arguments
+*  =========
+*
+*  N      (input) INTEGER
+*         The length of all arrays.
+*
+*  I      (input) INTEGER
+*         The index of the eigenvalue to be computed.  1 <= I <= N.
+*
+*  D      (input) DOUBLE PRECISION array, dimension ( N )
+*         The original eigenvalues.  It is assumed that they are in
+*         order, 0 <= D(I) < D(J)  for I < J.
+*
+*  Z      (input) DOUBLE PRECISION array, dimension ( N )
+*         The components of the updating vector.
+*
+*  DELTA  (output) DOUBLE PRECISION array, dimension ( N )
+*         If N .ne. 1, DELTA contains (D(j) - sigma_I) in its  j-th
+*         component.  If N = 1, then DELTA(1) = 1.  The vector DELTA
+*         contains the information necessary to construct the
+*         (singular) eigenvectors.
+*
+*  RHO    (input) DOUBLE PRECISION
+*         The scalar in the symmetric updating formula.
+*
+*  SIGMA  (output) DOUBLE PRECISION
+*         The computed lambda_I, the I-th updated eigenvalue.
+*
+*  WORK   (workspace) DOUBLE PRECISION array, dimension ( N )
+*         If N .ne. 1, WORK contains (D(j) + sigma_I) in its  j-th
+*         component.  If N = 1, then WORK( 1 ) = 1.
+*
+*  INFO   (output) INTEGER
+*         = 0:  successful exit
+*         > 0:  if INFO = 1, the updating process failed.
+*
+*  Internal Parameters
+*  ===================
+*
+*  Logical variable ORGATI (origin-at-i?) is used for distinguishing
+*  whether D(i) or D(i+1) is treated as the origin.
+*
+*            ORGATI = .true.    origin at i
+*            ORGATI = .false.   origin at i+1
+*
+*  Logical variable SWTCH3 (switch-for-3-poles?) is for noting
+*  if we are working with THREE poles!
+*
+*  MAXIT is the maximum number of iterations allowed for each
+*  eigenvalue.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ren-Cang Li, Computer Science Division, University of California
+*     at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            MAXIT
+      PARAMETER          ( MAXIT = 20 )
+      DOUBLE PRECISION   ZERO, ONE, TWO, THREE, FOUR, EIGHT, TEN
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0,
+     $                   THREE = 3.0D+0, FOUR = 4.0D+0, EIGHT = 8.0D+0,
+     $                   TEN = 10.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ORGATI, SWTCH, SWTCH3
+      INTEGER            II, IIM1, IIP1, IP1, ITER, J, NITER
+      DOUBLE PRECISION   A, B, C, DELSQ, DELSQ2, DPHI, DPSI, DTIIM,
+     $                   DTIIP, DTIPSQ, DTISQ, DTNSQ, DTNSQ1, DW, EPS,
+     $                   ERRETM, ETA, PHI, PREW, PSI, RHOINV, SG2LB,
+     $                   SG2UB, TAU, TEMP, TEMP1, TEMP2, W
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   DD( 3 ), ZZ( 3 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLAED6, DLASD5
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Since this routine is called in an inner loop, we do no argument
+*     checking.
+*
+*     Quick return for N=1 and 2.
+*
+      INFO = 0
+      IF( N.EQ.1 ) THEN
+*
+*        Presumably, I=1 upon entry
+*
+         SIGMA = SQRT( D( 1 )*D( 1 )+RHO*Z( 1 )*Z( 1 ) )
+         DELTA( 1 ) = ONE
+         WORK( 1 ) = ONE
+         RETURN
+      END IF
+      IF( N.EQ.2 ) THEN
+         CALL DLASD5( I, D, Z, DELTA, RHO, SIGMA, WORK )
+         RETURN
+      END IF
+*
+*     Compute machine epsilon
+*
+      EPS = DLAMCH( 'Epsilon' )
+      RHOINV = ONE / RHO
+*
+*     The case I = N
+*
+      IF( I.EQ.N ) THEN
+*
+*        Initialize some basic variables
+*
+         II = N - 1
+         NITER = 1
+*
+*        Calculate initial guess
+*
+         TEMP = RHO / TWO
+*
+*        If ||Z||_2 is not one, then TEMP should be set to
+*        RHO * ||Z||_2^2 / TWO
+*
+         TEMP1 = TEMP / ( D( N )+SQRT( D( N )*D( N )+TEMP ) )
+         DO 10 J = 1, N
+            WORK( J ) = D( J ) + D( N ) + TEMP1
+            DELTA( J ) = ( D( J )-D( N ) ) - TEMP1
+   10    CONTINUE
+*
+         PSI = ZERO
+         DO 20 J = 1, N - 2
+            PSI = PSI + Z( J )*Z( J ) / ( DELTA( J )*WORK( J ) )
+   20    CONTINUE
+*
+         C = RHOINV + PSI
+         W = C + Z( II )*Z( II ) / ( DELTA( II )*WORK( II ) ) +
+     $       Z( N )*Z( N ) / ( DELTA( N )*WORK( N ) )
+*
+         IF( W.LE.ZERO ) THEN
+            TEMP1 = SQRT( D( N )*D( N )+RHO )
+            TEMP = Z( N-1 )*Z( N-1 ) / ( ( D( N-1 )+TEMP1 )*
+     $             ( D( N )-D( N-1 )+RHO / ( D( N )+TEMP1 ) ) ) +
+     $             Z( N )*Z( N ) / RHO
+*
+*           The following TAU is to approximate
+*           SIGMA_n^2 - D( N )*D( N )
+*
+            IF( C.LE.TEMP ) THEN
+               TAU = RHO
+            ELSE
+               DELSQ = ( D( N )-D( N-1 ) )*( D( N )+D( N-1 ) )
+               A = -C*DELSQ + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N )
+               B = Z( N )*Z( N )*DELSQ
+               IF( A.LT.ZERO ) THEN
+                  TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A )
+               ELSE
+                  TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C )
+               END IF
+            END IF
+*
+*           It can be proved that
+*               D(N)^2+RHO/2 <= SIGMA_n^2 < D(N)^2+TAU <= D(N)^2+RHO
+*
+         ELSE
+            DELSQ = ( D( N )-D( N-1 ) )*( D( N )+D( N-1 ) )
+            A = -C*DELSQ + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N )
+            B = Z( N )*Z( N )*DELSQ
+*
+*           The following TAU is to approximate
+*           SIGMA_n^2 - D( N )*D( N )
+*
+            IF( A.LT.ZERO ) THEN
+               TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A )
+            ELSE
+               TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C )
+            END IF
+*
+*           It can be proved that
+*           D(N)^2 < D(N)^2+TAU < SIGMA(N)^2 < D(N)^2+RHO/2
+*
+         END IF
+*
+*        The following ETA is to approximate SIGMA_n - D( N )
+*
+         ETA = TAU / ( D( N )+SQRT( D( N )*D( N )+TAU ) )
+*
+         SIGMA = D( N ) + ETA
+         DO 30 J = 1, N
+            DELTA( J ) = ( D( J )-D( I ) ) - ETA
+            WORK( J ) = D( J ) + D( I ) + ETA
+   30    CONTINUE
+*
+*        Evaluate PSI and the derivative DPSI
+*
+         DPSI = ZERO
+         PSI = ZERO
+         ERRETM = ZERO
+         DO 40 J = 1, II
+            TEMP = Z( J ) / ( DELTA( J )*WORK( J ) )
+            PSI = PSI + Z( J )*TEMP
+            DPSI = DPSI + TEMP*TEMP
+            ERRETM = ERRETM + PSI
+   40    CONTINUE
+         ERRETM = ABS( ERRETM )
+*
+*        Evaluate PHI and the derivative DPHI
+*
+         TEMP = Z( N ) / ( DELTA( N )*WORK( N ) )
+         PHI = Z( N )*TEMP
+         DPHI = TEMP*TEMP
+         ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV +
+     $            ABS( TAU )*( DPSI+DPHI )
+*
+         W = RHOINV + PHI + PSI
+*
+*        Test for convergence
+*
+         IF( ABS( W ).LE.EPS*ERRETM ) THEN
+            GO TO 240
+         END IF
+*
+*        Calculate the new step
+*
+         NITER = NITER + 1
+         DTNSQ1 = WORK( N-1 )*DELTA( N-1 )
+         DTNSQ = WORK( N )*DELTA( N )
+         C = W - DTNSQ1*DPSI - DTNSQ*DPHI
+         A = ( DTNSQ+DTNSQ1 )*W - DTNSQ*DTNSQ1*( DPSI+DPHI )
+         B = DTNSQ*DTNSQ1*W
+         IF( C.LT.ZERO )
+     $      C = ABS( C )
+         IF( C.EQ.ZERO ) THEN
+            ETA = RHO - SIGMA*SIGMA
+         ELSE IF( A.GE.ZERO ) THEN
+            ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+         ELSE
+            ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) )
+         END IF
+*
+*        Note, eta should be positive if w is negative, and
+*        eta should be negative otherwise. However,
+*        if for some reason caused by roundoff, eta*w > 0,
+*        we simply use one Newton step instead. This way
+*        will guarantee eta*w < 0.
+*
+         IF( W*ETA.GT.ZERO )
+     $      ETA = -W / ( DPSI+DPHI )
+         TEMP = ETA - DTNSQ
+         IF( TEMP.GT.RHO )
+     $      ETA = RHO + DTNSQ
+*
+         TAU = TAU + ETA
+         ETA = ETA / ( SIGMA+SQRT( ETA+SIGMA*SIGMA ) )
+         DO 50 J = 1, N
+            DELTA( J ) = DELTA( J ) - ETA
+            WORK( J ) = WORK( J ) + ETA
+   50    CONTINUE
+*
+         SIGMA = SIGMA + ETA
+*
+*        Evaluate PSI and the derivative DPSI
+*
+         DPSI = ZERO
+         PSI = ZERO
+         ERRETM = ZERO
+         DO 60 J = 1, II
+            TEMP = Z( J ) / ( WORK( J )*DELTA( J ) )
+            PSI = PSI + Z( J )*TEMP
+            DPSI = DPSI + TEMP*TEMP
+            ERRETM = ERRETM + PSI
+   60    CONTINUE
+         ERRETM = ABS( ERRETM )
+*
+*        Evaluate PHI and the derivative DPHI
+*
+         TEMP = Z( N ) / ( WORK( N )*DELTA( N ) )
+         PHI = Z( N )*TEMP
+         DPHI = TEMP*TEMP
+         ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV +
+     $            ABS( TAU )*( DPSI+DPHI )
+*
+         W = RHOINV + PHI + PSI
+*
+*        Main loop to update the values of the array   DELTA
+*
+         ITER = NITER + 1
+*
+         DO 90 NITER = ITER, MAXIT
+*
+*           Test for convergence
+*
+            IF( ABS( W ).LE.EPS*ERRETM ) THEN
+               GO TO 240
+            END IF
+*
+*           Calculate the new step
+*
+            DTNSQ1 = WORK( N-1 )*DELTA( N-1 )
+            DTNSQ = WORK( N )*DELTA( N )
+            C = W - DTNSQ1*DPSI - DTNSQ*DPHI
+            A = ( DTNSQ+DTNSQ1 )*W - DTNSQ1*DTNSQ*( DPSI+DPHI )
+            B = DTNSQ1*DTNSQ*W
+            IF( A.GE.ZERO ) THEN
+               ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+            ELSE
+               ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) )
+            END IF
+*
+*           Note, eta should be positive if w is negative, and
+*           eta should be negative otherwise. However,
+*           if for some reason caused by roundoff, eta*w > 0,
+*           we simply use one Newton step instead. This way
+*           will guarantee eta*w < 0.
+*
+            IF( W*ETA.GT.ZERO )
+     $         ETA = -W / ( DPSI+DPHI )
+            TEMP = ETA - DTNSQ
+            IF( TEMP.LE.ZERO )
+     $         ETA = ETA / TWO
+*
+            TAU = TAU + ETA
+            ETA = ETA / ( SIGMA+SQRT( ETA+SIGMA*SIGMA ) )
+            DO 70 J = 1, N
+               DELTA( J ) = DELTA( J ) - ETA
+               WORK( J ) = WORK( J ) + ETA
+   70       CONTINUE
+*
+            SIGMA = SIGMA + ETA
+*
+*           Evaluate PSI and the derivative DPSI
+*
+            DPSI = ZERO
+            PSI = ZERO
+            ERRETM = ZERO
+            DO 80 J = 1, II
+               TEMP = Z( J ) / ( WORK( J )*DELTA( J ) )
+               PSI = PSI + Z( J )*TEMP
+               DPSI = DPSI + TEMP*TEMP
+               ERRETM = ERRETM + PSI
+   80       CONTINUE
+            ERRETM = ABS( ERRETM )
+*
+*           Evaluate PHI and the derivative DPHI
+*
+            TEMP = Z( N ) / ( WORK( N )*DELTA( N ) )
+            PHI = Z( N )*TEMP
+            DPHI = TEMP*TEMP
+            ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV +
+     $               ABS( TAU )*( DPSI+DPHI )
+*
+            W = RHOINV + PHI + PSI
+   90    CONTINUE
+*
+*        Return with INFO = 1, NITER = MAXIT and not converged
+*
+         INFO = 1
+         GO TO 240
+*
+*        End for the case I = N
+*
+      ELSE
+*
+*        The case for I < N
+*
+         NITER = 1
+         IP1 = I + 1
+*
+*        Calculate initial guess
+*
+         DELSQ = ( D( IP1 )-D( I ) )*( D( IP1 )+D( I ) )
+         DELSQ2 = DELSQ / TWO
+         TEMP = DELSQ2 / ( D( I )+SQRT( D( I )*D( I )+DELSQ2 ) )
+         DO 100 J = 1, N
+            WORK( J ) = D( J ) + D( I ) + TEMP
+            DELTA( J ) = ( D( J )-D( I ) ) - TEMP
+  100    CONTINUE
+*
+         PSI = ZERO
+         DO 110 J = 1, I - 1
+            PSI = PSI + Z( J )*Z( J ) / ( WORK( J )*DELTA( J ) )
+  110    CONTINUE
+*
+         PHI = ZERO
+         DO 120 J = N, I + 2, -1
+            PHI = PHI + Z( J )*Z( J ) / ( WORK( J )*DELTA( J ) )
+  120    CONTINUE
+         C = RHOINV + PSI + PHI
+         W = C + Z( I )*Z( I ) / ( WORK( I )*DELTA( I ) ) +
+     $       Z( IP1 )*Z( IP1 ) / ( WORK( IP1 )*DELTA( IP1 ) )
+*
+         IF( W.GT.ZERO ) THEN
+*
+*           d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2
+*
+*           We choose d(i) as origin.
+*
+            ORGATI = .TRUE.
+            SG2LB = ZERO
+            SG2UB = DELSQ2
+            A = C*DELSQ + Z( I )*Z( I ) + Z( IP1 )*Z( IP1 )
+            B = Z( I )*Z( I )*DELSQ
+            IF( A.GT.ZERO ) THEN
+               TAU = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
+            ELSE
+               TAU = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+            END IF
+*
+*           TAU now is an estimation of SIGMA^2 - D( I )^2. The
+*           following, however, is the corresponding estimation of
+*           SIGMA - D( I ).
+*
+            ETA = TAU / ( D( I )+SQRT( D( I )*D( I )+TAU ) )
+         ELSE
+*
+*           (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2
+*
+*           We choose d(i+1) as origin.
+*
+            ORGATI = .FALSE.
+            SG2LB = -DELSQ2
+            SG2UB = ZERO
+            A = C*DELSQ - Z( I )*Z( I ) - Z( IP1 )*Z( IP1 )
+            B = Z( IP1 )*Z( IP1 )*DELSQ
+            IF( A.LT.ZERO ) THEN
+               TAU = TWO*B / ( A-SQRT( ABS( A*A+FOUR*B*C ) ) )
+            ELSE
+               TAU = -( A+SQRT( ABS( A*A+FOUR*B*C ) ) ) / ( TWO*C )
+            END IF
+*
+*           TAU now is an estimation of SIGMA^2 - D( IP1 )^2. The
+*           following, however, is the corresponding estimation of
+*           SIGMA - D( IP1 ).
+*
+            ETA = TAU / ( D( IP1 )+SQRT( ABS( D( IP1 )*D( IP1 )+
+     $            TAU ) ) )
+         END IF
+*
+         IF( ORGATI ) THEN
+            II = I
+            SIGMA = D( I ) + ETA
+            DO 130 J = 1, N
+               WORK( J ) = D( J ) + D( I ) + ETA
+               DELTA( J ) = ( D( J )-D( I ) ) - ETA
+  130       CONTINUE
+         ELSE
+            II = I + 1
+            SIGMA = D( IP1 ) + ETA
+            DO 140 J = 1, N
+               WORK( J ) = D( J ) + D( IP1 ) + ETA
+               DELTA( J ) = ( D( J )-D( IP1 ) ) - ETA
+  140       CONTINUE
+         END IF
+         IIM1 = II - 1
+         IIP1 = II + 1
+*
+*        Evaluate PSI and the derivative DPSI
+*
+         DPSI = ZERO
+         PSI = ZERO
+         ERRETM = ZERO
+         DO 150 J = 1, IIM1
+            TEMP = Z( J ) / ( WORK( J )*DELTA( J ) )
+            PSI = PSI + Z( J )*TEMP
+            DPSI = DPSI + TEMP*TEMP
+            ERRETM = ERRETM + PSI
+  150    CONTINUE
+         ERRETM = ABS( ERRETM )
+*
+*        Evaluate PHI and the derivative DPHI
+*
+         DPHI = ZERO
+         PHI = ZERO
+         DO 160 J = N, IIP1, -1
+            TEMP = Z( J ) / ( WORK( J )*DELTA( J ) )
+            PHI = PHI + Z( J )*TEMP
+            DPHI = DPHI + TEMP*TEMP
+            ERRETM = ERRETM + PHI
+  160    CONTINUE
+*
+         W = RHOINV + PHI + PSI
+*
+*        W is the value of the secular function with
+*        its ii-th element removed.
+*
+         SWTCH3 = .FALSE.
+         IF( ORGATI ) THEN
+            IF( W.LT.ZERO )
+     $         SWTCH3 = .TRUE.
+         ELSE
+            IF( W.GT.ZERO )
+     $         SWTCH3 = .TRUE.
+         END IF
+         IF( II.EQ.1 .OR. II.EQ.N )
+     $      SWTCH3 = .FALSE.
+*
+         TEMP = Z( II ) / ( WORK( II )*DELTA( II ) )
+         DW = DPSI + DPHI + TEMP*TEMP
+         TEMP = Z( II )*TEMP
+         W = W + TEMP
+         ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV +
+     $            THREE*ABS( TEMP ) + ABS( TAU )*DW
+*
+*        Test for convergence
+*
+         IF( ABS( W ).LE.EPS*ERRETM ) THEN
+            GO TO 240
+         END IF
+*
+         IF( W.LE.ZERO ) THEN
+            SG2LB = MAX( SG2LB, TAU )
+         ELSE
+            SG2UB = MIN( SG2UB, TAU )
+         END IF
+*
+*        Calculate the new step
+*
+         NITER = NITER + 1
+         IF( .NOT.SWTCH3 ) THEN
+            DTIPSQ = WORK( IP1 )*DELTA( IP1 )
+            DTISQ = WORK( I )*DELTA( I )
+            IF( ORGATI ) THEN
+               C = W - DTIPSQ*DW + DELSQ*( Z( I ) / DTISQ )**2
+            ELSE
+               C = W - DTISQ*DW - DELSQ*( Z( IP1 ) / DTIPSQ )**2
+            END IF
+            A = ( DTIPSQ+DTISQ )*W - DTIPSQ*DTISQ*DW
+            B = DTIPSQ*DTISQ*W
+            IF( C.EQ.ZERO ) THEN
+               IF( A.EQ.ZERO ) THEN
+                  IF( ORGATI ) THEN
+                     A = Z( I )*Z( I ) + DTIPSQ*DTIPSQ*( DPSI+DPHI )
+                  ELSE
+                     A = Z( IP1 )*Z( IP1 ) + DTISQ*DTISQ*( DPSI+DPHI )
+                  END IF
+               END IF
+               ETA = B / A
+            ELSE IF( A.LE.ZERO ) THEN
+               ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+            ELSE
+               ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
+            END IF
+         ELSE
+*
+*           Interpolation using THREE most relevant poles
+*
+            DTIIM = WORK( IIM1 )*DELTA( IIM1 )
+            DTIIP = WORK( IIP1 )*DELTA( IIP1 )
+            TEMP = RHOINV + PSI + PHI
+            IF( ORGATI ) THEN
+               TEMP1 = Z( IIM1 ) / DTIIM
+               TEMP1 = TEMP1*TEMP1
+               C = ( TEMP - DTIIP*( DPSI+DPHI ) ) -
+     $             ( D( IIM1 )-D( IIP1 ) )*( D( IIM1 )+D( IIP1 ) )*TEMP1
+               ZZ( 1 ) = Z( IIM1 )*Z( IIM1 )
+               IF( DPSI.LT.TEMP1 ) THEN
+                  ZZ( 3 ) = DTIIP*DTIIP*DPHI
+               ELSE
+                  ZZ( 3 ) = DTIIP*DTIIP*( ( DPSI-TEMP1 )+DPHI )
+               END IF
+            ELSE
+               TEMP1 = Z( IIP1 ) / DTIIP
+               TEMP1 = TEMP1*TEMP1
+               C = ( TEMP - DTIIM*( DPSI+DPHI ) ) -
+     $             ( D( IIP1 )-D( IIM1 ) )*( D( IIM1 )+D( IIP1 ) )*TEMP1
+               IF( DPHI.LT.TEMP1 ) THEN
+                  ZZ( 1 ) = DTIIM*DTIIM*DPSI
+               ELSE
+                  ZZ( 1 ) = DTIIM*DTIIM*( DPSI+( DPHI-TEMP1 ) )
+               END IF
+               ZZ( 3 ) = Z( IIP1 )*Z( IIP1 )
+            END IF
+            ZZ( 2 ) = Z( II )*Z( II )
+            DD( 1 ) = DTIIM
+            DD( 2 ) = DELTA( II )*WORK( II )
+            DD( 3 ) = DTIIP
+            CALL DLAED6( NITER, ORGATI, C, DD, ZZ, W, ETA, INFO )
+            IF( INFO.NE.0 )
+     $         GO TO 240
+         END IF
+*
+*        Note, eta should be positive if w is negative, and
+*        eta should be negative otherwise. However,
+*        if for some reason caused by roundoff, eta*w > 0,
+*        we simply use one Newton step instead. This way
+*        will guarantee eta*w < 0.
+*
+         IF( W*ETA.GE.ZERO )
+     $      ETA = -W / DW
+         IF( ORGATI ) THEN
+            TEMP1 = WORK( I )*DELTA( I )
+            TEMP = ETA - TEMP1
+         ELSE
+            TEMP1 = WORK( IP1 )*DELTA( IP1 )
+            TEMP = ETA - TEMP1
+         END IF
+         IF( TEMP.GT.SG2UB .OR. TEMP.LT.SG2LB ) THEN
+            IF( W.LT.ZERO ) THEN
+               ETA = ( SG2UB-TAU ) / TWO
+            ELSE
+               ETA = ( SG2LB-TAU ) / TWO
+            END IF
+         END IF
+*
+         TAU = TAU + ETA
+         ETA = ETA / ( SIGMA+SQRT( SIGMA*SIGMA+ETA ) )
+*
+         PREW = W
+*
+         SIGMA = SIGMA + ETA
+         DO 170 J = 1, N
+            WORK( J ) = WORK( J ) + ETA
+            DELTA( J ) = DELTA( J ) - ETA
+  170    CONTINUE
+*
+*        Evaluate PSI and the derivative DPSI
+*
+         DPSI = ZERO
+         PSI = ZERO
+         ERRETM = ZERO
+         DO 180 J = 1, IIM1
+            TEMP = Z( J ) / ( WORK( J )*DELTA( J ) )
+            PSI = PSI + Z( J )*TEMP
+            DPSI = DPSI + TEMP*TEMP
+            ERRETM = ERRETM + PSI
+  180    CONTINUE
+         ERRETM = ABS( ERRETM )
+*
+*        Evaluate PHI and the derivative DPHI
+*
+         DPHI = ZERO
+         PHI = ZERO
+         DO 190 J = N, IIP1, -1
+            TEMP = Z( J ) / ( WORK( J )*DELTA( J ) )
+            PHI = PHI + Z( J )*TEMP
+            DPHI = DPHI + TEMP*TEMP
+            ERRETM = ERRETM + PHI
+  190    CONTINUE
+*
+         TEMP = Z( II ) / ( WORK( II )*DELTA( II ) )
+         DW = DPSI + DPHI + TEMP*TEMP
+         TEMP = Z( II )*TEMP
+         W = RHOINV + PHI + PSI + TEMP
+         ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV +
+     $            THREE*ABS( TEMP ) + ABS( TAU )*DW
+*
+         IF( W.LE.ZERO ) THEN
+            SG2LB = MAX( SG2LB, TAU )
+         ELSE
+            SG2UB = MIN( SG2UB, TAU )
+         END IF
+*
+         SWTCH = .FALSE.
+         IF( ORGATI ) THEN
+            IF( -W.GT.ABS( PREW ) / TEN )
+     $         SWTCH = .TRUE.
+         ELSE
+            IF( W.GT.ABS( PREW ) / TEN )
+     $         SWTCH = .TRUE.
+         END IF
+*
+*        Main loop to update the values of the array   DELTA and WORK
+*
+         ITER = NITER + 1
+*
+         DO 230 NITER = ITER, MAXIT
+*
+*           Test for convergence
+*
+            IF( ABS( W ).LE.EPS*ERRETM ) THEN
+               GO TO 240
+            END IF
+*
+*           Calculate the new step
+*
+            IF( .NOT.SWTCH3 ) THEN
+               DTIPSQ = WORK( IP1 )*DELTA( IP1 )
+               DTISQ = WORK( I )*DELTA( I )
+               IF( .NOT.SWTCH ) THEN
+                  IF( ORGATI ) THEN
+                     C = W - DTIPSQ*DW + DELSQ*( Z( I ) / DTISQ )**2
+                  ELSE
+                     C = W - DTISQ*DW - DELSQ*( Z( IP1 ) / DTIPSQ )**2
+                  END IF
+               ELSE
+                  TEMP = Z( II ) / ( WORK( II )*DELTA( II ) )
+                  IF( ORGATI ) THEN
+                     DPSI = DPSI + TEMP*TEMP
+                  ELSE
+                     DPHI = DPHI + TEMP*TEMP
+                  END IF
+                  C = W - DTISQ*DPSI - DTIPSQ*DPHI
+               END IF
+               A = ( DTIPSQ+DTISQ )*W - DTIPSQ*DTISQ*DW
+               B = DTIPSQ*DTISQ*W
+               IF( C.EQ.ZERO ) THEN
+                  IF( A.EQ.ZERO ) THEN
+                     IF( .NOT.SWTCH ) THEN
+                        IF( ORGATI ) THEN
+                           A = Z( I )*Z( I ) + DTIPSQ*DTIPSQ*
+     $                         ( DPSI+DPHI )
+                        ELSE
+                           A = Z( IP1 )*Z( IP1 ) +
+     $                         DTISQ*DTISQ*( DPSI+DPHI )
+                        END IF
+                     ELSE
+                        A = DTISQ*DTISQ*DPSI + DTIPSQ*DTIPSQ*DPHI
+                     END IF
+                  END IF
+                  ETA = B / A
+               ELSE IF( A.LE.ZERO ) THEN
+                  ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+               ELSE
+                  ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
+               END IF
+            ELSE
+*
+*              Interpolation using THREE most relevant poles
+*
+               DTIIM = WORK( IIM1 )*DELTA( IIM1 )
+               DTIIP = WORK( IIP1 )*DELTA( IIP1 )
+               TEMP = RHOINV + PSI + PHI
+               IF( SWTCH ) THEN
+                  C = TEMP - DTIIM*DPSI - DTIIP*DPHI
+                  ZZ( 1 ) = DTIIM*DTIIM*DPSI
+                  ZZ( 3 ) = DTIIP*DTIIP*DPHI
+               ELSE
+                  IF( ORGATI ) THEN
+                     TEMP1 = Z( IIM1 ) / DTIIM
+                     TEMP1 = TEMP1*TEMP1
+                     TEMP2 = ( D( IIM1 )-D( IIP1 ) )*
+     $                       ( D( IIM1 )+D( IIP1 ) )*TEMP1
+                     C = TEMP - DTIIP*( DPSI+DPHI ) - TEMP2
+                     ZZ( 1 ) = Z( IIM1 )*Z( IIM1 )
+                     IF( DPSI.LT.TEMP1 ) THEN
+                        ZZ( 3 ) = DTIIP*DTIIP*DPHI
+                     ELSE
+                        ZZ( 3 ) = DTIIP*DTIIP*( ( DPSI-TEMP1 )+DPHI )
+                     END IF
+                  ELSE
+                     TEMP1 = Z( IIP1 ) / DTIIP
+                     TEMP1 = TEMP1*TEMP1
+                     TEMP2 = ( D( IIP1 )-D( IIM1 ) )*
+     $                       ( D( IIM1 )+D( IIP1 ) )*TEMP1
+                     C = TEMP - DTIIM*( DPSI+DPHI ) - TEMP2
+                     IF( DPHI.LT.TEMP1 ) THEN
+                        ZZ( 1 ) = DTIIM*DTIIM*DPSI
+                     ELSE
+                        ZZ( 1 ) = DTIIM*DTIIM*( DPSI+( DPHI-TEMP1 ) )
+                     END IF
+                     ZZ( 3 ) = Z( IIP1 )*Z( IIP1 )
+                  END IF
+               END IF
+               DD( 1 ) = DTIIM
+               DD( 2 ) = DELTA( II )*WORK( II )
+               DD( 3 ) = DTIIP
+               CALL DLAED6( NITER, ORGATI, C, DD, ZZ, W, ETA, INFO )
+               IF( INFO.NE.0 )
+     $            GO TO 240
+            END IF
+*
+*           Note, eta should be positive if w is negative, and
+*           eta should be negative otherwise. However,
+*           if for some reason caused by roundoff, eta*w > 0,
+*           we simply use one Newton step instead. This way
+*           will guarantee eta*w < 0.
+*
+            IF( W*ETA.GE.ZERO )
+     $         ETA = -W / DW
+            IF( ORGATI ) THEN
+               TEMP1 = WORK( I )*DELTA( I )
+               TEMP = ETA - TEMP1
+            ELSE
+               TEMP1 = WORK( IP1 )*DELTA( IP1 )
+               TEMP = ETA - TEMP1
+            END IF
+            IF( TEMP.GT.SG2UB .OR. TEMP.LT.SG2LB ) THEN
+               IF( W.LT.ZERO ) THEN
+                  ETA = ( SG2UB-TAU ) / TWO
+               ELSE
+                  ETA = ( SG2LB-TAU ) / TWO
+               END IF
+            END IF
+*
+            TAU = TAU + ETA
+            ETA = ETA / ( SIGMA+SQRT( SIGMA*SIGMA+ETA ) )
+*
+            SIGMA = SIGMA + ETA
+            DO 200 J = 1, N
+               WORK( J ) = WORK( J ) + ETA
+               DELTA( J ) = DELTA( J ) - ETA
+  200       CONTINUE
+*
+            PREW = W
+*
+*           Evaluate PSI and the derivative DPSI
+*
+            DPSI = ZERO
+            PSI = ZERO
+            ERRETM = ZERO
+            DO 210 J = 1, IIM1
+               TEMP = Z( J ) / ( WORK( J )*DELTA( J ) )
+               PSI = PSI + Z( J )*TEMP
+               DPSI = DPSI + TEMP*TEMP
+               ERRETM = ERRETM + PSI
+  210       CONTINUE
+            ERRETM = ABS( ERRETM )
+*
+*           Evaluate PHI and the derivative DPHI
+*
+            DPHI = ZERO
+            PHI = ZERO
+            DO 220 J = N, IIP1, -1
+               TEMP = Z( J ) / ( WORK( J )*DELTA( J ) )
+               PHI = PHI + Z( J )*TEMP
+               DPHI = DPHI + TEMP*TEMP
+               ERRETM = ERRETM + PHI
+  220       CONTINUE
+*
+            TEMP = Z( II ) / ( WORK( II )*DELTA( II ) )
+            DW = DPSI + DPHI + TEMP*TEMP
+            TEMP = Z( II )*TEMP
+            W = RHOINV + PHI + PSI + TEMP
+            ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV +
+     $               THREE*ABS( TEMP ) + ABS( TAU )*DW
+            IF( W*PREW.GT.ZERO .AND. ABS( W ).GT.ABS( PREW ) / TEN )
+     $         SWTCH = .NOT.SWTCH
+*
+            IF( W.LE.ZERO ) THEN
+               SG2LB = MAX( SG2LB, TAU )
+            ELSE
+               SG2UB = MIN( SG2UB, TAU )
+            END IF
+*
+  230    CONTINUE
+*
+*        Return with INFO = 1, NITER = MAXIT and not converged
+*
+         INFO = 1
+*
+      END IF
+*
+  240 CONTINUE
+      RETURN
+*
+*     End of DLASD4
+*
+      END
diff --git a/costa/native/external/lapack/dlasd5.f b/costa/native/external/lapack/dlasd5.f
new file mode 100644
index 000000000..aa33f8c7b
--- /dev/null
+++ b/costa/native/external/lapack/dlasd5.f
@@ -0,0 +1,164 @@
+      SUBROUTINE DLASD5( I, D, Z, DELTA, RHO, DSIGMA, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+*     Courant Institute, NAG Ltd., and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            I
+      DOUBLE PRECISION   DSIGMA, RHO
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( 2 ), DELTA( 2 ), WORK( 2 ), Z( 2 )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This subroutine computes the square root of the I-th eigenvalue
+*  of a positive symmetric rank-one modification of a 2-by-2 diagonal
+*  matrix
+*
+*             diag( D ) * diag( D ) +  RHO *  Z * transpose(Z) .
+*
+*  The diagonal entries in the array D are assumed to satisfy
+*
+*             0 <= D(i) < D(j)  for  i < j .
+*
+*  We also assume RHO > 0 and that the Euclidean norm of the vector
+*  Z is one.
+*
+*  Arguments
+*  =========
+*
+*  I      (input) INTEGER
+*         The index of the eigenvalue to be computed.  I = 1 or I = 2.
+*
+*  D      (input) DOUBLE PRECISION array, dimension ( 2 )
+*         The original eigenvalues.  We assume 0 <= D(1) < D(2).
+*
+*  Z      (input) DOUBLE PRECISION array, dimension ( 2 )
+*         The components of the updating vector.
+*
+*  DELTA  (output) DOUBLE PRECISION array, dimension ( 2 )
+*         Contains (D(j) - lambda_I) in its  j-th component.
+*         The vector DELTA contains the information necessary
+*         to construct the eigenvectors.
+*
+*  RHO    (input) DOUBLE PRECISION
+*         The scalar in the symmetric updating formula.
+*
+*  DSIGMA (output) DOUBLE PRECISION
+*         The computed lambda_I, the I-th updated eigenvalue.
+*
+*  WORK   (workspace) DOUBLE PRECISION array, dimension ( 2 )
+*         WORK contains (D(j) + sigma_I) in its  j-th component.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ren-Cang Li, Computer Science Division, University of California
+*     at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO, THREE, FOUR
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0,
+     $                   THREE = 3.0D+0, FOUR = 4.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      DOUBLE PRECISION   B, C, DEL, DELSQ, TAU, W
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      DEL = D( 2 ) - D( 1 )
+      DELSQ = DEL*( D( 2 )+D( 1 ) )
+      IF( I.EQ.1 ) THEN
+         W = ONE + FOUR*RHO*( Z( 2 )*Z( 2 ) / ( D( 1 )+THREE*D( 2 ) )-
+     $       Z( 1 )*Z( 1 ) / ( THREE*D( 1 )+D( 2 ) ) ) / DEL
+         IF( W.GT.ZERO ) THEN
+            B = DELSQ + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) )
+            C = RHO*Z( 1 )*Z( 1 )*DELSQ
+*
+*           B > ZERO, always
+*
+*           The following TAU is DSIGMA * DSIGMA - D( 1 ) * D( 1 )
+*
+            TAU = TWO*C / ( B+SQRT( ABS( B*B-FOUR*C ) ) )
+*
+*           The following TAU is DSIGMA - D( 1 )
+*
+            TAU = TAU / ( D( 1 )+SQRT( D( 1 )*D( 1 )+TAU ) )
+            DSIGMA = D( 1 ) + TAU
+            DELTA( 1 ) = -TAU
+            DELTA( 2 ) = DEL - TAU
+            WORK( 1 ) = TWO*D( 1 ) + TAU
+            WORK( 2 ) = ( D( 1 )+TAU ) + D( 2 )
+*           DELTA( 1 ) = -Z( 1 ) / TAU
+*           DELTA( 2 ) = Z( 2 ) / ( DEL-TAU )
+         ELSE
+            B = -DELSQ + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) )
+            C = RHO*Z( 2 )*Z( 2 )*DELSQ
+*
+*           The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 )
+*
+            IF( B.GT.ZERO ) THEN
+               TAU = -TWO*C / ( B+SQRT( B*B+FOUR*C ) )
+            ELSE
+               TAU = ( B-SQRT( B*B+FOUR*C ) ) / TWO
+            END IF
+*
+*           The following TAU is DSIGMA - D( 2 )
+*
+            TAU = TAU / ( D( 2 )+SQRT( ABS( D( 2 )*D( 2 )+TAU ) ) )
+            DSIGMA = D( 2 ) + TAU
+            DELTA( 1 ) = -( DEL+TAU )
+            DELTA( 2 ) = -TAU
+            WORK( 1 ) = D( 1 ) + TAU + D( 2 )
+            WORK( 2 ) = TWO*D( 2 ) + TAU
+*           DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU )
+*           DELTA( 2 ) = -Z( 2 ) / TAU
+         END IF
+*        TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) )
+*        DELTA( 1 ) = DELTA( 1 ) / TEMP
+*        DELTA( 2 ) = DELTA( 2 ) / TEMP
+      ELSE
+*
+*        Now I=2
+*
+         B = -DELSQ + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) )
+         C = RHO*Z( 2 )*Z( 2 )*DELSQ
+*
+*        The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 )
+*
+         IF( B.GT.ZERO ) THEN
+            TAU = ( B+SQRT( B*B+FOUR*C ) ) / TWO
+         ELSE
+            TAU = TWO*C / ( -B+SQRT( B*B+FOUR*C ) )
+         END IF
+*
+*        The following TAU is DSIGMA - D( 2 )
+*
+         TAU = TAU / ( D( 2 )+SQRT( D( 2 )*D( 2 )+TAU ) )
+         DSIGMA = D( 2 ) + TAU
+         DELTA( 1 ) = -( DEL+TAU )
+         DELTA( 2 ) = -TAU
+         WORK( 1 ) = D( 1 ) + TAU + D( 2 )
+         WORK( 2 ) = TWO*D( 2 ) + TAU
+*        DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU )
+*        DELTA( 2 ) = -Z( 2 ) / TAU
+*        TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) )
+*        DELTA( 1 ) = DELTA( 1 ) / TEMP
+*        DELTA( 2 ) = DELTA( 2 ) / TEMP
+      END IF
+      RETURN
+*
+*     End of DLASD5
+*
+      END
diff --git a/costa/native/external/lapack/dlasd6.f b/costa/native/external/lapack/dlasd6.f
new file mode 100644
index 000000000..2001ae036
--- /dev/null
+++ b/costa/native/external/lapack/dlasd6.f
@@ -0,0 +1,306 @@
+      SUBROUTINE DLASD6( ICOMPQ, NL, NR, SQRE, D, VF, VL, ALPHA, BETA,
+     $                   IDXQ, PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM,
+     $                   LDGNUM, POLES, DIFL, DIFR, Z, K, C, S, WORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            GIVPTR, ICOMPQ, INFO, K, LDGCOL, LDGNUM, NL,
+     $                   NR, SQRE
+      DOUBLE PRECISION   ALPHA, BETA, C, S
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( LDGCOL, * ), IDXQ( * ), IWORK( * ),
+     $                   PERM( * )
+      DOUBLE PRECISION   D( * ), DIFL( * ), DIFR( * ),
+     $                   GIVNUM( LDGNUM, * ), POLES( LDGNUM, * ),
+     $                   VF( * ), VL( * ), WORK( * ), Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLASD6 computes the SVD of an updated upper bidiagonal matrix B
+*  obtained by merging two smaller ones by appending a row. This
+*  routine is used only for the problem which requires all singular
+*  values and optionally singular vector matrices in factored form.
+*  B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE.
+*  A related subroutine, DLASD1, handles the case in which all singular
+*  values and singular vectors of the bidiagonal matrix are desired.
+*
+*  DLASD6 computes the SVD as follows:
+*
+*                ( D1(in)  0    0     0 )
+*    B = U(in) * (   Z1'   a   Z2'    b ) * VT(in)
+*                (   0     0   D2(in) 0 )
+*
+*      = U(out) * ( D(out) 0) * VT(out)
+*
+*  where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M
+*  with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros
+*  elsewhere; and the entry b is empty if SQRE = 0.
+*
+*  The singular values of B can be computed using D1, D2, the first
+*  components of all the right singular vectors of the lower block, and
+*  the last components of all the right singular vectors of the upper
+*  block. These components are stored and updated in VF and VL,
+*  respectively, in DLASD6. Hence U and VT are not explicitly
+*  referenced.
+*
+*  The singular values are stored in D. The algorithm consists of two
+*  stages:
+*
+*        The first stage consists of deflating the size of the problem
+*        when there are multiple singular values or if there is a zero
+*        in the Z vector. For each such occurence the dimension of the
+*        secular equation problem is reduced by one. This stage is
+*        performed by the routine DLASD7.
+*
+*        The second stage consists of calculating the updated
+*        singular values. This is done by finding the roots of the
+*        secular equation via the routine DLASD4 (as called by DLASD8).
+*        This routine also updates VF and VL and computes the distances
+*        between the updated singular values and the old singular
+*        values.
+*
+*  DLASD6 is called from DLASDA.
+*
+*  Arguments
+*  =========
+*
+*  ICOMPQ (input) INTEGER
+*         Specifies whether singular vectors are to be computed in
+*         factored form:
+*         = 0: Compute singular values only.
+*         = 1: Compute singular vectors in factored form as well.
+*
+*  NL     (input) INTEGER
+*         The row dimension of the upper block.  NL >= 1.
+*
+*  NR     (input) INTEGER
+*         The row dimension of the lower block.  NR >= 1.
+*
+*  SQRE   (input) INTEGER
+*         = 0: the lower block is an NR-by-NR square matrix.
+*         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+*
+*         The bidiagonal matrix has row dimension N = NL + NR + 1,
+*         and column dimension M = N + SQRE.
+*
+*  D      (input/output) DOUBLE PRECISION array, dimension ( NL+NR+1 ).
+*         On entry D(1:NL,1:NL) contains the singular values of the
+*         upper block, and D(NL+2:N) contains the singular values
+*         of the lower block. On exit D(1:N) contains the singular
+*         values of the modified matrix.
+*
+*  VF     (input/output) DOUBLE PRECISION array, dimension ( M )
+*         On entry, VF(1:NL+1) contains the first components of all
+*         right singular vectors of the upper block; and VF(NL+2:M)
+*         contains the first components of all right singular vectors
+*         of the lower block. On exit, VF contains the first components
+*         of all right singular vectors of the bidiagonal matrix.
+*
+*  VL     (input/output) DOUBLE PRECISION array, dimension ( M )
+*         On entry, VL(1:NL+1) contains the  last components of all
+*         right singular vectors of the upper block; and VL(NL+2:M)
+*         contains the last components of all right singular vectors of
+*         the lower block. On exit, VL contains the last components of
+*         all right singular vectors of the bidiagonal matrix.
+*
+*  ALPHA  (input) DOUBLE PRECISION
+*         Contains the diagonal element associated with the added row.
+*
+*  BETA   (input) DOUBLE PRECISION
+*         Contains the off-diagonal element associated with the added
+*         row.
+*
+*  IDXQ   (output) INTEGER array, dimension ( N )
+*         This contains the permutation which will reintegrate the
+*         subproblem just solved back into sorted order, i.e.
+*         D( IDXQ( I = 1, N ) ) will be in ascending order.
+*
+*  PERM   (output) INTEGER array, dimension ( N )
+*         The permutations (from deflation and sorting) to be applied
+*         to each block. Not referenced if ICOMPQ = 0.
+*
+*  GIVPTR (output) INTEGER
+*         The number of Givens rotations which took place in this
+*         subproblem. Not referenced if ICOMPQ = 0.
+*
+*  GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 )
+*         Each pair of numbers indicates a pair of columns to take place
+*         in a Givens rotation. Not referenced if ICOMPQ = 0.
+*
+*  LDGCOL (input) INTEGER
+*         leading dimension of GIVCOL, must be at least N.
+*
+*  GIVNUM (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 )
+*         Each number indicates the C or S value to be used in the
+*         corresponding Givens rotation. Not referenced if ICOMPQ = 0.
+*
+*  LDGNUM (input) INTEGER
+*         The leading dimension of GIVNUM and POLES, must be at least N.
+*
+*  POLES  (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 )
+*         On exit, POLES(1,*) is an array containing the new singular
+*         values obtained from solving the secular equation, and
+*         POLES(2,*) is an array containing the poles in the secular
+*         equation. Not referenced if ICOMPQ = 0.
+*
+*  DIFL   (output) DOUBLE PRECISION array, dimension ( N )
+*         On exit, DIFL(I) is the distance between I-th updated
+*         (undeflated) singular value and the I-th (undeflated) old
+*         singular value.
+*
+*  DIFR   (output) DOUBLE PRECISION array,
+*                  dimension ( LDGNUM, 2 ) if ICOMPQ = 1 and
+*                  dimension ( N ) if ICOMPQ = 0.
+*         On exit, DIFR(I, 1) is the distance between I-th updated
+*         (undeflated) singular value and the I+1-th (undeflated) old
+*         singular value.
+*
+*         If ICOMPQ = 1, DIFR(1:K,2) is an array containing the
+*         normalizing factors for the right singular vector matrix.
+*
+*         See DLASD8 for details on DIFL and DIFR.
+*
+*  Z      (output) DOUBLE PRECISION array, dimension ( M )
+*         The first elements of this array contain the components
+*         of the deflation-adjusted updating row vector.
+*
+*  K      (output) INTEGER
+*         Contains the dimension of the non-deflated matrix,
+*         This is the order of the related secular equation. 1 <= K <=N.
+*
+*  C      (output) DOUBLE PRECISION
+*         C contains garbage if SQRE =0 and the C-value of a Givens
+*         rotation related to the right null space if SQRE = 1.
+*
+*  S      (output) DOUBLE PRECISION
+*         S contains garbage if SQRE =0 and the S-value of a Givens
+*         rotation related to the right null space if SQRE = 1.
+*
+*  WORK   (workspace) DOUBLE PRECISION array, dimension ( 4 * M )
+*
+*  IWORK  (workspace) INTEGER array, dimension ( 3 * N )
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = 1, an singular value did not converge
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IDX, IDXC, IDXP, ISIGMA, IVFW, IVLW, IW, M,
+     $                   N, N1, N2
+      DOUBLE PRECISION   ORGNRM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLAMRG, DLASCL, DLASD7, DLASD8, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      N = NL + NR + 1
+      M = N + SQRE
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( NL.LT.1 ) THEN
+         INFO = -2
+      ELSE IF( NR.LT.1 ) THEN
+         INFO = -3
+      ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN
+         INFO = -4
+      ELSE IF( LDGCOL.LT.N ) THEN
+         INFO = -14
+      ELSE IF( LDGNUM.LT.N ) THEN
+         INFO = -16
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLASD6', -INFO )
+         RETURN
+      END IF
+*
+*     The following values are for bookkeeping purposes only.  They are
+*     integer pointers which indicate the portion of the workspace
+*     used by a particular array in DLASD7 and DLASD8.
+*
+      ISIGMA = 1
+      IW = ISIGMA + N
+      IVFW = IW + M
+      IVLW = IVFW + M
+*
+      IDX = 1
+      IDXC = IDX + N
+      IDXP = IDXC + N
+*
+*     Scale.
+*
+      ORGNRM = MAX( ABS( ALPHA ), ABS( BETA ) )
+      D( NL+1 ) = ZERO
+      DO 10 I = 1, N
+         IF( ABS( D( I ) ).GT.ORGNRM ) THEN
+            ORGNRM = ABS( D( I ) )
+         END IF
+   10 CONTINUE
+      CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO )
+      ALPHA = ALPHA / ORGNRM
+      BETA = BETA / ORGNRM
+*
+*     Sort and Deflate singular values.
+*
+      CALL DLASD7( ICOMPQ, NL, NR, SQRE, K, D, Z, WORK( IW ), VF,
+     $             WORK( IVFW ), VL, WORK( IVLW ), ALPHA, BETA,
+     $             WORK( ISIGMA ), IWORK( IDX ), IWORK( IDXP ), IDXQ,
+     $             PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, C, S,
+     $             INFO )
+*
+*     Solve Secular Equation, compute DIFL, DIFR, and update VF, VL.
+*
+      CALL DLASD8( ICOMPQ, K, D, Z, VF, VL, DIFL, DIFR, LDGNUM,
+     $             WORK( ISIGMA ), WORK( IW ), INFO )
+*
+*     Save the poles if ICOMPQ = 1.
+*
+      IF( ICOMPQ.EQ.1 ) THEN
+         CALL DCOPY( K, D, 1, POLES( 1, 1 ), 1 )
+         CALL DCOPY( K, WORK( ISIGMA ), 1, POLES( 1, 2 ), 1 )
+      END IF
+*
+*     Unscale.
+*
+      CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO )
+*
+*     Prepare the IDXQ sorting permutation.
+*
+      N1 = K
+      N2 = N - K
+      CALL DLAMRG( N1, N2, D, 1, -1, IDXQ )
+*
+      RETURN
+*
+*     End of DLASD6
+*
+      END
diff --git a/costa/native/external/lapack/dlasd7.f b/costa/native/external/lapack/dlasd7.f
new file mode 100644
index 000000000..c96ff5da8
--- /dev/null
+++ b/costa/native/external/lapack/dlasd7.f
@@ -0,0 +1,445 @@
+      SUBROUTINE DLASD7( ICOMPQ, NL, NR, SQRE, K, D, Z, ZW, VF, VFW, VL,
+     $                   VLW, ALPHA, BETA, DSIGMA, IDX, IDXP, IDXQ,
+     $                   PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM,
+     $                   C, S, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+*     Courant Institute, NAG Ltd., and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            GIVPTR, ICOMPQ, INFO, K, LDGCOL, LDGNUM, NL,
+     $                   NR, SQRE
+      DOUBLE PRECISION   ALPHA, BETA, C, S
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( LDGCOL, * ), IDX( * ), IDXP( * ),
+     $                   IDXQ( * ), PERM( * )
+      DOUBLE PRECISION   D( * ), DSIGMA( * ), GIVNUM( LDGNUM, * ),
+     $                   VF( * ), VFW( * ), VL( * ), VLW( * ), Z( * ),
+     $                   ZW( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLASD7 merges the two sets of singular values together into a single
+*  sorted set. Then it tries to deflate the size of the problem. There
+*  are two ways in which deflation can occur:  when two or more singular
+*  values are close together or if there is a tiny entry in the Z
+*  vector. For each such occurrence the order of the related
+*  secular equation problem is reduced by one.
+*
+*  DLASD7 is called from DLASD6.
+*
+*  Arguments
+*  =========
+*
+*  ICOMPQ  (input) INTEGER
+*          Specifies whether singular vectors are to be computed
+*          in compact form, as follows:
+*          = 0: Compute singular values only.
+*          = 1: Compute singular vectors of upper
+*               bidiagonal matrix in compact form.
+*
+*  NL     (input) INTEGER
+*         The row dimension of the upper block. NL >= 1.
+*
+*  NR     (input) INTEGER
+*         The row dimension of the lower block. NR >= 1.
+*
+*  SQRE   (input) INTEGER
+*         = 0: the lower block is an NR-by-NR square matrix.
+*         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+*
+*         The bidiagonal matrix has
+*         N = NL + NR + 1 rows and
+*         M = N + SQRE >= N columns.
+*
+*  K      (output) INTEGER
+*         Contains the dimension of the non-deflated matrix, this is
+*         the order of the related secular equation. 1 <= K <=N.
+*
+*  D      (input/output) DOUBLE PRECISION array, dimension ( N )
+*         On entry D contains the singular values of the two submatrices
+*         to be combined. On exit D contains the trailing (N-K) updated
+*         singular values (those which were deflated) sorted into
+*         increasing order.
+*
+*  Z      (output) DOUBLE PRECISION array, dimension ( M )
+*         On exit Z contains the updating row vector in the secular
+*         equation.
+*
+*  ZW     (workspace) DOUBLE PRECISION array, dimension ( M )
+*         Workspace for Z.
+*
+*  VF     (input/output) DOUBLE PRECISION array, dimension ( M )
+*         On entry, VF(1:NL+1) contains the first components of all
+*         right singular vectors of the upper block; and VF(NL+2:M)
+*         contains the first components of all right singular vectors
+*         of the lower block. On exit, VF contains the first components
+*         of all right singular vectors of the bidiagonal matrix.
+*
+*  VFW    (workspace) DOUBLE PRECISION array, dimension ( M )
+*         Workspace for VF.
+*
+*  VL     (input/output) DOUBLE PRECISION array, dimension ( M )
+*         On entry, VL(1:NL+1) contains the  last components of all
+*         right singular vectors of the upper block; and VL(NL+2:M)
+*         contains the last components of all right singular vectors
+*         of the lower block. On exit, VL contains the last components
+*         of all right singular vectors of the bidiagonal matrix.
+*
+*  VLW    (workspace) DOUBLE PRECISION array, dimension ( M )
+*         Workspace for VL.
+*
+*  ALPHA  (input) DOUBLE PRECISION
+*         Contains the diagonal element associated with the added row.
+*
+*  BETA   (input) DOUBLE PRECISION
+*         Contains the off-diagonal element associated with the added
+*         row.
+*
+*  DSIGMA (output) DOUBLE PRECISION array, dimension ( N )
+*         Contains a copy of the diagonal elements (K-1 singular values
+*         and one zero) in the secular equation.
+*
+*  IDX    (workspace) INTEGER array, dimension ( N )
+*         This will contain the permutation used to sort the contents of
+*         D into ascending order.
+*
+*  IDXP   (workspace) INTEGER array, dimension ( N )
+*         This will contain the permutation used to place deflated
+*         values of D at the end of the array. On output IDXP(2:K)
+*         points to the nondeflated D-values and IDXP(K+1:N)
+*         points to the deflated singular values.
+*
+*  IDXQ   (input) INTEGER array, dimension ( N )
+*         This contains the permutation which separately sorts the two
+*         sub-problems in D into ascending order.  Note that entries in
+*         the first half of this permutation must first be moved one
+*         position backward; and entries in the second half
+*         must first have NL+1 added to their values.
+*
+*  PERM   (output) INTEGER array, dimension ( N )
+*         The permutations (from deflation and sorting) to be applied
+*         to each singular block. Not referenced if ICOMPQ = 0.
+*
+*  GIVPTR (output) INTEGER
+*         The number of Givens rotations which took place in this
+*         subproblem. Not referenced if ICOMPQ = 0.
+*
+*  GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 )
+*         Each pair of numbers indicates a pair of columns to take place
+*         in a Givens rotation. Not referenced if ICOMPQ = 0.
+*
+*  LDGCOL (input) INTEGER
+*         The leading dimension of GIVCOL, must be at least N.
+*
+*  GIVNUM (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 )
+*         Each number indicates the C or S value to be used in the
+*         corresponding Givens rotation. Not referenced if ICOMPQ = 0.
+*
+*  LDGNUM (input) INTEGER
+*         The leading dimension of GIVNUM, must be at least N.
+*
+*  C      (output) DOUBLE PRECISION
+*         C contains garbage if SQRE =0 and the C-value of a Givens
+*         rotation related to the right null space if SQRE = 1.
+*
+*  S      (output) DOUBLE PRECISION
+*         S contains garbage if SQRE =0 and the S-value of a Givens
+*         rotation related to the right null space if SQRE = 1.
+*
+*  INFO   (output) INTEGER
+*         = 0:  successful exit.
+*         < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO, EIGHT
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0,
+     $                   EIGHT = 8.0D+0 )
+*     ..
+*     .. Local Scalars ..
+*
+      INTEGER            I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M, N,
+     $                   NLP1, NLP2
+      DOUBLE PRECISION   EPS, HLFTOL, TAU, TOL, Z1
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLAMRG, DROT, XERBLA
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH, DLAPY2
+      EXTERNAL           DLAMCH, DLAPY2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      N = NL + NR + 1
+      M = N + SQRE
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( NL.LT.1 ) THEN
+         INFO = -2
+      ELSE IF( NR.LT.1 ) THEN
+         INFO = -3
+      ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN
+         INFO = -4
+      ELSE IF( LDGCOL.LT.N ) THEN
+         INFO = -22
+      ELSE IF( LDGNUM.LT.N ) THEN
+         INFO = -24
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLASD7', -INFO )
+         RETURN
+      END IF
+*
+      NLP1 = NL + 1
+      NLP2 = NL + 2
+      IF( ICOMPQ.EQ.1 ) THEN
+         GIVPTR = 0
+      END IF
+*
+*     Generate the first part of the vector Z and move the singular
+*     values in the first part of D one position backward.
+*
+      Z1 = ALPHA*VL( NLP1 )
+      VL( NLP1 ) = ZERO
+      TAU = VF( NLP1 )
+      DO 10 I = NL, 1, -1
+         Z( I+1 ) = ALPHA*VL( I )
+         VL( I ) = ZERO
+         VF( I+1 ) = VF( I )
+         D( I+1 ) = D( I )
+         IDXQ( I+1 ) = IDXQ( I ) + 1
+   10 CONTINUE
+      VF( 1 ) = TAU
+*
+*     Generate the second part of the vector Z.
+*
+      DO 20 I = NLP2, M
+         Z( I ) = BETA*VF( I )
+         VF( I ) = ZERO
+   20 CONTINUE
+*
+*     Sort the singular values into increasing order
+*
+      DO 30 I = NLP2, N
+         IDXQ( I ) = IDXQ( I ) + NLP1
+   30 CONTINUE
+*
+*     DSIGMA, IDXC, IDXC, and ZW are used as storage space.
+*
+      DO 40 I = 2, N
+         DSIGMA( I ) = D( IDXQ( I ) )
+         ZW( I ) = Z( IDXQ( I ) )
+         VFW( I ) = VF( IDXQ( I ) )
+         VLW( I ) = VL( IDXQ( I ) )
+   40 CONTINUE
+*
+      CALL DLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) )
+*
+      DO 50 I = 2, N
+         IDXI = 1 + IDX( I )
+         D( I ) = DSIGMA( IDXI )
+         Z( I ) = ZW( IDXI )
+         VF( I ) = VFW( IDXI )
+         VL( I ) = VLW( IDXI )
+   50 CONTINUE
+*
+*     Calculate the allowable deflation tolerence
+*
+      EPS = DLAMCH( 'Epsilon' )
+      TOL = MAX( ABS( ALPHA ), ABS( BETA ) )
+      TOL = EIGHT*EIGHT*EPS*MAX( ABS( D( N ) ), TOL )
+*
+*     There are 2 kinds of deflation -- first a value in the z-vector
+*     is small, second two (or more) singular values are very close
+*     together (their difference is small).
+*
+*     If the value in the z-vector is small, we simply permute the
+*     array so that the corresponding singular value is moved to the
+*     end.
+*
+*     If two values in the D-vector are close, we perform a two-sided
+*     rotation designed to make one of the corresponding z-vector
+*     entries zero, and then permute the array so that the deflated
+*     singular value is moved to the end.
+*
+*     If there are multiple singular values then the problem deflates.
+*     Here the number of equal singular values are found.  As each equal
+*     singular value is found, an elementary reflector is computed to
+*     rotate the corresponding singular subspace so that the
+*     corresponding components of Z are zero in this new basis.
+*
+      K = 1
+      K2 = N + 1
+      DO 60 J = 2, N
+         IF( ABS( Z( J ) ).LE.TOL ) THEN
+*
+*           Deflate due to small z component.
+*
+            K2 = K2 - 1
+            IDXP( K2 ) = J
+            IF( J.EQ.N )
+     $         GO TO 100
+         ELSE
+            JPREV = J
+            GO TO 70
+         END IF
+   60 CONTINUE
+   70 CONTINUE
+      J = JPREV
+   80 CONTINUE
+      J = J + 1
+      IF( J.GT.N )
+     $   GO TO 90
+      IF( ABS( Z( J ) ).LE.TOL ) THEN
+*
+*        Deflate due to small z component.
+*
+         K2 = K2 - 1
+         IDXP( K2 ) = J
+      ELSE
+*
+*        Check if singular values are close enough to allow deflation.
+*
+         IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN
+*
+*           Deflation is possible.
+*
+            S = Z( JPREV )
+            C = Z( J )
+*
+*           Find sqrt(a**2+b**2) without overflow or
+*           destructive underflow.
+*
+            TAU = DLAPY2( C, S )
+            Z( J ) = TAU
+            Z( JPREV ) = ZERO
+            C = C / TAU
+            S = -S / TAU
+*
+*           Record the appropriate Givens rotation
+*
+            IF( ICOMPQ.EQ.1 ) THEN
+               GIVPTR = GIVPTR + 1
+               IDXJP = IDXQ( IDX( JPREV )+1 )
+               IDXJ = IDXQ( IDX( J )+1 )
+               IF( IDXJP.LE.NLP1 ) THEN
+                  IDXJP = IDXJP - 1
+               END IF
+               IF( IDXJ.LE.NLP1 ) THEN
+                  IDXJ = IDXJ - 1
+               END IF
+               GIVCOL( GIVPTR, 2 ) = IDXJP
+               GIVCOL( GIVPTR, 1 ) = IDXJ
+               GIVNUM( GIVPTR, 2 ) = C
+               GIVNUM( GIVPTR, 1 ) = S
+            END IF
+            CALL DROT( 1, VF( JPREV ), 1, VF( J ), 1, C, S )
+            CALL DROT( 1, VL( JPREV ), 1, VL( J ), 1, C, S )
+            K2 = K2 - 1
+            IDXP( K2 ) = JPREV
+            JPREV = J
+         ELSE
+            K = K + 1
+            ZW( K ) = Z( JPREV )
+            DSIGMA( K ) = D( JPREV )
+            IDXP( K ) = JPREV
+            JPREV = J
+         END IF
+      END IF
+      GO TO 80
+   90 CONTINUE
+*
+*     Record the last singular value.
+*
+      K = K + 1
+      ZW( K ) = Z( JPREV )
+      DSIGMA( K ) = D( JPREV )
+      IDXP( K ) = JPREV
+*
+  100 CONTINUE
+*
+*     Sort the singular values into DSIGMA. The singular values which
+*     were not deflated go into the first K slots of DSIGMA, except
+*     that DSIGMA(1) is treated separately.
+*
+      DO 110 J = 2, N
+         JP = IDXP( J )
+         DSIGMA( J ) = D( JP )
+         VFW( J ) = VF( JP )
+         VLW( J ) = VL( JP )
+  110 CONTINUE
+      IF( ICOMPQ.EQ.1 ) THEN
+         DO 120 J = 2, N
+            JP = IDXP( J )
+            PERM( J ) = IDXQ( IDX( JP )+1 )
+            IF( PERM( J ).LE.NLP1 ) THEN
+               PERM( J ) = PERM( J ) - 1
+            END IF
+  120    CONTINUE
+      END IF
+*
+*     The deflated singular values go back into the last N - K slots of
+*     D.
+*
+      CALL DCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 )
+*
+*     Determine DSIGMA(1), DSIGMA(2), Z(1), VF(1), VL(1), VF(M), and
+*     VL(M).
+*
+      DSIGMA( 1 ) = ZERO
+      HLFTOL = TOL / TWO
+      IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL )
+     $   DSIGMA( 2 ) = HLFTOL
+      IF( M.GT.N ) THEN
+         Z( 1 ) = DLAPY2( Z1, Z( M ) )
+         IF( Z( 1 ).LE.TOL ) THEN
+            C = ONE
+            S = ZERO
+            Z( 1 ) = TOL
+         ELSE
+            C = Z1 / Z( 1 )
+            S = -Z( M ) / Z( 1 )
+         END IF
+         CALL DROT( 1, VF( M ), 1, VF( 1 ), 1, C, S )
+         CALL DROT( 1, VL( M ), 1, VL( 1 ), 1, C, S )
+      ELSE
+         IF( ABS( Z1 ).LE.TOL ) THEN
+            Z( 1 ) = TOL
+         ELSE
+            Z( 1 ) = Z1
+         END IF
+      END IF
+*
+*     Restore Z, VF, and VL.
+*
+      CALL DCOPY( K-1, ZW( 2 ), 1, Z( 2 ), 1 )
+      CALL DCOPY( N-1, VFW( 2 ), 1, VF( 2 ), 1 )
+      CALL DCOPY( N-1, VLW( 2 ), 1, VL( 2 ), 1 )
+*
+      RETURN
+*
+*     End of DLASD7
+*
+      END
diff --git a/costa/native/external/lapack/dlasd8.f b/costa/native/external/lapack/dlasd8.f
new file mode 100644
index 000000000..9d1035865
--- /dev/null
+++ b/costa/native/external/lapack/dlasd8.f
@@ -0,0 +1,254 @@
+      SUBROUTINE DLASD8( ICOMPQ, K, D, Z, VF, VL, DIFL, DIFR, LDDIFR,
+     $                   DSIGMA, WORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+*     Courant Institute, NAG Ltd., and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            ICOMPQ, INFO, K, LDDIFR
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * ), DIFL( * ), DIFR( LDDIFR, * ),
+     $                   DSIGMA( * ), VF( * ), VL( * ), WORK( * ),
+     $                   Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLASD8 finds the square roots of the roots of the secular equation,
+*  as defined by the values in DSIGMA and Z. It makes the appropriate
+*  calls to DLASD4, and stores, for each  element in D, the distance
+*  to its two nearest poles (elements in DSIGMA). It also updates
+*  the arrays VF and VL, the first and last components of all the
+*  right singular vectors of the original bidiagonal matrix.
+*
+*  DLASD8 is called from DLASD6.
+*
+*  Arguments
+*  =========
+*
+*  ICOMPQ  (input) INTEGER
+*          Specifies whether singular vectors are to be computed in
+*          factored form in the calling routine:
+*          = 0: Compute singular values only.
+*          = 1: Compute singular vectors in factored form as well.
+*
+*  K       (input) INTEGER
+*          The number of terms in the rational function to be solved
+*          by DLASD4.  K >= 1.
+*
+*  D       (output) DOUBLE PRECISION array, dimension ( K )
+*          On output, D contains the updated singular values.
+*
+*  Z       (input) DOUBLE PRECISION array, dimension ( K )
+*          The first K elements of this array contain the components
+*          of the deflation-adjusted updating row vector.
+*
+*  VF      (input/output) DOUBLE PRECISION array, dimension ( K )
+*          On entry, VF contains  information passed through DBEDE8.
+*          On exit, VF contains the first K components of the first
+*          components of all right singular vectors of the bidiagonal
+*          matrix.
+*
+*  VL      (input/output) DOUBLE PRECISION array, dimension ( K )
+*          On entry, VL contains  information passed through DBEDE8.
+*          On exit, VL contains the first K components of the last
+*          components of all right singular vectors of the bidiagonal
+*          matrix.
+*
+*  DIFL    (output) DOUBLE PRECISION array, dimension ( K )
+*          On exit, DIFL(I) = D(I) - DSIGMA(I).
+*
+*  DIFR    (output) DOUBLE PRECISION array,
+*                   dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and
+*                   dimension ( K ) if ICOMPQ = 0.
+*          On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not
+*          defined and will not be referenced.
+*
+*          If ICOMPQ = 1, DIFR(1:K,2) is an array containing the
+*          normalizing factors for the right singular vector matrix.
+*
+*  LDDIFR  (input) INTEGER
+*          The leading dimension of DIFR, must be at least K.
+*
+*  DSIGMA  (input) DOUBLE PRECISION array, dimension ( K )
+*          The first K elements of this array contain the old roots
+*          of the deflated updating problem.  These are the poles
+*          of the secular equation.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension at least 3 * K
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = 1, an singular value did not converge
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IWK1, IWK2, IWK2I, IWK3, IWK3I, J
+      DOUBLE PRECISION   DIFLJ, DIFRJ, DJ, DSIGJ, DSIGJP, RHO, TEMP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLASCL, DLASD4, DLASET, XERBLA
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DDOT, DLAMC3, DNRM2
+      EXTERNAL           DDOT, DLAMC3, DNRM2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, SIGN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( K.LT.1 ) THEN
+         INFO = -2
+      ELSE IF( LDDIFR.LT.K ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLASD8', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( K.EQ.1 ) THEN
+         D( 1 ) = ABS( Z( 1 ) )
+         DIFL( 1 ) = D( 1 )
+         IF( ICOMPQ.EQ.1 ) THEN
+            DIFL( 2 ) = ONE
+            DIFR( 1, 2 ) = ONE
+         END IF
+         RETURN
+      END IF
+*
+*     Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can
+*     be computed with high relative accuracy (barring over/underflow).
+*     This is a problem on machines without a guard digit in
+*     add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
+*     The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I),
+*     which on any of these machines zeros out the bottommost
+*     bit of DSIGMA(I) if it is 1; this makes the subsequent
+*     subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation
+*     occurs. On binary machines with a guard digit (almost all
+*     machines) it does not change DSIGMA(I) at all. On hexadecimal
+*     and decimal machines with a guard digit, it slightly
+*     changes the bottommost bits of DSIGMA(I). It does not account
+*     for hexadecimal or decimal machines without guard digits
+*     (we know of none). We use a subroutine call to compute
+*     2*DLAMBDA(I) to prevent optimizing compilers from eliminating
+*     this code.
+*
+      DO 10 I = 1, K
+         DSIGMA( I ) = DLAMC3( DSIGMA( I ), DSIGMA( I ) ) - DSIGMA( I )
+   10 CONTINUE
+*
+*     Book keeping.
+*
+      IWK1 = 1
+      IWK2 = IWK1 + K
+      IWK3 = IWK2 + K
+      IWK2I = IWK2 - 1
+      IWK3I = IWK3 - 1
+*
+*     Normalize Z.
+*
+      RHO = DNRM2( K, Z, 1 )
+      CALL DLASCL( 'G', 0, 0, RHO, ONE, K, 1, Z, K, INFO )
+      RHO = RHO*RHO
+*
+*     Initialize WORK(IWK3).
+*
+      CALL DLASET( 'A', K, 1, ONE, ONE, WORK( IWK3 ), K )
+*
+*     Compute the updated singular values, the arrays DIFL, DIFR,
+*     and the updated Z.
+*
+      DO 40 J = 1, K
+         CALL DLASD4( K, J, DSIGMA, Z, WORK( IWK1 ), RHO, D( J ),
+     $                WORK( IWK2 ), INFO )
+*
+*        If the root finder fails, the computation is terminated.
+*
+         IF( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+         WORK( IWK3I+J ) = WORK( IWK3I+J )*WORK( J )*WORK( IWK2I+J )
+         DIFL( J ) = -WORK( J )
+         DIFR( J, 1 ) = -WORK( J+1 )
+         DO 20 I = 1, J - 1
+            WORK( IWK3I+I ) = WORK( IWK3I+I )*WORK( I )*
+     $                        WORK( IWK2I+I ) / ( DSIGMA( I )-
+     $                        DSIGMA( J ) ) / ( DSIGMA( I )+
+     $                        DSIGMA( J ) )
+   20    CONTINUE
+         DO 30 I = J + 1, K
+            WORK( IWK3I+I ) = WORK( IWK3I+I )*WORK( I )*
+     $                        WORK( IWK2I+I ) / ( DSIGMA( I )-
+     $                        DSIGMA( J ) ) / ( DSIGMA( I )+
+     $                        DSIGMA( J ) )
+   30    CONTINUE
+   40 CONTINUE
+*
+*     Compute updated Z.
+*
+      DO 50 I = 1, K
+         Z( I ) = SIGN( SQRT( ABS( WORK( IWK3I+I ) ) ), Z( I ) )
+   50 CONTINUE
+*
+*     Update VF and VL.
+*
+      DO 80 J = 1, K
+         DIFLJ = DIFL( J )
+         DJ = D( J )
+         DSIGJ = -DSIGMA( J )
+         IF( J.LT.K ) THEN
+            DIFRJ = -DIFR( J, 1 )
+            DSIGJP = -DSIGMA( J+1 )
+         END IF
+         WORK( J ) = -Z( J ) / DIFLJ / ( DSIGMA( J )+DJ )
+         DO 60 I = 1, J - 1
+            WORK( I ) = Z( I ) / ( DLAMC3( DSIGMA( I ), DSIGJ )-DIFLJ )
+     $                   / ( DSIGMA( I )+DJ )
+   60    CONTINUE
+         DO 70 I = J + 1, K
+            WORK( I ) = Z( I ) / ( DLAMC3( DSIGMA( I ), DSIGJP )+DIFRJ )
+     $                   / ( DSIGMA( I )+DJ )
+   70    CONTINUE
+         TEMP = DNRM2( K, WORK, 1 )
+         WORK( IWK2I+J ) = DDOT( K, WORK, 1, VF, 1 ) / TEMP
+         WORK( IWK3I+J ) = DDOT( K, WORK, 1, VL, 1 ) / TEMP
+         IF( ICOMPQ.EQ.1 ) THEN
+            DIFR( J, 2 ) = TEMP
+         END IF
+   80 CONTINUE
+*
+      CALL DCOPY( K, WORK( IWK2 ), 1, VF, 1 )
+      CALL DCOPY( K, WORK( IWK3 ), 1, VL, 1 )
+*
+      RETURN
+*
+*     End of DLASD8
+*
+      END
diff --git a/costa/native/external/lapack/dlasd9.f b/costa/native/external/lapack/dlasd9.f
new file mode 100644
index 000000000..45fe9cb60
--- /dev/null
+++ b/costa/native/external/lapack/dlasd9.f
@@ -0,0 +1,256 @@
+      SUBROUTINE DLASD9( ICOMPQ, LDU, K, D, Z, VF, VL, DIFL, DIFR,
+     $                   DSIGMA, WORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+*     Courant Institute, NAG Ltd., and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            ICOMPQ, INFO, K, LDU
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * ), DIFL( * ), DIFR( LDU, * ), DSIGMA( * ),
+     $                   VF( * ), VL( * ), WORK( * ), Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLASD9 finds the square roots of the roots of the secular equation,
+*  as defined by the values in DSIGMA and Z.  It makes the
+*  appropriate calls to DLASD4, and stores, for each  element in D,
+*  the distance to its two nearest poles (elements in DSIGMA). It also
+*  updates the arrays VF and VL, the first and last components of all
+*  the right singular vectors of the original bidiagonal matrix.
+*
+*  DLASD9 is called from DLASD7.
+*
+*  Arguments
+*  =========
+*
+*  ICOMPQ  (input) INTEGER
+*          Specifies whether singular vectors are to be computed in
+*          factored form in the calling routine:
+*
+*             ICOMPQ = 0             Compute singular values only.
+*
+*             ICOMPQ = 1             Compute singular vector matrices in
+*                                    factored form also.
+*  K       (input) INTEGER
+*          The number of terms in the rational function to be solved by
+*          DLASD4.  K >= 1.
+*
+*  D       (output) DOUBLE PRECISION array, dimension(K)
+*          D(I) contains the updated singular values.
+*
+*  DSIGMA  (input) DOUBLE PRECISION array, dimension(K)
+*          The first K elements of this array contain the old roots
+*          of the deflated updating problem.  These are the poles
+*          of the secular equation.
+*
+*  Z       (input) DOUBLE PRECISION array, dimension (K)
+*          The first K elements of this array contain the components
+*          of the deflation-adjusted updating row vector.
+*
+*  VF      (input/output) DOUBLE PRECISION array, dimension(K)
+*          On entry, VF contains  information passed through SBEDE8.f
+*          On exit, VF contains the first K components of the first
+*          components of all right singular vectors of the bidiagonal
+*          matrix.
+*
+*  VL      (input/output) DOUBLE PRECISION array, dimension(K)
+*          On entry, VL contains  information passed through SBEDE8.f
+*          On exit, VL contains the first K components of the last
+*          components of all right singular vectors of the bidiagonal
+*          matrix.
+*
+*  DIFL    (output) DOUBLE PRECISION array, dimension (K).
+*          On exit, DIFL(I) = D(I) - DSIGMA(I).
+*
+*  DIFR    (output) DOUBLE PRECISION array,
+*                              dimension (LDU, 2) if ICOMPQ =1 and
+*                              dimension (K) if ICOMPQ = 0.
+*          On exit, DIFR(I, 1) = D(I) - DSIGMA(I+1), DIFR(K, 1) is not
+*          defined and will not be referenced.
+*
+*          If ICOMPQ = 1, DIFR(1:K, 2) is an array containing the
+*          normalizing factors for the right singular vector matrix.
+*
+*  WORK    (workspace) DOUBLE PRECISION array,
+*                                 dimension at least (3 * K)
+*          Workspace.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = 1, an singular value did not converge
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IWK1, IWK2, IWK2I, IWK3, IWK3I, J
+      DOUBLE PRECISION   DIFLJ, DIFRJ, DJ, DJP1, DSIGJ, DSIGJP, RHO,
+     $                   TEMP
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DDOT, DLAMC3, DNRM2
+      EXTERNAL           DDOT, DLAMC3, DNRM2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLASCL, DLASD4, DLASET, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, SIGN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( K.LT.1 ) THEN
+         INFO = -3
+      ELSE IF( LDU.LT.K ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLASD9', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( K.EQ.1 ) THEN
+         D( 1 ) = ABS( Z( 1 ) )
+         DIFL( 1 ) = D( 1 )
+         IF( ICOMPQ.EQ.1 ) THEN
+            DIFL( 2 ) = ONE
+            DIFR( 1, 2 ) = ONE
+         END IF
+         RETURN
+      END IF
+*
+*     Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can
+*     be computed with high relative accuracy (barring over/underflow).
+*     This is a problem on machines without a guard digit in
+*     add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
+*     The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I),
+*     which on any of these machines zeros out the bottommost
+*     bit of DSIGMA(I) if it is 1; this makes the subsequent
+*     subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation
+*     occurs. On binary machines with a guard digit (almost all
+*     machines) it does not change DSIGMA(I) at all. On hexadecimal
+*     and decimal machines with a guard digit, it slightly
+*     changes the bottommost bits of DSIGMA(I). It does not account
+*     for hexadecimal or decimal machines without guard digits
+*     (we know of none). We use a subroutine call to compute
+*     2*DLAMBDA(I) to prevent optimizing compilers from eliminating
+*     this code.
+*
+      DO 10 I = 1, K
+         DSIGMA( I ) = DLAMC3( DSIGMA( I ), DSIGMA( I ) ) - DSIGMA( I )
+   10 CONTINUE
+*
+*     Book keeping.
+*
+      IWK1 = 1
+      IWK2 = IWK1 + K
+      IWK3 = IWK2 + K
+      IWK2I = IWK2 - 1
+      IWK3I = IWK3 - 1
+*
+*     Normalize Z.
+*
+      RHO = DNRM2( K, Z, 1 )
+      CALL DLASCL( 'G', 0, 0, RHO, ONE, K, 1, Z, K, INFO )
+      RHO = RHO*RHO
+*
+*     Initialize WORK(IWK3).
+*
+      CALL DLASET( 'A', K, 1, ONE, ONE, WORK( IWK3 ), K )
+*
+*     Compute the updated singular values, the arrays DIFL, DIFR,
+*     and the updated Z.
+*
+      DO 40 J = 1, K
+         CALL DLASD4( K, J, DSIGMA, Z, WORK( IWK1 ), RHO, D( J ),
+     $                WORK( IWK2 ), INFO )
+*
+*        If the root finder fails, the computation is terminated.
+*
+         IF( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+         WORK( IWK3I+J ) = WORK( IWK3I+J )*WORK( J )*WORK( IWK2I+J )
+         DIFL( J ) = -WORK( J )
+         DIFR( J, 1 ) = -WORK( J+1 )
+         DO 20 I = 1, J - 1
+            WORK( IWK3I+I ) = WORK( IWK3I+I )*WORK( I )*
+     $                        WORK( IWK2I+I ) / ( DSIGMA( I )-
+     $                        DSIGMA( J ) ) / ( DSIGMA( I )+
+     $                        DSIGMA( J ) )
+   20    CONTINUE
+         DO 30 I = J + 1, K
+            WORK( IWK3I+I ) = WORK( IWK3I+I )*WORK( I )*
+     $                        WORK( IWK2I+I ) / ( DSIGMA( I )-
+     $                        DSIGMA( J ) ) / ( DSIGMA( I )+
+     $                        DSIGMA( J ) )
+   30    CONTINUE
+   40 CONTINUE
+*
+*     Compute updated Z.
+*
+      DO 50 I = 1, K
+         Z( I ) = SIGN( SQRT( ABS( WORK( IWK3I+I ) ) ), Z( I ) )
+   50 CONTINUE
+*
+*     Update VF and VL.
+*
+      DO 80 J = 1, K
+         DIFLJ = DIFL( J )
+         DJ = D( J )
+         DSIGJ = -DSIGMA( J )
+         IF( J.LT.K ) THEN
+            DIFRJ = -DIFR( J, 1 )
+            DJP1 = D( J+1 )
+            DSIGJP = -DSIGMA( J+1 )
+         END IF
+         WORK( J ) = -Z( J ) / DIFLJ / ( DSIGMA( J )+DJ )
+         DO 60 I = 1, J - 1
+            WORK( I ) = Z( I ) / ( DLAMC3( DSIGMA( I ), DSIGJ )-DIFLJ )
+     $                   / ( DSIGMA( I )+DJ )
+   60    CONTINUE
+         DO 70 I = J + 1, K
+            WORK( I ) = Z( I ) / ( DLAMC3( DSIGMA( I ), DSIGJP )+DIFRJ )
+     $                   / ( DSIGMA( I )+DJ )
+   70    CONTINUE
+         TEMP = DNRM2( K, WORK, 1 )
+         WORK( IWK2I+J ) = DDOT( K, WORK, 1, VF, 1 ) / TEMP
+         WORK( IWK3I+J ) = DDOT( K, WORK, 1, VL, 1 ) / TEMP
+         IF( ICOMPQ.EQ.1 ) THEN
+            DIFR( J, 2 ) = TEMP
+         END IF
+   80 CONTINUE
+*
+      CALL DCOPY( K, WORK( IWK2 ), 1, VF, 1 )
+      CALL DCOPY( K, WORK( IWK3 ), 1, VL, 1 )
+*
+      RETURN
+*
+*     End of DLASD9
+*
+      END
diff --git a/costa/native/external/lapack/dlasda.f b/costa/native/external/lapack/dlasda.f
new file mode 100644
index 000000000..372516e30
--- /dev/null
+++ b/costa/native/external/lapack/dlasda.f
@@ -0,0 +1,391 @@
+      SUBROUTINE DLASDA( ICOMPQ, SMLSIZ, N, SQRE, D, E, U, LDU, VT, K,
+     $                   DIFL, DIFR, Z, POLES, GIVPTR, GIVCOL, LDGCOL,
+     $                   PERM, GIVNUM, C, S, WORK, IWORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            ICOMPQ, INFO, LDGCOL, LDU, N, SMLSIZ, SQRE
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ),
+     $                   K( * ), PERM( LDGCOL, * )
+      DOUBLE PRECISION   C( * ), D( * ), DIFL( LDU, * ), DIFR( LDU, * ),
+     $                   E( * ), GIVNUM( LDU, * ), POLES( LDU, * ),
+     $                   S( * ), U( LDU, * ), VT( LDU, * ), WORK( * ),
+     $                   Z( LDU, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  Using a divide and conquer approach, DLASDA computes the singular
+*  value decomposition (SVD) of a real upper bidiagonal N-by-M matrix
+*  B with diagonal D and offdiagonal E, where M = N + SQRE. The
+*  algorithm computes the singular values in the SVD B = U * S * VT.
+*  The orthogonal matrices U and VT are optionally computed in
+*  compact form.
+*
+*  A related subroutine, DLASD0, computes the singular values and
+*  the singular vectors in explicit form.
+*
+*  Arguments
+*  =========
+*
+*  ICOMPQ (input) INTEGER
+*         Specifies whether singular vectors are to be computed
+*         in compact form, as follows
+*         = 0: Compute singular values only.
+*         = 1: Compute singular vectors of upper bidiagonal
+*              matrix in compact form.
+*
+*  SMLSIZ (input) INTEGER
+*         The maximum size of the subproblems at the bottom of the
+*         computation tree.
+*
+*  N      (input) INTEGER
+*         The row dimension of the upper bidiagonal matrix. This is
+*         also the dimension of the main diagonal array D.
+*
+*  SQRE   (input) INTEGER
+*         Specifies the column dimension of the bidiagonal matrix.
+*         = 0: The bidiagonal matrix has column dimension M = N;
+*         = 1: The bidiagonal matrix has column dimension M = N + 1.
+*
+*  D      (input/output) DOUBLE PRECISION array, dimension ( N )
+*         On entry D contains the main diagonal of the bidiagonal
+*         matrix. On exit D, if INFO = 0, contains its singular values.
+*
+*  E      (input) DOUBLE PRECISION array, dimension ( M-1 )
+*         Contains the subdiagonal entries of the bidiagonal matrix.
+*         On exit, E has been destroyed.
+*
+*  U      (output) DOUBLE PRECISION array,
+*         dimension ( LDU, SMLSIZ ) if ICOMPQ = 1, and not referenced
+*         if ICOMPQ = 0. If ICOMPQ = 1, on exit, U contains the left
+*         singular vector matrices of all subproblems at the bottom
+*         level.
+*
+*  LDU    (input) INTEGER, LDU = > N.
+*         The leading dimension of arrays U, VT, DIFL, DIFR, POLES,
+*         GIVNUM, and Z.
+*
+*  VT     (output) DOUBLE PRECISION array,
+*         dimension ( LDU, SMLSIZ+1 ) if ICOMPQ = 1, and not referenced
+*         if ICOMPQ = 0. If ICOMPQ = 1, on exit, VT' contains the right
+*         singular vector matrices of all subproblems at the bottom
+*         level.
+*
+*  K      (output) INTEGER array,
+*         dimension ( N ) if ICOMPQ = 1 and dimension 1 if ICOMPQ = 0.
+*         If ICOMPQ = 1, on exit, K(I) is the dimension of the I-th
+*         secular equation on the computation tree.
+*
+*  DIFL   (output) DOUBLE PRECISION array, dimension ( LDU, NLVL ),
+*         where NLVL = floor(log_2 (N/SMLSIZ))).
+*
+*  DIFR   (output) DOUBLE PRECISION array,
+*                  dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1 and
+*                  dimension ( N ) if ICOMPQ = 0.
+*         If ICOMPQ = 1, on exit, DIFL(1:N, I) and DIFR(1:N, 2 * I - 1)
+*         record distances between singular values on the I-th
+*         level and singular values on the (I -1)-th level, and
+*         DIFR(1:N, 2 * I ) contains the normalizing factors for
+*         the right singular vector matrix. See DLASD8 for details.
+*
+*  Z      (output) DOUBLE PRECISION array,
+*                  dimension ( LDU, NLVL ) if ICOMPQ = 1 and
+*                  dimension ( N ) if ICOMPQ = 0.
+*         The first K elements of Z(1, I) contain the components of
+*         the deflation-adjusted updating row vector for subproblems
+*         on the I-th level.
+*
+*  POLES  (output) DOUBLE PRECISION array,
+*         dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced
+*         if ICOMPQ = 0. If ICOMPQ = 1, on exit, POLES(1, 2*I - 1) and
+*         POLES(1, 2*I) contain  the new and old singular values
+*         involved in the secular equations on the I-th level.
+*
+*  GIVPTR (output) INTEGER array,
+*         dimension ( N ) if ICOMPQ = 1, and not referenced if
+*         ICOMPQ = 0. If ICOMPQ = 1, on exit, GIVPTR( I ) records
+*         the number of Givens rotations performed on the I-th
+*         problem on the computation tree.
+*
+*  GIVCOL (output) INTEGER array,
+*         dimension ( LDGCOL, 2 * NLVL ) if ICOMPQ = 1, and not
+*         referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I,
+*         GIVCOL(1, 2 *I - 1) and GIVCOL(1, 2 *I) record the locations
+*         of Givens rotations performed on the I-th level on the
+*         computation tree.
+*
+*  LDGCOL (input) INTEGER, LDGCOL = > N.
+*         The leading dimension of arrays GIVCOL and PERM.
+*
+*  PERM   (output) INTEGER array,
+*         dimension ( LDGCOL, NLVL ) if ICOMPQ = 1, and not referenced
+*         if ICOMPQ = 0. If ICOMPQ = 1, on exit, PERM(1, I) records
+*         permutations done on the I-th level of the computation tree.
+*
+*  GIVNUM (output) DOUBLE PRECISION array,
+*         dimension ( LDU,  2 * NLVL ) if ICOMPQ = 1, and not
+*         referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I,
+*         GIVNUM(1, 2 *I - 1) and GIVNUM(1, 2 *I) record the C- and S-
+*         values of Givens rotations performed on the I-th level on
+*         the computation tree.
+*
+*  C      (output) DOUBLE PRECISION array,
+*         dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0.
+*         If ICOMPQ = 1 and the I-th subproblem is not square, on exit,
+*         C( I ) contains the C-value of a Givens rotation related to
+*         the right null space of the I-th subproblem.
+*
+*  S      (output) DOUBLE PRECISION array, dimension ( N ) if
+*         ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1
+*         and the I-th subproblem is not square, on exit, S( I )
+*         contains the S-value of a Givens rotation related to
+*         the right null space of the I-th subproblem.
+*
+*  WORK   (workspace) DOUBLE PRECISION array, dimension
+*         (6 * N + (SMLSIZ + 1)*(SMLSIZ + 1)).
+*
+*  IWORK  (workspace) INTEGER array.
+*         Dimension must be at least (7 * N).
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = 1, an singular value did not converge
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, I1, IC, IDXQ, IDXQI, IM1, INODE, ITEMP, IWK,
+     $                   J, LF, LL, LVL, LVL2, M, NCC, ND, NDB1, NDIML,
+     $                   NDIMR, NL, NLF, NLP1, NLVL, NR, NRF, NRP1, NRU,
+     $                   NWORK1, NWORK2, SMLSZP, SQREI, VF, VFI, VL, VLI
+      DOUBLE PRECISION   ALPHA, BETA
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLASD6, DLASDQ, DLASDT, DLASET, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( SMLSIZ.LT.3 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN
+         INFO = -4
+      ELSE IF( LDU.LT.( N+SQRE ) ) THEN
+         INFO = -8
+      ELSE IF( LDGCOL.LT.N ) THEN
+         INFO = -17
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLASDA', -INFO )
+         RETURN
+      END IF
+*
+      M = N + SQRE
+*
+*     If the input matrix is too small, call DLASDQ to find the SVD.
+*
+      IF( N.LE.SMLSIZ ) THEN
+         IF( ICOMPQ.EQ.0 ) THEN
+            CALL DLASDQ( 'U', SQRE, N, 0, 0, 0, D, E, VT, LDU, U, LDU,
+     $                   U, LDU, WORK, INFO )
+         ELSE
+            CALL DLASDQ( 'U', SQRE, N, M, N, 0, D, E, VT, LDU, U, LDU,
+     $                   U, LDU, WORK, INFO )
+         END IF
+         RETURN
+      END IF
+*
+*     Book-keeping and  set up the computation tree.
+*
+      INODE = 1
+      NDIML = INODE + N
+      NDIMR = NDIML + N
+      IDXQ = NDIMR + N
+      IWK = IDXQ + N
+*
+      NCC = 0
+      NRU = 0
+*
+      SMLSZP = SMLSIZ + 1
+      VF = 1
+      VL = VF + M
+      NWORK1 = VL + M
+      NWORK2 = NWORK1 + SMLSZP*SMLSZP
+*
+      CALL DLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ),
+     $             IWORK( NDIMR ), SMLSIZ )
+*
+*     for the nodes on bottom level of the tree, solve
+*     their subproblems by DLASDQ.
+*
+      NDB1 = ( ND+1 ) / 2
+      DO 30 I = NDB1, ND
+*
+*        IC : center row of each node
+*        NL : number of rows of left  subproblem
+*        NR : number of rows of right subproblem
+*        NLF: starting row of the left   subproblem
+*        NRF: starting row of the right  subproblem
+*
+         I1 = I - 1
+         IC = IWORK( INODE+I1 )
+         NL = IWORK( NDIML+I1 )
+         NLP1 = NL + 1
+         NR = IWORK( NDIMR+I1 )
+         NLF = IC - NL
+         NRF = IC + 1
+         IDXQI = IDXQ + NLF - 2
+         VFI = VF + NLF - 1
+         VLI = VL + NLF - 1
+         SQREI = 1
+         IF( ICOMPQ.EQ.0 ) THEN
+            CALL DLASET( 'A', NLP1, NLP1, ZERO, ONE, WORK( NWORK1 ),
+     $                   SMLSZP )
+            CALL DLASDQ( 'U', SQREI, NL, NLP1, NRU, NCC, D( NLF ),
+     $                   E( NLF ), WORK( NWORK1 ), SMLSZP,
+     $                   WORK( NWORK2 ), NL, WORK( NWORK2 ), NL,
+     $                   WORK( NWORK2 ), INFO )
+            ITEMP = NWORK1 + NL*SMLSZP
+            CALL DCOPY( NLP1, WORK( NWORK1 ), 1, WORK( VFI ), 1 )
+            CALL DCOPY( NLP1, WORK( ITEMP ), 1, WORK( VLI ), 1 )
+         ELSE
+            CALL DLASET( 'A', NL, NL, ZERO, ONE, U( NLF, 1 ), LDU )
+            CALL DLASET( 'A', NLP1, NLP1, ZERO, ONE, VT( NLF, 1 ), LDU )
+            CALL DLASDQ( 'U', SQREI, NL, NLP1, NL, NCC, D( NLF ),
+     $                   E( NLF ), VT( NLF, 1 ), LDU, U( NLF, 1 ), LDU,
+     $                   U( NLF, 1 ), LDU, WORK( NWORK1 ), INFO )
+            CALL DCOPY( NLP1, VT( NLF, 1 ), 1, WORK( VFI ), 1 )
+            CALL DCOPY( NLP1, VT( NLF, NLP1 ), 1, WORK( VLI ), 1 )
+         END IF
+         IF( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+         DO 10 J = 1, NL
+            IWORK( IDXQI+J ) = J
+   10    CONTINUE
+         IF( ( I.EQ.ND ) .AND. ( SQRE.EQ.0 ) ) THEN
+            SQREI = 0
+         ELSE
+            SQREI = 1
+         END IF
+         IDXQI = IDXQI + NLP1
+         VFI = VFI + NLP1
+         VLI = VLI + NLP1
+         NRP1 = NR + SQREI
+         IF( ICOMPQ.EQ.0 ) THEN
+            CALL DLASET( 'A', NRP1, NRP1, ZERO, ONE, WORK( NWORK1 ),
+     $                   SMLSZP )
+            CALL DLASDQ( 'U', SQREI, NR, NRP1, NRU, NCC, D( NRF ),
+     $                   E( NRF ), WORK( NWORK1 ), SMLSZP,
+     $                   WORK( NWORK2 ), NR, WORK( NWORK2 ), NR,
+     $                   WORK( NWORK2 ), INFO )
+            ITEMP = NWORK1 + ( NRP1-1 )*SMLSZP
+            CALL DCOPY( NRP1, WORK( NWORK1 ), 1, WORK( VFI ), 1 )
+            CALL DCOPY( NRP1, WORK( ITEMP ), 1, WORK( VLI ), 1 )
+         ELSE
+            CALL DLASET( 'A', NR, NR, ZERO, ONE, U( NRF, 1 ), LDU )
+            CALL DLASET( 'A', NRP1, NRP1, ZERO, ONE, VT( NRF, 1 ), LDU )
+            CALL DLASDQ( 'U', SQREI, NR, NRP1, NR, NCC, D( NRF ),
+     $                   E( NRF ), VT( NRF, 1 ), LDU, U( NRF, 1 ), LDU,
+     $                   U( NRF, 1 ), LDU, WORK( NWORK1 ), INFO )
+            CALL DCOPY( NRP1, VT( NRF, 1 ), 1, WORK( VFI ), 1 )
+            CALL DCOPY( NRP1, VT( NRF, NRP1 ), 1, WORK( VLI ), 1 )
+         END IF
+         IF( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+         DO 20 J = 1, NR
+            IWORK( IDXQI+J ) = J
+   20    CONTINUE
+   30 CONTINUE
+*
+*     Now conquer each subproblem bottom-up.
+*
+      J = 2**NLVL
+      DO 50 LVL = NLVL, 1, -1
+         LVL2 = LVL*2 - 1
+*
+*        Find the first node LF and last node LL on
+*        the current level LVL.
+*
+         IF( LVL.EQ.1 ) THEN
+            LF = 1
+            LL = 1
+         ELSE
+            LF = 2**( LVL-1 )
+            LL = 2*LF - 1
+         END IF
+         DO 40 I = LF, LL
+            IM1 = I - 1
+            IC = IWORK( INODE+IM1 )
+            NL = IWORK( NDIML+IM1 )
+            NR = IWORK( NDIMR+IM1 )
+            NLF = IC - NL
+            NRF = IC + 1
+            IF( I.EQ.LL ) THEN
+               SQREI = SQRE
+            ELSE
+               SQREI = 1
+            END IF
+            VFI = VF + NLF - 1
+            VLI = VL + NLF - 1
+            IDXQI = IDXQ + NLF - 1
+            ALPHA = D( IC )
+            BETA = E( IC )
+            IF( ICOMPQ.EQ.0 ) THEN
+               CALL DLASD6( ICOMPQ, NL, NR, SQREI, D( NLF ),
+     $                      WORK( VFI ), WORK( VLI ), ALPHA, BETA,
+     $                      IWORK( IDXQI ), PERM, GIVPTR( 1 ), GIVCOL,
+     $                      LDGCOL, GIVNUM, LDU, POLES, DIFL, DIFR, Z,
+     $                      K( 1 ), C( 1 ), S( 1 ), WORK( NWORK1 ),
+     $                      IWORK( IWK ), INFO )
+            ELSE
+               J = J - 1
+               CALL DLASD6( ICOMPQ, NL, NR, SQREI, D( NLF ),
+     $                      WORK( VFI ), WORK( VLI ), ALPHA, BETA,
+     $                      IWORK( IDXQI ), PERM( NLF, LVL ),
+     $                      GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
+     $                      GIVNUM( NLF, LVL2 ), LDU,
+     $                      POLES( NLF, LVL2 ), DIFL( NLF, LVL ),
+     $                      DIFR( NLF, LVL2 ), Z( NLF, LVL ), K( J ),
+     $                      C( J ), S( J ), WORK( NWORK1 ),
+     $                      IWORK( IWK ), INFO )
+            END IF
+            IF( INFO.NE.0 ) THEN
+               RETURN
+            END IF
+   40    CONTINUE
+   50 CONTINUE
+*
+      RETURN
+*
+*     End of DLASDA
+*
+      END
diff --git a/costa/native/external/lapack/dlasdq.f b/costa/native/external/lapack/dlasdq.f
new file mode 100644
index 000000000..9bc3134f3
--- /dev/null
+++ b/costa/native/external/lapack/dlasdq.f
@@ -0,0 +1,317 @@
+      SUBROUTINE DLASDQ( UPLO, SQRE, N, NCVT, NRU, NCC, D, E, VT, LDVT,
+     $                   U, LDU, C, LDC, WORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU, SQRE
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   C( LDC, * ), D( * ), E( * ), U( LDU, * ),
+     $                   VT( LDVT, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLASDQ computes the singular value decomposition (SVD) of a real
+*  (upper or lower) bidiagonal matrix with diagonal D and offdiagonal
+*  E, accumulating the transformations if desired. Letting B denote
+*  the input bidiagonal matrix, the algorithm computes orthogonal
+*  matrices Q and P such that B = Q * S * P' (P' denotes the transpose
+*  of P). The singular values S are overwritten on D.
+*
+*  The input matrix U  is changed to U  * Q  if desired.
+*  The input matrix VT is changed to P' * VT if desired.
+*  The input matrix C  is changed to Q' * C  if desired.
+*
+*  See "Computing  Small Singular Values of Bidiagonal Matrices With
+*  Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan,
+*  LAPACK Working Note #3, for a detailed description of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  UPLO  (input) CHARACTER*1
+*        On entry, UPLO specifies whether the input bidiagonal matrix
+*        is upper or lower bidiagonal, and wether it is square are
+*        not.
+*           UPLO = 'U' or 'u'   B is upper bidiagonal.
+*           UPLO = 'L' or 'l'   B is lower bidiagonal.
+*
+*  SQRE  (input) INTEGER
+*        = 0: then the input matrix is N-by-N.
+*        = 1: then the input matrix is N-by-(N+1) if UPLU = 'U' and
+*             (N+1)-by-N if UPLU = 'L'.
+*
+*        The bidiagonal matrix has
+*        N = NL + NR + 1 rows and
+*        M = N + SQRE >= N columns.
+*
+*  N     (input) INTEGER
+*        On entry, N specifies the number of rows and columns
+*        in the matrix. N must be at least 0.
+*
+*  NCVT  (input) INTEGER
+*        On entry, NCVT specifies the number of columns of
+*        the matrix VT. NCVT must be at least 0.
+*
+*  NRU   (input) INTEGER
+*        On entry, NRU specifies the number of rows of
+*        the matrix U. NRU must be at least 0.
+*
+*  NCC   (input) INTEGER
+*        On entry, NCC specifies the number of columns of
+*        the matrix C. NCC must be at least 0.
+*
+*  D     (input/output) DOUBLE PRECISION array, dimension (N)
+*        On entry, D contains the diagonal entries of the
+*        bidiagonal matrix whose SVD is desired. On normal exit,
+*        D contains the singular values in ascending order.
+*
+*  E     (input/output) DOUBLE PRECISION array.
+*        dimension is (N-1) if SQRE = 0 and N if SQRE = 1.
+*        On entry, the entries of E contain the offdiagonal entries
+*        of the bidiagonal matrix whose SVD is desired. On normal
+*        exit, E will contain 0. If the algorithm does not converge,
+*        D and E will contain the diagonal and superdiagonal entries
+*        of a bidiagonal matrix orthogonally equivalent to the one
+*        given as input.
+*
+*  VT    (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT)
+*        On entry, contains a matrix which on exit has been
+*        premultiplied by P', dimension N-by-NCVT if SQRE = 0
+*        and (N+1)-by-NCVT if SQRE = 1 (not referenced if NCVT=0).
+*
+*  LDVT  (input) INTEGER
+*        On entry, LDVT specifies the leading dimension of VT as
+*        declared in the calling (sub) program. LDVT must be at
+*        least 1. If NCVT is nonzero LDVT must also be at least N.
+*
+*  U     (input/output) DOUBLE PRECISION array, dimension (LDU, N)
+*        On entry, contains a  matrix which on exit has been
+*        postmultiplied by Q, dimension NRU-by-N if SQRE = 0
+*        and NRU-by-(N+1) if SQRE = 1 (not referenced if NRU=0).
+*
+*  LDU   (input) INTEGER
+*        On entry, LDU  specifies the leading dimension of U as
+*        declared in the calling (sub) program. LDU must be at
+*        least max( 1, NRU ) .
+*
+*  C     (input/output) DOUBLE PRECISION array, dimension (LDC, NCC)
+*        On entry, contains an N-by-NCC matrix which on exit
+*        has been premultiplied by Q'  dimension N-by-NCC if SQRE = 0
+*        and (N+1)-by-NCC if SQRE = 1 (not referenced if NCC=0).
+*
+*  LDC   (input) INTEGER
+*        On entry, LDC  specifies the leading dimension of C as
+*        declared in the calling (sub) program. LDC must be at
+*        least 1. If NCC is nonzero, LDC must also be at least N.
+*
+*  WORK  (workspace) DOUBLE PRECISION array, dimension (4*N)
+*        Workspace. Only referenced if one of NCVT, NRU, or NCC is
+*        nonzero, and if N is at least 2.
+*
+*  INFO  (output) INTEGER
+*        On exit, a value of 0 indicates a successful exit.
+*        If INFO < 0, argument number -INFO is illegal.
+*        If INFO > 0, the algorithm did not converge, and INFO
+*        specifies how many superdiagonals did not converge.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ROTATE
+      INTEGER            I, ISUB, IUPLO, J, NP1, SQRE1
+      DOUBLE PRECISION   CS, R, SMIN, SN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DBDSQR, DLARTG, DLASR, DSWAP, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IUPLO = 0
+      IF( LSAME( UPLO, 'U' ) )
+     $   IUPLO = 1
+      IF( LSAME( UPLO, 'L' ) )
+     $   IUPLO = 2
+      IF( IUPLO.EQ.0 ) THEN
+         INFO = -1
+      ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NCVT.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRU.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( NCC.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( ( NCVT.EQ.0 .AND. LDVT.LT.1 ) .OR.
+     $         ( NCVT.GT.0 .AND. LDVT.LT.MAX( 1, N ) ) ) THEN
+         INFO = -10
+      ELSE IF( LDU.LT.MAX( 1, NRU ) ) THEN
+         INFO = -12
+      ELSE IF( ( NCC.EQ.0 .AND. LDC.LT.1 ) .OR.
+     $         ( NCC.GT.0 .AND. LDC.LT.MAX( 1, N ) ) ) THEN
+         INFO = -14
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLASDQ', -INFO )
+         RETURN
+      END IF
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     ROTATE is true if any singular vectors desired, false otherwise
+*
+      ROTATE = ( NCVT.GT.0 ) .OR. ( NRU.GT.0 ) .OR. ( NCC.GT.0 )
+      NP1 = N + 1
+      SQRE1 = SQRE
+*
+*     If matrix non-square upper bidiagonal, rotate to be lower
+*     bidiagonal.  The rotations are on the right.
+*
+      IF( ( IUPLO.EQ.1 ) .AND. ( SQRE1.EQ.1 ) ) THEN
+         DO 10 I = 1, N - 1
+            CALL DLARTG( D( I ), E( I ), CS, SN, R )
+            D( I ) = R
+            E( I ) = SN*D( I+1 )
+            D( I+1 ) = CS*D( I+1 )
+            IF( ROTATE ) THEN
+               WORK( I ) = CS
+               WORK( N+I ) = SN
+            END IF
+   10    CONTINUE
+         CALL DLARTG( D( N ), E( N ), CS, SN, R )
+         D( N ) = R
+         E( N ) = ZERO
+         IF( ROTATE ) THEN
+            WORK( N ) = CS
+            WORK( N+N ) = SN
+         END IF
+         IUPLO = 2
+         SQRE1 = 0
+*
+*        Update singular vectors if desired.
+*
+         IF( NCVT.GT.0 )
+     $      CALL DLASR( 'L', 'V', 'F', NP1, NCVT, WORK( 1 ),
+     $                  WORK( NP1 ), VT, LDVT )
+      END IF
+*
+*     If matrix lower bidiagonal, rotate to be upper bidiagonal
+*     by applying Givens rotations on the left.
+*
+      IF( IUPLO.EQ.2 ) THEN
+         DO 20 I = 1, N - 1
+            CALL DLARTG( D( I ), E( I ), CS, SN, R )
+            D( I ) = R
+            E( I ) = SN*D( I+1 )
+            D( I+1 ) = CS*D( I+1 )
+            IF( ROTATE ) THEN
+               WORK( I ) = CS
+               WORK( N+I ) = SN
+            END IF
+   20    CONTINUE
+*
+*        If matrix (N+1)-by-N lower bidiagonal, one additional
+*        rotation is needed.
+*
+         IF( SQRE1.EQ.1 ) THEN
+            CALL DLARTG( D( N ), E( N ), CS, SN, R )
+            D( N ) = R
+            IF( ROTATE ) THEN
+               WORK( N ) = CS
+               WORK( N+N ) = SN
+            END IF
+         END IF
+*
+*        Update singular vectors if desired.
+*
+         IF( NRU.GT.0 ) THEN
+            IF( SQRE1.EQ.0 ) THEN
+               CALL DLASR( 'R', 'V', 'F', NRU, N, WORK( 1 ),
+     $                     WORK( NP1 ), U, LDU )
+            ELSE
+               CALL DLASR( 'R', 'V', 'F', NRU, NP1, WORK( 1 ),
+     $                     WORK( NP1 ), U, LDU )
+            END IF
+         END IF
+         IF( NCC.GT.0 ) THEN
+            IF( SQRE1.EQ.0 ) THEN
+               CALL DLASR( 'L', 'V', 'F', N, NCC, WORK( 1 ),
+     $                     WORK( NP1 ), C, LDC )
+            ELSE
+               CALL DLASR( 'L', 'V', 'F', NP1, NCC, WORK( 1 ),
+     $                     WORK( NP1 ), C, LDC )
+            END IF
+         END IF
+      END IF
+*
+*     Call DBDSQR to compute the SVD of the reduced real
+*     N-by-N upper bidiagonal matrix.
+*
+      CALL DBDSQR( 'U', N, NCVT, NRU, NCC, D, E, VT, LDVT, U, LDU, C,
+     $             LDC, WORK, INFO )
+*
+*     Sort the singular values into ascending order (insertion sort on
+*     singular values, but only one transposition per singular vector)
+*
+      DO 40 I = 1, N
+*
+*        Scan for smallest D(I).
+*
+         ISUB = I
+         SMIN = D( I )
+         DO 30 J = I + 1, N
+            IF( D( J ).LT.SMIN ) THEN
+               ISUB = J
+               SMIN = D( J )
+            END IF
+   30    CONTINUE
+         IF( ISUB.NE.I ) THEN
+*
+*           Swap singular values and vectors.
+*
+            D( ISUB ) = D( I )
+            D( I ) = SMIN
+            IF( NCVT.GT.0 )
+     $         CALL DSWAP( NCVT, VT( ISUB, 1 ), LDVT, VT( I, 1 ), LDVT )
+            IF( NRU.GT.0 )
+     $         CALL DSWAP( NRU, U( 1, ISUB ), 1, U( 1, I ), 1 )
+            IF( NCC.GT.0 )
+     $         CALL DSWAP( NCC, C( ISUB, 1 ), LDC, C( I, 1 ), LDC )
+         END IF
+   40 CONTINUE
+*
+      RETURN
+*
+*     End of DLASDQ
+*
+      END
diff --git a/costa/native/external/lapack/dlasdt.f b/costa/native/external/lapack/dlasdt.f
new file mode 100644
index 000000000..bb7842d07
--- /dev/null
+++ b/costa/native/external/lapack/dlasdt.f
@@ -0,0 +1,106 @@
+      SUBROUTINE DLASDT( N, LVL, ND, INODE, NDIML, NDIMR, MSUB )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            LVL, MSUB, N, ND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            INODE( * ), NDIML( * ), NDIMR( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLASDT creates a tree of subproblems for bidiagonal divide and
+*  conquer.
+*
+*  Arguments
+*  =========
+*
+*   N      (input) INTEGER
+*          On entry, the number of diagonal elements of the
+*          bidiagonal matrix.
+*
+*   LVL    (output) INTEGER
+*          On exit, the number of levels on the computation tree.
+*
+*   ND     (output) INTEGER
+*          On exit, the number of nodes on the tree.
+*
+*   INODE  (output) INTEGER array, dimension ( N )
+*          On exit, centers of subproblems.
+*
+*   NDIML  (output) INTEGER array, dimension ( N )
+*          On exit, row dimensions of left children.
+*
+*   NDIMR  (output) INTEGER array, dimension ( N )
+*          On exit, row dimensions of right children.
+*
+*   MSUB   (input) INTEGER.
+*          On entry, the maximum row dimension each subproblem at the
+*          bottom of the tree can be of.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IL, IR, LLST, MAXN, NCRNT, NLVL
+      DOUBLE PRECISION   TEMP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, INT, LOG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Find the number of levels on the tree.
+*
+      MAXN = MAX( 1, N )
+      TEMP = LOG( DBLE( MAXN ) / DBLE( MSUB+1 ) ) / LOG( TWO )
+      LVL = INT( TEMP ) + 1
+*
+      I = N / 2
+      INODE( 1 ) = I + 1
+      NDIML( 1 ) = I
+      NDIMR( 1 ) = N - I - 1
+      IL = 0
+      IR = 1
+      LLST = 1
+      DO 20 NLVL = 1, LVL - 1
+*
+*        Constructing the tree at (NLVL+1)-st level. The number of
+*        nodes created on this level is LLST * 2.
+*
+         DO 10 I = 0, LLST - 1
+            IL = IL + 2
+            IR = IR + 2
+            NCRNT = LLST + I
+            NDIML( IL ) = NDIML( NCRNT ) / 2
+            NDIMR( IL ) = NDIML( NCRNT ) - NDIML( IL ) - 1
+            INODE( IL ) = INODE( NCRNT ) - NDIMR( IL ) - 1
+            NDIML( IR ) = NDIMR( NCRNT ) / 2
+            NDIMR( IR ) = NDIMR( NCRNT ) - NDIML( IR ) - 1
+            INODE( IR ) = INODE( NCRNT ) + NDIML( IR ) + 1
+   10    CONTINUE
+         LLST = LLST*2
+   20 CONTINUE
+      ND = LLST*2 - 1
+*
+      RETURN
+*
+*     End of DLASDT
+*
+      END
diff --git a/costa/native/external/lapack/dlaset.f b/costa/native/external/lapack/dlaset.f
new file mode 100644
index 000000000..14af572d4
--- /dev/null
+++ b/costa/native/external/lapack/dlaset.f
@@ -0,0 +1,115 @@
+      SUBROUTINE DLASET( UPLO, M, N, ALPHA, BETA, A, LDA )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            LDA, M, N
+      DOUBLE PRECISION   ALPHA, BETA
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLASET initializes an m-by-n matrix A to BETA on the diagonal and
+*  ALPHA on the offdiagonals.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies the part of the matrix A to be set.
+*          = 'U':      Upper triangular part is set; the strictly lower
+*                      triangular part of A is not changed.
+*          = 'L':      Lower triangular part is set; the strictly upper
+*                      triangular part of A is not changed.
+*          Otherwise:  All of the matrix A is set.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  ALPHA   (input) DOUBLE PRECISION
+*          The constant to which the offdiagonal elements are to be set.
+*
+*  BETA    (input) DOUBLE PRECISION
+*          The constant to which the diagonal elements are to be set.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On exit, the leading m-by-n submatrix of A is set as follows:
+*
+*          if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n,
+*          if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n,
+*          otherwise,     A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j,
+*
+*          and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+* =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+      IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*        Set the strictly upper triangular or trapezoidal part of the
+*        array to ALPHA.
+*
+         DO 20 J = 2, N
+            DO 10 I = 1, MIN( J-1, M )
+               A( I, J ) = ALPHA
+   10       CONTINUE
+   20    CONTINUE
+*
+      ELSE IF( LSAME( UPLO, 'L' ) ) THEN
+*
+*        Set the strictly lower triangular or trapezoidal part of the
+*        array to ALPHA.
+*
+         DO 40 J = 1, MIN( M, N )
+            DO 30 I = J + 1, M
+               A( I, J ) = ALPHA
+   30       CONTINUE
+   40    CONTINUE
+*
+      ELSE
+*
+*        Set the leading m-by-n submatrix to ALPHA.
+*
+         DO 60 J = 1, N
+            DO 50 I = 1, M
+               A( I, J ) = ALPHA
+   50       CONTINUE
+   60    CONTINUE
+      END IF
+*
+*     Set the first min(M,N) diagonal elements to BETA.
+*
+      DO 70 I = 1, MIN( M, N )
+         A( I, I ) = BETA
+   70 CONTINUE
+*
+      RETURN
+*
+*     End of DLASET
+*
+      END
diff --git a/costa/native/external/lapack/dlasq1.f b/costa/native/external/lapack/dlasq1.f
new file mode 100644
index 000000000..0bbe3f0ed
--- /dev/null
+++ b/costa/native/external/lapack/dlasq1.f
@@ -0,0 +1,149 @@
+      SUBROUTINE DLASQ1( N, D, E, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999 
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * ), E( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLASQ1 computes the singular values of a real N-by-N bidiagonal
+*  matrix with diagonal D and off-diagonal E. The singular values
+*  are computed to high relative accuracy, in the absence of
+*  denormalization, underflow and overflow. The algorithm was first
+*  presented in
+*
+*  "Accurate singular values and differential qd algorithms" by K. V.
+*  Fernando and B. N. Parlett, Numer. Math., Vol-67, No. 2, pp. 191-230,
+*  1994,
+*
+*  and the present implementation is described in "An implementation of
+*  the dqds Algorithm (Positive Case)", LAPACK Working Note.
+*
+*  Arguments
+*  =========
+*
+*  N     (input) INTEGER
+*        The number of rows and columns in the matrix. N >= 0.
+*
+*  D     (input/output) DOUBLE PRECISION array, dimension (N)
+*        On entry, D contains the diagonal elements of the
+*        bidiagonal matrix whose SVD is desired. On normal exit,
+*        D contains the singular values in decreasing order.
+*
+*  E     (input/output) DOUBLE PRECISION array, dimension (N)
+*        On entry, elements E(1:N-1) contain the off-diagonal elements
+*        of the bidiagonal matrix whose SVD is desired.
+*        On exit, E is overwritten.
+*
+*  WORK  (workspace) DOUBLE PRECISION array, dimension (4*N)
+*
+*  INFO  (output) INTEGER
+*        = 0: successful exit
+*        < 0: if INFO = -i, the i-th argument had an illegal value
+*        > 0: the algorithm failed
+*             = 1, a split was marked by a positive value in E
+*             = 2, current block of Z not diagonalized after 30*N
+*                  iterations (in inner while loop)
+*             = 3, termination criterion of outer while loop not met 
+*                  (program created more than N unreduced blocks)
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IINFO
+      DOUBLE PRECISION   EPS, SCALE, SAFMIN, SIGMN, SIGMX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLAS2, DLASQ2, DLASRT, XERBLA
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -2
+         CALL XERBLA( 'DLASQ1', -INFO )
+         RETURN
+      ELSE IF( N.EQ.0 ) THEN
+         RETURN
+      ELSE IF( N.EQ.1 ) THEN
+         D( 1 ) = ABS( D( 1 ) )
+         RETURN
+      ELSE IF( N.EQ.2 ) THEN
+         CALL DLAS2( D( 1 ), E( 1 ), D( 2 ), SIGMN, SIGMX )
+         D( 1 ) = SIGMX
+         D( 2 ) = SIGMN
+         RETURN
+      END IF
+*
+*     Estimate the largest singular value.
+*
+      SIGMX = ZERO
+      DO 10 I = 1, N - 1
+         D( I ) = ABS( D( I ) )
+         SIGMX = MAX( SIGMX, ABS( E( I ) ) )
+   10 CONTINUE
+      D( N ) = ABS( D( N ) )
+*
+*     Early return if SIGMX is zero (matrix is already diagonal).
+*
+      IF( SIGMX.EQ.ZERO ) THEN
+         CALL DLASRT( 'D', N, D, IINFO )
+         RETURN
+      END IF
+*
+      DO 20 I = 1, N
+         SIGMX = MAX( SIGMX, D( I ) )
+   20 CONTINUE
+*
+*     Copy D and E into WORK (in the Z format) and scale (squaring the
+*     input data makes scaling by a power of the radix pointless).
+*
+      EPS = DLAMCH( 'Precision' )
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      SCALE = SQRT( EPS / SAFMIN )
+      CALL DCOPY( N, D, 1, WORK( 1 ), 2 )
+      CALL DCOPY( N-1, E, 1, WORK( 2 ), 2 )
+      CALL DLASCL( 'G', 0, 0, SIGMX, SCALE, 2*N-1, 1, WORK, 2*N-1,
+     $             IINFO )
+*         
+*     Compute the q's and e's.
+*
+      DO 30 I = 1, 2*N - 1
+         WORK( I ) = WORK( I )**2
+   30 CONTINUE
+      WORK( 2*N ) = ZERO
+*
+      CALL DLASQ2( N, WORK, INFO )
+*
+      IF( INFO.EQ.0 ) THEN
+         DO 40 I = 1, N
+            D( I ) = SQRT( WORK( I ) )
+   40    CONTINUE
+         CALL DLASCL( 'G', 0, 0, SCALE, SIGMX, N, 1, D, N, IINFO )
+      END IF
+*
+      RETURN
+*
+*     End of DLASQ1
+*
+      END
diff --git a/costa/native/external/lapack/dlasq2.f b/costa/native/external/lapack/dlasq2.f
new file mode 100644
index 000000000..271111a82
--- /dev/null
+++ b/costa/native/external/lapack/dlasq2.f
@@ -0,0 +1,436 @@
+      SUBROUTINE DLASQ2( N, Z, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999 
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLASQ2 computes all the eigenvalues of the symmetric positive 
+*  definite tridiagonal matrix associated with the qd array Z to high
+*  relative accuracy are computed to high relative accuracy, in the
+*  absence of denormalization, underflow and overflow.
+*
+*  To see the relation of Z to the tridiagonal matrix, let L be a
+*  unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and
+*  let U be an upper bidiagonal matrix with 1's above and diagonal
+*  Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the
+*  symmetric tridiagonal to which it is similar.
+*
+*  Note : DLASQ2 defines a logical variable, IEEE, which is true
+*  on machines which follow ieee-754 floating-point standard in their
+*  handling of infinities and NaNs, and false otherwise. This variable
+*  is passed to DLASQ3.
+*
+*  Arguments
+*  =========
+*
+*  N     (input) INTEGER
+*        The number of rows and columns in the matrix. N >= 0.
+*
+*  Z     (workspace) DOUBLE PRECISION array, dimension ( 4*N )
+*        On entry Z holds the qd array. On exit, entries 1 to N hold
+*        the eigenvalues in decreasing order, Z( 2*N+1 ) holds the
+*        trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If
+*        N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 )
+*        holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of
+*        shifts that failed.
+*
+*  INFO  (output) INTEGER
+*        = 0: successful exit
+*        < 0: if the i-th argument is a scalar and had an illegal
+*             value, then INFO = -i, if the i-th argument is an
+*             array and the j-entry had an illegal value, then
+*             INFO = -(i*100+j)
+*        > 0: the algorithm failed
+*              = 1, a split was marked by a positive value in E
+*              = 2, current block of Z not diagonalized after 30*N
+*                   iterations (in inner while loop)
+*              = 3, termination criterion of outer while loop not met 
+*                   (program created more than N unreduced blocks)
+*
+*  Further Details
+*  ===============
+*  Local Variables: I0:N0 defines a current unreduced segment of Z.
+*  The shifts are accumulated in SIGMA. Iteration count is in ITER.
+*  Ping-pong is controlled by PP (alternates between 0 and 1).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   CBIAS
+      PARAMETER          ( CBIAS = 1.50D0 )
+      DOUBLE PRECISION   ZERO, HALF, ONE, TWO, FOUR, HUNDRD
+      PARAMETER          ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0,
+     $                     TWO = 2.0D0, FOUR = 4.0D0, HUNDRD = 100.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            IEEE
+      INTEGER            I0, I4, IINFO, IPN4, ITER, IWHILA, IWHILB, K, 
+     $                   N0, NBIG, NDIV, NFAIL, PP, SPLT
+      DOUBLE PRECISION   D, DESIG, DMIN, E, EMAX, EMIN, EPS, OLDEMN, 
+     $                   QMAX, QMIN, S, SAFMIN, SIGMA, T, TEMP, TOL, 
+     $                   TOL2, TRACE, ZMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASQ3, DLASRT, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH, ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*      
+*     Test the input arguments.
+*     (in case DLASQ2 is not called by DLASQ1)
+*
+      INFO = 0
+      EPS = DLAMCH( 'Precision' )
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      TOL = EPS*HUNDRD
+      TOL2 = TOL**2
+*
+      IF( N.LT.0 ) THEN
+         INFO = -1
+         CALL XERBLA( 'DLASQ2', 1 )
+         RETURN
+      ELSE IF( N.EQ.0 ) THEN
+         RETURN
+      ELSE IF( N.EQ.1 ) THEN
+*
+*        1-by-1 case.
+*
+         IF( Z( 1 ).LT.ZERO ) THEN
+            INFO = -201
+            CALL XERBLA( 'DLASQ2', 2 )
+         END IF
+         RETURN
+      ELSE IF( N.EQ.2 ) THEN
+*
+*        2-by-2 case.
+*
+         IF( Z( 2 ).LT.ZERO .OR. Z( 3 ).LT.ZERO ) THEN
+            INFO = -2
+            CALL XERBLA( 'DLASQ2', 2 )
+            RETURN
+         ELSE IF( Z( 3 ).GT.Z( 1 ) ) THEN
+            D = Z( 3 )
+            Z( 3 ) = Z( 1 )
+            Z( 1 ) = D
+         END IF
+         Z( 5 ) = Z( 1 ) + Z( 2 ) + Z( 3 )
+         IF( Z( 2 ).GT.Z( 3 )*TOL2 ) THEN
+            T = HALF*( ( Z( 1 )-Z( 3 ) )+Z( 2 ) ) 
+            S = Z( 3 )*( Z( 2 ) / T )
+            IF( S.LE.T ) THEN
+               S = Z( 3 )*( Z( 2 ) / ( T*( ONE+SQRT( ONE+S / T ) ) ) )
+            ELSE
+               S = Z( 3 )*( Z( 2 ) / ( T+SQRT( T )*SQRT( T+S ) ) )
+            END IF
+            T = Z( 1 ) + ( S+Z( 2 ) )
+            Z( 3 ) = Z( 3 )*( Z( 1 ) / T )
+            Z( 1 ) = T
+         END IF
+         Z( 2 ) = Z( 3 )
+         Z( 6 ) = Z( 2 ) + Z( 1 )
+         RETURN
+      END IF
+*
+*     Check for negative data and compute sums of q's and e's.
+*
+      Z( 2*N ) = ZERO
+      EMIN = Z( 2 )
+      QMAX = ZERO
+      ZMAX = ZERO
+      D = ZERO
+      E = ZERO
+*
+      DO 10 K = 1, 2*( N-1 ), 2
+         IF( Z( K ).LT.ZERO ) THEN
+            INFO = -( 200+K )
+            CALL XERBLA( 'DLASQ2', 2 )
+            RETURN
+         ELSE IF( Z( K+1 ).LT.ZERO ) THEN
+            INFO = -( 200+K+1 )
+            CALL XERBLA( 'DLASQ2', 2 )
+            RETURN
+         END IF
+         D = D + Z( K )
+         E = E + Z( K+1 )
+         QMAX = MAX( QMAX, Z( K ) )
+         EMIN = MIN( EMIN, Z( K+1 ) )
+         ZMAX = MAX( QMAX, ZMAX, Z( K+1 ) )
+   10 CONTINUE
+      IF( Z( 2*N-1 ).LT.ZERO ) THEN
+         INFO = -( 200+2*N-1 )
+         CALL XERBLA( 'DLASQ2', 2 )
+         RETURN
+      END IF
+      D = D + Z( 2*N-1 )
+      QMAX = MAX( QMAX, Z( 2*N-1 ) )
+      ZMAX = MAX( QMAX, ZMAX )
+*
+*     Check for diagonality.
+*
+      IF( E.EQ.ZERO ) THEN
+         DO 20 K = 2, N
+            Z( K ) = Z( 2*K-1 )
+   20    CONTINUE
+         CALL DLASRT( 'D', N, Z, IINFO )
+         Z( 2*N-1 ) = D
+         RETURN
+      END IF
+*
+      TRACE = D + E
+*
+*     Check for zero data.
+*
+      IF( TRACE.EQ.ZERO ) THEN
+         Z( 2*N-1 ) = ZERO
+         RETURN
+      END IF
+*         
+*     Check whether the machine is IEEE conformable.
+*         
+      IEEE = ILAENV( 10, 'DLASQ2', 'N', 1, 2, 3, 4 ).EQ.1 .AND.
+     $       ILAENV( 11, 'DLASQ2', 'N', 1, 2, 3, 4 ).EQ.1      
+*         
+*     Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...).
+*
+      DO 30 K = 2*N, 2, -2
+         Z( 2*K ) = ZERO 
+         Z( 2*K-1 ) = Z( K ) 
+         Z( 2*K-2 ) = ZERO 
+         Z( 2*K-3 ) = Z( K-1 ) 
+   30 CONTINUE
+*
+      I0 = 1
+      N0 = N
+*
+*     Reverse the qd-array, if warranted.
+*
+      IF( CBIAS*Z( 4*I0-3 ).LT.Z( 4*N0-3 ) ) THEN
+         IPN4 = 4*( I0+N0 )
+         DO 40 I4 = 4*I0, 2*( I0+N0-1 ), 4
+            TEMP = Z( I4-3 )
+            Z( I4-3 ) = Z( IPN4-I4-3 )
+            Z( IPN4-I4-3 ) = TEMP
+            TEMP = Z( I4-1 )
+            Z( I4-1 ) = Z( IPN4-I4-5 )
+            Z( IPN4-I4-5 ) = TEMP
+   40    CONTINUE
+      END IF
+*
+*     Initial split checking via dqd and Li's test.
+*
+      PP = 0
+*
+      DO 80 K = 1, 2
+*
+         D = Z( 4*N0+PP-3 )
+         DO 50 I4 = 4*( N0-1 ) + PP, 4*I0 + PP, -4
+            IF( Z( I4-1 ).LE.TOL2*D ) THEN
+               Z( I4-1 ) = -ZERO
+               D = Z( I4-3 )
+            ELSE
+               D = Z( I4-3 )*( D / ( D+Z( I4-1 ) ) )
+            END IF
+   50    CONTINUE
+*
+*        dqd maps Z to ZZ plus Li's test.
+*
+         EMIN = Z( 4*I0+PP+1 )
+         D = Z( 4*I0+PP-3 )
+         DO 60 I4 = 4*I0 + PP, 4*( N0-1 ) + PP, 4
+            Z( I4-2*PP-2 ) = D + Z( I4-1 )
+            IF( Z( I4-1 ).LE.TOL2*D ) THEN
+               Z( I4-1 ) = -ZERO
+               Z( I4-2*PP-2 ) = D
+               Z( I4-2*PP ) = ZERO
+               D = Z( I4+1 )
+            ELSE IF( SAFMIN*Z( I4+1 ).LT.Z( I4-2*PP-2 ) .AND.
+     $               SAFMIN*Z( I4-2*PP-2 ).LT.Z( I4+1 ) ) THEN
+               TEMP = Z( I4+1 ) / Z( I4-2*PP-2 )
+               Z( I4-2*PP ) = Z( I4-1 )*TEMP
+               D = D*TEMP
+            ELSE
+               Z( I4-2*PP ) = Z( I4+1 )*( Z( I4-1 ) / Z( I4-2*PP-2 ) )
+               D = Z( I4+1 )*( D / Z( I4-2*PP-2 ) )
+            END IF
+            EMIN = MIN( EMIN, Z( I4-2*PP ) )
+   60    CONTINUE 
+         Z( 4*N0-PP-2 ) = D
+*
+*        Now find qmax.
+*
+         QMAX = Z( 4*I0-PP-2 )
+         DO 70 I4 = 4*I0 - PP + 2, 4*N0 - PP - 2, 4
+            QMAX = MAX( QMAX, Z( I4 ) )
+   70    CONTINUE
+*
+*        Prepare for the next iteration on K.
+*
+         PP = 1 - PP
+   80 CONTINUE
+*
+      ITER = 2
+      NFAIL = 0
+      NDIV = 2*( N0-I0 )
+*
+      DO 140 IWHILA = 1, N + 1
+         IF( N0.LT.1 ) 
+     $      GO TO 150
+*
+*        While array unfinished do 
+*
+*        E(N0) holds the value of SIGMA when submatrix in I0:N0
+*        splits from the rest of the array, but is negated.
+*      
+         DESIG = ZERO
+         IF( N0.EQ.N ) THEN
+            SIGMA = ZERO
+         ELSE
+            SIGMA = -Z( 4*N0-1 )
+         END IF
+         IF( SIGMA.LT.ZERO ) THEN
+            INFO = 1
+            RETURN
+         END IF
+*
+*        Find last unreduced submatrix's top index I0, find QMAX and
+*        EMIN. Find Gershgorin-type bound if Q's much greater than E's.
+*
+         EMAX = ZERO 
+         IF( N0.GT.I0 ) THEN
+            EMIN = ABS( Z( 4*N0-5 ) )
+         ELSE
+            EMIN = ZERO
+         END IF
+         QMIN = Z( 4*N0-3 )
+         QMAX = QMIN
+         DO 90 I4 = 4*N0, 8, -4
+            IF( Z( I4-5 ).LE.ZERO )
+     $         GO TO 100
+            IF( QMIN.GE.FOUR*EMAX ) THEN
+               QMIN = MIN( QMIN, Z( I4-3 ) )
+               EMAX = MAX( EMAX, Z( I4-5 ) )
+            END IF
+            QMAX = MAX( QMAX, Z( I4-7 )+Z( I4-5 ) )
+            EMIN = MIN( EMIN, Z( I4-5 ) )
+   90    CONTINUE
+         I4 = 4 
+*
+  100    CONTINUE
+         I0 = I4 / 4
+*
+*        Store EMIN for passing to DLASQ3.
+*
+         Z( 4*N0-1 ) = EMIN
+*
+*        Put -(initial shift) into DMIN.
+*
+         DMIN = -MAX( ZERO, QMIN-TWO*SQRT( QMIN )*SQRT( EMAX ) )
+*
+*        Now I0:N0 is unreduced. PP = 0 for ping, PP = 1 for pong.
+*
+         PP = 0 
+*
+         NBIG = 30*( N0-I0+1 )
+         DO 120 IWHILB = 1, NBIG
+            IF( I0.GT.N0 ) 
+     $         GO TO 130
+*
+*           While submatrix unfinished take a good dqds step.
+*
+            CALL DLASQ3( I0, N0, Z, PP, DMIN, SIGMA, DESIG, QMAX, NFAIL,
+     $                   ITER, NDIV, IEEE )
+*
+            PP = 1 - PP
+*
+*           When EMIN is very small check for splits.
+*
+            IF( PP.EQ.0 .AND. N0-I0.GE.3 ) THEN
+               IF( Z( 4*N0 ).LE.TOL2*QMAX .OR.
+     $             Z( 4*N0-1 ).LE.TOL2*SIGMA ) THEN
+                  SPLT = I0 - 1
+                  QMAX = Z( 4*I0-3 )
+                  EMIN = Z( 4*I0-1 )
+                  OLDEMN = Z( 4*I0 )
+                  DO 110 I4 = 4*I0, 4*( N0-3 ), 4
+                     IF( Z( I4 ).LE.TOL2*Z( I4-3 ) .OR.
+     $                   Z( I4-1 ).LE.TOL2*SIGMA ) THEN
+                        Z( I4-1 ) = -SIGMA
+                        SPLT = I4 / 4
+                        QMAX = ZERO
+                        EMIN = Z( I4+3 )
+                        OLDEMN = Z( I4+4 )
+                     ELSE
+                        QMAX = MAX( QMAX, Z( I4+1 ) )
+                        EMIN = MIN( EMIN, Z( I4-1 ) )
+                        OLDEMN = MIN( OLDEMN, Z( I4 ) )
+                     END IF
+  110             CONTINUE
+                  Z( 4*N0-1 ) = EMIN
+                  Z( 4*N0 ) = OLDEMN
+                  I0 = SPLT + 1
+               END IF
+            END IF
+*
+  120    CONTINUE
+*
+         INFO = 2
+         RETURN
+*
+*        end IWHILB
+*
+  130    CONTINUE
+*
+  140 CONTINUE
+*
+      INFO = 3
+      RETURN
+*
+*     end IWHILA   
+*
+  150 CONTINUE
+*      
+*     Move q's to the front.
+*      
+      DO 160 K = 2, N
+         Z( K ) = Z( 4*K-3 )
+  160 CONTINUE
+*      
+*     Sort and compute sum of eigenvalues.
+*
+      CALL DLASRT( 'D', N, Z, IINFO )
+*
+      E = ZERO
+      DO 170 K = N, 1, -1
+         E = E + Z( K )
+  170 CONTINUE
+*
+*     Store trace, sum(eigenvalues) and information on performance.
+*
+      Z( 2*N+1 ) = TRACE 
+      Z( 2*N+2 ) = E
+      Z( 2*N+3 ) = DBLE( ITER )
+      Z( 2*N+4 ) = DBLE( NDIV ) / DBLE( N**2 )
+      Z( 2*N+5 ) = HUNDRD*NFAIL / DBLE( ITER )
+      RETURN
+*
+*     End of DLASQ2
+*
+      END
diff --git a/costa/native/external/lapack/dlasq3.f b/costa/native/external/lapack/dlasq3.f
new file mode 100644
index 000000000..dd783616f
--- /dev/null
+++ b/costa/native/external/lapack/dlasq3.f
@@ -0,0 +1,298 @@
+      SUBROUTINE DLASQ3( I0, N0, Z, PP, DMIN, SIGMA, DESIG, QMAX, NFAIL,
+     $                   ITER, NDIV, IEEE )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     May 17, 2000
+*
+*     .. Scalar Arguments ..
+      LOGICAL            IEEE
+      INTEGER            I0, ITER, N0, NDIV, NFAIL, PP
+      DOUBLE PRECISION   DESIG, DMIN, QMAX, SIGMA
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLASQ3 checks for deflation, computes a shift (TAU) and calls dqds.
+*  In case of failure it changes shifts, and tries again until output
+*  is positive.
+*
+*  Arguments
+*  =========
+*
+*  I0     (input) INTEGER
+*         First index.
+*
+*  N0     (input) INTEGER
+*         Last index.
+*
+*  Z      (input) DOUBLE PRECISION array, dimension ( 4*N )
+*         Z holds the qd array.
+*
+*  PP     (input) INTEGER
+*         PP=0 for ping, PP=1 for pong.
+*
+*  DMIN   (output) DOUBLE PRECISION
+*         Minimum value of d.
+*
+*  SIGMA  (output) DOUBLE PRECISION
+*         Sum of shifts used in current segment.
+*
+*  DESIG  (input/output) DOUBLE PRECISION
+*         Lower order part of SIGMA
+*
+*  QMAX   (input) DOUBLE PRECISION
+*         Maximum value of q.
+*
+*  NFAIL  (output) INTEGER
+*         Number of times shift was too big.
+*
+*  ITER   (output) INTEGER
+*         Number of iterations.
+*
+*  NDIV   (output) INTEGER
+*         Number of divisions.
+*
+*  TTYPE  (output) INTEGER
+*         Shift type.
+*
+*  IEEE   (input) LOGICAL
+*         Flag for IEEE or non IEEE arithmetic (passed to DLASQ5).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   CBIAS
+      PARAMETER          ( CBIAS = 1.50D0 )
+      DOUBLE PRECISION   ZERO, QURTR, HALF, ONE, TWO, HUNDRD
+      PARAMETER          ( ZERO = 0.0D0, QURTR = 0.250D0, HALF = 0.5D0,
+     $                     ONE = 1.0D0, TWO = 2.0D0, HUNDRD = 100.0D0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            IPN4, J4, N0IN, NN, TTYPE
+      DOUBLE PRECISION   DMIN1, DMIN2, DN, DN1, DN2, EPS, S, SAFMIN, T,
+     $                   TAU, TEMP, TOL, TOL2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASQ4, DLASQ5, DLASQ6
+*     ..
+*     .. External Function ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MIN, SQRT
+*     ..
+*     .. Save statement ..
+      SAVE               TTYPE
+      SAVE               DMIN1, DMIN2, DN, DN1, DN2, TAU
+*     ..
+*     .. Data statement ..
+      DATA               TTYPE / 0 /
+      DATA               DMIN1 / ZERO /, DMIN2 / ZERO /, DN / ZERO /,
+     $                   DN1 / ZERO /, DN2 / ZERO /, TAU / ZERO /
+*     ..
+*     .. Executable Statements ..
+*
+      N0IN = N0
+      EPS = DLAMCH( 'Precision' )
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      TOL = EPS*HUNDRD
+      TOL2 = TOL**2
+*
+*     Check for deflation.
+*
+   10 CONTINUE
+*
+      IF( N0.LT.I0 )
+     $   RETURN
+      IF( N0.EQ.I0 )
+     $   GO TO 20
+      NN = 4*N0 + PP
+      IF( N0.EQ.( I0+1 ) )
+     $   GO TO 40
+*
+*     Check whether E(N0-1) is negligible, 1 eigenvalue.
+*
+      IF( Z( NN-5 ).GT.TOL2*( SIGMA+Z( NN-3 ) ) .AND.
+     $    Z( NN-2*PP-4 ).GT.TOL2*Z( NN-7 ) )
+     $   GO TO 30
+*
+   20 CONTINUE
+*
+      Z( 4*N0-3 ) = Z( 4*N0+PP-3 ) + SIGMA
+      N0 = N0 - 1
+      GO TO 10
+*
+*     Check  whether E(N0-2) is negligible, 2 eigenvalues.
+*
+   30 CONTINUE
+*
+      IF( Z( NN-9 ).GT.TOL2*SIGMA .AND.
+     $    Z( NN-2*PP-8 ).GT.TOL2*Z( NN-11 ) )
+     $   GO TO 50
+*
+   40 CONTINUE
+*
+      IF( Z( NN-3 ).GT.Z( NN-7 ) ) THEN
+         S = Z( NN-3 )
+         Z( NN-3 ) = Z( NN-7 )
+         Z( NN-7 ) = S
+      END IF
+      IF( Z( NN-5 ).GT.Z( NN-3 )*TOL2 ) THEN
+         T = HALF*( ( Z( NN-7 )-Z( NN-3 ) )+Z( NN-5 ) )
+         S = Z( NN-3 )*( Z( NN-5 ) / T )
+         IF( S.LE.T ) THEN
+            S = Z( NN-3 )*( Z( NN-5 ) /
+     $          ( T*( ONE+SQRT( ONE+S / T ) ) ) )
+         ELSE
+            S = Z( NN-3 )*( Z( NN-5 ) / ( T+SQRT( T )*SQRT( T+S ) ) )
+         END IF
+         T = Z( NN-7 ) + ( S+Z( NN-5 ) )
+         Z( NN-3 ) = Z( NN-3 )*( Z( NN-7 ) / T )
+         Z( NN-7 ) = T
+      END IF
+      Z( 4*N0-7 ) = Z( NN-7 ) + SIGMA
+      Z( 4*N0-3 ) = Z( NN-3 ) + SIGMA
+      N0 = N0 - 2
+      GO TO 10
+*
+   50 CONTINUE
+*
+*     Reverse the qd-array, if warranted.
+*
+      IF( DMIN.LE.ZERO .OR. N0.LT.N0IN ) THEN
+         IF( CBIAS*Z( 4*I0+PP-3 ).LT.Z( 4*N0+PP-3 ) ) THEN
+            IPN4 = 4*( I0+N0 )
+            DO 60 J4 = 4*I0, 2*( I0+N0-1 ), 4
+               TEMP = Z( J4-3 )
+               Z( J4-3 ) = Z( IPN4-J4-3 )
+               Z( IPN4-J4-3 ) = TEMP
+               TEMP = Z( J4-2 )
+               Z( J4-2 ) = Z( IPN4-J4-2 )
+               Z( IPN4-J4-2 ) = TEMP
+               TEMP = Z( J4-1 )
+               Z( J4-1 ) = Z( IPN4-J4-5 )
+               Z( IPN4-J4-5 ) = TEMP
+               TEMP = Z( J4 )
+               Z( J4 ) = Z( IPN4-J4-4 )
+               Z( IPN4-J4-4 ) = TEMP
+   60       CONTINUE
+            IF( N0-I0.LE.4 ) THEN
+               Z( 4*N0+PP-1 ) = Z( 4*I0+PP-1 )
+               Z( 4*N0-PP ) = Z( 4*I0-PP )
+            END IF
+            DMIN2 = MIN( DMIN2, Z( 4*N0+PP-1 ) )
+            Z( 4*N0+PP-1 ) = MIN( Z( 4*N0+PP-1 ), Z( 4*I0+PP-1 ),
+     $                            Z( 4*I0+PP+3 ) )
+            Z( 4*N0-PP ) = MIN( Z( 4*N0-PP ), Z( 4*I0-PP ),
+     $                          Z( 4*I0-PP+4 ) )
+            QMAX = MAX( QMAX, Z( 4*I0+PP-3 ), Z( 4*I0+PP+1 ) )
+            DMIN = -ZERO
+         END IF
+      END IF
+*
+   70 CONTINUE
+*
+      IF( DMIN.LT.ZERO .OR. SAFMIN*QMAX.LT.MIN( Z( 4*N0+PP-1 ),
+     $    Z( 4*N0+PP-9 ), DMIN2+Z( 4*N0-PP ) ) ) THEN
+*
+*        Choose a shift.
+*
+         CALL DLASQ4( I0, N0, Z, PP, N0IN, DMIN, DMIN1, DMIN2, DN, DN1,
+     $                DN2, TAU, TTYPE )
+*
+*        Call dqds until DMIN > 0.
+*
+   80    CONTINUE
+*
+         CALL DLASQ5( I0, N0, Z, PP, TAU, DMIN, DMIN1, DMIN2, DN,
+     $                DN1, DN2, IEEE )
+*
+         NDIV = NDIV + ( N0-I0+2 )
+         ITER = ITER + 1
+*
+*        Check status.
+*
+         IF( DMIN.GE.ZERO .AND. DMIN1.GT.ZERO ) THEN
+*
+*           Success.
+*
+            GO TO 100
+*
+         ELSE IF( DMIN.LT.ZERO .AND. DMIN1.GT.ZERO .AND.
+     $            Z( 4*( N0-1 )-PP ).LT.TOL*( SIGMA+DN1 ) .AND.
+     $            ABS( DN ).LT.TOL*SIGMA ) THEN
+*
+*           Convergence hidden by negative DN.
+*
+            Z( 4*( N0-1 )-PP+2 ) = ZERO
+            DMIN = ZERO
+            GO TO 100
+         ELSE IF( DMIN.LT.ZERO ) THEN
+*
+*           TAU too big. Select new TAU and try again.
+*
+            NFAIL = NFAIL + 1
+            IF( TTYPE.LT.-22 ) THEN
+*
+*              Failed twice. Play it safe.
+*
+               TAU = ZERO
+            ELSE IF( DMIN1.GT.ZERO ) THEN
+*
+*              Late failure. Gives excellent shift.
+*
+               TAU = ( TAU+DMIN )*( ONE-TWO*EPS )
+               TTYPE = TTYPE - 11
+            ELSE
+*
+*              Early failure. Divide by 4.
+*
+               TAU = QURTR*TAU
+               TTYPE = TTYPE - 12
+            END IF
+            GO TO 80
+         ELSE IF( DMIN.NE.DMIN ) THEN
+*
+*           NaN.
+*
+            TAU = ZERO
+            GO TO 80
+         ELSE
+*
+*           Possible underflow. Play it safe.
+*
+            GO TO 90
+         END IF
+      END IF
+*
+*     Risk of underflow.
+*
+   90 CONTINUE
+      CALL DLASQ6( I0, N0, Z, PP, DMIN, DMIN1, DMIN2, DN, DN1, DN2 )
+      NDIV = NDIV + ( N0-I0+2 )
+      ITER = ITER + 1
+      TAU = ZERO
+*
+  100 CONTINUE
+      IF( TAU.LT.SIGMA ) THEN
+         DESIG = DESIG + TAU
+         T = SIGMA + DESIG
+         DESIG = DESIG - ( T-SIGMA )
+      ELSE
+         T = SIGMA + TAU
+         DESIG = SIGMA - ( T-TAU ) + DESIG
+      END IF
+      SIGMA = T
+*
+      RETURN
+*
+*     End of DLASQ3
+*
+      END
diff --git a/costa/native/external/lapack/dlasq4.f b/costa/native/external/lapack/dlasq4.f
new file mode 100644
index 000000000..5702b8a3e
--- /dev/null
+++ b/costa/native/external/lapack/dlasq4.f
@@ -0,0 +1,330 @@
+      SUBROUTINE DLASQ4( I0, N0, Z, PP, N0IN, DMIN, DMIN1, DMIN2, DN,
+     $                   DN1, DN2, TAU, TTYPE )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            I0, N0, N0IN, PP, TTYPE
+      DOUBLE PRECISION   DMIN, DMIN1, DMIN2, DN, DN1, DN2, TAU
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLASQ4 computes an approximation TAU to the smallest eigenvalue 
+*  using values of d from the previous transform.
+*
+*  I0    (input) INTEGER
+*        First index.
+*
+*  N0    (input) INTEGER
+*        Last index.
+*
+*  Z     (input) DOUBLE PRECISION array, dimension ( 4*N )
+*        Z holds the qd array.
+*
+*  PP    (input) INTEGER
+*        PP=0 for ping, PP=1 for pong.
+*
+*  NOIN  (input) INTEGER
+*        The value of N0 at start of EIGTEST.
+*
+*  DMIN  (input) DOUBLE PRECISION
+*        Minimum value of d.
+*
+*  DMIN1 (input) DOUBLE PRECISION
+*        Minimum value of d, excluding D( N0 ).
+*
+*  DMIN2 (input) DOUBLE PRECISION
+*        Minimum value of d, excluding D( N0 ) and D( N0-1 ).
+*
+*  DN    (input) DOUBLE PRECISION
+*        d(N)
+*
+*  DN1   (input) DOUBLE PRECISION
+*        d(N-1)
+*
+*  DN2   (input) DOUBLE PRECISION
+*        d(N-2)
+*
+*  TAU   (output) DOUBLE PRECISION
+*        This is the shift.
+*
+*  TTYPE (output) INTEGER
+*        Shift type.
+*
+*  Further Details
+*  ===============
+*  CNST1 = 9/16
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   CNST1, CNST2, CNST3
+      PARAMETER          ( CNST1 = 0.5630D0, CNST2 = 1.010D0,
+     $                   CNST3 = 1.050D0 )
+      DOUBLE PRECISION   QURTR, THIRD, HALF, ZERO, ONE, TWO, HUNDRD
+      PARAMETER          ( QURTR = 0.250D0, THIRD = 0.3330D0,
+     $                   HALF = 0.50D0, ZERO = 0.0D0, ONE = 1.0D0,
+     $                   TWO = 2.0D0, HUNDRD = 100.0D0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I4, NN, NP
+      DOUBLE PRECISION   A2, B1, B2, G, GAM, GAP1, GAP2, S
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Save statement ..
+      SAVE               G
+*     ..
+*     .. Data statement ..
+      DATA               G / ZERO /
+*     ..
+*     .. Executable Statements ..
+*
+*     A negative DMIN forces the shift to take that absolute value
+*     TTYPE records the type of shift.
+*
+      IF( DMIN.LE.ZERO ) THEN
+         TAU = -DMIN
+         TTYPE = -1
+         RETURN
+      END IF
+*       
+      NN = 4*N0 + PP
+      IF( N0IN.EQ.N0 ) THEN
+*
+*        No eigenvalues deflated.
+*
+         IF( DMIN.EQ.DN .OR. DMIN.EQ.DN1 ) THEN
+*
+            B1 = SQRT( Z( NN-3 ) )*SQRT( Z( NN-5 ) )
+            B2 = SQRT( Z( NN-7 ) )*SQRT( Z( NN-9 ) )
+            A2 = Z( NN-7 ) + Z( NN-5 )
+*
+*           Cases 2 and 3.
+*
+            IF( DMIN.EQ.DN .AND. DMIN1.EQ.DN1 ) THEN
+               GAP2 = DMIN2 - A2 - DMIN2*QURTR
+               IF( GAP2.GT.ZERO .AND. GAP2.GT.B2 ) THEN
+                  GAP1 = A2 - DN - ( B2 / GAP2 )*B2
+               ELSE
+                  GAP1 = A2 - DN - ( B1+B2 )
+               END IF
+               IF( GAP1.GT.ZERO .AND. GAP1.GT.B1 ) THEN
+                  S = MAX( DN-( B1 / GAP1 )*B1, HALF*DMIN )
+                  TTYPE = -2
+               ELSE
+                  S = ZERO
+                  IF( DN.GT.B1 )
+     $               S = DN - B1
+                  IF( A2.GT.( B1+B2 ) )
+     $               S = MIN( S, A2-( B1+B2 ) )
+                  S = MAX( S, THIRD*DMIN )
+                  TTYPE = -3
+               END IF
+            ELSE
+*
+*              Case 4.
+*
+               TTYPE = -4
+               S = QURTR*DMIN
+               IF( DMIN.EQ.DN ) THEN
+                  GAM = DN
+                  A2 = ZERO
+                  IF( Z( NN-5 ) .GT. Z( NN-7 ) )
+     $               RETURN
+                  B2 = Z( NN-5 ) / Z( NN-7 )
+                  NP = NN - 9
+               ELSE
+                  NP = NN - 2*PP
+                  B2 = Z( NP-2 )
+                  GAM = DN1
+                  IF( Z( NP-4 ) .GT. Z( NP-2 ) )
+     $               RETURN
+                  A2 = Z( NP-4 ) / Z( NP-2 )
+                  IF( Z( NN-9 ) .GT. Z( NN-11 ) )
+     $               RETURN
+                  B2 = Z( NN-9 ) / Z( NN-11 )
+                  NP = NN - 13
+               END IF
+*
+*              Approximate contribution to norm squared from I < NN-1.
+*
+               A2 = A2 + B2
+               DO 10 I4 = NP, 4*I0 - 1 + PP, -4
+                  IF( B2.EQ.ZERO )
+     $               GO TO 20
+                  B1 = B2
+                  IF( Z( I4 ) .GT. Z( I4-2 ) )
+     $               RETURN
+                  B2 = B2*( Z( I4 ) / Z( I4-2 ) )
+                  A2 = A2 + B2
+                  IF( HUNDRD*MAX( B2, B1 ).LT.A2 .OR. CNST1.LT.A2 ) 
+     $               GO TO 20
+   10          CONTINUE
+   20          CONTINUE
+               A2 = CNST3*A2
+*
+*              Rayleigh quotient residual bound.
+*
+               IF( A2.LT.CNST1 )
+     $            S = GAM*( ONE-SQRT( A2 ) ) / ( ONE+A2 )
+            END IF
+         ELSE IF( DMIN.EQ.DN2 ) THEN
+*
+*           Case 5.
+*
+            TTYPE = -5
+            S = QURTR*DMIN
+*
+*           Compute contribution to norm squared from I > NN-2.
+*
+            NP = NN - 2*PP
+            B1 = Z( NP-2 )
+            B2 = Z( NP-6 )
+            GAM = DN2
+            IF( Z( NP-8 ).GT.B2 .OR. Z( NP-4 ).GT.B1 )
+     $         RETURN
+            A2 = ( Z( NP-8 ) / B2 )*( ONE+Z( NP-4 ) / B1 )
+*
+*           Approximate contribution to norm squared from I < NN-2.
+*
+            IF( N0-I0.GT.2 ) THEN
+               B2 = Z( NN-13 ) / Z( NN-15 )
+               A2 = A2 + B2
+               DO 30 I4 = NN - 17, 4*I0 - 1 + PP, -4
+                  IF( B2.EQ.ZERO )
+     $               GO TO 40
+                  B1 = B2
+                  IF( Z( I4 ) .GT. Z( I4-2 ) )
+     $               RETURN
+                  B2 = B2*( Z( I4 ) / Z( I4-2 ) )
+                  A2 = A2 + B2
+                  IF( HUNDRD*MAX( B2, B1 ).LT.A2 .OR. CNST1.LT.A2 ) 
+     $               GO TO 40
+   30          CONTINUE
+   40          CONTINUE
+               A2 = CNST3*A2
+            END IF
+*
+            IF( A2.LT.CNST1 )
+     $         S = GAM*( ONE-SQRT( A2 ) ) / ( ONE+A2 )
+         ELSE
+*
+*           Case 6, no information to guide us.
+*
+            IF( TTYPE.EQ.-6 ) THEN
+               G = G + THIRD*( ONE-G )
+            ELSE IF( TTYPE.EQ.-18 ) THEN
+               G = QURTR*THIRD
+            ELSE
+               G = QURTR
+            END IF
+            S = G*DMIN
+            TTYPE = -6
+         END IF
+*
+      ELSE IF( N0IN.EQ.( N0+1 ) ) THEN
+*
+*        One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN.
+*
+         IF( DMIN1.EQ.DN1 .AND. DMIN2.EQ.DN2 ) THEN 
+*
+*           Cases 7 and 8.
+*
+            TTYPE = -7
+            S = THIRD*DMIN1
+            IF( Z( NN-5 ).GT.Z( NN-7 ) )
+     $         RETURN
+            B1 = Z( NN-5 ) / Z( NN-7 )
+            B2 = B1
+            IF( B2.EQ.ZERO )
+     $         GO TO 60
+            DO 50 I4 = 4*N0 - 9 + PP, 4*I0 - 1 + PP, -4
+               A2 = B1
+               IF( Z( I4 ).GT.Z( I4-2 ) )
+     $            RETURN
+               B1 = B1*( Z( I4 ) / Z( I4-2 ) )
+               B2 = B2 + B1
+               IF( HUNDRD*MAX( B1, A2 ).LT.B2 ) 
+     $            GO TO 60
+   50       CONTINUE
+   60       CONTINUE
+            B2 = SQRT( CNST3*B2 )
+            A2 = DMIN1 / ( ONE+B2**2 )
+            GAP2 = HALF*DMIN2 - A2
+            IF( GAP2.GT.ZERO .AND. GAP2.GT.B2*A2 ) THEN
+               S = MAX( S, A2*( ONE-CNST2*A2*( B2 / GAP2 )*B2 ) )
+            ELSE 
+               S = MAX( S, A2*( ONE-CNST2*B2 ) )
+               TTYPE = -8
+            END IF
+         ELSE
+*
+*           Case 9.
+*
+            S = QURTR*DMIN1
+            IF( DMIN1.EQ.DN1 )
+     $         S = HALF*DMIN1
+            TTYPE = -9
+         END IF
+*
+      ELSE IF( N0IN.EQ.( N0+2 ) ) THEN
+*
+*        Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN.
+*
+*        Cases 10 and 11.
+*
+         IF( DMIN2.EQ.DN2 .AND. TWO*Z( NN-5 ).LT.Z( NN-7 ) ) THEN 
+            TTYPE = -10
+            S = THIRD*DMIN2
+            IF( Z( NN-5 ).GT.Z( NN-7 ) )
+     $         RETURN
+            B1 = Z( NN-5 ) / Z( NN-7 )
+            B2 = B1
+            IF( B2.EQ.ZERO )
+     $         GO TO 80
+            DO 70 I4 = 4*N0 - 9 + PP, 4*I0 - 1 + PP, -4
+               IF( Z( I4 ).GT.Z( I4-2 ) )
+     $            RETURN
+               B1 = B1*( Z( I4 ) / Z( I4-2 ) )
+               B2 = B2 + B1
+               IF( HUNDRD*B1.LT.B2 )
+     $            GO TO 80
+   70       CONTINUE
+   80       CONTINUE
+            B2 = SQRT( CNST3*B2 )
+            A2 = DMIN2 / ( ONE+B2**2 )
+            GAP2 = Z( NN-7 ) + Z( NN-9 ) -
+     $             SQRT( Z( NN-11 ) )*SQRT( Z( NN-9 ) ) - A2
+            IF( GAP2.GT.ZERO .AND. GAP2.GT.B2*A2 ) THEN
+               S = MAX( S, A2*( ONE-CNST2*A2*( B2 / GAP2 )*B2 ) )
+            ELSE 
+               S = MAX( S, A2*( ONE-CNST2*B2 ) )
+            END IF
+         ELSE
+            S = QURTR*DMIN2
+            TTYPE = -11
+         END IF
+      ELSE IF( N0IN.GT.( N0+2 ) ) THEN
+*
+*        Case 12, more than two eigenvalues deflated. No information.
+*
+         S = ZERO 
+         TTYPE = -12
+      END IF
+*
+      TAU = S
+      RETURN
+*
+*     End of DLASQ4
+*
+      END
diff --git a/costa/native/external/lapack/dlasq5.f b/costa/native/external/lapack/dlasq5.f
new file mode 100644
index 000000000..068b897fe
--- /dev/null
+++ b/costa/native/external/lapack/dlasq5.f
@@ -0,0 +1,196 @@
+      SUBROUTINE DLASQ5( I0, N0, Z, PP, TAU, DMIN, DMIN1, DMIN2, DN,
+     $                   DNM1, DNM2, IEEE )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     May 17, 2000
+*
+*     .. Scalar Arguments ..
+      LOGICAL            IEEE
+      INTEGER            I0, N0, PP
+      DOUBLE PRECISION   DMIN, DMIN1, DMIN2, DN, DNM1, DNM2, TAU
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLASQ5 computes one dqds transform in ping-pong form, one
+*  version for IEEE machines another for non IEEE machines.
+*
+*  Arguments
+*  =========
+*
+*  I0    (input) INTEGER
+*        First index.
+*
+*  N0    (input) INTEGER
+*        Last index.
+*
+*  Z     (input) DOUBLE PRECISION array, dimension ( 4*N )
+*        Z holds the qd array. EMIN is stored in Z(4*N0) to avoid
+*        an extra argument.
+*
+*  PP    (input) INTEGER
+*        PP=0 for ping, PP=1 for pong.
+*
+*  TAU   (input) DOUBLE PRECISION
+*        This is the shift.
+*
+*  DMIN  (output) DOUBLE PRECISION
+*        Minimum value of d.
+*
+*  DMIN1 (output) DOUBLE PRECISION
+*        Minimum value of d, excluding D( N0 ).
+*
+*  DMIN2 (output) DOUBLE PRECISION
+*        Minimum value of d, excluding D( N0 ) and D( N0-1 ).
+*
+*  DN    (output) DOUBLE PRECISION
+*        d(N0), the last value of d.
+*
+*  DNM1  (output) DOUBLE PRECISION
+*        d(N0-1).
+*
+*  DNM2  (output) DOUBLE PRECISION
+*        d(N0-2).
+*
+*  IEEE  (input) LOGICAL
+*        Flag for IEEE or non IEEE arithmetic.
+*
+*  =====================================================================
+*
+*     .. Parameter ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            J4, J4P2
+      DOUBLE PRECISION   D, EMIN, TEMP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+      IF( ( N0-I0-1 ).LE.0 )
+     $   RETURN
+*
+      J4 = 4*I0 + PP - 3
+      EMIN = Z( J4+4 )
+      D = Z( J4 ) - TAU
+      DMIN = D
+      DMIN1 = -Z( J4 )
+*
+      IF( IEEE ) THEN
+*
+*        Code for IEEE arithmetic.
+*
+         IF( PP.EQ.0 ) THEN
+            DO 10 J4 = 4*I0, 4*( N0-3 ), 4
+               Z( J4-2 ) = D + Z( J4-1 )
+               TEMP = Z( J4+1 ) / Z( J4-2 )
+               D = D*TEMP - TAU
+               DMIN = MIN( DMIN, D )
+               Z( J4 ) = Z( J4-1 )*TEMP
+               EMIN = MIN( Z( J4 ), EMIN )
+   10       CONTINUE
+         ELSE
+            DO 20 J4 = 4*I0, 4*( N0-3 ), 4
+               Z( J4-3 ) = D + Z( J4 )
+               TEMP = Z( J4+2 ) / Z( J4-3 )
+               D = D*TEMP - TAU
+               DMIN = MIN( DMIN, D )
+               Z( J4-1 ) = Z( J4 )*TEMP
+               EMIN = MIN( Z( J4-1 ), EMIN )
+   20       CONTINUE
+         END IF
+*
+*        Unroll last two steps.
+*
+         DNM2 = D
+         DMIN2 = DMIN
+         J4 = 4*( N0-2 ) - PP
+         J4P2 = J4 + 2*PP - 1
+         Z( J4-2 ) = DNM2 + Z( J4P2 )
+         Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) )
+         DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) ) - TAU
+         DMIN = MIN( DMIN, DNM1 )
+*
+         DMIN1 = DMIN
+         J4 = J4 + 4
+         J4P2 = J4 + 2*PP - 1
+         Z( J4-2 ) = DNM1 + Z( J4P2 )
+         Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) )
+         DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) ) - TAU
+         DMIN = MIN( DMIN, DN )
+*
+      ELSE
+*
+*        Code for non IEEE arithmetic.
+*
+         IF( PP.EQ.0 ) THEN
+            DO 30 J4 = 4*I0, 4*( N0-3 ), 4
+               Z( J4-2 ) = D + Z( J4-1 )
+               IF( D.LT.ZERO ) THEN
+                  RETURN
+               ELSE
+                  Z( J4 ) = Z( J4+1 )*( Z( J4-1 ) / Z( J4-2 ) )
+                  D = Z( J4+1 )*( D / Z( J4-2 ) ) - TAU
+               END IF
+               DMIN = MIN( DMIN, D )
+               EMIN = MIN( EMIN, Z( J4 ) )
+   30       CONTINUE
+         ELSE
+            DO 40 J4 = 4*I0, 4*( N0-3 ), 4
+               Z( J4-3 ) = D + Z( J4 )
+               IF( D.LT.ZERO ) THEN
+                  RETURN
+               ELSE
+                  Z( J4-1 ) = Z( J4+2 )*( Z( J4 ) / Z( J4-3 ) )
+                  D = Z( J4+2 )*( D / Z( J4-3 ) ) - TAU
+               END IF
+               DMIN = MIN( DMIN, D )
+               EMIN = MIN( EMIN, Z( J4-1 ) )
+   40       CONTINUE
+         END IF
+*
+*        Unroll last two steps.
+*
+         DNM2 = D
+         DMIN2 = DMIN
+         J4 = 4*( N0-2 ) - PP
+         J4P2 = J4 + 2*PP - 1
+         Z( J4-2 ) = DNM2 + Z( J4P2 )
+         IF( DNM2.LT.ZERO ) THEN
+            RETURN
+         ELSE
+            Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) )
+            DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) ) - TAU
+         END IF
+         DMIN = MIN( DMIN, DNM1 )
+*
+         DMIN1 = DMIN
+         J4 = J4 + 4
+         J4P2 = J4 + 2*PP - 1
+         Z( J4-2 ) = DNM1 + Z( J4P2 )
+         IF( DNM1.LT.ZERO ) THEN
+            RETURN
+         ELSE
+            Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) )
+            DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) ) - TAU
+         END IF
+         DMIN = MIN( DMIN, DN )
+*
+      END IF
+*
+      Z( J4+2 ) = DN
+      Z( 4*N0-PP ) = EMIN
+      RETURN
+*
+*     End of DLASQ5
+*
+      END
diff --git a/costa/native/external/lapack/dlasq6.f b/costa/native/external/lapack/dlasq6.f
new file mode 100644
index 000000000..480157c91
--- /dev/null
+++ b/costa/native/external/lapack/dlasq6.f
@@ -0,0 +1,176 @@
+      SUBROUTINE DLASQ6( I0, N0, Z, PP, DMIN, DMIN1, DMIN2, DN,
+     $                   DNM1, DNM2 )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            I0, N0, PP
+      DOUBLE PRECISION   DMIN, DMIN1, DMIN2, DN, DNM1, DNM2
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLASQ6 computes one dqd (shift equal to zero) transform in
+*  ping-pong form, with protection against underflow and overflow.
+*
+*  Arguments
+*  =========
+*
+*  I0    (input) INTEGER
+*        First index.
+*
+*  N0    (input) INTEGER
+*        Last index.
+*
+*  Z     (input) DOUBLE PRECISION array, dimension ( 4*N )
+*        Z holds the qd array. EMIN is stored in Z(4*N0) to avoid
+*        an extra argument.
+*
+*  PP    (input) INTEGER
+*        PP=0 for ping, PP=1 for pong.
+*
+*  DMIN  (output) DOUBLE PRECISION
+*        Minimum value of d.
+*
+*  DMIN1 (output) DOUBLE PRECISION
+*        Minimum value of d, excluding D( N0 ).
+*
+*  DMIN2 (output) DOUBLE PRECISION
+*        Minimum value of d, excluding D( N0 ) and D( N0-1 ).
+*
+*  DN    (output) DOUBLE PRECISION
+*        d(N0), the last value of d.
+*
+*  DNM1  (output) DOUBLE PRECISION
+*        d(N0-1).
+*
+*  DNM2  (output) DOUBLE PRECISION
+*        d(N0-2).
+*
+*  =====================================================================
+*
+*     .. Parameter ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            J4, J4P2
+      DOUBLE PRECISION   D, EMIN, SAFMIN, TEMP
+*     ..
+*     .. External Function ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+      IF( ( N0-I0-1 ).LE.0 )
+     $   RETURN
+*
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      J4 = 4*I0 + PP - 3
+      EMIN = Z( J4+4 ) 
+      D = Z( J4 )
+      DMIN = D
+*
+      IF( PP.EQ.0 ) THEN
+         DO 10 J4 = 4*I0, 4*( N0-3 ), 4
+            Z( J4-2 ) = D + Z( J4-1 ) 
+            IF( Z( J4-2 ).EQ.ZERO ) THEN
+               Z( J4 ) = ZERO
+               D = Z( J4+1 )
+               DMIN = D
+               EMIN = ZERO
+            ELSE IF( SAFMIN*Z( J4+1 ).LT.Z( J4-2 ) .AND.
+     $               SAFMIN*Z( J4-2 ).LT.Z( J4+1 ) ) THEN
+               TEMP = Z( J4+1 ) / Z( J4-2 )
+               Z( J4 ) = Z( J4-1 )*TEMP
+               D = D*TEMP
+            ELSE 
+               Z( J4 ) = Z( J4+1 )*( Z( J4-1 ) / Z( J4-2 ) )
+               D = Z( J4+1 )*( D / Z( J4-2 ) )
+            END IF
+            DMIN = MIN( DMIN, D )
+            EMIN = MIN( EMIN, Z( J4 ) )
+   10    CONTINUE
+      ELSE
+         DO 20 J4 = 4*I0, 4*( N0-3 ), 4
+            Z( J4-3 ) = D + Z( J4 ) 
+            IF( Z( J4-3 ).EQ.ZERO ) THEN
+               Z( J4-1 ) = ZERO
+               D = Z( J4+2 )
+               DMIN = D
+               EMIN = ZERO
+            ELSE IF( SAFMIN*Z( J4+2 ).LT.Z( J4-3 ) .AND.
+     $               SAFMIN*Z( J4-3 ).LT.Z( J4+2 ) ) THEN
+               TEMP = Z( J4+2 ) / Z( J4-3 )
+               Z( J4-1 ) = Z( J4 )*TEMP
+               D = D*TEMP
+            ELSE 
+               Z( J4-1 ) = Z( J4+2 )*( Z( J4 ) / Z( J4-3 ) )
+               D = Z( J4+2 )*( D / Z( J4-3 ) )
+            END IF
+            DMIN = MIN( DMIN, D )
+            EMIN = MIN( EMIN, Z( J4-1 ) )
+   20    CONTINUE
+      END IF
+*
+*     Unroll last two steps. 
+*
+      DNM2 = D
+      DMIN2 = DMIN
+      J4 = 4*( N0-2 ) - PP
+      J4P2 = J4 + 2*PP - 1
+      Z( J4-2 ) = DNM2 + Z( J4P2 )
+      IF( Z( J4-2 ).EQ.ZERO ) THEN
+         Z( J4 ) = ZERO
+         DNM1 = Z( J4P2+2 )
+         DMIN = DNM1
+         EMIN = ZERO
+      ELSE IF( SAFMIN*Z( J4P2+2 ).LT.Z( J4-2 ) .AND.
+     $         SAFMIN*Z( J4-2 ).LT.Z( J4P2+2 ) ) THEN
+         TEMP = Z( J4P2+2 ) / Z( J4-2 )
+         Z( J4 ) = Z( J4P2 )*TEMP
+         DNM1 = DNM2*TEMP
+      ELSE
+         Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) )
+         DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) )
+      END IF
+      DMIN = MIN( DMIN, DNM1 )
+*
+      DMIN1 = DMIN
+      J4 = J4 + 4
+      J4P2 = J4 + 2*PP - 1
+      Z( J4-2 ) = DNM1 + Z( J4P2 )
+      IF( Z( J4-2 ).EQ.ZERO ) THEN
+         Z( J4 ) = ZERO
+         DN = Z( J4P2+2 )
+         DMIN = DN
+         EMIN = ZERO
+      ELSE IF( SAFMIN*Z( J4P2+2 ).LT.Z( J4-2 ) .AND.
+     $         SAFMIN*Z( J4-2 ).LT.Z( J4P2+2 ) ) THEN
+         TEMP = Z( J4P2+2 ) / Z( J4-2 )
+         Z( J4 ) = Z( J4P2 )*TEMP
+         DN = DNM1*TEMP
+      ELSE
+         Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) )
+         DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) )
+      END IF
+      DMIN = MIN( DMIN, DN )
+*
+      Z( J4+2 ) = DN
+      Z( 4*N0-PP ) = EMIN
+      RETURN
+*
+*     End of DLASQ6
+*
+      END
diff --git a/costa/native/external/lapack/dlasr.f b/costa/native/external/lapack/dlasr.f
new file mode 100644
index 000000000..4b2cab754
--- /dev/null
+++ b/costa/native/external/lapack/dlasr.f
@@ -0,0 +1,325 @@
+      SUBROUTINE DLASR( SIDE, PIVOT, DIRECT, M, N, C, S, A, LDA )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIRECT, PIVOT, SIDE
+      INTEGER            LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), C( * ), S( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLASR   performs the transformation
+*
+*     A := P*A,   when SIDE = 'L' or 'l'  (  Left-hand side )
+*
+*     A := A*P',  when SIDE = 'R' or 'r'  ( Right-hand side )
+*
+*  where A is an m by n real matrix and P is an orthogonal matrix,
+*  consisting of a sequence of plane rotations determined by the
+*  parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l'
+*  and z = n when SIDE = 'R' or 'r' ):
+*
+*  When  DIRECT = 'F' or 'f'  ( Forward sequence ) then
+*
+*     P = P( z - 1 )*...*P( 2 )*P( 1 ),
+*
+*  and when DIRECT = 'B' or 'b'  ( Backward sequence ) then
+*
+*     P = P( 1 )*P( 2 )*...*P( z - 1 ),
+*
+*  where  P( k ) is a plane rotation matrix for the following planes:
+*
+*     when  PIVOT = 'V' or 'v'  ( Variable pivot ),
+*        the plane ( k, k + 1 )
+*
+*     when  PIVOT = 'T' or 't'  ( Top pivot ),
+*        the plane ( 1, k + 1 )
+*
+*     when  PIVOT = 'B' or 'b'  ( Bottom pivot ),
+*        the plane ( k, z )
+*
+*  c( k ) and s( k )  must contain the  cosine and sine that define the
+*  matrix  P( k ).  The two by two plane rotation part of the matrix
+*  P( k ), R( k ), is assumed to be of the form
+*
+*     R( k ) = (  c( k )  s( k ) ).
+*              ( -s( k )  c( k ) )
+*
+*  This version vectorises across rows of the array A when SIDE = 'L'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          Specifies whether the plane rotation matrix P is applied to
+*          A on the left or the right.
+*          = 'L':  Left, compute A := P*A
+*          = 'R':  Right, compute A:= A*P'
+*
+*  DIRECT  (input) CHARACTER*1
+*          Specifies whether P is a forward or backward sequence of
+*          plane rotations.
+*          = 'F':  Forward, P = P( z - 1 )*...*P( 2 )*P( 1 )
+*          = 'B':  Backward, P = P( 1 )*P( 2 )*...*P( z - 1 )
+*
+*  PIVOT   (input) CHARACTER*1
+*          Specifies the plane for which P(k) is a plane rotation
+*          matrix.
+*          = 'V':  Variable pivot, the plane (k,k+1)
+*          = 'T':  Top pivot, the plane (1,k+1)
+*          = 'B':  Bottom pivot, the plane (k,z)
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  If m <= 1, an immediate
+*          return is effected.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  If n <= 1, an
+*          immediate return is effected.
+*
+*  C, S    (input) DOUBLE PRECISION arrays, dimension
+*                  (M-1) if SIDE = 'L'
+*                  (N-1) if SIDE = 'R'
+*          c(k) and s(k) contain the cosine and sine that define the
+*          matrix P(k).  The two by two plane rotation part of the
+*          matrix P(k), R(k), is assumed to be of the form
+*          R( k ) = (  c( k )  s( k ) ).
+*                   ( -s( k )  c( k ) )
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          The m by n matrix A.  On exit, A is overwritten by P*A if
+*          SIDE = 'R' or by A*P' if SIDE = 'L'.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, INFO, J
+      DOUBLE PRECISION   CTEMP, STEMP, TEMP
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      IF( .NOT.( LSAME( SIDE, 'L' ) .OR. LSAME( SIDE, 'R' ) ) ) THEN
+         INFO = 1
+      ELSE IF( .NOT.( LSAME( PIVOT, 'V' ) .OR. LSAME( PIVOT,
+     $         'T' ) .OR. LSAME( PIVOT, 'B' ) ) ) THEN
+         INFO = 2
+      ELSE IF( .NOT.( LSAME( DIRECT, 'F' ) .OR. LSAME( DIRECT, 'B' ) ) )
+     $          THEN
+         INFO = 3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = 4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = 5
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLASR ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) )
+     $   RETURN
+      IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*        Form  P * A
+*
+         IF( LSAME( PIVOT, 'V' ) ) THEN
+            IF( LSAME( DIRECT, 'F' ) ) THEN
+               DO 20 J = 1, M - 1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 10 I = 1, N
+                        TEMP = A( J+1, I )
+                        A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I )
+                        A( J, I ) = STEMP*TEMP + CTEMP*A( J, I )
+   10                CONTINUE
+                  END IF
+   20          CONTINUE
+            ELSE IF( LSAME( DIRECT, 'B' ) ) THEN
+               DO 40 J = M - 1, 1, -1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 30 I = 1, N
+                        TEMP = A( J+1, I )
+                        A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I )
+                        A( J, I ) = STEMP*TEMP + CTEMP*A( J, I )
+   30                CONTINUE
+                  END IF
+   40          CONTINUE
+            END IF
+         ELSE IF( LSAME( PIVOT, 'T' ) ) THEN
+            IF( LSAME( DIRECT, 'F' ) ) THEN
+               DO 60 J = 2, M
+                  CTEMP = C( J-1 )
+                  STEMP = S( J-1 )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 50 I = 1, N
+                        TEMP = A( J, I )
+                        A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I )
+                        A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I )
+   50                CONTINUE
+                  END IF
+   60          CONTINUE
+            ELSE IF( LSAME( DIRECT, 'B' ) ) THEN
+               DO 80 J = M, 2, -1
+                  CTEMP = C( J-1 )
+                  STEMP = S( J-1 )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 70 I = 1, N
+                        TEMP = A( J, I )
+                        A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I )
+                        A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I )
+   70                CONTINUE
+                  END IF
+   80          CONTINUE
+            END IF
+         ELSE IF( LSAME( PIVOT, 'B' ) ) THEN
+            IF( LSAME( DIRECT, 'F' ) ) THEN
+               DO 100 J = 1, M - 1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 90 I = 1, N
+                        TEMP = A( J, I )
+                        A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP
+                        A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP
+   90                CONTINUE
+                  END IF
+  100          CONTINUE
+            ELSE IF( LSAME( DIRECT, 'B' ) ) THEN
+               DO 120 J = M - 1, 1, -1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 110 I = 1, N
+                        TEMP = A( J, I )
+                        A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP
+                        A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP
+  110                CONTINUE
+                  END IF
+  120          CONTINUE
+            END IF
+         END IF
+      ELSE IF( LSAME( SIDE, 'R' ) ) THEN
+*
+*        Form A * P'
+*
+         IF( LSAME( PIVOT, 'V' ) ) THEN
+            IF( LSAME( DIRECT, 'F' ) ) THEN
+               DO 140 J = 1, N - 1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 130 I = 1, M
+                        TEMP = A( I, J+1 )
+                        A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J )
+                        A( I, J ) = STEMP*TEMP + CTEMP*A( I, J )
+  130                CONTINUE
+                  END IF
+  140          CONTINUE
+            ELSE IF( LSAME( DIRECT, 'B' ) ) THEN
+               DO 160 J = N - 1, 1, -1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 150 I = 1, M
+                        TEMP = A( I, J+1 )
+                        A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J )
+                        A( I, J ) = STEMP*TEMP + CTEMP*A( I, J )
+  150                CONTINUE
+                  END IF
+  160          CONTINUE
+            END IF
+         ELSE IF( LSAME( PIVOT, 'T' ) ) THEN
+            IF( LSAME( DIRECT, 'F' ) ) THEN
+               DO 180 J = 2, N
+                  CTEMP = C( J-1 )
+                  STEMP = S( J-1 )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 170 I = 1, M
+                        TEMP = A( I, J )
+                        A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 )
+                        A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 )
+  170                CONTINUE
+                  END IF
+  180          CONTINUE
+            ELSE IF( LSAME( DIRECT, 'B' ) ) THEN
+               DO 200 J = N, 2, -1
+                  CTEMP = C( J-1 )
+                  STEMP = S( J-1 )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 190 I = 1, M
+                        TEMP = A( I, J )
+                        A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 )
+                        A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 )
+  190                CONTINUE
+                  END IF
+  200          CONTINUE
+            END IF
+         ELSE IF( LSAME( PIVOT, 'B' ) ) THEN
+            IF( LSAME( DIRECT, 'F' ) ) THEN
+               DO 220 J = 1, N - 1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 210 I = 1, M
+                        TEMP = A( I, J )
+                        A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP
+                        A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP
+  210                CONTINUE
+                  END IF
+  220          CONTINUE
+            ELSE IF( LSAME( DIRECT, 'B' ) ) THEN
+               DO 240 J = N - 1, 1, -1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 230 I = 1, M
+                        TEMP = A( I, J )
+                        A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP
+                        A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP
+  230                CONTINUE
+                  END IF
+  240          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DLASR
+*
+      END
diff --git a/costa/native/external/lapack/dlasrt.f b/costa/native/external/lapack/dlasrt.f
new file mode 100644
index 000000000..e776034bc
--- /dev/null
+++ b/costa/native/external/lapack/dlasrt.f
@@ -0,0 +1,244 @@
+      SUBROUTINE DLASRT( ID, N, D, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          ID
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  Sort the numbers in D in increasing order (if ID = 'I') or
+*  in decreasing order (if ID = 'D' ).
+*
+*  Use Quick Sort, reverting to Insertion sort on arrays of
+*  size <= 20. Dimension of STACK limits N to about 2**32.
+*
+*  Arguments
+*  =========
+*
+*  ID      (input) CHARACTER*1
+*          = 'I': sort D in increasing order;
+*          = 'D': sort D in decreasing order.
+*
+*  N       (input) INTEGER
+*          The length of the array D.
+*
+*  D       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the array to be sorted.
+*          On exit, D has been sorted into increasing order
+*          (D(1) <= ... <= D(N) ) or into decreasing order
+*          (D(1) >= ... >= D(N) ), depending on ID.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            SELECT
+      PARAMETER          ( SELECT = 20 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            DIR, ENDD, I, J, START, STKPNT
+      DOUBLE PRECISION   D1, D2, D3, DMNMX, TMP
+*     ..
+*     .. Local Arrays ..
+      INTEGER            STACK( 2, 32 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input paramters.
+*
+      INFO = 0
+      DIR = -1
+      IF( LSAME( ID, 'D' ) ) THEN
+         DIR = 0
+      ELSE IF( LSAME( ID, 'I' ) ) THEN
+         DIR = 1
+      END IF
+      IF( DIR.EQ.-1 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLASRT', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.1 )
+     $   RETURN
+*
+      STKPNT = 1
+      STACK( 1, 1 ) = 1
+      STACK( 2, 1 ) = N
+   10 CONTINUE
+      START = STACK( 1, STKPNT )
+      ENDD = STACK( 2, STKPNT )
+      STKPNT = STKPNT - 1
+      IF( ENDD-START.LE.SELECT .AND. ENDD-START.GT.0 ) THEN
+*
+*        Do Insertion sort on D( START:ENDD )
+*
+         IF( DIR.EQ.0 ) THEN
+*
+*           Sort into decreasing order
+*
+            DO 30 I = START + 1, ENDD
+               DO 20 J = I, START + 1, -1
+                  IF( D( J ).GT.D( J-1 ) ) THEN
+                     DMNMX = D( J )
+                     D( J ) = D( J-1 )
+                     D( J-1 ) = DMNMX
+                  ELSE
+                     GO TO 30
+                  END IF
+   20          CONTINUE
+   30       CONTINUE
+*
+         ELSE
+*
+*           Sort into increasing order
+*
+            DO 50 I = START + 1, ENDD
+               DO 40 J = I, START + 1, -1
+                  IF( D( J ).LT.D( J-1 ) ) THEN
+                     DMNMX = D( J )
+                     D( J ) = D( J-1 )
+                     D( J-1 ) = DMNMX
+                  ELSE
+                     GO TO 50
+                  END IF
+   40          CONTINUE
+   50       CONTINUE
+*
+         END IF
+*
+      ELSE IF( ENDD-START.GT.SELECT ) THEN
+*
+*        Partition D( START:ENDD ) and stack parts, largest one first
+*
+*        Choose partition entry as median of 3
+*
+         D1 = D( START )
+         D2 = D( ENDD )
+         I = ( START+ENDD ) / 2
+         D3 = D( I )
+         IF( D1.LT.D2 ) THEN
+            IF( D3.LT.D1 ) THEN
+               DMNMX = D1
+            ELSE IF( D3.LT.D2 ) THEN
+               DMNMX = D3
+            ELSE
+               DMNMX = D2
+            END IF
+         ELSE
+            IF( D3.LT.D2 ) THEN
+               DMNMX = D2
+            ELSE IF( D3.LT.D1 ) THEN
+               DMNMX = D3
+            ELSE
+               DMNMX = D1
+            END IF
+         END IF
+*
+         IF( DIR.EQ.0 ) THEN
+*
+*           Sort into decreasing order
+*
+            I = START - 1
+            J = ENDD + 1
+   60       CONTINUE
+   70       CONTINUE
+            J = J - 1
+            IF( D( J ).LT.DMNMX )
+     $         GO TO 70
+   80       CONTINUE
+            I = I + 1
+            IF( D( I ).GT.DMNMX )
+     $         GO TO 80
+            IF( I.LT.J ) THEN
+               TMP = D( I )
+               D( I ) = D( J )
+               D( J ) = TMP
+               GO TO 60
+            END IF
+            IF( J-START.GT.ENDD-J-1 ) THEN
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = START
+               STACK( 2, STKPNT ) = J
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = J + 1
+               STACK( 2, STKPNT ) = ENDD
+            ELSE
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = J + 1
+               STACK( 2, STKPNT ) = ENDD
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = START
+               STACK( 2, STKPNT ) = J
+            END IF
+         ELSE
+*
+*           Sort into increasing order
+*
+            I = START - 1
+            J = ENDD + 1
+   90       CONTINUE
+  100       CONTINUE
+            J = J - 1
+            IF( D( J ).GT.DMNMX )
+     $         GO TO 100
+  110       CONTINUE
+            I = I + 1
+            IF( D( I ).LT.DMNMX )
+     $         GO TO 110
+            IF( I.LT.J ) THEN
+               TMP = D( I )
+               D( I ) = D( J )
+               D( J ) = TMP
+               GO TO 90
+            END IF
+            IF( J-START.GT.ENDD-J-1 ) THEN
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = START
+               STACK( 2, STKPNT ) = J
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = J + 1
+               STACK( 2, STKPNT ) = ENDD
+            ELSE
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = J + 1
+               STACK( 2, STKPNT ) = ENDD
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = START
+               STACK( 2, STKPNT ) = J
+            END IF
+         END IF
+      END IF
+      IF( STKPNT.GT.0 )
+     $   GO TO 10
+      RETURN
+*
+*     End of DLASRT
+*
+      END
diff --git a/costa/native/external/lapack/dlassq.f b/costa/native/external/lapack/dlassq.f
new file mode 100644
index 000000000..b378c0635
--- /dev/null
+++ b/costa/native/external/lapack/dlassq.f
@@ -0,0 +1,89 @@
+      SUBROUTINE DLASSQ( N, X, INCX, SCALE, SUMSQ )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+      DOUBLE PRECISION   SCALE, SUMSQ
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLASSQ  returns the values  scl  and  smsq  such that
+*
+*     ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq,
+*
+*  where  x( i ) = X( 1 + ( i - 1 )*INCX ). The value of  sumsq  is
+*  assumed to be non-negative and  scl  returns the value
+*
+*     scl = max( scale, abs( x( i ) ) ).
+*
+*  scale and sumsq must be supplied in SCALE and SUMSQ and
+*  scl and smsq are overwritten on SCALE and SUMSQ respectively.
+*
+*  The routine makes only one pass through the vector x.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of elements to be used from the vector X.
+*
+*  X       (input) DOUBLE PRECISION array, dimension (N)
+*          The vector for which a scaled sum of squares is computed.
+*             x( i )  = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n.
+*
+*  INCX    (input) INTEGER
+*          The increment between successive values of the vector X.
+*          INCX > 0.
+*
+*  SCALE   (input/output) DOUBLE PRECISION
+*          On entry, the value  scale  in the equation above.
+*          On exit, SCALE is overwritten with  scl , the scaling factor
+*          for the sum of squares.
+*
+*  SUMSQ   (input/output) DOUBLE PRECISION
+*          On entry, the value  sumsq  in the equation above.
+*          On exit, SUMSQ is overwritten with  smsq , the basic sum of
+*          squares from which  scl  has been factored out.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            IX
+      DOUBLE PRECISION   ABSXI
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.GT.0 ) THEN
+         DO 10 IX = 1, 1 + ( N-1 )*INCX, INCX
+            IF( X( IX ).NE.ZERO ) THEN
+               ABSXI = ABS( X( IX ) )
+               IF( SCALE.LT.ABSXI ) THEN
+                  SUMSQ = 1 + SUMSQ*( SCALE / ABSXI )**2
+                  SCALE = ABSXI
+               ELSE
+                  SUMSQ = SUMSQ + ( ABSXI / SCALE )**2
+               END IF
+            END IF
+   10    CONTINUE
+      END IF
+      RETURN
+*
+*     End of DLASSQ
+*
+      END
diff --git a/costa/native/external/lapack/dlasv2.f b/costa/native/external/lapack/dlasv2.f
new file mode 100644
index 000000000..5d936863d
--- /dev/null
+++ b/costa/native/external/lapack/dlasv2.f
@@ -0,0 +1,250 @@
+      SUBROUTINE DLASV2( F, G, H, SSMIN, SSMAX, SNR, CSR, SNL, CSL )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   CSL, CSR, F, G, H, SNL, SNR, SSMAX, SSMIN
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLASV2 computes the singular value decomposition of a 2-by-2
+*  triangular matrix
+*     [  F   G  ]
+*     [  0   H  ].
+*  On return, abs(SSMAX) is the larger singular value, abs(SSMIN) is the
+*  smaller singular value, and (CSL,SNL) and (CSR,SNR) are the left and
+*  right singular vectors for abs(SSMAX), giving the decomposition
+*
+*     [ CSL  SNL ] [  F   G  ] [ CSR -SNR ]  =  [ SSMAX   0   ]
+*     [-SNL  CSL ] [  0   H  ] [ SNR  CSR ]     [  0    SSMIN ].
+*
+*  Arguments
+*  =========
+*
+*  F       (input) DOUBLE PRECISION
+*          The (1,1) element of the 2-by-2 matrix.
+*
+*  G       (input) DOUBLE PRECISION
+*          The (1,2) element of the 2-by-2 matrix.
+*
+*  H       (input) DOUBLE PRECISION
+*          The (2,2) element of the 2-by-2 matrix.
+*
+*  SSMIN   (output) DOUBLE PRECISION
+*          abs(SSMIN) is the smaller singular value.
+*
+*  SSMAX   (output) DOUBLE PRECISION
+*          abs(SSMAX) is the larger singular value.
+*
+*  SNL     (output) DOUBLE PRECISION
+*  CSL     (output) DOUBLE PRECISION
+*          The vector (CSL, SNL) is a unit left singular vector for the
+*          singular value abs(SSMAX).
+*
+*  SNR     (output) DOUBLE PRECISION
+*  CSR     (output) DOUBLE PRECISION
+*          The vector (CSR, SNR) is a unit right singular vector for the
+*          singular value abs(SSMAX).
+*
+*  Further Details
+*  ===============
+*
+*  Any input parameter may be aliased with any output parameter.
+*
+*  Barring over/underflow and assuming a guard digit in subtraction, all
+*  output quantities are correct to within a few units in the last
+*  place (ulps).
+*
+*  In IEEE arithmetic, the code works correctly if one matrix element is
+*  infinite.
+*
+*  Overflow will not occur unless the largest singular value itself
+*  overflows or is within a few ulps of overflow. (On machines with
+*  partial overflow, like the Cray, overflow may occur if the largest
+*  singular value is within a factor of 2 of overflow.)
+*
+*  Underflow is harmless if underflow is gradual. Otherwise, results
+*  may correspond to a matrix modified by perturbations of size near
+*  the underflow threshold.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D0 )
+      DOUBLE PRECISION   HALF
+      PARAMETER          ( HALF = 0.5D0 )
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D0 )
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D0 )
+      DOUBLE PRECISION   FOUR
+      PARAMETER          ( FOUR = 4.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            GASMAL, SWAP
+      INTEGER            PMAX
+      DOUBLE PRECISION   A, CLT, CRT, D, FA, FT, GA, GT, HA, HT, L, M,
+     $                   MM, R, S, SLT, SRT, T, TEMP, TSIGN, TT
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, SIGN, SQRT
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+      FT = F
+      FA = ABS( FT )
+      HT = H
+      HA = ABS( H )
+*
+*     PMAX points to the maximum absolute element of matrix
+*       PMAX = 1 if F largest in absolute values
+*       PMAX = 2 if G largest in absolute values
+*       PMAX = 3 if H largest in absolute values
+*
+      PMAX = 1
+      SWAP = ( HA.GT.FA )
+      IF( SWAP ) THEN
+         PMAX = 3
+         TEMP = FT
+         FT = HT
+         HT = TEMP
+         TEMP = FA
+         FA = HA
+         HA = TEMP
+*
+*        Now FA .ge. HA
+*
+      END IF
+      GT = G
+      GA = ABS( GT )
+      IF( GA.EQ.ZERO ) THEN
+*
+*        Diagonal matrix
+*
+         SSMIN = HA
+         SSMAX = FA
+         CLT = ONE
+         CRT = ONE
+         SLT = ZERO
+         SRT = ZERO
+      ELSE
+         GASMAL = .TRUE.
+         IF( GA.GT.FA ) THEN
+            PMAX = 2
+            IF( ( FA / GA ).LT.DLAMCH( 'EPS' ) ) THEN
+*
+*              Case of very large GA
+*
+               GASMAL = .FALSE.
+               SSMAX = GA
+               IF( HA.GT.ONE ) THEN
+                  SSMIN = FA / ( GA / HA )
+               ELSE
+                  SSMIN = ( FA / GA )*HA
+               END IF
+               CLT = ONE
+               SLT = HT / GT
+               SRT = ONE
+               CRT = FT / GT
+            END IF
+         END IF
+         IF( GASMAL ) THEN
+*
+*           Normal case
+*
+            D = FA - HA
+            IF( D.EQ.FA ) THEN
+*
+*              Copes with infinite F or H
+*
+               L = ONE
+            ELSE
+               L = D / FA
+            END IF
+*
+*           Note that 0 .le. L .le. 1
+*
+            M = GT / FT
+*
+*           Note that abs(M) .le. 1/macheps
+*
+            T = TWO - L
+*
+*           Note that T .ge. 1
+*
+            MM = M*M
+            TT = T*T
+            S = SQRT( TT+MM )
+*
+*           Note that 1 .le. S .le. 1 + 1/macheps
+*
+            IF( L.EQ.ZERO ) THEN
+               R = ABS( M )
+            ELSE
+               R = SQRT( L*L+MM )
+            END IF
+*
+*           Note that 0 .le. R .le. 1 + 1/macheps
+*
+            A = HALF*( S+R )
+*
+*           Note that 1 .le. A .le. 1 + abs(M)
+*
+            SSMIN = HA / A
+            SSMAX = FA*A
+            IF( MM.EQ.ZERO ) THEN
+*
+*              Note that M is very tiny
+*
+               IF( L.EQ.ZERO ) THEN
+                  T = SIGN( TWO, FT )*SIGN( ONE, GT )
+               ELSE
+                  T = GT / SIGN( D, FT ) + M / T
+               END IF
+            ELSE
+               T = ( M / ( S+T )+M / ( R+L ) )*( ONE+A )
+            END IF
+            L = SQRT( T*T+FOUR )
+            CRT = TWO / L
+            SRT = T / L
+            CLT = ( CRT+SRT*M ) / A
+            SLT = ( HT / FT )*SRT / A
+         END IF
+      END IF
+      IF( SWAP ) THEN
+         CSL = SRT
+         SNL = CRT
+         CSR = SLT
+         SNR = CLT
+      ELSE
+         CSL = CLT
+         SNL = SLT
+         CSR = CRT
+         SNR = SRT
+      END IF
+*
+*     Correct signs of SSMAX and SSMIN
+*
+      IF( PMAX.EQ.1 )
+     $   TSIGN = SIGN( ONE, CSR )*SIGN( ONE, CSL )*SIGN( ONE, F )
+      IF( PMAX.EQ.2 )
+     $   TSIGN = SIGN( ONE, SNR )*SIGN( ONE, CSL )*SIGN( ONE, G )
+      IF( PMAX.EQ.3 )
+     $   TSIGN = SIGN( ONE, SNR )*SIGN( ONE, SNL )*SIGN( ONE, H )
+      SSMAX = SIGN( SSMAX, TSIGN )
+      SSMIN = SIGN( SSMIN, TSIGN*SIGN( ONE, F )*SIGN( ONE, H ) )
+      RETURN
+*
+*     End of DLASV2
+*
+      END
diff --git a/costa/native/external/lapack/dlaswp.f b/costa/native/external/lapack/dlaswp.f
new file mode 100644
index 000000000..99c0dda27
--- /dev/null
+++ b/costa/native/external/lapack/dlaswp.f
@@ -0,0 +1,120 @@
+      SUBROUTINE DLASWP( N, A, LDA, K1, K2, IPIV, INCX )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, K1, K2, LDA, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLASWP performs a series of row interchanges on the matrix A.
+*  One row interchange is initiated for each of rows K1 through K2 of A.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the matrix of column dimension N to which the row
+*          interchanges will be applied.
+*          On exit, the permuted matrix.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*
+*  K1      (input) INTEGER
+*          The first element of IPIV for which a row interchange will
+*          be done.
+*
+*  K2      (input) INTEGER
+*          The last element of IPIV for which a row interchange will
+*          be done.
+*
+*  IPIV    (input) INTEGER array, dimension (M*abs(INCX))
+*          The vector of pivot indices.  Only the elements in positions
+*          K1 through K2 of IPIV are accessed.
+*          IPIV(K) = L implies rows K and L are to be interchanged.
+*
+*  INCX    (input) INTEGER
+*          The increment between successive values of IPIV.  If IPIV
+*          is negative, the pivots are applied in reverse order.
+*
+*  Further Details
+*  ===============
+*
+*  Modified by
+*   R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+* =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, I1, I2, INC, IP, IX, IX0, J, K, N32
+      DOUBLE PRECISION   TEMP
+*     ..
+*     .. Executable Statements ..
+*
+*     Interchange row I with row IPIV(I) for each of rows K1 through K2.
+*
+      IF( INCX.GT.0 ) THEN
+         IX0 = K1
+         I1 = K1
+         I2 = K2
+         INC = 1
+      ELSE IF( INCX.LT.0 ) THEN
+         IX0 = 1 + ( 1-K2 )*INCX
+         I1 = K2
+         I2 = K1
+         INC = -1
+      ELSE
+         RETURN
+      END IF
+*
+      N32 = ( N / 32 )*32
+      IF( N32.NE.0 ) THEN
+         DO 30 J = 1, N32, 32
+            IX = IX0
+            DO 20 I = I1, I2, INC
+               IP = IPIV( IX )
+               IF( IP.NE.I ) THEN
+                  DO 10 K = J, J + 31
+                     TEMP = A( I, K )
+                     A( I, K ) = A( IP, K )
+                     A( IP, K ) = TEMP
+   10             CONTINUE
+               END IF
+               IX = IX + INCX
+   20       CONTINUE
+   30    CONTINUE
+      END IF
+      IF( N32.NE.N ) THEN
+         N32 = N32 + 1
+         IX = IX0
+         DO 50 I = I1, I2, INC
+            IP = IPIV( IX )
+            IF( IP.NE.I ) THEN
+               DO 40 K = N32, N
+                  TEMP = A( I, K )
+                  A( I, K ) = A( IP, K )
+                  A( IP, K ) = TEMP
+   40          CONTINUE
+            END IF
+            IX = IX + INCX
+   50    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DLASWP
+*
+      END
diff --git a/costa/native/external/lapack/dlasy2.f b/costa/native/external/lapack/dlasy2.f
new file mode 100644
index 000000000..a3c2369c3
--- /dev/null
+++ b/costa/native/external/lapack/dlasy2.f
@@ -0,0 +1,382 @@
+      SUBROUTINE DLASY2( LTRANL, LTRANR, ISGN, N1, N2, TL, LDTL, TR,
+     $                   LDTR, B, LDB, SCALE, X, LDX, XNORM, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      LOGICAL            LTRANL, LTRANR
+      INTEGER            INFO, ISGN, LDB, LDTL, LDTR, LDX, N1, N2
+      DOUBLE PRECISION   SCALE, XNORM
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   B( LDB, * ), TL( LDTL, * ), TR( LDTR, * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLASY2 solves for the N1 by N2 matrix X, 1 <= N1,N2 <= 2, in
+*
+*         op(TL)*X + ISGN*X*op(TR) = SCALE*B,
+*
+*  where TL is N1 by N1, TR is N2 by N2, B is N1 by N2, and ISGN = 1 or
+*  -1.  op(T) = T or T', where T' denotes the transpose of T.
+*
+*  Arguments
+*  =========
+*
+*  LTRANL  (input) LOGICAL
+*          On entry, LTRANL specifies the op(TL):
+*             = .FALSE., op(TL) = TL,
+*             = .TRUE., op(TL) = TL'.
+*
+*  LTRANR  (input) LOGICAL
+*          On entry, LTRANR specifies the op(TR):
+*            = .FALSE., op(TR) = TR,
+*            = .TRUE., op(TR) = TR'.
+*
+*  ISGN    (input) INTEGER
+*          On entry, ISGN specifies the sign of the equation
+*          as described before. ISGN may only be 1 or -1.
+*
+*  N1      (input) INTEGER
+*          On entry, N1 specifies the order of matrix TL.
+*          N1 may only be 0, 1 or 2.
+*
+*  N2      (input) INTEGER
+*          On entry, N2 specifies the order of matrix TR.
+*          N2 may only be 0, 1 or 2.
+*
+*  TL      (input) DOUBLE PRECISION array, dimension (LDTL,2)
+*          On entry, TL contains an N1 by N1 matrix.
+*
+*  LDTL    (input) INTEGER
+*          The leading dimension of the matrix TL. LDTL >= max(1,N1).
+*
+*  TR      (input) DOUBLE PRECISION array, dimension (LDTR,2)
+*          On entry, TR contains an N2 by N2 matrix.
+*
+*  LDTR    (input) INTEGER
+*          The leading dimension of the matrix TR. LDTR >= max(1,N2).
+*
+*  B       (input) DOUBLE PRECISION array, dimension (LDB,2)
+*          On entry, the N1 by N2 matrix B contains the right-hand
+*          side of the equation.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the matrix B. LDB >= max(1,N1).
+*
+*  SCALE   (output) DOUBLE PRECISION
+*          On exit, SCALE contains the scale factor. SCALE is chosen
+*          less than or equal to 1 to prevent the solution overflowing.
+*
+*  X       (output) DOUBLE PRECISION array, dimension (LDX,2)
+*          On exit, X contains the N1 by N2 solution.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the matrix X. LDX >= max(1,N1).
+*
+*  XNORM   (output) DOUBLE PRECISION
+*          On exit, XNORM is the infinity-norm of the solution.
+*
+*  INFO    (output) INTEGER
+*          On exit, INFO is set to
+*             0: successful exit.
+*             1: TL and TR have too close eigenvalues, so TL or
+*                TR is perturbed to get a nonsingular equation.
+*          NOTE: In the interests of speed, this routine does not
+*                check the inputs for errors.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+      DOUBLE PRECISION   TWO, HALF, EIGHT
+      PARAMETER          ( TWO = 2.0D+0, HALF = 0.5D+0, EIGHT = 8.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            BSWAP, XSWAP
+      INTEGER            I, IP, IPIV, IPSV, J, JP, JPSV, K
+      DOUBLE PRECISION   BET, EPS, GAM, L21, SGN, SMIN, SMLNUM, TAU1,
+     $                   TEMP, U11, U12, U22, XMAX
+*     ..
+*     .. Local Arrays ..
+      LOGICAL            BSWPIV( 4 ), XSWPIV( 4 )
+      INTEGER            JPIV( 4 ), LOCL21( 4 ), LOCU12( 4 ),
+     $                   LOCU22( 4 )
+      DOUBLE PRECISION   BTMP( 4 ), T16( 4, 4 ), TMP( 4 ), X2( 2 )
+*     ..
+*     .. External Functions ..
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           IDAMAX, DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Data statements ..
+      DATA               LOCU12 / 3, 4, 1, 2 / , LOCL21 / 2, 1, 4, 3 / ,
+     $                   LOCU22 / 4, 3, 2, 1 /
+      DATA               XSWPIV / .FALSE., .FALSE., .TRUE., .TRUE. /
+      DATA               BSWPIV / .FALSE., .TRUE., .FALSE., .TRUE. /
+*     ..
+*     .. Executable Statements ..
+*
+*     Do not check the input parameters for errors
+*
+      INFO = 0
+*
+*     Quick return if possible
+*
+      IF( N1.EQ.0 .OR. N2.EQ.0 )
+     $   RETURN
+*
+*     Set constants to control overflow
+*
+      EPS = DLAMCH( 'P' )
+      SMLNUM = DLAMCH( 'S' ) / EPS
+      SGN = ISGN
+*
+      K = N1 + N1 + N2 - 2
+      GO TO ( 10, 20, 30, 50 )K
+*
+*     1 by 1: TL11*X + SGN*X*TR11 = B11
+*
+   10 CONTINUE
+      TAU1 = TL( 1, 1 ) + SGN*TR( 1, 1 )
+      BET = ABS( TAU1 )
+      IF( BET.LE.SMLNUM ) THEN
+         TAU1 = SMLNUM
+         BET = SMLNUM
+         INFO = 1
+      END IF
+*
+      SCALE = ONE
+      GAM = ABS( B( 1, 1 ) )
+      IF( SMLNUM*GAM.GT.BET )
+     $   SCALE = ONE / GAM
+*
+      X( 1, 1 ) = ( B( 1, 1 )*SCALE ) / TAU1
+      XNORM = ABS( X( 1, 1 ) )
+      RETURN
+*
+*     1 by 2:
+*     TL11*[X11 X12] + ISGN*[X11 X12]*op[TR11 TR12]  = [B11 B12]
+*                                       [TR21 TR22]
+*
+   20 CONTINUE
+*
+      SMIN = MAX( EPS*MAX( ABS( TL( 1, 1 ) ), ABS( TR( 1, 1 ) ),
+     $       ABS( TR( 1, 2 ) ), ABS( TR( 2, 1 ) ), ABS( TR( 2, 2 ) ) ),
+     $       SMLNUM )
+      TMP( 1 ) = TL( 1, 1 ) + SGN*TR( 1, 1 )
+      TMP( 4 ) = TL( 1, 1 ) + SGN*TR( 2, 2 )
+      IF( LTRANR ) THEN
+         TMP( 2 ) = SGN*TR( 2, 1 )
+         TMP( 3 ) = SGN*TR( 1, 2 )
+      ELSE
+         TMP( 2 ) = SGN*TR( 1, 2 )
+         TMP( 3 ) = SGN*TR( 2, 1 )
+      END IF
+      BTMP( 1 ) = B( 1, 1 )
+      BTMP( 2 ) = B( 1, 2 )
+      GO TO 40
+*
+*     2 by 1:
+*          op[TL11 TL12]*[X11] + ISGN* [X11]*TR11  = [B11]
+*            [TL21 TL22] [X21]         [X21]         [B21]
+*
+   30 CONTINUE
+      SMIN = MAX( EPS*MAX( ABS( TR( 1, 1 ) ), ABS( TL( 1, 1 ) ),
+     $       ABS( TL( 1, 2 ) ), ABS( TL( 2, 1 ) ), ABS( TL( 2, 2 ) ) ),
+     $       SMLNUM )
+      TMP( 1 ) = TL( 1, 1 ) + SGN*TR( 1, 1 )
+      TMP( 4 ) = TL( 2, 2 ) + SGN*TR( 1, 1 )
+      IF( LTRANL ) THEN
+         TMP( 2 ) = TL( 1, 2 )
+         TMP( 3 ) = TL( 2, 1 )
+      ELSE
+         TMP( 2 ) = TL( 2, 1 )
+         TMP( 3 ) = TL( 1, 2 )
+      END IF
+      BTMP( 1 ) = B( 1, 1 )
+      BTMP( 2 ) = B( 2, 1 )
+   40 CONTINUE
+*
+*     Solve 2 by 2 system using complete pivoting.
+*     Set pivots less than SMIN to SMIN.
+*
+      IPIV = IDAMAX( 4, TMP, 1 )
+      U11 = TMP( IPIV )
+      IF( ABS( U11 ).LE.SMIN ) THEN
+         INFO = 1
+         U11 = SMIN
+      END IF
+      U12 = TMP( LOCU12( IPIV ) )
+      L21 = TMP( LOCL21( IPIV ) ) / U11
+      U22 = TMP( LOCU22( IPIV ) ) - U12*L21
+      XSWAP = XSWPIV( IPIV )
+      BSWAP = BSWPIV( IPIV )
+      IF( ABS( U22 ).LE.SMIN ) THEN
+         INFO = 1
+         U22 = SMIN
+      END IF
+      IF( BSWAP ) THEN
+         TEMP = BTMP( 2 )
+         BTMP( 2 ) = BTMP( 1 ) - L21*TEMP
+         BTMP( 1 ) = TEMP
+      ELSE
+         BTMP( 2 ) = BTMP( 2 ) - L21*BTMP( 1 )
+      END IF
+      SCALE = ONE
+      IF( ( TWO*SMLNUM )*ABS( BTMP( 2 ) ).GT.ABS( U22 ) .OR.
+     $    ( TWO*SMLNUM )*ABS( BTMP( 1 ) ).GT.ABS( U11 ) ) THEN
+         SCALE = HALF / MAX( ABS( BTMP( 1 ) ), ABS( BTMP( 2 ) ) )
+         BTMP( 1 ) = BTMP( 1 )*SCALE
+         BTMP( 2 ) = BTMP( 2 )*SCALE
+      END IF
+      X2( 2 ) = BTMP( 2 ) / U22
+      X2( 1 ) = BTMP( 1 ) / U11 - ( U12 / U11 )*X2( 2 )
+      IF( XSWAP ) THEN
+         TEMP = X2( 2 )
+         X2( 2 ) = X2( 1 )
+         X2( 1 ) = TEMP
+      END IF
+      X( 1, 1 ) = X2( 1 )
+      IF( N1.EQ.1 ) THEN
+         X( 1, 2 ) = X2( 2 )
+         XNORM = ABS( X( 1, 1 ) ) + ABS( X( 1, 2 ) )
+      ELSE
+         X( 2, 1 ) = X2( 2 )
+         XNORM = MAX( ABS( X( 1, 1 ) ), ABS( X( 2, 1 ) ) )
+      END IF
+      RETURN
+*
+*     2 by 2:
+*     op[TL11 TL12]*[X11 X12] +ISGN* [X11 X12]*op[TR11 TR12] = [B11 B12]
+*       [TL21 TL22] [X21 X22]        [X21 X22]   [TR21 TR22]   [B21 B22]
+*
+*     Solve equivalent 4 by 4 system using complete pivoting.
+*     Set pivots less than SMIN to SMIN.
+*
+   50 CONTINUE
+      SMIN = MAX( ABS( TR( 1, 1 ) ), ABS( TR( 1, 2 ) ),
+     $       ABS( TR( 2, 1 ) ), ABS( TR( 2, 2 ) ) )
+      SMIN = MAX( SMIN, ABS( TL( 1, 1 ) ), ABS( TL( 1, 2 ) ),
+     $       ABS( TL( 2, 1 ) ), ABS( TL( 2, 2 ) ) )
+      SMIN = MAX( EPS*SMIN, SMLNUM )
+      BTMP( 1 ) = ZERO
+      CALL DCOPY( 16, BTMP, 0, T16, 1 )
+      T16( 1, 1 ) = TL( 1, 1 ) + SGN*TR( 1, 1 )
+      T16( 2, 2 ) = TL( 2, 2 ) + SGN*TR( 1, 1 )
+      T16( 3, 3 ) = TL( 1, 1 ) + SGN*TR( 2, 2 )
+      T16( 4, 4 ) = TL( 2, 2 ) + SGN*TR( 2, 2 )
+      IF( LTRANL ) THEN
+         T16( 1, 2 ) = TL( 2, 1 )
+         T16( 2, 1 ) = TL( 1, 2 )
+         T16( 3, 4 ) = TL( 2, 1 )
+         T16( 4, 3 ) = TL( 1, 2 )
+      ELSE
+         T16( 1, 2 ) = TL( 1, 2 )
+         T16( 2, 1 ) = TL( 2, 1 )
+         T16( 3, 4 ) = TL( 1, 2 )
+         T16( 4, 3 ) = TL( 2, 1 )
+      END IF
+      IF( LTRANR ) THEN
+         T16( 1, 3 ) = SGN*TR( 1, 2 )
+         T16( 2, 4 ) = SGN*TR( 1, 2 )
+         T16( 3, 1 ) = SGN*TR( 2, 1 )
+         T16( 4, 2 ) = SGN*TR( 2, 1 )
+      ELSE
+         T16( 1, 3 ) = SGN*TR( 2, 1 )
+         T16( 2, 4 ) = SGN*TR( 2, 1 )
+         T16( 3, 1 ) = SGN*TR( 1, 2 )
+         T16( 4, 2 ) = SGN*TR( 1, 2 )
+      END IF
+      BTMP( 1 ) = B( 1, 1 )
+      BTMP( 2 ) = B( 2, 1 )
+      BTMP( 3 ) = B( 1, 2 )
+      BTMP( 4 ) = B( 2, 2 )
+*
+*     Perform elimination
+*
+      DO 100 I = 1, 3
+         XMAX = ZERO
+         DO 70 IP = I, 4
+            DO 60 JP = I, 4
+               IF( ABS( T16( IP, JP ) ).GE.XMAX ) THEN
+                  XMAX = ABS( T16( IP, JP ) )
+                  IPSV = IP
+                  JPSV = JP
+               END IF
+   60       CONTINUE
+   70    CONTINUE
+         IF( IPSV.NE.I ) THEN
+            CALL DSWAP( 4, T16( IPSV, 1 ), 4, T16( I, 1 ), 4 )
+            TEMP = BTMP( I )
+            BTMP( I ) = BTMP( IPSV )
+            BTMP( IPSV ) = TEMP
+         END IF
+         IF( JPSV.NE.I )
+     $      CALL DSWAP( 4, T16( 1, JPSV ), 1, T16( 1, I ), 1 )
+         JPIV( I ) = JPSV
+         IF( ABS( T16( I, I ) ).LT.SMIN ) THEN
+            INFO = 1
+            T16( I, I ) = SMIN
+         END IF
+         DO 90 J = I + 1, 4
+            T16( J, I ) = T16( J, I ) / T16( I, I )
+            BTMP( J ) = BTMP( J ) - T16( J, I )*BTMP( I )
+            DO 80 K = I + 1, 4
+               T16( J, K ) = T16( J, K ) - T16( J, I )*T16( I, K )
+   80       CONTINUE
+   90    CONTINUE
+  100 CONTINUE
+      IF( ABS( T16( 4, 4 ) ).LT.SMIN )
+     $   T16( 4, 4 ) = SMIN
+      SCALE = ONE
+      IF( ( EIGHT*SMLNUM )*ABS( BTMP( 1 ) ).GT.ABS( T16( 1, 1 ) ) .OR.
+     $    ( EIGHT*SMLNUM )*ABS( BTMP( 2 ) ).GT.ABS( T16( 2, 2 ) ) .OR.
+     $    ( EIGHT*SMLNUM )*ABS( BTMP( 3 ) ).GT.ABS( T16( 3, 3 ) ) .OR.
+     $    ( EIGHT*SMLNUM )*ABS( BTMP( 4 ) ).GT.ABS( T16( 4, 4 ) ) ) THEN
+         SCALE = ( ONE / EIGHT ) / MAX( ABS( BTMP( 1 ) ),
+     $           ABS( BTMP( 2 ) ), ABS( BTMP( 3 ) ), ABS( BTMP( 4 ) ) )
+         BTMP( 1 ) = BTMP( 1 )*SCALE
+         BTMP( 2 ) = BTMP( 2 )*SCALE
+         BTMP( 3 ) = BTMP( 3 )*SCALE
+         BTMP( 4 ) = BTMP( 4 )*SCALE
+      END IF
+      DO 120 I = 1, 4
+         K = 5 - I
+         TEMP = ONE / T16( K, K )
+         TMP( K ) = BTMP( K )*TEMP
+         DO 110 J = K + 1, 4
+            TMP( K ) = TMP( K ) - ( TEMP*T16( K, J ) )*TMP( J )
+  110    CONTINUE
+  120 CONTINUE
+      DO 130 I = 1, 3
+         IF( JPIV( 4-I ).NE.4-I ) THEN
+            TEMP = TMP( 4-I )
+            TMP( 4-I ) = TMP( JPIV( 4-I ) )
+            TMP( JPIV( 4-I ) ) = TEMP
+         END IF
+  130 CONTINUE
+      X( 1, 1 ) = TMP( 1 )
+      X( 2, 1 ) = TMP( 2 )
+      X( 1, 2 ) = TMP( 3 )
+      X( 2, 2 ) = TMP( 4 )
+      XNORM = MAX( ABS( TMP( 1 ) )+ABS( TMP( 3 ) ),
+     $        ABS( TMP( 2 ) )+ABS( TMP( 4 ) ) )
+      RETURN
+*
+*     End of DLASY2
+*
+      END
diff --git a/costa/native/external/lapack/dlasyf.f b/costa/native/external/lapack/dlasyf.f
new file mode 100644
index 000000000..e2dcb3a41
--- /dev/null
+++ b/costa/native/external/lapack/dlasyf.f
@@ -0,0 +1,588 @@
+      SUBROUTINE DLASYF( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KB, LDA, LDW, N, NB
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   A( LDA, * ), W( LDW, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLASYF computes a partial factorization of a real symmetric matrix A
+*  using the Bunch-Kaufman diagonal pivoting method. The partial
+*  factorization has the form:
+*
+*  A  =  ( I  U12 ) ( A11  0  ) (  I    0   )  if UPLO = 'U', or:
+*        ( 0  U22 ) (  0   D  ) ( U12' U22' )
+*
+*  A  =  ( L11  0 ) (  D   0  ) ( L11' L21' )  if UPLO = 'L'
+*        ( L21  I ) (  0  A22 ) (  0    I   )
+*
+*  where the order of D is at most NB. The actual order is returned in
+*  the argument KB, and is either NB or NB-1, or N if N <= NB.
+*
+*  DLASYF is an auxiliary routine called by DSYTRF. It uses blocked code
+*  (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or
+*  A22 (if UPLO = 'L').
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored:
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NB      (input) INTEGER
+*          The maximum number of columns of the matrix A that should be
+*          factored.  NB should be at least 2 to allow for 2-by-2 pivot
+*          blocks.
+*
+*  KB      (output) INTEGER
+*          The number of columns of A that were actually factored.
+*          KB is either NB-1 or NB, or N if N <= NB.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          n-by-n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n-by-n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*          On exit, A contains details of the partial factorization.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D.
+*          If UPLO = 'U', only the last KB elements of IPIV are set;
+*          if UPLO = 'L', only the first KB elements are set.
+*
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*  W       (workspace) DOUBLE PRECISION array, dimension (LDW,NB)
+*
+*  LDW     (input) INTEGER
+*          The leading dimension of the array W.  LDW >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          > 0: if INFO = k, D(k,k) is exactly zero.  The factorization
+*               has been completed, but the block diagonal matrix D is
+*               exactly singular.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+      DOUBLE PRECISION   EIGHT, SEVTEN
+      PARAMETER          ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            IMAX, J, JB, JJ, JMAX, JP, K, KK, KKW, KP,
+     $                   KSTEP, KW
+      DOUBLE PRECISION   ABSAKK, ALPHA, COLMAX, D11, D21, D22, R1,
+     $                   ROWMAX, T
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IDAMAX
+      EXTERNAL           LSAME, IDAMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DGEMM, DGEMV, DSCAL, DSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+*
+*     Initialize ALPHA for use in choosing pivot block size.
+*
+      ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT
+*
+      IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*        Factorize the trailing columns of A using the upper triangle
+*        of A and working backwards, and compute the matrix W = U12*D
+*        for use in updating A11
+*
+*        K is the main loop index, decreasing from N in steps of 1 or 2
+*
+*        KW is the column of W which corresponds to column K of A
+*
+         K = N
+   10    CONTINUE
+         KW = NB + K - N
+*
+*        Exit from loop
+*
+         IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 )
+     $      GO TO 30
+*
+*        Copy column K of A to column KW of W and update it
+*
+         CALL DCOPY( K, A( 1, K ), 1, W( 1, KW ), 1 )
+         IF( K.LT.N )
+     $      CALL DGEMV( 'No transpose', K, N-K, -ONE, A( 1, K+1 ), LDA,
+     $                  W( K, KW+1 ), LDW, ONE, W( 1, KW ), 1 )
+*
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = ABS( W( K, KW ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.GT.1 ) THEN
+            IMAX = IDAMAX( K-1, W( 1, KW ), 1 )
+            COLMAX = ABS( W( IMAX, KW ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              Copy column IMAX to column KW-1 of W and update it
+*
+               CALL DCOPY( IMAX, A( 1, IMAX ), 1, W( 1, KW-1 ), 1 )
+               CALL DCOPY( K-IMAX, A( IMAX, IMAX+1 ), LDA,
+     $                     W( IMAX+1, KW-1 ), 1 )
+               IF( K.LT.N )
+     $            CALL DGEMV( 'No transpose', K, N-K, -ONE, A( 1, K+1 ),
+     $                        LDA, W( IMAX, KW+1 ), LDW, ONE,
+     $                        W( 1, KW-1 ), 1 )
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               JMAX = IMAX + IDAMAX( K-IMAX, W( IMAX+1, KW-1 ), 1 )
+               ROWMAX = ABS( W( JMAX, KW-1 ) )
+               IF( IMAX.GT.1 ) THEN
+                  JMAX = IDAMAX( IMAX-1, W( 1, KW-1 ), 1 )
+                  ROWMAX = MAX( ROWMAX, ABS( W( JMAX, KW-1 ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( ABS( W( IMAX, KW-1 ) ).GE.ALPHA*ROWMAX ) THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+*
+*                 copy column KW-1 of W to column KW
+*
+                  CALL DCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 )
+               ELSE
+*
+*                 interchange rows and columns K-1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K - KSTEP + 1
+            KKW = NB + KK - N
+*
+*           Updated column KP is already stored in column KKW of W
+*
+            IF( KP.NE.KK ) THEN
+*
+*              Copy non-updated column KK to column KP
+*
+               A( KP, K ) = A( KK, K )
+               CALL DCOPY( K-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ),
+     $                     LDA )
+               CALL DCOPY( KP, A( 1, KK ), 1, A( 1, KP ), 1 )
+*
+*              Interchange rows KK and KP in last KK columns of A and W
+*
+               CALL DSWAP( N-KK+1, A( KK, KK ), LDA, A( KP, KK ), LDA )
+               CALL DSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ),
+     $                     LDW )
+            END IF
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column KW of W now holds
+*
+*              W(k) = U(k)*D(k)
+*
+*              where U(k) is the k-th column of U
+*
+*              Store U(k) in column k of A
+*
+               CALL DCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 )
+               R1 = ONE / A( K, K )
+               CALL DSCAL( K-1, R1, A( 1, K ), 1 )
+            ELSE
+*
+*              2-by-2 pivot block D(k): columns KW and KW-1 of W now
+*              hold
+*
+*              ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k)
+*
+*              where U(k) and U(k-1) are the k-th and (k-1)-th columns
+*              of U
+*
+               IF( K.GT.2 ) THEN
+*
+*                 Store U(k) and U(k-1) in columns k and k-1 of A
+*
+                  D21 = W( K-1, KW )
+                  D11 = W( K, KW ) / D21
+                  D22 = W( K-1, KW-1 ) / D21
+                  T = ONE / ( D11*D22-ONE )
+                  D21 = T / D21
+                  DO 20 J = 1, K - 2
+                     A( J, K-1 ) = D21*( D11*W( J, KW-1 )-W( J, KW ) )
+                     A( J, K ) = D21*( D22*W( J, KW )-W( J, KW-1 ) )
+   20             CONTINUE
+               END IF
+*
+*              Copy D(k) to A
+*
+               A( K-1, K-1 ) = W( K-1, KW-1 )
+               A( K-1, K ) = W( K-1, KW )
+               A( K, K ) = W( K, KW )
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K-1 ) = -KP
+         END IF
+*
+*        Decrease K and return to the start of the main loop
+*
+         K = K - KSTEP
+         GO TO 10
+*
+   30    CONTINUE
+*
+*        Update the upper triangle of A11 (= A(1:k,1:k)) as
+*
+*        A11 := A11 - U12*D*U12' = A11 - U12*W'
+*
+*        computing blocks of NB columns at a time
+*
+         DO 50 J = ( ( K-1 ) / NB )*NB + 1, 1, -NB
+            JB = MIN( NB, K-J+1 )
+*
+*           Update the upper triangle of the diagonal block
+*
+            DO 40 JJ = J, J + JB - 1
+               CALL DGEMV( 'No transpose', JJ-J+1, N-K, -ONE,
+     $                     A( J, K+1 ), LDA, W( JJ, KW+1 ), LDW, ONE,
+     $                     A( J, JJ ), 1 )
+   40       CONTINUE
+*
+*           Update the rectangular superdiagonal block
+*
+            CALL DGEMM( 'No transpose', 'Transpose', J-1, JB, N-K, -ONE,
+     $                  A( 1, K+1 ), LDA, W( J, KW+1 ), LDW, ONE,
+     $                  A( 1, J ), LDA )
+   50    CONTINUE
+*
+*        Put U12 in standard form by partially undoing the interchanges
+*        in columns k+1:n
+*
+         J = K + 1
+   60    CONTINUE
+         JJ = J
+         JP = IPIV( J )
+         IF( JP.LT.0 ) THEN
+            JP = -JP
+            J = J + 1
+         END IF
+         J = J + 1
+         IF( JP.NE.JJ .AND. J.LE.N )
+     $      CALL DSWAP( N-J+1, A( JP, J ), LDA, A( JJ, J ), LDA )
+         IF( J.LE.N )
+     $      GO TO 60
+*
+*        Set KB to the number of columns factorized
+*
+         KB = N - K
+*
+      ELSE
+*
+*        Factorize the leading columns of A using the lower triangle
+*        of A and working forwards, and compute the matrix W = L21*D
+*        for use in updating A22
+*
+*        K is the main loop index, increasing from 1 in steps of 1 or 2
+*
+         K = 1
+   70    CONTINUE
+*
+*        Exit from loop
+*
+         IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N )
+     $      GO TO 90
+*
+*        Copy column K of A to column K of W and update it
+*
+         CALL DCOPY( N-K+1, A( K, K ), 1, W( K, K ), 1 )
+         CALL DGEMV( 'No transpose', N-K+1, K-1, -ONE, A( K, 1 ), LDA,
+     $               W( K, 1 ), LDW, ONE, W( K, K ), 1 )
+*
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = ABS( W( K, K ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.LT.N ) THEN
+            IMAX = K + IDAMAX( N-K, W( K+1, K ), 1 )
+            COLMAX = ABS( W( IMAX, K ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              Copy column IMAX to column K+1 of W and update it
+*
+               CALL DCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1 )
+               CALL DCOPY( N-IMAX+1, A( IMAX, IMAX ), 1, W( IMAX, K+1 ),
+     $                     1 )
+               CALL DGEMV( 'No transpose', N-K+1, K-1, -ONE, A( K, 1 ),
+     $                     LDA, W( IMAX, 1 ), LDW, ONE, W( K, K+1 ), 1 )
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               JMAX = K - 1 + IDAMAX( IMAX-K, W( K, K+1 ), 1 )
+               ROWMAX = ABS( W( JMAX, K+1 ) )
+               IF( IMAX.LT.N ) THEN
+                  JMAX = IMAX + IDAMAX( N-IMAX, W( IMAX+1, K+1 ), 1 )
+                  ROWMAX = MAX( ROWMAX, ABS( W( JMAX, K+1 ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( ABS( W( IMAX, K+1 ) ).GE.ALPHA*ROWMAX ) THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+*
+*                 copy column K+1 of W to column K
+*
+                  CALL DCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 )
+               ELSE
+*
+*                 interchange rows and columns K+1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K + KSTEP - 1
+*
+*           Updated column KP is already stored in column KK of W
+*
+            IF( KP.NE.KK ) THEN
+*
+*              Copy non-updated column KK to column KP
+*
+               A( KP, K ) = A( KK, K )
+               CALL DCOPY( KP-K-1, A( K+1, KK ), 1, A( KP, K+1 ), LDA )
+               CALL DCOPY( N-KP+1, A( KP, KK ), 1, A( KP, KP ), 1 )
+*
+*              Interchange rows KK and KP in first KK columns of A and W
+*
+               CALL DSWAP( KK, A( KK, 1 ), LDA, A( KP, 1 ), LDA )
+               CALL DSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW )
+            END IF
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column k of W now holds
+*
+*              W(k) = L(k)*D(k)
+*
+*              where L(k) is the k-th column of L
+*
+*              Store L(k) in column k of A
+*
+               CALL DCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 )
+               IF( K.LT.N ) THEN
+                  R1 = ONE / A( K, K )
+                  CALL DSCAL( N-K, R1, A( K+1, K ), 1 )
+               END IF
+            ELSE
+*
+*              2-by-2 pivot block D(k): columns k and k+1 of W now hold
+*
+*              ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k)
+*
+*              where L(k) and L(k+1) are the k-th and (k+1)-th columns
+*              of L
+*
+               IF( K.LT.N-1 ) THEN
+*
+*                 Store L(k) and L(k+1) in columns k and k+1 of A
+*
+                  D21 = W( K+1, K )
+                  D11 = W( K+1, K+1 ) / D21
+                  D22 = W( K, K ) / D21
+                  T = ONE / ( D11*D22-ONE )
+                  D21 = T / D21
+                  DO 80 J = K + 2, N
+                     A( J, K ) = D21*( D11*W( J, K )-W( J, K+1 ) )
+                     A( J, K+1 ) = D21*( D22*W( J, K+1 )-W( J, K ) )
+   80             CONTINUE
+               END IF
+*
+*              Copy D(k) to A
+*
+               A( K, K ) = W( K, K )
+               A( K+1, K ) = W( K+1, K )
+               A( K+1, K+1 ) = W( K+1, K+1 )
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K+1 ) = -KP
+         END IF
+*
+*        Increase K and return to the start of the main loop
+*
+         K = K + KSTEP
+         GO TO 70
+*
+   90    CONTINUE
+*
+*        Update the lower triangle of A22 (= A(k:n,k:n)) as
+*
+*        A22 := A22 - L21*D*L21' = A22 - L21*W'
+*
+*        computing blocks of NB columns at a time
+*
+         DO 110 J = K, N, NB
+            JB = MIN( NB, N-J+1 )
+*
+*           Update the lower triangle of the diagonal block
+*
+            DO 100 JJ = J, J + JB - 1
+               CALL DGEMV( 'No transpose', J+JB-JJ, K-1, -ONE,
+     $                     A( JJ, 1 ), LDA, W( JJ, 1 ), LDW, ONE,
+     $                     A( JJ, JJ ), 1 )
+  100       CONTINUE
+*
+*           Update the rectangular subdiagonal block
+*
+            IF( J+JB.LE.N )
+     $         CALL DGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB,
+     $                     K-1, -ONE, A( J+JB, 1 ), LDA, W( J, 1 ), LDW,
+     $                     ONE, A( J+JB, J ), LDA )
+  110    CONTINUE
+*
+*        Put L21 in standard form by partially undoing the interchanges
+*        in columns 1:k-1
+*
+         J = K - 1
+  120    CONTINUE
+         JJ = J
+         JP = IPIV( J )
+         IF( JP.LT.0 ) THEN
+            JP = -JP
+            J = J - 1
+         END IF
+         J = J - 1
+         IF( JP.NE.JJ .AND. J.GE.1 )
+     $      CALL DSWAP( J, A( JP, 1 ), LDA, A( JJ, 1 ), LDA )
+         IF( J.GE.1 )
+     $      GO TO 120
+*
+*        Set KB to the number of columns factorized
+*
+         KB = K - 1
+*
+      END IF
+      RETURN
+*
+*     End of DLASYF
+*
+      END
diff --git a/costa/native/external/lapack/dlatbs.f b/costa/native/external/lapack/dlatbs.f
new file mode 100644
index 000000000..fe7dd11cb
--- /dev/null
+++ b/costa/native/external/lapack/dlatbs.f
@@ -0,0 +1,724 @@
+      SUBROUTINE DLATBS( UPLO, TRANS, DIAG, NORMIN, N, KD, AB, LDAB, X,
+     $                   SCALE, CNORM, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORMIN, TRANS, UPLO
+      INTEGER            INFO, KD, LDAB, N
+      DOUBLE PRECISION   SCALE
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AB( LDAB, * ), CNORM( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLATBS solves one of the triangular systems
+*
+*     A *x = s*b  or  A'*x = s*b
+*
+*  with scaling to prevent overflow, where A is an upper or lower
+*  triangular band matrix.  Here A' denotes the transpose of A, x and b
+*  are n-element vectors, and s is a scaling factor, usually less than
+*  or equal to 1, chosen so that the components of x will be less than
+*  the overflow threshold.  If the unscaled problem will not cause
+*  overflow, the Level 2 BLAS routine DTBSV is called.  If the matrix A
+*  is singular (A(j,j) = 0 for some j), then s is set to 0 and a
+*  non-trivial solution to A*x = 0 is returned.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the matrix A is upper or lower triangular.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the operation applied to A.
+*          = 'N':  Solve A * x = s*b  (No transpose)
+*          = 'T':  Solve A'* x = s*b  (Transpose)
+*          = 'C':  Solve A'* x = s*b  (Conjugate transpose = Transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          Specifies whether or not the matrix A is unit triangular.
+*          = 'N':  Non-unit triangular
+*          = 'U':  Unit triangular
+*
+*  NORMIN  (input) CHARACTER*1
+*          Specifies whether CNORM has been set or not.
+*          = 'Y':  CNORM contains the column norms on entry
+*          = 'N':  CNORM is not set on entry.  On exit, the norms will
+*                  be computed and stored in CNORM.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of subdiagonals or superdiagonals in the
+*          triangular matrix A.  KD >= 0.
+*
+*  AB      (input) DOUBLE PRECISION array, dimension (LDAB,N)
+*          The upper or lower triangular band matrix A, stored in the
+*          first KD+1 rows of the array. The j-th column of A is stored
+*          in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  X       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the right hand side b of the triangular system.
+*          On exit, X is overwritten by the solution vector x.
+*
+*  SCALE   (output) DOUBLE PRECISION
+*          The scaling factor s for the triangular system
+*             A * x = s*b  or  A'* x = s*b.
+*          If SCALE = 0, the matrix A is singular or badly scaled, and
+*          the vector x is an exact or approximate solution to A*x = 0.
+*
+*  CNORM   (input or output) DOUBLE PRECISION array, dimension (N)
+*
+*          If NORMIN = 'Y', CNORM is an input argument and CNORM(j)
+*          contains the norm of the off-diagonal part of the j-th column
+*          of A.  If TRANS = 'N', CNORM(j) must be greater than or equal
+*          to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)
+*          must be greater than or equal to the 1-norm.
+*
+*          If NORMIN = 'N', CNORM is an output argument and CNORM(j)
+*          returns the 1-norm of the offdiagonal part of the j-th column
+*          of A.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -k, the k-th argument had an illegal value
+*
+*  Further Details
+*  ======= =======
+*
+*  A rough bound on x is computed; if that is less than overflow, DTBSV
+*  is called, otherwise, specific code is used which checks for possible
+*  overflow or divide-by-zero at every operation.
+*
+*  A columnwise scheme is used for solving A*x = b.  The basic algorithm
+*  if A is lower triangular is
+*
+*       x[1:n] := b[1:n]
+*       for j = 1, ..., n
+*            x(j) := x(j) / A(j,j)
+*            x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j]
+*       end
+*
+*  Define bounds on the components of x after j iterations of the loop:
+*     M(j) = bound on x[1:j]
+*     G(j) = bound on x[j+1:n]
+*  Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}.
+*
+*  Then for iteration j+1 we have
+*     M(j+1) <= G(j) / | A(j+1,j+1) |
+*     G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] |
+*            <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | )
+*
+*  where CNORM(j+1) is greater than or equal to the infinity-norm of
+*  column j+1 of A, not counting the diagonal.  Hence
+*
+*     G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | )
+*                  1<=i<=j
+*  and
+*
+*     |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| )
+*                                   1<=i< j
+*
+*  Since |x(j)| <= M(j), we use the Level 2 BLAS routine DTBSV if the
+*  reciprocal of the largest M(j), j=1,..,n, is larger than
+*  max(underflow, 1/overflow).
+*
+*  The bound on x(j) is also used to determine when a step in the
+*  columnwise method can be performed without fear of overflow.  If
+*  the computed bound is greater than a large constant, x is scaled to
+*  prevent overflow, but if the bound overflows, x is set to 0, x(j) to
+*  1, and scale to 0, and a non-trivial solution to A*x = 0 is found.
+*
+*  Similarly, a row-wise scheme is used to solve A'*x = b.  The basic
+*  algorithm for A upper triangular is
+*
+*       for j = 1, ..., n
+*            x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j)
+*       end
+*
+*  We simultaneously compute two bounds
+*       G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j
+*       M(j) = bound on x(i), 1<=i<=j
+*
+*  The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we
+*  add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1.
+*  Then the bound on x(j) is
+*
+*       M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) |
+*
+*            <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| )
+*                      1<=i<=j
+*
+*  and we can safely call DTBSV if 1/M(n) and 1/G(n) are both greater
+*  than max(underflow, 1/overflow).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, HALF, ONE
+      PARAMETER          ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN, NOUNIT, UPPER
+      INTEGER            I, IMAX, J, JFIRST, JINC, JLAST, JLEN, MAIND
+      DOUBLE PRECISION   BIGNUM, GROW, REC, SMLNUM, SUMJ, TJJ, TJJS,
+     $                   TMAX, TSCAL, USCAL, XBND, XJ, XMAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DASUM, DDOT, DLAMCH
+      EXTERNAL           LSAME, IDAMAX, DASUM, DDOT, DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DSCAL, DTBSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Test the input parameters.
+*
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT.
+     $         LSAME( NORMIN, 'N' ) ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLATBS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine machine dependent parameters to control overflow.
+*
+      SMLNUM = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' )
+      BIGNUM = ONE / SMLNUM
+      SCALE = ONE
+*
+      IF( LSAME( NORMIN, 'N' ) ) THEN
+*
+*        Compute the 1-norm of each column, not including the diagonal.
+*
+         IF( UPPER ) THEN
+*
+*           A is upper triangular.
+*
+            DO 10 J = 1, N
+               JLEN = MIN( KD, J-1 )
+               CNORM( J ) = DASUM( JLEN, AB( KD+1-JLEN, J ), 1 )
+   10       CONTINUE
+         ELSE
+*
+*           A is lower triangular.
+*
+            DO 20 J = 1, N
+               JLEN = MIN( KD, N-J )
+               IF( JLEN.GT.0 ) THEN
+                  CNORM( J ) = DASUM( JLEN, AB( 2, J ), 1 )
+               ELSE
+                  CNORM( J ) = ZERO
+               END IF
+   20       CONTINUE
+         END IF
+      END IF
+*
+*     Scale the column norms by TSCAL if the maximum element in CNORM is
+*     greater than BIGNUM.
+*
+      IMAX = IDAMAX( N, CNORM, 1 )
+      TMAX = CNORM( IMAX )
+      IF( TMAX.LE.BIGNUM ) THEN
+         TSCAL = ONE
+      ELSE
+         TSCAL = ONE / ( SMLNUM*TMAX )
+         CALL DSCAL( N, TSCAL, CNORM, 1 )
+      END IF
+*
+*     Compute a bound on the computed solution vector to see if the
+*     Level 2 BLAS routine DTBSV can be used.
+*
+      J = IDAMAX( N, X, 1 )
+      XMAX = ABS( X( J ) )
+      XBND = XMAX
+      IF( NOTRAN ) THEN
+*
+*        Compute the growth in A * x = b.
+*
+         IF( UPPER ) THEN
+            JFIRST = N
+            JLAST = 1
+            JINC = -1
+            MAIND = KD + 1
+         ELSE
+            JFIRST = 1
+            JLAST = N
+            JINC = 1
+            MAIND = 1
+         END IF
+*
+         IF( TSCAL.NE.ONE ) THEN
+            GROW = ZERO
+            GO TO 50
+         END IF
+*
+         IF( NOUNIT ) THEN
+*
+*           A is non-unit triangular.
+*
+*           Compute GROW = 1/G(j) and XBND = 1/M(j).
+*           Initially, G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = ONE / MAX( XBND, SMLNUM )
+            XBND = GROW
+            DO 30 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 50
+*
+*              M(j) = G(j-1) / abs(A(j,j))
+*
+               TJJ = ABS( AB( MAIND, J ) )
+               XBND = MIN( XBND, MIN( ONE, TJJ )*GROW )
+               IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN
+*
+*                 G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) )
+*
+                  GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) )
+               ELSE
+*
+*                 G(j) could overflow, set GROW to 0.
+*
+                  GROW = ZERO
+               END IF
+   30       CONTINUE
+            GROW = XBND
+         ELSE
+*
+*           A is unit triangular.
+*
+*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) )
+            DO 40 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 50
+*
+*              G(j) = G(j-1)*( 1 + CNORM(j) )
+*
+               GROW = GROW*( ONE / ( ONE+CNORM( J ) ) )
+   40       CONTINUE
+         END IF
+   50    CONTINUE
+*
+      ELSE
+*
+*        Compute the growth in A' * x = b.
+*
+         IF( UPPER ) THEN
+            JFIRST = 1
+            JLAST = N
+            JINC = 1
+            MAIND = KD + 1
+         ELSE
+            JFIRST = N
+            JLAST = 1
+            JINC = -1
+            MAIND = 1
+         END IF
+*
+         IF( TSCAL.NE.ONE ) THEN
+            GROW = ZERO
+            GO TO 80
+         END IF
+*
+         IF( NOUNIT ) THEN
+*
+*           A is non-unit triangular.
+*
+*           Compute GROW = 1/G(j) and XBND = 1/M(j).
+*           Initially, M(0) = max{x(i), i=1,...,n}.
+*
+            GROW = ONE / MAX( XBND, SMLNUM )
+            XBND = GROW
+            DO 60 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 80
+*
+*              G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) )
+*
+               XJ = ONE + CNORM( J )
+               GROW = MIN( GROW, XBND / XJ )
+*
+*              M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j))
+*
+               TJJ = ABS( AB( MAIND, J ) )
+               IF( XJ.GT.TJJ )
+     $            XBND = XBND*( TJJ / XJ )
+   60       CONTINUE
+            GROW = MIN( GROW, XBND )
+         ELSE
+*
+*           A is unit triangular.
+*
+*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) )
+            DO 70 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 80
+*
+*              G(j) = ( 1 + CNORM(j) )*G(j-1)
+*
+               XJ = ONE + CNORM( J )
+               GROW = GROW / XJ
+   70       CONTINUE
+         END IF
+   80    CONTINUE
+      END IF
+*
+      IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN
+*
+*        Use the Level 2 BLAS solve if the reciprocal of the bound on
+*        elements of X is not too small.
+*
+         CALL DTBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, X, 1 )
+      ELSE
+*
+*        Use a Level 1 BLAS solve, scaling intermediate results.
+*
+         IF( XMAX.GT.BIGNUM ) THEN
+*
+*           Scale X so that its components are less than or equal to
+*           BIGNUM in absolute value.
+*
+            SCALE = BIGNUM / XMAX
+            CALL DSCAL( N, SCALE, X, 1 )
+            XMAX = BIGNUM
+         END IF
+*
+         IF( NOTRAN ) THEN
+*
+*           Solve A * x = b
+*
+            DO 110 J = JFIRST, JLAST, JINC
+*
+*              Compute x(j) = b(j) / A(j,j), scaling x if necessary.
+*
+               XJ = ABS( X( J ) )
+               IF( NOUNIT ) THEN
+                  TJJS = AB( MAIND, J )*TSCAL
+               ELSE
+                  TJJS = TSCAL
+                  IF( TSCAL.EQ.ONE )
+     $               GO TO 100
+               END IF
+               TJJ = ABS( TJJS )
+               IF( TJJ.GT.SMLNUM ) THEN
+*
+*                    abs(A(j,j)) > SMLNUM:
+*
+                  IF( TJJ.LT.ONE ) THEN
+                     IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                          Scale x by 1/b(j).
+*
+                        REC = ONE / XJ
+                        CALL DSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                  END IF
+                  X( J ) = X( J ) / TJJS
+                  XJ = ABS( X( J ) )
+               ELSE IF( TJJ.GT.ZERO ) THEN
+*
+*                    0 < abs(A(j,j)) <= SMLNUM:
+*
+                  IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                       Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM
+*                       to avoid overflow when dividing by A(j,j).
+*
+                     REC = ( TJJ*BIGNUM ) / XJ
+                     IF( CNORM( J ).GT.ONE ) THEN
+*
+*                          Scale by 1/CNORM(j) to avoid overflow when
+*                          multiplying x(j) times column j.
+*
+                        REC = REC / CNORM( J )
+                     END IF
+                     CALL DSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                     XMAX = XMAX*REC
+                  END IF
+                  X( J ) = X( J ) / TJJS
+                  XJ = ABS( X( J ) )
+               ELSE
+*
+*                    A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+*                    scale = 0, and compute a solution to A*x = 0.
+*
+                  DO 90 I = 1, N
+                     X( I ) = ZERO
+   90             CONTINUE
+                  X( J ) = ONE
+                  XJ = ONE
+                  SCALE = ZERO
+                  XMAX = ZERO
+               END IF
+  100          CONTINUE
+*
+*              Scale x if necessary to avoid overflow when adding a
+*              multiple of column j of A.
+*
+               IF( XJ.GT.ONE ) THEN
+                  REC = ONE / XJ
+                  IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN
+*
+*                    Scale x by 1/(2*abs(x(j))).
+*
+                     REC = REC*HALF
+                     CALL DSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                  END IF
+               ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN
+*
+*                 Scale x by 1/2.
+*
+                  CALL DSCAL( N, HALF, X, 1 )
+                  SCALE = SCALE*HALF
+               END IF
+*
+               IF( UPPER ) THEN
+                  IF( J.GT.1 ) THEN
+*
+*                    Compute the update
+*                       x(max(1,j-kd):j-1) := x(max(1,j-kd):j-1) -
+*                                             x(j)* A(max(1,j-kd):j-1,j)
+*
+                     JLEN = MIN( KD, J-1 )
+                     CALL DAXPY( JLEN, -X( J )*TSCAL,
+     $                           AB( KD+1-JLEN, J ), 1, X( J-JLEN ), 1 )
+                     I = IDAMAX( J-1, X, 1 )
+                     XMAX = ABS( X( I ) )
+                  END IF
+               ELSE IF( J.LT.N ) THEN
+*
+*                 Compute the update
+*                    x(j+1:min(j+kd,n)) := x(j+1:min(j+kd,n)) -
+*                                          x(j) * A(j+1:min(j+kd,n),j)
+*
+                  JLEN = MIN( KD, N-J )
+                  IF( JLEN.GT.0 )
+     $               CALL DAXPY( JLEN, -X( J )*TSCAL, AB( 2, J ), 1,
+     $                           X( J+1 ), 1 )
+                  I = J + IDAMAX( N-J, X( J+1 ), 1 )
+                  XMAX = ABS( X( I ) )
+               END IF
+  110       CONTINUE
+*
+         ELSE
+*
+*           Solve A' * x = b
+*
+            DO 160 J = JFIRST, JLAST, JINC
+*
+*              Compute x(j) = b(j) - sum A(k,j)*x(k).
+*                                    k<>j
+*
+               XJ = ABS( X( J ) )
+               USCAL = TSCAL
+               REC = ONE / MAX( XMAX, ONE )
+               IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN
+*
+*                 If x(j) could overflow, scale x by 1/(2*XMAX).
+*
+                  REC = REC*HALF
+                  IF( NOUNIT ) THEN
+                     TJJS = AB( MAIND, J )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                  END IF
+                  TJJ = ABS( TJJS )
+                  IF( TJJ.GT.ONE ) THEN
+*
+*                       Divide by A(j,j) when scaling x if A(j,j) > 1.
+*
+                     REC = MIN( ONE, REC*TJJ )
+                     USCAL = USCAL / TJJS
+                  END IF
+                  IF( REC.LT.ONE ) THEN
+                     CALL DSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                     XMAX = XMAX*REC
+                  END IF
+               END IF
+*
+               SUMJ = ZERO
+               IF( USCAL.EQ.ONE ) THEN
+*
+*                 If the scaling needed for A in the dot product is 1,
+*                 call DDOT to perform the dot product.
+*
+                  IF( UPPER ) THEN
+                     JLEN = MIN( KD, J-1 )
+                     SUMJ = DDOT( JLEN, AB( KD+1-JLEN, J ), 1,
+     $                      X( J-JLEN ), 1 )
+                  ELSE
+                     JLEN = MIN( KD, N-J )
+                     IF( JLEN.GT.0 )
+     $                  SUMJ = DDOT( JLEN, AB( 2, J ), 1, X( J+1 ), 1 )
+                  END IF
+               ELSE
+*
+*                 Otherwise, use in-line code for the dot product.
+*
+                  IF( UPPER ) THEN
+                     JLEN = MIN( KD, J-1 )
+                     DO 120 I = 1, JLEN
+                        SUMJ = SUMJ + ( AB( KD+I-JLEN, J )*USCAL )*
+     $                         X( J-JLEN-1+I )
+  120                CONTINUE
+                  ELSE
+                     JLEN = MIN( KD, N-J )
+                     DO 130 I = 1, JLEN
+                        SUMJ = SUMJ + ( AB( I+1, J )*USCAL )*X( J+I )
+  130                CONTINUE
+                  END IF
+               END IF
+*
+               IF( USCAL.EQ.TSCAL ) THEN
+*
+*                 Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j)
+*                 was not used to scale the dotproduct.
+*
+                  X( J ) = X( J ) - SUMJ
+                  XJ = ABS( X( J ) )
+                  IF( NOUNIT ) THEN
+*
+*                    Compute x(j) = x(j) / A(j,j), scaling if necessary.
+*
+                     TJJS = AB( MAIND, J )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                     IF( TSCAL.EQ.ONE )
+     $                  GO TO 150
+                  END IF
+                  TJJ = ABS( TJJS )
+                  IF( TJJ.GT.SMLNUM ) THEN
+*
+*                       abs(A(j,j)) > SMLNUM:
+*
+                     IF( TJJ.LT.ONE ) THEN
+                        IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                             Scale X by 1/abs(x(j)).
+*
+                           REC = ONE / XJ
+                           CALL DSCAL( N, REC, X, 1 )
+                           SCALE = SCALE*REC
+                           XMAX = XMAX*REC
+                        END IF
+                     END IF
+                     X( J ) = X( J ) / TJJS
+                  ELSE IF( TJJ.GT.ZERO ) THEN
+*
+*                       0 < abs(A(j,j)) <= SMLNUM:
+*
+                     IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM.
+*
+                        REC = ( TJJ*BIGNUM ) / XJ
+                        CALL DSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                     X( J ) = X( J ) / TJJS
+                  ELSE
+*
+*                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+*                       scale = 0, and compute a solution to A'*x = 0.
+*
+                     DO 140 I = 1, N
+                        X( I ) = ZERO
+  140                CONTINUE
+                     X( J ) = ONE
+                     SCALE = ZERO
+                     XMAX = ZERO
+                  END IF
+  150             CONTINUE
+               ELSE
+*
+*                 Compute x(j) := x(j) / A(j,j) - sumj if the dot
+*                 product has already been divided by 1/A(j,j).
+*
+                  X( J ) = X( J ) / TJJS - SUMJ
+               END IF
+               XMAX = MAX( XMAX, ABS( X( J ) ) )
+  160       CONTINUE
+         END IF
+         SCALE = SCALE / TSCAL
+      END IF
+*
+*     Scale the column norms by 1/TSCAL for return.
+*
+      IF( TSCAL.NE.ONE ) THEN
+         CALL DSCAL( N, ONE / TSCAL, CNORM, 1 )
+      END IF
+*
+      RETURN
+*
+*     End of DLATBS
+*
+      END
diff --git a/costa/native/external/lapack/dlatdf.f b/costa/native/external/lapack/dlatdf.f
new file mode 100644
index 000000000..728fd3a4b
--- /dev/null
+++ b/costa/native/external/lapack/dlatdf.f
@@ -0,0 +1,238 @@
+      SUBROUTINE DLATDF( IJOB, N, Z, LDZ, RHS, RDSUM, RDSCAL, IPIV,
+     $                   JPIV )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            IJOB, LDZ, N
+      DOUBLE PRECISION   RDSCAL, RDSUM
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), JPIV( * )
+      DOUBLE PRECISION   RHS( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLATDF uses the LU factorization of the n-by-n matrix Z computed by
+*  DGETC2 and computes a contribution to the reciprocal Dif-estimate
+*  by solving Z * x = b for x, and choosing the r.h.s. b such that
+*  the norm of x is as large as possible. On entry RHS = b holds the
+*  contribution from earlier solved sub-systems, and on return RHS = x.
+*
+*  The factorization of Z returned by DGETC2 has the form Z = P*L*U*Q,
+*  where P and Q are permutation matrices. L is lower triangular with
+*  unit diagonal elements and U is upper triangular.
+*
+*  Arguments
+*  =========
+*
+*  IJOB    (input) INTEGER
+*          IJOB = 2: First compute an approximative null-vector e
+*              of Z using DGECON, e is normalized and solve for
+*              Zx = +-e - f with the sign giving the greater value
+*              of 2-norm(x). About 5 times as expensive as Default.
+*          IJOB .ne. 2: Local look ahead strategy where all entries of
+*              the r.h.s. b is choosen as either +1 or -1 (Default).
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Z.
+*
+*  Z       (input) DOUBLE PRECISION array, dimension (LDZ, N)
+*          On entry, the LU part of the factorization of the n-by-n
+*          matrix Z computed by DGETC2:  Z = P * L * U * Q
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDA >= max(1, N).
+*
+*  RHS     (input/output) DOUBLE PRECISION array, dimension N.
+*          On entry, RHS contains contributions from other subsystems.
+*          On exit, RHS contains the solution of the subsystem with
+*          entries acoording to the value of IJOB (see above).
+*
+*  RDSUM   (input/output) DOUBLE PRECISION
+*          On entry, the sum of squares of computed contributions to
+*          the Dif-estimate under computation by DTGSYL, where the
+*          scaling factor RDSCAL (see below) has been factored out.
+*          On exit, the corresponding sum of squares updated with the
+*          contributions from the current sub-system.
+*          If TRANS = 'T' RDSUM is not touched.
+*          NOTE: RDSUM only makes sense when DTGSY2 is called by STGSYL.
+*
+*  RDSCAL  (input/output) DOUBLE PRECISION
+*          On entry, scaling factor used to prevent overflow in RDSUM.
+*          On exit, RDSCAL is updated w.r.t. the current contributions
+*          in RDSUM.
+*          If TRANS = 'T', RDSCAL is not touched.
+*          NOTE: RDSCAL only makes sense when DTGSY2 is called by
+*                DTGSYL.
+*
+*  IPIV    (input) INTEGER array, dimension (N).
+*          The pivot indices; for 1 <= i <= N, row i of the
+*          matrix has been interchanged with row IPIV(i).
+*
+*  JPIV    (input) INTEGER array, dimension (N).
+*          The pivot indices; for 1 <= j <= N, column j of the
+*          matrix has been interchanged with column JPIV(j).
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  This routine is a further developed implementation of algorithm
+*  BSOLVE in [1] using complete pivoting in the LU factorization.
+*
+*  [1] Bo Kagstrom and Lars Westin,
+*      Generalized Schur Methods with Condition Estimators for
+*      Solving the Generalized Sylvester Equation, IEEE Transactions
+*      on Automatic Control, Vol. 34, No. 7, July 1989, pp 745-751.
+*
+*  [2] Peter Poromaa,
+*      On Efficient and Robust Estimators for the Separation
+*      between two Regular Matrix Pairs with Applications in
+*      Condition Estimation. Report IMINF-95.05, Departement of
+*      Computing Science, Umea University, S-901 87 Umea, Sweden, 1995.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            MAXDIM
+      PARAMETER          ( MAXDIM = 8 )
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, INFO, J, K
+      DOUBLE PRECISION   BM, BP, PMONE, SMINU, SPLUS, TEMP
+*     ..
+*     .. Local Arrays ..
+      INTEGER            IWORK( MAXDIM )
+      DOUBLE PRECISION   WORK( 4*MAXDIM ), XM( MAXDIM ), XP( MAXDIM )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DCOPY, DGECON, DGESC2, DLASSQ, DLASWP,
+     $                   DSCAL
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DASUM, DDOT
+      EXTERNAL           DASUM, DDOT
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( IJOB.NE.2 ) THEN
+*
+*        Apply permutations IPIV to RHS
+*
+         CALL DLASWP( 1, RHS, LDZ, 1, N-1, IPIV, 1 )
+*
+*        Solve for L-part choosing RHS either to +1 or -1.
+*
+         PMONE = -ONE
+*
+         DO 10 J = 1, N - 1
+            BP = RHS( J ) + ONE
+            BM = RHS( J ) - ONE
+            SPLUS = ONE
+*
+*           Look-ahead for L-part RHS(1:N-1) = + or -1, SPLUS and
+*           SMIN computed more efficiently than in BSOLVE [1].
+*
+            SPLUS = SPLUS + DDOT( N-J, Z( J+1, J ), 1, Z( J+1, J ), 1 )
+            SMINU = DDOT( N-J, Z( J+1, J ), 1, RHS( J+1 ), 1 )
+            SPLUS = SPLUS*RHS( J )
+            IF( SPLUS.GT.SMINU ) THEN
+               RHS( J ) = BP
+            ELSE IF( SMINU.GT.SPLUS ) THEN
+               RHS( J ) = BM
+            ELSE
+*
+*              In this case the updating sums are equal and we can
+*              choose RHS(J) +1 or -1. The first time this happens
+*              we choose -1, thereafter +1. This is a simple way to
+*              get good estimates of matrices like Byers well-known
+*              example (see [1]). (Not done in BSOLVE.)
+*
+               RHS( J ) = RHS( J ) + PMONE
+               PMONE = ONE
+            END IF
+*
+*           Compute the remaining r.h.s.
+*
+            TEMP = -RHS( J )
+            CALL DAXPY( N-J, TEMP, Z( J+1, J ), 1, RHS( J+1 ), 1 )
+*
+   10    CONTINUE
+*
+*        Solve for U-part, look-ahead for RHS(N) = +-1. This is not done
+*        in BSOLVE and will hopefully give us a better estimate because
+*        any ill-conditioning of the original matrix is transfered to U
+*        and not to L. U(N, N) is an approximation to sigma_min(LU).
+*
+         CALL DCOPY( N-1, RHS, 1, XP, 1 )
+         XP( N ) = RHS( N ) + ONE
+         RHS( N ) = RHS( N ) - ONE
+         SPLUS = ZERO
+         SMINU = ZERO
+         DO 30 I = N, 1, -1
+            TEMP = ONE / Z( I, I )
+            XP( I ) = XP( I )*TEMP
+            RHS( I ) = RHS( I )*TEMP
+            DO 20 K = I + 1, N
+               XP( I ) = XP( I ) - XP( K )*( Z( I, K )*TEMP )
+               RHS( I ) = RHS( I ) - RHS( K )*( Z( I, K )*TEMP )
+   20       CONTINUE
+            SPLUS = SPLUS + ABS( XP( I ) )
+            SMINU = SMINU + ABS( RHS( I ) )
+   30    CONTINUE
+         IF( SPLUS.GT.SMINU )
+     $      CALL DCOPY( N, XP, 1, RHS, 1 )
+*
+*        Apply the permutations JPIV to the computed solution (RHS)
+*
+         CALL DLASWP( 1, RHS, LDZ, 1, N-1, JPIV, -1 )
+*
+*        Compute the sum of squares
+*
+         CALL DLASSQ( N, RHS, 1, RDSCAL, RDSUM )
+*
+      ELSE
+*
+*        IJOB = 2, Compute approximate nullvector XM of Z
+*
+         CALL DGECON( 'I', N, Z, LDZ, ONE, TEMP, WORK, IWORK, INFO )
+         CALL DCOPY( N, WORK( N+1 ), 1, XM, 1 )
+*
+*        Compute RHS
+*
+         CALL DLASWP( 1, XM, LDZ, 1, N-1, IPIV, -1 )
+         TEMP = ONE / SQRT( DDOT( N, XM, 1, XM, 1 ) )
+         CALL DSCAL( N, TEMP, XM, 1 )
+         CALL DCOPY( N, XM, 1, XP, 1 )
+         CALL DAXPY( N, ONE, RHS, 1, XP, 1 )
+         CALL DAXPY( N, -ONE, XM, 1, RHS, 1 )
+         CALL DGESC2( N, Z, LDZ, RHS, IPIV, JPIV, TEMP )
+         CALL DGESC2( N, Z, LDZ, XP, IPIV, JPIV, TEMP )
+         IF( DASUM( N, XP, 1 ).GT.DASUM( N, RHS, 1 ) )
+     $      CALL DCOPY( N, XP, 1, RHS, 1 )
+*
+*        Compute the sum of squares
+*
+         CALL DLASSQ( N, RHS, 1, RDSCAL, RDSUM )
+*
+      END IF
+*
+      RETURN
+*
+*     End of DLATDF
+*
+      END
diff --git a/costa/native/external/lapack/dlatps.f b/costa/native/external/lapack/dlatps.f
new file mode 100644
index 000000000..32b837bfa
--- /dev/null
+++ b/costa/native/external/lapack/dlatps.f
@@ -0,0 +1,713 @@
+      SUBROUTINE DLATPS( UPLO, TRANS, DIAG, NORMIN, N, AP, X, SCALE,
+     $                   CNORM, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORMIN, TRANS, UPLO
+      INTEGER            INFO, N
+      DOUBLE PRECISION   SCALE
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AP( * ), CNORM( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLATPS solves one of the triangular systems
+*
+*     A *x = s*b  or  A'*x = s*b
+*
+*  with scaling to prevent overflow, where A is an upper or lower
+*  triangular matrix stored in packed form.  Here A' denotes the
+*  transpose of A, x and b are n-element vectors, and s is a scaling
+*  factor, usually less than or equal to 1, chosen so that the
+*  components of x will be less than the overflow threshold.  If the
+*  unscaled problem will not cause overflow, the Level 2 BLAS routine
+*  DTPSV is called. If the matrix A is singular (A(j,j) = 0 for some j),
+*  then s is set to 0 and a non-trivial solution to A*x = 0 is returned.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the matrix A is upper or lower triangular.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the operation applied to A.
+*          = 'N':  Solve A * x = s*b  (No transpose)
+*          = 'T':  Solve A'* x = s*b  (Transpose)
+*          = 'C':  Solve A'* x = s*b  (Conjugate transpose = Transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          Specifies whether or not the matrix A is unit triangular.
+*          = 'N':  Non-unit triangular
+*          = 'U':  Unit triangular
+*
+*  NORMIN  (input) CHARACTER*1
+*          Specifies whether CNORM has been set or not.
+*          = 'Y':  CNORM contains the column norms on entry
+*          = 'N':  CNORM is not set on entry.  On exit, the norms will
+*                  be computed and stored in CNORM.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          The upper or lower triangular matrix A, packed columnwise in
+*          a linear array.  The j-th column of A is stored in the array
+*          AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*  X       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the right hand side b of the triangular system.
+*          On exit, X is overwritten by the solution vector x.
+*
+*  SCALE   (output) DOUBLE PRECISION
+*          The scaling factor s for the triangular system
+*             A * x = s*b  or  A'* x = s*b.
+*          If SCALE = 0, the matrix A is singular or badly scaled, and
+*          the vector x is an exact or approximate solution to A*x = 0.
+*
+*  CNORM   (input or output) DOUBLE PRECISION array, dimension (N)
+*
+*          If NORMIN = 'Y', CNORM is an input argument and CNORM(j)
+*          contains the norm of the off-diagonal part of the j-th column
+*          of A.  If TRANS = 'N', CNORM(j) must be greater than or equal
+*          to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)
+*          must be greater than or equal to the 1-norm.
+*
+*          If NORMIN = 'N', CNORM is an output argument and CNORM(j)
+*          returns the 1-norm of the offdiagonal part of the j-th column
+*          of A.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -k, the k-th argument had an illegal value
+*
+*  Further Details
+*  ======= =======
+*
+*  A rough bound on x is computed; if that is less than overflow, DTPSV
+*  is called, otherwise, specific code is used which checks for possible
+*  overflow or divide-by-zero at every operation.
+*
+*  A columnwise scheme is used for solving A*x = b.  The basic algorithm
+*  if A is lower triangular is
+*
+*       x[1:n] := b[1:n]
+*       for j = 1, ..., n
+*            x(j) := x(j) / A(j,j)
+*            x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j]
+*       end
+*
+*  Define bounds on the components of x after j iterations of the loop:
+*     M(j) = bound on x[1:j]
+*     G(j) = bound on x[j+1:n]
+*  Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}.
+*
+*  Then for iteration j+1 we have
+*     M(j+1) <= G(j) / | A(j+1,j+1) |
+*     G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] |
+*            <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | )
+*
+*  where CNORM(j+1) is greater than or equal to the infinity-norm of
+*  column j+1 of A, not counting the diagonal.  Hence
+*
+*     G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | )
+*                  1<=i<=j
+*  and
+*
+*     |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| )
+*                                   1<=i< j
+*
+*  Since |x(j)| <= M(j), we use the Level 2 BLAS routine DTPSV if the
+*  reciprocal of the largest M(j), j=1,..,n, is larger than
+*  max(underflow, 1/overflow).
+*
+*  The bound on x(j) is also used to determine when a step in the
+*  columnwise method can be performed without fear of overflow.  If
+*  the computed bound is greater than a large constant, x is scaled to
+*  prevent overflow, but if the bound overflows, x is set to 0, x(j) to
+*  1, and scale to 0, and a non-trivial solution to A*x = 0 is found.
+*
+*  Similarly, a row-wise scheme is used to solve A'*x = b.  The basic
+*  algorithm for A upper triangular is
+*
+*       for j = 1, ..., n
+*            x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j)
+*       end
+*
+*  We simultaneously compute two bounds
+*       G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j
+*       M(j) = bound on x(i), 1<=i<=j
+*
+*  The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we
+*  add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1.
+*  Then the bound on x(j) is
+*
+*       M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) |
+*
+*            <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| )
+*                      1<=i<=j
+*
+*  and we can safely call DTPSV if 1/M(n) and 1/G(n) are both greater
+*  than max(underflow, 1/overflow).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, HALF, ONE
+      PARAMETER          ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN, NOUNIT, UPPER
+      INTEGER            I, IMAX, IP, J, JFIRST, JINC, JLAST, JLEN
+      DOUBLE PRECISION   BIGNUM, GROW, REC, SMLNUM, SUMJ, TJJ, TJJS,
+     $                   TMAX, TSCAL, USCAL, XBND, XJ, XMAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DASUM, DDOT, DLAMCH
+      EXTERNAL           LSAME, IDAMAX, DASUM, DDOT, DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DSCAL, DTPSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Test the input parameters.
+*
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT.
+     $         LSAME( NORMIN, 'N' ) ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLATPS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine machine dependent parameters to control overflow.
+*
+      SMLNUM = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' )
+      BIGNUM = ONE / SMLNUM
+      SCALE = ONE
+*
+      IF( LSAME( NORMIN, 'N' ) ) THEN
+*
+*        Compute the 1-norm of each column, not including the diagonal.
+*
+         IF( UPPER ) THEN
+*
+*           A is upper triangular.
+*
+            IP = 1
+            DO 10 J = 1, N
+               CNORM( J ) = DASUM( J-1, AP( IP ), 1 )
+               IP = IP + J
+   10       CONTINUE
+         ELSE
+*
+*           A is lower triangular.
+*
+            IP = 1
+            DO 20 J = 1, N - 1
+               CNORM( J ) = DASUM( N-J, AP( IP+1 ), 1 )
+               IP = IP + N - J + 1
+   20       CONTINUE
+            CNORM( N ) = ZERO
+         END IF
+      END IF
+*
+*     Scale the column norms by TSCAL if the maximum element in CNORM is
+*     greater than BIGNUM.
+*
+      IMAX = IDAMAX( N, CNORM, 1 )
+      TMAX = CNORM( IMAX )
+      IF( TMAX.LE.BIGNUM ) THEN
+         TSCAL = ONE
+      ELSE
+         TSCAL = ONE / ( SMLNUM*TMAX )
+         CALL DSCAL( N, TSCAL, CNORM, 1 )
+      END IF
+*
+*     Compute a bound on the computed solution vector to see if the
+*     Level 2 BLAS routine DTPSV can be used.
+*
+      J = IDAMAX( N, X, 1 )
+      XMAX = ABS( X( J ) )
+      XBND = XMAX
+      IF( NOTRAN ) THEN
+*
+*        Compute the growth in A * x = b.
+*
+         IF( UPPER ) THEN
+            JFIRST = N
+            JLAST = 1
+            JINC = -1
+         ELSE
+            JFIRST = 1
+            JLAST = N
+            JINC = 1
+         END IF
+*
+         IF( TSCAL.NE.ONE ) THEN
+            GROW = ZERO
+            GO TO 50
+         END IF
+*
+         IF( NOUNIT ) THEN
+*
+*           A is non-unit triangular.
+*
+*           Compute GROW = 1/G(j) and XBND = 1/M(j).
+*           Initially, G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = ONE / MAX( XBND, SMLNUM )
+            XBND = GROW
+            IP = JFIRST*( JFIRST+1 ) / 2
+            JLEN = N
+            DO 30 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 50
+*
+*              M(j) = G(j-1) / abs(A(j,j))
+*
+               TJJ = ABS( AP( IP ) )
+               XBND = MIN( XBND, MIN( ONE, TJJ )*GROW )
+               IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN
+*
+*                 G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) )
+*
+                  GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) )
+               ELSE
+*
+*                 G(j) could overflow, set GROW to 0.
+*
+                  GROW = ZERO
+               END IF
+               IP = IP + JINC*JLEN
+               JLEN = JLEN - 1
+   30       CONTINUE
+            GROW = XBND
+         ELSE
+*
+*           A is unit triangular.
+*
+*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) )
+            DO 40 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 50
+*
+*              G(j) = G(j-1)*( 1 + CNORM(j) )
+*
+               GROW = GROW*( ONE / ( ONE+CNORM( J ) ) )
+   40       CONTINUE
+         END IF
+   50    CONTINUE
+*
+      ELSE
+*
+*        Compute the growth in A' * x = b.
+*
+         IF( UPPER ) THEN
+            JFIRST = 1
+            JLAST = N
+            JINC = 1
+         ELSE
+            JFIRST = N
+            JLAST = 1
+            JINC = -1
+         END IF
+*
+         IF( TSCAL.NE.ONE ) THEN
+            GROW = ZERO
+            GO TO 80
+         END IF
+*
+         IF( NOUNIT ) THEN
+*
+*           A is non-unit triangular.
+*
+*           Compute GROW = 1/G(j) and XBND = 1/M(j).
+*           Initially, M(0) = max{x(i), i=1,...,n}.
+*
+            GROW = ONE / MAX( XBND, SMLNUM )
+            XBND = GROW
+            IP = JFIRST*( JFIRST+1 ) / 2
+            JLEN = 1
+            DO 60 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 80
+*
+*              G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) )
+*
+               XJ = ONE + CNORM( J )
+               GROW = MIN( GROW, XBND / XJ )
+*
+*              M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j))
+*
+               TJJ = ABS( AP( IP ) )
+               IF( XJ.GT.TJJ )
+     $            XBND = XBND*( TJJ / XJ )
+               JLEN = JLEN + 1
+               IP = IP + JINC*JLEN
+   60       CONTINUE
+            GROW = MIN( GROW, XBND )
+         ELSE
+*
+*           A is unit triangular.
+*
+*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) )
+            DO 70 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 80
+*
+*              G(j) = ( 1 + CNORM(j) )*G(j-1)
+*
+               XJ = ONE + CNORM( J )
+               GROW = GROW / XJ
+   70       CONTINUE
+         END IF
+   80    CONTINUE
+      END IF
+*
+      IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN
+*
+*        Use the Level 2 BLAS solve if the reciprocal of the bound on
+*        elements of X is not too small.
+*
+         CALL DTPSV( UPLO, TRANS, DIAG, N, AP, X, 1 )
+      ELSE
+*
+*        Use a Level 1 BLAS solve, scaling intermediate results.
+*
+         IF( XMAX.GT.BIGNUM ) THEN
+*
+*           Scale X so that its components are less than or equal to
+*           BIGNUM in absolute value.
+*
+            SCALE = BIGNUM / XMAX
+            CALL DSCAL( N, SCALE, X, 1 )
+            XMAX = BIGNUM
+         END IF
+*
+         IF( NOTRAN ) THEN
+*
+*           Solve A * x = b
+*
+            IP = JFIRST*( JFIRST+1 ) / 2
+            DO 110 J = JFIRST, JLAST, JINC
+*
+*              Compute x(j) = b(j) / A(j,j), scaling x if necessary.
+*
+               XJ = ABS( X( J ) )
+               IF( NOUNIT ) THEN
+                  TJJS = AP( IP )*TSCAL
+               ELSE
+                  TJJS = TSCAL
+                  IF( TSCAL.EQ.ONE )
+     $               GO TO 100
+               END IF
+               TJJ = ABS( TJJS )
+               IF( TJJ.GT.SMLNUM ) THEN
+*
+*                    abs(A(j,j)) > SMLNUM:
+*
+                  IF( TJJ.LT.ONE ) THEN
+                     IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                          Scale x by 1/b(j).
+*
+                        REC = ONE / XJ
+                        CALL DSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                  END IF
+                  X( J ) = X( J ) / TJJS
+                  XJ = ABS( X( J ) )
+               ELSE IF( TJJ.GT.ZERO ) THEN
+*
+*                    0 < abs(A(j,j)) <= SMLNUM:
+*
+                  IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                       Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM
+*                       to avoid overflow when dividing by A(j,j).
+*
+                     REC = ( TJJ*BIGNUM ) / XJ
+                     IF( CNORM( J ).GT.ONE ) THEN
+*
+*                          Scale by 1/CNORM(j) to avoid overflow when
+*                          multiplying x(j) times column j.
+*
+                        REC = REC / CNORM( J )
+                     END IF
+                     CALL DSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                     XMAX = XMAX*REC
+                  END IF
+                  X( J ) = X( J ) / TJJS
+                  XJ = ABS( X( J ) )
+               ELSE
+*
+*                    A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+*                    scale = 0, and compute a solution to A*x = 0.
+*
+                  DO 90 I = 1, N
+                     X( I ) = ZERO
+   90             CONTINUE
+                  X( J ) = ONE
+                  XJ = ONE
+                  SCALE = ZERO
+                  XMAX = ZERO
+               END IF
+  100          CONTINUE
+*
+*              Scale x if necessary to avoid overflow when adding a
+*              multiple of column j of A.
+*
+               IF( XJ.GT.ONE ) THEN
+                  REC = ONE / XJ
+                  IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN
+*
+*                    Scale x by 1/(2*abs(x(j))).
+*
+                     REC = REC*HALF
+                     CALL DSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                  END IF
+               ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN
+*
+*                 Scale x by 1/2.
+*
+                  CALL DSCAL( N, HALF, X, 1 )
+                  SCALE = SCALE*HALF
+               END IF
+*
+               IF( UPPER ) THEN
+                  IF( J.GT.1 ) THEN
+*
+*                    Compute the update
+*                       x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j)
+*
+                     CALL DAXPY( J-1, -X( J )*TSCAL, AP( IP-J+1 ), 1, X,
+     $                           1 )
+                     I = IDAMAX( J-1, X, 1 )
+                     XMAX = ABS( X( I ) )
+                  END IF
+                  IP = IP - J
+               ELSE
+                  IF( J.LT.N ) THEN
+*
+*                    Compute the update
+*                       x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j)
+*
+                     CALL DAXPY( N-J, -X( J )*TSCAL, AP( IP+1 ), 1,
+     $                           X( J+1 ), 1 )
+                     I = J + IDAMAX( N-J, X( J+1 ), 1 )
+                     XMAX = ABS( X( I ) )
+                  END IF
+                  IP = IP + N - J + 1
+               END IF
+  110       CONTINUE
+*
+         ELSE
+*
+*           Solve A' * x = b
+*
+            IP = JFIRST*( JFIRST+1 ) / 2
+            JLEN = 1
+            DO 160 J = JFIRST, JLAST, JINC
+*
+*              Compute x(j) = b(j) - sum A(k,j)*x(k).
+*                                    k<>j
+*
+               XJ = ABS( X( J ) )
+               USCAL = TSCAL
+               REC = ONE / MAX( XMAX, ONE )
+               IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN
+*
+*                 If x(j) could overflow, scale x by 1/(2*XMAX).
+*
+                  REC = REC*HALF
+                  IF( NOUNIT ) THEN
+                     TJJS = AP( IP )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                  END IF
+                  TJJ = ABS( TJJS )
+                  IF( TJJ.GT.ONE ) THEN
+*
+*                       Divide by A(j,j) when scaling x if A(j,j) > 1.
+*
+                     REC = MIN( ONE, REC*TJJ )
+                     USCAL = USCAL / TJJS
+                  END IF
+                  IF( REC.LT.ONE ) THEN
+                     CALL DSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                     XMAX = XMAX*REC
+                  END IF
+               END IF
+*
+               SUMJ = ZERO
+               IF( USCAL.EQ.ONE ) THEN
+*
+*                 If the scaling needed for A in the dot product is 1,
+*                 call DDOT to perform the dot product.
+*
+                  IF( UPPER ) THEN
+                     SUMJ = DDOT( J-1, AP( IP-J+1 ), 1, X, 1 )
+                  ELSE IF( J.LT.N ) THEN
+                     SUMJ = DDOT( N-J, AP( IP+1 ), 1, X( J+1 ), 1 )
+                  END IF
+               ELSE
+*
+*                 Otherwise, use in-line code for the dot product.
+*
+                  IF( UPPER ) THEN
+                     DO 120 I = 1, J - 1
+                        SUMJ = SUMJ + ( AP( IP-J+I )*USCAL )*X( I )
+  120                CONTINUE
+                  ELSE IF( J.LT.N ) THEN
+                     DO 130 I = 1, N - J
+                        SUMJ = SUMJ + ( AP( IP+I )*USCAL )*X( J+I )
+  130                CONTINUE
+                  END IF
+               END IF
+*
+               IF( USCAL.EQ.TSCAL ) THEN
+*
+*                 Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j)
+*                 was not used to scale the dotproduct.
+*
+                  X( J ) = X( J ) - SUMJ
+                  XJ = ABS( X( J ) )
+                  IF( NOUNIT ) THEN
+*
+*                    Compute x(j) = x(j) / A(j,j), scaling if necessary.
+*
+                     TJJS = AP( IP )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                     IF( TSCAL.EQ.ONE )
+     $                  GO TO 150
+                  END IF
+                  TJJ = ABS( TJJS )
+                  IF( TJJ.GT.SMLNUM ) THEN
+*
+*                       abs(A(j,j)) > SMLNUM:
+*
+                     IF( TJJ.LT.ONE ) THEN
+                        IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                             Scale X by 1/abs(x(j)).
+*
+                           REC = ONE / XJ
+                           CALL DSCAL( N, REC, X, 1 )
+                           SCALE = SCALE*REC
+                           XMAX = XMAX*REC
+                        END IF
+                     END IF
+                     X( J ) = X( J ) / TJJS
+                  ELSE IF( TJJ.GT.ZERO ) THEN
+*
+*                       0 < abs(A(j,j)) <= SMLNUM:
+*
+                     IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM.
+*
+                        REC = ( TJJ*BIGNUM ) / XJ
+                        CALL DSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                     X( J ) = X( J ) / TJJS
+                  ELSE
+*
+*                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+*                       scale = 0, and compute a solution to A'*x = 0.
+*
+                     DO 140 I = 1, N
+                        X( I ) = ZERO
+  140                CONTINUE
+                     X( J ) = ONE
+                     SCALE = ZERO
+                     XMAX = ZERO
+                  END IF
+  150             CONTINUE
+               ELSE
+*
+*                 Compute x(j) := x(j) / A(j,j)  - sumj if the dot
+*                 product has already been divided by 1/A(j,j).
+*
+                  X( J ) = X( J ) / TJJS - SUMJ
+               END IF
+               XMAX = MAX( XMAX, ABS( X( J ) ) )
+               JLEN = JLEN + 1
+               IP = IP + JINC*JLEN
+  160       CONTINUE
+         END IF
+         SCALE = SCALE / TSCAL
+      END IF
+*
+*     Scale the column norms by 1/TSCAL for return.
+*
+      IF( TSCAL.NE.ONE ) THEN
+         CALL DSCAL( N, ONE / TSCAL, CNORM, 1 )
+      END IF
+*
+      RETURN
+*
+*     End of DLATPS
+*
+      END
diff --git a/costa/native/external/lapack/dlatrd.f b/costa/native/external/lapack/dlatrd.f
new file mode 100644
index 000000000..bf613b544
--- /dev/null
+++ b/costa/native/external/lapack/dlatrd.f
@@ -0,0 +1,259 @@
+      SUBROUTINE DLATRD( UPLO, N, NB, A, LDA, E, TAU, W, LDW )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            LDA, LDW, N, NB
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), E( * ), TAU( * ), W( LDW, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLATRD reduces NB rows and columns of a real symmetric matrix A to
+*  symmetric tridiagonal form by an orthogonal similarity
+*  transformation Q' * A * Q, and returns the matrices V and W which are
+*  needed to apply the transformation to the unreduced part of A.
+*
+*  If UPLO = 'U', DLATRD reduces the last NB rows and columns of a
+*  matrix, of which the upper triangle is supplied;
+*  if UPLO = 'L', DLATRD reduces the first NB rows and columns of a
+*  matrix, of which the lower triangle is supplied.
+*
+*  This is an auxiliary routine called by DSYTRD.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored:
+*          = 'U': Upper triangular
+*          = 'L': Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.
+*
+*  NB      (input) INTEGER
+*          The number of rows and columns to be reduced.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          n-by-n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n-by-n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*          On exit:
+*          if UPLO = 'U', the last NB columns have been reduced to
+*            tridiagonal form, with the diagonal elements overwriting
+*            the diagonal elements of A; the elements above the diagonal
+*            with the array TAU, represent the orthogonal matrix Q as a
+*            product of elementary reflectors;
+*          if UPLO = 'L', the first NB columns have been reduced to
+*            tridiagonal form, with the diagonal elements overwriting
+*            the diagonal elements of A; the elements below the diagonal
+*            with the array TAU, represent the  orthogonal matrix Q as a
+*            product of elementary reflectors.
+*          See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= (1,N).
+*
+*  E       (output) DOUBLE PRECISION array, dimension (N-1)
+*          If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal
+*          elements of the last NB columns of the reduced matrix;
+*          if UPLO = 'L', E(1:nb) contains the subdiagonal elements of
+*          the first NB columns of the reduced matrix.
+*
+*  TAU     (output) DOUBLE PRECISION array, dimension (N-1)
+*          The scalar factors of the elementary reflectors, stored in
+*          TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'.
+*          See Further Details.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (LDW,NB)
+*          The n-by-nb matrix W required to update the unreduced part
+*          of A.
+*
+*  LDW     (input) INTEGER
+*          The leading dimension of the array W. LDW >= max(1,N).
+*
+*  Further Details
+*  ===============
+*
+*  If UPLO = 'U', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(n) H(n-1) . . . H(n-nb+1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i),
+*  and tau in TAU(i-1).
+*
+*  If UPLO = 'L', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(1) H(2) . . . H(nb).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i),
+*  and tau in TAU(i).
+*
+*  The elements of the vectors v together form the n-by-nb matrix V
+*  which is needed, with W, to apply the transformation to the unreduced
+*  part of the matrix, using a symmetric rank-2k update of the form:
+*  A := A - V*W' - W*V'.
+*
+*  The contents of A on exit are illustrated by the following examples
+*  with n = 5 and nb = 2:
+*
+*  if UPLO = 'U':                       if UPLO = 'L':
+*
+*    (  a   a   a   v4  v5 )              (  d                  )
+*    (      a   a   v4  v5 )              (  1   d              )
+*    (          a   1   v5 )              (  v1  1   a          )
+*    (              d   1  )              (  v1  v2  a   a      )
+*    (                  d  )              (  v1  v2  a   a   a  )
+*
+*  where d denotes a diagonal element of the reduced matrix, a denotes
+*  an element of the original matrix that is unchanged, and vi denotes
+*  an element of the vector defining H(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, HALF
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, HALF = 0.5D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IW
+      DOUBLE PRECISION   ALPHA
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DGEMV, DLARFG, DSCAL, DSYMV
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DDOT
+      EXTERNAL           LSAME, DDOT
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 )
+     $   RETURN
+*
+      IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*        Reduce last NB columns of upper triangle
+*
+         DO 10 I = N, N - NB + 1, -1
+            IW = I - N + NB
+            IF( I.LT.N ) THEN
+*
+*              Update A(1:i,i)
+*
+               CALL DGEMV( 'No transpose', I, N-I, -ONE, A( 1, I+1 ),
+     $                     LDA, W( I, IW+1 ), LDW, ONE, A( 1, I ), 1 )
+               CALL DGEMV( 'No transpose', I, N-I, -ONE, W( 1, IW+1 ),
+     $                     LDW, A( I, I+1 ), LDA, ONE, A( 1, I ), 1 )
+            END IF
+            IF( I.GT.1 ) THEN
+*
+*              Generate elementary reflector H(i) to annihilate
+*              A(1:i-2,i)
+*
+               CALL DLARFG( I-1, A( I-1, I ), A( 1, I ), 1, TAU( I-1 ) )
+               E( I-1 ) = A( I-1, I )
+               A( I-1, I ) = ONE
+*
+*              Compute W(1:i-1,i)
+*
+               CALL DSYMV( 'Upper', I-1, ONE, A, LDA, A( 1, I ), 1,
+     $                     ZERO, W( 1, IW ), 1 )
+               IF( I.LT.N ) THEN
+                  CALL DGEMV( 'Transpose', I-1, N-I, ONE, W( 1, IW+1 ),
+     $                        LDW, A( 1, I ), 1, ZERO, W( I+1, IW ), 1 )
+                  CALL DGEMV( 'No transpose', I-1, N-I, -ONE,
+     $                        A( 1, I+1 ), LDA, W( I+1, IW ), 1, ONE,
+     $                        W( 1, IW ), 1 )
+                  CALL DGEMV( 'Transpose', I-1, N-I, ONE, A( 1, I+1 ),
+     $                        LDA, A( 1, I ), 1, ZERO, W( I+1, IW ), 1 )
+                  CALL DGEMV( 'No transpose', I-1, N-I, -ONE,
+     $                        W( 1, IW+1 ), LDW, W( I+1, IW ), 1, ONE,
+     $                        W( 1, IW ), 1 )
+               END IF
+               CALL DSCAL( I-1, TAU( I-1 ), W( 1, IW ), 1 )
+               ALPHA = -HALF*TAU( I-1 )*DDOT( I-1, W( 1, IW ), 1,
+     $                 A( 1, I ), 1 )
+               CALL DAXPY( I-1, ALPHA, A( 1, I ), 1, W( 1, IW ), 1 )
+            END IF
+*
+   10    CONTINUE
+      ELSE
+*
+*        Reduce first NB columns of lower triangle
+*
+         DO 20 I = 1, NB
+*
+*           Update A(i:n,i)
+*
+            CALL DGEMV( 'No transpose', N-I+1, I-1, -ONE, A( I, 1 ),
+     $                  LDA, W( I, 1 ), LDW, ONE, A( I, I ), 1 )
+            CALL DGEMV( 'No transpose', N-I+1, I-1, -ONE, W( I, 1 ),
+     $                  LDW, A( I, 1 ), LDA, ONE, A( I, I ), 1 )
+            IF( I.LT.N ) THEN
+*
+*              Generate elementary reflector H(i) to annihilate
+*              A(i+2:n,i)
+*
+               CALL DLARFG( N-I, A( I+1, I ), A( MIN( I+2, N ), I ), 1,
+     $                      TAU( I ) )
+               E( I ) = A( I+1, I )
+               A( I+1, I ) = ONE
+*
+*              Compute W(i+1:n,i)
+*
+               CALL DSYMV( 'Lower', N-I, ONE, A( I+1, I+1 ), LDA,
+     $                     A( I+1, I ), 1, ZERO, W( I+1, I ), 1 )
+               CALL DGEMV( 'Transpose', N-I, I-1, ONE, W( I+1, 1 ), LDW,
+     $                     A( I+1, I ), 1, ZERO, W( 1, I ), 1 )
+               CALL DGEMV( 'No transpose', N-I, I-1, -ONE, A( I+1, 1 ),
+     $                     LDA, W( 1, I ), 1, ONE, W( I+1, I ), 1 )
+               CALL DGEMV( 'Transpose', N-I, I-1, ONE, A( I+1, 1 ), LDA,
+     $                     A( I+1, I ), 1, ZERO, W( 1, I ), 1 )
+               CALL DGEMV( 'No transpose', N-I, I-1, -ONE, W( I+1, 1 ),
+     $                     LDW, W( 1, I ), 1, ONE, W( I+1, I ), 1 )
+               CALL DSCAL( N-I, TAU( I ), W( I+1, I ), 1 )
+               ALPHA = -HALF*TAU( I )*DDOT( N-I, W( I+1, I ), 1,
+     $                 A( I+1, I ), 1 )
+               CALL DAXPY( N-I, ALPHA, A( I+1, I ), 1, W( I+1, I ), 1 )
+            END IF
+*
+   20    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DLATRD
+*
+      END
diff --git a/costa/native/external/lapack/dlatrs.f b/costa/native/external/lapack/dlatrs.f
new file mode 100644
index 000000000..591c966d2
--- /dev/null
+++ b/costa/native/external/lapack/dlatrs.f
@@ -0,0 +1,702 @@
+      SUBROUTINE DLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X, SCALE,
+     $                   CNORM, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORMIN, TRANS, UPLO
+      INTEGER            INFO, LDA, N
+      DOUBLE PRECISION   SCALE
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), CNORM( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLATRS solves one of the triangular systems
+*
+*     A *x = s*b  or  A'*x = s*b
+*
+*  with scaling to prevent overflow.  Here A is an upper or lower
+*  triangular matrix, A' denotes the transpose of A, x and b are
+*  n-element vectors, and s is a scaling factor, usually less than
+*  or equal to 1, chosen so that the components of x will be less than
+*  the overflow threshold.  If the unscaled problem will not cause
+*  overflow, the Level 2 BLAS routine DTRSV is called.  If the matrix A
+*  is singular (A(j,j) = 0 for some j), then s is set to 0 and a
+*  non-trivial solution to A*x = 0 is returned.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the matrix A is upper or lower triangular.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the operation applied to A.
+*          = 'N':  Solve A * x = s*b  (No transpose)
+*          = 'T':  Solve A'* x = s*b  (Transpose)
+*          = 'C':  Solve A'* x = s*b  (Conjugate transpose = Transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          Specifies whether or not the matrix A is unit triangular.
+*          = 'N':  Non-unit triangular
+*          = 'U':  Unit triangular
+*
+*  NORMIN  (input) CHARACTER*1
+*          Specifies whether CNORM has been set or not.
+*          = 'Y':  CNORM contains the column norms on entry
+*          = 'N':  CNORM is not set on entry.  On exit, the norms will
+*                  be computed and stored in CNORM.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+*          The triangular matrix A.  If UPLO = 'U', the leading n by n
+*          upper triangular part of the array A contains the upper
+*          triangular matrix, and the strictly lower triangular part of
+*          A is not referenced.  If UPLO = 'L', the leading n by n lower
+*          triangular part of the array A contains the lower triangular
+*          matrix, and the strictly upper triangular part of A is not
+*          referenced.  If DIAG = 'U', the diagonal elements of A are
+*          also not referenced and are assumed to be 1.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max (1,N).
+*
+*  X       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the right hand side b of the triangular system.
+*          On exit, X is overwritten by the solution vector x.
+*
+*  SCALE   (output) DOUBLE PRECISION
+*          The scaling factor s for the triangular system
+*             A * x = s*b  or  A'* x = s*b.
+*          If SCALE = 0, the matrix A is singular or badly scaled, and
+*          the vector x is an exact or approximate solution to A*x = 0.
+*
+*  CNORM   (input or output) DOUBLE PRECISION array, dimension (N)
+*
+*          If NORMIN = 'Y', CNORM is an input argument and CNORM(j)
+*          contains the norm of the off-diagonal part of the j-th column
+*          of A.  If TRANS = 'N', CNORM(j) must be greater than or equal
+*          to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)
+*          must be greater than or equal to the 1-norm.
+*
+*          If NORMIN = 'N', CNORM is an output argument and CNORM(j)
+*          returns the 1-norm of the offdiagonal part of the j-th column
+*          of A.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -k, the k-th argument had an illegal value
+*
+*  Further Details
+*  ======= =======
+*
+*  A rough bound on x is computed; if that is less than overflow, DTRSV
+*  is called, otherwise, specific code is used which checks for possible
+*  overflow or divide-by-zero at every operation.
+*
+*  A columnwise scheme is used for solving A*x = b.  The basic algorithm
+*  if A is lower triangular is
+*
+*       x[1:n] := b[1:n]
+*       for j = 1, ..., n
+*            x(j) := x(j) / A(j,j)
+*            x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j]
+*       end
+*
+*  Define bounds on the components of x after j iterations of the loop:
+*     M(j) = bound on x[1:j]
+*     G(j) = bound on x[j+1:n]
+*  Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}.
+*
+*  Then for iteration j+1 we have
+*     M(j+1) <= G(j) / | A(j+1,j+1) |
+*     G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] |
+*            <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | )
+*
+*  where CNORM(j+1) is greater than or equal to the infinity-norm of
+*  column j+1 of A, not counting the diagonal.  Hence
+*
+*     G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | )
+*                  1<=i<=j
+*  and
+*
+*     |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| )
+*                                   1<=i< j
+*
+*  Since |x(j)| <= M(j), we use the Level 2 BLAS routine DTRSV if the
+*  reciprocal of the largest M(j), j=1,..,n, is larger than
+*  max(underflow, 1/overflow).
+*
+*  The bound on x(j) is also used to determine when a step in the
+*  columnwise method can be performed without fear of overflow.  If
+*  the computed bound is greater than a large constant, x is scaled to
+*  prevent overflow, but if the bound overflows, x is set to 0, x(j) to
+*  1, and scale to 0, and a non-trivial solution to A*x = 0 is found.
+*
+*  Similarly, a row-wise scheme is used to solve A'*x = b.  The basic
+*  algorithm for A upper triangular is
+*
+*       for j = 1, ..., n
+*            x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j)
+*       end
+*
+*  We simultaneously compute two bounds
+*       G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j
+*       M(j) = bound on x(i), 1<=i<=j
+*
+*  The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we
+*  add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1.
+*  Then the bound on x(j) is
+*
+*       M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) |
+*
+*            <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| )
+*                      1<=i<=j
+*
+*  and we can safely call DTRSV if 1/M(n) and 1/G(n) are both greater
+*  than max(underflow, 1/overflow).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, HALF, ONE
+      PARAMETER          ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN, NOUNIT, UPPER
+      INTEGER            I, IMAX, J, JFIRST, JINC, JLAST
+      DOUBLE PRECISION   BIGNUM, GROW, REC, SMLNUM, SUMJ, TJJ, TJJS,
+     $                   TMAX, TSCAL, USCAL, XBND, XJ, XMAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DASUM, DDOT, DLAMCH
+      EXTERNAL           LSAME, IDAMAX, DASUM, DDOT, DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DSCAL, DTRSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Test the input parameters.
+*
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT.
+     $         LSAME( NORMIN, 'N' ) ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLATRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine machine dependent parameters to control overflow.
+*
+      SMLNUM = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' )
+      BIGNUM = ONE / SMLNUM
+      SCALE = ONE
+*
+      IF( LSAME( NORMIN, 'N' ) ) THEN
+*
+*        Compute the 1-norm of each column, not including the diagonal.
+*
+         IF( UPPER ) THEN
+*
+*           A is upper triangular.
+*
+            DO 10 J = 1, N
+               CNORM( J ) = DASUM( J-1, A( 1, J ), 1 )
+   10       CONTINUE
+         ELSE
+*
+*           A is lower triangular.
+*
+            DO 20 J = 1, N - 1
+               CNORM( J ) = DASUM( N-J, A( J+1, J ), 1 )
+   20       CONTINUE
+            CNORM( N ) = ZERO
+         END IF
+      END IF
+*
+*     Scale the column norms by TSCAL if the maximum element in CNORM is
+*     greater than BIGNUM.
+*
+      IMAX = IDAMAX( N, CNORM, 1 )
+      TMAX = CNORM( IMAX )
+      IF( TMAX.LE.BIGNUM ) THEN
+         TSCAL = ONE
+      ELSE
+         TSCAL = ONE / ( SMLNUM*TMAX )
+         CALL DSCAL( N, TSCAL, CNORM, 1 )
+      END IF
+*
+*     Compute a bound on the computed solution vector to see if the
+*     Level 2 BLAS routine DTRSV can be used.
+*
+      J = IDAMAX( N, X, 1 )
+      XMAX = ABS( X( J ) )
+      XBND = XMAX
+      IF( NOTRAN ) THEN
+*
+*        Compute the growth in A * x = b.
+*
+         IF( UPPER ) THEN
+            JFIRST = N
+            JLAST = 1
+            JINC = -1
+         ELSE
+            JFIRST = 1
+            JLAST = N
+            JINC = 1
+         END IF
+*
+         IF( TSCAL.NE.ONE ) THEN
+            GROW = ZERO
+            GO TO 50
+         END IF
+*
+         IF( NOUNIT ) THEN
+*
+*           A is non-unit triangular.
+*
+*           Compute GROW = 1/G(j) and XBND = 1/M(j).
+*           Initially, G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = ONE / MAX( XBND, SMLNUM )
+            XBND = GROW
+            DO 30 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 50
+*
+*              M(j) = G(j-1) / abs(A(j,j))
+*
+               TJJ = ABS( A( J, J ) )
+               XBND = MIN( XBND, MIN( ONE, TJJ )*GROW )
+               IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN
+*
+*                 G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) )
+*
+                  GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) )
+               ELSE
+*
+*                 G(j) could overflow, set GROW to 0.
+*
+                  GROW = ZERO
+               END IF
+   30       CONTINUE
+            GROW = XBND
+         ELSE
+*
+*           A is unit triangular.
+*
+*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) )
+            DO 40 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 50
+*
+*              G(j) = G(j-1)*( 1 + CNORM(j) )
+*
+               GROW = GROW*( ONE / ( ONE+CNORM( J ) ) )
+   40       CONTINUE
+         END IF
+   50    CONTINUE
+*
+      ELSE
+*
+*        Compute the growth in A' * x = b.
+*
+         IF( UPPER ) THEN
+            JFIRST = 1
+            JLAST = N
+            JINC = 1
+         ELSE
+            JFIRST = N
+            JLAST = 1
+            JINC = -1
+         END IF
+*
+         IF( TSCAL.NE.ONE ) THEN
+            GROW = ZERO
+            GO TO 80
+         END IF
+*
+         IF( NOUNIT ) THEN
+*
+*           A is non-unit triangular.
+*
+*           Compute GROW = 1/G(j) and XBND = 1/M(j).
+*           Initially, M(0) = max{x(i), i=1,...,n}.
+*
+            GROW = ONE / MAX( XBND, SMLNUM )
+            XBND = GROW
+            DO 60 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 80
+*
+*              G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) )
+*
+               XJ = ONE + CNORM( J )
+               GROW = MIN( GROW, XBND / XJ )
+*
+*              M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j))
+*
+               TJJ = ABS( A( J, J ) )
+               IF( XJ.GT.TJJ )
+     $            XBND = XBND*( TJJ / XJ )
+   60       CONTINUE
+            GROW = MIN( GROW, XBND )
+         ELSE
+*
+*           A is unit triangular.
+*
+*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) )
+            DO 70 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 80
+*
+*              G(j) = ( 1 + CNORM(j) )*G(j-1)
+*
+               XJ = ONE + CNORM( J )
+               GROW = GROW / XJ
+   70       CONTINUE
+         END IF
+   80    CONTINUE
+      END IF
+*
+      IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN
+*
+*        Use the Level 2 BLAS solve if the reciprocal of the bound on
+*        elements of X is not too small.
+*
+         CALL DTRSV( UPLO, TRANS, DIAG, N, A, LDA, X, 1 )
+      ELSE
+*
+*        Use a Level 1 BLAS solve, scaling intermediate results.
+*
+         IF( XMAX.GT.BIGNUM ) THEN
+*
+*           Scale X so that its components are less than or equal to
+*           BIGNUM in absolute value.
+*
+            SCALE = BIGNUM / XMAX
+            CALL DSCAL( N, SCALE, X, 1 )
+            XMAX = BIGNUM
+         END IF
+*
+         IF( NOTRAN ) THEN
+*
+*           Solve A * x = b
+*
+            DO 110 J = JFIRST, JLAST, JINC
+*
+*              Compute x(j) = b(j) / A(j,j), scaling x if necessary.
+*
+               XJ = ABS( X( J ) )
+               IF( NOUNIT ) THEN
+                  TJJS = A( J, J )*TSCAL
+               ELSE
+                  TJJS = TSCAL
+                  IF( TSCAL.EQ.ONE )
+     $               GO TO 100
+               END IF
+               TJJ = ABS( TJJS )
+               IF( TJJ.GT.SMLNUM ) THEN
+*
+*                    abs(A(j,j)) > SMLNUM:
+*
+                  IF( TJJ.LT.ONE ) THEN
+                     IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                          Scale x by 1/b(j).
+*
+                        REC = ONE / XJ
+                        CALL DSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                  END IF
+                  X( J ) = X( J ) / TJJS
+                  XJ = ABS( X( J ) )
+               ELSE IF( TJJ.GT.ZERO ) THEN
+*
+*                    0 < abs(A(j,j)) <= SMLNUM:
+*
+                  IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                       Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM
+*                       to avoid overflow when dividing by A(j,j).
+*
+                     REC = ( TJJ*BIGNUM ) / XJ
+                     IF( CNORM( J ).GT.ONE ) THEN
+*
+*                          Scale by 1/CNORM(j) to avoid overflow when
+*                          multiplying x(j) times column j.
+*
+                        REC = REC / CNORM( J )
+                     END IF
+                     CALL DSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                     XMAX = XMAX*REC
+                  END IF
+                  X( J ) = X( J ) / TJJS
+                  XJ = ABS( X( J ) )
+               ELSE
+*
+*                    A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+*                    scale = 0, and compute a solution to A*x = 0.
+*
+                  DO 90 I = 1, N
+                     X( I ) = ZERO
+   90             CONTINUE
+                  X( J ) = ONE
+                  XJ = ONE
+                  SCALE = ZERO
+                  XMAX = ZERO
+               END IF
+  100          CONTINUE
+*
+*              Scale x if necessary to avoid overflow when adding a
+*              multiple of column j of A.
+*
+               IF( XJ.GT.ONE ) THEN
+                  REC = ONE / XJ
+                  IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN
+*
+*                    Scale x by 1/(2*abs(x(j))).
+*
+                     REC = REC*HALF
+                     CALL DSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                  END IF
+               ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN
+*
+*                 Scale x by 1/2.
+*
+                  CALL DSCAL( N, HALF, X, 1 )
+                  SCALE = SCALE*HALF
+               END IF
+*
+               IF( UPPER ) THEN
+                  IF( J.GT.1 ) THEN
+*
+*                    Compute the update
+*                       x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j)
+*
+                     CALL DAXPY( J-1, -X( J )*TSCAL, A( 1, J ), 1, X,
+     $                           1 )
+                     I = IDAMAX( J-1, X, 1 )
+                     XMAX = ABS( X( I ) )
+                  END IF
+               ELSE
+                  IF( J.LT.N ) THEN
+*
+*                    Compute the update
+*                       x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j)
+*
+                     CALL DAXPY( N-J, -X( J )*TSCAL, A( J+1, J ), 1,
+     $                           X( J+1 ), 1 )
+                     I = J + IDAMAX( N-J, X( J+1 ), 1 )
+                     XMAX = ABS( X( I ) )
+                  END IF
+               END IF
+  110       CONTINUE
+*
+         ELSE
+*
+*           Solve A' * x = b
+*
+            DO 160 J = JFIRST, JLAST, JINC
+*
+*              Compute x(j) = b(j) - sum A(k,j)*x(k).
+*                                    k<>j
+*
+               XJ = ABS( X( J ) )
+               USCAL = TSCAL
+               REC = ONE / MAX( XMAX, ONE )
+               IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN
+*
+*                 If x(j) could overflow, scale x by 1/(2*XMAX).
+*
+                  REC = REC*HALF
+                  IF( NOUNIT ) THEN
+                     TJJS = A( J, J )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                  END IF
+                  TJJ = ABS( TJJS )
+                  IF( TJJ.GT.ONE ) THEN
+*
+*                       Divide by A(j,j) when scaling x if A(j,j) > 1.
+*
+                     REC = MIN( ONE, REC*TJJ )
+                     USCAL = USCAL / TJJS
+                  END IF
+                  IF( REC.LT.ONE ) THEN
+                     CALL DSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                     XMAX = XMAX*REC
+                  END IF
+               END IF
+*
+               SUMJ = ZERO
+               IF( USCAL.EQ.ONE ) THEN
+*
+*                 If the scaling needed for A in the dot product is 1,
+*                 call DDOT to perform the dot product.
+*
+                  IF( UPPER ) THEN
+                     SUMJ = DDOT( J-1, A( 1, J ), 1, X, 1 )
+                  ELSE IF( J.LT.N ) THEN
+                     SUMJ = DDOT( N-J, A( J+1, J ), 1, X( J+1 ), 1 )
+                  END IF
+               ELSE
+*
+*                 Otherwise, use in-line code for the dot product.
+*
+                  IF( UPPER ) THEN
+                     DO 120 I = 1, J - 1
+                        SUMJ = SUMJ + ( A( I, J )*USCAL )*X( I )
+  120                CONTINUE
+                  ELSE IF( J.LT.N ) THEN
+                     DO 130 I = J + 1, N
+                        SUMJ = SUMJ + ( A( I, J )*USCAL )*X( I )
+  130                CONTINUE
+                  END IF
+               END IF
+*
+               IF( USCAL.EQ.TSCAL ) THEN
+*
+*                 Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j)
+*                 was not used to scale the dotproduct.
+*
+                  X( J ) = X( J ) - SUMJ
+                  XJ = ABS( X( J ) )
+                  IF( NOUNIT ) THEN
+                     TJJS = A( J, J )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                     IF( TSCAL.EQ.ONE )
+     $                  GO TO 150
+                  END IF
+*
+*                    Compute x(j) = x(j) / A(j,j), scaling if necessary.
+*
+                  TJJ = ABS( TJJS )
+                  IF( TJJ.GT.SMLNUM ) THEN
+*
+*                       abs(A(j,j)) > SMLNUM:
+*
+                     IF( TJJ.LT.ONE ) THEN
+                        IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                             Scale X by 1/abs(x(j)).
+*
+                           REC = ONE / XJ
+                           CALL DSCAL( N, REC, X, 1 )
+                           SCALE = SCALE*REC
+                           XMAX = XMAX*REC
+                        END IF
+                     END IF
+                     X( J ) = X( J ) / TJJS
+                  ELSE IF( TJJ.GT.ZERO ) THEN
+*
+*                       0 < abs(A(j,j)) <= SMLNUM:
+*
+                     IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM.
+*
+                        REC = ( TJJ*BIGNUM ) / XJ
+                        CALL DSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                     X( J ) = X( J ) / TJJS
+                  ELSE
+*
+*                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+*                       scale = 0, and compute a solution to A'*x = 0.
+*
+                     DO 140 I = 1, N
+                        X( I ) = ZERO
+  140                CONTINUE
+                     X( J ) = ONE
+                     SCALE = ZERO
+                     XMAX = ZERO
+                  END IF
+  150             CONTINUE
+               ELSE
+*
+*                 Compute x(j) := x(j) / A(j,j)  - sumj if the dot
+*                 product has already been divided by 1/A(j,j).
+*
+                  X( J ) = X( J ) / TJJS - SUMJ
+               END IF
+               XMAX = MAX( XMAX, ABS( X( J ) ) )
+  160       CONTINUE
+         END IF
+         SCALE = SCALE / TSCAL
+      END IF
+*
+*     Scale the column norms by 1/TSCAL for return.
+*
+      IF( TSCAL.NE.ONE ) THEN
+         CALL DSCAL( N, ONE / TSCAL, CNORM, 1 )
+      END IF
+*
+      RETURN
+*
+*     End of DLATRS
+*
+      END
diff --git a/costa/native/external/lapack/dlatrz.f b/costa/native/external/lapack/dlatrz.f
new file mode 100644
index 000000000..ac72ecb0d
--- /dev/null
+++ b/costa/native/external/lapack/dlatrz.f
@@ -0,0 +1,128 @@
+      SUBROUTINE DLATRZ( M, N, L, A, LDA, TAU, WORK )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            L, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLATRZ factors the M-by-(M+L) real upper trapezoidal matrix
+*  [ A1 A2 ] = [ A(1:M,1:M) A(1:M,N-L+1:N) ] as ( R  0 ) * Z, by means
+*  of orthogonal transformations.  Z is an (M+L)-by-(M+L) orthogonal
+*  matrix and, R and A1 are M-by-M upper triangular matrices.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  L       (input) INTEGER
+*          The number of columns of the matrix A containing the
+*          meaningful part of the Householder vectors. N-M >= L >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the leading M-by-N upper trapezoidal part of the
+*          array A must contain the matrix to be factorized.
+*          On exit, the leading M-by-M upper triangular part of A
+*          contains the upper triangular matrix R, and elements N-L+1 to
+*          N of the first M rows of A, with the array TAU, represent the
+*          orthogonal matrix Z as a product of M elementary reflectors.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) DOUBLE PRECISION array, dimension (M)
+*          The scalar factors of the elementary reflectors.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (M)
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+*  The factorization is obtained by Householder's method.  The kth
+*  transformation matrix, Z( k ), which is used to introduce zeros into
+*  the ( m - k + 1 )th row of A, is given in the form
+*
+*     Z( k ) = ( I     0   ),
+*              ( 0  T( k ) )
+*
+*  where
+*
+*     T( k ) = I - tau*u( k )*u( k )',   u( k ) = (   1    ),
+*                                                 (   0    )
+*                                                 ( z( k ) )
+*
+*  tau is a scalar and z( k ) is an l element vector. tau and z( k )
+*  are chosen to annihilate the elements of the kth row of A2.
+*
+*  The scalar tau is returned in the kth element of TAU and the vector
+*  u( k ) in the kth row of A2, such that the elements of z( k ) are
+*  in  a( k, l + 1 ), ..., a( k, n ). The elements of R are returned in
+*  the upper triangular part of A1.
+*
+*  Z is given by
+*
+*     Z =  Z( 1 ) * Z( 2 ) * ... * Z( m ).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARFG, DLARZ
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 ) THEN
+         RETURN
+      ELSE IF( M.EQ.N ) THEN
+         DO 10 I = 1, N
+            TAU( I ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      DO 20 I = M, 1, -1
+*
+*        Generate elementary reflector H(i) to annihilate
+*        [ A(i,i) A(i,n-l+1:n) ]
+*
+         CALL DLARFG( L+1, A( I, I ), A( I, N-L+1 ), LDA, TAU( I ) )
+*
+*        Apply H(i) to A(1:i-1,i:n) from the right
+*
+         CALL DLARZ( 'Right', I-1, N-I+1, L, A( I, N-L+1 ), LDA,
+     $               TAU( I ), A( 1, I ), LDA, WORK )
+*
+   20 CONTINUE
+*
+      RETURN
+*
+*     End of DLATRZ
+*
+      END
diff --git a/costa/native/external/lapack/dlatzm.f b/costa/native/external/lapack/dlatzm.f
new file mode 100644
index 000000000..7eaea60d3
--- /dev/null
+++ b/costa/native/external/lapack/dlatzm.f
@@ -0,0 +1,143 @@
+      SUBROUTINE DLATZM( SIDE, M, N, V, INCV, TAU, C1, C2, LDC, WORK )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE
+      INTEGER            INCV, LDC, M, N
+      DOUBLE PRECISION   TAU
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   C1( LDC, * ), C2( LDC, * ), V( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This routine is deprecated and has been replaced by routine DORMRZ.
+*
+*  DLATZM applies a Householder matrix generated by DTZRQF to a matrix.
+*
+*  Let P = I - tau*u*u',   u = ( 1 ),
+*                              ( v )
+*  where v is an (m-1) vector if SIDE = 'L', or a (n-1) vector if
+*  SIDE = 'R'.
+*
+*  If SIDE equals 'L', let
+*         C = [ C1 ] 1
+*             [ C2 ] m-1
+*               n
+*  Then C is overwritten by P*C.
+*
+*  If SIDE equals 'R', let
+*         C = [ C1, C2 ] m
+*                1  n-1
+*  Then C is overwritten by C*P.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': form P * C
+*          = 'R': form C * P
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C.
+*
+*  V       (input) DOUBLE PRECISION array, dimension
+*                  (1 + (M-1)*abs(INCV)) if SIDE = 'L'
+*                  (1 + (N-1)*abs(INCV)) if SIDE = 'R'
+*          The vector v in the representation of P. V is not used
+*          if TAU = 0.
+*
+*  INCV    (input) INTEGER
+*          The increment between elements of v. INCV <> 0
+*
+*  TAU     (input) DOUBLE PRECISION
+*          The value tau in the representation of P.
+*
+*  C1      (input/output) DOUBLE PRECISION array, dimension
+*                         (LDC,N) if SIDE = 'L'
+*                         (M,1)   if SIDE = 'R'
+*          On entry, the n-vector C1 if SIDE = 'L', or the m-vector C1
+*          if SIDE = 'R'.
+*
+*          On exit, the first row of P*C if SIDE = 'L', or the first
+*          column of C*P if SIDE = 'R'.
+*
+*  C2      (input/output) DOUBLE PRECISION array, dimension
+*                         (LDC, N)   if SIDE = 'L'
+*                         (LDC, N-1) if SIDE = 'R'
+*          On entry, the (m - 1) x n matrix C2 if SIDE = 'L', or the
+*          m x (n - 1) matrix C2 if SIDE = 'R'.
+*
+*          On exit, rows 2:m of P*C if SIDE = 'L', or columns 2:m of C*P
+*          if SIDE = 'R'.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the arrays C1 and C2. LDC >= (1,M).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension
+*                      (N) if SIDE = 'L'
+*                      (M) if SIDE = 'R'
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DCOPY, DGEMV, DGER
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+      IF( ( MIN( M, N ).EQ.0 ) .OR. ( TAU.EQ.ZERO ) )
+     $   RETURN
+*
+      IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*        w := C1 + v' * C2
+*
+         CALL DCOPY( N, C1, LDC, WORK, 1 )
+         CALL DGEMV( 'Transpose', M-1, N, ONE, C2, LDC, V, INCV, ONE,
+     $               WORK, 1 )
+*
+*        [ C1 ] := [ C1 ] - tau* [ 1 ] * w'
+*        [ C2 ]    [ C2 ]        [ v ]
+*
+         CALL DAXPY( N, -TAU, WORK, 1, C1, LDC )
+         CALL DGER( M-1, N, -TAU, V, INCV, WORK, 1, C2, LDC )
+*
+      ELSE IF( LSAME( SIDE, 'R' ) ) THEN
+*
+*        w := C1 + C2 * v
+*
+         CALL DCOPY( M, C1, 1, WORK, 1 )
+         CALL DGEMV( 'No transpose', M, N-1, ONE, C2, LDC, V, INCV, ONE,
+     $               WORK, 1 )
+*
+*        [ C1, C2 ] := [ C1, C2 ] - tau* w * [ 1 , v']
+*
+         CALL DAXPY( M, -TAU, WORK, 1, C1, 1 )
+         CALL DGER( M, N-1, -TAU, WORK, 1, V, INCV, C2, LDC )
+      END IF
+*
+      RETURN
+*
+*     End of DLATZM
+*
+      END
diff --git a/costa/native/external/lapack/dlauu2.f b/costa/native/external/lapack/dlauu2.f
new file mode 100644
index 000000000..65a47ee6c
--- /dev/null
+++ b/costa/native/external/lapack/dlauu2.f
@@ -0,0 +1,136 @@
+      SUBROUTINE DLAUU2( UPLO, N, A, LDA, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAUU2 computes the product U * U' or L' * L, where the triangular
+*  factor U or L is stored in the upper or lower triangular part of
+*  the array A.
+*
+*  If UPLO = 'U' or 'u' then the upper triangle of the result is stored,
+*  overwriting the factor U in A.
+*  If UPLO = 'L' or 'l' then the lower triangle of the result is stored,
+*  overwriting the factor L in A.
+*
+*  This is the unblocked form of the algorithm, calling Level 2 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the triangular factor stored in the array A
+*          is upper or lower triangular:
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the triangular factor U or L.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the triangular factor U or L.
+*          On exit, if UPLO = 'U', the upper triangle of A is
+*          overwritten with the upper triangle of the product U * U';
+*          if UPLO = 'L', the lower triangle of A is overwritten with
+*          the lower triangle of the product L' * L.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I
+      DOUBLE PRECISION   AII
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DDOT
+      EXTERNAL           LSAME, DDOT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMV, DSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLAUU2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Compute the product U * U'.
+*
+         DO 10 I = 1, N
+            AII = A( I, I )
+            IF( I.LT.N ) THEN
+               A( I, I ) = DDOT( N-I+1, A( I, I ), LDA, A( I, I ), LDA )
+               CALL DGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ),
+     $                     LDA, A( I, I+1 ), LDA, AII, A( 1, I ), 1 )
+            ELSE
+               CALL DSCAL( I, AII, A( 1, I ), 1 )
+            END IF
+   10    CONTINUE
+*
+      ELSE
+*
+*        Compute the product L' * L.
+*
+         DO 20 I = 1, N
+            AII = A( I, I )
+            IF( I.LT.N ) THEN
+               A( I, I ) = DDOT( N-I+1, A( I, I ), 1, A( I, I ), 1 )
+               CALL DGEMV( 'Transpose', N-I, I-1, ONE, A( I+1, 1 ), LDA,
+     $                     A( I+1, I ), 1, AII, A( I, 1 ), LDA )
+            ELSE
+               CALL DSCAL( I, AII, A( I, 1 ), LDA )
+            END IF
+   20    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DLAUU2
+*
+      END
diff --git a/costa/native/external/lapack/dlauum.f b/costa/native/external/lapack/dlauum.f
new file mode 100644
index 000000000..581c84080
--- /dev/null
+++ b/costa/native/external/lapack/dlauum.f
@@ -0,0 +1,156 @@
+      SUBROUTINE DLAUUM( UPLO, N, A, LDA, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DLAUUM computes the product U * U' or L' * L, where the triangular
+*  factor U or L is stored in the upper or lower triangular part of
+*  the array A.
+*
+*  If UPLO = 'U' or 'u' then the upper triangle of the result is stored,
+*  overwriting the factor U in A.
+*  If UPLO = 'L' or 'l' then the lower triangle of the result is stored,
+*  overwriting the factor L in A.
+*
+*  This is the blocked form of the algorithm, calling Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the triangular factor stored in the array A
+*          is upper or lower triangular:
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the triangular factor U or L.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the triangular factor U or L.
+*          On exit, if UPLO = 'U', the upper triangle of A is
+*          overwritten with the upper triangle of the product U * U';
+*          if UPLO = 'L', the lower triangle of A is overwritten with
+*          the lower triangle of the product L' * L.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, IB, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMM, DLAUU2, DSYRK, DTRMM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLAUUM', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'DLAUUM', UPLO, N, -1, -1, -1 )
+*
+      IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+*        Use unblocked code
+*
+         CALL DLAUU2( UPLO, N, A, LDA, INFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( UPPER ) THEN
+*
+*           Compute the product U * U'.
+*
+            DO 10 I = 1, N, NB
+               IB = MIN( NB, N-I+1 )
+               CALL DTRMM( 'Right', 'Upper', 'Transpose', 'Non-unit',
+     $                     I-1, IB, ONE, A( I, I ), LDA, A( 1, I ),
+     $                     LDA )
+               CALL DLAUU2( 'Upper', IB, A( I, I ), LDA, INFO )
+               IF( I+IB.LE.N ) THEN
+                  CALL DGEMM( 'No transpose', 'Transpose', I-1, IB,
+     $                        N-I-IB+1, ONE, A( 1, I+IB ), LDA,
+     $                        A( I, I+IB ), LDA, ONE, A( 1, I ), LDA )
+                  CALL DSYRK( 'Upper', 'No transpose', IB, N-I-IB+1,
+     $                        ONE, A( I, I+IB ), LDA, ONE, A( I, I ),
+     $                        LDA )
+               END IF
+   10       CONTINUE
+         ELSE
+*
+*           Compute the product L' * L.
+*
+            DO 20 I = 1, N, NB
+               IB = MIN( NB, N-I+1 )
+               CALL DTRMM( 'Left', 'Lower', 'Transpose', 'Non-unit', IB,
+     $                     I-1, ONE, A( I, I ), LDA, A( I, 1 ), LDA )
+               CALL DLAUU2( 'Lower', IB, A( I, I ), LDA, INFO )
+               IF( I+IB.LE.N ) THEN
+                  CALL DGEMM( 'Transpose', 'No transpose', IB, I-1,
+     $                        N-I-IB+1, ONE, A( I+IB, I ), LDA,
+     $                        A( I+IB, 1 ), LDA, ONE, A( I, 1 ), LDA )
+                  CALL DSYRK( 'Lower', 'Transpose', IB, N-I-IB+1, ONE,
+     $                        A( I+IB, I ), LDA, ONE, A( I, I ), LDA )
+               END IF
+   20       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DLAUUM
+*
+      END
diff --git a/costa/native/external/lapack/dopgtr.f b/costa/native/external/lapack/dopgtr.f
new file mode 100644
index 000000000..32c7d59a2
--- /dev/null
+++ b/costa/native/external/lapack/dopgtr.f
@@ -0,0 +1,161 @@
+      SUBROUTINE DOPGTR( UPLO, N, AP, TAU, Q, LDQ, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDQ, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AP( * ), Q( LDQ, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DOPGTR generates a real orthogonal matrix Q which is defined as the
+*  product of n-1 elementary reflectors H(i) of order n, as returned by
+*  DSPTRD using packed storage:
+*
+*  if UPLO = 'U', Q = H(n-1) . . . H(2) H(1),
+*
+*  if UPLO = 'L', Q = H(1) H(2) . . . H(n-1).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U': Upper triangular packed storage used in previous
+*                 call to DSPTRD;
+*          = 'L': Lower triangular packed storage used in previous
+*                 call to DSPTRD.
+*
+*  N       (input) INTEGER
+*          The order of the matrix Q. N >= 0.
+*
+*  AP      (input) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          The vectors which define the elementary reflectors, as
+*          returned by DSPTRD.
+*
+*  TAU     (input) DOUBLE PRECISION array, dimension (N-1)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by DSPTRD.
+*
+*  Q       (output) DOUBLE PRECISION array, dimension (LDQ,N)
+*          The N-by-N orthogonal matrix Q.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q. LDQ >= max(1,N).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (N-1)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, IINFO, IJ, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DORG2L, DORG2R, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DOPGTR', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Q was determined by a call to DSPTRD with UPLO = 'U'
+*
+*        Unpack the vectors which define the elementary reflectors and
+*        set the last row and column of Q equal to those of the unit
+*        matrix
+*
+         IJ = 2
+         DO 20 J = 1, N - 1
+            DO 10 I = 1, J - 1
+               Q( I, J ) = AP( IJ )
+               IJ = IJ + 1
+   10       CONTINUE
+            IJ = IJ + 2
+            Q( N, J ) = ZERO
+   20    CONTINUE
+         DO 30 I = 1, N - 1
+            Q( I, N ) = ZERO
+   30    CONTINUE
+         Q( N, N ) = ONE
+*
+*        Generate Q(1:n-1,1:n-1)
+*
+         CALL DORG2L( N-1, N-1, N-1, Q, LDQ, TAU, WORK, IINFO )
+*
+      ELSE
+*
+*        Q was determined by a call to DSPTRD with UPLO = 'L'.
+*
+*        Unpack the vectors which define the elementary reflectors and
+*        set the first row and column of Q equal to those of the unit
+*        matrix
+*
+         Q( 1, 1 ) = ONE
+         DO 40 I = 2, N
+            Q( I, 1 ) = ZERO
+   40    CONTINUE
+         IJ = 3
+         DO 60 J = 2, N
+            Q( 1, J ) = ZERO
+            DO 50 I = J + 1, N
+               Q( I, J ) = AP( IJ )
+               IJ = IJ + 1
+   50       CONTINUE
+            IJ = IJ + 2
+   60    CONTINUE
+         IF( N.GT.1 ) THEN
+*
+*           Generate Q(2:n,2:n)
+*
+            CALL DORG2R( N-1, N-1, N-1, Q( 2, 2 ), LDQ, TAU, WORK,
+     $                   IINFO )
+         END IF
+      END IF
+      RETURN
+*
+*     End of DOPGTR
+*
+      END
diff --git a/costa/native/external/lapack/dopmtr.f b/costa/native/external/lapack/dopmtr.f
new file mode 100644
index 000000000..3d9d426b9
--- /dev/null
+++ b/costa/native/external/lapack/dopmtr.f
@@ -0,0 +1,258 @@
+      SUBROUTINE DOPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS, UPLO
+      INTEGER            INFO, LDC, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AP( * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DOPMTR overwrites the general real M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'T':      Q**T * C       C * Q**T
+*
+*  where Q is a real orthogonal matrix of order nq, with nq = m if
+*  SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of
+*  nq-1 elementary reflectors, as returned by DSPTRD using packed
+*  storage:
+*
+*  if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1);
+*
+*  if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1).
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**T from the Left;
+*          = 'R': apply Q or Q**T from the Right.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U': Upper triangular packed storage used in previous
+*                 call to DSPTRD;
+*          = 'L': Lower triangular packed storage used in previous
+*                 call to DSPTRD.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q;
+*          = 'T':  Transpose, apply Q**T.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  AP      (input) DOUBLE PRECISION array, dimension
+*                               (M*(M+1)/2) if SIDE = 'L'
+*                               (N*(N+1)/2) if SIDE = 'R'
+*          The vectors which define the elementary reflectors, as
+*          returned by DSPTRD.  AP is modified by the routine but
+*          restored on exit.
+*
+*  TAU     (input) DOUBLE PRECISION array, dimension (M-1) if SIDE = 'L'
+*                                     or (N-1) if SIDE = 'R'
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by DSPTRD.
+*
+*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension
+*                                   (N) if SIDE = 'L'
+*                                   (M) if SIDE = 'R'
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            FORWRD, LEFT, NOTRAN, UPPER
+      INTEGER            I, I1, I2, I3, IC, II, JC, MI, NI, NQ
+      DOUBLE PRECISION   AII
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      UPPER = LSAME( UPLO, 'U' )
+*
+*     NQ is the order of Q
+*
+      IF( LEFT ) THEN
+         NQ = M
+      ELSE
+         NQ = N
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DOPMTR', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Q was determined by a call to DSPTRD with UPLO = 'U'
+*
+         FORWRD = ( LEFT .AND. NOTRAN ) .OR.
+     $            ( .NOT.LEFT .AND. .NOT.NOTRAN )
+*
+         IF( FORWRD ) THEN
+            I1 = 1
+            I2 = NQ - 1
+            I3 = 1
+            II = 2
+         ELSE
+            I1 = NQ - 1
+            I2 = 1
+            I3 = -1
+            II = NQ*( NQ+1 ) / 2 - 1
+         END IF
+*
+         IF( LEFT ) THEN
+            NI = N
+         ELSE
+            MI = M
+         END IF
+*
+         DO 10 I = I1, I2, I3
+            IF( LEFT ) THEN
+*
+*              H(i) is applied to C(1:i,1:n)
+*
+               MI = I
+            ELSE
+*
+*              H(i) is applied to C(1:m,1:i)
+*
+               NI = I
+            END IF
+*
+*           Apply H(i)
+*
+            AII = AP( II )
+            AP( II ) = ONE
+            CALL DLARF( SIDE, MI, NI, AP( II-I+1 ), 1, TAU( I ), C, LDC,
+     $                  WORK )
+            AP( II ) = AII
+*
+            IF( FORWRD ) THEN
+               II = II + I + 2
+            ELSE
+               II = II - I - 1
+            END IF
+   10    CONTINUE
+      ELSE
+*
+*        Q was determined by a call to DSPTRD with UPLO = 'L'.
+*
+         FORWRD = ( LEFT .AND. .NOT.NOTRAN ) .OR.
+     $            ( .NOT.LEFT .AND. NOTRAN )
+*
+         IF( FORWRD ) THEN
+            I1 = 1
+            I2 = NQ - 1
+            I3 = 1
+            II = 2
+         ELSE
+            I1 = NQ - 1
+            I2 = 1
+            I3 = -1
+            II = NQ*( NQ+1 ) / 2 - 1
+         END IF
+*
+         IF( LEFT ) THEN
+            NI = N
+            JC = 1
+         ELSE
+            MI = M
+            IC = 1
+         END IF
+*
+         DO 20 I = I1, I2, I3
+            AII = AP( II )
+            AP( II ) = ONE
+            IF( LEFT ) THEN
+*
+*              H(i) is applied to C(i+1:m,1:n)
+*
+               MI = M - I
+               IC = I + 1
+            ELSE
+*
+*              H(i) is applied to C(1:m,i+1:n)
+*
+               NI = N - I
+               JC = I + 1
+            END IF
+*
+*           Apply H(i)
+*
+            CALL DLARF( SIDE, MI, NI, AP( II ), 1, TAU( I ),
+     $                  C( IC, JC ), LDC, WORK )
+            AP( II ) = AII
+*
+            IF( FORWRD ) THEN
+               II = II + NQ - I + 1
+            ELSE
+               II = II - NQ + I - 2
+            END IF
+   20    CONTINUE
+      END IF
+      RETURN
+*
+*     End of DOPMTR
+*
+      END
diff --git a/costa/native/external/lapack/dorg2l.f b/costa/native/external/lapack/dorg2l.f
new file mode 100644
index 000000000..5b86f6710
--- /dev/null
+++ b/costa/native/external/lapack/dorg2l.f
@@ -0,0 +1,128 @@
+      SUBROUTINE DORG2L( M, N, K, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DORG2L generates an m by n real matrix Q with orthonormal columns,
+*  which is defined as the last n columns of a product of k elementary
+*  reflectors of order m
+*
+*        Q  =  H(k) . . . H(2) H(1)
+*
+*  as returned by DGEQLF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. M >= N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. N >= K >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the (n-k+i)-th column must contain the vector which
+*          defines the elementary reflector H(i), for i = 1,2,...,k, as
+*          returned by DGEQLF in the last k columns of its array
+*          argument A.
+*          On exit, the m by n matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) DOUBLE PRECISION array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by DGEQLF.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, II, J, L
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARF, DSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 .OR. N.GT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.N ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DORG2L', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 )
+     $   RETURN
+*
+*     Initialise columns 1:n-k to columns of the unit matrix
+*
+      DO 20 J = 1, N - K
+         DO 10 L = 1, M
+            A( L, J ) = ZERO
+   10    CONTINUE
+         A( M-N+J, J ) = ONE
+   20 CONTINUE
+*
+      DO 40 I = 1, K
+         II = N - K + I
+*
+*        Apply H(i) to A(1:m-k+i,1:n-k+i) from the left
+*
+         A( M-N+II, II ) = ONE
+         CALL DLARF( 'Left', M-N+II, II-1, A( 1, II ), 1, TAU( I ), A,
+     $               LDA, WORK )
+         CALL DSCAL( M-N+II-1, -TAU( I ), A( 1, II ), 1 )
+         A( M-N+II, II ) = ONE - TAU( I )
+*
+*        Set A(m-k+i+1:m,n-k+i) to zero
+*
+         DO 30 L = M - N + II + 1, M
+            A( L, II ) = ZERO
+   30    CONTINUE
+   40 CONTINUE
+      RETURN
+*
+*     End of DORG2L
+*
+      END
diff --git a/costa/native/external/lapack/dorg2r.f b/costa/native/external/lapack/dorg2r.f
new file mode 100644
index 000000000..c15982e7a
--- /dev/null
+++ b/costa/native/external/lapack/dorg2r.f
@@ -0,0 +1,130 @@
+      SUBROUTINE DORG2R( M, N, K, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DORG2R generates an m by n real matrix Q with orthonormal columns,
+*  which is defined as the first n columns of a product of k elementary
+*  reflectors of order m
+*
+*        Q  =  H(1) H(2) . . . H(k)
+*
+*  as returned by DGEQRF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. M >= N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. N >= K >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the i-th column must contain the vector which
+*          defines the elementary reflector H(i), for i = 1,2,...,k, as
+*          returned by DGEQRF in the first k columns of its array
+*          argument A.
+*          On exit, the m-by-n matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) DOUBLE PRECISION array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by DGEQRF.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, L
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARF, DSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 .OR. N.GT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.N ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DORG2R', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 )
+     $   RETURN
+*
+*     Initialise columns k+1:n to columns of the unit matrix
+*
+      DO 20 J = K + 1, N
+         DO 10 L = 1, M
+            A( L, J ) = ZERO
+   10    CONTINUE
+         A( J, J ) = ONE
+   20 CONTINUE
+*
+      DO 40 I = K, 1, -1
+*
+*        Apply H(i) to A(i:m,i:n) from the left
+*
+         IF( I.LT.N ) THEN
+            A( I, I ) = ONE
+            CALL DLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ),
+     $                  A( I, I+1 ), LDA, WORK )
+         END IF
+         IF( I.LT.M )
+     $      CALL DSCAL( M-I, -TAU( I ), A( I+1, I ), 1 )
+         A( I, I ) = ONE - TAU( I )
+*
+*        Set A(1:i-1,i) to zero
+*
+         DO 30 L = 1, I - 1
+            A( L, I ) = ZERO
+   30    CONTINUE
+   40 CONTINUE
+      RETURN
+*
+*     End of DORG2R
+*
+      END
diff --git a/costa/native/external/lapack/dorgbr.f b/costa/native/external/lapack/dorgbr.f
new file mode 100644
index 000000000..84dbe1757
--- /dev/null
+++ b/costa/native/external/lapack/dorgbr.f
@@ -0,0 +1,245 @@
+      SUBROUTINE DORGBR( VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          VECT
+      INTEGER            INFO, K, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DORGBR generates one of the real orthogonal matrices Q or P**T
+*  determined by DGEBRD when reducing a real matrix A to bidiagonal
+*  form: A = Q * B * P**T.  Q and P**T are defined as products of
+*  elementary reflectors H(i) or G(i) respectively.
+*
+*  If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q
+*  is of order M:
+*  if m >= k, Q = H(1) H(2) . . . H(k) and DORGBR returns the first n
+*  columns of Q, where m >= n >= k;
+*  if m < k, Q = H(1) H(2) . . . H(m-1) and DORGBR returns Q as an
+*  M-by-M matrix.
+*
+*  If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T
+*  is of order N:
+*  if k < n, P**T = G(k) . . . G(2) G(1) and DORGBR returns the first m
+*  rows of P**T, where n >= m >= k;
+*  if k >= n, P**T = G(n-1) . . . G(2) G(1) and DORGBR returns P**T as
+*  an N-by-N matrix.
+*
+*  Arguments
+*  =========
+*
+*  VECT    (input) CHARACTER*1
+*          Specifies whether the matrix Q or the matrix P**T is
+*          required, as defined in the transformation applied by DGEBRD:
+*          = 'Q':  generate Q;
+*          = 'P':  generate P**T.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q or P**T to be returned.
+*          M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q or P**T to be returned.
+*          N >= 0.
+*          If VECT = 'Q', M >= N >= min(M,K);
+*          if VECT = 'P', N >= M >= min(N,K).
+*
+*  K       (input) INTEGER
+*          If VECT = 'Q', the number of columns in the original M-by-K
+*          matrix reduced by DGEBRD.
+*          If VECT = 'P', the number of rows in the original K-by-N
+*          matrix reduced by DGEBRD.
+*          K >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the vectors which define the elementary reflectors,
+*          as returned by DGEBRD.
+*          On exit, the M-by-N matrix Q or P**T.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) DOUBLE PRECISION array, dimension
+*                                (min(M,K)) if VECT = 'Q'
+*                                (min(N,K)) if VECT = 'P'
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i) or G(i), which determines Q or P**T, as
+*          returned by DGEBRD in its array argument TAUQ or TAUP.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,min(M,N)).
+*          For optimum performance LWORK >= min(M,N)*NB, where NB
+*          is the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, WANTQ
+      INTEGER            I, IINFO, J, LWKOPT, MN, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DORGLQ, DORGQR, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      WANTQ = LSAME( VECT, 'Q' )
+      MN = MIN( M, N )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.WANTQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN
+         INFO = -1
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 .OR. ( WANTQ .AND. ( N.GT.M .OR. N.LT.MIN( M,
+     $         K ) ) ) .OR. ( .NOT.WANTQ .AND. ( M.GT.N .OR. M.LT.
+     $         MIN( N, K ) ) ) ) THEN
+         INFO = -3
+      ELSE IF( K.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -6
+      ELSE IF( LWORK.LT.MAX( 1, MN ) .AND. .NOT.LQUERY ) THEN
+         INFO = -9
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         IF( WANTQ ) THEN
+            NB = ILAENV( 1, 'DORGQR', ' ', M, N, K, -1 )
+         ELSE
+            NB = ILAENV( 1, 'DORGLQ', ' ', M, N, K, -1 )
+         END IF
+         LWKOPT = MAX( 1, MN )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DORGBR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( WANTQ ) THEN
+*
+*        Form Q, determined by a call to DGEBRD to reduce an m-by-k
+*        matrix
+*
+         IF( M.GE.K ) THEN
+*
+*           If m >= k, assume m >= n >= k
+*
+            CALL DORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, IINFO )
+*
+         ELSE
+*
+*           If m < k, assume m = n
+*
+*           Shift the vectors which define the elementary reflectors one
+*           column to the right, and set the first row and column of Q
+*           to those of the unit matrix
+*
+            DO 20 J = M, 2, -1
+               A( 1, J ) = ZERO
+               DO 10 I = J + 1, M
+                  A( I, J ) = A( I, J-1 )
+   10          CONTINUE
+   20       CONTINUE
+            A( 1, 1 ) = ONE
+            DO 30 I = 2, M
+               A( I, 1 ) = ZERO
+   30       CONTINUE
+            IF( M.GT.1 ) THEN
+*
+*              Form Q(2:m,2:m)
+*
+               CALL DORGQR( M-1, M-1, M-1, A( 2, 2 ), LDA, TAU, WORK,
+     $                      LWORK, IINFO )
+            END IF
+         END IF
+      ELSE
+*
+*        Form P', determined by a call to DGEBRD to reduce a k-by-n
+*        matrix
+*
+         IF( K.LT.N ) THEN
+*
+*           If k < n, assume k <= m <= n
+*
+            CALL DORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, IINFO )
+*
+         ELSE
+*
+*           If k >= n, assume m = n
+*
+*           Shift the vectors which define the elementary reflectors one
+*           row downward, and set the first row and column of P' to
+*           those of the unit matrix
+*
+            A( 1, 1 ) = ONE
+            DO 40 I = 2, N
+               A( I, 1 ) = ZERO
+   40       CONTINUE
+            DO 60 J = 2, N
+               DO 50 I = J - 1, 2, -1
+                  A( I, J ) = A( I-1, J )
+   50          CONTINUE
+               A( 1, J ) = ZERO
+   60       CONTINUE
+            IF( N.GT.1 ) THEN
+*
+*              Form P'(2:n,2:n)
+*
+               CALL DORGLQ( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK,
+     $                      LWORK, IINFO )
+            END IF
+         END IF
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of DORGBR
+*
+      END
diff --git a/costa/native/external/lapack/dorghr.f b/costa/native/external/lapack/dorghr.f
new file mode 100644
index 000000000..5d6881856
--- /dev/null
+++ b/costa/native/external/lapack/dorghr.f
@@ -0,0 +1,165 @@
+      SUBROUTINE DORGHR( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            IHI, ILO, INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DORGHR generates a real orthogonal matrix Q which is defined as the
+*  product of IHI-ILO elementary reflectors of order N, as returned by
+*  DGEHRD:
+*
+*  Q = H(ilo) H(ilo+1) . . . H(ihi-1).
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix Q. N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          ILO and IHI must have the same values as in the previous call
+*          of DGEHRD. Q is equal to the unit matrix except in the
+*          submatrix Q(ilo+1:ihi,ilo+1:ihi).
+*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the vectors which define the elementary reflectors,
+*          as returned by DGEHRD.
+*          On exit, the N-by-N orthogonal matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  TAU     (input) DOUBLE PRECISION array, dimension (N-1)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by DGEHRD.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= IHI-ILO.
+*          For optimum performance LWORK >= (IHI-ILO)*NB, where NB is
+*          the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IINFO, J, LWKOPT, NB, NH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DORGQR, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NH = IHI - ILO
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN
+         INFO = -2
+      ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.MAX( 1, NH ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'DORGQR', ' ', NH, NH, NH, -1 )
+         LWKOPT = MAX( 1, NH )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DORGHR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+*     Shift the vectors which define the elementary reflectors one
+*     column to the right, and set the first ilo and the last n-ihi
+*     rows and columns to those of the unit matrix
+*
+      DO 40 J = IHI, ILO + 1, -1
+         DO 10 I = 1, J - 1
+            A( I, J ) = ZERO
+   10    CONTINUE
+         DO 20 I = J + 1, IHI
+            A( I, J ) = A( I, J-1 )
+   20    CONTINUE
+         DO 30 I = IHI + 1, N
+            A( I, J ) = ZERO
+   30    CONTINUE
+   40 CONTINUE
+      DO 60 J = 1, ILO
+         DO 50 I = 1, N
+            A( I, J ) = ZERO
+   50    CONTINUE
+         A( J, J ) = ONE
+   60 CONTINUE
+      DO 80 J = IHI + 1, N
+         DO 70 I = 1, N
+            A( I, J ) = ZERO
+   70    CONTINUE
+         A( J, J ) = ONE
+   80 CONTINUE
+*
+      IF( NH.GT.0 ) THEN
+*
+*        Generate Q(ilo+1:ihi,ilo+1:ihi)
+*
+         CALL DORGQR( NH, NH, NH, A( ILO+1, ILO+1 ), LDA, TAU( ILO ),
+     $                WORK, LWORK, IINFO )
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of DORGHR
+*
+      END
diff --git a/costa/native/external/lapack/dorgl2.f b/costa/native/external/lapack/dorgl2.f
new file mode 100644
index 000000000..3dab19cd0
--- /dev/null
+++ b/costa/native/external/lapack/dorgl2.f
@@ -0,0 +1,134 @@
+      SUBROUTINE DORGL2( M, N, K, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DORGL2 generates an m by n real matrix Q with orthonormal rows,
+*  which is defined as the first m rows of a product of k elementary
+*  reflectors of order n
+*
+*        Q  =  H(k) . . . H(2) H(1)
+*
+*  as returned by DGELQF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. N >= M.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. M >= K >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the i-th row must contain the vector which defines
+*          the elementary reflector H(i), for i = 1,2,...,k, as returned
+*          by DGELQF in the first k rows of its array argument A.
+*          On exit, the m-by-n matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) DOUBLE PRECISION array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by DGELQF.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (M)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, L
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARF, DSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.M ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DORGL2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 )
+     $   RETURN
+*
+      IF( K.LT.M ) THEN
+*
+*        Initialise rows k+1:m to rows of the unit matrix
+*
+         DO 20 J = 1, N
+            DO 10 L = K + 1, M
+               A( L, J ) = ZERO
+   10       CONTINUE
+            IF( J.GT.K .AND. J.LE.M )
+     $         A( J, J ) = ONE
+   20    CONTINUE
+      END IF
+*
+      DO 40 I = K, 1, -1
+*
+*        Apply H(i) to A(i:m,i:n) from the right
+*
+         IF( I.LT.N ) THEN
+            IF( I.LT.M ) THEN
+               A( I, I ) = ONE
+               CALL DLARF( 'Right', M-I, N-I+1, A( I, I ), LDA,
+     $                     TAU( I ), A( I+1, I ), LDA, WORK )
+            END IF
+            CALL DSCAL( N-I, -TAU( I ), A( I, I+1 ), LDA )
+         END IF
+         A( I, I ) = ONE - TAU( I )
+*
+*        Set A(i,1:i-1) to zero
+*
+         DO 30 L = 1, I - 1
+            A( I, L ) = ZERO
+   30    CONTINUE
+   40 CONTINUE
+      RETURN
+*
+*     End of DORGL2
+*
+      END
diff --git a/costa/native/external/lapack/dorglq.f b/costa/native/external/lapack/dorglq.f
new file mode 100644
index 000000000..b40018b6a
--- /dev/null
+++ b/costa/native/external/lapack/dorglq.f
@@ -0,0 +1,216 @@
+      SUBROUTINE DORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DORGLQ generates an M-by-N real matrix Q with orthonormal rows,
+*  which is defined as the first M rows of a product of K elementary
+*  reflectors of order N
+*
+*        Q  =  H(k) . . . H(2) H(1)
+*
+*  as returned by DGELQF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. N >= M.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. M >= K >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the i-th row must contain the vector which defines
+*          the elementary reflector H(i), for i = 1,2,...,k, as returned
+*          by DGELQF in the first k rows of its array argument A.
+*          On exit, the M-by-N matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) DOUBLE PRECISION array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by DGELQF.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,M).
+*          For optimum performance LWORK >= M*NB, where NB is
+*          the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, J, KI, KK, L, LDWORK,
+     $                   LWKOPT, NB, NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARFB, DLARFT, DORGL2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'DORGLQ', ' ', M, N, K, -1 )
+      LWKOPT = MAX( 1, M )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.M ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DORGLQ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 0
+      IWS = M
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'DORGLQ', ' ', M, N, K, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = M
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'DORGLQ', ' ', M, N, K, -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code after the last block.
+*        The first kk rows are handled by the block method.
+*
+         KI = ( ( K-NX-1 ) / NB )*NB
+         KK = MIN( K, KI+NB )
+*
+*        Set A(kk+1:m,1:kk) to zero.
+*
+         DO 20 J = 1, KK
+            DO 10 I = KK + 1, M
+               A( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+      ELSE
+         KK = 0
+      END IF
+*
+*     Use unblocked code for the last or only block.
+*
+      IF( KK.LT.M )
+     $   CALL DORGL2( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA,
+     $                TAU( KK+1 ), WORK, IINFO )
+*
+      IF( KK.GT.0 ) THEN
+*
+*        Use blocked code
+*
+         DO 50 I = KI + 1, 1, -NB
+            IB = MIN( NB, K-I+1 )
+            IF( I+IB.LE.M ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i) H(i+1) . . . H(i+ib-1)
+*
+               CALL DLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ),
+     $                      LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H' to A(i+ib:m,i:n) from the right
+*
+               CALL DLARFB( 'Right', 'Transpose', 'Forward', 'Rowwise',
+     $                      M-I-IB+1, N-I+1, IB, A( I, I ), LDA, WORK,
+     $                      LDWORK, A( I+IB, I ), LDA, WORK( IB+1 ),
+     $                      LDWORK )
+            END IF
+*
+*           Apply H' to columns i:n of current block
+*
+            CALL DORGL2( IB, N-I+1, IB, A( I, I ), LDA, TAU( I ), WORK,
+     $                   IINFO )
+*
+*           Set columns 1:i-1 of current block to zero
+*
+            DO 40 J = 1, I - 1
+               DO 30 L = I, I + IB - 1
+                  A( L, J ) = ZERO
+   30          CONTINUE
+   40       CONTINUE
+   50    CONTINUE
+      END IF
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of DORGLQ
+*
+      END
diff --git a/costa/native/external/lapack/dorgql.f b/costa/native/external/lapack/dorgql.f
new file mode 100644
index 000000000..6a997c902
--- /dev/null
+++ b/costa/native/external/lapack/dorgql.f
@@ -0,0 +1,214 @@
+      SUBROUTINE DORGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DORGQL generates an M-by-N real matrix Q with orthonormal columns,
+*  which is defined as the last N columns of a product of K elementary
+*  reflectors of order M
+*
+*        Q  =  H(k) . . . H(2) H(1)
+*
+*  as returned by DGEQLF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. M >= N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. N >= K >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the (n-k+i)-th column must contain the vector which
+*          defines the elementary reflector H(i), for i = 1,2,...,k, as
+*          returned by DGEQLF in the last k columns of its array
+*          argument A.
+*          On exit, the M-by-N matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) DOUBLE PRECISION array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by DGEQLF.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,N).
+*          For optimum performance LWORK >= N*NB, where NB is the
+*          optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, J, KK, L, LDWORK, LWKOPT,
+     $                   NB, NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARFB, DLARFT, DORG2L, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'DORGQL', ' ', M, N, K, -1 )
+      LWKOPT = MAX( 1, N )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 .OR. N.GT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.N ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DORGQL', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 0
+      IWS = N
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'DORGQL', ' ', M, N, K, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = N
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'DORGQL', ' ', M, N, K, -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code after the first block.
+*        The last kk columns are handled by the block method.
+*
+         KK = MIN( K, ( ( K-NX+NB-1 ) / NB )*NB )
+*
+*        Set A(m-kk+1:m,1:n-kk) to zero.
+*
+         DO 20 J = 1, N - KK
+            DO 10 I = M - KK + 1, M
+               A( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+      ELSE
+         KK = 0
+      END IF
+*
+*     Use unblocked code for the first or only block.
+*
+      CALL DORG2L( M-KK, N-KK, K-KK, A, LDA, TAU, WORK, IINFO )
+*
+      IF( KK.GT.0 ) THEN
+*
+*        Use blocked code
+*
+         DO 50 I = K - KK + 1, K, NB
+            IB = MIN( NB, K-I+1 )
+            IF( N-K+I.GT.1 ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i+ib-1) . . . H(i+1) H(i)
+*
+               CALL DLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB,
+     $                      A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left
+*
+               CALL DLARFB( 'Left', 'No transpose', 'Backward',
+     $                      'Columnwise', M-K+I+IB-1, N-K+I-1, IB,
+     $                      A( 1, N-K+I ), LDA, WORK, LDWORK, A, LDA,
+     $                      WORK( IB+1 ), LDWORK )
+            END IF
+*
+*           Apply H to rows 1:m-k+i+ib-1 of current block
+*
+            CALL DORG2L( M-K+I+IB-1, IB, IB, A( 1, N-K+I ), LDA,
+     $                   TAU( I ), WORK, IINFO )
+*
+*           Set rows m-k+i+ib:m of current block to zero
+*
+            DO 40 J = N - K + I, N - K + I + IB - 1
+               DO 30 L = M - K + I + IB, M
+                  A( L, J ) = ZERO
+   30          CONTINUE
+   40       CONTINUE
+   50    CONTINUE
+      END IF
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of DORGQL
+*
+      END
diff --git a/costa/native/external/lapack/dorgqr.f b/costa/native/external/lapack/dorgqr.f
new file mode 100644
index 000000000..171dfb527
--- /dev/null
+++ b/costa/native/external/lapack/dorgqr.f
@@ -0,0 +1,217 @@
+      SUBROUTINE DORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DORGQR generates an M-by-N real matrix Q with orthonormal columns,
+*  which is defined as the first N columns of a product of K elementary
+*  reflectors of order M
+*
+*        Q  =  H(1) H(2) . . . H(k)
+*
+*  as returned by DGEQRF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. M >= N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. N >= K >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the i-th column must contain the vector which
+*          defines the elementary reflector H(i), for i = 1,2,...,k, as
+*          returned by DGEQRF in the first k columns of its array
+*          argument A.
+*          On exit, the M-by-N matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) DOUBLE PRECISION array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by DGEQRF.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,N).
+*          For optimum performance LWORK >= N*NB, where NB is the
+*          optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, J, KI, KK, L, LDWORK,
+     $                   LWKOPT, NB, NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARFB, DLARFT, DORG2R, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'DORGQR', ' ', M, N, K, -1 )
+      LWKOPT = MAX( 1, N )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 .OR. N.GT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.N ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DORGQR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 0
+      IWS = N
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'DORGQR', ' ', M, N, K, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = N
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'DORGQR', ' ', M, N, K, -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code after the last block.
+*        The first kk columns are handled by the block method.
+*
+         KI = ( ( K-NX-1 ) / NB )*NB
+         KK = MIN( K, KI+NB )
+*
+*        Set A(1:kk,kk+1:n) to zero.
+*
+         DO 20 J = KK + 1, N
+            DO 10 I = 1, KK
+               A( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+      ELSE
+         KK = 0
+      END IF
+*
+*     Use unblocked code for the last or only block.
+*
+      IF( KK.LT.N )
+     $   CALL DORG2R( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA,
+     $                TAU( KK+1 ), WORK, IINFO )
+*
+      IF( KK.GT.0 ) THEN
+*
+*        Use blocked code
+*
+         DO 50 I = KI + 1, 1, -NB
+            IB = MIN( NB, K-I+1 )
+            IF( I+IB.LE.N ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i) H(i+1) . . . H(i+ib-1)
+*
+               CALL DLARFT( 'Forward', 'Columnwise', M-I+1, IB,
+     $                      A( I, I ), LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H to A(i:m,i+ib:n) from the left
+*
+               CALL DLARFB( 'Left', 'No transpose', 'Forward',
+     $                      'Columnwise', M-I+1, N-I-IB+1, IB,
+     $                      A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ),
+     $                      LDA, WORK( IB+1 ), LDWORK )
+            END IF
+*
+*           Apply H to rows i:m of current block
+*
+            CALL DORG2R( M-I+1, IB, IB, A( I, I ), LDA, TAU( I ), WORK,
+     $                   IINFO )
+*
+*           Set rows 1:i-1 of current block to zero
+*
+            DO 40 J = I, I + IB - 1
+               DO 30 L = 1, I - 1
+                  A( L, J ) = ZERO
+   30          CONTINUE
+   40       CONTINUE
+   50    CONTINUE
+      END IF
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of DORGQR
+*
+      END
diff --git a/costa/native/external/lapack/dorgr2.f b/costa/native/external/lapack/dorgr2.f
new file mode 100644
index 000000000..ea88d346b
--- /dev/null
+++ b/costa/native/external/lapack/dorgr2.f
@@ -0,0 +1,132 @@
+      SUBROUTINE DORGR2( M, N, K, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DORGR2 generates an m by n real matrix Q with orthonormal rows,
+*  which is defined as the last m rows of a product of k elementary
+*  reflectors of order n
+*
+*        Q  =  H(1) H(2) . . . H(k)
+*
+*  as returned by DGERQF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. N >= M.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. M >= K >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the (m-k+i)-th row must contain the vector which
+*          defines the elementary reflector H(i), for i = 1,2,...,k, as
+*          returned by DGERQF in the last k rows of its array argument
+*          A.
+*          On exit, the m by n matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) DOUBLE PRECISION array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by DGERQF.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (M)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, II, J, L
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARF, DSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.M ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DORGR2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 )
+     $   RETURN
+*
+      IF( K.LT.M ) THEN
+*
+*        Initialise rows 1:m-k to rows of the unit matrix
+*
+         DO 20 J = 1, N
+            DO 10 L = 1, M - K
+               A( L, J ) = ZERO
+   10       CONTINUE
+            IF( J.GT.N-M .AND. J.LE.N-K )
+     $         A( M-N+J, J ) = ONE
+   20    CONTINUE
+      END IF
+*
+      DO 40 I = 1, K
+         II = M - K + I
+*
+*        Apply H(i) to A(1:m-k+i,1:n-k+i) from the right
+*
+         A( II, N-M+II ) = ONE
+         CALL DLARF( 'Right', II-1, N-M+II, A( II, 1 ), LDA, TAU( I ),
+     $               A, LDA, WORK )
+         CALL DSCAL( N-M+II-1, -TAU( I ), A( II, 1 ), LDA )
+         A( II, N-M+II ) = ONE - TAU( I )
+*
+*        Set A(m-k+i,n-k+i+1:n) to zero
+*
+         DO 30 L = N - M + II + 1, N
+            A( II, L ) = ZERO
+   30    CONTINUE
+   40 CONTINUE
+      RETURN
+*
+*     End of DORGR2
+*
+      END
diff --git a/costa/native/external/lapack/dorgrq.f b/costa/native/external/lapack/dorgrq.f
new file mode 100644
index 000000000..ba0a60052
--- /dev/null
+++ b/costa/native/external/lapack/dorgrq.f
@@ -0,0 +1,214 @@
+      SUBROUTINE DORGRQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DORGRQ generates an M-by-N real matrix Q with orthonormal rows,
+*  which is defined as the last M rows of a product of K elementary
+*  reflectors of order N
+*
+*        Q  =  H(1) H(2) . . . H(k)
+*
+*  as returned by DGERQF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. N >= M.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. M >= K >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the (m-k+i)-th row must contain the vector which
+*          defines the elementary reflector H(i), for i = 1,2,...,k, as
+*          returned by DGERQF in the last k rows of its array argument
+*          A.
+*          On exit, the M-by-N matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) DOUBLE PRECISION array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by DGERQF.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,M).
+*          For optimum performance LWORK >= M*NB, where NB is the
+*          optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, II, IINFO, IWS, J, KK, L, LDWORK,
+     $                   LWKOPT, NB, NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARFB, DLARFT, DORGR2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'DORGRQ', ' ', M, N, K, -1 )
+      LWKOPT = MAX( 1, M )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.M ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DORGRQ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 0
+      IWS = M
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'DORGRQ', ' ', M, N, K, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = M
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'DORGRQ', ' ', M, N, K, -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code after the first block.
+*        The last kk rows are handled by the block method.
+*
+         KK = MIN( K, ( ( K-NX+NB-1 ) / NB )*NB )
+*
+*        Set A(1:m-kk,n-kk+1:n) to zero.
+*
+         DO 20 J = N - KK + 1, N
+            DO 10 I = 1, M - KK
+               A( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+      ELSE
+         KK = 0
+      END IF
+*
+*     Use unblocked code for the first or only block.
+*
+      CALL DORGR2( M-KK, N-KK, K-KK, A, LDA, TAU, WORK, IINFO )
+*
+      IF( KK.GT.0 ) THEN
+*
+*        Use blocked code
+*
+         DO 50 I = K - KK + 1, K, NB
+            IB = MIN( NB, K-I+1 )
+            II = M - K + I
+            IF( II.GT.1 ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i+ib-1) . . . H(i+1) H(i)
+*
+               CALL DLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB,
+     $                      A( II, 1 ), LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H' to A(1:m-k+i-1,1:n-k+i+ib-1) from the right
+*
+               CALL DLARFB( 'Right', 'Transpose', 'Backward', 'Rowwise',
+     $                      II-1, N-K+I+IB-1, IB, A( II, 1 ), LDA, WORK,
+     $                      LDWORK, A, LDA, WORK( IB+1 ), LDWORK )
+            END IF
+*
+*           Apply H' to columns 1:n-k+i+ib-1 of current block
+*
+            CALL DORGR2( IB, N-K+I+IB-1, IB, A( II, 1 ), LDA, TAU( I ),
+     $                   WORK, IINFO )
+*
+*           Set columns n-k+i+ib:n of current block to zero
+*
+            DO 40 L = N - K + I + IB, N
+               DO 30 J = II, II + IB - 1
+                  A( J, L ) = ZERO
+   30          CONTINUE
+   40       CONTINUE
+   50    CONTINUE
+      END IF
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of DORGRQ
+*
+      END
diff --git a/costa/native/external/lapack/dorgtr.f b/costa/native/external/lapack/dorgtr.f
new file mode 100644
index 000000000..8723e01bf
--- /dev/null
+++ b/costa/native/external/lapack/dorgtr.f
@@ -0,0 +1,184 @@
+      SUBROUTINE DORGTR( UPLO, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DORGTR generates a real orthogonal matrix Q which is defined as the
+*  product of n-1 elementary reflectors of order N, as returned by
+*  DSYTRD:
+*
+*  if UPLO = 'U', Q = H(n-1) . . . H(2) H(1),
+*
+*  if UPLO = 'L', Q = H(1) H(2) . . . H(n-1).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U': Upper triangle of A contains elementary reflectors
+*                 from DSYTRD;
+*          = 'L': Lower triangle of A contains elementary reflectors
+*                 from DSYTRD.
+*
+*  N       (input) INTEGER
+*          The order of the matrix Q. N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the vectors which define the elementary reflectors,
+*          as returned by DSYTRD.
+*          On exit, the N-by-N orthogonal matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  TAU     (input) DOUBLE PRECISION array, dimension (N-1)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by DSYTRD.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,N-1).
+*          For optimum performance LWORK >= (N-1)*NB, where NB is
+*          the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, UPPER
+      INTEGER            I, IINFO, J, LWKOPT, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DORGQL, DORGQR, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, N-1 ) .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         IF( UPPER ) THEN
+            NB = ILAENV( 1, 'DORGQL', ' ', N-1, N-1, N-1, -1 )
+         ELSE
+            NB = ILAENV( 1, 'DORGQR', ' ', N-1, N-1, N-1, -1 )
+         END IF
+         LWKOPT = MAX( 1, N-1 )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DORGTR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( UPPER ) THEN
+*
+*        Q was determined by a call to DSYTRD with UPLO = 'U'
+*
+*        Shift the vectors which define the elementary reflectors one
+*        column to the left, and set the last row and column of Q to
+*        those of the unit matrix
+*
+         DO 20 J = 1, N - 1
+            DO 10 I = 1, J - 1
+               A( I, J ) = A( I, J+1 )
+   10       CONTINUE
+            A( N, J ) = ZERO
+   20    CONTINUE
+         DO 30 I = 1, N - 1
+            A( I, N ) = ZERO
+   30    CONTINUE
+         A( N, N ) = ONE
+*
+*        Generate Q(1:n-1,1:n-1)
+*
+         CALL DORGQL( N-1, N-1, N-1, A, LDA, TAU, WORK, LWORK, IINFO )
+*
+      ELSE
+*
+*        Q was determined by a call to DSYTRD with UPLO = 'L'.
+*
+*        Shift the vectors which define the elementary reflectors one
+*        column to the right, and set the first row and column of Q to
+*        those of the unit matrix
+*
+         DO 50 J = N, 2, -1
+            A( 1, J ) = ZERO
+            DO 40 I = J + 1, N
+               A( I, J ) = A( I, J-1 )
+   40       CONTINUE
+   50    CONTINUE
+         A( 1, 1 ) = ONE
+         DO 60 I = 2, N
+            A( I, 1 ) = ZERO
+   60    CONTINUE
+         IF( N.GT.1 ) THEN
+*
+*           Generate Q(2:n,2:n)
+*
+            CALL DORGQR( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK,
+     $                   LWORK, IINFO )
+         END IF
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of DORGTR
+*
+      END
diff --git a/costa/native/external/lapack/dorm2l.f b/costa/native/external/lapack/dorm2l.f
new file mode 100644
index 000000000..02b3538fd
--- /dev/null
+++ b/costa/native/external/lapack/dorm2l.f
@@ -0,0 +1,194 @@
+      SUBROUTINE DORM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DORM2L overwrites the general real m by n matrix C with
+*
+*        Q * C  if SIDE = 'L' and TRANS = 'N', or
+*
+*        Q'* C  if SIDE = 'L' and TRANS = 'T', or
+*
+*        C * Q  if SIDE = 'R' and TRANS = 'N', or
+*
+*        C * Q' if SIDE = 'R' and TRANS = 'T',
+*
+*  where Q is a real orthogonal matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(k) . . . H(2) H(1)
+*
+*  as returned by DGEQLF. Q is of order m if SIDE = 'L' and of order n
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q' from the Left
+*          = 'R': apply Q or Q' from the Right
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': apply Q  (No transpose)
+*          = 'T': apply Q' (Transpose)
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,K)
+*          The i-th column must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          DGEQLF in the last k columns of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*          If SIDE = 'L', LDA >= max(1,M);
+*          if SIDE = 'R', LDA >= max(1,N).
+*
+*  TAU     (input) DOUBLE PRECISION array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by DGEQLF.
+*
+*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+*          On entry, the m by n matrix C.
+*          On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension
+*                                   (N) if SIDE = 'L',
+*                                   (M) if SIDE = 'R'
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, NOTRAN
+      INTEGER            I, I1, I2, I3, MI, NI, NQ
+      DOUBLE PRECISION   AII
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+*
+*     NQ is the order of Q
+*
+      IF( LEFT ) THEN
+         NQ = M
+      ELSE
+         NQ = N
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DORM2L', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 )
+     $   RETURN
+*
+      IF( ( LEFT .AND. NOTRAN ) .OR. ( .NOT.LEFT .AND. .NOT.NOTRAN ) )
+     $     THEN
+         I1 = 1
+         I2 = K
+         I3 = 1
+      ELSE
+         I1 = K
+         I2 = 1
+         I3 = -1
+      END IF
+*
+      IF( LEFT ) THEN
+         NI = N
+      ELSE
+         MI = M
+      END IF
+*
+      DO 10 I = I1, I2, I3
+         IF( LEFT ) THEN
+*
+*           H(i) is applied to C(1:m-k+i,1:n)
+*
+            MI = M - K + I
+         ELSE
+*
+*           H(i) is applied to C(1:m,1:n-k+i)
+*
+            NI = N - K + I
+         END IF
+*
+*        Apply H(i)
+*
+         AII = A( NQ-K+I, I )
+         A( NQ-K+I, I ) = ONE
+         CALL DLARF( SIDE, MI, NI, A( 1, I ), 1, TAU( I ), C, LDC,
+     $               WORK )
+         A( NQ-K+I, I ) = AII
+   10 CONTINUE
+      RETURN
+*
+*     End of DORM2L
+*
+      END
diff --git a/costa/native/external/lapack/dorm2r.f b/costa/native/external/lapack/dorm2r.f
new file mode 100644
index 000000000..d5531bc37
--- /dev/null
+++ b/costa/native/external/lapack/dorm2r.f
@@ -0,0 +1,198 @@
+      SUBROUTINE DORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DORM2R overwrites the general real m by n matrix C with
+*
+*        Q * C  if SIDE = 'L' and TRANS = 'N', or
+*
+*        Q'* C  if SIDE = 'L' and TRANS = 'T', or
+*
+*        C * Q  if SIDE = 'R' and TRANS = 'N', or
+*
+*        C * Q' if SIDE = 'R' and TRANS = 'T',
+*
+*  where Q is a real orthogonal matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(1) H(2) . . . H(k)
+*
+*  as returned by DGEQRF. Q is of order m if SIDE = 'L' and of order n
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q' from the Left
+*          = 'R': apply Q or Q' from the Right
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': apply Q  (No transpose)
+*          = 'T': apply Q' (Transpose)
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,K)
+*          The i-th column must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          DGEQRF in the first k columns of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*          If SIDE = 'L', LDA >= max(1,M);
+*          if SIDE = 'R', LDA >= max(1,N).
+*
+*  TAU     (input) DOUBLE PRECISION array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by DGEQRF.
+*
+*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+*          On entry, the m by n matrix C.
+*          On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension
+*                                   (N) if SIDE = 'L',
+*                                   (M) if SIDE = 'R'
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, NOTRAN
+      INTEGER            I, I1, I2, I3, IC, JC, MI, NI, NQ
+      DOUBLE PRECISION   AII
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+*
+*     NQ is the order of Q
+*
+      IF( LEFT ) THEN
+         NQ = M
+      ELSE
+         NQ = N
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DORM2R', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 )
+     $   RETURN
+*
+      IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. ( .NOT.LEFT .AND. NOTRAN ) )
+     $     THEN
+         I1 = 1
+         I2 = K
+         I3 = 1
+      ELSE
+         I1 = K
+         I2 = 1
+         I3 = -1
+      END IF
+*
+      IF( LEFT ) THEN
+         NI = N
+         JC = 1
+      ELSE
+         MI = M
+         IC = 1
+      END IF
+*
+      DO 10 I = I1, I2, I3
+         IF( LEFT ) THEN
+*
+*           H(i) is applied to C(i:m,1:n)
+*
+            MI = M - I + 1
+            IC = I
+         ELSE
+*
+*           H(i) is applied to C(1:m,i:n)
+*
+            NI = N - I + 1
+            JC = I
+         END IF
+*
+*        Apply H(i)
+*
+         AII = A( I, I )
+         A( I, I ) = ONE
+         CALL DLARF( SIDE, MI, NI, A( I, I ), 1, TAU( I ), C( IC, JC ),
+     $               LDC, WORK )
+         A( I, I ) = AII
+   10 CONTINUE
+      RETURN
+*
+*     End of DORM2R
+*
+      END
diff --git a/costa/native/external/lapack/dormbr.f b/costa/native/external/lapack/dormbr.f
new file mode 100644
index 000000000..40ffe480e
--- /dev/null
+++ b/costa/native/external/lapack/dormbr.f
@@ -0,0 +1,282 @@
+      SUBROUTINE DORMBR( VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C,
+     $                   LDC, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS, VECT
+      INTEGER            INFO, K, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  If VECT = 'Q', DORMBR overwrites the general real M-by-N matrix C
+*  with
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'T':      Q**T * C       C * Q**T
+*
+*  If VECT = 'P', DORMBR overwrites the general real M-by-N matrix C
+*  with
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      P * C          C * P
+*  TRANS = 'T':      P**T * C       C * P**T
+*
+*  Here Q and P**T are the orthogonal matrices determined by DGEBRD when
+*  reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and
+*  P**T are defined as products of elementary reflectors H(i) and G(i)
+*  respectively.
+*
+*  Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the
+*  order of the orthogonal matrix Q or P**T that is applied.
+*
+*  If VECT = 'Q', A is assumed to have been an NQ-by-K matrix:
+*  if nq >= k, Q = H(1) H(2) . . . H(k);
+*  if nq < k, Q = H(1) H(2) . . . H(nq-1).
+*
+*  If VECT = 'P', A is assumed to have been a K-by-NQ matrix:
+*  if k < nq, P = G(1) G(2) . . . G(k);
+*  if k >= nq, P = G(1) G(2) . . . G(nq-1).
+*
+*  Arguments
+*  =========
+*
+*  VECT    (input) CHARACTER*1
+*          = 'Q': apply Q or Q**T;
+*          = 'P': apply P or P**T.
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q, Q**T, P or P**T from the Left;
+*          = 'R': apply Q, Q**T, P or P**T from the Right.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q  or P;
+*          = 'T':  Transpose, apply Q**T or P**T.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          If VECT = 'Q', the number of columns in the original
+*          matrix reduced by DGEBRD.
+*          If VECT = 'P', the number of rows in the original
+*          matrix reduced by DGEBRD.
+*          K >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension
+*                                (LDA,min(nq,K)) if VECT = 'Q'
+*                                (LDA,nq)        if VECT = 'P'
+*          The vectors which define the elementary reflectors H(i) and
+*          G(i), whose products determine the matrices Q and P, as
+*          returned by DGEBRD.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*          If VECT = 'Q', LDA >= max(1,nq);
+*          if VECT = 'P', LDA >= max(1,min(nq,K)).
+*
+*  TAU     (input) DOUBLE PRECISION array, dimension (min(nq,K))
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i) or G(i) which determines Q or P, as returned
+*          by DGEBRD in the array argument TAUQ or TAUP.
+*
+*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q
+*          or P*C or P**T*C or C*P or C*P**T.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE = 'L', and
+*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            APPLYQ, LEFT, LQUERY, NOTRAN
+      CHARACTER          TRANST
+      INTEGER            I1, I2, IINFO, LWKOPT, MI, NB, NI, NQ, NW
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DORMLQ, DORMQR, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      APPLYQ = LSAME( VECT, 'Q' )
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q or P and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.APPLYQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( K.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( ( APPLYQ .AND. LDA.LT.MAX( 1, NQ ) ) .OR.
+     $         ( .NOT.APPLYQ .AND. LDA.LT.MAX( 1, MIN( NQ, K ) ) ) )
+     $          THEN
+         INFO = -8
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -11
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         IF( APPLYQ ) THEN
+            IF( LEFT ) THEN
+               NB = ILAENV( 1, 'DORMQR', SIDE // TRANS, M-1, N, M-1,
+     $              -1 )
+            ELSE
+               NB = ILAENV( 1, 'DORMQR', SIDE // TRANS, M, N-1, N-1,
+     $              -1 )
+            END IF
+         ELSE
+            IF( LEFT ) THEN
+               NB = ILAENV( 1, 'DORMLQ', SIDE // TRANS, M-1, N, M-1,
+     $              -1 )
+            ELSE
+               NB = ILAENV( 1, 'DORMLQ', SIDE // TRANS, M, N-1, N-1,
+     $              -1 )
+            END IF
+         END IF
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DORMBR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      WORK( 1 ) = 1
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+      IF( APPLYQ ) THEN
+*
+*        Apply Q
+*
+         IF( NQ.GE.K ) THEN
+*
+*           Q was determined by a call to DGEBRD with nq >= k
+*
+            CALL DORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, IINFO )
+         ELSE IF( NQ.GT.1 ) THEN
+*
+*           Q was determined by a call to DGEBRD with nq < k
+*
+            IF( LEFT ) THEN
+               MI = M - 1
+               NI = N
+               I1 = 2
+               I2 = 1
+            ELSE
+               MI = M
+               NI = N - 1
+               I1 = 1
+               I2 = 2
+            END IF
+            CALL DORMQR( SIDE, TRANS, MI, NI, NQ-1, A( 2, 1 ), LDA, TAU,
+     $                   C( I1, I2 ), LDC, WORK, LWORK, IINFO )
+         END IF
+      ELSE
+*
+*        Apply P
+*
+         IF( NOTRAN ) THEN
+            TRANST = 'T'
+         ELSE
+            TRANST = 'N'
+         END IF
+         IF( NQ.GT.K ) THEN
+*
+*           P was determined by a call to DGEBRD with nq > k
+*
+            CALL DORMLQ( SIDE, TRANST, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, IINFO )
+         ELSE IF( NQ.GT.1 ) THEN
+*
+*           P was determined by a call to DGEBRD with nq <= k
+*
+            IF( LEFT ) THEN
+               MI = M - 1
+               NI = N
+               I1 = 2
+               I2 = 1
+            ELSE
+               MI = M
+               NI = N - 1
+               I1 = 1
+               I2 = 2
+            END IF
+            CALL DORMLQ( SIDE, TRANST, MI, NI, NQ-1, A( 1, 2 ), LDA,
+     $                   TAU, C( I1, I2 ), LDC, WORK, LWORK, IINFO )
+         END IF
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of DORMBR
+*
+      END
diff --git a/costa/native/external/lapack/dormhr.f b/costa/native/external/lapack/dormhr.f
new file mode 100644
index 000000000..ce3657145
--- /dev/null
+++ b/costa/native/external/lapack/dormhr.f
@@ -0,0 +1,202 @@
+      SUBROUTINE DORMHR( SIDE, TRANS, M, N, ILO, IHI, A, LDA, TAU, C,
+     $                   LDC, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            IHI, ILO, INFO, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DORMHR overwrites the general real M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'T':      Q**T * C       C * Q**T
+*
+*  where Q is a real orthogonal matrix of order nq, with nq = m if
+*  SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of
+*  IHI-ILO elementary reflectors, as returned by DGEHRD:
+*
+*  Q = H(ilo) H(ilo+1) . . . H(ihi-1).
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**T from the Left;
+*          = 'R': apply Q or Q**T from the Right.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q;
+*          = 'T':  Transpose, apply Q**T.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          ILO and IHI must have the same values as in the previous call
+*          of DGEHRD. Q is equal to the unit matrix except in the
+*          submatrix Q(ilo+1:ihi,ilo+1:ihi).
+*          If SIDE = 'L', then 1 <= ILO <= IHI <= M, if M > 0, and
+*          ILO = 1 and IHI = 0, if M = 0;
+*          if SIDE = 'R', then 1 <= ILO <= IHI <= N, if N > 0, and
+*          ILO = 1 and IHI = 0, if N = 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension
+*                               (LDA,M) if SIDE = 'L'
+*                               (LDA,N) if SIDE = 'R'
+*          The vectors which define the elementary reflectors, as
+*          returned by DGEHRD.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*          LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'.
+*
+*  TAU     (input) DOUBLE PRECISION array, dimension
+*                               (M-1) if SIDE = 'L'
+*                               (N-1) if SIDE = 'R'
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by DGEHRD.
+*
+*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE = 'L', and
+*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LEFT, LQUERY
+      INTEGER            I1, I2, IINFO, LWKOPT, MI, NB, NH, NI, NQ, NW
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DORMQR, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NH = IHI - ILO
+      LEFT = LSAME( SIDE, 'L' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.LSAME( TRANS, 'T' ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, NQ ) ) THEN
+         INFO = -5
+      ELSE IF( IHI.LT.MIN( ILO, NQ ) .OR. IHI.GT.NQ ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN
+         INFO = -8
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -11
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         IF( LEFT ) THEN
+            NB = ILAENV( 1, 'DORMQR', SIDE // TRANS, NH, N, NH, -1 )
+         ELSE
+            NB = ILAENV( 1, 'DORMQR', SIDE // TRANS, M, NH, NH, -1 )
+         END IF
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DORMHR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. NH.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( LEFT ) THEN
+         MI = NH
+         NI = N
+         I1 = ILO + 1
+         I2 = 1
+      ELSE
+         MI = M
+         NI = NH
+         I1 = 1
+         I2 = ILO + 1
+      END IF
+*
+      CALL DORMQR( SIDE, TRANS, MI, NI, NH, A( ILO+1, ILO ), LDA,
+     $             TAU( ILO ), C( I1, I2 ), LDC, WORK, LWORK, IINFO )
+*
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of DORMHR
+*
+      END
diff --git a/costa/native/external/lapack/dorml2.f b/costa/native/external/lapack/dorml2.f
new file mode 100644
index 000000000..d07f31bf9
--- /dev/null
+++ b/costa/native/external/lapack/dorml2.f
@@ -0,0 +1,198 @@
+      SUBROUTINE DORML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DORML2 overwrites the general real m by n matrix C with
+*
+*        Q * C  if SIDE = 'L' and TRANS = 'N', or
+*
+*        Q'* C  if SIDE = 'L' and TRANS = 'T', or
+*
+*        C * Q  if SIDE = 'R' and TRANS = 'N', or
+*
+*        C * Q' if SIDE = 'R' and TRANS = 'T',
+*
+*  where Q is a real orthogonal matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(k) . . . H(2) H(1)
+*
+*  as returned by DGELQF. Q is of order m if SIDE = 'L' and of order n
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q' from the Left
+*          = 'R': apply Q or Q' from the Right
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': apply Q  (No transpose)
+*          = 'T': apply Q' (Transpose)
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension
+*                               (LDA,M) if SIDE = 'L',
+*                               (LDA,N) if SIDE = 'R'
+*          The i-th row must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          DGELQF in the first k rows of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,K).
+*
+*  TAU     (input) DOUBLE PRECISION array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by DGELQF.
+*
+*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+*          On entry, the m by n matrix C.
+*          On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension
+*                                   (N) if SIDE = 'L',
+*                                   (M) if SIDE = 'R'
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, NOTRAN
+      INTEGER            I, I1, I2, I3, IC, JC, MI, NI, NQ
+      DOUBLE PRECISION   AII
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+*
+*     NQ is the order of Q
+*
+      IF( LEFT ) THEN
+         NQ = M
+      ELSE
+         NQ = N
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, K ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DORML2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 )
+     $   RETURN
+*
+      IF( ( LEFT .AND. NOTRAN ) .OR. ( .NOT.LEFT .AND. .NOT.NOTRAN ) )
+     $     THEN
+         I1 = 1
+         I2 = K
+         I3 = 1
+      ELSE
+         I1 = K
+         I2 = 1
+         I3 = -1
+      END IF
+*
+      IF( LEFT ) THEN
+         NI = N
+         JC = 1
+      ELSE
+         MI = M
+         IC = 1
+      END IF
+*
+      DO 10 I = I1, I2, I3
+         IF( LEFT ) THEN
+*
+*           H(i) is applied to C(i:m,1:n)
+*
+            MI = M - I + 1
+            IC = I
+         ELSE
+*
+*           H(i) is applied to C(1:m,i:n)
+*
+            NI = N - I + 1
+            JC = I
+         END IF
+*
+*        Apply H(i)
+*
+         AII = A( I, I )
+         A( I, I ) = ONE
+         CALL DLARF( SIDE, MI, NI, A( I, I ), LDA, TAU( I ),
+     $               C( IC, JC ), LDC, WORK )
+         A( I, I ) = AII
+   10 CONTINUE
+      RETURN
+*
+*     End of DORML2
+*
+      END
diff --git a/costa/native/external/lapack/dormlq.f b/costa/native/external/lapack/dormlq.f
new file mode 100644
index 000000000..e5242a568
--- /dev/null
+++ b/costa/native/external/lapack/dormlq.f
@@ -0,0 +1,268 @@
+      SUBROUTINE DORMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DORMLQ overwrites the general real M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'T':      Q**T * C       C * Q**T
+*
+*  where Q is a real orthogonal matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(k) . . . H(2) H(1)
+*
+*  as returned by DGELQF. Q is of order M if SIDE = 'L' and of order N
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**T from the Left;
+*          = 'R': apply Q or Q**T from the Right.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q;
+*          = 'T':  Transpose, apply Q**T.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension
+*                               (LDA,M) if SIDE = 'L',
+*                               (LDA,N) if SIDE = 'R'
+*          The i-th row must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          DGELQF in the first k rows of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,K).
+*
+*  TAU     (input) DOUBLE PRECISION array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by DGELQF.
+*
+*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE = 'L', and
+*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            NBMAX, LDT
+      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, LQUERY, NOTRAN
+      CHARACTER          TRANST
+      INTEGER            I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK,
+     $                   LWKOPT, MI, NB, NBMIN, NI, NQ, NW
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   T( LDT, NBMAX )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARFB, DLARFT, DORML2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, K ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size.  NB may be at most NBMAX, where NBMAX
+*        is used to define the local array T.
+*
+         NB = MIN( NBMAX, ILAENV( 1, 'DORMLQ', SIDE // TRANS, M, N, K,
+     $        -1 ) )
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DORMLQ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      LDWORK = NW
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+         IWS = NW*NB
+         IF( LWORK.LT.IWS ) THEN
+            NB = LWORK / LDWORK
+            NBMIN = MAX( 2, ILAENV( 2, 'DORMLQ', SIDE // TRANS, M, N, K,
+     $              -1 ) )
+         END IF
+      ELSE
+         IWS = NW
+      END IF
+*
+      IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
+*
+*        Use unblocked code
+*
+         CALL DORML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK,
+     $                IINFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( ( LEFT .AND. NOTRAN ) .OR.
+     $       ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN
+            I1 = 1
+            I2 = K
+            I3 = NB
+         ELSE
+            I1 = ( ( K-1 ) / NB )*NB + 1
+            I2 = 1
+            I3 = -NB
+         END IF
+*
+         IF( LEFT ) THEN
+            NI = N
+            JC = 1
+         ELSE
+            MI = M
+            IC = 1
+         END IF
+*
+         IF( NOTRAN ) THEN
+            TRANST = 'T'
+         ELSE
+            TRANST = 'N'
+         END IF
+*
+         DO 10 I = I1, I2, I3
+            IB = MIN( NB, K-I+1 )
+*
+*           Form the triangular factor of the block reflector
+*           H = H(i) H(i+1) . . . H(i+ib-1)
+*
+            CALL DLARFT( 'Forward', 'Rowwise', NQ-I+1, IB, A( I, I ),
+     $                   LDA, TAU( I ), T, LDT )
+            IF( LEFT ) THEN
+*
+*              H or H' is applied to C(i:m,1:n)
+*
+               MI = M - I + 1
+               IC = I
+            ELSE
+*
+*              H or H' is applied to C(1:m,i:n)
+*
+               NI = N - I + 1
+               JC = I
+            END IF
+*
+*           Apply H or H'
+*
+            CALL DLARFB( SIDE, TRANST, 'Forward', 'Rowwise', MI, NI, IB,
+     $                   A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, WORK,
+     $                   LDWORK )
+   10    CONTINUE
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of DORMLQ
+*
+      END
diff --git a/costa/native/external/lapack/dormql.f b/costa/native/external/lapack/dormql.f
new file mode 100644
index 000000000..f6a00c2af
--- /dev/null
+++ b/costa/native/external/lapack/dormql.f
@@ -0,0 +1,257 @@
+      SUBROUTINE DORMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DORMQL overwrites the general real M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'T':      Q**T * C       C * Q**T
+*
+*  where Q is a real orthogonal matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(k) . . . H(2) H(1)
+*
+*  as returned by DGEQLF. Q is of order M if SIDE = 'L' and of order N
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**T from the Left;
+*          = 'R': apply Q or Q**T from the Right.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q;
+*          = 'T':  Transpose, apply Q**T.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,K)
+*          The i-th column must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          DGEQLF in the last k columns of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*          If SIDE = 'L', LDA >= max(1,M);
+*          if SIDE = 'R', LDA >= max(1,N).
+*
+*  TAU     (input) DOUBLE PRECISION array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by DGEQLF.
+*
+*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE = 'L', and
+*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            NBMAX, LDT
+      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, LQUERY, NOTRAN
+      INTEGER            I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT,
+     $                   MI, NB, NBMIN, NI, NQ, NW
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   T( LDT, NBMAX )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARFB, DLARFT, DORM2L, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size.  NB may be at most NBMAX, where NBMAX
+*        is used to define the local array T.
+*
+         NB = MIN( NBMAX, ILAENV( 1, 'DORMQL', SIDE // TRANS, M, N, K,
+     $        -1 ) )
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DORMQL', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      LDWORK = NW
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+         IWS = NW*NB
+         IF( LWORK.LT.IWS ) THEN
+            NB = LWORK / LDWORK
+            NBMIN = MAX( 2, ILAENV( 2, 'DORMQL', SIDE // TRANS, M, N, K,
+     $              -1 ) )
+         END IF
+      ELSE
+         IWS = NW
+      END IF
+*
+      IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
+*
+*        Use unblocked code
+*
+         CALL DORM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK,
+     $                IINFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( ( LEFT .AND. NOTRAN ) .OR.
+     $       ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN
+            I1 = 1
+            I2 = K
+            I3 = NB
+         ELSE
+            I1 = ( ( K-1 ) / NB )*NB + 1
+            I2 = 1
+            I3 = -NB
+         END IF
+*
+         IF( LEFT ) THEN
+            NI = N
+         ELSE
+            MI = M
+         END IF
+*
+         DO 10 I = I1, I2, I3
+            IB = MIN( NB, K-I+1 )
+*
+*           Form the triangular factor of the block reflector
+*           H = H(i+ib-1) . . . H(i+1) H(i)
+*
+            CALL DLARFT( 'Backward', 'Columnwise', NQ-K+I+IB-1, IB,
+     $                   A( 1, I ), LDA, TAU( I ), T, LDT )
+            IF( LEFT ) THEN
+*
+*              H or H' is applied to C(1:m-k+i+ib-1,1:n)
+*
+               MI = M - K + I + IB - 1
+            ELSE
+*
+*              H or H' is applied to C(1:m,1:n-k+i+ib-1)
+*
+               NI = N - K + I + IB - 1
+            END IF
+*
+*           Apply H or H'
+*
+            CALL DLARFB( SIDE, TRANS, 'Backward', 'Columnwise', MI, NI,
+     $                   IB, A( 1, I ), LDA, T, LDT, C, LDC, WORK,
+     $                   LDWORK )
+   10    CONTINUE
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of DORMQL
+*
+      END
diff --git a/costa/native/external/lapack/dormqr.f b/costa/native/external/lapack/dormqr.f
new file mode 100644
index 000000000..57315ae60
--- /dev/null
+++ b/costa/native/external/lapack/dormqr.f
@@ -0,0 +1,261 @@
+      SUBROUTINE DORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DORMQR overwrites the general real M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'T':      Q**T * C       C * Q**T
+*
+*  where Q is a real orthogonal matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(1) H(2) . . . H(k)
+*
+*  as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**T from the Left;
+*          = 'R': apply Q or Q**T from the Right.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q;
+*          = 'T':  Transpose, apply Q**T.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,K)
+*          The i-th column must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          DGEQRF in the first k columns of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*          If SIDE = 'L', LDA >= max(1,M);
+*          if SIDE = 'R', LDA >= max(1,N).
+*
+*  TAU     (input) DOUBLE PRECISION array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by DGEQRF.
+*
+*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE = 'L', and
+*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            NBMAX, LDT
+      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, LQUERY, NOTRAN
+      INTEGER            I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK,
+     $                   LWKOPT, MI, NB, NBMIN, NI, NQ, NW
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   T( LDT, NBMAX )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARFB, DLARFT, DORM2R, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size.  NB may be at most NBMAX, where NBMAX
+*        is used to define the local array T.
+*
+         NB = MIN( NBMAX, ILAENV( 1, 'DORMQR', SIDE // TRANS, M, N, K,
+     $        -1 ) )
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DORMQR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      LDWORK = NW
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+         IWS = NW*NB
+         IF( LWORK.LT.IWS ) THEN
+            NB = LWORK / LDWORK
+            NBMIN = MAX( 2, ILAENV( 2, 'DORMQR', SIDE // TRANS, M, N, K,
+     $              -1 ) )
+         END IF
+      ELSE
+         IWS = NW
+      END IF
+*
+      IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
+*
+*        Use unblocked code
+*
+         CALL DORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK,
+     $                IINFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( ( LEFT .AND. .NOT.NOTRAN ) .OR.
+     $       ( .NOT.LEFT .AND. NOTRAN ) ) THEN
+            I1 = 1
+            I2 = K
+            I3 = NB
+         ELSE
+            I1 = ( ( K-1 ) / NB )*NB + 1
+            I2 = 1
+            I3 = -NB
+         END IF
+*
+         IF( LEFT ) THEN
+            NI = N
+            JC = 1
+         ELSE
+            MI = M
+            IC = 1
+         END IF
+*
+         DO 10 I = I1, I2, I3
+            IB = MIN( NB, K-I+1 )
+*
+*           Form the triangular factor of the block reflector
+*           H = H(i) H(i+1) . . . H(i+ib-1)
+*
+            CALL DLARFT( 'Forward', 'Columnwise', NQ-I+1, IB, A( I, I ),
+     $                   LDA, TAU( I ), T, LDT )
+            IF( LEFT ) THEN
+*
+*              H or H' is applied to C(i:m,1:n)
+*
+               MI = M - I + 1
+               IC = I
+            ELSE
+*
+*              H or H' is applied to C(1:m,i:n)
+*
+               NI = N - I + 1
+               JC = I
+            END IF
+*
+*           Apply H or H'
+*
+            CALL DLARFB( SIDE, TRANS, 'Forward', 'Columnwise', MI, NI,
+     $                   IB, A( I, I ), LDA, T, LDT, C( IC, JC ), LDC,
+     $                   WORK, LDWORK )
+   10    CONTINUE
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of DORMQR
+*
+      END
diff --git a/costa/native/external/lapack/dormr2.f b/costa/native/external/lapack/dormr2.f
new file mode 100644
index 000000000..13beef049
--- /dev/null
+++ b/costa/native/external/lapack/dormr2.f
@@ -0,0 +1,194 @@
+      SUBROUTINE DORMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DORMR2 overwrites the general real m by n matrix C with
+*
+*        Q * C  if SIDE = 'L' and TRANS = 'N', or
+*
+*        Q'* C  if SIDE = 'L' and TRANS = 'T', or
+*
+*        C * Q  if SIDE = 'R' and TRANS = 'N', or
+*
+*        C * Q' if SIDE = 'R' and TRANS = 'T',
+*
+*  where Q is a real orthogonal matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(1) H(2) . . . H(k)
+*
+*  as returned by DGERQF. Q is of order m if SIDE = 'L' and of order n
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q' from the Left
+*          = 'R': apply Q or Q' from the Right
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': apply Q  (No transpose)
+*          = 'T': apply Q' (Transpose)
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension
+*                               (LDA,M) if SIDE = 'L',
+*                               (LDA,N) if SIDE = 'R'
+*          The i-th row must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          DGERQF in the last k rows of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,K).
+*
+*  TAU     (input) DOUBLE PRECISION array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by DGERQF.
+*
+*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+*          On entry, the m by n matrix C.
+*          On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension
+*                                   (N) if SIDE = 'L',
+*                                   (M) if SIDE = 'R'
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, NOTRAN
+      INTEGER            I, I1, I2, I3, MI, NI, NQ
+      DOUBLE PRECISION   AII
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+*
+*     NQ is the order of Q
+*
+      IF( LEFT ) THEN
+         NQ = M
+      ELSE
+         NQ = N
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, K ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DORMR2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 )
+     $   RETURN
+*
+      IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. ( .NOT.LEFT .AND. NOTRAN ) )
+     $     THEN
+         I1 = 1
+         I2 = K
+         I3 = 1
+      ELSE
+         I1 = K
+         I2 = 1
+         I3 = -1
+      END IF
+*
+      IF( LEFT ) THEN
+         NI = N
+      ELSE
+         MI = M
+      END IF
+*
+      DO 10 I = I1, I2, I3
+         IF( LEFT ) THEN
+*
+*           H(i) is applied to C(1:m-k+i,1:n)
+*
+            MI = M - K + I
+         ELSE
+*
+*           H(i) is applied to C(1:m,1:n-k+i)
+*
+            NI = N - K + I
+         END IF
+*
+*        Apply H(i)
+*
+         AII = A( I, NQ-K+I )
+         A( I, NQ-K+I ) = ONE
+         CALL DLARF( SIDE, MI, NI, A( I, 1 ), LDA, TAU( I ), C, LDC,
+     $               WORK )
+         A( I, NQ-K+I ) = AII
+   10 CONTINUE
+      RETURN
+*
+*     End of DORMR2
+*
+      END
diff --git a/costa/native/external/lapack/dormr3.f b/costa/native/external/lapack/dormr3.f
new file mode 100644
index 000000000..82ff9a872
--- /dev/null
+++ b/costa/native/external/lapack/dormr3.f
@@ -0,0 +1,207 @@
+      SUBROUTINE DORMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC,
+     $                   WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, L, LDA, LDC, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DORMR3 overwrites the general real m by n matrix C with
+*
+*        Q * C  if SIDE = 'L' and TRANS = 'N', or
+*
+*        Q'* C  if SIDE = 'L' and TRANS = 'T', or
+*
+*        C * Q  if SIDE = 'R' and TRANS = 'N', or
+*
+*        C * Q' if SIDE = 'R' and TRANS = 'T',
+*
+*  where Q is a real orthogonal matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(1) H(2) . . . H(k)
+*
+*  as returned by DTZRZF. Q is of order m if SIDE = 'L' and of order n
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q' from the Left
+*          = 'R': apply Q or Q' from the Right
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': apply Q  (No transpose)
+*          = 'T': apply Q' (Transpose)
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  L       (input) INTEGER
+*          The number of columns of the matrix A containing
+*          the meaningful part of the Householder reflectors.
+*          If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension
+*                               (LDA,M) if SIDE = 'L',
+*                               (LDA,N) if SIDE = 'R'
+*          The i-th row must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          DTZRZF in the last k rows of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,K).
+*
+*  TAU     (input) DOUBLE PRECISION array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by DTZRZF.
+*
+*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+*          On entry, the m-by-n matrix C.
+*          On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension
+*                                   (N) if SIDE = 'L',
+*                                   (M) if SIDE = 'R'
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LEFT, NOTRAN
+      INTEGER            I, I1, I2, I3, IC, JA, JC, MI, NI, NQ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARZ, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+*
+*     NQ is the order of Q
+*
+      IF( LEFT ) THEN
+         NQ = M
+      ELSE
+         NQ = N
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( L.LT.0 .OR. ( LEFT .AND. ( L.GT.M ) ) .OR.
+     $         ( .NOT.LEFT .AND. ( L.GT.N ) ) ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, K ) ) THEN
+         INFO = -8
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DORMR3', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 )
+     $   RETURN
+*
+      IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN
+         I1 = 1
+         I2 = K
+         I3 = 1
+      ELSE
+         I1 = K
+         I2 = 1
+         I3 = -1
+      END IF
+*
+      IF( LEFT ) THEN
+         NI = N
+         JA = M - L + 1
+         JC = 1
+      ELSE
+         MI = M
+         JA = N - L + 1
+         IC = 1
+      END IF
+*
+      DO 10 I = I1, I2, I3
+         IF( LEFT ) THEN
+*
+*           H(i) or H(i)' is applied to C(i:m,1:n)
+*
+            MI = M - I + 1
+            IC = I
+         ELSE
+*
+*           H(i) or H(i)' is applied to C(1:m,i:n)
+*
+            NI = N - I + 1
+            JC = I
+         END IF
+*
+*        Apply H(i) or H(i)'
+*
+         CALL DLARZ( SIDE, MI, NI, L, A( I, JA ), LDA, TAU( I ),
+     $               C( IC, JC ), LDC, WORK )
+*
+   10 CONTINUE
+*
+      RETURN
+*
+*     End of DORMR3
+*
+      END
diff --git a/costa/native/external/lapack/dormrq.f b/costa/native/external/lapack/dormrq.f
new file mode 100644
index 000000000..56972ff55
--- /dev/null
+++ b/costa/native/external/lapack/dormrq.f
@@ -0,0 +1,264 @@
+      SUBROUTINE DORMRQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DORMRQ overwrites the general real M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'T':      Q**T * C       C * Q**T
+*
+*  where Q is a real orthogonal matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(1) H(2) . . . H(k)
+*
+*  as returned by DGERQF. Q is of order M if SIDE = 'L' and of order N
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**T from the Left;
+*          = 'R': apply Q or Q**T from the Right.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q;
+*          = 'T':  Transpose, apply Q**T.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension
+*                               (LDA,M) if SIDE = 'L',
+*                               (LDA,N) if SIDE = 'R'
+*          The i-th row must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          DGERQF in the last k rows of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,K).
+*
+*  TAU     (input) DOUBLE PRECISION array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by DGERQF.
+*
+*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE = 'L', and
+*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            NBMAX, LDT
+      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, LQUERY, NOTRAN
+      CHARACTER          TRANST
+      INTEGER            I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT,
+     $                   MI, NB, NBMIN, NI, NQ, NW
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   T( LDT, NBMAX )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARFB, DLARFT, DORMR2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, K ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size.  NB may be at most NBMAX, where NBMAX
+*        is used to define the local array T.
+*
+         NB = MIN( NBMAX, ILAENV( 1, 'DORMRQ', SIDE // TRANS, M, N, K,
+     $        -1 ) )
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DORMRQ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      LDWORK = NW
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+         IWS = NW*NB
+         IF( LWORK.LT.IWS ) THEN
+            NB = LWORK / LDWORK
+            NBMIN = MAX( 2, ILAENV( 2, 'DORMRQ', SIDE // TRANS, M, N, K,
+     $              -1 ) )
+         END IF
+      ELSE
+         IWS = NW
+      END IF
+*
+      IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
+*
+*        Use unblocked code
+*
+         CALL DORMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK,
+     $                IINFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( ( LEFT .AND. .NOT.NOTRAN ) .OR.
+     $       ( .NOT.LEFT .AND. NOTRAN ) ) THEN
+            I1 = 1
+            I2 = K
+            I3 = NB
+         ELSE
+            I1 = ( ( K-1 ) / NB )*NB + 1
+            I2 = 1
+            I3 = -NB
+         END IF
+*
+         IF( LEFT ) THEN
+            NI = N
+         ELSE
+            MI = M
+         END IF
+*
+         IF( NOTRAN ) THEN
+            TRANST = 'T'
+         ELSE
+            TRANST = 'N'
+         END IF
+*
+         DO 10 I = I1, I2, I3
+            IB = MIN( NB, K-I+1 )
+*
+*           Form the triangular factor of the block reflector
+*           H = H(i+ib-1) . . . H(i+1) H(i)
+*
+            CALL DLARFT( 'Backward', 'Rowwise', NQ-K+I+IB-1, IB,
+     $                   A( I, 1 ), LDA, TAU( I ), T, LDT )
+            IF( LEFT ) THEN
+*
+*              H or H' is applied to C(1:m-k+i+ib-1,1:n)
+*
+               MI = M - K + I + IB - 1
+            ELSE
+*
+*              H or H' is applied to C(1:m,1:n-k+i+ib-1)
+*
+               NI = N - K + I + IB - 1
+            END IF
+*
+*           Apply H or H'
+*
+            CALL DLARFB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI,
+     $                   IB, A( I, 1 ), LDA, T, LDT, C, LDC, WORK,
+     $                   LDWORK )
+   10    CONTINUE
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of DORMRQ
+*
+      END
diff --git a/costa/native/external/lapack/dormrz.f b/costa/native/external/lapack/dormrz.f
new file mode 100644
index 000000000..b1f329cba
--- /dev/null
+++ b/costa/native/external/lapack/dormrz.f
@@ -0,0 +1,288 @@
+      SUBROUTINE DORMRZ( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, L, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DORMRZ overwrites the general real M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'T':      Q**T * C       C * Q**T
+*
+*  where Q is a real orthogonal matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(1) H(2) . . . H(k)
+*
+*  as returned by DTZRZF. Q is of order M if SIDE = 'L' and of order N
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**T from the Left;
+*          = 'R': apply Q or Q**T from the Right.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q;
+*          = 'T':  Transpose, apply Q**T.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  L       (input) INTEGER
+*          The number of columns of the matrix A containing
+*          the meaningful part of the Householder reflectors.
+*          If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension
+*                               (LDA,M) if SIDE = 'L',
+*                               (LDA,N) if SIDE = 'R'
+*          The i-th row must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          DTZRZF in the last k rows of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,K).
+*
+*  TAU     (input) DOUBLE PRECISION array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by DTZRZF.
+*
+*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE = 'L', and
+*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            NBMAX, LDT
+      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, LQUERY, NOTRAN
+      CHARACTER          TRANST
+      INTEGER            I, I1, I2, I3, IB, IC, IINFO, IWS, JA, JC,
+     $                   LDWORK, LWKOPT, MI, NB, NBMIN, NI, NQ, NW
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   T( LDT, NBMAX )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARZB, DLARZT, DORMR3, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( L.LT.0 .OR. ( LEFT .AND. ( L.GT.M ) ) .OR.
+     $         ( .NOT.LEFT .AND. ( L.GT.N ) ) ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, K ) ) THEN
+         INFO = -8
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -11
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size.  NB may be at most NBMAX, where NBMAX
+*        is used to define the local array T.
+*
+         NB = MIN( NBMAX, ILAENV( 1, 'DORMRQ', SIDE // TRANS, M, N, K,
+     $        -1 ) )
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DORMRZ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      LDWORK = NW
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+         IWS = NW*NB
+         IF( LWORK.LT.IWS ) THEN
+            NB = LWORK / LDWORK
+            NBMIN = MAX( 2, ILAENV( 2, 'DORMRQ', SIDE // TRANS, M, N, K,
+     $              -1 ) )
+         END IF
+      ELSE
+         IWS = NW
+      END IF
+*
+      IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
+*
+*        Use unblocked code
+*
+         CALL DORMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC,
+     $                WORK, IINFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( ( LEFT .AND. .NOT.NOTRAN ) .OR.
+     $       ( .NOT.LEFT .AND. NOTRAN ) ) THEN
+            I1 = 1
+            I2 = K
+            I3 = NB
+         ELSE
+            I1 = ( ( K-1 ) / NB )*NB + 1
+            I2 = 1
+            I3 = -NB
+         END IF
+*
+         IF( LEFT ) THEN
+            NI = N
+            JC = 1
+            JA = M - L + 1
+         ELSE
+            MI = M
+            IC = 1
+            JA = N - L + 1
+         END IF
+*
+         IF( NOTRAN ) THEN
+            TRANST = 'T'
+         ELSE
+            TRANST = 'N'
+         END IF
+*
+         DO 10 I = I1, I2, I3
+            IB = MIN( NB, K-I+1 )
+*
+*           Form the triangular factor of the block reflector
+*           H = H(i+ib-1) . . . H(i+1) H(i)
+*
+            CALL DLARZT( 'Backward', 'Rowwise', L, IB, A( I, JA ), LDA,
+     $                   TAU( I ), T, LDT )
+*
+            IF( LEFT ) THEN
+*
+*              H or H' is applied to C(i:m,1:n)
+*
+               MI = M - I + 1
+               IC = I
+            ELSE
+*
+*              H or H' is applied to C(1:m,i:n)
+*
+               NI = N - I + 1
+               JC = I
+            END IF
+*
+*           Apply H or H'
+*
+            CALL DLARZB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI,
+     $                   IB, L, A( I, JA ), LDA, T, LDT, C( IC, JC ),
+     $                   LDC, WORK, LDWORK )
+   10    CONTINUE
+*
+      END IF
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of DORMRZ
+*
+      END
diff --git a/costa/native/external/lapack/dormtr.f b/costa/native/external/lapack/dormtr.f
new file mode 100644
index 000000000..995754bdf
--- /dev/null
+++ b/costa/native/external/lapack/dormtr.f
@@ -0,0 +1,223 @@
+      SUBROUTINE DORMTR( SIDE, UPLO, TRANS, M, N, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS, UPLO
+      INTEGER            INFO, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DORMTR overwrites the general real M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'T':      Q**T * C       C * Q**T
+*
+*  where Q is a real orthogonal matrix of order nq, with nq = m if
+*  SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of
+*  nq-1 elementary reflectors, as returned by DSYTRD:
+*
+*  if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1);
+*
+*  if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1).
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**T from the Left;
+*          = 'R': apply Q or Q**T from the Right.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U': Upper triangle of A contains elementary reflectors
+*                 from DSYTRD;
+*          = 'L': Lower triangle of A contains elementary reflectors
+*                 from DSYTRD.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q;
+*          = 'T':  Transpose, apply Q**T.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension
+*                               (LDA,M) if SIDE = 'L'
+*                               (LDA,N) if SIDE = 'R'
+*          The vectors which define the elementary reflectors, as
+*          returned by DSYTRD.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*          LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'.
+*
+*  TAU     (input) DOUBLE PRECISION array, dimension
+*                               (M-1) if SIDE = 'L'
+*                               (N-1) if SIDE = 'R'
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by DSYTRD.
+*
+*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE = 'L', and
+*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LEFT, LQUERY, UPPER
+      INTEGER            I1, I2, IINFO, LWKOPT, MI, NB, NI, NQ, NW
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DORMQL, DORMQR, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      UPPER = LSAME( UPLO, 'U' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.LSAME( TRANS, 'T' ) )
+     $          THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         IF( UPPER ) THEN
+            IF( LEFT ) THEN
+               NB = ILAENV( 1, 'DORMQL', SIDE // TRANS, M-1, N, M-1,
+     $              -1 )
+            ELSE
+               NB = ILAENV( 1, 'DORMQL', SIDE // TRANS, M, N-1, N-1,
+     $              -1 )
+            END IF
+         ELSE
+            IF( LEFT ) THEN
+               NB = ILAENV( 1, 'DORMQR', SIDE // TRANS, M-1, N, M-1,
+     $              -1 )
+            ELSE
+               NB = ILAENV( 1, 'DORMQR', SIDE // TRANS, M, N-1, N-1,
+     $              -1 )
+            END IF
+         END IF
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DORMTR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. NQ.EQ.1 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( LEFT ) THEN
+         MI = M - 1
+         NI = N
+      ELSE
+         MI = M
+         NI = N - 1
+      END IF
+*
+      IF( UPPER ) THEN
+*
+*        Q was determined by a call to DSYTRD with UPLO = 'U'
+*
+         CALL DORMQL( SIDE, TRANS, MI, NI, NQ-1, A( 1, 2 ), LDA, TAU, C,
+     $                LDC, WORK, LWORK, IINFO )
+      ELSE
+*
+*        Q was determined by a call to DSYTRD with UPLO = 'L'
+*
+         IF( LEFT ) THEN
+            I1 = 2
+            I2 = 1
+         ELSE
+            I1 = 1
+            I2 = 2
+         END IF
+         CALL DORMQR( SIDE, TRANS, MI, NI, NQ-1, A( 2, 1 ), LDA, TAU,
+     $                C( I1, I2 ), LDC, WORK, LWORK, IINFO )
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of DORMTR
+*
+      END
diff --git a/costa/native/external/lapack/dpbcon.f b/costa/native/external/lapack/dpbcon.f
new file mode 100644
index 000000000..d8ce1ef2c
--- /dev/null
+++ b/costa/native/external/lapack/dpbcon.f
@@ -0,0 +1,188 @@
+      SUBROUTINE DPBCON( UPLO, N, KD, AB, LDAB, ANORM, RCOND, WORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, N
+      DOUBLE PRECISION   ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   AB( LDAB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPBCON estimates the reciprocal of the condition number (in the
+*  1-norm) of a real symmetric positive definite band matrix using the
+*  Cholesky factorization A = U**T*U or A = L*L**T computed by DPBTRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangular factor stored in AB;
+*          = 'L':  Lower triangular factor stored in AB.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input) DOUBLE PRECISION array, dimension (LDAB,N)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**T*U or A = L*L**T of the band matrix A, stored in the
+*          first KD+1 rows of the array.  The j-th column of U or L is
+*          stored in the j-th column of the array AB as follows:
+*          if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO ='L', AB(1+i-j,j)    = L(i,j) for j<=i<=min(n,j+kd).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  ANORM   (input) DOUBLE PRECISION
+*          The 1-norm (or infinity-norm) of the symmetric band matrix A.
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an
+*          estimate of the 1-norm of inv(A) computed in this routine.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      CHARACTER          NORMIN
+      INTEGER            IX, KASE
+      DOUBLE PRECISION   AINVNM, SCALE, SCALEL, SCALEU, SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, IDAMAX, DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLACON, DLATBS, DRSCL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -5
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPBCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.EQ.ZERO ) THEN
+         RETURN
+      END IF
+*
+      SMLNUM = DLAMCH( 'Safe minimum' )
+*
+*     Estimate the 1-norm of the inverse.
+*
+      KASE = 0
+      NORMIN = 'N'
+   10 CONTINUE
+      CALL DLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+         IF( UPPER ) THEN
+*
+*           Multiply by inv(U').
+*
+            CALL DLATBS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N,
+     $                   KD, AB, LDAB, WORK, SCALEL, WORK( 2*N+1 ),
+     $                   INFO )
+            NORMIN = 'Y'
+*
+*           Multiply by inv(U).
+*
+            CALL DLATBS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   KD, AB, LDAB, WORK, SCALEU, WORK( 2*N+1 ),
+     $                   INFO )
+         ELSE
+*
+*           Multiply by inv(L).
+*
+            CALL DLATBS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   KD, AB, LDAB, WORK, SCALEL, WORK( 2*N+1 ),
+     $                   INFO )
+            NORMIN = 'Y'
+*
+*           Multiply by inv(L').
+*
+            CALL DLATBS( 'Lower', 'Transpose', 'Non-unit', NORMIN, N,
+     $                   KD, AB, LDAB, WORK, SCALEU, WORK( 2*N+1 ),
+     $                   INFO )
+         END IF
+*
+*        Multiply by 1/SCALE if doing so will not cause overflow.
+*
+         SCALE = SCALEL*SCALEU
+         IF( SCALE.NE.ONE ) THEN
+            IX = IDAMAX( N, WORK, 1 )
+            IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO )
+     $         GO TO 20
+            CALL DRSCL( N, SCALE, WORK, 1 )
+         END IF
+         GO TO 10
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+   20 CONTINUE
+*
+      RETURN
+*
+*     End of DPBCON
+*
+      END
diff --git a/costa/native/external/lapack/dpbequ.f b/costa/native/external/lapack/dpbequ.f
new file mode 100644
index 000000000..b82fbf543
--- /dev/null
+++ b/costa/native/external/lapack/dpbequ.f
@@ -0,0 +1,167 @@
+      SUBROUTINE DPBEQU( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, N
+      DOUBLE PRECISION   AMAX, SCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AB( LDAB, * ), S( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPBEQU computes row and column scalings intended to equilibrate a
+*  symmetric positive definite band matrix A and reduce its condition
+*  number (with respect to the two-norm).  S contains the scale factors,
+*  S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with
+*  elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal.  This
+*  choice of S puts the condition number of B within a factor N of the
+*  smallest possible condition number over all possible diagonal
+*  scalings.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangular of A is stored;
+*          = 'L':  Lower triangular of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input) DOUBLE PRECISION array, dimension (LDAB,N)
+*          The upper or lower triangle of the symmetric band matrix A,
+*          stored in the first KD+1 rows of the array.  The j-th column
+*          of A is stored in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*  LDAB     (input) INTEGER
+*          The leading dimension of the array A.  LDAB >= KD+1.
+*
+*  S       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, S contains the scale factors for A.
+*
+*  SCOND   (output) DOUBLE PRECISION
+*          If INFO = 0, S contains the ratio of the smallest S(i) to
+*          the largest S(i).  If SCOND >= 0.1 and AMAX is neither too
+*          large nor too small, it is not worth scaling by S.
+*
+*  AMAX    (output) DOUBLE PRECISION
+*          Absolute value of largest matrix element.  If AMAX is very
+*          close to overflow or very close to underflow, the matrix
+*          should be scaled.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = i, the i-th diagonal element is nonpositive.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, J
+      DOUBLE PRECISION   SMIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPBEQU', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         SCOND = ONE
+         AMAX = ZERO
+         RETURN
+      END IF
+*
+      IF( UPPER ) THEN
+         J = KD + 1
+      ELSE
+         J = 1
+      END IF
+*
+*     Initialize SMIN and AMAX.
+*
+      S( 1 ) = AB( J, 1 )
+      SMIN = S( 1 )
+      AMAX = S( 1 )
+*
+*     Find the minimum and maximum diagonal elements.
+*
+      DO 10 I = 2, N
+         S( I ) = AB( J, I )
+         SMIN = MIN( SMIN, S( I ) )
+         AMAX = MAX( AMAX, S( I ) )
+   10 CONTINUE
+*
+      IF( SMIN.LE.ZERO ) THEN
+*
+*        Find the first non-positive diagonal element and return.
+*
+         DO 20 I = 1, N
+            IF( S( I ).LE.ZERO ) THEN
+               INFO = I
+               RETURN
+            END IF
+   20    CONTINUE
+      ELSE
+*
+*        Set the scale factors to the reciprocals
+*        of the diagonal elements.
+*
+         DO 30 I = 1, N
+            S( I ) = ONE / SQRT( S( I ) )
+   30    CONTINUE
+*
+*        Compute SCOND = min(S(I)) / max(S(I))
+*
+         SCOND = SQRT( SMIN ) / SQRT( AMAX )
+      END IF
+      RETURN
+*
+*     End of DPBEQU
+*
+      END
diff --git a/costa/native/external/lapack/dpbrfs.f b/costa/native/external/lapack/dpbrfs.f
new file mode 100644
index 000000000..62d747282
--- /dev/null
+++ b/costa/native/external/lapack/dpbrfs.f
@@ -0,0 +1,337 @@
+      SUBROUTINE DPBRFS( UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, B,
+     $                   LDB, X, LDX, FERR, BERR, WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ),
+     $                   BERR( * ), FERR( * ), WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPBRFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is symmetric positive definite
+*  and banded, and provides error bounds and backward error estimates
+*  for the solution.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AB      (input) DOUBLE PRECISION array, dimension (LDAB,N)
+*          The upper or lower triangle of the symmetric band matrix A,
+*          stored in the first KD+1 rows of the array.  The j-th column
+*          of A is stored in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  AFB     (input) DOUBLE PRECISION array, dimension (LDAFB,N)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**T*U or A = L*L**T of the band matrix A as computed by
+*          DPBTRF, in the same storage format as A (see AB).
+*
+*  LDAFB   (input) INTEGER
+*          The leading dimension of the array AFB.  LDAFB >= KD+1.
+*
+*  B       (input) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by DPBTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D+0 )
+      DOUBLE PRECISION   THREE
+      PARAMETER          ( THREE = 3.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            COUNT, I, J, K, KASE, L, NZ
+      DOUBLE PRECISION   EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DCOPY, DLACON, DPBTRS, DSBMV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -6
+      ELSE IF( LDAFB.LT.KD+1 ) THEN
+         INFO = -8
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPBRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = MIN( N+1, 2*KD+2 )
+      EPS = DLAMCH( 'Epsilon' )
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - A * X
+*
+         CALL DCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 )
+         CALL DSBMV( UPLO, N, KD, -ONE, AB, LDAB, X( 1, J ), 1, ONE,
+     $               WORK( N+1 ), 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            WORK( I ) = ABS( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(A)*abs(X) + abs(B).
+*
+         IF( UPPER ) THEN
+            DO 50 K = 1, N
+               S = ZERO
+               XK = ABS( X( K, J ) )
+               L = KD + 1 - K
+               DO 40 I = MAX( 1, K-KD ), K - 1
+                  WORK( I ) = WORK( I ) + ABS( AB( L+I, K ) )*XK
+                  S = S + ABS( AB( L+I, K ) )*ABS( X( I, J ) )
+   40          CONTINUE
+               WORK( K ) = WORK( K ) + ABS( AB( KD+1, K ) )*XK + S
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               XK = ABS( X( K, J ) )
+               WORK( K ) = WORK( K ) + ABS( AB( 1, K ) )*XK
+               L = 1 - K
+               DO 60 I = K + 1, MIN( N, K+KD )
+                  WORK( I ) = WORK( I ) + ABS( AB( L+I, K ) )*XK
+                  S = S + ABS( AB( L+I, K ) )*ABS( X( I, J ) )
+   60          CONTINUE
+               WORK( K ) = WORK( K ) + S
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) )
+            ELSE
+               S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) /
+     $             ( WORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL DPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK( N+1 ), N,
+     $                   INFO )
+            CALL DAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(A))*
+*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(A) is the inverse of A
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(A)*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use DLACON to estimate the infinity-norm of the matrix
+*           inv(A) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I )
+            ELSE
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL DLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ),
+     $                KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(A').
+*
+               CALL DPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK( N+1 ), N,
+     $                      INFO )
+               DO 110 I = 1, N
+                  WORK( N+I ) = WORK( N+I )*WORK( I )
+  110          CONTINUE
+            ELSE IF( KASE.EQ.2 ) THEN
+*
+*              Multiply by inv(A)*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( N+I ) = WORK( N+I )*WORK( I )
+  120          CONTINUE
+               CALL DPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK( N+1 ), N,
+     $                      INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, ABS( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of DPBRFS
+*
+      END
diff --git a/costa/native/external/lapack/dpbstf.f b/costa/native/external/lapack/dpbstf.f
new file mode 100644
index 000000000..f88ffefa1
--- /dev/null
+++ b/costa/native/external/lapack/dpbstf.f
@@ -0,0 +1,251 @@
+      SUBROUTINE DPBSTF( UPLO, N, KD, AB, LDAB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPBSTF computes a split Cholesky factorization of a real
+*  symmetric positive definite band matrix A.
+*
+*  This routine is designed to be used in conjunction with DSBGST.
+*
+*  The factorization has the form  A = S**T*S  where S is a band matrix
+*  of the same bandwidth as A and the following structure:
+*
+*    S = ( U    )
+*        ( M  L )
+*
+*  where U is upper triangular of order m = (n+kd)/2, and L is lower
+*  triangular of order n-m.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) DOUBLE PRECISION array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first kd+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*          On exit, if INFO = 0, the factor S from the split Cholesky
+*          factorization A = S**T*S. See Further Details.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, the factorization could not be completed,
+*               because the updated element a(i,i) was negative; the
+*               matrix A is not positive definite.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  N = 7, KD = 2:
+*
+*  S = ( s11  s12  s13                     )
+*      (      s22  s23  s24                )
+*      (           s33  s34                )
+*      (                s44                )
+*      (           s53  s54  s55           )
+*      (                s64  s65  s66      )
+*      (                     s75  s76  s77 )
+*
+*  If UPLO = 'U', the array AB holds:
+*
+*  on entry:                          on exit:
+*
+*   *    *   a13  a24  a35  a46  a57   *    *   s13  s24  s53  s64  s75
+*   *   a12  a23  a34  a45  a56  a67   *   s12  s23  s34  s54  s65  s76
+*  a11  a22  a33  a44  a55  a66  a77  s11  s22  s33  s44  s55  s66  s77
+*
+*  If UPLO = 'L', the array AB holds:
+*
+*  on entry:                          on exit:
+*
+*  a11  a22  a33  a44  a55  a66  a77  s11  s22  s33  s44  s55  s66  s77
+*  a21  a32  a43  a54  a65  a76   *   s12  s23  s34  s54  s65  s76   *
+*  a31  a42  a53  a64  a64   *    *   s13  s24  s53  s64  s75   *    *
+*
+*  Array elements marked * are not used by the routine.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, KLD, KM, M
+      DOUBLE PRECISION   AJJ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSCAL, DSYR, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPBSTF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      KLD = MAX( 1, LDAB-1 )
+*
+*     Set the splitting point m.
+*
+      M = ( N+KD ) / 2
+*
+      IF( UPPER ) THEN
+*
+*        Factorize A(m+1:n,m+1:n) as L**T*L, and update A(1:m,1:m).
+*
+         DO 10 J = N, M + 1, -1
+*
+*           Compute s(j,j) and test for non-positive-definiteness.
+*
+            AJJ = AB( KD+1, J )
+            IF( AJJ.LE.ZERO )
+     $         GO TO 50
+            AJJ = SQRT( AJJ )
+            AB( KD+1, J ) = AJJ
+            KM = MIN( J-1, KD )
+*
+*           Compute elements j-km:j-1 of the j-th column and update the
+*           the leading submatrix within the band.
+*
+            CALL DSCAL( KM, ONE / AJJ, AB( KD+1-KM, J ), 1 )
+            CALL DSYR( 'Upper', KM, -ONE, AB( KD+1-KM, J ), 1,
+     $                 AB( KD+1, J-KM ), KLD )
+   10    CONTINUE
+*
+*        Factorize the updated submatrix A(1:m,1:m) as U**T*U.
+*
+         DO 20 J = 1, M
+*
+*           Compute s(j,j) and test for non-positive-definiteness.
+*
+            AJJ = AB( KD+1, J )
+            IF( AJJ.LE.ZERO )
+     $         GO TO 50
+            AJJ = SQRT( AJJ )
+            AB( KD+1, J ) = AJJ
+            KM = MIN( KD, M-J )
+*
+*           Compute elements j+1:j+km of the j-th row and update the
+*           trailing submatrix within the band.
+*
+            IF( KM.GT.0 ) THEN
+               CALL DSCAL( KM, ONE / AJJ, AB( KD, J+1 ), KLD )
+               CALL DSYR( 'Upper', KM, -ONE, AB( KD, J+1 ), KLD,
+     $                    AB( KD+1, J+1 ), KLD )
+            END IF
+   20    CONTINUE
+      ELSE
+*
+*        Factorize A(m+1:n,m+1:n) as L**T*L, and update A(1:m,1:m).
+*
+         DO 30 J = N, M + 1, -1
+*
+*           Compute s(j,j) and test for non-positive-definiteness.
+*
+            AJJ = AB( 1, J )
+            IF( AJJ.LE.ZERO )
+     $         GO TO 50
+            AJJ = SQRT( AJJ )
+            AB( 1, J ) = AJJ
+            KM = MIN( J-1, KD )
+*
+*           Compute elements j-km:j-1 of the j-th row and update the
+*           trailing submatrix within the band.
+*
+            CALL DSCAL( KM, ONE / AJJ, AB( KM+1, J-KM ), KLD )
+            CALL DSYR( 'Lower', KM, -ONE, AB( KM+1, J-KM ), KLD,
+     $                 AB( 1, J-KM ), KLD )
+   30    CONTINUE
+*
+*        Factorize the updated submatrix A(1:m,1:m) as U**T*U.
+*
+         DO 40 J = 1, M
+*
+*           Compute s(j,j) and test for non-positive-definiteness.
+*
+            AJJ = AB( 1, J )
+            IF( AJJ.LE.ZERO )
+     $         GO TO 50
+            AJJ = SQRT( AJJ )
+            AB( 1, J ) = AJJ
+            KM = MIN( KD, M-J )
+*
+*           Compute elements j+1:j+km of the j-th column and update the
+*           trailing submatrix within the band.
+*
+            IF( KM.GT.0 ) THEN
+               CALL DSCAL( KM, ONE / AJJ, AB( 2, J ), 1 )
+               CALL DSYR( 'Lower', KM, -ONE, AB( 2, J ), 1,
+     $                    AB( 1, J+1 ), KLD )
+            END IF
+   40    CONTINUE
+      END IF
+      RETURN
+*
+   50 CONTINUE
+      INFO = J
+      RETURN
+*
+*     End of DPBSTF
+*
+      END
diff --git a/costa/native/external/lapack/dpbsv.f b/costa/native/external/lapack/dpbsv.f
new file mode 100644
index 000000000..79ddd4387
--- /dev/null
+++ b/costa/native/external/lapack/dpbsv.f
@@ -0,0 +1,152 @@
+      SUBROUTINE DPBSV( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AB( LDAB, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPBSV computes the solution to a real system of linear equations
+*     A * X = B,
+*  where A is an N-by-N symmetric positive definite band matrix and X
+*  and B are N-by-NRHS matrices.
+*
+*  The Cholesky decomposition is used to factor A as
+*     A = U**T * U,  if UPLO = 'U', or
+*     A = L * L**T,  if UPLO = 'L',
+*  where U is an upper triangular band matrix, and L is a lower
+*  triangular band matrix, with the same number of superdiagonals or
+*  subdiagonals as A.  The factored form of A is then used to solve the
+*  system of equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AB      (input/output) DOUBLE PRECISION array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(KD+1+i-j,j) = A(i,j) for max(1,j-KD)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(N,j+KD).
+*          See below for further details.
+*
+*          On exit, if INFO = 0, the triangular factor U or L from the
+*          Cholesky factorization A = U**T*U or A = L*L**T of the band
+*          matrix A, in the same storage format as A.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i of A is not
+*                positive definite, so the factorization could not be
+*                completed, and the solution has not been computed.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  N = 6, KD = 2, and UPLO = 'U':
+*
+*  On entry:                       On exit:
+*
+*      *    *   a13  a24  a35  a46      *    *   u13  u24  u35  u46
+*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
+*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
+*
+*  Similarly, if UPLO = 'L' the format of A is as follows:
+*
+*  On entry:                       On exit:
+*
+*     a11  a22  a33  a44  a55  a66     l11  l22  l33  l44  l55  l66
+*     a21  a32  a43  a54  a65   *      l21  l32  l43  l54  l65   *
+*     a31  a42  a53  a64   *    *      l31  l42  l53  l64   *    *
+*
+*  Array elements marked * are not used by the routine.
+*
+*  =====================================================================
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DPBTRF, DPBTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPBSV ', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the Cholesky factorization A = U'*U or A = L*L'.
+*
+      CALL DPBTRF( UPLO, N, KD, AB, LDAB, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL DPBTRS( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO )
+*
+      END IF
+      RETURN
+*
+*     End of DPBSV
+*
+      END
diff --git a/costa/native/external/lapack/dpbsvx.f b/costa/native/external/lapack/dpbsvx.f
new file mode 100644
index 000000000..66406ce70
--- /dev/null
+++ b/costa/native/external/lapack/dpbsvx.f
@@ -0,0 +1,424 @@
+      SUBROUTINE DPBSVX( FACT, UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB,
+     $                   EQUED, S, B, LDB, X, LDX, RCOND, FERR, BERR,
+     $                   WORK, IWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, FACT, UPLO
+      INTEGER            INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ),
+     $                   BERR( * ), FERR( * ), S( * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPBSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to
+*  compute the solution to a real system of linear equations
+*     A * X = B,
+*  where A is an N-by-N symmetric positive definite band matrix and X
+*  and B are N-by-NRHS matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'E', real scaling factors are computed to equilibrate
+*     the system:
+*        diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B
+*     Whether or not the system will be equilibrated depends on the
+*     scaling of the matrix A, but if equilibration is used, A is
+*     overwritten by diag(S)*A*diag(S) and B by diag(S)*B.
+*
+*  2. If FACT = 'N' or 'E', the Cholesky decomposition is used to
+*     factor the matrix A (after equilibration if FACT = 'E') as
+*        A = U**T * U,  if UPLO = 'U', or
+*        A = L * L**T,  if UPLO = 'L',
+*     where U is an upper triangular band matrix, and L is a lower
+*     triangular band matrix.
+*
+*  3. If the leading i-by-i principal minor is not positive definite,
+*     then the routine returns with INFO = i. Otherwise, the factored
+*     form of A is used to estimate the condition number of the matrix
+*     A.  If the reciprocal of the condition number is less than machine
+*     precision, INFO = N+1 is returned as a warning, but the routine
+*     still goes on to solve for X and compute error bounds as
+*     described below.
+*
+*  4. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  5. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  6. If equilibration was used, the matrix X is premultiplied by
+*     diag(S) so that it solves the original system before
+*     equilibration.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of the matrix A is
+*          supplied on entry, and if not, whether the matrix A should be
+*          equilibrated before it is factored.
+*          = 'F':  On entry, AFB contains the factored form of A.
+*                  If EQUED = 'Y', the matrix A has been equilibrated
+*                  with scaling factors given by S.  AB and AFB will not
+*                  be modified.
+*          = 'N':  The matrix A will be copied to AFB and factored.
+*          = 'E':  The matrix A will be equilibrated if necessary, then
+*                  copied to AFB and factored.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right-hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AB      (input/output) DOUBLE PRECISION array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first KD+1 rows of the array, except
+*          if FACT = 'F' and EQUED = 'Y', then A must contain the
+*          equilibrated matrix diag(S)*A*diag(S).  The j-th column of A
+*          is stored in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(KD+1+i-j,j) = A(i,j) for max(1,j-KD)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(N,j+KD).
+*          See below for further details.
+*
+*          On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by
+*          diag(S)*A*diag(S).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array A.  LDAB >= KD+1.
+*
+*  AFB     (input or output) DOUBLE PRECISION array, dimension (LDAFB,N)
+*          If FACT = 'F', then AFB is an input argument and on entry
+*          contains the triangular factor U or L from the Cholesky
+*          factorization A = U**T*U or A = L*L**T of the band matrix
+*          A, in the same storage format as A (see AB).  If EQUED = 'Y',
+*          then AFB is the factored form of the equilibrated matrix A.
+*
+*          If FACT = 'N', then AFB is an output argument and on exit
+*          returns the triangular factor U or L from the Cholesky
+*          factorization A = U**T*U or A = L*L**T.
+*
+*          If FACT = 'E', then AFB is an output argument and on exit
+*          returns the triangular factor U or L from the Cholesky
+*          factorization A = U**T*U or A = L*L**T of the equilibrated
+*          matrix A (see the description of A for the form of the
+*          equilibrated matrix).
+*
+*  LDAFB   (input) INTEGER
+*          The leading dimension of the array AFB.  LDAFB >= KD+1.
+*
+*  EQUED   (input or output) CHARACTER*1
+*          Specifies the form of equilibration that was done.
+*          = 'N':  No equilibration (always true if FACT = 'N').
+*          = 'Y':  Equilibration was done, i.e., A has been replaced by
+*                  diag(S) * A * diag(S).
+*          EQUED is an input argument if FACT = 'F'; otherwise, it is an
+*          output argument.
+*
+*  S       (input or output) DOUBLE PRECISION array, dimension (N)
+*          The scale factors for A; not accessed if EQUED = 'N'.  S is
+*          an input argument if FACT = 'F'; otherwise, S is an output
+*          argument.  If FACT = 'F' and EQUED = 'Y', each element of S
+*          must be positive.
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y',
+*          B is overwritten by diag(S) * B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) DOUBLE PRECISION array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to
+*          the original system of equations.  Note that if EQUED = 'Y',
+*          A and B are modified on exit, and the solution to the
+*          equilibrated system is inv(diag(S))*X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The estimate of the reciprocal condition number of the matrix
+*          A after equilibration (if done).  If RCOND is less than the
+*          machine precision (in particular, if RCOND = 0), the matrix
+*          is singular to working precision.  This condition is
+*          indicated by a return code of INFO > 0.
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= N:  the leading minor of order i of A is
+*                       not positive definite, so the factorization
+*                       could not be completed, and the solution has not
+*                       been computed. RCOND = 0 is returned.
+*                = N+1: U is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  N = 6, KD = 2, and UPLO = 'U':
+*
+*  Two-dimensional storage of the symmetric matrix A:
+*
+*     a11  a12  a13
+*          a22  a23  a24
+*               a33  a34  a35
+*                    a44  a45  a46
+*                         a55  a56
+*     (aij=conjg(aji))         a66
+*
+*  Band storage of the upper triangle of A:
+*
+*      *    *   a13  a24  a35  a46
+*      *   a12  a23  a34  a45  a56
+*     a11  a22  a33  a44  a55  a66
+*
+*  Similarly, if UPLO = 'L' the format of A is as follows:
+*
+*     a11  a22  a33  a44  a55  a66
+*     a21  a32  a43  a54  a65   *
+*     a31  a42  a53  a64   *    *
+*
+*  Array elements marked * are not used by the routine.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            EQUIL, NOFACT, RCEQU, UPPER
+      INTEGER            I, INFEQU, J, J1, J2
+      DOUBLE PRECISION   AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, DLANSB
+      EXTERNAL           LSAME, DLAMCH, DLANSB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLACPY, DLAQSB, DPBCON, DPBEQU, DPBRFS,
+     $                   DPBTRF, DPBTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      EQUIL = LSAME( FACT, 'E' )
+      UPPER = LSAME( UPLO, 'U' )
+      IF( NOFACT .OR. EQUIL ) THEN
+         EQUED = 'N'
+         RCEQU = .FALSE.
+      ELSE
+         RCEQU = LSAME( EQUED, 'Y' )
+         SMLNUM = DLAMCH( 'Safe minimum' )
+         BIGNUM = ONE / SMLNUM
+      END IF
+*
+*     Test the input parameters.
+*
+      IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) )
+     $     THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -7
+      ELSE IF( LDAFB.LT.KD+1 ) THEN
+         INFO = -9
+      ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT.
+     $         ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN
+         INFO = -10
+      ELSE
+         IF( RCEQU ) THEN
+            SMIN = BIGNUM
+            SMAX = ZERO
+            DO 10 J = 1, N
+               SMIN = MIN( SMIN, S( J ) )
+               SMAX = MAX( SMAX, S( J ) )
+   10       CONTINUE
+            IF( SMIN.LE.ZERO ) THEN
+               INFO = -11
+            ELSE IF( N.GT.0 ) THEN
+               SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM )
+            ELSE
+               SCOND = ONE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            IF( LDB.LT.MAX( 1, N ) ) THEN
+               INFO = -13
+            ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+               INFO = -15
+            END IF
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPBSVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( EQUIL ) THEN
+*
+*        Compute row and column scalings to equilibrate the matrix A.
+*
+         CALL DPBEQU( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, INFEQU )
+         IF( INFEQU.EQ.0 ) THEN
+*
+*           Equilibrate the matrix.
+*
+            CALL DLAQSB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED )
+            RCEQU = LSAME( EQUED, 'Y' )
+         END IF
+      END IF
+*
+*     Scale the right-hand side.
+*
+      IF( RCEQU ) THEN
+         DO 30 J = 1, NRHS
+            DO 20 I = 1, N
+               B( I, J ) = S( I )*B( I, J )
+   20       CONTINUE
+   30    CONTINUE
+      END IF
+*
+      IF( NOFACT .OR. EQUIL ) THEN
+*
+*        Compute the Cholesky factorization A = U'*U or A = L*L'.
+*
+         IF( UPPER ) THEN
+            DO 40 J = 1, N
+               J1 = MAX( J-KD, 1 )
+               CALL DCOPY( J-J1+1, AB( KD+1-J+J1, J ), 1,
+     $                     AFB( KD+1-J+J1, J ), 1 )
+   40       CONTINUE
+         ELSE
+            DO 50 J = 1, N
+               J2 = MIN( J+KD, N )
+               CALL DCOPY( J2-J+1, AB( 1, J ), 1, AFB( 1, J ), 1 )
+   50       CONTINUE
+         END IF
+*
+         CALL DPBTRF( UPLO, N, KD, AFB, LDAFB, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      ANORM = DLANSB( '1', UPLO, N, KD, AB, LDAB, WORK )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL DPBCON( UPLO, N, KD, AFB, LDAFB, ANORM, RCOND, WORK, IWORK,
+     $             INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.DLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution matrix X.
+*
+      CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL DPBTRS( UPLO, N, KD, NRHS, AFB, LDAFB, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solution and
+*     compute error bounds and backward error estimates for it.
+*
+      CALL DPBRFS( UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, B, LDB, X,
+     $             LDX, FERR, BERR, WORK, IWORK, INFO )
+*
+*     Transform the solution matrix X to a solution of the original
+*     system.
+*
+      IF( RCEQU ) THEN
+         DO 70 J = 1, NRHS
+            DO 60 I = 1, N
+               X( I, J ) = S( I )*X( I, J )
+   60       CONTINUE
+   70    CONTINUE
+         DO 80 J = 1, NRHS
+            FERR( J ) = FERR( J ) / SCOND
+   80    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DPBSVX
+*
+      END
diff --git a/costa/native/external/lapack/dpbtf2.f b/costa/native/external/lapack/dpbtf2.f
new file mode 100644
index 000000000..6f211f31e
--- /dev/null
+++ b/costa/native/external/lapack/dpbtf2.f
@@ -0,0 +1,195 @@
+      SUBROUTINE DPBTF2( UPLO, N, KD, AB, LDAB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPBTF2 computes the Cholesky factorization of a real symmetric
+*  positive definite band matrix A.
+*
+*  The factorization has the form
+*     A = U' * U ,  if UPLO = 'U', or
+*     A = L  * L',  if UPLO = 'L',
+*  where U is an upper triangular matrix, U' is the transpose of U, and
+*  L is lower triangular.
+*
+*  This is the unblocked version of the algorithm, calling Level 2 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored:
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of super-diagonals of the matrix A if UPLO = 'U',
+*          or the number of sub-diagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) DOUBLE PRECISION array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*          On exit, if INFO = 0, the triangular factor U or L from the
+*          Cholesky factorization A = U'*U or A = L*L' of the band
+*          matrix A, in the same storage format as A.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*          > 0: if INFO = k, the leading minor of order k is not
+*               positive definite, and the factorization could not be
+*               completed.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  N = 6, KD = 2, and UPLO = 'U':
+*
+*  On entry:                       On exit:
+*
+*      *    *   a13  a24  a35  a46      *    *   u13  u24  u35  u46
+*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
+*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
+*
+*  Similarly, if UPLO = 'L' the format of A is as follows:
+*
+*  On entry:                       On exit:
+*
+*     a11  a22  a33  a44  a55  a66     l11  l22  l33  l44  l55  l66
+*     a21  a32  a43  a54  a65   *      l21  l32  l43  l54  l65   *
+*     a31  a42  a53  a64   *    *      l31  l42  l53  l64   *    *
+*
+*  Array elements marked * are not used by the routine.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, KLD, KN
+      DOUBLE PRECISION   AJJ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSCAL, DSYR, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPBTF2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      KLD = MAX( 1, LDAB-1 )
+*
+      IF( UPPER ) THEN
+*
+*        Compute the Cholesky factorization A = U'*U.
+*
+         DO 10 J = 1, N
+*
+*           Compute U(J,J) and test for non-positive-definiteness.
+*
+            AJJ = AB( KD+1, J )
+            IF( AJJ.LE.ZERO )
+     $         GO TO 30
+            AJJ = SQRT( AJJ )
+            AB( KD+1, J ) = AJJ
+*
+*           Compute elements J+1:J+KN of row J and update the
+*           trailing submatrix within the band.
+*
+            KN = MIN( KD, N-J )
+            IF( KN.GT.0 ) THEN
+               CALL DSCAL( KN, ONE / AJJ, AB( KD, J+1 ), KLD )
+               CALL DSYR( 'Upper', KN, -ONE, AB( KD, J+1 ), KLD,
+     $                    AB( KD+1, J+1 ), KLD )
+            END IF
+   10    CONTINUE
+      ELSE
+*
+*        Compute the Cholesky factorization A = L*L'.
+*
+         DO 20 J = 1, N
+*
+*           Compute L(J,J) and test for non-positive-definiteness.
+*
+            AJJ = AB( 1, J )
+            IF( AJJ.LE.ZERO )
+     $         GO TO 30
+            AJJ = SQRT( AJJ )
+            AB( 1, J ) = AJJ
+*
+*           Compute elements J+1:J+KN of column J and update the
+*           trailing submatrix within the band.
+*
+            KN = MIN( KD, N-J )
+            IF( KN.GT.0 ) THEN
+               CALL DSCAL( KN, ONE / AJJ, AB( 2, J ), 1 )
+               CALL DSYR( 'Lower', KN, -ONE, AB( 2, J ), 1,
+     $                    AB( 1, J+1 ), KLD )
+            END IF
+   20    CONTINUE
+      END IF
+      RETURN
+*
+   30 CONTINUE
+      INFO = J
+      RETURN
+*
+*     End of DPBTF2
+*
+      END
diff --git a/costa/native/external/lapack/dpbtrf.f b/costa/native/external/lapack/dpbtrf.f
new file mode 100644
index 000000000..6974eb798
--- /dev/null
+++ b/costa/native/external/lapack/dpbtrf.f
@@ -0,0 +1,365 @@
+      SUBROUTINE DPBTRF( UPLO, N, KD, AB, LDAB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPBTRF computes the Cholesky factorization of a real symmetric
+*  positive definite band matrix A.
+*
+*  The factorization has the form
+*     A = U**T * U,  if UPLO = 'U', or
+*     A = L  * L**T,  if UPLO = 'L',
+*  where U is an upper triangular matrix and L is lower triangular.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) DOUBLE PRECISION array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*          On exit, if INFO = 0, the triangular factor U or L from the
+*          Cholesky factorization A = U**T*U or A = L*L**T of the band
+*          matrix A, in the same storage format as A.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i is not
+*                positive definite, and the factorization could not be
+*                completed.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  N = 6, KD = 2, and UPLO = 'U':
+*
+*  On entry:                       On exit:
+*
+*      *    *   a13  a24  a35  a46      *    *   u13  u24  u35  u46
+*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
+*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
+*
+*  Similarly, if UPLO = 'L' the format of A is as follows:
+*
+*  On entry:                       On exit:
+*
+*     a11  a22  a33  a44  a55  a66     l11  l22  l33  l44  l55  l66
+*     a21  a32  a43  a54  a65   *      l21  l32  l43  l54  l65   *
+*     a31  a42  a53  a64   *    *      l31  l42  l53  l64   *    *
+*
+*  Array elements marked * are not used by the routine.
+*
+*  Contributed by
+*  Peter Mayes and Giuseppe Radicati, IBM ECSEC, Rome, March 23, 1989
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+      INTEGER            NBMAX, LDWORK
+      PARAMETER          ( NBMAX = 32, LDWORK = NBMAX+1 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, I2, I3, IB, II, J, JJ, NB
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   WORK( LDWORK, NBMAX )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMM, DPBTF2, DPOTF2, DSYRK, DTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( ( .NOT.LSAME( UPLO, 'U' ) ) .AND.
+     $    ( .NOT.LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPBTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment
+*
+      NB = ILAENV( 1, 'DPBTRF', UPLO, N, KD, -1, -1 )
+*
+*     The block size must not exceed the semi-bandwidth KD, and must not
+*     exceed the limit set by the size of the local array WORK.
+*
+      NB = MIN( NB, NBMAX )
+*
+      IF( NB.LE.1 .OR. NB.GT.KD ) THEN
+*
+*        Use unblocked code
+*
+         CALL DPBTF2( UPLO, N, KD, AB, LDAB, INFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*           Compute the Cholesky factorization of a symmetric band
+*           matrix, given the upper triangle of the matrix in band
+*           storage.
+*
+*           Zero the upper triangle of the work array.
+*
+            DO 20 J = 1, NB
+               DO 10 I = 1, J - 1
+                  WORK( I, J ) = ZERO
+   10          CONTINUE
+   20       CONTINUE
+*
+*           Process the band matrix one diagonal block at a time.
+*
+            DO 70 I = 1, N, NB
+               IB = MIN( NB, N-I+1 )
+*
+*              Factorize the diagonal block
+*
+               CALL DPOTF2( UPLO, IB, AB( KD+1, I ), LDAB-1, II )
+               IF( II.NE.0 ) THEN
+                  INFO = I + II - 1
+                  GO TO 150
+               END IF
+               IF( I+IB.LE.N ) THEN
+*
+*                 Update the relevant part of the trailing submatrix.
+*                 If A11 denotes the diagonal block which has just been
+*                 factorized, then we need to update the remaining
+*                 blocks in the diagram:
+*
+*                    A11   A12   A13
+*                          A22   A23
+*                                A33
+*
+*                 The numbers of rows and columns in the partitioning
+*                 are IB, I2, I3 respectively. The blocks A12, A22 and
+*                 A23 are empty if IB = KD. The upper triangle of A13
+*                 lies outside the band.
+*
+                  I2 = MIN( KD-IB, N-I-IB+1 )
+                  I3 = MIN( IB, N-I-KD+1 )
+*
+                  IF( I2.GT.0 ) THEN
+*
+*                    Update A12
+*
+                     CALL DTRSM( 'Left', 'Upper', 'Transpose',
+     $                           'Non-unit', IB, I2, ONE, AB( KD+1, I ),
+     $                           LDAB-1, AB( KD+1-IB, I+IB ), LDAB-1 )
+*
+*                    Update A22
+*
+                     CALL DSYRK( 'Upper', 'Transpose', I2, IB, -ONE,
+     $                           AB( KD+1-IB, I+IB ), LDAB-1, ONE,
+     $                           AB( KD+1, I+IB ), LDAB-1 )
+                  END IF
+*
+                  IF( I3.GT.0 ) THEN
+*
+*                    Copy the lower triangle of A13 into the work array.
+*
+                     DO 40 JJ = 1, I3
+                        DO 30 II = JJ, IB
+                           WORK( II, JJ ) = AB( II-JJ+1, JJ+I+KD-1 )
+   30                   CONTINUE
+   40                CONTINUE
+*
+*                    Update A13 (in the work array).
+*
+                     CALL DTRSM( 'Left', 'Upper', 'Transpose',
+     $                           'Non-unit', IB, I3, ONE, AB( KD+1, I ),
+     $                           LDAB-1, WORK, LDWORK )
+*
+*                    Update A23
+*
+                     IF( I2.GT.0 )
+     $                  CALL DGEMM( 'Transpose', 'No Transpose', I2, I3,
+     $                              IB, -ONE, AB( KD+1-IB, I+IB ),
+     $                              LDAB-1, WORK, LDWORK, ONE,
+     $                              AB( 1+IB, I+KD ), LDAB-1 )
+*
+*                    Update A33
+*
+                     CALL DSYRK( 'Upper', 'Transpose', I3, IB, -ONE,
+     $                           WORK, LDWORK, ONE, AB( KD+1, I+KD ),
+     $                           LDAB-1 )
+*
+*                    Copy the lower triangle of A13 back into place.
+*
+                     DO 60 JJ = 1, I3
+                        DO 50 II = JJ, IB
+                           AB( II-JJ+1, JJ+I+KD-1 ) = WORK( II, JJ )
+   50                   CONTINUE
+   60                CONTINUE
+                  END IF
+               END IF
+   70       CONTINUE
+         ELSE
+*
+*           Compute the Cholesky factorization of a symmetric band
+*           matrix, given the lower triangle of the matrix in band
+*           storage.
+*
+*           Zero the lower triangle of the work array.
+*
+            DO 90 J = 1, NB
+               DO 80 I = J + 1, NB
+                  WORK( I, J ) = ZERO
+   80          CONTINUE
+   90       CONTINUE
+*
+*           Process the band matrix one diagonal block at a time.
+*
+            DO 140 I = 1, N, NB
+               IB = MIN( NB, N-I+1 )
+*
+*              Factorize the diagonal block
+*
+               CALL DPOTF2( UPLO, IB, AB( 1, I ), LDAB-1, II )
+               IF( II.NE.0 ) THEN
+                  INFO = I + II - 1
+                  GO TO 150
+               END IF
+               IF( I+IB.LE.N ) THEN
+*
+*                 Update the relevant part of the trailing submatrix.
+*                 If A11 denotes the diagonal block which has just been
+*                 factorized, then we need to update the remaining
+*                 blocks in the diagram:
+*
+*                    A11
+*                    A21   A22
+*                    A31   A32   A33
+*
+*                 The numbers of rows and columns in the partitioning
+*                 are IB, I2, I3 respectively. The blocks A21, A22 and
+*                 A32 are empty if IB = KD. The lower triangle of A31
+*                 lies outside the band.
+*
+                  I2 = MIN( KD-IB, N-I-IB+1 )
+                  I3 = MIN( IB, N-I-KD+1 )
+*
+                  IF( I2.GT.0 ) THEN
+*
+*                    Update A21
+*
+                     CALL DTRSM( 'Right', 'Lower', 'Transpose',
+     $                           'Non-unit', I2, IB, ONE, AB( 1, I ),
+     $                           LDAB-1, AB( 1+IB, I ), LDAB-1 )
+*
+*                    Update A22
+*
+                     CALL DSYRK( 'Lower', 'No Transpose', I2, IB, -ONE,
+     $                           AB( 1+IB, I ), LDAB-1, ONE,
+     $                           AB( 1, I+IB ), LDAB-1 )
+                  END IF
+*
+                  IF( I3.GT.0 ) THEN
+*
+*                    Copy the upper triangle of A31 into the work array.
+*
+                     DO 110 JJ = 1, IB
+                        DO 100 II = 1, MIN( JJ, I3 )
+                           WORK( II, JJ ) = AB( KD+1-JJ+II, JJ+I-1 )
+  100                   CONTINUE
+  110                CONTINUE
+*
+*                    Update A31 (in the work array).
+*
+                     CALL DTRSM( 'Right', 'Lower', 'Transpose',
+     $                           'Non-unit', I3, IB, ONE, AB( 1, I ),
+     $                           LDAB-1, WORK, LDWORK )
+*
+*                    Update A32
+*
+                     IF( I2.GT.0 )
+     $                  CALL DGEMM( 'No transpose', 'Transpose', I3, I2,
+     $                              IB, -ONE, WORK, LDWORK,
+     $                              AB( 1+IB, I ), LDAB-1, ONE,
+     $                              AB( 1+KD-IB, I+IB ), LDAB-1 )
+*
+*                    Update A33
+*
+                     CALL DSYRK( 'Lower', 'No Transpose', I3, IB, -ONE,
+     $                           WORK, LDWORK, ONE, AB( 1, I+KD ),
+     $                           LDAB-1 )
+*
+*                    Copy the upper triangle of A31 back into place.
+*
+                     DO 130 JJ = 1, IB
+                        DO 120 II = 1, MIN( JJ, I3 )
+                           AB( KD+1-JJ+II, JJ+I-1 ) = WORK( II, JJ )
+  120                   CONTINUE
+  130                CONTINUE
+                  END IF
+               END IF
+  140       CONTINUE
+         END IF
+      END IF
+      RETURN
+*
+  150 CONTINUE
+      RETURN
+*
+*     End of DPBTRF
+*
+      END
diff --git a/costa/native/external/lapack/dpbtrs.f b/costa/native/external/lapack/dpbtrs.f
new file mode 100644
index 000000000..d8bedd242
--- /dev/null
+++ b/costa/native/external/lapack/dpbtrs.f
@@ -0,0 +1,146 @@
+      SUBROUTINE DPBTRS( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AB( LDAB, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPBTRS solves a system of linear equations A*X = B with a symmetric
+*  positive definite band matrix A using the Cholesky factorization
+*  A = U**T*U or A = L*L**T computed by DPBTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangular factor stored in AB;
+*          = 'L':  Lower triangular factor stored in AB.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AB      (input) DOUBLE PRECISION array, dimension (LDAB,N)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**T*U or A = L*L**T of the band matrix A, stored in the
+*          first KD+1 rows of the array.  The j-th column of U or L is
+*          stored in the j-th column of the array AB as follows:
+*          if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO ='L', AB(1+i-j,j)    = L(i,j) for j<=i<=min(n,j+kd).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DTBSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPBTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Solve A*X = B where A = U'*U.
+*
+         DO 10 J = 1, NRHS
+*
+*           Solve U'*X = B, overwriting B with X.
+*
+            CALL DTBSV( 'Upper', 'Transpose', 'Non-unit', N, KD, AB,
+     $                  LDAB, B( 1, J ), 1 )
+*
+*           Solve U*X = B, overwriting B with X.
+*
+            CALL DTBSV( 'Upper', 'No transpose', 'Non-unit', N, KD, AB,
+     $                  LDAB, B( 1, J ), 1 )
+   10    CONTINUE
+      ELSE
+*
+*        Solve A*X = B where A = L*L'.
+*
+         DO 20 J = 1, NRHS
+*
+*           Solve L*X = B, overwriting B with X.
+*
+            CALL DTBSV( 'Lower', 'No transpose', 'Non-unit', N, KD, AB,
+     $                  LDAB, B( 1, J ), 1 )
+*
+*           Solve L'*X = B, overwriting B with X.
+*
+            CALL DTBSV( 'Lower', 'Transpose', 'Non-unit', N, KD, AB,
+     $                  LDAB, B( 1, J ), 1 )
+   20    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DPBTRS
+*
+      END
diff --git a/costa/native/external/lapack/dpocon.f b/costa/native/external/lapack/dpocon.f
new file mode 100644
index 000000000..cd36e9234
--- /dev/null
+++ b/costa/native/external/lapack/dpocon.f
@@ -0,0 +1,173 @@
+      SUBROUTINE DPOCON( UPLO, N, A, LDA, ANORM, RCOND, WORK, IWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+      DOUBLE PRECISION   ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPOCON estimates the reciprocal of the condition number (in the
+*  1-norm) of a real symmetric positive definite matrix using the
+*  Cholesky factorization A = U**T*U or A = L*L**T computed by DPOTRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**T*U or A = L*L**T, as computed by DPOTRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  ANORM   (input) DOUBLE PRECISION
+*          The 1-norm (or infinity-norm) of the symmetric matrix A.
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an
+*          estimate of the 1-norm of inv(A) computed in this routine.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      CHARACTER          NORMIN
+      INTEGER            IX, KASE
+      DOUBLE PRECISION   AINVNM, SCALE, SCALEL, SCALEU, SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, IDAMAX, DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLACON, DLATRS, DRSCL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPOCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.EQ.ZERO ) THEN
+         RETURN
+      END IF
+*
+      SMLNUM = DLAMCH( 'Safe minimum' )
+*
+*     Estimate the 1-norm of inv(A).
+*
+      KASE = 0
+      NORMIN = 'N'
+   10 CONTINUE
+      CALL DLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+         IF( UPPER ) THEN
+*
+*           Multiply by inv(U').
+*
+            CALL DLATRS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, A,
+     $                   LDA, WORK, SCALEL, WORK( 2*N+1 ), INFO )
+            NORMIN = 'Y'
+*
+*           Multiply by inv(U).
+*
+            CALL DLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   A, LDA, WORK, SCALEU, WORK( 2*N+1 ), INFO )
+         ELSE
+*
+*           Multiply by inv(L).
+*
+            CALL DLATRS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   A, LDA, WORK, SCALEL, WORK( 2*N+1 ), INFO )
+            NORMIN = 'Y'
+*
+*           Multiply by inv(L').
+*
+            CALL DLATRS( 'Lower', 'Transpose', 'Non-unit', NORMIN, N, A,
+     $                   LDA, WORK, SCALEU, WORK( 2*N+1 ), INFO )
+         END IF
+*
+*        Multiply by 1/SCALE if doing so will not cause overflow.
+*
+         SCALE = SCALEL*SCALEU
+         IF( SCALE.NE.ONE ) THEN
+            IX = IDAMAX( N, WORK, 1 )
+            IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO )
+     $         GO TO 20
+            CALL DRSCL( N, SCALE, WORK, 1 )
+         END IF
+         GO TO 10
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+   20 CONTINUE
+      RETURN
+*
+*     End of DPOCON
+*
+      END
diff --git a/costa/native/external/lapack/dpoequ.f b/costa/native/external/lapack/dpoequ.f
new file mode 100644
index 000000000..c59d7e3bc
--- /dev/null
+++ b/costa/native/external/lapack/dpoequ.f
@@ -0,0 +1,137 @@
+      SUBROUTINE DPOEQU( N, A, LDA, S, SCOND, AMAX, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, N
+      DOUBLE PRECISION   AMAX, SCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), S( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPOEQU computes row and column scalings intended to equilibrate a
+*  symmetric positive definite matrix A and reduce its condition number
+*  (with respect to the two-norm).  S contains the scale factors,
+*  S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with
+*  elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal.  This
+*  choice of S puts the condition number of B within a factor N of the
+*  smallest possible condition number over all possible diagonal
+*  scalings.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+*          The N-by-N symmetric positive definite matrix whose scaling
+*          factors are to be computed.  Only the diagonal elements of A
+*          are referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  S       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, S contains the scale factors for A.
+*
+*  SCOND   (output) DOUBLE PRECISION
+*          If INFO = 0, S contains the ratio of the smallest S(i) to
+*          the largest S(i).  If SCOND >= 0.1 and AMAX is neither too
+*          large nor too small, it is not worth scaling by S.
+*
+*  AMAX    (output) DOUBLE PRECISION
+*          Absolute value of largest matrix element.  If AMAX is very
+*          close to overflow or very close to underflow, the matrix
+*          should be scaled.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the i-th diagonal element is nonpositive.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      DOUBLE PRECISION   SMIN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -3
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPOEQU', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         SCOND = ONE
+         AMAX = ZERO
+         RETURN
+      END IF
+*
+*     Find the minimum and maximum diagonal elements.
+*
+      S( 1 ) = A( 1, 1 )
+      SMIN = S( 1 )
+      AMAX = S( 1 )
+      DO 10 I = 2, N
+         S( I ) = A( I, I )
+         SMIN = MIN( SMIN, S( I ) )
+         AMAX = MAX( AMAX, S( I ) )
+   10 CONTINUE
+*
+      IF( SMIN.LE.ZERO ) THEN
+*
+*        Find the first non-positive diagonal element and return.
+*
+         DO 20 I = 1, N
+            IF( S( I ).LE.ZERO ) THEN
+               INFO = I
+               RETURN
+            END IF
+   20    CONTINUE
+      ELSE
+*
+*        Set the scale factors to the reciprocals
+*        of the diagonal elements.
+*
+         DO 30 I = 1, N
+            S( I ) = ONE / SQRT( S( I ) )
+   30    CONTINUE
+*
+*        Compute SCOND = min(S(I)) / max(S(I))
+*
+         SCOND = SQRT( SMIN ) / SQRT( AMAX )
+      END IF
+      RETURN
+*
+*     End of DPOEQU
+*
+      END
diff --git a/costa/native/external/lapack/dporfs.f b/costa/native/external/lapack/dporfs.f
new file mode 100644
index 000000000..d38b57df1
--- /dev/null
+++ b/costa/native/external/lapack/dporfs.f
@@ -0,0 +1,327 @@
+      SUBROUTINE DPORFS( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X,
+     $                   LDX, FERR, BERR, WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
+     $                   BERR( * ), FERR( * ), WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPORFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is symmetric positive definite,
+*  and provides error bounds and backward error estimates for the
+*  solution.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+*          The symmetric matrix A.  If UPLO = 'U', the leading N-by-N
+*          upper triangular part of A contains the upper triangular part
+*          of the matrix A, and the strictly lower triangular part of A
+*          is not referenced.  If UPLO = 'L', the leading N-by-N lower
+*          triangular part of A contains the lower triangular part of
+*          the matrix A, and the strictly upper triangular part of A is
+*          not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  AF      (input) DOUBLE PRECISION array, dimension (LDAF,N)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**T*U or A = L*L**T, as computed by DPOTRF.
+*
+*  LDAF    (input) INTEGER
+*          The leading dimension of the array AF.  LDAF >= max(1,N).
+*
+*  B       (input) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by DPOTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D+0 )
+      DOUBLE PRECISION   THREE
+      PARAMETER          ( THREE = 3.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            COUNT, I, J, K, KASE, NZ
+      DOUBLE PRECISION   EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DCOPY, DLACON, DPOTRS, DSYMV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPORFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = DLAMCH( 'Epsilon' )
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - A * X
+*
+         CALL DCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 )
+         CALL DSYMV( UPLO, N, -ONE, A, LDA, X( 1, J ), 1, ONE,
+     $               WORK( N+1 ), 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            WORK( I ) = ABS( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(A)*abs(X) + abs(B).
+*
+         IF( UPPER ) THEN
+            DO 50 K = 1, N
+               S = ZERO
+               XK = ABS( X( K, J ) )
+               DO 40 I = 1, K - 1
+                  WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK
+                  S = S + ABS( A( I, K ) )*ABS( X( I, J ) )
+   40          CONTINUE
+               WORK( K ) = WORK( K ) + ABS( A( K, K ) )*XK + S
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               XK = ABS( X( K, J ) )
+               WORK( K ) = WORK( K ) + ABS( A( K, K ) )*XK
+               DO 60 I = K + 1, N
+                  WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK
+                  S = S + ABS( A( I, K ) )*ABS( X( I, J ) )
+   60          CONTINUE
+               WORK( K ) = WORK( K ) + S
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) )
+            ELSE
+               S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) /
+     $             ( WORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL DPOTRS( UPLO, N, 1, AF, LDAF, WORK( N+1 ), N, INFO )
+            CALL DAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(A))*
+*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(A) is the inverse of A
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(A)*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use DLACON to estimate the infinity-norm of the matrix
+*           inv(A) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I )
+            ELSE
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL DLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ),
+     $                KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(A').
+*
+               CALL DPOTRS( UPLO, N, 1, AF, LDAF, WORK( N+1 ), N, INFO )
+               DO 110 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  110          CONTINUE
+            ELSE IF( KASE.EQ.2 ) THEN
+*
+*              Multiply by inv(A)*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  120          CONTINUE
+               CALL DPOTRS( UPLO, N, 1, AF, LDAF, WORK( N+1 ), N, INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, ABS( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of DPORFS
+*
+      END
diff --git a/costa/native/external/lapack/dposv.f b/costa/native/external/lapack/dposv.f
new file mode 100644
index 000000000..c3b59c8ca
--- /dev/null
+++ b/costa/native/external/lapack/dposv.f
@@ -0,0 +1,122 @@
+      SUBROUTINE DPOSV( UPLO, N, NRHS, A, LDA, B, LDB, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPOSV computes the solution to a real system of linear equations
+*     A * X = B,
+*  where A is an N-by-N symmetric positive definite matrix and X and B
+*  are N-by-NRHS matrices.
+*
+*  The Cholesky decomposition is used to factor A as
+*     A = U**T* U,  if UPLO = 'U', or
+*     A = L * L**T,  if UPLO = 'L',
+*  where U is an upper triangular matrix and L is a lower triangular
+*  matrix.  The factored form of A is then used to solve the system of
+*  equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the factor U or L from the Cholesky
+*          factorization A = U**T*U or A = L*L**T.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i of A is not
+*                positive definite, so the factorization could not be
+*                completed, and the solution has not been computed.
+*
+*  =====================================================================
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DPOTRF, DPOTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPOSV ', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the Cholesky factorization A = U'*U or A = L*L'.
+*
+      CALL DPOTRF( UPLO, N, A, LDA, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL DPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO )
+*
+      END IF
+      RETURN
+*
+*     End of DPOSV
+*
+      END
diff --git a/costa/native/external/lapack/dposvx.f b/costa/native/external/lapack/dposvx.f
new file mode 100644
index 000000000..3e3be142a
--- /dev/null
+++ b/costa/native/external/lapack/dposvx.f
@@ -0,0 +1,379 @@
+      SUBROUTINE DPOSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, EQUED,
+     $                   S, B, LDB, X, LDX, RCOND, FERR, BERR, WORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, FACT, UPLO
+      INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
+     $                   BERR( * ), FERR( * ), S( * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPOSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to
+*  compute the solution to a real system of linear equations
+*     A * X = B,
+*  where A is an N-by-N symmetric positive definite matrix and X and B
+*  are N-by-NRHS matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'E', real scaling factors are computed to equilibrate
+*     the system:
+*        diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B
+*     Whether or not the system will be equilibrated depends on the
+*     scaling of the matrix A, but if equilibration is used, A is
+*     overwritten by diag(S)*A*diag(S) and B by diag(S)*B.
+*
+*  2. If FACT = 'N' or 'E', the Cholesky decomposition is used to
+*     factor the matrix A (after equilibration if FACT = 'E') as
+*        A = U**T* U,  if UPLO = 'U', or
+*        A = L * L**T,  if UPLO = 'L',
+*     where U is an upper triangular matrix and L is a lower triangular
+*     matrix.
+*
+*  3. If the leading i-by-i principal minor is not positive definite,
+*     then the routine returns with INFO = i. Otherwise, the factored
+*     form of A is used to estimate the condition number of the matrix
+*     A.  If the reciprocal of the condition number is less than machine
+*     precision, INFO = N+1 is returned as a warning, but the routine
+*     still goes on to solve for X and compute error bounds as
+*     described below.
+*
+*  4. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  5. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  6. If equilibration was used, the matrix X is premultiplied by
+*     diag(S) so that it solves the original system before
+*     equilibration.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of the matrix A is
+*          supplied on entry, and if not, whether the matrix A should be
+*          equilibrated before it is factored.
+*          = 'F':  On entry, AF contains the factored form of A.
+*                  If EQUED = 'Y', the matrix A has been equilibrated
+*                  with scaling factors given by S.  A and AF will not
+*                  be modified.
+*          = 'N':  The matrix A will be copied to AF and factored.
+*          = 'E':  The matrix A will be equilibrated if necessary, then
+*                  copied to AF and factored.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the symmetric matrix A, except if FACT = 'F' and
+*          EQUED = 'Y', then A must contain the equilibrated matrix
+*          diag(S)*A*diag(S).  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.  A is not modified if
+*          FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit.
+*
+*          On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by
+*          diag(S)*A*diag(S).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  AF      (input or output) DOUBLE PRECISION array, dimension (LDAF,N)
+*          If FACT = 'F', then AF is an input argument and on entry
+*          contains the triangular factor U or L from the Cholesky
+*          factorization A = U**T*U or A = L*L**T, in the same storage
+*          format as A.  If EQUED .ne. 'N', then AF is the factored form
+*          of the equilibrated matrix diag(S)*A*diag(S).
+*
+*          If FACT = 'N', then AF is an output argument and on exit
+*          returns the triangular factor U or L from the Cholesky
+*          factorization A = U**T*U or A = L*L**T of the original
+*          matrix A.
+*
+*          If FACT = 'E', then AF is an output argument and on exit
+*          returns the triangular factor U or L from the Cholesky
+*          factorization A = U**T*U or A = L*L**T of the equilibrated
+*          matrix A (see the description of A for the form of the
+*          equilibrated matrix).
+*
+*  LDAF    (input) INTEGER
+*          The leading dimension of the array AF.  LDAF >= max(1,N).
+*
+*  EQUED   (input or output) CHARACTER*1
+*          Specifies the form of equilibration that was done.
+*          = 'N':  No equilibration (always true if FACT = 'N').
+*          = 'Y':  Equilibration was done, i.e., A has been replaced by
+*                  diag(S) * A * diag(S).
+*          EQUED is an input argument if FACT = 'F'; otherwise, it is an
+*          output argument.
+*
+*  S       (input or output) DOUBLE PRECISION array, dimension (N)
+*          The scale factors for A; not accessed if EQUED = 'N'.  S is
+*          an input argument if FACT = 'F'; otherwise, S is an output
+*          argument.  If FACT = 'F' and EQUED = 'Y', each element of S
+*          must be positive.
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y',
+*          B is overwritten by diag(S) * B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) DOUBLE PRECISION array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to
+*          the original system of equations.  Note that if EQUED = 'Y',
+*          A and B are modified on exit, and the solution to the
+*          equilibrated system is inv(diag(S))*X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The estimate of the reciprocal condition number of the matrix
+*          A after equilibration (if done).  If RCOND is less than the
+*          machine precision (in particular, if RCOND = 0), the matrix
+*          is singular to working precision.  This condition is
+*          indicated by a return code of INFO > 0.
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, and i is
+*                <= N:  the leading minor of order i of A is
+*                       not positive definite, so the factorization
+*                       could not be completed, and the solution has not
+*                       been computed. RCOND = 0 is returned.
+*                = N+1: U is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            EQUIL, NOFACT, RCEQU
+      INTEGER            I, INFEQU, J
+      DOUBLE PRECISION   AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, DLANSY
+      EXTERNAL           LSAME, DLAMCH, DLANSY
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLACPY, DLAQSY, DPOCON, DPOEQU, DPORFS, DPOTRF,
+     $                   DPOTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      EQUIL = LSAME( FACT, 'E' )
+      IF( NOFACT .OR. EQUIL ) THEN
+         EQUED = 'N'
+         RCEQU = .FALSE.
+      ELSE
+         RCEQU = LSAME( EQUED, 'Y' )
+         SMLNUM = DLAMCH( 'Safe minimum' )
+         BIGNUM = ONE / SMLNUM
+      END IF
+*
+*     Test the input parameters.
+*
+      IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) )
+     $     THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT.
+     $         ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN
+         INFO = -9
+      ELSE
+         IF( RCEQU ) THEN
+            SMIN = BIGNUM
+            SMAX = ZERO
+            DO 10 J = 1, N
+               SMIN = MIN( SMIN, S( J ) )
+               SMAX = MAX( SMAX, S( J ) )
+   10       CONTINUE
+            IF( SMIN.LE.ZERO ) THEN
+               INFO = -10
+            ELSE IF( N.GT.0 ) THEN
+               SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM )
+            ELSE
+               SCOND = ONE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            IF( LDB.LT.MAX( 1, N ) ) THEN
+               INFO = -12
+            ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+               INFO = -14
+            END IF
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPOSVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( EQUIL ) THEN
+*
+*        Compute row and column scalings to equilibrate the matrix A.
+*
+         CALL DPOEQU( N, A, LDA, S, SCOND, AMAX, INFEQU )
+         IF( INFEQU.EQ.0 ) THEN
+*
+*           Equilibrate the matrix.
+*
+            CALL DLAQSY( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED )
+            RCEQU = LSAME( EQUED, 'Y' )
+         END IF
+      END IF
+*
+*     Scale the right hand side.
+*
+      IF( RCEQU ) THEN
+         DO 30 J = 1, NRHS
+            DO 20 I = 1, N
+               B( I, J ) = S( I )*B( I, J )
+   20       CONTINUE
+   30    CONTINUE
+      END IF
+*
+      IF( NOFACT .OR. EQUIL ) THEN
+*
+*        Compute the Cholesky factorization A = U'*U or A = L*L'.
+*
+         CALL DLACPY( UPLO, N, N, A, LDA, AF, LDAF )
+         CALL DPOTRF( UPLO, N, AF, LDAF, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      ANORM = DLANSY( '1', UPLO, N, A, LDA, WORK )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL DPOCON( UPLO, N, AF, LDAF, ANORM, RCOND, WORK, IWORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.DLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution matrix X.
+*
+      CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL DPOTRS( UPLO, N, NRHS, AF, LDAF, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solution and
+*     compute error bounds and backward error estimates for it.
+*
+      CALL DPORFS( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X, LDX,
+     $             FERR, BERR, WORK, IWORK, INFO )
+*
+*     Transform the solution matrix X to a solution of the original
+*     system.
+*
+      IF( RCEQU ) THEN
+         DO 50 J = 1, NRHS
+            DO 40 I = 1, N
+               X( I, J ) = S( I )*X( I, J )
+   40       CONTINUE
+   50    CONTINUE
+         DO 60 J = 1, NRHS
+            FERR( J ) = FERR( J ) / SCOND
+   60    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DPOSVX
+*
+      END
diff --git a/costa/native/external/lapack/dpotf2.f b/costa/native/external/lapack/dpotf2.f
new file mode 100644
index 000000000..f9e0de06e
--- /dev/null
+++ b/costa/native/external/lapack/dpotf2.f
@@ -0,0 +1,168 @@
+      SUBROUTINE DPOTF2( UPLO, N, A, LDA, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPOTF2 computes the Cholesky factorization of a real symmetric
+*  positive definite matrix A.
+*
+*  The factorization has the form
+*     A = U' * U ,  if UPLO = 'U', or
+*     A = L  * L',  if UPLO = 'L',
+*  where U is an upper triangular matrix and L is lower triangular.
+*
+*  This is the unblocked version of the algorithm, calling Level 2 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          n by n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n by n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the factor U or L from the Cholesky
+*          factorization A = U'*U  or A = L*L'.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*          > 0: if INFO = k, the leading minor of order k is not
+*               positive definite, and the factorization could not be
+*               completed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J
+      DOUBLE PRECISION   AJJ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DDOT
+      EXTERNAL           LSAME, DDOT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMV, DSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPOTF2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Compute the Cholesky factorization A = U'*U.
+*
+         DO 10 J = 1, N
+*
+*           Compute U(J,J) and test for non-positive-definiteness.
+*
+            AJJ = A( J, J ) - DDOT( J-1, A( 1, J ), 1, A( 1, J ), 1 )
+            IF( AJJ.LE.ZERO ) THEN
+               A( J, J ) = AJJ
+               GO TO 30
+            END IF
+            AJJ = SQRT( AJJ )
+            A( J, J ) = AJJ
+*
+*           Compute elements J+1:N of row J.
+*
+            IF( J.LT.N ) THEN
+               CALL DGEMV( 'Transpose', J-1, N-J, -ONE, A( 1, J+1 ),
+     $                     LDA, A( 1, J ), 1, ONE, A( J, J+1 ), LDA )
+               CALL DSCAL( N-J, ONE / AJJ, A( J, J+1 ), LDA )
+            END IF
+   10    CONTINUE
+      ELSE
+*
+*        Compute the Cholesky factorization A = L*L'.
+*
+         DO 20 J = 1, N
+*
+*           Compute L(J,J) and test for non-positive-definiteness.
+*
+            AJJ = A( J, J ) - DDOT( J-1, A( J, 1 ), LDA, A( J, 1 ),
+     $            LDA )
+            IF( AJJ.LE.ZERO ) THEN
+               A( J, J ) = AJJ
+               GO TO 30
+            END IF
+            AJJ = SQRT( AJJ )
+            A( J, J ) = AJJ
+*
+*           Compute elements J+1:N of column J.
+*
+            IF( J.LT.N ) THEN
+               CALL DGEMV( 'No transpose', N-J, J-1, -ONE, A( J+1, 1 ),
+     $                     LDA, A( J, 1 ), LDA, ONE, A( J+1, J ), 1 )
+               CALL DSCAL( N-J, ONE / AJJ, A( J+1, J ), 1 )
+            END IF
+   20    CONTINUE
+      END IF
+      GO TO 40
+*
+   30 CONTINUE
+      INFO = J
+*
+   40 CONTINUE
+      RETURN
+*
+*     End of DPOTF2
+*
+      END
diff --git a/costa/native/external/lapack/dpotrf.f b/costa/native/external/lapack/dpotrf.f
new file mode 100644
index 000000000..c4b0cb459
--- /dev/null
+++ b/costa/native/external/lapack/dpotrf.f
@@ -0,0 +1,184 @@
+      SUBROUTINE DPOTRF( UPLO, N, A, LDA, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPOTRF computes the Cholesky factorization of a real symmetric
+*  positive definite matrix A.
+*
+*  The factorization has the form
+*     A = U**T * U,  if UPLO = 'U', or
+*     A = L  * L**T,  if UPLO = 'L',
+*  where U is an upper triangular matrix and L is lower triangular.
+*
+*  This is the block version of the algorithm, calling Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the factor U or L from the Cholesky
+*          factorization A = U**T*U or A = L*L**T.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i is not
+*                positive definite, and the factorization could not be
+*                completed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, JB, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMM, DPOTF2, DSYRK, DTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPOTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'DPOTRF', UPLO, N, -1, -1, -1 )
+      IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+*        Use unblocked code.
+*
+         CALL DPOTF2( UPLO, N, A, LDA, INFO )
+      ELSE
+*
+*        Use blocked code.
+*
+         IF( UPPER ) THEN
+*
+*           Compute the Cholesky factorization A = U'*U.
+*
+            DO 10 J = 1, N, NB
+*
+*              Update and factorize the current diagonal block and test
+*              for non-positive-definiteness.
+*
+               JB = MIN( NB, N-J+1 )
+               CALL DSYRK( 'Upper', 'Transpose', JB, J-1, -ONE,
+     $                     A( 1, J ), LDA, ONE, A( J, J ), LDA )
+               CALL DPOTF2( 'Upper', JB, A( J, J ), LDA, INFO )
+               IF( INFO.NE.0 )
+     $            GO TO 30
+               IF( J+JB.LE.N ) THEN
+*
+*                 Compute the current block row.
+*
+                  CALL DGEMM( 'Transpose', 'No transpose', JB, N-J-JB+1,
+     $                        J-1, -ONE, A( 1, J ), LDA, A( 1, J+JB ),
+     $                        LDA, ONE, A( J, J+JB ), LDA )
+                  CALL DTRSM( 'Left', 'Upper', 'Transpose', 'Non-unit',
+     $                        JB, N-J-JB+1, ONE, A( J, J ), LDA,
+     $                        A( J, J+JB ), LDA )
+               END IF
+   10       CONTINUE
+*
+         ELSE
+*
+*           Compute the Cholesky factorization A = L*L'.
+*
+            DO 20 J = 1, N, NB
+*
+*              Update and factorize the current diagonal block and test
+*              for non-positive-definiteness.
+*
+               JB = MIN( NB, N-J+1 )
+               CALL DSYRK( 'Lower', 'No transpose', JB, J-1, -ONE,
+     $                     A( J, 1 ), LDA, ONE, A( J, J ), LDA )
+               CALL DPOTF2( 'Lower', JB, A( J, J ), LDA, INFO )
+               IF( INFO.NE.0 )
+     $            GO TO 30
+               IF( J+JB.LE.N ) THEN
+*
+*                 Compute the current block column.
+*
+                  CALL DGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB,
+     $                        J-1, -ONE, A( J+JB, 1 ), LDA, A( J, 1 ),
+     $                        LDA, ONE, A( J+JB, J ), LDA )
+                  CALL DTRSM( 'Right', 'Lower', 'Transpose', 'Non-unit',
+     $                        N-J-JB+1, JB, ONE, A( J, J ), LDA,
+     $                        A( J+JB, J ), LDA )
+               END IF
+   20       CONTINUE
+         END IF
+      END IF
+      GO TO 40
+*
+   30 CONTINUE
+      INFO = INFO + J - 1
+*
+   40 CONTINUE
+      RETURN
+*
+*     End of DPOTRF
+*
+      END
diff --git a/costa/native/external/lapack/dpotri.f b/costa/native/external/lapack/dpotri.f
new file mode 100644
index 000000000..b3f956fc0
--- /dev/null
+++ b/costa/native/external/lapack/dpotri.f
@@ -0,0 +1,97 @@
+      SUBROUTINE DPOTRI( UPLO, N, A, LDA, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPOTRI computes the inverse of a real symmetric positive definite
+*  matrix A using the Cholesky factorization A = U**T*U or A = L*L**T
+*  computed by DPOTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the triangular factor U or L from the Cholesky
+*          factorization A = U**T*U or A = L*L**T, as computed by
+*          DPOTRF.
+*          On exit, the upper or lower triangle of the (symmetric)
+*          inverse of A, overwriting the input factor U or L.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the (i,i) element of the factor U or L is
+*                zero, and the inverse could not be computed.
+*
+*  =====================================================================
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLAUUM, DTRTRI, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPOTRI', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Invert the triangular Cholesky factor U or L.
+*
+      CALL DTRTRI( UPLO, 'Non-unit', N, A, LDA, INFO )
+      IF( INFO.GT.0 )
+     $   RETURN
+*
+*     Form inv(U)*inv(U)' or inv(L)'*inv(L).
+*
+      CALL DLAUUM( UPLO, N, A, LDA, INFO )
+*
+      RETURN
+*
+*     End of DPOTRI
+*
+      END
diff --git a/costa/native/external/lapack/dpotrs.f b/costa/native/external/lapack/dpotrs.f
new file mode 100644
index 000000000..ae3ab2f31
--- /dev/null
+++ b/costa/native/external/lapack/dpotrs.f
@@ -0,0 +1,133 @@
+      SUBROUTINE DPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPOTRS solves a system of linear equations A*X = B with a symmetric
+*  positive definite matrix A using the Cholesky factorization
+*  A = U**T*U or A = L*L**T computed by DPOTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**T*U or A = L*L**T, as computed by DPOTRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPOTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Solve A*X = B where A = U'*U.
+*
+*        Solve U'*X = B, overwriting B with X.
+*
+         CALL DTRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', N, NRHS,
+     $               ONE, A, LDA, B, LDB )
+*
+*        Solve U*X = B, overwriting B with X.
+*
+         CALL DTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N,
+     $               NRHS, ONE, A, LDA, B, LDB )
+      ELSE
+*
+*        Solve A*X = B where A = L*L'.
+*
+*        Solve L*X = B, overwriting B with X.
+*
+         CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', N,
+     $               NRHS, ONE, A, LDA, B, LDB )
+*
+*        Solve L'*X = B, overwriting B with X.
+*
+         CALL DTRSM( 'Left', 'Lower', 'Transpose', 'Non-unit', N, NRHS,
+     $               ONE, A, LDA, B, LDB )
+      END IF
+*
+      RETURN
+*
+*     End of DPOTRS
+*
+      END
diff --git a/costa/native/external/lapack/dppcon.f b/costa/native/external/lapack/dppcon.f
new file mode 100644
index 000000000..0a3ba04d3
--- /dev/null
+++ b/costa/native/external/lapack/dppcon.f
@@ -0,0 +1,172 @@
+      SUBROUTINE DPPCON( UPLO, N, AP, ANORM, RCOND, WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+      DOUBLE PRECISION   ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   AP( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPPCON estimates the reciprocal of the condition number (in the
+*  1-norm) of a real symmetric positive definite packed matrix using
+*  the Cholesky factorization A = U**T*U or A = L*L**T computed by
+*  DPPTRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**T*U or A = L*L**T, packed columnwise in a linear
+*          array.  The j-th column of U or L is stored in the array AP
+*          as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n.
+*
+*  ANORM   (input) DOUBLE PRECISION
+*          The 1-norm (or infinity-norm) of the symmetric matrix A.
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an
+*          estimate of the 1-norm of inv(A) computed in this routine.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      CHARACTER          NORMIN
+      INTEGER            IX, KASE
+      DOUBLE PRECISION   AINVNM, SCALE, SCALEL, SCALEU, SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, IDAMAX, DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLACON, DLATPS, DRSCL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPPCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.EQ.ZERO ) THEN
+         RETURN
+      END IF
+*
+      SMLNUM = DLAMCH( 'Safe minimum' )
+*
+*     Estimate the 1-norm of the inverse.
+*
+      KASE = 0
+      NORMIN = 'N'
+   10 CONTINUE
+      CALL DLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+         IF( UPPER ) THEN
+*
+*           Multiply by inv(U').
+*
+            CALL DLATPS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N,
+     $                   AP, WORK, SCALEL, WORK( 2*N+1 ), INFO )
+            NORMIN = 'Y'
+*
+*           Multiply by inv(U).
+*
+            CALL DLATPS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   AP, WORK, SCALEU, WORK( 2*N+1 ), INFO )
+         ELSE
+*
+*           Multiply by inv(L).
+*
+            CALL DLATPS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   AP, WORK, SCALEL, WORK( 2*N+1 ), INFO )
+            NORMIN = 'Y'
+*
+*           Multiply by inv(L').
+*
+            CALL DLATPS( 'Lower', 'Transpose', 'Non-unit', NORMIN, N,
+     $                   AP, WORK, SCALEU, WORK( 2*N+1 ), INFO )
+         END IF
+*
+*        Multiply by 1/SCALE if doing so will not cause overflow.
+*
+         SCALE = SCALEL*SCALEU
+         IF( SCALE.NE.ONE ) THEN
+            IX = IDAMAX( N, WORK, 1 )
+            IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO )
+     $         GO TO 20
+            CALL DRSCL( N, SCALE, WORK, 1 )
+         END IF
+         GO TO 10
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+   20 CONTINUE
+      RETURN
+*
+*     End of DPPCON
+*
+      END
diff --git a/costa/native/external/lapack/dppequ.f b/costa/native/external/lapack/dppequ.f
new file mode 100644
index 000000000..ea6ca6bbe
--- /dev/null
+++ b/costa/native/external/lapack/dppequ.f
@@ -0,0 +1,169 @@
+      SUBROUTINE DPPEQU( UPLO, N, AP, S, SCOND, AMAX, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+      DOUBLE PRECISION   AMAX, SCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AP( * ), S( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPPEQU computes row and column scalings intended to equilibrate a
+*  symmetric positive definite matrix A in packed storage and reduce
+*  its condition number (with respect to the two-norm).  S contains the
+*  scale factors, S(i)=1/sqrt(A(i,i)), chosen so that the scaled matrix
+*  B with elements B(i,j)=S(i)*A(i,j)*S(j) has ones on the diagonal.
+*  This choice of S puts the condition number of B within a factor N of
+*  the smallest possible condition number over all possible diagonal
+*  scalings.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          The upper or lower triangle of the symmetric matrix A, packed
+*          columnwise in a linear array.  The j-th column of A is stored
+*          in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*  S       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, S contains the scale factors for A.
+*
+*  SCOND   (output) DOUBLE PRECISION
+*          If INFO = 0, S contains the ratio of the smallest S(i) to
+*          the largest S(i).  If SCOND >= 0.1 and AMAX is neither too
+*          large nor too small, it is not worth scaling by S.
+*
+*  AMAX    (output) DOUBLE PRECISION
+*          Absolute value of largest matrix element.  If AMAX is very
+*          close to overflow or very close to underflow, the matrix
+*          should be scaled.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the i-th diagonal element is nonpositive.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, JJ
+      DOUBLE PRECISION   SMIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPPEQU', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         SCOND = ONE
+         AMAX = ZERO
+         RETURN
+      END IF
+*
+*     Initialize SMIN and AMAX.
+*
+      S( 1 ) = AP( 1 )
+      SMIN = S( 1 )
+      AMAX = S( 1 )
+*
+      IF( UPPER ) THEN
+*
+*        UPLO = 'U':  Upper triangle of A is stored.
+*        Find the minimum and maximum diagonal elements.
+*
+         JJ = 1
+         DO 10 I = 2, N
+            JJ = JJ + I
+            S( I ) = AP( JJ )
+            SMIN = MIN( SMIN, S( I ) )
+            AMAX = MAX( AMAX, S( I ) )
+   10    CONTINUE
+*
+      ELSE
+*
+*        UPLO = 'L':  Lower triangle of A is stored.
+*        Find the minimum and maximum diagonal elements.
+*
+         JJ = 1
+         DO 20 I = 2, N
+            JJ = JJ + N - I + 2
+            S( I ) = AP( JJ )
+            SMIN = MIN( SMIN, S( I ) )
+            AMAX = MAX( AMAX, S( I ) )
+   20    CONTINUE
+      END IF
+*
+      IF( SMIN.LE.ZERO ) THEN
+*
+*        Find the first non-positive diagonal element and return.
+*
+         DO 30 I = 1, N
+            IF( S( I ).LE.ZERO ) THEN
+               INFO = I
+               RETURN
+            END IF
+   30    CONTINUE
+      ELSE
+*
+*        Set the scale factors to the reciprocals
+*        of the diagonal elements.
+*
+         DO 40 I = 1, N
+            S( I ) = ONE / SQRT( S( I ) )
+   40    CONTINUE
+*
+*        Compute SCOND = min(S(I)) / max(S(I))
+*
+         SCOND = SQRT( SMIN ) / SQRT( AMAX )
+      END IF
+      RETURN
+*
+*     End of DPPEQU
+*
+      END
diff --git a/costa/native/external/lapack/dpprfs.f b/costa/native/external/lapack/dpprfs.f
new file mode 100644
index 000000000..6a0fc5044
--- /dev/null
+++ b/costa/native/external/lapack/dpprfs.f
@@ -0,0 +1,324 @@
+      SUBROUTINE DPPRFS( UPLO, N, NRHS, AP, AFP, B, LDB, X, LDX, FERR,
+     $                   BERR, WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   AFP( * ), AP( * ), B( LDB, * ), BERR( * ),
+     $                   FERR( * ), WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPPRFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is symmetric positive definite
+*  and packed, and provides error bounds and backward error estimates
+*  for the solution.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AP      (input) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          The upper or lower triangle of the symmetric matrix A, packed
+*          columnwise in a linear array.  The j-th column of A is stored
+*          in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*  AFP     (input) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**T*U or A = L*L**T, as computed by DPPTRF/ZPPTRF,
+*          packed columnwise in a linear array in the same format as A
+*          (see AP).
+*
+*  B       (input) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by DPPTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D+0 )
+      DOUBLE PRECISION   THREE
+      PARAMETER          ( THREE = 3.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            COUNT, I, IK, J, K, KASE, KK, NZ
+      DOUBLE PRECISION   EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DCOPY, DLACON, DPPTRS, DSPMV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPPRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = DLAMCH( 'Epsilon' )
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - A * X
+*
+         CALL DCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 )
+         CALL DSPMV( UPLO, N, -ONE, AP, X( 1, J ), 1, ONE, WORK( N+1 ),
+     $               1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            WORK( I ) = ABS( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(A)*abs(X) + abs(B).
+*
+         KK = 1
+         IF( UPPER ) THEN
+            DO 50 K = 1, N
+               S = ZERO
+               XK = ABS( X( K, J ) )
+               IK = KK
+               DO 40 I = 1, K - 1
+                  WORK( I ) = WORK( I ) + ABS( AP( IK ) )*XK
+                  S = S + ABS( AP( IK ) )*ABS( X( I, J ) )
+                  IK = IK + 1
+   40          CONTINUE
+               WORK( K ) = WORK( K ) + ABS( AP( KK+K-1 ) )*XK + S
+               KK = KK + K
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               XK = ABS( X( K, J ) )
+               WORK( K ) = WORK( K ) + ABS( AP( KK ) )*XK
+               IK = KK + 1
+               DO 60 I = K + 1, N
+                  WORK( I ) = WORK( I ) + ABS( AP( IK ) )*XK
+                  S = S + ABS( AP( IK ) )*ABS( X( I, J ) )
+                  IK = IK + 1
+   60          CONTINUE
+               WORK( K ) = WORK( K ) + S
+               KK = KK + ( N-K+1 )
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) )
+            ELSE
+               S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) /
+     $             ( WORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL DPPTRS( UPLO, N, 1, AFP, WORK( N+1 ), N, INFO )
+            CALL DAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(A))*
+*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(A) is the inverse of A
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(A)*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use DLACON to estimate the infinity-norm of the matrix
+*           inv(A) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I )
+            ELSE
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL DLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ),
+     $                KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(A').
+*
+               CALL DPPTRS( UPLO, N, 1, AFP, WORK( N+1 ), N, INFO )
+               DO 110 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  110          CONTINUE
+            ELSE IF( KASE.EQ.2 ) THEN
+*
+*              Multiply by inv(A)*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  120          CONTINUE
+               CALL DPPTRS( UPLO, N, 1, AFP, WORK( N+1 ), N, INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, ABS( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of DPPRFS
+*
+      END
diff --git a/costa/native/external/lapack/dppsv.f b/costa/native/external/lapack/dppsv.f
new file mode 100644
index 000000000..2e99b8cd8
--- /dev/null
+++ b/costa/native/external/lapack/dppsv.f
@@ -0,0 +1,134 @@
+      SUBROUTINE DPPSV( UPLO, N, NRHS, AP, B, LDB, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AP( * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPPSV computes the solution to a real system of linear equations
+*     A * X = B,
+*  where A is an N-by-N symmetric positive definite matrix stored in
+*  packed format and X and B are N-by-NRHS matrices.
+*
+*  The Cholesky decomposition is used to factor A as
+*     A = U**T* U,  if UPLO = 'U', or
+*     A = L * L**T,  if UPLO = 'L',
+*  where U is an upper triangular matrix and L is a lower triangular
+*  matrix.  The factored form of A is then used to solve the system of
+*  equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AP      (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*          See below for further details.
+*
+*          On exit, if INFO = 0, the factor U or L from the Cholesky
+*          factorization A = U**T*U or A = L*L**T, in the same storage
+*          format as A.
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i of A is not
+*                positive definite, so the factorization could not be
+*                completed, and the solution has not been computed.
+*
+*  Further Details
+*  ===============
+*
+*  The packed storage scheme is illustrated by the following example
+*  when N = 4, UPLO = 'U':
+*
+*  Two-dimensional storage of the symmetric matrix A:
+*
+*     a11 a12 a13 a14
+*         a22 a23 a24
+*             a33 a34     (aij = conjg(aji))
+*                 a44
+*
+*  Packed storage of the upper triangle of A:
+*
+*  AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ]
+*
+*  =====================================================================
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DPPTRF, DPPTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPPSV ', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the Cholesky factorization A = U'*U or A = L*L'.
+*
+      CALL DPPTRF( UPLO, N, AP, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL DPPTRS( UPLO, N, NRHS, AP, B, LDB, INFO )
+*
+      END IF
+      RETURN
+*
+*     End of DPPSV
+*
+      END
diff --git a/costa/native/external/lapack/dppsvx.f b/costa/native/external/lapack/dppsvx.f
new file mode 100644
index 000000000..60b9ae7dd
--- /dev/null
+++ b/costa/native/external/lapack/dppsvx.f
@@ -0,0 +1,383 @@
+      SUBROUTINE DPPSVX( FACT, UPLO, N, NRHS, AP, AFP, EQUED, S, B, LDB,
+     $                   X, LDX, RCOND, FERR, BERR, WORK, IWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, FACT, UPLO
+      INTEGER            INFO, LDB, LDX, N, NRHS
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   AFP( * ), AP( * ), B( LDB, * ), BERR( * ),
+     $                   FERR( * ), S( * ), WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPPSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to
+*  compute the solution to a real system of linear equations
+*     A * X = B,
+*  where A is an N-by-N symmetric positive definite matrix stored in
+*  packed format and X and B are N-by-NRHS matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'E', real scaling factors are computed to equilibrate
+*     the system:
+*        diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B
+*     Whether or not the system will be equilibrated depends on the
+*     scaling of the matrix A, but if equilibration is used, A is
+*     overwritten by diag(S)*A*diag(S) and B by diag(S)*B.
+*
+*  2. If FACT = 'N' or 'E', the Cholesky decomposition is used to
+*     factor the matrix A (after equilibration if FACT = 'E') as
+*        A = U**T* U,  if UPLO = 'U', or
+*        A = L * L**T,  if UPLO = 'L',
+*     where U is an upper triangular matrix and L is a lower triangular
+*     matrix.
+*
+*  3. If the leading i-by-i principal minor is not positive definite,
+*     then the routine returns with INFO = i. Otherwise, the factored
+*     form of A is used to estimate the condition number of the matrix
+*     A.  If the reciprocal of the condition number is less than machine
+*     precision, INFO = N+1 is returned as a warning, but the routine
+*     still goes on to solve for X and compute error bounds as
+*     described below.
+*
+*  4. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  5. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  6. If equilibration was used, the matrix X is premultiplied by
+*     diag(S) so that it solves the original system before
+*     equilibration.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of the matrix A is
+*          supplied on entry, and if not, whether the matrix A should be
+*          equilibrated before it is factored.
+*          = 'F':  On entry, AFP contains the factored form of A.
+*                  If EQUED = 'Y', the matrix A has been equilibrated
+*                  with scaling factors given by S.  AP and AFP will not
+*                  be modified.
+*          = 'N':  The matrix A will be copied to AFP and factored.
+*          = 'E':  The matrix A will be equilibrated if necessary, then
+*                  copied to AFP and factored.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AP      (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array, except if FACT = 'F'
+*          and EQUED = 'Y', then A must contain the equilibrated matrix
+*          diag(S)*A*diag(S).  The j-th column of A is stored in the
+*          array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*          See below for further details.  A is not modified if
+*          FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit.
+*
+*          On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by
+*          diag(S)*A*diag(S).
+*
+*  AFP     (input or output) DOUBLE PRECISION array, dimension
+*                            (N*(N+1)/2)
+*          If FACT = 'F', then AFP is an input argument and on entry
+*          contains the triangular factor U or L from the Cholesky
+*          factorization A = U'*U or A = L*L', in the same storage
+*          format as A.  If EQUED .ne. 'N', then AFP is the factored
+*          form of the equilibrated matrix A.
+*
+*          If FACT = 'N', then AFP is an output argument and on exit
+*          returns the triangular factor U or L from the Cholesky
+*          factorization A = U'*U or A = L*L' of the original matrix A.
+*
+*          If FACT = 'E', then AFP is an output argument and on exit
+*          returns the triangular factor U or L from the Cholesky
+*          factorization A = U'*U or A = L*L' of the equilibrated
+*          matrix A (see the description of AP for the form of the
+*          equilibrated matrix).
+*
+*  EQUED   (input or output) CHARACTER*1
+*          Specifies the form of equilibration that was done.
+*          = 'N':  No equilibration (always true if FACT = 'N').
+*          = 'Y':  Equilibration was done, i.e., A has been replaced by
+*                  diag(S) * A * diag(S).
+*          EQUED is an input argument if FACT = 'F'; otherwise, it is an
+*          output argument.
+*
+*  S       (input or output) DOUBLE PRECISION array, dimension (N)
+*          The scale factors for A; not accessed if EQUED = 'N'.  S is
+*          an input argument if FACT = 'F'; otherwise, S is an output
+*          argument.  If FACT = 'F' and EQUED = 'Y', each element of S
+*          must be positive.
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y',
+*          B is overwritten by diag(S) * B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) DOUBLE PRECISION array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to
+*          the original system of equations.  Note that if EQUED = 'Y',
+*          A and B are modified on exit, and the solution to the
+*          equilibrated system is inv(diag(S))*X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The estimate of the reciprocal condition number of the matrix
+*          A after equilibration (if done).  If RCOND is less than the
+*          machine precision (in particular, if RCOND = 0), the matrix
+*          is singular to working precision.  This condition is
+*          indicated by a return code of INFO > 0.
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= N:  the leading minor of order i of A is
+*                       not positive definite, so the factorization
+*                       could not be completed, and the solution has not
+*                       been computed. RCOND = 0 is returned.
+*                = N+1: U is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  Further Details
+*  ===============
+*
+*  The packed storage scheme is illustrated by the following example
+*  when N = 4, UPLO = 'U':
+*
+*  Two-dimensional storage of the symmetric matrix A:
+*
+*     a11 a12 a13 a14
+*         a22 a23 a24
+*             a33 a34     (aij = conjg(aji))
+*                 a44
+*
+*  Packed storage of the upper triangle of A:
+*
+*  AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ]
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            EQUIL, NOFACT, RCEQU
+      INTEGER            I, INFEQU, J
+      DOUBLE PRECISION   AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, DLANSP
+      EXTERNAL           LSAME, DLAMCH, DLANSP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLACPY, DLAQSP, DPPCON, DPPEQU, DPPRFS,
+     $                   DPPTRF, DPPTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      EQUIL = LSAME( FACT, 'E' )
+      IF( NOFACT .OR. EQUIL ) THEN
+         EQUED = 'N'
+         RCEQU = .FALSE.
+      ELSE
+         RCEQU = LSAME( EQUED, 'Y' )
+         SMLNUM = DLAMCH( 'Safe minimum' )
+         BIGNUM = ONE / SMLNUM
+      END IF
+*
+*     Test the input parameters.
+*
+      IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) )
+     $     THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT.
+     $         ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN
+         INFO = -7
+      ELSE
+         IF( RCEQU ) THEN
+            SMIN = BIGNUM
+            SMAX = ZERO
+            DO 10 J = 1, N
+               SMIN = MIN( SMIN, S( J ) )
+               SMAX = MAX( SMAX, S( J ) )
+   10       CONTINUE
+            IF( SMIN.LE.ZERO ) THEN
+               INFO = -8
+            ELSE IF( N.GT.0 ) THEN
+               SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM )
+            ELSE
+               SCOND = ONE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            IF( LDB.LT.MAX( 1, N ) ) THEN
+               INFO = -10
+            ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+               INFO = -12
+            END IF
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPPSVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( EQUIL ) THEN
+*
+*        Compute row and column scalings to equilibrate the matrix A.
+*
+         CALL DPPEQU( UPLO, N, AP, S, SCOND, AMAX, INFEQU )
+         IF( INFEQU.EQ.0 ) THEN
+*
+*           Equilibrate the matrix.
+*
+            CALL DLAQSP( UPLO, N, AP, S, SCOND, AMAX, EQUED )
+            RCEQU = LSAME( EQUED, 'Y' )
+         END IF
+      END IF
+*
+*     Scale the right-hand side.
+*
+      IF( RCEQU ) THEN
+         DO 30 J = 1, NRHS
+            DO 20 I = 1, N
+               B( I, J ) = S( I )*B( I, J )
+   20       CONTINUE
+   30    CONTINUE
+      END IF
+*
+      IF( NOFACT .OR. EQUIL ) THEN
+*
+*        Compute the Cholesky factorization A = U'*U or A = L*L'.
+*
+         CALL DCOPY( N*( N+1 ) / 2, AP, 1, AFP, 1 )
+         CALL DPPTRF( UPLO, N, AFP, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      ANORM = DLANSP( 'I', UPLO, N, AP, WORK )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL DPPCON( UPLO, N, AFP, ANORM, RCOND, WORK, IWORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.DLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution matrix X.
+*
+      CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL DPPTRS( UPLO, N, NRHS, AFP, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solution and
+*     compute error bounds and backward error estimates for it.
+*
+      CALL DPPRFS( UPLO, N, NRHS, AP, AFP, B, LDB, X, LDX, FERR, BERR,
+     $             WORK, IWORK, INFO )
+*
+*     Transform the solution matrix X to a solution of the original
+*     system.
+*
+      IF( RCEQU ) THEN
+         DO 50 J = 1, NRHS
+            DO 40 I = 1, N
+               X( I, J ) = S( I )*X( I, J )
+   40       CONTINUE
+   50    CONTINUE
+         DO 60 J = 1, NRHS
+            FERR( J ) = FERR( J ) / SCOND
+   60    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DPPSVX
+*
+      END
diff --git a/costa/native/external/lapack/dpptrf.f b/costa/native/external/lapack/dpptrf.f
new file mode 100644
index 000000000..79fb4d762
--- /dev/null
+++ b/costa/native/external/lapack/dpptrf.f
@@ -0,0 +1,178 @@
+      SUBROUTINE DPPTRF( UPLO, N, AP, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPPTRF computes the Cholesky factorization of a real symmetric
+*  positive definite matrix A stored in packed format.
+*
+*  The factorization has the form
+*     A = U**T * U,  if UPLO = 'U', or
+*     A = L  * L**T,  if UPLO = 'L',
+*  where U is an upper triangular matrix and L is lower triangular.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*          See below for further details.
+*
+*          On exit, if INFO = 0, the triangular factor U or L from the
+*          Cholesky factorization A = U**T*U or A = L*L**T, in the same
+*          storage format as A.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i is not
+*                positive definite, and the factorization could not be
+*                completed.
+*
+*  Further Details
+*  ======= =======
+*
+*  The packed storage scheme is illustrated by the following example
+*  when N = 4, UPLO = 'U':
+*
+*  Two-dimensional storage of the symmetric matrix A:
+*
+*     a11 a12 a13 a14
+*         a22 a23 a24
+*             a33 a34     (aij = aji)
+*                 a44
+*
+*  Packed storage of the upper triangle of A:
+*
+*  AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ]
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, JC, JJ
+      DOUBLE PRECISION   AJJ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DDOT
+      EXTERNAL           LSAME, DDOT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSCAL, DSPR, DTPSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPPTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Compute the Cholesky factorization A = U'*U.
+*
+         JJ = 0
+         DO 10 J = 1, N
+            JC = JJ + 1
+            JJ = JJ + J
+*
+*           Compute elements 1:J-1 of column J.
+*
+            IF( J.GT.1 )
+     $         CALL DTPSV( 'Upper', 'Transpose', 'Non-unit', J-1, AP,
+     $                     AP( JC ), 1 )
+*
+*           Compute U(J,J) and test for non-positive-definiteness.
+*
+            AJJ = AP( JJ ) - DDOT( J-1, AP( JC ), 1, AP( JC ), 1 )
+            IF( AJJ.LE.ZERO ) THEN
+               AP( JJ ) = AJJ
+               GO TO 30
+            END IF
+            AP( JJ ) = SQRT( AJJ )
+   10    CONTINUE
+      ELSE
+*
+*        Compute the Cholesky factorization A = L*L'.
+*
+         JJ = 1
+         DO 20 J = 1, N
+*
+*           Compute L(J,J) and test for non-positive-definiteness.
+*
+            AJJ = AP( JJ )
+            IF( AJJ.LE.ZERO ) THEN
+               AP( JJ ) = AJJ
+               GO TO 30
+            END IF
+            AJJ = SQRT( AJJ )
+            AP( JJ ) = AJJ
+*
+*           Compute elements J+1:N of column J and update the trailing
+*           submatrix.
+*
+            IF( J.LT.N ) THEN
+               CALL DSCAL( N-J, ONE / AJJ, AP( JJ+1 ), 1 )
+               CALL DSPR( 'Lower', N-J, -ONE, AP( JJ+1 ), 1,
+     $                    AP( JJ+N-J+1 ) )
+               JJ = JJ + N - J + 1
+            END IF
+   20    CONTINUE
+      END IF
+      GO TO 40
+*
+   30 CONTINUE
+      INFO = J
+*
+   40 CONTINUE
+      RETURN
+*
+*     End of DPPTRF
+*
+      END
diff --git a/costa/native/external/lapack/dpptri.f b/costa/native/external/lapack/dpptri.f
new file mode 100644
index 000000000..f05bef4af
--- /dev/null
+++ b/costa/native/external/lapack/dpptri.f
@@ -0,0 +1,129 @@
+      SUBROUTINE DPPTRI( UPLO, N, AP, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPPTRI computes the inverse of a real symmetric positive definite
+*  matrix A using the Cholesky factorization A = U**T*U or A = L*L**T
+*  computed by DPPTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangular factor is stored in AP;
+*          = 'L':  Lower triangular factor is stored in AP.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          On entry, the triangular factor U or L from the Cholesky
+*          factorization A = U**T*U or A = L*L**T, packed columnwise as
+*          a linear array.  The j-th column of U or L is stored in the
+*          array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n.
+*
+*          On exit, the upper or lower triangle of the (symmetric)
+*          inverse of A, overwriting the input factor U or L.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the (i,i) element of the factor U or L is
+*                zero, and the inverse could not be computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, JC, JJ, JJN
+      DOUBLE PRECISION   AJJ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DDOT
+      EXTERNAL           LSAME, DDOT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSCAL, DSPR, DTPMV, DTPTRI, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPPTRI', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Invert the triangular Cholesky factor U or L.
+*
+      CALL DTPTRI( UPLO, 'Non-unit', N, AP, INFO )
+      IF( INFO.GT.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Compute the product inv(U) * inv(U)'.
+*
+         JJ = 0
+         DO 10 J = 1, N
+            JC = JJ + 1
+            JJ = JJ + J
+            IF( J.GT.1 )
+     $         CALL DSPR( 'Upper', J-1, ONE, AP( JC ), 1, AP )
+            AJJ = AP( JJ )
+            CALL DSCAL( J, AJJ, AP( JC ), 1 )
+   10    CONTINUE
+*
+      ELSE
+*
+*        Compute the product inv(L)' * inv(L).
+*
+         JJ = 1
+         DO 20 J = 1, N
+            JJN = JJ + N - J + 1
+            AP( JJ ) = DDOT( N-J+1, AP( JJ ), 1, AP( JJ ), 1 )
+            IF( J.LT.N )
+     $         CALL DTPMV( 'Lower', 'Transpose', 'Non-unit', N-J,
+     $                     AP( JJN ), AP( JJ+1 ), 1 )
+            JJ = JJN
+   20    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DPPTRI
+*
+      END
diff --git a/costa/native/external/lapack/dpptrs.f b/costa/native/external/lapack/dpptrs.f
new file mode 100644
index 000000000..bcc8390ad
--- /dev/null
+++ b/costa/native/external/lapack/dpptrs.f
@@ -0,0 +1,135 @@
+      SUBROUTINE DPPTRS( UPLO, N, NRHS, AP, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AP( * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPPTRS solves a system of linear equations A*X = B with a symmetric
+*  positive definite matrix A in packed storage using the Cholesky
+*  factorization A = U**T*U or A = L*L**T computed by DPPTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AP      (input) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**T*U or A = L*L**T, packed columnwise in a linear
+*          array.  The j-th column of U or L is stored in the array AP
+*          as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n.
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DTPSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPPTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Solve A*X = B where A = U'*U.
+*
+         DO 10 I = 1, NRHS
+*
+*           Solve U'*X = B, overwriting B with X.
+*
+            CALL DTPSV( 'Upper', 'Transpose', 'Non-unit', N, AP,
+     $                  B( 1, I ), 1 )
+*
+*           Solve U*X = B, overwriting B with X.
+*
+            CALL DTPSV( 'Upper', 'No transpose', 'Non-unit', N, AP,
+     $                  B( 1, I ), 1 )
+   10    CONTINUE
+      ELSE
+*
+*        Solve A*X = B where A = L*L'.
+*
+         DO 20 I = 1, NRHS
+*
+*           Solve L*Y = B, overwriting B with X.
+*
+            CALL DTPSV( 'Lower', 'No transpose', 'Non-unit', N, AP,
+     $                  B( 1, I ), 1 )
+*
+*           Solve L'*X = Y, overwriting B with X.
+*
+            CALL DTPSV( 'Lower', 'Transpose', 'Non-unit', N, AP,
+     $                  B( 1, I ), 1 )
+   20    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DPPTRS
+*
+      END
diff --git a/costa/native/external/lapack/dptcon.f b/costa/native/external/lapack/dptcon.f
new file mode 100644
index 000000000..291a4f758
--- /dev/null
+++ b/costa/native/external/lapack/dptcon.f
@@ -0,0 +1,150 @@
+      SUBROUTINE DPTCON( N, D, E, ANORM, RCOND, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, N
+      DOUBLE PRECISION   ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * ), E( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPTCON computes the reciprocal of the condition number (in the
+*  1-norm) of a real symmetric positive definite tridiagonal matrix
+*  using the factorization A = L*D*L**T or A = U**T*D*U computed by
+*  DPTTRF.
+*
+*  Norm(inv(A)) is computed by a direct method, and the reciprocal of
+*  the condition number is computed as
+*               RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  D       (input) DOUBLE PRECISION array, dimension (N)
+*          The n diagonal elements of the diagonal matrix D from the
+*          factorization of A, as computed by DPTTRF.
+*
+*  E       (input) DOUBLE PRECISION array, dimension (N-1)
+*          The (n-1) off-diagonal elements of the unit bidiagonal factor
+*          U or L from the factorization of A,  as computed by DPTTRF.
+*
+*  ANORM   (input) DOUBLE PRECISION
+*          The 1-norm of the original matrix A.
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is the
+*          1-norm of inv(A) computed in this routine.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The method used is described in Nicholas J. Higham, "Efficient
+*  Algorithms for Computing the Condition Number of a Tridiagonal
+*  Matrix", SIAM J. Sci. Stat. Comput., Vol. 7, No. 1, January 1986.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IX
+      DOUBLE PRECISION   AINVNM
+*     ..
+*     .. External Functions ..
+      INTEGER            IDAMAX
+      EXTERNAL           IDAMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments.
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPTCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.EQ.ZERO ) THEN
+         RETURN
+      END IF
+*
+*     Check that D(1:N) is positive.
+*
+      DO 10 I = 1, N
+         IF( D( I ).LE.ZERO )
+     $      RETURN
+   10 CONTINUE
+*
+*     Solve M(A) * x = e, where M(A) = (m(i,j)) is given by
+*
+*        m(i,j) =  abs(A(i,j)), i = j,
+*        m(i,j) = -abs(A(i,j)), i .ne. j,
+*
+*     and e = [ 1, 1, ..., 1 ]'.  Note M(A) = M(L)*D*M(L)'.
+*
+*     Solve M(L) * x = e.
+*
+      WORK( 1 ) = ONE
+      DO 20 I = 2, N
+         WORK( I ) = ONE + WORK( I-1 )*ABS( E( I-1 ) )
+   20 CONTINUE
+*
+*     Solve D * M(L)' * x = b.
+*
+      WORK( N ) = WORK( N ) / D( N )
+      DO 30 I = N - 1, 1, -1
+         WORK( I ) = WORK( I ) / D( I ) + WORK( I+1 )*ABS( E( I ) )
+   30 CONTINUE
+*
+*     Compute AINVNM = max(x(i)), 1<=i<=n.
+*
+      IX = IDAMAX( N, WORK, 1 )
+      AINVNM = ABS( WORK( IX ) )
+*
+*     Compute the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+      RETURN
+*
+*     End of DPTCON
+*
+      END
diff --git a/costa/native/external/lapack/dpteqr.f b/costa/native/external/lapack/dpteqr.f
new file mode 100644
index 000000000..c73c1e6bb
--- /dev/null
+++ b/costa/native/external/lapack/dpteqr.f
@@ -0,0 +1,190 @@
+      SUBROUTINE DPTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPZ
+      INTEGER            INFO, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * ), E( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPTEQR computes all eigenvalues and, optionally, eigenvectors of a
+*  symmetric positive definite tridiagonal matrix by first factoring the
+*  matrix using DPTTRF, and then calling DBDSQR to compute the singular
+*  values of the bidiagonal factor.
+*
+*  This routine computes the eigenvalues of the positive definite
+*  tridiagonal matrix to high relative accuracy.  This means that if the
+*  eigenvalues range over many orders of magnitude in size, then the
+*  small eigenvalues and corresponding eigenvectors will be computed
+*  more accurately than, for example, with the standard QR method.
+*
+*  The eigenvectors of a full or band symmetric positive definite matrix
+*  can also be found if DSYTRD, DSPTRD, or DSBTRD has been used to
+*  reduce this matrix to tridiagonal form. (The reduction to tridiagonal
+*  form, however, may preclude the possibility of obtaining high
+*  relative accuracy in the small eigenvalues of the original matrix, if
+*  these eigenvalues range over many orders of magnitude.)
+*
+*  Arguments
+*  =========
+*
+*  COMPZ   (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only.
+*          = 'V':  Compute eigenvectors of original symmetric
+*                  matrix also.  Array Z contains the orthogonal
+*                  matrix used to reduce the original matrix to
+*                  tridiagonal form.
+*          = 'I':  Compute eigenvectors of tridiagonal matrix also.
+*
+*  N       (input) INTEGER
+*          The order of the matrix.  N >= 0.
+*
+*  D       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the n diagonal elements of the tridiagonal
+*          matrix.
+*          On normal exit, D contains the eigenvalues, in descending
+*          order.
+*
+*  E       (input/output) DOUBLE PRECISION array, dimension (N-1)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix.
+*          On exit, E has been destroyed.
+*
+*  Z       (input/output) DOUBLE PRECISION array, dimension (LDZ, N)
+*          On entry, if COMPZ = 'V', the orthogonal matrix used in the
+*          reduction to tridiagonal form.
+*          On exit, if COMPZ = 'V', the orthonormal eigenvectors of the
+*          original symmetric matrix;
+*          if COMPZ = 'I', the orthonormal eigenvectors of the
+*          tridiagonal matrix.
+*          If INFO > 0 on exit, Z contains the eigenvectors associated
+*          with only the stored eigenvalues.
+*          If  COMPZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          COMPZ = 'V' or 'I', LDZ >= max(1,N).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (4*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = i, and i is:
+*                <= N  the Cholesky factorization of the matrix could
+*                      not be performed because the i-th principal minor
+*                      was not positive definite.
+*                > N   the SVD algorithm failed to converge;
+*                      if INFO = N+i, i off-diagonal elements of the
+*                      bidiagonal factor did not converge to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DBDSQR, DLASET, DPTTRF, XERBLA
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   C( 1, 1 ), VT( 1, 1 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, ICOMPZ, NRU
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( LSAME( COMPZ, 'N' ) ) THEN
+         ICOMPZ = 0
+      ELSE IF( LSAME( COMPZ, 'V' ) ) THEN
+         ICOMPZ = 1
+      ELSE IF( LSAME( COMPZ, 'I' ) ) THEN
+         ICOMPZ = 2
+      ELSE
+         ICOMPZ = -1
+      END IF
+      IF( ICOMPZ.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1,
+     $         N ) ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPTEQR', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( ICOMPZ.GT.0 )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+      IF( ICOMPZ.EQ.2 )
+     $   CALL DLASET( 'Full', N, N, ZERO, ONE, Z, LDZ )
+*
+*     Call DPTTRF to factor the matrix.
+*
+      CALL DPTTRF( N, D, E, INFO )
+      IF( INFO.NE.0 )
+     $   RETURN
+      DO 10 I = 1, N
+         D( I ) = SQRT( D( I ) )
+   10 CONTINUE
+      DO 20 I = 1, N - 1
+         E( I ) = E( I )*D( I )
+   20 CONTINUE
+*
+*     Call DBDSQR to compute the singular values/vectors of the
+*     bidiagonal factor.
+*
+      IF( ICOMPZ.GT.0 ) THEN
+         NRU = N
+      ELSE
+         NRU = 0
+      END IF
+      CALL DBDSQR( 'Lower', N, 0, NRU, 0, D, E, VT, 1, Z, LDZ, C, 1,
+     $             WORK, INFO )
+*
+*     Square the singular values.
+*
+      IF( INFO.EQ.0 ) THEN
+         DO 30 I = 1, N
+            D( I ) = D( I )*D( I )
+   30    CONTINUE
+      ELSE
+         INFO = N + INFO
+      END IF
+*
+      RETURN
+*
+*     End of DPTEQR
+*
+      END
diff --git a/costa/native/external/lapack/dptrfs.f b/costa/native/external/lapack/dptrfs.f
new file mode 100644
index 000000000..21b555347
--- /dev/null
+++ b/costa/native/external/lapack/dptrfs.f
@@ -0,0 +1,302 @@
+      SUBROUTINE DPTRFS( N, NRHS, D, E, DF, EF, B, LDB, X, LDX, FERR,
+     $                   BERR, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   B( LDB, * ), BERR( * ), D( * ), DF( * ),
+     $                   E( * ), EF( * ), FERR( * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPTRFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is symmetric positive definite
+*  and tridiagonal, and provides error bounds and backward error
+*  estimates for the solution.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  D       (input) DOUBLE PRECISION array, dimension (N)
+*          The n diagonal elements of the tridiagonal matrix A.
+*
+*  E       (input) DOUBLE PRECISION array, dimension (N-1)
+*          The (n-1) subdiagonal elements of the tridiagonal matrix A.
+*
+*  DF      (input) DOUBLE PRECISION array, dimension (N)
+*          The n diagonal elements of the diagonal matrix D from the
+*          factorization computed by DPTTRF.
+*
+*  EF      (input) DOUBLE PRECISION array, dimension (N-1)
+*          The (n-1) subdiagonal elements of the unit bidiagonal factor
+*          L from the factorization computed by DPTTRF.
+*
+*  B       (input) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by DPTTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D+0 )
+      DOUBLE PRECISION   THREE
+      PARAMETER          ( THREE = 3.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            COUNT, I, IX, J, NZ
+      DOUBLE PRECISION   BI, CX, DX, EPS, EX, LSTRES, S, SAFE1, SAFE2,
+     $                   SAFMIN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DPTTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. External Functions ..
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           IDAMAX, DLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPTRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = 4
+      EPS = DLAMCH( 'Epsilon' )
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 90 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - A * X.  Also compute
+*        abs(A)*abs(x) + abs(b) for use in the backward error bound.
+*
+         IF( N.EQ.1 ) THEN
+            BI = B( 1, J )
+            DX = D( 1 )*X( 1, J )
+            WORK( N+1 ) = BI - DX
+            WORK( 1 ) = ABS( BI ) + ABS( DX )
+         ELSE
+            BI = B( 1, J )
+            DX = D( 1 )*X( 1, J )
+            EX = E( 1 )*X( 2, J )
+            WORK( N+1 ) = BI - DX - EX
+            WORK( 1 ) = ABS( BI ) + ABS( DX ) + ABS( EX )
+            DO 30 I = 2, N - 1
+               BI = B( I, J )
+               CX = E( I-1 )*X( I-1, J )
+               DX = D( I )*X( I, J )
+               EX = E( I )*X( I+1, J )
+               WORK( N+I ) = BI - CX - DX - EX
+               WORK( I ) = ABS( BI ) + ABS( CX ) + ABS( DX ) + ABS( EX )
+   30       CONTINUE
+            BI = B( N, J )
+            CX = E( N-1 )*X( N-1, J )
+            DX = D( N )*X( N, J )
+            WORK( N+N ) = BI - CX - DX
+            WORK( N ) = ABS( BI ) + ABS( CX ) + ABS( DX )
+         END IF
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         S = ZERO
+         DO 40 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) )
+            ELSE
+               S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) /
+     $             ( WORK( I )+SAFE1 ) )
+            END IF
+   40    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL DPTTRS( N, 1, DF, EF, WORK( N+1 ), N, INFO )
+            CALL DAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(A))*
+*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(A) is the inverse of A
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(A)*abs(X) + abs(B) is less than SAFE2.
+*
+         DO 50 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I )
+            ELSE
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1
+            END IF
+   50    CONTINUE
+         IX = IDAMAX( N, WORK, 1 )
+         FERR( J ) = WORK( IX )
+*
+*        Estimate the norm of inv(A).
+*
+*        Solve M(A) * x = e, where M(A) = (m(i,j)) is given by
+*
+*           m(i,j) =  abs(A(i,j)), i = j,
+*           m(i,j) = -abs(A(i,j)), i .ne. j,
+*
+*        and e = [ 1, 1, ..., 1 ]'.  Note M(A) = M(L)*D*M(L)'.
+*
+*        Solve M(L) * x = e.
+*
+         WORK( 1 ) = ONE
+         DO 60 I = 2, N
+            WORK( I ) = ONE + WORK( I-1 )*ABS( EF( I-1 ) )
+   60    CONTINUE
+*
+*        Solve D * M(L)' * x = b.
+*
+         WORK( N ) = WORK( N ) / DF( N )
+         DO 70 I = N - 1, 1, -1
+            WORK( I ) = WORK( I ) / DF( I ) + WORK( I+1 )*ABS( EF( I ) )
+   70    CONTINUE
+*
+*        Compute norm(inv(A)) = max(x(i)), 1<=i<=n.
+*
+         IX = IDAMAX( N, WORK, 1 )
+         FERR( J ) = FERR( J )*ABS( WORK( IX ) )
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 80 I = 1, N
+            LSTRES = MAX( LSTRES, ABS( X( I, J ) ) )
+   80    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+   90 CONTINUE
+*
+      RETURN
+*
+*     End of DPTRFS
+*
+      END
diff --git a/costa/native/external/lapack/dptsv.f b/costa/native/external/lapack/dptsv.f
new file mode 100644
index 000000000..8ec8d7773
--- /dev/null
+++ b/costa/native/external/lapack/dptsv.f
@@ -0,0 +1,100 @@
+      SUBROUTINE DPTSV( N, NRHS, D, E, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 25, 1997
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   B( LDB, * ), D( * ), E( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPTSV computes the solution to a real system of linear equations
+*  A*X = B, where A is an N-by-N symmetric positive definite tridiagonal
+*  matrix, and X and B are N-by-NRHS matrices.
+*
+*  A is factored as A = L*D*L**T, and the factored form of A is then
+*  used to solve the system of equations.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  D       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the n diagonal elements of the tridiagonal matrix
+*          A.  On exit, the n diagonal elements of the diagonal matrix
+*          D from the factorization A = L*D*L**T.
+*
+*  E       (input/output) DOUBLE PRECISION array, dimension (N-1)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix A.  On exit, the (n-1) subdiagonal elements of the
+*          unit bidiagonal factor L from the L*D*L**T factorization of
+*          A.  (E can also be regarded as the superdiagonal of the unit
+*          bidiagonal factor U from the U**T*D*U factorization of A.)
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i is not
+*                positive definite, and the solution has not been
+*                computed.  The factorization has not been completed
+*                unless i = N.
+*
+*  =====================================================================
+*
+*     .. External Subroutines ..
+      EXTERNAL           DPTTRF, DPTTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPTSV ', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the L*D*L' (or U'*D*U) factorization of A.
+*
+      CALL DPTTRF( N, D, E, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL DPTTRS( N, NRHS, D, E, B, LDB, INFO )
+      END IF
+      RETURN
+*
+*     End of DPTSV
+*
+      END
diff --git a/costa/native/external/lapack/dptsvx.f b/costa/native/external/lapack/dptsvx.f
new file mode 100644
index 000000000..c6415e79a
--- /dev/null
+++ b/costa/native/external/lapack/dptsvx.f
@@ -0,0 +1,235 @@
+      SUBROUTINE DPTSVX( FACT, N, NRHS, D, E, DF, EF, B, LDB, X, LDX,
+     $                   RCOND, FERR, BERR, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          FACT
+      INTEGER            INFO, LDB, LDX, N, NRHS
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   B( LDB, * ), BERR( * ), D( * ), DF( * ),
+     $                   E( * ), EF( * ), FERR( * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPTSVX uses the factorization A = L*D*L**T to compute the solution
+*  to a real system of linear equations A*X = B, where A is an N-by-N
+*  symmetric positive definite tridiagonal matrix and X and B are
+*  N-by-NRHS matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'N', the matrix A is factored as A = L*D*L**T, where L
+*     is a unit lower bidiagonal matrix and D is diagonal.  The
+*     factorization can also be regarded as having the form
+*     A = U**T*D*U.
+*
+*  2. If the leading i-by-i principal minor is not positive definite,
+*     then the routine returns with INFO = i. Otherwise, the factored
+*     form of A is used to estimate the condition number of the matrix
+*     A.  If the reciprocal of the condition number is less than machine
+*     precision, INFO = N+1 is returned as a warning, but the routine
+*     still goes on to solve for X and compute error bounds as
+*     described below.
+*
+*  3. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  4. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of A has been
+*          supplied on entry.
+*          = 'F':  On entry, DF and EF contain the factored form of A.
+*                  D, E, DF, and EF will not be modified.
+*          = 'N':  The matrix A will be copied to DF and EF and
+*                  factored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  D       (input) DOUBLE PRECISION array, dimension (N)
+*          The n diagonal elements of the tridiagonal matrix A.
+*
+*  E       (input) DOUBLE PRECISION array, dimension (N-1)
+*          The (n-1) subdiagonal elements of the tridiagonal matrix A.
+*
+*  DF      (input or output) DOUBLE PRECISION array, dimension (N)
+*          If FACT = 'F', then DF is an input argument and on entry
+*          contains the n diagonal elements of the diagonal matrix D
+*          from the L*D*L**T factorization of A.
+*          If FACT = 'N', then DF is an output argument and on exit
+*          contains the n diagonal elements of the diagonal matrix D
+*          from the L*D*L**T factorization of A.
+*
+*  EF      (input or output) DOUBLE PRECISION array, dimension (N-1)
+*          If FACT = 'F', then EF is an input argument and on entry
+*          contains the (n-1) subdiagonal elements of the unit
+*          bidiagonal factor L from the L*D*L**T factorization of A.
+*          If FACT = 'N', then EF is an output argument and on exit
+*          contains the (n-1) subdiagonal elements of the unit
+*          bidiagonal factor L from the L*D*L**T factorization of A.
+*
+*  B       (input) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          The N-by-NRHS right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) DOUBLE PRECISION array, dimension (LDX,NRHS)
+*          If INFO = 0 of INFO = N+1, the N-by-NRHS solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The reciprocal condition number of the matrix A.  If RCOND
+*          is less than the machine precision (in particular, if
+*          RCOND = 0), the matrix is singular to working precision.
+*          This condition is indicated by a return code of INFO > 0.
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in any
+*          element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= N:  the leading minor of order i of A is
+*                       not positive definite, so the factorization
+*                       could not be completed, and the solution has not
+*                       been computed. RCOND = 0 is returned.
+*                = N+1: U is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOFACT
+      DOUBLE PRECISION   ANORM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, DLANST
+      EXTERNAL           LSAME, DLAMCH, DLANST
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLACPY, DPTCON, DPTRFS, DPTTRF, DPTTRS,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPTSVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( NOFACT ) THEN
+*
+*        Compute the L*D*L' (or U'*D*U) factorization of A.
+*
+         CALL DCOPY( N, D, 1, DF, 1 )
+         IF( N.GT.1 )
+     $      CALL DCOPY( N-1, E, 1, EF, 1 )
+         CALL DPTTRF( N, DF, EF, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      ANORM = DLANST( '1', N, D, E )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL DPTCON( N, DF, EF, ANORM, RCOND, WORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.DLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution vectors X.
+*
+      CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL DPTTRS( N, NRHS, DF, EF, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solutions and
+*     compute error bounds and backward error estimates for them.
+*
+      CALL DPTRFS( N, NRHS, D, E, DF, EF, B, LDB, X, LDX, FERR, BERR,
+     $             WORK, INFO )
+*
+      RETURN
+*
+*     End of DPTSVX
+*
+      END
diff --git a/costa/native/external/lapack/dpttrf.f b/costa/native/external/lapack/dpttrf.f
new file mode 100644
index 000000000..cb814a3d5
--- /dev/null
+++ b/costa/native/external/lapack/dpttrf.f
@@ -0,0 +1,153 @@
+      SUBROUTINE DPTTRF( N, D, E, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * ), E( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPTTRF computes the L*D*L' factorization of a real symmetric
+*  positive definite tridiagonal matrix A.  The factorization may also
+*  be regarded as having the form A = U'*D*U.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  D       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the n diagonal elements of the tridiagonal matrix
+*          A.  On exit, the n diagonal elements of the diagonal matrix
+*          D from the L*D*L' factorization of A.
+*
+*  E       (input/output) DOUBLE PRECISION array, dimension (N-1)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix A.  On exit, the (n-1) subdiagonal elements of the
+*          unit bidiagonal factor L from the L*D*L' factorization of A.
+*          E can also be regarded as the superdiagonal of the unit
+*          bidiagonal factor U from the U'*D*U factorization of A.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*          > 0: if INFO = k, the leading minor of order k is not
+*               positive definite; if k < N, the factorization could not
+*               be completed, while if k = N, the factorization was
+*               completed, but D(N) = 0.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, I4
+      DOUBLE PRECISION   EI
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MOD
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+         CALL XERBLA( 'DPTTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Compute the L*D*L' (or U'*D*U) factorization of A.
+*
+      I4 = MOD( N-1, 4 )
+      DO 10 I = 1, I4
+         IF( D( I ).LE.ZERO ) THEN
+            INFO = I
+            GO TO 30
+         END IF
+         EI = E( I )
+         E( I ) = EI / D( I )
+         D( I+1 ) = D( I+1 ) - E( I )*EI
+   10 CONTINUE
+*
+      DO 20 I = I4 + 1, N - 4, 4
+*
+*        Drop out of the loop if d(i) <= 0: the matrix is not positive
+*        definite.
+*
+         IF( D( I ).LE.ZERO ) THEN
+            INFO = I
+            GO TO 30
+         END IF
+*
+*        Solve for e(i) and d(i+1).
+*
+         EI = E( I )
+         E( I ) = EI / D( I )
+         D( I+1 ) = D( I+1 ) - E( I )*EI
+*
+         IF( D( I+1 ).LE.ZERO ) THEN
+            INFO = I + 1
+            GO TO 30
+         END IF
+*
+*        Solve for e(i+1) and d(i+2).
+*
+         EI = E( I+1 )
+         E( I+1 ) = EI / D( I+1 )
+         D( I+2 ) = D( I+2 ) - E( I+1 )*EI
+*
+         IF( D( I+2 ).LE.ZERO ) THEN
+            INFO = I + 2
+            GO TO 30
+         END IF
+*
+*        Solve for e(i+2) and d(i+3).
+*
+         EI = E( I+2 )
+         E( I+2 ) = EI / D( I+2 )
+         D( I+3 ) = D( I+3 ) - E( I+2 )*EI
+*
+         IF( D( I+3 ).LE.ZERO ) THEN
+            INFO = I + 3
+            GO TO 30
+         END IF
+*
+*        Solve for e(i+3) and d(i+4).
+*
+         EI = E( I+3 )
+         E( I+3 ) = EI / D( I+3 )
+         D( I+4 ) = D( I+4 ) - E( I+3 )*EI
+   20 CONTINUE
+*
+*     Check d(n) for positive definiteness.
+*
+      IF( D( N ).LE.ZERO )
+     $   INFO = N
+*
+   30 CONTINUE
+      RETURN
+*
+*     End of DPTTRF
+*
+      END
diff --git a/costa/native/external/lapack/dpttrs.f b/costa/native/external/lapack/dpttrs.f
new file mode 100644
index 000000000..d25d20c7a
--- /dev/null
+++ b/costa/native/external/lapack/dpttrs.f
@@ -0,0 +1,115 @@
+      SUBROUTINE DPTTRS( N, NRHS, D, E, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   B( LDB, * ), D( * ), E( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPTTRS solves a tridiagonal system of the form
+*     A * X = B
+*  using the L*D*L' factorization of A computed by DPTTRF.  D is a
+*  diagonal matrix specified in the vector D, L is a unit bidiagonal
+*  matrix whose subdiagonal is specified in the vector E, and X and B
+*  are N by NRHS matrices.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the tridiagonal matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  D       (input) DOUBLE PRECISION array, dimension (N)
+*          The n diagonal elements of the diagonal matrix D from the
+*          L*D*L' factorization of A.
+*
+*  E       (input) DOUBLE PRECISION array, dimension (N-1)
+*          The (n-1) subdiagonal elements of the unit bidiagonal factor
+*          L from the L*D*L' factorization of A.  E can also be regarded
+*          as the superdiagonal of the unit bidiagonal factor U from the
+*          factorization A = U'*D*U.
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the right hand side vectors B for the system of
+*          linear equations.
+*          On exit, the solution vectors, X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            J, JB, NB
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DPTTS2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments.
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DPTTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+*     Determine the number of right-hand sides to solve at a time.
+*
+      IF( NRHS.EQ.1 ) THEN
+         NB = 1
+      ELSE
+         NB = MAX( 1, ILAENV( 1, 'DPTTRS', ' ', N, NRHS, -1, -1 ) )
+      END IF
+*
+      IF( NB.GE.NRHS ) THEN
+         CALL DPTTS2( N, NRHS, D, E, B, LDB )
+      ELSE
+         DO 10 J = 1, NRHS, NB
+            JB = MIN( NRHS-J+1, NB )
+            CALL DPTTS2( N, JB, D, E, B( 1, J ), LDB )
+   10    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DPTTRS
+*
+      END
diff --git a/costa/native/external/lapack/dptts2.f b/costa/native/external/lapack/dptts2.f
new file mode 100644
index 000000000..b56ad7828
--- /dev/null
+++ b/costa/native/external/lapack/dptts2.f
@@ -0,0 +1,94 @@
+      SUBROUTINE DPTTS2( N, NRHS, D, E, B, LDB )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   B( LDB, * ), D( * ), E( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DPTTS2 solves a tridiagonal system of the form
+*     A * X = B
+*  using the L*D*L' factorization of A computed by DPTTRF.  D is a
+*  diagonal matrix specified in the vector D, L is a unit bidiagonal
+*  matrix whose subdiagonal is specified in the vector E, and X and B
+*  are N by NRHS matrices.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the tridiagonal matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  D       (input) DOUBLE PRECISION array, dimension (N)
+*          The n diagonal elements of the diagonal matrix D from the
+*          L*D*L' factorization of A.
+*
+*  E       (input) DOUBLE PRECISION array, dimension (N-1)
+*          The (n-1) subdiagonal elements of the unit bidiagonal factor
+*          L from the L*D*L' factorization of A.  E can also be regarded
+*          as the superdiagonal of the unit bidiagonal factor U from the
+*          factorization A = U'*D*U.
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the right hand side vectors B for the system of
+*          linear equations.
+*          On exit, the solution vectors, X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, J
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSCAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.1 ) THEN
+         IF( N.EQ.1 )
+     $      CALL DSCAL( NRHS, 1.D0 / D( 1 ), B, LDB )
+         RETURN
+      END IF
+*
+*     Solve A * X = B using the factorization A = L*D*L',
+*     overwriting each right hand side vector with its solution.
+*
+      DO 30 J = 1, NRHS
+*
+*           Solve L * x = b.
+*
+         DO 10 I = 2, N
+            B( I, J ) = B( I, J ) - B( I-1, J )*E( I-1 )
+   10    CONTINUE
+*
+*           Solve D * L' * x = b.
+*
+         B( N, J ) = B( N, J ) / D( N )
+         DO 20 I = N - 1, 1, -1
+            B( I, J ) = B( I, J ) / D( I ) - B( I+1, J )*E( I )
+   20    CONTINUE
+   30 CONTINUE
+*
+      RETURN
+*
+*     End of DPTTS2
+*
+      END
diff --git a/costa/native/external/lapack/drscl.f b/costa/native/external/lapack/drscl.f
new file mode 100644
index 000000000..d268c231f
--- /dev/null
+++ b/costa/native/external/lapack/drscl.f
@@ -0,0 +1,115 @@
+      SUBROUTINE DRSCL( N, SA, SX, INCX )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+      DOUBLE PRECISION   SA
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   SX( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DRSCL multiplies an n-element real vector x by the real scalar 1/a.
+*  This is done without overflow or underflow as long as
+*  the final result x/a does not overflow or underflow.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of components of the vector x.
+*
+*  SA      (input) DOUBLE PRECISION
+*          The scalar a which is used to divide each component of x.
+*          SA must be >= 0, or the subroutine will divide by zero.
+*
+*  SX      (input/output) DOUBLE PRECISION array, dimension
+*                         (1+(N-1)*abs(INCX))
+*          The n-element vector x.
+*
+*  INCX    (input) INTEGER
+*          The increment between successive values of the vector SX.
+*          > 0:  SX(1) = X(1) and SX(1+(i-1)*INCX) = x(i),     1< i<= n
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            DONE
+      DOUBLE PRECISION   BIGNUM, CDEN, CDEN1, CNUM, CNUM1, MUL, SMLNUM
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSCAL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 )
+     $   RETURN
+*
+*     Get machine parameters
+*
+      SMLNUM = DLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+*
+*     Initialize the denominator to SA and the numerator to 1.
+*
+      CDEN = SA
+      CNUM = ONE
+*
+   10 CONTINUE
+      CDEN1 = CDEN*SMLNUM
+      CNUM1 = CNUM / BIGNUM
+      IF( ABS( CDEN1 ).GT.ABS( CNUM ) .AND. CNUM.NE.ZERO ) THEN
+*
+*        Pre-multiply X by SMLNUM if CDEN is large compared to CNUM.
+*
+         MUL = SMLNUM
+         DONE = .FALSE.
+         CDEN = CDEN1
+      ELSE IF( ABS( CNUM1 ).GT.ABS( CDEN ) ) THEN
+*
+*        Pre-multiply X by BIGNUM if CDEN is small compared to CNUM.
+*
+         MUL = BIGNUM
+         DONE = .FALSE.
+         CNUM = CNUM1
+      ELSE
+*
+*        Multiply X by CNUM / CDEN and return.
+*
+         MUL = CNUM / CDEN
+         DONE = .TRUE.
+      END IF
+*
+*     Scale the vector X by MUL
+*
+      CALL DSCAL( N, MUL, SX, INCX )
+*
+      IF( .NOT.DONE )
+     $   GO TO 10
+*
+      RETURN
+*
+*     End of DRSCL
+*
+      END
diff --git a/costa/native/external/lapack/dsbev.f b/costa/native/external/lapack/dsbev.f
new file mode 100644
index 000000000..919764536
--- /dev/null
+++ b/costa/native/external/lapack/dsbev.f
@@ -0,0 +1,206 @@
+      SUBROUTINE DSBEV( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK,
+     $                  INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, KD, LDAB, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSBEV computes all the eigenvalues and, optionally, eigenvectors of
+*  a real symmetric band matrix A.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) DOUBLE PRECISION array, dimension (LDAB, N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*          On exit, AB is overwritten by values generated during the
+*          reduction to tridiagonal form.  If UPLO = 'U', the first
+*          superdiagonal and the diagonal of the tridiagonal matrix T
+*          are returned in rows KD and KD+1 of AB, and if UPLO = 'L',
+*          the diagonal and first subdiagonal of T are returned in the
+*          first two rows of AB.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD + 1.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal
+*          eigenvectors of the matrix A, with the i-th column of Z
+*          holding the eigenvector associated with W(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (max(1,3*N-2))
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the algorithm failed to converge; i
+*                off-diagonal elements of an intermediate tridiagonal
+*                form did not converge to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LOWER, WANTZ
+      INTEGER            IINFO, IMAX, INDE, INDWRK, ISCALE
+      DOUBLE PRECISION   ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
+     $                   SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, DLANSB
+      EXTERNAL           LSAME, DLAMCH, DLANSB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASCL, DSBTRD, DSCAL, DSTEQR, DSTERF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      LOWER = LSAME( UPLO, 'L' )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -6
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -9
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSBEV ', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( LOWER ) THEN
+            W( 1 ) = AB( 1, 1 )
+         ELSE
+            W( 1 ) = AB( KD+1, 1 )
+         END IF
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      EPS = DLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = SQRT( BIGNUM )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ANRM = DLANSB( 'M', UPLO, N, KD, AB, LDAB, WORK )
+      ISCALE = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         IF( LOWER ) THEN
+            CALL DLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO )
+         ELSE
+            CALL DLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO )
+         END IF
+      END IF
+*
+*     Call DSBTRD to reduce symmetric band matrix to tridiagonal form.
+*
+      INDE = 1
+      INDWRK = INDE + N
+      CALL DSBTRD( JOBZ, UPLO, N, KD, AB, LDAB, W, WORK( INDE ), Z, LDZ,
+     $             WORK( INDWRK ), IINFO )
+*
+*     For eigenvalues only, call DSTERF.  For eigenvectors, call SSTEQR.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL DSTERF( N, W, WORK( INDE ), INFO )
+      ELSE
+         CALL DSTEQR( JOBZ, N, W, WORK( INDE ), Z, LDZ, WORK( INDWRK ),
+     $                INFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = N
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL DSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+      RETURN
+*
+*     End of DSBEV
+*
+      END
diff --git a/costa/native/external/lapack/dsbevd.f b/costa/native/external/lapack/dsbevd.f
new file mode 100644
index 000000000..efa1bb0e6
--- /dev/null
+++ b/costa/native/external/lapack/dsbevd.f
@@ -0,0 +1,265 @@
+      SUBROUTINE DSBEVD( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK,
+     $                   LWORK, IWORK, LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, KD, LDAB, LDZ, LIWORK, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSBEVD computes all the eigenvalues and, optionally, eigenvectors of
+*  a real symmetric band matrix A. If eigenvectors are desired, it uses
+*  a divide and conquer algorithm.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) DOUBLE PRECISION array, dimension (LDAB, N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*          On exit, AB is overwritten by values generated during the
+*          reduction to tridiagonal form.  If UPLO = 'U', the first
+*          superdiagonal and the diagonal of the tridiagonal matrix T
+*          are returned in rows KD and KD+1 of AB, and if UPLO = 'L',
+*          the diagonal and first subdiagonal of T are returned in the
+*          first two rows of AB.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD + 1.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal
+*          eigenvectors of the matrix A, with the i-th column of Z
+*          holding the eigenvector associated with W(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array,
+*                                         dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          IF N <= 1,                LWORK must be at least 1.
+*          If JOBZ  = 'N' and N > 2, LWORK must be at least 2*N.
+*          If JOBZ  = 'V' and N > 2, LWORK must be at least
+*                         ( 1 + 5*N + 2*N**2 ).
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array LIWORK.
+*          If JOBZ  = 'N' or N <= 1, LIWORK must be at least 1.
+*          If JOBZ  = 'V' and N > 2, LIWORK must be at least 3 + 5*N.
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the algorithm failed to converge; i
+*                off-diagonal elements of an intermediate tridiagonal
+*                form did not converge to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LOWER, LQUERY, WANTZ
+      INTEGER            IINFO, INDE, INDWK2, INDWRK, ISCALE, LIWMIN,
+     $                   LLWRK2, LWMIN
+      DOUBLE PRECISION   ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
+     $                   SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, DLANSB
+      EXTERNAL           LSAME, DLAMCH, DLANSB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMM, DLACPY, DLASCL, DSBTRD, DSCAL, DSTEDC,
+     $                   DSTERF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      LOWER = LSAME( UPLO, 'L' )
+      LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( N.LE.1 ) THEN
+         LIWMIN = 1
+         LWMIN = 1
+      ELSE
+         IF( WANTZ ) THEN
+            LIWMIN = 3 + 5*N
+            LWMIN = 1 + 5*N + 2*N**2
+         ELSE
+            LIWMIN = 1
+            LWMIN = 2*N
+         END IF
+      END IF
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -6
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -9
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -11
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSBEVD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         W( 1 ) = AB( 1, 1 )
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      EPS = DLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = SQRT( BIGNUM )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ANRM = DLANSB( 'M', UPLO, N, KD, AB, LDAB, WORK )
+      ISCALE = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         IF( LOWER ) THEN
+            CALL DLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO )
+         ELSE
+            CALL DLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO )
+         END IF
+      END IF
+*
+*     Call DSBTRD to reduce symmetric band matrix to tridiagonal form.
+*
+      INDE = 1
+      INDWRK = INDE + N
+      INDWK2 = INDWRK + N*N
+      LLWRK2 = LWORK - INDWK2 + 1
+      CALL DSBTRD( JOBZ, UPLO, N, KD, AB, LDAB, W, WORK( INDE ), Z, LDZ,
+     $             WORK( INDWRK ), IINFO )
+*
+*     For eigenvalues only, call DSTERF.  For eigenvectors, call SSTEDC.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL DSTERF( N, W, WORK( INDE ), INFO )
+      ELSE
+         CALL DSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N,
+     $                WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO )
+         CALL DGEMM( 'N', 'N', N, N, N, ONE, Z, LDZ, WORK( INDWRK ), N,
+     $               ZERO, WORK( INDWK2 ), N )
+         CALL DLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( ISCALE.EQ.1 )
+     $   CALL DSCAL( N, ONE / SIGMA, W, 1 )
+*
+      WORK( 1 ) = LWMIN
+      IWORK( 1 ) = LIWMIN
+      RETURN
+*
+*     End of DSBEVD
+*
+      END
diff --git a/costa/native/external/lapack/dsbevx.f b/costa/native/external/lapack/dsbevx.f
new file mode 100644
index 000000000..f832f1a60
--- /dev/null
+++ b/costa/native/external/lapack/dsbevx.f
@@ -0,0 +1,411 @@
+      SUBROUTINE DSBEVX( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, Q, LDQ, VL,
+     $                   VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, IWORK,
+     $                   IFAIL, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE, UPLO
+      INTEGER            IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N
+      DOUBLE PRECISION   ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IFAIL( * ), IWORK( * )
+      DOUBLE PRECISION   AB( LDAB, * ), Q( LDQ, * ), W( * ), WORK( * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSBEVX computes selected eigenvalues and, optionally, eigenvectors
+*  of a real symmetric band matrix A.  Eigenvalues and eigenvectors can
+*  be selected by specifying either a range of values or a range of
+*  indices for the desired eigenvalues.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found;
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found;
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) DOUBLE PRECISION array, dimension (LDAB, N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*          On exit, AB is overwritten by values generated during the
+*          reduction to tridiagonal form.  If UPLO = 'U', the first
+*          superdiagonal and the diagonal of the tridiagonal matrix T
+*          are returned in rows KD and KD+1 of AB, and if UPLO = 'L',
+*          the diagonal and first subdiagonal of T are returned in the
+*          first two rows of AB.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD + 1.
+*
+*  Q       (output) DOUBLE PRECISION array, dimension (LDQ, N)
+*          If JOBZ = 'V', the N-by-N orthogonal matrix used in the
+*                         reduction to tridiagonal form.
+*          If JOBZ = 'N', the array Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.  If JOBZ = 'V', then
+*          LDQ >= max(1,N).
+*
+*  VL      (input) DOUBLE PRECISION
+*  VU      (input) DOUBLE PRECISION
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) DOUBLE PRECISION
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less than
+*          or equal to zero, then  EPS*|T|  will be used in its place,
+*          where |T| is the 1-norm of the tridiagonal matrix obtained
+*          by reducing AB to tridiagonal form.
+*
+*          Eigenvalues will be computed most accurately when ABSTOL is
+*          set to twice the underflow threshold 2*DLAMCH('S'), not zero.
+*          If this routine returns with INFO>0, indicating that some
+*          eigenvectors did not converge, try setting ABSTOL to
+*          2*DLAMCH('S').
+*
+*          See "Computing Small Singular Values of Bidiagonal Matrices
+*          with Guaranteed High Relative Accuracy," by Demmel and
+*          Kahan, LAPACK Working Note #3.
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          The first M elements contain the selected eigenvalues in
+*          ascending order.
+*
+*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M))
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix A
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          If an eigenvector fails to converge, then that column of Z
+*          contains the latest approximation to the eigenvector, and the
+*          index of the eigenvector is returned in IFAIL.
+*          If JOBZ = 'N', then Z is not referenced.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (7*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (5*N)
+*
+*  IFAIL   (output) INTEGER array, dimension (N)
+*          If JOBZ = 'V', then if INFO = 0, the first M elements of
+*          IFAIL are zero.  If INFO > 0, then IFAIL contains the
+*          indices of the eigenvectors that failed to converge.
+*          If JOBZ = 'N', then IFAIL is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = i, then i eigenvectors failed to converge.
+*                Their indices are stored in array IFAIL.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, LOWER, VALEIG, WANTZ
+      CHARACTER          ORDER
+      INTEGER            I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL,
+     $                   INDISP, INDIWO, INDWRK, ISCALE, ITMP1, J, JJ,
+     $                   NSPLIT
+      DOUBLE PRECISION   ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN,
+     $                   SIGMA, SMLNUM, TMP1, VLL, VUU
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, DLANSB
+      EXTERNAL           LSAME, DLAMCH, DLANSB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DGEMV, DLACPY, DLASCL, DSBTRD, DSCAL,
+     $                   DSTEBZ, DSTEIN, DSTEQR, DSTERF, DSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+      LOWER = LSAME( UPLO, 'L' )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -7
+      ELSE IF( WANTZ .AND. LDQ.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE
+         IF( VALEIG ) THEN
+            IF( N.GT.0 .AND. VU.LE.VL )
+     $         INFO = -11
+         ELSE IF( INDEIG ) THEN
+            IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN
+               INFO = -12
+            ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN
+               INFO = -13
+            END IF
+         END IF
+      END IF
+      IF( INFO.EQ.0 ) THEN
+         IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) )
+     $      INFO = -18
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSBEVX', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         M = 1
+         IF( LOWER ) THEN
+            TMP1 = AB( 1, 1 )
+         ELSE
+            TMP1 = AB( KD+1, 1 )
+         END IF
+         IF( VALEIG ) THEN
+            IF( .NOT.( VL.LT.TMP1 .AND. VU.GE.TMP1 ) )
+     $         M = 0
+         END IF
+         IF( M.EQ.1 ) THEN
+            W( 1 ) = TMP1
+            IF( WANTZ )
+     $         Z( 1, 1 ) = ONE
+         END IF
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      EPS = DLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ISCALE = 0
+      ABSTLL = ABSTOL
+      IF( VALEIG ) THEN
+         VLL = VL
+         VUU = VU
+      ELSE
+         VLL = ZERO
+         VUU = ZERO
+      END IF
+      ANRM = DLANSB( 'M', UPLO, N, KD, AB, LDAB, WORK )
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         IF( LOWER ) THEN
+            CALL DLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO )
+         ELSE
+            CALL DLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO )
+         END IF
+         IF( ABSTOL.GT.0 )
+     $      ABSTLL = ABSTOL*SIGMA
+         IF( VALEIG ) THEN
+            VLL = VL*SIGMA
+            VUU = VU*SIGMA
+         END IF
+      END IF
+*
+*     Call DSBTRD to reduce symmetric band matrix to tridiagonal form.
+*
+      INDD = 1
+      INDE = INDD + N
+      INDWRK = INDE + N
+      CALL DSBTRD( JOBZ, UPLO, N, KD, AB, LDAB, WORK( INDD ),
+     $             WORK( INDE ), Q, LDQ, WORK( INDWRK ), IINFO )
+*
+*     If all eigenvalues are desired and ABSTOL is less than or equal
+*     to zero, then call DSTERF or SSTEQR.  If this fails for some
+*     eigenvalue, then try DSTEBZ.
+*
+      IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND.
+     $    ( ABSTOL.LE.ZERO ) ) THEN
+         CALL DCOPY( N, WORK( INDD ), 1, W, 1 )
+         INDEE = INDWRK + 2*N
+         IF( .NOT.WANTZ ) THEN
+            CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 )
+            CALL DSTERF( N, W, WORK( INDEE ), INFO )
+         ELSE
+            CALL DLACPY( 'A', N, N, Q, LDQ, Z, LDZ )
+            CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 )
+            CALL DSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ,
+     $                   WORK( INDWRK ), INFO )
+            IF( INFO.EQ.0 ) THEN
+               DO 10 I = 1, N
+                  IFAIL( I ) = 0
+   10          CONTINUE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            M = N
+            GO TO 30
+         END IF
+         INFO = 0
+      END IF
+*
+*     Otherwise, call DSTEBZ and, if eigenvectors are desired, SSTEIN.
+*
+      IF( WANTZ ) THEN
+         ORDER = 'B'
+      ELSE
+         ORDER = 'E'
+      END IF
+      INDIBL = 1
+      INDISP = INDIBL + N
+      INDIWO = INDISP + N
+      CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL,
+     $             WORK( INDD ), WORK( INDE ), M, NSPLIT, W,
+     $             IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ),
+     $             IWORK( INDIWO ), INFO )
+*
+      IF( WANTZ ) THEN
+         CALL DSTEIN( N, WORK( INDD ), WORK( INDE ), M, W,
+     $                IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ,
+     $                WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO )
+*
+*        Apply orthogonal matrix used in reduction to tridiagonal
+*        form to eigenvectors returned by DSTEIN.
+*
+         DO 20 J = 1, M
+            CALL DCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 )
+            CALL DGEMV( 'N', N, N, ONE, Q, LDQ, WORK, 1, ZERO,
+     $                  Z( 1, J ), 1 )
+   20    CONTINUE
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+   30 CONTINUE
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = M
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL DSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+*     If eigenvalues are not in order, then sort them, along with
+*     eigenvectors.
+*
+      IF( WANTZ ) THEN
+         DO 50 J = 1, M - 1
+            I = 0
+            TMP1 = W( J )
+            DO 40 JJ = J + 1, M
+               IF( W( JJ ).LT.TMP1 ) THEN
+                  I = JJ
+                  TMP1 = W( JJ )
+               END IF
+   40       CONTINUE
+*
+            IF( I.NE.0 ) THEN
+               ITMP1 = IWORK( INDIBL+I-1 )
+               W( I ) = W( J )
+               IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 )
+               W( J ) = TMP1
+               IWORK( INDIBL+J-1 ) = ITMP1
+               CALL DSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 )
+               IF( INFO.NE.0 ) THEN
+                  ITMP1 = IFAIL( I )
+                  IFAIL( I ) = IFAIL( J )
+                  IFAIL( J ) = ITMP1
+               END IF
+            END IF
+   50    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DSBEVX
+*
+      END
diff --git a/costa/native/external/lapack/dsbgst.f b/costa/native/external/lapack/dsbgst.f
new file mode 100644
index 000000000..3d975aea3
--- /dev/null
+++ b/costa/native/external/lapack/dsbgst.f
@@ -0,0 +1,1346 @@
+      SUBROUTINE DSBGST( VECT, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, X,
+     $                   LDX, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO, VECT
+      INTEGER            INFO, KA, KB, LDAB, LDBB, LDX, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AB( LDAB, * ), BB( LDBB, * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSBGST reduces a real symmetric-definite banded generalized
+*  eigenproblem  A*x = lambda*B*x  to standard form  C*y = lambda*y,
+*  such that C has the same bandwidth as A.
+*
+*  B must have been previously factorized as S**T*S by DPBSTF, using a
+*  split Cholesky factorization. A is overwritten by C = X**T*A*X, where
+*  X = S**(-1)*Q and Q is an orthogonal matrix chosen to preserve the
+*  bandwidth of A.
+*
+*  Arguments
+*  =========
+*
+*  VECT    (input) CHARACTER*1
+*          = 'N':  do not form the transformation matrix X;
+*          = 'V':  form X.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  KA      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KA >= 0.
+*
+*  KB      (input) INTEGER
+*          The number of superdiagonals of the matrix B if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KA >= KB >= 0.
+*
+*  AB      (input/output) DOUBLE PRECISION array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first ka+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+ka).
+*
+*          On exit, the transformed matrix X**T*A*X, stored in the same
+*          format as A.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KA+1.
+*
+*  BB      (input) DOUBLE PRECISION array, dimension (LDBB,N)
+*          The banded factor S from the split Cholesky factorization of
+*          B, as returned by DPBSTF, stored in the first KB+1 rows of
+*          the array.
+*
+*  LDBB    (input) INTEGER
+*          The leading dimension of the array BB.  LDBB >= KB+1.
+*
+*  X       (output) DOUBLE PRECISION array, dimension (LDX,N)
+*          If VECT = 'V', the n-by-n matrix X.
+*          If VECT = 'N', the array X is not referenced.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.
+*          LDX >= max(1,N) if VECT = 'V'; LDX >= 1 otherwise.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPDATE, UPPER, WANTX
+      INTEGER            I, I0, I1, I2, INCA, J, J1, J1T, J2, J2T, K,
+     $                   KA1, KB1, KBT, L, M, NR, NRT, NX
+      DOUBLE PRECISION   BII, RA, RA1, T
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGER, DLAR2V, DLARGV, DLARTG, DLARTV, DLASET,
+     $                   DROT, DSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      WANTX = LSAME( VECT, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+      KA1 = KA + 1
+      KB1 = KB + 1
+      INFO = 0
+      IF( .NOT.WANTX .AND. .NOT.LSAME( VECT, 'N' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KA.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KB.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.KA+1 ) THEN
+         INFO = -7
+      ELSE IF( LDBB.LT.KB+1 ) THEN
+         INFO = -9
+      ELSE IF( LDX.LT.1 .OR. WANTX .AND. LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSBGST', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      INCA = LDAB*KA1
+*
+*     Initialize X to the unit matrix, if needed
+*
+      IF( WANTX )
+     $   CALL DLASET( 'Full', N, N, ZERO, ONE, X, LDX )
+*
+*     Set M to the splitting point m. It must be the same value as is
+*     used in DPBSTF. The chosen value allows the arrays WORK and RWORK
+*     to be of dimension (N).
+*
+      M = ( N+KB ) / 2
+*
+*     The routine works in two phases, corresponding to the two halves
+*     of the split Cholesky factorization of B as S**T*S where
+*
+*     S = ( U    )
+*         ( M  L )
+*
+*     with U upper triangular of order m, and L lower triangular of
+*     order n-m. S has the same bandwidth as B.
+*
+*     S is treated as a product of elementary matrices:
+*
+*     S = S(m)*S(m-1)*...*S(2)*S(1)*S(m+1)*S(m+2)*...*S(n-1)*S(n)
+*
+*     where S(i) is determined by the i-th row of S.
+*
+*     In phase 1, the index i takes the values n, n-1, ... , m+1;
+*     in phase 2, it takes the values 1, 2, ... , m.
+*
+*     For each value of i, the current matrix A is updated by forming
+*     inv(S(i))**T*A*inv(S(i)). This creates a triangular bulge outside
+*     the band of A. The bulge is then pushed down toward the bottom of
+*     A in phase 1, and up toward the top of A in phase 2, by applying
+*     plane rotations.
+*
+*     There are kb*(kb+1)/2 elements in the bulge, but at most 2*kb-1
+*     of them are linearly independent, so annihilating a bulge requires
+*     only 2*kb-1 plane rotations. The rotations are divided into a 1st
+*     set of kb-1 rotations, and a 2nd set of kb rotations.
+*
+*     Wherever possible, rotations are generated and applied in vector
+*     operations of length NR between the indices J1 and J2 (sometimes
+*     replaced by modified values NRT, J1T or J2T).
+*
+*     The cosines and sines of the rotations are stored in the array
+*     WORK. The cosines of the 1st set of rotations are stored in
+*     elements n+2:n+m-kb-1 and the sines of the 1st set in elements
+*     2:m-kb-1; the cosines of the 2nd set are stored in elements
+*     n+m-kb+1:2*n and the sines of the second set in elements m-kb+1:n.
+*
+*     The bulges are not formed explicitly; nonzero elements outside the
+*     band are created only when they are required for generating new
+*     rotations; they are stored in the array WORK, in positions where
+*     they are later overwritten by the sines of the rotations which
+*     annihilate them.
+*
+*     **************************** Phase 1 *****************************
+*
+*     The logical structure of this phase is:
+*
+*     UPDATE = .TRUE.
+*     DO I = N, M + 1, -1
+*        use S(i) to update A and create a new bulge
+*        apply rotations to push all bulges KA positions downward
+*     END DO
+*     UPDATE = .FALSE.
+*     DO I = M + KA + 1, N - 1
+*        apply rotations to push all bulges KA positions downward
+*     END DO
+*
+*     To avoid duplicating code, the two loops are merged.
+*
+      UPDATE = .TRUE.
+      I = N + 1
+   10 CONTINUE
+      IF( UPDATE ) THEN
+         I = I - 1
+         KBT = MIN( KB, I-1 )
+         I0 = I - 1
+         I1 = MIN( N, I+KA )
+         I2 = I - KBT + KA1
+         IF( I.LT.M+1 ) THEN
+            UPDATE = .FALSE.
+            I = I + 1
+            I0 = M
+            IF( KA.EQ.0 )
+     $         GO TO 480
+            GO TO 10
+         END IF
+      ELSE
+         I = I + KA
+         IF( I.GT.N-1 )
+     $      GO TO 480
+      END IF
+*
+      IF( UPPER ) THEN
+*
+*        Transform A, working with the upper triangle
+*
+         IF( UPDATE ) THEN
+*
+*           Form  inv(S(i))**T * A * inv(S(i))
+*
+            BII = BB( KB1, I )
+            DO 20 J = I, I1
+               AB( I-J+KA1, J ) = AB( I-J+KA1, J ) / BII
+   20       CONTINUE
+            DO 30 J = MAX( 1, I-KA ), I
+               AB( J-I+KA1, I ) = AB( J-I+KA1, I ) / BII
+   30       CONTINUE
+            DO 60 K = I - KBT, I - 1
+               DO 40 J = I - KBT, K
+                  AB( J-K+KA1, K ) = AB( J-K+KA1, K ) -
+     $                               BB( J-I+KB1, I )*AB( K-I+KA1, I ) -
+     $                               BB( K-I+KB1, I )*AB( J-I+KA1, I ) +
+     $                               AB( KA1, I )*BB( J-I+KB1, I )*
+     $                               BB( K-I+KB1, I )
+   40          CONTINUE
+               DO 50 J = MAX( 1, I-KA ), I - KBT - 1
+                  AB( J-K+KA1, K ) = AB( J-K+KA1, K ) -
+     $                               BB( K-I+KB1, I )*AB( J-I+KA1, I )
+   50          CONTINUE
+   60       CONTINUE
+            DO 80 J = I, I1
+               DO 70 K = MAX( J-KA, I-KBT ), I - 1
+                  AB( K-J+KA1, J ) = AB( K-J+KA1, J ) -
+     $                               BB( K-I+KB1, I )*AB( I-J+KA1, J )
+   70          CONTINUE
+   80       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by inv(S(i))
+*
+               CALL DSCAL( N-M, ONE / BII, X( M+1, I ), 1 )
+               IF( KBT.GT.0 )
+     $            CALL DGER( N-M, KBT, -ONE, X( M+1, I ), 1,
+     $                       BB( KB1-KBT, I ), 1, X( M+1, I-KBT ), LDX )
+            END IF
+*
+*           store a(i,i1) in RA1 for use in next loop over K
+*
+            RA1 = AB( I-I1+KA1, I1 )
+         END IF
+*
+*        Generate and apply vectors of rotations to chase all the
+*        existing bulges KA positions down toward the bottom of the
+*        band
+*
+         DO 130 K = 1, KB - 1
+            IF( UPDATE ) THEN
+*
+*              Determine the rotations which would annihilate the bulge
+*              which has in theory just been created
+*
+               IF( I-K+KA.LT.N .AND. I-K.GT.1 ) THEN
+*
+*                 generate rotation to annihilate a(i,i-k+ka+1)
+*
+                  CALL DLARTG( AB( K+1, I-K+KA ), RA1,
+     $                         WORK( N+I-K+KA-M ), WORK( I-K+KA-M ),
+     $                         RA )
+*
+*                 create nonzero element a(i-k,i-k+ka+1) outside the
+*                 band and store it in WORK(i-k)
+*
+                  T = -BB( KB1-K, I )*RA1
+                  WORK( I-K ) = WORK( N+I-K+KA-M )*T -
+     $                          WORK( I-K+KA-M )*AB( 1, I-K+KA )
+                  AB( 1, I-K+KA ) = WORK( I-K+KA-M )*T +
+     $                              WORK( N+I-K+KA-M )*AB( 1, I-K+KA )
+                  RA1 = RA
+               END IF
+            END IF
+            J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1
+            NR = ( N-J2+KA ) / KA1
+            J1 = J2 + ( NR-1 )*KA1
+            IF( UPDATE ) THEN
+               J2T = MAX( J2, I+2*KA-K+1 )
+            ELSE
+               J2T = J2
+            END IF
+            NRT = ( N-J2T+KA ) / KA1
+            DO 90 J = J2T, J1, KA1
+*
+*              create nonzero element a(j-ka,j+1) outside the band
+*              and store it in WORK(j-m)
+*
+               WORK( J-M ) = WORK( J-M )*AB( 1, J+1 )
+               AB( 1, J+1 ) = WORK( N+J-M )*AB( 1, J+1 )
+   90       CONTINUE
+*
+*           generate rotations in 1st set to annihilate elements which
+*           have been created outside the band
+*
+            IF( NRT.GT.0 )
+     $         CALL DLARGV( NRT, AB( 1, J2T ), INCA, WORK( J2T-M ), KA1,
+     $                      WORK( N+J2T-M ), KA1 )
+            IF( NR.GT.0 ) THEN
+*
+*              apply rotations in 1st set from the right
+*
+               DO 100 L = 1, KA - 1
+                  CALL DLARTV( NR, AB( KA1-L, J2 ), INCA,
+     $                         AB( KA-L, J2+1 ), INCA, WORK( N+J2-M ),
+     $                         WORK( J2-M ), KA1 )
+  100          CONTINUE
+*
+*              apply rotations in 1st set from both sides to diagonal
+*              blocks
+*
+               CALL DLAR2V( NR, AB( KA1, J2 ), AB( KA1, J2+1 ),
+     $                      AB( KA, J2+1 ), INCA, WORK( N+J2-M ),
+     $                      WORK( J2-M ), KA1 )
+*
+            END IF
+*
+*           start applying rotations in 1st set from the left
+*
+            DO 110 L = KA - 1, KB - K + 1, -1
+               NRT = ( N-J2+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL DLARTV( NRT, AB( L, J2+KA1-L ), INCA,
+     $                         AB( L+1, J2+KA1-L ), INCA,
+     $                         WORK( N+J2-M ), WORK( J2-M ), KA1 )
+  110       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 1st set
+*
+               DO 120 J = J2, J1, KA1
+                  CALL DROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1,
+     $                       WORK( N+J-M ), WORK( J-M ) )
+  120          CONTINUE
+            END IF
+  130    CONTINUE
+*
+         IF( UPDATE ) THEN
+            IF( I2.LE.N .AND. KBT.GT.0 ) THEN
+*
+*              create nonzero element a(i-kbt,i-kbt+ka+1) outside the
+*              band and store it in WORK(i-kbt)
+*
+               WORK( I-KBT ) = -BB( KB1-KBT, I )*RA1
+            END IF
+         END IF
+*
+         DO 170 K = KB, 1, -1
+            IF( UPDATE ) THEN
+               J2 = I - K - 1 + MAX( 2, K-I0+1 )*KA1
+            ELSE
+               J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1
+            END IF
+*
+*           finish applying rotations in 2nd set from the left
+*
+            DO 140 L = KB - K, 1, -1
+               NRT = ( N-J2+KA+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL DLARTV( NRT, AB( L, J2-L+1 ), INCA,
+     $                         AB( L+1, J2-L+1 ), INCA, WORK( N+J2-KA ),
+     $                         WORK( J2-KA ), KA1 )
+  140       CONTINUE
+            NR = ( N-J2+KA ) / KA1
+            J1 = J2 + ( NR-1 )*KA1
+            DO 150 J = J1, J2, -KA1
+               WORK( J ) = WORK( J-KA )
+               WORK( N+J ) = WORK( N+J-KA )
+  150       CONTINUE
+            DO 160 J = J2, J1, KA1
+*
+*              create nonzero element a(j-ka,j+1) outside the band
+*              and store it in WORK(j)
+*
+               WORK( J ) = WORK( J )*AB( 1, J+1 )
+               AB( 1, J+1 ) = WORK( N+J )*AB( 1, J+1 )
+  160       CONTINUE
+            IF( UPDATE ) THEN
+               IF( I-K.LT.N-KA .AND. K.LE.KBT )
+     $            WORK( I-K+KA ) = WORK( I-K )
+            END IF
+  170    CONTINUE
+*
+         DO 210 K = KB, 1, -1
+            J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1
+            NR = ( N-J2+KA ) / KA1
+            J1 = J2 + ( NR-1 )*KA1
+            IF( NR.GT.0 ) THEN
+*
+*              generate rotations in 2nd set to annihilate elements
+*              which have been created outside the band
+*
+               CALL DLARGV( NR, AB( 1, J2 ), INCA, WORK( J2 ), KA1,
+     $                      WORK( N+J2 ), KA1 )
+*
+*              apply rotations in 2nd set from the right
+*
+               DO 180 L = 1, KA - 1
+                  CALL DLARTV( NR, AB( KA1-L, J2 ), INCA,
+     $                         AB( KA-L, J2+1 ), INCA, WORK( N+J2 ),
+     $                         WORK( J2 ), KA1 )
+  180          CONTINUE
+*
+*              apply rotations in 2nd set from both sides to diagonal
+*              blocks
+*
+               CALL DLAR2V( NR, AB( KA1, J2 ), AB( KA1, J2+1 ),
+     $                      AB( KA, J2+1 ), INCA, WORK( N+J2 ),
+     $                      WORK( J2 ), KA1 )
+*
+            END IF
+*
+*           start applying rotations in 2nd set from the left
+*
+            DO 190 L = KA - 1, KB - K + 1, -1
+               NRT = ( N-J2+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL DLARTV( NRT, AB( L, J2+KA1-L ), INCA,
+     $                         AB( L+1, J2+KA1-L ), INCA, WORK( N+J2 ),
+     $                         WORK( J2 ), KA1 )
+  190       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 2nd set
+*
+               DO 200 J = J2, J1, KA1
+                  CALL DROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1,
+     $                       WORK( N+J ), WORK( J ) )
+  200          CONTINUE
+            END IF
+  210    CONTINUE
+*
+         DO 230 K = 1, KB - 1
+            J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1
+*
+*           finish applying rotations in 1st set from the left
+*
+            DO 220 L = KB - K, 1, -1
+               NRT = ( N-J2+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL DLARTV( NRT, AB( L, J2+KA1-L ), INCA,
+     $                         AB( L+1, J2+KA1-L ), INCA,
+     $                         WORK( N+J2-M ), WORK( J2-M ), KA1 )
+  220       CONTINUE
+  230    CONTINUE
+*
+         IF( KB.GT.1 ) THEN
+            DO 240 J = N - 1, I - KB + 2*KA + 1, -1
+               WORK( N+J-M ) = WORK( N+J-KA-M )
+               WORK( J-M ) = WORK( J-KA-M )
+  240       CONTINUE
+         END IF
+*
+      ELSE
+*
+*        Transform A, working with the lower triangle
+*
+         IF( UPDATE ) THEN
+*
+*           Form  inv(S(i))**T * A * inv(S(i))
+*
+            BII = BB( 1, I )
+            DO 250 J = I, I1
+               AB( J-I+1, I ) = AB( J-I+1, I ) / BII
+  250       CONTINUE
+            DO 260 J = MAX( 1, I-KA ), I
+               AB( I-J+1, J ) = AB( I-J+1, J ) / BII
+  260       CONTINUE
+            DO 290 K = I - KBT, I - 1
+               DO 270 J = I - KBT, K
+                  AB( K-J+1, J ) = AB( K-J+1, J ) -
+     $                             BB( I-J+1, J )*AB( I-K+1, K ) -
+     $                             BB( I-K+1, K )*AB( I-J+1, J ) +
+     $                             AB( 1, I )*BB( I-J+1, J )*
+     $                             BB( I-K+1, K )
+  270          CONTINUE
+               DO 280 J = MAX( 1, I-KA ), I - KBT - 1
+                  AB( K-J+1, J ) = AB( K-J+1, J ) -
+     $                             BB( I-K+1, K )*AB( I-J+1, J )
+  280          CONTINUE
+  290       CONTINUE
+            DO 310 J = I, I1
+               DO 300 K = MAX( J-KA, I-KBT ), I - 1
+                  AB( J-K+1, K ) = AB( J-K+1, K ) -
+     $                             BB( I-K+1, K )*AB( J-I+1, I )
+  300          CONTINUE
+  310       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by inv(S(i))
+*
+               CALL DSCAL( N-M, ONE / BII, X( M+1, I ), 1 )
+               IF( KBT.GT.0 )
+     $            CALL DGER( N-M, KBT, -ONE, X( M+1, I ), 1,
+     $                       BB( KBT+1, I-KBT ), LDBB-1,
+     $                       X( M+1, I-KBT ), LDX )
+            END IF
+*
+*           store a(i1,i) in RA1 for use in next loop over K
+*
+            RA1 = AB( I1-I+1, I )
+         END IF
+*
+*        Generate and apply vectors of rotations to chase all the
+*        existing bulges KA positions down toward the bottom of the
+*        band
+*
+         DO 360 K = 1, KB - 1
+            IF( UPDATE ) THEN
+*
+*              Determine the rotations which would annihilate the bulge
+*              which has in theory just been created
+*
+               IF( I-K+KA.LT.N .AND. I-K.GT.1 ) THEN
+*
+*                 generate rotation to annihilate a(i-k+ka+1,i)
+*
+                  CALL DLARTG( AB( KA1-K, I ), RA1, WORK( N+I-K+KA-M ),
+     $                         WORK( I-K+KA-M ), RA )
+*
+*                 create nonzero element a(i-k+ka+1,i-k) outside the
+*                 band and store it in WORK(i-k)
+*
+                  T = -BB( K+1, I-K )*RA1
+                  WORK( I-K ) = WORK( N+I-K+KA-M )*T -
+     $                          WORK( I-K+KA-M )*AB( KA1, I-K )
+                  AB( KA1, I-K ) = WORK( I-K+KA-M )*T +
+     $                             WORK( N+I-K+KA-M )*AB( KA1, I-K )
+                  RA1 = RA
+               END IF
+            END IF
+            J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1
+            NR = ( N-J2+KA ) / KA1
+            J1 = J2 + ( NR-1 )*KA1
+            IF( UPDATE ) THEN
+               J2T = MAX( J2, I+2*KA-K+1 )
+            ELSE
+               J2T = J2
+            END IF
+            NRT = ( N-J2T+KA ) / KA1
+            DO 320 J = J2T, J1, KA1
+*
+*              create nonzero element a(j+1,j-ka) outside the band
+*              and store it in WORK(j-m)
+*
+               WORK( J-M ) = WORK( J-M )*AB( KA1, J-KA+1 )
+               AB( KA1, J-KA+1 ) = WORK( N+J-M )*AB( KA1, J-KA+1 )
+  320       CONTINUE
+*
+*           generate rotations in 1st set to annihilate elements which
+*           have been created outside the band
+*
+            IF( NRT.GT.0 )
+     $         CALL DLARGV( NRT, AB( KA1, J2T-KA ), INCA, WORK( J2T-M ),
+     $                      KA1, WORK( N+J2T-M ), KA1 )
+            IF( NR.GT.0 ) THEN
+*
+*              apply rotations in 1st set from the left
+*
+               DO 330 L = 1, KA - 1
+                  CALL DLARTV( NR, AB( L+1, J2-L ), INCA,
+     $                         AB( L+2, J2-L ), INCA, WORK( N+J2-M ),
+     $                         WORK( J2-M ), KA1 )
+  330          CONTINUE
+*
+*              apply rotations in 1st set from both sides to diagonal
+*              blocks
+*
+               CALL DLAR2V( NR, AB( 1, J2 ), AB( 1, J2+1 ), AB( 2, J2 ),
+     $                      INCA, WORK( N+J2-M ), WORK( J2-M ), KA1 )
+*
+            END IF
+*
+*           start applying rotations in 1st set from the right
+*
+            DO 340 L = KA - 1, KB - K + 1, -1
+               NRT = ( N-J2+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL DLARTV( NRT, AB( KA1-L+1, J2 ), INCA,
+     $                         AB( KA1-L, J2+1 ), INCA, WORK( N+J2-M ),
+     $                         WORK( J2-M ), KA1 )
+  340       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 1st set
+*
+               DO 350 J = J2, J1, KA1
+                  CALL DROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1,
+     $                       WORK( N+J-M ), WORK( J-M ) )
+  350          CONTINUE
+            END IF
+  360    CONTINUE
+*
+         IF( UPDATE ) THEN
+            IF( I2.LE.N .AND. KBT.GT.0 ) THEN
+*
+*              create nonzero element a(i-kbt+ka+1,i-kbt) outside the
+*              band and store it in WORK(i-kbt)
+*
+               WORK( I-KBT ) = -BB( KBT+1, I-KBT )*RA1
+            END IF
+         END IF
+*
+         DO 400 K = KB, 1, -1
+            IF( UPDATE ) THEN
+               J2 = I - K - 1 + MAX( 2, K-I0+1 )*KA1
+            ELSE
+               J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1
+            END IF
+*
+*           finish applying rotations in 2nd set from the right
+*
+            DO 370 L = KB - K, 1, -1
+               NRT = ( N-J2+KA+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL DLARTV( NRT, AB( KA1-L+1, J2-KA ), INCA,
+     $                         AB( KA1-L, J2-KA+1 ), INCA,
+     $                         WORK( N+J2-KA ), WORK( J2-KA ), KA1 )
+  370       CONTINUE
+            NR = ( N-J2+KA ) / KA1
+            J1 = J2 + ( NR-1 )*KA1
+            DO 380 J = J1, J2, -KA1
+               WORK( J ) = WORK( J-KA )
+               WORK( N+J ) = WORK( N+J-KA )
+  380       CONTINUE
+            DO 390 J = J2, J1, KA1
+*
+*              create nonzero element a(j+1,j-ka) outside the band
+*              and store it in WORK(j)
+*
+               WORK( J ) = WORK( J )*AB( KA1, J-KA+1 )
+               AB( KA1, J-KA+1 ) = WORK( N+J )*AB( KA1, J-KA+1 )
+  390       CONTINUE
+            IF( UPDATE ) THEN
+               IF( I-K.LT.N-KA .AND. K.LE.KBT )
+     $            WORK( I-K+KA ) = WORK( I-K )
+            END IF
+  400    CONTINUE
+*
+         DO 440 K = KB, 1, -1
+            J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1
+            NR = ( N-J2+KA ) / KA1
+            J1 = J2 + ( NR-1 )*KA1
+            IF( NR.GT.0 ) THEN
+*
+*              generate rotations in 2nd set to annihilate elements
+*              which have been created outside the band
+*
+               CALL DLARGV( NR, AB( KA1, J2-KA ), INCA, WORK( J2 ), KA1,
+     $                      WORK( N+J2 ), KA1 )
+*
+*              apply rotations in 2nd set from the left
+*
+               DO 410 L = 1, KA - 1
+                  CALL DLARTV( NR, AB( L+1, J2-L ), INCA,
+     $                         AB( L+2, J2-L ), INCA, WORK( N+J2 ),
+     $                         WORK( J2 ), KA1 )
+  410          CONTINUE
+*
+*              apply rotations in 2nd set from both sides to diagonal
+*              blocks
+*
+               CALL DLAR2V( NR, AB( 1, J2 ), AB( 1, J2+1 ), AB( 2, J2 ),
+     $                      INCA, WORK( N+J2 ), WORK( J2 ), KA1 )
+*
+            END IF
+*
+*           start applying rotations in 2nd set from the right
+*
+            DO 420 L = KA - 1, KB - K + 1, -1
+               NRT = ( N-J2+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL DLARTV( NRT, AB( KA1-L+1, J2 ), INCA,
+     $                         AB( KA1-L, J2+1 ), INCA, WORK( N+J2 ),
+     $                         WORK( J2 ), KA1 )
+  420       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 2nd set
+*
+               DO 430 J = J2, J1, KA1
+                  CALL DROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1,
+     $                       WORK( N+J ), WORK( J ) )
+  430          CONTINUE
+            END IF
+  440    CONTINUE
+*
+         DO 460 K = 1, KB - 1
+            J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1
+*
+*           finish applying rotations in 1st set from the right
+*
+            DO 450 L = KB - K, 1, -1
+               NRT = ( N-J2+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL DLARTV( NRT, AB( KA1-L+1, J2 ), INCA,
+     $                         AB( KA1-L, J2+1 ), INCA, WORK( N+J2-M ),
+     $                         WORK( J2-M ), KA1 )
+  450       CONTINUE
+  460    CONTINUE
+*
+         IF( KB.GT.1 ) THEN
+            DO 470 J = N - 1, I - KB + 2*KA + 1, -1
+               WORK( N+J-M ) = WORK( N+J-KA-M )
+               WORK( J-M ) = WORK( J-KA-M )
+  470       CONTINUE
+         END IF
+*
+      END IF
+*
+      GO TO 10
+*
+  480 CONTINUE
+*
+*     **************************** Phase 2 *****************************
+*
+*     The logical structure of this phase is:
+*
+*     UPDATE = .TRUE.
+*     DO I = 1, M
+*        use S(i) to update A and create a new bulge
+*        apply rotations to push all bulges KA positions upward
+*     END DO
+*     UPDATE = .FALSE.
+*     DO I = M - KA - 1, 2, -1
+*        apply rotations to push all bulges KA positions upward
+*     END DO
+*
+*     To avoid duplicating code, the two loops are merged.
+*
+      UPDATE = .TRUE.
+      I = 0
+  490 CONTINUE
+      IF( UPDATE ) THEN
+         I = I + 1
+         KBT = MIN( KB, M-I )
+         I0 = I + 1
+         I1 = MAX( 1, I-KA )
+         I2 = I + KBT - KA1
+         IF( I.GT.M ) THEN
+            UPDATE = .FALSE.
+            I = I - 1
+            I0 = M + 1
+            IF( KA.EQ.0 )
+     $         RETURN
+            GO TO 490
+         END IF
+      ELSE
+         I = I - KA
+         IF( I.LT.2 )
+     $      RETURN
+      END IF
+*
+      IF( I.LT.M-KBT ) THEN
+         NX = M
+      ELSE
+         NX = N
+      END IF
+*
+      IF( UPPER ) THEN
+*
+*        Transform A, working with the upper triangle
+*
+         IF( UPDATE ) THEN
+*
+*           Form  inv(S(i))**T * A * inv(S(i))
+*
+            BII = BB( KB1, I )
+            DO 500 J = I1, I
+               AB( J-I+KA1, I ) = AB( J-I+KA1, I ) / BII
+  500       CONTINUE
+            DO 510 J = I, MIN( N, I+KA )
+               AB( I-J+KA1, J ) = AB( I-J+KA1, J ) / BII
+  510       CONTINUE
+            DO 540 K = I + 1, I + KBT
+               DO 520 J = K, I + KBT
+                  AB( K-J+KA1, J ) = AB( K-J+KA1, J ) -
+     $                               BB( I-J+KB1, J )*AB( I-K+KA1, K ) -
+     $                               BB( I-K+KB1, K )*AB( I-J+KA1, J ) +
+     $                               AB( KA1, I )*BB( I-J+KB1, J )*
+     $                               BB( I-K+KB1, K )
+  520          CONTINUE
+               DO 530 J = I + KBT + 1, MIN( N, I+KA )
+                  AB( K-J+KA1, J ) = AB( K-J+KA1, J ) -
+     $                               BB( I-K+KB1, K )*AB( I-J+KA1, J )
+  530          CONTINUE
+  540       CONTINUE
+            DO 560 J = I1, I
+               DO 550 K = I + 1, MIN( J+KA, I+KBT )
+                  AB( J-K+KA1, K ) = AB( J-K+KA1, K ) -
+     $                               BB( I-K+KB1, K )*AB( J-I+KA1, I )
+  550          CONTINUE
+  560       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by inv(S(i))
+*
+               CALL DSCAL( NX, ONE / BII, X( 1, I ), 1 )
+               IF( KBT.GT.0 )
+     $            CALL DGER( NX, KBT, -ONE, X( 1, I ), 1, BB( KB, I+1 ),
+     $                       LDBB-1, X( 1, I+1 ), LDX )
+            END IF
+*
+*           store a(i1,i) in RA1 for use in next loop over K
+*
+            RA1 = AB( I1-I+KA1, I )
+         END IF
+*
+*        Generate and apply vectors of rotations to chase all the
+*        existing bulges KA positions up toward the top of the band
+*
+         DO 610 K = 1, KB - 1
+            IF( UPDATE ) THEN
+*
+*              Determine the rotations which would annihilate the bulge
+*              which has in theory just been created
+*
+               IF( I+K-KA1.GT.0 .AND. I+K.LT.M ) THEN
+*
+*                 generate rotation to annihilate a(i+k-ka-1,i)
+*
+                  CALL DLARTG( AB( K+1, I ), RA1, WORK( N+I+K-KA ),
+     $                         WORK( I+K-KA ), RA )
+*
+*                 create nonzero element a(i+k-ka-1,i+k) outside the
+*                 band and store it in WORK(m-kb+i+k)
+*
+                  T = -BB( KB1-K, I+K )*RA1
+                  WORK( M-KB+I+K ) = WORK( N+I+K-KA )*T -
+     $                               WORK( I+K-KA )*AB( 1, I+K )
+                  AB( 1, I+K ) = WORK( I+K-KA )*T +
+     $                           WORK( N+I+K-KA )*AB( 1, I+K )
+                  RA1 = RA
+               END IF
+            END IF
+            J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1
+            NR = ( J2+KA-1 ) / KA1
+            J1 = J2 - ( NR-1 )*KA1
+            IF( UPDATE ) THEN
+               J2T = MIN( J2, I-2*KA+K-1 )
+            ELSE
+               J2T = J2
+            END IF
+            NRT = ( J2T+KA-1 ) / KA1
+            DO 570 J = J1, J2T, KA1
+*
+*              create nonzero element a(j-1,j+ka) outside the band
+*              and store it in WORK(j)
+*
+               WORK( J ) = WORK( J )*AB( 1, J+KA-1 )
+               AB( 1, J+KA-1 ) = WORK( N+J )*AB( 1, J+KA-1 )
+  570       CONTINUE
+*
+*           generate rotations in 1st set to annihilate elements which
+*           have been created outside the band
+*
+            IF( NRT.GT.0 )
+     $         CALL DLARGV( NRT, AB( 1, J1+KA ), INCA, WORK( J1 ), KA1,
+     $                      WORK( N+J1 ), KA1 )
+            IF( NR.GT.0 ) THEN
+*
+*              apply rotations in 1st set from the left
+*
+               DO 580 L = 1, KA - 1
+                  CALL DLARTV( NR, AB( KA1-L, J1+L ), INCA,
+     $                         AB( KA-L, J1+L ), INCA, WORK( N+J1 ),
+     $                         WORK( J1 ), KA1 )
+  580          CONTINUE
+*
+*              apply rotations in 1st set from both sides to diagonal
+*              blocks
+*
+               CALL DLAR2V( NR, AB( KA1, J1 ), AB( KA1, J1-1 ),
+     $                      AB( KA, J1 ), INCA, WORK( N+J1 ),
+     $                      WORK( J1 ), KA1 )
+*
+            END IF
+*
+*           start applying rotations in 1st set from the right
+*
+            DO 590 L = KA - 1, KB - K + 1, -1
+               NRT = ( J2+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL DLARTV( NRT, AB( L, J1T ), INCA,
+     $                         AB( L+1, J1T-1 ), INCA, WORK( N+J1T ),
+     $                         WORK( J1T ), KA1 )
+  590       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 1st set
+*
+               DO 600 J = J1, J2, KA1
+                  CALL DROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1,
+     $                       WORK( N+J ), WORK( J ) )
+  600          CONTINUE
+            END IF
+  610    CONTINUE
+*
+         IF( UPDATE ) THEN
+            IF( I2.GT.0 .AND. KBT.GT.0 ) THEN
+*
+*              create nonzero element a(i+kbt-ka-1,i+kbt) outside the
+*              band and store it in WORK(m-kb+i+kbt)
+*
+               WORK( M-KB+I+KBT ) = -BB( KB1-KBT, I+KBT )*RA1
+            END IF
+         END IF
+*
+         DO 650 K = KB, 1, -1
+            IF( UPDATE ) THEN
+               J2 = I + K + 1 - MAX( 2, K+I0-M )*KA1
+            ELSE
+               J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1
+            END IF
+*
+*           finish applying rotations in 2nd set from the right
+*
+            DO 620 L = KB - K, 1, -1
+               NRT = ( J2+KA+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL DLARTV( NRT, AB( L, J1T+KA ), INCA,
+     $                         AB( L+1, J1T+KA-1 ), INCA,
+     $                         WORK( N+M-KB+J1T+KA ),
+     $                         WORK( M-KB+J1T+KA ), KA1 )
+  620       CONTINUE
+            NR = ( J2+KA-1 ) / KA1
+            J1 = J2 - ( NR-1 )*KA1
+            DO 630 J = J1, J2, KA1
+               WORK( M-KB+J ) = WORK( M-KB+J+KA )
+               WORK( N+M-KB+J ) = WORK( N+M-KB+J+KA )
+  630       CONTINUE
+            DO 640 J = J1, J2, KA1
+*
+*              create nonzero element a(j-1,j+ka) outside the band
+*              and store it in WORK(m-kb+j)
+*
+               WORK( M-KB+J ) = WORK( M-KB+J )*AB( 1, J+KA-1 )
+               AB( 1, J+KA-1 ) = WORK( N+M-KB+J )*AB( 1, J+KA-1 )
+  640       CONTINUE
+            IF( UPDATE ) THEN
+               IF( I+K.GT.KA1 .AND. K.LE.KBT )
+     $            WORK( M-KB+I+K-KA ) = WORK( M-KB+I+K )
+            END IF
+  650    CONTINUE
+*
+         DO 690 K = KB, 1, -1
+            J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1
+            NR = ( J2+KA-1 ) / KA1
+            J1 = J2 - ( NR-1 )*KA1
+            IF( NR.GT.0 ) THEN
+*
+*              generate rotations in 2nd set to annihilate elements
+*              which have been created outside the band
+*
+               CALL DLARGV( NR, AB( 1, J1+KA ), INCA, WORK( M-KB+J1 ),
+     $                      KA1, WORK( N+M-KB+J1 ), KA1 )
+*
+*              apply rotations in 2nd set from the left
+*
+               DO 660 L = 1, KA - 1
+                  CALL DLARTV( NR, AB( KA1-L, J1+L ), INCA,
+     $                         AB( KA-L, J1+L ), INCA,
+     $                         WORK( N+M-KB+J1 ), WORK( M-KB+J1 ), KA1 )
+  660          CONTINUE
+*
+*              apply rotations in 2nd set from both sides to diagonal
+*              blocks
+*
+               CALL DLAR2V( NR, AB( KA1, J1 ), AB( KA1, J1-1 ),
+     $                      AB( KA, J1 ), INCA, WORK( N+M-KB+J1 ),
+     $                      WORK( M-KB+J1 ), KA1 )
+*
+            END IF
+*
+*           start applying rotations in 2nd set from the right
+*
+            DO 670 L = KA - 1, KB - K + 1, -1
+               NRT = ( J2+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL DLARTV( NRT, AB( L, J1T ), INCA,
+     $                         AB( L+1, J1T-1 ), INCA,
+     $                         WORK( N+M-KB+J1T ), WORK( M-KB+J1T ),
+     $                         KA1 )
+  670       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 2nd set
+*
+               DO 680 J = J1, J2, KA1
+                  CALL DROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1,
+     $                       WORK( N+M-KB+J ), WORK( M-KB+J ) )
+  680          CONTINUE
+            END IF
+  690    CONTINUE
+*
+         DO 710 K = 1, KB - 1
+            J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1
+*
+*           finish applying rotations in 1st set from the right
+*
+            DO 700 L = KB - K, 1, -1
+               NRT = ( J2+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL DLARTV( NRT, AB( L, J1T ), INCA,
+     $                         AB( L+1, J1T-1 ), INCA, WORK( N+J1T ),
+     $                         WORK( J1T ), KA1 )
+  700       CONTINUE
+  710    CONTINUE
+*
+         IF( KB.GT.1 ) THEN
+            DO 720 J = 2, MIN( I+KB, M ) - 2*KA - 1
+               WORK( N+J ) = WORK( N+J+KA )
+               WORK( J ) = WORK( J+KA )
+  720       CONTINUE
+         END IF
+*
+      ELSE
+*
+*        Transform A, working with the lower triangle
+*
+         IF( UPDATE ) THEN
+*
+*           Form  inv(S(i))**T * A * inv(S(i))
+*
+            BII = BB( 1, I )
+            DO 730 J = I1, I
+               AB( I-J+1, J ) = AB( I-J+1, J ) / BII
+  730       CONTINUE
+            DO 740 J = I, MIN( N, I+KA )
+               AB( J-I+1, I ) = AB( J-I+1, I ) / BII
+  740       CONTINUE
+            DO 770 K = I + 1, I + KBT
+               DO 750 J = K, I + KBT
+                  AB( J-K+1, K ) = AB( J-K+1, K ) -
+     $                             BB( J-I+1, I )*AB( K-I+1, I ) -
+     $                             BB( K-I+1, I )*AB( J-I+1, I ) +
+     $                             AB( 1, I )*BB( J-I+1, I )*
+     $                             BB( K-I+1, I )
+  750          CONTINUE
+               DO 760 J = I + KBT + 1, MIN( N, I+KA )
+                  AB( J-K+1, K ) = AB( J-K+1, K ) -
+     $                             BB( K-I+1, I )*AB( J-I+1, I )
+  760          CONTINUE
+  770       CONTINUE
+            DO 790 J = I1, I
+               DO 780 K = I + 1, MIN( J+KA, I+KBT )
+                  AB( K-J+1, J ) = AB( K-J+1, J ) -
+     $                             BB( K-I+1, I )*AB( I-J+1, J )
+  780          CONTINUE
+  790       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by inv(S(i))
+*
+               CALL DSCAL( NX, ONE / BII, X( 1, I ), 1 )
+               IF( KBT.GT.0 )
+     $            CALL DGER( NX, KBT, -ONE, X( 1, I ), 1, BB( 2, I ), 1,
+     $                       X( 1, I+1 ), LDX )
+            END IF
+*
+*           store a(i,i1) in RA1 for use in next loop over K
+*
+            RA1 = AB( I-I1+1, I1 )
+         END IF
+*
+*        Generate and apply vectors of rotations to chase all the
+*        existing bulges KA positions up toward the top of the band
+*
+         DO 840 K = 1, KB - 1
+            IF( UPDATE ) THEN
+*
+*              Determine the rotations which would annihilate the bulge
+*              which has in theory just been created
+*
+               IF( I+K-KA1.GT.0 .AND. I+K.LT.M ) THEN
+*
+*                 generate rotation to annihilate a(i,i+k-ka-1)
+*
+                  CALL DLARTG( AB( KA1-K, I+K-KA ), RA1,
+     $                         WORK( N+I+K-KA ), WORK( I+K-KA ), RA )
+*
+*                 create nonzero element a(i+k,i+k-ka-1) outside the
+*                 band and store it in WORK(m-kb+i+k)
+*
+                  T = -BB( K+1, I )*RA1
+                  WORK( M-KB+I+K ) = WORK( N+I+K-KA )*T -
+     $                               WORK( I+K-KA )*AB( KA1, I+K-KA )
+                  AB( KA1, I+K-KA ) = WORK( I+K-KA )*T +
+     $                                WORK( N+I+K-KA )*AB( KA1, I+K-KA )
+                  RA1 = RA
+               END IF
+            END IF
+            J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1
+            NR = ( J2+KA-1 ) / KA1
+            J1 = J2 - ( NR-1 )*KA1
+            IF( UPDATE ) THEN
+               J2T = MIN( J2, I-2*KA+K-1 )
+            ELSE
+               J2T = J2
+            END IF
+            NRT = ( J2T+KA-1 ) / KA1
+            DO 800 J = J1, J2T, KA1
+*
+*              create nonzero element a(j+ka,j-1) outside the band
+*              and store it in WORK(j)
+*
+               WORK( J ) = WORK( J )*AB( KA1, J-1 )
+               AB( KA1, J-1 ) = WORK( N+J )*AB( KA1, J-1 )
+  800       CONTINUE
+*
+*           generate rotations in 1st set to annihilate elements which
+*           have been created outside the band
+*
+            IF( NRT.GT.0 )
+     $         CALL DLARGV( NRT, AB( KA1, J1 ), INCA, WORK( J1 ), KA1,
+     $                      WORK( N+J1 ), KA1 )
+            IF( NR.GT.0 ) THEN
+*
+*              apply rotations in 1st set from the right
+*
+               DO 810 L = 1, KA - 1
+                  CALL DLARTV( NR, AB( L+1, J1 ), INCA, AB( L+2, J1-1 ),
+     $                         INCA, WORK( N+J1 ), WORK( J1 ), KA1 )
+  810          CONTINUE
+*
+*              apply rotations in 1st set from both sides to diagonal
+*              blocks
+*
+               CALL DLAR2V( NR, AB( 1, J1 ), AB( 1, J1-1 ),
+     $                      AB( 2, J1-1 ), INCA, WORK( N+J1 ),
+     $                      WORK( J1 ), KA1 )
+*
+            END IF
+*
+*           start applying rotations in 1st set from the left
+*
+            DO 820 L = KA - 1, KB - K + 1, -1
+               NRT = ( J2+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL DLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA,
+     $                         AB( KA1-L, J1T-KA1+L ), INCA,
+     $                         WORK( N+J1T ), WORK( J1T ), KA1 )
+  820       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 1st set
+*
+               DO 830 J = J1, J2, KA1
+                  CALL DROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1,
+     $                       WORK( N+J ), WORK( J ) )
+  830          CONTINUE
+            END IF
+  840    CONTINUE
+*
+         IF( UPDATE ) THEN
+            IF( I2.GT.0 .AND. KBT.GT.0 ) THEN
+*
+*              create nonzero element a(i+kbt,i+kbt-ka-1) outside the
+*              band and store it in WORK(m-kb+i+kbt)
+*
+               WORK( M-KB+I+KBT ) = -BB( KBT+1, I )*RA1
+            END IF
+         END IF
+*
+         DO 880 K = KB, 1, -1
+            IF( UPDATE ) THEN
+               J2 = I + K + 1 - MAX( 2, K+I0-M )*KA1
+            ELSE
+               J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1
+            END IF
+*
+*           finish applying rotations in 2nd set from the left
+*
+            DO 850 L = KB - K, 1, -1
+               NRT = ( J2+KA+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL DLARTV( NRT, AB( KA1-L+1, J1T+L-1 ), INCA,
+     $                         AB( KA1-L, J1T+L-1 ), INCA,
+     $                         WORK( N+M-KB+J1T+KA ),
+     $                         WORK( M-KB+J1T+KA ), KA1 )
+  850       CONTINUE
+            NR = ( J2+KA-1 ) / KA1
+            J1 = J2 - ( NR-1 )*KA1
+            DO 860 J = J1, J2, KA1
+               WORK( M-KB+J ) = WORK( M-KB+J+KA )
+               WORK( N+M-KB+J ) = WORK( N+M-KB+J+KA )
+  860       CONTINUE
+            DO 870 J = J1, J2, KA1
+*
+*              create nonzero element a(j+ka,j-1) outside the band
+*              and store it in WORK(m-kb+j)
+*
+               WORK( M-KB+J ) = WORK( M-KB+J )*AB( KA1, J-1 )
+               AB( KA1, J-1 ) = WORK( N+M-KB+J )*AB( KA1, J-1 )
+  870       CONTINUE
+            IF( UPDATE ) THEN
+               IF( I+K.GT.KA1 .AND. K.LE.KBT )
+     $            WORK( M-KB+I+K-KA ) = WORK( M-KB+I+K )
+            END IF
+  880    CONTINUE
+*
+         DO 920 K = KB, 1, -1
+            J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1
+            NR = ( J2+KA-1 ) / KA1
+            J1 = J2 - ( NR-1 )*KA1
+            IF( NR.GT.0 ) THEN
+*
+*              generate rotations in 2nd set to annihilate elements
+*              which have been created outside the band
+*
+               CALL DLARGV( NR, AB( KA1, J1 ), INCA, WORK( M-KB+J1 ),
+     $                      KA1, WORK( N+M-KB+J1 ), KA1 )
+*
+*              apply rotations in 2nd set from the right
+*
+               DO 890 L = 1, KA - 1
+                  CALL DLARTV( NR, AB( L+1, J1 ), INCA, AB( L+2, J1-1 ),
+     $                         INCA, WORK( N+M-KB+J1 ), WORK( M-KB+J1 ),
+     $                         KA1 )
+  890          CONTINUE
+*
+*              apply rotations in 2nd set from both sides to diagonal
+*              blocks
+*
+               CALL DLAR2V( NR, AB( 1, J1 ), AB( 1, J1-1 ),
+     $                      AB( 2, J1-1 ), INCA, WORK( N+M-KB+J1 ),
+     $                      WORK( M-KB+J1 ), KA1 )
+*
+            END IF
+*
+*           start applying rotations in 2nd set from the left
+*
+            DO 900 L = KA - 1, KB - K + 1, -1
+               NRT = ( J2+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL DLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA,
+     $                         AB( KA1-L, J1T-KA1+L ), INCA,
+     $                         WORK( N+M-KB+J1T ), WORK( M-KB+J1T ),
+     $                         KA1 )
+  900       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 2nd set
+*
+               DO 910 J = J1, J2, KA1
+                  CALL DROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1,
+     $                       WORK( N+M-KB+J ), WORK( M-KB+J ) )
+  910          CONTINUE
+            END IF
+  920    CONTINUE
+*
+         DO 940 K = 1, KB - 1
+            J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1
+*
+*           finish applying rotations in 1st set from the left
+*
+            DO 930 L = KB - K, 1, -1
+               NRT = ( J2+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL DLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA,
+     $                         AB( KA1-L, J1T-KA1+L ), INCA,
+     $                         WORK( N+J1T ), WORK( J1T ), KA1 )
+  930       CONTINUE
+  940    CONTINUE
+*
+         IF( KB.GT.1 ) THEN
+            DO 950 J = 2, MIN( I+KB, M ) - 2*KA - 1
+               WORK( N+J ) = WORK( N+J+KA )
+               WORK( J ) = WORK( J+KA )
+  950       CONTINUE
+         END IF
+*
+      END IF
+*
+      GO TO 490
+*
+*     End of DSBGST
+*
+      END
diff --git a/costa/native/external/lapack/dsbgv.f b/costa/native/external/lapack/dsbgv.f
new file mode 100644
index 000000000..79f114af1
--- /dev/null
+++ b/costa/native/external/lapack/dsbgv.f
@@ -0,0 +1,189 @@
+      SUBROUTINE DSBGV( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, Z,
+     $                  LDZ, WORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, KA, KB, LDAB, LDBB, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AB( LDAB, * ), BB( LDBB, * ), W( * ),
+     $                   WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSBGV computes all the eigenvalues, and optionally, the eigenvectors
+*  of a real generalized symmetric-definite banded eigenproblem, of
+*  the form A*x=(lambda)*B*x. Here A and B are assumed to be symmetric
+*  and banded, and B is also positive definite.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  KA      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'. KA >= 0.
+*
+*  KB      (input) INTEGER
+*          The number of superdiagonals of the matrix B if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'. KB >= 0.
+*
+*  AB      (input/output) DOUBLE PRECISION array, dimension (LDAB, N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first ka+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+ka).
+*
+*          On exit, the contents of AB are destroyed.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KA+1.
+*
+*  BB      (input/output) DOUBLE PRECISION array, dimension (LDBB, N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix B, stored in the first kb+1 rows of the array.  The
+*          j-th column of B is stored in the j-th column of the array BB
+*          as follows:
+*          if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j;
+*          if UPLO = 'L', BB(1+i-j,j)    = B(i,j) for j<=i<=min(n,j+kb).
+*
+*          On exit, the factor S from the split Cholesky factorization
+*          B = S**T*S, as returned by DPBSTF.
+*
+*  LDBB    (input) INTEGER
+*          The leading dimension of the array BB.  LDBB >= KB+1.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of
+*          eigenvectors, with the i-th column of Z holding the
+*          eigenvector associated with W(i). The eigenvectors are
+*          normalized so that Z**T*B*Z = I.
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= N.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is:
+*             <= N:  the algorithm failed to converge:
+*                    i off-diagonal elements of an intermediate
+*                    tridiagonal form did not converge to zero;
+*             > N:   if INFO = N + i, for 1 <= i <= N, then DPBSTF
+*                    returned INFO = i: B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            UPPER, WANTZ
+      CHARACTER          VECT
+      INTEGER            IINFO, INDE, INDWRK
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DPBSTF, DSBGST, DSBTRD, DSTEQR, DSTERF, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KA.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.KA+1 ) THEN
+         INFO = -7
+      ELSE IF( LDBB.LT.KB+1 ) THEN
+         INFO = -9
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSBGV ', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form a split Cholesky factorization of B.
+*
+      CALL DPBSTF( UPLO, N, KB, BB, LDBB, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem.
+*
+      INDE = 1
+      INDWRK = INDE + N
+      CALL DSBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Z, LDZ,
+     $             WORK( INDWRK ), IINFO )
+*
+*     Reduce to tridiagonal form.
+*
+      IF( WANTZ ) THEN
+         VECT = 'U'
+      ELSE
+         VECT = 'N'
+      END IF
+      CALL DSBTRD( VECT, UPLO, N, KA, AB, LDAB, W, WORK( INDE ), Z, LDZ,
+     $             WORK( INDWRK ), IINFO )
+*
+*     For eigenvalues only, call DSTERF.  For eigenvectors, call SSTEQR.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL DSTERF( N, W, WORK( INDE ), INFO )
+      ELSE
+         CALL DSTEQR( JOBZ, N, W, WORK( INDE ), Z, LDZ, WORK( INDWRK ),
+     $                INFO )
+      END IF
+      RETURN
+*
+*     End of DSBGV
+*
+      END
diff --git a/costa/native/external/lapack/dsbgvd.f b/costa/native/external/lapack/dsbgvd.f
new file mode 100644
index 000000000..bc140a86f
--- /dev/null
+++ b/costa/native/external/lapack/dsbgvd.f
@@ -0,0 +1,270 @@
+      SUBROUTINE DSBGVD( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W,
+     $                   Z, LDZ, WORK, LWORK, IWORK, LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, KA, KB, LDAB, LDBB, LDZ, LIWORK, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   AB( LDAB, * ), BB( LDBB, * ), W( * ),
+     $                   WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSBGVD computes all the eigenvalues, and optionally, the eigenvectors
+*  of a real generalized symmetric-definite banded eigenproblem, of the
+*  form A*x=(lambda)*B*x.  Here A and B are assumed to be symmetric and
+*  banded, and B is also positive definite.  If eigenvectors are
+*  desired, it uses a divide and conquer algorithm.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  KA      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KA >= 0.
+*
+*  KB      (input) INTEGER
+*          The number of superdiagonals of the matrix B if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KB >= 0.
+*
+*  AB      (input/output) DOUBLE PRECISION array, dimension (LDAB, N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first ka+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+ka).
+*
+*          On exit, the contents of AB are destroyed.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KA+1.
+*
+*  BB      (input/output) DOUBLE PRECISION array, dimension (LDBB, N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix B, stored in the first kb+1 rows of the array.  The
+*          j-th column of B is stored in the j-th column of the array BB
+*          as follows:
+*          if UPLO = 'U', BB(ka+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j;
+*          if UPLO = 'L', BB(1+i-j,j)    = B(i,j) for j<=i<=min(n,j+kb).
+*
+*          On exit, the factor S from the split Cholesky factorization
+*          B = S**T*S, as returned by DPBSTF.
+*
+*  LDBB    (input) INTEGER
+*          The leading dimension of the array BB.  LDBB >= KB+1.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of
+*          eigenvectors, with the i-th column of Z holding the
+*          eigenvector associated with W(i).  The eigenvectors are
+*          normalized so Z**T*B*Z = I.
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If N <= 1,               LWORK >= 1.
+*          If JOBZ = 'N' and N > 1, LWORK >= 3*N.
+*          If JOBZ = 'V' and N > 1, LWORK >= 1 + 5*N + 2*N**2.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if LIWORK > 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.
+*          If JOBZ  = 'N' or N <= 1, LIWORK >= 1.
+*          If JOBZ  = 'V' and N > 1, LIWORK >= 3 + 5*N.
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is:
+*             <= N:  the algorithm failed to converge:
+*                    i off-diagonal elements of an intermediate
+*                    tridiagonal form did not converge to zero;
+*             > N:   if INFO = N + i, for 1 <= i <= N, then DPBSTF
+*                    returned INFO = i: B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, UPPER, WANTZ
+      CHARACTER          VECT
+      INTEGER            IINFO, INDE, INDWK2, INDWRK, LIWMIN, LLWRK2,
+     $                   LWMIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMM, DLACPY, DPBSTF, DSBGST, DSBTRD, DSTEDC,
+     $                   DSTERF, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+      LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( N.LE.1 ) THEN
+         LIWMIN = 1
+         LWMIN = 1
+      ELSE
+         IF( WANTZ ) THEN
+            LIWMIN = 3 + 5*N
+            LWMIN = 1 + 5*N + 2*N**2
+         ELSE
+            LIWMIN = 1
+            LWMIN = 2*N
+         END IF
+      END IF
+*
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KA.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.KA+1 ) THEN
+         INFO = -7
+      ELSE IF( LDBB.LT.KB+1 ) THEN
+         INFO = -9
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -12
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -14
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -16
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSBGVD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form a split Cholesky factorization of B.
+*
+      CALL DPBSTF( UPLO, N, KB, BB, LDBB, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem.
+*
+      INDE = 1
+      INDWRK = INDE + N
+      INDWK2 = INDWRK + N*N
+      LLWRK2 = LWORK - INDWK2 + 1
+      CALL DSBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Z, LDZ,
+     $             WORK( INDWRK ), IINFO )
+*
+*     Reduce to tridiagonal form.
+*
+      IF( WANTZ ) THEN
+         VECT = 'U'
+      ELSE
+         VECT = 'N'
+      END IF
+      CALL DSBTRD( VECT, UPLO, N, KA, AB, LDAB, W, WORK( INDE ), Z, LDZ,
+     $             WORK( INDWRK ), IINFO )
+*
+*     For eigenvalues only, call DSTERF. For eigenvectors, call SSTEDC.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL DSTERF( N, W, WORK( INDE ), INFO )
+      ELSE
+         CALL DSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N,
+     $                WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO )
+         CALL DGEMM( 'N', 'N', N, N, N, ONE, Z, LDZ, WORK( INDWRK ), N,
+     $               ZERO, WORK( INDWK2 ), N )
+         CALL DLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ )
+      END IF
+*
+      WORK( 1 ) = LWMIN
+      IWORK( 1 ) = LIWMIN
+*
+      RETURN
+*
+*     End of DSBGVD
+*
+      END
diff --git a/costa/native/external/lapack/dsbgvx.f b/costa/native/external/lapack/dsbgvx.f
new file mode 100644
index 000000000..600e6d809
--- /dev/null
+++ b/costa/native/external/lapack/dsbgvx.f
@@ -0,0 +1,371 @@
+      SUBROUTINE DSBGVX( JOBZ, RANGE, UPLO, N, KA, KB, AB, LDAB, BB,
+     $                   LDBB, Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, Z,
+     $                   LDZ, WORK, IWORK, IFAIL, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE, UPLO
+      INTEGER            IL, INFO, IU, KA, KB, LDAB, LDBB, LDQ, LDZ, M,
+     $                   N
+      DOUBLE PRECISION   ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IFAIL( * ), IWORK( * )
+      DOUBLE PRECISION   AB( LDAB, * ), BB( LDBB, * ), Q( LDQ, * ),
+     $                   W( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSBGVX computes selected eigenvalues, and optionally, eigenvectors
+*  of a real generalized symmetric-definite banded eigenproblem, of
+*  the form A*x=(lambda)*B*x.  Here A and B are assumed to be symmetric
+*  and banded, and B is also positive definite.  Eigenvalues and
+*  eigenvectors can be selected by specifying either all eigenvalues,
+*  a range of values or a range of indices for the desired eigenvalues.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found.
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found.
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  KA      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KA >= 0.
+*
+*  KB      (input) INTEGER
+*          The number of superdiagonals of the matrix B if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KB >= 0.
+*
+*  AB      (input/output) DOUBLE PRECISION array, dimension (LDAB, N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first ka+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+ka).
+*
+*          On exit, the contents of AB are destroyed.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KA+1.
+*
+*  BB      (input/output) DOUBLE PRECISION array, dimension (LDBB, N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix B, stored in the first kb+1 rows of the array.  The
+*          j-th column of B is stored in the j-th column of the array BB
+*          as follows:
+*          if UPLO = 'U', BB(ka+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j;
+*          if UPLO = 'L', BB(1+i-j,j)    = B(i,j) for j<=i<=min(n,j+kb).
+*
+*          On exit, the factor S from the split Cholesky factorization
+*          B = S**T*S, as returned by DPBSTF.
+*
+*  LDBB    (input) INTEGER
+*          The leading dimension of the array BB.  LDBB >= KB+1.
+*
+*  Q       (output) DOUBLE PRECISION array, dimension (LDQ, N)
+*          If JOBZ = 'V', the n-by-n matrix used in the reduction of
+*          A*x = (lambda)*B*x to standard form, i.e. C*x = (lambda)*x,
+*          and consequently C to tridiagonal form.
+*          If JOBZ = 'N', the array Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.  If JOBZ = 'N',
+*          LDQ >= 1. If JOBZ = 'V', LDQ >= max(1,N).
+*
+*  VL      (input) DOUBLE PRECISION
+*  VU      (input) DOUBLE PRECISION
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) DOUBLE PRECISION
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less than
+*          or equal to zero, then  EPS*|T|  will be used in its place,
+*          where |T| is the 1-norm of the tridiagonal matrix obtained
+*          by reducing A to tridiagonal form.
+*
+*          Eigenvalues will be computed most accurately when ABSTOL is
+*          set to twice the underflow threshold 2*DLAMCH('S'), not zero.
+*          If this routine returns with INFO>0, indicating that some
+*          eigenvectors did not converge, try setting ABSTOL to
+*          2*DLAMCH('S').
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of
+*          eigenvectors, with the i-th column of Z holding the
+*          eigenvector associated with W(i).  The eigenvectors are
+*          normalized so Z**T*B*Z = I.
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (7N)
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (5N)
+*
+*  IFAIL   (input) INTEGER array, dimension (M)
+*          If JOBZ = 'V', then if INFO = 0, the first M elements of
+*          IFAIL are zero.  If INFO > 0, then IFAIL contains the
+*          indices of the eigenvalues that failed to converge.
+*          If JOBZ = 'N', then IFAIL is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0 : successful exit
+*          < 0 : if INFO = -i, the i-th argument had an illegal value
+*          <= N: if INFO = i, then i eigenvectors failed to converge.
+*                  Their indices are stored in IFAIL.
+*          > N : DPBSTF returned an error code; i.e.,
+*                if INFO = N + i, for 1 <= i <= N, then the leading
+*                minor of order i of B is not positive definite.
+*                The factorization of B could not be completed and
+*                no eigenvalues or eigenvectors were computed.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, UPPER, VALEIG, WANTZ
+      CHARACTER          ORDER, VECT
+      INTEGER            I, IINFO, INDD, INDE, INDEE, INDIBL, INDISP,
+     $                   INDIWO, INDWRK, ITMP1, J, JJ, NSPLIT
+      DOUBLE PRECISION   TMP1
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DGEMV, DLACPY, DPBSTF, DSBGST, DSBTRD,
+     $                   DSTEBZ, DSTEIN, DSTEQR, DSTERF, DSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KA.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN
+         INFO = -6
+      ELSE IF( LDAB.LT.KA+1 ) THEN
+         INFO = -8
+      ELSE IF( LDBB.LT.KB+1 ) THEN
+         INFO = -10
+      ELSE IF( LDQ.LT.1 ) THEN
+         INFO = -12
+      ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN
+         INFO = -14
+      ELSE IF( INDEIG .AND. IL.LT.1 ) THEN
+         INFO = -15
+      ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN
+         INFO = -16
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -21
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSBGVX', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+*     Form a split Cholesky factorization of B.
+*
+      CALL DPBSTF( UPLO, N, KB, BB, LDBB, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem.
+*
+      CALL DSBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Q, LDQ,
+     $             WORK, IINFO )
+*
+*     Reduce symmetric band matrix to tridiagonal form.
+*
+      INDD = 1
+      INDE = INDD + N
+      INDWRK = INDE + N
+      IF( WANTZ ) THEN
+         VECT = 'U'
+      ELSE
+         VECT = 'N'
+      END IF
+      CALL DSBTRD( VECT, UPLO, N, KA, AB, LDAB, WORK( INDD ),
+     $             WORK( INDE ), Q, LDQ, WORK( INDWRK ), IINFO )
+*
+*     If all eigenvalues are desired and ABSTOL is less than or equal
+*     to zero, then call DSTERF or SSTEQR.  If this fails for some
+*     eigenvalue, then try DSTEBZ.
+*
+      IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND.
+     $    ( ABSTOL.LE.ZERO ) ) THEN
+         CALL DCOPY( N, WORK( INDD ), 1, W, 1 )
+         INDEE = INDWRK + 2*N
+         CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 )
+         IF( .NOT.WANTZ ) THEN
+            CALL DSTERF( N, W, WORK( INDEE ), INFO )
+         ELSE
+            CALL DLACPY( 'A', N, N, Q, LDQ, Z, LDZ )
+            CALL DSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ,
+     $                   WORK( INDWRK ), INFO )
+            IF( INFO.EQ.0 ) THEN
+               DO 10 I = 1, N
+                  IFAIL( I ) = 0
+   10          CONTINUE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            M = N
+            GO TO 30
+         END IF
+         INFO = 0
+      END IF
+*
+*     Otherwise, call DSTEBZ and, if eigenvectors are desired,
+*     call DSTEIN.
+*
+      IF( WANTZ ) THEN
+         ORDER = 'B'
+      ELSE
+         ORDER = 'E'
+      END IF
+      INDIBL = 1
+      INDISP = INDIBL + N
+      INDIWO = INDISP + N
+      CALL DSTEBZ( RANGE, ORDER, N, VL, VU, IL, IU, ABSTOL,
+     $             WORK( INDD ), WORK( INDE ), M, NSPLIT, W,
+     $             IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ),
+     $             IWORK( INDIWO ), INFO )
+*
+      IF( WANTZ ) THEN
+         CALL DSTEIN( N, WORK( INDD ), WORK( INDE ), M, W,
+     $                IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ,
+     $                WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO )
+*
+*        Apply transformation matrix used in reduction to tridiagonal
+*        form to eigenvectors returned by DSTEIN.
+*
+         DO 20 J = 1, M
+            CALL DCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 )
+            CALL DGEMV( 'N', N, N, ONE, Q, LDQ, WORK, 1, ZERO,
+     $                  Z( 1, J ), 1 )
+   20    CONTINUE
+      END IF
+*
+   30 CONTINUE
+*
+*     If eigenvalues are not in order, then sort them, along with
+*     eigenvectors.
+*
+      IF( WANTZ ) THEN
+         DO 50 J = 1, M - 1
+            I = 0
+            TMP1 = W( J )
+            DO 40 JJ = J + 1, M
+               IF( W( JJ ).LT.TMP1 ) THEN
+                  I = JJ
+                  TMP1 = W( JJ )
+               END IF
+   40       CONTINUE
+*
+            IF( I.NE.0 ) THEN
+               ITMP1 = IWORK( INDIBL+I-1 )
+               W( I ) = W( J )
+               IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 )
+               W( J ) = TMP1
+               IWORK( INDIBL+J-1 ) = ITMP1
+               CALL DSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 )
+               IF( INFO.NE.0 ) THEN
+                  ITMP1 = IFAIL( I )
+                  IFAIL( I ) = IFAIL( J )
+                  IFAIL( J ) = ITMP1
+               END IF
+            END IF
+   50    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DSBGVX
+*
+      END
diff --git a/costa/native/external/lapack/dsbtrd.f b/costa/native/external/lapack/dsbtrd.f
new file mode 100644
index 000000000..94b6e67ca
--- /dev/null
+++ b/costa/native/external/lapack/dsbtrd.f
@@ -0,0 +1,553 @@
+      SUBROUTINE DSBTRD( VECT, UPLO, N, KD, AB, LDAB, D, E, Q, LDQ,
+     $                   WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO, VECT
+      INTEGER            INFO, KD, LDAB, LDQ, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AB( LDAB, * ), D( * ), E( * ), Q( LDQ, * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSBTRD reduces a real symmetric band matrix A to symmetric
+*  tridiagonal form T by an orthogonal similarity transformation:
+*  Q**T * A * Q = T.
+*
+*  Arguments
+*  =========
+*
+*  VECT    (input) CHARACTER*1
+*          = 'N':  do not form Q;
+*          = 'V':  form Q;
+*          = 'U':  update a matrix X, by forming X*Q.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) DOUBLE PRECISION array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*          On exit, the diagonal elements of AB are overwritten by the
+*          diagonal elements of the tridiagonal matrix T; if KD > 0, the
+*          elements on the first superdiagonal (if UPLO = 'U') or the
+*          first subdiagonal (if UPLO = 'L') are overwritten by the
+*          off-diagonal elements of T; the rest of AB is overwritten by
+*          values generated during the reduction.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  D       (output) DOUBLE PRECISION array, dimension (N)
+*          The diagonal elements of the tridiagonal matrix T.
+*
+*  E       (output) DOUBLE PRECISION array, dimension (N-1)
+*          The off-diagonal elements of the tridiagonal matrix T:
+*          E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'.
+*
+*  Q       (input/output) DOUBLE PRECISION array, dimension (LDQ,N)
+*          On entry, if VECT = 'U', then Q must contain an N-by-N
+*          matrix X; if VECT = 'N' or 'V', then Q need not be set.
+*
+*          On exit:
+*          if VECT = 'V', Q contains the N-by-N orthogonal matrix Q;
+*          if VECT = 'U', Q contains the product X*Q;
+*          if VECT = 'N', the array Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.
+*          LDQ >= 1, and LDQ >= N if VECT = 'V' or 'U'.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  Modified by Linda Kaufman, Bell Labs.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            INITQ, UPPER, WANTQ
+      INTEGER            I, I2, IBL, INCA, INCX, IQAEND, IQB, IQEND, J,
+     $                   J1, J1END, J1INC, J2, JEND, JIN, JINC, K, KD1,
+     $                   KDM1, KDN, L, LAST, LEND, NQ, NR, NRT
+      DOUBLE PRECISION   TEMP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLAR2V, DLARGV, DLARTG, DLARTV, DLASET, DROT,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INITQ = LSAME( VECT, 'V' )
+      WANTQ = INITQ .OR. LSAME( VECT, 'U' )
+      UPPER = LSAME( UPLO, 'U' )
+      KD1 = KD + 1
+      KDM1 = KD - 1
+      INCX = LDAB - 1
+      IQEND = 1
+*
+      INFO = 0
+      IF( .NOT.WANTQ .AND. .NOT.LSAME( VECT, 'N' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KD1 ) THEN
+         INFO = -6
+      ELSE IF( LDQ.LT.MAX( 1, N ) .AND. WANTQ ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSBTRD', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Initialize Q to the unit matrix, if needed
+*
+      IF( INITQ )
+     $   CALL DLASET( 'Full', N, N, ZERO, ONE, Q, LDQ )
+*
+*     Wherever possible, plane rotations are generated and applied in
+*     vector operations of length NR over the index set J1:J2:KD1.
+*
+*     The cosines and sines of the plane rotations are stored in the
+*     arrays D and WORK.
+*
+      INCA = KD1*LDAB
+      KDN = MIN( N-1, KD )
+      IF( UPPER ) THEN
+*
+         IF( KD.GT.1 ) THEN
+*
+*           Reduce to tridiagonal form, working with upper triangle
+*
+            NR = 0
+            J1 = KDN + 2
+            J2 = 1
+*
+            DO 90 I = 1, N - 2
+*
+*              Reduce i-th row of matrix to tridiagonal form
+*
+               DO 80 K = KDN + 1, 2, -1
+                  J1 = J1 + KDN
+                  J2 = J2 + KDN
+*
+                  IF( NR.GT.0 ) THEN
+*
+*                    generate plane rotations to annihilate nonzero
+*                    elements which have been created outside the band
+*
+                     CALL DLARGV( NR, AB( 1, J1-1 ), INCA, WORK( J1 ),
+     $                            KD1, D( J1 ), KD1 )
+*
+*                    apply rotations from the right
+*
+*
+*                    Dependent on the the number of diagonals either
+*                    DLARTV or DROT is used
+*
+                     IF( NR.GE.2*KD-1 ) THEN
+                        DO 10 L = 1, KD - 1
+                           CALL DLARTV( NR, AB( L+1, J1-1 ), INCA,
+     $                                  AB( L, J1 ), INCA, D( J1 ),
+     $                                  WORK( J1 ), KD1 )
+   10                   CONTINUE
+*
+                     ELSE
+                        JEND = J1 + ( NR-1 )*KD1
+                        DO 20 JINC = J1, JEND, KD1
+                           CALL DROT( KDM1, AB( 2, JINC-1 ), 1,
+     $                                AB( 1, JINC ), 1, D( JINC ),
+     $                                WORK( JINC ) )
+   20                   CONTINUE
+                     END IF
+                  END IF
+*
+*
+                  IF( K.GT.2 ) THEN
+                     IF( K.LE.N-I+1 ) THEN
+*
+*                       generate plane rotation to annihilate a(i,i+k-1)
+*                       within the band
+*
+                        CALL DLARTG( AB( KD-K+3, I+K-2 ),
+     $                               AB( KD-K+2, I+K-1 ), D( I+K-1 ),
+     $                               WORK( I+K-1 ), TEMP )
+                        AB( KD-K+3, I+K-2 ) = TEMP
+*
+*                       apply rotation from the right
+*
+                        CALL DROT( K-3, AB( KD-K+4, I+K-2 ), 1,
+     $                             AB( KD-K+3, I+K-1 ), 1, D( I+K-1 ),
+     $                             WORK( I+K-1 ) )
+                     END IF
+                     NR = NR + 1
+                     J1 = J1 - KDN - 1
+                  END IF
+*
+*                 apply plane rotations from both sides to diagonal
+*                 blocks
+*
+                  IF( NR.GT.0 )
+     $               CALL DLAR2V( NR, AB( KD1, J1-1 ), AB( KD1, J1 ),
+     $                            AB( KD, J1 ), INCA, D( J1 ),
+     $                            WORK( J1 ), KD1 )
+*
+*                 apply plane rotations from the left
+*
+                  IF( NR.GT.0 ) THEN
+                     IF( 2*KD-1.LT.NR ) THEN
+*
+*                    Dependent on the the number of diagonals either
+*                    DLARTV or DROT is used
+*
+                        DO 30 L = 1, KD - 1
+                           IF( J2+L.GT.N ) THEN
+                              NRT = NR - 1
+                           ELSE
+                              NRT = NR
+                           END IF
+                           IF( NRT.GT.0 )
+     $                        CALL DLARTV( NRT, AB( KD-L, J1+L ), INCA,
+     $                                     AB( KD-L+1, J1+L ), INCA,
+     $                                     D( J1 ), WORK( J1 ), KD1 )
+   30                   CONTINUE
+                     ELSE
+                        J1END = J1 + KD1*( NR-2 )
+                        IF( J1END.GE.J1 ) THEN
+                           DO 40 JIN = J1, J1END, KD1
+                              CALL DROT( KD-1, AB( KD-1, JIN+1 ), INCX,
+     $                                   AB( KD, JIN+1 ), INCX,
+     $                                   D( JIN ), WORK( JIN ) )
+   40                      CONTINUE
+                        END IF
+                        LEND = MIN( KDM1, N-J2 )
+                        LAST = J1END + KD1
+                        IF( LEND.GT.0 )
+     $                     CALL DROT( LEND, AB( KD-1, LAST+1 ), INCX,
+     $                                AB( KD, LAST+1 ), INCX, D( LAST ),
+     $                                WORK( LAST ) )
+                     END IF
+                  END IF
+*
+                  IF( WANTQ ) THEN
+*
+*                    accumulate product of plane rotations in Q
+*
+                     IF( INITQ ) THEN
+*
+*                 take advantage of the fact that Q was
+*                 initially the Identity matrix
+*
+                        IQEND = MAX( IQEND, J2 )
+                        I2 = MAX( 0, K-3 )
+                        IQAEND = 1 + I*KD
+                        IF( K.EQ.2 )
+     $                     IQAEND = IQAEND + KD
+                        IQAEND = MIN( IQAEND, IQEND )
+                        DO 50 J = J1, J2, KD1
+                           IBL = I - I2 / KDM1
+                           I2 = I2 + 1
+                           IQB = MAX( 1, J-IBL )
+                           NQ = 1 + IQAEND - IQB
+                           IQAEND = MIN( IQAEND+KD, IQEND )
+                           CALL DROT( NQ, Q( IQB, J-1 ), 1, Q( IQB, J ),
+     $                                1, D( J ), WORK( J ) )
+   50                   CONTINUE
+                     ELSE
+*
+                        DO 60 J = J1, J2, KD1
+                           CALL DROT( N, Q( 1, J-1 ), 1, Q( 1, J ), 1,
+     $                                D( J ), WORK( J ) )
+   60                   CONTINUE
+                     END IF
+*
+                  END IF
+*
+                  IF( J2+KDN.GT.N ) THEN
+*
+*                    adjust J2 to keep within the bounds of the matrix
+*
+                     NR = NR - 1
+                     J2 = J2 - KDN - 1
+                  END IF
+*
+                  DO 70 J = J1, J2, KD1
+*
+*                    create nonzero element a(j-1,j+kd) outside the band
+*                    and store it in WORK
+*
+                     WORK( J+KD ) = WORK( J )*AB( 1, J+KD )
+                     AB( 1, J+KD ) = D( J )*AB( 1, J+KD )
+   70             CONTINUE
+   80          CONTINUE
+   90       CONTINUE
+         END IF
+*
+         IF( KD.GT.0 ) THEN
+*
+*           copy off-diagonal elements to E
+*
+            DO 100 I = 1, N - 1
+               E( I ) = AB( KD, I+1 )
+  100       CONTINUE
+         ELSE
+*
+*           set E to zero if original matrix was diagonal
+*
+            DO 110 I = 1, N - 1
+               E( I ) = ZERO
+  110       CONTINUE
+         END IF
+*
+*        copy diagonal elements to D
+*
+         DO 120 I = 1, N
+            D( I ) = AB( KD1, I )
+  120    CONTINUE
+*
+      ELSE
+*
+         IF( KD.GT.1 ) THEN
+*
+*           Reduce to tridiagonal form, working with lower triangle
+*
+            NR = 0
+            J1 = KDN + 2
+            J2 = 1
+*
+            DO 210 I = 1, N - 2
+*
+*              Reduce i-th column of matrix to tridiagonal form
+*
+               DO 200 K = KDN + 1, 2, -1
+                  J1 = J1 + KDN
+                  J2 = J2 + KDN
+*
+                  IF( NR.GT.0 ) THEN
+*
+*                    generate plane rotations to annihilate nonzero
+*                    elements which have been created outside the band
+*
+                     CALL DLARGV( NR, AB( KD1, J1-KD1 ), INCA,
+     $                            WORK( J1 ), KD1, D( J1 ), KD1 )
+*
+*                    apply plane rotations from one side
+*
+*
+*                    Dependent on the the number of diagonals either
+*                    DLARTV or DROT is used
+*
+                     IF( NR.GT.2*KD-1 ) THEN
+                        DO 130 L = 1, KD - 1
+                           CALL DLARTV( NR, AB( KD1-L, J1-KD1+L ), INCA,
+     $                                  AB( KD1-L+1, J1-KD1+L ), INCA,
+     $                                  D( J1 ), WORK( J1 ), KD1 )
+  130                   CONTINUE
+                     ELSE
+                        JEND = J1 + KD1*( NR-1 )
+                        DO 140 JINC = J1, JEND, KD1
+                           CALL DROT( KDM1, AB( KD, JINC-KD ), INCX,
+     $                                AB( KD1, JINC-KD ), INCX,
+     $                                D( JINC ), WORK( JINC ) )
+  140                   CONTINUE
+                     END IF
+*
+                  END IF
+*
+                  IF( K.GT.2 ) THEN
+                     IF( K.LE.N-I+1 ) THEN
+*
+*                       generate plane rotation to annihilate a(i+k-1,i)
+*                       within the band
+*
+                        CALL DLARTG( AB( K-1, I ), AB( K, I ),
+     $                               D( I+K-1 ), WORK( I+K-1 ), TEMP )
+                        AB( K-1, I ) = TEMP
+*
+*                       apply rotation from the left
+*
+                        CALL DROT( K-3, AB( K-2, I+1 ), LDAB-1,
+     $                             AB( K-1, I+1 ), LDAB-1, D( I+K-1 ),
+     $                             WORK( I+K-1 ) )
+                     END IF
+                     NR = NR + 1
+                     J1 = J1 - KDN - 1
+                  END IF
+*
+*                 apply plane rotations from both sides to diagonal
+*                 blocks
+*
+                  IF( NR.GT.0 )
+     $               CALL DLAR2V( NR, AB( 1, J1-1 ), AB( 1, J1 ),
+     $                            AB( 2, J1-1 ), INCA, D( J1 ),
+     $                            WORK( J1 ), KD1 )
+*
+*                 apply plane rotations from the right
+*
+*
+*                    Dependent on the the number of diagonals either
+*                    DLARTV or DROT is used
+*
+                  IF( NR.GT.0 ) THEN
+                     IF( NR.GT.2*KD-1 ) THEN
+                        DO 150 L = 1, KD - 1
+                           IF( J2+L.GT.N ) THEN
+                              NRT = NR - 1
+                           ELSE
+                              NRT = NR
+                           END IF
+                           IF( NRT.GT.0 )
+     $                        CALL DLARTV( NRT, AB( L+2, J1-1 ), INCA,
+     $                                     AB( L+1, J1 ), INCA, D( J1 ),
+     $                                     WORK( J1 ), KD1 )
+  150                   CONTINUE
+                     ELSE
+                        J1END = J1 + KD1*( NR-2 )
+                        IF( J1END.GE.J1 ) THEN
+                           DO 160 J1INC = J1, J1END, KD1
+                              CALL DROT( KDM1, AB( 3, J1INC-1 ), 1,
+     $                                   AB( 2, J1INC ), 1, D( J1INC ),
+     $                                   WORK( J1INC ) )
+  160                      CONTINUE
+                        END IF
+                        LEND = MIN( KDM1, N-J2 )
+                        LAST = J1END + KD1
+                        IF( LEND.GT.0 )
+     $                     CALL DROT( LEND, AB( 3, LAST-1 ), 1,
+     $                                AB( 2, LAST ), 1, D( LAST ),
+     $                                WORK( LAST ) )
+                     END IF
+                  END IF
+*
+*
+*
+                  IF( WANTQ ) THEN
+*
+*                    accumulate product of plane rotations in Q
+*
+                     IF( INITQ ) THEN
+*
+*                 take advantage of the fact that Q was
+*                 initially the Identity matrix
+*
+                        IQEND = MAX( IQEND, J2 )
+                        I2 = MAX( 0, K-3 )
+                        IQAEND = 1 + I*KD
+                        IF( K.EQ.2 )
+     $                     IQAEND = IQAEND + KD
+                        IQAEND = MIN( IQAEND, IQEND )
+                        DO 170 J = J1, J2, KD1
+                           IBL = I - I2 / KDM1
+                           I2 = I2 + 1
+                           IQB = MAX( 1, J-IBL )
+                           NQ = 1 + IQAEND - IQB
+                           IQAEND = MIN( IQAEND+KD, IQEND )
+                           CALL DROT( NQ, Q( IQB, J-1 ), 1, Q( IQB, J ),
+     $                                1, D( J ), WORK( J ) )
+  170                   CONTINUE
+                     ELSE
+*
+                        DO 180 J = J1, J2, KD1
+                           CALL DROT( N, Q( 1, J-1 ), 1, Q( 1, J ), 1,
+     $                                D( J ), WORK( J ) )
+  180                   CONTINUE
+                     END IF
+                  END IF
+*
+                  IF( J2+KDN.GT.N ) THEN
+*
+*                    adjust J2 to keep within the bounds of the matrix
+*
+                     NR = NR - 1
+                     J2 = J2 - KDN - 1
+                  END IF
+*
+                  DO 190 J = J1, J2, KD1
+*
+*                    create nonzero element a(j+kd,j-1) outside the
+*                    band and store it in WORK
+*
+                     WORK( J+KD ) = WORK( J )*AB( KD1, J )
+                     AB( KD1, J ) = D( J )*AB( KD1, J )
+  190             CONTINUE
+  200          CONTINUE
+  210       CONTINUE
+         END IF
+*
+         IF( KD.GT.0 ) THEN
+*
+*           copy off-diagonal elements to E
+*
+            DO 220 I = 1, N - 1
+               E( I ) = AB( 2, I )
+  220       CONTINUE
+         ELSE
+*
+*           set E to zero if original matrix was diagonal
+*
+            DO 230 I = 1, N - 1
+               E( I ) = ZERO
+  230       CONTINUE
+         END IF
+*
+*        copy diagonal elements to D
+*
+         DO 240 I = 1, N
+            D( I ) = AB( 1, I )
+  240    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DSBTRD
+*
+      END
diff --git a/costa/native/external/lapack/dsecnd.f b/costa/native/external/lapack/dsecnd.f
new file mode 100644
index 000000000..b9c68c15f
--- /dev/null
+++ b/costa/native/external/lapack/dsecnd.f
@@ -0,0 +1,34 @@
+      DOUBLE PRECISION FUNCTION DSECND( )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*  Purpose
+*  =======
+*
+*  DSECND returns the user time for a process in seconds.
+*  This version gets the time from the system function ETIME.
+*
+* =====================================================================
+*
+*     .. Local Scalars ..
+      REAL               T1
+*     ..
+*     .. Local Arrays ..
+      REAL               TARRAY( 2 )
+*     ..
+*     .. External Functions ..
+      REAL               ETIME
+C      EXTERNAL           ETIME
+*     ..
+*     .. Executable Statements ..
+*
+      T1 = ETIME( TARRAY )
+      DSECND = TARRAY( 1 )
+      RETURN
+*
+*     End of DSECND
+*
+      END
diff --git a/costa/native/external/lapack/dspcon.f b/costa/native/external/lapack/dspcon.f
new file mode 100644
index 000000000..7c6ad1231
--- /dev/null
+++ b/costa/native/external/lapack/dspcon.f
@@ -0,0 +1,158 @@
+      SUBROUTINE DSPCON( UPLO, N, AP, IPIV, ANORM, RCOND, WORK, IWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+      DOUBLE PRECISION   ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), IWORK( * )
+      DOUBLE PRECISION   AP( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSPCON estimates the reciprocal of the condition number (in the
+*  1-norm) of a real symmetric packed matrix A using the factorization
+*  A = U*D*U**T or A = L*D*L**T computed by DSPTRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**T;
+*          = 'L':  Lower triangular, form is A = L*D*L**T.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          The block diagonal matrix D and the multipliers used to
+*          obtain the factor U or L as computed by DSPTRF, stored as a
+*          packed triangular matrix.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by DSPTRF.
+*
+*  ANORM   (input) DOUBLE PRECISION
+*          The 1-norm of the original matrix A.
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an
+*          estimate of the 1-norm of inv(A) computed in this routine.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (2*N)
+*
+*  IWORK    (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, IP, KASE
+      DOUBLE PRECISION   AINVNM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLACON, DSPTRS, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSPCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.LE.ZERO ) THEN
+         RETURN
+      END IF
+*
+*     Check that the diagonal matrix D is nonsingular.
+*
+      IF( UPPER ) THEN
+*
+*        Upper triangular storage: examine D from bottom to top
+*
+         IP = N*( N+1 ) / 2
+         DO 10 I = N, 1, -1
+            IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO )
+     $         RETURN
+            IP = IP - I
+   10    CONTINUE
+      ELSE
+*
+*        Lower triangular storage: examine D from top to bottom.
+*
+         IP = 1
+         DO 20 I = 1, N
+            IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO )
+     $         RETURN
+            IP = IP + N - I + 1
+   20    CONTINUE
+      END IF
+*
+*     Estimate the 1-norm of the inverse.
+*
+      KASE = 0
+   30 CONTINUE
+      CALL DLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+*
+*        Multiply by inv(L*D*L') or inv(U*D*U').
+*
+         CALL DSPTRS( UPLO, N, 1, AP, IPIV, WORK, N, INFO )
+         GO TO 30
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+      RETURN
+*
+*     End of DSPCON
+*
+      END
diff --git a/costa/native/external/lapack/dspev.f b/costa/native/external/lapack/dspev.f
new file mode 100644
index 000000000..36da8870a
--- /dev/null
+++ b/costa/native/external/lapack/dspev.f
@@ -0,0 +1,188 @@
+      SUBROUTINE DSPEV( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AP( * ), W( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSPEV computes all the eigenvalues and, optionally, eigenvectors of a
+*  real symmetric matrix A in packed storage.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, AP is overwritten by values generated during the
+*          reduction to tridiagonal form.  If UPLO = 'U', the diagonal
+*          and first superdiagonal of the tridiagonal matrix T overwrite
+*          the corresponding elements of A, and if UPLO = 'L', the
+*          diagonal and first subdiagonal of T overwrite the
+*          corresponding elements of A.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal
+*          eigenvectors of the matrix A, with the i-th column of Z
+*          holding the eigenvector associated with W(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = i, the algorithm failed to converge; i
+*                off-diagonal elements of an intermediate tridiagonal
+*                form did not converge to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            WANTZ
+      INTEGER            IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE
+      DOUBLE PRECISION   ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
+     $                   SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, DLANSP
+      EXTERNAL           LSAME, DLAMCH, DLANSP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DOPGTR, DSCAL, DSPTRD, DSTEQR, DSTERF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( LSAME( UPLO, 'U' ) .OR. LSAME( UPLO, 'L' ) ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -7
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSPEV ', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         W( 1 ) = AP( 1 )
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      EPS = DLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = SQRT( BIGNUM )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ANRM = DLANSP( 'M', UPLO, N, AP, WORK )
+      ISCALE = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         CALL DSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 )
+      END IF
+*
+*     Call DSPTRD to reduce symmetric packed matrix to tridiagonal form.
+*
+      INDE = 1
+      INDTAU = INDE + N
+      CALL DSPTRD( UPLO, N, AP, W, WORK( INDE ), WORK( INDTAU ), IINFO )
+*
+*     For eigenvalues only, call DSTERF.  For eigenvectors, first call
+*     DOPGTR to generate the orthogonal matrix, then call DSTEQR.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL DSTERF( N, W, WORK( INDE ), INFO )
+      ELSE
+         INDWRK = INDTAU + N
+         CALL DOPGTR( UPLO, N, AP, WORK( INDTAU ), Z, LDZ,
+     $                WORK( INDWRK ), IINFO )
+         CALL DSTEQR( JOBZ, N, W, WORK( INDE ), Z, LDZ, WORK( INDTAU ),
+     $                INFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = N
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL DSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+      RETURN
+*
+*     End of DSPEV
+*
+      END
diff --git a/costa/native/external/lapack/dspevd.f b/costa/native/external/lapack/dspevd.f
new file mode 100644
index 000000000..217929e25
--- /dev/null
+++ b/costa/native/external/lapack/dspevd.f
@@ -0,0 +1,249 @@
+      SUBROUTINE DSPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK,
+     $                   IWORK, LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, LDZ, LIWORK, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   AP( * ), W( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSPEVD computes all the eigenvalues and, optionally, eigenvectors
+*  of a real symmetric matrix A in packed storage. If eigenvectors are
+*  desired, it uses a divide and conquer algorithm.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, AP is overwritten by values generated during the
+*          reduction to tridiagonal form.  If UPLO = 'U', the diagonal
+*          and first superdiagonal of the tridiagonal matrix T overwrite
+*          the corresponding elements of A, and if UPLO = 'L', the
+*          diagonal and first subdiagonal of T overwrite the
+*          corresponding elements of A.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal
+*          eigenvectors of the matrix A, with the i-th column of Z
+*          holding the eigenvector associated with W(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array,
+*                                         dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If N <= 1,               LWORK must be at least 1.
+*          If JOBZ = 'N' and N > 1, LWORK must be at least 2*N.
+*          If JOBZ = 'V' and N > 1, LWORK must be at least
+*                                                 1 + 6*N + N**2.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.
+*          If JOBZ  = 'N' or N <= 1, LIWORK must be at least 1.
+*          If JOBZ  = 'V' and N > 1, LIWORK must be at least 3 + 5*N.
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = i, the algorithm failed to converge; i
+*                off-diagonal elements of an intermediate tridiagonal
+*                form did not converge to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, WANTZ
+      INTEGER            IINFO, INDE, INDTAU, INDWRK, ISCALE, LIWMIN,
+     $                   LLWORK, LWMIN
+      DOUBLE PRECISION   ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
+     $                   SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, DLANSP
+      EXTERNAL           LSAME, DLAMCH, DLANSP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DOPMTR, DSCAL, DSPTRD, DSTEDC, DSTERF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( N.LE.1 ) THEN
+         LIWMIN = 1
+         LWMIN = 1
+      ELSE
+         IF( WANTZ ) THEN
+            LIWMIN = 3 + 5*N
+            LWMIN = 1 + 6*N + N**2
+         ELSE
+            LIWMIN = 1
+            LWMIN = 2*N
+         END IF
+      END IF
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( LSAME( UPLO, 'U' ) .OR. LSAME( UPLO, 'L' ) ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -7
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -9
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -11
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSPEVD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         W( 1 ) = AP( 1 )
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      EPS = DLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = SQRT( BIGNUM )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ANRM = DLANSP( 'M', UPLO, N, AP, WORK )
+      ISCALE = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         CALL DSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 )
+      END IF
+*
+*     Call DSPTRD to reduce symmetric packed matrix to tridiagonal form.
+*
+      INDE = 1
+      INDTAU = INDE + N
+      CALL DSPTRD( UPLO, N, AP, W, WORK( INDE ), WORK( INDTAU ), IINFO )
+*
+*     For eigenvalues only, call DSTERF.  For eigenvectors, first call
+*     DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the
+*     tridiagonal matrix, then call DOPMTR to multiply it by the
+*     Householder transformations represented in AP.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL DSTERF( N, W, WORK( INDE ), INFO )
+      ELSE
+         INDWRK = INDTAU + N
+         LLWORK = LWORK - INDWRK + 1
+         CALL DSTEDC( 'I', N, W, WORK( INDE ), Z, LDZ, WORK( INDWRK ),
+     $                LLWORK, IWORK, LIWORK, INFO )
+         CALL DOPMTR( 'L', UPLO, 'N', N, N, AP, WORK( INDTAU ), Z, LDZ,
+     $                WORK( INDWRK ), IINFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( ISCALE.EQ.1 )
+     $   CALL DSCAL( N, ONE / SIGMA, W, 1 )
+*
+      WORK( 1 ) = LWMIN
+      IWORK( 1 ) = LIWMIN
+      RETURN
+*
+*     End of DSPEVD
+*
+      END
diff --git a/costa/native/external/lapack/dspevx.f b/costa/native/external/lapack/dspevx.f
new file mode 100644
index 000000000..d1ad6e9e9
--- /dev/null
+++ b/costa/native/external/lapack/dspevx.f
@@ -0,0 +1,377 @@
+      SUBROUTINE DSPEVX( JOBZ, RANGE, UPLO, N, AP, VL, VU, IL, IU,
+     $                   ABSTOL, M, W, Z, LDZ, WORK, IWORK, IFAIL,
+     $                   INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE, UPLO
+      INTEGER            IL, INFO, IU, LDZ, M, N
+      DOUBLE PRECISION   ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IFAIL( * ), IWORK( * )
+      DOUBLE PRECISION   AP( * ), W( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSPEVX computes selected eigenvalues and, optionally, eigenvectors
+*  of a real symmetric matrix A in packed storage.  Eigenvalues/vectors
+*  can be selected by specifying either a range of values or a range of
+*  indices for the desired eigenvalues.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found;
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found;
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, AP is overwritten by values generated during the
+*          reduction to tridiagonal form.  If UPLO = 'U', the diagonal
+*          and first superdiagonal of the tridiagonal matrix T overwrite
+*          the corresponding elements of A, and if UPLO = 'L', the
+*          diagonal and first subdiagonal of T overwrite the
+*          corresponding elements of A.
+*
+*  VL      (input) DOUBLE PRECISION
+*  VU      (input) DOUBLE PRECISION
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) DOUBLE PRECISION
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less than
+*          or equal to zero, then  EPS*|T|  will be used in its place,
+*          where |T| is the 1-norm of the tridiagonal matrix obtained
+*          by reducing AP to tridiagonal form.
+*
+*          Eigenvalues will be computed most accurately when ABSTOL is
+*          set to twice the underflow threshold 2*DLAMCH('S'), not zero.
+*          If this routine returns with INFO>0, indicating that some
+*          eigenvectors did not converge, try setting ABSTOL to
+*          2*DLAMCH('S').
+*
+*          See "Computing Small Singular Values of Bidiagonal Matrices
+*          with Guaranteed High Relative Accuracy," by Demmel and
+*          Kahan, LAPACK Working Note #3.
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, the selected eigenvalues in ascending order.
+*
+*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M))
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix A
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          If an eigenvector fails to converge, then that column of Z
+*          contains the latest approximation to the eigenvector, and the
+*          index of the eigenvector is returned in IFAIL.
+*          If JOBZ = 'N', then Z is not referenced.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (8*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (5*N)
+*
+*  IFAIL   (output) INTEGER array, dimension (N)
+*          If JOBZ = 'V', then if INFO = 0, the first M elements of
+*          IFAIL are zero.  If INFO > 0, then IFAIL contains the
+*          indices of the eigenvectors that failed to converge.
+*          If JOBZ = 'N', then IFAIL is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, then i eigenvectors failed to converge.
+*                Their indices are stored in array IFAIL.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, VALEIG, WANTZ
+      CHARACTER          ORDER
+      INTEGER            I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL,
+     $                   INDISP, INDIWO, INDTAU, INDWRK, ISCALE, ITMP1,
+     $                   J, JJ, NSPLIT
+      DOUBLE PRECISION   ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN,
+     $                   SIGMA, SMLNUM, TMP1, VLL, VUU
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, DLANSP
+      EXTERNAL           LSAME, DLAMCH, DLANSP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DOPGTR, DOPMTR, DSCAL, DSPTRD, DSTEBZ,
+     $                   DSTEIN, DSTEQR, DSTERF, DSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( LSAME( UPLO, 'L' ) .OR. LSAME( UPLO, 'U' ) ) )
+     $          THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE
+         IF( VALEIG ) THEN
+            IF( N.GT.0 .AND. VU.LE.VL )
+     $         INFO = -7
+         ELSE IF( INDEIG ) THEN
+            IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN
+               INFO = -8
+            ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN
+               INFO = -9
+            END IF
+         END IF
+      END IF
+      IF( INFO.EQ.0 ) THEN
+         IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) )
+     $      INFO = -14
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSPEVX', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( ALLEIG .OR. INDEIG ) THEN
+            M = 1
+            W( 1 ) = AP( 1 )
+         ELSE
+            IF( VL.LT.AP( 1 ) .AND. VU.GE.AP( 1 ) ) THEN
+               M = 1
+               W( 1 ) = AP( 1 )
+            END IF
+         END IF
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      EPS = DLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ISCALE = 0
+      ABSTLL = ABSTOL
+      IF( VALEIG ) THEN
+         VLL = VL
+         VUU = VU
+      ELSE
+         VLL = ZERO
+         VUU = ZERO
+      END IF
+      ANRM = DLANSP( 'M', UPLO, N, AP, WORK )
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         CALL DSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 )
+         IF( ABSTOL.GT.0 )
+     $      ABSTLL = ABSTOL*SIGMA
+         IF( VALEIG ) THEN
+            VLL = VL*SIGMA
+            VUU = VU*SIGMA
+         END IF
+      END IF
+*
+*     Call DSPTRD to reduce symmetric packed matrix to tridiagonal form.
+*
+      INDTAU = 1
+      INDE = INDTAU + N
+      INDD = INDE + N
+      INDWRK = INDD + N
+      CALL DSPTRD( UPLO, N, AP, WORK( INDD ), WORK( INDE ),
+     $             WORK( INDTAU ), IINFO )
+*
+*     If all eigenvalues are desired and ABSTOL is less than or equal
+*     to zero, then call DSTERF or DOPGTR and SSTEQR.  If this fails
+*     for some eigenvalue, then try DSTEBZ.
+*
+      IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND.
+     $    ( ABSTOL.LE.ZERO ) ) THEN
+         CALL DCOPY( N, WORK( INDD ), 1, W, 1 )
+         INDEE = INDWRK + 2*N
+         IF( .NOT.WANTZ ) THEN
+            CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 )
+            CALL DSTERF( N, W, WORK( INDEE ), INFO )
+         ELSE
+            CALL DOPGTR( UPLO, N, AP, WORK( INDTAU ), Z, LDZ,
+     $                   WORK( INDWRK ), IINFO )
+            CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 )
+            CALL DSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ,
+     $                   WORK( INDWRK ), INFO )
+            IF( INFO.EQ.0 ) THEN
+               DO 10 I = 1, N
+                  IFAIL( I ) = 0
+   10          CONTINUE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            M = N
+            GO TO 20
+         END IF
+         INFO = 0
+      END IF
+*
+*     Otherwise, call DSTEBZ and, if eigenvectors are desired, SSTEIN.
+*
+      IF( WANTZ ) THEN
+         ORDER = 'B'
+      ELSE
+         ORDER = 'E'
+      END IF
+      INDIBL = 1
+      INDISP = INDIBL + N
+      INDIWO = INDISP + N
+      CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL,
+     $             WORK( INDD ), WORK( INDE ), M, NSPLIT, W,
+     $             IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ),
+     $             IWORK( INDIWO ), INFO )
+*
+      IF( WANTZ ) THEN
+         CALL DSTEIN( N, WORK( INDD ), WORK( INDE ), M, W,
+     $                IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ,
+     $                WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO )
+*
+*        Apply orthogonal matrix used in reduction to tridiagonal
+*        form to eigenvectors returned by DSTEIN.
+*
+         CALL DOPMTR( 'L', UPLO, 'N', N, M, AP, WORK( INDTAU ), Z, LDZ,
+     $                WORK( INDWRK ), INFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+   20 CONTINUE
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = M
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL DSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+*     If eigenvalues are not in order, then sort them, along with
+*     eigenvectors.
+*
+      IF( WANTZ ) THEN
+         DO 40 J = 1, M - 1
+            I = 0
+            TMP1 = W( J )
+            DO 30 JJ = J + 1, M
+               IF( W( JJ ).LT.TMP1 ) THEN
+                  I = JJ
+                  TMP1 = W( JJ )
+               END IF
+   30       CONTINUE
+*
+            IF( I.NE.0 ) THEN
+               ITMP1 = IWORK( INDIBL+I-1 )
+               W( I ) = W( J )
+               IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 )
+               W( J ) = TMP1
+               IWORK( INDIBL+J-1 ) = ITMP1
+               CALL DSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 )
+               IF( INFO.NE.0 ) THEN
+                  ITMP1 = IFAIL( I )
+                  IFAIL( I ) = IFAIL( J )
+                  IFAIL( J ) = ITMP1
+               END IF
+            END IF
+   40    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DSPEVX
+*
+      END
diff --git a/costa/native/external/lapack/dspgst.f b/costa/native/external/lapack/dspgst.f
new file mode 100644
index 000000000..b6e00f25b
--- /dev/null
+++ b/costa/native/external/lapack/dspgst.f
@@ -0,0 +1,209 @@
+      SUBROUTINE DSPGST( ITYPE, UPLO, N, AP, BP, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, ITYPE, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AP( * ), BP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSPGST reduces a real symmetric-definite generalized eigenproblem
+*  to standard form, using packed storage.
+*
+*  If ITYPE = 1, the problem is A*x = lambda*B*x,
+*  and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T)
+*
+*  If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
+*  B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L.
+*
+*  B must have been previously factorized as U**T*U or L*L**T by DPPTRF.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T);
+*          = 2 or 3: compute U*A*U**T or L**T*A*L.
+*
+*  UPLO    (input) CHARACTER
+*          = 'U':  Upper triangle of A is stored and B is factored as
+*                  U**T*U;
+*          = 'L':  Lower triangle of A is stored and B is factored as
+*                  L*L**T.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  AP      (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, if INFO = 0, the transformed matrix, stored in the
+*          same format as A.
+*
+*  BP      (input) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          The triangular factor from the Cholesky factorization of B,
+*          stored in the same format as A, as returned by DPPTRF.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, HALF
+      PARAMETER          ( ONE = 1.0D0, HALF = 0.5D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, J1, J1J1, JJ, K, K1, K1K1, KK
+      DOUBLE PRECISION   AJJ, AKK, BJJ, BKK, CT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DSCAL, DSPMV, DSPR2, DTPMV, DTPSV,
+     $                   XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DDOT
+      EXTERNAL           LSAME, DDOT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSPGST', -INFO )
+         RETURN
+      END IF
+*
+      IF( ITYPE.EQ.1 ) THEN
+         IF( UPPER ) THEN
+*
+*           Compute inv(U')*A*inv(U)
+*
+*           J1 and JJ are the indices of A(1,j) and A(j,j)
+*
+            JJ = 0
+            DO 10 J = 1, N
+               J1 = JJ + 1
+               JJ = JJ + J
+*
+*              Compute the j-th column of the upper triangle of A
+*
+               BJJ = BP( JJ )
+               CALL DTPSV( UPLO, 'Transpose', 'Nonunit', J, BP,
+     $                     AP( J1 ), 1 )
+               CALL DSPMV( UPLO, J-1, -ONE, AP, BP( J1 ), 1, ONE,
+     $                     AP( J1 ), 1 )
+               CALL DSCAL( J-1, ONE / BJJ, AP( J1 ), 1 )
+               AP( JJ ) = ( AP( JJ )-DDOT( J-1, AP( J1 ), 1, BP( J1 ),
+     $                    1 ) ) / BJJ
+   10       CONTINUE
+         ELSE
+*
+*           Compute inv(L)*A*inv(L')
+*
+*           KK and K1K1 are the indices of A(k,k) and A(k+1,k+1)
+*
+            KK = 1
+            DO 20 K = 1, N
+               K1K1 = KK + N - K + 1
+*
+*              Update the lower triangle of A(k:n,k:n)
+*
+               AKK = AP( KK )
+               BKK = BP( KK )
+               AKK = AKK / BKK**2
+               AP( KK ) = AKK
+               IF( K.LT.N ) THEN
+                  CALL DSCAL( N-K, ONE / BKK, AP( KK+1 ), 1 )
+                  CT = -HALF*AKK
+                  CALL DAXPY( N-K, CT, BP( KK+1 ), 1, AP( KK+1 ), 1 )
+                  CALL DSPR2( UPLO, N-K, -ONE, AP( KK+1 ), 1,
+     $                        BP( KK+1 ), 1, AP( K1K1 ) )
+                  CALL DAXPY( N-K, CT, BP( KK+1 ), 1, AP( KK+1 ), 1 )
+                  CALL DTPSV( UPLO, 'No transpose', 'Non-unit', N-K,
+     $                        BP( K1K1 ), AP( KK+1 ), 1 )
+               END IF
+               KK = K1K1
+   20       CONTINUE
+         END IF
+      ELSE
+         IF( UPPER ) THEN
+*
+*           Compute U*A*U'
+*
+*           K1 and KK are the indices of A(1,k) and A(k,k)
+*
+            KK = 0
+            DO 30 K = 1, N
+               K1 = KK + 1
+               KK = KK + K
+*
+*              Update the upper triangle of A(1:k,1:k)
+*
+               AKK = AP( KK )
+               BKK = BP( KK )
+               CALL DTPMV( UPLO, 'No transpose', 'Non-unit', K-1, BP,
+     $                     AP( K1 ), 1 )
+               CT = HALF*AKK
+               CALL DAXPY( K-1, CT, BP( K1 ), 1, AP( K1 ), 1 )
+               CALL DSPR2( UPLO, K-1, ONE, AP( K1 ), 1, BP( K1 ), 1,
+     $                     AP )
+               CALL DAXPY( K-1, CT, BP( K1 ), 1, AP( K1 ), 1 )
+               CALL DSCAL( K-1, BKK, AP( K1 ), 1 )
+               AP( KK ) = AKK*BKK**2
+   30       CONTINUE
+         ELSE
+*
+*           Compute L'*A*L
+*
+*           JJ and J1J1 are the indices of A(j,j) and A(j+1,j+1)
+*
+            JJ = 1
+            DO 40 J = 1, N
+               J1J1 = JJ + N - J + 1
+*
+*              Compute the j-th column of the lower triangle of A
+*
+               AJJ = AP( JJ )
+               BJJ = BP( JJ )
+               AP( JJ ) = AJJ*BJJ + DDOT( N-J, AP( JJ+1 ), 1,
+     $                    BP( JJ+1 ), 1 )
+               CALL DSCAL( N-J, BJJ, AP( JJ+1 ), 1 )
+               CALL DSPMV( UPLO, N-J, ONE, AP( J1J1 ), BP( JJ+1 ), 1,
+     $                     ONE, AP( JJ+1 ), 1 )
+               CALL DTPMV( UPLO, 'Transpose', 'Non-unit', N-J+1,
+     $                     BP( JJ ), AP( JJ ), 1 )
+               JJ = J1J1
+   40       CONTINUE
+         END IF
+      END IF
+      RETURN
+*
+*     End of DSPGST
+*
+      END
diff --git a/costa/native/external/lapack/dspgv.f b/costa/native/external/lapack/dspgv.f
new file mode 100644
index 000000000..672af289e
--- /dev/null
+++ b/costa/native/external/lapack/dspgv.f
@@ -0,0 +1,196 @@
+      SUBROUTINE DSPGV( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK,
+     $                  INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, ITYPE, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AP( * ), BP( * ), W( * ), WORK( * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSPGV computes all the eigenvalues and, optionally, the eigenvectors
+*  of a real generalized symmetric-definite eigenproblem, of the form
+*  A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.
+*  Here A and B are assumed to be symmetric, stored in packed format,
+*  and B is also positive definite.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          Specifies the problem type to be solved:
+*          = 1:  A*x = (lambda)*B*x
+*          = 2:  A*B*x = (lambda)*x
+*          = 3:  B*A*x = (lambda)*x
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  AP      (input/output) DOUBLE PRECISION array, dimension
+*                            (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, the contents of AP are destroyed.
+*
+*  BP      (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          B, packed columnwise in a linear array.  The j-th column of B
+*          is stored in the array BP as follows:
+*          if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j;
+*          if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n.
+*
+*          On exit, the triangular factor U or L from the Cholesky
+*          factorization B = U**T*U or B = L*L**T, in the same storage
+*          format as B.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of
+*          eigenvectors.  The eigenvectors are normalized as follows:
+*          if ITYPE = 1 or 2, Z**T*B*Z = I;
+*          if ITYPE = 3, Z**T*inv(B)*Z = I.
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  DPPTRF or DSPEV returned an error code:
+*             <= N:  if INFO = i, DSPEV failed to converge;
+*                    i off-diagonal elements of an intermediate
+*                    tridiagonal form did not converge to zero.
+*             > N:   if INFO = n + i, for 1 <= i <= n, then the leading
+*                    minor of order i of B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            UPPER, WANTZ
+      CHARACTER          TRANS
+      INTEGER            J, NEIG
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DPPTRF, DSPEV, DSPGST, DTPMV, DTPSV, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+*
+      INFO = 0
+      IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSPGV ', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form a Cholesky factorization of B.
+*
+      CALL DPPTRF( UPLO, N, BP, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem and solve.
+*
+      CALL DSPGST( ITYPE, UPLO, N, AP, BP, INFO )
+      CALL DSPEV( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, INFO )
+*
+      IF( WANTZ ) THEN
+*
+*        Backtransform eigenvectors to the original problem.
+*
+         NEIG = N
+         IF( INFO.GT.0 )
+     $      NEIG = INFO - 1
+         IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN
+*
+*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
+*           backtransform eigenvectors: x = inv(L)'*y or inv(U)*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'N'
+            ELSE
+               TRANS = 'T'
+            END IF
+*
+            DO 10 J = 1, NEIG
+               CALL DTPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ),
+     $                     1 )
+   10       CONTINUE
+*
+         ELSE IF( ITYPE.EQ.3 ) THEN
+*
+*           For B*A*x=(lambda)*x;
+*           backtransform eigenvectors: x = L*y or U'*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'T'
+            ELSE
+               TRANS = 'N'
+            END IF
+*
+            DO 20 J = 1, NEIG
+               CALL DTPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ),
+     $                     1 )
+   20       CONTINUE
+         END IF
+      END IF
+      RETURN
+*
+*     End of DSPGV
+*
+      END
diff --git a/costa/native/external/lapack/dspgvd.f b/costa/native/external/lapack/dspgvd.f
new file mode 100644
index 000000000..049c1015e
--- /dev/null
+++ b/costa/native/external/lapack/dspgvd.f
@@ -0,0 +1,281 @@
+      SUBROUTINE DSPGVD( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK,
+     $                   LWORK, IWORK, LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, ITYPE, LDZ, LIWORK, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   AP( * ), BP( * ), W( * ), WORK( * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSPGVD computes all the eigenvalues, and optionally, the eigenvectors
+*  of a real generalized symmetric-definite eigenproblem, of the form
+*  A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.  Here A and
+*  B are assumed to be symmetric, stored in packed format, and B is also
+*  positive definite.
+*  If eigenvectors are desired, it uses a divide and conquer algorithm.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          Specifies the problem type to be solved:
+*          = 1:  A*x = (lambda)*B*x
+*          = 2:  A*B*x = (lambda)*x
+*          = 3:  B*A*x = (lambda)*x
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  AP      (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, the contents of AP are destroyed.
+*
+*  BP      (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          B, packed columnwise in a linear array.  The j-th column of B
+*          is stored in the array BP as follows:
+*          if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j;
+*          if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n.
+*
+*          On exit, the triangular factor U or L from the Cholesky
+*          factorization B = U**T*U or B = L*L**T, in the same storage
+*          format as B.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of
+*          eigenvectors.  The eigenvectors are normalized as follows:
+*          if ITYPE = 1 or 2, Z**T*B*Z = I;
+*          if ITYPE = 3, Z**T*inv(B)*Z = I.
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If N <= 1,               LWORK >= 1.
+*          If JOBZ = 'N' and N > 1, LWORK >= 2*N.
+*          If JOBZ = 'V' and N > 1, LWORK >= 1 + 6*N + 2*N**2.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.
+*          If JOBZ  = 'N' or N <= 1, LIWORK >= 1.
+*          If JOBZ  = 'V' and N > 1, LIWORK >= 3 + 5*N.
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  DPPTRF or DSPEVD returned an error code:
+*             <= N:  if INFO = i, DSPEVD failed to converge;
+*                    i off-diagonal elements of an intermediate
+*                    tridiagonal form did not converge to zero;
+*             > N:   if INFO = N + i, for 1 <= i <= N, then the leading
+*                    minor of order i of B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, UPPER, WANTZ
+      CHARACTER          TRANS
+      INTEGER            J, LGN, LIWMIN, LWMIN, NEIG
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DPPTRF, DSPEVD, DSPGST, DTPMV, DTPSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, INT, LOG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+      LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( N.LE.1 ) THEN
+         LGN = 0
+         LIWMIN = 1
+         LWMIN = 1
+      ELSE
+         LGN = INT( LOG( DBLE( N ) ) / LOG( TWO ) )
+         IF( 2**LGN.LT.N )
+     $      LGN = LGN + 1
+         IF( 2**LGN.LT.N )
+     $      LGN = LGN + 1
+         IF( WANTZ ) THEN
+            LIWMIN = 3 + 5*N
+            LWMIN = 1 + 5*N + 2*N*LGN + 2*N**2
+         ELSE
+            LIWMIN = 1
+            LWMIN = 2*N
+         END IF
+      END IF
+*
+      IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDZ.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -11
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSPGVD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form a Cholesky factorization of BP.
+*
+      CALL DPPTRF( UPLO, N, BP, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem and solve.
+*
+      CALL DSPGST( ITYPE, UPLO, N, AP, BP, INFO )
+      CALL DSPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, IWORK,
+     $             LIWORK, INFO )
+      LWMIN = MAX( DBLE( LWMIN ), DBLE( WORK( 1 ) ) )
+      LIWMIN = MAX( DBLE( LIWMIN ), DBLE( IWORK( 1 ) ) )
+*
+      IF( WANTZ ) THEN
+*
+*        Backtransform eigenvectors to the original problem.
+*
+         NEIG = N
+         IF( INFO.GT.0 )
+     $      NEIG = INFO - 1
+         IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN
+*
+*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
+*           backtransform eigenvectors: x = inv(L)'*y or inv(U)*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'N'
+            ELSE
+               TRANS = 'T'
+            END IF
+*
+            DO 10 J = 1, NEIG
+               CALL DTPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ),
+     $                     1 )
+   10       CONTINUE
+*
+         ELSE IF( ITYPE.EQ.3 ) THEN
+*
+*           For B*A*x=(lambda)*x;
+*           backtransform eigenvectors: x = L*y or U'*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'T'
+            ELSE
+               TRANS = 'N'
+            END IF
+*
+            DO 20 J = 1, NEIG
+               CALL DTPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ),
+     $                     1 )
+   20       CONTINUE
+         END IF
+      END IF
+*
+      WORK( 1 ) = LWMIN
+      IWORK( 1 ) = LIWMIN
+*
+      RETURN
+*
+*     End of DSPGVD
+*
+      END
diff --git a/costa/native/external/lapack/dspgvx.f b/costa/native/external/lapack/dspgvx.f
new file mode 100644
index 000000000..4d2c0d16d
--- /dev/null
+++ b/costa/native/external/lapack/dspgvx.f
@@ -0,0 +1,286 @@
+      SUBROUTINE DSPGVX( ITYPE, JOBZ, RANGE, UPLO, N, AP, BP, VL, VU,
+     $                   IL, IU, ABSTOL, M, W, Z, LDZ, WORK, IWORK,
+     $                   IFAIL, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE, UPLO
+      INTEGER            IL, INFO, ITYPE, IU, LDZ, M, N
+      DOUBLE PRECISION   ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IFAIL( * ), IWORK( * )
+      DOUBLE PRECISION   AP( * ), BP( * ), W( * ), WORK( * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSPGVX computes selected eigenvalues, and optionally, eigenvectors
+*  of a real generalized symmetric-definite eigenproblem, of the form
+*  A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.  Here A
+*  and B are assumed to be symmetric, stored in packed storage, and B
+*  is also positive definite.  Eigenvalues and eigenvectors can be
+*  selected by specifying either a range of values or a range of indices
+*  for the desired eigenvalues.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          Specifies the problem type to be solved:
+*          = 1:  A*x = (lambda)*B*x
+*          = 2:  A*B*x = (lambda)*x
+*          = 3:  B*A*x = (lambda)*x
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found.
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found.
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A and B are stored;
+*          = 'L':  Lower triangle of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix pencil (A,B).  N >= 0.
+*
+*  AP      (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, the contents of AP are destroyed.
+*
+*  BP      (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          B, packed columnwise in a linear array.  The j-th column of B
+*          is stored in the array BP as follows:
+*          if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j;
+*          if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n.
+*
+*          On exit, the triangular factor U or L from the Cholesky
+*          factorization B = U**T*U or B = L*L**T, in the same storage
+*          format as B.
+*
+*  VL      (input) DOUBLE PRECISION
+*  VU      (input) DOUBLE PRECISION
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) DOUBLE PRECISION
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less than
+*          or equal to zero, then  EPS*|T|  will be used in its place,
+*          where |T| is the 1-norm of the tridiagonal matrix obtained
+*          by reducing A to tridiagonal form.
+*
+*          Eigenvalues will be computed most accurately when ABSTOL is
+*          set to twice the underflow threshold 2*DLAMCH('S'), not zero.
+*          If this routine returns with INFO>0, indicating that some
+*          eigenvectors did not converge, try setting ABSTOL to
+*          2*DLAMCH('S').
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          On normal exit, the first M elements contain the selected
+*          eigenvalues in ascending order.
+*
+*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M))
+*          If JOBZ = 'N', then Z is not referenced.
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix A
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          The eigenvectors are normalized as follows:
+*          if ITYPE = 1 or 2, Z**T*B*Z = I;
+*          if ITYPE = 3, Z**T*inv(B)*Z = I.
+*
+*          If an eigenvector fails to converge, then that column of Z
+*          contains the latest approximation to the eigenvector, and the
+*          index of the eigenvector is returned in IFAIL.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (8*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (5*N)
+*
+*  IFAIL   (output) INTEGER array, dimension (N)
+*          If JOBZ = 'V', then if INFO = 0, the first M elements of
+*          IFAIL are zero.  If INFO > 0, then IFAIL contains the
+*          indices of the eigenvectors that failed to converge.
+*          If JOBZ = 'N', then IFAIL is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  DPPTRF or DSPEVX returned an error code:
+*             <= N:  if INFO = i, DSPEVX failed to converge;
+*                    i eigenvectors failed to converge.  Their indices
+*                    are stored in array IFAIL.
+*             > N:   if INFO = N + i, for 1 <= i <= N, then the leading
+*                    minor of order i of B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
+*
+* =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, UPPER, VALEIG, WANTZ
+      CHARACTER          TRANS
+      INTEGER            J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DPPTRF, DSPEVX, DSPGST, DTPMV, DTPSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      UPPER = LSAME( UPLO, 'U' )
+      WANTZ = LSAME( JOBZ, 'V' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+*
+      INFO = 0
+      IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN
+         INFO = -9
+      ELSE IF( INDEIG .AND. IL.LT.1 ) THEN
+         INFO = -10
+      ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN
+         INFO = -11
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -16
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSPGVX', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+*     Form a Cholesky factorization of B.
+*
+      CALL DPPTRF( UPLO, N, BP, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem and solve.
+*
+      CALL DSPGST( ITYPE, UPLO, N, AP, BP, INFO )
+      CALL DSPEVX( JOBZ, RANGE, UPLO, N, AP, VL, VU, IL, IU, ABSTOL, M,
+     $             W, Z, LDZ, WORK, IWORK, IFAIL, INFO )
+*
+      IF( WANTZ ) THEN
+*
+*        Backtransform eigenvectors to the original problem.
+*
+         IF( INFO.GT.0 )
+     $      M = INFO - 1
+         IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN
+*
+*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
+*           backtransform eigenvectors: x = inv(L)'*y or inv(U)*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'N'
+            ELSE
+               TRANS = 'T'
+            END IF
+*
+            DO 10 J = 1, M
+               CALL DTPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ),
+     $                     1 )
+   10       CONTINUE
+*
+         ELSE IF( ITYPE.EQ.3 ) THEN
+*
+*           For B*A*x=(lambda)*x;
+*           backtransform eigenvectors: x = L*y or U'*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'T'
+            ELSE
+               TRANS = 'N'
+            END IF
+*
+            DO 20 J = 1, M
+               CALL DTPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ),
+     $                     1 )
+   20       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DSPGVX
+*
+      END
diff --git a/costa/native/external/lapack/dsprfs.f b/costa/native/external/lapack/dsprfs.f
new file mode 100644
index 000000000..47aaaefd8
--- /dev/null
+++ b/costa/native/external/lapack/dsprfs.f
@@ -0,0 +1,331 @@
+      SUBROUTINE DSPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX,
+     $                   FERR, BERR, WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), IWORK( * )
+      DOUBLE PRECISION   AFP( * ), AP( * ), B( LDB, * ), BERR( * ),
+     $                   FERR( * ), WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSPRFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is symmetric indefinite
+*  and packed, and provides error bounds and backward error estimates
+*  for the solution.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AP      (input) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          The upper or lower triangle of the symmetric matrix A, packed
+*          columnwise in a linear array.  The j-th column of A is stored
+*          in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*  AFP     (input) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          The factored form of the matrix A.  AFP contains the block
+*          diagonal matrix D and the multipliers used to obtain the
+*          factor U or L from the factorization A = U*D*U**T or
+*          A = L*D*L**T as computed by DSPTRF, stored as a packed
+*          triangular matrix.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by DSPTRF.
+*
+*  B       (input) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by DSPTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D+0 )
+      DOUBLE PRECISION   THREE
+      PARAMETER          ( THREE = 3.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            COUNT, I, IK, J, K, KASE, KK, NZ
+      DOUBLE PRECISION   EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DCOPY, DLACON, DSPMV, DSPTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSPRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = DLAMCH( 'Epsilon' )
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - A * X
+*
+         CALL DCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 )
+         CALL DSPMV( UPLO, N, -ONE, AP, X( 1, J ), 1, ONE, WORK( N+1 ),
+     $               1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            WORK( I ) = ABS( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(A)*abs(X) + abs(B).
+*
+         KK = 1
+         IF( UPPER ) THEN
+            DO 50 K = 1, N
+               S = ZERO
+               XK = ABS( X( K, J ) )
+               IK = KK
+               DO 40 I = 1, K - 1
+                  WORK( I ) = WORK( I ) + ABS( AP( IK ) )*XK
+                  S = S + ABS( AP( IK ) )*ABS( X( I, J ) )
+                  IK = IK + 1
+   40          CONTINUE
+               WORK( K ) = WORK( K ) + ABS( AP( KK+K-1 ) )*XK + S
+               KK = KK + K
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               XK = ABS( X( K, J ) )
+               WORK( K ) = WORK( K ) + ABS( AP( KK ) )*XK
+               IK = KK + 1
+               DO 60 I = K + 1, N
+                  WORK( I ) = WORK( I ) + ABS( AP( IK ) )*XK
+                  S = S + ABS( AP( IK ) )*ABS( X( I, J ) )
+                  IK = IK + 1
+   60          CONTINUE
+               WORK( K ) = WORK( K ) + S
+               KK = KK + ( N-K+1 )
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) )
+            ELSE
+               S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) /
+     $             ( WORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL DSPTRS( UPLO, N, 1, AFP, IPIV, WORK( N+1 ), N, INFO )
+            CALL DAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(A))*
+*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(A) is the inverse of A
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(A)*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use DLACON to estimate the infinity-norm of the matrix
+*           inv(A) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I )
+            ELSE
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL DLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ),
+     $                KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(A').
+*
+               CALL DSPTRS( UPLO, N, 1, AFP, IPIV, WORK( N+1 ), N,
+     $                      INFO )
+               DO 110 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  110          CONTINUE
+            ELSE IF( KASE.EQ.2 ) THEN
+*
+*              Multiply by inv(A)*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  120          CONTINUE
+               CALL DSPTRS( UPLO, N, 1, AFP, IPIV, WORK( N+1 ), N,
+     $                      INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, ABS( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of DSPRFS
+*
+      END
diff --git a/costa/native/external/lapack/dspsv.f b/costa/native/external/lapack/dspsv.f
new file mode 100644
index 000000000..412684b32
--- /dev/null
+++ b/costa/native/external/lapack/dspsv.f
@@ -0,0 +1,149 @@
+      SUBROUTINE DSPSV( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   AP( * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSPSV computes the solution to a real system of linear equations
+*     A * X = B,
+*  where A is an N-by-N symmetric matrix stored in packed format and X
+*  and B are N-by-NRHS matrices.
+*
+*  The diagonal pivoting method is used to factor A as
+*     A = U * D * U**T,  if UPLO = 'U', or
+*     A = L * D * L**T,  if UPLO = 'L',
+*  where U (or L) is a product of permutation and unit upper (lower)
+*  triangular matrices, D is symmetric and block diagonal with 1-by-1
+*  and 2-by-2 diagonal blocks.  The factored form of A is then used to
+*  solve the system of equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AP      (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*          See below for further details.
+*
+*          On exit, the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L from the factorization
+*          A = U*D*U**T or A = L*D*L**T as computed by DSPTRF, stored as
+*          a packed triangular matrix in the same storage format as A.
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D, as
+*          determined by DSPTRF.  If IPIV(k) > 0, then rows and columns
+*          k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1
+*          diagonal block.  If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0,
+*          then rows and columns k-1 and -IPIV(k) were interchanged and
+*          D(k-1:k,k-1:k) is a 2-by-2 diagonal block.  If UPLO = 'L' and
+*          IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and
+*          -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2
+*          diagonal block.
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, D(i,i) is exactly zero.  The factorization
+*                has been completed, but the block diagonal matrix D is
+*                exactly singular, so the solution could not be
+*                computed.
+*
+*  Further Details
+*  ===============
+*
+*  The packed storage scheme is illustrated by the following example
+*  when N = 4, UPLO = 'U':
+*
+*  Two-dimensional storage of the symmetric matrix A:
+*
+*     a11 a12 a13 a14
+*         a22 a23 a24
+*             a33 a34     (aij = aji)
+*                 a44
+*
+*  Packed storage of the upper triangle of A:
+*
+*  AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ]
+*
+*  =====================================================================
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSPTRF, DSPTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSPSV ', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the factorization A = U*D*U' or A = L*D*L'.
+*
+      CALL DSPTRF( UPLO, N, AP, IPIV, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL DSPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO )
+*
+      END IF
+      RETURN
+*
+*     End of DSPSV
+*
+      END
diff --git a/costa/native/external/lapack/dspsvx.f b/costa/native/external/lapack/dspsvx.f
new file mode 100644
index 000000000..a414be28a
--- /dev/null
+++ b/costa/native/external/lapack/dspsvx.f
@@ -0,0 +1,279 @@
+      SUBROUTINE DSPSVX( FACT, UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X,
+     $                   LDX, RCOND, FERR, BERR, WORK, IWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          FACT, UPLO
+      INTEGER            INFO, LDB, LDX, N, NRHS
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), IWORK( * )
+      DOUBLE PRECISION   AFP( * ), AP( * ), B( LDB, * ), BERR( * ),
+     $                   FERR( * ), WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSPSVX uses the diagonal pivoting factorization A = U*D*U**T or
+*  A = L*D*L**T to compute the solution to a real system of linear
+*  equations A * X = B, where A is an N-by-N symmetric matrix stored
+*  in packed format and X and B are N-by-NRHS matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'N', the diagonal pivoting method is used to factor A as
+*        A = U * D * U**T,  if UPLO = 'U', or
+*        A = L * D * L**T,  if UPLO = 'L',
+*     where U (or L) is a product of permutation and unit upper (lower)
+*     triangular matrices and D is symmetric and block diagonal with
+*     1-by-1 and 2-by-2 diagonal blocks.
+*
+*  2. If some D(i,i)=0, so that D is exactly singular, then the routine
+*     returns with INFO = i. Otherwise, the factored form of A is used
+*     to estimate the condition number of the matrix A.  If the
+*     reciprocal of the condition number is less than machine precision,
+*     INFO = N+1 is returned as a warning, but the routine still goes on
+*     to solve for X and compute error bounds as described below.
+*
+*  3. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  4. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of A has been
+*          supplied on entry.
+*          = 'F':  On entry, AFP and IPIV contain the factored form of
+*                  A.  AP, AFP and IPIV will not be modified.
+*          = 'N':  The matrix A will be copied to AFP and factored.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AP      (input) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          The upper or lower triangle of the symmetric matrix A, packed
+*          columnwise in a linear array.  The j-th column of A is stored
+*          in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*          See below for further details.
+*
+*  AFP     (input or output) DOUBLE PRECISION array, dimension
+*                            (N*(N+1)/2)
+*          If FACT = 'F', then AFP is an input argument and on entry
+*          contains the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L from the factorization
+*          A = U*D*U**T or A = L*D*L**T as computed by DSPTRF, stored as
+*          a packed triangular matrix in the same storage format as A.
+*
+*          If FACT = 'N', then AFP is an output argument and on exit
+*          contains the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L from the factorization
+*          A = U*D*U**T or A = L*D*L**T as computed by DSPTRF, stored as
+*          a packed triangular matrix in the same storage format as A.
+*
+*  IPIV    (input or output) INTEGER array, dimension (N)
+*          If FACT = 'F', then IPIV is an input argument and on entry
+*          contains details of the interchanges and the block structure
+*          of D, as determined by DSPTRF.
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*          If FACT = 'N', then IPIV is an output argument and on exit
+*          contains details of the interchanges and the block structure
+*          of D, as determined by DSPTRF.
+*
+*  B       (input) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          The N-by-NRHS right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) DOUBLE PRECISION array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The estimate of the reciprocal condition number of the matrix
+*          A.  If RCOND is less than the machine precision (in
+*          particular, if RCOND = 0), the matrix is singular to working
+*          precision.  This condition is indicated by a return code of
+*          INFO > 0.
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= N:  D(i,i) is exactly zero.  The factorization
+*                       has been completed but the factor D is exactly
+*                       singular, so the solution and error bounds could
+*                       not be computed. RCOND = 0 is returned.
+*                = N+1: D is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  Further Details
+*  ===============
+*
+*  The packed storage scheme is illustrated by the following example
+*  when N = 4, UPLO = 'U':
+*
+*  Two-dimensional storage of the symmetric matrix A:
+*
+*     a11 a12 a13 a14
+*         a22 a23 a24
+*             a33 a34     (aij = aji)
+*                 a44
+*
+*  Packed storage of the upper triangle of A:
+*
+*  AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ]
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOFACT
+      DOUBLE PRECISION   ANORM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, DLANSP
+      EXTERNAL           LSAME, DLAMCH, DLANSP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLACPY, DSPCON, DSPRFS, DSPTRF, DSPTRS,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSPSVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( NOFACT ) THEN
+*
+*        Compute the factorization A = U*D*U' or A = L*D*L'.
+*
+         CALL DCOPY( N*( N+1 ) / 2, AP, 1, AFP, 1 )
+         CALL DSPTRF( UPLO, N, AFP, IPIV, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      ANORM = DLANSP( 'I', UPLO, N, AP, WORK )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL DSPCON( UPLO, N, AFP, IPIV, ANORM, RCOND, WORK, IWORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.DLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution vectors X.
+*
+      CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL DSPTRS( UPLO, N, NRHS, AFP, IPIV, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solutions and
+*     compute error bounds and backward error estimates for them.
+*
+      CALL DSPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, FERR,
+     $             BERR, WORK, IWORK, INFO )
+*
+      RETURN
+*
+*     End of DSPSVX
+*
+      END
diff --git a/costa/native/external/lapack/dsptrd.f b/costa/native/external/lapack/dsptrd.f
new file mode 100644
index 000000000..148296107
--- /dev/null
+++ b/costa/native/external/lapack/dsptrd.f
@@ -0,0 +1,229 @@
+      SUBROUTINE DSPTRD( UPLO, N, AP, D, E, TAU, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AP( * ), D( * ), E( * ), TAU( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSPTRD reduces a real symmetric matrix A stored in packed form to
+*  symmetric tridiagonal form T by an orthogonal similarity
+*  transformation: Q**T * A * Q = T.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*          On exit, if UPLO = 'U', the diagonal and first superdiagonal
+*          of A are overwritten by the corresponding elements of the
+*          tridiagonal matrix T, and the elements above the first
+*          superdiagonal, with the array TAU, represent the orthogonal
+*          matrix Q as a product of elementary reflectors; if UPLO
+*          = 'L', the diagonal and first subdiagonal of A are over-
+*          written by the corresponding elements of the tridiagonal
+*          matrix T, and the elements below the first subdiagonal, with
+*          the array TAU, represent the orthogonal matrix Q as a product
+*          of elementary reflectors. See Further Details.
+*
+*  D       (output) DOUBLE PRECISION array, dimension (N)
+*          The diagonal elements of the tridiagonal matrix T:
+*          D(i) = A(i,i).
+*
+*  E       (output) DOUBLE PRECISION array, dimension (N-1)
+*          The off-diagonal elements of the tridiagonal matrix T:
+*          E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.
+*
+*  TAU     (output) DOUBLE PRECISION array, dimension (N-1)
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  If UPLO = 'U', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(n-1) . . . H(2) H(1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in AP,
+*  overwriting A(1:i-1,i+1), and tau is stored in TAU(i).
+*
+*  If UPLO = 'L', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(1) H(2) . . . H(n-1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in AP,
+*  overwriting A(i+2:n,i), and tau is stored in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO, HALF
+      PARAMETER          ( ONE = 1.0D0, ZERO = 0.0D0,
+     $                   HALF = 1.0D0 / 2.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, I1, I1I1, II
+      DOUBLE PRECISION   ALPHA, TAUI
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DLARFG, DSPMV, DSPR2, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DDOT
+      EXTERNAL           LSAME, DDOT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSPTRD', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Reduce the upper triangle of A.
+*        I1 is the index in AP of A(1,I+1).
+*
+         I1 = N*( N-1 ) / 2 + 1
+         DO 10 I = N - 1, 1, -1
+*
+*           Generate elementary reflector H(i) = I - tau * v * v'
+*           to annihilate A(1:i-1,i+1)
+*
+            CALL DLARFG( I, AP( I1+I-1 ), AP( I1 ), 1, TAUI )
+            E( I ) = AP( I1+I-1 )
+*
+            IF( TAUI.NE.ZERO ) THEN
+*
+*              Apply H(i) from both sides to A(1:i,1:i)
+*
+               AP( I1+I-1 ) = ONE
+*
+*              Compute  y := tau * A * v  storing y in TAU(1:i)
+*
+               CALL DSPMV( UPLO, I, TAUI, AP, AP( I1 ), 1, ZERO, TAU,
+     $                     1 )
+*
+*              Compute  w := y - 1/2 * tau * (y'*v) * v
+*
+               ALPHA = -HALF*TAUI*DDOT( I, TAU, 1, AP( I1 ), 1 )
+               CALL DAXPY( I, ALPHA, AP( I1 ), 1, TAU, 1 )
+*
+*              Apply the transformation as a rank-2 update:
+*                 A := A - v * w' - w * v'
+*
+               CALL DSPR2( UPLO, I, -ONE, AP( I1 ), 1, TAU, 1, AP )
+*
+               AP( I1+I-1 ) = E( I )
+            END IF
+            D( I+1 ) = AP( I1+I )
+            TAU( I ) = TAUI
+            I1 = I1 - I
+   10    CONTINUE
+         D( 1 ) = AP( 1 )
+      ELSE
+*
+*        Reduce the lower triangle of A. II is the index in AP of
+*        A(i,i) and I1I1 is the index of A(i+1,i+1).
+*
+         II = 1
+         DO 20 I = 1, N - 1
+            I1I1 = II + N - I + 1
+*
+*           Generate elementary reflector H(i) = I - tau * v * v'
+*           to annihilate A(i+2:n,i)
+*
+            CALL DLARFG( N-I, AP( II+1 ), AP( II+2 ), 1, TAUI )
+            E( I ) = AP( II+1 )
+*
+            IF( TAUI.NE.ZERO ) THEN
+*
+*              Apply H(i) from both sides to A(i+1:n,i+1:n)
+*
+               AP( II+1 ) = ONE
+*
+*              Compute  y := tau * A * v  storing y in TAU(i:n-1)
+*
+               CALL DSPMV( UPLO, N-I, TAUI, AP( I1I1 ), AP( II+1 ), 1,
+     $                     ZERO, TAU( I ), 1 )
+*
+*              Compute  w := y - 1/2 * tau * (y'*v) * v
+*
+               ALPHA = -HALF*TAUI*DDOT( N-I, TAU( I ), 1, AP( II+1 ),
+     $                 1 )
+               CALL DAXPY( N-I, ALPHA, AP( II+1 ), 1, TAU( I ), 1 )
+*
+*              Apply the transformation as a rank-2 update:
+*                 A := A - v * w' - w * v'
+*
+               CALL DSPR2( UPLO, N-I, -ONE, AP( II+1 ), 1, TAU( I ), 1,
+     $                     AP( I1I1 ) )
+*
+               AP( II+1 ) = E( I )
+            END IF
+            D( I ) = AP( II )
+            TAU( I ) = TAUI
+            II = I1I1
+   20    CONTINUE
+         D( N ) = AP( II )
+      END IF
+*
+      RETURN
+*
+*     End of DSPTRD
+*
+      END
diff --git a/costa/native/external/lapack/dsptrf.f b/costa/native/external/lapack/dsptrf.f
new file mode 100644
index 000000000..cce2df387
--- /dev/null
+++ b/costa/native/external/lapack/dsptrf.f
@@ -0,0 +1,548 @@
+      SUBROUTINE DSPTRF( UPLO, N, AP, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   AP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSPTRF computes the factorization of a real symmetric matrix A stored
+*  in packed format using the Bunch-Kaufman diagonal pivoting method:
+*
+*     A = U*D*U**T  or  A = L*D*L**T
+*
+*  where U (or L) is a product of permutation and unit upper (lower)
+*  triangular matrices, and D is symmetric and block diagonal with
+*  1-by-1 and 2-by-2 diagonal blocks.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L, stored as a packed triangular
+*          matrix overwriting A (see below for further details).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D.
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, D(i,i) is exactly zero.  The factorization
+*               has been completed, but the block diagonal matrix D is
+*               exactly singular, and division by zero will occur if it
+*               is used to solve a system of equations.
+*
+*  Further Details
+*  ===============
+*
+*  5-96 - Based on modifications by J. Lewis, Boeing Computer Services
+*         Company
+*
+*  If UPLO = 'U', then A = U*D*U', where
+*     U = P(n)*U(n)* ... *P(k)U(k)* ...,
+*  i.e., U is a product of terms P(k)*U(k), where k decreases from n to
+*  1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and U(k) is a unit upper triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    v    0   )   k-s
+*     U(k) =  (   0    I    0   )   s
+*             (   0    0    I   )   n-k
+*                k-s   s   n-k
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k).
+*  If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k),
+*  and A(k,k), and v overwrites A(1:k-2,k-1:k).
+*
+*  If UPLO = 'L', then A = L*D*L', where
+*     L = P(1)*L(1)* ... *P(k)*L(k)* ...,
+*  i.e., L is a product of terms P(k)*L(k), where k increases from 1 to
+*  n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and L(k) is a unit lower triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    0     0   )  k-1
+*     L(k) =  (   0    I     0   )  s
+*             (   0    v     I   )  n-k-s+1
+*                k-1   s  n-k-s+1
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k).
+*  If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k),
+*  and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+      DOUBLE PRECISION   EIGHT, SEVTEN
+      PARAMETER          ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, IMAX, J, JMAX, K, KC, KK, KNC, KP, KPC,
+     $                   KSTEP, KX, NPP
+      DOUBLE PRECISION   ABSAKK, ALPHA, COLMAX, D11, D12, D21, D22, R1,
+     $                   ROWMAX, T, WK, WKM1, WKP1
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IDAMAX
+      EXTERNAL           LSAME, IDAMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSCAL, DSPR, DSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSPTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Initialize ALPHA for use in choosing pivot block size.
+*
+      ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT
+*
+      IF( UPPER ) THEN
+*
+*        Factorize A as U*D*U' using the upper triangle of A
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2
+*
+         K = N
+         KC = ( N-1 )*N / 2 + 1
+   10    CONTINUE
+         KNC = KC
+*
+*        If K < 1, exit from loop
+*
+         IF( K.LT.1 )
+     $      GO TO 110
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = ABS( AP( KC+K-1 ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.GT.1 ) THEN
+            IMAX = IDAMAX( K-1, AP( KC ), 1 )
+            COLMAX = ABS( AP( KC+IMAX-1 ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               ROWMAX = ZERO
+               JMAX = IMAX
+               KX = IMAX*( IMAX+1 ) / 2 + IMAX
+               DO 20 J = IMAX + 1, K
+                  IF( ABS( AP( KX ) ).GT.ROWMAX ) THEN
+                     ROWMAX = ABS( AP( KX ) )
+                     JMAX = J
+                  END IF
+                  KX = KX + J
+   20          CONTINUE
+               KPC = ( IMAX-1 )*IMAX / 2 + 1
+               IF( IMAX.GT.1 ) THEN
+                  JMAX = IDAMAX( IMAX-1, AP( KPC ), 1 )
+                  ROWMAX = MAX( ROWMAX, ABS( AP( KPC+JMAX-1 ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( ABS( AP( KPC+IMAX-1 ) ).GE.ALPHA*ROWMAX ) THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+               ELSE
+*
+*                 interchange rows and columns K-1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K - KSTEP + 1
+            IF( KSTEP.EQ.2 )
+     $         KNC = KNC - K + 1
+            IF( KP.NE.KK ) THEN
+*
+*              Interchange rows and columns KK and KP in the leading
+*              submatrix A(1:k,1:k)
+*
+               CALL DSWAP( KP-1, AP( KNC ), 1, AP( KPC ), 1 )
+               KX = KPC + KP - 1
+               DO 30 J = KP + 1, KK - 1
+                  KX = KX + J - 1
+                  T = AP( KNC+J-1 )
+                  AP( KNC+J-1 ) = AP( KX )
+                  AP( KX ) = T
+   30          CONTINUE
+               T = AP( KNC+KK-1 )
+               AP( KNC+KK-1 ) = AP( KPC+KP-1 )
+               AP( KPC+KP-1 ) = T
+               IF( KSTEP.EQ.2 ) THEN
+                  T = AP( KC+K-2 )
+                  AP( KC+K-2 ) = AP( KC+KP-1 )
+                  AP( KC+KP-1 ) = T
+               END IF
+            END IF
+*
+*           Update the leading submatrix
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column k now holds
+*
+*              W(k) = U(k)*D(k)
+*
+*              where U(k) is the k-th column of U
+*
+*              Perform a rank-1 update of A(1:k-1,1:k-1) as
+*
+*              A := A - U(k)*D(k)*U(k)' = A - W(k)*1/D(k)*W(k)'
+*
+               R1 = ONE / AP( KC+K-1 )
+               CALL DSPR( UPLO, K-1, -R1, AP( KC ), 1, AP )
+*
+*              Store U(k) in column k
+*
+               CALL DSCAL( K-1, R1, AP( KC ), 1 )
+            ELSE
+*
+*              2-by-2 pivot block D(k): columns k and k-1 now hold
+*
+*              ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k)
+*
+*              where U(k) and U(k-1) are the k-th and (k-1)-th columns
+*              of U
+*
+*              Perform a rank-2 update of A(1:k-2,1:k-2) as
+*
+*              A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )'
+*                 = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )'
+*
+               IF( K.GT.2 ) THEN
+*
+                  D12 = AP( K-1+( K-1 )*K / 2 )
+                  D22 = AP( K-1+( K-2 )*( K-1 ) / 2 ) / D12
+                  D11 = AP( K+( K-1 )*K / 2 ) / D12
+                  T = ONE / ( D11*D22-ONE )
+                  D12 = T / D12
+*
+                  DO 50 J = K - 2, 1, -1
+                     WKM1 = D12*( D11*AP( J+( K-2 )*( K-1 ) / 2 )-
+     $                      AP( J+( K-1 )*K / 2 ) )
+                     WK = D12*( D22*AP( J+( K-1 )*K / 2 )-
+     $                    AP( J+( K-2 )*( K-1 ) / 2 ) )
+                     DO 40 I = J, 1, -1
+                        AP( I+( J-1 )*J / 2 ) = AP( I+( J-1 )*J / 2 ) -
+     $                     AP( I+( K-1 )*K / 2 )*WK -
+     $                     AP( I+( K-2 )*( K-1 ) / 2 )*WKM1
+   40                CONTINUE
+                     AP( J+( K-1 )*K / 2 ) = WK
+                     AP( J+( K-2 )*( K-1 ) / 2 ) = WKM1
+   50             CONTINUE
+*
+               END IF
+*
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K-1 ) = -KP
+         END IF
+*
+*        Decrease K and return to the start of the main loop
+*
+         K = K - KSTEP
+         KC = KNC - K
+         GO TO 10
+*
+      ELSE
+*
+*        Factorize A as L*D*L' using the lower triangle of A
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2
+*
+         K = 1
+         KC = 1
+         NPP = N*( N+1 ) / 2
+   60    CONTINUE
+         KNC = KC
+*
+*        If K > N, exit from loop
+*
+         IF( K.GT.N )
+     $      GO TO 110
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = ABS( AP( KC ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.LT.N ) THEN
+            IMAX = K + IDAMAX( N-K, AP( KC+1 ), 1 )
+            COLMAX = ABS( AP( KC+IMAX-K ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               ROWMAX = ZERO
+               KX = KC + IMAX - K
+               DO 70 J = K, IMAX - 1
+                  IF( ABS( AP( KX ) ).GT.ROWMAX ) THEN
+                     ROWMAX = ABS( AP( KX ) )
+                     JMAX = J
+                  END IF
+                  KX = KX + N - J
+   70          CONTINUE
+               KPC = NPP - ( N-IMAX+1 )*( N-IMAX+2 ) / 2 + 1
+               IF( IMAX.LT.N ) THEN
+                  JMAX = IMAX + IDAMAX( N-IMAX, AP( KPC+1 ), 1 )
+                  ROWMAX = MAX( ROWMAX, ABS( AP( KPC+JMAX-IMAX ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( ABS( AP( KPC ) ).GE.ALPHA*ROWMAX ) THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+               ELSE
+*
+*                 interchange rows and columns K+1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K + KSTEP - 1
+            IF( KSTEP.EQ.2 )
+     $         KNC = KNC + N - K + 1
+            IF( KP.NE.KK ) THEN
+*
+*              Interchange rows and columns KK and KP in the trailing
+*              submatrix A(k:n,k:n)
+*
+               IF( KP.LT.N )
+     $            CALL DSWAP( N-KP, AP( KNC+KP-KK+1 ), 1, AP( KPC+1 ),
+     $                        1 )
+               KX = KNC + KP - KK
+               DO 80 J = KK + 1, KP - 1
+                  KX = KX + N - J + 1
+                  T = AP( KNC+J-KK )
+                  AP( KNC+J-KK ) = AP( KX )
+                  AP( KX ) = T
+   80          CONTINUE
+               T = AP( KNC )
+               AP( KNC ) = AP( KPC )
+               AP( KPC ) = T
+               IF( KSTEP.EQ.2 ) THEN
+                  T = AP( KC+1 )
+                  AP( KC+1 ) = AP( KC+KP-K )
+                  AP( KC+KP-K ) = T
+               END IF
+            END IF
+*
+*           Update the trailing submatrix
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column k now holds
+*
+*              W(k) = L(k)*D(k)
+*
+*              where L(k) is the k-th column of L
+*
+               IF( K.LT.N ) THEN
+*
+*                 Perform a rank-1 update of A(k+1:n,k+1:n) as
+*
+*                 A := A - L(k)*D(k)*L(k)' = A - W(k)*(1/D(k))*W(k)'
+*
+                  R1 = ONE / AP( KC )
+                  CALL DSPR( UPLO, N-K, -R1, AP( KC+1 ), 1,
+     $                       AP( KC+N-K+1 ) )
+*
+*                 Store L(k) in column K
+*
+                  CALL DSCAL( N-K, R1, AP( KC+1 ), 1 )
+               END IF
+            ELSE
+*
+*              2-by-2 pivot block D(k): columns K and K+1 now hold
+*
+*              ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k)
+*
+*              where L(k) and L(k+1) are the k-th and (k+1)-th columns
+*              of L
+*
+               IF( K.LT.N-1 ) THEN
+*
+*                 Perform a rank-2 update of A(k+2:n,k+2:n) as
+*
+*                 A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )'
+*                    = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )'
+*
+                  D21 = AP( K+1+( K-1 )*( 2*N-K ) / 2 )
+                  D11 = AP( K+1+K*( 2*N-K-1 ) / 2 ) / D21
+                  D22 = AP( K+( K-1 )*( 2*N-K ) / 2 ) / D21
+                  T = ONE / ( D11*D22-ONE )
+                  D21 = T / D21
+*
+                  DO 100 J = K + 2, N
+                     WK = D21*( D11*AP( J+( K-1 )*( 2*N-K ) / 2 )-
+     $                    AP( J+K*( 2*N-K-1 ) / 2 ) )
+                     WKP1 = D21*( D22*AP( J+K*( 2*N-K-1 ) / 2 )-
+     $                      AP( J+( K-1 )*( 2*N-K ) / 2 ) )
+*
+                     DO 90 I = J, N
+                        AP( I+( J-1 )*( 2*N-J ) / 2 ) = AP( I+( J-1 )*
+     $                     ( 2*N-J ) / 2 ) - AP( I+( K-1 )*( 2*N-K ) /
+     $                     2 )*WK - AP( I+K*( 2*N-K-1 ) / 2 )*WKP1
+   90                CONTINUE
+*
+                     AP( J+( K-1 )*( 2*N-K ) / 2 ) = WK
+                     AP( J+K*( 2*N-K-1 ) / 2 ) = WKP1
+*
+  100             CONTINUE
+               END IF
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K+1 ) = -KP
+         END IF
+*
+*        Increase K and return to the start of the main loop
+*
+         K = K + KSTEP
+         KC = KNC + N - K + 2
+         GO TO 60
+*
+      END IF
+*
+  110 CONTINUE
+      RETURN
+*
+*     End of DSPTRF
+*
+      END
diff --git a/costa/native/external/lapack/dsptri.f b/costa/native/external/lapack/dsptri.f
new file mode 100644
index 000000000..211dbaa83
--- /dev/null
+++ b/costa/native/external/lapack/dsptri.f
@@ -0,0 +1,335 @@
+      SUBROUTINE DSPTRI( UPLO, N, AP, IPIV, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   AP( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSPTRI computes the inverse of a real symmetric indefinite matrix
+*  A in packed storage using the factorization A = U*D*U**T or
+*  A = L*D*L**T computed by DSPTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**T;
+*          = 'L':  Lower triangular, form is A = L*D*L**T.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          On entry, the block diagonal matrix D and the multipliers
+*          used to obtain the factor U or L as computed by DSPTRF,
+*          stored as a packed triangular matrix.
+*
+*          On exit, if INFO = 0, the (symmetric) inverse of the original
+*          matrix, stored as a packed triangular matrix. The j-th column
+*          of inv(A) is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = inv(A)(i,j) for 1<=i<=j;
+*          if UPLO = 'L',
+*             AP(i + (j-1)*(2n-j)/2) = inv(A)(i,j) for j<=i<=n.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by DSPTRF.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its
+*               inverse could not be computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, K, KC, KCNEXT, KP, KPC, KSTEP, KX, NPP
+      DOUBLE PRECISION   AK, AKKP1, AKP1, D, T, TEMP
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DDOT
+      EXTERNAL           LSAME, DDOT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DSPMV, DSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSPTRI', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Check that the diagonal matrix D is nonsingular.
+*
+      IF( UPPER ) THEN
+*
+*        Upper triangular storage: examine D from bottom to top
+*
+         KP = N*( N+1 ) / 2
+         DO 10 INFO = N, 1, -1
+            IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO )
+     $         RETURN
+            KP = KP - INFO
+   10    CONTINUE
+      ELSE
+*
+*        Lower triangular storage: examine D from top to bottom.
+*
+         KP = 1
+         DO 20 INFO = 1, N
+            IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO )
+     $         RETURN
+            KP = KP + N - INFO + 1
+   20    CONTINUE
+      END IF
+      INFO = 0
+*
+      IF( UPPER ) THEN
+*
+*        Compute inv(A) from the factorization A = U*D*U'.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+         KC = 1
+   30    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 50
+*
+         KCNEXT = KC + K
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Invert the diagonal block.
+*
+            AP( KC+K-1 ) = ONE / AP( KC+K-1 )
+*
+*           Compute column K of the inverse.
+*
+            IF( K.GT.1 ) THEN
+               CALL DCOPY( K-1, AP( KC ), 1, WORK, 1 )
+               CALL DSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, AP( KC ),
+     $                     1 )
+               AP( KC+K-1 ) = AP( KC+K-1 ) -
+     $                        DDOT( K-1, WORK, 1, AP( KC ), 1 )
+            END IF
+            KSTEP = 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Invert the diagonal block.
+*
+            T = ABS( AP( KCNEXT+K-1 ) )
+            AK = AP( KC+K-1 ) / T
+            AKP1 = AP( KCNEXT+K ) / T
+            AKKP1 = AP( KCNEXT+K-1 ) / T
+            D = T*( AK*AKP1-ONE )
+            AP( KC+K-1 ) = AKP1 / D
+            AP( KCNEXT+K ) = AK / D
+            AP( KCNEXT+K-1 ) = -AKKP1 / D
+*
+*           Compute columns K and K+1 of the inverse.
+*
+            IF( K.GT.1 ) THEN
+               CALL DCOPY( K-1, AP( KC ), 1, WORK, 1 )
+               CALL DSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, AP( KC ),
+     $                     1 )
+               AP( KC+K-1 ) = AP( KC+K-1 ) -
+     $                        DDOT( K-1, WORK, 1, AP( KC ), 1 )
+               AP( KCNEXT+K-1 ) = AP( KCNEXT+K-1 ) -
+     $                            DDOT( K-1, AP( KC ), 1, AP( KCNEXT ),
+     $                            1 )
+               CALL DCOPY( K-1, AP( KCNEXT ), 1, WORK, 1 )
+               CALL DSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO,
+     $                     AP( KCNEXT ), 1 )
+               AP( KCNEXT+K ) = AP( KCNEXT+K ) -
+     $                          DDOT( K-1, WORK, 1, AP( KCNEXT ), 1 )
+            END IF
+            KSTEP = 2
+            KCNEXT = KCNEXT + K + 1
+         END IF
+*
+         KP = ABS( IPIV( K ) )
+         IF( KP.NE.K ) THEN
+*
+*           Interchange rows and columns K and KP in the leading
+*           submatrix A(1:k+1,1:k+1)
+*
+            KPC = ( KP-1 )*KP / 2 + 1
+            CALL DSWAP( KP-1, AP( KC ), 1, AP( KPC ), 1 )
+            KX = KPC + KP - 1
+            DO 40 J = KP + 1, K - 1
+               KX = KX + J - 1
+               TEMP = AP( KC+J-1 )
+               AP( KC+J-1 ) = AP( KX )
+               AP( KX ) = TEMP
+   40       CONTINUE
+            TEMP = AP( KC+K-1 )
+            AP( KC+K-1 ) = AP( KPC+KP-1 )
+            AP( KPC+KP-1 ) = TEMP
+            IF( KSTEP.EQ.2 ) THEN
+               TEMP = AP( KC+K+K-1 )
+               AP( KC+K+K-1 ) = AP( KC+K+KP-1 )
+               AP( KC+K+KP-1 ) = TEMP
+            END IF
+         END IF
+*
+         K = K + KSTEP
+         KC = KCNEXT
+         GO TO 30
+   50    CONTINUE
+*
+      ELSE
+*
+*        Compute inv(A) from the factorization A = L*D*L'.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         NPP = N*( N+1 ) / 2
+         K = N
+         KC = NPP
+   60    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 80
+*
+         KCNEXT = KC - ( N-K+2 )
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Invert the diagonal block.
+*
+            AP( KC ) = ONE / AP( KC )
+*
+*           Compute column K of the inverse.
+*
+            IF( K.LT.N ) THEN
+               CALL DCOPY( N-K, AP( KC+1 ), 1, WORK, 1 )
+               CALL DSPMV( UPLO, N-K, -ONE, AP( KC+N-K+1 ), WORK, 1,
+     $                     ZERO, AP( KC+1 ), 1 )
+               AP( KC ) = AP( KC ) - DDOT( N-K, WORK, 1, AP( KC+1 ), 1 )
+            END IF
+            KSTEP = 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Invert the diagonal block.
+*
+            T = ABS( AP( KCNEXT+1 ) )
+            AK = AP( KCNEXT ) / T
+            AKP1 = AP( KC ) / T
+            AKKP1 = AP( KCNEXT+1 ) / T
+            D = T*( AK*AKP1-ONE )
+            AP( KCNEXT ) = AKP1 / D
+            AP( KC ) = AK / D
+            AP( KCNEXT+1 ) = -AKKP1 / D
+*
+*           Compute columns K-1 and K of the inverse.
+*
+            IF( K.LT.N ) THEN
+               CALL DCOPY( N-K, AP( KC+1 ), 1, WORK, 1 )
+               CALL DSPMV( UPLO, N-K, -ONE, AP( KC+( N-K+1 ) ), WORK, 1,
+     $                     ZERO, AP( KC+1 ), 1 )
+               AP( KC ) = AP( KC ) - DDOT( N-K, WORK, 1, AP( KC+1 ), 1 )
+               AP( KCNEXT+1 ) = AP( KCNEXT+1 ) -
+     $                          DDOT( N-K, AP( KC+1 ), 1,
+     $                          AP( KCNEXT+2 ), 1 )
+               CALL DCOPY( N-K, AP( KCNEXT+2 ), 1, WORK, 1 )
+               CALL DSPMV( UPLO, N-K, -ONE, AP( KC+( N-K+1 ) ), WORK, 1,
+     $                     ZERO, AP( KCNEXT+2 ), 1 )
+               AP( KCNEXT ) = AP( KCNEXT ) -
+     $                        DDOT( N-K, WORK, 1, AP( KCNEXT+2 ), 1 )
+            END IF
+            KSTEP = 2
+            KCNEXT = KCNEXT - ( N-K+3 )
+         END IF
+*
+         KP = ABS( IPIV( K ) )
+         IF( KP.NE.K ) THEN
+*
+*           Interchange rows and columns K and KP in the trailing
+*           submatrix A(k-1:n,k-1:n)
+*
+            KPC = NPP - ( N-KP+1 )*( N-KP+2 ) / 2 + 1
+            IF( KP.LT.N )
+     $         CALL DSWAP( N-KP, AP( KC+KP-K+1 ), 1, AP( KPC+1 ), 1 )
+            KX = KC + KP - K
+            DO 70 J = K + 1, KP - 1
+               KX = KX + N - J + 1
+               TEMP = AP( KC+J-K )
+               AP( KC+J-K ) = AP( KX )
+               AP( KX ) = TEMP
+   70       CONTINUE
+            TEMP = AP( KC )
+            AP( KC ) = AP( KPC )
+            AP( KPC ) = TEMP
+            IF( KSTEP.EQ.2 ) THEN
+               TEMP = AP( KC-N+K-1 )
+               AP( KC-N+K-1 ) = AP( KC-N+KP-1 )
+               AP( KC-N+KP-1 ) = TEMP
+            END IF
+         END IF
+*
+         K = K - KSTEP
+         KC = KCNEXT
+         GO TO 60
+   80    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DSPTRI
+*
+      END
diff --git a/costa/native/external/lapack/dsptrs.f b/costa/native/external/lapack/dsptrs.f
new file mode 100644
index 000000000..d71634200
--- /dev/null
+++ b/costa/native/external/lapack/dsptrs.f
@@ -0,0 +1,378 @@
+      SUBROUTINE DSPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   AP( * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSPTRS solves a system of linear equations A*X = B with a real
+*  symmetric matrix A stored in packed format using the factorization
+*  A = U*D*U**T or A = L*D*L**T computed by DSPTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**T;
+*          = 'L':  Lower triangular, form is A = L*D*L**T.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AP      (input) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          The block diagonal matrix D and the multipliers used to
+*          obtain the factor U or L as computed by DSPTRF, stored as a
+*          packed triangular matrix.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by DSPTRF.
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, K, KC, KP
+      DOUBLE PRECISION   AK, AKM1, AKM1K, BK, BKM1, DENOM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMV, DGER, DSCAL, DSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSPTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Solve A*X = B, where A = U*D*U'.
+*
+*        First solve U*D*X = B, overwriting B with X.
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = N
+         KC = N*( N+1 ) / 2 + 1
+   10    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 30
+*
+         KC = KC - K
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(U(K)), where U(K) is the transformation
+*           stored in column K of A.
+*
+            CALL DGER( K-1, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB,
+     $                 B( 1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            CALL DSCAL( NRHS, ONE / AP( KC+K-1 ), B( K, 1 ), LDB )
+            K = K - 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Interchange rows K-1 and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K-1 )
+     $         CALL DSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(U(K)), where U(K) is the transformation
+*           stored in columns K-1 and K of A.
+*
+            CALL DGER( K-2, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB,
+     $                 B( 1, 1 ), LDB )
+            CALL DGER( K-2, NRHS, -ONE, AP( KC-( K-1 ) ), 1,
+     $                 B( K-1, 1 ), LDB, B( 1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            AKM1K = AP( KC+K-2 )
+            AKM1 = AP( KC-1 ) / AKM1K
+            AK = AP( KC+K-1 ) / AKM1K
+            DENOM = AKM1*AK - ONE
+            DO 20 J = 1, NRHS
+               BKM1 = B( K-1, J ) / AKM1K
+               BK = B( K, J ) / AKM1K
+               B( K-1, J ) = ( AK*BKM1-BK ) / DENOM
+               B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM
+   20       CONTINUE
+            KC = KC - K + 1
+            K = K - 2
+         END IF
+*
+         GO TO 10
+   30    CONTINUE
+*
+*        Next solve U'*X = B, overwriting B with X.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+         KC = 1
+   40    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 50
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Multiply by inv(U'(K)), where U(K) is the transformation
+*           stored in column K of A.
+*
+            CALL DGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, AP( KC ),
+     $                  1, ONE, B( K, 1 ), LDB )
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            KC = KC + K
+            K = K + 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Multiply by inv(U'(K+1)), where U(K+1) is the transformation
+*           stored in columns K and K+1 of A.
+*
+            CALL DGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, AP( KC ),
+     $                  1, ONE, B( K, 1 ), LDB )
+            CALL DGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB,
+     $                  AP( KC+K ), 1, ONE, B( K+1, 1 ), LDB )
+*
+*           Interchange rows K and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K )
+     $         CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            KC = KC + 2*K + 1
+            K = K + 2
+         END IF
+*
+         GO TO 40
+   50    CONTINUE
+*
+      ELSE
+*
+*        Solve A*X = B, where A = L*D*L'.
+*
+*        First solve L*D*X = B, overwriting B with X.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+         KC = 1
+   60    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 80
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(L(K)), where L(K) is the transformation
+*           stored in column K of A.
+*
+            IF( K.LT.N )
+     $         CALL DGER( N-K, NRHS, -ONE, AP( KC+1 ), 1, B( K, 1 ),
+     $                    LDB, B( K+1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            CALL DSCAL( NRHS, ONE / AP( KC ), B( K, 1 ), LDB )
+            KC = KC + N - K + 1
+            K = K + 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Interchange rows K+1 and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K+1 )
+     $         CALL DSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(L(K)), where L(K) is the transformation
+*           stored in columns K and K+1 of A.
+*
+            IF( K.LT.N-1 ) THEN
+               CALL DGER( N-K-1, NRHS, -ONE, AP( KC+2 ), 1, B( K, 1 ),
+     $                    LDB, B( K+2, 1 ), LDB )
+               CALL DGER( N-K-1, NRHS, -ONE, AP( KC+N-K+2 ), 1,
+     $                    B( K+1, 1 ), LDB, B( K+2, 1 ), LDB )
+            END IF
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            AKM1K = AP( KC+1 )
+            AKM1 = AP( KC ) / AKM1K
+            AK = AP( KC+N-K+1 ) / AKM1K
+            DENOM = AKM1*AK - ONE
+            DO 70 J = 1, NRHS
+               BKM1 = B( K, J ) / AKM1K
+               BK = B( K+1, J ) / AKM1K
+               B( K, J ) = ( AK*BKM1-BK ) / DENOM
+               B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM
+   70       CONTINUE
+            KC = KC + 2*( N-K ) + 1
+            K = K + 2
+         END IF
+*
+         GO TO 60
+   80    CONTINUE
+*
+*        Next solve L'*X = B, overwriting B with X.
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = N
+         KC = N*( N+1 ) / 2 + 1
+   90    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 100
+*
+         KC = KC - ( N-K+1 )
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Multiply by inv(L'(K)), where L(K) is the transformation
+*           stored in column K of A.
+*
+            IF( K.LT.N )
+     $         CALL DGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ),
+     $                     LDB, AP( KC+1 ), 1, ONE, B( K, 1 ), LDB )
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            K = K - 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Multiply by inv(L'(K-1)), where L(K-1) is the transformation
+*           stored in columns K-1 and K of A.
+*
+            IF( K.LT.N ) THEN
+               CALL DGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ),
+     $                     LDB, AP( KC+1 ), 1, ONE, B( K, 1 ), LDB )
+               CALL DGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ),
+     $                     LDB, AP( KC-( N-K ) ), 1, ONE, B( K-1, 1 ),
+     $                     LDB )
+            END IF
+*
+*           Interchange rows K and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K )
+     $         CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            KC = KC - ( N-K+2 )
+            K = K - 2
+         END IF
+*
+         GO TO 90
+  100    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DSPTRS
+*
+      END
diff --git a/costa/native/external/lapack/dstebz.f b/costa/native/external/lapack/dstebz.f
new file mode 100644
index 000000000..349972e85
--- /dev/null
+++ b/costa/native/external/lapack/dstebz.f
@@ -0,0 +1,652 @@
+      SUBROUTINE DSTEBZ( RANGE, ORDER, N, VL, VU, IL, IU, ABSTOL, D, E,
+     $                   M, NSPLIT, W, IBLOCK, ISPLIT, WORK, IWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          ORDER, RANGE
+      INTEGER            IL, INFO, IU, M, N, NSPLIT
+      DOUBLE PRECISION   ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IBLOCK( * ), ISPLIT( * ), IWORK( * )
+      DOUBLE PRECISION   D( * ), E( * ), W( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSTEBZ computes the eigenvalues of a symmetric tridiagonal
+*  matrix T.  The user may ask for all eigenvalues, all eigenvalues
+*  in the half-open interval (VL, VU], or the IL-th through IU-th
+*  eigenvalues.
+*
+*  To avoid overflow, the matrix must be scaled so that its
+*  largest element is no greater than overflow**(1/2) *
+*  underflow**(1/4) in absolute value, and for greatest
+*  accuracy, it should not be much smaller than that.
+*
+*  See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal
+*  Matrix", Report CS41, Computer Science Dept., Stanford
+*  University, July 21, 1966.
+*
+*  Arguments
+*  =========
+*
+*  RANGE   (input) CHARACTER
+*          = 'A': ("All")   all eigenvalues will be found.
+*          = 'V': ("Value") all eigenvalues in the half-open interval
+*                           (VL, VU] will be found.
+*          = 'I': ("Index") the IL-th through IU-th eigenvalues (of the
+*                           entire matrix) will be found.
+*
+*  ORDER   (input) CHARACTER
+*          = 'B': ("By Block") the eigenvalues will be grouped by
+*                              split-off block (see IBLOCK, ISPLIT) and
+*                              ordered from smallest to largest within
+*                              the block.
+*          = 'E': ("Entire matrix")
+*                              the eigenvalues for the entire matrix
+*                              will be ordered from smallest to
+*                              largest.
+*
+*  N       (input) INTEGER
+*          The order of the tridiagonal matrix T.  N >= 0.
+*
+*  VL      (input) DOUBLE PRECISION
+*  VU      (input) DOUBLE PRECISION
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues.  Eigenvalues less than or equal
+*          to VL, or greater than VU, will not be returned.  VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) DOUBLE PRECISION
+*          The absolute tolerance for the eigenvalues.  An eigenvalue
+*          (or cluster) is considered to be located if it has been
+*          determined to lie in an interval whose width is ABSTOL or
+*          less.  If ABSTOL is less than or equal to zero, then ULP*|T|
+*          will be used, where |T| means the 1-norm of T.
+*
+*          Eigenvalues will be computed most accurately when ABSTOL is
+*          set to twice the underflow threshold 2*DLAMCH('S'), not zero.
+*
+*  D       (input) DOUBLE PRECISION array, dimension (N)
+*          The n diagonal elements of the tridiagonal matrix T.
+*
+*  E       (input) DOUBLE PRECISION array, dimension (N-1)
+*          The (n-1) off-diagonal elements of the tridiagonal matrix T.
+*
+*  M       (output) INTEGER
+*          The actual number of eigenvalues found. 0 <= M <= N.
+*          (See also the description of INFO=2,3.)
+*
+*  NSPLIT  (output) INTEGER
+*          The number of diagonal blocks in the matrix T.
+*          1 <= NSPLIT <= N.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          On exit, the first M elements of W will contain the
+*          eigenvalues.  (DSTEBZ may use the remaining N-M elements as
+*          workspace.)
+*
+*  IBLOCK  (output) INTEGER array, dimension (N)
+*          At each row/column j where E(j) is zero or small, the
+*          matrix T is considered to split into a block diagonal
+*          matrix.  On exit, if INFO = 0, IBLOCK(i) specifies to which
+*          block (from 1 to the number of blocks) the eigenvalue W(i)
+*          belongs.  (DSTEBZ may use the remaining N-M elements as
+*          workspace.)
+*
+*  ISPLIT  (output) INTEGER array, dimension (N)
+*          The splitting points, at which T breaks up into submatrices.
+*          The first submatrix consists of rows/columns 1 to ISPLIT(1),
+*          the second of rows/columns ISPLIT(1)+1 through ISPLIT(2),
+*          etc., and the NSPLIT-th consists of rows/columns
+*          ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N.
+*          (Only the first NSPLIT elements will actually be used, but
+*          since the user cannot know a priori what value NSPLIT will
+*          have, N words must be reserved for ISPLIT.)
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (4*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (3*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  some or all of the eigenvalues failed to converge or
+*                were not computed:
+*                =1 or 3: Bisection failed to converge for some
+*                        eigenvalues; these eigenvalues are flagged by a
+*                        negative block number.  The effect is that the
+*                        eigenvalues may not be as accurate as the
+*                        absolute and relative tolerances.  This is
+*                        generally caused by unexpectedly inaccurate
+*                        arithmetic.
+*                =2 or 3: RANGE='I' only: Not all of the eigenvalues
+*                        IL:IU were found.
+*                        Effect: M < IU+1-IL
+*                        Cause:  non-monotonic arithmetic, causing the
+*                                Sturm sequence to be non-monotonic.
+*                        Cure:   recalculate, using RANGE='A', and pick
+*                                out eigenvalues IL:IU.  In some cases,
+*                                increasing the PARAMETER "FUDGE" may
+*                                make things work.
+*                = 4:    RANGE='I', and the Gershgorin interval
+*                        initially used was too small.  No eigenvalues
+*                        were computed.
+*                        Probable cause: your machine has sloppy
+*                                        floating-point arithmetic.
+*                        Cure: Increase the PARAMETER "FUDGE",
+*                              recompile, and try again.
+*
+*  Internal Parameters
+*  ===================
+*
+*  RELFAC  DOUBLE PRECISION, default = 2.0e0
+*          The relative tolerance.  An interval (a,b] lies within
+*          "relative tolerance" if  b-a < RELFAC*ulp*max(|a|,|b|),
+*          where "ulp" is the machine precision (distance from 1 to
+*          the next larger floating point number.)
+*
+*  FUDGE   DOUBLE PRECISION, default = 2
+*          A "fudge factor" to widen the Gershgorin intervals.  Ideally,
+*          a value of 1 should work, but on machines with sloppy
+*          arithmetic, this needs to be larger.  The default for
+*          publicly released versions should be large enough to handle
+*          the worst machine around.  Note that this has no effect
+*          on accuracy of the solution.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO, HALF
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0,
+     $                   HALF = 1.0D0 / TWO )
+      DOUBLE PRECISION   FUDGE, RELFAC
+      PARAMETER          ( FUDGE = 2.0D0, RELFAC = 2.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NCNVRG, TOOFEW
+      INTEGER            IB, IBEGIN, IDISCL, IDISCU, IE, IEND, IINFO,
+     $                   IM, IN, IOFF, IORDER, IOUT, IRANGE, ITMAX,
+     $                   ITMP1, IW, IWOFF, J, JB, JDISC, JE, NB, NWL,
+     $                   NWU
+      DOUBLE PRECISION   ATOLI, BNORM, GL, GU, PIVMIN, RTOLI, SAFEMN,
+     $                   TMP1, TMP2, TNORM, ULP, WKILL, WL, WLU, WU, WUL
+*     ..
+*     .. Local Arrays ..
+      INTEGER            IDUMMA( 1 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, ILAENV, DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLAEBZ, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, INT, LOG, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+*
+*     Decode RANGE
+*
+      IF( LSAME( RANGE, 'A' ) ) THEN
+         IRANGE = 1
+      ELSE IF( LSAME( RANGE, 'V' ) ) THEN
+         IRANGE = 2
+      ELSE IF( LSAME( RANGE, 'I' ) ) THEN
+         IRANGE = 3
+      ELSE
+         IRANGE = 0
+      END IF
+*
+*     Decode ORDER
+*
+      IF( LSAME( ORDER, 'B' ) ) THEN
+         IORDER = 2
+      ELSE IF( LSAME( ORDER, 'E' ) ) THEN
+         IORDER = 1
+      ELSE
+         IORDER = 0
+      END IF
+*
+*     Check for Errors
+*
+      IF( IRANGE.LE.0 ) THEN
+         INFO = -1
+      ELSE IF( IORDER.LE.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( IRANGE.EQ.2 ) THEN
+         IF( VL.GE.VU )
+     $      INFO = -5
+      ELSE IF( IRANGE.EQ.3 .AND. ( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) )
+     $          THEN
+         INFO = -6
+      ELSE IF( IRANGE.EQ.3 .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) )
+     $          THEN
+         INFO = -7
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSTEBZ', -INFO )
+         RETURN
+      END IF
+*
+*     Initialize error flags
+*
+      INFO = 0
+      NCNVRG = .FALSE.
+      TOOFEW = .FALSE.
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Simplifications:
+*
+      IF( IRANGE.EQ.3 .AND. IL.EQ.1 .AND. IU.EQ.N )
+     $   IRANGE = 1
+*
+*     Get machine constants
+*     NB is the minimum vector length for vector bisection, or 0
+*     if only scalar is to be done.
+*
+      SAFEMN = DLAMCH( 'S' )
+      ULP = DLAMCH( 'P' )
+      RTOLI = ULP*RELFAC
+      NB = ILAENV( 1, 'DSTEBZ', ' ', N, -1, -1, -1 )
+      IF( NB.LE.1 )
+     $   NB = 0
+*
+*     Special Case when N=1
+*
+      IF( N.EQ.1 ) THEN
+         NSPLIT = 1
+         ISPLIT( 1 ) = 1
+         IF( IRANGE.EQ.2 .AND. ( VL.GE.D( 1 ) .OR. VU.LT.D( 1 ) ) ) THEN
+            M = 0
+         ELSE
+            W( 1 ) = D( 1 )
+            IBLOCK( 1 ) = 1
+            M = 1
+         END IF
+         RETURN
+      END IF
+*
+*     Compute Splitting Points
+*
+      NSPLIT = 1
+      WORK( N ) = ZERO
+      PIVMIN = ONE
+*
+*DIR$ NOVECTOR
+      DO 10 J = 2, N
+         TMP1 = E( J-1 )**2
+         IF( ABS( D( J )*D( J-1 ) )*ULP**2+SAFEMN.GT.TMP1 ) THEN
+            ISPLIT( NSPLIT ) = J - 1
+            NSPLIT = NSPLIT + 1
+            WORK( J-1 ) = ZERO
+         ELSE
+            WORK( J-1 ) = TMP1
+            PIVMIN = MAX( PIVMIN, TMP1 )
+         END IF
+   10 CONTINUE
+      ISPLIT( NSPLIT ) = N
+      PIVMIN = PIVMIN*SAFEMN
+*
+*     Compute Interval and ATOLI
+*
+      IF( IRANGE.EQ.3 ) THEN
+*
+*        RANGE='I': Compute the interval containing eigenvalues
+*                   IL through IU.
+*
+*        Compute Gershgorin interval for entire (split) matrix
+*        and use it as the initial interval
+*
+         GU = D( 1 )
+         GL = D( 1 )
+         TMP1 = ZERO
+*
+         DO 20 J = 1, N - 1
+            TMP2 = SQRT( WORK( J ) )
+            GU = MAX( GU, D( J )+TMP1+TMP2 )
+            GL = MIN( GL, D( J )-TMP1-TMP2 )
+            TMP1 = TMP2
+   20    CONTINUE
+*
+         GU = MAX( GU, D( N )+TMP1 )
+         GL = MIN( GL, D( N )-TMP1 )
+         TNORM = MAX( ABS( GL ), ABS( GU ) )
+         GL = GL - FUDGE*TNORM*ULP*N - FUDGE*TWO*PIVMIN
+         GU = GU + FUDGE*TNORM*ULP*N + FUDGE*PIVMIN
+*
+*        Compute Iteration parameters
+*
+         ITMAX = INT( ( LOG( TNORM+PIVMIN )-LOG( PIVMIN ) ) /
+     $           LOG( TWO ) ) + 2
+         IF( ABSTOL.LE.ZERO ) THEN
+            ATOLI = ULP*TNORM
+         ELSE
+            ATOLI = ABSTOL
+         END IF
+*
+         WORK( N+1 ) = GL
+         WORK( N+2 ) = GL
+         WORK( N+3 ) = GU
+         WORK( N+4 ) = GU
+         WORK( N+5 ) = GL
+         WORK( N+6 ) = GU
+         IWORK( 1 ) = -1
+         IWORK( 2 ) = -1
+         IWORK( 3 ) = N + 1
+         IWORK( 4 ) = N + 1
+         IWORK( 5 ) = IL - 1
+         IWORK( 6 ) = IU
+*
+         CALL DLAEBZ( 3, ITMAX, N, 2, 2, NB, ATOLI, RTOLI, PIVMIN, D, E,
+     $                WORK, IWORK( 5 ), WORK( N+1 ), WORK( N+5 ), IOUT,
+     $                IWORK, W, IBLOCK, IINFO )
+*
+         IF( IWORK( 6 ).EQ.IU ) THEN
+            WL = WORK( N+1 )
+            WLU = WORK( N+3 )
+            NWL = IWORK( 1 )
+            WU = WORK( N+4 )
+            WUL = WORK( N+2 )
+            NWU = IWORK( 4 )
+         ELSE
+            WL = WORK( N+2 )
+            WLU = WORK( N+4 )
+            NWL = IWORK( 2 )
+            WU = WORK( N+3 )
+            WUL = WORK( N+1 )
+            NWU = IWORK( 3 )
+         END IF
+*
+         IF( NWL.LT.0 .OR. NWL.GE.N .OR. NWU.LT.1 .OR. NWU.GT.N ) THEN
+            INFO = 4
+            RETURN
+         END IF
+      ELSE
+*
+*        RANGE='A' or 'V' -- Set ATOLI
+*
+         TNORM = MAX( ABS( D( 1 ) )+ABS( E( 1 ) ),
+     $           ABS( D( N ) )+ABS( E( N-1 ) ) )
+*
+         DO 30 J = 2, N - 1
+            TNORM = MAX( TNORM, ABS( D( J ) )+ABS( E( J-1 ) )+
+     $              ABS( E( J ) ) )
+   30    CONTINUE
+*
+         IF( ABSTOL.LE.ZERO ) THEN
+            ATOLI = ULP*TNORM
+         ELSE
+            ATOLI = ABSTOL
+         END IF
+*
+         IF( IRANGE.EQ.2 ) THEN
+            WL = VL
+            WU = VU
+         ELSE
+            WL = ZERO
+            WU = ZERO
+         END IF
+      END IF
+*
+*     Find Eigenvalues -- Loop Over Blocks and recompute NWL and NWU.
+*     NWL accumulates the number of eigenvalues .le. WL,
+*     NWU accumulates the number of eigenvalues .le. WU
+*
+      M = 0
+      IEND = 0
+      INFO = 0
+      NWL = 0
+      NWU = 0
+*
+      DO 70 JB = 1, NSPLIT
+         IOFF = IEND
+         IBEGIN = IOFF + 1
+         IEND = ISPLIT( JB )
+         IN = IEND - IOFF
+*
+         IF( IN.EQ.1 ) THEN
+*
+*           Special Case -- IN=1
+*
+            IF( IRANGE.EQ.1 .OR. WL.GE.D( IBEGIN )-PIVMIN )
+     $         NWL = NWL + 1
+            IF( IRANGE.EQ.1 .OR. WU.GE.D( IBEGIN )-PIVMIN )
+     $         NWU = NWU + 1
+            IF( IRANGE.EQ.1 .OR. ( WL.LT.D( IBEGIN )-PIVMIN .AND. WU.GE.
+     $          D( IBEGIN )-PIVMIN ) ) THEN
+               M = M + 1
+               W( M ) = D( IBEGIN )
+               IBLOCK( M ) = JB
+            END IF
+         ELSE
+*
+*           General Case -- IN > 1
+*
+*           Compute Gershgorin Interval
+*           and use it as the initial interval
+*
+            GU = D( IBEGIN )
+            GL = D( IBEGIN )
+            TMP1 = ZERO
+*
+            DO 40 J = IBEGIN, IEND - 1
+               TMP2 = ABS( E( J ) )
+               GU = MAX( GU, D( J )+TMP1+TMP2 )
+               GL = MIN( GL, D( J )-TMP1-TMP2 )
+               TMP1 = TMP2
+   40       CONTINUE
+*
+            GU = MAX( GU, D( IEND )+TMP1 )
+            GL = MIN( GL, D( IEND )-TMP1 )
+            BNORM = MAX( ABS( GL ), ABS( GU ) )
+            GL = GL - FUDGE*BNORM*ULP*IN - FUDGE*PIVMIN
+            GU = GU + FUDGE*BNORM*ULP*IN + FUDGE*PIVMIN
+*
+*           Compute ATOLI for the current submatrix
+*
+            IF( ABSTOL.LE.ZERO ) THEN
+               ATOLI = ULP*MAX( ABS( GL ), ABS( GU ) )
+            ELSE
+               ATOLI = ABSTOL
+            END IF
+*
+            IF( IRANGE.GT.1 ) THEN
+               IF( GU.LT.WL ) THEN
+                  NWL = NWL + IN
+                  NWU = NWU + IN
+                  GO TO 70
+               END IF
+               GL = MAX( GL, WL )
+               GU = MIN( GU, WU )
+               IF( GL.GE.GU )
+     $            GO TO 70
+            END IF
+*
+*           Set Up Initial Interval
+*
+            WORK( N+1 ) = GL
+            WORK( N+IN+1 ) = GU
+            CALL DLAEBZ( 1, 0, IN, IN, 1, NB, ATOLI, RTOLI, PIVMIN,
+     $                   D( IBEGIN ), E( IBEGIN ), WORK( IBEGIN ),
+     $                   IDUMMA, WORK( N+1 ), WORK( N+2*IN+1 ), IM,
+     $                   IWORK, W( M+1 ), IBLOCK( M+1 ), IINFO )
+*
+            NWL = NWL + IWORK( 1 )
+            NWU = NWU + IWORK( IN+1 )
+            IWOFF = M - IWORK( 1 )
+*
+*           Compute Eigenvalues
+*
+            ITMAX = INT( ( LOG( GU-GL+PIVMIN )-LOG( PIVMIN ) ) /
+     $              LOG( TWO ) ) + 2
+            CALL DLAEBZ( 2, ITMAX, IN, IN, 1, NB, ATOLI, RTOLI, PIVMIN,
+     $                   D( IBEGIN ), E( IBEGIN ), WORK( IBEGIN ),
+     $                   IDUMMA, WORK( N+1 ), WORK( N+2*IN+1 ), IOUT,
+     $                   IWORK, W( M+1 ), IBLOCK( M+1 ), IINFO )
+*
+*           Copy Eigenvalues Into W and IBLOCK
+*           Use -JB for block number for unconverged eigenvalues.
+*
+            DO 60 J = 1, IOUT
+               TMP1 = HALF*( WORK( J+N )+WORK( J+IN+N ) )
+*
+*              Flag non-convergence.
+*
+               IF( J.GT.IOUT-IINFO ) THEN
+                  NCNVRG = .TRUE.
+                  IB = -JB
+               ELSE
+                  IB = JB
+               END IF
+               DO 50 JE = IWORK( J ) + 1 + IWOFF,
+     $                 IWORK( J+IN ) + IWOFF
+                  W( JE ) = TMP1
+                  IBLOCK( JE ) = IB
+   50          CONTINUE
+   60       CONTINUE
+*
+            M = M + IM
+         END IF
+   70 CONTINUE
+*
+*     If RANGE='I', then (WL,WU) contains eigenvalues NWL+1,...,NWU
+*     If NWL+1 < IL or NWU > IU, discard extra eigenvalues.
+*
+      IF( IRANGE.EQ.3 ) THEN
+         IM = 0
+         IDISCL = IL - 1 - NWL
+         IDISCU = NWU - IU
+*
+         IF( IDISCL.GT.0 .OR. IDISCU.GT.0 ) THEN
+            DO 80 JE = 1, M
+               IF( W( JE ).LE.WLU .AND. IDISCL.GT.0 ) THEN
+                  IDISCL = IDISCL - 1
+               ELSE IF( W( JE ).GE.WUL .AND. IDISCU.GT.0 ) THEN
+                  IDISCU = IDISCU - 1
+               ELSE
+                  IM = IM + 1
+                  W( IM ) = W( JE )
+                  IBLOCK( IM ) = IBLOCK( JE )
+               END IF
+   80       CONTINUE
+            M = IM
+         END IF
+         IF( IDISCL.GT.0 .OR. IDISCU.GT.0 ) THEN
+*
+*           Code to deal with effects of bad arithmetic:
+*           Some low eigenvalues to be discarded are not in (WL,WLU],
+*           or high eigenvalues to be discarded are not in (WUL,WU]
+*           so just kill off the smallest IDISCL/largest IDISCU
+*           eigenvalues, by simply finding the smallest/largest
+*           eigenvalue(s).
+*
+*           (If N(w) is monotone non-decreasing, this should never
+*               happen.)
+*
+            IF( IDISCL.GT.0 ) THEN
+               WKILL = WU
+               DO 100 JDISC = 1, IDISCL
+                  IW = 0
+                  DO 90 JE = 1, M
+                     IF( IBLOCK( JE ).NE.0 .AND.
+     $                   ( W( JE ).LT.WKILL .OR. IW.EQ.0 ) ) THEN
+                        IW = JE
+                        WKILL = W( JE )
+                     END IF
+   90             CONTINUE
+                  IBLOCK( IW ) = 0
+  100          CONTINUE
+            END IF
+            IF( IDISCU.GT.0 ) THEN
+*
+               WKILL = WL
+               DO 120 JDISC = 1, IDISCU
+                  IW = 0
+                  DO 110 JE = 1, M
+                     IF( IBLOCK( JE ).NE.0 .AND.
+     $                   ( W( JE ).GT.WKILL .OR. IW.EQ.0 ) ) THEN
+                        IW = JE
+                        WKILL = W( JE )
+                     END IF
+  110             CONTINUE
+                  IBLOCK( IW ) = 0
+  120          CONTINUE
+            END IF
+            IM = 0
+            DO 130 JE = 1, M
+               IF( IBLOCK( JE ).NE.0 ) THEN
+                  IM = IM + 1
+                  W( IM ) = W( JE )
+                  IBLOCK( IM ) = IBLOCK( JE )
+               END IF
+  130       CONTINUE
+            M = IM
+         END IF
+         IF( IDISCL.LT.0 .OR. IDISCU.LT.0 ) THEN
+            TOOFEW = .TRUE.
+         END IF
+      END IF
+*
+*     If ORDER='B', do nothing -- the eigenvalues are already sorted
+*        by block.
+*     If ORDER='E', sort the eigenvalues from smallest to largest
+*
+      IF( IORDER.EQ.1 .AND. NSPLIT.GT.1 ) THEN
+         DO 150 JE = 1, M - 1
+            IE = 0
+            TMP1 = W( JE )
+            DO 140 J = JE + 1, M
+               IF( W( J ).LT.TMP1 ) THEN
+                  IE = J
+                  TMP1 = W( J )
+               END IF
+  140       CONTINUE
+*
+            IF( IE.NE.0 ) THEN
+               ITMP1 = IBLOCK( IE )
+               W( IE ) = W( JE )
+               IBLOCK( IE ) = IBLOCK( JE )
+               W( JE ) = TMP1
+               IBLOCK( JE ) = ITMP1
+            END IF
+  150    CONTINUE
+      END IF
+*
+      INFO = 0
+      IF( NCNVRG )
+     $   INFO = INFO + 1
+      IF( TOOFEW )
+     $   INFO = INFO + 2
+      RETURN
+*
+*     End of DSTEBZ
+*
+      END
diff --git a/costa/native/external/lapack/dstedc.f b/costa/native/external/lapack/dstedc.f
new file mode 100644
index 000000000..e83f640ad
--- /dev/null
+++ b/costa/native/external/lapack/dstedc.f
@@ -0,0 +1,397 @@
+      SUBROUTINE DSTEDC( COMPZ, N, D, E, Z, LDZ, WORK, LWORK, IWORK,
+     $                   LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPZ
+      INTEGER            INFO, LDZ, LIWORK, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   D( * ), E( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSTEDC computes all eigenvalues and, optionally, eigenvectors of a
+*  symmetric tridiagonal matrix using the divide and conquer method.
+*  The eigenvectors of a full or band real symmetric matrix can also be
+*  found if DSYTRD or DSPTRD or DSBTRD has been used to reduce this
+*  matrix to tridiagonal form.
+*
+*  This code makes very mild assumptions about floating point
+*  arithmetic. It will work on machines with a guard digit in
+*  add/subtract, or on those binary machines without guard digits
+*  which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
+*  It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.  See DLAED3 for details.
+*
+*  Arguments
+*  =========
+*
+*  COMPZ   (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only.
+*          = 'I':  Compute eigenvectors of tridiagonal matrix also.
+*          = 'V':  Compute eigenvectors of original dense symmetric
+*                  matrix also.  On entry, Z contains the orthogonal
+*                  matrix used to reduce the original matrix to
+*                  tridiagonal form.
+*
+*  N       (input) INTEGER
+*          The dimension of the symmetric tridiagonal matrix.  N >= 0.
+*
+*  D       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the diagonal elements of the tridiagonal matrix.
+*          On exit, if INFO = 0, the eigenvalues in ascending order.
+*
+*  E       (input/output) DOUBLE PRECISION array, dimension (N-1)
+*          On entry, the subdiagonal elements of the tridiagonal matrix.
+*          On exit, E has been destroyed.
+*
+*  Z       (input/output) DOUBLE PRECISION array, dimension (LDZ,N)
+*          On entry, if COMPZ = 'V', then Z contains the orthogonal
+*          matrix used in the reduction to tridiagonal form.
+*          On exit, if INFO = 0, then if COMPZ = 'V', Z contains the
+*          orthonormal eigenvectors of the original symmetric matrix,
+*          and if COMPZ = 'I', Z contains the orthonormal eigenvectors
+*          of the symmetric tridiagonal matrix.
+*          If  COMPZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1.
+*          If eigenvectors are desired, then LDZ >= max(1,N).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array,
+*                                         dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If COMPZ = 'N' or N <= 1 then LWORK must be at least 1.
+*          If COMPZ = 'V' and N > 1 then LWORK must be at least
+*                         ( 1 + 3*N + 2*N*lg N + 3*N**2 ),
+*                         where lg( N ) = smallest integer k such
+*                         that 2**k >= N.
+*          If COMPZ = 'I' and N > 1 then LWORK must be at least
+*                         ( 1 + 4*N + N**2 ).
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.
+*          If COMPZ = 'N' or N <= 1 then LIWORK must be at least 1.
+*          If COMPZ = 'V' and N > 1 then LIWORK must be at least
+*                         ( 6 + 6*N + 5*N*lg N ).
+*          If COMPZ = 'I' and N > 1 then LIWORK must be at least
+*                         ( 3 + 5*N ).
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  The algorithm failed to compute an eigenvalue while
+*                working on the submatrix lying in rows and columns
+*                INFO/(N+1) through mod(INFO,N+1).
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Jeff Rutter, Computer Science Division, University of California
+*     at Berkeley, USA
+*  Modified by Francoise Tisseur, University of Tennessee.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            DTRTRW, END, I, ICOMPZ, II, J, K, LGN, LIWMIN,
+     $                   LWMIN, M, SMLSIZ, START, STOREZ
+      DOUBLE PRECISION   EPS, ORGNRM, P, TINY
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, DLANST
+      EXTERNAL           LSAME, ILAENV, DLAMCH, DLANST
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMM, DLACPY, DLAED0, DLASCL, DLASET, DLASRT,
+     $                   DSTEQR, DSTERF, DSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, INT, LOG, MAX, MOD, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      IF( LSAME( COMPZ, 'N' ) ) THEN
+         ICOMPZ = 0
+      ELSE IF( LSAME( COMPZ, 'V' ) ) THEN
+         ICOMPZ = 1
+      ELSE IF( LSAME( COMPZ, 'I' ) ) THEN
+         ICOMPZ = 2
+      ELSE
+         ICOMPZ = -1
+      END IF
+      IF( N.LE.1 .OR. ICOMPZ.LE.0 ) THEN
+         LIWMIN = 1
+         LWMIN = 1
+      ELSE
+         LGN = INT( LOG( DBLE( N ) ) / LOG( TWO ) )
+         IF( 2**LGN.LT.N )
+     $      LGN = LGN + 1
+         IF( 2**LGN.LT.N )
+     $      LGN = LGN + 1
+         IF( ICOMPZ.EQ.1 ) THEN
+            LWMIN = 1 + 3*N + 2*N*LGN + 3*N**2
+            LIWMIN = 6 + 6*N + 5*N*LGN
+         ELSE IF( ICOMPZ.EQ.2 ) THEN
+            LWMIN = 1 + 4*N + N**2
+            LIWMIN = 3 + 5*N
+         END IF
+      END IF
+      IF( ICOMPZ.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1,
+     $         N ) ) ) THEN
+         INFO = -6
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -10
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSTEDC', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+      IF( N.EQ.1 ) THEN
+         IF( ICOMPZ.NE.0 )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+      SMLSIZ = ILAENV( 9, 'DSTEDC', ' ', 0, 0, 0, 0 )
+*
+*     If the following conditional clause is removed, then the routine
+*     will use the Divide and Conquer routine to compute only the
+*     eigenvalues, which requires (3N + 3N**2) real workspace and
+*     (2 + 5N + 2N lg(N)) integer workspace.
+*     Since on many architectures DSTERF is much faster than any other
+*     algorithm for finding eigenvalues only, it is used here
+*     as the default.
+*
+*     If COMPZ = 'N', use DSTERF to compute the eigenvalues.
+*
+      IF( ICOMPZ.EQ.0 ) THEN
+         CALL DSTERF( N, D, E, INFO )
+         RETURN
+      END IF
+*
+*     If N is smaller than the minimum divide size (SMLSIZ+1), then
+*     solve the problem with another solver.
+*
+      IF( N.LE.SMLSIZ ) THEN
+         IF( ICOMPZ.EQ.0 ) THEN
+            CALL DSTERF( N, D, E, INFO )
+            RETURN
+         ELSE IF( ICOMPZ.EQ.2 ) THEN
+            CALL DSTEQR( 'I', N, D, E, Z, LDZ, WORK, INFO )
+            RETURN
+         ELSE
+            CALL DSTEQR( 'V', N, D, E, Z, LDZ, WORK, INFO )
+            RETURN
+         END IF
+      END IF
+*
+*     If COMPZ = 'V', the Z matrix must be stored elsewhere for later
+*     use.
+*
+      IF( ICOMPZ.EQ.1 ) THEN
+         STOREZ = 1 + N*N
+      ELSE
+         STOREZ = 1
+      END IF
+*
+      IF( ICOMPZ.EQ.2 ) THEN
+         CALL DLASET( 'Full', N, N, ZERO, ONE, Z, LDZ )
+      END IF
+*
+*     Scale.
+*
+      ORGNRM = DLANST( 'M', N, D, E )
+      IF( ORGNRM.EQ.ZERO )
+     $   RETURN
+*
+      EPS = DLAMCH( 'Epsilon' )
+*
+      START = 1
+*
+*     while ( START <= N )
+*
+   10 CONTINUE
+      IF( START.LE.N ) THEN
+*
+*     Let END be the position of the next subdiagonal entry such that
+*     E( END ) <= TINY or END = N if no such subdiagonal exists.  The
+*     matrix identified by the elements between START and END
+*     constitutes an independent sub-problem.
+*
+         END = START
+   20    CONTINUE
+         IF( END.LT.N ) THEN
+            TINY = EPS*SQRT( ABS( D( END ) ) )*SQRT( ABS( D( END+1 ) ) )
+            IF( ABS( E( END ) ).GT.TINY ) THEN
+               END = END + 1
+               GO TO 20
+            END IF
+         END IF
+*
+*        (Sub) Problem determined.  Compute its size and solve it.
+*
+         M = END - START + 1
+         IF( M.EQ.1 ) THEN
+            START = END + 1
+            GO TO 10
+         END IF
+         IF( M.GT.SMLSIZ ) THEN
+            INFO = SMLSIZ
+*
+*           Scale.
+*
+            ORGNRM = DLANST( 'M', M, D( START ), E( START ) )
+            CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, M, 1, D( START ), M,
+     $                   INFO )
+            CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, M-1, 1, E( START ),
+     $                   M-1, INFO )
+*
+            IF( ICOMPZ.EQ.1 ) THEN
+               DTRTRW = 1
+            ELSE
+               DTRTRW = START
+            END IF
+            CALL DLAED0( ICOMPZ, N, M, D( START ), E( START ),
+     $                   Z( DTRTRW, START ), LDZ, WORK( 1 ), N,
+     $                   WORK( STOREZ ), IWORK, INFO )
+            IF( INFO.NE.0 ) THEN
+               INFO = ( INFO / ( M+1 )+START-1 )*( N+1 ) +
+     $                MOD( INFO, ( M+1 ) ) + START - 1
+               RETURN
+            END IF
+*
+*           Scale back.
+*
+            CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, M, 1, D( START ), M,
+     $                   INFO )
+*
+         ELSE
+            IF( ICOMPZ.EQ.1 ) THEN
+*
+*     Since QR won't update a Z matrix which is larger than the
+*     length of D, we must solve the sub-problem in a workspace and
+*     then multiply back into Z.
+*
+               CALL DSTEQR( 'I', M, D( START ), E( START ), WORK, M,
+     $                      WORK( M*M+1 ), INFO )
+               CALL DLACPY( 'A', N, M, Z( 1, START ), LDZ,
+     $                      WORK( STOREZ ), N )
+               CALL DGEMM( 'N', 'N', N, M, M, ONE, WORK( STOREZ ), LDZ,
+     $                     WORK, M, ZERO, Z( 1, START ), LDZ )
+            ELSE IF( ICOMPZ.EQ.2 ) THEN
+               CALL DSTEQR( 'I', M, D( START ), E( START ),
+     $                      Z( START, START ), LDZ, WORK, INFO )
+            ELSE
+               CALL DSTERF( M, D( START ), E( START ), INFO )
+            END IF
+            IF( INFO.NE.0 ) THEN
+               INFO = START*( N+1 ) + END
+               RETURN
+            END IF
+         END IF
+*
+         START = END + 1
+         GO TO 10
+      END IF
+*
+*     endwhile
+*
+*     If the problem split any number of times, then the eigenvalues
+*     will not be properly ordered.  Here we permute the eigenvalues
+*     (and the associated eigenvectors) into ascending order.
+*
+      IF( M.NE.N ) THEN
+         IF( ICOMPZ.EQ.0 ) THEN
+*
+*        Use Quick Sort
+*
+            CALL DLASRT( 'I', N, D, INFO )
+*
+         ELSE
+*
+*        Use Selection Sort to minimize swaps of eigenvectors
+*
+            DO 40 II = 2, N
+               I = II - 1
+               K = I
+               P = D( I )
+               DO 30 J = II, N
+                  IF( D( J ).LT.P ) THEN
+                     K = J
+                     P = D( J )
+                  END IF
+   30          CONTINUE
+               IF( K.NE.I ) THEN
+                  D( K ) = D( I )
+                  D( I ) = P
+                  CALL DSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 )
+               END IF
+   40       CONTINUE
+         END IF
+      END IF
+*
+      WORK( 1 ) = LWMIN
+      IWORK( 1 ) = LIWMIN
+*
+      RETURN
+*
+*     End of DSTEDC
+*
+      END
diff --git a/costa/native/external/lapack/dstegr.f b/costa/native/external/lapack/dstegr.f
new file mode 100644
index 000000000..2f2cd78b5
--- /dev/null
+++ b/costa/native/external/lapack/dstegr.f
@@ -0,0 +1,400 @@
+      SUBROUTINE DSTEGR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, ABSTOL,
+     $                   M, W, Z, LDZ, ISUPPZ, WORK, LWORK, IWORK,
+     $                   LIWORK, INFO )
+*
+*  -- LAPACK computational routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE
+      INTEGER            IL, INFO, IU, LDZ, LIWORK, LWORK, M, N
+      DOUBLE PRECISION   ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            ISUPPZ( * ), IWORK( * )
+      DOUBLE PRECISION   D( * ), E( * ), W( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+* DSTEGR computes selected eigenvalues and, optionally, eigenvectors
+* of a real symmetric tridiagonal matrix T.  Eigenvalues and
+* eigenvectors can be selected by specifying either a range of values
+* or a range of indices for the desired eigenvalues. The eigenvalues
+* are computed by the dqds algorithm, while orthogonal eigenvectors are
+* computed from various ``good'' L D L^T representations (also known as
+* Relatively Robust Representations). Gram-Schmidt orthogonalization is
+* avoided as far as possible. More specifically, the various steps of
+* the algorithm are as follows. For the i-th unreduced block of T,
+*     (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T
+*         is a relatively robust representation,
+*     (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high
+*         relative accuracy by the dqds algorithm,
+*     (c) If there is a cluster of close eigenvalues, "choose" sigma_i
+*         close to the cluster, and go to step (a),
+*     (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T,
+*         compute the corresponding eigenvector by forming a
+*         rank-revealing twisted factorization.
+*  The desired accuracy of the output can be specified by the input
+*  parameter ABSTOL.
+*
+*  For more details, see "A new O(n^2) algorithm for the symmetric
+*  tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon,
+*  Computer Science Division Technical Report No. UCB/CSD-97-971,
+*  UC Berkeley, May 1997.
+*
+*  Note 1 : Currently DSTEGR is only set up to find ALL the n
+*  eigenvalues and eigenvectors of T in O(n^2) time
+*  Note 2 : Currently the routine DSTEIN is called when an appropriate
+*  sigma_i cannot be chosen in step (c) above. DSTEIN invokes modified
+*  Gram-Schmidt when eigenvalues are close.
+*  Note 3 : DSTEGR works only on machines which follow ieee-754
+*  floating-point standard in their handling of infinities and NaNs.
+*  Normal execution of DSTEGR may create NaNs and infinities and hence
+*  may abort due to a floating point exception in environments which
+*  do not conform to the ieee standard.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found.
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found.
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+********** Only RANGE = 'A' is currently supported *********************
+*
+*  N       (input) INTEGER
+*          The order of the matrix.  N >= 0.
+*
+*  D       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the n diagonal elements of the tridiagonal matrix
+*          T. On exit, D is overwritten.
+*
+*  E       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix T in elements 1 to N-1 of E; E(N) need not be set.
+*          On exit, E is overwritten.
+*
+*  VL      (input) DOUBLE PRECISION
+*  VU      (input) DOUBLE PRECISION
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) DOUBLE PRECISION
+*          The absolute error tolerance for the
+*          eigenvalues/eigenvectors. IF JOBZ = 'V', the eigenvalues and
+*          eigenvectors output have residual norms bounded by ABSTOL,
+*          and the dot products between different eigenvectors are
+*          bounded by ABSTOL. If ABSTOL is less than N*EPS*|T|, then
+*          N*EPS*|T| will be used in its place, where EPS is the
+*          machine precision and |T| is the 1-norm of the tridiagonal
+*          matrix. The eigenvalues are computed to an accuracy of
+*          EPS*|T| irrespective of ABSTOL. If high relative accuracy
+*          is important, set ABSTOL to DLAMCH( 'Safe minimum' ).
+*          See Barlow and Demmel "Computing Accurate Eigensystems of
+*          Scaled Diagonally Dominant Matrices", LAPACK Working Note #7
+*          for a discussion of which matrices define their eigenvalues
+*          to high relative accuracy.
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          The first M elements contain the selected eigenvalues in
+*          ascending order.
+*
+*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) )
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix T
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  ISUPPZ  (output) INTEGER ARRAY, dimension ( 2*max(1,M) )
+*          The support of the eigenvectors in Z, i.e., the indices
+*          indicating the nonzero elements in Z. The i-th eigenvector
+*          is nonzero only in elements ISUPPZ( 2*i-1 ) through
+*          ISUPPZ( 2*i ).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal
+*          (and minimal) LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,18*N)
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.  LIWORK >= max(1,10*N)
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = 1, internal error in DLARRE,
+*                if INFO = 2, internal error in DLARRV.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Inderjit Dhillon, IBM Almaden, USA
+*     Osni Marques, LBNL/NERSC, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ
+      INTEGER            I, IBEGIN, IEND, IINDBL, IINDWK, IINFO, IINSPL,
+     $                   INDGRS, INDWOF, INDWRK, ITMP, J, JJ, LIWMIN,
+     $                   LWMIN, NSPLIT
+      DOUBLE PRECISION   BIGNUM, EPS, RMAX, RMIN, SAFMIN, SCALE, SMLNUM,
+     $                   THRESH, TMP, TNRM, TOL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, DLANST
+      EXTERNAL           LSAME, DLAMCH, DLANST
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARRE, DLARRV, DLASET, DSCAL, DSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+*
+      LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LIWORK.EQ.-1 ) )
+      LWMIN = 18*N
+      LIWMIN = 10*N
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -2
+*
+*     The following two lines need to be removed once the
+*     RANGE = 'V' and RANGE = 'I' options are provided.
+*
+      ELSE IF( VALEIG .OR. INDEIG ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN
+         INFO = -7
+      ELSE IF( INDEIG .AND. IL.LT.1 ) THEN
+         INFO = -8
+*     The following change should be made in DSTEVX also, otherwise
+*     IL can be specified as N+1 and IU as N.
+*     ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN
+      ELSE IF( INDEIG .AND. ( IU.LT.IL .OR. IU.GT.N ) ) THEN
+         INFO = -9
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -14
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -17
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -19
+      END IF
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSTEGR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( ALLEIG .OR. INDEIG ) THEN
+            M = 1
+            W( 1 ) = D( 1 )
+         ELSE
+            IF( VL.LT.D( 1 ) .AND. VU.GE.D( 1 ) ) THEN
+               M = 1
+               W( 1 ) = D( 1 )
+            END IF
+         END IF
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      EPS = DLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      SCALE = ONE
+      TNRM = DLANST( 'M', N, D, E )
+      IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN
+         SCALE = RMIN / TNRM
+      ELSE IF( TNRM.GT.RMAX ) THEN
+         SCALE = RMAX / TNRM
+      END IF
+      IF( SCALE.NE.ONE ) THEN
+         CALL DSCAL( N, SCALE, D, 1 )
+         CALL DSCAL( N-1, SCALE, E, 1 )
+         TNRM = TNRM*SCALE
+      END IF
+      INDGRS = 1
+      INDWOF = 2*N + 1
+      INDWRK = 3*N + 1
+*
+      IINSPL = 1
+      IINDBL = N + 1
+      IINDWK = 2*N + 1
+*
+      CALL DLASET( 'Full', N, N, ZERO, ZERO, Z, LDZ )
+*
+*     Compute the desired eigenvalues of the tridiagonal after splitting
+*     into smaller subblocks if the corresponding of-diagonal elements
+*     are small
+*
+      THRESH = EPS*TNRM
+      CALL DLARRE( N, D, E, THRESH, NSPLIT, IWORK( IINSPL ), M, W,
+     $             WORK( INDWOF ), WORK( INDGRS ), WORK( INDWRK ),
+     $             IINFO )
+      IF( IINFO.NE.0 ) THEN
+         INFO = 1
+         RETURN
+      END IF
+*
+      IF( WANTZ ) THEN
+*
+*        Compute the desired eigenvectors corresponding to the computed
+*        eigenvalues
+*
+         TOL = MAX( ABSTOL, DBLE( N )*THRESH )
+         IBEGIN = 1
+         DO 20 I = 1, NSPLIT
+            IEND = IWORK( IINSPL+I-1 )
+            DO 10 J = IBEGIN, IEND
+               IWORK( IINDBL+J-1 ) = I
+   10       CONTINUE
+            IBEGIN = IEND + 1
+   20    CONTINUE
+*
+         CALL DLARRV( N, D, E, IWORK( IINSPL ), M, W, IWORK( IINDBL ),
+     $                WORK( INDGRS ), TOL, Z, LDZ, ISUPPZ,
+     $                WORK( INDWRK ), IWORK( IINDWK ), IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = 2
+            RETURN
+         END IF
+*
+      END IF
+*
+      IBEGIN = 1
+      DO 40 I = 1, NSPLIT
+         IEND = IWORK( IINSPL+I-1 )
+         DO 30 J = IBEGIN, IEND
+            W( J ) = W( J ) + WORK( INDWOF+I-1 )
+   30    CONTINUE
+         IBEGIN = IEND + 1
+   40 CONTINUE
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( SCALE.NE.ONE ) THEN
+         CALL DSCAL( M, ONE / SCALE, W, 1 )
+      END IF
+*
+*     If eigenvalues are not in order, then sort them, along with
+*     eigenvectors.
+*
+      IF( NSPLIT.GT.1 ) THEN
+         DO 60 J = 1, M - 1
+            I = 0
+            TMP = W( J )
+            DO 50 JJ = J + 1, M
+               IF( W( JJ ).LT.TMP ) THEN
+                  I = JJ
+                  TMP = W( JJ )
+               END IF
+   50       CONTINUE
+            IF( I.NE.0 ) THEN
+               W( I ) = W( J )
+               W( J ) = TMP
+               IF( WANTZ ) THEN
+                  CALL DSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 )
+                  ITMP = ISUPPZ( 2*I-1 )
+                  ISUPPZ( 2*I-1 ) = ISUPPZ( 2*J-1 )
+                  ISUPPZ( 2*J-1 ) = ITMP
+                  ITMP = ISUPPZ( 2*I )
+                  ISUPPZ( 2*I ) = ISUPPZ( 2*J )
+                  ISUPPZ( 2*J ) = ITMP
+               END IF
+            END IF
+   60    CONTINUE
+      END IF
+*
+      WORK( 1 ) = LWMIN
+      IWORK( 1 ) = LIWMIN
+      RETURN
+*
+*     End of DSTEGR
+*
+      END
diff --git a/costa/native/external/lapack/dstein.f b/costa/native/external/lapack/dstein.f
new file mode 100644
index 000000000..48b5fd285
--- /dev/null
+++ b/costa/native/external/lapack/dstein.f
@@ -0,0 +1,362 @@
+      SUBROUTINE DSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, Z, LDZ, WORK,
+     $                   IWORK, IFAIL, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDZ, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IBLOCK( * ), IFAIL( * ), ISPLIT( * ),
+     $                   IWORK( * )
+      DOUBLE PRECISION   D( * ), E( * ), W( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSTEIN computes the eigenvectors of a real symmetric tridiagonal
+*  matrix T corresponding to specified eigenvalues, using inverse
+*  iteration.
+*
+*  The maximum number of iterations allowed for each eigenvector is
+*  specified by an internal parameter MAXITS (currently set to 5).
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix.  N >= 0.
+*
+*  D       (input) DOUBLE PRECISION array, dimension (N)
+*          The n diagonal elements of the tridiagonal matrix T.
+*
+*  E       (input) DOUBLE PRECISION array, dimension (N)
+*          The (n-1) subdiagonal elements of the tridiagonal matrix
+*          T, in elements 1 to N-1.  E(N) need not be set.
+*
+*  M       (input) INTEGER
+*          The number of eigenvectors to be found.  0 <= M <= N.
+*
+*  W       (input) DOUBLE PRECISION array, dimension (N)
+*          The first M elements of W contain the eigenvalues for
+*          which eigenvectors are to be computed.  The eigenvalues
+*          should be grouped by split-off block and ordered from
+*          smallest to largest within the block.  ( The output array
+*          W from DSTEBZ with ORDER = 'B' is expected here. )
+*
+*  IBLOCK  (input) INTEGER array, dimension (N)
+*          The submatrix indices associated with the corresponding
+*          eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to
+*          the first submatrix from the top, =2 if W(i) belongs to
+*          the second submatrix, etc.  ( The output array IBLOCK
+*          from DSTEBZ is expected here. )
+*
+*  ISPLIT  (input) INTEGER array, dimension (N)
+*          The splitting points, at which T breaks up into submatrices.
+*          The first submatrix consists of rows/columns 1 to
+*          ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1
+*          through ISPLIT( 2 ), etc.
+*          ( The output array ISPLIT from DSTEBZ is expected here. )
+*
+*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, M)
+*          The computed eigenvectors.  The eigenvector associated
+*          with the eigenvalue W(i) is stored in the i-th column of
+*          Z.  Any vector which fails to converge is set to its current
+*          iterate after MAXITS iterations.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= max(1,N).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (5*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  IFAIL   (output) INTEGER array, dimension (M)
+*          On normal exit, all elements of IFAIL are zero.
+*          If one or more eigenvectors fail to converge after
+*          MAXITS iterations, then their indices are stored in
+*          array IFAIL.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit.
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, then i eigenvectors failed to converge
+*               in MAXITS iterations.  Their indices are stored in
+*               array IFAIL.
+*
+*  Internal Parameters
+*  ===================
+*
+*  MAXITS  INTEGER, default = 5
+*          The maximum number of iterations performed.
+*
+*  EXTRA   INTEGER, default = 2
+*          The number of iterations performed after norm growth
+*          criterion is satisfied, should be at least 1.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TEN, ODM3, ODM1
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, TEN = 1.0D+1,
+     $                   ODM3 = 1.0D-3, ODM1 = 1.0D-1 )
+      INTEGER            MAXITS, EXTRA
+      PARAMETER          ( MAXITS = 5, EXTRA = 2 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            B1, BLKSIZ, BN, GPIND, I, IINFO, INDRV1,
+     $                   INDRV2, INDRV3, INDRV4, INDRV5, ITS, J, J1,
+     $                   JBLK, JMAX, NBLK, NRMCHK
+      DOUBLE PRECISION   DTPCRT, EPS, EPS1, NRM, ONENRM, ORTOL, PERTOL,
+     $                   SCL, SEP, TOL, XJ, XJM, ZTR
+*     ..
+*     .. Local Arrays ..
+      INTEGER            ISEED( 4 )
+*     ..
+*     .. External Functions ..
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DASUM, DDOT, DLAMCH, DNRM2
+      EXTERNAL           IDAMAX, DASUM, DDOT, DLAMCH, DNRM2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DCOPY, DLAGTF, DLAGTS, DLARNV, DSCAL,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      DO 10 I = 1, M
+         IFAIL( I ) = 0
+   10 CONTINUE
+*
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( M.LT.0 .OR. M.GT.N ) THEN
+         INFO = -4
+      ELSE IF( LDZ.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE
+         DO 20 J = 2, M
+            IF( IBLOCK( J ).LT.IBLOCK( J-1 ) ) THEN
+               INFO = -6
+               GO TO 30
+            END IF
+            IF( IBLOCK( J ).EQ.IBLOCK( J-1 ) .AND. W( J ).LT.W( J-1 ) )
+     $           THEN
+               INFO = -5
+               GO TO 30
+            END IF
+   20    CONTINUE
+   30    CONTINUE
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSTEIN', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. M.EQ.0 ) THEN
+         RETURN
+      ELSE IF( N.EQ.1 ) THEN
+         Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      EPS = DLAMCH( 'Precision' )
+*
+*     Initialize seed for random number generator DLARNV.
+*
+      DO 40 I = 1, 4
+         ISEED( I ) = 1
+   40 CONTINUE
+*
+*     Initialize pointers.
+*
+      INDRV1 = 0
+      INDRV2 = INDRV1 + N
+      INDRV3 = INDRV2 + N
+      INDRV4 = INDRV3 + N
+      INDRV5 = INDRV4 + N
+*
+*     Compute eigenvectors of matrix blocks.
+*
+      J1 = 1
+      DO 160 NBLK = 1, IBLOCK( M )
+*
+*        Find starting and ending indices of block nblk.
+*
+         IF( NBLK.EQ.1 ) THEN
+            B1 = 1
+         ELSE
+            B1 = ISPLIT( NBLK-1 ) + 1
+         END IF
+         BN = ISPLIT( NBLK )
+         BLKSIZ = BN - B1 + 1
+         IF( BLKSIZ.EQ.1 )
+     $      GO TO 60
+         GPIND = B1
+*
+*        Compute reorthogonalization criterion and stopping criterion.
+*
+         ONENRM = ABS( D( B1 ) ) + ABS( E( B1 ) )
+         ONENRM = MAX( ONENRM, ABS( D( BN ) )+ABS( E( BN-1 ) ) )
+         DO 50 I = B1 + 1, BN - 1
+            ONENRM = MAX( ONENRM, ABS( D( I ) )+ABS( E( I-1 ) )+
+     $               ABS( E( I ) ) )
+   50    CONTINUE
+         ORTOL = ODM3*ONENRM
+*
+         DTPCRT = SQRT( ODM1 / BLKSIZ )
+*
+*        Loop through eigenvalues of block nblk.
+*
+   60    CONTINUE
+         JBLK = 0
+         DO 150 J = J1, M
+            IF( IBLOCK( J ).NE.NBLK ) THEN
+               J1 = J
+               GO TO 160
+            END IF
+            JBLK = JBLK + 1
+            XJ = W( J )
+*
+*           Skip all the work if the block size is one.
+*
+            IF( BLKSIZ.EQ.1 ) THEN
+               WORK( INDRV1+1 ) = ONE
+               GO TO 120
+            END IF
+*
+*           If eigenvalues j and j-1 are too close, add a relatively
+*           small perturbation.
+*
+            IF( JBLK.GT.1 ) THEN
+               EPS1 = ABS( EPS*XJ )
+               PERTOL = TEN*EPS1
+               SEP = XJ - XJM
+               IF( SEP.LT.PERTOL )
+     $            XJ = XJM + PERTOL
+            END IF
+*
+            ITS = 0
+            NRMCHK = 0
+*
+*           Get random starting vector.
+*
+            CALL DLARNV( 2, ISEED, BLKSIZ, WORK( INDRV1+1 ) )
+*
+*           Copy the matrix T so it won't be destroyed in factorization.
+*
+            CALL DCOPY( BLKSIZ, D( B1 ), 1, WORK( INDRV4+1 ), 1 )
+            CALL DCOPY( BLKSIZ-1, E( B1 ), 1, WORK( INDRV2+2 ), 1 )
+            CALL DCOPY( BLKSIZ-1, E( B1 ), 1, WORK( INDRV3+1 ), 1 )
+*
+*           Compute LU factors with partial pivoting  ( PT = LU )
+*
+            TOL = ZERO
+            CALL DLAGTF( BLKSIZ, WORK( INDRV4+1 ), XJ, WORK( INDRV2+2 ),
+     $                   WORK( INDRV3+1 ), TOL, WORK( INDRV5+1 ), IWORK,
+     $                   IINFO )
+*
+*           Update iteration count.
+*
+   70       CONTINUE
+            ITS = ITS + 1
+            IF( ITS.GT.MAXITS )
+     $         GO TO 100
+*
+*           Normalize and scale the righthand side vector Pb.
+*
+            SCL = BLKSIZ*ONENRM*MAX( EPS,
+     $            ABS( WORK( INDRV4+BLKSIZ ) ) ) /
+     $            DASUM( BLKSIZ, WORK( INDRV1+1 ), 1 )
+            CALL DSCAL( BLKSIZ, SCL, WORK( INDRV1+1 ), 1 )
+*
+*           Solve the system LU = Pb.
+*
+            CALL DLAGTS( -1, BLKSIZ, WORK( INDRV4+1 ), WORK( INDRV2+2 ),
+     $                   WORK( INDRV3+1 ), WORK( INDRV5+1 ), IWORK,
+     $                   WORK( INDRV1+1 ), TOL, IINFO )
+*
+*           Reorthogonalize by modified Gram-Schmidt if eigenvalues are
+*           close enough.
+*
+            IF( JBLK.EQ.1 )
+     $         GO TO 90
+            IF( ABS( XJ-XJM ).GT.ORTOL )
+     $         GPIND = J
+            IF( GPIND.NE.J ) THEN
+               DO 80 I = GPIND, J - 1
+                  ZTR = -DDOT( BLKSIZ, WORK( INDRV1+1 ), 1, Z( B1, I ),
+     $                  1 )
+                  CALL DAXPY( BLKSIZ, ZTR, Z( B1, I ), 1,
+     $                        WORK( INDRV1+1 ), 1 )
+   80          CONTINUE
+            END IF
+*
+*           Check the infinity norm of the iterate.
+*
+   90       CONTINUE
+            JMAX = IDAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 )
+            NRM = ABS( WORK( INDRV1+JMAX ) )
+*
+*           Continue for additional iterations after norm reaches
+*           stopping criterion.
+*
+            IF( NRM.LT.DTPCRT )
+     $         GO TO 70
+            NRMCHK = NRMCHK + 1
+            IF( NRMCHK.LT.EXTRA+1 )
+     $         GO TO 70
+*
+            GO TO 110
+*
+*           If stopping criterion was not satisfied, update info and
+*           store eigenvector number in array ifail.
+*
+  100       CONTINUE
+            INFO = INFO + 1
+            IFAIL( INFO ) = J
+*
+*           Accept iterate as jth eigenvector.
+*
+  110       CONTINUE
+            SCL = ONE / DNRM2( BLKSIZ, WORK( INDRV1+1 ), 1 )
+            JMAX = IDAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 )
+            IF( WORK( INDRV1+JMAX ).LT.ZERO )
+     $         SCL = -SCL
+            CALL DSCAL( BLKSIZ, SCL, WORK( INDRV1+1 ), 1 )
+  120       CONTINUE
+            DO 130 I = 1, N
+               Z( I, J ) = ZERO
+  130       CONTINUE
+            DO 140 I = 1, BLKSIZ
+               Z( B1+I-1, J ) = WORK( INDRV1+I )
+  140       CONTINUE
+*
+*           Save the shift to check eigenvalue spacing at next
+*           iteration.
+*
+            XJM = XJ
+*
+  150    CONTINUE
+  160 CONTINUE
+*
+      RETURN
+*
+*     End of DSTEIN
+*
+      END
diff --git a/costa/native/external/lapack/dsteqr.f b/costa/native/external/lapack/dsteqr.f
new file mode 100644
index 000000000..20faca125
--- /dev/null
+++ b/costa/native/external/lapack/dsteqr.f
@@ -0,0 +1,501 @@
+      SUBROUTINE DSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPZ
+      INTEGER            INFO, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * ), E( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSTEQR computes all eigenvalues and, optionally, eigenvectors of a
+*  symmetric tridiagonal matrix using the implicit QL or QR method.
+*  The eigenvectors of a full or band symmetric matrix can also be found
+*  if DSYTRD or DSPTRD or DSBTRD has been used to reduce this matrix to
+*  tridiagonal form.
+*
+*  Arguments
+*  =========
+*
+*  COMPZ   (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only.
+*          = 'V':  Compute eigenvalues and eigenvectors of the original
+*                  symmetric matrix.  On entry, Z must contain the
+*                  orthogonal matrix used to reduce the original matrix
+*                  to tridiagonal form.
+*          = 'I':  Compute eigenvalues and eigenvectors of the
+*                  tridiagonal matrix.  Z is initialized to the identity
+*                  matrix.
+*
+*  N       (input) INTEGER
+*          The order of the matrix.  N >= 0.
+*
+*  D       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the diagonal elements of the tridiagonal matrix.
+*          On exit, if INFO = 0, the eigenvalues in ascending order.
+*
+*  E       (input/output) DOUBLE PRECISION array, dimension (N-1)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix.
+*          On exit, E has been destroyed.
+*
+*  Z       (input/output) DOUBLE PRECISION array, dimension (LDZ, N)
+*          On entry, if  COMPZ = 'V', then Z contains the orthogonal
+*          matrix used in the reduction to tridiagonal form.
+*          On exit, if INFO = 0, then if  COMPZ = 'V', Z contains the
+*          orthonormal eigenvectors of the original symmetric matrix,
+*          and if COMPZ = 'I', Z contains the orthonormal eigenvectors
+*          of the symmetric tridiagonal matrix.
+*          If COMPZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          eigenvectors are desired, then  LDZ >= max(1,N).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2))
+*          If COMPZ = 'N', then WORK is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  the algorithm has failed to find all the eigenvalues in
+*                a total of 30*N iterations; if INFO = i, then i
+*                elements of E have not converged to zero; on exit, D
+*                and E contain the elements of a symmetric tridiagonal
+*                matrix which is orthogonally similar to the original
+*                matrix.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO, THREE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0,
+     $                   THREE = 3.0D0 )
+      INTEGER            MAXIT
+      PARAMETER          ( MAXIT = 30 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, ICOMPZ, II, ISCALE, J, JTOT, K, L, L1, LEND,
+     $                   LENDM1, LENDP1, LENDSV, LM1, LSV, M, MM, MM1,
+     $                   NM1, NMAXIT
+      DOUBLE PRECISION   ANORM, B, C, EPS, EPS2, F, G, P, R, RT1, RT2,
+     $                   S, SAFMAX, SAFMIN, SSFMAX, SSFMIN, TST
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, DLANST, DLAPY2
+      EXTERNAL           LSAME, DLAMCH, DLANST, DLAPY2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLAE2, DLAEV2, DLARTG, DLASCL, DLASET, DLASR,
+     $                   DLASRT, DSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SIGN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( LSAME( COMPZ, 'N' ) ) THEN
+         ICOMPZ = 0
+      ELSE IF( LSAME( COMPZ, 'V' ) ) THEN
+         ICOMPZ = 1
+      ELSE IF( LSAME( COMPZ, 'I' ) ) THEN
+         ICOMPZ = 2
+      ELSE
+         ICOMPZ = -1
+      END IF
+      IF( ICOMPZ.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1,
+     $         N ) ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSTEQR', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( ICOMPZ.EQ.2 )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Determine the unit roundoff and over/underflow thresholds.
+*
+      EPS = DLAMCH( 'E' )
+      EPS2 = EPS**2
+      SAFMIN = DLAMCH( 'S' )
+      SAFMAX = ONE / SAFMIN
+      SSFMAX = SQRT( SAFMAX ) / THREE
+      SSFMIN = SQRT( SAFMIN ) / EPS2
+*
+*     Compute the eigenvalues and eigenvectors of the tridiagonal
+*     matrix.
+*
+      IF( ICOMPZ.EQ.2 )
+     $   CALL DLASET( 'Full', N, N, ZERO, ONE, Z, LDZ )
+*
+      NMAXIT = N*MAXIT
+      JTOT = 0
+*
+*     Determine where the matrix splits and choose QL or QR iteration
+*     for each block, according to whether top or bottom diagonal
+*     element is smaller.
+*
+      L1 = 1
+      NM1 = N - 1
+*
+   10 CONTINUE
+      IF( L1.GT.N )
+     $   GO TO 160
+      IF( L1.GT.1 )
+     $   E( L1-1 ) = ZERO
+      IF( L1.LE.NM1 ) THEN
+         DO 20 M = L1, NM1
+            TST = ABS( E( M ) )
+            IF( TST.EQ.ZERO )
+     $         GO TO 30
+            IF( TST.LE.( SQRT( ABS( D( M ) ) )*SQRT( ABS( D( M+
+     $          1 ) ) ) )*EPS ) THEN
+               E( M ) = ZERO
+               GO TO 30
+            END IF
+   20    CONTINUE
+      END IF
+      M = N
+*
+   30 CONTINUE
+      L = L1
+      LSV = L
+      LEND = M
+      LENDSV = LEND
+      L1 = M + 1
+      IF( LEND.EQ.L )
+     $   GO TO 10
+*
+*     Scale submatrix in rows and columns L to LEND
+*
+      ANORM = DLANST( 'I', LEND-L+1, D( L ), E( L ) )
+      ISCALE = 0
+      IF( ANORM.EQ.ZERO )
+     $   GO TO 10
+      IF( ANORM.GT.SSFMAX ) THEN
+         ISCALE = 1
+         CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N,
+     $                INFO )
+         CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L, 1, E( L ), N,
+     $                INFO )
+      ELSE IF( ANORM.LT.SSFMIN ) THEN
+         ISCALE = 2
+         CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L+1, 1, D( L ), N,
+     $                INFO )
+         CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L, 1, E( L ), N,
+     $                INFO )
+      END IF
+*
+*     Choose between QL and QR iteration
+*
+      IF( ABS( D( LEND ) ).LT.ABS( D( L ) ) ) THEN
+         LEND = LSV
+         L = LENDSV
+      END IF
+*
+      IF( LEND.GT.L ) THEN
+*
+*        QL Iteration
+*
+*        Look for small subdiagonal element.
+*
+   40    CONTINUE
+         IF( L.NE.LEND ) THEN
+            LENDM1 = LEND - 1
+            DO 50 M = L, LENDM1
+               TST = ABS( E( M ) )**2
+               IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M+1 ) )+
+     $             SAFMIN )GO TO 60
+   50       CONTINUE
+         END IF
+*
+         M = LEND
+*
+   60    CONTINUE
+         IF( M.LT.LEND )
+     $      E( M ) = ZERO
+         P = D( L )
+         IF( M.EQ.L )
+     $      GO TO 80
+*
+*        If remaining matrix is 2-by-2, use DLAE2 or SLAEV2
+*        to compute its eigensystem.
+*
+         IF( M.EQ.L+1 ) THEN
+            IF( ICOMPZ.GT.0 ) THEN
+               CALL DLAEV2( D( L ), E( L ), D( L+1 ), RT1, RT2, C, S )
+               WORK( L ) = C
+               WORK( N-1+L ) = S
+               CALL DLASR( 'R', 'V', 'B', N, 2, WORK( L ),
+     $                     WORK( N-1+L ), Z( 1, L ), LDZ )
+            ELSE
+               CALL DLAE2( D( L ), E( L ), D( L+1 ), RT1, RT2 )
+            END IF
+            D( L ) = RT1
+            D( L+1 ) = RT2
+            E( L ) = ZERO
+            L = L + 2
+            IF( L.LE.LEND )
+     $         GO TO 40
+            GO TO 140
+         END IF
+*
+         IF( JTOT.EQ.NMAXIT )
+     $      GO TO 140
+         JTOT = JTOT + 1
+*
+*        Form shift.
+*
+         G = ( D( L+1 )-P ) / ( TWO*E( L ) )
+         R = DLAPY2( G, ONE )
+         G = D( M ) - P + ( E( L ) / ( G+SIGN( R, G ) ) )
+*
+         S = ONE
+         C = ONE
+         P = ZERO
+*
+*        Inner loop
+*
+         MM1 = M - 1
+         DO 70 I = MM1, L, -1
+            F = S*E( I )
+            B = C*E( I )
+            CALL DLARTG( G, F, C, S, R )
+            IF( I.NE.M-1 )
+     $         E( I+1 ) = R
+            G = D( I+1 ) - P
+            R = ( D( I )-G )*S + TWO*C*B
+            P = S*R
+            D( I+1 ) = G + P
+            G = C*R - B
+*
+*           If eigenvectors are desired, then save rotations.
+*
+            IF( ICOMPZ.GT.0 ) THEN
+               WORK( I ) = C
+               WORK( N-1+I ) = -S
+            END IF
+*
+   70    CONTINUE
+*
+*        If eigenvectors are desired, then apply saved rotations.
+*
+         IF( ICOMPZ.GT.0 ) THEN
+            MM = M - L + 1
+            CALL DLASR( 'R', 'V', 'B', N, MM, WORK( L ), WORK( N-1+L ),
+     $                  Z( 1, L ), LDZ )
+         END IF
+*
+         D( L ) = D( L ) - P
+         E( L ) = G
+         GO TO 40
+*
+*        Eigenvalue found.
+*
+   80    CONTINUE
+         D( L ) = P
+*
+         L = L + 1
+         IF( L.LE.LEND )
+     $      GO TO 40
+         GO TO 140
+*
+      ELSE
+*
+*        QR Iteration
+*
+*        Look for small superdiagonal element.
+*
+   90    CONTINUE
+         IF( L.NE.LEND ) THEN
+            LENDP1 = LEND + 1
+            DO 100 M = L, LENDP1, -1
+               TST = ABS( E( M-1 ) )**2
+               IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M-1 ) )+
+     $             SAFMIN )GO TO 110
+  100       CONTINUE
+         END IF
+*
+         M = LEND
+*
+  110    CONTINUE
+         IF( M.GT.LEND )
+     $      E( M-1 ) = ZERO
+         P = D( L )
+         IF( M.EQ.L )
+     $      GO TO 130
+*
+*        If remaining matrix is 2-by-2, use DLAE2 or SLAEV2
+*        to compute its eigensystem.
+*
+         IF( M.EQ.L-1 ) THEN
+            IF( ICOMPZ.GT.0 ) THEN
+               CALL DLAEV2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2, C, S )
+               WORK( M ) = C
+               WORK( N-1+M ) = S
+               CALL DLASR( 'R', 'V', 'F', N, 2, WORK( M ),
+     $                     WORK( N-1+M ), Z( 1, L-1 ), LDZ )
+            ELSE
+               CALL DLAE2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2 )
+            END IF
+            D( L-1 ) = RT1
+            D( L ) = RT2
+            E( L-1 ) = ZERO
+            L = L - 2
+            IF( L.GE.LEND )
+     $         GO TO 90
+            GO TO 140
+         END IF
+*
+         IF( JTOT.EQ.NMAXIT )
+     $      GO TO 140
+         JTOT = JTOT + 1
+*
+*        Form shift.
+*
+         G = ( D( L-1 )-P ) / ( TWO*E( L-1 ) )
+         R = DLAPY2( G, ONE )
+         G = D( M ) - P + ( E( L-1 ) / ( G+SIGN( R, G ) ) )
+*
+         S = ONE
+         C = ONE
+         P = ZERO
+*
+*        Inner loop
+*
+         LM1 = L - 1
+         DO 120 I = M, LM1
+            F = S*E( I )
+            B = C*E( I )
+            CALL DLARTG( G, F, C, S, R )
+            IF( I.NE.M )
+     $         E( I-1 ) = R
+            G = D( I ) - P
+            R = ( D( I+1 )-G )*S + TWO*C*B
+            P = S*R
+            D( I ) = G + P
+            G = C*R - B
+*
+*           If eigenvectors are desired, then save rotations.
+*
+            IF( ICOMPZ.GT.0 ) THEN
+               WORK( I ) = C
+               WORK( N-1+I ) = S
+            END IF
+*
+  120    CONTINUE
+*
+*        If eigenvectors are desired, then apply saved rotations.
+*
+         IF( ICOMPZ.GT.0 ) THEN
+            MM = L - M + 1
+            CALL DLASR( 'R', 'V', 'F', N, MM, WORK( M ), WORK( N-1+M ),
+     $                  Z( 1, M ), LDZ )
+         END IF
+*
+         D( L ) = D( L ) - P
+         E( LM1 ) = G
+         GO TO 90
+*
+*        Eigenvalue found.
+*
+  130    CONTINUE
+         D( L ) = P
+*
+         L = L - 1
+         IF( L.GE.LEND )
+     $      GO TO 90
+         GO TO 140
+*
+      END IF
+*
+*     Undo scaling if necessary
+*
+  140 CONTINUE
+      IF( ISCALE.EQ.1 ) THEN
+         CALL DLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV+1, 1,
+     $                D( LSV ), N, INFO )
+         CALL DLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV, 1, E( LSV ),
+     $                N, INFO )
+      ELSE IF( ISCALE.EQ.2 ) THEN
+         CALL DLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV+1, 1,
+     $                D( LSV ), N, INFO )
+         CALL DLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV, 1, E( LSV ),
+     $                N, INFO )
+      END IF
+*
+*     Check for no convergence to an eigenvalue after a total
+*     of N*MAXIT iterations.
+*
+      IF( JTOT.LT.NMAXIT )
+     $   GO TO 10
+      DO 150 I = 1, N - 1
+         IF( E( I ).NE.ZERO )
+     $      INFO = INFO + 1
+  150 CONTINUE
+      GO TO 190
+*
+*     Order eigenvalues and eigenvectors.
+*
+  160 CONTINUE
+      IF( ICOMPZ.EQ.0 ) THEN
+*
+*        Use Quick Sort
+*
+         CALL DLASRT( 'I', N, D, INFO )
+*
+      ELSE
+*
+*        Use Selection Sort to minimize swaps of eigenvectors
+*
+         DO 180 II = 2, N
+            I = II - 1
+            K = I
+            P = D( I )
+            DO 170 J = II, N
+               IF( D( J ).LT.P ) THEN
+                  K = J
+                  P = D( J )
+               END IF
+  170       CONTINUE
+            IF( K.NE.I ) THEN
+               D( K ) = D( I )
+               D( I ) = P
+               CALL DSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 )
+            END IF
+  180    CONTINUE
+      END IF
+*
+  190 CONTINUE
+      RETURN
+*
+*     End of DSTEQR
+*
+      END
diff --git a/costa/native/external/lapack/dsterf.f b/costa/native/external/lapack/dsterf.f
new file mode 100644
index 000000000..91a32bd26
--- /dev/null
+++ b/costa/native/external/lapack/dsterf.f
@@ -0,0 +1,365 @@
+      SUBROUTINE DSTERF( N, D, E, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * ), E( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSTERF computes all eigenvalues of a symmetric tridiagonal matrix
+*  using the Pal-Walker-Kahan variant of the QL or QR algorithm.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix.  N >= 0.
+*
+*  D       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the n diagonal elements of the tridiagonal matrix.
+*          On exit, if INFO = 0, the eigenvalues in ascending order.
+*
+*  E       (input/output) DOUBLE PRECISION array, dimension (N-1)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix.
+*          On exit, E has been destroyed.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  the algorithm failed to find all of the eigenvalues in
+*                a total of 30*N iterations; if INFO = i, then i
+*                elements of E have not converged to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO, THREE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0,
+     $                   THREE = 3.0D0 )
+      INTEGER            MAXIT
+      PARAMETER          ( MAXIT = 30 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, ISCALE, JTOT, L, L1, LEND, LENDSV, LSV, M,
+     $                   NMAXIT
+      DOUBLE PRECISION   ALPHA, ANORM, BB, C, EPS, EPS2, GAMMA, OLDC,
+     $                   OLDGAM, P, R, RT1, RT2, RTE, S, SAFMAX, SAFMIN,
+     $                   SIGMA, SSFMAX, SSFMIN
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH, DLANST, DLAPY2
+      EXTERNAL           DLAMCH, DLANST, DLAPY2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLAE2, DLASCL, DLASRT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, SIGN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+*     Quick return if possible
+*
+      IF( N.LT.0 ) THEN
+         INFO = -1
+         CALL XERBLA( 'DSTERF', -INFO )
+         RETURN
+      END IF
+      IF( N.LE.1 )
+     $   RETURN
+*
+*     Determine the unit roundoff for this environment.
+*
+      EPS = DLAMCH( 'E' )
+      EPS2 = EPS**2
+      SAFMIN = DLAMCH( 'S' )
+      SAFMAX = ONE / SAFMIN
+      SSFMAX = SQRT( SAFMAX ) / THREE
+      SSFMIN = SQRT( SAFMIN ) / EPS2
+*
+*     Compute the eigenvalues of the tridiagonal matrix.
+*
+      NMAXIT = N*MAXIT
+      SIGMA = ZERO
+      JTOT = 0
+*
+*     Determine where the matrix splits and choose QL or QR iteration
+*     for each block, according to whether top or bottom diagonal
+*     element is smaller.
+*
+      L1 = 1
+*
+   10 CONTINUE
+      IF( L1.GT.N )
+     $   GO TO 170
+      IF( L1.GT.1 )
+     $   E( L1-1 ) = ZERO
+      DO 20 M = L1, N - 1
+         IF( ABS( E( M ) ).LE.( SQRT( ABS( D( M ) ) )*SQRT( ABS( D( M+
+     $       1 ) ) ) )*EPS ) THEN
+            E( M ) = ZERO
+            GO TO 30
+         END IF
+   20 CONTINUE
+      M = N
+*
+   30 CONTINUE
+      L = L1
+      LSV = L
+      LEND = M
+      LENDSV = LEND
+      L1 = M + 1
+      IF( LEND.EQ.L )
+     $   GO TO 10
+*
+*     Scale submatrix in rows and columns L to LEND
+*
+      ANORM = DLANST( 'I', LEND-L+1, D( L ), E( L ) )
+      ISCALE = 0
+      IF( ANORM.GT.SSFMAX ) THEN
+         ISCALE = 1
+         CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N,
+     $                INFO )
+         CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L, 1, E( L ), N,
+     $                INFO )
+      ELSE IF( ANORM.LT.SSFMIN ) THEN
+         ISCALE = 2
+         CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L+1, 1, D( L ), N,
+     $                INFO )
+         CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L, 1, E( L ), N,
+     $                INFO )
+      END IF
+*
+      DO 40 I = L, LEND - 1
+         E( I ) = E( I )**2
+   40 CONTINUE
+*
+*     Choose between QL and QR iteration
+*
+      IF( ABS( D( LEND ) ).LT.ABS( D( L ) ) ) THEN
+         LEND = LSV
+         L = LENDSV
+      END IF
+*
+      IF( LEND.GE.L ) THEN
+*
+*        QL Iteration
+*
+*        Look for small subdiagonal element.
+*
+   50    CONTINUE
+         IF( L.NE.LEND ) THEN
+            DO 60 M = L, LEND - 1
+               IF( ABS( E( M ) ).LE.EPS2*ABS( D( M )*D( M+1 ) ) )
+     $            GO TO 70
+   60       CONTINUE
+         END IF
+         M = LEND
+*
+   70    CONTINUE
+         IF( M.LT.LEND )
+     $      E( M ) = ZERO
+         P = D( L )
+         IF( M.EQ.L )
+     $      GO TO 90
+*
+*        If remaining matrix is 2 by 2, use DLAE2 to compute its
+*        eigenvalues.
+*
+         IF( M.EQ.L+1 ) THEN
+            RTE = SQRT( E( L ) )
+            CALL DLAE2( D( L ), RTE, D( L+1 ), RT1, RT2 )
+            D( L ) = RT1
+            D( L+1 ) = RT2
+            E( L ) = ZERO
+            L = L + 2
+            IF( L.LE.LEND )
+     $         GO TO 50
+            GO TO 150
+         END IF
+*
+         IF( JTOT.EQ.NMAXIT )
+     $      GO TO 150
+         JTOT = JTOT + 1
+*
+*        Form shift.
+*
+         RTE = SQRT( E( L ) )
+         SIGMA = ( D( L+1 )-P ) / ( TWO*RTE )
+         R = DLAPY2( SIGMA, ONE )
+         SIGMA = P - ( RTE / ( SIGMA+SIGN( R, SIGMA ) ) )
+*
+         C = ONE
+         S = ZERO
+         GAMMA = D( M ) - SIGMA
+         P = GAMMA*GAMMA
+*
+*        Inner loop
+*
+         DO 80 I = M - 1, L, -1
+            BB = E( I )
+            R = P + BB
+            IF( I.NE.M-1 )
+     $         E( I+1 ) = S*R
+            OLDC = C
+            C = P / R
+            S = BB / R
+            OLDGAM = GAMMA
+            ALPHA = D( I )
+            GAMMA = C*( ALPHA-SIGMA ) - S*OLDGAM
+            D( I+1 ) = OLDGAM + ( ALPHA-GAMMA )
+            IF( C.NE.ZERO ) THEN
+               P = ( GAMMA*GAMMA ) / C
+            ELSE
+               P = OLDC*BB
+            END IF
+   80    CONTINUE
+*
+         E( L ) = S*P
+         D( L ) = SIGMA + GAMMA
+         GO TO 50
+*
+*        Eigenvalue found.
+*
+   90    CONTINUE
+         D( L ) = P
+*
+         L = L + 1
+         IF( L.LE.LEND )
+     $      GO TO 50
+         GO TO 150
+*
+      ELSE
+*
+*        QR Iteration
+*
+*        Look for small superdiagonal element.
+*
+  100    CONTINUE
+         DO 110 M = L, LEND + 1, -1
+            IF( ABS( E( M-1 ) ).LE.EPS2*ABS( D( M )*D( M-1 ) ) )
+     $         GO TO 120
+  110    CONTINUE
+         M = LEND
+*
+  120    CONTINUE
+         IF( M.GT.LEND )
+     $      E( M-1 ) = ZERO
+         P = D( L )
+         IF( M.EQ.L )
+     $      GO TO 140
+*
+*        If remaining matrix is 2 by 2, use DLAE2 to compute its
+*        eigenvalues.
+*
+         IF( M.EQ.L-1 ) THEN
+            RTE = SQRT( E( L-1 ) )
+            CALL DLAE2( D( L ), RTE, D( L-1 ), RT1, RT2 )
+            D( L ) = RT1
+            D( L-1 ) = RT2
+            E( L-1 ) = ZERO
+            L = L - 2
+            IF( L.GE.LEND )
+     $         GO TO 100
+            GO TO 150
+         END IF
+*
+         IF( JTOT.EQ.NMAXIT )
+     $      GO TO 150
+         JTOT = JTOT + 1
+*
+*        Form shift.
+*
+         RTE = SQRT( E( L-1 ) )
+         SIGMA = ( D( L-1 )-P ) / ( TWO*RTE )
+         R = DLAPY2( SIGMA, ONE )
+         SIGMA = P - ( RTE / ( SIGMA+SIGN( R, SIGMA ) ) )
+*
+         C = ONE
+         S = ZERO
+         GAMMA = D( M ) - SIGMA
+         P = GAMMA*GAMMA
+*
+*        Inner loop
+*
+         DO 130 I = M, L - 1
+            BB = E( I )
+            R = P + BB
+            IF( I.NE.M )
+     $         E( I-1 ) = S*R
+            OLDC = C
+            C = P / R
+            S = BB / R
+            OLDGAM = GAMMA
+            ALPHA = D( I+1 )
+            GAMMA = C*( ALPHA-SIGMA ) - S*OLDGAM
+            D( I ) = OLDGAM + ( ALPHA-GAMMA )
+            IF( C.NE.ZERO ) THEN
+               P = ( GAMMA*GAMMA ) / C
+            ELSE
+               P = OLDC*BB
+            END IF
+  130    CONTINUE
+*
+         E( L-1 ) = S*P
+         D( L ) = SIGMA + GAMMA
+         GO TO 100
+*
+*        Eigenvalue found.
+*
+  140    CONTINUE
+         D( L ) = P
+*
+         L = L - 1
+         IF( L.GE.LEND )
+     $      GO TO 100
+         GO TO 150
+*
+      END IF
+*
+*     Undo scaling if necessary
+*
+  150 CONTINUE
+      IF( ISCALE.EQ.1 )
+     $   CALL DLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV+1, 1,
+     $                D( LSV ), N, INFO )
+      IF( ISCALE.EQ.2 )
+     $   CALL DLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV+1, 1,
+     $                D( LSV ), N, INFO )
+*
+*     Check for no convergence to an eigenvalue after a total
+*     of N*MAXIT iterations.
+*
+      IF( JTOT.LT.NMAXIT )
+     $   GO TO 10
+      DO 160 I = 1, N - 1
+         IF( E( I ).NE.ZERO )
+     $      INFO = INFO + 1
+  160 CONTINUE
+      GO TO 180
+*
+*     Sort eigenvalues in increasing order.
+*
+  170 CONTINUE
+      CALL DLASRT( 'I', N, D, INFO )
+*
+  180 CONTINUE
+      RETURN
+*
+*     End of DSTERF
+*
+      END
diff --git a/costa/native/external/lapack/dstev.f b/costa/native/external/lapack/dstev.f
new file mode 100644
index 000000000..5b7ed867f
--- /dev/null
+++ b/costa/native/external/lapack/dstev.f
@@ -0,0 +1,165 @@
+      SUBROUTINE DSTEV( JOBZ, N, D, E, Z, LDZ, WORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ
+      INTEGER            INFO, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * ), E( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSTEV computes all eigenvalues and, optionally, eigenvectors of a
+*  real symmetric tridiagonal matrix A.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  N       (input) INTEGER
+*          The order of the matrix.  N >= 0.
+*
+*  D       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the n diagonal elements of the tridiagonal matrix
+*          A.
+*          On exit, if INFO = 0, the eigenvalues in ascending order.
+*
+*  E       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix A, stored in elements 1 to N-1 of E; E(N) need not
+*          be set, but is used by the routine.
+*          On exit, the contents of E are destroyed.
+*
+*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal
+*          eigenvectors of the matrix A, with the i-th column of Z
+*          holding the eigenvector associated with D(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2))
+*          If JOBZ = 'N', WORK is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the algorithm failed to converge; i
+*                off-diagonal elements of E did not converge to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            WANTZ
+      INTEGER            IMAX, ISCALE
+      DOUBLE PRECISION   BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, SMLNUM,
+     $                   TNRM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, DLANST
+      EXTERNAL           LSAME, DLAMCH, DLANST
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSCAL, DSTEQR, DSTERF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -6
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSTEV ', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      EPS = DLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = SQRT( BIGNUM )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ISCALE = 0
+      TNRM = DLANST( 'M', N, D, E )
+      IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / TNRM
+      ELSE IF( TNRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / TNRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         CALL DSCAL( N, SIGMA, D, 1 )
+         CALL DSCAL( N-1, SIGMA, E( 1 ), 1 )
+      END IF
+*
+*     For eigenvalues only, call DSTERF.  For eigenvalues and
+*     eigenvectors, call DSTEQR.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL DSTERF( N, D, E, INFO )
+      ELSE
+         CALL DSTEQR( 'I', N, D, E, Z, LDZ, WORK, INFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = N
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL DSCAL( IMAX, ONE / SIGMA, D, 1 )
+      END IF
+*
+      RETURN
+*
+*     End of DSTEV
+*
+      END
diff --git a/costa/native/external/lapack/dstevd.f b/costa/native/external/lapack/dstevd.f
new file mode 100644
index 000000000..1edcfb1b2
--- /dev/null
+++ b/costa/native/external/lapack/dstevd.f
@@ -0,0 +1,217 @@
+      SUBROUTINE DSTEVD( JOBZ, N, D, E, Z, LDZ, WORK, LWORK, IWORK,
+     $                   LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ
+      INTEGER            INFO, LDZ, LIWORK, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   D( * ), E( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSTEVD computes all eigenvalues and, optionally, eigenvectors of a
+*  real symmetric tridiagonal matrix. If eigenvectors are desired, it
+*  uses a divide and conquer algorithm.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  N       (input) INTEGER
+*          The order of the matrix.  N >= 0.
+*
+*  D       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the n diagonal elements of the tridiagonal matrix
+*          A.
+*          On exit, if INFO = 0, the eigenvalues in ascending order.
+*
+*  E       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix A, stored in elements 1 to N-1 of E; E(N) need not
+*          be set, but is used by the routine.
+*          On exit, the contents of E are destroyed.
+*
+*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal
+*          eigenvectors of the matrix A, with the i-th column of Z
+*          holding the eigenvector associated with D(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array,
+*                                         dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If JOBZ  = 'N' or N <= 1 then LWORK must be at least 1.
+*          If JOBZ  = 'V' and N > 1 then LWORK must be at least
+*                         ( 1 + 4*N + N**2 ).
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.
+*          If JOBZ  = 'N' or N <= 1 then LIWORK must be at least 1.
+*          If JOBZ  = 'V' and N > 1 then LIWORK must be at least 3+5*N.
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the algorithm failed to converge; i
+*                off-diagonal elements of E did not converge to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, WANTZ
+      INTEGER            ISCALE, LIWMIN, LWMIN
+      DOUBLE PRECISION   BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, SMLNUM,
+     $                   TNRM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, DLANST
+      EXTERNAL           LSAME, DLAMCH, DLANST
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSCAL, DSTEDC, DSTERF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      INFO = 0
+      LIWMIN = 1
+      LWMIN = 1
+      IF( N.GT.1 .AND. WANTZ ) THEN
+         LWMIN = 1 + 4*N + N**2
+         LIWMIN = 3 + 5*N
+      END IF
+*
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -6
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -10
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSTEVD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      EPS = DLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = SQRT( BIGNUM )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ISCALE = 0
+      TNRM = DLANST( 'M', N, D, E )
+      IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / TNRM
+      ELSE IF( TNRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / TNRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         CALL DSCAL( N, SIGMA, D, 1 )
+         CALL DSCAL( N-1, SIGMA, E( 1 ), 1 )
+      END IF
+*
+*     For eigenvalues only, call DSTERF.  For eigenvalues and
+*     eigenvectors, call DSTEDC.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL DSTERF( N, D, E, INFO )
+      ELSE
+         CALL DSTEDC( 'I', N, D, E, Z, LDZ, WORK, LWORK, IWORK, LIWORK,
+     $                INFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( ISCALE.EQ.1 )
+     $   CALL DSCAL( N, ONE / SIGMA, D, 1 )
+*
+      WORK( 1 ) = LWMIN
+      IWORK( 1 ) = LIWMIN
+*
+      RETURN
+*
+*     End of DSTEVD
+*
+      END
diff --git a/costa/native/external/lapack/dstevr.f b/costa/native/external/lapack/dstevr.f
new file mode 100644
index 000000000..30cd7d391
--- /dev/null
+++ b/costa/native/external/lapack/dstevr.f
@@ -0,0 +1,434 @@
+      SUBROUTINE DSTEVR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, ABSTOL,
+     $                   M, W, Z, LDZ, ISUPPZ, WORK, LWORK, IWORK,
+     $                   LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 20, 2000
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE
+      INTEGER            IL, INFO, IU, LDZ, LIWORK, LWORK, M, N
+      DOUBLE PRECISION   ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            ISUPPZ( * ), IWORK( * )
+      DOUBLE PRECISION   D( * ), E( * ), W( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSTEVR computes selected eigenvalues and, optionally, eigenvectors
+*  of a real symmetric tridiagonal matrix T.  Eigenvalues and
+*  eigenvectors can be selected by specifying either a range of values
+*  or a range of indices for the desired eigenvalues.
+*
+*  Whenever possible, DSTEVR calls SSTEGR to compute the
+*  eigenspectrum using Relatively Robust Representations.  DSTEGR
+*  computes eigenvalues by the dqds algorithm, while orthogonal
+*  eigenvectors are computed from various "good" L D L^T representations
+*  (also known as Relatively Robust Representations). Gram-Schmidt
+*  orthogonalization is avoided as far as possible. More specifically,
+*  the various steps of the algorithm are as follows. For the i-th
+*  unreduced block of T,
+*     (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T
+*          is a relatively robust representation,
+*     (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high
+*         relative accuracy by the dqds algorithm,
+*     (c) If there is a cluster of close eigenvalues, "choose" sigma_i
+*         close to the cluster, and go to step (a),
+*     (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T,
+*         compute the corresponding eigenvector by forming a
+*         rank-revealing twisted factorization.
+*  The desired accuracy of the output can be specified by the input
+*  parameter ABSTOL.
+*
+*  For more details, see "A new O(n^2) algorithm for the symmetric
+*  tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon,
+*  Computer Science Division Technical Report No. UCB//CSD-97-971,
+*  UC Berkeley, May 1997.
+*
+*
+*  Note 1 : DSTEVR calls SSTEGR when the full spectrum is requested
+*  on machines which conform to the ieee-754 floating point standard.
+*  DSTEVR calls SSTEBZ and SSTEIN on non-ieee machines and
+*  when partial spectrum requests are made.
+*
+*  Normal execution of DSTEGR may create NaNs and infinities and
+*  hence may abort due to a floating point exception in environments
+*  which do not handle NaNs and infinities in the ieee standard default
+*  manner.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found.
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found.
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+********** For RANGE = 'V' or 'I' and IU - IL < N - 1, DSTEBZ and
+********** DSTEIN are called
+*
+*  N       (input) INTEGER
+*          The order of the matrix.  N >= 0.
+*
+*  D       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the n diagonal elements of the tridiagonal matrix
+*          A.
+*          On exit, D may be multiplied by a constant factor chosen
+*          to avoid over/underflow in computing the eigenvalues.
+*
+*  E       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix A in elements 1 to N-1 of E; E(N) need not be set.
+*          On exit, E may be multiplied by a constant factor chosen
+*          to avoid over/underflow in computing the eigenvalues.
+*
+*  VL      (input) DOUBLE PRECISION
+*  VU      (input) DOUBLE PRECISION
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) DOUBLE PRECISION
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less than
+*          or equal to zero, then  EPS*|T|  will be used in its place,
+*          where |T| is the 1-norm of the tridiagonal matrix obtained
+*          by reducing A to tridiagonal form.
+*
+*          See "Computing Small Singular Values of Bidiagonal Matrices
+*          with Guaranteed High Relative Accuracy," by Demmel and
+*          Kahan, LAPACK Working Note #3.
+*
+*          If high relative accuracy is important, set ABSTOL to
+*          DLAMCH( 'Safe minimum' ).  Doing so will guarantee that
+*          eigenvalues are computed to high relative accuracy when
+*          possible in future releases.  The current code does not
+*          make any guarantees about high relative accuracy, but
+*          future releases will. See J. Barlow and J. Demmel,
+*          "Computing Accurate Eigensystems of Scaled Diagonally
+*          Dominant Matrices", LAPACK Working Note #7, for a discussion
+*          of which matrices define their eigenvalues to high relative
+*          accuracy.
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          The first M elements contain the selected eigenvalues in
+*          ascending order.
+*
+*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) )
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix A
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  ISUPPZ  (output) INTEGER array, dimension ( 2*max(1,M) )
+*          The support of the eigenvectors in Z, i.e., the indices
+*          indicating the nonzero elements in Z. The i-th eigenvector
+*          is nonzero only in elements ISUPPZ( 2*i-1 ) through
+*          ISUPPZ( 2*i ).
+********** Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal (and
+*          minimal) LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= 20*N.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal (and
+*          minimal) LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.  LIWORK >= 10*N.
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  Internal error
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Inderjit Dhillon, IBM Almaden, USA
+*     Osni Marques, LBNL/NERSC, USA
+*     Ken Stanley, Computer Science Division, University of
+*       California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ
+      CHARACTER          ORDER
+      INTEGER            I, IEEEOK, IMAX, INDIBL, INDIFL, INDISP,
+     $                   INDIWO, ISCALE, ITMP1, J, JJ, LIWMIN, LWMIN,
+     $                   NSPLIT
+      DOUBLE PRECISION   BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, SMLNUM,
+     $                   TMP1, TNRM, VLL, VUU
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, DLANST
+      EXTERNAL           LSAME, ILAENV, DLAMCH, DLANST
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DSCAL, DSTEBZ, DSTEGR, DSTEIN, DSTERF,
+     $                   DSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*
+*     Test the input parameters.
+*
+      IEEEOK = ILAENV( 10, 'DSTEVR', 'N', 1, 2, 3, 4 )
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+*
+      LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LIWORK.EQ.-1 ) )
+      LWMIN = 20*N
+      LIWMIN = 10*N
+*
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE
+         IF( VALEIG ) THEN
+            IF( N.GT.0 .AND. VU.LE.VL )
+     $         INFO = -7
+         ELSE IF( INDEIG ) THEN
+            IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN
+               INFO = -8
+            ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN
+               INFO = -9
+            END IF
+         END IF
+      END IF
+      IF( INFO.EQ.0 ) THEN
+         IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+            INFO = -14
+         ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+            INFO = -17
+         ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+            INFO = -19
+         END IF
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSTEVR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( ALLEIG .OR. INDEIG ) THEN
+            M = 1
+            W( 1 ) = D( 1 )
+         ELSE
+            IF( VL.LT.D( 1 ) .AND. VU.GE.D( 1 ) ) THEN
+               M = 1
+               W( 1 ) = D( 1 )
+            END IF
+         END IF
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      EPS = DLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) )
+*
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ISCALE = 0
+      VLL = VL
+      VUU = VU
+*
+      TNRM = DLANST( 'M', N, D, E )
+      IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / TNRM
+      ELSE IF( TNRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / TNRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         CALL DSCAL( N, SIGMA, D, 1 )
+         CALL DSCAL( N-1, SIGMA, E( 1 ), 1 )
+         IF( VALEIG ) THEN
+            VLL = VL*SIGMA
+            VUU = VU*SIGMA
+         END IF
+      END IF
+*
+*     If all eigenvalues are desired, then
+*     call DSTERF or SSTEGR.  If this fails for some eigenvalue, then
+*     try DSTEBZ.
+*
+*
+      IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND.
+     $    IEEEOK.EQ.1 ) THEN
+         CALL DCOPY( N-1, E( 1 ), 1, WORK( 1 ), 1 )
+         IF( .NOT.WANTZ ) THEN
+            CALL DCOPY( N, D, 1, W, 1 )
+            CALL DSTERF( N, W, WORK, INFO )
+         ELSE
+            CALL DCOPY( N, D, 1, WORK( N+1 ), 1 )
+            CALL DSTEGR( JOBZ, 'A', N, WORK( N+1 ), WORK, VL, VU, IL,
+     $                   IU, ABSTOL, M, W, Z, LDZ, ISUPPZ,
+     $                   WORK( 2*N+1 ), LWORK-2*N, IWORK, LIWORK, INFO )
+*
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            M = N
+            GO TO 10
+         END IF
+         INFO = 0
+      END IF
+*
+*     Otherwise, call DSTEBZ and, if eigenvectors are desired, SSTEIN.
+*
+      IF( WANTZ ) THEN
+         ORDER = 'B'
+      ELSE
+         ORDER = 'E'
+      END IF
+      INDIBL = 1
+      INDISP = INDIBL + N
+      INDIFL = INDISP + N
+      INDIWO = INDIFL + N
+      CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTOL, D, E, M,
+     $             NSPLIT, W, IWORK( INDIBL ), IWORK( INDISP ), WORK,
+     $             IWORK( INDIWO ), INFO )
+*
+      IF( WANTZ ) THEN
+         CALL DSTEIN( N, D, E, M, W, IWORK( INDIBL ), IWORK( INDISP ),
+     $                Z, LDZ, WORK, IWORK( INDIWO ), IWORK( INDIFL ),
+     $                INFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+   10 CONTINUE
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = M
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL DSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+*     If eigenvalues are not in order, then sort them, along with
+*     eigenvectors.
+*
+      IF( WANTZ ) THEN
+         DO 30 J = 1, M - 1
+            I = 0
+            TMP1 = W( J )
+            DO 20 JJ = J + 1, M
+               IF( W( JJ ).LT.TMP1 ) THEN
+                  I = JJ
+                  TMP1 = W( JJ )
+               END IF
+   20       CONTINUE
+*
+            IF( I.NE.0 ) THEN
+               ITMP1 = IWORK( I )
+               W( I ) = W( J )
+               IWORK( I ) = IWORK( J )
+               W( J ) = TMP1
+               IWORK( J ) = ITMP1
+               CALL DSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 )
+            END IF
+   30    CONTINUE
+      END IF
+*
+*      Causes problems with tests 19 & 20:
+*      IF (wantz .and. INDEIG ) Z( 1,1) = Z(1,1) / 1.002 + .002
+*
+*
+      WORK( 1 ) = LWMIN
+      IWORK( 1 ) = LIWMIN
+      RETURN
+*
+*     End of DSTEVR
+*
+      END
diff --git a/costa/native/external/lapack/dstevx.f b/costa/native/external/lapack/dstevx.f
new file mode 100644
index 000000000..6f9366e55
--- /dev/null
+++ b/costa/native/external/lapack/dstevx.f
@@ -0,0 +1,346 @@
+      SUBROUTINE DSTEVX( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, ABSTOL,
+     $                   M, W, Z, LDZ, WORK, IWORK, IFAIL, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE
+      INTEGER            IL, INFO, IU, LDZ, M, N
+      DOUBLE PRECISION   ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IFAIL( * ), IWORK( * )
+      DOUBLE PRECISION   D( * ), E( * ), W( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSTEVX computes selected eigenvalues and, optionally, eigenvectors
+*  of a real symmetric tridiagonal matrix A.  Eigenvalues and
+*  eigenvectors can be selected by specifying either a range of values
+*  or a range of indices for the desired eigenvalues.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found.
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found.
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+*
+*  N       (input) INTEGER
+*          The order of the matrix.  N >= 0.
+*
+*  D       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the n diagonal elements of the tridiagonal matrix
+*          A.
+*          On exit, D may be multiplied by a constant factor chosen
+*          to avoid over/underflow in computing the eigenvalues.
+*
+*  E       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix A in elements 1 to N-1 of E; E(N) need not be set.
+*          On exit, E may be multiplied by a constant factor chosen
+*          to avoid over/underflow in computing the eigenvalues.
+*
+*  VL      (input) DOUBLE PRECISION
+*  VU      (input) DOUBLE PRECISION
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) DOUBLE PRECISION
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less
+*          than or equal to zero, then  EPS*|T|  will be used in
+*          its place, where |T| is the 1-norm of the tridiagonal
+*          matrix.
+*
+*          Eigenvalues will be computed most accurately when ABSTOL is
+*          set to twice the underflow threshold 2*DLAMCH('S'), not zero.
+*          If this routine returns with INFO>0, indicating that some
+*          eigenvectors did not converge, try setting ABSTOL to
+*          2*DLAMCH('S').
+*
+*          See "Computing Small Singular Values of Bidiagonal Matrices
+*          with Guaranteed High Relative Accuracy," by Demmel and
+*          Kahan, LAPACK Working Note #3.
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          The first M elements contain the selected eigenvalues in
+*          ascending order.
+*
+*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) )
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix A
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          If an eigenvector fails to converge (INFO > 0), then that
+*          column of Z contains the latest approximation to the
+*          eigenvector, and the index of the eigenvector is returned
+*          in IFAIL.  If JOBZ = 'N', then Z is not referenced.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (5*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (5*N)
+*
+*  IFAIL   (output) INTEGER array, dimension (N)
+*          If JOBZ = 'V', then if INFO = 0, the first M elements of
+*          IFAIL are zero.  If INFO > 0, then IFAIL contains the
+*          indices of the eigenvectors that failed to converge.
+*          If JOBZ = 'N', then IFAIL is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, then i eigenvectors failed to converge.
+*                Their indices are stored in array IFAIL.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, VALEIG, WANTZ
+      CHARACTER          ORDER
+      INTEGER            I, IMAX, INDIBL, INDISP, INDIWO, INDWRK,
+     $                   ISCALE, ITMP1, J, JJ, NSPLIT
+      DOUBLE PRECISION   BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, SMLNUM,
+     $                   TMP1, TNRM, VLL, VUU
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, DLANST
+      EXTERNAL           LSAME, DLAMCH, DLANST
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DSCAL, DSTEBZ, DSTEIN, DSTEQR, DSTERF,
+     $                   DSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE
+         IF( VALEIG ) THEN
+            IF( N.GT.0 .AND. VU.LE.VL )
+     $         INFO = -7
+         ELSE IF( INDEIG ) THEN
+            IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN
+               INFO = -8
+            ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN
+               INFO = -9
+            END IF
+         END IF
+      END IF
+      IF( INFO.EQ.0 ) THEN
+         IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) )
+     $      INFO = -14
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSTEVX', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( ALLEIG .OR. INDEIG ) THEN
+            M = 1
+            W( 1 ) = D( 1 )
+         ELSE
+            IF( VL.LT.D( 1 ) .AND. VU.GE.D( 1 ) ) THEN
+               M = 1
+               W( 1 ) = D( 1 )
+            END IF
+         END IF
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      EPS = DLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ISCALE = 0
+      IF( VALEIG ) THEN
+         VLL = VL
+         VUU = VU
+      ELSE
+         VLL = ZERO
+         VUU = ZERO
+      END IF
+      TNRM = DLANST( 'M', N, D, E )
+      IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / TNRM
+      ELSE IF( TNRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / TNRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         CALL DSCAL( N, SIGMA, D, 1 )
+         CALL DSCAL( N-1, SIGMA, E( 1 ), 1 )
+         IF( VALEIG ) THEN
+            VLL = VL*SIGMA
+            VUU = VU*SIGMA
+         END IF
+      END IF
+*
+*     If all eigenvalues are desired and ABSTOL is less than zero, then
+*     call DSTERF or SSTEQR.  If this fails for some eigenvalue, then
+*     try DSTEBZ.
+*
+      IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND.
+     $    ( ABSTOL.LE.ZERO ) ) THEN
+         CALL DCOPY( N, D, 1, W, 1 )
+         CALL DCOPY( N-1, E( 1 ), 1, WORK( 1 ), 1 )
+         INDWRK = N + 1
+         IF( .NOT.WANTZ ) THEN
+            CALL DSTERF( N, W, WORK, INFO )
+         ELSE
+            CALL DSTEQR( 'I', N, W, WORK, Z, LDZ, WORK( INDWRK ), INFO )
+            IF( INFO.EQ.0 ) THEN
+               DO 10 I = 1, N
+                  IFAIL( I ) = 0
+   10          CONTINUE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            M = N
+            GO TO 20
+         END IF
+         INFO = 0
+      END IF
+*
+*     Otherwise, call DSTEBZ and, if eigenvectors are desired, SSTEIN.
+*
+      IF( WANTZ ) THEN
+         ORDER = 'B'
+      ELSE
+         ORDER = 'E'
+      END IF
+      INDWRK = 1
+      INDIBL = 1
+      INDISP = INDIBL + N
+      INDIWO = INDISP + N
+      CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTOL, D, E, M,
+     $             NSPLIT, W, IWORK( INDIBL ), IWORK( INDISP ),
+     $             WORK( INDWRK ), IWORK( INDIWO ), INFO )
+*
+      IF( WANTZ ) THEN
+         CALL DSTEIN( N, D, E, M, W, IWORK( INDIBL ), IWORK( INDISP ),
+     $                Z, LDZ, WORK( INDWRK ), IWORK( INDIWO ), IFAIL,
+     $                INFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+   20 CONTINUE
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = M
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL DSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+*     If eigenvalues are not in order, then sort them, along with
+*     eigenvectors.
+*
+      IF( WANTZ ) THEN
+         DO 40 J = 1, M - 1
+            I = 0
+            TMP1 = W( J )
+            DO 30 JJ = J + 1, M
+               IF( W( JJ ).LT.TMP1 ) THEN
+                  I = JJ
+                  TMP1 = W( JJ )
+               END IF
+   30       CONTINUE
+*
+            IF( I.NE.0 ) THEN
+               ITMP1 = IWORK( INDIBL+I-1 )
+               W( I ) = W( J )
+               IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 )
+               W( J ) = TMP1
+               IWORK( INDIBL+J-1 ) = ITMP1
+               CALL DSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 )
+               IF( INFO.NE.0 ) THEN
+                  ITMP1 = IFAIL( I )
+                  IFAIL( I ) = IFAIL( J )
+                  IFAIL( J ) = ITMP1
+               END IF
+            END IF
+   40    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DSTEVX
+*
+      END
diff --git a/costa/native/external/lapack/dsycon.f b/costa/native/external/lapack/dsycon.f
new file mode 100644
index 000000000..47471312f
--- /dev/null
+++ b/costa/native/external/lapack/dsycon.f
@@ -0,0 +1,161 @@
+      SUBROUTINE DSYCON( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+      DOUBLE PRECISION   ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYCON estimates the reciprocal of the condition number (in the
+*  1-norm) of a real symmetric matrix A using the factorization
+*  A = U*D*U**T or A = L*D*L**T computed by DSYTRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**T;
+*          = 'L':  Lower triangular, form is A = L*D*L**T.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+*          The block diagonal matrix D and the multipliers used to
+*          obtain the factor U or L as computed by DSYTRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by DSYTRF.
+*
+*  ANORM   (input) DOUBLE PRECISION
+*          The 1-norm of the original matrix A.
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an
+*          estimate of the 1-norm of inv(A) computed in this routine.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (2*N)
+*
+*  IWORK    (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, KASE
+      DOUBLE PRECISION   AINVNM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLACON, DSYTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSYCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.LE.ZERO ) THEN
+         RETURN
+      END IF
+*
+*     Check that the diagonal matrix D is nonsingular.
+*
+      IF( UPPER ) THEN
+*
+*        Upper triangular storage: examine D from bottom to top
+*
+         DO 10 I = N, 1, -1
+            IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO )
+     $         RETURN
+   10    CONTINUE
+      ELSE
+*
+*        Lower triangular storage: examine D from top to bottom.
+*
+         DO 20 I = 1, N
+            IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO )
+     $         RETURN
+   20    CONTINUE
+      END IF
+*
+*     Estimate the 1-norm of the inverse.
+*
+      KASE = 0
+   30 CONTINUE
+      CALL DLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+*
+*        Multiply by inv(L*D*L') or inv(U*D*U').
+*
+         CALL DSYTRS( UPLO, N, 1, A, LDA, IPIV, WORK, N, INFO )
+         GO TO 30
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+      RETURN
+*
+*     End of DSYCON
+*
+      END
diff --git a/costa/native/external/lapack/dsyev.f b/costa/native/external/lapack/dsyev.f
new file mode 100644
index 000000000..acdb440fc
--- /dev/null
+++ b/costa/native/external/lapack/dsyev.f
@@ -0,0 +1,213 @@
+      SUBROUTINE DSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), W( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYEV computes all eigenvalues and, optionally, eigenvectors of a
+*  real symmetric matrix A.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA, N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of A contains the
+*          upper triangular part of the matrix A.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of A contains
+*          the lower triangular part of the matrix A.
+*          On exit, if JOBZ = 'V', then if INFO = 0, A contains the
+*          orthonormal eigenvectors of the matrix A.
+*          If JOBZ = 'N', then on exit the lower triangle (if UPLO='L')
+*          or the upper triangle (if UPLO='U') of A, including the
+*          diagonal, is destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.  LWORK >= max(1,3*N-1).
+*          For optimal efficiency, LWORK >= (NB+2)*N,
+*          where NB is the blocksize for DSYTRD returned by ILAENV.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the algorithm failed to converge; i
+*                off-diagonal elements of an intermediate tridiagonal
+*                form did not converge to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LOWER, LQUERY, WANTZ
+      INTEGER            IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE,
+     $                   LLWORK, LOPT, LWKOPT, NB
+      DOUBLE PRECISION   ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
+     $                   SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, DLANSY
+      EXTERNAL           LSAME, ILAENV, DLAMCH, DLANSY
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASCL, DORGTR, DSCAL, DSTEQR, DSTERF, DSYTRD,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      LOWER = LSAME( UPLO, 'L' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.MAX( 1, 3*N-1 ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'DSYTRD', UPLO, N, -1, -1, -1 )
+         LWKOPT = MAX( 1, ( NB+2 )*N )
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSYEV ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( N.EQ.1 ) THEN
+         W( 1 ) = A( 1, 1 )
+         WORK( 1 ) = 3
+         IF( WANTZ )
+     $      A( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      EPS = DLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = SQRT( BIGNUM )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ANRM = DLANSY( 'M', UPLO, N, A, LDA, WORK )
+      ISCALE = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 )
+     $   CALL DLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO )
+*
+*     Call DSYTRD to reduce symmetric matrix to tridiagonal form.
+*
+      INDE = 1
+      INDTAU = INDE + N
+      INDWRK = INDTAU + N
+      LLWORK = LWORK - INDWRK + 1
+      CALL DSYTRD( UPLO, N, A, LDA, W, WORK( INDE ), WORK( INDTAU ),
+     $             WORK( INDWRK ), LLWORK, IINFO )
+      LOPT = 2*N + WORK( INDWRK )
+*
+*     For eigenvalues only, call DSTERF.  For eigenvectors, first call
+*     DORGTR to generate the orthogonal matrix, then call DSTEQR.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL DSTERF( N, W, WORK( INDE ), INFO )
+      ELSE
+         CALL DORGTR( UPLO, N, A, LDA, WORK( INDTAU ), WORK( INDWRK ),
+     $                LLWORK, IINFO )
+         CALL DSTEQR( JOBZ, N, W, WORK( INDE ), A, LDA, WORK( INDTAU ),
+     $                INFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = N
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL DSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+*     Set WORK(1) to optimal workspace size.
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of DSYEV
+*
+      END
diff --git a/costa/native/external/lapack/dsyevd.f b/costa/native/external/lapack/dsyevd.f
new file mode 100644
index 000000000..62b0c5fc4
--- /dev/null
+++ b/costa/native/external/lapack/dsyevd.f
@@ -0,0 +1,265 @@
+      SUBROUTINE DSYEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, IWORK,
+     $                   LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, LDA, LIWORK, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), W( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYEVD computes all eigenvalues and, optionally, eigenvectors of a
+*  real symmetric matrix A. If eigenvectors are desired, it uses a
+*  divide and conquer algorithm.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Because of large use of BLAS of level 3, DSYEVD needs N**2 more
+*  workspace than DSYEVX.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA, N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of A contains the
+*          upper triangular part of the matrix A.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of A contains
+*          the lower triangular part of the matrix A.
+*          On exit, if JOBZ = 'V', then if INFO = 0, A contains the
+*          orthonormal eigenvectors of the matrix A.
+*          If JOBZ = 'N', then on exit the lower triangle (if UPLO='L')
+*          or the upper triangle (if UPLO='U') of A, including the
+*          diagonal, is destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array,
+*                                         dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If N <= 1,               LWORK must be at least 1.
+*          If JOBZ = 'N' and N > 1, LWORK must be at least 2*N+1.
+*          If JOBZ = 'V' and N > 1, LWORK must be at least
+*                                                1 + 6*N + 2*N**2.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.
+*          If N <= 1,                LIWORK must be at least 1.
+*          If JOBZ  = 'N' and N > 1, LIWORK must be at least 1.
+*          If JOBZ  = 'V' and N > 1, LIWORK must be at least 3 + 5*N.
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the algorithm failed to converge; i
+*                off-diagonal elements of an intermediate tridiagonal
+*                form did not converge to zero.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Jeff Rutter, Computer Science Division, University of California
+*     at Berkeley, USA
+*  Modified by Francoise Tisseur, University of Tennessee.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+*
+      LOGICAL            LOWER, LQUERY, WANTZ
+      INTEGER            IINFO, INDE, INDTAU, INDWK2, INDWRK, ISCALE,
+     $                   LIOPT, LIWMIN, LLWORK, LLWRK2, LOPT, LWMIN
+      DOUBLE PRECISION   ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
+     $                   SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, DLANSY
+      EXTERNAL           LSAME, DLAMCH, DLANSY
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLACPY, DLASCL, DORMTR, DSCAL, DSTEDC, DSTERF,
+     $                   DSYTRD, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      LOWER = LSAME( UPLO, 'L' )
+      LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( N.LE.1 ) THEN
+         LIWMIN = 1
+         LWMIN = 1
+         LOPT = LWMIN
+         LIOPT = LIWMIN
+      ELSE
+         IF( WANTZ ) THEN
+            LIWMIN = 3 + 5*N
+            LWMIN = 1 + 6*N + 2*N**2
+         ELSE
+            LIWMIN = 1
+            LWMIN = 2*N + 1
+         END IF
+         LOPT = LWMIN
+         LIOPT = LIWMIN
+      END IF
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -10
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LOPT
+         IWORK( 1 ) = LIOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSYEVD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         W( 1 ) = A( 1, 1 )
+         IF( WANTZ )
+     $      A( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      EPS = DLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = SQRT( BIGNUM )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ANRM = DLANSY( 'M', UPLO, N, A, LDA, WORK )
+      ISCALE = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 )
+     $   CALL DLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO )
+*
+*     Call DSYTRD to reduce symmetric matrix to tridiagonal form.
+*
+      INDE = 1
+      INDTAU = INDE + N
+      INDWRK = INDTAU + N
+      LLWORK = LWORK - INDWRK + 1
+      INDWK2 = INDWRK + N*N
+      LLWRK2 = LWORK - INDWK2 + 1
+*
+      CALL DSYTRD( UPLO, N, A, LDA, W, WORK( INDE ), WORK( INDTAU ),
+     $             WORK( INDWRK ), LLWORK, IINFO )
+      LOPT = 2*N + WORK( INDWRK )
+*
+*     For eigenvalues only, call DSTERF.  For eigenvectors, first call
+*     DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the
+*     tridiagonal matrix, then call DORMTR to multiply it by the
+*     Householder transformations stored in A.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL DSTERF( N, W, WORK( INDE ), INFO )
+      ELSE
+         CALL DSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N,
+     $                WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO )
+         CALL DORMTR( 'L', UPLO, 'N', N, N, A, LDA, WORK( INDTAU ),
+     $                WORK( INDWRK ), N, WORK( INDWK2 ), LLWRK2, IINFO )
+         CALL DLACPY( 'A', N, N, WORK( INDWRK ), N, A, LDA )
+         LOPT = MAX( LOPT, 1+6*N+2*N**2 )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( ISCALE.EQ.1 )
+     $   CALL DSCAL( N, ONE / SIGMA, W, 1 )
+*
+      WORK( 1 ) = LOPT
+      IWORK( 1 ) = LIOPT
+*
+      RETURN
+*
+*     End of DSYEVD
+*
+      END
diff --git a/costa/native/external/lapack/dsyevr.f b/costa/native/external/lapack/dsyevr.f
new file mode 100644
index 000000000..b308aecd1
--- /dev/null
+++ b/costa/native/external/lapack/dsyevr.f
@@ -0,0 +1,497 @@
+      SUBROUTINE DSYEVR( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU,
+     $                   ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK,
+     $                   IWORK, LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 20, 2000
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE, UPLO
+      INTEGER            IL, INFO, IU, LDA, LDZ, LIWORK, LWORK, M, N
+      DOUBLE PRECISION   ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            ISUPPZ( * ), IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYEVR computes selected eigenvalues and, optionally, eigenvectors
+*  of a real symmetric matrix T.  Eigenvalues and eigenvectors can be
+*  selected by specifying either a range of values or a range of
+*  indices for the desired eigenvalues.
+*
+*  Whenever possible, DSYEVR calls DSTEGR to compute the
+*  eigenspectrum using Relatively Robust Representations.  DSTEGR
+*  computes eigenvalues by the dqds algorithm, while orthogonal
+*  eigenvectors are computed from various "good" L D L^T representations
+*  (also known as Relatively Robust Representations). Gram-Schmidt
+*  orthogonalization is avoided as far as possible. More specifically,
+*  the various steps of the algorithm are as follows. For the i-th
+*  unreduced block of T,
+*     (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T
+*          is a relatively robust representation,
+*     (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high
+*         relative accuracy by the dqds algorithm,
+*     (c) If there is a cluster of close eigenvalues, "choose" sigma_i
+*         close to the cluster, and go to step (a),
+*     (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T,
+*         compute the corresponding eigenvector by forming a
+*         rank-revealing twisted factorization.
+*  The desired accuracy of the output can be specified by the input
+*  parameter ABSTOL.
+*
+*  For more details, see "A new O(n^2) algorithm for the symmetric
+*  tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon,
+*  Computer Science Division Technical Report No. UCB//CSD-97-971,
+*  UC Berkeley, May 1997.
+*
+*
+*  Note 1 : DSYEVR calls DSTEGR when the full spectrum is requested
+*  on machines which conform to the ieee-754 floating point standard.
+*  DSYEVR calls DSTEBZ and SSTEIN on non-ieee machines and
+*  when partial spectrum requests are made.
+*
+*  Normal execution of DSTEGR may create NaNs and infinities and
+*  hence may abort due to a floating point exception in environments
+*  which do not handle NaNs and infinities in the ieee standard default
+*  manner.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found.
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found.
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+********** For RANGE = 'V' or 'I' and IU - IL < N - 1, DSTEBZ and
+********** DSTEIN are called
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA, N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of A contains the
+*          upper triangular part of the matrix A.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of A contains
+*          the lower triangular part of the matrix A.
+*          On exit, the lower triangle (if UPLO='L') or the upper
+*          triangle (if UPLO='U') of A, including the diagonal, is
+*          destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  VL      (input) DOUBLE PRECISION
+*  VU      (input) DOUBLE PRECISION
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) DOUBLE PRECISION
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less than
+*          or equal to zero, then  EPS*|T|  will be used in its place,
+*          where |T| is the 1-norm of the tridiagonal matrix obtained
+*          by reducing A to tridiagonal form.
+*
+*          See "Computing Small Singular Values of Bidiagonal Matrices
+*          with Guaranteed High Relative Accuracy," by Demmel and
+*          Kahan, LAPACK Working Note #3.
+*
+*          If high relative accuracy is important, set ABSTOL to
+*          DLAMCH( 'Safe minimum' ).  Doing so will guarantee that
+*          eigenvalues are computed to high relative accuracy when
+*          possible in future releases.  The current code does not
+*          make any guarantees about high relative accuracy, but
+*          furutre releases will. See J. Barlow and J. Demmel,
+*          "Computing Accurate Eigensystems of Scaled Diagonally
+*          Dominant Matrices", LAPACK Working Note #7, for a discussion
+*          of which matrices define their eigenvalues to high relative
+*          accuracy.
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          The first M elements contain the selected eigenvalues in
+*          ascending order.
+*
+*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M))
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix A
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  ISUPPZ  (output) INTEGER array, dimension ( 2*max(1,M) )
+*          The support of the eigenvectors in Z, i.e., the indices
+*          indicating the nonzero elements in Z. The i-th eigenvector
+*          is nonzero only in elements ISUPPZ( 2*i-1 ) through
+*          ISUPPZ( 2*i ).
+********** Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,26*N).
+*          For optimal efficiency, LWORK >= (NB+6)*N,
+*          where NB is the max of the blocksize for DSYTRD and DORMTR
+*          returned by ILAENV.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.  LIWORK >= max(1,10*N).
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  Internal error
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Inderjit Dhillon, IBM Almaden, USA
+*     Osni Marques, LBNL/NERSC, USA
+*     Ken Stanley, Computer Science Division, University of
+*       California at Berkeley, USA
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, LOWER, LQUERY, VALEIG, WANTZ
+      CHARACTER          ORDER
+      INTEGER            I, IEEEOK, IINFO, IMAX, INDD, INDDD, INDE,
+     $                   INDEE, INDIBL, INDIFL, INDISP, INDIWO, INDTAU,
+     $                   INDWK, INDWKN, ISCALE, ITMP1, J, JJ, LIWMIN,
+     $                   LLWORK, LLWRKN, LWKOPT, LWMIN, NB, NSPLIT
+      DOUBLE PRECISION   ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN,
+     $                   SIGMA, SMLNUM, TMP1, VLL, VUU
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, DLANSY
+      EXTERNAL           LSAME, ILAENV, DLAMCH, DLANSY
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DORMTR, DSCAL, DSTEBZ, DSTEGR, DSTEIN,
+     $                   DSTERF, DSWAP, DSYTRD, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      IEEEOK = ILAENV( 10, 'DSYEVR', 'N', 1, 2, 3, 4 )
+*
+      LOWER = LSAME( UPLO, 'L' )
+      WANTZ = LSAME( JOBZ, 'V' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+*
+      LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LIWORK.EQ.-1 ) )
+*
+      LWMIN = MAX( 1, 26*N )
+      LIWMIN = MAX( 1, 10*N )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE
+         IF( VALEIG ) THEN
+            IF( N.GT.0 .AND. VU.LE.VL )
+     $         INFO = -8
+         ELSE IF( INDEIG ) THEN
+            IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN
+               INFO = -9
+            ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN
+               INFO = -10
+            END IF
+         END IF
+      END IF
+      IF( INFO.EQ.0 ) THEN
+         IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+            INFO = -15
+         ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+            INFO = -18
+         ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+            INFO = -20
+         END IF
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'ZHETRD', UPLO, N, -1, -1, -1 )
+         NB = MAX( NB, ILAENV( 1, 'ZUNMTR', UPLO, N, -1, -1, -1 ) )
+         LWKOPT = MAX( ( NB+1 )*N, LWMIN )
+         WORK( 1 ) = LWKOPT
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSYEVR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( N.EQ.1 ) THEN
+         WORK( 1 ) = 7
+         IF( ALLEIG .OR. INDEIG ) THEN
+            M = 1
+            W( 1 ) = A( 1, 1 )
+         ELSE
+            IF( VL.LT.A( 1, 1 ) .AND. VU.GE.A( 1, 1 ) ) THEN
+               M = 1
+               W( 1 ) = A( 1, 1 )
+            END IF
+         END IF
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      EPS = DLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ISCALE = 0
+      ABSTLL = ABSTOL
+      VLL = VL
+      VUU = VU
+      ANRM = DLANSY( 'M', UPLO, N, A, LDA, WORK )
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         IF( LOWER ) THEN
+            DO 10 J = 1, N
+               CALL DSCAL( N-J+1, SIGMA, A( J, J ), 1 )
+   10       CONTINUE
+         ELSE
+            DO 20 J = 1, N
+               CALL DSCAL( J, SIGMA, A( 1, J ), 1 )
+   20       CONTINUE
+         END IF
+         IF( ABSTOL.GT.0 )
+     $      ABSTLL = ABSTOL*SIGMA
+         IF( VALEIG ) THEN
+            VLL = VL*SIGMA
+            VUU = VU*SIGMA
+         END IF
+      END IF
+*
+*     Call DSYTRD to reduce symmetric matrix to tridiagonal form.
+*
+      INDTAU = 1
+      INDE = INDTAU + N
+      INDD = INDE + N
+      INDEE = INDD + N
+      INDDD = INDEE + N
+      INDIFL = INDDD + N
+      INDWK = INDIFL + N
+      LLWORK = LWORK - INDWK + 1
+      CALL DSYTRD( UPLO, N, A, LDA, WORK( INDD ), WORK( INDE ),
+     $             WORK( INDTAU ), WORK( INDWK ), LLWORK, IINFO )
+*
+*     If all eigenvalues are desired
+*     then call DSTERF or SSTEGR and DORMTR.
+*
+      IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND.
+     $    IEEEOK.EQ.1 ) THEN
+         IF( .NOT.WANTZ ) THEN
+            CALL DCOPY( N, WORK( INDD ), 1, W, 1 )
+            CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 )
+            CALL DSTERF( N, W, WORK( INDEE ), INFO )
+         ELSE
+            CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 )
+            CALL DCOPY( N, WORK( INDD ), 1, WORK( INDDD ), 1 )
+*
+            CALL DSTEGR( JOBZ, 'A', N, WORK( INDDD ), WORK( INDEE ),
+     $                   VL, VU, IL, IU, ABSTOL, M, W, Z, LDZ, ISUPPZ,
+     $                   WORK( INDWK ), LWORK, IWORK, LIWORK, INFO )
+*
+*
+*
+*        Apply orthogonal matrix used in reduction to tridiagonal
+*        form to eigenvectors returned by DSTEIN.
+*
+            IF( WANTZ .AND. INFO.EQ.0 ) THEN
+               INDWKN = INDE
+               LLWRKN = LWORK - INDWKN + 1
+               CALL DORMTR( 'L', UPLO, 'N', N, M, A, LDA,
+     $                      WORK( INDTAU ), Z, LDZ, WORK( INDWKN ),
+     $                      LLWRKN, IINFO )
+            END IF
+         END IF
+*
+*
+         IF( INFO.EQ.0 ) THEN
+            M = N
+            GO TO 30
+         END IF
+         INFO = 0
+      END IF
+*
+*     Otherwise, call DSTEBZ and, if eigenvectors are desired, SSTEIN.
+*     Also call DSTEBZ and SSTEIN if SSTEGR fails.
+*
+      IF( WANTZ ) THEN
+         ORDER = 'B'
+      ELSE
+         ORDER = 'E'
+      END IF
+      INDIFL = 1
+      INDIBL = INDIFL + N
+      INDISP = INDIBL + N
+      INDIWO = INDISP + N
+      CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL,
+     $             WORK( INDD ), WORK( INDE ), M, NSPLIT, W,
+     $             IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWK ),
+     $             IWORK( INDIWO ), INFO )
+*
+      IF( WANTZ ) THEN
+         CALL DSTEIN( N, WORK( INDD ), WORK( INDE ), M, W,
+     $                IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ,
+     $                WORK( INDWK ), IWORK( INDIWO ), IWORK( INDIFL ),
+     $                INFO )
+*
+*        Apply orthogonal matrix used in reduction to tridiagonal
+*        form to eigenvectors returned by DSTEIN.
+*
+         INDWKN = INDE
+         LLWRKN = LWORK - INDWKN + 1
+         CALL DORMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z,
+     $                LDZ, WORK( INDWKN ), LLWRKN, IINFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+   30 CONTINUE
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = M
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL DSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+*     If eigenvalues are not in order, then sort them, along with
+*     eigenvectors.
+*
+      IF( WANTZ ) THEN
+         DO 50 J = 1, M - 1
+            I = 0
+            TMP1 = W( J )
+            DO 40 JJ = J + 1, M
+               IF( W( JJ ).LT.TMP1 ) THEN
+                  I = JJ
+                  TMP1 = W( JJ )
+               END IF
+   40       CONTINUE
+*
+            IF( I.NE.0 ) THEN
+               ITMP1 = IWORK( INDIBL+I-1 )
+               W( I ) = W( J )
+               IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 )
+               W( J ) = TMP1
+               IWORK( INDIBL+J-1 ) = ITMP1
+               CALL DSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 )
+            END IF
+   50    CONTINUE
+      END IF
+*
+*     Set WORK(1) to optimal workspace size.
+*
+      WORK( 1 ) = LWKOPT
+      IWORK( 1 ) = LIWMIN
+*
+      RETURN
+*
+*     End of DSYEVR
+*
+      END
diff --git a/costa/native/external/lapack/dsyevx.f b/costa/native/external/lapack/dsyevx.f
new file mode 100644
index 000000000..e8fea917a
--- /dev/null
+++ b/costa/native/external/lapack/dsyevx.f
@@ -0,0 +1,421 @@
+      SUBROUTINE DSYEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU,
+     $                   ABSTOL, M, W, Z, LDZ, WORK, LWORK, IWORK,
+     $                   IFAIL, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE, UPLO
+      INTEGER            IL, INFO, IU, LDA, LDZ, LWORK, M, N
+      DOUBLE PRECISION   ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IFAIL( * ), IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYEVX computes selected eigenvalues and, optionally, eigenvectors
+*  of a real symmetric matrix A.  Eigenvalues and eigenvectors can be
+*  selected by specifying either a range of values or a range of indices
+*  for the desired eigenvalues.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found.
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found.
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA, N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of A contains the
+*          upper triangular part of the matrix A.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of A contains
+*          the lower triangular part of the matrix A.
+*          On exit, the lower triangle (if UPLO='L') or the upper
+*          triangle (if UPLO='U') of A, including the diagonal, is
+*          destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  VL      (input) DOUBLE PRECISION
+*  VU      (input) DOUBLE PRECISION
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) DOUBLE PRECISION
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less than
+*          or equal to zero, then  EPS*|T|  will be used in its place,
+*          where |T| is the 1-norm of the tridiagonal matrix obtained
+*          by reducing A to tridiagonal form.
+*
+*          Eigenvalues will be computed most accurately when ABSTOL is
+*          set to twice the underflow threshold 2*DLAMCH('S'), not zero.
+*          If this routine returns with INFO>0, indicating that some
+*          eigenvectors did not converge, try setting ABSTOL to
+*          2*DLAMCH('S').
+*
+*          See "Computing Small Singular Values of Bidiagonal Matrices
+*          with Guaranteed High Relative Accuracy," by Demmel and
+*          Kahan, LAPACK Working Note #3.
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          On normal exit, the first M elements contain the selected
+*          eigenvalues in ascending order.
+*
+*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M))
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix A
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          If an eigenvector fails to converge, then that column of Z
+*          contains the latest approximation to the eigenvector, and the
+*          index of the eigenvector is returned in IFAIL.
+*          If JOBZ = 'N', then Z is not referenced.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.  LWORK >= max(1,8*N).
+*          For optimal efficiency, LWORK >= (NB+3)*N,
+*          where NB is the max of the blocksize for DSYTRD and DORMTR
+*          returned by ILAENV.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace) INTEGER array, dimension (5*N)
+*
+*  IFAIL   (output) INTEGER array, dimension (N)
+*          If JOBZ = 'V', then if INFO = 0, the first M elements of
+*          IFAIL are zero.  If INFO > 0, then IFAIL contains the
+*          indices of the eigenvectors that failed to converge.
+*          If JOBZ = 'N', then IFAIL is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, then i eigenvectors failed to converge.
+*                Their indices are stored in array IFAIL.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, LOWER, LQUERY, VALEIG, WANTZ
+      CHARACTER          ORDER
+      INTEGER            I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL,
+     $                   INDISP, INDIWO, INDTAU, INDWKN, INDWRK, ISCALE,
+     $                   ITMP1, J, JJ, LLWORK, LLWRKN, LOPT, LWKOPT, NB,
+     $                   NSPLIT
+      DOUBLE PRECISION   ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN,
+     $                   SIGMA, SMLNUM, TMP1, VLL, VUU
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, DLANSY
+      EXTERNAL           LSAME, ILAENV, DLAMCH, DLANSY
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLACPY, DORGTR, DORMTR, DSCAL, DSTEBZ,
+     $                   DSTEIN, DSTEQR, DSTERF, DSWAP, DSYTRD, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      LOWER = LSAME( UPLO, 'L' )
+      WANTZ = LSAME( JOBZ, 'V' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE
+         IF( VALEIG ) THEN
+            IF( N.GT.0 .AND. VU.LE.VL )
+     $         INFO = -8
+         ELSE IF( INDEIG ) THEN
+            IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN
+               INFO = -9
+            ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN
+               INFO = -10
+            END IF
+         END IF
+      END IF
+      IF( INFO.EQ.0 ) THEN
+         IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+            INFO = -15
+         ELSE IF( LWORK.LT.MAX( 1, 8*N ) .AND. .NOT.LQUERY ) THEN
+            INFO = -17
+         END IF
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'DSYTRD', UPLO, N, -1, -1, -1 )
+         NB = MAX( NB, ILAENV( 1, 'DORMTR', UPLO, N, -1, -1, -1 ) )
+         LWKOPT = ( NB+3 )*N
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSYEVX', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( N.EQ.1 ) THEN
+         WORK( 1 ) = 7
+         IF( ALLEIG .OR. INDEIG ) THEN
+            M = 1
+            W( 1 ) = A( 1, 1 )
+         ELSE
+            IF( VL.LT.A( 1, 1 ) .AND. VU.GE.A( 1, 1 ) ) THEN
+               M = 1
+               W( 1 ) = A( 1, 1 )
+            END IF
+         END IF
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      EPS = DLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ISCALE = 0
+      ABSTLL = ABSTOL
+      VLL = VL
+      VUU = VU
+      ANRM = DLANSY( 'M', UPLO, N, A, LDA, WORK )
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         IF( LOWER ) THEN
+            DO 10 J = 1, N
+               CALL DSCAL( N-J+1, SIGMA, A( J, J ), 1 )
+   10       CONTINUE
+         ELSE
+            DO 20 J = 1, N
+               CALL DSCAL( J, SIGMA, A( 1, J ), 1 )
+   20       CONTINUE
+         END IF
+         IF( ABSTOL.GT.0 )
+     $      ABSTLL = ABSTOL*SIGMA
+         IF( VALEIG ) THEN
+            VLL = VL*SIGMA
+            VUU = VU*SIGMA
+         END IF
+      END IF
+*
+*     Call DSYTRD to reduce symmetric matrix to tridiagonal form.
+*
+      INDTAU = 1
+      INDE = INDTAU + N
+      INDD = INDE + N
+      INDWRK = INDD + N
+      LLWORK = LWORK - INDWRK + 1
+      CALL DSYTRD( UPLO, N, A, LDA, WORK( INDD ), WORK( INDE ),
+     $             WORK( INDTAU ), WORK( INDWRK ), LLWORK, IINFO )
+      LOPT = 3*N + WORK( INDWRK )
+*
+*     If all eigenvalues are desired and ABSTOL is less than or equal to
+*     zero, then call DSTERF or DORGTR and SSTEQR.  If this fails for
+*     some eigenvalue, then try DSTEBZ.
+*
+      IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND.
+     $    ( ABSTOL.LE.ZERO ) ) THEN
+         CALL DCOPY( N, WORK( INDD ), 1, W, 1 )
+         INDEE = INDWRK + 2*N
+         IF( .NOT.WANTZ ) THEN
+            CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 )
+            CALL DSTERF( N, W, WORK( INDEE ), INFO )
+         ELSE
+            CALL DLACPY( 'A', N, N, A, LDA, Z, LDZ )
+            CALL DORGTR( UPLO, N, Z, LDZ, WORK( INDTAU ),
+     $                   WORK( INDWRK ), LLWORK, IINFO )
+            CALL DCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 )
+            CALL DSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ,
+     $                   WORK( INDWRK ), INFO )
+            IF( INFO.EQ.0 ) THEN
+               DO 30 I = 1, N
+                  IFAIL( I ) = 0
+   30          CONTINUE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            M = N
+            GO TO 40
+         END IF
+         INFO = 0
+      END IF
+*
+*     Otherwise, call DSTEBZ and, if eigenvectors are desired, SSTEIN.
+*
+      IF( WANTZ ) THEN
+         ORDER = 'B'
+      ELSE
+         ORDER = 'E'
+      END IF
+      INDIBL = 1
+      INDISP = INDIBL + N
+      INDIWO = INDISP + N
+      CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL,
+     $             WORK( INDD ), WORK( INDE ), M, NSPLIT, W,
+     $             IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ),
+     $             IWORK( INDIWO ), INFO )
+*
+      IF( WANTZ ) THEN
+         CALL DSTEIN( N, WORK( INDD ), WORK( INDE ), M, W,
+     $                IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ,
+     $                WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO )
+*
+*        Apply orthogonal matrix used in reduction to tridiagonal
+*        form to eigenvectors returned by DSTEIN.
+*
+         INDWKN = INDE
+         LLWRKN = LWORK - INDWKN + 1
+         CALL DORMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z,
+     $                LDZ, WORK( INDWKN ), LLWRKN, IINFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+   40 CONTINUE
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = M
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL DSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+*     If eigenvalues are not in order, then sort them, along with
+*     eigenvectors.
+*
+      IF( WANTZ ) THEN
+         DO 60 J = 1, M - 1
+            I = 0
+            TMP1 = W( J )
+            DO 50 JJ = J + 1, M
+               IF( W( JJ ).LT.TMP1 ) THEN
+                  I = JJ
+                  TMP1 = W( JJ )
+               END IF
+   50       CONTINUE
+*
+            IF( I.NE.0 ) THEN
+               ITMP1 = IWORK( INDIBL+I-1 )
+               W( I ) = W( J )
+               IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 )
+               W( J ) = TMP1
+               IWORK( INDIBL+J-1 ) = ITMP1
+               CALL DSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 )
+               IF( INFO.NE.0 ) THEN
+                  ITMP1 = IFAIL( I )
+                  IFAIL( I ) = IFAIL( J )
+                  IFAIL( J ) = ITMP1
+               END IF
+            END IF
+   60    CONTINUE
+      END IF
+*
+*     Set WORK(1) to optimal workspace size.
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of DSYEVX
+*
+      END
diff --git a/costa/native/external/lapack/dsygs2.f b/costa/native/external/lapack/dsygs2.f
new file mode 100644
index 000000000..7120376c7
--- /dev/null
+++ b/costa/native/external/lapack/dsygs2.f
@@ -0,0 +1,212 @@
+      SUBROUTINE DSYGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, ITYPE, LDA, LDB, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYGS2 reduces a real symmetric-definite generalized eigenproblem
+*  to standard form.
+*
+*  If ITYPE = 1, the problem is A*x = lambda*B*x,
+*  and A is overwritten by inv(U')*A*inv(U) or inv(L)*A*inv(L')
+*
+*  If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
+*  B*A*x = lambda*x, and A is overwritten by U*A*U` or L'*A*L.
+*
+*  B must have been previously factorized as U'*U or L*L' by DPOTRF.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          = 1: compute inv(U')*A*inv(U) or inv(L)*A*inv(L');
+*          = 2 or 3: compute U*A*U' or L'*A*L.
+*
+*  UPLO    (input) CHARACTER
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored, and how B has been factorized.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          n by n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n by n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the transformed matrix, stored in the
+*          same format as A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input) DOUBLE PRECISION array, dimension (LDB,N)
+*          The triangular factor from the Cholesky factorization of B,
+*          as returned by DPOTRF.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, HALF
+      PARAMETER          ( ONE = 1.0D0, HALF = 0.5D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            K
+      DOUBLE PRECISION   AKK, BKK, CT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DSCAL, DSYR2, DTRMV, DTRSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSYGS2', -INFO )
+         RETURN
+      END IF
+*
+      IF( ITYPE.EQ.1 ) THEN
+         IF( UPPER ) THEN
+*
+*           Compute inv(U')*A*inv(U)
+*
+            DO 10 K = 1, N
+*
+*              Update the upper triangle of A(k:n,k:n)
+*
+               AKK = A( K, K )
+               BKK = B( K, K )
+               AKK = AKK / BKK**2
+               A( K, K ) = AKK
+               IF( K.LT.N ) THEN
+                  CALL DSCAL( N-K, ONE / BKK, A( K, K+1 ), LDA )
+                  CT = -HALF*AKK
+                  CALL DAXPY( N-K, CT, B( K, K+1 ), LDB, A( K, K+1 ),
+     $                        LDA )
+                  CALL DSYR2( UPLO, N-K, -ONE, A( K, K+1 ), LDA,
+     $                        B( K, K+1 ), LDB, A( K+1, K+1 ), LDA )
+                  CALL DAXPY( N-K, CT, B( K, K+1 ), LDB, A( K, K+1 ),
+     $                        LDA )
+                  CALL DTRSV( UPLO, 'Transpose', 'Non-unit', N-K,
+     $                        B( K+1, K+1 ), LDB, A( K, K+1 ), LDA )
+               END IF
+   10       CONTINUE
+         ELSE
+*
+*           Compute inv(L)*A*inv(L')
+*
+            DO 20 K = 1, N
+*
+*              Update the lower triangle of A(k:n,k:n)
+*
+               AKK = A( K, K )
+               BKK = B( K, K )
+               AKK = AKK / BKK**2
+               A( K, K ) = AKK
+               IF( K.LT.N ) THEN
+                  CALL DSCAL( N-K, ONE / BKK, A( K+1, K ), 1 )
+                  CT = -HALF*AKK
+                  CALL DAXPY( N-K, CT, B( K+1, K ), 1, A( K+1, K ), 1 )
+                  CALL DSYR2( UPLO, N-K, -ONE, A( K+1, K ), 1,
+     $                        B( K+1, K ), 1, A( K+1, K+1 ), LDA )
+                  CALL DAXPY( N-K, CT, B( K+1, K ), 1, A( K+1, K ), 1 )
+                  CALL DTRSV( UPLO, 'No transpose', 'Non-unit', N-K,
+     $                        B( K+1, K+1 ), LDB, A( K+1, K ), 1 )
+               END IF
+   20       CONTINUE
+         END IF
+      ELSE
+         IF( UPPER ) THEN
+*
+*           Compute U*A*U'
+*
+            DO 30 K = 1, N
+*
+*              Update the upper triangle of A(1:k,1:k)
+*
+               AKK = A( K, K )
+               BKK = B( K, K )
+               CALL DTRMV( UPLO, 'No transpose', 'Non-unit', K-1, B,
+     $                     LDB, A( 1, K ), 1 )
+               CT = HALF*AKK
+               CALL DAXPY( K-1, CT, B( 1, K ), 1, A( 1, K ), 1 )
+               CALL DSYR2( UPLO, K-1, ONE, A( 1, K ), 1, B( 1, K ), 1,
+     $                     A, LDA )
+               CALL DAXPY( K-1, CT, B( 1, K ), 1, A( 1, K ), 1 )
+               CALL DSCAL( K-1, BKK, A( 1, K ), 1 )
+               A( K, K ) = AKK*BKK**2
+   30       CONTINUE
+         ELSE
+*
+*           Compute L'*A*L
+*
+            DO 40 K = 1, N
+*
+*              Update the lower triangle of A(1:k,1:k)
+*
+               AKK = A( K, K )
+               BKK = B( K, K )
+               CALL DTRMV( UPLO, 'Transpose', 'Non-unit', K-1, B, LDB,
+     $                     A( K, 1 ), LDA )
+               CT = HALF*AKK
+               CALL DAXPY( K-1, CT, B( K, 1 ), LDB, A( K, 1 ), LDA )
+               CALL DSYR2( UPLO, K-1, ONE, A( K, 1 ), LDA, B( K, 1 ),
+     $                     LDB, A, LDA )
+               CALL DAXPY( K-1, CT, B( K, 1 ), LDB, A( K, 1 ), LDA )
+               CALL DSCAL( K-1, BKK, A( K, 1 ), LDA )
+               A( K, K ) = AKK*BKK**2
+   40       CONTINUE
+         END IF
+      END IF
+      RETURN
+*
+*     End of DSYGS2
+*
+      END
diff --git a/costa/native/external/lapack/dsygst.f b/costa/native/external/lapack/dsygst.f
new file mode 100644
index 000000000..7a42cd71b
--- /dev/null
+++ b/costa/native/external/lapack/dsygst.f
@@ -0,0 +1,250 @@
+      SUBROUTINE DSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, ITYPE, LDA, LDB, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYGST reduces a real symmetric-definite generalized eigenproblem
+*  to standard form.
+*
+*  If ITYPE = 1, the problem is A*x = lambda*B*x,
+*  and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T)
+*
+*  If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
+*  B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L.
+*
+*  B must have been previously factorized as U**T*U or L*L**T by DPOTRF.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T);
+*          = 2 or 3: compute U*A*U**T or L**T*A*L.
+*
+*  UPLO    (input) CHARACTER
+*          = 'U':  Upper triangle of A is stored and B is factored as
+*                  U**T*U;
+*          = 'L':  Lower triangle of A is stored and B is factored as
+*                  L*L**T.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the transformed matrix, stored in the
+*          same format as A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input) DOUBLE PRECISION array, dimension (LDB,N)
+*          The triangular factor from the Cholesky factorization of B,
+*          as returned by DPOTRF.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, HALF
+      PARAMETER          ( ONE = 1.0D0, HALF = 0.5D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            K, KB, NB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSYGS2, DSYMM, DSYR2K, DTRMM, DTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSYGST', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'DSYGST', UPLO, N, -1, -1, -1 )
+*
+      IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+*        Use unblocked code
+*
+         CALL DSYGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( ITYPE.EQ.1 ) THEN
+            IF( UPPER ) THEN
+*
+*              Compute inv(U')*A*inv(U)
+*
+               DO 10 K = 1, N, NB
+                  KB = MIN( N-K+1, NB )
+*
+*                 Update the upper triangle of A(k:n,k:n)
+*
+                  CALL DSYGS2( ITYPE, UPLO, KB, A( K, K ), LDA,
+     $                         B( K, K ), LDB, INFO )
+                  IF( K+KB.LE.N ) THEN
+                     CALL DTRSM( 'Left', UPLO, 'Transpose', 'Non-unit',
+     $                           KB, N-K-KB+1, ONE, B( K, K ), LDB,
+     $                           A( K, K+KB ), LDA )
+                     CALL DSYMM( 'Left', UPLO, KB, N-K-KB+1, -HALF,
+     $                           A( K, K ), LDA, B( K, K+KB ), LDB, ONE,
+     $                           A( K, K+KB ), LDA )
+                     CALL DSYR2K( UPLO, 'Transpose', N-K-KB+1, KB, -ONE,
+     $                            A( K, K+KB ), LDA, B( K, K+KB ), LDB,
+     $                            ONE, A( K+KB, K+KB ), LDA )
+                     CALL DSYMM( 'Left', UPLO, KB, N-K-KB+1, -HALF,
+     $                           A( K, K ), LDA, B( K, K+KB ), LDB, ONE,
+     $                           A( K, K+KB ), LDA )
+                     CALL DTRSM( 'Right', UPLO, 'No transpose',
+     $                           'Non-unit', KB, N-K-KB+1, ONE,
+     $                           B( K+KB, K+KB ), LDB, A( K, K+KB ),
+     $                           LDA )
+                  END IF
+   10          CONTINUE
+            ELSE
+*
+*              Compute inv(L)*A*inv(L')
+*
+               DO 20 K = 1, N, NB
+                  KB = MIN( N-K+1, NB )
+*
+*                 Update the lower triangle of A(k:n,k:n)
+*
+                  CALL DSYGS2( ITYPE, UPLO, KB, A( K, K ), LDA,
+     $                         B( K, K ), LDB, INFO )
+                  IF( K+KB.LE.N ) THEN
+                     CALL DTRSM( 'Right', UPLO, 'Transpose', 'Non-unit',
+     $                           N-K-KB+1, KB, ONE, B( K, K ), LDB,
+     $                           A( K+KB, K ), LDA )
+                     CALL DSYMM( 'Right', UPLO, N-K-KB+1, KB, -HALF,
+     $                           A( K, K ), LDA, B( K+KB, K ), LDB, ONE,
+     $                           A( K+KB, K ), LDA )
+                     CALL DSYR2K( UPLO, 'No transpose', N-K-KB+1, KB,
+     $                            -ONE, A( K+KB, K ), LDA, B( K+KB, K ),
+     $                            LDB, ONE, A( K+KB, K+KB ), LDA )
+                     CALL DSYMM( 'Right', UPLO, N-K-KB+1, KB, -HALF,
+     $                           A( K, K ), LDA, B( K+KB, K ), LDB, ONE,
+     $                           A( K+KB, K ), LDA )
+                     CALL DTRSM( 'Left', UPLO, 'No transpose',
+     $                           'Non-unit', N-K-KB+1, KB, ONE,
+     $                           B( K+KB, K+KB ), LDB, A( K+KB, K ),
+     $                           LDA )
+                  END IF
+   20          CONTINUE
+            END IF
+         ELSE
+            IF( UPPER ) THEN
+*
+*              Compute U*A*U'
+*
+               DO 30 K = 1, N, NB
+                  KB = MIN( N-K+1, NB )
+*
+*                 Update the upper triangle of A(1:k+kb-1,1:k+kb-1)
+*
+                  CALL DTRMM( 'Left', UPLO, 'No transpose', 'Non-unit',
+     $                        K-1, KB, ONE, B, LDB, A( 1, K ), LDA )
+                  CALL DSYMM( 'Right', UPLO, K-1, KB, HALF, A( K, K ),
+     $                        LDA, B( 1, K ), LDB, ONE, A( 1, K ), LDA )
+                  CALL DSYR2K( UPLO, 'No transpose', K-1, KB, ONE,
+     $                         A( 1, K ), LDA, B( 1, K ), LDB, ONE, A,
+     $                         LDA )
+                  CALL DSYMM( 'Right', UPLO, K-1, KB, HALF, A( K, K ),
+     $                        LDA, B( 1, K ), LDB, ONE, A( 1, K ), LDA )
+                  CALL DTRMM( 'Right', UPLO, 'Transpose', 'Non-unit',
+     $                        K-1, KB, ONE, B( K, K ), LDB, A( 1, K ),
+     $                        LDA )
+                  CALL DSYGS2( ITYPE, UPLO, KB, A( K, K ), LDA,
+     $                         B( K, K ), LDB, INFO )
+   30          CONTINUE
+            ELSE
+*
+*              Compute L'*A*L
+*
+               DO 40 K = 1, N, NB
+                  KB = MIN( N-K+1, NB )
+*
+*                 Update the lower triangle of A(1:k+kb-1,1:k+kb-1)
+*
+                  CALL DTRMM( 'Right', UPLO, 'No transpose', 'Non-unit',
+     $                        KB, K-1, ONE, B, LDB, A( K, 1 ), LDA )
+                  CALL DSYMM( 'Left', UPLO, KB, K-1, HALF, A( K, K ),
+     $                        LDA, B( K, 1 ), LDB, ONE, A( K, 1 ), LDA )
+                  CALL DSYR2K( UPLO, 'Transpose', K-1, KB, ONE,
+     $                         A( K, 1 ), LDA, B( K, 1 ), LDB, ONE, A,
+     $                         LDA )
+                  CALL DSYMM( 'Left', UPLO, KB, K-1, HALF, A( K, K ),
+     $                        LDA, B( K, 1 ), LDB, ONE, A( K, 1 ), LDA )
+                  CALL DTRMM( 'Left', UPLO, 'Transpose', 'Non-unit', KB,
+     $                        K-1, ONE, B( K, K ), LDB, A( K, 1 ), LDA )
+                  CALL DSYGS2( ITYPE, UPLO, KB, A( K, K ), LDA,
+     $                         B( K, K ), LDB, INFO )
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      RETURN
+*
+*     End of DSYGST
+*
+      END
diff --git a/costa/native/external/lapack/dsygv.f b/costa/native/external/lapack/dsygv.f
new file mode 100644
index 000000000..73c316cb8
--- /dev/null
+++ b/costa/native/external/lapack/dsygv.f
@@ -0,0 +1,227 @@
+      SUBROUTINE DSYGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK,
+     $                  LWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, ITYPE, LDA, LDB, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), W( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYGV computes all the eigenvalues, and optionally, the eigenvectors
+*  of a real generalized symmetric-definite eigenproblem, of the form
+*  A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.
+*  Here A and B are assumed to be symmetric and B is also
+*  positive definite.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          Specifies the problem type to be solved:
+*          = 1:  A*x = (lambda)*B*x
+*          = 2:  A*B*x = (lambda)*x
+*          = 3:  B*A*x = (lambda)*x
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA, N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of A contains the
+*          upper triangular part of the matrix A.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of A contains
+*          the lower triangular part of the matrix A.
+*
+*          On exit, if JOBZ = 'V', then if INFO = 0, A contains the
+*          matrix Z of eigenvectors.  The eigenvectors are normalized
+*          as follows:
+*          if ITYPE = 1 or 2, Z**T*B*Z = I;
+*          if ITYPE = 3, Z**T*inv(B)*Z = I.
+*          If JOBZ = 'N', then on exit the upper triangle (if UPLO='U')
+*          or the lower triangle (if UPLO='L') of A, including the
+*          diagonal, is destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB, N)
+*          On entry, the symmetric positive definite matrix B.
+*          If UPLO = 'U', the leading N-by-N upper triangular part of B
+*          contains the upper triangular part of the matrix B.
+*          If UPLO = 'L', the leading N-by-N lower triangular part of B
+*          contains the lower triangular part of the matrix B.
+*
+*          On exit, if INFO <= N, the part of B containing the matrix is
+*          overwritten by the triangular factor U or L from the Cholesky
+*          factorization B = U**T*U or B = L*L**T.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.  LWORK >= max(1,3*N-1).
+*          For optimal efficiency, LWORK >= (NB+2)*N,
+*          where NB is the blocksize for DSYTRD returned by ILAENV.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  DPOTRF or DSYEV returned an error code:
+*             <= N:  if INFO = i, DSYEV failed to converge;
+*                    i off-diagonal elements of an intermediate
+*                    tridiagonal form did not converge to zero;
+*             > N:   if INFO = N + i, for 1 <= i <= N, then the leading
+*                    minor of order i of B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, UPPER, WANTZ
+      CHARACTER          TRANS
+      INTEGER            LWKOPT, NB, NEIG
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DPOTRF, DSYEV, DSYGST, DTRMM, DTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LWORK.LT.MAX( 1, 3*N-1 ) .AND. .NOT.LQUERY ) THEN
+         INFO = -11
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'DSYTRD', UPLO, N, -1, -1, -1 )
+         LWKOPT = ( NB+2 )*N
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSYGV ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form a Cholesky factorization of B.
+*
+      CALL DPOTRF( UPLO, N, B, LDB, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem and solve.
+*
+      CALL DSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
+      CALL DSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO )
+*
+      IF( WANTZ ) THEN
+*
+*        Backtransform eigenvectors to the original problem.
+*
+         NEIG = N
+         IF( INFO.GT.0 )
+     $      NEIG = INFO - 1
+         IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN
+*
+*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
+*           backtransform eigenvectors: x = inv(L)'*y or inv(U)*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'N'
+            ELSE
+               TRANS = 'T'
+            END IF
+*
+            CALL DTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE,
+     $                  B, LDB, A, LDA )
+*
+         ELSE IF( ITYPE.EQ.3 ) THEN
+*
+*           For B*A*x=(lambda)*x;
+*           backtransform eigenvectors: x = L*y or U'*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'T'
+            ELSE
+               TRANS = 'N'
+            END IF
+*
+            CALL DTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE,
+     $                  B, LDB, A, LDA )
+         END IF
+      END IF
+*
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of DSYGV
+*
+      END
diff --git a/costa/native/external/lapack/dsygvd.f b/costa/native/external/lapack/dsygvd.f
new file mode 100644
index 000000000..7c250b4a1
--- /dev/null
+++ b/costa/native/external/lapack/dsygvd.f
@@ -0,0 +1,277 @@
+      SUBROUTINE DSYGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK,
+     $                   LWORK, IWORK, LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, ITYPE, LDA, LDB, LIWORK, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), W( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYGVD computes all the eigenvalues, and optionally, the eigenvectors
+*  of a real generalized symmetric-definite eigenproblem, of the form
+*  A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.  Here A and
+*  B are assumed to be symmetric and B is also positive definite.
+*  If eigenvectors are desired, it uses a divide and conquer algorithm.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          Specifies the problem type to be solved:
+*          = 1:  A*x = (lambda)*B*x
+*          = 2:  A*B*x = (lambda)*x
+*          = 3:  B*A*x = (lambda)*x
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA, N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of A contains the
+*          upper triangular part of the matrix A.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of A contains
+*          the lower triangular part of the matrix A.
+*
+*          On exit, if JOBZ = 'V', then if INFO = 0, A contains the
+*          matrix Z of eigenvectors.  The eigenvectors are normalized
+*          as follows:
+*          if ITYPE = 1 or 2, Z**T*B*Z = I;
+*          if ITYPE = 3, Z**T*inv(B)*Z = I.
+*          If JOBZ = 'N', then on exit the upper triangle (if UPLO='U')
+*          or the lower triangle (if UPLO='L') of A, including the
+*          diagonal, is destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB, N)
+*          On entry, the symmetric matrix B.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of B contains the
+*          upper triangular part of the matrix B.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of B contains
+*          the lower triangular part of the matrix B.
+*
+*          On exit, if INFO <= N, the part of B containing the matrix is
+*          overwritten by the triangular factor U or L from the Cholesky
+*          factorization B = U**T*U or B = L*L**T.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If N <= 1,               LWORK >= 1.
+*          If JOBZ = 'N' and N > 1, LWORK >= 2*N+1.
+*          If JOBZ = 'V' and N > 1, LWORK >= 1 + 6*N + 2*N**2.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.
+*          If N <= 1,                LIWORK >= 1.
+*          If JOBZ  = 'N' and N > 1, LIWORK >= 1.
+*          If JOBZ  = 'V' and N > 1, LIWORK >= 3 + 5*N.
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  DPOTRF or DSYEVD returned an error code:
+*             <= N:  if INFO = i, DSYEVD failed to converge;
+*                    i off-diagonal elements of an intermediate
+*                    tridiagonal form did not converge to zero;
+*             > N:   if INFO = N + i, for 1 <= i <= N, then the leading
+*                    minor of order i of B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, UPPER, WANTZ
+      CHARACTER          TRANS
+      INTEGER            LIOPT, LIWMIN, LOPT, LWMIN, NEIG
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DPOTRF, DSYEVD, DSYGST, DTRMM, DTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+      LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( N.LE.1 ) THEN
+         LIWMIN = 1
+         LWMIN = 1
+         LOPT = LWMIN
+         LIOPT = LIWMIN
+      ELSE
+         IF( WANTZ ) THEN
+            LIWMIN = 3 + 5*N
+            LWMIN = 1 + 6*N + 2*N**2
+         ELSE
+            LIWMIN = 1
+            LWMIN = 2*N + 1
+         END IF
+         LOPT = LWMIN
+         LIOPT = LIWMIN
+      END IF
+      IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -11
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LOPT
+         IWORK( 1 ) = LIOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSYGVD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form a Cholesky factorization of B.
+*
+      CALL DPOTRF( UPLO, N, B, LDB, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem and solve.
+*
+      CALL DSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
+      CALL DSYEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, IWORK, LIWORK,
+     $             INFO )
+      LOPT = MAX( DBLE( LOPT ), DBLE( WORK( 1 ) ) )
+      LIOPT = MAX( DBLE( LIOPT ), DBLE( IWORK( 1 ) ) )
+*
+      IF( WANTZ ) THEN
+*
+*        Backtransform eigenvectors to the original problem.
+*
+         NEIG = N
+         IF( INFO.GT.0 )
+     $      NEIG = INFO - 1
+         IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN
+*
+*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
+*           backtransform eigenvectors: x = inv(L)'*y or inv(U)*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'N'
+            ELSE
+               TRANS = 'T'
+            END IF
+*
+            CALL DTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE,
+     $                  B, LDB, A, LDA )
+*
+         ELSE IF( ITYPE.EQ.3 ) THEN
+*
+*           For B*A*x=(lambda)*x;
+*           backtransform eigenvectors: x = L*y or U'*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'T'
+            ELSE
+               TRANS = 'N'
+            END IF
+*
+            CALL DTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE,
+     $                  B, LDB, A, LDA )
+         END IF
+      END IF
+*
+      WORK( 1 ) = LOPT
+      IWORK( 1 ) = LIOPT
+*
+      RETURN
+*
+*     End of DSYGVD
+*
+      END
diff --git a/costa/native/external/lapack/dsygvx.f b/costa/native/external/lapack/dsygvx.f
new file mode 100644
index 000000000..0815bf584
--- /dev/null
+++ b/costa/native/external/lapack/dsygvx.f
@@ -0,0 +1,326 @@
+      SUBROUTINE DSYGVX( ITYPE, JOBZ, RANGE, UPLO, N, A, LDA, B, LDB,
+     $                   VL, VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK,
+     $                   LWORK, IWORK, IFAIL, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE, UPLO
+      INTEGER            IL, INFO, ITYPE, IU, LDA, LDB, LDZ, LWORK, M, N
+      DOUBLE PRECISION   ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IFAIL( * ), IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), W( * ), WORK( * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYGVX computes selected eigenvalues, and optionally, eigenvectors
+*  of a real generalized symmetric-definite eigenproblem, of the form
+*  A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.  Here A
+*  and B are assumed to be symmetric and B is also positive definite.
+*  Eigenvalues and eigenvectors can be selected by specifying either a
+*  range of values or a range of indices for the desired eigenvalues.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          Specifies the problem type to be solved:
+*          = 1:  A*x = (lambda)*B*x
+*          = 2:  A*B*x = (lambda)*x
+*          = 3:  B*A*x = (lambda)*x
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found.
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found.
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A and B are stored;
+*          = 'L':  Lower triangle of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix pencil (A,B).  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA, N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of A contains the
+*          upper triangular part of the matrix A.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of A contains
+*          the lower triangular part of the matrix A.
+*
+*          On exit, the lower triangle (if UPLO='L') or the upper
+*          triangle (if UPLO='U') of A, including the diagonal, is
+*          destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDA, N)
+*          On entry, the symmetric matrix B.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of B contains the
+*          upper triangular part of the matrix B.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of B contains
+*          the lower triangular part of the matrix B.
+*
+*          On exit, if INFO <= N, the part of B containing the matrix is
+*          overwritten by the triangular factor U or L from the Cholesky
+*          factorization B = U**T*U or B = L*L**T.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  VL      (input) DOUBLE PRECISION
+*  VU      (input) DOUBLE PRECISION
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) DOUBLE PRECISION
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less than
+*          or equal to zero, then  EPS*|T|  will be used in its place,
+*          where |T| is the 1-norm of the tridiagonal matrix obtained
+*          by reducing A to tridiagonal form.
+*
+*          Eigenvalues will be computed most accurately when ABSTOL is
+*          set to twice the underflow threshold 2*DLAMCH('S'), not zero.
+*          If this routine returns with INFO>0, indicating that some
+*          eigenvectors did not converge, try setting ABSTOL to
+*          2*DLAMCH('S').
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          On normal exit, the first M elements contain the selected
+*          eigenvalues in ascending order.
+*
+*  Z       (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M))
+*          If JOBZ = 'N', then Z is not referenced.
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix A
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          The eigenvectors are normalized as follows:
+*          if ITYPE = 1 or 2, Z**T*B*Z = I;
+*          if ITYPE = 3, Z**T*inv(B)*Z = I.
+*
+*          If an eigenvector fails to converge, then that column of Z
+*          contains the latest approximation to the eigenvector, and the
+*          index of the eigenvector is returned in IFAIL.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.  LWORK >= max(1,8*N).
+*          For optimal efficiency, LWORK >= (NB+3)*N,
+*          where NB is the blocksize for DSYTRD returned by ILAENV.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace) INTEGER array, dimension (5*N)
+*
+*  IFAIL   (output) INTEGER array, dimension (N)
+*          If JOBZ = 'V', then if INFO = 0, the first M elements of
+*          IFAIL are zero.  If INFO > 0, then IFAIL contains the
+*          indices of the eigenvectors that failed to converge.
+*          If JOBZ = 'N', then IFAIL is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  DPOTRF or DSYEVX returned an error code:
+*             <= N:  if INFO = i, DSYEVX failed to converge;
+*                    i eigenvectors failed to converge.  Their indices
+*                    are stored in array IFAIL.
+*             > N:   if INFO = N + i, for 1 <= i <= N, then the leading
+*                    minor of order i of B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, LQUERY, UPPER, VALEIG, WANTZ
+      CHARACTER          TRANS
+      INTEGER            LOPT, LWKOPT, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DPOTRF, DSYEVX, DSYGST, DTRMM, DTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      UPPER = LSAME( UPLO, 'U' )
+      WANTZ = LSAME( JOBZ, 'V' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( VALEIG .AND. N.GT.0 ) THEN
+         IF( VU.LE.VL )
+     $      INFO = -11
+      ELSE IF( INDEIG .AND. IL.LT.1 ) THEN
+         INFO = -12
+      ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN
+         INFO = -13
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -18
+      ELSE IF( LWORK.LT.MAX( 1, 8*N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -20
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'DSYTRD', UPLO, N, -1, -1, -1 )
+         LWKOPT = ( NB+3 )*N
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSYGVX', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+*     Form a Cholesky factorization of B.
+*
+      CALL DPOTRF( UPLO, N, B, LDB, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem and solve.
+*
+      CALL DSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
+      CALL DSYEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, ABSTOL,
+     $             M, W, Z, LDZ, WORK, LWORK, IWORK, IFAIL, INFO )
+      LOPT = WORK( 1 )
+*
+      IF( WANTZ ) THEN
+*
+*        Backtransform eigenvectors to the original problem.
+*
+         IF( INFO.GT.0 )
+     $      M = INFO - 1
+         IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN
+*
+*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
+*           backtransform eigenvectors: x = inv(L)'*y or inv(U)*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'N'
+            ELSE
+               TRANS = 'T'
+            END IF
+*
+            CALL DTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, M, ONE, B,
+     $                  LDB, Z, LDZ )
+*
+         ELSE IF( ITYPE.EQ.3 ) THEN
+*
+*           For B*A*x=(lambda)*x;
+*           backtransform eigenvectors: x = L*y or U'*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'T'
+            ELSE
+               TRANS = 'N'
+            END IF
+*
+            CALL DTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, M, ONE, B,
+     $                  LDB, Z, LDZ )
+         END IF
+      END IF
+*
+*     Set WORK(1) to optimal workspace size.
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of DSYGVX
+*
+      END
diff --git a/costa/native/external/lapack/dsyrfs.f b/costa/native/external/lapack/dsyrfs.f
new file mode 100644
index 000000000..64c745599
--- /dev/null
+++ b/costa/native/external/lapack/dsyrfs.f
@@ -0,0 +1,335 @@
+      SUBROUTINE DSYRFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB,
+     $                   X, LDX, FERR, BERR, WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
+     $                   BERR( * ), FERR( * ), WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYRFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is symmetric indefinite, and
+*  provides error bounds and backward error estimates for the solution.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+*          The symmetric matrix A.  If UPLO = 'U', the leading N-by-N
+*          upper triangular part of A contains the upper triangular part
+*          of the matrix A, and the strictly lower triangular part of A
+*          is not referenced.  If UPLO = 'L', the leading N-by-N lower
+*          triangular part of A contains the lower triangular part of
+*          the matrix A, and the strictly upper triangular part of A is
+*          not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  AF      (input) DOUBLE PRECISION array, dimension (LDAF,N)
+*          The factored form of the matrix A.  AF contains the block
+*          diagonal matrix D and the multipliers used to obtain the
+*          factor U or L from the factorization A = U*D*U**T or
+*          A = L*D*L**T as computed by DSYTRF.
+*
+*  LDAF    (input) INTEGER
+*          The leading dimension of the array AF.  LDAF >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by DSYTRF.
+*
+*  B       (input) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by DSYTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D+0 )
+      DOUBLE PRECISION   THREE
+      PARAMETER          ( THREE = 3.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            COUNT, I, J, K, KASE, NZ
+      DOUBLE PRECISION   EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DCOPY, DLACON, DSYMV, DSYTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSYRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = DLAMCH( 'Epsilon' )
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - A * X
+*
+         CALL DCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 )
+         CALL DSYMV( UPLO, N, -ONE, A, LDA, X( 1, J ), 1, ONE,
+     $               WORK( N+1 ), 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            WORK( I ) = ABS( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(A)*abs(X) + abs(B).
+*
+         IF( UPPER ) THEN
+            DO 50 K = 1, N
+               S = ZERO
+               XK = ABS( X( K, J ) )
+               DO 40 I = 1, K - 1
+                  WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK
+                  S = S + ABS( A( I, K ) )*ABS( X( I, J ) )
+   40          CONTINUE
+               WORK( K ) = WORK( K ) + ABS( A( K, K ) )*XK + S
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               XK = ABS( X( K, J ) )
+               WORK( K ) = WORK( K ) + ABS( A( K, K ) )*XK
+               DO 60 I = K + 1, N
+                  WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK
+                  S = S + ABS( A( I, K ) )*ABS( X( I, J ) )
+   60          CONTINUE
+               WORK( K ) = WORK( K ) + S
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) )
+            ELSE
+               S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) /
+     $             ( WORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL DSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK( N+1 ), N,
+     $                   INFO )
+            CALL DAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(A))*
+*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(A) is the inverse of A
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(A)*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use DLACON to estimate the infinity-norm of the matrix
+*           inv(A) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I )
+            ELSE
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL DLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ),
+     $                KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(A').
+*
+               CALL DSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK( N+1 ), N,
+     $                      INFO )
+               DO 110 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  110          CONTINUE
+            ELSE IF( KASE.EQ.2 ) THEN
+*
+*              Multiply by inv(A)*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  120          CONTINUE
+               CALL DSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK( N+1 ), N,
+     $                      INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, ABS( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of DSYRFS
+*
+      END
diff --git a/costa/native/external/lapack/dsysv.f b/costa/native/external/lapack/dsysv.f
new file mode 100644
index 000000000..d2f42ea8e
--- /dev/null
+++ b/costa/native/external/lapack/dsysv.f
@@ -0,0 +1,171 @@
+      SUBROUTINE DSYSV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK,
+     $                  LWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LDB, LWORK, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYSV computes the solution to a real system of linear equations
+*     A * X = B,
+*  where A is an N-by-N symmetric matrix and X and B are N-by-NRHS
+*  matrices.
+*
+*  The diagonal pivoting method is used to factor A as
+*     A = U * D * U**T,  if UPLO = 'U', or
+*     A = L * D * L**T,  if UPLO = 'L',
+*  where U (or L) is a product of permutation and unit upper (lower)
+*  triangular matrices, and D is symmetric and block diagonal with
+*  1-by-1 and 2-by-2 diagonal blocks.  The factored form of A is then
+*  used to solve the system of equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the block diagonal matrix D and the
+*          multipliers used to obtain the factor U or L from the
+*          factorization A = U*D*U**T or A = L*D*L**T as computed by
+*          DSYTRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D, as
+*          determined by DSYTRF.  If IPIV(k) > 0, then rows and columns
+*          k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1
+*          diagonal block.  If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0,
+*          then rows and columns k-1 and -IPIV(k) were interchanged and
+*          D(k-1:k,k-1:k) is a 2-by-2 diagonal block.  If UPLO = 'L' and
+*          IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and
+*          -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2
+*          diagonal block.
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of WORK.  LWORK >= 1, and for best performance
+*          LWORK >= N*NB, where NB is the optimal blocksize for
+*          DSYTRF.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, D(i,i) is exactly zero.  The factorization
+*               has been completed, but the block diagonal matrix D is
+*               exactly singular, so the solution could not be computed.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            LWKOPT, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSYTRF, DSYTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN
+         INFO = -10
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'DSYTRF', UPLO, N, -1, -1, -1 )
+         LWKOPT = N*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSYSV ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Compute the factorization A = U*D*U' or A = L*D*L'.
+*
+      CALL DSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL DSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
+*
+      END IF
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of DSYSV
+*
+      END
diff --git a/costa/native/external/lapack/dsysvx.f b/costa/native/external/lapack/dsysvx.f
new file mode 100644
index 000000000..dbd51f84e
--- /dev/null
+++ b/costa/native/external/lapack/dsysvx.f
@@ -0,0 +1,297 @@
+      SUBROUTINE DSYSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B,
+     $                   LDB, X, LDX, RCOND, FERR, BERR, WORK, LWORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          FACT, UPLO
+      INTEGER            INFO, LDA, LDAF, LDB, LDX, LWORK, N, NRHS
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
+     $                   BERR( * ), FERR( * ), WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYSVX uses the diagonal pivoting factorization to compute the
+*  solution to a real system of linear equations A * X = B,
+*  where A is an N-by-N symmetric matrix and X and B are N-by-NRHS
+*  matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'N', the diagonal pivoting method is used to factor A.
+*     The form of the factorization is
+*        A = U * D * U**T,  if UPLO = 'U', or
+*        A = L * D * L**T,  if UPLO = 'L',
+*     where U (or L) is a product of permutation and unit upper (lower)
+*     triangular matrices, and D is symmetric and block diagonal with
+*     1-by-1 and 2-by-2 diagonal blocks.
+*
+*  2. If some D(i,i)=0, so that D is exactly singular, then the routine
+*     returns with INFO = i. Otherwise, the factored form of A is used
+*     to estimate the condition number of the matrix A.  If the
+*     reciprocal of the condition number is less than machine precision,
+*     INFO = N+1 is returned as a warning, but the routine still goes on
+*     to solve for X and compute error bounds as described below.
+*
+*  3. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  4. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of A has been
+*          supplied on entry.
+*          = 'F':  On entry, AF and IPIV contain the factored form of
+*                  A.  AF and IPIV will not be modified.
+*          = 'N':  The matrix A will be copied to AF and factored.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+*          The symmetric matrix A.  If UPLO = 'U', the leading N-by-N
+*          upper triangular part of A contains the upper triangular part
+*          of the matrix A, and the strictly lower triangular part of A
+*          is not referenced.  If UPLO = 'L', the leading N-by-N lower
+*          triangular part of A contains the lower triangular part of
+*          the matrix A, and the strictly upper triangular part of A is
+*          not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  AF      (input or output) DOUBLE PRECISION array, dimension (LDAF,N)
+*          If FACT = 'F', then AF is an input argument and on entry
+*          contains the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L from the factorization
+*          A = U*D*U**T or A = L*D*L**T as computed by DSYTRF.
+*
+*          If FACT = 'N', then AF is an output argument and on exit
+*          returns the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L from the factorization
+*          A = U*D*U**T or A = L*D*L**T.
+*
+*  LDAF    (input) INTEGER
+*          The leading dimension of the array AF.  LDAF >= max(1,N).
+*
+*  IPIV    (input or output) INTEGER array, dimension (N)
+*          If FACT = 'F', then IPIV is an input argument and on entry
+*          contains details of the interchanges and the block structure
+*          of D, as determined by DSYTRF.
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*          If FACT = 'N', then IPIV is an output argument and on exit
+*          contains details of the interchanges and the block structure
+*          of D, as determined by DSYTRF.
+*
+*  B       (input) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          The N-by-NRHS right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) DOUBLE PRECISION array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The estimate of the reciprocal condition number of the matrix
+*          A.  If RCOND is less than the machine precision (in
+*          particular, if RCOND = 0), the matrix is singular to working
+*          precision.  This condition is indicated by a return code of
+*          INFO > 0.
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of WORK.  LWORK >= 3*N, and for best performance
+*          LWORK >= N*NB, where NB is the optimal blocksize for
+*          DSYTRF.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, and i is
+*                <= N:  D(i,i) is exactly zero.  The factorization
+*                       has been completed but the factor D is exactly
+*                       singular, so the solution and error bounds could
+*                       not be computed. RCOND = 0 is returned.
+*                = N+1: D is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, NOFACT
+      INTEGER            LWKOPT, NB
+      DOUBLE PRECISION   ANORM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, DLANSY
+      EXTERNAL           LSAME, ILAENV, DLAMCH, DLANSY
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLACPY, DSYCON, DSYRFS, DSYTRF, DSYTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -13
+      ELSE IF( LWORK.LT.MAX( 1, 3*N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -18
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'DSYTRF', UPLO, N, -1, -1, -1 )
+         LWKOPT = N*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSYSVX', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+      IF( NOFACT ) THEN
+*
+*        Compute the factorization A = U*D*U' or A = L*D*L'.
+*
+         CALL DLACPY( UPLO, N, N, A, LDA, AF, LDAF )
+         CALL DSYTRF( UPLO, N, AF, LDAF, IPIV, WORK, LWORK, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      ANORM = DLANSY( 'I', UPLO, N, A, LDA, WORK )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL DSYCON( UPLO, N, AF, LDAF, IPIV, ANORM, RCOND, WORK, IWORK,
+     $             INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.DLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution vectors X.
+*
+      CALL DLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL DSYTRS( UPLO, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solutions and
+*     compute error bounds and backward error estimates for them.
+*
+      CALL DSYRFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, X,
+     $             LDX, FERR, BERR, WORK, IWORK, INFO )
+*
+      RETURN
+*
+*     End of DSYSVX
+*
+      END
diff --git a/costa/native/external/lapack/dsytd2.f b/costa/native/external/lapack/dsytd2.f
new file mode 100644
index 000000000..a0a086191
--- /dev/null
+++ b/costa/native/external/lapack/dsytd2.f
@@ -0,0 +1,249 @@
+      SUBROUTINE DSYTD2( UPLO, N, A, LDA, D, E, TAU, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), D( * ), E( * ), TAU( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYTD2 reduces a real symmetric matrix A to symmetric tridiagonal
+*  form T by an orthogonal similarity transformation: Q' * A * Q = T.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored:
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          n-by-n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n-by-n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*          On exit, if UPLO = 'U', the diagonal and first superdiagonal
+*          of A are overwritten by the corresponding elements of the
+*          tridiagonal matrix T, and the elements above the first
+*          superdiagonal, with the array TAU, represent the orthogonal
+*          matrix Q as a product of elementary reflectors; if UPLO
+*          = 'L', the diagonal and first subdiagonal of A are over-
+*          written by the corresponding elements of the tridiagonal
+*          matrix T, and the elements below the first subdiagonal, with
+*          the array TAU, represent the orthogonal matrix Q as a product
+*          of elementary reflectors. See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  D       (output) DOUBLE PRECISION array, dimension (N)
+*          The diagonal elements of the tridiagonal matrix T:
+*          D(i) = A(i,i).
+*
+*  E       (output) DOUBLE PRECISION array, dimension (N-1)
+*          The off-diagonal elements of the tridiagonal matrix T:
+*          E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.
+*
+*  TAU     (output) DOUBLE PRECISION array, dimension (N-1)
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  If UPLO = 'U', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(n-1) . . . H(2) H(1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
+*  A(1:i-1,i+1), and tau in TAU(i).
+*
+*  If UPLO = 'L', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(1) H(2) . . . H(n-1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i),
+*  and tau in TAU(i).
+*
+*  The contents of A on exit are illustrated by the following examples
+*  with n = 5:
+*
+*  if UPLO = 'U':                       if UPLO = 'L':
+*
+*    (  d   e   v2  v3  v4 )              (  d                  )
+*    (      d   e   v3  v4 )              (  e   d              )
+*    (          d   e   v4 )              (  v1  e   d          )
+*    (              d   e  )              (  v1  v2  e   d      )
+*    (                  d  )              (  v1  v2  v3  e   d  )
+*
+*  where d and e denote diagonal and off-diagonal elements of T, and vi
+*  denotes an element of the vector defining H(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO, HALF
+      PARAMETER          ( ONE = 1.0D0, ZERO = 0.0D0,
+     $                   HALF = 1.0D0 / 2.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I
+      DOUBLE PRECISION   ALPHA, TAUI
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DLARFG, DSYMV, DSYR2, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DDOT
+      EXTERNAL           LSAME, DDOT
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSYTD2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Reduce the upper triangle of A
+*
+         DO 10 I = N - 1, 1, -1
+*
+*           Generate elementary reflector H(i) = I - tau * v * v'
+*           to annihilate A(1:i-1,i+1)
+*
+            CALL DLARFG( I, A( I, I+1 ), A( 1, I+1 ), 1, TAUI )
+            E( I ) = A( I, I+1 )
+*
+            IF( TAUI.NE.ZERO ) THEN
+*
+*              Apply H(i) from both sides to A(1:i,1:i)
+*
+               A( I, I+1 ) = ONE
+*
+*              Compute  x := tau * A * v  storing x in TAU(1:i)
+*
+               CALL DSYMV( UPLO, I, TAUI, A, LDA, A( 1, I+1 ), 1, ZERO,
+     $                     TAU, 1 )
+*
+*              Compute  w := x - 1/2 * tau * (x'*v) * v
+*
+               ALPHA = -HALF*TAUI*DDOT( I, TAU, 1, A( 1, I+1 ), 1 )
+               CALL DAXPY( I, ALPHA, A( 1, I+1 ), 1, TAU, 1 )
+*
+*              Apply the transformation as a rank-2 update:
+*                 A := A - v * w' - w * v'
+*
+               CALL DSYR2( UPLO, I, -ONE, A( 1, I+1 ), 1, TAU, 1, A,
+     $                     LDA )
+*
+               A( I, I+1 ) = E( I )
+            END IF
+            D( I+1 ) = A( I+1, I+1 )
+            TAU( I ) = TAUI
+   10    CONTINUE
+         D( 1 ) = A( 1, 1 )
+      ELSE
+*
+*        Reduce the lower triangle of A
+*
+         DO 20 I = 1, N - 1
+*
+*           Generate elementary reflector H(i) = I - tau * v * v'
+*           to annihilate A(i+2:n,i)
+*
+            CALL DLARFG( N-I, A( I+1, I ), A( MIN( I+2, N ), I ), 1,
+     $                   TAUI )
+            E( I ) = A( I+1, I )
+*
+            IF( TAUI.NE.ZERO ) THEN
+*
+*              Apply H(i) from both sides to A(i+1:n,i+1:n)
+*
+               A( I+1, I ) = ONE
+*
+*              Compute  x := tau * A * v  storing y in TAU(i:n-1)
+*
+               CALL DSYMV( UPLO, N-I, TAUI, A( I+1, I+1 ), LDA,
+     $                     A( I+1, I ), 1, ZERO, TAU( I ), 1 )
+*
+*              Compute  w := x - 1/2 * tau * (x'*v) * v
+*
+               ALPHA = -HALF*TAUI*DDOT( N-I, TAU( I ), 1, A( I+1, I ),
+     $                 1 )
+               CALL DAXPY( N-I, ALPHA, A( I+1, I ), 1, TAU( I ), 1 )
+*
+*              Apply the transformation as a rank-2 update:
+*                 A := A - v * w' - w * v'
+*
+               CALL DSYR2( UPLO, N-I, -ONE, A( I+1, I ), 1, TAU( I ), 1,
+     $                     A( I+1, I+1 ), LDA )
+*
+               A( I+1, I ) = E( I )
+            END IF
+            D( I ) = A( I, I )
+            TAU( I ) = TAUI
+   20    CONTINUE
+         D( N ) = A( N, N )
+      END IF
+*
+      RETURN
+*
+*     End of DSYTD2
+*
+      END
diff --git a/costa/native/external/lapack/dsytf2.f b/costa/native/external/lapack/dsytf2.f
new file mode 100644
index 000000000..1a9d69ee2
--- /dev/null
+++ b/costa/native/external/lapack/dsytf2.f
@@ -0,0 +1,511 @@
+      SUBROUTINE DSYTF2( UPLO, N, A, LDA, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYTF2 computes the factorization of a real symmetric matrix A using
+*  the Bunch-Kaufman diagonal pivoting method:
+*
+*     A = U*D*U'  or  A = L*D*L'
+*
+*  where U (or L) is a product of permutation and unit upper (lower)
+*  triangular matrices, U' is the transpose of U, and D is symmetric and
+*  block diagonal with 1-by-1 and 2-by-2 diagonal blocks.
+*
+*  This is the unblocked version of the algorithm, calling Level 2 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored:
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          n-by-n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n-by-n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L (see below for further details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D.
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*          > 0: if INFO = k, D(k,k) is exactly zero.  The factorization
+*               has been completed, but the block diagonal matrix D is
+*               exactly singular, and division by zero will occur if it
+*               is used to solve a system of equations.
+*
+*  Further Details
+*  ===============
+*
+*  1-96 - Based on modifications by J. Lewis, Boeing Computer Services
+*         Company
+*
+*  If UPLO = 'U', then A = U*D*U', where
+*     U = P(n)*U(n)* ... *P(k)U(k)* ...,
+*  i.e., U is a product of terms P(k)*U(k), where k decreases from n to
+*  1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and U(k) is a unit upper triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    v    0   )   k-s
+*     U(k) =  (   0    I    0   )   s
+*             (   0    0    I   )   n-k
+*                k-s   s   n-k
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k).
+*  If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k),
+*  and A(k,k), and v overwrites A(1:k-2,k-1:k).
+*
+*  If UPLO = 'L', then A = L*D*L', where
+*     L = P(1)*L(1)* ... *P(k)*L(k)* ...,
+*  i.e., L is a product of terms P(k)*L(k), where k increases from 1 to
+*  n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and L(k) is a unit lower triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    0     0   )  k-1
+*     L(k) =  (   0    I     0   )  s
+*             (   0    v     I   )  n-k-s+1
+*                k-1   s  n-k-s+1
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k).
+*  If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k),
+*  and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+      DOUBLE PRECISION   EIGHT, SEVTEN
+      PARAMETER          ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, IMAX, J, JMAX, K, KK, KP, KSTEP
+      DOUBLE PRECISION   ABSAKK, ALPHA, COLMAX, D11, D12, D21, D22, R1,
+     $                   ROWMAX, T, WK, WKM1, WKP1
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IDAMAX
+      EXTERNAL           LSAME, IDAMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSCAL, DSWAP, DSYR, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSYTF2', -INFO )
+         RETURN
+      END IF
+*
+*     Initialize ALPHA for use in choosing pivot block size.
+*
+      ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT
+*
+      IF( UPPER ) THEN
+*
+*        Factorize A as U*D*U' using the upper triangle of A
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2
+*
+         K = N
+   10    CONTINUE
+*
+*        If K < 1, exit from loop
+*
+         IF( K.LT.1 )
+     $      GO TO 70
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = ABS( A( K, K ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.GT.1 ) THEN
+            IMAX = IDAMAX( K-1, A( 1, K ), 1 )
+            COLMAX = ABS( A( IMAX, K ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               JMAX = IMAX + IDAMAX( K-IMAX, A( IMAX, IMAX+1 ), LDA )
+               ROWMAX = ABS( A( IMAX, JMAX ) )
+               IF( IMAX.GT.1 ) THEN
+                  JMAX = IDAMAX( IMAX-1, A( 1, IMAX ), 1 )
+                  ROWMAX = MAX( ROWMAX, ABS( A( JMAX, IMAX ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( ABS( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX ) THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+               ELSE
+*
+*                 interchange rows and columns K-1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K - KSTEP + 1
+            IF( KP.NE.KK ) THEN
+*
+*              Interchange rows and columns KK and KP in the leading
+*              submatrix A(1:k,1:k)
+*
+               CALL DSWAP( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 )
+               CALL DSWAP( KK-KP-1, A( KP+1, KK ), 1, A( KP, KP+1 ),
+     $                     LDA )
+               T = A( KK, KK )
+               A( KK, KK ) = A( KP, KP )
+               A( KP, KP ) = T
+               IF( KSTEP.EQ.2 ) THEN
+                  T = A( K-1, K )
+                  A( K-1, K ) = A( KP, K )
+                  A( KP, K ) = T
+               END IF
+            END IF
+*
+*           Update the leading submatrix
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column k now holds
+*
+*              W(k) = U(k)*D(k)
+*
+*              where U(k) is the k-th column of U
+*
+*              Perform a rank-1 update of A(1:k-1,1:k-1) as
+*
+*              A := A - U(k)*D(k)*U(k)' = A - W(k)*1/D(k)*W(k)'
+*
+               R1 = ONE / A( K, K )
+               CALL DSYR( UPLO, K-1, -R1, A( 1, K ), 1, A, LDA )
+*
+*              Store U(k) in column k
+*
+               CALL DSCAL( K-1, R1, A( 1, K ), 1 )
+            ELSE
+*
+*              2-by-2 pivot block D(k): columns k and k-1 now hold
+*
+*              ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k)
+*
+*              where U(k) and U(k-1) are the k-th and (k-1)-th columns
+*              of U
+*
+*              Perform a rank-2 update of A(1:k-2,1:k-2) as
+*
+*              A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )'
+*                 = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )'
+*
+               IF( K.GT.2 ) THEN
+*
+                  D12 = A( K-1, K )
+                  D22 = A( K-1, K-1 ) / D12
+                  D11 = A( K, K ) / D12
+                  T = ONE / ( D11*D22-ONE )
+                  D12 = T / D12
+*
+                  DO 30 J = K - 2, 1, -1
+                     WKM1 = D12*( D11*A( J, K-1 )-A( J, K ) )
+                     WK = D12*( D22*A( J, K )-A( J, K-1 ) )
+                     DO 20 I = J, 1, -1
+                        A( I, J ) = A( I, J ) - A( I, K )*WK -
+     $                              A( I, K-1 )*WKM1
+   20                CONTINUE
+                     A( J, K ) = WK
+                     A( J, K-1 ) = WKM1
+   30             CONTINUE
+*
+               END IF
+*
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K-1 ) = -KP
+         END IF
+*
+*        Decrease K and return to the start of the main loop
+*
+         K = K - KSTEP
+         GO TO 10
+*
+      ELSE
+*
+*        Factorize A as L*D*L' using the lower triangle of A
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2
+*
+         K = 1
+   40    CONTINUE
+*
+*        If K > N, exit from loop
+*
+         IF( K.GT.N )
+     $      GO TO 70
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = ABS( A( K, K ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.LT.N ) THEN
+            IMAX = K + IDAMAX( N-K, A( K+1, K ), 1 )
+            COLMAX = ABS( A( IMAX, K ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               JMAX = K - 1 + IDAMAX( IMAX-K, A( IMAX, K ), LDA )
+               ROWMAX = ABS( A( IMAX, JMAX ) )
+               IF( IMAX.LT.N ) THEN
+                  JMAX = IMAX + IDAMAX( N-IMAX, A( IMAX+1, IMAX ), 1 )
+                  ROWMAX = MAX( ROWMAX, ABS( A( JMAX, IMAX ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( ABS( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX ) THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+               ELSE
+*
+*                 interchange rows and columns K+1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K + KSTEP - 1
+            IF( KP.NE.KK ) THEN
+*
+*              Interchange rows and columns KK and KP in the trailing
+*              submatrix A(k:n,k:n)
+*
+               IF( KP.LT.N )
+     $            CALL DSWAP( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 )
+               CALL DSWAP( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ),
+     $                     LDA )
+               T = A( KK, KK )
+               A( KK, KK ) = A( KP, KP )
+               A( KP, KP ) = T
+               IF( KSTEP.EQ.2 ) THEN
+                  T = A( K+1, K )
+                  A( K+1, K ) = A( KP, K )
+                  A( KP, K ) = T
+               END IF
+            END IF
+*
+*           Update the trailing submatrix
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column k now holds
+*
+*              W(k) = L(k)*D(k)
+*
+*              where L(k) is the k-th column of L
+*
+               IF( K.LT.N ) THEN
+*
+*                 Perform a rank-1 update of A(k+1:n,k+1:n) as
+*
+*                 A := A - L(k)*D(k)*L(k)' = A - W(k)*(1/D(k))*W(k)'
+*
+                  D11 = ONE / A( K, K )
+                  CALL DSYR( UPLO, N-K, -D11, A( K+1, K ), 1,
+     $                       A( K+1, K+1 ), LDA )
+*
+*                 Store L(k) in column K
+*
+                  CALL DSCAL( N-K, D11, A( K+1, K ), 1 )
+               END IF
+            ELSE
+*
+*              2-by-2 pivot block D(k)
+*
+               IF( K.LT.N-1 ) THEN
+*
+*                 Perform a rank-2 update of A(k+2:n,k+2:n) as
+*
+*                 A := A - ( (A(k) A(k+1))*D(k)**(-1) ) * (A(k) A(k+1))'
+*
+*                 where L(k) and L(k+1) are the k-th and (k+1)-th
+*                 columns of L
+*
+                  D21 = A( K+1, K )
+                  D11 = A( K+1, K+1 ) / D21
+                  D22 = A( K, K ) / D21
+                  T = ONE / ( D11*D22-ONE )
+                  D21 = T / D21
+*
+                  DO 60 J = K + 2, N
+*
+                     WK = D21*( D11*A( J, K )-A( J, K+1 ) )
+                     WKP1 = D21*( D22*A( J, K+1 )-A( J, K ) )
+*
+                     DO 50 I = J, N
+                        A( I, J ) = A( I, J ) - A( I, K )*WK -
+     $                              A( I, K+1 )*WKP1
+   50                CONTINUE
+*
+                     A( J, K ) = WK
+                     A( J, K+1 ) = WKP1
+*
+   60             CONTINUE
+               END IF
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K+1 ) = -KP
+         END IF
+*
+*        Increase K and return to the start of the main loop
+*
+         K = K + KSTEP
+         GO TO 40
+*
+      END IF
+*
+   70 CONTINUE
+*
+      RETURN
+*
+*     End of DSYTF2
+*
+      END
diff --git a/costa/native/external/lapack/dsytrd.f b/costa/native/external/lapack/dsytrd.f
new file mode 100644
index 000000000..583675c3f
--- /dev/null
+++ b/costa/native/external/lapack/dsytrd.f
@@ -0,0 +1,295 @@
+      SUBROUTINE DSYTRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), D( * ), E( * ), TAU( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYTRD reduces a real symmetric matrix A to real symmetric
+*  tridiagonal form T by an orthogonal similarity transformation:
+*  Q**T * A * Q = T.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*          On exit, if UPLO = 'U', the diagonal and first superdiagonal
+*          of A are overwritten by the corresponding elements of the
+*          tridiagonal matrix T, and the elements above the first
+*          superdiagonal, with the array TAU, represent the orthogonal
+*          matrix Q as a product of elementary reflectors; if UPLO
+*          = 'L', the diagonal and first subdiagonal of A are over-
+*          written by the corresponding elements of the tridiagonal
+*          matrix T, and the elements below the first subdiagonal, with
+*          the array TAU, represent the orthogonal matrix Q as a product
+*          of elementary reflectors. See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  D       (output) DOUBLE PRECISION array, dimension (N)
+*          The diagonal elements of the tridiagonal matrix T:
+*          D(i) = A(i,i).
+*
+*  E       (output) DOUBLE PRECISION array, dimension (N-1)
+*          The off-diagonal elements of the tridiagonal matrix T:
+*          E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.
+*
+*  TAU     (output) DOUBLE PRECISION array, dimension (N-1)
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= 1.
+*          For optimum performance LWORK >= N*NB, where NB is the
+*          optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  If UPLO = 'U', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(n-1) . . . H(2) H(1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
+*  A(1:i-1,i+1), and tau in TAU(i).
+*
+*  If UPLO = 'L', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(1) H(2) . . . H(n-1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i),
+*  and tau in TAU(i).
+*
+*  The contents of A on exit are illustrated by the following examples
+*  with n = 5:
+*
+*  if UPLO = 'U':                       if UPLO = 'L':
+*
+*    (  d   e   v2  v3  v4 )              (  d                  )
+*    (      d   e   v3  v4 )              (  e   d              )
+*    (          d   e   v4 )              (  v1  e   d          )
+*    (              d   e  )              (  v1  v2  e   d      )
+*    (                  d  )              (  v1  v2  v3  e   d  )
+*
+*  where d and e denote diagonal and off-diagonal elements of T, and vi
+*  denotes an element of the vector defining H(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, UPPER
+      INTEGER            I, IINFO, IWS, J, KK, LDWORK, LWKOPT, NB,
+     $                   NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLATRD, DSYR2K, DSYTD2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN
+         INFO = -9
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size.
+*
+         NB = ILAENV( 1, 'DSYTRD', UPLO, N, -1, -1, -1 )
+         LWKOPT = N*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSYTRD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NX = N
+      IWS = 1
+      IF( NB.GT.1 .AND. NB.LT.N ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code
+*        (last block is always handled by unblocked code).
+*
+         NX = MAX( NB, ILAENV( 3, 'DSYTRD', UPLO, N, -1, -1, -1 ) )
+         IF( NX.LT.N ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = N
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  determine the
+*              minimum value of NB, and reduce NB or force use of
+*              unblocked code by setting NX = N.
+*
+               NB = MAX( LWORK / LDWORK, 1 )
+               NBMIN = ILAENV( 2, 'DSYTRD', UPLO, N, -1, -1, -1 )
+               IF( NB.LT.NBMIN )
+     $            NX = N
+            END IF
+         ELSE
+            NX = N
+         END IF
+      ELSE
+         NB = 1
+      END IF
+*
+      IF( UPPER ) THEN
+*
+*        Reduce the upper triangle of A.
+*        Columns 1:kk are handled by the unblocked method.
+*
+         KK = N - ( ( N-NX+NB-1 ) / NB )*NB
+         DO 20 I = N - NB + 1, KK + 1, -NB
+*
+*           Reduce columns i:i+nb-1 to tridiagonal form and form the
+*           matrix W which is needed to update the unreduced part of
+*           the matrix
+*
+            CALL DLATRD( UPLO, I+NB-1, NB, A, LDA, E, TAU, WORK,
+     $                   LDWORK )
+*
+*           Update the unreduced submatrix A(1:i-1,1:i-1), using an
+*           update of the form:  A := A - V*W' - W*V'
+*
+            CALL DSYR2K( UPLO, 'No transpose', I-1, NB, -ONE, A( 1, I ),
+     $                   LDA, WORK, LDWORK, ONE, A, LDA )
+*
+*           Copy superdiagonal elements back into A, and diagonal
+*           elements into D
+*
+            DO 10 J = I, I + NB - 1
+               A( J-1, J ) = E( J-1 )
+               D( J ) = A( J, J )
+   10       CONTINUE
+   20    CONTINUE
+*
+*        Use unblocked code to reduce the last or only block
+*
+         CALL DSYTD2( UPLO, KK, A, LDA, D, E, TAU, IINFO )
+      ELSE
+*
+*        Reduce the lower triangle of A
+*
+         DO 40 I = 1, N - NX, NB
+*
+*           Reduce columns i:i+nb-1 to tridiagonal form and form the
+*           matrix W which is needed to update the unreduced part of
+*           the matrix
+*
+            CALL DLATRD( UPLO, N-I+1, NB, A( I, I ), LDA, E( I ),
+     $                   TAU( I ), WORK, LDWORK )
+*
+*           Update the unreduced submatrix A(i+ib:n,i+ib:n), using
+*           an update of the form:  A := A - V*W' - W*V'
+*
+            CALL DSYR2K( UPLO, 'No transpose', N-I-NB+1, NB, -ONE,
+     $                   A( I+NB, I ), LDA, WORK( NB+1 ), LDWORK, ONE,
+     $                   A( I+NB, I+NB ), LDA )
+*
+*           Copy subdiagonal elements back into A, and diagonal
+*           elements into D
+*
+            DO 30 J = I, I + NB - 1
+               A( J+1, J ) = E( J )
+               D( J ) = A( J, J )
+   30       CONTINUE
+   40    CONTINUE
+*
+*        Use unblocked code to reduce the last or only block
+*
+         CALL DSYTD2( UPLO, N-I+1, A( I, I ), LDA, D( I ), E( I ),
+     $                TAU( I ), IINFO )
+      END IF
+*
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of DSYTRD
+*
+      END
diff --git a/costa/native/external/lapack/dsytrf.f b/costa/native/external/lapack/dsytrf.f
new file mode 100644
index 000000000..d1199f2bf
--- /dev/null
+++ b/costa/native/external/lapack/dsytrf.f
@@ -0,0 +1,288 @@
+      SUBROUTINE DSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYTRF computes the factorization of a real symmetric matrix A using
+*  the Bunch-Kaufman diagonal pivoting method.  The form of the
+*  factorization is
+*
+*     A = U*D*U**T  or  A = L*D*L**T
+*
+*  where U (or L) is a product of permutation and unit upper (lower)
+*  triangular matrices, and D is symmetric and block diagonal with
+*  1-by-1 and 2-by-2 diagonal blocks.
+*
+*  This is the blocked version of the algorithm, calling Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L (see below for further details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D.
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of WORK.  LWORK >=1.  For best performance
+*          LWORK >= N*NB, where NB is the block size returned by ILAENV.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, D(i,i) is exactly zero.  The factorization
+*                has been completed, but the block diagonal matrix D is
+*                exactly singular, and division by zero will occur if it
+*                is used to solve a system of equations.
+*
+*  Further Details
+*  ===============
+*
+*  If UPLO = 'U', then A = U*D*U', where
+*     U = P(n)*U(n)* ... *P(k)U(k)* ...,
+*  i.e., U is a product of terms P(k)*U(k), where k decreases from n to
+*  1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and U(k) is a unit upper triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    v    0   )   k-s
+*     U(k) =  (   0    I    0   )   s
+*             (   0    0    I   )   n-k
+*                k-s   s   n-k
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k).
+*  If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k),
+*  and A(k,k), and v overwrites A(1:k-2,k-1:k).
+*
+*  If UPLO = 'L', then A = L*D*L', where
+*     L = P(1)*L(1)* ... *P(k)*L(k)* ...,
+*  i.e., L is a product of terms P(k)*L(k), where k increases from 1 to
+*  n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and L(k) is a unit lower triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    0     0   )  k-1
+*     L(k) =  (   0    I     0   )  s
+*             (   0    v     I   )  n-k-s+1
+*                k-1   s  n-k-s+1
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k).
+*  If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k),
+*  and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, UPPER
+      INTEGER            IINFO, IWS, J, K, KB, LDWORK, LWKOPT, NB, NBMIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASYF, DSYTF2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size
+*
+         NB = ILAENV( 1, 'DSYTRF', UPLO, N, -1, -1, -1 )
+         LWKOPT = N*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSYTRF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      LDWORK = N
+      IF( NB.GT.1 .AND. NB.LT.N ) THEN
+         IWS = LDWORK*NB
+         IF( LWORK.LT.IWS ) THEN
+            NB = MAX( LWORK / LDWORK, 1 )
+            NBMIN = MAX( 2, ILAENV( 2, 'DSYTRF', UPLO, N, -1, -1, -1 ) )
+         END IF
+      ELSE
+         IWS = 1
+      END IF
+      IF( NB.LT.NBMIN )
+     $   NB = N
+*
+      IF( UPPER ) THEN
+*
+*        Factorize A as U*D*U' using the upper triangle of A
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        KB, where KB is the number of columns factorized by DLASYF;
+*        KB is either NB or NB-1, or K for the last block
+*
+         K = N
+   10    CONTINUE
+*
+*        If K < 1, exit from loop
+*
+         IF( K.LT.1 )
+     $      GO TO 40
+*
+         IF( K.GT.NB ) THEN
+*
+*           Factorize columns k-kb+1:k of A and use blocked code to
+*           update columns 1:k-kb
+*
+            CALL DLASYF( UPLO, K, NB, KB, A, LDA, IPIV, WORK, LDWORK,
+     $                   IINFO )
+         ELSE
+*
+*           Use unblocked code to factorize columns 1:k of A
+*
+            CALL DSYTF2( UPLO, K, A, LDA, IPIV, IINFO )
+            KB = K
+         END IF
+*
+*        Set INFO on the first occurrence of a zero pivot
+*
+         IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+     $      INFO = IINFO
+*
+*        Decrease K and return to the start of the main loop
+*
+         K = K - KB
+         GO TO 10
+*
+      ELSE
+*
+*        Factorize A as L*D*L' using the lower triangle of A
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        KB, where KB is the number of columns factorized by DLASYF;
+*        KB is either NB or NB-1, or N-K+1 for the last block
+*
+         K = 1
+   20    CONTINUE
+*
+*        If K > N, exit from loop
+*
+         IF( K.GT.N )
+     $      GO TO 40
+*
+         IF( K.LE.N-NB ) THEN
+*
+*           Factorize columns k:k+kb-1 of A and use blocked code to
+*           update columns k+kb:n
+*
+            CALL DLASYF( UPLO, N-K+1, NB, KB, A( K, K ), LDA, IPIV( K ),
+     $                   WORK, LDWORK, IINFO )
+         ELSE
+*
+*           Use unblocked code to factorize columns k:n of A
+*
+            CALL DSYTF2( UPLO, N-K+1, A( K, K ), LDA, IPIV( K ), IINFO )
+            KB = N - K + 1
+         END IF
+*
+*        Set INFO on the first occurrence of a zero pivot
+*
+         IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+     $      INFO = IINFO + K - 1
+*
+*        Adjust IPIV
+*
+         DO 30 J = K, K + KB - 1
+            IF( IPIV( J ).GT.0 ) THEN
+               IPIV( J ) = IPIV( J ) + K - 1
+            ELSE
+               IPIV( J ) = IPIV( J ) - K + 1
+            END IF
+   30    CONTINUE
+*
+*        Increase K and return to the start of the main loop
+*
+         K = K + KB
+         GO TO 20
+*
+      END IF
+*
+   40 CONTINUE
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of DSYTRF
+*
+      END
diff --git a/costa/native/external/lapack/dsytri.f b/costa/native/external/lapack/dsytri.f
new file mode 100644
index 000000000..87a3332c2
--- /dev/null
+++ b/costa/native/external/lapack/dsytri.f
@@ -0,0 +1,313 @@
+      SUBROUTINE DSYTRI( UPLO, N, A, LDA, IPIV, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYTRI computes the inverse of a real symmetric indefinite matrix
+*  A using the factorization A = U*D*U**T or A = L*D*L**T computed by
+*  DSYTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**T;
+*          = 'L':  Lower triangular, form is A = L*D*L**T.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the block diagonal matrix D and the multipliers
+*          used to obtain the factor U or L as computed by DSYTRF.
+*
+*          On exit, if INFO = 0, the (symmetric) inverse of the original
+*          matrix.  If UPLO = 'U', the upper triangular part of the
+*          inverse is formed and the part of A below the diagonal is not
+*          referenced; if UPLO = 'L' the lower triangular part of the
+*          inverse is formed and the part of A above the diagonal is
+*          not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by DSYTRF.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its
+*               inverse could not be computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            K, KP, KSTEP
+      DOUBLE PRECISION   AK, AKKP1, AKP1, D, T, TEMP
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DDOT
+      EXTERNAL           LSAME, DDOT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DSWAP, DSYMV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSYTRI', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Check that the diagonal matrix D is nonsingular.
+*
+      IF( UPPER ) THEN
+*
+*        Upper triangular storage: examine D from bottom to top
+*
+         DO 10 INFO = N, 1, -1
+            IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO )
+     $         RETURN
+   10    CONTINUE
+      ELSE
+*
+*        Lower triangular storage: examine D from top to bottom.
+*
+         DO 20 INFO = 1, N
+            IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO )
+     $         RETURN
+   20    CONTINUE
+      END IF
+      INFO = 0
+*
+      IF( UPPER ) THEN
+*
+*        Compute inv(A) from the factorization A = U*D*U'.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+   30    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 40
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Invert the diagonal block.
+*
+            A( K, K ) = ONE / A( K, K )
+*
+*           Compute column K of the inverse.
+*
+            IF( K.GT.1 ) THEN
+               CALL DCOPY( K-1, A( 1, K ), 1, WORK, 1 )
+               CALL DSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO,
+     $                     A( 1, K ), 1 )
+               A( K, K ) = A( K, K ) - DDOT( K-1, WORK, 1, A( 1, K ),
+     $                     1 )
+            END IF
+            KSTEP = 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Invert the diagonal block.
+*
+            T = ABS( A( K, K+1 ) )
+            AK = A( K, K ) / T
+            AKP1 = A( K+1, K+1 ) / T
+            AKKP1 = A( K, K+1 ) / T
+            D = T*( AK*AKP1-ONE )
+            A( K, K ) = AKP1 / D
+            A( K+1, K+1 ) = AK / D
+            A( K, K+1 ) = -AKKP1 / D
+*
+*           Compute columns K and K+1 of the inverse.
+*
+            IF( K.GT.1 ) THEN
+               CALL DCOPY( K-1, A( 1, K ), 1, WORK, 1 )
+               CALL DSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO,
+     $                     A( 1, K ), 1 )
+               A( K, K ) = A( K, K ) - DDOT( K-1, WORK, 1, A( 1, K ),
+     $                     1 )
+               A( K, K+1 ) = A( K, K+1 ) -
+     $                       DDOT( K-1, A( 1, K ), 1, A( 1, K+1 ), 1 )
+               CALL DCOPY( K-1, A( 1, K+1 ), 1, WORK, 1 )
+               CALL DSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO,
+     $                     A( 1, K+1 ), 1 )
+               A( K+1, K+1 ) = A( K+1, K+1 ) -
+     $                         DDOT( K-1, WORK, 1, A( 1, K+1 ), 1 )
+            END IF
+            KSTEP = 2
+         END IF
+*
+         KP = ABS( IPIV( K ) )
+         IF( KP.NE.K ) THEN
+*
+*           Interchange rows and columns K and KP in the leading
+*           submatrix A(1:k+1,1:k+1)
+*
+            CALL DSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 )
+            CALL DSWAP( K-KP-1, A( KP+1, K ), 1, A( KP, KP+1 ), LDA )
+            TEMP = A( K, K )
+            A( K, K ) = A( KP, KP )
+            A( KP, KP ) = TEMP
+            IF( KSTEP.EQ.2 ) THEN
+               TEMP = A( K, K+1 )
+               A( K, K+1 ) = A( KP, K+1 )
+               A( KP, K+1 ) = TEMP
+            END IF
+         END IF
+*
+         K = K + KSTEP
+         GO TO 30
+   40    CONTINUE
+*
+      ELSE
+*
+*        Compute inv(A) from the factorization A = L*D*L'.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = N
+   50    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 60
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Invert the diagonal block.
+*
+            A( K, K ) = ONE / A( K, K )
+*
+*           Compute column K of the inverse.
+*
+            IF( K.LT.N ) THEN
+               CALL DCOPY( N-K, A( K+1, K ), 1, WORK, 1 )
+               CALL DSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1,
+     $                     ZERO, A( K+1, K ), 1 )
+               A( K, K ) = A( K, K ) - DDOT( N-K, WORK, 1, A( K+1, K ),
+     $                     1 )
+            END IF
+            KSTEP = 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Invert the diagonal block.
+*
+            T = ABS( A( K, K-1 ) )
+            AK = A( K-1, K-1 ) / T
+            AKP1 = A( K, K ) / T
+            AKKP1 = A( K, K-1 ) / T
+            D = T*( AK*AKP1-ONE )
+            A( K-1, K-1 ) = AKP1 / D
+            A( K, K ) = AK / D
+            A( K, K-1 ) = -AKKP1 / D
+*
+*           Compute columns K-1 and K of the inverse.
+*
+            IF( K.LT.N ) THEN
+               CALL DCOPY( N-K, A( K+1, K ), 1, WORK, 1 )
+               CALL DSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1,
+     $                     ZERO, A( K+1, K ), 1 )
+               A( K, K ) = A( K, K ) - DDOT( N-K, WORK, 1, A( K+1, K ),
+     $                     1 )
+               A( K, K-1 ) = A( K, K-1 ) -
+     $                       DDOT( N-K, A( K+1, K ), 1, A( K+1, K-1 ),
+     $                       1 )
+               CALL DCOPY( N-K, A( K+1, K-1 ), 1, WORK, 1 )
+               CALL DSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1,
+     $                     ZERO, A( K+1, K-1 ), 1 )
+               A( K-1, K-1 ) = A( K-1, K-1 ) -
+     $                         DDOT( N-K, WORK, 1, A( K+1, K-1 ), 1 )
+            END IF
+            KSTEP = 2
+         END IF
+*
+         KP = ABS( IPIV( K ) )
+         IF( KP.NE.K ) THEN
+*
+*           Interchange rows and columns K and KP in the trailing
+*           submatrix A(k-1:n,k-1:n)
+*
+            IF( KP.LT.N )
+     $         CALL DSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 )
+            CALL DSWAP( KP-K-1, A( K+1, K ), 1, A( KP, K+1 ), LDA )
+            TEMP = A( K, K )
+            A( K, K ) = A( KP, KP )
+            A( KP, KP ) = TEMP
+            IF( KSTEP.EQ.2 ) THEN
+               TEMP = A( K, K-1 )
+               A( K, K-1 ) = A( KP, K-1 )
+               A( KP, K-1 ) = TEMP
+            END IF
+         END IF
+*
+         K = K - KSTEP
+         GO TO 50
+   60    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DSYTRI
+*
+      END
diff --git a/costa/native/external/lapack/dsytrs.f b/costa/native/external/lapack/dsytrs.f
new file mode 100644
index 000000000..ba70be4a4
--- /dev/null
+++ b/costa/native/external/lapack/dsytrs.f
@@ -0,0 +1,370 @@
+      SUBROUTINE DSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DSYTRS solves a system of linear equations A*X = B with a real
+*  symmetric matrix A using the factorization A = U*D*U**T or
+*  A = L*D*L**T computed by DSYTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**T;
+*          = 'L':  Lower triangular, form is A = L*D*L**T.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+*          The block diagonal matrix D and the multipliers used to
+*          obtain the factor U or L as computed by DSYTRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by DSYTRF.
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, K, KP
+      DOUBLE PRECISION   AK, AKM1, AKM1K, BK, BKM1, DENOM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMV, DGER, DSCAL, DSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DSYTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Solve A*X = B, where A = U*D*U'.
+*
+*        First solve U*D*X = B, overwriting B with X.
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = N
+   10    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 30
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(U(K)), where U(K) is the transformation
+*           stored in column K of A.
+*
+            CALL DGER( K-1, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB,
+     $                 B( 1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            CALL DSCAL( NRHS, ONE / A( K, K ), B( K, 1 ), LDB )
+            K = K - 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Interchange rows K-1 and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K-1 )
+     $         CALL DSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(U(K)), where U(K) is the transformation
+*           stored in columns K-1 and K of A.
+*
+            CALL DGER( K-2, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB,
+     $                 B( 1, 1 ), LDB )
+            CALL DGER( K-2, NRHS, -ONE, A( 1, K-1 ), 1, B( K-1, 1 ),
+     $                 LDB, B( 1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            AKM1K = A( K-1, K )
+            AKM1 = A( K-1, K-1 ) / AKM1K
+            AK = A( K, K ) / AKM1K
+            DENOM = AKM1*AK - ONE
+            DO 20 J = 1, NRHS
+               BKM1 = B( K-1, J ) / AKM1K
+               BK = B( K, J ) / AKM1K
+               B( K-1, J ) = ( AK*BKM1-BK ) / DENOM
+               B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM
+   20       CONTINUE
+            K = K - 2
+         END IF
+*
+         GO TO 10
+   30    CONTINUE
+*
+*        Next solve U'*X = B, overwriting B with X.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+   40    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 50
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Multiply by inv(U'(K)), where U(K) is the transformation
+*           stored in column K of A.
+*
+            CALL DGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, A( 1, K ),
+     $                  1, ONE, B( K, 1 ), LDB )
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            K = K + 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Multiply by inv(U'(K+1)), where U(K+1) is the transformation
+*           stored in columns K and K+1 of A.
+*
+            CALL DGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, A( 1, K ),
+     $                  1, ONE, B( K, 1 ), LDB )
+            CALL DGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB,
+     $                  A( 1, K+1 ), 1, ONE, B( K+1, 1 ), LDB )
+*
+*           Interchange rows K and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K )
+     $         CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            K = K + 2
+         END IF
+*
+         GO TO 40
+   50    CONTINUE
+*
+      ELSE
+*
+*        Solve A*X = B, where A = L*D*L'.
+*
+*        First solve L*D*X = B, overwriting B with X.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+   60    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 80
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(L(K)), where L(K) is the transformation
+*           stored in column K of A.
+*
+            IF( K.LT.N )
+     $         CALL DGER( N-K, NRHS, -ONE, A( K+1, K ), 1, B( K, 1 ),
+     $                    LDB, B( K+1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            CALL DSCAL( NRHS, ONE / A( K, K ), B( K, 1 ), LDB )
+            K = K + 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Interchange rows K+1 and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K+1 )
+     $         CALL DSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(L(K)), where L(K) is the transformation
+*           stored in columns K and K+1 of A.
+*
+            IF( K.LT.N-1 ) THEN
+               CALL DGER( N-K-1, NRHS, -ONE, A( K+2, K ), 1, B( K, 1 ),
+     $                    LDB, B( K+2, 1 ), LDB )
+               CALL DGER( N-K-1, NRHS, -ONE, A( K+2, K+1 ), 1,
+     $                    B( K+1, 1 ), LDB, B( K+2, 1 ), LDB )
+            END IF
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            AKM1K = A( K+1, K )
+            AKM1 = A( K, K ) / AKM1K
+            AK = A( K+1, K+1 ) / AKM1K
+            DENOM = AKM1*AK - ONE
+            DO 70 J = 1, NRHS
+               BKM1 = B( K, J ) / AKM1K
+               BK = B( K+1, J ) / AKM1K
+               B( K, J ) = ( AK*BKM1-BK ) / DENOM
+               B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM
+   70       CONTINUE
+            K = K + 2
+         END IF
+*
+         GO TO 60
+   80    CONTINUE
+*
+*        Next solve L'*X = B, overwriting B with X.
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = N
+   90    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 100
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Multiply by inv(L'(K)), where L(K) is the transformation
+*           stored in column K of A.
+*
+            IF( K.LT.N )
+     $         CALL DGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ),
+     $                     LDB, A( K+1, K ), 1, ONE, B( K, 1 ), LDB )
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            K = K - 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Multiply by inv(L'(K-1)), where L(K-1) is the transformation
+*           stored in columns K-1 and K of A.
+*
+            IF( K.LT.N ) THEN
+               CALL DGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ),
+     $                     LDB, A( K+1, K ), 1, ONE, B( K, 1 ), LDB )
+               CALL DGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ),
+     $                     LDB, A( K+1, K-1 ), 1, ONE, B( K-1, 1 ),
+     $                     LDB )
+            END IF
+*
+*           Interchange rows K and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K )
+     $         CALL DSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            K = K - 2
+         END IF
+*
+         GO TO 90
+  100    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DSYTRS
+*
+      END
diff --git a/costa/native/external/lapack/dtbcon.f b/costa/native/external/lapack/dtbcon.f
new file mode 100644
index 000000000..178dbbb71
--- /dev/null
+++ b/costa/native/external/lapack/dtbcon.f
@@ -0,0 +1,198 @@
+      SUBROUTINE DTBCON( NORM, UPLO, DIAG, N, KD, AB, LDAB, RCOND, WORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORM, UPLO
+      INTEGER            INFO, KD, LDAB, N
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   AB( LDAB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTBCON estimates the reciprocal of the condition number of a
+*  triangular band matrix A, in either the 1-norm or the infinity-norm.
+*
+*  The norm of A is computed and an estimate is obtained for
+*  norm(inv(A)), then the reciprocal of the condition number is
+*  computed as
+*     RCOND = 1 / ( norm(A) * norm(inv(A)) ).
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies whether the 1-norm condition number or the
+*          infinity-norm condition number is required:
+*          = '1' or 'O':  1-norm;
+*          = 'I':         Infinity-norm.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals or subdiagonals of the
+*          triangular band matrix A.  KD >= 0.
+*
+*  AB      (input) DOUBLE PRECISION array, dimension (LDAB,N)
+*          The upper or lower triangular band matrix A, stored in the
+*          first kd+1 rows of the array. The j-th column of A is stored
+*          in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*          If DIAG = 'U', the diagonal elements of A are not referenced
+*          and are assumed to be 1.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(norm(A) * norm(inv(A))).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, ONENRM, UPPER
+      CHARACTER          NORMIN
+      INTEGER            IX, KASE, KASE1
+      DOUBLE PRECISION   AINVNM, ANORM, SCALE, SMLNUM, XNORM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DLAMCH, DLANTB
+      EXTERNAL           LSAME, IDAMAX, DLAMCH, DLANTB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLACON, DLATBS, DRSCL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+      IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DTBCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      END IF
+*
+      RCOND = ZERO
+      SMLNUM = DLAMCH( 'Safe minimum' )*DBLE( MAX( 1, N ) )
+*
+*     Compute the norm of the triangular matrix A.
+*
+      ANORM = DLANTB( NORM, UPLO, DIAG, N, KD, AB, LDAB, WORK )
+*
+*     Continue only if ANORM > 0.
+*
+      IF( ANORM.GT.ZERO ) THEN
+*
+*        Estimate the norm of the inverse of A.
+*
+         AINVNM = ZERO
+         NORMIN = 'N'
+         IF( ONENRM ) THEN
+            KASE1 = 1
+         ELSE
+            KASE1 = 2
+         END IF
+         KASE = 0
+   10    CONTINUE
+         CALL DLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.KASE1 ) THEN
+*
+*              Multiply by inv(A).
+*
+               CALL DLATBS( UPLO, 'No transpose', DIAG, NORMIN, N, KD,
+     $                      AB, LDAB, WORK, SCALE, WORK( 2*N+1 ), INFO )
+            ELSE
+*
+*              Multiply by inv(A').
+*
+               CALL DLATBS( UPLO, 'Transpose', DIAG, NORMIN, N, KD, AB,
+     $                      LDAB, WORK, SCALE, WORK( 2*N+1 ), INFO )
+            END IF
+            NORMIN = 'Y'
+*
+*           Multiply by 1/SCALE if doing so will not cause overflow.
+*
+            IF( SCALE.NE.ONE ) THEN
+               IX = IDAMAX( N, WORK, 1 )
+               XNORM = ABS( WORK( IX ) )
+               IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO )
+     $            GO TO 20
+               CALL DRSCL( N, SCALE, WORK, 1 )
+            END IF
+            GO TO 10
+         END IF
+*
+*        Compute the estimate of the reciprocal condition number.
+*
+         IF( AINVNM.NE.ZERO )
+     $      RCOND = ( ONE / ANORM ) / AINVNM
+      END IF
+*
+   20 CONTINUE
+      RETURN
+*
+*     End of DTBCON
+*
+      END
diff --git a/costa/native/external/lapack/dtbrfs.f b/costa/native/external/lapack/dtbrfs.f
new file mode 100644
index 000000000..e16407cac
--- /dev/null
+++ b/costa/native/external/lapack/dtbrfs.f
@@ -0,0 +1,381 @@
+      SUBROUTINE DTBRFS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B,
+     $                   LDB, X, LDX, FERR, BERR, WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, TRANS, UPLO
+      INTEGER            INFO, KD, LDAB, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   AB( LDAB, * ), B( LDB, * ), BERR( * ),
+     $                   FERR( * ), WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTBRFS provides error bounds and backward error estimates for the
+*  solution to a system of linear equations with a triangular band
+*  coefficient matrix.
+*
+*  The solution matrix X must be computed by DTBTRS or some other
+*  means before entering this routine.  DTBRFS does not do iterative
+*  refinement because doing so cannot improve the backward error.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B  (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose = Transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals or subdiagonals of the
+*          triangular band matrix A.  KD >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AB      (input) DOUBLE PRECISION array, dimension (LDAB,N)
+*          The upper or lower triangular band matrix A, stored in the
+*          first kd+1 rows of the array. The j-th column of A is stored
+*          in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*          If DIAG = 'U', the diagonal elements of A are not referenced
+*          and are assumed to be 1.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  B       (input) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input) DOUBLE PRECISION array, dimension (LDX,NRHS)
+*          The solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN, NOUNIT, UPPER
+      CHARACTER          TRANST
+      INTEGER            I, J, K, KASE, NZ
+      DOUBLE PRECISION   EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DCOPY, DLACON, DTBMV, DTBSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -8
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DTBRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      IF( NOTRAN ) THEN
+         TRANST = 'T'
+      ELSE
+         TRANST = 'N'
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = KD + 2
+      EPS = DLAMCH( 'Epsilon' )
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 250 J = 1, NRHS
+*
+*        Compute residual R = B - op(A) * X,
+*        where op(A) = A or A', depending on TRANS.
+*
+         CALL DCOPY( N, X( 1, J ), 1, WORK( N+1 ), 1 )
+         CALL DTBMV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, WORK( N+1 ),
+     $               1 )
+         CALL DAXPY( N, -ONE, B( 1, J ), 1, WORK( N+1 ), 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 20 I = 1, N
+            WORK( I ) = ABS( B( I, J ) )
+   20    CONTINUE
+*
+         IF( NOTRAN ) THEN
+*
+*           Compute abs(A)*abs(X) + abs(B).
+*
+            IF( UPPER ) THEN
+               IF( NOUNIT ) THEN
+                  DO 40 K = 1, N
+                     XK = ABS( X( K, J ) )
+                     DO 30 I = MAX( 1, K-KD ), K
+                        WORK( I ) = WORK( I ) +
+     $                              ABS( AB( KD+1+I-K, K ) )*XK
+   30                CONTINUE
+   40             CONTINUE
+               ELSE
+                  DO 60 K = 1, N
+                     XK = ABS( X( K, J ) )
+                     DO 50 I = MAX( 1, K-KD ), K - 1
+                        WORK( I ) = WORK( I ) +
+     $                              ABS( AB( KD+1+I-K, K ) )*XK
+   50                CONTINUE
+                     WORK( K ) = WORK( K ) + XK
+   60             CONTINUE
+               END IF
+            ELSE
+               IF( NOUNIT ) THEN
+                  DO 80 K = 1, N
+                     XK = ABS( X( K, J ) )
+                     DO 70 I = K, MIN( N, K+KD )
+                        WORK( I ) = WORK( I ) + ABS( AB( 1+I-K, K ) )*XK
+   70                CONTINUE
+   80             CONTINUE
+               ELSE
+                  DO 100 K = 1, N
+                     XK = ABS( X( K, J ) )
+                     DO 90 I = K + 1, MIN( N, K+KD )
+                        WORK( I ) = WORK( I ) + ABS( AB( 1+I-K, K ) )*XK
+   90                CONTINUE
+                     WORK( K ) = WORK( K ) + XK
+  100             CONTINUE
+               END IF
+            END IF
+         ELSE
+*
+*           Compute abs(A')*abs(X) + abs(B).
+*
+            IF( UPPER ) THEN
+               IF( NOUNIT ) THEN
+                  DO 120 K = 1, N
+                     S = ZERO
+                     DO 110 I = MAX( 1, K-KD ), K
+                        S = S + ABS( AB( KD+1+I-K, K ) )*
+     $                      ABS( X( I, J ) )
+  110                CONTINUE
+                     WORK( K ) = WORK( K ) + S
+  120             CONTINUE
+               ELSE
+                  DO 140 K = 1, N
+                     S = ABS( X( K, J ) )
+                     DO 130 I = MAX( 1, K-KD ), K - 1
+                        S = S + ABS( AB( KD+1+I-K, K ) )*
+     $                      ABS( X( I, J ) )
+  130                CONTINUE
+                     WORK( K ) = WORK( K ) + S
+  140             CONTINUE
+               END IF
+            ELSE
+               IF( NOUNIT ) THEN
+                  DO 160 K = 1, N
+                     S = ZERO
+                     DO 150 I = K, MIN( N, K+KD )
+                        S = S + ABS( AB( 1+I-K, K ) )*ABS( X( I, J ) )
+  150                CONTINUE
+                     WORK( K ) = WORK( K ) + S
+  160             CONTINUE
+               ELSE
+                  DO 180 K = 1, N
+                     S = ABS( X( K, J ) )
+                     DO 170 I = K + 1, MIN( N, K+KD )
+                        S = S + ABS( AB( 1+I-K, K ) )*ABS( X( I, J ) )
+  170                CONTINUE
+                     WORK( K ) = WORK( K ) + S
+  180             CONTINUE
+               END IF
+            END IF
+         END IF
+         S = ZERO
+         DO 190 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) )
+            ELSE
+               S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) /
+     $             ( WORK( I )+SAFE1 ) )
+            END IF
+  190    CONTINUE
+         BERR( J ) = S
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(op(A)))*
+*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(op(A)) is the inverse of op(A)
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(op(A))*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use DLACON to estimate the infinity-norm of the matrix
+*           inv(op(A)) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) )))
+*
+         DO 200 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I )
+            ELSE
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1
+            END IF
+  200    CONTINUE
+*
+         KASE = 0
+  210    CONTINUE
+         CALL DLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ),
+     $                KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(op(A)').
+*
+               CALL DTBSV( UPLO, TRANST, DIAG, N, KD, AB, LDAB,
+     $                     WORK( N+1 ), 1 )
+               DO 220 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  220          CONTINUE
+            ELSE
+*
+*              Multiply by inv(op(A))*diag(W).
+*
+               DO 230 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  230          CONTINUE
+               CALL DTBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB,
+     $                     WORK( N+1 ), 1 )
+            END IF
+            GO TO 210
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 240 I = 1, N
+            LSTRES = MAX( LSTRES, ABS( X( I, J ) ) )
+  240    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  250 CONTINUE
+*
+      RETURN
+*
+*     End of DTBRFS
+*
+      END
diff --git a/costa/native/external/lapack/dtbtrs.f b/costa/native/external/lapack/dtbtrs.f
new file mode 100644
index 000000000..1409649ff
--- /dev/null
+++ b/costa/native/external/lapack/dtbtrs.f
@@ -0,0 +1,163 @@
+      SUBROUTINE DTBTRS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B,
+     $                   LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, TRANS, UPLO
+      INTEGER            INFO, KD, LDAB, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AB( LDAB, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTBTRS solves a triangular system of the form
+*
+*     A * X = B  or  A**T * X = B,
+*
+*  where A is a triangular band matrix of order N, and B is an
+*  N-by NRHS matrix.  A check is made to verify that A is nonsingular.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form the system of equations:
+*          = 'N':  A * X = B  (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose = Transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals or subdiagonals of the
+*          triangular band matrix A.  KD >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AB      (input) DOUBLE PRECISION array, dimension (LDAB,N)
+*          The upper or lower triangular band matrix A, stored in the
+*          first kd+1 rows of AB.  The j-th column of A is stored
+*          in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*          If DIAG = 'U', the diagonal elements of A are not referenced
+*          and are assumed to be 1.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, if INFO = 0, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the i-th diagonal element of A is zero,
+*                indicating that the matrix is singular and the
+*                solutions X have not been computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, UPPER
+      INTEGER            J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DTBSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOUNIT = LSAME( DIAG, 'N' )
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.
+     $         LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -8
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DTBTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Check for singularity.
+*
+      IF( NOUNIT ) THEN
+         IF( UPPER ) THEN
+            DO 10 INFO = 1, N
+               IF( AB( KD+1, INFO ).EQ.ZERO )
+     $            RETURN
+   10       CONTINUE
+         ELSE
+            DO 20 INFO = 1, N
+               IF( AB( 1, INFO ).EQ.ZERO )
+     $            RETURN
+   20       CONTINUE
+         END IF
+      END IF
+      INFO = 0
+*
+*     Solve A * X = B  or  A' * X = B.
+*
+      DO 30 J = 1, NRHS
+         CALL DTBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, B( 1, J ), 1 )
+   30 CONTINUE
+*
+      RETURN
+*
+*     End of DTBTRS
+*
+      END
diff --git a/costa/native/external/lapack/dtgevc.f b/costa/native/external/lapack/dtgevc.f
new file mode 100644
index 000000000..180aa066d
--- /dev/null
+++ b/costa/native/external/lapack/dtgevc.f
@@ -0,0 +1,1146 @@
+      SUBROUTINE DTGEVC( SIDE, HOWMNY, SELECT, N, A, LDA, B, LDB, VL,
+     $                   LDVL, VR, LDVR, MM, M, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          HOWMNY, SIDE
+      INTEGER            INFO, LDA, LDB, LDVL, LDVR, M, MM, N
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            SELECT( * )
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), VL( LDVL, * ),
+     $                   VR( LDVR, * ), WORK( * )
+*     ..
+*
+*
+*  Purpose
+*  =======
+*
+*  DTGEVC computes some or all of the right and/or left generalized
+*  eigenvectors of a pair of real upper triangular matrices (A,B).
+*
+*  The right generalized eigenvector x and the left generalized
+*  eigenvector y of (A,B) corresponding to a generalized eigenvalue
+*  w are defined by:
+*
+*          (A - wB) * x = 0  and  y**H * (A - wB) = 0
+*
+*  where y**H denotes the conjugate tranpose of y.
+*
+*  If an eigenvalue w is determined by zero diagonal elements of both A
+*  and B, a unit vector is returned as the corresponding eigenvector.
+*
+*  If all eigenvectors are requested, the routine may either return
+*  the matrices X and/or Y of right or left eigenvectors of (A,B), or
+*  the products Z*X and/or Q*Y, where Z and Q are input orthogonal
+*  matrices.  If (A,B) was obtained from the generalized real-Schur
+*  factorization of an original pair of matrices
+*     (A0,B0) = (Q*A*Z**H,Q*B*Z**H),
+*  then Z*X and Q*Y are the matrices of right or left eigenvectors of
+*  A.
+*
+*  A must be block upper triangular, with 1-by-1 and 2-by-2 diagonal
+*  blocks.  Corresponding to each 2-by-2 diagonal block is a complex
+*  conjugate pair of eigenvalues and eigenvectors; only one
+*  eigenvector of the pair is computed, namely the one corresponding
+*  to the eigenvalue with positive imaginary part.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'R': compute right eigenvectors only;
+*          = 'L': compute left eigenvectors only;
+*          = 'B': compute both right and left eigenvectors.
+*
+*  HOWMNY  (input) CHARACTER*1
+*          = 'A': compute all right and/or left eigenvectors;
+*          = 'B': compute all right and/or left eigenvectors, and
+*                 backtransform them using the input matrices supplied
+*                 in VR and/or VL;
+*          = 'S': compute selected right and/or left eigenvectors,
+*                 specified by the logical array SELECT.
+*
+*  SELECT  (input) LOGICAL array, dimension (N)
+*          If HOWMNY='S', SELECT specifies the eigenvectors to be
+*          computed.
+*          If HOWMNY='A' or 'B', SELECT is not referenced.
+*          To select the real eigenvector corresponding to the real
+*          eigenvalue w(j), SELECT(j) must be set to .TRUE.  To select
+*          the complex eigenvector corresponding to a complex conjugate
+*          pair w(j) and w(j+1), either SELECT(j) or SELECT(j+1) must
+*          be set to .TRUE..
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+*          The upper quasi-triangular matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of array A.  LDA >= max(1, N).
+*
+*  B       (input) DOUBLE PRECISION array, dimension (LDB,N)
+*          The upper triangular matrix B.  If A has a 2-by-2 diagonal
+*          block, then the corresponding 2-by-2 block of B must be
+*          diagonal with positive elements.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of array B.  LDB >= max(1,N).
+*
+*  VL      (input/output) DOUBLE PRECISION array, dimension (LDVL,MM)
+*          On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must
+*          contain an N-by-N matrix Q (usually the orthogonal matrix Q
+*          of left Schur vectors returned by DHGEQZ).
+*          On exit, if SIDE = 'L' or 'B', VL contains:
+*          if HOWMNY = 'A', the matrix Y of left eigenvectors of (A,B);
+*          if HOWMNY = 'B', the matrix Q*Y;
+*          if HOWMNY = 'S', the left eigenvectors of (A,B) specified by
+*                      SELECT, stored consecutively in the columns of
+*                      VL, in the same order as their eigenvalues.
+*          If SIDE = 'R', VL is not referenced.
+*
+*          A complex eigenvector corresponding to a complex eigenvalue
+*          is stored in two consecutive columns, the first holding the
+*          real part, and the second the imaginary part.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of array VL.
+*          LDVL >= max(1,N) if SIDE = 'L' or 'B'; LDVL >= 1 otherwise.
+*
+*  VR      (input/output) DOUBLE PRECISION array, dimension (LDVR,MM)
+*          On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must
+*          contain an N-by-N matrix Q (usually the orthogonal matrix Z
+*          of right Schur vectors returned by DHGEQZ).
+*          On exit, if SIDE = 'R' or 'B', VR contains:
+*          if HOWMNY = 'A', the matrix X of right eigenvectors of (A,B);
+*          if HOWMNY = 'B', the matrix Z*X;
+*          if HOWMNY = 'S', the right eigenvectors of (A,B) specified by
+*                      SELECT, stored consecutively in the columns of
+*                      VR, in the same order as their eigenvalues.
+*          If SIDE = 'L', VR is not referenced.
+*
+*          A complex eigenvector corresponding to a complex eigenvalue
+*          is stored in two consecutive columns, the first holding the
+*          real part and the second the imaginary part.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the array VR.
+*          LDVR >= max(1,N) if SIDE = 'R' or 'B'; LDVR >= 1 otherwise.
+*
+*  MM      (input) INTEGER
+*          The number of columns in the arrays VL and/or VR. MM >= M.
+*
+*  M       (output) INTEGER
+*          The number of columns in the arrays VL and/or VR actually
+*          used to store the eigenvectors.  If HOWMNY = 'A' or 'B', M
+*          is set to N.  Each selected real eigenvector occupies one
+*          column and each selected complex eigenvector occupies two
+*          columns.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (6*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  the 2-by-2 block (INFO:INFO+1) does not have a complex
+*                eigenvalue.
+*
+*  Further Details
+*  ===============
+*
+*  Allocation of workspace:
+*  ---------- -- ---------
+*
+*     WORK( j ) = 1-norm of j-th column of A, above the diagonal
+*     WORK( N+j ) = 1-norm of j-th column of B, above the diagonal
+*     WORK( 2*N+1:3*N ) = real part of eigenvector
+*     WORK( 3*N+1:4*N ) = imaginary part of eigenvector
+*     WORK( 4*N+1:5*N ) = real part of back-transformed eigenvector
+*     WORK( 5*N+1:6*N ) = imaginary part of back-transformed eigenvector
+*
+*  Rowwise vs. columnwise solution methods:
+*  ------- --  ---------- -------- -------
+*
+*  Finding a generalized eigenvector consists basically of solving the
+*  singular triangular system
+*
+*   (A - w B) x = 0     (for right) or:   (A - w B)**H y = 0  (for left)
+*
+*  Consider finding the i-th right eigenvector (assume all eigenvalues
+*  are real). The equation to be solved is:
+*       n                   i
+*  0 = sum  C(j,k) v(k)  = sum  C(j,k) v(k)     for j = i,. . .,1
+*      k=j                 k=j
+*
+*  where  C = (A - w B)  (The components v(i+1:n) are 0.)
+*
+*  The "rowwise" method is:
+*
+*  (1)  v(i) := 1
+*  for j = i-1,. . .,1:
+*                          i
+*      (2) compute  s = - sum C(j,k) v(k)   and
+*                        k=j+1
+*
+*      (3) v(j) := s / C(j,j)
+*
+*  Step 2 is sometimes called the "dot product" step, since it is an
+*  inner product between the j-th row and the portion of the eigenvector
+*  that has been computed so far.
+*
+*  The "columnwise" method consists basically in doing the sums
+*  for all the rows in parallel.  As each v(j) is computed, the
+*  contribution of v(j) times the j-th column of C is added to the
+*  partial sums.  Since FORTRAN arrays are stored columnwise, this has
+*  the advantage that at each step, the elements of C that are accessed
+*  are adjacent to one another, whereas with the rowwise method, the
+*  elements accessed at a step are spaced LDA (and LDB) words apart.
+*
+*  When finding left eigenvectors, the matrix in question is the
+*  transpose of the one in storage, so the rowwise method then
+*  actually accesses columns of A and B at each step, and so is the
+*  preferred method.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, SAFETY
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0,
+     $                   SAFETY = 1.0D+2 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            COMPL, COMPR, IL2BY2, ILABAD, ILALL, ILBACK,
+     $                   ILBBAD, ILCOMP, ILCPLX, LSA, LSB
+      INTEGER            I, IBEG, IEIG, IEND, IHWMNY, IINFO, IM, ISIDE,
+     $                   J, JA, JC, JE, JR, JW, NA, NW
+      DOUBLE PRECISION   ACOEF, ACOEFA, ANORM, ASCALE, BCOEFA, BCOEFI,
+     $                   BCOEFR, BIG, BIGNUM, BNORM, BSCALE, CIM2A,
+     $                   CIM2B, CIMAGA, CIMAGB, CRE2A, CRE2B, CREALA,
+     $                   CREALB, DMIN, SAFMIN, SALFAR, SBETA, SCALE,
+     $                   SMALL, TEMP, TEMP2, TEMP2I, TEMP2R, ULP, XMAX,
+     $                   XSCALE
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   BDIAG( 2 ), SUM( 2, 2 ), SUMA( 2, 2 ),
+     $                   SUMB( 2, 2 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMV, DLACPY, DLAG2, DLALN2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and Test the input parameters
+*
+      IF( LSAME( HOWMNY, 'A' ) ) THEN
+         IHWMNY = 1
+         ILALL = .TRUE.
+         ILBACK = .FALSE.
+      ELSE IF( LSAME( HOWMNY, 'S' ) ) THEN
+         IHWMNY = 2
+         ILALL = .FALSE.
+         ILBACK = .FALSE.
+      ELSE IF( LSAME( HOWMNY, 'B' ) .OR. LSAME( HOWMNY, 'T' ) ) THEN
+         IHWMNY = 3
+         ILALL = .TRUE.
+         ILBACK = .TRUE.
+      ELSE
+         IHWMNY = -1
+         ILALL = .TRUE.
+      END IF
+*
+      IF( LSAME( SIDE, 'R' ) ) THEN
+         ISIDE = 1
+         COMPL = .FALSE.
+         COMPR = .TRUE.
+      ELSE IF( LSAME( SIDE, 'L' ) ) THEN
+         ISIDE = 2
+         COMPL = .TRUE.
+         COMPR = .FALSE.
+      ELSE IF( LSAME( SIDE, 'B' ) ) THEN
+         ISIDE = 3
+         COMPL = .TRUE.
+         COMPR = .TRUE.
+      ELSE
+         ISIDE = -1
+      END IF
+*
+      INFO = 0
+      IF( ISIDE.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( IHWMNY.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DTGEVC', -INFO )
+         RETURN
+      END IF
+*
+*     Count the number of eigenvectors to be computed
+*
+      IF( .NOT.ILALL ) THEN
+         IM = 0
+         ILCPLX = .FALSE.
+         DO 10 J = 1, N
+            IF( ILCPLX ) THEN
+               ILCPLX = .FALSE.
+               GO TO 10
+            END IF
+            IF( J.LT.N ) THEN
+               IF( A( J+1, J ).NE.ZERO )
+     $            ILCPLX = .TRUE.
+            END IF
+            IF( ILCPLX ) THEN
+               IF( SELECT( J ) .OR. SELECT( J+1 ) )
+     $            IM = IM + 2
+            ELSE
+               IF( SELECT( J ) )
+     $            IM = IM + 1
+            END IF
+   10    CONTINUE
+      ELSE
+         IM = N
+      END IF
+*
+*     Check 2-by-2 diagonal blocks of A, B
+*
+      ILABAD = .FALSE.
+      ILBBAD = .FALSE.
+      DO 20 J = 1, N - 1
+         IF( A( J+1, J ).NE.ZERO ) THEN
+            IF( B( J, J ).EQ.ZERO .OR. B( J+1, J+1 ).EQ.ZERO .OR.
+     $          B( J, J+1 ).NE.ZERO )ILBBAD = .TRUE.
+            IF( J.LT.N-1 ) THEN
+               IF( A( J+2, J+1 ).NE.ZERO )
+     $            ILABAD = .TRUE.
+            END IF
+         END IF
+   20 CONTINUE
+*
+      IF( ILABAD ) THEN
+         INFO = -5
+      ELSE IF( ILBBAD ) THEN
+         INFO = -7
+      ELSE IF( COMPL .AND. LDVL.LT.N .OR. LDVL.LT.1 ) THEN
+         INFO = -10
+      ELSE IF( COMPR .AND. LDVR.LT.N .OR. LDVR.LT.1 ) THEN
+         INFO = -12
+      ELSE IF( MM.LT.IM ) THEN
+         INFO = -13
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DTGEVC', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = IM
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Machine Constants
+*
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      BIG = ONE / SAFMIN
+      CALL DLABAD( SAFMIN, BIG )
+      ULP = DLAMCH( 'Epsilon' )*DLAMCH( 'Base' )
+      SMALL = SAFMIN*N / ULP
+      BIG = ONE / SMALL
+      BIGNUM = ONE / ( SAFMIN*N )
+*
+*     Compute the 1-norm of each column of the strictly upper triangular
+*     part (i.e., excluding all elements belonging to the diagonal
+*     blocks) of A and B to check for possible overflow in the
+*     triangular solver.
+*
+      ANORM = ABS( A( 1, 1 ) )
+      IF( N.GT.1 )
+     $   ANORM = ANORM + ABS( A( 2, 1 ) )
+      BNORM = ABS( B( 1, 1 ) )
+      WORK( 1 ) = ZERO
+      WORK( N+1 ) = ZERO
+*
+      DO 50 J = 2, N
+         TEMP = ZERO
+         TEMP2 = ZERO
+         IF( A( J, J-1 ).EQ.ZERO ) THEN
+            IEND = J - 1
+         ELSE
+            IEND = J - 2
+         END IF
+         DO 30 I = 1, IEND
+            TEMP = TEMP + ABS( A( I, J ) )
+            TEMP2 = TEMP2 + ABS( B( I, J ) )
+   30    CONTINUE
+         WORK( J ) = TEMP
+         WORK( N+J ) = TEMP2
+         DO 40 I = IEND + 1, MIN( J+1, N )
+            TEMP = TEMP + ABS( A( I, J ) )
+            TEMP2 = TEMP2 + ABS( B( I, J ) )
+   40    CONTINUE
+         ANORM = MAX( ANORM, TEMP )
+         BNORM = MAX( BNORM, TEMP2 )
+   50 CONTINUE
+*
+      ASCALE = ONE / MAX( ANORM, SAFMIN )
+      BSCALE = ONE / MAX( BNORM, SAFMIN )
+*
+*     Left eigenvectors
+*
+      IF( COMPL ) THEN
+         IEIG = 0
+*
+*        Main loop over eigenvalues
+*
+         ILCPLX = .FALSE.
+         DO 220 JE = 1, N
+*
+*           Skip this iteration if (a) HOWMNY='S' and SELECT=.FALSE., or
+*           (b) this would be the second of a complex pair.
+*           Check for complex eigenvalue, so as to be sure of which
+*           entry(-ies) of SELECT to look at.
+*
+            IF( ILCPLX ) THEN
+               ILCPLX = .FALSE.
+               GO TO 220
+            END IF
+            NW = 1
+            IF( JE.LT.N ) THEN
+               IF( A( JE+1, JE ).NE.ZERO ) THEN
+                  ILCPLX = .TRUE.
+                  NW = 2
+               END IF
+            END IF
+            IF( ILALL ) THEN
+               ILCOMP = .TRUE.
+            ELSE IF( ILCPLX ) THEN
+               ILCOMP = SELECT( JE ) .OR. SELECT( JE+1 )
+            ELSE
+               ILCOMP = SELECT( JE )
+            END IF
+            IF( .NOT.ILCOMP )
+     $         GO TO 220
+*
+*           Decide if (a) singular pencil, (b) real eigenvalue, or
+*           (c) complex eigenvalue.
+*
+            IF( .NOT.ILCPLX ) THEN
+               IF( ABS( A( JE, JE ) ).LE.SAFMIN .AND.
+     $             ABS( B( JE, JE ) ).LE.SAFMIN ) THEN
+*
+*                 Singular matrix pencil -- return unit eigenvector
+*
+                  IEIG = IEIG + 1
+                  DO 60 JR = 1, N
+                     VL( JR, IEIG ) = ZERO
+   60             CONTINUE
+                  VL( IEIG, IEIG ) = ONE
+                  GO TO 220
+               END IF
+            END IF
+*
+*           Clear vector
+*
+            DO 70 JR = 1, NW*N
+               WORK( 2*N+JR ) = ZERO
+   70       CONTINUE
+*                                                 T
+*           Compute coefficients in  ( a A - b B )  y = 0
+*              a  is  ACOEF
+*              b  is  BCOEFR + i*BCOEFI
+*
+            IF( .NOT.ILCPLX ) THEN
+*
+*              Real eigenvalue
+*
+               TEMP = ONE / MAX( ABS( A( JE, JE ) )*ASCALE,
+     $                ABS( B( JE, JE ) )*BSCALE, SAFMIN )
+               SALFAR = ( TEMP*A( JE, JE ) )*ASCALE
+               SBETA = ( TEMP*B( JE, JE ) )*BSCALE
+               ACOEF = SBETA*ASCALE
+               BCOEFR = SALFAR*BSCALE
+               BCOEFI = ZERO
+*
+*              Scale to avoid underflow
+*
+               SCALE = ONE
+               LSA = ABS( SBETA ).GE.SAFMIN .AND. ABS( ACOEF ).LT.SMALL
+               LSB = ABS( SALFAR ).GE.SAFMIN .AND. ABS( BCOEFR ).LT.
+     $               SMALL
+               IF( LSA )
+     $            SCALE = ( SMALL / ABS( SBETA ) )*MIN( ANORM, BIG )
+               IF( LSB )
+     $            SCALE = MAX( SCALE, ( SMALL / ABS( SALFAR ) )*
+     $                    MIN( BNORM, BIG ) )
+               IF( LSA .OR. LSB ) THEN
+                  SCALE = MIN( SCALE, ONE /
+     $                    ( SAFMIN*MAX( ONE, ABS( ACOEF ),
+     $                    ABS( BCOEFR ) ) ) )
+                  IF( LSA ) THEN
+                     ACOEF = ASCALE*( SCALE*SBETA )
+                  ELSE
+                     ACOEF = SCALE*ACOEF
+                  END IF
+                  IF( LSB ) THEN
+                     BCOEFR = BSCALE*( SCALE*SALFAR )
+                  ELSE
+                     BCOEFR = SCALE*BCOEFR
+                  END IF
+               END IF
+               ACOEFA = ABS( ACOEF )
+               BCOEFA = ABS( BCOEFR )
+*
+*              First component is 1
+*
+               WORK( 2*N+JE ) = ONE
+               XMAX = ONE
+            ELSE
+*
+*              Complex eigenvalue
+*
+               CALL DLAG2( A( JE, JE ), LDA, B( JE, JE ), LDB,
+     $                     SAFMIN*SAFETY, ACOEF, TEMP, BCOEFR, TEMP2,
+     $                     BCOEFI )
+               BCOEFI = -BCOEFI
+               IF( BCOEFI.EQ.ZERO ) THEN
+                  INFO = JE
+                  RETURN
+               END IF
+*
+*              Scale to avoid over/underflow
+*
+               ACOEFA = ABS( ACOEF )
+               BCOEFA = ABS( BCOEFR ) + ABS( BCOEFI )
+               SCALE = ONE
+               IF( ACOEFA*ULP.LT.SAFMIN .AND. ACOEFA.GE.SAFMIN )
+     $            SCALE = ( SAFMIN / ULP ) / ACOEFA
+               IF( BCOEFA*ULP.LT.SAFMIN .AND. BCOEFA.GE.SAFMIN )
+     $            SCALE = MAX( SCALE, ( SAFMIN / ULP ) / BCOEFA )
+               IF( SAFMIN*ACOEFA.GT.ASCALE )
+     $            SCALE = ASCALE / ( SAFMIN*ACOEFA )
+               IF( SAFMIN*BCOEFA.GT.BSCALE )
+     $            SCALE = MIN( SCALE, BSCALE / ( SAFMIN*BCOEFA ) )
+               IF( SCALE.NE.ONE ) THEN
+                  ACOEF = SCALE*ACOEF
+                  ACOEFA = ABS( ACOEF )
+                  BCOEFR = SCALE*BCOEFR
+                  BCOEFI = SCALE*BCOEFI
+                  BCOEFA = ABS( BCOEFR ) + ABS( BCOEFI )
+               END IF
+*
+*              Compute first two components of eigenvector
+*
+               TEMP = ACOEF*A( JE+1, JE )
+               TEMP2R = ACOEF*A( JE, JE ) - BCOEFR*B( JE, JE )
+               TEMP2I = -BCOEFI*B( JE, JE )
+               IF( ABS( TEMP ).GT.ABS( TEMP2R )+ABS( TEMP2I ) ) THEN
+                  WORK( 2*N+JE ) = ONE
+                  WORK( 3*N+JE ) = ZERO
+                  WORK( 2*N+JE+1 ) = -TEMP2R / TEMP
+                  WORK( 3*N+JE+1 ) = -TEMP2I / TEMP
+               ELSE
+                  WORK( 2*N+JE+1 ) = ONE
+                  WORK( 3*N+JE+1 ) = ZERO
+                  TEMP = ACOEF*A( JE, JE+1 )
+                  WORK( 2*N+JE ) = ( BCOEFR*B( JE+1, JE+1 )-ACOEF*
+     $                             A( JE+1, JE+1 ) ) / TEMP
+                  WORK( 3*N+JE ) = BCOEFI*B( JE+1, JE+1 ) / TEMP
+               END IF
+               XMAX = MAX( ABS( WORK( 2*N+JE ) )+ABS( WORK( 3*N+JE ) ),
+     $                ABS( WORK( 2*N+JE+1 ) )+ABS( WORK( 3*N+JE+1 ) ) )
+            END IF
+*
+            DMIN = MAX( ULP*ACOEFA*ANORM, ULP*BCOEFA*BNORM, SAFMIN )
+*
+*                                           T
+*           Triangular solve of  (a A - b B)  y = 0
+*
+*                                   T
+*           (rowwise in  (a A - b B) , or columnwise in (a A - b B) )
+*
+            IL2BY2 = .FALSE.
+*
+            DO 160 J = JE + NW, N
+               IF( IL2BY2 ) THEN
+                  IL2BY2 = .FALSE.
+                  GO TO 160
+               END IF
+*
+               NA = 1
+               BDIAG( 1 ) = B( J, J )
+               IF( J.LT.N ) THEN
+                  IF( A( J+1, J ).NE.ZERO ) THEN
+                     IL2BY2 = .TRUE.
+                     BDIAG( 2 ) = B( J+1, J+1 )
+                     NA = 2
+                  END IF
+               END IF
+*
+*              Check whether scaling is necessary for dot products
+*
+               XSCALE = ONE / MAX( ONE, XMAX )
+               TEMP = MAX( WORK( J ), WORK( N+J ),
+     $                ACOEFA*WORK( J )+BCOEFA*WORK( N+J ) )
+               IF( IL2BY2 )
+     $            TEMP = MAX( TEMP, WORK( J+1 ), WORK( N+J+1 ),
+     $                   ACOEFA*WORK( J+1 )+BCOEFA*WORK( N+J+1 ) )
+               IF( TEMP.GT.BIGNUM*XSCALE ) THEN
+                  DO 90 JW = 0, NW - 1
+                     DO 80 JR = JE, J - 1
+                        WORK( ( JW+2 )*N+JR ) = XSCALE*
+     $                     WORK( ( JW+2 )*N+JR )
+   80                CONTINUE
+   90             CONTINUE
+                  XMAX = XMAX*XSCALE
+               END IF
+*
+*              Compute dot products
+*
+*                    j-1
+*              SUM = sum  conjg( a*A(k,j) - b*B(k,j) )*x(k)
+*                    k=je
+*
+*              To reduce the op count, this is done as
+*
+*              _        j-1                  _        j-1
+*              a*conjg( sum  A(k,j)*x(k) ) - b*conjg( sum  B(k,j)*x(k) )
+*                       k=je                          k=je
+*
+*              which may cause underflow problems if A or B are close
+*              to underflow.  (E.g., less than SMALL.)
+*
+*
+*              A series of compiler directives to defeat vectorization
+*              for the next loop
+*
+*$PL$ CMCHAR=' '
+CDIR$          NEXTSCALAR
+C$DIR          SCALAR
+CDIR$          NEXT SCALAR
+CVD$L          NOVECTOR
+CDEC$          NOVECTOR
+CVD$           NOVECTOR
+*VDIR          NOVECTOR
+*VOCL          LOOP,SCALAR
+CIBM           PREFER SCALAR
+*$PL$ CMCHAR='*'
+*
+               DO 120 JW = 1, NW
+*
+*$PL$ CMCHAR=' '
+CDIR$             NEXTSCALAR
+C$DIR             SCALAR
+CDIR$             NEXT SCALAR
+CVD$L             NOVECTOR
+CDEC$             NOVECTOR
+CVD$              NOVECTOR
+*VDIR             NOVECTOR
+*VOCL             LOOP,SCALAR
+CIBM              PREFER SCALAR
+*$PL$ CMCHAR='*'
+*
+                  DO 110 JA = 1, NA
+                     SUMA( JA, JW ) = ZERO
+                     SUMB( JA, JW ) = ZERO
+*
+                     DO 100 JR = JE, J - 1
+                        SUMA( JA, JW ) = SUMA( JA, JW ) +
+     $                                   A( JR, J+JA-1 )*
+     $                                   WORK( ( JW+1 )*N+JR )
+                        SUMB( JA, JW ) = SUMB( JA, JW ) +
+     $                                   B( JR, J+JA-1 )*
+     $                                   WORK( ( JW+1 )*N+JR )
+  100                CONTINUE
+  110             CONTINUE
+  120          CONTINUE
+*
+*$PL$ CMCHAR=' '
+CDIR$          NEXTSCALAR
+C$DIR          SCALAR
+CDIR$          NEXT SCALAR
+CVD$L          NOVECTOR
+CDEC$          NOVECTOR
+CVD$           NOVECTOR
+*VDIR          NOVECTOR
+*VOCL          LOOP,SCALAR
+CIBM           PREFER SCALAR
+*$PL$ CMCHAR='*'
+*
+               DO 130 JA = 1, NA
+                  IF( ILCPLX ) THEN
+                     SUM( JA, 1 ) = -ACOEF*SUMA( JA, 1 ) +
+     $                              BCOEFR*SUMB( JA, 1 ) -
+     $                              BCOEFI*SUMB( JA, 2 )
+                     SUM( JA, 2 ) = -ACOEF*SUMA( JA, 2 ) +
+     $                              BCOEFR*SUMB( JA, 2 ) +
+     $                              BCOEFI*SUMB( JA, 1 )
+                  ELSE
+                     SUM( JA, 1 ) = -ACOEF*SUMA( JA, 1 ) +
+     $                              BCOEFR*SUMB( JA, 1 )
+                  END IF
+  130          CONTINUE
+*
+*                                  T
+*              Solve  ( a A - b B )  y = SUM(,)
+*              with scaling and perturbation of the denominator
+*
+               CALL DLALN2( .TRUE., NA, NW, DMIN, ACOEF, A( J, J ), LDA,
+     $                      BDIAG( 1 ), BDIAG( 2 ), SUM, 2, BCOEFR,
+     $                      BCOEFI, WORK( 2*N+J ), N, SCALE, TEMP,
+     $                      IINFO )
+               IF( SCALE.LT.ONE ) THEN
+                  DO 150 JW = 0, NW - 1
+                     DO 140 JR = JE, J - 1
+                        WORK( ( JW+2 )*N+JR ) = SCALE*
+     $                     WORK( ( JW+2 )*N+JR )
+  140                CONTINUE
+  150             CONTINUE
+                  XMAX = SCALE*XMAX
+               END IF
+               XMAX = MAX( XMAX, TEMP )
+  160       CONTINUE
+*
+*           Copy eigenvector to VL, back transforming if
+*           HOWMNY='B'.
+*
+            IEIG = IEIG + 1
+            IF( ILBACK ) THEN
+               DO 170 JW = 0, NW - 1
+                  CALL DGEMV( 'N', N, N+1-JE, ONE, VL( 1, JE ), LDVL,
+     $                        WORK( ( JW+2 )*N+JE ), 1, ZERO,
+     $                        WORK( ( JW+4 )*N+1 ), 1 )
+  170          CONTINUE
+               CALL DLACPY( ' ', N, NW, WORK( 4*N+1 ), N, VL( 1, JE ),
+     $                      LDVL )
+               IBEG = 1
+            ELSE
+               CALL DLACPY( ' ', N, NW, WORK( 2*N+1 ), N, VL( 1, IEIG ),
+     $                      LDVL )
+               IBEG = JE
+            END IF
+*
+*           Scale eigenvector
+*
+            XMAX = ZERO
+            IF( ILCPLX ) THEN
+               DO 180 J = IBEG, N
+                  XMAX = MAX( XMAX, ABS( VL( J, IEIG ) )+
+     $                   ABS( VL( J, IEIG+1 ) ) )
+  180          CONTINUE
+            ELSE
+               DO 190 J = IBEG, N
+                  XMAX = MAX( XMAX, ABS( VL( J, IEIG ) ) )
+  190          CONTINUE
+            END IF
+*
+            IF( XMAX.GT.SAFMIN ) THEN
+               XSCALE = ONE / XMAX
+*
+               DO 210 JW = 0, NW - 1
+                  DO 200 JR = IBEG, N
+                     VL( JR, IEIG+JW ) = XSCALE*VL( JR, IEIG+JW )
+  200             CONTINUE
+  210          CONTINUE
+            END IF
+            IEIG = IEIG + NW - 1
+*
+  220    CONTINUE
+      END IF
+*
+*     Right eigenvectors
+*
+      IF( COMPR ) THEN
+         IEIG = IM + 1
+*
+*        Main loop over eigenvalues
+*
+         ILCPLX = .FALSE.
+         DO 500 JE = N, 1, -1
+*
+*           Skip this iteration if (a) HOWMNY='S' and SELECT=.FALSE., or
+*           (b) this would be the second of a complex pair.
+*           Check for complex eigenvalue, so as to be sure of which
+*           entry(-ies) of SELECT to look at -- if complex, SELECT(JE)
+*           or SELECT(JE-1).
+*           If this is a complex pair, the 2-by-2 diagonal block
+*           corresponding to the eigenvalue is in rows/columns JE-1:JE
+*
+            IF( ILCPLX ) THEN
+               ILCPLX = .FALSE.
+               GO TO 500
+            END IF
+            NW = 1
+            IF( JE.GT.1 ) THEN
+               IF( A( JE, JE-1 ).NE.ZERO ) THEN
+                  ILCPLX = .TRUE.
+                  NW = 2
+               END IF
+            END IF
+            IF( ILALL ) THEN
+               ILCOMP = .TRUE.
+            ELSE IF( ILCPLX ) THEN
+               ILCOMP = SELECT( JE ) .OR. SELECT( JE-1 )
+            ELSE
+               ILCOMP = SELECT( JE )
+            END IF
+            IF( .NOT.ILCOMP )
+     $         GO TO 500
+*
+*           Decide if (a) singular pencil, (b) real eigenvalue, or
+*           (c) complex eigenvalue.
+*
+            IF( .NOT.ILCPLX ) THEN
+               IF( ABS( A( JE, JE ) ).LE.SAFMIN .AND.
+     $             ABS( B( JE, JE ) ).LE.SAFMIN ) THEN
+*
+*                 Singular matrix pencil -- unit eigenvector
+*
+                  IEIG = IEIG - 1
+                  DO 230 JR = 1, N
+                     VR( JR, IEIG ) = ZERO
+  230             CONTINUE
+                  VR( IEIG, IEIG ) = ONE
+                  GO TO 500
+               END IF
+            END IF
+*
+*           Clear vector
+*
+            DO 250 JW = 0, NW - 1
+               DO 240 JR = 1, N
+                  WORK( ( JW+2 )*N+JR ) = ZERO
+  240          CONTINUE
+  250       CONTINUE
+*
+*           Compute coefficients in  ( a A - b B ) x = 0
+*              a  is  ACOEF
+*              b  is  BCOEFR + i*BCOEFI
+*
+            IF( .NOT.ILCPLX ) THEN
+*
+*              Real eigenvalue
+*
+               TEMP = ONE / MAX( ABS( A( JE, JE ) )*ASCALE,
+     $                ABS( B( JE, JE ) )*BSCALE, SAFMIN )
+               SALFAR = ( TEMP*A( JE, JE ) )*ASCALE
+               SBETA = ( TEMP*B( JE, JE ) )*BSCALE
+               ACOEF = SBETA*ASCALE
+               BCOEFR = SALFAR*BSCALE
+               BCOEFI = ZERO
+*
+*              Scale to avoid underflow
+*
+               SCALE = ONE
+               LSA = ABS( SBETA ).GE.SAFMIN .AND. ABS( ACOEF ).LT.SMALL
+               LSB = ABS( SALFAR ).GE.SAFMIN .AND. ABS( BCOEFR ).LT.
+     $               SMALL
+               IF( LSA )
+     $            SCALE = ( SMALL / ABS( SBETA ) )*MIN( ANORM, BIG )
+               IF( LSB )
+     $            SCALE = MAX( SCALE, ( SMALL / ABS( SALFAR ) )*
+     $                    MIN( BNORM, BIG ) )
+               IF( LSA .OR. LSB ) THEN
+                  SCALE = MIN( SCALE, ONE /
+     $                    ( SAFMIN*MAX( ONE, ABS( ACOEF ),
+     $                    ABS( BCOEFR ) ) ) )
+                  IF( LSA ) THEN
+                     ACOEF = ASCALE*( SCALE*SBETA )
+                  ELSE
+                     ACOEF = SCALE*ACOEF
+                  END IF
+                  IF( LSB ) THEN
+                     BCOEFR = BSCALE*( SCALE*SALFAR )
+                  ELSE
+                     BCOEFR = SCALE*BCOEFR
+                  END IF
+               END IF
+               ACOEFA = ABS( ACOEF )
+               BCOEFA = ABS( BCOEFR )
+*
+*              First component is 1
+*
+               WORK( 2*N+JE ) = ONE
+               XMAX = ONE
+*
+*              Compute contribution from column JE of A and B to sum
+*              (See "Further Details", above.)
+*
+               DO 260 JR = 1, JE - 1
+                  WORK( 2*N+JR ) = BCOEFR*B( JR, JE ) -
+     $                             ACOEF*A( JR, JE )
+  260          CONTINUE
+            ELSE
+*
+*              Complex eigenvalue
+*
+               CALL DLAG2( A( JE-1, JE-1 ), LDA, B( JE-1, JE-1 ), LDB,
+     $                     SAFMIN*SAFETY, ACOEF, TEMP, BCOEFR, TEMP2,
+     $                     BCOEFI )
+               IF( BCOEFI.EQ.ZERO ) THEN
+                  INFO = JE - 1
+                  RETURN
+               END IF
+*
+*              Scale to avoid over/underflow
+*
+               ACOEFA = ABS( ACOEF )
+               BCOEFA = ABS( BCOEFR ) + ABS( BCOEFI )
+               SCALE = ONE
+               IF( ACOEFA*ULP.LT.SAFMIN .AND. ACOEFA.GE.SAFMIN )
+     $            SCALE = ( SAFMIN / ULP ) / ACOEFA
+               IF( BCOEFA*ULP.LT.SAFMIN .AND. BCOEFA.GE.SAFMIN )
+     $            SCALE = MAX( SCALE, ( SAFMIN / ULP ) / BCOEFA )
+               IF( SAFMIN*ACOEFA.GT.ASCALE )
+     $            SCALE = ASCALE / ( SAFMIN*ACOEFA )
+               IF( SAFMIN*BCOEFA.GT.BSCALE )
+     $            SCALE = MIN( SCALE, BSCALE / ( SAFMIN*BCOEFA ) )
+               IF( SCALE.NE.ONE ) THEN
+                  ACOEF = SCALE*ACOEF
+                  ACOEFA = ABS( ACOEF )
+                  BCOEFR = SCALE*BCOEFR
+                  BCOEFI = SCALE*BCOEFI
+                  BCOEFA = ABS( BCOEFR ) + ABS( BCOEFI )
+               END IF
+*
+*              Compute first two components of eigenvector
+*              and contribution to sums
+*
+               TEMP = ACOEF*A( JE, JE-1 )
+               TEMP2R = ACOEF*A( JE, JE ) - BCOEFR*B( JE, JE )
+               TEMP2I = -BCOEFI*B( JE, JE )
+               IF( ABS( TEMP ).GE.ABS( TEMP2R )+ABS( TEMP2I ) ) THEN
+                  WORK( 2*N+JE ) = ONE
+                  WORK( 3*N+JE ) = ZERO
+                  WORK( 2*N+JE-1 ) = -TEMP2R / TEMP
+                  WORK( 3*N+JE-1 ) = -TEMP2I / TEMP
+               ELSE
+                  WORK( 2*N+JE-1 ) = ONE
+                  WORK( 3*N+JE-1 ) = ZERO
+                  TEMP = ACOEF*A( JE-1, JE )
+                  WORK( 2*N+JE ) = ( BCOEFR*B( JE-1, JE-1 )-ACOEF*
+     $                             A( JE-1, JE-1 ) ) / TEMP
+                  WORK( 3*N+JE ) = BCOEFI*B( JE-1, JE-1 ) / TEMP
+               END IF
+*
+               XMAX = MAX( ABS( WORK( 2*N+JE ) )+ABS( WORK( 3*N+JE ) ),
+     $                ABS( WORK( 2*N+JE-1 ) )+ABS( WORK( 3*N+JE-1 ) ) )
+*
+*              Compute contribution from columns JE and JE-1
+*              of A and B to the sums.
+*
+               CREALA = ACOEF*WORK( 2*N+JE-1 )
+               CIMAGA = ACOEF*WORK( 3*N+JE-1 )
+               CREALB = BCOEFR*WORK( 2*N+JE-1 ) -
+     $                  BCOEFI*WORK( 3*N+JE-1 )
+               CIMAGB = BCOEFI*WORK( 2*N+JE-1 ) +
+     $                  BCOEFR*WORK( 3*N+JE-1 )
+               CRE2A = ACOEF*WORK( 2*N+JE )
+               CIM2A = ACOEF*WORK( 3*N+JE )
+               CRE2B = BCOEFR*WORK( 2*N+JE ) - BCOEFI*WORK( 3*N+JE )
+               CIM2B = BCOEFI*WORK( 2*N+JE ) + BCOEFR*WORK( 3*N+JE )
+               DO 270 JR = 1, JE - 2
+                  WORK( 2*N+JR ) = -CREALA*A( JR, JE-1 ) +
+     $                             CREALB*B( JR, JE-1 ) -
+     $                             CRE2A*A( JR, JE ) + CRE2B*B( JR, JE )
+                  WORK( 3*N+JR ) = -CIMAGA*A( JR, JE-1 ) +
+     $                             CIMAGB*B( JR, JE-1 ) -
+     $                             CIM2A*A( JR, JE ) + CIM2B*B( JR, JE )
+  270          CONTINUE
+            END IF
+*
+            DMIN = MAX( ULP*ACOEFA*ANORM, ULP*BCOEFA*BNORM, SAFMIN )
+*
+*           Columnwise triangular solve of  (a A - b B)  x = 0
+*
+            IL2BY2 = .FALSE.
+            DO 370 J = JE - NW, 1, -1
+*
+*              If a 2-by-2 block, is in position j-1:j, wait until
+*              next iteration to process it (when it will be j:j+1)
+*
+               IF( .NOT.IL2BY2 .AND. J.GT.1 ) THEN
+                  IF( A( J, J-1 ).NE.ZERO ) THEN
+                     IL2BY2 = .TRUE.
+                     GO TO 370
+                  END IF
+               END IF
+               BDIAG( 1 ) = B( J, J )
+               IF( IL2BY2 ) THEN
+                  NA = 2
+                  BDIAG( 2 ) = B( J+1, J+1 )
+               ELSE
+                  NA = 1
+               END IF
+*
+*              Compute x(j) (and x(j+1), if 2-by-2 block)
+*
+               CALL DLALN2( .FALSE., NA, NW, DMIN, ACOEF, A( J, J ),
+     $                      LDA, BDIAG( 1 ), BDIAG( 2 ), WORK( 2*N+J ),
+     $                      N, BCOEFR, BCOEFI, SUM, 2, SCALE, TEMP,
+     $                      IINFO )
+               IF( SCALE.LT.ONE ) THEN
+*
+                  DO 290 JW = 0, NW - 1
+                     DO 280 JR = 1, JE
+                        WORK( ( JW+2 )*N+JR ) = SCALE*
+     $                     WORK( ( JW+2 )*N+JR )
+  280                CONTINUE
+  290             CONTINUE
+               END IF
+               XMAX = MAX( SCALE*XMAX, TEMP )
+*
+               DO 310 JW = 1, NW
+                  DO 300 JA = 1, NA
+                     WORK( ( JW+1 )*N+J+JA-1 ) = SUM( JA, JW )
+  300             CONTINUE
+  310          CONTINUE
+*
+*              w = w + x(j)*(a A(*,j) - b B(*,j) ) with scaling
+*
+               IF( J.GT.1 ) THEN
+*
+*                 Check whether scaling is necessary for sum.
+*
+                  XSCALE = ONE / MAX( ONE, XMAX )
+                  TEMP = ACOEFA*WORK( J ) + BCOEFA*WORK( N+J )
+                  IF( IL2BY2 )
+     $               TEMP = MAX( TEMP, ACOEFA*WORK( J+1 )+BCOEFA*
+     $                      WORK( N+J+1 ) )
+                  TEMP = MAX( TEMP, ACOEFA, BCOEFA )
+                  IF( TEMP.GT.BIGNUM*XSCALE ) THEN
+*
+                     DO 330 JW = 0, NW - 1
+                        DO 320 JR = 1, JE
+                           WORK( ( JW+2 )*N+JR ) = XSCALE*
+     $                        WORK( ( JW+2 )*N+JR )
+  320                   CONTINUE
+  330                CONTINUE
+                     XMAX = XMAX*XSCALE
+                  END IF
+*
+*                 Compute the contributions of the off-diagonals of
+*                 column j (and j+1, if 2-by-2 block) of A and B to the
+*                 sums.
+*
+*
+                  DO 360 JA = 1, NA
+                     IF( ILCPLX ) THEN
+                        CREALA = ACOEF*WORK( 2*N+J+JA-1 )
+                        CIMAGA = ACOEF*WORK( 3*N+J+JA-1 )
+                        CREALB = BCOEFR*WORK( 2*N+J+JA-1 ) -
+     $                           BCOEFI*WORK( 3*N+J+JA-1 )
+                        CIMAGB = BCOEFI*WORK( 2*N+J+JA-1 ) +
+     $                           BCOEFR*WORK( 3*N+J+JA-1 )
+                        DO 340 JR = 1, J - 1
+                           WORK( 2*N+JR ) = WORK( 2*N+JR ) -
+     $                                      CREALA*A( JR, J+JA-1 ) +
+     $                                      CREALB*B( JR, J+JA-1 )
+                           WORK( 3*N+JR ) = WORK( 3*N+JR ) -
+     $                                      CIMAGA*A( JR, J+JA-1 ) +
+     $                                      CIMAGB*B( JR, J+JA-1 )
+  340                   CONTINUE
+                     ELSE
+                        CREALA = ACOEF*WORK( 2*N+J+JA-1 )
+                        CREALB = BCOEFR*WORK( 2*N+J+JA-1 )
+                        DO 350 JR = 1, J - 1
+                           WORK( 2*N+JR ) = WORK( 2*N+JR ) -
+     $                                      CREALA*A( JR, J+JA-1 ) +
+     $                                      CREALB*B( JR, J+JA-1 )
+  350                   CONTINUE
+                     END IF
+  360             CONTINUE
+               END IF
+*
+               IL2BY2 = .FALSE.
+  370       CONTINUE
+*
+*           Copy eigenvector to VR, back transforming if
+*           HOWMNY='B'.
+*
+            IEIG = IEIG - NW
+            IF( ILBACK ) THEN
+*
+               DO 410 JW = 0, NW - 1
+                  DO 380 JR = 1, N
+                     WORK( ( JW+4 )*N+JR ) = WORK( ( JW+2 )*N+1 )*
+     $                                       VR( JR, 1 )
+  380             CONTINUE
+*
+*                 A series of compiler directives to defeat
+*                 vectorization for the next loop
+*
+*
+                  DO 400 JC = 2, JE
+                     DO 390 JR = 1, N
+                        WORK( ( JW+4 )*N+JR ) = WORK( ( JW+4 )*N+JR ) +
+     $                     WORK( ( JW+2 )*N+JC )*VR( JR, JC )
+  390                CONTINUE
+  400             CONTINUE
+  410          CONTINUE
+*
+               DO 430 JW = 0, NW - 1
+                  DO 420 JR = 1, N
+                     VR( JR, IEIG+JW ) = WORK( ( JW+4 )*N+JR )
+  420             CONTINUE
+  430          CONTINUE
+*
+               IEND = N
+            ELSE
+               DO 450 JW = 0, NW - 1
+                  DO 440 JR = 1, N
+                     VR( JR, IEIG+JW ) = WORK( ( JW+2 )*N+JR )
+  440             CONTINUE
+  450          CONTINUE
+*
+               IEND = JE
+            END IF
+*
+*           Scale eigenvector
+*
+            XMAX = ZERO
+            IF( ILCPLX ) THEN
+               DO 460 J = 1, IEND
+                  XMAX = MAX( XMAX, ABS( VR( J, IEIG ) )+
+     $                   ABS( VR( J, IEIG+1 ) ) )
+  460          CONTINUE
+            ELSE
+               DO 470 J = 1, IEND
+                  XMAX = MAX( XMAX, ABS( VR( J, IEIG ) ) )
+  470          CONTINUE
+            END IF
+*
+            IF( XMAX.GT.SAFMIN ) THEN
+               XSCALE = ONE / XMAX
+               DO 490 JW = 0, NW - 1
+                  DO 480 JR = 1, IEND
+                     VR( JR, IEIG+JW ) = XSCALE*VR( JR, IEIG+JW )
+  480             CONTINUE
+  490          CONTINUE
+            END IF
+  500    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DTGEVC
+*
+      END
diff --git a/costa/native/external/lapack/dtgex2.f b/costa/native/external/lapack/dtgex2.f
new file mode 100644
index 000000000..81b085e25
--- /dev/null
+++ b/costa/native/external/lapack/dtgex2.f
@@ -0,0 +1,582 @@
+      SUBROUTINE DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
+     $                   LDZ, J1, N1, N2, WORK, LWORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      LOGICAL            WANTQ, WANTZ
+      INTEGER            INFO, J1, LDA, LDB, LDQ, LDZ, LWORK, N, N1, N2
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
+     $                   WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTGEX2 swaps adjacent diagonal blocks (A11, B11) and (A22, B22)
+*  of size 1-by-1 or 2-by-2 in an upper (quasi) triangular matrix pair
+*  (A, B) by an orthogonal equivalence transformation.
+*
+*  (A, B) must be in generalized real Schur canonical form (as returned
+*  by DGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2
+*  diagonal blocks. B is upper triangular.
+*
+*  Optionally, the matrices Q and Z of generalized Schur vectors are
+*  updated.
+*
+*         Q(in) * A(in) * Z(in)' = Q(out) * A(out) * Z(out)'
+*         Q(in) * B(in) * Z(in)' = Q(out) * B(out) * Z(out)'
+*
+*
+*  Arguments
+*  =========
+*
+*  WANTQ   (input) LOGICAL
+*          .TRUE. : update the left transformation matrix Q;
+*          .FALSE.: do not update Q.
+*
+*  WANTZ   (input) LOGICAL
+*          .TRUE. : update the right transformation matrix Z;
+*          .FALSE.: do not update Z.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B. N >= 0.
+*
+*  A      (input/output) DOUBLE PRECISION arrays, dimensions (LDA,N)
+*          On entry, the matrix A in the pair (A, B).
+*          On exit, the updated matrix A.
+*
+*  LDA     (input)  INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  B      (input/output) DOUBLE PRECISION arrays, dimensions (LDB,N)
+*          On entry, the matrix B in the pair (A, B).
+*          On exit, the updated matrix B.
+*
+*  LDB     (input)  INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  Q       (input/output) DOUBLE PRECISION array, dimension (LDZ,N)
+*          On entry, if WANTQ = .TRUE., the orthogonal matrix Q.
+*          On exit, the updated matrix Q.
+*          Not referenced if WANTQ = .FALSE..
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q. LDQ >= 1.
+*          If WANTQ = .TRUE., LDQ >= N.
+*
+*  Z       (input/output) DOUBLE PRECISION array, dimension (LDZ,N)
+*          On entry, if WANTZ =.TRUE., the orthogonal matrix Z.
+*          On exit, the updated matrix Z.
+*          Not referenced if WANTZ = .FALSE..
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z. LDZ >= 1.
+*          If WANTZ = .TRUE., LDZ >= N.
+*
+*  J1      (input) INTEGER
+*          The index to the first block (A11, B11). 1 <= J1 <= N.
+*
+*  N1      (input) INTEGER
+*          The order of the first block (A11, B11). N1 = 0, 1 or 2.
+*
+*  N2      (input) INTEGER
+*          The order of the second block (A22, B22). N2 = 0, 1 or 2.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK).
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          LWORK >=  MAX( N*(N2+N1), (N2+N1)*(N2+N1)*2 )
+*
+*  INFO    (output) INTEGER
+*            =0: Successful exit
+*            >0: If INFO = 1, the transformed matrix (A, B) would be
+*                too far from generalized Schur form; the blocks are
+*                not swapped and (A, B) and (Q, Z) are unchanged.
+*                The problem of swapping is too ill-conditioned.
+*            <0: If INFO = -16: LWORK is too small. Appropriate value
+*                for LWORK is returned in WORK(1).
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  In the current code both weak and strong stability tests are
+*  performed. The user can omit the strong stability test by changing
+*  the internal logical parameter WANDS to .FALSE.. See ref. [2] for
+*  details.
+*
+*  [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the
+*      Generalized Real Schur Form of a Regular Matrix Pair (A, B), in
+*      M.S. Moonen et al (eds), Linear Algebra for Large Scale and
+*      Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218.
+*
+*  [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified
+*      Eigenvalues of a Regular Matrix Pair (A, B) and Condition
+*      Estimation: Theory, Algorithms and Software,
+*      Report UMINF - 94.04, Department of Computing Science, Umea
+*      University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working
+*      Note 87. To appear in Numerical Algorithms, 1996.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+      DOUBLE PRECISION   TEN
+      PARAMETER          ( TEN = 1.0D+01 )
+      INTEGER            LDST
+      PARAMETER          ( LDST = 4 )
+      LOGICAL            WANDS
+      PARAMETER          ( WANDS = .TRUE. )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            DTRONG, WEAK
+      INTEGER            I, IDUM, LINFO, M
+      DOUBLE PRECISION   BQRA21, BRQA21, DDUM, DNORM, DSCALE, DSUM, EPS,
+     $                   F, G, SA, SB, SCALE, SMLNUM, SS, THRESH, WS
+*     ..
+*     .. Local Arrays ..
+      INTEGER            IWORK( LDST )
+      DOUBLE PRECISION   AI( 2 ), AR( 2 ), BE( 2 ), IR( LDST, LDST ),
+     $                   IRCOP( LDST, LDST ), LI( LDST, LDST ),
+     $                   LICOP( LDST, LDST ), S( LDST, LDST ),
+     $                   SCPY( LDST, LDST ), T( LDST, LDST ),
+     $                   TAUL( LDST ), TAUR( LDST ), TCPY( LDST, LDST )
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DGEMM, DGEQR2, DGERQ2, DLACPY, DLAGV2,
+     $                   DLARTG, DLASSQ, DORG2R, DORGR2, DORM2R, DORMR2,
+     $                   DROT, DSCAL, DTGSY2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+*
+*     Quick return if possible
+*
+      IF( N.LE.1 .OR. N1.LE.0 .OR. N2.LE.0 )
+     $   RETURN
+      IF( N1.GT.N .OR. ( J1+N1 ).GT.N )
+     $   RETURN
+      M = N1 + N2
+      IF( LWORK.LT.MAX( N*M, M*M*2 ) ) THEN
+         INFO = -16
+         WORK( 1 ) = MAX( N*M, M*M*2 )
+         RETURN
+      END IF
+*
+      WEAK = .FALSE.
+      DTRONG = .FALSE.
+*
+*     Make a local copy of selected block
+*
+      CALL DCOPY( LDST*LDST, ZERO, 0, LI, 1 )
+      CALL DCOPY( LDST*LDST, ZERO, 0, IR, 1 )
+      CALL DLACPY( 'Full', M, M, A( J1, J1 ), LDA, S, LDST )
+      CALL DLACPY( 'Full', M, M, B( J1, J1 ), LDB, T, LDST )
+*
+*     Compute threshold for testing acceptance of swapping.
+*
+      EPS = DLAMCH( 'P' )
+      SMLNUM = DLAMCH( 'S' ) / EPS
+      DSCALE = ZERO
+      DSUM = ONE
+      CALL DLACPY( 'Full', M, M, S, LDST, WORK, M )
+      CALL DLASSQ( M*M, WORK, 1, DSCALE, DSUM )
+      CALL DLACPY( 'Full', M, M, T, LDST, WORK, M )
+      CALL DLASSQ( M*M, WORK, 1, DSCALE, DSUM )
+      DNORM = DSCALE*SQRT( DSUM )
+      THRESH = MAX( TEN*EPS*DNORM, SMLNUM )
+*
+      IF( M.EQ.2 ) THEN
+*
+*        CASE 1: Swap 1-by-1 and 1-by-1 blocks.
+*
+*        Compute orthogonal QL and RQ that swap 1-by-1 and 1-by-1 blocks
+*        using Givens rotations and perform the swap tentatively.
+*
+         F = S( 2, 2 )*T( 1, 1 ) - T( 2, 2 )*S( 1, 1 )
+         G = S( 2, 2 )*T( 1, 2 ) - T( 2, 2 )*S( 1, 2 )
+         SB = ABS( T( 2, 2 ) )
+         SA = ABS( S( 2, 2 ) )
+         CALL DLARTG( F, G, IR( 1, 2 ), IR( 1, 1 ), DDUM )
+         IR( 2, 1 ) = -IR( 1, 2 )
+         IR( 2, 2 ) = IR( 1, 1 )
+         CALL DROT( 2, S( 1, 1 ), 1, S( 1, 2 ), 1, IR( 1, 1 ),
+     $              IR( 2, 1 ) )
+         CALL DROT( 2, T( 1, 1 ), 1, T( 1, 2 ), 1, IR( 1, 1 ),
+     $              IR( 2, 1 ) )
+         IF( SA.GE.SB ) THEN
+            CALL DLARTG( S( 1, 1 ), S( 2, 1 ), LI( 1, 1 ), LI( 2, 1 ),
+     $                   DDUM )
+         ELSE
+            CALL DLARTG( T( 1, 1 ), T( 2, 1 ), LI( 1, 1 ), LI( 2, 1 ),
+     $                   DDUM )
+         END IF
+         CALL DROT( 2, S( 1, 1 ), LDST, S( 2, 1 ), LDST, LI( 1, 1 ),
+     $              LI( 2, 1 ) )
+         CALL DROT( 2, T( 1, 1 ), LDST, T( 2, 1 ), LDST, LI( 1, 1 ),
+     $              LI( 2, 1 ) )
+         LI( 2, 2 ) = LI( 1, 1 )
+         LI( 1, 2 ) = -LI( 2, 1 )
+*
+*        Weak stability test:
+*           |S21| + |T21| <= O(EPS * F-norm((S, T)))
+*
+         WS = ABS( S( 2, 1 ) ) + ABS( T( 2, 1 ) )
+         WEAK = WS.LE.THRESH
+         IF( .NOT.WEAK )
+     $      GO TO 70
+*
+         IF( WANDS ) THEN
+*
+*           Strong stability test:
+*             F-norm((A-QL'*S*QR, B-QL'*T*QR)) <= O(EPS*F-norm((A,B)))
+*
+            CALL DLACPY( 'Full', M, M, A( J1, J1 ), LDA, WORK( M*M+1 ),
+     $                   M )
+            CALL DGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, S, LDST, ZERO,
+     $                  WORK, M )
+            CALL DGEMM( 'N', 'T', M, M, M, -ONE, WORK, M, IR, LDST, ONE,
+     $                  WORK( M*M+1 ), M )
+            DSCALE = ZERO
+            DSUM = ONE
+            CALL DLASSQ( M*M, WORK( M*M+1 ), 1, DSCALE, DSUM )
+*
+            CALL DLACPY( 'Full', M, M, B( J1, J1 ), LDB, WORK( M*M+1 ),
+     $                   M )
+            CALL DGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, T, LDST, ZERO,
+     $                  WORK, M )
+            CALL DGEMM( 'N', 'T', M, M, M, -ONE, WORK, M, IR, LDST, ONE,
+     $                  WORK( M*M+1 ), M )
+            CALL DLASSQ( M*M, WORK( M*M+1 ), 1, DSCALE, DSUM )
+            SS = DSCALE*SQRT( DSUM )
+            DTRONG = SS.LE.THRESH
+            IF( .NOT.DTRONG )
+     $         GO TO 70
+         END IF
+*
+*        Update (A(J1:J1+M-1, M+J1:N), B(J1:J1+M-1, M+J1:N)) and
+*               (A(1:J1-1, J1:J1+M), B(1:J1-1, J1:J1+M)).
+*
+         CALL DROT( J1+1, A( 1, J1 ), 1, A( 1, J1+1 ), 1, IR( 1, 1 ),
+     $              IR( 2, 1 ) )
+         CALL DROT( J1+1, B( 1, J1 ), 1, B( 1, J1+1 ), 1, IR( 1, 1 ),
+     $              IR( 2, 1 ) )
+         CALL DROT( N-J1+1, A( J1, J1 ), LDA, A( J1+1, J1 ), LDA,
+     $              LI( 1, 1 ), LI( 2, 1 ) )
+         CALL DROT( N-J1+1, B( J1, J1 ), LDB, B( J1+1, J1 ), LDB,
+     $              LI( 1, 1 ), LI( 2, 1 ) )
+*
+*        Set  N1-by-N2 (2,1) - blocks to ZERO.
+*
+         A( J1+1, J1 ) = ZERO
+         B( J1+1, J1 ) = ZERO
+*
+*        Accumulate transformations into Q and Z if requested.
+*
+         IF( WANTZ )
+     $      CALL DROT( N, Z( 1, J1 ), 1, Z( 1, J1+1 ), 1, IR( 1, 1 ),
+     $                 IR( 2, 1 ) )
+         IF( WANTQ )
+     $      CALL DROT( N, Q( 1, J1 ), 1, Q( 1, J1+1 ), 1, LI( 1, 1 ),
+     $                 LI( 2, 1 ) )
+*
+*        Exit with INFO = 0 if swap was successfully performed.
+*
+         RETURN
+*
+      ELSE
+*
+*        CASE 2: Swap 1-by-1 and 2-by-2 blocks, or 2-by-2
+*                and 2-by-2 blocks.
+*
+*        Solve the generalized Sylvester equation
+*                 S11 * R - L * S22 = SCALE * S12
+*                 T11 * R - L * T22 = SCALE * T12
+*        for R and L. Solutions in LI and IR.
+*
+         CALL DLACPY( 'Full', N1, N2, T( 1, N1+1 ), LDST, LI, LDST )
+         CALL DLACPY( 'Full', N1, N2, S( 1, N1+1 ), LDST,
+     $                IR( N2+1, N1+1 ), LDST )
+         CALL DTGSY2( 'N', 0, N1, N2, S, LDST, S( N1+1, N1+1 ), LDST,
+     $                IR( N2+1, N1+1 ), LDST, T, LDST, T( N1+1, N1+1 ),
+     $                LDST, LI, LDST, SCALE, DSUM, DSCALE, IWORK, IDUM,
+     $                LINFO )
+*
+*        Compute orthogonal matrix QL:
+*
+*                    QL' * LI = [ TL ]
+*                               [ 0  ]
+*        where
+*                    LI =  [      -L              ]
+*                          [ SCALE * identity(N2) ]
+*
+         DO 10 I = 1, N2
+            CALL DSCAL( N1, -ONE, LI( 1, I ), 1 )
+            LI( N1+I, I ) = SCALE
+   10    CONTINUE
+         CALL DGEQR2( M, N2, LI, LDST, TAUL, WORK, LINFO )
+         IF( LINFO.NE.0 )
+     $      GO TO 70
+         CALL DORG2R( M, M, N2, LI, LDST, TAUL, WORK, LINFO )
+         IF( LINFO.NE.0 )
+     $      GO TO 70
+*
+*        Compute orthogonal matrix RQ:
+*
+*                    IR * RQ' =   [ 0  TR],
+*
+*         where IR = [ SCALE * identity(N1), R ]
+*
+         DO 20 I = 1, N1
+            IR( N2+I, I ) = SCALE
+   20    CONTINUE
+         CALL DGERQ2( N1, M, IR( N2+1, 1 ), LDST, TAUR, WORK, LINFO )
+         IF( LINFO.NE.0 )
+     $      GO TO 70
+         CALL DORGR2( M, M, N1, IR, LDST, TAUR, WORK, LINFO )
+         IF( LINFO.NE.0 )
+     $      GO TO 70
+*
+*        Perform the swapping tentatively:
+*
+         CALL DGEMM( 'T', 'N', M, M, M, ONE, LI, LDST, S, LDST, ZERO,
+     $               WORK, M )
+         CALL DGEMM( 'N', 'T', M, M, M, ONE, WORK, M, IR, LDST, ZERO, S,
+     $               LDST )
+         CALL DGEMM( 'T', 'N', M, M, M, ONE, LI, LDST, T, LDST, ZERO,
+     $               WORK, M )
+         CALL DGEMM( 'N', 'T', M, M, M, ONE, WORK, M, IR, LDST, ZERO, T,
+     $               LDST )
+         CALL DLACPY( 'F', M, M, S, LDST, SCPY, LDST )
+         CALL DLACPY( 'F', M, M, T, LDST, TCPY, LDST )
+         CALL DLACPY( 'F', M, M, IR, LDST, IRCOP, LDST )
+         CALL DLACPY( 'F', M, M, LI, LDST, LICOP, LDST )
+*
+*        Triangularize the B-part by an RQ factorization.
+*        Apply transformation (from left) to A-part, giving S.
+*
+         CALL DGERQ2( M, M, T, LDST, TAUR, WORK, LINFO )
+         IF( LINFO.NE.0 )
+     $      GO TO 70
+         CALL DORMR2( 'R', 'T', M, M, M, T, LDST, TAUR, S, LDST, WORK,
+     $                LINFO )
+         IF( LINFO.NE.0 )
+     $      GO TO 70
+         CALL DORMR2( 'L', 'N', M, M, M, T, LDST, TAUR, IR, LDST, WORK,
+     $                LINFO )
+         IF( LINFO.NE.0 )
+     $      GO TO 70
+*
+*        Compute F-norm(S21) in BRQA21. (T21 is 0.)
+*
+         DSCALE = ZERO
+         DSUM = ONE
+         DO 30 I = 1, N2
+            CALL DLASSQ( N1, S( N2+1, I ), 1, DSCALE, DSUM )
+   30    CONTINUE
+         BRQA21 = DSCALE*SQRT( DSUM )
+*
+*        Triangularize the B-part by a QR factorization.
+*        Apply transformation (from right) to A-part, giving S.
+*
+         CALL DGEQR2( M, M, TCPY, LDST, TAUL, WORK, LINFO )
+         IF( LINFO.NE.0 )
+     $      GO TO 70
+         CALL DORM2R( 'L', 'T', M, M, M, TCPY, LDST, TAUL, SCPY, LDST,
+     $                WORK, INFO )
+         CALL DORM2R( 'R', 'N', M, M, M, TCPY, LDST, TAUL, LICOP, LDST,
+     $                WORK, INFO )
+         IF( LINFO.NE.0 )
+     $      GO TO 70
+*
+*        Compute F-norm(S21) in BQRA21. (T21 is 0.)
+*
+         DSCALE = ZERO
+         DSUM = ONE
+         DO 40 I = 1, N2
+            CALL DLASSQ( N1, SCPY( N2+1, I ), 1, DSCALE, DSUM )
+   40    CONTINUE
+         BQRA21 = DSCALE*SQRT( DSUM )
+*
+*        Decide which method to use.
+*          Weak stability test:
+*             F-norm(S21) <= O(EPS * F-norm((S, T)))
+*
+         IF( BQRA21.LE.BRQA21 .AND. BQRA21.LE.THRESH ) THEN
+            CALL DLACPY( 'F', M, M, SCPY, LDST, S, LDST )
+            CALL DLACPY( 'F', M, M, TCPY, LDST, T, LDST )
+            CALL DLACPY( 'F', M, M, IRCOP, LDST, IR, LDST )
+            CALL DLACPY( 'F', M, M, LICOP, LDST, LI, LDST )
+         ELSE IF( BRQA21.GE.THRESH ) THEN
+            GO TO 70
+         END IF
+*
+*        Set lower triangle of B-part to zero
+*
+         DO 50 I = 2, M
+            CALL DCOPY( M-I+1, ZERO, 0, T( I, I-1 ), 1 )
+   50    CONTINUE
+*
+         IF( WANDS ) THEN
+*
+*           Strong stability test:
+*              F-norm((A-QL*S*QR', B-QL*T*QR')) <= O(EPS*F-norm((A,B)))
+*
+            CALL DLACPY( 'Full', M, M, A( J1, J1 ), LDA, WORK( M*M+1 ),
+     $                   M )
+            CALL DGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, S, LDST, ZERO,
+     $                  WORK, M )
+            CALL DGEMM( 'N', 'N', M, M, M, -ONE, WORK, M, IR, LDST, ONE,
+     $                  WORK( M*M+1 ), M )
+            DSCALE = ZERO
+            DSUM = ONE
+            CALL DLASSQ( M*M, WORK( M*M+1 ), 1, DSCALE, DSUM )
+*
+            CALL DLACPY( 'Full', M, M, B( J1, J1 ), LDB, WORK( M*M+1 ),
+     $                   M )
+            CALL DGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, T, LDST, ZERO,
+     $                  WORK, M )
+            CALL DGEMM( 'N', 'N', M, M, M, -ONE, WORK, M, IR, LDST, ONE,
+     $                  WORK( M*M+1 ), M )
+            CALL DLASSQ( M*M, WORK( M*M+1 ), 1, DSCALE, DSUM )
+            SS = DSCALE*SQRT( DSUM )
+            DTRONG = ( SS.LE.THRESH )
+            IF( .NOT.DTRONG )
+     $         GO TO 70
+*
+         END IF
+*
+*        If the swap is accepted ("weakly" and "strongly"), apply the
+*        transformations and set N1-by-N2 (2,1)-block to zero.
+*
+         DO 60 I = 1, N2
+            CALL DCOPY( N1, ZERO, 0, S( N2+1, I ), 1 )
+   60    CONTINUE
+*
+*        copy back M-by-M diagonal block starting at index J1 of (A, B)
+*
+         CALL DLACPY( 'F', M, M, S, LDST, A( J1, J1 ), LDA )
+         CALL DLACPY( 'F', M, M, T, LDST, B( J1, J1 ), LDB )
+         CALL DCOPY( LDST*LDST, ZERO, 0, T, 1 )
+*
+*        Standardize existing 2-by-2 blocks.
+*
+         CALL DCOPY( M*M, ZERO, 0, WORK, 1 )
+         WORK( 1 ) = ONE
+         T( 1, 1 ) = ONE
+         IDUM = LWORK - M*M - 2
+         IF( N2.GT.1 ) THEN
+            CALL DLAGV2( A( J1, J1 ), LDA, B( J1, J1 ), LDB, AR, AI, BE,
+     $                   WORK( 1 ), WORK( 2 ), T( 1, 1 ), T( 2, 1 ) )
+            WORK( M+1 ) = -WORK( 2 )
+            WORK( M+2 ) = WORK( 1 )
+            T( N2, N2 ) = T( 1, 1 )
+            T( 1, 2 ) = -T( 2, 1 )
+         END IF
+         WORK( M*M ) = ONE
+         T( M, M ) = ONE
+*
+         IF( N1.GT.1 ) THEN
+            CALL DLAGV2( A( J1+N2, J1+N2 ), LDA, B( J1+N2, J1+N2 ), LDB,
+     $                   TAUR, TAUL, WORK( M*M+1 ), WORK( N2*M+N2+1 ),
+     $                   WORK( N2*M+N2+2 ), T( N2+1, N2+1 ),
+     $                   T( M, M-1 ) )
+            WORK( M*M ) = WORK( N2*M+N2+1 )
+            WORK( M*M-1 ) = -WORK( N2*M+N2+2 )
+            T( M, M ) = T( N2+1, N2+1 )
+            T( M-1, M ) = -T( M, M-1 )
+         END IF
+         CALL DGEMM( 'T', 'N', N2, N1, N2, ONE, WORK, M, A( J1, J1+N2 ),
+     $               LDA, ZERO, WORK( M*M+1 ), N2 )
+         CALL DLACPY( 'Full', N2, N1, WORK( M*M+1 ), N2, A( J1, J1+N2 ),
+     $                LDA )
+         CALL DGEMM( 'T', 'N', N2, N1, N2, ONE, WORK, M, B( J1, J1+N2 ),
+     $               LDB, ZERO, WORK( M*M+1 ), N2 )
+         CALL DLACPY( 'Full', N2, N1, WORK( M*M+1 ), N2, B( J1, J1+N2 ),
+     $                LDB )
+         CALL DGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, WORK, M, ZERO,
+     $               WORK( M*M+1 ), M )
+         CALL DLACPY( 'Full', M, M, WORK( M*M+1 ), M, LI, LDST )
+         CALL DGEMM( 'N', 'N', N2, N1, N1, ONE, A( J1, J1+N2 ), LDA,
+     $               T( N2+1, N2+1 ), LDST, ZERO, WORK, N2 )
+         CALL DLACPY( 'Full', N2, N1, WORK, N2, A( J1, J1+N2 ), LDA )
+         CALL DGEMM( 'N', 'N', N2, N1, N1, ONE, B( J1, J1+N2 ), LDA,
+     $               T( N2+1, N2+1 ), LDST, ZERO, WORK, N2 )
+         CALL DLACPY( 'Full', N2, N1, WORK, N2, B( J1, J1+N2 ), LDB )
+         CALL DGEMM( 'T', 'N', M, M, M, ONE, IR, LDST, T, LDST, ZERO,
+     $               WORK, M )
+         CALL DLACPY( 'Full', M, M, WORK, M, IR, LDST )
+*
+*        Accumulate transformations into Q and Z if requested.
+*
+         IF( WANTQ ) THEN
+            CALL DGEMM( 'N', 'N', N, M, M, ONE, Q( 1, J1 ), LDQ, LI,
+     $                  LDST, ZERO, WORK, N )
+            CALL DLACPY( 'Full', N, M, WORK, N, Q( 1, J1 ), LDQ )
+*
+         END IF
+*
+         IF( WANTZ ) THEN
+            CALL DGEMM( 'N', 'N', N, M, M, ONE, Z( 1, J1 ), LDZ, IR,
+     $                  LDST, ZERO, WORK, N )
+            CALL DLACPY( 'Full', N, M, WORK, N, Z( 1, J1 ), LDZ )
+*
+         END IF
+*
+*        Update (A(J1:J1+M-1, M+J1:N), B(J1:J1+M-1, M+J1:N)) and
+*                (A(1:J1-1, J1:J1+M), B(1:J1-1, J1:J1+M)).
+*
+         I = J1 + M
+         IF( I.LE.N ) THEN
+            CALL DGEMM( 'T', 'N', M, N-I+1, M, ONE, LI, LDST,
+     $                  A( J1, I ), LDA, ZERO, WORK, M )
+            CALL DLACPY( 'Full', M, N-I+1, WORK, M, A( J1, I ), LDA )
+            CALL DGEMM( 'T', 'N', M, N-I+1, M, ONE, LI, LDST,
+     $                  B( J1, I ), LDA, ZERO, WORK, M )
+            CALL DLACPY( 'Full', M, N-I+1, WORK, M, B( J1, I ), LDA )
+         END IF
+         I = J1 - 1
+         IF( I.GT.0 ) THEN
+            CALL DGEMM( 'N', 'N', I, M, M, ONE, A( 1, J1 ), LDA, IR,
+     $                  LDST, ZERO, WORK, I )
+            CALL DLACPY( 'Full', I, M, WORK, I, A( 1, J1 ), LDA )
+            CALL DGEMM( 'N', 'N', I, M, M, ONE, B( 1, J1 ), LDB, IR,
+     $                  LDST, ZERO, WORK, I )
+            CALL DLACPY( 'Full', I, M, WORK, I, B( 1, J1 ), LDB )
+         END IF
+*
+*        Exit with INFO = 0 if swap was successfully performed.
+*
+         RETURN
+*
+      END IF
+*
+*     Exit with INFO = 1 if swap was rejected.
+*
+   70 CONTINUE
+*
+      INFO = 1
+      RETURN
+*
+*     End of DTGEX2
+*
+      END
diff --git a/costa/native/external/lapack/dtgexc.f b/costa/native/external/lapack/dtgexc.f
new file mode 100644
index 000000000..169f0467e
--- /dev/null
+++ b/costa/native/external/lapack/dtgexc.f
@@ -0,0 +1,434 @@
+      SUBROUTINE DTGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
+     $                   LDZ, IFST, ILST, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      LOGICAL            WANTQ, WANTZ
+      INTEGER            IFST, ILST, INFO, LDA, LDB, LDQ, LDZ, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
+     $                   WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTGEXC reorders the generalized real Schur decomposition of a real
+*  matrix pair (A,B) using an orthogonal equivalence transformation
+*
+*                 (A, B) = Q * (A, B) * Z',
+*
+*  so that the diagonal block of (A, B) with row index IFST is moved
+*  to row ILST.
+*
+*  (A, B) must be in generalized real Schur canonical form (as returned
+*  by DGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2
+*  diagonal blocks. B is upper triangular.
+*
+*  Optionally, the matrices Q and Z of generalized Schur vectors are
+*  updated.
+*
+*         Q(in) * A(in) * Z(in)' = Q(out) * A(out) * Z(out)'
+*         Q(in) * B(in) * Z(in)' = Q(out) * B(out) * Z(out)'
+*
+*
+*  Arguments
+*  =========
+*
+*  WANTQ   (input) LOGICAL
+*          .TRUE. : update the left transformation matrix Q;
+*          .FALSE.: do not update Q.
+*
+*  WANTZ   (input) LOGICAL
+*          .TRUE. : update the right transformation matrix Z;
+*          .FALSE.: do not update Z.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B. N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the matrix A in generalized real Schur canonical
+*          form.
+*          On exit, the updated matrix A, again in generalized
+*          real Schur canonical form.
+*
+*  LDA     (input)  INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,N)
+*          On entry, the matrix B in generalized real Schur canonical
+*          form (A,B).
+*          On exit, the updated matrix B, again in generalized
+*          real Schur canonical form (A,B).
+*
+*  LDB     (input)  INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  Q       (input/output) DOUBLE PRECISION array, dimension (LDZ,N)
+*          On entry, if WANTQ = .TRUE., the orthogonal matrix Q.
+*          On exit, the updated matrix Q.
+*          If WANTQ = .FALSE., Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q. LDQ >= 1.
+*          If WANTQ = .TRUE., LDQ >= N.
+*
+*  Z       (input/output) DOUBLE PRECISION array, dimension (LDZ,N)
+*          On entry, if WANTZ = .TRUE., the orthogonal matrix Z.
+*          On exit, the updated matrix Z.
+*          If WANTZ = .FALSE., Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z. LDZ >= 1.
+*          If WANTZ = .TRUE., LDZ >= N.
+*
+*  IFST    (input/output) INTEGER
+*  ILST    (input/output) INTEGER
+*          Specify the reordering of the diagonal blocks of (A, B).
+*          The block with row index IFST is moved to row ILST, by a
+*          sequence of swapping between adjacent blocks.
+*          On exit, if IFST pointed on entry to the second row of
+*          a 2-by-2 block, it is changed to point to the first row;
+*          ILST always points to the first row of the block in its
+*          final position (which may differ from its input value by
+*          +1 or -1). 1 <= IFST, ILST <= N.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= 4*N + 16.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*           =0:  successful exit.
+*           <0:  if INFO = -i, the i-th argument had an illegal value.
+*           =1:  The transformed matrix pair (A, B) would be too far
+*                from generalized Schur form; the problem is ill-
+*                conditioned. (A, B) may have been partially reordered,
+*                and ILST points to the first row of the current
+*                position of the block being moved.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the
+*      Generalized Real Schur Form of a Regular Matrix Pair (A, B), in
+*      M.S. Moonen et al (eds), Linear Algebra for Large Scale and
+*      Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            HERE, LWMIN, NBF, NBL, NBNEXT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DTGEX2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test input arguments.
+*
+      INFO = 0
+      LWMIN = MAX( 1, 4*N+16 )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDQ.LT.1 .OR. WANTQ .AND. ( LDQ.LT.MAX( 1, N ) ) ) THEN
+         INFO = -9
+      ELSE IF( LDZ.LT.1 .OR. WANTZ .AND. ( LDZ.LT.MAX( 1, N ) ) ) THEN
+         INFO = -11
+      ELSE IF( IFST.LT.1 .OR. IFST.GT.N ) THEN
+         INFO = -12
+      ELSE IF( ILST.LT.1 .OR. ILST.GT.N ) THEN
+         INFO = -13
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -15
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DTGEXC', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.1 )
+     $   RETURN
+*
+*     Determine the first row of the specified block and find out
+*     if it is 1-by-1 or 2-by-2.
+*
+      IF( IFST.GT.1 ) THEN
+         IF( A( IFST, IFST-1 ).NE.ZERO )
+     $      IFST = IFST - 1
+      END IF
+      NBF = 1
+      IF( IFST.LT.N ) THEN
+         IF( A( IFST+1, IFST ).NE.ZERO )
+     $      NBF = 2
+      END IF
+*
+*     Determine the first row of the final block
+*     and find out if it is 1-by-1 or 2-by-2.
+*
+      IF( ILST.GT.1 ) THEN
+         IF( A( ILST, ILST-1 ).NE.ZERO )
+     $      ILST = ILST - 1
+      END IF
+      NBL = 1
+      IF( ILST.LT.N ) THEN
+         IF( A( ILST+1, ILST ).NE.ZERO )
+     $      NBL = 2
+      END IF
+      IF( IFST.EQ.ILST )
+     $   RETURN
+*
+      IF( IFST.LT.ILST ) THEN
+*
+*        Update ILST.
+*
+         IF( NBF.EQ.2 .AND. NBL.EQ.1 )
+     $      ILST = ILST - 1
+         IF( NBF.EQ.1 .AND. NBL.EQ.2 )
+     $      ILST = ILST + 1
+*
+         HERE = IFST
+*
+   10    CONTINUE
+*
+*        Swap with next one below.
+*
+         IF( NBF.EQ.1 .OR. NBF.EQ.2 ) THEN
+*
+*           Current block either 1-by-1 or 2-by-2.
+*
+            NBNEXT = 1
+            IF( HERE+NBF+1.LE.N ) THEN
+               IF( A( HERE+NBF+1, HERE+NBF ).NE.ZERO )
+     $            NBNEXT = 2
+            END IF
+            CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
+     $                   LDZ, HERE, NBF, NBNEXT, WORK, LWORK, INFO )
+            IF( INFO.NE.0 ) THEN
+               ILST = HERE
+               RETURN
+            END IF
+            HERE = HERE + NBNEXT
+*
+*           Test if 2-by-2 block breaks into two 1-by-1 blocks.
+*
+            IF( NBF.EQ.2 ) THEN
+               IF( A( HERE+1, HERE ).EQ.ZERO )
+     $            NBF = 3
+            END IF
+*
+         ELSE
+*
+*           Current block consists of two 1-by-1 blocks, each of which
+*           must be swapped individually.
+*
+            NBNEXT = 1
+            IF( HERE+3.LE.N ) THEN
+               IF( A( HERE+3, HERE+2 ).NE.ZERO )
+     $            NBNEXT = 2
+            END IF
+            CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
+     $                   LDZ, HERE+1, 1, NBNEXT, WORK, LWORK, INFO )
+            IF( INFO.NE.0 ) THEN
+               ILST = HERE
+               RETURN
+            END IF
+            IF( NBNEXT.EQ.1 ) THEN
+*
+*              Swap two 1-by-1 blocks.
+*
+               CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
+     $                      LDZ, HERE, 1, 1, WORK, LWORK, INFO )
+               IF( INFO.NE.0 ) THEN
+                  ILST = HERE
+                  RETURN
+               END IF
+               HERE = HERE + 1
+*
+            ELSE
+*
+*              Recompute NBNEXT in case of 2-by-2 split.
+*
+               IF( A( HERE+2, HERE+1 ).EQ.ZERO )
+     $            NBNEXT = 1
+               IF( NBNEXT.EQ.2 ) THEN
+*
+*                 2-by-2 block did not split.
+*
+                  CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ,
+     $                         Z, LDZ, HERE, 1, NBNEXT, WORK, LWORK,
+     $                         INFO )
+                  IF( INFO.NE.0 ) THEN
+                     ILST = HERE
+                     RETURN
+                  END IF
+                  HERE = HERE + 2
+               ELSE
+*
+*                 2-by-2 block did split.
+*
+                  CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ,
+     $                         Z, LDZ, HERE, 1, 1, WORK, LWORK, INFO )
+                  IF( INFO.NE.0 ) THEN
+                     ILST = HERE
+                     RETURN
+                  END IF
+                  HERE = HERE + 1
+                  CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ,
+     $                         Z, LDZ, HERE, 1, 1, WORK, LWORK, INFO )
+                  IF( INFO.NE.0 ) THEN
+                     ILST = HERE
+                     RETURN
+                  END IF
+                  HERE = HERE + 1
+               END IF
+*
+            END IF
+         END IF
+         IF( HERE.LT.ILST )
+     $      GO TO 10
+      ELSE
+         HERE = IFST
+*
+   20    CONTINUE
+*
+*        Swap with next one below.
+*
+         IF( NBF.EQ.1 .OR. NBF.EQ.2 ) THEN
+*
+*           Current block either 1-by-1 or 2-by-2.
+*
+            NBNEXT = 1
+            IF( HERE.GE.3 ) THEN
+               IF( A( HERE-1, HERE-2 ).NE.ZERO )
+     $            NBNEXT = 2
+            END IF
+            CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
+     $                   LDZ, HERE-NBNEXT, NBNEXT, NBF, WORK, LWORK,
+     $                   INFO )
+            IF( INFO.NE.0 ) THEN
+               ILST = HERE
+               RETURN
+            END IF
+            HERE = HERE - NBNEXT
+*
+*           Test if 2-by-2 block breaks into two 1-by-1 blocks.
+*
+            IF( NBF.EQ.2 ) THEN
+               IF( A( HERE+1, HERE ).EQ.ZERO )
+     $            NBF = 3
+            END IF
+*
+         ELSE
+*
+*           Current block consists of two 1-by-1 blocks, each of which
+*           must be swapped individually.
+*
+            NBNEXT = 1
+            IF( HERE.GE.3 ) THEN
+               IF( A( HERE-1, HERE-2 ).NE.ZERO )
+     $            NBNEXT = 2
+            END IF
+            CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
+     $                   LDZ, HERE-NBNEXT, NBNEXT, 1, WORK, LWORK,
+     $                   INFO )
+            IF( INFO.NE.0 ) THEN
+               ILST = HERE
+               RETURN
+            END IF
+            IF( NBNEXT.EQ.1 ) THEN
+*
+*              Swap two 1-by-1 blocks.
+*
+               CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
+     $                      LDZ, HERE, NBNEXT, 1, WORK, LWORK, INFO )
+               IF( INFO.NE.0 ) THEN
+                  ILST = HERE
+                  RETURN
+               END IF
+               HERE = HERE - 1
+            ELSE
+*
+*             Recompute NBNEXT in case of 2-by-2 split.
+*
+               IF( A( HERE, HERE-1 ).EQ.ZERO )
+     $            NBNEXT = 1
+               IF( NBNEXT.EQ.2 ) THEN
+*
+*                 2-by-2 block did not split.
+*
+                  CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ,
+     $                         Z, LDZ, HERE-1, 2, 1, WORK, LWORK, INFO )
+                  IF( INFO.NE.0 ) THEN
+                     ILST = HERE
+                     RETURN
+                  END IF
+                  HERE = HERE - 2
+               ELSE
+*
+*                 2-by-2 block did split.
+*
+                  CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ,
+     $                         Z, LDZ, HERE, 1, 1, WORK, LWORK, INFO )
+                  IF( INFO.NE.0 ) THEN
+                     ILST = HERE
+                     RETURN
+                  END IF
+                  HERE = HERE - 1
+                  CALL DTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ,
+     $                         Z, LDZ, HERE, 1, 1, WORK, LWORK, INFO )
+                  IF( INFO.NE.0 ) THEN
+                     ILST = HERE
+                     RETURN
+                  END IF
+                  HERE = HERE - 1
+               END IF
+            END IF
+         END IF
+         IF( HERE.GT.ILST )
+     $      GO TO 20
+      END IF
+      ILST = HERE
+      WORK( 1 ) = LWMIN
+      RETURN
+*
+*     End of DTGEXC
+*
+      END
diff --git a/costa/native/external/lapack/dtgsen.f b/costa/native/external/lapack/dtgsen.f
new file mode 100644
index 000000000..417adfe93
--- /dev/null
+++ b/costa/native/external/lapack/dtgsen.f
@@ -0,0 +1,718 @@
+      SUBROUTINE DTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB,
+     $                   ALPHAR, ALPHAI, BETA, Q, LDQ, Z, LDZ, M, PL,
+     $                   PR, DIF, WORK, LWORK, IWORK, LIWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      LOGICAL            WANTQ, WANTZ
+      INTEGER            IJOB, INFO, LDA, LDB, LDQ, LDZ, LIWORK, LWORK,
+     $                   M, N
+      DOUBLE PRECISION   PL, PR
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            SELECT( * )
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), ALPHAI( * ), ALPHAR( * ),
+     $                   B( LDB, * ), BETA( * ), DIF( * ), Q( LDQ, * ),
+     $                   WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTGSEN reorders the generalized real Schur decomposition of a real
+*  matrix pair (A, B) (in terms of an orthonormal equivalence trans-
+*  formation Q' * (A, B) * Z), so that a selected cluster of eigenvalues
+*  appears in the leading diagonal blocks of the upper quasi-triangular
+*  matrix A and the upper triangular B. The leading columns of Q and
+*  Z form orthonormal bases of the corresponding left and right eigen-
+*  spaces (deflating subspaces). (A, B) must be in generalized real
+*  Schur canonical form (as returned by DGGES), i.e. A is block upper
+*  triangular with 1-by-1 and 2-by-2 diagonal blocks. B is upper
+*  triangular.
+*
+*  DTGSEN also computes the generalized eigenvalues
+*
+*              w(j) = (ALPHAR(j) + i*ALPHAI(j))/BETA(j)
+*
+*  of the reordered matrix pair (A, B).
+*
+*  Optionally, DTGSEN computes the estimates of reciprocal condition
+*  numbers for eigenvalues and eigenspaces. These are Difu[(A11,B11),
+*  (A22,B22)] and Difl[(A11,B11), (A22,B22)], i.e. the separation(s)
+*  between the matrix pairs (A11, B11) and (A22,B22) that correspond to
+*  the selected cluster and the eigenvalues outside the cluster, resp.,
+*  and norms of "projections" onto left and right eigenspaces w.r.t.
+*  the selected cluster in the (1,1)-block.
+*
+*  Arguments
+*  =========
+*
+*  IJOB    (input) INTEGER
+*          Specifies whether condition numbers are required for the
+*          cluster of eigenvalues (PL and PR) or the deflating subspaces
+*          (Difu and Difl):
+*           =0: Only reorder w.r.t. SELECT. No extras.
+*           =1: Reciprocal of norms of "projections" onto left and right
+*               eigenspaces w.r.t. the selected cluster (PL and PR).
+*           =2: Upper bounds on Difu and Difl. F-norm-based estimate
+*               (DIF(1:2)).
+*           =3: Estimate of Difu and Difl. 1-norm-based estimate
+*               (DIF(1:2)).
+*               About 5 times as expensive as IJOB = 2.
+*           =4: Compute PL, PR and DIF (i.e. 0, 1 and 2 above): Economic
+*               version to get it all.
+*           =5: Compute PL, PR and DIF (i.e. 0, 1 and 3 above)
+*
+*  WANTQ   (input) LOGICAL
+*          .TRUE. : update the left transformation matrix Q;
+*          .FALSE.: do not update Q.
+*
+*  WANTZ   (input) LOGICAL
+*          .TRUE. : update the right transformation matrix Z;
+*          .FALSE.: do not update Z.
+*
+*  SELECT  (input) LOGICAL array, dimension (N)
+*          SELECT specifies the eigenvalues in the selected cluster.
+*          To select a real eigenvalue w(j), SELECT(j) must be set to
+*          .TRUE.. To select a complex conjugate pair of eigenvalues
+*          w(j) and w(j+1), corresponding to a 2-by-2 diagonal block,
+*          either SELECT(j) or SELECT(j+1) or both must be set to
+*          .TRUE.; a complex conjugate pair of eigenvalues must be
+*          either both included in the cluster or both excluded.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B. N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension(LDA,N)
+*          On entry, the upper quasi-triangular matrix A, with (A, B) in
+*          generalized real Schur canonical form.
+*          On exit, A is overwritten by the reordered matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension(LDB,N)
+*          On entry, the upper triangular matrix B, with (A, B) in
+*          generalized real Schur canonical form.
+*          On exit, B is overwritten by the reordered matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  ALPHAR  (output) DOUBLE PRECISION array, dimension (N)
+*  ALPHAI  (output) DOUBLE PRECISION array, dimension (N)
+*  BETA    (output) DOUBLE PRECISION array, dimension (N)
+*          On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will
+*          be the generalized eigenvalues.  ALPHAR(j) + ALPHAI(j)*i
+*          and BETA(j),j=1,...,N  are the diagonals of the complex Schur
+*          form (S,T) that would result if the 2-by-2 diagonal blocks of
+*          the real generalized Schur form of (A,B) were further reduced
+*          to triangular form using complex unitary transformations.
+*          If ALPHAI(j) is zero, then the j-th eigenvalue is real; if
+*          positive, then the j-th and (j+1)-st eigenvalues are a
+*          complex conjugate pair, with ALPHAI(j+1) negative.
+*
+*  Q       (input/output) DOUBLE PRECISION array, dimension (LDQ,N)
+*          On entry, if WANTQ = .TRUE., Q is an N-by-N matrix.
+*          On exit, Q has been postmultiplied by the left orthogonal
+*          transformation matrix which reorder (A, B); The leading M
+*          columns of Q form orthonormal bases for the specified pair of
+*          left eigenspaces (deflating subspaces).
+*          If WANTQ = .FALSE., Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.  LDQ >= 1;
+*          and if WANTQ = .TRUE., LDQ >= N.
+*
+*  Z       (input/output) DOUBLE PRECISION array, dimension (LDZ,N)
+*          On entry, if WANTZ = .TRUE., Z is an N-by-N matrix.
+*          On exit, Z has been postmultiplied by the left orthogonal
+*          transformation matrix which reorder (A, B); The leading M
+*          columns of Z form orthonormal bases for the specified pair of
+*          left eigenspaces (deflating subspaces).
+*          If WANTZ = .FALSE., Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z. LDZ >= 1;
+*          If WANTZ = .TRUE., LDZ >= N.
+*
+*  M       (output) INTEGER
+*          The dimension of the specified pair of left and right eigen-
+*          spaces (deflating subspaces). 0 <= M <= N.
+*
+*  PL, PR  (output) DOUBLE PRECISION
+*          If IJOB = 1, 4 or 5, PL, PR are lower bounds on the
+*          reciprocal of the norm of "projections" onto left and right
+*          eigenspaces with respect to the selected cluster.
+*          0 < PL, PR <= 1.
+*          If M = 0 or M = N, PL = PR  = 1.
+*          If IJOB = 0, 2 or 3, PL and PR are not referenced.
+*
+*  DIF     (output) DOUBLE PRECISION array, dimension (2).
+*          If IJOB >= 2, DIF(1:2) store the estimates of Difu and Difl.
+*          If IJOB = 2 or 4, DIF(1:2) are F-norm-based upper bounds on
+*          Difu and Difl. If IJOB = 3 or 5, DIF(1:2) are 1-norm-based
+*          estimates of Difu and Difl.
+*          If M = 0 or N, DIF(1:2) = F-norm([A, B]).
+*          If IJOB = 0 or 1, DIF is not referenced.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          IF IJOB = 0, WORK is not referenced.  Otherwise,
+*          on exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >=  4*N+16.
+*          If IJOB = 1, 2 or 4, LWORK >= MAX(4*N+16, 2*M*(N-M)).
+*          If IJOB = 3 or 5, LWORK >= MAX(4*N+16, 4*M*(N-M)).
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          IF IJOB = 0, IWORK is not referenced.  Otherwise,
+*          on exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK. LIWORK >= 1.
+*          If IJOB = 1, 2 or 4, LIWORK >=  N+6.
+*          If IJOB = 3 or 5, LIWORK >= MAX(2*M*(N-M), N+6).
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*            =0: Successful exit.
+*            <0: If INFO = -i, the i-th argument had an illegal value.
+*            =1: Reordering of (A, B) failed because the transformed
+*                matrix pair (A, B) would be too far from generalized
+*                Schur form; the problem is very ill-conditioned.
+*                (A, B) may have been partially reordered.
+*                If requested, 0 is returned in DIF(*), PL and PR.
+*
+*  Further Details
+*  ===============
+*
+*  DTGSEN first collects the selected eigenvalues by computing
+*  orthogonal U and W that move them to the top left corner of (A, B).
+*  In other words, the selected eigenvalues are the eigenvalues of
+*  (A11, B11) in:
+*
+*                U'*(A, B)*W = (A11 A12) (B11 B12) n1
+*                              ( 0  A22),( 0  B22) n2
+*                                n1  n2    n1  n2
+*
+*  where N = n1+n2 and U' means the transpose of U. The first n1 columns
+*  of U and W span the specified pair of left and right eigenspaces
+*  (deflating subspaces) of (A, B).
+*
+*  If (A, B) has been obtained from the generalized real Schur
+*  decomposition of a matrix pair (C, D) = Q*(A, B)*Z', then the
+*  reordered generalized real Schur form of (C, D) is given by
+*
+*           (C, D) = (Q*U)*(U'*(A, B)*W)*(Z*W)',
+*
+*  and the first n1 columns of Q*U and Z*W span the corresponding
+*  deflating subspaces of (C, D) (Q and Z store Q*U and Z*W, resp.).
+*
+*  Note that if the selected eigenvalue is sufficiently ill-conditioned,
+*  then its value may differ significantly from its value before
+*  reordering.
+*
+*  The reciprocal condition numbers of the left and right eigenspaces
+*  spanned by the first n1 columns of U and W (or Q*U and Z*W) may
+*  be returned in DIF(1:2), corresponding to Difu and Difl, resp.
+*
+*  The Difu and Difl are defined as:
+*
+*       Difu[(A11, B11), (A22, B22)] = sigma-min( Zu )
+*  and
+*       Difl[(A11, B11), (A22, B22)] = Difu[(A22, B22), (A11, B11)],
+*
+*  where sigma-min(Zu) is the smallest singular value of the
+*  (2*n1*n2)-by-(2*n1*n2) matrix
+*
+*       Zu = [ kron(In2, A11)  -kron(A22', In1) ]
+*            [ kron(In2, B11)  -kron(B22', In1) ].
+*
+*  Here, Inx is the identity matrix of size nx and A22' is the
+*  transpose of A22. kron(X, Y) is the Kronecker product between
+*  the matrices X and Y.
+*
+*  When DIF(2) is small, small changes in (A, B) can cause large changes
+*  in the deflating subspace. An approximate (asymptotic) bound on the
+*  maximum angular error in the computed deflating subspaces is
+*
+*       EPS * norm((A, B)) / DIF(2),
+*
+*  where EPS is the machine precision.
+*
+*  The reciprocal norm of the projectors on the left and right
+*  eigenspaces associated with (A11, B11) may be returned in PL and PR.
+*  They are computed as follows. First we compute L and R so that
+*  P*(A, B)*Q is block diagonal, where
+*
+*       P = ( I -L ) n1           Q = ( I R ) n1
+*           ( 0  I ) n2    and        ( 0 I ) n2
+*             n1 n2                    n1 n2
+*
+*  and (L, R) is the solution to the generalized Sylvester equation
+*
+*       A11*R - L*A22 = -A12
+*       B11*R - L*B22 = -B12
+*
+*  Then PL = (F-norm(L)**2+1)**(-1/2) and PR = (F-norm(R)**2+1)**(-1/2).
+*  An approximate (asymptotic) bound on the average absolute error of
+*  the selected eigenvalues is
+*
+*       EPS * norm((A, B)) / PL.
+*
+*  There are also global error bounds which valid for perturbations up
+*  to a certain restriction:  A lower bound (x) on the smallest
+*  F-norm(E,F) for which an eigenvalue of (A11, B11) may move and
+*  coalesce with an eigenvalue of (A22, B22) under perturbation (E,F),
+*  (i.e. (A + E, B + F), is
+*
+*   x = min(Difu,Difl)/((1/(PL*PL)+1/(PR*PR))**(1/2)+2*max(1/PL,1/PR)).
+*
+*  An approximate bound on x can be computed from DIF(1:2), PL and PR.
+*
+*  If y = ( F-norm(E,F) / x) <= 1, the angles between the perturbed
+*  (L', R') and unperturbed (L, R) left and right deflating subspaces
+*  associated with the selected cluster in the (1,1)-blocks can be
+*  bounded as
+*
+*   max-angle(L, L') <= arctan( y * PL / (1 - y * (1 - PL * PL)**(1/2))
+*   max-angle(R, R') <= arctan( y * PR / (1 - y * (1 - PR * PR)**(1/2))
+*
+*  See LAPACK User's Guide section 4.11 or the following references
+*  for more information.
+*
+*  Note that if the default method for computing the Frobenius-norm-
+*  based estimate DIF is not wanted (see DLATDF), then the parameter
+*  IDIFJB (see below) should be changed from 3 to 4 (routine DLATDF
+*  (IJOB = 2 will be used)). See DTGSYL for more details.
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  References
+*  ==========
+*
+*  [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the
+*      Generalized Real Schur Form of a Regular Matrix Pair (A, B), in
+*      M.S. Moonen et al (eds), Linear Algebra for Large Scale and
+*      Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218.
+*
+*  [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified
+*      Eigenvalues of a Regular Matrix Pair (A, B) and Condition
+*      Estimation: Theory, Algorithms and Software,
+*      Report UMINF - 94.04, Department of Computing Science, Umea
+*      University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working
+*      Note 87. To appear in Numerical Algorithms, 1996.
+*
+*  [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software
+*      for Solving the Generalized Sylvester Equation and Estimating the
+*      Separation between Regular Matrix Pairs, Report UMINF - 93.23,
+*      Department of Computing Science, Umea University, S-901 87 Umea,
+*      Sweden, December 1993, Revised April 1994, Also as LAPACK Working
+*      Note 75. To appear in ACM Trans. on Math. Software, Vol 22, No 1,
+*      1996.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            IDIFJB
+      PARAMETER          ( IDIFJB = 3 )
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, PAIR, SWAP, WANTD, WANTD1, WANTD2,
+     $                   WANTP
+      INTEGER            I, IERR, IJB, K, KASE, KK, KS, LIWMIN, LWMIN,
+     $                   MN2, N1, N2
+      DOUBLE PRECISION   DSCALE, DSUM, EPS, RDSCAL, SMLNUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLACON, DLACPY, DLAG2, DLASSQ, DTGEXC, DTGSYL,
+     $                   XERBLA
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, SIGN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      IF( IJOB.LT.0 .OR. IJOB.GT.5 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN
+         INFO = -14
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -16
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DTGSEN', -INFO )
+         RETURN
+      END IF
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'P' )
+      SMLNUM = DLAMCH( 'S' ) / EPS
+      IERR = 0
+*
+      WANTP = IJOB.EQ.1 .OR. IJOB.GE.4
+      WANTD1 = IJOB.EQ.2 .OR. IJOB.EQ.4
+      WANTD2 = IJOB.EQ.3 .OR. IJOB.EQ.5
+      WANTD = WANTD1 .OR. WANTD2
+*
+*     Set M to the dimension of the specified pair of deflating
+*     subspaces.
+*
+      M = 0
+      PAIR = .FALSE.
+      DO 10 K = 1, N
+         IF( PAIR ) THEN
+            PAIR = .FALSE.
+         ELSE
+            IF( K.LT.N ) THEN
+               IF( A( K+1, K ).EQ.ZERO ) THEN
+                  IF( SELECT( K ) )
+     $               M = M + 1
+               ELSE
+                  PAIR = .TRUE.
+                  IF( SELECT( K ) .OR. SELECT( K+1 ) )
+     $               M = M + 2
+               END IF
+            ELSE
+               IF( SELECT( N ) )
+     $            M = M + 1
+            END IF
+         END IF
+   10 CONTINUE
+*
+      IF( IJOB.EQ.1 .OR. IJOB.EQ.2 .OR. IJOB.EQ.4 ) THEN
+         LWMIN = MAX( 1, 4*N+16, 2*M*( N-M ) )
+         LIWMIN = MAX( 1, N+6 )
+      ELSE IF( IJOB.EQ.3 .OR. IJOB.EQ.5 ) THEN
+         LWMIN = MAX( 1, 4*N+16, 4*M*( N-M ) )
+         LIWMIN = MAX( 1, 2*M*( N-M ), N+6 )
+      ELSE
+         LWMIN = MAX( 1, 4*N+16 )
+         LIWMIN = 1
+      END IF
+*
+      WORK( 1 ) = LWMIN
+      IWORK( 1 ) = LIWMIN
+*
+      IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -22
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -24
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DTGSEN', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( M.EQ.N .OR. M.EQ.0 ) THEN
+         IF( WANTP ) THEN
+            PL = ONE
+            PR = ONE
+         END IF
+         IF( WANTD ) THEN
+            DSCALE = ZERO
+            DSUM = ONE
+            DO 20 I = 1, N
+               CALL DLASSQ( N, A( 1, I ), 1, DSCALE, DSUM )
+               CALL DLASSQ( N, B( 1, I ), 1, DSCALE, DSUM )
+   20       CONTINUE
+            DIF( 1 ) = DSCALE*SQRT( DSUM )
+            DIF( 2 ) = DIF( 1 )
+         END IF
+         GO TO 60
+      END IF
+*
+*     Collect the selected blocks at the top-left corner of (A, B).
+*
+      KS = 0
+      PAIR = .FALSE.
+      DO 30 K = 1, N
+         IF( PAIR ) THEN
+            PAIR = .FALSE.
+         ELSE
+*
+            SWAP = SELECT( K )
+            IF( K.LT.N ) THEN
+               IF( A( K+1, K ).NE.ZERO ) THEN
+                  PAIR = .TRUE.
+                  SWAP = SWAP .OR. SELECT( K+1 )
+               END IF
+            END IF
+*
+            IF( SWAP ) THEN
+               KS = KS + 1
+*
+*              Swap the K-th block to position KS.
+*              Perform the reordering of diagonal blocks in (A, B)
+*              by orthogonal transformation matrices and update
+*              Q and Z accordingly (if requested):
+*
+               KK = K
+               IF( K.NE.KS )
+     $            CALL DTGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ,
+     $                         Z, LDZ, KK, KS, WORK, LWORK, IERR )
+*
+               IF( IERR.GT.0 ) THEN
+*
+*                 Swap is rejected: exit.
+*
+                  INFO = 1
+                  IF( WANTP ) THEN
+                     PL = ZERO
+                     PR = ZERO
+                  END IF
+                  IF( WANTD ) THEN
+                     DIF( 1 ) = ZERO
+                     DIF( 2 ) = ZERO
+                  END IF
+                  GO TO 60
+               END IF
+*
+               IF( PAIR )
+     $            KS = KS + 1
+            END IF
+         END IF
+   30 CONTINUE
+      IF( WANTP ) THEN
+*
+*        Solve generalized Sylvester equation for R and L
+*        and compute PL and PR.
+*
+         N1 = M
+         N2 = N - M
+         I = N1 + 1
+         IJB = 0
+         CALL DLACPY( 'Full', N1, N2, A( 1, I ), LDA, WORK, N1 )
+         CALL DLACPY( 'Full', N1, N2, B( 1, I ), LDB, WORK( N1*N2+1 ),
+     $                N1 )
+         CALL DTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK,
+     $                N1, B, LDB, B( I, I ), LDB, WORK( N1*N2+1 ), N1,
+     $                DSCALE, DIF( 1 ), WORK( N1*N2*2+1 ),
+     $                LWORK-2*N1*N2, IWORK, IERR )
+*
+*        Estimate the reciprocal of norms of "projections" onto left
+*        and right eigenspaces.
+*
+         RDSCAL = ZERO
+         DSUM = ONE
+         CALL DLASSQ( N1*N2, WORK, 1, RDSCAL, DSUM )
+         PL = RDSCAL*SQRT( DSUM )
+         IF( PL.EQ.ZERO ) THEN
+            PL = ONE
+         ELSE
+            PL = DSCALE / ( SQRT( DSCALE*DSCALE / PL+PL )*SQRT( PL ) )
+         END IF
+         RDSCAL = ZERO
+         DSUM = ONE
+         CALL DLASSQ( N1*N2, WORK( N1*N2+1 ), 1, RDSCAL, DSUM )
+         PR = RDSCAL*SQRT( DSUM )
+         IF( PR.EQ.ZERO ) THEN
+            PR = ONE
+         ELSE
+            PR = DSCALE / ( SQRT( DSCALE*DSCALE / PR+PR )*SQRT( PR ) )
+         END IF
+      END IF
+*
+      IF( WANTD ) THEN
+*
+*        Compute estimates of Difu and Difl.
+*
+         IF( WANTD1 ) THEN
+            N1 = M
+            N2 = N - M
+            I = N1 + 1
+            IJB = IDIFJB
+*
+*           Frobenius norm-based Difu-estimate.
+*
+            CALL DTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK,
+     $                   N1, B, LDB, B( I, I ), LDB, WORK( N1*N2+1 ),
+     $                   N1, DSCALE, DIF( 1 ), WORK( 2*N1*N2+1 ),
+     $                   LWORK-2*N1*N2, IWORK, IERR )
+*
+*           Frobenius norm-based Difl-estimate.
+*
+            CALL DTGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, WORK,
+     $                   N2, B( I, I ), LDB, B, LDB, WORK( N1*N2+1 ),
+     $                   N2, DSCALE, DIF( 2 ), WORK( 2*N1*N2+1 ),
+     $                   LWORK-2*N1*N2, IWORK, IERR )
+         ELSE
+*
+*
+*           Compute 1-norm-based estimates of Difu and Difl using
+*           reversed communication with DLACON. In each step a
+*           generalized Sylvester equation or a transposed variant
+*           is solved.
+*
+            KASE = 0
+            N1 = M
+            N2 = N - M
+            I = N1 + 1
+            IJB = 0
+            MN2 = 2*N1*N2
+*
+*           1-norm-based estimate of Difu.
+*
+   40       CONTINUE
+            CALL DLACON( MN2, WORK( MN2+1 ), WORK, IWORK, DIF( 1 ),
+     $                   KASE )
+            IF( KASE.NE.0 ) THEN
+               IF( KASE.EQ.1 ) THEN
+*
+*                 Solve generalized Sylvester equation.
+*
+                  CALL DTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA,
+     $                         WORK, N1, B, LDB, B( I, I ), LDB,
+     $                         WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ),
+     $                         WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK,
+     $                         IERR )
+               ELSE
+*
+*                 Solve the transposed variant.
+*
+                  CALL DTGSYL( 'T', IJB, N1, N2, A, LDA, A( I, I ), LDA,
+     $                         WORK, N1, B, LDB, B( I, I ), LDB,
+     $                         WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ),
+     $                         WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK,
+     $                         IERR )
+               END IF
+               GO TO 40
+            END IF
+            DIF( 1 ) = DSCALE / DIF( 1 )
+*
+*           1-norm-based estimate of Difl.
+*
+   50       CONTINUE
+            CALL DLACON( MN2, WORK( MN2+1 ), WORK, IWORK, DIF( 2 ),
+     $                   KASE )
+            IF( KASE.NE.0 ) THEN
+               IF( KASE.EQ.1 ) THEN
+*
+*                 Solve generalized Sylvester equation.
+*
+                  CALL DTGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA,
+     $                         WORK, N2, B( I, I ), LDB, B, LDB,
+     $                         WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ),
+     $                         WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK,
+     $                         IERR )
+               ELSE
+*
+*                 Solve the transposed variant.
+*
+                  CALL DTGSYL( 'T', IJB, N2, N1, A( I, I ), LDA, A, LDA,
+     $                         WORK, N2, B( I, I ), LDB, B, LDB,
+     $                         WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ),
+     $                         WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK,
+     $                         IERR )
+               END IF
+               GO TO 50
+            END IF
+            DIF( 2 ) = DSCALE / DIF( 2 )
+*
+         END IF
+      END IF
+*
+   60 CONTINUE
+*
+*     Compute generalized eigenvalues of reordered pair (A, B) and
+*     normalize the generalized Schur form.
+*
+      PAIR = .FALSE.
+      DO 80 K = 1, N
+         IF( PAIR ) THEN
+            PAIR = .FALSE.
+         ELSE
+*
+            IF( K.LT.N ) THEN
+               IF( A( K+1, K ).NE.ZERO ) THEN
+                  PAIR = .TRUE.
+               END IF
+            END IF
+*
+            IF( PAIR ) THEN
+*
+*             Compute the eigenvalue(s) at position K.
+*
+               WORK( 1 ) = A( K, K )
+               WORK( 2 ) = A( K+1, K )
+               WORK( 3 ) = A( K, K+1 )
+               WORK( 4 ) = A( K+1, K+1 )
+               WORK( 5 ) = B( K, K )
+               WORK( 6 ) = B( K+1, K )
+               WORK( 7 ) = B( K, K+1 )
+               WORK( 8 ) = B( K+1, K+1 )
+               CALL DLAG2( WORK, 2, WORK( 5 ), 2, SMLNUM*EPS, BETA( K ),
+     $                     BETA( K+1 ), ALPHAR( K ), ALPHAR( K+1 ),
+     $                     ALPHAI( K ) )
+               ALPHAI( K+1 ) = -ALPHAI( K )
+*
+            ELSE
+*
+               IF( SIGN( ONE, B( K, K ) ).LT.ZERO ) THEN
+*
+*                 If B(K,K) is negative, make it positive
+*
+                  DO 70 I = 1, N
+                     A( K, I ) = -A( K, I )
+                     B( K, I ) = -B( K, I )
+                     Q( I, K ) = -Q( I, K )
+   70             CONTINUE
+               END IF
+*
+               ALPHAR( K ) = A( K, K )
+               ALPHAI( K ) = ZERO
+               BETA( K ) = B( K, K )
+*
+            END IF
+         END IF
+   80 CONTINUE
+*
+      WORK( 1 ) = LWMIN
+      IWORK( 1 ) = LIWMIN
+*
+      RETURN
+*
+*     End of DTGSEN
+*
+      END
diff --git a/costa/native/external/lapack/dtgsja.f b/costa/native/external/lapack/dtgsja.f
new file mode 100644
index 000000000..2518d7a6b
--- /dev/null
+++ b/costa/native/external/lapack/dtgsja.f
@@ -0,0 +1,516 @@
+      SUBROUTINE DTGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B,
+     $                   LDB, TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV,
+     $                   Q, LDQ, WORK, NCYCLE, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBQ, JOBU, JOBV
+      INTEGER            INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N,
+     $                   NCYCLE, P
+      DOUBLE PRECISION   TOLA, TOLB
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), ALPHA( * ), B( LDB, * ),
+     $                   BETA( * ), Q( LDQ, * ), U( LDU, * ),
+     $                   V( LDV, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTGSJA computes the generalized singular value decomposition (GSVD)
+*  of two real upper triangular (or trapezoidal) matrices A and B.
+*
+*  On entry, it is assumed that matrices A and B have the following
+*  forms, which may be obtained by the preprocessing subroutine DGGSVP
+*  from a general M-by-N matrix A and P-by-N matrix B:
+*
+*               N-K-L  K    L
+*     A =    K ( 0    A12  A13 ) if M-K-L >= 0;
+*            L ( 0     0   A23 )
+*        M-K-L ( 0     0    0  )
+*
+*             N-K-L  K    L
+*     A =  K ( 0    A12  A13 ) if M-K-L < 0;
+*        M-K ( 0     0   A23 )
+*
+*             N-K-L  K    L
+*     B =  L ( 0     0   B13 )
+*        P-L ( 0     0    0  )
+*
+*  where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular
+*  upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0,
+*  otherwise A23 is (M-K)-by-L upper trapezoidal.
+*
+*  On exit,
+*
+*              U'*A*Q = D1*( 0 R ),    V'*B*Q = D2*( 0 R ),
+*
+*  where U, V and Q are orthogonal matrices, Z' denotes the transpose
+*  of Z, R is a nonsingular upper triangular matrix, and D1 and D2 are
+*  ``diagonal'' matrices, which are of the following structures:
+*
+*  If M-K-L >= 0,
+*
+*                      K  L
+*         D1 =     K ( I  0 )
+*                  L ( 0  C )
+*              M-K-L ( 0  0 )
+*
+*                    K  L
+*         D2 = L   ( 0  S )
+*              P-L ( 0  0 )
+*
+*                 N-K-L  K    L
+*    ( 0 R ) = K (  0   R11  R12 ) K
+*              L (  0    0   R22 ) L
+*
+*  where
+*
+*    C = diag( ALPHA(K+1), ... , ALPHA(K+L) ),
+*    S = diag( BETA(K+1),  ... , BETA(K+L) ),
+*    C**2 + S**2 = I.
+*
+*    R is stored in A(1:K+L,N-K-L+1:N) on exit.
+*
+*  If M-K-L < 0,
+*
+*                 K M-K K+L-M
+*      D1 =   K ( I  0    0   )
+*           M-K ( 0  C    0   )
+*
+*                   K M-K K+L-M
+*      D2 =   M-K ( 0  S    0   )
+*           K+L-M ( 0  0    I   )
+*             P-L ( 0  0    0   )
+*
+*                 N-K-L  K   M-K  K+L-M
+* ( 0 R ) =    K ( 0    R11  R12  R13  )
+*            M-K ( 0     0   R22  R23  )
+*          K+L-M ( 0     0    0   R33  )
+*
+*  where
+*  C = diag( ALPHA(K+1), ... , ALPHA(M) ),
+*  S = diag( BETA(K+1),  ... , BETA(M) ),
+*  C**2 + S**2 = I.
+*
+*  R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored
+*      (  0  R22 R23 )
+*  in B(M-K+1:L,N+M-K-L+1:N) on exit.
+*
+*  The computation of the orthogonal transformation matrices U, V or Q
+*  is optional.  These matrices may either be formed explicitly, or they
+*  may be postmultiplied into input matrices U1, V1, or Q1.
+*
+*  Arguments
+*  =========
+*
+*  JOBU    (input) CHARACTER*1
+*          = 'U':  U must contain an orthogonal matrix U1 on entry, and
+*                  the product U1*U is returned;
+*          = 'I':  U is initialized to the unit matrix, and the
+*                  orthogonal matrix U is returned;
+*          = 'N':  U is not computed.
+*
+*  JOBV    (input) CHARACTER*1
+*          = 'V':  V must contain an orthogonal matrix V1 on entry, and
+*                  the product V1*V is returned;
+*          = 'I':  V is initialized to the unit matrix, and the
+*                  orthogonal matrix V is returned;
+*          = 'N':  V is not computed.
+*
+*  JOBQ    (input) CHARACTER*1
+*          = 'Q':  Q must contain an orthogonal matrix Q1 on entry, and
+*                  the product Q1*Q is returned;
+*          = 'I':  Q is initialized to the unit matrix, and the
+*                  orthogonal matrix Q is returned;
+*          = 'N':  Q is not computed.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  P       (input) INTEGER
+*          The number of rows of the matrix B.  P >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrices A and B.  N >= 0.
+*
+*  K       (input) INTEGER
+*  L       (input) INTEGER
+*          K and L specify the subblocks in the input matrices A and B:
+*          A23 = A(K+1:MIN(K+L,M),N-L+1:N) and B13 = B(1:L,N-L+1:N)
+*          of A and B, whose GSVD is going to be computed by DTGSJA.
+*          See Further details.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, A(N-K+1:N,1:MIN(K+L,M) ) contains the triangular
+*          matrix R or part of R.  See Purpose for details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,N)
+*          On entry, the P-by-N matrix B.
+*          On exit, if necessary, B(M-K+1:L,N+M-K-L+1:N) contains
+*          a part of R.  See Purpose for details.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,P).
+*
+*  TOLA    (input) DOUBLE PRECISION
+*  TOLB    (input) DOUBLE PRECISION
+*          TOLA and TOLB are the convergence criteria for the Jacobi-
+*          Kogbetliantz iteration procedure. Generally, they are the
+*          same as used in the preprocessing step, say
+*              TOLA = max(M,N)*norm(A)*MAZHEPS,
+*              TOLB = max(P,N)*norm(B)*MAZHEPS.
+*
+*  ALPHA   (output) DOUBLE PRECISION array, dimension (N)
+*  BETA    (output) DOUBLE PRECISION array, dimension (N)
+*          On exit, ALPHA and BETA contain the generalized singular
+*          value pairs of A and B;
+*            ALPHA(1:K) = 1,
+*            BETA(1:K)  = 0,
+*          and if M-K-L >= 0,
+*            ALPHA(K+1:K+L) = diag(C),
+*            BETA(K+1:K+L)  = diag(S),
+*          or if M-K-L < 0,
+*            ALPHA(K+1:M)= C, ALPHA(M+1:K+L)= 0
+*            BETA(K+1:M) = S, BETA(M+1:K+L) = 1.
+*          Furthermore, if K+L < N,
+*            ALPHA(K+L+1:N) = 0 and
+*            BETA(K+L+1:N)  = 0.
+*
+*  U       (input/output) DOUBLE PRECISION array, dimension (LDU,M)
+*          On entry, if JOBU = 'U', U must contain a matrix U1 (usually
+*          the orthogonal matrix returned by DGGSVP).
+*          On exit,
+*          if JOBU = 'I', U contains the orthogonal matrix U;
+*          if JOBU = 'U', U contains the product U1*U.
+*          If JOBU = 'N', U is not referenced.
+*
+*  LDU     (input) INTEGER
+*          The leading dimension of the array U. LDU >= max(1,M) if
+*          JOBU = 'U'; LDU >= 1 otherwise.
+*
+*  V       (input/output) DOUBLE PRECISION array, dimension (LDV,P)
+*          On entry, if JOBV = 'V', V must contain a matrix V1 (usually
+*          the orthogonal matrix returned by DGGSVP).
+*          On exit,
+*          if JOBV = 'I', V contains the orthogonal matrix V;
+*          if JOBV = 'V', V contains the product V1*V.
+*          If JOBV = 'N', V is not referenced.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V. LDV >= max(1,P) if
+*          JOBV = 'V'; LDV >= 1 otherwise.
+*
+*  Q       (input/output) DOUBLE PRECISION array, dimension (LDQ,N)
+*          On entry, if JOBQ = 'Q', Q must contain a matrix Q1 (usually
+*          the orthogonal matrix returned by DGGSVP).
+*          On exit,
+*          if JOBQ = 'I', Q contains the orthogonal matrix Q;
+*          if JOBQ = 'Q', Q contains the product Q1*Q.
+*          If JOBQ = 'N', Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q. LDQ >= max(1,N) if
+*          JOBQ = 'Q'; LDQ >= 1 otherwise.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (2*N)
+*
+*  NCYCLE  (output) INTEGER
+*          The number of cycles required for convergence.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          = 1:  the procedure does not converge after MAXIT cycles.
+*
+*  Internal Parameters
+*  ===================
+*
+*  MAXIT   INTEGER
+*          MAXIT specifies the total loops that the iterative procedure
+*          may take. If after MAXIT cycles, the routine fails to
+*          converge, we return INFO = 1.
+*
+*  Further Details
+*  ===============
+*
+*  DTGSJA essentially uses a variant of Kogbetliantz algorithm to reduce
+*  min(L,M-K)-by-L triangular (or trapezoidal) matrix A23 and L-by-L
+*  matrix B13 to the form:
+*
+*           U1'*A13*Q1 = C1*R1; V1'*B13*Q1 = S1*R1,
+*
+*  where U1, V1 and Q1 are orthogonal matrix, and Z' is the transpose
+*  of Z.  C1 and S1 are diagonal matrices satisfying
+*
+*                C1**2 + S1**2 = I,
+*
+*  and R1 is an L-by-L nonsingular upper triangular matrix.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            MAXIT
+      PARAMETER          ( MAXIT = 40 )
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+*
+      LOGICAL            INITQ, INITU, INITV, UPPER, WANTQ, WANTU, WANTV
+      INTEGER            I, J, KCYCLE
+      DOUBLE PRECISION   A1, A2, A3, B1, B2, B3, CSQ, CSU, CSV, ERROR,
+     $                   GAMMA, RWK, SNQ, SNU, SNV, SSMIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLAGS2, DLAPLL, DLARTG, DLASET, DROT,
+     $                   DSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      INITU = LSAME( JOBU, 'I' )
+      WANTU = INITU .OR. LSAME( JOBU, 'U' )
+*
+      INITV = LSAME( JOBV, 'I' )
+      WANTV = INITV .OR. LSAME( JOBV, 'V' )
+*
+      INITQ = LSAME( JOBQ, 'I' )
+      WANTQ = INITQ .OR. LSAME( JOBQ, 'Q' )
+*
+      INFO = 0
+      IF( .NOT.( INITU .OR. WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( INITV .OR. WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( INITQ .OR. WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( P.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LDB.LT.MAX( 1, P ) ) THEN
+         INFO = -12
+      ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN
+         INFO = -18
+      ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN
+         INFO = -20
+      ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN
+         INFO = -22
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DTGSJA', -INFO )
+         RETURN
+      END IF
+*
+*     Initialize U, V and Q, if necessary
+*
+      IF( INITU )
+     $   CALL DLASET( 'Full', M, M, ZERO, ONE, U, LDU )
+      IF( INITV )
+     $   CALL DLASET( 'Full', P, P, ZERO, ONE, V, LDV )
+      IF( INITQ )
+     $   CALL DLASET( 'Full', N, N, ZERO, ONE, Q, LDQ )
+*
+*     Loop until convergence
+*
+      UPPER = .FALSE.
+      DO 40 KCYCLE = 1, MAXIT
+*
+         UPPER = .NOT.UPPER
+*
+         DO 20 I = 1, L - 1
+            DO 10 J = I + 1, L
+*
+               A1 = ZERO
+               A2 = ZERO
+               A3 = ZERO
+               IF( K+I.LE.M )
+     $            A1 = A( K+I, N-L+I )
+               IF( K+J.LE.M )
+     $            A3 = A( K+J, N-L+J )
+*
+               B1 = B( I, N-L+I )
+               B3 = B( J, N-L+J )
+*
+               IF( UPPER ) THEN
+                  IF( K+I.LE.M )
+     $               A2 = A( K+I, N-L+J )
+                  B2 = B( I, N-L+J )
+               ELSE
+                  IF( K+J.LE.M )
+     $               A2 = A( K+J, N-L+I )
+                  B2 = B( J, N-L+I )
+               END IF
+*
+               CALL DLAGS2( UPPER, A1, A2, A3, B1, B2, B3, CSU, SNU,
+     $                      CSV, SNV, CSQ, SNQ )
+*
+*              Update (K+I)-th and (K+J)-th rows of matrix A: U'*A
+*
+               IF( K+J.LE.M )
+     $            CALL DROT( L, A( K+J, N-L+1 ), LDA, A( K+I, N-L+1 ),
+     $                       LDA, CSU, SNU )
+*
+*              Update I-th and J-th rows of matrix B: V'*B
+*
+               CALL DROT( L, B( J, N-L+1 ), LDB, B( I, N-L+1 ), LDB,
+     $                    CSV, SNV )
+*
+*              Update (N-L+I)-th and (N-L+J)-th columns of matrices
+*              A and B: A*Q and B*Q
+*
+               CALL DROT( MIN( K+L, M ), A( 1, N-L+J ), 1,
+     $                    A( 1, N-L+I ), 1, CSQ, SNQ )
+*
+               CALL DROT( L, B( 1, N-L+J ), 1, B( 1, N-L+I ), 1, CSQ,
+     $                    SNQ )
+*
+               IF( UPPER ) THEN
+                  IF( K+I.LE.M )
+     $               A( K+I, N-L+J ) = ZERO
+                  B( I, N-L+J ) = ZERO
+               ELSE
+                  IF( K+J.LE.M )
+     $               A( K+J, N-L+I ) = ZERO
+                  B( J, N-L+I ) = ZERO
+               END IF
+*
+*              Update orthogonal matrices U, V, Q, if desired.
+*
+               IF( WANTU .AND. K+J.LE.M )
+     $            CALL DROT( M, U( 1, K+J ), 1, U( 1, K+I ), 1, CSU,
+     $                       SNU )
+*
+               IF( WANTV )
+     $            CALL DROT( P, V( 1, J ), 1, V( 1, I ), 1, CSV, SNV )
+*
+               IF( WANTQ )
+     $            CALL DROT( N, Q( 1, N-L+J ), 1, Q( 1, N-L+I ), 1, CSQ,
+     $                       SNQ )
+*
+   10       CONTINUE
+   20    CONTINUE
+*
+         IF( .NOT.UPPER ) THEN
+*
+*           The matrices A13 and B13 were lower triangular at the start
+*           of the cycle, and are now upper triangular.
+*
+*           Convergence test: test the parallelism of the corresponding
+*           rows of A and B.
+*
+            ERROR = ZERO
+            DO 30 I = 1, MIN( L, M-K )
+               CALL DCOPY( L-I+1, A( K+I, N-L+I ), LDA, WORK, 1 )
+               CALL DCOPY( L-I+1, B( I, N-L+I ), LDB, WORK( L+1 ), 1 )
+               CALL DLAPLL( L-I+1, WORK, 1, WORK( L+1 ), 1, SSMIN )
+               ERROR = MAX( ERROR, SSMIN )
+   30       CONTINUE
+*
+            IF( ABS( ERROR ).LE.MIN( TOLA, TOLB ) )
+     $         GO TO 50
+         END IF
+*
+*        End of cycle loop
+*
+   40 CONTINUE
+*
+*     The algorithm has not converged after MAXIT cycles.
+*
+      INFO = 1
+      GO TO 100
+*
+   50 CONTINUE
+*
+*     If ERROR <= MIN(TOLA,TOLB), then the algorithm has converged.
+*     Compute the generalized singular value pairs (ALPHA, BETA), and
+*     set the triangular matrix R to array A.
+*
+      DO 60 I = 1, K
+         ALPHA( I ) = ONE
+         BETA( I ) = ZERO
+   60 CONTINUE
+*
+      DO 70 I = 1, MIN( L, M-K )
+*
+         A1 = A( K+I, N-L+I )
+         B1 = B( I, N-L+I )
+*
+         IF( A1.NE.ZERO ) THEN
+            GAMMA = B1 / A1
+*
+*           change sign if necessary
+*
+            IF( GAMMA.LT.ZERO ) THEN
+               CALL DSCAL( L-I+1, -ONE, B( I, N-L+I ), LDB )
+               IF( WANTV )
+     $            CALL DSCAL( P, -ONE, V( 1, I ), 1 )
+            END IF
+*
+            CALL DLARTG( ABS( GAMMA ), ONE, BETA( K+I ), ALPHA( K+I ),
+     $                   RWK )
+*
+            IF( ALPHA( K+I ).GE.BETA( K+I ) ) THEN
+               CALL DSCAL( L-I+1, ONE / ALPHA( K+I ), A( K+I, N-L+I ),
+     $                     LDA )
+            ELSE
+               CALL DSCAL( L-I+1, ONE / BETA( K+I ), B( I, N-L+I ),
+     $                     LDB )
+               CALL DCOPY( L-I+1, B( I, N-L+I ), LDB, A( K+I, N-L+I ),
+     $                     LDA )
+            END IF
+*
+         ELSE
+*
+            ALPHA( K+I ) = ZERO
+            BETA( K+I ) = ONE
+            CALL DCOPY( L-I+1, B( I, N-L+I ), LDB, A( K+I, N-L+I ),
+     $                  LDA )
+*
+         END IF
+*
+   70 CONTINUE
+*
+*     Post-assignment
+*
+      DO 80 I = M + 1, K + L
+         ALPHA( I ) = ZERO
+         BETA( I ) = ONE
+   80 CONTINUE
+*
+      IF( K+L.LT.N ) THEN
+         DO 90 I = K + L + 1, N
+            ALPHA( I ) = ZERO
+            BETA( I ) = ZERO
+   90    CONTINUE
+      END IF
+*
+  100 CONTINUE
+      NCYCLE = KCYCLE
+      RETURN
+*
+*     End of DTGSJA
+*
+      END
diff --git a/costa/native/external/lapack/dtgsna.f b/costa/native/external/lapack/dtgsna.f
new file mode 100644
index 000000000..0b6abb01a
--- /dev/null
+++ b/costa/native/external/lapack/dtgsna.f
@@ -0,0 +1,585 @@
+      SUBROUTINE DTGSNA( JOB, HOWMNY, SELECT, N, A, LDA, B, LDB, VL,
+     $                   LDVL, VR, LDVR, S, DIF, MM, M, WORK, LWORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          HOWMNY, JOB
+      INTEGER            INFO, LDA, LDB, LDVL, LDVR, LWORK, M, MM, N
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            SELECT( * )
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), DIF( * ), S( * ),
+     $                   VL( LDVL, * ), VR( LDVR, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTGSNA estimates reciprocal condition numbers for specified
+*  eigenvalues and/or eigenvectors of a matrix pair (A, B) in
+*  generalized real Schur canonical form (or of any matrix pair
+*  (Q*A*Z', Q*B*Z') with orthogonal matrices Q and Z, where
+*  Z' denotes the transpose of Z.
+*
+*  (A, B) must be in generalized real Schur form (as returned by DGGES),
+*  i.e. A is block upper triangular with 1-by-1 and 2-by-2 diagonal
+*  blocks. B is upper triangular.
+*
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          Specifies whether condition numbers are required for
+*          eigenvalues (S) or eigenvectors (DIF):
+*          = 'E': for eigenvalues only (S);
+*          = 'V': for eigenvectors only (DIF);
+*          = 'B': for both eigenvalues and eigenvectors (S and DIF).
+*
+*  HOWMNY  (input) CHARACTER*1
+*          = 'A': compute condition numbers for all eigenpairs;
+*          = 'S': compute condition numbers for selected eigenpairs
+*                 specified by the array SELECT.
+*
+*  SELECT  (input) LOGICAL array, dimension (N)
+*          If HOWMNY = 'S', SELECT specifies the eigenpairs for which
+*          condition numbers are required. To select condition numbers
+*          for the eigenpair corresponding to a real eigenvalue w(j),
+*          SELECT(j) must be set to .TRUE.. To select condition numbers
+*          corresponding to a complex conjugate pair of eigenvalues w(j)
+*          and w(j+1), either SELECT(j) or SELECT(j+1) or both, must be
+*          set to .TRUE..
+*          If HOWMNY = 'A', SELECT is not referenced.
+*
+*  N       (input) INTEGER
+*          The order of the square matrix pair (A, B). N >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+*          The upper quasi-triangular matrix A in the pair (A,B).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  B       (input) DOUBLE PRECISION array, dimension (LDB,N)
+*          The upper triangular matrix B in the pair (A,B).
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  VL      (input) DOUBLE PRECISION array, dimension (LDVL,M)
+*          If JOB = 'E' or 'B', VL must contain left eigenvectors of
+*          (A, B), corresponding to the eigenpairs specified by HOWMNY
+*          and SELECT. The eigenvectors must be stored in consecutive
+*          columns of VL, as returned by DTGEVC.
+*          If JOB = 'V', VL is not referenced.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the array VL. LDVL >= 1.
+*          If JOB = 'E' or 'B', LDVL >= N.
+*
+*  VR      (input) DOUBLE PRECISION array, dimension (LDVR,M)
+*          If JOB = 'E' or 'B', VR must contain right eigenvectors of
+*          (A, B), corresponding to the eigenpairs specified by HOWMNY
+*          and SELECT. The eigenvectors must be stored in consecutive
+*          columns ov VR, as returned by DTGEVC.
+*          If JOB = 'V', VR is not referenced.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the array VR. LDVR >= 1.
+*          If JOB = 'E' or 'B', LDVR >= N.
+*
+*  S       (output) DOUBLE PRECISION array, dimension (MM)
+*          If JOB = 'E' or 'B', the reciprocal condition numbers of the
+*          selected eigenvalues, stored in consecutive elements of the
+*          array. For a complex conjugate pair of eigenvalues two
+*          consecutive elements of S are set to the same value. Thus
+*          S(j), DIF(j), and the j-th columns of VL and VR all
+*          correspond to the same eigenpair (but not in general the
+*          j-th eigenpair, unless all eigenpairs are selected).
+*          If JOB = 'V', S is not referenced.
+*
+*  DIF     (output) DOUBLE PRECISION array, dimension (MM)
+*          If JOB = 'V' or 'B', the estimated reciprocal condition
+*          numbers of the selected eigenvectors, stored in consecutive
+*          elements of the array. For a complex eigenvector two
+*          consecutive elements of DIF are set to the same value. If
+*          the eigenvalues cannot be reordered to compute DIF(j), DIF(j)
+*          is set to 0; this can only occur when the true value would be
+*          very small anyway.
+*          If JOB = 'E', DIF is not referenced.
+*
+*  MM      (input) INTEGER
+*          The number of elements in the arrays S and DIF. MM >= M.
+*
+*  M       (output) INTEGER
+*          The number of elements of the arrays S and DIF used to store
+*          the specified condition numbers; for each selected real
+*          eigenvalue one element is used, and for each selected complex
+*          conjugate pair of eigenvalues, two elements are used.
+*          If HOWMNY = 'A', M is set to N.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          If JOB = 'E', WORK is not referenced.  Otherwise,
+*          on exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= N.
+*          If JOB = 'V' or 'B' LWORK >= 2*N*(N+2)+16.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace) INTEGER array, dimension (N + 6)
+*          If JOB = 'E', IWORK is not referenced.
+*
+*  INFO    (output) INTEGER
+*          =0: Successful exit
+*          <0: If INFO = -i, the i-th argument had an illegal value
+*
+*
+*  Further Details
+*  ===============
+*
+*  The reciprocal of the condition number of a generalized eigenvalue
+*  w = (a, b) is defined as
+*
+*       S(w) = (|u'Av|**2 + |u'Bv|**2)**(1/2) / (norm(u)*norm(v))
+*
+*  where u and v are the left and right eigenvectors of (A, B)
+*  corresponding to w; |z| denotes the absolute value of the complex
+*  number, and norm(u) denotes the 2-norm of the vector u.
+*  The pair (a, b) corresponds to an eigenvalue w = a/b (= u'Av/u'Bv)
+*  of the matrix pair (A, B). If both a and b equal zero, then (A B) is
+*  singular and S(I) = -1 is returned.
+*
+*  An approximate error bound on the chordal distance between the i-th
+*  computed generalized eigenvalue w and the corresponding exact
+*  eigenvalue lambda is
+*
+*       chord(w, lambda) <= EPS * norm(A, B) / S(I)
+*
+*  where EPS is the machine precision.
+*
+*  The reciprocal of the condition number DIF(i) of right eigenvector u
+*  and left eigenvector v corresponding to the generalized eigenvalue w
+*  is defined as follows:
+*
+*  a) If the i-th eigenvalue w = (a,b) is real
+*
+*     Suppose U and V are orthogonal transformations such that
+*
+*                U'*(A, B)*V  = (S, T) = ( a   *  ) ( b  *  )  1
+*                                        ( 0  S22 ),( 0 T22 )  n-1
+*                                          1  n-1     1 n-1
+*
+*     Then the reciprocal condition number DIF(i) is
+*
+*                Difl((a, b), (S22, T22)) = sigma-min( Zl ),
+*
+*     where sigma-min(Zl) denotes the smallest singular value of the
+*     2(n-1)-by-2(n-1) matrix
+*
+*         Zl = [ kron(a, In-1)  -kron(1, S22) ]
+*              [ kron(b, In-1)  -kron(1, T22) ] .
+*
+*     Here In-1 is the identity matrix of size n-1. kron(X, Y) is the
+*     Kronecker product between the matrices X and Y.
+*
+*     Note that if the default method for computing DIF(i) is wanted
+*     (see DLATDF), then the parameter DIFDRI (see below) should be
+*     changed from 3 to 4 (routine DLATDF(IJOB = 2 will be used)).
+*     See DTGSYL for more details.
+*
+*  b) If the i-th and (i+1)-th eigenvalues are complex conjugate pair,
+*
+*     Suppose U and V are orthogonal transformations such that
+*
+*                U'*(A, B)*V = (S, T) = ( S11  *   ) ( T11  *  )  2
+*                                       ( 0    S22 ),( 0    T22) n-2
+*                                         2    n-2     2    n-2
+*
+*     and (S11, T11) corresponds to the complex conjugate eigenvalue
+*     pair (w, conjg(w)). There exist unitary matrices U1 and V1 such
+*     that
+*
+*         U1'*S11*V1 = ( s11 s12 )   and U1'*T11*V1 = ( t11 t12 )
+*                      (  0  s22 )                    (  0  t22 )
+*
+*     where the generalized eigenvalues w = s11/t11 and
+*     conjg(w) = s22/t22.
+*
+*     Then the reciprocal condition number DIF(i) is bounded by
+*
+*         min( d1, max( 1, |real(s11)/real(s22)| )*d2 )
+*
+*     where, d1 = Difl((s11, t11), (s22, t22)) = sigma-min(Z1), where
+*     Z1 is the complex 2-by-2 matrix
+*
+*              Z1 =  [ s11  -s22 ]
+*                    [ t11  -t22 ],
+*
+*     This is done by computing (using real arithmetic) the
+*     roots of the characteristical polynomial det(Z1' * Z1 - lambda I),
+*     where Z1' denotes the conjugate transpose of Z1 and det(X) denotes
+*     the determinant of X.
+*
+*     and d2 is an upper bound on Difl((S11, T11), (S22, T22)), i.e. an
+*     upper bound on sigma-min(Z2), where Z2 is (2n-2)-by-(2n-2)
+*
+*              Z2 = [ kron(S11', In-2)  -kron(I2, S22) ]
+*                   [ kron(T11', In-2)  -kron(I2, T22) ]
+*
+*     Note that if the default method for computing DIF is wanted (see
+*     DLATDF), then the parameter DIFDRI (see below) should be changed
+*     from 3 to 4 (routine DLATDF(IJOB = 2 will be used)). See DTGSYL
+*     for more details.
+*
+*  For each eigenvalue/vector specified by SELECT, DIF stores a
+*  Frobenius norm-based estimate of Difl.
+*
+*  An approximate error bound for the i-th computed eigenvector VL(i) or
+*  VR(i) is given by
+*
+*             EPS * norm(A, B) / DIF(i).
+*
+*  See ref. [2-3] for more details and further references.
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  References
+*  ==========
+*
+*  [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the
+*      Generalized Real Schur Form of a Regular Matrix Pair (A, B), in
+*      M.S. Moonen et al (eds), Linear Algebra for Large Scale and
+*      Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218.
+*
+*  [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified
+*      Eigenvalues of a Regular Matrix Pair (A, B) and Condition
+*      Estimation: Theory, Algorithms and Software,
+*      Report UMINF - 94.04, Department of Computing Science, Umea
+*      University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working
+*      Note 87. To appear in Numerical Algorithms, 1996.
+*
+*  [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software
+*      for Solving the Generalized Sylvester Equation and Estimating the
+*      Separation between Regular Matrix Pairs, Report UMINF - 93.23,
+*      Department of Computing Science, Umea University, S-901 87 Umea,
+*      Sweden, December 1993, Revised April 1994, Also as LAPACK Working
+*      Note 75.  To appear in ACM Trans. on Math. Software, Vol 22,
+*      No 1, 1996.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            DIFDRI
+      PARAMETER          ( DIFDRI = 3 )
+      DOUBLE PRECISION   ZERO, ONE, TWO, FOUR
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0,
+     $                   FOUR = 4.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, PAIR, SOMCON, WANTBH, WANTDF, WANTS
+      INTEGER            I, IERR, IFST, ILST, IZ, K, KS, LWMIN, N1, N2
+      DOUBLE PRECISION   ALPHAI, ALPHAR, ALPRQT, BETA, C1, C2, COND,
+     $                   EPS, LNRM, RNRM, ROOT1, ROOT2, SCALE, SMLNUM,
+     $                   TMPII, TMPIR, TMPRI, TMPRR, UHAV, UHAVI, UHBV,
+     $                   UHBVI
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   DUMMY( 1 ), DUMMY1( 1 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DDOT, DLAMCH, DLAPY2, DNRM2
+      EXTERNAL           LSAME, DDOT, DLAMCH, DLAPY2, DNRM2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMV, DLACPY, DLAG2, DTGEXC, DTGSYL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      WANTBH = LSAME( JOB, 'B' )
+      WANTS = LSAME( JOB, 'E' ) .OR. WANTBH
+      WANTDF = LSAME( JOB, 'V' ) .OR. WANTBH
+*
+      SOMCON = LSAME( HOWMNY, 'S' )
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      IF( LSAME( JOB, 'V' ) .OR. LSAME( JOB, 'B' ) ) THEN
+         LWMIN = MAX( 1, 2*N*( N+2 )+16 )
+      ELSE
+         LWMIN = 1
+      END IF
+*
+      IF( .NOT.WANTS .AND. .NOT.WANTDF ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( HOWMNY, 'A' ) .AND. .NOT.SOMCON ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( WANTS .AND. LDVL.LT.N ) THEN
+         INFO = -10
+      ELSE IF( WANTS .AND. LDVR.LT.N ) THEN
+         INFO = -12
+      ELSE
+*
+*        Set M to the number of eigenpairs for which condition numbers
+*        are required, and test MM.
+*
+         IF( SOMCON ) THEN
+            M = 0
+            PAIR = .FALSE.
+            DO 10 K = 1, N
+               IF( PAIR ) THEN
+                  PAIR = .FALSE.
+               ELSE
+                  IF( K.LT.N ) THEN
+                     IF( A( K+1, K ).EQ.ZERO ) THEN
+                        IF( SELECT( K ) )
+     $                     M = M + 1
+                     ELSE
+                        PAIR = .TRUE.
+                        IF( SELECT( K ) .OR. SELECT( K+1 ) )
+     $                     M = M + 2
+                     END IF
+                  ELSE
+                     IF( SELECT( N ) )
+     $                  M = M + 1
+                  END IF
+               END IF
+   10       CONTINUE
+         ELSE
+            M = N
+         END IF
+*
+         IF( MM.LT.M ) THEN
+            INFO = -15
+         ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+            INFO = -18
+*        ELSE IF( WANTDF .AND. LWORK.LT.2*N*( N+2 )+16 ) THEN
+*           INFO = -18
+         END IF
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DTGSNA', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'P' )
+      SMLNUM = DLAMCH( 'S' ) / EPS
+      KS = 0
+      PAIR = .FALSE.
+*
+      DO 20 K = 1, N
+*
+*        Determine whether A(k,k) begins a 1-by-1 or 2-by-2 block.
+*
+         IF( PAIR ) THEN
+            PAIR = .FALSE.
+            GO TO 20
+         ELSE
+            IF( K.LT.N )
+     $         PAIR = A( K+1, K ).NE.ZERO
+         END IF
+*
+*        Determine whether condition numbers are required for the k-th
+*        eigenpair.
+*
+         IF( SOMCON ) THEN
+            IF( PAIR ) THEN
+               IF( .NOT.SELECT( K ) .AND. .NOT.SELECT( K+1 ) )
+     $            GO TO 20
+            ELSE
+               IF( .NOT.SELECT( K ) )
+     $            GO TO 20
+            END IF
+         END IF
+*
+         KS = KS + 1
+*
+         IF( WANTS ) THEN
+*
+*           Compute the reciprocal condition number of the k-th
+*           eigenvalue.
+*
+            IF( PAIR ) THEN
+*
+*              Complex eigenvalue pair.
+*
+               RNRM = DLAPY2( DNRM2( N, VR( 1, KS ), 1 ),
+     $                DNRM2( N, VR( 1, KS+1 ), 1 ) )
+               LNRM = DLAPY2( DNRM2( N, VL( 1, KS ), 1 ),
+     $                DNRM2( N, VL( 1, KS+1 ), 1 ) )
+               CALL DGEMV( 'N', N, N, ONE, A, LDA, VR( 1, KS ), 1, ZERO,
+     $                     WORK, 1 )
+               TMPRR = DDOT( N, WORK, 1, VL( 1, KS ), 1 )
+               TMPRI = DDOT( N, WORK, 1, VL( 1, KS+1 ), 1 )
+               CALL DGEMV( 'N', N, N, ONE, A, LDA, VR( 1, KS+1 ), 1,
+     $                     ZERO, WORK, 1 )
+               TMPII = DDOT( N, WORK, 1, VL( 1, KS+1 ), 1 )
+               TMPIR = DDOT( N, WORK, 1, VL( 1, KS ), 1 )
+               UHAV = TMPRR + TMPII
+               UHAVI = TMPIR - TMPRI
+               CALL DGEMV( 'N', N, N, ONE, B, LDB, VR( 1, KS ), 1, ZERO,
+     $                     WORK, 1 )
+               TMPRR = DDOT( N, WORK, 1, VL( 1, KS ), 1 )
+               TMPRI = DDOT( N, WORK, 1, VL( 1, KS+1 ), 1 )
+               CALL DGEMV( 'N', N, N, ONE, B, LDB, VR( 1, KS+1 ), 1,
+     $                     ZERO, WORK, 1 )
+               TMPII = DDOT( N, WORK, 1, VL( 1, KS+1 ), 1 )
+               TMPIR = DDOT( N, WORK, 1, VL( 1, KS ), 1 )
+               UHBV = TMPRR + TMPII
+               UHBVI = TMPIR - TMPRI
+               UHAV = DLAPY2( UHAV, UHAVI )
+               UHBV = DLAPY2( UHBV, UHBVI )
+               COND = DLAPY2( UHAV, UHBV )
+               S( KS ) = COND / ( RNRM*LNRM )
+               S( KS+1 ) = S( KS )
+*
+            ELSE
+*
+*              Real eigenvalue.
+*
+               RNRM = DNRM2( N, VR( 1, KS ), 1 )
+               LNRM = DNRM2( N, VL( 1, KS ), 1 )
+               CALL DGEMV( 'N', N, N, ONE, A, LDA, VR( 1, KS ), 1, ZERO,
+     $                     WORK, 1 )
+               UHAV = DDOT( N, WORK, 1, VL( 1, KS ), 1 )
+               CALL DGEMV( 'N', N, N, ONE, B, LDB, VR( 1, KS ), 1, ZERO,
+     $                     WORK, 1 )
+               UHBV = DDOT( N, WORK, 1, VL( 1, KS ), 1 )
+               COND = DLAPY2( UHAV, UHBV )
+               IF( COND.EQ.ZERO ) THEN
+                  S( KS ) = -ONE
+               ELSE
+                  S( KS ) = COND / ( RNRM*LNRM )
+               END IF
+            END IF
+         END IF
+*
+         IF( WANTDF ) THEN
+            IF( N.EQ.1 ) THEN
+               DIF( KS ) = DLAPY2( A( 1, 1 ), B( 1, 1 ) )
+               GO TO 20
+            END IF
+*
+*           Estimate the reciprocal condition number of the k-th
+*           eigenvectors.
+            IF( PAIR ) THEN
+*
+*              Copy the  2-by 2 pencil beginning at (A(k,k), B(k, k)).
+*              Compute the eigenvalue(s) at position K.
+*
+               WORK( 1 ) = A( K, K )
+               WORK( 2 ) = A( K+1, K )
+               WORK( 3 ) = A( K, K+1 )
+               WORK( 4 ) = A( K+1, K+1 )
+               WORK( 5 ) = B( K, K )
+               WORK( 6 ) = B( K+1, K )
+               WORK( 7 ) = B( K, K+1 )
+               WORK( 8 ) = B( K+1, K+1 )
+               CALL DLAG2( WORK, 2, WORK( 5 ), 2, SMLNUM*EPS, BETA,
+     $                     DUMMY1( 1 ), ALPHAR, DUMMY( 1 ), ALPHAI )
+               ALPRQT = ONE
+               C1 = TWO*( ALPHAR*ALPHAR+ALPHAI*ALPHAI+BETA*BETA )
+               C2 = FOUR*BETA*BETA*ALPHAI*ALPHAI
+               ROOT1 = C1 + SQRT( C1*C1-4.0D0*C2 )
+               ROOT2 = C2 / ROOT1
+               ROOT1 = ROOT1 / TWO
+               COND = MIN( SQRT( ROOT1 ), SQRT( ROOT2 ) )
+            END IF
+*
+*           Copy the matrix (A, B) to the array WORK and swap the
+*           diagonal block beginning at A(k,k) to the (1,1) position.
+*
+            CALL DLACPY( 'Full', N, N, A, LDA, WORK, N )
+            CALL DLACPY( 'Full', N, N, B, LDB, WORK( N*N+1 ), N )
+            IFST = K
+            ILST = 1
+*
+            CALL DTGEXC( .FALSE., .FALSE., N, WORK, N, WORK( N*N+1 ), N,
+     $                   DUMMY, 1, DUMMY1, 1, IFST, ILST,
+     $                   WORK( N*N*2+1 ), LWORK-2*N*N, IERR )
+*
+            IF( IERR.GT.0 ) THEN
+*
+*              Ill-conditioned problem - swap rejected.
+*
+               DIF( KS ) = ZERO
+            ELSE
+*
+*              Reordering successful, solve generalized Sylvester
+*              equation for R and L,
+*                         A22 * R - L * A11 = A12
+*                         B22 * R - L * B11 = B12,
+*              and compute estimate of Difl((A11,B11), (A22, B22)).
+*
+               N1 = 1
+               IF( WORK( 2 ).NE.ZERO )
+     $            N1 = 2
+               N2 = N - N1
+               IF( N2.EQ.0 ) THEN
+                  DIF( KS ) = COND
+               ELSE
+                  I = N*N + 1
+                  IZ = 2*N*N + 1
+                  CALL DTGSYL( 'N', DIFDRI, N2, N1, WORK( N*N1+N1+1 ),
+     $                         N, WORK, N, WORK( N1+1 ), N,
+     $                         WORK( N*N1+N1+I ), N, WORK( I ), N,
+     $                         WORK( N1+I ), N, SCALE, DIF( KS ),
+     $                         WORK( IZ+1 ), LWORK-2*N*N, IWORK, IERR )
+*
+                  IF( PAIR )
+     $               DIF( KS ) = MIN( MAX( ONE, ALPRQT )*DIF( KS ),
+     $                           COND )
+               END IF
+            END IF
+            IF( PAIR )
+     $         DIF( KS+1 ) = DIF( KS )
+         END IF
+         IF( PAIR )
+     $      KS = KS + 1
+*
+   20 CONTINUE
+      WORK( 1 ) = LWMIN
+      RETURN
+*
+*     End of DTGSNA
+*
+      END
diff --git a/costa/native/external/lapack/dtgsy2.f b/costa/native/external/lapack/dtgsy2.f
new file mode 100644
index 000000000..114e753af
--- /dev/null
+++ b/costa/native/external/lapack/dtgsy2.f
@@ -0,0 +1,950 @@
+      SUBROUTINE DTGSY2( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D,
+     $                   LDD, E, LDE, F, LDF, SCALE, RDSUM, RDSCAL,
+     $                   IWORK, PQ, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF, M, N,
+     $                   PQ
+      DOUBLE PRECISION   RDSCAL, RDSUM, SCALE
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), C( LDC, * ),
+     $                   D( LDD, * ), E( LDE, * ), F( LDF, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTGSY2 solves the generalized Sylvester equation:
+*
+*              A * R - L * B = scale * C                (1)
+*              D * R - L * E = scale * F,
+*
+*  using Level 1 and 2 BLAS. where R and L are unknown M-by-N matrices,
+*  (A, D), (B, E) and (C, F) are given matrix pairs of size M-by-M,
+*  N-by-N and M-by-N, respectively, with real entries. (A, D) and (B, E)
+*  must be in generalized Schur canonical form, i.e. A, B are upper
+*  quasi triangular and D, E are upper triangular. The solution (R, L)
+*  overwrites (C, F). 0 <= SCALE <= 1 is an output scaling factor
+*  chosen to avoid overflow.
+*
+*  In matrix notation solving equation (1) corresponds to solve
+*  Z*x = scale*b, where Z is defined as
+*
+*         Z = [ kron(In, A)  -kron(B', Im) ]             (2)
+*             [ kron(In, D)  -kron(E', Im) ],
+*
+*  Ik is the identity matrix of size k and X' is the transpose of X.
+*  kron(X, Y) is the Kronecker product between the matrices X and Y.
+*  In the process of solving (1), we solve a number of such systems
+*  where Dim(In), Dim(In) = 1 or 2.
+*
+*  If TRANS = 'T', solve the transposed system Z'*y = scale*b for y,
+*  which is equivalent to solve for R and L in
+*
+*              A' * R  + D' * L   = scale *  C           (3)
+*              R  * B' + L  * E'  = scale * -F
+*
+*  This case is used to compute an estimate of Dif[(A, D), (B, E)] =
+*  sigma_min(Z) using reverse communicaton with DLACON.
+*
+*  DTGSY2 also (IJOB >= 1) contributes to the computation in STGSYL
+*  of an upper bound on the separation between to matrix pairs. Then
+*  the input (A, D), (B, E) are sub-pencils of the matrix pair in
+*  DTGSYL. See STGSYL for details.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER
+*          = 'N', solve the generalized Sylvester equation (1).
+*          = 'T': solve the 'transposed' system (3).
+*
+*  IJOB    (input) INTEGER
+*          Specifies what kind of functionality to be performed.
+*          = 0: solve (1) only.
+*          = 1: A contribution from this subsystem to a Frobenius
+*               norm-based estimate of the separation between two matrix
+*               pairs is computed. (look ahead strategy is used).
+*          = 2: A contribution from this subsystem to a Frobenius
+*               norm-based estimate of the separation between two matrix
+*               pairs is computed. (DGECON on sub-systems is used.)
+*          Not referenced if TRANS = 'T'.
+*
+*  M       (input) INTEGER
+*          On entry, M specifies the order of A and D, and the row
+*          dimension of C, F, R and L.
+*
+*  N       (input) INTEGER
+*          On entry, N specifies the order of B and E, and the column
+*          dimension of C, F, R and L.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA, M)
+*          On entry, A contains an upper quasi triangular matrix.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the matrix A. LDA >= max(1, M).
+*
+*  B       (input) DOUBLE PRECISION array, dimension (LDB, N)
+*          On entry, B contains an upper quasi triangular matrix.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the matrix B. LDB >= max(1, N).
+*
+*  C       (input/ output) DOUBLE PRECISION array, dimension (LDC, N)
+*          On entry, C contains the right-hand-side of the first matrix
+*          equation in (1).
+*          On exit, if IJOB = 0, C has been overwritten by the
+*          solution R.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the matrix C. LDC >= max(1, M).
+*
+*  D       (input) DOUBLE PRECISION array, dimension (LDD, M)
+*          On entry, D contains an upper triangular matrix.
+*
+*  LDD     (input) INTEGER
+*          The leading dimension of the matrix D. LDD >= max(1, M).
+*
+*  E       (input) DOUBLE PRECISION array, dimension (LDE, N)
+*          On entry, E contains an upper triangular matrix.
+*
+*  LDE     (input) INTEGER
+*          The leading dimension of the matrix E. LDE >= max(1, N).
+*
+*  F       (input/ output) DOUBLE PRECISION array, dimension (LDF, N)
+*          On entry, F contains the right-hand-side of the second matrix
+*          equation in (1).
+*          On exit, if IJOB = 0, F has been overwritten by the
+*          solution L.
+*
+*  LDF     (input) INTEGER
+*          The leading dimension of the matrix F. LDF >= max(1, M).
+*
+*  SCALE   (output) DOUBLE PRECISION
+*          On exit, 0 <= SCALE <= 1. If 0 < SCALE < 1, the solutions
+*          R and L (C and F on entry) will hold the solutions to a
+*          slightly perturbed system but the input matrices A, B, D and
+*          E have not been changed. If SCALE = 0, R and L will hold the
+*          solutions to the homogeneous system with C = F = 0. Normally,
+*          SCALE = 1.
+*
+*  RDSUM   (input/output) DOUBLE PRECISION
+*          On entry, the sum of squares of computed contributions to
+*          the Dif-estimate under computation by DTGSYL, where the
+*          scaling factor RDSCAL (see below) has been factored out.
+*          On exit, the corresponding sum of squares updated with the
+*          contributions from the current sub-system.
+*          If TRANS = 'T' RDSUM is not touched.
+*          NOTE: RDSUM only makes sense when DTGSY2 is called by STGSYL.
+*
+*  RDSCAL  (input/output) DOUBLE PRECISION
+*          On entry, scaling factor used to prevent overflow in RDSUM.
+*          On exit, RDSCAL is updated w.r.t. the current contributions
+*          in RDSUM.
+*          If TRANS = 'T', RDSCAL is not touched.
+*          NOTE: RDSCAL only makes sense when DTGSY2 is called by
+*                DTGSYL.
+*
+*  IWORK   (workspace) INTEGER array, dimension (M+N+2)
+*
+*  PQ      (output) INTEGER
+*          On exit, the number of subsystems (of size 2-by-2, 4-by-4 and
+*          8-by-8) solved by this routine.
+*
+*  INFO    (output) INTEGER
+*          On exit, if INFO is set to
+*            =0: Successful exit
+*            <0: If INFO = -i, the i-th argument had an illegal value.
+*            >0: The matrix pairs (A, D) and (B, E) have common or very
+*                close eigenvalues.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            LDZ
+      PARAMETER          ( LDZ = 8 )
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN
+      INTEGER            I, IE, IERR, II, IS, ISP1, J, JE, JJ, JS, JSP1,
+     $                   K, MB, NB, P, Q, ZDIM
+      DOUBLE PRECISION   ALPHA, SCALOC
+*     ..
+*     .. Local Arrays ..
+      INTEGER            IPIV( LDZ ), JPIV( LDZ )
+      DOUBLE PRECISION   RHS( LDZ ), Z( LDZ, LDZ )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DCOPY, DGEMM, DGEMV, DGER, DGESC2,
+     $                   DGETC2, DLATDF, DSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test input parameters
+*
+      INFO = 0
+      IERR = 0
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN
+         INFO = -1
+      ELSE IF( ( IJOB.LT.0 ) .OR. ( IJOB.GT.2 ) ) THEN
+         INFO = -2
+      ELSE IF( M.LE.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LE.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LDD.LT.MAX( 1, M ) ) THEN
+         INFO = -12
+      ELSE IF( LDE.LT.MAX( 1, N ) ) THEN
+         INFO = -14
+      ELSE IF( LDF.LT.MAX( 1, M ) ) THEN
+         INFO = -16
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DTGSY2', -INFO )
+         RETURN
+      END IF
+*
+*     Determine block structure of A
+*
+      PQ = 0
+      P = 0
+      I = 1
+   10 CONTINUE
+      IF( I.GT.M )
+     $   GO TO 20
+      P = P + 1
+      IWORK( P ) = I
+      IF( I.EQ.M )
+     $   GO TO 20
+      IF( A( I+1, I ).NE.ZERO ) THEN
+         I = I + 2
+      ELSE
+         I = I + 1
+      END IF
+      GO TO 10
+   20 CONTINUE
+      IWORK( P+1 ) = M + 1
+*
+*     Determine block structure of B
+*
+      Q = P + 1
+      J = 1
+   30 CONTINUE
+      IF( J.GT.N )
+     $   GO TO 40
+      Q = Q + 1
+      IWORK( Q ) = J
+      IF( J.EQ.N )
+     $   GO TO 40
+      IF( B( J+1, J ).NE.ZERO ) THEN
+         J = J + 2
+      ELSE
+         J = J + 1
+      END IF
+      GO TO 30
+   40 CONTINUE
+      IWORK( Q+1 ) = N + 1
+      PQ = P*( Q-P-1 )
+*
+      IF( NOTRAN ) THEN
+*
+*        Solve (I, J) - subsystem
+*           A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J)
+*           D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J)
+*        for I = P, P - 1, ..., 1; J = 1, 2, ..., Q
+*
+         SCALE = ONE
+         SCALOC = ONE
+         DO 120 J = P + 2, Q
+            JS = IWORK( J )
+            JSP1 = JS + 1
+            JE = IWORK( J+1 ) - 1
+            NB = JE - JS + 1
+            DO 110 I = P, 1, -1
+*
+               IS = IWORK( I )
+               ISP1 = IS + 1
+               IE = IWORK( I+1 ) - 1
+               MB = IE - IS + 1
+               ZDIM = MB*NB*2
+*
+               IF( ( MB.EQ.1 ) .AND. ( NB.EQ.1 ) ) THEN
+*
+*                 Build a 2-by-2 system Z * x = RHS
+*
+                  Z( 1, 1 ) = A( IS, IS )
+                  Z( 2, 1 ) = D( IS, IS )
+                  Z( 1, 2 ) = -B( JS, JS )
+                  Z( 2, 2 ) = -E( JS, JS )
+*
+*                 Set up right hand side(s)
+*
+                  RHS( 1 ) = C( IS, JS )
+                  RHS( 2 ) = F( IS, JS )
+*
+*                 Solve Z * x = RHS
+*
+                  CALL DGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR )
+                  IF( IERR.GT.0 )
+     $               INFO = IERR
+*
+                  IF( IJOB.EQ.0 ) THEN
+                     CALL DGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV,
+     $                            SCALOC )
+                     IF( SCALOC.NE.ONE ) THEN
+                        DO 50 K = 1, N
+                           CALL DSCAL( M, SCALOC, C( 1, K ), 1 )
+                           CALL DSCAL( M, SCALOC, F( 1, K ), 1 )
+   50                   CONTINUE
+                        SCALE = SCALE*SCALOC
+                     END IF
+                  ELSE
+                     CALL DLATDF( IJOB, ZDIM, Z, LDZ, RHS, RDSUM,
+     $                            RDSCAL, IPIV, JPIV )
+                  END IF
+*
+*                 Unpack solution vector(s)
+*
+                  C( IS, JS ) = RHS( 1 )
+                  F( IS, JS ) = RHS( 2 )
+*
+*                 Substitute R(I, J) and L(I, J) into remaining
+*                 equation.
+*
+                  IF( I.GT.1 ) THEN
+                     ALPHA = -RHS( 1 )
+                     CALL DAXPY( IS-1, ALPHA, A( 1, IS ), 1, C( 1, JS ),
+     $                           1 )
+                     CALL DAXPY( IS-1, ALPHA, D( 1, IS ), 1, F( 1, JS ),
+     $                           1 )
+                  END IF
+                  IF( J.LT.Q ) THEN
+                     CALL DAXPY( N-JE, RHS( 2 ), B( JS, JE+1 ), LDB,
+     $                           C( IS, JE+1 ), LDC )
+                     CALL DAXPY( N-JE, RHS( 2 ), E( JS, JE+1 ), LDE,
+     $                           F( IS, JE+1 ), LDF )
+                  END IF
+*
+               ELSE IF( ( MB.EQ.1 ) .AND. ( NB.EQ.2 ) ) THEN
+*
+*                 Build a 4-by-4 system Z * x = RHS
+*
+                  Z( 1, 1 ) = A( IS, IS )
+                  Z( 2, 1 ) = ZERO
+                  Z( 3, 1 ) = D( IS, IS )
+                  Z( 4, 1 ) = ZERO
+*
+                  Z( 1, 2 ) = ZERO
+                  Z( 2, 2 ) = A( IS, IS )
+                  Z( 3, 2 ) = ZERO
+                  Z( 4, 2 ) = D( IS, IS )
+*
+                  Z( 1, 3 ) = -B( JS, JS )
+                  Z( 2, 3 ) = -B( JS, JSP1 )
+                  Z( 3, 3 ) = -E( JS, JS )
+                  Z( 4, 3 ) = -E( JS, JSP1 )
+*
+                  Z( 1, 4 ) = -B( JSP1, JS )
+                  Z( 2, 4 ) = -B( JSP1, JSP1 )
+                  Z( 3, 4 ) = ZERO
+                  Z( 4, 4 ) = -E( JSP1, JSP1 )
+*
+*                 Set up right hand side(s)
+*
+                  RHS( 1 ) = C( IS, JS )
+                  RHS( 2 ) = C( IS, JSP1 )
+                  RHS( 3 ) = F( IS, JS )
+                  RHS( 4 ) = F( IS, JSP1 )
+*
+*                 Solve Z * x = RHS
+*
+                  CALL DGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR )
+                  IF( IERR.GT.0 )
+     $               INFO = IERR
+*
+                  IF( IJOB.EQ.0 ) THEN
+                     CALL DGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV,
+     $                            SCALOC )
+                     IF( SCALOC.NE.ONE ) THEN
+                        DO 60 K = 1, N
+                           CALL DSCAL( M, SCALOC, C( 1, K ), 1 )
+                           CALL DSCAL( M, SCALOC, F( 1, K ), 1 )
+   60                   CONTINUE
+                        SCALE = SCALE*SCALOC
+                     END IF
+                  ELSE
+                     CALL DLATDF( IJOB, ZDIM, Z, LDZ, RHS, RDSUM,
+     $                            RDSCAL, IPIV, JPIV )
+                  END IF
+*
+*                 Unpack solution vector(s)
+*
+                  C( IS, JS ) = RHS( 1 )
+                  C( IS, JSP1 ) = RHS( 2 )
+                  F( IS, JS ) = RHS( 3 )
+                  F( IS, JSP1 ) = RHS( 4 )
+*
+*                 Substitute R(I, J) and L(I, J) into remaining
+*                 equation.
+*
+                  IF( I.GT.1 ) THEN
+                     CALL DGER( IS-1, NB, -ONE, A( 1, IS ), 1, RHS( 1 ),
+     $                          1, C( 1, JS ), LDC )
+                     CALL DGER( IS-1, NB, -ONE, D( 1, IS ), 1, RHS( 1 ),
+     $                          1, F( 1, JS ), LDF )
+                  END IF
+                  IF( J.LT.Q ) THEN
+                     CALL DAXPY( N-JE, RHS( 3 ), B( JS, JE+1 ), LDB,
+     $                           C( IS, JE+1 ), LDC )
+                     CALL DAXPY( N-JE, RHS( 3 ), E( JS, JE+1 ), LDE,
+     $                           F( IS, JE+1 ), LDF )
+                     CALL DAXPY( N-JE, RHS( 4 ), B( JSP1, JE+1 ), LDB,
+     $                           C( IS, JE+1 ), LDC )
+                     CALL DAXPY( N-JE, RHS( 4 ), E( JSP1, JE+1 ), LDE,
+     $                           F( IS, JE+1 ), LDF )
+                  END IF
+*
+               ELSE IF( ( MB.EQ.2 ) .AND. ( NB.EQ.1 ) ) THEN
+*
+*                 Build a 4-by-4 system Z * x = RHS
+*
+                  Z( 1, 1 ) = A( IS, IS )
+                  Z( 2, 1 ) = A( ISP1, IS )
+                  Z( 3, 1 ) = D( IS, IS )
+                  Z( 4, 1 ) = ZERO
+*
+                  Z( 1, 2 ) = A( IS, ISP1 )
+                  Z( 2, 2 ) = A( ISP1, ISP1 )
+                  Z( 3, 2 ) = D( IS, ISP1 )
+                  Z( 4, 2 ) = D( ISP1, ISP1 )
+*
+                  Z( 1, 3 ) = -B( JS, JS )
+                  Z( 2, 3 ) = ZERO
+                  Z( 3, 3 ) = -E( JS, JS )
+                  Z( 4, 3 ) = ZERO
+*
+                  Z( 1, 4 ) = ZERO
+                  Z( 2, 4 ) = -B( JS, JS )
+                  Z( 3, 4 ) = ZERO
+                  Z( 4, 4 ) = -E( JS, JS )
+*
+*                 Set up right hand side(s)
+*
+                  RHS( 1 ) = C( IS, JS )
+                  RHS( 2 ) = C( ISP1, JS )
+                  RHS( 3 ) = F( IS, JS )
+                  RHS( 4 ) = F( ISP1, JS )
+*
+*                 Solve Z * x = RHS
+*
+                  CALL DGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR )
+                  IF( IERR.GT.0 )
+     $               INFO = IERR
+                  IF( IJOB.EQ.0 ) THEN
+                     CALL DGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV,
+     $                            SCALOC )
+                     IF( SCALOC.NE.ONE ) THEN
+                        DO 70 K = 1, N
+                           CALL DSCAL( M, SCALOC, C( 1, K ), 1 )
+                           CALL DSCAL( M, SCALOC, F( 1, K ), 1 )
+   70                   CONTINUE
+                        SCALE = SCALE*SCALOC
+                     END IF
+                  ELSE
+                     CALL DLATDF( IJOB, ZDIM, Z, LDZ, RHS, RDSUM,
+     $                            RDSCAL, IPIV, JPIV )
+                  END IF
+*
+*                 Unpack solution vector(s)
+*
+                  C( IS, JS ) = RHS( 1 )
+                  C( ISP1, JS ) = RHS( 2 )
+                  F( IS, JS ) = RHS( 3 )
+                  F( ISP1, JS ) = RHS( 4 )
+*
+*                 Substitute R(I, J) and L(I, J) into remaining
+*                 equation.
+*
+                  IF( I.GT.1 ) THEN
+                     CALL DGEMV( 'N', IS-1, MB, -ONE, A( 1, IS ), LDA,
+     $                           RHS( 1 ), 1, ONE, C( 1, JS ), 1 )
+                     CALL DGEMV( 'N', IS-1, MB, -ONE, D( 1, IS ), LDD,
+     $                           RHS( 1 ), 1, ONE, F( 1, JS ), 1 )
+                  END IF
+                  IF( J.LT.Q ) THEN
+                     CALL DGER( MB, N-JE, ONE, RHS( 3 ), 1,
+     $                          B( JS, JE+1 ), LDB, C( IS, JE+1 ), LDC )
+                     CALL DGER( MB, N-JE, ONE, RHS( 3 ), 1,
+     $                          E( JS, JE+1 ), LDB, F( IS, JE+1 ), LDC )
+                  END IF
+*
+               ELSE IF( ( MB.EQ.2 ) .AND. ( NB.EQ.2 ) ) THEN
+*
+*                 Build an 8-by-8 system Z * x = RHS
+*
+                  CALL DCOPY( LDZ*LDZ, ZERO, 0, Z, 1 )
+*
+                  Z( 1, 1 ) = A( IS, IS )
+                  Z( 2, 1 ) = A( ISP1, IS )
+                  Z( 5, 1 ) = D( IS, IS )
+*
+                  Z( 1, 2 ) = A( IS, ISP1 )
+                  Z( 2, 2 ) = A( ISP1, ISP1 )
+                  Z( 5, 2 ) = D( IS, ISP1 )
+                  Z( 6, 2 ) = D( ISP1, ISP1 )
+*
+                  Z( 3, 3 ) = A( IS, IS )
+                  Z( 4, 3 ) = A( ISP1, IS )
+                  Z( 7, 3 ) = D( IS, IS )
+*
+                  Z( 3, 4 ) = A( IS, ISP1 )
+                  Z( 4, 4 ) = A( ISP1, ISP1 )
+                  Z( 7, 4 ) = D( IS, ISP1 )
+                  Z( 8, 4 ) = D( ISP1, ISP1 )
+*
+                  Z( 1, 5 ) = -B( JS, JS )
+                  Z( 3, 5 ) = -B( JS, JSP1 )
+                  Z( 5, 5 ) = -E( JS, JS )
+                  Z( 7, 5 ) = -E( JS, JSP1 )
+*
+                  Z( 2, 6 ) = -B( JS, JS )
+                  Z( 4, 6 ) = -B( JS, JSP1 )
+                  Z( 6, 6 ) = -E( JS, JS )
+                  Z( 8, 6 ) = -E( JS, JSP1 )
+*
+                  Z( 1, 7 ) = -B( JSP1, JS )
+                  Z( 3, 7 ) = -B( JSP1, JSP1 )
+                  Z( 7, 7 ) = -E( JSP1, JSP1 )
+*
+                  Z( 2, 8 ) = -B( JSP1, JS )
+                  Z( 4, 8 ) = -B( JSP1, JSP1 )
+                  Z( 8, 8 ) = -E( JSP1, JSP1 )
+*
+*                 Set up right hand side(s)
+*
+                  K = 1
+                  II = MB*NB + 1
+                  DO 80 JJ = 0, NB - 1
+                     CALL DCOPY( MB, C( IS, JS+JJ ), 1, RHS( K ), 1 )
+                     CALL DCOPY( MB, F( IS, JS+JJ ), 1, RHS( II ), 1 )
+                     K = K + MB
+                     II = II + MB
+   80             CONTINUE
+*
+*                 Solve Z * x = RHS
+*
+                  CALL DGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR )
+                  IF( IERR.GT.0 )
+     $               INFO = IERR
+                  IF( IJOB.EQ.0 ) THEN
+                     CALL DGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV,
+     $                            SCALOC )
+                     IF( SCALOC.NE.ONE ) THEN
+                        DO 90 K = 1, N
+                           CALL DSCAL( M, SCALOC, C( 1, K ), 1 )
+                           CALL DSCAL( M, SCALOC, F( 1, K ), 1 )
+   90                   CONTINUE
+                        SCALE = SCALE*SCALOC
+                     END IF
+                  ELSE
+                     CALL DLATDF( IJOB, ZDIM, Z, LDZ, RHS, RDSUM,
+     $                            RDSCAL, IPIV, JPIV )
+                  END IF
+*
+*                 Unpack solution vector(s)
+*
+                  K = 1
+                  II = MB*NB + 1
+                  DO 100 JJ = 0, NB - 1
+                     CALL DCOPY( MB, RHS( K ), 1, C( IS, JS+JJ ), 1 )
+                     CALL DCOPY( MB, RHS( II ), 1, F( IS, JS+JJ ), 1 )
+                     K = K + MB
+                     II = II + MB
+  100             CONTINUE
+*
+*                 Substitute R(I, J) and L(I, J) into remaining
+*                 equation.
+*
+                  IF( I.GT.1 ) THEN
+                     CALL DGEMM( 'N', 'N', IS-1, NB, MB, -ONE,
+     $                           A( 1, IS ), LDA, RHS( 1 ), MB, ONE,
+     $                           C( 1, JS ), LDC )
+                     CALL DGEMM( 'N', 'N', IS-1, NB, MB, -ONE,
+     $                           D( 1, IS ), LDD, RHS( 1 ), MB, ONE,
+     $                           F( 1, JS ), LDF )
+                  END IF
+                  IF( J.LT.Q ) THEN
+                     K = MB*NB + 1
+                     CALL DGEMM( 'N', 'N', MB, N-JE, NB, ONE, RHS( K ),
+     $                           MB, B( JS, JE+1 ), LDB, ONE,
+     $                           C( IS, JE+1 ), LDC )
+                     CALL DGEMM( 'N', 'N', MB, N-JE, NB, ONE, RHS( K ),
+     $                           MB, E( JS, JE+1 ), LDE, ONE,
+     $                           F( IS, JE+1 ), LDF )
+                  END IF
+*
+               END IF
+*
+  110       CONTINUE
+  120    CONTINUE
+      ELSE
+*
+*        Solve (I, J) - subsystem
+*             A(I, I)' * R(I, J) + D(I, I)' * L(J, J)  =  C(I, J)
+*             R(I, I)  * B(J, J) + L(I, J)  * E(J, J)  = -F(I, J)
+*        for I = 1, 2, ..., P, J = Q, Q - 1, ..., 1
+*
+         SCALE = ONE
+         SCALOC = ONE
+         DO 200 I = 1, P
+*
+            IS = IWORK( I )
+            ISP1 = IS + 1
+            IE = IWORK( I+1 ) - 1
+            MB = IE - IS + 1
+            DO 190 J = Q, P + 2, -1
+*
+               JS = IWORK( J )
+               JSP1 = JS + 1
+               JE = IWORK( J+1 ) - 1
+               NB = JE - JS + 1
+               ZDIM = MB*NB*2
+               IF( ( MB.EQ.1 ) .AND. ( NB.EQ.1 ) ) THEN
+*
+*                 Build a 2-by-2 system Z' * x = RHS
+*
+                  Z( 1, 1 ) = A( IS, IS )
+                  Z( 2, 1 ) = -B( JS, JS )
+                  Z( 1, 2 ) = D( IS, IS )
+                  Z( 2, 2 ) = -E( JS, JS )
+*
+*                 Set up right hand side(s)
+*
+                  RHS( 1 ) = C( IS, JS )
+                  RHS( 2 ) = F( IS, JS )
+*
+*                 Solve Z' * x = RHS
+*
+                  CALL DGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR )
+                  IF( IERR.GT.0 )
+     $               INFO = IERR
+*
+                  CALL DGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, SCALOC )
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 130 K = 1, N
+                        CALL DSCAL( M, SCALOC, C( 1, K ), 1 )
+                        CALL DSCAL( M, SCALOC, F( 1, K ), 1 )
+  130                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+*
+*                 Unpack solution vector(s)
+*
+                  C( IS, JS ) = RHS( 1 )
+                  F( IS, JS ) = RHS( 2 )
+*
+*                 Substitute R(I, J) and L(I, J) into remaining
+*                 equation.
+*
+                  IF( J.GT.P+2 ) THEN
+                     ALPHA = RHS( 1 )
+                     CALL DAXPY( JS-1, ALPHA, B( 1, JS ), 1, F( IS, 1 ),
+     $                           LDF )
+                     ALPHA = RHS( 2 )
+                     CALL DAXPY( JS-1, ALPHA, E( 1, JS ), 1, F( IS, 1 ),
+     $                           LDF )
+                  END IF
+                  IF( I.LT.P ) THEN
+                     ALPHA = -RHS( 1 )
+                     CALL DAXPY( M-IE, ALPHA, A( IS, IE+1 ), LDA,
+     $                           C( IE+1, JS ), 1 )
+                     ALPHA = -RHS( 2 )
+                     CALL DAXPY( M-IE, ALPHA, D( IS, IE+1 ), LDD,
+     $                           C( IE+1, JS ), 1 )
+                  END IF
+*
+               ELSE IF( ( MB.EQ.1 ) .AND. ( NB.EQ.2 ) ) THEN
+*
+*                 Build a 4-by-4 system Z' * x = RHS
+*
+                  Z( 1, 1 ) = A( IS, IS )
+                  Z( 2, 1 ) = ZERO
+                  Z( 3, 1 ) = -B( JS, JS )
+                  Z( 4, 1 ) = -B( JSP1, JS )
+*
+                  Z( 1, 2 ) = ZERO
+                  Z( 2, 2 ) = A( IS, IS )
+                  Z( 3, 2 ) = -B( JS, JSP1 )
+                  Z( 4, 2 ) = -B( JSP1, JSP1 )
+*
+                  Z( 1, 3 ) = D( IS, IS )
+                  Z( 2, 3 ) = ZERO
+                  Z( 3, 3 ) = -E( JS, JS )
+                  Z( 4, 3 ) = ZERO
+*
+                  Z( 1, 4 ) = ZERO
+                  Z( 2, 4 ) = D( IS, IS )
+                  Z( 3, 4 ) = -E( JS, JSP1 )
+                  Z( 4, 4 ) = -E( JSP1, JSP1 )
+*
+*                 Set up right hand side(s)
+*
+                  RHS( 1 ) = C( IS, JS )
+                  RHS( 2 ) = C( IS, JSP1 )
+                  RHS( 3 ) = F( IS, JS )
+                  RHS( 4 ) = F( IS, JSP1 )
+*
+*                 Solve Z' * x = RHS
+*
+                  CALL DGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR )
+                  IF( IERR.GT.0 )
+     $               INFO = IERR
+                  CALL DGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, SCALOC )
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 140 K = 1, N
+                        CALL DSCAL( M, SCALOC, C( 1, K ), 1 )
+                        CALL DSCAL( M, SCALOC, F( 1, K ), 1 )
+  140                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+*
+*                 Unpack solution vector(s)
+*
+                  C( IS, JS ) = RHS( 1 )
+                  C( IS, JSP1 ) = RHS( 2 )
+                  F( IS, JS ) = RHS( 3 )
+                  F( IS, JSP1 ) = RHS( 4 )
+*
+*                 Substitute R(I, J) and L(I, J) into remaining
+*                 equation.
+*
+                  IF( J.GT.P+2 ) THEN
+                     CALL DAXPY( JS-1, RHS( 1 ), B( 1, JS ), 1,
+     $                           F( IS, 1 ), LDF )
+                     CALL DAXPY( JS-1, RHS( 2 ), B( 1, JSP1 ), 1,
+     $                           F( IS, 1 ), LDF )
+                     CALL DAXPY( JS-1, RHS( 3 ), E( 1, JS ), 1,
+     $                           F( IS, 1 ), LDF )
+                     CALL DAXPY( JS-1, RHS( 4 ), E( 1, JSP1 ), 1,
+     $                           F( IS, 1 ), LDF )
+                  END IF
+                  IF( I.LT.P ) THEN
+                     CALL DGER( M-IE, NB, -ONE, A( IS, IE+1 ), LDA,
+     $                          RHS( 1 ), 1, C( IE+1, JS ), LDC )
+                     CALL DGER( M-IE, NB, -ONE, D( IS, IE+1 ), LDD,
+     $                          RHS( 3 ), 1, C( IE+1, JS ), LDC )
+                  END IF
+*
+               ELSE IF( ( MB.EQ.2 ) .AND. ( NB.EQ.1 ) ) THEN
+*
+*                 Build a 4-by-4 system Z' * x = RHS
+*
+                  Z( 1, 1 ) = A( IS, IS )
+                  Z( 2, 1 ) = A( IS, ISP1 )
+                  Z( 3, 1 ) = -B( JS, JS )
+                  Z( 4, 1 ) = ZERO
+*
+                  Z( 1, 2 ) = A( ISP1, IS )
+                  Z( 2, 2 ) = A( ISP1, ISP1 )
+                  Z( 3, 2 ) = ZERO
+                  Z( 4, 2 ) = -B( JS, JS )
+*
+                  Z( 1, 3 ) = D( IS, IS )
+                  Z( 2, 3 ) = D( IS, ISP1 )
+                  Z( 3, 3 ) = -E( JS, JS )
+                  Z( 4, 3 ) = ZERO
+*
+                  Z( 1, 4 ) = ZERO
+                  Z( 2, 4 ) = D( ISP1, ISP1 )
+                  Z( 3, 4 ) = ZERO
+                  Z( 4, 4 ) = -E( JS, JS )
+*
+*                 Set up right hand side(s)
+*
+                  RHS( 1 ) = C( IS, JS )
+                  RHS( 2 ) = C( ISP1, JS )
+                  RHS( 3 ) = F( IS, JS )
+                  RHS( 4 ) = F( ISP1, JS )
+*
+*                 Solve Z' * x = RHS
+*
+                  CALL DGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR )
+                  IF( IERR.GT.0 )
+     $               INFO = IERR
+*
+                  CALL DGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, SCALOC )
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 150 K = 1, N
+                        CALL DSCAL( M, SCALOC, C( 1, K ), 1 )
+                        CALL DSCAL( M, SCALOC, F( 1, K ), 1 )
+  150                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+*
+*                 Unpack solution vector(s)
+*
+                  C( IS, JS ) = RHS( 1 )
+                  C( ISP1, JS ) = RHS( 2 )
+                  F( IS, JS ) = RHS( 3 )
+                  F( ISP1, JS ) = RHS( 4 )
+*
+*                 Substitute R(I, J) and L(I, J) into remaining
+*                 equation.
+*
+                  IF( J.GT.P+2 ) THEN
+                     CALL DGER( MB, JS-1, ONE, RHS( 1 ), 1, B( 1, JS ),
+     $                          1, F( IS, 1 ), LDF )
+                     CALL DGER( MB, JS-1, ONE, RHS( 3 ), 1, E( 1, JS ),
+     $                          1, F( IS, 1 ), LDF )
+                  END IF
+                  IF( I.LT.P ) THEN
+                     CALL DGEMV( 'T', MB, M-IE, -ONE, A( IS, IE+1 ),
+     $                           LDA, RHS( 1 ), 1, ONE, C( IE+1, JS ),
+     $                           1 )
+                     CALL DGEMV( 'T', MB, M-IE, -ONE, D( IS, IE+1 ),
+     $                           LDD, RHS( 3 ), 1, ONE, C( IE+1, JS ),
+     $                           1 )
+                  END IF
+*
+               ELSE IF( ( MB.EQ.2 ) .AND. ( NB.EQ.2 ) ) THEN
+*
+*                 Build an 8-by-8 system Z' * x = RHS
+*
+                  CALL DCOPY( LDZ*LDZ, ZERO, 0, Z, 1 )
+*
+                  Z( 1, 1 ) = A( IS, IS )
+                  Z( 2, 1 ) = A( IS, ISP1 )
+                  Z( 5, 1 ) = -B( JS, JS )
+                  Z( 7, 1 ) = -B( JSP1, JS )
+*
+                  Z( 1, 2 ) = A( ISP1, IS )
+                  Z( 2, 2 ) = A( ISP1, ISP1 )
+                  Z( 6, 2 ) = -B( JS, JS )
+                  Z( 8, 2 ) = -B( JSP1, JS )
+*
+                  Z( 3, 3 ) = A( IS, IS )
+                  Z( 4, 3 ) = A( IS, ISP1 )
+                  Z( 5, 3 ) = -B( JS, JSP1 )
+                  Z( 7, 3 ) = -B( JSP1, JSP1 )
+*
+                  Z( 3, 4 ) = A( ISP1, IS )
+                  Z( 4, 4 ) = A( ISP1, ISP1 )
+                  Z( 6, 4 ) = -B( JS, JSP1 )
+                  Z( 8, 4 ) = -B( JSP1, JSP1 )
+*
+                  Z( 1, 5 ) = D( IS, IS )
+                  Z( 2, 5 ) = D( IS, ISP1 )
+                  Z( 5, 5 ) = -E( JS, JS )
+*
+                  Z( 2, 6 ) = D( ISP1, ISP1 )
+                  Z( 6, 6 ) = -E( JS, JS )
+*
+                  Z( 3, 7 ) = D( IS, IS )
+                  Z( 4, 7 ) = D( IS, ISP1 )
+                  Z( 5, 7 ) = -E( JS, JSP1 )
+                  Z( 7, 7 ) = -E( JSP1, JSP1 )
+*
+                  Z( 4, 8 ) = D( ISP1, ISP1 )
+                  Z( 6, 8 ) = -E( JS, JSP1 )
+                  Z( 8, 8 ) = -E( JSP1, JSP1 )
+*
+*                 Set up right hand side(s)
+*
+                  K = 1
+                  II = MB*NB + 1
+                  DO 160 JJ = 0, NB - 1
+                     CALL DCOPY( MB, C( IS, JS+JJ ), 1, RHS( K ), 1 )
+                     CALL DCOPY( MB, F( IS, JS+JJ ), 1, RHS( II ), 1 )
+                     K = K + MB
+                     II = II + MB
+  160             CONTINUE
+*
+*
+*                 Solve Z' * x = RHS
+*
+                  CALL DGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR )
+                  IF( IERR.GT.0 )
+     $               INFO = IERR
+*
+                  CALL DGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, SCALOC )
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 170 K = 1, N
+                        CALL DSCAL( M, SCALOC, C( 1, K ), 1 )
+                        CALL DSCAL( M, SCALOC, F( 1, K ), 1 )
+  170                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+*
+*                 Unpack solution vector(s)
+*
+                  K = 1
+                  II = MB*NB + 1
+                  DO 180 JJ = 0, NB - 1
+                     CALL DCOPY( MB, RHS( K ), 1, C( IS, JS+JJ ), 1 )
+                     CALL DCOPY( MB, RHS( II ), 1, F( IS, JS+JJ ), 1 )
+                     K = K + MB
+                     II = II + MB
+  180             CONTINUE
+*
+*                 Substitute R(I, J) and L(I, J) into remaining
+*                 equation.
+*
+                  IF( J.GT.P+2 ) THEN
+                     CALL DGEMM( 'N', 'T', MB, JS-1, NB, ONE,
+     $                           C( IS, JS ), LDC, B( 1, JS ), LDB, ONE,
+     $                           F( IS, 1 ), LDF )
+                     CALL DGEMM( 'N', 'T', MB, JS-1, NB, ONE,
+     $                           F( IS, JS ), LDF, E( 1, JS ), LDE, ONE,
+     $                           F( IS, 1 ), LDF )
+                  END IF
+                  IF( I.LT.P ) THEN
+                     CALL DGEMM( 'T', 'N', M-IE, NB, MB, -ONE,
+     $                           A( IS, IE+1 ), LDA, C( IS, JS ), LDC,
+     $                           ONE, C( IE+1, JS ), LDC )
+                     CALL DGEMM( 'T', 'N', M-IE, NB, MB, -ONE,
+     $                           D( IS, IE+1 ), LDD, F( IS, JS ), LDF,
+     $                           ONE, C( IE+1, JS ), LDC )
+                  END IF
+*
+               END IF
+*
+  190       CONTINUE
+  200    CONTINUE
+*
+      END IF
+      RETURN
+*
+*     End of DTGSY2
+*
+      END
diff --git a/costa/native/external/lapack/dtgsyl.f b/costa/native/external/lapack/dtgsyl.f
new file mode 100644
index 000000000..ff00f18f1
--- /dev/null
+++ b/costa/native/external/lapack/dtgsyl.f
@@ -0,0 +1,534 @@
+      SUBROUTINE DTGSYL( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D,
+     $                   LDD, E, LDE, F, LDF, SCALE, DIF, WORK, LWORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF,
+     $                   LWORK, M, N
+      DOUBLE PRECISION   DIF, SCALE
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), C( LDC, * ),
+     $                   D( LDD, * ), E( LDE, * ), F( LDF, * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTGSYL solves the generalized Sylvester equation:
+*
+*              A * R - L * B = scale * C                 (1)
+*              D * R - L * E = scale * F
+*
+*  where R and L are unknown m-by-n matrices, (A, D), (B, E) and
+*  (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n,
+*  respectively, with real entries. (A, D) and (B, E) must be in
+*  generalized (real) Schur canonical form, i.e. A, B are upper quasi
+*  triangular and D, E are upper triangular.
+*
+*  The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 is an output
+*  scaling factor chosen to avoid overflow.
+*
+*  In matrix notation (1) is equivalent to solve  Zx = scale b, where
+*  Z is defined as
+*
+*             Z = [ kron(In, A)  -kron(B', Im) ]         (2)
+*                 [ kron(In, D)  -kron(E', Im) ].
+*
+*  Here Ik is the identity matrix of size k and X' is the transpose of
+*  X. kron(X, Y) is the Kronecker product between the matrices X and Y.
+*
+*  If TRANS = 'T', DTGSYL solves the transposed system Z'*y = scale*b,
+*  which is equivalent to solve for R and L in
+*
+*              A' * R  + D' * L   = scale *  C           (3)
+*              R  * B' + L  * E'  = scale * (-F)
+*
+*  This case (TRANS = 'T') is used to compute an one-norm-based estimate
+*  of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D)
+*  and (B,E), using DLACON.
+*
+*  If IJOB >= 1, DTGSYL computes a Frobenius norm-based estimate
+*  of Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the
+*  reciprocal of the smallest singular value of Z. See [1-2] for more
+*  information.
+*
+*  This is a level 3 BLAS algorithm.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N', solve the generalized Sylvester equation (1).
+*          = 'T', solve the 'transposed' system (3).
+*
+*  IJOB    (input) INTEGER
+*          Specifies what kind of functionality to be performed.
+*           =0: solve (1) only.
+*           =1: The functionality of 0 and 3.
+*           =2: The functionality of 0 and 4.
+*           =3: Only an estimate of Dif[(A,D), (B,E)] is computed.
+*               (look ahead strategy IJOB  = 1 is used).
+*           =4: Only an estimate of Dif[(A,D), (B,E)] is computed.
+*               ( DGECON on sub-systems is used ).
+*          Not referenced if TRANS = 'T'.
+*
+*  M       (input) INTEGER
+*          The order of the matrices A and D, and the row dimension of
+*          the matrices C, F, R and L.
+*
+*  N       (input) INTEGER
+*          The order of the matrices B and E, and the column dimension
+*          of the matrices C, F, R and L.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA, M)
+*          The upper quasi triangular matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1, M).
+*
+*  B       (input) DOUBLE PRECISION array, dimension (LDB, N)
+*          The upper quasi triangular matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1, N).
+*
+*  C       (input/output) DOUBLE PRECISION array, dimension (LDC, N)
+*          On entry, C contains the right-hand-side of the first matrix
+*          equation in (1) or (3).
+*          On exit, if IJOB = 0, 1 or 2, C has been overwritten by
+*          the solution R. If IJOB = 3 or 4 and TRANS = 'N', C holds R,
+*          the solution achieved during the computation of the
+*          Dif-estimate.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1, M).
+*
+*  D       (input) DOUBLE PRECISION array, dimension (LDD, M)
+*          The upper triangular matrix D.
+*
+*  LDD     (input) INTEGER
+*          The leading dimension of the array D. LDD >= max(1, M).
+*
+*  E       (input) DOUBLE PRECISION array, dimension (LDE, N)
+*          The upper triangular matrix E.
+*
+*  LDE     (input) INTEGER
+*          The leading dimension of the array E. LDE >= max(1, N).
+*
+*  F       (input/output) DOUBLE PRECISION array, dimension (LDF, N)
+*          On entry, F contains the right-hand-side of the second matrix
+*          equation in (1) or (3).
+*          On exit, if IJOB = 0, 1 or 2, F has been overwritten by
+*          the solution L. If IJOB = 3 or 4 and TRANS = 'N', F holds L,
+*          the solution achieved during the computation of the
+*          Dif-estimate.
+*
+*  LDF     (input) INTEGER
+*          The leading dimension of the array F. LDF >= max(1, M).
+*
+*  DIF     (output) DOUBLE PRECISION
+*          On exit DIF is the reciprocal of a lower bound of the
+*          reciprocal of the Dif-function, i.e. DIF is an upper bound of
+*          Dif[(A,D), (B,E)] = sigma_min(Z), where Z as in (2).
+*          IF IJOB = 0 or TRANS = 'T', DIF is not touched.
+*
+*  SCALE   (output) DOUBLE PRECISION
+*          On exit SCALE is the scaling factor in (1) or (3).
+*          If 0 < SCALE < 1, C and F hold the solutions R and L, resp.,
+*          to a slightly perturbed system but the input matrices A, B, D
+*          and E have not been changed. If SCALE = 0, C and F hold the
+*          solutions R and L, respectively, to the homogeneous system
+*          with C = F = 0. Normally, SCALE = 1.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          If IJOB = 0, WORK is not referenced.  Otherwise,
+*          on exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK > = 1.
+*          If IJOB = 1 or 2 and TRANS = 'N', LWORK >= 2*M*N.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace) INTEGER array, dimension (M+N+6)
+*
+*  INFO    (output) INTEGER
+*            =0: successful exit
+*            <0: If INFO = -i, the i-th argument had an illegal value.
+*            >0: (A, D) and (B, E) have common or close eigenvalues.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  [1] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software
+*      for Solving the Generalized Sylvester Equation and Estimating the
+*      Separation between Regular Matrix Pairs, Report UMINF - 93.23,
+*      Department of Computing Science, Umea University, S-901 87 Umea,
+*      Sweden, December 1993, Revised April 1994, Also as LAPACK Working
+*      Note 75.  To appear in ACM Trans. on Math. Software, Vol 22,
+*      No 1, 1996.
+*
+*  [2] B. Kagstrom, A Perturbation Analysis of the Generalized Sylvester
+*      Equation (AR - LB, DR - LE ) = (C, F), SIAM J. Matrix Anal.
+*      Appl., 15(4):1045-1060, 1994
+*
+*  [3] B. Kagstrom and L. Westin, Generalized Schur Methods with
+*      Condition Estimators for Solving the Generalized Sylvester
+*      Equation, IEEE Transactions on Automatic Control, Vol. 34, No. 7,
+*      July 1989, pp 745-751.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, NOTRAN
+      INTEGER            I, IE, IFUNC, IROUND, IS, ISOLVE, J, JE, JS, K,
+     $                   LINFO, LWMIN, MB, NB, P, PPQQ, PQ, Q
+      DOUBLE PRECISION   DSCALE, DSUM, SCALE2, SCALOC
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DGEMM, DLACPY, DSCAL, DTGSY2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test input parameters
+*
+      INFO = 0
+      NOTRAN = LSAME( TRANS, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      IF( ( IJOB.EQ.1 .OR. IJOB.EQ.2 ) .AND. NOTRAN ) THEN
+         LWMIN = MAX( 1, 2*M*N )
+      ELSE
+         LWMIN = 1
+      END IF
+*
+      IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN
+         INFO = -1
+      ELSE IF( ( IJOB.LT.0 ) .OR. ( IJOB.GT.4 ) ) THEN
+         INFO = -2
+      ELSE IF( M.LE.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LE.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LDD.LT.MAX( 1, M ) ) THEN
+         INFO = -12
+      ELSE IF( LDE.LT.MAX( 1, N ) ) THEN
+         INFO = -14
+      ELSE IF( LDF.LT.MAX( 1, M ) ) THEN
+         INFO = -16
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -20
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DTGSYL', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Determine optimal block sizes MB and NB
+*
+      MB = ILAENV( 2, 'DTGSYL', TRANS, M, N, -1, -1 )
+      NB = ILAENV( 5, 'DTGSYL', TRANS, M, N, -1, -1 )
+*
+      ISOLVE = 1
+      IFUNC = 0
+      IF( IJOB.GE.3 .AND. NOTRAN ) THEN
+         IFUNC = IJOB - 2
+         DO 10 J = 1, N
+            CALL DCOPY( M, ZERO, 0, C( 1, J ), 1 )
+            CALL DCOPY( M, ZERO, 0, F( 1, J ), 1 )
+   10    CONTINUE
+      ELSE IF( IJOB.GE.1 .AND. NOTRAN ) THEN
+         ISOLVE = 2
+      END IF
+*
+      IF( ( MB.LE.1 .AND. NB.LE.1 ) .OR. ( MB.GE.M .AND. NB.GE.N ) )
+     $     THEN
+*
+         DO 30 IROUND = 1, ISOLVE
+*
+*           Use unblocked Level 2 solver
+*
+            DSCALE = ZERO
+            DSUM = ONE
+            PQ = 0
+            CALL DTGSY2( TRANS, IFUNC, M, N, A, LDA, B, LDB, C, LDC, D,
+     $                   LDD, E, LDE, F, LDF, SCALE, DSUM, DSCALE,
+     $                   IWORK, PQ, INFO )
+            IF( DSCALE.NE.ZERO ) THEN
+               IF( IJOB.EQ.1 .OR. IJOB.EQ.3 ) THEN
+                  DIF = SQRT( DBLE( 2*M*N ) ) / ( DSCALE*SQRT( DSUM ) )
+               ELSE
+                  DIF = SQRT( DBLE( PQ ) ) / ( DSCALE*SQRT( DSUM ) )
+               END IF
+            END IF
+*
+            IF( ISOLVE.EQ.2 .AND. IROUND.EQ.1 ) THEN
+               IFUNC = IJOB
+               SCALE2 = SCALE
+               CALL DLACPY( 'F', M, N, C, LDC, WORK, M )
+               CALL DLACPY( 'F', M, N, F, LDF, WORK( M*N+1 ), M )
+               DO 20 J = 1, N
+                  CALL DCOPY( M, ZERO, 0, C( 1, J ), 1 )
+                  CALL DCOPY( M, ZERO, 0, F( 1, J ), 1 )
+   20          CONTINUE
+            ELSE IF( ISOLVE.EQ.2 .AND. IROUND.EQ.2 ) THEN
+               CALL DLACPY( 'F', M, N, WORK, M, C, LDC )
+               CALL DLACPY( 'F', M, N, WORK( M*N+1 ), M, F, LDF )
+               SCALE = SCALE2
+            END IF
+   30    CONTINUE
+*
+         RETURN
+      END IF
+*
+*     Determine block structure of A
+*
+      P = 0
+      I = 1
+   40 CONTINUE
+      IF( I.GT.M )
+     $   GO TO 50
+      P = P + 1
+      IWORK( P ) = I
+      I = I + MB
+      IF( I.GE.M )
+     $   GO TO 50
+      IF( A( I, I-1 ).NE.ZERO )
+     $   I = I + 1
+      GO TO 40
+   50 CONTINUE
+*
+      IWORK( P+1 ) = M + 1
+      IF( IWORK( P ).EQ.IWORK( P+1 ) )
+     $   P = P - 1
+*
+*     Determine block structure of B
+*
+      Q = P + 1
+      J = 1
+   60 CONTINUE
+      IF( J.GT.N )
+     $   GO TO 70
+      Q = Q + 1
+      IWORK( Q ) = J
+      J = J + NB
+      IF( J.GE.N )
+     $   GO TO 70
+      IF( B( J, J-1 ).NE.ZERO )
+     $   J = J + 1
+      GO TO 60
+   70 CONTINUE
+*
+      IWORK( Q+1 ) = N + 1
+      IF( IWORK( Q ).EQ.IWORK( Q+1 ) )
+     $   Q = Q - 1
+*
+      IF( NOTRAN ) THEN
+*
+         DO 150 IROUND = 1, ISOLVE
+*
+*           Solve (I, J)-subsystem
+*               A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J)
+*               D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J)
+*           for I = P, P - 1,..., 1; J = 1, 2,..., Q
+*
+            DSCALE = ZERO
+            DSUM = ONE
+            PQ = 0
+            SCALE = ONE
+            DO 130 J = P + 2, Q
+               JS = IWORK( J )
+               JE = IWORK( J+1 ) - 1
+               NB = JE - JS + 1
+               DO 120 I = P, 1, -1
+                  IS = IWORK( I )
+                  IE = IWORK( I+1 ) - 1
+                  MB = IE - IS + 1
+                  PPQQ = 0
+                  CALL DTGSY2( TRANS, IFUNC, MB, NB, A( IS, IS ), LDA,
+     $                         B( JS, JS ), LDB, C( IS, JS ), LDC,
+     $                         D( IS, IS ), LDD, E( JS, JS ), LDE,
+     $                         F( IS, JS ), LDF, SCALOC, DSUM, DSCALE,
+     $                         IWORK( Q+2 ), PPQQ, LINFO )
+                  IF( LINFO.GT.0 )
+     $               INFO = LINFO
+*
+                  PQ = PQ + PPQQ
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 80 K = 1, JS - 1
+                        CALL DSCAL( M, SCALOC, C( 1, K ), 1 )
+                        CALL DSCAL( M, SCALOC, F( 1, K ), 1 )
+   80                CONTINUE
+                     DO 90 K = JS, JE
+                        CALL DSCAL( IS-1, SCALOC, C( 1, K ), 1 )
+                        CALL DSCAL( IS-1, SCALOC, F( 1, K ), 1 )
+   90                CONTINUE
+                     DO 100 K = JS, JE
+                        CALL DSCAL( M-IE, SCALOC, C( IE+1, K ), 1 )
+                        CALL DSCAL( M-IE, SCALOC, F( IE+1, K ), 1 )
+  100                CONTINUE
+                     DO 110 K = JE + 1, N
+                        CALL DSCAL( M, SCALOC, C( 1, K ), 1 )
+                        CALL DSCAL( M, SCALOC, F( 1, K ), 1 )
+  110                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+*
+*                 Substitute R(I, J) and L(I, J) into remaining
+*                 equation.
+*
+                  IF( I.GT.1 ) THEN
+                     CALL DGEMM( 'N', 'N', IS-1, NB, MB, -ONE,
+     $                           A( 1, IS ), LDA, C( IS, JS ), LDC, ONE,
+     $                           C( 1, JS ), LDC )
+                     CALL DGEMM( 'N', 'N', IS-1, NB, MB, -ONE,
+     $                           D( 1, IS ), LDD, C( IS, JS ), LDC, ONE,
+     $                           F( 1, JS ), LDF )
+                  END IF
+                  IF( J.LT.Q ) THEN
+                     CALL DGEMM( 'N', 'N', MB, N-JE, NB, ONE,
+     $                           F( IS, JS ), LDF, B( JS, JE+1 ), LDB,
+     $                           ONE, C( IS, JE+1 ), LDC )
+                     CALL DGEMM( 'N', 'N', MB, N-JE, NB, ONE,
+     $                           F( IS, JS ), LDF, E( JS, JE+1 ), LDE,
+     $                           ONE, F( IS, JE+1 ), LDF )
+                  END IF
+  120          CONTINUE
+  130       CONTINUE
+            IF( DSCALE.NE.ZERO ) THEN
+               IF( IJOB.EQ.1 .OR. IJOB.EQ.3 ) THEN
+                  DIF = SQRT( DBLE( 2*M*N ) ) / ( DSCALE*SQRT( DSUM ) )
+               ELSE
+                  DIF = SQRT( DBLE( PQ ) ) / ( DSCALE*SQRT( DSUM ) )
+               END IF
+            END IF
+            IF( ISOLVE.EQ.2 .AND. IROUND.EQ.1 ) THEN
+               IFUNC = IJOB
+               SCALE2 = SCALE
+               CALL DLACPY( 'F', M, N, C, LDC, WORK, M )
+               CALL DLACPY( 'F', M, N, F, LDF, WORK( M*N+1 ), M )
+               DO 140 J = 1, N
+                  CALL DCOPY( M, ZERO, 0, C( 1, J ), 1 )
+                  CALL DCOPY( M, ZERO, 0, F( 1, J ), 1 )
+  140          CONTINUE
+            ELSE IF( ISOLVE.EQ.2 .AND. IROUND.EQ.2 ) THEN
+               CALL DLACPY( 'F', M, N, WORK, M, C, LDC )
+               CALL DLACPY( 'F', M, N, WORK( M*N+1 ), M, F, LDF )
+               SCALE = SCALE2
+            END IF
+  150    CONTINUE
+*
+      ELSE
+*
+*        Solve transposed (I, J)-subsystem
+*             A(I, I)' * R(I, J)  + D(I, I)' * L(I, J)  =  C(I, J)
+*             R(I, J)  * B(J, J)' + L(I, J)  * E(J, J)' = -F(I, J)
+*        for I = 1,2,..., P; J = Q, Q-1,..., 1
+*
+         SCALE = ONE
+         DO 210 I = 1, P
+            IS = IWORK( I )
+            IE = IWORK( I+1 ) - 1
+            MB = IE - IS + 1
+            DO 200 J = Q, P + 2, -1
+               JS = IWORK( J )
+               JE = IWORK( J+1 ) - 1
+               NB = JE - JS + 1
+               CALL DTGSY2( TRANS, IFUNC, MB, NB, A( IS, IS ), LDA,
+     $                      B( JS, JS ), LDB, C( IS, JS ), LDC,
+     $                      D( IS, IS ), LDD, E( JS, JS ), LDE,
+     $                      F( IS, JS ), LDF, SCALOC, DSUM, DSCALE,
+     $                      IWORK( Q+2 ), PPQQ, LINFO )
+               IF( LINFO.GT.0 )
+     $            INFO = LINFO
+               IF( SCALOC.NE.ONE ) THEN
+                  DO 160 K = 1, JS - 1
+                     CALL DSCAL( M, SCALOC, C( 1, K ), 1 )
+                     CALL DSCAL( M, SCALOC, F( 1, K ), 1 )
+  160             CONTINUE
+                  DO 170 K = JS, JE
+                     CALL DSCAL( IS-1, SCALOC, C( 1, K ), 1 )
+                     CALL DSCAL( IS-1, SCALOC, F( 1, K ), 1 )
+  170             CONTINUE
+                  DO 180 K = JS, JE
+                     CALL DSCAL( M-IE, SCALOC, C( IE+1, K ), 1 )
+                     CALL DSCAL( M-IE, SCALOC, F( IE+1, K ), 1 )
+  180             CONTINUE
+                  DO 190 K = JE + 1, N
+                     CALL DSCAL( M, SCALOC, C( 1, K ), 1 )
+                     CALL DSCAL( M, SCALOC, F( 1, K ), 1 )
+  190             CONTINUE
+                  SCALE = SCALE*SCALOC
+               END IF
+*
+*              Substitute R(I, J) and L(I, J) into remaining equation.
+*
+               IF( J.GT.P+2 ) THEN
+                  CALL DGEMM( 'N', 'T', MB, JS-1, NB, ONE, C( IS, JS ),
+     $                        LDC, B( 1, JS ), LDB, ONE, F( IS, 1 ),
+     $                        LDF )
+                  CALL DGEMM( 'N', 'T', MB, JS-1, NB, ONE, F( IS, JS ),
+     $                        LDF, E( 1, JS ), LDE, ONE, F( IS, 1 ),
+     $                        LDF )
+               END IF
+               IF( I.LT.P ) THEN
+                  CALL DGEMM( 'T', 'N', M-IE, NB, MB, -ONE,
+     $                        A( IS, IE+1 ), LDA, C( IS, JS ), LDC, ONE,
+     $                        C( IE+1, JS ), LDC )
+                  CALL DGEMM( 'T', 'N', M-IE, NB, MB, -ONE,
+     $                        D( IS, IE+1 ), LDD, F( IS, JS ), LDF, ONE,
+     $                        C( IE+1, JS ), LDC )
+               END IF
+  200       CONTINUE
+  210    CONTINUE
+*
+      END IF
+*
+      WORK( 1 ) = LWMIN
+*
+      RETURN
+*
+*     End of DTGSYL
+*
+      END
diff --git a/costa/native/external/lapack/dtpcon.f b/costa/native/external/lapack/dtpcon.f
new file mode 100644
index 000000000..2d66707db
--- /dev/null
+++ b/costa/native/external/lapack/dtpcon.f
@@ -0,0 +1,187 @@
+      SUBROUTINE DTPCON( NORM, UPLO, DIAG, N, AP, RCOND, WORK, IWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORM, UPLO
+      INTEGER            INFO, N
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   AP( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTPCON estimates the reciprocal of the condition number of a packed
+*  triangular matrix A, in either the 1-norm or the infinity-norm.
+*
+*  The norm of A is computed and an estimate is obtained for
+*  norm(inv(A)), then the reciprocal of the condition number is
+*  computed as
+*     RCOND = 1 / ( norm(A) * norm(inv(A)) ).
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies whether the 1-norm condition number or the
+*          infinity-norm condition number is required:
+*          = '1' or 'O':  1-norm;
+*          = 'I':         Infinity-norm.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          The upper or lower triangular matrix A, packed columnwise in
+*          a linear array.  The j-th column of A is stored in the array
+*          AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*          If DIAG = 'U', the diagonal elements of A are not referenced
+*          and are assumed to be 1.
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(norm(A) * norm(inv(A))).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, ONENRM, UPPER
+      CHARACTER          NORMIN
+      INTEGER            IX, KASE, KASE1
+      DOUBLE PRECISION   AINVNM, ANORM, SCALE, SMLNUM, XNORM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DLAMCH, DLANTP
+      EXTERNAL           LSAME, IDAMAX, DLAMCH, DLANTP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLACON, DLATPS, DRSCL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+      IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DTPCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      END IF
+*
+      RCOND = ZERO
+      SMLNUM = DLAMCH( 'Safe minimum' )*DBLE( MAX( 1, N ) )
+*
+*     Compute the norm of the triangular matrix A.
+*
+      ANORM = DLANTP( NORM, UPLO, DIAG, N, AP, WORK )
+*
+*     Continue only if ANORM > 0.
+*
+      IF( ANORM.GT.ZERO ) THEN
+*
+*        Estimate the norm of the inverse of A.
+*
+         AINVNM = ZERO
+         NORMIN = 'N'
+         IF( ONENRM ) THEN
+            KASE1 = 1
+         ELSE
+            KASE1 = 2
+         END IF
+         KASE = 0
+   10    CONTINUE
+         CALL DLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.KASE1 ) THEN
+*
+*              Multiply by inv(A).
+*
+               CALL DLATPS( UPLO, 'No transpose', DIAG, NORMIN, N, AP,
+     $                      WORK, SCALE, WORK( 2*N+1 ), INFO )
+            ELSE
+*
+*              Multiply by inv(A').
+*
+               CALL DLATPS( UPLO, 'Transpose', DIAG, NORMIN, N, AP,
+     $                      WORK, SCALE, WORK( 2*N+1 ), INFO )
+            END IF
+            NORMIN = 'Y'
+*
+*           Multiply by 1/SCALE if doing so will not cause overflow.
+*
+            IF( SCALE.NE.ONE ) THEN
+               IX = IDAMAX( N, WORK, 1 )
+               XNORM = ABS( WORK( IX ) )
+               IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO )
+     $            GO TO 20
+               CALL DRSCL( N, SCALE, WORK, 1 )
+            END IF
+            GO TO 10
+         END IF
+*
+*        Compute the estimate of the reciprocal condition number.
+*
+         IF( AINVNM.NE.ZERO )
+     $      RCOND = ( ONE / ANORM ) / AINVNM
+      END IF
+*
+   20 CONTINUE
+      RETURN
+*
+*     End of DTPCON
+*
+      END
diff --git a/costa/native/external/lapack/dtprfs.f b/costa/native/external/lapack/dtprfs.f
new file mode 100644
index 000000000..da514cac7
--- /dev/null
+++ b/costa/native/external/lapack/dtprfs.f
@@ -0,0 +1,375 @@
+      SUBROUTINE DTPRFS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, X, LDX,
+     $                   FERR, BERR, WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, TRANS, UPLO
+      INTEGER            INFO, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   AP( * ), B( LDB, * ), BERR( * ), FERR( * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTPRFS provides error bounds and backward error estimates for the
+*  solution to a system of linear equations with a triangular packed
+*  coefficient matrix.
+*
+*  The solution matrix X must be computed by DTPTRS or some other
+*  means before entering this routine.  DTPRFS does not do iterative
+*  refinement because doing so cannot improve the backward error.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B  (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose = Transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AP      (input) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          The upper or lower triangular matrix A, packed columnwise in
+*          a linear array.  The j-th column of A is stored in the array
+*          AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*          If DIAG = 'U', the diagonal elements of A are not referenced
+*          and are assumed to be 1.
+*
+*  B       (input) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input) DOUBLE PRECISION array, dimension (LDX,NRHS)
+*          The solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN, NOUNIT, UPPER
+      CHARACTER          TRANST
+      INTEGER            I, J, K, KASE, KC, NZ
+      DOUBLE PRECISION   EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DCOPY, DLACON, DTPMV, DTPSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DTPRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      IF( NOTRAN ) THEN
+         TRANST = 'T'
+      ELSE
+         TRANST = 'N'
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = DLAMCH( 'Epsilon' )
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 250 J = 1, NRHS
+*
+*        Compute residual R = B - op(A) * X,
+*        where op(A) = A or A', depending on TRANS.
+*
+         CALL DCOPY( N, X( 1, J ), 1, WORK( N+1 ), 1 )
+         CALL DTPMV( UPLO, TRANS, DIAG, N, AP, WORK( N+1 ), 1 )
+         CALL DAXPY( N, -ONE, B( 1, J ), 1, WORK( N+1 ), 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 20 I = 1, N
+            WORK( I ) = ABS( B( I, J ) )
+   20    CONTINUE
+*
+         IF( NOTRAN ) THEN
+*
+*           Compute abs(A)*abs(X) + abs(B).
+*
+            IF( UPPER ) THEN
+               KC = 1
+               IF( NOUNIT ) THEN
+                  DO 40 K = 1, N
+                     XK = ABS( X( K, J ) )
+                     DO 30 I = 1, K
+                        WORK( I ) = WORK( I ) + ABS( AP( KC+I-1 ) )*XK
+   30                CONTINUE
+                     KC = KC + K
+   40             CONTINUE
+               ELSE
+                  DO 60 K = 1, N
+                     XK = ABS( X( K, J ) )
+                     DO 50 I = 1, K - 1
+                        WORK( I ) = WORK( I ) + ABS( AP( KC+I-1 ) )*XK
+   50                CONTINUE
+                     WORK( K ) = WORK( K ) + XK
+                     KC = KC + K
+   60             CONTINUE
+               END IF
+            ELSE
+               KC = 1
+               IF( NOUNIT ) THEN
+                  DO 80 K = 1, N
+                     XK = ABS( X( K, J ) )
+                     DO 70 I = K, N
+                        WORK( I ) = WORK( I ) + ABS( AP( KC+I-K ) )*XK
+   70                CONTINUE
+                     KC = KC + N - K + 1
+   80             CONTINUE
+               ELSE
+                  DO 100 K = 1, N
+                     XK = ABS( X( K, J ) )
+                     DO 90 I = K + 1, N
+                        WORK( I ) = WORK( I ) + ABS( AP( KC+I-K ) )*XK
+   90                CONTINUE
+                     WORK( K ) = WORK( K ) + XK
+                     KC = KC + N - K + 1
+  100             CONTINUE
+               END IF
+            END IF
+         ELSE
+*
+*           Compute abs(A')*abs(X) + abs(B).
+*
+            IF( UPPER ) THEN
+               KC = 1
+               IF( NOUNIT ) THEN
+                  DO 120 K = 1, N
+                     S = ZERO
+                     DO 110 I = 1, K
+                        S = S + ABS( AP( KC+I-1 ) )*ABS( X( I, J ) )
+  110                CONTINUE
+                     WORK( K ) = WORK( K ) + S
+                     KC = KC + K
+  120             CONTINUE
+               ELSE
+                  DO 140 K = 1, N
+                     S = ABS( X( K, J ) )
+                     DO 130 I = 1, K - 1
+                        S = S + ABS( AP( KC+I-1 ) )*ABS( X( I, J ) )
+  130                CONTINUE
+                     WORK( K ) = WORK( K ) + S
+                     KC = KC + K
+  140             CONTINUE
+               END IF
+            ELSE
+               KC = 1
+               IF( NOUNIT ) THEN
+                  DO 160 K = 1, N
+                     S = ZERO
+                     DO 150 I = K, N
+                        S = S + ABS( AP( KC+I-K ) )*ABS( X( I, J ) )
+  150                CONTINUE
+                     WORK( K ) = WORK( K ) + S
+                     KC = KC + N - K + 1
+  160             CONTINUE
+               ELSE
+                  DO 180 K = 1, N
+                     S = ABS( X( K, J ) )
+                     DO 170 I = K + 1, N
+                        S = S + ABS( AP( KC+I-K ) )*ABS( X( I, J ) )
+  170                CONTINUE
+                     WORK( K ) = WORK( K ) + S
+                     KC = KC + N - K + 1
+  180             CONTINUE
+               END IF
+            END IF
+         END IF
+         S = ZERO
+         DO 190 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) )
+            ELSE
+               S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) /
+     $             ( WORK( I )+SAFE1 ) )
+            END IF
+  190    CONTINUE
+         BERR( J ) = S
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(op(A)))*
+*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(op(A)) is the inverse of op(A)
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(op(A))*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use DLACON to estimate the infinity-norm of the matrix
+*           inv(op(A)) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) )))
+*
+         DO 200 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I )
+            ELSE
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1
+            END IF
+  200    CONTINUE
+*
+         KASE = 0
+  210    CONTINUE
+         CALL DLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ),
+     $                KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(op(A)').
+*
+               CALL DTPSV( UPLO, TRANST, DIAG, N, AP, WORK( N+1 ), 1 )
+               DO 220 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  220          CONTINUE
+            ELSE
+*
+*              Multiply by inv(op(A))*diag(W).
+*
+               DO 230 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  230          CONTINUE
+               CALL DTPSV( UPLO, TRANS, DIAG, N, AP, WORK( N+1 ), 1 )
+            END IF
+            GO TO 210
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 240 I = 1, N
+            LSTRES = MAX( LSTRES, ABS( X( I, J ) ) )
+  240    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  250 CONTINUE
+*
+      RETURN
+*
+*     End of DTPRFS
+*
+      END
diff --git a/costa/native/external/lapack/dtptri.f b/costa/native/external/lapack/dtptri.f
new file mode 100644
index 000000000..230620d2d
--- /dev/null
+++ b/costa/native/external/lapack/dtptri.f
@@ -0,0 +1,176 @@
+      SUBROUTINE DTPTRI( UPLO, DIAG, N, AP, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, UPLO
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTPTRI computes the inverse of a real upper or lower triangular
+*  matrix A stored in packed format.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangular matrix A, stored
+*          columnwise in a linear array.  The j-th column of A is stored
+*          in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*((2*n-j)/2) = A(i,j) for j<=i<=n.
+*          See below for further details.
+*          On exit, the (triangular) inverse of the original matrix, in
+*          the same packed storage format.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, A(i,i) is exactly zero.  The triangular
+*                matrix is singular and its inverse can not be computed.
+*
+*  Further Details
+*  ===============
+*
+*  A triangular matrix A can be transferred to packed storage using one
+*  of the following program segments:
+*
+*  UPLO = 'U':                      UPLO = 'L':
+*
+*        JC = 1                           JC = 1
+*        DO 2 J = 1, N                    DO 2 J = 1, N
+*           DO 1 I = 1, J                    DO 1 I = J, N
+*              AP(JC+I-1) = A(I,J)              AP(JC+I-J) = A(I,J)
+*      1    CONTINUE                    1    CONTINUE
+*           JC = JC + J                      JC = JC + N - J + 1
+*      2 CONTINUE                       2 CONTINUE
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, UPPER
+      INTEGER            J, JC, JCLAST, JJ
+      DOUBLE PRECISION   AJJ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSCAL, DTPMV, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOUNIT = LSAME( DIAG, 'N' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DTPTRI', -INFO )
+         RETURN
+      END IF
+*
+*     Check for singularity if non-unit.
+*
+      IF( NOUNIT ) THEN
+         IF( UPPER ) THEN
+            JJ = 0
+            DO 10 INFO = 1, N
+               JJ = JJ + INFO
+               IF( AP( JJ ).EQ.ZERO )
+     $            RETURN
+   10       CONTINUE
+         ELSE
+            JJ = 1
+            DO 20 INFO = 1, N
+               IF( AP( JJ ).EQ.ZERO )
+     $            RETURN
+               JJ = JJ + N - INFO + 1
+   20       CONTINUE
+         END IF
+         INFO = 0
+      END IF
+*
+      IF( UPPER ) THEN
+*
+*        Compute inverse of upper triangular matrix.
+*
+         JC = 1
+         DO 30 J = 1, N
+            IF( NOUNIT ) THEN
+               AP( JC+J-1 ) = ONE / AP( JC+J-1 )
+               AJJ = -AP( JC+J-1 )
+            ELSE
+               AJJ = -ONE
+            END IF
+*
+*           Compute elements 1:j-1 of j-th column.
+*
+            CALL DTPMV( 'Upper', 'No transpose', DIAG, J-1, AP,
+     $                  AP( JC ), 1 )
+            CALL DSCAL( J-1, AJJ, AP( JC ), 1 )
+            JC = JC + J
+   30    CONTINUE
+*
+      ELSE
+*
+*        Compute inverse of lower triangular matrix.
+*
+         JC = N*( N+1 ) / 2
+         DO 40 J = N, 1, -1
+            IF( NOUNIT ) THEN
+               AP( JC ) = ONE / AP( JC )
+               AJJ = -AP( JC )
+            ELSE
+               AJJ = -ONE
+            END IF
+            IF( J.LT.N ) THEN
+*
+*              Compute elements j+1:n of j-th column.
+*
+               CALL DTPMV( 'Lower', 'No transpose', DIAG, N-J,
+     $                     AP( JCLAST ), AP( JC+1 ), 1 )
+               CALL DSCAL( N-J, AJJ, AP( JC+1 ), 1 )
+            END IF
+            JCLAST = JC
+            JC = JC - N + J - 2
+   40    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DTPTRI
+*
+      END
diff --git a/costa/native/external/lapack/dtptrs.f b/costa/native/external/lapack/dtptrs.f
new file mode 100644
index 000000000..886901528
--- /dev/null
+++ b/costa/native/external/lapack/dtptrs.f
@@ -0,0 +1,154 @@
+      SUBROUTINE DTPTRS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, TRANS, UPLO
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   AP( * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTPTRS solves a triangular system of the form
+*
+*     A * X = B  or  A**T * X = B,
+*
+*  where A is a triangular matrix of order N stored in packed format,
+*  and B is an N-by-NRHS matrix.  A check is made to verify that A is
+*  nonsingular.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B  (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose = Transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AP      (input) DOUBLE PRECISION array, dimension (N*(N+1)/2)
+*          The upper or lower triangular matrix A, packed columnwise in
+*          a linear array.  The j-th column of A is stored in the array
+*          AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, if INFO = 0, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the i-th diagonal element of A is zero,
+*                indicating that the matrix is singular and the
+*                solutions X have not been computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, UPPER
+      INTEGER            J, JC
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DTPSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOUNIT = LSAME( DIAG, 'N' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.
+     $         LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DTPTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Check for singularity.
+*
+      IF( NOUNIT ) THEN
+         IF( UPPER ) THEN
+            JC = 1
+            DO 10 INFO = 1, N
+               IF( AP( JC+INFO-1 ).EQ.ZERO )
+     $            RETURN
+               JC = JC + INFO
+   10       CONTINUE
+         ELSE
+            JC = 1
+            DO 20 INFO = 1, N
+               IF( AP( JC ).EQ.ZERO )
+     $            RETURN
+               JC = JC + N - INFO + 1
+   20       CONTINUE
+         END IF
+      END IF
+      INFO = 0
+*
+*     Solve A * x = b  or  A' * x = b.
+*
+      DO 30 J = 1, NRHS
+         CALL DTPSV( UPLO, TRANS, DIAG, N, AP, B( 1, J ), 1 )
+   30 CONTINUE
+*
+      RETURN
+*
+*     End of DTPTRS
+*
+      END
diff --git a/costa/native/external/lapack/dtrcon.f b/costa/native/external/lapack/dtrcon.f
new file mode 100644
index 000000000..8da58c760
--- /dev/null
+++ b/costa/native/external/lapack/dtrcon.f
@@ -0,0 +1,193 @@
+      SUBROUTINE DTRCON( NORM, UPLO, DIAG, N, A, LDA, RCOND, WORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORM, UPLO
+      INTEGER            INFO, LDA, N
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTRCON estimates the reciprocal of the condition number of a
+*  triangular matrix A, in either the 1-norm or the infinity-norm.
+*
+*  The norm of A is computed and an estimate is obtained for
+*  norm(inv(A)), then the reciprocal of the condition number is
+*  computed as
+*     RCOND = 1 / ( norm(A) * norm(inv(A)) ).
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies whether the 1-norm condition number or the
+*          infinity-norm condition number is required:
+*          = '1' or 'O':  1-norm;
+*          = 'I':         Infinity-norm.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+*          The triangular matrix A.  If UPLO = 'U', the leading N-by-N
+*          upper triangular part of the array A contains the upper
+*          triangular matrix, and the strictly lower triangular part of
+*          A is not referenced.  If UPLO = 'L', the leading N-by-N lower
+*          triangular part of the array A contains the lower triangular
+*          matrix, and the strictly upper triangular part of A is not
+*          referenced.  If DIAG = 'U', the diagonal elements of A are
+*          also not referenced and are assumed to be 1.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(norm(A) * norm(inv(A))).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, ONENRM, UPPER
+      CHARACTER          NORMIN
+      INTEGER            IX, KASE, KASE1
+      DOUBLE PRECISION   AINVNM, ANORM, SCALE, SMLNUM, XNORM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DLAMCH, DLANTR
+      EXTERNAL           LSAME, IDAMAX, DLAMCH, DLANTR
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLACON, DLATRS, DRSCL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+      IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DTRCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      END IF
+*
+      RCOND = ZERO
+      SMLNUM = DLAMCH( 'Safe minimum' )*DBLE( MAX( 1, N ) )
+*
+*     Compute the norm of the triangular matrix A.
+*
+      ANORM = DLANTR( NORM, UPLO, DIAG, N, N, A, LDA, WORK )
+*
+*     Continue only if ANORM > 0.
+*
+      IF( ANORM.GT.ZERO ) THEN
+*
+*        Estimate the norm of the inverse of A.
+*
+         AINVNM = ZERO
+         NORMIN = 'N'
+         IF( ONENRM ) THEN
+            KASE1 = 1
+         ELSE
+            KASE1 = 2
+         END IF
+         KASE = 0
+   10    CONTINUE
+         CALL DLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.KASE1 ) THEN
+*
+*              Multiply by inv(A).
+*
+               CALL DLATRS( UPLO, 'No transpose', DIAG, NORMIN, N, A,
+     $                      LDA, WORK, SCALE, WORK( 2*N+1 ), INFO )
+            ELSE
+*
+*              Multiply by inv(A').
+*
+               CALL DLATRS( UPLO, 'Transpose', DIAG, NORMIN, N, A, LDA,
+     $                      WORK, SCALE, WORK( 2*N+1 ), INFO )
+            END IF
+            NORMIN = 'Y'
+*
+*           Multiply by 1/SCALE if doing so will not cause overflow.
+*
+            IF( SCALE.NE.ONE ) THEN
+               IX = IDAMAX( N, WORK, 1 )
+               XNORM = ABS( WORK( IX ) )
+               IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO )
+     $            GO TO 20
+               CALL DRSCL( N, SCALE, WORK, 1 )
+            END IF
+            GO TO 10
+         END IF
+*
+*        Compute the estimate of the reciprocal condition number.
+*
+         IF( AINVNM.NE.ZERO )
+     $      RCOND = ( ONE / ANORM ) / AINVNM
+      END IF
+*
+   20 CONTINUE
+      RETURN
+*
+*     End of DTRCON
+*
+      END
diff --git a/costa/native/external/lapack/dtrevc.f b/costa/native/external/lapack/dtrevc.f
new file mode 100644
index 000000000..b30f1aab0
--- /dev/null
+++ b/costa/native/external/lapack/dtrevc.f
@@ -0,0 +1,1005 @@
+      SUBROUTINE DTREVC( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR,
+     $                   LDVR, MM, M, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          HOWMNY, SIDE
+      INTEGER            INFO, LDT, LDVL, LDVR, M, MM, N
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            SELECT( * )
+      DOUBLE PRECISION   T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTREVC computes some or all of the right and/or left eigenvectors of
+*  a real upper quasi-triangular matrix T.
+*
+*  The right eigenvector x and the left eigenvector y of T corresponding
+*  to an eigenvalue w are defined by:
+*
+*               T*x = w*x,     y'*T = w*y'
+*
+*  where y' denotes the conjugate transpose of the vector y.
+*
+*  If all eigenvectors are requested, the routine may either return the
+*  matrices X and/or Y of right or left eigenvectors of T, or the
+*  products Q*X and/or Q*Y, where Q is an input orthogonal
+*  matrix. If T was obtained from the real-Schur factorization of an
+*  original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of
+*  right or left eigenvectors of A.
+*
+*  T must be in Schur canonical form (as returned by DHSEQR), that is,
+*  block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each
+*  2-by-2 diagonal block has its diagonal elements equal and its
+*  off-diagonal elements of opposite sign.  Corresponding to each 2-by-2
+*  diagonal block is a complex conjugate pair of eigenvalues and
+*  eigenvectors; only one eigenvector of the pair is computed, namely
+*  the one corresponding to the eigenvalue with positive imaginary part.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'R':  compute right eigenvectors only;
+*          = 'L':  compute left eigenvectors only;
+*          = 'B':  compute both right and left eigenvectors.
+*
+*  HOWMNY  (input) CHARACTER*1
+*          = 'A':  compute all right and/or left eigenvectors;
+*          = 'B':  compute all right and/or left eigenvectors,
+*                  and backtransform them using the input matrices
+*                  supplied in VR and/or VL;
+*          = 'S':  compute selected right and/or left eigenvectors,
+*                  specified by the logical array SELECT.
+*
+*  SELECT  (input/output) LOGICAL array, dimension (N)
+*          If HOWMNY = 'S', SELECT specifies the eigenvectors to be
+*          computed.
+*          If HOWMNY = 'A' or 'B', SELECT is not referenced.
+*          To select the real eigenvector corresponding to a real
+*          eigenvalue w(j), SELECT(j) must be set to .TRUE..  To select
+*          the complex eigenvector corresponding to a complex conjugate
+*          pair w(j) and w(j+1), either SELECT(j) or SELECT(j+1) must be
+*          set to .TRUE.; then on exit SELECT(j) is .TRUE. and
+*          SELECT(j+1) is .FALSE..
+*
+*  N       (input) INTEGER
+*          The order of the matrix T. N >= 0.
+*
+*  T       (input) DOUBLE PRECISION array, dimension (LDT,N)
+*          The upper quasi-triangular matrix T in Schur canonical form.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= max(1,N).
+*
+*  VL      (input/output) DOUBLE PRECISION array, dimension (LDVL,MM)
+*          On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must
+*          contain an N-by-N matrix Q (usually the orthogonal matrix Q
+*          of Schur vectors returned by DHSEQR).
+*          On exit, if SIDE = 'L' or 'B', VL contains:
+*          if HOWMNY = 'A', the matrix Y of left eigenvectors of T;
+*                           VL has the same quasi-lower triangular form
+*                           as T'. If T(i,i) is a real eigenvalue, then
+*                           the i-th column VL(i) of VL  is its
+*                           corresponding eigenvector. If T(i:i+1,i:i+1)
+*                           is a 2-by-2 block whose eigenvalues are
+*                           complex-conjugate eigenvalues of T, then
+*                           VL(i)+sqrt(-1)*VL(i+1) is the complex
+*                           eigenvector corresponding to the eigenvalue
+*                           with positive real part.
+*          if HOWMNY = 'B', the matrix Q*Y;
+*          if HOWMNY = 'S', the left eigenvectors of T specified by
+*                           SELECT, stored consecutively in the columns
+*                           of VL, in the same order as their
+*                           eigenvalues.
+*          A complex eigenvector corresponding to a complex eigenvalue
+*          is stored in two consecutive columns, the first holding the
+*          real part, and the second the imaginary part.
+*          If SIDE = 'R', VL is not referenced.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the array VL.  LDVL >= max(1,N) if
+*          SIDE = 'L' or 'B'; LDVL >= 1 otherwise.
+*
+*  VR      (input/output) DOUBLE PRECISION array, dimension (LDVR,MM)
+*          On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must
+*          contain an N-by-N matrix Q (usually the orthogonal matrix Q
+*          of Schur vectors returned by DHSEQR).
+*          On exit, if SIDE = 'R' or 'B', VR contains:
+*          if HOWMNY = 'A', the matrix X of right eigenvectors of T;
+*                           VR has the same quasi-upper triangular form
+*                           as T. If T(i,i) is a real eigenvalue, then
+*                           the i-th column VR(i) of VR  is its
+*                           corresponding eigenvector. If T(i:i+1,i:i+1)
+*                           is a 2-by-2 block whose eigenvalues are
+*                           complex-conjugate eigenvalues of T, then
+*                           VR(i)+sqrt(-1)*VR(i+1) is the complex
+*                           eigenvector corresponding to the eigenvalue
+*                           with positive real part.
+*          if HOWMNY = 'B', the matrix Q*X;
+*          if HOWMNY = 'S', the right eigenvectors of T specified by
+*                           SELECT, stored consecutively in the columns
+*                           of VR, in the same order as their
+*                           eigenvalues.
+*          A complex eigenvector corresponding to a complex eigenvalue
+*          is stored in two consecutive columns, the first holding the
+*          real part and the second the imaginary part.
+*          If SIDE = 'L', VR is not referenced.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the array VR.  LDVR >= max(1,N) if
+*          SIDE = 'R' or 'B'; LDVR >= 1 otherwise.
+*
+*  MM      (input) INTEGER
+*          The number of columns in the arrays VL and/or VR. MM >= M.
+*
+*  M       (output) INTEGER
+*          The number of columns in the arrays VL and/or VR actually
+*          used to store the eigenvectors.
+*          If HOWMNY = 'A' or 'B', M is set to N.
+*          Each selected real eigenvector occupies one column and each
+*          selected complex eigenvector occupies two columns.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The algorithm used in this program is basically backward (forward)
+*  substitution, with scaling to make the the code robust against
+*  possible overflow.
+*
+*  Each eigenvector is normalized so that the element of largest
+*  magnitude has magnitude 1; here the magnitude of a complex number
+*  (x,y) is taken to be |x| + |y|.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLV, BOTHV, LEFTV, OVER, PAIR, RIGHTV, SOMEV
+      INTEGER            I, IERR, II, IP, IS, J, J1, J2, JNXT, K, KI, N2
+      DOUBLE PRECISION   BETA, BIGNUM, EMAX, OVFL, REC, REMAX, SCALE,
+     $                   SMIN, SMLNUM, ULP, UNFL, VCRIT, VMAX, WI, WR,
+     $                   XNORM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DDOT, DLAMCH
+      EXTERNAL           LSAME, IDAMAX, DDOT, DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DCOPY, DGEMV, DLALN2, DSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   X( 2, 2 )
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      BOTHV = LSAME( SIDE, 'B' )
+      RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV
+      LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV
+*
+      ALLV = LSAME( HOWMNY, 'A' )
+      OVER = LSAME( HOWMNY, 'B' )
+      SOMEV = LSAME( HOWMNY, 'S' )
+*
+      INFO = 0
+      IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN
+         INFO = -1
+      ELSE IF( .NOT.ALLV .AND. .NOT.OVER .AND. .NOT.SOMEV ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDT.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDVL.LT.1 .OR. ( LEFTV .AND. LDVL.LT.N ) ) THEN
+         INFO = -8
+      ELSE IF( LDVR.LT.1 .OR. ( RIGHTV .AND. LDVR.LT.N ) ) THEN
+         INFO = -10
+      ELSE
+*
+*        Set M to the number of columns required to store the selected
+*        eigenvectors, standardize the array SELECT if necessary, and
+*        test MM.
+*
+         IF( SOMEV ) THEN
+            M = 0
+            PAIR = .FALSE.
+            DO 10 J = 1, N
+               IF( PAIR ) THEN
+                  PAIR = .FALSE.
+                  SELECT( J ) = .FALSE.
+               ELSE
+                  IF( J.LT.N ) THEN
+                     IF( T( J+1, J ).EQ.ZERO ) THEN
+                        IF( SELECT( J ) )
+     $                     M = M + 1
+                     ELSE
+                        PAIR = .TRUE.
+                        IF( SELECT( J ) .OR. SELECT( J+1 ) ) THEN
+                           SELECT( J ) = .TRUE.
+                           M = M + 2
+                        END IF
+                     END IF
+                  ELSE
+                     IF( SELECT( N ) )
+     $                  M = M + 1
+                  END IF
+               END IF
+   10       CONTINUE
+         ELSE
+            M = N
+         END IF
+*
+         IF( MM.LT.M ) THEN
+            INFO = -11
+         END IF
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DTREVC', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Set the constants to control overflow.
+*
+      UNFL = DLAMCH( 'Safe minimum' )
+      OVFL = ONE / UNFL
+      CALL DLABAD( UNFL, OVFL )
+      ULP = DLAMCH( 'Precision' )
+      SMLNUM = UNFL*( N / ULP )
+      BIGNUM = ( ONE-ULP ) / SMLNUM
+*
+*     Compute 1-norm of each column of strictly upper triangular
+*     part of T to control overflow in triangular solver.
+*
+      WORK( 1 ) = ZERO
+      DO 30 J = 2, N
+         WORK( J ) = ZERO
+         DO 20 I = 1, J - 1
+            WORK( J ) = WORK( J ) + ABS( T( I, J ) )
+   20    CONTINUE
+   30 CONTINUE
+*
+*     Index IP is used to specify the real or complex eigenvalue:
+*       IP = 0, real eigenvalue,
+*            1, first of conjugate complex pair: (wr,wi)
+*           -1, second of conjugate complex pair: (wr,wi)
+*
+      N2 = 2*N
+*
+      IF( RIGHTV ) THEN
+*
+*        Compute right eigenvectors.
+*
+         IP = 0
+         IS = M
+         DO 140 KI = N, 1, -1
+*
+            IF( IP.EQ.1 )
+     $         GO TO 130
+            IF( KI.EQ.1 )
+     $         GO TO 40
+            IF( T( KI, KI-1 ).EQ.ZERO )
+     $         GO TO 40
+            IP = -1
+*
+   40       CONTINUE
+            IF( SOMEV ) THEN
+               IF( IP.EQ.0 ) THEN
+                  IF( .NOT.SELECT( KI ) )
+     $               GO TO 130
+               ELSE
+                  IF( .NOT.SELECT( KI-1 ) )
+     $               GO TO 130
+               END IF
+            END IF
+*
+*           Compute the KI-th eigenvalue (WR,WI).
+*
+            WR = T( KI, KI )
+            WI = ZERO
+            IF( IP.NE.0 )
+     $         WI = SQRT( ABS( T( KI, KI-1 ) ) )*
+     $              SQRT( ABS( T( KI-1, KI ) ) )
+            SMIN = MAX( ULP*( ABS( WR )+ABS( WI ) ), SMLNUM )
+*
+            IF( IP.EQ.0 ) THEN
+*
+*              Real right eigenvector
+*
+               WORK( KI+N ) = ONE
+*
+*              Form right-hand side
+*
+               DO 50 K = 1, KI - 1
+                  WORK( K+N ) = -T( K, KI )
+   50          CONTINUE
+*
+*              Solve the upper quasi-triangular system:
+*                 (T(1:KI-1,1:KI-1) - WR)*X = SCALE*WORK.
+*
+               JNXT = KI - 1
+               DO 60 J = KI - 1, 1, -1
+                  IF( J.GT.JNXT )
+     $               GO TO 60
+                  J1 = J
+                  J2 = J
+                  JNXT = J - 1
+                  IF( J.GT.1 ) THEN
+                     IF( T( J, J-1 ).NE.ZERO ) THEN
+                        J1 = J - 1
+                        JNXT = J - 2
+                     END IF
+                  END IF
+*
+                  IF( J1.EQ.J2 ) THEN
+*
+*                    1-by-1 diagonal block
+*
+                     CALL DLALN2( .FALSE., 1, 1, SMIN, ONE, T( J, J ),
+     $                            LDT, ONE, ONE, WORK( J+N ), N, WR,
+     $                            ZERO, X, 2, SCALE, XNORM, IERR )
+*
+*                    Scale X(1,1) to avoid overflow when updating
+*                    the right-hand side.
+*
+                     IF( XNORM.GT.ONE ) THEN
+                        IF( WORK( J ).GT.BIGNUM / XNORM ) THEN
+                           X( 1, 1 ) = X( 1, 1 ) / XNORM
+                           SCALE = SCALE / XNORM
+                        END IF
+                     END IF
+*
+*                    Scale if necessary
+*
+                     IF( SCALE.NE.ONE )
+     $                  CALL DSCAL( KI, SCALE, WORK( 1+N ), 1 )
+                     WORK( J+N ) = X( 1, 1 )
+*
+*                    Update right-hand side
+*
+                     CALL DAXPY( J-1, -X( 1, 1 ), T( 1, J ), 1,
+     $                           WORK( 1+N ), 1 )
+*
+                  ELSE
+*
+*                    2-by-2 diagonal block
+*
+                     CALL DLALN2( .FALSE., 2, 1, SMIN, ONE,
+     $                            T( J-1, J-1 ), LDT, ONE, ONE,
+     $                            WORK( J-1+N ), N, WR, ZERO, X, 2,
+     $                            SCALE, XNORM, IERR )
+*
+*                    Scale X(1,1) and X(2,1) to avoid overflow when
+*                    updating the right-hand side.
+*
+                     IF( XNORM.GT.ONE ) THEN
+                        BETA = MAX( WORK( J-1 ), WORK( J ) )
+                        IF( BETA.GT.BIGNUM / XNORM ) THEN
+                           X( 1, 1 ) = X( 1, 1 ) / XNORM
+                           X( 2, 1 ) = X( 2, 1 ) / XNORM
+                           SCALE = SCALE / XNORM
+                        END IF
+                     END IF
+*
+*                    Scale if necessary
+*
+                     IF( SCALE.NE.ONE )
+     $                  CALL DSCAL( KI, SCALE, WORK( 1+N ), 1 )
+                     WORK( J-1+N ) = X( 1, 1 )
+                     WORK( J+N ) = X( 2, 1 )
+*
+*                    Update right-hand side
+*
+                     CALL DAXPY( J-2, -X( 1, 1 ), T( 1, J-1 ), 1,
+     $                           WORK( 1+N ), 1 )
+                     CALL DAXPY( J-2, -X( 2, 1 ), T( 1, J ), 1,
+     $                           WORK( 1+N ), 1 )
+                  END IF
+   60          CONTINUE
+*
+*              Copy the vector x or Q*x to VR and normalize.
+*
+               IF( .NOT.OVER ) THEN
+                  CALL DCOPY( KI, WORK( 1+N ), 1, VR( 1, IS ), 1 )
+*
+                  II = IDAMAX( KI, VR( 1, IS ), 1 )
+                  REMAX = ONE / ABS( VR( II, IS ) )
+                  CALL DSCAL( KI, REMAX, VR( 1, IS ), 1 )
+*
+                  DO 70 K = KI + 1, N
+                     VR( K, IS ) = ZERO
+   70             CONTINUE
+               ELSE
+                  IF( KI.GT.1 )
+     $               CALL DGEMV( 'N', N, KI-1, ONE, VR, LDVR,
+     $                           WORK( 1+N ), 1, WORK( KI+N ),
+     $                           VR( 1, KI ), 1 )
+*
+                  II = IDAMAX( N, VR( 1, KI ), 1 )
+                  REMAX = ONE / ABS( VR( II, KI ) )
+                  CALL DSCAL( N, REMAX, VR( 1, KI ), 1 )
+               END IF
+*
+            ELSE
+*
+*              Complex right eigenvector.
+*
+*              Initial solve
+*                [ (T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I* WI)]*X = 0.
+*                [ (T(KI,KI-1)   T(KI,KI)   )               ]
+*
+               IF( ABS( T( KI-1, KI ) ).GE.ABS( T( KI, KI-1 ) ) ) THEN
+                  WORK( KI-1+N ) = ONE
+                  WORK( KI+N2 ) = WI / T( KI-1, KI )
+               ELSE
+                  WORK( KI-1+N ) = -WI / T( KI, KI-1 )
+                  WORK( KI+N2 ) = ONE
+               END IF
+               WORK( KI+N ) = ZERO
+               WORK( KI-1+N2 ) = ZERO
+*
+*              Form right-hand side
+*
+               DO 80 K = 1, KI - 2
+                  WORK( K+N ) = -WORK( KI-1+N )*T( K, KI-1 )
+                  WORK( K+N2 ) = -WORK( KI+N2 )*T( K, KI )
+   80          CONTINUE
+*
+*              Solve upper quasi-triangular system:
+*              (T(1:KI-2,1:KI-2) - (WR+i*WI))*X = SCALE*(WORK+i*WORK2)
+*
+               JNXT = KI - 2
+               DO 90 J = KI - 2, 1, -1
+                  IF( J.GT.JNXT )
+     $               GO TO 90
+                  J1 = J
+                  J2 = J
+                  JNXT = J - 1
+                  IF( J.GT.1 ) THEN
+                     IF( T( J, J-1 ).NE.ZERO ) THEN
+                        J1 = J - 1
+                        JNXT = J - 2
+                     END IF
+                  END IF
+*
+                  IF( J1.EQ.J2 ) THEN
+*
+*                    1-by-1 diagonal block
+*
+                     CALL DLALN2( .FALSE., 1, 2, SMIN, ONE, T( J, J ),
+     $                            LDT, ONE, ONE, WORK( J+N ), N, WR, WI,
+     $                            X, 2, SCALE, XNORM, IERR )
+*
+*                    Scale X(1,1) and X(1,2) to avoid overflow when
+*                    updating the right-hand side.
+*
+                     IF( XNORM.GT.ONE ) THEN
+                        IF( WORK( J ).GT.BIGNUM / XNORM ) THEN
+                           X( 1, 1 ) = X( 1, 1 ) / XNORM
+                           X( 1, 2 ) = X( 1, 2 ) / XNORM
+                           SCALE = SCALE / XNORM
+                        END IF
+                     END IF
+*
+*                    Scale if necessary
+*
+                     IF( SCALE.NE.ONE ) THEN
+                        CALL DSCAL( KI, SCALE, WORK( 1+N ), 1 )
+                        CALL DSCAL( KI, SCALE, WORK( 1+N2 ), 1 )
+                     END IF
+                     WORK( J+N ) = X( 1, 1 )
+                     WORK( J+N2 ) = X( 1, 2 )
+*
+*                    Update the right-hand side
+*
+                     CALL DAXPY( J-1, -X( 1, 1 ), T( 1, J ), 1,
+     $                           WORK( 1+N ), 1 )
+                     CALL DAXPY( J-1, -X( 1, 2 ), T( 1, J ), 1,
+     $                           WORK( 1+N2 ), 1 )
+*
+                  ELSE
+*
+*                    2-by-2 diagonal block
+*
+                     CALL DLALN2( .FALSE., 2, 2, SMIN, ONE,
+     $                            T( J-1, J-1 ), LDT, ONE, ONE,
+     $                            WORK( J-1+N ), N, WR, WI, X, 2, SCALE,
+     $                            XNORM, IERR )
+*
+*                    Scale X to avoid overflow when updating
+*                    the right-hand side.
+*
+                     IF( XNORM.GT.ONE ) THEN
+                        BETA = MAX( WORK( J-1 ), WORK( J ) )
+                        IF( BETA.GT.BIGNUM / XNORM ) THEN
+                           REC = ONE / XNORM
+                           X( 1, 1 ) = X( 1, 1 )*REC
+                           X( 1, 2 ) = X( 1, 2 )*REC
+                           X( 2, 1 ) = X( 2, 1 )*REC
+                           X( 2, 2 ) = X( 2, 2 )*REC
+                           SCALE = SCALE*REC
+                        END IF
+                     END IF
+*
+*                    Scale if necessary
+*
+                     IF( SCALE.NE.ONE ) THEN
+                        CALL DSCAL( KI, SCALE, WORK( 1+N ), 1 )
+                        CALL DSCAL( KI, SCALE, WORK( 1+N2 ), 1 )
+                     END IF
+                     WORK( J-1+N ) = X( 1, 1 )
+                     WORK( J+N ) = X( 2, 1 )
+                     WORK( J-1+N2 ) = X( 1, 2 )
+                     WORK( J+N2 ) = X( 2, 2 )
+*
+*                    Update the right-hand side
+*
+                     CALL DAXPY( J-2, -X( 1, 1 ), T( 1, J-1 ), 1,
+     $                           WORK( 1+N ), 1 )
+                     CALL DAXPY( J-2, -X( 2, 1 ), T( 1, J ), 1,
+     $                           WORK( 1+N ), 1 )
+                     CALL DAXPY( J-2, -X( 1, 2 ), T( 1, J-1 ), 1,
+     $                           WORK( 1+N2 ), 1 )
+                     CALL DAXPY( J-2, -X( 2, 2 ), T( 1, J ), 1,
+     $                           WORK( 1+N2 ), 1 )
+                  END IF
+   90          CONTINUE
+*
+*              Copy the vector x or Q*x to VR and normalize.
+*
+               IF( .NOT.OVER ) THEN
+                  CALL DCOPY( KI, WORK( 1+N ), 1, VR( 1, IS-1 ), 1 )
+                  CALL DCOPY( KI, WORK( 1+N2 ), 1, VR( 1, IS ), 1 )
+*
+                  EMAX = ZERO
+                  DO 100 K = 1, KI
+                     EMAX = MAX( EMAX, ABS( VR( K, IS-1 ) )+
+     $                      ABS( VR( K, IS ) ) )
+  100             CONTINUE
+*
+                  REMAX = ONE / EMAX
+                  CALL DSCAL( KI, REMAX, VR( 1, IS-1 ), 1 )
+                  CALL DSCAL( KI, REMAX, VR( 1, IS ), 1 )
+*
+                  DO 110 K = KI + 1, N
+                     VR( K, IS-1 ) = ZERO
+                     VR( K, IS ) = ZERO
+  110             CONTINUE
+*
+               ELSE
+*
+                  IF( KI.GT.2 ) THEN
+                     CALL DGEMV( 'N', N, KI-2, ONE, VR, LDVR,
+     $                           WORK( 1+N ), 1, WORK( KI-1+N ),
+     $                           VR( 1, KI-1 ), 1 )
+                     CALL DGEMV( 'N', N, KI-2, ONE, VR, LDVR,
+     $                           WORK( 1+N2 ), 1, WORK( KI+N2 ),
+     $                           VR( 1, KI ), 1 )
+                  ELSE
+                     CALL DSCAL( N, WORK( KI-1+N ), VR( 1, KI-1 ), 1 )
+                     CALL DSCAL( N, WORK( KI+N2 ), VR( 1, KI ), 1 )
+                  END IF
+*
+                  EMAX = ZERO
+                  DO 120 K = 1, N
+                     EMAX = MAX( EMAX, ABS( VR( K, KI-1 ) )+
+     $                      ABS( VR( K, KI ) ) )
+  120             CONTINUE
+                  REMAX = ONE / EMAX
+                  CALL DSCAL( N, REMAX, VR( 1, KI-1 ), 1 )
+                  CALL DSCAL( N, REMAX, VR( 1, KI ), 1 )
+               END IF
+            END IF
+*
+            IS = IS - 1
+            IF( IP.NE.0 )
+     $         IS = IS - 1
+  130       CONTINUE
+            IF( IP.EQ.1 )
+     $         IP = 0
+            IF( IP.EQ.-1 )
+     $         IP = 1
+  140    CONTINUE
+      END IF
+*
+      IF( LEFTV ) THEN
+*
+*        Compute left eigenvectors.
+*
+         IP = 0
+         IS = 1
+         DO 260 KI = 1, N
+*
+            IF( IP.EQ.-1 )
+     $         GO TO 250
+            IF( KI.EQ.N )
+     $         GO TO 150
+            IF( T( KI+1, KI ).EQ.ZERO )
+     $         GO TO 150
+            IP = 1
+*
+  150       CONTINUE
+            IF( SOMEV ) THEN
+               IF( .NOT.SELECT( KI ) )
+     $            GO TO 250
+            END IF
+*
+*           Compute the KI-th eigenvalue (WR,WI).
+*
+            WR = T( KI, KI )
+            WI = ZERO
+            IF( IP.NE.0 )
+     $         WI = SQRT( ABS( T( KI, KI+1 ) ) )*
+     $              SQRT( ABS( T( KI+1, KI ) ) )
+            SMIN = MAX( ULP*( ABS( WR )+ABS( WI ) ), SMLNUM )
+*
+            IF( IP.EQ.0 ) THEN
+*
+*              Real left eigenvector.
+*
+               WORK( KI+N ) = ONE
+*
+*              Form right-hand side
+*
+               DO 160 K = KI + 1, N
+                  WORK( K+N ) = -T( KI, K )
+  160          CONTINUE
+*
+*              Solve the quasi-triangular system:
+*                 (T(KI+1:N,KI+1:N) - WR)'*X = SCALE*WORK
+*
+               VMAX = ONE
+               VCRIT = BIGNUM
+*
+               JNXT = KI + 1
+               DO 170 J = KI + 1, N
+                  IF( J.LT.JNXT )
+     $               GO TO 170
+                  J1 = J
+                  J2 = J
+                  JNXT = J + 1
+                  IF( J.LT.N ) THEN
+                     IF( T( J+1, J ).NE.ZERO ) THEN
+                        J2 = J + 1
+                        JNXT = J + 2
+                     END IF
+                  END IF
+*
+                  IF( J1.EQ.J2 ) THEN
+*
+*                    1-by-1 diagonal block
+*
+*                    Scale if necessary to avoid overflow when forming
+*                    the right-hand side.
+*
+                     IF( WORK( J ).GT.VCRIT ) THEN
+                        REC = ONE / VMAX
+                        CALL DSCAL( N-KI+1, REC, WORK( KI+N ), 1 )
+                        VMAX = ONE
+                        VCRIT = BIGNUM
+                     END IF
+*
+                     WORK( J+N ) = WORK( J+N ) -
+     $                             DDOT( J-KI-1, T( KI+1, J ), 1,
+     $                             WORK( KI+1+N ), 1 )
+*
+*                    Solve (T(J,J)-WR)'*X = WORK
+*
+                     CALL DLALN2( .FALSE., 1, 1, SMIN, ONE, T( J, J ),
+     $                            LDT, ONE, ONE, WORK( J+N ), N, WR,
+     $                            ZERO, X, 2, SCALE, XNORM, IERR )
+*
+*                    Scale if necessary
+*
+                     IF( SCALE.NE.ONE )
+     $                  CALL DSCAL( N-KI+1, SCALE, WORK( KI+N ), 1 )
+                     WORK( J+N ) = X( 1, 1 )
+                     VMAX = MAX( ABS( WORK( J+N ) ), VMAX )
+                     VCRIT = BIGNUM / VMAX
+*
+                  ELSE
+*
+*                    2-by-2 diagonal block
+*
+*                    Scale if necessary to avoid overflow when forming
+*                    the right-hand side.
+*
+                     BETA = MAX( WORK( J ), WORK( J+1 ) )
+                     IF( BETA.GT.VCRIT ) THEN
+                        REC = ONE / VMAX
+                        CALL DSCAL( N-KI+1, REC, WORK( KI+N ), 1 )
+                        VMAX = ONE
+                        VCRIT = BIGNUM
+                     END IF
+*
+                     WORK( J+N ) = WORK( J+N ) -
+     $                             DDOT( J-KI-1, T( KI+1, J ), 1,
+     $                             WORK( KI+1+N ), 1 )
+*
+                     WORK( J+1+N ) = WORK( J+1+N ) -
+     $                               DDOT( J-KI-1, T( KI+1, J+1 ), 1,
+     $                               WORK( KI+1+N ), 1 )
+*
+*                    Solve
+*                      [T(J,J)-WR   T(J,J+1)     ]'* X = SCALE*( WORK1 )
+*                      [T(J+1,J)    T(J+1,J+1)-WR]             ( WORK2 )
+*
+                     CALL DLALN2( .TRUE., 2, 1, SMIN, ONE, T( J, J ),
+     $                            LDT, ONE, ONE, WORK( J+N ), N, WR,
+     $                            ZERO, X, 2, SCALE, XNORM, IERR )
+*
+*                    Scale if necessary
+*
+                     IF( SCALE.NE.ONE )
+     $                  CALL DSCAL( N-KI+1, SCALE, WORK( KI+N ), 1 )
+                     WORK( J+N ) = X( 1, 1 )
+                     WORK( J+1+N ) = X( 2, 1 )
+*
+                     VMAX = MAX( ABS( WORK( J+N ) ),
+     $                      ABS( WORK( J+1+N ) ), VMAX )
+                     VCRIT = BIGNUM / VMAX
+*
+                  END IF
+  170          CONTINUE
+*
+*              Copy the vector x or Q*x to VL and normalize.
+*
+               IF( .NOT.OVER ) THEN
+                  CALL DCOPY( N-KI+1, WORK( KI+N ), 1, VL( KI, IS ), 1 )
+*
+                  II = IDAMAX( N-KI+1, VL( KI, IS ), 1 ) + KI - 1
+                  REMAX = ONE / ABS( VL( II, IS ) )
+                  CALL DSCAL( N-KI+1, REMAX, VL( KI, IS ), 1 )
+*
+                  DO 180 K = 1, KI - 1
+                     VL( K, IS ) = ZERO
+  180             CONTINUE
+*
+               ELSE
+*
+                  IF( KI.LT.N )
+     $               CALL DGEMV( 'N', N, N-KI, ONE, VL( 1, KI+1 ), LDVL,
+     $                           WORK( KI+1+N ), 1, WORK( KI+N ),
+     $                           VL( 1, KI ), 1 )
+*
+                  II = IDAMAX( N, VL( 1, KI ), 1 )
+                  REMAX = ONE / ABS( VL( II, KI ) )
+                  CALL DSCAL( N, REMAX, VL( 1, KI ), 1 )
+*
+               END IF
+*
+            ELSE
+*
+*              Complex left eigenvector.
+*
+*               Initial solve:
+*                 ((T(KI,KI)    T(KI,KI+1) )' - (WR - I* WI))*X = 0.
+*                 ((T(KI+1,KI) T(KI+1,KI+1))                )
+*
+               IF( ABS( T( KI, KI+1 ) ).GE.ABS( T( KI+1, KI ) ) ) THEN
+                  WORK( KI+N ) = WI / T( KI, KI+1 )
+                  WORK( KI+1+N2 ) = ONE
+               ELSE
+                  WORK( KI+N ) = ONE
+                  WORK( KI+1+N2 ) = -WI / T( KI+1, KI )
+               END IF
+               WORK( KI+1+N ) = ZERO
+               WORK( KI+N2 ) = ZERO
+*
+*              Form right-hand side
+*
+               DO 190 K = KI + 2, N
+                  WORK( K+N ) = -WORK( KI+N )*T( KI, K )
+                  WORK( K+N2 ) = -WORK( KI+1+N2 )*T( KI+1, K )
+  190          CONTINUE
+*
+*              Solve complex quasi-triangular system:
+*              ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2
+*
+               VMAX = ONE
+               VCRIT = BIGNUM
+*
+               JNXT = KI + 2
+               DO 200 J = KI + 2, N
+                  IF( J.LT.JNXT )
+     $               GO TO 200
+                  J1 = J
+                  J2 = J
+                  JNXT = J + 1
+                  IF( J.LT.N ) THEN
+                     IF( T( J+1, J ).NE.ZERO ) THEN
+                        J2 = J + 1
+                        JNXT = J + 2
+                     END IF
+                  END IF
+*
+                  IF( J1.EQ.J2 ) THEN
+*
+*                    1-by-1 diagonal block
+*
+*                    Scale if necessary to avoid overflow when
+*                    forming the right-hand side elements.
+*
+                     IF( WORK( J ).GT.VCRIT ) THEN
+                        REC = ONE / VMAX
+                        CALL DSCAL( N-KI+1, REC, WORK( KI+N ), 1 )
+                        CALL DSCAL( N-KI+1, REC, WORK( KI+N2 ), 1 )
+                        VMAX = ONE
+                        VCRIT = BIGNUM
+                     END IF
+*
+                     WORK( J+N ) = WORK( J+N ) -
+     $                             DDOT( J-KI-2, T( KI+2, J ), 1,
+     $                             WORK( KI+2+N ), 1 )
+                     WORK( J+N2 ) = WORK( J+N2 ) -
+     $                              DDOT( J-KI-2, T( KI+2, J ), 1,
+     $                              WORK( KI+2+N2 ), 1 )
+*
+*                    Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2
+*
+                     CALL DLALN2( .FALSE., 1, 2, SMIN, ONE, T( J, J ),
+     $                            LDT, ONE, ONE, WORK( J+N ), N, WR,
+     $                            -WI, X, 2, SCALE, XNORM, IERR )
+*
+*                    Scale if necessary
+*
+                     IF( SCALE.NE.ONE ) THEN
+                        CALL DSCAL( N-KI+1, SCALE, WORK( KI+N ), 1 )
+                        CALL DSCAL( N-KI+1, SCALE, WORK( KI+N2 ), 1 )
+                     END IF
+                     WORK( J+N ) = X( 1, 1 )
+                     WORK( J+N2 ) = X( 1, 2 )
+                     VMAX = MAX( ABS( WORK( J+N ) ),
+     $                      ABS( WORK( J+N2 ) ), VMAX )
+                     VCRIT = BIGNUM / VMAX
+*
+                  ELSE
+*
+*                    2-by-2 diagonal block
+*
+*                    Scale if necessary to avoid overflow when forming
+*                    the right-hand side elements.
+*
+                     BETA = MAX( WORK( J ), WORK( J+1 ) )
+                     IF( BETA.GT.VCRIT ) THEN
+                        REC = ONE / VMAX
+                        CALL DSCAL( N-KI+1, REC, WORK( KI+N ), 1 )
+                        CALL DSCAL( N-KI+1, REC, WORK( KI+N2 ), 1 )
+                        VMAX = ONE
+                        VCRIT = BIGNUM
+                     END IF
+*
+                     WORK( J+N ) = WORK( J+N ) -
+     $                             DDOT( J-KI-2, T( KI+2, J ), 1,
+     $                             WORK( KI+2+N ), 1 )
+*
+                     WORK( J+N2 ) = WORK( J+N2 ) -
+     $                              DDOT( J-KI-2, T( KI+2, J ), 1,
+     $                              WORK( KI+2+N2 ), 1 )
+*
+                     WORK( J+1+N ) = WORK( J+1+N ) -
+     $                               DDOT( J-KI-2, T( KI+2, J+1 ), 1,
+     $                               WORK( KI+2+N ), 1 )
+*
+                     WORK( J+1+N2 ) = WORK( J+1+N2 ) -
+     $                                DDOT( J-KI-2, T( KI+2, J+1 ), 1,
+     $                                WORK( KI+2+N2 ), 1 )
+*
+*                    Solve 2-by-2 complex linear equation
+*                      ([T(j,j)   T(j,j+1)  ]'-(wr-i*wi)*I)*X = SCALE*B
+*                      ([T(j+1,j) T(j+1,j+1)]             )
+*
+                     CALL DLALN2( .TRUE., 2, 2, SMIN, ONE, T( J, J ),
+     $                            LDT, ONE, ONE, WORK( J+N ), N, WR,
+     $                            -WI, X, 2, SCALE, XNORM, IERR )
+*
+*                    Scale if necessary
+*
+                     IF( SCALE.NE.ONE ) THEN
+                        CALL DSCAL( N-KI+1, SCALE, WORK( KI+N ), 1 )
+                        CALL DSCAL( N-KI+1, SCALE, WORK( KI+N2 ), 1 )
+                     END IF
+                     WORK( J+N ) = X( 1, 1 )
+                     WORK( J+N2 ) = X( 1, 2 )
+                     WORK( J+1+N ) = X( 2, 1 )
+                     WORK( J+1+N2 ) = X( 2, 2 )
+                     VMAX = MAX( ABS( X( 1, 1 ) ), ABS( X( 1, 2 ) ),
+     $                      ABS( X( 2, 1 ) ), ABS( X( 2, 2 ) ), VMAX )
+                     VCRIT = BIGNUM / VMAX
+*
+                  END IF
+  200          CONTINUE
+*
+*              Copy the vector x or Q*x to VL and normalize.
+*
+  210          CONTINUE
+               IF( .NOT.OVER ) THEN
+                  CALL DCOPY( N-KI+1, WORK( KI+N ), 1, VL( KI, IS ), 1 )
+                  CALL DCOPY( N-KI+1, WORK( KI+N2 ), 1, VL( KI, IS+1 ),
+     $                        1 )
+*
+                  EMAX = ZERO
+                  DO 220 K = KI, N
+                     EMAX = MAX( EMAX, ABS( VL( K, IS ) )+
+     $                      ABS( VL( K, IS+1 ) ) )
+  220             CONTINUE
+                  REMAX = ONE / EMAX
+                  CALL DSCAL( N-KI+1, REMAX, VL( KI, IS ), 1 )
+                  CALL DSCAL( N-KI+1, REMAX, VL( KI, IS+1 ), 1 )
+*
+                  DO 230 K = 1, KI - 1
+                     VL( K, IS ) = ZERO
+                     VL( K, IS+1 ) = ZERO
+  230             CONTINUE
+               ELSE
+                  IF( KI.LT.N-1 ) THEN
+                     CALL DGEMV( 'N', N, N-KI-1, ONE, VL( 1, KI+2 ),
+     $                           LDVL, WORK( KI+2+N ), 1, WORK( KI+N ),
+     $                           VL( 1, KI ), 1 )
+                     CALL DGEMV( 'N', N, N-KI-1, ONE, VL( 1, KI+2 ),
+     $                           LDVL, WORK( KI+2+N2 ), 1,
+     $                           WORK( KI+1+N2 ), VL( 1, KI+1 ), 1 )
+                  ELSE
+                     CALL DSCAL( N, WORK( KI+N ), VL( 1, KI ), 1 )
+                     CALL DSCAL( N, WORK( KI+1+N2 ), VL( 1, KI+1 ), 1 )
+                  END IF
+*
+                  EMAX = ZERO
+                  DO 240 K = 1, N
+                     EMAX = MAX( EMAX, ABS( VL( K, KI ) )+
+     $                      ABS( VL( K, KI+1 ) ) )
+  240             CONTINUE
+                  REMAX = ONE / EMAX
+                  CALL DSCAL( N, REMAX, VL( 1, KI ), 1 )
+                  CALL DSCAL( N, REMAX, VL( 1, KI+1 ), 1 )
+*
+               END IF
+*
+            END IF
+*
+            IS = IS + 1
+            IF( IP.NE.0 )
+     $         IS = IS + 1
+  250       CONTINUE
+            IF( IP.EQ.-1 )
+     $         IP = 0
+            IF( IP.EQ.1 )
+     $         IP = -1
+*
+  260    CONTINUE
+*
+      END IF
+*
+      RETURN
+*
+*     End of DTREVC
+*
+      END
diff --git a/costa/native/external/lapack/dtrexc.f b/costa/native/external/lapack/dtrexc.f
new file mode 100644
index 000000000..aadadf095
--- /dev/null
+++ b/costa/native/external/lapack/dtrexc.f
@@ -0,0 +1,346 @@
+      SUBROUTINE DTREXC( COMPQ, N, T, LDT, Q, LDQ, IFST, ILST, WORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPQ
+      INTEGER            IFST, ILST, INFO, LDQ, LDT, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   Q( LDQ, * ), T( LDT, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTREXC reorders the real Schur factorization of a real matrix
+*  A = Q*T*Q**T, so that the diagonal block of T with row index IFST is
+*  moved to row ILST.
+*
+*  The real Schur form T is reordered by an orthogonal similarity
+*  transformation Z**T*T*Z, and optionally the matrix Q of Schur vectors
+*  is updated by postmultiplying it with Z.
+*
+*  T must be in Schur canonical form (as returned by DHSEQR), that is,
+*  block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each
+*  2-by-2 diagonal block has its diagonal elements equal and its
+*  off-diagonal elements of opposite sign.
+*
+*  Arguments
+*  =========
+*
+*  COMPQ   (input) CHARACTER*1
+*          = 'V':  update the matrix Q of Schur vectors;
+*          = 'N':  do not update Q.
+*
+*  N       (input) INTEGER
+*          The order of the matrix T. N >= 0.
+*
+*  T       (input/output) DOUBLE PRECISION array, dimension (LDT,N)
+*          On entry, the upper quasi-triangular matrix T, in Schur
+*          Schur canonical form.
+*          On exit, the reordered upper quasi-triangular matrix, again
+*          in Schur canonical form.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= max(1,N).
+*
+*  Q       (input/output) DOUBLE PRECISION array, dimension (LDQ,N)
+*          On entry, if COMPQ = 'V', the matrix Q of Schur vectors.
+*          On exit, if COMPQ = 'V', Q has been postmultiplied by the
+*          orthogonal transformation matrix Z which reorders T.
+*          If COMPQ = 'N', Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.  LDQ >= max(1,N).
+*
+*  IFST    (input/output) INTEGER
+*  ILST    (input/output) INTEGER
+*          Specify the reordering of the diagonal blocks of T.
+*          The block with row index IFST is moved to row ILST, by a
+*          sequence of transpositions between adjacent blocks.
+*          On exit, if IFST pointed on entry to the second row of a
+*          2-by-2 block, it is changed to point to the first row; ILST
+*          always points to the first row of the block in its final
+*          position (which may differ from its input value by +1 or -1).
+*          1 <= IFST <= N; 1 <= ILST <= N.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          = 1:  two adjacent blocks were too close to swap (the problem
+*                is very ill-conditioned); T may have been partially
+*                reordered, and ILST points to the first row of the
+*                current position of the block being moved.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            WANTQ
+      INTEGER            HERE, NBF, NBL, NBNEXT
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLAEXC, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input arguments.
+*
+      INFO = 0
+      WANTQ = LSAME( COMPQ, 'V' )
+      IF( .NOT.WANTQ .AND. .NOT.LSAME( COMPQ, 'N' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDT.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.MAX( 1, N ) ) ) THEN
+         INFO = -6
+      ELSE IF( IFST.LT.1 .OR. IFST.GT.N ) THEN
+         INFO = -7
+      ELSE IF( ILST.LT.1 .OR. ILST.GT.N ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DTREXC', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.1 )
+     $   RETURN
+*
+*     Determine the first row of specified block
+*     and find out it is 1 by 1 or 2 by 2.
+*
+      IF( IFST.GT.1 ) THEN
+         IF( T( IFST, IFST-1 ).NE.ZERO )
+     $      IFST = IFST - 1
+      END IF
+      NBF = 1
+      IF( IFST.LT.N ) THEN
+         IF( T( IFST+1, IFST ).NE.ZERO )
+     $      NBF = 2
+      END IF
+*
+*     Determine the first row of the final block
+*     and find out it is 1 by 1 or 2 by 2.
+*
+      IF( ILST.GT.1 ) THEN
+         IF( T( ILST, ILST-1 ).NE.ZERO )
+     $      ILST = ILST - 1
+      END IF
+      NBL = 1
+      IF( ILST.LT.N ) THEN
+         IF( T( ILST+1, ILST ).NE.ZERO )
+     $      NBL = 2
+      END IF
+*
+      IF( IFST.EQ.ILST )
+     $   RETURN
+*
+      IF( IFST.LT.ILST ) THEN
+*
+*        Update ILST
+*
+         IF( NBF.EQ.2 .AND. NBL.EQ.1 )
+     $      ILST = ILST - 1
+         IF( NBF.EQ.1 .AND. NBL.EQ.2 )
+     $      ILST = ILST + 1
+*
+         HERE = IFST
+*
+   10    CONTINUE
+*
+*        Swap block with next one below
+*
+         IF( NBF.EQ.1 .OR. NBF.EQ.2 ) THEN
+*
+*           Current block either 1 by 1 or 2 by 2
+*
+            NBNEXT = 1
+            IF( HERE+NBF+1.LE.N ) THEN
+               IF( T( HERE+NBF+1, HERE+NBF ).NE.ZERO )
+     $            NBNEXT = 2
+            END IF
+            CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, NBF, NBNEXT,
+     $                   WORK, INFO )
+            IF( INFO.NE.0 ) THEN
+               ILST = HERE
+               RETURN
+            END IF
+            HERE = HERE + NBNEXT
+*
+*           Test if 2 by 2 block breaks into two 1 by 1 blocks
+*
+            IF( NBF.EQ.2 ) THEN
+               IF( T( HERE+1, HERE ).EQ.ZERO )
+     $            NBF = 3
+            END IF
+*
+         ELSE
+*
+*           Current block consists of two 1 by 1 blocks each of which
+*           must be swapped individually
+*
+            NBNEXT = 1
+            IF( HERE+3.LE.N ) THEN
+               IF( T( HERE+3, HERE+2 ).NE.ZERO )
+     $            NBNEXT = 2
+            END IF
+            CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE+1, 1, NBNEXT,
+     $                   WORK, INFO )
+            IF( INFO.NE.0 ) THEN
+               ILST = HERE
+               RETURN
+            END IF
+            IF( NBNEXT.EQ.1 ) THEN
+*
+*              Swap two 1 by 1 blocks, no problems possible
+*
+               CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, 1, NBNEXT,
+     $                      WORK, INFO )
+               HERE = HERE + 1
+            ELSE
+*
+*              Recompute NBNEXT in case 2 by 2 split
+*
+               IF( T( HERE+2, HERE+1 ).EQ.ZERO )
+     $            NBNEXT = 1
+               IF( NBNEXT.EQ.2 ) THEN
+*
+*                 2 by 2 Block did not split
+*
+                  CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, 1,
+     $                         NBNEXT, WORK, INFO )
+                  IF( INFO.NE.0 ) THEN
+                     ILST = HERE
+                     RETURN
+                  END IF
+                  HERE = HERE + 2
+               ELSE
+*
+*                 2 by 2 Block did split
+*
+                  CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, 1, 1,
+     $                         WORK, INFO )
+                  CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE+1, 1, 1,
+     $                         WORK, INFO )
+                  HERE = HERE + 2
+               END IF
+            END IF
+         END IF
+         IF( HERE.LT.ILST )
+     $      GO TO 10
+*
+      ELSE
+*
+         HERE = IFST
+   20    CONTINUE
+*
+*        Swap block with next one above
+*
+         IF( NBF.EQ.1 .OR. NBF.EQ.2 ) THEN
+*
+*           Current block either 1 by 1 or 2 by 2
+*
+            NBNEXT = 1
+            IF( HERE.GE.3 ) THEN
+               IF( T( HERE-1, HERE-2 ).NE.ZERO )
+     $            NBNEXT = 2
+            END IF
+            CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE-NBNEXT, NBNEXT,
+     $                   NBF, WORK, INFO )
+            IF( INFO.NE.0 ) THEN
+               ILST = HERE
+               RETURN
+            END IF
+            HERE = HERE - NBNEXT
+*
+*           Test if 2 by 2 block breaks into two 1 by 1 blocks
+*
+            IF( NBF.EQ.2 ) THEN
+               IF( T( HERE+1, HERE ).EQ.ZERO )
+     $            NBF = 3
+            END IF
+*
+         ELSE
+*
+*           Current block consists of two 1 by 1 blocks each of which
+*           must be swapped individually
+*
+            NBNEXT = 1
+            IF( HERE.GE.3 ) THEN
+               IF( T( HERE-1, HERE-2 ).NE.ZERO )
+     $            NBNEXT = 2
+            END IF
+            CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE-NBNEXT, NBNEXT,
+     $                   1, WORK, INFO )
+            IF( INFO.NE.0 ) THEN
+               ILST = HERE
+               RETURN
+            END IF
+            IF( NBNEXT.EQ.1 ) THEN
+*
+*              Swap two 1 by 1 blocks, no problems possible
+*
+               CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, NBNEXT, 1,
+     $                      WORK, INFO )
+               HERE = HERE - 1
+            ELSE
+*
+*              Recompute NBNEXT in case 2 by 2 split
+*
+               IF( T( HERE, HERE-1 ).EQ.ZERO )
+     $            NBNEXT = 1
+               IF( NBNEXT.EQ.2 ) THEN
+*
+*                 2 by 2 Block did not split
+*
+                  CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE-1, 2, 1,
+     $                         WORK, INFO )
+                  IF( INFO.NE.0 ) THEN
+                     ILST = HERE
+                     RETURN
+                  END IF
+                  HERE = HERE - 2
+               ELSE
+*
+*                 2 by 2 Block did split
+*
+                  CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, 1, 1,
+     $                         WORK, INFO )
+                  CALL DLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE-1, 1, 1,
+     $                         WORK, INFO )
+                  HERE = HERE - 2
+               END IF
+            END IF
+         END IF
+         IF( HERE.GT.ILST )
+     $      GO TO 20
+      END IF
+      ILST = HERE
+*
+      RETURN
+*
+*     End of DTREXC
+*
+      END
diff --git a/costa/native/external/lapack/dtrrfs.f b/costa/native/external/lapack/dtrrfs.f
new file mode 100644
index 000000000..909298471
--- /dev/null
+++ b/costa/native/external/lapack/dtrrfs.f
@@ -0,0 +1,371 @@
+      SUBROUTINE DTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, X,
+     $                   LDX, FERR, BERR, WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, TRANS, UPLO
+      INTEGER            INFO, LDA, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), BERR( * ), FERR( * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTRRFS provides error bounds and backward error estimates for the
+*  solution to a system of linear equations with a triangular
+*  coefficient matrix.
+*
+*  The solution matrix X must be computed by DTRTRS or some other
+*  means before entering this routine.  DTRRFS does not do iterative
+*  refinement because doing so cannot improve the backward error.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B  (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose = Transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+*          The triangular matrix A.  If UPLO = 'U', the leading N-by-N
+*          upper triangular part of the array A contains the upper
+*          triangular matrix, and the strictly lower triangular part of
+*          A is not referenced.  If UPLO = 'L', the leading N-by-N lower
+*          triangular part of the array A contains the lower triangular
+*          matrix, and the strictly upper triangular part of A is not
+*          referenced.  If DIAG = 'U', the diagonal elements of A are
+*          also not referenced and are assumed to be 1.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input) DOUBLE PRECISION array, dimension (LDX,NRHS)
+*          The solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN, NOUNIT, UPPER
+      CHARACTER          TRANST
+      INTEGER            I, J, K, KASE, NZ
+      DOUBLE PRECISION   EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DCOPY, DLACON, DTRMV, DTRSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DTRRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      IF( NOTRAN ) THEN
+         TRANST = 'T'
+      ELSE
+         TRANST = 'N'
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = DLAMCH( 'Epsilon' )
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 250 J = 1, NRHS
+*
+*        Compute residual R = B - op(A) * X,
+*        where op(A) = A or A', depending on TRANS.
+*
+         CALL DCOPY( N, X( 1, J ), 1, WORK( N+1 ), 1 )
+         CALL DTRMV( UPLO, TRANS, DIAG, N, A, LDA, WORK( N+1 ), 1 )
+         CALL DAXPY( N, -ONE, B( 1, J ), 1, WORK( N+1 ), 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 20 I = 1, N
+            WORK( I ) = ABS( B( I, J ) )
+   20    CONTINUE
+*
+         IF( NOTRAN ) THEN
+*
+*           Compute abs(A)*abs(X) + abs(B).
+*
+            IF( UPPER ) THEN
+               IF( NOUNIT ) THEN
+                  DO 40 K = 1, N
+                     XK = ABS( X( K, J ) )
+                     DO 30 I = 1, K
+                        WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK
+   30                CONTINUE
+   40             CONTINUE
+               ELSE
+                  DO 60 K = 1, N
+                     XK = ABS( X( K, J ) )
+                     DO 50 I = 1, K - 1
+                        WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK
+   50                CONTINUE
+                     WORK( K ) = WORK( K ) + XK
+   60             CONTINUE
+               END IF
+            ELSE
+               IF( NOUNIT ) THEN
+                  DO 80 K = 1, N
+                     XK = ABS( X( K, J ) )
+                     DO 70 I = K, N
+                        WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK
+   70                CONTINUE
+   80             CONTINUE
+               ELSE
+                  DO 100 K = 1, N
+                     XK = ABS( X( K, J ) )
+                     DO 90 I = K + 1, N
+                        WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK
+   90                CONTINUE
+                     WORK( K ) = WORK( K ) + XK
+  100             CONTINUE
+               END IF
+            END IF
+         ELSE
+*
+*           Compute abs(A')*abs(X) + abs(B).
+*
+            IF( UPPER ) THEN
+               IF( NOUNIT ) THEN
+                  DO 120 K = 1, N
+                     S = ZERO
+                     DO 110 I = 1, K
+                        S = S + ABS( A( I, K ) )*ABS( X( I, J ) )
+  110                CONTINUE
+                     WORK( K ) = WORK( K ) + S
+  120             CONTINUE
+               ELSE
+                  DO 140 K = 1, N
+                     S = ABS( X( K, J ) )
+                     DO 130 I = 1, K - 1
+                        S = S + ABS( A( I, K ) )*ABS( X( I, J ) )
+  130                CONTINUE
+                     WORK( K ) = WORK( K ) + S
+  140             CONTINUE
+               END IF
+            ELSE
+               IF( NOUNIT ) THEN
+                  DO 160 K = 1, N
+                     S = ZERO
+                     DO 150 I = K, N
+                        S = S + ABS( A( I, K ) )*ABS( X( I, J ) )
+  150                CONTINUE
+                     WORK( K ) = WORK( K ) + S
+  160             CONTINUE
+               ELSE
+                  DO 180 K = 1, N
+                     S = ABS( X( K, J ) )
+                     DO 170 I = K + 1, N
+                        S = S + ABS( A( I, K ) )*ABS( X( I, J ) )
+  170                CONTINUE
+                     WORK( K ) = WORK( K ) + S
+  180             CONTINUE
+               END IF
+            END IF
+         END IF
+         S = ZERO
+         DO 190 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) )
+            ELSE
+               S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) /
+     $             ( WORK( I )+SAFE1 ) )
+            END IF
+  190    CONTINUE
+         BERR( J ) = S
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(op(A)))*
+*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(op(A)) is the inverse of op(A)
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(op(A))*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use DLACON to estimate the infinity-norm of the matrix
+*           inv(op(A)) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) )))
+*
+         DO 200 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I )
+            ELSE
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1
+            END IF
+  200    CONTINUE
+*
+         KASE = 0
+  210    CONTINUE
+         CALL DLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ),
+     $                KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(op(A)').
+*
+               CALL DTRSV( UPLO, TRANST, DIAG, N, A, LDA, WORK( N+1 ),
+     $                     1 )
+               DO 220 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  220          CONTINUE
+            ELSE
+*
+*              Multiply by inv(op(A))*diag(W).
+*
+               DO 230 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  230          CONTINUE
+               CALL DTRSV( UPLO, TRANS, DIAG, N, A, LDA, WORK( N+1 ),
+     $                     1 )
+            END IF
+            GO TO 210
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 240 I = 1, N
+            LSTRES = MAX( LSTRES, ABS( X( I, J ) ) )
+  240    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  250 CONTINUE
+*
+      RETURN
+*
+*     End of DTRRFS
+*
+      END
diff --git a/costa/native/external/lapack/dtrsen.f b/costa/native/external/lapack/dtrsen.f
new file mode 100644
index 000000000..65b7f765d
--- /dev/null
+++ b/costa/native/external/lapack/dtrsen.f
@@ -0,0 +1,457 @@
+      SUBROUTINE DTRSEN( JOB, COMPQ, SELECT, N, T, LDT, Q, LDQ, WR, WI,
+     $                   M, S, SEP, WORK, LWORK, IWORK, LIWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPQ, JOB
+      INTEGER            INFO, LDQ, LDT, LIWORK, LWORK, M, N
+      DOUBLE PRECISION   S, SEP
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            SELECT( * )
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   Q( LDQ, * ), T( LDT, * ), WI( * ), WORK( * ),
+     $                   WR( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTRSEN reorders the real Schur factorization of a real matrix
+*  A = Q*T*Q**T, so that a selected cluster of eigenvalues appears in
+*  the leading diagonal blocks of the upper quasi-triangular matrix T,
+*  and the leading columns of Q form an orthonormal basis of the
+*  corresponding right invariant subspace.
+*
+*  Optionally the routine computes the reciprocal condition numbers of
+*  the cluster of eigenvalues and/or the invariant subspace.
+*
+*  T must be in Schur canonical form (as returned by DHSEQR), that is,
+*  block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each
+*  2-by-2 diagonal block has its diagonal elemnts equal and its
+*  off-diagonal elements of opposite sign.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          Specifies whether condition numbers are required for the
+*          cluster of eigenvalues (S) or the invariant subspace (SEP):
+*          = 'N': none;
+*          = 'E': for eigenvalues only (S);
+*          = 'V': for invariant subspace only (SEP);
+*          = 'B': for both eigenvalues and invariant subspace (S and
+*                 SEP).
+*
+*  COMPQ   (input) CHARACTER*1
+*          = 'V': update the matrix Q of Schur vectors;
+*          = 'N': do not update Q.
+*
+*  SELECT  (input) LOGICAL array, dimension (N)
+*          SELECT specifies the eigenvalues in the selected cluster. To
+*          select a real eigenvalue w(j), SELECT(j) must be set to
+*          .TRUE.. To select a complex conjugate pair of eigenvalues
+*          w(j) and w(j+1), corresponding to a 2-by-2 diagonal block,
+*          either SELECT(j) or SELECT(j+1) or both must be set to
+*          .TRUE.; a complex conjugate pair of eigenvalues must be
+*          either both included in the cluster or both excluded.
+*
+*  N       (input) INTEGER
+*          The order of the matrix T. N >= 0.
+*
+*  T       (input/output) DOUBLE PRECISION array, dimension (LDT,N)
+*          On entry, the upper quasi-triangular matrix T, in Schur
+*          canonical form.
+*          On exit, T is overwritten by the reordered matrix T, again in
+*          Schur canonical form, with the selected eigenvalues in the
+*          leading diagonal blocks.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= max(1,N).
+*
+*  Q       (input/output) DOUBLE PRECISION array, dimension (LDQ,N)
+*          On entry, if COMPQ = 'V', the matrix Q of Schur vectors.
+*          On exit, if COMPQ = 'V', Q has been postmultiplied by the
+*          orthogonal transformation matrix which reorders T; the
+*          leading M columns of Q form an orthonormal basis for the
+*          specified invariant subspace.
+*          If COMPQ = 'N', Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.
+*          LDQ >= 1; and if COMPQ = 'V', LDQ >= N.
+*
+*  WR      (output) DOUBLE PRECISION array, dimension (N)
+*  WI      (output) DOUBLE PRECISION array, dimension (N)
+*          The real and imaginary parts, respectively, of the reordered
+*          eigenvalues of T. The eigenvalues are stored in the same
+*          order as on the diagonal of T, with WR(i) = T(i,i) and, if
+*          T(i:i+1,i:i+1) is a 2-by-2 diagonal block, WI(i) > 0 and
+*          WI(i+1) = -WI(i). Note that if a complex eigenvalue is
+*          sufficiently ill-conditioned, then its value may differ
+*          significantly from its value before reordering.
+*
+*  M       (output) INTEGER
+*          The dimension of the specified invariant subspace.
+*          0 < = M <= N.
+*
+*  S       (output) DOUBLE PRECISION
+*          If JOB = 'E' or 'B', S is a lower bound on the reciprocal
+*          condition number for the selected cluster of eigenvalues.
+*          S cannot underestimate the true reciprocal condition number
+*          by more than a factor of sqrt(N). If M = 0 or N, S = 1.
+*          If JOB = 'N' or 'V', S is not referenced.
+*
+*  SEP     (output) DOUBLE PRECISION
+*          If JOB = 'V' or 'B', SEP is the estimated reciprocal
+*          condition number of the specified invariant subspace. If
+*          M = 0 or N, SEP = norm(T).
+*          If JOB = 'N' or 'E', SEP is not referenced.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If JOB = 'N', LWORK >= max(1,N);
+*          if JOB = 'E', LWORK >= M*(N-M);
+*          if JOB = 'V' or 'B', LWORK >= 2*M*(N-M).
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace) INTEGER array, dimension (LIWORK)
+*          IF JOB = 'N' or 'E', IWORK is not referenced.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.
+*          If JOB = 'N' or 'E', LIWORK >= 1;
+*          if JOB = 'V' or 'B', LIWORK >= M*(N-M).
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          = 1: reordering of T failed because some eigenvalues are too
+*               close to separate (the problem is very ill-conditioned);
+*               T may have been partially reordered, and WR and WI
+*               contain the eigenvalues in the same order as in T; S and
+*               SEP (if requested) are set to zero.
+*
+*  Further Details
+*  ===============
+*
+*  DTRSEN first collects the selected eigenvalues by computing an
+*  orthogonal transformation Z to move them to the top left corner of T.
+*  In other words, the selected eigenvalues are the eigenvalues of T11
+*  in:
+*
+*                Z'*T*Z = ( T11 T12 ) n1
+*                         (  0  T22 ) n2
+*                            n1  n2
+*
+*  where N = n1+n2 and Z' means the transpose of Z. The first n1 columns
+*  of Z span the specified invariant subspace of T.
+*
+*  If T has been obtained from the real Schur factorization of a matrix
+*  A = Q*T*Q', then the reordered real Schur factorization of A is given
+*  by A = (Q*Z)*(Z'*T*Z)*(Q*Z)', and the first n1 columns of Q*Z span
+*  the corresponding invariant subspace of A.
+*
+*  The reciprocal condition number of the average of the eigenvalues of
+*  T11 may be returned in S. S lies between 0 (very badly conditioned)
+*  and 1 (very well conditioned). It is computed as follows. First we
+*  compute R so that
+*
+*                         P = ( I  R ) n1
+*                             ( 0  0 ) n2
+*                               n1 n2
+*
+*  is the projector on the invariant subspace associated with T11.
+*  R is the solution of the Sylvester equation:
+*
+*                        T11*R - R*T22 = T12.
+*
+*  Let F-norm(M) denote the Frobenius-norm of M and 2-norm(M) denote
+*  the two-norm of M. Then S is computed as the lower bound
+*
+*                      (1 + F-norm(R)**2)**(-1/2)
+*
+*  on the reciprocal of 2-norm(P), the true reciprocal condition number.
+*  S cannot underestimate 1 / 2-norm(P) by more than a factor of
+*  sqrt(N).
+*
+*  An approximate error bound for the computed average of the
+*  eigenvalues of T11 is
+*
+*                         EPS * norm(T) / S
+*
+*  where EPS is the machine precision.
+*
+*  The reciprocal condition number of the right invariant subspace
+*  spanned by the first n1 columns of Z (or of Q*Z) is returned in SEP.
+*  SEP is defined as the separation of T11 and T22:
+*
+*                     sep( T11, T22 ) = sigma-min( C )
+*
+*  where sigma-min(C) is the smallest singular value of the
+*  n1*n2-by-n1*n2 matrix
+*
+*     C  = kprod( I(n2), T11 ) - kprod( transpose(T22), I(n1) )
+*
+*  I(m) is an m by m identity matrix, and kprod denotes the Kronecker
+*  product. We estimate sigma-min(C) by the reciprocal of an estimate of
+*  the 1-norm of inverse(C). The true reciprocal 1-norm of inverse(C)
+*  cannot differ from sigma-min(C) by more than a factor of sqrt(n1*n2).
+*
+*  When SEP is small, small changes in T can cause large changes in
+*  the invariant subspace. An approximate bound on the maximum angular
+*  error in the computed right invariant subspace is
+*
+*                      EPS * norm(T) / SEP
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, PAIR, SWAP, WANTBH, WANTQ, WANTS,
+     $                   WANTSP
+      INTEGER            IERR, K, KASE, KK, KS, LIWMIN, LWMIN, N1, N2,
+     $                   NN
+      DOUBLE PRECISION   EST, RNORM, SCALE
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLANGE
+      EXTERNAL           LSAME, DLANGE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLACON, DLACPY, DTREXC, DTRSYL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      WANTBH = LSAME( JOB, 'B' )
+      WANTS = LSAME( JOB, 'E' ) .OR. WANTBH
+      WANTSP = LSAME( JOB, 'V' ) .OR. WANTBH
+      WANTQ = LSAME( COMPQ, 'V' )
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.WANTS .AND. .NOT.WANTSP )
+     $     THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( COMPQ, 'N' ) .AND. .NOT.WANTQ ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDT.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN
+         INFO = -8
+      ELSE
+*
+*        Set M to the dimension of the specified invariant subspace,
+*        and test LWORK and LIWORK.
+*
+         M = 0
+         PAIR = .FALSE.
+         DO 10 K = 1, N
+            IF( PAIR ) THEN
+               PAIR = .FALSE.
+            ELSE
+               IF( K.LT.N ) THEN
+                  IF( T( K+1, K ).EQ.ZERO ) THEN
+                     IF( SELECT( K ) )
+     $                  M = M + 1
+                  ELSE
+                     PAIR = .TRUE.
+                     IF( SELECT( K ) .OR. SELECT( K+1 ) )
+     $                  M = M + 2
+                  END IF
+               ELSE
+                  IF( SELECT( N ) )
+     $               M = M + 1
+               END IF
+            END IF
+   10    CONTINUE
+*
+         N1 = M
+         N2 = N - M
+         NN = N1*N2
+*
+         IF( WANTSP ) THEN
+            LWMIN = MAX( 1, 2*NN )
+            LIWMIN = MAX( 1, NN )
+         ELSE IF( LSAME( JOB, 'N' ) ) THEN
+            LWMIN = MAX( 1, N )
+            LIWMIN = 1
+         ELSE IF( LSAME( JOB, 'E' ) ) THEN
+            LWMIN = MAX( 1, NN )
+            LIWMIN = 1
+         END IF
+*
+         IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+            INFO = -15
+         ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+            INFO = -17
+         END IF
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DTRSEN', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( M.EQ.N .OR. M.EQ.0 ) THEN
+         IF( WANTS )
+     $      S = ONE
+         IF( WANTSP )
+     $      SEP = DLANGE( '1', N, N, T, LDT, WORK )
+         GO TO 40
+      END IF
+*
+*     Collect the selected blocks at the top-left corner of T.
+*
+      KS = 0
+      PAIR = .FALSE.
+      DO 20 K = 1, N
+         IF( PAIR ) THEN
+            PAIR = .FALSE.
+         ELSE
+            SWAP = SELECT( K )
+            IF( K.LT.N ) THEN
+               IF( T( K+1, K ).NE.ZERO ) THEN
+                  PAIR = .TRUE.
+                  SWAP = SWAP .OR. SELECT( K+1 )
+               END IF
+            END IF
+            IF( SWAP ) THEN
+               KS = KS + 1
+*
+*              Swap the K-th block to position KS.
+*
+               IERR = 0
+               KK = K
+               IF( K.NE.KS )
+     $            CALL DTREXC( COMPQ, N, T, LDT, Q, LDQ, KK, KS, WORK,
+     $                         IERR )
+               IF( IERR.EQ.1 .OR. IERR.EQ.2 ) THEN
+*
+*                 Blocks too close to swap: exit.
+*
+                  INFO = 1
+                  IF( WANTS )
+     $               S = ZERO
+                  IF( WANTSP )
+     $               SEP = ZERO
+                  GO TO 40
+               END IF
+               IF( PAIR )
+     $            KS = KS + 1
+            END IF
+         END IF
+   20 CONTINUE
+*
+      IF( WANTS ) THEN
+*
+*        Solve Sylvester equation for R:
+*
+*           T11*R - R*T22 = scale*T12
+*
+         CALL DLACPY( 'F', N1, N2, T( 1, N1+1 ), LDT, WORK, N1 )
+         CALL DTRSYL( 'N', 'N', -1, N1, N2, T, LDT, T( N1+1, N1+1 ),
+     $                LDT, WORK, N1, SCALE, IERR )
+*
+*        Estimate the reciprocal of the condition number of the cluster
+*        of eigenvalues.
+*
+         RNORM = DLANGE( 'F', N1, N2, WORK, N1, WORK )
+         IF( RNORM.EQ.ZERO ) THEN
+            S = ONE
+         ELSE
+            S = SCALE / ( SQRT( SCALE*SCALE / RNORM+RNORM )*
+     $          SQRT( RNORM ) )
+         END IF
+      END IF
+*
+      IF( WANTSP ) THEN
+*
+*        Estimate sep(T11,T22).
+*
+         EST = ZERO
+         KASE = 0
+   30    CONTINUE
+         CALL DLACON( NN, WORK( NN+1 ), WORK, IWORK, EST, KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Solve  T11*R - R*T22 = scale*X.
+*
+               CALL DTRSYL( 'N', 'N', -1, N1, N2, T, LDT,
+     $                      T( N1+1, N1+1 ), LDT, WORK, N1, SCALE,
+     $                      IERR )
+            ELSE
+*
+*              Solve  T11'*R - R*T22' = scale*X.
+*
+               CALL DTRSYL( 'T', 'T', -1, N1, N2, T, LDT,
+     $                      T( N1+1, N1+1 ), LDT, WORK, N1, SCALE,
+     $                      IERR )
+            END IF
+            GO TO 30
+         END IF
+*
+         SEP = SCALE / EST
+      END IF
+*
+   40 CONTINUE
+*
+*     Store the output eigenvalues in WR and WI.
+*
+      DO 50 K = 1, N
+         WR( K ) = T( K, K )
+         WI( K ) = ZERO
+   50 CONTINUE
+      DO 60 K = 1, N - 1
+         IF( T( K+1, K ).NE.ZERO ) THEN
+            WI( K ) = SQRT( ABS( T( K, K+1 ) ) )*
+     $                SQRT( ABS( T( K+1, K ) ) )
+            WI( K+1 ) = -WI( K )
+         END IF
+   60 CONTINUE
+*
+      WORK( 1 ) = LWMIN
+      IWORK( 1 ) = LIWMIN
+*
+      RETURN
+*
+*     End of DTRSEN
+*
+      END
diff --git a/costa/native/external/lapack/dtrsna.f b/costa/native/external/lapack/dtrsna.f
new file mode 100644
index 000000000..11b4ced40
--- /dev/null
+++ b/costa/native/external/lapack/dtrsna.f
@@ -0,0 +1,493 @@
+      SUBROUTINE DTRSNA( JOB, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR,
+     $                   LDVR, S, SEP, MM, M, WORK, LDWORK, IWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          HOWMNY, JOB
+      INTEGER            INFO, LDT, LDVL, LDVR, LDWORK, M, MM, N
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            SELECT( * )
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   S( * ), SEP( * ), T( LDT, * ), VL( LDVL, * ),
+     $                   VR( LDVR, * ), WORK( LDWORK, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTRSNA estimates reciprocal condition numbers for specified
+*  eigenvalues and/or right eigenvectors of a real upper
+*  quasi-triangular matrix T (or of any matrix Q*T*Q**T with Q
+*  orthogonal).
+*
+*  T must be in Schur canonical form (as returned by DHSEQR), that is,
+*  block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each
+*  2-by-2 diagonal block has its diagonal elements equal and its
+*  off-diagonal elements of opposite sign.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          Specifies whether condition numbers are required for
+*          eigenvalues (S) or eigenvectors (SEP):
+*          = 'E': for eigenvalues only (S);
+*          = 'V': for eigenvectors only (SEP);
+*          = 'B': for both eigenvalues and eigenvectors (S and SEP).
+*
+*  HOWMNY  (input) CHARACTER*1
+*          = 'A': compute condition numbers for all eigenpairs;
+*          = 'S': compute condition numbers for selected eigenpairs
+*                 specified by the array SELECT.
+*
+*  SELECT  (input) LOGICAL array, dimension (N)
+*          If HOWMNY = 'S', SELECT specifies the eigenpairs for which
+*          condition numbers are required. To select condition numbers
+*          for the eigenpair corresponding to a real eigenvalue w(j),
+*          SELECT(j) must be set to .TRUE.. To select condition numbers
+*          corresponding to a complex conjugate pair of eigenvalues w(j)
+*          and w(j+1), either SELECT(j) or SELECT(j+1) or both, must be
+*          set to .TRUE..
+*          If HOWMNY = 'A', SELECT is not referenced.
+*
+*  N       (input) INTEGER
+*          The order of the matrix T. N >= 0.
+*
+*  T       (input) DOUBLE PRECISION array, dimension (LDT,N)
+*          The upper quasi-triangular matrix T, in Schur canonical form.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= max(1,N).
+*
+*  VL      (input) DOUBLE PRECISION array, dimension (LDVL,M)
+*          If JOB = 'E' or 'B', VL must contain left eigenvectors of T
+*          (or of any Q*T*Q**T with Q orthogonal), corresponding to the
+*          eigenpairs specified by HOWMNY and SELECT. The eigenvectors
+*          must be stored in consecutive columns of VL, as returned by
+*          DHSEIN or DTREVC.
+*          If JOB = 'V', VL is not referenced.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the array VL.
+*          LDVL >= 1; and if JOB = 'E' or 'B', LDVL >= N.
+*
+*  VR      (input) DOUBLE PRECISION array, dimension (LDVR,M)
+*          If JOB = 'E' or 'B', VR must contain right eigenvectors of T
+*          (or of any Q*T*Q**T with Q orthogonal), corresponding to the
+*          eigenpairs specified by HOWMNY and SELECT. The eigenvectors
+*          must be stored in consecutive columns of VR, as returned by
+*          DHSEIN or DTREVC.
+*          If JOB = 'V', VR is not referenced.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the array VR.
+*          LDVR >= 1; and if JOB = 'E' or 'B', LDVR >= N.
+*
+*  S       (output) DOUBLE PRECISION array, dimension (MM)
+*          If JOB = 'E' or 'B', the reciprocal condition numbers of the
+*          selected eigenvalues, stored in consecutive elements of the
+*          array. For a complex conjugate pair of eigenvalues two
+*          consecutive elements of S are set to the same value. Thus
+*          S(j), SEP(j), and the j-th columns of VL and VR all
+*          correspond to the same eigenpair (but not in general the
+*          j-th eigenpair, unless all eigenpairs are selected).
+*          If JOB = 'V', S is not referenced.
+*
+*  SEP     (output) DOUBLE PRECISION array, dimension (MM)
+*          If JOB = 'V' or 'B', the estimated reciprocal condition
+*          numbers of the selected eigenvectors, stored in consecutive
+*          elements of the array. For a complex eigenvector two
+*          consecutive elements of SEP are set to the same value. If
+*          the eigenvalues cannot be reordered to compute SEP(j), SEP(j)
+*          is set to 0; this can only occur when the true value would be
+*          very small anyway.
+*          If JOB = 'E', SEP is not referenced.
+*
+*  MM      (input) INTEGER
+*          The number of elements in the arrays S (if JOB = 'E' or 'B')
+*           and/or SEP (if JOB = 'V' or 'B'). MM >= M.
+*
+*  M       (output) INTEGER
+*          The number of elements of the arrays S and/or SEP actually
+*          used to store the estimated condition numbers.
+*          If HOWMNY = 'A', M is set to N.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (LDWORK,N+1)
+*          If JOB = 'E', WORK is not referenced.
+*
+*  LDWORK  (input) INTEGER
+*          The leading dimension of the array WORK.
+*          LDWORK >= 1; and if JOB = 'V' or 'B', LDWORK >= N.
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*          If JOB = 'E', IWORK is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The reciprocal of the condition number of an eigenvalue lambda is
+*  defined as
+*
+*          S(lambda) = |v'*u| / (norm(u)*norm(v))
+*
+*  where u and v are the right and left eigenvectors of T corresponding
+*  to lambda; v' denotes the conjugate-transpose of v, and norm(u)
+*  denotes the Euclidean norm. These reciprocal condition numbers always
+*  lie between zero (very badly conditioned) and one (very well
+*  conditioned). If n = 1, S(lambda) is defined to be 1.
+*
+*  An approximate error bound for a computed eigenvalue W(i) is given by
+*
+*                      EPS * norm(T) / S(i)
+*
+*  where EPS is the machine precision.
+*
+*  The reciprocal of the condition number of the right eigenvector u
+*  corresponding to lambda is defined as follows. Suppose
+*
+*              T = ( lambda  c  )
+*                  (   0    T22 )
+*
+*  Then the reciprocal condition number is
+*
+*          SEP( lambda, T22 ) = sigma-min( T22 - lambda*I )
+*
+*  where sigma-min denotes the smallest singular value. We approximate
+*  the smallest singular value by the reciprocal of an estimate of the
+*  one-norm of the inverse of T22 - lambda*I. If n = 1, SEP(1) is
+*  defined to be abs(T(1,1)).
+*
+*  An approximate error bound for a computed right eigenvector VR(i)
+*  is given by
+*
+*                      EPS * norm(T) / SEP(i)
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            PAIR, SOMCON, WANTBH, WANTS, WANTSP
+      INTEGER            I, IERR, IFST, ILST, J, K, KASE, KS, N2, NN
+      DOUBLE PRECISION   BIGNUM, COND, CS, DELTA, DUMM, EPS, EST, LNRM,
+     $                   MU, PROD, PROD1, PROD2, RNRM, SCALE, SMLNUM, SN
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   DUMMY( 1 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DDOT, DLAMCH, DLAPY2, DNRM2
+      EXTERNAL           LSAME, DDOT, DLAMCH, DLAPY2, DNRM2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLACON, DLACPY, DLAQTR, DTREXC, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      WANTBH = LSAME( JOB, 'B' )
+      WANTS = LSAME( JOB, 'E' ) .OR. WANTBH
+      WANTSP = LSAME( JOB, 'V' ) .OR. WANTBH
+*
+      SOMCON = LSAME( HOWMNY, 'S' )
+*
+      INFO = 0
+      IF( .NOT.WANTS .AND. .NOT.WANTSP ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( HOWMNY, 'A' ) .AND. .NOT.SOMCON ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDT.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDVL.LT.1 .OR. ( WANTS .AND. LDVL.LT.N ) ) THEN
+         INFO = -8
+      ELSE IF( LDVR.LT.1 .OR. ( WANTS .AND. LDVR.LT.N ) ) THEN
+         INFO = -10
+      ELSE
+*
+*        Set M to the number of eigenpairs for which condition numbers
+*        are required, and test MM.
+*
+         IF( SOMCON ) THEN
+            M = 0
+            PAIR = .FALSE.
+            DO 10 K = 1, N
+               IF( PAIR ) THEN
+                  PAIR = .FALSE.
+               ELSE
+                  IF( K.LT.N ) THEN
+                     IF( T( K+1, K ).EQ.ZERO ) THEN
+                        IF( SELECT( K ) )
+     $                     M = M + 1
+                     ELSE
+                        PAIR = .TRUE.
+                        IF( SELECT( K ) .OR. SELECT( K+1 ) )
+     $                     M = M + 2
+                     END IF
+                  ELSE
+                     IF( SELECT( N ) )
+     $                  M = M + 1
+                  END IF
+               END IF
+   10       CONTINUE
+         ELSE
+            M = N
+         END IF
+*
+         IF( MM.LT.M ) THEN
+            INFO = -13
+         ELSE IF( LDWORK.LT.1 .OR. ( WANTSP .AND. LDWORK.LT.N ) ) THEN
+            INFO = -16
+         END IF
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DTRSNA', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( SOMCON ) THEN
+            IF( .NOT.SELECT( 1 ) )
+     $         RETURN
+         END IF
+         IF( WANTS )
+     $      S( 1 ) = ONE
+         IF( WANTSP )
+     $      SEP( 1 ) = ABS( T( 1, 1 ) )
+         RETURN
+      END IF
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'P' )
+      SMLNUM = DLAMCH( 'S' ) / EPS
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+*
+      KS = 0
+      PAIR = .FALSE.
+      DO 60 K = 1, N
+*
+*        Determine whether T(k,k) begins a 1-by-1 or 2-by-2 block.
+*
+         IF( PAIR ) THEN
+            PAIR = .FALSE.
+            GO TO 60
+         ELSE
+            IF( K.LT.N )
+     $         PAIR = T( K+1, K ).NE.ZERO
+         END IF
+*
+*        Determine whether condition numbers are required for the k-th
+*        eigenpair.
+*
+         IF( SOMCON ) THEN
+            IF( PAIR ) THEN
+               IF( .NOT.SELECT( K ) .AND. .NOT.SELECT( K+1 ) )
+     $            GO TO 60
+            ELSE
+               IF( .NOT.SELECT( K ) )
+     $            GO TO 60
+            END IF
+         END IF
+*
+         KS = KS + 1
+*
+         IF( WANTS ) THEN
+*
+*           Compute the reciprocal condition number of the k-th
+*           eigenvalue.
+*
+            IF( .NOT.PAIR ) THEN
+*
+*              Real eigenvalue.
+*
+               PROD = DDOT( N, VR( 1, KS ), 1, VL( 1, KS ), 1 )
+               RNRM = DNRM2( N, VR( 1, KS ), 1 )
+               LNRM = DNRM2( N, VL( 1, KS ), 1 )
+               S( KS ) = ABS( PROD ) / ( RNRM*LNRM )
+            ELSE
+*
+*              Complex eigenvalue.
+*
+               PROD1 = DDOT( N, VR( 1, KS ), 1, VL( 1, KS ), 1 )
+               PROD1 = PROD1 + DDOT( N, VR( 1, KS+1 ), 1, VL( 1, KS+1 ),
+     $                 1 )
+               PROD2 = DDOT( N, VL( 1, KS ), 1, VR( 1, KS+1 ), 1 )
+               PROD2 = PROD2 - DDOT( N, VL( 1, KS+1 ), 1, VR( 1, KS ),
+     $                 1 )
+               RNRM = DLAPY2( DNRM2( N, VR( 1, KS ), 1 ),
+     $                DNRM2( N, VR( 1, KS+1 ), 1 ) )
+               LNRM = DLAPY2( DNRM2( N, VL( 1, KS ), 1 ),
+     $                DNRM2( N, VL( 1, KS+1 ), 1 ) )
+               COND = DLAPY2( PROD1, PROD2 ) / ( RNRM*LNRM )
+               S( KS ) = COND
+               S( KS+1 ) = COND
+            END IF
+         END IF
+*
+         IF( WANTSP ) THEN
+*
+*           Estimate the reciprocal condition number of the k-th
+*           eigenvector.
+*
+*           Copy the matrix T to the array WORK and swap the diagonal
+*           block beginning at T(k,k) to the (1,1) position.
+*
+            CALL DLACPY( 'Full', N, N, T, LDT, WORK, LDWORK )
+            IFST = K
+            ILST = 1
+            CALL DTREXC( 'No Q', N, WORK, LDWORK, DUMMY, 1, IFST, ILST,
+     $                   WORK( 1, N+1 ), IERR )
+*
+            IF( IERR.EQ.1 .OR. IERR.EQ.2 ) THEN
+*
+*              Could not swap because blocks not well separated
+*
+               SCALE = ONE
+               EST = BIGNUM
+            ELSE
+*
+*              Reordering successful
+*
+               IF( WORK( 2, 1 ).EQ.ZERO ) THEN
+*
+*                 Form C = T22 - lambda*I in WORK(2:N,2:N).
+*
+                  DO 20 I = 2, N
+                     WORK( I, I ) = WORK( I, I ) - WORK( 1, 1 )
+   20             CONTINUE
+                  N2 = 1
+                  NN = N - 1
+               ELSE
+*
+*                 Triangularize the 2 by 2 block by unitary
+*                 transformation U = [  cs   i*ss ]
+*                                    [ i*ss   cs  ].
+*                 such that the (1,1) position of WORK is complex
+*                 eigenvalue lambda with positive imaginary part. (2,2)
+*                 position of WORK is the complex eigenvalue lambda
+*                 with negative imaginary  part.
+*
+                  MU = SQRT( ABS( WORK( 1, 2 ) ) )*
+     $                 SQRT( ABS( WORK( 2, 1 ) ) )
+                  DELTA = DLAPY2( MU, WORK( 2, 1 ) )
+                  CS = MU / DELTA
+                  SN = -WORK( 2, 1 ) / DELTA
+*
+*                 Form
+*
+*                 C' = WORK(2:N,2:N) + i*[rwork(1) ..... rwork(n-1) ]
+*                                        [   mu                     ]
+*                                        [         ..               ]
+*                                        [             ..           ]
+*                                        [                  mu      ]
+*                 where C' is conjugate transpose of complex matrix C,
+*                 and RWORK is stored starting in the N+1-st column of
+*                 WORK.
+*
+                  DO 30 J = 3, N
+                     WORK( 2, J ) = CS*WORK( 2, J )
+                     WORK( J, J ) = WORK( J, J ) - WORK( 1, 1 )
+   30             CONTINUE
+                  WORK( 2, 2 ) = ZERO
+*
+                  WORK( 1, N+1 ) = TWO*MU
+                  DO 40 I = 2, N - 1
+                     WORK( I, N+1 ) = SN*WORK( 1, I+1 )
+   40             CONTINUE
+                  N2 = 2
+                  NN = 2*( N-1 )
+               END IF
+*
+*              Estimate norm(inv(C'))
+*
+               EST = ZERO
+               KASE = 0
+   50          CONTINUE
+               CALL DLACON( NN, WORK( 1, N+2 ), WORK( 1, N+4 ), IWORK,
+     $                      EST, KASE )
+               IF( KASE.NE.0 ) THEN
+                  IF( KASE.EQ.1 ) THEN
+                     IF( N2.EQ.1 ) THEN
+*
+*                       Real eigenvalue: solve C'*x = scale*c.
+*
+                        CALL DLAQTR( .TRUE., .TRUE., N-1, WORK( 2, 2 ),
+     $                               LDWORK, DUMMY, DUMM, SCALE,
+     $                               WORK( 1, N+4 ), WORK( 1, N+6 ),
+     $                               IERR )
+                     ELSE
+*
+*                       Complex eigenvalue: solve
+*                       C'*(p+iq) = scale*(c+id) in real arithmetic.
+*
+                        CALL DLAQTR( .TRUE., .FALSE., N-1, WORK( 2, 2 ),
+     $                               LDWORK, WORK( 1, N+1 ), MU, SCALE,
+     $                               WORK( 1, N+4 ), WORK( 1, N+6 ),
+     $                               IERR )
+                     END IF
+                  ELSE
+                     IF( N2.EQ.1 ) THEN
+*
+*                       Real eigenvalue: solve C*x = scale*c.
+*
+                        CALL DLAQTR( .FALSE., .TRUE., N-1, WORK( 2, 2 ),
+     $                               LDWORK, DUMMY, DUMM, SCALE,
+     $                               WORK( 1, N+4 ), WORK( 1, N+6 ),
+     $                               IERR )
+                     ELSE
+*
+*                       Complex eigenvalue: solve
+*                       C*(p+iq) = scale*(c+id) in real arithmetic.
+*
+                        CALL DLAQTR( .FALSE., .FALSE., N-1,
+     $                               WORK( 2, 2 ), LDWORK,
+     $                               WORK( 1, N+1 ), MU, SCALE,
+     $                               WORK( 1, N+4 ), WORK( 1, N+6 ),
+     $                               IERR )
+*
+                     END IF
+                  END IF
+*
+                  GO TO 50
+               END IF
+            END IF
+*
+            SEP( KS ) = SCALE / MAX( EST, SMLNUM )
+            IF( PAIR )
+     $         SEP( KS+1 ) = SEP( KS )
+         END IF
+*
+         IF( PAIR )
+     $      KS = KS + 1
+*
+   60 CONTINUE
+      RETURN
+*
+*     End of DTRSNA
+*
+      END
diff --git a/costa/native/external/lapack/dtrsyl.f b/costa/native/external/lapack/dtrsyl.f
new file mode 100644
index 000000000..5da5c3c17
--- /dev/null
+++ b/costa/native/external/lapack/dtrsyl.f
@@ -0,0 +1,914 @@
+      SUBROUTINE DTRSYL( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C,
+     $                   LDC, SCALE, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANA, TRANB
+      INTEGER            INFO, ISGN, LDA, LDB, LDC, M, N
+      DOUBLE PRECISION   SCALE
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTRSYL solves the real Sylvester matrix equation:
+*
+*     op(A)*X + X*op(B) = scale*C or
+*     op(A)*X - X*op(B) = scale*C,
+*
+*  where op(A) = A or A**T, and  A and B are both upper quasi-
+*  triangular. A is M-by-M and B is N-by-N; the right hand side C and
+*  the solution X are M-by-N; and scale is an output scale factor, set
+*  <= 1 to avoid overflow in X.
+*
+*  A and B must be in Schur canonical form (as returned by DHSEQR), that
+*  is, block upper triangular with 1-by-1 and 2-by-2 diagonal blocks;
+*  each 2-by-2 diagonal block has its diagonal elements equal and its
+*  off-diagonal elements of opposite sign.
+*
+*  Arguments
+*  =========
+*
+*  TRANA   (input) CHARACTER*1
+*          Specifies the option op(A):
+*          = 'N': op(A) = A    (No transpose)
+*          = 'T': op(A) = A**T (Transpose)
+*          = 'C': op(A) = A**H (Conjugate transpose = Transpose)
+*
+*  TRANB   (input) CHARACTER*1
+*          Specifies the option op(B):
+*          = 'N': op(B) = B    (No transpose)
+*          = 'T': op(B) = B**T (Transpose)
+*          = 'C': op(B) = B**H (Conjugate transpose = Transpose)
+*
+*  ISGN    (input) INTEGER
+*          Specifies the sign in the equation:
+*          = +1: solve op(A)*X + X*op(B) = scale*C
+*          = -1: solve op(A)*X - X*op(B) = scale*C
+*
+*  M       (input) INTEGER
+*          The order of the matrix A, and the number of rows in the
+*          matrices X and C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The order of the matrix B, and the number of columns in the
+*          matrices X and C. N >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,M)
+*          The upper quasi-triangular matrix A, in Schur canonical form.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  B       (input) DOUBLE PRECISION array, dimension (LDB,N)
+*          The upper quasi-triangular matrix B, in Schur canonical form.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+*          On entry, the M-by-N right hand side matrix C.
+*          On exit, C is overwritten by the solution matrix X.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M)
+*
+*  SCALE   (output) DOUBLE PRECISION
+*          The scale factor, scale, set <= 1 to avoid overflow in X.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          = 1: A and B have common or very close eigenvalues; perturbed
+*               values were used to solve the equation (but the matrices
+*               A and B are unchanged).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRNA, NOTRNB
+      INTEGER            IERR, J, K, K1, K2, KNEXT, L, L1, L2, LNEXT
+      DOUBLE PRECISION   A11, BIGNUM, DA11, DB, EPS, SCALOC, SGN, SMIN,
+     $                   SMLNUM, SUML, SUMR, XNORM
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   DUM( 1 ), VEC( 2, 2 ), X( 2, 2 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DDOT, DLAMCH, DLANGE
+      EXTERNAL           LSAME, DDOT, DLAMCH, DLANGE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLALN2, DLASY2, DSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and Test input parameters
+*
+      NOTRNA = LSAME( TRANA, 'N' )
+      NOTRNB = LSAME( TRANB, 'N' )
+*
+      INFO = 0
+      IF( .NOT.NOTRNA .AND. .NOT.LSAME( TRANA, 'T' ) .AND. .NOT.
+     $    LSAME( TRANA, 'C' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRNB .AND. .NOT.LSAME( TRANB, 'T' ) .AND. .NOT.
+     $         LSAME( TRANB, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( ISGN.NE.1 .AND. ISGN.NE.-1 ) THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DTRSYL', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     Set constants to control overflow
+*
+      EPS = DLAMCH( 'P' )
+      SMLNUM = DLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SMLNUM*DBLE( M*N ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+      SMIN = MAX( SMLNUM, EPS*DLANGE( 'M', M, M, A, LDA, DUM ),
+     $       EPS*DLANGE( 'M', N, N, B, LDB, DUM ) )
+*
+      SCALE = ONE
+      SGN = ISGN
+*
+      IF( NOTRNA .AND. NOTRNB ) THEN
+*
+*        Solve    A*X + ISGN*X*B = scale*C.
+*
+*        The (K,L)th block of X is determined starting from
+*        bottom-left corner column by column by
+*
+*         A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L)
+*
+*        Where
+*                  M                         L-1
+*        R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)].
+*                I=K+1                       J=1
+*
+*        Start column loop (index = L)
+*        L1 (L2) : column index of the first (first) row of X(K,L).
+*
+         LNEXT = 1
+         DO 60 L = 1, N
+            IF( L.LT.LNEXT )
+     $         GO TO 60
+            IF( L.EQ.N ) THEN
+               L1 = L
+               L2 = L
+            ELSE
+               IF( B( L+1, L ).NE.ZERO ) THEN
+                  L1 = L
+                  L2 = L + 1
+                  LNEXT = L + 2
+               ELSE
+                  L1 = L
+                  L2 = L
+                  LNEXT = L + 1
+               END IF
+            END IF
+*
+*           Start row loop (index = K)
+*           K1 (K2): row index of the first (last) row of X(K,L).
+*
+            KNEXT = M
+            DO 50 K = M, 1, -1
+               IF( K.GT.KNEXT )
+     $            GO TO 50
+               IF( K.EQ.1 ) THEN
+                  K1 = K
+                  K2 = K
+               ELSE
+                  IF( A( K, K-1 ).NE.ZERO ) THEN
+                     K1 = K - 1
+                     K2 = K
+                     KNEXT = K - 2
+                  ELSE
+                     K1 = K
+                     K2 = K
+                     KNEXT = K - 1
+                  END IF
+               END IF
+*
+               IF( L1.EQ.L2 .AND. K1.EQ.K2 ) THEN
+                  SUML = DDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA,
+     $                   C( MIN( K1+1, M ), L1 ), 1 )
+                  SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 )
+                  VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR )
+                  SCALOC = ONE
+*
+                  A11 = A( K1, K1 ) + SGN*B( L1, L1 )
+                  DA11 = ABS( A11 )
+                  IF( DA11.LE.SMIN ) THEN
+                     A11 = SMIN
+                     DA11 = SMIN
+                     INFO = 1
+                  END IF
+                  DB = ABS( VEC( 1, 1 ) )
+                  IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN
+                     IF( DB.GT.BIGNUM*DA11 )
+     $                  SCALOC = ONE / DB
+                  END IF
+                  X( 1, 1 ) = ( VEC( 1, 1 )*SCALOC ) / A11
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 10 J = 1, N
+                        CALL DSCAL( M, SCALOC, C( 1, J ), 1 )
+   10                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+*
+               ELSE IF( L1.EQ.L2 .AND. K1.NE.K2 ) THEN
+*
+                  SUML = DDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA,
+     $                   C( MIN( K2+1, M ), L1 ), 1 )
+                  SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 )
+                  VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = DDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA,
+     $                   C( MIN( K2+1, M ), L1 ), 1 )
+                  SUMR = DDOT( L1-1, C( K2, 1 ), LDC, B( 1, L1 ), 1 )
+                  VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR )
+*
+                  CALL DLALN2( .FALSE., 2, 1, SMIN, ONE, A( K1, K1 ),
+     $                         LDA, ONE, ONE, VEC, 2, -SGN*B( L1, L1 ),
+     $                         ZERO, X, 2, SCALOC, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 1
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 20 J = 1, N
+                        CALL DSCAL( M, SCALOC, C( 1, J ), 1 )
+   20                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+                  C( K2, L1 ) = X( 2, 1 )
+*
+               ELSE IF( L1.NE.L2 .AND. K1.EQ.K2 ) THEN
+*
+                  SUML = DDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA,
+     $                   C( MIN( K1+1, M ), L1 ), 1 )
+                  SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 )
+                  VEC( 1, 1 ) = SGN*( C( K1, L1 )-( SUML+SGN*SUMR ) )
+*
+                  SUML = DDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA,
+     $                   C( MIN( K1+1, M ), L2 ), 1 )
+                  SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L2 ), 1 )
+                  VEC( 2, 1 ) = SGN*( C( K1, L2 )-( SUML+SGN*SUMR ) )
+*
+                  CALL DLALN2( .TRUE., 2, 1, SMIN, ONE, B( L1, L1 ),
+     $                         LDB, ONE, ONE, VEC, 2, -SGN*A( K1, K1 ),
+     $                         ZERO, X, 2, SCALOC, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 1
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 30 J = 1, N
+                        CALL DSCAL( M, SCALOC, C( 1, J ), 1 )
+   30                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+                  C( K1, L2 ) = X( 2, 1 )
+*
+               ELSE IF( L1.NE.L2 .AND. K1.NE.K2 ) THEN
+*
+                  SUML = DDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA,
+     $                   C( MIN( K2+1, M ), L1 ), 1 )
+                  SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 )
+                  VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = DDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA,
+     $                   C( MIN( K2+1, M ), L2 ), 1 )
+                  SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L2 ), 1 )
+                  VEC( 1, 2 ) = C( K1, L2 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = DDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA,
+     $                   C( MIN( K2+1, M ), L1 ), 1 )
+                  SUMR = DDOT( L1-1, C( K2, 1 ), LDC, B( 1, L1 ), 1 )
+                  VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = DDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA,
+     $                   C( MIN( K2+1, M ), L2 ), 1 )
+                  SUMR = DDOT( L1-1, C( K2, 1 ), LDC, B( 1, L2 ), 1 )
+                  VEC( 2, 2 ) = C( K2, L2 ) - ( SUML+SGN*SUMR )
+*
+                  CALL DLASY2( .FALSE., .FALSE., ISGN, 2, 2,
+     $                         A( K1, K1 ), LDA, B( L1, L1 ), LDB, VEC,
+     $                         2, SCALOC, X, 2, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 1
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 40 J = 1, N
+                        CALL DSCAL( M, SCALOC, C( 1, J ), 1 )
+   40                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+                  C( K1, L2 ) = X( 1, 2 )
+                  C( K2, L1 ) = X( 2, 1 )
+                  C( K2, L2 ) = X( 2, 2 )
+               END IF
+*
+   50       CONTINUE
+*
+   60    CONTINUE
+*
+      ELSE IF( .NOT.NOTRNA .AND. NOTRNB ) THEN
+*
+*        Solve    A' *X + ISGN*X*B = scale*C.
+*
+*        The (K,L)th block of X is determined starting from
+*        upper-left corner column by column by
+*
+*          A(K,K)'*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L)
+*
+*        Where
+*                   K-1                        L-1
+*          R(K,L) = SUM [A(I,K)'*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)]
+*                   I=1                        J=1
+*
+*        Start column loop (index = L)
+*        L1 (L2): column index of the first (last) row of X(K,L)
+*
+         LNEXT = 1
+         DO 120 L = 1, N
+            IF( L.LT.LNEXT )
+     $         GO TO 120
+            IF( L.EQ.N ) THEN
+               L1 = L
+               L2 = L
+            ELSE
+               IF( B( L+1, L ).NE.ZERO ) THEN
+                  L1 = L
+                  L2 = L + 1
+                  LNEXT = L + 2
+               ELSE
+                  L1 = L
+                  L2 = L
+                  LNEXT = L + 1
+               END IF
+            END IF
+*
+*           Start row loop (index = K)
+*           K1 (K2): row index of the first (last) row of X(K,L)
+*
+            KNEXT = 1
+            DO 110 K = 1, M
+               IF( K.LT.KNEXT )
+     $            GO TO 110
+               IF( K.EQ.M ) THEN
+                  K1 = K
+                  K2 = K
+               ELSE
+                  IF( A( K+1, K ).NE.ZERO ) THEN
+                     K1 = K
+                     K2 = K + 1
+                     KNEXT = K + 2
+                  ELSE
+                     K1 = K
+                     K2 = K
+                     KNEXT = K + 1
+                  END IF
+               END IF
+*
+               IF( L1.EQ.L2 .AND. K1.EQ.K2 ) THEN
+                  SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 )
+                  SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 )
+                  VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR )
+                  SCALOC = ONE
+*
+                  A11 = A( K1, K1 ) + SGN*B( L1, L1 )
+                  DA11 = ABS( A11 )
+                  IF( DA11.LE.SMIN ) THEN
+                     A11 = SMIN
+                     DA11 = SMIN
+                     INFO = 1
+                  END IF
+                  DB = ABS( VEC( 1, 1 ) )
+                  IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN
+                     IF( DB.GT.BIGNUM*DA11 )
+     $                  SCALOC = ONE / DB
+                  END IF
+                  X( 1, 1 ) = ( VEC( 1, 1 )*SCALOC ) / A11
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 70 J = 1, N
+                        CALL DSCAL( M, SCALOC, C( 1, J ), 1 )
+   70                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+*
+               ELSE IF( L1.EQ.L2 .AND. K1.NE.K2 ) THEN
+*
+                  SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 )
+                  SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 )
+                  VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = DDOT( K1-1, A( 1, K2 ), 1, C( 1, L1 ), 1 )
+                  SUMR = DDOT( L1-1, C( K2, 1 ), LDC, B( 1, L1 ), 1 )
+                  VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR )
+*
+                  CALL DLALN2( .TRUE., 2, 1, SMIN, ONE, A( K1, K1 ),
+     $                         LDA, ONE, ONE, VEC, 2, -SGN*B( L1, L1 ),
+     $                         ZERO, X, 2, SCALOC, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 1
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 80 J = 1, N
+                        CALL DSCAL( M, SCALOC, C( 1, J ), 1 )
+   80                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+                  C( K2, L1 ) = X( 2, 1 )
+*
+               ELSE IF( L1.NE.L2 .AND. K1.EQ.K2 ) THEN
+*
+                  SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 )
+                  SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 )
+                  VEC( 1, 1 ) = SGN*( C( K1, L1 )-( SUML+SGN*SUMR ) )
+*
+                  SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L2 ), 1 )
+                  SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L2 ), 1 )
+                  VEC( 2, 1 ) = SGN*( C( K1, L2 )-( SUML+SGN*SUMR ) )
+*
+                  CALL DLALN2( .TRUE., 2, 1, SMIN, ONE, B( L1, L1 ),
+     $                         LDB, ONE, ONE, VEC, 2, -SGN*A( K1, K1 ),
+     $                         ZERO, X, 2, SCALOC, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 1
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 90 J = 1, N
+                        CALL DSCAL( M, SCALOC, C( 1, J ), 1 )
+   90                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+                  C( K1, L2 ) = X( 2, 1 )
+*
+               ELSE IF( L1.NE.L2 .AND. K1.NE.K2 ) THEN
+*
+                  SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 )
+                  SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 )
+                  VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L2 ), 1 )
+                  SUMR = DDOT( L1-1, C( K1, 1 ), LDC, B( 1, L2 ), 1 )
+                  VEC( 1, 2 ) = C( K1, L2 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = DDOT( K1-1, A( 1, K2 ), 1, C( 1, L1 ), 1 )
+                  SUMR = DDOT( L1-1, C( K2, 1 ), LDC, B( 1, L1 ), 1 )
+                  VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = DDOT( K1-1, A( 1, K2 ), 1, C( 1, L2 ), 1 )
+                  SUMR = DDOT( L1-1, C( K2, 1 ), LDC, B( 1, L2 ), 1 )
+                  VEC( 2, 2 ) = C( K2, L2 ) - ( SUML+SGN*SUMR )
+*
+                  CALL DLASY2( .TRUE., .FALSE., ISGN, 2, 2, A( K1, K1 ),
+     $                         LDA, B( L1, L1 ), LDB, VEC, 2, SCALOC, X,
+     $                         2, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 1
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 100 J = 1, N
+                        CALL DSCAL( M, SCALOC, C( 1, J ), 1 )
+  100                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+                  C( K1, L2 ) = X( 1, 2 )
+                  C( K2, L1 ) = X( 2, 1 )
+                  C( K2, L2 ) = X( 2, 2 )
+               END IF
+*
+  110       CONTINUE
+  120    CONTINUE
+*
+      ELSE IF( .NOT.NOTRNA .AND. .NOT.NOTRNB ) THEN
+*
+*        Solve    A'*X + ISGN*X*B' = scale*C.
+*
+*        The (K,L)th block of X is determined starting from
+*        top-right corner column by column by
+*
+*           A(K,K)'*X(K,L) + ISGN*X(K,L)*B(L,L)' = C(K,L) - R(K,L)
+*
+*        Where
+*                     K-1                          N
+*            R(K,L) = SUM [A(I,K)'*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)'].
+*                     I=1                        J=L+1
+*
+*        Start column loop (index = L)
+*        L1 (L2): column index of the first (last) row of X(K,L)
+*
+         LNEXT = N
+         DO 180 L = N, 1, -1
+            IF( L.GT.LNEXT )
+     $         GO TO 180
+            IF( L.EQ.1 ) THEN
+               L1 = L
+               L2 = L
+            ELSE
+               IF( B( L, L-1 ).NE.ZERO ) THEN
+                  L1 = L - 1
+                  L2 = L
+                  LNEXT = L - 2
+               ELSE
+                  L1 = L
+                  L2 = L
+                  LNEXT = L - 1
+               END IF
+            END IF
+*
+*           Start row loop (index = K)
+*           K1 (K2): row index of the first (last) row of X(K,L)
+*
+            KNEXT = 1
+            DO 170 K = 1, M
+               IF( K.LT.KNEXT )
+     $            GO TO 170
+               IF( K.EQ.M ) THEN
+                  K1 = K
+                  K2 = K
+               ELSE
+                  IF( A( K+1, K ).NE.ZERO ) THEN
+                     K1 = K
+                     K2 = K + 1
+                     KNEXT = K + 2
+                  ELSE
+                     K1 = K
+                     K2 = K
+                     KNEXT = K + 1
+                  END IF
+               END IF
+*
+               IF( L1.EQ.L2 .AND. K1.EQ.K2 ) THEN
+                  SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 )
+                  SUMR = DDOT( N-L1, C( K1, MIN( L1+1, N ) ), LDC,
+     $                   B( L1, MIN( L1+1, N ) ), LDB )
+                  VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR )
+                  SCALOC = ONE
+*
+                  A11 = A( K1, K1 ) + SGN*B( L1, L1 )
+                  DA11 = ABS( A11 )
+                  IF( DA11.LE.SMIN ) THEN
+                     A11 = SMIN
+                     DA11 = SMIN
+                     INFO = 1
+                  END IF
+                  DB = ABS( VEC( 1, 1 ) )
+                  IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN
+                     IF( DB.GT.BIGNUM*DA11 )
+     $                  SCALOC = ONE / DB
+                  END IF
+                  X( 1, 1 ) = ( VEC( 1, 1 )*SCALOC ) / A11
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 130 J = 1, N
+                        CALL DSCAL( M, SCALOC, C( 1, J ), 1 )
+  130                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+*
+               ELSE IF( L1.EQ.L2 .AND. K1.NE.K2 ) THEN
+*
+                  SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 )
+                  SUMR = DDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC,
+     $                   B( L1, MIN( L2+1, N ) ), LDB )
+                  VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = DDOT( K1-1, A( 1, K2 ), 1, C( 1, L1 ), 1 )
+                  SUMR = DDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC,
+     $                   B( L1, MIN( L2+1, N ) ), LDB )
+                  VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR )
+*
+                  CALL DLALN2( .TRUE., 2, 1, SMIN, ONE, A( K1, K1 ),
+     $                         LDA, ONE, ONE, VEC, 2, -SGN*B( L1, L1 ),
+     $                         ZERO, X, 2, SCALOC, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 1
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 140 J = 1, N
+                        CALL DSCAL( M, SCALOC, C( 1, J ), 1 )
+  140                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+                  C( K2, L1 ) = X( 2, 1 )
+*
+               ELSE IF( L1.NE.L2 .AND. K1.EQ.K2 ) THEN
+*
+                  SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 )
+                  SUMR = DDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC,
+     $                   B( L1, MIN( L2+1, N ) ), LDB )
+                  VEC( 1, 1 ) = SGN*( C( K1, L1 )-( SUML+SGN*SUMR ) )
+*
+                  SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L2 ), 1 )
+                  SUMR = DDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC,
+     $                   B( L2, MIN( L2+1, N ) ), LDB )
+                  VEC( 2, 1 ) = SGN*( C( K1, L2 )-( SUML+SGN*SUMR ) )
+*
+                  CALL DLALN2( .FALSE., 2, 1, SMIN, ONE, B( L1, L1 ),
+     $                         LDB, ONE, ONE, VEC, 2, -SGN*A( K1, K1 ),
+     $                         ZERO, X, 2, SCALOC, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 1
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 150 J = 1, N
+                        CALL DSCAL( M, SCALOC, C( 1, J ), 1 )
+  150                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+                  C( K1, L2 ) = X( 2, 1 )
+*
+               ELSE IF( L1.NE.L2 .AND. K1.NE.K2 ) THEN
+*
+                  SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 )
+                  SUMR = DDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC,
+     $                   B( L1, MIN( L2+1, N ) ), LDB )
+                  VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = DDOT( K1-1, A( 1, K1 ), 1, C( 1, L2 ), 1 )
+                  SUMR = DDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC,
+     $                   B( L2, MIN( L2+1, N ) ), LDB )
+                  VEC( 1, 2 ) = C( K1, L2 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = DDOT( K1-1, A( 1, K2 ), 1, C( 1, L1 ), 1 )
+                  SUMR = DDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC,
+     $                   B( L1, MIN( L2+1, N ) ), LDB )
+                  VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = DDOT( K1-1, A( 1, K2 ), 1, C( 1, L2 ), 1 )
+                  SUMR = DDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC,
+     $                   B( L2, MIN( L2+1, N ) ), LDB )
+                  VEC( 2, 2 ) = C( K2, L2 ) - ( SUML+SGN*SUMR )
+*
+                  CALL DLASY2( .TRUE., .TRUE., ISGN, 2, 2, A( K1, K1 ),
+     $                         LDA, B( L1, L1 ), LDB, VEC, 2, SCALOC, X,
+     $                         2, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 1
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 160 J = 1, N
+                        CALL DSCAL( M, SCALOC, C( 1, J ), 1 )
+  160                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+                  C( K1, L2 ) = X( 1, 2 )
+                  C( K2, L1 ) = X( 2, 1 )
+                  C( K2, L2 ) = X( 2, 2 )
+               END IF
+*
+  170       CONTINUE
+  180    CONTINUE
+*
+      ELSE IF( NOTRNA .AND. .NOT.NOTRNB ) THEN
+*
+*        Solve    A*X + ISGN*X*B' = scale*C.
+*
+*        The (K,L)th block of X is determined starting from
+*        bottom-right corner column by column by
+*
+*            A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)' = C(K,L) - R(K,L)
+*
+*        Where
+*                      M                          N
+*            R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)'].
+*                    I=K+1                      J=L+1
+*
+*        Start column loop (index = L)
+*        L1 (L2): column index of the first (last) row of X(K,L)
+*
+         LNEXT = N
+         DO 240 L = N, 1, -1
+            IF( L.GT.LNEXT )
+     $         GO TO 240
+            IF( L.EQ.1 ) THEN
+               L1 = L
+               L2 = L
+            ELSE
+               IF( B( L, L-1 ).NE.ZERO ) THEN
+                  L1 = L - 1
+                  L2 = L
+                  LNEXT = L - 2
+               ELSE
+                  L1 = L
+                  L2 = L
+                  LNEXT = L - 1
+               END IF
+            END IF
+*
+*           Start row loop (index = K)
+*           K1 (K2): row index of the first (last) row of X(K,L)
+*
+            KNEXT = M
+            DO 230 K = M, 1, -1
+               IF( K.GT.KNEXT )
+     $            GO TO 230
+               IF( K.EQ.1 ) THEN
+                  K1 = K
+                  K2 = K
+               ELSE
+                  IF( A( K, K-1 ).NE.ZERO ) THEN
+                     K1 = K - 1
+                     K2 = K
+                     KNEXT = K - 2
+                  ELSE
+                     K1 = K
+                     K2 = K
+                     KNEXT = K - 1
+                  END IF
+               END IF
+*
+               IF( L1.EQ.L2 .AND. K1.EQ.K2 ) THEN
+                  SUML = DDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA,
+     $                   C( MIN( K1+1, M ), L1 ), 1 )
+                  SUMR = DDOT( N-L1, C( K1, MIN( L1+1, N ) ), LDC,
+     $                   B( L1, MIN( L1+1, N ) ), LDB )
+                  VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR )
+                  SCALOC = ONE
+*
+                  A11 = A( K1, K1 ) + SGN*B( L1, L1 )
+                  DA11 = ABS( A11 )
+                  IF( DA11.LE.SMIN ) THEN
+                     A11 = SMIN
+                     DA11 = SMIN
+                     INFO = 1
+                  END IF
+                  DB = ABS( VEC( 1, 1 ) )
+                  IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN
+                     IF( DB.GT.BIGNUM*DA11 )
+     $                  SCALOC = ONE / DB
+                  END IF
+                  X( 1, 1 ) = ( VEC( 1, 1 )*SCALOC ) / A11
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 190 J = 1, N
+                        CALL DSCAL( M, SCALOC, C( 1, J ), 1 )
+  190                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+*
+               ELSE IF( L1.EQ.L2 .AND. K1.NE.K2 ) THEN
+*
+                  SUML = DDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA,
+     $                   C( MIN( K2+1, M ), L1 ), 1 )
+                  SUMR = DDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC,
+     $                   B( L1, MIN( L2+1, N ) ), LDB )
+                  VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = DDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA,
+     $                   C( MIN( K2+1, M ), L1 ), 1 )
+                  SUMR = DDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC,
+     $                   B( L1, MIN( L2+1, N ) ), LDB )
+                  VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR )
+*
+                  CALL DLALN2( .FALSE., 2, 1, SMIN, ONE, A( K1, K1 ),
+     $                         LDA, ONE, ONE, VEC, 2, -SGN*B( L1, L1 ),
+     $                         ZERO, X, 2, SCALOC, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 1
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 200 J = 1, N
+                        CALL DSCAL( M, SCALOC, C( 1, J ), 1 )
+  200                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+                  C( K2, L1 ) = X( 2, 1 )
+*
+               ELSE IF( L1.NE.L2 .AND. K1.EQ.K2 ) THEN
+*
+                  SUML = DDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA,
+     $                   C( MIN( K1+1, M ), L1 ), 1 )
+                  SUMR = DDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC,
+     $                   B( L1, MIN( L2+1, N ) ), LDB )
+                  VEC( 1, 1 ) = SGN*( C( K1, L1 )-( SUML+SGN*SUMR ) )
+*
+                  SUML = DDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA,
+     $                   C( MIN( K1+1, M ), L2 ), 1 )
+                  SUMR = DDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC,
+     $                   B( L2, MIN( L2+1, N ) ), LDB )
+                  VEC( 2, 1 ) = SGN*( C( K1, L2 )-( SUML+SGN*SUMR ) )
+*
+                  CALL DLALN2( .FALSE., 2, 1, SMIN, ONE, B( L1, L1 ),
+     $                         LDB, ONE, ONE, VEC, 2, -SGN*A( K1, K1 ),
+     $                         ZERO, X, 2, SCALOC, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 1
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 210 J = 1, N
+                        CALL DSCAL( M, SCALOC, C( 1, J ), 1 )
+  210                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+                  C( K1, L2 ) = X( 2, 1 )
+*
+               ELSE IF( L1.NE.L2 .AND. K1.NE.K2 ) THEN
+*
+                  SUML = DDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA,
+     $                   C( MIN( K2+1, M ), L1 ), 1 )
+                  SUMR = DDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC,
+     $                   B( L1, MIN( L2+1, N ) ), LDB )
+                  VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = DDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA,
+     $                   C( MIN( K2+1, M ), L2 ), 1 )
+                  SUMR = DDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC,
+     $                   B( L2, MIN( L2+1, N ) ), LDB )
+                  VEC( 1, 2 ) = C( K1, L2 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = DDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA,
+     $                   C( MIN( K2+1, M ), L1 ), 1 )
+                  SUMR = DDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC,
+     $                   B( L1, MIN( L2+1, N ) ), LDB )
+                  VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = DDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA,
+     $                   C( MIN( K2+1, M ), L2 ), 1 )
+                  SUMR = DDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC,
+     $                   B( L2, MIN( L2+1, N ) ), LDB )
+                  VEC( 2, 2 ) = C( K2, L2 ) - ( SUML+SGN*SUMR )
+*
+                  CALL DLASY2( .FALSE., .TRUE., ISGN, 2, 2, A( K1, K1 ),
+     $                         LDA, B( L1, L1 ), LDB, VEC, 2, SCALOC, X,
+     $                         2, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 1
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 220 J = 1, N
+                        CALL DSCAL( M, SCALOC, C( 1, J ), 1 )
+  220                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+                  C( K1, L2 ) = X( 1, 2 )
+                  C( K2, L1 ) = X( 2, 1 )
+                  C( K2, L2 ) = X( 2, 2 )
+               END IF
+*
+  230       CONTINUE
+  240    CONTINUE
+*
+      END IF
+*
+      RETURN
+*
+*     End of DTRSYL
+*
+      END
diff --git a/costa/native/external/lapack/dtrti2.f b/costa/native/external/lapack/dtrti2.f
new file mode 100644
index 000000000..1432ea152
--- /dev/null
+++ b/costa/native/external/lapack/dtrti2.f
@@ -0,0 +1,147 @@
+      SUBROUTINE DTRTI2( UPLO, DIAG, N, A, LDA, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTRTI2 computes the inverse of a real upper or lower triangular
+*  matrix.
+*
+*  This is the Level 2 BLAS version of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the matrix A is upper or lower triangular.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  DIAG    (input) CHARACTER*1
+*          Specifies whether or not the matrix A is unit triangular.
+*          = 'N':  Non-unit triangular
+*          = 'U':  Unit triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the triangular matrix A.  If UPLO = 'U', the
+*          leading n by n upper triangular part of the array A contains
+*          the upper triangular matrix, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n by n lower triangular part of the array A contains
+*          the lower triangular matrix, and the strictly upper
+*          triangular part of A is not referenced.  If DIAG = 'U', the
+*          diagonal elements of A are also not referenced and are
+*          assumed to be 1.
+*
+*          On exit, the (triangular) inverse of the original matrix, in
+*          the same storage format.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, UPPER
+      INTEGER            J
+      DOUBLE PRECISION   AJJ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSCAL, DTRMV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOUNIT = LSAME( DIAG, 'N' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DTRTI2', -INFO )
+         RETURN
+      END IF
+*
+      IF( UPPER ) THEN
+*
+*        Compute inverse of upper triangular matrix.
+*
+         DO 10 J = 1, N
+            IF( NOUNIT ) THEN
+               A( J, J ) = ONE / A( J, J )
+               AJJ = -A( J, J )
+            ELSE
+               AJJ = -ONE
+            END IF
+*
+*           Compute elements 1:j-1 of j-th column.
+*
+            CALL DTRMV( 'Upper', 'No transpose', DIAG, J-1, A, LDA,
+     $                  A( 1, J ), 1 )
+            CALL DSCAL( J-1, AJJ, A( 1, J ), 1 )
+   10    CONTINUE
+      ELSE
+*
+*        Compute inverse of lower triangular matrix.
+*
+         DO 20 J = N, 1, -1
+            IF( NOUNIT ) THEN
+               A( J, J ) = ONE / A( J, J )
+               AJJ = -A( J, J )
+            ELSE
+               AJJ = -ONE
+            END IF
+            IF( J.LT.N ) THEN
+*
+*              Compute elements j+1:n of j-th column.
+*
+               CALL DTRMV( 'Lower', 'No transpose', DIAG, N-J,
+     $                     A( J+1, J+1 ), LDA, A( J+1, J ), 1 )
+               CALL DSCAL( N-J, AJJ, A( J+1, J ), 1 )
+            END IF
+   20    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DTRTI2
+*
+      END
diff --git a/costa/native/external/lapack/dtrtri.f b/costa/native/external/lapack/dtrtri.f
new file mode 100644
index 000000000..70f05ed4e
--- /dev/null
+++ b/costa/native/external/lapack/dtrtri.f
@@ -0,0 +1,177 @@
+      SUBROUTINE DTRTRI( UPLO, DIAG, N, A, LDA, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTRTRI computes the inverse of a real upper or lower triangular
+*  matrix A.
+*
+*  This is the Level 3 BLAS version of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the triangular matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of the array A contains
+*          the upper triangular matrix, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of the array A contains
+*          the lower triangular matrix, and the strictly upper
+*          triangular part of A is not referenced.  If DIAG = 'U', the
+*          diagonal elements of A are also not referenced and are
+*          assumed to be 1.
+*          On exit, the (triangular) inverse of the original matrix, in
+*          the same storage format.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, A(i,i) is exactly zero.  The triangular
+*               matrix is singular and its inverse can not be computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, UPPER
+      INTEGER            J, JB, NB, NN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DTRMM, DTRSM, DTRTI2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOUNIT = LSAME( DIAG, 'N' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DTRTRI', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Check for singularity if non-unit.
+*
+      IF( NOUNIT ) THEN
+         DO 10 INFO = 1, N
+            IF( A( INFO, INFO ).EQ.ZERO )
+     $         RETURN
+   10    CONTINUE
+         INFO = 0
+      END IF
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'DTRTRI', UPLO // DIAG, N, -1, -1, -1 )
+      IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+*        Use unblocked code
+*
+         CALL DTRTI2( UPLO, DIAG, N, A, LDA, INFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( UPPER ) THEN
+*
+*           Compute inverse of upper triangular matrix
+*
+            DO 20 J = 1, N, NB
+               JB = MIN( NB, N-J+1 )
+*
+*              Compute rows 1:j-1 of current block column
+*
+               CALL DTRMM( 'Left', 'Upper', 'No transpose', DIAG, J-1,
+     $                     JB, ONE, A, LDA, A( 1, J ), LDA )
+               CALL DTRSM( 'Right', 'Upper', 'No transpose', DIAG, J-1,
+     $                     JB, -ONE, A( J, J ), LDA, A( 1, J ), LDA )
+*
+*              Compute inverse of current diagonal block
+*
+               CALL DTRTI2( 'Upper', DIAG, JB, A( J, J ), LDA, INFO )
+   20       CONTINUE
+         ELSE
+*
+*           Compute inverse of lower triangular matrix
+*
+            NN = ( ( N-1 ) / NB )*NB + 1
+            DO 30 J = NN, 1, -NB
+               JB = MIN( NB, N-J+1 )
+               IF( J+JB.LE.N ) THEN
+*
+*                 Compute rows j+jb:n of current block column
+*
+                  CALL DTRMM( 'Left', 'Lower', 'No transpose', DIAG,
+     $                        N-J-JB+1, JB, ONE, A( J+JB, J+JB ), LDA,
+     $                        A( J+JB, J ), LDA )
+                  CALL DTRSM( 'Right', 'Lower', 'No transpose', DIAG,
+     $                        N-J-JB+1, JB, -ONE, A( J, J ), LDA,
+     $                        A( J+JB, J ), LDA )
+               END IF
+*
+*              Compute inverse of current diagonal block
+*
+               CALL DTRTI2( 'Lower', DIAG, JB, A( J, J ), LDA, INFO )
+   30       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of DTRTRI
+*
+      END
diff --git a/costa/native/external/lapack/dtrtrs.f b/costa/native/external/lapack/dtrtrs.f
new file mode 100644
index 000000000..c1b4c5c4c
--- /dev/null
+++ b/costa/native/external/lapack/dtrtrs.f
@@ -0,0 +1,148 @@
+      SUBROUTINE DTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, TRANS, UPLO
+      INTEGER            INFO, LDA, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTRTRS solves a triangular system of the form
+*
+*     A * X = B  or  A**T * X = B,
+*
+*  where A is a triangular matrix of order N, and B is an N-by-NRHS
+*  matrix.  A check is made to verify that A is nonsingular.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B  (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose = Transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+*          The triangular matrix A.  If UPLO = 'U', the leading N-by-N
+*          upper triangular part of the array A contains the upper
+*          triangular matrix, and the strictly lower triangular part of
+*          A is not referenced.  If UPLO = 'L', the leading N-by-N lower
+*          triangular part of the array A contains the lower triangular
+*          matrix, and the strictly upper triangular part of A is not
+*          referenced.  If DIAG = 'U', the diagonal elements of A are
+*          also not referenced and are assumed to be 1.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, if INFO = 0, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, the i-th diagonal element of A is zero,
+*               indicating that the matrix is singular and the solutions
+*               X have not been computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DTRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOUNIT = LSAME( DIAG, 'N' )
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.
+     $         LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DTRTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Check for singularity.
+*
+      IF( NOUNIT ) THEN
+         DO 10 INFO = 1, N
+            IF( A( INFO, INFO ).EQ.ZERO )
+     $         RETURN
+   10    CONTINUE
+      END IF
+      INFO = 0
+*
+*     Solve A * x = b  or  A' * x = b.
+*
+      CALL DTRSM( 'Left', UPLO, TRANS, DIAG, N, NRHS, ONE, A, LDA, B,
+     $            LDB )
+*
+      RETURN
+*
+*     End of DTRTRS
+*
+      END
diff --git a/costa/native/external/lapack/dtzrqf.f b/costa/native/external/lapack/dtzrqf.f
new file mode 100644
index 000000000..e13bf32e2
--- /dev/null
+++ b/costa/native/external/lapack/dtzrqf.f
@@ -0,0 +1,165 @@
+      SUBROUTINE DTZRQF( M, N, A, LDA, TAU, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), TAU( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This routine is deprecated and has been replaced by routine DTZRZF.
+*
+*  DTZRQF reduces the M-by-N ( M<=N ) real upper trapezoidal matrix A
+*  to upper triangular form by means of orthogonal transformations.
+*
+*  The upper trapezoidal matrix A is factored as
+*
+*     A = ( R  0 ) * Z,
+*
+*  where Z is an N-by-N orthogonal matrix and R is an M-by-M upper
+*  triangular matrix.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= M.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the leading M-by-N upper trapezoidal part of the
+*          array A must contain the matrix to be factorized.
+*          On exit, the leading M-by-M upper triangular part of A
+*          contains the upper triangular matrix R, and elements M+1 to
+*          N of the first M rows of A, with the array TAU, represent the
+*          orthogonal matrix Z as a product of M elementary reflectors.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) DOUBLE PRECISION array, dimension (M)
+*          The scalar factors of the elementary reflectors.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The factorization is obtained by Householder's method.  The kth
+*  transformation matrix, Z( k ), which is used to introduce zeros into
+*  the ( m - k + 1 )th row of A, is given in the form
+*
+*     Z( k ) = ( I     0   ),
+*              ( 0  T( k ) )
+*
+*  where
+*
+*     T( k ) = I - tau*u( k )*u( k )',   u( k ) = (   1    ),
+*                                                 (   0    )
+*                                                 ( z( k ) )
+*
+*  tau is a scalar and z( k ) is an ( n - m ) element vector.
+*  tau and z( k ) are chosen to annihilate the elements of the kth row
+*  of X.
+*
+*  The scalar tau is returned in the kth element of TAU and the vector
+*  u( k ) in the kth row of A, such that the elements of z( k ) are
+*  in  a( k, m + 1 ), ..., a( k, n ). The elements of R are returned in
+*  the upper triangular part of A.
+*
+*  Z is given by
+*
+*     Z =  Z( 1 ) * Z( 2 ) * ... * Z( m ).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, K, M1
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DCOPY, DGEMV, DGER, DLARFG, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.M ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DTZRQF', -INFO )
+         RETURN
+      END IF
+*
+*     Perform the factorization.
+*
+      IF( M.EQ.0 )
+     $   RETURN
+      IF( M.EQ.N ) THEN
+         DO 10 I = 1, N
+            TAU( I ) = ZERO
+   10    CONTINUE
+      ELSE
+         M1 = MIN( M+1, N )
+         DO 20 K = M, 1, -1
+*
+*           Use a Householder reflection to zero the kth row of A.
+*           First set up the reflection.
+*
+            CALL DLARFG( N-M+1, A( K, K ), A( K, M1 ), LDA, TAU( K ) )
+*
+            IF( ( TAU( K ).NE.ZERO ) .AND. ( K.GT.1 ) ) THEN
+*
+*              We now perform the operation  A := A*P( k ).
+*
+*              Use the first ( k - 1 ) elements of TAU to store  a( k ),
+*              where  a( k ) consists of the first ( k - 1 ) elements of
+*              the  kth column  of  A.  Also  let  B  denote  the  first
+*              ( k - 1 ) rows of the last ( n - m ) columns of A.
+*
+               CALL DCOPY( K-1, A( 1, K ), 1, TAU, 1 )
+*
+*              Form   w = a( k ) + B*z( k )  in TAU.
+*
+               CALL DGEMV( 'No transpose', K-1, N-M, ONE, A( 1, M1 ),
+     $                     LDA, A( K, M1 ), LDA, ONE, TAU, 1 )
+*
+*              Now form  a( k ) := a( k ) - tau*w
+*              and       B      := B      - tau*w*z( k )'.
+*
+               CALL DAXPY( K-1, -TAU( K ), TAU, 1, A( 1, K ), 1 )
+               CALL DGER( K-1, N-M, -TAU( K ), TAU, 1, A( K, M1 ), LDA,
+     $                    A( 1, M1 ), LDA )
+            END IF
+   20    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of DTZRQF
+*
+      END
diff --git a/costa/native/external/lapack/dtzrzf.f b/costa/native/external/lapack/dtzrzf.f
new file mode 100644
index 000000000..895c4833f
--- /dev/null
+++ b/costa/native/external/lapack/dtzrzf.f
@@ -0,0 +1,241 @@
+      SUBROUTINE DTZRZF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DTZRZF reduces the M-by-N ( M<=N ) real upper trapezoidal matrix A
+*  to upper triangular form by means of orthogonal transformations.
+*
+*  The upper trapezoidal matrix A is factored as
+*
+*     A = ( R  0 ) * Z,
+*
+*  where Z is an N-by-N orthogonal matrix and R is an M-by-M upper
+*  triangular matrix.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+*          On entry, the leading M-by-N upper trapezoidal part of the
+*          array A must contain the matrix to be factorized.
+*          On exit, the leading M-by-M upper triangular part of A
+*          contains the upper triangular matrix R, and elements M+1 to
+*          N of the first M rows of A, with the array TAU, represent the
+*          orthogonal matrix Z as a product of M elementary reflectors.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) DOUBLE PRECISION array, dimension (M)
+*          The scalar factors of the elementary reflectors.
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,M).
+*          For optimum performance LWORK >= M*NB, where NB is
+*          the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+*  The factorization is obtained by Householder's method.  The kth
+*  transformation matrix, Z( k ), which is used to introduce zeros into
+*  the ( m - k + 1 )th row of A, is given in the form
+*
+*     Z( k ) = ( I     0   ),
+*              ( 0  T( k ) )
+*
+*  where
+*
+*     T( k ) = I - tau*u( k )*u( k )',   u( k ) = (   1    ),
+*                                                 (   0    )
+*                                                 ( z( k ) )
+*
+*  tau is a scalar and z( k ) is an ( n - m ) element vector.
+*  tau and z( k ) are chosen to annihilate the elements of the kth row
+*  of X.
+*
+*  The scalar tau is returned in the kth element of TAU and the vector
+*  u( k ) in the kth row of A, such that the elements of z( k ) are
+*  in  a( k, m + 1 ), ..., a( k, n ). The elements of R are returned in
+*  the upper triangular part of A.
+*
+*  Z is given by
+*
+*     Z =  Z( 1 ) * Z( 2 ) * ... * Z( m ).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IWS, KI, KK, LDWORK, LWKOPT, M1, MU, NB,
+     $                   NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARZB, DLARZT, DLATRZ, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.M ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size.
+*
+         NB = ILAENV( 1, 'DGERQF', ' ', M, N, -1, -1 )
+         LWKOPT = M*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DTZRZF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      ELSE IF( M.EQ.N ) THEN
+         DO 10 I = 1, N
+            TAU( I ) = ZERO
+   10    CONTINUE
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 1
+      IWS = M
+      IF( NB.GT.1 .AND. NB.LT.M ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'DGERQF', ' ', M, N, -1, -1 ) )
+         IF( NX.LT.M ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = M
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'DGERQF', ' ', M, N, -1,
+     $                 -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.M .AND. NX.LT.M ) THEN
+*
+*        Use blocked code initially.
+*        The last kk rows are handled by the block method.
+*
+         M1 = MIN( M+1, N )
+         KI = ( ( M-NX-1 ) / NB )*NB
+         KK = MIN( M, KI+NB )
+*
+         DO 20 I = M - KK + KI + 1, M - KK + 1, -NB
+            IB = MIN( M-I+1, NB )
+*
+*           Compute the TZ factorization of the current block
+*           A(i:i+ib-1,i:n)
+*
+            CALL DLATRZ( IB, N-I+1, N-M, A( I, I ), LDA, TAU( I ),
+     $                   WORK )
+            IF( I.GT.1 ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i+ib-1) . . . H(i+1) H(i)
+*
+               CALL DLARZT( 'Backward', 'Rowwise', N-M, IB, A( I, M1 ),
+     $                      LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H to A(1:i-1,i:n) from the right
+*
+               CALL DLARZB( 'Right', 'No transpose', 'Backward',
+     $                      'Rowwise', I-1, N-I+1, IB, N-M, A( I, M1 ),
+     $                      LDA, WORK, LDWORK, A( 1, I ), LDA,
+     $                      WORK( IB+1 ), LDWORK )
+            END IF
+   20    CONTINUE
+         MU = I + NB - 1
+      ELSE
+         MU = M
+      END IF
+*
+*     Use unblocked code to factor the last or only block
+*
+      IF( MU.GT.0 )
+     $   CALL DLATRZ( MU, N, N-M, A, LDA, TAU, WORK )
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of DTZRZF
+*
+      END
diff --git a/costa/native/external/lapack/dzsum1.f b/costa/native/external/lapack/dzsum1.f
new file mode 100644
index 000000000..341165cef
--- /dev/null
+++ b/costa/native/external/lapack/dzsum1.f
@@ -0,0 +1,82 @@
+      DOUBLE PRECISION FUNCTION DZSUM1( N, CX, INCX )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         CX( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  DZSUM1 takes the sum of the absolute values of a complex
+*  vector and returns a double precision result.
+*
+*  Based on DZASUM from the Level 1 BLAS.
+*  The change is to use the 'genuine' absolute value.
+*
+*  Contributed by Nick Higham for use with ZLACON.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of elements in the vector CX.
+*
+*  CX      (input) COMPLEX*16 array, dimension (N)
+*          The vector whose elements will be summed.
+*
+*  INCX    (input) INTEGER
+*          The spacing between successive values of CX.  INCX > 0.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, NINCX
+      DOUBLE PRECISION   STEMP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS
+*     ..
+*     .. Executable Statements ..
+*
+      DZSUM1 = 0.0D0
+      STEMP = 0.0D0
+      IF( N.LE.0 )
+     $   RETURN
+      IF( INCX.EQ.1 )
+     $   GO TO 20
+*
+*     CODE FOR INCREMENT NOT EQUAL TO 1
+*
+      NINCX = N*INCX
+      DO 10 I = 1, NINCX, INCX
+*
+*        NEXT LINE MODIFIED.
+*
+         STEMP = STEMP + ABS( CX( I ) )
+   10 CONTINUE
+      DZSUM1 = STEMP
+      RETURN
+*
+*     CODE FOR INCREMENT EQUAL TO 1
+*
+   20 CONTINUE
+      DO 30 I = 1, N
+*
+*        NEXT LINE MODIFIED.
+*
+         STEMP = STEMP + ABS( CX( I ) )
+   30 CONTINUE
+      DZSUM1 = STEMP
+      RETURN
+*
+*     End of DZSUM1
+*
+      END
diff --git a/costa/native/external/lapack/icmax1.f b/costa/native/external/lapack/icmax1.f
new file mode 100644
index 000000000..c96f977bf
--- /dev/null
+++ b/costa/native/external/lapack/icmax1.f
@@ -0,0 +1,96 @@
+      INTEGER          FUNCTION ICMAX1( N, CX, INCX )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            CX( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ICMAX1 finds the index of the element whose real part has maximum
+*  absolute value.
+*
+*  Based on ICAMAX from Level 1 BLAS.
+*  The change is to use the 'genuine' absolute value.
+*
+*  Contributed by Nick Higham for use with CLACON.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of elements in the vector CX.
+*
+*  CX      (input) COMPLEX array, dimension (N)
+*          The vector whose elements will be summed.
+*
+*  INCX    (input) INTEGER
+*          The spacing between successive values of CX.  INCX >= 1.
+*
+* =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, IX
+      REAL               SMAX
+      COMPLEX            ZDUM
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, REAL
+*     ..
+*     .. Statement Functions ..
+      REAL               CABS1
+*     ..
+*     .. Statement Function definitions ..
+*
+*     NEXT LINE IS THE ONLY MODIFICATION.
+      CABS1( ZDUM ) = ABS( ZDUM )
+*     ..
+*     .. Executable Statements ..
+*
+      ICMAX1 = 0
+      IF( N.LT.1 )
+     $   RETURN
+      ICMAX1 = 1
+      IF( N.EQ.1 )
+     $   RETURN
+      IF( INCX.EQ.1 )
+     $   GO TO 30
+*
+*     CODE FOR INCREMENT NOT EQUAL TO 1
+*
+      IX = 1
+      SMAX = CABS1( CX( 1 ) )
+      IX = IX + INCX
+      DO 20 I = 2, N
+         IF( CABS1( CX( IX ) ).LE.SMAX )
+     $      GO TO 10
+         ICMAX1 = I
+         SMAX = CABS1( CX( IX ) )
+   10    CONTINUE
+         IX = IX + INCX
+   20 CONTINUE
+      RETURN
+*
+*     CODE FOR INCREMENT EQUAL TO 1
+*
+   30 CONTINUE
+      SMAX = CABS1( CX( 1 ) )
+      DO 40 I = 2, N
+         IF( CABS1( CX( I ) ).LE.SMAX )
+     $      GO TO 40
+         ICMAX1 = I
+         SMAX = CABS1( CX( I ) )
+   40 CONTINUE
+      RETURN
+*
+*     End of ICMAX1
+*
+      END
diff --git a/costa/native/external/lapack/ieeeck.f b/costa/native/external/lapack/ieeeck.f
new file mode 100644
index 000000000..3c09fe95e
--- /dev/null
+++ b/costa/native/external/lapack/ieeeck.f
@@ -0,0 +1,148 @@
+      INTEGER          FUNCTION IEEECK( ISPEC, ZERO, ONE )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1998
+*
+*     .. Scalar Arguments ..
+      INTEGER            ISPEC
+      REAL               ONE, ZERO
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  IEEECK is called from the ILAENV to verify that Infinity and
+*  possibly NaN arithmetic is safe (i.e. will not trap).
+*
+*  Arguments
+*  =========
+*
+*  ISPEC   (input) INTEGER
+*          Specifies whether to test just for inifinity arithmetic
+*          or whether to test for infinity and NaN arithmetic.
+*          = 0: Verify infinity arithmetic only.
+*          = 1: Verify infinity and NaN arithmetic.
+*
+*  ZERO    (input) REAL
+*          Must contain the value 0.0
+*          This is passed to prevent the compiler from optimizing
+*          away this code.
+*
+*  ONE     (input) REAL
+*          Must contain the value 1.0
+*          This is passed to prevent the compiler from optimizing
+*          away this code.
+*
+*  RETURN VALUE:  INTEGER
+*          = 0:  Arithmetic failed to produce the correct answers
+*          = 1:  Arithmetic produced the correct answers
+*
+*     .. Local Scalars ..
+      REAL               NAN1, NAN2, NAN3, NAN4, NAN5, NAN6, NEGINF,
+     $                   NEGZRO, NEWZRO, POSINF
+*     ..
+*     .. Executable Statements ..
+      IEEECK = 1
+*
+      POSINF = ONE / ZERO
+      IF( POSINF.LE.ONE ) THEN
+         IEEECK = 0
+         RETURN
+      END IF
+*
+      NEGINF = -ONE / ZERO
+      IF( NEGINF.GE.ZERO ) THEN
+         IEEECK = 0
+         RETURN
+      END IF
+*
+      NEGZRO = ONE / ( NEGINF+ONE )
+      IF( NEGZRO.NE.ZERO ) THEN
+         IEEECK = 0
+         RETURN
+      END IF
+*
+      NEGINF = ONE / NEGZRO
+      IF( NEGINF.GE.ZERO ) THEN
+         IEEECK = 0
+         RETURN
+      END IF
+*
+      NEWZRO = NEGZRO + ZERO
+      IF( NEWZRO.NE.ZERO ) THEN
+         IEEECK = 0
+         RETURN
+      END IF
+*
+      POSINF = ONE / NEWZRO
+      IF( POSINF.LE.ONE ) THEN
+         IEEECK = 0
+         RETURN
+      END IF
+*
+      NEGINF = NEGINF*POSINF
+      IF( NEGINF.GE.ZERO ) THEN
+         IEEECK = 0
+         RETURN
+      END IF
+*
+      POSINF = POSINF*POSINF
+      IF( POSINF.LE.ONE ) THEN
+         IEEECK = 0
+         RETURN
+      END IF
+*
+*
+*
+*
+*     Return if we were only asked to check infinity arithmetic
+*
+      IF( ISPEC.EQ.0 )
+     $   RETURN
+*
+      NAN1 = POSINF + NEGINF
+*
+      NAN2 = POSINF / NEGINF
+*
+      NAN3 = POSINF / POSINF
+*
+      NAN4 = POSINF*ZERO
+*
+      NAN5 = NEGINF*NEGZRO
+*
+      NAN6 = NAN5*0.0
+*
+      IF( NAN1.EQ.NAN1 ) THEN
+         IEEECK = 0
+         RETURN
+      END IF
+*
+      IF( NAN2.EQ.NAN2 ) THEN
+         IEEECK = 0
+         RETURN
+      END IF
+*
+      IF( NAN3.EQ.NAN3 ) THEN
+         IEEECK = 0
+         RETURN
+      END IF
+*
+      IF( NAN4.EQ.NAN4 ) THEN
+         IEEECK = 0
+         RETURN
+      END IF
+*
+      IF( NAN5.EQ.NAN5 ) THEN
+         IEEECK = 0
+         RETURN
+      END IF
+*
+      IF( NAN6.EQ.NAN6 ) THEN
+         IEEECK = 0
+         RETURN
+      END IF
+*
+      RETURN
+      END
diff --git a/costa/native/external/lapack/ilaenv.f b/costa/native/external/lapack/ilaenv.f
new file mode 100644
index 000000000..7263d6010
--- /dev/null
+++ b/costa/native/external/lapack/ilaenv.f
@@ -0,0 +1,547 @@
+      INTEGER          FUNCTION ILAENV( ISPEC, NAME, OPTS, N1, N2, N3,
+     $                 N4 )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER*( * )    NAME, OPTS
+      INTEGER            ISPEC, N1, N2, N3, N4
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ILAENV is called from the LAPACK routines to choose problem-dependent
+*  parameters for the local environment.  See ISPEC for a description of
+*  the parameters.
+*
+*  This version provides a set of parameters which should give good,
+*  but not optimal, performance on many of the currently available
+*  computers.  Users are encouraged to modify this subroutine to set
+*  the tuning parameters for their particular machine using the option
+*  and problem size information in the arguments.
+*
+*  This routine will not function correctly if it is converted to all
+*  lower case.  Converting it to all upper case is allowed.
+*
+*  Arguments
+*  =========
+*
+*  ISPEC   (input) INTEGER
+*          Specifies the parameter to be returned as the value of
+*          ILAENV.
+*          = 1: the optimal blocksize; if this value is 1, an unblocked
+*               algorithm will give the best performance.
+*          = 2: the minimum block size for which the block routine
+*               should be used; if the usable block size is less than
+*               this value, an unblocked routine should be used.
+*          = 3: the crossover point (in a block routine, for N less
+*               than this value, an unblocked routine should be used)
+*          = 4: the number of shifts, used in the nonsymmetric
+*               eigenvalue routines
+*          = 5: the minimum column dimension for blocking to be used;
+*               rectangular blocks must have dimension at least k by m,
+*               where k is given by ILAENV(2,...) and m by ILAENV(5,...)
+*          = 6: the crossover point for the SVD (when reducing an m by n
+*               matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds
+*               this value, a QR factorization is used first to reduce
+*               the matrix to a triangular form.)
+*          = 7: the number of processors
+*          = 8: the crossover point for the multishift QR and QZ methods
+*               for nonsymmetric eigenvalue problems.
+*          = 9: maximum size of the subproblems at the bottom of the
+*               computation tree in the divide-and-conquer algorithm
+*               (used by xGELSD and xGESDD)
+*          =10: ieee NaN arithmetic can be trusted not to trap
+*          =11: infinity arithmetic can be trusted not to trap
+*
+*  NAME    (input) CHARACTER*(*)
+*          The name of the calling subroutine, in either upper case or
+*          lower case.
+*
+*  OPTS    (input) CHARACTER*(*)
+*          The character options to the subroutine NAME, concatenated
+*          into a single character string.  For example, UPLO = 'U',
+*          TRANS = 'T', and DIAG = 'N' for a triangular routine would
+*          be specified as OPTS = 'UTN'.
+*
+*  N1      (input) INTEGER
+*  N2      (input) INTEGER
+*  N3      (input) INTEGER
+*  N4      (input) INTEGER
+*          Problem dimensions for the subroutine NAME; these may not all
+*          be required.
+*
+* (ILAENV) (output) INTEGER
+*          >= 0: the value of the parameter specified by ISPEC
+*          < 0:  if ILAENV = -k, the k-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The following conventions have been used when calling ILAENV from the
+*  LAPACK routines:
+*  1)  OPTS is a concatenation of all of the character options to
+*      subroutine NAME, in the same order that they appear in the
+*      argument list for NAME, even if they are not used in determining
+*      the value of the parameter specified by ISPEC.
+*  2)  The problem dimensions N1, N2, N3, N4 are specified in the order
+*      that they appear in the argument list for NAME.  N1 is used
+*      first, N2 second, and so on, and unused problem dimensions are
+*      passed a value of -1.
+*  3)  The parameter value returned by ILAENV is checked for validity in
+*      the calling subroutine.  For example, ILAENV is used to retrieve
+*      the optimal blocksize for STRTRI as follows:
+*
+*      NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 )
+*      IF( NB.LE.1 ) NB = MAX( 1, N )
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            CNAME, SNAME
+      CHARACTER*1        C1
+      CHARACTER*2        C2, C4
+      CHARACTER*3        C3
+      CHARACTER*6        SUBNAM
+      INTEGER            I, IC, IZ, NB, NBMIN, NX
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          CHAR, ICHAR, INT, MIN, REAL
+*     ..
+*     .. External Functions ..
+      INTEGER            IEEECK
+      EXTERNAL           IEEECK
+*     ..
+*     .. Executable Statements ..
+*
+      GO TO ( 100, 100, 100, 400, 500, 600, 700, 800, 900, 1000,
+     $        1100 ) ISPEC
+*
+*     Invalid value for ISPEC
+*
+      ILAENV = -1
+      RETURN
+*
+  100 CONTINUE
+*
+*     Convert NAME to upper case if the first character is lower case.
+*
+      ILAENV = 1
+      SUBNAM = NAME
+      IC = ICHAR( SUBNAM( 1:1 ) )
+      IZ = ICHAR( 'Z' )
+      IF( IZ.EQ.90 .OR. IZ.EQ.122 ) THEN
+*
+*        ASCII character set
+*
+         IF( IC.GE.97 .AND. IC.LE.122 ) THEN
+            SUBNAM( 1:1 ) = CHAR( IC-32 )
+            DO 10 I = 2, 6
+               IC = ICHAR( SUBNAM( I:I ) )
+               IF( IC.GE.97 .AND. IC.LE.122 )
+     $            SUBNAM( I:I ) = CHAR( IC-32 )
+   10       CONTINUE
+         END IF
+*
+      ELSE IF( IZ.EQ.233 .OR. IZ.EQ.169 ) THEN
+*
+*        EBCDIC character set
+*
+         IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR.
+     $       ( IC.GE.145 .AND. IC.LE.153 ) .OR.
+     $       ( IC.GE.162 .AND. IC.LE.169 ) ) THEN
+            SUBNAM( 1:1 ) = CHAR( IC+64 )
+            DO 20 I = 2, 6
+               IC = ICHAR( SUBNAM( I:I ) )
+               IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR.
+     $             ( IC.GE.145 .AND. IC.LE.153 ) .OR.
+     $             ( IC.GE.162 .AND. IC.LE.169 ) )
+     $            SUBNAM( I:I ) = CHAR( IC+64 )
+   20       CONTINUE
+         END IF
+*
+      ELSE IF( IZ.EQ.218 .OR. IZ.EQ.250 ) THEN
+*
+*        Prime machines:  ASCII+128
+*
+         IF( IC.GE.225 .AND. IC.LE.250 ) THEN
+            SUBNAM( 1:1 ) = CHAR( IC-32 )
+            DO 30 I = 2, 6
+               IC = ICHAR( SUBNAM( I:I ) )
+               IF( IC.GE.225 .AND. IC.LE.250 )
+     $            SUBNAM( I:I ) = CHAR( IC-32 )
+   30       CONTINUE
+         END IF
+      END IF
+*
+      C1 = SUBNAM( 1:1 )
+      SNAME = C1.EQ.'S' .OR. C1.EQ.'D'
+      CNAME = C1.EQ.'C' .OR. C1.EQ.'Z'
+      IF( .NOT.( CNAME .OR. SNAME ) )
+     $   RETURN
+      C2 = SUBNAM( 2:3 )
+      C3 = SUBNAM( 4:6 )
+      C4 = C3( 2:3 )
+*
+      GO TO ( 110, 200, 300 ) ISPEC
+*
+  110 CONTINUE
+*
+*     ISPEC = 1:  block size
+*
+*     In these examples, separate code is provided for setting NB for
+*     real and complex.  We assume that NB will take the same value in
+*     single or double precision.
+*
+      NB = 1
+*
+      IF( C2.EQ.'GE' ) THEN
+         IF( C3.EQ.'TRF' ) THEN
+            IF( SNAME ) THEN
+               NB = 64
+            ELSE
+               NB = 64
+            END IF
+         ELSE IF( C3.EQ.'QRF' .OR. C3.EQ.'RQF' .OR. C3.EQ.'LQF' .OR.
+     $            C3.EQ.'QLF' ) THEN
+            IF( SNAME ) THEN
+               NB = 32
+            ELSE
+               NB = 32
+            END IF
+         ELSE IF( C3.EQ.'HRD' ) THEN
+            IF( SNAME ) THEN
+               NB = 32
+            ELSE
+               NB = 32
+            END IF
+         ELSE IF( C3.EQ.'BRD' ) THEN
+            IF( SNAME ) THEN
+               NB = 32
+            ELSE
+               NB = 32
+            END IF
+         ELSE IF( C3.EQ.'TRI' ) THEN
+            IF( SNAME ) THEN
+               NB = 64
+            ELSE
+               NB = 64
+            END IF
+         END IF
+      ELSE IF( C2.EQ.'PO' ) THEN
+         IF( C3.EQ.'TRF' ) THEN
+            IF( SNAME ) THEN
+               NB = 64
+            ELSE
+               NB = 64
+            END IF
+         END IF
+      ELSE IF( C2.EQ.'SY' ) THEN
+         IF( C3.EQ.'TRF' ) THEN
+            IF( SNAME ) THEN
+               NB = 64
+            ELSE
+               NB = 64
+            END IF
+         ELSE IF( SNAME .AND. C3.EQ.'TRD' ) THEN
+            NB = 32
+         ELSE IF( SNAME .AND. C3.EQ.'GST' ) THEN
+            NB = 64
+         END IF
+      ELSE IF( CNAME .AND. C2.EQ.'HE' ) THEN
+         IF( C3.EQ.'TRF' ) THEN
+            NB = 64
+         ELSE IF( C3.EQ.'TRD' ) THEN
+            NB = 32
+         ELSE IF( C3.EQ.'GST' ) THEN
+            NB = 64
+         END IF
+      ELSE IF( SNAME .AND. C2.EQ.'OR' ) THEN
+         IF( C3( 1:1 ).EQ.'G' ) THEN
+            IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR.
+     $          C4.EQ.'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR.
+     $          C4.EQ.'BR' ) THEN
+               NB = 32
+            END IF
+         ELSE IF( C3( 1:1 ).EQ.'M' ) THEN
+            IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR.
+     $          C4.EQ.'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR.
+     $          C4.EQ.'BR' ) THEN
+               NB = 32
+            END IF
+         END IF
+      ELSE IF( CNAME .AND. C2.EQ.'UN' ) THEN
+         IF( C3( 1:1 ).EQ.'G' ) THEN
+            IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR.
+     $          C4.EQ.'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR.
+     $          C4.EQ.'BR' ) THEN
+               NB = 32
+            END IF
+         ELSE IF( C3( 1:1 ).EQ.'M' ) THEN
+            IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR.
+     $          C4.EQ.'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR.
+     $          C4.EQ.'BR' ) THEN
+               NB = 32
+            END IF
+         END IF
+      ELSE IF( C2.EQ.'GB' ) THEN
+         IF( C3.EQ.'TRF' ) THEN
+            IF( SNAME ) THEN
+               IF( N4.LE.64 ) THEN
+                  NB = 1
+               ELSE
+                  NB = 32
+               END IF
+            ELSE
+               IF( N4.LE.64 ) THEN
+                  NB = 1
+               ELSE
+                  NB = 32
+               END IF
+            END IF
+         END IF
+      ELSE IF( C2.EQ.'PB' ) THEN
+         IF( C3.EQ.'TRF' ) THEN
+            IF( SNAME ) THEN
+               IF( N2.LE.64 ) THEN
+                  NB = 1
+               ELSE
+                  NB = 32
+               END IF
+            ELSE
+               IF( N2.LE.64 ) THEN
+                  NB = 1
+               ELSE
+                  NB = 32
+               END IF
+            END IF
+         END IF
+      ELSE IF( C2.EQ.'TR' ) THEN
+         IF( C3.EQ.'TRI' ) THEN
+            IF( SNAME ) THEN
+               NB = 64
+            ELSE
+               NB = 64
+            END IF
+         END IF
+      ELSE IF( C2.EQ.'LA' ) THEN
+         IF( C3.EQ.'UUM' ) THEN
+            IF( SNAME ) THEN
+               NB = 64
+            ELSE
+               NB = 64
+            END IF
+         END IF
+      ELSE IF( SNAME .AND. C2.EQ.'ST' ) THEN
+         IF( C3.EQ.'EBZ' ) THEN
+            NB = 1
+         END IF
+      END IF
+      ILAENV = NB
+      RETURN
+*
+  200 CONTINUE
+*
+*     ISPEC = 2:  minimum block size
+*
+      NBMIN = 2
+      IF( C2.EQ.'GE' ) THEN
+         IF( C3.EQ.'QRF' .OR. C3.EQ.'RQF' .OR. C3.EQ.'LQF' .OR.
+     $       C3.EQ.'QLF' ) THEN
+            IF( SNAME ) THEN
+               NBMIN = 2
+            ELSE
+               NBMIN = 2
+            END IF
+         ELSE IF( C3.EQ.'HRD' ) THEN
+            IF( SNAME ) THEN
+               NBMIN = 2
+            ELSE
+               NBMIN = 2
+            END IF
+         ELSE IF( C3.EQ.'BRD' ) THEN
+            IF( SNAME ) THEN
+               NBMIN = 2
+            ELSE
+               NBMIN = 2
+            END IF
+         ELSE IF( C3.EQ.'TRI' ) THEN
+            IF( SNAME ) THEN
+               NBMIN = 2
+            ELSE
+               NBMIN = 2
+            END IF
+         END IF
+      ELSE IF( C2.EQ.'SY' ) THEN
+         IF( C3.EQ.'TRF' ) THEN
+            IF( SNAME ) THEN
+               NBMIN = 8
+            ELSE
+               NBMIN = 8
+            END IF
+         ELSE IF( SNAME .AND. C3.EQ.'TRD' ) THEN
+            NBMIN = 2
+         END IF
+      ELSE IF( CNAME .AND. C2.EQ.'HE' ) THEN
+         IF( C3.EQ.'TRD' ) THEN
+            NBMIN = 2
+         END IF
+      ELSE IF( SNAME .AND. C2.EQ.'OR' ) THEN
+         IF( C3( 1:1 ).EQ.'G' ) THEN
+            IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR.
+     $          C4.EQ.'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR.
+     $          C4.EQ.'BR' ) THEN
+               NBMIN = 2
+            END IF
+         ELSE IF( C3( 1:1 ).EQ.'M' ) THEN
+            IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR.
+     $          C4.EQ.'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR.
+     $          C4.EQ.'BR' ) THEN
+               NBMIN = 2
+            END IF
+         END IF
+      ELSE IF( CNAME .AND. C2.EQ.'UN' ) THEN
+         IF( C3( 1:1 ).EQ.'G' ) THEN
+            IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR.
+     $          C4.EQ.'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR.
+     $          C4.EQ.'BR' ) THEN
+               NBMIN = 2
+            END IF
+         ELSE IF( C3( 1:1 ).EQ.'M' ) THEN
+            IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR.
+     $          C4.EQ.'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR.
+     $          C4.EQ.'BR' ) THEN
+               NBMIN = 2
+            END IF
+         END IF
+      END IF
+      ILAENV = NBMIN
+      RETURN
+*
+  300 CONTINUE
+*
+*     ISPEC = 3:  crossover point
+*
+      NX = 0
+      IF( C2.EQ.'GE' ) THEN
+         IF( C3.EQ.'QRF' .OR. C3.EQ.'RQF' .OR. C3.EQ.'LQF' .OR.
+     $       C3.EQ.'QLF' ) THEN
+            IF( SNAME ) THEN
+               NX = 128
+            ELSE
+               NX = 128
+            END IF
+         ELSE IF( C3.EQ.'HRD' ) THEN
+            IF( SNAME ) THEN
+               NX = 128
+            ELSE
+               NX = 128
+            END IF
+         ELSE IF( C3.EQ.'BRD' ) THEN
+            IF( SNAME ) THEN
+               NX = 128
+            ELSE
+               NX = 128
+            END IF
+         END IF
+      ELSE IF( C2.EQ.'SY' ) THEN
+         IF( SNAME .AND. C3.EQ.'TRD' ) THEN
+            NX = 32
+         END IF
+      ELSE IF( CNAME .AND. C2.EQ.'HE' ) THEN
+         IF( C3.EQ.'TRD' ) THEN
+            NX = 32
+         END IF
+      ELSE IF( SNAME .AND. C2.EQ.'OR' ) THEN
+         IF( C3( 1:1 ).EQ.'G' ) THEN
+            IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR.
+     $          C4.EQ.'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR.
+     $          C4.EQ.'BR' ) THEN
+               NX = 128
+            END IF
+         END IF
+      ELSE IF( CNAME .AND. C2.EQ.'UN' ) THEN
+         IF( C3( 1:1 ).EQ.'G' ) THEN
+            IF( C4.EQ.'QR' .OR. C4.EQ.'RQ' .OR. C4.EQ.'LQ' .OR.
+     $          C4.EQ.'QL' .OR. C4.EQ.'HR' .OR. C4.EQ.'TR' .OR.
+     $          C4.EQ.'BR' ) THEN
+               NX = 128
+            END IF
+         END IF
+      END IF
+      ILAENV = NX
+      RETURN
+*
+  400 CONTINUE
+*
+*     ISPEC = 4:  number of shifts (used by xHSEQR)
+*
+      ILAENV = 6
+      RETURN
+*
+  500 CONTINUE
+*
+*     ISPEC = 5:  minimum column dimension (not used)
+*
+      ILAENV = 2
+      RETURN
+*
+  600 CONTINUE 
+*
+*     ISPEC = 6:  crossover point for SVD (used by xGELSS and xGESVD)
+*
+      ILAENV = INT( REAL( MIN( N1, N2 ) )*1.6E0 )
+      RETURN
+*
+  700 CONTINUE
+*
+*     ISPEC = 7:  number of processors (not used)
+*
+      ILAENV = 1
+      RETURN
+*
+  800 CONTINUE
+*
+*     ISPEC = 8:  crossover point for multishift (used by xHSEQR)
+*
+      ILAENV = 50
+      RETURN
+*
+  900 CONTINUE
+*
+*     ISPEC = 9:  maximum size of the subproblems at the bottom of the
+*                 computation tree in the divide-and-conquer algorithm
+*                 (used by xGELSD and xGESDD)
+*
+      ILAENV = 25
+      RETURN
+*
+ 1000 CONTINUE
+*
+*     ISPEC = 10: ieee NaN arithmetic can be trusted not to trap
+*
+C     ILAENV = 0
+      ILAENV = 1
+      IF( ILAENV.EQ.1 ) THEN
+         ILAENV = IEEECK( 0, 0.0, 1.0 ) 
+      END IF
+      RETURN
+*
+ 1100 CONTINUE
+*
+*     ISPEC = 11: infinity arithmetic can be trusted not to trap
+*
+C     ILAENV = 0
+      ILAENV = 1
+      IF( ILAENV.EQ.1 ) THEN
+         ILAENV = IEEECK( 1, 0.0, 1.0 ) 
+      END IF
+      RETURN
+*
+*     End of ILAENV
+*
+      END
diff --git a/costa/native/external/lapack/izmax1.f b/costa/native/external/lapack/izmax1.f
new file mode 100644
index 000000000..a3488ea4c
--- /dev/null
+++ b/costa/native/external/lapack/izmax1.f
@@ -0,0 +1,96 @@
+      INTEGER          FUNCTION IZMAX1( N, CX, INCX )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         CX( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  IZMAX1 finds the index of the element whose real part has maximum
+*  absolute value.
+*
+*  Based on IZAMAX from Level 1 BLAS.
+*  The change is to use the 'genuine' absolute value.
+*
+*  Contributed by Nick Higham for use with ZLACON.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of elements in the vector CX.
+*
+*  CX      (input) COMPLEX*16 array, dimension (N)
+*          The vector whose elements will be summed.
+*
+*  INCX    (input) INTEGER
+*          The spacing between successive values of CX.  INCX >= 1.
+*
+* =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, IX
+      DOUBLE PRECISION   SMAX
+      COMPLEX*16         ZDUM
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+*
+*     NEXT LINE IS THE ONLY MODIFICATION.
+      CABS1( ZDUM ) = ABS( ZDUM )
+*     ..
+*     .. Executable Statements ..
+*
+      IZMAX1 = 0
+      IF( N.LT.1 )
+     $   RETURN
+      IZMAX1 = 1
+      IF( N.EQ.1 )
+     $   RETURN
+      IF( INCX.EQ.1 )
+     $   GO TO 30
+*
+*     CODE FOR INCREMENT NOT EQUAL TO 1
+*
+      IX = 1
+      SMAX = CABS1( CX( 1 ) )
+      IX = IX + INCX
+      DO 20 I = 2, N
+         IF( CABS1( CX( IX ) ).LE.SMAX )
+     $      GO TO 10
+         IZMAX1 = I
+         SMAX = CABS1( CX( IX ) )
+   10    CONTINUE
+         IX = IX + INCX
+   20 CONTINUE
+      RETURN
+*
+*     CODE FOR INCREMENT EQUAL TO 1
+*
+   30 CONTINUE
+      SMAX = CABS1( CX( 1 ) )
+      DO 40 I = 2, N
+         IF( CABS1( CX( I ) ).LE.SMAX )
+     $      GO TO 40
+         IZMAX1 = I
+         SMAX = CABS1( CX( I ) )
+   40 CONTINUE
+      RETURN
+*
+*     End of IZMAX1
+*
+      END
diff --git a/costa/native/external/lapack/lsame.f b/costa/native/external/lapack/lsame.f
new file mode 100644
index 000000000..bf25d86f2
--- /dev/null
+++ b/costa/native/external/lapack/lsame.f
@@ -0,0 +1,87 @@
+      LOGICAL          FUNCTION LSAME( CA, CB )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          CA, CB
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  LSAME returns .TRUE. if CA is the same letter as CB regardless of
+*  case.
+*
+*  Arguments
+*  =========
+*
+*  CA      (input) CHARACTER*1
+*  CB      (input) CHARACTER*1
+*          CA and CB specify the single characters to be compared.
+*
+* =====================================================================
+*
+*     .. Intrinsic Functions ..
+      INTRINSIC          ICHAR
+*     ..
+*     .. Local Scalars ..
+      INTEGER            INTA, INTB, ZCODE
+*     ..
+*     .. Executable Statements ..
+*
+*     Test if the characters are equal
+*
+      LSAME = CA.EQ.CB
+      IF( LSAME )
+     $   RETURN
+*
+*     Now test for equivalence if both characters are alphabetic.
+*
+      ZCODE = ICHAR( 'Z' )
+*
+*     Use 'Z' rather than 'A' so that ASCII can be detected on Prime
+*     machines, on which ICHAR returns a value with bit 8 set.
+*     ICHAR('A') on Prime machines returns 193 which is the same as
+*     ICHAR('A') on an EBCDIC machine.
+*
+      INTA = ICHAR( CA )
+      INTB = ICHAR( CB )
+*
+      IF( ZCODE.EQ.90 .OR. ZCODE.EQ.122 ) THEN
+*
+*        ASCII is assumed - ZCODE is the ASCII code of either lower or
+*        upper case 'Z'.
+*
+         IF( INTA.GE.97 .AND. INTA.LE.122 ) INTA = INTA - 32
+         IF( INTB.GE.97 .AND. INTB.LE.122 ) INTB = INTB - 32
+*
+      ELSE IF( ZCODE.EQ.233 .OR. ZCODE.EQ.169 ) THEN
+*
+*        EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or
+*        upper case 'Z'.
+*
+         IF( INTA.GE.129 .AND. INTA.LE.137 .OR.
+     $       INTA.GE.145 .AND. INTA.LE.153 .OR.
+     $       INTA.GE.162 .AND. INTA.LE.169 ) INTA = INTA + 64
+         IF( INTB.GE.129 .AND. INTB.LE.137 .OR.
+     $       INTB.GE.145 .AND. INTB.LE.153 .OR.
+     $       INTB.GE.162 .AND. INTB.LE.169 ) INTB = INTB + 64
+*
+      ELSE IF( ZCODE.EQ.218 .OR. ZCODE.EQ.250 ) THEN
+*
+*        ASCII is assumed, on Prime machines - ZCODE is the ASCII code
+*        plus 128 of either lower or upper case 'Z'.
+*
+         IF( INTA.GE.225 .AND. INTA.LE.250 ) INTA = INTA - 32
+         IF( INTB.GE.225 .AND. INTB.LE.250 ) INTB = INTB - 32
+      END IF
+      LSAME = INTA.EQ.INTB
+*
+*     RETURN
+*
+*     End of LSAME
+*
+      END
diff --git a/costa/native/external/lapack/lsamen.f b/costa/native/external/lapack/lsamen.f
new file mode 100644
index 000000000..4ba41e6d3
--- /dev/null
+++ b/costa/native/external/lapack/lsamen.f
@@ -0,0 +1,68 @@
+      LOGICAL          FUNCTION LSAMEN( N, CA, CB )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER*( * )    CA, CB
+      INTEGER            N
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  LSAMEN  tests if the first N letters of CA are the same as the
+*  first N letters of CB, regardless of case.
+*  LSAMEN returns .TRUE. if CA and CB are equivalent except for case
+*  and .FALSE. otherwise.  LSAMEN also returns .FALSE. if LEN( CA )
+*  or LEN( CB ) is less than N.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of characters in CA and CB to be compared.
+*
+*  CA      (input) CHARACTER*(*)
+*  CB      (input) CHARACTER*(*)
+*          CA and CB specify two character strings of length at least N.
+*          Only the first N characters of each string will be accessed.
+*
+* =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          LEN
+*     ..
+*     .. Executable Statements ..
+*
+      LSAMEN = .FALSE.
+      IF( LEN( CA ).LT.N .OR. LEN( CB ).LT.N )
+     $   GO TO 20
+*
+*     Do for each character in the two strings.
+*
+      DO 10 I = 1, N
+*
+*        Test if the characters are equal using LSAME.
+*
+         IF( .NOT.LSAME( CA( I: I ), CB( I: I ) ) )
+     $      GO TO 20
+*
+   10 CONTINUE
+      LSAMEN = .TRUE.
+*
+   20 CONTINUE
+      RETURN
+*
+*     End of LSAMEN
+*
+      END
diff --git a/costa/native/external/lapack/sbdsdc.f b/costa/native/external/lapack/sbdsdc.f
new file mode 100644
index 000000000..625bfeb40
--- /dev/null
+++ b/costa/native/external/lapack/sbdsdc.f
@@ -0,0 +1,427 @@
+      SUBROUTINE SBDSDC( UPLO, COMPQ, N, D, E, U, LDU, VT, LDVT, Q, IQ,
+     $                   WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     December 1, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPQ, UPLO
+      INTEGER            INFO, LDU, LDVT, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IQ( * ), IWORK( * )
+      REAL               D( * ), E( * ), Q( * ), U( LDU, * ),
+     $                   VT( LDVT, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SBDSDC computes the singular value decomposition (SVD) of a real
+*  N-by-N (upper or lower) bidiagonal matrix B:  B = U * S * VT,
+*  using a divide and conquer method, where S is a diagonal matrix
+*  with non-negative diagonal elements (the singular values of B), and
+*  U and VT are orthogonal matrices of left and right singular vectors,
+*  respectively. SBDSDC can be used to compute all singular values,
+*  and optionally, singular vectors or singular vectors in compact form.
+*
+*  This code makes very mild assumptions about floating point
+*  arithmetic. It will work on machines with a guard digit in
+*  add/subtract, or on those binary machines without guard digits
+*  which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
+*  It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.  See SLASD3 for details.
+*
+*  The code currently call SLASDQ if singular values only are desired.
+*  However, it can be slightly modified to compute singular values
+*  using the divide and conquer method.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  B is upper bidiagonal.
+*          = 'L':  B is lower bidiagonal.
+*
+*  COMPQ   (input) CHARACTER*1
+*          Specifies whether singular vectors are to be computed
+*          as follows:
+*          = 'N':  Compute singular values only;
+*          = 'P':  Compute singular values and compute singular
+*                  vectors in compact form;
+*          = 'I':  Compute singular values and singular vectors.
+*
+*  N       (input) INTEGER
+*          The order of the matrix B.  N >= 0.
+*
+*  D       (input/output) REAL array, dimension (N)
+*          On entry, the n diagonal elements of the bidiagonal matrix B.
+*          On exit, if INFO=0, the singular values of B.
+*
+*  E       (input/output) REAL array, dimension (N)
+*          On entry, the elements of E contain the offdiagonal
+*          elements of the bidiagonal matrix whose SVD is desired.
+*          On exit, E has been destroyed.
+*
+*  U       (output) REAL array, dimension (LDU,N)
+*          If  COMPQ = 'I', then:
+*             On exit, if INFO = 0, U contains the left singular vectors
+*             of the bidiagonal matrix.
+*          For other values of COMPQ, U is not referenced.
+*
+*  LDU     (input) INTEGER
+*          The leading dimension of the array U.  LDU >= 1.
+*          If singular vectors are desired, then LDU >= max( 1, N ).
+*
+*  VT      (output) REAL array, dimension (LDVT,N)
+*          If  COMPQ = 'I', then:
+*             On exit, if INFO = 0, VT' contains the right singular
+*             vectors of the bidiagonal matrix.
+*          For other values of COMPQ, VT is not referenced.
+*
+*  LDVT    (input) INTEGER
+*          The leading dimension of the array VT.  LDVT >= 1.
+*          If singular vectors are desired, then LDVT >= max( 1, N ).
+*
+*  Q       (output) REAL array, dimension (LDQ)
+*          If  COMPQ = 'P', then:
+*             On exit, if INFO = 0, Q and IQ contain the left
+*             and right singular vectors in a compact form,
+*             requiring O(N log N) space instead of 2*N**2.
+*             In particular, Q contains all the REAL data in
+*             LDQ >= N*(11 + 2*SMLSIZ + 8*INT(LOG_2(N/(SMLSIZ+1))))
+*             words of memory, where SMLSIZ is returned by ILAENV and
+*             is equal to the maximum size of the subproblems at the
+*             bottom of the computation tree (usually about 25).
+*          For other values of COMPQ, Q is not referenced.
+*
+*  IQ      (output) INTEGER array, dimension (LDIQ)
+*          If  COMPQ = 'P', then:
+*             On exit, if INFO = 0, Q and IQ contain the left
+*             and right singular vectors in a compact form,
+*             requiring O(N log N) space instead of 2*N**2.
+*             In particular, IQ contains all INTEGER data in
+*             LDIQ >= N*(3 + 3*INT(LOG_2(N/(SMLSIZ+1))))
+*             words of memory, where SMLSIZ is returned by ILAENV and
+*             is equal to the maximum size of the subproblems at the
+*             bottom of the computation tree (usually about 25).
+*          For other values of COMPQ, IQ is not referenced.
+*
+*  WORK    (workspace) REAL array, dimension (LWORK)
+*          If COMPQ = 'N' then LWORK >= (4 * N).
+*          If COMPQ = 'P' then LWORK >= (6 * N).
+*          If COMPQ = 'I' then LWORK >= (3 * N**2 + 4 * N).
+*
+*  IWORK   (workspace) INTEGER array, dimension (8*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  The algorithm failed to compute an singular value.
+*                The update process of divide and conquer failed.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            DIFL, DIFR, GIVCOL, GIVNUM, GIVPTR, I, IC,
+     $                   ICOMPQ, IERR, II, IS, IU, IUPLO, IVT, J, K, KK,
+     $                   MLVL, NM1, NSIZE, PERM, POLES, QSTART, SMLSIZ,
+     $                   SMLSZP, SQRE, START, WSTART, Z
+      REAL               CS, EPS, ORGNRM, P, R, SN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               SLAMCH, SLANST
+      EXTERNAL           SLAMCH, SLANST, ILAENV, LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SLARTG, SLASCL, SLASD0, SLASDA, SLASDQ,
+     $                   SLASET, SLASR, SSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          REAL, ABS, INT, LOG, SIGN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IUPLO = 0
+      IF( LSAME( UPLO, 'U' ) )
+     $   IUPLO = 1
+      IF( LSAME( UPLO, 'L' ) )
+     $   IUPLO = 2
+      IF( LSAME( COMPQ, 'N' ) ) THEN
+         ICOMPQ = 0
+      ELSE IF( LSAME( COMPQ, 'P' ) ) THEN
+         ICOMPQ = 1
+      ELSE IF( LSAME( COMPQ, 'I' ) ) THEN
+         ICOMPQ = 2
+      ELSE
+         ICOMPQ = -1
+      END IF
+      IF( IUPLO.EQ.0 ) THEN
+         INFO = -1
+      ELSE IF( ICOMPQ.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( ( LDU.LT.1 ) .OR. ( ( ICOMPQ.EQ.2 ) .AND. ( LDU.LT.
+     $         N ) ) ) THEN
+         INFO = -7
+      ELSE IF( ( LDVT.LT.1 ) .OR. ( ( ICOMPQ.EQ.2 ) .AND. ( LDVT.LT.
+     $         N ) ) ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SBDSDC', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+      SMLSIZ = ILAENV( 9, 'SBDSDC', ' ', 0, 0, 0, 0 )
+      IF( N.EQ.1 ) THEN
+         IF( ICOMPQ.EQ.1 ) THEN
+            Q( 1 ) = SIGN( ONE, D( 1 ) )
+            Q( 1+SMLSIZ*N ) = ONE
+         ELSE IF( ICOMPQ.EQ.2 ) THEN
+            U( 1, 1 ) = SIGN( ONE, D( 1 ) )
+            VT( 1, 1 ) = ONE
+         END IF
+         D( 1 ) = ABS( D( 1 ) )
+         RETURN
+      END IF
+      NM1 = N - 1
+*
+*     If matrix lower bidiagonal, rotate to be upper bidiagonal
+*     by applying Givens rotations on the left
+*
+      WSTART = 1
+      QSTART = 3
+      IF( ICOMPQ.EQ.1 ) THEN
+         CALL SCOPY( N, D, 1, Q( 1 ), 1 )
+         CALL SCOPY( N-1, E, 1, Q( N+1 ), 1 )
+      END IF
+      IF( IUPLO.EQ.2 ) THEN
+         QSTART = 5
+         WSTART = 2*N - 1
+         DO 10 I = 1, N - 1
+            CALL SLARTG( D( I ), E( I ), CS, SN, R )
+            D( I ) = R
+            E( I ) = SN*D( I+1 )
+            D( I+1 ) = CS*D( I+1 )
+            IF( ICOMPQ.EQ.1 ) THEN
+               Q( I+2*N ) = CS
+               Q( I+3*N ) = SN
+            ELSE IF( ICOMPQ.EQ.2 ) THEN
+               WORK( I ) = CS
+               WORK( NM1+I ) = -SN
+            END IF
+   10    CONTINUE
+      END IF
+*
+*     If ICOMPQ = 0, use SLASDQ to compute the singular values.
+*
+      IF( ICOMPQ.EQ.0 ) THEN
+         CALL SLASDQ( 'U', 0, N, 0, 0, 0, D, E, VT, LDVT, U, LDU, U,
+     $                LDU, WORK( WSTART ), INFO )
+         GO TO 40
+      END IF
+*
+*     If N is smaller than the minimum divide size SMLSIZ, then solve
+*     the problem with another solver.
+*
+      IF( N.LE.SMLSIZ ) THEN
+         IF( ICOMPQ.EQ.2 ) THEN
+            CALL SLASET( 'A', N, N, ZERO, ONE, U, LDU )
+            CALL SLASET( 'A', N, N, ZERO, ONE, VT, LDVT )
+            CALL SLASDQ( 'U', 0, N, N, N, 0, D, E, VT, LDVT, U, LDU, U,
+     $                   LDU, WORK( WSTART ), INFO )
+         ELSE IF( ICOMPQ.EQ.1 ) THEN
+            IU = 1
+            IVT = IU + N
+            CALL SLASET( 'A', N, N, ZERO, ONE, Q( IU+( QSTART-1 )*N ),
+     $                   N )
+            CALL SLASET( 'A', N, N, ZERO, ONE, Q( IVT+( QSTART-1 )*N ),
+     $                   N )
+            CALL SLASDQ( 'U', 0, N, N, N, 0, D, E,
+     $                   Q( IVT+( QSTART-1 )*N ), N,
+     $                   Q( IU+( QSTART-1 )*N ), N,
+     $                   Q( IU+( QSTART-1 )*N ), N, WORK( WSTART ),
+     $                   INFO )
+         END IF
+         GO TO 40
+      END IF
+*
+      IF( ICOMPQ.EQ.2 ) THEN
+         CALL SLASET( 'A', N, N, ZERO, ONE, U, LDU )
+         CALL SLASET( 'A', N, N, ZERO, ONE, VT, LDVT )
+      END IF
+*
+*     Scale.
+*
+      ORGNRM = SLANST( 'M', N, D, E )
+      IF( ORGNRM.EQ.ZERO )
+     $   RETURN
+      CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, IERR )
+      CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, NM1, 1, E, NM1, IERR )
+*
+      EPS = SLAMCH( 'Epsilon' )
+*
+      MLVL = INT( LOG( REAL( N ) / REAL( SMLSIZ+1 ) ) / LOG( TWO ) ) + 1
+      SMLSZP = SMLSIZ + 1
+*
+      IF( ICOMPQ.EQ.1 ) THEN
+         IU = 1
+         IVT = 1 + SMLSIZ
+         DIFL = IVT + SMLSZP
+         DIFR = DIFL + MLVL
+         Z = DIFR + MLVL*2
+         IC = Z + MLVL
+         IS = IC + 1
+         POLES = IS + 1
+         GIVNUM = POLES + 2*MLVL
+*
+         K = 1
+         GIVPTR = 2
+         PERM = 3
+         GIVCOL = PERM + MLVL
+      END IF
+*
+      DO 20 I = 1, N
+         IF( ABS( D( I ) ).LT.EPS ) THEN
+            D( I ) = SIGN( EPS, D( I ) )
+         END IF
+   20 CONTINUE
+*
+      START = 1
+      SQRE = 0
+*
+      DO 30 I = 1, NM1
+         IF( ( ABS( E( I ) ).LT.EPS ) .OR. ( I.EQ.NM1 ) ) THEN
+*
+*        Subproblem found. First determine its size and then
+*        apply divide and conquer on it.
+*
+            IF( I.LT.NM1 ) THEN
+*
+*        A subproblem with E(I) small for I < NM1.
+*
+               NSIZE = I - START + 1
+            ELSE IF( ABS( E( I ) ).GE.EPS ) THEN
+*
+*        A subproblem with E(NM1) not too small but I = NM1.
+*
+               NSIZE = N - START + 1
+            ELSE
+*
+*        A subproblem with E(NM1) small. This implies an
+*        1-by-1 subproblem at D(N). Solve this 1-by-1 problem
+*        first.
+*
+               NSIZE = I - START + 1
+               IF( ICOMPQ.EQ.2 ) THEN
+                  U( N, N ) = SIGN( ONE, D( N ) )
+                  VT( N, N ) = ONE
+               ELSE IF( ICOMPQ.EQ.1 ) THEN
+                  Q( N+( QSTART-1 )*N ) = SIGN( ONE, D( N ) )
+                  Q( N+( SMLSIZ+QSTART-1 )*N ) = ONE
+               END IF
+               D( N ) = ABS( D( N ) )
+            END IF
+            IF( ICOMPQ.EQ.2 ) THEN
+               CALL SLASD0( NSIZE, SQRE, D( START ), E( START ),
+     $                      U( START, START ), LDU, VT( START, START ),
+     $                      LDVT, SMLSIZ, IWORK, WORK( WSTART ), INFO )
+            ELSE
+               CALL SLASDA( ICOMPQ, SMLSIZ, NSIZE, SQRE, D( START ),
+     $                      E( START ), Q( START+( IU+QSTART-2 )*N ), N,
+     $                      Q( START+( IVT+QSTART-2 )*N ),
+     $                      IQ( START+K*N ), Q( START+( DIFL+QSTART-2 )*
+     $                      N ), Q( START+( DIFR+QSTART-2 )*N ),
+     $                      Q( START+( Z+QSTART-2 )*N ),
+     $                      Q( START+( POLES+QSTART-2 )*N ),
+     $                      IQ( START+GIVPTR*N ), IQ( START+GIVCOL*N ),
+     $                      N, IQ( START+PERM*N ),
+     $                      Q( START+( GIVNUM+QSTART-2 )*N ),
+     $                      Q( START+( IC+QSTART-2 )*N ),
+     $                      Q( START+( IS+QSTART-2 )*N ),
+     $                      WORK( WSTART ), IWORK, INFO )
+               IF( INFO.NE.0 ) THEN
+                  RETURN
+               END IF
+            END IF
+            START = I + 1
+         END IF
+   30 CONTINUE
+*
+*     Unscale
+*
+      CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, IERR )
+   40 CONTINUE
+*
+*     Use Selection Sort to minimize swaps of singular vectors
+*
+      DO 60 II = 2, N
+         I = II - 1
+         KK = I
+         P = D( I )
+         DO 50 J = II, N
+            IF( D( J ).GT.P ) THEN
+               KK = J
+               P = D( J )
+            END IF
+   50    CONTINUE
+         IF( KK.NE.I ) THEN
+            D( KK ) = D( I )
+            D( I ) = P
+            IF( ICOMPQ.EQ.1 ) THEN
+               IQ( I ) = KK
+            ELSE IF( ICOMPQ.EQ.2 ) THEN
+               CALL SSWAP( N, U( 1, I ), 1, U( 1, KK ), 1 )
+               CALL SSWAP( N, VT( I, 1 ), LDVT, VT( KK, 1 ), LDVT )
+            END IF
+         ELSE IF( ICOMPQ.EQ.1 ) THEN
+            IQ( I ) = I
+         END IF
+   60 CONTINUE
+*
+*     If ICOMPQ = 1, use IQ(N,1) as the indicator for UPLO
+*
+      IF( ICOMPQ.EQ.1 ) THEN
+         IF( IUPLO.EQ.1 ) THEN
+            IQ( N ) = 1
+         ELSE
+            IQ( N ) = 0
+         END IF
+      END IF
+*
+*     If B is lower bidiagonal, update U by those Givens rotations
+*     which rotated B to be upper bidiagonal
+*
+      IF( ( IUPLO.EQ.2 ) .AND. ( ICOMPQ.EQ.2 ) )
+     $   CALL SLASR( 'L', 'V', 'B', N, N, WORK( 1 ), WORK( N ), U, LDU )
+*
+      RETURN
+*
+*     End of SBDSDC
+*
+      END
diff --git a/costa/native/external/lapack/sbdsqr.f b/costa/native/external/lapack/sbdsqr.f
new file mode 100644
index 000000000..d075d84f2
--- /dev/null
+++ b/costa/native/external/lapack/sbdsqr.f
@@ -0,0 +1,733 @@
+      SUBROUTINE SBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U,
+     $                   LDU, C, LDC, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU
+*     ..
+*     .. Array Arguments ..
+      REAL               C( LDC, * ), D( * ), E( * ), U( LDU, * ),
+     $                   VT( LDVT, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SBDSQR computes the singular value decomposition (SVD) of a real
+*  N-by-N (upper or lower) bidiagonal matrix B:  B = Q * S * P' (P'
+*  denotes the transpose of P), where S is a diagonal matrix with
+*  non-negative diagonal elements (the singular values of B), and Q
+*  and P are orthogonal matrices.
+*
+*  The routine computes S, and optionally computes U * Q, P' * VT,
+*  or Q' * C, for given real input matrices U, VT, and C.
+*
+*  See "Computing  Small Singular Values of Bidiagonal Matrices With
+*  Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan,
+*  LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11,
+*  no. 5, pp. 873-912, Sept 1990) and
+*  "Accurate singular values and differential qd algorithms," by
+*  B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics
+*  Department, University of California at Berkeley, July 1992
+*  for a detailed description of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  B is upper bidiagonal;
+*          = 'L':  B is lower bidiagonal.
+*
+*  N       (input) INTEGER
+*          The order of the matrix B.  N >= 0.
+*
+*  NCVT    (input) INTEGER
+*          The number of columns of the matrix VT. NCVT >= 0.
+*
+*  NRU     (input) INTEGER
+*          The number of rows of the matrix U. NRU >= 0.
+*
+*  NCC     (input) INTEGER
+*          The number of columns of the matrix C. NCC >= 0.
+*
+*  D       (input/output) REAL array, dimension (N)
+*          On entry, the n diagonal elements of the bidiagonal matrix B.
+*          On exit, if INFO=0, the singular values of B in decreasing
+*          order.
+*
+*  E       (input/output) REAL array, dimension (N)
+*          On entry, the elements of E contain the
+*          offdiagonal elements of the bidiagonal matrix whose SVD
+*          is desired. On normal exit (INFO = 0), E is destroyed.
+*          If the algorithm does not converge (INFO > 0), D and E
+*          will contain the diagonal and superdiagonal elements of a
+*          bidiagonal matrix orthogonally equivalent to the one given
+*          as input. E(N) is used for workspace.
+*
+*  VT      (input/output) REAL array, dimension (LDVT, NCVT)
+*          On entry, an N-by-NCVT matrix VT.
+*          On exit, VT is overwritten by P' * VT.
+*          VT is not referenced if NCVT = 0.
+*
+*  LDVT    (input) INTEGER
+*          The leading dimension of the array VT.
+*          LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0.
+*
+*  U       (input/output) REAL array, dimension (LDU, N)
+*          On entry, an NRU-by-N matrix U.
+*          On exit, U is overwritten by U * Q.
+*          U is not referenced if NRU = 0.
+*
+*  LDU     (input) INTEGER
+*          The leading dimension of the array U.  LDU >= max(1,NRU).
+*
+*  C       (input/output) REAL array, dimension (LDC, NCC)
+*          On entry, an N-by-NCC matrix C.
+*          On exit, C is overwritten by Q' * C.
+*          C is not referenced if NCC = 0.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C.
+*          LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0.
+*
+*  WORK    (workspace) REAL array, dimension (4*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  If INFO = -i, the i-th argument had an illegal value
+*          > 0:  the algorithm did not converge; D and E contain the
+*                elements of a bidiagonal matrix which is orthogonally
+*                similar to the input matrix B;  if INFO = i, i
+*                elements of E have not converged to zero.
+*
+*  Internal Parameters
+*  ===================
+*
+*  TOLMUL  REAL, default = max(10,min(100,EPS**(-1/8)))
+*          TOLMUL controls the convergence criterion of the QR loop.
+*          If it is positive, TOLMUL*EPS is the desired relative
+*             precision in the computed singular values.
+*          If it is negative, abs(TOLMUL*EPS*sigma_max) is the
+*             desired absolute accuracy in the computed singular
+*             values (corresponds to relative accuracy
+*             abs(TOLMUL*EPS) in the largest singular value.
+*          abs(TOLMUL) should be between 1 and 1/EPS, and preferably
+*             between 10 (for fast convergence) and .1/EPS
+*             (for there to be some accuracy in the results).
+*          Default is to lose at either one eighth or 2 of the
+*             available decimal digits in each computed singular value
+*             (whichever is smaller).
+*
+*  MAXITR  INTEGER, default = 6
+*          MAXITR controls the maximum number of passes of the
+*          algorithm through its inner loop. The algorithms stops
+*          (and so fails to converge) if the number of passes
+*          through the inner loop exceeds MAXITR*N**2.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E0 )
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E0 )
+      REAL               NEGONE
+      PARAMETER          ( NEGONE = -1.0E0 )
+      REAL               HNDRTH
+      PARAMETER          ( HNDRTH = 0.01E0 )
+      REAL               TEN
+      PARAMETER          ( TEN = 10.0E0 )
+      REAL               HNDRD
+      PARAMETER          ( HNDRD = 100.0E0 )
+      REAL               MEIGTH
+      PARAMETER          ( MEIGTH = -0.125E0 )
+      INTEGER            MAXITR
+      PARAMETER          ( MAXITR = 6 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LOWER, ROTATE
+      INTEGER            I, IDIR, ISUB, ITER, J, LL, LLL, M, MAXIT, NM1,
+     $                   NM12, NM13, OLDLL, OLDM
+      REAL               ABSE, ABSS, COSL, COSR, CS, EPS, F, G, H, MU,
+     $                   OLDCS, OLDSN, R, SHIFT, SIGMN, SIGMX, SINL,
+     $                   SINR, SLL, SMAX, SMIN, SMINL, SMINLO, SMINOA,
+     $                   SN, THRESH, TOL, TOLMUL, UNFL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARTG, SLAS2, SLASQ1, SLASR, SLASV2, SROT,
+     $                   SSCAL, SSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, REAL, SIGN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      LOWER = LSAME( UPLO, 'L' )
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LOWER ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NCVT.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRU.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NCC.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( ( NCVT.EQ.0 .AND. LDVT.LT.1 ) .OR.
+     $         ( NCVT.GT.0 .AND. LDVT.LT.MAX( 1, N ) ) ) THEN
+         INFO = -9
+      ELSE IF( LDU.LT.MAX( 1, NRU ) ) THEN
+         INFO = -11
+      ELSE IF( ( NCC.EQ.0 .AND. LDC.LT.1 ) .OR.
+     $         ( NCC.GT.0 .AND. LDC.LT.MAX( 1, N ) ) ) THEN
+         INFO = -13
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SBDSQR', -INFO )
+         RETURN
+      END IF
+      IF( N.EQ.0 )
+     $   RETURN
+      IF( N.EQ.1 )
+     $   GO TO 160
+*
+*     ROTATE is true if any singular vectors desired, false otherwise
+*
+      ROTATE = ( NCVT.GT.0 ) .OR. ( NRU.GT.0 ) .OR. ( NCC.GT.0 )
+*
+*     If no singular vectors desired, use qd algorithm
+*
+      IF( .NOT.ROTATE ) THEN
+         CALL SLASQ1( N, D, E, WORK, INFO )
+         RETURN
+      END IF
+*
+      NM1 = N - 1
+      NM12 = NM1 + NM1
+      NM13 = NM12 + NM1
+      IDIR = 0
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'Epsilon' )
+      UNFL = SLAMCH( 'Safe minimum' )
+*
+*     If matrix lower bidiagonal, rotate to be upper bidiagonal
+*     by applying Givens rotations on the left
+*
+      IF( LOWER ) THEN
+         DO 10 I = 1, N - 1
+            CALL SLARTG( D( I ), E( I ), CS, SN, R )
+            D( I ) = R
+            E( I ) = SN*D( I+1 )
+            D( I+1 ) = CS*D( I+1 )
+            WORK( I ) = CS
+            WORK( NM1+I ) = SN
+   10    CONTINUE
+*
+*        Update singular vectors if desired
+*
+         IF( NRU.GT.0 )
+     $      CALL SLASR( 'R', 'V', 'F', NRU, N, WORK( 1 ), WORK( N ), U,
+     $                  LDU )
+         IF( NCC.GT.0 )
+     $      CALL SLASR( 'L', 'V', 'F', N, NCC, WORK( 1 ), WORK( N ), C,
+     $                  LDC )
+      END IF
+*
+*     Compute singular values to relative accuracy TOL
+*     (By setting TOL to be negative, algorithm will compute
+*     singular values to absolute accuracy ABS(TOL)*norm(input matrix))
+*
+      TOLMUL = MAX( TEN, MIN( HNDRD, EPS**MEIGTH ) )
+      TOL = TOLMUL*EPS
+*
+*     Compute approximate maximum, minimum singular values
+*
+      SMAX = ZERO
+      DO 20 I = 1, N
+         SMAX = MAX( SMAX, ABS( D( I ) ) )
+   20 CONTINUE
+      DO 30 I = 1, N - 1
+         SMAX = MAX( SMAX, ABS( E( I ) ) )
+   30 CONTINUE
+      SMINL = ZERO
+      IF( TOL.GE.ZERO ) THEN
+*
+*        Relative accuracy desired
+*
+         SMINOA = ABS( D( 1 ) )
+         IF( SMINOA.EQ.ZERO )
+     $      GO TO 50
+         MU = SMINOA
+         DO 40 I = 2, N
+            MU = ABS( D( I ) )*( MU / ( MU+ABS( E( I-1 ) ) ) )
+            SMINOA = MIN( SMINOA, MU )
+            IF( SMINOA.EQ.ZERO )
+     $         GO TO 50
+   40    CONTINUE
+   50    CONTINUE
+         SMINOA = SMINOA / SQRT( REAL( N ) )
+         THRESH = MAX( TOL*SMINOA, MAXITR*N*N*UNFL )
+      ELSE
+*
+*        Absolute accuracy desired
+*
+         THRESH = MAX( ABS( TOL )*SMAX, MAXITR*N*N*UNFL )
+      END IF
+*
+*     Prepare for main iteration loop for the singular values
+*     (MAXIT is the maximum number of passes through the inner
+*     loop permitted before nonconvergence signalled.)
+*
+      MAXIT = MAXITR*N*N
+      ITER = 0
+      OLDLL = -1
+      OLDM = -1
+*
+*     M points to last element of unconverged part of matrix
+*
+      M = N
+*
+*     Begin main iteration loop
+*
+   60 CONTINUE
+*
+*     Check for convergence or exceeding iteration count
+*
+      IF( M.LE.1 )
+     $   GO TO 160
+      IF( ITER.GT.MAXIT )
+     $   GO TO 200
+*
+*     Find diagonal block of matrix to work on
+*
+      IF( TOL.LT.ZERO .AND. ABS( D( M ) ).LE.THRESH )
+     $   D( M ) = ZERO
+      SMAX = ABS( D( M ) )
+      SMIN = SMAX
+      DO 70 LLL = 1, M - 1
+         LL = M - LLL
+         ABSS = ABS( D( LL ) )
+         ABSE = ABS( E( LL ) )
+         IF( TOL.LT.ZERO .AND. ABSS.LE.THRESH )
+     $      D( LL ) = ZERO
+         IF( ABSE.LE.THRESH )
+     $      GO TO 80
+         SMIN = MIN( SMIN, ABSS )
+         SMAX = MAX( SMAX, ABSS, ABSE )
+   70 CONTINUE
+      LL = 0
+      GO TO 90
+   80 CONTINUE
+      E( LL ) = ZERO
+*
+*     Matrix splits since E(LL) = 0
+*
+      IF( LL.EQ.M-1 ) THEN
+*
+*        Convergence of bottom singular value, return to top of loop
+*
+         M = M - 1
+         GO TO 60
+      END IF
+   90 CONTINUE
+      LL = LL + 1
+*
+*     E(LL) through E(M-1) are nonzero, E(LL-1) is zero
+*
+      IF( LL.EQ.M-1 ) THEN
+*
+*        2 by 2 block, handle separately
+*
+         CALL SLASV2( D( M-1 ), E( M-1 ), D( M ), SIGMN, SIGMX, SINR,
+     $                COSR, SINL, COSL )
+         D( M-1 ) = SIGMX
+         E( M-1 ) = ZERO
+         D( M ) = SIGMN
+*
+*        Compute singular vectors, if desired
+*
+         IF( NCVT.GT.0 )
+     $      CALL SROT( NCVT, VT( M-1, 1 ), LDVT, VT( M, 1 ), LDVT, COSR,
+     $                 SINR )
+         IF( NRU.GT.0 )
+     $      CALL SROT( NRU, U( 1, M-1 ), 1, U( 1, M ), 1, COSL, SINL )
+         IF( NCC.GT.0 )
+     $      CALL SROT( NCC, C( M-1, 1 ), LDC, C( M, 1 ), LDC, COSL,
+     $                 SINL )
+         M = M - 2
+         GO TO 60
+      END IF
+*
+*     If working on new submatrix, choose shift direction
+*     (from larger end diagonal element towards smaller)
+*
+      IF( LL.GT.OLDM .OR. M.LT.OLDLL ) THEN
+         IF( ABS( D( LL ) ).GE.ABS( D( M ) ) ) THEN
+*
+*           Chase bulge from top (big end) to bottom (small end)
+*
+            IDIR = 1
+         ELSE
+*
+*           Chase bulge from bottom (big end) to top (small end)
+*
+            IDIR = 2
+         END IF
+      END IF
+*
+*     Apply convergence tests
+*
+      IF( IDIR.EQ.1 ) THEN
+*
+*        Run convergence test in forward direction
+*        First apply standard test to bottom of matrix
+*
+         IF( ABS( E( M-1 ) ).LE.ABS( TOL )*ABS( D( M ) ) .OR.
+     $       ( TOL.LT.ZERO .AND. ABS( E( M-1 ) ).LE.THRESH ) ) THEN
+            E( M-1 ) = ZERO
+            GO TO 60
+         END IF
+*
+         IF( TOL.GE.ZERO ) THEN
+*
+*           If relative accuracy desired,
+*           apply convergence criterion forward
+*
+            MU = ABS( D( LL ) )
+            SMINL = MU
+            DO 100 LLL = LL, M - 1
+               IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN
+                  E( LLL ) = ZERO
+                  GO TO 60
+               END IF
+               SMINLO = SMINL
+               MU = ABS( D( LLL+1 ) )*( MU / ( MU+ABS( E( LLL ) ) ) )
+               SMINL = MIN( SMINL, MU )
+  100       CONTINUE
+         END IF
+*
+      ELSE
+*
+*        Run convergence test in backward direction
+*        First apply standard test to top of matrix
+*
+         IF( ABS( E( LL ) ).LE.ABS( TOL )*ABS( D( LL ) ) .OR.
+     $       ( TOL.LT.ZERO .AND. ABS( E( LL ) ).LE.THRESH ) ) THEN
+            E( LL ) = ZERO
+            GO TO 60
+         END IF
+*
+         IF( TOL.GE.ZERO ) THEN
+*
+*           If relative accuracy desired,
+*           apply convergence criterion backward
+*
+            MU = ABS( D( M ) )
+            SMINL = MU
+            DO 110 LLL = M - 1, LL, -1
+               IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN
+                  E( LLL ) = ZERO
+                  GO TO 60
+               END IF
+               SMINLO = SMINL
+               MU = ABS( D( LLL ) )*( MU / ( MU+ABS( E( LLL ) ) ) )
+               SMINL = MIN( SMINL, MU )
+  110       CONTINUE
+         END IF
+      END IF
+      OLDLL = LL
+      OLDM = M
+*
+*     Compute shift.  First, test if shifting would ruin relative
+*     accuracy, and if so set the shift to zero.
+*
+      IF( TOL.GE.ZERO .AND. N*TOL*( SMINL / SMAX ).LE.
+     $    MAX( EPS, HNDRTH*TOL ) ) THEN
+*
+*        Use a zero shift to avoid loss of relative accuracy
+*
+         SHIFT = ZERO
+      ELSE
+*
+*        Compute the shift from 2-by-2 block at end of matrix
+*
+         IF( IDIR.EQ.1 ) THEN
+            SLL = ABS( D( LL ) )
+            CALL SLAS2( D( M-1 ), E( M-1 ), D( M ), SHIFT, R )
+         ELSE
+            SLL = ABS( D( M ) )
+            CALL SLAS2( D( LL ), E( LL ), D( LL+1 ), SHIFT, R )
+         END IF
+*
+*        Test if shift negligible, and if so set to zero
+*
+         IF( SLL.GT.ZERO ) THEN
+            IF( ( SHIFT / SLL )**2.LT.EPS )
+     $         SHIFT = ZERO
+         END IF
+      END IF
+*
+*     Increment iteration count
+*
+      ITER = ITER + M - LL
+*
+*     If SHIFT = 0, do simplified QR iteration
+*
+      IF( SHIFT.EQ.ZERO ) THEN
+         IF( IDIR.EQ.1 ) THEN
+*
+*           Chase bulge from top to bottom
+*           Save cosines and sines for later singular vector updates
+*
+            CS = ONE
+            OLDCS = ONE
+            DO 120 I = LL, M - 1
+               CALL SLARTG( D( I )*CS, E( I ), CS, SN, R )
+               IF( I.GT.LL )
+     $            E( I-1 ) = OLDSN*R
+               CALL SLARTG( OLDCS*R, D( I+1 )*SN, OLDCS, OLDSN, D( I ) )
+               WORK( I-LL+1 ) = CS
+               WORK( I-LL+1+NM1 ) = SN
+               WORK( I-LL+1+NM12 ) = OLDCS
+               WORK( I-LL+1+NM13 ) = OLDSN
+  120       CONTINUE
+            H = D( M )*CS
+            D( M ) = H*OLDCS
+            E( M-1 ) = H*OLDSN
+*
+*           Update singular vectors
+*
+            IF( NCVT.GT.0 )
+     $         CALL SLASR( 'L', 'V', 'F', M-LL+1, NCVT, WORK( 1 ),
+     $                     WORK( N ), VT( LL, 1 ), LDVT )
+            IF( NRU.GT.0 )
+     $         CALL SLASR( 'R', 'V', 'F', NRU, M-LL+1, WORK( NM12+1 ),
+     $                     WORK( NM13+1 ), U( 1, LL ), LDU )
+            IF( NCC.GT.0 )
+     $         CALL SLASR( 'L', 'V', 'F', M-LL+1, NCC, WORK( NM12+1 ),
+     $                     WORK( NM13+1 ), C( LL, 1 ), LDC )
+*
+*           Test convergence
+*
+            IF( ABS( E( M-1 ) ).LE.THRESH )
+     $         E( M-1 ) = ZERO
+*
+         ELSE
+*
+*           Chase bulge from bottom to top
+*           Save cosines and sines for later singular vector updates
+*
+            CS = ONE
+            OLDCS = ONE
+            DO 130 I = M, LL + 1, -1
+               CALL SLARTG( D( I )*CS, E( I-1 ), CS, SN, R )
+               IF( I.LT.M )
+     $            E( I ) = OLDSN*R
+               CALL SLARTG( OLDCS*R, D( I-1 )*SN, OLDCS, OLDSN, D( I ) )
+               WORK( I-LL ) = CS
+               WORK( I-LL+NM1 ) = -SN
+               WORK( I-LL+NM12 ) = OLDCS
+               WORK( I-LL+NM13 ) = -OLDSN
+  130       CONTINUE
+            H = D( LL )*CS
+            D( LL ) = H*OLDCS
+            E( LL ) = H*OLDSN
+*
+*           Update singular vectors
+*
+            IF( NCVT.GT.0 )
+     $         CALL SLASR( 'L', 'V', 'B', M-LL+1, NCVT, WORK( NM12+1 ),
+     $                     WORK( NM13+1 ), VT( LL, 1 ), LDVT )
+            IF( NRU.GT.0 )
+     $         CALL SLASR( 'R', 'V', 'B', NRU, M-LL+1, WORK( 1 ),
+     $                     WORK( N ), U( 1, LL ), LDU )
+            IF( NCC.GT.0 )
+     $         CALL SLASR( 'L', 'V', 'B', M-LL+1, NCC, WORK( 1 ),
+     $                     WORK( N ), C( LL, 1 ), LDC )
+*
+*           Test convergence
+*
+            IF( ABS( E( LL ) ).LE.THRESH )
+     $         E( LL ) = ZERO
+         END IF
+      ELSE
+*
+*        Use nonzero shift
+*
+         IF( IDIR.EQ.1 ) THEN
+*
+*           Chase bulge from top to bottom
+*           Save cosines and sines for later singular vector updates
+*
+            F = ( ABS( D( LL ) )-SHIFT )*
+     $          ( SIGN( ONE, D( LL ) )+SHIFT / D( LL ) )
+            G = E( LL )
+            DO 140 I = LL, M - 1
+               CALL SLARTG( F, G, COSR, SINR, R )
+               IF( I.GT.LL )
+     $            E( I-1 ) = R
+               F = COSR*D( I ) + SINR*E( I )
+               E( I ) = COSR*E( I ) - SINR*D( I )
+               G = SINR*D( I+1 )
+               D( I+1 ) = COSR*D( I+1 )
+               CALL SLARTG( F, G, COSL, SINL, R )
+               D( I ) = R
+               F = COSL*E( I ) + SINL*D( I+1 )
+               D( I+1 ) = COSL*D( I+1 ) - SINL*E( I )
+               IF( I.LT.M-1 ) THEN
+                  G = SINL*E( I+1 )
+                  E( I+1 ) = COSL*E( I+1 )
+               END IF
+               WORK( I-LL+1 ) = COSR
+               WORK( I-LL+1+NM1 ) = SINR
+               WORK( I-LL+1+NM12 ) = COSL
+               WORK( I-LL+1+NM13 ) = SINL
+  140       CONTINUE
+            E( M-1 ) = F
+*
+*           Update singular vectors
+*
+            IF( NCVT.GT.0 )
+     $         CALL SLASR( 'L', 'V', 'F', M-LL+1, NCVT, WORK( 1 ),
+     $                     WORK( N ), VT( LL, 1 ), LDVT )
+            IF( NRU.GT.0 )
+     $         CALL SLASR( 'R', 'V', 'F', NRU, M-LL+1, WORK( NM12+1 ),
+     $                     WORK( NM13+1 ), U( 1, LL ), LDU )
+            IF( NCC.GT.0 )
+     $         CALL SLASR( 'L', 'V', 'F', M-LL+1, NCC, WORK( NM12+1 ),
+     $                     WORK( NM13+1 ), C( LL, 1 ), LDC )
+*
+*           Test convergence
+*
+            IF( ABS( E( M-1 ) ).LE.THRESH )
+     $         E( M-1 ) = ZERO
+*
+         ELSE
+*
+*           Chase bulge from bottom to top
+*           Save cosines and sines for later singular vector updates
+*
+            F = ( ABS( D( M ) )-SHIFT )*( SIGN( ONE, D( M ) )+SHIFT /
+     $          D( M ) )
+            G = E( M-1 )
+            DO 150 I = M, LL + 1, -1
+               CALL SLARTG( F, G, COSR, SINR, R )
+               IF( I.LT.M )
+     $            E( I ) = R
+               F = COSR*D( I ) + SINR*E( I-1 )
+               E( I-1 ) = COSR*E( I-1 ) - SINR*D( I )
+               G = SINR*D( I-1 )
+               D( I-1 ) = COSR*D( I-1 )
+               CALL SLARTG( F, G, COSL, SINL, R )
+               D( I ) = R
+               F = COSL*E( I-1 ) + SINL*D( I-1 )
+               D( I-1 ) = COSL*D( I-1 ) - SINL*E( I-1 )
+               IF( I.GT.LL+1 ) THEN
+                  G = SINL*E( I-2 )
+                  E( I-2 ) = COSL*E( I-2 )
+               END IF
+               WORK( I-LL ) = COSR
+               WORK( I-LL+NM1 ) = -SINR
+               WORK( I-LL+NM12 ) = COSL
+               WORK( I-LL+NM13 ) = -SINL
+  150       CONTINUE
+            E( LL ) = F
+*
+*           Test convergence
+*
+            IF( ABS( E( LL ) ).LE.THRESH )
+     $         E( LL ) = ZERO
+*
+*           Update singular vectors if desired
+*
+            IF( NCVT.GT.0 )
+     $         CALL SLASR( 'L', 'V', 'B', M-LL+1, NCVT, WORK( NM12+1 ),
+     $                     WORK( NM13+1 ), VT( LL, 1 ), LDVT )
+            IF( NRU.GT.0 )
+     $         CALL SLASR( 'R', 'V', 'B', NRU, M-LL+1, WORK( 1 ),
+     $                     WORK( N ), U( 1, LL ), LDU )
+            IF( NCC.GT.0 )
+     $         CALL SLASR( 'L', 'V', 'B', M-LL+1, NCC, WORK( 1 ),
+     $                     WORK( N ), C( LL, 1 ), LDC )
+         END IF
+      END IF
+*
+*     QR iteration finished, go back and check convergence
+*
+      GO TO 60
+*
+*     All singular values converged, so make them positive
+*
+  160 CONTINUE
+      DO 170 I = 1, N
+         IF( D( I ).LT.ZERO ) THEN
+            D( I ) = -D( I )
+*
+*           Change sign of singular vectors, if desired
+*
+            IF( NCVT.GT.0 )
+     $         CALL SSCAL( NCVT, NEGONE, VT( I, 1 ), LDVT )
+         END IF
+  170 CONTINUE
+*
+*     Sort the singular values into decreasing order (insertion sort on
+*     singular values, but only one transposition per singular vector)
+*
+      DO 190 I = 1, N - 1
+*
+*        Scan for smallest D(I)
+*
+         ISUB = 1
+         SMIN = D( 1 )
+         DO 180 J = 2, N + 1 - I
+            IF( D( J ).LE.SMIN ) THEN
+               ISUB = J
+               SMIN = D( J )
+            END IF
+  180    CONTINUE
+         IF( ISUB.NE.N+1-I ) THEN
+*
+*           Swap singular values and vectors
+*
+            D( ISUB ) = D( N+1-I )
+            D( N+1-I ) = SMIN
+            IF( NCVT.GT.0 )
+     $         CALL SSWAP( NCVT, VT( ISUB, 1 ), LDVT, VT( N+1-I, 1 ),
+     $                     LDVT )
+            IF( NRU.GT.0 )
+     $         CALL SSWAP( NRU, U( 1, ISUB ), 1, U( 1, N+1-I ), 1 )
+            IF( NCC.GT.0 )
+     $         CALL SSWAP( NCC, C( ISUB, 1 ), LDC, C( N+1-I, 1 ), LDC )
+         END IF
+  190 CONTINUE
+      GO TO 220
+*
+*     Maximum number of iterations exceeded, failure to converge
+*
+  200 CONTINUE
+      INFO = 0
+      DO 210 I = 1, N - 1
+         IF( E( I ).NE.ZERO )
+     $      INFO = INFO + 1
+  210 CONTINUE
+  220 CONTINUE
+      RETURN
+*
+*     End of SBDSQR
+*
+      END
diff --git a/costa/native/external/lapack/scsum1.f b/costa/native/external/lapack/scsum1.f
new file mode 100644
index 000000000..03adac2a1
--- /dev/null
+++ b/costa/native/external/lapack/scsum1.f
@@ -0,0 +1,82 @@
+      REAL             FUNCTION SCSUM1( N, CX, INCX )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            CX( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SCSUM1 takes the sum of the absolute values of a complex
+*  vector and returns a single precision result.
+*
+*  Based on SCASUM from the Level 1 BLAS.
+*  The change is to use the 'genuine' absolute value.
+*
+*  Contributed by Nick Higham for use with CLACON.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of elements in the vector CX.
+*
+*  CX      (input) COMPLEX array, dimension (N)
+*          The vector whose elements will be summed.
+*
+*  INCX    (input) INTEGER
+*          The spacing between successive values of CX.  INCX > 0.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, NINCX
+      REAL               STEMP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS
+*     ..
+*     .. Executable Statements ..
+*
+      SCSUM1 = 0.0E0
+      STEMP = 0.0E0
+      IF( N.LE.0 )
+     $   RETURN
+      IF( INCX.EQ.1 )
+     $   GO TO 20
+*
+*     CODE FOR INCREMENT NOT EQUAL TO 1
+*
+      NINCX = N*INCX
+      DO 10 I = 1, NINCX, INCX
+*
+*        NEXT LINE MODIFIED.
+*
+         STEMP = STEMP + ABS( CX( I ) )
+   10 CONTINUE
+      SCSUM1 = STEMP
+      RETURN
+*
+*     CODE FOR INCREMENT EQUAL TO 1
+*
+   20 CONTINUE
+      DO 30 I = 1, N
+*
+*        NEXT LINE MODIFIED.
+*
+         STEMP = STEMP + ABS( CX( I ) )
+   30 CONTINUE
+      SCSUM1 = STEMP
+      RETURN
+*
+*     End of SCSUM1
+*
+      END
diff --git a/costa/native/external/lapack/sdisna.f b/costa/native/external/lapack/sdisna.f
new file mode 100644
index 000000000..438282e2a
--- /dev/null
+++ b/costa/native/external/lapack/sdisna.f
@@ -0,0 +1,180 @@
+      SUBROUTINE SDISNA( JOB, M, N, D, SEP, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOB
+      INTEGER            INFO, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * ), SEP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SDISNA computes the reciprocal condition numbers for the eigenvectors
+*  of a real symmetric or complex Hermitian matrix or for the left or
+*  right singular vectors of a general m-by-n matrix. The reciprocal
+*  condition number is the 'gap' between the corresponding eigenvalue or
+*  singular value and the nearest other one.
+*
+*  The bound on the error, measured by angle in radians, in the I-th
+*  computed vector is given by
+*
+*         SLAMCH( 'E' ) * ( ANORM / SEP( I ) )
+*
+*  where ANORM = 2-norm(A) = max( abs( D(j) ) ).  SEP(I) is not allowed
+*  to be smaller than SLAMCH( 'E' )*ANORM in order to limit the size of
+*  the error bound.
+*
+*  SDISNA may also be used to compute error bounds for eigenvectors of
+*  the generalized symmetric definite eigenproblem.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          Specifies for which problem the reciprocal condition numbers
+*          should be computed:
+*          = 'E':  the eigenvectors of a symmetric/Hermitian matrix;
+*          = 'L':  the left singular vectors of a general matrix;
+*          = 'R':  the right singular vectors of a general matrix.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix. M >= 0.
+*
+*  N       (input) INTEGER
+*          If JOB = 'L' or 'R', the number of columns of the matrix,
+*          in which case N >= 0. Ignored if JOB = 'E'.
+*
+*  D       (input) REAL array, dimension (M) if JOB = 'E'
+*                              dimension (min(M,N)) if JOB = 'L' or 'R'
+*          The eigenvalues (if JOB = 'E') or singular values (if JOB =
+*          'L' or 'R') of the matrix, in either increasing or decreasing
+*          order. If singular values, they must be non-negative.
+*
+*  SEP     (output) REAL array, dimension (M) if JOB = 'E'
+*                               dimension (min(M,N)) if JOB = 'L' or 'R'
+*          The reciprocal condition numbers of the vectors.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            DECR, EIGEN, INCR, LEFT, RIGHT, SING
+      INTEGER            I, K
+      REAL               ANORM, EPS, NEWGAP, OLDGAP, SAFMIN, THRESH
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      EIGEN = LSAME( JOB, 'E' )
+      LEFT = LSAME( JOB, 'L' )
+      RIGHT = LSAME( JOB, 'R' )
+      SING = LEFT .OR. RIGHT
+      IF( EIGEN ) THEN
+         K = M
+      ELSE IF( SING ) THEN
+         K = MIN( M, N )
+      END IF
+      IF( .NOT.EIGEN .AND. .NOT.SING ) THEN
+         INFO = -1
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 ) THEN
+         INFO = -3
+      ELSE
+         INCR = .TRUE.
+         DECR = .TRUE.
+         DO 10 I = 1, K - 1
+            IF( INCR )
+     $         INCR = INCR .AND. D( I ).LE.D( I+1 )
+            IF( DECR )
+     $         DECR = DECR .AND. D( I ).GE.D( I+1 )
+   10    CONTINUE
+         IF( SING .AND. K.GT.0 ) THEN
+            IF( INCR )
+     $         INCR = INCR .AND. ZERO.LE.D( 1 )
+            IF( DECR )
+     $         DECR = DECR .AND. D( K ).GE.ZERO
+         END IF
+         IF( .NOT.( INCR .OR. DECR ) )
+     $      INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SDISNA', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( K.EQ.0 )
+     $   RETURN
+*
+*     Compute reciprocal condition numbers
+*
+      IF( K.EQ.1 ) THEN
+         SEP( 1 ) = SLAMCH( 'O' )
+      ELSE
+         OLDGAP = ABS( D( 2 )-D( 1 ) )
+         SEP( 1 ) = OLDGAP
+         DO 20 I = 2, K - 1
+            NEWGAP = ABS( D( I+1 )-D( I ) )
+            SEP( I ) = MIN( OLDGAP, NEWGAP )
+            OLDGAP = NEWGAP
+   20    CONTINUE
+         SEP( K ) = OLDGAP
+      END IF
+      IF( SING ) THEN
+         IF( ( LEFT .AND. M.GT.N ) .OR. ( RIGHT .AND. M.LT.N ) ) THEN
+            IF( INCR )
+     $         SEP( 1 ) = MIN( SEP( 1 ), D( 1 ) )
+            IF( DECR )
+     $         SEP( K ) = MIN( SEP( K ), D( K ) )
+         END IF
+      END IF
+*
+*     Ensure that reciprocal condition numbers are not less than
+*     threshold, in order to limit the size of the error bound
+*
+      EPS = SLAMCH( 'E' )
+      SAFMIN = SLAMCH( 'S' )
+      ANORM = MAX( ABS( D( 1 ) ), ABS( D( K ) ) )
+      IF( ANORM.EQ.ZERO ) THEN
+         THRESH = EPS
+      ELSE
+         THRESH = MAX( EPS*ANORM, SAFMIN )
+      END IF
+      DO 30 I = 1, K
+         SEP( I ) = MAX( SEP( I ), THRESH )
+   30 CONTINUE
+*
+      RETURN
+*
+*     End of SDISNA
+*
+      END
diff --git a/costa/native/external/lapack/second.f b/costa/native/external/lapack/second.f
new file mode 100644
index 000000000..0094216ab
--- /dev/null
+++ b/costa/native/external/lapack/second.f
@@ -0,0 +1,34 @@
+      REAL             FUNCTION SECOND( )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*  Purpose
+*  =======
+*
+*  SECOND returns the user time for a process in seconds.
+*  This version gets the time from the system function ETIME.
+*
+* =====================================================================
+*
+*     .. Local Scalars ..
+      REAL               T1
+*     ..
+*     .. Local Arrays ..
+      REAL               TARRAY( 2 )
+*     ..
+*     .. External Functions ..
+      REAL               ETIME
+C      EXTERNAL           ETIME
+*     ..
+*     .. Executable Statements ..
+*
+      T1 = ETIME( TARRAY )
+      SECOND = TARRAY( 1 )
+      RETURN
+*
+*     End of SECOND
+*
+      END
diff --git a/costa/native/external/lapack/sgbbrd.f b/costa/native/external/lapack/sgbbrd.f
new file mode 100644
index 000000000..7bd467538
--- /dev/null
+++ b/costa/native/external/lapack/sgbbrd.f
@@ -0,0 +1,444 @@
+      SUBROUTINE SGBBRD( VECT, M, N, NCC, KL, KU, AB, LDAB, D, E, Q,
+     $                   LDQ, PT, LDPT, C, LDC, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          VECT
+      INTEGER            INFO, KL, KU, LDAB, LDC, LDPT, LDQ, M, N, NCC
+*     ..
+*     .. Array Arguments ..
+      REAL               AB( LDAB, * ), C( LDC, * ), D( * ), E( * ),
+     $                   PT( LDPT, * ), Q( LDQ, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGBBRD reduces a real general m-by-n band matrix A to upper
+*  bidiagonal form B by an orthogonal transformation: Q' * A * P = B.
+*
+*  The routine computes B, and optionally forms Q or P', or computes
+*  Q'*C for a given matrix C.
+*
+*  Arguments
+*  =========
+*
+*  VECT    (input) CHARACTER*1
+*          Specifies whether or not the matrices Q and P' are to be
+*          formed.
+*          = 'N': do not form Q or P';
+*          = 'Q': form Q only;
+*          = 'P': form P' only;
+*          = 'B': form both.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  NCC     (input) INTEGER
+*          The number of columns of the matrix C.  NCC >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals of the matrix A. KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals of the matrix A. KU >= 0.
+*
+*  AB      (input/output) REAL array, dimension (LDAB,N)
+*          On entry, the m-by-n band matrix A, stored in rows 1 to
+*          KL+KU+1. The j-th column of A is stored in the j-th column of
+*          the array AB as follows:
+*          AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl).
+*          On exit, A is overwritten by values generated during the
+*          reduction.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array A. LDAB >= KL+KU+1.
+*
+*  D       (output) REAL array, dimension (min(M,N))
+*          The diagonal elements of the bidiagonal matrix B.
+*
+*  E       (output) REAL array, dimension (min(M,N)-1)
+*          The superdiagonal elements of the bidiagonal matrix B.
+*
+*  Q       (output) REAL array, dimension (LDQ,M)
+*          If VECT = 'Q' or 'B', the m-by-m orthogonal matrix Q.
+*          If VECT = 'N' or 'P', the array Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.
+*          LDQ >= max(1,M) if VECT = 'Q' or 'B'; LDQ >= 1 otherwise.
+*
+*  PT      (output) REAL array, dimension (LDPT,N)
+*          If VECT = 'P' or 'B', the n-by-n orthogonal matrix P'.
+*          If VECT = 'N' or 'Q', the array PT is not referenced.
+*
+*  LDPT    (input) INTEGER
+*          The leading dimension of the array PT.
+*          LDPT >= max(1,N) if VECT = 'P' or 'B'; LDPT >= 1 otherwise.
+*
+*  C       (input/output) REAL array, dimension (LDC,NCC)
+*          On entry, an m-by-ncc matrix C.
+*          On exit, C is overwritten by Q'*C.
+*          C is not referenced if NCC = 0.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C.
+*          LDC >= max(1,M) if NCC > 0; LDC >= 1 if NCC = 0.
+*
+*  WORK    (workspace) REAL array, dimension (2*max(M,N))
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            WANTB, WANTC, WANTPT, WANTQ
+      INTEGER            I, INCA, J, J1, J2, KB, KB1, KK, KLM, KLU1,
+     $                   KUN, L, MINMN, ML, ML0, MN, MU, MU0, NR, NRT
+      REAL               RA, RB, RC, RS
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARGV, SLARTG, SLARTV, SLASET, SROT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      WANTB = LSAME( VECT, 'B' )
+      WANTQ = LSAME( VECT, 'Q' ) .OR. WANTB
+      WANTPT = LSAME( VECT, 'P' ) .OR. WANTB
+      WANTC = NCC.GT.0
+      KLU1 = KL + KU + 1
+      INFO = 0
+      IF( .NOT.WANTQ .AND. .NOT.WANTPT .AND. .NOT.LSAME( VECT, 'N' ) )
+     $     THEN
+         INFO = -1
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NCC.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDAB.LT.KLU1 ) THEN
+         INFO = -8
+      ELSE IF( LDQ.LT.1 .OR. WANTQ .AND. LDQ.LT.MAX( 1, M ) ) THEN
+         INFO = -12
+      ELSE IF( LDPT.LT.1 .OR. WANTPT .AND. LDPT.LT.MAX( 1, N ) ) THEN
+         INFO = -14
+      ELSE IF( LDC.LT.1 .OR. WANTC .AND. LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -16
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGBBRD', -INFO )
+         RETURN
+      END IF
+*
+*     Initialize Q and P' to the unit matrix, if needed
+*
+      IF( WANTQ )
+     $   CALL SLASET( 'Full', M, M, ZERO, ONE, Q, LDQ )
+      IF( WANTPT )
+     $   CALL SLASET( 'Full', N, N, ZERO, ONE, PT, LDPT )
+*
+*     Quick return if possible.
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+      MINMN = MIN( M, N )
+*
+      IF( KL+KU.GT.1 ) THEN
+*
+*        Reduce to upper bidiagonal form if KU > 0; if KU = 0, reduce
+*        first to lower bidiagonal form and then transform to upper
+*        bidiagonal
+*
+         IF( KU.GT.0 ) THEN
+            ML0 = 1
+            MU0 = 2
+         ELSE
+            ML0 = 2
+            MU0 = 1
+         END IF
+*
+*        Wherever possible, plane rotations are generated and applied in
+*        vector operations of length NR over the index set J1:J2:KLU1.
+*
+*        The sines of the plane rotations are stored in WORK(1:max(m,n))
+*        and the cosines in WORK(max(m,n)+1:2*max(m,n)).
+*
+         MN = MAX( M, N )
+         KLM = MIN( M-1, KL )
+         KUN = MIN( N-1, KU )
+         KB = KLM + KUN
+         KB1 = KB + 1
+         INCA = KB1*LDAB
+         NR = 0
+         J1 = KLM + 2
+         J2 = 1 - KUN
+*
+         DO 90 I = 1, MINMN
+*
+*           Reduce i-th column and i-th row of matrix to bidiagonal form
+*
+            ML = KLM + 1
+            MU = KUN + 1
+            DO 80 KK = 1, KB
+               J1 = J1 + KB
+               J2 = J2 + KB
+*
+*              generate plane rotations to annihilate nonzero elements
+*              which have been created below the band
+*
+               IF( NR.GT.0 )
+     $            CALL SLARGV( NR, AB( KLU1, J1-KLM-1 ), INCA,
+     $                         WORK( J1 ), KB1, WORK( MN+J1 ), KB1 )
+*
+*              apply plane rotations from the left
+*
+               DO 10 L = 1, KB
+                  IF( J2-KLM+L-1.GT.N ) THEN
+                     NRT = NR - 1
+                  ELSE
+                     NRT = NR
+                  END IF
+                  IF( NRT.GT.0 )
+     $               CALL SLARTV( NRT, AB( KLU1-L, J1-KLM+L-1 ), INCA,
+     $                            AB( KLU1-L+1, J1-KLM+L-1 ), INCA,
+     $                            WORK( MN+J1 ), WORK( J1 ), KB1 )
+   10          CONTINUE
+*
+               IF( ML.GT.ML0 ) THEN
+                  IF( ML.LE.M-I+1 ) THEN
+*
+*                    generate plane rotation to annihilate a(i+ml-1,i)
+*                    within the band, and apply rotation from the left
+*
+                     CALL SLARTG( AB( KU+ML-1, I ), AB( KU+ML, I ),
+     $                            WORK( MN+I+ML-1 ), WORK( I+ML-1 ),
+     $                            RA )
+                     AB( KU+ML-1, I ) = RA
+                     IF( I.LT.N )
+     $                  CALL SROT( MIN( KU+ML-2, N-I ),
+     $                             AB( KU+ML-2, I+1 ), LDAB-1,
+     $                             AB( KU+ML-1, I+1 ), LDAB-1,
+     $                             WORK( MN+I+ML-1 ), WORK( I+ML-1 ) )
+                  END IF
+                  NR = NR + 1
+                  J1 = J1 - KB1
+               END IF
+*
+               IF( WANTQ ) THEN
+*
+*                 accumulate product of plane rotations in Q
+*
+                  DO 20 J = J1, J2, KB1
+                     CALL SROT( M, Q( 1, J-1 ), 1, Q( 1, J ), 1,
+     $                          WORK( MN+J ), WORK( J ) )
+   20             CONTINUE
+               END IF
+*
+               IF( WANTC ) THEN
+*
+*                 apply plane rotations to C
+*
+                  DO 30 J = J1, J2, KB1
+                     CALL SROT( NCC, C( J-1, 1 ), LDC, C( J, 1 ), LDC,
+     $                          WORK( MN+J ), WORK( J ) )
+   30             CONTINUE
+               END IF
+*
+               IF( J2+KUN.GT.N ) THEN
+*
+*                 adjust J2 to keep within the bounds of the matrix
+*
+                  NR = NR - 1
+                  J2 = J2 - KB1
+               END IF
+*
+               DO 40 J = J1, J2, KB1
+*
+*                 create nonzero element a(j-1,j+ku) above the band
+*                 and store it in WORK(n+1:2*n)
+*
+                  WORK( J+KUN ) = WORK( J )*AB( 1, J+KUN )
+                  AB( 1, J+KUN ) = WORK( MN+J )*AB( 1, J+KUN )
+   40          CONTINUE
+*
+*              generate plane rotations to annihilate nonzero elements
+*              which have been generated above the band
+*
+               IF( NR.GT.0 )
+     $            CALL SLARGV( NR, AB( 1, J1+KUN-1 ), INCA,
+     $                         WORK( J1+KUN ), KB1, WORK( MN+J1+KUN ),
+     $                         KB1 )
+*
+*              apply plane rotations from the right
+*
+               DO 50 L = 1, KB
+                  IF( J2+L-1.GT.M ) THEN
+                     NRT = NR - 1
+                  ELSE
+                     NRT = NR
+                  END IF
+                  IF( NRT.GT.0 )
+     $               CALL SLARTV( NRT, AB( L+1, J1+KUN-1 ), INCA,
+     $                            AB( L, J1+KUN ), INCA,
+     $                            WORK( MN+J1+KUN ), WORK( J1+KUN ),
+     $                            KB1 )
+   50          CONTINUE
+*
+               IF( ML.EQ.ML0 .AND. MU.GT.MU0 ) THEN
+                  IF( MU.LE.N-I+1 ) THEN
+*
+*                    generate plane rotation to annihilate a(i,i+mu-1)
+*                    within the band, and apply rotation from the right
+*
+                     CALL SLARTG( AB( KU-MU+3, I+MU-2 ),
+     $                            AB( KU-MU+2, I+MU-1 ),
+     $                            WORK( MN+I+MU-1 ), WORK( I+MU-1 ),
+     $                            RA )
+                     AB( KU-MU+3, I+MU-2 ) = RA
+                     CALL SROT( MIN( KL+MU-2, M-I ),
+     $                          AB( KU-MU+4, I+MU-2 ), 1,
+     $                          AB( KU-MU+3, I+MU-1 ), 1,
+     $                          WORK( MN+I+MU-1 ), WORK( I+MU-1 ) )
+                  END IF
+                  NR = NR + 1
+                  J1 = J1 - KB1
+               END IF
+*
+               IF( WANTPT ) THEN
+*
+*                 accumulate product of plane rotations in P'
+*
+                  DO 60 J = J1, J2, KB1
+                     CALL SROT( N, PT( J+KUN-1, 1 ), LDPT,
+     $                          PT( J+KUN, 1 ), LDPT, WORK( MN+J+KUN ),
+     $                          WORK( J+KUN ) )
+   60             CONTINUE
+               END IF
+*
+               IF( J2+KB.GT.M ) THEN
+*
+*                 adjust J2 to keep within the bounds of the matrix
+*
+                  NR = NR - 1
+                  J2 = J2 - KB1
+               END IF
+*
+               DO 70 J = J1, J2, KB1
+*
+*                 create nonzero element a(j+kl+ku,j+ku-1) below the
+*                 band and store it in WORK(1:n)
+*
+                  WORK( J+KB ) = WORK( J+KUN )*AB( KLU1, J+KUN )
+                  AB( KLU1, J+KUN ) = WORK( MN+J+KUN )*AB( KLU1, J+KUN )
+   70          CONTINUE
+*
+               IF( ML.GT.ML0 ) THEN
+                  ML = ML - 1
+               ELSE
+                  MU = MU - 1
+               END IF
+   80       CONTINUE
+   90    CONTINUE
+      END IF
+*
+      IF( KU.EQ.0 .AND. KL.GT.0 ) THEN
+*
+*        A has been reduced to lower bidiagonal form
+*
+*        Transform lower bidiagonal form to upper bidiagonal by applying
+*        plane rotations from the left, storing diagonal elements in D
+*        and off-diagonal elements in E
+*
+         DO 100 I = 1, MIN( M-1, N )
+            CALL SLARTG( AB( 1, I ), AB( 2, I ), RC, RS, RA )
+            D( I ) = RA
+            IF( I.LT.N ) THEN
+               E( I ) = RS*AB( 1, I+1 )
+               AB( 1, I+1 ) = RC*AB( 1, I+1 )
+            END IF
+            IF( WANTQ )
+     $         CALL SROT( M, Q( 1, I ), 1, Q( 1, I+1 ), 1, RC, RS )
+            IF( WANTC )
+     $         CALL SROT( NCC, C( I, 1 ), LDC, C( I+1, 1 ), LDC, RC,
+     $                    RS )
+  100    CONTINUE
+         IF( M.LE.N )
+     $      D( M ) = AB( 1, M )
+      ELSE IF( KU.GT.0 ) THEN
+*
+*        A has been reduced to upper bidiagonal form
+*
+         IF( M.LT.N ) THEN
+*
+*           Annihilate a(m,m+1) by applying plane rotations from the
+*           right, storing diagonal elements in D and off-diagonal
+*           elements in E
+*
+            RB = AB( KU, M+1 )
+            DO 110 I = M, 1, -1
+               CALL SLARTG( AB( KU+1, I ), RB, RC, RS, RA )
+               D( I ) = RA
+               IF( I.GT.1 ) THEN
+                  RB = -RS*AB( KU, I )
+                  E( I-1 ) = RC*AB( KU, I )
+               END IF
+               IF( WANTPT )
+     $            CALL SROT( N, PT( I, 1 ), LDPT, PT( M+1, 1 ), LDPT,
+     $                       RC, RS )
+  110       CONTINUE
+         ELSE
+*
+*           Copy off-diagonal elements to E and diagonal elements to D
+*
+            DO 120 I = 1, MINMN - 1
+               E( I ) = AB( KU, I+1 )
+  120       CONTINUE
+            DO 130 I = 1, MINMN
+               D( I ) = AB( KU+1, I )
+  130       CONTINUE
+         END IF
+      ELSE
+*
+*        A is diagonal. Set elements of E to zero and copy diagonal
+*        elements to D.
+*
+         DO 140 I = 1, MINMN - 1
+            E( I ) = ZERO
+  140    CONTINUE
+         DO 150 I = 1, MINMN
+            D( I ) = AB( 1, I )
+  150    CONTINUE
+      END IF
+      RETURN
+*
+*     End of SGBBRD
+*
+      END
diff --git a/costa/native/external/lapack/sgbcon.f b/costa/native/external/lapack/sgbcon.f
new file mode 100644
index 000000000..cfc387c39
--- /dev/null
+++ b/costa/native/external/lapack/sgbcon.f
@@ -0,0 +1,222 @@
+      SUBROUTINE SGBCON( NORM, N, KL, KU, AB, LDAB, IPIV, ANORM, RCOND,
+     $                   WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            INFO, KL, KU, LDAB, N
+      REAL               ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), IWORK( * )
+      REAL               AB( LDAB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGBCON estimates the reciprocal of the condition number of a real
+*  general band matrix A, in either the 1-norm or the infinity-norm,
+*  using the LU factorization computed by SGBTRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as
+*     RCOND = 1 / ( norm(A) * norm(inv(A)) ).
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies whether the 1-norm condition number or the
+*          infinity-norm condition number is required:
+*          = '1' or 'O':  1-norm;
+*          = 'I':         Infinity-norm.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  AB      (input) REAL array, dimension (LDAB,N)
+*          Details of the LU factorization of the band matrix A, as
+*          computed by SGBTRF.  U is stored as an upper triangular band
+*          matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and
+*          the multipliers used during the factorization are stored in
+*          rows KL+KU+2 to 2*KL+KU+1.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices; for 1 <= i <= N, row i of the matrix was
+*          interchanged with row IPIV(i).
+*
+*  ANORM   (input) REAL
+*          If NORM = '1' or 'O', the 1-norm of the original matrix A.
+*          If NORM = 'I', the infinity-norm of the original matrix A.
+*
+*  RCOND   (output) REAL
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(norm(A) * norm(inv(A))).
+*
+*  WORK    (workspace) REAL array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LNOTI, ONENRM
+      CHARACTER          NORMIN
+      INTEGER            IX, J, JP, KASE, KASE1, KD, LM
+      REAL               AINVNM, SCALE, SMLNUM, T
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ISAMAX
+      REAL               SDOT, SLAMCH
+      EXTERNAL           LSAME, ISAMAX, SDOT, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SLACON, SLATBS, SRSCL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' )
+      IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.2*KL+KU+1 ) THEN
+         INFO = -6
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGBCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.EQ.ZERO ) THEN
+         RETURN
+      END IF
+*
+      SMLNUM = SLAMCH( 'Safe minimum' )
+*
+*     Estimate the norm of inv(A).
+*
+      AINVNM = ZERO
+      NORMIN = 'N'
+      IF( ONENRM ) THEN
+         KASE1 = 1
+      ELSE
+         KASE1 = 2
+      END IF
+      KD = KL + KU + 1
+      LNOTI = KL.GT.0
+      KASE = 0
+   10 CONTINUE
+      CALL SLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+         IF( KASE.EQ.KASE1 ) THEN
+*
+*           Multiply by inv(L).
+*
+            IF( LNOTI ) THEN
+               DO 20 J = 1, N - 1
+                  LM = MIN( KL, N-J )
+                  JP = IPIV( J )
+                  T = WORK( JP )
+                  IF( JP.NE.J ) THEN
+                     WORK( JP ) = WORK( J )
+                     WORK( J ) = T
+                  END IF
+                  CALL SAXPY( LM, -T, AB( KD+1, J ), 1, WORK( J+1 ), 1 )
+   20          CONTINUE
+            END IF
+*
+*           Multiply by inv(U).
+*
+            CALL SLATBS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   KL+KU, AB, LDAB, WORK, SCALE, WORK( 2*N+1 ),
+     $                   INFO )
+         ELSE
+*
+*           Multiply by inv(U').
+*
+            CALL SLATBS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N,
+     $                   KL+KU, AB, LDAB, WORK, SCALE, WORK( 2*N+1 ),
+     $                   INFO )
+*
+*           Multiply by inv(L').
+*
+            IF( LNOTI ) THEN
+               DO 30 J = N - 1, 1, -1
+                  LM = MIN( KL, N-J )
+                  WORK( J ) = WORK( J ) - SDOT( LM, AB( KD+1, J ), 1,
+     $                        WORK( J+1 ), 1 )
+                  JP = IPIV( J )
+                  IF( JP.NE.J ) THEN
+                     T = WORK( JP )
+                     WORK( JP ) = WORK( J )
+                     WORK( J ) = T
+                  END IF
+   30          CONTINUE
+            END IF
+         END IF
+*
+*        Divide X by 1/SCALE if doing so will not cause overflow.
+*
+         NORMIN = 'Y'
+         IF( SCALE.NE.ONE ) THEN
+            IX = ISAMAX( N, WORK, 1 )
+            IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO )
+     $         GO TO 40
+            CALL SRSCL( N, SCALE, WORK, 1 )
+         END IF
+         GO TO 10
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+   40 CONTINUE
+      RETURN
+*
+*     End of SGBCON
+*
+      END
diff --git a/costa/native/external/lapack/sgbequ.f b/costa/native/external/lapack/sgbequ.f
new file mode 100644
index 000000000..d354045e6
--- /dev/null
+++ b/costa/native/external/lapack/sgbequ.f
@@ -0,0 +1,240 @@
+      SUBROUTINE SGBEQU( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND,
+     $                   AMAX, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, KL, KU, LDAB, M, N
+      REAL               AMAX, COLCND, ROWCND
+*     ..
+*     .. Array Arguments ..
+      REAL               AB( LDAB, * ), C( * ), R( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGBEQU computes row and column scalings intended to equilibrate an
+*  M-by-N band matrix A and reduce its condition number.  R returns the
+*  row scale factors and C the column scale factors, chosen to try to
+*  make the largest element in each row and column of the matrix B with
+*  elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1.
+*
+*  R(i) and C(j) are restricted to be between SMLNUM = smallest safe
+*  number and BIGNUM = largest safe number.  Use of these scaling
+*  factors is not guaranteed to reduce the condition number of A but
+*  works well in practice.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  AB      (input) REAL array, dimension (LDAB,N)
+*          The band matrix A, stored in rows 1 to KL+KU+1.  The j-th
+*          column of A is stored in the j-th column of the array AB as
+*          follows:
+*          AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KL+KU+1.
+*
+*  R       (output) REAL array, dimension (M)
+*          If INFO = 0, or INFO > M, R contains the row scale factors
+*          for A.
+*
+*  C       (output) REAL array, dimension (N)
+*          If INFO = 0, C contains the column scale factors for A.
+*
+*  ROWCND  (output) REAL
+*          If INFO = 0 or INFO > M, ROWCND contains the ratio of the
+*          smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and
+*          AMAX is neither too large nor too small, it is not worth
+*          scaling by R.
+*
+*  COLCND  (output) REAL
+*          If INFO = 0, COLCND contains the ratio of the smallest
+*          C(i) to the largest C(i).  If COLCND >= 0.1, it is not
+*          worth scaling by C.
+*
+*  AMAX    (output) REAL
+*          Absolute value of largest matrix element.  If AMAX is very
+*          close to overflow or very close to underflow, the matrix
+*          should be scaled.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= M:  the i-th row of A is exactly zero
+*                >  M:  the (i-M)-th column of A is exactly zero
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, KD
+      REAL               BIGNUM, RCMAX, RCMIN, SMLNUM
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KL+KU+1 ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGBEQU', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         ROWCND = ONE
+         COLCND = ONE
+         AMAX = ZERO
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SMLNUM = SLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+*
+*     Compute row scale factors.
+*
+      DO 10 I = 1, M
+         R( I ) = ZERO
+   10 CONTINUE
+*
+*     Find the maximum element in each row.
+*
+      KD = KU + 1
+      DO 30 J = 1, N
+         DO 20 I = MAX( J-KU, 1 ), MIN( J+KL, M )
+            R( I ) = MAX( R( I ), ABS( AB( KD+I-J, J ) ) )
+   20    CONTINUE
+   30 CONTINUE
+*
+*     Find the maximum and minimum scale factors.
+*
+      RCMIN = BIGNUM
+      RCMAX = ZERO
+      DO 40 I = 1, M
+         RCMAX = MAX( RCMAX, R( I ) )
+         RCMIN = MIN( RCMIN, R( I ) )
+   40 CONTINUE
+      AMAX = RCMAX
+*
+      IF( RCMIN.EQ.ZERO ) THEN
+*
+*        Find the first zero scale factor and return an error code.
+*
+         DO 50 I = 1, M
+            IF( R( I ).EQ.ZERO ) THEN
+               INFO = I
+               RETURN
+            END IF
+   50    CONTINUE
+      ELSE
+*
+*        Invert the scale factors.
+*
+         DO 60 I = 1, M
+            R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM )
+   60    CONTINUE
+*
+*        Compute ROWCND = min(R(I)) / max(R(I))
+*
+         ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+      END IF
+*
+*     Compute column scale factors
+*
+      DO 70 J = 1, N
+         C( J ) = ZERO
+   70 CONTINUE
+*
+*     Find the maximum element in each column,
+*     assuming the row scaling computed above.
+*
+      KD = KU + 1
+      DO 90 J = 1, N
+         DO 80 I = MAX( J-KU, 1 ), MIN( J+KL, M )
+            C( J ) = MAX( C( J ), ABS( AB( KD+I-J, J ) )*R( I ) )
+   80    CONTINUE
+   90 CONTINUE
+*
+*     Find the maximum and minimum scale factors.
+*
+      RCMIN = BIGNUM
+      RCMAX = ZERO
+      DO 100 J = 1, N
+         RCMIN = MIN( RCMIN, C( J ) )
+         RCMAX = MAX( RCMAX, C( J ) )
+  100 CONTINUE
+*
+      IF( RCMIN.EQ.ZERO ) THEN
+*
+*        Find the first zero scale factor and return an error code.
+*
+         DO 110 J = 1, N
+            IF( C( J ).EQ.ZERO ) THEN
+               INFO = M + J
+               RETURN
+            END IF
+  110    CONTINUE
+      ELSE
+*
+*        Invert the scale factors.
+*
+         DO 120 J = 1, N
+            C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM )
+  120    CONTINUE
+*
+*        Compute COLCND = min(C(J)) / max(C(J))
+*
+         COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+      END IF
+*
+      RETURN
+*
+*     End of SGBEQU
+*
+      END
diff --git a/costa/native/external/lapack/sgbrfs.f b/costa/native/external/lapack/sgbrfs.f
new file mode 100644
index 000000000..ff4d5737d
--- /dev/null
+++ b/costa/native/external/lapack/sgbrfs.f
@@ -0,0 +1,351 @@
+      SUBROUTINE SGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB,
+     $                   IPIV, B, LDB, X, LDX, FERR, BERR, WORK, IWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), IWORK( * )
+      REAL               AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ),
+     $                   BERR( * ), FERR( * ), WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGBRFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is banded, and provides
+*  error bounds and backward error estimates for the solution.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose = Transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AB      (input) REAL array, dimension (LDAB,N)
+*          The original band matrix A, stored in rows 1 to KL+KU+1.
+*          The j-th column of A is stored in the j-th column of the
+*          array AB as follows:
+*          AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KL+KU+1.
+*
+*  AFB     (input) REAL array, dimension (LDAFB,N)
+*          Details of the LU factorization of the band matrix A, as
+*          computed by SGBTRF.  U is stored as an upper triangular band
+*          matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and
+*          the multipliers used during the factorization are stored in
+*          rows KL+KU+2 to 2*KL+KU+1.
+*
+*  LDAFB   (input) INTEGER
+*          The leading dimension of the array AFB.  LDAFB >= 2*KL*KU+1.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices from SGBTRF; for 1<=i<=N, row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  B       (input) REAL array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) REAL array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by SGBTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) REAL array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+      REAL               TWO
+      PARAMETER          ( TWO = 2.0E+0 )
+      REAL               THREE
+      PARAMETER          ( THREE = 3.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN
+      CHARACTER          TRANST
+      INTEGER            COUNT, I, J, K, KASE, KK, NZ
+      REAL               EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SCOPY, SGBMV, SGBTRS, SLACON, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $    LSAME( TRANS, 'C' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.KL+KU+1 ) THEN
+         INFO = -7
+      ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN
+         INFO = -9
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -12
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -14
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGBRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      IF( NOTRAN ) THEN
+         TRANST = 'T'
+      ELSE
+         TRANST = 'N'
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = MIN( KL+KU+2, N+1 )
+      EPS = SLAMCH( 'Epsilon' )
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - op(A) * X,
+*        where op(A) = A, A**T, or A**H, depending on TRANS.
+*
+         CALL SCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 )
+         CALL SGBMV( TRANS, N, N, KL, KU, -ONE, AB, LDAB, X( 1, J ), 1,
+     $               ONE, WORK( N+1 ), 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            WORK( I ) = ABS( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(op(A))*abs(X) + abs(B).
+*
+         IF( NOTRAN ) THEN
+            DO 50 K = 1, N
+               KK = KU + 1 - K
+               XK = ABS( X( K, J ) )
+               DO 40 I = MAX( 1, K-KU ), MIN( N, K+KL )
+                  WORK( I ) = WORK( I ) + ABS( AB( KK+I, K ) )*XK
+   40          CONTINUE
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               KK = KU + 1 - K
+               DO 60 I = MAX( 1, K-KU ), MIN( N, K+KL )
+                  S = S + ABS( AB( KK+I, K ) )*ABS( X( I, J ) )
+   60          CONTINUE
+               WORK( K ) = WORK( K ) + S
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) )
+            ELSE
+               S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) /
+     $             ( WORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL SGBTRS( TRANS, N, KL, KU, 1, AFB, LDAFB, IPIV,
+     $                   WORK( N+1 ), N, INFO )
+            CALL SAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(op(A)))*
+*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(op(A)) is the inverse of op(A)
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(op(A))*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use SLACON to estimate the infinity-norm of the matrix
+*           inv(op(A)) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I )
+            ELSE
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL SLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ),
+     $                KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(op(A)**T).
+*
+               CALL SGBTRS( TRANST, N, KL, KU, 1, AFB, LDAFB, IPIV,
+     $                      WORK( N+1 ), N, INFO )
+               DO 110 I = 1, N
+                  WORK( N+I ) = WORK( N+I )*WORK( I )
+  110          CONTINUE
+            ELSE
+*
+*              Multiply by inv(op(A))*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( N+I ) = WORK( N+I )*WORK( I )
+  120          CONTINUE
+               CALL SGBTRS( TRANS, N, KL, KU, 1, AFB, LDAFB, IPIV,
+     $                      WORK( N+1 ), N, INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, ABS( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of SGBRFS
+*
+      END
diff --git a/costa/native/external/lapack/sgbsv.f b/costa/native/external/lapack/sgbsv.f
new file mode 100644
index 000000000..17dd5ea0d
--- /dev/null
+++ b/costa/native/external/lapack/sgbsv.f
@@ -0,0 +1,143 @@
+      SUBROUTINE SGBSV( N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, KL, KU, LDAB, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               AB( LDAB, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGBSV computes the solution to a real system of linear equations
+*  A * X = B, where A is a band matrix of order N with KL subdiagonals
+*  and KU superdiagonals, and X and B are N-by-NRHS matrices.
+*
+*  The LU decomposition with partial pivoting and row interchanges is
+*  used to factor A as A = L * U, where L is a product of permutation
+*  and unit lower triangular matrices with KL subdiagonals, and U is
+*  upper triangular with KL+KU superdiagonals.  The factored form of A
+*  is then used to solve the system of equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AB      (input/output) REAL array, dimension (LDAB,N)
+*          On entry, the matrix A in band storage, in rows KL+1 to
+*          2*KL+KU+1; rows 1 to KL of the array need not be set.
+*          The j-th column of A is stored in the j-th column of the
+*          array AB as follows:
+*          AB(KL+KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+KL)
+*          On exit, details of the factorization: U is stored as an
+*          upper triangular band matrix with KL+KU superdiagonals in
+*          rows 1 to KL+KU+1, and the multipliers used during the
+*          factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
+*          See below for further details.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          The pivot indices that define the permutation matrix P;
+*          row i of the matrix was interchanged with row IPIV(i).
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, U(i,i) is exactly zero.  The factorization
+*                has been completed, but the factor U is exactly
+*                singular, and the solution has not been computed.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  M = N = 6, KL = 2, KU = 1:
+*
+*  On entry:                       On exit:
+*
+*      *    *    *    +    +    +       *    *    *   u14  u25  u36
+*      *    *    +    +    +    +       *    *   u13  u24  u35  u46
+*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
+*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
+*     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
+*     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *
+*
+*  Array elements marked * are not used by the routine; elements marked
+*  + need not be set on entry, but are required by the routine to store
+*  elements of U because of fill-in resulting from the row interchanges.
+*
+*  =====================================================================
+*
+*     .. External Subroutines ..
+      EXTERNAL           SGBTRF, SGBTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.2*KL+KU+1 ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( N, 1 ) ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGBSV ', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the LU factorization of the band matrix A.
+*
+      CALL SGBTRF( N, N, KL, KU, AB, LDAB, IPIV, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL SGBTRS( 'No transpose', N, KL, KU, NRHS, AB, LDAB, IPIV,
+     $                B, LDB, INFO )
+      END IF
+      RETURN
+*
+*     End of SGBSV
+*
+      END
diff --git a/costa/native/external/lapack/sgbsvx.f b/costa/native/external/lapack/sgbsvx.f
new file mode 100644
index 000000000..62037dc6f
--- /dev/null
+++ b/costa/native/external/lapack/sgbsvx.f
@@ -0,0 +1,517 @@
+      SUBROUTINE SGBSVX( FACT, TRANS, N, KL, KU, NRHS, AB, LDAB, AFB,
+     $                   LDAFB, IPIV, EQUED, R, C, B, LDB, X, LDX,
+     $                   RCOND, FERR, BERR, WORK, IWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, FACT, TRANS
+      INTEGER            INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), IWORK( * )
+      REAL               AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ),
+     $                   BERR( * ), C( * ), FERR( * ), R( * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGBSVX uses the LU factorization to compute the solution to a real
+*  system of linear equations A * X = B, A**T * X = B, or A**H * X = B,
+*  where A is a band matrix of order N with KL subdiagonals and KU
+*  superdiagonals, and X and B are N-by-NRHS matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed by this subroutine:
+*
+*  1. If FACT = 'E', real scaling factors are computed to equilibrate
+*     the system:
+*        TRANS = 'N':  diag(R)*A*diag(C)     *inv(diag(C))*X = diag(R)*B
+*        TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
+*        TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
+*     Whether or not the system will be equilibrated depends on the
+*     scaling of the matrix A, but if equilibration is used, A is
+*     overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N')
+*     or diag(C)*B (if TRANS = 'T' or 'C').
+*
+*  2. If FACT = 'N' or 'E', the LU decomposition is used to factor the
+*     matrix A (after equilibration if FACT = 'E') as
+*        A = L * U,
+*     where L is a product of permutation and unit lower triangular
+*     matrices with KL subdiagonals, and U is upper triangular with
+*     KL+KU superdiagonals.
+*
+*  3. If some U(i,i)=0, so that U is exactly singular, then the routine
+*     returns with INFO = i. Otherwise, the factored form of A is used
+*     to estimate the condition number of the matrix A.  If the
+*     reciprocal of the condition number is less than machine precision,
+*     INFO = N+1 is returned as a warning, but the routine still goes on
+*     to solve for X and compute error bounds as described below.
+*
+*  4. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  5. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  6. If equilibration was used, the matrix X is premultiplied by
+*     diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so
+*     that it solves the original system before equilibration.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of the matrix A is
+*          supplied on entry, and if not, whether the matrix A should be
+*          equilibrated before it is factored.
+*          = 'F':  On entry, AFB and IPIV contain the factored form of
+*                  A.  If EQUED is not 'N', the matrix A has been
+*                  equilibrated with scaling factors given by R and C.
+*                  AB, AFB, and IPIV are not modified.
+*          = 'N':  The matrix A will be copied to AFB and factored.
+*          = 'E':  The matrix A will be equilibrated if necessary, then
+*                  copied to AFB and factored.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations.
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Transpose)
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AB      (input/output) REAL array, dimension (LDAB,N)
+*          On entry, the matrix A in band storage, in rows 1 to KL+KU+1.
+*          The j-th column of A is stored in the j-th column of the
+*          array AB as follows:
+*          AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl)
+*
+*          If FACT = 'F' and EQUED is not 'N', then A must have been
+*          equilibrated by the scaling factors in R and/or C.  AB is not
+*          modified if FACT = 'F' or 'N', or if FACT = 'E' and
+*          EQUED = 'N' on exit.
+*
+*          On exit, if EQUED .ne. 'N', A is scaled as follows:
+*          EQUED = 'R':  A := diag(R) * A
+*          EQUED = 'C':  A := A * diag(C)
+*          EQUED = 'B':  A := diag(R) * A * diag(C).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KL+KU+1.
+*
+*  AFB     (input or output) REAL array, dimension (LDAFB,N)
+*          If FACT = 'F', then AFB is an input argument and on entry
+*          contains details of the LU factorization of the band matrix
+*          A, as computed by SGBTRF.  U is stored as an upper triangular
+*          band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1,
+*          and the multipliers used during the factorization are stored
+*          in rows KL+KU+2 to 2*KL+KU+1.  If EQUED .ne. 'N', then AFB is
+*          the factored form of the equilibrated matrix A.
+*
+*          If FACT = 'N', then AFB is an output argument and on exit
+*          returns details of the LU factorization of A.
+*
+*          If FACT = 'E', then AFB is an output argument and on exit
+*          returns details of the LU factorization of the equilibrated
+*          matrix A (see the description of AB for the form of the
+*          equilibrated matrix).
+*
+*  LDAFB   (input) INTEGER
+*          The leading dimension of the array AFB.  LDAFB >= 2*KL+KU+1.
+*
+*  IPIV    (input or output) INTEGER array, dimension (N)
+*          If FACT = 'F', then IPIV is an input argument and on entry
+*          contains the pivot indices from the factorization A = L*U
+*          as computed by SGBTRF; row i of the matrix was interchanged
+*          with row IPIV(i).
+*
+*          If FACT = 'N', then IPIV is an output argument and on exit
+*          contains the pivot indices from the factorization A = L*U
+*          of the original matrix A.
+*
+*          If FACT = 'E', then IPIV is an output argument and on exit
+*          contains the pivot indices from the factorization A = L*U
+*          of the equilibrated matrix A.
+*
+*  EQUED   (input or output) CHARACTER*1
+*          Specifies the form of equilibration that was done.
+*          = 'N':  No equilibration (always true if FACT = 'N').
+*          = 'R':  Row equilibration, i.e., A has been premultiplied by
+*                  diag(R).
+*          = 'C':  Column equilibration, i.e., A has been postmultiplied
+*                  by diag(C).
+*          = 'B':  Both row and column equilibration, i.e., A has been
+*                  replaced by diag(R) * A * diag(C).
+*          EQUED is an input argument if FACT = 'F'; otherwise, it is an
+*          output argument.
+*
+*  R       (input or output) REAL array, dimension (N)
+*          The row scale factors for A.  If EQUED = 'R' or 'B', A is
+*          multiplied on the left by diag(R); if EQUED = 'N' or 'C', R
+*          is not accessed.  R is an input argument if FACT = 'F';
+*          otherwise, R is an output argument.  If FACT = 'F' and
+*          EQUED = 'R' or 'B', each element of R must be positive.
+*
+*  C       (input or output) REAL array, dimension (N)
+*          The column scale factors for A.  If EQUED = 'C' or 'B', A is
+*          multiplied on the right by diag(C); if EQUED = 'N' or 'R', C
+*          is not accessed.  C is an input argument if FACT = 'F';
+*          otherwise, C is an output argument.  If FACT = 'F' and
+*          EQUED = 'C' or 'B', each element of C must be positive.
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit,
+*          if EQUED = 'N', B is not modified;
+*          if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by
+*          diag(R)*B;
+*          if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is
+*          overwritten by diag(C)*B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) REAL array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X
+*          to the original system of equations.  Note that A and B are
+*          modified on exit if EQUED .ne. 'N', and the solution to the
+*          equilibrated system is inv(diag(C))*X if TRANS = 'N' and
+*          EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C'
+*          and EQUED = 'R' or 'B'.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) REAL
+*          The estimate of the reciprocal condition number of the matrix
+*          A after equilibration (if done).  If RCOND is less than the
+*          machine precision (in particular, if RCOND = 0), the matrix
+*          is singular to working precision.  This condition is
+*          indicated by a return code of INFO > 0.
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace/output) REAL array, dimension (3*N)
+*          On exit, WORK(1) contains the reciprocal pivot growth
+*          factor norm(A)/norm(U). The "max absolute element" norm is
+*          used. If WORK(1) is much less than 1, then the stability
+*          of the LU factorization of the (equilibrated) matrix A
+*          could be poor. This also means that the solution X, condition
+*          estimator RCOND, and forward error bound FERR could be
+*          unreliable. If factorization fails with 0<INFO<=N, then
+*          WORK(1) contains the reciprocal pivot growth factor for the
+*          leading INFO columns of A.
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= N:  U(i,i) is exactly zero.  The factorization
+*                       has been completed, but the factor U is exactly
+*                       singular, so the solution and error bounds
+*                       could not be computed. RCOND = 0 is returned.
+*                = N+1: U is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU
+      CHARACTER          NORM
+      INTEGER            I, INFEQU, J, J1, J2
+      REAL               AMAX, ANORM, BIGNUM, COLCND, RCMAX, RCMIN,
+     $                   ROWCND, RPVGRW, SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH, SLANGB, SLANTB
+      EXTERNAL           LSAME, SLAMCH, SLANGB, SLANTB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SGBCON, SGBEQU, SGBRFS, SGBTRF, SGBTRS,
+     $                   SLACPY, SLAQGB, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      EQUIL = LSAME( FACT, 'E' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( NOFACT .OR. EQUIL ) THEN
+         EQUED = 'N'
+         ROWEQU = .FALSE.
+         COLEQU = .FALSE.
+      ELSE
+         ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' )
+         COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' )
+         SMLNUM = SLAMCH( 'Safe minimum' )
+         BIGNUM = ONE / SMLNUM
+      END IF
+*
+*     Test the input parameters.
+*
+      IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) )
+     $     THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDAB.LT.KL+KU+1 ) THEN
+         INFO = -8
+      ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN
+         INFO = -10
+      ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT.
+     $         ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN
+         INFO = -12
+      ELSE
+         IF( ROWEQU ) THEN
+            RCMIN = BIGNUM
+            RCMAX = ZERO
+            DO 10 J = 1, N
+               RCMIN = MIN( RCMIN, R( J ) )
+               RCMAX = MAX( RCMAX, R( J ) )
+   10       CONTINUE
+            IF( RCMIN.LE.ZERO ) THEN
+               INFO = -13
+            ELSE IF( N.GT.0 ) THEN
+               ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+            ELSE
+               ROWCND = ONE
+            END IF
+         END IF
+         IF( COLEQU .AND. INFO.EQ.0 ) THEN
+            RCMIN = BIGNUM
+            RCMAX = ZERO
+            DO 20 J = 1, N
+               RCMIN = MIN( RCMIN, C( J ) )
+               RCMAX = MAX( RCMAX, C( J ) )
+   20       CONTINUE
+            IF( RCMIN.LE.ZERO ) THEN
+               INFO = -14
+            ELSE IF( N.GT.0 ) THEN
+               COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+            ELSE
+               COLCND = ONE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            IF( LDB.LT.MAX( 1, N ) ) THEN
+               INFO = -16
+            ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+               INFO = -18
+            END IF
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGBSVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( EQUIL ) THEN
+*
+*        Compute row and column scalings to equilibrate the matrix A.
+*
+         CALL SGBEQU( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND,
+     $                AMAX, INFEQU )
+         IF( INFEQU.EQ.0 ) THEN
+*
+*           Equilibrate the matrix.
+*
+            CALL SLAQGB( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND,
+     $                   AMAX, EQUED )
+            ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' )
+            COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' )
+         END IF
+      END IF
+*
+*     Scale the right hand side.
+*
+      IF( NOTRAN ) THEN
+         IF( ROWEQU ) THEN
+            DO 40 J = 1, NRHS
+               DO 30 I = 1, N
+                  B( I, J ) = R( I )*B( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+      ELSE IF( COLEQU ) THEN
+         DO 60 J = 1, NRHS
+            DO 50 I = 1, N
+               B( I, J ) = C( I )*B( I, J )
+   50       CONTINUE
+   60    CONTINUE
+      END IF
+*
+      IF( NOFACT .OR. EQUIL ) THEN
+*
+*        Compute the LU factorization of the band matrix A.
+*
+         DO 70 J = 1, N
+            J1 = MAX( J-KU, 1 )
+            J2 = MIN( J+KL, N )
+            CALL SCOPY( J2-J1+1, AB( KU+1-J+J1, J ), 1,
+     $                  AFB( KL+KU+1-J+J1, J ), 1 )
+   70    CONTINUE
+*
+         CALL SGBTRF( N, N, KL, KU, AFB, LDAFB, IPIV, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 ) THEN
+*
+*              Compute the reciprocal pivot growth factor of the
+*              leading rank-deficient INFO columns of A.
+*
+               ANORM = ZERO
+               DO 90 J = 1, INFO
+                  DO 80 I = MAX( KU+2-J, 1 ),
+     $                    MIN( N+KU+1-J, KL+KU+1 )
+                     ANORM = MAX( ANORM, ABS( AB( I, J ) ) )
+   80             CONTINUE
+   90          CONTINUE
+               RPVGRW = SLANTB( 'M', 'U', 'N', INFO,
+     $                  MIN( INFO-1, KL+KU ), AFB( MAX( 1,
+     $                  KL+KU+2-INFO ), 1 ), LDAFB, WORK )
+               IF( RPVGRW.EQ.ZERO ) THEN
+                  RPVGRW = ONE
+               ELSE
+                  RPVGRW = ANORM / RPVGRW
+               END IF
+               WORK( 1 ) = RPVGRW
+               RCOND = ZERO
+            END IF
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A and the
+*     reciprocal pivot growth factor RPVGRW.
+*
+      IF( NOTRAN ) THEN
+         NORM = '1'
+      ELSE
+         NORM = 'I'
+      END IF
+      ANORM = SLANGB( NORM, N, KL, KU, AB, LDAB, WORK )
+      RPVGRW = SLANTB( 'M', 'U', 'N', N, KL+KU, AFB, LDAFB, WORK )
+      IF( RPVGRW.EQ.ZERO ) THEN
+         RPVGRW = ONE
+      ELSE
+         RPVGRW = SLANGB( 'M', N, KL, KU, AB, LDAB, WORK ) / RPVGRW
+      END IF
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL SGBCON( NORM, N, KL, KU, AFB, LDAFB, IPIV, ANORM, RCOND,
+     $             WORK, IWORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.SLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution matrix X.
+*
+      CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL SGBTRS( TRANS, N, KL, KU, NRHS, AFB, LDAFB, IPIV, X, LDX,
+     $             INFO )
+*
+*     Use iterative refinement to improve the computed solution and
+*     compute error bounds and backward error estimates for it.
+*
+      CALL SGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, IPIV,
+     $             B, LDB, X, LDX, FERR, BERR, WORK, IWORK, INFO )
+*
+*     Transform the solution matrix X to a solution of the original
+*     system.
+*
+      IF( NOTRAN ) THEN
+         IF( COLEQU ) THEN
+            DO 110 J = 1, NRHS
+               DO 100 I = 1, N
+                  X( I, J ) = C( I )*X( I, J )
+  100          CONTINUE
+  110       CONTINUE
+            DO 120 J = 1, NRHS
+               FERR( J ) = FERR( J ) / COLCND
+  120       CONTINUE
+         END IF
+      ELSE IF( ROWEQU ) THEN
+         DO 140 J = 1, NRHS
+            DO 130 I = 1, N
+               X( I, J ) = R( I )*X( I, J )
+  130       CONTINUE
+  140    CONTINUE
+         DO 150 J = 1, NRHS
+            FERR( J ) = FERR( J ) / ROWCND
+  150    CONTINUE
+      END IF
+*
+      WORK( 1 ) = RPVGRW
+      RETURN
+*
+*     End of SGBSVX
+*
+      END
diff --git a/costa/native/external/lapack/sgbtf2.f b/costa/native/external/lapack/sgbtf2.f
new file mode 100644
index 000000000..1c6da88db
--- /dev/null
+++ b/costa/native/external/lapack/sgbtf2.f
@@ -0,0 +1,203 @@
+      SUBROUTINE SGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, KL, KU, LDAB, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGBTF2 computes an LU factorization of a real m-by-n band matrix A
+*  using partial pivoting with row interchanges.
+*
+*  This is the unblocked version of the algorithm, calling Level 2 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  AB      (input/output) REAL array, dimension (LDAB,N)
+*          On entry, the matrix A in band storage, in rows KL+1 to
+*          2*KL+KU+1; rows 1 to KL of the array need not be set.
+*          The j-th column of A is stored in the j-th column of the
+*          array AB as follows:
+*          AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
+*
+*          On exit, details of the factorization: U is stored as an
+*          upper triangular band matrix with KL+KU superdiagonals in
+*          rows 1 to KL+KU+1, and the multipliers used during the
+*          factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
+*          See below for further details.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
+*
+*  IPIV    (output) INTEGER array, dimension (min(M,N))
+*          The pivot indices; for 1 <= i <= min(M,N), row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
+*               has been completed, but the factor U is exactly
+*               singular, and division by zero will occur if it is used
+*               to solve a system of equations.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  M = N = 6, KL = 2, KU = 1:
+*
+*  On entry:                       On exit:
+*
+*      *    *    *    +    +    +       *    *    *   u14  u25  u36
+*      *    *    +    +    +    +       *    *   u13  u24  u35  u46
+*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
+*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
+*     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
+*     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *
+*
+*  Array elements marked * are not used by the routine; elements marked
+*  + need not be set on entry, but are required by the routine to store
+*  elements of U, because of fill-in resulting from the row
+*  interchanges.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, JP, JU, KM, KV
+*     ..
+*     .. External Functions ..
+      INTEGER            ISAMAX
+      EXTERNAL           ISAMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGER, SSCAL, SSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     KV is the number of superdiagonals in the factor U, allowing for
+*     fill-in.
+*
+      KV = KU + KL
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KL+KV+1 ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGBTF2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     Gaussian elimination with partial pivoting
+*
+*     Set fill-in elements in columns KU+2 to KV to zero.
+*
+      DO 20 J = KU + 2, MIN( KV, N )
+         DO 10 I = KV - J + 2, KL
+            AB( I, J ) = ZERO
+   10    CONTINUE
+   20 CONTINUE
+*
+*     JU is the index of the last column affected by the current stage
+*     of the factorization.
+*
+      JU = 1
+*
+      DO 40 J = 1, MIN( M, N )
+*
+*        Set fill-in elements in column J+KV to zero.
+*
+         IF( J+KV.LE.N ) THEN
+            DO 30 I = 1, KL
+               AB( I, J+KV ) = ZERO
+   30       CONTINUE
+         END IF
+*
+*        Find pivot and test for singularity. KM is the number of
+*        subdiagonal elements in the current column.
+*
+         KM = MIN( KL, M-J )
+         JP = ISAMAX( KM+1, AB( KV+1, J ), 1 )
+         IPIV( J ) = JP + J - 1
+         IF( AB( KV+JP, J ).NE.ZERO ) THEN
+            JU = MAX( JU, MIN( J+KU+JP-1, N ) )
+*
+*           Apply interchange to columns J to JU.
+*
+            IF( JP.NE.1 )
+     $         CALL SSWAP( JU-J+1, AB( KV+JP, J ), LDAB-1,
+     $                     AB( KV+1, J ), LDAB-1 )
+*
+            IF( KM.GT.0 ) THEN
+*
+*              Compute multipliers.
+*
+               CALL SSCAL( KM, ONE / AB( KV+1, J ), AB( KV+2, J ), 1 )
+*
+*              Update trailing submatrix within the band.
+*
+               IF( JU.GT.J )
+     $            CALL SGER( KM, JU-J, -ONE, AB( KV+2, J ), 1,
+     $                       AB( KV, J+1 ), LDAB-1, AB( KV+1, J+1 ),
+     $                       LDAB-1 )
+            END IF
+         ELSE
+*
+*           If pivot is zero, set INFO to the index of the pivot
+*           unless a zero pivot has already been found.
+*
+            IF( INFO.EQ.0 )
+     $         INFO = J
+         END IF
+   40 CONTINUE
+      RETURN
+*
+*     End of SGBTF2
+*
+      END
diff --git a/costa/native/external/lapack/sgbtrf.f b/costa/native/external/lapack/sgbtrf.f
new file mode 100644
index 000000000..45c628b05
--- /dev/null
+++ b/costa/native/external/lapack/sgbtrf.f
@@ -0,0 +1,442 @@
+      SUBROUTINE SGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, KL, KU, LDAB, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGBTRF computes an LU factorization of a real m-by-n band matrix A
+*  using partial pivoting with row interchanges.
+*
+*  This is the blocked version of the algorithm, calling Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  AB      (input/output) REAL array, dimension (LDAB,N)
+*          On entry, the matrix A in band storage, in rows KL+1 to
+*          2*KL+KU+1; rows 1 to KL of the array need not be set.
+*          The j-th column of A is stored in the j-th column of the
+*          array AB as follows:
+*          AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
+*
+*          On exit, details of the factorization: U is stored as an
+*          upper triangular band matrix with KL+KU superdiagonals in
+*          rows 1 to KL+KU+1, and the multipliers used during the
+*          factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
+*          See below for further details.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
+*
+*  IPIV    (output) INTEGER array, dimension (min(M,N))
+*          The pivot indices; for 1 <= i <= min(M,N), row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
+*               has been completed, but the factor U is exactly
+*               singular, and division by zero will occur if it is used
+*               to solve a system of equations.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  M = N = 6, KL = 2, KU = 1:
+*
+*  On entry:                       On exit:
+*
+*      *    *    *    +    +    +       *    *    *   u14  u25  u36
+*      *    *    +    +    +    +       *    *   u13  u24  u35  u46
+*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
+*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
+*     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
+*     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *
+*
+*  Array elements marked * are not used by the routine; elements marked
+*  + need not be set on entry, but are required by the routine to store
+*  elements of U because of fill-in resulting from the row interchanges.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+      INTEGER            NBMAX, LDWORK
+      PARAMETER          ( NBMAX = 64, LDWORK = NBMAX+1 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
+     $                   JU, K2, KM, KV, NB, NW
+      REAL               TEMP
+*     ..
+*     .. Local Arrays ..
+      REAL               WORK13( LDWORK, NBMAX ),
+     $                   WORK31( LDWORK, NBMAX )
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV, ISAMAX
+      EXTERNAL           ILAENV, ISAMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SGBTF2, SGEMM, SGER, SLASWP, SSCAL,
+     $                   SSWAP, STRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     KV is the number of superdiagonals in the factor U, allowing for
+*     fill-in
+*
+      KV = KU + KL
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KL+KV+1 ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGBTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment
+*
+      NB = ILAENV( 1, 'SGBTRF', ' ', M, N, KL, KU )
+*
+*     The block size must not exceed the limit set by the size of the
+*     local arrays WORK13 and WORK31.
+*
+      NB = MIN( NB, NBMAX )
+*
+      IF( NB.LE.1 .OR. NB.GT.KL ) THEN
+*
+*        Use unblocked code
+*
+         CALL SGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
+      ELSE
+*
+*        Use blocked code
+*
+*        Zero the superdiagonal elements of the work array WORK13
+*
+         DO 20 J = 1, NB
+            DO 10 I = 1, J - 1
+               WORK13( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+*
+*        Zero the subdiagonal elements of the work array WORK31
+*
+         DO 40 J = 1, NB
+            DO 30 I = J + 1, NB
+               WORK31( I, J ) = ZERO
+   30       CONTINUE
+   40    CONTINUE
+*
+*        Gaussian elimination with partial pivoting
+*
+*        Set fill-in elements in columns KU+2 to KV to zero
+*
+         DO 60 J = KU + 2, MIN( KV, N )
+            DO 50 I = KV - J + 2, KL
+               AB( I, J ) = ZERO
+   50       CONTINUE
+   60    CONTINUE
+*
+*        JU is the index of the last column affected by the current
+*        stage of the factorization
+*
+         JU = 1
+*
+         DO 180 J = 1, MIN( M, N ), NB
+            JB = MIN( NB, MIN( M, N )-J+1 )
+*
+*           The active part of the matrix is partitioned
+*
+*              A11   A12   A13
+*              A21   A22   A23
+*              A31   A32   A33
+*
+*           Here A11, A21 and A31 denote the current block of JB columns
+*           which is about to be factorized. The number of rows in the
+*           partitioning are JB, I2, I3 respectively, and the numbers
+*           of columns are JB, J2, J3. The superdiagonal elements of A13
+*           and the subdiagonal elements of A31 lie outside the band.
+*
+            I2 = MIN( KL-JB, M-J-JB+1 )
+            I3 = MIN( JB, M-J-KL+1 )
+*
+*           J2 and J3 are computed after JU has been updated.
+*
+*           Factorize the current block of JB columns
+*
+            DO 80 JJ = J, J + JB - 1
+*
+*              Set fill-in elements in column JJ+KV to zero
+*
+               IF( JJ+KV.LE.N ) THEN
+                  DO 70 I = 1, KL
+                     AB( I, JJ+KV ) = ZERO
+   70             CONTINUE
+               END IF
+*
+*              Find pivot and test for singularity. KM is the number of
+*              subdiagonal elements in the current column.
+*
+               KM = MIN( KL, M-JJ )
+               JP = ISAMAX( KM+1, AB( KV+1, JJ ), 1 )
+               IPIV( JJ ) = JP + JJ - J
+               IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
+                  JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
+                  IF( JP.NE.1 ) THEN
+*
+*                    Apply interchange to columns J to J+JB-1
+*
+                     IF( JP+JJ-1.LT.J+KL ) THEN
+*
+                        CALL SSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
+     $                              AB( KV+JP+JJ-J, J ), LDAB-1 )
+                     ELSE
+*
+*                       The interchange affects columns J to JJ-1 of A31
+*                       which are stored in the work array WORK31
+*
+                        CALL SSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
+     $                              WORK31( JP+JJ-J-KL, 1 ), LDWORK )
+                        CALL SSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
+     $                              AB( KV+JP, JJ ), LDAB-1 )
+                     END IF
+                  END IF
+*
+*                 Compute multipliers
+*
+                  CALL SSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
+     $                        1 )
+*
+*                 Update trailing submatrix within the band and within
+*                 the current block. JM is the index of the last column
+*                 which needs to be updated.
+*
+                  JM = MIN( JU, J+JB-1 )
+                  IF( JM.GT.JJ )
+     $               CALL SGER( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
+     $                          AB( KV, JJ+1 ), LDAB-1,
+     $                          AB( KV+1, JJ+1 ), LDAB-1 )
+               ELSE
+*
+*                 If pivot is zero, set INFO to the index of the pivot
+*                 unless a zero pivot has already been found.
+*
+                  IF( INFO.EQ.0 )
+     $               INFO = JJ
+               END IF
+*
+*              Copy current column of A31 into the work array WORK31
+*
+               NW = MIN( JJ-J+1, I3 )
+               IF( NW.GT.0 )
+     $            CALL SCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
+     $                        WORK31( 1, JJ-J+1 ), 1 )
+   80       CONTINUE
+            IF( J+JB.LE.N ) THEN
+*
+*              Apply the row interchanges to the other blocks.
+*
+               J2 = MIN( JU-J+1, KV ) - JB
+               J3 = MAX( 0, JU-J-KV+1 )
+*
+*              Use SLASWP to apply the row interchanges to A12, A22, and
+*              A32.
+*
+               CALL SLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB,
+     $                      IPIV( J ), 1 )
+*
+*              Adjust the pivot indices.
+*
+               DO 90 I = J, J + JB - 1
+                  IPIV( I ) = IPIV( I ) + J - 1
+   90          CONTINUE
+*
+*              Apply the row interchanges to A13, A23, and A33
+*              columnwise.
+*
+               K2 = J - 1 + JB + J2
+               DO 110 I = 1, J3
+                  JJ = K2 + I
+                  DO 100 II = J + I - 1, J + JB - 1
+                     IP = IPIV( II )
+                     IF( IP.NE.II ) THEN
+                        TEMP = AB( KV+1+II-JJ, JJ )
+                        AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
+                        AB( KV+1+IP-JJ, JJ ) = TEMP
+                     END IF
+  100             CONTINUE
+  110          CONTINUE
+*
+*              Update the relevant part of the trailing submatrix
+*
+               IF( J2.GT.0 ) THEN
+*
+*                 Update A12
+*
+                  CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+     $                        JB, J2, ONE, AB( KV+1, J ), LDAB-1,
+     $                        AB( KV+1-JB, J+JB ), LDAB-1 )
+*
+                  IF( I2.GT.0 ) THEN
+*
+*                    Update A22
+*
+                     CALL SGEMM( 'No transpose', 'No transpose', I2, J2,
+     $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
+     $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
+     $                           AB( KV+1, J+JB ), LDAB-1 )
+                  END IF
+*
+                  IF( I3.GT.0 ) THEN
+*
+*                    Update A32
+*
+                     CALL SGEMM( 'No transpose', 'No transpose', I3, J2,
+     $                           JB, -ONE, WORK31, LDWORK,
+     $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
+     $                           AB( KV+KL+1-JB, J+JB ), LDAB-1 )
+                  END IF
+               END IF
+*
+               IF( J3.GT.0 ) THEN
+*
+*                 Copy the lower triangle of A13 into the work array
+*                 WORK13
+*
+                  DO 130 JJ = 1, J3
+                     DO 120 II = JJ, JB
+                        WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
+  120                CONTINUE
+  130             CONTINUE
+*
+*                 Update A13 in the work array
+*
+                  CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+     $                        JB, J3, ONE, AB( KV+1, J ), LDAB-1,
+     $                        WORK13, LDWORK )
+*
+                  IF( I2.GT.0 ) THEN
+*
+*                    Update A23
+*
+                     CALL SGEMM( 'No transpose', 'No transpose', I2, J3,
+     $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
+     $                           WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
+     $                           LDAB-1 )
+                  END IF
+*
+                  IF( I3.GT.0 ) THEN
+*
+*                    Update A33
+*
+                     CALL SGEMM( 'No transpose', 'No transpose', I3, J3,
+     $                           JB, -ONE, WORK31, LDWORK, WORK13,
+     $                           LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
+                  END IF
+*
+*                 Copy the lower triangle of A13 back into place
+*
+                  DO 150 JJ = 1, J3
+                     DO 140 II = JJ, JB
+                        AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
+  140                CONTINUE
+  150             CONTINUE
+               END IF
+            ELSE
+*
+*              Adjust the pivot indices.
+*
+               DO 160 I = J, J + JB - 1
+                  IPIV( I ) = IPIV( I ) + J - 1
+  160          CONTINUE
+            END IF
+*
+*           Partially undo the interchanges in the current block to
+*           restore the upper triangular form of A31 and copy the upper
+*           triangle of A31 back into place
+*
+            DO 170 JJ = J + JB - 1, J, -1
+               JP = IPIV( JJ ) - JJ + 1
+               IF( JP.NE.1 ) THEN
+*
+*                 Apply interchange to columns J to JJ-1
+*
+                  IF( JP+JJ-1.LT.J+KL ) THEN
+*
+*                    The interchange does not affect A31
+*
+                     CALL SSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
+     $                           AB( KV+JP+JJ-J, J ), LDAB-1 )
+                  ELSE
+*
+*                    The interchange does affect A31
+*
+                     CALL SSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
+     $                           WORK31( JP+JJ-J-KL, 1 ), LDWORK )
+                  END IF
+               END IF
+*
+*              Copy the current column of A31 back into place
+*
+               NW = MIN( I3, JJ-J+1 )
+               IF( NW.GT.0 )
+     $            CALL SCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
+     $                        AB( KV+KL+1-JJ+J, JJ ), 1 )
+  170       CONTINUE
+  180    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of SGBTRF
+*
+      END
diff --git a/costa/native/external/lapack/sgbtrs.f b/costa/native/external/lapack/sgbtrs.f
new file mode 100644
index 000000000..bb00ae32b
--- /dev/null
+++ b/costa/native/external/lapack/sgbtrs.f
@@ -0,0 +1,187 @@
+      SUBROUTINE SGBTRS( TRANS, N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            INFO, KL, KU, LDAB, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               AB( LDAB, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGBTRS solves a system of linear equations
+*     A * X = B  or  A' * X = B
+*  with a general band matrix A using the LU factorization computed
+*  by SGBTRF.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations.
+*          = 'N':  A * X = B  (No transpose)
+*          = 'T':  A'* X = B  (Transpose)
+*          = 'C':  A'* X = B  (Conjugate transpose = Transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AB      (input) REAL array, dimension (LDAB,N)
+*          Details of the LU factorization of the band matrix A, as
+*          computed by SGBTRF.  U is stored as an upper triangular band
+*          matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and
+*          the multipliers used during the factorization are stored in
+*          rows KL+KU+2 to 2*KL+KU+1.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices; for 1 <= i <= N, row i of the matrix was
+*          interchanged with row IPIV(i).
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LNOTI, NOTRAN
+      INTEGER            I, J, KD, L, LM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMV, SGER, SSWAP, STBSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $    LSAME( TRANS, 'C' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.( 2*KL+KU+1 ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGBTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      KD = KU + KL + 1
+      LNOTI = KL.GT.0
+*
+      IF( NOTRAN ) THEN
+*
+*        Solve  A*X = B.
+*
+*        Solve L*X = B, overwriting B with X.
+*
+*        L is represented as a product of permutations and unit lower
+*        triangular matrices L = P(1) * L(1) * ... * P(n-1) * L(n-1),
+*        where each transformation L(i) is a rank-one modification of
+*        the identity matrix.
+*
+         IF( LNOTI ) THEN
+            DO 10 J = 1, N - 1
+               LM = MIN( KL, N-J )
+               L = IPIV( J )
+               IF( L.NE.J )
+     $            CALL SSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB )
+               CALL SGER( LM, NRHS, -ONE, AB( KD+1, J ), 1, B( J, 1 ),
+     $                    LDB, B( J+1, 1 ), LDB )
+   10       CONTINUE
+         END IF
+*
+         DO 20 I = 1, NRHS
+*
+*           Solve U*X = B, overwriting B with X.
+*
+            CALL STBSV( 'Upper', 'No transpose', 'Non-unit', N, KL+KU,
+     $                  AB, LDAB, B( 1, I ), 1 )
+   20    CONTINUE
+*
+      ELSE
+*
+*        Solve A'*X = B.
+*
+         DO 30 I = 1, NRHS
+*
+*           Solve U'*X = B, overwriting B with X.
+*
+            CALL STBSV( 'Upper', 'Transpose', 'Non-unit', N, KL+KU, AB,
+     $                  LDAB, B( 1, I ), 1 )
+   30    CONTINUE
+*
+*        Solve L'*X = B, overwriting B with X.
+*
+         IF( LNOTI ) THEN
+            DO 40 J = N - 1, 1, -1
+               LM = MIN( KL, N-J )
+               CALL SGEMV( 'Transpose', LM, NRHS, -ONE, B( J+1, 1 ),
+     $                     LDB, AB( KD+1, J ), 1, ONE, B( J, 1 ), LDB )
+               L = IPIV( J )
+               IF( L.NE.J )
+     $            CALL SSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB )
+   40       CONTINUE
+         END IF
+      END IF
+      RETURN
+*
+*     End of SGBTRS
+*
+      END
diff --git a/costa/native/external/lapack/sgebak.f b/costa/native/external/lapack/sgebak.f
new file mode 100644
index 000000000..e0e72e7a5
--- /dev/null
+++ b/costa/native/external/lapack/sgebak.f
@@ -0,0 +1,189 @@
+      SUBROUTINE SGEBAK( JOB, SIDE, N, ILO, IHI, SCALE, M, V, LDV,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOB, SIDE
+      INTEGER            IHI, ILO, INFO, LDV, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               V( LDV, * ), SCALE( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGEBAK forms the right or left eigenvectors of a real general matrix
+*  by backward transformation on the computed eigenvectors of the
+*  balanced matrix output by SGEBAL.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          Specifies the type of backward transformation required:
+*          = 'N', do nothing, return immediately;
+*          = 'P', do backward transformation for permutation only;
+*          = 'S', do backward transformation for scaling only;
+*          = 'B', do backward transformations for both permutation and
+*                 scaling.
+*          JOB must be the same as the argument JOB supplied to SGEBAL.
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'R':  V contains right eigenvectors;
+*          = 'L':  V contains left eigenvectors.
+*
+*  N       (input) INTEGER
+*          The number of rows of the matrix V.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          The integers ILO and IHI determined by SGEBAL.
+*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+*
+*  SCALE   (input) REAL array, dimension (N)
+*          Details of the permutation and scaling factors, as returned
+*          by SGEBAL.
+*
+*  M       (input) INTEGER
+*          The number of columns of the matrix V.  M >= 0.
+*
+*  V       (input/output) REAL array, dimension (LDV,M)
+*          On entry, the matrix of right or left eigenvectors to be
+*          transformed, as returned by SHSEIN or STREVC.
+*          On exit, V is overwritten by the transformed eigenvectors.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V. LDV >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFTV, RIGHTV
+      INTEGER            I, II, K
+      REAL               S
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SSCAL, SSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and Test the input parameters
+*
+      RIGHTV = LSAME( SIDE, 'R' )
+      LEFTV = LSAME( SIDE, 'L' )
+*
+      INFO = 0
+      IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND.
+     $    .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN
+         INFO = -5
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -7
+      ELSE IF( LDV.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGEBAK', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+      IF( M.EQ.0 )
+     $   RETURN
+      IF( LSAME( JOB, 'N' ) )
+     $   RETURN
+*
+      IF( ILO.EQ.IHI )
+     $   GO TO 30
+*
+*     Backward balance
+*
+      IF( LSAME( JOB, 'S' ) .OR. LSAME( JOB, 'B' ) ) THEN
+*
+         IF( RIGHTV ) THEN
+            DO 10 I = ILO, IHI
+               S = SCALE( I )
+               CALL SSCAL( M, S, V( I, 1 ), LDV )
+   10       CONTINUE
+         END IF
+*
+         IF( LEFTV ) THEN
+            DO 20 I = ILO, IHI
+               S = ONE / SCALE( I )
+               CALL SSCAL( M, S, V( I, 1 ), LDV )
+   20       CONTINUE
+         END IF
+*
+      END IF
+*
+*     Backward permutation
+*
+*     For  I = ILO-1 step -1 until 1,
+*              IHI+1 step 1 until N do --
+*
+   30 CONTINUE
+      IF( LSAME( JOB, 'P' ) .OR. LSAME( JOB, 'B' ) ) THEN
+         IF( RIGHTV ) THEN
+            DO 40 II = 1, N
+               I = II
+               IF( I.GE.ILO .AND. I.LE.IHI )
+     $            GO TO 40
+               IF( I.LT.ILO )
+     $            I = ILO - II
+               K = SCALE( I )
+               IF( K.EQ.I )
+     $            GO TO 40
+               CALL SSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV )
+   40       CONTINUE
+         END IF
+*
+         IF( LEFTV ) THEN
+            DO 50 II = 1, N
+               I = II
+               IF( I.GE.ILO .AND. I.LE.IHI )
+     $            GO TO 50
+               IF( I.LT.ILO )
+     $            I = ILO - II
+               K = SCALE( I )
+               IF( K.EQ.I )
+     $            GO TO 50
+               CALL SSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV )
+   50       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SGEBAK
+*
+      END
diff --git a/costa/native/external/lapack/sgebal.f b/costa/native/external/lapack/sgebal.f
new file mode 100644
index 000000000..e253abbb5
--- /dev/null
+++ b/costa/native/external/lapack/sgebal.f
@@ -0,0 +1,323 @@
+      SUBROUTINE SGEBAL( JOB, N, A, LDA, ILO, IHI, SCALE, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOB
+      INTEGER            IHI, ILO, INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), SCALE( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGEBAL balances a general real matrix A.  This involves, first,
+*  permuting A by a similarity transformation to isolate eigenvalues
+*  in the first 1 to ILO-1 and last IHI+1 to N elements on the
+*  diagonal; and second, applying a diagonal similarity transformation
+*  to rows and columns ILO to IHI to make the rows and columns as
+*  close in norm as possible.  Both steps are optional.
+*
+*  Balancing may reduce the 1-norm of the matrix, and improve the
+*  accuracy of the computed eigenvalues and/or eigenvectors.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          Specifies the operations to be performed on A:
+*          = 'N':  none:  simply set ILO = 1, IHI = N, SCALE(I) = 1.0
+*                  for i = 1,...,N;
+*          = 'P':  permute only;
+*          = 'S':  scale only;
+*          = 'B':  both permute and scale.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the input matrix A.
+*          On exit,  A is overwritten by the balanced matrix.
+*          If JOB = 'N', A is not referenced.
+*          See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  ILO     (output) INTEGER
+*  IHI     (output) INTEGER
+*          ILO and IHI are set to integers such that on exit
+*          A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N.
+*          If JOB = 'N' or 'S', ILO = 1 and IHI = N.
+*
+*  SCALE   (output) REAL array, dimension (N)
+*          Details of the permutations and scaling factors applied to
+*          A.  If P(j) is the index of the row and column interchanged
+*          with row and column j and D(j) is the scaling factor
+*          applied to row and column j, then
+*          SCALE(j) = P(j)    for j = 1,...,ILO-1
+*                   = D(j)    for j = ILO,...,IHI
+*                   = P(j)    for j = IHI+1,...,N.
+*          The order in which the interchanges are made is N to IHI+1,
+*          then 1 to ILO-1.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The permutations consist of row and column interchanges which put
+*  the matrix in the form
+*
+*             ( T1   X   Y  )
+*     P A P = (  0   B   Z  )
+*             (  0   0   T2 )
+*
+*  where T1 and T2 are upper triangular matrices whose eigenvalues lie
+*  along the diagonal.  The column indices ILO and IHI mark the starting
+*  and ending columns of the submatrix B. Balancing consists of applying
+*  a diagonal similarity transformation inv(D) * B * D to make the
+*  1-norms of each row of B and its corresponding column nearly equal.
+*  The output matrix is
+*
+*     ( T1     X*D          Y    )
+*     (  0  inv(D)*B*D  inv(D)*Z ).
+*     (  0      0           T2   )
+*
+*  Information about the permutations P and the diagonal matrix D is
+*  returned in the vector SCALE.
+*
+*  This subroutine is based on the EISPACK routine BALANC.
+*
+*  Modified by Tzu-Yi Chen, Computer Science Division, University of
+*    California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+      REAL               SCLFAC
+      PARAMETER          ( SCLFAC = 0.8E+1 )
+      REAL               FACTOR
+      PARAMETER          ( FACTOR = 0.95E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOCONV
+      INTEGER            I, ICA, IEXC, IRA, J, K, L, M
+      REAL               C, CA, F, G, R, RA, S, SFMAX1, SFMAX2, SFMIN1,
+     $                   SFMIN2
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ISAMAX
+      REAL               SLAMCH
+      EXTERNAL           LSAME, ISAMAX, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SSCAL, SSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND.
+     $    .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGEBAL', -INFO )
+         RETURN
+      END IF
+*
+      K = 1
+      L = N
+*
+      IF( N.EQ.0 )
+     $   GO TO 210
+*
+      IF( LSAME( JOB, 'N' ) ) THEN
+         DO 10 I = 1, N
+            SCALE( I ) = ONE
+   10    CONTINUE
+         GO TO 210
+      END IF
+*
+      IF( LSAME( JOB, 'S' ) )
+     $   GO TO 120
+*
+*     Permutation to isolate eigenvalues if possible
+*
+      GO TO 50
+*
+*     Row and column exchange.
+*
+   20 CONTINUE
+      SCALE( M ) = J
+      IF( J.EQ.M )
+     $   GO TO 30
+*
+      CALL SSWAP( L, A( 1, J ), 1, A( 1, M ), 1 )
+      CALL SSWAP( N-K+1, A( J, K ), LDA, A( M, K ), LDA )
+*
+   30 CONTINUE
+      GO TO ( 40, 80 )IEXC
+*
+*     Search for rows isolating an eigenvalue and push them down.
+*
+   40 CONTINUE
+      IF( L.EQ.1 )
+     $   GO TO 210
+      L = L - 1
+*
+   50 CONTINUE
+      DO 70 J = L, 1, -1
+*
+         DO 60 I = 1, L
+            IF( I.EQ.J )
+     $         GO TO 60
+            IF( A( J, I ).NE.ZERO )
+     $         GO TO 70
+   60    CONTINUE
+*
+         M = L
+         IEXC = 1
+         GO TO 20
+   70 CONTINUE
+*
+      GO TO 90
+*
+*     Search for columns isolating an eigenvalue and push them left.
+*
+   80 CONTINUE
+      K = K + 1
+*
+   90 CONTINUE
+      DO 110 J = K, L
+*
+         DO 100 I = K, L
+            IF( I.EQ.J )
+     $         GO TO 100
+            IF( A( I, J ).NE.ZERO )
+     $         GO TO 110
+  100    CONTINUE
+*
+         M = K
+         IEXC = 2
+         GO TO 20
+  110 CONTINUE
+*
+  120 CONTINUE
+      DO 130 I = K, L
+         SCALE( I ) = ONE
+  130 CONTINUE
+*
+      IF( LSAME( JOB, 'P' ) )
+     $   GO TO 210
+*
+*     Balance the submatrix in rows K to L.
+*
+*     Iterative loop for norm reduction
+*
+      SFMIN1 = SLAMCH( 'S' ) / SLAMCH( 'P' )
+      SFMAX1 = ONE / SFMIN1
+      SFMIN2 = SFMIN1*SCLFAC
+      SFMAX2 = ONE / SFMIN2
+  140 CONTINUE
+      NOCONV = .FALSE.
+*
+      DO 200 I = K, L
+         C = ZERO
+         R = ZERO
+*
+         DO 150 J = K, L
+            IF( J.EQ.I )
+     $         GO TO 150
+            C = C + ABS( A( J, I ) )
+            R = R + ABS( A( I, J ) )
+  150    CONTINUE
+         ICA = ISAMAX( L, A( 1, I ), 1 )
+         CA = ABS( A( ICA, I ) )
+         IRA = ISAMAX( N-K+1, A( I, K ), LDA )
+         RA = ABS( A( I, IRA+K-1 ) )
+*
+*        Guard against zero C or R due to underflow.
+*
+         IF( C.EQ.ZERO .OR. R.EQ.ZERO )
+     $      GO TO 200
+         G = R / SCLFAC
+         F = ONE
+         S = C + R
+  160    CONTINUE
+         IF( C.GE.G .OR. MAX( F, C, CA ).GE.SFMAX2 .OR.
+     $       MIN( R, G, RA ).LE.SFMIN2 )GO TO 170
+         F = F*SCLFAC
+         C = C*SCLFAC
+         CA = CA*SCLFAC
+         R = R / SCLFAC
+         G = G / SCLFAC
+         RA = RA / SCLFAC
+         GO TO 160
+*
+  170    CONTINUE
+         G = C / SCLFAC
+  180    CONTINUE
+         IF( G.LT.R .OR. MAX( R, RA ).GE.SFMAX2 .OR.
+     $       MIN( F, C, G, CA ).LE.SFMIN2 )GO TO 190
+         F = F / SCLFAC
+         C = C / SCLFAC
+         G = G / SCLFAC
+         CA = CA / SCLFAC
+         R = R*SCLFAC
+         RA = RA*SCLFAC
+         GO TO 180
+*
+*        Now balance.
+*
+  190    CONTINUE
+         IF( ( C+R ).GE.FACTOR*S )
+     $      GO TO 200
+         IF( F.LT.ONE .AND. SCALE( I ).LT.ONE ) THEN
+            IF( F*SCALE( I ).LE.SFMIN1 )
+     $         GO TO 200
+         END IF
+         IF( F.GT.ONE .AND. SCALE( I ).GT.ONE ) THEN
+            IF( SCALE( I ).GE.SFMAX1 / F )
+     $         GO TO 200
+         END IF
+         G = ONE / F
+         SCALE( I ) = SCALE( I )*F
+         NOCONV = .TRUE.
+*
+         CALL SSCAL( N-K+1, G, A( I, K ), LDA )
+         CALL SSCAL( L, F, A( 1, I ), 1 )
+*
+  200 CONTINUE
+*
+      IF( NOCONV )
+     $   GO TO 140
+*
+  210 CONTINUE
+      ILO = K
+      IHI = L
+*
+      RETURN
+*
+*     End of SGEBAL
+*
+      END
diff --git a/costa/native/external/lapack/sgebd2.f b/costa/native/external/lapack/sgebd2.f
new file mode 100644
index 000000000..e81504f96
--- /dev/null
+++ b/costa/native/external/lapack/sgebd2.f
@@ -0,0 +1,238 @@
+      SUBROUTINE SGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), D( * ), E( * ), TAUP( * ),
+     $                   TAUQ( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGEBD2 reduces a real general m by n matrix A to upper or lower
+*  bidiagonal form B by an orthogonal transformation: Q' * A * P = B.
+*
+*  If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows in the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns in the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the m by n general matrix to be reduced.
+*          On exit,
+*          if m >= n, the diagonal and the first superdiagonal are
+*            overwritten with the upper bidiagonal matrix B; the
+*            elements below the diagonal, with the array TAUQ, represent
+*            the orthogonal matrix Q as a product of elementary
+*            reflectors, and the elements above the first superdiagonal,
+*            with the array TAUP, represent the orthogonal matrix P as
+*            a product of elementary reflectors;
+*          if m < n, the diagonal and the first subdiagonal are
+*            overwritten with the lower bidiagonal matrix B; the
+*            elements below the first subdiagonal, with the array TAUQ,
+*            represent the orthogonal matrix Q as a product of
+*            elementary reflectors, and the elements above the diagonal,
+*            with the array TAUP, represent the orthogonal matrix P as
+*            a product of elementary reflectors.
+*          See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  D       (output) REAL array, dimension (min(M,N))
+*          The diagonal elements of the bidiagonal matrix B:
+*          D(i) = A(i,i).
+*
+*  E       (output) REAL array, dimension (min(M,N)-1)
+*          The off-diagonal elements of the bidiagonal matrix B:
+*          if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
+*          if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
+*
+*  TAUQ    (output) REAL array dimension (min(M,N))
+*          The scalar factors of the elementary reflectors which
+*          represent the orthogonal matrix Q. See Further Details.
+*
+*  TAUP    (output) REAL array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors which
+*          represent the orthogonal matrix P. See Further Details.
+*
+*  WORK    (workspace) REAL array, dimension (max(M,N))
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit.
+*          < 0: if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The matrices Q and P are represented as products of elementary
+*  reflectors:
+*
+*  If m >= n,
+*
+*     Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1)
+*
+*  Each H(i) and G(i) has the form:
+*
+*     H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+*
+*  where tauq and taup are real scalars, and v and u are real vectors;
+*  v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);
+*  u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);
+*  tauq is stored in TAUQ(i) and taup in TAUP(i).
+*
+*  If m < n,
+*
+*     Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m)
+*
+*  Each H(i) and G(i) has the form:
+*
+*     H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+*
+*  where tauq and taup are real scalars, and v and u are real vectors;
+*  v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
+*  u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
+*  tauq is stored in TAUQ(i) and taup in TAUP(i).
+*
+*  The contents of A on exit are illustrated by the following examples:
+*
+*  m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
+*
+*    (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 )
+*    (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 )
+*    (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 )
+*    (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 )
+*    (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 )
+*    (  v1  v2  v3  v4  v5 )
+*
+*  where d and e denote diagonal and off-diagonal elements of B, vi
+*  denotes an element of the vector defining H(i), and ui an element of
+*  the vector defining G(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARF, SLARFG, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.LT.0 ) THEN
+         CALL XERBLA( 'SGEBD2', -INFO )
+         RETURN
+      END IF
+*
+      IF( M.GE.N ) THEN
+*
+*        Reduce to upper bidiagonal form
+*
+         DO 10 I = 1, N
+*
+*           Generate elementary reflector H(i) to annihilate A(i+1:m,i)
+*
+            CALL SLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1,
+     $                   TAUQ( I ) )
+            D( I ) = A( I, I )
+            A( I, I ) = ONE
+*
+*           Apply H(i) to A(i:m,i+1:n) from the left
+*
+            CALL SLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAUQ( I ),
+     $                  A( I, I+1 ), LDA, WORK )
+            A( I, I ) = D( I )
+*
+            IF( I.LT.N ) THEN
+*
+*              Generate elementary reflector G(i) to annihilate
+*              A(i,i+2:n)
+*
+               CALL SLARFG( N-I, A( I, I+1 ), A( I, MIN( I+2, N ) ),
+     $                      LDA, TAUP( I ) )
+               E( I ) = A( I, I+1 )
+               A( I, I+1 ) = ONE
+*
+*              Apply G(i) to A(i+1:m,i+1:n) from the right
+*
+               CALL SLARF( 'Right', M-I, N-I, A( I, I+1 ), LDA,
+     $                     TAUP( I ), A( I+1, I+1 ), LDA, WORK )
+               A( I, I+1 ) = E( I )
+            ELSE
+               TAUP( I ) = ZERO
+            END IF
+   10    CONTINUE
+      ELSE
+*
+*        Reduce to lower bidiagonal form
+*
+         DO 20 I = 1, M
+*
+*           Generate elementary reflector G(i) to annihilate A(i,i+1:n)
+*
+            CALL SLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA,
+     $                   TAUP( I ) )
+            D( I ) = A( I, I )
+            A( I, I ) = ONE
+*
+*           Apply G(i) to A(i+1:m,i:n) from the right
+*
+            CALL SLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAUP( I ),
+     $                  A( MIN( I+1, M ), I ), LDA, WORK )
+            A( I, I ) = D( I )
+*
+            IF( I.LT.M ) THEN
+*
+*              Generate elementary reflector H(i) to annihilate
+*              A(i+2:m,i)
+*
+               CALL SLARFG( M-I, A( I+1, I ), A( MIN( I+2, M ), I ), 1,
+     $                      TAUQ( I ) )
+               E( I ) = A( I+1, I )
+               A( I+1, I ) = ONE
+*
+*              Apply H(i) to A(i+1:m,i+1:n) from the left
+*
+               CALL SLARF( 'Left', M-I, N-I, A( I+1, I ), 1, TAUQ( I ),
+     $                     A( I+1, I+1 ), LDA, WORK )
+               A( I+1, I ) = E( I )
+            ELSE
+               TAUQ( I ) = ZERO
+            END IF
+   20    CONTINUE
+      END IF
+      RETURN
+*
+*     End of SGEBD2
+*
+      END
diff --git a/costa/native/external/lapack/sgebrd.f b/costa/native/external/lapack/sgebrd.f
new file mode 100644
index 000000000..91cca0733
--- /dev/null
+++ b/costa/native/external/lapack/sgebrd.f
@@ -0,0 +1,269 @@
+      SUBROUTINE SGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), D( * ), E( * ), TAUP( * ),
+     $                   TAUQ( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGEBRD reduces a general real M-by-N matrix A to upper or lower
+*  bidiagonal form B by an orthogonal transformation: Q**T * A * P = B.
+*
+*  If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows in the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns in the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the M-by-N general matrix to be reduced.
+*          On exit,
+*          if m >= n, the diagonal and the first superdiagonal are
+*            overwritten with the upper bidiagonal matrix B; the
+*            elements below the diagonal, with the array TAUQ, represent
+*            the orthogonal matrix Q as a product of elementary
+*            reflectors, and the elements above the first superdiagonal,
+*            with the array TAUP, represent the orthogonal matrix P as
+*            a product of elementary reflectors;
+*          if m < n, the diagonal and the first subdiagonal are
+*            overwritten with the lower bidiagonal matrix B; the
+*            elements below the first subdiagonal, with the array TAUQ,
+*            represent the orthogonal matrix Q as a product of
+*            elementary reflectors, and the elements above the diagonal,
+*            with the array TAUP, represent the orthogonal matrix P as
+*            a product of elementary reflectors.
+*          See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  D       (output) REAL array, dimension (min(M,N))
+*          The diagonal elements of the bidiagonal matrix B:
+*          D(i) = A(i,i).
+*
+*  E       (output) REAL array, dimension (min(M,N)-1)
+*          The off-diagonal elements of the bidiagonal matrix B:
+*          if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
+*          if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
+*
+*  TAUQ    (output) REAL array dimension (min(M,N))
+*          The scalar factors of the elementary reflectors which
+*          represent the orthogonal matrix Q. See Further Details.
+*
+*  TAUP    (output) REAL array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors which
+*          represent the orthogonal matrix P. See Further Details.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.  LWORK >= max(1,M,N).
+*          For optimum performance LWORK >= (M+N)*NB, where NB
+*          is the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit 
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The matrices Q and P are represented as products of elementary
+*  reflectors:
+*
+*  If m >= n,
+*
+*     Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1)
+*
+*  Each H(i) and G(i) has the form:
+*
+*     H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+*
+*  where tauq and taup are real scalars, and v and u are real vectors;
+*  v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);
+*  u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);
+*  tauq is stored in TAUQ(i) and taup in TAUP(i).
+*
+*  If m < n,
+*
+*     Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m)
+*
+*  Each H(i) and G(i) has the form:
+*
+*     H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+*
+*  where tauq and taup are real scalars, and v and u are real vectors;
+*  v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
+*  u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
+*  tauq is stored in TAUQ(i) and taup in TAUP(i).
+*
+*  The contents of A on exit are illustrated by the following examples:
+*
+*  m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
+*
+*    (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 )
+*    (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 )
+*    (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 )
+*    (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 )
+*    (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 )
+*    (  v1  v2  v3  v4  v5 )
+*
+*  where d and e denote diagonal and off-diagonal elements of B, vi
+*  denotes an element of the vector defining H(i), and ui an element of
+*  the vector defining G(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IINFO, J, LDWRKX, LDWRKY, LWKOPT, MINMN, NB,
+     $                   NBMIN, NX
+      REAL               WS
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEBD2, SGEMM, SLABRD, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, REAL
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      NB = MAX( 1, ILAENV( 1, 'SGEBRD', ' ', M, N, -1, -1 ) )
+      LWKOPT = ( M+N )*NB
+      WORK( 1 ) = REAL( LWKOPT )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, M, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.LT.0 ) THEN
+         CALL XERBLA( 'SGEBRD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      MINMN = MIN( M, N )
+      IF( MINMN.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      WS = MAX( M, N )
+      LDWRKX = M
+      LDWRKY = N
+*
+      IF( NB.GT.1 .AND. NB.LT.MINMN ) THEN
+*
+*        Set the crossover point NX.
+*
+         NX = MAX( NB, ILAENV( 3, 'SGEBRD', ' ', M, N, -1, -1 ) )
+*
+*        Determine when to switch from blocked to unblocked code.
+*
+         IF( NX.LT.MINMN ) THEN
+            WS = ( M+N )*NB
+            IF( LWORK.LT.WS ) THEN
+*
+*              Not enough work space for the optimal NB, consider using
+*              a smaller block size.
+*
+               NBMIN = ILAENV( 2, 'SGEBRD', ' ', M, N, -1, -1 )
+               IF( LWORK.GE.( M+N )*NBMIN ) THEN
+                  NB = LWORK / ( M+N )
+               ELSE
+                  NB = 1
+                  NX = MINMN
+               END IF
+            END IF
+         END IF
+      ELSE
+         NX = MINMN
+      END IF
+*
+      DO 30 I = 1, MINMN - NX, NB
+*
+*        Reduce rows and columns i:i+nb-1 to bidiagonal form and return
+*        the matrices X and Y which are needed to update the unreduced
+*        part of the matrix
+*
+         CALL SLABRD( M-I+1, N-I+1, NB, A( I, I ), LDA, D( I ), E( I ),
+     $                TAUQ( I ), TAUP( I ), WORK, LDWRKX,
+     $                WORK( LDWRKX*NB+1 ), LDWRKY )
+*
+*        Update the trailing submatrix A(i+nb:m,i+nb:n), using an update
+*        of the form  A := A - V*Y' - X*U'
+*
+         CALL SGEMM( 'No transpose', 'Transpose', M-I-NB+1, N-I-NB+1,
+     $               NB, -ONE, A( I+NB, I ), LDA,
+     $               WORK( LDWRKX*NB+NB+1 ), LDWRKY, ONE,
+     $               A( I+NB, I+NB ), LDA )
+         CALL SGEMM( 'No transpose', 'No transpose', M-I-NB+1, N-I-NB+1,
+     $               NB, -ONE, WORK( NB+1 ), LDWRKX, A( I, I+NB ), LDA,
+     $               ONE, A( I+NB, I+NB ), LDA )
+*
+*        Copy diagonal and off-diagonal elements of B back into A
+*
+         IF( M.GE.N ) THEN
+            DO 10 J = I, I + NB - 1
+               A( J, J ) = D( J )
+               A( J, J+1 ) = E( J )
+   10       CONTINUE
+         ELSE
+            DO 20 J = I, I + NB - 1
+               A( J, J ) = D( J )
+               A( J+1, J ) = E( J )
+   20       CONTINUE
+         END IF
+   30 CONTINUE
+*
+*     Use unblocked code to reduce the remainder of the matrix
+*
+      CALL SGEBD2( M-I+1, N-I+1, A( I, I ), LDA, D( I ), E( I ),
+     $             TAUQ( I ), TAUP( I ), WORK, IINFO )
+      WORK( 1 ) = WS
+      RETURN
+*
+*     End of SGEBRD
+*
+      END
diff --git a/costa/native/external/lapack/sgecon.f b/costa/native/external/lapack/sgecon.f
new file mode 100644
index 000000000..2429fd259
--- /dev/null
+++ b/costa/native/external/lapack/sgecon.f
@@ -0,0 +1,181 @@
+      SUBROUTINE SGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, IWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            INFO, LDA, N
+      REAL               ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGECON estimates the reciprocal of the condition number of a general
+*  real matrix A, in either the 1-norm or the infinity-norm, using
+*  the LU factorization computed by SGETRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as
+*     RCOND = 1 / ( norm(A) * norm(inv(A)) ).
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies whether the 1-norm condition number or the
+*          infinity-norm condition number is required:
+*          = '1' or 'O':  1-norm;
+*          = 'I':         Infinity-norm.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input) REAL array, dimension (LDA,N)
+*          The factors L and U from the factorization A = P*L*U
+*          as computed by SGETRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  ANORM   (input) REAL
+*          If NORM = '1' or 'O', the 1-norm of the original matrix A.
+*          If NORM = 'I', the infinity-norm of the original matrix A.
+*
+*  RCOND   (output) REAL
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(norm(A) * norm(inv(A))).
+*
+*  WORK    (workspace) REAL array, dimension (4*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ONENRM
+      CHARACTER          NORMIN
+      INTEGER            IX, KASE, KASE1
+      REAL               AINVNM, SCALE, SL, SMLNUM, SU
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ISAMAX
+      REAL               SLAMCH
+      EXTERNAL           LSAME, ISAMAX, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLACON, SLATRS, SRSCL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' )
+      IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGECON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.EQ.ZERO ) THEN
+         RETURN
+      END IF
+*
+      SMLNUM = SLAMCH( 'Safe minimum' )
+*
+*     Estimate the norm of inv(A).
+*
+      AINVNM = ZERO
+      NORMIN = 'N'
+      IF( ONENRM ) THEN
+         KASE1 = 1
+      ELSE
+         KASE1 = 2
+      END IF
+      KASE = 0
+   10 CONTINUE
+      CALL SLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+         IF( KASE.EQ.KASE1 ) THEN
+*
+*           Multiply by inv(L).
+*
+            CALL SLATRS( 'Lower', 'No transpose', 'Unit', NORMIN, N, A,
+     $                   LDA, WORK, SL, WORK( 2*N+1 ), INFO )
+*
+*           Multiply by inv(U).
+*
+            CALL SLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   A, LDA, WORK, SU, WORK( 3*N+1 ), INFO )
+         ELSE
+*
+*           Multiply by inv(U').
+*
+            CALL SLATRS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, A,
+     $                   LDA, WORK, SU, WORK( 3*N+1 ), INFO )
+*
+*           Multiply by inv(L').
+*
+            CALL SLATRS( 'Lower', 'Transpose', 'Unit', NORMIN, N, A,
+     $                   LDA, WORK, SL, WORK( 2*N+1 ), INFO )
+         END IF
+*
+*        Divide X by 1/(SL*SU) if doing so will not cause overflow.
+*
+         SCALE = SL*SU
+         NORMIN = 'Y'
+         IF( SCALE.NE.ONE ) THEN
+            IX = ISAMAX( N, WORK, 1 )
+            IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO )
+     $         GO TO 20
+            CALL SRSCL( N, SCALE, WORK, 1 )
+         END IF
+         GO TO 10
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+   20 CONTINUE
+      RETURN
+*
+*     End of SGECON
+*
+      END
diff --git a/costa/native/external/lapack/sgeequ.f b/costa/native/external/lapack/sgeequ.f
new file mode 100644
index 000000000..9008e08de
--- /dev/null
+++ b/costa/native/external/lapack/sgeequ.f
@@ -0,0 +1,226 @@
+      SUBROUTINE SGEEQU( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+      REAL               AMAX, COLCND, ROWCND
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), C( * ), R( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGEEQU computes row and column scalings intended to equilibrate an
+*  M-by-N matrix A and reduce its condition number.  R returns the row
+*  scale factors and C the column scale factors, chosen to try to make
+*  the largest element in each row and column of the matrix B with
+*  elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1.
+*
+*  R(i) and C(j) are restricted to be between SMLNUM = smallest safe
+*  number and BIGNUM = largest safe number.  Use of these scaling
+*  factors is not guaranteed to reduce the condition number of A but
+*  works well in practice.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input) REAL array, dimension (LDA,N)
+*          The M-by-N matrix whose equilibration factors are
+*          to be computed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  R       (output) REAL array, dimension (M)
+*          If INFO = 0 or INFO > M, R contains the row scale factors
+*          for A.
+*
+*  C       (output) REAL array, dimension (N)
+*          If INFO = 0,  C contains the column scale factors for A.
+*
+*  ROWCND  (output) REAL
+*          If INFO = 0 or INFO > M, ROWCND contains the ratio of the
+*          smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and
+*          AMAX is neither too large nor too small, it is not worth
+*          scaling by R.
+*
+*  COLCND  (output) REAL
+*          If INFO = 0, COLCND contains the ratio of the smallest
+*          C(i) to the largest C(i).  If COLCND >= 0.1, it is not
+*          worth scaling by C.
+*
+*  AMAX    (output) REAL
+*          Absolute value of largest matrix element.  If AMAX is very
+*          close to overflow or very close to underflow, the matrix
+*          should be scaled.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i,  and i is
+*                <= M:  the i-th row of A is exactly zero
+*                >  M:  the (i-M)-th column of A is exactly zero
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      REAL               BIGNUM, RCMAX, RCMIN, SMLNUM
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGEEQU', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         ROWCND = ONE
+         COLCND = ONE
+         AMAX = ZERO
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SMLNUM = SLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+*
+*     Compute row scale factors.
+*
+      DO 10 I = 1, M
+         R( I ) = ZERO
+   10 CONTINUE
+*
+*     Find the maximum element in each row.
+*
+      DO 30 J = 1, N
+         DO 20 I = 1, M
+            R( I ) = MAX( R( I ), ABS( A( I, J ) ) )
+   20    CONTINUE
+   30 CONTINUE
+*
+*     Find the maximum and minimum scale factors.
+*
+      RCMIN = BIGNUM
+      RCMAX = ZERO
+      DO 40 I = 1, M
+         RCMAX = MAX( RCMAX, R( I ) )
+         RCMIN = MIN( RCMIN, R( I ) )
+   40 CONTINUE
+      AMAX = RCMAX
+*
+      IF( RCMIN.EQ.ZERO ) THEN
+*
+*        Find the first zero scale factor and return an error code.
+*
+         DO 50 I = 1, M
+            IF( R( I ).EQ.ZERO ) THEN
+               INFO = I
+               RETURN
+            END IF
+   50    CONTINUE
+      ELSE
+*
+*        Invert the scale factors.
+*
+         DO 60 I = 1, M
+            R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM )
+   60    CONTINUE
+*
+*        Compute ROWCND = min(R(I)) / max(R(I))
+*
+         ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+      END IF
+*
+*     Compute column scale factors
+*
+      DO 70 J = 1, N
+         C( J ) = ZERO
+   70 CONTINUE
+*
+*     Find the maximum element in each column,
+*     assuming the row scaling computed above.
+*
+      DO 90 J = 1, N
+         DO 80 I = 1, M
+            C( J ) = MAX( C( J ), ABS( A( I, J ) )*R( I ) )
+   80    CONTINUE
+   90 CONTINUE
+*
+*     Find the maximum and minimum scale factors.
+*
+      RCMIN = BIGNUM
+      RCMAX = ZERO
+      DO 100 J = 1, N
+         RCMIN = MIN( RCMIN, C( J ) )
+         RCMAX = MAX( RCMAX, C( J ) )
+  100 CONTINUE
+*
+      IF( RCMIN.EQ.ZERO ) THEN
+*
+*        Find the first zero scale factor and return an error code.
+*
+         DO 110 J = 1, N
+            IF( C( J ).EQ.ZERO ) THEN
+               INFO = M + J
+               RETURN
+            END IF
+  110    CONTINUE
+      ELSE
+*
+*        Invert the scale factors.
+*
+         DO 120 J = 1, N
+            C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM )
+  120    CONTINUE
+*
+*        Compute COLCND = min(C(J)) / max(C(J))
+*
+         COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+      END IF
+*
+      RETURN
+*
+*     End of SGEEQU
+*
+      END
diff --git a/costa/native/external/lapack/sgees.f b/costa/native/external/lapack/sgees.f
new file mode 100644
index 000000000..0db56d7d0
--- /dev/null
+++ b/costa/native/external/lapack/sgees.f
@@ -0,0 +1,431 @@
+      SUBROUTINE SGEES( JOBVS, SORT, SELECT, N, A, LDA, SDIM, WR, WI,
+     $                  VS, LDVS, WORK, LWORK, BWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVS, SORT
+      INTEGER            INFO, LDA, LDVS, LWORK, N, SDIM
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            BWORK( * )
+      REAL               A( LDA, * ), VS( LDVS, * ), WI( * ), WORK( * ),
+     $                   WR( * )
+*     ..
+*     .. Function Arguments ..
+      LOGICAL            SELECT
+      EXTERNAL           SELECT
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGEES computes for an N-by-N real nonsymmetric matrix A, the
+*  eigenvalues, the real Schur form T, and, optionally, the matrix of
+*  Schur vectors Z.  This gives the Schur factorization A = Z*T*(Z**T).
+*
+*  Optionally, it also orders the eigenvalues on the diagonal of the
+*  real Schur form so that selected eigenvalues are at the top left.
+*  The leading columns of Z then form an orthonormal basis for the
+*  invariant subspace corresponding to the selected eigenvalues.
+*
+*  A matrix is in real Schur form if it is upper quasi-triangular with
+*  1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in the
+*  form
+*          [  a  b  ]
+*          [  c  a  ]
+*
+*  where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc).
+*
+*  Arguments
+*  =========
+*
+*  JOBVS   (input) CHARACTER*1
+*          = 'N': Schur vectors are not computed;
+*          = 'V': Schur vectors are computed.
+*
+*  SORT    (input) CHARACTER*1
+*          Specifies whether or not to order the eigenvalues on the
+*          diagonal of the Schur form.
+*          = 'N': Eigenvalues are not ordered;
+*          = 'S': Eigenvalues are ordered (see SELECT).
+*
+*  SELECT  (input) LOGICAL FUNCTION of two REAL arguments
+*          SELECT must be declared EXTERNAL in the calling subroutine.
+*          If SORT = 'S', SELECT is used to select eigenvalues to sort
+*          to the top left of the Schur form.
+*          If SORT = 'N', SELECT is not referenced.
+*          An eigenvalue WR(j)+sqrt(-1)*WI(j) is selected if
+*          SELECT(WR(j),WI(j)) is true; i.e., if either one of a complex
+*          conjugate pair of eigenvalues is selected, then both complex
+*          eigenvalues are selected.
+*          Note that a selected complex eigenvalue may no longer
+*          satisfy SELECT(WR(j),WI(j)) = .TRUE. after ordering, since
+*          ordering may change the value of complex eigenvalues
+*          (especially if the eigenvalue is ill-conditioned); in this
+*          case INFO is set to N+2 (see INFO below).
+*
+*  N       (input) INTEGER
+*          The order of the matrix A. N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the N-by-N matrix A.
+*          On exit, A has been overwritten by its real Schur form T.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  SDIM    (output) INTEGER
+*          If SORT = 'N', SDIM = 0.
+*          If SORT = 'S', SDIM = number of eigenvalues (after sorting)
+*                         for which SELECT is true. (Complex conjugate
+*                         pairs for which SELECT is true for either
+*                         eigenvalue count as 2.)
+*
+*  WR      (output) REAL array, dimension (N)
+*  WI      (output) REAL array, dimension (N)
+*          WR and WI contain the real and imaginary parts,
+*          respectively, of the computed eigenvalues in the same order
+*          that they appear on the diagonal of the output Schur form T.
+*          Complex conjugate pairs of eigenvalues will appear
+*          consecutively with the eigenvalue having the positive
+*          imaginary part first.
+*
+*  VS      (output) REAL array, dimension (LDVS,N)
+*          If JOBVS = 'V', VS contains the orthogonal matrix Z of Schur
+*          vectors.
+*          If JOBVS = 'N', VS is not referenced.
+*
+*  LDVS    (input) INTEGER
+*          The leading dimension of the array VS.  LDVS >= 1; if
+*          JOBVS = 'V', LDVS >= N.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) contains the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,3*N).
+*          For good performance, LWORK must generally be larger.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  BWORK   (workspace) LOGICAL array, dimension (N)
+*          Not referenced if SORT = 'N'.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value.
+*          > 0: if INFO = i, and i is
+*             <= N: the QR algorithm failed to compute all the
+*                   eigenvalues; elements 1:ILO-1 and i+1:N of WR and WI
+*                   contain those eigenvalues which have converged; if
+*                   JOBVS = 'V', VS contains the matrix which reduces A
+*                   to its partially converged Schur form.
+*             = N+1: the eigenvalues could not be reordered because some
+*                   eigenvalues were too close to separate (the problem
+*                   is very ill-conditioned);
+*             = N+2: after reordering, roundoff changed values of some
+*                   complex eigenvalues so that leading eigenvalues in
+*                   the Schur form no longer satisfy SELECT=.TRUE.  This
+*                   could also be caused by underflow due to scaling.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            CURSL, LASTSL, LQUERY, LST2SL, SCALEA, WANTST,
+     $                   WANTVS
+      INTEGER            HSWORK, I, I1, I2, IBAL, ICOND, IERR, IEVAL,
+     $                   IHI, ILO, INXT, IP, ITAU, IWRK, K, MAXB,
+     $                   MAXWRK, MINWRK
+      REAL               ANRM, BIGNUM, CSCALE, EPS, S, SEP, SMLNUM
+*     ..
+*     .. Local Arrays ..
+      INTEGER            IDUM( 1 )
+      REAL               DUM( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SGEBAK, SGEBAL, SGEHRD, SHSEQR, SLABAD,
+     $                   SLACPY, SLASCL, SORGHR, SSWAP, STRSEN, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               SLAMCH, SLANGE
+      EXTERNAL           LSAME, ILAENV, SLAMCH, SLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      WANTVS = LSAME( JOBVS, 'V' )
+      WANTST = LSAME( SORT, 'S' )
+      IF( ( .NOT.WANTVS ) .AND. ( .NOT.LSAME( JOBVS, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDVS.LT.1 .OR. ( WANTVS .AND. LDVS.LT.N ) ) THEN
+         INFO = -11
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV.
+*       HSWORK refers to the workspace preferred by SHSEQR, as
+*       calculated below. HSWORK is computed assuming ILO=1 and IHI=N,
+*       the worst case.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN
+         MAXWRK = 2*N + N*ILAENV( 1, 'SGEHRD', ' ', N, 1, N, 0 )
+         MINWRK = MAX( 1, 3*N )
+         IF( .NOT.WANTVS ) THEN
+            MAXB = MAX( ILAENV( 8, 'SHSEQR', 'SN', N, 1, N, -1 ), 2 )
+            K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'SHSEQR', 'SN', N, 1,
+     $          N, -1 ) ) )
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, N+HSWORK, 1 )
+         ELSE
+            MAXWRK = MAX( MAXWRK, 2*N+( N-1 )*
+     $               ILAENV( 1, 'SORGHR', ' ', N, 1, N, -1 ) )
+            MAXB = MAX( ILAENV( 8, 'SHSEQR', 'EN', N, 1, N, -1 ), 2 )
+            K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'SHSEQR', 'EN', N, 1,
+     $          N, -1 ) ) )
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, N+HSWORK, 1 )
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGEES ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         SDIM = 0
+         RETURN
+      END IF
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SQRT( SMLNUM ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = SLANGE( 'M', N, N, A, LDA, DUM )
+      SCALEA = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = SMLNUM
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = BIGNUM
+      END IF
+      IF( SCALEA )
+     $   CALL SLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR )
+*
+*     Permute the matrix to make it more nearly triangular
+*     (Workspace: need N)
+*
+      IBAL = 1
+      CALL SGEBAL( 'P', N, A, LDA, ILO, IHI, WORK( IBAL ), IERR )
+*
+*     Reduce to upper Hessenberg form
+*     (Workspace: need 3*N, prefer 2*N+N*NB)
+*
+      ITAU = N + IBAL
+      IWRK = N + ITAU
+      CALL SGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ),
+     $             LWORK-IWRK+1, IERR )
+*
+      IF( WANTVS ) THEN
+*
+*        Copy Householder vectors to VS
+*
+         CALL SLACPY( 'L', N, N, A, LDA, VS, LDVS )
+*
+*        Generate orthogonal matrix in VS
+*        (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB)
+*
+         CALL SORGHR( N, ILO, IHI, VS, LDVS, WORK( ITAU ), WORK( IWRK ),
+     $                LWORK-IWRK+1, IERR )
+      END IF
+*
+      SDIM = 0
+*
+*     Perform QR iteration, accumulating Schur vectors in VS if desired
+*     (Workspace: need N+1, prefer N+HSWORK (see comments) )
+*
+      IWRK = ITAU
+      CALL SHSEQR( 'S', JOBVS, N, ILO, IHI, A, LDA, WR, WI, VS, LDVS,
+     $             WORK( IWRK ), LWORK-IWRK+1, IEVAL )
+      IF( IEVAL.GT.0 )
+     $   INFO = IEVAL
+*
+*     Sort eigenvalues if desired
+*
+      IF( WANTST .AND. INFO.EQ.0 ) THEN
+         IF( SCALEA ) THEN
+            CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, WR, N, IERR )
+            CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, WI, N, IERR )
+         END IF
+         DO 10 I = 1, N
+            BWORK( I ) = SELECT( WR( I ), WI( I ) )
+   10    CONTINUE
+*
+*        Reorder eigenvalues and transform Schur vectors
+*        (Workspace: none needed)
+*
+         CALL STRSEN( 'N', JOBVS, BWORK, N, A, LDA, VS, LDVS, WR, WI,
+     $                SDIM, S, SEP, WORK( IWRK ), LWORK-IWRK+1, IDUM, 1,
+     $                ICOND )
+         IF( ICOND.GT.0 )
+     $      INFO = N + ICOND
+      END IF
+*
+      IF( WANTVS ) THEN
+*
+*        Undo balancing
+*        (Workspace: need N)
+*
+         CALL SGEBAK( 'P', 'R', N, ILO, IHI, WORK( IBAL ), N, VS, LDVS,
+     $                IERR )
+      END IF
+*
+      IF( SCALEA ) THEN
+*
+*        Undo scaling for the Schur form of A
+*
+         CALL SLASCL( 'H', 0, 0, CSCALE, ANRM, N, N, A, LDA, IERR )
+         CALL SCOPY( N, A, LDA+1, WR, 1 )
+         IF( CSCALE.EQ.SMLNUM ) THEN
+*
+*           If scaling back towards underflow, adjust WI if an
+*           offdiagonal element of a 2-by-2 block in the Schur form
+*           underflows.
+*
+            IF( IEVAL.GT.0 ) THEN
+               I1 = IEVAL + 1
+               I2 = IHI - 1
+               CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WI,
+     $                      MAX( ILO-1, 1 ), IERR )
+            ELSE IF( WANTST ) THEN
+               I1 = 1
+               I2 = N - 1
+            ELSE
+               I1 = ILO
+               I2 = IHI - 1
+            END IF
+            INXT = I1 - 1
+            DO 20 I = I1, I2
+               IF( I.LT.INXT )
+     $            GO TO 20
+               IF( WI( I ).EQ.ZERO ) THEN
+                  INXT = I + 1
+               ELSE
+                  IF( A( I+1, I ).EQ.ZERO ) THEN
+                     WI( I ) = ZERO
+                     WI( I+1 ) = ZERO
+                  ELSE IF( A( I+1, I ).NE.ZERO .AND. A( I, I+1 ).EQ.
+     $                     ZERO ) THEN
+                     WI( I ) = ZERO
+                     WI( I+1 ) = ZERO
+                     IF( I.GT.1 )
+     $                  CALL SSWAP( I-1, A( 1, I ), 1, A( 1, I+1 ), 1 )
+                     IF( N.GT.I+1 )
+     $                  CALL SSWAP( N-I-1, A( I, I+2 ), LDA,
+     $                              A( I+1, I+2 ), LDA )
+                     CALL SSWAP( N, VS( 1, I ), 1, VS( 1, I+1 ), 1 )
+                     A( I, I+1 ) = A( I+1, I )
+                     A( I+1, I ) = ZERO
+                  END IF
+                  INXT = I + 2
+               END IF
+   20       CONTINUE
+         END IF
+*
+*        Undo scaling for the imaginary part of the eigenvalues
+*
+         CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N-IEVAL, 1,
+     $                WI( IEVAL+1 ), MAX( N-IEVAL, 1 ), IERR )
+      END IF
+*
+      IF( WANTST .AND. INFO.EQ.0 ) THEN
+*
+*        Check if reordering successful
+*
+         LASTSL = .TRUE.
+         LST2SL = .TRUE.
+         SDIM = 0
+         IP = 0
+         DO 30 I = 1, N
+            CURSL = SELECT( WR( I ), WI( I ) )
+            IF( WI( I ).EQ.ZERO ) THEN
+               IF( CURSL )
+     $            SDIM = SDIM + 1
+               IP = 0
+               IF( CURSL .AND. .NOT.LASTSL )
+     $            INFO = N + 2
+            ELSE
+               IF( IP.EQ.1 ) THEN
+*
+*                 Last eigenvalue of conjugate pair
+*
+                  CURSL = CURSL .OR. LASTSL
+                  LASTSL = CURSL
+                  IF( CURSL )
+     $               SDIM = SDIM + 2
+                  IP = -1
+                  IF( CURSL .AND. .NOT.LST2SL )
+     $               INFO = N + 2
+               ELSE
+*
+*                 First eigenvalue of conjugate pair
+*
+                  IP = 1
+               END IF
+            END IF
+            LST2SL = LASTSL
+            LASTSL = CURSL
+   30    CONTINUE
+      END IF
+*
+      WORK( 1 ) = MAXWRK
+      RETURN
+*
+*     End of SGEES
+*
+      END
diff --git a/costa/native/external/lapack/sgeesx.f b/costa/native/external/lapack/sgeesx.f
new file mode 100644
index 000000000..2672bb2bb
--- /dev/null
+++ b/costa/native/external/lapack/sgeesx.f
@@ -0,0 +1,502 @@
+      SUBROUTINE SGEESX( JOBVS, SORT, SELECT, SENSE, N, A, LDA, SDIM,
+     $                   WR, WI, VS, LDVS, RCONDE, RCONDV, WORK, LWORK,
+     $                   IWORK, LIWORK, BWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVS, SENSE, SORT
+      INTEGER            INFO, LDA, LDVS, LIWORK, LWORK, N, SDIM
+      REAL               RCONDE, RCONDV
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            BWORK( * )
+      INTEGER            IWORK( * )
+      REAL               A( LDA, * ), VS( LDVS, * ), WI( * ), WORK( * ),
+     $                   WR( * )
+*     ..
+*     .. Function Arguments ..
+      LOGICAL            SELECT
+      EXTERNAL           SELECT
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGEESX computes for an N-by-N real nonsymmetric matrix A, the
+*  eigenvalues, the real Schur form T, and, optionally, the matrix of
+*  Schur vectors Z.  This gives the Schur factorization A = Z*T*(Z**T).
+*
+*  Optionally, it also orders the eigenvalues on the diagonal of the
+*  real Schur form so that selected eigenvalues are at the top left;
+*  computes a reciprocal condition number for the average of the
+*  selected eigenvalues (RCONDE); and computes a reciprocal condition
+*  number for the right invariant subspace corresponding to the
+*  selected eigenvalues (RCONDV).  The leading columns of Z form an
+*  orthonormal basis for this invariant subspace.
+*
+*  For further explanation of the reciprocal condition numbers RCONDE
+*  and RCONDV, see Section 4.10 of the LAPACK Users' Guide (where
+*  these quantities are called s and sep respectively).
+*
+*  A real matrix is in real Schur form if it is upper quasi-triangular
+*  with 1-by-1 and 2-by-2 blocks. 2-by-2 blocks will be standardized in
+*  the form
+*            [  a  b  ]
+*            [  c  a  ]
+*
+*  where b*c < 0. The eigenvalues of such a block are a +- sqrt(bc).
+*
+*  Arguments
+*  =========
+*
+*  JOBVS   (input) CHARACTER*1
+*          = 'N': Schur vectors are not computed;
+*          = 'V': Schur vectors are computed.
+*
+*  SORT    (input) CHARACTER*1
+*          Specifies whether or not to order the eigenvalues on the
+*          diagonal of the Schur form.
+*          = 'N': Eigenvalues are not ordered;
+*          = 'S': Eigenvalues are ordered (see SELECT).
+*
+*  SELECT  (input) LOGICAL FUNCTION of two REAL arguments
+*          SELECT must be declared EXTERNAL in the calling subroutine.
+*          If SORT = 'S', SELECT is used to select eigenvalues to sort
+*          to the top left of the Schur form.
+*          If SORT = 'N', SELECT is not referenced.
+*          An eigenvalue WR(j)+sqrt(-1)*WI(j) is selected if
+*          SELECT(WR(j),WI(j)) is true; i.e., if either one of a
+*          complex conjugate pair of eigenvalues is selected, then both
+*          are.  Note that a selected complex eigenvalue may no longer
+*          satisfy SELECT(WR(j),WI(j)) = .TRUE. after ordering, since
+*          ordering may change the value of complex eigenvalues
+*          (especially if the eigenvalue is ill-conditioned); in this
+*          case INFO may be set to N+3 (see INFO below).
+*
+*  SENSE   (input) CHARACTER*1
+*          Determines which reciprocal condition numbers are computed.
+*          = 'N': None are computed;
+*          = 'E': Computed for average of selected eigenvalues only;
+*          = 'V': Computed for selected right invariant subspace only;
+*          = 'B': Computed for both.
+*          If SENSE = 'E', 'V' or 'B', SORT must equal 'S'.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A. N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA, N)
+*          On entry, the N-by-N matrix A.
+*          On exit, A is overwritten by its real Schur form T.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  SDIM    (output) INTEGER
+*          If SORT = 'N', SDIM = 0.
+*          If SORT = 'S', SDIM = number of eigenvalues (after sorting)
+*                         for which SELECT is true. (Complex conjugate
+*                         pairs for which SELECT is true for either
+*                         eigenvalue count as 2.)
+*
+*  WR      (output) REAL array, dimension (N)
+*  WI      (output) REAL array, dimension (N)
+*          WR and WI contain the real and imaginary parts, respectively,
+*          of the computed eigenvalues, in the same order that they
+*          appear on the diagonal of the output Schur form T.  Complex
+*          conjugate pairs of eigenvalues appear consecutively with the
+*          eigenvalue having the positive imaginary part first.
+*
+*  VS      (output) REAL array, dimension (LDVS,N)
+*          If JOBVS = 'V', VS contains the orthogonal matrix Z of Schur
+*          vectors.
+*          If JOBVS = 'N', VS is not referenced.
+*
+*  LDVS    (input) INTEGER
+*          The leading dimension of the array VS.  LDVS >= 1, and if
+*          JOBVS = 'V', LDVS >= N.
+*
+*  RCONDE  (output) REAL
+*          If SENSE = 'E' or 'B', RCONDE contains the reciprocal
+*          condition number for the average of the selected eigenvalues.
+*          Not referenced if SENSE = 'N' or 'V'.
+*
+*  RCONDV  (output) REAL
+*          If SENSE = 'V' or 'B', RCONDV contains the reciprocal
+*          condition number for the selected right invariant subspace.
+*          Not referenced if SENSE = 'N' or 'E'.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,3*N).
+*          Also, if SENSE = 'E' or 'V' or 'B',
+*          LWORK >= N+2*SDIM*(N-SDIM), where SDIM is the number of
+*          selected eigenvalues computed by this routine.  Note that
+*          N+2*SDIM*(N-SDIM) <= N+N*N/2.
+*          For good performance, LWORK must generally be larger.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          Not referenced if SENSE = 'N' or 'E'.
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.
+*          LIWORK >= 1; if SENSE = 'V' or 'B', LIWORK >= SDIM*(N-SDIM).
+*
+*  BWORK   (workspace) LOGICAL array, dimension (N)
+*          Not referenced if SORT = 'N'.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value.
+*          > 0: if INFO = i, and i is
+*             <= N: the QR algorithm failed to compute all the
+*                   eigenvalues; elements 1:ILO-1 and i+1:N of WR and WI
+*                   contain those eigenvalues which have converged; if
+*                   JOBVS = 'V', VS contains the transformation which
+*                   reduces A to its partially converged Schur form.
+*             = N+1: the eigenvalues could not be reordered because some
+*                   eigenvalues were too close to separate (the problem
+*                   is very ill-conditioned);
+*             = N+2: after reordering, roundoff changed values of some
+*                   complex eigenvalues so that leading eigenvalues in
+*                   the Schur form no longer satisfy SELECT=.TRUE.  This
+*                   could also be caused by underflow due to scaling.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            CURSL, LASTSL, LST2SL, SCALEA, WANTSB,
+     $                   WANTSE, WANTSN, WANTST, WANTSV, WANTVS
+      INTEGER            HSWORK, I, I1, I2, IBAL, ICOND, IERR, IEVAL,
+     $                   IHI, ILO, INXT, IP, ITAU, IWRK, K, MAXB,
+     $                   MAXWRK, MINWRK
+      REAL               ANRM, BIGNUM, CSCALE, EPS, SMLNUM
+*     ..
+*     .. Local Arrays ..
+      REAL               DUM( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SGEBAK, SGEBAL, SGEHRD, SHSEQR, SLABAD,
+     $                   SLACPY, SLASCL, SORGHR, SSWAP, STRSEN, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               SLAMCH, SLANGE
+      EXTERNAL           LSAME, ILAENV, SLAMCH, SLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      WANTVS = LSAME( JOBVS, 'V' )
+      WANTST = LSAME( SORT, 'S' )
+      WANTSN = LSAME( SENSE, 'N' )
+      WANTSE = LSAME( SENSE, 'E' )
+      WANTSV = LSAME( SENSE, 'V' )
+      WANTSB = LSAME( SENSE, 'B' )
+      IF( ( .NOT.WANTVS ) .AND. ( .NOT.LSAME( JOBVS, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSV .OR. WANTSB ) .OR.
+     $         ( .NOT.WANTST .AND. .NOT.WANTSN ) ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDVS.LT.1 .OR. ( WANTVS .AND. LDVS.LT.N ) ) THEN
+         INFO = -12
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "RWorkspace:" describe the
+*       minimal amount of real workspace needed at that point in the
+*       code, as well as the preferred amount for good performance.
+*       IWorkspace refers to integer workspace.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV.
+*       HSWORK refers to the workspace preferred by SHSEQR, as
+*       calculated below. HSWORK is computed assuming ILO=1 and IHI=N,
+*       the worst case.
+*       If SENSE = 'E', 'V' or 'B', then the amount of workspace needed
+*       depends on SDIM, which is computed by the routine STRSEN later
+*       in the code.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. LWORK.GE.1 ) THEN
+         MAXWRK = 2*N + N*ILAENV( 1, 'SGEHRD', ' ', N, 1, N, 0 )
+         MINWRK = MAX( 1, 3*N )
+         IF( .NOT.WANTVS ) THEN
+            MAXB = MAX( ILAENV( 8, 'SHSEQR', 'SN', N, 1, N, -1 ), 2 )
+            K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'SHSEQR', 'SN', N, 1,
+     $          N, -1 ) ) )
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, N+HSWORK, 1 )
+         ELSE
+            MAXWRK = MAX( MAXWRK, 2*N+( N-1 )*
+     $               ILAENV( 1, 'SORGHR', ' ', N, 1, N, -1 ) )
+            MAXB = MAX( ILAENV( 8, 'SHSEQR', 'SV', N, 1, N, -1 ), 2 )
+            K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'SHSEQR', 'SV', N, 1,
+     $          N, -1 ) ) )
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, N+HSWORK, 1 )
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+      IF( LWORK.LT.MINWRK ) THEN
+         INFO = -16
+      END IF
+      IF( LIWORK.LT.1 ) THEN
+         INFO = -18
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGEESX', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         SDIM = 0
+         RETURN
+      END IF
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SQRT( SMLNUM ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = SLANGE( 'M', N, N, A, LDA, DUM )
+      SCALEA = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = SMLNUM
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = BIGNUM
+      END IF
+      IF( SCALEA )
+     $   CALL SLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR )
+*
+*     Permute the matrix to make it more nearly triangular
+*     (RWorkspace: need N)
+*
+      IBAL = 1
+      CALL SGEBAL( 'P', N, A, LDA, ILO, IHI, WORK( IBAL ), IERR )
+*
+*     Reduce to upper Hessenberg form
+*     (RWorkspace: need 3*N, prefer 2*N+N*NB)
+*
+      ITAU = N + IBAL
+      IWRK = N + ITAU
+      CALL SGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ),
+     $             LWORK-IWRK+1, IERR )
+*
+      IF( WANTVS ) THEN
+*
+*        Copy Householder vectors to VS
+*
+         CALL SLACPY( 'L', N, N, A, LDA, VS, LDVS )
+*
+*        Generate orthogonal matrix in VS
+*        (RWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
+*
+         CALL SORGHR( N, ILO, IHI, VS, LDVS, WORK( ITAU ), WORK( IWRK ),
+     $                LWORK-IWRK+1, IERR )
+      END IF
+*
+      SDIM = 0
+*
+*     Perform QR iteration, accumulating Schur vectors in VS if desired
+*     (RWorkspace: need N+1, prefer N+HSWORK (see comments) )
+*
+      IWRK = ITAU
+      CALL SHSEQR( 'S', JOBVS, N, ILO, IHI, A, LDA, WR, WI, VS, LDVS,
+     $             WORK( IWRK ), LWORK-IWRK+1, IEVAL )
+      IF( IEVAL.GT.0 )
+     $   INFO = IEVAL
+*
+*     Sort eigenvalues if desired
+*
+      IF( WANTST .AND. INFO.EQ.0 ) THEN
+         IF( SCALEA ) THEN
+            CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, WR, N, IERR )
+            CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, WI, N, IERR )
+         END IF
+         DO 10 I = 1, N
+            BWORK( I ) = SELECT( WR( I ), WI( I ) )
+   10    CONTINUE
+*
+*        Reorder eigenvalues, transform Schur vectors, and compute
+*        reciprocal condition numbers
+*        (RWorkspace: if SENSE is not 'N', need N+2*SDIM*(N-SDIM)
+*                     otherwise, need N )
+*        (IWorkspace: if SENSE is 'V' or 'B', need SDIM*(N-SDIM)
+*                     otherwise, need 0 )
+*
+         CALL STRSEN( SENSE, JOBVS, BWORK, N, A, LDA, VS, LDVS, WR, WI,
+     $                SDIM, RCONDE, RCONDV, WORK( IWRK ), LWORK-IWRK+1,
+     $                IWORK, LIWORK, ICOND )
+         IF( .NOT.WANTSN )
+     $      MAXWRK = MAX( MAXWRK, N+2*SDIM*( N-SDIM ) )
+         IF( ICOND.EQ.-15 ) THEN
+*
+*           Not enough real workspace
+*
+            INFO = -16
+         ELSE IF( ICOND.EQ.-17 ) THEN
+*
+*           Not enough integer workspace
+*
+            INFO = -18
+         ELSE IF( ICOND.GT.0 ) THEN
+*
+*           STRSEN failed to reorder or to restore standard Schur form
+*
+            INFO = ICOND + N
+         END IF
+      END IF
+*
+      IF( WANTVS ) THEN
+*
+*        Undo balancing
+*        (RWorkspace: need N)
+*
+         CALL SGEBAK( 'P', 'R', N, ILO, IHI, WORK( IBAL ), N, VS, LDVS,
+     $                IERR )
+      END IF
+*
+      IF( SCALEA ) THEN
+*
+*        Undo scaling for the Schur form of A
+*
+         CALL SLASCL( 'H', 0, 0, CSCALE, ANRM, N, N, A, LDA, IERR )
+         CALL SCOPY( N, A, LDA+1, WR, 1 )
+         IF( ( WANTSV .OR. WANTSB ) .AND. INFO.EQ.0 ) THEN
+            DUM( 1 ) = RCONDV
+            CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, 1, 1, DUM, 1, IERR )
+            RCONDV = DUM( 1 )
+         END IF
+         IF( CSCALE.EQ.SMLNUM ) THEN
+*
+*           If scaling back towards underflow, adjust WI if an
+*           offdiagonal element of a 2-by-2 block in the Schur form
+*           underflows.
+*
+            IF( IEVAL.GT.0 ) THEN
+               I1 = IEVAL + 1
+               I2 = IHI - 1
+               CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WI, N,
+     $                      IERR )
+            ELSE IF( WANTST ) THEN
+               I1 = 1
+               I2 = N - 1
+            ELSE
+               I1 = ILO
+               I2 = IHI - 1
+            END IF
+            INXT = I1 - 1
+            DO 20 I = I1, I2
+               IF( I.LT.INXT )
+     $            GO TO 20
+               IF( WI( I ).EQ.ZERO ) THEN
+                  INXT = I + 1
+               ELSE
+                  IF( A( I+1, I ).EQ.ZERO ) THEN
+                     WI( I ) = ZERO
+                     WI( I+1 ) = ZERO
+                  ELSE IF( A( I+1, I ).NE.ZERO .AND. A( I, I+1 ).EQ.
+     $                     ZERO ) THEN
+                     WI( I ) = ZERO
+                     WI( I+1 ) = ZERO
+                     IF( I.GT.1 )
+     $                  CALL SSWAP( I-1, A( 1, I ), 1, A( 1, I+1 ), 1 )
+                     IF( N.GT.I+1 )
+     $                  CALL SSWAP( N-I-1, A( I, I+2 ), LDA,
+     $                              A( I+1, I+2 ), LDA )
+                     CALL SSWAP( N, VS( 1, I ), 1, VS( 1, I+1 ), 1 )
+                     A( I, I+1 ) = A( I+1, I )
+                     A( I+1, I ) = ZERO
+                  END IF
+                  INXT = I + 2
+               END IF
+   20       CONTINUE
+         END IF
+         CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N-IEVAL, 1,
+     $                WI( IEVAL+1 ), MAX( N-IEVAL, 1 ), IERR )
+      END IF
+*
+      IF( WANTST .AND. INFO.EQ.0 ) THEN
+*
+*        Check if reordering successful
+*
+         LASTSL = .TRUE.
+         LST2SL = .TRUE.
+         SDIM = 0
+         IP = 0
+         DO 30 I = 1, N
+            CURSL = SELECT( WR( I ), WI( I ) )
+            IF( WI( I ).EQ.ZERO ) THEN
+               IF( CURSL )
+     $            SDIM = SDIM + 1
+               IP = 0
+               IF( CURSL .AND. .NOT.LASTSL )
+     $            INFO = N + 2
+            ELSE
+               IF( IP.EQ.1 ) THEN
+*
+*                 Last eigenvalue of conjugate pair
+*
+                  CURSL = CURSL .OR. LASTSL
+                  LASTSL = CURSL
+                  IF( CURSL )
+     $               SDIM = SDIM + 2
+                  IP = -1
+                  IF( CURSL .AND. .NOT.LST2SL )
+     $               INFO = N + 2
+               ELSE
+*
+*                 First eigenvalue of conjugate pair
+*
+                  IP = 1
+               END IF
+            END IF
+            LST2SL = LASTSL
+            LASTSL = CURSL
+   30    CONTINUE
+      END IF
+*
+      WORK( 1 ) = MAXWRK
+      IF( WANTSV .OR. WANTSB ) THEN
+         IWORK( 1 ) = SDIM*(N-SDIM)
+      ELSE
+         IWORK( 1 ) = 1
+      END IF
+*
+      RETURN
+*
+*     End of SGEESX
+*
+      END
diff --git a/costa/native/external/lapack/sgeev.f b/costa/native/external/lapack/sgeev.f
new file mode 100644
index 000000000..203402ece
--- /dev/null
+++ b/costa/native/external/lapack/sgeev.f
@@ -0,0 +1,410 @@
+      SUBROUTINE SGEEV( JOBVL, JOBVR, N, A, LDA, WR, WI, VL, LDVL, VR,
+     $                  LDVR, WORK, LWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     December 8, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVL, JOBVR
+      INTEGER            INFO, LDA, LDVL, LDVR, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ),
+     $                   WI( * ), WORK( * ), WR( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGEEV computes for an N-by-N real nonsymmetric matrix A, the
+*  eigenvalues and, optionally, the left and/or right eigenvectors.
+*
+*  The right eigenvector v(j) of A satisfies
+*                   A * v(j) = lambda(j) * v(j)
+*  where lambda(j) is its eigenvalue.
+*  The left eigenvector u(j) of A satisfies
+*                u(j)**H * A = lambda(j) * u(j)**H
+*  where u(j)**H denotes the conjugate transpose of u(j).
+*
+*  The computed eigenvectors are normalized to have Euclidean norm
+*  equal to 1 and largest component real.
+*
+*  Arguments
+*  =========
+*
+*  JOBVL   (input) CHARACTER*1
+*          = 'N': left eigenvectors of A are not computed;
+*          = 'V': left eigenvectors of A are computed.
+*
+*  JOBVR   (input) CHARACTER*1
+*          = 'N': right eigenvectors of A are not computed;
+*          = 'V': right eigenvectors of A are computed.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A. N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the N-by-N matrix A.
+*          On exit, A has been overwritten.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  WR      (output) REAL array, dimension (N)
+*  WI      (output) REAL array, dimension (N)
+*          WR and WI contain the real and imaginary parts,
+*          respectively, of the computed eigenvalues.  Complex
+*          conjugate pairs of eigenvalues appear consecutively
+*          with the eigenvalue having the positive imaginary part
+*          first.
+*
+*  VL      (output) REAL array, dimension (LDVL,N)
+*          If JOBVL = 'V', the left eigenvectors u(j) are stored one
+*          after another in the columns of VL, in the same order
+*          as their eigenvalues.
+*          If JOBVL = 'N', VL is not referenced.
+*          If the j-th eigenvalue is real, then u(j) = VL(:,j),
+*          the j-th column of VL.
+*          If the j-th and (j+1)-st eigenvalues form a complex
+*          conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and
+*          u(j+1) = VL(:,j) - i*VL(:,j+1).
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the array VL.  LDVL >= 1; if
+*          JOBVL = 'V', LDVL >= N.
+*
+*  VR      (output) REAL array, dimension (LDVR,N)
+*          If JOBVR = 'V', the right eigenvectors v(j) are stored one
+*          after another in the columns of VR, in the same order
+*          as their eigenvalues.
+*          If JOBVR = 'N', VR is not referenced.
+*          If the j-th eigenvalue is real, then v(j) = VR(:,j),
+*          the j-th column of VR.
+*          If the j-th and (j+1)-st eigenvalues form a complex
+*          conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and
+*          v(j+1) = VR(:,j) - i*VR(:,j+1).
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the array VR.  LDVR >= 1; if
+*          JOBVR = 'V', LDVR >= N.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,3*N), and
+*          if JOBVL = 'V' or JOBVR = 'V', LWORK >= 4*N.  For good
+*          performance, LWORK must generally be larger.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = i, the QR algorithm failed to compute all the
+*                eigenvalues, and no eigenvectors have been computed;
+*                elements i+1:N of WR and WI contain eigenvalues which
+*                have converged.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, SCALEA, WANTVL, WANTVR
+      CHARACTER          SIDE
+      INTEGER            HSWORK, I, IBAL, IERR, IHI, ILO, ITAU, IWRK, K,
+     $                   MAXB, MAXWRK, MINWRK, NOUT
+      REAL               ANRM, BIGNUM, CS, CSCALE, EPS, R, SCL, SMLNUM,
+     $                   SN
+*     ..
+*     .. Local Arrays ..
+      LOGICAL            SELECT( 1 )
+      REAL               DUM( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEBAK, SGEBAL, SGEHRD, SHSEQR, SLABAD, SLACPY,
+     $                   SLARTG, SLASCL, SORGHR, SROT, SSCAL, STREVC,
+     $                   XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV, ISAMAX
+      REAL               SLAMCH, SLANGE, SLAPY2, SNRM2
+      EXTERNAL           LSAME, ILAENV, ISAMAX, SLAMCH, SLANGE, SLAPY2,
+     $                   SNRM2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      WANTVL = LSAME( JOBVL, 'V' )
+      WANTVR = LSAME( JOBVR, 'V' )
+      IF( ( .NOT.WANTVL ) .AND. ( .NOT.LSAME( JOBVL, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( ( .NOT.WANTVR ) .AND. ( .NOT.LSAME( JOBVR, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDVL.LT.1 .OR. ( WANTVL .AND. LDVL.LT.N ) ) THEN
+         INFO = -9
+      ELSE IF( LDVR.LT.1 .OR. ( WANTVR .AND. LDVR.LT.N ) ) THEN
+         INFO = -11
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV.
+*       HSWORK refers to the workspace preferred by SHSEQR, as
+*       calculated below. HSWORK is computed assuming ILO=1 and IHI=N,
+*       the worst case.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN
+         MAXWRK = 2*N + N*ILAENV( 1, 'SGEHRD', ' ', N, 1, N, 0 )
+         IF( ( .NOT.WANTVL ) .AND. ( .NOT.WANTVR ) ) THEN
+            MINWRK = MAX( 1, 3*N )
+            MAXB = MAX( ILAENV( 8, 'SHSEQR', 'EN', N, 1, N, -1 ), 2 )
+            K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'SHSEQR', 'EN', N, 1,
+     $          N, -1 ) ) )
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, N+1, N+HSWORK )
+         ELSE
+            MINWRK = MAX( 1, 4*N )
+            MAXWRK = MAX( MAXWRK, 2*N+( N-1 )*
+     $               ILAENV( 1, 'SORGHR', ' ', N, 1, N, -1 ) )
+            MAXB = MAX( ILAENV( 8, 'SHSEQR', 'SV', N, 1, N, -1 ), 2 )
+            K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'SHSEQR', 'SV', N, 1,
+     $          N, -1 ) ) )
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, N+1, N+HSWORK )
+            MAXWRK = MAX( MAXWRK, 4*N )
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGEEV ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SQRT( SMLNUM ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = SLANGE( 'M', N, N, A, LDA, DUM )
+      SCALEA = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = SMLNUM
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = BIGNUM
+      END IF
+      IF( SCALEA )
+     $   CALL SLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR )
+*
+*     Balance the matrix
+*     (Workspace: need N)
+*
+      IBAL = 1
+      CALL SGEBAL( 'B', N, A, LDA, ILO, IHI, WORK( IBAL ), IERR )
+*
+*     Reduce to upper Hessenberg form
+*     (Workspace: need 3*N, prefer 2*N+N*NB)
+*
+      ITAU = IBAL + N
+      IWRK = ITAU + N
+      CALL SGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ),
+     $             LWORK-IWRK+1, IERR )
+*
+      IF( WANTVL ) THEN
+*
+*        Want left eigenvectors
+*        Copy Householder vectors to VL
+*
+         SIDE = 'L'
+         CALL SLACPY( 'L', N, N, A, LDA, VL, LDVL )
+*
+*        Generate orthogonal matrix in VL
+*        (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB)
+*
+         CALL SORGHR( N, ILO, IHI, VL, LDVL, WORK( ITAU ), WORK( IWRK ),
+     $                LWORK-IWRK+1, IERR )
+*
+*        Perform QR iteration, accumulating Schur vectors in VL
+*        (Workspace: need N+1, prefer N+HSWORK (see comments) )
+*
+         IWRK = ITAU
+         CALL SHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, WR, WI, VL, LDVL,
+     $                WORK( IWRK ), LWORK-IWRK+1, INFO )
+*
+         IF( WANTVR ) THEN
+*
+*           Want left and right eigenvectors
+*           Copy Schur vectors to VR
+*
+            SIDE = 'B'
+            CALL SLACPY( 'F', N, N, VL, LDVL, VR, LDVR )
+         END IF
+*
+      ELSE IF( WANTVR ) THEN
+*
+*        Want right eigenvectors
+*        Copy Householder vectors to VR
+*
+         SIDE = 'R'
+         CALL SLACPY( 'L', N, N, A, LDA, VR, LDVR )
+*
+*        Generate orthogonal matrix in VR
+*        (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB)
+*
+         CALL SORGHR( N, ILO, IHI, VR, LDVR, WORK( ITAU ), WORK( IWRK ),
+     $                LWORK-IWRK+1, IERR )
+*
+*        Perform QR iteration, accumulating Schur vectors in VR
+*        (Workspace: need N+1, prefer N+HSWORK (see comments) )
+*
+         IWRK = ITAU
+         CALL SHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, WR, WI, VR, LDVR,
+     $                WORK( IWRK ), LWORK-IWRK+1, INFO )
+*
+      ELSE
+*
+*        Compute eigenvalues only
+*        (Workspace: need N+1, prefer N+HSWORK (see comments) )
+*
+         IWRK = ITAU
+         CALL SHSEQR( 'E', 'N', N, ILO, IHI, A, LDA, WR, WI, VR, LDVR,
+     $                WORK( IWRK ), LWORK-IWRK+1, INFO )
+      END IF
+*
+*     If INFO > 0 from SHSEQR, then quit
+*
+      IF( INFO.GT.0 )
+     $   GO TO 50
+*
+      IF( WANTVL .OR. WANTVR ) THEN
+*
+*        Compute left and/or right eigenvectors
+*        (Workspace: need 4*N)
+*
+         CALL STREVC( SIDE, 'B', SELECT, N, A, LDA, VL, LDVL, VR, LDVR,
+     $                N, NOUT, WORK( IWRK ), IERR )
+      END IF
+*
+      IF( WANTVL ) THEN
+*
+*        Undo balancing of left eigenvectors
+*        (Workspace: need N)
+*
+         CALL SGEBAK( 'B', 'L', N, ILO, IHI, WORK( IBAL ), N, VL, LDVL,
+     $                IERR )
+*
+*        Normalize left eigenvectors and make largest component real
+*
+         DO 20 I = 1, N
+            IF( WI( I ).EQ.ZERO ) THEN
+               SCL = ONE / SNRM2( N, VL( 1, I ), 1 )
+               CALL SSCAL( N, SCL, VL( 1, I ), 1 )
+            ELSE IF( WI( I ).GT.ZERO ) THEN
+               SCL = ONE / SLAPY2( SNRM2( N, VL( 1, I ), 1 ),
+     $               SNRM2( N, VL( 1, I+1 ), 1 ) )
+               CALL SSCAL( N, SCL, VL( 1, I ), 1 )
+               CALL SSCAL( N, SCL, VL( 1, I+1 ), 1 )
+               DO 10 K = 1, N
+                  WORK( IWRK+K-1 ) = VL( K, I )**2 + VL( K, I+1 )**2
+   10          CONTINUE
+               K = ISAMAX( N, WORK( IWRK ), 1 )
+               CALL SLARTG( VL( K, I ), VL( K, I+1 ), CS, SN, R )
+               CALL SROT( N, VL( 1, I ), 1, VL( 1, I+1 ), 1, CS, SN )
+               VL( K, I+1 ) = ZERO
+            END IF
+   20    CONTINUE
+      END IF
+*
+      IF( WANTVR ) THEN
+*
+*        Undo balancing of right eigenvectors
+*        (Workspace: need N)
+*
+         CALL SGEBAK( 'B', 'R', N, ILO, IHI, WORK( IBAL ), N, VR, LDVR,
+     $                IERR )
+*
+*        Normalize right eigenvectors and make largest component real
+*
+         DO 40 I = 1, N
+            IF( WI( I ).EQ.ZERO ) THEN
+               SCL = ONE / SNRM2( N, VR( 1, I ), 1 )
+               CALL SSCAL( N, SCL, VR( 1, I ), 1 )
+            ELSE IF( WI( I ).GT.ZERO ) THEN
+               SCL = ONE / SLAPY2( SNRM2( N, VR( 1, I ), 1 ),
+     $               SNRM2( N, VR( 1, I+1 ), 1 ) )
+               CALL SSCAL( N, SCL, VR( 1, I ), 1 )
+               CALL SSCAL( N, SCL, VR( 1, I+1 ), 1 )
+               DO 30 K = 1, N
+                  WORK( IWRK+K-1 ) = VR( K, I )**2 + VR( K, I+1 )**2
+   30          CONTINUE
+               K = ISAMAX( N, WORK( IWRK ), 1 )
+               CALL SLARTG( VR( K, I ), VR( K, I+1 ), CS, SN, R )
+               CALL SROT( N, VR( 1, I ), 1, VR( 1, I+1 ), 1, CS, SN )
+               VR( K, I+1 ) = ZERO
+            END IF
+   40    CONTINUE
+      END IF
+*
+*     Undo scaling if necessary
+*
+   50 CONTINUE
+      IF( SCALEA ) THEN
+         CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, WR( INFO+1 ),
+     $                MAX( N-INFO, 1 ), IERR )
+         CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, WI( INFO+1 ),
+     $                MAX( N-INFO, 1 ), IERR )
+         IF( INFO.GT.0 ) THEN
+            CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WR, N,
+     $                   IERR )
+            CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WI, N,
+     $                   IERR )
+         END IF
+      END IF
+*
+      WORK( 1 ) = MAXWRK
+      RETURN
+*
+*     End of SGEEV
+*
+      END
diff --git a/costa/native/external/lapack/sgeevx.f b/costa/native/external/lapack/sgeevx.f
new file mode 100644
index 000000000..ab5898c6e
--- /dev/null
+++ b/costa/native/external/lapack/sgeevx.f
@@ -0,0 +1,543 @@
+      SUBROUTINE SGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, WR, WI,
+     $                   VL, LDVL, VR, LDVR, ILO, IHI, SCALE, ABNRM,
+     $                   RCONDE, RCONDV, WORK, LWORK, IWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          BALANC, JOBVL, JOBVR, SENSE
+      INTEGER            IHI, ILO, INFO, LDA, LDVL, LDVR, LWORK, N
+      REAL               ABNRM
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               A( LDA, * ), RCONDE( * ), RCONDV( * ),
+     $                   SCALE( * ), VL( LDVL, * ), VR( LDVR, * ),
+     $                   WI( * ), WORK( * ), WR( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGEEVX computes for an N-by-N real nonsymmetric matrix A, the
+*  eigenvalues and, optionally, the left and/or right eigenvectors.
+*
+*  Optionally also, it computes a balancing transformation to improve
+*  the conditioning of the eigenvalues and eigenvectors (ILO, IHI,
+*  SCALE, and ABNRM), reciprocal condition numbers for the eigenvalues
+*  (RCONDE), and reciprocal condition numbers for the right
+*  eigenvectors (RCONDV).
+*
+*  The right eigenvector v(j) of A satisfies
+*                   A * v(j) = lambda(j) * v(j)
+*  where lambda(j) is its eigenvalue.
+*  The left eigenvector u(j) of A satisfies
+*                u(j)**H * A = lambda(j) * u(j)**H
+*  where u(j)**H denotes the conjugate transpose of u(j).
+*
+*  The computed eigenvectors are normalized to have Euclidean norm
+*  equal to 1 and largest component real.
+*
+*  Balancing a matrix means permuting the rows and columns to make it
+*  more nearly upper triangular, and applying a diagonal similarity
+*  transformation D * A * D**(-1), where D is a diagonal matrix, to
+*  make its rows and columns closer in norm and the condition numbers
+*  of its eigenvalues and eigenvectors smaller.  The computed
+*  reciprocal condition numbers correspond to the balanced matrix.
+*  Permuting rows and columns will not change the condition numbers
+*  (in exact arithmetic) but diagonal scaling will.  For further
+*  explanation of balancing, see section 4.10.2 of the LAPACK
+*  Users' Guide.
+*
+*  Arguments
+*  =========
+*
+*  BALANC  (input) CHARACTER*1
+*          Indicates how the input matrix should be diagonally scaled
+*          and/or permuted to improve the conditioning of its
+*          eigenvalues.
+*          = 'N': Do not diagonally scale or permute;
+*          = 'P': Perform permutations to make the matrix more nearly
+*                 upper triangular. Do not diagonally scale;
+*          = 'S': Diagonally scale the matrix, i.e. replace A by
+*                 D*A*D**(-1), where D is a diagonal matrix chosen
+*                 to make the rows and columns of A more equal in
+*                 norm. Do not permute;
+*          = 'B': Both diagonally scale and permute A.
+*
+*          Computed reciprocal condition numbers will be for the matrix
+*          after balancing and/or permuting. Permuting does not change
+*          condition numbers (in exact arithmetic), but balancing does.
+*
+*  JOBVL   (input) CHARACTER*1
+*          = 'N': left eigenvectors of A are not computed;
+*          = 'V': left eigenvectors of A are computed.
+*          If SENSE = 'E' or 'B', JOBVL must = 'V'.
+*
+*  JOBVR   (input) CHARACTER*1
+*          = 'N': right eigenvectors of A are not computed;
+*          = 'V': right eigenvectors of A are computed.
+*          If SENSE = 'E' or 'B', JOBVR must = 'V'.
+*
+*  SENSE   (input) CHARACTER*1
+*          Determines which reciprocal condition numbers are computed.
+*          = 'N': None are computed;
+*          = 'E': Computed for eigenvalues only;
+*          = 'V': Computed for right eigenvectors only;
+*          = 'B': Computed for eigenvalues and right eigenvectors.
+*
+*          If SENSE = 'E' or 'B', both left and right eigenvectors
+*          must also be computed (JOBVL = 'V' and JOBVR = 'V').
+*
+*  N       (input) INTEGER
+*          The order of the matrix A. N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the N-by-N matrix A.
+*          On exit, A has been overwritten.  If JOBVL = 'V' or
+*          JOBVR = 'V', A contains the real Schur form of the balanced
+*          version of the input matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  WR      (output) REAL array, dimension (N)
+*  WI      (output) REAL array, dimension (N)
+*          WR and WI contain the real and imaginary parts,
+*          respectively, of the computed eigenvalues.  Complex
+*          conjugate pairs of eigenvalues will appear consecutively
+*          with the eigenvalue having the positive imaginary part
+*          first.
+*
+*  VL      (output) REAL array, dimension (LDVL,N)
+*          If JOBVL = 'V', the left eigenvectors u(j) are stored one
+*          after another in the columns of VL, in the same order
+*          as their eigenvalues.
+*          If JOBVL = 'N', VL is not referenced.
+*          If the j-th eigenvalue is real, then u(j) = VL(:,j),
+*          the j-th column of VL.
+*          If the j-th and (j+1)-st eigenvalues form a complex
+*          conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and
+*          u(j+1) = VL(:,j) - i*VL(:,j+1).
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the array VL.  LDVL >= 1; if
+*          JOBVL = 'V', LDVL >= N.
+*
+*  VR      (output) REAL array, dimension (LDVR,N)
+*          If JOBVR = 'V', the right eigenvectors v(j) are stored one
+*          after another in the columns of VR, in the same order
+*          as their eigenvalues.
+*          If JOBVR = 'N', VR is not referenced.
+*          If the j-th eigenvalue is real, then v(j) = VR(:,j),
+*          the j-th column of VR.
+*          If the j-th and (j+1)-st eigenvalues form a complex
+*          conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and
+*          v(j+1) = VR(:,j) - i*VR(:,j+1).
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the array VR.  LDVR >= 1, and if
+*          JOBVR = 'V', LDVR >= N.
+*
+*  ILO,IHI (output) INTEGER
+*          ILO and IHI are integer values determined when A was
+*          balanced.  The balanced A(i,j) = 0 if I > J and 
+*          J = 1,...,ILO-1 or I = IHI+1,...,N.
+*
+*  SCALE   (output) REAL array, dimension (N)
+*          Details of the permutations and scaling factors applied
+*          when balancing A.  If P(j) is the index of the row and column
+*          interchanged with row and column j, and D(j) is the scaling
+*          factor applied to row and column j, then
+*          SCALE(J) = P(J),    for J = 1,...,ILO-1
+*                   = D(J),    for J = ILO,...,IHI
+*                   = P(J)     for J = IHI+1,...,N.
+*          The order in which the interchanges are made is N to IHI+1,
+*          then 1 to ILO-1.
+*
+*  ABNRM   (output) REAL
+*          The one-norm of the balanced matrix (the maximum
+*          of the sum of absolute values of elements of any column).
+*
+*  RCONDE  (output) REAL array, dimension (N)
+*          RCONDE(j) is the reciprocal condition number of the j-th
+*          eigenvalue.
+*
+*  RCONDV  (output) REAL array, dimension (N)
+*          RCONDV(j) is the reciprocal condition number of the j-th
+*          right eigenvector.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.   If SENSE = 'N' or 'E',
+*          LWORK >= max(1,2*N), and if JOBVL = 'V' or JOBVR = 'V',
+*          LWORK >= 3*N.  If SENSE = 'V' or 'B', LWORK >= N*(N+6).
+*          For good performance, LWORK must generally be larger.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace) INTEGER array, dimension (2*N-2)
+*          If SENSE = 'N' or 'E', not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = i, the QR algorithm failed to compute all the
+*                eigenvalues, and no eigenvectors or condition numbers
+*                have been computed; elements 1:ILO-1 and i+1:N of WR
+*                and WI contain eigenvalues which have converged.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, SCALEA, WANTVL, WANTVR, WNTSNB, WNTSNE,
+     $                   WNTSNN, WNTSNV
+      CHARACTER          JOB, SIDE
+      INTEGER            HSWORK, I, ICOND, IERR, ITAU, IWRK, K, MAXB,
+     $                   MAXWRK, MINWRK, NOUT
+      REAL               ANRM, BIGNUM, CS, CSCALE, EPS, R, SCL, SMLNUM,
+     $                   SN
+*     ..
+*     .. Local Arrays ..
+      LOGICAL            SELECT( 1 )
+      REAL               DUM( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEBAK, SGEBAL, SGEHRD, SHSEQR, SLABAD, SLACPY,
+     $                   SLARTG, SLASCL, SORGHR, SROT, SSCAL, STREVC,
+     $                   STRSNA, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV, ISAMAX
+      REAL               SLAMCH, SLANGE, SLAPY2, SNRM2
+      EXTERNAL           LSAME, ILAENV, ISAMAX, SLAMCH, SLANGE, SLAPY2,
+     $                   SNRM2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      WANTVL = LSAME( JOBVL, 'V' )
+      WANTVR = LSAME( JOBVR, 'V' )
+      WNTSNN = LSAME( SENSE, 'N' )
+      WNTSNE = LSAME( SENSE, 'E' )
+      WNTSNV = LSAME( SENSE, 'V' )
+      WNTSNB = LSAME( SENSE, 'B' )
+      IF( .NOT.( LSAME( BALANC, 'N' ) .OR. LSAME( BALANC, 'S' ) .OR.
+     $    LSAME( BALANC, 'P' ) .OR. LSAME( BALANC, 'B' ) ) ) THEN
+         INFO = -1
+      ELSE IF( ( .NOT.WANTVL ) .AND. ( .NOT.LSAME( JOBVL, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( ( .NOT.WANTVR ) .AND. ( .NOT.LSAME( JOBVR, 'N' ) ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.( WNTSNN .OR. WNTSNE .OR. WNTSNB .OR. WNTSNV ) .OR.
+     $         ( ( WNTSNE .OR. WNTSNB ) .AND. .NOT.( WANTVL .AND.
+     $         WANTVR ) ) ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDVL.LT.1 .OR. ( WANTVL .AND. LDVL.LT.N ) ) THEN
+         INFO = -11
+      ELSE IF( LDVR.LT.1 .OR. ( WANTVR .AND. LDVR.LT.N ) ) THEN
+         INFO = -13
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV.
+*       HSWORK refers to the workspace preferred by SHSEQR, as
+*       calculated below. HSWORK is computed assuming ILO=1 and IHI=N,
+*       the worst case.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN
+         MAXWRK = N + N*ILAENV( 1, 'SGEHRD', ' ', N, 1, N, 0 )
+         IF( ( .NOT.WANTVL ) .AND. ( .NOT.WANTVR ) ) THEN
+            MINWRK = MAX( 1, 2*N )
+            IF( .NOT.WNTSNN )
+     $         MINWRK = MAX( MINWRK, N*N+6*N )
+            MAXB = MAX( ILAENV( 8, 'SHSEQR', 'SN', N, 1, N, -1 ), 2 )
+            IF( WNTSNN ) THEN
+               K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'SHSEQR', 'EN', N,
+     $             1, N, -1 ) ) )
+            ELSE
+               K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'SHSEQR', 'SN', N,
+     $             1, N, -1 ) ) )
+            END IF
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, 1, HSWORK )
+            IF( .NOT.WNTSNN )
+     $         MAXWRK = MAX( MAXWRK, N*N+6*N )
+         ELSE
+            MINWRK = MAX( 1, 3*N )
+            IF( ( .NOT.WNTSNN ) .AND. ( .NOT.WNTSNE ) )
+     $         MINWRK = MAX( MINWRK, N*N+6*N )
+            MAXB = MAX( ILAENV( 8, 'SHSEQR', 'SN', N, 1, N, -1 ), 2 )
+            K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'SHSEQR', 'EN', N, 1,
+     $          N, -1 ) ) )
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, 1, HSWORK )
+            MAXWRK = MAX( MAXWRK, N+( N-1 )*
+     $               ILAENV( 1, 'SORGHR', ' ', N, 1, N, -1 ) )
+            IF( ( .NOT.WNTSNN ) .AND. ( .NOT.WNTSNE ) )
+     $         MAXWRK = MAX( MAXWRK, N*N+6*N )
+            MAXWRK = MAX( MAXWRK, 3*N, 1 )
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
+         INFO = -21
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGEEVX', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SQRT( SMLNUM ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ICOND = 0
+      ANRM = SLANGE( 'M', N, N, A, LDA, DUM )
+      SCALEA = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = SMLNUM
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = BIGNUM
+      END IF
+      IF( SCALEA )
+     $   CALL SLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR )
+*
+*     Balance the matrix and compute ABNRM
+*
+      CALL SGEBAL( BALANC, N, A, LDA, ILO, IHI, SCALE, IERR )
+      ABNRM = SLANGE( '1', N, N, A, LDA, DUM )
+      IF( SCALEA ) THEN
+         DUM( 1 ) = ABNRM
+         CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, 1, 1, DUM, 1, IERR )
+         ABNRM = DUM( 1 )
+      END IF
+*
+*     Reduce to upper Hessenberg form
+*     (Workspace: need 2*N, prefer N+N*NB)
+*
+      ITAU = 1
+      IWRK = ITAU + N
+      CALL SGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ),
+     $             LWORK-IWRK+1, IERR )
+*
+      IF( WANTVL ) THEN
+*
+*        Want left eigenvectors
+*        Copy Householder vectors to VL
+*
+         SIDE = 'L'
+         CALL SLACPY( 'L', N, N, A, LDA, VL, LDVL )
+*
+*        Generate orthogonal matrix in VL
+*        (Workspace: need 2*N-1, prefer N+(N-1)*NB)
+*
+         CALL SORGHR( N, ILO, IHI, VL, LDVL, WORK( ITAU ), WORK( IWRK ),
+     $                LWORK-IWRK+1, IERR )
+*
+*        Perform QR iteration, accumulating Schur vectors in VL
+*        (Workspace: need 1, prefer HSWORK (see comments) )
+*
+         IWRK = ITAU
+         CALL SHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, WR, WI, VL, LDVL,
+     $                WORK( IWRK ), LWORK-IWRK+1, INFO )
+*
+         IF( WANTVR ) THEN
+*
+*           Want left and right eigenvectors
+*           Copy Schur vectors to VR
+*
+            SIDE = 'B'
+            CALL SLACPY( 'F', N, N, VL, LDVL, VR, LDVR )
+         END IF
+*
+      ELSE IF( WANTVR ) THEN
+*
+*        Want right eigenvectors
+*        Copy Householder vectors to VR
+*
+         SIDE = 'R'
+         CALL SLACPY( 'L', N, N, A, LDA, VR, LDVR )
+*
+*        Generate orthogonal matrix in VR
+*        (Workspace: need 2*N-1, prefer N+(N-1)*NB)
+*
+         CALL SORGHR( N, ILO, IHI, VR, LDVR, WORK( ITAU ), WORK( IWRK ),
+     $                LWORK-IWRK+1, IERR )
+*
+*        Perform QR iteration, accumulating Schur vectors in VR
+*        (Workspace: need 1, prefer HSWORK (see comments) )
+*
+         IWRK = ITAU
+         CALL SHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, WR, WI, VR, LDVR,
+     $                WORK( IWRK ), LWORK-IWRK+1, INFO )
+*
+      ELSE
+*
+*        Compute eigenvalues only
+*        If condition numbers desired, compute Schur form
+*
+         IF( WNTSNN ) THEN
+            JOB = 'E'
+         ELSE
+            JOB = 'S'
+         END IF
+*
+*        (Workspace: need 1, prefer HSWORK (see comments) )
+*
+         IWRK = ITAU
+         CALL SHSEQR( JOB, 'N', N, ILO, IHI, A, LDA, WR, WI, VR, LDVR,
+     $                WORK( IWRK ), LWORK-IWRK+1, INFO )
+      END IF
+*
+*     If INFO > 0 from SHSEQR, then quit
+*
+      IF( INFO.GT.0 )
+     $   GO TO 50
+*
+      IF( WANTVL .OR. WANTVR ) THEN
+*
+*        Compute left and/or right eigenvectors
+*        (Workspace: need 3*N)
+*
+         CALL STREVC( SIDE, 'B', SELECT, N, A, LDA, VL, LDVL, VR, LDVR,
+     $                N, NOUT, WORK( IWRK ), IERR )
+      END IF
+*
+*     Compute condition numbers if desired
+*     (Workspace: need N*N+6*N unless SENSE = 'E')
+*
+      IF( .NOT.WNTSNN ) THEN
+         CALL STRSNA( SENSE, 'A', SELECT, N, A, LDA, VL, LDVL, VR, LDVR,
+     $                RCONDE, RCONDV, N, NOUT, WORK( IWRK ), N, IWORK,
+     $                ICOND )
+      END IF
+*
+      IF( WANTVL ) THEN
+*
+*        Undo balancing of left eigenvectors
+*
+         CALL SGEBAK( BALANC, 'L', N, ILO, IHI, SCALE, N, VL, LDVL,
+     $                IERR )
+*
+*        Normalize left eigenvectors and make largest component real
+*
+         DO 20 I = 1, N
+            IF( WI( I ).EQ.ZERO ) THEN
+               SCL = ONE / SNRM2( N, VL( 1, I ), 1 )
+               CALL SSCAL( N, SCL, VL( 1, I ), 1 )
+            ELSE IF( WI( I ).GT.ZERO ) THEN
+               SCL = ONE / SLAPY2( SNRM2( N, VL( 1, I ), 1 ),
+     $               SNRM2( N, VL( 1, I+1 ), 1 ) )
+               CALL SSCAL( N, SCL, VL( 1, I ), 1 )
+               CALL SSCAL( N, SCL, VL( 1, I+1 ), 1 )
+               DO 10 K = 1, N
+                  WORK( K ) = VL( K, I )**2 + VL( K, I+1 )**2
+   10          CONTINUE
+               K = ISAMAX( N, WORK, 1 )
+               CALL SLARTG( VL( K, I ), VL( K, I+1 ), CS, SN, R )
+               CALL SROT( N, VL( 1, I ), 1, VL( 1, I+1 ), 1, CS, SN )
+               VL( K, I+1 ) = ZERO
+            END IF
+   20    CONTINUE
+      END IF
+*
+      IF( WANTVR ) THEN
+*
+*        Undo balancing of right eigenvectors
+*
+         CALL SGEBAK( BALANC, 'R', N, ILO, IHI, SCALE, N, VR, LDVR,
+     $                IERR )
+*
+*        Normalize right eigenvectors and make largest component real
+*
+         DO 40 I = 1, N
+            IF( WI( I ).EQ.ZERO ) THEN
+               SCL = ONE / SNRM2( N, VR( 1, I ), 1 )
+               CALL SSCAL( N, SCL, VR( 1, I ), 1 )
+            ELSE IF( WI( I ).GT.ZERO ) THEN
+               SCL = ONE / SLAPY2( SNRM2( N, VR( 1, I ), 1 ),
+     $               SNRM2( N, VR( 1, I+1 ), 1 ) )
+               CALL SSCAL( N, SCL, VR( 1, I ), 1 )
+               CALL SSCAL( N, SCL, VR( 1, I+1 ), 1 )
+               DO 30 K = 1, N
+                  WORK( K ) = VR( K, I )**2 + VR( K, I+1 )**2
+   30          CONTINUE
+               K = ISAMAX( N, WORK, 1 )
+               CALL SLARTG( VR( K, I ), VR( K, I+1 ), CS, SN, R )
+               CALL SROT( N, VR( 1, I ), 1, VR( 1, I+1 ), 1, CS, SN )
+               VR( K, I+1 ) = ZERO
+            END IF
+   40    CONTINUE
+      END IF
+*
+*     Undo scaling if necessary
+*
+   50 CONTINUE
+      IF( SCALEA ) THEN
+         CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, WR( INFO+1 ),
+     $                MAX( N-INFO, 1 ), IERR )
+         CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, WI( INFO+1 ),
+     $                MAX( N-INFO, 1 ), IERR )
+         IF( INFO.EQ.0 ) THEN
+            IF( ( WNTSNV .OR. WNTSNB ) .AND. ICOND.EQ.0 )
+     $         CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, RCONDV, N,
+     $                      IERR )
+         ELSE
+            CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WR, N,
+     $                   IERR )
+            CALL SLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, WI, N,
+     $                   IERR )
+         END IF
+      END IF
+*
+      WORK( 1 ) = MAXWRK
+      RETURN
+*
+*     End of SGEEVX
+*
+      END
diff --git a/costa/native/external/lapack/sgegs.f b/costa/native/external/lapack/sgegs.f
new file mode 100644
index 000000000..c5f9237b2
--- /dev/null
+++ b/costa/native/external/lapack/sgegs.f
@@ -0,0 +1,470 @@
+      SUBROUTINE SGEGS( JOBVSL, JOBVSR, N, A, LDA, B, LDB, ALPHAR,
+     $                  ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, WORK,
+     $                  LWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVSL, JOBVSR
+      INTEGER            INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), ALPHAI( * ), ALPHAR( * ),
+     $                   B( LDB, * ), BETA( * ), VSL( LDVSL, * ),
+     $                   VSR( LDVSR, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This routine is deprecated and has been replaced by routine SGGES.
+*
+*  SGEGS computes for a pair of N-by-N real nonsymmetric matrices A, B:
+*  the generalized eigenvalues (alphar +/- alphai*i, beta), the real
+*  Schur form (A, B), and optionally left and/or right Schur vectors
+*  (VSL and VSR).
+*
+*  (If only the generalized eigenvalues are needed, use the driver SGEGV
+*  instead.)
+*
+*  A generalized eigenvalue for a pair of matrices (A,B) is, roughly
+*  speaking, a scalar w or a ratio  alpha/beta = w, such that  A - w*B
+*  is singular.  It is usually represented as the pair (alpha,beta),
+*  as there is a reasonable interpretation for beta=0, and even for
+*  both being zero.  A good beginning reference is the book, "Matrix
+*  Computations", by G. Golub & C. van Loan (Johns Hopkins U. Press)
+*
+*  The (generalized) Schur form of a pair of matrices is the result of
+*  multiplying both matrices on the left by one orthogonal matrix and
+*  both on the right by another orthogonal matrix, these two orthogonal
+*  matrices being chosen so as to bring the pair of matrices into
+*  (real) Schur form.
+*
+*  A pair of matrices A, B is in generalized real Schur form if B is
+*  upper triangular with non-negative diagonal and A is block upper
+*  triangular with 1-by-1 and 2-by-2 blocks.  1-by-1 blocks correspond
+*  to real generalized eigenvalues, while 2-by-2 blocks of A will be
+*  "standardized" by making the corresponding elements of B have the
+*  form:
+*          [  a  0  ]
+*          [  0  b  ]
+*
+*  and the pair of corresponding 2-by-2 blocks in A and B will
+*  have a complex conjugate pair of generalized eigenvalues.
+*
+*  The left and right Schur vectors are the columns of VSL and VSR,
+*  respectively, where VSL and VSR are the orthogonal matrices
+*  which reduce A and B to Schur form:
+*
+*  Schur form of (A,B) = ( (VSL)**T A (VSR), (VSL)**T B (VSR) )
+*
+*  Arguments
+*  =========
+*
+*  JOBVSL  (input) CHARACTER*1
+*          = 'N':  do not compute the left Schur vectors;
+*          = 'V':  compute the left Schur vectors.
+*
+*  JOBVSR  (input) CHARACTER*1
+*          = 'N':  do not compute the right Schur vectors;
+*          = 'V':  compute the right Schur vectors.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A, B, VSL, and VSR.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA, N)
+*          On entry, the first of the pair of matrices whose generalized
+*          eigenvalues and (optionally) Schur vectors are to be
+*          computed.
+*          On exit, the generalized Schur form of A.
+*          Note: to avoid overflow, the Frobenius norm of the matrix
+*          A should be less than the overflow threshold.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of A.  LDA >= max(1,N).
+*
+*  B       (input/output) REAL array, dimension (LDB, N)
+*          On entry, the second of the pair of matrices whose
+*          generalized eigenvalues and (optionally) Schur vectors are
+*          to be computed.
+*          On exit, the generalized Schur form of B.
+*          Note: to avoid overflow, the Frobenius norm of the matrix
+*          B should be less than the overflow threshold.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of B.  LDB >= max(1,N).
+*
+*  ALPHAR  (output) REAL array, dimension (N)
+*  ALPHAI  (output) REAL array, dimension (N)
+*  BETA    (output) REAL array, dimension (N)
+*          On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will
+*          be the generalized eigenvalues.  ALPHAR(j) + ALPHAI(j)*i,
+*          j=1,...,N  and  BETA(j),j=1,...,N  are the diagonals of the
+*          complex Schur form (A,B) that would result if the 2-by-2
+*          diagonal blocks of the real Schur form of (A,B) were further
+*          reduced to triangular form using 2-by-2 complex unitary
+*          transformations.  If ALPHAI(j) is zero, then the j-th
+*          eigenvalue is real; if positive, then the j-th and (j+1)-st
+*          eigenvalues are a complex conjugate pair, with ALPHAI(j+1)
+*          negative.
+*
+*          Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j)
+*          may easily over- or underflow, and BETA(j) may even be zero.
+*          Thus, the user should avoid naively computing the ratio
+*          alpha/beta.  However, ALPHAR and ALPHAI will be always less
+*          than and usually comparable with norm(A) in magnitude, and
+*          BETA always less than and usually comparable with norm(B).
+*
+*  VSL     (output) REAL array, dimension (LDVSL,N)
+*          If JOBVSL = 'V', VSL will contain the left Schur vectors.
+*          (See "Purpose", above.)
+*          Not referenced if JOBVSL = 'N'.
+*
+*  LDVSL   (input) INTEGER
+*          The leading dimension of the matrix VSL. LDVSL >=1, and
+*          if JOBVSL = 'V', LDVSL >= N.
+*
+*  VSR     (output) REAL array, dimension (LDVSR,N)
+*          If JOBVSR = 'V', VSR will contain the right Schur vectors.
+*          (See "Purpose", above.)
+*          Not referenced if JOBVSR = 'N'.
+*
+*  LDVSR   (input) INTEGER
+*          The leading dimension of the matrix VSR. LDVSR >= 1, and
+*          if JOBVSR = 'V', LDVSR >= N.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,4*N).
+*          For good performance, LWORK must generally be larger.
+*          To compute the optimal value of LWORK, call ILAENV to get
+*          blocksizes (for SGEQRF, SORMQR, and SORGQR.)  Then compute:
+*          NB  -- MAX of the blocksizes for SGEQRF, SORMQR, and SORGQR
+*          The optimal LWORK is  2*N + N*(NB+1).
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          = 1,...,N:
+*                The QZ iteration failed.  (A,B) are not in Schur
+*                form, but ALPHAR(j), ALPHAI(j), and BETA(j) should
+*                be correct for j=INFO+1,...,N.
+*          > N:  errors that usually indicate LAPACK problems:
+*                =N+1: error return from SGGBAL
+*                =N+2: error return from SGEQRF
+*                =N+3: error return from SORMQR
+*                =N+4: error return from SORGQR
+*                =N+5: error return from SGGHRD
+*                =N+6: error return from SHGEQZ (other than failed
+*                                                iteration)
+*                =N+7: error return from SGGBAK (computing VSL)
+*                =N+8: error return from SGGBAK (computing VSR)
+*                =N+9: error return from SLASCL (various places)
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ILASCL, ILBSCL, ILVSL, ILVSR, LQUERY
+      INTEGER            ICOLS, IHI, IINFO, IJOBVL, IJOBVR, ILEFT,
+     $                   ILO, IRIGHT, IROWS, ITAU, IWORK, LOPT, LWKMIN,
+     $                   LWKOPT, NB, NB1, NB2, NB3
+      REAL               ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS,
+     $                   SAFMIN, SMLNUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEQRF, SGGBAK, SGGBAL, SGGHRD, SHGEQZ, SLACPY,
+     $                   SLASCL, SLASET, SORGQR, SORMQR, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               SLAMCH, SLANGE
+      EXTERNAL           ILAENV, LSAME, SLAMCH, SLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode the input arguments
+*
+      IF( LSAME( JOBVSL, 'N' ) ) THEN
+         IJOBVL = 1
+         ILVSL = .FALSE.
+      ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN
+         IJOBVL = 2
+         ILVSL = .TRUE.
+      ELSE
+         IJOBVL = -1
+         ILVSL = .FALSE.
+      END IF
+*
+      IF( LSAME( JOBVSR, 'N' ) ) THEN
+         IJOBVR = 1
+         ILVSR = .FALSE.
+      ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN
+         IJOBVR = 2
+         ILVSR = .TRUE.
+      ELSE
+         IJOBVR = -1
+         ILVSR = .FALSE.
+      END IF
+*
+*     Test the input arguments
+*
+      LWKMIN = MAX( 4*N, 1 )
+      LWKOPT = LWKMIN
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      INFO = 0
+      IF( IJOBVL.LE.0 ) THEN
+         INFO = -1
+      ELSE IF( IJOBVR.LE.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN
+         INFO = -12
+      ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN
+         INFO = -14
+      ELSE IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -16
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB1 = ILAENV( 1, 'SGEQRF', ' ', N, N, -1, -1 )
+         NB2 = ILAENV( 1, 'SORMQR', ' ', N, N, N, -1 )
+         NB3 = ILAENV( 1, 'SORGQR', ' ', N, N, N, -1 )
+         NB = MAX( NB1, NB2, NB3 )
+         LOPT = 2*N+N*(NB+1)
+         WORK( 1 ) = LOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGEGS ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'E' )*SLAMCH( 'B' )
+      SAFMIN = SLAMCH( 'S' )
+      SMLNUM = N*SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = SLANGE( 'M', N, N, A, LDA, WORK )
+      ILASCL = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         ANRMTO = SMLNUM
+         ILASCL = .TRUE.
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         ANRMTO = BIGNUM
+         ILASCL = .TRUE.
+      END IF
+*
+      IF( ILASCL ) THEN
+         CALL SLASCL( 'G', -1, -1, ANRM, ANRMTO, N, N, A, LDA, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 9
+            RETURN
+         END IF
+      END IF
+*
+*     Scale B if max element outside range [SMLNUM,BIGNUM]
+*
+      BNRM = SLANGE( 'M', N, N, B, LDB, WORK )
+      ILBSCL = .FALSE.
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+         BNRMTO = SMLNUM
+         ILBSCL = .TRUE.
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+         BNRMTO = BIGNUM
+         ILBSCL = .TRUE.
+      END IF
+*
+      IF( ILBSCL ) THEN
+         CALL SLASCL( 'G', -1, -1, BNRM, BNRMTO, N, N, B, LDB, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 9
+            RETURN
+         END IF
+      END IF
+*
+*     Permute the matrix to make it more nearly triangular
+*     Workspace layout:  (2*N words -- "work..." not actually used)
+*        left_permutation, right_permutation, work...
+*
+      ILEFT = 1
+      IRIGHT = N + 1
+      IWORK = IRIGHT + N
+      CALL SGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ),
+     $             WORK( IRIGHT ), WORK( IWORK ), IINFO )
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 1
+         GO TO 10
+      END IF
+*
+*     Reduce B to triangular form, and initialize VSL and/or VSR
+*     Workspace layout:  ("work..." must have at least N words)
+*        left_permutation, right_permutation, tau, work...
+*
+      IROWS = IHI + 1 - ILO
+      ICOLS = N + 1 - ILO
+      ITAU = IWORK
+      IWORK = ITAU + IROWS
+      CALL SGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ),
+     $             WORK( IWORK ), LWORK+1-IWORK, IINFO )
+      IF( IINFO.GE.0 )
+     $   LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 2
+         GO TO 10
+      END IF
+*
+      CALL SORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB,
+     $             WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWORK ),
+     $             LWORK+1-IWORK, IINFO )
+      IF( IINFO.GE.0 )
+     $   LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 3
+         GO TO 10
+      END IF
+*
+      IF( ILVSL ) THEN
+         CALL SLASET( 'Full', N, N, ZERO, ONE, VSL, LDVSL )
+         CALL SLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB,
+     $                VSL( ILO+1, ILO ), LDVSL )
+         CALL SORGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL,
+     $                WORK( ITAU ), WORK( IWORK ), LWORK+1-IWORK,
+     $                IINFO )
+         IF( IINFO.GE.0 )
+     $      LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 4
+            GO TO 10
+         END IF
+      END IF
+*
+      IF( ILVSR )
+     $   CALL SLASET( 'Full', N, N, ZERO, ONE, VSR, LDVSR )
+*
+*     Reduce to generalized Hessenberg form
+*
+      CALL SGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL,
+     $             LDVSL, VSR, LDVSR, IINFO )
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 5
+         GO TO 10
+      END IF
+*
+*     Perform QZ algorithm, computing Schur vectors if desired
+*     Workspace layout:  ("work..." must have at least 1 word)
+*        left_permutation, right_permutation, work...
+*
+      IWORK = ITAU
+      CALL SHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB,
+     $             ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR,
+     $             WORK( IWORK ), LWORK+1-IWORK, IINFO )
+      IF( IINFO.GE.0 )
+     $   LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+      IF( IINFO.NE.0 ) THEN
+         IF( IINFO.GT.0 .AND. IINFO.LE.N ) THEN
+            INFO = IINFO
+         ELSE IF( IINFO.GT.N .AND. IINFO.LE.2*N ) THEN
+            INFO = IINFO - N
+         ELSE
+            INFO = N + 6
+         END IF
+         GO TO 10
+      END IF
+*
+*     Apply permutation to VSL and VSR
+*
+      IF( ILVSL ) THEN
+         CALL SGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ),
+     $                WORK( IRIGHT ), N, VSL, LDVSL, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 7
+            GO TO 10
+         END IF
+      END IF
+      IF( ILVSR ) THEN
+         CALL SGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ),
+     $                WORK( IRIGHT ), N, VSR, LDVSR, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 8
+            GO TO 10
+         END IF
+      END IF
+*
+*     Undo scaling
+*
+      IF( ILASCL ) THEN
+         CALL SLASCL( 'H', -1, -1, ANRMTO, ANRM, N, N, A, LDA, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 9
+            RETURN
+         END IF
+         CALL SLASCL( 'G', -1, -1, ANRMTO, ANRM, N, 1, ALPHAR, N,
+     $                IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 9
+            RETURN
+         END IF
+         CALL SLASCL( 'G', -1, -1, ANRMTO, ANRM, N, 1, ALPHAI, N,
+     $                IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 9
+            RETURN
+         END IF
+      END IF
+*
+      IF( ILBSCL ) THEN
+         CALL SLASCL( 'U', -1, -1, BNRMTO, BNRM, N, N, B, LDB, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 9
+            RETURN
+         END IF
+         CALL SLASCL( 'G', -1, -1, BNRMTO, BNRM, N, 1, BETA, N, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 9
+            RETURN
+         END IF
+      END IF
+*
+   10 CONTINUE
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of SGEGS
+*
+      END
diff --git a/costa/native/external/lapack/sgegv.f b/costa/native/external/lapack/sgegv.f
new file mode 100644
index 000000000..ab5a4953a
--- /dev/null
+++ b/costa/native/external/lapack/sgegv.f
@@ -0,0 +1,641 @@
+      SUBROUTINE SGEGV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, ALPHAI,
+     $                  BETA, VL, LDVL, VR, LDVR, WORK, LWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVL, JOBVR
+      INTEGER            INFO, LDA, LDB, LDVL, LDVR, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), ALPHAI( * ), ALPHAR( * ),
+     $                   B( LDB, * ), BETA( * ), VL( LDVL, * ),
+     $                   VR( LDVR, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This routine is deprecated and has been replaced by routine SGGEV.
+*
+*  SGEGV computes for a pair of n-by-n real nonsymmetric matrices A and
+*  B, the generalized eigenvalues (alphar +/- alphai*i, beta), and
+*  optionally, the left and/or right generalized eigenvectors (VL and
+*  VR).
+*
+*  A generalized eigenvalue for a pair of matrices (A,B) is, roughly
+*  speaking, a scalar w or a ratio  alpha/beta = w, such that  A - w*B
+*  is singular.  It is usually represented as the pair (alpha,beta),
+*  as there is a reasonable interpretation for beta=0, and even for
+*  both being zero.  A good beginning reference is the book, "Matrix
+*  Computations", by G. Golub & C. van Loan (Johns Hopkins U. Press)
+*
+*  A right generalized eigenvector corresponding to a generalized
+*  eigenvalue  w  for a pair of matrices (A,B) is a vector  r  such
+*  that  (A - w B) r = 0 .  A left generalized eigenvector is a vector
+*  l such that l**H * (A - w B) = 0, where l**H is the
+*  conjugate-transpose of l.
+*
+*  Note: this routine performs "full balancing" on A and B -- see
+*  "Further Details", below.
+*
+*  Arguments
+*  =========
+*
+*  JOBVL   (input) CHARACTER*1
+*          = 'N':  do not compute the left generalized eigenvectors;
+*          = 'V':  compute the left generalized eigenvectors.
+*
+*  JOBVR   (input) CHARACTER*1
+*          = 'N':  do not compute the right generalized eigenvectors;
+*          = 'V':  compute the right generalized eigenvectors.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A, B, VL, and VR.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA, N)
+*          On entry, the first of the pair of matrices whose
+*          generalized eigenvalues and (optionally) generalized
+*          eigenvectors are to be computed.
+*          On exit, the contents will have been destroyed.  (For a
+*          description of the contents of A on exit, see "Further
+*          Details", below.)
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of A.  LDA >= max(1,N).
+*
+*  B       (input/output) REAL array, dimension (LDB, N)
+*          On entry, the second of the pair of matrices whose
+*          generalized eigenvalues and (optionally) generalized
+*          eigenvectors are to be computed.
+*          On exit, the contents will have been destroyed.  (For a
+*          description of the contents of B on exit, see "Further
+*          Details", below.)
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of B.  LDB >= max(1,N).
+*
+*  ALPHAR  (output) REAL array, dimension (N)
+*  ALPHAI  (output) REAL array, dimension (N)
+*  BETA    (output) REAL array, dimension (N)
+*          On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will
+*          be the generalized eigenvalues.  If ALPHAI(j) is zero, then
+*          the j-th eigenvalue is real; if positive, then the j-th and
+*          (j+1)-st eigenvalues are a complex conjugate pair, with
+*          ALPHAI(j+1) negative.
+*
+*          Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j)
+*          may easily over- or underflow, and BETA(j) may even be zero.
+*          Thus, the user should avoid naively computing the ratio
+*          alpha/beta.  However, ALPHAR and ALPHAI will be always less
+*          than and usually comparable with norm(A) in magnitude, and
+*          BETA always less than and usually comparable with norm(B).
+*
+*  VL      (output) REAL array, dimension (LDVL,N)
+*          If JOBVL = 'V', the left generalized eigenvectors.  (See
+*          "Purpose", above.)  Real eigenvectors take one column,
+*          complex take two columns, the first for the real part and
+*          the second for the imaginary part.  Complex eigenvectors
+*          correspond to an eigenvalue with positive imaginary part.
+*          Each eigenvector will be scaled so the largest component
+*          will have abs(real part) + abs(imag. part) = 1, *except*
+*          that for eigenvalues with alpha=beta=0, a zero vector will
+*          be returned as the corresponding eigenvector.
+*          Not referenced if JOBVL = 'N'.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the matrix VL. LDVL >= 1, and
+*          if JOBVL = 'V', LDVL >= N.
+*
+*  VR      (output) REAL array, dimension (LDVR,N)
+*          If JOBVR = 'V', the right generalized eigenvectors.  (See
+*          "Purpose", above.)  Real eigenvectors take one column,
+*          complex take two columns, the first for the real part and
+*          the second for the imaginary part.  Complex eigenvectors
+*          correspond to an eigenvalue with positive imaginary part.
+*          Each eigenvector will be scaled so the largest component
+*          will have abs(real part) + abs(imag. part) = 1, *except*
+*          that for eigenvalues with alpha=beta=0, a zero vector will
+*          be returned as the corresponding eigenvector.
+*          Not referenced if JOBVR = 'N'.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the matrix VR. LDVR >= 1, and
+*          if JOBVR = 'V', LDVR >= N.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,8*N).
+*          For good performance, LWORK must generally be larger.
+*          To compute the optimal value of LWORK, call ILAENV to get
+*          blocksizes (for SGEQRF, SORMQR, and SORGQR.)  Then compute:
+*          NB  -- MAX of the blocksizes for SGEQRF, SORMQR, and SORGQR;
+*          The optimal LWORK is:
+*              2*N + MAX( 6*N, N*(NB+1) ).
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          = 1,...,N:
+*                The QZ iteration failed.  No eigenvectors have been
+*                calculated, but ALPHAR(j), ALPHAI(j), and BETA(j)
+*                should be correct for j=INFO+1,...,N.
+*          > N:  errors that usually indicate LAPACK problems:
+*                =N+1: error return from SGGBAL
+*                =N+2: error return from SGEQRF
+*                =N+3: error return from SORMQR
+*                =N+4: error return from SORGQR
+*                =N+5: error return from SGGHRD
+*                =N+6: error return from SHGEQZ (other than failed
+*                                                iteration)
+*                =N+7: error return from STGEVC
+*                =N+8: error return from SGGBAK (computing VL)
+*                =N+9: error return from SGGBAK (computing VR)
+*                =N+10: error return from SLASCL (various calls)
+*
+*  Further Details
+*  ===============
+*
+*  Balancing
+*  ---------
+*
+*  This driver calls SGGBAL to both permute and scale rows and columns
+*  of A and B.  The permutations PL and PR are chosen so that PL*A*PR
+*  and PL*B*R will be upper triangular except for the diagonal blocks
+*  A(i:j,i:j) and B(i:j,i:j), with i and j as close together as
+*  possible.  The diagonal scaling matrices DL and DR are chosen so
+*  that the pair  DL*PL*A*PR*DR, DL*PL*B*PR*DR have elements close to
+*  one (except for the elements that start out zero.)
+*
+*  After the eigenvalues and eigenvectors of the balanced matrices
+*  have been computed, SGGBAK transforms the eigenvectors back to what
+*  they would have been (in perfect arithmetic) if they had not been
+*  balanced.
+*
+*  Contents of A and B on Exit
+*  -------- -- - --- - -- ----
+*
+*  If any eigenvectors are computed (either JOBVL='V' or JOBVR='V' or
+*  both), then on exit the arrays A and B will contain the real Schur
+*  form[*] of the "balanced" versions of A and B.  If no eigenvectors
+*  are computed, then only the diagonal blocks will be correct.
+*
+*  [*] See SHGEQZ, SGEGS, or read the book "Matrix Computations",
+*      by Golub & van Loan, pub. by Johns Hopkins U. Press.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ILIMIT, ILV, ILVL, ILVR, LQUERY
+      CHARACTER          CHTEMP
+      INTEGER            ICOLS, IHI, IINFO, IJOBVL, IJOBVR, ILEFT, ILO,
+     $                   IN, IRIGHT, IROWS, ITAU, IWORK, JC, JR, LOPT,
+     $                   LWKMIN, LWKOPT, NB, NB1, NB2, NB3
+      REAL               ABSAI, ABSAR, ABSB, ANRM, ANRM1, ANRM2, BNRM,
+     $                   BNRM1, BNRM2, EPS, ONEPLS, SAFMAX, SAFMIN,
+     $                   SALFAI, SALFAR, SBETA, SCALE, TEMP
+*     ..
+*     .. Local Arrays ..
+      LOGICAL            LDUMMA( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEQRF, SGGBAK, SGGBAL, SGGHRD, SHGEQZ, SLACPY,
+     $                   SLASCL, SLASET, SORGQR, SORMQR, STGEVC, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               SLAMCH, SLANGE
+      EXTERNAL           ILAENV, LSAME, SLAMCH, SLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, INT, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode the input arguments
+*
+      IF( LSAME( JOBVL, 'N' ) ) THEN
+         IJOBVL = 1
+         ILVL = .FALSE.
+      ELSE IF( LSAME( JOBVL, 'V' ) ) THEN
+         IJOBVL = 2
+         ILVL = .TRUE.
+      ELSE
+         IJOBVL = -1
+         ILVL = .FALSE.
+      END IF
+*
+      IF( LSAME( JOBVR, 'N' ) ) THEN
+         IJOBVR = 1
+         ILVR = .FALSE.
+      ELSE IF( LSAME( JOBVR, 'V' ) ) THEN
+         IJOBVR = 2
+         ILVR = .TRUE.
+      ELSE
+         IJOBVR = -1
+         ILVR = .FALSE.
+      END IF
+      ILV = ILVL .OR. ILVR
+*
+*     Test the input arguments
+*
+      LWKMIN = MAX( 8*N, 1 )
+      LWKOPT = LWKMIN
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      INFO = 0
+      IF( IJOBVL.LE.0 ) THEN
+         INFO = -1
+      ELSE IF( IJOBVR.LE.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN
+         INFO = -12
+      ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN
+         INFO = -14
+      ELSE IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -16
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB1 = ILAENV( 1, 'SGEQRF', ' ', N, N, -1, -1 )
+         NB2 = ILAENV( 1, 'SORMQR', ' ', N, N, N, -1 )
+         NB3 = ILAENV( 1, 'SORGQR', ' ', N, N, N, -1 )
+         NB = MAX( NB1, NB2, NB3 )
+         LOPT = 2*N + MAX( 6*N, N*(NB+1) )
+         WORK( 1 ) = LOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGEGV ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'E' )*SLAMCH( 'B' )
+      SAFMIN = SLAMCH( 'S' )
+      SAFMIN = SAFMIN + SAFMIN
+      SAFMAX = ONE / SAFMIN
+      ONEPLS = ONE + ( 4*EPS )
+*
+*     Scale A
+*
+      ANRM = SLANGE( 'M', N, N, A, LDA, WORK )
+      ANRM1 = ANRM
+      ANRM2 = ONE
+      IF( ANRM.LT.ONE ) THEN
+         IF( SAFMAX*ANRM.LT.ONE ) THEN
+            ANRM1 = SAFMIN
+            ANRM2 = SAFMAX*ANRM
+         END IF
+      END IF
+*
+      IF( ANRM.GT.ZERO ) THEN
+         CALL SLASCL( 'G', -1, -1, ANRM, ONE, N, N, A, LDA, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 10
+            RETURN
+         END IF
+      END IF
+*
+*     Scale B
+*
+      BNRM = SLANGE( 'M', N, N, B, LDB, WORK )
+      BNRM1 = BNRM
+      BNRM2 = ONE
+      IF( BNRM.LT.ONE ) THEN
+         IF( SAFMAX*BNRM.LT.ONE ) THEN
+            BNRM1 = SAFMIN
+            BNRM2 = SAFMAX*BNRM
+         END IF
+      END IF
+*
+      IF( BNRM.GT.ZERO ) THEN
+         CALL SLASCL( 'G', -1, -1, BNRM, ONE, N, N, B, LDB, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 10
+            RETURN
+         END IF
+      END IF
+*
+*     Permute the matrix to make it more nearly triangular
+*     Workspace layout:  (8*N words -- "work" requires 6*N words)
+*        left_permutation, right_permutation, work...
+*
+      ILEFT = 1
+      IRIGHT = N + 1
+      IWORK = IRIGHT + N
+      CALL SGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ),
+     $             WORK( IRIGHT ), WORK( IWORK ), IINFO )
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 1
+         GO TO 120
+      END IF
+*
+*     Reduce B to triangular form, and initialize VL and/or VR
+*     Workspace layout:  ("work..." must have at least N words)
+*        left_permutation, right_permutation, tau, work...
+*
+      IROWS = IHI + 1 - ILO
+      IF( ILV ) THEN
+         ICOLS = N + 1 - ILO
+      ELSE
+         ICOLS = IROWS
+      END IF
+      ITAU = IWORK
+      IWORK = ITAU + IROWS
+      CALL SGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ),
+     $             WORK( IWORK ), LWORK+1-IWORK, IINFO )
+      IF( IINFO.GE.0 )
+     $   LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 2
+         GO TO 120
+      END IF
+*
+      CALL SORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB,
+     $             WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWORK ),
+     $             LWORK+1-IWORK, IINFO )
+      IF( IINFO.GE.0 )
+     $   LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 3
+         GO TO 120
+      END IF
+*
+      IF( ILVL ) THEN
+         CALL SLASET( 'Full', N, N, ZERO, ONE, VL, LDVL )
+         CALL SLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB,
+     $                VL( ILO+1, ILO ), LDVL )
+         CALL SORGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL,
+     $                WORK( ITAU ), WORK( IWORK ), LWORK+1-IWORK,
+     $                IINFO )
+         IF( IINFO.GE.0 )
+     $      LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 4
+            GO TO 120
+         END IF
+      END IF
+*
+      IF( ILVR )
+     $   CALL SLASET( 'Full', N, N, ZERO, ONE, VR, LDVR )
+*
+*     Reduce to generalized Hessenberg form
+*
+      IF( ILV ) THEN
+*
+*        Eigenvectors requested -- work on whole matrix.
+*
+         CALL SGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL,
+     $                LDVL, VR, LDVR, IINFO )
+      ELSE
+         CALL SGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA,
+     $                B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IINFO )
+      END IF
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 5
+         GO TO 120
+      END IF
+*
+*     Perform QZ algorithm
+*     Workspace layout:  ("work..." must have at least 1 word)
+*        left_permutation, right_permutation, work...
+*
+      IWORK = ITAU
+      IF( ILV ) THEN
+         CHTEMP = 'S'
+      ELSE
+         CHTEMP = 'E'
+      END IF
+      CALL SHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB,
+     $             ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR,
+     $             WORK( IWORK ), LWORK+1-IWORK, IINFO )
+      IF( IINFO.GE.0 )
+     $   LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+      IF( IINFO.NE.0 ) THEN
+         IF( IINFO.GT.0 .AND. IINFO.LE.N ) THEN
+            INFO = IINFO
+         ELSE IF( IINFO.GT.N .AND. IINFO.LE.2*N ) THEN
+            INFO = IINFO - N
+         ELSE
+            INFO = N + 6
+         END IF
+         GO TO 120
+      END IF
+*
+      IF( ILV ) THEN
+*
+*        Compute Eigenvectors  (STGEVC requires 6*N words of workspace)
+*
+         IF( ILVL ) THEN
+            IF( ILVR ) THEN
+               CHTEMP = 'B'
+            ELSE
+               CHTEMP = 'L'
+            END IF
+         ELSE
+            CHTEMP = 'R'
+         END IF
+*
+         CALL STGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, LDVL,
+     $                VR, LDVR, N, IN, WORK( IWORK ), IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 7
+            GO TO 120
+         END IF
+*
+*        Undo balancing on VL and VR, rescale
+*
+         IF( ILVL ) THEN
+            CALL SGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ),
+     $                   WORK( IRIGHT ), N, VL, LDVL, IINFO )
+            IF( IINFO.NE.0 ) THEN
+               INFO = N + 8
+               GO TO 120
+            END IF
+            DO 50 JC = 1, N
+               IF( ALPHAI( JC ).LT.ZERO )
+     $            GO TO 50
+               TEMP = ZERO
+               IF( ALPHAI( JC ).EQ.ZERO ) THEN
+                  DO 10 JR = 1, N
+                     TEMP = MAX( TEMP, ABS( VL( JR, JC ) ) )
+   10             CONTINUE
+               ELSE
+                  DO 20 JR = 1, N
+                     TEMP = MAX( TEMP, ABS( VL( JR, JC ) )+
+     $                      ABS( VL( JR, JC+1 ) ) )
+   20             CONTINUE
+               END IF
+               IF( TEMP.LT.SAFMIN )
+     $            GO TO 50
+               TEMP = ONE / TEMP
+               IF( ALPHAI( JC ).EQ.ZERO ) THEN
+                  DO 30 JR = 1, N
+                     VL( JR, JC ) = VL( JR, JC )*TEMP
+   30             CONTINUE
+               ELSE
+                  DO 40 JR = 1, N
+                     VL( JR, JC ) = VL( JR, JC )*TEMP
+                     VL( JR, JC+1 ) = VL( JR, JC+1 )*TEMP
+   40             CONTINUE
+               END IF
+   50       CONTINUE
+         END IF
+         IF( ILVR ) THEN
+            CALL SGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ),
+     $                   WORK( IRIGHT ), N, VR, LDVR, IINFO )
+            IF( IINFO.NE.0 ) THEN
+               INFO = N + 9
+               GO TO 120
+            END IF
+            DO 100 JC = 1, N
+               IF( ALPHAI( JC ).LT.ZERO )
+     $            GO TO 100
+               TEMP = ZERO
+               IF( ALPHAI( JC ).EQ.ZERO ) THEN
+                  DO 60 JR = 1, N
+                     TEMP = MAX( TEMP, ABS( VR( JR, JC ) ) )
+   60             CONTINUE
+               ELSE
+                  DO 70 JR = 1, N
+                     TEMP = MAX( TEMP, ABS( VR( JR, JC ) )+
+     $                      ABS( VR( JR, JC+1 ) ) )
+   70             CONTINUE
+               END IF
+               IF( TEMP.LT.SAFMIN )
+     $            GO TO 100
+               TEMP = ONE / TEMP
+               IF( ALPHAI( JC ).EQ.ZERO ) THEN
+                  DO 80 JR = 1, N
+                     VR( JR, JC ) = VR( JR, JC )*TEMP
+   80             CONTINUE
+               ELSE
+                  DO 90 JR = 1, N
+                     VR( JR, JC ) = VR( JR, JC )*TEMP
+                     VR( JR, JC+1 ) = VR( JR, JC+1 )*TEMP
+   90             CONTINUE
+               END IF
+  100       CONTINUE
+         END IF
+*
+*        End of eigenvector calculation
+*
+      END IF
+*
+*     Undo scaling in alpha, beta
+*
+*     Note: this does not give the alpha and beta for the unscaled
+*     problem.
+*
+*     Un-scaling is limited to avoid underflow in alpha and beta
+*     if they are significant.
+*
+      DO 110 JC = 1, N
+         ABSAR = ABS( ALPHAR( JC ) )
+         ABSAI = ABS( ALPHAI( JC ) )
+         ABSB = ABS( BETA( JC ) )
+         SALFAR = ANRM*ALPHAR( JC )
+         SALFAI = ANRM*ALPHAI( JC )
+         SBETA = BNRM*BETA( JC )
+         ILIMIT = .FALSE.
+         SCALE = ONE
+*
+*        Check for significant underflow in ALPHAI
+*
+         IF( ABS( SALFAI ).LT.SAFMIN .AND. ABSAI.GE.
+     $       MAX( SAFMIN, EPS*ABSAR, EPS*ABSB ) ) THEN
+            ILIMIT = .TRUE.
+            SCALE = ( ONEPLS*SAFMIN / ANRM1 ) /
+     $              MAX( ONEPLS*SAFMIN, ANRM2*ABSAI )
+*
+         ELSE IF( SALFAI.EQ.ZERO ) THEN
+*
+*           If insignificant underflow in ALPHAI, then make the
+*           conjugate eigenvalue real.
+*
+            IF( ALPHAI( JC ).LT.ZERO .AND. JC.GT.1 ) THEN
+               ALPHAI( JC-1 ) = ZERO
+            ELSE IF( ALPHAI( JC ).GT.ZERO .AND. JC.LT.N ) THEN
+               ALPHAI( JC+1 ) = ZERO
+            END IF
+         END IF
+*
+*        Check for significant underflow in ALPHAR
+*
+         IF( ABS( SALFAR ).LT.SAFMIN .AND. ABSAR.GE.
+     $       MAX( SAFMIN, EPS*ABSAI, EPS*ABSB ) ) THEN
+            ILIMIT = .TRUE.
+            SCALE = MAX( SCALE, ( ONEPLS*SAFMIN / ANRM1 ) /
+     $              MAX( ONEPLS*SAFMIN, ANRM2*ABSAR ) )
+         END IF
+*
+*        Check for significant underflow in BETA
+*
+         IF( ABS( SBETA ).LT.SAFMIN .AND. ABSB.GE.
+     $       MAX( SAFMIN, EPS*ABSAR, EPS*ABSAI ) ) THEN
+            ILIMIT = .TRUE.
+            SCALE = MAX( SCALE, ( ONEPLS*SAFMIN / BNRM1 ) /
+     $              MAX( ONEPLS*SAFMIN, BNRM2*ABSB ) )
+         END IF
+*
+*        Check for possible overflow when limiting scaling
+*
+         IF( ILIMIT ) THEN
+            TEMP = ( SCALE*SAFMIN )*MAX( ABS( SALFAR ), ABS( SALFAI ),
+     $             ABS( SBETA ) )
+            IF( TEMP.GT.ONE )
+     $         SCALE = SCALE / TEMP
+            IF( SCALE.LT.ONE )
+     $         ILIMIT = .FALSE.
+         END IF
+*
+*        Recompute un-scaled ALPHAR, ALPHAI, BETA if necessary.
+*
+         IF( ILIMIT ) THEN
+            SALFAR = ( SCALE*ALPHAR( JC ) )*ANRM
+            SALFAI = ( SCALE*ALPHAI( JC ) )*ANRM
+            SBETA = ( SCALE*BETA( JC ) )*BNRM
+         END IF
+         ALPHAR( JC ) = SALFAR
+         ALPHAI( JC ) = SALFAI
+         BETA( JC ) = SBETA
+  110 CONTINUE
+*
+  120 CONTINUE
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of SGEGV
+*
+      END
diff --git a/costa/native/external/lapack/sgehd2.f b/costa/native/external/lapack/sgehd2.f
new file mode 100644
index 000000000..45c133e37
--- /dev/null
+++ b/costa/native/external/lapack/sgehd2.f
@@ -0,0 +1,150 @@
+      SUBROUTINE SGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            IHI, ILO, INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGEHD2 reduces a real general matrix A to upper Hessenberg form H by
+*  an orthogonal similarity transformation:  Q' * A * Q = H .
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          It is assumed that A is already upper triangular in rows
+*          and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
+*          set by a previous call to SGEBAL; otherwise they should be
+*          set to 1 and N respectively. See Further Details.
+*          1 <= ILO <= IHI <= max(1,N).
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the n by n general matrix to be reduced.
+*          On exit, the upper triangle and the first subdiagonal of A
+*          are overwritten with the upper Hessenberg matrix H, and the
+*          elements below the first subdiagonal, with the array TAU,
+*          represent the orthogonal matrix Q as a product of elementary
+*          reflectors. See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  TAU     (output) REAL array, dimension (N-1)
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of (ihi-ilo) elementary
+*  reflectors
+*
+*     Q = H(ilo) H(ilo+1) . . . H(ihi-1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
+*  exit in A(i+2:ihi,i), and tau in TAU(i).
+*
+*  The contents of A are illustrated by the following example, with
+*  n = 7, ilo = 2 and ihi = 6:
+*
+*  on entry,                        on exit,
+*
+*  ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a )
+*  (     a   a   a   a   a   a )    (      a   h   h   h   h   a )
+*  (     a   a   a   a   a   a )    (      h   h   h   h   h   h )
+*  (     a   a   a   a   a   a )    (      v2  h   h   h   h   h )
+*  (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h )
+*  (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h )
+*  (                         a )    (                          a )
+*
+*  where a denotes an element of the original matrix A, h denotes a
+*  modified element of the upper Hessenberg matrix H, and vi denotes an
+*  element of the vector defining H(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      REAL               AII
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARF, SLARFG, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN
+         INFO = -2
+      ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGEHD2', -INFO )
+         RETURN
+      END IF
+*
+      DO 10 I = ILO, IHI - 1
+*
+*        Compute elementary reflector H(i) to annihilate A(i+2:ihi,i)
+*
+         CALL SLARFG( IHI-I, A( I+1, I ), A( MIN( I+2, N ), I ), 1,
+     $                TAU( I ) )
+         AII = A( I+1, I )
+         A( I+1, I ) = ONE
+*
+*        Apply H(i) to A(1:ihi,i+1:ihi) from the right
+*
+         CALL SLARF( 'Right', IHI, IHI-I, A( I+1, I ), 1, TAU( I ),
+     $               A( 1, I+1 ), LDA, WORK )
+*
+*        Apply H(i) to A(i+1:ihi,i+1:n) from the left
+*
+         CALL SLARF( 'Left', IHI-I, N-I, A( I+1, I ), 1, TAU( I ),
+     $               A( I+1, I+1 ), LDA, WORK )
+*
+         A( I+1, I ) = AII
+   10 CONTINUE
+*
+      RETURN
+*
+*     End of SGEHD2
+*
+      END
diff --git a/costa/native/external/lapack/sgehrd.f b/costa/native/external/lapack/sgehrd.f
new file mode 100644
index 000000000..9de584f5d
--- /dev/null
+++ b/costa/native/external/lapack/sgehrd.f
@@ -0,0 +1,255 @@
+      SUBROUTINE SGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            IHI, ILO, INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGEHRD reduces a real general matrix A to upper Hessenberg form H by
+*  an orthogonal similarity transformation:  Q' * A * Q = H .
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          It is assumed that A is already upper triangular in rows
+*          and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
+*          set by a previous call to SGEBAL; otherwise they should be
+*          set to 1 and N respectively. See Further Details.
+*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the N-by-N general matrix to be reduced.
+*          On exit, the upper triangle and the first subdiagonal of A
+*          are overwritten with the upper Hessenberg matrix H, and the
+*          elements below the first subdiagonal, with the array TAU,
+*          represent the orthogonal matrix Q as a product of elementary
+*          reflectors. See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  TAU     (output) REAL array, dimension (N-1)
+*          The scalar factors of the elementary reflectors (see Further
+*          Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to
+*          zero.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.  LWORK >= max(1,N).
+*          For optimum performance LWORK >= N*NB, where NB is the
+*          optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of (ihi-ilo) elementary
+*  reflectors
+*
+*     Q = H(ilo) H(ilo+1) . . . H(ihi-1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
+*  exit in A(i+2:ihi,i), and tau in TAU(i).
+*
+*  The contents of A are illustrated by the following example, with
+*  n = 7, ilo = 2 and ihi = 6:
+*
+*  on entry,                        on exit,
+*
+*  ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a )
+*  (     a   a   a   a   a   a )    (      a   h   h   h   h   a )
+*  (     a   a   a   a   a   a )    (      h   h   h   h   h   h )
+*  (     a   a   a   a   a   a )    (      v2  h   h   h   h   h )
+*  (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h )
+*  (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h )
+*  (                         a )    (                          a )
+*
+*  where a denotes an element of the original matrix A, h denotes a
+*  modified element of the upper Hessenberg matrix H, and vi denotes an
+*  element of the vector defining H(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            NBMAX, LDT
+      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, LDWORK, LWKOPT, NB, NBMIN,
+     $                   NH, NX
+      REAL               EI
+*     ..
+*     .. Local Arrays ..
+      REAL               T( LDT, NBMAX )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEHD2, SGEMM, SLAHRD, SLARFB, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      NB = MIN( NBMAX, ILAENV( 1, 'SGEHRD', ' ', N, ILO, IHI, -1 ) )
+      LWKOPT = N*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN
+         INFO = -2
+      ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGEHRD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Set elements 1:ILO-1 and IHI:N-1 of TAU to zero
+*
+      DO 10 I = 1, ILO - 1
+         TAU( I ) = ZERO
+   10 CONTINUE
+      DO 20 I = MAX( 1, IHI ), N - 1
+         TAU( I ) = ZERO
+   20 CONTINUE
+*
+*     Quick return if possible
+*
+      NH = IHI - ILO + 1
+      IF( NH.LE.1 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+*     Determine the block size.
+*
+      NB = MIN( NBMAX, ILAENV( 1, 'SGEHRD', ' ', N, ILO, IHI, -1 ) )
+      NBMIN = 2
+      IWS = 1
+      IF( NB.GT.1 .AND. NB.LT.NH ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code
+*        (last block is always handled by unblocked code).
+*
+         NX = MAX( NB, ILAENV( 3, 'SGEHRD', ' ', N, ILO, IHI, -1 ) )
+         IF( NX.LT.NH ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            IWS = N*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  determine the
+*              minimum value of NB, and reduce NB or force use of
+*              unblocked code.
+*
+               NBMIN = MAX( 2, ILAENV( 2, 'SGEHRD', ' ', N, ILO, IHI,
+     $                 -1 ) )
+               IF( LWORK.GE.N*NBMIN ) THEN
+                  NB = LWORK / N
+               ELSE
+                  NB = 1
+               END IF
+            END IF
+         END IF
+      END IF
+      LDWORK = N
+*
+      IF( NB.LT.NBMIN .OR. NB.GE.NH ) THEN
+*
+*        Use unblocked code below
+*
+         I = ILO
+*
+      ELSE
+*
+*        Use blocked code
+*
+         DO 30 I = ILO, IHI - 1 - NX, NB
+            IB = MIN( NB, IHI-I )
+*
+*           Reduce columns i:i+ib-1 to Hessenberg form, returning the
+*           matrices V and T of the block reflector H = I - V*T*V'
+*           which performs the reduction, and also the matrix Y = A*V*T
+*
+            CALL SLAHRD( IHI, I, IB, A( 1, I ), LDA, TAU( I ), T, LDT,
+     $                   WORK, LDWORK )
+*
+*           Apply the block reflector H to A(1:ihi,i+ib:ihi) from the
+*           right, computing  A := A - Y * V'. V(i+ib,ib-1) must be set
+*           to 1.
+*
+            EI = A( I+IB, I+IB-1 )
+            A( I+IB, I+IB-1 ) = ONE
+            CALL SGEMM( 'No transpose', 'Transpose', IHI, IHI-I-IB+1,
+     $                  IB, -ONE, WORK, LDWORK, A( I+IB, I ), LDA, ONE,
+     $                  A( 1, I+IB ), LDA )
+            A( I+IB, I+IB-1 ) = EI
+*
+*           Apply the block reflector H to A(i+1:ihi,i+ib:n) from the
+*           left
+*
+            CALL SLARFB( 'Left', 'Transpose', 'Forward', 'Columnwise',
+     $                   IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, T, LDT,
+     $                   A( I+1, I+IB ), LDA, WORK, LDWORK )
+   30    CONTINUE
+      END IF
+*
+*     Use unblocked code to reduce the rest of the matrix
+*
+      CALL SGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO )
+      WORK( 1 ) = IWS
+*
+      RETURN
+*
+*     End of SGEHRD
+*
+      END
diff --git a/costa/native/external/lapack/sgelq2.f b/costa/native/external/lapack/sgelq2.f
new file mode 100644
index 000000000..63a1b32c1
--- /dev/null
+++ b/costa/native/external/lapack/sgelq2.f
@@ -0,0 +1,122 @@
+      SUBROUTINE SGELQ2( M, N, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGELQ2 computes an LQ factorization of a real m by n matrix A:
+*  A = L * Q.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the m by n matrix A.
+*          On exit, the elements on and below the diagonal of the array
+*          contain the m by min(m,n) lower trapezoidal matrix L (L is
+*          lower triangular if m <= n); the elements above the diagonal,
+*          with the array TAU, represent the orthogonal matrix Q as a
+*          product of elementary reflectors (see Further Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) REAL array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace) REAL array, dimension (M)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(k) . . . H(2) H(1), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),
+*  and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, K
+      REAL               AII
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARF, SLARFG, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGELQ2', -INFO )
+         RETURN
+      END IF
+*
+      K = MIN( M, N )
+*
+      DO 10 I = 1, K
+*
+*        Generate elementary reflector H(i) to annihilate A(i,i+1:n)
+*
+         CALL SLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA,
+     $                TAU( I ) )
+         IF( I.LT.M ) THEN
+*
+*           Apply H(i) to A(i+1:m,i:n) from the right
+*
+            AII = A( I, I )
+            A( I, I ) = ONE
+            CALL SLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAU( I ),
+     $                  A( I+1, I ), LDA, WORK )
+            A( I, I ) = AII
+         END IF
+   10 CONTINUE
+      RETURN
+*
+*     End of SGELQ2
+*
+      END
diff --git a/costa/native/external/lapack/sgelqf.f b/costa/native/external/lapack/sgelqf.f
new file mode 100644
index 000000000..d2797a6e4
--- /dev/null
+++ b/costa/native/external/lapack/sgelqf.f
@@ -0,0 +1,196 @@
+      SUBROUTINE SGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGELQF computes an LQ factorization of a real M-by-N matrix A:
+*  A = L * Q.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, the elements on and below the diagonal of the array
+*          contain the m-by-min(m,n) lower trapezoidal matrix L (L is
+*          lower triangular if m <= n); the elements above the diagonal,
+*          with the array TAU, represent the orthogonal matrix Q as a
+*          product of elementary reflectors (see Further Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) REAL array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,M).
+*          For optimum performance LWORK >= M*NB, where NB is the
+*          optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(k) . . . H(2) H(1), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),
+*  and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB,
+     $                   NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGELQ2, SLARFB, SLARFT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
+      LWKOPT = M*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGELQF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      K = MIN( M, N )
+      IF( K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 0
+      IWS = M
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'SGELQF', ' ', M, N, -1, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = M
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'SGELQF', ' ', M, N, -1,
+     $                 -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code initially
+*
+         DO 10 I = 1, K - NX, NB
+            IB = MIN( K-I+1, NB )
+*
+*           Compute the LQ factorization of the current block
+*           A(i:i+ib-1,i:n)
+*
+            CALL SGELQ2( IB, N-I+1, A( I, I ), LDA, TAU( I ), WORK,
+     $                   IINFO )
+            IF( I+IB.LE.M ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i) H(i+1) . . . H(i+ib-1)
+*
+               CALL SLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ),
+     $                      LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H to A(i+ib:m,i:n) from the right
+*
+               CALL SLARFB( 'Right', 'No transpose', 'Forward',
+     $                      'Rowwise', M-I-IB+1, N-I+1, IB, A( I, I ),
+     $                      LDA, WORK, LDWORK, A( I+IB, I ), LDA,
+     $                      WORK( IB+1 ), LDWORK )
+            END IF
+   10    CONTINUE
+      ELSE
+         I = 1
+      END IF
+*
+*     Use unblocked code to factor the last or only block.
+*
+      IF( I.LE.K )
+     $   CALL SGELQ2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK,
+     $                IINFO )
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of SGELQF
+*
+      END
diff --git a/costa/native/external/lapack/sgels.f b/costa/native/external/lapack/sgels.f
new file mode 100644
index 000000000..a749a0348
--- /dev/null
+++ b/costa/native/external/lapack/sgels.f
@@ -0,0 +1,403 @@
+      SUBROUTINE SGELS( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK,
+     $                  INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), B( LDB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGELS solves overdetermined or underdetermined real linear systems
+*  involving an M-by-N matrix A, or its transpose, using a QR or LQ
+*  factorization of A.  It is assumed that A has full rank.
+*
+*  The following options are provided: 
+*
+*  1. If TRANS = 'N' and m >= n:  find the least squares solution of
+*     an overdetermined system, i.e., solve the least squares problem
+*                  minimize || B - A*X ||.
+*
+*  2. If TRANS = 'N' and m < n:  find the minimum norm solution of
+*     an underdetermined system A * X = B.
+*
+*  3. If TRANS = 'T' and m >= n:  find the minimum norm solution of
+*     an undetermined system A**T * X = B.
+*
+*  4. If TRANS = 'T' and m < n:  find the least squares solution of
+*     an overdetermined system, i.e., solve the least squares problem
+*                  minimize || B - A**T * X ||.
+*
+*  Several right hand side vectors b and solution vectors x can be 
+*  handled in a single call; they are stored as the columns of the
+*  M-by-NRHS right hand side matrix B and the N-by-NRHS solution 
+*  matrix X.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER
+*          = 'N': the linear system involves A;
+*          = 'T': the linear system involves A**T. 
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of
+*          columns of the matrices B and X. NRHS >=0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit,
+*            if M >= N, A is overwritten by details of its QR
+*                       factorization as returned by SGEQRF;
+*            if M <  N, A is overwritten by details of its LQ
+*                       factorization as returned by SGELQF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the matrix B of right hand side vectors, stored
+*          columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS
+*          if TRANS = 'T'.  
+*          On exit, B is overwritten by the solution vectors, stored
+*          columnwise:
+*          if TRANS = 'N' and m >= n, rows 1 to n of B contain the least
+*          squares solution vectors; the residual sum of squares for the
+*          solution in each column is given by the sum of squares of
+*          elements N+1 to M in that column;
+*          if TRANS = 'N' and m < n, rows 1 to N of B contain the
+*          minimum norm solution vectors;
+*          if TRANS = 'T' and m >= n, rows 1 to M of B contain the
+*          minimum norm solution vectors;
+*          if TRANS = 'T' and m < n, rows 1 to M of B contain the
+*          least squares solution vectors; the residual sum of squares
+*          for the solution in each column is given by the sum of
+*          squares of elements M+1 to N in that column.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= MAX(1,M,N).
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          LWORK >= max( 1, MN + max( MN, NRHS ) ).
+*          For optimal performance,
+*          LWORK >= max( 1, MN + max( MN, NRHS )*NB ).
+*          where MN = min(M,N) and NB is the optimum block size.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, TPSD
+      INTEGER            BROW, I, IASCL, IBSCL, J, MN, NB, SCLLEN, WSIZE
+      REAL               ANRM, BIGNUM, BNRM, SMLNUM
+*     ..
+*     .. Local Arrays ..
+      REAL               RWORK( 1 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               SLAMCH, SLANGE
+      EXTERNAL           LSAME, ILAENV, SLAMCH, SLANGE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGELQF, SGEQRF, SLABAD, SLASCL, SLASET, SORMLQ,
+     $                   SORMQR, STRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments.
+*
+      INFO = 0
+      MN = MIN( M, N )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.( LSAME( TRANS, 'N' ) .OR. LSAME( TRANS, 'T' ) ) ) THEN
+         INFO = -1
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN
+         INFO = -8
+      ELSE IF( LWORK.LT.MAX( 1, MN + MAX( MN, NRHS ) ) .AND.
+     $   .NOT.LQUERY ) THEN
+         INFO = -10
+      END IF
+*
+*     Figure out optimal block size
+*
+      IF( INFO.EQ.0 .OR. INFO.EQ.-10 ) THEN
+*
+         TPSD = .TRUE.
+         IF( LSAME( TRANS, 'N' ) )
+     $      TPSD = .FALSE.
+*
+         IF( M.GE.N ) THEN
+            NB = ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
+            IF( TPSD ) THEN
+               NB = MAX( NB, ILAENV( 1, 'SORMQR', 'LN', M, NRHS, N,
+     $              -1 ) )
+            ELSE
+               NB = MAX( NB, ILAENV( 1, 'SORMQR', 'LT', M, NRHS, N,
+     $              -1 ) )
+            END IF
+         ELSE
+            NB = ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
+            IF( TPSD ) THEN
+               NB = MAX( NB, ILAENV( 1, 'SORMLQ', 'LT', N, NRHS, M,
+     $              -1 ) )
+            ELSE
+               NB = MAX( NB, ILAENV( 1, 'SORMLQ', 'LN', N, NRHS, M,
+     $              -1 ) )
+            END IF
+         END IF
+*
+         WSIZE = MAX( 1, MN + MAX( MN, NRHS )*NB )
+         WORK( 1 ) = REAL( WSIZE )
+*
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGELS ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( MIN( M, N, NRHS ).EQ.0 ) THEN
+         CALL SLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
+         RETURN
+      END IF
+*
+*     Get machine parameters
+*
+      SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' )
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+*
+*     Scale A, B if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = SLANGE( 'M', M, N, A, LDA, RWORK )
+      IASCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
+         IASCL = 1
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
+         IASCL = 2
+      ELSE IF( ANRM.EQ.ZERO ) THEN
+*
+*        Matrix all zero. Return zero solution.
+*
+         CALL SLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
+         GO TO 50
+      END IF
+*
+      BROW = M
+      IF( TPSD )
+     $   BROW = N
+      BNRM = SLANGE( 'M', BROW, NRHS, B, LDB, RWORK )
+      IBSCL = 0
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL SLASCL( 'G', 0, 0, BNRM, SMLNUM, BROW, NRHS, B, LDB,
+     $                INFO )
+         IBSCL = 1
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL SLASCL( 'G', 0, 0, BNRM, BIGNUM, BROW, NRHS, B, LDB,
+     $                INFO )
+         IBSCL = 2
+      END IF
+*
+      IF( M.GE.N ) THEN
+*
+*        compute QR factorization of A
+*
+         CALL SGEQRF( M, N, A, LDA, WORK( 1 ), WORK( MN+1 ), LWORK-MN,
+     $                INFO )
+*
+*        workspace at least N, optimally N*NB
+*
+         IF( .NOT.TPSD ) THEN
+*
+*           Least-Squares Problem min || A * X - B ||
+*
+*           B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS)
+*
+            CALL SORMQR( 'Left', 'Transpose', M, NRHS, N, A, LDA,
+     $                   WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN,
+     $                   INFO )
+*
+*           workspace at least NRHS, optimally NRHS*NB
+*
+*           B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS)
+*
+            CALL STRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N,
+     $                  NRHS, ONE, A, LDA, B, LDB )
+*
+            SCLLEN = N
+*
+         ELSE
+*
+*           Overdetermined system of equations A' * X = B
+*
+*           B(1:N,1:NRHS) := inv(R') * B(1:N,1:NRHS)
+*
+            CALL STRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', N,
+     $                  NRHS, ONE, A, LDA, B, LDB )
+*
+*           B(N+1:M,1:NRHS) = ZERO
+*
+            DO 20 J = 1, NRHS
+               DO 10 I = N + 1, M
+                  B( I, J ) = ZERO
+   10          CONTINUE
+   20       CONTINUE
+*
+*           B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS)
+*
+            CALL SORMQR( 'Left', 'No transpose', M, NRHS, N, A, LDA,
+     $                   WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN,
+     $                   INFO )
+*
+*           workspace at least NRHS, optimally NRHS*NB
+*
+            SCLLEN = M
+*
+         END IF
+*
+      ELSE
+*
+*        Compute LQ factorization of A
+*
+         CALL SGELQF( M, N, A, LDA, WORK( 1 ), WORK( MN+1 ), LWORK-MN,
+     $                INFO )
+*
+*        workspace at least M, optimally M*NB.
+*
+         IF( .NOT.TPSD ) THEN
+*
+*           underdetermined system of equations A * X = B
+*
+*           B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS)
+*
+            CALL STRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', M,
+     $                  NRHS, ONE, A, LDA, B, LDB )
+*
+*           B(M+1:N,1:NRHS) = 0
+*
+            DO 40 J = 1, NRHS
+               DO 30 I = M + 1, N
+                  B( I, J ) = ZERO
+   30          CONTINUE
+   40       CONTINUE
+*
+*           B(1:N,1:NRHS) := Q(1:N,:)' * B(1:M,1:NRHS)
+*
+            CALL SORMLQ( 'Left', 'Transpose', N, NRHS, M, A, LDA,
+     $                   WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN,
+     $                   INFO )
+*
+*           workspace at least NRHS, optimally NRHS*NB
+*
+            SCLLEN = N
+*
+         ELSE
+*
+*           overdetermined system min || A' * X - B ||
+*
+*           B(1:N,1:NRHS) := Q * B(1:N,1:NRHS)
+*
+            CALL SORMLQ( 'Left', 'No transpose', N, NRHS, M, A, LDA,
+     $                   WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN,
+     $                   INFO )
+*
+*           workspace at least NRHS, optimally NRHS*NB
+*
+*           B(1:M,1:NRHS) := inv(L') * B(1:M,1:NRHS)
+*
+            CALL STRSM( 'Left', 'Lower', 'Transpose', 'Non-unit', M,
+     $                  NRHS, ONE, A, LDA, B, LDB )
+*
+            SCLLEN = M
+*
+         END IF
+*
+      END IF
+*
+*     Undo scaling
+*
+      IF( IASCL.EQ.1 ) THEN
+         CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, SCLLEN, NRHS, B, LDB,
+     $                INFO )
+      ELSE IF( IASCL.EQ.2 ) THEN
+         CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, SCLLEN, NRHS, B, LDB,
+     $                INFO )
+      END IF
+      IF( IBSCL.EQ.1 ) THEN
+         CALL SLASCL( 'G', 0, 0, SMLNUM, BNRM, SCLLEN, NRHS, B, LDB,
+     $                INFO )
+      ELSE IF( IBSCL.EQ.2 ) THEN
+         CALL SLASCL( 'G', 0, 0, BIGNUM, BNRM, SCLLEN, NRHS, B, LDB,
+     $                INFO )
+      END IF
+*
+   50 CONTINUE
+      WORK( 1 ) = REAL( WSIZE )
+*
+      RETURN
+*
+*     End of SGELS
+*
+      END
diff --git a/costa/native/external/lapack/sgelsd.f b/costa/native/external/lapack/sgelsd.f
new file mode 100644
index 000000000..12e9ed52b
--- /dev/null
+++ b/costa/native/external/lapack/sgelsd.f
@@ -0,0 +1,530 @@
+      SUBROUTINE SGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND,
+     $                   RANK, WORK, LWORK, IWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGELSD computes the minimum-norm solution to a real linear least
+*  squares problem:
+*      minimize 2-norm(| b - A*x |)
+*  using the singular value decomposition (SVD) of A. A is an M-by-N
+*  matrix which may be rank-deficient.
+*
+*  Several right hand side vectors b and solution vectors x can be
+*  handled in a single call; they are stored as the columns of the
+*  M-by-NRHS right hand side matrix B and the N-by-NRHS solution
+*  matrix X.
+*
+*  The problem is solved in three steps:
+*  (1) Reduce the coefficient matrix A to bidiagonal form with
+*      Householder transformations, reducing the original problem
+*      into a "bidiagonal least squares problem" (BLS)
+*  (2) Solve the BLS using a divide and conquer approach.
+*  (3) Apply back all the Householder tranformations to solve
+*      the original least squares problem.
+*
+*  The effective rank of A is determined by treating as zero those
+*  singular values which are less than RCOND times the largest singular
+*  value.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of A. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of A. N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X. NRHS >= 0.
+*
+*  A       (input) REAL array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, A has been destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the M-by-NRHS right hand side matrix B.
+*          On exit, B is overwritten by the N-by-NRHS solution
+*          matrix X.  If m >= n and RANK = n, the residual
+*          sum-of-squares for the solution in the i-th column is given
+*          by the sum of squares of elements n+1:m in that column.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,max(M,N)).
+*
+*  S       (output) REAL array, dimension (min(M,N))
+*          The singular values of A in decreasing order.
+*          The condition number of A in the 2-norm = S(1)/S(min(m,n)).
+*
+*  RCOND   (input) REAL
+*          RCOND is used to determine the effective rank of A.
+*          Singular values S(i) <= RCOND*S(1) are treated as zero.
+*          If RCOND < 0, machine precision is used instead.
+*
+*  RANK    (output) INTEGER
+*          The effective rank of A, i.e., the number of singular values
+*          which are greater than RCOND*S(1).
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK must be at least 1.
+*          The exact minimum amount of workspace needed depends on M,
+*          N and NRHS. As long as LWORK is at least
+*              12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2,
+*          if M is greater than or equal to N or
+*              12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2,
+*          if M is less than N, the code will execute correctly.
+*          SMLSIZ is returned by ILAENV and is equal to the maximum
+*          size of the subproblems at the bottom of the computation
+*          tree (usually about 25), and
+*             NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 )
+*          For good performance, LWORK should generally be larger.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*
+*  IWORK   (workspace) INTEGER array, dimension (LIWORK)
+*          LIWORK >= 3 * MINMN * NLVL + 11 * MINMN,
+*          where MINMN = MIN( M,N ).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  the algorithm for computing the SVD failed to converge;
+*                if INFO = i, i off-diagonal elements of an intermediate
+*                bidiagonal form did not converge to zero.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Ren-Cang Li, Computer Science Division, University of
+*       California at Berkeley, USA
+*     Osni Marques, LBNL/NERSC, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            IASCL, IBSCL, IE, IL, ITAU, ITAUP, ITAUQ,
+     $                   LDWORK, MAXMN, MAXWRK, MINMN, MINWRK, MM,
+     $                   MNTHR, NLVL, NWORK, SMLSIZ, WLALSD
+      REAL               ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEBRD, SGELQF, SGEQRF, SLABAD, SLACPY, SLALSD,
+     $                   SLASCL, SLASET, SORMBR, SORMLQ, SORMQR, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      REAL               SLAMCH, SLANGE
+      EXTERNAL           SLAMCH, SLANGE, ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          REAL, INT, LOG, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments.
+*
+      INFO = 0
+      MINMN = MIN( M, N )
+      MAXMN = MAX( M, N )
+      MNTHR = ILAENV( 6, 'SGELSD', ' ', M, N, NRHS, -1 )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN
+         INFO = -7
+      END IF
+*
+      SMLSIZ = ILAENV( 9, 'SGELSD', ' ', 0, 0, 0, 0 )
+*
+*     Compute workspace.
+*     (Note: Comments in the code beginning "Workspace:" describe the
+*     minimal amount of workspace needed at that point in the code,
+*     as well as the preferred amount for good performance.
+*     NB refers to the optimal block size for the immediately
+*     following subroutine, as returned by ILAENV.)
+*
+      MINWRK = 1
+      MINMN = MAX( 1, MINMN )
+      NLVL = MAX( INT( LOG( REAL( MINMN ) / REAL( SMLSIZ+1 ) ) / 
+     $       LOG( TWO ) ) + 1, 0 )
+*
+      IF( INFO.EQ.0 ) THEN
+         MAXWRK = 0
+         MM = M
+         IF( M.GE.N .AND. M.GE.MNTHR ) THEN
+*
+*           Path 1a - overdetermined, with many more rows than columns.
+*
+            MM = N
+            MAXWRK = MAX( MAXWRK, N+N*ILAENV( 1, 'SGEQRF', ' ', M, N,
+     $               -1, -1 ) )
+            MAXWRK = MAX( MAXWRK, N+NRHS*
+     $               ILAENV( 1, 'SORMQR', 'LT', M, NRHS, N, -1 ) )
+         END IF
+         IF( M.GE.N ) THEN
+*
+*           Path 1 - overdetermined or exactly determined.
+*
+            MAXWRK = MAX( MAXWRK, 3*N+( MM+N )*
+     $               ILAENV( 1, 'SGEBRD', ' ', MM, N, -1, -1 ) )
+            MAXWRK = MAX( MAXWRK, 3*N+NRHS*
+     $               ILAENV( 1, 'SORMBR', 'QLT', MM, NRHS, N, -1 ) )
+            MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
+     $               ILAENV( 1, 'SORMBR', 'PLN', N, NRHS, N, -1 ) )
+            WLALSD = 9*N+2*N*SMLSIZ+8*N*NLVL+N*NRHS+(SMLSIZ+1)**2
+            MAXWRK = MAX( MAXWRK, 3*N+WLALSD )
+            MINWRK = MAX( 3*N+MM, 3*N+NRHS, 3*N+WLALSD )
+         END IF
+         IF( N.GT.M ) THEN
+            WLALSD = 9*M+2*M*SMLSIZ+8*M*NLVL+M*NRHS+(SMLSIZ+1)**2
+            IF( N.GE.MNTHR ) THEN
+*
+*              Path 2a - underdetermined, with many more columns
+*              than rows.
+*
+               MAXWRK = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
+               MAXWRK = MAX( MAXWRK, M*M+4*M+2*M*
+     $                  ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
+               MAXWRK = MAX( MAXWRK, M*M+4*M+NRHS*
+     $                  ILAENV( 1, 'SORMBR', 'QLT', M, NRHS, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, M*M+4*M+( M-1 )*
+     $                  ILAENV( 1, 'SORMBR', 'PLN', M, NRHS, M, -1 ) )
+               IF( NRHS.GT.1 ) THEN
+                  MAXWRK = MAX( MAXWRK, M*M+M+M*NRHS )
+               ELSE
+                  MAXWRK = MAX( MAXWRK, M*M+2*M )
+               END IF
+               MAXWRK = MAX( MAXWRK, M+NRHS*
+     $                  ILAENV( 1, 'SORMLQ', 'LT', N, NRHS, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, M*M+4*M+WLALSD )
+            ELSE
+*
+*              Path 2 - remaining underdetermined cases.
+*
+               MAXWRK = 3*M + ( N+M )*ILAENV( 1, 'SGEBRD', ' ', M, N,
+     $                  -1, -1 )
+               MAXWRK = MAX( MAXWRK, 3*M+NRHS*
+     $                  ILAENV( 1, 'SORMBR', 'QLT', M, NRHS, N, -1 ) )
+               MAXWRK = MAX( MAXWRK, 3*M+M*
+     $                  ILAENV( 1, 'SORMBR', 'PLN', N, NRHS, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, 3*M+WLALSD )
+            END IF
+            MINWRK = MAX( 3*M+NRHS, 3*M+M, 3*M+WLALSD )
+         END IF
+         MINWRK = MIN( MINWRK, MAXWRK )
+         WORK( 1 ) = MAXWRK
+         IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
+            INFO = -12
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGELSD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         GO TO 10
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         RANK = 0
+         RETURN
+      END IF
+*
+*     Get machine parameters.
+*
+      EPS = SLAMCH( 'P' )
+      SFMIN = SLAMCH( 'S' )
+      SMLNUM = SFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+*
+*     Scale A if max entry outside range [SMLNUM,BIGNUM].
+*
+      ANRM = SLANGE( 'M', M, N, A, LDA, WORK )
+      IASCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM.
+*
+         CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
+         IASCL = 1
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM.
+*
+         CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
+         IASCL = 2
+      ELSE IF( ANRM.EQ.ZERO ) THEN
+*
+*        Matrix all zero. Return zero solution.
+*
+         CALL SLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
+         CALL SLASET( 'F', MINMN, 1, ZERO, ZERO, S, 1 )
+         RANK = 0
+         GO TO 10
+      END IF
+*
+*     Scale B if max entry outside range [SMLNUM,BIGNUM].
+*
+      BNRM = SLANGE( 'M', M, NRHS, B, LDB, WORK )
+      IBSCL = 0
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM.
+*
+         CALL SLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 1
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM.
+*
+         CALL SLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 2
+      END IF
+*
+*     If M < N make sure certain entries of B are zero.
+*
+      IF( M.LT.N )
+     $   CALL SLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB )
+*
+*     Overdetermined case.
+*
+      IF( M.GE.N ) THEN
+*
+*        Path 1 - overdetermined or exactly determined.
+*
+         MM = M
+         IF( M.GE.MNTHR ) THEN
+*
+*           Path 1a - overdetermined, with many more rows than columns.
+*
+            MM = N
+            ITAU = 1
+            NWORK = ITAU + N
+*
+*           Compute A=Q*R.
+*           (Workspace: need 2*N, prefer N+N*NB)
+*
+            CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                   LWORK-NWORK+1, INFO )
+*
+*           Multiply B by transpose(Q).
+*           (Workspace: need N+NRHS, prefer N+NRHS*NB)
+*
+            CALL SORMQR( 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAU ), B,
+     $                   LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+*           Zero out below R.
+*
+            IF( N.GT.1 ) THEN
+               CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
+            END IF
+         END IF
+*
+         IE = 1
+         ITAUQ = IE + N
+         ITAUP = ITAUQ + N
+         NWORK = ITAUP + N
+*
+*        Bidiagonalize R in A.
+*        (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB)
+*
+         CALL SGEBRD( MM, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors of R.
+*        (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB)
+*
+         CALL SORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ),
+     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+*        Solve the bidiagonal least squares problem.
+*
+         CALL SLALSD( 'U', SMLSIZ, N, NRHS, S, WORK( IE ), B, LDB,
+     $                RCOND, RANK, WORK( NWORK ), IWORK, INFO )
+         IF( INFO.NE.0 ) THEN
+            GO TO 10
+         END IF
+*
+*        Multiply B by right bidiagonalizing vectors of R.
+*
+         CALL SORMBR( 'P', 'L', 'N', N, NRHS, N, A, LDA, WORK( ITAUP ),
+     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+      ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+
+     $         MAX( M, 2*M-4, NRHS, N-3*M ) ) THEN
+*
+*        Path 2a - underdetermined, with many more columns than rows
+*        and sufficient workspace for an efficient algorithm.
+*
+         LDWORK = M
+         IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ),
+     $       M*LDA+M+M*NRHS ) )LDWORK = LDA
+         ITAU = 1
+         NWORK = M + 1
+*
+*        Compute A=L*Q.
+*        (Workspace: need 2*M, prefer M+M*NB)
+*
+         CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                LWORK-NWORK+1, INFO )
+         IL = NWORK
+*
+*        Copy L to WORK(IL), zeroing out above its diagonal.
+*
+         CALL SLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK )
+         CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ),
+     $                LDWORK )
+         IE = IL + LDWORK*M
+         ITAUQ = IE + M
+         ITAUP = ITAUQ + M
+         NWORK = ITAUP + M
+*
+*        Bidiagonalize L in WORK(IL).
+*        (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB)
+*
+         CALL SGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ),
+     $                WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
+     $                LWORK-NWORK+1, INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors of L.
+*        (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB)
+*
+         CALL SORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK,
+     $                WORK( ITAUQ ), B, LDB, WORK( NWORK ),
+     $                LWORK-NWORK+1, INFO )
+*
+*        Solve the bidiagonal least squares problem.
+*
+         CALL SLALSD( 'U', SMLSIZ, M, NRHS, S, WORK( IE ), B, LDB,
+     $                RCOND, RANK, WORK( NWORK ), IWORK, INFO )
+         IF( INFO.NE.0 ) THEN
+            GO TO 10
+         END IF
+*
+*        Multiply B by right bidiagonalizing vectors of L.
+*
+         CALL SORMBR( 'P', 'L', 'N', M, NRHS, M, WORK( IL ), LDWORK,
+     $                WORK( ITAUP ), B, LDB, WORK( NWORK ),
+     $                LWORK-NWORK+1, INFO )
+*
+*        Zero out below first M rows of B.
+*
+         CALL SLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB )
+         NWORK = ITAU + M
+*
+*        Multiply transpose(Q) by B.
+*        (Workspace: need M+NRHS, prefer M+NRHS*NB)
+*
+         CALL SORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B,
+     $                LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+      ELSE
+*
+*        Path 2 - remaining underdetermined cases.
+*
+         IE = 1
+         ITAUQ = IE + M
+         ITAUP = ITAUQ + M
+         NWORK = ITAUP + M
+*
+*        Bidiagonalize A.
+*        (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
+*
+         CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors.
+*        (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB)
+*
+         CALL SORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ),
+     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+*        Solve the bidiagonal least squares problem.
+*
+         CALL SLALSD( 'L', SMLSIZ, M, NRHS, S, WORK( IE ), B, LDB,
+     $                RCOND, RANK, WORK( NWORK ), IWORK, INFO )
+         IF( INFO.NE.0 ) THEN
+            GO TO 10
+         END IF
+*
+*        Multiply B by right bidiagonalizing vectors of A.
+*
+         CALL SORMBR( 'P', 'L', 'N', N, NRHS, M, A, LDA, WORK( ITAUP ),
+     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+      END IF
+*
+*     Undo scaling.
+*
+      IF( IASCL.EQ.1 ) THEN
+         CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
+         CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
+     $                INFO )
+      ELSE IF( IASCL.EQ.2 ) THEN
+         CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
+         CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
+     $                INFO )
+      END IF
+      IF( IBSCL.EQ.1 ) THEN
+         CALL SLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
+      ELSE IF( IBSCL.EQ.2 ) THEN
+         CALL SLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
+      END IF
+*
+   10 CONTINUE
+      WORK( 1 ) = MAXWRK
+      RETURN
+*
+*     End of SGELSD
+*
+      END
diff --git a/costa/native/external/lapack/sgelss.f b/costa/native/external/lapack/sgelss.f
new file mode 100644
index 000000000..d13ae96ae
--- /dev/null
+++ b/costa/native/external/lapack/sgelss.f
@@ -0,0 +1,613 @@
+      SUBROUTINE SGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), B( LDB, * ), S( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGELSS computes the minimum norm solution to a real linear least
+*  squares problem:
+*
+*  Minimize 2-norm(| b - A*x |).
+*
+*  using the singular value decomposition (SVD) of A. A is an M-by-N
+*  matrix which may be rank-deficient.
+*
+*  Several right hand side vectors b and solution vectors x can be
+*  handled in a single call; they are stored as the columns of the
+*  M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix
+*  X.
+*
+*  The effective rank of A is determined by treating as zero those
+*  singular values which are less than RCOND times the largest singular
+*  value.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A. N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X. NRHS >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, the first min(m,n) rows of A are overwritten with
+*          its right singular vectors, stored rowwise.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the M-by-NRHS right hand side matrix B.
+*          On exit, B is overwritten by the N-by-NRHS solution
+*          matrix X.  If m >= n and RANK = n, the residual
+*          sum-of-squares for the solution in the i-th column is given
+*          by the sum of squares of elements n+1:m in that column.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,max(M,N)).
+*
+*  S       (output) REAL array, dimension (min(M,N))
+*          The singular values of A in decreasing order.
+*          The condition number of A in the 2-norm = S(1)/S(min(m,n)).
+*
+*  RCOND   (input) REAL
+*          RCOND is used to determine the effective rank of A.
+*          Singular values S(i) <= RCOND*S(1) are treated as zero.
+*          If RCOND < 0, machine precision is used instead.
+*
+*  RANK    (output) INTEGER
+*          The effective rank of A, i.e., the number of singular values
+*          which are greater than RCOND*S(1).
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= 1, and also:
+*          LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS )
+*          For good performance, LWORK should generally be larger.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  the algorithm for computing the SVD failed to converge;
+*                if INFO = i, i off-diagonal elements of an intermediate
+*                bidiagonal form did not converge to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            BDSPAC, BL, CHUNK, I, IASCL, IBSCL, IE, IL,
+     $                   ITAU, ITAUP, ITAUQ, IWORK, LDWORK, MAXMN,
+     $                   MAXWRK, MINMN, MINWRK, MM, MNTHR
+      REAL               ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM, THR
+*     ..
+*     .. Local Arrays ..
+      REAL               VDUM( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SBDSQR, SCOPY, SGEBRD, SGELQF, SGEMM, SGEMV,
+     $                   SGEQRF, SLABAD, SLACPY, SLASCL, SLASET, SORGBR,
+     $                   SORMBR, SORMLQ, SORMQR, SRSCL, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      REAL               SLAMCH, SLANGE
+      EXTERNAL           ILAENV, SLAMCH, SLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      MINMN = MIN( M, N )
+      MAXMN = MAX( M, N )
+      MNTHR = ILAENV( 6, 'SGELSS', ' ', M, N, NRHS, -1 )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN
+         INFO = -7
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN
+         MAXWRK = 0
+         MM = M
+         IF( M.GE.N .AND. M.GE.MNTHR ) THEN
+*
+*           Path 1a - overdetermined, with many more rows than columns
+*
+            MM = N
+            MAXWRK = MAX( MAXWRK, N+N*ILAENV( 1, 'SGEQRF', ' ', M, N,
+     $               -1, -1 ) )
+            MAXWRK = MAX( MAXWRK, N+NRHS*
+     $               ILAENV( 1, 'SORMQR', 'LT', M, NRHS, N, -1 ) )
+         END IF
+         IF( M.GE.N ) THEN
+*
+*           Path 1 - overdetermined or exactly determined
+*
+*           Compute workspace needed for SBDSQR
+*
+            BDSPAC = MAX( 1, 5*N )
+            MAXWRK = MAX( MAXWRK, 3*N+( MM+N )*
+     $               ILAENV( 1, 'SGEBRD', ' ', MM, N, -1, -1 ) )
+            MAXWRK = MAX( MAXWRK, 3*N+NRHS*
+     $               ILAENV( 1, 'SORMBR', 'QLT', MM, NRHS, N, -1 ) )
+            MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
+     $               ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) )
+            MAXWRK = MAX( MAXWRK, BDSPAC )
+            MAXWRK = MAX( MAXWRK, N*NRHS )
+            MINWRK = MAX( 3*N+MM, 3*N+NRHS, BDSPAC )
+            MAXWRK = MAX( MINWRK, MAXWRK )
+         END IF
+         IF( N.GT.M ) THEN
+*
+*           Compute workspace needed for SBDSQR
+*
+            BDSPAC = MAX( 1, 5*M )
+            MINWRK = MAX( 3*M+NRHS, 3*M+N, BDSPAC )
+            IF( N.GE.MNTHR ) THEN
+*
+*              Path 2a - underdetermined, with many more columns
+*              than rows
+*
+               MAXWRK = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
+               MAXWRK = MAX( MAXWRK, M*M+4*M+2*M*
+     $                  ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
+               MAXWRK = MAX( MAXWRK, M*M+4*M+NRHS*
+     $                  ILAENV( 1, 'SORMBR', 'QLT', M, NRHS, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, M*M+4*M+( M-1 )*
+     $                  ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, M*M+M+BDSPAC )
+               IF( NRHS.GT.1 ) THEN
+                  MAXWRK = MAX( MAXWRK, M*M+M+M*NRHS )
+               ELSE
+                  MAXWRK = MAX( MAXWRK, M*M+2*M )
+               END IF
+               MAXWRK = MAX( MAXWRK, M+NRHS*
+     $                  ILAENV( 1, 'SORMLQ', 'LT', N, NRHS, M, -1 ) )
+            ELSE
+*
+*              Path 2 - underdetermined
+*
+               MAXWRK = 3*M + ( N+M )*ILAENV( 1, 'SGEBRD', ' ', M, N,
+     $                  -1, -1 )
+               MAXWRK = MAX( MAXWRK, 3*M+NRHS*
+     $                  ILAENV( 1, 'SORMBR', 'QLT', M, NRHS, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, 3*M+M*
+     $                  ILAENV( 1, 'SORGBR', 'P', M, N, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, BDSPAC )
+               MAXWRK = MAX( MAXWRK, N*NRHS )
+            END IF
+         END IF
+         MAXWRK = MAX( MINWRK, MAXWRK )
+         WORK( 1 ) = MAXWRK
+      END IF
+*
+      MINWRK = MAX( MINWRK, 1 )
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY )
+     $   INFO = -12
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGELSS', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         RANK = 0
+         RETURN
+      END IF
+*
+*     Get machine parameters
+*
+      EPS = SLAMCH( 'P' )
+      SFMIN = SLAMCH( 'S' )
+      SMLNUM = SFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = SLANGE( 'M', M, N, A, LDA, WORK )
+      IASCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
+         IASCL = 1
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
+         IASCL = 2
+      ELSE IF( ANRM.EQ.ZERO ) THEN
+*
+*        Matrix all zero. Return zero solution.
+*
+         CALL SLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
+         CALL SLASET( 'F', MINMN, 1, ZERO, ZERO, S, 1 )
+         RANK = 0
+         GO TO 70
+      END IF
+*
+*     Scale B if max element outside range [SMLNUM,BIGNUM]
+*
+      BNRM = SLANGE( 'M', M, NRHS, B, LDB, WORK )
+      IBSCL = 0
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL SLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 1
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL SLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 2
+      END IF
+*
+*     Overdetermined case
+*
+      IF( M.GE.N ) THEN
+*
+*        Path 1 - overdetermined or exactly determined
+*
+         MM = M
+         IF( M.GE.MNTHR ) THEN
+*
+*           Path 1a - overdetermined, with many more rows than columns
+*
+            MM = N
+            ITAU = 1
+            IWORK = ITAU + N
+*
+*           Compute A=Q*R
+*           (Workspace: need 2*N, prefer N+N*NB)
+*
+            CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
+     $                   LWORK-IWORK+1, INFO )
+*
+*           Multiply B by transpose(Q)
+*           (Workspace: need N+NRHS, prefer N+NRHS*NB)
+*
+            CALL SORMQR( 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAU ), B,
+     $                   LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
+*
+*           Zero out below R
+*
+            IF( N.GT.1 )
+     $         CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
+         END IF
+*
+         IE = 1
+         ITAUQ = IE + N
+         ITAUP = ITAUQ + N
+         IWORK = ITAUP + N
+*
+*        Bidiagonalize R in A
+*        (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB)
+*
+         CALL SGEBRD( MM, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
+     $                INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors of R
+*        (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB)
+*
+         CALL SORMBR( 'Q', 'L', 'T', MM, NRHS, N, A, LDA, WORK( ITAUQ ),
+     $                B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
+*
+*        Generate right bidiagonalizing vectors of R in A
+*        (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
+*
+         CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
+     $                WORK( IWORK ), LWORK-IWORK+1, INFO )
+         IWORK = IE + N
+*
+*        Perform bidiagonal QR iteration
+*          multiply B by transpose of left singular vectors
+*          compute right singular vectors in A
+*        (Workspace: need BDSPAC)
+*
+         CALL SBDSQR( 'U', N, N, 0, NRHS, S, WORK( IE ), A, LDA, VDUM,
+     $                1, B, LDB, WORK( IWORK ), INFO )
+         IF( INFO.NE.0 )
+     $      GO TO 70
+*
+*        Multiply B by reciprocals of singular values
+*
+         THR = MAX( RCOND*S( 1 ), SFMIN )
+         IF( RCOND.LT.ZERO )
+     $      THR = MAX( EPS*S( 1 ), SFMIN )
+         RANK = 0
+         DO 10 I = 1, N
+            IF( S( I ).GT.THR ) THEN
+               CALL SRSCL( NRHS, S( I ), B( I, 1 ), LDB )
+               RANK = RANK + 1
+            ELSE
+               CALL SLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
+            END IF
+   10    CONTINUE
+*
+*        Multiply B by right singular vectors
+*        (Workspace: need N, prefer N*NRHS)
+*
+         IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN
+            CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, A, LDA, B, LDB, ZERO,
+     $                  WORK, LDB )
+            CALL SLACPY( 'G', N, NRHS, WORK, LDB, B, LDB )
+         ELSE IF( NRHS.GT.1 ) THEN
+            CHUNK = LWORK / N
+            DO 20 I = 1, NRHS, CHUNK
+               BL = MIN( NRHS-I+1, CHUNK )
+               CALL SGEMM( 'T', 'N', N, BL, N, ONE, A, LDA, B( 1, I ),
+     $                     LDB, ZERO, WORK, N )
+               CALL SLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB )
+   20       CONTINUE
+         ELSE
+            CALL SGEMV( 'T', N, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 )
+            CALL SCOPY( N, WORK, 1, B, 1 )
+         END IF
+*
+      ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+
+     $         MAX( M, 2*M-4, NRHS, N-3*M ) ) THEN
+*
+*        Path 2a - underdetermined, with many more columns than rows
+*        and sufficient workspace for an efficient algorithm
+*
+         LDWORK = M
+         IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ),
+     $       M*LDA+M+M*NRHS ) )LDWORK = LDA
+         ITAU = 1
+         IWORK = M + 1
+*
+*        Compute A=L*Q
+*        (Workspace: need 2*M, prefer M+M*NB)
+*
+         CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
+     $                LWORK-IWORK+1, INFO )
+         IL = IWORK
+*
+*        Copy L to WORK(IL), zeroing out above it
+*
+         CALL SLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK )
+         CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, WORK( IL+LDWORK ),
+     $                LDWORK )
+         IE = IL + LDWORK*M
+         ITAUQ = IE + M
+         ITAUP = ITAUQ + M
+         IWORK = ITAUP + M
+*
+*        Bidiagonalize L in WORK(IL)
+*        (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB)
+*
+         CALL SGEBRD( M, M, WORK( IL ), LDWORK, S, WORK( IE ),
+     $                WORK( ITAUQ ), WORK( ITAUP ), WORK( IWORK ),
+     $                LWORK-IWORK+1, INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors of L
+*        (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB)
+*
+         CALL SORMBR( 'Q', 'L', 'T', M, NRHS, M, WORK( IL ), LDWORK,
+     $                WORK( ITAUQ ), B, LDB, WORK( IWORK ),
+     $                LWORK-IWORK+1, INFO )
+*
+*        Generate right bidiagonalizing vectors of R in WORK(IL)
+*        (Workspace: need M*M+5*M-1, prefer M*M+4*M+(M-1)*NB)
+*
+         CALL SORGBR( 'P', M, M, M, WORK( IL ), LDWORK, WORK( ITAUP ),
+     $                WORK( IWORK ), LWORK-IWORK+1, INFO )
+         IWORK = IE + M
+*
+*        Perform bidiagonal QR iteration,
+*           computing right singular vectors of L in WORK(IL) and
+*           multiplying B by transpose of left singular vectors
+*        (Workspace: need M*M+M+BDSPAC)
+*
+         CALL SBDSQR( 'U', M, M, 0, NRHS, S, WORK( IE ), WORK( IL ),
+     $                LDWORK, A, LDA, B, LDB, WORK( IWORK ), INFO )
+         IF( INFO.NE.0 )
+     $      GO TO 70
+*
+*        Multiply B by reciprocals of singular values
+*
+         THR = MAX( RCOND*S( 1 ), SFMIN )
+         IF( RCOND.LT.ZERO )
+     $      THR = MAX( EPS*S( 1 ), SFMIN )
+         RANK = 0
+         DO 30 I = 1, M
+            IF( S( I ).GT.THR ) THEN
+               CALL SRSCL( NRHS, S( I ), B( I, 1 ), LDB )
+               RANK = RANK + 1
+            ELSE
+               CALL SLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
+            END IF
+   30    CONTINUE
+         IWORK = IE
+*
+*        Multiply B by right singular vectors of L in WORK(IL)
+*        (Workspace: need M*M+2*M, prefer M*M+M+M*NRHS)
+*
+         IF( LWORK.GE.LDB*NRHS+IWORK-1 .AND. NRHS.GT.1 ) THEN
+            CALL SGEMM( 'T', 'N', M, NRHS, M, ONE, WORK( IL ), LDWORK,
+     $                  B, LDB, ZERO, WORK( IWORK ), LDB )
+            CALL SLACPY( 'G', M, NRHS, WORK( IWORK ), LDB, B, LDB )
+         ELSE IF( NRHS.GT.1 ) THEN
+            CHUNK = ( LWORK-IWORK+1 ) / M
+            DO 40 I = 1, NRHS, CHUNK
+               BL = MIN( NRHS-I+1, CHUNK )
+               CALL SGEMM( 'T', 'N', M, BL, M, ONE, WORK( IL ), LDWORK,
+     $                     B( 1, I ), LDB, ZERO, WORK( IWORK ), N )
+               CALL SLACPY( 'G', M, BL, WORK( IWORK ), N, B( 1, I ),
+     $                      LDB )
+   40       CONTINUE
+         ELSE
+            CALL SGEMV( 'T', M, M, ONE, WORK( IL ), LDWORK, B( 1, 1 ),
+     $                  1, ZERO, WORK( IWORK ), 1 )
+            CALL SCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 )
+         END IF
+*
+*        Zero out below first M rows of B
+*
+         CALL SLASET( 'F', N-M, NRHS, ZERO, ZERO, B( M+1, 1 ), LDB )
+         IWORK = ITAU + M
+*
+*        Multiply transpose(Q) by B
+*        (Workspace: need M+NRHS, prefer M+NRHS*NB)
+*
+         CALL SORMLQ( 'L', 'T', N, NRHS, M, A, LDA, WORK( ITAU ), B,
+     $                LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
+*
+      ELSE
+*
+*        Path 2 - remaining underdetermined cases
+*
+         IE = 1
+         ITAUQ = IE + M
+         ITAUP = ITAUQ + M
+         IWORK = ITAUP + M
+*
+*        Bidiagonalize A
+*        (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
+*
+         CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
+     $                INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors
+*        (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB)
+*
+         CALL SORMBR( 'Q', 'L', 'T', M, NRHS, N, A, LDA, WORK( ITAUQ ),
+     $                B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
+*
+*        Generate right bidiagonalizing vectors in A
+*        (Workspace: need 4*M, prefer 3*M+M*NB)
+*
+         CALL SORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
+     $                WORK( IWORK ), LWORK-IWORK+1, INFO )
+         IWORK = IE + M
+*
+*        Perform bidiagonal QR iteration,
+*           computing right singular vectors of A in A and
+*           multiplying B by transpose of left singular vectors
+*        (Workspace: need BDSPAC)
+*
+         CALL SBDSQR( 'L', M, N, 0, NRHS, S, WORK( IE ), A, LDA, VDUM,
+     $                1, B, LDB, WORK( IWORK ), INFO )
+         IF( INFO.NE.0 )
+     $      GO TO 70
+*
+*        Multiply B by reciprocals of singular values
+*
+         THR = MAX( RCOND*S( 1 ), SFMIN )
+         IF( RCOND.LT.ZERO )
+     $      THR = MAX( EPS*S( 1 ), SFMIN )
+         RANK = 0
+         DO 50 I = 1, M
+            IF( S( I ).GT.THR ) THEN
+               CALL SRSCL( NRHS, S( I ), B( I, 1 ), LDB )
+               RANK = RANK + 1
+            ELSE
+               CALL SLASET( 'F', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
+            END IF
+   50    CONTINUE
+*
+*        Multiply B by right singular vectors of A
+*        (Workspace: need N, prefer N*NRHS)
+*
+         IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN
+            CALL SGEMM( 'T', 'N', N, NRHS, M, ONE, A, LDA, B, LDB, ZERO,
+     $                  WORK, LDB )
+            CALL SLACPY( 'F', N, NRHS, WORK, LDB, B, LDB )
+         ELSE IF( NRHS.GT.1 ) THEN
+            CHUNK = LWORK / N
+            DO 60 I = 1, NRHS, CHUNK
+               BL = MIN( NRHS-I+1, CHUNK )
+               CALL SGEMM( 'T', 'N', N, BL, M, ONE, A, LDA, B( 1, I ),
+     $                     LDB, ZERO, WORK, N )
+               CALL SLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB )
+   60       CONTINUE
+         ELSE
+            CALL SGEMV( 'T', M, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 )
+            CALL SCOPY( N, WORK, 1, B, 1 )
+         END IF
+      END IF
+*
+*     Undo scaling
+*
+      IF( IASCL.EQ.1 ) THEN
+         CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
+         CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
+     $                INFO )
+      ELSE IF( IASCL.EQ.2 ) THEN
+         CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
+         CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
+     $                INFO )
+      END IF
+      IF( IBSCL.EQ.1 ) THEN
+         CALL SLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
+      ELSE IF( IBSCL.EQ.2 ) THEN
+         CALL SLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
+      END IF
+*
+   70 CONTINUE
+      WORK( 1 ) = MAXWRK
+      RETURN
+*
+*     End of SGELSS
+*
+      END
diff --git a/costa/native/external/lapack/sgelsx.f b/costa/native/external/lapack/sgelsx.f
new file mode 100644
index 000000000..b0016cf8c
--- /dev/null
+++ b/costa/native/external/lapack/sgelsx.f
@@ -0,0 +1,350 @@
+      SUBROUTINE SGELSX( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK,
+     $                   WORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, M, N, NRHS, RANK
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            JPVT( * )
+      REAL               A( LDA, * ), B( LDB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This routine is deprecated and has been replaced by routine SGELSY.
+*
+*  SGELSX computes the minimum-norm solution to a real linear least
+*  squares problem:
+*      minimize || A * X - B ||
+*  using a complete orthogonal factorization of A.  A is an M-by-N
+*  matrix which may be rank-deficient.
+*
+*  Several right hand side vectors b and solution vectors x can be 
+*  handled in a single call; they are stored as the columns of the
+*  M-by-NRHS right hand side matrix B and the N-by-NRHS solution
+*  matrix X.
+*
+*  The routine first computes a QR factorization with column pivoting:
+*      A * P = Q * [ R11 R12 ]
+*                  [  0  R22 ]
+*  with R11 defined as the largest leading submatrix whose estimated
+*  condition number is less than 1/RCOND.  The order of R11, RANK,
+*  is the effective rank of A.
+*
+*  Then, R22 is considered to be negligible, and R12 is annihilated
+*  by orthogonal transformations from the right, arriving at the
+*  complete orthogonal factorization:
+*     A * P = Q * [ T11 0 ] * Z
+*                 [  0  0 ]
+*  The minimum-norm solution is then
+*     X = P * Z' [ inv(T11)*Q1'*B ]
+*                [        0       ]
+*  where Q1 consists of the first RANK columns of Q.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of
+*          columns of matrices B and X. NRHS >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, A has been overwritten by details of its
+*          complete orthogonal factorization.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the M-by-NRHS right hand side matrix B.
+*          On exit, the N-by-NRHS solution matrix X.
+*          If m >= n and RANK = n, the residual sum-of-squares for
+*          the solution in the i-th column is given by the sum of
+*          squares of elements N+1:M in that column.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,M,N).
+*
+*  JPVT    (input/output) INTEGER array, dimension (N)
+*          On entry, if JPVT(i) .ne. 0, the i-th column of A is an
+*          initial column, otherwise it is a free column.  Before
+*          the QR factorization of A, all initial columns are
+*          permuted to the leading positions; only the remaining
+*          free columns are moved as a result of column pivoting
+*          during the factorization.
+*          On exit, if JPVT(i) = k, then the i-th column of A*P
+*          was the k-th column of A.
+*
+*  RCOND   (input) REAL
+*          RCOND is used to determine the effective rank of A, which
+*          is defined as the order of the largest leading triangular
+*          submatrix R11 in the QR factorization with pivoting of A,
+*          whose estimated condition number < 1/RCOND.
+*
+*  RANK    (output) INTEGER
+*          The effective rank of A, i.e., the order of the submatrix
+*          R11.  This is the same as the order of the submatrix T11
+*          in the complete orthogonal factorization of A.
+*
+*  WORK    (workspace) REAL array, dimension
+*                      (max( min(M,N)+3*N, 2*min(M,N)+NRHS )),
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            IMAX, IMIN
+      PARAMETER          ( IMAX = 1, IMIN = 2 )
+      REAL               ZERO, ONE, DONE, NTDONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0, DONE = ZERO,
+     $                   NTDONE = ONE )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IASCL, IBSCL, ISMAX, ISMIN, J, K, MN
+      REAL               ANRM, BIGNUM, BNRM, C1, C2, S1, S2, SMAX,
+     $                   SMAXPR, SMIN, SMINPR, SMLNUM, T1, T2
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH, SLANGE
+      EXTERNAL           SLAMCH, SLANGE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEQPF, SLABAD, SLAIC1, SLASCL, SLASET, SLATZM,
+     $                   SORM2R, STRSM, STZRQF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      MN = MIN( M, N )
+      ISMIN = MN + 1
+      ISMAX = 2*MN + 1
+*
+*     Test the input arguments.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN
+         INFO = -7
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGELSX', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( MIN( M, N, NRHS ).EQ.0 ) THEN
+         RANK = 0
+         RETURN
+      END IF
+*
+*     Get machine parameters
+*
+      SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' )
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+*
+*     Scale A, B if max elements outside range [SMLNUM,BIGNUM]
+*
+      ANRM = SLANGE( 'M', M, N, A, LDA, WORK )
+      IASCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
+         IASCL = 1
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
+         IASCL = 2
+      ELSE IF( ANRM.EQ.ZERO ) THEN
+*
+*        Matrix all zero. Return zero solution.
+*
+         CALL SLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
+         RANK = 0
+         GO TO 100
+      END IF
+*
+      BNRM = SLANGE( 'M', M, NRHS, B, LDB, WORK )
+      IBSCL = 0
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL SLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 1
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL SLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 2
+      END IF
+*
+*     Compute QR factorization with column pivoting of A:
+*        A * P = Q * R
+*
+      CALL SGEQPF( M, N, A, LDA, JPVT, WORK( 1 ), WORK( MN+1 ), INFO )
+*
+*     workspace 3*N. Details of Householder rotations stored
+*     in WORK(1:MN).
+*
+*     Determine RANK using incremental condition estimation
+*
+      WORK( ISMIN ) = ONE
+      WORK( ISMAX ) = ONE
+      SMAX = ABS( A( 1, 1 ) )
+      SMIN = SMAX
+      IF( ABS( A( 1, 1 ) ).EQ.ZERO ) THEN
+         RANK = 0
+         CALL SLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
+         GO TO 100
+      ELSE
+         RANK = 1
+      END IF
+*
+   10 CONTINUE
+      IF( RANK.LT.MN ) THEN
+         I = RANK + 1
+         CALL SLAIC1( IMIN, RANK, WORK( ISMIN ), SMIN, A( 1, I ),
+     $                A( I, I ), SMINPR, S1, C1 )
+         CALL SLAIC1( IMAX, RANK, WORK( ISMAX ), SMAX, A( 1, I ),
+     $                A( I, I ), SMAXPR, S2, C2 )
+*
+         IF( SMAXPR*RCOND.LE.SMINPR ) THEN
+            DO 20 I = 1, RANK
+               WORK( ISMIN+I-1 ) = S1*WORK( ISMIN+I-1 )
+               WORK( ISMAX+I-1 ) = S2*WORK( ISMAX+I-1 )
+   20       CONTINUE
+            WORK( ISMIN+RANK ) = C1
+            WORK( ISMAX+RANK ) = C2
+            SMIN = SMINPR
+            SMAX = SMAXPR
+            RANK = RANK + 1
+            GO TO 10
+         END IF
+      END IF
+*
+*     Logically partition R = [ R11 R12 ]
+*                             [  0  R22 ]
+*     where R11 = R(1:RANK,1:RANK)
+*
+*     [R11,R12] = [ T11, 0 ] * Y
+*
+      IF( RANK.LT.N )
+     $   CALL STZRQF( RANK, N, A, LDA, WORK( MN+1 ), INFO )
+*
+*     Details of Householder rotations stored in WORK(MN+1:2*MN)
+*
+*     B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS)
+*
+      CALL SORM2R( 'Left', 'Transpose', M, NRHS, MN, A, LDA, WORK( 1 ),
+     $             B, LDB, WORK( 2*MN+1 ), INFO )
+*
+*     workspace NRHS
+*
+*     B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS)
+*
+      CALL STRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', RANK,
+     $            NRHS, ONE, A, LDA, B, LDB )
+*
+      DO 40 I = RANK + 1, N
+         DO 30 J = 1, NRHS
+            B( I, J ) = ZERO
+   30    CONTINUE
+   40 CONTINUE
+*
+*     B(1:N,1:NRHS) := Y' * B(1:N,1:NRHS)
+*
+      IF( RANK.LT.N ) THEN
+         DO 50 I = 1, RANK
+            CALL SLATZM( 'Left', N-RANK+1, NRHS, A( I, RANK+1 ), LDA,
+     $                   WORK( MN+I ), B( I, 1 ), B( RANK+1, 1 ), LDB,
+     $                   WORK( 2*MN+1 ) )
+   50    CONTINUE
+      END IF
+*
+*     workspace NRHS
+*
+*     B(1:N,1:NRHS) := P * B(1:N,1:NRHS)
+*
+      DO 90 J = 1, NRHS
+         DO 60 I = 1, N
+            WORK( 2*MN+I ) = NTDONE
+   60    CONTINUE
+         DO 80 I = 1, N
+            IF( WORK( 2*MN+I ).EQ.NTDONE ) THEN
+               IF( JPVT( I ).NE.I ) THEN
+                  K = I
+                  T1 = B( K, J )
+                  T2 = B( JPVT( K ), J )
+   70             CONTINUE
+                  B( JPVT( K ), J ) = T1
+                  WORK( 2*MN+K ) = DONE
+                  T1 = T2
+                  K = JPVT( K )
+                  T2 = B( JPVT( K ), J )
+                  IF( JPVT( K ).NE.I )
+     $               GO TO 70
+                  B( I, J ) = T1
+                  WORK( 2*MN+K ) = DONE
+               END IF
+            END IF
+   80    CONTINUE
+   90 CONTINUE
+*
+*     Undo scaling
+*
+      IF( IASCL.EQ.1 ) THEN
+         CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
+         CALL SLASCL( 'U', 0, 0, SMLNUM, ANRM, RANK, RANK, A, LDA,
+     $                INFO )
+      ELSE IF( IASCL.EQ.2 ) THEN
+         CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
+         CALL SLASCL( 'U', 0, 0, BIGNUM, ANRM, RANK, RANK, A, LDA,
+     $                INFO )
+      END IF
+      IF( IBSCL.EQ.1 ) THEN
+         CALL SLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
+      ELSE IF( IBSCL.EQ.2 ) THEN
+         CALL SLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
+      END IF
+*
+  100 CONTINUE
+*
+      RETURN
+*
+*     End of SGELSX
+*
+      END
diff --git a/costa/native/external/lapack/sgelsy.f b/costa/native/external/lapack/sgelsy.f
new file mode 100644
index 000000000..0a4883793
--- /dev/null
+++ b/costa/native/external/lapack/sgelsy.f
@@ -0,0 +1,379 @@
+      SUBROUTINE SGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            JPVT( * )
+      REAL               A( LDA, * ), B( LDB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGELSY computes the minimum-norm solution to a real linear least
+*  squares problem:
+*      minimize || A * X - B ||
+*  using a complete orthogonal factorization of A.  A is an M-by-N
+*  matrix which may be rank-deficient.
+*
+*  Several right hand side vectors b and solution vectors x can be
+*  handled in a single call; they are stored as the columns of the
+*  M-by-NRHS right hand side matrix B and the N-by-NRHS solution
+*  matrix X.
+*
+*  The routine first computes a QR factorization with column pivoting:
+*      A * P = Q * [ R11 R12 ]
+*                  [  0  R22 ]
+*  with R11 defined as the largest leading submatrix whose estimated
+*  condition number is less than 1/RCOND.  The order of R11, RANK,
+*  is the effective rank of A.
+*
+*  Then, R22 is considered to be negligible, and R12 is annihilated
+*  by orthogonal transformations from the right, arriving at the
+*  complete orthogonal factorization:
+*     A * P = Q * [ T11 0 ] * Z
+*                 [  0  0 ]
+*  The minimum-norm solution is then
+*     X = P * Z' [ inv(T11)*Q1'*B ]
+*                [        0       ]
+*  where Q1 consists of the first RANK columns of Q.
+*
+*  This routine is basically identical to the original xGELSX except
+*  three differences:
+*    o The call to the subroutine xGEQPF has been substituted by the
+*      the call to the subroutine xGEQP3. This subroutine is a Blas-3
+*      version of the QR factorization with column pivoting.
+*    o Matrix B (the right hand side) is updated with Blas-3.
+*    o The permutation of matrix B (the right hand side) is faster and
+*      more simple.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of
+*          columns of matrices B and X. NRHS >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, A has been overwritten by details of its
+*          complete orthogonal factorization.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the M-by-NRHS right hand side matrix B.
+*          On exit, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,M,N).
+*
+*  JPVT    (input/output) INTEGER array, dimension (N)
+*          On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted
+*          to the front of AP, otherwise column i is a free column.
+*          On exit, if JPVT(i) = k, then the i-th column of AP
+*          was the k-th column of A.
+*
+*  RCOND   (input) REAL
+*          RCOND is used to determine the effective rank of A, which
+*          is defined as the order of the largest leading triangular
+*          submatrix R11 in the QR factorization with pivoting of A,
+*          whose estimated condition number < 1/RCOND.
+*
+*  RANK    (output) INTEGER
+*          The effective rank of A, i.e., the order of the submatrix
+*          R11.  This is the same as the order of the submatrix T11
+*          in the complete orthogonal factorization of A.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          The unblocked strategy requires that:
+*             LWORK >= MAX( MN+3*N+1, 2*MN+NRHS ),
+*          where MN = min( M, N ).
+*          The block algorithm requires that:
+*             LWORK >= MAX( MN+2*N+NB*(N+1), 2*MN+NB*NRHS ),
+*          where NB is an upper bound on the blocksize returned
+*          by ILAENV for the routines SGEQP3, STZRZF, STZRQF, SORMQR,
+*          and SORMRZ.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: If INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*    E. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain
+*    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            IMAX, IMIN
+      PARAMETER          ( IMAX = 1, IMIN = 2 )
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IASCL, IBSCL, ISMAX, ISMIN, J, LWKOPT, MN,
+     $                   NB, NB1, NB2, NB3, NB4
+      REAL               ANRM, BIGNUM, BNRM, C1, C2, S1, S2, SMAX,
+     $                   SMAXPR, SMIN, SMINPR, SMLNUM, WSIZE
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      REAL               SLAMCH, SLANGE
+      EXTERNAL           ILAENV, SLAMCH, SLANGE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SGEQP3, SLABAD, SLAIC1, SLASCL, SLASET,
+     $                   SORMQR, SORMRZ, STRSM, STZRZF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, REAL
+*     ..
+*     .. Executable Statements ..
+*
+      MN = MIN( M, N )
+      ISMIN = MN + 1
+      ISMAX = 2*MN + 1
+*
+*     Test the input arguments.
+*
+      INFO = 0
+      NB1 = ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
+      NB2 = ILAENV( 1, 'SGERQF', ' ', M, N, -1, -1 )
+      NB3 = ILAENV( 1, 'SORMQR', ' ', M, N, NRHS, -1 )
+      NB4 = ILAENV( 1, 'SORMRQ', ' ', M, N, NRHS, -1 )
+      NB = MAX( NB1, NB2, NB3, NB4 )
+      LWKOPT = MAX( 1, MN+2*N+NB*(N+1), 2*MN+NB*NRHS )
+      WORK( 1 ) = REAL( LWKOPT )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN
+         INFO = -7
+      ELSE IF( LWORK.LT.MAX( 1, MN+3*N+1, 2*MN+NRHS ) .AND.
+     $   .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGELSY', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( MIN( M, N, NRHS ).EQ.0 ) THEN
+         RANK = 0
+         RETURN
+      END IF
+*
+*     Get machine parameters
+*
+      SMLNUM = SLAMCH( 'S' ) / SLAMCH( 'P' )
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+*
+*     Scale A, B if max entries outside range [SMLNUM,BIGNUM]
+*
+      ANRM = SLANGE( 'M', M, N, A, LDA, WORK )
+      IASCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
+         IASCL = 1
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
+         IASCL = 2
+      ELSE IF( ANRM.EQ.ZERO ) THEN
+*
+*        Matrix all zero. Return zero solution.
+*
+         CALL SLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
+         RANK = 0
+         GO TO 70
+      END IF
+*
+      BNRM = SLANGE( 'M', M, NRHS, B, LDB, WORK )
+      IBSCL = 0
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL SLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 1
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL SLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 2
+      END IF
+*
+*     Compute QR factorization with column pivoting of A:
+*        A * P = Q * R
+*
+      CALL SGEQP3( M, N, A, LDA, JPVT, WORK( 1 ), WORK( MN+1 ),
+     $             LWORK-MN, INFO )
+      WSIZE = MN + WORK( MN+1 )
+*
+*     workspace: MN+2*N+NB*(N+1).
+*     Details of Householder rotations stored in WORK(1:MN).
+*
+*     Determine RANK using incremental condition estimation
+*
+      WORK( ISMIN ) = ONE
+      WORK( ISMAX ) = ONE
+      SMAX = ABS( A( 1, 1 ) )
+      SMIN = SMAX
+      IF( ABS( A( 1, 1 ) ).EQ.ZERO ) THEN
+         RANK = 0
+         CALL SLASET( 'F', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
+         GO TO 70
+      ELSE
+         RANK = 1
+      END IF
+*
+   10 CONTINUE
+      IF( RANK.LT.MN ) THEN
+         I = RANK + 1
+         CALL SLAIC1( IMIN, RANK, WORK( ISMIN ), SMIN, A( 1, I ),
+     $                A( I, I ), SMINPR, S1, C1 )
+         CALL SLAIC1( IMAX, RANK, WORK( ISMAX ), SMAX, A( 1, I ),
+     $                A( I, I ), SMAXPR, S2, C2 )
+*
+         IF( SMAXPR*RCOND.LE.SMINPR ) THEN
+            DO 20 I = 1, RANK
+               WORK( ISMIN+I-1 ) = S1*WORK( ISMIN+I-1 )
+               WORK( ISMAX+I-1 ) = S2*WORK( ISMAX+I-1 )
+   20       CONTINUE
+            WORK( ISMIN+RANK ) = C1
+            WORK( ISMAX+RANK ) = C2
+            SMIN = SMINPR
+            SMAX = SMAXPR
+            RANK = RANK + 1
+            GO TO 10
+         END IF
+      END IF
+*
+*     workspace: 3*MN.
+*
+*     Logically partition R = [ R11 R12 ]
+*                             [  0  R22 ]
+*     where R11 = R(1:RANK,1:RANK)
+*
+*     [R11,R12] = [ T11, 0 ] * Y
+*
+      IF( RANK.LT.N )
+     $   CALL STZRZF( RANK, N, A, LDA, WORK( MN+1 ), WORK( 2*MN+1 ),
+     $                LWORK-2*MN, INFO )
+*
+*     workspace: 2*MN.
+*     Details of Householder rotations stored in WORK(MN+1:2*MN)
+*
+*     B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS)
+*
+      CALL SORMQR( 'Left', 'Transpose', M, NRHS, MN, A, LDA, WORK( 1 ),
+     $             B, LDB, WORK( 2*MN+1 ), LWORK-2*MN, INFO )
+      WSIZE = MAX( WSIZE, 2*MN+WORK( 2*MN+1 ) )
+*
+*     workspace: 2*MN+NB*NRHS.
+*
+*     B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS)
+*
+      CALL STRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', RANK,
+     $            NRHS, ONE, A, LDA, B, LDB )
+*
+      DO 40 J = 1, NRHS
+         DO 30 I = RANK + 1, N
+            B( I, J ) = ZERO
+   30    CONTINUE
+   40 CONTINUE
+*
+*     B(1:N,1:NRHS) := Y' * B(1:N,1:NRHS)
+*
+      IF( RANK.LT.N ) THEN
+         CALL SORMRZ( 'Left', 'Transpose', N, NRHS, RANK, N-RANK, A,
+     $                LDA, WORK( MN+1 ), B, LDB, WORK( 2*MN+1 ),
+     $                LWORK-2*MN, INFO )
+      END IF
+*
+*     workspace: 2*MN+NRHS.
+*
+*     B(1:N,1:NRHS) := P * B(1:N,1:NRHS)
+*
+      DO 60 J = 1, NRHS
+         DO 50 I = 1, N
+            WORK( JPVT( I ) ) = B( I, J )
+   50    CONTINUE
+         CALL SCOPY( N, WORK( 1 ), 1, B( 1, J ), 1 )
+   60 CONTINUE
+*
+*     workspace: N.
+*
+*     Undo scaling
+*
+      IF( IASCL.EQ.1 ) THEN
+         CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
+         CALL SLASCL( 'U', 0, 0, SMLNUM, ANRM, RANK, RANK, A, LDA,
+     $                INFO )
+      ELSE IF( IASCL.EQ.2 ) THEN
+         CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
+         CALL SLASCL( 'U', 0, 0, BIGNUM, ANRM, RANK, RANK, A, LDA,
+     $                INFO )
+      END IF
+      IF( IBSCL.EQ.1 ) THEN
+         CALL SLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
+      ELSE IF( IBSCL.EQ.2 ) THEN
+         CALL SLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
+      END IF
+*
+   70 CONTINUE
+      WORK( 1 ) = REAL( LWKOPT )
+*
+      RETURN
+*
+*     End of SGELSY
+*
+      END
diff --git a/costa/native/external/lapack/sgeql2.f b/costa/native/external/lapack/sgeql2.f
new file mode 100644
index 000000000..6dd47937c
--- /dev/null
+++ b/costa/native/external/lapack/sgeql2.f
@@ -0,0 +1,123 @@
+      SUBROUTINE SGEQL2( M, N, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGEQL2 computes a QL factorization of a real m by n matrix A:
+*  A = Q * L.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the m by n matrix A.
+*          On exit, if m >= n, the lower triangle of the subarray
+*          A(m-n+1:m,1:n) contains the n by n lower triangular matrix L;
+*          if m <= n, the elements on and below the (n-m)-th
+*          superdiagonal contain the m by n lower trapezoidal matrix L;
+*          the remaining elements, with the array TAU, represent the
+*          orthogonal matrix Q as a product of elementary reflectors
+*          (see Further Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) REAL array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(k) . . . H(2) H(1), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in
+*  A(1:m-k+i-1,n-k+i), and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, K
+      REAL               AII
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARF, SLARFG, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGEQL2', -INFO )
+         RETURN
+      END IF
+*
+      K = MIN( M, N )
+*
+      DO 10 I = K, 1, -1
+*
+*        Generate elementary reflector H(i) to annihilate
+*        A(1:m-k+i-1,n-k+i)
+*
+         CALL SLARFG( M-K+I, A( M-K+I, N-K+I ), A( 1, N-K+I ), 1,
+     $                TAU( I ) )
+*
+*        Apply H(i) to A(1:m-k+i,1:n-k+i-1) from the left
+*
+         AII = A( M-K+I, N-K+I )
+         A( M-K+I, N-K+I ) = ONE
+         CALL SLARF( 'Left', M-K+I, N-K+I-1, A( 1, N-K+I ), 1, TAU( I ),
+     $               A, LDA, WORK )
+         A( M-K+I, N-K+I ) = AII
+   10 CONTINUE
+      RETURN
+*
+*     End of SGEQL2
+*
+      END
diff --git a/costa/native/external/lapack/sgeqlf.f b/costa/native/external/lapack/sgeqlf.f
new file mode 100644
index 000000000..5078932ad
--- /dev/null
+++ b/costa/native/external/lapack/sgeqlf.f
@@ -0,0 +1,205 @@
+      SUBROUTINE SGEQLF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGEQLF computes a QL factorization of a real M-by-N matrix A:
+*  A = Q * L.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit,
+*          if m >= n, the lower triangle of the subarray
+*          A(m-n+1:m,1:n) contains the N-by-N lower triangular matrix L;
+*          if m <= n, the elements on and below the (n-m)-th
+*          superdiagonal contain the M-by-N lower trapezoidal matrix L;
+*          the remaining elements, with the array TAU, represent the
+*          orthogonal matrix Q as a product of elementary reflectors
+*          (see Further Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) REAL array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,N).
+*          For optimum performance LWORK >= N*NB, where NB is the
+*          optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(k) . . . H(2) H(1), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in
+*  A(1:m-k+i-1,n-k+i), and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, K, KI, KK, LDWORK, LWKOPT,
+     $                   MU, NB, NBMIN, NU, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEQL2, SLARFB, SLARFT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'SGEQLF', ' ', M, N, -1, -1 )
+      LWKOPT = N*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGEQLF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      K = MIN( M, N )
+      IF( K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 1
+      IWS = N
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'SGEQLF', ' ', M, N, -1, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = N
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'SGEQLF', ' ', M, N, -1,
+     $                 -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code initially.
+*        The last kk columns are handled by the block method.
+*
+         KI = ( ( K-NX-1 ) / NB )*NB
+         KK = MIN( K, KI+NB )
+*
+         DO 10 I = K - KK + KI + 1, K - KK + 1, -NB
+            IB = MIN( K-I+1, NB )
+*
+*           Compute the QL factorization of the current block
+*           A(1:m-k+i+ib-1,n-k+i:n-k+i+ib-1)
+*
+            CALL SGEQL2( M-K+I+IB-1, IB, A( 1, N-K+I ), LDA, TAU( I ),
+     $                   WORK, IINFO )
+            IF( N-K+I.GT.1 ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i+ib-1) . . . H(i+1) H(i)
+*
+               CALL SLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB,
+     $                      A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H' to A(1:m-k+i+ib-1,1:n-k+i-1) from the left
+*
+               CALL SLARFB( 'Left', 'Transpose', 'Backward',
+     $                      'Columnwise', M-K+I+IB-1, N-K+I-1, IB,
+     $                      A( 1, N-K+I ), LDA, WORK, LDWORK, A, LDA,
+     $                      WORK( IB+1 ), LDWORK )
+            END IF
+   10    CONTINUE
+         MU = M - K + I + NB - 1
+         NU = N - K + I + NB - 1
+      ELSE
+         MU = M
+         NU = N
+      END IF
+*
+*     Use unblocked code to factor the last or only block
+*
+      IF( MU.GT.0 .AND. NU.GT.0 )
+     $   CALL SGEQL2( MU, NU, A, LDA, TAU, WORK, IINFO )
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of SGEQLF
+*
+      END
diff --git a/costa/native/external/lapack/sgeqp3.f b/costa/native/external/lapack/sgeqp3.f
new file mode 100644
index 000000000..a52a5d70a
--- /dev/null
+++ b/costa/native/external/lapack/sgeqp3.f
@@ -0,0 +1,279 @@
+      SUBROUTINE SGEQP3( M, N, A, LDA, JPVT, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            JPVT( * )
+      REAL               A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGEQP3 computes a QR factorization with column pivoting of a
+*  matrix A:  A*P = Q*R  using Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, the upper triangle of the array contains the
+*          min(M,N)-by-N upper trapezoidal matrix R; the elements below
+*          the diagonal, together with the array TAU, represent the
+*          orthogonal matrix Q as a product of min(M,N) elementary
+*          reflectors.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  JPVT    (input/output) INTEGER array, dimension (N)
+*          On entry, if JPVT(J).ne.0, the J-th column of A is permuted
+*          to the front of A*P (a leading column); if JPVT(J)=0,
+*          the J-th column of A is a free column.
+*          On exit, if JPVT(J)=K, then the J-th column of A*P was the
+*          the K-th column of A.
+*
+*  TAU     (output) REAL array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO=0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= 3*N+1.
+*          For optimal performance LWORK >= 2*N+( N+1 )*NB, where NB
+*          is the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit.
+*          < 0: if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(k), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real/complex scalar, and v is a real/complex vector
+*  with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in
+*  A(i+1:m,i), and tau in TAU(i).
+*
+*  Based on contributions by
+*    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain
+*    X. Sun, Computer Science Dept., Duke University, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            INB, INBMIN, IXOVER
+      PARAMETER          ( INB = 1, INBMIN = 2, IXOVER = 3 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            FJB, IWS, J, JB, LWKOPT, MINMN, MINWS, NA, NB,
+     $                   NBMIN, NFXD, NX, SM, SMINMN, SN, TOPBMN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEQRF, SLAQP2, SLAQPS, SORMQR, SSWAP, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      REAL               SNRM2
+      EXTERNAL           ILAENV, SNRM2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      IWS = 3*N + 1
+      MINMN = MIN( M, N )
+*
+*     Test input arguments
+*     ====================
+*
+      INFO = 0
+      NB = ILAENV( INB, 'SGEQRF', ' ', M, N, -1, -1 )
+      LWKOPT = 2*N+( N+1 )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( ( LWORK.LT.IWS ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGEQP3', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( MINMN.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+*     Move initial columns up front.
+*
+      NFXD = 1
+      DO 10 J = 1, N
+         IF( JPVT( J ).NE.0 ) THEN
+            IF( J.NE.NFXD ) THEN
+               CALL SSWAP( M, A( 1, J ), 1, A( 1, NFXD ), 1 )
+               JPVT( J ) = JPVT( NFXD )
+               JPVT( NFXD ) = J
+            ELSE
+               JPVT( J ) = J
+            END IF
+            NFXD = NFXD + 1
+         ELSE
+            JPVT( J ) = J
+         END IF
+   10 CONTINUE
+      NFXD = NFXD - 1
+*
+*     Factorize fixed columns
+*     =======================
+*
+*     Compute the QR factorization of fixed columns and update
+*     remaining columns.
+*
+      IF( NFXD.GT.0 ) THEN
+         NA = MIN( M, NFXD )
+*CC      CALL SGEQR2( M, NA, A, LDA, TAU, WORK, INFO )
+         CALL SGEQRF( M, NA, A, LDA, TAU, WORK, LWORK, INFO )
+         IWS = MAX( IWS, INT( WORK( 1 ) ) )
+         IF( NA.LT.N ) THEN
+*CC         CALL SORM2R( 'Left', 'Transpose', M, N-NA, NA, A, LDA,
+*CC  $                   TAU, A( 1, NA+1 ), LDA, WORK, INFO )
+            CALL SORMQR( 'Left', 'Transpose', M, N-NA, NA, A, LDA, TAU,
+     $                   A( 1, NA+1 ), LDA, WORK, LWORK, INFO )
+            IWS = MAX( IWS, INT( WORK( 1 ) ) )
+         END IF
+      END IF
+*
+*     Factorize free columns
+*     ======================
+*
+      IF( NFXD.LT.MINMN ) THEN
+*
+         SM = M - NFXD
+         SN = N - NFXD
+         SMINMN = MINMN - NFXD
+*
+*        Determine the block size.
+*
+         NB = ILAENV( INB, 'SGEQRF', ' ', SM, SN, -1, -1 )
+         NBMIN = 2
+         NX = 0
+*
+         IF( ( NB.GT.1 ) .AND. ( NB.LT.SMINMN ) ) THEN
+*
+*           Determine when to cross over from blocked to unblocked code.
+*
+            NX = MAX( 0, ILAENV( IXOVER, 'SGEQRF', ' ', SM, SN, -1,
+     $           -1 ) )
+*
+*
+            IF( NX.LT.SMINMN ) THEN
+*
+*              Determine if workspace is large enough for blocked code.
+*
+               MINWS = 2*SN + ( SN+1 )*NB
+               IWS = MAX( IWS, MINWS )
+               IF( LWORK.LT.MINWS ) THEN
+*
+*                 Not enough workspace to use optimal NB: Reduce NB and
+*                 determine the minimum value of NB.
+*
+                  NB = ( LWORK-2*SN ) / ( SN+1 )
+                  NBMIN = MAX( 2, ILAENV( INBMIN, 'SGEQRF', ' ', SM, SN,
+     $                    -1, -1 ) )
+*
+*
+               END IF
+            END IF
+         END IF
+*
+*        Initialize partial column norms. The first N elements of work
+*        store the exact column norms.
+*
+         DO 20 J = NFXD + 1, N
+            WORK( J ) = SNRM2( SM, A( NFXD+1, J ), 1 )
+            WORK( N+J ) = WORK( J )
+   20    CONTINUE
+*
+         IF( ( NB.GE.NBMIN ) .AND. ( NB.LT.SMINMN ) .AND.
+     $       ( NX.LT.SMINMN ) ) THEN
+*
+*           Use blocked code initially.
+*
+            J = NFXD + 1
+*
+*           Compute factorization: while loop.
+*
+*
+            TOPBMN = MINMN - NX
+   30       CONTINUE
+            IF( J.LE.TOPBMN ) THEN
+               JB = MIN( NB, TOPBMN-J+1 )
+*
+*              Factorize JB columns among columns J:N.
+*
+               CALL SLAQPS( M, N-J+1, J-1, JB, FJB, A( 1, J ), LDA,
+     $                      JPVT( J ), TAU( J ), WORK( J ), WORK( N+J ),
+     $                      WORK( 2*N+1 ), WORK( 2*N+JB+1 ), N-J+1 )
+*
+               J = J + FJB
+               GO TO 30
+            END IF
+         ELSE
+            J = NFXD + 1
+         END IF
+*
+*        Use unblocked code to factor the last or only block.
+*
+*
+         IF( J.LE.MINMN )
+     $      CALL SLAQP2( M, N-J+1, J-1, A( 1, J ), LDA, JPVT( J ),
+     $                   TAU( J ), WORK( J ), WORK( N+J ),
+     $                   WORK( 2*N+1 ) )
+*
+      END IF
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of SGEQP3
+*
+      END
diff --git a/costa/native/external/lapack/sgeqpf.f b/costa/native/external/lapack/sgeqpf.f
new file mode 100644
index 000000000..8156b0867
--- /dev/null
+++ b/costa/native/external/lapack/sgeqpf.f
@@ -0,0 +1,221 @@
+      SUBROUTINE SGEQPF( M, N, A, LDA, JPVT, TAU, WORK, INFO )
+*
+*  -- LAPACK test routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            JPVT( * )
+      REAL               A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This routine is deprecated and has been replaced by routine SGEQP3.
+*
+*  SGEQPF computes a QR factorization with column pivoting of a
+*  real M-by-N matrix A: A*P = Q*R.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A. N >= 0
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, the upper triangle of the array contains the
+*          min(M,N)-by-N upper triangular matrix R; the elements
+*          below the diagonal, together with the array TAU,
+*          represent the orthogonal matrix Q as a product of
+*          min(m,n) elementary reflectors.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  JPVT    (input/output) INTEGER array, dimension (N)
+*          On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted
+*          to the front of A*P (a leading column); if JPVT(i) = 0,
+*          the i-th column of A is a free column.
+*          On exit, if JPVT(i) = k, then the i-th column of A*P
+*          was the k-th column of A.
+*
+*  TAU     (output) REAL array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors.
+*
+*  WORK    (workspace) REAL array, dimension (3*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(n)
+*
+*  Each H(i) has the form
+*
+*     H = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i).
+*
+*  The matrix P is represented in jpvt as follows: If
+*     jpvt(j) = i
+*  then the jth column of P is the ith canonical unit vector.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, ITEMP, J, MA, MN, PVT
+      REAL               AII, TEMP, TEMP2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEQR2, SLARF, SLARFG, SORM2R, SSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. External Functions ..
+      INTEGER            ISAMAX
+      REAL               SNRM2
+      EXTERNAL           ISAMAX, SNRM2
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGEQPF', -INFO )
+         RETURN
+      END IF
+*
+      MN = MIN( M, N )
+*
+*     Move initial columns up front
+*
+      ITEMP = 1
+      DO 10 I = 1, N
+         IF( JPVT( I ).NE.0 ) THEN
+            IF( I.NE.ITEMP ) THEN
+               CALL SSWAP( M, A( 1, I ), 1, A( 1, ITEMP ), 1 )
+               JPVT( I ) = JPVT( ITEMP )
+               JPVT( ITEMP ) = I
+            ELSE
+               JPVT( I ) = I
+            END IF
+            ITEMP = ITEMP + 1
+         ELSE
+            JPVT( I ) = I
+         END IF
+   10 CONTINUE
+      ITEMP = ITEMP - 1
+*
+*     Compute the QR factorization and update remaining columns
+*
+      IF( ITEMP.GT.0 ) THEN
+         MA = MIN( ITEMP, M )
+         CALL SGEQR2( M, MA, A, LDA, TAU, WORK, INFO )
+         IF( MA.LT.N ) THEN
+            CALL SORM2R( 'Left', 'Transpose', M, N-MA, MA, A, LDA, TAU,
+     $                   A( 1, MA+1 ), LDA, WORK, INFO )
+         END IF
+      END IF
+*
+      IF( ITEMP.LT.MN ) THEN
+*
+*        Initialize partial column norms. The first n elements of
+*        work store the exact column norms.
+*
+         DO 20 I = ITEMP + 1, N
+            WORK( I ) = SNRM2( M-ITEMP, A( ITEMP+1, I ), 1 )
+            WORK( N+I ) = WORK( I )
+   20    CONTINUE
+*
+*        Compute factorization
+*
+         DO 40 I = ITEMP + 1, MN
+*
+*           Determine ith pivot column and swap if necessary
+*
+            PVT = ( I-1 ) + ISAMAX( N-I+1, WORK( I ), 1 )
+*
+            IF( PVT.NE.I ) THEN
+               CALL SSWAP( M, A( 1, PVT ), 1, A( 1, I ), 1 )
+               ITEMP = JPVT( PVT )
+               JPVT( PVT ) = JPVT( I )
+               JPVT( I ) = ITEMP
+               WORK( PVT ) = WORK( I )
+               WORK( N+PVT ) = WORK( N+I )
+            END IF
+*
+*           Generate elementary reflector H(i)
+*
+            IF( I.LT.M ) THEN
+               CALL SLARFG( M-I+1, A( I, I ), A( I+1, I ), 1, TAU( I ) )
+            ELSE
+               CALL SLARFG( 1, A( M, M ), A( M, M ), 1, TAU( M ) )
+            END IF
+*
+            IF( I.LT.N ) THEN
+*
+*              Apply H(i) to A(i:m,i+1:n) from the left
+*
+               AII = A( I, I )
+               A( I, I ) = ONE
+               CALL SLARF( 'LEFT', M-I+1, N-I, A( I, I ), 1, TAU( I ),
+     $                     A( I, I+1 ), LDA, WORK( 2*N+1 ) )
+               A( I, I ) = AII
+            END IF
+*
+*           Update partial column norms
+*
+            DO 30 J = I + 1, N
+               IF( WORK( J ).NE.ZERO ) THEN
+                  TEMP = ONE - ( ABS( A( I, J ) ) / WORK( J ) )**2
+                  TEMP = MAX( TEMP, ZERO )
+                  TEMP2 = ONE + 0.05*TEMP*( WORK( J ) / WORK( N+J ) )**2
+                  IF( TEMP2.EQ.ONE ) THEN
+                     IF( M-I.GT.0 ) THEN
+                        WORK( J ) = SNRM2( M-I, A( I+1, J ), 1 )
+                        WORK( N+J ) = WORK( J )
+                     ELSE
+                        WORK( J ) = ZERO
+                        WORK( N+J ) = ZERO
+                     END IF
+                  ELSE
+                     WORK( J ) = WORK( J )*SQRT( TEMP )
+                  END IF
+               END IF
+   30       CONTINUE
+*
+   40    CONTINUE
+      END IF
+      RETURN
+*
+*     End of SGEQPF
+*
+      END
diff --git a/costa/native/external/lapack/sgeqr2.f b/costa/native/external/lapack/sgeqr2.f
new file mode 100644
index 000000000..4eae8cf99
--- /dev/null
+++ b/costa/native/external/lapack/sgeqr2.f
@@ -0,0 +1,122 @@
+      SUBROUTINE SGEQR2( M, N, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGEQR2 computes a QR factorization of a real m by n matrix A:
+*  A = Q * R.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the m by n matrix A.
+*          On exit, the elements on and above the diagonal of the array
+*          contain the min(m,n) by n upper trapezoidal matrix R (R is
+*          upper triangular if m >= n); the elements below the diagonal,
+*          with the array TAU, represent the orthogonal matrix Q as a
+*          product of elementary reflectors (see Further Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) REAL array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(k), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
+*  and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, K
+      REAL               AII
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARF, SLARFG, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGEQR2', -INFO )
+         RETURN
+      END IF
+*
+      K = MIN( M, N )
+*
+      DO 10 I = 1, K
+*
+*        Generate elementary reflector H(i) to annihilate A(i+1:m,i)
+*
+         CALL SLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1,
+     $                TAU( I ) )
+         IF( I.LT.N ) THEN
+*
+*           Apply H(i) to A(i:m,i+1:n) from the left
+*
+            AII = A( I, I )
+            A( I, I ) = ONE
+            CALL SLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ),
+     $                  A( I, I+1 ), LDA, WORK )
+            A( I, I ) = AII
+         END IF
+   10 CONTINUE
+      RETURN
+*
+*     End of SGEQR2
+*
+      END
diff --git a/costa/native/external/lapack/sgeqrf.f b/costa/native/external/lapack/sgeqrf.f
new file mode 100644
index 000000000..39da1a2c3
--- /dev/null
+++ b/costa/native/external/lapack/sgeqrf.f
@@ -0,0 +1,197 @@
+      SUBROUTINE SGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGEQRF computes a QR factorization of a real M-by-N matrix A:
+*  A = Q * R.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, the elements on and above the diagonal of the array
+*          contain the min(M,N)-by-N upper trapezoidal matrix R (R is
+*          upper triangular if m >= n); the elements below the diagonal,
+*          with the array TAU, represent the orthogonal matrix Q as a
+*          product of min(m,n) elementary reflectors (see Further
+*          Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) REAL array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,N).
+*          For optimum performance LWORK >= N*NB, where NB is 
+*          the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(k), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
+*  and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB,
+     $                   NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEQR2, SLARFB, SLARFT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
+      LWKOPT = N*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGEQRF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      K = MIN( M, N )
+      IF( K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 0
+      IWS = N
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'SGEQRF', ' ', M, N, -1, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = N
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'SGEQRF', ' ', M, N, -1,
+     $                 -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code initially
+*
+         DO 10 I = 1, K - NX, NB
+            IB = MIN( K-I+1, NB )
+*
+*           Compute the QR factorization of the current block
+*           A(i:m,i:i+ib-1)
+*
+            CALL SGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ), WORK,
+     $                   IINFO )
+            IF( I+IB.LE.N ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i) H(i+1) . . . H(i+ib-1)
+*
+               CALL SLARFT( 'Forward', 'Columnwise', M-I+1, IB,
+     $                      A( I, I ), LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H' to A(i:m,i+ib:n) from the left
+*
+               CALL SLARFB( 'Left', 'Transpose', 'Forward',
+     $                      'Columnwise', M-I+1, N-I-IB+1, IB,
+     $                      A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ),
+     $                      LDA, WORK( IB+1 ), LDWORK )
+            END IF
+   10    CONTINUE
+      ELSE
+         I = 1
+      END IF
+*
+*     Use unblocked code to factor the last or only block.
+*
+      IF( I.LE.K )
+     $   CALL SGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK,
+     $                IINFO )
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of SGEQRF
+*
+      END
diff --git a/costa/native/external/lapack/sgerfs.f b/costa/native/external/lapack/sgerfs.f
new file mode 100644
index 000000000..56eef6157
--- /dev/null
+++ b/costa/native/external/lapack/sgerfs.f
@@ -0,0 +1,332 @@
+      SUBROUTINE SGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB,
+     $                   X, LDX, FERR, BERR, WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), IWORK( * )
+      REAL               A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
+     $                   BERR( * ), FERR( * ), WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGERFS improves the computed solution to a system of linear
+*  equations and provides error bounds and backward error estimates for
+*  the solution.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose = Transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input) REAL array, dimension (LDA,N)
+*          The original N-by-N matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  AF      (input) REAL array, dimension (LDAF,N)
+*          The factors L and U from the factorization A = P*L*U
+*          as computed by SGETRF.
+*
+*  LDAF    (input) INTEGER
+*          The leading dimension of the array AF.  LDAF >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices from SGETRF; for 1<=i<=N, row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  B       (input) REAL array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) REAL array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by SGETRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) REAL array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+      REAL               TWO
+      PARAMETER          ( TWO = 2.0E+0 )
+      REAL               THREE
+      PARAMETER          ( THREE = 3.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN
+      CHARACTER          TRANST
+      INTEGER            COUNT, I, J, K, KASE, NZ
+      REAL               EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SCOPY, SGEMV, SGETRS, SLACON, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $    LSAME( TRANS, 'C' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGERFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      IF( NOTRAN ) THEN
+         TRANST = 'T'
+      ELSE
+         TRANST = 'N'
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = SLAMCH( 'Epsilon' )
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - op(A) * X,
+*        where op(A) = A, A**T, or A**H, depending on TRANS.
+*
+         CALL SCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 )
+         CALL SGEMV( TRANS, N, N, -ONE, A, LDA, X( 1, J ), 1, ONE,
+     $               WORK( N+1 ), 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            WORK( I ) = ABS( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(op(A))*abs(X) + abs(B).
+*
+         IF( NOTRAN ) THEN
+            DO 50 K = 1, N
+               XK = ABS( X( K, J ) )
+               DO 40 I = 1, N
+                  WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK
+   40          CONTINUE
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               DO 60 I = 1, N
+                  S = S + ABS( A( I, K ) )*ABS( X( I, J ) )
+   60          CONTINUE
+               WORK( K ) = WORK( K ) + S
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) )
+            ELSE
+               S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) /
+     $             ( WORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL SGETRS( TRANS, N, 1, AF, LDAF, IPIV, WORK( N+1 ), N,
+     $                   INFO )
+            CALL SAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(op(A)))*
+*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(op(A)) is the inverse of op(A)
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(op(A))*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use SLACON to estimate the infinity-norm of the matrix
+*           inv(op(A)) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I )
+            ELSE
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL SLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ),
+     $                KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(op(A)**T).
+*
+               CALL SGETRS( TRANST, N, 1, AF, LDAF, IPIV, WORK( N+1 ),
+     $                      N, INFO )
+               DO 110 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  110          CONTINUE
+            ELSE
+*
+*              Multiply by inv(op(A))*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  120          CONTINUE
+               CALL SGETRS( TRANS, N, 1, AF, LDAF, IPIV, WORK( N+1 ), N,
+     $                      INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, ABS( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of SGERFS
+*
+      END
diff --git a/costa/native/external/lapack/sgerq2.f b/costa/native/external/lapack/sgerq2.f
new file mode 100644
index 000000000..db0fa748e
--- /dev/null
+++ b/costa/native/external/lapack/sgerq2.f
@@ -0,0 +1,123 @@
+      SUBROUTINE SGERQ2( M, N, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGERQ2 computes an RQ factorization of a real m by n matrix A:
+*  A = R * Q.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the m by n matrix A.
+*          On exit, if m <= n, the upper triangle of the subarray
+*          A(1:m,n-m+1:n) contains the m by m upper triangular matrix R;
+*          if m >= n, the elements on and above the (m-n)-th subdiagonal
+*          contain the m by n upper trapezoidal matrix R; the remaining
+*          elements, with the array TAU, represent the orthogonal matrix
+*          Q as a product of elementary reflectors (see Further
+*          Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) REAL array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace) REAL array, dimension (M)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(k), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in
+*  A(m-k+i,1:n-k+i-1), and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, K
+      REAL               AII
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARF, SLARFG, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGERQ2', -INFO )
+         RETURN
+      END IF
+*
+      K = MIN( M, N )
+*
+      DO 10 I = K, 1, -1
+*
+*        Generate elementary reflector H(i) to annihilate
+*        A(m-k+i,1:n-k+i-1)
+*
+         CALL SLARFG( N-K+I, A( M-K+I, N-K+I ), A( M-K+I, 1 ), LDA,
+     $                TAU( I ) )
+*
+*        Apply H(i) to A(1:m-k+i-1,1:n-k+i) from the right
+*
+         AII = A( M-K+I, N-K+I )
+         A( M-K+I, N-K+I ) = ONE
+         CALL SLARF( 'Right', M-K+I-1, N-K+I, A( M-K+I, 1 ), LDA,
+     $               TAU( I ), A, LDA, WORK )
+         A( M-K+I, N-K+I ) = AII
+   10 CONTINUE
+      RETURN
+*
+*     End of SGERQ2
+*
+      END
diff --git a/costa/native/external/lapack/sgerqf.f b/costa/native/external/lapack/sgerqf.f
new file mode 100644
index 000000000..e5163dbfa
--- /dev/null
+++ b/costa/native/external/lapack/sgerqf.f
@@ -0,0 +1,205 @@
+      SUBROUTINE SGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGERQF computes an RQ factorization of a real M-by-N matrix A:
+*  A = R * Q.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit,
+*          if m <= n, the upper triangle of the subarray
+*          A(1:m,n-m+1:n) contains the M-by-M upper triangular matrix R;
+*          if m >= n, the elements on and above the (m-n)-th subdiagonal
+*          contain the M-by-N upper trapezoidal matrix R;
+*          the remaining elements, with the array TAU, represent the
+*          orthogonal matrix Q as a product of min(m,n) elementary
+*          reflectors (see Further Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) REAL array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,M).
+*          For optimum performance LWORK >= M*NB, where NB is
+*          the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(k), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in
+*  A(m-k+i,1:n-k+i-1), and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, K, KI, KK, LDWORK, LWKOPT,
+     $                   MU, NB, NBMIN, NU, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGERQ2, SLARFB, SLARFT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'SGERQF', ' ', M, N, -1, -1 )
+      LWKOPT = M*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGERQF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      K = MIN( M, N )
+      IF( K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 1
+      IWS = M
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'SGERQF', ' ', M, N, -1, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = M
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'SGERQF', ' ', M, N, -1,
+     $                 -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code initially.
+*        The last kk rows are handled by the block method.
+*
+         KI = ( ( K-NX-1 ) / NB )*NB
+         KK = MIN( K, KI+NB )
+*
+         DO 10 I = K - KK + KI + 1, K - KK + 1, -NB
+            IB = MIN( K-I+1, NB )
+*
+*           Compute the RQ factorization of the current block
+*           A(m-k+i:m-k+i+ib-1,1:n-k+i+ib-1)
+*
+            CALL SGERQ2( IB, N-K+I+IB-1, A( M-K+I, 1 ), LDA, TAU( I ),
+     $                   WORK, IINFO )
+            IF( M-K+I.GT.1 ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i+ib-1) . . . H(i+1) H(i)
+*
+               CALL SLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB,
+     $                      A( M-K+I, 1 ), LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H to A(1:m-k+i-1,1:n-k+i+ib-1) from the right
+*
+               CALL SLARFB( 'Right', 'No transpose', 'Backward',
+     $                      'Rowwise', M-K+I-1, N-K+I+IB-1, IB,
+     $                      A( M-K+I, 1 ), LDA, WORK, LDWORK, A, LDA,
+     $                      WORK( IB+1 ), LDWORK )
+            END IF
+   10    CONTINUE
+         MU = M - K + I + NB - 1
+         NU = N - K + I + NB - 1
+      ELSE
+         MU = M
+         NU = N
+      END IF
+*
+*     Use unblocked code to factor the last or only block
+*
+      IF( MU.GT.0 .AND. NU.GT.0 )
+     $   CALL SGERQ2( MU, NU, A, LDA, TAU, WORK, IINFO )
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of SGERQF
+*
+      END
diff --git a/costa/native/external/lapack/sgesc2.f b/costa/native/external/lapack/sgesc2.f
new file mode 100644
index 000000000..28af144aa
--- /dev/null
+++ b/costa/native/external/lapack/sgesc2.f
@@ -0,0 +1,133 @@
+      SUBROUTINE SGESC2( N, A, LDA, RHS, IPIV, JPIV, SCALE )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            LDA, N
+      REAL               SCALE
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), JPIV( * )
+      REAL               A( LDA, * ), RHS( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGESC2 solves a system of linear equations
+*
+*            A * X = scale* RHS
+*
+*  with a general N-by-N matrix A using the LU factorization with
+*  complete pivoting computed by SGETC2.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.
+*
+*  A       (input) REAL array, dimension (LDA,N)
+*          On entry, the  LU part of the factorization of the n-by-n
+*          matrix A computed by SGETC2:  A = P * L * U * Q
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1, N).
+*
+*  RHS     (input/output) REAL array, dimension (N).
+*          On entry, the right hand side vector b.
+*          On exit, the solution vector X.
+*
+*  IPIV    (iput) INTEGER array, dimension (N).
+*          The pivot indices; for 1 <= i <= N, row i of the
+*          matrix has been interchanged with row IPIV(i).
+*
+*  JPIV    (iput) INTEGER array, dimension (N).
+*          The pivot indices; for 1 <= j <= N, column j of the
+*          matrix has been interchanged with column JPIV(j).
+*
+*  SCALE    (output) REAL
+*           On exit, SCALE contains the scale factor. SCALE is chosen
+*           0 <= SCALE <= 1 to prevent owerflow in the solution.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, TWO
+      PARAMETER          ( ONE = 1.0E+0, TWO = 2.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      REAL               BIGNUM, EPS, SMLNUM, TEMP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLABAD, SLASWP, SSCAL
+*     ..
+*     .. External Functions ..
+      INTEGER            ISAMAX
+      REAL               SLAMCH
+      EXTERNAL           ISAMAX, SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS
+*     ..
+*     .. Executable Statements ..
+*
+*      Set constant to control owerflow
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SLAMCH( 'S' ) / EPS
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+*
+*     Apply permutations IPIV to RHS
+*
+      CALL SLASWP( 1, RHS, LDA, 1, N-1, IPIV, 1 )
+*
+*     Solve for L part
+*
+      DO 20 I = 1, N - 1
+         DO 10 J = I + 1, N
+            RHS( J ) = RHS( J ) - A( J, I )*RHS( I )
+   10    CONTINUE
+   20 CONTINUE
+*
+*     Solve for U part
+*
+      SCALE = ONE
+*
+*     Check for scaling
+*
+      I = ISAMAX( N, RHS, 1 )
+      IF( TWO*SMLNUM*ABS( RHS( I ) ).GT.ABS( A( N, N ) ) ) THEN
+         TEMP = ( ONE / TWO ) / ABS( RHS( I ) )
+         CALL SSCAL( N, TEMP, RHS( 1 ), 1 )
+         SCALE = SCALE*TEMP
+      END IF
+*
+      DO 40 I = N, 1, -1
+         TEMP = ONE / A( I, I )
+         RHS( I ) = RHS( I )*TEMP
+         DO 30 J = I + 1, N
+            RHS( I ) = RHS( I ) - RHS( J )*( A( I, J )*TEMP )
+   30    CONTINUE
+   40 CONTINUE
+*
+*     Apply permutations JPIV to the solution (RHS)
+*
+      CALL SLASWP( 1, RHS, LDA, 1, N-1, JPIV, -1 )
+      RETURN
+*
+*     End of SGESC2
+*
+      END
diff --git a/costa/native/external/lapack/sgesdd.f b/costa/native/external/lapack/sgesdd.f
new file mode 100644
index 000000000..52f5d2273
--- /dev/null
+++ b/costa/native/external/lapack/sgesdd.f
@@ -0,0 +1,1335 @@
+      SUBROUTINE SGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK,
+     $                   LWORK, IWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ
+      INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               A( LDA, * ), S( * ), U( LDU, * ),
+     $                   VT( LDVT, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGESDD computes the singular value decomposition (SVD) of a real
+*  M-by-N matrix A, optionally computing the left and right singular
+*  vectors.  If singular vectors are desired, it uses a
+*  divide-and-conquer algorithm.
+*
+*  The SVD is written
+*
+*       A = U * SIGMA * transpose(V)
+*
+*  where SIGMA is an M-by-N matrix which is zero except for its
+*  min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
+*  V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
+*  are the singular values of A; they are real and non-negative, and
+*  are returned in descending order.  The first min(m,n) columns of
+*  U and V are the left and right singular vectors of A.
+*
+*  Note that the routine returns VT = V**T, not V.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          Specifies options for computing all or part of the matrix U:
+*          = 'A':  all M columns of U and all N rows of V**T are
+*                  returned in the arrays U and VT;
+*          = 'S':  the first min(M,N) columns of U and the first
+*                  min(M,N) rows of V**T are returned in the arrays U
+*                  and VT;
+*          = 'O':  If M >= N, the first N columns of U are overwritten
+*                  on the array A and all rows of V**T are returned in
+*                  the array VT;
+*                  otherwise, all columns of U are returned in the
+*                  array U and the first M rows of V**T are overwritten
+*                  in the array VT;
+*          = 'N':  no columns of U or rows of V**T are computed.
+*
+*  M       (input) INTEGER
+*          The number of rows of the input matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the input matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit,
+*          if JOBZ = 'O',  A is overwritten with the first N columns
+*                          of U (the left singular vectors, stored
+*                          columnwise) if M >= N;
+*                          A is overwritten with the first M rows
+*                          of V**T (the right singular vectors, stored
+*                          rowwise) otherwise.
+*          if JOBZ .ne. 'O', the contents of A are destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  S       (output) REAL array, dimension (min(M,N))
+*          The singular values of A, sorted so that S(i) >= S(i+1).
+*
+*  U       (output) REAL array, dimension (LDU,UCOL)
+*          UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N;
+*          UCOL = min(M,N) if JOBZ = 'S'.
+*          If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M
+*          orthogonal matrix U;
+*          if JOBZ = 'S', U contains the first min(M,N) columns of U
+*          (the left singular vectors, stored columnwise);
+*          if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced.
+*
+*  LDU     (input) INTEGER
+*          The leading dimension of the array U.  LDU >= 1; if
+*          JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M.
+*
+*  VT      (output) REAL array, dimension (LDVT,N)
+*          If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the
+*          N-by-N orthogonal matrix V**T;
+*          if JOBZ = 'S', VT contains the first min(M,N) rows of
+*          V**T (the right singular vectors, stored rowwise);
+*          if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced.
+*
+*  LDVT    (input) INTEGER
+*          The leading dimension of the array VT.  LDVT >= 1; if
+*          JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N;
+*          if JOBZ = 'S', LDVT >= min(M,N).
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= 1.
+*          If JOBZ = 'N',
+*            LWORK >= 3*min(M,N) + max(max(M,N),6*min(M,N)).
+*          If JOBZ = 'O',
+*            LWORK >= 3*min(M,N)*min(M,N) + 
+*                     max(max(M,N),5*min(M,N)*min(M,N)+4*min(M,N)).
+*          If JOBZ = 'S' or 'A'
+*            LWORK >= 3*min(M,N)*min(M,N) +
+*                     max(max(M,N),4*min(M,N)*min(M,N)+4*min(M,N)).
+*          For good performance, LWORK should generally be larger.
+*          If LWORK < 0 but other input arguments are legal, WORK(1)
+*          returns the optimal LWORK.
+*
+*  IWORK   (workspace) INTEGER array, dimension (8*min(M,N))
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  SBDSDC did not converge, updating process failed.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, WNTQA, WNTQAS, WNTQN, WNTQO, WNTQS
+      INTEGER            BDSPAC, BLK, CHUNK, I, IE, IERR, IL,
+     $                   IR, ISCL, ITAU, ITAUP, ITAUQ, IU, IVT, LDWKVT,
+     $                   LDWRKL, LDWRKR, LDWRKU, MAXWRK, MINMN, MINWRK,
+     $                   MNTHR, NWORK, WRKBL
+      REAL               ANRM, BIGNUM, EPS, SMLNUM
+*     ..
+*     .. Local Arrays ..
+      INTEGER            IDUM( 1 )
+      REAL               DUM( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SBDSDC, SGEBRD, SGELQF, SGEMM, SGEQRF, SLACPY,
+     $                   SLASCL, SLASET, SORGBR, SORGLQ, SORGQR, SORMBR,
+     $                   XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               SLAMCH, SLANGE
+      EXTERNAL           ILAENV, LSAME, SLAMCH, SLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, MAX, MIN, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      MINMN = MIN( M, N )
+      MNTHR = INT( MINMN*11.0E0 / 6.0E0 )
+      WNTQA = LSAME( JOBZ, 'A' )
+      WNTQS = LSAME( JOBZ, 'S' )
+      WNTQAS = WNTQA .OR. WNTQS
+      WNTQO = LSAME( JOBZ, 'O' )
+      WNTQN = LSAME( JOBZ, 'N' )
+      MINWRK = 1
+      MAXWRK = 1
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      IF( .NOT.( WNTQA .OR. WNTQS .OR. WNTQO .OR. WNTQN ) ) THEN
+         INFO = -1
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDU.LT.1 .OR. ( WNTQAS .AND. LDU.LT.M ) .OR.
+     $         ( WNTQO .AND. M.LT.N .AND. LDU.LT.M ) ) THEN
+         INFO = -8
+      ELSE IF( LDVT.LT.1 .OR. ( WNTQA .AND. LDVT.LT.N ) .OR.
+     $         ( WNTQS .AND. LDVT.LT.MINMN ) .OR.
+     $         ( WNTQO .AND. M.GE.N .AND. LDVT.LT.N ) ) THEN
+         INFO = -10
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV.)
+*
+      IF( INFO.EQ.0 .AND. M.GT.0 .AND. N.GT.0 ) THEN
+         IF( M.GE.N ) THEN
+*
+*           Compute space needed for SBDSDC
+*
+            IF( WNTQN ) THEN
+               BDSPAC = 7*N
+            ELSE
+               BDSPAC = 3*N*N + 4*N
+            END IF
+            IF( M.GE.MNTHR ) THEN
+               IF( WNTQN ) THEN
+*
+*                 Path 1 (M much larger than N, JOBZ='N')
+*
+                  WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1,
+     $                    -1 )
+                  WRKBL = MAX( WRKBL, 3*N+2*N*
+     $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
+                  MAXWRK = MAX( WRKBL, BDSPAC+N )
+                  MINWRK = BDSPAC + N
+               ELSE IF( WNTQO ) THEN
+*
+*                 Path 2 (M much larger than N, JOBZ='O')
+*
+                  WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M,
+     $                    N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+2*N*
+     $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'SORMBR', 'QLN', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'SORMBR', 'PRT', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC+3*N )
+                  MAXWRK = WRKBL + 2*N*N
+                  MINWRK = BDSPAC + 2*N*N + 3*N
+               ELSE IF( WNTQS ) THEN
+*
+*                 Path 3 (M much larger than N, JOBZ='S')
+*
+                  WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M,
+     $                    N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+2*N*
+     $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'SORMBR', 'QLN', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'SORMBR', 'PRT', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC+3*N )
+                  MAXWRK = WRKBL + N*N
+                  MINWRK = BDSPAC + N*N + 3*N
+               ELSE IF( WNTQA ) THEN
+*
+*                 Path 4 (M much larger than N, JOBZ='A')
+*
+                  WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'SORGQR', ' ', M,
+     $                    M, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+2*N*
+     $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'SORMBR', 'QLN', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'SORMBR', 'PRT', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC+3*N )
+                  MAXWRK = WRKBL + N*N
+                  MINWRK = BDSPAC + N*N + 3*N
+               END IF
+            ELSE
+*
+*              Path 5 (M at least N, but not much larger)
+*
+               WRKBL = 3*N + ( M+N )*ILAENV( 1, 'SGEBRD', ' ', M, N, -1,
+     $                 -1 )
+               IF( WNTQN ) THEN
+                  MAXWRK = MAX( WRKBL, BDSPAC+3*N )
+                  MINWRK = 3*N + MAX( M, BDSPAC )
+               ELSE IF( WNTQO ) THEN
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'SORMBR', 'QLN', M, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'SORMBR', 'PRT', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC+3*N )
+                  MAXWRK = WRKBL + M*N
+                  MINWRK = 3*N + MAX( M, N*N+BDSPAC )
+               ELSE IF( WNTQS ) THEN
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'SORMBR', 'QLN', M, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'SORMBR', 'PRT', N, N, N, -1 ) )
+                  MAXWRK = MAX( WRKBL, BDSPAC+3*N )
+                  MINWRK = 3*N + MAX( M, BDSPAC )
+               ELSE IF( WNTQA ) THEN
+                  WRKBL = MAX( WRKBL, 3*N+M*
+     $                    ILAENV( 1, 'SORMBR', 'QLN', M, M, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'SORMBR', 'PRT', N, N, N, -1 ) )
+                  MAXWRK = MAX( MAXWRK, BDSPAC+3*N )
+                  MINWRK = 3*N + MAX( M, BDSPAC )
+               END IF
+            END IF
+         ELSE
+*
+*           Compute space needed for SBDSDC
+*
+            IF( WNTQN ) THEN
+               BDSPAC = 7*M
+            ELSE
+               BDSPAC = 3*M*M + 4*M
+            END IF
+            IF( N.GE.MNTHR ) THEN
+               IF( WNTQN ) THEN
+*
+*                 Path 1t (N much larger than M, JOBZ='N')
+*
+                  WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1,
+     $                    -1 )
+                  WRKBL = MAX( WRKBL, 3*M+2*M*
+     $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
+                  MAXWRK = MAX( WRKBL, BDSPAC+M )
+                  MINWRK = BDSPAC + M
+               ELSE IF( WNTQO ) THEN
+*
+*                 Path 2t (N much larger than M, JOBZ='O')
+*
+                  WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+2*M*
+     $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'SORMBR', 'QLN', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'SORMBR', 'PRT', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC+3*M )
+                  MAXWRK = WRKBL + 2*M*M
+                  MINWRK = BDSPAC + 2*M*M + 3*M
+               ELSE IF( WNTQS ) THEN
+*
+*                 Path 3t (N much larger than M, JOBZ='S')
+*
+                  WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+2*M*
+     $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'SORMBR', 'QLN', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'SORMBR', 'PRT', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC+3*M )
+                  MAXWRK = WRKBL + M*M
+                  MINWRK = BDSPAC + M*M + 3*M
+               ELSE IF( WNTQA ) THEN
+*
+*                 Path 4t (N much larger than M, JOBZ='A')
+*
+                  WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'SORGLQ', ' ', N,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+2*M*
+     $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'SORMBR', 'QLN', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'SORMBR', 'PRT', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC+3*M )
+                  MAXWRK = WRKBL + M*M
+                  MINWRK = BDSPAC + M*M + 3*M
+               END IF
+            ELSE
+*
+*              Path 5t (N greater than M, but not much larger)
+*
+               WRKBL = 3*M + ( M+N )*ILAENV( 1, 'SGEBRD', ' ', M, N, -1,
+     $                 -1 )
+               IF( WNTQN ) THEN
+                  MAXWRK = MAX( WRKBL, BDSPAC+3*M )
+                  MINWRK = 3*M + MAX( N, BDSPAC )
+               ELSE IF( WNTQO ) THEN
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'SORMBR', 'QLN', M, M, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'SORMBR', 'PRT', M, N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC+3*M )
+                  MAXWRK = WRKBL + M*N
+                  MINWRK = 3*M + MAX( N, M*M+BDSPAC )
+               ELSE IF( WNTQS ) THEN
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'SORMBR', 'QLN', M, M, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'SORMBR', 'PRT', M, N, M, -1 ) )
+                  MAXWRK = MAX( WRKBL, BDSPAC+3*M )
+                  MINWRK = 3*M + MAX( N, BDSPAC )
+               ELSE IF( WNTQA ) THEN
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'SORMBR', 'QLN', M, M, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'SORMBR', 'PRT', N, N, M, -1 ) )
+                  MAXWRK = MAX( WRKBL, BDSPAC+3*M )
+                  MINWRK = 3*M + MAX( N, BDSPAC )
+               END IF
+            END IF
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+*
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGESDD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         IF( LWORK.GE.1 )
+     $      WORK( 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SQRT( SLAMCH( 'S' ) ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = SLANGE( 'M', M, N, A, LDA, DUM )
+      ISCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         ISCL = 1
+         CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         ISCL = 1
+         CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
+      END IF
+*
+      IF( M.GE.N ) THEN
+*
+*        A has at least as many rows as columns. If A has sufficiently
+*        more rows than columns, first reduce using the QR
+*        decomposition (if sufficient workspace available)
+*
+         IF( M.GE.MNTHR ) THEN
+*
+            IF( WNTQN ) THEN
+*
+*              Path 1 (M much larger than N, JOBZ='N')
+*              No singular vectors to be computed
+*
+               ITAU = 1
+               NWORK = ITAU + N
+*
+*              Compute A=Q*R
+*              (Workspace: need 2*N, prefer N+N*NB)
+*
+               CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Zero out below R
+*
+               CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
+               IE = 1
+               ITAUQ = IE + N
+               ITAUP = ITAUQ + N
+               NWORK = ITAUP + N
+*
+*              Bidiagonalize R in A
+*              (Workspace: need 4*N, prefer 3*N+2*N*NB)
+*
+               CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                      IERR )
+               NWORK = IE + N
+*
+*              Perform bidiagonal SVD, computing singular values only
+*              (Workspace: need N+BDSPAC)
+*
+               CALL SBDSDC( 'U', 'N', N, S, WORK( IE ), DUM, 1, DUM, 1,
+     $                      DUM, IDUM, WORK( NWORK ), IWORK, INFO )
+*
+            ELSE IF( WNTQO ) THEN
+*
+*              Path 2 (M much larger than N, JOBZ = 'O')
+*              N left singular vectors to be overwritten on A and
+*              N right singular vectors to be computed in VT
+*
+               IR = 1
+*
+*              WORK(IR) is LDWRKR by N
+*
+               IF( LWORK.GE.LDA*N+N*N+3*N+BDSPAC ) THEN
+                  LDWRKR = LDA
+               ELSE
+                  LDWRKR = ( LWORK-N*N-3*N-BDSPAC ) / N
+               END IF
+               ITAU = IR + LDWRKR*N
+               NWORK = ITAU + N
+*
+*              Compute A=Q*R
+*              (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+               CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy R to WORK(IR), zeroing out below it
+*
+               CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
+               CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ),
+     $                      LDWRKR )
+*
+*              Generate Q in A
+*              (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+               CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+               IE = ITAU
+               ITAUQ = IE + N
+               ITAUP = ITAUQ + N
+               NWORK = ITAUP + N
+*
+*              Bidiagonalize R in VT, copying result to WORK(IR)
+*              (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*
+               CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
+     $                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              WORK(IU) is N by N
+*
+               IU = NWORK
+               NWORK = IU + N*N
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in WORK(IU) and computing right
+*              singular vectors of bidiagonal matrix in VT
+*              (Workspace: need N+N*N+BDSPAC)
+*
+               CALL SBDSDC( 'U', 'I', N, S, WORK( IE ), WORK( IU ), N,
+     $                      VT, LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
+     $                      INFO )
+*
+*              Overwrite WORK(IU) by left singular vectors of R
+*              and VT by right singular vectors of R
+*              (Workspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
+*
+               CALL SORMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR,
+     $                      WORK( ITAUQ ), WORK( IU ), N, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+               CALL SORMBR( 'P', 'R', 'T', N, N, N, WORK( IR ), LDWRKR,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Multiply Q in A by left singular vectors of R in
+*              WORK(IU), storing result in WORK(IR) and copying to A
+*              (Workspace: need 2*N*N, prefer N*N+M*N)
+*
+               DO 10 I = 1, M, LDWRKR
+                  CHUNK = MIN( M-I+1, LDWRKR )
+                  CALL SGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
+     $                        LDA, WORK( IU ), N, ZERO, WORK( IR ),
+     $                        LDWRKR )
+                  CALL SLACPY( 'F', CHUNK, N, WORK( IR ), LDWRKR,
+     $                         A( I, 1 ), LDA )
+   10          CONTINUE
+*
+            ELSE IF( WNTQS ) THEN
+*
+*              Path 3 (M much larger than N, JOBZ='S')
+*              N left singular vectors to be computed in U and
+*              N right singular vectors to be computed in VT
+*
+               IR = 1
+*
+*              WORK(IR) is N by N
+*
+               LDWRKR = N
+               ITAU = IR + LDWRKR*N
+               NWORK = ITAU + N
+*
+*              Compute A=Q*R
+*              (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+               CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy R to WORK(IR), zeroing out below it
+*
+               CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
+               CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ),
+     $                      LDWRKR )
+*
+*              Generate Q in A
+*              (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+               CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+               IE = ITAU
+               ITAUQ = IE + N
+               ITAUP = ITAUQ + N
+               NWORK = ITAUP + N
+*
+*              Bidiagonalize R in WORK(IR)
+*              (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*
+               CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
+     $                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagoal matrix in U and computing right singular
+*              vectors of bidiagonal matrix in VT
+*              (Workspace: need N+BDSPAC)
+*
+               CALL SBDSDC( 'U', 'I', N, S, WORK( IE ), U, LDU, VT,
+     $                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
+     $                      INFO )
+*
+*              Overwrite U by left singular vectors of R and VT
+*              by right singular vectors of R
+*              (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+*
+               CALL SORMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+               CALL SORMBR( 'P', 'R', 'T', N, N, N, WORK( IR ), LDWRKR,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Multiply Q in A by left singular vectors of R in
+*              WORK(IR), storing result in U
+*              (Workspace: need N*N)
+*
+               CALL SLACPY( 'F', N, N, U, LDU, WORK( IR ), LDWRKR )
+               CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA, WORK( IR ),
+     $                     LDWRKR, ZERO, U, LDU )
+*
+            ELSE IF( WNTQA ) THEN
+*
+*              Path 4 (M much larger than N, JOBZ='A')
+*              M left singular vectors to be computed in U and
+*              N right singular vectors to be computed in VT
+*
+               IU = 1
+*
+*              WORK(IU) is N by N
+*
+               LDWRKU = N
+               ITAU = IU + LDWRKU*N
+               NWORK = ITAU + N
+*
+*              Compute A=Q*R, copying result to U
+*              (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+               CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+               CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*              Generate Q in U
+*              (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+               CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Produce R in A, zeroing out other entries
+*
+               CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
+               IE = ITAU
+               ITAUQ = IE + N
+               ITAUP = ITAUQ + N
+               NWORK = ITAUP + N
+*
+*              Bidiagonalize R in A
+*              (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*
+               CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                      IERR )
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in WORK(IU) and computing right
+*              singular vectors of bidiagonal matrix in VT
+*              (Workspace: need N+N*N+BDSPAC)
+*
+               CALL SBDSDC( 'U', 'I', N, S, WORK( IE ), WORK( IU ), N,
+     $                      VT, LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
+     $                      INFO )
+*
+*              Overwrite WORK(IU) by left singular vectors of R and VT
+*              by right singular vectors of R
+*              (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+*
+               CALL SORMBR( 'Q', 'L', 'N', N, N, N, A, LDA,
+     $                      WORK( ITAUQ ), WORK( IU ), LDWRKU,
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+               CALL SORMBR( 'P', 'R', 'T', N, N, N, A, LDA,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Multiply Q in U by left singular vectors of R in
+*              WORK(IU), storing result in A
+*              (Workspace: need N*N)
+*
+               CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU, WORK( IU ),
+     $                     LDWRKU, ZERO, A, LDA )
+*
+*              Copy left singular vectors of A from A to U
+*
+               CALL SLACPY( 'F', M, N, A, LDA, U, LDU )
+*
+            END IF
+*
+         ELSE
+*
+*           M .LT. MNTHR
+*
+*           Path 5 (M at least N, but not much larger)
+*           Reduce to bidiagonal form without QR decomposition
+*
+            IE = 1
+            ITAUQ = IE + N
+            ITAUP = ITAUQ + N
+            NWORK = ITAUP + N
+*
+*           Bidiagonalize A
+*           (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
+*
+            CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                   WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                   IERR )
+            IF( WNTQN ) THEN
+*
+*              Perform bidiagonal SVD, only computing singular values
+*              (Workspace: need N+BDSPAC)
+*
+               CALL SBDSDC( 'U', 'N', N, S, WORK( IE ), DUM, 1, DUM, 1,
+     $                      DUM, IDUM, WORK( NWORK ), IWORK, INFO )
+            ELSE IF( WNTQO ) THEN
+               IU = NWORK
+               IF( LWORK.GE.M*N+3*N+BDSPAC ) THEN
+*
+*                 WORK( IU ) is M by N
+*
+                  LDWRKU = M
+                  NWORK = IU + LDWRKU*N
+                  CALL SLASET( 'F', M, N, ZERO, ZERO, WORK( IU ),
+     $                         LDWRKU )
+               ELSE
+*
+*                 WORK( IU ) is N by N
+*
+                  LDWRKU = N
+                  NWORK = IU + LDWRKU*N
+*
+*                 WORK(IR) is LDWRKR by N
+*
+                  IR = NWORK
+                  LDWRKR = ( LWORK-N*N-3*N ) / N
+               END IF
+               NWORK = IU + LDWRKU*N
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in WORK(IU) and computing right
+*              singular vectors of bidiagonal matrix in VT
+*              (Workspace: need N+N*N+BDSPAC)
+*
+               CALL SBDSDC( 'U', 'I', N, S, WORK( IE ), WORK( IU ),
+     $                      LDWRKU, VT, LDVT, DUM, IDUM, WORK( NWORK ),
+     $                      IWORK, INFO )
+*
+*              Overwrite VT by right singular vectors of A
+*              (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+               CALL SORMBR( 'P', 'R', 'T', N, N, N, A, LDA,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+               IF( LWORK.GE.M*N+3*N+BDSPAC ) THEN
+*
+*                 Overwrite WORK(IU) by left singular vectors of A
+*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+                  CALL SORMBR( 'Q', 'L', 'N', M, N, N, A, LDA,
+     $                         WORK( ITAUQ ), WORK( IU ), LDWRKU,
+     $                         WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*                 Copy left singular vectors of A from WORK(IU) to A
+*
+                  CALL SLACPY( 'F', M, N, WORK( IU ), LDWRKU, A, LDA )
+               ELSE
+*
+*                 Generate Q in A
+*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+                  CALL SORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
+     $                         WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*                 Multiply Q in A by left singular vectors of
+*                 bidiagonal matrix in WORK(IU), storing result in
+*                 WORK(IR) and copying to A
+*                 (Workspace: need 2*N*N, prefer N*N+M*N)
+*
+                  DO 20 I = 1, M, LDWRKR
+                     CHUNK = MIN( M-I+1, LDWRKR )
+                     CALL SGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
+     $                           LDA, WORK( IU ), LDWRKU, ZERO,
+     $                           WORK( IR ), LDWRKR )
+                     CALL SLACPY( 'F', CHUNK, N, WORK( IR ), LDWRKR,
+     $                            A( I, 1 ), LDA )
+   20             CONTINUE
+               END IF
+*
+            ELSE IF( WNTQS ) THEN
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in U and computing right singular
+*              vectors of bidiagonal matrix in VT
+*              (Workspace: need N+BDSPAC)
+*
+               CALL SLASET( 'F', M, N, ZERO, ZERO, U, LDU )
+               CALL SBDSDC( 'U', 'I', N, S, WORK( IE ), U, LDU, VT,
+     $                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
+     $                      INFO )
+*
+*              Overwrite U by left singular vectors of A and VT
+*              by right singular vectors of A
+*              (Workspace: need 3*N, prefer 2*N+N*NB)
+*
+               CALL SORMBR( 'Q', 'L', 'N', M, N, N, A, LDA,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+               CALL SORMBR( 'P', 'R', 'T', N, N, N, A, LDA,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+            ELSE IF( WNTQA ) THEN
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in U and computing right singular
+*              vectors of bidiagonal matrix in VT
+*              (Workspace: need N+BDSPAC)
+*
+               CALL SLASET( 'F', M, M, ZERO, ZERO, U, LDU )
+               CALL SBDSDC( 'U', 'I', N, S, WORK( IE ), U, LDU, VT,
+     $                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
+     $                      INFO )
+*
+*              Set the right corner of U to identity matrix
+*
+               CALL SLASET( 'F', M-N, M-N, ZERO, ONE, U( N+1, N+1 ),
+     $                      LDU )
+*
+*              Overwrite U by left singular vectors of A and VT
+*              by right singular vectors of A
+*              (Workspace: need N*N+2*N+M, prefer N*N+2*N+M*NB)
+*
+               CALL SORMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+               CALL SORMBR( 'P', 'R', 'T', N, N, M, A, LDA,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+            END IF
+*
+         END IF
+*
+      ELSE
+*
+*        A has more columns than rows. If A has sufficiently more
+*        columns than rows, first reduce using the LQ decomposition (if
+*        sufficient workspace available)
+*
+         IF( N.GE.MNTHR ) THEN
+*
+            IF( WNTQN ) THEN
+*
+*              Path 1t (N much larger than M, JOBZ='N')
+*              No singular vectors to be computed
+*
+               ITAU = 1
+               NWORK = ITAU + M
+*
+*              Compute A=L*Q
+*              (Workspace: need 2*M, prefer M+M*NB)
+*
+               CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Zero out above L
+*
+               CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )
+               IE = 1
+               ITAUQ = IE + M
+               ITAUP = ITAUQ + M
+               NWORK = ITAUP + M
+*
+*              Bidiagonalize L in A
+*              (Workspace: need 4*M, prefer 3*M+2*M*NB)
+*
+               CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                      IERR )
+               NWORK = IE + M
+*
+*              Perform bidiagonal SVD, computing singular values only
+*              (Workspace: need M+BDSPAC)
+*
+               CALL SBDSDC( 'U', 'N', M, S, WORK( IE ), DUM, 1, DUM, 1,
+     $                      DUM, IDUM, WORK( NWORK ), IWORK, INFO )
+*
+            ELSE IF( WNTQO ) THEN
+*
+*              Path 2t (N much larger than M, JOBZ='O')
+*              M right singular vectors to be overwritten on A and
+*              M left singular vectors to be computed in U
+*
+               IVT = 1
+*
+*              IVT is M by M
+*
+               IL = IVT + M*M
+               IF( LWORK.GE.M*N+M*M+3*M+BDSPAC ) THEN
+*
+*                 WORK(IL) is M by N
+*
+                  LDWRKL = M
+                  CHUNK = N
+               ELSE
+                  LDWRKL = M
+                  CHUNK = ( LWORK-M*M ) / M
+               END IF
+               ITAU = IL + LDWRKL*M
+               NWORK = ITAU + M
+*
+*              Compute A=L*Q
+*              (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+               CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy L to WORK(IL), zeroing about above it
+*
+               CALL SLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL )
+               CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
+     $                      WORK( IL+LDWRKL ), LDWRKL )
+*
+*              Generate Q in A
+*              (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+               CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+               IE = ITAU
+               ITAUQ = IE + M
+               ITAUP = ITAUQ + M
+               NWORK = ITAUP + M
+*
+*              Bidiagonalize L in WORK(IL)
+*              (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*
+               CALL SGEBRD( M, M, WORK( IL ), LDWRKL, S, WORK( IE ),
+     $                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in U, and computing right singular
+*              vectors of bidiagonal matrix in WORK(IVT)
+*              (Workspace: need M+M*M+BDSPAC)
+*
+               CALL SBDSDC( 'U', 'I', M, S, WORK( IE ), U, LDU,
+     $                      WORK( IVT ), M, DUM, IDUM, WORK( NWORK ),
+     $                      IWORK, INFO )
+*
+*              Overwrite U by left singular vectors of L and WORK(IVT)
+*              by right singular vectors of L
+*              (Workspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
+*
+               CALL SORMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+               CALL SORMBR( 'P', 'R', 'T', M, M, M, WORK( IL ), LDWRKL,
+     $                      WORK( ITAUP ), WORK( IVT ), M,
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Multiply right singular vectors of L in WORK(IVT) by Q
+*              in A, storing result in WORK(IL) and copying to A
+*              (Workspace: need 2*M*M, prefer M*M+M*N)
+*
+               DO 30 I = 1, N, CHUNK
+                  BLK = MIN( N-I+1, CHUNK )
+                  CALL SGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IVT ), M,
+     $                        A( 1, I ), LDA, ZERO, WORK( IL ), LDWRKL )
+                  CALL SLACPY( 'F', M, BLK, WORK( IL ), LDWRKL,
+     $                         A( 1, I ), LDA )
+   30          CONTINUE
+*
+            ELSE IF( WNTQS ) THEN
+*
+*              Path 3t (N much larger than M, JOBZ='S')
+*              M right singular vectors to be computed in VT and
+*              M left singular vectors to be computed in U
+*
+               IL = 1
+*
+*              WORK(IL) is M by M
+*
+               LDWRKL = M
+               ITAU = IL + LDWRKL*M
+               NWORK = ITAU + M
+*
+*              Compute A=L*Q
+*              (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+               CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy L to WORK(IL), zeroing out above it
+*
+               CALL SLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL )
+               CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
+     $                      WORK( IL+LDWRKL ), LDWRKL )
+*
+*              Generate Q in A
+*              (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+               CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+               IE = ITAU
+               ITAUQ = IE + M
+               ITAUP = ITAUQ + M
+               NWORK = ITAUP + M
+*
+*              Bidiagonalize L in WORK(IU), copying result to U
+*              (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*
+               CALL SGEBRD( M, M, WORK( IL ), LDWRKL, S, WORK( IE ),
+     $                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in U and computing right singular
+*              vectors of bidiagonal matrix in VT
+*              (Workspace: need M+BDSPAC)
+*
+               CALL SBDSDC( 'U', 'I', M, S, WORK( IE ), U, LDU, VT,
+     $                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
+     $                      INFO )
+*
+*              Overwrite U by left singular vectors of L and VT
+*              by right singular vectors of L
+*              (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB)
+*
+               CALL SORMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+               CALL SORMBR( 'P', 'R', 'T', M, M, M, WORK( IL ), LDWRKL,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Multiply right singular vectors of L in WORK(IL) by
+*              Q in A, storing result in VT
+*              (Workspace: need M*M)
+*
+               CALL SLACPY( 'F', M, M, VT, LDVT, WORK( IL ), LDWRKL )
+               CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IL ), LDWRKL,
+     $                     A, LDA, ZERO, VT, LDVT )
+*
+            ELSE IF( WNTQA ) THEN
+*
+*              Path 4t (N much larger than M, JOBZ='A')
+*              N right singular vectors to be computed in VT and
+*              M left singular vectors to be computed in U
+*
+               IVT = 1
+*
+*              WORK(IVT) is M by M
+*
+               LDWKVT = M
+               ITAU = IVT + LDWKVT*M
+               NWORK = ITAU + M
+*
+*              Compute A=L*Q, copying result to VT
+*              (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+               CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+               CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*              Generate Q in VT
+*              (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+               CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Produce L in A, zeroing out other entries
+*
+               CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )
+               IE = ITAU
+               ITAUQ = IE + M
+               ITAUP = ITAUQ + M
+               NWORK = ITAUP + M
+*
+*              Bidiagonalize L in A
+*              (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*
+               CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                      IERR )
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in U and computing right singular
+*              vectors of bidiagonal matrix in WORK(IVT)
+*              (Workspace: need M+M*M+BDSPAC)
+*
+               CALL SBDSDC( 'U', 'I', M, S, WORK( IE ), U, LDU,
+     $                      WORK( IVT ), LDWKVT, DUM, IDUM,
+     $                      WORK( NWORK ), IWORK, INFO )
+*
+*              Overwrite U by left singular vectors of L and WORK(IVT)
+*              by right singular vectors of L
+*              (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB)
+*
+               CALL SORMBR( 'Q', 'L', 'N', M, M, M, A, LDA,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+               CALL SORMBR( 'P', 'R', 'T', M, M, M, A, LDA,
+     $                      WORK( ITAUP ), WORK( IVT ), LDWKVT,
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Multiply right singular vectors of L in WORK(IVT) by
+*              Q in VT, storing result in A
+*              (Workspace: need M*M)
+*
+               CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IVT ), LDWKVT,
+     $                     VT, LDVT, ZERO, A, LDA )
+*
+*              Copy right singular vectors of A from A to VT
+*
+               CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT )
+*
+            END IF
+*
+         ELSE
+*
+*           N .LT. MNTHR
+*
+*           Path 5t (N greater than M, but not much larger)
+*           Reduce to bidiagonal form without LQ decomposition
+*
+            IE = 1
+            ITAUQ = IE + M
+            ITAUP = ITAUQ + M
+            NWORK = ITAUP + M
+*
+*           Bidiagonalize A
+*           (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
+*
+            CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                   WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                   IERR )
+            IF( WNTQN ) THEN
+*
+*              Perform bidiagonal SVD, only computing singular values
+*              (Workspace: need M+BDSPAC)
+*
+               CALL SBDSDC( 'L', 'N', M, S, WORK( IE ), DUM, 1, DUM, 1,
+     $                      DUM, IDUM, WORK( NWORK ), IWORK, INFO )
+            ELSE IF( WNTQO ) THEN
+               LDWKVT = M
+               IVT = NWORK
+               IF( LWORK.GE.M*N+3*M+BDSPAC ) THEN
+*
+*                 WORK( IVT ) is M by N
+*
+                  CALL SLASET( 'F', M, N, ZERO, ZERO, WORK( IVT ),
+     $                         LDWKVT )
+                  NWORK = IVT + LDWKVT*N
+               ELSE
+*
+*                 WORK( IVT ) is M by M
+*
+                  NWORK = IVT + LDWKVT*M
+                  IL = NWORK
+*
+*                 WORK(IL) is M by CHUNK
+*
+                  CHUNK = ( LWORK-M*M-3*M ) / M
+               END IF
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in U and computing right singular
+*              vectors of bidiagonal matrix in WORK(IVT)
+*              (Workspace: need M*M+BDSPAC)
+*
+               CALL SBDSDC( 'L', 'I', M, S, WORK( IE ), U, LDU,
+     $                      WORK( IVT ), LDWKVT, DUM, IDUM,
+     $                      WORK( NWORK ), IWORK, INFO )
+*
+*              Overwrite U by left singular vectors of A
+*              (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+               CALL SORMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+               IF( LWORK.GE.M*N+3*M+BDSPAC ) THEN
+*
+*                 Overwrite WORK(IVT) by left singular vectors of A
+*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+                  CALL SORMBR( 'P', 'R', 'T', M, N, M, A, LDA,
+     $                         WORK( ITAUP ), WORK( IVT ), LDWKVT,
+     $                         WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*                 Copy right singular vectors of A from WORK(IVT) to A
+*
+                  CALL SLACPY( 'F', M, N, WORK( IVT ), LDWKVT, A, LDA )
+               ELSE
+*
+*                 Generate P**T in A
+*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+                  CALL SORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
+     $                         WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*                 Multiply Q in A by right singular vectors of
+*                 bidiagonal matrix in WORK(IVT), storing result in
+*                 WORK(IL) and copying to A
+*                 (Workspace: need 2*M*M, prefer M*M+M*N)
+*
+                  DO 40 I = 1, N, CHUNK
+                     BLK = MIN( N-I+1, CHUNK )
+                     CALL SGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IVT ),
+     $                           LDWKVT, A( 1, I ), LDA, ZERO,
+     $                           WORK( IL ), M )
+                     CALL SLACPY( 'F', M, BLK, WORK( IL ), M, A( 1, I ),
+     $                            LDA )
+   40             CONTINUE
+               END IF
+            ELSE IF( WNTQS ) THEN
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in U and computing right singular
+*              vectors of bidiagonal matrix in VT
+*              (Workspace: need M+BDSPAC)
+*
+               CALL SLASET( 'F', M, N, ZERO, ZERO, VT, LDVT )
+               CALL SBDSDC( 'L', 'I', M, S, WORK( IE ), U, LDU, VT,
+     $                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
+     $                      INFO )
+*
+*              Overwrite U by left singular vectors of A and VT
+*              by right singular vectors of A
+*              (Workspace: need 3*M, prefer 2*M+M*NB)
+*
+               CALL SORMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+               CALL SORMBR( 'P', 'R', 'T', M, N, M, A, LDA,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+            ELSE IF( WNTQA ) THEN
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in U and computing right singular
+*              vectors of bidiagonal matrix in VT
+*              (Workspace: need M+BDSPAC)
+*
+               CALL SLASET( 'F', N, N, ZERO, ZERO, VT, LDVT )
+               CALL SBDSDC( 'L', 'I', M, S, WORK( IE ), U, LDU, VT,
+     $                      LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
+     $                      INFO )
+*
+*              Set the right corner of VT to identity matrix
+*
+               CALL SLASET( 'F', N-M, N-M, ZERO, ONE, VT( M+1, M+1 ),
+     $                      LDVT )
+*
+*              Overwrite U by left singular vectors of A and VT
+*              by right singular vectors of A
+*              (Workspace: need 2*M+N, prefer 2*M+N*NB)
+*
+               CALL SORMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+               CALL SORMBR( 'P', 'R', 'T', N, N, M, A, LDA,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+            END IF
+*
+         END IF
+*
+      END IF
+*
+*     Undo scaling if necessary
+*
+      IF( ISCL.EQ.1 ) THEN
+         IF( ANRM.GT.BIGNUM )
+     $      CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
+     $                   IERR )
+         IF( ANRM.LT.SMLNUM )
+     $      CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
+     $                   IERR )
+      END IF
+*
+*     Return optimal workspace in WORK(1)
+*
+      WORK( 1 ) = REAL( MAXWRK )
+*
+      RETURN
+*
+*     End of SGESDD
+*
+      END
diff --git a/costa/native/external/lapack/sgesv.f b/costa/native/external/lapack/sgesv.f
new file mode 100644
index 000000000..7844aa38c
--- /dev/null
+++ b/costa/native/external/lapack/sgesv.f
@@ -0,0 +1,108 @@
+      SUBROUTINE SGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGESV computes the solution to a real system of linear equations
+*     A * X = B,
+*  where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
+*
+*  The LU decomposition with partial pivoting and row interchanges is
+*  used to factor A as
+*     A = P * L * U,
+*  where P is a permutation matrix, L is unit lower triangular, and U is
+*  upper triangular.  The factored form of A is then used to solve the
+*  system of equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the N-by-N coefficient matrix A.
+*          On exit, the factors L and U from the factorization
+*          A = P*L*U; the unit diagonal elements of L are not stored.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          The pivot indices that define the permutation matrix P;
+*          row i of the matrix was interchanged with row IPIV(i).
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS matrix of right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, U(i,i) is exactly zero.  The factorization
+*                has been completed, but the factor U is exactly
+*                singular, so the solution could not be computed.
+*
+*  =====================================================================
+*
+*     .. External Subroutines ..
+      EXTERNAL           SGETRF, SGETRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGESV ', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the LU factorization of A.
+*
+      CALL SGETRF( N, N, A, LDA, IPIV, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL SGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, B, LDB,
+     $                INFO )
+      END IF
+      RETURN
+*
+*     End of SGESV
+*
+      END
diff --git a/costa/native/external/lapack/sgesvd.f b/costa/native/external/lapack/sgesvd.f
new file mode 100644
index 000000000..77c69d659
--- /dev/null
+++ b/costa/native/external/lapack/sgesvd.f
@@ -0,0 +1,3417 @@
+      SUBROUTINE SGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBU, JOBVT
+      INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), S( * ), U( LDU, * ),
+     $                   VT( LDVT, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGESVD computes the singular value decomposition (SVD) of a real
+*  M-by-N matrix A, optionally computing the left and/or right singular
+*  vectors. The SVD is written
+*
+*       A = U * SIGMA * transpose(V)
+*
+*  where SIGMA is an M-by-N matrix which is zero except for its
+*  min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
+*  V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
+*  are the singular values of A; they are real and non-negative, and
+*  are returned in descending order.  The first min(m,n) columns of
+*  U and V are the left and right singular vectors of A.
+*
+*  Note that the routine returns V**T, not V.
+*
+*  Arguments
+*  =========
+*
+*  JOBU    (input) CHARACTER*1
+*          Specifies options for computing all or part of the matrix U:
+*          = 'A':  all M columns of U are returned in array U:
+*          = 'S':  the first min(m,n) columns of U (the left singular
+*                  vectors) are returned in the array U;
+*          = 'O':  the first min(m,n) columns of U (the left singular
+*                  vectors) are overwritten on the array A;
+*          = 'N':  no columns of U (no left singular vectors) are
+*                  computed.
+*
+*  JOBVT   (input) CHARACTER*1
+*          Specifies options for computing all or part of the matrix
+*          V**T:
+*          = 'A':  all N rows of V**T are returned in the array VT;
+*          = 'S':  the first min(m,n) rows of V**T (the right singular
+*                  vectors) are returned in the array VT;
+*          = 'O':  the first min(m,n) rows of V**T (the right singular
+*                  vectors) are overwritten on the array A;
+*          = 'N':  no rows of V**T (no right singular vectors) are
+*                  computed.
+*
+*          JOBVT and JOBU cannot both be 'O'.
+*
+*  M       (input) INTEGER
+*          The number of rows of the input matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the input matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit,
+*          if JOBU = 'O',  A is overwritten with the first min(m,n)
+*                          columns of U (the left singular vectors,
+*                          stored columnwise);
+*          if JOBVT = 'O', A is overwritten with the first min(m,n)
+*                          rows of V**T (the right singular vectors,
+*                          stored rowwise);
+*          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
+*                          are destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  S       (output) REAL array, dimension (min(M,N))
+*          The singular values of A, sorted so that S(i) >= S(i+1).
+*
+*  U       (output) REAL array, dimension (LDU,UCOL)
+*          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
+*          If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
+*          if JOBU = 'S', U contains the first min(m,n) columns of U
+*          (the left singular vectors, stored columnwise);
+*          if JOBU = 'N' or 'O', U is not referenced.
+*
+*  LDU     (input) INTEGER
+*          The leading dimension of the array U.  LDU >= 1; if
+*          JOBU = 'S' or 'A', LDU >= M.
+*
+*  VT      (output) REAL array, dimension (LDVT,N)
+*          If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
+*          V**T;
+*          if JOBVT = 'S', VT contains the first min(m,n) rows of
+*          V**T (the right singular vectors, stored rowwise);
+*          if JOBVT = 'N' or 'O', VT is not referenced.
+*
+*  LDVT    (input) INTEGER
+*          The leading dimension of the array VT.  LDVT >= 1; if
+*          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
+*          if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
+*          superdiagonal elements of an upper bidiagonal matrix B
+*          whose diagonal is in S (not necessarily sorted). B
+*          satisfies A = U * B * VT, so it has the same singular values
+*          as A, and singular vectors related by U and VT.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= 1.
+*          LWORK >= MAX(3*MIN(M,N)+MAX(M,N),5*MIN(M,N)).
+*          For good performance, LWORK should generally be larger.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if SBDSQR did not converge, INFO specifies how many
+*                superdiagonals of an intermediate bidiagonal form B
+*                did not converge to zero. See the description of WORK
+*                above for details.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
+     $                   WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
+      INTEGER            BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
+     $                   ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
+     $                   MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
+     $                   NRVT, WRKBL
+      REAL               ANRM, BIGNUM, EPS, SMLNUM
+*     ..
+*     .. Local Arrays ..
+      REAL               DUM( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SBDSQR, SGEBRD, SGELQF, SGEMM, SGEQRF, SLACPY,
+     $                   SLASCL, SLASET, SORGBR, SORGLQ, SORGQR, SORMBR,
+     $                   XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               SLAMCH, SLANGE
+      EXTERNAL           LSAME, ILAENV, SLAMCH, SLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      MINMN = MIN( M, N )
+      MNTHR = ILAENV( 6, 'SGESVD', JOBU // JOBVT, M, N, 0, 0 )
+      WNTUA = LSAME( JOBU, 'A' )
+      WNTUS = LSAME( JOBU, 'S' )
+      WNTUAS = WNTUA .OR. WNTUS
+      WNTUO = LSAME( JOBU, 'O' )
+      WNTUN = LSAME( JOBU, 'N' )
+      WNTVA = LSAME( JOBVT, 'A' )
+      WNTVS = LSAME( JOBVT, 'S' )
+      WNTVAS = WNTVA .OR. WNTVS
+      WNTVO = LSAME( JOBVT, 'O' )
+      WNTVN = LSAME( JOBVT, 'N' )
+      MINWRK = 1
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR.
+     $         ( WNTVO .AND. WNTUO ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -6
+      ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN
+         INFO = -9
+      ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR.
+     $         ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN
+         INFO = -11
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV.)
+*
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) .AND. M.GT.0 .AND.
+     $   N.GT.0 ) THEN
+         IF( M.GE.N ) THEN
+*
+*           Compute space needed for SBDSQR
+*
+            BDSPAC = 5*N
+            IF( M.GE.MNTHR ) THEN
+               IF( WNTUN ) THEN
+*
+*                 Path 1 (M much larger than N, JOBU='N')
+*
+                  MAXWRK = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1,
+     $                     -1 )
+                  MAXWRK = MAX( MAXWRK, 3*N+2*N*
+     $                     ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
+                  IF( WNTVO .OR. WNTVAS )
+     $               MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
+     $                        ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) )
+                  MAXWRK = MAX( MAXWRK, BDSPAC )
+                  MINWRK = MAX( 4*N, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTUO .AND. WNTVN ) THEN
+*
+*                 Path 2 (M much larger than N, JOBU='O', JOBVT='N')
+*
+                  WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M,
+     $                    N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+2*N*
+     $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
+                  MINWRK = MAX( 3*N+M, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTUO .AND. WNTVAS ) THEN
+*
+*                 Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
+*                 'A')
+*
+                  WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M,
+     $                    N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+2*N*
+     $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+( N-1 )*
+     $                    ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
+                  MINWRK = MAX( 3*N+M, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTUS .AND. WNTVN ) THEN
+*
+*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
+*
+                  WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M,
+     $                    N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+2*N*
+     $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = N*N + WRKBL
+                  MINWRK = MAX( 3*N+M, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTUS .AND. WNTVO ) THEN
+*
+*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
+*
+                  WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M,
+     $                    N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+2*N*
+     $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+( N-1 )*
+     $                    ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = 2*N*N + WRKBL
+                  MINWRK = MAX( 3*N+M, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTUS .AND. WNTVAS ) THEN
+*
+*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
+*                 'A')
+*
+                  WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'SORGQR', ' ', M,
+     $                    N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+2*N*
+     $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+( N-1 )*
+     $                    ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = N*N + WRKBL
+                  MINWRK = MAX( 3*N+M, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTUA .AND. WNTVN ) THEN
+*
+*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
+*
+                  WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'SORGQR', ' ', M,
+     $                    M, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+2*N*
+     $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = N*N + WRKBL
+                  MINWRK = MAX( 3*N+M, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTUA .AND. WNTVO ) THEN
+*
+*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
+*
+                  WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'SORGQR', ' ', M,
+     $                    M, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+2*N*
+     $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+( N-1 )*
+     $                    ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = 2*N*N + WRKBL
+                  MINWRK = MAX( 3*N+M, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTUA .AND. WNTVAS ) THEN
+*
+*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
+*                 'A')
+*
+                  WRKBL = N + N*ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'SORGQR', ' ', M,
+     $                    M, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+2*N*
+     $                    ILAENV( 1, 'SGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+N*
+     $                    ILAENV( 1, 'SORGBR', 'Q', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*N+( N-1 )*
+     $                    ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = N*N + WRKBL
+                  MINWRK = MAX( 3*N+M, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               END IF
+            ELSE
+*
+*              Path 10 (M at least N, but not much larger)
+*
+               MAXWRK = 3*N + ( M+N )*ILAENV( 1, 'SGEBRD', ' ', M, N,
+     $                  -1, -1 )
+               IF( WNTUS .OR. WNTUO )
+     $            MAXWRK = MAX( MAXWRK, 3*N+N*
+     $                     ILAENV( 1, 'SORGBR', 'Q', M, N, N, -1 ) )
+               IF( WNTUA )
+     $            MAXWRK = MAX( MAXWRK, 3*N+M*
+     $                     ILAENV( 1, 'SORGBR', 'Q', M, M, N, -1 ) )
+               IF( .NOT.WNTVN )
+     $            MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
+     $                     ILAENV( 1, 'SORGBR', 'P', N, N, N, -1 ) )
+               MAXWRK = MAX( MAXWRK, BDSPAC )
+               MINWRK = MAX( 3*N+M, BDSPAC )
+               MAXWRK = MAX( MAXWRK, MINWRK )
+            END IF
+         ELSE
+*
+*           Compute space needed for SBDSQR
+*
+            BDSPAC = 5*M
+            IF( N.GE.MNTHR ) THEN
+               IF( WNTVN ) THEN
+*
+*                 Path 1t(N much larger than M, JOBVT='N')
+*
+                  MAXWRK = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1,
+     $                     -1 )
+                  MAXWRK = MAX( MAXWRK, 3*M+2*M*
+     $                     ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
+                  IF( WNTUO .OR. WNTUAS )
+     $               MAXWRK = MAX( MAXWRK, 3*M+M*
+     $                        ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) )
+                  MAXWRK = MAX( MAXWRK, BDSPAC )
+                  MINWRK = MAX( 4*M, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTVO .AND. WNTUN ) THEN
+*
+*                 Path 2t(N much larger than M, JOBU='N', JOBVT='O')
+*
+                  WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+2*M*
+     $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+( M-1 )*
+     $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
+                  MINWRK = MAX( 3*M+N, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTVO .AND. WNTUAS ) THEN
+*
+*                 Path 3t(N much larger than M, JOBU='S' or 'A',
+*                 JOBVT='O')
+*
+                  WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+2*M*
+     $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+( M-1 )*
+     $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
+                  MINWRK = MAX( 3*M+N, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTVS .AND. WNTUN ) THEN
+*
+*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
+*
+                  WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+2*M*
+     $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+( M-1 )*
+     $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = M*M + WRKBL
+                  MINWRK = MAX( 3*M+N, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTVS .AND. WNTUO ) THEN
+*
+*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
+*
+                  WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+2*M*
+     $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+( M-1 )*
+     $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = 2*M*M + WRKBL
+                  MINWRK = MAX( 3*M+N, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTVS .AND. WNTUAS ) THEN
+*
+*                 Path 6t(N much larger than M, JOBU='S' or 'A',
+*                 JOBVT='S')
+*
+                  WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'SORGLQ', ' ', M,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+2*M*
+     $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+( M-1 )*
+     $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = M*M + WRKBL
+                  MINWRK = MAX( 3*M+N, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTVA .AND. WNTUN ) THEN
+*
+*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
+*
+                  WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'SORGLQ', ' ', N,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+2*M*
+     $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+( M-1 )*
+     $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = M*M + WRKBL
+                  MINWRK = MAX( 3*M+N, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTVA .AND. WNTUO ) THEN
+*
+*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
+*
+                  WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'SORGLQ', ' ', N,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+2*M*
+     $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+( M-1 )*
+     $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = 2*M*M + WRKBL
+                  MINWRK = MAX( 3*M+N, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               ELSE IF( WNTVA .AND. WNTUAS ) THEN
+*
+*                 Path 9t(N much larger than M, JOBU='S' or 'A',
+*                 JOBVT='A')
+*
+                  WRKBL = M + M*ILAENV( 1, 'SGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'SORGLQ', ' ', N,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+2*M*
+     $                    ILAENV( 1, 'SGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+( M-1 )*
+     $                    ILAENV( 1, 'SORGBR', 'P', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 3*M+M*
+     $                    ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, BDSPAC )
+                  MAXWRK = M*M + WRKBL
+                  MINWRK = MAX( 3*M+N, BDSPAC )
+                  MAXWRK = MAX( MAXWRK, MINWRK )
+               END IF
+            ELSE
+*
+*              Path 10t(N greater than M, but not much larger)
+*
+               MAXWRK = 3*M + ( M+N )*ILAENV( 1, 'SGEBRD', ' ', M, N,
+     $                  -1, -1 )
+               IF( WNTVS .OR. WNTVO )
+     $            MAXWRK = MAX( MAXWRK, 3*M+M*
+     $                     ILAENV( 1, 'SORGBR', 'P', M, N, M, -1 ) )
+               IF( WNTVA )
+     $            MAXWRK = MAX( MAXWRK, 3*M+N*
+     $                     ILAENV( 1, 'SORGBR', 'P', N, N, M, -1 ) )
+               IF( .NOT.WNTUN )
+     $            MAXWRK = MAX( MAXWRK, 3*M+( M-1 )*
+     $                     ILAENV( 1, 'SORGBR', 'Q', M, M, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, BDSPAC )
+               MINWRK = MAX( 3*M+N, BDSPAC )
+               MAXWRK = MAX( MAXWRK, MINWRK )
+            END IF
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+*
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGESVD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         IF( LWORK.GE.1 )
+     $      WORK( 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SQRT( SLAMCH( 'S' ) ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = SLANGE( 'M', M, N, A, LDA, DUM )
+      ISCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         ISCL = 1
+         CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         ISCL = 1
+         CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
+      END IF
+*
+      IF( M.GE.N ) THEN
+*
+*        A has at least as many rows as columns. If A has sufficiently
+*        more rows than columns, first reduce using the QR
+*        decomposition (if sufficient workspace available)
+*
+         IF( M.GE.MNTHR ) THEN
+*
+            IF( WNTUN ) THEN
+*
+*              Path 1 (M much larger than N, JOBU='N')
+*              No left singular vectors to be computed
+*
+               ITAU = 1
+               IWORK = ITAU + N
+*
+*              Compute A=Q*R
+*              (Workspace: need 2*N, prefer N+N*NB)
+*
+               CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
+     $                      LWORK-IWORK+1, IERR )
+*
+*              Zero out below R
+*
+               CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
+               IE = 1
+               ITAUQ = IE + N
+               ITAUP = ITAUQ + N
+               IWORK = ITAUP + N
+*
+*              Bidiagonalize R in A
+*              (Workspace: need 4*N, prefer 3*N+2*N*NB)
+*
+               CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
+     $                      IERR )
+               NCVT = 0
+               IF( WNTVO .OR. WNTVAS ) THEN
+*
+*                 If right singular vectors desired, generate P'.
+*                 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
+*
+                  CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  NCVT = N
+               END IF
+               IWORK = IE + N
+*
+*              Perform bidiagonal QR iteration, computing right
+*              singular vectors of A in A if desired
+*              (Workspace: need BDSPAC)
+*
+               CALL SBDSQR( 'U', N, NCVT, 0, 0, S, WORK( IE ), A, LDA,
+     $                      DUM, 1, DUM, 1, WORK( IWORK ), INFO )
+*
+*              If right singular vectors desired in VT, copy them there
+*
+               IF( WNTVAS )
+     $            CALL SLACPY( 'F', N, N, A, LDA, VT, LDVT )
+*
+            ELSE IF( WNTUO .AND. WNTVN ) THEN
+*
+*              Path 2 (M much larger than N, JOBU='O', JOBVT='N')
+*              N left singular vectors to be overwritten on A and
+*              no right singular vectors to be computed
+*
+               IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
+*
+*                 Sufficient workspace for a fast algorithm
+*
+                  IR = 1
+                  IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
+*
+*                    WORK(IU) is LDA by N, WORK(IR) is LDA by N
+*
+                     LDWRKU = LDA
+                     LDWRKR = LDA
+                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
+*
+*                    WORK(IU) is LDA by N, WORK(IR) is N by N
+*
+                     LDWRKU = LDA
+                     LDWRKR = N
+                  ELSE
+*
+*                    WORK(IU) is LDWRKU by N, WORK(IR) is N by N
+*
+                     LDWRKU = ( LWORK-N*N-N ) / N
+                     LDWRKR = N
+                  END IF
+                  ITAU = IR + LDWRKR*N
+                  IWORK = ITAU + N
+*
+*                 Compute A=Q*R
+*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+                  CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Copy R to WORK(IR) and zero out below it
+*
+                  CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
+                  CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ),
+     $                         LDWRKR )
+*
+*                 Generate Q in A
+*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+                  CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IE = ITAU
+                  ITAUQ = IE + N
+                  ITAUP = ITAUQ + N
+                  IWORK = ITAUP + N
+*
+*                 Bidiagonalize R in WORK(IR)
+*                 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*
+                  CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Generate left vectors bidiagonalizing R
+*                 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
+*
+                  CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
+     $                         WORK( ITAUQ ), WORK( IWORK ),
+     $                         LWORK-IWORK+1, IERR )
+                  IWORK = IE + N
+*
+*                 Perform bidiagonal QR iteration, computing left
+*                 singular vectors of R in WORK(IR)
+*                 (Workspace: need N*N+BDSPAC)
+*
+                  CALL SBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, 1,
+     $                         WORK( IR ), LDWRKR, DUM, 1,
+     $                         WORK( IWORK ), INFO )
+                  IU = IE + N
+*
+*                 Multiply Q in A by left singular vectors of R in
+*                 WORK(IR), storing result in WORK(IU) and copying to A
+*                 (Workspace: need N*N+2*N, prefer N*N+M*N+N)
+*
+                  DO 10 I = 1, M, LDWRKU
+                     CHUNK = MIN( M-I+1, LDWRKU )
+                     CALL SGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
+     $                           LDA, WORK( IR ), LDWRKR, ZERO,
+     $                           WORK( IU ), LDWRKU )
+                     CALL SLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
+     $                            A( I, 1 ), LDA )
+   10             CONTINUE
+*
+               ELSE
+*
+*                 Insufficient workspace for a fast algorithm
+*
+                  IE = 1
+                  ITAUQ = IE + N
+                  ITAUP = ITAUQ + N
+                  IWORK = ITAUP + N
+*
+*                 Bidiagonalize A
+*                 (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
+*
+                  CALL SGEBRD( M, N, A, LDA, S, WORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Generate left vectors bidiagonalizing A
+*                 (Workspace: need 4*N, prefer 3*N+N*NB)
+*
+                  CALL SORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IWORK = IE + N
+*
+*                 Perform bidiagonal QR iteration, computing left
+*                 singular vectors of A in A
+*                 (Workspace: need BDSPAC)
+*
+                  CALL SBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, 1,
+     $                         A, LDA, DUM, 1, WORK( IWORK ), INFO )
+*
+               END IF
+*
+            ELSE IF( WNTUO .AND. WNTVAS ) THEN
+*
+*              Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
+*              N left singular vectors to be overwritten on A and
+*              N right singular vectors to be computed in VT
+*
+               IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
+*
+*                 Sufficient workspace for a fast algorithm
+*
+                  IR = 1
+                  IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
+*
+*                    WORK(IU) is LDA by N and WORK(IR) is LDA by N
+*
+                     LDWRKU = LDA
+                     LDWRKR = LDA
+                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
+*
+*                    WORK(IU) is LDA by N and WORK(IR) is N by N
+*
+                     LDWRKU = LDA
+                     LDWRKR = N
+                  ELSE
+*
+*                    WORK(IU) is LDWRKU by N and WORK(IR) is N by N
+*
+                     LDWRKU = ( LWORK-N*N-N ) / N
+                     LDWRKR = N
+                  END IF
+                  ITAU = IR + LDWRKR*N
+                  IWORK = ITAU + N
+*
+*                 Compute A=Q*R
+*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+                  CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Copy R to VT, zeroing out below it
+*
+                  CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT )
+                  CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, VT( 2, 1 ),
+     $                         LDVT )
+*
+*                 Generate Q in A
+*                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+                  CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IE = ITAU
+                  ITAUQ = IE + N
+                  ITAUP = ITAUQ + N
+                  IWORK = ITAUP + N
+*
+*                 Bidiagonalize R in VT, copying result to WORK(IR)
+*                 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*
+                  CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  CALL SLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR )
+*
+*                 Generate left vectors bidiagonalizing R in WORK(IR)
+*                 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
+*
+                  CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
+     $                         WORK( ITAUQ ), WORK( IWORK ),
+     $                         LWORK-IWORK+1, IERR )
+*
+*                 Generate right vectors bidiagonalizing R in VT
+*                 (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB)
+*
+                  CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IWORK = IE + N
+*
+*                 Perform bidiagonal QR iteration, computing left
+*                 singular vectors of R in WORK(IR) and computing right
+*                 singular vectors of R in VT
+*                 (Workspace: need N*N+BDSPAC)
+*
+                  CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, LDVT,
+     $                         WORK( IR ), LDWRKR, DUM, 1,
+     $                         WORK( IWORK ), INFO )
+                  IU = IE + N
+*
+*                 Multiply Q in A by left singular vectors of R in
+*                 WORK(IR), storing result in WORK(IU) and copying to A
+*                 (Workspace: need N*N+2*N, prefer N*N+M*N+N)
+*
+                  DO 20 I = 1, M, LDWRKU
+                     CHUNK = MIN( M-I+1, LDWRKU )
+                     CALL SGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
+     $                           LDA, WORK( IR ), LDWRKR, ZERO,
+     $                           WORK( IU ), LDWRKU )
+                     CALL SLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
+     $                            A( I, 1 ), LDA )
+   20             CONTINUE
+*
+               ELSE
+*
+*                 Insufficient workspace for a fast algorithm
+*
+                  ITAU = 1
+                  IWORK = ITAU + N
+*
+*                 Compute A=Q*R
+*                 (Workspace: need 2*N, prefer N+N*NB)
+*
+                  CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Copy R to VT, zeroing out below it
+*
+                  CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT )
+                  CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, VT( 2, 1 ),
+     $                         LDVT )
+*
+*                 Generate Q in A
+*                 (Workspace: need 2*N, prefer N+N*NB)
+*
+                  CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IE = ITAU
+                  ITAUQ = IE + N
+                  ITAUP = ITAUQ + N
+                  IWORK = ITAUP + N
+*
+*                 Bidiagonalize R in VT
+*                 (Workspace: need 4*N, prefer 3*N+2*N*NB)
+*
+                  CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Multiply Q in A by left vectors bidiagonalizing R
+*                 (Workspace: need 3*N+M, prefer 3*N+M*NB)
+*
+                  CALL SORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
+     $                         WORK( ITAUQ ), A, LDA, WORK( IWORK ),
+     $                         LWORK-IWORK+1, IERR )
+*
+*                 Generate right vectors bidiagonalizing R in VT
+*                 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
+*
+                  CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IWORK = IE + N
+*
+*                 Perform bidiagonal QR iteration, computing left
+*                 singular vectors of A in A and computing right
+*                 singular vectors of A in VT
+*                 (Workspace: need BDSPAC)
+*
+                  CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, LDVT,
+     $                         A, LDA, DUM, 1, WORK( IWORK ), INFO )
+*
+               END IF
+*
+            ELSE IF( WNTUS ) THEN
+*
+               IF( WNTVN ) THEN
+*
+*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
+*                 N left singular vectors to be computed in U and
+*                 no right singular vectors to be computed
+*
+                  IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IR = 1
+                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
+*
+*                       WORK(IR) is LDA by N
+*
+                        LDWRKR = LDA
+                     ELSE
+*
+*                       WORK(IR) is N by N
+*
+                        LDWRKR = N
+                     END IF
+                     ITAU = IR + LDWRKR*N
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R
+*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+                     CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy R to WORK(IR), zeroing out below it
+*
+                     CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ),
+     $                            LDWRKR )
+                     CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
+     $                            WORK( IR+1 ), LDWRKR )
+*
+*                    Generate Q in A
+*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+                     CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + N
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in WORK(IR)
+*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*
+                     CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S,
+     $                            WORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate left vectors bidiagonalizing R in WORK(IR)
+*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
+*
+                     CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of R in WORK(IR)
+*                    (Workspace: need N*N+BDSPAC)
+*
+                     CALL SBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
+     $                            1, WORK( IR ), LDWRKR, DUM, 1,
+     $                            WORK( IWORK ), INFO )
+*
+*                    Multiply Q in A by left singular vectors of R in
+*                    WORK(IR), storing result in U
+*                    (Workspace: need N*N)
+*
+                     CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
+     $                           WORK( IR ), LDWRKR, ZERO, U, LDU )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (Workspace: need 2*N, prefer N+N*NB)
+*
+                     CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (Workspace: need 2*N, prefer N+N*NB)
+*
+                     CALL SORGQR( M, N, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + N
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Zero out below R in A
+*
+                     CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
+     $                            LDA )
+*
+*                    Bidiagonalize R in A
+*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
+*
+                     CALL SGEBRD( N, N, A, LDA, S, WORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply Q in U by left vectors bidiagonalizing R
+*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
+*
+                     CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
+     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U
+*                    (Workspace: need BDSPAC)
+*
+                     CALL SBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
+     $                            1, U, LDU, DUM, 1, WORK( IWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTVO ) THEN
+*
+*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
+*                 N left singular vectors to be computed in U and
+*                 N right singular vectors to be overwritten on A
+*
+                  IF( LWORK.GE.2*N*N+MAX( 4*N, BDSPAC ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
+*
+*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*N
+                        LDWRKR = LDA
+                     ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
+*
+*                       WORK(IU) is LDA by N and WORK(IR) is N by N
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*N
+                        LDWRKR = N
+                     ELSE
+*
+*                       WORK(IU) is N by N and WORK(IR) is N by N
+*
+                        LDWRKU = N
+                        IR = IU + LDWRKU*N
+                        LDWRKR = N
+                     END IF
+                     ITAU = IR + LDWRKR*N
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R
+*                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
+*
+                     CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy R to WORK(IU), zeroing out below it
+*
+                     CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
+     $                            WORK( IU+1 ), LDWRKU )
+*
+*                    Generate Q in A
+*                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
+*
+                     CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + N
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in WORK(IU), copying result to
+*                    WORK(IR)
+*                    (Workspace: need 2*N*N+4*N,
+*                                prefer 2*N*N+3*N+2*N*NB)
+*
+                     CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S,
+     $                            WORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU,
+     $                            WORK( IR ), LDWRKR )
+*
+*                    Generate left bidiagonalizing vectors in WORK(IU)
+*                    (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
+*
+                     CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in WORK(IR)
+*                    (Workspace: need 2*N*N+4*N-1,
+*                                prefer 2*N*N+3*N+(N-1)*NB)
+*
+                     CALL SORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of R in WORK(IU) and computing
+*                    right singular vectors of R in WORK(IR)
+*                    (Workspace: need 2*N*N+BDSPAC)
+*
+                     CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
+     $                            WORK( IR ), LDWRKR, WORK( IU ),
+     $                            LDWRKU, DUM, 1, WORK( IWORK ), INFO )
+*
+*                    Multiply Q in A by left singular vectors of R in
+*                    WORK(IU), storing result in U
+*                    (Workspace: need N*N)
+*
+                     CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
+     $                           WORK( IU ), LDWRKU, ZERO, U, LDU )
+*
+*                    Copy right singular vectors of R to A
+*                    (Workspace: need N*N)
+*
+                     CALL SLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
+     $                            LDA )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (Workspace: need 2*N, prefer N+N*NB)
+*
+                     CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (Workspace: need 2*N, prefer N+N*NB)
+*
+                     CALL SORGQR( M, N, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + N
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Zero out below R in A
+*
+                     CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
+     $                            LDA )
+*
+*                    Bidiagonalize R in A
+*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
+*
+                     CALL SGEBRD( N, N, A, LDA, S, WORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply Q in U by left vectors bidiagonalizing R
+*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
+*
+                     CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
+     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right vectors bidiagonalizing R in A
+*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
+*
+                     CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U and computing right
+*                    singular vectors of A in A
+*                    (Workspace: need BDSPAC)
+*
+                     CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
+     $                            LDA, U, LDU, DUM, 1, WORK( IWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTVAS ) THEN
+*
+*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S'
+*                         or 'A')
+*                 N left singular vectors to be computed in U and
+*                 N right singular vectors to be computed in VT
+*
+                  IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
+*
+*                       WORK(IU) is LDA by N
+*
+                        LDWRKU = LDA
+                     ELSE
+*
+*                       WORK(IU) is N by N
+*
+                        LDWRKU = N
+                     END IF
+                     ITAU = IU + LDWRKU*N
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R
+*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+                     CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy R to WORK(IU), zeroing out below it
+*
+                     CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
+     $                            WORK( IU+1 ), LDWRKU )
+*
+*                    Generate Q in A
+*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+                     CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + N
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in WORK(IU), copying result to VT
+*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*
+                     CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S,
+     $                            WORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
+     $                            LDVT )
+*
+*                    Generate left bidiagonalizing vectors in WORK(IU)
+*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
+*
+                     CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in VT
+*                    (Workspace: need N*N+4*N-1,
+*                                prefer N*N+3*N+(N-1)*NB)
+*
+                     CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of R in WORK(IU) and computing
+*                    right singular vectors of R in VT
+*                    (Workspace: need N*N+BDSPAC)
+*
+                     CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
+     $                            LDVT, WORK( IU ), LDWRKU, DUM, 1,
+     $                            WORK( IWORK ), INFO )
+*
+*                    Multiply Q in A by left singular vectors of R in
+*                    WORK(IU), storing result in U
+*                    (Workspace: need N*N)
+*
+                     CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
+     $                           WORK( IU ), LDWRKU, ZERO, U, LDU )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (Workspace: need 2*N, prefer N+N*NB)
+*
+                     CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (Workspace: need 2*N, prefer N+N*NB)
+*
+                     CALL SORGQR( M, N, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy R to VT, zeroing out below it
+*
+                     CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT )
+                     CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, VT( 2, 1 ),
+     $                            LDVT )
+                     IE = ITAU
+                     ITAUQ = IE + N
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in VT
+*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
+*
+                     CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply Q in U by left bidiagonalizing vectors
+*                    in VT
+*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
+*
+                     CALL SORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
+     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in VT
+*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
+*
+                     CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U and computing right
+*                    singular vectors of A in VT
+*                    (Workspace: need BDSPAC)
+*
+                     CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
+     $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               END IF
+*
+            ELSE IF( WNTUA ) THEN
+*
+               IF( WNTVN ) THEN
+*
+*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
+*                 M left singular vectors to be computed in U and
+*                 no right singular vectors to be computed
+*
+                  IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IR = 1
+                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
+*
+*                       WORK(IR) is LDA by N
+*
+                        LDWRKR = LDA
+                     ELSE
+*
+*                       WORK(IR) is N by N
+*
+                        LDWRKR = N
+                     END IF
+                     ITAU = IR + LDWRKR*N
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+                     CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Copy R to WORK(IR), zeroing out below it
+*
+                     CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ),
+     $                            LDWRKR )
+                     CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
+     $                            WORK( IR+1 ), LDWRKR )
+*
+*                    Generate Q in U
+*                    (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
+*
+                     CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + N
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in WORK(IR)
+*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*
+                     CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S,
+     $                            WORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in WORK(IR)
+*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
+*
+                     CALL SORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of R in WORK(IR)
+*                    (Workspace: need N*N+BDSPAC)
+*
+                     CALL SBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
+     $                            1, WORK( IR ), LDWRKR, DUM, 1,
+     $                            WORK( IWORK ), INFO )
+*
+*                    Multiply Q in U by left singular vectors of R in
+*                    WORK(IR), storing result in A
+*                    (Workspace: need N*N)
+*
+                     CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
+     $                           WORK( IR ), LDWRKR, ZERO, A, LDA )
+*
+*                    Copy left singular vectors of A from A to U
+*
+                     CALL SLACPY( 'F', M, N, A, LDA, U, LDU )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (Workspace: need 2*N, prefer N+N*NB)
+*
+                     CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (Workspace: need N+M, prefer N+M*NB)
+*
+                     CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + N
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Zero out below R in A
+*
+                     CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
+     $                            LDA )
+*
+*                    Bidiagonalize R in A
+*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
+*
+                     CALL SGEBRD( N, N, A, LDA, S, WORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply Q in U by left bidiagonalizing vectors
+*                    in A
+*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
+*
+                     CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
+     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U
+*                    (Workspace: need BDSPAC)
+*
+                     CALL SBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
+     $                            1, U, LDU, DUM, 1, WORK( IWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTVO ) THEN
+*
+*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
+*                 M left singular vectors to be computed in U and
+*                 N right singular vectors to be overwritten on A
+*
+                  IF( LWORK.GE.2*N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
+*
+*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*N
+                        LDWRKR = LDA
+                     ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
+*
+*                       WORK(IU) is LDA by N and WORK(IR) is N by N
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*N
+                        LDWRKR = N
+                     ELSE
+*
+*                       WORK(IU) is N by N and WORK(IR) is N by N
+*
+                        LDWRKU = N
+                        IR = IU + LDWRKU*N
+                        LDWRKR = N
+                     END IF
+                     ITAU = IR + LDWRKR*N
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
+*
+                     CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
+*
+                     CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy R to WORK(IU), zeroing out below it
+*
+                     CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
+     $                            WORK( IU+1 ), LDWRKU )
+                     IE = ITAU
+                     ITAUQ = IE + N
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in WORK(IU), copying result to
+*                    WORK(IR)
+*                    (Workspace: need 2*N*N+4*N,
+*                                prefer 2*N*N+3*N+2*N*NB)
+*
+                     CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S,
+     $                            WORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU,
+     $                            WORK( IR ), LDWRKR )
+*
+*                    Generate left bidiagonalizing vectors in WORK(IU)
+*                    (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
+*
+                     CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in WORK(IR)
+*                    (Workspace: need 2*N*N+4*N-1,
+*                                prefer 2*N*N+3*N+(N-1)*NB)
+*
+                     CALL SORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of R in WORK(IU) and computing
+*                    right singular vectors of R in WORK(IR)
+*                    (Workspace: need 2*N*N+BDSPAC)
+*
+                     CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
+     $                            WORK( IR ), LDWRKR, WORK( IU ),
+     $                            LDWRKU, DUM, 1, WORK( IWORK ), INFO )
+*
+*                    Multiply Q in U by left singular vectors of R in
+*                    WORK(IU), storing result in A
+*                    (Workspace: need N*N)
+*
+                     CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
+     $                           WORK( IU ), LDWRKU, ZERO, A, LDA )
+*
+*                    Copy left singular vectors of A from A to U
+*
+                     CALL SLACPY( 'F', M, N, A, LDA, U, LDU )
+*
+*                    Copy right singular vectors of R from WORK(IR) to A
+*
+                     CALL SLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
+     $                            LDA )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (Workspace: need 2*N, prefer N+N*NB)
+*
+                     CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (Workspace: need N+M, prefer N+M*NB)
+*
+                     CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + N
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Zero out below R in A
+*
+                     CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
+     $                            LDA )
+*
+*                    Bidiagonalize R in A
+*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
+*
+                     CALL SGEBRD( N, N, A, LDA, S, WORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply Q in U by left bidiagonalizing vectors
+*                    in A
+*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
+*
+                     CALL SORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
+     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in A
+*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
+*
+                     CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U and computing right
+*                    singular vectors of A in A
+*                    (Workspace: need BDSPAC)
+*
+                     CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
+     $                            LDA, U, LDU, DUM, 1, WORK( IWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTVAS ) THEN
+*
+*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S'
+*                         or 'A')
+*                 M left singular vectors to be computed in U and
+*                 N right singular vectors to be computed in VT
+*
+                  IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
+*
+*                       WORK(IU) is LDA by N
+*
+                        LDWRKU = LDA
+                     ELSE
+*
+*                       WORK(IU) is N by N
+*
+                        LDWRKU = N
+                     END IF
+                     ITAU = IU + LDWRKU*N
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*
+                     CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
+*
+                     CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy R to WORK(IU), zeroing out below it
+*
+                     CALL SLACPY( 'U', N, N, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL SLASET( 'L', N-1, N-1, ZERO, ZERO,
+     $                            WORK( IU+1 ), LDWRKU )
+                     IE = ITAU
+                     ITAUQ = IE + N
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in WORK(IU), copying result to VT
+*                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*
+                     CALL SGEBRD( N, N, WORK( IU ), LDWRKU, S,
+     $                            WORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL SLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
+     $                            LDVT )
+*
+*                    Generate left bidiagonalizing vectors in WORK(IU)
+*                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
+*
+                     CALL SORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in VT
+*                    (Workspace: need N*N+4*N-1,
+*                                prefer N*N+3*N+(N-1)*NB)
+*
+                     CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of R in WORK(IU) and computing
+*                    right singular vectors of R in VT
+*                    (Workspace: need N*N+BDSPAC)
+*
+                     CALL SBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
+     $                            LDVT, WORK( IU ), LDWRKU, DUM, 1,
+     $                            WORK( IWORK ), INFO )
+*
+*                    Multiply Q in U by left singular vectors of R in
+*                    WORK(IU), storing result in A
+*                    (Workspace: need N*N)
+*
+                     CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
+     $                           WORK( IU ), LDWRKU, ZERO, A, LDA )
+*
+*                    Copy left singular vectors of A from A to U
+*
+                     CALL SLACPY( 'F', M, N, A, LDA, U, LDU )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (Workspace: need 2*N, prefer N+N*NB)
+*
+                     CALL SGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (Workspace: need N+M, prefer N+M*NB)
+*
+                     CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy R from A to VT, zeroing out below it
+*
+                     CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT )
+                     CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, VT( 2, 1 ),
+     $                            LDVT )
+                     IE = ITAU
+                     ITAUQ = IE + N
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in VT
+*                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
+*
+                     CALL SGEBRD( N, N, VT, LDVT, S, WORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply Q in U by left bidiagonalizing vectors
+*                    in VT
+*                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
+*
+                     CALL SORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
+     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in VT
+*                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
+*
+                     CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U and computing right
+*                    singular vectors of A in VT
+*                    (Workspace: need BDSPAC)
+*
+                     CALL SBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
+     $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               END IF
+*
+            END IF
+*
+         ELSE
+*
+*           M .LT. MNTHR
+*
+*           Path 10 (M at least N, but not much larger)
+*           Reduce to bidiagonal form without QR decomposition
+*
+            IE = 1
+            ITAUQ = IE + N
+            ITAUP = ITAUQ + N
+            IWORK = ITAUP + N
+*
+*           Bidiagonalize A
+*           (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
+*
+            CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
+     $                   IERR )
+            IF( WNTUAS ) THEN
+*
+*              If left singular vectors desired in U, copy result to U
+*              and generate left bidiagonalizing vectors in U
+*              (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB)
+*
+               CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
+               IF( WNTUS )
+     $            NCU = N
+               IF( WNTUA )
+     $            NCU = M
+               CALL SORGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IF( WNTVAS ) THEN
+*
+*              If right singular vectors desired in VT, copy result to
+*              VT and generate right bidiagonalizing vectors in VT
+*              (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
+*
+               CALL SLACPY( 'U', N, N, A, LDA, VT, LDVT )
+               CALL SORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IF( WNTUO ) THEN
+*
+*              If left singular vectors desired in A, generate left
+*              bidiagonalizing vectors in A
+*              (Workspace: need 4*N, prefer 3*N+N*NB)
+*
+               CALL SORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IF( WNTVO ) THEN
+*
+*              If right singular vectors desired in A, generate right
+*              bidiagonalizing vectors in A
+*              (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
+*
+               CALL SORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IWORK = IE + N
+            IF( WNTUAS .OR. WNTUO )
+     $         NRU = M
+            IF( WNTUN )
+     $         NRU = 0
+            IF( WNTVAS .OR. WNTVO )
+     $         NCVT = N
+            IF( WNTVN )
+     $         NCVT = 0
+            IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
+*
+*              Perform bidiagonal QR iteration, if desired, computing
+*              left singular vectors in U and computing right singular
+*              vectors in VT
+*              (Workspace: need BDSPAC)
+*
+               CALL SBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
+     $                      LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
+            ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
+*
+*              Perform bidiagonal QR iteration, if desired, computing
+*              left singular vectors in U and computing right singular
+*              vectors in A
+*              (Workspace: need BDSPAC)
+*
+               CALL SBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
+     $                      U, LDU, DUM, 1, WORK( IWORK ), INFO )
+            ELSE
+*
+*              Perform bidiagonal QR iteration, if desired, computing
+*              left singular vectors in A and computing right singular
+*              vectors in VT
+*              (Workspace: need BDSPAC)
+*
+               CALL SBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
+     $                      LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
+            END IF
+*
+         END IF
+*
+      ELSE
+*
+*        A has more columns than rows. If A has sufficiently more
+*        columns than rows, first reduce using the LQ decomposition (if
+*        sufficient workspace available)
+*
+         IF( N.GE.MNTHR ) THEN
+*
+            IF( WNTVN ) THEN
+*
+*              Path 1t(N much larger than M, JOBVT='N')
+*              No right singular vectors to be computed
+*
+               ITAU = 1
+               IWORK = ITAU + M
+*
+*              Compute A=L*Q
+*              (Workspace: need 2*M, prefer M+M*NB)
+*
+               CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
+     $                      LWORK-IWORK+1, IERR )
+*
+*              Zero out above L
+*
+               CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )
+               IE = 1
+               ITAUQ = IE + M
+               ITAUP = ITAUQ + M
+               IWORK = ITAUP + M
+*
+*              Bidiagonalize L in A
+*              (Workspace: need 4*M, prefer 3*M+2*M*NB)
+*
+               CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
+     $                      IERR )
+               IF( WNTUO .OR. WNTUAS ) THEN
+*
+*                 If left singular vectors desired, generate Q
+*                 (Workspace: need 4*M, prefer 3*M+M*NB)
+*
+                  CALL SORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+               END IF
+               IWORK = IE + M
+               NRU = 0
+               IF( WNTUO .OR. WNTUAS )
+     $            NRU = M
+*
+*              Perform bidiagonal QR iteration, computing left singular
+*              vectors of A in A if desired
+*              (Workspace: need BDSPAC)
+*
+               CALL SBDSQR( 'U', M, 0, NRU, 0, S, WORK( IE ), DUM, 1, A,
+     $                      LDA, DUM, 1, WORK( IWORK ), INFO )
+*
+*              If left singular vectors desired in U, copy them there
+*
+               IF( WNTUAS )
+     $            CALL SLACPY( 'F', M, M, A, LDA, U, LDU )
+*
+            ELSE IF( WNTVO .AND. WNTUN ) THEN
+*
+*              Path 2t(N much larger than M, JOBU='N', JOBVT='O')
+*              M right singular vectors to be overwritten on A and
+*              no left singular vectors to be computed
+*
+               IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
+*
+*                 Sufficient workspace for a fast algorithm
+*
+                  IR = 1
+                  IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
+*
+*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
+*
+                     LDWRKU = LDA
+                     CHUNK = N
+                     LDWRKR = LDA
+                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
+*
+*                    WORK(IU) is LDA by N and WORK(IR) is M by M
+*
+                     LDWRKU = LDA
+                     CHUNK = N
+                     LDWRKR = M
+                  ELSE
+*
+*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
+*
+                     LDWRKU = M
+                     CHUNK = ( LWORK-M*M-M ) / M
+                     LDWRKR = M
+                  END IF
+                  ITAU = IR + LDWRKR*M
+                  IWORK = ITAU + M
+*
+*                 Compute A=L*Q
+*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+                  CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Copy L to WORK(IR) and zero out above it
+*
+                  CALL SLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR )
+                  CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
+     $                         WORK( IR+LDWRKR ), LDWRKR )
+*
+*                 Generate Q in A
+*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+                  CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IE = ITAU
+                  ITAUQ = IE + M
+                  ITAUP = ITAUQ + M
+                  IWORK = ITAUP + M
+*
+*                 Bidiagonalize L in WORK(IR)
+*                 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*
+                  CALL SGEBRD( M, M, WORK( IR ), LDWRKR, S, WORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Generate right vectors bidiagonalizing L
+*                 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
+*
+                  CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
+     $                         WORK( ITAUP ), WORK( IWORK ),
+     $                         LWORK-IWORK+1, IERR )
+                  IWORK = IE + M
+*
+*                 Perform bidiagonal QR iteration, computing right
+*                 singular vectors of L in WORK(IR)
+*                 (Workspace: need M*M+BDSPAC)
+*
+                  CALL SBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
+     $                         WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
+     $                         WORK( IWORK ), INFO )
+                  IU = IE + M
+*
+*                 Multiply right singular vectors of L in WORK(IR) by Q
+*                 in A, storing result in WORK(IU) and copying to A
+*                 (Workspace: need M*M+2*M, prefer M*M+M*N+M)
+*
+                  DO 30 I = 1, N, CHUNK
+                     BLK = MIN( N-I+1, CHUNK )
+                     CALL SGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
+     $                           LDWRKR, A( 1, I ), LDA, ZERO,
+     $                           WORK( IU ), LDWRKU )
+                     CALL SLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
+     $                            A( 1, I ), LDA )
+   30             CONTINUE
+*
+               ELSE
+*
+*                 Insufficient workspace for a fast algorithm
+*
+                  IE = 1
+                  ITAUQ = IE + M
+                  ITAUP = ITAUQ + M
+                  IWORK = ITAUP + M
+*
+*                 Bidiagonalize A
+*                 (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
+*
+                  CALL SGEBRD( M, N, A, LDA, S, WORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Generate right vectors bidiagonalizing A
+*                 (Workspace: need 4*M, prefer 3*M+M*NB)
+*
+                  CALL SORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IWORK = IE + M
+*
+*                 Perform bidiagonal QR iteration, computing right
+*                 singular vectors of A in A
+*                 (Workspace: need BDSPAC)
+*
+                  CALL SBDSQR( 'L', M, N, 0, 0, S, WORK( IE ), A, LDA,
+     $                         DUM, 1, DUM, 1, WORK( IWORK ), INFO )
+*
+               END IF
+*
+            ELSE IF( WNTVO .AND. WNTUAS ) THEN
+*
+*              Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
+*              M right singular vectors to be overwritten on A and
+*              M left singular vectors to be computed in U
+*
+               IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
+*
+*                 Sufficient workspace for a fast algorithm
+*
+                  IR = 1
+                  IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
+*
+*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
+*
+                     LDWRKU = LDA
+                     CHUNK = N
+                     LDWRKR = LDA
+                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
+*
+*                    WORK(IU) is LDA by N and WORK(IR) is M by M
+*
+                     LDWRKU = LDA
+                     CHUNK = N
+                     LDWRKR = M
+                  ELSE
+*
+*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
+*
+                     LDWRKU = M
+                     CHUNK = ( LWORK-M*M-M ) / M
+                     LDWRKR = M
+                  END IF
+                  ITAU = IR + LDWRKR*M
+                  IWORK = ITAU + M
+*
+*                 Compute A=L*Q
+*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+                  CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Copy L to U, zeroing about above it
+*
+                  CALL SLACPY( 'L', M, M, A, LDA, U, LDU )
+                  CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
+     $                         LDU )
+*
+*                 Generate Q in A
+*                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+                  CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IE = ITAU
+                  ITAUQ = IE + M
+                  ITAUP = ITAUQ + M
+                  IWORK = ITAUP + M
+*
+*                 Bidiagonalize L in U, copying result to WORK(IR)
+*                 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*
+                  CALL SGEBRD( M, M, U, LDU, S, WORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  CALL SLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR )
+*
+*                 Generate right vectors bidiagonalizing L in WORK(IR)
+*                 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
+*
+                  CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
+     $                         WORK( ITAUP ), WORK( IWORK ),
+     $                         LWORK-IWORK+1, IERR )
+*
+*                 Generate left vectors bidiagonalizing L in U
+*                 (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
+*
+                  CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IWORK = IE + M
+*
+*                 Perform bidiagonal QR iteration, computing left
+*                 singular vectors of L in U, and computing right
+*                 singular vectors of L in WORK(IR)
+*                 (Workspace: need M*M+BDSPAC)
+*
+                  CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
+     $                         WORK( IR ), LDWRKR, U, LDU, DUM, 1,
+     $                         WORK( IWORK ), INFO )
+                  IU = IE + M
+*
+*                 Multiply right singular vectors of L in WORK(IR) by Q
+*                 in A, storing result in WORK(IU) and copying to A
+*                 (Workspace: need M*M+2*M, prefer M*M+M*N+M))
+*
+                  DO 40 I = 1, N, CHUNK
+                     BLK = MIN( N-I+1, CHUNK )
+                     CALL SGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
+     $                           LDWRKR, A( 1, I ), LDA, ZERO,
+     $                           WORK( IU ), LDWRKU )
+                     CALL SLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
+     $                            A( 1, I ), LDA )
+   40             CONTINUE
+*
+               ELSE
+*
+*                 Insufficient workspace for a fast algorithm
+*
+                  ITAU = 1
+                  IWORK = ITAU + M
+*
+*                 Compute A=L*Q
+*                 (Workspace: need 2*M, prefer M+M*NB)
+*
+                  CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Copy L to U, zeroing out above it
+*
+                  CALL SLACPY( 'L', M, M, A, LDA, U, LDU )
+                  CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
+     $                         LDU )
+*
+*                 Generate Q in A
+*                 (Workspace: need 2*M, prefer M+M*NB)
+*
+                  CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IE = ITAU
+                  ITAUQ = IE + M
+                  ITAUP = ITAUQ + M
+                  IWORK = ITAUP + M
+*
+*                 Bidiagonalize L in U
+*                 (Workspace: need 4*M, prefer 3*M+2*M*NB)
+*
+                  CALL SGEBRD( M, M, U, LDU, S, WORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Multiply right vectors bidiagonalizing L by Q in A
+*                 (Workspace: need 3*M+N, prefer 3*M+N*NB)
+*
+                  CALL SORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
+     $                         WORK( ITAUP ), A, LDA, WORK( IWORK ),
+     $                         LWORK-IWORK+1, IERR )
+*
+*                 Generate left vectors bidiagonalizing L in U
+*                 (Workspace: need 4*M, prefer 3*M+M*NB)
+*
+                  CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IWORK = IE + M
+*
+*                 Perform bidiagonal QR iteration, computing left
+*                 singular vectors of A in U and computing right
+*                 singular vectors of A in A
+*                 (Workspace: need BDSPAC)
+*
+                  CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), A, LDA,
+     $                         U, LDU, DUM, 1, WORK( IWORK ), INFO )
+*
+               END IF
+*
+            ELSE IF( WNTVS ) THEN
+*
+               IF( WNTUN ) THEN
+*
+*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
+*                 M right singular vectors to be computed in VT and
+*                 no left singular vectors to be computed
+*
+                  IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IR = 1
+                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
+*
+*                       WORK(IR) is LDA by M
+*
+                        LDWRKR = LDA
+                     ELSE
+*
+*                       WORK(IR) is M by M
+*
+                        LDWRKR = M
+                     END IF
+                     ITAU = IR + LDWRKR*M
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q
+*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+                     CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy L to WORK(IR), zeroing out above it
+*
+                     CALL SLACPY( 'L', M, M, A, LDA, WORK( IR ),
+     $                            LDWRKR )
+                     CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
+     $                            WORK( IR+LDWRKR ), LDWRKR )
+*
+*                    Generate Q in A
+*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+                     CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + M
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in WORK(IR)
+*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*
+                     CALL SGEBRD( M, M, WORK( IR ), LDWRKR, S,
+     $                            WORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right vectors bidiagonalizing L in
+*                    WORK(IR)
+*                    (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
+*
+                     CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing right
+*                    singular vectors of L in WORK(IR)
+*                    (Workspace: need M*M+BDSPAC)
+*
+                     CALL SBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
+     $                            WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
+     $                            WORK( IWORK ), INFO )
+*
+*                    Multiply right singular vectors of L in WORK(IR) by
+*                    Q in A, storing result in VT
+*                    (Workspace: need M*M)
+*
+                     CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
+     $                           LDWRKR, A, LDA, ZERO, VT, LDVT )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q
+*                    (Workspace: need 2*M, prefer M+M*NB)
+*
+                     CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy result to VT
+*
+                     CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (Workspace: need 2*M, prefer M+M*NB)
+*
+                     CALL SORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + M
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Zero out above L in A
+*
+                     CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
+     $                            LDA )
+*
+*                    Bidiagonalize L in A
+*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
+*
+                     CALL SGEBRD( M, M, A, LDA, S, WORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply right vectors bidiagonalizing L by Q in VT
+*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
+*
+                     CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
+     $                            WORK( ITAUP ), VT, LDVT,
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing right
+*                    singular vectors of A in VT
+*                    (Workspace: need BDSPAC)
+*
+                     CALL SBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
+     $                            LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTUO ) THEN
+*
+*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
+*                 M right singular vectors to be computed in VT and
+*                 M left singular vectors to be overwritten on A
+*
+                  IF( LWORK.GE.2*M*M+MAX( 4*M, BDSPAC ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
+*
+*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*M
+                        LDWRKR = LDA
+                     ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
+*
+*                       WORK(IU) is LDA by M and WORK(IR) is M by M
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*M
+                        LDWRKR = M
+                     ELSE
+*
+*                       WORK(IU) is M by M and WORK(IR) is M by M
+*
+                        LDWRKU = M
+                        IR = IU + LDWRKU*M
+                        LDWRKR = M
+                     END IF
+                     ITAU = IR + LDWRKR*M
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q
+*                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
+*
+                     CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy L to WORK(IU), zeroing out below it
+*
+                     CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
+     $                            WORK( IU+LDWRKU ), LDWRKU )
+*
+*                    Generate Q in A
+*                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
+*
+                     CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + M
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in WORK(IU), copying result to
+*                    WORK(IR)
+*                    (Workspace: need 2*M*M+4*M,
+*                                prefer 2*M*M+3*M+2*M*NB)
+*
+                     CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S,
+     $                            WORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU,
+     $                            WORK( IR ), LDWRKR )
+*
+*                    Generate right bidiagonalizing vectors in WORK(IU)
+*                    (Workspace: need 2*M*M+4*M-1,
+*                                prefer 2*M*M+3*M+(M-1)*NB)
+*
+                     CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in WORK(IR)
+*                    (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
+*
+                     CALL SORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of L in WORK(IR) and computing
+*                    right singular vectors of L in WORK(IU)
+*                    (Workspace: need 2*M*M+BDSPAC)
+*
+                     CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
+     $                            WORK( IU ), LDWRKU, WORK( IR ),
+     $                            LDWRKR, DUM, 1, WORK( IWORK ), INFO )
+*
+*                    Multiply right singular vectors of L in WORK(IU) by
+*                    Q in A, storing result in VT
+*                    (Workspace: need M*M)
+*
+                     CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
+     $                           LDWRKU, A, LDA, ZERO, VT, LDVT )
+*
+*                    Copy left singular vectors of L to A
+*                    (Workspace: need M*M)
+*
+                     CALL SLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
+     $                            LDA )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (Workspace: need 2*M, prefer M+M*NB)
+*
+                     CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (Workspace: need 2*M, prefer M+M*NB)
+*
+                     CALL SORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + M
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Zero out above L in A
+*
+                     CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
+     $                            LDA )
+*
+*                    Bidiagonalize L in A
+*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
+*
+                     CALL SGEBRD( M, M, A, LDA, S, WORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply right vectors bidiagonalizing L by Q in VT
+*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
+*
+                     CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
+     $                            WORK( ITAUP ), VT, LDVT,
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors of L in A
+*                    (Workspace: need 4*M, prefer 3*M+M*NB)
+*
+                     CALL SORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, compute left
+*                    singular vectors of A in A and compute right
+*                    singular vectors of A in VT
+*                    (Workspace: need BDSPAC)
+*
+                     CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
+     $                            LDVT, A, LDA, DUM, 1, WORK( IWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTUAS ) THEN
+*
+*                 Path 6t(N much larger than M, JOBU='S' or 'A',
+*                         JOBVT='S')
+*                 M right singular vectors to be computed in VT and
+*                 M left singular vectors to be computed in U
+*
+                  IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
+*
+*                       WORK(IU) is LDA by N
+*
+                        LDWRKU = LDA
+                     ELSE
+*
+*                       WORK(IU) is LDA by M
+*
+                        LDWRKU = M
+                     END IF
+                     ITAU = IU + LDWRKU*M
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q
+*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+                     CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy L to WORK(IU), zeroing out above it
+*
+                     CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
+     $                            WORK( IU+LDWRKU ), LDWRKU )
+*
+*                    Generate Q in A
+*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+                     CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + M
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in WORK(IU), copying result to U
+*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*
+                     CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S,
+     $                            WORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
+     $                            LDU )
+*
+*                    Generate right bidiagonalizing vectors in WORK(IU)
+*                    (Workspace: need M*M+4*M-1,
+*                                prefer M*M+3*M+(M-1)*NB)
+*
+                     CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in U
+*                    (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
+*
+                     CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of L in U and computing right
+*                    singular vectors of L in WORK(IU)
+*                    (Workspace: need M*M+BDSPAC)
+*
+                     CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
+     $                            WORK( IU ), LDWRKU, U, LDU, DUM, 1,
+     $                            WORK( IWORK ), INFO )
+*
+*                    Multiply right singular vectors of L in WORK(IU) by
+*                    Q in A, storing result in VT
+*                    (Workspace: need M*M)
+*
+                     CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
+     $                           LDWRKU, A, LDA, ZERO, VT, LDVT )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (Workspace: need 2*M, prefer M+M*NB)
+*
+                     CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (Workspace: need 2*M, prefer M+M*NB)
+*
+                     CALL SORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy L to U, zeroing out above it
+*
+                     CALL SLACPY( 'L', M, M, A, LDA, U, LDU )
+                     CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
+     $                            LDU )
+                     IE = ITAU
+                     ITAUQ = IE + M
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in U
+*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
+*
+                     CALL SGEBRD( M, M, U, LDU, S, WORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply right bidiagonalizing vectors in U by Q
+*                    in VT
+*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
+*
+                     CALL SORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
+     $                            WORK( ITAUP ), VT, LDVT,
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in U
+*                    (Workspace: need 4*M, prefer 3*M+M*NB)
+*
+                     CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U and computing right
+*                    singular vectors of A in VT
+*                    (Workspace: need BDSPAC)
+*
+                     CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
+     $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               END IF
+*
+            ELSE IF( WNTVA ) THEN
+*
+               IF( WNTUN ) THEN
+*
+*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
+*                 N right singular vectors to be computed in VT and
+*                 no left singular vectors to be computed
+*
+                  IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IR = 1
+                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
+*
+*                       WORK(IR) is LDA by M
+*
+                        LDWRKR = LDA
+                     ELSE
+*
+*                       WORK(IR) is M by M
+*
+                        LDWRKR = M
+                     END IF
+                     ITAU = IR + LDWRKR*M
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+                     CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Copy L to WORK(IR), zeroing out above it
+*
+                     CALL SLACPY( 'L', M, M, A, LDA, WORK( IR ),
+     $                            LDWRKR )
+                     CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
+     $                            WORK( IR+LDWRKR ), LDWRKR )
+*
+*                    Generate Q in VT
+*                    (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
+*
+                     CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + M
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in WORK(IR)
+*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*
+                     CALL SGEBRD( M, M, WORK( IR ), LDWRKR, S,
+     $                            WORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in WORK(IR)
+*                    (Workspace: need M*M+4*M-1,
+*                                prefer M*M+3*M+(M-1)*NB)
+*
+                     CALL SORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing right
+*                    singular vectors of L in WORK(IR)
+*                    (Workspace: need M*M+BDSPAC)
+*
+                     CALL SBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
+     $                            WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
+     $                            WORK( IWORK ), INFO )
+*
+*                    Multiply right singular vectors of L in WORK(IR) by
+*                    Q in VT, storing result in A
+*                    (Workspace: need M*M)
+*
+                     CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
+     $                           LDWRKR, VT, LDVT, ZERO, A, LDA )
+*
+*                    Copy right singular vectors of A from A to VT
+*
+                     CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (Workspace: need 2*M, prefer M+M*NB)
+*
+                     CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (Workspace: need M+N, prefer M+N*NB)
+*
+                     CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + M
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Zero out above L in A
+*
+                     CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
+     $                            LDA )
+*
+*                    Bidiagonalize L in A
+*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
+*
+                     CALL SGEBRD( M, M, A, LDA, S, WORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply right bidiagonalizing vectors in A by Q
+*                    in VT
+*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
+*
+                     CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
+     $                            WORK( ITAUP ), VT, LDVT,
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing right
+*                    singular vectors of A in VT
+*                    (Workspace: need BDSPAC)
+*
+                     CALL SBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
+     $                            LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTUO ) THEN
+*
+*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
+*                 N right singular vectors to be computed in VT and
+*                 M left singular vectors to be overwritten on A
+*
+                  IF( LWORK.GE.2*M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
+*
+*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*M
+                        LDWRKR = LDA
+                     ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
+*
+*                       WORK(IU) is LDA by M and WORK(IR) is M by M
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*M
+                        LDWRKR = M
+                     ELSE
+*
+*                       WORK(IU) is M by M and WORK(IR) is M by M
+*
+                        LDWRKU = M
+                        IR = IU + LDWRKU*M
+                        LDWRKR = M
+                     END IF
+                     ITAU = IR + LDWRKR*M
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
+*
+                     CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
+*
+                     CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy L to WORK(IU), zeroing out above it
+*
+                     CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
+     $                            WORK( IU+LDWRKU ), LDWRKU )
+                     IE = ITAU
+                     ITAUQ = IE + M
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in WORK(IU), copying result to
+*                    WORK(IR)
+*                    (Workspace: need 2*M*M+4*M,
+*                                prefer 2*M*M+3*M+2*M*NB)
+*
+                     CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S,
+     $                            WORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU,
+     $                            WORK( IR ), LDWRKR )
+*
+*                    Generate right bidiagonalizing vectors in WORK(IU)
+*                    (Workspace: need 2*M*M+4*M-1,
+*                                prefer 2*M*M+3*M+(M-1)*NB)
+*
+                     CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in WORK(IR)
+*                    (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
+*
+                     CALL SORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of L in WORK(IR) and computing
+*                    right singular vectors of L in WORK(IU)
+*                    (Workspace: need 2*M*M+BDSPAC)
+*
+                     CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
+     $                            WORK( IU ), LDWRKU, WORK( IR ),
+     $                            LDWRKR, DUM, 1, WORK( IWORK ), INFO )
+*
+*                    Multiply right singular vectors of L in WORK(IU) by
+*                    Q in VT, storing result in A
+*                    (Workspace: need M*M)
+*
+                     CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
+     $                           LDWRKU, VT, LDVT, ZERO, A, LDA )
+*
+*                    Copy right singular vectors of A from A to VT
+*
+                     CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT )
+*
+*                    Copy left singular vectors of A from WORK(IR) to A
+*
+                     CALL SLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
+     $                            LDA )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (Workspace: need 2*M, prefer M+M*NB)
+*
+                     CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (Workspace: need M+N, prefer M+N*NB)
+*
+                     CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = ITAU
+                     ITAUQ = IE + M
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Zero out above L in A
+*
+                     CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
+     $                            LDA )
+*
+*                    Bidiagonalize L in A
+*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
+*
+                     CALL SGEBRD( M, M, A, LDA, S, WORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply right bidiagonalizing vectors in A by Q
+*                    in VT
+*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
+*
+                     CALL SORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
+     $                            WORK( ITAUP ), VT, LDVT,
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in A
+*                    (Workspace: need 4*M, prefer 3*M+M*NB)
+*
+                     CALL SORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in A and computing right
+*                    singular vectors of A in VT
+*                    (Workspace: need BDSPAC)
+*
+                     CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
+     $                            LDVT, A, LDA, DUM, 1, WORK( IWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTUAS ) THEN
+*
+*                 Path 9t(N much larger than M, JOBU='S' or 'A',
+*                         JOBVT='A')
+*                 N right singular vectors to be computed in VT and
+*                 M left singular vectors to be computed in U
+*
+                  IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
+*
+*                       WORK(IU) is LDA by M
+*
+                        LDWRKU = LDA
+                     ELSE
+*
+*                       WORK(IU) is M by M
+*
+                        LDWRKU = M
+                     END IF
+                     ITAU = IU + LDWRKU*M
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*
+                     CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
+*
+                     CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy L to WORK(IU), zeroing out above it
+*
+                     CALL SLACPY( 'L', M, M, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL SLASET( 'U', M-1, M-1, ZERO, ZERO,
+     $                            WORK( IU+LDWRKU ), LDWRKU )
+                     IE = ITAU
+                     ITAUQ = IE + M
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in WORK(IU), copying result to U
+*                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*
+                     CALL SGEBRD( M, M, WORK( IU ), LDWRKU, S,
+     $                            WORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL SLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
+     $                            LDU )
+*
+*                    Generate right bidiagonalizing vectors in WORK(IU)
+*                    (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
+*
+                     CALL SORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in U
+*                    (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
+*
+                     CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of L in U and computing right
+*                    singular vectors of L in WORK(IU)
+*                    (Workspace: need M*M+BDSPAC)
+*
+                     CALL SBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
+     $                            WORK( IU ), LDWRKU, U, LDU, DUM, 1,
+     $                            WORK( IWORK ), INFO )
+*
+*                    Multiply right singular vectors of L in WORK(IU) by
+*                    Q in VT, storing result in A
+*                    (Workspace: need M*M)
+*
+                     CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
+     $                           LDWRKU, VT, LDVT, ZERO, A, LDA )
+*
+*                    Copy right singular vectors of A from A to VT
+*
+                     CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (Workspace: need 2*M, prefer M+M*NB)
+*
+                     CALL SGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (Workspace: need M+N, prefer M+N*NB)
+*
+                     CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy L to U, zeroing out above it
+*
+                     CALL SLACPY( 'L', M, M, A, LDA, U, LDU )
+                     CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
+     $                            LDU )
+                     IE = ITAU
+                     ITAUQ = IE + M
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in U
+*                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
+*
+                     CALL SGEBRD( M, M, U, LDU, S, WORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply right bidiagonalizing vectors in U by Q
+*                    in VT
+*                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
+*
+                     CALL SORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
+     $                            WORK( ITAUP ), VT, LDVT,
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in U
+*                    (Workspace: need 4*M, prefer 3*M+M*NB)
+*
+                     CALL SORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U and computing right
+*                    singular vectors of A in VT
+*                    (Workspace: need BDSPAC)
+*
+                     CALL SBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
+     $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               END IF
+*
+            END IF
+*
+         ELSE
+*
+*           N .LT. MNTHR
+*
+*           Path 10t(N greater than M, but not much larger)
+*           Reduce to bidiagonal form without LQ decomposition
+*
+            IE = 1
+            ITAUQ = IE + M
+            ITAUP = ITAUQ + M
+            IWORK = ITAUP + M
+*
+*           Bidiagonalize A
+*           (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
+*
+            CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
+     $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
+     $                   IERR )
+            IF( WNTUAS ) THEN
+*
+*              If left singular vectors desired in U, copy result to U
+*              and generate left bidiagonalizing vectors in U
+*              (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
+*
+               CALL SLACPY( 'L', M, M, A, LDA, U, LDU )
+               CALL SORGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IF( WNTVAS ) THEN
+*
+*              If right singular vectors desired in VT, copy result to
+*              VT and generate right bidiagonalizing vectors in VT
+*              (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB)
+*
+               CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
+               IF( WNTVA )
+     $            NRVT = N
+               IF( WNTVS )
+     $            NRVT = M
+               CALL SORGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IF( WNTUO ) THEN
+*
+*              If left singular vectors desired in A, generate left
+*              bidiagonalizing vectors in A
+*              (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
+*
+               CALL SORGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IF( WNTVO ) THEN
+*
+*              If right singular vectors desired in A, generate right
+*              bidiagonalizing vectors in A
+*              (Workspace: need 4*M, prefer 3*M+M*NB)
+*
+               CALL SORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IWORK = IE + M
+            IF( WNTUAS .OR. WNTUO )
+     $         NRU = M
+            IF( WNTUN )
+     $         NRU = 0
+            IF( WNTVAS .OR. WNTVO )
+     $         NCVT = N
+            IF( WNTVN )
+     $         NCVT = 0
+            IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
+*
+*              Perform bidiagonal QR iteration, if desired, computing
+*              left singular vectors in U and computing right singular
+*              vectors in VT
+*              (Workspace: need BDSPAC)
+*
+               CALL SBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
+     $                      LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
+            ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
+*
+*              Perform bidiagonal QR iteration, if desired, computing
+*              left singular vectors in U and computing right singular
+*              vectors in A
+*              (Workspace: need BDSPAC)
+*
+               CALL SBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
+     $                      U, LDU, DUM, 1, WORK( IWORK ), INFO )
+            ELSE
+*
+*              Perform bidiagonal QR iteration, if desired, computing
+*              left singular vectors in A and computing right singular
+*              vectors in VT
+*              (Workspace: need BDSPAC)
+*
+               CALL SBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
+     $                      LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
+            END IF
+*
+         END IF
+*
+      END IF
+*
+*     If SBDSQR failed to converge, copy unconverged superdiagonals
+*     to WORK( 2:MINMN )
+*
+      IF( INFO.NE.0 ) THEN
+         IF( IE.GT.2 ) THEN
+            DO 50 I = 1, MINMN - 1
+               WORK( I+1 ) = WORK( I+IE-1 )
+   50       CONTINUE
+         END IF
+         IF( IE.LT.2 ) THEN
+            DO 60 I = MINMN - 1, 1, -1
+               WORK( I+1 ) = WORK( I+IE-1 )
+   60       CONTINUE
+         END IF
+      END IF
+*
+*     Undo scaling if necessary
+*
+      IF( ISCL.EQ.1 ) THEN
+         IF( ANRM.GT.BIGNUM )
+     $      CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
+     $                   IERR )
+         IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM )
+     $      CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, WORK( 2 ),
+     $                   MINMN, IERR )
+         IF( ANRM.LT.SMLNUM )
+     $      CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
+     $                   IERR )
+         IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM )
+     $      CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, WORK( 2 ),
+     $                   MINMN, IERR )
+      END IF
+*
+*     Return optimal workspace in WORK(1)
+*
+      WORK( 1 ) = MAXWRK
+*
+      RETURN
+*
+*     End of SGESVD
+*
+      END
diff --git a/costa/native/external/lapack/sgesvx.f b/costa/native/external/lapack/sgesvx.f
new file mode 100644
index 000000000..59456add6
--- /dev/null
+++ b/costa/native/external/lapack/sgesvx.f
@@ -0,0 +1,482 @@
+      SUBROUTINE SGESVX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV,
+     $                   EQUED, R, C, B, LDB, X, LDX, RCOND, FERR, BERR,
+     $                   WORK, IWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, FACT, TRANS
+      INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), IWORK( * )
+      REAL               A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
+     $                   BERR( * ), C( * ), FERR( * ), R( * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGESVX uses the LU factorization to compute the solution to a real
+*  system of linear equations
+*     A * X = B,
+*  where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'E', real scaling factors are computed to equilibrate
+*     the system:
+*        TRANS = 'N':  diag(R)*A*diag(C)     *inv(diag(C))*X = diag(R)*B
+*        TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
+*        TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
+*     Whether or not the system will be equilibrated depends on the
+*     scaling of the matrix A, but if equilibration is used, A is
+*     overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N')
+*     or diag(C)*B (if TRANS = 'T' or 'C').
+*
+*  2. If FACT = 'N' or 'E', the LU decomposition is used to factor the
+*     matrix A (after equilibration if FACT = 'E') as
+*        A = P * L * U,
+*     where P is a permutation matrix, L is a unit lower triangular
+*     matrix, and U is upper triangular.
+*
+*  3. If some U(i,i)=0, so that U is exactly singular, then the routine
+*     returns with INFO = i. Otherwise, the factored form of A is used
+*     to estimate the condition number of the matrix A.  If the
+*     reciprocal of the condition number is less than machine precision,
+*     INFO = N+1 is returned as a warning, but the routine still goes on
+*     to solve for X and compute error bounds as described below.
+*
+*  4. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  5. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  6. If equilibration was used, the matrix X is premultiplied by
+*     diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so
+*     that it solves the original system before equilibration.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of the matrix A is
+*          supplied on entry, and if not, whether the matrix A should be
+*          equilibrated before it is factored.
+*          = 'F':  On entry, AF and IPIV contain the factored form of A.
+*                  If EQUED is not 'N', the matrix A has been
+*                  equilibrated with scaling factors given by R and C.
+*                  A, AF, and IPIV are not modified.
+*          = 'N':  The matrix A will be copied to AF and factored.
+*          = 'E':  The matrix A will be equilibrated if necessary, then
+*                  copied to AF and factored.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Transpose)
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the N-by-N matrix A.  If FACT = 'F' and EQUED is
+*          not 'N', then A must have been equilibrated by the scaling
+*          factors in R and/or C.  A is not modified if FACT = 'F' or
+*          'N', or if FACT = 'E' and EQUED = 'N' on exit.
+*
+*          On exit, if EQUED .ne. 'N', A is scaled as follows:
+*          EQUED = 'R':  A := diag(R) * A
+*          EQUED = 'C':  A := A * diag(C)
+*          EQUED = 'B':  A := diag(R) * A * diag(C).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  AF      (input or output) REAL array, dimension (LDAF,N)
+*          If FACT = 'F', then AF is an input argument and on entry
+*          contains the factors L and U from the factorization
+*          A = P*L*U as computed by SGETRF.  If EQUED .ne. 'N', then
+*          AF is the factored form of the equilibrated matrix A.
+*
+*          If FACT = 'N', then AF is an output argument and on exit
+*          returns the factors L and U from the factorization A = P*L*U
+*          of the original matrix A.
+*
+*          If FACT = 'E', then AF is an output argument and on exit
+*          returns the factors L and U from the factorization A = P*L*U
+*          of the equilibrated matrix A (see the description of A for
+*          the form of the equilibrated matrix).
+*
+*  LDAF    (input) INTEGER
+*          The leading dimension of the array AF.  LDAF >= max(1,N).
+*
+*  IPIV    (input or output) INTEGER array, dimension (N)
+*          If FACT = 'F', then IPIV is an input argument and on entry
+*          contains the pivot indices from the factorization A = P*L*U
+*          as computed by SGETRF; row i of the matrix was interchanged
+*          with row IPIV(i).
+*
+*          If FACT = 'N', then IPIV is an output argument and on exit
+*          contains the pivot indices from the factorization A = P*L*U
+*          of the original matrix A.
+*
+*          If FACT = 'E', then IPIV is an output argument and on exit
+*          contains the pivot indices from the factorization A = P*L*U
+*          of the equilibrated matrix A.
+*
+*  EQUED   (input or output) CHARACTER*1
+*          Specifies the form of equilibration that was done.
+*          = 'N':  No equilibration (always true if FACT = 'N').
+*          = 'R':  Row equilibration, i.e., A has been premultiplied by
+*                  diag(R).
+*          = 'C':  Column equilibration, i.e., A has been postmultiplied
+*                  by diag(C).
+*          = 'B':  Both row and column equilibration, i.e., A has been
+*                  replaced by diag(R) * A * diag(C).
+*          EQUED is an input argument if FACT = 'F'; otherwise, it is an
+*          output argument.
+*
+*  R       (input or output) REAL array, dimension (N)
+*          The row scale factors for A.  If EQUED = 'R' or 'B', A is
+*          multiplied on the left by diag(R); if EQUED = 'N' or 'C', R
+*          is not accessed.  R is an input argument if FACT = 'F';
+*          otherwise, R is an output argument.  If FACT = 'F' and
+*          EQUED = 'R' or 'B', each element of R must be positive.
+*
+*  C       (input or output) REAL array, dimension (N)
+*          The column scale factors for A.  If EQUED = 'C' or 'B', A is
+*          multiplied on the right by diag(C); if EQUED = 'N' or 'R', C
+*          is not accessed.  C is an input argument if FACT = 'F';
+*          otherwise, C is an output argument.  If FACT = 'F' and
+*          EQUED = 'C' or 'B', each element of C must be positive.
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit,
+*          if EQUED = 'N', B is not modified;
+*          if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by
+*          diag(R)*B;
+*          if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is
+*          overwritten by diag(C)*B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) REAL array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X
+*          to the original system of equations.  Note that A and B are
+*          modified on exit if EQUED .ne. 'N', and the solution to the
+*          equilibrated system is inv(diag(C))*X if TRANS = 'N' and
+*          EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C'
+*          and EQUED = 'R' or 'B'.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) REAL
+*          The estimate of the reciprocal condition number of the matrix
+*          A after equilibration (if done).  If RCOND is less than the
+*          machine precision (in particular, if RCOND = 0), the matrix
+*          is singular to working precision.  This condition is
+*          indicated by a return code of INFO > 0.
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace/output) REAL array, dimension (4*N)
+*          On exit, WORK(1) contains the reciprocal pivot growth
+*          factor norm(A)/norm(U). The "max absolute element" norm is
+*          used. If WORK(1) is much less than 1, then the stability
+*          of the LU factorization of the (equilibrated) matrix A
+*          could be poor. This also means that the solution X, condition
+*          estimator RCOND, and forward error bound FERR could be
+*          unreliable. If factorization fails with 0<INFO<=N, then
+*          WORK(1) contains the reciprocal pivot growth factor for the
+*          leading INFO columns of A.
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= N:  U(i,i) is exactly zero.  The factorization has
+*                       been completed, but the factor U is exactly
+*                       singular, so the solution and error bounds
+*                       could not be computed. RCOND = 0 is returned.
+*                = N+1: U is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU
+      CHARACTER          NORM
+      INTEGER            I, INFEQU, J
+      REAL               AMAX, ANORM, BIGNUM, COLCND, RCMAX, RCMIN,
+     $                   ROWCND, RPVGRW, SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH, SLANGE, SLANTR
+      EXTERNAL           LSAME, SLAMCH, SLANGE, SLANTR
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGECON, SGEEQU, SGERFS, SGETRF, SGETRS, SLACPY,
+     $                   SLAQGE, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      EQUIL = LSAME( FACT, 'E' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( NOFACT .OR. EQUIL ) THEN
+         EQUED = 'N'
+         ROWEQU = .FALSE.
+         COLEQU = .FALSE.
+      ELSE
+         ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' )
+         COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' )
+         SMLNUM = SLAMCH( 'Safe minimum' )
+         BIGNUM = ONE / SMLNUM
+      END IF
+*
+*     Test the input parameters.
+*
+      IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) )
+     $     THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT.
+     $         ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN
+         INFO = -10
+      ELSE
+         IF( ROWEQU ) THEN
+            RCMIN = BIGNUM
+            RCMAX = ZERO
+            DO 10 J = 1, N
+               RCMIN = MIN( RCMIN, R( J ) )
+               RCMAX = MAX( RCMAX, R( J ) )
+   10       CONTINUE
+            IF( RCMIN.LE.ZERO ) THEN
+               INFO = -11
+            ELSE IF( N.GT.0 ) THEN
+               ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+            ELSE
+               ROWCND = ONE
+            END IF
+         END IF
+         IF( COLEQU .AND. INFO.EQ.0 ) THEN
+            RCMIN = BIGNUM
+            RCMAX = ZERO
+            DO 20 J = 1, N
+               RCMIN = MIN( RCMIN, C( J ) )
+               RCMAX = MAX( RCMAX, C( J ) )
+   20       CONTINUE
+            IF( RCMIN.LE.ZERO ) THEN
+               INFO = -12
+            ELSE IF( N.GT.0 ) THEN
+               COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+            ELSE
+               COLCND = ONE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            IF( LDB.LT.MAX( 1, N ) ) THEN
+               INFO = -14
+            ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+               INFO = -16
+            END IF
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGESVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( EQUIL ) THEN
+*
+*        Compute row and column scalings to equilibrate the matrix A.
+*
+         CALL SGEEQU( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX, INFEQU )
+         IF( INFEQU.EQ.0 ) THEN
+*
+*           Equilibrate the matrix.
+*
+            CALL SLAQGE( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX,
+     $                   EQUED )
+            ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' )
+            COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' )
+         END IF
+      END IF
+*
+*     Scale the right hand side.
+*
+      IF( NOTRAN ) THEN
+         IF( ROWEQU ) THEN
+            DO 40 J = 1, NRHS
+               DO 30 I = 1, N
+                  B( I, J ) = R( I )*B( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+      ELSE IF( COLEQU ) THEN
+         DO 60 J = 1, NRHS
+            DO 50 I = 1, N
+               B( I, J ) = C( I )*B( I, J )
+   50       CONTINUE
+   60    CONTINUE
+      END IF
+*
+      IF( NOFACT .OR. EQUIL ) THEN
+*
+*        Compute the LU factorization of A.
+*
+         CALL SLACPY( 'Full', N, N, A, LDA, AF, LDAF )
+         CALL SGETRF( N, N, AF, LDAF, IPIV, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 ) THEN
+*
+*              Compute the reciprocal pivot growth factor of the
+*              leading rank-deficient INFO columns of A.
+*
+               RPVGRW = SLANTR( 'M', 'U', 'N', INFO, INFO, AF, LDAF,
+     $                  WORK )
+               IF( RPVGRW.EQ.ZERO ) THEN
+                  RPVGRW = ONE
+               ELSE
+                  RPVGRW = SLANGE( 'M', N, INFO, A, LDA, WORK ) / RPVGRW
+               END IF
+               WORK( 1 ) = RPVGRW
+               RCOND = ZERO
+            END IF
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A and the
+*     reciprocal pivot growth factor RPVGRW.
+*
+      IF( NOTRAN ) THEN
+         NORM = '1'
+      ELSE
+         NORM = 'I'
+      END IF
+      ANORM = SLANGE( NORM, N, N, A, LDA, WORK )
+      RPVGRW = SLANTR( 'M', 'U', 'N', N, N, AF, LDAF, WORK )
+      IF( RPVGRW.EQ.ZERO ) THEN
+         RPVGRW = ONE
+      ELSE
+         RPVGRW = SLANGE( 'M', N, N, A, LDA, WORK ) / RPVGRW
+      END IF
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL SGECON( NORM, N, AF, LDAF, ANORM, RCOND, WORK, IWORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.SLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution matrix X.
+*
+      CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL SGETRS( TRANS, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solution and
+*     compute error bounds and backward error estimates for it.
+*
+      CALL SGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, X,
+     $             LDX, FERR, BERR, WORK, IWORK, INFO )
+*
+*     Transform the solution matrix X to a solution of the original
+*     system.
+*
+      IF( NOTRAN ) THEN
+         IF( COLEQU ) THEN
+            DO 80 J = 1, NRHS
+               DO 70 I = 1, N
+                  X( I, J ) = C( I )*X( I, J )
+   70          CONTINUE
+   80       CONTINUE
+            DO 90 J = 1, NRHS
+               FERR( J ) = FERR( J ) / COLCND
+   90       CONTINUE
+         END IF
+      ELSE IF( ROWEQU ) THEN
+         DO 110 J = 1, NRHS
+            DO 100 I = 1, N
+               X( I, J ) = R( I )*X( I, J )
+  100       CONTINUE
+  110    CONTINUE
+         DO 120 J = 1, NRHS
+            FERR( J ) = FERR( J ) / ROWCND
+  120    CONTINUE
+      END IF
+*
+      WORK( 1 ) = RPVGRW
+      RETURN
+*
+*     End of SGESVX
+*
+      END
diff --git a/costa/native/external/lapack/sgetc2.f b/costa/native/external/lapack/sgetc2.f
new file mode 100644
index 000000000..2456e97e4
--- /dev/null
+++ b/costa/native/external/lapack/sgetc2.f
@@ -0,0 +1,147 @@
+      SUBROUTINE SGETC2( N, A, LDA, IPIV, JPIV, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), JPIV( * )
+      REAL               A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGETC2 computes an LU factorization with complete pivoting of the
+*  n-by-n matrix A. The factorization has the form A = P * L * U * Q,
+*  where P and Q are permutation matrices, L is lower triangular with
+*  unit diagonal elements and U is upper triangular.
+*
+*  This is the Level 2 BLAS algorithm.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A. N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA, N)
+*          On entry, the n-by-n matrix A to be factored.
+*          On exit, the factors L and U from the factorization
+*          A = P*L*U*Q; the unit diagonal elements of L are not stored.
+*          If U(k, k) appears to be less than SMIN, U(k, k) is given the
+*          value of SMIN, i.e., giving a nonsingular perturbed system.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (output) INTEGER array, dimension(N).
+*          The pivot indices; for 1 <= i <= N, row i of the
+*          matrix has been interchanged with row IPIV(i).
+*
+*  JPIV    (output) INTEGER array, dimension(N).
+*          The pivot indices; for 1 <= j <= N, column j of the
+*          matrix has been interchanged with column JPIV(j).
+*
+*  INFO    (output) INTEGER
+*           = 0: successful exit
+*           > 0: if INFO = k, U(k, k) is likely to produce owerflow if
+*                we try to solve for x in Ax = b. So U is perturbed to
+*                avoid the overflow.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IP, IPV, J, JP, JPV
+      REAL               BIGNUM, EPS, SMIN, SMLNUM, XMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGER, SLABAD, SSWAP
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Set constants to control overflow
+*
+      INFO = 0
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SLAMCH( 'S' ) / EPS
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+*
+*     Factorize A using complete pivoting.
+*     Set pivots less than SMIN to SMIN.
+*
+      DO 40 I = 1, N - 1
+*
+*        Find max element in matrix A
+*
+         XMAX = ZERO
+         DO 20 IP = I, N
+            DO 10 JP = I, N
+               IF( ABS( A( IP, JP ) ).GE.XMAX ) THEN
+                  XMAX = ABS( A( IP, JP ) )
+                  IPV = IP
+                  JPV = JP
+               END IF
+   10       CONTINUE
+   20    CONTINUE
+         IF( I.EQ.1 )
+     $      SMIN = MAX( EPS*XMAX, SMLNUM )
+*
+*        Swap rows
+*
+         IF( IPV.NE.I )
+     $      CALL SSWAP( N, A( IPV, 1 ), LDA, A( I, 1 ), LDA )
+         IPIV( I ) = IPV
+*
+*        Swap columns
+*
+         IF( JPV.NE.I )
+     $      CALL SSWAP( N, A( 1, JPV ), 1, A( 1, I ), 1 )
+         JPIV( I ) = JPV
+*
+*        Check for singularity
+*
+         IF( ABS( A( I, I ) ).LT.SMIN ) THEN
+            INFO = I
+            A( I, I ) = SMIN
+         END IF
+         DO 30 J = I + 1, N
+            A( J, I ) = A( J, I ) / A( I, I )
+   30    CONTINUE
+         CALL SGER( N-I, N-I, -ONE, A( I+1, I ), 1, A( I, I+1 ), LDA,
+     $              A( I+1, I+1 ), LDA )
+   40 CONTINUE
+*
+      IF( ABS( A( N, N ) ).LT.SMIN ) THEN
+         INFO = N
+         A( N, N ) = SMIN
+      END IF
+*
+      RETURN
+*
+*     End of SGETC2
+*
+      END
diff --git a/costa/native/external/lapack/sgetf2.f b/costa/native/external/lapack/sgetf2.f
new file mode 100644
index 000000000..ff641eef7
--- /dev/null
+++ b/costa/native/external/lapack/sgetf2.f
@@ -0,0 +1,135 @@
+      SUBROUTINE SGETF2( M, N, A, LDA, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGETF2 computes an LU factorization of a general m-by-n matrix A
+*  using partial pivoting with row interchanges.
+*
+*  The factorization has the form
+*     A = P * L * U
+*  where P is a permutation matrix, L is lower triangular with unit
+*  diagonal elements (lower trapezoidal if m > n), and U is upper
+*  triangular (upper trapezoidal if m < n).
+*
+*  This is the right-looking Level 2 BLAS version of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the m by n matrix to be factored.
+*          On exit, the factors L and U from the factorization
+*          A = P*L*U; the unit diagonal elements of L are not stored.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  IPIV    (output) INTEGER array, dimension (min(M,N))
+*          The pivot indices; for 1 <= i <= min(M,N), row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*          > 0: if INFO = k, U(k,k) is exactly zero. The factorization
+*               has been completed, but the factor U is exactly
+*               singular, and division by zero will occur if it is used
+*               to solve a system of equations.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            J, JP
+*     ..
+*     .. External Functions ..
+      INTEGER            ISAMAX
+      EXTERNAL           ISAMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGER, SSCAL, SSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGETF2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+      DO 10 J = 1, MIN( M, N )
+*
+*        Find pivot and test for singularity.
+*
+         JP = J - 1 + ISAMAX( M-J+1, A( J, J ), 1 )
+         IPIV( J ) = JP
+         IF( A( JP, J ).NE.ZERO ) THEN
+*
+*           Apply the interchange to columns 1:N.
+*
+            IF( JP.NE.J )
+     $         CALL SSWAP( N, A( J, 1 ), LDA, A( JP, 1 ), LDA )
+*
+*           Compute elements J+1:M of J-th column.
+*
+            IF( J.LT.M )
+     $         CALL SSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 )
+*
+         ELSE IF( INFO.EQ.0 ) THEN
+*
+            INFO = J
+         END IF
+*
+         IF( J.LT.MIN( M, N ) ) THEN
+*
+*           Update trailing submatrix.
+*
+            CALL SGER( M-J, N-J, -ONE, A( J+1, J ), 1, A( J, J+1 ), LDA,
+     $                 A( J+1, J+1 ), LDA )
+         END IF
+   10 CONTINUE
+      RETURN
+*
+*     End of SGETF2
+*
+      END
diff --git a/costa/native/external/lapack/sgetrf.f b/costa/native/external/lapack/sgetrf.f
new file mode 100644
index 000000000..577ef21d9
--- /dev/null
+++ b/costa/native/external/lapack/sgetrf.f
@@ -0,0 +1,160 @@
+      SUBROUTINE SGETRF( M, N, A, LDA, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGETRF computes an LU factorization of a general M-by-N matrix A
+*  using partial pivoting with row interchanges.
+*
+*  The factorization has the form
+*     A = P * L * U
+*  where P is a permutation matrix, L is lower triangular with unit
+*  diagonal elements (lower trapezoidal if m > n), and U is upper
+*  triangular (upper trapezoidal if m < n).
+*
+*  This is the right-looking Level 3 BLAS version of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the M-by-N matrix to be factored.
+*          On exit, the factors L and U from the factorization
+*          A = P*L*U; the unit diagonal elements of L are not stored.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  IPIV    (output) INTEGER array, dimension (min(M,N))
+*          The pivot indices; for 1 <= i <= min(M,N), row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
+*                has been completed, but the factor U is exactly
+*                singular, and division by zero will occur if it is used
+*                to solve a system of equations.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IINFO, J, JB, NB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMM, SGETF2, SLASWP, STRSM, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGETRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'SGETRF', ' ', M, N, -1, -1 )
+      IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN
+*
+*        Use unblocked code.
+*
+         CALL SGETF2( M, N, A, LDA, IPIV, INFO )
+      ELSE
+*
+*        Use blocked code.
+*
+         DO 20 J = 1, MIN( M, N ), NB
+            JB = MIN( MIN( M, N )-J+1, NB )
+*
+*           Factor diagonal and subdiagonal blocks and test for exact
+*           singularity.
+*
+            CALL SGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO )
+*
+*           Adjust INFO and the pivot indices.
+*
+            IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+     $         INFO = IINFO + J - 1
+            DO 10 I = J, MIN( M, J+JB-1 )
+               IPIV( I ) = J - 1 + IPIV( I )
+   10       CONTINUE
+*
+*           Apply interchanges to columns 1:J-1.
+*
+            CALL SLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 )
+*
+            IF( J+JB.LE.N ) THEN
+*
+*              Apply interchanges to columns J+JB:N.
+*
+               CALL SLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1,
+     $                      IPIV, 1 )
+*
+*              Compute block row of U.
+*
+               CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit', JB,
+     $                     N-J-JB+1, ONE, A( J, J ), LDA, A( J, J+JB ),
+     $                     LDA )
+               IF( J+JB.LE.M ) THEN
+*
+*                 Update trailing submatrix.
+*
+                  CALL SGEMM( 'No transpose', 'No transpose', M-J-JB+1,
+     $                        N-J-JB+1, JB, -ONE, A( J+JB, J ), LDA,
+     $                        A( J, J+JB ), LDA, ONE, A( J+JB, J+JB ),
+     $                        LDA )
+               END IF
+            END IF
+   20    CONTINUE
+      END IF
+      RETURN
+*
+*     End of SGETRF
+*
+      END
diff --git a/costa/native/external/lapack/sgetri.f b/costa/native/external/lapack/sgetri.f
new file mode 100644
index 000000000..ec5932f16
--- /dev/null
+++ b/costa/native/external/lapack/sgetri.f
@@ -0,0 +1,193 @@
+      SUBROUTINE SGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGETRI computes the inverse of a matrix using the LU factorization
+*  computed by SGETRF.
+*
+*  This method inverts U and then computes inv(A) by solving the system
+*  inv(A)*L = inv(U) for inv(A).
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the factors L and U from the factorization
+*          A = P*L*U as computed by SGETRF.
+*          On exit, if INFO = 0, the inverse of the original matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices from SGETRF; for 1<=i<=N, row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO=0, then WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,N).
+*          For optimal performance LWORK >= N*NB, where NB is
+*          the optimal blocksize returned by ILAENV.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, U(i,i) is exactly zero; the matrix is
+*                singular and its inverse could not be computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB,
+     $                   NBMIN, NN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMM, SGEMV, SSWAP, STRSM, STRTRI, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NB = ILAENV( 1, 'SGETRI', ' ', N, -1, -1, -1 )
+      LWKOPT = N*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -3
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGETRI', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form inv(U).  If INFO > 0 from STRTRI, then U is singular,
+*     and the inverse is not computed.
+*
+      CALL STRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO )
+      IF( INFO.GT.0 )
+     $   RETURN
+*
+      NBMIN = 2
+      LDWORK = N
+      IF( NB.GT.1 .AND. NB.LT.N ) THEN
+         IWS = MAX( LDWORK*NB, 1 )
+         IF( LWORK.LT.IWS ) THEN
+            NB = LWORK / LDWORK
+            NBMIN = MAX( 2, ILAENV( 2, 'SGETRI', ' ', N, -1, -1, -1 ) )
+         END IF
+      ELSE
+         IWS = N
+      END IF
+*
+*     Solve the equation inv(A)*L = inv(U) for inv(A).
+*
+      IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN
+*
+*        Use unblocked code.
+*
+         DO 20 J = N, 1, -1
+*
+*           Copy current column of L to WORK and replace with zeros.
+*
+            DO 10 I = J + 1, N
+               WORK( I ) = A( I, J )
+               A( I, J ) = ZERO
+   10       CONTINUE
+*
+*           Compute current column of inv(A).
+*
+            IF( J.LT.N )
+     $         CALL SGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ),
+     $                     LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 )
+   20    CONTINUE
+      ELSE
+*
+*        Use blocked code.
+*
+         NN = ( ( N-1 ) / NB )*NB + 1
+         DO 50 J = NN, 1, -NB
+            JB = MIN( NB, N-J+1 )
+*
+*           Copy current block column of L to WORK and replace with
+*           zeros.
+*
+            DO 40 JJ = J, J + JB - 1
+               DO 30 I = JJ + 1, N
+                  WORK( I+( JJ-J )*LDWORK ) = A( I, JJ )
+                  A( I, JJ ) = ZERO
+   30          CONTINUE
+   40       CONTINUE
+*
+*           Compute current block column of inv(A).
+*
+            IF( J+JB.LE.N )
+     $         CALL SGEMM( 'No transpose', 'No transpose', N, JB,
+     $                     N-J-JB+1, -ONE, A( 1, J+JB ), LDA,
+     $                     WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA )
+            CALL STRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB,
+     $                  ONE, WORK( J ), LDWORK, A( 1, J ), LDA )
+   50    CONTINUE
+      END IF
+*
+*     Apply column interchanges.
+*
+      DO 60 J = N - 1, 1, -1
+         JP = IPIV( J )
+         IF( JP.NE.J )
+     $      CALL SSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 )
+   60 CONTINUE
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of SGETRI
+*
+      END
diff --git a/costa/native/external/lapack/sgetrs.f b/costa/native/external/lapack/sgetrs.f
new file mode 100644
index 000000000..e04d8c20c
--- /dev/null
+++ b/costa/native/external/lapack/sgetrs.f
@@ -0,0 +1,150 @@
+      SUBROUTINE SGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            INFO, LDA, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGETRS solves a system of linear equations
+*     A * X = B  or  A' * X = B
+*  with a general N-by-N matrix A using the LU factorization computed
+*  by SGETRF.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B  (No transpose)
+*          = 'T':  A'* X = B  (Transpose)
+*          = 'C':  A'* X = B  (Conjugate transpose = Transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input) REAL array, dimension (LDA,N)
+*          The factors L and U from the factorization A = P*L*U
+*          as computed by SGETRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices from SGETRF; for 1<=i<=N, row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLASWP, STRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $    LSAME( TRANS, 'C' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGETRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( NOTRAN ) THEN
+*
+*        Solve A * X = B.
+*
+*        Apply row interchanges to the right hand sides.
+*
+         CALL SLASWP( NRHS, B, LDB, 1, N, IPIV, 1 )
+*
+*        Solve L*X = B, overwriting B with X.
+*
+         CALL STRSM( 'Left', 'Lower', 'No transpose', 'Unit', N, NRHS,
+     $               ONE, A, LDA, B, LDB )
+*
+*        Solve U*X = B, overwriting B with X.
+*
+         CALL STRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N,
+     $               NRHS, ONE, A, LDA, B, LDB )
+      ELSE
+*
+*        Solve A' * X = B.
+*
+*        Solve U'*X = B, overwriting B with X.
+*
+         CALL STRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', N, NRHS,
+     $               ONE, A, LDA, B, LDB )
+*
+*        Solve L'*X = B, overwriting B with X.
+*
+         CALL STRSM( 'Left', 'Lower', 'Transpose', 'Unit', N, NRHS, ONE,
+     $               A, LDA, B, LDB )
+*
+*        Apply row interchanges to the solution vectors.
+*
+         CALL SLASWP( NRHS, B, LDB, 1, N, IPIV, -1 )
+      END IF
+*
+      RETURN
+*
+*     End of SGETRS
+*
+      END
diff --git a/costa/native/external/lapack/sggbak.f b/costa/native/external/lapack/sggbak.f
new file mode 100644
index 000000000..3d717752e
--- /dev/null
+++ b/costa/native/external/lapack/sggbak.f
@@ -0,0 +1,216 @@
+      SUBROUTINE SGGBAK( JOB, SIDE, N, ILO, IHI, LSCALE, RSCALE, M, V,
+     $                   LDV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOB, SIDE
+      INTEGER            IHI, ILO, INFO, LDV, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               LSCALE( * ), RSCALE( * ), V( LDV, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGGBAK forms the right or left eigenvectors of a real generalized
+*  eigenvalue problem A*x = lambda*B*x, by backward transformation on
+*  the computed eigenvectors of the balanced pair of matrices output by
+*  SGGBAL.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          Specifies the type of backward transformation required:
+*          = 'N':  do nothing, return immediately;
+*          = 'P':  do backward transformation for permutation only;
+*          = 'S':  do backward transformation for scaling only;
+*          = 'B':  do backward transformations for both permutation and
+*                  scaling.
+*          JOB must be the same as the argument JOB supplied to SGGBAL.
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'R':  V contains right eigenvectors;
+*          = 'L':  V contains left eigenvectors.
+*
+*  N       (input) INTEGER
+*          The number of rows of the matrix V.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          The integers ILO and IHI determined by SGGBAL.
+*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+*
+*  LSCALE  (input) REAL array, dimension (N)
+*          Details of the permutations and/or scaling factors applied
+*          to the left side of A and B, as returned by SGGBAL.
+*
+*  RSCALE  (input) REAL array, dimension (N)
+*          Details of the permutations and/or scaling factors applied
+*          to the right side of A and B, as returned by SGGBAL.
+*
+*  M       (input) INTEGER
+*          The number of columns of the matrix V.  M >= 0.
+*
+*  V       (input/output) REAL array, dimension (LDV,M)
+*          On entry, the matrix of right or left eigenvectors to be
+*          transformed, as returned by STGEVC.
+*          On exit, V is overwritten by the transformed eigenvectors.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the matrix V. LDV >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  See R.C. Ward, Balancing the generalized eigenvalue problem,
+*                 SIAM J. Sci. Stat. Comp. 2 (1981), 141-152.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LEFTV, RIGHTV
+      INTEGER            I, K
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SSCAL, SSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      RIGHTV = LSAME( SIDE, 'R' )
+      LEFTV = LSAME( SIDE, 'L' )
+*
+      INFO = 0
+      IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND.
+     $    .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( ILO.LT.1 ) THEN
+         INFO = -4
+      ELSE IF( IHI.LT.ILO .OR. IHI.GT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDV.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGGBAK', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+      IF( M.EQ.0 )
+     $   RETURN
+      IF( LSAME( JOB, 'N' ) )
+     $   RETURN
+*
+      IF( ILO.EQ.IHI )
+     $   GO TO 30
+*
+*     Backward balance
+*
+      IF( LSAME( JOB, 'S' ) .OR. LSAME( JOB, 'B' ) ) THEN
+*
+*        Backward transformation on right eigenvectors
+*
+         IF( RIGHTV ) THEN
+            DO 10 I = ILO, IHI
+               CALL SSCAL( M, RSCALE( I ), V( I, 1 ), LDV )
+   10       CONTINUE
+         END IF
+*
+*        Backward transformation on left eigenvectors
+*
+         IF( LEFTV ) THEN
+            DO 20 I = ILO, IHI
+               CALL SSCAL( M, LSCALE( I ), V( I, 1 ), LDV )
+   20       CONTINUE
+         END IF
+      END IF
+*
+*     Backward permutation
+*
+   30 CONTINUE
+      IF( LSAME( JOB, 'P' ) .OR. LSAME( JOB, 'B' ) ) THEN
+*
+*        Backward permutation on right eigenvectors
+*
+         IF( RIGHTV ) THEN
+            IF( ILO.EQ.1 )
+     $         GO TO 50
+*
+            DO 40 I = ILO - 1, 1, -1
+               K = RSCALE( I )
+               IF( K.EQ.I )
+     $            GO TO 40
+               CALL SSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV )
+   40       CONTINUE
+*
+   50       CONTINUE
+            IF( IHI.EQ.N )
+     $         GO TO 70
+            DO 60 I = IHI + 1, N
+               K = RSCALE( I )
+               IF( K.EQ.I )
+     $            GO TO 60
+               CALL SSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV )
+   60       CONTINUE
+         END IF
+*
+*        Backward permutation on left eigenvectors
+*
+   70    CONTINUE
+         IF( LEFTV ) THEN
+            IF( ILO.EQ.1 )
+     $         GO TO 90
+            DO 80 I = ILO - 1, 1, -1
+               K = LSCALE( I )
+               IF( K.EQ.I )
+     $            GO TO 80
+               CALL SSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV )
+   80       CONTINUE
+*
+   90       CONTINUE
+            IF( IHI.EQ.N )
+     $         GO TO 110
+            DO 100 I = IHI + 1, N
+               K = LSCALE( I )
+               IF( K.EQ.I )
+     $            GO TO 100
+               CALL SSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV )
+  100       CONTINUE
+         END IF
+      END IF
+*
+  110 CONTINUE
+*
+      RETURN
+*
+*     End of SGGBAK
+*
+      END
diff --git a/costa/native/external/lapack/sggbal.f b/costa/native/external/lapack/sggbal.f
new file mode 100644
index 000000000..4e0df23f2
--- /dev/null
+++ b/costa/native/external/lapack/sggbal.f
@@ -0,0 +1,461 @@
+      SUBROUTINE SGGBAL( JOB, N, A, LDA, B, LDB, ILO, IHI, LSCALE,
+     $                   RSCALE, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOB
+      INTEGER            IHI, ILO, INFO, LDA, LDB, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), B( LDB, * ), LSCALE( * ),
+     $                   RSCALE( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGGBAL balances a pair of general real matrices (A,B).  This
+*  involves, first, permuting A and B by similarity transformations to
+*  isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N
+*  elements on the diagonal; and second, applying a diagonal similarity
+*  transformation to rows and columns ILO to IHI to make the rows
+*  and columns as close in norm as possible. Both steps are optional.
+*
+*  Balancing may reduce the 1-norm of the matrices, and improve the
+*  accuracy of the computed eigenvalues and/or eigenvectors in the
+*  generalized eigenvalue problem A*x = lambda*B*x.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          Specifies the operations to be performed on A and B:
+*          = 'N':  none:  simply set ILO = 1, IHI = N, LSCALE(I) = 1.0
+*                  and RSCALE(I) = 1.0 for i = 1,...,N.
+*          = 'P':  permute only;
+*          = 'S':  scale only;
+*          = 'B':  both permute and scale.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the input matrix A.
+*          On exit,  A is overwritten by the balanced matrix.
+*          If JOB = 'N', A is not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  B       (input/output) REAL array, dimension (LDB,N)
+*          On entry, the input matrix B.
+*          On exit,  B is overwritten by the balanced matrix.
+*          If JOB = 'N', B is not referenced.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  ILO     (output) INTEGER
+*  IHI     (output) INTEGER
+*          ILO and IHI are set to integers such that on exit
+*          A(i,j) = 0 and B(i,j) = 0 if i > j and
+*          j = 1,...,ILO-1 or i = IHI+1,...,N.
+*          If JOB = 'N' or 'S', ILO = 1 and IHI = N.
+*
+*  LSCALE  (output) REAL array, dimension (N)
+*          Details of the permutations and scaling factors applied
+*          to the left side of A and B.  If P(j) is the index of the
+*          row interchanged with row j, and D(j)
+*          is the scaling factor applied to row j, then
+*            LSCALE(j) = P(j)    for J = 1,...,ILO-1
+*                      = D(j)    for J = ILO,...,IHI
+*                      = P(j)    for J = IHI+1,...,N.
+*          The order in which the interchanges are made is N to IHI+1,
+*          then 1 to ILO-1.
+*
+*  RSCALE  (output) REAL array, dimension (N)
+*          Details of the permutations and scaling factors applied
+*          to the right side of A and B.  If P(j) is the index of the
+*          column interchanged with column j, and D(j)
+*          is the scaling factor applied to column j, then
+*            LSCALE(j) = P(j)    for J = 1,...,ILO-1
+*                      = D(j)    for J = ILO,...,IHI
+*                      = P(j)    for J = IHI+1,...,N.
+*          The order in which the interchanges are made is N to IHI+1,
+*          then 1 to ILO-1.
+*
+*  WORK    (workspace) REAL array, dimension (6*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  See R.C. WARD, Balancing the generalized eigenvalue problem,
+*                 SIAM J. Sci. Stat. Comp. 2 (1981), 141-152.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, HALF, ONE
+      PARAMETER          ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0 )
+      REAL               THREE, SCLFAC
+      PARAMETER          ( THREE = 3.0E+0, SCLFAC = 1.0E+1 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, ICAB, IFLOW, IP1, IR, IRAB, IT, J, JC, JP1,
+     $                   K, KOUNT, L, LCAB, LM1, LRAB, LSFMAX, LSFMIN,
+     $                   M, NR, NRP2
+      REAL               ALPHA, BASL, BETA, CAB, CMAX, COEF, COEF2,
+     $                   COEF5, COR, EW, EWC, GAMMA, PGAMMA, RAB, SFMAX,
+     $                   SFMIN, SUM, T, TA, TB, TC
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ISAMAX
+      REAL               SDOT, SLAMCH
+      EXTERNAL           LSAME, ISAMAX, SDOT, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SSCAL, SSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, INT, LOG10, MAX, MIN, REAL, SIGN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND.
+     $    .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGGBAL', -INFO )
+         RETURN
+      END IF
+*
+      K = 1
+      L = N
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( LSAME( JOB, 'N' ) ) THEN
+         ILO = 1
+         IHI = N
+         DO 10 I = 1, N
+            LSCALE( I ) = ONE
+            RSCALE( I ) = ONE
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      IF( K.EQ.L ) THEN
+         ILO = 1
+         IHI = 1
+         LSCALE( 1 ) = ONE
+         RSCALE( 1 ) = ONE
+         RETURN
+      END IF
+*
+      IF( LSAME( JOB, 'S' ) )
+     $   GO TO 190
+*
+      GO TO 30
+*
+*     Permute the matrices A and B to isolate the eigenvalues.
+*
+*     Find row with one nonzero in columns 1 through L
+*
+   20 CONTINUE
+      L = LM1
+      IF( L.NE.1 )
+     $   GO TO 30
+*
+      RSCALE( 1 ) = 1
+      LSCALE( 1 ) = 1
+      GO TO 190
+*
+   30 CONTINUE
+      LM1 = L - 1
+      DO 80 I = L, 1, -1
+         DO 40 J = 1, LM1
+            JP1 = J + 1
+            IF( A( I, J ).NE.ZERO .OR. B( I, J ).NE.ZERO )
+     $         GO TO 50
+   40    CONTINUE
+         J = L
+         GO TO 70
+*
+   50    CONTINUE
+         DO 60 J = JP1, L
+            IF( A( I, J ).NE.ZERO .OR. B( I, J ).NE.ZERO )
+     $         GO TO 80
+   60    CONTINUE
+         J = JP1 - 1
+*
+   70    CONTINUE
+         M = L
+         IFLOW = 1
+         GO TO 160
+   80 CONTINUE
+      GO TO 100
+*
+*     Find column with one nonzero in rows K through N
+*
+   90 CONTINUE
+      K = K + 1
+*
+  100 CONTINUE
+      DO 150 J = K, L
+         DO 110 I = K, LM1
+            IP1 = I + 1
+            IF( A( I, J ).NE.ZERO .OR. B( I, J ).NE.ZERO )
+     $         GO TO 120
+  110    CONTINUE
+         I = L
+         GO TO 140
+  120    CONTINUE
+         DO 130 I = IP1, L
+            IF( A( I, J ).NE.ZERO .OR. B( I, J ).NE.ZERO )
+     $         GO TO 150
+  130    CONTINUE
+         I = IP1 - 1
+  140    CONTINUE
+         M = K
+         IFLOW = 2
+         GO TO 160
+  150 CONTINUE
+      GO TO 190
+*
+*     Permute rows M and I
+*
+  160 CONTINUE
+      LSCALE( M ) = I
+      IF( I.EQ.M )
+     $   GO TO 170
+      CALL SSWAP( N-K+1, A( I, K ), LDA, A( M, K ), LDA )
+      CALL SSWAP( N-K+1, B( I, K ), LDB, B( M, K ), LDB )
+*
+*     Permute columns M and J
+*
+  170 CONTINUE
+      RSCALE( M ) = J
+      IF( J.EQ.M )
+     $   GO TO 180
+      CALL SSWAP( L, A( 1, J ), 1, A( 1, M ), 1 )
+      CALL SSWAP( L, B( 1, J ), 1, B( 1, M ), 1 )
+*
+  180 CONTINUE
+      GO TO ( 20, 90 )IFLOW
+*
+  190 CONTINUE
+      ILO = K
+      IHI = L
+*
+      IF( ILO.EQ.IHI )
+     $   RETURN
+*
+      IF( LSAME( JOB, 'P' ) )
+     $   RETURN
+*
+*     Balance the submatrix in rows ILO to IHI.
+*
+      NR = IHI - ILO + 1
+      DO 200 I = ILO, IHI
+         RSCALE( I ) = ZERO
+         LSCALE( I ) = ZERO
+*
+         WORK( I ) = ZERO
+         WORK( I+N ) = ZERO
+         WORK( I+2*N ) = ZERO
+         WORK( I+3*N ) = ZERO
+         WORK( I+4*N ) = ZERO
+         WORK( I+5*N ) = ZERO
+  200 CONTINUE
+*
+*     Compute right side vector in resulting linear equations
+*
+      BASL = LOG10( SCLFAC )
+      DO 240 I = ILO, IHI
+         DO 230 J = ILO, IHI
+            TB = B( I, J )
+            TA = A( I, J )
+            IF( TA.EQ.ZERO )
+     $         GO TO 210
+            TA = LOG10( ABS( TA ) ) / BASL
+  210       CONTINUE
+            IF( TB.EQ.ZERO )
+     $         GO TO 220
+            TB = LOG10( ABS( TB ) ) / BASL
+  220       CONTINUE
+            WORK( I+4*N ) = WORK( I+4*N ) - TA - TB
+            WORK( J+5*N ) = WORK( J+5*N ) - TA - TB
+  230    CONTINUE
+  240 CONTINUE
+*
+      COEF = ONE / REAL( 2*NR )
+      COEF2 = COEF*COEF
+      COEF5 = HALF*COEF2
+      NRP2 = NR + 2
+      BETA = ZERO
+      IT = 1
+*
+*     Start generalized conjugate gradient iteration
+*
+  250 CONTINUE
+*
+      GAMMA = SDOT( NR, WORK( ILO+4*N ), 1, WORK( ILO+4*N ), 1 ) +
+     $        SDOT( NR, WORK( ILO+5*N ), 1, WORK( ILO+5*N ), 1 )
+*
+      EW = ZERO
+      EWC = ZERO
+      DO 260 I = ILO, IHI
+         EW = EW + WORK( I+4*N )
+         EWC = EWC + WORK( I+5*N )
+  260 CONTINUE
+*
+      GAMMA = COEF*GAMMA - COEF2*( EW**2+EWC**2 ) - COEF5*( EW-EWC )**2
+      IF( GAMMA.EQ.ZERO )
+     $   GO TO 350
+      IF( IT.NE.1 )
+     $   BETA = GAMMA / PGAMMA
+      T = COEF5*( EWC-THREE*EW )
+      TC = COEF5*( EW-THREE*EWC )
+*
+      CALL SSCAL( NR, BETA, WORK( ILO ), 1 )
+      CALL SSCAL( NR, BETA, WORK( ILO+N ), 1 )
+*
+      CALL SAXPY( NR, COEF, WORK( ILO+4*N ), 1, WORK( ILO+N ), 1 )
+      CALL SAXPY( NR, COEF, WORK( ILO+5*N ), 1, WORK( ILO ), 1 )
+*
+      DO 270 I = ILO, IHI
+         WORK( I ) = WORK( I ) + TC
+         WORK( I+N ) = WORK( I+N ) + T
+  270 CONTINUE
+*
+*     Apply matrix to vector
+*
+      DO 300 I = ILO, IHI
+         KOUNT = 0
+         SUM = ZERO
+         DO 290 J = ILO, IHI
+            IF( A( I, J ).EQ.ZERO )
+     $         GO TO 280
+            KOUNT = KOUNT + 1
+            SUM = SUM + WORK( J )
+  280       CONTINUE
+            IF( B( I, J ).EQ.ZERO )
+     $         GO TO 290
+            KOUNT = KOUNT + 1
+            SUM = SUM + WORK( J )
+  290    CONTINUE
+         WORK( I+2*N ) = REAL( KOUNT )*WORK( I+N ) + SUM
+  300 CONTINUE
+*
+      DO 330 J = ILO, IHI
+         KOUNT = 0
+         SUM = ZERO
+         DO 320 I = ILO, IHI
+            IF( A( I, J ).EQ.ZERO )
+     $         GO TO 310
+            KOUNT = KOUNT + 1
+            SUM = SUM + WORK( I+N )
+  310       CONTINUE
+            IF( B( I, J ).EQ.ZERO )
+     $         GO TO 320
+            KOUNT = KOUNT + 1
+            SUM = SUM + WORK( I+N )
+  320    CONTINUE
+         WORK( J+3*N ) = REAL( KOUNT )*WORK( J ) + SUM
+  330 CONTINUE
+*
+      SUM = SDOT( NR, WORK( ILO+N ), 1, WORK( ILO+2*N ), 1 ) +
+     $      SDOT( NR, WORK( ILO ), 1, WORK( ILO+3*N ), 1 )
+      ALPHA = GAMMA / SUM
+*
+*     Determine correction to current iteration
+*
+      CMAX = ZERO
+      DO 340 I = ILO, IHI
+         COR = ALPHA*WORK( I+N )
+         IF( ABS( COR ).GT.CMAX )
+     $      CMAX = ABS( COR )
+         LSCALE( I ) = LSCALE( I ) + COR
+         COR = ALPHA*WORK( I )
+         IF( ABS( COR ).GT.CMAX )
+     $      CMAX = ABS( COR )
+         RSCALE( I ) = RSCALE( I ) + COR
+  340 CONTINUE
+      IF( CMAX.LT.HALF )
+     $   GO TO 350
+*
+      CALL SAXPY( NR, -ALPHA, WORK( ILO+2*N ), 1, WORK( ILO+4*N ), 1 )
+      CALL SAXPY( NR, -ALPHA, WORK( ILO+3*N ), 1, WORK( ILO+5*N ), 1 )
+*
+      PGAMMA = GAMMA
+      IT = IT + 1
+      IF( IT.LE.NRP2 )
+     $   GO TO 250
+*
+*     End generalized conjugate gradient iteration
+*
+  350 CONTINUE
+      SFMIN = SLAMCH( 'S' )
+      SFMAX = ONE / SFMIN
+      LSFMIN = INT( LOG10( SFMIN ) / BASL+ONE )
+      LSFMAX = INT( LOG10( SFMAX ) / BASL )
+      DO 360 I = ILO, IHI
+         IRAB = ISAMAX( N-ILO+1, A( I, ILO ), LDA )
+         RAB = ABS( A( I, IRAB+ILO-1 ) )
+         IRAB = ISAMAX( N-ILO+1, B( I, ILO ), LDA )
+         RAB = MAX( RAB, ABS( B( I, IRAB+ILO-1 ) ) )
+         LRAB = INT( LOG10( RAB+SFMIN ) / BASL+ONE )
+         IR = LSCALE( I ) + SIGN( HALF, LSCALE( I ) )
+         IR = MIN( MAX( IR, LSFMIN ), LSFMAX, LSFMAX-LRAB )
+         LSCALE( I ) = SCLFAC**IR
+         ICAB = ISAMAX( IHI, A( 1, I ), 1 )
+         CAB = ABS( A( ICAB, I ) )
+         ICAB = ISAMAX( IHI, B( 1, I ), 1 )
+         CAB = MAX( CAB, ABS( B( ICAB, I ) ) )
+         LCAB = INT( LOG10( CAB+SFMIN ) / BASL+ONE )
+         JC = RSCALE( I ) + SIGN( HALF, RSCALE( I ) )
+         JC = MIN( MAX( JC, LSFMIN ), LSFMAX, LSFMAX-LCAB )
+         RSCALE( I ) = SCLFAC**JC
+  360 CONTINUE
+*
+*     Row scaling of matrices A and B
+*
+      DO 370 I = ILO, IHI
+         CALL SSCAL( N-ILO+1, LSCALE( I ), A( I, ILO ), LDA )
+         CALL SSCAL( N-ILO+1, LSCALE( I ), B( I, ILO ), LDB )
+  370 CONTINUE
+*
+*     Column scaling of matrices A and B
+*
+      DO 380 J = ILO, IHI
+         CALL SSCAL( IHI, RSCALE( J ), A( 1, J ), 1 )
+         CALL SSCAL( IHI, RSCALE( J ), B( 1, J ), 1 )
+  380 CONTINUE
+*
+      RETURN
+*
+*     End of SGGBAL
+*
+      END
diff --git a/costa/native/external/lapack/sgges.f b/costa/native/external/lapack/sgges.f
new file mode 100644
index 000000000..727b9da90
--- /dev/null
+++ b/costa/native/external/lapack/sgges.f
@@ -0,0 +1,548 @@
+      SUBROUTINE SGGES( JOBVSL, JOBVSR, SORT, SELCTG, N, A, LDA, B, LDB,
+     $                  SDIM, ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR,
+     $                  LDVSR, WORK, LWORK, BWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVSL, JOBVSR, SORT
+      INTEGER            INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            BWORK( * )
+      REAL               A( LDA, * ), ALPHAI( * ), ALPHAR( * ),
+     $                   B( LDB, * ), BETA( * ), VSL( LDVSL, * ),
+     $                   VSR( LDVSR, * ), WORK( * )
+*     ..
+*     .. Function Arguments ..
+      LOGICAL            SELCTG
+      EXTERNAL           SELCTG
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGGES computes for a pair of N-by-N real nonsymmetric matrices (A,B),
+*  the generalized eigenvalues, the generalized real Schur form (S,T),
+*  optionally, the left and/or right matrices of Schur vectors (VSL and
+*  VSR). This gives the generalized Schur factorization
+*
+*           (A,B) = ( (VSL)*S*(VSR)**T, (VSL)*T*(VSR)**T )
+*
+*  Optionally, it also orders the eigenvalues so that a selected cluster
+*  of eigenvalues appears in the leading diagonal blocks of the upper
+*  quasi-triangular matrix S and the upper triangular matrix T.The
+*  leading columns of VSL and VSR then form an orthonormal basis for the
+*  corresponding left and right eigenspaces (deflating subspaces).
+*
+*  (If only the generalized eigenvalues are needed, use the driver
+*  SGGEV instead, which is faster.)
+*
+*  A generalized eigenvalue for a pair of matrices (A,B) is a scalar w
+*  or a ratio alpha/beta = w, such that  A - w*B is singular.  It is
+*  usually represented as the pair (alpha,beta), as there is a
+*  reasonable interpretation for beta=0 or both being zero.
+*
+*  A pair of matrices (S,T) is in generalized real Schur form if T is
+*  upper triangular with non-negative diagonal and S is block upper
+*  triangular with 1-by-1 and 2-by-2 blocks.  1-by-1 blocks correspond
+*  to real generalized eigenvalues, while 2-by-2 blocks of S will be
+*  "standardized" by making the corresponding elements of T have the
+*  form:
+*          [  a  0  ]
+*          [  0  b  ]
+*
+*  and the pair of corresponding 2-by-2 blocks in S and T will have a
+*  complex conjugate pair of generalized eigenvalues.
+*
+*
+*  Arguments
+*  =========
+*
+*  JOBVSL  (input) CHARACTER*1
+*          = 'N':  do not compute the left Schur vectors;
+*          = 'V':  compute the left Schur vectors.
+*
+*  JOBVSR  (input) CHARACTER*1
+*          = 'N':  do not compute the right Schur vectors;
+*          = 'V':  compute the right Schur vectors.
+*
+*  SORT    (input) CHARACTER*1
+*          Specifies whether or not to order the eigenvalues on the
+*          diagonal of the generalized Schur form.
+*          = 'N':  Eigenvalues are not ordered;
+*          = 'S':  Eigenvalues are ordered (see SELCTG);
+*
+*  SELCTG  (input) LOGICAL FUNCTION of three REAL arguments
+*          SELCTG must be declared EXTERNAL in the calling subroutine.
+*          If SORT = 'N', SELCTG is not referenced.
+*          If SORT = 'S', SELCTG is used to select eigenvalues to sort
+*          to the top left of the Schur form.
+*          An eigenvalue (ALPHAR(j)+ALPHAI(j))/BETA(j) is selected if
+*          SELCTG(ALPHAR(j),ALPHAI(j),BETA(j)) is true; i.e. if either
+*          one of a complex conjugate pair of eigenvalues is selected,
+*          then both complex eigenvalues are selected.
+*
+*          Note that in the ill-conditioned case, a selected complex
+*          eigenvalue may no longer satisfy SELCTG(ALPHAR(j),ALPHAI(j),
+*          BETA(j)) = .TRUE. after ordering. INFO is to be set to N+2
+*          in this case.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A, B, VSL, and VSR.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA, N)
+*          On entry, the first of the pair of matrices.
+*          On exit, A has been overwritten by its generalized Schur
+*          form S.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of A.  LDA >= max(1,N).
+*
+*  B       (input/output) REAL array, dimension (LDB, N)
+*          On entry, the second of the pair of matrices.
+*          On exit, B has been overwritten by its generalized Schur
+*          form T.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of B.  LDB >= max(1,N).
+*
+*  SDIM    (output) INTEGER
+*          If SORT = 'N', SDIM = 0.
+*          If SORT = 'S', SDIM = number of eigenvalues (after sorting)
+*          for which SELCTG is true.  (Complex conjugate pairs for which
+*          SELCTG is true for either eigenvalue count as 2.)
+*
+*  ALPHAR  (output) REAL array, dimension (N)
+*  ALPHAI  (output) REAL array, dimension (N)
+*  BETA    (output) REAL array, dimension (N)
+*          On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will
+*          be the generalized eigenvalues.  ALPHAR(j) + ALPHAI(j)*i,
+*          and  BETA(j),j=1,...,N are the diagonals of the complex Schur
+*          form (S,T) that would result if the 2-by-2 diagonal blocks of
+*          the real Schur form of (A,B) were further reduced to
+*          triangular form using 2-by-2 complex unitary transformations.
+*          If ALPHAI(j) is zero, then the j-th eigenvalue is real; if
+*          positive, then the j-th and (j+1)-st eigenvalues are a
+*          complex conjugate pair, with ALPHAI(j+1) negative.
+*
+*          Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j)
+*          may easily over- or underflow, and BETA(j) may even be zero.
+*          Thus, the user should avoid naively computing the ratio.
+*          However, ALPHAR and ALPHAI will be always less than and
+*          usually comparable with norm(A) in magnitude, and BETA always
+*          less than and usually comparable with norm(B).
+*
+*  VSL     (output) REAL array, dimension (LDVSL,N)
+*          If JOBVSL = 'V', VSL will contain the left Schur vectors.
+*          Not referenced if JOBVSL = 'N'.
+*
+*  LDVSL   (input) INTEGER
+*          The leading dimension of the matrix VSL. LDVSL >=1, and
+*          if JOBVSL = 'V', LDVSL >= N.
+*
+*  VSR     (output) REAL array, dimension (LDVSR,N)
+*          If JOBVSR = 'V', VSR will contain the right Schur vectors.
+*          Not referenced if JOBVSR = 'N'.
+*
+*  LDVSR   (input) INTEGER
+*          The leading dimension of the matrix VSR. LDVSR >= 1, and
+*          if JOBVSR = 'V', LDVSR >= N.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= 8*N+16.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  BWORK   (workspace) LOGICAL array, dimension (N)
+*          Not referenced if SORT = 'N'.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          = 1,...,N:
+*                The QZ iteration failed.  (A,B) are not in Schur
+*                form, but ALPHAR(j), ALPHAI(j), and BETA(j) should
+*                be correct for j=INFO+1,...,N.
+*          > N:  =N+1: other than QZ iteration failed in SHGEQZ.
+*                =N+2: after reordering, roundoff changed values of
+*                      some complex eigenvalues so that leading
+*                      eigenvalues in the Generalized Schur form no
+*                      longer satisfy SELCTG=.TRUE.  This could also
+*                      be caused due to scaling.
+*                =N+3: reordering failed in STGSEN.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            CURSL, ILASCL, ILBSCL, ILVSL, ILVSR, LASTSL,
+     $                   LQUERY, LST2SL, WANTST
+      INTEGER            I, ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT,
+     $                   ILO, IP, IRIGHT, IROWS, ITAU, IWRK, MAXWRK,
+     $                   MINWRK
+      REAL               ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, PVSL,
+     $                   PVSR, SAFMAX, SAFMIN, SMLNUM
+*     ..
+*     .. Local Arrays ..
+      INTEGER            IDUM( 1 )
+      REAL               DIF( 2 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEQRF, SGGBAK, SGGBAL, SGGHRD, SHGEQZ, SLABAD,
+     $                   SLACPY, SLASCL, SLASET, SORGQR, SORMQR, STGSEN,
+     $                   XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               SLAMCH, SLANGE
+      EXTERNAL           LSAME, ILAENV, SLAMCH, SLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode the input arguments
+*
+      IF( LSAME( JOBVSL, 'N' ) ) THEN
+         IJOBVL = 1
+         ILVSL = .FALSE.
+      ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN
+         IJOBVL = 2
+         ILVSL = .TRUE.
+      ELSE
+         IJOBVL = -1
+         ILVSL = .FALSE.
+      END IF
+*
+      IF( LSAME( JOBVSR, 'N' ) ) THEN
+         IJOBVR = 1
+         ILVSR = .FALSE.
+      ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN
+         IJOBVR = 2
+         ILVSR = .TRUE.
+      ELSE
+         IJOBVR = -1
+         ILVSR = .FALSE.
+      END IF
+*
+      WANTST = LSAME( SORT, 'S' )
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( IJOBVL.LE.0 ) THEN
+         INFO = -1
+      ELSE IF( IJOBVR.LE.0 ) THEN
+         INFO = -2
+      ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN
+         INFO = -15
+      ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN
+         INFO = -17
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN
+         MINWRK = 7*( N+1 ) + 16
+         MAXWRK = 7*( N+1 ) + N*ILAENV( 1, 'SGEQRF', ' ', N, 1, N, 0 ) +
+     $            16
+         IF( ILVSL ) THEN
+            MAXWRK = MAX( MAXWRK, 7*( N+1 )+N*
+     $               ILAENV( 1, 'SORGQR', ' ', N, 1, N, -1 ) )
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+*
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY )
+     $   INFO = -19
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGGES ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         SDIM = 0
+         RETURN
+      END IF
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'P' )
+      SAFMIN = SLAMCH( 'S' )
+      SAFMAX = ONE / SAFMIN
+      CALL SLABAD( SAFMIN, SAFMAX )
+      SMLNUM = SQRT( SAFMIN ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = SLANGE( 'M', N, N, A, LDA, WORK )
+      ILASCL = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         ANRMTO = SMLNUM
+         ILASCL = .TRUE.
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         ANRMTO = BIGNUM
+         ILASCL = .TRUE.
+      END IF
+      IF( ILASCL )
+     $   CALL SLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR )
+*
+*     Scale B if max element outside range [SMLNUM,BIGNUM]
+*
+      BNRM = SLANGE( 'M', N, N, B, LDB, WORK )
+      ILBSCL = .FALSE.
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+         BNRMTO = SMLNUM
+         ILBSCL = .TRUE.
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+         BNRMTO = BIGNUM
+         ILBSCL = .TRUE.
+      END IF
+      IF( ILBSCL )
+     $   CALL SLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR )
+*
+*     Permute the matrix to make it more nearly triangular
+*     (Workspace: need 6*N + 2*N space for storing balancing factors)
+*
+      ILEFT = 1
+      IRIGHT = N + 1
+      IWRK = IRIGHT + N
+      CALL SGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ),
+     $             WORK( IRIGHT ), WORK( IWRK ), IERR )
+*
+*     Reduce B to triangular form (QR decomposition of B)
+*     (Workspace: need N, prefer N*NB)
+*
+      IROWS = IHI + 1 - ILO
+      ICOLS = N + 1 - ILO
+      ITAU = IWRK
+      IWRK = ITAU + IROWS
+      CALL SGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ),
+     $             WORK( IWRK ), LWORK+1-IWRK, IERR )
+*
+*     Apply the orthogonal transformation to matrix A
+*     (Workspace: need N, prefer N*NB)
+*
+      CALL SORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB,
+     $             WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ),
+     $             LWORK+1-IWRK, IERR )
+*
+*     Initialize VSL
+*     (Workspace: need N, prefer N*NB)
+*
+      IF( ILVSL ) THEN
+         CALL SLASET( 'Full', N, N, ZERO, ONE, VSL, LDVSL )
+         CALL SLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB,
+     $                VSL( ILO+1, ILO ), LDVSL )
+         CALL SORGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL,
+     $                WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR )
+      END IF
+*
+*     Initialize VSR
+*
+      IF( ILVSR )
+     $   CALL SLASET( 'Full', N, N, ZERO, ONE, VSR, LDVSR )
+*
+*     Reduce to generalized Hessenberg form
+*     (Workspace: none needed)
+*
+      CALL SGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL,
+     $             LDVSL, VSR, LDVSR, IERR )
+*
+*     Perform QZ algorithm, computing Schur vectors if desired
+*     (Workspace: need N)
+*
+      IWRK = ITAU
+      CALL SHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB,
+     $             ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR,
+     $             WORK( IWRK ), LWORK+1-IWRK, IERR )
+      IF( IERR.NE.0 ) THEN
+         IF( IERR.GT.0 .AND. IERR.LE.N ) THEN
+            INFO = IERR
+         ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN
+            INFO = IERR - N
+         ELSE
+            INFO = N + 1
+         END IF
+         GO TO 40
+      END IF
+*
+*     Sort eigenvalues ALPHA/BETA if desired
+*     (Workspace: need 4*N+16 )
+*
+      SDIM = 0
+      IF( WANTST ) THEN
+*
+*        Undo scaling on eigenvalues before SELCTGing
+*
+         IF( ILASCL ) THEN
+            CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N,
+     $                   IERR )
+            CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N,
+     $                   IERR )
+         END IF
+         IF( ILBSCL )
+     $      CALL SLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR )
+*
+*        Select eigenvalues
+*
+         DO 10 I = 1, N
+            BWORK( I ) = SELCTG( ALPHAR( I ), ALPHAI( I ), BETA( I ) )
+   10    CONTINUE
+*
+         CALL STGSEN( 0, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB, ALPHAR,
+     $                ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR, SDIM, PVSL,
+     $                PVSR, DIF, WORK( IWRK ), LWORK-IWRK+1, IDUM, 1,
+     $                IERR )
+         IF( IERR.EQ.1 )
+     $      INFO = N + 3
+*
+      END IF
+*
+*     Apply back-permutation to VSL and VSR
+*     (Workspace: none needed)
+*
+      IF( ILVSL )
+     $   CALL SGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ),
+     $                WORK( IRIGHT ), N, VSL, LDVSL, IERR )
+*
+      IF( ILVSR )
+     $   CALL SGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ),
+     $                WORK( IRIGHT ), N, VSR, LDVSR, IERR )
+*
+*     Check if unscaling would cause over/underflow, if so, rescale 
+*     (ALPHAR(I),ALPHAI(I),BETA(I)) so BETA(I) is on the order of 
+*     B(I,I) and ALPHAR(I) and ALPHAI(I) are on the order of A(I,I)
+*
+      IF( ILASCL )THEN
+         DO 50 I = 1, N 
+            IF( ALPHAI( I ).NE.ZERO ) THEN 
+               IF( ( ALPHAR( I )/SAFMAX ).GT.( ANRMTO/ANRM ) .OR.
+     $             ( SAFMIN/ALPHAR( I ) ).GT.( ANRM/ANRMTO ) ) THEN
+                  WORK( 1 ) = ABS( A( I, I )/ALPHAR( I ) )
+                  BETA( I ) = BETA( I )*WORK( 1 )
+                  ALPHAR( I ) = ALPHAR( I )*WORK( 1 )
+                  ALPHAI( I ) = ALPHAI( I )*WORK( 1 )
+               ELSE IF( ( ALPHAI( I )/SAFMAX ).GT.( ANRMTO/ANRM ) .OR.         
+     $             ( SAFMIN/ALPHAI( I ) ).GT.( ANRM/ANRMTO ) ) THEN
+                  WORK( 1 ) = ABS( A( I, I+1 )/ALPHAI( I ) )
+                  BETA( I ) = BETA( I )*WORK( 1 )
+                  ALPHAR( I ) = ALPHAR( I )*WORK( 1 )
+                  ALPHAI( I ) = ALPHAI( I )*WORK( 1 )
+               END IF
+            END IF
+   50    CONTINUE
+      END IF 
+*
+      IF( ILBSCL )THEN 
+         DO 60 I = 1, N
+            IF( ALPHAI( I ).NE.ZERO ) THEN
+                IF( ( BETA( I )/SAFMAX ).GT.( BNRMTO/BNRM ) .OR.
+     $              ( SAFMIN/BETA( I ) ).GT.( BNRM/BNRMTO ) ) THEN
+                   WORK( 1 ) = ABS(B( I, I )/BETA( I ))
+                   BETA( I ) = BETA( I )*WORK( 1 )
+                   ALPHAR( I ) = ALPHAR( I )*WORK( 1 )
+                   ALPHAI( I ) = ALPHAI( I )*WORK( 1 )
+                END IF 
+             END IF
+   60    CONTINUE 
+      END IF 
+*
+*     Undo scaling
+*
+      IF( ILASCL ) THEN
+         CALL SLASCL( 'H', 0, 0, ANRMTO, ANRM, N, N, A, LDA, IERR )
+         CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, IERR )
+         CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, IERR )
+      END IF
+*
+      IF( ILBSCL ) THEN
+         CALL SLASCL( 'U', 0, 0, BNRMTO, BNRM, N, N, B, LDB, IERR )
+         CALL SLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR )
+      END IF
+*
+      IF( WANTST ) THEN
+*
+*        Check if reordering is correct
+*
+         LASTSL = .TRUE.
+         LST2SL = .TRUE.
+         SDIM = 0
+         IP = 0
+         DO 30 I = 1, N
+            CURSL = SELCTG( ALPHAR( I ), ALPHAI( I ), BETA( I ) )
+            IF( ALPHAI( I ).EQ.ZERO ) THEN
+               IF( CURSL )
+     $            SDIM = SDIM + 1
+               IP = 0
+               IF( CURSL .AND. .NOT.LASTSL )
+     $            INFO = N + 2
+            ELSE
+               IF( IP.EQ.1 ) THEN
+*
+*                 Last eigenvalue of conjugate pair
+*
+                  CURSL = CURSL .OR. LASTSL
+                  LASTSL = CURSL
+                  IF( CURSL )
+     $               SDIM = SDIM + 2
+                  IP = -1
+                  IF( CURSL .AND. .NOT.LST2SL )
+     $               INFO = N + 2
+               ELSE
+*
+*                 First eigenvalue of conjugate pair
+*
+                  IP = 1
+               END IF
+            END IF
+            LST2SL = LASTSL
+            LASTSL = CURSL
+   30    CONTINUE
+*
+      END IF
+*
+   40 CONTINUE
+*
+      WORK( 1 ) = MAXWRK
+*
+      RETURN
+*
+*     End of SGGES
+*
+      END
diff --git a/costa/native/external/lapack/sggesx.f b/costa/native/external/lapack/sggesx.f
new file mode 100644
index 000000000..e7f1a33ea
--- /dev/null
+++ b/costa/native/external/lapack/sggesx.f
@@ -0,0 +1,640 @@
+      SUBROUTINE SGGESX( JOBVSL, JOBVSR, SORT, SELCTG, SENSE, N, A, LDA,
+     $                   B, LDB, SDIM, ALPHAR, ALPHAI, BETA, VSL, LDVSL,
+     $                   VSR, LDVSR, RCONDE, RCONDV, WORK, LWORK, IWORK,
+     $                   LIWORK, BWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVSL, JOBVSR, SENSE, SORT
+      INTEGER            INFO, LDA, LDB, LDVSL, LDVSR, LIWORK, LWORK, N,
+     $                   SDIM
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            BWORK( * )
+      INTEGER            IWORK( * )
+      REAL               A( LDA, * ), ALPHAI( * ), ALPHAR( * ),
+     $                   B( LDB, * ), BETA( * ), RCONDE( 2 ),
+     $                   RCONDV( 2 ), VSL( LDVSL, * ), VSR( LDVSR, * ),
+     $                   WORK( * )
+*     ..
+*     .. Function Arguments ..
+      LOGICAL            SELCTG
+      EXTERNAL           SELCTG
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGGESX computes for a pair of N-by-N real nonsymmetric matrices
+*  (A,B), the generalized eigenvalues, the real Schur form (S,T), and,
+*  optionally, the left and/or right matrices of Schur vectors (VSL and
+*  VSR).  This gives the generalized Schur factorization
+*
+*       (A,B) = ( (VSL) S (VSR)**T, (VSL) T (VSR)**T )
+*
+*  Optionally, it also orders the eigenvalues so that a selected cluster
+*  of eigenvalues appears in the leading diagonal blocks of the upper
+*  quasi-triangular matrix S and the upper triangular matrix T; computes
+*  a reciprocal condition number for the average of the selected
+*  eigenvalues (RCONDE); and computes a reciprocal condition number for
+*  the right and left deflating subspaces corresponding to the selected
+*  eigenvalues (RCONDV). The leading columns of VSL and VSR then form
+*  an orthonormal basis for the corresponding left and right eigenspaces
+*  (deflating subspaces).
+*
+*  A generalized eigenvalue for a pair of matrices (A,B) is a scalar w
+*  or a ratio alpha/beta = w, such that  A - w*B is singular.  It is
+*  usually represented as the pair (alpha,beta), as there is a
+*  reasonable interpretation for beta=0 or for both being zero.
+*
+*  A pair of matrices (S,T) is in generalized real Schur form if T is
+*  upper triangular with non-negative diagonal and S is block upper
+*  triangular with 1-by-1 and 2-by-2 blocks.  1-by-1 blocks correspond
+*  to real generalized eigenvalues, while 2-by-2 blocks of S will be
+*  "standardized" by making the corresponding elements of T have the
+*  form:
+*          [  a  0  ]
+*          [  0  b  ]
+*
+*  and the pair of corresponding 2-by-2 blocks in S and T will have a
+*  complex conjugate pair of generalized eigenvalues.
+*
+*
+*  Arguments
+*  =========
+*
+*  JOBVSL  (input) CHARACTER*1
+*          = 'N':  do not compute the left Schur vectors;
+*          = 'V':  compute the left Schur vectors.
+*
+*  JOBVSR  (input) CHARACTER*1
+*          = 'N':  do not compute the right Schur vectors;
+*          = 'V':  compute the right Schur vectors.
+*
+*  SORT    (input) CHARACTER*1
+*          Specifies whether or not to order the eigenvalues on the
+*          diagonal of the generalized Schur form.
+*          = 'N':  Eigenvalues are not ordered;
+*          = 'S':  Eigenvalues are ordered (see SELCTG).
+*
+*  SELCTG  (input) LOGICAL FUNCTION of three REAL arguments
+*          SELCTG must be declared EXTERNAL in the calling subroutine.
+*          If SORT = 'N', SELCTG is not referenced.
+*          If SORT = 'S', SELCTG is used to select eigenvalues to sort
+*          to the top left of the Schur form.
+*          An eigenvalue (ALPHAR(j)+ALPHAI(j))/BETA(j) is selected if
+*          SELCTG(ALPHAR(j),ALPHAI(j),BETA(j)) is true; i.e. if either
+*          one of a complex conjugate pair of eigenvalues is selected,
+*          then both complex eigenvalues are selected.
+*          Note that a selected complex eigenvalue may no longer satisfy
+*          SELCTG(ALPHAR(j),ALPHAI(j),BETA(j)) = .TRUE. after ordering,
+*          since ordering may change the value of complex eigenvalues
+*          (especially if the eigenvalue is ill-conditioned), in this
+*          case INFO is set to N+3.
+*
+*  SENSE   (input) CHARACTER
+*          Determines which reciprocal condition numbers are computed.
+*          = 'N' : None are computed;
+*          = 'E' : Computed for average of selected eigenvalues only;
+*          = 'V' : Computed for selected deflating subspaces only;
+*          = 'B' : Computed for both.
+*          If SENSE = 'E', 'V', or 'B', SORT must equal 'S'.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A, B, VSL, and VSR.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA, N)
+*          On entry, the first of the pair of matrices.
+*          On exit, A has been overwritten by its generalized Schur
+*          form S.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of A.  LDA >= max(1,N).
+*
+*  B       (input/output) REAL array, dimension (LDB, N)
+*          On entry, the second of the pair of matrices.
+*          On exit, B has been overwritten by its generalized Schur
+*          form T.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of B.  LDB >= max(1,N).
+*
+*  SDIM    (output) INTEGER
+*          If SORT = 'N', SDIM = 0.
+*          If SORT = 'S', SDIM = number of eigenvalues (after sorting)
+*          for which SELCTG is true.  (Complex conjugate pairs for which
+*          SELCTG is true for either eigenvalue count as 2.)
+*
+*  ALPHAR  (output) REAL array, dimension (N)
+*  ALPHAI  (output) REAL array, dimension (N)
+*  BETA    (output) REAL array, dimension (N)
+*          On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will
+*          be the generalized eigenvalues.  ALPHAR(j) + ALPHAI(j)*i
+*          and BETA(j),j=1,...,N  are the diagonals of the complex Schur
+*          form (S,T) that would result if the 2-by-2 diagonal blocks of
+*          the real Schur form of (A,B) were further reduced to
+*          triangular form using 2-by-2 complex unitary transformations.
+*          If ALPHAI(j) is zero, then the j-th eigenvalue is real; if
+*          positive, then the j-th and (j+1)-st eigenvalues are a
+*          complex conjugate pair, with ALPHAI(j+1) negative.
+*
+*          Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j)
+*          may easily over- or underflow, and BETA(j) may even be zero.
+*          Thus, the user should avoid naively computing the ratio.
+*          However, ALPHAR and ALPHAI will be always less than and
+*          usually comparable with norm(A) in magnitude, and BETA always
+*          less than and usually comparable with norm(B).
+*
+*  VSL     (output) REAL array, dimension (LDVSL,N)
+*          If JOBVSL = 'V', VSL will contain the left Schur vectors.
+*          Not referenced if JOBVSL = 'N'.
+*
+*  LDVSL   (input) INTEGER
+*          The leading dimension of the matrix VSL. LDVSL >=1, and
+*          if JOBVSL = 'V', LDVSL >= N.
+*
+*  VSR     (output) REAL array, dimension (LDVSR,N)
+*          If JOBVSR = 'V', VSR will contain the right Schur vectors.
+*          Not referenced if JOBVSR = 'N'.
+*
+*  LDVSR   (input) INTEGER
+*          The leading dimension of the matrix VSR. LDVSR >= 1, and
+*          if JOBVSR = 'V', LDVSR >= N.
+*
+*  RCONDE  (output) REAL array, dimension ( 2 )
+*          If SENSE = 'E' or 'B', RCONDE(1) and RCONDE(2) contain the
+*          reciprocal condition numbers for the average of the selected
+*          eigenvalues.
+*          Not referenced if SENSE = 'N' or 'V'.
+*
+*  RCONDV  (output) REAL array, dimension ( 2 )
+*          If SENSE = 'V' or 'B', RCONDV(1) and RCONDV(2) contain the
+*          reciprocal condition numbers for the selected deflating
+*          subspaces.
+*          Not referenced if SENSE = 'N' or 'E'.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= 8*(N+1)+16.
+*          If SENSE = 'E', 'V', or 'B',
+*          LWORK >= MAX( 8*(N+1)+16, 2*SDIM*(N-SDIM) ).
+*
+*  IWORK   (workspace) INTEGER array, dimension (LIWORK)
+*          Not referenced if SENSE = 'N'.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array WORK.  LIWORK >= N+6.
+*
+*  BWORK   (workspace) LOGICAL array, dimension (N)
+*          Not referenced if SORT = 'N'.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          = 1,...,N:
+*                The QZ iteration failed.  (A,B) are not in Schur
+*                form, but ALPHAR(j), ALPHAI(j), and BETA(j) should
+*                be correct for j=INFO+1,...,N.
+*          > N:  =N+1: other than QZ iteration failed in SHGEQZ
+*                =N+2: after reordering, roundoff changed values of
+*                      some complex eigenvalues so that leading
+*                      eigenvalues in the Generalized Schur form no
+*                      longer satisfy SELCTG=.TRUE.  This could also
+*                      be caused due to scaling.
+*                =N+3: reordering failed in STGSEN.
+*
+*  Further details
+*  ===============
+*
+*  An approximate (asymptotic) bound on the average absolute error of
+*  the selected eigenvalues is
+*
+*       EPS * norm((A, B)) / RCONDE( 1 ).
+*
+*  An approximate (asymptotic) bound on the maximum angular error in
+*  the computed deflating subspaces is
+*
+*       EPS * norm((A, B)) / RCONDV( 2 ).
+*
+*  See LAPACK User's Guide, section 4.11 for more information.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            CURSL, ILASCL, ILBSCL, ILVSL, ILVSR, LASTSL,
+     $                   LST2SL, WANTSB, WANTSE, WANTSN, WANTST, WANTSV
+      INTEGER            I, ICOLS, IERR, IHI, IJOB, IJOBVL, IJOBVR,
+     $                   ILEFT, ILO, IP, IRIGHT, IROWS, ITAU, IWRK,
+     $                   LIWMIN, MAXWRK, MINWRK
+      REAL               ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, PL,
+     $                   PR, SAFMAX, SAFMIN, SMLNUM
+*     ..
+*     .. Local Arrays ..
+      REAL               DIF( 2 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEQRF, SGGBAK, SGGBAL, SGGHRD, SHGEQZ, SLABAD,
+     $                   SLACPY, SLASCL, SLASET, SORGQR, SORMQR, STGSEN,
+     $                   XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               SLAMCH, SLANGE
+      EXTERNAL           LSAME, ILAENV, SLAMCH, SLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode the input arguments
+*
+      IF( LSAME( JOBVSL, 'N' ) ) THEN
+         IJOBVL = 1
+         ILVSL = .FALSE.
+      ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN
+         IJOBVL = 2
+         ILVSL = .TRUE.
+      ELSE
+         IJOBVL = -1
+         ILVSL = .FALSE.
+      END IF
+*
+      IF( LSAME( JOBVSR, 'N' ) ) THEN
+         IJOBVR = 1
+         ILVSR = .FALSE.
+      ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN
+         IJOBVR = 2
+         ILVSR = .TRUE.
+      ELSE
+         IJOBVR = -1
+         ILVSR = .FALSE.
+      END IF
+*
+      WANTST = LSAME( SORT, 'S' )
+      WANTSN = LSAME( SENSE, 'N' )
+      WANTSE = LSAME( SENSE, 'E' )
+      WANTSV = LSAME( SENSE, 'V' )
+      WANTSB = LSAME( SENSE, 'B' )
+      IF( WANTSN ) THEN
+         IJOB = 0
+         IWORK( 1 ) = 1
+      ELSE IF( WANTSE ) THEN
+         IJOB = 1
+      ELSE IF( WANTSV ) THEN
+         IJOB = 2
+      ELSE IF( WANTSB ) THEN
+         IJOB = 4
+      END IF
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( IJOBVL.LE.0 ) THEN
+         INFO = -1
+      ELSE IF( IJOBVR.LE.0 ) THEN
+         INFO = -2
+      ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSV .OR. WANTSB ) .OR.
+     $         ( .NOT.WANTST .AND. .NOT.WANTSN ) ) THEN
+         INFO = -5
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN
+         INFO = -16
+      ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN
+         INFO = -18
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. LWORK.GE.1 ) THEN
+         MINWRK = 8*( N+1 ) + 16
+         MAXWRK = 7*( N+1 ) + N*ILAENV( 1, 'SGEQRF', ' ', N, 1, N, 0 ) +
+     $            16
+         IF( ILVSL ) THEN
+            MAXWRK = MAX( MAXWRK, 8*( N+1 )+N*
+     $               ILAENV( 1, 'SORGQR', ' ', N, 1, N, -1 )+16 )
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+      IF( .NOT.WANTSN ) THEN
+         LIWMIN = 1
+      ELSE
+         LIWMIN = N + 6
+      END IF
+      IWORK( 1 ) = LIWMIN
+*
+      IF( INFO.EQ.0 .AND. LWORK.LT.MINWRK ) THEN
+         INFO = -22
+      ELSE IF( INFO.EQ.0 .AND. IJOB.GE.1 ) THEN
+         IF( LIWORK.LT.LIWMIN )
+     $      INFO = -24
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGGESX', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         SDIM = 0
+         RETURN
+      END IF
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'P' )
+      SAFMIN = SLAMCH( 'S' )
+      SAFMAX = ONE / SAFMIN
+      CALL SLABAD( SAFMIN, SAFMAX )
+      SMLNUM = SQRT( SAFMIN ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = SLANGE( 'M', N, N, A, LDA, WORK )
+      ILASCL = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         ANRMTO = SMLNUM
+         ILASCL = .TRUE.
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         ANRMTO = BIGNUM
+         ILASCL = .TRUE.
+      END IF
+      IF( ILASCL )
+     $   CALL SLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR )
+*
+*     Scale B if max element outside range [SMLNUM,BIGNUM]
+*
+      BNRM = SLANGE( 'M', N, N, B, LDB, WORK )
+      ILBSCL = .FALSE.
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+         BNRMTO = SMLNUM
+         ILBSCL = .TRUE.
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+         BNRMTO = BIGNUM
+         ILBSCL = .TRUE.
+      END IF
+      IF( ILBSCL )
+     $   CALL SLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR )
+*
+*     Permute the matrix to make it more nearly triangular
+*     (Workspace: need 6*N + 2*N for permutation parameters)
+*
+      ILEFT = 1
+      IRIGHT = N + 1
+      IWRK = IRIGHT + N
+      CALL SGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ),
+     $             WORK( IRIGHT ), WORK( IWRK ), IERR )
+*
+*     Reduce B to triangular form (QR decomposition of B)
+*     (Workspace: need N, prefer N*NB)
+*
+      IROWS = IHI + 1 - ILO
+      ICOLS = N + 1 - ILO
+      ITAU = IWRK
+      IWRK = ITAU + IROWS
+      CALL SGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ),
+     $             WORK( IWRK ), LWORK+1-IWRK, IERR )
+*
+*     Apply the orthogonal transformation to matrix A
+*     (Workspace: need N, prefer N*NB)
+*
+      CALL SORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB,
+     $             WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ),
+     $             LWORK+1-IWRK, IERR )
+*
+*     Initialize VSL
+*     (Workspace: need N, prefer N*NB)
+*
+      IF( ILVSL ) THEN
+         CALL SLASET( 'Full', N, N, ZERO, ONE, VSL, LDVSL )
+         CALL SLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB,
+     $                VSL( ILO+1, ILO ), LDVSL )
+         CALL SORGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL,
+     $                WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR )
+      END IF
+*
+*     Initialize VSR
+*
+      IF( ILVSR )
+     $   CALL SLASET( 'Full', N, N, ZERO, ONE, VSR, LDVSR )
+*
+*     Reduce to generalized Hessenberg form
+*     (Workspace: none needed)
+*
+      CALL SGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL,
+     $             LDVSL, VSR, LDVSR, IERR )
+*
+      SDIM = 0
+*
+*     Perform QZ algorithm, computing Schur vectors if desired
+*     (Workspace: need N)
+*
+      IWRK = ITAU
+      CALL SHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB,
+     $             ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR,
+     $             WORK( IWRK ), LWORK+1-IWRK, IERR )
+      IF( IERR.NE.0 ) THEN
+         IF( IERR.GT.0 .AND. IERR.LE.N ) THEN
+            INFO = IERR
+         ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN
+            INFO = IERR - N
+         ELSE
+            INFO = N + 1
+         END IF
+         GO TO 50
+      END IF
+*
+*     Sort eigenvalues ALPHA/BETA and compute the reciprocal of
+*     condition number(s)
+*     (Workspace: If IJOB >= 1, need MAX( 8*(N+1), 2*SDIM*(N-SDIM) )
+*                 otherwise, need 8*(N+1) )
+*
+      IF( WANTST ) THEN
+*
+*        Undo scaling on eigenvalues before SELCTGing
+*
+         IF( ILASCL ) THEN
+            CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N,
+     $                   IERR )
+            CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N,
+     $                   IERR )
+         END IF
+         IF( ILBSCL )
+     $      CALL SLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR )
+*
+*        Select eigenvalues
+*
+         DO 10 I = 1, N
+            BWORK( I ) = SELCTG( ALPHAR( I ), ALPHAI( I ), BETA( I ) )
+   10    CONTINUE
+*
+*        Reorder eigenvalues, transform Generalized Schur vectors, and
+*        compute reciprocal condition numbers
+*
+         CALL STGSEN( IJOB, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB,
+     $                ALPHAR, ALPHAI, BETA, VSL, LDVSL, VSR, LDVSR,
+     $                SDIM, PL, PR, DIF, WORK( IWRK ), LWORK-IWRK+1,
+     $                IWORK, LIWORK, IERR )
+*
+         IF( IJOB.GE.1 )
+     $      MAXWRK = MAX( MAXWRK, 2*SDIM*( N-SDIM ) )
+         IF( IERR.EQ.-22 ) THEN
+*
+*            not enough real workspace
+*
+            INFO = -22
+         ELSE
+            RCONDE( 1 ) = PL
+            RCONDE( 2 ) = PR
+            RCONDV( 1 ) = DIF( 1 )
+            RCONDV( 2 ) = DIF( 2 )
+            IF( IERR.EQ.1 )
+     $         INFO = N + 3
+         END IF
+*
+      END IF
+*
+*     Apply permutation to VSL and VSR
+*     (Workspace: none needed)
+*
+      IF( ILVSL )
+     $   CALL SGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ),
+     $                WORK( IRIGHT ), N, VSL, LDVSL, IERR )
+*
+      IF( ILVSR )
+     $   CALL SGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ),
+     $                WORK( IRIGHT ), N, VSR, LDVSR, IERR )
+*
+*     Check if unscaling would cause over/underflow, if so, rescale
+*     (ALPHAR(I),ALPHAI(I),BETA(I)) so BETA(I) is on the order of
+*     B(I,I) and ALPHAR(I) and ALPHAI(I) are on the order of A(I,I)
+*
+      IF( ILASCL ) THEN  
+         DO 20 I = 1, N  
+            IF( ALPHAI( I ).NE.ZERO ) THEN
+               IF( ( ALPHAR( I ) / SAFMAX ).GT.( ANRMTO / ANRM ) .OR.
+     $             ( SAFMIN / ALPHAR( I ) ).GT.( ANRM / ANRMTO ) )        
+     $            THEN
+                  WORK( 1 ) = ABS( A( I, I ) / ALPHAR( I ) )
+                  BETA( I ) = BETA( I )*WORK( 1 )
+                  ALPHAR( I ) = ALPHAR( I )*WORK( 1 )
+                  ALPHAI( I ) = ALPHAI( I )*WORK( 1 )
+               ELSE IF( ( ALPHAI( I ) / SAFMAX ).GT.( ANRMTO / ANRM ) 
+     $            .OR. ( SAFMIN / ALPHAI( I ) ).GT.( ANRM / ANRMTO ) )
+     $            THEN
+                  WORK( 1 ) = ABS( A( I, I+1 ) / ALPHAI( I ) )
+                  BETA( I ) = BETA( I )*WORK( 1 )
+                  ALPHAR( I ) = ALPHAR( I )*WORK( 1 )
+                  ALPHAI( I ) = ALPHAI( I )*WORK( 1 )
+               END IF
+            END IF
+   20    CONTINUE
+      END IF 
+*
+      IF( ILBSCL ) THEN 
+         DO 25 I = 1, N
+            IF( ALPHAI( I ).NE.ZERO ) THEN
+               IF( ( BETA( I ) / SAFMAX ).GT.( BNRMTO / BNRM ) .OR.
+     $             ( SAFMIN / BETA( I ) ).GT.( BNRM / BNRMTO ) ) THEN
+                  WORK( 1 ) = ABS( B( I, I ) / BETA( I ) )
+                  BETA( I ) = BETA( I )*WORK( 1 )
+                  ALPHAR( I ) = ALPHAR( I )*WORK( 1 )
+                  ALPHAI( I ) = ALPHAI( I )*WORK( 1 )
+               END IF 
+            END IF 
+   25    CONTINUE
+      END IF 
+*
+*     Undo scaling
+*
+      IF( ILASCL ) THEN
+         CALL SLASCL( 'H', 0, 0, ANRMTO, ANRM, N, N, A, LDA, IERR )
+         CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, IERR )
+         CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, IERR )
+      END IF
+*
+      IF( ILBSCL ) THEN
+         CALL SLASCL( 'U', 0, 0, BNRMTO, BNRM, N, N, B, LDB, IERR )
+         CALL SLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR )
+      END IF
+*
+   30 CONTINUE
+*
+      IF( WANTST ) THEN
+*
+*        Check if reordering is correct
+*
+         LASTSL = .TRUE.
+         LST2SL = .TRUE.
+         SDIM = 0
+         IP = 0
+         DO 40 I = 1, N
+            CURSL = SELCTG( ALPHAR( I ), ALPHAI( I ), BETA( I ) )
+            IF( ALPHAI( I ).EQ.ZERO ) THEN
+               IF( CURSL )
+     $            SDIM = SDIM + 1
+               IP = 0
+               IF( CURSL .AND. .NOT.LASTSL )
+     $            INFO = N + 2
+            ELSE
+               IF( IP.EQ.1 ) THEN
+*
+*                 Last eigenvalue of conjugate pair
+*
+                  CURSL = CURSL .OR. LASTSL
+                  LASTSL = CURSL
+                  IF( CURSL )
+     $               SDIM = SDIM + 2
+                  IP = -1
+                  IF( CURSL .AND. .NOT.LST2SL )
+     $               INFO = N + 2
+               ELSE
+*
+*                 First eigenvalue of conjugate pair
+*
+                  IP = 1
+               END IF
+            END IF
+            LST2SL = LASTSL
+            LASTSL = CURSL
+   40    CONTINUE
+*
+      END IF
+*
+   50 CONTINUE
+*
+      WORK( 1 ) = MAXWRK
+      IWORK( 1 ) = LIWMIN
+*
+      RETURN
+*
+*     End of SGGESX
+*
+      END
diff --git a/costa/native/external/lapack/sggev.f b/costa/native/external/lapack/sggev.f
new file mode 100644
index 000000000..3e75fc8a7
--- /dev/null
+++ b/costa/native/external/lapack/sggev.f
@@ -0,0 +1,482 @@
+      SUBROUTINE SGGEV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHAR, ALPHAI,
+     $                  BETA, VL, LDVL, VR, LDVR, WORK, LWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVL, JOBVR
+      INTEGER            INFO, LDA, LDB, LDVL, LDVR, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), ALPHAI( * ), ALPHAR( * ),
+     $                   B( LDB, * ), BETA( * ), VL( LDVL, * ),
+     $                   VR( LDVR, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGGEV computes for a pair of N-by-N real nonsymmetric matrices (A,B)
+*  the generalized eigenvalues, and optionally, the left and/or right
+*  generalized eigenvectors.
+*
+*  A generalized eigenvalue for a pair of matrices (A,B) is a scalar
+*  lambda or a ratio alpha/beta = lambda, such that A - lambda*B is
+*  singular. It is usually represented as the pair (alpha,beta), as
+*  there is a reasonable interpretation for beta=0, and even for both
+*  being zero.
+*
+*  The right eigenvector v(j) corresponding to the eigenvalue lambda(j)
+*  of (A,B) satisfies
+*
+*                   A * v(j) = lambda(j) * B * v(j).
+*
+*  The left eigenvector u(j) corresponding to the eigenvalue lambda(j)
+*  of (A,B) satisfies
+*
+*                   u(j)**H * A  = lambda(j) * u(j)**H * B .
+*
+*  where u(j)**H is the conjugate-transpose of u(j).
+*
+*
+*  Arguments
+*  =========
+*
+*  JOBVL   (input) CHARACTER*1
+*          = 'N':  do not compute the left generalized eigenvectors;
+*          = 'V':  compute the left generalized eigenvectors.
+*
+*  JOBVR   (input) CHARACTER*1
+*          = 'N':  do not compute the right generalized eigenvectors;
+*          = 'V':  compute the right generalized eigenvectors.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A, B, VL, and VR.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA, N)
+*          On entry, the matrix A in the pair (A,B).
+*          On exit, A has been overwritten.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of A.  LDA >= max(1,N).
+*
+*  B       (input/output) REAL array, dimension (LDB, N)
+*          On entry, the matrix B in the pair (A,B).
+*          On exit, B has been overwritten.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of B.  LDB >= max(1,N).
+*
+*  ALPHAR  (output) REAL array, dimension (N)
+*  ALPHAI  (output) REAL array, dimension (N)
+*  BETA    (output) REAL array, dimension (N)
+*          On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will
+*          be the generalized eigenvalues.  If ALPHAI(j) is zero, then
+*          the j-th eigenvalue is real; if positive, then the j-th and
+*          (j+1)-st eigenvalues are a complex conjugate pair, with
+*          ALPHAI(j+1) negative.
+*
+*          Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j)
+*          may easily over- or underflow, and BETA(j) may even be zero.
+*          Thus, the user should avoid naively computing the ratio
+*          alpha/beta.  However, ALPHAR and ALPHAI will be always less
+*          than and usually comparable with norm(A) in magnitude, and
+*          BETA always less than and usually comparable with norm(B).
+*
+*  VL      (output) REAL array, dimension (LDVL,N)
+*          If JOBVL = 'V', the left eigenvectors u(j) are stored one
+*          after another in the columns of VL, in the same order as
+*          their eigenvalues. If the j-th eigenvalue is real, then
+*          u(j) = VL(:,j), the j-th column of VL. If the j-th and
+*          (j+1)-th eigenvalues form a complex conjugate pair, then
+*          u(j) = VL(:,j)+i*VL(:,j+1) and u(j+1) = VL(:,j)-i*VL(:,j+1).
+*          Each eigenvector will be scaled so the largest component have
+*          abs(real part)+abs(imag. part)=1.
+*          Not referenced if JOBVL = 'N'.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the matrix VL. LDVL >= 1, and
+*          if JOBVL = 'V', LDVL >= N.
+*
+*  VR      (output) REAL array, dimension (LDVR,N)
+*          If JOBVR = 'V', the right eigenvectors v(j) are stored one
+*          after another in the columns of VR, in the same order as
+*          their eigenvalues. If the j-th eigenvalue is real, then
+*          v(j) = VR(:,j), the j-th column of VR. If the j-th and
+*          (j+1)-th eigenvalues form a complex conjugate pair, then
+*          v(j) = VR(:,j)+i*VR(:,j+1) and v(j+1) = VR(:,j)-i*VR(:,j+1).
+*          Each eigenvector will be scaled so the largest component have
+*          abs(real part)+abs(imag. part)=1.
+*          Not referenced if JOBVR = 'N'.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the matrix VR. LDVR >= 1, and
+*          if JOBVR = 'V', LDVR >= N.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,8*N).
+*          For good performance, LWORK must generally be larger.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          = 1,...,N:
+*                The QZ iteration failed.  No eigenvectors have been
+*                calculated, but ALPHAR(j), ALPHAI(j), and BETA(j)
+*                should be correct for j=INFO+1,...,N.
+*          > N:  =N+1: other than QZ iteration failed in SHGEQZ.
+*                =N+2: error return from STGEVC.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ILASCL, ILBSCL, ILV, ILVL, ILVR, LQUERY
+      CHARACTER          CHTEMP
+      INTEGER            ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT, ILO,
+     $                   IN, IRIGHT, IROWS, ITAU, IWRK, JC, JR, MAXWRK,
+     $                   MINWRK
+      REAL               ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS,
+     $                   SMLNUM, TEMP
+*     ..
+*     .. Local Arrays ..
+      LOGICAL            LDUMMA( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEQRF, SGGBAK, SGGBAL, SGGHRD, SHGEQZ, SLABAD,
+     $                   SLACPY, SLASCL, SLASET, SORGQR, SORMQR, STGEVC,
+     $                   XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               SLAMCH, SLANGE
+      EXTERNAL           LSAME, ILAENV, SLAMCH, SLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode the input arguments
+*
+      IF( LSAME( JOBVL, 'N' ) ) THEN
+         IJOBVL = 1
+         ILVL = .FALSE.
+      ELSE IF( LSAME( JOBVL, 'V' ) ) THEN
+         IJOBVL = 2
+         ILVL = .TRUE.
+      ELSE
+         IJOBVL = -1
+         ILVL = .FALSE.
+      END IF
+*
+      IF( LSAME( JOBVR, 'N' ) ) THEN
+         IJOBVR = 1
+         ILVR = .FALSE.
+      ELSE IF( LSAME( JOBVR, 'V' ) ) THEN
+         IJOBVR = 2
+         ILVR = .TRUE.
+      ELSE
+         IJOBVR = -1
+         ILVR = .FALSE.
+      END IF
+      ILV = ILVL .OR. ILVR
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( IJOBVL.LE.0 ) THEN
+         INFO = -1
+      ELSE IF( IJOBVR.LE.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN
+         INFO = -12
+      ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN
+         INFO = -14
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV. The workspace is
+*       computed assuming ILO = 1 and IHI = N, the worst case.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN
+         MAXWRK = 7*N + N*ILAENV( 1, 'SGEQRF', ' ', N, 1, N, 0 )
+         MINWRK = MAX( 1, 8*N )
+         WORK( 1 ) = MAXWRK
+      END IF
+*
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY )
+     $   INFO = -16
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGGEV ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SQRT( SMLNUM ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = SLANGE( 'M', N, N, A, LDA, WORK )
+      ILASCL = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         ANRMTO = SMLNUM
+         ILASCL = .TRUE.
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         ANRMTO = BIGNUM
+         ILASCL = .TRUE.
+      END IF
+      IF( ILASCL )
+     $   CALL SLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR )
+*
+*     Scale B if max element outside range [SMLNUM,BIGNUM]
+*
+      BNRM = SLANGE( 'M', N, N, B, LDB, WORK )
+      ILBSCL = .FALSE.
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+         BNRMTO = SMLNUM
+         ILBSCL = .TRUE.
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+         BNRMTO = BIGNUM
+         ILBSCL = .TRUE.
+      END IF
+      IF( ILBSCL )
+     $   CALL SLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR )
+*
+*     Permute the matrices A, B to isolate eigenvalues if possible
+*     (Workspace: need 6*N)
+*
+      ILEFT = 1
+      IRIGHT = N + 1
+      IWRK = IRIGHT + N
+      CALL SGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, WORK( ILEFT ),
+     $             WORK( IRIGHT ), WORK( IWRK ), IERR )
+*
+*     Reduce B to triangular form (QR decomposition of B)
+*     (Workspace: need N, prefer N*NB)
+*
+      IROWS = IHI + 1 - ILO
+      IF( ILV ) THEN
+         ICOLS = N + 1 - ILO
+      ELSE
+         ICOLS = IROWS
+      END IF
+      ITAU = IWRK
+      IWRK = ITAU + IROWS
+      CALL SGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ),
+     $             WORK( IWRK ), LWORK+1-IWRK, IERR )
+*
+*     Apply the orthogonal transformation to matrix A
+*     (Workspace: need N, prefer N*NB)
+*
+      CALL SORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB,
+     $             WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ),
+     $             LWORK+1-IWRK, IERR )
+*
+*     Initialize VL
+*     (Workspace: need N, prefer N*NB)
+*
+      IF( ILVL ) THEN
+         CALL SLASET( 'Full', N, N, ZERO, ONE, VL, LDVL )
+         CALL SLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB,
+     $                VL( ILO+1, ILO ), LDVL )
+         CALL SORGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL,
+     $                WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR )
+      END IF
+*
+*     Initialize VR
+*
+      IF( ILVR )
+     $   CALL SLASET( 'Full', N, N, ZERO, ONE, VR, LDVR )
+*
+*     Reduce to generalized Hessenberg form
+*     (Workspace: none needed)
+*
+      IF( ILV ) THEN
+*
+*        Eigenvectors requested -- work on whole matrix.
+*
+         CALL SGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL,
+     $                LDVL, VR, LDVR, IERR )
+      ELSE
+         CALL SGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA,
+     $                B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IERR )
+      END IF
+*
+*     Perform QZ algorithm (Compute eigenvalues, and optionally, the
+*     Schur forms and Schur vectors)
+*     (Workspace: need N)
+*
+      IWRK = ITAU
+      IF( ILV ) THEN
+         CHTEMP = 'S'
+      ELSE
+         CHTEMP = 'E'
+      END IF
+      CALL SHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB,
+     $             ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR,
+     $             WORK( IWRK ), LWORK+1-IWRK, IERR )
+      IF( IERR.NE.0 ) THEN
+         IF( IERR.GT.0 .AND. IERR.LE.N ) THEN
+            INFO = IERR
+         ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN
+            INFO = IERR - N
+         ELSE
+            INFO = N + 1
+         END IF
+         GO TO 110
+      END IF
+*
+*     Compute Eigenvectors
+*     (Workspace: need 6*N)
+*
+      IF( ILV ) THEN
+         IF( ILVL ) THEN
+            IF( ILVR ) THEN
+               CHTEMP = 'B'
+            ELSE
+               CHTEMP = 'L'
+            END IF
+         ELSE
+            CHTEMP = 'R'
+         END IF
+         CALL STGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, LDVL,
+     $                VR, LDVR, N, IN, WORK( IWRK ), IERR )
+         IF( IERR.NE.0 ) THEN
+            INFO = N + 2
+            GO TO 110
+         END IF
+*
+*        Undo balancing on VL and VR and normalization
+*        (Workspace: none needed)
+*
+         IF( ILVL ) THEN
+            CALL SGGBAK( 'P', 'L', N, ILO, IHI, WORK( ILEFT ),
+     $                   WORK( IRIGHT ), N, VL, LDVL, IERR )
+            DO 50 JC = 1, N
+               IF( ALPHAI( JC ).LT.ZERO )
+     $            GO TO 50
+               TEMP = ZERO
+               IF( ALPHAI( JC ).EQ.ZERO ) THEN
+                  DO 10 JR = 1, N
+                     TEMP = MAX( TEMP, ABS( VL( JR, JC ) ) )
+   10             CONTINUE
+               ELSE
+                  DO 20 JR = 1, N
+                     TEMP = MAX( TEMP, ABS( VL( JR, JC ) )+
+     $                      ABS( VL( JR, JC+1 ) ) )
+   20             CONTINUE
+               END IF
+               IF( TEMP.LT.SMLNUM )
+     $            GO TO 50
+               TEMP = ONE / TEMP
+               IF( ALPHAI( JC ).EQ.ZERO ) THEN
+                  DO 30 JR = 1, N
+                     VL( JR, JC ) = VL( JR, JC )*TEMP
+   30             CONTINUE
+               ELSE
+                  DO 40 JR = 1, N
+                     VL( JR, JC ) = VL( JR, JC )*TEMP
+                     VL( JR, JC+1 ) = VL( JR, JC+1 )*TEMP
+   40             CONTINUE
+               END IF
+   50       CONTINUE
+         END IF
+         IF( ILVR ) THEN
+            CALL SGGBAK( 'P', 'R', N, ILO, IHI, WORK( ILEFT ),
+     $                   WORK( IRIGHT ), N, VR, LDVR, IERR )
+            DO 100 JC = 1, N
+               IF( ALPHAI( JC ).LT.ZERO )
+     $            GO TO 100
+               TEMP = ZERO
+               IF( ALPHAI( JC ).EQ.ZERO ) THEN
+                  DO 60 JR = 1, N
+                     TEMP = MAX( TEMP, ABS( VR( JR, JC ) ) )
+   60             CONTINUE
+               ELSE
+                  DO 70 JR = 1, N
+                     TEMP = MAX( TEMP, ABS( VR( JR, JC ) )+
+     $                      ABS( VR( JR, JC+1 ) ) )
+   70             CONTINUE
+               END IF
+               IF( TEMP.LT.SMLNUM )
+     $            GO TO 100
+               TEMP = ONE / TEMP
+               IF( ALPHAI( JC ).EQ.ZERO ) THEN
+                  DO 80 JR = 1, N
+                     VR( JR, JC ) = VR( JR, JC )*TEMP
+   80             CONTINUE
+               ELSE
+                  DO 90 JR = 1, N
+                     VR( JR, JC ) = VR( JR, JC )*TEMP
+                     VR( JR, JC+1 ) = VR( JR, JC+1 )*TEMP
+   90             CONTINUE
+               END IF
+  100       CONTINUE
+         END IF
+*
+*        End of eigenvector calculation
+*
+      END IF
+*
+*     Undo scaling if necessary
+*
+      IF( ILASCL ) THEN
+         CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, IERR )
+         CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, IERR )
+      END IF
+*
+      IF( ILBSCL ) THEN
+         CALL SLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR )
+      END IF
+*
+  110 CONTINUE
+*
+      WORK( 1 ) = MAXWRK
+*
+      RETURN
+*
+*     End of SGGEV
+*
+      END
diff --git a/costa/native/external/lapack/sggevx.f b/costa/native/external/lapack/sggevx.f
new file mode 100644
index 000000000..e42999be8
--- /dev/null
+++ b/costa/native/external/lapack/sggevx.f
@@ -0,0 +1,698 @@
+      SUBROUTINE SGGEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, B, LDB,
+     $                   ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, ILO,
+     $                   IHI, LSCALE, RSCALE, ABNRM, BBNRM, RCONDE,
+     $                   RCONDV, WORK, LWORK, IWORK, BWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          BALANC, JOBVL, JOBVR, SENSE
+      INTEGER            IHI, ILO, INFO, LDA, LDB, LDVL, LDVR, LWORK, N
+      REAL               ABNRM, BBNRM
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            BWORK( * )
+      INTEGER            IWORK( * )
+      REAL               A( LDA, * ), ALPHAI( * ), ALPHAR( * ),
+     $                   B( LDB, * ), BETA( * ), LSCALE( * ),
+     $                   RCONDE( * ), RCONDV( * ), RSCALE( * ),
+     $                   VL( LDVL, * ), VR( LDVR, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGGEVX computes for a pair of N-by-N real nonsymmetric matrices (A,B)
+*  the generalized eigenvalues, and optionally, the left and/or right
+*  generalized eigenvectors.
+*
+*  Optionally also, it computes a balancing transformation to improve
+*  the conditioning of the eigenvalues and eigenvectors (ILO, IHI,
+*  LSCALE, RSCALE, ABNRM, and BBNRM), reciprocal condition numbers for
+*  the eigenvalues (RCONDE), and reciprocal condition numbers for the
+*  right eigenvectors (RCONDV).
+*
+*  A generalized eigenvalue for a pair of matrices (A,B) is a scalar
+*  lambda or a ratio alpha/beta = lambda, such that A - lambda*B is
+*  singular. It is usually represented as the pair (alpha,beta), as
+*  there is a reasonable interpretation for beta=0, and even for both
+*  being zero.
+*
+*  The right eigenvector v(j) corresponding to the eigenvalue lambda(j)
+*  of (A,B) satisfies
+*
+*                   A * v(j) = lambda(j) * B * v(j) .
+*
+*  The left eigenvector u(j) corresponding to the eigenvalue lambda(j)
+*  of (A,B) satisfies
+*
+*                   u(j)**H * A  = lambda(j) * u(j)**H * B.
+*
+*  where u(j)**H is the conjugate-transpose of u(j).
+*
+*
+*  Arguments
+*  =========
+*
+*  BALANC  (input) CHARACTER*1
+*          Specifies the balance option to be performed.
+*          = 'N':  do not diagonally scale or permute;
+*          = 'P':  permute only;
+*          = 'S':  scale only;
+*          = 'B':  both permute and scale.
+*          Computed reciprocal condition numbers will be for the
+*          matrices after permuting and/or balancing. Permuting does
+*          not change condition numbers (in exact arithmetic), but
+*          balancing does.
+*
+*  JOBVL   (input) CHARACTER*1
+*          = 'N':  do not compute the left generalized eigenvectors;
+*          = 'V':  compute the left generalized eigenvectors.
+*
+*  JOBVR   (input) CHARACTER*1
+*          = 'N':  do not compute the right generalized eigenvectors;
+*          = 'V':  compute the right generalized eigenvectors.
+*
+*  SENSE   (input) CHARACTER*1
+*          Determines which reciprocal condition numbers are computed.
+*          = 'N': none are computed;
+*          = 'E': computed for eigenvalues only;
+*          = 'V': computed for eigenvectors only;
+*          = 'B': computed for eigenvalues and eigenvectors.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A, B, VL, and VR.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA, N)
+*          On entry, the matrix A in the pair (A,B).
+*          On exit, A has been overwritten. If JOBVL='V' or JOBVR='V'
+*          or both, then A contains the first part of the real Schur
+*          form of the "balanced" versions of the input A and B.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of A.  LDA >= max(1,N).
+*
+*  B       (input/output) REAL array, dimension (LDB, N)
+*          On entry, the matrix B in the pair (A,B).
+*          On exit, B has been overwritten. If JOBVL='V' or JOBVR='V'
+*          or both, then B contains the second part of the real Schur
+*          form of the "balanced" versions of the input A and B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of B.  LDB >= max(1,N).
+*
+*  ALPHAR  (output) REAL array, dimension (N)
+*  ALPHAI  (output) REAL array, dimension (N)
+*  BETA    (output) REAL array, dimension (N)
+*          On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will
+*          be the generalized eigenvalues.  If ALPHAI(j) is zero, then
+*          the j-th eigenvalue is real; if positive, then the j-th and
+*          (j+1)-st eigenvalues are a complex conjugate pair, with
+*          ALPHAI(j+1) negative.
+*
+*          Note: the quotients ALPHAR(j)/BETA(j) and ALPHAI(j)/BETA(j)
+*          may easily over- or underflow, and BETA(j) may even be zero.
+*          Thus, the user should avoid naively computing the ratio
+*          ALPHA/BETA. However, ALPHAR and ALPHAI will be always less
+*          than and usually comparable with norm(A) in magnitude, and
+*          BETA always less than and usually comparable with norm(B).
+*
+*  VL      (output) REAL array, dimension (LDVL,N)
+*          If JOBVL = 'V', the left eigenvectors u(j) are stored one
+*          after another in the columns of VL, in the same order as
+*          their eigenvalues. If the j-th eigenvalue is real, then
+*          u(j) = VL(:,j), the j-th column of VL. If the j-th and
+*          (j+1)-th eigenvalues form a complex conjugate pair, then
+*          u(j) = VL(:,j)+i*VL(:,j+1) and u(j+1) = VL(:,j)-i*VL(:,j+1).
+*          Each eigenvector will be scaled so the largest component have
+*          abs(real part) + abs(imag. part) = 1.
+*          Not referenced if JOBVL = 'N'.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the matrix VL. LDVL >= 1, and
+*          if JOBVL = 'V', LDVL >= N.
+*
+*  VR      (output) REAL array, dimension (LDVR,N)
+*          If JOBVR = 'V', the right eigenvectors v(j) are stored one
+*          after another in the columns of VR, in the same order as
+*          their eigenvalues. If the j-th eigenvalue is real, then
+*          v(j) = VR(:,j), the j-th column of VR. If the j-th and
+*          (j+1)-th eigenvalues form a complex conjugate pair, then
+*          v(j) = VR(:,j)+i*VR(:,j+1) and v(j+1) = VR(:,j)-i*VR(:,j+1).
+*          Each eigenvector will be scaled so the largest component have
+*          abs(real part) + abs(imag. part) = 1.
+*          Not referenced if JOBVR = 'N'.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the matrix VR. LDVR >= 1, and
+*          if JOBVR = 'V', LDVR >= N.
+*
+*  ILO,IHI (output) INTEGER
+*          ILO and IHI are integer values such that on exit
+*          A(i,j) = 0 and B(i,j) = 0 if i > j and
+*          j = 1,...,ILO-1 or i = IHI+1,...,N.
+*          If BALANC = 'N' or 'S', ILO = 1 and IHI = N.
+*
+*  LSCALE  (output) REAL array, dimension (N)
+*          Details of the permutations and scaling factors applied
+*          to the left side of A and B.  If PL(j) is the index of the
+*          row interchanged with row j, and DL(j) is the scaling
+*          factor applied to row j, then
+*            LSCALE(j) = PL(j)  for j = 1,...,ILO-1
+*                      = DL(j)  for j = ILO,...,IHI
+*                      = PL(j)  for j = IHI+1,...,N.
+*          The order in which the interchanges are made is N to IHI+1,
+*          then 1 to ILO-1.
+*
+*  RSCALE  (output) REAL array, dimension (N)
+*          Details of the permutations and scaling factors applied
+*          to the right side of A and B.  If PR(j) is the index of the
+*          column interchanged with column j, and DR(j) is the scaling
+*          factor applied to column j, then
+*            RSCALE(j) = PR(j)  for j = 1,...,ILO-1
+*                      = DR(j)  for j = ILO,...,IHI
+*                      = PR(j)  for j = IHI+1,...,N
+*          The order in which the interchanges are made is N to IHI+1,
+*          then 1 to ILO-1.
+*
+*  ABNRM   (output) REAL
+*          The one-norm of the balanced matrix A.
+*
+*  BBNRM   (output) REAL
+*          The one-norm of the balanced matrix B.
+*
+*  RCONDE  (output) REAL array, dimension (N)
+*          If SENSE = 'E' or 'B', the reciprocal condition numbers of
+*          the selected eigenvalues, stored in consecutive elements of
+*          the array. For a complex conjugate pair of eigenvalues two
+*          consecutive elements of RCONDE are set to the same value.
+*          Thus RCONDE(j), RCONDV(j), and the j-th columns of VL and VR
+*          all correspond to the same eigenpair (but not in general the
+*          j-th eigenpair, unless all eigenpairs are selected).
+*          If SENSE = 'V', RCONDE is not referenced.
+*
+*  RCONDV  (output) REAL array, dimension (N)
+*          If SENSE = 'V' or 'B', the estimated reciprocal condition
+*          numbers of the selected eigenvectors, stored in consecutive
+*          elements of the array. For a complex eigenvector two
+*          consecutive elements of RCONDV are set to the same value. If
+*          the eigenvalues cannot be reordered to compute RCONDV(j),
+*          RCONDV(j) is set to 0; this can only occur when the true
+*          value would be very small anyway.
+*          If SENSE = 'E', RCONDV is not referenced.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,6*N).
+*          If SENSE = 'E', LWORK >= 12*N.
+*          If SENSE = 'V' or 'B', LWORK >= 2*N*N+12*N+16.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace) INTEGER array, dimension (N+6)
+*          If SENSE = 'E', IWORK is not referenced.
+*
+*  BWORK   (workspace) LOGICAL array, dimension (N)
+*          If SENSE = 'N', BWORK is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          = 1,...,N:
+*                The QZ iteration failed.  No eigenvectors have been
+*                calculated, but ALPHAR(j), ALPHAI(j), and BETA(j)
+*                should be correct for j=INFO+1,...,N.
+*          > N:  =N+1: other than QZ iteration failed in SHGEQZ.
+*                =N+2: error return from STGEVC.
+*
+*  Further Details
+*  ===============
+*
+*  Balancing a matrix pair (A,B) includes, first, permuting rows and
+*  columns to isolate eigenvalues, second, applying diagonal similarity
+*  transformation to the rows and columns to make the rows and columns
+*  as close in norm as possible. The computed reciprocal condition
+*  numbers correspond to the balanced matrix. Permuting rows and columns
+*  will not change the condition numbers (in exact arithmetic) but
+*  diagonal scaling will.  For further explanation of balancing, see
+*  section 4.11.1.2 of LAPACK Users' Guide.
+*
+*  An approximate error bound on the chordal distance between the i-th
+*  computed generalized eigenvalue w and the corresponding exact
+*  eigenvalue lambda is
+*
+*       chord(w, lambda) <= EPS * norm(ABNRM, BBNRM) / RCONDE(I)
+*
+*  An approximate error bound for the angle between the i-th computed
+*  eigenvector VL(i) or VR(i) is given by
+*
+*       EPS * norm(ABNRM, BBNRM) / DIF(i).
+*
+*  For further explanation of the reciprocal condition numbers RCONDE
+*  and RCONDV, see section 4.11 of LAPACK User's Guide.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ILASCL, ILBSCL, ILV, ILVL, ILVR, LQUERY, PAIR,
+     $                   WANTSB, WANTSE, WANTSN, WANTSV
+      CHARACTER          CHTEMP
+      INTEGER            I, ICOLS, IERR, IJOBVL, IJOBVR, IN, IROWS,
+     $                   ITAU, IWRK, IWRK1, J, JC, JR, M, MAXWRK,
+     $                   MINWRK, MM
+      REAL               ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS,
+     $                   SMLNUM, TEMP
+*     ..
+*     .. Local Arrays ..
+      LOGICAL            LDUMMA( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEQRF, SGGBAK, SGGBAL, SGGHRD, SHGEQZ, SLABAD,
+     $                   SLACPY, SLASCL, SLASET, SORGQR, SORMQR, STGEVC,
+     $                   STGSNA, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               SLAMCH, SLANGE
+      EXTERNAL           LSAME, ILAENV, SLAMCH, SLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode the input arguments
+*
+      IF( LSAME( JOBVL, 'N' ) ) THEN
+         IJOBVL = 1
+         ILVL = .FALSE.
+      ELSE IF( LSAME( JOBVL, 'V' ) ) THEN
+         IJOBVL = 2
+         ILVL = .TRUE.
+      ELSE
+         IJOBVL = -1
+         ILVL = .FALSE.
+      END IF
+*
+      IF( LSAME( JOBVR, 'N' ) ) THEN
+         IJOBVR = 1
+         ILVR = .FALSE.
+      ELSE IF( LSAME( JOBVR, 'V' ) ) THEN
+         IJOBVR = 2
+         ILVR = .TRUE.
+      ELSE
+         IJOBVR = -1
+         ILVR = .FALSE.
+      END IF
+      ILV = ILVL .OR. ILVR
+*
+      WANTSN = LSAME( SENSE, 'N' )
+      WANTSE = LSAME( SENSE, 'E' )
+      WANTSV = LSAME( SENSE, 'V' )
+      WANTSB = LSAME( SENSE, 'B' )
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.( LSAME( BALANC, 'N' ) .OR. LSAME( BALANC,
+     $    'S' ) .OR. LSAME( BALANC, 'P' ) .OR. LSAME( BALANC, 'B' ) ) )
+     $     THEN
+         INFO = -1
+      ELSE IF( IJOBVL.LE.0 ) THEN
+         INFO = -2
+      ELSE IF( IJOBVR.LE.0 ) THEN
+         INFO = -3
+      ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSB .OR. WANTSV ) )
+     $          THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN
+         INFO = -14
+      ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN
+         INFO = -16
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV. The workspace is
+*       computed assuming ILO = 1 and IHI = N, the worst case.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN
+         MAXWRK = 5*N + N*ILAENV( 1, 'SGEQRF', ' ', N, 1, N, 0 )
+         MINWRK = MAX( 1, 6*N )
+         IF( WANTSE ) THEN
+            MINWRK = MAX( 1, 12*N )
+         ELSE IF( WANTSV .OR. WANTSB ) THEN
+            MINWRK = 2*N*N + 12*N + 16
+            MAXWRK = MAX( MAXWRK, 2*N*N+12*N+16 )
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+*
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
+         INFO = -26
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGGEVX', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SQRT( SMLNUM ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = SLANGE( 'M', N, N, A, LDA, WORK )
+      ILASCL = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         ANRMTO = SMLNUM
+         ILASCL = .TRUE.
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         ANRMTO = BIGNUM
+         ILASCL = .TRUE.
+      END IF
+      IF( ILASCL )
+     $   CALL SLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR )
+*
+*     Scale B if max element outside range [SMLNUM,BIGNUM]
+*
+      BNRM = SLANGE( 'M', N, N, B, LDB, WORK )
+      ILBSCL = .FALSE.
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+         BNRMTO = SMLNUM
+         ILBSCL = .TRUE.
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+         BNRMTO = BIGNUM
+         ILBSCL = .TRUE.
+      END IF
+      IF( ILBSCL )
+     $   CALL SLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR )
+*
+*     Permute and/or balance the matrix pair (A,B)
+*     (Workspace: need 6*N)
+*
+      CALL SGGBAL( BALANC, N, A, LDA, B, LDB, ILO, IHI, LSCALE, RSCALE,
+     $             WORK, IERR )
+*
+*     Compute ABNRM and BBNRM
+*
+      ABNRM = SLANGE( '1', N, N, A, LDA, WORK( 1 ) )
+      IF( ILASCL ) THEN
+         WORK( 1 ) = ABNRM
+         CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, 1, 1, WORK( 1 ), 1,
+     $                IERR )
+         ABNRM = WORK( 1 )
+      END IF
+*
+      BBNRM = SLANGE( '1', N, N, B, LDB, WORK( 1 ) )
+      IF( ILBSCL ) THEN
+         WORK( 1 ) = BBNRM
+         CALL SLASCL( 'G', 0, 0, BNRMTO, BNRM, 1, 1, WORK( 1 ), 1,
+     $                IERR )
+         BBNRM = WORK( 1 )
+      END IF
+*
+*     Reduce B to triangular form (QR decomposition of B)
+*     (Workspace: need N, prefer N*NB )
+*
+      IROWS = IHI + 1 - ILO
+      IF( ILV .OR. .NOT.WANTSN ) THEN
+         ICOLS = N + 1 - ILO
+      ELSE
+         ICOLS = IROWS
+      END IF
+      ITAU = 1
+      IWRK = ITAU + IROWS
+      CALL SGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ),
+     $             WORK( IWRK ), LWORK+1-IWRK, IERR )
+*
+*     Apply the orthogonal transformation to A
+*     (Workspace: need N, prefer N*NB)
+*
+      CALL SORMQR( 'L', 'T', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB,
+     $             WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ),
+     $             LWORK+1-IWRK, IERR )
+*
+*     Initialize VL and/or VR
+*     (Workspace: need N, prefer N*NB)
+*
+      IF( ILVL ) THEN
+         CALL SLASET( 'Full', N, N, ZERO, ONE, VL, LDVL )
+         CALL SLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB,
+     $                VL( ILO+1, ILO ), LDVL )
+         CALL SORGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL,
+     $                WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR )
+      END IF
+*
+      IF( ILVR )
+     $   CALL SLASET( 'Full', N, N, ZERO, ONE, VR, LDVR )
+*
+*     Reduce to generalized Hessenberg form
+*     (Workspace: none needed)
+*
+      IF( ILV .OR. .NOT.WANTSN ) THEN
+*
+*        Eigenvectors requested -- work on whole matrix.
+*
+         CALL SGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL,
+     $                LDVL, VR, LDVR, IERR )
+      ELSE
+         CALL SGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA,
+     $                B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IERR )
+      END IF
+*
+*     Perform QZ algorithm (Compute eigenvalues, and optionally, the
+*     Schur forms and Schur vectors)
+*     (Workspace: need N)
+*
+      IF( ILV .OR. .NOT.WANTSN ) THEN
+         CHTEMP = 'S'
+      ELSE
+         CHTEMP = 'E'
+      END IF
+*
+      CALL SHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB,
+     $             ALPHAR, ALPHAI, BETA, VL, LDVL, VR, LDVR, WORK,
+     $             LWORK, IERR )
+      IF( IERR.NE.0 ) THEN
+         IF( IERR.GT.0 .AND. IERR.LE.N ) THEN
+            INFO = IERR
+         ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN
+            INFO = IERR - N
+         ELSE
+            INFO = N + 1
+         END IF
+         GO TO 130
+      END IF
+*
+*     Compute Eigenvectors and estimate condition numbers if desired
+*     (Workspace: STGEVC: need 6*N
+*                 STGSNA: need 2*N*(N+2)+16 if SENSE = 'V' or 'B',
+*                         need N otherwise )
+*
+      IF( ILV .OR. .NOT.WANTSN ) THEN
+         IF( ILV ) THEN
+            IF( ILVL ) THEN
+               IF( ILVR ) THEN
+                  CHTEMP = 'B'
+               ELSE
+                  CHTEMP = 'L'
+               END IF
+            ELSE
+               CHTEMP = 'R'
+            END IF
+*
+            CALL STGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL,
+     $                   LDVL, VR, LDVR, N, IN, WORK, IERR )
+            IF( IERR.NE.0 ) THEN
+               INFO = N + 2
+               GO TO 130
+            END IF
+         END IF
+*
+         IF( .NOT.WANTSN ) THEN
+*
+*           compute eigenvectors (STGEVC) and estimate condition
+*           numbers (STGSNA). Note that the definition of the condition
+*           number is not invariant under transformation (u,v) to
+*           (Q*u, Z*v), where (u,v) are eigenvectors of the generalized
+*           Schur form (S,T), Q and Z are orthogonal matrices. In order
+*           to avoid using extra 2*N*N workspace, we have to recalculate
+*           eigenvectors and estimate one condition numbers at a time.
+*
+            PAIR = .FALSE.
+            DO 20 I = 1, N
+*
+               IF( PAIR ) THEN
+                  PAIR = .FALSE.
+                  GO TO 20
+               END IF
+               MM = 1
+               IF( I.LT.N ) THEN
+                  IF( A( I+1, I ).NE.ZERO ) THEN
+                     PAIR = .TRUE.
+                     MM = 2
+                  END IF
+               END IF
+*
+               DO 10 J = 1, N
+                  BWORK( J ) = .FALSE.
+   10          CONTINUE
+               IF( MM.EQ.1 ) THEN
+                  BWORK( I ) = .TRUE.
+               ELSE IF( MM.EQ.2 ) THEN
+                  BWORK( I ) = .TRUE.
+                  BWORK( I+1 ) = .TRUE.
+               END IF
+*
+               IWRK = MM*N + 1
+               IWRK1 = IWRK + MM*N
+*
+*              Compute a pair of left and right eigenvectors.
+*              (compute workspace: need up to 4*N + 6*N)
+*
+               IF( WANTSE .OR. WANTSB ) THEN
+                  CALL STGEVC( 'B', 'S', BWORK, N, A, LDA, B, LDB,
+     $                         WORK( 1 ), N, WORK( IWRK ), N, MM, M,
+     $                         WORK( IWRK1 ), IERR )
+                  IF( IERR.NE.0 ) THEN
+                     INFO = N + 2
+                     GO TO 130
+                  END IF
+               END IF
+*
+               CALL STGSNA( SENSE, 'S', BWORK, N, A, LDA, B, LDB,
+     $                      WORK( 1 ), N, WORK( IWRK ), N, RCONDE( I ),
+     $                      RCONDV( I ), MM, M, WORK( IWRK1 ),
+     $                      LWORK-IWRK1+1, IWORK, IERR )
+*
+   20       CONTINUE
+         END IF
+      END IF
+*
+*     Undo balancing on VL and VR and normalization
+*     (Workspace: none needed)
+*
+      IF( ILVL ) THEN
+         CALL SGGBAK( BALANC, 'L', N, ILO, IHI, LSCALE, RSCALE, N, VL,
+     $                LDVL, IERR )
+*
+         DO 70 JC = 1, N
+            IF( ALPHAI( JC ).LT.ZERO )
+     $         GO TO 70
+            TEMP = ZERO
+            IF( ALPHAI( JC ).EQ.ZERO ) THEN
+               DO 30 JR = 1, N
+                  TEMP = MAX( TEMP, ABS( VL( JR, JC ) ) )
+   30          CONTINUE
+            ELSE
+               DO 40 JR = 1, N
+                  TEMP = MAX( TEMP, ABS( VL( JR, JC ) )+
+     $                   ABS( VL( JR, JC+1 ) ) )
+   40          CONTINUE
+            END IF
+            IF( TEMP.LT.SMLNUM )
+     $         GO TO 70
+            TEMP = ONE / TEMP
+            IF( ALPHAI( JC ).EQ.ZERO ) THEN
+               DO 50 JR = 1, N
+                  VL( JR, JC ) = VL( JR, JC )*TEMP
+   50          CONTINUE
+            ELSE
+               DO 60 JR = 1, N
+                  VL( JR, JC ) = VL( JR, JC )*TEMP
+                  VL( JR, JC+1 ) = VL( JR, JC+1 )*TEMP
+   60          CONTINUE
+            END IF
+   70    CONTINUE
+      END IF
+      IF( ILVR ) THEN
+         CALL SGGBAK( BALANC, 'R', N, ILO, IHI, LSCALE, RSCALE, N, VR,
+     $                LDVR, IERR )
+         DO 120 JC = 1, N
+            IF( ALPHAI( JC ).LT.ZERO )
+     $         GO TO 120
+            TEMP = ZERO
+            IF( ALPHAI( JC ).EQ.ZERO ) THEN
+               DO 80 JR = 1, N
+                  TEMP = MAX( TEMP, ABS( VR( JR, JC ) ) )
+   80          CONTINUE
+            ELSE
+               DO 90 JR = 1, N
+                  TEMP = MAX( TEMP, ABS( VR( JR, JC ) )+
+     $                   ABS( VR( JR, JC+1 ) ) )
+   90          CONTINUE
+            END IF
+            IF( TEMP.LT.SMLNUM )
+     $         GO TO 120
+            TEMP = ONE / TEMP
+            IF( ALPHAI( JC ).EQ.ZERO ) THEN
+               DO 100 JR = 1, N
+                  VR( JR, JC ) = VR( JR, JC )*TEMP
+  100          CONTINUE
+            ELSE
+               DO 110 JR = 1, N
+                  VR( JR, JC ) = VR( JR, JC )*TEMP
+                  VR( JR, JC+1 ) = VR( JR, JC+1 )*TEMP
+  110          CONTINUE
+            END IF
+  120    CONTINUE
+      END IF
+*
+*     Undo scaling if necessary
+*
+      IF( ILASCL ) THEN
+         CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAR, N, IERR )
+         CALL SLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHAI, N, IERR )
+      END IF
+*
+      IF( ILBSCL ) THEN
+         CALL SLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR )
+      END IF
+*
+  130 CONTINUE
+      WORK( 1 ) = MAXWRK
+*
+      RETURN
+*
+*     End of SGGEVX
+*
+      END
diff --git a/costa/native/external/lapack/sggglm.f b/costa/native/external/lapack/sggglm.f
new file mode 100644
index 000000000..4abef5320
--- /dev/null
+++ b/costa/native/external/lapack/sggglm.f
@@ -0,0 +1,212 @@
+      SUBROUTINE SGGGLM( N, M, P, A, LDA, B, LDB, D, X, Y, WORK, LWORK,
+     $                   INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, P
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), B( LDB, * ), D( * ), WORK( * ),
+     $                   X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGGGLM solves a general Gauss-Markov linear model (GLM) problem:
+*
+*          minimize || y ||_2   subject to   d = A*x + B*y
+*              x
+*
+*  where A is an N-by-M matrix, B is an N-by-P matrix, and d is a
+*  given N-vector. It is assumed that M <= N <= M+P, and
+*
+*             rank(A) = M    and    rank( A B ) = N.
+*
+*  Under these assumptions, the constrained equation is always
+*  consistent, and there is a unique solution x and a minimal 2-norm
+*  solution y, which is obtained using a generalized QR factorization
+*  of A and B.
+*
+*  In particular, if matrix B is square nonsingular, then the problem
+*  GLM is equivalent to the following weighted linear least squares
+*  problem
+*
+*               minimize || inv(B)*(d-A*x) ||_2
+*                   x
+*
+*  where inv(B) denotes the inverse of B.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of rows of the matrices A and B.  N >= 0.
+*
+*  M       (input) INTEGER
+*          The number of columns of the matrix A.  0 <= M <= N.
+*
+*  P       (input) INTEGER
+*          The number of columns of the matrix B.  P >= N-M.
+*
+*  A       (input/output) REAL array, dimension (LDA,M)
+*          On entry, the N-by-M matrix A.
+*          On exit, A is destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  B       (input/output) REAL array, dimension (LDB,P)
+*          On entry, the N-by-P matrix B.
+*          On exit, B is destroyed.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  D       (input/output) REAL array, dimension (N)
+*          On entry, D is the left hand side of the GLM equation.
+*          On exit, D is destroyed.
+*
+*  X       (output) REAL array, dimension (M)
+*  Y       (output) REAL array, dimension (P)
+*          On exit, X and Y are the solutions of the GLM problem.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,N+M+P).
+*          For optimum performance, LWORK >= M+min(N,P)+max(N,P)*NB,
+*          where NB is an upper bound for the optimal blocksizes for
+*          SGEQRF, SGERQF, SORMQR and SORMRQ.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  ===================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, LOPT, LWKOPT, NB, NB1, NB2, NB3, NB4, NP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SGEMV, SGGQRF, SORMQR, SORMRQ, STRSV,
+     $                   XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV 
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      NP = MIN( N, P )
+      NB1 = ILAENV( 1, 'SGEQRF', ' ', N, M, -1, -1 )
+      NB2 = ILAENV( 1, 'SGERQF', ' ', N, M, -1, -1 )
+      NB3 = ILAENV( 1, 'SORMQR', ' ', N, M, P, -1 )
+      NB4 = ILAENV( 1, 'SORMRQ', ' ', N, M, P, -1 )
+      NB = MAX( NB1, NB2, NB3, NB4 )
+      LWKOPT = M + NP + MAX( N, P )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( M.LT.0 .OR. M.GT.N ) THEN
+         INFO = -2
+      ELSE IF( P.LT.0 .OR. P.LT.N-M ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LWORK.LT.MAX( 1, N+M+P ) .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGGGLM', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Compute the GQR factorization of matrices A and B:
+*
+*            Q'*A = ( R11 ) M,    Q'*B*Z' = ( T11   T12 ) M
+*                   (  0  ) N-M             (  0    T22 ) N-M
+*                      M                     M+P-N  N-M
+*
+*     where R11 and T22 are upper triangular, and Q and Z are
+*     orthogonal.
+*
+      CALL SGGQRF( N, M, P, A, LDA, WORK, B, LDB, WORK( M+1 ),
+     $             WORK( M+NP+1 ), LWORK-M-NP, INFO )
+      LOPT = WORK( M+NP+1 )
+*
+*     Update left-hand-side vector d = Q'*d = ( d1 ) M
+*                                             ( d2 ) N-M
+*
+      CALL SORMQR( 'Left', 'Transpose', N, 1, M, A, LDA, WORK, D,
+     $             MAX( 1, N ), WORK( M+NP+1 ), LWORK-M-NP, INFO )
+      LOPT = MAX( LOPT, INT( WORK( M+NP+1 ) ) )
+*
+*     Solve T22*y2 = d2 for y2
+*
+      CALL STRSV( 'Upper', 'No transpose', 'Non unit', N-M,
+     $            B( M+1, M+P-N+1 ), LDB, D( M+1 ), 1 )
+      CALL SCOPY( N-M, D( M+1 ), 1, Y( M+P-N+1 ), 1 )
+*
+*     Set y1 = 0
+*
+      DO 10 I = 1, M + P - N
+         Y( I ) = ZERO
+   10 CONTINUE
+*
+*     Update d1 = d1 - T12*y2
+*
+      CALL SGEMV( 'No transpose', M, N-M, -ONE, B( 1, M+P-N+1 ), LDB,
+     $            Y( M+P-N+1 ), 1, ONE, D, 1 )
+*
+*     Solve triangular system: R11*x = d1
+*
+      CALL STRSV( 'Upper', 'No Transpose', 'Non unit', M, A, LDA, D, 1 )
+*
+*     Copy D to X
+*
+      CALL SCOPY( M, D, 1, X, 1 )
+*
+*     Backward transformation y = Z'*y
+*
+      CALL SORMRQ( 'Left', 'Transpose', P, 1, NP,
+     $             B( MAX( 1, N-P+1 ), 1 ), LDB, WORK( M+1 ), Y,
+     $             MAX( 1, P ), WORK( M+NP+1 ), LWORK-M-NP, INFO )
+      WORK( 1 ) = M + NP + MAX( LOPT, INT( WORK( M+NP+1 ) ) )
+*
+      RETURN
+*
+*     End of SGGGLM
+*
+      END
diff --git a/costa/native/external/lapack/sgghrd.f b/costa/native/external/lapack/sgghrd.f
new file mode 100644
index 000000000..dcba074e9
--- /dev/null
+++ b/costa/native/external/lapack/sgghrd.f
@@ -0,0 +1,253 @@
+      SUBROUTINE SGGHRD( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q,
+     $                   LDQ, Z, LDZ, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPQ, COMPZ
+      INTEGER            IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGGHRD reduces a pair of real matrices (A,B) to generalized upper
+*  Hessenberg form using orthogonal transformations, where A is a
+*  general matrix and B is upper triangular:  Q' * A * Z = H and
+*  Q' * B * Z = T, where H is upper Hessenberg, T is upper triangular,
+*  and Q and Z are orthogonal, and ' means transpose.
+*
+*  The orthogonal matrices Q and Z are determined as products of Givens
+*  rotations.  They may either be formed explicitly, or they may be
+*  postmultiplied into input matrices Q1 and Z1, so that 
+*
+*       Q1 * A * Z1' = (Q1*Q) * H * (Z1*Z)'
+*       Q1 * B * Z1' = (Q1*Q) * T * (Z1*Z)'
+*
+*  Arguments
+*  =========
+*
+*  COMPQ   (input) CHARACTER*1
+*          = 'N': do not compute Q;
+*          = 'I': Q is initialized to the unit matrix, and the
+*                 orthogonal matrix Q is returned;
+*          = 'V': Q must contain an orthogonal matrix Q1 on entry,
+*                 and the product Q1*Q is returned.
+*
+*  COMPZ   (input) CHARACTER*1
+*          = 'N': do not compute Z;
+*          = 'I': Z is initialized to the unit matrix, and the
+*                 orthogonal matrix Z is returned;
+*          = 'V': Z must contain an orthogonal matrix Z1 on entry,
+*                 and the product Z1*Z is returned.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          It is assumed that A is already upper triangular in rows and
+*          columns 1:ILO-1 and IHI+1:N.  ILO and IHI are normally set
+*          by a previous call to SGGBAL; otherwise they should be set
+*          to 1 and N respectively.
+*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+*
+*  A       (input/output) REAL array, dimension (LDA, N)
+*          On entry, the N-by-N general matrix to be reduced.
+*          On exit, the upper triangle and the first subdiagonal of A
+*          are overwritten with the upper Hessenberg matrix H, and the
+*          rest is set to zero.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input/output) REAL array, dimension (LDB, N)
+*          On entry, the N-by-N upper triangular matrix B.
+*          On exit, the upper triangular matrix T = Q' B Z.  The
+*          elements below the diagonal are set to zero.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  Q       (input/output) REAL array, dimension (LDQ, N)
+*          If COMPQ='N':  Q is not referenced.
+*          If COMPQ='I':  on entry, Q need not be set, and on exit it
+*                         contains the orthogonal matrix Q, where Q'
+*                         is the product of the Givens transformations
+*                         which are applied to A and B on the left.
+*          If COMPQ='V':  on entry, Q must contain an orthogonal matrix
+*                         Q1, and on exit this is overwritten by Q1*Q.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.
+*          LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise.
+*
+*  Z       (input/output) REAL array, dimension (LDZ, N)
+*          If COMPZ='N':  Z is not referenced.
+*          If COMPZ='I':  on entry, Z need not be set, and on exit it 
+*                         contains the orthogonal matrix Z, which is
+*                         the product of the Givens transformations
+*                         which are applied to A and B on the right.
+*          If COMPZ='V':  on entry, Z must contain an orthogonal matrix
+*                         Z1, and on exit this is overwritten by Z1*Z.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.
+*          LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  This routine reduces A to Hessenberg and B to triangular form by
+*  an unblocked reduction, as described in _Matrix_Computations_,
+*  by Golub and Van Loan (Johns Hopkins Press.)
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ILQ, ILZ
+      INTEGER            ICOMPQ, ICOMPZ, JCOL, JROW
+      REAL               C, S, TEMP
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARTG, SLASET, SROT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode COMPQ
+*
+      IF( LSAME( COMPQ, 'N' ) ) THEN
+         ILQ = .FALSE.
+         ICOMPQ = 1
+      ELSE IF( LSAME( COMPQ, 'V' ) ) THEN
+         ILQ = .TRUE.
+         ICOMPQ = 2
+      ELSE IF( LSAME( COMPQ, 'I' ) ) THEN
+         ILQ = .TRUE.
+         ICOMPQ = 3
+      ELSE
+         ICOMPQ = 0
+      END IF
+*
+*     Decode COMPZ
+*
+      IF( LSAME( COMPZ, 'N' ) ) THEN
+         ILZ = .FALSE.
+         ICOMPZ = 1
+      ELSE IF( LSAME( COMPZ, 'V' ) ) THEN
+         ILZ = .TRUE.
+         ICOMPZ = 2
+      ELSE IF( LSAME( COMPZ, 'I' ) ) THEN
+         ILZ = .TRUE.
+         ICOMPZ = 3
+      ELSE
+         ICOMPZ = 0
+      END IF
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( ICOMPQ.LE.0 ) THEN
+         INFO = -1
+      ELSE IF( ICOMPZ.LE.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( ILO.LT.1 ) THEN
+         INFO = -4
+      ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( ( ILQ .AND. LDQ.LT.N ) .OR. LDQ.LT.1 ) THEN
+         INFO = -11
+      ELSE IF( ( ILZ .AND. LDZ.LT.N ) .OR. LDZ.LT.1 ) THEN
+         INFO = -13
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGGHRD', -INFO )
+         RETURN
+      END IF
+*
+*     Initialize Q and Z if desired.
+*
+      IF( ICOMPQ.EQ.3 )
+     $   CALL SLASET( 'Full', N, N, ZERO, ONE, Q, LDQ )
+      IF( ICOMPZ.EQ.3 )
+     $   CALL SLASET( 'Full', N, N, ZERO, ONE, Z, LDZ )
+*
+*     Quick return if possible
+*
+      IF( N.LE.1 )
+     $   RETURN
+*
+*     Zero out lower triangle of B
+*
+      DO 20 JCOL = 1, N - 1
+         DO 10 JROW = JCOL + 1, N
+            B( JROW, JCOL ) = ZERO
+   10    CONTINUE
+   20 CONTINUE
+*
+*     Reduce A and B
+*
+      DO 40 JCOL = ILO, IHI - 2
+*
+         DO 30 JROW = IHI, JCOL + 2, -1
+*
+*           Step 1: rotate rows JROW-1, JROW to kill A(JROW,JCOL)
+*
+            TEMP = A( JROW-1, JCOL )
+            CALL SLARTG( TEMP, A( JROW, JCOL ), C, S,
+     $                   A( JROW-1, JCOL ) )
+            A( JROW, JCOL ) = ZERO
+            CALL SROT( N-JCOL, A( JROW-1, JCOL+1 ), LDA,
+     $                 A( JROW, JCOL+1 ), LDA, C, S )
+            CALL SROT( N+2-JROW, B( JROW-1, JROW-1 ), LDB,
+     $                 B( JROW, JROW-1 ), LDB, C, S )
+            IF( ILQ )
+     $         CALL SROT( N, Q( 1, JROW-1 ), 1, Q( 1, JROW ), 1, C, S )
+*
+*           Step 2: rotate columns JROW, JROW-1 to kill B(JROW,JROW-1)
+*
+            TEMP = B( JROW, JROW )
+            CALL SLARTG( TEMP, B( JROW, JROW-1 ), C, S,
+     $                   B( JROW, JROW ) )
+            B( JROW, JROW-1 ) = ZERO
+            CALL SROT( IHI, A( 1, JROW ), 1, A( 1, JROW-1 ), 1, C, S )
+            CALL SROT( JROW-1, B( 1, JROW ), 1, B( 1, JROW-1 ), 1, C,
+     $                 S )
+            IF( ILZ )
+     $         CALL SROT( N, Z( 1, JROW ), 1, Z( 1, JROW-1 ), 1, C, S )
+   30    CONTINUE
+   40 CONTINUE
+*
+      RETURN
+*
+*     End of SGGHRD
+*
+      END
diff --git a/costa/native/external/lapack/sgglse.f b/costa/native/external/lapack/sgglse.f
new file mode 100644
index 000000000..7f36a7525
--- /dev/null
+++ b/costa/native/external/lapack/sgglse.f
@@ -0,0 +1,217 @@
+      SUBROUTINE SGGLSE( M, N, P, A, LDA, B, LDB, C, D, X, WORK, LWORK,
+     $                   INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, P
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), B( LDB, * ), C( * ), D( * ),
+     $                   WORK( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGGLSE solves the linear equality-constrained least squares (LSE)
+*  problem:
+*
+*          minimize || c - A*x ||_2   subject to   B*x = d
+*
+*  where A is an M-by-N matrix, B is a P-by-N matrix, c is a given
+*  M-vector, and d is a given P-vector. It is assumed that
+*  P <= N <= M+P, and
+*
+*           rank(B) = P and  rank( ( A ) ) = N.
+*                                ( ( B ) )
+*
+*  These conditions ensure that the LSE problem has a unique solution,
+*  which is obtained using a GRQ factorization of the matrices B and A.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrices A and B. N >= 0.
+*
+*  P       (input) INTEGER
+*          The number of rows of the matrix B. 0 <= P <= N <= M+P.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, A is destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  B       (input/output) REAL array, dimension (LDB,N)
+*          On entry, the P-by-N matrix B.
+*          On exit, B is destroyed.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,P).
+*
+*  C       (input/output) REAL array, dimension (M)
+*          On entry, C contains the right hand side vector for the
+*          least squares part of the LSE problem.
+*          On exit, the residual sum of squares for the solution
+*          is given by the sum of squares of elements N-P+1 to M of
+*          vector C.
+*
+*  D       (input/output) REAL array, dimension (P)
+*          On entry, D contains the right hand side vector for the
+*          constrained equation.
+*          On exit, D is destroyed.
+*
+*  X       (output) REAL array, dimension (N)
+*          On exit, X is the solution of the LSE problem.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,M+N+P).
+*          For optimum performance LWORK >= P+min(M,N)+max(M,N)*NB,
+*          where NB is an upper bound for the optimal blocksizes for
+*          SGEQRF, SGERQF, SORMQR and SORMRQ.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            LOPT, LWKOPT, MN, NB, NB1, NB2, NB3, NB4, NR
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SCOPY, SGEMV, SGGRQF, SORMQR, SORMRQ,
+     $                   STRMV, STRSV, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV 
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      MN = MIN( M, N )
+      NB1 = ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
+      NB2 = ILAENV( 1, 'SGERQF', ' ', M, N, -1, -1 )
+      NB3 = ILAENV( 1, 'SORMQR', ' ', M, N, P, -1 )
+      NB4 = ILAENV( 1, 'SORMRQ', ' ', M, N, P, -1 )
+      NB = MAX( NB1, NB2, NB3, NB4 )
+      LWKOPT = P + MN + MAX( M, N )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( P.LT.0 .OR. P.GT.N .OR. P.LT.N-M ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, P ) ) THEN
+         INFO = -7
+      ELSE IF( LWORK.LT.MAX( 1, M+N+P ) .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGGLSE', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Compute the GRQ factorization of matrices B and A:
+*
+*            B*Q' = (  0  T12 ) P   Z'*A*Q' = ( R11 R12 ) N-P
+*                     N-P  P                  (  0  R22 ) M+P-N
+*                                               N-P  P
+*
+*     where T12 and R11 are upper triangular, and Q and Z are
+*     orthogonal.
+*
+      CALL SGGRQF( P, M, N, B, LDB, WORK, A, LDA, WORK( P+1 ),
+     $             WORK( P+MN+1 ), LWORK-P-MN, INFO )
+      LOPT = WORK( P+MN+1 )
+*
+*     Update c = Z'*c = ( c1 ) N-P
+*                       ( c2 ) M+P-N
+*
+      CALL SORMQR( 'Left', 'Transpose', M, 1, MN, A, LDA, WORK( P+1 ),
+     $             C, MAX( 1, M ), WORK( P+MN+1 ), LWORK-P-MN, INFO )
+      LOPT = MAX( LOPT, INT( WORK( P+MN+1 ) ) )
+*
+*     Solve T12*x2 = d for x2
+*
+      CALL STRSV( 'Upper', 'No transpose', 'Non unit', P, B( 1, N-P+1 ),
+     $            LDB, D, 1 )
+*
+*     Update c1
+*
+      CALL SGEMV( 'No transpose', N-P, P, -ONE, A( 1, N-P+1 ), LDA, D,
+     $            1, ONE, C, 1 )
+*
+*     Sovle R11*x1 = c1 for x1
+*
+      CALL STRSV( 'Upper', 'No transpose', 'Non unit', N-P, A, LDA, C,
+     $            1 )
+*
+*     Put the solutions in X
+*
+      CALL SCOPY( N-P, C, 1, X, 1 )
+      CALL SCOPY( P, D, 1, X( N-P+1 ), 1 )
+*
+*     Compute the residual vector:
+*
+      IF( M.LT.N ) THEN
+         NR = M + P - N
+         CALL SGEMV( 'No transpose', NR, N-M, -ONE, A( N-P+1, M+1 ),
+     $               LDA, D( NR+1 ), 1, ONE, C( N-P+1 ), 1 )
+      ELSE
+         NR = P
+      END IF
+      CALL STRMV( 'Upper', 'No transpose', 'Non unit', NR,
+     $            A( N-P+1, N-P+1 ), LDA, D, 1 )
+      CALL SAXPY( NR, -ONE, D, 1, C( N-P+1 ), 1 )
+*
+*     Backward transformation x = Q'*x
+*
+      CALL SORMRQ( 'Left', 'Transpose', N, 1, P, B, LDB, WORK( 1 ), X,
+     $             N, WORK( P+MN+1 ), LWORK-P-MN, INFO )
+      WORK( 1 ) = P + MN + MAX( LOPT, INT( WORK( P+MN+1 ) ) )
+*
+      RETURN
+*
+*     End of SGGLSE
+*
+      END
diff --git a/costa/native/external/lapack/sggqrf.f b/costa/native/external/lapack/sggqrf.f
new file mode 100644
index 000000000..bd26f715f
--- /dev/null
+++ b/costa/native/external/lapack/sggqrf.f
@@ -0,0 +1,212 @@
+      SUBROUTINE SGGQRF( N, M, P, A, LDA, TAUA, B, LDB, TAUB, WORK,
+     $                   LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, P
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGGQRF computes a generalized QR factorization of an N-by-M matrix A
+*  and an N-by-P matrix B:
+*
+*              A = Q*R,        B = Q*T*Z,
+*
+*  where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal
+*  matrix, and R and T assume one of the forms:
+*
+*  if N >= M,  R = ( R11 ) M  ,   or if N < M,  R = ( R11  R12 ) N,
+*                  (  0  ) N-M                         N   M-N
+*                     M
+*
+*  where R11 is upper triangular, and
+*
+*  if N <= P,  T = ( 0  T12 ) N,   or if N > P,  T = ( T11 ) N-P,
+*                   P-N  N                           ( T21 ) P
+*                                                       P
+*
+*  where T12 or T21 is upper triangular.
+*
+*  In particular, if B is square and nonsingular, the GQR factorization
+*  of A and B implicitly gives the QR factorization of inv(B)*A:
+*
+*               inv(B)*A = Z'*(inv(T)*R)
+*
+*  where inv(B) denotes the inverse of the matrix B, and Z' denotes the
+*  transpose of the matrix Z.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of rows of the matrices A and B. N >= 0.
+*
+*  M       (input) INTEGER
+*          The number of columns of the matrix A.  M >= 0.
+*
+*  P       (input) INTEGER
+*          The number of columns of the matrix B.  P >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,M)
+*          On entry, the N-by-M matrix A.
+*          On exit, the elements on and above the diagonal of the array
+*          contain the min(N,M)-by-M upper trapezoidal matrix R (R is
+*          upper triangular if N >= M); the elements below the diagonal,
+*          with the array TAUA, represent the orthogonal matrix Q as a
+*          product of min(N,M) elementary reflectors (see Further
+*          Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  TAUA    (output) REAL array, dimension (min(N,M))
+*          The scalar factors of the elementary reflectors which
+*          represent the orthogonal matrix Q (see Further Details).
+*
+*  B       (input/output) REAL array, dimension (LDB,P)
+*          On entry, the N-by-P matrix B.
+*          On exit, if N <= P, the upper triangle of the subarray
+*          B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T;
+*          if N > P, the elements on and above the (N-P)-th subdiagonal
+*          contain the N-by-P upper trapezoidal matrix T; the remaining
+*          elements, with the array TAUB, represent the orthogonal
+*          matrix Z as a product of elementary reflectors (see Further
+*          Details).
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  TAUB    (output) REAL array, dimension (min(N,P))
+*          The scalar factors of the elementary reflectors which
+*          represent the orthogonal matrix Z (see Further Details).
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,N,M,P).
+*          For optimum performance LWORK >= max(N,M,P)*max(NB1,NB2,NB3),
+*          where NB1 is the optimal blocksize for the QR factorization
+*          of an N-by-M matrix, NB2 is the optimal blocksize for the
+*          RQ factorization of an N-by-P matrix, and NB3 is the optimal
+*          blocksize for a call of SORMQR.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(k), where k = min(n,m).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - taua * v * v'
+*
+*  where taua is a real scalar, and v is a real vector with
+*  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i+1:n,i),
+*  and taua in TAUA(i).
+*  To form Q explicitly, use LAPACK subroutine SORGQR.
+*  To use Q to update another matrix, use LAPACK subroutine SORMQR.
+*
+*  The matrix Z is represented as a product of elementary reflectors
+*
+*     Z = H(1) H(2) . . . H(k), where k = min(n,p).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - taub * v * v'
+*
+*  where taub is a real scalar, and v is a real vector with
+*  v(p-k+i+1:p) = 0 and v(p-k+i) = 1; v(1:p-k+i-1) is stored on exit in
+*  B(n-k+i,1:p-k+i-1), and taub in TAUB(i).
+*  To form Z explicitly, use LAPACK subroutine SORGRQ.
+*  To use Z to update another matrix, use LAPACK subroutine SORMRQ.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            LOPT, LWKOPT, NB, NB1, NB2, NB3
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEQRF, SGERQF, SORMQR, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV 
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      NB1 = ILAENV( 1, 'SGEQRF', ' ', N, M, -1, -1 )
+      NB2 = ILAENV( 1, 'SGERQF', ' ', N, P, -1, -1 )
+      NB3 = ILAENV( 1, 'SORMQR', ' ', N, M, P, -1 )
+      NB = MAX( NB1, NB2, NB3 )
+      LWKOPT = MAX( N, M, P )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( P.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LWORK.LT.MAX( 1, N, M, P ) .AND. .NOT.LQUERY ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGGQRF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     QR factorization of N-by-M matrix A: A = Q*R
+*
+      CALL SGEQRF( N, M, A, LDA, TAUA, WORK, LWORK, INFO )
+      LOPT = WORK( 1 )
+*
+*     Update B := Q'*B.
+*
+      CALL SORMQR( 'Left', 'Transpose', N, P, MIN( N, M ), A, LDA, TAUA,
+     $             B, LDB, WORK, LWORK, INFO )
+      LOPT = MAX( LOPT, INT( WORK( 1 ) ) )
+*
+*     RQ factorization of N-by-P matrix B: B = T*Z.
+*
+      CALL SGERQF( N, P, B, LDB, TAUB, WORK, LWORK, INFO )
+      WORK( 1 ) = MAX( LOPT, INT( WORK( 1 ) ) )
+*
+      RETURN
+*
+*     End of SGGQRF
+*
+      END
diff --git a/costa/native/external/lapack/sggrqf.f b/costa/native/external/lapack/sggrqf.f
new file mode 100644
index 000000000..7af19ecf6
--- /dev/null
+++ b/costa/native/external/lapack/sggrqf.f
@@ -0,0 +1,212 @@
+      SUBROUTINE SGGRQF( M, P, N, A, LDA, TAUA, B, LDB, TAUB, WORK,
+     $                   LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, P
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGGRQF computes a generalized RQ factorization of an M-by-N matrix A
+*  and a P-by-N matrix B:
+*
+*              A = R*Q,        B = Z*T*Q,
+*
+*  where Q is an N-by-N orthogonal matrix, Z is a P-by-P orthogonal
+*  matrix, and R and T assume one of the forms:
+*
+*  if M <= N,  R = ( 0  R12 ) M,   or if M > N,  R = ( R11 ) M-N,
+*                   N-M  M                           ( R21 ) N
+*                                                       N
+*
+*  where R12 or R21 is upper triangular, and
+*
+*  if P >= N,  T = ( T11 ) N  ,   or if P < N,  T = ( T11  T12 ) P,
+*                  (  0  ) P-N                         P   N-P
+*                     N
+*
+*  where T11 is upper triangular.
+*
+*  In particular, if B is square and nonsingular, the GRQ factorization
+*  of A and B implicitly gives the RQ factorization of A*inv(B):
+*
+*               A*inv(B) = (R*inv(T))*Z'
+*
+*  where inv(B) denotes the inverse of the matrix B, and Z' denotes the
+*  transpose of the matrix Z.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  P       (input) INTEGER
+*          The number of rows of the matrix B.  P >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrices A and B. N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, if M <= N, the upper triangle of the subarray
+*          A(1:M,N-M+1:N) contains the M-by-M upper triangular matrix R;
+*          if M > N, the elements on and above the (M-N)-th subdiagonal
+*          contain the M-by-N upper trapezoidal matrix R; the remaining
+*          elements, with the array TAUA, represent the orthogonal
+*          matrix Q as a product of elementary reflectors (see Further
+*          Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  TAUA    (output) REAL array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors which
+*          represent the orthogonal matrix Q (see Further Details).
+*
+*  B       (input/output) REAL array, dimension (LDB,N)
+*          On entry, the P-by-N matrix B.
+*          On exit, the elements on and above the diagonal of the array
+*          contain the min(P,N)-by-N upper trapezoidal matrix T (T is
+*          upper triangular if P >= N); the elements below the diagonal,
+*          with the array TAUB, represent the orthogonal matrix Z as a
+*          product of elementary reflectors (see Further Details).
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,P).
+*
+*  TAUB    (output) REAL array, dimension (min(P,N))
+*          The scalar factors of the elementary reflectors which
+*          represent the orthogonal matrix Z (see Further Details).
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,N,M,P).
+*          For optimum performance LWORK >= max(N,M,P)*max(NB1,NB2,NB3),
+*          where NB1 is the optimal blocksize for the RQ factorization
+*          of an M-by-N matrix, NB2 is the optimal blocksize for the
+*          QR factorization of a P-by-N matrix, and NB3 is the optimal
+*          blocksize for a call of SORMRQ.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INF0= -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(k), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - taua * v * v'
+*
+*  where taua is a real scalar, and v is a real vector with
+*  v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in
+*  A(m-k+i,1:n-k+i-1), and taua in TAUA(i).
+*  To form Q explicitly, use LAPACK subroutine SORGRQ.
+*  To use Q to update another matrix, use LAPACK subroutine SORMRQ.
+*
+*  The matrix Z is represented as a product of elementary reflectors
+*
+*     Z = H(1) H(2) . . . H(k), where k = min(p,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - taub * v * v'
+*
+*  where taub is a real scalar, and v is a real vector with
+*  v(1:i-1) = 0 and v(i) = 1; v(i+1:p) is stored on exit in B(i+1:p,i),
+*  and taub in TAUB(i).
+*  To form Z explicitly, use LAPACK subroutine SORGQR.
+*  To use Z to update another matrix, use LAPACK subroutine SORMQR.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            LOPT, LWKOPT, NB, NB1, NB2, NB3
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEQRF, SGERQF, SORMRQ, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV 
+      EXTERNAL           ILAENV 
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      NB1 = ILAENV( 1, 'SGERQF', ' ', M, N, -1, -1 )
+      NB2 = ILAENV( 1, 'SGEQRF', ' ', P, N, -1, -1 )
+      NB3 = ILAENV( 1, 'SORMRQ', ' ', M, N, P, -1 )
+      NB = MAX( NB1, NB2, NB3 )
+      LWKOPT = MAX( N, M, P)*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( P.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, P ) ) THEN
+         INFO = -8
+      ELSE IF( LWORK.LT.MAX( 1, M, P, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGGRQF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     RQ factorization of M-by-N matrix A: A = R*Q
+*
+      CALL SGERQF( M, N, A, LDA, TAUA, WORK, LWORK, INFO )
+      LOPT = WORK( 1 )
+*
+*     Update B := B*Q'
+*
+      CALL SORMRQ( 'Right', 'Transpose', P, N, MIN( M, N ),
+     $             A( MAX( 1, M-N+1 ), 1 ), LDA, TAUA, B, LDB, WORK,
+     $             LWORK, INFO )
+      LOPT = MAX( LOPT, INT( WORK( 1 ) ) )
+*
+*     QR factorization of P-by-N matrix B: B = Z*T
+*
+      CALL SGEQRF( P, N, B, LDB, TAUB, WORK, LWORK, INFO )
+      WORK( 1 ) = MAX( LOPT, INT( WORK( 1 ) ) )
+*
+      RETURN
+*
+*     End of SGGRQF
+*
+      END
diff --git a/costa/native/external/lapack/sggsvd.f b/costa/native/external/lapack/sggsvd.f
new file mode 100644
index 000000000..1e2612843
--- /dev/null
+++ b/costa/native/external/lapack/sggsvd.f
@@ -0,0 +1,336 @@
+      SUBROUTINE SGGSVD( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B,
+     $                   LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, WORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBQ, JOBU, JOBV
+      INTEGER            INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               A( LDA, * ), ALPHA( * ), B( LDB, * ),
+     $                   BETA( * ), Q( LDQ, * ), U( LDU, * ),
+     $                   V( LDV, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGGSVD computes the generalized singular value decomposition (GSVD)
+*  of an M-by-N real matrix A and P-by-N real matrix B:
+*
+*      U'*A*Q = D1*( 0 R ),    V'*B*Q = D2*( 0 R )
+*
+*  where U, V and Q are orthogonal matrices, and Z' is the transpose
+*  of Z.  Let K+L = the effective numerical rank of the matrix (A',B')',
+*  then R is a K+L-by-K+L nonsingular upper triangular matrix, D1 and
+*  D2 are M-by-(K+L) and P-by-(K+L) "diagonal" matrices and of the
+*  following structures, respectively:
+*
+*  If M-K-L >= 0,
+*
+*                      K  L
+*         D1 =     K ( I  0 )
+*                  L ( 0  C )
+*              M-K-L ( 0  0 )
+*
+*                    K  L
+*         D2 =   L ( 0  S )
+*              P-L ( 0  0 )
+*
+*                  N-K-L  K    L
+*    ( 0 R ) = K (  0   R11  R12 )
+*              L (  0    0   R22 )
+*
+*  where
+*
+*    C = diag( ALPHA(K+1), ... , ALPHA(K+L) ),
+*    S = diag( BETA(K+1),  ... , BETA(K+L) ),
+*    C**2 + S**2 = I.
+*
+*    R is stored in A(1:K+L,N-K-L+1:N) on exit.
+*
+*  If M-K-L < 0,
+*
+*                    K M-K K+L-M
+*         D1 =   K ( I  0    0   )
+*              M-K ( 0  C    0   )
+*
+*                      K M-K K+L-M
+*         D2 =   M-K ( 0  S    0  )
+*              K+L-M ( 0  0    I  )
+*                P-L ( 0  0    0  )
+*
+*                     N-K-L  K   M-K  K+L-M
+*    ( 0 R ) =     K ( 0    R11  R12  R13  )
+*                M-K ( 0     0   R22  R23  )
+*              K+L-M ( 0     0    0   R33  )
+*
+*  where
+*
+*    C = diag( ALPHA(K+1), ... , ALPHA(M) ),
+*    S = diag( BETA(K+1),  ... , BETA(M) ),
+*    C**2 + S**2 = I.
+*
+*    (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored
+*    ( 0  R22 R23 )
+*    in B(M-K+1:L,N+M-K-L+1:N) on exit.
+*
+*  The routine computes C, S, R, and optionally the orthogonal
+*  transformation matrices U, V and Q.
+*
+*  In particular, if B is an N-by-N nonsingular matrix, then the GSVD of
+*  A and B implicitly gives the SVD of A*inv(B):
+*                       A*inv(B) = U*(D1*inv(D2))*V'.
+*  If ( A',B')' has orthonormal columns, then the GSVD of A and B is
+*  also equal to the CS decomposition of A and B. Furthermore, the GSVD
+*  can be used to derive the solution of the eigenvalue problem:
+*                       A'*A x = lambda* B'*B x.
+*  In some literature, the GSVD of A and B is presented in the form
+*                   U'*A*X = ( 0 D1 ),   V'*B*X = ( 0 D2 )
+*  where U and V are orthogonal and X is nonsingular, D1 and D2 are
+*  ``diagonal''.  The former GSVD form can be converted to the latter
+*  form by taking the nonsingular matrix X as
+*
+*                       X = Q*( I   0    )
+*                             ( 0 inv(R) ).
+*
+*  Arguments
+*  =========
+*
+*  JOBU    (input) CHARACTER*1
+*          = 'U':  Orthogonal matrix U is computed;
+*          = 'N':  U is not computed.
+*
+*  JOBV    (input) CHARACTER*1
+*          = 'V':  Orthogonal matrix V is computed;
+*          = 'N':  V is not computed.
+*
+*  JOBQ    (input) CHARACTER*1
+*          = 'Q':  Orthogonal matrix Q is computed;
+*          = 'N':  Q is not computed.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrices A and B.  N >= 0.
+*
+*  P       (input) INTEGER
+*          The number of rows of the matrix B.  P >= 0.
+*
+*  K       (output) INTEGER
+*  L       (output) INTEGER
+*          On exit, K and L specify the dimension of the subblocks
+*          described in the Purpose section.
+*          K + L = effective numerical rank of (A',B')'.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, A contains the triangular matrix R, or part of R.
+*          See Purpose for details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  B       (input/output) REAL array, dimension (LDB,N)
+*          On entry, the P-by-N matrix B.
+*          On exit, B contains the triangular matrix R if M-K-L < 0.
+*          See Purpose for details.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDA >= max(1,P).
+*
+*  ALPHA   (output) REAL array, dimension (N)
+*  BETA    (output) REAL array, dimension (N)
+*          On exit, ALPHA and BETA contain the generalized singular
+*          value pairs of A and B;
+*            ALPHA(1:K) = 1,
+*            BETA(1:K)  = 0,
+*          and if M-K-L >= 0,
+*            ALPHA(K+1:K+L) = C,
+*            BETA(K+1:K+L)  = S,
+*          or if M-K-L < 0,
+*            ALPHA(K+1:M)=C, ALPHA(M+1:K+L)=0
+*            BETA(K+1:M) =S, BETA(M+1:K+L) =1
+*          and
+*            ALPHA(K+L+1:N) = 0
+*            BETA(K+L+1:N)  = 0
+*
+*  U       (output) REAL array, dimension (LDU,M)
+*          If JOBU = 'U', U contains the M-by-M orthogonal matrix U.
+*          If JOBU = 'N', U is not referenced.
+*
+*  LDU     (input) INTEGER
+*          The leading dimension of the array U. LDU >= max(1,M) if
+*          JOBU = 'U'; LDU >= 1 otherwise.
+*
+*  V       (output) REAL array, dimension (LDV,P)
+*          If JOBV = 'V', V contains the P-by-P orthogonal matrix V.
+*          If JOBV = 'N', V is not referenced.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V. LDV >= max(1,P) if
+*          JOBV = 'V'; LDV >= 1 otherwise.
+*
+*  Q       (output) REAL array, dimension (LDQ,N)
+*          If JOBQ = 'Q', Q contains the N-by-N orthogonal matrix Q.
+*          If JOBQ = 'N', Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q. LDQ >= max(1,N) if
+*          JOBQ = 'Q'; LDQ >= 1 otherwise.
+*
+*  WORK    (workspace) REAL array,
+*                      dimension (max(3*N,M,P)+N)
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (N)
+*          On exit, IWORK stores the sorting information. More
+*          precisely, the following loop will sort ALPHA
+*             for I = K+1, min(M,K+L)
+*                 swap ALPHA(I) and ALPHA(IWORK(I))
+*             endfor
+*          such that ALPHA(1) >= ALPHA(2) >= ... >= ALPHA(N).
+*
+*  INFO    (output)INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = 1, the Jacobi-type procedure failed to
+*                converge.  For further details, see subroutine STGSJA.
+*
+*  Internal Parameters
+*  ===================
+*
+*  TOLA    REAL
+*  TOLB    REAL
+*          TOLA and TOLB are the thresholds to determine the effective
+*          rank of (A',B')'. Generally, they are set to
+*                   TOLA = MAX(M,N)*norm(A)*MACHEPS,
+*                   TOLB = MAX(P,N)*norm(B)*MACHEPS.
+*          The size of TOLA and TOLB may affect the size of backward
+*          errors of the decomposition.
+*
+*  Further Details
+*  ===============
+*
+*  2-96 Based on modifications by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            WANTQ, WANTU, WANTV
+      INTEGER            I, IBND, ISUB, J, NCYCLE
+      REAL               ANORM, BNORM, SMAX, TEMP, TOLA, TOLB, ULP, UNFL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH, SLANGE
+      EXTERNAL           LSAME, SLAMCH, SLANGE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SGGSVP, STGSJA, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      WANTU = LSAME( JOBU, 'U' )
+      WANTV = LSAME( JOBV, 'V' )
+      WANTQ = LSAME( JOBQ, 'Q' )
+*
+      INFO = 0
+      IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( P.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LDB.LT.MAX( 1, P ) ) THEN
+         INFO = -12
+      ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN
+         INFO = -16
+      ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN
+         INFO = -18
+      ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN
+         INFO = -20
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGGSVD', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the Frobenius norm of matrices A and B
+*
+      ANORM = SLANGE( '1', M, N, A, LDA, WORK )
+      BNORM = SLANGE( '1', P, N, B, LDB, WORK )
+*
+*     Get machine precision and set up threshold for determining
+*     the effective numerical rank of the matrices A and B.
+*
+      ULP = SLAMCH( 'Precision' )
+      UNFL = SLAMCH( 'Safe Minimum' )
+      TOLA = MAX( M, N )*MAX( ANORM, UNFL )*ULP
+      TOLB = MAX( P, N )*MAX( BNORM, UNFL )*ULP
+*
+*     Preprocessing
+*
+      CALL SGGSVP( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, TOLA,
+     $             TOLB, K, L, U, LDU, V, LDV, Q, LDQ, IWORK, WORK,
+     $             WORK( N+1 ), INFO )
+*
+*     Compute the GSVD of two upper "triangular" matrices
+*
+      CALL STGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B, LDB,
+     $             TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ,
+     $             WORK, NCYCLE, INFO )
+*
+*     Sort the singular values and store the pivot indices in IWORK
+*     Copy ALPHA to WORK, then sort ALPHA in WORK
+*
+      CALL SCOPY( N, ALPHA, 1, WORK, 1 )
+      IBND = MIN( L, M-K )
+      DO 20 I = 1, IBND
+*
+*        Scan for largest ALPHA(K+I)
+*
+         ISUB = I
+         SMAX = WORK( K+I )
+         DO 10 J = I + 1, IBND
+            TEMP = WORK( K+J )
+            IF( TEMP.GT.SMAX ) THEN
+               ISUB = J
+               SMAX = TEMP
+            END IF
+   10    CONTINUE
+         IF( ISUB.NE.I ) THEN
+            WORK( K+ISUB ) = WORK( K+I )
+            WORK( K+I ) = SMAX
+            IWORK( K+I ) = K + ISUB
+         ELSE
+            IWORK( K+I ) = K + I
+         END IF
+   20 CONTINUE
+*
+      RETURN
+*
+*     End of SGGSVD
+*
+      END
diff --git a/costa/native/external/lapack/sggsvp.f b/costa/native/external/lapack/sggsvp.f
new file mode 100644
index 000000000..fcd021870
--- /dev/null
+++ b/costa/native/external/lapack/sggsvp.f
@@ -0,0 +1,394 @@
+      SUBROUTINE SGGSVP( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB,
+     $                   TOLA, TOLB, K, L, U, LDU, V, LDV, Q, LDQ,
+     $                   IWORK, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBQ, JOBU, JOBV
+      INTEGER            INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P
+      REAL               TOLA, TOLB
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
+     $                   TAU( * ), U( LDU, * ), V( LDV, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGGSVP computes orthogonal matrices U, V and Q such that
+*
+*                   N-K-L  K    L
+*   U'*A*Q =     K ( 0    A12  A13 )  if M-K-L >= 0;
+*                L ( 0     0   A23 )
+*            M-K-L ( 0     0    0  )
+*
+*                   N-K-L  K    L
+*          =     K ( 0    A12  A13 )  if M-K-L < 0;
+*              M-K ( 0     0   A23 )
+*
+*                 N-K-L  K    L
+*   V'*B*Q =   L ( 0     0   B13 )
+*            P-L ( 0     0    0  )
+*
+*  where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular
+*  upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0,
+*  otherwise A23 is (M-K)-by-L upper trapezoidal.  K+L = the effective
+*  numerical rank of the (M+P)-by-N matrix (A',B')'.  Z' denotes the
+*  transpose of Z.
+*
+*  This decomposition is the preprocessing step for computing the
+*  Generalized Singular Value Decomposition (GSVD), see subroutine
+*  SGGSVD.
+*
+*  Arguments
+*  =========
+*
+*  JOBU    (input) CHARACTER*1
+*          = 'U':  Orthogonal matrix U is computed;
+*          = 'N':  U is not computed.
+*
+*  JOBV    (input) CHARACTER*1
+*          = 'V':  Orthogonal matrix V is computed;
+*          = 'N':  V is not computed.
+*
+*  JOBQ    (input) CHARACTER*1
+*          = 'Q':  Orthogonal matrix Q is computed;
+*          = 'N':  Q is not computed.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  P       (input) INTEGER
+*          The number of rows of the matrix B.  P >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrices A and B.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, A contains the triangular (or trapezoidal) matrix
+*          described in the Purpose section.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  B       (input/output) REAL array, dimension (LDB,N)
+*          On entry, the P-by-N matrix B.
+*          On exit, B contains the triangular matrix described in
+*          the Purpose section.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,P).
+*
+*  TOLA    (input) REAL
+*  TOLB    (input) REAL
+*          TOLA and TOLB are the thresholds to determine the effective
+*          numerical rank of matrix B and a subblock of A. Generally,
+*          they are set to
+*             TOLA = MAX(M,N)*norm(A)*MACHEPS,
+*             TOLB = MAX(P,N)*norm(B)*MACHEPS.
+*          The size of TOLA and TOLB may affect the size of backward
+*          errors of the decomposition.
+*
+*  K       (output) INTEGER
+*  L       (output) INTEGER
+*          On exit, K and L specify the dimension of the subblocks
+*          described in Purpose.
+*          K + L = effective numerical rank of (A',B')'.
+*
+*  U       (output) REAL array, dimension (LDU,M)
+*          If JOBU = 'U', U contains the orthogonal matrix U.
+*          If JOBU = 'N', U is not referenced.
+*
+*  LDU     (input) INTEGER
+*          The leading dimension of the array U. LDU >= max(1,M) if
+*          JOBU = 'U'; LDU >= 1 otherwise.
+*
+*  V       (output) REAL array, dimension (LDV,M)
+*          If JOBV = 'V', V contains the orthogonal matrix V.
+*          If JOBV = 'N', V is not referenced.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V. LDV >= max(1,P) if
+*          JOBV = 'V'; LDV >= 1 otherwise.
+*
+*  Q       (output) REAL array, dimension (LDQ,N)
+*          If JOBQ = 'Q', Q contains the orthogonal matrix Q.
+*          If JOBQ = 'N', Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q. LDQ >= max(1,N) if
+*          JOBQ = 'Q'; LDQ >= 1 otherwise.
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  TAU     (workspace) REAL array, dimension (N)
+*
+*  WORK    (workspace) REAL array, dimension (max(3*N,M,P))
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*
+*  Further Details
+*  ===============
+*
+*  The subroutine uses LAPACK subroutine SGEQPF for the QR factorization
+*  with column pivoting to detect the effective numerical rank of the
+*  a matrix. It may be replaced by a better rank determination strategy.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            FORWRD, WANTQ, WANTU, WANTV
+      INTEGER            I, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEQPF, SGEQR2, SGERQ2, SLACPY, SLAPMT, SLASET,
+     $                   SORG2R, SORM2R, SORMR2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      WANTU = LSAME( JOBU, 'U' )
+      WANTV = LSAME( JOBV, 'V' )
+      WANTQ = LSAME( JOBQ, 'Q' )
+      FORWRD = .TRUE.
+*
+      INFO = 0
+      IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( P.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -8
+      ELSE IF( LDB.LT.MAX( 1, P ) ) THEN
+         INFO = -10
+      ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN
+         INFO = -16
+      ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN
+         INFO = -18
+      ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN
+         INFO = -20
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGGSVP', -INFO )
+         RETURN
+      END IF
+*
+*     QR with column pivoting of B: B*P = V*( S11 S12 )
+*                                           (  0   0  )
+*
+      DO 10 I = 1, N
+         IWORK( I ) = 0
+   10 CONTINUE
+      CALL SGEQPF( P, N, B, LDB, IWORK, TAU, WORK, INFO )
+*
+*     Update A := A*P
+*
+      CALL SLAPMT( FORWRD, M, N, A, LDA, IWORK )
+*
+*     Determine the effective rank of matrix B.
+*
+      L = 0
+      DO 20 I = 1, MIN( P, N )
+         IF( ABS( B( I, I ) ).GT.TOLB )
+     $      L = L + 1
+   20 CONTINUE
+*
+      IF( WANTV ) THEN
+*
+*        Copy the details of V, and form V.
+*
+         CALL SLASET( 'Full', P, P, ZERO, ZERO, V, LDV )
+         IF( P.GT.1 )
+     $      CALL SLACPY( 'Lower', P-1, N, B( 2, 1 ), LDB, V( 2, 1 ),
+     $                   LDV )
+         CALL SORG2R( P, P, MIN( P, N ), V, LDV, TAU, WORK, INFO )
+      END IF
+*
+*     Clean up B
+*
+      DO 40 J = 1, L - 1
+         DO 30 I = J + 1, L
+            B( I, J ) = ZERO
+   30    CONTINUE
+   40 CONTINUE
+      IF( P.GT.L )
+     $   CALL SLASET( 'Full', P-L, N, ZERO, ZERO, B( L+1, 1 ), LDB )
+*
+      IF( WANTQ ) THEN
+*
+*        Set Q = I and Update Q := Q*P
+*
+         CALL SLASET( 'Full', N, N, ZERO, ONE, Q, LDQ )
+         CALL SLAPMT( FORWRD, N, N, Q, LDQ, IWORK )
+      END IF
+*
+      IF( P.GE.L .AND. N.NE.L ) THEN
+*
+*        RQ factorization of (S11 S12): ( S11 S12 ) = ( 0 S12 )*Z
+*
+         CALL SGERQ2( L, N, B, LDB, TAU, WORK, INFO )
+*
+*        Update A := A*Z'
+*
+         CALL SORMR2( 'Right', 'Transpose', M, N, L, B, LDB, TAU, A,
+     $                LDA, WORK, INFO )
+*
+         IF( WANTQ ) THEN
+*
+*           Update Q := Q*Z'
+*
+            CALL SORMR2( 'Right', 'Transpose', N, N, L, B, LDB, TAU, Q,
+     $                   LDQ, WORK, INFO )
+         END IF
+*
+*        Clean up B
+*
+         CALL SLASET( 'Full', L, N-L, ZERO, ZERO, B, LDB )
+         DO 60 J = N - L + 1, N
+            DO 50 I = J - N + L + 1, L
+               B( I, J ) = ZERO
+   50       CONTINUE
+   60    CONTINUE
+*
+      END IF
+*
+*     Let              N-L     L
+*                A = ( A11    A12 ) M,
+*
+*     then the following does the complete QR decomposition of A11:
+*
+*              A11 = U*(  0  T12 )*P1'
+*                      (  0   0  )
+*
+      DO 70 I = 1, N - L
+         IWORK( I ) = 0
+   70 CONTINUE
+      CALL SGEQPF( M, N-L, A, LDA, IWORK, TAU, WORK, INFO )
+*
+*     Determine the effective rank of A11
+*
+      K = 0
+      DO 80 I = 1, MIN( M, N-L )
+         IF( ABS( A( I, I ) ).GT.TOLA )
+     $      K = K + 1
+   80 CONTINUE
+*
+*     Update A12 := U'*A12, where A12 = A( 1:M, N-L+1:N )
+*
+      CALL SORM2R( 'Left', 'Transpose', M, L, MIN( M, N-L ), A, LDA,
+     $             TAU, A( 1, N-L+1 ), LDA, WORK, INFO )
+*
+      IF( WANTU ) THEN
+*
+*        Copy the details of U, and form U
+*
+         CALL SLASET( 'Full', M, M, ZERO, ZERO, U, LDU )
+         IF( M.GT.1 )
+     $      CALL SLACPY( 'Lower', M-1, N-L, A( 2, 1 ), LDA, U( 2, 1 ),
+     $                   LDU )
+         CALL SORG2R( M, M, MIN( M, N-L ), U, LDU, TAU, WORK, INFO )
+      END IF
+*
+      IF( WANTQ ) THEN
+*
+*        Update Q( 1:N, 1:N-L )  = Q( 1:N, 1:N-L )*P1
+*
+         CALL SLAPMT( FORWRD, N, N-L, Q, LDQ, IWORK )
+      END IF
+*
+*     Clean up A: set the strictly lower triangular part of
+*     A(1:K, 1:K) = 0, and A( K+1:M, 1:N-L ) = 0.
+*
+      DO 100 J = 1, K - 1
+         DO 90 I = J + 1, K
+            A( I, J ) = ZERO
+   90    CONTINUE
+  100 CONTINUE
+      IF( M.GT.K )
+     $   CALL SLASET( 'Full', M-K, N-L, ZERO, ZERO, A( K+1, 1 ), LDA )
+*
+      IF( N-L.GT.K ) THEN
+*
+*        RQ factorization of ( T11 T12 ) = ( 0 T12 )*Z1
+*
+         CALL SGERQ2( K, N-L, A, LDA, TAU, WORK, INFO )
+*
+         IF( WANTQ ) THEN
+*
+*           Update Q( 1:N,1:N-L ) = Q( 1:N,1:N-L )*Z1'
+*
+            CALL SORMR2( 'Right', 'Transpose', N, N-L, K, A, LDA, TAU,
+     $                   Q, LDQ, WORK, INFO )
+         END IF
+*
+*        Clean up A
+*
+         CALL SLASET( 'Full', K, N-L-K, ZERO, ZERO, A, LDA )
+         DO 120 J = N - L - K + 1, N - L
+            DO 110 I = J - N + L + K + 1, K
+               A( I, J ) = ZERO
+  110       CONTINUE
+  120    CONTINUE
+*
+      END IF
+*
+      IF( M.GT.K ) THEN
+*
+*        QR factorization of A( K+1:M,N-L+1:N )
+*
+         CALL SGEQR2( M-K, L, A( K+1, N-L+1 ), LDA, TAU, WORK, INFO )
+*
+         IF( WANTU ) THEN
+*
+*           Update U(:,K+1:M) := U(:,K+1:M)*U1
+*
+            CALL SORM2R( 'Right', 'No transpose', M, M-K, MIN( M-K, L ),
+     $                   A( K+1, N-L+1 ), LDA, TAU, U( 1, K+1 ), LDU,
+     $                   WORK, INFO )
+         END IF
+*
+*        Clean up
+*
+         DO 140 J = N - L + 1, N
+            DO 130 I = J - N + K + L + 1, M
+               A( I, J ) = ZERO
+  130       CONTINUE
+  140    CONTINUE
+*
+      END IF
+*
+      RETURN
+*
+*     End of SGGSVP
+*
+      END
diff --git a/costa/native/external/lapack/sgtcon.f b/costa/native/external/lapack/sgtcon.f
new file mode 100644
index 000000000..c719e6998
--- /dev/null
+++ b/costa/native/external/lapack/sgtcon.f
@@ -0,0 +1,166 @@
+      SUBROUTINE SGTCON( NORM, N, DL, D, DU, DU2, IPIV, ANORM, RCOND,
+     $                   WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            INFO, N
+      REAL               ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), IWORK( * )
+      REAL               D( * ), DL( * ), DU( * ), DU2( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGTCON estimates the reciprocal of the condition number of a real
+*  tridiagonal matrix A using the LU factorization as computed by
+*  SGTTRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies whether the 1-norm condition number or the
+*          infinity-norm condition number is required:
+*          = '1' or 'O':  1-norm;
+*          = 'I':         Infinity-norm.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  DL      (input) REAL array, dimension (N-1)
+*          The (n-1) multipliers that define the matrix L from the
+*          LU factorization of A as computed by SGTTRF.
+*
+*  D       (input) REAL array, dimension (N)
+*          The n diagonal elements of the upper triangular matrix U from
+*          the LU factorization of A.
+*
+*  DU      (input) REAL array, dimension (N-1)
+*          The (n-1) elements of the first superdiagonal of U.
+*
+*  DU2     (input) REAL array, dimension (N-2)
+*          The (n-2) elements of the second superdiagonal of U.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices; for 1 <= i <= n, row i of the matrix was
+*          interchanged with row IPIV(i).  IPIV(i) will always be either
+*          i or i+1; IPIV(i) = i indicates a row interchange was not
+*          required.
+*
+*  ANORM   (input) REAL
+*          If NORM = '1' or 'O', the 1-norm of the original matrix A.
+*          If NORM = 'I', the infinity-norm of the original matrix A.
+*
+*  RCOND   (output) REAL
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an
+*          estimate of the 1-norm of inv(A) computed in this routine.
+*
+*  WORK    (workspace) REAL array, dimension (2*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ONENRM
+      INTEGER            I, KASE, KASE1
+      REAL               AINVNM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGTTRS, SLACON, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments.
+*
+      INFO = 0
+      ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' )
+      IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGTCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.EQ.ZERO ) THEN
+         RETURN
+      END IF
+*
+*     Check that D(1:N) is non-zero.
+*
+      DO 10 I = 1, N
+         IF( D( I ).EQ.ZERO )
+     $      RETURN
+   10 CONTINUE
+*
+      AINVNM = ZERO
+      IF( ONENRM ) THEN
+         KASE1 = 1
+      ELSE
+         KASE1 = 2
+      END IF
+      KASE = 0
+   20 CONTINUE
+      CALL SLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+         IF( KASE.EQ.KASE1 ) THEN
+*
+*           Multiply by inv(U)*inv(L).
+*
+            CALL SGTTRS( 'No transpose', N, 1, DL, D, DU, DU2, IPIV,
+     $                   WORK, N, INFO )
+         ELSE
+*
+*           Multiply by inv(L')*inv(U').
+*
+            CALL SGTTRS( 'Transpose', N, 1, DL, D, DU, DU2, IPIV, WORK,
+     $                   N, INFO )
+         END IF
+         GO TO 20
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+      RETURN
+*
+*     End of SGTCON
+*
+      END
diff --git a/costa/native/external/lapack/sgtrfs.f b/costa/native/external/lapack/sgtrfs.f
new file mode 100644
index 000000000..531cf7dea
--- /dev/null
+++ b/costa/native/external/lapack/sgtrfs.f
@@ -0,0 +1,357 @@
+      SUBROUTINE SGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2,
+     $                   IPIV, B, LDB, X, LDX, FERR, BERR, WORK, IWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            INFO, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), IWORK( * )
+      REAL               B( LDB, * ), BERR( * ), D( * ), DF( * ),
+     $                   DL( * ), DLF( * ), DU( * ), DU2( * ), DUF( * ),
+     $                   FERR( * ), WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGTRFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is tridiagonal, and provides
+*  error bounds and backward error estimates for the solution.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose = Transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  DL      (input) REAL array, dimension (N-1)
+*          The (n-1) subdiagonal elements of A.
+*
+*  D       (input) REAL array, dimension (N)
+*          The diagonal elements of A.
+*
+*  DU      (input) REAL array, dimension (N-1)
+*          The (n-1) superdiagonal elements of A.
+*
+*  DLF     (input) REAL array, dimension (N-1)
+*          The (n-1) multipliers that define the matrix L from the
+*          LU factorization of A as computed by SGTTRF.
+*
+*  DF      (input) REAL array, dimension (N)
+*          The n diagonal elements of the upper triangular matrix U from
+*          the LU factorization of A.
+*
+*  DUF     (input) REAL array, dimension (N-1)
+*          The (n-1) elements of the first superdiagonal of U.
+*
+*  DU2     (input) REAL array, dimension (N-2)
+*          The (n-2) elements of the second superdiagonal of U.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices; for 1 <= i <= n, row i of the matrix was
+*          interchanged with row IPIV(i).  IPIV(i) will always be either
+*          i or i+1; IPIV(i) = i indicates a row interchange was not
+*          required.
+*
+*  B       (input) REAL array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) REAL array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by SGTTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) REAL array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+      REAL               TWO
+      PARAMETER          ( TWO = 2.0E+0 )
+      REAL               THREE
+      PARAMETER          ( THREE = 3.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN
+      CHARACTER          TRANSN, TRANST
+      INTEGER            COUNT, I, J, KASE, NZ
+      REAL               EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SCOPY, SGTTRS, SLACON, SLAGTM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $    LSAME( TRANS, 'C' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -13
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -15
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGTRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      IF( NOTRAN ) THEN
+         TRANSN = 'N'
+         TRANST = 'T'
+      ELSE
+         TRANSN = 'T'
+         TRANST = 'N'
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = 4
+      EPS = SLAMCH( 'Epsilon' )
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 110 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - op(A) * X,
+*        where op(A) = A, A**T, or A**H, depending on TRANS.
+*
+         CALL SCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 )
+         CALL SLAGTM( TRANS, N, 1, -ONE, DL, D, DU, X( 1, J ), LDX, ONE,
+     $                WORK( N+1 ), N )
+*
+*        Compute abs(op(A))*abs(x) + abs(b) for use in the backward
+*        error bound.
+*
+         IF( NOTRAN ) THEN
+            IF( N.EQ.1 ) THEN
+               WORK( 1 ) = ABS( B( 1, J ) ) + ABS( D( 1 )*X( 1, J ) )
+            ELSE
+               WORK( 1 ) = ABS( B( 1, J ) ) + ABS( D( 1 )*X( 1, J ) ) +
+     $                     ABS( DU( 1 )*X( 2, J ) )
+               DO 30 I = 2, N - 1
+                  WORK( I ) = ABS( B( I, J ) ) +
+     $                        ABS( DL( I-1 )*X( I-1, J ) ) +
+     $                        ABS( D( I )*X( I, J ) ) +
+     $                        ABS( DU( I )*X( I+1, J ) )
+   30          CONTINUE
+               WORK( N ) = ABS( B( N, J ) ) +
+     $                     ABS( DL( N-1 )*X( N-1, J ) ) +
+     $                     ABS( D( N )*X( N, J ) )
+            END IF
+         ELSE
+            IF( N.EQ.1 ) THEN
+               WORK( 1 ) = ABS( B( 1, J ) ) + ABS( D( 1 )*X( 1, J ) )
+            ELSE
+               WORK( 1 ) = ABS( B( 1, J ) ) + ABS( D( 1 )*X( 1, J ) ) +
+     $                     ABS( DL( 1 )*X( 2, J ) )
+               DO 40 I = 2, N - 1
+                  WORK( I ) = ABS( B( I, J ) ) +
+     $                        ABS( DU( I-1 )*X( I-1, J ) ) +
+     $                        ABS( D( I )*X( I, J ) ) +
+     $                        ABS( DL( I )*X( I+1, J ) )
+   40          CONTINUE
+               WORK( N ) = ABS( B( N, J ) ) +
+     $                     ABS( DU( N-1 )*X( N-1, J ) ) +
+     $                     ABS( D( N )*X( N, J ) )
+            END IF
+         END IF
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         S = ZERO
+         DO 50 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) )
+            ELSE
+               S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) /
+     $             ( WORK( I )+SAFE1 ) )
+            END IF
+   50    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL SGTTRS( TRANS, N, 1, DLF, DF, DUF, DU2, IPIV,
+     $                   WORK( N+1 ), N, INFO )
+            CALL SAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(op(A)))*
+*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(op(A)) is the inverse of op(A)
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(op(A))*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use SLACON to estimate the infinity-norm of the matrix
+*           inv(op(A)) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) )))
+*
+         DO 60 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I )
+            ELSE
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1
+            END IF
+   60    CONTINUE
+*
+         KASE = 0
+   70    CONTINUE
+         CALL SLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ),
+     $                KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(op(A)**T).
+*
+               CALL SGTTRS( TRANST, N, 1, DLF, DF, DUF, DU2, IPIV,
+     $                      WORK( N+1 ), N, INFO )
+               DO 80 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+   80          CONTINUE
+            ELSE
+*
+*              Multiply by inv(op(A))*diag(W).
+*
+               DO 90 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+   90          CONTINUE
+               CALL SGTTRS( TRANSN, N, 1, DLF, DF, DUF, DU2, IPIV,
+     $                      WORK( N+1 ), N, INFO )
+            END IF
+            GO TO 70
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 100 I = 1, N
+            LSTRES = MAX( LSTRES, ABS( X( I, J ) ) )
+  100    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  110 CONTINUE
+*
+      RETURN
+*
+*     End of SGTRFS
+*
+      END
diff --git a/costa/native/external/lapack/sgtsv.f b/costa/native/external/lapack/sgtsv.f
new file mode 100644
index 000000000..7422e47d5
--- /dev/null
+++ b/costa/native/external/lapack/sgtsv.f
@@ -0,0 +1,263 @@
+      SUBROUTINE SGTSV( N, NRHS, DL, D, DU, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      REAL               B( LDB, * ), D( * ), DL( * ), DU( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGTSV  solves the equation
+*
+*     A*X = B,
+*
+*  where A is an n by n tridiagonal matrix, by Gaussian elimination with
+*  partial pivoting.
+*
+*  Note that the equation  A'*X = B  may be solved by interchanging the
+*  order of the arguments DU and DL.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  DL      (input/output) REAL array, dimension (N-1)
+*          On entry, DL must contain the (n-1) sub-diagonal elements of
+*          A.
+*
+*          On exit, DL is overwritten by the (n-2) elements of the
+*          second super-diagonal of the upper triangular matrix U from
+*          the LU factorization of A, in DL(1), ..., DL(n-2).
+*
+*  D       (input/output) REAL array, dimension (N)
+*          On entry, D must contain the diagonal elements of A.
+*
+*          On exit, D is overwritten by the n diagonal elements of U.
+*
+*  DU      (input/output) REAL array, dimension (N-1)
+*          On entry, DU must contain the (n-1) super-diagonal elements
+*          of A.
+*
+*          On exit, DU is overwritten by the (n-1) elements of the first
+*          super-diagonal of U.
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the N by NRHS matrix of right hand side matrix B.
+*          On exit, if INFO = 0, the N by NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, U(i,i) is exactly zero, and the solution
+*               has not been computed.  The factorization has not been
+*               completed unless i = N.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      REAL               FACT, TEMP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGTSV ', -INFO )
+         RETURN
+      END IF
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( NRHS.EQ.1 ) THEN
+         DO 10 I = 1, N - 2
+            IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN
+*
+*              No row interchange required
+*
+               IF( D( I ).NE.ZERO ) THEN
+                  FACT = DL( I ) / D( I )
+                  D( I+1 ) = D( I+1 ) - FACT*DU( I )
+                  B( I+1, 1 ) = B( I+1, 1 ) - FACT*B( I, 1 )
+               ELSE
+                  INFO = I
+                  RETURN
+               END IF
+               DL( I ) = ZERO
+            ELSE
+*
+*              Interchange rows I and I+1
+*
+               FACT = D( I ) / DL( I )
+               D( I ) = DL( I )
+               TEMP = D( I+1 )
+               D( I+1 ) = DU( I ) - FACT*TEMP
+               DL( I ) = DU( I+1 )
+               DU( I+1 ) = -FACT*DL( I )
+               DU( I ) = TEMP
+               TEMP = B( I, 1 )
+               B( I, 1 ) = B( I+1, 1 )
+               B( I+1, 1 ) = TEMP - FACT*B( I+1, 1 )
+            END IF
+   10    CONTINUE
+         IF( N.GT.1 ) THEN
+            I = N - 1
+            IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN
+               IF( D( I ).NE.ZERO ) THEN
+                  FACT = DL( I ) / D( I )
+                  D( I+1 ) = D( I+1 ) - FACT*DU( I )
+                  B( I+1, 1 ) = B( I+1, 1 ) - FACT*B( I, 1 )
+               ELSE
+                  INFO = I
+                  RETURN
+               END IF
+            ELSE
+               FACT = D( I ) / DL( I )
+               D( I ) = DL( I )
+               TEMP = D( I+1 )
+               D( I+1 ) = DU( I ) - FACT*TEMP
+               DU( I ) = TEMP
+               TEMP = B( I, 1 )
+               B( I, 1 ) = B( I+1, 1 )
+               B( I+1, 1 ) = TEMP - FACT*B( I+1, 1 )
+            END IF
+         END IF
+         IF( D( N ).EQ.ZERO ) THEN
+            INFO = N
+            RETURN
+         END IF
+      ELSE
+         DO 40 I = 1, N - 2
+            IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN
+*
+*              No row interchange required
+*
+               IF( D( I ).NE.ZERO ) THEN
+                  FACT = DL( I ) / D( I )
+                  D( I+1 ) = D( I+1 ) - FACT*DU( I )
+                  DO 20 J = 1, NRHS
+                     B( I+1, J ) = B( I+1, J ) - FACT*B( I, J )
+   20             CONTINUE
+               ELSE
+                  INFO = I
+                  RETURN
+               END IF
+               DL( I ) = ZERO
+            ELSE
+*
+*              Interchange rows I and I+1
+*
+               FACT = D( I ) / DL( I )
+               D( I ) = DL( I )
+               TEMP = D( I+1 )
+               D( I+1 ) = DU( I ) - FACT*TEMP
+               DL( I ) = DU( I+1 )
+               DU( I+1 ) = -FACT*DL( I )
+               DU( I ) = TEMP
+               DO 30 J = 1, NRHS
+                  TEMP = B( I, J )
+                  B( I, J ) = B( I+1, J )
+                  B( I+1, J ) = TEMP - FACT*B( I+1, J )
+   30          CONTINUE
+            END IF
+   40    CONTINUE
+         IF( N.GT.1 ) THEN
+            I = N - 1
+            IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN
+               IF( D( I ).NE.ZERO ) THEN
+                  FACT = DL( I ) / D( I )
+                  D( I+1 ) = D( I+1 ) - FACT*DU( I )
+                  DO 50 J = 1, NRHS
+                     B( I+1, J ) = B( I+1, J ) - FACT*B( I, J )
+   50             CONTINUE
+               ELSE
+                  INFO = I
+                  RETURN
+               END IF
+            ELSE
+               FACT = D( I ) / DL( I )
+               D( I ) = DL( I )
+               TEMP = D( I+1 )
+               D( I+1 ) = DU( I ) - FACT*TEMP
+               DU( I ) = TEMP
+               DO 60 J = 1, NRHS
+                  TEMP = B( I, J )
+                  B( I, J ) = B( I+1, J )
+                  B( I+1, J ) = TEMP - FACT*B( I+1, J )
+   60          CONTINUE
+            END IF
+         END IF
+         IF( D( N ).EQ.ZERO ) THEN
+            INFO = N
+            RETURN
+         END IF
+      END IF
+*
+*     Back solve with the matrix U from the factorization.
+*
+      IF( NRHS.LE.2 ) THEN
+         J = 1
+   70    CONTINUE
+         B( N, J ) = B( N, J ) / D( N )
+         IF( N.GT.1 )
+     $      B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / D( N-1 )
+         DO 80 I = N - 2, 1, -1
+            B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DL( I )*
+     $                  B( I+2, J ) ) / D( I )
+   80    CONTINUE
+         IF( J.LT.NRHS ) THEN
+            J = J + 1
+            GO TO 70
+         END IF
+      ELSE
+         DO 100 J = 1, NRHS
+            B( N, J ) = B( N, J ) / D( N )
+            IF( N.GT.1 )
+     $         B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) /
+     $                       D( N-1 )
+            DO 90 I = N - 2, 1, -1
+               B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DL( I )*
+     $                     B( I+2, J ) ) / D( I )
+   90       CONTINUE
+  100    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of SGTSV
+*
+      END
diff --git a/costa/native/external/lapack/sgtsvx.f b/costa/native/external/lapack/sgtsvx.f
new file mode 100644
index 000000000..2188bd6a2
--- /dev/null
+++ b/costa/native/external/lapack/sgtsvx.f
@@ -0,0 +1,293 @@
+      SUBROUTINE SGTSVX( FACT, TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF,
+     $                   DU2, IPIV, B, LDB, X, LDX, RCOND, FERR, BERR,
+     $                   WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          FACT, TRANS
+      INTEGER            INFO, LDB, LDX, N, NRHS
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), IWORK( * )
+      REAL               B( LDB, * ), BERR( * ), D( * ), DF( * ),
+     $                   DL( * ), DLF( * ), DU( * ), DU2( * ), DUF( * ),
+     $                   FERR( * ), WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGTSVX uses the LU factorization to compute the solution to a real
+*  system of linear equations A * X = B or A**T * X = B,
+*  where A is a tridiagonal matrix of order N and X and B are N-by-NRHS
+*  matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'N', the LU decomposition is used to factor the matrix A
+*     as A = L * U, where L is a product of permutation and unit lower
+*     bidiagonal matrices and U is upper triangular with nonzeros in
+*     only the main diagonal and first two superdiagonals.
+*
+*  2. If some U(i,i)=0, so that U is exactly singular, then the routine
+*     returns with INFO = i. Otherwise, the factored form of A is used
+*     to estimate the condition number of the matrix A.  If the
+*     reciprocal of the condition number is less than machine precision,
+*     INFO = N+1 is returned as a warning, but the routine still goes on
+*     to solve for X and compute error bounds as described below.
+*
+*  3. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  4. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of A has been
+*          supplied on entry.
+*          = 'F':  DLF, DF, DUF, DU2, and IPIV contain the factored
+*                  form of A; DL, D, DU, DLF, DF, DUF, DU2 and IPIV
+*                  will not be modified.
+*          = 'N':  The matrix will be copied to DLF, DF, and DUF
+*                  and factored.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose = Transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  DL      (input) REAL array, dimension (N-1)
+*          The (n-1) subdiagonal elements of A.
+*
+*  D       (input) REAL array, dimension (N)
+*          The n diagonal elements of A.
+*
+*  DU      (input) REAL array, dimension (N-1)
+*          The (n-1) superdiagonal elements of A.
+*
+*  DLF     (input or output) REAL array, dimension (N-1)
+*          If FACT = 'F', then DLF is an input argument and on entry
+*          contains the (n-1) multipliers that define the matrix L from
+*          the LU factorization of A as computed by SGTTRF.
+*
+*          If FACT = 'N', then DLF is an output argument and on exit
+*          contains the (n-1) multipliers that define the matrix L from
+*          the LU factorization of A.
+*
+*  DF      (input or output) REAL array, dimension (N)
+*          If FACT = 'F', then DF is an input argument and on entry
+*          contains the n diagonal elements of the upper triangular
+*          matrix U from the LU factorization of A.
+*
+*          If FACT = 'N', then DF is an output argument and on exit
+*          contains the n diagonal elements of the upper triangular
+*          matrix U from the LU factorization of A.
+*
+*  DUF     (input or output) REAL array, dimension (N-1)
+*          If FACT = 'F', then DUF is an input argument and on entry
+*          contains the (n-1) elements of the first superdiagonal of U.
+*
+*          If FACT = 'N', then DUF is an output argument and on exit
+*          contains the (n-1) elements of the first superdiagonal of U.
+*
+*  DU2     (input or output) REAL array, dimension (N-2)
+*          If FACT = 'F', then DU2 is an input argument and on entry
+*          contains the (n-2) elements of the second superdiagonal of
+*          U.
+*
+*          If FACT = 'N', then DU2 is an output argument and on exit
+*          contains the (n-2) elements of the second superdiagonal of
+*          U.
+*
+*  IPIV    (input or output) INTEGER array, dimension (N)
+*          If FACT = 'F', then IPIV is an input argument and on entry
+*          contains the pivot indices from the LU factorization of A as
+*          computed by SGTTRF.
+*
+*          If FACT = 'N', then IPIV is an output argument and on exit
+*          contains the pivot indices from the LU factorization of A;
+*          row i of the matrix was interchanged with row IPIV(i).
+*          IPIV(i) will always be either i or i+1; IPIV(i) = i indicates
+*          a row interchange was not required.
+*
+*  B       (input) REAL array, dimension (LDB,NRHS)
+*          The N-by-NRHS right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) REAL array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) REAL
+*          The estimate of the reciprocal condition number of the matrix
+*          A.  If RCOND is less than the machine precision (in
+*          particular, if RCOND = 0), the matrix is singular to working
+*          precision.  This condition is indicated by a return code of
+*          INFO > 0.
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) REAL array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= N:  U(i,i) is exactly zero.  The factorization
+*                       has not been completed unless i = N, but the
+*                       factor U is exactly singular, so the solution
+*                       and error bounds could not be computed.
+*                       RCOND = 0 is returned.
+*                = N+1: U is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOFACT, NOTRAN
+      CHARACTER          NORM
+      REAL               ANORM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH, SLANGT
+      EXTERNAL           LSAME, SLAMCH, SLANGT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SGTCON, SGTRFS, SGTTRF, SGTTRS, SLACPY,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -14
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -16
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGTSVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( NOFACT ) THEN
+*
+*        Compute the LU factorization of A.
+*
+         CALL SCOPY( N, D, 1, DF, 1 )
+         IF( N.GT.1 ) THEN
+            CALL SCOPY( N-1, DL, 1, DLF, 1 )
+            CALL SCOPY( N-1, DU, 1, DUF, 1 )
+         END IF
+         CALL SGTTRF( N, DLF, DF, DUF, DU2, IPIV, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      IF( NOTRAN ) THEN
+         NORM = '1'
+      ELSE
+         NORM = 'I'
+      END IF
+      ANORM = SLANGT( NORM, N, DL, D, DU )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL SGTCON( NORM, N, DLF, DF, DUF, DU2, IPIV, ANORM, RCOND, WORK,
+     $             IWORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.SLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution vectors X.
+*
+      CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL SGTTRS( TRANS, N, NRHS, DLF, DF, DUF, DU2, IPIV, X, LDX,
+     $             INFO )
+*
+*     Use iterative refinement to improve the computed solutions and
+*     compute error bounds and backward error estimates for them.
+*
+      CALL SGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2, IPIV,
+     $             B, LDB, X, LDX, FERR, BERR, WORK, IWORK, INFO )
+*
+      RETURN
+*
+*     End of SGTSVX
+*
+      END
diff --git a/costa/native/external/lapack/sgttrf.f b/costa/native/external/lapack/sgttrf.f
new file mode 100644
index 000000000..c7732ba80
--- /dev/null
+++ b/costa/native/external/lapack/sgttrf.f
@@ -0,0 +1,169 @@
+      SUBROUTINE SGTTRF( N, DL, D, DU, DU2, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               D( * ), DL( * ), DU( * ), DU2( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGTTRF computes an LU factorization of a real tridiagonal matrix A
+*  using elimination with partial pivoting and row interchanges.
+*
+*  The factorization has the form
+*     A = L * U
+*  where L is a product of permutation and unit lower bidiagonal
+*  matrices and U is upper triangular with nonzeros in only the main
+*  diagonal and first two superdiagonals.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.
+*
+*  DL      (input/output) REAL array, dimension (N-1)
+*          On entry, DL must contain the (n-1) sub-diagonal elements of
+*          A.
+*
+*          On exit, DL is overwritten by the (n-1) multipliers that
+*          define the matrix L from the LU factorization of A.
+*
+*  D       (input/output) REAL array, dimension (N)
+*          On entry, D must contain the diagonal elements of A.
+*
+*          On exit, D is overwritten by the n diagonal elements of the
+*          upper triangular matrix U from the LU factorization of A.
+*
+*  DU      (input/output) REAL array, dimension (N-1)
+*          On entry, DU must contain the (n-1) super-diagonal elements
+*          of A.
+*
+*          On exit, DU is overwritten by the (n-1) elements of the first
+*          super-diagonal of U.
+*
+*  DU2     (output) REAL array, dimension (N-2)
+*          On exit, DU2 is overwritten by the (n-2) elements of the
+*          second super-diagonal of U.
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          The pivot indices; for 1 <= i <= n, row i of the matrix was
+*          interchanged with row IPIV(i).  IPIV(i) will always be either
+*          i or i+1; IPIV(i) = i indicates a row interchange was not
+*          required.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -k, the k-th argument had an illegal value
+*          > 0:  if INFO = k, U(k,k) is exactly zero. The factorization
+*                has been completed, but the factor U is exactly
+*                singular, and division by zero will occur if it is used
+*                to solve a system of equations.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      REAL               FACT, TEMP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+         CALL XERBLA( 'SGTTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Initialize IPIV(i) = i and DU2(I) = 0
+*
+      DO 10 I = 1, N
+         IPIV( I ) = I
+   10 CONTINUE
+      DO 20 I = 1, N - 2
+         DU2( I ) = ZERO
+   20 CONTINUE
+*
+      DO 30 I = 1, N - 2
+         IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN
+*
+*           No row interchange required, eliminate DL(I)
+*
+            IF( D( I ).NE.ZERO ) THEN
+               FACT = DL( I ) / D( I )
+               DL( I ) = FACT
+               D( I+1 ) = D( I+1 ) - FACT*DU( I )
+            END IF
+         ELSE
+*
+*           Interchange rows I and I+1, eliminate DL(I)
+*
+            FACT = D( I ) / DL( I )
+            D( I ) = DL( I )
+            DL( I ) = FACT
+            TEMP = DU( I )
+            DU( I ) = D( I+1 )
+            D( I+1 ) = TEMP - FACT*D( I+1 )
+            DU2( I ) = DU( I+1 )
+            DU( I+1 ) = -FACT*DU( I+1 )
+            IPIV( I ) = I + 1
+         END IF
+   30 CONTINUE
+      IF( N.GT.1 ) THEN
+         I = N - 1
+         IF( ABS( D( I ) ).GE.ABS( DL( I ) ) ) THEN
+            IF( D( I ).NE.ZERO ) THEN
+               FACT = DL( I ) / D( I )
+               DL( I ) = FACT
+               D( I+1 ) = D( I+1 ) - FACT*DU( I )
+            END IF
+         ELSE
+            FACT = D( I ) / DL( I )
+            D( I ) = DL( I )
+            DL( I ) = FACT
+            TEMP = DU( I )
+            DU( I ) = D( I+1 )
+            D( I+1 ) = TEMP - FACT*D( I+1 )
+            IPIV( I ) = I + 1
+         END IF
+      END IF
+*
+*     Check for a zero on the diagonal of U.
+*
+      DO 40 I = 1, N
+         IF( D( I ).EQ.ZERO ) THEN
+            INFO = I
+            GO TO 50
+         END IF
+   40 CONTINUE
+   50 CONTINUE
+*
+      RETURN
+*
+*     End of SGTTRF
+*
+      END
diff --git a/costa/native/external/lapack/sgttrs.f b/costa/native/external/lapack/sgttrs.f
new file mode 100644
index 000000000..4d19cc570
--- /dev/null
+++ b/costa/native/external/lapack/sgttrs.f
@@ -0,0 +1,141 @@
+      SUBROUTINE SGTTRS( TRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGTTRS solves one of the systems of equations
+*     A*X = B  or  A'*X = B,
+*  with a tridiagonal matrix A using the LU factorization computed
+*  by SGTTRF.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER
+*          Specifies the form of the system of equations.
+*          = 'N':  A * X = B  (No transpose)
+*          = 'T':  A'* X = B  (Transpose)
+*          = 'C':  A'* X = B  (Conjugate transpose = Transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  DL      (input) REAL array, dimension (N-1)
+*          The (n-1) multipliers that define the matrix L from the
+*          LU factorization of A.
+*
+*  D       (input) REAL array, dimension (N)
+*          The n diagonal elements of the upper triangular matrix U from
+*          the LU factorization of A.
+*
+*  DU      (input) REAL array, dimension (N-1)
+*          The (n-1) elements of the first super-diagonal of U.
+*
+*  DU2     (input) REAL array, dimension (N-2)
+*          The (n-2) elements of the second super-diagonal of U.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices; for 1 <= i <= n, row i of the matrix was
+*          interchanged with row IPIV(i).  IPIV(i) will always be either
+*          i or i+1; IPIV(i) = i indicates a row interchange was not
+*          required.
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the matrix of right hand side vectors B.
+*          On exit, B is overwritten by the solution vectors X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN
+      INTEGER            ITRANS, J, JB, NB
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGTTS2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      NOTRAN = ( TRANS.EQ.'N' .OR. TRANS.EQ.'n' )
+      IF( .NOT.NOTRAN .AND. .NOT.( TRANS.EQ.'T' .OR. TRANS.EQ.
+     $    't' ) .AND. .NOT.( TRANS.EQ.'C' .OR. TRANS.EQ.'c' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( N, 1 ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGTTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+*     Decode TRANS
+*
+      IF( NOTRAN ) THEN
+         ITRANS = 0
+      ELSE
+         ITRANS = 1
+      END IF
+*
+*     Determine the number of right-hand sides to solve at a time.
+*
+      IF( NRHS.EQ.1 ) THEN
+         NB = 1
+      ELSE
+         NB = MAX( 1, ILAENV( 1, 'SGTTRS', TRANS, N, NRHS, -1, -1 ) )
+      END IF
+*
+      IF( NB.GE.NRHS ) THEN
+         CALL SGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB )
+      ELSE
+         DO 10 J = 1, NRHS, NB
+            JB = MIN( NRHS-J+1, NB )
+            CALL SGTTS2( ITRANS, N, JB, DL, D, DU, DU2, IPIV, B( 1, J ),
+     $                   LDB )
+   10    CONTINUE
+      END IF
+*
+*     End of SGTTRS
+*
+      END
diff --git a/costa/native/external/lapack/sgtts2.f b/costa/native/external/lapack/sgtts2.f
new file mode 100644
index 000000000..268fdb968
--- /dev/null
+++ b/costa/native/external/lapack/sgtts2.f
@@ -0,0 +1,197 @@
+      SUBROUTINE SGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            ITRANS, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SGTTS2 solves one of the systems of equations
+*     A*X = B  or  A'*X = B,
+*  with a tridiagonal matrix A using the LU factorization computed
+*  by SGTTRF.
+*
+*  Arguments
+*  =========
+*
+*  ITRANS  (input) INTEGER
+*          Specifies the form of the system of equations.
+*          = 0:  A * X = B  (No transpose)
+*          = 1:  A'* X = B  (Transpose)
+*          = 2:  A'* X = B  (Conjugate transpose = Transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  DL      (input) REAL array, dimension (N-1)
+*          The (n-1) multipliers that define the matrix L from the
+*          LU factorization of A.
+*
+*  D       (input) REAL array, dimension (N)
+*          The n diagonal elements of the upper triangular matrix U from
+*          the LU factorization of A.
+*
+*  DU      (input) REAL array, dimension (N-1)
+*          The (n-1) elements of the first super-diagonal of U.
+*
+*  DU2     (input) REAL array, dimension (N-2)
+*          The (n-2) elements of the second super-diagonal of U.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices; for 1 <= i <= n, row i of the matrix was
+*          interchanged with row IPIV(i).  IPIV(i) will always be either
+*          i or i+1; IPIV(i) = i indicates a row interchange was not
+*          required.
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the matrix of right hand side vectors B.
+*          On exit, B is overwritten by the solution vectors X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, IP, J
+      REAL               TEMP
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( ITRANS.EQ.0 ) THEN
+*
+*        Solve A*X = B using the LU factorization of A,
+*        overwriting each right hand side vector with its solution.
+*
+         IF( NRHS.LE.1 ) THEN
+            J = 1
+   10       CONTINUE
+*
+*           Solve L*x = b.
+*
+            DO 20 I = 1, N - 1
+               IP = IPIV( I )
+               TEMP = B( I+1-IP+I, J ) - DL( I )*B( IP, J )
+               B( I, J ) = B( IP, J )
+               B( I+1, J ) = TEMP
+   20       CONTINUE
+*
+*           Solve U*x = b.
+*
+            B( N, J ) = B( N, J ) / D( N )
+            IF( N.GT.1 )
+     $         B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) /
+     $                       D( N-1 )
+            DO 30 I = N - 2, 1, -1
+               B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DU2( I )*
+     $                     B( I+2, J ) ) / D( I )
+   30       CONTINUE
+            IF( J.LT.NRHS ) THEN
+               J = J + 1
+               GO TO 10
+            END IF
+         ELSE
+            DO 60 J = 1, NRHS
+*
+*              Solve L*x = b.
+*
+               DO 40 I = 1, N - 1
+                  IF( IPIV( I ).EQ.I ) THEN
+                     B( I+1, J ) = B( I+1, J ) - DL( I )*B( I, J )
+                  ELSE
+                     TEMP = B( I, J )
+                     B( I, J ) = B( I+1, J )
+                     B( I+1, J ) = TEMP - DL( I )*B( I, J )
+                  END IF
+   40          CONTINUE
+*
+*              Solve U*x = b.
+*
+               B( N, J ) = B( N, J ) / D( N )
+               IF( N.GT.1 )
+     $            B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) /
+     $                          D( N-1 )
+               DO 50 I = N - 2, 1, -1
+                  B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DU2( I )*
+     $                        B( I+2, J ) ) / D( I )
+   50          CONTINUE
+   60       CONTINUE
+         END IF
+      ELSE
+*
+*        Solve A' * X = B.
+*
+         IF( NRHS.LE.1 ) THEN
+*
+*           Solve U'*x = b.
+*
+            J = 1
+   70       CONTINUE
+            B( 1, J ) = B( 1, J ) / D( 1 )
+            IF( N.GT.1 )
+     $         B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 )
+            DO 80 I = 3, N
+               B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J )-DU2( I-2 )*
+     $                     B( I-2, J ) ) / D( I )
+   80       CONTINUE
+*
+*           Solve L'*x = b.
+*
+            DO 90 I = N - 1, 1, -1
+               IP = IPIV( I )
+               TEMP = B( I, J ) - DL( I )*B( I+1, J )
+               B( I, J ) = B( IP, J )
+               B( IP, J ) = TEMP
+   90       CONTINUE
+            IF( J.LT.NRHS ) THEN
+               J = J + 1
+               GO TO 70
+            END IF
+*
+         ELSE
+            DO 120 J = 1, NRHS
+*
+*              Solve U'*x = b.
+*
+               B( 1, J ) = B( 1, J ) / D( 1 )
+               IF( N.GT.1 )
+     $            B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 )
+               DO 100 I = 3, N
+                  B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J )-
+     $                        DU2( I-2 )*B( I-2, J ) ) / D( I )
+  100          CONTINUE
+               DO 110 I = N - 1, 1, -1
+                  IF( IPIV( I ).EQ.I ) THEN
+                     B( I, J ) = B( I, J ) - DL( I )*B( I+1, J )
+                  ELSE
+                     TEMP = B( I+1, J )
+                     B( I+1, J ) = B( I, J ) - DL( I )*TEMP
+                     B( I, J ) = TEMP
+                  END IF
+  110          CONTINUE
+  120       CONTINUE
+         END IF
+      END IF
+*
+*     End of SGTTS2
+*
+      END
diff --git a/costa/native/external/lapack/shgeqz.f b/costa/native/external/lapack/shgeqz.f
new file mode 100644
index 000000000..0f5a28363
--- /dev/null
+++ b/costa/native/external/lapack/shgeqz.f
@@ -0,0 +1,1243 @@
+      SUBROUTINE SHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB,
+     $                   ALPHAR, ALPHAI, BETA, Q, LDQ, Z, LDZ, WORK,
+     $                   LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPQ, COMPZ, JOB
+      INTEGER            IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), ALPHAI( * ), ALPHAR( * ),
+     $                   B( LDB, * ), BETA( * ), Q( LDQ, * ), WORK( * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SHGEQZ implements a single-/double-shift version of the QZ method for
+*  finding the generalized eigenvalues
+*
+*  w(j)=(ALPHAR(j) + i*ALPHAI(j))/BETAR(j)   of the equation
+*
+*       det( A - w(i) B ) = 0
+*
+*  In addition, the pair A,B may be reduced to generalized Schur form:
+*  B is upper triangular, and A is block upper triangular, where the
+*  diagonal blocks are either 1-by-1 or 2-by-2, the 2-by-2 blocks having
+*  complex generalized eigenvalues (see the description of the argument
+*  JOB.)
+*
+*  If JOB='S', then the pair (A,B) is simultaneously reduced to Schur
+*  form by applying one orthogonal tranformation (usually called Q) on
+*  the left and another (usually called Z) on the right.  The 2-by-2
+*  upper-triangular diagonal blocks of B corresponding to 2-by-2 blocks
+*  of A will be reduced to positive diagonal matrices.  (I.e.,
+*  if A(j+1,j) is non-zero, then B(j+1,j)=B(j,j+1)=0 and B(j,j) and
+*  B(j+1,j+1) will be positive.)
+*
+*  If JOB='E', then at each iteration, the same transformations
+*  are computed, but they are only applied to those parts of A and B
+*  which are needed to compute ALPHAR, ALPHAI, and BETAR.
+*
+*  If JOB='S' and COMPQ and COMPZ are 'V' or 'I', then the orthogonal
+*  transformations used to reduce (A,B) are accumulated into the arrays
+*  Q and Z s.t.:
+*
+*       Q(in) A(in) Z(in)* = Q(out) A(out) Z(out)*
+*       Q(in) B(in) Z(in)* = Q(out) B(out) Z(out)*
+*
+*  Ref: C.B. Moler & G.W. Stewart, "An Algorithm for Generalized Matrix
+*       Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973),
+*       pp. 241--256.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          = 'E': compute only ALPHAR, ALPHAI, and BETA.  A and B will
+*                 not necessarily be put into generalized Schur form.
+*          = 'S': put A and B into generalized Schur form, as well
+*                 as computing ALPHAR, ALPHAI, and BETA.
+*
+*  COMPQ   (input) CHARACTER*1
+*          = 'N': do not modify Q.
+*          = 'V': multiply the array Q on the right by the transpose of
+*                 the orthogonal tranformation that is applied to the
+*                 left side of A and B to reduce them to Schur form.
+*          = 'I': like COMPQ='V', except that Q will be initialized to
+*                 the identity first.
+*
+*  COMPZ   (input) CHARACTER*1
+*          = 'N': do not modify Z.
+*          = 'V': multiply the array Z on the right by the orthogonal
+*                 tranformation that is applied to the right side of
+*                 A and B to reduce them to Schur form.
+*          = 'I': like COMPZ='V', except that Z will be initialized to
+*                 the identity first.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A, B, Q, and Z.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          It is assumed that A is already upper triangular in rows and
+*          columns 1:ILO-1 and IHI+1:N.
+*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+*
+*  A       (input/output) REAL array, dimension (LDA, N)
+*          On entry, the N-by-N upper Hessenberg matrix A.  Elements
+*          below the subdiagonal must be zero.
+*          If JOB='S', then on exit A and B will have been
+*             simultaneously reduced to generalized Schur form.
+*          If JOB='E', then on exit A will have been destroyed.
+*             The diagonal blocks will be correct, but the off-diagonal
+*             portion will be meaningless.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max( 1, N ).
+*
+*  B       (input/output) REAL array, dimension (LDB, N)
+*          On entry, the N-by-N upper triangular matrix B.  Elements
+*          below the diagonal must be zero.  2-by-2 blocks in B
+*          corresponding to 2-by-2 blocks in A will be reduced to
+*          positive diagonal form.  (I.e., if A(j+1,j) is non-zero,
+*          then B(j+1,j)=B(j,j+1)=0 and B(j,j) and B(j+1,j+1) will be
+*          positive.)
+*          If JOB='S', then on exit A and B will have been
+*             simultaneously reduced to Schur form.
+*          If JOB='E', then on exit B will have been destroyed.
+*             Elements corresponding to diagonal blocks of A will be
+*             correct, but the off-diagonal portion will be meaningless.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max( 1, N ).
+*
+*  ALPHAR  (output) REAL array, dimension (N)
+*          ALPHAR(1:N) will be set to real parts of the diagonal
+*          elements of A that would result from reducing A and B to
+*          Schur form and then further reducing them both to triangular
+*          form using unitary transformations s.t. the diagonal of B
+*          was non-negative real.  Thus, if A(j,j) is in a 1-by-1 block
+*          (i.e., A(j+1,j)=A(j,j+1)=0), then ALPHAR(j)=A(j,j).
+*          Note that the (real or complex) values
+*          (ALPHAR(j) + i*ALPHAI(j))/BETA(j), j=1,...,N, are the
+*          generalized eigenvalues of the matrix pencil A - wB.
+*
+*  ALPHAI  (output) REAL array, dimension (N)
+*          ALPHAI(1:N) will be set to imaginary parts of the diagonal
+*          elements of A that would result from reducing A and B to
+*          Schur form and then further reducing them both to triangular
+*          form using unitary transformations s.t. the diagonal of B
+*          was non-negative real.  Thus, if A(j,j) is in a 1-by-1 block
+*          (i.e., A(j+1,j)=A(j,j+1)=0), then ALPHAR(j)=0.
+*          Note that the (real or complex) values
+*          (ALPHAR(j) + i*ALPHAI(j))/BETA(j), j=1,...,N, are the
+*          generalized eigenvalues of the matrix pencil A - wB.
+*
+*  BETA    (output) REAL array, dimension (N)
+*          BETA(1:N) will be set to the (real) diagonal elements of B
+*          that would result from reducing A and B to Schur form and
+*          then further reducing them both to triangular form using
+*          unitary transformations s.t. the diagonal of B was
+*          non-negative real.  Thus, if A(j,j) is in a 1-by-1 block
+*          (i.e., A(j+1,j)=A(j,j+1)=0), then BETA(j)=B(j,j).
+*          Note that the (real or complex) values
+*          (ALPHAR(j) + i*ALPHAI(j))/BETA(j), j=1,...,N, are the
+*          generalized eigenvalues of the matrix pencil A - wB.
+*          (Note that BETA(1:N) will always be non-negative, and no
+*          BETAI is necessary.)
+*
+*  Q       (input/output) REAL array, dimension (LDQ, N)
+*          If COMPQ='N', then Q will not be referenced.
+*          If COMPQ='V' or 'I', then the transpose of the orthogonal
+*             transformations which are applied to A and B on the left
+*             will be applied to the array Q on the right.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.  LDQ >= 1.
+*          If COMPQ='V' or 'I', then LDQ >= N.
+*
+*  Z       (input/output) REAL array, dimension (LDZ, N)
+*          If COMPZ='N', then Z will not be referenced.
+*          If COMPZ='V' or 'I', then the orthogonal transformations
+*             which are applied to A and B on the right will be applied
+*             to the array Z on the right.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1.
+*          If COMPZ='V' or 'I', then LDZ >= N.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO >= 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,N).
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          = 1,...,N: the QZ iteration did not converge.  (A,B) is not
+*                     in Schur form, but ALPHAR(i), ALPHAI(i), and
+*                     BETA(i), i=INFO+1,...,N should be correct.
+*          = N+1,...,2*N: the shift calculation failed.  (A,B) is not
+*                     in Schur form, but ALPHAR(i), ALPHAI(i), and
+*                     BETA(i), i=INFO-N+1,...,N should be correct.
+*          > 2*N:     various "impossible" errors.
+*
+*  Further Details
+*  ===============
+*
+*  Iteration counters:
+*
+*  JITER  -- counts iterations.
+*  IITER  -- counts iterations run since ILAST was last
+*            changed.  This is therefore reset only when a 1-by-1 or
+*            2-by-2 block deflates off the bottom.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+*    $                     SAFETY = 1.0E+0 )
+      REAL               HALF, ZERO, ONE, SAFETY
+      PARAMETER          ( HALF = 0.5E+0, ZERO = 0.0E+0, ONE = 1.0E+0,
+     $                   SAFETY = 1.0E+2 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ILAZR2, ILAZRO, ILPIVT, ILQ, ILSCHR, ILZ,
+     $                   LQUERY
+      INTEGER            ICOMPQ, ICOMPZ, IFIRST, IFRSTM, IITER, ILAST,
+     $                   ILASTM, IN, ISCHUR, ISTART, J, JC, JCH, JITER,
+     $                   JR, MAXIT
+      REAL               A11, A12, A1I, A1R, A21, A22, A2I, A2R, AD11,
+     $                   AD11L, AD12, AD12L, AD21, AD21L, AD22, AD22L,
+     $                   AD32L, AN, ANORM, ASCALE, ATOL, B11, B1A, B1I,
+     $                   B1R, B22, B2A, B2I, B2R, BN, BNORM, BSCALE,
+     $                   BTOL, C, C11I, C11R, C12, C21, C22I, C22R, CL,
+     $                   CQ, CR, CZ, ESHIFT, S, S1, S1INV, S2, SAFMAX,
+     $                   SAFMIN, SCALE, SL, SQI, SQR, SR, SZI, SZR, T,
+     $                   TAU, TEMP, TEMP2, TEMPI, TEMPR, U1, U12, U12L,
+     $                   U2, ULP, VS, W11, W12, W21, W22, WABS, WI, WR,
+     $                   WR2
+*     ..
+*     .. Local Arrays ..
+      REAL               V( 3 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH, SLANHS, SLAPY2, SLAPY3
+      EXTERNAL           LSAME, SLAMCH, SLANHS, SLAPY2, SLAPY3
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLAG2, SLARFG, SLARTG, SLASET, SLASV2, SROT,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode JOB, COMPQ, COMPZ
+*
+      IF( LSAME( JOB, 'E' ) ) THEN
+         ILSCHR = .FALSE.
+         ISCHUR = 1
+      ELSE IF( LSAME( JOB, 'S' ) ) THEN
+         ILSCHR = .TRUE.
+         ISCHUR = 2
+      ELSE
+         ISCHUR = 0
+      END IF
+*
+      IF( LSAME( COMPQ, 'N' ) ) THEN
+         ILQ = .FALSE.
+         ICOMPQ = 1
+      ELSE IF( LSAME( COMPQ, 'V' ) ) THEN
+         ILQ = .TRUE.
+         ICOMPQ = 2
+      ELSE IF( LSAME( COMPQ, 'I' ) ) THEN
+         ILQ = .TRUE.
+         ICOMPQ = 3
+      ELSE
+         ICOMPQ = 0
+      END IF
+*
+      IF( LSAME( COMPZ, 'N' ) ) THEN
+         ILZ = .FALSE.
+         ICOMPZ = 1
+      ELSE IF( LSAME( COMPZ, 'V' ) ) THEN
+         ILZ = .TRUE.
+         ICOMPZ = 2
+      ELSE IF( LSAME( COMPZ, 'I' ) ) THEN
+         ILZ = .TRUE.
+         ICOMPZ = 3
+      ELSE
+         ICOMPZ = 0
+      END IF
+*
+*     Check Argument Values
+*
+      INFO = 0
+      WORK( 1 ) = MAX( 1, N )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( ISCHUR.EQ.0 ) THEN
+         INFO = -1
+      ELSE IF( ICOMPQ.EQ.0 ) THEN
+         INFO = -2
+      ELSE IF( ICOMPZ.EQ.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( ILO.LT.1 ) THEN
+         INFO = -5
+      ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.N ) THEN
+         INFO = -8
+      ELSE IF( LDB.LT.N ) THEN
+         INFO = -10
+      ELSE IF( LDQ.LT.1 .OR. ( ILQ .AND. LDQ.LT.N ) ) THEN
+         INFO = -15
+      ELSE IF( LDZ.LT.1 .OR. ( ILZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -17
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -19
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SHGEQZ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 ) THEN
+         WORK( 1 ) = REAL( 1 )
+         RETURN
+      END IF
+*
+*     Initialize Q and Z
+*
+      IF( ICOMPQ.EQ.3 )
+     $   CALL SLASET( 'Full', N, N, ZERO, ONE, Q, LDQ )
+      IF( ICOMPZ.EQ.3 )
+     $   CALL SLASET( 'Full', N, N, ZERO, ONE, Z, LDZ )
+*
+*     Machine Constants
+*
+      IN = IHI + 1 - ILO
+      SAFMIN = SLAMCH( 'S' )
+      SAFMAX = ONE / SAFMIN
+      ULP = SLAMCH( 'E' )*SLAMCH( 'B' )
+      ANORM = SLANHS( 'F', IN, A( ILO, ILO ), LDA, WORK )
+      BNORM = SLANHS( 'F', IN, B( ILO, ILO ), LDB, WORK )
+      ATOL = MAX( SAFMIN, ULP*ANORM )
+      BTOL = MAX( SAFMIN, ULP*BNORM )
+      ASCALE = ONE / MAX( SAFMIN, ANORM )
+      BSCALE = ONE / MAX( SAFMIN, BNORM )
+*
+*     Set Eigenvalues IHI+1:N
+*
+      DO 30 J = IHI + 1, N
+         IF( B( J, J ).LT.ZERO ) THEN
+            IF( ILSCHR ) THEN
+               DO 10 JR = 1, J
+                  A( JR, J ) = -A( JR, J )
+                  B( JR, J ) = -B( JR, J )
+   10          CONTINUE
+            ELSE
+               A( J, J ) = -A( J, J )
+               B( J, J ) = -B( J, J )
+            END IF
+            IF( ILZ ) THEN
+               DO 20 JR = 1, N
+                  Z( JR, J ) = -Z( JR, J )
+   20          CONTINUE
+            END IF
+         END IF
+         ALPHAR( J ) = A( J, J )
+         ALPHAI( J ) = ZERO
+         BETA( J ) = B( J, J )
+   30 CONTINUE
+*
+*     If IHI < ILO, skip QZ steps
+*
+      IF( IHI.LT.ILO )
+     $   GO TO 380
+*
+*     MAIN QZ ITERATION LOOP
+*
+*     Initialize dynamic indices
+*
+*     Eigenvalues ILAST+1:N have been found.
+*        Column operations modify rows IFRSTM:whatever.
+*        Row operations modify columns whatever:ILASTM.
+*
+*     If only eigenvalues are being computed, then
+*        IFRSTM is the row of the last splitting row above row ILAST;
+*        this is always at least ILO.
+*     IITER counts iterations since the last eigenvalue was found,
+*        to tell when to use an extraordinary shift.
+*     MAXIT is the maximum number of QZ sweeps allowed.
+*
+      ILAST = IHI
+      IF( ILSCHR ) THEN
+         IFRSTM = 1
+         ILASTM = N
+      ELSE
+         IFRSTM = ILO
+         ILASTM = IHI
+      END IF
+      IITER = 0
+      ESHIFT = ZERO
+      MAXIT = 30*( IHI-ILO+1 )
+*
+      DO 360 JITER = 1, MAXIT
+*
+*        Split the matrix if possible.
+*
+*        Two tests:
+*           1: A(j,j-1)=0  or  j=ILO
+*           2: B(j,j)=0
+*
+         IF( ILAST.EQ.ILO ) THEN
+*
+*           Special case: j=ILAST
+*
+            GO TO 80
+         ELSE
+            IF( ABS( A( ILAST, ILAST-1 ) ).LE.ATOL ) THEN
+               A( ILAST, ILAST-1 ) = ZERO
+               GO TO 80
+            END IF
+         END IF
+*
+         IF( ABS( B( ILAST, ILAST ) ).LE.BTOL ) THEN
+            B( ILAST, ILAST ) = ZERO
+            GO TO 70
+         END IF
+*
+*        General case: j<ILAST
+*
+         DO 60 J = ILAST - 1, ILO, -1
+*
+*           Test 1: for A(j,j-1)=0 or j=ILO
+*
+            IF( J.EQ.ILO ) THEN
+               ILAZRO = .TRUE.
+            ELSE
+               IF( ABS( A( J, J-1 ) ).LE.ATOL ) THEN
+                  A( J, J-1 ) = ZERO
+                  ILAZRO = .TRUE.
+               ELSE
+                  ILAZRO = .FALSE.
+               END IF
+            END IF
+*
+*           Test 2: for B(j,j)=0
+*
+            IF( ABS( B( J, J ) ).LT.BTOL ) THEN
+               B( J, J ) = ZERO
+*
+*              Test 1a: Check for 2 consecutive small subdiagonals in A
+*
+               ILAZR2 = .FALSE.
+               IF( .NOT.ILAZRO ) THEN
+                  TEMP = ABS( A( J, J-1 ) )
+                  TEMP2 = ABS( A( J, J ) )
+                  TEMPR = MAX( TEMP, TEMP2 )
+                  IF( TEMPR.LT.ONE .AND. TEMPR.NE.ZERO ) THEN
+                     TEMP = TEMP / TEMPR
+                     TEMP2 = TEMP2 / TEMPR
+                  END IF
+                  IF( TEMP*( ASCALE*ABS( A( J+1, J ) ) ).LE.TEMP2*
+     $                ( ASCALE*ATOL ) )ILAZR2 = .TRUE.
+               END IF
+*
+*              If both tests pass (1 & 2), i.e., the leading diagonal
+*              element of B in the block is zero, split a 1x1 block off
+*              at the top. (I.e., at the J-th row/column) The leading
+*              diagonal element of the remainder can also be zero, so
+*              this may have to be done repeatedly.
+*
+               IF( ILAZRO .OR. ILAZR2 ) THEN
+                  DO 40 JCH = J, ILAST - 1
+                     TEMP = A( JCH, JCH )
+                     CALL SLARTG( TEMP, A( JCH+1, JCH ), C, S,
+     $                            A( JCH, JCH ) )
+                     A( JCH+1, JCH ) = ZERO
+                     CALL SROT( ILASTM-JCH, A( JCH, JCH+1 ), LDA,
+     $                          A( JCH+1, JCH+1 ), LDA, C, S )
+                     CALL SROT( ILASTM-JCH, B( JCH, JCH+1 ), LDB,
+     $                          B( JCH+1, JCH+1 ), LDB, C, S )
+                     IF( ILQ )
+     $                  CALL SROT( N, Q( 1, JCH ), 1, Q( 1, JCH+1 ), 1,
+     $                             C, S )
+                     IF( ILAZR2 )
+     $                  A( JCH, JCH-1 ) = A( JCH, JCH-1 )*C
+                     ILAZR2 = .FALSE.
+                     IF( ABS( B( JCH+1, JCH+1 ) ).GE.BTOL ) THEN
+                        IF( JCH+1.GE.ILAST ) THEN
+                           GO TO 80
+                        ELSE
+                           IFIRST = JCH + 1
+                           GO TO 110
+                        END IF
+                     END IF
+                     B( JCH+1, JCH+1 ) = ZERO
+   40             CONTINUE
+                  GO TO 70
+               ELSE
+*
+*                 Only test 2 passed -- chase the zero to B(ILAST,ILAST)
+*                 Then process as in the case B(ILAST,ILAST)=0
+*
+                  DO 50 JCH = J, ILAST - 1
+                     TEMP = B( JCH, JCH+1 )
+                     CALL SLARTG( TEMP, B( JCH+1, JCH+1 ), C, S,
+     $                            B( JCH, JCH+1 ) )
+                     B( JCH+1, JCH+1 ) = ZERO
+                     IF( JCH.LT.ILASTM-1 )
+     $                  CALL SROT( ILASTM-JCH-1, B( JCH, JCH+2 ), LDB,
+     $                             B( JCH+1, JCH+2 ), LDB, C, S )
+                     CALL SROT( ILASTM-JCH+2, A( JCH, JCH-1 ), LDA,
+     $                          A( JCH+1, JCH-1 ), LDA, C, S )
+                     IF( ILQ )
+     $                  CALL SROT( N, Q( 1, JCH ), 1, Q( 1, JCH+1 ), 1,
+     $                             C, S )
+                     TEMP = A( JCH+1, JCH )
+                     CALL SLARTG( TEMP, A( JCH+1, JCH-1 ), C, S,
+     $                            A( JCH+1, JCH ) )
+                     A( JCH+1, JCH-1 ) = ZERO
+                     CALL SROT( JCH+1-IFRSTM, A( IFRSTM, JCH ), 1,
+     $                          A( IFRSTM, JCH-1 ), 1, C, S )
+                     CALL SROT( JCH-IFRSTM, B( IFRSTM, JCH ), 1,
+     $                          B( IFRSTM, JCH-1 ), 1, C, S )
+                     IF( ILZ )
+     $                  CALL SROT( N, Z( 1, JCH ), 1, Z( 1, JCH-1 ), 1,
+     $                             C, S )
+   50             CONTINUE
+                  GO TO 70
+               END IF
+            ELSE IF( ILAZRO ) THEN
+*
+*              Only test 1 passed -- work on J:ILAST
+*
+               IFIRST = J
+               GO TO 110
+            END IF
+*
+*           Neither test passed -- try next J
+*
+   60    CONTINUE
+*
+*        (Drop-through is "impossible")
+*
+         INFO = N + 1
+         GO TO 420
+*
+*        B(ILAST,ILAST)=0 -- clear A(ILAST,ILAST-1) to split off a
+*        1x1 block.
+*
+   70    CONTINUE
+         TEMP = A( ILAST, ILAST )
+         CALL SLARTG( TEMP, A( ILAST, ILAST-1 ), C, S,
+     $                A( ILAST, ILAST ) )
+         A( ILAST, ILAST-1 ) = ZERO
+         CALL SROT( ILAST-IFRSTM, A( IFRSTM, ILAST ), 1,
+     $              A( IFRSTM, ILAST-1 ), 1, C, S )
+         CALL SROT( ILAST-IFRSTM, B( IFRSTM, ILAST ), 1,
+     $              B( IFRSTM, ILAST-1 ), 1, C, S )
+         IF( ILZ )
+     $      CALL SROT( N, Z( 1, ILAST ), 1, Z( 1, ILAST-1 ), 1, C, S )
+*
+*        A(ILAST,ILAST-1)=0 -- Standardize B, set ALPHAR, ALPHAI,
+*                              and BETA
+*
+   80    CONTINUE
+         IF( B( ILAST, ILAST ).LT.ZERO ) THEN
+            IF( ILSCHR ) THEN
+               DO 90 J = IFRSTM, ILAST
+                  A( J, ILAST ) = -A( J, ILAST )
+                  B( J, ILAST ) = -B( J, ILAST )
+   90          CONTINUE
+            ELSE
+               A( ILAST, ILAST ) = -A( ILAST, ILAST )
+               B( ILAST, ILAST ) = -B( ILAST, ILAST )
+            END IF
+            IF( ILZ ) THEN
+               DO 100 J = 1, N
+                  Z( J, ILAST ) = -Z( J, ILAST )
+  100          CONTINUE
+            END IF
+         END IF
+         ALPHAR( ILAST ) = A( ILAST, ILAST )
+         ALPHAI( ILAST ) = ZERO
+         BETA( ILAST ) = B( ILAST, ILAST )
+*
+*        Go to next block -- exit if finished.
+*
+         ILAST = ILAST - 1
+         IF( ILAST.LT.ILO )
+     $      GO TO 380
+*
+*        Reset counters
+*
+         IITER = 0
+         ESHIFT = ZERO
+         IF( .NOT.ILSCHR ) THEN
+            ILASTM = ILAST
+            IF( IFRSTM.GT.ILAST )
+     $         IFRSTM = ILO
+         END IF
+         GO TO 350
+*
+*        QZ step
+*
+*        This iteration only involves rows/columns IFIRST:ILAST. We
+*        assume IFIRST < ILAST, and that the diagonal of B is non-zero.
+*
+  110    CONTINUE
+         IITER = IITER + 1
+         IF( .NOT.ILSCHR ) THEN
+            IFRSTM = IFIRST
+         END IF
+*
+*        Compute single shifts.
+*
+*        At this point, IFIRST < ILAST, and the diagonal elements of
+*        B(IFIRST:ILAST,IFIRST,ILAST) are larger than BTOL (in
+*        magnitude)
+*
+         IF( ( IITER / 10 )*10.EQ.IITER ) THEN
+*
+*           Exceptional shift.  Chosen for no particularly good reason.
+*           (Single shift only.)
+*
+            IF( ( REAL( MAXIT )*SAFMIN )*ABS( A( ILAST-1, ILAST ) ).LT.
+     $          ABS( B( ILAST-1, ILAST-1 ) ) ) THEN
+               ESHIFT = ESHIFT + A( ILAST-1, ILAST ) /
+     $                  B( ILAST-1, ILAST-1 )
+            ELSE
+               ESHIFT = ESHIFT + ONE / ( SAFMIN*REAL( MAXIT ) )
+            END IF
+            S1 = ONE
+            WR = ESHIFT
+*
+         ELSE
+*
+*           Shifts based on the generalized eigenvalues of the
+*           bottom-right 2x2 block of A and B. The first eigenvalue
+*           returned by SLAG2 is the Wilkinson shift (AEP p.512),
+*
+            CALL SLAG2( A( ILAST-1, ILAST-1 ), LDA,
+     $                  B( ILAST-1, ILAST-1 ), LDB, SAFMIN*SAFETY, S1,
+     $                  S2, WR, WR2, WI )
+*
+            TEMP = MAX( S1, SAFMIN*MAX( ONE, ABS( WR ), ABS( WI ) ) )
+            IF( WI.NE.ZERO )
+     $         GO TO 200
+         END IF
+*
+*        Fiddle with shift to avoid overflow
+*
+         TEMP = MIN( ASCALE, ONE )*( HALF*SAFMAX )
+         IF( S1.GT.TEMP ) THEN
+            SCALE = TEMP / S1
+         ELSE
+            SCALE = ONE
+         END IF
+*
+         TEMP = MIN( BSCALE, ONE )*( HALF*SAFMAX )
+         IF( ABS( WR ).GT.TEMP )
+     $      SCALE = MIN( SCALE, TEMP / ABS( WR ) )
+         S1 = SCALE*S1
+         WR = SCALE*WR
+*
+*        Now check for two consecutive small subdiagonals.
+*
+         DO 120 J = ILAST - 1, IFIRST + 1, -1
+            ISTART = J
+            TEMP = ABS( S1*A( J, J-1 ) )
+            TEMP2 = ABS( S1*A( J, J )-WR*B( J, J ) )
+            TEMPR = MAX( TEMP, TEMP2 )
+            IF( TEMPR.LT.ONE .AND. TEMPR.NE.ZERO ) THEN
+               TEMP = TEMP / TEMPR
+               TEMP2 = TEMP2 / TEMPR
+            END IF
+            IF( ABS( ( ASCALE*A( J+1, J ) )*TEMP ).LE.( ASCALE*ATOL )*
+     $          TEMP2 )GO TO 130
+  120    CONTINUE
+*
+         ISTART = IFIRST
+  130    CONTINUE
+*
+*        Do an implicit single-shift QZ sweep.
+*
+*        Initial Q
+*
+         TEMP = S1*A( ISTART, ISTART ) - WR*B( ISTART, ISTART )
+         TEMP2 = S1*A( ISTART+1, ISTART )
+         CALL SLARTG( TEMP, TEMP2, C, S, TEMPR )
+*
+*        Sweep
+*
+         DO 190 J = ISTART, ILAST - 1
+            IF( J.GT.ISTART ) THEN
+               TEMP = A( J, J-1 )
+               CALL SLARTG( TEMP, A( J+1, J-1 ), C, S, A( J, J-1 ) )
+               A( J+1, J-1 ) = ZERO
+            END IF
+*
+            DO 140 JC = J, ILASTM
+               TEMP = C*A( J, JC ) + S*A( J+1, JC )
+               A( J+1, JC ) = -S*A( J, JC ) + C*A( J+1, JC )
+               A( J, JC ) = TEMP
+               TEMP2 = C*B( J, JC ) + S*B( J+1, JC )
+               B( J+1, JC ) = -S*B( J, JC ) + C*B( J+1, JC )
+               B( J, JC ) = TEMP2
+  140       CONTINUE
+            IF( ILQ ) THEN
+               DO 150 JR = 1, N
+                  TEMP = C*Q( JR, J ) + S*Q( JR, J+1 )
+                  Q( JR, J+1 ) = -S*Q( JR, J ) + C*Q( JR, J+1 )
+                  Q( JR, J ) = TEMP
+  150          CONTINUE
+            END IF
+*
+            TEMP = B( J+1, J+1 )
+            CALL SLARTG( TEMP, B( J+1, J ), C, S, B( J+1, J+1 ) )
+            B( J+1, J ) = ZERO
+*
+            DO 160 JR = IFRSTM, MIN( J+2, ILAST )
+               TEMP = C*A( JR, J+1 ) + S*A( JR, J )
+               A( JR, J ) = -S*A( JR, J+1 ) + C*A( JR, J )
+               A( JR, J+1 ) = TEMP
+  160       CONTINUE
+            DO 170 JR = IFRSTM, J
+               TEMP = C*B( JR, J+1 ) + S*B( JR, J )
+               B( JR, J ) = -S*B( JR, J+1 ) + C*B( JR, J )
+               B( JR, J+1 ) = TEMP
+  170       CONTINUE
+            IF( ILZ ) THEN
+               DO 180 JR = 1, N
+                  TEMP = C*Z( JR, J+1 ) + S*Z( JR, J )
+                  Z( JR, J ) = -S*Z( JR, J+1 ) + C*Z( JR, J )
+                  Z( JR, J+1 ) = TEMP
+  180          CONTINUE
+            END IF
+  190    CONTINUE
+*
+         GO TO 350
+*
+*        Use Francis double-shift
+*
+*        Note: the Francis double-shift should work with real shifts,
+*              but only if the block is at least 3x3.
+*              This code may break if this point is reached with
+*              a 2x2 block with real eigenvalues.
+*
+  200    CONTINUE
+         IF( IFIRST+1.EQ.ILAST ) THEN
+*
+*           Special case -- 2x2 block with complex eigenvectors
+*
+*           Step 1: Standardize, that is, rotate so that
+*
+*                       ( B11  0  )
+*                   B = (         )  with B11 non-negative.
+*                       (  0  B22 )
+*
+            CALL SLASV2( B( ILAST-1, ILAST-1 ), B( ILAST-1, ILAST ),
+     $                   B( ILAST, ILAST ), B22, B11, SR, CR, SL, CL )
+*
+            IF( B11.LT.ZERO ) THEN
+               CR = -CR
+               SR = -SR
+               B11 = -B11
+               B22 = -B22
+            END IF
+*
+            CALL SROT( ILASTM+1-IFIRST, A( ILAST-1, ILAST-1 ), LDA,
+     $                 A( ILAST, ILAST-1 ), LDA, CL, SL )
+            CALL SROT( ILAST+1-IFRSTM, A( IFRSTM, ILAST-1 ), 1,
+     $                 A( IFRSTM, ILAST ), 1, CR, SR )
+*
+            IF( ILAST.LT.ILASTM )
+     $         CALL SROT( ILASTM-ILAST, B( ILAST-1, ILAST+1 ), LDB,
+     $                    B( ILAST, ILAST+1 ), LDA, CL, SL )
+            IF( IFRSTM.LT.ILAST-1 )
+     $         CALL SROT( IFIRST-IFRSTM, B( IFRSTM, ILAST-1 ), 1,
+     $                    B( IFRSTM, ILAST ), 1, CR, SR )
+*
+            IF( ILQ )
+     $         CALL SROT( N, Q( 1, ILAST-1 ), 1, Q( 1, ILAST ), 1, CL,
+     $                    SL )
+            IF( ILZ )
+     $         CALL SROT( N, Z( 1, ILAST-1 ), 1, Z( 1, ILAST ), 1, CR,
+     $                    SR )
+*
+            B( ILAST-1, ILAST-1 ) = B11
+            B( ILAST-1, ILAST ) = ZERO
+            B( ILAST, ILAST-1 ) = ZERO
+            B( ILAST, ILAST ) = B22
+*
+*           If B22 is negative, negate column ILAST
+*
+            IF( B22.LT.ZERO ) THEN
+               DO 210 J = IFRSTM, ILAST
+                  A( J, ILAST ) = -A( J, ILAST )
+                  B( J, ILAST ) = -B( J, ILAST )
+  210          CONTINUE
+*
+               IF( ILZ ) THEN
+                  DO 220 J = 1, N
+                     Z( J, ILAST ) = -Z( J, ILAST )
+  220             CONTINUE
+               END IF
+            END IF
+*
+*           Step 2: Compute ALPHAR, ALPHAI, and BETA (see refs.)
+*
+*           Recompute shift
+*
+            CALL SLAG2( A( ILAST-1, ILAST-1 ), LDA,
+     $                  B( ILAST-1, ILAST-1 ), LDB, SAFMIN*SAFETY, S1,
+     $                  TEMP, WR, TEMP2, WI )
+*
+*           If standardization has perturbed the shift onto real line,
+*           do another (real single-shift) QR step.
+*
+            IF( WI.EQ.ZERO )
+     $         GO TO 350
+            S1INV = ONE / S1
+*
+*           Do EISPACK (QZVAL) computation of alpha and beta
+*
+            A11 = A( ILAST-1, ILAST-1 )
+            A21 = A( ILAST, ILAST-1 )
+            A12 = A( ILAST-1, ILAST )
+            A22 = A( ILAST, ILAST )
+*
+*           Compute complex Givens rotation on right
+*           (Assume some element of C = (sA - wB) > unfl )
+*                            __
+*           (sA - wB) ( CZ   -SZ )
+*                     ( SZ    CZ )
+*
+            C11R = S1*A11 - WR*B11
+            C11I = -WI*B11
+            C12 = S1*A12
+            C21 = S1*A21
+            C22R = S1*A22 - WR*B22
+            C22I = -WI*B22
+*
+            IF( ABS( C11R )+ABS( C11I )+ABS( C12 ).GT.ABS( C21 )+
+     $          ABS( C22R )+ABS( C22I ) ) THEN
+               T = SLAPY3( C12, C11R, C11I )
+               CZ = C12 / T
+               SZR = -C11R / T
+               SZI = -C11I / T
+            ELSE
+               CZ = SLAPY2( C22R, C22I )
+               IF( CZ.LE.SAFMIN ) THEN
+                  CZ = ZERO
+                  SZR = ONE
+                  SZI = ZERO
+               ELSE
+                  TEMPR = C22R / CZ
+                  TEMPI = C22I / CZ
+                  T = SLAPY2( CZ, C21 )
+                  CZ = CZ / T
+                  SZR = -C21*TEMPR / T
+                  SZI = C21*TEMPI / T
+               END IF
+            END IF
+*
+*           Compute Givens rotation on left
+*
+*           (  CQ   SQ )
+*           (  __      )  A or B
+*           ( -SQ   CQ )
+*
+            AN = ABS( A11 ) + ABS( A12 ) + ABS( A21 ) + ABS( A22 )
+            BN = ABS( B11 ) + ABS( B22 )
+            WABS = ABS( WR ) + ABS( WI )
+            IF( S1*AN.GT.WABS*BN ) THEN
+               CQ = CZ*B11
+               SQR = SZR*B22
+               SQI = -SZI*B22
+            ELSE
+               A1R = CZ*A11 + SZR*A12
+               A1I = SZI*A12
+               A2R = CZ*A21 + SZR*A22
+               A2I = SZI*A22
+               CQ = SLAPY2( A1R, A1I )
+               IF( CQ.LE.SAFMIN ) THEN
+                  CQ = ZERO
+                  SQR = ONE
+                  SQI = ZERO
+               ELSE
+                  TEMPR = A1R / CQ
+                  TEMPI = A1I / CQ
+                  SQR = TEMPR*A2R + TEMPI*A2I
+                  SQI = TEMPI*A2R - TEMPR*A2I
+               END IF
+            END IF
+            T = SLAPY3( CQ, SQR, SQI )
+            CQ = CQ / T
+            SQR = SQR / T
+            SQI = SQI / T
+*
+*           Compute diagonal elements of QBZ
+*
+            TEMPR = SQR*SZR - SQI*SZI
+            TEMPI = SQR*SZI + SQI*SZR
+            B1R = CQ*CZ*B11 + TEMPR*B22
+            B1I = TEMPI*B22
+            B1A = SLAPY2( B1R, B1I )
+            B2R = CQ*CZ*B22 + TEMPR*B11
+            B2I = -TEMPI*B11
+            B2A = SLAPY2( B2R, B2I )
+*
+*           Normalize so beta > 0, and Im( alpha1 ) > 0
+*
+            BETA( ILAST-1 ) = B1A
+            BETA( ILAST ) = B2A
+            ALPHAR( ILAST-1 ) = ( WR*B1A )*S1INV
+            ALPHAI( ILAST-1 ) = ( WI*B1A )*S1INV
+            ALPHAR( ILAST ) = ( WR*B2A )*S1INV
+            ALPHAI( ILAST ) = -( WI*B2A )*S1INV
+*
+*           Step 3: Go to next block -- exit if finished.
+*
+            ILAST = IFIRST - 1
+            IF( ILAST.LT.ILO )
+     $         GO TO 380
+*
+*           Reset counters
+*
+            IITER = 0
+            ESHIFT = ZERO
+            IF( .NOT.ILSCHR ) THEN
+               ILASTM = ILAST
+               IF( IFRSTM.GT.ILAST )
+     $            IFRSTM = ILO
+            END IF
+            GO TO 350
+         ELSE
+*
+*           Usual case: 3x3 or larger block, using Francis implicit
+*                       double-shift
+*
+*                                    2
+*           Eigenvalue equation is  w  - c w + d = 0,
+*
+*                                         -1 2        -1
+*           so compute 1st column of  (A B  )  - c A B   + d
+*           using the formula in QZIT (from EISPACK)
+*
+*           We assume that the block is at least 3x3
+*
+            AD11 = ( ASCALE*A( ILAST-1, ILAST-1 ) ) /
+     $             ( BSCALE*B( ILAST-1, ILAST-1 ) )
+            AD21 = ( ASCALE*A( ILAST, ILAST-1 ) ) /
+     $             ( BSCALE*B( ILAST-1, ILAST-1 ) )
+            AD12 = ( ASCALE*A( ILAST-1, ILAST ) ) /
+     $             ( BSCALE*B( ILAST, ILAST ) )
+            AD22 = ( ASCALE*A( ILAST, ILAST ) ) /
+     $             ( BSCALE*B( ILAST, ILAST ) )
+            U12 = B( ILAST-1, ILAST ) / B( ILAST, ILAST )
+            AD11L = ( ASCALE*A( IFIRST, IFIRST ) ) /
+     $              ( BSCALE*B( IFIRST, IFIRST ) )
+            AD21L = ( ASCALE*A( IFIRST+1, IFIRST ) ) /
+     $              ( BSCALE*B( IFIRST, IFIRST ) )
+            AD12L = ( ASCALE*A( IFIRST, IFIRST+1 ) ) /
+     $              ( BSCALE*B( IFIRST+1, IFIRST+1 ) )
+            AD22L = ( ASCALE*A( IFIRST+1, IFIRST+1 ) ) /
+     $              ( BSCALE*B( IFIRST+1, IFIRST+1 ) )
+            AD32L = ( ASCALE*A( IFIRST+2, IFIRST+1 ) ) /
+     $              ( BSCALE*B( IFIRST+1, IFIRST+1 ) )
+            U12L = B( IFIRST, IFIRST+1 ) / B( IFIRST+1, IFIRST+1 )
+*
+            V( 1 ) = ( AD11-AD11L )*( AD22-AD11L ) - AD12*AD21 +
+     $               AD21*U12*AD11L + ( AD12L-AD11L*U12L )*AD21L
+            V( 2 ) = ( ( AD22L-AD11L )-AD21L*U12L-( AD11-AD11L )-
+     $               ( AD22-AD11L )+AD21*U12 )*AD21L
+            V( 3 ) = AD32L*AD21L
+*
+            ISTART = IFIRST
+*
+            CALL SLARFG( 3, V( 1 ), V( 2 ), 1, TAU )
+            V( 1 ) = ONE
+*
+*           Sweep
+*
+            DO 290 J = ISTART, ILAST - 2
+*
+*              All but last elements: use 3x3 Householder transforms.
+*
+*              Zero (j-1)st column of A
+*
+               IF( J.GT.ISTART ) THEN
+                  V( 1 ) = A( J, J-1 )
+                  V( 2 ) = A( J+1, J-1 )
+                  V( 3 ) = A( J+2, J-1 )
+*
+                  CALL SLARFG( 3, A( J, J-1 ), V( 2 ), 1, TAU )
+                  V( 1 ) = ONE
+                  A( J+1, J-1 ) = ZERO
+                  A( J+2, J-1 ) = ZERO
+               END IF
+*
+               DO 230 JC = J, ILASTM
+                  TEMP = TAU*( A( J, JC )+V( 2 )*A( J+1, JC )+V( 3 )*
+     $                   A( J+2, JC ) )
+                  A( J, JC ) = A( J, JC ) - TEMP
+                  A( J+1, JC ) = A( J+1, JC ) - TEMP*V( 2 )
+                  A( J+2, JC ) = A( J+2, JC ) - TEMP*V( 3 )
+                  TEMP2 = TAU*( B( J, JC )+V( 2 )*B( J+1, JC )+V( 3 )*
+     $                    B( J+2, JC ) )
+                  B( J, JC ) = B( J, JC ) - TEMP2
+                  B( J+1, JC ) = B( J+1, JC ) - TEMP2*V( 2 )
+                  B( J+2, JC ) = B( J+2, JC ) - TEMP2*V( 3 )
+  230          CONTINUE
+               IF( ILQ ) THEN
+                  DO 240 JR = 1, N
+                     TEMP = TAU*( Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )*
+     $                      Q( JR, J+2 ) )
+                     Q( JR, J ) = Q( JR, J ) - TEMP
+                     Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*V( 2 )
+                     Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*V( 3 )
+  240             CONTINUE
+               END IF
+*
+*              Zero j-th column of B (see SLAGBC for details)
+*
+*              Swap rows to pivot
+*
+               ILPIVT = .FALSE.
+               TEMP = MAX( ABS( B( J+1, J+1 ) ), ABS( B( J+1, J+2 ) ) )
+               TEMP2 = MAX( ABS( B( J+2, J+1 ) ), ABS( B( J+2, J+2 ) ) )
+               IF( MAX( TEMP, TEMP2 ).LT.SAFMIN ) THEN
+                  SCALE = ZERO
+                  U1 = ONE
+                  U2 = ZERO
+                  GO TO 250
+               ELSE IF( TEMP.GE.TEMP2 ) THEN
+                  W11 = B( J+1, J+1 )
+                  W21 = B( J+2, J+1 )
+                  W12 = B( J+1, J+2 )
+                  W22 = B( J+2, J+2 )
+                  U1 = B( J+1, J )
+                  U2 = B( J+2, J )
+               ELSE
+                  W21 = B( J+1, J+1 )
+                  W11 = B( J+2, J+1 )
+                  W22 = B( J+1, J+2 )
+                  W12 = B( J+2, J+2 )
+                  U2 = B( J+1, J )
+                  U1 = B( J+2, J )
+               END IF
+*
+*              Swap columns if nec.
+*
+               IF( ABS( W12 ).GT.ABS( W11 ) ) THEN
+                  ILPIVT = .TRUE.
+                  TEMP = W12
+                  TEMP2 = W22
+                  W12 = W11
+                  W22 = W21
+                  W11 = TEMP
+                  W21 = TEMP2
+               END IF
+*
+*              LU-factor
+*
+               TEMP = W21 / W11
+               U2 = U2 - TEMP*U1
+               W22 = W22 - TEMP*W12
+               W21 = ZERO
+*
+*              Compute SCALE
+*
+               SCALE = ONE
+               IF( ABS( W22 ).LT.SAFMIN ) THEN
+                  SCALE = ZERO
+                  U2 = ONE
+                  U1 = -W12 / W11
+                  GO TO 250
+               END IF
+               IF( ABS( W22 ).LT.ABS( U2 ) )
+     $            SCALE = ABS( W22 / U2 )
+               IF( ABS( W11 ).LT.ABS( U1 ) )
+     $            SCALE = MIN( SCALE, ABS( W11 / U1 ) )
+*
+*              Solve
+*
+               U2 = ( SCALE*U2 ) / W22
+               U1 = ( SCALE*U1-W12*U2 ) / W11
+*
+  250          CONTINUE
+               IF( ILPIVT ) THEN
+                  TEMP = U2
+                  U2 = U1
+                  U1 = TEMP
+               END IF
+*
+*              Compute Householder Vector
+*
+               T = SQRT( SCALE**2+U1**2+U2**2 )
+               TAU = ONE + SCALE / T
+               VS = -ONE / ( SCALE+T )
+               V( 1 ) = ONE
+               V( 2 ) = VS*U1
+               V( 3 ) = VS*U2
+*
+*              Apply transformations from the right.
+*
+               DO 260 JR = IFRSTM, MIN( J+3, ILAST )
+                  TEMP = TAU*( A( JR, J )+V( 2 )*A( JR, J+1 )+V( 3 )*
+     $                   A( JR, J+2 ) )
+                  A( JR, J ) = A( JR, J ) - TEMP
+                  A( JR, J+1 ) = A( JR, J+1 ) - TEMP*V( 2 )
+                  A( JR, J+2 ) = A( JR, J+2 ) - TEMP*V( 3 )
+  260          CONTINUE
+               DO 270 JR = IFRSTM, J + 2
+                  TEMP = TAU*( B( JR, J )+V( 2 )*B( JR, J+1 )+V( 3 )*
+     $                   B( JR, J+2 ) )
+                  B( JR, J ) = B( JR, J ) - TEMP
+                  B( JR, J+1 ) = B( JR, J+1 ) - TEMP*V( 2 )
+                  B( JR, J+2 ) = B( JR, J+2 ) - TEMP*V( 3 )
+  270          CONTINUE
+               IF( ILZ ) THEN
+                  DO 280 JR = 1, N
+                     TEMP = TAU*( Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )*
+     $                      Z( JR, J+2 ) )
+                     Z( JR, J ) = Z( JR, J ) - TEMP
+                     Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*V( 2 )
+                     Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*V( 3 )
+  280             CONTINUE
+               END IF
+               B( J+1, J ) = ZERO
+               B( J+2, J ) = ZERO
+  290       CONTINUE
+*
+*           Last elements: Use Givens rotations
+*
+*           Rotations from the left
+*
+            J = ILAST - 1
+            TEMP = A( J, J-1 )
+            CALL SLARTG( TEMP, A( J+1, J-1 ), C, S, A( J, J-1 ) )
+            A( J+1, J-1 ) = ZERO
+*
+            DO 300 JC = J, ILASTM
+               TEMP = C*A( J, JC ) + S*A( J+1, JC )
+               A( J+1, JC ) = -S*A( J, JC ) + C*A( J+1, JC )
+               A( J, JC ) = TEMP
+               TEMP2 = C*B( J, JC ) + S*B( J+1, JC )
+               B( J+1, JC ) = -S*B( J, JC ) + C*B( J+1, JC )
+               B( J, JC ) = TEMP2
+  300       CONTINUE
+            IF( ILQ ) THEN
+               DO 310 JR = 1, N
+                  TEMP = C*Q( JR, J ) + S*Q( JR, J+1 )
+                  Q( JR, J+1 ) = -S*Q( JR, J ) + C*Q( JR, J+1 )
+                  Q( JR, J ) = TEMP
+  310          CONTINUE
+            END IF
+*
+*           Rotations from the right.
+*
+            TEMP = B( J+1, J+1 )
+            CALL SLARTG( TEMP, B( J+1, J ), C, S, B( J+1, J+1 ) )
+            B( J+1, J ) = ZERO
+*
+            DO 320 JR = IFRSTM, ILAST
+               TEMP = C*A( JR, J+1 ) + S*A( JR, J )
+               A( JR, J ) = -S*A( JR, J+1 ) + C*A( JR, J )
+               A( JR, J+1 ) = TEMP
+  320       CONTINUE
+            DO 330 JR = IFRSTM, ILAST - 1
+               TEMP = C*B( JR, J+1 ) + S*B( JR, J )
+               B( JR, J ) = -S*B( JR, J+1 ) + C*B( JR, J )
+               B( JR, J+1 ) = TEMP
+  330       CONTINUE
+            IF( ILZ ) THEN
+               DO 340 JR = 1, N
+                  TEMP = C*Z( JR, J+1 ) + S*Z( JR, J )
+                  Z( JR, J ) = -S*Z( JR, J+1 ) + C*Z( JR, J )
+                  Z( JR, J+1 ) = TEMP
+  340          CONTINUE
+            END IF
+*
+*           End of Double-Shift code
+*
+         END IF
+*
+         GO TO 350
+*
+*        End of iteration loop
+*
+  350    CONTINUE
+  360 CONTINUE
+*
+*     Drop-through = non-convergence
+*
+  370 CONTINUE
+      INFO = ILAST
+      GO TO 420
+*
+*     Successful completion of all QZ steps
+*
+  380 CONTINUE
+*
+*     Set Eigenvalues 1:ILO-1
+*
+      DO 410 J = 1, ILO - 1
+         IF( B( J, J ).LT.ZERO ) THEN
+            IF( ILSCHR ) THEN
+               DO 390 JR = 1, J
+                  A( JR, J ) = -A( JR, J )
+                  B( JR, J ) = -B( JR, J )
+  390          CONTINUE
+            ELSE
+               A( J, J ) = -A( J, J )
+               B( J, J ) = -B( J, J )
+            END IF
+            IF( ILZ ) THEN
+               DO 400 JR = 1, N
+                  Z( JR, J ) = -Z( JR, J )
+  400          CONTINUE
+            END IF
+         END IF
+         ALPHAR( J ) = A( J, J )
+         ALPHAI( J ) = ZERO
+         BETA( J ) = B( J, J )
+  410 CONTINUE
+*
+*     Normal Termination
+*
+      INFO = 0
+*
+*     Exit (other than argument error) -- return optimal workspace size
+*
+  420 CONTINUE
+      WORK( 1 ) = REAL( N )
+      RETURN
+*
+*     End of SHGEQZ
+*
+      END
diff --git a/costa/native/external/lapack/shsein.f b/costa/native/external/lapack/shsein.f
new file mode 100644
index 000000000..2f57f5b2b
--- /dev/null
+++ b/costa/native/external/lapack/shsein.f
@@ -0,0 +1,412 @@
+      SUBROUTINE SHSEIN( SIDE, EIGSRC, INITV, SELECT, N, H, LDH, WR, WI,
+     $                   VL, LDVL, VR, LDVR, MM, M, WORK, IFAILL,
+     $                   IFAILR, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EIGSRC, INITV, SIDE
+      INTEGER            INFO, LDH, LDVL, LDVR, M, MM, N
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            SELECT( * )
+      INTEGER            IFAILL( * ), IFAILR( * )
+      REAL               H( LDH, * ), VL( LDVL, * ), VR( LDVR, * ),
+     $                   WI( * ), WORK( * ), WR( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SHSEIN uses inverse iteration to find specified right and/or left
+*  eigenvectors of a real upper Hessenberg matrix H.
+*
+*  The right eigenvector x and the left eigenvector y of the matrix H
+*  corresponding to an eigenvalue w are defined by:
+*
+*               H * x = w * x,     y**h * H = w * y**h
+*
+*  where y**h denotes the conjugate transpose of the vector y.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'R': compute right eigenvectors only;
+*          = 'L': compute left eigenvectors only;
+*          = 'B': compute both right and left eigenvectors.
+*
+*  EIGSRC  (input) CHARACTER*1
+*          Specifies the source of eigenvalues supplied in (WR,WI):
+*          = 'Q': the eigenvalues were found using SHSEQR; thus, if
+*                 H has zero subdiagonal elements, and so is
+*                 block-triangular, then the j-th eigenvalue can be
+*                 assumed to be an eigenvalue of the block containing
+*                 the j-th row/column.  This property allows SHSEIN to
+*                 perform inverse iteration on just one diagonal block.
+*          = 'N': no assumptions are made on the correspondence
+*                 between eigenvalues and diagonal blocks.  In this
+*                 case, SHSEIN must always perform inverse iteration
+*                 using the whole matrix H.
+*
+*  INITV   (input) CHARACTER*1
+*          = 'N': no initial vectors are supplied;
+*          = 'U': user-supplied initial vectors are stored in the arrays
+*                 VL and/or VR.
+*
+*  SELECT  (input/output) LOGICAL array, dimension (N)
+*          Specifies the eigenvectors to be computed. To select the
+*          real eigenvector corresponding to a real eigenvalue WR(j),
+*          SELECT(j) must be set to .TRUE.. To select the complex
+*          eigenvector corresponding to a complex eigenvalue
+*          (WR(j),WI(j)), with complex conjugate (WR(j+1),WI(j+1)),
+*          either SELECT(j) or SELECT(j+1) or both must be set to
+*          .TRUE.; then on exit SELECT(j) is .TRUE. and SELECT(j+1) is
+*          .FALSE..
+*
+*  N       (input) INTEGER
+*          The order of the matrix H.  N >= 0.
+*
+*  H       (input) REAL array, dimension (LDH,N)
+*          The upper Hessenberg matrix H.
+*
+*  LDH     (input) INTEGER
+*          The leading dimension of the array H.  LDH >= max(1,N).
+*
+*  WR      (input/output) REAL array, dimension (N)
+*  WI      (input) REAL array, dimension (N)
+*          On entry, the real and imaginary parts of the eigenvalues of
+*          H; a complex conjugate pair of eigenvalues must be stored in
+*          consecutive elements of WR and WI.
+*          On exit, WR may have been altered since close eigenvalues
+*          are perturbed slightly in searching for independent
+*          eigenvectors.
+*
+*  VL      (input/output) REAL array, dimension (LDVL,MM)
+*          On entry, if INITV = 'U' and SIDE = 'L' or 'B', VL must
+*          contain starting vectors for the inverse iteration for the
+*          left eigenvectors; the starting vector for each eigenvector
+*          must be in the same column(s) in which the eigenvector will
+*          be stored.
+*          On exit, if SIDE = 'L' or 'B', the left eigenvectors
+*          specified by SELECT will be stored consecutively in the
+*          columns of VL, in the same order as their eigenvalues. A
+*          complex eigenvector corresponding to a complex eigenvalue is
+*          stored in two consecutive columns, the first holding the real
+*          part and the second the imaginary part.
+*          If SIDE = 'R', VL is not referenced.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the array VL.
+*          LDVL >= max(1,N) if SIDE = 'L' or 'B'; LDVL >= 1 otherwise.
+*
+*  VR      (input/output) REAL array, dimension (LDVR,MM)
+*          On entry, if INITV = 'U' and SIDE = 'R' or 'B', VR must
+*          contain starting vectors for the inverse iteration for the
+*          right eigenvectors; the starting vector for each eigenvector
+*          must be in the same column(s) in which the eigenvector will
+*          be stored.
+*          On exit, if SIDE = 'R' or 'B', the right eigenvectors
+*          specified by SELECT will be stored consecutively in the
+*          columns of VR, in the same order as their eigenvalues. A
+*          complex eigenvector corresponding to a complex eigenvalue is
+*          stored in two consecutive columns, the first holding the real
+*          part and the second the imaginary part.
+*          If SIDE = 'L', VR is not referenced.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the array VR.
+*          LDVR >= max(1,N) if SIDE = 'R' or 'B'; LDVR >= 1 otherwise.
+*
+*  MM      (input) INTEGER
+*          The number of columns in the arrays VL and/or VR. MM >= M.
+*
+*  M       (output) INTEGER
+*          The number of columns in the arrays VL and/or VR required to
+*          store the eigenvectors; each selected real eigenvector
+*          occupies one column and each selected complex eigenvector
+*          occupies two columns.
+*
+*  WORK    (workspace) REAL array, dimension ((N+2)*N)
+*
+*  IFAILL  (output) INTEGER array, dimension (MM)
+*          If SIDE = 'L' or 'B', IFAILL(i) = j > 0 if the left
+*          eigenvector in the i-th column of VL (corresponding to the
+*          eigenvalue w(j)) failed to converge; IFAILL(i) = 0 if the
+*          eigenvector converged satisfactorily. If the i-th and (i+1)th
+*          columns of VL hold a complex eigenvector, then IFAILL(i) and
+*          IFAILL(i+1) are set to the same value.
+*          If SIDE = 'R', IFAILL is not referenced.
+*
+*  IFAILR  (output) INTEGER array, dimension (MM)
+*          If SIDE = 'R' or 'B', IFAILR(i) = j > 0 if the right
+*          eigenvector in the i-th column of VR (corresponding to the
+*          eigenvalue w(j)) failed to converge; IFAILR(i) = 0 if the
+*          eigenvector converged satisfactorily. If the i-th and (i+1)th
+*          columns of VR hold a complex eigenvector, then IFAILR(i) and
+*          IFAILR(i+1) are set to the same value.
+*          If SIDE = 'L', IFAILR is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, i is the number of eigenvectors which
+*                failed to converge; see IFAILL and IFAILR for further
+*                details.
+*
+*  Further Details
+*  ===============
+*
+*  Each eigenvector is normalized so that the element of largest
+*  magnitude has magnitude 1; here the magnitude of a complex number
+*  (x,y) is taken to be |x|+|y|.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            BOTHV, FROMQR, LEFTV, NOINIT, PAIR, RIGHTV
+      INTEGER            I, IINFO, K, KL, KLN, KR, KSI, KSR, LDWORK
+      REAL               BIGNUM, EPS3, HNORM, SMLNUM, ULP, UNFL, WKI,
+     $                   WKR
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH, SLANHS
+      EXTERNAL           LSAME, SLAMCH, SLANHS
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLAEIN, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters.
+*
+      BOTHV = LSAME( SIDE, 'B' )
+      RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV
+      LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV
+*
+      FROMQR = LSAME( EIGSRC, 'Q' )
+*
+      NOINIT = LSAME( INITV, 'N' )
+*
+*     Set M to the number of columns required to store the selected
+*     eigenvectors, and standardize the array SELECT.
+*
+      M = 0
+      PAIR = .FALSE.
+      DO 10 K = 1, N
+         IF( PAIR ) THEN
+            PAIR = .FALSE.
+            SELECT( K ) = .FALSE.
+         ELSE
+            IF( WI( K ).EQ.ZERO ) THEN
+               IF( SELECT( K ) )
+     $            M = M + 1
+            ELSE
+               PAIR = .TRUE.
+               IF( SELECT( K ) .OR. SELECT( K+1 ) ) THEN
+                  SELECT( K ) = .TRUE.
+                  M = M + 2
+               END IF
+            END IF
+         END IF
+   10 CONTINUE
+*
+      INFO = 0
+      IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN
+         INFO = -1
+      ELSE IF( .NOT.FROMQR .AND. .NOT.LSAME( EIGSRC, 'N' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOINIT .AND. .NOT.LSAME( INITV, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDH.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDVL.LT.1 .OR. ( LEFTV .AND. LDVL.LT.N ) ) THEN
+         INFO = -11
+      ELSE IF( LDVR.LT.1 .OR. ( RIGHTV .AND. LDVR.LT.N ) ) THEN
+         INFO = -13
+      ELSE IF( MM.LT.M ) THEN
+         INFO = -14
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SHSEIN', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Set machine-dependent constants.
+*
+      UNFL = SLAMCH( 'Safe minimum' )
+      ULP = SLAMCH( 'Precision' )
+      SMLNUM = UNFL*( N / ULP )
+      BIGNUM = ( ONE-ULP ) / SMLNUM
+*
+      LDWORK = N + 1
+*
+      KL = 1
+      KLN = 0
+      IF( FROMQR ) THEN
+         KR = 0
+      ELSE
+         KR = N
+      END IF
+      KSR = 1
+*
+      DO 120 K = 1, N
+         IF( SELECT( K ) ) THEN
+*
+*           Compute eigenvector(s) corresponding to W(K).
+*
+            IF( FROMQR ) THEN
+*
+*              If affiliation of eigenvalues is known, check whether
+*              the matrix splits.
+*
+*              Determine KL and KR such that 1 <= KL <= K <= KR <= N
+*              and H(KL,KL-1) and H(KR+1,KR) are zero (or KL = 1 or
+*              KR = N).
+*
+*              Then inverse iteration can be performed with the
+*              submatrix H(KL:N,KL:N) for a left eigenvector, and with
+*              the submatrix H(1:KR,1:KR) for a right eigenvector.
+*
+               DO 20 I = K, KL + 1, -1
+                  IF( H( I, I-1 ).EQ.ZERO )
+     $               GO TO 30
+   20          CONTINUE
+   30          CONTINUE
+               KL = I
+               IF( K.GT.KR ) THEN
+                  DO 40 I = K, N - 1
+                     IF( H( I+1, I ).EQ.ZERO )
+     $                  GO TO 50
+   40             CONTINUE
+   50             CONTINUE
+                  KR = I
+               END IF
+            END IF
+*
+            IF( KL.NE.KLN ) THEN
+               KLN = KL
+*
+*              Compute infinity-norm of submatrix H(KL:KR,KL:KR) if it
+*              has not ben computed before.
+*
+               HNORM = SLANHS( 'I', KR-KL+1, H( KL, KL ), LDH, WORK )
+               IF( HNORM.GT.ZERO ) THEN
+                  EPS3 = HNORM*ULP
+               ELSE
+                  EPS3 = SMLNUM
+               END IF
+            END IF
+*
+*           Perturb eigenvalue if it is close to any previous
+*           selected eigenvalues affiliated to the submatrix
+*           H(KL:KR,KL:KR). Close roots are modified by EPS3.
+*
+            WKR = WR( K )
+            WKI = WI( K )
+   60       CONTINUE
+            DO 70 I = K - 1, KL, -1
+               IF( SELECT( I ) .AND. ABS( WR( I )-WKR )+
+     $             ABS( WI( I )-WKI ).LT.EPS3 ) THEN
+                  WKR = WKR + EPS3
+                  GO TO 60
+               END IF
+   70       CONTINUE
+            WR( K ) = WKR
+*
+            PAIR = WKI.NE.ZERO
+            IF( PAIR ) THEN
+               KSI = KSR + 1
+            ELSE
+               KSI = KSR
+            END IF
+            IF( LEFTV ) THEN
+*
+*              Compute left eigenvector.
+*
+               CALL SLAEIN( .FALSE., NOINIT, N-KL+1, H( KL, KL ), LDH,
+     $                      WKR, WKI, VL( KL, KSR ), VL( KL, KSI ),
+     $                      WORK, LDWORK, WORK( N*N+N+1 ), EPS3, SMLNUM,
+     $                      BIGNUM, IINFO )
+               IF( IINFO.GT.0 ) THEN
+                  IF( PAIR ) THEN
+                     INFO = INFO + 2
+                  ELSE
+                     INFO = INFO + 1
+                  END IF
+                  IFAILL( KSR ) = K
+                  IFAILL( KSI ) = K
+               ELSE
+                  IFAILL( KSR ) = 0
+                  IFAILL( KSI ) = 0
+               END IF
+               DO 80 I = 1, KL - 1
+                  VL( I, KSR ) = ZERO
+   80          CONTINUE
+               IF( PAIR ) THEN
+                  DO 90 I = 1, KL - 1
+                     VL( I, KSI ) = ZERO
+   90             CONTINUE
+               END IF
+            END IF
+            IF( RIGHTV ) THEN
+*
+*              Compute right eigenvector.
+*
+               CALL SLAEIN( .TRUE., NOINIT, KR, H, LDH, WKR, WKI,
+     $                      VR( 1, KSR ), VR( 1, KSI ), WORK, LDWORK,
+     $                      WORK( N*N+N+1 ), EPS3, SMLNUM, BIGNUM,
+     $                      IINFO )
+               IF( IINFO.GT.0 ) THEN
+                  IF( PAIR ) THEN
+                     INFO = INFO + 2
+                  ELSE
+                     INFO = INFO + 1
+                  END IF
+                  IFAILR( KSR ) = K
+                  IFAILR( KSI ) = K
+               ELSE
+                  IFAILR( KSR ) = 0
+                  IFAILR( KSI ) = 0
+               END IF
+               DO 100 I = KR + 1, N
+                  VR( I, KSR ) = ZERO
+  100          CONTINUE
+               IF( PAIR ) THEN
+                  DO 110 I = KR + 1, N
+                     VR( I, KSI ) = ZERO
+  110             CONTINUE
+               END IF
+            END IF
+*
+            IF( PAIR ) THEN
+               KSR = KSR + 2
+            ELSE
+               KSR = KSR + 1
+            END IF
+         END IF
+  120 CONTINUE
+*
+      RETURN
+*
+*     End of SHSEIN
+*
+      END
diff --git a/costa/native/external/lapack/shseqr.f b/costa/native/external/lapack/shseqr.f
new file mode 100644
index 000000000..00c08ab39
--- /dev/null
+++ b/costa/native/external/lapack/shseqr.f
@@ -0,0 +1,467 @@
+      SUBROUTINE SHSEQR( JOB, COMPZ, N, ILO, IHI, H, LDH, WR, WI, Z,
+     $                   LDZ, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPZ, JOB
+      INTEGER            IHI, ILO, INFO, LDH, LDZ, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      REAL               H( LDH, * ), WI( * ), WORK( * ), WR( * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SHSEQR computes the eigenvalues of a real upper Hessenberg matrix H
+*  and, optionally, the matrices T and Z from the Schur decomposition
+*  H = Z T Z**T, where T is an upper quasi-triangular matrix (the Schur
+*  form), and Z is the orthogonal matrix of Schur vectors.
+*
+*  Optionally Z may be postmultiplied into an input orthogonal matrix Q,
+*  so that this routine can give the Schur factorization of a matrix A
+*  which has been reduced to the Hessenberg form H by the orthogonal
+*  matrix Q:  A = Q*H*Q**T = (QZ)*T*(QZ)**T.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          = 'E':  compute eigenvalues only;
+*          = 'S':  compute eigenvalues and the Schur form T.
+*
+*  COMPZ   (input) CHARACTER*1
+*          = 'N':  no Schur vectors are computed;
+*          = 'I':  Z is initialized to the unit matrix and the matrix Z
+*                  of Schur vectors of H is returned;
+*          = 'V':  Z must contain an orthogonal matrix Q on entry, and
+*                  the product Q*Z is returned.
+*
+*  N       (input) INTEGER
+*          The order of the matrix H.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          It is assumed that H is already upper triangular in rows
+*          and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
+*          set by a previous call to SGEBAL, and then passed to SGEHRD
+*          when the matrix output by SGEBAL is reduced to Hessenberg
+*          form. Otherwise ILO and IHI should be set to 1 and N
+*          respectively.
+*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+*
+*  H       (input/output) REAL array, dimension (LDH,N)
+*          On entry, the upper Hessenberg matrix H.
+*          On exit, if JOB = 'S', H contains the upper quasi-triangular
+*          matrix T from the Schur decomposition (the Schur form);
+*          2-by-2 diagonal blocks (corresponding to complex conjugate
+*          pairs of eigenvalues) are returned in standard form, with
+*          H(i,i) = H(i+1,i+1) and H(i+1,i)*H(i,i+1) < 0. If JOB = 'E',
+*          the contents of H are unspecified on exit.
+*
+*  LDH     (input) INTEGER
+*          The leading dimension of the array H. LDH >= max(1,N).
+*
+*  WR      (output) REAL array, dimension (N)
+*  WI      (output) REAL array, dimension (N)
+*          The real and imaginary parts, respectively, of the computed
+*          eigenvalues. If two eigenvalues are computed as a complex
+*          conjugate pair, they are stored in consecutive elements of
+*          WR and WI, say the i-th and (i+1)th, with WI(i) > 0 and
+*          WI(i+1) < 0. If JOB = 'S', the eigenvalues are stored in the
+*          same order as on the diagonal of the Schur form returned in
+*          H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2
+*          diagonal block, WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and
+*          WI(i+1) = -WI(i).
+*
+*  Z       (input/output) REAL array, dimension (LDZ,N)
+*          If COMPZ = 'N': Z is not referenced.
+*          If COMPZ = 'I': on entry, Z need not be set, and on exit, Z
+*          contains the orthogonal matrix Z of the Schur vectors of H.
+*          If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q,
+*          which is assumed to be equal to the unit matrix except for
+*          the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z.
+*          Normally Q is the orthogonal matrix generated by SORGHR after
+*          the call to SGEHRD which formed the Hessenberg matrix H.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.
+*          LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,N).
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, SHSEQR failed to compute all of the
+*                eigenvalues in a total of 30*(IHI-ILO+1) iterations;
+*                elements 1:ilo-1 and i+1:n of WR and WI contain those
+*                eigenvalues which have been successfully computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 )
+      REAL               CONST
+      PARAMETER          ( CONST = 1.5E+0 )
+      INTEGER            NSMAX, LDS
+      PARAMETER          ( NSMAX = 15, LDS = NSMAX )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            INITZ, LQUERY, WANTT, WANTZ
+      INTEGER            I, I1, I2, IERR, II, ITEMP, ITN, ITS, J, K, L,
+     $                   MAXB, NH, NR, NS, NV
+      REAL               ABSW, OVFL, SMLNUM, TAU, TEMP, TST1, ULP, UNFL
+*     ..
+*     .. Local Arrays ..
+      REAL               S( LDS, NSMAX ), V( NSMAX+1 ), VV( NSMAX+1 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV, ISAMAX
+      REAL               SLAMCH, SLANHS, SLAPY2
+      EXTERNAL           LSAME, ILAENV, ISAMAX, SLAMCH, SLANHS, SLAPY2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SGEMV, SLABAD, SLACPY, SLAHQR, SLARFG,
+     $                   SLARFX, SLASET, SSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      WANTT = LSAME( JOB, 'S' )
+      INITZ = LSAME( COMPZ, 'I' )
+      WANTZ = INITZ .OR. LSAME( COMPZ, 'V' )
+*
+      INFO = 0
+      WORK( 1 ) = MAX( 1, N )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.LSAME( JOB, 'E' ) .AND. .NOT.WANTT ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( COMPZ, 'N' ) .AND. .NOT.WANTZ ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN
+         INFO = -5
+      ELSE IF( LDH.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDZ.LT.1 .OR. WANTZ .AND. LDZ.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SHSEQR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Initialize Z, if necessary
+*
+      IF( INITZ )
+     $   CALL SLASET( 'Full', N, N, ZERO, ONE, Z, LDZ )
+*
+*     Store the eigenvalues isolated by SGEBAL.
+*
+      DO 10 I = 1, ILO - 1
+         WR( I ) = H( I, I )
+         WI( I ) = ZERO
+   10 CONTINUE
+      DO 20 I = IHI + 1, N
+         WR( I ) = H( I, I )
+         WI( I ) = ZERO
+   20 CONTINUE
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+      IF( ILO.EQ.IHI ) THEN
+         WR( ILO ) = H( ILO, ILO )
+         WI( ILO ) = ZERO
+         RETURN
+      END IF
+*
+*     Set rows and columns ILO to IHI to zero below the first
+*     subdiagonal.
+*
+      DO 40 J = ILO, IHI - 2
+         DO 30 I = J + 2, N
+            H( I, J ) = ZERO
+   30    CONTINUE
+   40 CONTINUE
+      NH = IHI - ILO + 1
+*
+*     Determine the order of the multi-shift QR algorithm to be used.
+*
+      NS = ILAENV( 4, 'SHSEQR', JOB // COMPZ, N, ILO, IHI, -1 )
+      MAXB = ILAENV( 8, 'SHSEQR', JOB // COMPZ, N, ILO, IHI, -1 )
+      IF( NS.LE.2 .OR. NS.GT.NH .OR. MAXB.GE.NH ) THEN
+*
+*        Use the standard double-shift algorithm
+*
+         CALL SLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI, ILO,
+     $                IHI, Z, LDZ, INFO )
+         RETURN
+      END IF
+      MAXB = MAX( 3, MAXB )
+      NS = MIN( NS, MAXB, NSMAX )
+*
+*     Now 2 < NS <= MAXB < NH.
+*
+*     Set machine-dependent constants for the stopping criterion.
+*     If norm(H) <= sqrt(OVFL), overflow should not occur.
+*
+      UNFL = SLAMCH( 'Safe minimum' )
+      OVFL = ONE / UNFL
+      CALL SLABAD( UNFL, OVFL )
+      ULP = SLAMCH( 'Precision' )
+      SMLNUM = UNFL*( NH / ULP )
+*
+*     I1 and I2 are the indices of the first row and last column of H
+*     to which transformations must be applied. If eigenvalues only are
+*     being computed, I1 and I2 are set inside the main loop.
+*
+      IF( WANTT ) THEN
+         I1 = 1
+         I2 = N
+      END IF
+*
+*     ITN is the total number of multiple-shift QR iterations allowed.
+*
+      ITN = 30*NH
+*
+*     The main loop begins here. I is the loop index and decreases from
+*     IHI to ILO in steps of at most MAXB. Each iteration of the loop
+*     works with the active submatrix in rows and columns L to I.
+*     Eigenvalues I+1 to IHI have already converged. Either L = ILO or
+*     H(L,L-1) is negligible so that the matrix splits.
+*
+      I = IHI
+   50 CONTINUE
+      L = ILO
+      IF( I.LT.ILO )
+     $   GO TO 170
+*
+*     Perform multiple-shift QR iterations on rows and columns ILO to I
+*     until a submatrix of order at most MAXB splits off at the bottom
+*     because a subdiagonal element has become negligible.
+*
+      DO 150 ITS = 0, ITN
+*
+*        Look for a single small subdiagonal element.
+*
+         DO 60 K = I, L + 1, -1
+            TST1 = ABS( H( K-1, K-1 ) ) + ABS( H( K, K ) )
+            IF( TST1.EQ.ZERO )
+     $         TST1 = SLANHS( '1', I-L+1, H( L, L ), LDH, WORK )
+            IF( ABS( H( K, K-1 ) ).LE.MAX( ULP*TST1, SMLNUM ) )
+     $         GO TO 70
+   60    CONTINUE
+   70    CONTINUE
+         L = K
+         IF( L.GT.ILO ) THEN
+*
+*           H(L,L-1) is negligible.
+*
+            H( L, L-1 ) = ZERO
+         END IF
+*
+*        Exit from loop if a submatrix of order <= MAXB has split off.
+*
+         IF( L.GE.I-MAXB+1 )
+     $      GO TO 160
+*
+*        Now the active submatrix is in rows and columns L to I. If
+*        eigenvalues only are being computed, only the active submatrix
+*        need be transformed.
+*
+         IF( .NOT.WANTT ) THEN
+            I1 = L
+            I2 = I
+         END IF
+*
+         IF( ITS.EQ.20 .OR. ITS.EQ.30 ) THEN
+*
+*           Exceptional shifts.
+*
+            DO 80 II = I - NS + 1, I
+               WR( II ) = CONST*( ABS( H( II, II-1 ) )+
+     $                    ABS( H( II, II ) ) )
+               WI( II ) = ZERO
+   80       CONTINUE
+         ELSE
+*
+*           Use eigenvalues of trailing submatrix of order NS as shifts.
+*
+            CALL SLACPY( 'Full', NS, NS, H( I-NS+1, I-NS+1 ), LDH, S,
+     $                   LDS )
+            CALL SLAHQR( .FALSE., .FALSE., NS, 1, NS, S, LDS,
+     $                   WR( I-NS+1 ), WI( I-NS+1 ), 1, NS, Z, LDZ,
+     $                   IERR )
+            IF( IERR.GT.0 ) THEN
+*
+*              If SLAHQR failed to compute all NS eigenvalues, use the
+*              unconverged diagonal elements as the remaining shifts.
+*
+               DO 90 II = 1, IERR
+                  WR( I-NS+II ) = S( II, II )
+                  WI( I-NS+II ) = ZERO
+   90          CONTINUE
+            END IF
+         END IF
+*
+*        Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns))
+*        where G is the Hessenberg submatrix H(L:I,L:I) and w is
+*        the vector of shifts (stored in WR and WI). The result is
+*        stored in the local array V.
+*
+         V( 1 ) = ONE
+         DO 100 II = 2, NS + 1
+            V( II ) = ZERO
+  100    CONTINUE
+         NV = 1
+         DO 120 J = I - NS + 1, I
+            IF( WI( J ).GE.ZERO ) THEN
+               IF( WI( J ).EQ.ZERO ) THEN
+*
+*                 real shift
+*
+                  CALL SCOPY( NV+1, V, 1, VV, 1 )
+                  CALL SGEMV( 'No transpose', NV+1, NV, ONE, H( L, L ),
+     $                        LDH, VV, 1, -WR( J ), V, 1 )
+                  NV = NV + 1
+               ELSE IF( WI( J ).GT.ZERO ) THEN
+*
+*                 complex conjugate pair of shifts
+*
+                  CALL SCOPY( NV+1, V, 1, VV, 1 )
+                  CALL SGEMV( 'No transpose', NV+1, NV, ONE, H( L, L ),
+     $                        LDH, V, 1, -TWO*WR( J ), VV, 1 )
+                  ITEMP = ISAMAX( NV+1, VV, 1 )
+                  TEMP = ONE / MAX( ABS( VV( ITEMP ) ), SMLNUM )
+                  CALL SSCAL( NV+1, TEMP, VV, 1 )
+                  ABSW = SLAPY2( WR( J ), WI( J ) )
+                  TEMP = ( TEMP*ABSW )*ABSW
+                  CALL SGEMV( 'No transpose', NV+2, NV+1, ONE,
+     $                        H( L, L ), LDH, VV, 1, TEMP, V, 1 )
+                  NV = NV + 2
+               END IF
+*
+*              Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero,
+*              reset it to the unit vector.
+*
+               ITEMP = ISAMAX( NV, V, 1 )
+               TEMP = ABS( V( ITEMP ) )
+               IF( TEMP.EQ.ZERO ) THEN
+                  V( 1 ) = ONE
+                  DO 110 II = 2, NV
+                     V( II ) = ZERO
+  110             CONTINUE
+               ELSE
+                  TEMP = MAX( TEMP, SMLNUM )
+                  CALL SSCAL( NV, ONE / TEMP, V, 1 )
+               END IF
+            END IF
+  120    CONTINUE
+*
+*        Multiple-shift QR step
+*
+         DO 140 K = L, I - 1
+*
+*           The first iteration of this loop determines a reflection G
+*           from the vector V and applies it from left and right to H,
+*           thus creating a nonzero bulge below the subdiagonal.
+*
+*           Each subsequent iteration determines a reflection G to
+*           restore the Hessenberg form in the (K-1)th column, and thus
+*           chases the bulge one step toward the bottom of the active
+*           submatrix. NR is the order of G.
+*
+            NR = MIN( NS+1, I-K+1 )
+            IF( K.GT.L )
+     $         CALL SCOPY( NR, H( K, K-1 ), 1, V, 1 )
+            CALL SLARFG( NR, V( 1 ), V( 2 ), 1, TAU )
+            IF( K.GT.L ) THEN
+               H( K, K-1 ) = V( 1 )
+               DO 130 II = K + 1, I
+                  H( II, K-1 ) = ZERO
+  130          CONTINUE
+            END IF
+            V( 1 ) = ONE
+*
+*           Apply G from the left to transform the rows of the matrix in
+*           columns K to I2.
+*
+            CALL SLARFX( 'Left', NR, I2-K+1, V, TAU, H( K, K ), LDH,
+     $                   WORK )
+*
+*           Apply G from the right to transform the columns of the
+*           matrix in rows I1 to min(K+NR,I).
+*
+            CALL SLARFX( 'Right', MIN( K+NR, I )-I1+1, NR, V, TAU,
+     $                   H( I1, K ), LDH, WORK )
+*
+            IF( WANTZ ) THEN
+*
+*              Accumulate transformations in the matrix Z
+*
+               CALL SLARFX( 'Right', NH, NR, V, TAU, Z( ILO, K ), LDZ,
+     $                      WORK )
+            END IF
+  140    CONTINUE
+*
+  150 CONTINUE
+*
+*     Failure to converge in remaining number of iterations
+*
+      INFO = I
+      RETURN
+*
+  160 CONTINUE
+*
+*     A submatrix of order <= MAXB in rows and columns L to I has split
+*     off. Use the double-shift QR algorithm to handle it.
+*
+      CALL SLAHQR( WANTT, WANTZ, N, L, I, H, LDH, WR, WI, ILO, IHI, Z,
+     $             LDZ, INFO )
+      IF( INFO.GT.0 )
+     $   RETURN
+*
+*     Decrement number of remaining iterations, and return to start of
+*     the main loop with a new value of I.
+*
+      ITN = ITN - ITS
+      I = L - 1
+      GO TO 50
+*
+  170 CONTINUE
+      WORK( 1 ) = MAX( 1, N )
+      RETURN
+*
+*     End of SHSEQR
+*
+      END
diff --git a/costa/native/external/lapack/slabad.f b/costa/native/external/lapack/slabad.f
new file mode 100644
index 000000000..f5c8a0bae
--- /dev/null
+++ b/costa/native/external/lapack/slabad.f
@@ -0,0 +1,56 @@
+      SUBROUTINE SLABAD( SMALL, LARGE )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      REAL               LARGE, SMALL
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLABAD takes as input the values computed by SLAMCH for underflow and
+*  overflow, and returns the square root of each of these values if the
+*  log of LARGE is sufficiently large.  This subroutine is intended to
+*  identify machines with a large exponent range, such as the Crays, and
+*  redefine the underflow and overflow limits to be the square roots of
+*  the values computed by SLAMCH.  This subroutine is needed because
+*  SLAMCH does not compensate for poor arithmetic in the upper half of
+*  the exponent range, as is found on a Cray.
+*
+*  Arguments
+*  =========
+*
+*  SMALL   (input/output) REAL
+*          On entry, the underflow threshold as computed by SLAMCH.
+*          On exit, if LOG10(LARGE) is sufficiently large, the square
+*          root of SMALL, otherwise unchanged.
+*
+*  LARGE   (input/output) REAL
+*          On entry, the overflow threshold as computed by SLAMCH.
+*          On exit, if LOG10(LARGE) is sufficiently large, the square
+*          root of LARGE, otherwise unchanged.
+*
+*  =====================================================================
+*
+*     .. Intrinsic Functions ..
+      INTRINSIC          LOG10, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     If it looks like we're on a Cray, take the square root of
+*     SMALL and LARGE to avoid overflow and underflow problems.
+*
+      IF( LOG10( LARGE ).GT.2000. ) THEN
+         SMALL = SQRT( SMALL )
+         LARGE = SQRT( LARGE )
+      END IF
+*
+      RETURN
+*
+*     End of SLABAD
+*
+      END
diff --git a/costa/native/external/lapack/slabrd.f b/costa/native/external/lapack/slabrd.f
new file mode 100644
index 000000000..d6da46050
--- /dev/null
+++ b/costa/native/external/lapack/slabrd.f
@@ -0,0 +1,291 @@
+      SUBROUTINE SLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y,
+     $                   LDY )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            LDA, LDX, LDY, M, N, NB
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), D( * ), E( * ), TAUP( * ),
+     $                   TAUQ( * ), X( LDX, * ), Y( LDY, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLABRD reduces the first NB rows and columns of a real general
+*  m by n matrix A to upper or lower bidiagonal form by an orthogonal
+*  transformation Q' * A * P, and returns the matrices X and Y which
+*  are needed to apply the transformation to the unreduced part of A.
+*
+*  If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower
+*  bidiagonal form.
+*
+*  This is an auxiliary routine called by SGEBRD
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows in the matrix A.
+*
+*  N       (input) INTEGER
+*          The number of columns in the matrix A.
+*
+*  NB      (input) INTEGER
+*          The number of leading rows and columns of A to be reduced.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the m by n general matrix to be reduced.
+*          On exit, the first NB rows and columns of the matrix are
+*          overwritten; the rest of the array is unchanged.
+*          If m >= n, elements on and below the diagonal in the first NB
+*            columns, with the array TAUQ, represent the orthogonal
+*            matrix Q as a product of elementary reflectors; and
+*            elements above the diagonal in the first NB rows, with the
+*            array TAUP, represent the orthogonal matrix P as a product
+*            of elementary reflectors.
+*          If m < n, elements below the diagonal in the first NB
+*            columns, with the array TAUQ, represent the orthogonal
+*            matrix Q as a product of elementary reflectors, and
+*            elements on and above the diagonal in the first NB rows,
+*            with the array TAUP, represent the orthogonal matrix P as
+*            a product of elementary reflectors.
+*          See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  D       (output) REAL array, dimension (NB)
+*          The diagonal elements of the first NB rows and columns of
+*          the reduced matrix.  D(i) = A(i,i).
+*
+*  E       (output) REAL array, dimension (NB)
+*          The off-diagonal elements of the first NB rows and columns of
+*          the reduced matrix.
+*
+*  TAUQ    (output) REAL array dimension (NB)
+*          The scalar factors of the elementary reflectors which
+*          represent the orthogonal matrix Q. See Further Details.
+*
+*  TAUP    (output) REAL array, dimension (NB)
+*          The scalar factors of the elementary reflectors which
+*          represent the orthogonal matrix P. See Further Details.
+*
+*  X       (output) REAL array, dimension (LDX,NB)
+*          The m-by-nb matrix X required to update the unreduced part
+*          of A.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X. LDX >= M.
+*
+*  Y       (output) REAL array, dimension (LDY,NB)
+*          The n-by-nb matrix Y required to update the unreduced part
+*          of A.
+*
+*  LDY     (output) INTEGER
+*          The leading dimension of the array Y. LDY >= N.
+*
+*  Further Details
+*  ===============
+*
+*  The matrices Q and P are represented as products of elementary
+*  reflectors:
+*
+*     Q = H(1) H(2) . . . H(nb)  and  P = G(1) G(2) . . . G(nb)
+*
+*  Each H(i) and G(i) has the form:
+*
+*     H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+*
+*  where tauq and taup are real scalars, and v and u are real vectors.
+*
+*  If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in
+*  A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in
+*  A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
+*
+*  If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in
+*  A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in
+*  A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
+*
+*  The elements of the vectors v and u together form the m-by-nb matrix
+*  V and the nb-by-n matrix U' which are needed, with X and Y, to apply
+*  the transformation to the unreduced part of the matrix, using a block
+*  update of the form:  A := A - V*Y' - X*U'.
+*
+*  The contents of A on exit are illustrated by the following examples
+*  with nb = 2:
+*
+*  m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
+*
+*    (  1   1   u1  u1  u1 )           (  1   u1  u1  u1  u1  u1 )
+*    (  v1  1   1   u2  u2 )           (  1   1   u2  u2  u2  u2 )
+*    (  v1  v2  a   a   a  )           (  v1  1   a   a   a   a  )
+*    (  v1  v2  a   a   a  )           (  v1  v2  a   a   a   a  )
+*    (  v1  v2  a   a   a  )           (  v1  v2  a   a   a   a  )
+*    (  v1  v2  a   a   a  )
+*
+*  where a denotes an element of the original matrix which is unchanged,
+*  vi denotes an element of the vector defining H(i), and ui an element
+*  of the vector defining G(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMV, SLARFG, SSCAL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 .OR. N.LE.0 )
+     $   RETURN
+*
+      IF( M.GE.N ) THEN
+*
+*        Reduce to upper bidiagonal form
+*
+         DO 10 I = 1, NB
+*
+*           Update A(i:m,i)
+*
+            CALL SGEMV( 'No transpose', M-I+1, I-1, -ONE, A( I, 1 ),
+     $                  LDA, Y( I, 1 ), LDY, ONE, A( I, I ), 1 )
+            CALL SGEMV( 'No transpose', M-I+1, I-1, -ONE, X( I, 1 ),
+     $                  LDX, A( 1, I ), 1, ONE, A( I, I ), 1 )
+*
+*           Generate reflection Q(i) to annihilate A(i+1:m,i)
+*
+            CALL SLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1,
+     $                   TAUQ( I ) )
+            D( I ) = A( I, I )
+            IF( I.LT.N ) THEN
+               A( I, I ) = ONE
+*
+*              Compute Y(i+1:n,i)
+*
+               CALL SGEMV( 'Transpose', M-I+1, N-I, ONE, A( I, I+1 ),
+     $                     LDA, A( I, I ), 1, ZERO, Y( I+1, I ), 1 )
+               CALL SGEMV( 'Transpose', M-I+1, I-1, ONE, A( I, 1 ), LDA,
+     $                     A( I, I ), 1, ZERO, Y( 1, I ), 1 )
+               CALL SGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ),
+     $                     LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
+               CALL SGEMV( 'Transpose', M-I+1, I-1, ONE, X( I, 1 ), LDX,
+     $                     A( I, I ), 1, ZERO, Y( 1, I ), 1 )
+               CALL SGEMV( 'Transpose', I-1, N-I, -ONE, A( 1, I+1 ),
+     $                     LDA, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
+               CALL SSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 )
+*
+*              Update A(i,i+1:n)
+*
+               CALL SGEMV( 'No transpose', N-I, I, -ONE, Y( I+1, 1 ),
+     $                     LDY, A( I, 1 ), LDA, ONE, A( I, I+1 ), LDA )
+               CALL SGEMV( 'Transpose', I-1, N-I, -ONE, A( 1, I+1 ),
+     $                     LDA, X( I, 1 ), LDX, ONE, A( I, I+1 ), LDA )
+*
+*              Generate reflection P(i) to annihilate A(i,i+2:n)
+*
+               CALL SLARFG( N-I, A( I, I+1 ), A( I, MIN( I+2, N ) ),
+     $                      LDA, TAUP( I ) )
+               E( I ) = A( I, I+1 )
+               A( I, I+1 ) = ONE
+*
+*              Compute X(i+1:m,i)
+*
+               CALL SGEMV( 'No transpose', M-I, N-I, ONE, A( I+1, I+1 ),
+     $                     LDA, A( I, I+1 ), LDA, ZERO, X( I+1, I ), 1 )
+               CALL SGEMV( 'Transpose', N-I, I, ONE, Y( I+1, 1 ), LDY,
+     $                     A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 )
+               CALL SGEMV( 'No transpose', M-I, I, -ONE, A( I+1, 1 ),
+     $                     LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
+               CALL SGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ),
+     $                     LDA, A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 )
+               CALL SGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ),
+     $                     LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
+               CALL SSCAL( M-I, TAUP( I ), X( I+1, I ), 1 )
+            END IF
+   10    CONTINUE
+      ELSE
+*
+*        Reduce to lower bidiagonal form
+*
+         DO 20 I = 1, NB
+*
+*           Update A(i,i:n)
+*
+            CALL SGEMV( 'No transpose', N-I+1, I-1, -ONE, Y( I, 1 ),
+     $                  LDY, A( I, 1 ), LDA, ONE, A( I, I ), LDA )
+            CALL SGEMV( 'Transpose', I-1, N-I+1, -ONE, A( 1, I ), LDA,
+     $                  X( I, 1 ), LDX, ONE, A( I, I ), LDA )
+*
+*           Generate reflection P(i) to annihilate A(i,i+1:n)
+*
+            CALL SLARFG( N-I+1, A( I, I ), A( I, MIN( I+1, N ) ), LDA,
+     $                   TAUP( I ) )
+            D( I ) = A( I, I )
+            IF( I.LT.M ) THEN
+               A( I, I ) = ONE
+*
+*              Compute X(i+1:m,i)
+*
+               CALL SGEMV( 'No transpose', M-I, N-I+1, ONE, A( I+1, I ),
+     $                     LDA, A( I, I ), LDA, ZERO, X( I+1, I ), 1 )
+               CALL SGEMV( 'Transpose', N-I+1, I-1, ONE, Y( I, 1 ), LDY,
+     $                     A( I, I ), LDA, ZERO, X( 1, I ), 1 )
+               CALL SGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ),
+     $                     LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
+               CALL SGEMV( 'No transpose', I-1, N-I+1, ONE, A( 1, I ),
+     $                     LDA, A( I, I ), LDA, ZERO, X( 1, I ), 1 )
+               CALL SGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ),
+     $                     LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
+               CALL SSCAL( M-I, TAUP( I ), X( I+1, I ), 1 )
+*
+*              Update A(i+1:m,i)
+*
+               CALL SGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ),
+     $                     LDA, Y( I, 1 ), LDY, ONE, A( I+1, I ), 1 )
+               CALL SGEMV( 'No transpose', M-I, I, -ONE, X( I+1, 1 ),
+     $                     LDX, A( 1, I ), 1, ONE, A( I+1, I ), 1 )
+*
+*              Generate reflection Q(i) to annihilate A(i+2:m,i)
+*
+               CALL SLARFG( M-I, A( I+1, I ), A( MIN( I+2, M ), I ), 1,
+     $                      TAUQ( I ) )
+               E( I ) = A( I+1, I )
+               A( I+1, I ) = ONE
+*
+*              Compute Y(i+1:n,i)
+*
+               CALL SGEMV( 'Transpose', M-I, N-I, ONE, A( I+1, I+1 ),
+     $                     LDA, A( I+1, I ), 1, ZERO, Y( I+1, I ), 1 )
+               CALL SGEMV( 'Transpose', M-I, I-1, ONE, A( I+1, 1 ), LDA,
+     $                     A( I+1, I ), 1, ZERO, Y( 1, I ), 1 )
+               CALL SGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ),
+     $                     LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
+               CALL SGEMV( 'Transpose', M-I, I, ONE, X( I+1, 1 ), LDX,
+     $                     A( I+1, I ), 1, ZERO, Y( 1, I ), 1 )
+               CALL SGEMV( 'Transpose', I, N-I, -ONE, A( 1, I+1 ), LDA,
+     $                     Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
+               CALL SSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 )
+            END IF
+   20    CONTINUE
+      END IF
+      RETURN
+*
+*     End of SLABRD
+*
+      END
diff --git a/costa/native/external/lapack/slacon.f b/costa/native/external/lapack/slacon.f
new file mode 100644
index 000000000..719f87e9c
--- /dev/null
+++ b/costa/native/external/lapack/slacon.f
@@ -0,0 +1,204 @@
+      SUBROUTINE SLACON( N, V, X, ISGN, EST, KASE )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            KASE, N
+      REAL               EST
+*     ..
+*     .. Array Arguments ..
+      INTEGER            ISGN( * )
+      REAL               V( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLACON estimates the 1-norm of a square, real matrix A.
+*  Reverse communication is used for evaluating matrix-vector products.
+*
+*  Arguments
+*  =========
+*
+*  N      (input) INTEGER
+*         The order of the matrix.  N >= 1.
+*
+*  V      (workspace) REAL array, dimension (N)
+*         On the final return, V = A*W,  where  EST = norm(V)/norm(W)
+*         (W is not returned).
+*
+*  X      (input/output) REAL array, dimension (N)
+*         On an intermediate return, X should be overwritten by
+*               A * X,   if KASE=1,
+*               A' * X,  if KASE=2,
+*         and SLACON must be re-called with all the other parameters
+*         unchanged.
+*
+*  ISGN   (workspace) INTEGER array, dimension (N)
+*
+*  EST    (output) REAL
+*         An estimate (a lower bound) for norm(A).
+*
+*  KASE   (input/output) INTEGER
+*         On the initial call to SLACON, KASE should be 0.
+*         On an intermediate return, KASE will be 1 or 2, indicating
+*         whether X should be overwritten by A * X  or A' * X.
+*         On the final return from SLACON, KASE will again be 0.
+*
+*  Further Details
+*  ======= =======
+*
+*  Contributed by Nick Higham, University of Manchester.
+*  Originally named SONEST, dated March 16, 1988.
+*
+*  Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of
+*  a real or complex matrix, with applications to condition estimation",
+*  ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      REAL               ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, ITER, J, JLAST, JUMP
+      REAL               ALTSGN, ESTOLD, TEMP
+*     ..
+*     .. External Functions ..
+      INTEGER            ISAMAX
+      REAL               SASUM
+      EXTERNAL           ISAMAX, SASUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, NINT, REAL, SIGN
+*     ..
+*     .. Save statement ..
+      SAVE
+*     ..
+*     .. Executable Statements ..
+*
+      IF( KASE.EQ.0 ) THEN
+         DO 10 I = 1, N
+            X( I ) = ONE / REAL( N )
+   10    CONTINUE
+         KASE = 1
+         JUMP = 1
+         RETURN
+      END IF
+*
+      GO TO ( 20, 40, 70, 110, 140 )JUMP
+*
+*     ................ ENTRY   (JUMP = 1)
+*     FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY A*X.
+*
+   20 CONTINUE
+      IF( N.EQ.1 ) THEN
+         V( 1 ) = X( 1 )
+         EST = ABS( V( 1 ) )
+*        ... QUIT
+         GO TO 150
+      END IF
+      EST = SASUM( N, X, 1 )
+*
+      DO 30 I = 1, N
+         X( I ) = SIGN( ONE, X( I ) )
+         ISGN( I ) = NINT( X( I ) )
+   30 CONTINUE
+      KASE = 2
+      JUMP = 2
+      RETURN
+*
+*     ................ ENTRY   (JUMP = 2)
+*     FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X.
+*
+   40 CONTINUE
+      J = ISAMAX( N, X, 1 )
+      ITER = 2
+*
+*     MAIN LOOP - ITERATIONS 2,3,...,ITMAX.
+*
+   50 CONTINUE
+      DO 60 I = 1, N
+         X( I ) = ZERO
+   60 CONTINUE
+      X( J ) = ONE
+      KASE = 1
+      JUMP = 3
+      RETURN
+*
+*     ................ ENTRY   (JUMP = 3)
+*     X HAS BEEN OVERWRITTEN BY A*X.
+*
+   70 CONTINUE
+      CALL SCOPY( N, X, 1, V, 1 )
+      ESTOLD = EST
+      EST = SASUM( N, V, 1 )
+      DO 80 I = 1, N
+         IF( NINT( SIGN( ONE, X( I ) ) ).NE.ISGN( I ) )
+     $      GO TO 90
+   80 CONTINUE
+*     REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED.
+      GO TO 120
+*
+   90 CONTINUE
+*     TEST FOR CYCLING.
+      IF( EST.LE.ESTOLD )
+     $   GO TO 120
+*
+      DO 100 I = 1, N
+         X( I ) = SIGN( ONE, X( I ) )
+         ISGN( I ) = NINT( X( I ) )
+  100 CONTINUE
+      KASE = 2
+      JUMP = 4
+      RETURN
+*
+*     ................ ENTRY   (JUMP = 4)
+*     X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X.
+*
+  110 CONTINUE
+      JLAST = J
+      J = ISAMAX( N, X, 1 )
+      IF( ( X( JLAST ).NE.ABS( X( J ) ) ) .AND. ( ITER.LT.ITMAX ) ) THEN
+         ITER = ITER + 1
+         GO TO 50
+      END IF
+*
+*     ITERATION COMPLETE.  FINAL STAGE.
+*
+  120 CONTINUE
+      ALTSGN = ONE
+      DO 130 I = 1, N
+         X( I ) = ALTSGN*( ONE+REAL( I-1 ) / REAL( N-1 ) )
+         ALTSGN = -ALTSGN
+  130 CONTINUE
+      KASE = 1
+      JUMP = 5
+      RETURN
+*
+*     ................ ENTRY   (JUMP = 5)
+*     X HAS BEEN OVERWRITTEN BY A*X.
+*
+  140 CONTINUE
+      TEMP = TWO*( SASUM( N, X, 1 ) / REAL( 3*N ) )
+      IF( TEMP.GT.EST ) THEN
+         CALL SCOPY( N, X, 1, V, 1 )
+         EST = TEMP
+      END IF
+*
+  150 CONTINUE
+      KASE = 0
+      RETURN
+*
+*     End of SLACON
+*
+      END
diff --git a/costa/native/external/lapack/slacpy.f b/costa/native/external/lapack/slacpy.f
new file mode 100644
index 000000000..ac22a9c6e
--- /dev/null
+++ b/costa/native/external/lapack/slacpy.f
@@ -0,0 +1,88 @@
+      SUBROUTINE SLACPY( UPLO, M, N, A, LDA, B, LDB )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            LDA, LDB, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLACPY copies all or part of a two-dimensional matrix A to another
+*  matrix B.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies the part of the matrix A to be copied to B.
+*          = 'U':      Upper triangular part
+*          = 'L':      Lower triangular part
+*          Otherwise:  All of the matrix A
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input) REAL array, dimension (LDA,N)
+*          The m by n matrix A.  If UPLO = 'U', only the upper triangle
+*          or trapezoid is accessed; if UPLO = 'L', only the lower
+*          triangle or trapezoid is accessed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  B       (output) REAL array, dimension (LDB,N)
+*          On exit, B = A in the locations specified by UPLO.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,M).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+      IF( LSAME( UPLO, 'U' ) ) THEN
+         DO 20 J = 1, N
+            DO 10 I = 1, MIN( J, M )
+               B( I, J ) = A( I, J )
+   10       CONTINUE
+   20    CONTINUE
+      ELSE IF( LSAME( UPLO, 'L' ) ) THEN
+         DO 40 J = 1, N
+            DO 30 I = J, M
+               B( I, J ) = A( I, J )
+   30       CONTINUE
+   40    CONTINUE
+      ELSE
+         DO 60 J = 1, N
+            DO 50 I = 1, M
+               B( I, J ) = A( I, J )
+   50       CONTINUE
+   60    CONTINUE
+      END IF
+      RETURN
+*
+*     End of SLACPY
+*
+      END
diff --git a/costa/native/external/lapack/sladiv.f b/costa/native/external/lapack/sladiv.f
new file mode 100644
index 000000000..f6354c842
--- /dev/null
+++ b/costa/native/external/lapack/sladiv.f
@@ -0,0 +1,63 @@
+      SUBROUTINE SLADIV( A, B, C, D, P, Q )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      REAL               A, B, C, D, P, Q
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLADIV performs complex division in  real arithmetic
+*
+*                        a + i*b
+*             p + i*q = ---------
+*                        c + i*d
+*
+*  The algorithm is due to Robert L. Smith and can be found
+*  in D. Knuth, The art of Computer Programming, Vol.2, p.195
+*
+*  Arguments
+*  =========
+*
+*  A       (input) REAL
+*  B       (input) REAL
+*  C       (input) REAL
+*  D       (input) REAL
+*          The scalars a, b, c, and d in the above expression.
+*
+*  P       (output) REAL
+*  Q       (output) REAL
+*          The scalars p and q in the above expression.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      REAL               E, F
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS
+*     ..
+*     .. Executable Statements ..
+*
+      IF( ABS( D ).LT.ABS( C ) ) THEN
+         E = D / C
+         F = C + D*E
+         P = ( A+B*E ) / F
+         Q = ( B-A*E ) / F
+      ELSE
+         E = C / D
+         F = D + C*E
+         P = ( B+A*E ) / F
+         Q = ( -A+B*E ) / F
+      END IF
+*
+      RETURN
+*
+*     End of SLADIV
+*
+      END
diff --git a/costa/native/external/lapack/slae2.f b/costa/native/external/lapack/slae2.f
new file mode 100644
index 000000000..ddb8ef446
--- /dev/null
+++ b/costa/native/external/lapack/slae2.f
@@ -0,0 +1,124 @@
+      SUBROUTINE SLAE2( A, B, C, RT1, RT2 )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      REAL               A, B, C, RT1, RT2
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAE2  computes the eigenvalues of a 2-by-2 symmetric matrix
+*     [  A   B  ]
+*     [  B   C  ].
+*  On return, RT1 is the eigenvalue of larger absolute value, and RT2
+*  is the eigenvalue of smaller absolute value.
+*
+*  Arguments
+*  =========
+*
+*  A       (input) REAL
+*          The (1,1) element of the 2-by-2 matrix.
+*
+*  B       (input) REAL
+*          The (1,2) and (2,1) elements of the 2-by-2 matrix.
+*
+*  C       (input) REAL
+*          The (2,2) element of the 2-by-2 matrix.
+*
+*  RT1     (output) REAL
+*          The eigenvalue of larger absolute value.
+*
+*  RT2     (output) REAL
+*          The eigenvalue of smaller absolute value.
+*
+*  Further Details
+*  ===============
+*
+*  RT1 is accurate to a few ulps barring over/underflow.
+*
+*  RT2 may be inaccurate if there is massive cancellation in the
+*  determinant A*C-B*B; higher precision or correctly rounded or
+*  correctly truncated arithmetic would be needed to compute RT2
+*  accurately in all cases.
+*
+*  Overflow is possible only if RT1 is within a factor of 5 of overflow.
+*  Underflow is harmless if the input data is 0 or exceeds
+*     underflow_threshold / macheps.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E0 )
+      REAL               TWO
+      PARAMETER          ( TWO = 2.0E0 )
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E0 )
+      REAL               HALF
+      PARAMETER          ( HALF = 0.5E0 )
+*     ..
+*     .. Local Scalars ..
+      REAL               AB, ACMN, ACMX, ADF, DF, RT, SM, TB
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Compute the eigenvalues
+*
+      SM = A + C
+      DF = A - C
+      ADF = ABS( DF )
+      TB = B + B
+      AB = ABS( TB )
+      IF( ABS( A ).GT.ABS( C ) ) THEN
+         ACMX = A
+         ACMN = C
+      ELSE
+         ACMX = C
+         ACMN = A
+      END IF
+      IF( ADF.GT.AB ) THEN
+         RT = ADF*SQRT( ONE+( AB / ADF )**2 )
+      ELSE IF( ADF.LT.AB ) THEN
+         RT = AB*SQRT( ONE+( ADF / AB )**2 )
+      ELSE
+*
+*        Includes case AB=ADF=0
+*
+         RT = AB*SQRT( TWO )
+      END IF
+      IF( SM.LT.ZERO ) THEN
+         RT1 = HALF*( SM-RT )
+*
+*        Order of execution important.
+*        To get fully accurate smaller eigenvalue,
+*        next line needs to be executed in higher precision.
+*
+         RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B
+      ELSE IF( SM.GT.ZERO ) THEN
+         RT1 = HALF*( SM+RT )
+*
+*        Order of execution important.
+*        To get fully accurate smaller eigenvalue,
+*        next line needs to be executed in higher precision.
+*
+         RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B
+      ELSE
+*
+*        Includes case RT1 = RT2 = 0
+*
+         RT1 = HALF*RT
+         RT2 = -HALF*RT
+      END IF
+      RETURN
+*
+*     End of SLAE2
+*
+      END
diff --git a/costa/native/external/lapack/slaebz.f b/costa/native/external/lapack/slaebz.f
new file mode 100644
index 000000000..3617e9385
--- /dev/null
+++ b/costa/native/external/lapack/slaebz.f
@@ -0,0 +1,552 @@
+      SUBROUTINE SLAEBZ( IJOB, NITMAX, N, MMAX, MINP, NBMIN, ABSTOL,
+     $                   RELTOL, PIVMIN, D, E, E2, NVAL, AB, C, MOUT,
+     $                   NAB, WORK, IWORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            IJOB, INFO, MINP, MMAX, MOUT, N, NBMIN, NITMAX
+      REAL               ABSTOL, PIVMIN, RELTOL
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * ), NAB( MMAX, * ), NVAL( * )
+      REAL               AB( MMAX, * ), C( * ), D( * ), E( * ), E2( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAEBZ contains the iteration loops which compute and use the
+*  function N(w), which is the count of eigenvalues of a symmetric
+*  tridiagonal matrix T less than or equal to its argument  w.  It
+*  performs a choice of two types of loops:
+*
+*  IJOB=1, followed by
+*  IJOB=2: It takes as input a list of intervals and returns a list of
+*          sufficiently small intervals whose union contains the same
+*          eigenvalues as the union of the original intervals.
+*          The input intervals are (AB(j,1),AB(j,2)], j=1,...,MINP.
+*          The output interval (AB(j,1),AB(j,2)] will contain
+*          eigenvalues NAB(j,1)+1,...,NAB(j,2), where 1 <= j <= MOUT.
+*
+*  IJOB=3: It performs a binary search in each input interval
+*          (AB(j,1),AB(j,2)] for a point  w(j)  such that
+*          N(w(j))=NVAL(j), and uses  C(j)  as the starting point of
+*          the search.  If such a w(j) is found, then on output
+*          AB(j,1)=AB(j,2)=w.  If no such w(j) is found, then on output
+*          (AB(j,1),AB(j,2)] will be a small interval containing the
+*          point where N(w) jumps through NVAL(j), unless that point
+*          lies outside the initial interval.
+*
+*  Note that the intervals are in all cases half-open intervals,
+*  i.e., of the form  (a,b] , which includes  b  but not  a .
+*
+*  To avoid underflow, the matrix should be scaled so that its largest
+*  element is no greater than  overflow**(1/2) * underflow**(1/4)
+*  in absolute value.  To assure the most accurate computation
+*  of small eigenvalues, the matrix should be scaled to be
+*  not much smaller than that, either.
+*
+*  See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal
+*  Matrix", Report CS41, Computer Science Dept., Stanford
+*  University, July 21, 1966
+*
+*  Note: the arguments are, in general, *not* checked for unreasonable
+*  values.
+*
+*  Arguments
+*  =========
+*
+*  IJOB    (input) INTEGER
+*          Specifies what is to be done:
+*          = 1:  Compute NAB for the initial intervals.
+*          = 2:  Perform bisection iteration to find eigenvalues of T.
+*          = 3:  Perform bisection iteration to invert N(w), i.e.,
+*                to find a point which has a specified number of
+*                eigenvalues of T to its left.
+*          Other values will cause SLAEBZ to return with INFO=-1.
+*
+*  NITMAX  (input) INTEGER
+*          The maximum number of "levels" of bisection to be
+*          performed, i.e., an interval of width W will not be made
+*          smaller than 2^(-NITMAX) * W.  If not all intervals
+*          have converged after NITMAX iterations, then INFO is set
+*          to the number of non-converged intervals.
+*
+*  N       (input) INTEGER
+*          The dimension n of the tridiagonal matrix T.  It must be at
+*          least 1.
+*
+*  MMAX    (input) INTEGER
+*          The maximum number of intervals.  If more than MMAX intervals
+*          are generated, then SLAEBZ will quit with INFO=MMAX+1.
+*
+*  MINP    (input) INTEGER
+*          The initial number of intervals.  It may not be greater than
+*          MMAX.
+*
+*  NBMIN   (input) INTEGER
+*          The smallest number of intervals that should be processed
+*          using a vector loop.  If zero, then only the scalar loop
+*          will be used.
+*
+*  ABSTOL  (input) REAL
+*          The minimum (absolute) width of an interval.  When an
+*          interval is narrower than ABSTOL, or than RELTOL times the
+*          larger (in magnitude) endpoint, then it is considered to be
+*          sufficiently small, i.e., converged.  This must be at least
+*          zero.
+*
+*  RELTOL  (input) REAL
+*          The minimum relative width of an interval.  When an interval
+*          is narrower than ABSTOL, or than RELTOL times the larger (in
+*          magnitude) endpoint, then it is considered to be
+*          sufficiently small, i.e., converged.  Note: this should
+*          always be at least radix*machine epsilon.
+*
+*  PIVMIN  (input) REAL
+*          The minimum absolute value of a "pivot" in the Sturm
+*          sequence loop.  This *must* be at least  max |e(j)**2| *
+*          safe_min  and at least safe_min, where safe_min is at least
+*          the smallest number that can divide one without overflow.
+*
+*  D       (input) REAL array, dimension (N)
+*          The diagonal elements of the tridiagonal matrix T.
+*
+*  E       (input) REAL array, dimension (N)
+*          The offdiagonal elements of the tridiagonal matrix T in
+*          positions 1 through N-1.  E(N) is arbitrary.
+*
+*  E2      (input) REAL array, dimension (N)
+*          The squares of the offdiagonal elements of the tridiagonal
+*          matrix T.  E2(N) is ignored.
+*
+*  NVAL    (input/output) INTEGER array, dimension (MINP)
+*          If IJOB=1 or 2, not referenced.
+*          If IJOB=3, the desired values of N(w).  The elements of NVAL
+*          will be reordered to correspond with the intervals in AB.
+*          Thus, NVAL(j) on output will not, in general be the same as
+*          NVAL(j) on input, but it will correspond with the interval
+*          (AB(j,1),AB(j,2)] on output.
+*
+*  AB      (input/output) REAL array, dimension (MMAX,2)
+*          The endpoints of the intervals.  AB(j,1) is  a(j), the left
+*          endpoint of the j-th interval, and AB(j,2) is b(j), the
+*          right endpoint of the j-th interval.  The input intervals
+*          will, in general, be modified, split, and reordered by the
+*          calculation.
+*
+*  C       (input/output) REAL array, dimension (MMAX)
+*          If IJOB=1, ignored.
+*          If IJOB=2, workspace.
+*          If IJOB=3, then on input C(j) should be initialized to the
+*          first search point in the binary search.
+*
+*  MOUT    (output) INTEGER
+*          If IJOB=1, the number of eigenvalues in the intervals.
+*          If IJOB=2 or 3, the number of intervals output.
+*          If IJOB=3, MOUT will equal MINP.
+*
+*  NAB     (input/output) INTEGER array, dimension (MMAX,2)
+*          If IJOB=1, then on output NAB(i,j) will be set to N(AB(i,j)).
+*          If IJOB=2, then on input, NAB(i,j) should be set.  It must
+*             satisfy the condition:
+*             N(AB(i,1)) <= NAB(i,1) <= NAB(i,2) <= N(AB(i,2)),
+*             which means that in interval i only eigenvalues
+*             NAB(i,1)+1,...,NAB(i,2) will be considered.  Usually,
+*             NAB(i,j)=N(AB(i,j)), from a previous call to SLAEBZ with
+*             IJOB=1.
+*             On output, NAB(i,j) will contain
+*             max(na(k),min(nb(k),N(AB(i,j)))), where k is the index of
+*             the input interval that the output interval
+*             (AB(j,1),AB(j,2)] came from, and na(k) and nb(k) are the
+*             the input values of NAB(k,1) and NAB(k,2).
+*          If IJOB=3, then on output, NAB(i,j) contains N(AB(i,j)),
+*             unless N(w) > NVAL(i) for all search points  w , in which
+*             case NAB(i,1) will not be modified, i.e., the output
+*             value will be the same as the input value (modulo
+*             reorderings -- see NVAL and AB), or unless N(w) < NVAL(i)
+*             for all search points  w , in which case NAB(i,2) will
+*             not be modified.  Normally, NAB should be set to some
+*             distinctive value(s) before SLAEBZ is called.
+*
+*  WORK    (workspace) REAL array, dimension (MMAX)
+*          Workspace.
+*
+*  IWORK   (workspace) INTEGER array, dimension (MMAX)
+*          Workspace.
+*
+*  INFO    (output) INTEGER
+*          = 0:       All intervals converged.
+*          = 1--MMAX: The last INFO intervals did not converge.
+*          = MMAX+1:  More than MMAX intervals were generated.
+*
+*  Further Details
+*  ===============
+*
+*      This routine is intended to be called only by other LAPACK
+*  routines, thus the interface is less user-friendly.  It is intended
+*  for two purposes:
+*
+*  (a) finding eigenvalues.  In this case, SLAEBZ should have one or
+*      more initial intervals set up in AB, and SLAEBZ should be called
+*      with IJOB=1.  This sets up NAB, and also counts the eigenvalues.
+*      Intervals with no eigenvalues would usually be thrown out at
+*      this point.  Also, if not all the eigenvalues in an interval i
+*      are desired, NAB(i,1) can be increased or NAB(i,2) decreased.
+*      For example, set NAB(i,1)=NAB(i,2)-1 to get the largest
+*      eigenvalue.  SLAEBZ is then called with IJOB=2 and MMAX
+*      no smaller than the value of MOUT returned by the call with
+*      IJOB=1.  After this (IJOB=2) call, eigenvalues NAB(i,1)+1
+*      through NAB(i,2) are approximately AB(i,1) (or AB(i,2)) to the
+*      tolerance specified by ABSTOL and RELTOL.
+*
+*  (b) finding an interval (a',b'] containing eigenvalues w(f),...,w(l).
+*      In this case, start with a Gershgorin interval  (a,b).  Set up
+*      AB to contain 2 search intervals, both initially (a,b).  One
+*      NVAL element should contain  f-1  and the other should contain  l
+*      , while C should contain a and b, resp.  NAB(i,1) should be -1
+*      and NAB(i,2) should be N+1, to flag an error if the desired
+*      interval does not lie in (a,b).  SLAEBZ is then called with
+*      IJOB=3.  On exit, if w(f-1) < w(f), then one of the intervals --
+*      j -- will have AB(j,1)=AB(j,2) and NAB(j,1)=NAB(j,2)=f-1, while
+*      if, to the specified tolerance, w(f-k)=...=w(f+r), k > 0 and r
+*      >= 0, then the interval will have  N(AB(j,1))=NAB(j,1)=f-k and
+*      N(AB(j,2))=NAB(j,2)=f+r.  The cases w(l) < w(l+1) and
+*      w(l-r)=...=w(l+k) are handled similarly.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, TWO, HALF
+      PARAMETER          ( ZERO = 0.0E0, TWO = 2.0E0,
+     $                   HALF = 1.0E0 / TWO )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            ITMP1, ITMP2, J, JI, JIT, JP, KF, KFNEW, KL,
+     $                   KLNEW
+      REAL               TMP1, TMP2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Check for Errors
+*
+      INFO = 0
+      IF( IJOB.LT.1 .OR. IJOB.GT.3 ) THEN
+         INFO = -1
+         RETURN
+      END IF
+*
+*     Initialize NAB
+*
+      IF( IJOB.EQ.1 ) THEN
+*
+*        Compute the number of eigenvalues in the initial intervals.
+*
+         MOUT = 0
+CDIR$ NOVECTOR
+         DO 30 JI = 1, MINP
+            DO 20 JP = 1, 2
+               TMP1 = D( 1 ) - AB( JI, JP )
+               IF( ABS( TMP1 ).LT.PIVMIN )
+     $            TMP1 = -PIVMIN
+               NAB( JI, JP ) = 0
+               IF( TMP1.LE.ZERO )
+     $            NAB( JI, JP ) = 1
+*
+               DO 10 J = 2, N
+                  TMP1 = D( J ) - E2( J-1 ) / TMP1 - AB( JI, JP )
+                  IF( ABS( TMP1 ).LT.PIVMIN )
+     $               TMP1 = -PIVMIN
+                  IF( TMP1.LE.ZERO )
+     $               NAB( JI, JP ) = NAB( JI, JP ) + 1
+   10          CONTINUE
+   20       CONTINUE
+            MOUT = MOUT + NAB( JI, 2 ) - NAB( JI, 1 )
+   30    CONTINUE
+         RETURN
+      END IF
+*
+*     Initialize for loop
+*
+*     KF and KL have the following meaning:
+*        Intervals 1,...,KF-1 have converged.
+*        Intervals KF,...,KL  still need to be refined.
+*
+      KF = 1
+      KL = MINP
+*
+*     If IJOB=2, initialize C.
+*     If IJOB=3, use the user-supplied starting point.
+*
+      IF( IJOB.EQ.2 ) THEN
+         DO 40 JI = 1, MINP
+            C( JI ) = HALF*( AB( JI, 1 )+AB( JI, 2 ) )
+   40    CONTINUE
+      END IF
+*
+*     Iteration loop
+*
+      DO 130 JIT = 1, NITMAX
+*
+*        Loop over intervals
+*
+         IF( KL-KF+1.GE.NBMIN .AND. NBMIN.GT.0 ) THEN
+*
+*           Begin of Parallel Version of the loop
+*
+            DO 60 JI = KF, KL
+*
+*              Compute N(c), the number of eigenvalues less than c
+*
+               WORK( JI ) = D( 1 ) - C( JI )
+               IWORK( JI ) = 0
+               IF( WORK( JI ).LE.PIVMIN ) THEN
+                  IWORK( JI ) = 1
+                  WORK( JI ) = MIN( WORK( JI ), -PIVMIN )
+               END IF
+*
+               DO 50 J = 2, N
+                  WORK( JI ) = D( J ) - E2( J-1 ) / WORK( JI ) - C( JI )
+                  IF( WORK( JI ).LE.PIVMIN ) THEN
+                     IWORK( JI ) = IWORK( JI ) + 1
+                     WORK( JI ) = MIN( WORK( JI ), -PIVMIN )
+                  END IF
+   50          CONTINUE
+   60       CONTINUE
+*
+            IF( IJOB.LE.2 ) THEN
+*
+*              IJOB=2: Choose all intervals containing eigenvalues.
+*
+               KLNEW = KL
+               DO 70 JI = KF, KL
+*
+*                 Insure that N(w) is monotone
+*
+                  IWORK( JI ) = MIN( NAB( JI, 2 ),
+     $                          MAX( NAB( JI, 1 ), IWORK( JI ) ) )
+*
+*                 Update the Queue -- add intervals if both halves
+*                 contain eigenvalues.
+*
+                  IF( IWORK( JI ).EQ.NAB( JI, 2 ) ) THEN
+*
+*                    No eigenvalue in the upper interval:
+*                    just use the lower interval.
+*
+                     AB( JI, 2 ) = C( JI )
+*
+                  ELSE IF( IWORK( JI ).EQ.NAB( JI, 1 ) ) THEN
+*
+*                    No eigenvalue in the lower interval:
+*                    just use the upper interval.
+*
+                     AB( JI, 1 ) = C( JI )
+                  ELSE
+                     KLNEW = KLNEW + 1
+                     IF( KLNEW.LE.MMAX ) THEN
+*
+*                       Eigenvalue in both intervals -- add upper to
+*                       queue.
+*
+                        AB( KLNEW, 2 ) = AB( JI, 2 )
+                        NAB( KLNEW, 2 ) = NAB( JI, 2 )
+                        AB( KLNEW, 1 ) = C( JI )
+                        NAB( KLNEW, 1 ) = IWORK( JI )
+                        AB( JI, 2 ) = C( JI )
+                        NAB( JI, 2 ) = IWORK( JI )
+                     ELSE
+                        INFO = MMAX + 1
+                     END IF
+                  END IF
+   70          CONTINUE
+               IF( INFO.NE.0 )
+     $            RETURN
+               KL = KLNEW
+            ELSE
+*
+*              IJOB=3: Binary search.  Keep only the interval containing
+*                      w   s.t. N(w) = NVAL
+*
+               DO 80 JI = KF, KL
+                  IF( IWORK( JI ).LE.NVAL( JI ) ) THEN
+                     AB( JI, 1 ) = C( JI )
+                     NAB( JI, 1 ) = IWORK( JI )
+                  END IF
+                  IF( IWORK( JI ).GE.NVAL( JI ) ) THEN
+                     AB( JI, 2 ) = C( JI )
+                     NAB( JI, 2 ) = IWORK( JI )
+                  END IF
+   80          CONTINUE
+            END IF
+*
+         ELSE
+*
+*           End of Parallel Version of the loop
+*
+*           Begin of Serial Version of the loop
+*
+            KLNEW = KL
+            DO 100 JI = KF, KL
+*
+*              Compute N(w), the number of eigenvalues less than w
+*
+               TMP1 = C( JI )
+               TMP2 = D( 1 ) - TMP1
+               ITMP1 = 0
+               IF( TMP2.LE.PIVMIN ) THEN
+                  ITMP1 = 1
+                  TMP2 = MIN( TMP2, -PIVMIN )
+               END IF
+*
+*              A series of compiler directives to defeat vectorization
+*              for the next loop
+*
+*$PL$ CMCHAR=' '
+CDIR$          NEXTSCALAR
+C$DIR          SCALAR
+CDIR$          NEXT SCALAR
+CVD$L          NOVECTOR
+CDEC$          NOVECTOR
+CVD$           NOVECTOR
+*VDIR          NOVECTOR
+*VOCL          LOOP,SCALAR
+CIBM           PREFER SCALAR
+*$PL$ CMCHAR='*'
+*
+               DO 90 J = 2, N
+                  TMP2 = D( J ) - E2( J-1 ) / TMP2 - TMP1
+                  IF( TMP2.LE.PIVMIN ) THEN
+                     ITMP1 = ITMP1 + 1
+                     TMP2 = MIN( TMP2, -PIVMIN )
+                  END IF
+   90          CONTINUE
+*
+               IF( IJOB.LE.2 ) THEN
+*
+*                 IJOB=2: Choose all intervals containing eigenvalues.
+*
+*                 Insure that N(w) is monotone
+*
+                  ITMP1 = MIN( NAB( JI, 2 ),
+     $                    MAX( NAB( JI, 1 ), ITMP1 ) )
+*
+*                 Update the Queue -- add intervals if both halves
+*                 contain eigenvalues.
+*
+                  IF( ITMP1.EQ.NAB( JI, 2 ) ) THEN
+*
+*                    No eigenvalue in the upper interval:
+*                    just use the lower interval.
+*
+                     AB( JI, 2 ) = TMP1
+*
+                  ELSE IF( ITMP1.EQ.NAB( JI, 1 ) ) THEN
+*
+*                    No eigenvalue in the lower interval:
+*                    just use the upper interval.
+*
+                     AB( JI, 1 ) = TMP1
+                  ELSE IF( KLNEW.LT.MMAX ) THEN
+*
+*                    Eigenvalue in both intervals -- add upper to queue.
+*
+                     KLNEW = KLNEW + 1
+                     AB( KLNEW, 2 ) = AB( JI, 2 )
+                     NAB( KLNEW, 2 ) = NAB( JI, 2 )
+                     AB( KLNEW, 1 ) = TMP1
+                     NAB( KLNEW, 1 ) = ITMP1
+                     AB( JI, 2 ) = TMP1
+                     NAB( JI, 2 ) = ITMP1
+                  ELSE
+                     INFO = MMAX + 1
+                     RETURN
+                  END IF
+               ELSE
+*
+*                 IJOB=3: Binary search.  Keep only the interval
+*                         containing  w  s.t. N(w) = NVAL
+*
+                  IF( ITMP1.LE.NVAL( JI ) ) THEN
+                     AB( JI, 1 ) = TMP1
+                     NAB( JI, 1 ) = ITMP1
+                  END IF
+                  IF( ITMP1.GE.NVAL( JI ) ) THEN
+                     AB( JI, 2 ) = TMP1
+                     NAB( JI, 2 ) = ITMP1
+                  END IF
+               END IF
+  100       CONTINUE
+            KL = KLNEW
+*
+*           End of Serial Version of the loop
+*
+         END IF
+*
+*        Check for convergence
+*
+         KFNEW = KF
+         DO 110 JI = KF, KL
+            TMP1 = ABS( AB( JI, 2 )-AB( JI, 1 ) )
+            TMP2 = MAX( ABS( AB( JI, 2 ) ), ABS( AB( JI, 1 ) ) )
+            IF( TMP1.LT.MAX( ABSTOL, PIVMIN, RELTOL*TMP2 ) .OR.
+     $          NAB( JI, 1 ).GE.NAB( JI, 2 ) ) THEN
+*
+*              Converged -- Swap with position KFNEW,
+*                           then increment KFNEW
+*
+               IF( JI.GT.KFNEW ) THEN
+                  TMP1 = AB( JI, 1 )
+                  TMP2 = AB( JI, 2 )
+                  ITMP1 = NAB( JI, 1 )
+                  ITMP2 = NAB( JI, 2 )
+                  AB( JI, 1 ) = AB( KFNEW, 1 )
+                  AB( JI, 2 ) = AB( KFNEW, 2 )
+                  NAB( JI, 1 ) = NAB( KFNEW, 1 )
+                  NAB( JI, 2 ) = NAB( KFNEW, 2 )
+                  AB( KFNEW, 1 ) = TMP1
+                  AB( KFNEW, 2 ) = TMP2
+                  NAB( KFNEW, 1 ) = ITMP1
+                  NAB( KFNEW, 2 ) = ITMP2
+                  IF( IJOB.EQ.3 ) THEN
+                     ITMP1 = NVAL( JI )
+                     NVAL( JI ) = NVAL( KFNEW )
+                     NVAL( KFNEW ) = ITMP1
+                  END IF
+               END IF
+               KFNEW = KFNEW + 1
+            END IF
+  110    CONTINUE
+         KF = KFNEW
+*
+*        Choose Midpoints
+*
+         DO 120 JI = KF, KL
+            C( JI ) = HALF*( AB( JI, 1 )+AB( JI, 2 ) )
+  120    CONTINUE
+*
+*        If no more intervals to refine, quit.
+*
+         IF( KF.GT.KL )
+     $      GO TO 140
+  130 CONTINUE
+*
+*     Converged
+*
+  140 CONTINUE
+      INFO = MAX( KL+1-KF, 0 )
+      MOUT = KL
+*
+      RETURN
+*
+*     End of SLAEBZ
+*
+      END
diff --git a/costa/native/external/lapack/slaed0.f b/costa/native/external/lapack/slaed0.f
new file mode 100644
index 000000000..8fc44a510
--- /dev/null
+++ b/costa/native/external/lapack/slaed0.f
@@ -0,0 +1,350 @@
+      SUBROUTINE SLAED0( ICOMPQ, QSIZ, N, D, E, Q, LDQ, QSTORE, LDQS,
+     $                   WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            ICOMPQ, INFO, LDQ, LDQS, N, QSIZ
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               D( * ), E( * ), Q( LDQ, * ), QSTORE( LDQS, * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAED0 computes all eigenvalues and corresponding eigenvectors of a
+*  symmetric tridiagonal matrix using the divide and conquer method.
+*
+*  Arguments
+*  =========
+*
+*  ICOMPQ  (input) INTEGER
+*          = 0:  Compute eigenvalues only.
+*          = 1:  Compute eigenvectors of original dense symmetric matrix
+*                also.  On entry, Q contains the orthogonal matrix used
+*                to reduce the original matrix to tridiagonal form.
+*          = 2:  Compute eigenvalues and eigenvectors of tridiagonal
+*                matrix.
+*
+*  QSIZ   (input) INTEGER
+*         The dimension of the orthogonal matrix used to reduce
+*         the full matrix to tridiagonal form.  QSIZ >= N if ICOMPQ = 1.
+*
+*  N      (input) INTEGER
+*         The dimension of the symmetric tridiagonal matrix.  N >= 0.
+*
+*  D      (input/output) REAL array, dimension (N)
+*         On entry, the main diagonal of the tridiagonal matrix.
+*         On exit, its eigenvalues.
+*
+*  E      (input) REAL array, dimension (N-1)
+*         The off-diagonal elements of the tridiagonal matrix.
+*         On exit, E has been destroyed.
+*
+*  Q      (input/output) REAL array, dimension (LDQ, N)
+*         On entry, Q must contain an N-by-N orthogonal matrix.
+*         If ICOMPQ = 0    Q is not referenced.
+*         If ICOMPQ = 1    On entry, Q is a subset of the columns of the
+*                          orthogonal matrix used to reduce the full
+*                          matrix to tridiagonal form corresponding to
+*                          the subset of the full matrix which is being
+*                          decomposed at this time.
+*         If ICOMPQ = 2    On entry, Q will be the identity matrix.
+*                          On exit, Q contains the eigenvectors of the
+*                          tridiagonal matrix.
+*
+*  LDQ    (input) INTEGER
+*         The leading dimension of the array Q.  If eigenvectors are
+*         desired, then  LDQ >= max(1,N).  In any case,  LDQ >= 1.
+*
+*  QSTORE (workspace) REAL array, dimension (LDQS, N)
+*         Referenced only when ICOMPQ = 1.  Used to store parts of
+*         the eigenvector matrix when the updating matrix multiplies
+*         take place.
+*
+*  LDQS   (input) INTEGER
+*         The leading dimension of the array QSTORE.  If ICOMPQ = 1,
+*         then  LDQS >= max(1,N).  In any case,  LDQS >= 1.
+*
+*  WORK   (workspace) REAL array,
+*         If ICOMPQ = 0 or 1, the dimension of WORK must be at least
+*                     1 + 3*N + 2*N*lg N + 2*N**2
+*                     ( lg( N ) = smallest integer k
+*                                 such that 2^k >= N )
+*         If ICOMPQ = 2, the dimension of WORK must be at least
+*                     4*N + N**2.
+*
+*  IWORK  (workspace) INTEGER array,
+*         If ICOMPQ = 0 or 1, the dimension of IWORK must be at least
+*                        6 + 6*N + 5*N*lg N.
+*                        ( lg( N ) = smallest integer k
+*                                    such that 2^k >= N )
+*         If ICOMPQ = 2, the dimension of IWORK must be at least
+*                        3 + 5*N.
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  The algorithm failed to compute an eigenvalue while
+*                working on the submatrix lying in rows and columns
+*                INFO/(N+1) through mod(INFO,N+1).
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Jeff Rutter, Computer Science Division, University of California
+*     at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.E0, ONE = 1.E0, TWO = 2.E0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            CURLVL, CURPRB, CURR, I, IGIVCL, IGIVNM,
+     $                   IGIVPT, INDXQ, IPERM, IPRMPT, IQ, IQPTR, IWREM,
+     $                   J, K, LGN, MATSIZ, MSD2, SMLSIZ, SMM1, SPM1,
+     $                   SPM2, SUBMAT, SUBPBS, TLVLS
+      REAL               TEMP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SGEMM, SLACPY, SLAED1, SLAED7, SSTEQR,
+     $                   XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, INT, LOG, MAX, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.2 ) THEN
+         INFO = -1
+      ELSE IF( ( ICOMPQ.EQ.1 ) .AND. ( QSIZ.LT.MAX( 0, N ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDQS.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLAED0', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      SMLSIZ = ILAENV( 9, 'SLAED0', ' ', 0, 0, 0, 0 )
+*
+*     Determine the size and placement of the submatrices, and save in
+*     the leading elements of IWORK.
+*
+      IWORK( 1 ) = N
+      SUBPBS = 1
+      TLVLS = 0
+   10 CONTINUE
+      IF( IWORK( SUBPBS ).GT.SMLSIZ ) THEN
+         DO 20 J = SUBPBS, 1, -1
+            IWORK( 2*J ) = ( IWORK( J )+1 ) / 2
+            IWORK( 2*J-1 ) = IWORK( J ) / 2
+   20    CONTINUE
+         TLVLS = TLVLS + 1
+         SUBPBS = 2*SUBPBS
+         GO TO 10
+      END IF
+      DO 30 J = 2, SUBPBS
+         IWORK( J ) = IWORK( J ) + IWORK( J-1 )
+   30 CONTINUE
+*
+*     Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1
+*     using rank-1 modifications (cuts).
+*
+      SPM1 = SUBPBS - 1
+      DO 40 I = 1, SPM1
+         SUBMAT = IWORK( I ) + 1
+         SMM1 = SUBMAT - 1
+         D( SMM1 ) = D( SMM1 ) - ABS( E( SMM1 ) )
+         D( SUBMAT ) = D( SUBMAT ) - ABS( E( SMM1 ) )
+   40 CONTINUE
+*
+      INDXQ = 4*N + 3
+      IF( ICOMPQ.NE.2 ) THEN
+*
+*        Set up workspaces for eigenvalues only/accumulate new vectors
+*        routine
+*
+         TEMP = LOG( REAL( N ) ) / LOG( TWO )
+         LGN = INT( TEMP )
+         IF( 2**LGN.LT.N )
+     $      LGN = LGN + 1
+         IF( 2**LGN.LT.N )
+     $      LGN = LGN + 1
+         IPRMPT = INDXQ + N + 1
+         IPERM = IPRMPT + N*LGN
+         IQPTR = IPERM + N*LGN
+         IGIVPT = IQPTR + N + 2
+         IGIVCL = IGIVPT + N*LGN
+*
+         IGIVNM = 1
+         IQ = IGIVNM + 2*N*LGN
+         IWREM = IQ + N**2 + 1
+*
+*        Initialize pointers
+*
+         DO 50 I = 0, SUBPBS
+            IWORK( IPRMPT+I ) = 1
+            IWORK( IGIVPT+I ) = 1
+   50    CONTINUE
+         IWORK( IQPTR ) = 1
+      END IF
+*
+*     Solve each submatrix eigenproblem at the bottom of the divide and
+*     conquer tree.
+*
+      CURR = 0
+      DO 70 I = 0, SPM1
+         IF( I.EQ.0 ) THEN
+            SUBMAT = 1
+            MATSIZ = IWORK( 1 )
+         ELSE
+            SUBMAT = IWORK( I ) + 1
+            MATSIZ = IWORK( I+1 ) - IWORK( I )
+         END IF
+         IF( ICOMPQ.EQ.2 ) THEN
+            CALL SSTEQR( 'I', MATSIZ, D( SUBMAT ), E( SUBMAT ),
+     $                   Q( SUBMAT, SUBMAT ), LDQ, WORK, INFO )
+            IF( INFO.NE.0 )
+     $         GO TO 130
+         ELSE
+            CALL SSTEQR( 'I', MATSIZ, D( SUBMAT ), E( SUBMAT ),
+     $                   WORK( IQ-1+IWORK( IQPTR+CURR ) ), MATSIZ, WORK,
+     $                   INFO )
+            IF( INFO.NE.0 )
+     $         GO TO 130
+            IF( ICOMPQ.EQ.1 ) THEN
+               CALL SGEMM( 'N', 'N', QSIZ, MATSIZ, MATSIZ, ONE,
+     $                     Q( 1, SUBMAT ), LDQ, WORK( IQ-1+IWORK( IQPTR+
+     $                     CURR ) ), MATSIZ, ZERO, QSTORE( 1, SUBMAT ),
+     $                     LDQS )
+            END IF
+            IWORK( IQPTR+CURR+1 ) = IWORK( IQPTR+CURR ) + MATSIZ**2
+            CURR = CURR + 1
+         END IF
+         K = 1
+         DO 60 J = SUBMAT, IWORK( I+1 )
+            IWORK( INDXQ+J ) = K
+            K = K + 1
+   60    CONTINUE
+   70 CONTINUE
+*
+*     Successively merge eigensystems of adjacent submatrices
+*     into eigensystem for the corresponding larger matrix.
+*
+*     while ( SUBPBS > 1 )
+*
+      CURLVL = 1
+   80 CONTINUE
+      IF( SUBPBS.GT.1 ) THEN
+         SPM2 = SUBPBS - 2
+         DO 90 I = 0, SPM2, 2
+            IF( I.EQ.0 ) THEN
+               SUBMAT = 1
+               MATSIZ = IWORK( 2 )
+               MSD2 = IWORK( 1 )
+               CURPRB = 0
+            ELSE
+               SUBMAT = IWORK( I ) + 1
+               MATSIZ = IWORK( I+2 ) - IWORK( I )
+               MSD2 = MATSIZ / 2
+               CURPRB = CURPRB + 1
+            END IF
+*
+*     Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2)
+*     into an eigensystem of size MATSIZ.
+*     SLAED1 is used only for the full eigensystem of a tridiagonal
+*     matrix.
+*     SLAED7 handles the cases in which eigenvalues only or eigenvalues
+*     and eigenvectors of a full symmetric matrix (which was reduced to
+*     tridiagonal form) are desired.
+*
+            IF( ICOMPQ.EQ.2 ) THEN
+               CALL SLAED1( MATSIZ, D( SUBMAT ), Q( SUBMAT, SUBMAT ),
+     $                      LDQ, IWORK( INDXQ+SUBMAT ),
+     $                      E( SUBMAT+MSD2-1 ), MSD2, WORK,
+     $                      IWORK( SUBPBS+1 ), INFO )
+            ELSE
+               CALL SLAED7( ICOMPQ, MATSIZ, QSIZ, TLVLS, CURLVL, CURPRB,
+     $                      D( SUBMAT ), QSTORE( 1, SUBMAT ), LDQS,
+     $                      IWORK( INDXQ+SUBMAT ), E( SUBMAT+MSD2-1 ),
+     $                      MSD2, WORK( IQ ), IWORK( IQPTR ),
+     $                      IWORK( IPRMPT ), IWORK( IPERM ),
+     $                      IWORK( IGIVPT ), IWORK( IGIVCL ),
+     $                      WORK( IGIVNM ), WORK( IWREM ),
+     $                      IWORK( SUBPBS+1 ), INFO )
+            END IF
+            IF( INFO.NE.0 )
+     $         GO TO 130
+            IWORK( I / 2+1 ) = IWORK( I+2 )
+   90    CONTINUE
+         SUBPBS = SUBPBS / 2
+         CURLVL = CURLVL + 1
+         GO TO 80
+      END IF
+*
+*     end while
+*
+*     Re-merge the eigenvalues/vectors which were deflated at the final
+*     merge step.
+*
+      IF( ICOMPQ.EQ.1 ) THEN
+         DO 100 I = 1, N
+            J = IWORK( INDXQ+I )
+            WORK( I ) = D( J )
+            CALL SCOPY( QSIZ, QSTORE( 1, J ), 1, Q( 1, I ), 1 )
+  100    CONTINUE
+         CALL SCOPY( N, WORK, 1, D, 1 )
+      ELSE IF( ICOMPQ.EQ.2 ) THEN
+         DO 110 I = 1, N
+            J = IWORK( INDXQ+I )
+            WORK( I ) = D( J )
+            CALL SCOPY( N, Q( 1, J ), 1, WORK( N*I+1 ), 1 )
+  110    CONTINUE
+         CALL SCOPY( N, WORK, 1, D, 1 )
+         CALL SLACPY( 'A', N, N, WORK( N+1 ), N, Q, LDQ )
+      ELSE
+         DO 120 I = 1, N
+            J = IWORK( INDXQ+I )
+            WORK( I ) = D( J )
+  120    CONTINUE
+         CALL SCOPY( N, WORK, 1, D, 1 )
+      END IF
+      GO TO 140
+*
+  130 CONTINUE
+      INFO = SUBMAT*( N+1 ) + SUBMAT + MATSIZ - 1
+*
+  140 CONTINUE
+      RETURN
+*
+*     End of SLAED0
+*
+      END
diff --git a/costa/native/external/lapack/slaed1.f b/costa/native/external/lapack/slaed1.f
new file mode 100644
index 000000000..b1f825459
--- /dev/null
+++ b/costa/native/external/lapack/slaed1.f
@@ -0,0 +1,196 @@
+      SUBROUTINE SLAED1( N, D, Q, LDQ, INDXQ, RHO, CUTPNT, WORK, IWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            CUTPNT, INFO, LDQ, N
+      REAL               RHO
+*     ..
+*     .. Array Arguments ..
+      INTEGER            INDXQ( * ), IWORK( * )
+      REAL               D( * ), Q( LDQ, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAED1 computes the updated eigensystem of a diagonal
+*  matrix after modification by a rank-one symmetric matrix.  This
+*  routine is used only for the eigenproblem which requires all
+*  eigenvalues and eigenvectors of a tridiagonal matrix.  SLAED7 handles
+*  the case in which eigenvalues only or eigenvalues and eigenvectors
+*  of a full symmetric matrix (which was reduced to tridiagonal form)
+*  are desired.
+*
+*    T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out)
+*
+*     where Z = Q'u, u is a vector of length N with ones in the
+*     CUTPNT and CUTPNT + 1 th elements and zeros elsewhere.
+*
+*     The eigenvectors of the original matrix are stored in Q, and the
+*     eigenvalues are in D.  The algorithm consists of three stages:
+*
+*        The first stage consists of deflating the size of the problem
+*        when there are multiple eigenvalues or if there is a zero in
+*        the Z vector.  For each such occurence the dimension of the
+*        secular equation problem is reduced by one.  This stage is
+*        performed by the routine SLAED2.
+*
+*        The second stage consists of calculating the updated
+*        eigenvalues. This is done by finding the roots of the secular
+*        equation via the routine SLAED4 (as called by SLAED3).
+*        This routine also calculates the eigenvectors of the current
+*        problem.
+*
+*        The final stage consists of computing the updated eigenvectors
+*        directly using the updated eigenvalues.  The eigenvectors for
+*        the current problem are multiplied with the eigenvectors from
+*        the overall problem.
+*
+*  Arguments
+*  =========
+*
+*  N      (input) INTEGER
+*         The dimension of the symmetric tridiagonal matrix.  N >= 0.
+*
+*  D      (input/output) REAL array, dimension (N)
+*         On entry, the eigenvalues of the rank-1-perturbed matrix.
+*         On exit, the eigenvalues of the repaired matrix.
+*
+*  Q      (input/output) REAL array, dimension (LDQ,N)
+*         On entry, the eigenvectors of the rank-1-perturbed matrix.
+*         On exit, the eigenvectors of the repaired tridiagonal matrix.
+*
+*  LDQ    (input) INTEGER
+*         The leading dimension of the array Q.  LDQ >= max(1,N).
+*
+*  INDXQ  (input/output) INTEGER array, dimension (N)
+*         On entry, the permutation which separately sorts the two
+*         subproblems in D into ascending order.
+*         On exit, the permutation which will reintegrate the
+*         subproblems back into sorted order,
+*         i.e. D( INDXQ( I = 1, N ) ) will be in ascending order.
+*
+*  RHO    (input) REAL
+*         The subdiagonal entry used to create the rank-1 modification.
+*
+*  CUTPNT (input) INTEGER
+*         The location of the last eigenvalue in the leading sub-matrix.
+*         min(1,N) <= CUTPNT <= N/2.
+*
+*  WORK   (workspace) REAL array, dimension (4*N + N**2)
+*
+*  IWORK  (workspace) INTEGER array, dimension (4*N)
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = 1, an eigenvalue did not converge
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Jeff Rutter, Computer Science Division, University of California
+*     at Berkeley, USA
+*  Modified by Francoise Tisseur, University of Tennessee.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            COLTYP, CPP1, I, IDLMDA, INDX, INDXC, INDXP,
+     $                   IQ2, IS, IW, IZ, K, N1, N2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SLAED2, SLAED3, SLAMRG, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( MIN( 1, N / 2 ).GT.CUTPNT .OR. ( N / 2 ).LT.CUTPNT ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLAED1', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     The following values are integer pointers which indicate
+*     the portion of the workspace
+*     used by a particular array in SLAED2 and SLAED3.
+*
+      IZ = 1
+      IDLMDA = IZ + N
+      IW = IDLMDA + N
+      IQ2 = IW + N
+*
+      INDX = 1
+      INDXC = INDX + N
+      COLTYP = INDXC + N
+      INDXP = COLTYP + N
+*
+*
+*     Form the z-vector which consists of the last row of Q_1 and the
+*     first row of Q_2.
+*
+      CALL SCOPY( CUTPNT, Q( CUTPNT, 1 ), LDQ, WORK( IZ ), 1 )
+      CPP1 = CUTPNT + 1
+      CALL SCOPY( N-CUTPNT, Q( CPP1, CPP1 ), LDQ, WORK( IZ+CUTPNT ), 1 )
+*
+*     Deflate eigenvalues.
+*
+      CALL SLAED2( K, N, CUTPNT, D, Q, LDQ, INDXQ, RHO, WORK( IZ ),
+     $             WORK( IDLMDA ), WORK( IW ), WORK( IQ2 ),
+     $             IWORK( INDX ), IWORK( INDXC ), IWORK( INDXP ),
+     $             IWORK( COLTYP ), INFO )
+*
+      IF( INFO.NE.0 )
+     $   GO TO 20
+*
+*     Solve Secular Equation.
+*
+      IF( K.NE.0 ) THEN
+         IS = ( IWORK( COLTYP )+IWORK( COLTYP+1 ) )*CUTPNT +
+     $        ( IWORK( COLTYP+1 )+IWORK( COLTYP+2 ) )*( N-CUTPNT ) + IQ2
+         CALL SLAED3( K, N, CUTPNT, D, Q, LDQ, RHO, WORK( IDLMDA ),
+     $                WORK( IQ2 ), IWORK( INDXC ), IWORK( COLTYP ),
+     $                WORK( IW ), WORK( IS ), INFO )
+         IF( INFO.NE.0 )
+     $      GO TO 20
+*
+*     Prepare the INDXQ sorting permutation.
+*
+         N1 = K
+         N2 = N - K
+         CALL SLAMRG( N1, N2, D, 1, -1, INDXQ )
+      ELSE
+         DO 10 I = 1, N
+            INDXQ( I ) = I
+   10    CONTINUE
+      END IF
+*
+   20 CONTINUE
+      RETURN
+*
+*     End of SLAED1
+*
+      END
diff --git a/costa/native/external/lapack/slaed2.f b/costa/native/external/lapack/slaed2.f
new file mode 100644
index 000000000..c31de3540
--- /dev/null
+++ b/costa/native/external/lapack/slaed2.f
@@ -0,0 +1,435 @@
+      SUBROUTINE SLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
+     $                   Q2, INDX, INDXC, INDXP, COLTYP, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDQ, N, N1
+      REAL               RHO
+*     ..
+*     .. Array Arguments ..
+      INTEGER            COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ),
+     $                   INDXQ( * )
+      REAL               D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
+     $                   W( * ), Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAED2 merges the two sets of eigenvalues together into a single
+*  sorted set.  Then it tries to deflate the size of the problem.
+*  There are two ways in which deflation can occur:  when two or more
+*  eigenvalues are close together or if there is a tiny entry in the
+*  Z vector.  For each such occurrence the order of the related secular
+*  equation problem is reduced by one.
+*
+*  Arguments
+*  =========
+*
+*  K      (output) INTEGER
+*         The number of non-deflated eigenvalues, and the order of the
+*         related secular equation. 0 <= K <=N.
+*
+*  N      (input) INTEGER
+*         The dimension of the symmetric tridiagonal matrix.  N >= 0.
+*
+*  N1     (input) INTEGER
+*         The location of the last eigenvalue in the leading sub-matrix.
+*         min(1,N) <= N1 <= N/2.
+*
+*  D      (input/output) REAL array, dimension (N)
+*         On entry, D contains the eigenvalues of the two submatrices to
+*         be combined.
+*         On exit, D contains the trailing (N-K) updated eigenvalues
+*         (those which were deflated) sorted into increasing order.
+*
+*  Q      (input/output) REAL array, dimension (LDQ, N)
+*         On entry, Q contains the eigenvectors of two submatrices in
+*         the two square blocks with corners at (1,1), (N1,N1)
+*         and (N1+1, N1+1), (N,N).
+*         On exit, Q contains the trailing (N-K) updated eigenvectors
+*         (those which were deflated) in its last N-K columns.
+*
+*  LDQ    (input) INTEGER
+*         The leading dimension of the array Q.  LDQ >= max(1,N).
+*
+*  INDXQ  (input/output) INTEGER array, dimension (N)
+*         The permutation which separately sorts the two sub-problems
+*         in D into ascending order.  Note that elements in the second
+*         half of this permutation must first have N1 added to their
+*         values. Destroyed on exit.
+*
+*  RHO    (input/output) REAL
+*         On entry, the off-diagonal element associated with the rank-1
+*         cut which originally split the two submatrices which are now
+*         being recombined.
+*         On exit, RHO has been modified to the value required by
+*         SLAED3.
+*
+*  Z      (input) REAL array, dimension (N)
+*         On entry, Z contains the updating vector (the last
+*         row of the first sub-eigenvector matrix and the first row of
+*         the second sub-eigenvector matrix).
+*         On exit, the contents of Z have been destroyed by the updating
+*         process.
+*
+*  DLAMDA (output) REAL array, dimension (N)
+*         A copy of the first K eigenvalues which will be used by
+*         SLAED3 to form the secular equation.
+*
+*  W      (output) REAL array, dimension (N)
+*         The first k values of the final deflation-altered z-vector
+*         which will be passed to SLAED3.
+*
+*  Q2     (output) REAL array, dimension (N1**2+(N-N1)**2)
+*         A copy of the first K eigenvectors which will be used by
+*         SLAED3 in a matrix multiply (SGEMM) to solve for the new
+*         eigenvectors.
+*
+*  INDX   (workspace) INTEGER array, dimension (N)
+*         The permutation used to sort the contents of DLAMDA into
+*         ascending order.
+*
+*  INDXC  (output) INTEGER array, dimension (N)
+*         The permutation used to arrange the columns of the deflated
+*         Q matrix into three groups:  the first group contains non-zero
+*         elements only at and above N1, the second contains
+*         non-zero elements only below N1, and the third is dense.
+*
+*  INDXP  (workspace) INTEGER array, dimension (N)
+*         The permutation used to place deflated values of D at the end
+*         of the array.  INDXP(1:K) points to the nondeflated D-values
+*         and INDXP(K+1:N) points to the deflated eigenvalues.
+*
+*  COLTYP (workspace/output) INTEGER array, dimension (N)
+*         During execution, a label which will indicate which of the
+*         following types a column in the Q2 matrix is:
+*         1 : non-zero in the upper half only;
+*         2 : dense;
+*         3 : non-zero in the lower half only;
+*         4 : deflated.
+*         On exit, COLTYP(i) is the number of columns of type i,
+*         for i=1 to 4 only.
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Jeff Rutter, Computer Science Division, University of California
+*     at Berkeley, USA
+*  Modified by Francoise Tisseur, University of Tennessee.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               MONE, ZERO, ONE, TWO, EIGHT
+      PARAMETER          ( MONE = -1.0E0, ZERO = 0.0E0, ONE = 1.0E0,
+     $                   TWO = 2.0E0, EIGHT = 8.0E0 )
+*     ..
+*     .. Local Arrays ..
+      INTEGER            CTOT( 4 ), PSM( 4 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            CT, I, IMAX, IQ1, IQ2, J, JMAX, JS, K2, N1P1,
+     $                   N2, NJ, PJ
+      REAL               C, EPS, S, T, TAU, TOL
+*     ..
+*     .. External Functions ..
+      INTEGER            ISAMAX
+      REAL               SLAMCH, SLAPY2
+      EXTERNAL           ISAMAX, SLAMCH, SLAPY2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SLACPY, SLAMRG, SROT, SSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( MIN( 1, ( N / 2 ) ).GT.N1 .OR. ( N / 2 ).LT.N1 ) THEN
+         INFO = -3
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLAED2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      N2 = N - N1
+      N1P1 = N1 + 1
+*
+      IF( RHO.LT.ZERO ) THEN
+         CALL SSCAL( N2, MONE, Z( N1P1 ), 1 )
+      END IF
+*
+*     Normalize z so that norm(z) = 1.  Since z is the concatenation of
+*     two normalized vectors, norm2(z) = sqrt(2).
+*
+      T = ONE / SQRT( TWO )
+      CALL SSCAL( N, T, Z, 1 )
+*
+*     RHO = ABS( norm(z)**2 * RHO )
+*
+      RHO = ABS( TWO*RHO )
+*
+*     Sort the eigenvalues into increasing order
+*
+      DO 10 I = N1P1, N
+         INDXQ( I ) = INDXQ( I ) + N1
+   10 CONTINUE
+*
+*     re-integrate the deflated parts from the last pass
+*
+      DO 20 I = 1, N
+         DLAMDA( I ) = D( INDXQ( I ) )
+   20 CONTINUE
+      CALL SLAMRG( N1, N2, DLAMDA, 1, 1, INDXC )
+      DO 30 I = 1, N
+         INDX( I ) = INDXQ( INDXC( I ) )
+   30 CONTINUE
+*
+*     Calculate the allowable deflation tolerance
+*
+      IMAX = ISAMAX( N, Z, 1 )
+      JMAX = ISAMAX( N, D, 1 )
+      EPS = SLAMCH( 'Epsilon' )
+      TOL = EIGHT*EPS*MAX( ABS( D( JMAX ) ), ABS( Z( IMAX ) ) )
+*
+*     If the rank-1 modifier is small enough, no more needs to be done
+*     except to reorganize Q so that its columns correspond with the
+*     elements in D.
+*
+      IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN
+         K = 0
+         IQ2 = 1
+         DO 40 J = 1, N
+            I = INDX( J )
+            CALL SCOPY( N, Q( 1, I ), 1, Q2( IQ2 ), 1 )
+            DLAMDA( J ) = D( I )
+            IQ2 = IQ2 + N
+   40    CONTINUE
+         CALL SLACPY( 'A', N, N, Q2, N, Q, LDQ )
+         CALL SCOPY( N, DLAMDA, 1, D, 1 )
+         GO TO 190
+      END IF
+*
+*     If there are multiple eigenvalues then the problem deflates.  Here
+*     the number of equal eigenvalues are found.  As each equal
+*     eigenvalue is found, an elementary reflector is computed to rotate
+*     the corresponding eigensubspace so that the corresponding
+*     components of Z are zero in this new basis.
+*
+      DO 50 I = 1, N1
+         COLTYP( I ) = 1
+   50 CONTINUE
+      DO 60 I = N1P1, N
+         COLTYP( I ) = 3
+   60 CONTINUE
+*
+*
+      K = 0
+      K2 = N + 1
+      DO 70 J = 1, N
+         NJ = INDX( J )
+         IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN
+*
+*           Deflate due to small z component.
+*
+            K2 = K2 - 1
+            COLTYP( NJ ) = 4
+            INDXP( K2 ) = NJ
+            IF( J.EQ.N )
+     $         GO TO 100
+         ELSE
+            PJ = NJ
+            GO TO 80
+         END IF
+   70 CONTINUE
+   80 CONTINUE
+      J = J + 1
+      NJ = INDX( J )
+      IF( J.GT.N )
+     $   GO TO 100
+      IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN
+*
+*        Deflate due to small z component.
+*
+         K2 = K2 - 1
+         COLTYP( NJ ) = 4
+         INDXP( K2 ) = NJ
+      ELSE
+*
+*        Check if eigenvalues are close enough to allow deflation.
+*
+         S = Z( PJ )
+         C = Z( NJ )
+*
+*        Find sqrt(a**2+b**2) without overflow or
+*        destructive underflow.
+*
+         TAU = SLAPY2( C, S )
+         T = D( NJ ) - D( PJ )
+         C = C / TAU
+         S = -S / TAU
+         IF( ABS( T*C*S ).LE.TOL ) THEN
+*
+*           Deflation is possible.
+*
+            Z( NJ ) = TAU
+            Z( PJ ) = ZERO
+            IF( COLTYP( NJ ).NE.COLTYP( PJ ) )
+     $         COLTYP( NJ ) = 2
+            COLTYP( PJ ) = 4
+            CALL SROT( N, Q( 1, PJ ), 1, Q( 1, NJ ), 1, C, S )
+            T = D( PJ )*C**2 + D( NJ )*S**2
+            D( NJ ) = D( PJ )*S**2 + D( NJ )*C**2
+            D( PJ ) = T
+            K2 = K2 - 1
+            I = 1
+   90       CONTINUE
+            IF( K2+I.LE.N ) THEN
+               IF( D( PJ ).LT.D( INDXP( K2+I ) ) ) THEN
+                  INDXP( K2+I-1 ) = INDXP( K2+I )
+                  INDXP( K2+I ) = PJ
+                  I = I + 1
+                  GO TO 90
+               ELSE
+                  INDXP( K2+I-1 ) = PJ
+               END IF
+            ELSE
+               INDXP( K2+I-1 ) = PJ
+            END IF
+            PJ = NJ
+         ELSE
+            K = K + 1
+            DLAMDA( K ) = D( PJ )
+            W( K ) = Z( PJ )
+            INDXP( K ) = PJ
+            PJ = NJ
+         END IF
+      END IF
+      GO TO 80
+  100 CONTINUE
+*
+*     Record the last eigenvalue.
+*
+      K = K + 1
+      DLAMDA( K ) = D( PJ )
+      W( K ) = Z( PJ )
+      INDXP( K ) = PJ
+*
+*     Count up the total number of the various types of columns, then
+*     form a permutation which positions the four column types into
+*     four uniform groups (although one or more of these groups may be
+*     empty).
+*
+      DO 110 J = 1, 4
+         CTOT( J ) = 0
+  110 CONTINUE
+      DO 120 J = 1, N
+         CT = COLTYP( J )
+         CTOT( CT ) = CTOT( CT ) + 1
+  120 CONTINUE
+*
+*     PSM(*) = Position in SubMatrix (of types 1 through 4)
+*
+      PSM( 1 ) = 1
+      PSM( 2 ) = 1 + CTOT( 1 )
+      PSM( 3 ) = PSM( 2 ) + CTOT( 2 )
+      PSM( 4 ) = PSM( 3 ) + CTOT( 3 )
+      K = N - CTOT( 4 )
+*
+*     Fill out the INDXC array so that the permutation which it induces
+*     will place all type-1 columns first, all type-2 columns next,
+*     then all type-3's, and finally all type-4's.
+*
+      DO 130 J = 1, N
+         JS = INDXP( J )
+         CT = COLTYP( JS )
+         INDX( PSM( CT ) ) = JS
+         INDXC( PSM( CT ) ) = J
+         PSM( CT ) = PSM( CT ) + 1
+  130 CONTINUE
+*
+*     Sort the eigenvalues and corresponding eigenvectors into DLAMDA
+*     and Q2 respectively.  The eigenvalues/vectors which were not
+*     deflated go into the first K slots of DLAMDA and Q2 respectively,
+*     while those which were deflated go into the last N - K slots.
+*
+      I = 1
+      IQ1 = 1
+      IQ2 = 1 + ( CTOT( 1 )+CTOT( 2 ) )*N1
+      DO 140 J = 1, CTOT( 1 )
+         JS = INDX( I )
+         CALL SCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 )
+         Z( I ) = D( JS )
+         I = I + 1
+         IQ1 = IQ1 + N1
+  140 CONTINUE
+*
+      DO 150 J = 1, CTOT( 2 )
+         JS = INDX( I )
+         CALL SCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 )
+         CALL SCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 )
+         Z( I ) = D( JS )
+         I = I + 1
+         IQ1 = IQ1 + N1
+         IQ2 = IQ2 + N2
+  150 CONTINUE
+*
+      DO 160 J = 1, CTOT( 3 )
+         JS = INDX( I )
+         CALL SCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 )
+         Z( I ) = D( JS )
+         I = I + 1
+         IQ2 = IQ2 + N2
+  160 CONTINUE
+*
+      IQ1 = IQ2
+      DO 170 J = 1, CTOT( 4 )
+         JS = INDX( I )
+         CALL SCOPY( N, Q( 1, JS ), 1, Q2( IQ2 ), 1 )
+         IQ2 = IQ2 + N
+         Z( I ) = D( JS )
+         I = I + 1
+  170 CONTINUE
+*
+*     The deflated eigenvalues and their corresponding vectors go back
+*     into the last N - K slots of D and Q respectively.
+*
+      CALL SLACPY( 'A', N, CTOT( 4 ), Q2( IQ1 ), N, Q( 1, K+1 ), LDQ )
+      CALL SCOPY( N-K, Z( K+1 ), 1, D( K+1 ), 1 )
+*
+*     Copy CTOT into COLTYP for referencing in SLAED3.
+*
+      DO 180 J = 1, 4
+         COLTYP( J ) = CTOT( J )
+  180 CONTINUE
+*
+  190 CONTINUE
+      RETURN
+*
+*     End of SLAED2
+*
+      END
diff --git a/costa/native/external/lapack/slaed3.f b/costa/native/external/lapack/slaed3.f
new file mode 100644
index 000000000..011f1a644
--- /dev/null
+++ b/costa/native/external/lapack/slaed3.f
@@ -0,0 +1,265 @@
+      SUBROUTINE SLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX,
+     $                   CTOT, W, S, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+*     Courant Institute, NAG Ltd., and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDQ, N, N1
+      REAL               RHO
+*     ..
+*     .. Array Arguments ..
+      INTEGER            CTOT( * ), INDX( * )
+      REAL               D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
+     $                   S( * ), W( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAED3 finds the roots of the secular equation, as defined by the
+*  values in D, W, and RHO, between 1 and K.  It makes the
+*  appropriate calls to SLAED4 and then updates the eigenvectors by
+*  multiplying the matrix of eigenvectors of the pair of eigensystems
+*  being combined by the matrix of eigenvectors of the K-by-K system
+*  which is solved here.
+*
+*  This code makes very mild assumptions about floating point
+*  arithmetic. It will work on machines with a guard digit in
+*  add/subtract, or on those binary machines without guard digits
+*  which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
+*  It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  K       (input) INTEGER
+*          The number of terms in the rational function to be solved by
+*          SLAED4.  K >= 0.
+*
+*  N       (input) INTEGER
+*          The number of rows and columns in the Q matrix.
+*          N >= K (deflation may result in N>K).
+*
+*  N1      (input) INTEGER
+*          The location of the last eigenvalue in the leading submatrix.
+*          min(1,N) <= N1 <= N/2.
+*
+*  D       (output) REAL array, dimension (N)
+*          D(I) contains the updated eigenvalues for
+*          1 <= I <= K.
+*
+*  Q       (output) REAL array, dimension (LDQ,N)
+*          Initially the first K columns are used as workspace.
+*          On output the columns 1 to K contain
+*          the updated eigenvectors.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.  LDQ >= max(1,N).
+*
+*  RHO     (input) REAL
+*          The value of the parameter in the rank one update equation.
+*          RHO >= 0 required.
+*
+*  DLAMDA  (input/output) REAL array, dimension (K)
+*          The first K elements of this array contain the old roots
+*          of the deflated updating problem.  These are the poles
+*          of the secular equation. May be changed on output by
+*          having lowest order bit set to zero on Cray X-MP, Cray Y-MP,
+*          Cray-2, or Cray C-90, as described above.
+*
+*  Q2      (input) REAL array, dimension (LDQ2, N)
+*          The first K columns of this matrix contain the non-deflated
+*          eigenvectors for the split problem.
+*
+*  INDX    (input) INTEGER array, dimension (N)
+*          The permutation used to arrange the columns of the deflated
+*          Q matrix into three groups (see SLAED2).
+*          The rows of the eigenvectors found by SLAED4 must be likewise
+*          permuted before the matrix multiply can take place.
+*
+*  CTOT    (input) INTEGER array, dimension (4)
+*          A count of the total number of the various types of columns
+*          in Q, as described in INDX.  The fourth column type is any
+*          column which has been deflated.
+*
+*  W       (input/output) REAL array, dimension (K)
+*          The first K elements of this array contain the components
+*          of the deflation-adjusted updating vector. Destroyed on
+*          output.
+*
+*  S       (workspace) REAL array, dimension (N1 + 1)*K
+*          Will contain the eigenvectors of the repaired matrix which
+*          will be multiplied by the previously accumulated eigenvectors
+*          to update the system.
+*
+*  LDS     (input) INTEGER
+*          The leading dimension of S.  LDS >= max(1,K).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = 1, an eigenvalue did not converge
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Jeff Rutter, Computer Science Division, University of California
+*     at Berkeley, USA
+*  Modified by Francoise Tisseur, University of Tennessee.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E0, ZERO = 0.0E0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, II, IQ2, J, N12, N2, N23
+      REAL               TEMP
+*     ..
+*     .. External Functions ..
+      REAL               SLAMC3, SNRM2
+      EXTERNAL           SLAMC3, SNRM2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SGEMM, SLACPY, SLAED4, SLASET, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, SIGN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( K.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.K ) THEN
+         INFO = -2
+      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLAED3', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( K.EQ.0 )
+     $   RETURN
+*
+*     Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can
+*     be computed with high relative accuracy (barring over/underflow).
+*     This is a problem on machines without a guard digit in
+*     add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
+*     The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I),
+*     which on any of these machines zeros out the bottommost
+*     bit of DLAMDA(I) if it is 1; this makes the subsequent
+*     subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation
+*     occurs. On binary machines with a guard digit (almost all
+*     machines) it does not change DLAMDA(I) at all. On hexadecimal
+*     and decimal machines with a guard digit, it slightly
+*     changes the bottommost bits of DLAMDA(I). It does not account
+*     for hexadecimal or decimal machines without guard digits
+*     (we know of none). We use a subroutine call to compute
+*     2*DLAMBDA(I) to prevent optimizing compilers from eliminating
+*     this code.
+*
+      DO 10 I = 1, K
+         DLAMDA( I ) = SLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I )
+   10 CONTINUE
+*
+      DO 20 J = 1, K
+         CALL SLAED4( K, J, DLAMDA, W, Q( 1, J ), RHO, D( J ), INFO )
+*
+*        If the zero finder fails, the computation is terminated.
+*
+         IF( INFO.NE.0 )
+     $      GO TO 120
+   20 CONTINUE
+*
+      IF( K.EQ.1 )
+     $   GO TO 110
+      IF( K.EQ.2 ) THEN
+         DO 30 J = 1, K
+            W( 1 ) = Q( 1, J )
+            W( 2 ) = Q( 2, J )
+            II = INDX( 1 )
+            Q( 1, J ) = W( II )
+            II = INDX( 2 )
+            Q( 2, J ) = W( II )
+   30    CONTINUE
+         GO TO 110
+      END IF
+*
+*     Compute updated W.
+*
+      CALL SCOPY( K, W, 1, S, 1 )
+*
+*     Initialize W(I) = Q(I,I)
+*
+      CALL SCOPY( K, Q, LDQ+1, W, 1 )
+      DO 60 J = 1, K
+         DO 40 I = 1, J - 1
+            W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) )
+   40    CONTINUE
+         DO 50 I = J + 1, K
+            W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) )
+   50    CONTINUE
+   60 CONTINUE
+      DO 70 I = 1, K
+         W( I ) = SIGN( SQRT( -W( I ) ), S( I ) )
+   70 CONTINUE
+*
+*     Compute eigenvectors of the modified rank-1 modification.
+*
+      DO 100 J = 1, K
+         DO 80 I = 1, K
+            S( I ) = W( I ) / Q( I, J )
+   80    CONTINUE
+         TEMP = SNRM2( K, S, 1 )
+         DO 90 I = 1, K
+            II = INDX( I )
+            Q( I, J ) = S( II ) / TEMP
+   90    CONTINUE
+  100 CONTINUE
+*
+*     Compute the updated eigenvectors.
+*
+  110 CONTINUE
+*
+      N2 = N - N1
+      N12 = CTOT( 1 ) + CTOT( 2 )
+      N23 = CTOT( 2 ) + CTOT( 3 )
+*
+      CALL SLACPY( 'A', N23, K, Q( CTOT( 1 )+1, 1 ), LDQ, S, N23 )
+      IQ2 = N1*N12 + 1
+      IF( N23.NE.0 ) THEN
+         CALL SGEMM( 'N', 'N', N2, K, N23, ONE, Q2( IQ2 ), N2, S, N23,
+     $               ZERO, Q( N1+1, 1 ), LDQ )
+      ELSE
+         CALL SLASET( 'A', N2, K, ZERO, ZERO, Q( N1+1, 1 ), LDQ )
+      END IF
+*
+      CALL SLACPY( 'A', N12, K, Q, LDQ, S, N12 )
+      IF( N12.NE.0 ) THEN
+         CALL SGEMM( 'N', 'N', N1, K, N12, ONE, Q2, N1, S, N12, ZERO, Q,
+     $               LDQ )
+      ELSE
+         CALL SLASET( 'A', N1, K, ZERO, ZERO, Q( 1, 1 ), LDQ )
+      END IF
+*
+*
+  120 CONTINUE
+      RETURN
+*
+*     End of SLAED3
+*
+      END
diff --git a/costa/native/external/lapack/slaed4.f b/costa/native/external/lapack/slaed4.f
new file mode 100644
index 000000000..d091585e2
--- /dev/null
+++ b/costa/native/external/lapack/slaed4.f
@@ -0,0 +1,846 @@
+      SUBROUTINE SLAED4( N, I, D, Z, DELTA, RHO, DLAM, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+*     Courant Institute, NAG Ltd., and Rice University
+*     December 23, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            I, INFO, N
+      REAL               DLAM, RHO
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * ), DELTA( * ), Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This subroutine computes the I-th updated eigenvalue of a symmetric
+*  rank-one modification to a diagonal matrix whose elements are
+*  given in the array d, and that
+*
+*             D(i) < D(j)  for  i < j
+*
+*  and that RHO > 0.  This is arranged by the calling routine, and is
+*  no loss in generality.  The rank-one modified system is thus
+*
+*             diag( D )  +  RHO *  Z * Z_transpose.
+*
+*  where we assume the Euclidean norm of Z is 1.
+*
+*  The method consists of approximating the rational functions in the
+*  secular equation by simpler interpolating rational functions.
+*
+*  Arguments
+*  =========
+*
+*  N      (input) INTEGER
+*         The length of all arrays.
+*
+*  I      (input) INTEGER
+*         The index of the eigenvalue to be computed.  1 <= I <= N.
+*
+*  D      (input) REAL array, dimension (N)
+*         The original eigenvalues.  It is assumed that they are in
+*         order, D(I) < D(J)  for I < J.
+*
+*  Z      (input) REAL array, dimension (N)
+*         The components of the updating vector.
+*
+*  DELTA  (output) REAL array, dimension (N)
+*         If N .ne. 1, DELTA contains (D(j) - lambda_I) in its  j-th
+*         component.  If N = 1, then DELTA(1) = 1.  The vector DELTA
+*         contains the information necessary to construct the
+*         eigenvectors.
+*
+*  RHO    (input) REAL
+*         The scalar in the symmetric updating formula.
+*
+*  DLAM   (output) REAL
+*         The computed lambda_I, the I-th updated eigenvalue.
+*
+*  INFO   (output) INTEGER
+*         = 0:  successful exit
+*         > 0:  if INFO = 1, the updating process failed.
+*
+*  Internal Parameters
+*  ===================
+*
+*  Logical variable ORGATI (origin-at-i?) is used for distinguishing
+*  whether D(i) or D(i+1) is treated as the origin.
+*
+*            ORGATI = .true.    origin at i
+*            ORGATI = .false.   origin at i+1
+*
+*   Logical variable SWTCH3 (switch-for-3-poles?) is for noting
+*   if we are working with THREE poles!
+*
+*   MAXIT is the maximum number of iterations allowed for each
+*   eigenvalue.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ren-Cang Li, Computer Science Division, University of California
+*     at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            MAXIT
+      PARAMETER          ( MAXIT = 30 )
+      REAL               ZERO, ONE, TWO, THREE, FOUR, EIGHT, TEN
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0,
+     $                   THREE = 3.0E0, FOUR = 4.0E0, EIGHT = 8.0E0,
+     $                   TEN = 10.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ORGATI, SWTCH, SWTCH3
+      INTEGER            II, IIM1, IIP1, IP1, ITER, J, NITER
+      REAL               A, B, C, DEL, DLTLB, DLTUB, DPHI, DPSI, DW,
+     $                   EPS, ERRETM, ETA, MIDPT, PHI, PREW, PSI,
+     $                   RHOINV, TAU, TEMP, TEMP1, W
+*     ..
+*     .. Local Arrays ..
+      REAL               ZZ( 3 )
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLAED5, SLAED6
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Since this routine is called in an inner loop, we do no argument
+*     checking.
+*
+*     Quick return for N=1 and 2.
+*
+      INFO = 0
+      IF( N.EQ.1 ) THEN
+*
+*         Presumably, I=1 upon entry
+*
+         DLAM = D( 1 ) + RHO*Z( 1 )*Z( 1 )
+         DELTA( 1 ) = ONE
+         RETURN
+      END IF
+      IF( N.EQ.2 ) THEN
+         CALL SLAED5( I, D, Z, DELTA, RHO, DLAM )
+         RETURN
+      END IF
+*
+*     Compute machine epsilon
+*
+      EPS = SLAMCH( 'Epsilon' )
+      RHOINV = ONE / RHO
+*
+*     The case I = N
+*
+      IF( I.EQ.N ) THEN
+*
+*        Initialize some basic variables
+*
+         II = N - 1
+         NITER = 1
+*
+*        Calculate initial guess
+*
+         MIDPT = RHO / TWO
+*
+*        If ||Z||_2 is not one, then TEMP should be set to
+*        RHO * ||Z||_2^2 / TWO
+*
+         DO 10 J = 1, N
+            DELTA( J ) = ( D( J )-D( I ) ) - MIDPT
+   10    CONTINUE
+*
+         PSI = ZERO
+         DO 20 J = 1, N - 2
+            PSI = PSI + Z( J )*Z( J ) / DELTA( J )
+   20    CONTINUE
+*
+         C = RHOINV + PSI
+         W = C + Z( II )*Z( II ) / DELTA( II ) +
+     $       Z( N )*Z( N ) / DELTA( N )
+*
+         IF( W.LE.ZERO ) THEN
+            TEMP = Z( N-1 )*Z( N-1 ) / ( D( N )-D( N-1 )+RHO ) +
+     $             Z( N )*Z( N ) / RHO
+            IF( C.LE.TEMP ) THEN
+               TAU = RHO
+            ELSE
+               DEL = D( N ) - D( N-1 )
+               A = -C*DEL + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N )
+               B = Z( N )*Z( N )*DEL
+               IF( A.LT.ZERO ) THEN
+                  TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A )
+               ELSE
+                  TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C )
+               END IF
+            END IF
+*
+*           It can be proved that
+*               D(N)+RHO/2 <= LAMBDA(N) < D(N)+TAU <= D(N)+RHO
+*
+            DLTLB = MIDPT
+            DLTUB = RHO
+         ELSE
+            DEL = D( N ) - D( N-1 )
+            A = -C*DEL + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N )
+            B = Z( N )*Z( N )*DEL
+            IF( A.LT.ZERO ) THEN
+               TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A )
+            ELSE
+               TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C )
+            END IF
+*
+*           It can be proved that
+*               D(N) < D(N)+TAU < LAMBDA(N) < D(N)+RHO/2
+*
+            DLTLB = ZERO
+            DLTUB = MIDPT
+         END IF
+*
+         DO 30 J = 1, N
+            DELTA( J ) = ( D( J )-D( I ) ) - TAU
+   30    CONTINUE
+*
+*        Evaluate PSI and the derivative DPSI
+*
+         DPSI = ZERO
+         PSI = ZERO
+         ERRETM = ZERO
+         DO 40 J = 1, II
+            TEMP = Z( J ) / DELTA( J )
+            PSI = PSI + Z( J )*TEMP
+            DPSI = DPSI + TEMP*TEMP
+            ERRETM = ERRETM + PSI
+   40    CONTINUE
+         ERRETM = ABS( ERRETM )
+*
+*        Evaluate PHI and the derivative DPHI
+*
+         TEMP = Z( N ) / DELTA( N )
+         PHI = Z( N )*TEMP
+         DPHI = TEMP*TEMP
+         ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV +
+     $            ABS( TAU )*( DPSI+DPHI )
+*
+         W = RHOINV + PHI + PSI
+*
+*        Test for convergence
+*
+         IF( ABS( W ).LE.EPS*ERRETM ) THEN
+            DLAM = D( I ) + TAU
+            GO TO 250
+         END IF
+*
+         IF( W.LE.ZERO ) THEN
+            DLTLB = MAX( DLTLB, TAU )
+         ELSE
+            DLTUB = MIN( DLTUB, TAU )
+         END IF
+*
+*        Calculate the new step
+*
+         NITER = NITER + 1
+         C = W - DELTA( N-1 )*DPSI - DELTA( N )*DPHI
+         A = ( DELTA( N-1 )+DELTA( N ) )*W -
+     $       DELTA( N-1 )*DELTA( N )*( DPSI+DPHI )
+         B = DELTA( N-1 )*DELTA( N )*W
+         IF( C.LT.ZERO )
+     $      C = ABS( C )
+         IF( C.EQ.ZERO ) THEN
+*          ETA = B/A
+*           ETA = RHO - TAU
+            ETA = DLTUB - TAU
+         ELSE IF( A.GE.ZERO ) THEN
+            ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+         ELSE
+            ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) )
+         END IF
+*
+*        Note, eta should be positive if w is negative, and
+*        eta should be negative otherwise. However,
+*        if for some reason caused by roundoff, eta*w > 0,
+*        we simply use one Newton step instead. This way
+*        will guarantee eta*w < 0.
+*
+         IF( W*ETA.GT.ZERO )
+     $      ETA = -W / ( DPSI+DPHI )
+         TEMP = TAU + ETA
+         IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN
+            IF( W.LT.ZERO ) THEN
+               ETA = ( DLTUB-TAU ) / TWO
+            ELSE
+               ETA = ( DLTLB-TAU ) / TWO
+            END IF
+         END IF
+         DO 50 J = 1, N
+            DELTA( J ) = DELTA( J ) - ETA
+   50    CONTINUE
+*
+         TAU = TAU + ETA
+*
+*        Evaluate PSI and the derivative DPSI
+*
+         DPSI = ZERO
+         PSI = ZERO
+         ERRETM = ZERO
+         DO 60 J = 1, II
+            TEMP = Z( J ) / DELTA( J )
+            PSI = PSI + Z( J )*TEMP
+            DPSI = DPSI + TEMP*TEMP
+            ERRETM = ERRETM + PSI
+   60    CONTINUE
+         ERRETM = ABS( ERRETM )
+*
+*        Evaluate PHI and the derivative DPHI
+*
+         TEMP = Z( N ) / DELTA( N )
+         PHI = Z( N )*TEMP
+         DPHI = TEMP*TEMP
+         ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV +
+     $            ABS( TAU )*( DPSI+DPHI )
+*
+         W = RHOINV + PHI + PSI
+*
+*        Main loop to update the values of the array   DELTA
+*
+         ITER = NITER + 1
+*
+         DO 90 NITER = ITER, MAXIT
+*
+*           Test for convergence
+*
+            IF( ABS( W ).LE.EPS*ERRETM ) THEN
+               DLAM = D( I ) + TAU
+               GO TO 250
+            END IF
+*
+            IF( W.LE.ZERO ) THEN
+               DLTLB = MAX( DLTLB, TAU )
+            ELSE
+               DLTUB = MIN( DLTUB, TAU )
+            END IF
+*
+*           Calculate the new step
+*
+            C = W - DELTA( N-1 )*DPSI - DELTA( N )*DPHI
+            A = ( DELTA( N-1 )+DELTA( N ) )*W -
+     $          DELTA( N-1 )*DELTA( N )*( DPSI+DPHI )
+            B = DELTA( N-1 )*DELTA( N )*W
+            IF( A.GE.ZERO ) THEN
+               ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+            ELSE
+               ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) )
+            END IF
+*
+*           Note, eta should be positive if w is negative, and
+*           eta should be negative otherwise. However,
+*           if for some reason caused by roundoff, eta*w > 0,
+*           we simply use one Newton step instead. This way
+*           will guarantee eta*w < 0.
+*
+            IF( W*ETA.GT.ZERO )
+     $         ETA = -W / ( DPSI+DPHI )
+            TEMP = TAU + ETA
+            IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN
+               IF( W.LT.ZERO ) THEN
+                  ETA = ( DLTUB-TAU ) / TWO
+               ELSE
+                  ETA = ( DLTLB-TAU ) / TWO
+               END IF
+            END IF
+            DO 70 J = 1, N
+               DELTA( J ) = DELTA( J ) - ETA
+   70       CONTINUE
+*
+            TAU = TAU + ETA
+*
+*           Evaluate PSI and the derivative DPSI
+*
+            DPSI = ZERO
+            PSI = ZERO
+            ERRETM = ZERO
+            DO 80 J = 1, II
+               TEMP = Z( J ) / DELTA( J )
+               PSI = PSI + Z( J )*TEMP
+               DPSI = DPSI + TEMP*TEMP
+               ERRETM = ERRETM + PSI
+   80       CONTINUE
+            ERRETM = ABS( ERRETM )
+*
+*           Evaluate PHI and the derivative DPHI
+*
+            TEMP = Z( N ) / DELTA( N )
+            PHI = Z( N )*TEMP
+            DPHI = TEMP*TEMP
+            ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV +
+     $               ABS( TAU )*( DPSI+DPHI )
+*
+            W = RHOINV + PHI + PSI
+   90    CONTINUE
+*
+*        Return with INFO = 1, NITER = MAXIT and not converged
+*
+         INFO = 1
+         DLAM = D( I ) + TAU
+         GO TO 250
+*
+*        End for the case I = N
+*
+      ELSE
+*
+*        The case for I < N
+*
+         NITER = 1
+         IP1 = I + 1
+*
+*        Calculate initial guess
+*
+         DEL = D( IP1 ) - D( I )
+         MIDPT = DEL / TWO
+         DO 100 J = 1, N
+            DELTA( J ) = ( D( J )-D( I ) ) - MIDPT
+  100    CONTINUE
+*
+         PSI = ZERO
+         DO 110 J = 1, I - 1
+            PSI = PSI + Z( J )*Z( J ) / DELTA( J )
+  110    CONTINUE
+*
+         PHI = ZERO
+         DO 120 J = N, I + 2, -1
+            PHI = PHI + Z( J )*Z( J ) / DELTA( J )
+  120    CONTINUE
+         C = RHOINV + PSI + PHI
+         W = C + Z( I )*Z( I ) / DELTA( I ) +
+     $       Z( IP1 )*Z( IP1 ) / DELTA( IP1 )
+*
+         IF( W.GT.ZERO ) THEN
+*
+*           d(i)< the ith eigenvalue < (d(i)+d(i+1))/2
+*
+*           We choose d(i) as origin.
+*
+            ORGATI = .TRUE.
+            A = C*DEL + Z( I )*Z( I ) + Z( IP1 )*Z( IP1 )
+            B = Z( I )*Z( I )*DEL
+            IF( A.GT.ZERO ) THEN
+               TAU = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
+            ELSE
+               TAU = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+            END IF
+            DLTLB = ZERO
+            DLTUB = MIDPT
+         ELSE
+*
+*           (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1)
+*
+*           We choose d(i+1) as origin.
+*
+            ORGATI = .FALSE.
+            A = C*DEL - Z( I )*Z( I ) - Z( IP1 )*Z( IP1 )
+            B = Z( IP1 )*Z( IP1 )*DEL
+            IF( A.LT.ZERO ) THEN
+               TAU = TWO*B / ( A-SQRT( ABS( A*A+FOUR*B*C ) ) )
+            ELSE
+               TAU = -( A+SQRT( ABS( A*A+FOUR*B*C ) ) ) / ( TWO*C )
+            END IF
+            DLTLB = -MIDPT
+            DLTUB = ZERO
+         END IF
+*
+         IF( ORGATI ) THEN
+            DO 130 J = 1, N
+               DELTA( J ) = ( D( J )-D( I ) ) - TAU
+  130       CONTINUE
+         ELSE
+            DO 140 J = 1, N
+               DELTA( J ) = ( D( J )-D( IP1 ) ) - TAU
+  140       CONTINUE
+         END IF
+         IF( ORGATI ) THEN
+            II = I
+         ELSE
+            II = I + 1
+         END IF
+         IIM1 = II - 1
+         IIP1 = II + 1
+*
+*        Evaluate PSI and the derivative DPSI
+*
+         DPSI = ZERO
+         PSI = ZERO
+         ERRETM = ZERO
+         DO 150 J = 1, IIM1
+            TEMP = Z( J ) / DELTA( J )
+            PSI = PSI + Z( J )*TEMP
+            DPSI = DPSI + TEMP*TEMP
+            ERRETM = ERRETM + PSI
+  150    CONTINUE
+         ERRETM = ABS( ERRETM )
+*
+*        Evaluate PHI and the derivative DPHI
+*
+         DPHI = ZERO
+         PHI = ZERO
+         DO 160 J = N, IIP1, -1
+            TEMP = Z( J ) / DELTA( J )
+            PHI = PHI + Z( J )*TEMP
+            DPHI = DPHI + TEMP*TEMP
+            ERRETM = ERRETM + PHI
+  160    CONTINUE
+*
+         W = RHOINV + PHI + PSI
+*
+*        W is the value of the secular function with
+*        its ii-th element removed.
+*
+         SWTCH3 = .FALSE.
+         IF( ORGATI ) THEN
+            IF( W.LT.ZERO )
+     $         SWTCH3 = .TRUE.
+         ELSE
+            IF( W.GT.ZERO )
+     $         SWTCH3 = .TRUE.
+         END IF
+         IF( II.EQ.1 .OR. II.EQ.N )
+     $      SWTCH3 = .FALSE.
+*
+         TEMP = Z( II ) / DELTA( II )
+         DW = DPSI + DPHI + TEMP*TEMP
+         TEMP = Z( II )*TEMP
+         W = W + TEMP
+         ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV +
+     $            THREE*ABS( TEMP ) + ABS( TAU )*DW
+*
+*        Test for convergence
+*
+         IF( ABS( W ).LE.EPS*ERRETM ) THEN
+            IF( ORGATI ) THEN
+               DLAM = D( I ) + TAU
+            ELSE
+               DLAM = D( IP1 ) + TAU
+            END IF
+            GO TO 250
+         END IF
+*
+         IF( W.LE.ZERO ) THEN
+            DLTLB = MAX( DLTLB, TAU )
+         ELSE
+            DLTUB = MIN( DLTUB, TAU )
+         END IF
+*
+*        Calculate the new step
+*
+         NITER = NITER + 1
+         IF( .NOT.SWTCH3 ) THEN
+            IF( ORGATI ) THEN
+               C = W - DELTA( IP1 )*DW - ( D( I )-D( IP1 ) )*
+     $             ( Z( I ) / DELTA( I ) )**2
+            ELSE
+               C = W - DELTA( I )*DW - ( D( IP1 )-D( I ) )*
+     $             ( Z( IP1 ) / DELTA( IP1 ) )**2
+            END IF
+            A = ( DELTA( I )+DELTA( IP1 ) )*W -
+     $          DELTA( I )*DELTA( IP1 )*DW
+            B = DELTA( I )*DELTA( IP1 )*W
+            IF( C.EQ.ZERO ) THEN
+               IF( A.EQ.ZERO ) THEN
+                  IF( ORGATI ) THEN
+                     A = Z( I )*Z( I ) + DELTA( IP1 )*DELTA( IP1 )*
+     $                   ( DPSI+DPHI )
+                  ELSE
+                     A = Z( IP1 )*Z( IP1 ) + DELTA( I )*DELTA( I )*
+     $                   ( DPSI+DPHI )
+                  END IF
+               END IF
+               ETA = B / A
+            ELSE IF( A.LE.ZERO ) THEN
+               ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+            ELSE
+               ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
+            END IF
+         ELSE
+*
+*           Interpolation using THREE most relevant poles
+*
+            TEMP = RHOINV + PSI + PHI
+            IF( ORGATI ) THEN
+               TEMP1 = Z( IIM1 ) / DELTA( IIM1 )
+               TEMP1 = TEMP1*TEMP1
+               C = TEMP - DELTA( IIP1 )*( DPSI+DPHI ) -
+     $             ( D( IIM1 )-D( IIP1 ) )*TEMP1
+               ZZ( 1 ) = Z( IIM1 )*Z( IIM1 )
+               ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )*
+     $                   ( ( DPSI-TEMP1 )+DPHI )
+            ELSE
+               TEMP1 = Z( IIP1 ) / DELTA( IIP1 )
+               TEMP1 = TEMP1*TEMP1
+               C = TEMP - DELTA( IIM1 )*( DPSI+DPHI ) -
+     $             ( D( IIP1 )-D( IIM1 ) )*TEMP1
+               ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )*
+     $                   ( DPSI+( DPHI-TEMP1 ) )
+               ZZ( 3 ) = Z( IIP1 )*Z( IIP1 )
+            END IF
+            ZZ( 2 ) = Z( II )*Z( II )
+            CALL SLAED6( NITER, ORGATI, C, DELTA( IIM1 ), ZZ, W, ETA,
+     $                   INFO )
+            IF( INFO.NE.0 )
+     $         GO TO 250
+         END IF
+*
+*        Note, eta should be positive if w is negative, and
+*        eta should be negative otherwise. However,
+*        if for some reason caused by roundoff, eta*w > 0,
+*        we simply use one Newton step instead. This way
+*        will guarantee eta*w < 0.
+*
+         IF( W*ETA.GE.ZERO )
+     $      ETA = -W / DW
+         TEMP = TAU + ETA
+         IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN
+            IF( W.LT.ZERO ) THEN
+               ETA = ( DLTUB-TAU ) / TWO
+            ELSE
+               ETA = ( DLTLB-TAU ) / TWO
+            END IF
+         END IF
+*
+         PREW = W
+*
+  170    CONTINUE
+         DO 180 J = 1, N
+            DELTA( J ) = DELTA( J ) - ETA
+  180    CONTINUE
+*
+*        Evaluate PSI and the derivative DPSI
+*
+         DPSI = ZERO
+         PSI = ZERO
+         ERRETM = ZERO
+         DO 190 J = 1, IIM1
+            TEMP = Z( J ) / DELTA( J )
+            PSI = PSI + Z( J )*TEMP
+            DPSI = DPSI + TEMP*TEMP
+            ERRETM = ERRETM + PSI
+  190    CONTINUE
+         ERRETM = ABS( ERRETM )
+*
+*        Evaluate PHI and the derivative DPHI
+*
+         DPHI = ZERO
+         PHI = ZERO
+         DO 200 J = N, IIP1, -1
+            TEMP = Z( J ) / DELTA( J )
+            PHI = PHI + Z( J )*TEMP
+            DPHI = DPHI + TEMP*TEMP
+            ERRETM = ERRETM + PHI
+  200    CONTINUE
+*
+         TEMP = Z( II ) / DELTA( II )
+         DW = DPSI + DPHI + TEMP*TEMP
+         TEMP = Z( II )*TEMP
+         W = RHOINV + PHI + PSI + TEMP
+         ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV +
+     $            THREE*ABS( TEMP ) + ABS( TAU+ETA )*DW
+*
+         SWTCH = .FALSE.
+         IF( ORGATI ) THEN
+            IF( -W.GT.ABS( PREW ) / TEN )
+     $         SWTCH = .TRUE.
+         ELSE
+            IF( W.GT.ABS( PREW ) / TEN )
+     $         SWTCH = .TRUE.
+         END IF
+*
+         TAU = TAU + ETA
+*
+*        Main loop to update the values of the array   DELTA
+*
+         ITER = NITER + 1
+*
+         DO 240 NITER = ITER, MAXIT
+*
+*           Test for convergence
+*
+            IF( ABS( W ).LE.EPS*ERRETM ) THEN
+               IF( ORGATI ) THEN
+                  DLAM = D( I ) + TAU
+               ELSE
+                  DLAM = D( IP1 ) + TAU
+               END IF
+               GO TO 250
+            END IF
+*
+            IF( W.LE.ZERO ) THEN
+               DLTLB = MAX( DLTLB, TAU )
+            ELSE
+               DLTUB = MIN( DLTUB, TAU )
+            END IF
+*
+*           Calculate the new step
+*
+            IF( .NOT.SWTCH3 ) THEN
+               IF( .NOT.SWTCH ) THEN
+                  IF( ORGATI ) THEN
+                     C = W - DELTA( IP1 )*DW -
+     $                   ( D( I )-D( IP1 ) )*( Z( I ) / DELTA( I ) )**2
+                  ELSE
+                     C = W - DELTA( I )*DW - ( D( IP1 )-D( I ) )*
+     $                   ( Z( IP1 ) / DELTA( IP1 ) )**2
+                  END IF
+               ELSE
+                  TEMP = Z( II ) / DELTA( II )
+                  IF( ORGATI ) THEN
+                     DPSI = DPSI + TEMP*TEMP
+                  ELSE
+                     DPHI = DPHI + TEMP*TEMP
+                  END IF
+                  C = W - DELTA( I )*DPSI - DELTA( IP1 )*DPHI
+               END IF
+               A = ( DELTA( I )+DELTA( IP1 ) )*W -
+     $             DELTA( I )*DELTA( IP1 )*DW
+               B = DELTA( I )*DELTA( IP1 )*W
+               IF( C.EQ.ZERO ) THEN
+                  IF( A.EQ.ZERO ) THEN
+                     IF( .NOT.SWTCH ) THEN
+                        IF( ORGATI ) THEN
+                           A = Z( I )*Z( I ) + DELTA( IP1 )*
+     $                         DELTA( IP1 )*( DPSI+DPHI )
+                        ELSE
+                           A = Z( IP1 )*Z( IP1 ) +
+     $                         DELTA( I )*DELTA( I )*( DPSI+DPHI )
+                        END IF
+                     ELSE
+                        A = DELTA( I )*DELTA( I )*DPSI +
+     $                      DELTA( IP1 )*DELTA( IP1 )*DPHI
+                     END IF
+                  END IF
+                  ETA = B / A
+               ELSE IF( A.LE.ZERO ) THEN
+                  ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+               ELSE
+                  ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
+               END IF
+            ELSE
+*
+*              Interpolation using THREE most relevant poles
+*
+               TEMP = RHOINV + PSI + PHI
+               IF( SWTCH ) THEN
+                  C = TEMP - DELTA( IIM1 )*DPSI - DELTA( IIP1 )*DPHI
+                  ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )*DPSI
+                  ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )*DPHI
+               ELSE
+                  IF( ORGATI ) THEN
+                     TEMP1 = Z( IIM1 ) / DELTA( IIM1 )
+                     TEMP1 = TEMP1*TEMP1
+                     C = TEMP - DELTA( IIP1 )*( DPSI+DPHI ) -
+     $                   ( D( IIM1 )-D( IIP1 ) )*TEMP1
+                     ZZ( 1 ) = Z( IIM1 )*Z( IIM1 )
+                     ZZ( 3 ) = DELTA( IIP1 )*DELTA( IIP1 )*
+     $                         ( ( DPSI-TEMP1 )+DPHI )
+                  ELSE
+                     TEMP1 = Z( IIP1 ) / DELTA( IIP1 )
+                     TEMP1 = TEMP1*TEMP1
+                     C = TEMP - DELTA( IIM1 )*( DPSI+DPHI ) -
+     $                   ( D( IIP1 )-D( IIM1 ) )*TEMP1
+                     ZZ( 1 ) = DELTA( IIM1 )*DELTA( IIM1 )*
+     $                         ( DPSI+( DPHI-TEMP1 ) )
+                     ZZ( 3 ) = Z( IIP1 )*Z( IIP1 )
+                  END IF
+               END IF
+               CALL SLAED6( NITER, ORGATI, C, DELTA( IIM1 ), ZZ, W, ETA,
+     $                      INFO )
+               IF( INFO.NE.0 )
+     $            GO TO 250
+            END IF
+*
+*           Note, eta should be positive if w is negative, and
+*           eta should be negative otherwise. However,
+*           if for some reason caused by roundoff, eta*w > 0,
+*           we simply use one Newton step instead. This way
+*           will guarantee eta*w < 0.
+*
+            IF( W*ETA.GE.ZERO )
+     $         ETA = -W / DW
+            TEMP = TAU + ETA
+            IF( TEMP.GT.DLTUB .OR. TEMP.LT.DLTLB ) THEN
+               IF( W.LT.ZERO ) THEN
+                  ETA = ( DLTUB-TAU ) / TWO
+               ELSE
+                  ETA = ( DLTLB-TAU ) / TWO
+               END IF
+            END IF
+*
+            DO 210 J = 1, N
+               DELTA( J ) = DELTA( J ) - ETA
+  210       CONTINUE
+*
+            TAU = TAU + ETA
+            PREW = W
+*
+*           Evaluate PSI and the derivative DPSI
+*
+            DPSI = ZERO
+            PSI = ZERO
+            ERRETM = ZERO
+            DO 220 J = 1, IIM1
+               TEMP = Z( J ) / DELTA( J )
+               PSI = PSI + Z( J )*TEMP
+               DPSI = DPSI + TEMP*TEMP
+               ERRETM = ERRETM + PSI
+  220       CONTINUE
+            ERRETM = ABS( ERRETM )
+*
+*           Evaluate PHI and the derivative DPHI
+*
+            DPHI = ZERO
+            PHI = ZERO
+            DO 230 J = N, IIP1, -1
+               TEMP = Z( J ) / DELTA( J )
+               PHI = PHI + Z( J )*TEMP
+               DPHI = DPHI + TEMP*TEMP
+               ERRETM = ERRETM + PHI
+  230       CONTINUE
+*
+            TEMP = Z( II ) / DELTA( II )
+            DW = DPSI + DPHI + TEMP*TEMP
+            TEMP = Z( II )*TEMP
+            W = RHOINV + PHI + PSI + TEMP
+            ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV +
+     $               THREE*ABS( TEMP ) + ABS( TAU )*DW
+            IF( W*PREW.GT.ZERO .AND. ABS( W ).GT.ABS( PREW ) / TEN )
+     $         SWTCH = .NOT.SWTCH
+*
+  240    CONTINUE
+*
+*        Return with INFO = 1, NITER = MAXIT and not converged
+*
+         INFO = 1
+         IF( ORGATI ) THEN
+            DLAM = D( I ) + TAU
+         ELSE
+            DLAM = D( IP1 ) + TAU
+         END IF
+*
+      END IF
+*
+  250 CONTINUE
+*
+      RETURN
+*
+*     End of SLAED4
+*
+      END
diff --git a/costa/native/external/lapack/slaed5.f b/costa/native/external/lapack/slaed5.f
new file mode 100644
index 000000000..b92934132
--- /dev/null
+++ b/costa/native/external/lapack/slaed5.f
@@ -0,0 +1,125 @@
+      SUBROUTINE SLAED5( I, D, Z, DELTA, RHO, DLAM )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+*     Courant Institute, NAG Ltd., and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            I
+      REAL               DLAM, RHO
+*     ..
+*     .. Array Arguments ..
+      REAL               D( 2 ), DELTA( 2 ), Z( 2 )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This subroutine computes the I-th eigenvalue of a symmetric rank-one
+*  modification of a 2-by-2 diagonal matrix
+*
+*             diag( D )  +  RHO *  Z * transpose(Z) .
+*
+*  The diagonal elements in the array D are assumed to satisfy
+*
+*             D(i) < D(j)  for  i < j .
+*
+*  We also assume RHO > 0 and that the Euclidean norm of the vector
+*  Z is one.
+*
+*  Arguments
+*  =========
+*
+*  I      (input) INTEGER
+*         The index of the eigenvalue to be computed.  I = 1 or I = 2.
+*
+*  D      (input) REAL array, dimension (2)
+*         The original eigenvalues.  We assume D(1) < D(2).
+*
+*  Z      (input) REAL array, dimension (2)
+*         The components of the updating vector.
+*
+*  DELTA  (output) REAL array, dimension (2)
+*         The vector DELTA contains the information necessary
+*         to construct the eigenvectors.
+*
+*  RHO    (input) REAL
+*         The scalar in the symmetric updating formula.
+*
+*  DLAM   (output) REAL
+*         The computed lambda_I, the I-th updated eigenvalue.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ren-Cang Li, Computer Science Division, University of California
+*     at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE, TWO, FOUR
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0,
+     $                   FOUR = 4.0E0 )
+*     ..
+*     .. Local Scalars ..
+      REAL               B, C, DEL, TAU, TEMP, W
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      DEL = D( 2 ) - D( 1 )
+      IF( I.EQ.1 ) THEN
+         W = ONE + TWO*RHO*( Z( 2 )*Z( 2 )-Z( 1 )*Z( 1 ) ) / DEL
+         IF( W.GT.ZERO ) THEN
+            B = DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) )
+            C = RHO*Z( 1 )*Z( 1 )*DEL
+*
+*           B > ZERO, always
+*
+            TAU = TWO*C / ( B+SQRT( ABS( B*B-FOUR*C ) ) )
+            DLAM = D( 1 ) + TAU
+            DELTA( 1 ) = -Z( 1 ) / TAU
+            DELTA( 2 ) = Z( 2 ) / ( DEL-TAU )
+         ELSE
+            B = -DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) )
+            C = RHO*Z( 2 )*Z( 2 )*DEL
+            IF( B.GT.ZERO ) THEN
+               TAU = -TWO*C / ( B+SQRT( B*B+FOUR*C ) )
+            ELSE
+               TAU = ( B-SQRT( B*B+FOUR*C ) ) / TWO
+            END IF
+            DLAM = D( 2 ) + TAU
+            DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU )
+            DELTA( 2 ) = -Z( 2 ) / TAU
+         END IF
+         TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) )
+         DELTA( 1 ) = DELTA( 1 ) / TEMP
+         DELTA( 2 ) = DELTA( 2 ) / TEMP
+      ELSE
+*
+*     Now I=2
+*
+         B = -DEL + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) )
+         C = RHO*Z( 2 )*Z( 2 )*DEL
+         IF( B.GT.ZERO ) THEN
+            TAU = ( B+SQRT( B*B+FOUR*C ) ) / TWO
+         ELSE
+            TAU = TWO*C / ( -B+SQRT( B*B+FOUR*C ) )
+         END IF
+         DLAM = D( 2 ) + TAU
+         DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU )
+         DELTA( 2 ) = -Z( 2 ) / TAU
+         TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) )
+         DELTA( 1 ) = DELTA( 1 ) / TEMP
+         DELTA( 2 ) = DELTA( 2 ) / TEMP
+      END IF
+      RETURN
+*
+*     End OF SLAED5
+*
+      END
diff --git a/costa/native/external/lapack/slaed6.f b/costa/native/external/lapack/slaed6.f
new file mode 100644
index 000000000..d3ec92450
--- /dev/null
+++ b/costa/native/external/lapack/slaed6.f
@@ -0,0 +1,299 @@
+      SUBROUTINE SLAED6( KNITER, ORGATI, RHO, D, Z, FINIT, TAU, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+*     Courant Institute, NAG Ltd., and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      LOGICAL            ORGATI
+      INTEGER            INFO, KNITER
+      REAL               FINIT, RHO, TAU
+*     ..
+*     .. Array Arguments ..
+      REAL               D( 3 ), Z( 3 )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAED6 computes the positive or negative root (closest to the origin)
+*  of
+*                   z(1)        z(2)        z(3)
+*  f(x) =   rho + --------- + ---------- + ---------
+*                  d(1)-x      d(2)-x      d(3)-x
+*
+*  It is assumed that
+*
+*        if ORGATI = .true. the root is between d(2) and d(3);
+*        otherwise it is between d(1) and d(2)
+*
+*  This routine will be called by SLAED4 when necessary. In most cases,
+*  the root sought is the smallest in magnitude, though it might not be
+*  in some extremely rare situations.
+*
+*  Arguments
+*  =========
+*
+*  KNITER       (input) INTEGER
+*               Refer to SLAED4 for its significance.
+*
+*  ORGATI       (input) LOGICAL
+*               If ORGATI is true, the needed root is between d(2) and
+*               d(3); otherwise it is between d(1) and d(2).  See
+*               SLAED4 for further details.
+*
+*  RHO          (input) REAL
+*               Refer to the equation f(x) above.
+*
+*  D            (input) REAL array, dimension (3)
+*               D satisfies d(1) < d(2) < d(3).
+*
+*  Z            (input) REAL array, dimension (3)
+*               Each of the elements in z must be positive.
+*
+*  FINIT        (input) REAL
+*               The value of f at 0. It is more accurate than the one
+*               evaluated inside this routine (if someone wants to do
+*               so).
+*
+*  TAU          (output) REAL
+*               The root of the equation f(x).
+*
+*  INFO         (output) INTEGER
+*               = 0: successful exit
+*               > 0: if INFO = 1, failure to converge
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ren-Cang Li, Computer Science Division, University of California
+*     at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            MAXIT
+      PARAMETER          ( MAXIT = 20 )
+      REAL               ZERO, ONE, TWO, THREE, FOUR, EIGHT
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0,
+     $                   THREE = 3.0E0, FOUR = 4.0E0, EIGHT = 8.0E0 )
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. Local Arrays ..
+      REAL               DSCALE( 3 ), ZSCALE( 3 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            FIRST, SCALE
+      INTEGER            I, ITER, NITER
+      REAL               A, B, BASE, C, DDF, DF, EPS, ERRETM, ETA, F,
+     $                   FC, SCLFAC, SCLINV, SMALL1, SMALL2, SMINV1,
+     $                   SMINV2, TEMP, TEMP1, TEMP2, TEMP3, TEMP4
+*     ..
+*     .. Save statement ..
+      SAVE               FIRST, SMALL1, SMINV1, SMALL2, SMINV2, EPS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, INT, LOG, MAX, MIN, SQRT
+*     ..
+*     .. Data statements ..
+      DATA               FIRST / .TRUE. /
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+*
+      NITER = 1
+      TAU = ZERO
+      IF( KNITER.EQ.2 ) THEN
+         IF( ORGATI ) THEN
+            TEMP = ( D( 3 )-D( 2 ) ) / TWO
+            C = RHO + Z( 1 ) / ( ( D( 1 )-D( 2 ) )-TEMP )
+            A = C*( D( 2 )+D( 3 ) ) + Z( 2 ) + Z( 3 )
+            B = C*D( 2 )*D( 3 ) + Z( 2 )*D( 3 ) + Z( 3 )*D( 2 )
+         ELSE
+            TEMP = ( D( 1 )-D( 2 ) ) / TWO
+            C = RHO + Z( 3 ) / ( ( D( 3 )-D( 2 ) )-TEMP )
+            A = C*( D( 1 )+D( 2 ) ) + Z( 1 ) + Z( 2 )
+            B = C*D( 1 )*D( 2 ) + Z( 1 )*D( 2 ) + Z( 2 )*D( 1 )
+         END IF
+         TEMP = MAX( ABS( A ), ABS( B ), ABS( C ) )
+         A = A / TEMP
+         B = B / TEMP
+         C = C / TEMP
+         IF( C.EQ.ZERO ) THEN
+            TAU = B / A
+         ELSE IF( A.LE.ZERO ) THEN
+            TAU = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+         ELSE
+            TAU = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
+         END IF
+         TEMP = RHO + Z( 1 ) / ( D( 1 )-TAU ) +
+     $          Z( 2 ) / ( D( 2 )-TAU ) + Z( 3 ) / ( D( 3 )-TAU )
+         IF( ABS( FINIT ).LE.ABS( TEMP ) )
+     $      TAU = ZERO
+      END IF
+*
+*     On first call to routine, get machine parameters for
+*     possible scaling to avoid overflow
+*
+      IF( FIRST ) THEN
+         EPS = SLAMCH( 'Epsilon' )
+         BASE = SLAMCH( 'Base' )
+         SMALL1 = BASE**( INT( LOG( SLAMCH( 'SafMin' ) ) / LOG( BASE ) /
+     $            THREE ) )
+         SMINV1 = ONE / SMALL1
+         SMALL2 = SMALL1*SMALL1
+         SMINV2 = SMINV1*SMINV1
+         FIRST = .FALSE.
+      END IF
+*
+*     Determine if scaling of inputs necessary to avoid overflow
+*     when computing 1/TEMP**3
+*
+      IF( ORGATI ) THEN
+         TEMP = MIN( ABS( D( 2 )-TAU ), ABS( D( 3 )-TAU ) )
+      ELSE
+         TEMP = MIN( ABS( D( 1 )-TAU ), ABS( D( 2 )-TAU ) )
+      END IF
+      SCALE = .FALSE.
+      IF( TEMP.LE.SMALL1 ) THEN
+         SCALE = .TRUE.
+         IF( TEMP.LE.SMALL2 ) THEN
+*
+*        Scale up by power of radix nearest 1/SAFMIN**(2/3)
+*
+            SCLFAC = SMINV2
+            SCLINV = SMALL2
+         ELSE
+*
+*        Scale up by power of radix nearest 1/SAFMIN**(1/3)
+*
+            SCLFAC = SMINV1
+            SCLINV = SMALL1
+         END IF
+*
+*        Scaling up safe because D, Z, TAU scaled elsewhere to be O(1)
+*
+         DO 10 I = 1, 3
+            DSCALE( I ) = D( I )*SCLFAC
+            ZSCALE( I ) = Z( I )*SCLFAC
+   10    CONTINUE
+         TAU = TAU*SCLFAC
+      ELSE
+*
+*        Copy D and Z to DSCALE and ZSCALE
+*
+         DO 20 I = 1, 3
+            DSCALE( I ) = D( I )
+            ZSCALE( I ) = Z( I )
+   20    CONTINUE
+      END IF
+*
+      FC = ZERO
+      DF = ZERO
+      DDF = ZERO
+      DO 30 I = 1, 3
+         TEMP = ONE / ( DSCALE( I )-TAU )
+         TEMP1 = ZSCALE( I )*TEMP
+         TEMP2 = TEMP1*TEMP
+         TEMP3 = TEMP2*TEMP
+         FC = FC + TEMP1 / DSCALE( I )
+         DF = DF + TEMP2
+         DDF = DDF + TEMP3
+   30 CONTINUE
+      F = FINIT + TAU*FC
+*
+      IF( ABS( F ).LE.ZERO )
+     $   GO TO 60
+*
+*        Iteration begins
+*
+*     It is not hard to see that
+*
+*           1) Iterations will go up monotonically
+*              if FINIT < 0;
+*
+*           2) Iterations will go down monotonically
+*              if FINIT > 0.
+*
+      ITER = NITER + 1
+*
+      DO 50 NITER = ITER, MAXIT
+*
+         IF( ORGATI ) THEN
+            TEMP1 = DSCALE( 2 ) - TAU
+            TEMP2 = DSCALE( 3 ) - TAU
+         ELSE
+            TEMP1 = DSCALE( 1 ) - TAU
+            TEMP2 = DSCALE( 2 ) - TAU
+         END IF
+         A = ( TEMP1+TEMP2 )*F - TEMP1*TEMP2*DF
+         B = TEMP1*TEMP2*F
+         C = F - ( TEMP1+TEMP2 )*DF + TEMP1*TEMP2*DDF
+         TEMP = MAX( ABS( A ), ABS( B ), ABS( C ) )
+         A = A / TEMP
+         B = B / TEMP
+         C = C / TEMP
+         IF( C.EQ.ZERO ) THEN
+            ETA = B / A
+         ELSE IF( A.LE.ZERO ) THEN
+            ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+         ELSE
+            ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
+         END IF
+         IF( F*ETA.GE.ZERO ) THEN
+            ETA = -F / DF
+         END IF
+*
+         TEMP = ETA + TAU
+         IF( ORGATI ) THEN
+            IF( ETA.GT.ZERO .AND. TEMP.GE.DSCALE( 3 ) )
+     $         ETA = ( DSCALE( 3 )-TAU ) / TWO
+            IF( ETA.LT.ZERO .AND. TEMP.LE.DSCALE( 2 ) )
+     $         ETA = ( DSCALE( 2 )-TAU ) / TWO
+         ELSE
+            IF( ETA.GT.ZERO .AND. TEMP.GE.DSCALE( 2 ) )
+     $         ETA = ( DSCALE( 2 )-TAU ) / TWO
+            IF( ETA.LT.ZERO .AND. TEMP.LE.DSCALE( 1 ) )
+     $         ETA = ( DSCALE( 1 )-TAU ) / TWO
+         END IF
+         TAU = TAU + ETA
+*
+         FC = ZERO
+         ERRETM = ZERO
+         DF = ZERO
+         DDF = ZERO
+         DO 40 I = 1, 3
+            TEMP = ONE / ( DSCALE( I )-TAU )
+            TEMP1 = ZSCALE( I )*TEMP
+            TEMP2 = TEMP1*TEMP
+            TEMP3 = TEMP2*TEMP
+            TEMP4 = TEMP1 / DSCALE( I )
+            FC = FC + TEMP4
+            ERRETM = ERRETM + ABS( TEMP4 )
+            DF = DF + TEMP2
+            DDF = DDF + TEMP3
+   40    CONTINUE
+         F = FINIT + TAU*FC
+         ERRETM = EIGHT*( ABS( FINIT )+ABS( TAU )*ERRETM ) +
+     $            ABS( TAU )*DF
+         IF( ABS( F ).LE.EPS*ERRETM )
+     $      GO TO 60
+   50 CONTINUE
+      INFO = 1
+   60 CONTINUE
+*
+*     Undo scaling
+*
+      IF( SCALE )
+     $   TAU = TAU*SCLINV
+      RETURN
+*
+*     End of SLAED6
+*
+      END
diff --git a/costa/native/external/lapack/slaed7.f b/costa/native/external/lapack/slaed7.f
new file mode 100644
index 000000000..5678348f1
--- /dev/null
+++ b/costa/native/external/lapack/slaed7.f
@@ -0,0 +1,288 @@
+      SUBROUTINE SLAED7( ICOMPQ, N, QSIZ, TLVLS, CURLVL, CURPBM, D, Q,
+     $                   LDQ, INDXQ, RHO, CUTPNT, QSTORE, QPTR, PRMPTR,
+     $                   PERM, GIVPTR, GIVCOL, GIVNUM, WORK, IWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            CURLVL, CURPBM, CUTPNT, ICOMPQ, INFO, LDQ, N,
+     $                   QSIZ, TLVLS
+      REAL               RHO
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( 2, * ), GIVPTR( * ), INDXQ( * ),
+     $                   IWORK( * ), PERM( * ), PRMPTR( * ), QPTR( * )
+      REAL               D( * ), GIVNUM( 2, * ), Q( LDQ, * ),
+     $                   QSTORE( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAED7 computes the updated eigensystem of a diagonal
+*  matrix after modification by a rank-one symmetric matrix. This
+*  routine is used only for the eigenproblem which requires all
+*  eigenvalues and optionally eigenvectors of a dense symmetric matrix
+*  that has been reduced to tridiagonal form.  SLAED1 handles
+*  the case in which all eigenvalues and eigenvectors of a symmetric
+*  tridiagonal matrix are desired.
+*
+*    T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out)
+*
+*     where Z = Q'u, u is a vector of length N with ones in the
+*     CUTPNT and CUTPNT + 1 th elements and zeros elsewhere.
+*
+*     The eigenvectors of the original matrix are stored in Q, and the
+*     eigenvalues are in D.  The algorithm consists of three stages:
+*
+*        The first stage consists of deflating the size of the problem
+*        when there are multiple eigenvalues or if there is a zero in
+*        the Z vector.  For each such occurence the dimension of the
+*        secular equation problem is reduced by one.  This stage is
+*        performed by the routine SLAED8.
+*
+*        The second stage consists of calculating the updated
+*        eigenvalues. This is done by finding the roots of the secular
+*        equation via the routine SLAED4 (as called by SLAED9).
+*        This routine also calculates the eigenvectors of the current
+*        problem.
+*
+*        The final stage consists of computing the updated eigenvectors
+*        directly using the updated eigenvalues.  The eigenvectors for
+*        the current problem are multiplied with the eigenvectors from
+*        the overall problem.
+*
+*  Arguments
+*  =========
+*
+*  ICOMPQ  (input) INTEGER
+*          = 0:  Compute eigenvalues only.
+*          = 1:  Compute eigenvectors of original dense symmetric matrix
+*                also.  On entry, Q contains the orthogonal matrix used
+*                to reduce the original matrix to tridiagonal form.
+*
+*  N      (input) INTEGER
+*         The dimension of the symmetric tridiagonal matrix.  N >= 0.
+*
+*  QSIZ   (input) INTEGER
+*         The dimension of the orthogonal matrix used to reduce
+*         the full matrix to tridiagonal form.  QSIZ >= N if ICOMPQ = 1.
+*
+*  TLVLS  (input) INTEGER
+*         The total number of merging levels in the overall divide and
+*         conquer tree.
+*
+*  CURLVL (input) INTEGER
+*         The current level in the overall merge routine,
+*         0 <= CURLVL <= TLVLS.
+*
+*  CURPBM (input) INTEGER
+*         The current problem in the current level in the overall
+*         merge routine (counting from upper left to lower right).
+*
+*  D      (input/output) REAL array, dimension (N)
+*         On entry, the eigenvalues of the rank-1-perturbed matrix.
+*         On exit, the eigenvalues of the repaired matrix.
+*
+*  Q      (input/output) REAL array, dimension (LDQ, N)
+*         On entry, the eigenvectors of the rank-1-perturbed matrix.
+*         On exit, the eigenvectors of the repaired tridiagonal matrix.
+*
+*  LDQ    (input) INTEGER
+*         The leading dimension of the array Q.  LDQ >= max(1,N).
+*
+*  INDXQ  (output) INTEGER array, dimension (N)
+*         The permutation which will reintegrate the subproblem just
+*         solved back into sorted order, i.e., D( INDXQ( I = 1, N ) )
+*         will be in ascending order.
+*
+*  RHO    (input) REAL
+*         The subdiagonal element used to create the rank-1
+*         modification.
+*
+*  CUTPNT (input) INTEGER
+*         Contains the location of the last eigenvalue in the leading
+*         sub-matrix.  min(1,N) <= CUTPNT <= N.
+*
+*  QSTORE (input/output) REAL array, dimension (N**2+1)
+*         Stores eigenvectors of submatrices encountered during
+*         divide and conquer, packed together. QPTR points to
+*         beginning of the submatrices.
+*
+*  QPTR   (input/output) INTEGER array, dimension (N+2)
+*         List of indices pointing to beginning of submatrices stored
+*         in QSTORE. The submatrices are numbered starting at the
+*         bottom left of the divide and conquer tree, from left to
+*         right and bottom to top.
+*
+*  PRMPTR (input) INTEGER array, dimension (N lg N)
+*         Contains a list of pointers which indicate where in PERM a
+*         level's permutation is stored.  PRMPTR(i+1) - PRMPTR(i)
+*         indicates the size of the permutation and also the size of
+*         the full, non-deflated problem.
+*
+*  PERM   (input) INTEGER array, dimension (N lg N)
+*         Contains the permutations (from deflation and sorting) to be
+*         applied to each eigenblock.
+*
+*  GIVPTR (input) INTEGER array, dimension (N lg N)
+*         Contains a list of pointers which indicate where in GIVCOL a
+*         level's Givens rotations are stored.  GIVPTR(i+1) - GIVPTR(i)
+*         indicates the number of Givens rotations.
+*
+*  GIVCOL (input) INTEGER array, dimension (2, N lg N)
+*         Each pair of numbers indicates a pair of columns to take place
+*         in a Givens rotation.
+*
+*  GIVNUM (input) REAL array, dimension (2, N lg N)
+*         Each number indicates the S value to be used in the
+*         corresponding Givens rotation.
+*
+*  WORK   (workspace) REAL array, dimension (3*N+QSIZ*N)
+*
+*  IWORK  (workspace) INTEGER array, dimension (4*N)
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = 1, an eigenvalue did not converge
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Jeff Rutter, Computer Science Division, University of California
+*     at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E0, ZERO = 0.0E0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            COLTYP, CURR, I, IDLMDA, INDX, INDXC, INDXP,
+     $                   IQ2, IS, IW, IZ, K, LDQ2, N1, N2, PTR
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMM, SLAED8, SLAED9, SLAEDA, SLAMRG, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.1 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( ICOMPQ.EQ.1 .AND. QSIZ.LT.N ) THEN
+         INFO = -4
+      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( MIN( 1, N ).GT.CUTPNT .OR. N.LT.CUTPNT ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLAED7', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     The following values are for bookkeeping purposes only.  They are
+*     integer pointers which indicate the portion of the workspace
+*     used by a particular array in SLAED8 and SLAED9.
+*
+      IF( ICOMPQ.EQ.1 ) THEN
+         LDQ2 = QSIZ
+      ELSE
+         LDQ2 = N
+      END IF
+*
+      IZ = 1
+      IDLMDA = IZ + N
+      IW = IDLMDA + N
+      IQ2 = IW + N
+      IS = IQ2 + N*LDQ2
+*
+      INDX = 1
+      INDXC = INDX + N
+      COLTYP = INDXC + N
+      INDXP = COLTYP + N
+*
+*     Form the z-vector which consists of the last row of Q_1 and the
+*     first row of Q_2.
+*
+      PTR = 1 + 2**TLVLS
+      DO 10 I = 1, CURLVL - 1
+         PTR = PTR + 2**( TLVLS-I )
+   10 CONTINUE
+      CURR = PTR + CURPBM
+      CALL SLAEDA( N, TLVLS, CURLVL, CURPBM, PRMPTR, PERM, GIVPTR,
+     $             GIVCOL, GIVNUM, QSTORE, QPTR, WORK( IZ ),
+     $             WORK( IZ+N ), INFO )
+*
+*     When solving the final problem, we no longer need the stored data,
+*     so we will overwrite the data from this level onto the previously
+*     used storage space.
+*
+      IF( CURLVL.EQ.TLVLS ) THEN
+         QPTR( CURR ) = 1
+         PRMPTR( CURR ) = 1
+         GIVPTR( CURR ) = 1
+      END IF
+*
+*     Sort and Deflate eigenvalues.
+*
+      CALL SLAED8( ICOMPQ, K, N, QSIZ, D, Q, LDQ, INDXQ, RHO, CUTPNT,
+     $             WORK( IZ ), WORK( IDLMDA ), WORK( IQ2 ), LDQ2,
+     $             WORK( IW ), PERM( PRMPTR( CURR ) ), GIVPTR( CURR+1 ),
+     $             GIVCOL( 1, GIVPTR( CURR ) ),
+     $             GIVNUM( 1, GIVPTR( CURR ) ), IWORK( INDXP ),
+     $             IWORK( INDX ), INFO )
+      PRMPTR( CURR+1 ) = PRMPTR( CURR ) + N
+      GIVPTR( CURR+1 ) = GIVPTR( CURR+1 ) + GIVPTR( CURR )
+*
+*     Solve Secular Equation.
+*
+      IF( K.NE.0 ) THEN
+         CALL SLAED9( K, 1, K, N, D, WORK( IS ), K, RHO, WORK( IDLMDA ),
+     $                WORK( IW ), QSTORE( QPTR( CURR ) ), K, INFO )
+         IF( INFO.NE.0 )
+     $      GO TO 30
+         IF( ICOMPQ.EQ.1 ) THEN
+            CALL SGEMM( 'N', 'N', QSIZ, K, K, ONE, WORK( IQ2 ), LDQ2,
+     $                  QSTORE( QPTR( CURR ) ), K, ZERO, Q, LDQ )
+         END IF
+         QPTR( CURR+1 ) = QPTR( CURR ) + K**2
+*
+*     Prepare the INDXQ sorting permutation.
+*
+         N1 = K
+         N2 = N - K
+         CALL SLAMRG( N1, N2, D, 1, -1, INDXQ )
+      ELSE
+         QPTR( CURR+1 ) = QPTR( CURR )
+         DO 20 I = 1, N
+            INDXQ( I ) = I
+   20    CONTINUE
+      END IF
+*
+   30 CONTINUE
+      RETURN
+*
+*     End of SLAED7
+*
+      END
diff --git a/costa/native/external/lapack/slaed8.f b/costa/native/external/lapack/slaed8.f
new file mode 100644
index 000000000..05571eb05
--- /dev/null
+++ b/costa/native/external/lapack/slaed8.f
@@ -0,0 +1,400 @@
+      SUBROUTINE SLAED8( ICOMPQ, K, N, QSIZ, D, Q, LDQ, INDXQ, RHO,
+     $                   CUTPNT, Z, DLAMDA, Q2, LDQ2, W, PERM, GIVPTR,
+     $                   GIVCOL, GIVNUM, INDXP, INDX, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+*     Courant Institute, NAG Ltd., and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            CUTPNT, GIVPTR, ICOMPQ, INFO, K, LDQ, LDQ2, N,
+     $                   QSIZ
+      REAL               RHO
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( 2, * ), INDX( * ), INDXP( * ),
+     $                   INDXQ( * ), PERM( * )
+      REAL               D( * ), DLAMDA( * ), GIVNUM( 2, * ),
+     $                   Q( LDQ, * ), Q2( LDQ2, * ), W( * ), Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAED8 merges the two sets of eigenvalues together into a single
+*  sorted set.  Then it tries to deflate the size of the problem.
+*  There are two ways in which deflation can occur:  when two or more
+*  eigenvalues are close together or if there is a tiny element in the
+*  Z vector.  For each such occurrence the order of the related secular
+*  equation problem is reduced by one.
+*
+*  Arguments
+*  =========
+*
+*  ICOMPQ  (input) INTEGER
+*          = 0:  Compute eigenvalues only.
+*          = 1:  Compute eigenvectors of original dense symmetric matrix
+*                also.  On entry, Q contains the orthogonal matrix used
+*                to reduce the original matrix to tridiagonal form.
+*
+*  K      (output) INTEGER
+*         The number of non-deflated eigenvalues, and the order of the
+*         related secular equation.
+*
+*  N      (input) INTEGER
+*         The dimension of the symmetric tridiagonal matrix.  N >= 0.
+*
+*  QSIZ   (input) INTEGER
+*         The dimension of the orthogonal matrix used to reduce
+*         the full matrix to tridiagonal form.  QSIZ >= N if ICOMPQ = 1.
+*
+*  D      (input/output) REAL array, dimension (N)
+*         On entry, the eigenvalues of the two submatrices to be
+*         combined.  On exit, the trailing (N-K) updated eigenvalues
+*         (those which were deflated) sorted into increasing order.
+*
+*  Q      (input/output) REAL array, dimension (LDQ,N)
+*         If ICOMPQ = 0, Q is not referenced.  Otherwise,
+*         on entry, Q contains the eigenvectors of the partially solved
+*         system which has been previously updated in matrix
+*         multiplies with other partially solved eigensystems.
+*         On exit, Q contains the trailing (N-K) updated eigenvectors
+*         (those which were deflated) in its last N-K columns.
+*
+*  LDQ    (input) INTEGER
+*         The leading dimension of the array Q.  LDQ >= max(1,N).
+*
+*  INDXQ  (input) INTEGER array, dimension (N)
+*         The permutation which separately sorts the two sub-problems
+*         in D into ascending order.  Note that elements in the second
+*         half of this permutation must first have CUTPNT added to
+*         their values in order to be accurate.
+*
+*  RHO    (input/output) REAL
+*         On entry, the off-diagonal element associated with the rank-1
+*         cut which originally split the two submatrices which are now
+*         being recombined.
+*         On exit, RHO has been modified to the value required by
+*         SLAED3.
+*
+*  CUTPNT (input) INTEGER
+*         The location of the last eigenvalue in the leading
+*         sub-matrix.  min(1,N) <= CUTPNT <= N.
+*
+*  Z      (input) REAL array, dimension (N)
+*         On entry, Z contains the updating vector (the last row of
+*         the first sub-eigenvector matrix and the first row of the
+*         second sub-eigenvector matrix).
+*         On exit, the contents of Z are destroyed by the updating
+*         process.
+*
+*  DLAMDA (output) REAL array, dimension (N)
+*         A copy of the first K eigenvalues which will be used by
+*         SLAED3 to form the secular equation.
+*
+*  Q2     (output) REAL array, dimension (LDQ2,N)
+*         If ICOMPQ = 0, Q2 is not referenced.  Otherwise,
+*         a copy of the first K eigenvectors which will be used by
+*         SLAED7 in a matrix multiply (SGEMM) to update the new
+*         eigenvectors.
+*
+*  LDQ2   (input) INTEGER
+*         The leading dimension of the array Q2.  LDQ2 >= max(1,N).
+*
+*  W      (output) REAL array, dimension (N)
+*         The first k values of the final deflation-altered z-vector and
+*         will be passed to SLAED3.
+*
+*  PERM   (output) INTEGER array, dimension (N)
+*         The permutations (from deflation and sorting) to be applied
+*         to each eigenblock.
+*
+*  GIVPTR (output) INTEGER
+*         The number of Givens rotations which took place in this
+*         subproblem.
+*
+*  GIVCOL (output) INTEGER array, dimension (2, N)
+*         Each pair of numbers indicates a pair of columns to take place
+*         in a Givens rotation.
+*
+*  GIVNUM (output) REAL array, dimension (2, N)
+*         Each number indicates the S value to be used in the
+*         corresponding Givens rotation.
+*
+*  INDXP  (workspace) INTEGER array, dimension (N)
+*         The permutation used to place deflated values of D at the end
+*         of the array.  INDXP(1:K) points to the nondeflated D-values
+*         and INDXP(K+1:N) points to the deflated eigenvalues.
+*
+*  INDX   (workspace) INTEGER array, dimension (N)
+*         The permutation used to sort the contents of D into ascending
+*         order.
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Jeff Rutter, Computer Science Division, University of California
+*     at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               MONE, ZERO, ONE, TWO, EIGHT
+      PARAMETER          ( MONE = -1.0E0, ZERO = 0.0E0, ONE = 1.0E0,
+     $                   TWO = 2.0E0, EIGHT = 8.0E0 )
+*     ..
+*     .. Local Scalars ..
+*
+      INTEGER            I, IMAX, J, JLAM, JMAX, JP, K2, N1, N1P1, N2
+      REAL               C, EPS, S, T, TAU, TOL
+*     ..
+*     .. External Functions ..
+      INTEGER            ISAMAX
+      REAL               SLAMCH, SLAPY2
+      EXTERNAL           ISAMAX, SLAMCH, SLAPY2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SLACPY, SLAMRG, SROT, SSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.1 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( ICOMPQ.EQ.1 .AND. QSIZ.LT.N ) THEN
+         INFO = -4
+      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( CUTPNT.LT.MIN( 1, N ) .OR. CUTPNT.GT.N ) THEN
+         INFO = -10
+      ELSE IF( LDQ2.LT.MAX( 1, N ) ) THEN
+         INFO = -14
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLAED8', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      N1 = CUTPNT
+      N2 = N - N1
+      N1P1 = N1 + 1
+*
+      IF( RHO.LT.ZERO ) THEN
+         CALL SSCAL( N2, MONE, Z( N1P1 ), 1 )
+      END IF
+*
+*     Normalize z so that norm(z) = 1
+*
+      T = ONE / SQRT( TWO )
+      DO 10 J = 1, N
+         INDX( J ) = J
+   10 CONTINUE
+      CALL SSCAL( N, T, Z, 1 )
+      RHO = ABS( TWO*RHO )
+*
+*     Sort the eigenvalues into increasing order
+*
+      DO 20 I = CUTPNT + 1, N
+         INDXQ( I ) = INDXQ( I ) + CUTPNT
+   20 CONTINUE
+      DO 30 I = 1, N
+         DLAMDA( I ) = D( INDXQ( I ) )
+         W( I ) = Z( INDXQ( I ) )
+   30 CONTINUE
+      I = 1
+      J = CUTPNT + 1
+      CALL SLAMRG( N1, N2, DLAMDA, 1, 1, INDX )
+      DO 40 I = 1, N
+         D( I ) = DLAMDA( INDX( I ) )
+         Z( I ) = W( INDX( I ) )
+   40 CONTINUE
+*
+*     Calculate the allowable deflation tolerence
+*
+      IMAX = ISAMAX( N, Z, 1 )
+      JMAX = ISAMAX( N, D, 1 )
+      EPS = SLAMCH( 'Epsilon' )
+      TOL = EIGHT*EPS*ABS( D( JMAX ) )
+*
+*     If the rank-1 modifier is small enough, no more needs to be done
+*     except to reorganize Q so that its columns correspond with the
+*     elements in D.
+*
+      IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN
+         K = 0
+         IF( ICOMPQ.EQ.0 ) THEN
+            DO 50 J = 1, N
+               PERM( J ) = INDXQ( INDX( J ) )
+   50       CONTINUE
+         ELSE
+            DO 60 J = 1, N
+               PERM( J ) = INDXQ( INDX( J ) )
+               CALL SCOPY( QSIZ, Q( 1, PERM( J ) ), 1, Q2( 1, J ), 1 )
+   60       CONTINUE
+            CALL SLACPY( 'A', QSIZ, N, Q2( 1, 1 ), LDQ2, Q( 1, 1 ),
+     $                   LDQ )
+         END IF
+         RETURN
+      END IF
+*
+*     If there are multiple eigenvalues then the problem deflates.  Here
+*     the number of equal eigenvalues are found.  As each equal
+*     eigenvalue is found, an elementary reflector is computed to rotate
+*     the corresponding eigensubspace so that the corresponding
+*     components of Z are zero in this new basis.
+*
+      K = 0
+      GIVPTR = 0
+      K2 = N + 1
+      DO 70 J = 1, N
+         IF( RHO*ABS( Z( J ) ).LE.TOL ) THEN
+*
+*           Deflate due to small z component.
+*
+            K2 = K2 - 1
+            INDXP( K2 ) = J
+            IF( J.EQ.N )
+     $         GO TO 110
+         ELSE
+            JLAM = J
+            GO TO 80
+         END IF
+   70 CONTINUE
+   80 CONTINUE
+      J = J + 1
+      IF( J.GT.N )
+     $   GO TO 100
+      IF( RHO*ABS( Z( J ) ).LE.TOL ) THEN
+*
+*        Deflate due to small z component.
+*
+         K2 = K2 - 1
+         INDXP( K2 ) = J
+      ELSE
+*
+*        Check if eigenvalues are close enough to allow deflation.
+*
+         S = Z( JLAM )
+         C = Z( J )
+*
+*        Find sqrt(a**2+b**2) without overflow or
+*        destructive underflow.
+*
+         TAU = SLAPY2( C, S )
+         T = D( J ) - D( JLAM )
+         C = C / TAU
+         S = -S / TAU
+         IF( ABS( T*C*S ).LE.TOL ) THEN
+*
+*           Deflation is possible.
+*
+            Z( J ) = TAU
+            Z( JLAM ) = ZERO
+*
+*           Record the appropriate Givens rotation
+*
+            GIVPTR = GIVPTR + 1
+            GIVCOL( 1, GIVPTR ) = INDXQ( INDX( JLAM ) )
+            GIVCOL( 2, GIVPTR ) = INDXQ( INDX( J ) )
+            GIVNUM( 1, GIVPTR ) = C
+            GIVNUM( 2, GIVPTR ) = S
+            IF( ICOMPQ.EQ.1 ) THEN
+               CALL SROT( QSIZ, Q( 1, INDXQ( INDX( JLAM ) ) ), 1,
+     $                    Q( 1, INDXQ( INDX( J ) ) ), 1, C, S )
+            END IF
+            T = D( JLAM )*C*C + D( J )*S*S
+            D( J ) = D( JLAM )*S*S + D( J )*C*C
+            D( JLAM ) = T
+            K2 = K2 - 1
+            I = 1
+   90       CONTINUE
+            IF( K2+I.LE.N ) THEN
+               IF( D( JLAM ).LT.D( INDXP( K2+I ) ) ) THEN
+                  INDXP( K2+I-1 ) = INDXP( K2+I )
+                  INDXP( K2+I ) = JLAM
+                  I = I + 1
+                  GO TO 90
+               ELSE
+                  INDXP( K2+I-1 ) = JLAM
+               END IF
+            ELSE
+               INDXP( K2+I-1 ) = JLAM
+            END IF
+            JLAM = J
+         ELSE
+            K = K + 1
+            W( K ) = Z( JLAM )
+            DLAMDA( K ) = D( JLAM )
+            INDXP( K ) = JLAM
+            JLAM = J
+         END IF
+      END IF
+      GO TO 80
+  100 CONTINUE
+*
+*     Record the last eigenvalue.
+*
+      K = K + 1
+      W( K ) = Z( JLAM )
+      DLAMDA( K ) = D( JLAM )
+      INDXP( K ) = JLAM
+*
+  110 CONTINUE
+*
+*     Sort the eigenvalues and corresponding eigenvectors into DLAMDA
+*     and Q2 respectively.  The eigenvalues/vectors which were not
+*     deflated go into the first K slots of DLAMDA and Q2 respectively,
+*     while those which were deflated go into the last N - K slots.
+*
+      IF( ICOMPQ.EQ.0 ) THEN
+         DO 120 J = 1, N
+            JP = INDXP( J )
+            DLAMDA( J ) = D( JP )
+            PERM( J ) = INDXQ( INDX( JP ) )
+  120    CONTINUE
+      ELSE
+         DO 130 J = 1, N
+            JP = INDXP( J )
+            DLAMDA( J ) = D( JP )
+            PERM( J ) = INDXQ( INDX( JP ) )
+            CALL SCOPY( QSIZ, Q( 1, PERM( J ) ), 1, Q2( 1, J ), 1 )
+  130    CONTINUE
+      END IF
+*
+*     The deflated eigenvalues and their corresponding vectors go back
+*     into the last N - K slots of D and Q respectively.
+*
+      IF( K.LT.N ) THEN
+         IF( ICOMPQ.EQ.0 ) THEN
+            CALL SCOPY( N-K, DLAMDA( K+1 ), 1, D( K+1 ), 1 )
+         ELSE
+            CALL SCOPY( N-K, DLAMDA( K+1 ), 1, D( K+1 ), 1 )
+            CALL SLACPY( 'A', QSIZ, N-K, Q2( 1, K+1 ), LDQ2,
+     $                   Q( 1, K+1 ), LDQ )
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SLAED8
+*
+      END
diff --git a/costa/native/external/lapack/slaed9.f b/costa/native/external/lapack/slaed9.f
new file mode 100644
index 000000000..cdac9dc00
--- /dev/null
+++ b/costa/native/external/lapack/slaed9.f
@@ -0,0 +1,206 @@
+      SUBROUTINE SLAED9( K, KSTART, KSTOP, N, D, Q, LDQ, RHO, DLAMDA, W,
+     $                   S, LDS, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+*     Courant Institute, NAG Ltd., and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, KSTART, KSTOP, LDQ, LDS, N
+      REAL               RHO
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * ), DLAMDA( * ), Q( LDQ, * ), S( LDS, * ),
+     $                   W( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAED9 finds the roots of the secular equation, as defined by the
+*  values in D, Z, and RHO, between KSTART and KSTOP.  It makes the
+*  appropriate calls to SLAED4 and then stores the new matrix of
+*  eigenvectors for use in calculating the next level of Z vectors.
+*
+*  Arguments
+*  =========
+*
+*  K       (input) INTEGER
+*          The number of terms in the rational function to be solved by
+*          SLAED4.  K >= 0.
+*
+*  KSTART  (input) INTEGER
+*  KSTOP   (input) INTEGER
+*          The updated eigenvalues Lambda(I), KSTART <= I <= KSTOP
+*          are to be computed.  1 <= KSTART <= KSTOP <= K.
+*
+*  N       (input) INTEGER
+*          The number of rows and columns in the Q matrix.
+*          N >= K (delation may result in N > K).
+*
+*  D       (output) REAL array, dimension (N)
+*          D(I) contains the updated eigenvalues
+*          for KSTART <= I <= KSTOP.
+*
+*  Q       (workspace) REAL array, dimension (LDQ,N)
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.  LDQ >= max( 1, N ).
+*
+*  RHO     (input) REAL
+*          The value of the parameter in the rank one update equation.
+*          RHO >= 0 required.
+*
+*  DLAMDA  (input) REAL array, dimension (K)
+*          The first K elements of this array contain the old roots
+*          of the deflated updating problem.  These are the poles
+*          of the secular equation.
+*
+*  W       (input) REAL array, dimension (K)
+*          The first K elements of this array contain the components
+*          of the deflation-adjusted updating vector.
+*
+*  S       (output) REAL array, dimension (LDS, K)
+*          Will contain the eigenvectors of the repaired matrix which
+*          will be stored for subsequent Z vector calculation and
+*          multiplied by the previously accumulated eigenvectors
+*          to update the system.
+*
+*  LDS     (input) INTEGER
+*          The leading dimension of S.  LDS >= max( 1, K ).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = 1, an eigenvalue did not converge
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Jeff Rutter, Computer Science Division, University of California
+*     at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, J
+      REAL               TEMP
+*     ..
+*     .. External Functions ..
+      REAL               SLAMC3, SNRM2
+      EXTERNAL           SLAMC3, SNRM2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SLAED4, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, SIGN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( K.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( KSTART.LT.1 .OR. KSTART.GT.MAX( 1, K ) ) THEN
+         INFO = -2
+      ELSE IF( MAX( 1, KSTOP ).LT.KSTART .OR. KSTOP.GT.MAX( 1, K ) )
+     $          THEN
+         INFO = -3
+      ELSE IF( N.LT.K ) THEN
+         INFO = -4
+      ELSE IF( LDQ.LT.MAX( 1, K ) ) THEN
+         INFO = -7
+      ELSE IF( LDS.LT.MAX( 1, K ) ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLAED9', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( K.EQ.0 )
+     $   RETURN
+*
+*     Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can
+*     be computed with high relative accuracy (barring over/underflow).
+*     This is a problem on machines without a guard digit in
+*     add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
+*     The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I),
+*     which on any of these machines zeros out the bottommost
+*     bit of DLAMDA(I) if it is 1; this makes the subsequent
+*     subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation
+*     occurs. On binary machines with a guard digit (almost all
+*     machines) it does not change DLAMDA(I) at all. On hexadecimal
+*     and decimal machines with a guard digit, it slightly
+*     changes the bottommost bits of DLAMDA(I). It does not account
+*     for hexadecimal or decimal machines without guard digits
+*     (we know of none). We use a subroutine call to compute
+*     2*DLAMBDA(I) to prevent optimizing compilers from eliminating
+*     this code.
+*
+      DO 10 I = 1, N
+         DLAMDA( I ) = SLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I )
+   10 CONTINUE
+*
+      DO 20 J = KSTART, KSTOP
+         CALL SLAED4( K, J, DLAMDA, W, Q( 1, J ), RHO, D( J ), INFO )
+*
+*        If the zero finder fails, the computation is terminated.
+*
+         IF( INFO.NE.0 )
+     $      GO TO 120
+   20 CONTINUE
+*
+      IF( K.EQ.1 .OR. K.EQ.2 ) THEN
+         DO 40 I = 1, K
+            DO 30 J = 1, K
+               S( J, I ) = Q( J, I )
+   30       CONTINUE
+   40    CONTINUE
+         GO TO 120
+      END IF
+*
+*     Compute updated W.
+*
+      CALL SCOPY( K, W, 1, S, 1 )
+*
+*     Initialize W(I) = Q(I,I)
+*
+      CALL SCOPY( K, Q, LDQ+1, W, 1 )
+      DO 70 J = 1, K
+         DO 50 I = 1, J - 1
+            W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) )
+   50    CONTINUE
+         DO 60 I = J + 1, K
+            W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) )
+   60    CONTINUE
+   70 CONTINUE
+      DO 80 I = 1, K
+         W( I ) = SIGN( SQRT( -W( I ) ), S( I, 1 ) )
+   80 CONTINUE
+*
+*     Compute eigenvectors of the modified rank-1 modification.
+*
+      DO 110 J = 1, K
+         DO 90 I = 1, K
+            Q( I, J ) = W( I ) / Q( I, J )
+   90    CONTINUE
+         TEMP = SNRM2( K, Q( 1, J ), 1 )
+         DO 100 I = 1, K
+            S( I, J ) = Q( I, J ) / TEMP
+  100    CONTINUE
+  110 CONTINUE
+*
+  120 CONTINUE
+      RETURN
+*
+*     End of SLAED9
+*
+      END
diff --git a/costa/native/external/lapack/slaeda.f b/costa/native/external/lapack/slaeda.f
new file mode 100644
index 000000000..33933ff82
--- /dev/null
+++ b/costa/native/external/lapack/slaeda.f
@@ -0,0 +1,218 @@
+      SUBROUTINE SLAEDA( N, TLVLS, CURLVL, CURPBM, PRMPTR, PERM, GIVPTR,
+     $                   GIVCOL, GIVNUM, Q, QPTR, Z, ZTEMP, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            CURLVL, CURPBM, INFO, N, TLVLS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( 2, * ), GIVPTR( * ), PERM( * ),
+     $                   PRMPTR( * ), QPTR( * )
+      REAL               GIVNUM( 2, * ), Q( * ), Z( * ), ZTEMP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAEDA computes the Z vector corresponding to the merge step in the
+*  CURLVLth step of the merge process with TLVLS steps for the CURPBMth
+*  problem.
+*
+*  Arguments
+*  =========
+*
+*  N      (input) INTEGER
+*         The dimension of the symmetric tridiagonal matrix.  N >= 0.
+*
+*  TLVLS  (input) INTEGER
+*         The total number of merging levels in the overall divide and
+*         conquer tree.
+*
+*  CURLVL (input) INTEGER
+*         The current level in the overall merge routine,
+*         0 <= curlvl <= tlvls.
+*
+*  CURPBM (input) INTEGER
+*         The current problem in the current level in the overall
+*         merge routine (counting from upper left to lower right).
+*
+*  PRMPTR (input) INTEGER array, dimension (N lg N)
+*         Contains a list of pointers which indicate where in PERM a
+*         level's permutation is stored.  PRMPTR(i+1) - PRMPTR(i)
+*         indicates the size of the permutation and incidentally the
+*         size of the full, non-deflated problem.
+*
+*  PERM   (input) INTEGER array, dimension (N lg N)
+*         Contains the permutations (from deflation and sorting) to be
+*         applied to each eigenblock.
+*
+*  GIVPTR (input) INTEGER array, dimension (N lg N)
+*         Contains a list of pointers which indicate where in GIVCOL a
+*         level's Givens rotations are stored.  GIVPTR(i+1) - GIVPTR(i)
+*         indicates the number of Givens rotations.
+*
+*  GIVCOL (input) INTEGER array, dimension (2, N lg N)
+*         Each pair of numbers indicates a pair of columns to take place
+*         in a Givens rotation.
+*
+*  GIVNUM (input) REAL array, dimension (2, N lg N)
+*         Each number indicates the S value to be used in the
+*         corresponding Givens rotation.
+*
+*  Q      (input) REAL array, dimension (N**2)
+*         Contains the square eigenblocks from previous levels, the
+*         starting positions for blocks are given by QPTR.
+*
+*  QPTR   (input) INTEGER array, dimension (N+2)
+*         Contains a list of pointers which indicate where in Q an
+*         eigenblock is stored.  SQRT( QPTR(i+1) - QPTR(i) ) indicates
+*         the size of the block.
+*
+*  Z      (output) REAL array, dimension (N)
+*         On output this vector contains the updating vector (the last
+*         row of the first sub-eigenvector matrix and the first row of
+*         the second sub-eigenvector matrix).
+*
+*  ZTEMP  (workspace) REAL array, dimension (N)
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Jeff Rutter, Computer Science Division, University of California
+*     at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, HALF, ONE
+      PARAMETER          ( ZERO = 0.0E0, HALF = 0.5E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            BSIZ1, BSIZ2, CURR, I, K, MID, PSIZ1, PSIZ2,
+     $                   PTR, ZPTR1
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SGEMV, SROT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLAEDA', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine location of first number in second half.
+*
+      MID = N / 2 + 1
+*
+*     Gather last/first rows of appropriate eigenblocks into center of Z
+*
+      PTR = 1
+*
+*     Determine location of lowest level subproblem in the full storage
+*     scheme
+*
+      CURR = PTR + CURPBM*2**CURLVL + 2**( CURLVL-1 ) - 1
+*
+*     Determine size of these matrices.  We add HALF to the value of
+*     the SQRT in case the machine underestimates one of these square
+*     roots.
+*
+      BSIZ1 = INT( HALF+SQRT( REAL( QPTR( CURR+1 )-QPTR( CURR ) ) ) )
+      BSIZ2 = INT( HALF+SQRT( REAL( QPTR( CURR+2 )-QPTR( CURR+1 ) ) ) )
+      DO 10 K = 1, MID - BSIZ1 - 1
+         Z( K ) = ZERO
+   10 CONTINUE
+      CALL SCOPY( BSIZ1, Q( QPTR( CURR )+BSIZ1-1 ), BSIZ1,
+     $            Z( MID-BSIZ1 ), 1 )
+      CALL SCOPY( BSIZ2, Q( QPTR( CURR+1 ) ), BSIZ2, Z( MID ), 1 )
+      DO 20 K = MID + BSIZ2, N
+         Z( K ) = ZERO
+   20 CONTINUE
+*
+*     Loop thru remaining levels 1 -> CURLVL applying the Givens
+*     rotations and permutation and then multiplying the center matrices
+*     against the current Z.
+*
+      PTR = 2**TLVLS + 1
+      DO 70 K = 1, CURLVL - 1
+         CURR = PTR + CURPBM*2**( CURLVL-K ) + 2**( CURLVL-K-1 ) - 1
+         PSIZ1 = PRMPTR( CURR+1 ) - PRMPTR( CURR )
+         PSIZ2 = PRMPTR( CURR+2 ) - PRMPTR( CURR+1 )
+         ZPTR1 = MID - PSIZ1
+*
+*       Apply Givens at CURR and CURR+1
+*
+         DO 30 I = GIVPTR( CURR ), GIVPTR( CURR+1 ) - 1
+            CALL SROT( 1, Z( ZPTR1+GIVCOL( 1, I )-1 ), 1,
+     $                 Z( ZPTR1+GIVCOL( 2, I )-1 ), 1, GIVNUM( 1, I ),
+     $                 GIVNUM( 2, I ) )
+   30    CONTINUE
+         DO 40 I = GIVPTR( CURR+1 ), GIVPTR( CURR+2 ) - 1
+            CALL SROT( 1, Z( MID-1+GIVCOL( 1, I ) ), 1,
+     $                 Z( MID-1+GIVCOL( 2, I ) ), 1, GIVNUM( 1, I ),
+     $                 GIVNUM( 2, I ) )
+   40    CONTINUE
+         PSIZ1 = PRMPTR( CURR+1 ) - PRMPTR( CURR )
+         PSIZ2 = PRMPTR( CURR+2 ) - PRMPTR( CURR+1 )
+         DO 50 I = 0, PSIZ1 - 1
+            ZTEMP( I+1 ) = Z( ZPTR1+PERM( PRMPTR( CURR )+I )-1 )
+   50    CONTINUE
+         DO 60 I = 0, PSIZ2 - 1
+            ZTEMP( PSIZ1+I+1 ) = Z( MID+PERM( PRMPTR( CURR+1 )+I )-1 )
+   60    CONTINUE
+*
+*        Multiply Blocks at CURR and CURR+1
+*
+*        Determine size of these matrices.  We add HALF to the value of
+*        the SQRT in case the machine underestimates one of these
+*        square roots.
+*
+         BSIZ1 = INT( HALF+SQRT( REAL( QPTR( CURR+1 )-QPTR( CURR ) ) ) )
+         BSIZ2 = INT( HALF+SQRT( REAL( QPTR( CURR+2 )-QPTR( CURR+
+     $           1 ) ) ) )
+         IF( BSIZ1.GT.0 ) THEN
+            CALL SGEMV( 'T', BSIZ1, BSIZ1, ONE, Q( QPTR( CURR ) ),
+     $                  BSIZ1, ZTEMP( 1 ), 1, ZERO, Z( ZPTR1 ), 1 )
+         END IF
+         CALL SCOPY( PSIZ1-BSIZ1, ZTEMP( BSIZ1+1 ), 1, Z( ZPTR1+BSIZ1 ),
+     $               1 )
+         IF( BSIZ2.GT.0 ) THEN
+            CALL SGEMV( 'T', BSIZ2, BSIZ2, ONE, Q( QPTR( CURR+1 ) ),
+     $                  BSIZ2, ZTEMP( PSIZ1+1 ), 1, ZERO, Z( MID ), 1 )
+         END IF
+         CALL SCOPY( PSIZ2-BSIZ2, ZTEMP( PSIZ1+BSIZ2+1 ), 1,
+     $               Z( MID+BSIZ2 ), 1 )
+*
+         PTR = PTR + 2**( TLVLS-K )
+   70 CONTINUE
+*
+      RETURN
+*
+*     End of SLAEDA
+*
+      END
diff --git a/costa/native/external/lapack/slaein.f b/costa/native/external/lapack/slaein.f
new file mode 100644
index 000000000..455e65c2a
--- /dev/null
+++ b/costa/native/external/lapack/slaein.f
@@ -0,0 +1,532 @@
+      SUBROUTINE SLAEIN( RIGHTV, NOINIT, N, H, LDH, WR, WI, VR, VI, B,
+     $                   LDB, WORK, EPS3, SMLNUM, BIGNUM, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      LOGICAL            NOINIT, RIGHTV
+      INTEGER            INFO, LDB, LDH, N
+      REAL               BIGNUM, EPS3, SMLNUM, WI, WR
+*     ..
+*     .. Array Arguments ..
+      REAL               B( LDB, * ), H( LDH, * ), VI( * ), VR( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAEIN uses inverse iteration to find a right or left eigenvector
+*  corresponding to the eigenvalue (WR,WI) of a real upper Hessenberg
+*  matrix H.
+*
+*  Arguments
+*  =========
+*
+*  RIGHTV   (input) LOGICAL
+*          = .TRUE. : compute right eigenvector;
+*          = .FALSE.: compute left eigenvector.
+*
+*  NOINIT   (input) LOGICAL
+*          = .TRUE. : no initial vector supplied in (VR,VI).
+*          = .FALSE.: initial vector supplied in (VR,VI).
+*
+*  N       (input) INTEGER
+*          The order of the matrix H.  N >= 0.
+*
+*  H       (input) REAL array, dimension (LDH,N)
+*          The upper Hessenberg matrix H.
+*
+*  LDH     (input) INTEGER
+*          The leading dimension of the array H.  LDH >= max(1,N).
+*
+*  WR      (input) REAL
+*  WI      (input) REAL
+*          The real and imaginary parts of the eigenvalue of H whose
+*          corresponding right or left eigenvector is to be computed.
+*
+*  VR      (input/output) REAL array, dimension (N)
+*  VI      (input/output) REAL array, dimension (N)
+*          On entry, if NOINIT = .FALSE. and WI = 0.0, VR must contain
+*          a real starting vector for inverse iteration using the real
+*          eigenvalue WR; if NOINIT = .FALSE. and WI.ne.0.0, VR and VI
+*          must contain the real and imaginary parts of a complex
+*          starting vector for inverse iteration using the complex
+*          eigenvalue (WR,WI); otherwise VR and VI need not be set.
+*          On exit, if WI = 0.0 (real eigenvalue), VR contains the
+*          computed real eigenvector; if WI.ne.0.0 (complex eigenvalue),
+*          VR and VI contain the real and imaginary parts of the
+*          computed complex eigenvector. The eigenvector is normalized
+*          so that the component of largest magnitude has magnitude 1;
+*          here the magnitude of a complex number (x,y) is taken to be
+*          |x| + |y|.
+*          VI is not referenced if WI = 0.0.
+*
+*  B       (workspace) REAL array, dimension (LDB,N)
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= N+1.
+*
+*  WORK   (workspace) REAL array, dimension (N)
+*
+*  EPS3    (input) REAL
+*          A small machine-dependent value which is used to perturb
+*          close eigenvalues, and to replace zero pivots.
+*
+*  SMLNUM  (input) REAL
+*          A machine-dependent value close to the underflow threshold.
+*
+*  BIGNUM  (input) REAL
+*          A machine-dependent value close to the overflow threshold.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          = 1:  inverse iteration did not converge; VR is set to the
+*                last iterate, and so is VI if WI.ne.0.0.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE, TENTH
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0, TENTH = 1.0E-1 )
+*     ..
+*     .. Local Scalars ..
+      CHARACTER          NORMIN, TRANS
+      INTEGER            I, I1, I2, I3, IERR, ITS, J
+      REAL               ABSBII, ABSBJJ, EI, EJ, GROWTO, NORM, NRMSML,
+     $                   REC, ROOTN, SCALE, TEMP, VCRIT, VMAX, VNORM, W,
+     $                   W1, X, XI, XR, Y
+*     ..
+*     .. External Functions ..
+      INTEGER            ISAMAX
+      REAL               SASUM, SLAPY2, SNRM2
+      EXTERNAL           ISAMAX, SASUM, SLAPY2, SNRM2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLADIV, SLATRS, SSCAL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+*
+*     GROWTO is the threshold used in the acceptance test for an
+*     eigenvector.
+*
+      ROOTN = SQRT( REAL( N ) )
+      GROWTO = TENTH / ROOTN
+      NRMSML = MAX( ONE, EPS3*ROOTN )*SMLNUM
+*
+*     Form B = H - (WR,WI)*I (except that the subdiagonal elements and
+*     the imaginary parts of the diagonal elements are not stored).
+*
+      DO 20 J = 1, N
+         DO 10 I = 1, J - 1
+            B( I, J ) = H( I, J )
+   10    CONTINUE
+         B( J, J ) = H( J, J ) - WR
+   20 CONTINUE
+*
+      IF( WI.EQ.ZERO ) THEN
+*
+*        Real eigenvalue.
+*
+         IF( NOINIT ) THEN
+*
+*           Set initial vector.
+*
+            DO 30 I = 1, N
+               VR( I ) = EPS3
+   30       CONTINUE
+         ELSE
+*
+*           Scale supplied initial vector.
+*
+            VNORM = SNRM2( N, VR, 1 )
+            CALL SSCAL( N, ( EPS3*ROOTN ) / MAX( VNORM, NRMSML ), VR,
+     $                  1 )
+         END IF
+*
+         IF( RIGHTV ) THEN
+*
+*           LU decomposition with partial pivoting of B, replacing zero
+*           pivots by EPS3.
+*
+            DO 60 I = 1, N - 1
+               EI = H( I+1, I )
+               IF( ABS( B( I, I ) ).LT.ABS( EI ) ) THEN
+*
+*                 Interchange rows and eliminate.
+*
+                  X = B( I, I ) / EI
+                  B( I, I ) = EI
+                  DO 40 J = I + 1, N
+                     TEMP = B( I+1, J )
+                     B( I+1, J ) = B( I, J ) - X*TEMP
+                     B( I, J ) = TEMP
+   40             CONTINUE
+               ELSE
+*
+*                 Eliminate without interchange.
+*
+                  IF( B( I, I ).EQ.ZERO )
+     $               B( I, I ) = EPS3
+                  X = EI / B( I, I )
+                  IF( X.NE.ZERO ) THEN
+                     DO 50 J = I + 1, N
+                        B( I+1, J ) = B( I+1, J ) - X*B( I, J )
+   50                CONTINUE
+                  END IF
+               END IF
+   60       CONTINUE
+            IF( B( N, N ).EQ.ZERO )
+     $         B( N, N ) = EPS3
+*
+            TRANS = 'N'
+*
+         ELSE
+*
+*           UL decomposition with partial pivoting of B, replacing zero
+*           pivots by EPS3.
+*
+            DO 90 J = N, 2, -1
+               EJ = H( J, J-1 )
+               IF( ABS( B( J, J ) ).LT.ABS( EJ ) ) THEN
+*
+*                 Interchange columns and eliminate.
+*
+                  X = B( J, J ) / EJ
+                  B( J, J ) = EJ
+                  DO 70 I = 1, J - 1
+                     TEMP = B( I, J-1 )
+                     B( I, J-1 ) = B( I, J ) - X*TEMP
+                     B( I, J ) = TEMP
+   70             CONTINUE
+               ELSE
+*
+*                 Eliminate without interchange.
+*
+                  IF( B( J, J ).EQ.ZERO )
+     $               B( J, J ) = EPS3
+                  X = EJ / B( J, J )
+                  IF( X.NE.ZERO ) THEN
+                     DO 80 I = 1, J - 1
+                        B( I, J-1 ) = B( I, J-1 ) - X*B( I, J )
+   80                CONTINUE
+                  END IF
+               END IF
+   90       CONTINUE
+            IF( B( 1, 1 ).EQ.ZERO )
+     $         B( 1, 1 ) = EPS3
+*
+            TRANS = 'T'
+*
+         END IF
+*
+         NORMIN = 'N'
+         DO 110 ITS = 1, N
+*
+*           Solve U*x = scale*v for a right eigenvector
+*             or U'*x = scale*v for a left eigenvector,
+*           overwriting x on v.
+*
+            CALL SLATRS( 'Upper', TRANS, 'Nonunit', NORMIN, N, B, LDB,
+     $                   VR, SCALE, WORK, IERR )
+            NORMIN = 'Y'
+*
+*           Test for sufficient growth in the norm of v.
+*
+            VNORM = SASUM( N, VR, 1 )
+            IF( VNORM.GE.GROWTO*SCALE )
+     $         GO TO 120
+*
+*           Choose new orthogonal starting vector and try again.
+*
+            TEMP = EPS3 / ( ROOTN+ONE )
+            VR( 1 ) = EPS3
+            DO 100 I = 2, N
+               VR( I ) = TEMP
+  100       CONTINUE
+            VR( N-ITS+1 ) = VR( N-ITS+1 ) - EPS3*ROOTN
+  110    CONTINUE
+*
+*        Failure to find eigenvector in N iterations.
+*
+         INFO = 1
+*
+  120    CONTINUE
+*
+*        Normalize eigenvector.
+*
+         I = ISAMAX( N, VR, 1 )
+         CALL SSCAL( N, ONE / ABS( VR( I ) ), VR, 1 )
+      ELSE
+*
+*        Complex eigenvalue.
+*
+         IF( NOINIT ) THEN
+*
+*           Set initial vector.
+*
+            DO 130 I = 1, N
+               VR( I ) = EPS3
+               VI( I ) = ZERO
+  130       CONTINUE
+         ELSE
+*
+*           Scale supplied initial vector.
+*
+            NORM = SLAPY2( SNRM2( N, VR, 1 ), SNRM2( N, VI, 1 ) )
+            REC = ( EPS3*ROOTN ) / MAX( NORM, NRMSML )
+            CALL SSCAL( N, REC, VR, 1 )
+            CALL SSCAL( N, REC, VI, 1 )
+         END IF
+*
+         IF( RIGHTV ) THEN
+*
+*           LU decomposition with partial pivoting of B, replacing zero
+*           pivots by EPS3.
+*
+*           The imaginary part of the (i,j)-th element of U is stored in
+*           B(j+1,i).
+*
+            B( 2, 1 ) = -WI
+            DO 140 I = 2, N
+               B( I+1, 1 ) = ZERO
+  140       CONTINUE
+*
+            DO 170 I = 1, N - 1
+               ABSBII = SLAPY2( B( I, I ), B( I+1, I ) )
+               EI = H( I+1, I )
+               IF( ABSBII.LT.ABS( EI ) ) THEN
+*
+*                 Interchange rows and eliminate.
+*
+                  XR = B( I, I ) / EI
+                  XI = B( I+1, I ) / EI
+                  B( I, I ) = EI
+                  B( I+1, I ) = ZERO
+                  DO 150 J = I + 1, N
+                     TEMP = B( I+1, J )
+                     B( I+1, J ) = B( I, J ) - XR*TEMP
+                     B( J+1, I+1 ) = B( J+1, I ) - XI*TEMP
+                     B( I, J ) = TEMP
+                     B( J+1, I ) = ZERO
+  150             CONTINUE
+                  B( I+2, I ) = -WI
+                  B( I+1, I+1 ) = B( I+1, I+1 ) - XI*WI
+                  B( I+2, I+1 ) = B( I+2, I+1 ) + XR*WI
+               ELSE
+*
+*                 Eliminate without interchanging rows.
+*
+                  IF( ABSBII.EQ.ZERO ) THEN
+                     B( I, I ) = EPS3
+                     B( I+1, I ) = ZERO
+                     ABSBII = EPS3
+                  END IF
+                  EI = ( EI / ABSBII ) / ABSBII
+                  XR = B( I, I )*EI
+                  XI = -B( I+1, I )*EI
+                  DO 160 J = I + 1, N
+                     B( I+1, J ) = B( I+1, J ) - XR*B( I, J ) +
+     $                             XI*B( J+1, I )
+                     B( J+1, I+1 ) = -XR*B( J+1, I ) - XI*B( I, J )
+  160             CONTINUE
+                  B( I+2, I+1 ) = B( I+2, I+1 ) - WI
+               END IF
+*
+*              Compute 1-norm of offdiagonal elements of i-th row.
+*
+               WORK( I ) = SASUM( N-I, B( I, I+1 ), LDB ) +
+     $                     SASUM( N-I, B( I+2, I ), 1 )
+  170       CONTINUE
+            IF( B( N, N ).EQ.ZERO .AND. B( N+1, N ).EQ.ZERO )
+     $         B( N, N ) = EPS3
+            WORK( N ) = ZERO
+*
+            I1 = N
+            I2 = 1
+            I3 = -1
+         ELSE
+*
+*           UL decomposition with partial pivoting of conjg(B),
+*           replacing zero pivots by EPS3.
+*
+*           The imaginary part of the (i,j)-th element of U is stored in
+*           B(j+1,i).
+*
+            B( N+1, N ) = WI
+            DO 180 J = 1, N - 1
+               B( N+1, J ) = ZERO
+  180       CONTINUE
+*
+            DO 210 J = N, 2, -1
+               EJ = H( J, J-1 )
+               ABSBJJ = SLAPY2( B( J, J ), B( J+1, J ) )
+               IF( ABSBJJ.LT.ABS( EJ ) ) THEN
+*
+*                 Interchange columns and eliminate
+*
+                  XR = B( J, J ) / EJ
+                  XI = B( J+1, J ) / EJ
+                  B( J, J ) = EJ
+                  B( J+1, J ) = ZERO
+                  DO 190 I = 1, J - 1
+                     TEMP = B( I, J-1 )
+                     B( I, J-1 ) = B( I, J ) - XR*TEMP
+                     B( J, I ) = B( J+1, I ) - XI*TEMP
+                     B( I, J ) = TEMP
+                     B( J+1, I ) = ZERO
+  190             CONTINUE
+                  B( J+1, J-1 ) = WI
+                  B( J-1, J-1 ) = B( J-1, J-1 ) + XI*WI
+                  B( J, J-1 ) = B( J, J-1 ) - XR*WI
+               ELSE
+*
+*                 Eliminate without interchange.
+*
+                  IF( ABSBJJ.EQ.ZERO ) THEN
+                     B( J, J ) = EPS3
+                     B( J+1, J ) = ZERO
+                     ABSBJJ = EPS3
+                  END IF
+                  EJ = ( EJ / ABSBJJ ) / ABSBJJ
+                  XR = B( J, J )*EJ
+                  XI = -B( J+1, J )*EJ
+                  DO 200 I = 1, J - 1
+                     B( I, J-1 ) = B( I, J-1 ) - XR*B( I, J ) +
+     $                             XI*B( J+1, I )
+                     B( J, I ) = -XR*B( J+1, I ) - XI*B( I, J )
+  200             CONTINUE
+                  B( J, J-1 ) = B( J, J-1 ) + WI
+               END IF
+*
+*              Compute 1-norm of offdiagonal elements of j-th column.
+*
+               WORK( J ) = SASUM( J-1, B( 1, J ), 1 ) +
+     $                     SASUM( J-1, B( J+1, 1 ), LDB )
+  210       CONTINUE
+            IF( B( 1, 1 ).EQ.ZERO .AND. B( 2, 1 ).EQ.ZERO )
+     $         B( 1, 1 ) = EPS3
+            WORK( 1 ) = ZERO
+*
+            I1 = 1
+            I2 = N
+            I3 = 1
+         END IF
+*
+         DO 270 ITS = 1, N
+            SCALE = ONE
+            VMAX = ONE
+            VCRIT = BIGNUM
+*
+*           Solve U*(xr,xi) = scale*(vr,vi) for a right eigenvector,
+*             or U'*(xr,xi) = scale*(vr,vi) for a left eigenvector,
+*           overwriting (xr,xi) on (vr,vi).
+*
+            DO 250 I = I1, I2, I3
+*
+               IF( WORK( I ).GT.VCRIT ) THEN
+                  REC = ONE / VMAX
+                  CALL SSCAL( N, REC, VR, 1 )
+                  CALL SSCAL( N, REC, VI, 1 )
+                  SCALE = SCALE*REC
+                  VMAX = ONE
+                  VCRIT = BIGNUM
+               END IF
+*
+               XR = VR( I )
+               XI = VI( I )
+               IF( RIGHTV ) THEN
+                  DO 220 J = I + 1, N
+                     XR = XR - B( I, J )*VR( J ) + B( J+1, I )*VI( J )
+                     XI = XI - B( I, J )*VI( J ) - B( J+1, I )*VR( J )
+  220             CONTINUE
+               ELSE
+                  DO 230 J = 1, I - 1
+                     XR = XR - B( J, I )*VR( J ) + B( I+1, J )*VI( J )
+                     XI = XI - B( J, I )*VI( J ) - B( I+1, J )*VR( J )
+  230             CONTINUE
+               END IF
+*
+               W = ABS( B( I, I ) ) + ABS( B( I+1, I ) )
+               IF( W.GT.SMLNUM ) THEN
+                  IF( W.LT.ONE ) THEN
+                     W1 = ABS( XR ) + ABS( XI )
+                     IF( W1.GT.W*BIGNUM ) THEN
+                        REC = ONE / W1
+                        CALL SSCAL( N, REC, VR, 1 )
+                        CALL SSCAL( N, REC, VI, 1 )
+                        XR = VR( I )
+                        XI = VI( I )
+                        SCALE = SCALE*REC
+                        VMAX = VMAX*REC
+                     END IF
+                  END IF
+*
+*                 Divide by diagonal element of B.
+*
+                  CALL SLADIV( XR, XI, B( I, I ), B( I+1, I ), VR( I ),
+     $                         VI( I ) )
+                  VMAX = MAX( ABS( VR( I ) )+ABS( VI( I ) ), VMAX )
+                  VCRIT = BIGNUM / VMAX
+               ELSE
+                  DO 240 J = 1, N
+                     VR( J ) = ZERO
+                     VI( J ) = ZERO
+  240             CONTINUE
+                  VR( I ) = ONE
+                  VI( I ) = ONE
+                  SCALE = ZERO
+                  VMAX = ONE
+                  VCRIT = BIGNUM
+               END IF
+  250       CONTINUE
+*
+*           Test for sufficient growth in the norm of (VR,VI).
+*
+            VNORM = SASUM( N, VR, 1 ) + SASUM( N, VI, 1 )
+            IF( VNORM.GE.GROWTO*SCALE )
+     $         GO TO 280
+*
+*           Choose a new orthogonal starting vector and try again.
+*
+            Y = EPS3 / ( ROOTN+ONE )
+            VR( 1 ) = EPS3
+            VI( 1 ) = ZERO
+*
+            DO 260 I = 2, N
+               VR( I ) = Y
+               VI( I ) = ZERO
+  260       CONTINUE
+            VR( N-ITS+1 ) = VR( N-ITS+1 ) - EPS3*ROOTN
+  270    CONTINUE
+*
+*        Failure to find eigenvector in N iterations
+*
+         INFO = 1
+*
+  280    CONTINUE
+*
+*        Normalize eigenvector.
+*
+         VNORM = ZERO
+         DO 290 I = 1, N
+            VNORM = MAX( VNORM, ABS( VR( I ) )+ABS( VI( I ) ) )
+  290    CONTINUE
+         CALL SSCAL( N, ONE / VNORM, VR, 1 )
+         CALL SSCAL( N, ONE / VNORM, VI, 1 )
+*
+      END IF
+*
+      RETURN
+*
+*     End of SLAEIN
+*
+      END
diff --git a/costa/native/external/lapack/slaev2.f b/costa/native/external/lapack/slaev2.f
new file mode 100644
index 000000000..b90eb51ac
--- /dev/null
+++ b/costa/native/external/lapack/slaev2.f
@@ -0,0 +1,170 @@
+      SUBROUTINE SLAEV2( A, B, C, RT1, RT2, CS1, SN1 )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      REAL               A, B, C, CS1, RT1, RT2, SN1
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix
+*     [  A   B  ]
+*     [  B   C  ].
+*  On return, RT1 is the eigenvalue of larger absolute value, RT2 is the
+*  eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right
+*  eigenvector for RT1, giving the decomposition
+*
+*     [ CS1  SN1 ] [  A   B  ] [ CS1 -SN1 ]  =  [ RT1  0  ]
+*     [-SN1  CS1 ] [  B   C  ] [ SN1  CS1 ]     [  0  RT2 ].
+*
+*  Arguments
+*  =========
+*
+*  A       (input) REAL
+*          The (1,1) element of the 2-by-2 matrix.
+*
+*  B       (input) REAL
+*          The (1,2) element and the conjugate of the (2,1) element of
+*          the 2-by-2 matrix.
+*
+*  C       (input) REAL
+*          The (2,2) element of the 2-by-2 matrix.
+*
+*  RT1     (output) REAL
+*          The eigenvalue of larger absolute value.
+*
+*  RT2     (output) REAL
+*          The eigenvalue of smaller absolute value.
+*
+*  CS1     (output) REAL
+*  SN1     (output) REAL
+*          The vector (CS1, SN1) is a unit right eigenvector for RT1.
+*
+*  Further Details
+*  ===============
+*
+*  RT1 is accurate to a few ulps barring over/underflow.
+*
+*  RT2 may be inaccurate if there is massive cancellation in the
+*  determinant A*C-B*B; higher precision or correctly rounded or
+*  correctly truncated arithmetic would be needed to compute RT2
+*  accurately in all cases.
+*
+*  CS1 and SN1 are accurate to a few ulps barring over/underflow.
+*
+*  Overflow is possible only if RT1 is within a factor of 5 of overflow.
+*  Underflow is harmless if the input data is 0 or exceeds
+*     underflow_threshold / macheps.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E0 )
+      REAL               TWO
+      PARAMETER          ( TWO = 2.0E0 )
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E0 )
+      REAL               HALF
+      PARAMETER          ( HALF = 0.5E0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            SGN1, SGN2
+      REAL               AB, ACMN, ACMX, ACS, ADF, CS, CT, DF, RT, SM,
+     $                   TB, TN
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Compute the eigenvalues
+*
+      SM = A + C
+      DF = A - C
+      ADF = ABS( DF )
+      TB = B + B
+      AB = ABS( TB )
+      IF( ABS( A ).GT.ABS( C ) ) THEN
+         ACMX = A
+         ACMN = C
+      ELSE
+         ACMX = C
+         ACMN = A
+      END IF
+      IF( ADF.GT.AB ) THEN
+         RT = ADF*SQRT( ONE+( AB / ADF )**2 )
+      ELSE IF( ADF.LT.AB ) THEN
+         RT = AB*SQRT( ONE+( ADF / AB )**2 )
+      ELSE
+*
+*        Includes case AB=ADF=0
+*
+         RT = AB*SQRT( TWO )
+      END IF
+      IF( SM.LT.ZERO ) THEN
+         RT1 = HALF*( SM-RT )
+         SGN1 = -1
+*
+*        Order of execution important.
+*        To get fully accurate smaller eigenvalue,
+*        next line needs to be executed in higher precision.
+*
+         RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B
+      ELSE IF( SM.GT.ZERO ) THEN
+         RT1 = HALF*( SM+RT )
+         SGN1 = 1
+*
+*        Order of execution important.
+*        To get fully accurate smaller eigenvalue,
+*        next line needs to be executed in higher precision.
+*
+         RT2 = ( ACMX / RT1 )*ACMN - ( B / RT1 )*B
+      ELSE
+*
+*        Includes case RT1 = RT2 = 0
+*
+         RT1 = HALF*RT
+         RT2 = -HALF*RT
+         SGN1 = 1
+      END IF
+*
+*     Compute the eigenvector
+*
+      IF( DF.GE.ZERO ) THEN
+         CS = DF + RT
+         SGN2 = 1
+      ELSE
+         CS = DF - RT
+         SGN2 = -1
+      END IF
+      ACS = ABS( CS )
+      IF( ACS.GT.AB ) THEN
+         CT = -TB / CS
+         SN1 = ONE / SQRT( ONE+CT*CT )
+         CS1 = CT*SN1
+      ELSE
+         IF( AB.EQ.ZERO ) THEN
+            CS1 = ONE
+            SN1 = ZERO
+         ELSE
+            TN = -CS / TB
+            CS1 = ONE / SQRT( ONE+TN*TN )
+            SN1 = TN*CS1
+         END IF
+      END IF
+      IF( SGN1.EQ.SGN2 ) THEN
+         TN = CS1
+         CS1 = -SN1
+         SN1 = TN
+      END IF
+      RETURN
+*
+*     End of SLAEV2
+*
+      END
diff --git a/costa/native/external/lapack/slaexc.f b/costa/native/external/lapack/slaexc.f
new file mode 100644
index 000000000..bc6080b40
--- /dev/null
+++ b/costa/native/external/lapack/slaexc.f
@@ -0,0 +1,354 @@
+      SUBROUTINE SLAEXC( WANTQ, N, T, LDT, Q, LDQ, J1, N1, N2, WORK,
+     $                   INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      LOGICAL            WANTQ
+      INTEGER            INFO, J1, LDQ, LDT, N, N1, N2
+*     ..
+*     .. Array Arguments ..
+      REAL               Q( LDQ, * ), T( LDT, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAEXC swaps adjacent diagonal blocks T11 and T22 of order 1 or 2 in
+*  an upper quasi-triangular matrix T by an orthogonal similarity
+*  transformation.
+*
+*  T must be in Schur canonical form, that is, block upper triangular
+*  with 1-by-1 and 2-by-2 diagonal blocks; each 2-by-2 diagonal block
+*  has its diagonal elemnts equal and its off-diagonal elements of
+*  opposite sign.
+*
+*  Arguments
+*  =========
+*
+*  WANTQ   (input) LOGICAL
+*          = .TRUE. : accumulate the transformation in the matrix Q;
+*          = .FALSE.: do not accumulate the transformation.
+*
+*  N       (input) INTEGER
+*          The order of the matrix T. N >= 0.
+*
+*  T       (input/output) REAL array, dimension (LDT,N)
+*          On entry, the upper quasi-triangular matrix T, in Schur
+*          canonical form.
+*          On exit, the updated matrix T, again in Schur canonical form.
+*
+*  LDT     (input)  INTEGER
+*          The leading dimension of the array T. LDT >= max(1,N).
+*
+*  Q       (input/output) REAL array, dimension (LDQ,N)
+*          On entry, if WANTQ is .TRUE., the orthogonal matrix Q.
+*          On exit, if WANTQ is .TRUE., the updated matrix Q.
+*          If WANTQ is .FALSE., Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.
+*          LDQ >= 1; and if WANTQ is .TRUE., LDQ >= N.
+*
+*  J1      (input) INTEGER
+*          The index of the first row of the first block T11.
+*
+*  N1      (input) INTEGER
+*          The order of the first block T11. N1 = 0, 1 or 2.
+*
+*  N2      (input) INTEGER
+*          The order of the second block T22. N2 = 0, 1 or 2.
+*
+*  WORK    (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          = 1: the transformed matrix T would be too far from Schur
+*               form; the blocks are not swapped and T and Q are
+*               unchanged.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+      REAL               TEN
+      PARAMETER          ( TEN = 1.0E+1 )
+      INTEGER            LDD, LDX
+      PARAMETER          ( LDD = 4, LDX = 2 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            IERR, J2, J3, J4, K, ND
+      REAL               CS, DNORM, EPS, SCALE, SMLNUM, SN, T11, T22,
+     $                   T33, TAU, TAU1, TAU2, TEMP, THRESH, WI1, WI2,
+     $                   WR1, WR2, XNORM
+*     ..
+*     .. Local Arrays ..
+      REAL               D( LDD, 4 ), U( 3 ), U1( 3 ), U2( 3 ),
+     $                   X( LDX, 2 )
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH, SLANGE
+      EXTERNAL           SLAMCH, SLANGE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLACPY, SLANV2, SLARFG, SLARFX, SLARTG, SLASY2,
+     $                   SROT
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. N1.EQ.0 .OR. N2.EQ.0 )
+     $   RETURN
+      IF( J1+N1.GT.N )
+     $   RETURN
+*
+      J2 = J1 + 1
+      J3 = J1 + 2
+      J4 = J1 + 3
+*
+      IF( N1.EQ.1 .AND. N2.EQ.1 ) THEN
+*
+*        Swap two 1-by-1 blocks.
+*
+         T11 = T( J1, J1 )
+         T22 = T( J2, J2 )
+*
+*        Determine the transformation to perform the interchange.
+*
+         CALL SLARTG( T( J1, J2 ), T22-T11, CS, SN, TEMP )
+*
+*        Apply transformation to the matrix T.
+*
+         IF( J3.LE.N )
+     $      CALL SROT( N-J1-1, T( J1, J3 ), LDT, T( J2, J3 ), LDT, CS,
+     $                 SN )
+         CALL SROT( J1-1, T( 1, J1 ), 1, T( 1, J2 ), 1, CS, SN )
+*
+         T( J1, J1 ) = T22
+         T( J2, J2 ) = T11
+*
+         IF( WANTQ ) THEN
+*
+*           Accumulate transformation in the matrix Q.
+*
+            CALL SROT( N, Q( 1, J1 ), 1, Q( 1, J2 ), 1, CS, SN )
+         END IF
+*
+      ELSE
+*
+*        Swapping involves at least one 2-by-2 block.
+*
+*        Copy the diagonal block of order N1+N2 to the local array D
+*        and compute its norm.
+*
+         ND = N1 + N2
+         CALL SLACPY( 'Full', ND, ND, T( J1, J1 ), LDT, D, LDD )
+         DNORM = SLANGE( 'Max', ND, ND, D, LDD, WORK )
+*
+*        Compute machine-dependent threshold for test for accepting
+*        swap.
+*
+         EPS = SLAMCH( 'P' )
+         SMLNUM = SLAMCH( 'S' ) / EPS
+         THRESH = MAX( TEN*EPS*DNORM, SMLNUM )
+*
+*        Solve T11*X - X*T22 = scale*T12 for X.
+*
+         CALL SLASY2( .FALSE., .FALSE., -1, N1, N2, D, LDD,
+     $                D( N1+1, N1+1 ), LDD, D( 1, N1+1 ), LDD, SCALE, X,
+     $                LDX, XNORM, IERR )
+*
+*        Swap the adjacent diagonal blocks.
+*
+         K = N1 + N1 + N2 - 3
+         GO TO ( 10, 20, 30 )K
+*
+   10    CONTINUE
+*
+*        N1 = 1, N2 = 2: generate elementary reflector H so that:
+*
+*        ( scale, X11, X12 ) H = ( 0, 0, * )
+*
+         U( 1 ) = SCALE
+         U( 2 ) = X( 1, 1 )
+         U( 3 ) = X( 1, 2 )
+         CALL SLARFG( 3, U( 3 ), U, 1, TAU )
+         U( 3 ) = ONE
+         T11 = T( J1, J1 )
+*
+*        Perform swap provisionally on diagonal block in D.
+*
+         CALL SLARFX( 'L', 3, 3, U, TAU, D, LDD, WORK )
+         CALL SLARFX( 'R', 3, 3, U, TAU, D, LDD, WORK )
+*
+*        Test whether to reject swap.
+*
+         IF( MAX( ABS( D( 3, 1 ) ), ABS( D( 3, 2 ) ), ABS( D( 3,
+     $       3 )-T11 ) ).GT.THRESH )GO TO 50
+*
+*        Accept swap: apply transformation to the entire matrix T.
+*
+         CALL SLARFX( 'L', 3, N-J1+1, U, TAU, T( J1, J1 ), LDT, WORK )
+         CALL SLARFX( 'R', J2, 3, U, TAU, T( 1, J1 ), LDT, WORK )
+*
+         T( J3, J1 ) = ZERO
+         T( J3, J2 ) = ZERO
+         T( J3, J3 ) = T11
+*
+         IF( WANTQ ) THEN
+*
+*           Accumulate transformation in the matrix Q.
+*
+            CALL SLARFX( 'R', N, 3, U, TAU, Q( 1, J1 ), LDQ, WORK )
+         END IF
+         GO TO 40
+*
+   20    CONTINUE
+*
+*        N1 = 2, N2 = 1: generate elementary reflector H so that:
+*
+*        H (  -X11 ) = ( * )
+*          (  -X21 ) = ( 0 )
+*          ( scale ) = ( 0 )
+*
+         U( 1 ) = -X( 1, 1 )
+         U( 2 ) = -X( 2, 1 )
+         U( 3 ) = SCALE
+         CALL SLARFG( 3, U( 1 ), U( 2 ), 1, TAU )
+         U( 1 ) = ONE
+         T33 = T( J3, J3 )
+*
+*        Perform swap provisionally on diagonal block in D.
+*
+         CALL SLARFX( 'L', 3, 3, U, TAU, D, LDD, WORK )
+         CALL SLARFX( 'R', 3, 3, U, TAU, D, LDD, WORK )
+*
+*        Test whether to reject swap.
+*
+         IF( MAX( ABS( D( 2, 1 ) ), ABS( D( 3, 1 ) ), ABS( D( 1,
+     $       1 )-T33 ) ).GT.THRESH )GO TO 50
+*
+*        Accept swap: apply transformation to the entire matrix T.
+*
+         CALL SLARFX( 'R', J3, 3, U, TAU, T( 1, J1 ), LDT, WORK )
+         CALL SLARFX( 'L', 3, N-J1, U, TAU, T( J1, J2 ), LDT, WORK )
+*
+         T( J1, J1 ) = T33
+         T( J2, J1 ) = ZERO
+         T( J3, J1 ) = ZERO
+*
+         IF( WANTQ ) THEN
+*
+*           Accumulate transformation in the matrix Q.
+*
+            CALL SLARFX( 'R', N, 3, U, TAU, Q( 1, J1 ), LDQ, WORK )
+         END IF
+         GO TO 40
+*
+   30    CONTINUE
+*
+*        N1 = 2, N2 = 2: generate elementary reflectors H(1) and H(2) so
+*        that:
+*
+*        H(2) H(1) (  -X11  -X12 ) = (  *  * )
+*                  (  -X21  -X22 )   (  0  * )
+*                  ( scale    0  )   (  0  0 )
+*                  (    0  scale )   (  0  0 )
+*
+         U1( 1 ) = -X( 1, 1 )
+         U1( 2 ) = -X( 2, 1 )
+         U1( 3 ) = SCALE
+         CALL SLARFG( 3, U1( 1 ), U1( 2 ), 1, TAU1 )
+         U1( 1 ) = ONE
+*
+         TEMP = -TAU1*( X( 1, 2 )+U1( 2 )*X( 2, 2 ) )
+         U2( 1 ) = -TEMP*U1( 2 ) - X( 2, 2 )
+         U2( 2 ) = -TEMP*U1( 3 )
+         U2( 3 ) = SCALE
+         CALL SLARFG( 3, U2( 1 ), U2( 2 ), 1, TAU2 )
+         U2( 1 ) = ONE
+*
+*        Perform swap provisionally on diagonal block in D.
+*
+         CALL SLARFX( 'L', 3, 4, U1, TAU1, D, LDD, WORK )
+         CALL SLARFX( 'R', 4, 3, U1, TAU1, D, LDD, WORK )
+         CALL SLARFX( 'L', 3, 4, U2, TAU2, D( 2, 1 ), LDD, WORK )
+         CALL SLARFX( 'R', 4, 3, U2, TAU2, D( 1, 2 ), LDD, WORK )
+*
+*        Test whether to reject swap.
+*
+         IF( MAX( ABS( D( 3, 1 ) ), ABS( D( 3, 2 ) ), ABS( D( 4, 1 ) ),
+     $       ABS( D( 4, 2 ) ) ).GT.THRESH )GO TO 50
+*
+*        Accept swap: apply transformation to the entire matrix T.
+*
+         CALL SLARFX( 'L', 3, N-J1+1, U1, TAU1, T( J1, J1 ), LDT, WORK )
+         CALL SLARFX( 'R', J4, 3, U1, TAU1, T( 1, J1 ), LDT, WORK )
+         CALL SLARFX( 'L', 3, N-J1+1, U2, TAU2, T( J2, J1 ), LDT, WORK )
+         CALL SLARFX( 'R', J4, 3, U2, TAU2, T( 1, J2 ), LDT, WORK )
+*
+         T( J3, J1 ) = ZERO
+         T( J3, J2 ) = ZERO
+         T( J4, J1 ) = ZERO
+         T( J4, J2 ) = ZERO
+*
+         IF( WANTQ ) THEN
+*
+*           Accumulate transformation in the matrix Q.
+*
+            CALL SLARFX( 'R', N, 3, U1, TAU1, Q( 1, J1 ), LDQ, WORK )
+            CALL SLARFX( 'R', N, 3, U2, TAU2, Q( 1, J2 ), LDQ, WORK )
+         END IF
+*
+   40    CONTINUE
+*
+         IF( N2.EQ.2 ) THEN
+*
+*           Standardize new 2-by-2 block T11
+*
+            CALL SLANV2( T( J1, J1 ), T( J1, J2 ), T( J2, J1 ),
+     $                   T( J2, J2 ), WR1, WI1, WR2, WI2, CS, SN )
+            CALL SROT( N-J1-1, T( J1, J1+2 ), LDT, T( J2, J1+2 ), LDT,
+     $                 CS, SN )
+            CALL SROT( J1-1, T( 1, J1 ), 1, T( 1, J2 ), 1, CS, SN )
+            IF( WANTQ )
+     $         CALL SROT( N, Q( 1, J1 ), 1, Q( 1, J2 ), 1, CS, SN )
+         END IF
+*
+         IF( N1.EQ.2 ) THEN
+*
+*           Standardize new 2-by-2 block T22
+*
+            J3 = J1 + N2
+            J4 = J3 + 1
+            CALL SLANV2( T( J3, J3 ), T( J3, J4 ), T( J4, J3 ),
+     $                   T( J4, J4 ), WR1, WI1, WR2, WI2, CS, SN )
+            IF( J3+2.LE.N )
+     $         CALL SROT( N-J3-1, T( J3, J3+2 ), LDT, T( J4, J3+2 ),
+     $                    LDT, CS, SN )
+            CALL SROT( J3-1, T( 1, J3 ), 1, T( 1, J4 ), 1, CS, SN )
+            IF( WANTQ )
+     $         CALL SROT( N, Q( 1, J3 ), 1, Q( 1, J4 ), 1, CS, SN )
+         END IF
+*
+      END IF
+      RETURN
+*
+*     Exit with INFO = 1 if swap was rejected.
+*
+   50 INFO = 1
+      RETURN
+*
+*     End of SLAEXC
+*
+      END
diff --git a/costa/native/external/lapack/slag2.f b/costa/native/external/lapack/slag2.f
new file mode 100644
index 000000000..f5c357251
--- /dev/null
+++ b/costa/native/external/lapack/slag2.f
@@ -0,0 +1,301 @@
+      SUBROUTINE SLAG2( A, LDA, B, LDB, SAFMIN, SCALE1, SCALE2, WR1,
+     $                  WR2, WI )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      INTEGER            LDA, LDB
+      REAL               SAFMIN, SCALE1, SCALE2, WI, WR1, WR2
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAG2 computes the eigenvalues of a 2 x 2 generalized eigenvalue
+*  problem  A - w B, with scaling as necessary to avoid over-/underflow.
+*
+*  The scaling factor "s" results in a modified eigenvalue equation
+*
+*      s A - w B
+*
+*  where  s  is a non-negative scaling factor chosen so that  w,  w B,
+*  and  s A  do not overflow and, if possible, do not underflow, either.
+*
+*  Arguments
+*  =========
+*
+*  A       (input) REAL array, dimension (LDA, 2)
+*          On entry, the 2 x 2 matrix A.  It is assumed that its 1-norm
+*          is less than 1/SAFMIN.  Entries less than
+*          sqrt(SAFMIN)*norm(A) are subject to being treated as zero.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= 2.
+*
+*  B       (input) REAL array, dimension (LDB, 2)
+*          On entry, the 2 x 2 upper triangular matrix B.  It is
+*          assumed that the one-norm of B is less than 1/SAFMIN.  The
+*          diagonals should be at least sqrt(SAFMIN) times the largest
+*          element of B (in absolute value); if a diagonal is smaller
+*          than that, then  +/- sqrt(SAFMIN) will be used instead of
+*          that diagonal.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= 2.
+*
+*  SAFMIN  (input) REAL
+*          The smallest positive number s.t. 1/SAFMIN does not
+*          overflow.  (This should always be SLAMCH('S') -- it is an
+*          argument in order to avoid having to call SLAMCH frequently.)
+*
+*  SCALE1  (output) REAL
+*          A scaling factor used to avoid over-/underflow in the
+*          eigenvalue equation which defines the first eigenvalue.  If
+*          the eigenvalues are complex, then the eigenvalues are
+*          ( WR1  +/-  WI i ) / SCALE1  (which may lie outside the
+*          exponent range of the machine), SCALE1=SCALE2, and SCALE1
+*          will always be positive.  If the eigenvalues are real, then
+*          the first (real) eigenvalue is  WR1 / SCALE1 , but this may
+*          overflow or underflow, and in fact, SCALE1 may be zero or
+*          less than the underflow threshhold if the exact eigenvalue
+*          is sufficiently large.
+*
+*  SCALE2  (output) REAL
+*          A scaling factor used to avoid over-/underflow in the
+*          eigenvalue equation which defines the second eigenvalue.  If
+*          the eigenvalues are complex, then SCALE2=SCALE1.  If the
+*          eigenvalues are real, then the second (real) eigenvalue is
+*          WR2 / SCALE2 , but this may overflow or underflow, and in
+*          fact, SCALE2 may be zero or less than the underflow
+*          threshhold if the exact eigenvalue is sufficiently large.
+*
+*  WR1     (output) REAL
+*          If the eigenvalue is real, then WR1 is SCALE1 times the
+*          eigenvalue closest to the (2,2) element of A B**(-1).  If the
+*          eigenvalue is complex, then WR1=WR2 is SCALE1 times the real
+*          part of the eigenvalues.
+*
+*  WR2     (output) REAL
+*          If the eigenvalue is real, then WR2 is SCALE2 times the
+*          other eigenvalue.  If the eigenvalue is complex, then
+*          WR1=WR2 is SCALE1 times the real part of the eigenvalues.
+*
+*  WI      (output) REAL
+*          If the eigenvalue is real, then WI is zero.  If the
+*          eigenvalue is complex, then WI is SCALE1 times the imaginary
+*          part of the eigenvalues.  WI will always be non-negative.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 )
+      REAL               HALF
+      PARAMETER          ( HALF = ONE / TWO )
+      REAL               FUZZY1
+      PARAMETER          ( FUZZY1 = ONE+1.0E-5 )
+*     ..
+*     .. Local Scalars ..
+      REAL               A11, A12, A21, A22, ABI22, ANORM, AS11, AS12,
+     $                   AS22, ASCALE, B11, B12, B22, BINV11, BINV22,
+     $                   BMIN, BNORM, BSCALE, BSIZE, C1, C2, C3, C4, C5,
+     $                   DIFF, DISCR, PP, QQ, R, RTMAX, RTMIN, S1, S2,
+     $                   SAFMAX, SHIFT, SS, SUM, WABS, WBIG, WDET,
+     $                   WSCALE, WSIZE, WSMALL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SIGN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      RTMIN = SQRT( SAFMIN )
+      RTMAX = ONE / RTMIN
+      SAFMAX = ONE / SAFMIN
+*
+*     Scale A
+*
+      ANORM = MAX( ABS( A( 1, 1 ) )+ABS( A( 2, 1 ) ),
+     $        ABS( A( 1, 2 ) )+ABS( A( 2, 2 ) ), SAFMIN )
+      ASCALE = ONE / ANORM
+      A11 = ASCALE*A( 1, 1 )
+      A21 = ASCALE*A( 2, 1 )
+      A12 = ASCALE*A( 1, 2 )
+      A22 = ASCALE*A( 2, 2 )
+*
+*     Perturb B if necessary to insure non-singularity
+*
+      B11 = B( 1, 1 )
+      B12 = B( 1, 2 )
+      B22 = B( 2, 2 )
+      BMIN = RTMIN*MAX( ABS( B11 ), ABS( B12 ), ABS( B22 ), RTMIN )
+      IF( ABS( B11 ).LT.BMIN )
+     $   B11 = SIGN( BMIN, B11 )
+      IF( ABS( B22 ).LT.BMIN )
+     $   B22 = SIGN( BMIN, B22 )
+*
+*     Scale B
+*
+      BNORM = MAX( ABS( B11 ), ABS( B12 )+ABS( B22 ), SAFMIN )
+      BSIZE = MAX( ABS( B11 ), ABS( B22 ) )
+      BSCALE = ONE / BSIZE
+      B11 = B11*BSCALE
+      B12 = B12*BSCALE
+      B22 = B22*BSCALE
+*
+*     Compute larger eigenvalue by method described by C. van Loan
+*
+*     ( AS is A shifted by -SHIFT*B )
+*
+      BINV11 = ONE / B11
+      BINV22 = ONE / B22
+      S1 = A11*BINV11
+      S2 = A22*BINV22
+      IF( ABS( S1 ).LE.ABS( S2 ) ) THEN
+         AS12 = A12 - S1*B12
+         AS22 = A22 - S1*B22
+         SS = A21*( BINV11*BINV22 )
+         ABI22 = AS22*BINV22 - SS*B12
+         PP = HALF*ABI22
+         SHIFT = S1
+      ELSE
+         AS12 = A12 - S2*B12
+         AS11 = A11 - S2*B11
+         SS = A21*( BINV11*BINV22 )
+         ABI22 = -SS*B12
+         PP = HALF*( AS11*BINV11+ABI22 )
+         SHIFT = S2
+      END IF
+      QQ = SS*AS12
+      IF( ABS( PP*RTMIN ).GE.ONE ) THEN
+         DISCR = ( RTMIN*PP )**2 + QQ*SAFMIN
+         R = SQRT( ABS( DISCR ) )*RTMAX
+      ELSE
+         IF( PP**2+ABS( QQ ).LE.SAFMIN ) THEN
+            DISCR = ( RTMAX*PP )**2 + QQ*SAFMAX
+            R = SQRT( ABS( DISCR ) )*RTMIN
+         ELSE
+            DISCR = PP**2 + QQ
+            R = SQRT( ABS( DISCR ) )
+         END IF
+      END IF
+*
+*     Note: the test of R in the following IF is to cover the case when
+*           DISCR is small and negative and is flushed to zero during
+*           the calculation of R.  On machines which have a consistent
+*           flush-to-zero threshhold and handle numbers above that
+*           threshhold correctly, it would not be necessary.
+*
+      IF( DISCR.GE.ZERO .OR. R.EQ.ZERO ) THEN
+         SUM = PP + SIGN( R, PP )
+         DIFF = PP - SIGN( R, PP )
+         WBIG = SHIFT + SUM
+*
+*        Compute smaller eigenvalue
+*
+         WSMALL = SHIFT + DIFF
+         IF( HALF*ABS( WBIG ).GT.MAX( ABS( WSMALL ), SAFMIN ) ) THEN
+            WDET = ( A11*A22-A12*A21 )*( BINV11*BINV22 )
+            WSMALL = WDET / WBIG
+         END IF
+*
+*        Choose (real) eigenvalue closest to 2,2 element of A*B**(-1)
+*        for WR1.
+*
+         IF( PP.GT.ABI22 ) THEN
+            WR1 = MIN( WBIG, WSMALL )
+            WR2 = MAX( WBIG, WSMALL )
+         ELSE
+            WR1 = MAX( WBIG, WSMALL )
+            WR2 = MIN( WBIG, WSMALL )
+         END IF
+         WI = ZERO
+      ELSE
+*
+*        Complex eigenvalues
+*
+         WR1 = SHIFT + PP
+         WR2 = WR1
+         WI = R
+      END IF
+*
+*     Further scaling to avoid underflow and overflow in computing
+*     SCALE1 and overflow in computing w*B.
+*
+*     This scale factor (WSCALE) is bounded from above using C1 and C2,
+*     and from below using C3 and C4.
+*        C1 implements the condition  s A  must never overflow.
+*        C2 implements the condition  w B  must never overflow.
+*        C3, with C2,
+*           implement the condition that s A - w B must never overflow.
+*        C4 implements the condition  s    should not underflow.
+*        C5 implements the condition  max(s,|w|) should be at least 2.
+*
+      C1 = BSIZE*( SAFMIN*MAX( ONE, ASCALE ) )
+      C2 = SAFMIN*MAX( ONE, BNORM )
+      C3 = BSIZE*SAFMIN
+      IF( ASCALE.LE.ONE .AND. BSIZE.LE.ONE ) THEN
+         C4 = MIN( ONE, ( ASCALE / SAFMIN )*BSIZE )
+      ELSE
+         C4 = ONE
+      END IF
+      IF( ASCALE.LE.ONE .OR. BSIZE.LE.ONE ) THEN
+         C5 = MIN( ONE, ASCALE*BSIZE )
+      ELSE
+         C5 = ONE
+      END IF
+*
+*     Scale first eigenvalue
+*
+      WABS = ABS( WR1 ) + ABS( WI )
+      WSIZE = MAX( SAFMIN, C1, FUZZY1*( WABS*C2+C3 ),
+     $        MIN( C4, HALF*MAX( WABS, C5 ) ) )
+      IF( WSIZE.NE.ONE ) THEN
+         WSCALE = ONE / WSIZE
+         IF( WSIZE.GT.ONE ) THEN
+            SCALE1 = ( MAX( ASCALE, BSIZE )*WSCALE )*
+     $               MIN( ASCALE, BSIZE )
+         ELSE
+            SCALE1 = ( MIN( ASCALE, BSIZE )*WSCALE )*
+     $               MAX( ASCALE, BSIZE )
+         END IF
+         WR1 = WR1*WSCALE
+         IF( WI.NE.ZERO ) THEN
+            WI = WI*WSCALE
+            WR2 = WR1
+            SCALE2 = SCALE1
+         END IF
+      ELSE
+         SCALE1 = ASCALE*BSIZE
+         SCALE2 = SCALE1
+      END IF
+*
+*     Scale second eigenvalue (if real)
+*
+      IF( WI.EQ.ZERO ) THEN
+         WSIZE = MAX( SAFMIN, C1, FUZZY1*( ABS( WR2 )*C2+C3 ),
+     $           MIN( C4, HALF*MAX( ABS( WR2 ), C5 ) ) )
+         IF( WSIZE.NE.ONE ) THEN
+            WSCALE = ONE / WSIZE
+            IF( WSIZE.GT.ONE ) THEN
+               SCALE2 = ( MAX( ASCALE, BSIZE )*WSCALE )*
+     $                  MIN( ASCALE, BSIZE )
+            ELSE
+               SCALE2 = ( MIN( ASCALE, BSIZE )*WSCALE )*
+     $                  MAX( ASCALE, BSIZE )
+            END IF
+            WR2 = WR2*WSCALE
+         ELSE
+            SCALE2 = ASCALE*BSIZE
+         END IF
+      END IF
+*
+*     End of SLAG2
+*
+      RETURN
+      END
diff --git a/costa/native/external/lapack/slags2.f b/costa/native/external/lapack/slags2.f
new file mode 100644
index 000000000..0d833e7a1
--- /dev/null
+++ b/costa/native/external/lapack/slags2.f
@@ -0,0 +1,270 @@
+      SUBROUTINE SLAGS2( UPPER, A1, A2, A3, B1, B2, B3, CSU, SNU, CSV,
+     $                   SNV, CSQ, SNQ )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      LOGICAL            UPPER
+      REAL               A1, A2, A3, B1, B2, B3, CSQ, CSU, CSV, SNQ,
+     $                   SNU, SNV
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAGS2 computes 2-by-2 orthogonal matrices U, V and Q, such
+*  that if ( UPPER ) then
+*
+*            U'*A*Q = U'*( A1 A2 )*Q = ( x  0  )
+*                        ( 0  A3 )     ( x  x  )
+*  and
+*            V'*B*Q = V'*( B1 B2 )*Q = ( x  0  )
+*                        ( 0  B3 )     ( x  x  )
+*
+*  or if ( .NOT.UPPER ) then
+*
+*            U'*A*Q = U'*( A1 0  )*Q = ( x  x  )
+*                        ( A2 A3 )     ( 0  x  )
+*  and
+*            V'*B*Q = V'*( B1 0  )*Q = ( x  x  )
+*                        ( B2 B3 )     ( 0  x  )
+*
+*  The rows of the transformed A and B are parallel, where
+*
+*    U = (  CSU  SNU ), V = (  CSV SNV ), Q = (  CSQ   SNQ )
+*        ( -SNU  CSU )      ( -SNV CSV )      ( -SNQ   CSQ )
+*
+*  Z' denotes the transpose of Z.
+*
+*
+*  Arguments
+*  =========
+*
+*  UPPER   (input) LOGICAL
+*          = .TRUE.: the input matrices A and B are upper triangular.
+*          = .FALSE.: the input matrices A and B are lower triangular.
+*
+*  A1      (input) REAL
+*  A2      (input) REAL
+*  A3      (input) REAL
+*          On entry, A1, A2 and A3 are elements of the input 2-by-2
+*          upper (lower) triangular matrix A.
+*
+*  B1      (input) REAL
+*  B2      (input) REAL
+*  B3      (input) REAL
+*          On entry, B1, B2 and B3 are elements of the input 2-by-2
+*          upper (lower) triangular matrix B.
+*
+*  CSU     (output) REAL
+*  SNU     (output) REAL
+*          The desired orthogonal matrix U.
+*
+*  CSV     (output) REAL
+*  SNV     (output) REAL
+*          The desired orthogonal matrix V.
+*
+*  CSQ     (output) REAL
+*  SNQ     (output) REAL
+*          The desired orthogonal matrix Q.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      REAL               A, AUA11, AUA12, AUA21, AUA22, AVB11, AVB12,
+     $                   AVB21, AVB22, CSL, CSR, D, S1, S2, SNL,
+     $                   SNR, UA11R, UA22R, VB11R, VB22R, B, C, R, UA11,
+     $                   UA12, UA21, UA22, VB11, VB12, VB21, VB22
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARTG, SLASV2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS
+*     ..
+*     .. Executable Statements ..
+*
+      IF( UPPER ) THEN
+*
+*        Input matrices A and B are upper triangular matrices
+*
+*        Form matrix C = A*adj(B) = ( a b )
+*                                   ( 0 d )
+*
+         A = A1*B3
+         D = A3*B1
+         B = A2*B1 - A1*B2
+*
+*        The SVD of real 2-by-2 triangular C
+*
+*         ( CSL -SNL )*( A B )*(  CSR  SNR ) = ( R 0 )
+*         ( SNL  CSL ) ( 0 D ) ( -SNR  CSR )   ( 0 T )
+*
+         CALL SLASV2( A, B, D, S1, S2, SNR, CSR, SNL, CSL )
+*
+         IF( ABS( CSL ).GE.ABS( SNL ) .OR. ABS( CSR ).GE.ABS( SNR ) )
+     $        THEN
+*
+*           Compute the (1,1) and (1,2) elements of U'*A and V'*B,
+*           and (1,2) element of |U|'*|A| and |V|'*|B|.
+*
+            UA11R = CSL*A1
+            UA12 = CSL*A2 + SNL*A3
+*
+            VB11R = CSR*B1
+            VB12 = CSR*B2 + SNR*B3
+*
+            AUA12 = ABS( CSL )*ABS( A2 ) + ABS( SNL )*ABS( A3 )
+            AVB12 = ABS( CSR )*ABS( B2 ) + ABS( SNR )*ABS( B3 )
+*
+*           zero (1,2) elements of U'*A and V'*B
+*
+            IF( ( ABS( UA11R )+ABS( UA12 ) ).NE.ZERO ) THEN
+               IF( AUA12 / ( ABS( UA11R )+ABS( UA12 ) ).LE.AVB12 /
+     $             ( ABS( VB11R )+ABS( VB12 ) ) ) THEN
+                  CALL SLARTG( -UA11R, UA12, CSQ, SNQ, R )
+               ELSE
+                  CALL SLARTG( -VB11R, VB12, CSQ, SNQ, R )
+               END IF
+            ELSE
+               CALL SLARTG( -VB11R, VB12, CSQ, SNQ, R )
+            END IF
+*
+            CSU = CSL
+            SNU = -SNL
+            CSV = CSR
+            SNV = -SNR
+*
+         ELSE
+*
+*           Compute the (2,1) and (2,2) elements of U'*A and V'*B,
+*           and (2,2) element of |U|'*|A| and |V|'*|B|.
+*
+            UA21 = -SNL*A1
+            UA22 = -SNL*A2 + CSL*A3
+*
+            VB21 = -SNR*B1
+            VB22 = -SNR*B2 + CSR*B3
+*
+            AUA22 = ABS( SNL )*ABS( A2 ) + ABS( CSL )*ABS( A3 )
+            AVB22 = ABS( SNR )*ABS( B2 ) + ABS( CSR )*ABS( B3 )
+*
+*           zero (2,2) elements of U'*A and V'*B, and then swap.
+*
+            IF( ( ABS( UA21 )+ABS( UA22 ) ).NE.ZERO ) THEN
+               IF( AUA22 / ( ABS( UA21 )+ABS( UA22 ) ).LE.AVB22 /
+     $             ( ABS( VB21 )+ABS( VB22 ) ) ) THEN
+                  CALL SLARTG( -UA21, UA22, CSQ, SNQ, R )
+               ELSE
+                  CALL SLARTG( -VB21, VB22, CSQ, SNQ, R )
+               END IF
+            ELSE
+               CALL SLARTG( -VB21, VB22, CSQ, SNQ, R )
+            END IF
+*
+            CSU = SNL
+            SNU = CSL
+            CSV = SNR
+            SNV = CSR
+*
+         END IF
+*
+      ELSE
+*
+*        Input matrices A and B are lower triangular matrices
+*
+*        Form matrix C = A*adj(B) = ( a 0 )
+*                                   ( c d )
+*
+         A = A1*B3
+         D = A3*B1
+         C = A2*B3 - A3*B2
+*
+*        The SVD of real 2-by-2 triangular C
+*
+*         ( CSL -SNL )*( A 0 )*(  CSR  SNR ) = ( R 0 )
+*         ( SNL  CSL ) ( C D ) ( -SNR  CSR )   ( 0 T )
+*
+         CALL SLASV2( A, C, D, S1, S2, SNR, CSR, SNL, CSL )
+*
+         IF( ABS( CSR ).GE.ABS( SNR ) .OR. ABS( CSL ).GE.ABS( SNL ) )
+     $        THEN
+*
+*           Compute the (2,1) and (2,2) elements of U'*A and V'*B,
+*           and (2,1) element of |U|'*|A| and |V|'*|B|.
+*
+            UA21 = -SNR*A1 + CSR*A2
+            UA22R = CSR*A3
+*
+            VB21 = -SNL*B1 + CSL*B2
+            VB22R = CSL*B3
+*
+            AUA21 = ABS( SNR )*ABS( A1 ) + ABS( CSR )*ABS( A2 )
+            AVB21 = ABS( SNL )*ABS( B1 ) + ABS( CSL )*ABS( B2 )
+*
+*           zero (2,1) elements of U'*A and V'*B.
+*
+            IF( ( ABS( UA21 )+ABS( UA22R ) ).NE.ZERO ) THEN
+               IF( AUA21 / ( ABS( UA21 )+ABS( UA22R ) ).LE.AVB21 /
+     $             ( ABS( VB21 )+ABS( VB22R ) ) ) THEN
+                  CALL SLARTG( UA22R, UA21, CSQ, SNQ, R )
+               ELSE
+                  CALL SLARTG( VB22R, VB21, CSQ, SNQ, R )
+               END IF
+            ELSE
+               CALL SLARTG( VB22R, VB21, CSQ, SNQ, R )
+            END IF
+*
+            CSU = CSR
+            SNU = -SNR
+            CSV = CSL
+            SNV = -SNL
+*
+         ELSE
+*
+*           Compute the (1,1) and (1,2) elements of U'*A and V'*B,
+*           and (1,1) element of |U|'*|A| and |V|'*|B|.
+*
+            UA11 = CSR*A1 + SNR*A2
+            UA12 = SNR*A3
+*
+            VB11 = CSL*B1 + SNL*B2
+            VB12 = SNL*B3
+*
+            AUA11 = ABS( CSR )*ABS( A1 ) + ABS( SNR )*ABS( A2 )
+            AVB11 = ABS( CSL )*ABS( B1 ) + ABS( SNL )*ABS( B2 )
+*
+*           zero (1,1) elements of U'*A and V'*B, and then swap.
+*
+            IF( ( ABS( UA11 )+ABS( UA12 ) ).NE.ZERO ) THEN
+               IF( AUA11 / ( ABS( UA11 )+ABS( UA12 ) ).LE.AVB11 /
+     $             ( ABS( VB11 )+ABS( VB12 ) ) ) THEN
+                  CALL SLARTG( UA12, UA11, CSQ, SNQ, R )
+               ELSE
+                  CALL SLARTG( VB12, VB11, CSQ, SNQ, R )
+               END IF
+            ELSE
+               CALL SLARTG( VB12, VB11, CSQ, SNQ, R )
+            END IF
+*
+            CSU = SNR
+            SNU = CSR
+            CSV = SNL
+            SNV = CSL
+*
+         END IF
+*
+      END IF
+*
+      RETURN
+*
+*     End of SLAGS2
+*
+      END
diff --git a/costa/native/external/lapack/slagtf.f b/costa/native/external/lapack/slagtf.f
new file mode 100644
index 000000000..c736e4b2b
--- /dev/null
+++ b/costa/native/external/lapack/slagtf.f
@@ -0,0 +1,191 @@
+      SUBROUTINE SLAGTF( N, A, LAMBDA, B, C, TOL, D, IN, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, N
+      REAL               LAMBDA, TOL
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IN( * )
+      REAL               A( * ), B( * ), C( * ), D( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAGTF factorizes the matrix (T - lambda*I), where T is an n by n
+*  tridiagonal matrix and lambda is a scalar, as
+*
+*     T - lambda*I = PLU,
+*
+*  where P is a permutation matrix, L is a unit lower tridiagonal matrix
+*  with at most one non-zero sub-diagonal elements per column and U is
+*  an upper triangular matrix with at most two non-zero super-diagonal
+*  elements per column.
+*
+*  The factorization is obtained by Gaussian elimination with partial
+*  pivoting and implicit row scaling.
+*
+*  The parameter LAMBDA is included in the routine so that SLAGTF may
+*  be used, in conjunction with SLAGTS, to obtain eigenvectors of T by
+*  inverse iteration.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix T.
+*
+*  A       (input/output) REAL array, dimension (N)
+*          On entry, A must contain the diagonal elements of T.
+*
+*          On exit, A is overwritten by the n diagonal elements of the
+*          upper triangular matrix U of the factorization of T.
+*
+*  LAMBDA  (input) REAL
+*          On entry, the scalar lambda.
+*
+*  B       (input/output) REAL array, dimension (N-1)
+*          On entry, B must contain the (n-1) super-diagonal elements of
+*          T.
+*
+*          On exit, B is overwritten by the (n-1) super-diagonal
+*          elements of the matrix U of the factorization of T.
+*
+*  C       (input/output) REAL array, dimension (N-1)
+*          On entry, C must contain the (n-1) sub-diagonal elements of
+*          T.
+*
+*          On exit, C is overwritten by the (n-1) sub-diagonal elements
+*          of the matrix L of the factorization of T.
+*
+*  TOL     (input) REAL
+*          On entry, a relative tolerance used to indicate whether or
+*          not the matrix (T - lambda*I) is nearly singular. TOL should
+*          normally be chose as approximately the largest relative error
+*          in the elements of T. For example, if the elements of T are
+*          correct to about 4 significant figures, then TOL should be
+*          set to about 5*10**(-4). If TOL is supplied as less than eps,
+*          where eps is the relative machine precision, then the value
+*          eps is used in place of TOL.
+*
+*  D       (output) REAL array, dimension (N-2)
+*          On exit, D is overwritten by the (n-2) second super-diagonal
+*          elements of the matrix U of the factorization of T.
+*
+*  IN      (output) INTEGER array, dimension (N)
+*          On exit, IN contains details of the permutation matrix P. If
+*          an interchange occurred at the kth step of the elimination,
+*          then IN(k) = 1, otherwise IN(k) = 0. The element IN(n)
+*          returns the smallest positive integer j such that
+*
+*             abs( u(j,j) ).le. norm( (T - lambda*I)(j) )*TOL,
+*
+*          where norm( A(j) ) denotes the sum of the absolute values of
+*          the jth row of the matrix A. If no such j exists then IN(n)
+*          is returned as zero. If IN(n) is returned as positive, then a
+*          diagonal element of U is small, indicating that
+*          (T - lambda*I) is singular or nearly singular,
+*
+*  INFO    (output) INTEGER
+*          = 0   : successful exit
+*          .lt. 0: if INFO = -k, the kth argument had an illegal value
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            K
+      REAL               EPS, MULT, PIV1, PIV2, SCALE1, SCALE2, TEMP, TL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+         CALL XERBLA( 'SLAGTF', -INFO )
+         RETURN
+      END IF
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      A( 1 ) = A( 1 ) - LAMBDA
+      IN( N ) = 0
+      IF( N.EQ.1 ) THEN
+         IF( A( 1 ).EQ.ZERO )
+     $      IN( 1 ) = 1
+         RETURN
+      END IF
+*
+      EPS = SLAMCH( 'Epsilon' )
+*
+      TL = MAX( TOL, EPS )
+      SCALE1 = ABS( A( 1 ) ) + ABS( B( 1 ) )
+      DO 10 K = 1, N - 1
+         A( K+1 ) = A( K+1 ) - LAMBDA
+         SCALE2 = ABS( C( K ) ) + ABS( A( K+1 ) )
+         IF( K.LT.( N-1 ) )
+     $      SCALE2 = SCALE2 + ABS( B( K+1 ) )
+         IF( A( K ).EQ.ZERO ) THEN
+            PIV1 = ZERO
+         ELSE
+            PIV1 = ABS( A( K ) ) / SCALE1
+         END IF
+         IF( C( K ).EQ.ZERO ) THEN
+            IN( K ) = 0
+            PIV2 = ZERO
+            SCALE1 = SCALE2
+            IF( K.LT.( N-1 ) )
+     $         D( K ) = ZERO
+         ELSE
+            PIV2 = ABS( C( K ) ) / SCALE2
+            IF( PIV2.LE.PIV1 ) THEN
+               IN( K ) = 0
+               SCALE1 = SCALE2
+               C( K ) = C( K ) / A( K )
+               A( K+1 ) = A( K+1 ) - C( K )*B( K )
+               IF( K.LT.( N-1 ) )
+     $            D( K ) = ZERO
+            ELSE
+               IN( K ) = 1
+               MULT = A( K ) / C( K )
+               A( K ) = C( K )
+               TEMP = A( K+1 )
+               A( K+1 ) = B( K ) - MULT*TEMP
+               IF( K.LT.( N-1 ) ) THEN
+                  D( K ) = B( K+1 )
+                  B( K+1 ) = -MULT*D( K )
+               END IF
+               B( K ) = TEMP
+               C( K ) = MULT
+            END IF
+         END IF
+         IF( ( MAX( PIV1, PIV2 ).LE.TL ) .AND. ( IN( N ).EQ.0 ) )
+     $      IN( N ) = K
+   10 CONTINUE
+      IF( ( ABS( A( N ) ).LE.SCALE1*TL ) .AND. ( IN( N ).EQ.0 ) )
+     $   IN( N ) = N
+*
+      RETURN
+*
+*     End of SLAGTF
+*
+      END
diff --git a/costa/native/external/lapack/slagtm.f b/costa/native/external/lapack/slagtm.f
new file mode 100644
index 000000000..7b6d336e6
--- /dev/null
+++ b/costa/native/external/lapack/slagtm.f
@@ -0,0 +1,191 @@
+      SUBROUTINE SLAGTM( TRANS, N, NRHS, ALPHA, DL, D, DU, X, LDX, BETA,
+     $                   B, LDB )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            LDB, LDX, N, NRHS
+      REAL               ALPHA, BETA
+*     ..
+*     .. Array Arguments ..
+      REAL               B( LDB, * ), D( * ), DL( * ), DU( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAGTM performs a matrix-vector product of the form
+*
+*     B := alpha * A * X + beta * B
+*
+*  where A is a tridiagonal matrix of order N, B and X are N by NRHS
+*  matrices, and alpha and beta are real scalars, each of which may be
+*  0., 1., or -1.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER
+*          Specifies the operation applied to A.
+*          = 'N':  No transpose, B := alpha * A * X + beta * B
+*          = 'T':  Transpose,    B := alpha * A'* X + beta * B
+*          = 'C':  Conjugate transpose = Transpose
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices X and B.
+*
+*  ALPHA   (input) REAL
+*          The scalar alpha.  ALPHA must be 0., 1., or -1.; otherwise,
+*          it is assumed to be 0.
+*
+*  DL      (input) REAL array, dimension (N-1)
+*          The (n-1) sub-diagonal elements of T.
+*
+*  D       (input) REAL array, dimension (N)
+*          The diagonal elements of T.
+*
+*  DU      (input) REAL array, dimension (N-1)
+*          The (n-1) super-diagonal elements of T.
+*
+*  X       (input) REAL array, dimension (LDX,NRHS)
+*          The N by NRHS matrix X.
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(N,1).
+*
+*  BETA    (input) REAL
+*          The scalar beta.  BETA must be 0., 1., or -1.; otherwise,
+*          it is assumed to be 1.
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the N by NRHS matrix B.
+*          On exit, B is overwritten by the matrix expression
+*          B := alpha * A * X + beta * B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(N,1).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Multiply B by BETA if BETA.NE.1.
+*
+      IF( BETA.EQ.ZERO ) THEN
+         DO 20 J = 1, NRHS
+            DO 10 I = 1, N
+               B( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+      ELSE IF( BETA.EQ.-ONE ) THEN
+         DO 40 J = 1, NRHS
+            DO 30 I = 1, N
+               B( I, J ) = -B( I, J )
+   30       CONTINUE
+   40    CONTINUE
+      END IF
+*
+      IF( ALPHA.EQ.ONE ) THEN
+         IF( LSAME( TRANS, 'N' ) ) THEN
+*
+*           Compute B := B + A*X
+*
+            DO 60 J = 1, NRHS
+               IF( N.EQ.1 ) THEN
+                  B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J )
+               ELSE
+                  B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) +
+     $                        DU( 1 )*X( 2, J )
+                  B( N, J ) = B( N, J ) + DL( N-1 )*X( N-1, J ) +
+     $                        D( N )*X( N, J )
+                  DO 50 I = 2, N - 1
+                     B( I, J ) = B( I, J ) + DL( I-1 )*X( I-1, J ) +
+     $                           D( I )*X( I, J ) + DU( I )*X( I+1, J )
+   50             CONTINUE
+               END IF
+   60       CONTINUE
+         ELSE
+*
+*           Compute B := B + A'*X
+*
+            DO 80 J = 1, NRHS
+               IF( N.EQ.1 ) THEN
+                  B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J )
+               ELSE
+                  B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) +
+     $                        DL( 1 )*X( 2, J )
+                  B( N, J ) = B( N, J ) + DU( N-1 )*X( N-1, J ) +
+     $                        D( N )*X( N, J )
+                  DO 70 I = 2, N - 1
+                     B( I, J ) = B( I, J ) + DU( I-1 )*X( I-1, J ) +
+     $                           D( I )*X( I, J ) + DL( I )*X( I+1, J )
+   70             CONTINUE
+               END IF
+   80       CONTINUE
+         END IF
+      ELSE IF( ALPHA.EQ.-ONE ) THEN
+         IF( LSAME( TRANS, 'N' ) ) THEN
+*
+*           Compute B := B - A*X
+*
+            DO 100 J = 1, NRHS
+               IF( N.EQ.1 ) THEN
+                  B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J )
+               ELSE
+                  B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) -
+     $                        DU( 1 )*X( 2, J )
+                  B( N, J ) = B( N, J ) - DL( N-1 )*X( N-1, J ) -
+     $                        D( N )*X( N, J )
+                  DO 90 I = 2, N - 1
+                     B( I, J ) = B( I, J ) - DL( I-1 )*X( I-1, J ) -
+     $                           D( I )*X( I, J ) - DU( I )*X( I+1, J )
+   90             CONTINUE
+               END IF
+  100       CONTINUE
+         ELSE
+*
+*           Compute B := B - A'*X
+*
+            DO 120 J = 1, NRHS
+               IF( N.EQ.1 ) THEN
+                  B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J )
+               ELSE
+                  B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) -
+     $                        DL( 1 )*X( 2, J )
+                  B( N, J ) = B( N, J ) - DU( N-1 )*X( N-1, J ) -
+     $                        D( N )*X( N, J )
+                  DO 110 I = 2, N - 1
+                     B( I, J ) = B( I, J ) - DU( I-1 )*X( I-1, J ) -
+     $                           D( I )*X( I, J ) - DL( I )*X( I+1, J )
+  110             CONTINUE
+               END IF
+  120       CONTINUE
+         END IF
+      END IF
+      RETURN
+*
+*     End of SLAGTM
+*
+      END
diff --git a/costa/native/external/lapack/slagts.f b/costa/native/external/lapack/slagts.f
new file mode 100644
index 000000000..44687dded
--- /dev/null
+++ b/costa/native/external/lapack/slagts.f
@@ -0,0 +1,305 @@
+      SUBROUTINE SLAGTS( JOB, N, A, B, C, D, IN, Y, TOL, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, JOB, N
+      REAL               TOL
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IN( * )
+      REAL               A( * ), B( * ), C( * ), D( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAGTS may be used to solve one of the systems of equations
+*
+*     (T - lambda*I)*x = y   or   (T - lambda*I)'*x = y,
+*
+*  where T is an n by n tridiagonal matrix, for x, following the
+*  factorization of (T - lambda*I) as
+*
+*     (T - lambda*I) = P*L*U ,
+*
+*  by routine SLAGTF. The choice of equation to be solved is
+*  controlled by the argument JOB, and in each case there is an option
+*  to perturb zero or very small diagonal elements of U, this option
+*  being intended for use in applications such as inverse iteration.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) INTEGER
+*          Specifies the job to be performed by SLAGTS as follows:
+*          =  1: The equations  (T - lambda*I)x = y  are to be solved,
+*                but diagonal elements of U are not to be perturbed.
+*          = -1: The equations  (T - lambda*I)x = y  are to be solved
+*                and, if overflow would otherwise occur, the diagonal
+*                elements of U are to be perturbed. See argument TOL
+*                below.
+*          =  2: The equations  (T - lambda*I)'x = y  are to be solved,
+*                but diagonal elements of U are not to be perturbed.
+*          = -2: The equations  (T - lambda*I)'x = y  are to be solved
+*                and, if overflow would otherwise occur, the diagonal
+*                elements of U are to be perturbed. See argument TOL
+*                below.
+*
+*  N       (input) INTEGER
+*          The order of the matrix T.
+*
+*  A       (input) REAL array, dimension (N)
+*          On entry, A must contain the diagonal elements of U as
+*          returned from SLAGTF.
+*
+*  B       (input) REAL array, dimension (N-1)
+*          On entry, B must contain the first super-diagonal elements of
+*          U as returned from SLAGTF.
+*
+*  C       (input) REAL array, dimension (N-1)
+*          On entry, C must contain the sub-diagonal elements of L as
+*          returned from SLAGTF.
+*
+*  D       (input) REAL array, dimension (N-2)
+*          On entry, D must contain the second super-diagonal elements
+*          of U as returned from SLAGTF.
+*
+*  IN      (input) INTEGER array, dimension (N)
+*          On entry, IN must contain details of the matrix P as returned
+*          from SLAGTF.
+*
+*  Y       (input/output) REAL array, dimension (N)
+*          On entry, the right hand side vector y.
+*          On exit, Y is overwritten by the solution vector x.
+*
+*  TOL     (input/output) REAL
+*          On entry, with  JOB .lt. 0, TOL should be the minimum
+*          perturbation to be made to very small diagonal elements of U.
+*          TOL should normally be chosen as about eps*norm(U), where eps
+*          is the relative machine precision, but if TOL is supplied as
+*          non-positive, then it is reset to eps*max( abs( u(i,j) ) ).
+*          If  JOB .gt. 0  then TOL is not referenced.
+*
+*          On exit, TOL is changed as described above, only if TOL is
+*          non-positive on entry. Otherwise TOL is unchanged.
+*
+*  INFO    (output) INTEGER
+*          = 0   : successful exit
+*          .lt. 0: if INFO = -i, the i-th argument had an illegal value
+*          .gt. 0: overflow would occur when computing the INFO(th)
+*                  element of the solution vector x. This can only occur
+*                  when JOB is supplied as positive and either means
+*                  that a diagonal element of U is very small, or that
+*                  the elements of the right-hand side vector y are very
+*                  large.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            K
+      REAL               ABSAK, AK, BIGNUM, EPS, PERT, SFMIN, TEMP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SIGN
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      IF( ( ABS( JOB ).GT.2 ) .OR. ( JOB.EQ.0 ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLAGTS', -INFO )
+         RETURN
+      END IF
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      EPS = SLAMCH( 'Epsilon' )
+      SFMIN = SLAMCH( 'Safe minimum' )
+      BIGNUM = ONE / SFMIN
+*
+      IF( JOB.LT.0 ) THEN
+         IF( TOL.LE.ZERO ) THEN
+            TOL = ABS( A( 1 ) )
+            IF( N.GT.1 )
+     $         TOL = MAX( TOL, ABS( A( 2 ) ), ABS( B( 1 ) ) )
+            DO 10 K = 3, N
+               TOL = MAX( TOL, ABS( A( K ) ), ABS( B( K-1 ) ),
+     $               ABS( D( K-2 ) ) )
+   10       CONTINUE
+            TOL = TOL*EPS
+            IF( TOL.EQ.ZERO )
+     $         TOL = EPS
+         END IF
+      END IF
+*
+      IF( ABS( JOB ).EQ.1 ) THEN
+         DO 20 K = 2, N
+            IF( IN( K-1 ).EQ.0 ) THEN
+               Y( K ) = Y( K ) - C( K-1 )*Y( K-1 )
+            ELSE
+               TEMP = Y( K-1 )
+               Y( K-1 ) = Y( K )
+               Y( K ) = TEMP - C( K-1 )*Y( K )
+            END IF
+   20    CONTINUE
+         IF( JOB.EQ.1 ) THEN
+            DO 30 K = N, 1, -1
+               IF( K.LE.N-2 ) THEN
+                  TEMP = Y( K ) - B( K )*Y( K+1 ) - D( K )*Y( K+2 )
+               ELSE IF( K.EQ.N-1 ) THEN
+                  TEMP = Y( K ) - B( K )*Y( K+1 )
+               ELSE
+                  TEMP = Y( K )
+               END IF
+               AK = A( K )
+               ABSAK = ABS( AK )
+               IF( ABSAK.LT.ONE ) THEN
+                  IF( ABSAK.LT.SFMIN ) THEN
+                     IF( ABSAK.EQ.ZERO .OR. ABS( TEMP )*SFMIN.GT.ABSAK )
+     $                    THEN
+                        INFO = K
+                        RETURN
+                     ELSE
+                        TEMP = TEMP*BIGNUM
+                        AK = AK*BIGNUM
+                     END IF
+                  ELSE IF( ABS( TEMP ).GT.ABSAK*BIGNUM ) THEN
+                     INFO = K
+                     RETURN
+                  END IF
+               END IF
+               Y( K ) = TEMP / AK
+   30       CONTINUE
+         ELSE
+            DO 50 K = N, 1, -1
+               IF( K.LE.N-2 ) THEN
+                  TEMP = Y( K ) - B( K )*Y( K+1 ) - D( K )*Y( K+2 )
+               ELSE IF( K.EQ.N-1 ) THEN
+                  TEMP = Y( K ) - B( K )*Y( K+1 )
+               ELSE
+                  TEMP = Y( K )
+               END IF
+               AK = A( K )
+               PERT = SIGN( TOL, AK )
+   40          CONTINUE
+               ABSAK = ABS( AK )
+               IF( ABSAK.LT.ONE ) THEN
+                  IF( ABSAK.LT.SFMIN ) THEN
+                     IF( ABSAK.EQ.ZERO .OR. ABS( TEMP )*SFMIN.GT.ABSAK )
+     $                    THEN
+                        AK = AK + PERT
+                        PERT = 2*PERT
+                        GO TO 40
+                     ELSE
+                        TEMP = TEMP*BIGNUM
+                        AK = AK*BIGNUM
+                     END IF
+                  ELSE IF( ABS( TEMP ).GT.ABSAK*BIGNUM ) THEN
+                     AK = AK + PERT
+                     PERT = 2*PERT
+                     GO TO 40
+                  END IF
+               END IF
+               Y( K ) = TEMP / AK
+   50       CONTINUE
+         END IF
+      ELSE
+*
+*        Come to here if  JOB = 2 or -2
+*
+         IF( JOB.EQ.2 ) THEN
+            DO 60 K = 1, N
+               IF( K.GE.3 ) THEN
+                  TEMP = Y( K ) - B( K-1 )*Y( K-1 ) - D( K-2 )*Y( K-2 )
+               ELSE IF( K.EQ.2 ) THEN
+                  TEMP = Y( K ) - B( K-1 )*Y( K-1 )
+               ELSE
+                  TEMP = Y( K )
+               END IF
+               AK = A( K )
+               ABSAK = ABS( AK )
+               IF( ABSAK.LT.ONE ) THEN
+                  IF( ABSAK.LT.SFMIN ) THEN
+                     IF( ABSAK.EQ.ZERO .OR. ABS( TEMP )*SFMIN.GT.ABSAK )
+     $                    THEN
+                        INFO = K
+                        RETURN
+                     ELSE
+                        TEMP = TEMP*BIGNUM
+                        AK = AK*BIGNUM
+                     END IF
+                  ELSE IF( ABS( TEMP ).GT.ABSAK*BIGNUM ) THEN
+                     INFO = K
+                     RETURN
+                  END IF
+               END IF
+               Y( K ) = TEMP / AK
+   60       CONTINUE
+         ELSE
+            DO 80 K = 1, N
+               IF( K.GE.3 ) THEN
+                  TEMP = Y( K ) - B( K-1 )*Y( K-1 ) - D( K-2 )*Y( K-2 )
+               ELSE IF( K.EQ.2 ) THEN
+                  TEMP = Y( K ) - B( K-1 )*Y( K-1 )
+               ELSE
+                  TEMP = Y( K )
+               END IF
+               AK = A( K )
+               PERT = SIGN( TOL, AK )
+   70          CONTINUE
+               ABSAK = ABS( AK )
+               IF( ABSAK.LT.ONE ) THEN
+                  IF( ABSAK.LT.SFMIN ) THEN
+                     IF( ABSAK.EQ.ZERO .OR. ABS( TEMP )*SFMIN.GT.ABSAK )
+     $                    THEN
+                        AK = AK + PERT
+                        PERT = 2*PERT
+                        GO TO 70
+                     ELSE
+                        TEMP = TEMP*BIGNUM
+                        AK = AK*BIGNUM
+                     END IF
+                  ELSE IF( ABS( TEMP ).GT.ABSAK*BIGNUM ) THEN
+                     AK = AK + PERT
+                     PERT = 2*PERT
+                     GO TO 70
+                  END IF
+               END IF
+               Y( K ) = TEMP / AK
+   80       CONTINUE
+         END IF
+*
+         DO 90 K = N, 2, -1
+            IF( IN( K-1 ).EQ.0 ) THEN
+               Y( K-1 ) = Y( K-1 ) - C( K-1 )*Y( K )
+            ELSE
+               TEMP = Y( K-1 )
+               Y( K-1 ) = Y( K )
+               Y( K ) = TEMP - C( K-1 )*Y( K )
+            END IF
+   90    CONTINUE
+      END IF
+*
+*     End of SLAGTS
+*
+      END
diff --git a/costa/native/external/lapack/slagv2.f b/costa/native/external/lapack/slagv2.f
new file mode 100644
index 000000000..2aa0f1fb6
--- /dev/null
+++ b/costa/native/external/lapack/slagv2.f
@@ -0,0 +1,290 @@
+      SUBROUTINE SLAGV2( A, LDA, B, LDB, ALPHAR, ALPHAI, BETA, CSL, SNL,
+     $                   CSR, SNR )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            LDA, LDB
+      REAL               CSL, CSR, SNL, SNR
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), ALPHAI( 2 ), ALPHAR( 2 ),
+     $                   B( LDB, * ), BETA( 2 )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAGV2 computes the Generalized Schur factorization of a real 2-by-2
+*  matrix pencil (A,B) where B is upper triangular. This routine
+*  computes orthogonal (rotation) matrices given by CSL, SNL and CSR,
+*  SNR such that
+*
+*  1) if the pencil (A,B) has two real eigenvalues (include 0/0 or 1/0
+*     types), then
+*
+*     [ a11 a12 ] := [  CSL  SNL ] [ a11 a12 ] [  CSR -SNR ]
+*     [  0  a22 ]    [ -SNL  CSL ] [ a21 a22 ] [  SNR  CSR ]
+*
+*     [ b11 b12 ] := [  CSL  SNL ] [ b11 b12 ] [  CSR -SNR ]
+*     [  0  b22 ]    [ -SNL  CSL ] [  0  b22 ] [  SNR  CSR ],
+*
+*  2) if the pencil (A,B) has a pair of complex conjugate eigenvalues,
+*     then
+*
+*     [ a11 a12 ] := [  CSL  SNL ] [ a11 a12 ] [  CSR -SNR ]
+*     [ a21 a22 ]    [ -SNL  CSL ] [ a21 a22 ] [  SNR  CSR ]
+*
+*     [ b11  0  ] := [  CSL  SNL ] [ b11 b12 ] [  CSR -SNR ]
+*     [  0  b22 ]    [ -SNL  CSL ] [  0  b22 ] [  SNR  CSR ]
+*
+*     where b11 >= b22 > 0.
+*
+*
+*  Arguments
+*  =========
+*
+*  A       (input/output) REAL array, dimension (LDA, 2)
+*          On entry, the 2 x 2 matrix A.
+*          On exit, A is overwritten by the ``A-part'' of the
+*          generalized Schur form.
+*
+*  LDA     (input) INTEGER
+*          THe leading dimension of the array A.  LDA >= 2.
+*
+*  B       (input/output) REAL array, dimension (LDB, 2)
+*          On entry, the upper triangular 2 x 2 matrix B.
+*          On exit, B is overwritten by the ``B-part'' of the
+*          generalized Schur form.
+*
+*  LDB     (input) INTEGER
+*          THe leading dimension of the array B.  LDB >= 2.
+*
+*  ALPHAR  (output) REAL array, dimension (2)
+*  ALPHAI  (output) REAL array, dimension (2)
+*  BETA    (output) REAL array, dimension (2)
+*          (ALPHAR(k)+i*ALPHAI(k))/BETA(k) are the eigenvalues of the
+*          pencil (A,B), k=1,2, i = sqrt(-1).  Note that BETA(k) may
+*          be zero.
+*
+*  CSL     (output) REAL
+*          The cosine of the left rotation matrix.
+*
+*  SNL     (output) REAL
+*          The sine of the left rotation matrix.
+*
+*  CSR     (output) REAL
+*          The cosine of the right rotation matrix.
+*
+*  SNR     (output) REAL
+*          The sine of the right rotation matrix.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      REAL               ANORM, ASCALE, BNORM, BSCALE, H1, H2, H3, QQ,
+     $                   R, RR, SAFMIN, SCALE1, SCALE2, T, ULP, WI, WR1,
+     $                   WR2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLAG2, SLARTG, SLASV2, SROT
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH, SLAPY2
+      EXTERNAL           SLAMCH, SLAPY2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+      SAFMIN = SLAMCH( 'S' )
+      ULP = SLAMCH( 'P' )
+*
+*     Scale A
+*
+      ANORM = MAX( ABS( A( 1, 1 ) )+ABS( A( 2, 1 ) ),
+     $        ABS( A( 1, 2 ) )+ABS( A( 2, 2 ) ), SAFMIN )
+      ASCALE = ONE / ANORM
+      A( 1, 1 ) = ASCALE*A( 1, 1 )
+      A( 1, 2 ) = ASCALE*A( 1, 2 )
+      A( 2, 1 ) = ASCALE*A( 2, 1 )
+      A( 2, 2 ) = ASCALE*A( 2, 2 )
+*
+*     Scale B
+*
+      BNORM = MAX( ABS( B( 1, 1 ) ), ABS( B( 1, 2 ) )+ABS( B( 2, 2 ) ),
+     $        SAFMIN )
+      BSCALE = ONE / BNORM
+      B( 1, 1 ) = BSCALE*B( 1, 1 )
+      B( 1, 2 ) = BSCALE*B( 1, 2 )
+      B( 2, 2 ) = BSCALE*B( 2, 2 )
+*
+*     Check if A can be deflated
+*
+      IF( ABS( A( 2, 1 ) ).LE.ULP ) THEN
+         CSL = ONE
+         SNL = ZERO
+         CSR = ONE
+         SNR = ZERO
+         A( 2, 1 ) = ZERO
+         B( 2, 1 ) = ZERO
+*
+*     Check if B is singular
+*
+      ELSE IF( ABS( B( 1, 1 ) ).LE.ULP ) THEN
+         CALL SLARTG( A( 1, 1 ), A( 2, 1 ), CSL, SNL, R )
+         CSR = ONE
+         SNR = ZERO
+         CALL SROT( 2, A( 1, 1 ), LDA, A( 2, 1 ), LDA, CSL, SNL )
+         CALL SROT( 2, B( 1, 1 ), LDB, B( 2, 1 ), LDB, CSL, SNL )
+         A( 2, 1 ) = ZERO
+         B( 1, 1 ) = ZERO
+         B( 2, 1 ) = ZERO
+*
+      ELSE IF( ABS( B( 2, 2 ) ).LE.ULP ) THEN
+         CALL SLARTG( A( 2, 2 ), A( 2, 1 ), CSR, SNR, T )
+         SNR = -SNR
+         CALL SROT( 2, A( 1, 1 ), 1, A( 1, 2 ), 1, CSR, SNR )
+         CALL SROT( 2, B( 1, 1 ), 1, B( 1, 2 ), 1, CSR, SNR )
+         CSL = ONE
+         SNL = ZERO
+         A( 2, 1 ) = ZERO
+         B( 2, 1 ) = ZERO
+         B( 2, 2 ) = ZERO
+*
+      ELSE
+*
+*        B is nonsingular, first compute the eigenvalues of (A,B)
+*
+         CALL SLAG2( A, LDA, B, LDB, SAFMIN, SCALE1, SCALE2, WR1, WR2,
+     $               WI )
+*
+         IF( WI.EQ.ZERO ) THEN
+*
+*           two real eigenvalues, compute s*A-w*B
+*
+            H1 = SCALE1*A( 1, 1 ) - WR1*B( 1, 1 )
+            H2 = SCALE1*A( 1, 2 ) - WR1*B( 1, 2 )
+            H3 = SCALE1*A( 2, 2 ) - WR1*B( 2, 2 )
+*
+            RR = SLAPY2( H1, H2 )
+            QQ = SLAPY2( SCALE1*A( 2, 1 ), H3 )
+*
+            IF( RR.GT.QQ ) THEN
+*
+*              find right rotation matrix to zero 1,1 element of
+*              (sA - wB)
+*
+               CALL SLARTG( H2, H1, CSR, SNR, T )
+*
+            ELSE
+*
+*              find right rotation matrix to zero 2,1 element of
+*              (sA - wB)
+*
+               CALL SLARTG( H3, SCALE1*A( 2, 1 ), CSR, SNR, T )
+*
+            END IF
+*
+            SNR = -SNR
+            CALL SROT( 2, A( 1, 1 ), 1, A( 1, 2 ), 1, CSR, SNR )
+            CALL SROT( 2, B( 1, 1 ), 1, B( 1, 2 ), 1, CSR, SNR )
+*
+*           compute inf norms of A and B
+*
+            H1 = MAX( ABS( A( 1, 1 ) )+ABS( A( 1, 2 ) ),
+     $           ABS( A( 2, 1 ) )+ABS( A( 2, 2 ) ) )
+            H2 = MAX( ABS( B( 1, 1 ) )+ABS( B( 1, 2 ) ),
+     $           ABS( B( 2, 1 ) )+ABS( B( 2, 2 ) ) )
+*
+            IF( ( SCALE1*H1 ).GE.ABS( WR1 )*H2 ) THEN
+*
+*              find left rotation matrix Q to zero out B(2,1)
+*
+               CALL SLARTG( B( 1, 1 ), B( 2, 1 ), CSL, SNL, R )
+*
+            ELSE
+*
+*              find left rotation matrix Q to zero out A(2,1)
+*
+               CALL SLARTG( A( 1, 1 ), A( 2, 1 ), CSL, SNL, R )
+*
+            END IF
+*
+            CALL SROT( 2, A( 1, 1 ), LDA, A( 2, 1 ), LDA, CSL, SNL )
+            CALL SROT( 2, B( 1, 1 ), LDB, B( 2, 1 ), LDB, CSL, SNL )
+*
+            A( 2, 1 ) = ZERO
+            B( 2, 1 ) = ZERO
+*
+         ELSE
+*
+*           a pair of complex conjugate eigenvalues
+*           first compute the SVD of the matrix B
+*
+            CALL SLASV2( B( 1, 1 ), B( 1, 2 ), B( 2, 2 ), R, T, SNR,
+     $                   CSR, SNL, CSL )
+*
+*           Form (A,B) := Q(A,B)Z' where Q is left rotation matrix and
+*           Z is right rotation matrix computed from SLASV2
+*
+            CALL SROT( 2, A( 1, 1 ), LDA, A( 2, 1 ), LDA, CSL, SNL )
+            CALL SROT( 2, B( 1, 1 ), LDB, B( 2, 1 ), LDB, CSL, SNL )
+            CALL SROT( 2, A( 1, 1 ), 1, A( 1, 2 ), 1, CSR, SNR )
+            CALL SROT( 2, B( 1, 1 ), 1, B( 1, 2 ), 1, CSR, SNR )
+*
+            B( 2, 1 ) = ZERO
+            B( 1, 2 ) = ZERO
+*
+         END IF
+*
+      END IF
+*
+*     Unscaling
+*
+      A( 1, 1 ) = ANORM*A( 1, 1 )
+      A( 2, 1 ) = ANORM*A( 2, 1 )
+      A( 1, 2 ) = ANORM*A( 1, 2 )
+      A( 2, 2 ) = ANORM*A( 2, 2 )
+      B( 1, 1 ) = BNORM*B( 1, 1 )
+      B( 2, 1 ) = BNORM*B( 2, 1 )
+      B( 1, 2 ) = BNORM*B( 1, 2 )
+      B( 2, 2 ) = BNORM*B( 2, 2 )
+*
+      IF( WI.EQ.ZERO ) THEN
+         ALPHAR( 1 ) = A( 1, 1 )
+         ALPHAR( 2 ) = A( 2, 2 )
+         ALPHAI( 1 ) = ZERO
+         ALPHAI( 2 ) = ZERO
+         BETA( 1 ) = B( 1, 1 )
+         BETA( 2 ) = B( 2, 2 )
+      ELSE
+         ALPHAR( 1 ) = ANORM*WR1 / SCALE1 / BNORM
+         ALPHAI( 1 ) = ANORM*WI / SCALE1 / BNORM
+         ALPHAR( 2 ) = ALPHAR( 1 )
+         ALPHAI( 2 ) = -ALPHAI( 1 )
+         BETA( 1 ) = ONE
+         BETA( 2 ) = ONE
+      END IF
+*
+   10 CONTINUE
+*
+      RETURN
+*
+*     End of SLAGV2
+*
+      END
diff --git a/costa/native/external/lapack/slahqr.f b/costa/native/external/lapack/slahqr.f
new file mode 100644
index 000000000..9bbd14eea
--- /dev/null
+++ b/costa/native/external/lapack/slahqr.f
@@ -0,0 +1,435 @@
+      SUBROUTINE SLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI,
+     $                   ILOZ, IHIZ, Z, LDZ, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      LOGICAL            WANTT, WANTZ
+      INTEGER            IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      REAL               H( LDH, * ), WI( * ), WR( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAHQR is an auxiliary routine called by SHSEQR to update the
+*  eigenvalues and Schur decomposition already computed by SHSEQR, by
+*  dealing with the Hessenberg submatrix in rows and columns ILO to IHI.
+*
+*  Arguments
+*  =========
+*
+*  WANTT   (input) LOGICAL
+*          = .TRUE. : the full Schur form T is required;
+*          = .FALSE.: only eigenvalues are required.
+*
+*  WANTZ   (input) LOGICAL
+*          = .TRUE. : the matrix of Schur vectors Z is required;
+*          = .FALSE.: Schur vectors are not required.
+*
+*  N       (input) INTEGER
+*          The order of the matrix H.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          It is assumed that H is already upper quasi-triangular in
+*          rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless
+*          ILO = 1). SLAHQR works primarily with the Hessenberg
+*          submatrix in rows and columns ILO to IHI, but applies
+*          transformations to all of H if WANTT is .TRUE..
+*          1 <= ILO <= max(1,IHI); IHI <= N.
+*
+*  H       (input/output) REAL array, dimension (LDH,N)
+*          On entry, the upper Hessenberg matrix H.
+*          On exit, if WANTT is .TRUE., H is upper quasi-triangular in
+*          rows and columns ILO:IHI, with any 2-by-2 diagonal blocks in
+*          standard form. If WANTT is .FALSE., the contents of H are
+*          unspecified on exit.
+*
+*  LDH     (input) INTEGER
+*          The leading dimension of the array H. LDH >= max(1,N).
+*
+*  WR      (output) REAL array, dimension (N)
+*  WI      (output) REAL array, dimension (N)
+*          The real and imaginary parts, respectively, of the computed
+*          eigenvalues ILO to IHI are stored in the corresponding
+*          elements of WR and WI. If two eigenvalues are computed as a
+*          complex conjugate pair, they are stored in consecutive
+*          elements of WR and WI, say the i-th and (i+1)th, with
+*          WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the
+*          eigenvalues are stored in the same order as on the diagonal
+*          of the Schur form returned in H, with WR(i) = H(i,i), and, if
+*          H(i:i+1,i:i+1) is a 2-by-2 diagonal block,
+*          WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i).
+*
+*  ILOZ    (input) INTEGER
+*  IHIZ    (input) INTEGER
+*          Specify the rows of Z to which transformations must be
+*          applied if WANTZ is .TRUE..
+*          1 <= ILOZ <= ILO; IHI <= IHIZ <= N.
+*
+*  Z       (input/output) REAL array, dimension (LDZ,N)
+*          If WANTZ is .TRUE., on entry Z must contain the current
+*          matrix Z of transformations accumulated by SHSEQR, and on
+*          exit Z has been updated; transformations are applied only to
+*          the submatrix Z(ILOZ:IHIZ,ILO:IHI).
+*          If WANTZ is .FALSE., Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z. LDZ >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          > 0: SLAHQR failed to compute all the eigenvalues ILO to IHI
+*               in a total of 30*(IHI-ILO+1) iterations; if INFO = i,
+*               elements i+1:ihi of WR and WI contain those eigenvalues
+*               which have been successfully computed.
+*
+*  Further Details
+*  ===============
+*
+*  2-96 Based on modifications by
+*     David Day, Sandia National Laboratory, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE, HALF
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0, HALF = 0.5E0 )
+      REAL               DAT1, DAT2
+      PARAMETER          ( DAT1 = 0.75E+0, DAT2 = -0.4375E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, I1, I2, ITN, ITS, J, K, L, M, NH, NR, NZ
+      REAL               AVE, CS, DISC, H00, H10, H11, H12, H21, H22,
+     $                   H33, H33S, H43H34, H44, H44S, OVFL, S, SMLNUM,
+     $                   SN, SUM, T1, T2, T3, TST1, ULP, UNFL, V1, V2,
+     $                   V3
+*     ..
+*     .. Local Arrays ..
+      REAL               V( 3 ), WORK( 1 )
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH, SLANHS
+      EXTERNAL           SLAMCH, SLANHS
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SLABAD, SLANV2, SLARFG, SROT
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SIGN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+      IF( ILO.EQ.IHI ) THEN
+         WR( ILO ) = H( ILO, ILO )
+         WI( ILO ) = ZERO
+         RETURN
+      END IF
+*
+      NH = IHI - ILO + 1
+      NZ = IHIZ - ILOZ + 1
+*
+*     Set machine-dependent constants for the stopping criterion.
+*     If norm(H) <= sqrt(OVFL), overflow should not occur.
+*
+      UNFL = SLAMCH( 'Safe minimum' )
+      OVFL = ONE / UNFL
+      CALL SLABAD( UNFL, OVFL )
+      ULP = SLAMCH( 'Precision' )
+      SMLNUM = UNFL*( NH / ULP )
+*
+*     I1 and I2 are the indices of the first row and last column of H
+*     to which transformations must be applied. If eigenvalues only are
+*     being computed, I1 and I2 are set inside the main loop.
+*
+      IF( WANTT ) THEN
+         I1 = 1
+         I2 = N
+      END IF
+*
+*     ITN is the total number of QR iterations allowed.
+*
+      ITN = 30*NH
+*
+*     The main loop begins here. I is the loop index and decreases from
+*     IHI to ILO in steps of 1 or 2. Each iteration of the loop works
+*     with the active submatrix in rows and columns L to I.
+*     Eigenvalues I+1 to IHI have already converged. Either L = ILO or
+*     H(L,L-1) is negligible so that the matrix splits.
+*
+      I = IHI
+   10 CONTINUE
+      L = ILO
+      IF( I.LT.ILO )
+     $   GO TO 150
+*
+*     Perform QR iterations on rows and columns ILO to I until a
+*     submatrix of order 1 or 2 splits off at the bottom because a
+*     subdiagonal element has become negligible.
+*
+      DO 130 ITS = 0, ITN
+*
+*        Look for a single small subdiagonal element.
+*
+         DO 20 K = I, L + 1, -1
+            TST1 = ABS( H( K-1, K-1 ) ) + ABS( H( K, K ) )
+            IF( TST1.EQ.ZERO )
+     $         TST1 = SLANHS( '1', I-L+1, H( L, L ), LDH, WORK )
+            IF( ABS( H( K, K-1 ) ).LE.MAX( ULP*TST1, SMLNUM ) )
+     $         GO TO 30
+   20    CONTINUE
+   30    CONTINUE
+         L = K
+         IF( L.GT.ILO ) THEN
+*
+*           H(L,L-1) is negligible
+*
+            H( L, L-1 ) = ZERO
+         END IF
+*
+*        Exit from loop if a submatrix of order 1 or 2 has split off.
+*
+         IF( L.GE.I-1 )
+     $      GO TO 140
+*
+*        Now the active submatrix is in rows and columns L to I. If
+*        eigenvalues only are being computed, only the active submatrix
+*        need be transformed.
+*
+         IF( .NOT.WANTT ) THEN
+            I1 = L
+            I2 = I
+         END IF
+*
+         IF( ITS.EQ.10 .OR. ITS.EQ.20 ) THEN
+*
+*           Exceptional shift.
+*
+            S = ABS( H( I, I-1 ) ) + ABS( H( I-1, I-2 ) )
+            H44 = DAT1*S + H( I, I )
+            H33 = H44
+            H43H34 = DAT2*S*S
+         ELSE
+*
+*           Prepare to use Francis' double shift
+*           (i.e. 2nd degree generalized Rayleigh quotient)
+*
+            H44 = H( I, I )
+            H33 = H( I-1, I-1 )
+            H43H34 = H( I, I-1 )*H( I-1, I )
+            S = H( I-1, I-2 )*H( I-1, I-2 )
+            DISC = ( H33-H44 )*HALF
+            DISC = DISC*DISC + H43H34
+            IF( DISC.GT.ZERO ) THEN
+*
+*              Real roots: use Wilkinson's shift twice
+*
+               DISC = SQRT( DISC )
+               AVE = HALF*( H33+H44 )
+               IF( ABS( H33 )-ABS( H44 ).GT.ZERO ) THEN
+                  H33 = H33*H44 - H43H34
+                  H44 = H33 / ( SIGN( DISC, AVE )+AVE )
+               ELSE
+                  H44 = SIGN( DISC, AVE ) + AVE
+               END IF
+               H33 = H44
+               H43H34 = ZERO
+            END IF
+         END IF
+*
+*        Look for two consecutive small subdiagonal elements.
+*
+         DO 40 M = I - 2, L, -1
+*           Determine the effect of starting the double-shift QR
+*           iteration at row M, and see if this would make H(M,M-1)
+*           negligible.
+*
+            H11 = H( M, M )
+            H22 = H( M+1, M+1 )
+            H21 = H( M+1, M )
+            H12 = H( M, M+1 )
+            H44S = H44 - H11
+            H33S = H33 - H11
+            V1 = ( H33S*H44S-H43H34 ) / H21 + H12
+            V2 = H22 - H11 - H33S - H44S
+            V3 = H( M+2, M+1 )
+            S = ABS( V1 ) + ABS( V2 ) + ABS( V3 )
+            V1 = V1 / S
+            V2 = V2 / S
+            V3 = V3 / S
+            V( 1 ) = V1
+            V( 2 ) = V2
+            V( 3 ) = V3
+            IF( M.EQ.L )
+     $         GO TO 50
+            H00 = H( M-1, M-1 )
+            H10 = H( M, M-1 )
+            TST1 = ABS( V1 )*( ABS( H00 )+ABS( H11 )+ABS( H22 ) )
+            IF( ABS( H10 )*( ABS( V2 )+ABS( V3 ) ).LE.ULP*TST1 )
+     $         GO TO 50
+   40    CONTINUE
+   50    CONTINUE
+*
+*        Double-shift QR step
+*
+         DO 120 K = M, I - 1
+*
+*           The first iteration of this loop determines a reflection G
+*           from the vector V and applies it from left and right to H,
+*           thus creating a nonzero bulge below the subdiagonal.
+*
+*           Each subsequent iteration determines a reflection G to
+*           restore the Hessenberg form in the (K-1)th column, and thus
+*           chases the bulge one step toward the bottom of the active
+*           submatrix. NR is the order of G.
+*
+            NR = MIN( 3, I-K+1 )
+            IF( K.GT.M )
+     $         CALL SCOPY( NR, H( K, K-1 ), 1, V, 1 )
+            CALL SLARFG( NR, V( 1 ), V( 2 ), 1, T1 )
+            IF( K.GT.M ) THEN
+               H( K, K-1 ) = V( 1 )
+               H( K+1, K-1 ) = ZERO
+               IF( K.LT.I-1 )
+     $            H( K+2, K-1 ) = ZERO
+            ELSE IF( M.GT.L ) THEN
+               H( K, K-1 ) = -H( K, K-1 )
+            END IF
+            V2 = V( 2 )
+            T2 = T1*V2
+            IF( NR.EQ.3 ) THEN
+               V3 = V( 3 )
+               T3 = T1*V3
+*
+*              Apply G from the left to transform the rows of the matrix
+*              in columns K to I2.
+*
+               DO 60 J = K, I2
+                  SUM = H( K, J ) + V2*H( K+1, J ) + V3*H( K+2, J )
+                  H( K, J ) = H( K, J ) - SUM*T1
+                  H( K+1, J ) = H( K+1, J ) - SUM*T2
+                  H( K+2, J ) = H( K+2, J ) - SUM*T3
+   60          CONTINUE
+*
+*              Apply G from the right to transform the columns of the
+*              matrix in rows I1 to min(K+3,I).
+*
+               DO 70 J = I1, MIN( K+3, I )
+                  SUM = H( J, K ) + V2*H( J, K+1 ) + V3*H( J, K+2 )
+                  H( J, K ) = H( J, K ) - SUM*T1
+                  H( J, K+1 ) = H( J, K+1 ) - SUM*T2
+                  H( J, K+2 ) = H( J, K+2 ) - SUM*T3
+   70          CONTINUE
+*
+               IF( WANTZ ) THEN
+*
+*                 Accumulate transformations in the matrix Z
+*
+                  DO 80 J = ILOZ, IHIZ
+                     SUM = Z( J, K ) + V2*Z( J, K+1 ) + V3*Z( J, K+2 )
+                     Z( J, K ) = Z( J, K ) - SUM*T1
+                     Z( J, K+1 ) = Z( J, K+1 ) - SUM*T2
+                     Z( J, K+2 ) = Z( J, K+2 ) - SUM*T3
+   80             CONTINUE
+               END IF
+            ELSE IF( NR.EQ.2 ) THEN
+*
+*              Apply G from the left to transform the rows of the matrix
+*              in columns K to I2.
+*
+               DO 90 J = K, I2
+                  SUM = H( K, J ) + V2*H( K+1, J )
+                  H( K, J ) = H( K, J ) - SUM*T1
+                  H( K+1, J ) = H( K+1, J ) - SUM*T2
+   90          CONTINUE
+*
+*              Apply G from the right to transform the columns of the
+*              matrix in rows I1 to min(K+3,I).
+*
+               DO 100 J = I1, I
+                  SUM = H( J, K ) + V2*H( J, K+1 )
+                  H( J, K ) = H( J, K ) - SUM*T1
+                  H( J, K+1 ) = H( J, K+1 ) - SUM*T2
+  100          CONTINUE
+*
+               IF( WANTZ ) THEN
+*
+*                 Accumulate transformations in the matrix Z
+*
+                  DO 110 J = ILOZ, IHIZ
+                     SUM = Z( J, K ) + V2*Z( J, K+1 )
+                     Z( J, K ) = Z( J, K ) - SUM*T1
+                     Z( J, K+1 ) = Z( J, K+1 ) - SUM*T2
+  110             CONTINUE
+               END IF
+            END IF
+  120    CONTINUE
+*
+  130 CONTINUE
+*
+*     Failure to converge in remaining number of iterations
+*
+      INFO = I
+      RETURN
+*
+  140 CONTINUE
+*
+      IF( L.EQ.I ) THEN
+*
+*        H(I,I-1) is negligible: one eigenvalue has converged.
+*
+         WR( I ) = H( I, I )
+         WI( I ) = ZERO
+      ELSE IF( L.EQ.I-1 ) THEN
+*
+*        H(I-1,I-2) is negligible: a pair of eigenvalues have converged.
+*
+*        Transform the 2-by-2 submatrix to standard Schur form,
+*        and compute and store the eigenvalues.
+*
+         CALL SLANV2( H( I-1, I-1 ), H( I-1, I ), H( I, I-1 ),
+     $                H( I, I ), WR( I-1 ), WI( I-1 ), WR( I ), WI( I ),
+     $                CS, SN )
+*
+         IF( WANTT ) THEN
+*
+*           Apply the transformation to the rest of H.
+*
+            IF( I2.GT.I )
+     $         CALL SROT( I2-I, H( I-1, I+1 ), LDH, H( I, I+1 ), LDH,
+     $                    CS, SN )
+            CALL SROT( I-I1-1, H( I1, I-1 ), 1, H( I1, I ), 1, CS, SN )
+         END IF
+         IF( WANTZ ) THEN
+*
+*           Apply the transformation to Z.
+*
+            CALL SROT( NZ, Z( ILOZ, I-1 ), 1, Z( ILOZ, I ), 1, CS, SN )
+         END IF
+      END IF
+*
+*     Decrement number of remaining iterations, and return to start of
+*     the main loop with new value of I.
+*
+      ITN = ITN - ITS
+      I = L - 1
+      GO TO 10
+*
+  150 CONTINUE
+      RETURN
+*
+*     End of SLAHQR
+*
+      END
diff --git a/costa/native/external/lapack/slahrd.f b/costa/native/external/lapack/slahrd.f
new file mode 100644
index 000000000..6d18352e5
--- /dev/null
+++ b/costa/native/external/lapack/slahrd.f
@@ -0,0 +1,206 @@
+      SUBROUTINE SLAHRD( N, K, NB, A, LDA, TAU, T, LDT, Y, LDY )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            K, LDA, LDT, LDY, N, NB
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), T( LDT, NB ), TAU( NB ),
+     $                   Y( LDY, NB )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAHRD reduces the first NB columns of a real general n-by-(n-k+1)
+*  matrix A so that elements below the k-th subdiagonal are zero. The
+*  reduction is performed by an orthogonal similarity transformation
+*  Q' * A * Q. The routine returns the matrices V and T which determine
+*  Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T.
+*
+*  This is an auxiliary routine called by SGEHRD.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.
+*
+*  K       (input) INTEGER
+*          The offset for the reduction. Elements below the k-th
+*          subdiagonal in the first NB columns are reduced to zero.
+*
+*  NB      (input) INTEGER
+*          The number of columns to be reduced.
+*
+*  A       (input/output) REAL array, dimension (LDA,N-K+1)
+*          On entry, the n-by-(n-k+1) general matrix A.
+*          On exit, the elements on and above the k-th subdiagonal in
+*          the first NB columns are overwritten with the corresponding
+*          elements of the reduced matrix; the elements below the k-th
+*          subdiagonal, with the array TAU, represent the matrix Q as a
+*          product of elementary reflectors. The other columns of A are
+*          unchanged. See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  TAU     (output) REAL array, dimension (NB)
+*          The scalar factors of the elementary reflectors. See Further
+*          Details.
+*
+*  T       (output) REAL array, dimension (LDT,NB)
+*          The upper triangular matrix T.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T.  LDT >= NB.
+*
+*  Y       (output) REAL array, dimension (LDY,NB)
+*          The n-by-nb matrix Y.
+*
+*  LDY     (input) INTEGER
+*          The leading dimension of the array Y. LDY >= N.
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of nb elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(nb).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in
+*  A(i+k+1:n,i), and tau in TAU(i).
+*
+*  The elements of the vectors v together form the (n-k+1)-by-nb matrix
+*  V which is needed, with T and Y, to apply the transformation to the
+*  unreduced part of the matrix, using an update of the form:
+*  A := (I - V*T*V') * (A - Y*V').
+*
+*  The contents of A on exit are illustrated by the following example
+*  with n = 7, k = 3 and nb = 2:
+*
+*     ( a   h   a   a   a )
+*     ( a   h   a   a   a )
+*     ( a   h   a   a   a )
+*     ( h   h   a   a   a )
+*     ( v1  h   a   a   a )
+*     ( v1  v2  a   a   a )
+*     ( v1  v2  a   a   a )
+*
+*  where a denotes an element of the original matrix A, h denotes a
+*  modified element of the upper Hessenberg matrix H, and vi denotes an
+*  element of the vector defining H(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      REAL               EI
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SCOPY, SGEMV, SLARFG, SSCAL, STRMV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.1 )
+     $   RETURN
+*
+      DO 10 I = 1, NB
+         IF( I.GT.1 ) THEN
+*
+*           Update A(1:n,i)
+*
+*           Compute i-th column of A - Y * V'
+*
+            CALL SGEMV( 'No transpose', N, I-1, -ONE, Y, LDY,
+     $                  A( K+I-1, 1 ), LDA, ONE, A( 1, I ), 1 )
+*
+*           Apply I - V * T' * V' to this column (call it b) from the
+*           left, using the last column of T as workspace
+*
+*           Let  V = ( V1 )   and   b = ( b1 )   (first I-1 rows)
+*                    ( V2 )             ( b2 )
+*
+*           where V1 is unit lower triangular
+*
+*           w := V1' * b1
+*
+            CALL SCOPY( I-1, A( K+1, I ), 1, T( 1, NB ), 1 )
+            CALL STRMV( 'Lower', 'Transpose', 'Unit', I-1, A( K+1, 1 ),
+     $                  LDA, T( 1, NB ), 1 )
+*
+*           w := w + V2'*b2
+*
+            CALL SGEMV( 'Transpose', N-K-I+1, I-1, ONE, A( K+I, 1 ),
+     $                  LDA, A( K+I, I ), 1, ONE, T( 1, NB ), 1 )
+*
+*           w := T'*w
+*
+            CALL STRMV( 'Upper', 'Transpose', 'Non-unit', I-1, T, LDT,
+     $                  T( 1, NB ), 1 )
+*
+*           b2 := b2 - V2*w
+*
+            CALL SGEMV( 'No transpose', N-K-I+1, I-1, -ONE, A( K+I, 1 ),
+     $                  LDA, T( 1, NB ), 1, ONE, A( K+I, I ), 1 )
+*
+*           b1 := b1 - V1*w
+*
+            CALL STRMV( 'Lower', 'No transpose', 'Unit', I-1,
+     $                  A( K+1, 1 ), LDA, T( 1, NB ), 1 )
+            CALL SAXPY( I-1, -ONE, T( 1, NB ), 1, A( K+1, I ), 1 )
+*
+            A( K+I-1, I-1 ) = EI
+         END IF
+*
+*        Generate the elementary reflector H(i) to annihilate
+*        A(k+i+1:n,i)
+*
+         CALL SLARFG( N-K-I+1, A( K+I, I ), A( MIN( K+I+1, N ), I ), 1,
+     $                TAU( I ) )
+         EI = A( K+I, I )
+         A( K+I, I ) = ONE
+*
+*        Compute  Y(1:n,i)
+*
+         CALL SGEMV( 'No transpose', N, N-K-I+1, ONE, A( 1, I+1 ), LDA,
+     $               A( K+I, I ), 1, ZERO, Y( 1, I ), 1 )
+         CALL SGEMV( 'Transpose', N-K-I+1, I-1, ONE, A( K+I, 1 ), LDA,
+     $               A( K+I, I ), 1, ZERO, T( 1, I ), 1 )
+         CALL SGEMV( 'No transpose', N, I-1, -ONE, Y, LDY, T( 1, I ), 1,
+     $               ONE, Y( 1, I ), 1 )
+         CALL SSCAL( N, TAU( I ), Y( 1, I ), 1 )
+*
+*        Compute T(1:i,i)
+*
+         CALL SSCAL( I-1, -TAU( I ), T( 1, I ), 1 )
+         CALL STRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, LDT,
+     $               T( 1, I ), 1 )
+         T( I, I ) = TAU( I )
+*
+   10 CONTINUE
+      A( K+NB, NB ) = EI
+*
+      RETURN
+*
+*     End of SLAHRD
+*
+      END
diff --git a/costa/native/external/lapack/slaic1.f b/costa/native/external/lapack/slaic1.f
new file mode 100644
index 000000000..651059fb0
--- /dev/null
+++ b/costa/native/external/lapack/slaic1.f
@@ -0,0 +1,293 @@
+      SUBROUTINE SLAIC1( JOB, J, X, SEST, W, GAMMA, SESTPR, S, C )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            J, JOB
+      REAL               C, GAMMA, S, SEST, SESTPR
+*     ..
+*     .. Array Arguments ..
+      REAL               W( J ), X( J )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAIC1 applies one step of incremental condition estimation in
+*  its simplest version:
+*
+*  Let x, twonorm(x) = 1, be an approximate singular vector of an j-by-j
+*  lower triangular matrix L, such that
+*           twonorm(L*x) = sest
+*  Then SLAIC1 computes sestpr, s, c such that
+*  the vector
+*                  [ s*x ]
+*           xhat = [  c  ]
+*  is an approximate singular vector of
+*                  [ L     0  ]
+*           Lhat = [ w' gamma ]
+*  in the sense that
+*           twonorm(Lhat*xhat) = sestpr.
+*
+*  Depending on JOB, an estimate for the largest or smallest singular
+*  value is computed.
+*
+*  Note that [s c]' and sestpr**2 is an eigenpair of the system
+*
+*      diag(sest*sest, 0) + [alpha  gamma] * [ alpha ]
+*                                            [ gamma ]
+*
+*  where  alpha =  x'*w.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) INTEGER
+*          = 1: an estimate for the largest singular value is computed.
+*          = 2: an estimate for the smallest singular value is computed.
+*
+*  J       (input) INTEGER
+*          Length of X and W
+*
+*  X       (input) REAL array, dimension (J)
+*          The j-vector x.
+*
+*  SEST    (input) REAL
+*          Estimated singular value of j by j matrix L
+*
+*  W       (input) REAL array, dimension (J)
+*          The j-vector w.
+*
+*  GAMMA   (input) REAL
+*          The diagonal element gamma.
+*
+*  SESTPR  (output) REAL
+*          Estimated singular value of (j+1) by (j+1) matrix Lhat.
+*
+*  S       (output) REAL
+*          Sine needed in forming xhat.
+*
+*  C       (output) REAL
+*          Cosine needed in forming xhat.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0 )
+      REAL               HALF, FOUR
+      PARAMETER          ( HALF = 0.5E0, FOUR = 4.0E0 )
+*     ..
+*     .. Local Scalars ..
+      REAL               ABSALP, ABSEST, ABSGAM, ALPHA, B, COSINE, EPS,
+     $                   NORMA, S1, S2, SINE, T, TEST, TMP, ZETA1, ZETA2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SIGN, SQRT
+*     ..
+*     .. External Functions ..
+      REAL               SDOT, SLAMCH
+      EXTERNAL           SDOT, SLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+      EPS = SLAMCH( 'Epsilon' )
+      ALPHA = SDOT( J, X, 1, W, 1 )
+*
+      ABSALP = ABS( ALPHA )
+      ABSGAM = ABS( GAMMA )
+      ABSEST = ABS( SEST )
+*
+      IF( JOB.EQ.1 ) THEN
+*
+*        Estimating largest singular value
+*
+*        special cases
+*
+         IF( SEST.EQ.ZERO ) THEN
+            S1 = MAX( ABSGAM, ABSALP )
+            IF( S1.EQ.ZERO ) THEN
+               S = ZERO
+               C = ONE
+               SESTPR = ZERO
+            ELSE
+               S = ALPHA / S1
+               C = GAMMA / S1
+               TMP = SQRT( S*S+C*C )
+               S = S / TMP
+               C = C / TMP
+               SESTPR = S1*TMP
+            END IF
+            RETURN
+         ELSE IF( ABSGAM.LE.EPS*ABSEST ) THEN
+            S = ONE
+            C = ZERO
+            TMP = MAX( ABSEST, ABSALP )
+            S1 = ABSEST / TMP
+            S2 = ABSALP / TMP
+            SESTPR = TMP*SQRT( S1*S1+S2*S2 )
+            RETURN
+         ELSE IF( ABSALP.LE.EPS*ABSEST ) THEN
+            S1 = ABSGAM
+            S2 = ABSEST
+            IF( S1.LE.S2 ) THEN
+               S = ONE
+               C = ZERO
+               SESTPR = S2
+            ELSE
+               S = ZERO
+               C = ONE
+               SESTPR = S1
+            END IF
+            RETURN
+         ELSE IF( ABSEST.LE.EPS*ABSALP .OR. ABSEST.LE.EPS*ABSGAM ) THEN
+            S1 = ABSGAM
+            S2 = ABSALP
+            IF( S1.LE.S2 ) THEN
+               TMP = S1 / S2
+               S = SQRT( ONE+TMP*TMP )
+               SESTPR = S2*S
+               C = ( GAMMA / S2 ) / S
+               S = SIGN( ONE, ALPHA ) / S
+            ELSE
+               TMP = S2 / S1
+               C = SQRT( ONE+TMP*TMP )
+               SESTPR = S1*C
+               S = ( ALPHA / S1 ) / C
+               C = SIGN( ONE, GAMMA ) / C
+            END IF
+            RETURN
+         ELSE
+*
+*           normal case
+*
+            ZETA1 = ALPHA / ABSEST
+            ZETA2 = GAMMA / ABSEST
+*
+            B = ( ONE-ZETA1*ZETA1-ZETA2*ZETA2 )*HALF
+            C = ZETA1*ZETA1
+            IF( B.GT.ZERO ) THEN
+               T = C / ( B+SQRT( B*B+C ) )
+            ELSE
+               T = SQRT( B*B+C ) - B
+            END IF
+*
+            SINE = -ZETA1 / T
+            COSINE = -ZETA2 / ( ONE+T )
+            TMP = SQRT( SINE*SINE+COSINE*COSINE )
+            S = SINE / TMP
+            C = COSINE / TMP
+            SESTPR = SQRT( T+ONE )*ABSEST
+            RETURN
+         END IF
+*
+      ELSE IF( JOB.EQ.2 ) THEN
+*
+*        Estimating smallest singular value
+*
+*        special cases
+*
+         IF( SEST.EQ.ZERO ) THEN
+            SESTPR = ZERO
+            IF( MAX( ABSGAM, ABSALP ).EQ.ZERO ) THEN
+               SINE = ONE
+               COSINE = ZERO
+            ELSE
+               SINE = -GAMMA
+               COSINE = ALPHA
+            END IF
+            S1 = MAX( ABS( SINE ), ABS( COSINE ) )
+            S = SINE / S1
+            C = COSINE / S1
+            TMP = SQRT( S*S+C*C )
+            S = S / TMP
+            C = C / TMP
+            RETURN
+         ELSE IF( ABSGAM.LE.EPS*ABSEST ) THEN
+            S = ZERO
+            C = ONE
+            SESTPR = ABSGAM
+            RETURN
+         ELSE IF( ABSALP.LE.EPS*ABSEST ) THEN
+            S1 = ABSGAM
+            S2 = ABSEST
+            IF( S1.LE.S2 ) THEN
+               S = ZERO
+               C = ONE
+               SESTPR = S1
+            ELSE
+               S = ONE
+               C = ZERO
+               SESTPR = S2
+            END IF
+            RETURN
+         ELSE IF( ABSEST.LE.EPS*ABSALP .OR. ABSEST.LE.EPS*ABSGAM ) THEN
+            S1 = ABSGAM
+            S2 = ABSALP
+            IF( S1.LE.S2 ) THEN
+               TMP = S1 / S2
+               C = SQRT( ONE+TMP*TMP )
+               SESTPR = ABSEST*( TMP / C )
+               S = -( GAMMA / S2 ) / C
+               C = SIGN( ONE, ALPHA ) / C
+            ELSE
+               TMP = S2 / S1
+               S = SQRT( ONE+TMP*TMP )
+               SESTPR = ABSEST / S
+               C = ( ALPHA / S1 ) / S
+               S = -SIGN( ONE, GAMMA ) / S
+            END IF
+            RETURN
+         ELSE
+*
+*           normal case
+*
+            ZETA1 = ALPHA / ABSEST
+            ZETA2 = GAMMA / ABSEST
+*
+            NORMA = MAX( ONE+ZETA1*ZETA1+ABS( ZETA1*ZETA2 ),
+     $              ABS( ZETA1*ZETA2 )+ZETA2*ZETA2 )
+*
+*           See if root is closer to zero or to ONE
+*
+            TEST = ONE + TWO*( ZETA1-ZETA2 )*( ZETA1+ZETA2 )
+            IF( TEST.GE.ZERO ) THEN
+*
+*              root is close to zero, compute directly
+*
+               B = ( ZETA1*ZETA1+ZETA2*ZETA2+ONE )*HALF
+               C = ZETA2*ZETA2
+               T = C / ( B+SQRT( ABS( B*B-C ) ) )
+               SINE = ZETA1 / ( ONE-T )
+               COSINE = -ZETA2 / T
+               SESTPR = SQRT( T+FOUR*EPS*EPS*NORMA )*ABSEST
+            ELSE
+*
+*              root is closer to ONE, shift by that amount
+*
+               B = ( ZETA2*ZETA2+ZETA1*ZETA1-ONE )*HALF
+               C = ZETA1*ZETA1
+               IF( B.GE.ZERO ) THEN
+                  T = -C / ( B+SQRT( B*B+C ) )
+               ELSE
+                  T = B - SQRT( B*B+C )
+               END IF
+               SINE = -ZETA1 / T
+               COSINE = -ZETA2 / ( ONE+T )
+               SESTPR = SQRT( ONE+T+FOUR*EPS*EPS*NORMA )*ABSEST
+            END IF
+            TMP = SQRT( SINE*SINE+COSINE*COSINE )
+            S = SINE / TMP
+            C = COSINE / TMP
+            RETURN
+*
+         END IF
+      END IF
+      RETURN
+*
+*     End of SLAIC1
+*
+      END
diff --git a/costa/native/external/lapack/slaln2.f b/costa/native/external/lapack/slaln2.f
new file mode 100644
index 000000000..7b4c3cc92
--- /dev/null
+++ b/costa/native/external/lapack/slaln2.f
@@ -0,0 +1,508 @@
+      SUBROUTINE SLALN2( LTRANS, NA, NW, SMIN, CA, A, LDA, D1, D2, B,
+     $                   LDB, WR, WI, X, LDX, SCALE, XNORM, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      LOGICAL            LTRANS
+      INTEGER            INFO, LDA, LDB, LDX, NA, NW
+      REAL               CA, D1, D2, SCALE, SMIN, WI, WR, XNORM
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), B( LDB, * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLALN2 solves a system of the form  (ca A - w D ) X = s B
+*  or (ca A' - w D) X = s B   with possible scaling ("s") and
+*  perturbation of A.  (A' means A-transpose.)
+*
+*  A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA
+*  real diagonal matrix, w is a real or complex value, and X and B are
+*  NA x 1 matrices -- real if w is real, complex if w is complex.  NA
+*  may be 1 or 2.
+*
+*  If w is complex, X and B are represented as NA x 2 matrices,
+*  the first column of each being the real part and the second
+*  being the imaginary part.
+*
+*  "s" is a scaling factor (.LE. 1), computed by SLALN2, which is
+*  so chosen that X can be computed without overflow.  X is further
+*  scaled if necessary to assure that norm(ca A - w D)*norm(X) is less
+*  than overflow.
+*
+*  If both singular values of (ca A - w D) are less than SMIN,
+*  SMIN*identity will be used instead of (ca A - w D).  If only one
+*  singular value is less than SMIN, one element of (ca A - w D) will be
+*  perturbed enough to make the smallest singular value roughly SMIN.
+*  If both singular values are at least SMIN, (ca A - w D) will not be
+*  perturbed.  In any case, the perturbation will be at most some small
+*  multiple of max( SMIN, ulp*norm(ca A - w D) ).  The singular values
+*  are computed by infinity-norm approximations, and thus will only be
+*  correct to a factor of 2 or so.
+*
+*  Note: all input quantities are assumed to be smaller than overflow
+*  by a reasonable factor.  (See BIGNUM.)
+*
+*  Arguments
+*  ==========
+*
+*  LTRANS  (input) LOGICAL
+*          =.TRUE.:  A-transpose will be used.
+*          =.FALSE.: A will be used (not transposed.)
+*
+*  NA      (input) INTEGER
+*          The size of the matrix A.  It may (only) be 1 or 2.
+*
+*  NW      (input) INTEGER
+*          1 if "w" is real, 2 if "w" is complex.  It may only be 1
+*          or 2.
+*
+*  SMIN    (input) REAL
+*          The desired lower bound on the singular values of A.  This
+*          should be a safe distance away from underflow or overflow,
+*          say, between (underflow/machine precision) and  (machine
+*          precision * overflow ).  (See BIGNUM and ULP.)
+*
+*  CA      (input) REAL
+*          The coefficient c, which A is multiplied by.
+*
+*  A       (input) REAL array, dimension (LDA,NA)
+*          The NA x NA matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of A.  It must be at least NA.
+*
+*  D1      (input) REAL
+*          The 1,1 element in the diagonal matrix D.
+*
+*  D2      (input) REAL
+*          The 2,2 element in the diagonal matrix D.  Not used if NW=1.
+*
+*  B       (input) REAL array, dimension (LDB,NW)
+*          The NA x NW matrix B (right-hand side).  If NW=2 ("w" is
+*          complex), column 1 contains the real part of B and column 2
+*          contains the imaginary part.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of B.  It must be at least NA.
+*
+*  WR      (input) REAL
+*          The real part of the scalar "w".
+*
+*  WI      (input) REAL
+*          The imaginary part of the scalar "w".  Not used if NW=1.
+*
+*  X       (output) REAL array, dimension (LDX,NW)
+*          The NA x NW matrix X (unknowns), as computed by SLALN2.
+*          If NW=2 ("w" is complex), on exit, column 1 will contain
+*          the real part of X and column 2 will contain the imaginary
+*          part.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of X.  It must be at least NA.
+*
+*  SCALE   (output) REAL
+*          The scale factor that B must be multiplied by to insure
+*          that overflow does not occur when computing X.  Thus,
+*          (ca A - w D) X  will be SCALE*B, not B (ignoring
+*          perturbations of A.)  It will be at most 1.
+*
+*  XNORM   (output) REAL
+*          The infinity-norm of X, when X is regarded as an NA x NW
+*          real matrix.
+*
+*  INFO    (output) INTEGER
+*          An error flag.  It will be set to zero if no error occurs,
+*          a negative number if an argument is in error, or a positive
+*          number if  ca A - w D  had to be perturbed.
+*          The possible values are:
+*          = 0: No error occurred, and (ca A - w D) did not have to be
+*                 perturbed.
+*          = 1: (ca A - w D) had to be perturbed to make its smallest
+*               (or only) singular value greater than SMIN.
+*          NOTE: In the interests of speed, this routine does not
+*                check the inputs for errors.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+      REAL               TWO
+      PARAMETER          ( TWO = 2.0E0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            ICMAX, J
+      REAL               BBND, BI1, BI2, BIGNUM, BNORM, BR1, BR2, CI21,
+     $                   CI22, CMAX, CNORM, CR21, CR22, CSI, CSR, LI21,
+     $                   LR21, SMINI, SMLNUM, TEMP, U22ABS, UI11, UI11R,
+     $                   UI12, UI12S, UI22, UR11, UR11R, UR12, UR12S,
+     $                   UR22, XI1, XI2, XR1, XR2
+*     ..
+*     .. Local Arrays ..
+      LOGICAL            CSWAP( 4 ), RSWAP( 4 )
+      INTEGER            IPIVOT( 4, 4 )
+      REAL               CI( 2, 2 ), CIV( 4 ), CR( 2, 2 ), CRV( 4 )
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLADIV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Equivalences ..
+      EQUIVALENCE        ( CI( 1, 1 ), CIV( 1 ) ),
+     $                   ( CR( 1, 1 ), CRV( 1 ) )
+*     ..
+*     .. Data statements ..
+      DATA               CSWAP / .FALSE., .FALSE., .TRUE., .TRUE. /
+      DATA               RSWAP / .FALSE., .TRUE., .FALSE., .TRUE. /
+      DATA               IPIVOT / 1, 2, 3, 4, 2, 1, 4, 3, 3, 4, 1, 2, 4,
+     $                   3, 2, 1 /
+*     ..
+*     .. Executable Statements ..
+*
+*     Compute BIGNUM
+*
+      SMLNUM = TWO*SLAMCH( 'Safe minimum' )
+      BIGNUM = ONE / SMLNUM
+      SMINI = MAX( SMIN, SMLNUM )
+*
+*     Don't check for input errors
+*
+      INFO = 0
+*
+*     Standard Initializations
+*
+      SCALE = ONE
+*
+      IF( NA.EQ.1 ) THEN
+*
+*        1 x 1  (i.e., scalar) system   C X = B
+*
+         IF( NW.EQ.1 ) THEN
+*
+*           Real 1x1 system.
+*
+*           C = ca A - w D
+*
+            CSR = CA*A( 1, 1 ) - WR*D1
+            CNORM = ABS( CSR )
+*
+*           If | C | < SMINI, use C = SMINI
+*
+            IF( CNORM.LT.SMINI ) THEN
+               CSR = SMINI
+               CNORM = SMINI
+               INFO = 1
+            END IF
+*
+*           Check scaling for  X = B / C
+*
+            BNORM = ABS( B( 1, 1 ) )
+            IF( CNORM.LT.ONE .AND. BNORM.GT.ONE ) THEN
+               IF( BNORM.GT.BIGNUM*CNORM )
+     $            SCALE = ONE / BNORM
+            END IF
+*
+*           Compute X
+*
+            X( 1, 1 ) = ( B( 1, 1 )*SCALE ) / CSR
+            XNORM = ABS( X( 1, 1 ) )
+         ELSE
+*
+*           Complex 1x1 system (w is complex)
+*
+*           C = ca A - w D
+*
+            CSR = CA*A( 1, 1 ) - WR*D1
+            CSI = -WI*D1
+            CNORM = ABS( CSR ) + ABS( CSI )
+*
+*           If | C | < SMINI, use C = SMINI
+*
+            IF( CNORM.LT.SMINI ) THEN
+               CSR = SMINI
+               CSI = ZERO
+               CNORM = SMINI
+               INFO = 1
+            END IF
+*
+*           Check scaling for  X = B / C
+*
+            BNORM = ABS( B( 1, 1 ) ) + ABS( B( 1, 2 ) )
+            IF( CNORM.LT.ONE .AND. BNORM.GT.ONE ) THEN
+               IF( BNORM.GT.BIGNUM*CNORM )
+     $            SCALE = ONE / BNORM
+            END IF
+*
+*           Compute X
+*
+            CALL SLADIV( SCALE*B( 1, 1 ), SCALE*B( 1, 2 ), CSR, CSI,
+     $                   X( 1, 1 ), X( 1, 2 ) )
+            XNORM = ABS( X( 1, 1 ) ) + ABS( X( 1, 2 ) )
+         END IF
+*
+      ELSE
+*
+*        2x2 System
+*
+*        Compute the real part of  C = ca A - w D  (or  ca A' - w D )
+*
+         CR( 1, 1 ) = CA*A( 1, 1 ) - WR*D1
+         CR( 2, 2 ) = CA*A( 2, 2 ) - WR*D2
+         IF( LTRANS ) THEN
+            CR( 1, 2 ) = CA*A( 2, 1 )
+            CR( 2, 1 ) = CA*A( 1, 2 )
+         ELSE
+            CR( 2, 1 ) = CA*A( 2, 1 )
+            CR( 1, 2 ) = CA*A( 1, 2 )
+         END IF
+*
+         IF( NW.EQ.1 ) THEN
+*
+*           Real 2x2 system  (w is real)
+*
+*           Find the largest element in C
+*
+            CMAX = ZERO
+            ICMAX = 0
+*
+            DO 10 J = 1, 4
+               IF( ABS( CRV( J ) ).GT.CMAX ) THEN
+                  CMAX = ABS( CRV( J ) )
+                  ICMAX = J
+               END IF
+   10       CONTINUE
+*
+*           If norm(C) < SMINI, use SMINI*identity.
+*
+            IF( CMAX.LT.SMINI ) THEN
+               BNORM = MAX( ABS( B( 1, 1 ) ), ABS( B( 2, 1 ) ) )
+               IF( SMINI.LT.ONE .AND. BNORM.GT.ONE ) THEN
+                  IF( BNORM.GT.BIGNUM*SMINI )
+     $               SCALE = ONE / BNORM
+               END IF
+               TEMP = SCALE / SMINI
+               X( 1, 1 ) = TEMP*B( 1, 1 )
+               X( 2, 1 ) = TEMP*B( 2, 1 )
+               XNORM = TEMP*BNORM
+               INFO = 1
+               RETURN
+            END IF
+*
+*           Gaussian elimination with complete pivoting.
+*
+            UR11 = CRV( ICMAX )
+            CR21 = CRV( IPIVOT( 2, ICMAX ) )
+            UR12 = CRV( IPIVOT( 3, ICMAX ) )
+            CR22 = CRV( IPIVOT( 4, ICMAX ) )
+            UR11R = ONE / UR11
+            LR21 = UR11R*CR21
+            UR22 = CR22 - UR12*LR21
+*
+*           If smaller pivot < SMINI, use SMINI
+*
+            IF( ABS( UR22 ).LT.SMINI ) THEN
+               UR22 = SMINI
+               INFO = 1
+            END IF
+            IF( RSWAP( ICMAX ) ) THEN
+               BR1 = B( 2, 1 )
+               BR2 = B( 1, 1 )
+            ELSE
+               BR1 = B( 1, 1 )
+               BR2 = B( 2, 1 )
+            END IF
+            BR2 = BR2 - LR21*BR1
+            BBND = MAX( ABS( BR1*( UR22*UR11R ) ), ABS( BR2 ) )
+            IF( BBND.GT.ONE .AND. ABS( UR22 ).LT.ONE ) THEN
+               IF( BBND.GE.BIGNUM*ABS( UR22 ) )
+     $            SCALE = ONE / BBND
+            END IF
+*
+            XR2 = ( BR2*SCALE ) / UR22
+            XR1 = ( SCALE*BR1 )*UR11R - XR2*( UR11R*UR12 )
+            IF( CSWAP( ICMAX ) ) THEN
+               X( 1, 1 ) = XR2
+               X( 2, 1 ) = XR1
+            ELSE
+               X( 1, 1 ) = XR1
+               X( 2, 1 ) = XR2
+            END IF
+            XNORM = MAX( ABS( XR1 ), ABS( XR2 ) )
+*
+*           Further scaling if  norm(A) norm(X) > overflow
+*
+            IF( XNORM.GT.ONE .AND. CMAX.GT.ONE ) THEN
+               IF( XNORM.GT.BIGNUM / CMAX ) THEN
+                  TEMP = CMAX / BIGNUM
+                  X( 1, 1 ) = TEMP*X( 1, 1 )
+                  X( 2, 1 ) = TEMP*X( 2, 1 )
+                  XNORM = TEMP*XNORM
+                  SCALE = TEMP*SCALE
+               END IF
+            END IF
+         ELSE
+*
+*           Complex 2x2 system  (w is complex)
+*
+*           Find the largest element in C
+*
+            CI( 1, 1 ) = -WI*D1
+            CI( 2, 1 ) = ZERO
+            CI( 1, 2 ) = ZERO
+            CI( 2, 2 ) = -WI*D2
+            CMAX = ZERO
+            ICMAX = 0
+*
+            DO 20 J = 1, 4
+               IF( ABS( CRV( J ) )+ABS( CIV( J ) ).GT.CMAX ) THEN
+                  CMAX = ABS( CRV( J ) ) + ABS( CIV( J ) )
+                  ICMAX = J
+               END IF
+   20       CONTINUE
+*
+*           If norm(C) < SMINI, use SMINI*identity.
+*
+            IF( CMAX.LT.SMINI ) THEN
+               BNORM = MAX( ABS( B( 1, 1 ) )+ABS( B( 1, 2 ) ),
+     $                 ABS( B( 2, 1 ) )+ABS( B( 2, 2 ) ) )
+               IF( SMINI.LT.ONE .AND. BNORM.GT.ONE ) THEN
+                  IF( BNORM.GT.BIGNUM*SMINI )
+     $               SCALE = ONE / BNORM
+               END IF
+               TEMP = SCALE / SMINI
+               X( 1, 1 ) = TEMP*B( 1, 1 )
+               X( 2, 1 ) = TEMP*B( 2, 1 )
+               X( 1, 2 ) = TEMP*B( 1, 2 )
+               X( 2, 2 ) = TEMP*B( 2, 2 )
+               XNORM = TEMP*BNORM
+               INFO = 1
+               RETURN
+            END IF
+*
+*           Gaussian elimination with complete pivoting.
+*
+            UR11 = CRV( ICMAX )
+            UI11 = CIV( ICMAX )
+            CR21 = CRV( IPIVOT( 2, ICMAX ) )
+            CI21 = CIV( IPIVOT( 2, ICMAX ) )
+            UR12 = CRV( IPIVOT( 3, ICMAX ) )
+            UI12 = CIV( IPIVOT( 3, ICMAX ) )
+            CR22 = CRV( IPIVOT( 4, ICMAX ) )
+            CI22 = CIV( IPIVOT( 4, ICMAX ) )
+            IF( ICMAX.EQ.1 .OR. ICMAX.EQ.4 ) THEN
+*
+*              Code when off-diagonals of pivoted C are real
+*
+               IF( ABS( UR11 ).GT.ABS( UI11 ) ) THEN
+                  TEMP = UI11 / UR11
+                  UR11R = ONE / ( UR11*( ONE+TEMP**2 ) )
+                  UI11R = -TEMP*UR11R
+               ELSE
+                  TEMP = UR11 / UI11
+                  UI11R = -ONE / ( UI11*( ONE+TEMP**2 ) )
+                  UR11R = -TEMP*UI11R
+               END IF
+               LR21 = CR21*UR11R
+               LI21 = CR21*UI11R
+               UR12S = UR12*UR11R
+               UI12S = UR12*UI11R
+               UR22 = CR22 - UR12*LR21
+               UI22 = CI22 - UR12*LI21
+            ELSE
+*
+*              Code when diagonals of pivoted C are real
+*
+               UR11R = ONE / UR11
+               UI11R = ZERO
+               LR21 = CR21*UR11R
+               LI21 = CI21*UR11R
+               UR12S = UR12*UR11R
+               UI12S = UI12*UR11R
+               UR22 = CR22 - UR12*LR21 + UI12*LI21
+               UI22 = -UR12*LI21 - UI12*LR21
+            END IF
+            U22ABS = ABS( UR22 ) + ABS( UI22 )
+*
+*           If smaller pivot < SMINI, use SMINI
+*
+            IF( U22ABS.LT.SMINI ) THEN
+               UR22 = SMINI
+               UI22 = ZERO
+               INFO = 1
+            END IF
+            IF( RSWAP( ICMAX ) ) THEN
+               BR2 = B( 1, 1 )
+               BR1 = B( 2, 1 )
+               BI2 = B( 1, 2 )
+               BI1 = B( 2, 2 )
+            ELSE
+               BR1 = B( 1, 1 )
+               BR2 = B( 2, 1 )
+               BI1 = B( 1, 2 )
+               BI2 = B( 2, 2 )
+            END IF
+            BR2 = BR2 - LR21*BR1 + LI21*BI1
+            BI2 = BI2 - LI21*BR1 - LR21*BI1
+            BBND = MAX( ( ABS( BR1 )+ABS( BI1 ) )*
+     $             ( U22ABS*( ABS( UR11R )+ABS( UI11R ) ) ),
+     $             ABS( BR2 )+ABS( BI2 ) )
+            IF( BBND.GT.ONE .AND. U22ABS.LT.ONE ) THEN
+               IF( BBND.GE.BIGNUM*U22ABS ) THEN
+                  SCALE = ONE / BBND
+                  BR1 = SCALE*BR1
+                  BI1 = SCALE*BI1
+                  BR2 = SCALE*BR2
+                  BI2 = SCALE*BI2
+               END IF
+            END IF
+*
+            CALL SLADIV( BR2, BI2, UR22, UI22, XR2, XI2 )
+            XR1 = UR11R*BR1 - UI11R*BI1 - UR12S*XR2 + UI12S*XI2
+            XI1 = UI11R*BR1 + UR11R*BI1 - UI12S*XR2 - UR12S*XI2
+            IF( CSWAP( ICMAX ) ) THEN
+               X( 1, 1 ) = XR2
+               X( 2, 1 ) = XR1
+               X( 1, 2 ) = XI2
+               X( 2, 2 ) = XI1
+            ELSE
+               X( 1, 1 ) = XR1
+               X( 2, 1 ) = XR2
+               X( 1, 2 ) = XI1
+               X( 2, 2 ) = XI2
+            END IF
+            XNORM = MAX( ABS( XR1 )+ABS( XI1 ), ABS( XR2 )+ABS( XI2 ) )
+*
+*           Further scaling if  norm(A) norm(X) > overflow
+*
+            IF( XNORM.GT.ONE .AND. CMAX.GT.ONE ) THEN
+               IF( XNORM.GT.BIGNUM / CMAX ) THEN
+                  TEMP = CMAX / BIGNUM
+                  X( 1, 1 ) = TEMP*X( 1, 1 )
+                  X( 2, 1 ) = TEMP*X( 2, 1 )
+                  X( 1, 2 ) = TEMP*X( 1, 2 )
+                  X( 2, 2 ) = TEMP*X( 2, 2 )
+                  XNORM = TEMP*XNORM
+                  SCALE = TEMP*SCALE
+               END IF
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SLALN2
+*
+      END
diff --git a/costa/native/external/lapack/slals0.f b/costa/native/external/lapack/slals0.f
new file mode 100644
index 000000000..bf9004bea
--- /dev/null
+++ b/costa/native/external/lapack/slals0.f
@@ -0,0 +1,375 @@
+      SUBROUTINE SLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B, LDB, BX, LDBX,
+     $                   PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM,
+     $                   POLES, DIFL, DIFR, Z, K, C, S, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     December 1, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            GIVPTR, ICOMPQ, INFO, K, LDB, LDBX, LDGCOL,
+     $                   LDGNUM, NL, NR, NRHS, SQRE
+      REAL               C, S
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( LDGCOL, * ), PERM( * )
+      REAL               B( LDB, * ), BX( LDBX, * ), DIFL( * ),
+     $                   DIFR( LDGNUM, * ), GIVNUM( LDGNUM, * ),
+     $                   POLES( LDGNUM, * ), WORK( * ), Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLALS0 applies back the multiplying factors of either the left or the
+*  right singular vector matrix of a diagonal matrix appended by a row
+*  to the right hand side matrix B in solving the least squares problem
+*  using the divide-and-conquer SVD approach.
+*
+*  For the left singular vector matrix, three types of orthogonal
+*  matrices are involved:
+*
+*  (1L) Givens rotations: the number of such rotations is GIVPTR; the
+*       pairs of columns/rows they were applied to are stored in GIVCOL;
+*       and the C- and S-values of these rotations are stored in GIVNUM.
+*
+*  (2L) Permutation. The (NL+1)-st row of B is to be moved to the first
+*       row, and for J=2:N, PERM(J)-th row of B is to be moved to the
+*       J-th row.
+*
+*  (3L) The left singular vector matrix of the remaining matrix.
+*
+*  For the right singular vector matrix, four types of orthogonal
+*  matrices are involved:
+*
+*  (1R) The right singular vector matrix of the remaining matrix.
+*
+*  (2R) If SQRE = 1, one extra Givens rotation to generate the right
+*       null space.
+*
+*  (3R) The inverse transformation of (2L).
+*
+*  (4R) The inverse transformation of (1L).
+*
+*  Arguments
+*  =========
+*
+*  ICOMPQ (input) INTEGER
+*         Specifies whether singular vectors are to be computed in
+*         factored form:
+*         = 0: Left singular vector matrix.
+*         = 1: Right singular vector matrix.
+*
+*  NL     (input) INTEGER
+*         The row dimension of the upper block. NL >= 1.
+*
+*  NR     (input) INTEGER
+*         The row dimension of the lower block. NR >= 1.
+*
+*  SQRE   (input) INTEGER
+*         = 0: the lower block is an NR-by-NR square matrix.
+*         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+*
+*         The bidiagonal matrix has row dimension N = NL + NR + 1,
+*         and column dimension M = N + SQRE.
+*
+*  NRHS   (input) INTEGER
+*         The number of columns of B and BX. NRHS must be at least 1.
+*
+*  B      (input/output) REAL array, dimension ( LDB, NRHS )
+*         On input, B contains the right hand sides of the least
+*         squares problem in rows 1 through M. On output, B contains
+*         the solution X in rows 1 through N.
+*
+*  LDB    (input) INTEGER
+*         The leading dimension of B. LDB must be at least
+*         max(1,MAX( M, N ) ).
+*
+*  BX     (workspace) REAL array, dimension ( LDBX, NRHS )
+*
+*  LDBX   (input) INTEGER
+*         The leading dimension of BX.
+*
+*  PERM   (input) INTEGER array, dimension ( N )
+*         The permutations (from deflation and sorting) applied
+*         to the two blocks.
+*
+*  GIVPTR (input) INTEGER
+*         The number of Givens rotations which took place in this
+*         subproblem.
+*
+*  GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 )
+*         Each pair of numbers indicates a pair of rows/columns
+*         involved in a Givens rotation.
+*
+*  LDGCOL (input) INTEGER
+*         The leading dimension of GIVCOL, must be at least N.
+*
+*  GIVNUM (input) REAL array, dimension ( LDGNUM, 2 )
+*         Each number indicates the C or S value used in the
+*         corresponding Givens rotation.
+*
+*  LDGNUM (input) INTEGER
+*         The leading dimension of arrays DIFR, POLES and
+*         GIVNUM, must be at least K.
+*
+*  POLES  (input) REAL array, dimension ( LDGNUM, 2 )
+*         On entry, POLES(1:K, 1) contains the new singular
+*         values obtained from solving the secular equation, and
+*         POLES(1:K, 2) is an array containing the poles in the secular
+*         equation.
+*
+*  DIFL   (input) REAL array, dimension ( K ).
+*         On entry, DIFL(I) is the distance between I-th updated
+*         (undeflated) singular value and the I-th (undeflated) old
+*         singular value.
+*
+*  DIFR   (input) REAL array, dimension ( LDGNUM, 2 ).
+*         On entry, DIFR(I, 1) contains the distances between I-th
+*         updated (undeflated) singular value and the I+1-th
+*         (undeflated) old singular value. And DIFR(I, 2) is the
+*         normalizing factor for the I-th right singular vector.
+*
+*  Z      (input) REAL array, dimension ( K )
+*         Contain the components of the deflation-adjusted updating row
+*         vector.
+*
+*  K      (input) INTEGER
+*         Contains the dimension of the non-deflated matrix,
+*         This is the order of the related secular equation. 1 <= K <=N.
+*
+*  C      (input) REAL
+*         C contains garbage if SQRE =0 and the C-value of a Givens
+*         rotation related to the right null space if SQRE = 1.
+*
+*  S      (input) REAL
+*         S contains garbage if SQRE =0 and the S-value of a Givens
+*         rotation related to the right null space if SQRE = 1.
+*
+*  WORK   (workspace) REAL array, dimension ( K )
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Ren-Cang Li, Computer Science Division, University of
+*       California at Berkeley, USA
+*     Osni Marques, LBNL/NERSC, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO, NEGONE
+      PARAMETER          ( ONE = 1.0E0, ZERO = 0.0E0, NEGONE = -1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, M, N, NLP1
+      REAL               DIFLJ, DIFRJ, DJ, DSIGJ, DSIGJP, TEMP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SGEMV, SLACPY, SLASCL, SROT, SSCAL,
+     $                   XERBLA
+*     ..
+*     .. External Functions ..
+      REAL               SLAMC3, SNRM2
+      EXTERNAL           SLAMC3, SNRM2
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( NL.LT.1 ) THEN
+         INFO = -2
+      ELSE IF( NR.LT.1 ) THEN
+         INFO = -3
+      ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN
+         INFO = -4
+      END IF
+*
+      N = NL + NR + 1
+*
+      IF( NRHS.LT.1 ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.N ) THEN
+         INFO = -7
+      ELSE IF( LDBX.LT.N ) THEN
+         INFO = -9
+      ELSE IF( GIVPTR.LT.0 ) THEN
+         INFO = -11
+      ELSE IF( LDGCOL.LT.N ) THEN
+         INFO = -13
+      ELSE IF( LDGNUM.LT.N ) THEN
+         INFO = -15
+      ELSE IF( K.LT.1 ) THEN
+         INFO = -20
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLALS0', -INFO )
+         RETURN
+      END IF
+*
+      M = N + SQRE
+      NLP1 = NL + 1
+*
+      IF( ICOMPQ.EQ.0 ) THEN
+*
+*        Apply back orthogonal transformations from the left.
+*
+*        Step (1L): apply back the Givens rotations performed.
+*
+         DO 10 I = 1, GIVPTR
+            CALL SROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB,
+     $                 B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ),
+     $                 GIVNUM( I, 1 ) )
+   10    CONTINUE
+*
+*        Step (2L): permute rows of B.
+*
+         CALL SCOPY( NRHS, B( NLP1, 1 ), LDB, BX( 1, 1 ), LDBX )
+         DO 20 I = 2, N
+            CALL SCOPY( NRHS, B( PERM( I ), 1 ), LDB, BX( I, 1 ), LDBX )
+   20    CONTINUE
+*
+*        Step (3L): apply the inverse of the left singular vector
+*        matrix to BX.
+*
+         IF( K.EQ.1 ) THEN
+            CALL SCOPY( NRHS, BX, LDBX, B, LDB )
+            IF( Z( 1 ).LT.ZERO ) THEN
+               CALL SSCAL( NRHS, NEGONE, B, LDB )
+            END IF
+         ELSE
+            DO 50 J = 1, K
+               DIFLJ = DIFL( J )
+               DJ = POLES( J, 1 )
+               DSIGJ = -POLES( J, 2 )
+               IF( J.LT.K ) THEN
+                  DIFRJ = -DIFR( J, 1 )
+                  DSIGJP = -POLES( J+1, 2 )
+               END IF
+               IF( ( Z( J ).EQ.ZERO ) .OR. ( POLES( J, 2 ).EQ.ZERO ) )
+     $              THEN
+                  WORK( J ) = ZERO
+               ELSE
+                  WORK( J ) = -POLES( J, 2 )*Z( J ) / DIFLJ /
+     $                        ( POLES( J, 2 )+DJ )
+               END IF
+               DO 30 I = 1, J - 1
+                  IF( ( Z( I ).EQ.ZERO ) .OR.
+     $                ( POLES( I, 2 ).EQ.ZERO ) ) THEN
+                     WORK( I ) = ZERO
+                  ELSE
+                     WORK( I ) = POLES( I, 2 )*Z( I ) /
+     $                           ( SLAMC3( POLES( I, 2 ), DSIGJ )-
+     $                           DIFLJ ) / ( POLES( I, 2 )+DJ )
+                  END IF
+   30          CONTINUE
+               DO 40 I = J + 1, K
+                  IF( ( Z( I ).EQ.ZERO ) .OR.
+     $                ( POLES( I, 2 ).EQ.ZERO ) ) THEN
+                     WORK( I ) = ZERO
+                  ELSE
+                     WORK( I ) = POLES( I, 2 )*Z( I ) /
+     $                           ( SLAMC3( POLES( I, 2 ), DSIGJP )+
+     $                           DIFRJ ) / ( POLES( I, 2 )+DJ )
+                  END IF
+   40          CONTINUE
+               WORK( 1 ) = NEGONE
+               TEMP = SNRM2( K, WORK, 1 )
+               CALL SGEMV( 'T', K, NRHS, ONE, BX, LDBX, WORK, 1, ZERO,
+     $                     B( J, 1 ), LDB )
+               CALL SLASCL( 'G', 0, 0, TEMP, ONE, 1, NRHS, B( J, 1 ),
+     $                      LDB, INFO )
+   50       CONTINUE
+         END IF
+*
+*        Move the deflated rows of BX to B also.
+*
+         IF( K.LT.MAX( M, N ) )
+     $      CALL SLACPY( 'A', N-K, NRHS, BX( K+1, 1 ), LDBX,
+     $                   B( K+1, 1 ), LDB )
+      ELSE
+*
+*        Apply back the right orthogonal transformations.
+*
+*        Step (1R): apply back the new right singular vector matrix
+*        to B.
+*
+         IF( K.EQ.1 ) THEN
+            CALL SCOPY( NRHS, B, LDB, BX, LDBX )
+         ELSE
+            DO 80 J = 1, K
+               DSIGJ = POLES( J, 2 )
+               IF( Z( J ).EQ.ZERO ) THEN
+                  WORK( J ) = ZERO
+               ELSE
+                  WORK( J ) = -Z( J ) / DIFL( J ) /
+     $                        ( DSIGJ+POLES( J, 1 ) ) / DIFR( J, 2 )
+               END IF
+               DO 60 I = 1, J - 1
+                  IF( Z( J ).EQ.ZERO ) THEN
+                     WORK( I ) = ZERO
+                  ELSE
+                     WORK( I ) = Z( J ) / ( SLAMC3( DSIGJ, -POLES( I+1,
+     $                           2 ) )-DIFR( I, 1 ) ) /
+     $                           ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 )
+                  END IF
+   60          CONTINUE
+               DO 70 I = J + 1, K
+                  IF( Z( J ).EQ.ZERO ) THEN
+                     WORK( I ) = ZERO
+                  ELSE
+                     WORK( I ) = Z( J ) / ( SLAMC3( DSIGJ, -POLES( I,
+     $                           2 ) )-DIFL( I ) ) /
+     $                           ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 )
+                  END IF
+   70          CONTINUE
+               CALL SGEMV( 'T', K, NRHS, ONE, B, LDB, WORK, 1, ZERO,
+     $                     BX( J, 1 ), LDBX )
+   80       CONTINUE
+         END IF
+*
+*        Step (2R): if SQRE = 1, apply back the rotation that is
+*        related to the right null space of the subproblem.
+*
+         IF( SQRE.EQ.1 ) THEN
+            CALL SCOPY( NRHS, B( M, 1 ), LDB, BX( M, 1 ), LDBX )
+            CALL SROT( NRHS, BX( 1, 1 ), LDBX, BX( M, 1 ), LDBX, C, S )
+         END IF
+         IF( K.LT.MAX( M, N ) )
+     $      CALL SLACPY( 'A', N-K, NRHS, B( K+1, 1 ), LDB, BX( K+1, 1 ),
+     $                   LDBX )
+*
+*        Step (3R): permute rows of B.
+*
+         CALL SCOPY( NRHS, BX( 1, 1 ), LDBX, B( NLP1, 1 ), LDB )
+         IF( SQRE.EQ.1 ) THEN
+            CALL SCOPY( NRHS, BX( M, 1 ), LDBX, B( M, 1 ), LDB )
+         END IF
+         DO 90 I = 2, N
+            CALL SCOPY( NRHS, BX( I, 1 ), LDBX, B( PERM( I ), 1 ), LDB )
+   90    CONTINUE
+*
+*        Step (4R): apply back the Givens rotations performed.
+*
+         DO 100 I = GIVPTR, 1, -1
+            CALL SROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB,
+     $                 B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ),
+     $                 -GIVNUM( I, 1 ) )
+  100    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of SLALS0
+*
+      END
diff --git a/costa/native/external/lapack/slalsa.f b/costa/native/external/lapack/slalsa.f
new file mode 100644
index 000000000..1294010ca
--- /dev/null
+++ b/costa/native/external/lapack/slalsa.f
@@ -0,0 +1,363 @@
+      SUBROUTINE SLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U,
+     $                   LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR,
+     $                   GIVCOL, LDGCOL, PERM, GIVNUM, C, S, WORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS,
+     $                   SMLSIZ
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ),
+     $                   K( * ), PERM( LDGCOL, * )
+      REAL               B( LDB, * ), BX( LDBX, * ), C( * ),
+     $                   DIFL( LDU, * ), DIFR( LDU, * ),
+     $                   GIVNUM( LDU, * ), POLES( LDU, * ), S( * ),
+     $                   U( LDU, * ), VT( LDU, * ), WORK( * ),
+     $                   Z( LDU, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLALSA is an itermediate step in solving the least squares problem
+*  by computing the SVD of the coefficient matrix in compact form (The
+*  singular vectors are computed as products of simple orthorgonal
+*  matrices.).
+*
+*  If ICOMPQ = 0, SLALSA applies the inverse of the left singular vector
+*  matrix of an upper bidiagonal matrix to the right hand side; and if
+*  ICOMPQ = 1, SLALSA applies the right singular vector matrix to the
+*  right hand side. The singular vector matrices were generated in
+*  compact form by SLALSA.
+*
+*  Arguments
+*  =========
+*
+*
+*  ICOMPQ (input) INTEGER
+*         Specifies whether the left or the right singular vector
+*         matrix is involved.
+*         = 0: Left singular vector matrix
+*         = 1: Right singular vector matrix
+*
+*  SMLSIZ (input) INTEGER
+*         The maximum size of the subproblems at the bottom of the
+*         computation tree.
+*
+*  N      (input) INTEGER
+*         The row and column dimensions of the upper bidiagonal matrix.
+*
+*  NRHS   (input) INTEGER
+*         The number of columns of B and BX. NRHS must be at least 1.
+*
+*  B      (input) REAL array, dimension ( LDB, NRHS )
+*         On input, B contains the right hand sides of the least
+*         squares problem in rows 1 through M. On output, B contains
+*         the solution X in rows 1 through N.
+*
+*  LDB    (input) INTEGER
+*         The leading dimension of B in the calling subprogram.
+*         LDB must be at least max(1,MAX( M, N ) ).
+*
+*  BX     (output) REAL array, dimension ( LDBX, NRHS )
+*         On exit, the result of applying the left or right singular
+*         vector matrix to B.
+*
+*  LDBX   (input) INTEGER
+*         The leading dimension of BX.
+*
+*  U      (input) REAL array, dimension ( LDU, SMLSIZ ).
+*         On entry, U contains the left singular vector matrices of all
+*         subproblems at the bottom level.
+*
+*  LDU    (input) INTEGER, LDU = > N.
+*         The leading dimension of arrays U, VT, DIFL, DIFR,
+*         POLES, GIVNUM, and Z.
+*
+*  VT     (input) REAL array, dimension ( LDU, SMLSIZ+1 ).
+*         On entry, VT' contains the right singular vector matrices of
+*         all subproblems at the bottom level.
+*
+*  K      (input) INTEGER array, dimension ( N ).
+*
+*  DIFL   (input) REAL array, dimension ( LDU, NLVL ).
+*         where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1.
+*
+*  DIFR   (input) REAL array, dimension ( LDU, 2 * NLVL ).
+*         On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record
+*         distances between singular values on the I-th level and
+*         singular values on the (I -1)-th level, and DIFR(*, 2 * I)
+*         record the normalizing factors of the right singular vectors
+*         matrices of subproblems on I-th level.
+*
+*  Z      (input) REAL array, dimension ( LDU, NLVL ).
+*         On entry, Z(1, I) contains the components of the deflation-
+*         adjusted updating row vector for subproblems on the I-th
+*         level.
+*
+*  POLES  (input) REAL array, dimension ( LDU, 2 * NLVL ).
+*         On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old
+*         singular values involved in the secular equations on the I-th
+*         level.
+*
+*  GIVPTR (input) INTEGER array, dimension ( N ).
+*         On entry, GIVPTR( I ) records the number of Givens
+*         rotations performed on the I-th problem on the computation
+*         tree.
+*
+*  GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ).
+*         On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the
+*         locations of Givens rotations performed on the I-th level on
+*         the computation tree.
+*
+*  LDGCOL (input) INTEGER, LDGCOL = > N.
+*         The leading dimension of arrays GIVCOL and PERM.
+*
+*  PERM   (input) INTEGER array, dimension ( LDGCOL, NLVL ).
+*         On entry, PERM(*, I) records permutations done on the I-th
+*         level of the computation tree.
+*
+*  GIVNUM (input) REAL array, dimension ( LDU, 2 * NLVL ).
+*         On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S-
+*         values of Givens rotations performed on the I-th level on the
+*         computation tree.
+*
+*  C      (input) REAL array, dimension ( N ).
+*         On entry, if the I-th subproblem is not square,
+*         C( I ) contains the C-value of a Givens rotation related to
+*         the right null space of the I-th subproblem.
+*
+*  S      (input) REAL array, dimension ( N ).
+*         On entry, if the I-th subproblem is not square,
+*         S( I ) contains the S-value of a Givens rotation related to
+*         the right null space of the I-th subproblem.
+*
+*  WORK   (workspace) REAL array.
+*         The dimension must be at least N.
+*
+*  IWORK  (workspace) INTEGER array.
+*         The dimension must be at least 3 * N
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Ren-Cang Li, Computer Science Division, University of
+*       California at Berkeley, USA
+*     Osni Marques, LBNL/NERSC, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, I1, IC, IM1, INODE, J, LF, LL, LVL, LVL2,
+     $                   ND, NDB1, NDIML, NDIMR, NL, NLF, NLP1, NLVL,
+     $                   NR, NRF, NRP1, SQRE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SGEMM, SLALS0, SLASDT, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( SMLSIZ.LT.3 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.SMLSIZ ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.1 ) THEN
+         INFO = -4
+      ELSE IF( LDB.LT.N ) THEN
+         INFO = -6
+      ELSE IF( LDBX.LT.N ) THEN
+         INFO = -8
+      ELSE IF( LDU.LT.N ) THEN
+         INFO = -10
+      ELSE IF( LDGCOL.LT.N ) THEN
+         INFO = -19
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLALSA', -INFO )
+         RETURN
+      END IF
+*
+*     Book-keeping and  setting up the computation tree.
+*
+      INODE = 1
+      NDIML = INODE + N
+      NDIMR = NDIML + N
+*
+      CALL SLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ),
+     $             IWORK( NDIMR ), SMLSIZ )
+*
+*     The following code applies back the left singular vector factors.
+*     For applying back the right singular vector factors, go to 50.
+*
+      IF( ICOMPQ.EQ.1 ) THEN
+         GO TO 50
+      END IF
+*
+*     The nodes on the bottom level of the tree were solved
+*     by SLASDQ. The corresponding left and right singular vector
+*     matrices are in explicit form. First apply back the left
+*     singular vector matrices.
+*
+      NDB1 = ( ND+1 ) / 2
+      DO 10 I = NDB1, ND
+*
+*        IC : center row of each node
+*        NL : number of rows of left  subproblem
+*        NR : number of rows of right subproblem
+*        NLF: starting row of the left   subproblem
+*        NRF: starting row of the right  subproblem
+*
+         I1 = I - 1
+         IC = IWORK( INODE+I1 )
+         NL = IWORK( NDIML+I1 )
+         NR = IWORK( NDIMR+I1 )
+         NLF = IC - NL
+         NRF = IC + 1
+         CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
+     $               B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
+         CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
+     $               B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
+   10 CONTINUE
+*
+*     Next copy the rows of B that correspond to unchanged rows
+*     in the bidiagonal matrix to BX.
+*
+      DO 20 I = 1, ND
+         IC = IWORK( INODE+I-1 )
+         CALL SCOPY( NRHS, B( IC, 1 ), LDB, BX( IC, 1 ), LDBX )
+   20 CONTINUE
+*
+*     Finally go through the left singular vector matrices of all
+*     the other subproblems bottom-up on the tree.
+*
+      J = 2**NLVL
+      SQRE = 0
+*
+      DO 40 LVL = NLVL, 1, -1
+         LVL2 = 2*LVL - 1
+*
+*        find the first node LF and last node LL on
+*        the current level LVL
+*
+         IF( LVL.EQ.1 ) THEN
+            LF = 1
+            LL = 1
+         ELSE
+            LF = 2**( LVL-1 )
+            LL = 2*LF - 1
+         END IF
+         DO 30 I = LF, LL
+            IM1 = I - 1
+            IC = IWORK( INODE+IM1 )
+            NL = IWORK( NDIML+IM1 )
+            NR = IWORK( NDIMR+IM1 )
+            NLF = IC - NL
+            NRF = IC + 1
+            J = J - 1
+            CALL SLALS0( ICOMPQ, NL, NR, SQRE, NRHS, BX( NLF, 1 ), LDBX,
+     $                   B( NLF, 1 ), LDB, PERM( NLF, LVL ),
+     $                   GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
+     $                   GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ),
+     $                   DIFL( NLF, LVL ), DIFR( NLF, LVL2 ),
+     $                   Z( NLF, LVL ), K( J ), C( J ), S( J ), WORK,
+     $                   INFO )
+   30    CONTINUE
+   40 CONTINUE
+      GO TO 90
+*
+*     ICOMPQ = 1: applying back the right singular vector factors.
+*
+   50 CONTINUE
+*
+*     First now go through the right singular vector matrices of all
+*     the tree nodes top-down.
+*
+      J = 0
+      DO 70 LVL = 1, NLVL
+         LVL2 = 2*LVL - 1
+*
+*        Find the first node LF and last node LL on
+*        the current level LVL.
+*
+         IF( LVL.EQ.1 ) THEN
+            LF = 1
+            LL = 1
+         ELSE
+            LF = 2**( LVL-1 )
+            LL = 2*LF - 1
+         END IF
+         DO 60 I = LL, LF, -1
+            IM1 = I - 1
+            IC = IWORK( INODE+IM1 )
+            NL = IWORK( NDIML+IM1 )
+            NR = IWORK( NDIMR+IM1 )
+            NLF = IC - NL
+            NRF = IC + 1
+            IF( I.EQ.LL ) THEN
+               SQRE = 0
+            ELSE
+               SQRE = 1
+            END IF
+            J = J + 1
+            CALL SLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B( NLF, 1 ), LDB,
+     $                   BX( NLF, 1 ), LDBX, PERM( NLF, LVL ),
+     $                   GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
+     $                   GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ),
+     $                   DIFL( NLF, LVL ), DIFR( NLF, LVL2 ),
+     $                   Z( NLF, LVL ), K( J ), C( J ), S( J ), WORK,
+     $                   INFO )
+   60    CONTINUE
+   70 CONTINUE
+*
+*     The nodes on the bottom level of the tree were solved
+*     by SLASDQ. The corresponding right singular vector
+*     matrices are in explicit form. Apply them back.
+*
+      NDB1 = ( ND+1 ) / 2
+      DO 80 I = NDB1, ND
+         I1 = I - 1
+         IC = IWORK( INODE+I1 )
+         NL = IWORK( NDIML+I1 )
+         NR = IWORK( NDIMR+I1 )
+         NLP1 = NL + 1
+         IF( I.EQ.ND ) THEN
+            NRP1 = NR
+         ELSE
+            NRP1 = NR + 1
+         END IF
+         NLF = IC - NL
+         NRF = IC + 1
+         CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
+     $               B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
+         CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
+     $               B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
+   80 CONTINUE
+*
+   90 CONTINUE
+*
+      RETURN
+*
+*     End of SLALSA
+*
+      END
diff --git a/costa/native/external/lapack/slalsd.f b/costa/native/external/lapack/slalsd.f
new file mode 100644
index 000000000..b14739e8b
--- /dev/null
+++ b/costa/native/external/lapack/slalsd.f
@@ -0,0 +1,433 @@
+      SUBROUTINE SLALSD( UPLO, SMLSIZ, N, NRHS, D, E, B, LDB, RCOND,
+     $                   RANK, WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, N, NRHS, RANK, SMLSIZ
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               B( LDB, * ), D( * ), E( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLALSD uses the singular value decomposition of A to solve the least
+*  squares problem of finding X to minimize the Euclidean norm of each
+*  column of A*X-B, where A is N-by-N upper bidiagonal, and X and B
+*  are N-by-NRHS. The solution X overwrites B.
+*
+*  The singular values of A smaller than RCOND times the largest
+*  singular value are treated as zero in solving the least squares
+*  problem; in this case a minimum norm solution is returned.
+*  The actual singular values are returned in D in ascending order.
+*
+*  This code makes very mild assumptions about floating point
+*  arithmetic. It will work on machines with a guard digit in
+*  add/subtract, or on those binary machines without guard digits
+*  which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2.
+*  It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  UPLO   (input) CHARACTER*1
+*         = 'U': D and E define an upper bidiagonal matrix.
+*         = 'L': D and E define a  lower bidiagonal matrix.
+*
+*  SMLSIZ (input) INTEGER
+*         The maximum size of the subproblems at the bottom of the
+*         computation tree.
+*
+*  N      (input) INTEGER
+*         The dimension of the  bidiagonal matrix.  N >= 0.
+*
+*  NRHS   (input) INTEGER
+*         The number of columns of B. NRHS must be at least 1.
+*
+*  D      (input/output) REAL array, dimension (N)
+*         On entry D contains the main diagonal of the bidiagonal
+*         matrix. On exit, if INFO = 0, D contains its singular values.
+*
+*  E      (input) REAL array, dimension (N-1)
+*         Contains the super-diagonal entries of the bidiagonal matrix.
+*         On exit, E has been destroyed.
+*
+*  B      (input/output) REAL array, dimension (LDB,NRHS)
+*         On input, B contains the right hand sides of the least
+*         squares problem. On output, B contains the solution X.
+*
+*  LDB    (input) INTEGER
+*         The leading dimension of B in the calling subprogram.
+*         LDB must be at least max(1,N).
+*
+*  RCOND  (input) REAL
+*         The singular values of A less than or equal to RCOND times
+*         the largest singular value are treated as zero in solving
+*         the least squares problem. If RCOND is negative,
+*         machine precision is used instead.
+*         For example, if diag(S)*X=B were the least squares problem,
+*         where diag(S) is a diagonal matrix of singular values, the
+*         solution would be X(i) = B(i) / S(i) if S(i) is greater than
+*         RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to
+*         RCOND*max(S).
+*
+*  RANK   (output) INTEGER
+*         The number of singular values of A greater than RCOND times
+*         the largest singular value.
+*
+*  WORK   (workspace) REAL array, dimension at least
+*         (9*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2),
+*         where NLVL = max(0, INT(log_2 (N/(SMLSIZ+1))) + 1).
+*
+*  IWORK  (workspace) INTEGER array, dimension at least
+*         (3*N*NLVL + 11*N)
+*
+*  INFO   (output) INTEGER
+*         = 0:  successful exit.
+*         < 0:  if INFO = -i, the i-th argument had an illegal value.
+*         > 0:  The algorithm failed to compute an singular value while
+*               working on the submatrix lying in rows and columns
+*               INFO/(N+1) through MOD(INFO,N+1).
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Ren-Cang Li, Computer Science Division, University of
+*       California at Berkeley, USA
+*     Osni Marques, LBNL/NERSC, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            BX, BXST, C, DIFL, DIFR, GIVCOL, GIVNUM,
+     $                   GIVPTR, I, ICMPQ1, ICMPQ2, IWK, J, K, NLVL,
+     $                   NM1, NSIZE, NSUB, NWORK, PERM, POLES, S, SIZEI,
+     $                   SMLSZP, SQRE, ST, ST1, U, VT, Z
+      REAL               CS, EPS, ORGNRM, R, SN, TOL
+*     ..
+*     .. External Functions ..
+      INTEGER            ISAMAX
+      REAL               SLAMCH, SLANST
+      EXTERNAL           ISAMAX, SLAMCH, SLANST
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SGEMM, SLACPY, SLALSA, SLARTG, SLASCL,
+     $                   SLASDA, SLASDQ, SLASET, SLASRT, SROT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, INT, LOG, REAL, SIGN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.1 ) THEN
+         INFO = -4
+      ELSE IF( ( LDB.LT.1 ) .OR. ( LDB.LT.N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLALSD', -INFO )
+         RETURN
+      END IF
+*
+      EPS = SLAMCH( 'Epsilon' )
+*
+*     Set up the tolerance.
+*
+      IF( ( RCOND.LE.ZERO ) .OR. ( RCOND.GE.ONE ) ) THEN
+         RCOND = EPS
+      END IF
+*
+      RANK = 0
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 ) THEN
+         RETURN
+      ELSE IF( N.EQ.1 ) THEN
+         IF( D( 1 ).EQ.ZERO ) THEN
+            CALL SLASET( 'A', 1, NRHS, ZERO, ZERO, B, LDB )
+         ELSE
+            RANK = 1
+            CALL SLASCL( 'G', 0, 0, D( 1 ), ONE, 1, NRHS, B, LDB, INFO )
+            D( 1 ) = ABS( D( 1 ) )
+         END IF
+         RETURN
+      END IF
+*
+*     Rotate the matrix if it is lower bidiagonal.
+*
+      IF( UPLO.EQ.'L' ) THEN
+         DO 10 I = 1, N - 1
+            CALL SLARTG( D( I ), E( I ), CS, SN, R )
+            D( I ) = R
+            E( I ) = SN*D( I+1 )
+            D( I+1 ) = CS*D( I+1 )
+            IF( NRHS.EQ.1 ) THEN
+               CALL SROT( 1, B( I, 1 ), 1, B( I+1, 1 ), 1, CS, SN )
+            ELSE
+               WORK( I*2-1 ) = CS
+               WORK( I*2 ) = SN
+            END IF
+   10    CONTINUE
+         IF( NRHS.GT.1 ) THEN
+            DO 30 I = 1, NRHS
+               DO 20 J = 1, N - 1
+                  CS = WORK( J*2-1 )
+                  SN = WORK( J*2 )
+                  CALL SROT( 1, B( J, I ), 1, B( J+1, I ), 1, CS, SN )
+   20          CONTINUE
+   30       CONTINUE
+         END IF
+      END IF
+*
+*     Scale.
+*
+      NM1 = N - 1
+      ORGNRM = SLANST( 'M', N, D, E )
+      IF( ORGNRM.EQ.ZERO ) THEN
+         CALL SLASET( 'A', N, NRHS, ZERO, ZERO, B, LDB )
+         RETURN
+      END IF
+*
+      CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO )
+      CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, NM1, 1, E, NM1, INFO )
+*
+*     If N is smaller than the minimum divide size SMLSIZ, then solve
+*     the problem with another solver.
+*
+      IF( N.LE.SMLSIZ ) THEN
+         NWORK = 1 + N*N
+         CALL SLASET( 'A', N, N, ZERO, ONE, WORK, N )
+         CALL SLASDQ( 'U', 0, N, N, 0, NRHS, D, E, WORK, N, WORK, N, B,
+     $                LDB, WORK( NWORK ), INFO )
+         IF( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+         TOL = RCOND*ABS( D( ISAMAX( N, D, 1 ) ) )
+         DO 40 I = 1, N
+            IF( D( I ).LE.TOL ) THEN
+               CALL SLASET( 'A', 1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
+            ELSE
+               CALL SLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS, B( I, 1 ),
+     $                      LDB, INFO )
+               RANK = RANK + 1
+            END IF
+   40    CONTINUE
+         CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, WORK, N, B, LDB, ZERO,
+     $               WORK( NWORK ), N )
+         CALL SLACPY( 'A', N, NRHS, WORK( NWORK ), N, B, LDB )
+*
+*        Unscale.
+*
+         CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO )
+         CALL SLASRT( 'D', N, D, INFO )
+         CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO )
+*
+         RETURN
+      END IF
+*
+*     Book-keeping and setting up some constants.
+*
+      NLVL = INT( LOG( REAL( N ) / REAL( SMLSIZ+1 ) ) / LOG( TWO ) ) + 1
+*
+      SMLSZP = SMLSIZ + 1
+*
+      U = 1
+      VT = 1 + SMLSIZ*N
+      DIFL = VT + SMLSZP*N
+      DIFR = DIFL + NLVL*N
+      Z = DIFR + NLVL*N*2
+      C = Z + NLVL*N
+      S = C + N
+      POLES = S + N
+      GIVNUM = POLES + 2*NLVL*N
+      BX = GIVNUM + 2*NLVL*N
+      NWORK = BX + N*NRHS
+*
+      SIZEI = 1 + N
+      K = SIZEI + N
+      GIVPTR = K + N
+      PERM = GIVPTR + N
+      GIVCOL = PERM + NLVL*N
+      IWK = GIVCOL + NLVL*N*2
+*
+      ST = 1
+      SQRE = 0
+      ICMPQ1 = 1
+      ICMPQ2 = 0
+      NSUB = 0
+*
+      DO 50 I = 1, N
+         IF( ABS( D( I ) ).LT.EPS ) THEN
+            D( I ) = SIGN( EPS, D( I ) )
+         END IF
+   50 CONTINUE
+*
+      DO 60 I = 1, NM1
+         IF( ( ABS( E( I ) ).LT.EPS ) .OR. ( I.EQ.NM1 ) ) THEN
+            NSUB = NSUB + 1
+            IWORK( NSUB ) = ST
+*
+*           Subproblem found. First determine its size and then
+*           apply divide and conquer on it.
+*
+            IF( I.LT.NM1 ) THEN
+*
+*              A subproblem with E(I) small for I < NM1.
+*
+               NSIZE = I - ST + 1
+               IWORK( SIZEI+NSUB-1 ) = NSIZE
+            ELSE IF( ABS( E( I ) ).GE.EPS ) THEN
+*
+*              A subproblem with E(NM1) not too small but I = NM1.
+*
+               NSIZE = N - ST + 1
+               IWORK( SIZEI+NSUB-1 ) = NSIZE
+            ELSE
+*
+*              A subproblem with E(NM1) small. This implies an
+*              1-by-1 subproblem at D(N), which is not solved
+*              explicitly.
+*
+               NSIZE = I - ST + 1
+               IWORK( SIZEI+NSUB-1 ) = NSIZE
+               NSUB = NSUB + 1
+               IWORK( NSUB ) = N
+               IWORK( SIZEI+NSUB-1 ) = 1
+               CALL SCOPY( NRHS, B( N, 1 ), LDB, WORK( BX+NM1 ), N )
+            END IF
+            ST1 = ST - 1
+            IF( NSIZE.EQ.1 ) THEN
+*
+*              This is a 1-by-1 subproblem and is not solved
+*              explicitly.
+*
+               CALL SCOPY( NRHS, B( ST, 1 ), LDB, WORK( BX+ST1 ), N )
+            ELSE IF( NSIZE.LE.SMLSIZ ) THEN
+*
+*              This is a small subproblem and is solved by SLASDQ.
+*
+               CALL SLASET( 'A', NSIZE, NSIZE, ZERO, ONE,
+     $                      WORK( VT+ST1 ), N )
+               CALL SLASDQ( 'U', 0, NSIZE, NSIZE, 0, NRHS, D( ST ),
+     $                      E( ST ), WORK( VT+ST1 ), N, WORK( NWORK ),
+     $                      N, B( ST, 1 ), LDB, WORK( NWORK ), INFO )
+               IF( INFO.NE.0 ) THEN
+                  RETURN
+               END IF
+               CALL SLACPY( 'A', NSIZE, NRHS, B( ST, 1 ), LDB,
+     $                      WORK( BX+ST1 ), N )
+            ELSE
+*
+*              A large problem. Solve it using divide and conquer.
+*
+               CALL SLASDA( ICMPQ1, SMLSIZ, NSIZE, SQRE, D( ST ),
+     $                      E( ST ), WORK( U+ST1 ), N, WORK( VT+ST1 ),
+     $                      IWORK( K+ST1 ), WORK( DIFL+ST1 ),
+     $                      WORK( DIFR+ST1 ), WORK( Z+ST1 ),
+     $                      WORK( POLES+ST1 ), IWORK( GIVPTR+ST1 ),
+     $                      IWORK( GIVCOL+ST1 ), N, IWORK( PERM+ST1 ),
+     $                      WORK( GIVNUM+ST1 ), WORK( C+ST1 ),
+     $                      WORK( S+ST1 ), WORK( NWORK ), IWORK( IWK ),
+     $                      INFO )
+               IF( INFO.NE.0 ) THEN
+                  RETURN
+               END IF
+               BXST = BX + ST1
+               CALL SLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, B( ST, 1 ),
+     $                      LDB, WORK( BXST ), N, WORK( U+ST1 ), N,
+     $                      WORK( VT+ST1 ), IWORK( K+ST1 ),
+     $                      WORK( DIFL+ST1 ), WORK( DIFR+ST1 ),
+     $                      WORK( Z+ST1 ), WORK( POLES+ST1 ),
+     $                      IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N,
+     $                      IWORK( PERM+ST1 ), WORK( GIVNUM+ST1 ),
+     $                      WORK( C+ST1 ), WORK( S+ST1 ), WORK( NWORK ),
+     $                      IWORK( IWK ), INFO )
+               IF( INFO.NE.0 ) THEN
+                  RETURN
+               END IF
+            END IF
+            ST = I + 1
+         END IF
+   60 CONTINUE
+*
+*     Apply the singular values and treat the tiny ones as zero.
+*
+      TOL = RCOND*ABS( D( ISAMAX( N, D, 1 ) ) )
+*
+      DO 70 I = 1, N
+*
+*        Some of the elements in D can be negative because 1-by-1
+*        subproblems were not solved explicitly.
+*
+         IF( ABS( D( I ) ).LE.TOL ) THEN
+            CALL SLASET( 'A', 1, NRHS, ZERO, ZERO, WORK( BX+I-1 ), N )
+         ELSE
+            RANK = RANK + 1
+            CALL SLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS,
+     $                   WORK( BX+I-1 ), N, INFO )
+         END IF
+         D( I ) = ABS( D( I ) )
+   70 CONTINUE
+*
+*     Now apply back the right singular vectors.
+*
+      ICMPQ2 = 1
+      DO 80 I = 1, NSUB
+         ST = IWORK( I )
+         ST1 = ST - 1
+         NSIZE = IWORK( SIZEI+I-1 )
+         BXST = BX + ST1
+         IF( NSIZE.EQ.1 ) THEN
+            CALL SCOPY( NRHS, WORK( BXST ), N, B( ST, 1 ), LDB )
+         ELSE IF( NSIZE.LE.SMLSIZ ) THEN
+            CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE,
+     $                  WORK( VT+ST1 ), N, WORK( BXST ), N, ZERO,
+     $                  B( ST, 1 ), LDB )
+         ELSE
+            CALL SLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, WORK( BXST ), N,
+     $                   B( ST, 1 ), LDB, WORK( U+ST1 ), N,
+     $                   WORK( VT+ST1 ), IWORK( K+ST1 ),
+     $                   WORK( DIFL+ST1 ), WORK( DIFR+ST1 ),
+     $                   WORK( Z+ST1 ), WORK( POLES+ST1 ),
+     $                   IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N,
+     $                   IWORK( PERM+ST1 ), WORK( GIVNUM+ST1 ),
+     $                   WORK( C+ST1 ), WORK( S+ST1 ), WORK( NWORK ),
+     $                   IWORK( IWK ), INFO )
+            IF( INFO.NE.0 ) THEN
+               RETURN
+            END IF
+         END IF
+   80 CONTINUE
+*
+*     Unscale and sort the singular values.
+*
+      CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO )
+      CALL SLASRT( 'D', N, D, INFO )
+      CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO )
+*
+      RETURN
+*
+*     End of SLALSD
+*
+      END
diff --git a/costa/native/external/lapack/slamch.f b/costa/native/external/lapack/slamch.f
new file mode 100644
index 000000000..afb4d3686
--- /dev/null
+++ b/costa/native/external/lapack/slamch.f
@@ -0,0 +1,857 @@
+      REAL             FUNCTION SLAMCH( CMACH )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          CMACH
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAMCH determines single precision machine parameters.
+*
+*  Arguments
+*  =========
+*
+*  CMACH   (input) CHARACTER*1
+*          Specifies the value to be returned by SLAMCH:
+*          = 'E' or 'e',   SLAMCH := eps
+*          = 'S' or 's ,   SLAMCH := sfmin
+*          = 'B' or 'b',   SLAMCH := base
+*          = 'P' or 'p',   SLAMCH := eps*base
+*          = 'N' or 'n',   SLAMCH := t
+*          = 'R' or 'r',   SLAMCH := rnd
+*          = 'M' or 'm',   SLAMCH := emin
+*          = 'U' or 'u',   SLAMCH := rmin
+*          = 'L' or 'l',   SLAMCH := emax
+*          = 'O' or 'o',   SLAMCH := rmax
+*
+*          where
+*
+*          eps   = relative machine precision
+*          sfmin = safe minimum, such that 1/sfmin does not overflow
+*          base  = base of the machine
+*          prec  = eps*base
+*          t     = number of (base) digits in the mantissa
+*          rnd   = 1.0 when rounding occurs in addition, 0.0 otherwise
+*          emin  = minimum exponent before (gradual) underflow
+*          rmin  = underflow threshold - base**(emin-1)
+*          emax  = largest exponent before overflow
+*          rmax  = overflow threshold  - (base**emax)*(1-eps)
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            FIRST, LRND
+      INTEGER            BETA, IMAX, IMIN, IT
+      REAL               BASE, EMAX, EMIN, EPS, PREC, RMACH, RMAX, RMIN,
+     $                   RND, SFMIN, SMALL, T
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLAMC2
+*     ..
+*     .. Save statement ..
+      SAVE               FIRST, EPS, SFMIN, BASE, T, RND, EMIN, RMIN,
+     $                   EMAX, RMAX, PREC
+*     ..
+*     .. Data statements ..
+      DATA               FIRST / .TRUE. /
+*     ..
+*     .. Executable Statements ..
+*
+      IF( FIRST ) THEN
+         FIRST = .FALSE.
+         CALL SLAMC2( BETA, IT, LRND, EPS, IMIN, RMIN, IMAX, RMAX )
+         BASE = BETA
+         T = IT
+         IF( LRND ) THEN
+            RND = ONE
+            EPS = ( BASE**( 1-IT ) ) / 2
+         ELSE
+            RND = ZERO
+            EPS = BASE**( 1-IT )
+         END IF
+         PREC = EPS*BASE
+         EMIN = IMIN
+         EMAX = IMAX
+         SFMIN = RMIN
+         SMALL = ONE / RMAX
+         IF( SMALL.GE.SFMIN ) THEN
+*
+*           Use SMALL plus a bit, to avoid the possibility of rounding
+*           causing overflow when computing  1/sfmin.
+*
+            SFMIN = SMALL*( ONE+EPS )
+         END IF
+      END IF
+*
+      IF( LSAME( CMACH, 'E' ) ) THEN
+         RMACH = EPS
+      ELSE IF( LSAME( CMACH, 'S' ) ) THEN
+         RMACH = SFMIN
+      ELSE IF( LSAME( CMACH, 'B' ) ) THEN
+         RMACH = BASE
+      ELSE IF( LSAME( CMACH, 'P' ) ) THEN
+         RMACH = PREC
+      ELSE IF( LSAME( CMACH, 'N' ) ) THEN
+         RMACH = T
+      ELSE IF( LSAME( CMACH, 'R' ) ) THEN
+         RMACH = RND
+      ELSE IF( LSAME( CMACH, 'M' ) ) THEN
+         RMACH = EMIN
+      ELSE IF( LSAME( CMACH, 'U' ) ) THEN
+         RMACH = RMIN
+      ELSE IF( LSAME( CMACH, 'L' ) ) THEN
+         RMACH = EMAX
+      ELSE IF( LSAME( CMACH, 'O' ) ) THEN
+         RMACH = RMAX
+      END IF
+*
+      SLAMCH = RMACH
+      RETURN
+*
+*     End of SLAMCH
+*
+      END
+*
+************************************************************************
+*
+      SUBROUTINE SLAMC1( BETA, T, RND, IEEE1 )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      LOGICAL            IEEE1, RND
+      INTEGER            BETA, T
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAMC1 determines the machine parameters given by BETA, T, RND, and
+*  IEEE1.
+*
+*  Arguments
+*  =========
+*
+*  BETA    (output) INTEGER
+*          The base of the machine.
+*
+*  T       (output) INTEGER
+*          The number of ( BETA ) digits in the mantissa.
+*
+*  RND     (output) LOGICAL
+*          Specifies whether proper rounding  ( RND = .TRUE. )  or
+*          chopping  ( RND = .FALSE. )  occurs in addition. This may not
+*          be a reliable guide to the way in which the machine performs
+*          its arithmetic.
+*
+*  IEEE1   (output) LOGICAL
+*          Specifies whether rounding appears to be done in the IEEE
+*          'round to nearest' style.
+*
+*  Further Details
+*  ===============
+*
+*  The routine is based on the routine  ENVRON  by Malcolm and
+*  incorporates suggestions by Gentleman and Marovich. See
+*
+*     Malcolm M. A. (1972) Algorithms to reveal properties of
+*        floating-point arithmetic. Comms. of the ACM, 15, 949-951.
+*
+*     Gentleman W. M. and Marovich S. B. (1974) More on algorithms
+*        that reveal properties of floating point arithmetic units.
+*        Comms. of the ACM, 17, 276-277.
+*
+* =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            FIRST, LIEEE1, LRND
+      INTEGER            LBETA, LT
+      REAL               A, B, C, F, ONE, QTR, SAVEC, T1, T2
+*     ..
+*     .. External Functions ..
+      REAL               SLAMC3
+      EXTERNAL           SLAMC3
+*     ..
+*     .. Save statement ..
+      SAVE               FIRST, LIEEE1, LBETA, LRND, LT
+*     ..
+*     .. Data statements ..
+      DATA               FIRST / .TRUE. /
+*     ..
+*     .. Executable Statements ..
+*
+      IF( FIRST ) THEN
+         FIRST = .FALSE.
+         ONE = 1
+*
+*        LBETA,  LIEEE1,  LT and  LRND  are the  local values  of  BETA,
+*        IEEE1, T and RND.
+*
+*        Throughout this routine  we use the function  SLAMC3  to ensure
+*        that relevant values are  stored and not held in registers,  or
+*        are not affected by optimizers.
+*
+*        Compute  a = 2.0**m  with the  smallest positive integer m such
+*        that
+*
+*           fl( a + 1.0 ) = a.
+*
+         A = 1
+         C = 1
+*
+*+       WHILE( C.EQ.ONE )LOOP
+   10    CONTINUE
+         IF( C.EQ.ONE ) THEN
+            A = 2*A
+            C = SLAMC3( A, ONE )
+            C = SLAMC3( C, -A )
+            GO TO 10
+         END IF
+*+       END WHILE
+*
+*        Now compute  b = 2.0**m  with the smallest positive integer m
+*        such that
+*
+*           fl( a + b ) .gt. a.
+*
+         B = 1
+         C = SLAMC3( A, B )
+*
+*+       WHILE( C.EQ.A )LOOP
+   20    CONTINUE
+         IF( C.EQ.A ) THEN
+            B = 2*B
+            C = SLAMC3( A, B )
+            GO TO 20
+         END IF
+*+       END WHILE
+*
+*        Now compute the base.  a and c  are neighbouring floating point
+*        numbers  in the  interval  ( beta**t, beta**( t + 1 ) )  and so
+*        their difference is beta. Adding 0.25 to c is to ensure that it
+*        is truncated to beta and not ( beta - 1 ).
+*
+         QTR = ONE / 4
+         SAVEC = C
+         C = SLAMC3( C, -A )
+         LBETA = C + QTR
+*
+*        Now determine whether rounding or chopping occurs,  by adding a
+*        bit  less  than  beta/2  and a  bit  more  than  beta/2  to  a.
+*
+         B = LBETA
+         F = SLAMC3( B / 2, -B / 100 )
+         C = SLAMC3( F, A )
+         IF( C.EQ.A ) THEN
+            LRND = .TRUE.
+         ELSE
+            LRND = .FALSE.
+         END IF
+         F = SLAMC3( B / 2, B / 100 )
+         C = SLAMC3( F, A )
+         IF( ( LRND ) .AND. ( C.EQ.A ) )
+     $      LRND = .FALSE.
+*
+*        Try and decide whether rounding is done in the  IEEE  'round to
+*        nearest' style. B/2 is half a unit in the last place of the two
+*        numbers A and SAVEC. Furthermore, A is even, i.e. has last  bit
+*        zero, and SAVEC is odd. Thus adding B/2 to A should not  change
+*        A, but adding B/2 to SAVEC should change SAVEC.
+*
+         T1 = SLAMC3( B / 2, A )
+         T2 = SLAMC3( B / 2, SAVEC )
+         LIEEE1 = ( T1.EQ.A ) .AND. ( T2.GT.SAVEC ) .AND. LRND
+*
+*        Now find  the  mantissa, t.  It should  be the  integer part of
+*        log to the base beta of a,  however it is safer to determine  t
+*        by powering.  So we find t as the smallest positive integer for
+*        which
+*
+*           fl( beta**t + 1.0 ) = 1.0.
+*
+         LT = 0
+         A = 1
+         C = 1
+*
+*+       WHILE( C.EQ.ONE )LOOP
+   30    CONTINUE
+         IF( C.EQ.ONE ) THEN
+            LT = LT + 1
+            A = A*LBETA
+            C = SLAMC3( A, ONE )
+            C = SLAMC3( C, -A )
+            GO TO 30
+         END IF
+*+       END WHILE
+*
+      END IF
+*
+      BETA = LBETA
+      T = LT
+      RND = LRND
+      IEEE1 = LIEEE1
+      RETURN
+*
+*     End of SLAMC1
+*
+      END
+*
+************************************************************************
+*
+      SUBROUTINE SLAMC2( BETA, T, RND, EPS, EMIN, RMIN, EMAX, RMAX )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      LOGICAL            RND
+      INTEGER            BETA, EMAX, EMIN, T
+      REAL               EPS, RMAX, RMIN
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAMC2 determines the machine parameters specified in its argument
+*  list.
+*
+*  Arguments
+*  =========
+*
+*  BETA    (output) INTEGER
+*          The base of the machine.
+*
+*  T       (output) INTEGER
+*          The number of ( BETA ) digits in the mantissa.
+*
+*  RND     (output) LOGICAL
+*          Specifies whether proper rounding  ( RND = .TRUE. )  or
+*          chopping  ( RND = .FALSE. )  occurs in addition. This may not
+*          be a reliable guide to the way in which the machine performs
+*          its arithmetic.
+*
+*  EPS     (output) REAL
+*          The smallest positive number such that
+*
+*             fl( 1.0 - EPS ) .LT. 1.0,
+*
+*          where fl denotes the computed value.
+*
+*  EMIN    (output) INTEGER
+*          The minimum exponent before (gradual) underflow occurs.
+*
+*  RMIN    (output) REAL
+*          The smallest normalized number for the machine, given by
+*          BASE**( EMIN - 1 ), where  BASE  is the floating point value
+*          of BETA.
+*
+*  EMAX    (output) INTEGER
+*          The maximum exponent before overflow occurs.
+*
+*  RMAX    (output) REAL
+*          The largest positive number for the machine, given by
+*          BASE**EMAX * ( 1 - EPS ), where  BASE  is the floating point
+*          value of BETA.
+*
+*  Further Details
+*  ===============
+*
+*  The computation of  EPS  is based on a routine PARANOIA by
+*  W. Kahan of the University of California at Berkeley.
+*
+* =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            FIRST, IEEE, IWARN, LIEEE1, LRND
+      INTEGER            GNMIN, GPMIN, I, LBETA, LEMAX, LEMIN, LT,
+     $                   NGNMIN, NGPMIN
+      REAL               A, B, C, HALF, LEPS, LRMAX, LRMIN, ONE, RBASE,
+     $                   SIXTH, SMALL, THIRD, TWO, ZERO
+*     ..
+*     .. External Functions ..
+      REAL               SLAMC3
+      EXTERNAL           SLAMC3
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLAMC1, SLAMC4, SLAMC5
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Save statement ..
+      SAVE               FIRST, IWARN, LBETA, LEMAX, LEMIN, LEPS, LRMAX,
+     $                   LRMIN, LT
+*     ..
+*     .. Data statements ..
+      DATA               FIRST / .TRUE. / , IWARN / .FALSE. /
+*     ..
+*     .. Executable Statements ..
+*
+      IF( FIRST ) THEN
+         FIRST = .FALSE.
+         ZERO = 0
+         ONE = 1
+         TWO = 2
+*
+*        LBETA, LT, LRND, LEPS, LEMIN and LRMIN  are the local values of
+*        BETA, T, RND, EPS, EMIN and RMIN.
+*
+*        Throughout this routine  we use the function  SLAMC3  to ensure
+*        that relevant values are stored  and not held in registers,  or
+*        are not affected by optimizers.
+*
+*        SLAMC1 returns the parameters  LBETA, LT, LRND and LIEEE1.
+*
+         CALL SLAMC1( LBETA, LT, LRND, LIEEE1 )
+*
+*        Start to find EPS.
+*
+         B = LBETA
+         A = B**( -LT )
+         LEPS = A
+*
+*        Try some tricks to see whether or not this is the correct  EPS.
+*
+         B = TWO / 3
+         HALF = ONE / 2
+         SIXTH = SLAMC3( B, -HALF )
+         THIRD = SLAMC3( SIXTH, SIXTH )
+         B = SLAMC3( THIRD, -HALF )
+         B = SLAMC3( B, SIXTH )
+         B = ABS( B )
+         IF( B.LT.LEPS )
+     $      B = LEPS
+*
+         LEPS = 1
+*
+*+       WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP
+   10    CONTINUE
+         IF( ( LEPS.GT.B ) .AND. ( B.GT.ZERO ) ) THEN
+            LEPS = B
+            C = SLAMC3( HALF*LEPS, ( TWO**5 )*( LEPS**2 ) )
+            C = SLAMC3( HALF, -C )
+            B = SLAMC3( HALF, C )
+            C = SLAMC3( HALF, -B )
+            B = SLAMC3( HALF, C )
+            GO TO 10
+         END IF
+*+       END WHILE
+*
+         IF( A.LT.LEPS )
+     $      LEPS = A
+*
+*        Computation of EPS complete.
+*
+*        Now find  EMIN.  Let A = + or - 1, and + or - (1 + BASE**(-3)).
+*        Keep dividing  A by BETA until (gradual) underflow occurs. This
+*        is detected when we cannot recover the previous A.
+*
+         RBASE = ONE / LBETA
+         SMALL = ONE
+         DO 20 I = 1, 3
+            SMALL = SLAMC3( SMALL*RBASE, ZERO )
+   20    CONTINUE
+         A = SLAMC3( ONE, SMALL )
+         CALL SLAMC4( NGPMIN, ONE, LBETA )
+         CALL SLAMC4( NGNMIN, -ONE, LBETA )
+         CALL SLAMC4( GPMIN, A, LBETA )
+         CALL SLAMC4( GNMIN, -A, LBETA )
+         IEEE = .FALSE.
+*
+         IF( ( NGPMIN.EQ.NGNMIN ) .AND. ( GPMIN.EQ.GNMIN ) ) THEN
+            IF( NGPMIN.EQ.GPMIN ) THEN
+               LEMIN = NGPMIN
+*            ( Non twos-complement machines, no gradual underflow;
+*              e.g.,  VAX )
+            ELSE IF( ( GPMIN-NGPMIN ).EQ.3 ) THEN
+               LEMIN = NGPMIN - 1 + LT
+               IEEE = .TRUE.
+*            ( Non twos-complement machines, with gradual underflow;
+*              e.g., IEEE standard followers )
+            ELSE
+               LEMIN = MIN( NGPMIN, GPMIN )
+*            ( A guess; no known machine )
+               IWARN = .TRUE.
+            END IF
+*
+         ELSE IF( ( NGPMIN.EQ.GPMIN ) .AND. ( NGNMIN.EQ.GNMIN ) ) THEN
+            IF( ABS( NGPMIN-NGNMIN ).EQ.1 ) THEN
+               LEMIN = MAX( NGPMIN, NGNMIN )
+*            ( Twos-complement machines, no gradual underflow;
+*              e.g., CYBER 205 )
+            ELSE
+               LEMIN = MIN( NGPMIN, NGNMIN )
+*            ( A guess; no known machine )
+               IWARN = .TRUE.
+            END IF
+*
+         ELSE IF( ( ABS( NGPMIN-NGNMIN ).EQ.1 ) .AND.
+     $            ( GPMIN.EQ.GNMIN ) ) THEN
+            IF( ( GPMIN-MIN( NGPMIN, NGNMIN ) ).EQ.3 ) THEN
+               LEMIN = MAX( NGPMIN, NGNMIN ) - 1 + LT
+*            ( Twos-complement machines with gradual underflow;
+*              no known machine )
+            ELSE
+               LEMIN = MIN( NGPMIN, NGNMIN )
+*            ( A guess; no known machine )
+               IWARN = .TRUE.
+            END IF
+*
+         ELSE
+            LEMIN = MIN( NGPMIN, NGNMIN, GPMIN, GNMIN )
+*         ( A guess; no known machine )
+            IWARN = .TRUE.
+         END IF
+***
+* Comment out this if block if EMIN is ok
+         IF( IWARN ) THEN
+            FIRST = .TRUE.
+            WRITE( 6, FMT = 9999 )LEMIN
+         END IF
+***
+*
+*        Assume IEEE arithmetic if we found denormalised  numbers above,
+*        or if arithmetic seems to round in the  IEEE style,  determined
+*        in routine SLAMC1. A true IEEE machine should have both  things
+*        true; however, faulty machines may have one or the other.
+*
+         IEEE = IEEE .OR. LIEEE1
+*
+*        Compute  RMIN by successive division by  BETA. We could compute
+*        RMIN as BASE**( EMIN - 1 ),  but some machines underflow during
+*        this computation.
+*
+         LRMIN = 1
+         DO 30 I = 1, 1 - LEMIN
+            LRMIN = SLAMC3( LRMIN*RBASE, ZERO )
+   30    CONTINUE
+*
+*        Finally, call SLAMC5 to compute EMAX and RMAX.
+*
+         CALL SLAMC5( LBETA, LT, LEMIN, IEEE, LEMAX, LRMAX )
+      END IF
+*
+      BETA = LBETA
+      T = LT
+      RND = LRND
+      EPS = LEPS
+      EMIN = LEMIN
+      RMIN = LRMIN
+      EMAX = LEMAX
+      RMAX = LRMAX
+*
+      RETURN
+*
+ 9999 FORMAT( / / ' WARNING. The value EMIN may be incorrect:-',
+     $      '  EMIN = ', I8, /
+     $      ' If, after inspection, the value EMIN looks',
+     $      ' acceptable please comment out ',
+     $      / ' the IF block as marked within the code of routine',
+     $      ' SLAMC2,', / ' otherwise supply EMIN explicitly.', / )
+*
+*     End of SLAMC2
+*
+      END
+*
+************************************************************************
+*
+      REAL             FUNCTION SLAMC3( A, B )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      REAL               A, B
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAMC3  is intended to force  A  and  B  to be stored prior to doing
+*  the addition of  A  and  B ,  for use in situations where optimizers
+*  might hold one of these in a register.
+*
+*  Arguments
+*  =========
+*
+*  A, B    (input) REAL
+*          The values A and B.
+*
+* =====================================================================
+*
+*     .. Executable Statements ..
+*
+      SLAMC3 = A + B
+*
+      RETURN
+*
+*     End of SLAMC3
+*
+      END
+*
+************************************************************************
+*
+      SUBROUTINE SLAMC4( EMIN, START, BASE )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            BASE, EMIN
+      REAL               START
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAMC4 is a service routine for SLAMC2.
+*
+*  Arguments
+*  =========
+*
+*  EMIN    (output) EMIN
+*          The minimum exponent before (gradual) underflow, computed by
+*          setting A = START and dividing by BASE until the previous A
+*          can not be recovered.
+*
+*  START   (input) REAL
+*          The starting point for determining EMIN.
+*
+*  BASE    (input) INTEGER
+*          The base of the machine.
+*
+* =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I
+      REAL               A, B1, B2, C1, C2, D1, D2, ONE, RBASE, ZERO
+*     ..
+*     .. External Functions ..
+      REAL               SLAMC3
+      EXTERNAL           SLAMC3
+*     ..
+*     .. Executable Statements ..
+*
+      A = START
+      ONE = 1
+      RBASE = ONE / BASE
+      ZERO = 0
+      EMIN = 1
+      B1 = SLAMC3( A*RBASE, ZERO )
+      C1 = A
+      C2 = A
+      D1 = A
+      D2 = A
+*+    WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND.
+*    $       ( D1.EQ.A ).AND.( D2.EQ.A )      )LOOP
+   10 CONTINUE
+      IF( ( C1.EQ.A ) .AND. ( C2.EQ.A ) .AND. ( D1.EQ.A ) .AND.
+     $    ( D2.EQ.A ) ) THEN
+         EMIN = EMIN - 1
+         A = B1
+         B1 = SLAMC3( A / BASE, ZERO )
+         C1 = SLAMC3( B1*BASE, ZERO )
+         D1 = ZERO
+         DO 20 I = 1, BASE
+            D1 = D1 + B1
+   20    CONTINUE
+         B2 = SLAMC3( A*RBASE, ZERO )
+         C2 = SLAMC3( B2 / RBASE, ZERO )
+         D2 = ZERO
+         DO 30 I = 1, BASE
+            D2 = D2 + B2
+   30    CONTINUE
+         GO TO 10
+      END IF
+*+    END WHILE
+*
+      RETURN
+*
+*     End of SLAMC4
+*
+      END
+*
+************************************************************************
+*
+      SUBROUTINE SLAMC5( BETA, P, EMIN, IEEE, EMAX, RMAX )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      LOGICAL            IEEE
+      INTEGER            BETA, EMAX, EMIN, P
+      REAL               RMAX
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAMC5 attempts to compute RMAX, the largest machine floating-point
+*  number, without overflow.  It assumes that EMAX + abs(EMIN) sum
+*  approximately to a power of 2.  It will fail on machines where this
+*  assumption does not hold, for example, the Cyber 205 (EMIN = -28625,
+*  EMAX = 28718).  It will also fail if the value supplied for EMIN is
+*  too large (i.e. too close to zero), probably with overflow.
+*
+*  Arguments
+*  =========
+*
+*  BETA    (input) INTEGER
+*          The base of floating-point arithmetic.
+*
+*  P       (input) INTEGER
+*          The number of base BETA digits in the mantissa of a
+*          floating-point value.
+*
+*  EMIN    (input) INTEGER
+*          The minimum exponent before (gradual) underflow.
+*
+*  IEEE    (input) LOGICAL
+*          A logical flag specifying whether or not the arithmetic
+*          system is thought to comply with the IEEE standard.
+*
+*  EMAX    (output) INTEGER
+*          The largest exponent before overflow
+*
+*  RMAX    (output) REAL
+*          The largest machine floating-point number.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            EXBITS, EXPSUM, I, LEXP, NBITS, TRY, UEXP
+      REAL               OLDY, RECBAS, Y, Z
+*     ..
+*     .. External Functions ..
+      REAL               SLAMC3
+      EXTERNAL           SLAMC3
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MOD
+*     ..
+*     .. Executable Statements ..
+*
+*     First compute LEXP and UEXP, two powers of 2 that bound
+*     abs(EMIN). We then assume that EMAX + abs(EMIN) will sum
+*     approximately to the bound that is closest to abs(EMIN).
+*     (EMAX is the exponent of the required number RMAX).
+*
+      LEXP = 1
+      EXBITS = 1
+   10 CONTINUE
+      TRY = LEXP*2
+      IF( TRY.LE.( -EMIN ) ) THEN
+         LEXP = TRY
+         EXBITS = EXBITS + 1
+         GO TO 10
+      END IF
+      IF( LEXP.EQ.-EMIN ) THEN
+         UEXP = LEXP
+      ELSE
+         UEXP = TRY
+         EXBITS = EXBITS + 1
+      END IF
+*
+*     Now -LEXP is less than or equal to EMIN, and -UEXP is greater
+*     than or equal to EMIN. EXBITS is the number of bits needed to
+*     store the exponent.
+*
+      IF( ( UEXP+EMIN ).GT.( -LEXP-EMIN ) ) THEN
+         EXPSUM = 2*LEXP
+      ELSE
+         EXPSUM = 2*UEXP
+      END IF
+*
+*     EXPSUM is the exponent range, approximately equal to
+*     EMAX - EMIN + 1 .
+*
+      EMAX = EXPSUM + EMIN - 1
+      NBITS = 1 + EXBITS + P
+*
+*     NBITS is the total number of bits needed to store a
+*     floating-point number.
+*
+      IF( ( MOD( NBITS, 2 ).EQ.1 ) .AND. ( BETA.EQ.2 ) ) THEN
+*
+*        Either there are an odd number of bits used to store a
+*        floating-point number, which is unlikely, or some bits are
+*        not used in the representation of numbers, which is possible,
+*        (e.g. Cray machines) or the mantissa has an implicit bit,
+*        (e.g. IEEE machines, Dec Vax machines), which is perhaps the
+*        most likely. We have to assume the last alternative.
+*        If this is true, then we need to reduce EMAX by one because
+*        there must be some way of representing zero in an implicit-bit
+*        system. On machines like Cray, we are reducing EMAX by one
+*        unnecessarily.
+*
+         EMAX = EMAX - 1
+      END IF
+*
+      IF( IEEE ) THEN
+*
+*        Assume we are on an IEEE machine which reserves one exponent
+*        for infinity and NaN.
+*
+         EMAX = EMAX - 1
+      END IF
+*
+*     Now create RMAX, the largest machine number, which should
+*     be equal to (1.0 - BETA**(-P)) * BETA**EMAX .
+*
+*     First compute 1.0 - BETA**(-P), being careful that the
+*     result is less than 1.0 .
+*
+      RECBAS = ONE / BETA
+      Z = BETA - ONE
+      Y = ZERO
+      DO 20 I = 1, P
+         Z = Z*RECBAS
+         IF( Y.LT.ONE )
+     $      OLDY = Y
+         Y = SLAMC3( Y, Z )
+   20 CONTINUE
+      IF( Y.GE.ONE )
+     $   Y = OLDY
+*
+*     Now multiply by BETA**EMAX to get RMAX.
+*
+      DO 30 I = 1, EMAX
+         Y = SLAMC3( Y*BETA, ZERO )
+   30 CONTINUE
+*
+      RMAX = Y
+      RETURN
+*
+*     End of SLAMC5
+*
+      END
diff --git a/costa/native/external/lapack/slamrg.f b/costa/native/external/lapack/slamrg.f
new file mode 100644
index 000000000..402189a11
--- /dev/null
+++ b/costa/native/external/lapack/slamrg.f
@@ -0,0 +1,104 @@
+      SUBROUTINE SLAMRG( N1, N2, A, STRD1, STRD2, INDEX )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            N1, N2, STRD1, STRD2
+*     ..
+*     .. Array Arguments ..
+      INTEGER            INDEX( * )
+      REAL               A( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAMRG will create a permutation list which will merge the elements
+*  of A (which is composed of two independently sorted sets) into a
+*  single set which is sorted in ascending order.
+*
+*  Arguments
+*  =========
+*
+*  N1     (input) INTEGER
+*  N2     (input) INTEGER
+*         These arguements contain the respective lengths of the two
+*         sorted lists to be merged.
+*
+*  A      (input) REAL array, dimension (N1+N2)
+*         The first N1 elements of A contain a list of numbers which
+*         are sorted in either ascending or descending order.  Likewise
+*         for the final N2 elements.
+*
+*  STRD1  (input) INTEGER
+*  STRD2  (input) INTEGER
+*         These are the strides to be taken through the array A.
+*         Allowable strides are 1 and -1.  They indicate whether a
+*         subset of A is sorted in ascending (STRDx = 1) or descending
+*         (STRDx = -1) order.
+*
+*  INDEX  (output) INTEGER array, dimension (N1+N2)
+*         On exit this array will contain a permutation such that
+*         if B( I ) = A( INDEX( I ) ) for I=1,N1+N2, then B will be
+*         sorted in ascending order.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, IND1, IND2, N1SV, N2SV
+*     ..
+*     .. Executable Statements ..
+*
+      N1SV = N1
+      N2SV = N2
+      IF( STRD1.GT.0 ) THEN
+         IND1 = 1
+      ELSE
+         IND1 = N1
+      END IF
+      IF( STRD2.GT.0 ) THEN
+         IND2 = 1 + N1
+      ELSE
+         IND2 = N1 + N2
+      END IF
+      I = 1
+*     while ( (N1SV > 0) & (N2SV > 0) )
+   10 CONTINUE
+      IF( N1SV.GT.0 .AND. N2SV.GT.0 ) THEN
+         IF( A( IND1 ).LE.A( IND2 ) ) THEN
+            INDEX( I ) = IND1
+            I = I + 1
+            IND1 = IND1 + STRD1
+            N1SV = N1SV - 1
+         ELSE
+            INDEX( I ) = IND2
+            I = I + 1
+            IND2 = IND2 + STRD2
+            N2SV = N2SV - 1
+         END IF
+         GO TO 10
+      END IF
+*     end while
+      IF( N1SV.EQ.0 ) THEN
+         DO 20 N1SV = 1, N2SV
+            INDEX( I ) = IND2
+            I = I + 1
+            IND2 = IND2 + STRD2
+   20    CONTINUE
+      ELSE
+*     N2SV .EQ. 0
+         DO 30 N2SV = 1, N1SV
+            INDEX( I ) = IND1
+            I = I + 1
+            IND1 = IND1 + STRD1
+   30    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of SLAMRG
+*
+      END
diff --git a/costa/native/external/lapack/slangb.f b/costa/native/external/lapack/slangb.f
new file mode 100644
index 000000000..3b1e51abb
--- /dev/null
+++ b/costa/native/external/lapack/slangb.f
@@ -0,0 +1,155 @@
+      REAL             FUNCTION SLANGB( NORM, N, KL, KU, AB, LDAB,
+     $                 WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            KL, KU, LDAB, N
+*     ..
+*     .. Array Arguments ..
+      REAL               AB( LDAB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLANGB  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the element of  largest absolute value  of an
+*  n by n band matrix  A,  with kl sub-diagonals and ku super-diagonals.
+*
+*  Description
+*  ===========
+*
+*  SLANGB returns the value
+*
+*     SLANGB = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in SLANGB as described
+*          above.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, SLANGB is
+*          set to zero.
+*
+*  KL      (input) INTEGER
+*          The number of sub-diagonals of the matrix A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of super-diagonals of the matrix A.  KU >= 0.
+*
+*  AB      (input) REAL array, dimension (LDAB,N)
+*          The band matrix A, stored in rows 1 to KL+KU+1.  The j-th
+*          column of A is stored in the j-th column of the array AB as
+*          follows:
+*          AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KL+KU+1.
+*
+*  WORK    (workspace) REAL array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I'; otherwise, WORK is not
+*          referenced.
+*
+* =====================================================================
+*
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, K, L
+      REAL               SCALE, SUM, VALUE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLASSQ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         VALUE = ZERO
+         DO 20 J = 1, N
+            DO 10 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 )
+               VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   10       CONTINUE
+   20    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
+*
+*        Find norm1(A).
+*
+         VALUE = ZERO
+         DO 40 J = 1, N
+            SUM = ZERO
+            DO 30 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 )
+               SUM = SUM + ABS( AB( I, J ) )
+   30       CONTINUE
+            VALUE = MAX( VALUE, SUM )
+   40    CONTINUE
+      ELSE IF( LSAME( NORM, 'I' ) ) THEN
+*
+*        Find normI(A).
+*
+         DO 50 I = 1, N
+            WORK( I ) = ZERO
+   50    CONTINUE
+         DO 70 J = 1, N
+            K = KU + 1 - J
+            DO 60 I = MAX( 1, J-KU ), MIN( N, J+KL )
+               WORK( I ) = WORK( I ) + ABS( AB( K+I, J ) )
+   60       CONTINUE
+   70    CONTINUE
+         VALUE = ZERO
+         DO 80 I = 1, N
+            VALUE = MAX( VALUE, WORK( I ) )
+   80    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         DO 90 J = 1, N
+            L = MAX( 1, J-KU )
+            K = KU + 1 - J + L
+            CALL SLASSQ( MIN( N, J+KL )-L+1, AB( K, J ), 1, SCALE, SUM )
+   90    CONTINUE
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      SLANGB = VALUE
+      RETURN
+*
+*     End of SLANGB
+*
+      END
diff --git a/costa/native/external/lapack/slange.f b/costa/native/external/lapack/slange.f
new file mode 100644
index 000000000..beaeab919
--- /dev/null
+++ b/costa/native/external/lapack/slange.f
@@ -0,0 +1,145 @@
+      REAL             FUNCTION SLANGE( NORM, M, N, A, LDA, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLANGE  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  real matrix A.
+*
+*  Description
+*  ===========
+*
+*  SLANGE returns the value
+*
+*     SLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in SLANGE as described
+*          above.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.  When M = 0,
+*          SLANGE is set to zero.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.  When N = 0,
+*          SLANGE is set to zero.
+*
+*  A       (input) REAL array, dimension (LDA,N)
+*          The m by n matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(M,1).
+*
+*  WORK    (workspace) REAL array, dimension (LWORK),
+*          where LWORK >= M when NORM = 'I'; otherwise, WORK is not
+*          referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      REAL               SCALE, SUM, VALUE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLASSQ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( MIN( M, N ).EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         VALUE = ZERO
+         DO 20 J = 1, N
+            DO 10 I = 1, M
+               VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   10       CONTINUE
+   20    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
+*
+*        Find norm1(A).
+*
+         VALUE = ZERO
+         DO 40 J = 1, N
+            SUM = ZERO
+            DO 30 I = 1, M
+               SUM = SUM + ABS( A( I, J ) )
+   30       CONTINUE
+            VALUE = MAX( VALUE, SUM )
+   40    CONTINUE
+      ELSE IF( LSAME( NORM, 'I' ) ) THEN
+*
+*        Find normI(A).
+*
+         DO 50 I = 1, M
+            WORK( I ) = ZERO
+   50    CONTINUE
+         DO 70 J = 1, N
+            DO 60 I = 1, M
+               WORK( I ) = WORK( I ) + ABS( A( I, J ) )
+   60       CONTINUE
+   70    CONTINUE
+         VALUE = ZERO
+         DO 80 I = 1, M
+            VALUE = MAX( VALUE, WORK( I ) )
+   80    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         DO 90 J = 1, N
+            CALL SLASSQ( M, A( 1, J ), 1, SCALE, SUM )
+   90    CONTINUE
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      SLANGE = VALUE
+      RETURN
+*
+*     End of SLANGE
+*
+      END
diff --git a/costa/native/external/lapack/slangt.f b/costa/native/external/lapack/slangt.f
new file mode 100644
index 000000000..7dd25cfee
--- /dev/null
+++ b/costa/native/external/lapack/slangt.f
@@ -0,0 +1,142 @@
+      REAL             FUNCTION SLANGT( NORM, N, DL, D, DU )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            N
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * ), DL( * ), DU( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLANGT  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  real tridiagonal matrix A.
+*
+*  Description
+*  ===========
+*
+*  SLANGT returns the value
+*
+*     SLANGT = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in SLANGT as described
+*          above.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, SLANGT is
+*          set to zero.
+*
+*  DL      (input) REAL array, dimension (N-1)
+*          The (n-1) sub-diagonal elements of A.
+*
+*  D       (input) REAL array, dimension (N)
+*          The diagonal elements of A.
+*
+*  DU      (input) REAL array, dimension (N-1)
+*          The (n-1) super-diagonal elements of A.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      REAL               ANORM, SCALE, SUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.LE.0 ) THEN
+         ANORM = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         ANORM = ABS( D( N ) )
+         DO 10 I = 1, N - 1
+            ANORM = MAX( ANORM, ABS( DL( I ) ) )
+            ANORM = MAX( ANORM, ABS( D( I ) ) )
+            ANORM = MAX( ANORM, ABS( DU( I ) ) )
+   10    CONTINUE
+      ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' ) THEN
+*
+*        Find norm1(A).
+*
+         IF( N.EQ.1 ) THEN
+            ANORM = ABS( D( 1 ) )
+         ELSE
+            ANORM = MAX( ABS( D( 1 ) )+ABS( DL( 1 ) ),
+     $              ABS( D( N ) )+ABS( DU( N-1 ) ) )
+            DO 20 I = 2, N - 1
+               ANORM = MAX( ANORM, ABS( D( I ) )+ABS( DL( I ) )+
+     $                 ABS( DU( I-1 ) ) )
+   20       CONTINUE
+         END IF
+      ELSE IF( LSAME( NORM, 'I' ) ) THEN
+*
+*        Find normI(A).
+*
+         IF( N.EQ.1 ) THEN
+            ANORM = ABS( D( 1 ) )
+         ELSE
+            ANORM = MAX( ABS( D( 1 ) )+ABS( DU( 1 ) ),
+     $              ABS( D( N ) )+ABS( DL( N-1 ) ) )
+            DO 30 I = 2, N - 1
+               ANORM = MAX( ANORM, ABS( D( I ) )+ABS( DU( I ) )+
+     $                 ABS( DL( I-1 ) ) )
+   30       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         CALL SLASSQ( N, D, 1, SCALE, SUM )
+         IF( N.GT.1 ) THEN
+            CALL SLASSQ( N-1, DL, 1, SCALE, SUM )
+            CALL SLASSQ( N-1, DU, 1, SCALE, SUM )
+         END IF
+         ANORM = SCALE*SQRT( SUM )
+      END IF
+*
+      SLANGT = ANORM
+      RETURN
+*
+*     End of SLANGT
+*
+      END
diff --git a/costa/native/external/lapack/slanhs.f b/costa/native/external/lapack/slanhs.f
new file mode 100644
index 000000000..a8c314bbe
--- /dev/null
+++ b/costa/native/external/lapack/slanhs.f
@@ -0,0 +1,142 @@
+      REAL             FUNCTION SLANHS( NORM, N, A, LDA, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            LDA, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLANHS  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  Hessenberg matrix A.
+*
+*  Description
+*  ===========
+*
+*  SLANHS returns the value
+*
+*     SLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in SLANHS as described
+*          above.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, SLANHS is
+*          set to zero.
+*
+*  A       (input) REAL array, dimension (LDA,N)
+*          The n by n upper Hessenberg matrix A; the part of A below the
+*          first sub-diagonal is not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(N,1).
+*
+*  WORK    (workspace) REAL array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I'; otherwise, WORK is not
+*          referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      REAL               SCALE, SUM, VALUE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLASSQ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         VALUE = ZERO
+         DO 20 J = 1, N
+            DO 10 I = 1, MIN( N, J+1 )
+               VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   10       CONTINUE
+   20    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
+*
+*        Find norm1(A).
+*
+         VALUE = ZERO
+         DO 40 J = 1, N
+            SUM = ZERO
+            DO 30 I = 1, MIN( N, J+1 )
+               SUM = SUM + ABS( A( I, J ) )
+   30       CONTINUE
+            VALUE = MAX( VALUE, SUM )
+   40    CONTINUE
+      ELSE IF( LSAME( NORM, 'I' ) ) THEN
+*
+*        Find normI(A).
+*
+         DO 50 I = 1, N
+            WORK( I ) = ZERO
+   50    CONTINUE
+         DO 70 J = 1, N
+            DO 60 I = 1, MIN( N, J+1 )
+               WORK( I ) = WORK( I ) + ABS( A( I, J ) )
+   60       CONTINUE
+   70    CONTINUE
+         VALUE = ZERO
+         DO 80 I = 1, N
+            VALUE = MAX( VALUE, WORK( I ) )
+   80    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         DO 90 J = 1, N
+            CALL SLASSQ( MIN( N, J+1 ), A( 1, J ), 1, SCALE, SUM )
+   90    CONTINUE
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      SLANHS = VALUE
+      RETURN
+*
+*     End of SLANHS
+*
+      END
diff --git a/costa/native/external/lapack/slansb.f b/costa/native/external/lapack/slansb.f
new file mode 100644
index 000000000..a0e0280c9
--- /dev/null
+++ b/costa/native/external/lapack/slansb.f
@@ -0,0 +1,187 @@
+      REAL             FUNCTION SLANSB( NORM, UPLO, N, K, AB, LDAB,
+     $                 WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM, UPLO
+      INTEGER            K, LDAB, N
+*     ..
+*     .. Array Arguments ..
+      REAL               AB( LDAB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLANSB  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the element of  largest absolute value  of an
+*  n by n symmetric band matrix A,  with k super-diagonals.
+*
+*  Description
+*  ===========
+*
+*  SLANSB returns the value
+*
+*     SLANSB = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in SLANSB as described
+*          above.
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          band matrix A is supplied.
+*          = 'U':  Upper triangular part is supplied
+*          = 'L':  Lower triangular part is supplied
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, SLANSB is
+*          set to zero.
+*
+*  K       (input) INTEGER
+*          The number of super-diagonals or sub-diagonals of the
+*          band matrix A.  K >= 0.
+*
+*  AB      (input) REAL array, dimension (LDAB,N)
+*          The upper or lower triangle of the symmetric band matrix A,
+*          stored in the first K+1 rows of AB.  The j-th column of A is
+*          stored in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)   = A(i,j) for j<=i<=min(n,j+k).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= K+1.
+*
+*  WORK    (workspace) REAL array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
+*          WORK is not referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, L
+      REAL               ABSA, SCALE, SUM, VALUE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLASSQ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 20 J = 1, N
+               DO 10 I = MAX( K+2-J, 1 ), K + 1
+                  VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+            DO 40 J = 1, N
+               DO 30 I = 1, MIN( N+1-J, K+1 )
+                  VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR.
+     $         ( NORM.EQ.'1' ) ) THEN
+*
+*        Find normI(A) ( = norm1(A), since A is symmetric).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 60 J = 1, N
+               SUM = ZERO
+               L = K + 1 - J
+               DO 50 I = MAX( 1, J-K ), J - 1
+                  ABSA = ABS( AB( L+I, J ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+   50          CONTINUE
+               WORK( J ) = SUM + ABS( AB( K+1, J ) )
+   60       CONTINUE
+            DO 70 I = 1, N
+               VALUE = MAX( VALUE, WORK( I ) )
+   70       CONTINUE
+         ELSE
+            DO 80 I = 1, N
+               WORK( I ) = ZERO
+   80       CONTINUE
+            DO 100 J = 1, N
+               SUM = WORK( J ) + ABS( AB( 1, J ) )
+               L = 1 - J
+               DO 90 I = J + 1, MIN( N, J+K )
+                  ABSA = ABS( AB( L+I, J ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+   90          CONTINUE
+               VALUE = MAX( VALUE, SUM )
+  100       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         IF( K.GT.0 ) THEN
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               DO 110 J = 2, N
+                  CALL SLASSQ( MIN( J-1, K ), AB( MAX( K+2-J, 1 ), J ),
+     $                         1, SCALE, SUM )
+  110          CONTINUE
+               L = K + 1
+            ELSE
+               DO 120 J = 1, N - 1
+                  CALL SLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE,
+     $                         SUM )
+  120          CONTINUE
+               L = 1
+            END IF
+            SUM = 2*SUM
+         ELSE
+            L = 1
+         END IF
+         CALL SLASSQ( N, AB( L, 1 ), LDAB, SCALE, SUM )
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      SLANSB = VALUE
+      RETURN
+*
+*     End of SLANSB
+*
+      END
diff --git a/costa/native/external/lapack/slansp.f b/costa/native/external/lapack/slansp.f
new file mode 100644
index 000000000..263728cfb
--- /dev/null
+++ b/costa/native/external/lapack/slansp.f
@@ -0,0 +1,197 @@
+      REAL             FUNCTION SLANSP( NORM, UPLO, N, AP, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM, UPLO
+      INTEGER            N
+*     ..
+*     .. Array Arguments ..
+      REAL               AP( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLANSP  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  real symmetric matrix A,  supplied in packed form.
+*
+*  Description
+*  ===========
+*
+*  SLANSP returns the value
+*
+*     SLANSP = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in SLANSP as described
+*          above.
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is supplied.
+*          = 'U':  Upper triangular part of A is supplied
+*          = 'L':  Lower triangular part of A is supplied
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, SLANSP is
+*          set to zero.
+*
+*  AP      (input) REAL array, dimension (N*(N+1)/2)
+*          The upper or lower triangle of the symmetric matrix A, packed
+*          columnwise in a linear array.  The j-th column of A is stored
+*          in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*  WORK    (workspace) REAL array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
+*          WORK is not referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, K
+      REAL               ABSA, SCALE, SUM, VALUE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLASSQ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            K = 1
+            DO 20 J = 1, N
+               DO 10 I = K, K + J - 1
+                  VALUE = MAX( VALUE, ABS( AP( I ) ) )
+   10          CONTINUE
+               K = K + J
+   20       CONTINUE
+         ELSE
+            K = 1
+            DO 40 J = 1, N
+               DO 30 I = K, K + N - J
+                  VALUE = MAX( VALUE, ABS( AP( I ) ) )
+   30          CONTINUE
+               K = K + N - J + 1
+   40       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR.
+     $         ( NORM.EQ.'1' ) ) THEN
+*
+*        Find normI(A) ( = norm1(A), since A is symmetric).
+*
+         VALUE = ZERO
+         K = 1
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 60 J = 1, N
+               SUM = ZERO
+               DO 50 I = 1, J - 1
+                  ABSA = ABS( AP( K ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+                  K = K + 1
+   50          CONTINUE
+               WORK( J ) = SUM + ABS( AP( K ) )
+               K = K + 1
+   60       CONTINUE
+            DO 70 I = 1, N
+               VALUE = MAX( VALUE, WORK( I ) )
+   70       CONTINUE
+         ELSE
+            DO 80 I = 1, N
+               WORK( I ) = ZERO
+   80       CONTINUE
+            DO 100 J = 1, N
+               SUM = WORK( J ) + ABS( AP( K ) )
+               K = K + 1
+               DO 90 I = J + 1, N
+                  ABSA = ABS( AP( K ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+                  K = K + 1
+   90          CONTINUE
+               VALUE = MAX( VALUE, SUM )
+  100       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         K = 2
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 110 J = 2, N
+               CALL SLASSQ( J-1, AP( K ), 1, SCALE, SUM )
+               K = K + J
+  110       CONTINUE
+         ELSE
+            DO 120 J = 1, N - 1
+               CALL SLASSQ( N-J, AP( K ), 1, SCALE, SUM )
+               K = K + N - J + 1
+  120       CONTINUE
+         END IF
+         SUM = 2*SUM
+         K = 1
+         DO 130 I = 1, N
+            IF( AP( K ).NE.ZERO ) THEN
+               ABSA = ABS( AP( K ) )
+               IF( SCALE.LT.ABSA ) THEN
+                  SUM = ONE + SUM*( SCALE / ABSA )**2
+                  SCALE = ABSA
+               ELSE
+                  SUM = SUM + ( ABSA / SCALE )**2
+               END IF
+            END IF
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               K = K + I + 1
+            ELSE
+               K = K + N - I + 1
+            END IF
+  130    CONTINUE
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      SLANSP = VALUE
+      RETURN
+*
+*     End of SLANSP
+*
+      END
diff --git a/costa/native/external/lapack/slanst.f b/costa/native/external/lapack/slanst.f
new file mode 100644
index 000000000..71aa0b1d9
--- /dev/null
+++ b/costa/native/external/lapack/slanst.f
@@ -0,0 +1,125 @@
+      REAL             FUNCTION SLANST( NORM, N, D, E )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            N
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * ), E( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLANST  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  real symmetric tridiagonal matrix A.
+*
+*  Description
+*  ===========
+*
+*  SLANST returns the value
+*
+*     SLANST = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in SLANST as described
+*          above.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, SLANST is
+*          set to zero.
+*
+*  D       (input) REAL array, dimension (N)
+*          The diagonal elements of A.
+*
+*  E       (input) REAL array, dimension (N-1)
+*          The (n-1) sub-diagonal or super-diagonal elements of A.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      REAL               ANORM, SCALE, SUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.LE.0 ) THEN
+         ANORM = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         ANORM = ABS( D( N ) )
+         DO 10 I = 1, N - 1
+            ANORM = MAX( ANORM, ABS( D( I ) ) )
+            ANORM = MAX( ANORM, ABS( E( I ) ) )
+   10    CONTINUE
+      ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' .OR.
+     $         LSAME( NORM, 'I' ) ) THEN
+*
+*        Find norm1(A).
+*
+         IF( N.EQ.1 ) THEN
+            ANORM = ABS( D( 1 ) )
+         ELSE
+            ANORM = MAX( ABS( D( 1 ) )+ABS( E( 1 ) ),
+     $              ABS( E( N-1 ) )+ABS( D( N ) ) )
+            DO 20 I = 2, N - 1
+               ANORM = MAX( ANORM, ABS( D( I ) )+ABS( E( I ) )+
+     $                 ABS( E( I-1 ) ) )
+   20       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         IF( N.GT.1 ) THEN
+            CALL SLASSQ( N-1, E, 1, SCALE, SUM )
+            SUM = 2*SUM
+         END IF
+         CALL SLASSQ( N, D, 1, SCALE, SUM )
+         ANORM = SCALE*SQRT( SUM )
+      END IF
+*
+      SLANST = ANORM
+      RETURN
+*
+*     End of SLANST
+*
+      END
diff --git a/costa/native/external/lapack/slansy.f b/costa/native/external/lapack/slansy.f
new file mode 100644
index 000000000..fb922ea17
--- /dev/null
+++ b/costa/native/external/lapack/slansy.f
@@ -0,0 +1,174 @@
+      REAL             FUNCTION SLANSY( NORM, UPLO, N, A, LDA, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM, UPLO
+      INTEGER            LDA, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLANSY  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  real symmetric matrix A.
+*
+*  Description
+*  ===========
+*
+*  SLANSY returns the value
+*
+*     SLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in SLANSY as described
+*          above.
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is to be referenced.
+*          = 'U':  Upper triangular part of A is referenced
+*          = 'L':  Lower triangular part of A is referenced
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, SLANSY is
+*          set to zero.
+*
+*  A       (input) REAL array, dimension (LDA,N)
+*          The symmetric matrix A.  If UPLO = 'U', the leading n by n
+*          upper triangular part of A contains the upper triangular part
+*          of the matrix A, and the strictly lower triangular part of A
+*          is not referenced.  If UPLO = 'L', the leading n by n lower
+*          triangular part of A contains the lower triangular part of
+*          the matrix A, and the strictly upper triangular part of A is
+*          not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(N,1).
+*
+*  WORK    (workspace) REAL array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
+*          WORK is not referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      REAL               ABSA, SCALE, SUM, VALUE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLASSQ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 20 J = 1, N
+               DO 10 I = 1, J
+                  VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+            DO 40 J = 1, N
+               DO 30 I = J, N
+                  VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR.
+     $         ( NORM.EQ.'1' ) ) THEN
+*
+*        Find normI(A) ( = norm1(A), since A is symmetric).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 60 J = 1, N
+               SUM = ZERO
+               DO 50 I = 1, J - 1
+                  ABSA = ABS( A( I, J ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+   50          CONTINUE
+               WORK( J ) = SUM + ABS( A( J, J ) )
+   60       CONTINUE
+            DO 70 I = 1, N
+               VALUE = MAX( VALUE, WORK( I ) )
+   70       CONTINUE
+         ELSE
+            DO 80 I = 1, N
+               WORK( I ) = ZERO
+   80       CONTINUE
+            DO 100 J = 1, N
+               SUM = WORK( J ) + ABS( A( J, J ) )
+               DO 90 I = J + 1, N
+                  ABSA = ABS( A( I, J ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+   90          CONTINUE
+               VALUE = MAX( VALUE, SUM )
+  100       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 110 J = 2, N
+               CALL SLASSQ( J-1, A( 1, J ), 1, SCALE, SUM )
+  110       CONTINUE
+         ELSE
+            DO 120 J = 1, N - 1
+               CALL SLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM )
+  120       CONTINUE
+         END IF
+         SUM = 2*SUM
+         CALL SLASSQ( N, A, LDA+1, SCALE, SUM )
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      SLANSY = VALUE
+      RETURN
+*
+*     End of SLANSY
+*
+      END
diff --git a/costa/native/external/lapack/slantb.f b/costa/native/external/lapack/slantb.f
new file mode 100644
index 000000000..3b01cf0ea
--- /dev/null
+++ b/costa/native/external/lapack/slantb.f
@@ -0,0 +1,285 @@
+      REAL             FUNCTION SLANTB( NORM, UPLO, DIAG, N, K, AB,
+     $                 LDAB, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORM, UPLO
+      INTEGER            K, LDAB, N
+*     ..
+*     .. Array Arguments ..
+      REAL               AB( LDAB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLANTB  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the element of  largest absolute value  of an
+*  n by n triangular band matrix A,  with ( k + 1 ) diagonals.
+*
+*  Description
+*  ===========
+*
+*  SLANTB returns the value
+*
+*     SLANTB = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in SLANTB as described
+*          above.
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the matrix A is upper or lower triangular.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  DIAG    (input) CHARACTER*1
+*          Specifies whether or not the matrix A is unit triangular.
+*          = 'N':  Non-unit triangular
+*          = 'U':  Unit triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, SLANTB is
+*          set to zero.
+*
+*  K       (input) INTEGER
+*          The number of super-diagonals of the matrix A if UPLO = 'U',
+*          or the number of sub-diagonals of the matrix A if UPLO = 'L'.
+*          K >= 0.
+*
+*  AB      (input) REAL array, dimension (LDAB,N)
+*          The upper or lower triangular band matrix A, stored in the
+*          first k+1 rows of AB.  The j-th column of A is stored
+*          in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)   = A(i,j) for j<=i<=min(n,j+k).
+*          Note that when DIAG = 'U', the elements of the array AB
+*          corresponding to the diagonal elements of the matrix A are
+*          not referenced, but are assumed to be one.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= K+1.
+*
+*  WORK    (workspace) REAL array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I'; otherwise, WORK is not
+*          referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UDIAG
+      INTEGER            I, J, L
+      REAL               SCALE, SUM, VALUE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLASSQ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         IF( LSAME( DIAG, 'U' ) ) THEN
+            VALUE = ONE
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               DO 20 J = 1, N
+                  DO 10 I = MAX( K+2-J, 1 ), K
+                     VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40 J = 1, N
+                  DO 30 I = 2, MIN( N+1-J, K+1 )
+                     VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   30             CONTINUE
+   40          CONTINUE
+            END IF
+         ELSE
+            VALUE = ZERO
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               DO 60 J = 1, N
+                  DO 50 I = MAX( K+2-J, 1 ), K + 1
+                     VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80 J = 1, N
+                  DO 70 I = 1, MIN( N+1-J, K+1 )
+                     VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
+*
+*        Find norm1(A).
+*
+         VALUE = ZERO
+         UDIAG = LSAME( DIAG, 'U' )
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 110 J = 1, N
+               IF( UDIAG ) THEN
+                  SUM = ONE
+                  DO 90 I = MAX( K+2-J, 1 ), K
+                     SUM = SUM + ABS( AB( I, J ) )
+   90             CONTINUE
+               ELSE
+                  SUM = ZERO
+                  DO 100 I = MAX( K+2-J, 1 ), K + 1
+                     SUM = SUM + ABS( AB( I, J ) )
+  100             CONTINUE
+               END IF
+               VALUE = MAX( VALUE, SUM )
+  110       CONTINUE
+         ELSE
+            DO 140 J = 1, N
+               IF( UDIAG ) THEN
+                  SUM = ONE
+                  DO 120 I = 2, MIN( N+1-J, K+1 )
+                     SUM = SUM + ABS( AB( I, J ) )
+  120             CONTINUE
+               ELSE
+                  SUM = ZERO
+                  DO 130 I = 1, MIN( N+1-J, K+1 )
+                     SUM = SUM + ABS( AB( I, J ) )
+  130             CONTINUE
+               END IF
+               VALUE = MAX( VALUE, SUM )
+  140       CONTINUE
+         END IF
+      ELSE IF( LSAME( NORM, 'I' ) ) THEN
+*
+*        Find normI(A).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               DO 150 I = 1, N
+                  WORK( I ) = ONE
+  150          CONTINUE
+               DO 170 J = 1, N
+                  L = K + 1 - J
+                  DO 160 I = MAX( 1, J-K ), J - 1
+                     WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) )
+  160             CONTINUE
+  170          CONTINUE
+            ELSE
+               DO 180 I = 1, N
+                  WORK( I ) = ZERO
+  180          CONTINUE
+               DO 200 J = 1, N
+                  L = K + 1 - J
+                  DO 190 I = MAX( 1, J-K ), J
+                     WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) )
+  190             CONTINUE
+  200          CONTINUE
+            END IF
+         ELSE
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               DO 210 I = 1, N
+                  WORK( I ) = ONE
+  210          CONTINUE
+               DO 230 J = 1, N
+                  L = 1 - J
+                  DO 220 I = J + 1, MIN( N, J+K )
+                     WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) )
+  220             CONTINUE
+  230          CONTINUE
+            ELSE
+               DO 240 I = 1, N
+                  WORK( I ) = ZERO
+  240          CONTINUE
+               DO 260 J = 1, N
+                  L = 1 - J
+                  DO 250 I = J, MIN( N, J+K )
+                     WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) )
+  250             CONTINUE
+  260          CONTINUE
+            END IF
+         END IF
+         DO 270 I = 1, N
+            VALUE = MAX( VALUE, WORK( I ) )
+  270    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               SCALE = ONE
+               SUM = N
+               IF( K.GT.0 ) THEN
+                  DO 280 J = 2, N
+                     CALL SLASSQ( MIN( J-1, K ),
+     $                            AB( MAX( K+2-J, 1 ), J ), 1, SCALE,
+     $                            SUM )
+  280             CONTINUE
+               END IF
+            ELSE
+               SCALE = ZERO
+               SUM = ONE
+               DO 290 J = 1, N
+                  CALL SLASSQ( MIN( J, K+1 ), AB( MAX( K+2-J, 1 ), J ),
+     $                         1, SCALE, SUM )
+  290          CONTINUE
+            END IF
+         ELSE
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               SCALE = ONE
+               SUM = N
+               IF( K.GT.0 ) THEN
+                  DO 300 J = 1, N - 1
+                     CALL SLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE,
+     $                            SUM )
+  300             CONTINUE
+               END IF
+            ELSE
+               SCALE = ZERO
+               SUM = ONE
+               DO 310 J = 1, N
+                  CALL SLASSQ( MIN( N-J+1, K+1 ), AB( 1, J ), 1, SCALE,
+     $                         SUM )
+  310          CONTINUE
+            END IF
+         END IF
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      SLANTB = VALUE
+      RETURN
+*
+*     End of SLANTB
+*
+      END
diff --git a/costa/native/external/lapack/slantp.f b/costa/native/external/lapack/slantp.f
new file mode 100644
index 000000000..d034508d5
--- /dev/null
+++ b/costa/native/external/lapack/slantp.f
@@ -0,0 +1,286 @@
+      REAL             FUNCTION SLANTP( NORM, UPLO, DIAG, N, AP, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORM, UPLO
+      INTEGER            N
+*     ..
+*     .. Array Arguments ..
+      REAL               AP( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLANTP  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  triangular matrix A, supplied in packed form.
+*
+*  Description
+*  ===========
+*
+*  SLANTP returns the value
+*
+*     SLANTP = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in SLANTP as described
+*          above.
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the matrix A is upper or lower triangular.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  DIAG    (input) CHARACTER*1
+*          Specifies whether or not the matrix A is unit triangular.
+*          = 'N':  Non-unit triangular
+*          = 'U':  Unit triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, SLANTP is
+*          set to zero.
+*
+*  AP      (input) REAL array, dimension (N*(N+1)/2)
+*          The upper or lower triangular matrix A, packed columnwise in
+*          a linear array.  The j-th column of A is stored in the array
+*          AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*          Note that when DIAG = 'U', the elements of the array AP
+*          corresponding to the diagonal elements of the matrix A are
+*          not referenced, but are assumed to be one.
+*
+*  WORK    (workspace) REAL array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I'; otherwise, WORK is not
+*          referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UDIAG
+      INTEGER            I, J, K
+      REAL               SCALE, SUM, VALUE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLASSQ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         K = 1
+         IF( LSAME( DIAG, 'U' ) ) THEN
+            VALUE = ONE
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               DO 20 J = 1, N
+                  DO 10 I = K, K + J - 2
+                     VALUE = MAX( VALUE, ABS( AP( I ) ) )
+   10             CONTINUE
+                  K = K + J
+   20          CONTINUE
+            ELSE
+               DO 40 J = 1, N
+                  DO 30 I = K + 1, K + N - J
+                     VALUE = MAX( VALUE, ABS( AP( I ) ) )
+   30             CONTINUE
+                  K = K + N - J + 1
+   40          CONTINUE
+            END IF
+         ELSE
+            VALUE = ZERO
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               DO 60 J = 1, N
+                  DO 50 I = K, K + J - 1
+                     VALUE = MAX( VALUE, ABS( AP( I ) ) )
+   50             CONTINUE
+                  K = K + J
+   60          CONTINUE
+            ELSE
+               DO 80 J = 1, N
+                  DO 70 I = K, K + N - J
+                     VALUE = MAX( VALUE, ABS( AP( I ) ) )
+   70             CONTINUE
+                  K = K + N - J + 1
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
+*
+*        Find norm1(A).
+*
+         VALUE = ZERO
+         K = 1
+         UDIAG = LSAME( DIAG, 'U' )
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 110 J = 1, N
+               IF( UDIAG ) THEN
+                  SUM = ONE
+                  DO 90 I = K, K + J - 2
+                     SUM = SUM + ABS( AP( I ) )
+   90             CONTINUE
+               ELSE
+                  SUM = ZERO
+                  DO 100 I = K, K + J - 1
+                     SUM = SUM + ABS( AP( I ) )
+  100             CONTINUE
+               END IF
+               K = K + J
+               VALUE = MAX( VALUE, SUM )
+  110       CONTINUE
+         ELSE
+            DO 140 J = 1, N
+               IF( UDIAG ) THEN
+                  SUM = ONE
+                  DO 120 I = K + 1, K + N - J
+                     SUM = SUM + ABS( AP( I ) )
+  120             CONTINUE
+               ELSE
+                  SUM = ZERO
+                  DO 130 I = K, K + N - J
+                     SUM = SUM + ABS( AP( I ) )
+  130             CONTINUE
+               END IF
+               K = K + N - J + 1
+               VALUE = MAX( VALUE, SUM )
+  140       CONTINUE
+         END IF
+      ELSE IF( LSAME( NORM, 'I' ) ) THEN
+*
+*        Find normI(A).
+*
+         K = 1
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               DO 150 I = 1, N
+                  WORK( I ) = ONE
+  150          CONTINUE
+               DO 170 J = 1, N
+                  DO 160 I = 1, J - 1
+                     WORK( I ) = WORK( I ) + ABS( AP( K ) )
+                     K = K + 1
+  160             CONTINUE
+                  K = K + 1
+  170          CONTINUE
+            ELSE
+               DO 180 I = 1, N
+                  WORK( I ) = ZERO
+  180          CONTINUE
+               DO 200 J = 1, N
+                  DO 190 I = 1, J
+                     WORK( I ) = WORK( I ) + ABS( AP( K ) )
+                     K = K + 1
+  190             CONTINUE
+  200          CONTINUE
+            END IF
+         ELSE
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               DO 210 I = 1, N
+                  WORK( I ) = ONE
+  210          CONTINUE
+               DO 230 J = 1, N
+                  K = K + 1
+                  DO 220 I = J + 1, N
+                     WORK( I ) = WORK( I ) + ABS( AP( K ) )
+                     K = K + 1
+  220             CONTINUE
+  230          CONTINUE
+            ELSE
+               DO 240 I = 1, N
+                  WORK( I ) = ZERO
+  240          CONTINUE
+               DO 260 J = 1, N
+                  DO 250 I = J, N
+                     WORK( I ) = WORK( I ) + ABS( AP( K ) )
+                     K = K + 1
+  250             CONTINUE
+  260          CONTINUE
+            END IF
+         END IF
+         VALUE = ZERO
+         DO 270 I = 1, N
+            VALUE = MAX( VALUE, WORK( I ) )
+  270    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               SCALE = ONE
+               SUM = N
+               K = 2
+               DO 280 J = 2, N
+                  CALL SLASSQ( J-1, AP( K ), 1, SCALE, SUM )
+                  K = K + J
+  280          CONTINUE
+            ELSE
+               SCALE = ZERO
+               SUM = ONE
+               K = 1
+               DO 290 J = 1, N
+                  CALL SLASSQ( J, AP( K ), 1, SCALE, SUM )
+                  K = K + J
+  290          CONTINUE
+            END IF
+         ELSE
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               SCALE = ONE
+               SUM = N
+               K = 2
+               DO 300 J = 1, N - 1
+                  CALL SLASSQ( N-J, AP( K ), 1, SCALE, SUM )
+                  K = K + N - J + 1
+  300          CONTINUE
+            ELSE
+               SCALE = ZERO
+               SUM = ONE
+               K = 1
+               DO 310 J = 1, N
+                  CALL SLASSQ( N-J+1, AP( K ), 1, SCALE, SUM )
+                  K = K + N - J + 1
+  310          CONTINUE
+            END IF
+         END IF
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      SLANTP = VALUE
+      RETURN
+*
+*     End of SLANTP
+*
+      END
diff --git a/costa/native/external/lapack/slantr.f b/costa/native/external/lapack/slantr.f
new file mode 100644
index 000000000..89e6afd34
--- /dev/null
+++ b/costa/native/external/lapack/slantr.f
@@ -0,0 +1,277 @@
+      REAL             FUNCTION SLANTR( NORM, UPLO, DIAG, M, N, A, LDA,
+     $                 WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORM, UPLO
+      INTEGER            LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLANTR  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  trapezoidal or triangular matrix A.
+*
+*  Description
+*  ===========
+*
+*  SLANTR returns the value
+*
+*     SLANTR = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in SLANTR as described
+*          above.
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the matrix A is upper or lower trapezoidal.
+*          = 'U':  Upper trapezoidal
+*          = 'L':  Lower trapezoidal
+*          Note that A is triangular instead of trapezoidal if M = N.
+*
+*  DIAG    (input) CHARACTER*1
+*          Specifies whether or not the matrix A has unit diagonal.
+*          = 'N':  Non-unit diagonal
+*          = 'U':  Unit diagonal
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0, and if
+*          UPLO = 'U', M <= N.  When M = 0, SLANTR is set to zero.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0, and if
+*          UPLO = 'L', N <= M.  When N = 0, SLANTR is set to zero.
+*
+*  A       (input) REAL array, dimension (LDA,N)
+*          The trapezoidal matrix A (A is triangular if M = N).
+*          If UPLO = 'U', the leading m by n upper trapezoidal part of
+*          the array A contains the upper trapezoidal matrix, and the
+*          strictly lower triangular part of A is not referenced.
+*          If UPLO = 'L', the leading m by n lower trapezoidal part of
+*          the array A contains the lower trapezoidal matrix, and the
+*          strictly upper triangular part of A is not referenced.  Note
+*          that when DIAG = 'U', the diagonal elements of A are not
+*          referenced and are assumed to be one.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(M,1).
+*
+*  WORK    (workspace) REAL array, dimension (LWORK),
+*          where LWORK >= M when NORM = 'I'; otherwise, WORK is not
+*          referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UDIAG
+      INTEGER            I, J
+      REAL               SCALE, SUM, VALUE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLASSQ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( MIN( M, N ).EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         IF( LSAME( DIAG, 'U' ) ) THEN
+            VALUE = ONE
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               DO 20 J = 1, N
+                  DO 10 I = 1, MIN( M, J-1 )
+                     VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40 J = 1, N
+                  DO 30 I = J + 1, M
+                     VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   30             CONTINUE
+   40          CONTINUE
+            END IF
+         ELSE
+            VALUE = ZERO
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               DO 60 J = 1, N
+                  DO 50 I = 1, MIN( M, J )
+                     VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80 J = 1, N
+                  DO 70 I = J, M
+                     VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
+*
+*        Find norm1(A).
+*
+         VALUE = ZERO
+         UDIAG = LSAME( DIAG, 'U' )
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 110 J = 1, N
+               IF( ( UDIAG ) .AND. ( J.LE.M ) ) THEN
+                  SUM = ONE
+                  DO 90 I = 1, J - 1
+                     SUM = SUM + ABS( A( I, J ) )
+   90             CONTINUE
+               ELSE
+                  SUM = ZERO
+                  DO 100 I = 1, MIN( M, J )
+                     SUM = SUM + ABS( A( I, J ) )
+  100             CONTINUE
+               END IF
+               VALUE = MAX( VALUE, SUM )
+  110       CONTINUE
+         ELSE
+            DO 140 J = 1, N
+               IF( UDIAG ) THEN
+                  SUM = ONE
+                  DO 120 I = J + 1, M
+                     SUM = SUM + ABS( A( I, J ) )
+  120             CONTINUE
+               ELSE
+                  SUM = ZERO
+                  DO 130 I = J, M
+                     SUM = SUM + ABS( A( I, J ) )
+  130             CONTINUE
+               END IF
+               VALUE = MAX( VALUE, SUM )
+  140       CONTINUE
+         END IF
+      ELSE IF( LSAME( NORM, 'I' ) ) THEN
+*
+*        Find normI(A).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               DO 150 I = 1, M
+                  WORK( I ) = ONE
+  150          CONTINUE
+               DO 170 J = 1, N
+                  DO 160 I = 1, MIN( M, J-1 )
+                     WORK( I ) = WORK( I ) + ABS( A( I, J ) )
+  160             CONTINUE
+  170          CONTINUE
+            ELSE
+               DO 180 I = 1, M
+                  WORK( I ) = ZERO
+  180          CONTINUE
+               DO 200 J = 1, N
+                  DO 190 I = 1, MIN( M, J )
+                     WORK( I ) = WORK( I ) + ABS( A( I, J ) )
+  190             CONTINUE
+  200          CONTINUE
+            END IF
+         ELSE
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               DO 210 I = 1, N
+                  WORK( I ) = ONE
+  210          CONTINUE
+               DO 220 I = N + 1, M
+                  WORK( I ) = ZERO
+  220          CONTINUE
+               DO 240 J = 1, N
+                  DO 230 I = J + 1, M
+                     WORK( I ) = WORK( I ) + ABS( A( I, J ) )
+  230             CONTINUE
+  240          CONTINUE
+            ELSE
+               DO 250 I = 1, M
+                  WORK( I ) = ZERO
+  250          CONTINUE
+               DO 270 J = 1, N
+                  DO 260 I = J, M
+                     WORK( I ) = WORK( I ) + ABS( A( I, J ) )
+  260             CONTINUE
+  270          CONTINUE
+            END IF
+         END IF
+         VALUE = ZERO
+         DO 280 I = 1, M
+            VALUE = MAX( VALUE, WORK( I ) )
+  280    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               SCALE = ONE
+               SUM = MIN( M, N )
+               DO 290 J = 2, N
+                  CALL SLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE, SUM )
+  290          CONTINUE
+            ELSE
+               SCALE = ZERO
+               SUM = ONE
+               DO 300 J = 1, N
+                  CALL SLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE, SUM )
+  300          CONTINUE
+            END IF
+         ELSE
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               SCALE = ONE
+               SUM = MIN( M, N )
+               DO 310 J = 1, N
+                  CALL SLASSQ( M-J, A( MIN( M, J+1 ), J ), 1, SCALE,
+     $                         SUM )
+  310          CONTINUE
+            ELSE
+               SCALE = ZERO
+               SUM = ONE
+               DO 320 J = 1, N
+                  CALL SLASSQ( M-J+1, A( J, J ), 1, SCALE, SUM )
+  320          CONTINUE
+            END IF
+         END IF
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      SLANTR = VALUE
+      RETURN
+*
+*     End of SLANTR
+*
+      END
diff --git a/costa/native/external/lapack/slanv2.f b/costa/native/external/lapack/slanv2.f
new file mode 100644
index 000000000..fdf81f56b
--- /dev/null
+++ b/costa/native/external/lapack/slanv2.f
@@ -0,0 +1,206 @@
+      SUBROUTINE SLANV2( A, B, C, D, RT1R, RT1I, RT2R, RT2I, CS, SN )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      REAL               A, B, C, CS, D, RT1I, RT1R, RT2I, RT2R, SN
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLANV2 computes the Schur factorization of a real 2-by-2 nonsymmetric
+*  matrix in standard form:
+*
+*       [ A  B ] = [ CS -SN ] [ AA  BB ] [ CS  SN ]
+*       [ C  D ]   [ SN  CS ] [ CC  DD ] [-SN  CS ]
+*
+*  where either
+*  1) CC = 0 so that AA and DD are real eigenvalues of the matrix, or
+*  2) AA = DD and BB*CC < 0, so that AA + or - sqrt(BB*CC) are complex
+*  conjugate eigenvalues.
+*
+*  Arguments
+*  =========
+*
+*  A       (input/output) REAL            
+*  B       (input/output) REAL            
+*  C       (input/output) REAL            
+*  D       (input/output) REAL            
+*          On entry, the elements of the input matrix.
+*          On exit, they are overwritten by the elements of the
+*          standardised Schur form.
+*
+*  RT1R    (output) REAL 
+*  RT1I    (output) REAL            
+*  RT2R    (output) REAL            
+*  RT2I    (output) REAL            
+*          The real and imaginary parts of the eigenvalues. If the
+*          eigenvalues are a complex conjugate pair, RT1I > 0.
+*
+*  CS      (output) REAL            
+*  SN      (output) REAL            
+*          Parameters of the rotation matrix.
+*
+*  Further Details
+*  ===============
+*
+*  Modified by V. Sima, Research Institute for Informatics, Bucharest,
+*  Romania, to reduce the risk of cancellation errors,
+*  when computing real eigenvalues, and to ensure, if possible, that
+*  abs(RT1R) >= abs(RT2R).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, HALF, ONE
+      PARAMETER          ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0 )
+      REAL               MULTPL
+      PARAMETER          ( MULTPL = 4.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      REAL               AA, BB, BCMAX, BCMIS, CC, CS1, DD, EPS, P, SAB, 
+     $                   SAC, SCALE, SIGMA, SN1, TAU, TEMP, Z
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH, SLAPY2
+      EXTERNAL           SLAMCH, SLAPY2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SIGN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      EPS = SLAMCH( 'P' )
+      IF( C.EQ.ZERO ) THEN
+         CS = ONE
+         SN = ZERO
+         GO TO 10
+*
+      ELSE IF( B.EQ.ZERO ) THEN
+*
+*        Swap rows and columns
+*
+         CS = ZERO
+         SN = ONE
+         TEMP = D
+         D = A
+         A = TEMP
+         B = -C
+         C = ZERO
+         GO TO 10
+      ELSE IF( (A-D).EQ.ZERO .AND. SIGN( ONE, B ).NE.
+     $   SIGN( ONE, C ) ) THEN
+         CS = ONE
+         SN = ZERO
+         GO TO 10
+      ELSE
+*
+         TEMP = A - D
+         P = HALF*TEMP
+         BCMAX = MAX( ABS( B ), ABS( C ) )
+         BCMIS = MIN( ABS( B ), ABS( C ) )*SIGN( ONE, B )*SIGN( ONE, C )
+         SCALE = MAX( ABS( P ), BCMAX )
+         Z = ( P / SCALE )*P + ( BCMAX / SCALE )*BCMIS
+*
+*        If Z is of the order of the machine accuracy, postpone the
+*        decision on the nature of eigenvalues
+*
+         IF( Z.GE.MULTPL*EPS ) THEN
+*
+*           Real eigenvalues. Compute A and D.
+*
+            Z = P + SIGN( SQRT( SCALE )*SQRT( Z ), P )
+            A = D + Z
+            D = D - ( BCMAX / Z )*BCMIS
+*
+*           Compute B and the rotation matrix
+*
+            TAU = SLAPY2( C, Z )
+            CS = Z / TAU
+            SN = C / TAU
+            B = B - C
+            C = ZERO
+         ELSE
+*
+*           Complex eigenvalues, or real (almost) equal eigenvalues.
+*           Make diagonal elements equal.
+*
+            SIGMA = B + C
+            TAU = SLAPY2( SIGMA, TEMP )
+            CS = SQRT( HALF*( ONE+ABS( SIGMA ) / TAU ) )
+            SN = -( P / ( TAU*CS ) )*SIGN( ONE, SIGMA )
+*
+*           Compute [ AA  BB ] = [ A  B ] [ CS -SN ]
+*                   [ CC  DD ]   [ C  D ] [ SN  CS ]
+*
+            AA = A*CS + B*SN
+            BB = -A*SN + B*CS
+            CC = C*CS + D*SN
+            DD = -C*SN + D*CS
+*
+*           Compute [ A  B ] = [ CS  SN ] [ AA  BB ]
+*                   [ C  D ]   [-SN  CS ] [ CC  DD ]
+*
+            A = AA*CS + CC*SN
+            B = BB*CS + DD*SN
+            C = -AA*SN + CC*CS
+            D = -BB*SN + DD*CS
+*
+            TEMP = HALF*( A+D )
+            A = TEMP
+            D = TEMP
+*
+            IF( C.NE.ZERO ) THEN
+               IF( B.NE.ZERO ) THEN
+                  IF( SIGN( ONE, B ).EQ.SIGN( ONE, C ) ) THEN
+*
+*                    Real eigenvalues: reduce to upper triangular form
+*
+                     SAB = SQRT( ABS( B ) )
+                     SAC = SQRT( ABS( C ) )
+                     P = SIGN( SAB*SAC, C )
+                     TAU = ONE / SQRT( ABS( B+C ) )
+                     A = TEMP + P
+                     D = TEMP - P
+                     B = B - C
+                     C = ZERO
+                     CS1 = SAB*TAU
+                     SN1 = SAC*TAU
+                     TEMP = CS*CS1 - SN*SN1
+                     SN = CS*SN1 + SN*CS1
+                     CS = TEMP
+                  END IF
+               ELSE
+                  B = -C
+                  C = ZERO
+                  TEMP = CS
+                  CS = -SN
+                  SN = TEMP
+               END IF
+            END IF
+         END IF
+*
+      END IF
+*
+   10 CONTINUE
+*
+*     Store eigenvalues in (RT1R,RT1I) and (RT2R,RT2I).
+*
+      RT1R = A
+      RT2R = D
+      IF( C.EQ.ZERO ) THEN
+         RT1I = ZERO
+         RT2I = ZERO
+      ELSE
+         RT1I = SQRT( ABS( B ) )*SQRT( ABS( C ) )
+         RT2I = -RT1I
+      END IF
+      RETURN
+*
+*     End of SLANV2
+*
+      END
diff --git a/costa/native/external/lapack/slapll.f b/costa/native/external/lapack/slapll.f
new file mode 100644
index 000000000..790c8473a
--- /dev/null
+++ b/costa/native/external/lapack/slapll.f
@@ -0,0 +1,100 @@
+      SUBROUTINE SLAPLL( N, X, INCX, Y, INCY, SSMIN )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, INCY, N
+      REAL               SSMIN
+*     ..
+*     .. Array Arguments ..
+      REAL               X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  Given two column vectors X and Y, let
+*
+*                       A = ( X Y ).
+*
+*  The subroutine first computes the QR factorization of A = Q*R,
+*  and then computes the SVD of the 2-by-2 upper triangular matrix R.
+*  The smaller singular value of R is returned in SSMIN, which is used
+*  as the measurement of the linear dependency of the vectors X and Y.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The length of the vectors X and Y.
+*
+*  X       (input/output) REAL array,
+*                         dimension (1+(N-1)*INCX)
+*          On entry, X contains the N-vector X.
+*          On exit, X is overwritten.
+*
+*  INCX    (input) INTEGER
+*          The increment between successive elements of X. INCX > 0.
+*
+*  Y       (input/output) REAL array,
+*                         dimension (1+(N-1)*INCY)
+*          On entry, Y contains the N-vector Y.
+*          On exit, Y is overwritten.
+*
+*  INCY    (input) INTEGER
+*          The increment between successive elements of Y. INCY > 0.
+*
+*  SSMIN   (output) REAL
+*          The smallest singular value of the N-by-2 matrix A = ( X Y ).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      REAL               A11, A12, A22, C, SSMAX, TAU
+*     ..
+*     .. External Functions ..
+      REAL               SDOT
+      EXTERNAL           SDOT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SLARFG, SLAS2
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.1 ) THEN
+         SSMIN = ZERO
+         RETURN
+      END IF
+*
+*     Compute the QR factorization of the N-by-2 matrix ( X Y )
+*
+      CALL SLARFG( N, X( 1 ), X( 1+INCX ), INCX, TAU )
+      A11 = X( 1 )
+      X( 1 ) = ONE
+*
+      C = -TAU*SDOT( N, X, INCX, Y, INCY )
+      CALL SAXPY( N, C, X, INCX, Y, INCY )
+*
+      CALL SLARFG( N-1, Y( 1+INCY ), Y( 1+2*INCY ), INCY, TAU )
+*
+      A12 = Y( 1 )
+      A22 = Y( 1+INCY )
+*
+*     Compute the SVD of 2-by-2 Upper triangular matrix.
+*
+      CALL SLAS2( A11, A12, A22, SSMIN, SSMAX )
+*
+      RETURN
+*
+*     End of SLAPLL
+*
+      END
diff --git a/costa/native/external/lapack/slapmt.f b/costa/native/external/lapack/slapmt.f
new file mode 100644
index 000000000..13be7b2d4
--- /dev/null
+++ b/costa/native/external/lapack/slapmt.f
@@ -0,0 +1,135 @@
+      SUBROUTINE SLAPMT( FORWRD, M, N, X, LDX, K )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      LOGICAL            FORWRD
+      INTEGER            LDX, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            K( * )
+      REAL               X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAPMT rearranges the columns of the M by N matrix X as specified
+*  by the permutation K(1),K(2),...,K(N) of the integers 1,...,N.
+*  If FORWRD = .TRUE.,  forward permutation:
+*
+*       X(*,K(J)) is moved X(*,J) for J = 1,2,...,N.
+*
+*  If FORWRD = .FALSE., backward permutation:
+*
+*       X(*,J) is moved to X(*,K(J)) for J = 1,2,...,N.
+*
+*  Arguments
+*  =========
+*
+*  FORWRD  (input) LOGICAL
+*          = .TRUE., forward permutation
+*          = .FALSE., backward permutation
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix X. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix X. N >= 0.
+*
+*  X       (input/output) REAL array, dimension (LDX,N)
+*          On entry, the M by N matrix X.
+*          On exit, X contains the permuted matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X, LDX >= MAX(1,M).
+*
+*  K       (input) INTEGER array, dimension (N)
+*          On entry, K contains the permutation vector.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, II, J, IN
+      REAL               TEMP
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.LE.1 )
+     $   RETURN
+*
+      DO 10 I = 1, N
+         K( I ) = -K( I )
+   10 CONTINUE
+*
+      IF( FORWRD ) THEN
+*
+*        Forward permutation
+*
+         DO 60 I = 1, N
+*
+            IF( K( I ).GT.0 )
+     $         GO TO 40
+*
+            J = I
+            K( J ) = -K( J )
+            IN = K( J )
+*
+   20       CONTINUE
+            IF( K( IN ).GT.0 )
+     $         GO TO 40
+*
+            DO 30 II = 1, M
+               TEMP = X( II, J )
+               X( II, J ) = X( II, IN )
+               X( II, IN ) = TEMP
+   30       CONTINUE
+*
+            K( IN ) = -K( IN )
+            J = IN
+            IN = K( IN )
+            GO TO 20
+*
+   40       CONTINUE
+*
+   60    CONTINUE
+*
+      ELSE
+*
+*        Backward permutation
+*
+         DO 110 I = 1, N
+*
+            IF( K( I ).GT.0 )
+     $         GO TO 100
+*
+            K( I ) = -K( I )
+            J = K( I )
+   80       CONTINUE
+            IF( J.EQ.I )
+     $         GO TO 100
+*
+            DO 90 II = 1, M
+               TEMP = X( II, I )
+               X( II, I ) = X( II, J )
+               X( II, J ) = TEMP
+   90       CONTINUE
+*
+            K( J ) = -K( J )
+            J = K( J )
+            GO TO 80
+*
+  100       CONTINUE
+
+  110    CONTINUE
+*
+      END IF
+*
+      RETURN
+*
+*     End of SLAPMT
+*
+      END
diff --git a/costa/native/external/lapack/slapy2.f b/costa/native/external/lapack/slapy2.f
new file mode 100644
index 000000000..b3f26b7e8
--- /dev/null
+++ b/costa/native/external/lapack/slapy2.f
@@ -0,0 +1,54 @@
+      REAL             FUNCTION SLAPY2( X, Y )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      REAL               X, Y
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary
+*  overflow.
+*
+*  Arguments
+*  =========
+*
+*  X       (input) REAL
+*  Y       (input) REAL
+*          X and Y specify the values x and y.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E0 )
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      REAL               W, XABS, YABS, Z
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      XABS = ABS( X )
+      YABS = ABS( Y )
+      W = MAX( XABS, YABS )
+      Z = MIN( XABS, YABS )
+      IF( Z.EQ.ZERO ) THEN
+         SLAPY2 = W
+      ELSE
+         SLAPY2 = W*SQRT( ONE+( Z / W )**2 )
+      END IF
+      RETURN
+*
+*     End of SLAPY2
+*
+      END
diff --git a/costa/native/external/lapack/slapy3.f b/costa/native/external/lapack/slapy3.f
new file mode 100644
index 000000000..06b97237f
--- /dev/null
+++ b/costa/native/external/lapack/slapy3.f
@@ -0,0 +1,54 @@
+      REAL             FUNCTION SLAPY3( X, Y, Z )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      REAL               X, Y, Z
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAPY3 returns sqrt(x**2+y**2+z**2), taking care not to cause
+*  unnecessary overflow.
+*
+*  Arguments
+*  =========
+*
+*  X       (input) REAL
+*  Y       (input) REAL
+*  Z       (input) REAL
+*          X, Y and Z specify the values x, y and z.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E0 )
+*     ..
+*     .. Local Scalars ..
+      REAL               W, XABS, YABS, ZABS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      XABS = ABS( X )
+      YABS = ABS( Y )
+      ZABS = ABS( Z )
+      W = MAX( XABS, YABS, ZABS )
+      IF( W.EQ.ZERO ) THEN
+         SLAPY3 = ZERO
+      ELSE
+         SLAPY3 = W*SQRT( ( XABS / W )**2+( YABS / W )**2+
+     $            ( ZABS / W )**2 )
+      END IF
+      RETURN
+*
+*     End of SLAPY3
+*
+      END
diff --git a/costa/native/external/lapack/slaqgb.f b/costa/native/external/lapack/slaqgb.f
new file mode 100644
index 000000000..4303602ff
--- /dev/null
+++ b/costa/native/external/lapack/slaqgb.f
@@ -0,0 +1,169 @@
+      SUBROUTINE SLAQGB( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND,
+     $                   AMAX, EQUED )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED
+      INTEGER            KL, KU, LDAB, M, N
+      REAL               AMAX, COLCND, ROWCND
+*     ..
+*     .. Array Arguments ..
+      REAL               AB( LDAB, * ), C( * ), R( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAQGB equilibrates a general M by N band matrix A with KL
+*  subdiagonals and KU superdiagonals using the row and scaling factors
+*  in the vectors R and C.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  AB      (input/output) REAL array, dimension (LDAB,N)
+*          On entry, the matrix A in band storage, in rows 1 to KL+KU+1.
+*          The j-th column of A is stored in the j-th column of the
+*          array AB as follows:
+*          AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
+*
+*          On exit, the equilibrated matrix, in the same storage format
+*          as A.  See EQUED for the form of the equilibrated matrix.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDA >= KL+KU+1.
+*
+*  R       (output) REAL array, dimension (M)
+*          The row scale factors for A.
+*
+*  C       (output) REAL array, dimension (N)
+*          The column scale factors for A.
+*
+*  ROWCND  (output) REAL
+*          Ratio of the smallest R(i) to the largest R(i).
+*
+*  COLCND  (output) REAL
+*          Ratio of the smallest C(i) to the largest C(i).
+*
+*  AMAX    (input) REAL
+*          Absolute value of largest matrix entry.
+*
+*  EQUED   (output) CHARACTER*1
+*          Specifies the form of equilibration that was done.
+*          = 'N':  No equilibration
+*          = 'R':  Row equilibration, i.e., A has been premultiplied by
+*                  diag(R).
+*          = 'C':  Column equilibration, i.e., A has been postmultiplied
+*                  by diag(C).
+*          = 'B':  Both row and column equilibration, i.e., A has been
+*                  replaced by diag(R) * A * diag(C).
+*
+*  Internal Parameters
+*  ===================
+*
+*  THRESH is a threshold value used to decide if row or column scaling
+*  should be done based on the ratio of the row or column scaling
+*  factors.  If ROWCND < THRESH, row scaling is done, and if
+*  COLCND < THRESH, column scaling is done.
+*
+*  LARGE and SMALL are threshold values used to decide if row scaling
+*  should be done based on the absolute size of the largest matrix
+*  element.  If AMAX > LARGE or AMAX < SMALL, row scaling is done.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, THRESH
+      PARAMETER          ( ONE = 1.0E+0, THRESH = 0.1E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      REAL               CJ, LARGE, SMALL
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 .OR. N.LE.0 ) THEN
+         EQUED = 'N'
+         RETURN
+      END IF
+*
+*     Initialize LARGE and SMALL.
+*
+      SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' )
+      LARGE = ONE / SMALL
+*
+      IF( ROWCND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE )
+     $     THEN
+*
+*        No row scaling
+*
+         IF( COLCND.GE.THRESH ) THEN
+*
+*           No column scaling
+*
+            EQUED = 'N'
+         ELSE
+*
+*           Column scaling
+*
+            DO 20 J = 1, N
+               CJ = C( J )
+               DO 10 I = MAX( 1, J-KU ), MIN( M, J+KL )
+                  AB( KU+1+I-J, J ) = CJ*AB( KU+1+I-J, J )
+   10          CONTINUE
+   20       CONTINUE
+            EQUED = 'C'
+         END IF
+      ELSE IF( COLCND.GE.THRESH ) THEN
+*
+*        Row scaling, no column scaling
+*
+         DO 40 J = 1, N
+            DO 30 I = MAX( 1, J-KU ), MIN( M, J+KL )
+               AB( KU+1+I-J, J ) = R( I )*AB( KU+1+I-J, J )
+   30       CONTINUE
+   40    CONTINUE
+         EQUED = 'R'
+      ELSE
+*
+*        Row and column scaling
+*
+         DO 60 J = 1, N
+            CJ = C( J )
+            DO 50 I = MAX( 1, J-KU ), MIN( M, J+KL )
+               AB( KU+1+I-J, J ) = CJ*R( I )*AB( KU+1+I-J, J )
+   50       CONTINUE
+   60    CONTINUE
+         EQUED = 'B'
+      END IF
+*
+      RETURN
+*
+*     End of SLAQGB
+*
+      END
diff --git a/costa/native/external/lapack/slaqge.f b/costa/native/external/lapack/slaqge.f
new file mode 100644
index 000000000..bbef3372a
--- /dev/null
+++ b/costa/native/external/lapack/slaqge.f
@@ -0,0 +1,155 @@
+      SUBROUTINE SLAQGE( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX,
+     $                   EQUED )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED
+      INTEGER            LDA, M, N
+      REAL               AMAX, COLCND, ROWCND
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), C( * ), R( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAQGE equilibrates a general M by N matrix A using the row and
+*  scaling factors in the vectors R and C.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the M by N matrix A.
+*          On exit, the equilibrated matrix.  See EQUED for the form of
+*          the equilibrated matrix.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(M,1).
+*
+*  R       (input) REAL array, dimension (M)
+*          The row scale factors for A.
+*
+*  C       (input) REAL array, dimension (N)
+*          The column scale factors for A.
+*
+*  ROWCND  (input) REAL
+*          Ratio of the smallest R(i) to the largest R(i).
+*
+*  COLCND  (input) REAL
+*          Ratio of the smallest C(i) to the largest C(i).
+*
+*  AMAX    (input) REAL
+*          Absolute value of largest matrix entry.
+*
+*  EQUED   (output) CHARACTER*1
+*          Specifies the form of equilibration that was done.
+*          = 'N':  No equilibration
+*          = 'R':  Row equilibration, i.e., A has been premultiplied by
+*                  diag(R).
+*          = 'C':  Column equilibration, i.e., A has been postmultiplied
+*                  by diag(C).
+*          = 'B':  Both row and column equilibration, i.e., A has been
+*                  replaced by diag(R) * A * diag(C).
+*
+*  Internal Parameters
+*  ===================
+*
+*  THRESH is a threshold value used to decide if row or column scaling
+*  should be done based on the ratio of the row or column scaling
+*  factors.  If ROWCND < THRESH, row scaling is done, and if
+*  COLCND < THRESH, column scaling is done.
+*
+*  LARGE and SMALL are threshold values used to decide if row scaling
+*  should be done based on the absolute size of the largest matrix
+*  element.  If AMAX > LARGE or AMAX < SMALL, row scaling is done.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, THRESH
+      PARAMETER          ( ONE = 1.0E+0, THRESH = 0.1E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      REAL               CJ, LARGE, SMALL
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 .OR. N.LE.0 ) THEN
+         EQUED = 'N'
+         RETURN
+      END IF
+*
+*     Initialize LARGE and SMALL.
+*
+      SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' )
+      LARGE = ONE / SMALL
+*
+      IF( ROWCND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE )
+     $     THEN
+*
+*        No row scaling
+*
+         IF( COLCND.GE.THRESH ) THEN
+*
+*           No column scaling
+*
+            EQUED = 'N'
+         ELSE
+*
+*           Column scaling
+*
+            DO 20 J = 1, N
+               CJ = C( J )
+               DO 10 I = 1, M
+                  A( I, J ) = CJ*A( I, J )
+   10          CONTINUE
+   20       CONTINUE
+            EQUED = 'C'
+         END IF
+      ELSE IF( COLCND.GE.THRESH ) THEN
+*
+*        Row scaling, no column scaling
+*
+         DO 40 J = 1, N
+            DO 30 I = 1, M
+               A( I, J ) = R( I )*A( I, J )
+   30       CONTINUE
+   40    CONTINUE
+         EQUED = 'R'
+      ELSE
+*
+*        Row and column scaling
+*
+         DO 60 J = 1, N
+            CJ = C( J )
+            DO 50 I = 1, M
+               A( I, J ) = CJ*R( I )*A( I, J )
+   50       CONTINUE
+   60    CONTINUE
+         EQUED = 'B'
+      END IF
+*
+      RETURN
+*
+*     End of SLAQGE
+*
+      END
diff --git a/costa/native/external/lapack/slaqp2.f b/costa/native/external/lapack/slaqp2.f
new file mode 100644
index 000000000..65a42779f
--- /dev/null
+++ b/costa/native/external/lapack/slaqp2.f
@@ -0,0 +1,166 @@
+      SUBROUTINE SLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2,
+     $                   WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            LDA, M, N, OFFSET
+*     ..
+*     .. Array Arguments ..
+      INTEGER            JPVT( * )
+      REAL               A( LDA, * ), TAU( * ), VN1( * ), VN2( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAQP2 computes a QR factorization with column pivoting of
+*  the block A(OFFSET+1:M,1:N).
+*  The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A. N >= 0.
+*
+*  OFFSET  (input) INTEGER
+*          The number of rows of the matrix A that must be pivoted
+*          but no factorized. OFFSET >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, the upper triangle of block A(OFFSET+1:M,1:N) is 
+*          the triangular factor obtained; the elements in block 
+*          A(OFFSET+1:M,1:N) below the diagonal, together with the 
+*          array TAU, represent the orthogonal matrix Q as a product of
+*          elementary reflectors. Block A(1:OFFSET,1:N) has been 
+*          accordingly pivoted, but no factorized.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  JPVT    (input/output) INTEGER array, dimension (N)
+*          On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted
+*          to the front of A*P (a leading column); if JPVT(i) = 0,
+*          the i-th column of A is a free column.
+*          On exit, if JPVT(i) = k, then the i-th column of A*P
+*          was the k-th column of A.
+*
+*  TAU     (output) REAL array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors.
+*
+*  VN1     (input/output) REAL array, dimension (N)
+*          The vector with the partial column norms.
+*
+*  VN2     (input/output) REAL array, dimension (N)
+*          The vector with the exact column norms.
+*
+*  WORK    (workspace) REAL array, dimension (N)
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain
+*    X. Sun, Computer Science Dept., Duke University, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, ITEMP, J, MN, OFFPI, PVT
+      REAL               AII, TEMP, TEMP2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARF, SLARFG, SSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. External Functions ..
+      INTEGER            ISAMAX
+      REAL               SNRM2
+      EXTERNAL           ISAMAX, SNRM2
+*     ..
+*     .. Executable Statements ..
+*
+      MN = MIN( M-OFFSET, N )
+*
+*     Compute factorization.
+*
+      DO 20 I = 1, MN
+*
+         OFFPI = OFFSET + I
+*
+*        Determine ith pivot column and swap if necessary.
+*
+         PVT = ( I-1 ) + ISAMAX( N-I+1, VN1( I ), 1 )
+*
+         IF( PVT.NE.I ) THEN
+            CALL SSWAP( M, A( 1, PVT ), 1, A( 1, I ), 1 )
+            ITEMP = JPVT( PVT )
+            JPVT( PVT ) = JPVT( I )
+            JPVT( I ) = ITEMP
+            VN1( PVT ) = VN1( I )
+            VN2( PVT ) = VN2( I )
+         END IF
+*
+*        Generate elementary reflector H(i).
+*
+         IF( OFFPI.LT.M ) THEN
+            CALL SLARFG( M-OFFPI+1, A( OFFPI, I ), A( OFFPI+1, I ), 1,
+     $                   TAU( I ) )
+         ELSE
+            CALL SLARFG( 1, A( M, I ), A( M, I ), 1, TAU( I ) )
+         END IF
+*
+         IF( I.LT.N ) THEN
+*
+*           Apply H(i)' to A(offset+i:m,i+1:n) from the left.
+*
+            AII = A( OFFPI, I )
+            A( OFFPI, I ) = ONE
+            CALL SLARF( 'Left', M-OFFPI+1, N-I, A( OFFPI, I ), 1,
+     $                  TAU( I ), A( OFFPI, I+1 ), LDA, WORK( 1 ) )
+            A( OFFPI, I ) = AII
+         END IF
+*
+*        Update partial column norms.
+*
+         DO 10 J = I + 1, N
+            IF( VN1( J ).NE.ZERO ) THEN
+               TEMP = ONE - ( ABS( A( OFFPI, J ) ) / VN1( J ) )**2
+               TEMP = MAX( TEMP, ZERO )
+               TEMP2 = ONE + 0.05*TEMP*( VN1( J ) / VN2( J ) )**2
+               IF( TEMP2.EQ.ONE ) THEN
+                  IF( OFFPI.LT.M ) THEN
+                     VN1( J ) = SNRM2( M-OFFPI, A( OFFPI+1, J ), 1 )
+                     VN2( J ) = VN1( J )
+                  ELSE
+                     VN1( J ) = ZERO
+                     VN2( J ) = ZERO
+                  END IF
+               ELSE
+                  VN1( J ) = VN1( J )*SQRT( TEMP )
+               END IF
+            END IF
+   10    CONTINUE
+*
+   20 CONTINUE
+*
+      RETURN
+*
+*     End of SLAQP2
+*
+      END
diff --git a/costa/native/external/lapack/slaqps.f b/costa/native/external/lapack/slaqps.f
new file mode 100644
index 000000000..5b26b03cb
--- /dev/null
+++ b/costa/native/external/lapack/slaqps.f
@@ -0,0 +1,245 @@
+      SUBROUTINE SLAQPS( M, N, OFFSET, NB, KB, A, LDA, JPVT, TAU, VN1,
+     $                   VN2, AUXV, F, LDF )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            KB, LDA, LDF, M, N, NB, OFFSET
+*     ..
+*     .. Array Arguments ..
+      INTEGER            JPVT( * )
+      REAL               A( LDA, * ), AUXV( * ), F( LDF, * ), TAU( * ),
+     $                   VN1( * ), VN2( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAQPS computes a step of QR factorization with column pivoting
+*  of a real M-by-N matrix A by using Blas-3.  It tries to factorize
+*  NB columns from A starting from the row OFFSET+1, and updates all
+*  of the matrix with Blas-3 xGEMM.
+*
+*  In some cases, due to catastrophic cancellations, it cannot
+*  factorize NB columns.  Hence, the actual number of factorized
+*  columns is returned in KB.
+*
+*  Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A. N >= 0
+*
+*  OFFSET  (input) INTEGER
+*          The number of rows of A that have been factorized in
+*          previous steps.
+*
+*  NB      (input) INTEGER
+*          The number of columns to factorize.
+*
+*  KB      (output) INTEGER
+*          The number of columns actually factorized.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, block A(OFFSET+1:M,1:KB) is the triangular
+*          factor obtained and block A(1:OFFSET,1:N) has been
+*          accordingly pivoted, but no factorized.
+*          The rest of the matrix, block A(OFFSET+1:M,KB+1:N) has
+*          been updated.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  JPVT    (input/output) INTEGER array, dimension (N)
+*          JPVT(I) = K <==> Column K of the full matrix A has been
+*          permuted into position I in AP.
+*
+*  TAU     (output) REAL array, dimension (KB)
+*          The scalar factors of the elementary reflectors.
+*
+*  VN1     (input/output) REAL array, dimension (N)
+*          The vector with the partial column norms.
+*
+*  VN2     (input/output) REAL array, dimension (N)
+*          The vector with the exact column norms.
+*
+*  AUXV    (input/output) REAL array, dimension (NB)
+*          Auxiliar vector.
+*
+*  F       (input/output) REAL array, dimension (LDF,NB)
+*          Matrix F' = L*Y'*A.
+*
+*  LDF     (input) INTEGER
+*          The leading dimension of the array F. LDF >= max(1,N).
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain
+*    X. Sun, Computer Science Dept., Duke University, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            ITEMP, J, K, LASTRK, LSTICC, PVT, RK
+      REAL               AKK, TEMP, TEMP2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMM, SGEMV, SLARFG, SSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, NINT, REAL, SQRT
+*     ..
+*     .. External Functions ..
+      INTEGER            ISAMAX
+      REAL               SNRM2
+      EXTERNAL           ISAMAX, SNRM2
+*     ..
+*     .. Executable Statements ..
+*
+      LASTRK = MIN( M, N+OFFSET )
+      LSTICC = 0
+      K = 0
+*
+*     Beginning of while loop.
+*
+   10 CONTINUE
+      IF( ( K.LT.NB ) .AND. ( LSTICC.EQ.0 ) ) THEN
+         K = K + 1
+         RK = OFFSET + K
+*
+*        Determine ith pivot column and swap if necessary
+*
+         PVT = ( K-1 ) + ISAMAX( N-K+1, VN1( K ), 1 )
+         IF( PVT.NE.K ) THEN
+            CALL SSWAP( M, A( 1, PVT ), 1, A( 1, K ), 1 )
+            CALL SSWAP( K-1, F( PVT, 1 ), LDF, F( K, 1 ), LDF )
+            ITEMP = JPVT( PVT )
+            JPVT( PVT ) = JPVT( K )
+            JPVT( K ) = ITEMP
+            VN1( PVT ) = VN1( K )
+            VN2( PVT ) = VN2( K )
+         END IF
+*
+*        Apply previous Householder reflectors to column K:
+*        A(RK:M,K) := A(RK:M,K) - A(RK:M,1:K-1)*F(K,1:K-1)'.
+*
+         IF( K.GT.1 ) THEN
+            CALL SGEMV( 'No transpose', M-RK+1, K-1, -ONE, A( RK, 1 ),
+     $                  LDA, F( K, 1 ), LDF, ONE, A( RK, K ), 1 )
+         END IF
+*
+*        Generate elementary reflector H(k).
+*
+         IF( RK.LT.M ) THEN
+            CALL SLARFG( M-RK+1, A( RK, K ), A( RK+1, K ), 1, TAU( K ) )
+         ELSE
+            CALL SLARFG( 1, A( RK, K ), A( RK, K ), 1, TAU( K ) )
+         END IF
+*
+         AKK = A( RK, K )
+         A( RK, K ) = ONE
+*
+*        Compute Kth column of F:
+*
+*        Compute  F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)'*A(RK:M,K).
+*
+         IF( K.LT.N ) THEN
+            CALL SGEMV( 'Transpose', M-RK+1, N-K, TAU( K ),
+     $                  A( RK, K+1 ), LDA, A( RK, K ), 1, ZERO,
+     $                  F( K+1, K ), 1 )
+         END IF
+*
+*        Padding F(1:K,K) with zeros.
+*
+         DO 20 J = 1, K
+            F( J, K ) = ZERO
+   20    CONTINUE
+*
+*        Incremental updating of F:
+*        F(1:N,K) := F(1:N,K) - tau(K)*F(1:N,1:K-1)*A(RK:M,1:K-1)'
+*                    *A(RK:M,K).
+*
+         IF( K.GT.1 ) THEN
+            CALL SGEMV( 'Transpose', M-RK+1, K-1, -TAU( K ), A( RK, 1 ),
+     $                  LDA, A( RK, K ), 1, ZERO, AUXV( 1 ), 1 )
+*
+            CALL SGEMV( 'No transpose', N, K-1, ONE, F( 1, 1 ), LDF,
+     $                  AUXV( 1 ), 1, ONE, F( 1, K ), 1 )
+         END IF
+*
+*        Update the current row of A:
+*        A(RK,K+1:N) := A(RK,K+1:N) - A(RK,1:K)*F(K+1:N,1:K)'.
+*
+         IF( K.LT.N ) THEN
+            CALL SGEMV( 'No transpose', N-K, K, -ONE, F( K+1, 1 ), LDF,
+     $                  A( RK, 1 ), LDA, ONE, A( RK, K+1 ), LDA )
+         END IF
+*
+*        Update partial column norms.
+*
+         IF( RK.LT.LASTRK ) THEN
+            DO 30 J = K + 1, N
+               IF( VN1( J ).NE.ZERO ) THEN
+                  TEMP = ABS( A( RK, J ) ) / VN1( J )
+                  TEMP = MAX( ZERO, ( ONE+TEMP )*( ONE-TEMP ) )
+                  TEMP2 = ONE + 0.05*TEMP*( VN1( J ) / VN2( J ) )**2
+                  IF( TEMP2.EQ.ONE ) THEN
+                     VN2( J ) = REAL( LSTICC )
+                     LSTICC = J
+                  ELSE
+                     VN1( J ) = VN1( J )*SQRT( TEMP )
+                  END IF
+               END IF
+   30       CONTINUE
+         END IF
+*
+         A( RK, K ) = AKK
+*
+*        End of while loop.
+*
+         GO TO 10
+      END IF
+      KB = K
+      RK = OFFSET + KB
+*
+*     Apply the block reflector to the rest of the matrix:
+*     A(OFFSET+KB+1:M,KB+1:N) := A(OFFSET+KB+1:M,KB+1:N) -
+*                         A(OFFSET+KB+1:M,1:KB)*F(KB+1:N,1:KB)'.
+*
+      IF( KB.LT.MIN( N, M-OFFSET ) ) THEN
+         CALL SGEMM( 'No transpose', 'Transpose', M-RK, N-KB, KB, -ONE,
+     $               A( RK+1, 1 ), LDA, F( KB+1, 1 ), LDF, ONE,
+     $               A( RK+1, KB+1 ), LDA )
+      END IF
+*
+*     Recomputation of difficult columns.
+*
+   40 CONTINUE
+      IF( LSTICC.GT.0 ) THEN
+         ITEMP = NINT( VN2( LSTICC ) )
+         VN1( LSTICC ) = SNRM2( M-RK, A( RK+1, LSTICC ), 1 )
+         VN2( LSTICC ) = VN1( LSTICC )
+         LSTICC = ITEMP
+         GO TO 40
+      END IF
+*
+      RETURN
+*
+*     End of SLAQPS
+*
+      END
diff --git a/costa/native/external/lapack/slaqsb.f b/costa/native/external/lapack/slaqsb.f
new file mode 100644
index 000000000..6662c5086
--- /dev/null
+++ b/costa/native/external/lapack/slaqsb.f
@@ -0,0 +1,149 @@
+      SUBROUTINE SLAQSB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, UPLO
+      INTEGER            KD, LDAB, N
+      REAL               AMAX, SCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               AB( LDAB, * ), S( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAQSB equilibrates a symmetric band matrix A using the scaling
+*  factors in the vector S.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of super-diagonals of the matrix A if UPLO = 'U',
+*          or the number of sub-diagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) REAL array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*          On exit, if INFO = 0, the triangular factor U or L from the
+*          Cholesky factorization A = U'*U or A = L*L' of the band
+*          matrix A, in the same storage format as A.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  S       (output) REAL array, dimension (N)
+*          The scale factors for A.
+*
+*  SCOND   (input) REAL
+*          Ratio of the smallest S(i) to the largest S(i).
+*
+*  AMAX    (input) REAL
+*          Absolute value of largest matrix entry.
+*
+*  EQUED   (output) CHARACTER*1
+*          Specifies whether or not equilibration was done.
+*          = 'N':  No equilibration.
+*          = 'Y':  Equilibration was done, i.e., A has been replaced by
+*                  diag(S) * A * diag(S).
+*
+*  Internal Parameters
+*  ===================
+*
+*  THRESH is a threshold value used to decide if scaling should be done
+*  based on the ratio of the scaling factors.  If SCOND < THRESH,
+*  scaling is done.
+*
+*  LARGE and SMALL are threshold values used to decide if scaling should
+*  be done based on the absolute size of the largest matrix element.
+*  If AMAX > LARGE or AMAX < SMALL, scaling is done.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, THRESH
+      PARAMETER          ( ONE = 1.0E+0, THRESH = 0.1E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      REAL               CJ, LARGE, SMALL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 ) THEN
+         EQUED = 'N'
+         RETURN
+      END IF
+*
+*     Initialize LARGE and SMALL.
+*
+      SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' )
+      LARGE = ONE / SMALL
+*
+      IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN
+*
+*        No equilibration
+*
+         EQUED = 'N'
+      ELSE
+*
+*        Replace A by diag(S) * A * diag(S).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*           Upper triangle of A is stored in band format.
+*
+            DO 20 J = 1, N
+               CJ = S( J )
+               DO 10 I = MAX( 1, J-KD ), J
+                  AB( KD+1+I-J, J ) = CJ*S( I )*AB( KD+1+I-J, J )
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+*
+*           Lower triangle of A is stored.
+*
+            DO 40 J = 1, N
+               CJ = S( J )
+               DO 30 I = J, MIN( N, J+KD )
+                  AB( 1+I-J, J ) = CJ*S( I )*AB( 1+I-J, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         EQUED = 'Y'
+      END IF
+*
+      RETURN
+*
+*     End of SLAQSB
+*
+      END
diff --git a/costa/native/external/lapack/slaqsp.f b/costa/native/external/lapack/slaqsp.f
new file mode 100644
index 000000000..f6d788f38
--- /dev/null
+++ b/costa/native/external/lapack/slaqsp.f
@@ -0,0 +1,141 @@
+      SUBROUTINE SLAQSP( UPLO, N, AP, S, SCOND, AMAX, EQUED )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, UPLO
+      INTEGER            N
+      REAL               AMAX, SCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               AP( * ), S( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAQSP equilibrates a symmetric matrix A using the scaling factors
+*  in the vector S.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) REAL array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, the equilibrated matrix:  diag(S) * A * diag(S), in
+*          the same storage format as A.
+*
+*  S       (input) REAL array, dimension (N)
+*          The scale factors for A.
+*
+*  SCOND   (input) REAL
+*          Ratio of the smallest S(i) to the largest S(i).
+*
+*  AMAX    (input) REAL
+*          Absolute value of largest matrix entry.
+*
+*  EQUED   (output) CHARACTER*1
+*          Specifies whether or not equilibration was done.
+*          = 'N':  No equilibration.
+*          = 'Y':  Equilibration was done, i.e., A has been replaced by
+*                  diag(S) * A * diag(S).
+*
+*  Internal Parameters
+*  ===================
+*
+*  THRESH is a threshold value used to decide if scaling should be done
+*  based on the ratio of the scaling factors.  If SCOND < THRESH,
+*  scaling is done.
+*
+*  LARGE and SMALL are threshold values used to decide if scaling should
+*  be done based on the absolute size of the largest matrix element.
+*  If AMAX > LARGE or AMAX < SMALL, scaling is done.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, THRESH
+      PARAMETER          ( ONE = 1.0E+0, THRESH = 0.1E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, JC
+      REAL               CJ, LARGE, SMALL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 ) THEN
+         EQUED = 'N'
+         RETURN
+      END IF
+*
+*     Initialize LARGE and SMALL.
+*
+      SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' )
+      LARGE = ONE / SMALL
+*
+      IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN
+*
+*        No equilibration
+*
+         EQUED = 'N'
+      ELSE
+*
+*        Replace A by diag(S) * A * diag(S).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*           Upper triangle of A is stored.
+*
+            JC = 1
+            DO 20 J = 1, N
+               CJ = S( J )
+               DO 10 I = 1, J
+                  AP( JC+I-1 ) = CJ*S( I )*AP( JC+I-1 )
+   10          CONTINUE
+               JC = JC + J
+   20       CONTINUE
+         ELSE
+*
+*           Lower triangle of A is stored.
+*
+            JC = 1
+            DO 40 J = 1, N
+               CJ = S( J )
+               DO 30 I = J, N
+                  AP( JC+I-J ) = CJ*S( I )*AP( JC+I-J )
+   30          CONTINUE
+               JC = JC + N - J + 1
+   40       CONTINUE
+         END IF
+         EQUED = 'Y'
+      END IF
+*
+      RETURN
+*
+*     End of SLAQSP
+*
+      END
diff --git a/costa/native/external/lapack/slaqsy.f b/costa/native/external/lapack/slaqsy.f
new file mode 100644
index 000000000..0c22da64e
--- /dev/null
+++ b/costa/native/external/lapack/slaqsy.f
@@ -0,0 +1,142 @@
+      SUBROUTINE SLAQSY( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, UPLO
+      INTEGER            LDA, N
+      REAL               AMAX, SCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), S( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAQSY equilibrates a symmetric matrix A using the scaling factors
+*  in the vector S.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          n by n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n by n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if EQUED = 'Y', the equilibrated matrix:
+*          diag(S) * A * diag(S).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(N,1).
+*
+*  S       (input) REAL array, dimension (N)
+*          The scale factors for A.
+*
+*  SCOND   (input) REAL
+*          Ratio of the smallest S(i) to the largest S(i).
+*
+*  AMAX    (input) REAL
+*          Absolute value of largest matrix entry.
+*
+*  EQUED   (output) CHARACTER*1
+*          Specifies whether or not equilibration was done.
+*          = 'N':  No equilibration.
+*          = 'Y':  Equilibration was done, i.e., A has been replaced by
+*                  diag(S) * A * diag(S).
+*
+*  Internal Parameters
+*  ===================
+*
+*  THRESH is a threshold value used to decide if scaling should be done
+*  based on the ratio of the scaling factors.  If SCOND < THRESH,
+*  scaling is done.
+*
+*  LARGE and SMALL are threshold values used to decide if scaling should
+*  be done based on the absolute size of the largest matrix element.
+*  If AMAX > LARGE or AMAX < SMALL, scaling is done.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, THRESH
+      PARAMETER          ( ONE = 1.0E+0, THRESH = 0.1E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      REAL               CJ, LARGE, SMALL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 ) THEN
+         EQUED = 'N'
+         RETURN
+      END IF
+*
+*     Initialize LARGE and SMALL.
+*
+      SMALL = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' )
+      LARGE = ONE / SMALL
+*
+      IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN
+*
+*        No equilibration
+*
+         EQUED = 'N'
+      ELSE
+*
+*        Replace A by diag(S) * A * diag(S).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*           Upper triangle of A is stored.
+*
+            DO 20 J = 1, N
+               CJ = S( J )
+               DO 10 I = 1, J
+                  A( I, J ) = CJ*S( I )*A( I, J )
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+*
+*           Lower triangle of A is stored.
+*
+            DO 40 J = 1, N
+               CJ = S( J )
+               DO 30 I = J, N
+                  A( I, J ) = CJ*S( I )*A( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         EQUED = 'Y'
+      END IF
+*
+      RETURN
+*
+*     End of SLAQSY
+*
+      END
diff --git a/costa/native/external/lapack/slaqtr.f b/costa/native/external/lapack/slaqtr.f
new file mode 100644
index 000000000..1a6c1b825
--- /dev/null
+++ b/costa/native/external/lapack/slaqtr.f
@@ -0,0 +1,666 @@
+      SUBROUTINE SLAQTR( LTRAN, LREAL, N, T, LDT, B, W, SCALE, X, WORK,
+     $                   INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      LOGICAL            LREAL, LTRAN
+      INTEGER            INFO, LDT, N
+      REAL               SCALE, W
+*     ..
+*     .. Array Arguments ..
+      REAL               B( * ), T( LDT, * ), WORK( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAQTR solves the real quasi-triangular system
+*
+*               op(T)*p = scale*c,               if LREAL = .TRUE.
+*
+*  or the complex quasi-triangular systems
+*
+*             op(T + iB)*(p+iq) = scale*(c+id),  if LREAL = .FALSE.
+*
+*  in real arithmetic, where T is upper quasi-triangular.
+*  If LREAL = .FALSE., then the first diagonal block of T must be
+*  1 by 1, B is the specially structured matrix
+*
+*                 B = [ b(1) b(2) ... b(n) ]
+*                     [       w            ]
+*                     [           w        ]
+*                     [              .     ]
+*                     [                 w  ]
+*
+*  op(A) = A or A', A' denotes the conjugate transpose of
+*  matrix A.
+*
+*  On input, X = [ c ].  On output, X = [ p ].
+*                [ d ]                  [ q ]
+*
+*  This subroutine is designed for the condition number estimation
+*  in routine STRSNA.
+*
+*  Arguments
+*  =========
+*
+*  LTRAN   (input) LOGICAL
+*          On entry, LTRAN specifies the option of conjugate transpose:
+*             = .FALSE.,    op(T+i*B) = T+i*B,
+*             = .TRUE.,     op(T+i*B) = (T+i*B)'.
+*
+*  LREAL   (input) LOGICAL
+*          On entry, LREAL specifies the input matrix structure:
+*             = .FALSE.,    the input is complex
+*             = .TRUE.,     the input is real
+*
+*  N       (input) INTEGER
+*          On entry, N specifies the order of T+i*B. N >= 0.
+*
+*  T       (input) REAL array, dimension (LDT,N)
+*          On entry, T contains a matrix in Schur canonical form.
+*          If LREAL = .FALSE., then the first diagonal block of T must
+*          be 1 by 1.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the matrix T. LDT >= max(1,N).
+*
+*  B       (input) REAL array, dimension (N)
+*          On entry, B contains the elements to form the matrix
+*          B as described above.
+*          If LREAL = .TRUE., B is not referenced.
+*
+*  W       (input) REAL
+*          On entry, W is the diagonal element of the matrix B.
+*          If LREAL = .TRUE., W is not referenced.
+*
+*  SCALE   (output) REAL
+*          On exit, SCALE is the scale factor.
+*
+*  X       (input/output) REAL array, dimension (2*N)
+*          On entry, X contains the right hand side of the system.
+*          On exit, X is overwritten by the solution.
+*
+*  WORK    (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          On exit, INFO is set to
+*             0: successful exit.
+*               1: the some diagonal 1 by 1 block has been perturbed by
+*                  a small number SMIN to keep nonsingularity.
+*               2: the some diagonal 2 by 2 block has been perturbed by
+*                  a small number in SLALN2 to keep nonsingularity.
+*          NOTE: In the interests of speed, this routine does not
+*                check the inputs for errors.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN
+      INTEGER            I, IERR, J, J1, J2, JNEXT, K, N1, N2
+      REAL               BIGNUM, EPS, REC, SCALOC, SI, SMIN, SMINW,
+     $                   SMLNUM, SR, TJJ, TMP, XJ, XMAX, XNORM, Z
+*     ..
+*     .. Local Arrays ..
+      REAL               D( 2, 2 ), V( 2, 2 )
+*     ..
+*     .. External Functions ..
+      INTEGER            ISAMAX
+      REAL               SASUM, SDOT, SLAMCH, SLANGE
+      EXTERNAL           ISAMAX, SASUM, SDOT, SLAMCH, SLANGE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SLADIV, SLALN2, SSCAL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Do not test the input parameters for errors
+*
+      NOTRAN = .NOT.LTRAN
+      INFO = 0
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Set constants to control overflow
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SLAMCH( 'S' ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+      XNORM = SLANGE( 'M', N, N, T, LDT, D )
+      IF( .NOT.LREAL )
+     $   XNORM = MAX( XNORM, ABS( W ), SLANGE( 'M', N, 1, B, N, D ) )
+      SMIN = MAX( SMLNUM, EPS*XNORM )
+*
+*     Compute 1-norm of each column of strictly upper triangular
+*     part of T to control overflow in triangular solver.
+*
+      WORK( 1 ) = ZERO
+      DO 10 J = 2, N
+         WORK( J ) = SASUM( J-1, T( 1, J ), 1 )
+   10 CONTINUE
+*
+      IF( .NOT.LREAL ) THEN
+         DO 20 I = 2, N
+            WORK( I ) = WORK( I ) + ABS( B( I ) )
+   20    CONTINUE
+      END IF
+*
+      N2 = 2*N
+      N1 = N
+      IF( .NOT.LREAL )
+     $   N1 = N2
+      K = ISAMAX( N1, X, 1 )
+      XMAX = ABS( X( K ) )
+      SCALE = ONE
+*
+      IF( XMAX.GT.BIGNUM ) THEN
+         SCALE = BIGNUM / XMAX
+         CALL SSCAL( N1, SCALE, X, 1 )
+         XMAX = BIGNUM
+      END IF
+*
+      IF( LREAL ) THEN
+*
+         IF( NOTRAN ) THEN
+*
+*           Solve T*p = scale*c
+*
+            JNEXT = N
+            DO 30 J = N, 1, -1
+               IF( J.GT.JNEXT )
+     $            GO TO 30
+               J1 = J
+               J2 = J
+               JNEXT = J - 1
+               IF( J.GT.1 ) THEN
+                  IF( T( J, J-1 ).NE.ZERO ) THEN
+                     J1 = J - 1
+                     JNEXT = J - 2
+                  END IF
+               END IF
+*
+               IF( J1.EQ.J2 ) THEN
+*
+*                 Meet 1 by 1 diagonal block
+*
+*                 Scale to avoid overflow when computing
+*                     x(j) = b(j)/T(j,j)
+*
+                  XJ = ABS( X( J1 ) )
+                  TJJ = ABS( T( J1, J1 ) )
+                  TMP = T( J1, J1 )
+                  IF( TJJ.LT.SMIN ) THEN
+                     TMP = SMIN
+                     TJJ = SMIN
+                     INFO = 1
+                  END IF
+*
+                  IF( XJ.EQ.ZERO )
+     $               GO TO 30
+*
+                  IF( TJJ.LT.ONE ) THEN
+                     IF( XJ.GT.BIGNUM*TJJ ) THEN
+                        REC = ONE / XJ
+                        CALL SSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                  END IF
+                  X( J1 ) = X( J1 ) / TMP
+                  XJ = ABS( X( J1 ) )
+*
+*                 Scale x if necessary to avoid overflow when adding a
+*                 multiple of column j1 of T.
+*
+                  IF( XJ.GT.ONE ) THEN
+                     REC = ONE / XJ
+                     IF( WORK( J1 ).GT.( BIGNUM-XMAX )*REC ) THEN
+                        CALL SSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                     END IF
+                  END IF
+                  IF( J1.GT.1 ) THEN
+                     CALL SAXPY( J1-1, -X( J1 ), T( 1, J1 ), 1, X, 1 )
+                     K = ISAMAX( J1-1, X, 1 )
+                     XMAX = ABS( X( K ) )
+                  END IF
+*
+               ELSE
+*
+*                 Meet 2 by 2 diagonal block
+*
+*                 Call 2 by 2 linear system solve, to take
+*                 care of possible overflow by scaling factor.
+*
+                  D( 1, 1 ) = X( J1 )
+                  D( 2, 1 ) = X( J2 )
+                  CALL SLALN2( .FALSE., 2, 1, SMIN, ONE, T( J1, J1 ),
+     $                         LDT, ONE, ONE, D, 2, ZERO, ZERO, V, 2,
+     $                         SCALOC, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 2
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     CALL SSCAL( N, SCALOC, X, 1 )
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  X( J1 ) = V( 1, 1 )
+                  X( J2 ) = V( 2, 1 )
+*
+*                 Scale V(1,1) (= X(J1)) and/or V(2,1) (=X(J2))
+*                 to avoid overflow in updating right-hand side.
+*
+                  XJ = MAX( ABS( V( 1, 1 ) ), ABS( V( 2, 1 ) ) )
+                  IF( XJ.GT.ONE ) THEN
+                     REC = ONE / XJ
+                     IF( MAX( WORK( J1 ), WORK( J2 ) ).GT.
+     $                   ( BIGNUM-XMAX )*REC ) THEN
+                        CALL SSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                     END IF
+                  END IF
+*
+*                 Update right-hand side
+*
+                  IF( J1.GT.1 ) THEN
+                     CALL SAXPY( J1-1, -X( J1 ), T( 1, J1 ), 1, X, 1 )
+                     CALL SAXPY( J1-1, -X( J2 ), T( 1, J2 ), 1, X, 1 )
+                     K = ISAMAX( J1-1, X, 1 )
+                     XMAX = ABS( X( K ) )
+                  END IF
+*
+               END IF
+*
+   30       CONTINUE
+*
+         ELSE
+*
+*           Solve T'*p = scale*c
+*
+            JNEXT = 1
+            DO 40 J = 1, N
+               IF( J.LT.JNEXT )
+     $            GO TO 40
+               J1 = J
+               J2 = J
+               JNEXT = J + 1
+               IF( J.LT.N ) THEN
+                  IF( T( J+1, J ).NE.ZERO ) THEN
+                     J2 = J + 1
+                     JNEXT = J + 2
+                  END IF
+               END IF
+*
+               IF( J1.EQ.J2 ) THEN
+*
+*                 1 by 1 diagonal block
+*
+*                 Scale if necessary to avoid overflow in forming the
+*                 right-hand side element by inner product.
+*
+                  XJ = ABS( X( J1 ) )
+                  IF( XMAX.GT.ONE ) THEN
+                     REC = ONE / XMAX
+                     IF( WORK( J1 ).GT.( BIGNUM-XJ )*REC ) THEN
+                        CALL SSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                  END IF
+*
+                  X( J1 ) = X( J1 ) - SDOT( J1-1, T( 1, J1 ), 1, X, 1 )
+*
+                  XJ = ABS( X( J1 ) )
+                  TJJ = ABS( T( J1, J1 ) )
+                  TMP = T( J1, J1 )
+                  IF( TJJ.LT.SMIN ) THEN
+                     TMP = SMIN
+                     TJJ = SMIN
+                     INFO = 1
+                  END IF
+*
+                  IF( TJJ.LT.ONE ) THEN
+                     IF( XJ.GT.BIGNUM*TJJ ) THEN
+                        REC = ONE / XJ
+                        CALL SSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                  END IF
+                  X( J1 ) = X( J1 ) / TMP
+                  XMAX = MAX( XMAX, ABS( X( J1 ) ) )
+*
+               ELSE
+*
+*                 2 by 2 diagonal block
+*
+*                 Scale if necessary to avoid overflow in forming the
+*                 right-hand side elements by inner product.
+*
+                  XJ = MAX( ABS( X( J1 ) ), ABS( X( J2 ) ) )
+                  IF( XMAX.GT.ONE ) THEN
+                     REC = ONE / XMAX
+                     IF( MAX( WORK( J2 ), WORK( J1 ) ).GT.( BIGNUM-XJ )*
+     $                   REC ) THEN
+                        CALL SSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                  END IF
+*
+                  D( 1, 1 ) = X( J1 ) - SDOT( J1-1, T( 1, J1 ), 1, X,
+     $                        1 )
+                  D( 2, 1 ) = X( J2 ) - SDOT( J1-1, T( 1, J2 ), 1, X,
+     $                        1 )
+*
+                  CALL SLALN2( .TRUE., 2, 1, SMIN, ONE, T( J1, J1 ),
+     $                         LDT, ONE, ONE, D, 2, ZERO, ZERO, V, 2,
+     $                         SCALOC, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 2
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     CALL SSCAL( N, SCALOC, X, 1 )
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  X( J1 ) = V( 1, 1 )
+                  X( J2 ) = V( 2, 1 )
+                  XMAX = MAX( ABS( X( J1 ) ), ABS( X( J2 ) ), XMAX )
+*
+               END IF
+   40       CONTINUE
+         END IF
+*
+      ELSE
+*
+         SMINW = MAX( EPS*ABS( W ), SMIN )
+         IF( NOTRAN ) THEN
+*
+*           Solve (T + iB)*(p+iq) = c+id
+*
+            JNEXT = N
+            DO 70 J = N, 1, -1
+               IF( J.GT.JNEXT )
+     $            GO TO 70
+               J1 = J
+               J2 = J
+               JNEXT = J - 1
+               IF( J.GT.1 ) THEN
+                  IF( T( J, J-1 ).NE.ZERO ) THEN
+                     J1 = J - 1
+                     JNEXT = J - 2
+                  END IF
+               END IF
+*
+               IF( J1.EQ.J2 ) THEN
+*
+*                 1 by 1 diagonal block
+*
+*                 Scale if necessary to avoid overflow in division
+*
+                  Z = W
+                  IF( J1.EQ.1 )
+     $               Z = B( 1 )
+                  XJ = ABS( X( J1 ) ) + ABS( X( N+J1 ) )
+                  TJJ = ABS( T( J1, J1 ) ) + ABS( Z )
+                  TMP = T( J1, J1 )
+                  IF( TJJ.LT.SMINW ) THEN
+                     TMP = SMINW
+                     TJJ = SMINW
+                     INFO = 1
+                  END IF
+*
+                  IF( XJ.EQ.ZERO )
+     $               GO TO 70
+*
+                  IF( TJJ.LT.ONE ) THEN
+                     IF( XJ.GT.BIGNUM*TJJ ) THEN
+                        REC = ONE / XJ
+                        CALL SSCAL( N2, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                  END IF
+                  CALL SLADIV( X( J1 ), X( N+J1 ), TMP, Z, SR, SI )
+                  X( J1 ) = SR
+                  X( N+J1 ) = SI
+                  XJ = ABS( X( J1 ) ) + ABS( X( N+J1 ) )
+*
+*                 Scale x if necessary to avoid overflow when adding a
+*                 multiple of column j1 of T.
+*
+                  IF( XJ.GT.ONE ) THEN
+                     REC = ONE / XJ
+                     IF( WORK( J1 ).GT.( BIGNUM-XMAX )*REC ) THEN
+                        CALL SSCAL( N2, REC, X, 1 )
+                        SCALE = SCALE*REC
+                     END IF
+                  END IF
+*
+                  IF( J1.GT.1 ) THEN
+                     CALL SAXPY( J1-1, -X( J1 ), T( 1, J1 ), 1, X, 1 )
+                     CALL SAXPY( J1-1, -X( N+J1 ), T( 1, J1 ), 1,
+     $                           X( N+1 ), 1 )
+*
+                     X( 1 ) = X( 1 ) + B( J1 )*X( N+J1 )
+                     X( N+1 ) = X( N+1 ) - B( J1 )*X( J1 )
+*
+                     XMAX = ZERO
+                     DO 50 K = 1, J1 - 1
+                        XMAX = MAX( XMAX, ABS( X( K ) )+
+     $                         ABS( X( K+N ) ) )
+   50                CONTINUE
+                  END IF
+*
+               ELSE
+*
+*                 Meet 2 by 2 diagonal block
+*
+                  D( 1, 1 ) = X( J1 )
+                  D( 2, 1 ) = X( J2 )
+                  D( 1, 2 ) = X( N+J1 )
+                  D( 2, 2 ) = X( N+J2 )
+                  CALL SLALN2( .FALSE., 2, 2, SMINW, ONE, T( J1, J1 ),
+     $                         LDT, ONE, ONE, D, 2, ZERO, -W, V, 2,
+     $                         SCALOC, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 2
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     CALL SSCAL( 2*N, SCALOC, X, 1 )
+                     SCALE = SCALOC*SCALE
+                  END IF
+                  X( J1 ) = V( 1, 1 )
+                  X( J2 ) = V( 2, 1 )
+                  X( N+J1 ) = V( 1, 2 )
+                  X( N+J2 ) = V( 2, 2 )
+*
+*                 Scale X(J1), .... to avoid overflow in
+*                 updating right hand side.
+*
+                  XJ = MAX( ABS( V( 1, 1 ) )+ABS( V( 1, 2 ) ),
+     $                 ABS( V( 2, 1 ) )+ABS( V( 2, 2 ) ) )
+                  IF( XJ.GT.ONE ) THEN
+                     REC = ONE / XJ
+                     IF( MAX( WORK( J1 ), WORK( J2 ) ).GT.
+     $                   ( BIGNUM-XMAX )*REC ) THEN
+                        CALL SSCAL( N2, REC, X, 1 )
+                        SCALE = SCALE*REC
+                     END IF
+                  END IF
+*
+*                 Update the right-hand side.
+*
+                  IF( J1.GT.1 ) THEN
+                     CALL SAXPY( J1-1, -X( J1 ), T( 1, J1 ), 1, X, 1 )
+                     CALL SAXPY( J1-1, -X( J2 ), T( 1, J2 ), 1, X, 1 )
+*
+                     CALL SAXPY( J1-1, -X( N+J1 ), T( 1, J1 ), 1,
+     $                           X( N+1 ), 1 )
+                     CALL SAXPY( J1-1, -X( N+J2 ), T( 1, J2 ), 1,
+     $                           X( N+1 ), 1 )
+*
+                     X( 1 ) = X( 1 ) + B( J1 )*X( N+J1 ) +
+     $                        B( J2 )*X( N+J2 )
+                     X( N+1 ) = X( N+1 ) - B( J1 )*X( J1 ) -
+     $                          B( J2 )*X( J2 )
+*
+                     XMAX = ZERO
+                     DO 60 K = 1, J1 - 1
+                        XMAX = MAX( ABS( X( K ) )+ABS( X( K+N ) ),
+     $                         XMAX )
+   60                CONTINUE
+                  END IF
+*
+               END IF
+   70       CONTINUE
+*
+         ELSE
+*
+*           Solve (T + iB)'*(p+iq) = c+id
+*
+            JNEXT = 1
+            DO 80 J = 1, N
+               IF( J.LT.JNEXT )
+     $            GO TO 80
+               J1 = J
+               J2 = J
+               JNEXT = J + 1
+               IF( J.LT.N ) THEN
+                  IF( T( J+1, J ).NE.ZERO ) THEN
+                     J2 = J + 1
+                     JNEXT = J + 2
+                  END IF
+               END IF
+*
+               IF( J1.EQ.J2 ) THEN
+*
+*                 1 by 1 diagonal block
+*
+*                 Scale if necessary to avoid overflow in forming the
+*                 right-hand side element by inner product.
+*
+                  XJ = ABS( X( J1 ) ) + ABS( X( J1+N ) )
+                  IF( XMAX.GT.ONE ) THEN
+                     REC = ONE / XMAX
+                     IF( WORK( J1 ).GT.( BIGNUM-XJ )*REC ) THEN
+                        CALL SSCAL( N2, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                  END IF
+*
+                  X( J1 ) = X( J1 ) - SDOT( J1-1, T( 1, J1 ), 1, X, 1 )
+                  X( N+J1 ) = X( N+J1 ) - SDOT( J1-1, T( 1, J1 ), 1,
+     $                        X( N+1 ), 1 )
+                  IF( J1.GT.1 ) THEN
+                     X( J1 ) = X( J1 ) - B( J1 )*X( N+1 )
+                     X( N+J1 ) = X( N+J1 ) + B( J1 )*X( 1 )
+                  END IF
+                  XJ = ABS( X( J1 ) ) + ABS( X( J1+N ) )
+*
+                  Z = W
+                  IF( J1.EQ.1 )
+     $               Z = B( 1 )
+*
+*                 Scale if necessary to avoid overflow in
+*                 complex division
+*
+                  TJJ = ABS( T( J1, J1 ) ) + ABS( Z )
+                  TMP = T( J1, J1 )
+                  IF( TJJ.LT.SMINW ) THEN
+                     TMP = SMINW
+                     TJJ = SMINW
+                     INFO = 1
+                  END IF
+*
+                  IF( TJJ.LT.ONE ) THEN
+                     IF( XJ.GT.BIGNUM*TJJ ) THEN
+                        REC = ONE / XJ
+                        CALL SSCAL( N2, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                  END IF
+                  CALL SLADIV( X( J1 ), X( N+J1 ), TMP, -Z, SR, SI )
+                  X( J1 ) = SR
+                  X( J1+N ) = SI
+                  XMAX = MAX( ABS( X( J1 ) )+ABS( X( J1+N ) ), XMAX )
+*
+               ELSE
+*
+*                 2 by 2 diagonal block
+*
+*                 Scale if necessary to avoid overflow in forming the
+*                 right-hand side element by inner product.
+*
+                  XJ = MAX( ABS( X( J1 ) )+ABS( X( N+J1 ) ),
+     $                 ABS( X( J2 ) )+ABS( X( N+J2 ) ) )
+                  IF( XMAX.GT.ONE ) THEN
+                     REC = ONE / XMAX
+                     IF( MAX( WORK( J1 ), WORK( J2 ) ).GT.
+     $                   ( BIGNUM-XJ ) / XMAX ) THEN
+                        CALL SSCAL( N2, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                  END IF
+*
+                  D( 1, 1 ) = X( J1 ) - SDOT( J1-1, T( 1, J1 ), 1, X,
+     $                        1 )
+                  D( 2, 1 ) = X( J2 ) - SDOT( J1-1, T( 1, J2 ), 1, X,
+     $                        1 )
+                  D( 1, 2 ) = X( N+J1 ) - SDOT( J1-1, T( 1, J1 ), 1,
+     $                        X( N+1 ), 1 )
+                  D( 2, 2 ) = X( N+J2 ) - SDOT( J1-1, T( 1, J2 ), 1,
+     $                        X( N+1 ), 1 )
+                  D( 1, 1 ) = D( 1, 1 ) - B( J1 )*X( N+1 )
+                  D( 2, 1 ) = D( 2, 1 ) - B( J2 )*X( N+1 )
+                  D( 1, 2 ) = D( 1, 2 ) + B( J1 )*X( 1 )
+                  D( 2, 2 ) = D( 2, 2 ) + B( J2 )*X( 1 )
+*
+                  CALL SLALN2( .TRUE., 2, 2, SMINW, ONE, T( J1, J1 ),
+     $                         LDT, ONE, ONE, D, 2, ZERO, W, V, 2,
+     $                         SCALOC, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 2
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     CALL SSCAL( N2, SCALOC, X, 1 )
+                     SCALE = SCALOC*SCALE
+                  END IF
+                  X( J1 ) = V( 1, 1 )
+                  X( J2 ) = V( 2, 1 )
+                  X( N+J1 ) = V( 1, 2 )
+                  X( N+J2 ) = V( 2, 2 )
+                  XMAX = MAX( ABS( X( J1 ) )+ABS( X( N+J1 ) ),
+     $                   ABS( X( J2 ) )+ABS( X( N+J2 ) ), XMAX )
+*
+               END IF
+*
+   80       CONTINUE
+*
+         END IF
+*
+      END IF
+*
+      RETURN
+*
+*     End of SLAQTR
+*
+      END
diff --git a/costa/native/external/lapack/slar1v.f b/costa/native/external/lapack/slar1v.f
new file mode 100644
index 000000000..9838098bd
--- /dev/null
+++ b/costa/native/external/lapack/slar1v.f
@@ -0,0 +1,323 @@
+      SUBROUTINE SLAR1V( N, B1, BN, SIGMA, D, L, LD, LLD, GERSCH, Z,
+     $                   ZTZ, MINGMA, R, ISUPPZ, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            B1, BN, N, R
+      REAL               MINGMA, SIGMA, ZTZ
+*     ..
+*     .. Array Arguments ..
+      INTEGER            ISUPPZ( * )
+      REAL               D( * ), GERSCH( * ), L( * ), LD( * ), LLD( * ),
+     $                   WORK( * ), Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAR1V computes the (scaled) r-th column of the inverse of
+*  the sumbmatrix in rows B1 through BN of the tridiagonal matrix
+*  L D L^T - sigma I. The following steps accomplish this computation :
+*  (a) Stationary qd transform,  L D L^T - sigma I = L(+) D(+) L(+)^T,
+*  (b) Progressive qd transform, L D L^T - sigma I = U(-) D(-) U(-)^T,
+*  (c) Computation of the diagonal elements of the inverse of
+*      L D L^T - sigma I by combining the above transforms, and choosing
+*      r as the index where the diagonal of the inverse is (one of the)
+*      largest in magnitude.
+*  (d) Computation of the (scaled) r-th column of the inverse using the
+*      twisted factorization obtained by combining the top part of the
+*      the stationary and the bottom part of the progressive transform.
+*
+*  Arguments
+*  =========
+*
+*  N        (input) INTEGER
+*           The order of the matrix L D L^T.
+*
+*  B1       (input) INTEGER
+*           First index of the submatrix of L D L^T.
+*
+*  BN       (input) INTEGER
+*           Last index of the submatrix of L D L^T.
+*
+*  SIGMA    (input) REAL
+*           The shift. Initially, when R = 0, SIGMA should be a good
+*           approximation to an eigenvalue of L D L^T.
+*
+*  L        (input) REAL array, dimension (N-1)
+*           The (n-1) subdiagonal elements of the unit bidiagonal matrix
+*           L, in elements 1 to N-1.
+*
+*  D        (input) REAL array, dimension (N)
+*           The n diagonal elements of the diagonal matrix D.
+*
+*  LD       (input) REAL array, dimension (N-1)
+*           The n-1 elements L(i)*D(i).
+*
+*  LLD      (input) REAL array, dimension (N-1)
+*           The n-1 elements L(i)*L(i)*D(i).
+*
+*  GERSCH   (input) REAL array, dimension (2*N)
+*           The n Gerschgorin intervals. These are used to restrict
+*           the initial search for R, when R is input as 0.
+*
+*  Z        (output) REAL array, dimension (N)
+*           The (scaled) r-th column of the inverse. Z(R) is returned
+*           to be 1.
+*
+*  ZTZ      (output) REAL
+*           The square of the norm of Z.
+*
+*  MINGMA   (output) REAL
+*           The reciprocal of the largest (in magnitude) diagonal
+*           element of the inverse of L D L^T - sigma I.
+*
+*  R        (input/output) INTEGER
+*           Initially, R should be input to be 0 and is then output as
+*           the index where the diagonal element of the inverse is
+*           largest in magnitude. In later iterations, this same value
+*           of R should be input.
+*
+*  ISUPPZ   (output) INTEGER array, dimension (2)
+*           The support of the vector in Z, i.e., the vector Z is
+*           nonzero only in elements ISUPPZ(1) through ISUPPZ( 2 ).
+*
+*  WORK     (workspace) REAL array, dimension (4*N)
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Inderjit Dhillon, IBM Almaden, USA
+*     Osni Marques, LBNL/NERSC, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            BLKSIZ
+      PARAMETER          ( BLKSIZ = 32 )
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            SAWNAN
+      INTEGER            FROM, I, INDP, INDS, INDUMN, J, R1, R2, TO
+      REAL               DMINUS, DPLUS, EPS, S, TMP
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      EPS = SLAMCH( 'Precision' )
+      IF( R.EQ.0 ) THEN
+*
+*        Eliminate the top and bottom indices from the possible values
+*        of R where the desired eigenvector is largest in magnitude.
+*
+         R1 = B1
+         DO 10 I = B1, BN
+            IF( SIGMA.GE.GERSCH( 2*I-1 ) .OR. SIGMA.LE.GERSCH( 2*I ) )
+     $           THEN
+               R1 = I
+               GO TO 20
+            END IF
+   10    CONTINUE
+   20    CONTINUE
+         R2 = BN
+         DO 30 I = BN, B1, -1
+            IF( SIGMA.GE.GERSCH( 2*I-1 ) .OR. SIGMA.LE.GERSCH( 2*I ) )
+     $           THEN
+               R2 = I
+               GO TO 40
+            END IF
+   30    CONTINUE
+   40    CONTINUE
+      ELSE
+         R1 = R
+         R2 = R
+      END IF
+*
+      INDUMN = N
+      INDS = 2*N + 1
+      INDP = 3*N + 1
+      SAWNAN = .FALSE.
+*
+*     Compute the stationary transform (using the differential form)
+*     untill the index R2
+*
+      IF( B1.EQ.1 ) THEN
+         WORK( INDS ) = ZERO
+      ELSE
+         WORK( INDS ) = LLD( B1-1 )
+      END IF
+      S = WORK( INDS ) - SIGMA
+      DO 50 I = B1, R2 - 1
+         DPLUS = D( I ) + S
+         WORK( I ) = LD( I ) / DPLUS
+         WORK( INDS+I ) = S*WORK( I )*L( I )
+         S = WORK( INDS+I ) - SIGMA
+   50 CONTINUE
+*
+      IF( .NOT.( S.GT.ZERO .OR. S.LT.ONE ) ) THEN
+*
+*        Run a slower version of the above loop if a NaN is detected
+*
+         SAWNAN = .TRUE.
+         J = B1 + 1
+   60    CONTINUE
+         IF( WORK( INDS+J ).GT.ZERO .OR. WORK( INDS+J ).LT.ONE ) THEN
+            J = J + 1
+            GO TO 60
+         END IF
+         WORK( INDS+J ) = LLD( J )
+         S = WORK( INDS+J ) - SIGMA
+         DO 70 I = J + 1, R2 - 1
+            DPLUS = D( I ) + S
+            WORK( I ) = LD( I ) / DPLUS
+            IF( WORK( I ).EQ.ZERO ) THEN
+               WORK( INDS+I ) = LLD( I )
+            ELSE
+               WORK( INDS+I ) = S*WORK( I )*L( I )
+            END IF
+            S = WORK( INDS+I ) - SIGMA
+   70    CONTINUE
+      END IF
+      WORK( INDP+BN-1 ) = D( BN ) - SIGMA
+      DO 80 I = BN - 1, R1, -1
+         DMINUS = LLD( I ) + WORK( INDP+I )
+         TMP = D( I ) / DMINUS
+         WORK( INDUMN+I ) = L( I )*TMP
+         WORK( INDP+I-1 ) = WORK( INDP+I )*TMP - SIGMA
+   80 CONTINUE
+      TMP = WORK( INDP+R1-1 )
+      IF( .NOT.( TMP.GT.ZERO .OR. TMP.LT.ONE ) ) THEN
+*
+*        Run a slower version of the above loop if a NaN is detected
+*
+         SAWNAN = .TRUE.
+         J = BN - 3
+   90    CONTINUE
+         IF( WORK( INDP+J ).GT.ZERO .OR. WORK( INDP+J ).LT.ONE ) THEN
+            J = J - 1
+            GO TO 90
+         END IF
+         WORK( INDP+J ) = D( J+1 ) - SIGMA
+         DO 100 I = J, R1, -1
+            DMINUS = LLD( I ) + WORK( INDP+I )
+            TMP = D( I ) / DMINUS
+            WORK( INDUMN+I ) = L( I )*TMP
+            IF( TMP.EQ.ZERO ) THEN
+               WORK( INDP+I-1 ) = D( I ) - SIGMA
+            ELSE
+               WORK( INDP+I-1 ) = WORK( INDP+I )*TMP - SIGMA
+            END IF
+  100    CONTINUE
+      END IF
+*
+*     Find the index (from R1 to R2) of the largest (in magnitude)
+*     diagonal element of the inverse
+*
+      MINGMA = WORK( INDS+R1-1 ) + WORK( INDP+R1-1 )
+      IF( MINGMA.EQ.ZERO )
+     $   MINGMA = EPS*WORK( INDS+R1-1 )
+      R = R1
+      DO 110 I = R1, R2 - 1
+         TMP = WORK( INDS+I ) + WORK( INDP+I )
+         IF( TMP.EQ.ZERO )
+     $      TMP = EPS*WORK( INDS+I )
+         IF( ABS( TMP ).LT.ABS( MINGMA ) ) THEN
+            MINGMA = TMP
+            R = I + 1
+         END IF
+  110 CONTINUE
+*
+*     Compute the (scaled) r-th column of the inverse
+*
+      ISUPPZ( 1 ) = B1
+      ISUPPZ( 2 ) = BN
+      Z( R ) = ONE
+      ZTZ = ONE
+      IF( .NOT.SAWNAN ) THEN
+         FROM = R - 1
+         TO = MAX( R-BLKSIZ, B1 )
+  120    CONTINUE
+         IF( FROM.GE.B1 ) THEN
+            DO 130 I = FROM, TO, -1
+               Z( I ) = -( WORK( I )*Z( I+1 ) )
+               ZTZ = ZTZ + Z( I )*Z( I )
+  130       CONTINUE
+            IF( ABS( Z( TO ) ).LE.EPS .AND. ABS( Z( TO+1 ) ).LE.EPS )
+     $           THEN
+               ISUPPZ( 1 ) = TO + 2
+            ELSE
+               FROM = TO - 1
+               TO = MAX( TO-BLKSIZ, B1 )
+               GO TO 120
+            END IF
+         END IF
+         FROM = R + 1
+         TO = MIN( R+BLKSIZ, BN )
+  140    CONTINUE
+         IF( FROM.LE.BN ) THEN
+            DO 150 I = FROM, TO
+               Z( I ) = -( WORK( INDUMN+I-1 )*Z( I-1 ) )
+               ZTZ = ZTZ + Z( I )*Z( I )
+  150       CONTINUE
+            IF( ABS( Z( TO ) ).LE.EPS .AND. ABS( Z( TO-1 ) ).LE.EPS )
+     $           THEN
+               ISUPPZ( 2 ) = TO - 2
+            ELSE
+               FROM = TO + 1
+               TO = MIN( TO+BLKSIZ, BN )
+               GO TO 140
+            END IF
+         END IF
+      ELSE
+         DO 160 I = R - 1, B1, -1
+            IF( Z( I+1 ).EQ.ZERO ) THEN
+               Z( I ) = -( LD( I+1 ) / LD( I ) )*Z( I+2 )
+            ELSE IF( ABS( Z( I+1 ) ).LE.EPS .AND. ABS( Z( I+2 ) ).LE.
+     $               EPS ) THEN
+               ISUPPZ( 1 ) = I + 3
+               GO TO 170
+            ELSE
+               Z( I ) = -( WORK( I )*Z( I+1 ) )
+            END IF
+            ZTZ = ZTZ + Z( I )*Z( I )
+  160    CONTINUE
+  170    CONTINUE
+         DO 180 I = R, BN - 1
+            IF( Z( I ).EQ.ZERO ) THEN
+               Z( I+1 ) = -( LD( I-1 ) / LD( I ) )*Z( I-1 )
+            ELSE IF( ABS( Z( I ) ).LE.EPS .AND. ABS( Z( I-1 ) ).LE.EPS )
+     $                THEN
+               ISUPPZ( 2 ) = I - 2
+               GO TO 190
+            ELSE
+               Z( I+1 ) = -( WORK( INDUMN+I )*Z( I ) )
+            END IF
+            ZTZ = ZTZ + Z( I+1 )*Z( I+1 )
+  180    CONTINUE
+  190    CONTINUE
+      END IF
+      DO 200 I = B1, ISUPPZ( 1 ) - 3
+         Z( I ) = ZERO
+  200 CONTINUE
+      DO 210 I = ISUPPZ( 2 ) + 3, BN
+         Z( I ) = ZERO
+  210 CONTINUE
+*
+      RETURN
+*
+*     End of SLAR1V
+*
+      END
diff --git a/costa/native/external/lapack/slar2v.f b/costa/native/external/lapack/slar2v.f
new file mode 100644
index 000000000..ec9248d5f
--- /dev/null
+++ b/costa/native/external/lapack/slar2v.f
@@ -0,0 +1,87 @@
+      SUBROUTINE SLAR2V( N, X, Y, Z, INCX, C, S, INCC )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCC, INCX, N
+*     ..
+*     .. Array Arguments ..
+      REAL               C( * ), S( * ), X( * ), Y( * ), Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAR2V applies a vector of real plane rotations from both sides to
+*  a sequence of 2-by-2 real symmetric matrices, defined by the elements
+*  of the vectors x, y and z. For i = 1,2,...,n
+*
+*     ( x(i)  z(i) ) := (  c(i)  s(i) ) ( x(i)  z(i) ) ( c(i) -s(i) )
+*     ( z(i)  y(i) )    ( -s(i)  c(i) ) ( z(i)  y(i) ) ( s(i)  c(i) )
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of plane rotations to be applied.
+*
+*  X       (input/output) REAL array,
+*                         dimension (1+(N-1)*INCX)
+*          The vector x.
+*
+*  Y       (input/output) REAL array,
+*                         dimension (1+(N-1)*INCX)
+*          The vector y.
+*
+*  Z       (input/output) REAL array,
+*                         dimension (1+(N-1)*INCX)
+*          The vector z.
+*
+*  INCX    (input) INTEGER
+*          The increment between elements of X, Y and Z. INCX > 0.
+*
+*  C       (input) REAL array, dimension (1+(N-1)*INCC)
+*          The cosines of the plane rotations.
+*
+*  S       (input) REAL array, dimension (1+(N-1)*INCC)
+*          The sines of the plane rotations.
+*
+*  INCC    (input) INTEGER
+*          The increment between elements of C and S. INCC > 0.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, IC, IX
+      REAL               CI, SI, T1, T2, T3, T4, T5, T6, XI, YI, ZI
+*     ..
+*     .. Executable Statements ..
+*
+      IX = 1
+      IC = 1
+      DO 10 I = 1, N
+         XI = X( IX )
+         YI = Y( IX )
+         ZI = Z( IX )
+         CI = C( IC )
+         SI = S( IC )
+         T1 = SI*ZI
+         T2 = CI*ZI
+         T3 = T2 - SI*XI
+         T4 = T2 + SI*YI
+         T5 = CI*XI + T1
+         T6 = CI*YI - T1
+         X( IX ) = CI*T5 + SI*T4
+         Y( IX ) = CI*T6 - SI*T3
+         Z( IX ) = CI*T4 - SI*T5
+         IX = IX + INCX
+         IC = IC + INCC
+   10 CONTINUE
+*
+*     End of SLAR2V
+*
+      RETURN
+      END
diff --git a/costa/native/external/lapack/slarf.f b/costa/native/external/lapack/slarf.f
new file mode 100644
index 000000000..a5624ce09
--- /dev/null
+++ b/costa/native/external/lapack/slarf.f
@@ -0,0 +1,116 @@
+      SUBROUTINE SLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE
+      INTEGER            INCV, LDC, M, N
+      REAL               TAU
+*     ..
+*     .. Array Arguments ..
+      REAL               C( LDC, * ), V( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLARF applies a real elementary reflector H to a real m by n matrix
+*  C, from either the left or the right. H is represented in the form
+*
+*        H = I - tau * v * v'
+*
+*  where tau is a real scalar and v is a real vector.
+*
+*  If tau = 0, then H is taken to be the unit matrix.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': form  H * C
+*          = 'R': form  C * H
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C.
+*
+*  V       (input) REAL array, dimension
+*                     (1 + (M-1)*abs(INCV)) if SIDE = 'L'
+*                  or (1 + (N-1)*abs(INCV)) if SIDE = 'R'
+*          The vector v in the representation of H. V is not used if
+*          TAU = 0.
+*
+*  INCV    (input) INTEGER
+*          The increment between elements of v. INCV <> 0.
+*
+*  TAU     (input) REAL
+*          The value tau in the representation of H.
+*
+*  C       (input/output) REAL array, dimension (LDC,N)
+*          On entry, the m by n matrix C.
+*          On exit, C is overwritten by the matrix H * C if SIDE = 'L',
+*          or C * H if SIDE = 'R'.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) REAL array, dimension
+*                         (N) if SIDE = 'L'
+*                      or (M) if SIDE = 'R'
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMV, SGER
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+      IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*        Form  H * C
+*
+         IF( TAU.NE.ZERO ) THEN
+*
+*           w := C' * v
+*
+            CALL SGEMV( 'Transpose', M, N, ONE, C, LDC, V, INCV, ZERO,
+     $                  WORK, 1 )
+*
+*           C := C - v * w'
+*
+            CALL SGER( M, N, -TAU, V, INCV, WORK, 1, C, LDC )
+         END IF
+      ELSE
+*
+*        Form  C * H
+*
+         IF( TAU.NE.ZERO ) THEN
+*
+*           w := C * v
+*
+            CALL SGEMV( 'No transpose', M, N, ONE, C, LDC, V, INCV,
+     $                  ZERO, WORK, 1 )
+*
+*           C := C - w * v'
+*
+            CALL SGER( M, N, -TAU, WORK, 1, V, INCV, C, LDC )
+         END IF
+      END IF
+      RETURN
+*
+*     End of SLARF
+*
+      END
diff --git a/costa/native/external/lapack/slarfb.f b/costa/native/external/lapack/slarfb.f
new file mode 100644
index 000000000..7cc29efa5
--- /dev/null
+++ b/costa/native/external/lapack/slarfb.f
@@ -0,0 +1,588 @@
+      SUBROUTINE SLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV,
+     $                   T, LDT, C, LDC, WORK, LDWORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIRECT, SIDE, STOREV, TRANS
+      INTEGER            K, LDC, LDT, LDV, LDWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               C( LDC, * ), T( LDT, * ), V( LDV, * ),
+     $                   WORK( LDWORK, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLARFB applies a real block reflector H or its transpose H' to a
+*  real m by n matrix C, from either the left or the right.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply H or H' from the Left
+*          = 'R': apply H or H' from the Right
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': apply H (No transpose)
+*          = 'T': apply H' (Transpose)
+*
+*  DIRECT  (input) CHARACTER*1
+*          Indicates how H is formed from a product of elementary
+*          reflectors
+*          = 'F': H = H(1) H(2) . . . H(k) (Forward)
+*          = 'B': H = H(k) . . . H(2) H(1) (Backward)
+*
+*  STOREV  (input) CHARACTER*1
+*          Indicates how the vectors which define the elementary
+*          reflectors are stored:
+*          = 'C': Columnwise
+*          = 'R': Rowwise
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C.
+*
+*  K       (input) INTEGER
+*          The order of the matrix T (= the number of elementary
+*          reflectors whose product defines the block reflector).
+*
+*  V       (input) REAL array, dimension
+*                                (LDV,K) if STOREV = 'C'
+*                                (LDV,M) if STOREV = 'R' and SIDE = 'L'
+*                                (LDV,N) if STOREV = 'R' and SIDE = 'R'
+*          The matrix V. See further details.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V.
+*          If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M);
+*          if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N);
+*          if STOREV = 'R', LDV >= K.
+*
+*  T       (input) REAL array, dimension (LDT,K)
+*          The triangular k by k matrix T in the representation of the
+*          block reflector.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= K.
+*
+*  C       (input/output) REAL array, dimension (LDC,N)
+*          On entry, the m by n matrix C.
+*          On exit, C is overwritten by H*C or H'*C or C*H or C*H'.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDA >= max(1,M).
+*
+*  WORK    (workspace) REAL array, dimension (LDWORK,K)
+*
+*  LDWORK  (input) INTEGER
+*          The leading dimension of the array WORK.
+*          If SIDE = 'L', LDWORK >= max(1,N);
+*          if SIDE = 'R', LDWORK >= max(1,M).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      CHARACTER          TRANST
+      INTEGER            I, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SGEMM, STRMM
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 .OR. N.LE.0 )
+     $   RETURN
+*
+      IF( LSAME( TRANS, 'N' ) ) THEN
+         TRANST = 'T'
+      ELSE
+         TRANST = 'N'
+      END IF
+*
+      IF( LSAME( STOREV, 'C' ) ) THEN
+*
+         IF( LSAME( DIRECT, 'F' ) ) THEN
+*
+*           Let  V =  ( V1 )    (first K rows)
+*                     ( V2 )
+*           where  V1  is unit lower triangular.
+*
+            IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*              Form  H * C  or  H' * C  where  C = ( C1 )
+*                                                  ( C2 )
+*
+*              W := C' * V  =  (C1'*V1 + C2'*V2)  (stored in WORK)
+*
+*              W := C1'
+*
+               DO 10 J = 1, K
+                  CALL SCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 )
+   10          CONTINUE
+*
+*              W := W * V1
+*
+               CALL STRMM( 'Right', 'Lower', 'No transpose', 'Unit', N,
+     $                     K, ONE, V, LDV, WORK, LDWORK )
+               IF( M.GT.K ) THEN
+*
+*                 W := W + C2'*V2
+*
+                  CALL SGEMM( 'Transpose', 'No transpose', N, K, M-K,
+     $                        ONE, C( K+1, 1 ), LDC, V( K+1, 1 ), LDV,
+     $                        ONE, WORK, LDWORK )
+               END IF
+*
+*              W := W * T'  or  W * T
+*
+               CALL STRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - V * W'
+*
+               IF( M.GT.K ) THEN
+*
+*                 C2 := C2 - V2 * W'
+*
+                  CALL SGEMM( 'No transpose', 'Transpose', M-K, N, K,
+     $                        -ONE, V( K+1, 1 ), LDV, WORK, LDWORK, ONE,
+     $                        C( K+1, 1 ), LDC )
+               END IF
+*
+*              W := W * V1'
+*
+               CALL STRMM( 'Right', 'Lower', 'Transpose', 'Unit', N, K,
+     $                     ONE, V, LDV, WORK, LDWORK )
+*
+*              C1 := C1 - W'
+*
+               DO 30 J = 1, K
+                  DO 20 I = 1, N
+                     C( J, I ) = C( J, I ) - WORK( I, J )
+   20             CONTINUE
+   30          CONTINUE
+*
+            ELSE IF( LSAME( SIDE, 'R' ) ) THEN
+*
+*              Form  C * H  or  C * H'  where  C = ( C1  C2 )
+*
+*              W := C * V  =  (C1*V1 + C2*V2)  (stored in WORK)
+*
+*              W := C1
+*
+               DO 40 J = 1, K
+                  CALL SCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 )
+   40          CONTINUE
+*
+*              W := W * V1
+*
+               CALL STRMM( 'Right', 'Lower', 'No transpose', 'Unit', M,
+     $                     K, ONE, V, LDV, WORK, LDWORK )
+               IF( N.GT.K ) THEN
+*
+*                 W := W + C2 * V2
+*
+                  CALL SGEMM( 'No transpose', 'No transpose', M, K, N-K,
+     $                        ONE, C( 1, K+1 ), LDC, V( K+1, 1 ), LDV,
+     $                        ONE, WORK, LDWORK )
+               END IF
+*
+*              W := W * T  or  W * T'
+*
+               CALL STRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - W * V'
+*
+               IF( N.GT.K ) THEN
+*
+*                 C2 := C2 - W * V2'
+*
+                  CALL SGEMM( 'No transpose', 'Transpose', M, N-K, K,
+     $                        -ONE, WORK, LDWORK, V( K+1, 1 ), LDV, ONE,
+     $                        C( 1, K+1 ), LDC )
+               END IF
+*
+*              W := W * V1'
+*
+               CALL STRMM( 'Right', 'Lower', 'Transpose', 'Unit', M, K,
+     $                     ONE, V, LDV, WORK, LDWORK )
+*
+*              C1 := C1 - W
+*
+               DO 60 J = 1, K
+                  DO 50 I = 1, M
+                     C( I, J ) = C( I, J ) - WORK( I, J )
+   50             CONTINUE
+   60          CONTINUE
+            END IF
+*
+         ELSE
+*
+*           Let  V =  ( V1 )
+*                     ( V2 )    (last K rows)
+*           where  V2  is unit upper triangular.
+*
+            IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*              Form  H * C  or  H' * C  where  C = ( C1 )
+*                                                  ( C2 )
+*
+*              W := C' * V  =  (C1'*V1 + C2'*V2)  (stored in WORK)
+*
+*              W := C2'
+*
+               DO 70 J = 1, K
+                  CALL SCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 )
+   70          CONTINUE
+*
+*              W := W * V2
+*
+               CALL STRMM( 'Right', 'Upper', 'No transpose', 'Unit', N,
+     $                     K, ONE, V( M-K+1, 1 ), LDV, WORK, LDWORK )
+               IF( M.GT.K ) THEN
+*
+*                 W := W + C1'*V1
+*
+                  CALL SGEMM( 'Transpose', 'No transpose', N, K, M-K,
+     $                        ONE, C, LDC, V, LDV, ONE, WORK, LDWORK )
+               END IF
+*
+*              W := W * T'  or  W * T
+*
+               CALL STRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - V * W'
+*
+               IF( M.GT.K ) THEN
+*
+*                 C1 := C1 - V1 * W'
+*
+                  CALL SGEMM( 'No transpose', 'Transpose', M-K, N, K,
+     $                        -ONE, V, LDV, WORK, LDWORK, ONE, C, LDC )
+               END IF
+*
+*              W := W * V2'
+*
+               CALL STRMM( 'Right', 'Upper', 'Transpose', 'Unit', N, K,
+     $                     ONE, V( M-K+1, 1 ), LDV, WORK, LDWORK )
+*
+*              C2 := C2 - W'
+*
+               DO 90 J = 1, K
+                  DO 80 I = 1, N
+                     C( M-K+J, I ) = C( M-K+J, I ) - WORK( I, J )
+   80             CONTINUE
+   90          CONTINUE
+*
+            ELSE IF( LSAME( SIDE, 'R' ) ) THEN
+*
+*              Form  C * H  or  C * H'  where  C = ( C1  C2 )
+*
+*              W := C * V  =  (C1*V1 + C2*V2)  (stored in WORK)
+*
+*              W := C2
+*
+               DO 100 J = 1, K
+                  CALL SCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 )
+  100          CONTINUE
+*
+*              W := W * V2
+*
+               CALL STRMM( 'Right', 'Upper', 'No transpose', 'Unit', M,
+     $                     K, ONE, V( N-K+1, 1 ), LDV, WORK, LDWORK )
+               IF( N.GT.K ) THEN
+*
+*                 W := W + C1 * V1
+*
+                  CALL SGEMM( 'No transpose', 'No transpose', M, K, N-K,
+     $                        ONE, C, LDC, V, LDV, ONE, WORK, LDWORK )
+               END IF
+*
+*              W := W * T  or  W * T'
+*
+               CALL STRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - W * V'
+*
+               IF( N.GT.K ) THEN
+*
+*                 C1 := C1 - W * V1'
+*
+                  CALL SGEMM( 'No transpose', 'Transpose', M, N-K, K,
+     $                        -ONE, WORK, LDWORK, V, LDV, ONE, C, LDC )
+               END IF
+*
+*              W := W * V2'
+*
+               CALL STRMM( 'Right', 'Upper', 'Transpose', 'Unit', M, K,
+     $                     ONE, V( N-K+1, 1 ), LDV, WORK, LDWORK )
+*
+*              C2 := C2 - W
+*
+               DO 120 J = 1, K
+                  DO 110 I = 1, M
+                     C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J )
+  110             CONTINUE
+  120          CONTINUE
+            END IF
+         END IF
+*
+      ELSE IF( LSAME( STOREV, 'R' ) ) THEN
+*
+         IF( LSAME( DIRECT, 'F' ) ) THEN
+*
+*           Let  V =  ( V1  V2 )    (V1: first K columns)
+*           where  V1  is unit upper triangular.
+*
+            IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*              Form  H * C  or  H' * C  where  C = ( C1 )
+*                                                  ( C2 )
+*
+*              W := C' * V'  =  (C1'*V1' + C2'*V2') (stored in WORK)
+*
+*              W := C1'
+*
+               DO 130 J = 1, K
+                  CALL SCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 )
+  130          CONTINUE
+*
+*              W := W * V1'
+*
+               CALL STRMM( 'Right', 'Upper', 'Transpose', 'Unit', N, K,
+     $                     ONE, V, LDV, WORK, LDWORK )
+               IF( M.GT.K ) THEN
+*
+*                 W := W + C2'*V2'
+*
+                  CALL SGEMM( 'Transpose', 'Transpose', N, K, M-K, ONE,
+     $                        C( K+1, 1 ), LDC, V( 1, K+1 ), LDV, ONE,
+     $                        WORK, LDWORK )
+               END IF
+*
+*              W := W * T'  or  W * T
+*
+               CALL STRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - V' * W'
+*
+               IF( M.GT.K ) THEN
+*
+*                 C2 := C2 - V2' * W'
+*
+                  CALL SGEMM( 'Transpose', 'Transpose', M-K, N, K, -ONE,
+     $                        V( 1, K+1 ), LDV, WORK, LDWORK, ONE,
+     $                        C( K+1, 1 ), LDC )
+               END IF
+*
+*              W := W * V1
+*
+               CALL STRMM( 'Right', 'Upper', 'No transpose', 'Unit', N,
+     $                     K, ONE, V, LDV, WORK, LDWORK )
+*
+*              C1 := C1 - W'
+*
+               DO 150 J = 1, K
+                  DO 140 I = 1, N
+                     C( J, I ) = C( J, I ) - WORK( I, J )
+  140             CONTINUE
+  150          CONTINUE
+*
+            ELSE IF( LSAME( SIDE, 'R' ) ) THEN
+*
+*              Form  C * H  or  C * H'  where  C = ( C1  C2 )
+*
+*              W := C * V'  =  (C1*V1' + C2*V2')  (stored in WORK)
+*
+*              W := C1
+*
+               DO 160 J = 1, K
+                  CALL SCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 )
+  160          CONTINUE
+*
+*              W := W * V1'
+*
+               CALL STRMM( 'Right', 'Upper', 'Transpose', 'Unit', M, K,
+     $                     ONE, V, LDV, WORK, LDWORK )
+               IF( N.GT.K ) THEN
+*
+*                 W := W + C2 * V2'
+*
+                  CALL SGEMM( 'No transpose', 'Transpose', M, K, N-K,
+     $                        ONE, C( 1, K+1 ), LDC, V( 1, K+1 ), LDV,
+     $                        ONE, WORK, LDWORK )
+               END IF
+*
+*              W := W * T  or  W * T'
+*
+               CALL STRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - W * V
+*
+               IF( N.GT.K ) THEN
+*
+*                 C2 := C2 - W * V2
+*
+                  CALL SGEMM( 'No transpose', 'No transpose', M, N-K, K,
+     $                        -ONE, WORK, LDWORK, V( 1, K+1 ), LDV, ONE,
+     $                        C( 1, K+1 ), LDC )
+               END IF
+*
+*              W := W * V1
+*
+               CALL STRMM( 'Right', 'Upper', 'No transpose', 'Unit', M,
+     $                     K, ONE, V, LDV, WORK, LDWORK )
+*
+*              C1 := C1 - W
+*
+               DO 180 J = 1, K
+                  DO 170 I = 1, M
+                     C( I, J ) = C( I, J ) - WORK( I, J )
+  170             CONTINUE
+  180          CONTINUE
+*
+            END IF
+*
+         ELSE
+*
+*           Let  V =  ( V1  V2 )    (V2: last K columns)
+*           where  V2  is unit lower triangular.
+*
+            IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*              Form  H * C  or  H' * C  where  C = ( C1 )
+*                                                  ( C2 )
+*
+*              W := C' * V'  =  (C1'*V1' + C2'*V2') (stored in WORK)
+*
+*              W := C2'
+*
+               DO 190 J = 1, K
+                  CALL SCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 )
+  190          CONTINUE
+*
+*              W := W * V2'
+*
+               CALL STRMM( 'Right', 'Lower', 'Transpose', 'Unit', N, K,
+     $                     ONE, V( 1, M-K+1 ), LDV, WORK, LDWORK )
+               IF( M.GT.K ) THEN
+*
+*                 W := W + C1'*V1'
+*
+                  CALL SGEMM( 'Transpose', 'Transpose', N, K, M-K, ONE,
+     $                        C, LDC, V, LDV, ONE, WORK, LDWORK )
+               END IF
+*
+*              W := W * T'  or  W * T
+*
+               CALL STRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - V' * W'
+*
+               IF( M.GT.K ) THEN
+*
+*                 C1 := C1 - V1' * W'
+*
+                  CALL SGEMM( 'Transpose', 'Transpose', M-K, N, K, -ONE,
+     $                        V, LDV, WORK, LDWORK, ONE, C, LDC )
+               END IF
+*
+*              W := W * V2
+*
+               CALL STRMM( 'Right', 'Lower', 'No transpose', 'Unit', N,
+     $                     K, ONE, V( 1, M-K+1 ), LDV, WORK, LDWORK )
+*
+*              C2 := C2 - W'
+*
+               DO 210 J = 1, K
+                  DO 200 I = 1, N
+                     C( M-K+J, I ) = C( M-K+J, I ) - WORK( I, J )
+  200             CONTINUE
+  210          CONTINUE
+*
+            ELSE IF( LSAME( SIDE, 'R' ) ) THEN
+*
+*              Form  C * H  or  C * H'  where  C = ( C1  C2 )
+*
+*              W := C * V'  =  (C1*V1' + C2*V2')  (stored in WORK)
+*
+*              W := C2
+*
+               DO 220 J = 1, K
+                  CALL SCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 )
+  220          CONTINUE
+*
+*              W := W * V2'
+*
+               CALL STRMM( 'Right', 'Lower', 'Transpose', 'Unit', M, K,
+     $                     ONE, V( 1, N-K+1 ), LDV, WORK, LDWORK )
+               IF( N.GT.K ) THEN
+*
+*                 W := W + C1 * V1'
+*
+                  CALL SGEMM( 'No transpose', 'Transpose', M, K, N-K,
+     $                        ONE, C, LDC, V, LDV, ONE, WORK, LDWORK )
+               END IF
+*
+*              W := W * T  or  W * T'
+*
+               CALL STRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - W * V
+*
+               IF( N.GT.K ) THEN
+*
+*                 C1 := C1 - W * V1
+*
+                  CALL SGEMM( 'No transpose', 'No transpose', M, N-K, K,
+     $                        -ONE, WORK, LDWORK, V, LDV, ONE, C, LDC )
+               END IF
+*
+*              W := W * V2
+*
+               CALL STRMM( 'Right', 'Lower', 'No transpose', 'Unit', M,
+     $                     K, ONE, V( 1, N-K+1 ), LDV, WORK, LDWORK )
+*
+*              C1 := C1 - W
+*
+               DO 240 J = 1, K
+                  DO 230 I = 1, M
+                     C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J )
+  230             CONTINUE
+  240          CONTINUE
+*
+            END IF
+*
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SLARFB
+*
+      END
diff --git a/costa/native/external/lapack/slarfg.f b/costa/native/external/lapack/slarfg.f
new file mode 100644
index 000000000..1a7539808
--- /dev/null
+++ b/costa/native/external/lapack/slarfg.f
@@ -0,0 +1,138 @@
+      SUBROUTINE SLARFG( N, ALPHA, X, INCX, TAU )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+      REAL               ALPHA, TAU
+*     ..
+*     .. Array Arguments ..
+      REAL               X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLARFG generates a real elementary reflector H of order n, such
+*  that
+*
+*        H * ( alpha ) = ( beta ),   H' * H = I.
+*            (   x   )   (   0  )
+*
+*  where alpha and beta are scalars, and x is an (n-1)-element real
+*  vector. H is represented in the form
+*
+*        H = I - tau * ( 1 ) * ( 1 v' ) ,
+*                      ( v )
+*
+*  where tau is a real scalar and v is a real (n-1)-element
+*  vector.
+*
+*  If the elements of x are all zero, then tau = 0 and H is taken to be
+*  the unit matrix.
+*
+*  Otherwise  1 <= tau <= 2.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the elementary reflector.
+*
+*  ALPHA   (input/output) REAL
+*          On entry, the value alpha.
+*          On exit, it is overwritten with the value beta.
+*
+*  X       (input/output) REAL array, dimension
+*                         (1+(N-2)*abs(INCX))
+*          On entry, the vector x.
+*          On exit, it is overwritten with the vector v.
+*
+*  INCX    (input) INTEGER
+*          The increment between elements of X. INCX > 0.
+*
+*  TAU     (output) REAL
+*          The value tau.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            J, KNT
+      REAL               BETA, RSAFMN, SAFMIN, XNORM
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH, SLAPY2, SNRM2
+      EXTERNAL           SLAMCH, SLAPY2, SNRM2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, SIGN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SSCAL
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.LE.1 ) THEN
+         TAU = ZERO
+         RETURN
+      END IF
+*
+      XNORM = SNRM2( N-1, X, INCX )
+*
+      IF( XNORM.EQ.ZERO ) THEN
+*
+*        H  =  I
+*
+         TAU = ZERO
+      ELSE
+*
+*        general case
+*
+         BETA = -SIGN( SLAPY2( ALPHA, XNORM ), ALPHA )
+         SAFMIN = SLAMCH( 'S' ) / SLAMCH( 'E' )
+         IF( ABS( BETA ).LT.SAFMIN ) THEN
+*
+*           XNORM, BETA may be inaccurate; scale X and recompute them
+*
+            RSAFMN = ONE / SAFMIN
+            KNT = 0
+   10       CONTINUE
+            KNT = KNT + 1
+            CALL SSCAL( N-1, RSAFMN, X, INCX )
+            BETA = BETA*RSAFMN
+            ALPHA = ALPHA*RSAFMN
+            IF( ABS( BETA ).LT.SAFMIN )
+     $         GO TO 10
+*
+*           New BETA is at most 1, at least SAFMIN
+*
+            XNORM = SNRM2( N-1, X, INCX )
+            BETA = -SIGN( SLAPY2( ALPHA, XNORM ), ALPHA )
+            TAU = ( BETA-ALPHA ) / BETA
+            CALL SSCAL( N-1, ONE / ( ALPHA-BETA ), X, INCX )
+*
+*           If ALPHA is subnormal, it may lose relative accuracy
+*
+            ALPHA = BETA
+            DO 20 J = 1, KNT
+               ALPHA = ALPHA*SAFMIN
+   20       CONTINUE
+         ELSE
+            TAU = ( BETA-ALPHA ) / BETA
+            CALL SSCAL( N-1, ONE / ( ALPHA-BETA ), X, INCX )
+            ALPHA = BETA
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SLARFG
+*
+      END
diff --git a/costa/native/external/lapack/slarft.f b/costa/native/external/lapack/slarft.f
new file mode 100644
index 000000000..c72a0eb94
--- /dev/null
+++ b/costa/native/external/lapack/slarft.f
@@ -0,0 +1,218 @@
+      SUBROUTINE SLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIRECT, STOREV
+      INTEGER            K, LDT, LDV, N
+*     ..
+*     .. Array Arguments ..
+      REAL               T( LDT, * ), TAU( * ), V( LDV, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLARFT forms the triangular factor T of a real block reflector H
+*  of order n, which is defined as a product of k elementary reflectors.
+*
+*  If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
+*
+*  If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
+*
+*  If STOREV = 'C', the vector which defines the elementary reflector
+*  H(i) is stored in the i-th column of the array V, and
+*
+*     H  =  I - V * T * V'
+*
+*  If STOREV = 'R', the vector which defines the elementary reflector
+*  H(i) is stored in the i-th row of the array V, and
+*
+*     H  =  I - V' * T * V
+*
+*  Arguments
+*  =========
+*
+*  DIRECT  (input) CHARACTER*1
+*          Specifies the order in which the elementary reflectors are
+*          multiplied to form the block reflector:
+*          = 'F': H = H(1) H(2) . . . H(k) (Forward)
+*          = 'B': H = H(k) . . . H(2) H(1) (Backward)
+*
+*  STOREV  (input) CHARACTER*1
+*          Specifies how the vectors which define the elementary
+*          reflectors are stored (see also Further Details):
+*          = 'C': columnwise
+*          = 'R': rowwise
+*
+*  N       (input) INTEGER
+*          The order of the block reflector H. N >= 0.
+*
+*  K       (input) INTEGER
+*          The order of the triangular factor T (= the number of
+*          elementary reflectors). K >= 1.
+*
+*  V       (input/output) REAL array, dimension
+*                               (LDV,K) if STOREV = 'C'
+*                               (LDV,N) if STOREV = 'R'
+*          The matrix V. See further details.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V.
+*          If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
+*
+*  TAU     (input) REAL array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i).
+*
+*  T       (output) REAL array, dimension (LDT,K)
+*          The k by k triangular factor T of the block reflector.
+*          If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
+*          lower triangular. The rest of the array is not used.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= K.
+*
+*  Further Details
+*  ===============
+*
+*  The shape of the matrix V and the storage of the vectors which define
+*  the H(i) is best illustrated by the following example with n = 5 and
+*  k = 3. The elements equal to 1 are not stored; the corresponding
+*  array elements are modified but restored on exit. The rest of the
+*  array is not used.
+*
+*  DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R':
+*
+*               V = (  1       )                 V = (  1 v1 v1 v1 v1 )
+*                   ( v1  1    )                     (     1 v2 v2 v2 )
+*                   ( v1 v2  1 )                     (        1 v3 v3 )
+*                   ( v1 v2 v3 )
+*                   ( v1 v2 v3 )
+*
+*  DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R':
+*
+*               V = ( v1 v2 v3 )                 V = ( v1 v1  1       )
+*                   ( v1 v2 v3 )                     ( v2 v2 v2  1    )
+*                   (  1 v2 v3 )                     ( v3 v3 v3 v3  1 )
+*                   (     1 v3 )
+*                   (        1 )
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      REAL               VII
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMV, STRMV
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( LSAME( DIRECT, 'F' ) ) THEN
+         DO 20 I = 1, K
+            IF( TAU( I ).EQ.ZERO ) THEN
+*
+*              H(i)  =  I
+*
+               DO 10 J = 1, I
+                  T( J, I ) = ZERO
+   10          CONTINUE
+            ELSE
+*
+*              general case
+*
+               VII = V( I, I )
+               V( I, I ) = ONE
+               IF( LSAME( STOREV, 'C' ) ) THEN
+*
+*                 T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i)
+*
+                  CALL SGEMV( 'Transpose', N-I+1, I-1, -TAU( I ),
+     $                        V( I, 1 ), LDV, V( I, I ), 1, ZERO,
+     $                        T( 1, I ), 1 )
+               ELSE
+*
+*                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)'
+*
+                  CALL SGEMV( 'No transpose', I-1, N-I+1, -TAU( I ),
+     $                        V( 1, I ), LDV, V( I, I ), LDV, ZERO,
+     $                        T( 1, I ), 1 )
+               END IF
+               V( I, I ) = VII
+*
+*              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i)
+*
+               CALL STRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T,
+     $                     LDT, T( 1, I ), 1 )
+               T( I, I ) = TAU( I )
+            END IF
+   20    CONTINUE
+      ELSE
+         DO 40 I = K, 1, -1
+            IF( TAU( I ).EQ.ZERO ) THEN
+*
+*              H(i)  =  I
+*
+               DO 30 J = I, K
+                  T( J, I ) = ZERO
+   30          CONTINUE
+            ELSE
+*
+*              general case
+*
+               IF( I.LT.K ) THEN
+                  IF( LSAME( STOREV, 'C' ) ) THEN
+                     VII = V( N-K+I, I )
+                     V( N-K+I, I ) = ONE
+*
+*                    T(i+1:k,i) :=
+*                            - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i)
+*
+                     CALL SGEMV( 'Transpose', N-K+I, K-I, -TAU( I ),
+     $                           V( 1, I+1 ), LDV, V( 1, I ), 1, ZERO,
+     $                           T( I+1, I ), 1 )
+                     V( N-K+I, I ) = VII
+                  ELSE
+                     VII = V( I, N-K+I )
+                     V( I, N-K+I ) = ONE
+*
+*                    T(i+1:k,i) :=
+*                            - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)'
+*
+                     CALL SGEMV( 'No transpose', K-I, N-K+I, -TAU( I ),
+     $                           V( I+1, 1 ), LDV, V( I, 1 ), LDV, ZERO,
+     $                           T( I+1, I ), 1 )
+                     V( I, N-K+I ) = VII
+                  END IF
+*
+*                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i)
+*
+                  CALL STRMV( 'Lower', 'No transpose', 'Non-unit', K-I,
+     $                        T( I+1, I+1 ), LDT, T( I+1, I ), 1 )
+               END IF
+               T( I, I ) = TAU( I )
+            END IF
+   40    CONTINUE
+      END IF
+      RETURN
+*
+*     End of SLARFT
+*
+      END
diff --git a/costa/native/external/lapack/slarfx.f b/costa/native/external/lapack/slarfx.f
new file mode 100644
index 000000000..1df9d8a19
--- /dev/null
+++ b/costa/native/external/lapack/slarfx.f
@@ -0,0 +1,638 @@
+      SUBROUTINE SLARFX( SIDE, M, N, V, TAU, C, LDC, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE
+      INTEGER            LDC, M, N
+      REAL               TAU
+*     ..
+*     .. Array Arguments ..
+      REAL               C( LDC, * ), V( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLARFX applies a real elementary reflector H to a real m by n
+*  matrix C, from either the left or the right. H is represented in the
+*  form
+*
+*        H = I - tau * v * v'
+*
+*  where tau is a real scalar and v is a real vector.
+*
+*  If tau = 0, then H is taken to be the unit matrix
+*
+*  This version uses inline code if H has order < 11.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': form  H * C
+*          = 'R': form  C * H
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C.
+*
+*  V       (input) REAL array, dimension (M) if SIDE = 'L'
+*                                     or (N) if SIDE = 'R'
+*          The vector v in the representation of H.
+*
+*  TAU     (input) REAL
+*          The value tau in the representation of H.
+*
+*  C       (input/output) REAL array, dimension (LDC,N)
+*          On entry, the m by n matrix C.
+*          On exit, C is overwritten by the matrix H * C if SIDE = 'L',
+*          or C * H if SIDE = 'R'.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDA >= (1,M).
+*
+*  WORK    (workspace) REAL array, dimension
+*                      (N) if SIDE = 'L'
+*                      or (M) if SIDE = 'R'
+*          WORK is not referenced if H has order < 11.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            J
+      REAL               SUM, T1, T10, T2, T3, T4, T5, T6, T7, T8, T9,
+     $                   V1, V10, V2, V3, V4, V5, V6, V7, V8, V9
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMV, SGER
+*     ..
+*     .. Executable Statements ..
+*
+      IF( TAU.EQ.ZERO )
+     $   RETURN
+      IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*        Form  H * C, where H has order m.
+*
+         GO TO ( 10, 30, 50, 70, 90, 110, 130, 150,
+     $           170, 190 )M
+*
+*        Code for general M
+*
+*        w := C'*v
+*
+         CALL SGEMV( 'Transpose', M, N, ONE, C, LDC, V, 1, ZERO, WORK,
+     $               1 )
+*
+*        C := C - tau * v * w'
+*
+         CALL SGER( M, N, -TAU, V, 1, WORK, 1, C, LDC )
+         GO TO 410
+   10    CONTINUE
+*
+*        Special code for 1 x 1 Householder
+*
+         T1 = ONE - TAU*V( 1 )*V( 1 )
+         DO 20 J = 1, N
+            C( 1, J ) = T1*C( 1, J )
+   20    CONTINUE
+         GO TO 410
+   30    CONTINUE
+*
+*        Special code for 2 x 2 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         DO 40 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+   40    CONTINUE
+         GO TO 410
+   50    CONTINUE
+*
+*        Special code for 3 x 3 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         DO 60 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+   60    CONTINUE
+         GO TO 410
+   70    CONTINUE
+*
+*        Special code for 4 x 4 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         V4 = V( 4 )
+         T4 = TAU*V4
+         DO 80 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) +
+     $            V4*C( 4, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+            C( 4, J ) = C( 4, J ) - SUM*T4
+   80    CONTINUE
+         GO TO 410
+   90    CONTINUE
+*
+*        Special code for 5 x 5 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         V4 = V( 4 )
+         T4 = TAU*V4
+         V5 = V( 5 )
+         T5 = TAU*V5
+         DO 100 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) +
+     $            V4*C( 4, J ) + V5*C( 5, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+            C( 4, J ) = C( 4, J ) - SUM*T4
+            C( 5, J ) = C( 5, J ) - SUM*T5
+  100    CONTINUE
+         GO TO 410
+  110    CONTINUE
+*
+*        Special code for 6 x 6 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         V4 = V( 4 )
+         T4 = TAU*V4
+         V5 = V( 5 )
+         T5 = TAU*V5
+         V6 = V( 6 )
+         T6 = TAU*V6
+         DO 120 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) +
+     $            V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+            C( 4, J ) = C( 4, J ) - SUM*T4
+            C( 5, J ) = C( 5, J ) - SUM*T5
+            C( 6, J ) = C( 6, J ) - SUM*T6
+  120    CONTINUE
+         GO TO 410
+  130    CONTINUE
+*
+*        Special code for 7 x 7 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         V4 = V( 4 )
+         T4 = TAU*V4
+         V5 = V( 5 )
+         T5 = TAU*V5
+         V6 = V( 6 )
+         T6 = TAU*V6
+         V7 = V( 7 )
+         T7 = TAU*V7
+         DO 140 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) +
+     $            V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) +
+     $            V7*C( 7, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+            C( 4, J ) = C( 4, J ) - SUM*T4
+            C( 5, J ) = C( 5, J ) - SUM*T5
+            C( 6, J ) = C( 6, J ) - SUM*T6
+            C( 7, J ) = C( 7, J ) - SUM*T7
+  140    CONTINUE
+         GO TO 410
+  150    CONTINUE
+*
+*        Special code for 8 x 8 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         V4 = V( 4 )
+         T4 = TAU*V4
+         V5 = V( 5 )
+         T5 = TAU*V5
+         V6 = V( 6 )
+         T6 = TAU*V6
+         V7 = V( 7 )
+         T7 = TAU*V7
+         V8 = V( 8 )
+         T8 = TAU*V8
+         DO 160 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) +
+     $            V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) +
+     $            V7*C( 7, J ) + V8*C( 8, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+            C( 4, J ) = C( 4, J ) - SUM*T4
+            C( 5, J ) = C( 5, J ) - SUM*T5
+            C( 6, J ) = C( 6, J ) - SUM*T6
+            C( 7, J ) = C( 7, J ) - SUM*T7
+            C( 8, J ) = C( 8, J ) - SUM*T8
+  160    CONTINUE
+         GO TO 410
+  170    CONTINUE
+*
+*        Special code for 9 x 9 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         V4 = V( 4 )
+         T4 = TAU*V4
+         V5 = V( 5 )
+         T5 = TAU*V5
+         V6 = V( 6 )
+         T6 = TAU*V6
+         V7 = V( 7 )
+         T7 = TAU*V7
+         V8 = V( 8 )
+         T8 = TAU*V8
+         V9 = V( 9 )
+         T9 = TAU*V9
+         DO 180 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) +
+     $            V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) +
+     $            V7*C( 7, J ) + V8*C( 8, J ) + V9*C( 9, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+            C( 4, J ) = C( 4, J ) - SUM*T4
+            C( 5, J ) = C( 5, J ) - SUM*T5
+            C( 6, J ) = C( 6, J ) - SUM*T6
+            C( 7, J ) = C( 7, J ) - SUM*T7
+            C( 8, J ) = C( 8, J ) - SUM*T8
+            C( 9, J ) = C( 9, J ) - SUM*T9
+  180    CONTINUE
+         GO TO 410
+  190    CONTINUE
+*
+*        Special code for 10 x 10 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         V4 = V( 4 )
+         T4 = TAU*V4
+         V5 = V( 5 )
+         T5 = TAU*V5
+         V6 = V( 6 )
+         T6 = TAU*V6
+         V7 = V( 7 )
+         T7 = TAU*V7
+         V8 = V( 8 )
+         T8 = TAU*V8
+         V9 = V( 9 )
+         T9 = TAU*V9
+         V10 = V( 10 )
+         T10 = TAU*V10
+         DO 200 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) +
+     $            V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) +
+     $            V7*C( 7, J ) + V8*C( 8, J ) + V9*C( 9, J ) +
+     $            V10*C( 10, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+            C( 4, J ) = C( 4, J ) - SUM*T4
+            C( 5, J ) = C( 5, J ) - SUM*T5
+            C( 6, J ) = C( 6, J ) - SUM*T6
+            C( 7, J ) = C( 7, J ) - SUM*T7
+            C( 8, J ) = C( 8, J ) - SUM*T8
+            C( 9, J ) = C( 9, J ) - SUM*T9
+            C( 10, J ) = C( 10, J ) - SUM*T10
+  200    CONTINUE
+         GO TO 410
+      ELSE
+*
+*        Form  C * H, where H has order n.
+*
+         GO TO ( 210, 230, 250, 270, 290, 310, 330, 350,
+     $           370, 390 )N
+*
+*        Code for general N
+*
+*        w := C * v
+*
+         CALL SGEMV( 'No transpose', M, N, ONE, C, LDC, V, 1, ZERO,
+     $               WORK, 1 )
+*
+*        C := C - tau * w * v'
+*
+         CALL SGER( M, N, -TAU, WORK, 1, V, 1, C, LDC )
+         GO TO 410
+  210    CONTINUE
+*
+*        Special code for 1 x 1 Householder
+*
+         T1 = ONE - TAU*V( 1 )*V( 1 )
+         DO 220 J = 1, M
+            C( J, 1 ) = T1*C( J, 1 )
+  220    CONTINUE
+         GO TO 410
+  230    CONTINUE
+*
+*        Special code for 2 x 2 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         DO 240 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+  240    CONTINUE
+         GO TO 410
+  250    CONTINUE
+*
+*        Special code for 3 x 3 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         DO 260 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+  260    CONTINUE
+         GO TO 410
+  270    CONTINUE
+*
+*        Special code for 4 x 4 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         V4 = V( 4 )
+         T4 = TAU*V4
+         DO 280 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) +
+     $            V4*C( J, 4 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+            C( J, 4 ) = C( J, 4 ) - SUM*T4
+  280    CONTINUE
+         GO TO 410
+  290    CONTINUE
+*
+*        Special code for 5 x 5 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         V4 = V( 4 )
+         T4 = TAU*V4
+         V5 = V( 5 )
+         T5 = TAU*V5
+         DO 300 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) +
+     $            V4*C( J, 4 ) + V5*C( J, 5 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+            C( J, 4 ) = C( J, 4 ) - SUM*T4
+            C( J, 5 ) = C( J, 5 ) - SUM*T5
+  300    CONTINUE
+         GO TO 410
+  310    CONTINUE
+*
+*        Special code for 6 x 6 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         V4 = V( 4 )
+         T4 = TAU*V4
+         V5 = V( 5 )
+         T5 = TAU*V5
+         V6 = V( 6 )
+         T6 = TAU*V6
+         DO 320 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) +
+     $            V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+            C( J, 4 ) = C( J, 4 ) - SUM*T4
+            C( J, 5 ) = C( J, 5 ) - SUM*T5
+            C( J, 6 ) = C( J, 6 ) - SUM*T6
+  320    CONTINUE
+         GO TO 410
+  330    CONTINUE
+*
+*        Special code for 7 x 7 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         V4 = V( 4 )
+         T4 = TAU*V4
+         V5 = V( 5 )
+         T5 = TAU*V5
+         V6 = V( 6 )
+         T6 = TAU*V6
+         V7 = V( 7 )
+         T7 = TAU*V7
+         DO 340 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) +
+     $            V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) +
+     $            V7*C( J, 7 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+            C( J, 4 ) = C( J, 4 ) - SUM*T4
+            C( J, 5 ) = C( J, 5 ) - SUM*T5
+            C( J, 6 ) = C( J, 6 ) - SUM*T6
+            C( J, 7 ) = C( J, 7 ) - SUM*T7
+  340    CONTINUE
+         GO TO 410
+  350    CONTINUE
+*
+*        Special code for 8 x 8 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         V4 = V( 4 )
+         T4 = TAU*V4
+         V5 = V( 5 )
+         T5 = TAU*V5
+         V6 = V( 6 )
+         T6 = TAU*V6
+         V7 = V( 7 )
+         T7 = TAU*V7
+         V8 = V( 8 )
+         T8 = TAU*V8
+         DO 360 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) +
+     $            V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) +
+     $            V7*C( J, 7 ) + V8*C( J, 8 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+            C( J, 4 ) = C( J, 4 ) - SUM*T4
+            C( J, 5 ) = C( J, 5 ) - SUM*T5
+            C( J, 6 ) = C( J, 6 ) - SUM*T6
+            C( J, 7 ) = C( J, 7 ) - SUM*T7
+            C( J, 8 ) = C( J, 8 ) - SUM*T8
+  360    CONTINUE
+         GO TO 410
+  370    CONTINUE
+*
+*        Special code for 9 x 9 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         V4 = V( 4 )
+         T4 = TAU*V4
+         V5 = V( 5 )
+         T5 = TAU*V5
+         V6 = V( 6 )
+         T6 = TAU*V6
+         V7 = V( 7 )
+         T7 = TAU*V7
+         V8 = V( 8 )
+         T8 = TAU*V8
+         V9 = V( 9 )
+         T9 = TAU*V9
+         DO 380 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) +
+     $            V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) +
+     $            V7*C( J, 7 ) + V8*C( J, 8 ) + V9*C( J, 9 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+            C( J, 4 ) = C( J, 4 ) - SUM*T4
+            C( J, 5 ) = C( J, 5 ) - SUM*T5
+            C( J, 6 ) = C( J, 6 ) - SUM*T6
+            C( J, 7 ) = C( J, 7 ) - SUM*T7
+            C( J, 8 ) = C( J, 8 ) - SUM*T8
+            C( J, 9 ) = C( J, 9 ) - SUM*T9
+  380    CONTINUE
+         GO TO 410
+  390    CONTINUE
+*
+*        Special code for 10 x 10 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*V1
+         V2 = V( 2 )
+         T2 = TAU*V2
+         V3 = V( 3 )
+         T3 = TAU*V3
+         V4 = V( 4 )
+         T4 = TAU*V4
+         V5 = V( 5 )
+         T5 = TAU*V5
+         V6 = V( 6 )
+         T6 = TAU*V6
+         V7 = V( 7 )
+         T7 = TAU*V7
+         V8 = V( 8 )
+         T8 = TAU*V8
+         V9 = V( 9 )
+         T9 = TAU*V9
+         V10 = V( 10 )
+         T10 = TAU*V10
+         DO 400 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) +
+     $            V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) +
+     $            V7*C( J, 7 ) + V8*C( J, 8 ) + V9*C( J, 9 ) +
+     $            V10*C( J, 10 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+            C( J, 4 ) = C( J, 4 ) - SUM*T4
+            C( J, 5 ) = C( J, 5 ) - SUM*T5
+            C( J, 6 ) = C( J, 6 ) - SUM*T6
+            C( J, 7 ) = C( J, 7 ) - SUM*T7
+            C( J, 8 ) = C( J, 8 ) - SUM*T8
+            C( J, 9 ) = C( J, 9 ) - SUM*T9
+            C( J, 10 ) = C( J, 10 ) - SUM*T10
+  400    CONTINUE
+         GO TO 410
+      END IF
+  410 RETURN
+*
+*     End of SLARFX
+*
+      END
diff --git a/costa/native/external/lapack/slargv.f b/costa/native/external/lapack/slargv.f
new file mode 100644
index 000000000..d75674069
--- /dev/null
+++ b/costa/native/external/lapack/slargv.f
@@ -0,0 +1,100 @@
+      SUBROUTINE SLARGV( N, X, INCX, Y, INCY, C, INCC )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCC, INCX, INCY, N
+*     ..
+*     .. Array Arguments ..
+      REAL               C( * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLARGV generates a vector of real plane rotations, determined by
+*  elements of the real vectors x and y. For i = 1,2,...,n
+*
+*     (  c(i)  s(i) ) ( x(i) ) = ( a(i) )
+*     ( -s(i)  c(i) ) ( y(i) ) = (   0  )
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of plane rotations to be generated.
+*
+*  X       (input/output) REAL array,
+*                         dimension (1+(N-1)*INCX)
+*          On entry, the vector x.
+*          On exit, x(i) is overwritten by a(i), for i = 1,...,n.
+*
+*  INCX    (input) INTEGER
+*          The increment between elements of X. INCX > 0.
+*
+*  Y       (input/output) REAL array,
+*                         dimension (1+(N-1)*INCY)
+*          On entry, the vector y.
+*          On exit, the sines of the plane rotations.
+*
+*  INCY    (input) INTEGER
+*          The increment between elements of Y. INCY > 0.
+*
+*  C       (output) REAL array, dimension (1+(N-1)*INCC)
+*          The cosines of the plane rotations.
+*
+*  INCC    (input) INTEGER
+*          The increment between elements of C. INCC > 0.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IC, IX, IY
+      REAL               F, G, T, TT
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IX = 1
+      IY = 1
+      IC = 1
+      DO 10 I = 1, N
+         F = X( IX )
+         G = Y( IY )
+         IF( G.EQ.ZERO ) THEN
+            C( IC ) = ONE
+         ELSE IF( F.EQ.ZERO ) THEN
+            C( IC ) = ZERO
+            Y( IY ) = ONE
+            X( IX ) = G
+         ELSE IF( ABS( F ).GT.ABS( G ) ) THEN
+            T = G / F
+            TT = SQRT( ONE+T*T )
+            C( IC ) = ONE / TT
+            Y( IY ) = T*C( IC )
+            X( IX ) = F*TT
+         ELSE
+            T = F / G
+            TT = SQRT( ONE+T*T )
+            Y( IY ) = ONE / TT
+            C( IC ) = T*Y( IY )
+            X( IX ) = G*TT
+         END IF
+         IC = IC + INCC
+         IY = IY + INCY
+         IX = IX + INCX
+   10 CONTINUE
+      RETURN
+*
+*     End of SLARGV
+*
+      END
diff --git a/costa/native/external/lapack/slarnv.f b/costa/native/external/lapack/slarnv.f
new file mode 100644
index 000000000..9b1d0c769
--- /dev/null
+++ b/costa/native/external/lapack/slarnv.f
@@ -0,0 +1,116 @@
+      SUBROUTINE SLARNV( IDIST, ISEED, N, X )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            IDIST, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            ISEED( 4 )
+      REAL               X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLARNV returns a vector of n random real numbers from a uniform or
+*  normal distribution.
+*
+*  Arguments
+*  =========
+*
+*  IDIST   (input) INTEGER
+*          Specifies the distribution of the random numbers:
+*          = 1:  uniform (0,1)
+*          = 2:  uniform (-1,1)
+*          = 3:  normal (0,1)
+*
+*  ISEED   (input/output) INTEGER array, dimension (4)
+*          On entry, the seed of the random number generator; the array
+*          elements must be between 0 and 4095, and ISEED(4) must be
+*          odd.
+*          On exit, the seed is updated.
+*
+*  N       (input) INTEGER
+*          The number of random numbers to be generated.
+*
+*  X       (output) REAL array, dimension (N)
+*          The generated random numbers.
+*
+*  Further Details
+*  ===============
+*
+*  This routine calls the auxiliary routine SLARUV to generate random
+*  real numbers from a uniform (0,1) distribution, in batches of up to
+*  128 using vectorisable code. The Box-Muller method is used to
+*  transform numbers from a uniform to a normal distribution.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, TWO
+      PARAMETER          ( ONE = 1.0E+0, TWO = 2.0E+0 )
+      INTEGER            LV
+      PARAMETER          ( LV = 128 )
+      REAL               TWOPI
+      PARAMETER          ( TWOPI = 6.2831853071795864769252867663E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IL, IL2, IV
+*     ..
+*     .. Local Arrays ..
+      REAL               U( LV )
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          COS, LOG, MIN, SQRT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARUV
+*     ..
+*     .. Executable Statements ..
+*
+      DO 40 IV = 1, N, LV / 2
+         IL = MIN( LV / 2, N-IV+1 )
+         IF( IDIST.EQ.3 ) THEN
+            IL2 = 2*IL
+         ELSE
+            IL2 = IL
+         END IF
+*
+*        Call SLARUV to generate IL2 numbers from a uniform (0,1)
+*        distribution (IL2 <= LV)
+*
+         CALL SLARUV( ISEED, IL2, U )
+*
+         IF( IDIST.EQ.1 ) THEN
+*
+*           Copy generated numbers
+*
+            DO 10 I = 1, IL
+               X( IV+I-1 ) = U( I )
+   10       CONTINUE
+         ELSE IF( IDIST.EQ.2 ) THEN
+*
+*           Convert generated numbers to uniform (-1,1) distribution
+*
+            DO 20 I = 1, IL
+               X( IV+I-1 ) = TWO*U( I ) - ONE
+   20       CONTINUE
+         ELSE IF( IDIST.EQ.3 ) THEN
+*
+*           Convert generated numbers to normal (0,1) distribution
+*
+            DO 30 I = 1, IL
+               X( IV+I-1 ) = SQRT( -TWO*LOG( U( 2*I-1 ) ) )*
+     $                       COS( TWOPI*U( 2*I ) )
+   30       CONTINUE
+         END IF
+   40 CONTINUE
+      RETURN
+*
+*     End of SLARNV
+*
+      END
diff --git a/costa/native/external/lapack/slarrb.f b/costa/native/external/lapack/slarrb.f
new file mode 100644
index 000000000..6d12ccd3d
--- /dev/null
+++ b/costa/native/external/lapack/slarrb.f
@@ -0,0 +1,282 @@
+      SUBROUTINE SLARRB( N, D, L, LD, LLD, IFIRST, ILAST, SIGMA, RELTOL,
+     $                   W, WGAP, WERR, WORK, IWORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            IFIRST, ILAST, INFO, N
+      REAL               RELTOL, SIGMA
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               D( * ), L( * ), LD( * ), LLD( * ), W( * ),
+     $                   WERR( * ), WGAP( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  Given the relatively robust representation(RRR) L D L^T, SLARRB
+*  does ``limited'' bisection to locate the eigenvalues of L D L^T,
+*  W( IFIRST ) thru' W( ILAST ), to more accuracy. Intervals
+*  [left, right] are maintained by storing their mid-points and
+*  semi-widths in the arrays W and WERR respectively.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix.
+*
+*  D       (input) REAL array, dimension (N)
+*          The n diagonal elements of the diagonal matrix D.
+*
+*  L       (input) REAL array, dimension (N-1)
+*          The n-1 subdiagonal elements of the unit bidiagonal matrix L.
+*
+*  LD      (input) REAL array, dimension (N-1)
+*          The n-1 elements L(i)*D(i).
+*
+*  LLD     (input) REAL array, dimension (N-1)
+*          The n-1 elements L(i)*L(i)*D(i).
+*
+*  IFIRST  (input) INTEGER
+*          The index of the first eigenvalue in the cluster.
+*
+*  ILAST   (input) INTEGER
+*          The index of the last eigenvalue in the cluster.
+*
+*  SIGMA   (input) REAL
+*          The shift used to form L D L^T (see SLARRF).
+*
+*  RELTOL  (input) REAL
+*          The relative tolerance.
+*
+*  W       (input/output) REAL array, dimension (N)
+*          On input, W( IFIRST ) thru' W( ILAST ) are estimates of the
+*          corresponding eigenvalues of L D L^T.
+*          On output, these estimates are ``refined''.
+*
+*  WGAP    (input/output) REAL array, dimension (N)
+*          The gaps between the eigenvalues of L D L^T. Very small
+*          gaps are changed on output.
+*
+*  WERR    (input/output) REAL array, dimension (N)
+*          On input, WERR( IFIRST ) thru' WERR( ILAST ) are the errors
+*          in the estimates W( IFIRST ) thru' W( ILAST ).
+*          On output, these are the ``refined'' errors.
+*
+*****Reminder to Inder --- WORK is never used in this subroutine *****
+*  WORK    (input) REAL array, dimension (???)
+*          Workspace.
+*
+*  IWORK   (input) INTEGER array, dimension (2*N)
+*          Workspace.
+*
+*****Reminder to Inder --- INFO is never set in this subroutine ******
+*  INFO    (output) INTEGER
+*          Error flag.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Inderjit Dhillon, IBM Almaden, USA
+*     Osni Marques, LBNL/NERSC, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, TWO, HALF
+      PARAMETER          ( ZERO = 0.0E0, TWO = 2.0E0, HALF = 0.5E0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            CNT, I, I1, I2, INITI1, INITI2, J, K, NCNVRG,
+     $                   NEIG, NINT, NRIGHT, OLNINT
+      REAL               DELTA, EPS, GAP, LEFT, MID, PERT, RIGHT, S,
+     $                   THRESH, TMP, WIDTH
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      EPS = SLAMCH( 'Precision' )
+      I1 = IFIRST
+      I2 = IFIRST
+      NEIG = ILAST - IFIRST + 1
+      NCNVRG = 0
+      THRESH = RELTOL
+      DO 10 I = IFIRST, ILAST
+         IWORK( I ) = 0
+         PERT = EPS*( ABS( SIGMA )+ABS( W( I ) ) )
+         WERR( I ) = WERR( I ) + PERT
+         IF( WGAP( I ).LT.PERT )
+     $      WGAP( I ) = PERT
+   10 CONTINUE
+      DO 20 I = I1, ILAST
+         IF( I.EQ.1 ) THEN
+            GAP = WGAP( I )
+         ELSE IF( I.EQ.N ) THEN
+            GAP = WGAP( I-1 )
+         ELSE
+            GAP = MIN( WGAP( I-1 ), WGAP( I ) )
+         END IF
+         IF( WERR( I ).LT.THRESH*GAP ) THEN
+            NCNVRG = NCNVRG + 1
+            IWORK( I ) = 1
+            IF( I1.EQ.I )
+     $         I1 = I1 + 1
+         ELSE
+            I2 = I
+         END IF
+   20 CONTINUE
+*
+*     Initialize the unconverged intervals.
+*
+      I = I1
+      NINT = 0
+      RIGHT = ZERO
+   30 CONTINUE
+      IF( I.LE.I2 ) THEN
+         IF( IWORK( I ).EQ.0 ) THEN
+            DELTA = EPS
+            LEFT = W( I ) - WERR( I )
+*
+*           Do while( CNT(LEFT).GT.I-1 )
+*
+   40       CONTINUE
+            IF( I.GT.I1 .AND. LEFT.LE.RIGHT ) THEN
+               LEFT = RIGHT
+               CNT = I - 1
+            ELSE
+               S = -LEFT
+               CNT = 0
+               DO 50 J = 1, N - 1
+                  TMP = D( J ) + S
+                  S = S*( LD( J ) / TMP )*L( J ) - LEFT
+                  IF( TMP.LT.ZERO )
+     $               CNT = CNT + 1
+   50          CONTINUE
+               TMP = D( N ) + S
+               IF( TMP.LT.ZERO )
+     $            CNT = CNT + 1
+               IF( CNT.GT.I-1 ) THEN
+                  DELTA = TWO*DELTA
+                  LEFT = LEFT - ( ABS( SIGMA )+ABS( LEFT ) )*DELTA
+                  GO TO 40
+               END IF
+            END IF
+            DELTA = EPS
+            RIGHT = W( I ) + WERR( I )
+*
+*           Do while( CNT(RIGHT).LT.I )
+*
+   60       CONTINUE
+            S = -RIGHT
+            CNT = 0
+            DO 70 J = 1, N - 1
+               TMP = D( J ) + S
+               S = S*( LD( J ) / TMP )*L( J ) - RIGHT
+               IF( TMP.LT.ZERO )
+     $            CNT = CNT + 1
+   70       CONTINUE
+            TMP = D( N ) + S
+            IF( TMP.LT.ZERO )
+     $         CNT = CNT + 1
+            IF( CNT.LT.I ) THEN
+               DELTA = TWO*DELTA
+               RIGHT = RIGHT + ( ABS( SIGMA )+ABS( RIGHT ) )*DELTA
+               GO TO 60
+            END IF
+            WERR( I ) = LEFT
+            W( I ) = RIGHT
+            IWORK( N+I ) = CNT
+            NINT = NINT + 1
+            I = CNT + 1
+         ELSE
+            I = I + 1
+         END IF
+         GO TO 30
+      END IF
+*
+*     While( NCNVRG.LT.NEIG )
+*
+      INITI1 = I1
+      INITI2 = I2
+   80 CONTINUE
+      IF( NCNVRG.LT.NEIG ) THEN
+         OLNINT = NINT
+         I = I1
+         DO 100 K = 1, OLNINT
+            NRIGHT = IWORK( N+I )
+            IF( IWORK( I ).EQ.0 ) THEN
+               MID = HALF*( WERR( I )+W( I ) )
+               S = -MID
+               CNT = 0
+               DO 90 J = 1, N - 1
+                  TMP = D( J ) + S
+                  S = S*( LD( J ) / TMP )*L( J ) - MID
+                  IF( TMP.LT.ZERO )
+     $               CNT = CNT + 1
+   90          CONTINUE
+               TMP = D( N ) + S
+               IF( TMP.LT.ZERO )
+     $            CNT = CNT + 1
+               CNT = MAX( I-1, MIN( NRIGHT, CNT ) )
+               IF( I.EQ.NRIGHT ) THEN
+                  IF( I.EQ.IFIRST ) THEN
+                     GAP = WERR( I+1 ) - W( I )
+                  ELSE IF( I.EQ.ILAST ) THEN
+                     GAP = WERR( I ) - W( I-1 )
+                  ELSE
+                     GAP = MIN( WERR( I+1 )-W( I ), WERR( I )-W( I-1 ) )
+                  END IF
+                  WIDTH = W( I ) - MID
+                  IF( WIDTH.LT.THRESH*GAP ) THEN
+                     NCNVRG = NCNVRG + 1
+                     IWORK( I ) = 1
+                     IF( I1.EQ.I ) THEN
+                        I1 = I1 + 1
+                        NINT = NINT - 1
+                     END IF
+                  END IF
+               END IF
+               IF( IWORK( I ).EQ.0 )
+     $            I2 = K
+               IF( CNT.EQ.I-1 ) THEN
+                  WERR( I ) = MID
+               ELSE IF( CNT.EQ.NRIGHT ) THEN
+                  W( I ) = MID
+               ELSE
+                  IWORK( N+I ) = CNT
+                  NINT = NINT + 1
+                  WERR( CNT+1 ) = MID
+                  W( CNT+1 ) = W( I )
+                  W( I ) = MID
+                  I = CNT + 1
+                  IWORK( N+I ) = NRIGHT
+               END IF
+            END IF
+            I = NRIGHT + 1
+  100    CONTINUE
+         NINT = NINT - OLNINT + I2
+         GO TO 80
+      END IF
+      DO 110 I = INITI1, INITI2
+         W( I ) = HALF*( WERR( I )+W( I ) )
+         WERR( I ) = W( I ) - WERR( I )
+  110 CONTINUE
+*
+      RETURN
+*
+*     End of SLARRB
+*
+      END
diff --git a/costa/native/external/lapack/slarre.f b/costa/native/external/lapack/slarre.f
new file mode 100644
index 000000000..d47b772d4
--- /dev/null
+++ b/costa/native/external/lapack/slarre.f
@@ -0,0 +1,318 @@
+      SUBROUTINE SLARRE( N, D, E, TOL, NSPLIT, ISPLIT, M, W, WOFF,
+     $                   GERSCH, WORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, M, N, NSPLIT
+      REAL               TOL
+*     ..
+*     .. Array Arguments ..
+      INTEGER            ISPLIT( * )
+      REAL               D( * ), E( * ), GERSCH( * ), W( * ), WOFF( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  Given the tridiagonal matrix T, SLARRE sets "small" off-diagonal
+*  elements to zero, and for each unreduced block T_i, it finds
+*  (i) the numbers sigma_i
+*  (ii) the base T_i - sigma_i I = L_i D_i L_i^T representations and
+*  (iii) eigenvalues of each L_i D_i L_i^T.
+*  The representations and eigenvalues found are then used by
+*  SSTEGR to compute the eigenvectors of a symmetric tridiagonal
+*  matrix. Currently, the base representations are limited to being
+*  positive or negative definite, and the eigenvalues of the definite
+*  matrices are found by the dqds algorithm (subroutine SLASQ2). As
+*  an added benefit, SLARRE also outputs the n Gerschgorin
+*  intervals for each L_i D_i L_i^T.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix.
+*
+*  D       (input/output) REAL array, dimension (N)
+*          On entry, the n diagonal elements of the tridiagonal
+*          matrix T.
+*          On exit, the n diagonal elements of the diagonal
+*          matrices D_i.
+*
+*  E       (input/output) REAL array, dimension (N)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix T; E(N) need not be set.
+*          On exit, the subdiagonal elements of the unit bidiagonal
+*          matrices L_i.
+*
+*  TOL     (input) REAL
+*          The threshold for splitting. If on input |E(i)| < TOL, then
+*          the matrix T is split into smaller blocks.
+*
+*  NSPLIT  (input) INTEGER
+*          The number of blocks T splits into. 1 <= NSPLIT <= N.
+*
+*  ISPLIT  (output) INTEGER array, dimension (2*N)
+*          The splitting points, at which T breaks up into submatrices.
+*          The first submatrix consists of rows/columns 1 to ISPLIT(1),
+*          the second of rows/columns ISPLIT(1)+1 through ISPLIT(2),
+*          etc., and the NSPLIT-th consists of rows/columns
+*          ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N.
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues (of all the L_i D_i L_i^T)
+*          found.
+*
+*  W       (output) REAL array, dimension (N)
+*          The first M elements contain the eigenvalues. The
+*          eigenvalues of each of the blocks, L_i D_i L_i^T, are
+*          sorted in ascending order.
+*
+*  WOFF    (output) REAL array, dimension (N)
+*          The NSPLIT base points sigma_i.
+*
+*  GERSCH  (output) REAL array, dimension (2*N)
+*          The n Gerschgorin intervals.
+*
+*  WORK    (input) REAL array, dimension (4*N???)
+*          Workspace.
+*
+*  INFO    (output) INTEGER
+*          Output error code from SLASQ2
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Inderjit Dhillon, IBM Almaden, USA
+*     Osni Marques, LBNL/NERSC, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE, TWO, FOUR, FOURTH
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0,
+     $                   FOUR = 4.0E0, FOURTH = ONE / FOUR )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            CNT, I, IBEGIN, IEND, IN, J, JBLK, MAXCNT
+      REAL               DELTA, EPS, GL, GU, NRM, OFFD, S, SGNDEF, 
+     $                   SIGMA, TAU, TMP1, WIDTH
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SLASQ2 
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      EPS = SLAMCH( 'Precision' )
+*
+*     Compute Splitting Points
+*
+      NSPLIT = 1
+      DO 10 I = 1, N - 1
+         IF( ABS( E( I ) ).LE.TOL ) THEN
+            ISPLIT( NSPLIT ) = I
+            NSPLIT = NSPLIT + 1
+         END IF
+   10 CONTINUE
+      ISPLIT( NSPLIT ) = N
+*
+      IBEGIN = 1
+      DO 170 JBLK = 1, NSPLIT
+         IEND = ISPLIT( JBLK )
+         IF( IBEGIN.EQ.IEND ) THEN
+            W( IBEGIN ) = D( IBEGIN )
+            WOFF( JBLK ) = ZERO
+            IBEGIN = IEND + 1
+            GO TO 170
+         END IF
+         IN = IEND - IBEGIN + 1
+*
+*        Form the n Gerschgorin intervals
+*
+         GL = D( IBEGIN ) - ABS( E( IBEGIN ) )
+         GU = D( IBEGIN ) + ABS( E( IBEGIN ) )
+         GERSCH( 2*IBEGIN-1 ) = GL
+         GERSCH( 2*IBEGIN ) = GU
+         GERSCH( 2*IEND-1 ) = D( IEND ) - ABS( E( IEND-1 ) )
+         GERSCH( 2*IEND ) = D( IEND ) + ABS( E( IEND-1 ) )
+         GL = MIN( GERSCH( 2*IEND-1 ), GL )
+         GU = MAX( GERSCH( 2*IEND ), GU )
+         DO 20 I = IBEGIN + 1, IEND - 1
+            OFFD = ABS( E( I-1 ) ) + ABS( E( I ) )
+            GERSCH( 2*I-1 ) = D( I ) - OFFD
+            GL = MIN( GERSCH( 2*I-1 ), GL )
+            GERSCH( 2*I ) = D( I ) + OFFD
+            GU = MAX( GERSCH( 2*I ), GU )
+   20    CONTINUE
+         NRM = MAX( ABS( GL ), ABS( GU ) )
+*
+*        Find the number SIGMA where the base representation
+*        T - sigma I = L D L^T is to be formed.
+*
+         WIDTH = GU - GL
+         DO 30 I = IBEGIN, IEND - 1
+            WORK( I ) = E( I )*E( I )
+   30    CONTINUE
+         DO 50 J = 1, 2
+            IF( J.EQ.1 ) THEN
+               TAU = GL + FOURTH*WIDTH
+            ELSE
+               TAU = GU - FOURTH*WIDTH
+            END IF
+            TMP1 = D( IBEGIN ) - TAU
+            IF( TMP1.LT.ZERO ) THEN
+               CNT = 1
+            ELSE
+               CNT = 0
+            END IF
+            DO 40 I = IBEGIN + 1, IEND
+               TMP1 = D( I ) - TAU - WORK( I-1 ) / TMP1
+               IF( TMP1.LT.ZERO )
+     $            CNT = CNT + 1
+   40       CONTINUE
+            IF( CNT.EQ.0 ) THEN
+               GL = TAU
+            ELSE IF( CNT.EQ.IN ) THEN
+               GU = TAU
+            END IF
+            IF( J.EQ.1 ) THEN
+               MAXCNT = CNT
+               SIGMA = GL
+               SGNDEF = ONE
+            ELSE
+               IF( IN-CNT.GT.MAXCNT ) THEN
+                  SIGMA = GU
+                  SGNDEF = -ONE
+               END IF
+            END IF
+   50    CONTINUE
+*
+*        Find the base L D L^T representation
+*
+         WORK( 3*IN ) = ONE
+         DELTA = EPS
+         TAU = SGNDEF*NRM
+   60    CONTINUE
+         SIGMA = SIGMA - DELTA*TAU
+         WORK( 1 ) = D( IBEGIN ) - SIGMA
+         J = IBEGIN
+         DO 70 I = 1, IN - 1
+            WORK( 2*IN+I ) = ONE / WORK( 2*I-1 )
+            TMP1 = E( J )*WORK( 2*IN+I )
+            WORK( 2*I+1 ) = ( D( J+1 )-SIGMA ) - TMP1*E( J )
+            WORK( 2*I ) = TMP1
+            J = J + 1
+   70    CONTINUE
+         DO 80 I = IN, 1, -1
+            TMP1 = SGNDEF*WORK( 2*I-1 )
+            IF( TMP1.LT.ZERO .OR. WORK( 2*IN+I ).EQ.ZERO .OR. .NOT.
+     $          ( TMP1.GT.ZERO .OR. TMP1.LT.ONE ) ) THEN
+               DELTA = TWO*DELTA
+               GO TO 60
+            END IF
+            J = J - 1
+   80    CONTINUE
+*
+         J = IBEGIN
+         D( IBEGIN ) = WORK( 1 )
+         WORK( 1 ) = ABS( WORK( 1 ) )
+         DO 90 I = 1, IN - 1
+            TMP1 = E( J )
+            E( J ) = WORK( 2*I )
+            WORK( 2*I ) = ABS( TMP1*WORK( 2*I ) )
+            J = J + 1
+            D( J ) = WORK( 2*I+1 )
+            WORK( 2*I+1 ) = ABS( WORK( 2*I+1 ) )
+   90    CONTINUE
+*
+         CALL SLASQ2( IN, WORK, INFO )
+*
+         TAU = SGNDEF*WORK( IN )
+         WORK( 3*IN ) = ONE
+         DELTA = TWO*EPS
+  100    CONTINUE
+         TAU = TAU*( ONE-DELTA )
+*
+         S = -TAU
+         J = IBEGIN
+         DO 110 I = 1, IN - 1
+            WORK( I ) = D( J ) + S
+            WORK( 2*IN+I ) = ONE / WORK( I )
+*           WORK( N+I ) = ( E( I ) * D( I ) ) / WORK( I )
+            WORK( IN+I ) = ( E( J )*D( J ) )*WORK( 2*IN+I )
+            S = S*WORK( IN+I )*E( J ) - TAU
+            J = J + 1
+  110    CONTINUE
+         WORK( IN ) = D( IEND ) + S
+*
+*        Checking to see if all the diagonal elements of the new
+*        L D L^T representation have the same sign
+*
+         DO 120 I = IN, 1, -1
+            TMP1 = SGNDEF*WORK( I )
+            IF( TMP1.LT.ZERO .OR. WORK( 2*IN+I ).EQ.ZERO .OR. .NOT.
+     $          ( TMP1.GT.ZERO .OR. TMP1.LT.ONE ) ) THEN
+               DELTA = TWO*DELTA
+               GO TO 100
+            END IF
+  120    CONTINUE
+*
+         SIGMA = SIGMA + TAU
+         CALL SCOPY( IN, WORK, 1, D( IBEGIN ), 1 )
+         CALL SCOPY( IN-1, WORK( IN+1 ), 1, E( IBEGIN ), 1 )
+         WOFF( JBLK ) = SIGMA
+*
+*        Update the n Gerschgorin intervals
+*
+         DO 130 I = IBEGIN, IEND
+            GERSCH( 2*I-1 ) = GERSCH( 2*I-1 ) - SIGMA
+            GERSCH( 2*I ) = GERSCH( 2*I ) - SIGMA
+  130    CONTINUE
+*
+*        Compute the eigenvalues of L D L^T.
+*
+         J = IBEGIN
+         DO 140 I = 1, IN - 1
+            WORK( 2*I-1 ) = ABS( D( J ) )
+            WORK( 2*I ) = E( J )*E( J )*WORK( 2*I-1 )
+            J = J + 1
+  140    CONTINUE
+         WORK( 2*IN-1 ) = ABS( D( IEND ) )
+*
+         CALL SLASQ2( IN, WORK, INFO )
+*
+         J = IBEGIN
+         IF( SGNDEF.GT.ZERO ) THEN
+            DO 150 I = 1, IN
+               W( J ) = WORK( IN-I+1 )
+               J = J + 1
+  150       CONTINUE
+         ELSE
+            DO 160 I = 1, IN
+               W( J ) = -WORK( I )
+               J = J + 1
+  160       CONTINUE
+         END IF
+         IBEGIN = IEND + 1
+  170 CONTINUE
+      M = N
+*   
+      RETURN
+*
+*     End of SLARRE
+*
+      END
diff --git a/costa/native/external/lapack/slarrf.f b/costa/native/external/lapack/slarrf.f
new file mode 100644
index 000000000..eb08d9b4f
--- /dev/null
+++ b/costa/native/external/lapack/slarrf.f
@@ -0,0 +1,150 @@
+      SUBROUTINE SLARRF( N, D, L, LD, LLD, IFIRST, ILAST, W, DPLUS,
+     $                   LPLUS, WORK, IWORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            IFIRST, ILAST, INFO, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               D( * ), DPLUS( * ), L( * ), LD( * ), LLD( * ),
+     $                   LPLUS( * ), W( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  Given the initial representation L D L^T and its cluster of close
+*  eigenvalues (in a relative measure), W( IFIRST ), W( IFIRST+1 ), ...
+*  W( ILAST ), SLARRF finds a new relatively robust representation
+*  L D L^T - SIGMA I = L(+) D(+) L(+)^T such that at least one of the
+*  eigenvalues of L(+) D(+) L(+)^T is relatively isolated.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix.
+*
+*  D       (input) REAL array, dimension (N)
+*          The n diagonal elements of the diagonal matrix D.
+*
+*  L       (input) REAL array, dimension (N-1)
+*          The (n-1) subdiagonal elements of the unit bidiagonal
+*          matrix L.
+*
+*  LD      (input) REAL array, dimension (N-1)
+*          The n-1 elements L(i)*D(i).
+*
+*  LLD     (input) REAL array, dimension (N-1)
+*          The n-1 elements L(i)*L(i)*D(i).
+*
+*  IFIRST  (input) INTEGER
+*          The index of the first eigenvalue in the cluster.
+*
+*  ILAST   (input) INTEGER
+*          The index of the last eigenvalue in the cluster.
+*
+*  W       (input/output) REAL array, dimension (N)
+*          On input, the eigenvalues of L D L^T in ascending order.
+*          W( IFIRST ) through W( ILAST ) form the cluster of relatively
+*          close eigenalues.
+*          On output, W( IFIRST ) thru' W( ILAST ) are estimates of the
+*          corresponding eigenvalues of L(+) D(+) L(+)^T.
+*
+*  SIGMA   (input) REAL
+*          The shift used to form L(+) D(+) L(+)^T.
+*
+*  DPLUS   (output) REAL array, dimension (N)
+*          The n diagonal elements of the diagonal matrix D(+).
+*
+*  LPLUS   (output) REAL array, dimension (N)
+*          The first (n-1) elements of LPLUS contain the subdiagonal
+*          elements of the unit bidiagonal matrix L(+). LPLUS( N ) is
+*          set to SIGMA.
+*
+*  WORK    (input) REAL array, dimension (???)
+*          Workspace.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Inderjit Dhillon, IBM Almaden, USA
+*     Osni Marques, LBNL/NERSC, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, TWO
+      PARAMETER          ( ZERO = 0.0E0, TWO = 2.0E0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      REAL               DELTA, EPS, S, SIGMA
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      EPS = SLAMCH( 'Precision' )
+      IF( IFIRST.EQ.1 ) THEN
+         SIGMA = W( IFIRST )
+      ELSE IF( ILAST.EQ.N ) THEN
+         SIGMA = W( ILAST )
+      ELSE
+         INFO = 1
+         RETURN
+      END IF
+*
+*     Compute the new relatively robust representation (RRR)
+*
+      DELTA = TWO*EPS
+   10 CONTINUE
+      IF( IFIRST.EQ.1 ) THEN
+         SIGMA = SIGMA - ABS( SIGMA )*DELTA
+      ELSE
+         SIGMA = SIGMA + ABS( SIGMA )*DELTA
+      END IF
+      S = -SIGMA
+      DO 20 I = 1, N - 1
+         DPLUS( I ) = D( I ) + S
+         LPLUS( I ) = LD( I ) / DPLUS( I )
+         S = S*LPLUS( I )*L( I ) - SIGMA
+   20 CONTINUE
+      DPLUS( N ) = D( N ) + S
+      IF( IFIRST.EQ.1 ) THEN
+         DO 30 I = 1, N
+            IF( DPLUS( I ).LT.ZERO ) THEN
+               DELTA = TWO*DELTA
+               GO TO 10
+            END IF
+   30    CONTINUE
+      ELSE
+         DO 40 I = 1, N
+            IF( DPLUS( I ).GT.ZERO ) THEN
+               DELTA = TWO*DELTA
+               GO TO 10
+            END IF
+   40    CONTINUE
+      END IF
+      DO 50 I = IFIRST, ILAST
+         W( I ) = W( I ) - SIGMA
+   50 CONTINUE
+      LPLUS( N ) = SIGMA
+*
+      RETURN
+*
+*     End of SLARRF
+*
+      END
diff --git a/costa/native/external/lapack/slarrv.f b/costa/native/external/lapack/slarrv.f
new file mode 100644
index 000000000..74d782320
--- /dev/null
+++ b/costa/native/external/lapack/slarrv.f
@@ -0,0 +1,417 @@
+      SUBROUTINE SLARRV( N, D, L, ISPLIT, M, W, IBLOCK, GERSCH, TOL, Z,
+     $                   LDZ, ISUPPZ, WORK, IWORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDZ, M, N
+      REAL               TOL
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IBLOCK( * ), ISPLIT( * ), ISUPPZ( * ),
+     $                   IWORK( * )
+      REAL               D( * ), GERSCH( * ), L( * ), W( * ), WORK( * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLARRV computes the eigenvectors of the tridiagonal matrix
+*  T = L D L^T given L, D and the eigenvalues of L D L^T.
+*  The input eigenvalues should have high relative accuracy with
+*  respect to the entries of L and D. The desired accuracy of the
+*  output can be specified by the input parameter TOL.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix.  N >= 0.
+*
+*  D       (input/output) REAL array, dimension (N)
+*          On entry, the n diagonal elements of the diagonal matrix D.
+*          On exit, D may be overwritten.
+*
+*  L       (input/output) REAL array, dimension (N-1)
+*          On entry, the (n-1) subdiagonal elements of the unit
+*          bidiagonal matrix L in elements 1 to N-1 of L. L(N) need
+*          not be set. On exit, L is overwritten.
+*
+*  ISPLIT  (input) INTEGER array, dimension (N)
+*          The splitting points, at which T breaks up into submatrices.
+*          The first submatrix consists of rows/columns 1 to
+*          ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1
+*          through ISPLIT( 2 ), etc.
+*
+*  TOL     (input) REAL
+*          The absolute error tolerance for the
+*          eigenvalues/eigenvectors.
+*          Errors in the input eigenvalues must be bounded by TOL.
+*          The eigenvectors output have residual norms
+*          bounded by TOL, and the dot products between different
+*          eigenvectors are bounded by TOL. TOL must be at least
+*          N*EPS*|T|, where EPS is the machine precision and |T| is
+*          the 1-norm of the tridiagonal matrix.
+*
+*  M       (input) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (input) REAL array, dimension (N)
+*          The first M elements of W contain the eigenvalues for
+*          which eigenvectors are to be computed.  The eigenvalues
+*          should be grouped by split-off block and ordered from
+*          smallest to largest within the block ( The output array
+*          W from SLARRE is expected here ).
+*          Errors in W must be bounded by TOL (see above).
+*
+*  IBLOCK  (input) INTEGER array, dimension (N)
+*          The submatrix indices associated with the corresponding
+*          eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to
+*          the first submatrix from the top, =2 if W(i) belongs to
+*          the second submatrix, etc. 
+*
+*  Z       (output) REAL array, dimension (LDZ, max(1,M) )
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix T
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  ISUPPZ  (output) INTEGER ARRAY, dimension ( 2*max(1,M) )
+*          The support of the eigenvectors in Z, i.e., the indices
+*          indicating the nonzero elements in Z. The i-th eigenvector
+*          is nonzero only in elements ISUPPZ( 2*i-1 ) through
+*          ISUPPZ( 2*i ).
+*
+*  WORK    (workspace) REAL array, dimension (13*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (6*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = 1, internal error in SLARRB
+*                if INFO = 2, internal error in SSTEIN
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Inderjit Dhillon, IBM Almaden, USA
+*     Osni Marques, LBNL/NERSC, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            MGSSIZ
+      PARAMETER          ( MGSSIZ = 20 )
+      REAL               ZERO, ONE, FOUR
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0, FOUR = 4.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            MGSCLS
+      INTEGER            I, IBEGIN, IEND, IINDC1, IINDC2, IINDR, IINDWK, 
+     $                   IINFO, IM, IN, INDERR, INDGAP, INDLD, INDLLD, 
+     $                   INDWRK, ITER, ITMP1, ITMP2, J, JBLK, K, KTOT, 
+     $                   LSBDPT, MAXITR, NCLUS, NDEPTH, NDONE, NEWCLS, 
+     $                   NEWFRS, NEWFTT, NEWLST, NEWSIZ, NSPLIT, OLDCLS, 
+     $                   OLDFST, OLDIEN, OLDLST, OLDNCL, P, Q, 
+     $                   TEMP( 1 )
+      REAL               EPS, GAP, LAMBDA, MGSTOL, MINGMA, MINRGP, 
+     $                   NRMINV, RELGAP, RELTOL, RESID, RQCORR, SIGMA, 
+     $                   TMP1, ZTZ
+*     ..
+*     .. External Functions ..
+      REAL               SDOT, SLAMCH, SNRM2
+      EXTERNAL           SDOT, SLAMCH, SNRM2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SCOPY, SLAR1V, SLARRB, SLARRF, SLASET, 
+     $                   SSCAL, SSTEIN
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INDERR = N + 1
+      INDLD = 2*N
+      INDLLD = 3*N
+      INDGAP = 4*N
+      INDWRK = 5*N + 1
+*
+      IINDR = N
+      IINDC1 = 2*N
+      IINDC2 = 3*N
+      IINDWK = 4*N + 1
+*
+      EPS = SLAMCH( 'Precision' )
+*
+      DO 10 I = 1, 2*N
+         IWORK( I ) = 0
+   10 CONTINUE
+      DO 20 I = 1, M
+         WORK( INDERR+I-1 ) = EPS*ABS( W( I ) )
+   20 CONTINUE
+      CALL SLASET( 'Full', N, N, ZERO, ZERO, Z, LDZ )
+      MGSTOL = 5.0E0*EPS
+*
+      NSPLIT = IBLOCK( M )
+      IBEGIN = 1
+      DO 170 JBLK = 1, NSPLIT
+         IEND = ISPLIT( JBLK )
+*
+*        Find the eigenvectors of the submatrix indexed IBEGIN
+*        through IEND.
+*
+         IF( IBEGIN.EQ.IEND ) THEN
+            Z( IBEGIN, IBEGIN ) = ONE
+            ISUPPZ( 2*IBEGIN-1 ) = IBEGIN
+            ISUPPZ( 2*IBEGIN ) = IBEGIN
+            IBEGIN = IEND + 1
+            GO TO 170
+         END IF
+         OLDIEN = IBEGIN - 1
+         IN = IEND - OLDIEN
+         RELTOL = MIN( 1.0E-2, ONE / REAL( IN ) )
+         IM = IN
+         CALL SCOPY( IM, W( IBEGIN ), 1, WORK, 1 )
+         DO 30 I = 1, IN - 1
+            WORK( INDGAP+I ) = WORK( I+1 ) - WORK( I )
+   30    CONTINUE
+         WORK( INDGAP+IN ) = MAX( ABS( WORK( IN ) ), EPS )
+         NDONE = 0
+*
+         NDEPTH = 0
+         LSBDPT = 1
+         NCLUS = 1
+         IWORK( IINDC1+1 ) = 1
+         IWORK( IINDC1+2 ) = IN
+*
+*        While( NDONE.LT.IM ) do
+*
+   40    CONTINUE
+         IF( NDONE.LT.IM ) THEN
+            OLDNCL = NCLUS
+            NCLUS = 0
+            LSBDPT = 1 - LSBDPT
+            DO 150 I = 1, OLDNCL
+               IF( LSBDPT.EQ.0 ) THEN
+                  OLDCLS = IINDC1
+                  NEWCLS = IINDC2
+               ELSE
+                  OLDCLS = IINDC2
+                  NEWCLS = IINDC1
+               END IF
+*
+*              If NDEPTH > 1, retrieve the relatively robust
+*              representation (RRR) and perform limited bisection
+*              (if necessary) to get approximate eigenvalues.
+*
+               J = OLDCLS + 2*I
+               OLDFST = IWORK( J-1 )
+               OLDLST = IWORK( J )
+               IF( NDEPTH.GT.0 ) THEN
+                  J = OLDIEN + OLDFST
+                  CALL SCOPY( IN, Z( IBEGIN, J ), 1, D( IBEGIN ), 1 )
+                  CALL SCOPY( IN, Z( IBEGIN, J+1 ), 1, L( IBEGIN ), 1 )
+                  SIGMA = L( IEND )
+               END IF
+               K = IBEGIN
+               DO 50 J = 1, IN - 1
+                  WORK( INDLD+J ) = D( K )*L( K )
+                  WORK( INDLLD+J ) = WORK( INDLD+J )*L( K )
+                  K = K + 1
+   50          CONTINUE
+               IF( NDEPTH.GT.0 ) THEN
+                  CALL SLARRB( IN, D( IBEGIN ), L( IBEGIN ),
+     $                         WORK( INDLD+1 ), WORK( INDLLD+1 ),
+     $                         OLDFST, OLDLST, SIGMA, RELTOL, WORK,
+     $                         WORK( INDGAP+1 ), WORK( INDERR ),
+     $                         WORK( INDWRK ), IWORK( IINDWK ), IINFO )
+                  IF( IINFO.NE.0 ) THEN
+                     INFO = 1
+                     RETURN
+                  END IF
+               END IF
+*
+*              Classify eigenvalues of the current representation (RRR)
+*              as (i) isolated, (ii) loosely clustered or (iii) tightly
+*              clustered
+*
+               NEWFRS = OLDFST
+               DO 140 J = OLDFST, OLDLST
+                  IF( J.EQ.OLDLST .OR. WORK( INDGAP+J ).GE.RELTOL*
+     $                ABS( WORK( J ) ) ) THEN
+                     NEWLST = J
+                  ELSE
+*
+*                    continue (to the next loop)
+*
+                     RELGAP = WORK( INDGAP+J ) / ABS( WORK( J ) )
+                     IF( J.EQ.NEWFRS ) THEN
+                        MINRGP = RELGAP
+                     ELSE
+                        MINRGP = MIN( MINRGP, RELGAP )
+                     END IF
+                     GO TO 140
+                  END IF
+                  NEWSIZ = NEWLST - NEWFRS + 1
+                  MAXITR = 10
+                  NEWFTT = OLDIEN + NEWFRS
+                  IF( NEWSIZ.GT.1 ) THEN
+                     MGSCLS = NEWSIZ.LE.MGSSIZ .AND. MINRGP.GE.MGSTOL
+                     IF( .NOT.MGSCLS ) THEN
+                        CALL SLARRF( IN, D( IBEGIN ), L( IBEGIN ),
+     $                               WORK( INDLD+1 ), WORK( INDLLD+1 ),
+     $                               NEWFRS, NEWLST, WORK,
+     $                               Z( IBEGIN, NEWFTT ),
+     $                               Z( IBEGIN, NEWFTT+1 ),
+     $                               WORK( INDWRK ), IWORK( IINDWK ),
+     $                               INFO )
+                        IF( INFO.EQ.0 ) THEN
+                           NCLUS = NCLUS + 1
+                           K = NEWCLS + 2*NCLUS
+                           IWORK( K-1 ) = NEWFRS
+                           IWORK( K ) = NEWLST
+                        ELSE
+                           INFO = 0
+                           IF( MINRGP.GE.MGSTOL ) THEN
+                              MGSCLS = .TRUE.
+                           ELSE
+*                           
+*                             Call SSTEIN to process this tight cluster.
+*                             This happens only if MINRGP <= MGSTOL
+*                             and SLARRF returns INFO = 1. The latter
+*                             means that a new RRR to "break" the
+*                             cluster could not be found.
+*
+                              WORK( INDWRK ) = D( IBEGIN )
+                              DO 60 K = 1, IN - 1
+                                 WORK( INDWRK+K ) = D( IBEGIN+K ) +
+     $                                              WORK( INDLLD+K )
+   60                         CONTINUE
+                              DO 70 K = 1, NEWSIZ
+                                 IWORK( IINDWK+K-1 ) = 1
+   70                         CONTINUE
+                              DO 80 K = NEWFRS, NEWLST
+                                 ISUPPZ( 2*( IBEGIN+K )-3 ) = 1
+                                 ISUPPZ( 2*( IBEGIN+K )-2 ) = IN
+   80                         CONTINUE
+                              TEMP( 1 ) = IN
+                              CALL SSTEIN( IN, WORK( INDWRK ),
+     $                                     WORK( INDLD+1 ), NEWSIZ,
+     $                                     WORK( NEWFRS ),
+     $                                     IWORK( IINDWK ), TEMP( 1 ),
+     $                                     Z( IBEGIN, NEWFTT ), LDZ,
+     $                                     WORK( INDWRK+IN ),
+     $                                     IWORK( IINDWK+IN ),
+     $                                     IWORK( IINDWK+2*IN ), IINFO )
+                              IF( IINFO.NE.0 ) THEN
+                                 INFO = 2
+                                 RETURN
+                              END IF
+                              NDONE = NDONE + NEWSIZ
+                           END IF
+                        END IF
+                     END IF
+                  ELSE
+                     MGSCLS = .FALSE.
+                  END IF
+                  IF( NEWSIZ.EQ.1 .OR. MGSCLS ) THEN
+                     KTOT = NEWFTT
+                     DO 100 K = NEWFRS, NEWLST
+                        ITER = 0
+   90                   CONTINUE
+                        LAMBDA = WORK( K )
+                        CALL SLAR1V( IN, 1, IN, LAMBDA, D( IBEGIN ),
+     $                               L( IBEGIN ), WORK( INDLD+1 ),
+     $                               WORK( INDLLD+1 ),
+     $                               GERSCH( 2*OLDIEN+1 ),
+     $                               Z( IBEGIN, KTOT ), ZTZ, MINGMA,
+     $                               IWORK( IINDR+KTOT ),
+     $                               ISUPPZ( 2*KTOT-1 ),
+     $                               WORK( INDWRK ) )
+                        TMP1 = ONE / ZTZ
+                        NRMINV = SQRT( TMP1 )
+                        RESID = ABS( MINGMA )*NRMINV
+                        RQCORR = MINGMA*TMP1
+                        IF( K.EQ.IN ) THEN
+                           GAP = WORK( INDGAP+K-1 )
+                        ELSE IF( K.EQ.1 ) THEN
+                           GAP = WORK( INDGAP+K )
+                        ELSE
+                           GAP = MIN( WORK( INDGAP+K-1 ),
+     $                           WORK( INDGAP+K ) )
+                        END IF
+                        ITER = ITER + 1
+                        IF( RESID.GT.TOL*GAP .AND. ABS( RQCORR ).GT.
+     $                      FOUR*EPS*ABS( LAMBDA ) ) THEN
+                           WORK( K ) = LAMBDA + RQCORR
+                           IF( ITER.LT.MAXITR ) THEN
+                              GO TO 90
+                           END IF
+                        END IF
+                        IWORK( KTOT ) = 1
+                        IF( NEWSIZ.EQ.1 )
+     $                     NDONE = NDONE + 1
+                        CALL SSCAL( IN, NRMINV, Z( IBEGIN, KTOT ), 1 )
+                        KTOT = KTOT + 1
+  100                CONTINUE
+                     IF( NEWSIZ.GT.1 ) THEN
+                        ITMP1 = ISUPPZ( 2*NEWFTT-1 )
+                        ITMP2 = ISUPPZ( 2*NEWFTT )
+                        KTOT = OLDIEN + NEWLST
+                        DO 120 P = NEWFTT + 1, KTOT
+                           DO 110 Q = NEWFTT, P - 1
+                              TMP1 = -SDOT( IN, Z( IBEGIN, P ), 1,
+     $                               Z( IBEGIN, Q ), 1 )
+                              CALL SAXPY( IN, TMP1, Z( IBEGIN, Q ), 1,
+     $                                    Z( IBEGIN, P ), 1 )
+  110                      CONTINUE
+                           TMP1 = ONE / SNRM2( IN, Z( IBEGIN, P ), 1 )
+                           CALL SSCAL( IN, TMP1, Z( IBEGIN, P ), 1 )
+                           ITMP1 = MIN( ITMP1, ISUPPZ( 2*P-1 ) )
+                           ITMP2 = MAX( ITMP2, ISUPPZ( 2*P ) )
+  120                   CONTINUE
+                        DO 130 P = NEWFTT, KTOT
+                           ISUPPZ( 2*P-1 ) = ITMP1
+                           ISUPPZ( 2*P ) = ITMP2
+  130                   CONTINUE
+                        NDONE = NDONE + NEWSIZ
+                     END IF
+                  END IF
+                  NEWFRS = J + 1
+  140          CONTINUE
+  150       CONTINUE
+            NDEPTH = NDEPTH + 1
+            GO TO 40
+         END IF
+         J = 2*IBEGIN
+         DO 160 I = IBEGIN, IEND
+            ISUPPZ( J-1 ) = ISUPPZ( J-1 ) + OLDIEN
+            ISUPPZ( J ) = ISUPPZ( J ) + OLDIEN
+            J = J + 2
+  160    CONTINUE
+         IBEGIN = IEND + 1
+  170 CONTINUE
+*
+      RETURN
+*
+*     End of SLARRV
+*
+      END
diff --git a/costa/native/external/lapack/slartg.f b/costa/native/external/lapack/slartg.f
new file mode 100644
index 000000000..8ee53ac6e
--- /dev/null
+++ b/costa/native/external/lapack/slartg.f
@@ -0,0 +1,143 @@
+      SUBROUTINE SLARTG( F, G, CS, SN, R )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      REAL               CS, F, G, R, SN
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLARTG generate a plane rotation so that
+*
+*     [  CS  SN  ]  .  [ F ]  =  [ R ]   where CS**2 + SN**2 = 1.
+*     [ -SN  CS  ]     [ G ]     [ 0 ]
+*
+*  This is a slower, more accurate version of the BLAS1 routine SROTG,
+*  with the following other differences:
+*     F and G are unchanged on return.
+*     If G=0, then CS=1 and SN=0.
+*     If F=0 and (G .ne. 0), then CS=0 and SN=1 without doing any
+*        floating point operations (saves work in SBDSQR when
+*        there are zeros on the diagonal).
+*
+*  If F exceeds G in magnitude, CS will be positive.
+*
+*  Arguments
+*  =========
+*
+*  F       (input) REAL
+*          The first component of vector to be rotated.
+*
+*  G       (input) REAL
+*          The second component of vector to be rotated.
+*
+*  CS      (output) REAL
+*          The cosine of the rotation.
+*
+*  SN      (output) REAL
+*          The sine of the rotation.
+*
+*  R       (output) REAL
+*          The nonzero component of the rotated vector.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E0 )
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E0 )
+      REAL               TWO
+      PARAMETER          ( TWO = 2.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            FIRST
+      INTEGER            COUNT, I
+      REAL               EPS, F1, G1, SAFMIN, SAFMN2, SAFMX2, SCALE
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, INT, LOG, MAX, SQRT
+*     ..
+*     .. Save statement ..
+      SAVE               FIRST, SAFMX2, SAFMIN, SAFMN2
+*     ..
+*     .. Data statements ..
+      DATA               FIRST / .TRUE. /
+*     ..
+*     .. Executable Statements ..
+*
+      IF( FIRST ) THEN
+         FIRST = .FALSE.
+         SAFMIN = SLAMCH( 'S' )
+         EPS = SLAMCH( 'E' )
+         SAFMN2 = SLAMCH( 'B' )**INT( LOG( SAFMIN / EPS ) /
+     $            LOG( SLAMCH( 'B' ) ) / TWO )
+         SAFMX2 = ONE / SAFMN2
+      END IF
+      IF( G.EQ.ZERO ) THEN
+         CS = ONE
+         SN = ZERO
+         R = F
+      ELSE IF( F.EQ.ZERO ) THEN
+         CS = ZERO
+         SN = ONE
+         R = G
+      ELSE
+         F1 = F
+         G1 = G
+         SCALE = MAX( ABS( F1 ), ABS( G1 ) )
+         IF( SCALE.GE.SAFMX2 ) THEN
+            COUNT = 0
+   10       CONTINUE
+            COUNT = COUNT + 1
+            F1 = F1*SAFMN2
+            G1 = G1*SAFMN2
+            SCALE = MAX( ABS( F1 ), ABS( G1 ) )
+            IF( SCALE.GE.SAFMX2 )
+     $         GO TO 10
+            R = SQRT( F1**2+G1**2 )
+            CS = F1 / R
+            SN = G1 / R
+            DO 20 I = 1, COUNT
+               R = R*SAFMX2
+   20       CONTINUE
+         ELSE IF( SCALE.LE.SAFMN2 ) THEN
+            COUNT = 0
+   30       CONTINUE
+            COUNT = COUNT + 1
+            F1 = F1*SAFMX2
+            G1 = G1*SAFMX2
+            SCALE = MAX( ABS( F1 ), ABS( G1 ) )
+            IF( SCALE.LE.SAFMN2 )
+     $         GO TO 30
+            R = SQRT( F1**2+G1**2 )
+            CS = F1 / R
+            SN = G1 / R
+            DO 40 I = 1, COUNT
+               R = R*SAFMN2
+   40       CONTINUE
+         ELSE
+            R = SQRT( F1**2+G1**2 )
+            CS = F1 / R
+            SN = G1 / R
+         END IF
+         IF( ABS( F ).GT.ABS( G ) .AND. CS.LT.ZERO ) THEN
+            CS = -CS
+            SN = -SN
+            R = -R
+         END IF
+      END IF
+      RETURN
+*
+*     End of SLARTG
+*
+      END
diff --git a/costa/native/external/lapack/slartv.f b/costa/native/external/lapack/slartv.f
new file mode 100644
index 000000000..e0420090a
--- /dev/null
+++ b/costa/native/external/lapack/slartv.f
@@ -0,0 +1,77 @@
+      SUBROUTINE SLARTV( N, X, INCX, Y, INCY, C, S, INCC )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCC, INCX, INCY, N
+*     ..
+*     .. Array Arguments ..
+      REAL               C( * ), S( * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLARTV applies a vector of real plane rotations to elements of the
+*  real vectors x and y. For i = 1,2,...,n
+*
+*     ( x(i) ) := (  c(i)  s(i) ) ( x(i) )
+*     ( y(i) )    ( -s(i)  c(i) ) ( y(i) )
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of plane rotations to be applied.
+*
+*  X       (input/output) REAL array,
+*                         dimension (1+(N-1)*INCX)
+*          The vector x.
+*
+*  INCX    (input) INTEGER
+*          The increment between elements of X. INCX > 0.
+*
+*  Y       (input/output) REAL array,
+*                         dimension (1+(N-1)*INCY)
+*          The vector y.
+*
+*  INCY    (input) INTEGER
+*          The increment between elements of Y. INCY > 0.
+*
+*  C       (input) REAL array, dimension (1+(N-1)*INCC)
+*          The cosines of the plane rotations.
+*
+*  S       (input) REAL array, dimension (1+(N-1)*INCC)
+*          The sines of the plane rotations.
+*
+*  INCC    (input) INTEGER
+*          The increment between elements of C and S. INCC > 0.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, IC, IX, IY
+      REAL               XI, YI
+*     ..
+*     .. Executable Statements ..
+*
+      IX = 1
+      IY = 1
+      IC = 1
+      DO 10 I = 1, N
+         XI = X( IX )
+         YI = Y( IY )
+         X( IX ) = C( IC )*XI + S( IC )*YI
+         Y( IY ) = C( IC )*YI - S( IC )*XI
+         IX = IX + INCX
+         IY = IY + INCY
+         IC = IC + INCC
+   10 CONTINUE
+      RETURN
+*
+*     End of SLARTV
+*
+      END
diff --git a/costa/native/external/lapack/slaruv.f b/costa/native/external/lapack/slaruv.f
new file mode 100644
index 000000000..d3d1fc416
--- /dev/null
+++ b/costa/native/external/lapack/slaruv.f
@@ -0,0 +1,368 @@
+      SUBROUTINE SLARUV( ISEED, N, X )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            ISEED( 4 )
+      REAL               X( N )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLARUV returns a vector of n random real numbers from a uniform (0,1)
+*  distribution (n <= 128).
+*
+*  This is an auxiliary routine called by SLARNV and CLARNV.
+*
+*  Arguments
+*  =========
+*
+*  ISEED   (input/output) INTEGER array, dimension (4)
+*          On entry, the seed of the random number generator; the array
+*          elements must be between 0 and 4095, and ISEED(4) must be
+*          odd.
+*          On exit, the seed is updated.
+*
+*  N       (input) INTEGER
+*          The number of random numbers to be generated. N <= 128.
+*
+*  X       (output) REAL array, dimension (N)
+*          The generated random numbers.
+*
+*  Further Details
+*  ===============
+*
+*  This routine uses a multiplicative congruential method with modulus
+*  2**48 and multiplier 33952834046453 (see G.S.Fishman,
+*  'Multiplicative congruential random number generators with modulus
+*  2**b: an exhaustive analysis for b = 32 and a partial analysis for
+*  b = 48', Math. Comp. 189, pp 331-344, 1990).
+*
+*  48-bit integers are stored in 4 integer array elements with 12 bits
+*  per element. Hence the routine is portable across machines with
+*  integers of 32 bits or more.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E0 )
+      INTEGER            LV, IPW2
+      REAL               R
+      PARAMETER          ( LV = 128, IPW2 = 4096, R = ONE / IPW2 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, I1, I2, I3, I4, IT1, IT2, IT3, IT4, J
+*     ..
+*     .. Local Arrays ..
+      INTEGER            MM( LV, 4 )
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN, MOD, REAL
+*     ..
+*     .. Data statements ..
+      DATA               ( MM( 1, J ), J = 1, 4 ) / 494, 322, 2508,
+     $                   2549 /
+      DATA               ( MM( 2, J ), J = 1, 4 ) / 2637, 789, 3754,
+     $                   1145 /
+      DATA               ( MM( 3, J ), J = 1, 4 ) / 255, 1440, 1766,
+     $                   2253 /
+      DATA               ( MM( 4, J ), J = 1, 4 ) / 2008, 752, 3572,
+     $                   305 /
+      DATA               ( MM( 5, J ), J = 1, 4 ) / 1253, 2859, 2893,
+     $                   3301 /
+      DATA               ( MM( 6, J ), J = 1, 4 ) / 3344, 123, 307,
+     $                   1065 /
+      DATA               ( MM( 7, J ), J = 1, 4 ) / 4084, 1848, 1297,
+     $                   3133 /
+      DATA               ( MM( 8, J ), J = 1, 4 ) / 1739, 643, 3966,
+     $                   2913 /
+      DATA               ( MM( 9, J ), J = 1, 4 ) / 3143, 2405, 758,
+     $                   3285 /
+      DATA               ( MM( 10, J ), J = 1, 4 ) / 3468, 2638, 2598,
+     $                   1241 /
+      DATA               ( MM( 11, J ), J = 1, 4 ) / 688, 2344, 3406,
+     $                   1197 /
+      DATA               ( MM( 12, J ), J = 1, 4 ) / 1657, 46, 2922,
+     $                   3729 /
+      DATA               ( MM( 13, J ), J = 1, 4 ) / 1238, 3814, 1038,
+     $                   2501 /
+      DATA               ( MM( 14, J ), J = 1, 4 ) / 3166, 913, 2934,
+     $                   1673 /
+      DATA               ( MM( 15, J ), J = 1, 4 ) / 1292, 3649, 2091,
+     $                   541 /
+      DATA               ( MM( 16, J ), J = 1, 4 ) / 3422, 339, 2451,
+     $                   2753 /
+      DATA               ( MM( 17, J ), J = 1, 4 ) / 1270, 3808, 1580,
+     $                   949 /
+      DATA               ( MM( 18, J ), J = 1, 4 ) / 2016, 822, 1958,
+     $                   2361 /
+      DATA               ( MM( 19, J ), J = 1, 4 ) / 154, 2832, 2055,
+     $                   1165 /
+      DATA               ( MM( 20, J ), J = 1, 4 ) / 2862, 3078, 1507,
+     $                   4081 /
+      DATA               ( MM( 21, J ), J = 1, 4 ) / 697, 3633, 1078,
+     $                   2725 /
+      DATA               ( MM( 22, J ), J = 1, 4 ) / 1706, 2970, 3273,
+     $                   3305 /
+      DATA               ( MM( 23, J ), J = 1, 4 ) / 491, 637, 17,
+     $                   3069 /
+      DATA               ( MM( 24, J ), J = 1, 4 ) / 931, 2249, 854,
+     $                   3617 /
+      DATA               ( MM( 25, J ), J = 1, 4 ) / 1444, 2081, 2916,
+     $                   3733 /
+      DATA               ( MM( 26, J ), J = 1, 4 ) / 444, 4019, 3971,
+     $                   409 /
+      DATA               ( MM( 27, J ), J = 1, 4 ) / 3577, 1478, 2889,
+     $                   2157 /
+      DATA               ( MM( 28, J ), J = 1, 4 ) / 3944, 242, 3831,
+     $                   1361 /
+      DATA               ( MM( 29, J ), J = 1, 4 ) / 2184, 481, 2621,
+     $                   3973 /
+      DATA               ( MM( 30, J ), J = 1, 4 ) / 1661, 2075, 1541,
+     $                   1865 /
+      DATA               ( MM( 31, J ), J = 1, 4 ) / 3482, 4058, 893,
+     $                   2525 /
+      DATA               ( MM( 32, J ), J = 1, 4 ) / 657, 622, 736,
+     $                   1409 /
+      DATA               ( MM( 33, J ), J = 1, 4 ) / 3023, 3376, 3992,
+     $                   3445 /
+      DATA               ( MM( 34, J ), J = 1, 4 ) / 3618, 812, 787,
+     $                   3577 /
+      DATA               ( MM( 35, J ), J = 1, 4 ) / 1267, 234, 2125,
+     $                   77 /
+      DATA               ( MM( 36, J ), J = 1, 4 ) / 1828, 641, 2364,
+     $                   3761 /
+      DATA               ( MM( 37, J ), J = 1, 4 ) / 164, 4005, 2460,
+     $                   2149 /
+      DATA               ( MM( 38, J ), J = 1, 4 ) / 3798, 1122, 257,
+     $                   1449 /
+      DATA               ( MM( 39, J ), J = 1, 4 ) / 3087, 3135, 1574,
+     $                   3005 /
+      DATA               ( MM( 40, J ), J = 1, 4 ) / 2400, 2640, 3912,
+     $                   225 /
+      DATA               ( MM( 41, J ), J = 1, 4 ) / 2870, 2302, 1216,
+     $                   85 /
+      DATA               ( MM( 42, J ), J = 1, 4 ) / 3876, 40, 3248,
+     $                   3673 /
+      DATA               ( MM( 43, J ), J = 1, 4 ) / 1905, 1832, 3401,
+     $                   3117 /
+      DATA               ( MM( 44, J ), J = 1, 4 ) / 1593, 2247, 2124,
+     $                   3089 /
+      DATA               ( MM( 45, J ), J = 1, 4 ) / 1797, 2034, 2762,
+     $                   1349 /
+      DATA               ( MM( 46, J ), J = 1, 4 ) / 1234, 2637, 149,
+     $                   2057 /
+      DATA               ( MM( 47, J ), J = 1, 4 ) / 3460, 1287, 2245,
+     $                   413 /
+      DATA               ( MM( 48, J ), J = 1, 4 ) / 328, 1691, 166,
+     $                   65 /
+      DATA               ( MM( 49, J ), J = 1, 4 ) / 2861, 496, 466,
+     $                   1845 /
+      DATA               ( MM( 50, J ), J = 1, 4 ) / 1950, 1597, 4018,
+     $                   697 /
+      DATA               ( MM( 51, J ), J = 1, 4 ) / 617, 2394, 1399,
+     $                   3085 /
+      DATA               ( MM( 52, J ), J = 1, 4 ) / 2070, 2584, 190,
+     $                   3441 /
+      DATA               ( MM( 53, J ), J = 1, 4 ) / 3331, 1843, 2879,
+     $                   1573 /
+      DATA               ( MM( 54, J ), J = 1, 4 ) / 769, 336, 153,
+     $                   3689 /
+      DATA               ( MM( 55, J ), J = 1, 4 ) / 1558, 1472, 2320,
+     $                   2941 /
+      DATA               ( MM( 56, J ), J = 1, 4 ) / 2412, 2407, 18,
+     $                   929 /
+      DATA               ( MM( 57, J ), J = 1, 4 ) / 2800, 433, 712,
+     $                   533 /
+      DATA               ( MM( 58, J ), J = 1, 4 ) / 189, 2096, 2159,
+     $                   2841 /
+      DATA               ( MM( 59, J ), J = 1, 4 ) / 287, 1761, 2318,
+     $                   4077 /
+      DATA               ( MM( 60, J ), J = 1, 4 ) / 2045, 2810, 2091,
+     $                   721 /
+      DATA               ( MM( 61, J ), J = 1, 4 ) / 1227, 566, 3443,
+     $                   2821 /
+      DATA               ( MM( 62, J ), J = 1, 4 ) / 2838, 442, 1510,
+     $                   2249 /
+      DATA               ( MM( 63, J ), J = 1, 4 ) / 209, 41, 449,
+     $                   2397 /
+      DATA               ( MM( 64, J ), J = 1, 4 ) / 2770, 1238, 1956,
+     $                   2817 /
+      DATA               ( MM( 65, J ), J = 1, 4 ) / 3654, 1086, 2201,
+     $                   245 /
+      DATA               ( MM( 66, J ), J = 1, 4 ) / 3993, 603, 3137,
+     $                   1913 /
+      DATA               ( MM( 67, J ), J = 1, 4 ) / 192, 840, 3399,
+     $                   1997 /
+      DATA               ( MM( 68, J ), J = 1, 4 ) / 2253, 3168, 1321,
+     $                   3121 /
+      DATA               ( MM( 69, J ), J = 1, 4 ) / 3491, 1499, 2271,
+     $                   997 /
+      DATA               ( MM( 70, J ), J = 1, 4 ) / 2889, 1084, 3667,
+     $                   1833 /
+      DATA               ( MM( 71, J ), J = 1, 4 ) / 2857, 3438, 2703,
+     $                   2877 /
+      DATA               ( MM( 72, J ), J = 1, 4 ) / 2094, 2408, 629,
+     $                   1633 /
+      DATA               ( MM( 73, J ), J = 1, 4 ) / 1818, 1589, 2365,
+     $                   981 /
+      DATA               ( MM( 74, J ), J = 1, 4 ) / 688, 2391, 2431,
+     $                   2009 /
+      DATA               ( MM( 75, J ), J = 1, 4 ) / 1407, 288, 1113,
+     $                   941 /
+      DATA               ( MM( 76, J ), J = 1, 4 ) / 634, 26, 3922,
+     $                   2449 /
+      DATA               ( MM( 77, J ), J = 1, 4 ) / 3231, 512, 2554,
+     $                   197 /
+      DATA               ( MM( 78, J ), J = 1, 4 ) / 815, 1456, 184,
+     $                   2441 /
+      DATA               ( MM( 79, J ), J = 1, 4 ) / 3524, 171, 2099,
+     $                   285 /
+      DATA               ( MM( 80, J ), J = 1, 4 ) / 1914, 1677, 3228,
+     $                   1473 /
+      DATA               ( MM( 81, J ), J = 1, 4 ) / 516, 2657, 4012,
+     $                   2741 /
+      DATA               ( MM( 82, J ), J = 1, 4 ) / 164, 2270, 1921,
+     $                   3129 /
+      DATA               ( MM( 83, J ), J = 1, 4 ) / 303, 2587, 3452,
+     $                   909 /
+      DATA               ( MM( 84, J ), J = 1, 4 ) / 2144, 2961, 3901,
+     $                   2801 /
+      DATA               ( MM( 85, J ), J = 1, 4 ) / 3480, 1970, 572,
+     $                   421 /
+      DATA               ( MM( 86, J ), J = 1, 4 ) / 119, 1817, 3309,
+     $                   4073 /
+      DATA               ( MM( 87, J ), J = 1, 4 ) / 3357, 676, 3171,
+     $                   2813 /
+      DATA               ( MM( 88, J ), J = 1, 4 ) / 837, 1410, 817,
+     $                   2337 /
+      DATA               ( MM( 89, J ), J = 1, 4 ) / 2826, 3723, 3039,
+     $                   1429 /
+      DATA               ( MM( 90, J ), J = 1, 4 ) / 2332, 2803, 1696,
+     $                   1177 /
+      DATA               ( MM( 91, J ), J = 1, 4 ) / 2089, 3185, 1256,
+     $                   1901 /
+      DATA               ( MM( 92, J ), J = 1, 4 ) / 3780, 184, 3715,
+     $                   81 /
+      DATA               ( MM( 93, J ), J = 1, 4 ) / 1700, 663, 2077,
+     $                   1669 /
+      DATA               ( MM( 94, J ), J = 1, 4 ) / 3712, 499, 3019,
+     $                   2633 /
+      DATA               ( MM( 95, J ), J = 1, 4 ) / 150, 3784, 1497,
+     $                   2269 /
+      DATA               ( MM( 96, J ), J = 1, 4 ) / 2000, 1631, 1101,
+     $                   129 /
+      DATA               ( MM( 97, J ), J = 1, 4 ) / 3375, 1925, 717,
+     $                   1141 /
+      DATA               ( MM( 98, J ), J = 1, 4 ) / 1621, 3912, 51,
+     $                   249 /
+      DATA               ( MM( 99, J ), J = 1, 4 ) / 3090, 1398, 981,
+     $                   3917 /
+      DATA               ( MM( 100, J ), J = 1, 4 ) / 3765, 1349, 1978,
+     $                   2481 /
+      DATA               ( MM( 101, J ), J = 1, 4 ) / 1149, 1441, 1813,
+     $                   3941 /
+      DATA               ( MM( 102, J ), J = 1, 4 ) / 3146, 2224, 3881,
+     $                   2217 /
+      DATA               ( MM( 103, J ), J = 1, 4 ) / 33, 2411, 76,
+     $                   2749 /
+      DATA               ( MM( 104, J ), J = 1, 4 ) / 3082, 1907, 3846,
+     $                   3041 /
+      DATA               ( MM( 105, J ), J = 1, 4 ) / 2741, 3192, 3694,
+     $                   1877 /
+      DATA               ( MM( 106, J ), J = 1, 4 ) / 359, 2786, 1682,
+     $                   345 /
+      DATA               ( MM( 107, J ), J = 1, 4 ) / 3316, 382, 124,
+     $                   2861 /
+      DATA               ( MM( 108, J ), J = 1, 4 ) / 1749, 37, 1660,
+     $                   1809 /
+      DATA               ( MM( 109, J ), J = 1, 4 ) / 185, 759, 3997,
+     $                   3141 /
+      DATA               ( MM( 110, J ), J = 1, 4 ) / 2784, 2948, 479,
+     $                   2825 /
+      DATA               ( MM( 111, J ), J = 1, 4 ) / 2202, 1862, 1141,
+     $                   157 /
+      DATA               ( MM( 112, J ), J = 1, 4 ) / 2199, 3802, 886,
+     $                   2881 /
+      DATA               ( MM( 113, J ), J = 1, 4 ) / 1364, 2423, 3514,
+     $                   3637 /
+      DATA               ( MM( 114, J ), J = 1, 4 ) / 1244, 2051, 1301,
+     $                   1465 /
+      DATA               ( MM( 115, J ), J = 1, 4 ) / 2020, 2295, 3604,
+     $                   2829 /
+      DATA               ( MM( 116, J ), J = 1, 4 ) / 3160, 1332, 1888,
+     $                   2161 /
+      DATA               ( MM( 117, J ), J = 1, 4 ) / 2785, 1832, 1836,
+     $                   3365 /
+      DATA               ( MM( 118, J ), J = 1, 4 ) / 2772, 2405, 1990,
+     $                   361 /
+      DATA               ( MM( 119, J ), J = 1, 4 ) / 1217, 3638, 2058,
+     $                   2685 /
+      DATA               ( MM( 120, J ), J = 1, 4 ) / 1822, 3661, 692,
+     $                   3745 /
+      DATA               ( MM( 121, J ), J = 1, 4 ) / 1245, 327, 1194,
+     $                   2325 /
+      DATA               ( MM( 122, J ), J = 1, 4 ) / 2252, 3660, 20,
+     $                   3609 /
+      DATA               ( MM( 123, J ), J = 1, 4 ) / 3904, 716, 3285,
+     $                   3821 /
+      DATA               ( MM( 124, J ), J = 1, 4 ) / 2774, 1842, 2046,
+     $                   3537 /
+      DATA               ( MM( 125, J ), J = 1, 4 ) / 997, 3987, 2107,
+     $                   517 /
+      DATA               ( MM( 126, J ), J = 1, 4 ) / 2573, 1368, 3508,
+     $                   3017 /
+      DATA               ( MM( 127, J ), J = 1, 4 ) / 1148, 1848, 3525,
+     $                   2141 /
+      DATA               ( MM( 128, J ), J = 1, 4 ) / 545, 2366, 3801,
+     $                   1537 /
+*     ..
+*     .. Executable Statements ..
+*
+      I1 = ISEED( 1 )
+      I2 = ISEED( 2 )
+      I3 = ISEED( 3 )
+      I4 = ISEED( 4 )
+*
+      DO 10 I = 1, MIN( N, LV )
+*
+*        Multiply the seed by i-th power of the multiplier modulo 2**48
+*
+         IT4 = I4*MM( I, 4 )
+         IT3 = IT4 / IPW2
+         IT4 = IT4 - IPW2*IT3
+         IT3 = IT3 + I3*MM( I, 4 ) + I4*MM( I, 3 )
+         IT2 = IT3 / IPW2
+         IT3 = IT3 - IPW2*IT2
+         IT2 = IT2 + I2*MM( I, 4 ) + I3*MM( I, 3 ) + I4*MM( I, 2 )
+         IT1 = IT2 / IPW2
+         IT2 = IT2 - IPW2*IT1
+         IT1 = IT1 + I1*MM( I, 4 ) + I2*MM( I, 3 ) + I3*MM( I, 2 ) +
+     $         I4*MM( I, 1 )
+         IT1 = MOD( IT1, IPW2 )
+*
+*        Convert 48-bit integer to a real number in the interval (0,1)
+*
+         X( I ) = R*( REAL( IT1 )+R*( REAL( IT2 )+R*( REAL( IT3 )+R*
+     $            REAL( IT4 ) ) ) )
+   10 CONTINUE
+*
+*     Return final value of seed
+*
+      ISEED( 1 ) = IT1
+      ISEED( 2 ) = IT2
+      ISEED( 3 ) = IT3
+      ISEED( 4 ) = IT4
+      RETURN
+*
+*     End of SLARUV
+*
+      END
diff --git a/costa/native/external/lapack/slarz.f b/costa/native/external/lapack/slarz.f
new file mode 100644
index 000000000..de91e468f
--- /dev/null
+++ b/costa/native/external/lapack/slarz.f
@@ -0,0 +1,153 @@
+      SUBROUTINE SLARZ( SIDE, M, N, L, V, INCV, TAU, C, LDC, WORK )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE
+      INTEGER            INCV, L, LDC, M, N
+      REAL               TAU
+*     ..
+*     .. Array Arguments ..
+      REAL               C( LDC, * ), V( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLARZ applies a real elementary reflector H to a real M-by-N
+*  matrix C, from either the left or the right. H is represented in the
+*  form
+*
+*        H = I - tau * v * v'
+*
+*  where tau is a real scalar and v is a real vector.
+*
+*  If tau = 0, then H is taken to be the unit matrix.
+*
+*
+*  H is a product of k elementary reflectors as returned by STZRZF.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': form  H * C
+*          = 'R': form  C * H
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C.
+*
+*  L       (input) INTEGER
+*          The number of entries of the vector V containing
+*          the meaningful part of the Householder vectors.
+*          If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0.
+*
+*  V       (input) REAL array, dimension (1+(L-1)*abs(INCV))
+*          The vector v in the representation of H as returned by
+*          STZRZF. V is not used if TAU = 0.
+*
+*  INCV    (input) INTEGER
+*          The increment between elements of v. INCV <> 0.
+*
+*  TAU     (input) REAL
+*          The value tau in the representation of H.
+*
+*  C       (input/output) REAL array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by the matrix H * C if SIDE = 'L',
+*          or C * H if SIDE = 'R'.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) REAL array, dimension
+*                         (N) if SIDE = 'L'
+*                      or (M) if SIDE = 'R'
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SCOPY, SGEMV, SGER
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+      IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*        Form  H * C
+*
+         IF( TAU.NE.ZERO ) THEN
+*
+*           w( 1:n ) = C( 1, 1:n )
+*
+            CALL SCOPY( N, C, LDC, WORK, 1 )
+*
+*           w( 1:n ) = w( 1:n ) + C( m-l+1:m, 1:n )' * v( 1:l )
+*
+            CALL SGEMV( 'Transpose', L, N, ONE, C( M-L+1, 1 ), LDC, V,
+     $                  INCV, ONE, WORK, 1 )
+*
+*           C( 1, 1:n ) = C( 1, 1:n ) - tau * w( 1:n )
+*
+            CALL SAXPY( N, -TAU, WORK, 1, C, LDC )
+*
+*           C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ...
+*                               tau * v( 1:l ) * w( 1:n )'
+*
+            CALL SGER( L, N, -TAU, V, INCV, WORK, 1, C( M-L+1, 1 ),
+     $                 LDC )
+         END IF
+*
+      ELSE
+*
+*        Form  C * H
+*
+         IF( TAU.NE.ZERO ) THEN
+*
+*           w( 1:m ) = C( 1:m, 1 )
+*
+            CALL SCOPY( M, C, 1, WORK, 1 )
+*
+*           w( 1:m ) = w( 1:m ) + C( 1:m, n-l+1:n, 1:n ) * v( 1:l )
+*
+            CALL SGEMV( 'No transpose', M, L, ONE, C( 1, N-L+1 ), LDC,
+     $                  V, INCV, ONE, WORK, 1 )
+*
+*           C( 1:m, 1 ) = C( 1:m, 1 ) - tau * w( 1:m )
+*
+            CALL SAXPY( M, -TAU, WORK, 1, C, 1 )
+*
+*           C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ...
+*                               tau * w( 1:m ) * v( 1:l )'
+*
+            CALL SGER( M, L, -TAU, WORK, 1, V, INCV, C( 1, N-L+1 ),
+     $                 LDC )
+*
+         END IF
+*
+      END IF
+*
+      RETURN
+*
+*     End of SLARZ
+*
+      END
diff --git a/costa/native/external/lapack/slarzb.f b/costa/native/external/lapack/slarzb.f
new file mode 100644
index 000000000..94b3aa0d7
--- /dev/null
+++ b/costa/native/external/lapack/slarzb.f
@@ -0,0 +1,221 @@
+      SUBROUTINE SLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V,
+     $                   LDV, T, LDT, C, LDC, WORK, LDWORK )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     December 1, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIRECT, SIDE, STOREV, TRANS
+      INTEGER            K, L, LDC, LDT, LDV, LDWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               C( LDC, * ), T( LDT, * ), V( LDV, * ),
+     $                   WORK( LDWORK, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLARZB applies a real block reflector H or its transpose H**T to
+*  a real distributed M-by-N  C from the left or the right.
+*
+*  Currently, only STOREV = 'R' and DIRECT = 'B' are supported.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply H or H' from the Left
+*          = 'R': apply H or H' from the Right
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': apply H (No transpose)
+*          = 'C': apply H' (Transpose)
+*
+*  DIRECT  (input) CHARACTER*1
+*          Indicates how H is formed from a product of elementary
+*          reflectors
+*          = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet)
+*          = 'B': H = H(k) . . . H(2) H(1) (Backward)
+*
+*  STOREV  (input) CHARACTER*1
+*          Indicates how the vectors which define the elementary
+*          reflectors are stored:
+*          = 'C': Columnwise                        (not supported yet)
+*          = 'R': Rowwise
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C.
+*
+*  K       (input) INTEGER
+*          The order of the matrix T (= the number of elementary
+*          reflectors whose product defines the block reflector).
+*
+*  L       (input) INTEGER
+*          The number of columns of the matrix V containing the
+*          meaningful part of the Householder reflectors.
+*          If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0.
+*
+*  V       (input) REAL array, dimension (LDV,NV).
+*          If STOREV = 'C', NV = K; if STOREV = 'R', NV = L.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V.
+*          If STOREV = 'C', LDV >= L; if STOREV = 'R', LDV >= K.
+*
+*  T       (input) REAL array, dimension (LDT,K)
+*          The triangular K-by-K matrix T in the representation of the
+*          block reflector.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= K.
+*
+*  C       (input/output) REAL array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by H*C or H'*C or C*H or C*H'.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) REAL array, dimension (LDWORK,K)
+*
+*  LDWORK  (input) INTEGER
+*          The leading dimension of the array WORK.
+*          If SIDE = 'L', LDWORK >= max(1,N);
+*          if SIDE = 'R', LDWORK >= max(1,M).
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      CHARACTER          TRANST
+      INTEGER            I, INFO, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SGEMM, STRMM, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 .OR. N.LE.0 )
+     $   RETURN
+*
+*     Check for currently supported options
+*
+      INFO = 0
+      IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLARZB', -INFO )
+         RETURN
+      END IF
+*
+      IF( LSAME( TRANS, 'N' ) ) THEN
+         TRANST = 'T'
+      ELSE
+         TRANST = 'N'
+      END IF
+*
+      IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*        Form  H * C  or  H' * C
+*
+*        W( 1:n, 1:k ) = C( 1:k, 1:n )'
+*
+         DO 10 J = 1, K
+            CALL SCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 )
+   10    CONTINUE
+*
+*        W( 1:n, 1:k ) = W( 1:n, 1:k ) + ...
+*                        C( m-l+1:m, 1:n )' * V( 1:k, 1:l )'
+*
+         IF( L.GT.0 )
+     $      CALL SGEMM( 'Transpose', 'Transpose', N, K, L, ONE,
+     $                  C( M-L+1, 1 ), LDC, V, LDV, ONE, WORK, LDWORK )
+*
+*        W( 1:n, 1:k ) = W( 1:n, 1:k ) * T'  or  W( 1:m, 1:k ) * T
+*
+         CALL STRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, ONE, T,
+     $               LDT, WORK, LDWORK )
+*
+*        C( 1:k, 1:n ) = C( 1:k, 1:n ) - W( 1:n, 1:k )'
+*
+         DO 30 J = 1, N
+            DO 20 I = 1, K
+               C( I, J ) = C( I, J ) - WORK( J, I )
+   20       CONTINUE
+   30    CONTINUE
+*
+*        C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ...
+*                            V( 1:k, 1:l )' * W( 1:n, 1:k )'
+*
+         IF( L.GT.0 )
+     $      CALL SGEMM( 'Transpose', 'Transpose', L, N, K, -ONE, V, LDV,
+     $                  WORK, LDWORK, ONE, C( M-L+1, 1 ), LDC )
+*
+      ELSE IF( LSAME( SIDE, 'R' ) ) THEN
+*
+*        Form  C * H  or  C * H'
+*
+*        W( 1:m, 1:k ) = C( 1:m, 1:k )
+*
+         DO 40 J = 1, K
+            CALL SCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 )
+   40    CONTINUE
+*
+*        W( 1:m, 1:k ) = W( 1:m, 1:k ) + ...
+*                        C( 1:m, n-l+1:n ) * V( 1:k, 1:l )'
+*
+         IF( L.GT.0 )
+     $      CALL SGEMM( 'No transpose', 'Transpose', M, K, L, ONE,
+     $                  C( 1, N-L+1 ), LDC, V, LDV, ONE, WORK, LDWORK )
+*
+*        W( 1:m, 1:k ) = W( 1:m, 1:k ) * T  or  W( 1:m, 1:k ) * T'
+*
+         CALL STRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, ONE, T,
+     $               LDT, WORK, LDWORK )
+*
+*        C( 1:m, 1:k ) = C( 1:m, 1:k ) - W( 1:m, 1:k )
+*
+         DO 60 J = 1, K
+            DO 50 I = 1, M
+               C( I, J ) = C( I, J ) - WORK( I, J )
+   50       CONTINUE
+   60    CONTINUE
+*
+*        C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ...
+*                            W( 1:m, 1:k ) * V( 1:k, 1:l )
+*
+         IF( L.GT.0 )
+     $      CALL SGEMM( 'No transpose', 'No transpose', M, L, K, -ONE,
+     $                  WORK, LDWORK, V, LDV, ONE, C( 1, N-L+1 ), LDC )
+*
+      END IF
+*
+      RETURN
+*
+*     End of SLARZB
+*
+      END
diff --git a/costa/native/external/lapack/slarzt.f b/costa/native/external/lapack/slarzt.f
new file mode 100644
index 000000000..a57df4fb5
--- /dev/null
+++ b/costa/native/external/lapack/slarzt.f
@@ -0,0 +1,185 @@
+      SUBROUTINE SLARZT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIRECT, STOREV
+      INTEGER            K, LDT, LDV, N
+*     ..
+*     .. Array Arguments ..
+      REAL               T( LDT, * ), TAU( * ), V( LDV, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLARZT forms the triangular factor T of a real block reflector
+*  H of order > n, which is defined as a product of k elementary
+*  reflectors.
+*
+*  If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
+*
+*  If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
+*
+*  If STOREV = 'C', the vector which defines the elementary reflector
+*  H(i) is stored in the i-th column of the array V, and
+*
+*     H  =  I - V * T * V'
+*
+*  If STOREV = 'R', the vector which defines the elementary reflector
+*  H(i) is stored in the i-th row of the array V, and
+*
+*     H  =  I - V' * T * V
+*
+*  Currently, only STOREV = 'R' and DIRECT = 'B' are supported.
+*
+*  Arguments
+*  =========
+*
+*  DIRECT  (input) CHARACTER*1
+*          Specifies the order in which the elementary reflectors are
+*          multiplied to form the block reflector:
+*          = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet)
+*          = 'B': H = H(k) . . . H(2) H(1) (Backward)
+*
+*  STOREV  (input) CHARACTER*1
+*          Specifies how the vectors which define the elementary
+*          reflectors are stored (see also Further Details):
+*          = 'C': columnwise                        (not supported yet)
+*          = 'R': rowwise
+*
+*  N       (input) INTEGER
+*          The order of the block reflector H. N >= 0.
+*
+*  K       (input) INTEGER
+*          The order of the triangular factor T (= the number of
+*          elementary reflectors). K >= 1.
+*
+*  V       (input/output) REAL array, dimension
+*                               (LDV,K) if STOREV = 'C'
+*                               (LDV,N) if STOREV = 'R'
+*          The matrix V. See further details.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V.
+*          If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
+*
+*  TAU     (input) REAL array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i).
+*
+*  T       (output) REAL array, dimension (LDT,K)
+*          The k by k triangular factor T of the block reflector.
+*          If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
+*          lower triangular. The rest of the array is not used.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= K.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+*  The shape of the matrix V and the storage of the vectors which define
+*  the H(i) is best illustrated by the following example with n = 5 and
+*  k = 3. The elements equal to 1 are not stored; the corresponding
+*  array elements are modified but restored on exit. The rest of the
+*  array is not used.
+*
+*  DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R':
+*
+*                                              ______V_____
+*         ( v1 v2 v3 )                        /            \
+*         ( v1 v2 v3 )                      ( v1 v1 v1 v1 v1 . . . . 1 )
+*     V = ( v1 v2 v3 )                      ( v2 v2 v2 v2 v2 . . . 1   )
+*         ( v1 v2 v3 )                      ( v3 v3 v3 v3 v3 . . 1     )
+*         ( v1 v2 v3 )
+*            .  .  .
+*            .  .  .
+*            1  .  .
+*               1  .
+*                  1
+*
+*  DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R':
+*
+*                                                        ______V_____
+*            1                                          /            \
+*            .  1                           ( 1 . . . . v1 v1 v1 v1 v1 )
+*            .  .  1                        ( . 1 . . . v2 v2 v2 v2 v2 )
+*            .  .  .                        ( . . 1 . . v3 v3 v3 v3 v3 )
+*            .  .  .
+*         ( v1 v2 v3 )
+*         ( v1 v2 v3 )
+*     V = ( v1 v2 v3 )
+*         ( v1 v2 v3 )
+*         ( v1 v2 v3 )
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, INFO, J
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMV, STRMV, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+*     Check for currently supported options
+*
+      INFO = 0
+      IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLARZT', -INFO )
+         RETURN
+      END IF
+*
+      DO 20 I = K, 1, -1
+         IF( TAU( I ).EQ.ZERO ) THEN
+*
+*           H(i)  =  I
+*
+            DO 10 J = I, K
+               T( J, I ) = ZERO
+   10       CONTINUE
+         ELSE
+*
+*           general case
+*
+            IF( I.LT.K ) THEN
+*
+*              T(i+1:k,i) = - tau(i) * V(i+1:k,1:n) * V(i,1:n)'
+*
+               CALL SGEMV( 'No transpose', K-I, N, -TAU( I ),
+     $                     V( I+1, 1 ), LDV, V( I, 1 ), LDV, ZERO,
+     $                     T( I+1, I ), 1 )
+*
+*              T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i)
+*
+               CALL STRMV( 'Lower', 'No transpose', 'Non-unit', K-I,
+     $                     T( I+1, I+1 ), LDT, T( I+1, I ), 1 )
+            END IF
+            T( I, I ) = TAU( I )
+         END IF
+   20 CONTINUE
+      RETURN
+*
+*     End of SLARZT
+*
+      END
diff --git a/costa/native/external/lapack/slas2.f b/costa/native/external/lapack/slas2.f
new file mode 100644
index 000000000..475ed8915
--- /dev/null
+++ b/costa/native/external/lapack/slas2.f
@@ -0,0 +1,122 @@
+      SUBROUTINE SLAS2( F, G, H, SSMIN, SSMAX )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      REAL               F, G, H, SSMAX, SSMIN
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAS2  computes the singular values of the 2-by-2 matrix
+*     [  F   G  ]
+*     [  0   H  ].
+*  On return, SSMIN is the smaller singular value and SSMAX is the
+*  larger singular value.
+*
+*  Arguments
+*  =========
+*
+*  F       (input) REAL
+*          The (1,1) element of the 2-by-2 matrix.
+*
+*  G       (input) REAL
+*          The (1,2) element of the 2-by-2 matrix.
+*
+*  H       (input) REAL
+*          The (2,2) element of the 2-by-2 matrix.
+*
+*  SSMIN   (output) REAL
+*          The smaller singular value.
+*
+*  SSMAX   (output) REAL
+*          The larger singular value.
+*
+*  Further Details
+*  ===============
+*
+*  Barring over/underflow, all output quantities are correct to within
+*  a few units in the last place (ulps), even in the absence of a guard
+*  digit in addition/subtraction.
+*
+*  In IEEE arithmetic, the code works correctly if one matrix element is
+*  infinite.
+*
+*  Overflow will not occur unless the largest singular value itself
+*  overflows, or is within a few ulps of overflow. (On machines with
+*  partial overflow, like the Cray, overflow may occur if the largest
+*  singular value is within a factor of 2 of overflow.)
+*
+*  Underflow is harmless if underflow is gradual. Otherwise, results
+*  may correspond to a matrix modified by perturbations of size near
+*  the underflow threshold.
+*
+*  ====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E0 )
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E0 )
+      REAL               TWO
+      PARAMETER          ( TWO = 2.0E0 )
+*     ..
+*     .. Local Scalars ..
+      REAL               AS, AT, AU, C, FA, FHMN, FHMX, GA, HA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      FA = ABS( F )
+      GA = ABS( G )
+      HA = ABS( H )
+      FHMN = MIN( FA, HA )
+      FHMX = MAX( FA, HA )
+      IF( FHMN.EQ.ZERO ) THEN
+         SSMIN = ZERO
+         IF( FHMX.EQ.ZERO ) THEN
+            SSMAX = GA
+         ELSE
+            SSMAX = MAX( FHMX, GA )*SQRT( ONE+
+     $              ( MIN( FHMX, GA ) / MAX( FHMX, GA ) )**2 )
+         END IF
+      ELSE
+         IF( GA.LT.FHMX ) THEN
+            AS = ONE + FHMN / FHMX
+            AT = ( FHMX-FHMN ) / FHMX
+            AU = ( GA / FHMX )**2
+            C = TWO / ( SQRT( AS*AS+AU )+SQRT( AT*AT+AU ) )
+            SSMIN = FHMN*C
+            SSMAX = FHMX / C
+         ELSE
+            AU = FHMX / GA
+            IF( AU.EQ.ZERO ) THEN
+*
+*              Avoid possible harmful underflow if exponent range
+*              asymmetric (true SSMIN may not underflow even if
+*              AU underflows)
+*
+               SSMIN = ( FHMN*FHMX ) / GA
+               SSMAX = GA
+            ELSE
+               AS = ONE + FHMN / FHMX
+               AT = ( FHMX-FHMN ) / FHMX
+               C = ONE / ( SQRT( ONE+( AS*AU )**2 )+
+     $             SQRT( ONE+( AT*AU )**2 ) )
+               SSMIN = ( FHMN*C )*AU
+               SSMIN = SSMIN + SSMIN
+               SSMAX = GA / ( C+C )
+            END IF
+         END IF
+      END IF
+      RETURN
+*
+*     End of SLAS2
+*
+      END
diff --git a/costa/native/external/lapack/slascl.f b/costa/native/external/lapack/slascl.f
new file mode 100644
index 000000000..b1dc0b1b2
--- /dev/null
+++ b/costa/native/external/lapack/slascl.f
@@ -0,0 +1,268 @@
+      SUBROUTINE SLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TYPE
+      INTEGER            INFO, KL, KU, LDA, M, N
+      REAL               CFROM, CTO
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLASCL multiplies the M by N real matrix A by the real scalar
+*  CTO/CFROM.  This is done without over/underflow as long as the final
+*  result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that
+*  A may be full, upper triangular, lower triangular, upper Hessenberg,
+*  or banded.
+*
+*  Arguments
+*  =========
+*
+*  TYPE    (input) CHARACTER*1
+*          TYPE indices the storage type of the input matrix.
+*          = 'G':  A is a full matrix.
+*          = 'L':  A is a lower triangular matrix.
+*          = 'U':  A is an upper triangular matrix.
+*          = 'H':  A is an upper Hessenberg matrix.
+*          = 'B':  A is a symmetric band matrix with lower bandwidth KL
+*                  and upper bandwidth KU and with the only the lower
+*                  half stored.
+*          = 'Q':  A is a symmetric band matrix with lower bandwidth KL
+*                  and upper bandwidth KU and with the only the upper
+*                  half stored.
+*          = 'Z':  A is a band matrix with lower bandwidth KL and upper
+*                  bandwidth KU.
+*
+*  KL      (input) INTEGER
+*          The lower bandwidth of A.  Referenced only if TYPE = 'B',
+*          'Q' or 'Z'.
+*
+*  KU      (input) INTEGER
+*          The upper bandwidth of A.  Referenced only if TYPE = 'B',
+*          'Q' or 'Z'.
+*
+*  CFROM   (input) REAL
+*  CTO     (input) REAL
+*          The matrix A is multiplied by CTO/CFROM. A(I,J) is computed
+*          without over/underflow if the final result CTO*A(I,J)/CFROM
+*          can be represented without over/underflow.  CFROM must be
+*          nonzero.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,M)
+*          The matrix to be multiplied by CTO/CFROM.  See TYPE for the
+*          storage type.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  INFO    (output) INTEGER
+*          0  - successful exit
+*          <0 - if INFO = -i, the i-th argument had an illegal value.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            DONE
+      INTEGER            I, ITYPE, J, K1, K2, K3, K4
+      REAL               BIGNUM, CFROM1, CFROMC, CTO1, CTOC, MUL, SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+*
+      IF( LSAME( TYPE, 'G' ) ) THEN
+         ITYPE = 0
+      ELSE IF( LSAME( TYPE, 'L' ) ) THEN
+         ITYPE = 1
+      ELSE IF( LSAME( TYPE, 'U' ) ) THEN
+         ITYPE = 2
+      ELSE IF( LSAME( TYPE, 'H' ) ) THEN
+         ITYPE = 3
+      ELSE IF( LSAME( TYPE, 'B' ) ) THEN
+         ITYPE = 4
+      ELSE IF( LSAME( TYPE, 'Q' ) ) THEN
+         ITYPE = 5
+      ELSE IF( LSAME( TYPE, 'Z' ) ) THEN
+         ITYPE = 6
+      ELSE
+         ITYPE = -1
+      END IF
+*
+      IF( ITYPE.EQ.-1 ) THEN
+         INFO = -1
+      ELSE IF( CFROM.EQ.ZERO ) THEN
+         INFO = -4
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( N.LT.0 .OR. ( ITYPE.EQ.4 .AND. N.NE.M ) .OR.
+     $         ( ITYPE.EQ.5 .AND. N.NE.M ) ) THEN
+         INFO = -7
+      ELSE IF( ITYPE.LE.3 .AND. LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -9
+      ELSE IF( ITYPE.GE.4 ) THEN
+         IF( KL.LT.0 .OR. KL.GT.MAX( M-1, 0 ) ) THEN
+            INFO = -2
+         ELSE IF( KU.LT.0 .OR. KU.GT.MAX( N-1, 0 ) .OR.
+     $            ( ( ITYPE.EQ.4 .OR. ITYPE.EQ.5 ) .AND. KL.NE.KU ) )
+     $             THEN
+            INFO = -3
+         ELSE IF( ( ITYPE.EQ.4 .AND. LDA.LT.KL+1 ) .OR.
+     $            ( ITYPE.EQ.5 .AND. LDA.LT.KU+1 ) .OR.
+     $            ( ITYPE.EQ.6 .AND. LDA.LT.2*KL+KU+1 ) ) THEN
+            INFO = -9
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLASCL', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. M.EQ.0 )
+     $   RETURN
+*
+*     Get machine parameters
+*
+      SMLNUM = SLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+*
+      CFROMC = CFROM
+      CTOC = CTO
+*
+   10 CONTINUE
+      CFROM1 = CFROMC*SMLNUM
+      CTO1 = CTOC / BIGNUM
+      IF( ABS( CFROM1 ).GT.ABS( CTOC ) .AND. CTOC.NE.ZERO ) THEN
+         MUL = SMLNUM
+         DONE = .FALSE.
+         CFROMC = CFROM1
+      ELSE IF( ABS( CTO1 ).GT.ABS( CFROMC ) ) THEN
+         MUL = BIGNUM
+         DONE = .FALSE.
+         CTOC = CTO1
+      ELSE
+         MUL = CTOC / CFROMC
+         DONE = .TRUE.
+      END IF
+*
+      IF( ITYPE.EQ.0 ) THEN
+*
+*        Full matrix
+*
+         DO 30 J = 1, N
+            DO 20 I = 1, M
+               A( I, J ) = A( I, J )*MUL
+   20       CONTINUE
+   30    CONTINUE
+*
+      ELSE IF( ITYPE.EQ.1 ) THEN
+*
+*        Lower triangular matrix
+*
+         DO 50 J = 1, N
+            DO 40 I = J, M
+               A( I, J ) = A( I, J )*MUL
+   40       CONTINUE
+   50    CONTINUE
+*
+      ELSE IF( ITYPE.EQ.2 ) THEN
+*
+*        Upper triangular matrix
+*
+         DO 70 J = 1, N
+            DO 60 I = 1, MIN( J, M )
+               A( I, J ) = A( I, J )*MUL
+   60       CONTINUE
+   70    CONTINUE
+*
+      ELSE IF( ITYPE.EQ.3 ) THEN
+*
+*        Upper Hessenberg matrix
+*
+         DO 90 J = 1, N
+            DO 80 I = 1, MIN( J+1, M )
+               A( I, J ) = A( I, J )*MUL
+   80       CONTINUE
+   90    CONTINUE
+*
+      ELSE IF( ITYPE.EQ.4 ) THEN
+*
+*        Lower half of a symmetric band matrix
+*
+         K3 = KL + 1
+         K4 = N + 1
+         DO 110 J = 1, N
+            DO 100 I = 1, MIN( K3, K4-J )
+               A( I, J ) = A( I, J )*MUL
+  100       CONTINUE
+  110    CONTINUE
+*
+      ELSE IF( ITYPE.EQ.5 ) THEN
+*
+*        Upper half of a symmetric band matrix
+*
+         K1 = KU + 2
+         K3 = KU + 1
+         DO 130 J = 1, N
+            DO 120 I = MAX( K1-J, 1 ), K3
+               A( I, J ) = A( I, J )*MUL
+  120       CONTINUE
+  130    CONTINUE
+*
+      ELSE IF( ITYPE.EQ.6 ) THEN
+*
+*        Band matrix
+*
+         K1 = KL + KU + 2
+         K2 = KL + 1
+         K3 = 2*KL + KU + 1
+         K4 = KL + KU + 1 + M
+         DO 150 J = 1, N
+            DO 140 I = MAX( K1-J, K2 ), MIN( K3, K4-J )
+               A( I, J ) = A( I, J )*MUL
+  140       CONTINUE
+  150    CONTINUE
+*
+      END IF
+*
+      IF( .NOT.DONE )
+     $   GO TO 10
+*
+      RETURN
+*
+*     End of SLASCL
+*
+      END
diff --git a/costa/native/external/lapack/slasd0.f b/costa/native/external/lapack/slasd0.f
new file mode 100644
index 000000000..d39d129b6
--- /dev/null
+++ b/costa/native/external/lapack/slasd0.f
@@ -0,0 +1,231 @@
+      SUBROUTINE SLASD0( N, SQRE, D, E, U, LDU, VT, LDVT, SMLSIZ, IWORK,
+     $                   WORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDU, LDVT, N, SMLSIZ, SQRE
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               D( * ), E( * ), U( LDU, * ), VT( LDVT, * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  Using a divide and conquer approach, SLASD0 computes the singular
+*  value decomposition (SVD) of a real upper bidiagonal N-by-M
+*  matrix B with diagonal D and offdiagonal E, where M = N + SQRE.
+*  The algorithm computes orthogonal matrices U and VT such that
+*  B = U * S * VT. The singular values S are overwritten on D.
+*
+*  A related subroutine, SLASDA, computes only the singular values,
+*  and optionally, the singular vectors in compact form.
+*
+*  Arguments
+*  =========
+*
+*  N      (input) INTEGER
+*         On entry, the row dimension of the upper bidiagonal matrix.
+*         This is also the dimension of the main diagonal array D.
+*
+*  SQRE   (input) INTEGER
+*         Specifies the column dimension of the bidiagonal matrix.
+*         = 0: The bidiagonal matrix has column dimension M = N;
+*         = 1: The bidiagonal matrix has column dimension M = N+1;
+*
+*  D      (input/output) REAL array, dimension (N)
+*         On entry D contains the main diagonal of the bidiagonal
+*         matrix.
+*         On exit D, if INFO = 0, contains its singular values.
+*
+*  E      (input) REAL array, dimension (M-1)
+*         Contains the subdiagonal entries of the bidiagonal matrix.
+*         On exit, E has been destroyed.
+*
+*  U      (output) REAL array, dimension at least (LDQ, N)
+*         On exit, U contains the left singular vectors.
+*
+*  LDU    (input) INTEGER
+*         On entry, leading dimension of U.
+*
+*  VT     (output) REAL array, dimension at least (LDVT, M)
+*         On exit, VT' contains the right singular vectors.
+*
+*  LDVT   (input) INTEGER
+*         On entry, leading dimension of VT.
+*
+*  SMLSIZ (input) INTEGER
+*         On entry, maximum size of the subproblems at the
+*         bottom of the computation tree.
+*
+*  IWORK  INTEGER work array.
+*         Dimension must be at least (8 * N)
+*
+*  WORK   REAL work array.
+*         Dimension must be at least (3 * M**2 + 2 * M)
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = 1, an singular value did not converge
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, I1, IC, IDXQ, IDXQC, IM1, INODE, ITEMP, IWK,
+     $                   J, LF, LL, LVL, M, NCC, ND, NDB1, NDIML, NDIMR,
+     $                   NL, NLF, NLP1, NLVL, NR, NRF, NRP1, SQREI
+      REAL               ALPHA, BETA
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLASD1, SLASDQ, SLASDT, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN
+         INFO = -2
+      END IF
+*
+      M = N + SQRE
+*
+      IF( LDU.LT.N ) THEN
+         INFO = -6
+      ELSE IF( LDVT.LT.M ) THEN
+         INFO = -8
+      ELSE IF( SMLSIZ.LT.3 ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLASD0', -INFO )
+         RETURN
+      END IF
+*
+*     If the input matrix is too small, call SLASDQ to find the SVD.
+*
+      IF( N.LE.SMLSIZ ) THEN
+         CALL SLASDQ( 'U', SQRE, N, M, N, 0, D, E, VT, LDVT, U, LDU, U,
+     $                LDU, WORK, INFO )
+         RETURN
+      END IF
+*
+*     Set up the computation tree.
+*
+      INODE = 1
+      NDIML = INODE + N
+      NDIMR = NDIML + N
+      IDXQ = NDIMR + N
+      IWK = IDXQ + N
+      CALL SLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ),
+     $             IWORK( NDIMR ), SMLSIZ )
+*
+*     For the nodes on bottom level of the tree, solve
+*     their subproblems by SLASDQ.
+*
+      NDB1 = ( ND+1 ) / 2
+      NCC = 0
+      DO 30 I = NDB1, ND
+*
+*     IC : center row of each node
+*     NL : number of rows of left  subproblem
+*     NR : number of rows of right subproblem
+*     NLF: starting row of the left   subproblem
+*     NRF: starting row of the right  subproblem
+*
+         I1 = I - 1
+         IC = IWORK( INODE+I1 )
+         NL = IWORK( NDIML+I1 )
+         NLP1 = NL + 1
+         NR = IWORK( NDIMR+I1 )
+         NRP1 = NR + 1
+         NLF = IC - NL
+         NRF = IC + 1
+         SQREI = 1
+         CALL SLASDQ( 'U', SQREI, NL, NLP1, NL, NCC, D( NLF ), E( NLF ),
+     $                VT( NLF, NLF ), LDVT, U( NLF, NLF ), LDU,
+     $                U( NLF, NLF ), LDU, WORK, INFO )
+         IF( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+         ITEMP = IDXQ + NLF - 2
+         DO 10 J = 1, NL
+            IWORK( ITEMP+J ) = J
+   10    CONTINUE
+         IF( I.EQ.ND ) THEN
+            SQREI = SQRE
+         ELSE
+            SQREI = 1
+         END IF
+         NRP1 = NR + SQREI
+         CALL SLASDQ( 'U', SQREI, NR, NRP1, NR, NCC, D( NRF ), E( NRF ),
+     $                VT( NRF, NRF ), LDVT, U( NRF, NRF ), LDU,
+     $                U( NRF, NRF ), LDU, WORK, INFO )
+         IF( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+         ITEMP = IDXQ + IC
+         DO 20 J = 1, NR
+            IWORK( ITEMP+J-1 ) = J
+   20    CONTINUE
+   30 CONTINUE
+*
+*     Now conquer each subproblem bottom-up.
+*
+      DO 50 LVL = NLVL, 1, -1
+*
+*        Find the first node LF and last node LL on the
+*        current level LVL.
+*
+         IF( LVL.EQ.1 ) THEN
+            LF = 1
+            LL = 1
+         ELSE
+            LF = 2**( LVL-1 )
+            LL = 2*LF - 1
+         END IF
+         DO 40 I = LF, LL
+            IM1 = I - 1
+            IC = IWORK( INODE+IM1 )
+            NL = IWORK( NDIML+IM1 )
+            NR = IWORK( NDIMR+IM1 )
+            NLF = IC - NL
+            IF( ( SQRE.EQ.0 ) .AND. ( I.EQ.LL ) ) THEN
+               SQREI = SQRE
+            ELSE
+               SQREI = 1
+            END IF
+            IDXQC = IDXQ + NLF - 1
+            ALPHA = D( IC )
+            BETA = E( IC )
+            CALL SLASD1( NL, NR, SQREI, D( NLF ), ALPHA, BETA,
+     $                   U( NLF, NLF ), LDU, VT( NLF, NLF ), LDVT,
+     $                   IWORK( IDXQC ), IWORK( IWK ), WORK, INFO )
+            IF( INFO.NE.0 ) THEN
+               RETURN
+            END IF
+   40    CONTINUE
+   50 CONTINUE
+*
+      RETURN
+*
+*     End of SLASD0
+*
+      END
diff --git a/costa/native/external/lapack/slasd1.f b/costa/native/external/lapack/slasd1.f
new file mode 100644
index 000000000..03418b926
--- /dev/null
+++ b/costa/native/external/lapack/slasd1.f
@@ -0,0 +1,233 @@
+      SUBROUTINE SLASD1( NL, NR, SQRE, D, ALPHA, BETA, U, LDU, VT, LDVT,
+     $                   IDXQ, IWORK, WORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDU, LDVT, NL, NR, SQRE
+      REAL               ALPHA, BETA
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IDXQ( * ), IWORK( * )
+      REAL               D( * ), U( LDU, * ), VT( LDVT, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B,
+*  where N = NL + NR + 1 and M = N + SQRE. SLASD1 is called from SLASD0.
+*
+*  A related subroutine SLASD7 handles the case in which the singular
+*  values (and the singular vectors in factored form) are desired.
+*
+*  SLASD1 computes the SVD as follows:
+*
+*                ( D1(in)  0    0     0 )
+*    B = U(in) * (   Z1'   a   Z2'    b ) * VT(in)
+*                (   0     0   D2(in) 0 )
+*
+*      = U(out) * ( D(out) 0) * VT(out)
+*
+*  where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M
+*  with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros
+*  elsewhere; and the entry b is empty if SQRE = 0.
+*
+*  The left singular vectors of the original matrix are stored in U, and
+*  the transpose of the right singular vectors are stored in VT, and the
+*  singular values are in D.  The algorithm consists of three stages:
+*
+*     The first stage consists of deflating the size of the problem
+*     when there are multiple singular values or when there are zeros in
+*     the Z vector.  For each such occurence the dimension of the
+*     secular equation problem is reduced by one.  This stage is
+*     performed by the routine SLASD2.
+*
+*     The second stage consists of calculating the updated
+*     singular values. This is done by finding the square roots of the
+*     roots of the secular equation via the routine SLASD4 (as called
+*     by SLASD3). This routine also calculates the singular vectors of
+*     the current problem.
+*
+*     The final stage consists of computing the updated singular vectors
+*     directly using the updated singular values.  The singular vectors
+*     for the current problem are multiplied with the singular vectors
+*     from the overall problem.
+*
+*  Arguments
+*  =========
+*
+*  NL     (input) INTEGER
+*         The row dimension of the upper block.  NL >= 1.
+*
+*  NR     (input) INTEGER
+*         The row dimension of the lower block.  NR >= 1.
+*
+*  SQRE   (input) INTEGER
+*         = 0: the lower block is an NR-by-NR square matrix.
+*         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+*
+*         The bidiagonal matrix has row dimension N = NL + NR + 1,
+*         and column dimension M = N + SQRE.
+*
+*  D      (input/output) REAL array,
+*                        dimension (N = NL+NR+1).
+*         On entry D(1:NL,1:NL) contains the singular values of the
+*         upper block; and D(NL+2:N) contains the singular values of
+*         the lower block. On exit D(1:N) contains the singular values
+*         of the modified matrix.
+*
+*  ALPHA  (input) REAL
+*         Contains the diagonal element associated with the added row.
+*
+*  BETA   (input) REAL
+*         Contains the off-diagonal element associated with the added
+*         row.
+*
+*  U      (input/output) REAL array, dimension(LDU,N)
+*         On entry U(1:NL, 1:NL) contains the left singular vectors of
+*         the upper block; U(NL+2:N, NL+2:N) contains the left singular
+*         vectors of the lower block. On exit U contains the left
+*         singular vectors of the bidiagonal matrix.
+*
+*  LDU    (input) INTEGER
+*         The leading dimension of the array U.  LDU >= max( 1, N ).
+*
+*  VT     (input/output) REAL array, dimension(LDVT,M)
+*         where M = N + SQRE.
+*         On entry VT(1:NL+1, 1:NL+1)' contains the right singular
+*         vectors of the upper block; VT(NL+2:M, NL+2:M)' contains
+*         the right singular vectors of the lower block. On exit
+*         VT' contains the right singular vectors of the
+*         bidiagonal matrix.
+*
+*  LDVT   (input) INTEGER
+*         The leading dimension of the array VT.  LDVT >= max( 1, M ).
+*
+*  IDXQ  (output) INTEGER array, dimension(N)
+*         This contains the permutation which will reintegrate the
+*         subproblem just solved back into sorted order, i.e.
+*         D( IDXQ( I = 1, N ) ) will be in ascending order.
+*
+*  IWORK  (workspace) INTEGER array, dimension( 4 * N )
+*
+*  WORK   (workspace) REAL array, dimension( 3*M**2 + 2*M )
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = 1, an singular value did not converge
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+*
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            COLTYP, I, IDX, IDXC, IDXP, IQ, ISIGMA, IU2,
+     $                   IVT2, IZ, K, LDQ, LDU2, LDVT2, M, N, N1, N2
+      REAL               ORGNRM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLAMRG, SLASCL, SLASD2, SLASD3, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( NL.LT.1 ) THEN
+         INFO = -1
+      ELSE IF( NR.LT.1 ) THEN
+         INFO = -2
+      ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN
+         INFO = -3
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLASD1', -INFO )
+         RETURN
+      END IF
+*
+      N = NL + NR + 1
+      M = N + SQRE
+*
+*     The following values are for bookkeeping purposes only.  They are
+*     integer pointers which indicate the portion of the workspace
+*     used by a particular array in SLASD2 and SLASD3.
+*
+      LDU2 = N
+      LDVT2 = M
+*
+      IZ = 1
+      ISIGMA = IZ + M
+      IU2 = ISIGMA + N
+      IVT2 = IU2 + LDU2*N
+      IQ = IVT2 + LDVT2*M
+*
+      IDX = 1
+      IDXC = IDX + N
+      COLTYP = IDXC + N
+      IDXP = COLTYP + N
+*
+*     Scale.
+*
+      ORGNRM = MAX( ABS( ALPHA ), ABS( BETA ) )
+      D( NL+1 ) = ZERO
+      DO 10 I = 1, N
+         IF( ABS( D( I ) ).GT.ORGNRM ) THEN
+            ORGNRM = ABS( D( I ) )
+         END IF
+   10 CONTINUE
+      CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO )
+      ALPHA = ALPHA / ORGNRM
+      BETA = BETA / ORGNRM
+*
+*     Deflate singular values.
+*
+      CALL SLASD2( NL, NR, SQRE, K, D, WORK( IZ ), ALPHA, BETA, U, LDU,
+     $             VT, LDVT, WORK( ISIGMA ), WORK( IU2 ), LDU2,
+     $             WORK( IVT2 ), LDVT2, IWORK( IDXP ), IWORK( IDX ),
+     $             IWORK( IDXC ), IDXQ, IWORK( COLTYP ), INFO )
+*
+*     Solve Secular Equation and update singular vectors.
+*
+      LDQ = K
+      CALL SLASD3( NL, NR, SQRE, K, D, WORK( IQ ), LDQ, WORK( ISIGMA ),
+     $             U, LDU, WORK( IU2 ), LDU2, VT, LDVT, WORK( IVT2 ),
+     $             LDVT2, IWORK( IDXC ), IWORK( COLTYP ), WORK( IZ ),
+     $             INFO )
+      IF( INFO.NE.0 ) THEN
+         RETURN
+      END IF
+*
+*     Unscale.
+*
+      CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO )
+*
+*     Prepare the IDXQ sorting permutation.
+*
+      N1 = K
+      N2 = N - K
+      CALL SLAMRG( N1, N2, D, 1, -1, IDXQ )
+*
+      RETURN
+*
+*     End of SLASD1
+*
+      END
diff --git a/costa/native/external/lapack/slasd2.f b/costa/native/external/lapack/slasd2.f
new file mode 100644
index 000000000..dd9c10716
--- /dev/null
+++ b/costa/native/external/lapack/slasd2.f
@@ -0,0 +1,513 @@
+      SUBROUTINE SLASD2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT,
+     $                   LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX,
+     $                   IDXC, IDXQ, COLTYP, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+*     Courant Institute, NAG Ltd., and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE
+      REAL               ALPHA, BETA
+*     ..
+*     .. Array Arguments ..
+      INTEGER            COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ),
+     $                   IDXQ( * )
+      REAL               D( * ), DSIGMA( * ), U( LDU, * ),
+     $                   U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ),
+     $                   Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLASD2 merges the two sets of singular values together into a single
+*  sorted set.  Then it tries to deflate the size of the problem.
+*  There are two ways in which deflation can occur:  when two or more
+*  singular values are close together or if there is a tiny entry in the
+*  Z vector.  For each such occurrence the order of the related secular
+*  equation problem is reduced by one.
+*
+*  SLASD2 is called from SLASD1.
+*
+*  Arguments
+*  =========
+*
+*  NL     (input) INTEGER
+*         The row dimension of the upper block.  NL >= 1.
+*
+*  NR     (input) INTEGER
+*         The row dimension of the lower block.  NR >= 1.
+*
+*  SQRE   (input) INTEGER
+*         = 0: the lower block is an NR-by-NR square matrix.
+*         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+*
+*         The bidiagonal matrix has N = NL + NR + 1 rows and
+*         M = N + SQRE >= N columns.
+*
+*  K      (output) INTEGER
+*         Contains the dimension of the non-deflated matrix,
+*         This is the order of the related secular equation. 1 <= K <=N.
+*
+*  D      (input/output) REAL array, dimension(N)
+*         On entry D contains the singular values of the two submatrices
+*         to be combined.  On exit D contains the trailing (N-K) updated
+*         singular values (those which were deflated) sorted into
+*         increasing order.
+*
+*  ALPHA  (input) REAL
+*         Contains the diagonal element associated with the added row.
+*
+*  BETA   (input) REAL
+*         Contains the off-diagonal element associated with the added
+*         row.
+*
+*  U      (input/output) REAL array, dimension(LDU,N)
+*         On entry U contains the left singular vectors of two
+*         submatrices in the two square blocks with corners at (1,1),
+*         (NL, NL), and (NL+2, NL+2), (N,N).
+*         On exit U contains the trailing (N-K) updated left singular
+*         vectors (those which were deflated) in its last N-K columns.
+*
+*  LDU    (input) INTEGER
+*         The leading dimension of the array U.  LDU >= N.
+*
+*  Z      (output) REAL array, dimension(N)
+*         On exit Z contains the updating row vector in the secular
+*         equation.
+*
+*  DSIGMA (output) REAL array, dimension (N)
+*         Contains a copy of the diagonal elements (K-1 singular values
+*         and one zero) in the secular equation.
+*
+*  U2     (output) REAL array, dimension(LDU2,N)
+*         Contains a copy of the first K-1 left singular vectors which
+*         will be used by SLASD3 in a matrix multiply (SGEMM) to solve
+*         for the new left singular vectors. U2 is arranged into four
+*         blocks. The first block contains a column with 1 at NL+1 and
+*         zero everywhere else; the second block contains non-zero
+*         entries only at and above NL; the third contains non-zero
+*         entries only below NL+1; and the fourth is dense.
+*
+*  LDU2   (input) INTEGER
+*         The leading dimension of the array U2.  LDU2 >= N.
+*
+*  VT     (input/output) REAL array, dimension(LDVT,M)
+*         On entry VT' contains the right singular vectors of two
+*         submatrices in the two square blocks with corners at (1,1),
+*         (NL+1, NL+1), and (NL+2, NL+2), (M,M).
+*         On exit VT' contains the trailing (N-K) updated right singular
+*         vectors (those which were deflated) in its last N-K columns.
+*         In case SQRE =1, the last row of VT spans the right null
+*         space.
+*
+*  LDVT   (input) INTEGER
+*         The leading dimension of the array VT.  LDVT >= M.
+*
+*  VT2    (output) REAL array, dimension(LDVT2,N)
+*         VT2' contains a copy of the first K right singular vectors
+*         which will be used by SLASD3 in a matrix multiply (SGEMM) to
+*         solve for the new right singular vectors. VT2 is arranged into
+*         three blocks. The first block contains a row that corresponds
+*         to the special 0 diagonal element in SIGMA; the second block
+*         contains non-zeros only at and before NL +1; the third block
+*         contains non-zeros only at and after  NL +2.
+*
+*  LDVT2  (input) INTEGER
+*         The leading dimension of the array VT2.  LDVT2 >= M.
+*
+*  IDXP   (workspace) INTEGER array, dimension(N)
+*         This will contain the permutation used to place deflated
+*         values of D at the end of the array. On output IDXP(2:K)
+*         points to the nondeflated D-values and IDXP(K+1:N)
+*         points to the deflated singular values.
+*
+*  IDX    (workspace) INTEGER array, dimension(N)
+*         This will contain the permutation used to sort the contents of
+*         D into ascending order.
+*
+*  IDXC   (output) INTEGER array, dimension(N)
+*         This will contain the permutation used to arrange the columns
+*         of the deflated U matrix into three groups:  the first group
+*         contains non-zero entries only at and above NL, the second
+*         contains non-zero entries only below NL+2, and the third is
+*         dense.
+*
+*  COLTYP (workspace/output) INTEGER array, dimension(N)
+*         As workspace, this will contain a label which will indicate
+*         which of the following types a column in the U2 matrix or a
+*         row in the VT2 matrix is:
+*         1 : non-zero in the upper half only
+*         2 : non-zero in the lower half only
+*         3 : dense
+*         4 : deflated
+*
+*         On exit, it is an array of dimension 4, with COLTYP(I) being
+*         the dimension of the I-th type columns.
+*
+*  IDXQ   (input) INTEGER array, dimension(N)
+*         This contains the permutation which separately sorts the two
+*         sub-problems in D into ascending order.  Note that entries in
+*         the first hlaf of this permutation must first be moved one
+*         position backward; and entries in the second half
+*         must first have NL+1 added to their values.
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE, TWO, EIGHT
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0,
+     $                   EIGHT = 8.0E+0 )
+*     ..
+*     .. Local Arrays ..
+      INTEGER            CTOT( 4 ), PSM( 4 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            CT, I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M,
+     $                   N, NLP1, NLP2
+      REAL               C, EPS, HLFTOL, S, TAU, TOL, Z1
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH, SLAPY2
+      EXTERNAL           SLAMCH, SLAPY2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SLACPY, SLAMRG, SLASET, SROT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( NL.LT.1 ) THEN
+         INFO = -1
+      ELSE IF( NR.LT.1 ) THEN
+         INFO = -2
+      ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN
+         INFO = -3
+      END IF
+*
+      N = NL + NR + 1
+      M = N + SQRE
+*
+      IF( LDU.LT.N ) THEN
+         INFO = -10
+      ELSE IF( LDVT.LT.M ) THEN
+         INFO = -12
+      ELSE IF( LDU2.LT.N ) THEN
+         INFO = -15
+      ELSE IF( LDVT2.LT.M ) THEN
+         INFO = -17
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLASD2', -INFO )
+         RETURN
+      END IF
+*
+      NLP1 = NL + 1
+      NLP2 = NL + 2
+*
+*     Generate the first part of the vector Z; and move the singular
+*     values in the first part of D one position backward.
+*
+      Z1 = ALPHA*VT( NLP1, NLP1 )
+      Z( 1 ) = Z1
+      DO 10 I = NL, 1, -1
+         Z( I+1 ) = ALPHA*VT( I, NLP1 )
+         D( I+1 ) = D( I )
+         IDXQ( I+1 ) = IDXQ( I ) + 1
+   10 CONTINUE
+*
+*     Generate the second part of the vector Z.
+*
+      DO 20 I = NLP2, M
+         Z( I ) = BETA*VT( I, NLP2 )
+   20 CONTINUE
+*
+*     Initialize some reference arrays.
+*
+      DO 30 I = 2, NLP1
+         COLTYP( I ) = 1
+   30 CONTINUE
+      DO 40 I = NLP2, N
+         COLTYP( I ) = 2
+   40 CONTINUE
+*
+*     Sort the singular values into increasing order
+*
+      DO 50 I = NLP2, N
+         IDXQ( I ) = IDXQ( I ) + NLP1
+   50 CONTINUE
+*
+*     DSIGMA, IDXC, IDXC, and the first column of U2
+*     are used as storage space.
+*
+      DO 60 I = 2, N
+         DSIGMA( I ) = D( IDXQ( I ) )
+         U2( I, 1 ) = Z( IDXQ( I ) )
+         IDXC( I ) = COLTYP( IDXQ( I ) )
+   60 CONTINUE
+*
+      CALL SLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) )
+*
+      DO 70 I = 2, N
+         IDXI = 1 + IDX( I )
+         D( I ) = DSIGMA( IDXI )
+         Z( I ) = U2( IDXI, 1 )
+         COLTYP( I ) = IDXC( IDXI )
+   70 CONTINUE
+*
+*     Calculate the allowable deflation tolerance
+*
+      EPS = SLAMCH( 'Epsilon' )
+      TOL = MAX( ABS( ALPHA ), ABS( BETA ) )
+      TOL = EIGHT*EPS*MAX( ABS( D( N ) ), TOL )
+*
+*     There are 2 kinds of deflation -- first a value in the z-vector
+*     is small, second two (or more) singular values are very close
+*     together (their difference is small).
+*
+*     If the value in the z-vector is small, we simply permute the
+*     array so that the corresponding singular value is moved to the
+*     end.
+*
+*     If two values in the D-vector are close, we perform a two-sided
+*     rotation designed to make one of the corresponding z-vector
+*     entries zero, and then permute the array so that the deflated
+*     singular value is moved to the end.
+*
+*     If there are multiple singular values then the problem deflates.
+*     Here the number of equal singular values are found.  As each equal
+*     singular value is found, an elementary reflector is computed to
+*     rotate the corresponding singular subspace so that the
+*     corresponding components of Z are zero in this new basis.
+*
+      K = 1
+      K2 = N + 1
+      DO 80 J = 2, N
+         IF( ABS( Z( J ) ).LE.TOL ) THEN
+*
+*           Deflate due to small z component.
+*
+            K2 = K2 - 1
+            IDXP( K2 ) = J
+            COLTYP( J ) = 4
+            IF( J.EQ.N )
+     $         GO TO 120
+         ELSE
+            JPREV = J
+            GO TO 90
+         END IF
+   80 CONTINUE
+   90 CONTINUE
+      J = JPREV
+  100 CONTINUE
+      J = J + 1
+      IF( J.GT.N )
+     $   GO TO 110
+      IF( ABS( Z( J ) ).LE.TOL ) THEN
+*
+*        Deflate due to small z component.
+*
+         K2 = K2 - 1
+         IDXP( K2 ) = J
+         COLTYP( J ) = 4
+      ELSE
+*
+*        Check if singular values are close enough to allow deflation.
+*
+         IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN
+*
+*           Deflation is possible.
+*
+            S = Z( JPREV )
+            C = Z( J )
+*
+*           Find sqrt(a**2+b**2) without overflow or
+*           destructive underflow.
+*
+            TAU = SLAPY2( C, S )
+            C = C / TAU
+            S = -S / TAU
+            Z( J ) = TAU
+            Z( JPREV ) = ZERO
+*
+*           Apply back the Givens rotation to the left and right
+*           singular vector matrices.
+*
+            IDXJP = IDXQ( IDX( JPREV )+1 )
+            IDXJ = IDXQ( IDX( J )+1 )
+            IF( IDXJP.LE.NLP1 ) THEN
+               IDXJP = IDXJP - 1
+            END IF
+            IF( IDXJ.LE.NLP1 ) THEN
+               IDXJ = IDXJ - 1
+            END IF
+            CALL SROT( N, U( 1, IDXJP ), 1, U( 1, IDXJ ), 1, C, S )
+            CALL SROT( M, VT( IDXJP, 1 ), LDVT, VT( IDXJ, 1 ), LDVT, C,
+     $                 S )
+            IF( COLTYP( J ).NE.COLTYP( JPREV ) ) THEN
+               COLTYP( J ) = 3
+            END IF
+            COLTYP( JPREV ) = 4
+            K2 = K2 - 1
+            IDXP( K2 ) = JPREV
+            JPREV = J
+         ELSE
+            K = K + 1
+            U2( K, 1 ) = Z( JPREV )
+            DSIGMA( K ) = D( JPREV )
+            IDXP( K ) = JPREV
+            JPREV = J
+         END IF
+      END IF
+      GO TO 100
+  110 CONTINUE
+*
+*     Record the last singular value.
+*
+      K = K + 1
+      U2( K, 1 ) = Z( JPREV )
+      DSIGMA( K ) = D( JPREV )
+      IDXP( K ) = JPREV
+*
+  120 CONTINUE
+*
+*     Count up the total number of the various types of columns, then
+*     form a permutation which positions the four column types into
+*     four groups of uniform structure (although one or more of these
+*     groups may be empty).
+*
+      DO 130 J = 1, 4
+         CTOT( J ) = 0
+  130 CONTINUE
+      DO 140 J = 2, N
+         CT = COLTYP( J )
+         CTOT( CT ) = CTOT( CT ) + 1
+  140 CONTINUE
+*
+*     PSM(*) = Position in SubMatrix (of types 1 through 4)
+*
+      PSM( 1 ) = 2
+      PSM( 2 ) = 2 + CTOT( 1 )
+      PSM( 3 ) = PSM( 2 ) + CTOT( 2 )
+      PSM( 4 ) = PSM( 3 ) + CTOT( 3 )
+*
+*     Fill out the IDXC array so that the permutation which it induces
+*     will place all type-1 columns first, all type-2 columns next,
+*     then all type-3's, and finally all type-4's, starting from the
+*     second column. This applies similarly to the rows of VT.
+*
+      DO 150 J = 2, N
+         JP = IDXP( J )
+         CT = COLTYP( JP )
+         IDXC( PSM( CT ) ) = J
+         PSM( CT ) = PSM( CT ) + 1
+  150 CONTINUE
+*
+*     Sort the singular values and corresponding singular vectors into
+*     DSIGMA, U2, and VT2 respectively.  The singular values/vectors
+*     which were not deflated go into the first K slots of DSIGMA, U2,
+*     and VT2 respectively, while those which were deflated go into the
+*     last N - K slots, except that the first column/row will be treated
+*     separately.
+*
+      DO 160 J = 2, N
+         JP = IDXP( J )
+         DSIGMA( J ) = D( JP )
+         IDXJ = IDXQ( IDX( IDXP( IDXC( J ) ) )+1 )
+         IF( IDXJ.LE.NLP1 ) THEN
+            IDXJ = IDXJ - 1
+         END IF
+         CALL SCOPY( N, U( 1, IDXJ ), 1, U2( 1, J ), 1 )
+         CALL SCOPY( M, VT( IDXJ, 1 ), LDVT, VT2( J, 1 ), LDVT2 )
+  160 CONTINUE
+*
+*     Determine DSIGMA(1), DSIGMA(2) and Z(1)
+*
+      DSIGMA( 1 ) = ZERO
+      HLFTOL = TOL / TWO
+      IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL )
+     $   DSIGMA( 2 ) = HLFTOL
+      IF( M.GT.N ) THEN
+         Z( 1 ) = SLAPY2( Z1, Z( M ) )
+         IF( Z( 1 ).LE.TOL ) THEN
+            C = ONE
+            S = ZERO
+            Z( 1 ) = TOL
+         ELSE
+            C = Z1 / Z( 1 )
+            S = Z( M ) / Z( 1 )
+         END IF
+      ELSE
+         IF( ABS( Z1 ).LE.TOL ) THEN
+            Z( 1 ) = TOL
+         ELSE
+            Z( 1 ) = Z1
+         END IF
+      END IF
+*
+*     Move the rest of the updating row to Z.
+*
+      CALL SCOPY( K-1, U2( 2, 1 ), 1, Z( 2 ), 1 )
+*
+*     Determine the first column of U2, the first row of VT2 and the
+*     last row of VT.
+*
+      CALL SLASET( 'A', N, 1, ZERO, ZERO, U2, LDU2 )
+      U2( NLP1, 1 ) = ONE
+      IF( M.GT.N ) THEN
+         DO 170 I = 1, NLP1
+            VT( M, I ) = -S*VT( NLP1, I )
+            VT2( 1, I ) = C*VT( NLP1, I )
+  170    CONTINUE
+         DO 180 I = NLP2, M
+            VT2( 1, I ) = S*VT( M, I )
+            VT( M, I ) = C*VT( M, I )
+  180    CONTINUE
+      ELSE
+         CALL SCOPY( M, VT( NLP1, 1 ), LDVT, VT2( 1, 1 ), LDVT2 )
+      END IF
+      IF( M.GT.N ) THEN
+         CALL SCOPY( M, VT( M, 1 ), LDVT, VT2( M, 1 ), LDVT2 )
+      END IF
+*
+*     The deflated singular values and their corresponding vectors go
+*     into the back of D, U, and V respectively.
+*
+      IF( N.GT.K ) THEN
+         CALL SCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 )
+         CALL SLACPY( 'A', N, N-K, U2( 1, K+1 ), LDU2, U( 1, K+1 ),
+     $                LDU )
+         CALL SLACPY( 'A', N-K, M, VT2( K+1, 1 ), LDVT2, VT( K+1, 1 ),
+     $                LDVT )
+      END IF
+*
+*     Copy CTOT into COLTYP for referencing in SLASD3.
+*
+      DO 190 J = 1, 4
+         COLTYP( J ) = CTOT( J )
+  190 CONTINUE
+*
+      RETURN
+*
+*     End of SLASD2
+*
+      END
diff --git a/costa/native/external/lapack/slasd3.f b/costa/native/external/lapack/slasd3.f
new file mode 100644
index 000000000..2e1af51cd
--- /dev/null
+++ b/costa/native/external/lapack/slasd3.f
@@ -0,0 +1,359 @@
+      SUBROUTINE SLASD3( NL, NR, SQRE, K, D, Q, LDQ, DSIGMA, U, LDU, U2,
+     $                   LDU2, VT, LDVT, VT2, LDVT2, IDXC, CTOT, Z,
+     $                   INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+*     Courant Institute, NAG Ltd., and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDQ, LDU, LDU2, LDVT, LDVT2, NL, NR,
+     $                   SQRE
+*     ..
+*     .. Array Arguments ..
+      INTEGER            CTOT( * ), IDXC( * )
+      REAL               D( * ), DSIGMA( * ), Q( LDQ, * ), U( LDU, * ),
+     $                   U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ),
+     $                   Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLASD3 finds all the square roots of the roots of the secular
+*  equation, as defined by the values in D and Z.  It makes the
+*  appropriate calls to SLASD4 and then updates the singular
+*  vectors by matrix multiplication.
+*
+*  This code makes very mild assumptions about floating point
+*  arithmetic. It will work on machines with a guard digit in
+*  add/subtract, or on those binary machines without guard digits
+*  which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2.
+*  It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  SLASD3 is called from SLASD1.
+*
+*  Arguments
+*  =========
+*
+*  NL     (input) INTEGER
+*         The row dimension of the upper block.  NL >= 1.
+*
+*  NR     (input) INTEGER
+*         The row dimension of the lower block.  NR >= 1.
+*
+*  SQRE   (input) INTEGER
+*         = 0: the lower block is an NR-by-NR square matrix.
+*         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+*
+*         The bidiagonal matrix has N = NL + NR + 1 rows and
+*         M = N + SQRE >= N columns.
+*
+*  K      (input) INTEGER
+*         The size of the secular equation, 1 =< K = < N.
+*
+*  D      (output) REAL array, dimension(K)
+*         On exit the square roots of the roots of the secular equation,
+*         in ascending order.
+*
+*  Q      (workspace) REAL array,
+*                     dimension at least (LDQ,K).
+*
+*  LDQ    (input) INTEGER
+*         The leading dimension of the array Q.  LDQ >= K.
+*
+*  DSIGMA (input) REAL array, dimension(K)
+*         The first K elements of this array contain the old roots
+*         of the deflated updating problem.  These are the poles
+*         of the secular equation.
+*
+*  U      (input) REAL array, dimension (LDU, N)
+*         The last N - K columns of this matrix contain the deflated
+*         left singular vectors.
+*
+*  LDU    (input) INTEGER
+*         The leading dimension of the array U.  LDU >= N.
+*
+*  U2     (input) REAL array, dimension (LDU2, N)
+*         The first K columns of this matrix contain the non-deflated
+*         left singular vectors for the split problem.
+*
+*  LDU2   (input) INTEGER
+*         The leading dimension of the array U2.  LDU2 >= N.
+*
+*  VT     (input) REAL array, dimension (LDVT, M)
+*         The last M - K columns of VT' contain the deflated
+*         right singular vectors.
+*
+*  LDVT   (input) INTEGER
+*         The leading dimension of the array VT.  LDVT >= N.
+*
+*  VT2    (input) REAL array, dimension (LDVT2, N)
+*         The first K columns of VT2' contain the non-deflated
+*         right singular vectors for the split problem.
+*
+*  LDVT2  (input) INTEGER
+*         The leading dimension of the array VT2.  LDVT2 >= N.
+*
+*  IDXC   (input) INTEGER array, dimension ( N )
+*         The permutation used to arrange the columns of U (and rows of
+*         VT) into three groups:  the first group contains non-zero
+*         entries only at and above (or before) NL +1; the second
+*         contains non-zero entries only at and below (or after) NL+2;
+*         and the third is dense. The first column of U and the row of
+*         VT are treated separately, however.
+*
+*         The rows of the singular vectors found by SLASD4
+*         must be likewise permuted before the matrix multiplies can
+*         take place.
+*
+*  CTOT   (input) INTEGER array, dimension ( 4 )
+*         A count of the total number of the various types of columns
+*         in U (or rows in VT), as described in IDXC. The fourth column
+*         type is any column which has been deflated.
+*
+*  Z      (input) REAL array, dimension (K)
+*         The first K elements of this array contain the components
+*         of the deflation-adjusted updating row vector.
+*
+*  INFO   (output) INTEGER
+*         = 0:  successful exit.
+*         < 0:  if INFO = -i, the i-th argument had an illegal value.
+*         > 0:  if INFO = 1, an singular value did not converge
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO, NEGONE
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0,
+     $                     NEGONE = -1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            CTEMP, I, J, JC, KTEMP, M, N, NLP1, NLP2, NRP1
+      REAL               RHO, TEMP
+*     ..
+*     .. External Functions ..
+      REAL               SLAMC3, SNRM2
+      EXTERNAL           SLAMC3, SNRM2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SGEMM, SLACPY, SLASCL, SLASD4, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, SIGN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( NL.LT.1 ) THEN
+         INFO = -1
+      ELSE IF( NR.LT.1 ) THEN
+         INFO = -2
+      ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN
+         INFO = -3
+      END IF
+*
+      N = NL + NR + 1
+      M = N + SQRE
+      NLP1 = NL + 1
+      NLP2 = NL + 2
+*
+      IF( ( K.LT.1 ) .OR. ( K.GT.N ) ) THEN
+         INFO = -4
+      ELSE IF( LDQ.LT.K ) THEN
+         INFO = -7
+      ELSE IF( LDU.LT.N ) THEN
+         INFO = -10
+      ELSE IF( LDU2.LT.N ) THEN
+         INFO = -12
+      ELSE IF( LDVT.LT.M ) THEN
+         INFO = -14
+      ELSE IF( LDVT2.LT.M ) THEN
+         INFO = -16
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLASD3', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( K.EQ.1 ) THEN
+         D( 1 ) = ABS( Z( 1 ) )
+         CALL SCOPY( M, VT2( 1, 1 ), LDVT2, VT( 1, 1 ), LDVT )
+         IF( Z( 1 ).GT.ZERO ) THEN
+            CALL SCOPY( N, U2( 1, 1 ), 1, U( 1, 1 ), 1 )
+         ELSE
+            DO 10 I = 1, N
+               U( I, 1 ) = -U2( I, 1 )
+   10       CONTINUE
+         END IF
+         RETURN
+      END IF
+*
+*     Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can
+*     be computed with high relative accuracy (barring over/underflow).
+*     This is a problem on machines without a guard digit in
+*     add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
+*     The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I),
+*     which on any of these machines zeros out the bottommost
+*     bit of DSIGMA(I) if it is 1; this makes the subsequent
+*     subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation
+*     occurs. On binary machines with a guard digit (almost all
+*     machines) it does not change DSIGMA(I) at all. On hexadecimal
+*     and decimal machines with a guard digit, it slightly
+*     changes the bottommost bits of DSIGMA(I). It does not account
+*     for hexadecimal or decimal machines without guard digits
+*     (we know of none). We use a subroutine call to compute
+*     2*DLAMBDA(I) to prevent optimizing compilers from eliminating
+*     this code.
+*
+      DO 20 I = 1, K
+         DSIGMA( I ) = SLAMC3( DSIGMA( I ), DSIGMA( I ) ) - DSIGMA( I )
+   20 CONTINUE
+*
+*     Keep a copy of Z.
+*
+      CALL SCOPY( K, Z, 1, Q, 1 )
+*
+*     Normalize Z.
+*
+      RHO = SNRM2( K, Z, 1 )
+      CALL SLASCL( 'G', 0, 0, RHO, ONE, K, 1, Z, K, INFO )
+      RHO = RHO*RHO
+*
+*     Find the new singular values.
+*
+      DO 30 J = 1, K
+         CALL SLASD4( K, J, DSIGMA, Z, U( 1, J ), RHO, D( J ),
+     $                VT( 1, J ), INFO )
+*
+*        If the zero finder fails, the computation is terminated.
+*
+         IF( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+   30 CONTINUE
+*
+*     Compute updated Z.
+*
+      DO 60 I = 1, K
+         Z( I ) = U( I, K )*VT( I, K )
+         DO 40 J = 1, I - 1
+            Z( I ) = Z( I )*( U( I, J )*VT( I, J ) /
+     $               ( DSIGMA( I )-DSIGMA( J ) ) /
+     $               ( DSIGMA( I )+DSIGMA( J ) ) )
+   40    CONTINUE
+         DO 50 J = I, K - 1
+            Z( I ) = Z( I )*( U( I, J )*VT( I, J ) /
+     $               ( DSIGMA( I )-DSIGMA( J+1 ) ) /
+     $               ( DSIGMA( I )+DSIGMA( J+1 ) ) )
+   50    CONTINUE
+         Z( I ) = SIGN( SQRT( ABS( Z( I ) ) ), Q( I, 1 ) )
+   60 CONTINUE
+*
+*     Compute left singular vectors of the modified diagonal matrix,
+*     and store related information for the right singular vectors.
+*
+      DO 90 I = 1, K
+         VT( 1, I ) = Z( 1 ) / U( 1, I ) / VT( 1, I )
+         U( 1, I ) = NEGONE
+         DO 70 J = 2, K
+            VT( J, I ) = Z( J ) / U( J, I ) / VT( J, I )
+            U( J, I ) = DSIGMA( J )*VT( J, I )
+   70    CONTINUE
+         TEMP = SNRM2( K, U( 1, I ), 1 )
+         Q( 1, I ) = U( 1, I ) / TEMP
+         DO 80 J = 2, K
+            JC = IDXC( J )
+            Q( J, I ) = U( JC, I ) / TEMP
+   80    CONTINUE
+   90 CONTINUE
+*
+*     Update the left singular vector matrix.
+*
+      IF( K.EQ.2 ) THEN
+         CALL SGEMM( 'N', 'N', N, K, K, ONE, U2, LDU2, Q, LDQ, ZERO, U,
+     $               LDU )
+         GO TO 100
+      END IF
+      IF( CTOT( 1 ).GT.0 ) THEN
+         CALL SGEMM( 'N', 'N', NL, K, CTOT( 1 ), ONE, U2( 1, 2 ), LDU2,
+     $               Q( 2, 1 ), LDQ, ZERO, U( 1, 1 ), LDU )
+         IF( CTOT( 3 ).GT.0 ) THEN
+            KTEMP = 2 + CTOT( 1 ) + CTOT( 2 )
+            CALL SGEMM( 'N', 'N', NL, K, CTOT( 3 ), ONE, U2( 1, KTEMP ),
+     $                  LDU2, Q( KTEMP, 1 ), LDQ, ONE, U( 1, 1 ), LDU )
+         END IF
+      ELSE IF( CTOT( 3 ).GT.0 ) THEN
+         KTEMP = 2 + CTOT( 1 ) + CTOT( 2 )
+         CALL SGEMM( 'N', 'N', NL, K, CTOT( 3 ), ONE, U2( 1, KTEMP ),
+     $               LDU2, Q( KTEMP, 1 ), LDQ, ZERO, U( 1, 1 ), LDU )
+      ELSE
+         CALL SLACPY( 'F', NL, K, U2, LDU2, U, LDU )
+      END IF
+      CALL SCOPY( K, Q( 1, 1 ), LDQ, U( NLP1, 1 ), LDU )
+      KTEMP = 2 + CTOT( 1 )
+      CTEMP = CTOT( 2 ) + CTOT( 3 )
+      CALL SGEMM( 'N', 'N', NR, K, CTEMP, ONE, U2( NLP2, KTEMP ), LDU2,
+     $            Q( KTEMP, 1 ), LDQ, ZERO, U( NLP2, 1 ), LDU )
+*
+*     Generate the right singular vectors.
+*
+  100 CONTINUE
+      DO 120 I = 1, K
+         TEMP = SNRM2( K, VT( 1, I ), 1 )
+         Q( I, 1 ) = VT( 1, I ) / TEMP
+         DO 110 J = 2, K
+            JC = IDXC( J )
+            Q( I, J ) = VT( JC, I ) / TEMP
+  110    CONTINUE
+  120 CONTINUE
+*
+*     Update the right singular vector matrix.
+*
+      IF( K.EQ.2 ) THEN
+         CALL SGEMM( 'N', 'N', K, M, K, ONE, Q, LDQ, VT2, LDVT2, ZERO,
+     $               VT, LDVT )
+         RETURN
+      END IF
+      KTEMP = 1 + CTOT( 1 )
+      CALL SGEMM( 'N', 'N', K, NLP1, KTEMP, ONE, Q( 1, 1 ), LDQ,
+     $            VT2( 1, 1 ), LDVT2, ZERO, VT( 1, 1 ), LDVT )
+      KTEMP = 2 + CTOT( 1 ) + CTOT( 2 )
+      IF( KTEMP.LE.LDVT2 )
+     $   CALL SGEMM( 'N', 'N', K, NLP1, CTOT( 3 ), ONE, Q( 1, KTEMP ),
+     $               LDQ, VT2( KTEMP, 1 ), LDVT2, ONE, VT( 1, 1 ),
+     $               LDVT )
+*
+      KTEMP = CTOT( 1 ) + 1
+      NRP1 = NR + SQRE
+      IF( KTEMP.GT.1 ) THEN
+         DO 130 I = 1, K
+            Q( I, KTEMP ) = Q( I, 1 )
+  130    CONTINUE
+         DO 140 I = NLP2, M
+            VT2( KTEMP, I ) = VT2( 1, I )
+  140    CONTINUE
+      END IF
+      CTEMP = 1 + CTOT( 2 ) + CTOT( 3 )
+      CALL SGEMM( 'N', 'N', K, NRP1, CTEMP, ONE, Q( 1, KTEMP ), LDQ,
+     $            VT2( KTEMP, NLP2 ), LDVT2, ZERO, VT( 1, NLP2 ), LDVT )
+*
+      RETURN
+*
+*     End of SLASD3
+*
+      END
diff --git a/costa/native/external/lapack/slasd4.f b/costa/native/external/lapack/slasd4.f
new file mode 100644
index 000000000..35454324b
--- /dev/null
+++ b/costa/native/external/lapack/slasd4.f
@@ -0,0 +1,891 @@
+      SUBROUTINE SLASD4( N, I, D, Z, DELTA, RHO, SIGMA, WORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+*     Courant Institute, NAG Ltd., and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            I, INFO, N
+      REAL               RHO, SIGMA
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * ), DELTA( * ), WORK( * ), Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This subroutine computes the square root of the I-th updated
+*  eigenvalue of a positive symmetric rank-one modification to
+*  a positive diagonal matrix whose entries are given as the squares
+*  of the corresponding entries in the array d, and that
+*
+*         0 <= D(i) < D(j)  for  i < j
+*
+*  and that RHO > 0. This is arranged by the calling routine, and is
+*  no loss in generality.  The rank-one modified system is thus
+*
+*         diag( D ) * diag( D ) +  RHO *  Z * Z_transpose.
+*
+*  where we assume the Euclidean norm of Z is 1.
+*
+*  The method consists of approximating the rational functions in the
+*  secular equation by simpler interpolating rational functions.
+*
+*  Arguments
+*  =========
+*
+*  N      (input) INTEGER
+*         The length of all arrays.
+*
+*  I      (input) INTEGER
+*         The index of the eigenvalue to be computed.  1 <= I <= N.
+*
+*  D      (input) REAL array, dimension ( N )
+*         The original eigenvalues.  It is assumed that they are in
+*         order, 0 <= D(I) < D(J)  for I < J.
+*
+*  Z      (input) REAL array, dimension ( N )
+*         The components of the updating vector.
+*
+*  DELTA  (output) REAL array, dimension ( N )
+*         If N .ne. 1, DELTA contains (D(j) - sigma_I) in its  j-th
+*         component.  If N = 1, then DELTA(1) = 1.  The vector DELTA
+*         contains the information necessary to construct the
+*         (singular) eigenvectors.
+*
+*  RHO    (input) REAL
+*         The scalar in the symmetric updating formula.
+*
+*  SIGMA  (output) REAL
+*         The computed lambda_I, the I-th updated eigenvalue.
+*
+*  WORK   (workspace) REAL array, dimension ( N )
+*         If N .ne. 1, WORK contains (D(j) + sigma_I) in its  j-th
+*         component.  If N = 1, then WORK( 1 ) = 1.
+*
+*  INFO   (output) INTEGER
+*         = 0:  successful exit
+*         > 0:  if INFO = 1, the updating process failed.
+*
+*  Internal Parameters
+*  ===================
+*
+*  Logical variable ORGATI (origin-at-i?) is used for distinguishing
+*  whether D(i) or D(i+1) is treated as the origin.
+*
+*            ORGATI = .true.    origin at i
+*            ORGATI = .false.   origin at i+1
+*
+*  Logical variable SWTCH3 (switch-for-3-poles?) is for noting
+*  if we are working with THREE poles!
+*
+*  MAXIT is the maximum number of iterations allowed for each
+*  eigenvalue.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ren-Cang Li, Computer Science Division, University of California
+*     at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            MAXIT
+      PARAMETER          ( MAXIT = 20 )
+      REAL               ZERO, ONE, TWO, THREE, FOUR, EIGHT, TEN
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0,
+     $                   THREE = 3.0E+0, FOUR = 4.0E+0, EIGHT = 8.0E+0,
+     $                   TEN = 10.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ORGATI, SWTCH, SWTCH3
+      INTEGER            II, IIM1, IIP1, IP1, ITER, J, NITER
+      REAL               A, B, C, DELSQ, DELSQ2, DPHI, DPSI, DTIIM,
+     $                   DTIIP, DTIPSQ, DTISQ, DTNSQ, DTNSQ1, DW, EPS,
+     $                   ERRETM, ETA, PHI, PREW, PSI, RHOINV, SG2LB,
+     $                   SG2UB, TAU, TEMP, TEMP1, TEMP2, W
+*     ..
+*     .. Local Arrays ..
+      REAL               DD( 3 ), ZZ( 3 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLAED6, SLASD5
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Since this routine is called in an inner loop, we do no argument
+*     checking.
+*
+*     Quick return for N=1 and 2.
+*
+      INFO = 0
+      IF( N.EQ.1 ) THEN
+*
+*        Presumably, I=1 upon entry
+*
+         SIGMA = SQRT( D( 1 )*D( 1 )+RHO*Z( 1 )*Z( 1 ) )
+         DELTA( 1 ) = ONE
+         WORK( 1 ) = ONE
+         RETURN
+      END IF
+      IF( N.EQ.2 ) THEN
+         CALL SLASD5( I, D, Z, DELTA, RHO, SIGMA, WORK )
+         RETURN
+      END IF
+*
+*     Compute machine epsilon
+*
+      EPS = SLAMCH( 'Epsilon' )
+      RHOINV = ONE / RHO
+*
+*     The case I = N
+*
+      IF( I.EQ.N ) THEN
+*
+*        Initialize some basic variables
+*
+         II = N - 1
+         NITER = 1
+*
+*        Calculate initial guess
+*
+         TEMP = RHO / TWO
+*
+*        If ||Z||_2 is not one, then TEMP should be set to
+*        RHO * ||Z||_2^2 / TWO
+*
+         TEMP1 = TEMP / ( D( N )+SQRT( D( N )*D( N )+TEMP ) )
+         DO 10 J = 1, N
+            WORK( J ) = D( J ) + D( N ) + TEMP1
+            DELTA( J ) = ( D( J )-D( N ) ) - TEMP1
+   10    CONTINUE
+*
+         PSI = ZERO
+         DO 20 J = 1, N - 2
+            PSI = PSI + Z( J )*Z( J ) / ( DELTA( J )*WORK( J ) )
+   20    CONTINUE
+*
+         C = RHOINV + PSI
+         W = C + Z( II )*Z( II ) / ( DELTA( II )*WORK( II ) ) +
+     $       Z( N )*Z( N ) / ( DELTA( N )*WORK( N ) )
+*
+         IF( W.LE.ZERO ) THEN
+            TEMP1 = SQRT( D( N )*D( N )+RHO )
+            TEMP = Z( N-1 )*Z( N-1 ) / ( ( D( N-1 )+TEMP1 )*
+     $             ( D( N )-D( N-1 )+RHO / ( D( N )+TEMP1 ) ) ) +
+     $             Z( N )*Z( N ) / RHO
+*
+*           The following TAU is to approximate
+*           SIGMA_n^2 - D( N )*D( N )
+*
+            IF( C.LE.TEMP ) THEN
+               TAU = RHO
+            ELSE
+               DELSQ = ( D( N )-D( N-1 ) )*( D( N )+D( N-1 ) )
+               A = -C*DELSQ + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N )
+               B = Z( N )*Z( N )*DELSQ
+               IF( A.LT.ZERO ) THEN
+                  TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A )
+               ELSE
+                  TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C )
+               END IF
+            END IF
+*
+*           It can be proved that
+*               D(N)^2+RHO/2 <= SIGMA_n^2 < D(N)^2+TAU <= D(N)^2+RHO
+*
+         ELSE
+            DELSQ = ( D( N )-D( N-1 ) )*( D( N )+D( N-1 ) )
+            A = -C*DELSQ + Z( N-1 )*Z( N-1 ) + Z( N )*Z( N )
+            B = Z( N )*Z( N )*DELSQ
+*
+*           The following TAU is to approximate
+*           SIGMA_n^2 - D( N )*D( N )
+*
+            IF( A.LT.ZERO ) THEN
+               TAU = TWO*B / ( SQRT( A*A+FOUR*B*C )-A )
+            ELSE
+               TAU = ( A+SQRT( A*A+FOUR*B*C ) ) / ( TWO*C )
+            END IF
+*
+*           It can be proved that
+*           D(N)^2 < D(N)^2+TAU < SIGMA(N)^2 < D(N)^2+RHO/2
+*
+         END IF
+*
+*        The following ETA is to approximate SIGMA_n - D( N )
+*
+         ETA = TAU / ( D( N )+SQRT( D( N )*D( N )+TAU ) )
+*
+         SIGMA = D( N ) + ETA
+         DO 30 J = 1, N
+            DELTA( J ) = ( D( J )-D( I ) ) - ETA
+            WORK( J ) = D( J ) + D( I ) + ETA
+   30    CONTINUE
+*
+*        Evaluate PSI and the derivative DPSI
+*
+         DPSI = ZERO
+         PSI = ZERO
+         ERRETM = ZERO
+         DO 40 J = 1, II
+            TEMP = Z( J ) / ( DELTA( J )*WORK( J ) )
+            PSI = PSI + Z( J )*TEMP
+            DPSI = DPSI + TEMP*TEMP
+            ERRETM = ERRETM + PSI
+   40    CONTINUE
+         ERRETM = ABS( ERRETM )
+*
+*        Evaluate PHI and the derivative DPHI
+*
+         TEMP = Z( N ) / ( DELTA( N )*WORK( N ) )
+         PHI = Z( N )*TEMP
+         DPHI = TEMP*TEMP
+         ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV +
+     $            ABS( TAU )*( DPSI+DPHI )
+*
+         W = RHOINV + PHI + PSI
+*
+*        Test for convergence
+*
+         IF( ABS( W ).LE.EPS*ERRETM ) THEN
+            GO TO 240
+         END IF
+*
+*        Calculate the new step
+*
+         NITER = NITER + 1
+         DTNSQ1 = WORK( N-1 )*DELTA( N-1 )
+         DTNSQ = WORK( N )*DELTA( N )
+         C = W - DTNSQ1*DPSI - DTNSQ*DPHI
+         A = ( DTNSQ+DTNSQ1 )*W - DTNSQ*DTNSQ1*( DPSI+DPHI )
+         B = DTNSQ*DTNSQ1*W
+         IF( C.LT.ZERO )
+     $      C = ABS( C )
+         IF( C.EQ.ZERO ) THEN
+            ETA = RHO - SIGMA*SIGMA
+         ELSE IF( A.GE.ZERO ) THEN
+            ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+         ELSE
+            ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) )
+         END IF
+*
+*        Note, eta should be positive if w is negative, and
+*        eta should be negative otherwise. However,
+*        if for some reason caused by roundoff, eta*w > 0,
+*        we simply use one Newton step instead. This way
+*        will guarantee eta*w < 0.
+*
+         IF( W*ETA.GT.ZERO )
+     $      ETA = -W / ( DPSI+DPHI )
+         TEMP = ETA - DTNSQ
+         IF( TEMP.GT.RHO )
+     $      ETA = RHO + DTNSQ
+*
+         TAU = TAU + ETA
+         ETA = ETA / ( SIGMA+SQRT( ETA+SIGMA*SIGMA ) )
+         DO 50 J = 1, N
+            DELTA( J ) = DELTA( J ) - ETA
+            WORK( J ) = WORK( J ) + ETA
+   50    CONTINUE
+*
+         SIGMA = SIGMA + ETA
+*
+*        Evaluate PSI and the derivative DPSI
+*
+         DPSI = ZERO
+         PSI = ZERO
+         ERRETM = ZERO
+         DO 60 J = 1, II
+            TEMP = Z( J ) / ( WORK( J )*DELTA( J ) )
+            PSI = PSI + Z( J )*TEMP
+            DPSI = DPSI + TEMP*TEMP
+            ERRETM = ERRETM + PSI
+   60    CONTINUE
+         ERRETM = ABS( ERRETM )
+*
+*        Evaluate PHI and the derivative DPHI
+*
+         TEMP = Z( N ) / ( WORK( N )*DELTA( N ) )
+         PHI = Z( N )*TEMP
+         DPHI = TEMP*TEMP
+         ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV +
+     $            ABS( TAU )*( DPSI+DPHI )
+*
+         W = RHOINV + PHI + PSI
+*
+*        Main loop to update the values of the array   DELTA
+*
+         ITER = NITER + 1
+*
+         DO 90 NITER = ITER, MAXIT
+*
+*           Test for convergence
+*
+            IF( ABS( W ).LE.EPS*ERRETM ) THEN
+               GO TO 240
+            END IF
+*
+*           Calculate the new step
+*
+            DTNSQ1 = WORK( N-1 )*DELTA( N-1 )
+            DTNSQ = WORK( N )*DELTA( N )
+            C = W - DTNSQ1*DPSI - DTNSQ*DPHI
+            A = ( DTNSQ+DTNSQ1 )*W - DTNSQ1*DTNSQ*( DPSI+DPHI )
+            B = DTNSQ1*DTNSQ*W
+            IF( A.GE.ZERO ) THEN
+               ETA = ( A+SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+            ELSE
+               ETA = TWO*B / ( A-SQRT( ABS( A*A-FOUR*B*C ) ) )
+            END IF
+*
+*           Note, eta should be positive if w is negative, and
+*           eta should be negative otherwise. However,
+*           if for some reason caused by roundoff, eta*w > 0,
+*           we simply use one Newton step instead. This way
+*           will guarantee eta*w < 0.
+*
+            IF( W*ETA.GT.ZERO )
+     $         ETA = -W / ( DPSI+DPHI )
+            TEMP = ETA - DTNSQ
+            IF( TEMP.LE.ZERO )
+     $         ETA = ETA / TWO
+*
+            TAU = TAU + ETA
+            ETA = ETA / ( SIGMA+SQRT( ETA+SIGMA*SIGMA ) )
+            DO 70 J = 1, N
+               DELTA( J ) = DELTA( J ) - ETA
+               WORK( J ) = WORK( J ) + ETA
+   70       CONTINUE
+*
+            SIGMA = SIGMA + ETA
+*
+*           Evaluate PSI and the derivative DPSI
+*
+            DPSI = ZERO
+            PSI = ZERO
+            ERRETM = ZERO
+            DO 80 J = 1, II
+               TEMP = Z( J ) / ( WORK( J )*DELTA( J ) )
+               PSI = PSI + Z( J )*TEMP
+               DPSI = DPSI + TEMP*TEMP
+               ERRETM = ERRETM + PSI
+   80       CONTINUE
+            ERRETM = ABS( ERRETM )
+*
+*           Evaluate PHI and the derivative DPHI
+*
+            TEMP = Z( N ) / ( WORK( N )*DELTA( N ) )
+            PHI = Z( N )*TEMP
+            DPHI = TEMP*TEMP
+            ERRETM = EIGHT*( -PHI-PSI ) + ERRETM - PHI + RHOINV +
+     $               ABS( TAU )*( DPSI+DPHI )
+*
+            W = RHOINV + PHI + PSI
+   90    CONTINUE
+*
+*        Return with INFO = 1, NITER = MAXIT and not converged
+*
+         INFO = 1
+         GO TO 240
+*
+*        End for the case I = N
+*
+      ELSE
+*
+*        The case for I < N
+*
+         NITER = 1
+         IP1 = I + 1
+*
+*        Calculate initial guess
+*
+         DELSQ = ( D( IP1 )-D( I ) )*( D( IP1 )+D( I ) )
+         DELSQ2 = DELSQ / TWO
+         TEMP = DELSQ2 / ( D( I )+SQRT( D( I )*D( I )+DELSQ2 ) )
+         DO 100 J = 1, N
+            WORK( J ) = D( J ) + D( I ) + TEMP
+            DELTA( J ) = ( D( J )-D( I ) ) - TEMP
+  100    CONTINUE
+*
+         PSI = ZERO
+         DO 110 J = 1, I - 1
+            PSI = PSI + Z( J )*Z( J ) / ( WORK( J )*DELTA( J ) )
+  110    CONTINUE
+*
+         PHI = ZERO
+         DO 120 J = N, I + 2, -1
+            PHI = PHI + Z( J )*Z( J ) / ( WORK( J )*DELTA( J ) )
+  120    CONTINUE
+         C = RHOINV + PSI + PHI
+         W = C + Z( I )*Z( I ) / ( WORK( I )*DELTA( I ) ) +
+     $       Z( IP1 )*Z( IP1 ) / ( WORK( IP1 )*DELTA( IP1 ) )
+*
+         IF( W.GT.ZERO ) THEN
+*
+*           d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2
+*
+*           We choose d(i) as origin.
+*
+            ORGATI = .TRUE.
+            SG2LB = ZERO
+            SG2UB = DELSQ2
+            A = C*DELSQ + Z( I )*Z( I ) + Z( IP1 )*Z( IP1 )
+            B = Z( I )*Z( I )*DELSQ
+            IF( A.GT.ZERO ) THEN
+               TAU = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
+            ELSE
+               TAU = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+            END IF
+*
+*           TAU now is an estimation of SIGMA^2 - D( I )^2. The
+*           following, however, is the corresponding estimation of
+*           SIGMA - D( I ).
+*
+            ETA = TAU / ( D( I )+SQRT( D( I )*D( I )+TAU ) )
+         ELSE
+*
+*           (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2
+*
+*           We choose d(i+1) as origin.
+*
+            ORGATI = .FALSE.
+            SG2LB = -DELSQ2
+            SG2UB = ZERO
+            A = C*DELSQ - Z( I )*Z( I ) - Z( IP1 )*Z( IP1 )
+            B = Z( IP1 )*Z( IP1 )*DELSQ
+            IF( A.LT.ZERO ) THEN
+               TAU = TWO*B / ( A-SQRT( ABS( A*A+FOUR*B*C ) ) )
+            ELSE
+               TAU = -( A+SQRT( ABS( A*A+FOUR*B*C ) ) ) / ( TWO*C )
+            END IF
+*
+*           TAU now is an estimation of SIGMA^2 - D( IP1 )^2. The
+*           following, however, is the corresponding estimation of
+*           SIGMA - D( IP1 ).
+*
+            ETA = TAU / ( D( IP1 )+SQRT( ABS( D( IP1 )*D( IP1 )+
+     $            TAU ) ) )
+         END IF
+*
+         IF( ORGATI ) THEN
+            II = I
+            SIGMA = D( I ) + ETA
+            DO 130 J = 1, N
+               WORK( J ) = D( J ) + D( I ) + ETA
+               DELTA( J ) = ( D( J )-D( I ) ) - ETA
+  130       CONTINUE
+         ELSE
+            II = I + 1
+            SIGMA = D( IP1 ) + ETA
+            DO 140 J = 1, N
+               WORK( J ) = D( J ) + D( IP1 ) + ETA
+               DELTA( J ) = ( D( J )-D( IP1 ) ) - ETA
+  140       CONTINUE
+         END IF
+         IIM1 = II - 1
+         IIP1 = II + 1
+*
+*        Evaluate PSI and the derivative DPSI
+*
+         DPSI = ZERO
+         PSI = ZERO
+         ERRETM = ZERO
+         DO 150 J = 1, IIM1
+            TEMP = Z( J ) / ( WORK( J )*DELTA( J ) )
+            PSI = PSI + Z( J )*TEMP
+            DPSI = DPSI + TEMP*TEMP
+            ERRETM = ERRETM + PSI
+  150    CONTINUE
+         ERRETM = ABS( ERRETM )
+*
+*        Evaluate PHI and the derivative DPHI
+*
+         DPHI = ZERO
+         PHI = ZERO
+         DO 160 J = N, IIP1, -1
+            TEMP = Z( J ) / ( WORK( J )*DELTA( J ) )
+            PHI = PHI + Z( J )*TEMP
+            DPHI = DPHI + TEMP*TEMP
+            ERRETM = ERRETM + PHI
+  160    CONTINUE
+*
+         W = RHOINV + PHI + PSI
+*
+*        W is the value of the secular function with
+*        its ii-th element removed.
+*
+         SWTCH3 = .FALSE.
+         IF( ORGATI ) THEN
+            IF( W.LT.ZERO )
+     $         SWTCH3 = .TRUE.
+         ELSE
+            IF( W.GT.ZERO )
+     $         SWTCH3 = .TRUE.
+         END IF
+         IF( II.EQ.1 .OR. II.EQ.N )
+     $      SWTCH3 = .FALSE.
+*
+         TEMP = Z( II ) / ( WORK( II )*DELTA( II ) )
+         DW = DPSI + DPHI + TEMP*TEMP
+         TEMP = Z( II )*TEMP
+         W = W + TEMP
+         ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV +
+     $            THREE*ABS( TEMP ) + ABS( TAU )*DW
+*
+*        Test for convergence
+*
+         IF( ABS( W ).LE.EPS*ERRETM ) THEN
+            GO TO 240
+         END IF
+*
+         IF( W.LE.ZERO ) THEN
+            SG2LB = MAX( SG2LB, TAU )
+         ELSE
+            SG2UB = MIN( SG2UB, TAU )
+         END IF
+*
+*        Calculate the new step
+*
+         NITER = NITER + 1
+         IF( .NOT.SWTCH3 ) THEN
+            DTIPSQ = WORK( IP1 )*DELTA( IP1 )
+            DTISQ = WORK( I )*DELTA( I )
+            IF( ORGATI ) THEN
+               C = W - DTIPSQ*DW + DELSQ*( Z( I ) / DTISQ )**2
+            ELSE
+               C = W - DTISQ*DW - DELSQ*( Z( IP1 ) / DTIPSQ )**2
+            END IF
+            A = ( DTIPSQ+DTISQ )*W - DTIPSQ*DTISQ*DW
+            B = DTIPSQ*DTISQ*W
+            IF( C.EQ.ZERO ) THEN
+               IF( A.EQ.ZERO ) THEN
+                  IF( ORGATI ) THEN
+                     A = Z( I )*Z( I ) + DTIPSQ*DTIPSQ*( DPSI+DPHI )
+                  ELSE
+                     A = Z( IP1 )*Z( IP1 ) + DTISQ*DTISQ*( DPSI+DPHI )
+                  END IF
+               END IF
+               ETA = B / A
+            ELSE IF( A.LE.ZERO ) THEN
+               ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+            ELSE
+               ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
+            END IF
+         ELSE
+*
+*           Interpolation using THREE most relevant poles
+*
+            DTIIM = WORK( IIM1 )*DELTA( IIM1 )
+            DTIIP = WORK( IIP1 )*DELTA( IIP1 )
+            TEMP = RHOINV + PSI + PHI
+            IF( ORGATI ) THEN
+               TEMP1 = Z( IIM1 ) / DTIIM
+               TEMP1 = TEMP1*TEMP1
+               C = ( TEMP - DTIIP*( DPSI+DPHI ) ) -
+     $             ( D( IIM1 )-D( IIP1 ) )*( D( IIM1 )+D( IIP1 ) )*TEMP1
+               ZZ( 1 ) = Z( IIM1 )*Z( IIM1 )
+               IF( DPSI.LT.TEMP1 ) THEN
+                  ZZ( 3 ) = DTIIP*DTIIP*DPHI
+               ELSE
+                  ZZ( 3 ) = DTIIP*DTIIP*( ( DPSI-TEMP1 )+DPHI )
+               END IF
+            ELSE
+               TEMP1 = Z( IIP1 ) / DTIIP
+               TEMP1 = TEMP1*TEMP1
+               C = ( TEMP - DTIIM*( DPSI+DPHI ) ) -
+     $             ( D( IIP1 )-D( IIM1 ) )*( D( IIM1 )+D( IIP1 ) )*TEMP1
+               IF( DPHI.LT.TEMP1 ) THEN
+                  ZZ( 1 ) = DTIIM*DTIIM*DPSI
+               ELSE
+                  ZZ( 1 ) = DTIIM*DTIIM*( DPSI+( DPHI-TEMP1 ) )
+               END IF
+               ZZ( 3 ) = Z( IIP1 )*Z( IIP1 )
+            END IF
+            ZZ( 2 ) = Z( II )*Z( II )
+            DD( 1 ) = DTIIM
+            DD( 2 ) = DELTA( II )*WORK( II )
+            DD( 3 ) = DTIIP
+            CALL SLAED6( NITER, ORGATI, C, DD, ZZ, W, ETA, INFO )
+            IF( INFO.NE.0 )
+     $         GO TO 240
+         END IF
+*
+*        Note, eta should be positive if w is negative, and
+*        eta should be negative otherwise. However,
+*        if for some reason caused by roundoff, eta*w > 0,
+*        we simply use one Newton step instead. This way
+*        will guarantee eta*w < 0.
+*
+         IF( W*ETA.GE.ZERO )
+     $      ETA = -W / DW
+         IF( ORGATI ) THEN
+            TEMP1 = WORK( I )*DELTA( I )
+            TEMP = ETA - TEMP1
+         ELSE
+            TEMP1 = WORK( IP1 )*DELTA( IP1 )
+            TEMP = ETA - TEMP1
+         END IF
+         IF( TEMP.GT.SG2UB .OR. TEMP.LT.SG2LB ) THEN
+            IF( W.LT.ZERO ) THEN
+               ETA = ( SG2UB-TAU ) / TWO
+            ELSE
+               ETA = ( SG2LB-TAU ) / TWO
+            END IF
+         END IF
+*
+         TAU = TAU + ETA
+         ETA = ETA / ( SIGMA+SQRT( SIGMA*SIGMA+ETA ) )
+*
+         PREW = W
+*
+         SIGMA = SIGMA + ETA
+         DO 170 J = 1, N
+            WORK( J ) = WORK( J ) + ETA
+            DELTA( J ) = DELTA( J ) - ETA
+  170    CONTINUE
+*
+*        Evaluate PSI and the derivative DPSI
+*
+         DPSI = ZERO
+         PSI = ZERO
+         ERRETM = ZERO
+         DO 180 J = 1, IIM1
+            TEMP = Z( J ) / ( WORK( J )*DELTA( J ) )
+            PSI = PSI + Z( J )*TEMP
+            DPSI = DPSI + TEMP*TEMP
+            ERRETM = ERRETM + PSI
+  180    CONTINUE
+         ERRETM = ABS( ERRETM )
+*
+*        Evaluate PHI and the derivative DPHI
+*
+         DPHI = ZERO
+         PHI = ZERO
+         DO 190 J = N, IIP1, -1
+            TEMP = Z( J ) / ( WORK( J )*DELTA( J ) )
+            PHI = PHI + Z( J )*TEMP
+            DPHI = DPHI + TEMP*TEMP
+            ERRETM = ERRETM + PHI
+  190    CONTINUE
+*
+         TEMP = Z( II ) / ( WORK( II )*DELTA( II ) )
+         DW = DPSI + DPHI + TEMP*TEMP
+         TEMP = Z( II )*TEMP
+         W = RHOINV + PHI + PSI + TEMP
+         ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV +
+     $            THREE*ABS( TEMP ) + ABS( TAU )*DW
+*
+         IF( W.LE.ZERO ) THEN
+            SG2LB = MAX( SG2LB, TAU )
+         ELSE
+            SG2UB = MIN( SG2UB, TAU )
+         END IF
+*
+         SWTCH = .FALSE.
+         IF( ORGATI ) THEN
+            IF( -W.GT.ABS( PREW ) / TEN )
+     $         SWTCH = .TRUE.
+         ELSE
+            IF( W.GT.ABS( PREW ) / TEN )
+     $         SWTCH = .TRUE.
+         END IF
+*
+*        Main loop to update the values of the array   DELTA and WORK
+*
+         ITER = NITER + 1
+*
+         DO 230 NITER = ITER, MAXIT
+*
+*           Test for convergence
+*
+            IF( ABS( W ).LE.EPS*ERRETM ) THEN
+               GO TO 240
+            END IF
+*
+*           Calculate the new step
+*
+            IF( .NOT.SWTCH3 ) THEN
+               DTIPSQ = WORK( IP1 )*DELTA( IP1 )
+               DTISQ = WORK( I )*DELTA( I )
+               IF( .NOT.SWTCH ) THEN
+                  IF( ORGATI ) THEN
+                     C = W - DTIPSQ*DW + DELSQ*( Z( I ) / DTISQ )**2
+                  ELSE
+                     C = W - DTISQ*DW - DELSQ*( Z( IP1 ) / DTIPSQ )**2
+                  END IF
+               ELSE
+                  TEMP = Z( II ) / ( WORK( II )*DELTA( II ) )
+                  IF( ORGATI ) THEN
+                     DPSI = DPSI + TEMP*TEMP
+                  ELSE
+                     DPHI = DPHI + TEMP*TEMP
+                  END IF
+                  C = W - DTISQ*DPSI - DTIPSQ*DPHI
+               END IF
+               A = ( DTIPSQ+DTISQ )*W - DTIPSQ*DTISQ*DW
+               B = DTIPSQ*DTISQ*W
+               IF( C.EQ.ZERO ) THEN
+                  IF( A.EQ.ZERO ) THEN
+                     IF( .NOT.SWTCH ) THEN
+                        IF( ORGATI ) THEN
+                           A = Z( I )*Z( I ) + DTIPSQ*DTIPSQ*
+     $                         ( DPSI+DPHI )
+                        ELSE
+                           A = Z( IP1 )*Z( IP1 ) +
+     $                         DTISQ*DTISQ*( DPSI+DPHI )
+                        END IF
+                     ELSE
+                        A = DTISQ*DTISQ*DPSI + DTIPSQ*DTIPSQ*DPHI
+                     END IF
+                  END IF
+                  ETA = B / A
+               ELSE IF( A.LE.ZERO ) THEN
+                  ETA = ( A-SQRT( ABS( A*A-FOUR*B*C ) ) ) / ( TWO*C )
+               ELSE
+                  ETA = TWO*B / ( A+SQRT( ABS( A*A-FOUR*B*C ) ) )
+               END IF
+            ELSE
+*
+*              Interpolation using THREE most relevant poles
+*
+               DTIIM = WORK( IIM1 )*DELTA( IIM1 )
+               DTIIP = WORK( IIP1 )*DELTA( IIP1 )
+               TEMP = RHOINV + PSI + PHI
+               IF( SWTCH ) THEN
+                  C = TEMP - DTIIM*DPSI - DTIIP*DPHI
+                  ZZ( 1 ) = DTIIM*DTIIM*DPSI
+                  ZZ( 3 ) = DTIIP*DTIIP*DPHI
+               ELSE
+                  IF( ORGATI ) THEN
+                     TEMP1 = Z( IIM1 ) / DTIIM
+                     TEMP1 = TEMP1*TEMP1
+                     TEMP2 = ( D( IIM1 )-D( IIP1 ) )*
+     $                       ( D( IIM1 )+D( IIP1 ) )*TEMP1
+                     C = TEMP - DTIIP*( DPSI+DPHI ) - TEMP2
+                     ZZ( 1 ) = Z( IIM1 )*Z( IIM1 )
+                     IF( DPSI.LT.TEMP1 ) THEN
+                        ZZ( 3 ) = DTIIP*DTIIP*DPHI
+                     ELSE
+                        ZZ( 3 ) = DTIIP*DTIIP*( ( DPSI-TEMP1 )+DPHI )
+                     END IF
+                  ELSE
+                     TEMP1 = Z( IIP1 ) / DTIIP
+                     TEMP1 = TEMP1*TEMP1
+                     TEMP2 = ( D( IIP1 )-D( IIM1 ) )*
+     $                       ( D( IIM1 )+D( IIP1 ) )*TEMP1
+                     C = TEMP - DTIIM*( DPSI+DPHI ) - TEMP2
+                     IF( DPHI.LT.TEMP1 ) THEN
+                        ZZ( 1 ) = DTIIM*DTIIM*DPSI
+                     ELSE
+                        ZZ( 1 ) = DTIIM*DTIIM*( DPSI+( DPHI-TEMP1 ) )
+                     END IF
+                     ZZ( 3 ) = Z( IIP1 )*Z( IIP1 )
+                  END IF
+               END IF
+               DD( 1 ) = DTIIM
+               DD( 2 ) = DELTA( II )*WORK( II )
+               DD( 3 ) = DTIIP
+               CALL SLAED6( NITER, ORGATI, C, DD, ZZ, W, ETA, INFO )
+               IF( INFO.NE.0 )
+     $            GO TO 240
+            END IF
+*
+*           Note, eta should be positive if w is negative, and
+*           eta should be negative otherwise. However,
+*           if for some reason caused by roundoff, eta*w > 0,
+*           we simply use one Newton step instead. This way
+*           will guarantee eta*w < 0.
+*
+            IF( W*ETA.GE.ZERO )
+     $         ETA = -W / DW
+            IF( ORGATI ) THEN
+               TEMP1 = WORK( I )*DELTA( I )
+               TEMP = ETA - TEMP1
+            ELSE
+               TEMP1 = WORK( IP1 )*DELTA( IP1 )
+               TEMP = ETA - TEMP1
+            END IF
+            IF( TEMP.GT.SG2UB .OR. TEMP.LT.SG2LB ) THEN
+               IF( W.LT.ZERO ) THEN
+                  ETA = ( SG2UB-TAU ) / TWO
+               ELSE
+                  ETA = ( SG2LB-TAU ) / TWO
+               END IF
+            END IF
+*
+            TAU = TAU + ETA
+            ETA = ETA / ( SIGMA+SQRT( SIGMA*SIGMA+ETA ) )
+*
+            SIGMA = SIGMA + ETA
+            DO 200 J = 1, N
+               WORK( J ) = WORK( J ) + ETA
+               DELTA( J ) = DELTA( J ) - ETA
+  200       CONTINUE
+*
+            PREW = W
+*
+*           Evaluate PSI and the derivative DPSI
+*
+            DPSI = ZERO
+            PSI = ZERO
+            ERRETM = ZERO
+            DO 210 J = 1, IIM1
+               TEMP = Z( J ) / ( WORK( J )*DELTA( J ) )
+               PSI = PSI + Z( J )*TEMP
+               DPSI = DPSI + TEMP*TEMP
+               ERRETM = ERRETM + PSI
+  210       CONTINUE
+            ERRETM = ABS( ERRETM )
+*
+*           Evaluate PHI and the derivative DPHI
+*
+            DPHI = ZERO
+            PHI = ZERO
+            DO 220 J = N, IIP1, -1
+               TEMP = Z( J ) / ( WORK( J )*DELTA( J ) )
+               PHI = PHI + Z( J )*TEMP
+               DPHI = DPHI + TEMP*TEMP
+               ERRETM = ERRETM + PHI
+  220       CONTINUE
+*
+            TEMP = Z( II ) / ( WORK( II )*DELTA( II ) )
+            DW = DPSI + DPHI + TEMP*TEMP
+            TEMP = Z( II )*TEMP
+            W = RHOINV + PHI + PSI + TEMP
+            ERRETM = EIGHT*( PHI-PSI ) + ERRETM + TWO*RHOINV +
+     $               THREE*ABS( TEMP ) + ABS( TAU )*DW
+            IF( W*PREW.GT.ZERO .AND. ABS( W ).GT.ABS( PREW ) / TEN )
+     $         SWTCH = .NOT.SWTCH
+*
+            IF( W.LE.ZERO ) THEN
+               SG2LB = MAX( SG2LB, TAU )
+            ELSE
+               SG2UB = MIN( SG2UB, TAU )
+            END IF
+*
+  230    CONTINUE
+*
+*        Return with INFO = 1, NITER = MAXIT and not converged
+*
+         INFO = 1
+*
+      END IF
+*
+  240 CONTINUE
+      RETURN
+*
+*     End of SLASD4
+*
+      END
diff --git a/costa/native/external/lapack/slasd5.f b/costa/native/external/lapack/slasd5.f
new file mode 100644
index 000000000..7325bfe61
--- /dev/null
+++ b/costa/native/external/lapack/slasd5.f
@@ -0,0 +1,164 @@
+      SUBROUTINE SLASD5( I, D, Z, DELTA, RHO, DSIGMA, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+*     Courant Institute, NAG Ltd., and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            I
+      REAL               DSIGMA, RHO
+*     ..
+*     .. Array Arguments ..
+      REAL               D( 2 ), DELTA( 2 ), WORK( 2 ), Z( 2 )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This subroutine computes the square root of the I-th eigenvalue
+*  of a positive symmetric rank-one modification of a 2-by-2 diagonal
+*  matrix
+*
+*             diag( D ) * diag( D ) +  RHO *  Z * transpose(Z) .
+*
+*  The diagonal entries in the array D are assumed to satisfy
+*
+*             0 <= D(i) < D(j)  for  i < j .
+*
+*  We also assume RHO > 0 and that the Euclidean norm of the vector
+*  Z is one.
+*
+*  Arguments
+*  =========
+*
+*  I      (input) INTEGER
+*         The index of the eigenvalue to be computed.  I = 1 or I = 2.
+*
+*  D      (input) REAL array, dimension ( 2 )
+*         The original eigenvalues.  We assume 0 <= D(1) < D(2).
+*
+*  Z      (input) REAL array, dimension ( 2 )
+*         The components of the updating vector.
+*
+*  DELTA  (output) REAL array, dimension ( 2 )
+*         Contains (D(j) - lambda_I) in its  j-th component.
+*         The vector DELTA contains the information necessary
+*         to construct the eigenvectors.
+*
+*  RHO    (input) REAL
+*         The scalar in the symmetric updating formula.
+*
+*  DSIGMA (output) REAL
+*         The computed lambda_I, the I-th updated eigenvalue.
+*
+*  WORK   (workspace) REAL array, dimension ( 2 )
+*         WORK contains (D(j) + sigma_I) in its  j-th component.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ren-Cang Li, Computer Science Division, University of California
+*     at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE, TWO, THREE, FOUR
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0,
+     $                   THREE = 3.0E+0, FOUR = 4.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      REAL               B, C, DEL, DELSQ, TAU, W
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      DEL = D( 2 ) - D( 1 )
+      DELSQ = DEL*( D( 2 )+D( 1 ) )
+      IF( I.EQ.1 ) THEN
+         W = ONE + FOUR*RHO*( Z( 2 )*Z( 2 ) / ( D( 1 )+THREE*D( 2 ) )-
+     $       Z( 1 )*Z( 1 ) / ( THREE*D( 1 )+D( 2 ) ) ) / DEL
+         IF( W.GT.ZERO ) THEN
+            B = DELSQ + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) )
+            C = RHO*Z( 1 )*Z( 1 )*DELSQ
+*
+*           B > ZERO, always
+*
+*           The following TAU is DSIGMA * DSIGMA - D( 1 ) * D( 1 )
+*
+            TAU = TWO*C / ( B+SQRT( ABS( B*B-FOUR*C ) ) )
+*
+*           The following TAU is DSIGMA - D( 1 )
+*
+            TAU = TAU / ( D( 1 )+SQRT( D( 1 )*D( 1 )+TAU ) )
+            DSIGMA = D( 1 ) + TAU
+            DELTA( 1 ) = -TAU
+            DELTA( 2 ) = DEL - TAU
+            WORK( 1 ) = TWO*D( 1 ) + TAU
+            WORK( 2 ) = ( D( 1 )+TAU ) + D( 2 )
+*           DELTA( 1 ) = -Z( 1 ) / TAU
+*           DELTA( 2 ) = Z( 2 ) / ( DEL-TAU )
+         ELSE
+            B = -DELSQ + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) )
+            C = RHO*Z( 2 )*Z( 2 )*DELSQ
+*
+*           The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 )
+*
+            IF( B.GT.ZERO ) THEN
+               TAU = -TWO*C / ( B+SQRT( B*B+FOUR*C ) )
+            ELSE
+               TAU = ( B-SQRT( B*B+FOUR*C ) ) / TWO
+            END IF
+*
+*           The following TAU is DSIGMA - D( 2 )
+*
+            TAU = TAU / ( D( 2 )+SQRT( ABS( D( 2 )*D( 2 )+TAU ) ) )
+            DSIGMA = D( 2 ) + TAU
+            DELTA( 1 ) = -( DEL+TAU )
+            DELTA( 2 ) = -TAU
+            WORK( 1 ) = D( 1 ) + TAU + D( 2 )
+            WORK( 2 ) = TWO*D( 2 ) + TAU
+*           DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU )
+*           DELTA( 2 ) = -Z( 2 ) / TAU
+         END IF
+*        TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) )
+*        DELTA( 1 ) = DELTA( 1 ) / TEMP
+*        DELTA( 2 ) = DELTA( 2 ) / TEMP
+      ELSE
+*
+*        Now I=2
+*
+         B = -DELSQ + RHO*( Z( 1 )*Z( 1 )+Z( 2 )*Z( 2 ) )
+         C = RHO*Z( 2 )*Z( 2 )*DELSQ
+*
+*        The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 )
+*
+         IF( B.GT.ZERO ) THEN
+            TAU = ( B+SQRT( B*B+FOUR*C ) ) / TWO
+         ELSE
+            TAU = TWO*C / ( -B+SQRT( B*B+FOUR*C ) )
+         END IF
+*
+*        The following TAU is DSIGMA - D( 2 )
+*
+         TAU = TAU / ( D( 2 )+SQRT( D( 2 )*D( 2 )+TAU ) )
+         DSIGMA = D( 2 ) + TAU
+         DELTA( 1 ) = -( DEL+TAU )
+         DELTA( 2 ) = -TAU
+         WORK( 1 ) = D( 1 ) + TAU + D( 2 )
+         WORK( 2 ) = TWO*D( 2 ) + TAU
+*        DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU )
+*        DELTA( 2 ) = -Z( 2 ) / TAU
+*        TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) )
+*        DELTA( 1 ) = DELTA( 1 ) / TEMP
+*        DELTA( 2 ) = DELTA( 2 ) / TEMP
+      END IF
+      RETURN
+*
+*     End of SLASD5
+*
+      END
diff --git a/costa/native/external/lapack/slasd6.f b/costa/native/external/lapack/slasd6.f
new file mode 100644
index 000000000..e25223f66
--- /dev/null
+++ b/costa/native/external/lapack/slasd6.f
@@ -0,0 +1,306 @@
+      SUBROUTINE SLASD6( ICOMPQ, NL, NR, SQRE, D, VF, VL, ALPHA, BETA,
+     $                   IDXQ, PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM,
+     $                   LDGNUM, POLES, DIFL, DIFR, Z, K, C, S, WORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            GIVPTR, ICOMPQ, INFO, K, LDGCOL, LDGNUM, NL,
+     $                   NR, SQRE
+      REAL               ALPHA, BETA, C, S
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( LDGCOL, * ), IDXQ( * ), IWORK( * ),
+     $                   PERM( * )
+      REAL               D( * ), DIFL( * ), DIFR( * ),
+     $                   GIVNUM( LDGNUM, * ), POLES( LDGNUM, * ),
+     $                   VF( * ), VL( * ), WORK( * ), Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLASD6 computes the SVD of an updated upper bidiagonal matrix B
+*  obtained by merging two smaller ones by appending a row. This
+*  routine is used only for the problem which requires all singular
+*  values and optionally singular vector matrices in factored form.
+*  B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE.
+*  A related subroutine, SLASD1, handles the case in which all singular
+*  values and singular vectors of the bidiagonal matrix are desired.
+*
+*  SLASD6 computes the SVD as follows:
+*
+*                ( D1(in)  0    0     0 )
+*    B = U(in) * (   Z1'   a   Z2'    b ) * VT(in)
+*                (   0     0   D2(in) 0 )
+*
+*      = U(out) * ( D(out) 0) * VT(out)
+*
+*  where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M
+*  with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros
+*  elsewhere; and the entry b is empty if SQRE = 0.
+*
+*  The singular values of B can be computed using D1, D2, the first
+*  components of all the right singular vectors of the lower block, and
+*  the last components of all the right singular vectors of the upper
+*  block. These components are stored and updated in VF and VL,
+*  respectively, in SLASD6. Hence U and VT are not explicitly
+*  referenced.
+*
+*  The singular values are stored in D. The algorithm consists of two
+*  stages:
+*
+*        The first stage consists of deflating the size of the problem
+*        when there are multiple singular values or if there is a zero
+*        in the Z vector. For each such occurence the dimension of the
+*        secular equation problem is reduced by one. This stage is
+*        performed by the routine SLASD7.
+*
+*        The second stage consists of calculating the updated
+*        singular values. This is done by finding the roots of the
+*        secular equation via the routine SLASD4 (as called by SLASD8).
+*        This routine also updates VF and VL and computes the distances
+*        between the updated singular values and the old singular
+*        values.
+*
+*  SLASD6 is called from SLASDA.
+*
+*  Arguments
+*  =========
+*
+*  ICOMPQ (input) INTEGER
+*         Specifies whether singular vectors are to be computed in
+*         factored form:
+*         = 0: Compute singular values only.
+*         = 1: Compute singular vectors in factored form as well.
+*
+*  NL     (input) INTEGER
+*         The row dimension of the upper block.  NL >= 1.
+*
+*  NR     (input) INTEGER
+*         The row dimension of the lower block.  NR >= 1.
+*
+*  SQRE   (input) INTEGER
+*         = 0: the lower block is an NR-by-NR square matrix.
+*         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+*
+*         The bidiagonal matrix has row dimension N = NL + NR + 1,
+*         and column dimension M = N + SQRE.
+*
+*  D      (input/output) REAL array, dimension ( NL+NR+1 ).
+*         On entry D(1:NL,1:NL) contains the singular values of the
+*         upper block, and D(NL+2:N) contains the singular values
+*         of the lower block. On exit D(1:N) contains the singular
+*         values of the modified matrix.
+*
+*  VF     (input/output) REAL array, dimension ( M )
+*         On entry, VF(1:NL+1) contains the first components of all
+*         right singular vectors of the upper block; and VF(NL+2:M)
+*         contains the first components of all right singular vectors
+*         of the lower block. On exit, VF contains the first components
+*         of all right singular vectors of the bidiagonal matrix.
+*
+*  VL     (input/output) REAL array, dimension ( M )
+*         On entry, VL(1:NL+1) contains the  last components of all
+*         right singular vectors of the upper block; and VL(NL+2:M)
+*         contains the last components of all right singular vectors of
+*         the lower block. On exit, VL contains the last components of
+*         all right singular vectors of the bidiagonal matrix.
+*
+*  ALPHA  (input) REAL
+*         Contains the diagonal element associated with the added row.
+*
+*  BETA   (input) REAL
+*         Contains the off-diagonal element associated with the added
+*         row.
+*
+*  IDXQ   (output) INTEGER array, dimension ( N )
+*         This contains the permutation which will reintegrate the
+*         subproblem just solved back into sorted order, i.e.
+*         D( IDXQ( I = 1, N ) ) will be in ascending order.
+*
+*  PERM   (output) INTEGER array, dimension ( N )
+*         The permutations (from deflation and sorting) to be applied
+*         to each block. Not referenced if ICOMPQ = 0.
+*
+*  GIVPTR (output) INTEGER
+*         The number of Givens rotations which took place in this
+*         subproblem. Not referenced if ICOMPQ = 0.
+*
+*  GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 )
+*         Each pair of numbers indicates a pair of columns to take place
+*         in a Givens rotation. Not referenced if ICOMPQ = 0.
+*
+*  LDGCOL (input) INTEGER
+*         leading dimension of GIVCOL, must be at least N.
+*
+*  GIVNUM (output) REAL array, dimension ( LDGNUM, 2 )
+*         Each number indicates the C or S value to be used in the
+*         corresponding Givens rotation. Not referenced if ICOMPQ = 0.
+*
+*  LDGNUM (input) INTEGER
+*         The leading dimension of GIVNUM and POLES, must be at least N.
+*
+*  POLES  (output) REAL array, dimension ( LDGNUM, 2 )
+*         On exit, POLES(1,*) is an array containing the new singular
+*         values obtained from solving the secular equation, and
+*         POLES(2,*) is an array containing the poles in the secular
+*         equation. Not referenced if ICOMPQ = 0.
+*
+*  DIFL   (output) REAL array, dimension ( N )
+*         On exit, DIFL(I) is the distance between I-th updated
+*         (undeflated) singular value and the I-th (undeflated) old
+*         singular value.
+*
+*  DIFR   (output) REAL array,
+*                  dimension ( LDGNUM, 2 ) if ICOMPQ = 1 and
+*                  dimension ( N ) if ICOMPQ = 0.
+*         On exit, DIFR(I, 1) is the distance between I-th updated
+*         (undeflated) singular value and the I+1-th (undeflated) old
+*         singular value.
+*
+*         If ICOMPQ = 1, DIFR(1:K,2) is an array containing the
+*         normalizing factors for the right singular vector matrix.
+*
+*         See SLASD8 for details on DIFL and DIFR.
+*
+*  Z      (output) REAL array, dimension ( M )
+*         The first elements of this array contain the components
+*         of the deflation-adjusted updating row vector.
+*
+*  K      (output) INTEGER
+*         Contains the dimension of the non-deflated matrix,
+*         This is the order of the related secular equation. 1 <= K <=N.
+*
+*  C      (output) REAL
+*         C contains garbage if SQRE =0 and the C-value of a Givens
+*         rotation related to the right null space if SQRE = 1.
+*
+*  S      (output) REAL
+*         S contains garbage if SQRE =0 and the S-value of a Givens
+*         rotation related to the right null space if SQRE = 1.
+*
+*  WORK   (workspace) REAL array, dimension ( 4 * M )
+*
+*  IWORK  (workspace) INTEGER array, dimension ( 3 * N )
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = 1, an singular value did not converge
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IDX, IDXC, IDXP, ISIGMA, IVFW, IVLW, IW, M,
+     $                   N, N1, N2
+      REAL               ORGNRM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SLAMRG, SLASCL, SLASD7, SLASD8, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      N = NL + NR + 1
+      M = N + SQRE
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( NL.LT.1 ) THEN
+         INFO = -2
+      ELSE IF( NR.LT.1 ) THEN
+         INFO = -3
+      ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN
+         INFO = -4
+      ELSE IF( LDGCOL.LT.N ) THEN
+         INFO = -14
+      ELSE IF( LDGNUM.LT.N ) THEN
+         INFO = -16
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLASD6', -INFO )
+         RETURN
+      END IF
+*
+*     The following values are for bookkeeping purposes only.  They are
+*     integer pointers which indicate the portion of the workspace
+*     used by a particular array in SLASD7 and SLASD8.
+*
+      ISIGMA = 1
+      IW = ISIGMA + N
+      IVFW = IW + M
+      IVLW = IVFW + M
+*
+      IDX = 1
+      IDXC = IDX + N
+      IDXP = IDXC + N
+*
+*     Scale.
+*
+      ORGNRM = MAX( ABS( ALPHA ), ABS( BETA ) )
+      D( NL+1 ) = ZERO
+      DO 10 I = 1, N
+         IF( ABS( D( I ) ).GT.ORGNRM ) THEN
+            ORGNRM = ABS( D( I ) )
+         END IF
+   10 CONTINUE
+      CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO )
+      ALPHA = ALPHA / ORGNRM
+      BETA = BETA / ORGNRM
+*
+*     Sort and Deflate singular values.
+*
+      CALL SLASD7( ICOMPQ, NL, NR, SQRE, K, D, Z, WORK( IW ), VF,
+     $             WORK( IVFW ), VL, WORK( IVLW ), ALPHA, BETA,
+     $             WORK( ISIGMA ), IWORK( IDX ), IWORK( IDXP ), IDXQ,
+     $             PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM, C, S,
+     $             INFO )
+*
+*     Solve Secular Equation, compute DIFL, DIFR, and update VF, VL.
+*
+      CALL SLASD8( ICOMPQ, K, D, Z, VF, VL, DIFL, DIFR, LDGNUM,
+     $             WORK( ISIGMA ), WORK( IW ), INFO )
+*
+*     Save the poles if ICOMPQ = 1.
+*
+      IF( ICOMPQ.EQ.1 ) THEN
+         CALL SCOPY( K, D, 1, POLES( 1, 1 ), 1 )
+         CALL SCOPY( K, WORK( ISIGMA ), 1, POLES( 1, 2 ), 1 )
+      END IF
+*
+*     Unscale.
+*
+      CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO )
+*
+*     Prepare the IDXQ sorting permutation.
+*
+      N1 = K
+      N2 = N - K
+      CALL SLAMRG( N1, N2, D, 1, -1, IDXQ )
+*
+      RETURN
+*
+*     End of SLASD6
+*
+      END
diff --git a/costa/native/external/lapack/slasd7.f b/costa/native/external/lapack/slasd7.f
new file mode 100644
index 000000000..0b3e6442a
--- /dev/null
+++ b/costa/native/external/lapack/slasd7.f
@@ -0,0 +1,445 @@
+      SUBROUTINE SLASD7( ICOMPQ, NL, NR, SQRE, K, D, Z, ZW, VF, VFW, VL,
+     $                   VLW, ALPHA, BETA, DSIGMA, IDX, IDXP, IDXQ,
+     $                   PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM,
+     $                   C, S, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+*     Courant Institute, NAG Ltd., and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            GIVPTR, ICOMPQ, INFO, K, LDGCOL, LDGNUM, NL,
+     $                   NR, SQRE
+      REAL               ALPHA, BETA, C, S
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( LDGCOL, * ), IDX( * ), IDXP( * ),
+     $                   IDXQ( * ), PERM( * )
+      REAL               D( * ), DSIGMA( * ), GIVNUM( LDGNUM, * ),
+     $                   VF( * ), VFW( * ), VL( * ), VLW( * ), Z( * ),
+     $                   ZW( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLASD7 merges the two sets of singular values together into a single
+*  sorted set. Then it tries to deflate the size of the problem. There
+*  are two ways in which deflation can occur:  when two or more singular
+*  values are close together or if there is a tiny entry in the Z
+*  vector. For each such occurrence the order of the related
+*  secular equation problem is reduced by one.
+*
+*  SLASD7 is called from SLASD6.
+*
+*  Arguments
+*  =========
+*
+*  ICOMPQ  (input) INTEGER
+*          Specifies whether singular vectors are to be computed
+*          in compact form, as follows:
+*          = 0: Compute singular values only.
+*          = 1: Compute singular vectors of upper
+*               bidiagonal matrix in compact form.
+*
+*  NL     (input) INTEGER
+*         The row dimension of the upper block. NL >= 1.
+*
+*  NR     (input) INTEGER
+*         The row dimension of the lower block. NR >= 1.
+*
+*  SQRE   (input) INTEGER
+*         = 0: the lower block is an NR-by-NR square matrix.
+*         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+*
+*         The bidiagonal matrix has
+*         N = NL + NR + 1 rows and
+*         M = N + SQRE >= N columns.
+*
+*  K      (output) INTEGER
+*         Contains the dimension of the non-deflated matrix, this is
+*         the order of the related secular equation. 1 <= K <=N.
+*
+*  D      (input/output) REAL array, dimension ( N )
+*         On entry D contains the singular values of the two submatrices
+*         to be combined. On exit D contains the trailing (N-K) updated
+*         singular values (those which were deflated) sorted into
+*         increasing order.
+*
+*  Z      (output) REAL array, dimension ( M )
+*         On exit Z contains the updating row vector in the secular
+*         equation.
+*
+*  ZW     (workspace) REAL array, dimension ( M )
+*         Workspace for Z.
+*
+*  VF     (input/output) REAL array, dimension ( M )
+*         On entry, VF(1:NL+1) contains the first components of all
+*         right singular vectors of the upper block; and VF(NL+2:M)
+*         contains the first components of all right singular vectors
+*         of the lower block. On exit, VF contains the first components
+*         of all right singular vectors of the bidiagonal matrix.
+*
+*  VFW    (workspace) REAL array, dimension ( M )
+*         Workspace for VF.
+*
+*  VL     (input/output) REAL array, dimension ( M )
+*         On entry, VL(1:NL+1) contains the  last components of all
+*         right singular vectors of the upper block; and VL(NL+2:M)
+*         contains the last components of all right singular vectors
+*         of the lower block. On exit, VL contains the last components
+*         of all right singular vectors of the bidiagonal matrix.
+*
+*  VLW    (workspace) REAL array, dimension ( M )
+*         Workspace for VL.
+*
+*  ALPHA  (input) REAL
+*         Contains the diagonal element associated with the added row.
+*
+*  BETA   (input) REAL
+*         Contains the off-diagonal element associated with the added
+*         row.
+*
+*  DSIGMA (output) REAL array, dimension ( N )
+*         Contains a copy of the diagonal elements (K-1 singular values
+*         and one zero) in the secular equation.
+*
+*  IDX    (workspace) INTEGER array, dimension ( N )
+*         This will contain the permutation used to sort the contents of
+*         D into ascending order.
+*
+*  IDXP   (workspace) INTEGER array, dimension ( N )
+*         This will contain the permutation used to place deflated
+*         values of D at the end of the array. On output IDXP(2:K)
+*         points to the nondeflated D-values and IDXP(K+1:N)
+*         points to the deflated singular values.
+*
+*  IDXQ   (input) INTEGER array, dimension ( N )
+*         This contains the permutation which separately sorts the two
+*         sub-problems in D into ascending order.  Note that entries in
+*         the first half of this permutation must first be moved one
+*         position backward; and entries in the second half
+*         must first have NL+1 added to their values.
+*
+*  PERM   (output) INTEGER array, dimension ( N )
+*         The permutations (from deflation and sorting) to be applied
+*         to each singular block. Not referenced if ICOMPQ = 0.
+*
+*  GIVPTR (output) INTEGER
+*         The number of Givens rotations which took place in this
+*         subproblem. Not referenced if ICOMPQ = 0.
+*
+*  GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 )
+*         Each pair of numbers indicates a pair of columns to take place
+*         in a Givens rotation. Not referenced if ICOMPQ = 0.
+*
+*  LDGCOL (input) INTEGER
+*         The leading dimension of GIVCOL, must be at least N.
+*
+*  GIVNUM (output) REAL array, dimension ( LDGNUM, 2 )
+*         Each number indicates the C or S value to be used in the
+*         corresponding Givens rotation. Not referenced if ICOMPQ = 0.
+*
+*  LDGNUM (input) INTEGER
+*         The leading dimension of GIVNUM, must be at least N.
+*
+*  C      (output) REAL
+*         C contains garbage if SQRE =0 and the C-value of a Givens
+*         rotation related to the right null space if SQRE = 1.
+*
+*  S      (output) REAL
+*         S contains garbage if SQRE =0 and the S-value of a Givens
+*         rotation related to the right null space if SQRE = 1.
+*
+*  INFO   (output) INTEGER
+*         = 0:  successful exit.
+*         < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE, TWO, EIGHT
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0,
+     $                   EIGHT = 8.0E+0 )
+*     ..
+*     .. Local Scalars ..
+*
+      INTEGER            I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M, N,
+     $                   NLP1, NLP2
+      REAL               EPS, HLFTOL, TAU, TOL, Z1
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SLAMRG, SROT, XERBLA
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH, SLAPY2
+      EXTERNAL           SLAMCH, SLAPY2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      N = NL + NR + 1
+      M = N + SQRE
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( NL.LT.1 ) THEN
+         INFO = -2
+      ELSE IF( NR.LT.1 ) THEN
+         INFO = -3
+      ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN
+         INFO = -4
+      ELSE IF( LDGCOL.LT.N ) THEN
+         INFO = -22
+      ELSE IF( LDGNUM.LT.N ) THEN
+         INFO = -24
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLASD7', -INFO )
+         RETURN
+      END IF
+*
+      NLP1 = NL + 1
+      NLP2 = NL + 2
+      IF( ICOMPQ.EQ.1 ) THEN
+         GIVPTR = 0
+      END IF
+*
+*     Generate the first part of the vector Z and move the singular
+*     values in the first part of D one position backward.
+*
+      Z1 = ALPHA*VL( NLP1 )
+      VL( NLP1 ) = ZERO
+      TAU = VF( NLP1 )
+      DO 10 I = NL, 1, -1
+         Z( I+1 ) = ALPHA*VL( I )
+         VL( I ) = ZERO
+         VF( I+1 ) = VF( I )
+         D( I+1 ) = D( I )
+         IDXQ( I+1 ) = IDXQ( I ) + 1
+   10 CONTINUE
+      VF( 1 ) = TAU
+*
+*     Generate the second part of the vector Z.
+*
+      DO 20 I = NLP2, M
+         Z( I ) = BETA*VF( I )
+         VF( I ) = ZERO
+   20 CONTINUE
+*
+*     Sort the singular values into increasing order
+*
+      DO 30 I = NLP2, N
+         IDXQ( I ) = IDXQ( I ) + NLP1
+   30 CONTINUE
+*
+*     DSIGMA, IDXC, IDXC, and ZW are used as storage space.
+*
+      DO 40 I = 2, N
+         DSIGMA( I ) = D( IDXQ( I ) )
+         ZW( I ) = Z( IDXQ( I ) )
+         VFW( I ) = VF( IDXQ( I ) )
+         VLW( I ) = VL( IDXQ( I ) )
+   40 CONTINUE
+*
+      CALL SLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) )
+*
+      DO 50 I = 2, N
+         IDXI = 1 + IDX( I )
+         D( I ) = DSIGMA( IDXI )
+         Z( I ) = ZW( IDXI )
+         VF( I ) = VFW( IDXI )
+         VL( I ) = VLW( IDXI )
+   50 CONTINUE
+*
+*     Calculate the allowable deflation tolerence
+*
+      EPS = SLAMCH( 'Epsilon' )
+      TOL = MAX( ABS( ALPHA ), ABS( BETA ) )
+      TOL = EIGHT*EIGHT*EPS*MAX( ABS( D( N ) ), TOL )
+*
+*     There are 2 kinds of deflation -- first a value in the z-vector
+*     is small, second two (or more) singular values are very close
+*     together (their difference is small).
+*
+*     If the value in the z-vector is small, we simply permute the
+*     array so that the corresponding singular value is moved to the
+*     end.
+*
+*     If two values in the D-vector are close, we perform a two-sided
+*     rotation designed to make one of the corresponding z-vector
+*     entries zero, and then permute the array so that the deflated
+*     singular value is moved to the end.
+*
+*     If there are multiple singular values then the problem deflates.
+*     Here the number of equal singular values are found.  As each equal
+*     singular value is found, an elementary reflector is computed to
+*     rotate the corresponding singular subspace so that the
+*     corresponding components of Z are zero in this new basis.
+*
+      K = 1
+      K2 = N + 1
+      DO 60 J = 2, N
+         IF( ABS( Z( J ) ).LE.TOL ) THEN
+*
+*           Deflate due to small z component.
+*
+            K2 = K2 - 1
+            IDXP( K2 ) = J
+            IF( J.EQ.N )
+     $         GO TO 100
+         ELSE
+            JPREV = J
+            GO TO 70
+         END IF
+   60 CONTINUE
+   70 CONTINUE
+      J = JPREV
+   80 CONTINUE
+      J = J + 1
+      IF( J.GT.N )
+     $   GO TO 90
+      IF( ABS( Z( J ) ).LE.TOL ) THEN
+*
+*        Deflate due to small z component.
+*
+         K2 = K2 - 1
+         IDXP( K2 ) = J
+      ELSE
+*
+*        Check if singular values are close enough to allow deflation.
+*
+         IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN
+*
+*           Deflation is possible.
+*
+            S = Z( JPREV )
+            C = Z( J )
+*
+*           Find sqrt(a**2+b**2) without overflow or
+*           destructive underflow.
+*
+            TAU = SLAPY2( C, S )
+            Z( J ) = TAU
+            Z( JPREV ) = ZERO
+            C = C / TAU
+            S = -S / TAU
+*
+*           Record the appropriate Givens rotation
+*
+            IF( ICOMPQ.EQ.1 ) THEN
+               GIVPTR = GIVPTR + 1
+               IDXJP = IDXQ( IDX( JPREV )+1 )
+               IDXJ = IDXQ( IDX( J )+1 )
+               IF( IDXJP.LE.NLP1 ) THEN
+                  IDXJP = IDXJP - 1
+               END IF
+               IF( IDXJ.LE.NLP1 ) THEN
+                  IDXJ = IDXJ - 1
+               END IF
+               GIVCOL( GIVPTR, 2 ) = IDXJP
+               GIVCOL( GIVPTR, 1 ) = IDXJ
+               GIVNUM( GIVPTR, 2 ) = C
+               GIVNUM( GIVPTR, 1 ) = S
+            END IF
+            CALL SROT( 1, VF( JPREV ), 1, VF( J ), 1, C, S )
+            CALL SROT( 1, VL( JPREV ), 1, VL( J ), 1, C, S )
+            K2 = K2 - 1
+            IDXP( K2 ) = JPREV
+            JPREV = J
+         ELSE
+            K = K + 1
+            ZW( K ) = Z( JPREV )
+            DSIGMA( K ) = D( JPREV )
+            IDXP( K ) = JPREV
+            JPREV = J
+         END IF
+      END IF
+      GO TO 80
+   90 CONTINUE
+*
+*     Record the last singular value.
+*
+      K = K + 1
+      ZW( K ) = Z( JPREV )
+      DSIGMA( K ) = D( JPREV )
+      IDXP( K ) = JPREV
+*
+  100 CONTINUE
+*
+*     Sort the singular values into DSIGMA. The singular values which
+*     were not deflated go into the first K slots of DSIGMA, except
+*     that DSIGMA(1) is treated separately.
+*
+      DO 110 J = 2, N
+         JP = IDXP( J )
+         DSIGMA( J ) = D( JP )
+         VFW( J ) = VF( JP )
+         VLW( J ) = VL( JP )
+  110 CONTINUE
+      IF( ICOMPQ.EQ.1 ) THEN
+         DO 120 J = 2, N
+            JP = IDXP( J )
+            PERM( J ) = IDXQ( IDX( JP )+1 )
+            IF( PERM( J ).LE.NLP1 ) THEN
+               PERM( J ) = PERM( J ) - 1
+            END IF
+  120    CONTINUE
+      END IF
+*
+*     The deflated singular values go back into the last N - K slots of
+*     D.
+*
+      CALL SCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 )
+*
+*     Determine DSIGMA(1), DSIGMA(2), Z(1), VF(1), VL(1), VF(M), and
+*     VL(M).
+*
+      DSIGMA( 1 ) = ZERO
+      HLFTOL = TOL / TWO
+      IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL )
+     $   DSIGMA( 2 ) = HLFTOL
+      IF( M.GT.N ) THEN
+         Z( 1 ) = SLAPY2( Z1, Z( M ) )
+         IF( Z( 1 ).LE.TOL ) THEN
+            C = ONE
+            S = ZERO
+            Z( 1 ) = TOL
+         ELSE
+            C = Z1 / Z( 1 )
+            S = -Z( M ) / Z( 1 )
+         END IF
+         CALL SROT( 1, VF( M ), 1, VF( 1 ), 1, C, S )
+         CALL SROT( 1, VL( M ), 1, VL( 1 ), 1, C, S )
+      ELSE
+         IF( ABS( Z1 ).LE.TOL ) THEN
+            Z( 1 ) = TOL
+         ELSE
+            Z( 1 ) = Z1
+         END IF
+      END IF
+*
+*     Restore Z, VF, and VL.
+*
+      CALL SCOPY( K-1, ZW( 2 ), 1, Z( 2 ), 1 )
+      CALL SCOPY( N-1, VFW( 2 ), 1, VF( 2 ), 1 )
+      CALL SCOPY( N-1, VLW( 2 ), 1, VL( 2 ), 1 )
+*
+      RETURN
+*
+*     End of SLASD7
+*
+      END
diff --git a/costa/native/external/lapack/slasd8.f b/costa/native/external/lapack/slasd8.f
new file mode 100644
index 000000000..803afc2a8
--- /dev/null
+++ b/costa/native/external/lapack/slasd8.f
@@ -0,0 +1,254 @@
+      SUBROUTINE SLASD8( ICOMPQ, K, D, Z, VF, VL, DIFL, DIFR, LDDIFR,
+     $                   DSIGMA, WORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+*     Courant Institute, NAG Ltd., and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            ICOMPQ, INFO, K, LDDIFR
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * ), DIFL( * ), DIFR( LDDIFR, * ),
+     $                   DSIGMA( * ), VF( * ), VL( * ), WORK( * ),
+     $                   Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLASD8 finds the square roots of the roots of the secular equation,
+*  as defined by the values in DSIGMA and Z. It makes the appropriate
+*  calls to SLASD4, and stores, for each  element in D, the distance
+*  to its two nearest poles (elements in DSIGMA). It also updates
+*  the arrays VF and VL, the first and last components of all the
+*  right singular vectors of the original bidiagonal matrix.
+*
+*  SLASD8 is called from SLASD6.
+*
+*  Arguments
+*  =========
+*
+*  ICOMPQ  (input) INTEGER
+*          Specifies whether singular vectors are to be computed in
+*          factored form in the calling routine:
+*          = 0: Compute singular values only.
+*          = 1: Compute singular vectors in factored form as well.
+*
+*  K       (input) INTEGER
+*          The number of terms in the rational function to be solved
+*          by SLASD4.  K >= 1.
+*
+*  D       (output) REAL array, dimension ( K )
+*          On output, D contains the updated singular values.
+*
+*  Z       (input) REAL array, dimension ( K )
+*          The first K elements of this array contain the components
+*          of the deflation-adjusted updating row vector.
+*
+*  VF      (input/output) REAL array, dimension ( K )
+*          On entry, VF contains  information passed through DBEDE8.
+*          On exit, VF contains the first K components of the first
+*          components of all right singular vectors of the bidiagonal
+*          matrix.
+*
+*  VL      (input/output) REAL array, dimension ( K )
+*          On entry, VL contains  information passed through DBEDE8.
+*          On exit, VL contains the first K components of the last
+*          components of all right singular vectors of the bidiagonal
+*          matrix.
+*
+*  DIFL    (output) REAL array, dimension ( K )
+*          On exit, DIFL(I) = D(I) - DSIGMA(I).
+*
+*  DIFR    (output) REAL array,
+*                   dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and
+*                   dimension ( K ) if ICOMPQ = 0.
+*          On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not
+*          defined and will not be referenced.
+*
+*          If ICOMPQ = 1, DIFR(1:K,2) is an array containing the
+*          normalizing factors for the right singular vector matrix.
+*
+*  LDDIFR  (input) INTEGER
+*          The leading dimension of DIFR, must be at least K.
+*
+*  DSIGMA  (input) REAL array, dimension ( K )
+*          The first K elements of this array contain the old roots
+*          of the deflated updating problem.  These are the poles
+*          of the secular equation.
+*
+*  WORK    (workspace) REAL array, dimension at least 3 * K
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = 1, an singular value did not converge
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IWK1, IWK2, IWK2I, IWK3, IWK3I, J
+      REAL               DIFLJ, DIFRJ, DJ, DSIGJ, DSIGJP, RHO, TEMP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SLASCL, SLASD4, SLASET, XERBLA
+*     ..
+*     .. External Functions ..
+      REAL               SDOT, SLAMC3, SNRM2
+      EXTERNAL           SDOT, SLAMC3, SNRM2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, SIGN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( K.LT.1 ) THEN
+         INFO = -2
+      ELSE IF( LDDIFR.LT.K ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLASD8', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( K.EQ.1 ) THEN
+         D( 1 ) = ABS( Z( 1 ) )
+         DIFL( 1 ) = D( 1 )
+         IF( ICOMPQ.EQ.1 ) THEN
+            DIFL( 2 ) = ONE
+            DIFR( 1, 2 ) = ONE
+         END IF
+         RETURN
+      END IF
+*
+*     Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can
+*     be computed with high relative accuracy (barring over/underflow).
+*     This is a problem on machines without a guard digit in
+*     add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
+*     The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I),
+*     which on any of these machines zeros out the bottommost
+*     bit of DSIGMA(I) if it is 1; this makes the subsequent
+*     subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation
+*     occurs. On binary machines with a guard digit (almost all
+*     machines) it does not change DSIGMA(I) at all. On hexadecimal
+*     and decimal machines with a guard digit, it slightly
+*     changes the bottommost bits of DSIGMA(I). It does not account
+*     for hexadecimal or decimal machines without guard digits
+*     (we know of none). We use a subroutine call to compute
+*     2*DLAMBDA(I) to prevent optimizing compilers from eliminating
+*     this code.
+*
+      DO 10 I = 1, K
+         DSIGMA( I ) = SLAMC3( DSIGMA( I ), DSIGMA( I ) ) - DSIGMA( I )
+   10 CONTINUE
+*
+*     Book keeping.
+*
+      IWK1 = 1
+      IWK2 = IWK1 + K
+      IWK3 = IWK2 + K
+      IWK2I = IWK2 - 1
+      IWK3I = IWK3 - 1
+*
+*     Normalize Z.
+*
+      RHO = SNRM2( K, Z, 1 )
+      CALL SLASCL( 'G', 0, 0, RHO, ONE, K, 1, Z, K, INFO )
+      RHO = RHO*RHO
+*
+*     Initialize WORK(IWK3).
+*
+      CALL SLASET( 'A', K, 1, ONE, ONE, WORK( IWK3 ), K )
+*
+*     Compute the updated singular values, the arrays DIFL, DIFR,
+*     and the updated Z.
+*
+      DO 40 J = 1, K
+         CALL SLASD4( K, J, DSIGMA, Z, WORK( IWK1 ), RHO, D( J ),
+     $                WORK( IWK2 ), INFO )
+*
+*        If the root finder fails, the computation is terminated.
+*
+         IF( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+         WORK( IWK3I+J ) = WORK( IWK3I+J )*WORK( J )*WORK( IWK2I+J )
+         DIFL( J ) = -WORK( J )
+         DIFR( J, 1 ) = -WORK( J+1 )
+         DO 20 I = 1, J - 1
+            WORK( IWK3I+I ) = WORK( IWK3I+I )*WORK( I )*
+     $                        WORK( IWK2I+I ) / ( DSIGMA( I )-
+     $                        DSIGMA( J ) ) / ( DSIGMA( I )+
+     $                        DSIGMA( J ) )
+   20    CONTINUE
+         DO 30 I = J + 1, K
+            WORK( IWK3I+I ) = WORK( IWK3I+I )*WORK( I )*
+     $                        WORK( IWK2I+I ) / ( DSIGMA( I )-
+     $                        DSIGMA( J ) ) / ( DSIGMA( I )+
+     $                        DSIGMA( J ) )
+   30    CONTINUE
+   40 CONTINUE
+*
+*     Compute updated Z.
+*
+      DO 50 I = 1, K
+         Z( I ) = SIGN( SQRT( ABS( WORK( IWK3I+I ) ) ), Z( I ) )
+   50 CONTINUE
+*
+*     Update VF and VL.
+*
+      DO 80 J = 1, K
+         DIFLJ = DIFL( J )
+         DJ = D( J )
+         DSIGJ = -DSIGMA( J )
+         IF( J.LT.K ) THEN
+            DIFRJ = -DIFR( J, 1 )
+            DSIGJP = -DSIGMA( J+1 )
+         END IF
+         WORK( J ) = -Z( J ) / DIFLJ / ( DSIGMA( J )+DJ )
+         DO 60 I = 1, J - 1
+            WORK( I ) = Z( I ) / ( SLAMC3( DSIGMA( I ), DSIGJ )-DIFLJ )
+     $                   / ( DSIGMA( I )+DJ )
+   60    CONTINUE
+         DO 70 I = J + 1, K
+            WORK( I ) = Z( I ) / ( SLAMC3( DSIGMA( I ), DSIGJP )+DIFRJ )
+     $                   / ( DSIGMA( I )+DJ )
+   70    CONTINUE
+         TEMP = SNRM2( K, WORK, 1 )
+         WORK( IWK2I+J ) = SDOT( K, WORK, 1, VF, 1 ) / TEMP
+         WORK( IWK3I+J ) = SDOT( K, WORK, 1, VL, 1 ) / TEMP
+         IF( ICOMPQ.EQ.1 ) THEN
+            DIFR( J, 2 ) = TEMP
+         END IF
+   80 CONTINUE
+*
+      CALL SCOPY( K, WORK( IWK2 ), 1, VF, 1 )
+      CALL SCOPY( K, WORK( IWK3 ), 1, VL, 1 )
+*
+      RETURN
+*
+*     End of SLASD8
+*
+      END
diff --git a/costa/native/external/lapack/slasd9.f b/costa/native/external/lapack/slasd9.f
new file mode 100644
index 000000000..99dce6291
--- /dev/null
+++ b/costa/native/external/lapack/slasd9.f
@@ -0,0 +1,256 @@
+      SUBROUTINE SLASD9( ICOMPQ, LDU, K, D, Z, VF, VL, DIFL, DIFR,
+     $                   DSIGMA, WORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+*     Courant Institute, NAG Ltd., and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            ICOMPQ, INFO, K, LDU
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * ), DIFL( * ), DIFR( LDU, * ), DSIGMA( * ),
+     $                   VF( * ), VL( * ), WORK( * ), Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLASD9 finds the square roots of the roots of the secular equation,
+*  as defined by the values in DSIGMA and Z.  It makes the
+*  appropriate calls to SLASD4, and stores, for each  element in D,
+*  the distance to its two nearest poles (elements in DSIGMA). It also
+*  updates the arrays VF and VL, the first and last components of all
+*  the right singular vectors of the original bidiagonal matrix.
+*
+*  SLASD9 is called from SLASD7.
+*
+*  Arguments
+*  =========
+*
+*  ICOMPQ  (input) INTEGER
+*          Specifies whether singular vectors are to be computed in
+*          factored form in the calling routine:
+*
+*             ICOMPQ = 0             Compute singular values only.
+*
+*             ICOMPQ = 1             Compute singular vector matrices in
+*                                    factored form also.
+*  K       (input) INTEGER
+*          The number of terms in the rational function to be solved by
+*          SLASD4.  K >= 1.
+*
+*  D       (output) REAL array, dimension(K)
+*          D(I) contains the updated singular values.
+*
+*  DSIGMA  (input) REAL array, dimension(K)
+*          The first K elements of this array contain the old roots
+*          of the deflated updating problem.  These are the poles
+*          of the secular equation.
+*
+*  Z       (input) REAL array, dimension (K)
+*          The first K elements of this array contain the components
+*          of the deflation-adjusted updating row vector.
+*
+*  VF      (input/output) REAL array, dimension(K)
+*          On entry, VF contains  information passed through SBEDE8.f
+*          On exit, VF contains the first K components of the first
+*          components of all right singular vectors of the bidiagonal
+*          matrix.
+*
+*  VL      (input/output) REAL array, dimension(K)
+*          On entry, VL contains  information passed through SBEDE8.f
+*          On exit, VL contains the first K components of the last
+*          components of all right singular vectors of the bidiagonal
+*          matrix.
+*
+*  DIFL    (output) REAL array, dimension (K).
+*          On exit, DIFL(I) = D(I) - DSIGMA(I).
+*
+*  DIFR    (output) REAL array,
+*                              dimension (LDU, 2) if ICOMPQ =1 and
+*                              dimension (K) if ICOMPQ = 0.
+*          On exit, DIFR(I, 1) = D(I) - DSIGMA(I+1), DIFR(K, 1) is not
+*          defined and will not be referenced.
+*
+*          If ICOMPQ = 1, DIFR(1:K, 2) is an array containing the
+*          normalizing factors for the right singular vector matrix.
+*
+*  WORK    (workspace) REAL array,
+*                                 dimension at least (3 * K)
+*          Workspace.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = 1, an singular value did not converge
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IWK1, IWK2, IWK2I, IWK3, IWK3I, J
+      REAL               DIFLJ, DIFRJ, DJ, DJP1, DSIGJ, DSIGJP, RHO,
+     $                   TEMP
+*     ..
+*     .. External Functions ..
+      REAL               SDOT, SLAMC3, SNRM2
+      EXTERNAL           SDOT, SLAMC3, SNRM2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SLASCL, SLASD4, SLASET, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, SIGN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( K.LT.1 ) THEN
+         INFO = -3
+      ELSE IF( LDU.LT.K ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLASD9', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( K.EQ.1 ) THEN
+         D( 1 ) = ABS( Z( 1 ) )
+         DIFL( 1 ) = D( 1 )
+         IF( ICOMPQ.EQ.1 ) THEN
+            DIFL( 2 ) = ONE
+            DIFR( 1, 2 ) = ONE
+         END IF
+         RETURN
+      END IF
+*
+*     Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can
+*     be computed with high relative accuracy (barring over/underflow).
+*     This is a problem on machines without a guard digit in
+*     add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
+*     The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I),
+*     which on any of these machines zeros out the bottommost
+*     bit of DSIGMA(I) if it is 1; this makes the subsequent
+*     subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation
+*     occurs. On binary machines with a guard digit (almost all
+*     machines) it does not change DSIGMA(I) at all. On hexadecimal
+*     and decimal machines with a guard digit, it slightly
+*     changes the bottommost bits of DSIGMA(I). It does not account
+*     for hexadecimal or decimal machines without guard digits
+*     (we know of none). We use a subroutine call to compute
+*     2*DLAMBDA(I) to prevent optimizing compilers from eliminating
+*     this code.
+*
+      DO 10 I = 1, K
+         DSIGMA( I ) = SLAMC3( DSIGMA( I ), DSIGMA( I ) ) - DSIGMA( I )
+   10 CONTINUE
+*
+*     Book keeping.
+*
+      IWK1 = 1
+      IWK2 = IWK1 + K
+      IWK3 = IWK2 + K
+      IWK2I = IWK2 - 1
+      IWK3I = IWK3 - 1
+*
+*     Normalize Z.
+*
+      RHO = SNRM2( K, Z, 1 )
+      CALL SLASCL( 'G', 0, 0, RHO, ONE, K, 1, Z, K, INFO )
+      RHO = RHO*RHO
+*
+*     Initialize WORK(IWK3).
+*
+      CALL SLASET( 'A', K, 1, ONE, ONE, WORK( IWK3 ), K )
+*
+*     Compute the updated singular values, the arrays DIFL, DIFR,
+*     and the updated Z.
+*
+      DO 40 J = 1, K
+         CALL SLASD4( K, J, DSIGMA, Z, WORK( IWK1 ), RHO, D( J ),
+     $                WORK( IWK2 ), INFO )
+*
+*        If the root finder fails, the computation is terminated.
+*
+         IF( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+         WORK( IWK3I+J ) = WORK( IWK3I+J )*WORK( J )*WORK( IWK2I+J )
+         DIFL( J ) = -WORK( J )
+         DIFR( J, 1 ) = -WORK( J+1 )
+         DO 20 I = 1, J - 1
+            WORK( IWK3I+I ) = WORK( IWK3I+I )*WORK( I )*
+     $                        WORK( IWK2I+I ) / ( DSIGMA( I )-
+     $                        DSIGMA( J ) ) / ( DSIGMA( I )+
+     $                        DSIGMA( J ) )
+   20    CONTINUE
+         DO 30 I = J + 1, K
+            WORK( IWK3I+I ) = WORK( IWK3I+I )*WORK( I )*
+     $                        WORK( IWK2I+I ) / ( DSIGMA( I )-
+     $                        DSIGMA( J ) ) / ( DSIGMA( I )+
+     $                        DSIGMA( J ) )
+   30    CONTINUE
+   40 CONTINUE
+*
+*     Compute updated Z.
+*
+      DO 50 I = 1, K
+         Z( I ) = SIGN( SQRT( ABS( WORK( IWK3I+I ) ) ), Z( I ) )
+   50 CONTINUE
+*
+*     Update VF and VL.
+*
+      DO 80 J = 1, K
+         DIFLJ = DIFL( J )
+         DJ = D( J )
+         DSIGJ = -DSIGMA( J )
+         IF( J.LT.K ) THEN
+            DIFRJ = -DIFR( J, 1 )
+            DJP1 = D( J+1 )
+            DSIGJP = -DSIGMA( J+1 )
+         END IF
+         WORK( J ) = -Z( J ) / DIFLJ / ( DSIGMA( J )+DJ )
+         DO 60 I = 1, J - 1
+            WORK( I ) = Z( I ) / ( SLAMC3( DSIGMA( I ), DSIGJ )-DIFLJ )
+     $                   / ( DSIGMA( I )+DJ )
+   60    CONTINUE
+         DO 70 I = J + 1, K
+            WORK( I ) = Z( I ) / ( SLAMC3( DSIGMA( I ), DSIGJP )+DIFRJ )
+     $                   / ( DSIGMA( I )+DJ )
+   70    CONTINUE
+         TEMP = SNRM2( K, WORK, 1 )
+         WORK( IWK2I+J ) = SDOT( K, WORK, 1, VF, 1 ) / TEMP
+         WORK( IWK3I+J ) = SDOT( K, WORK, 1, VL, 1 ) / TEMP
+         IF( ICOMPQ.EQ.1 ) THEN
+            DIFR( J, 2 ) = TEMP
+         END IF
+   80 CONTINUE
+*
+      CALL SCOPY( K, WORK( IWK2 ), 1, VF, 1 )
+      CALL SCOPY( K, WORK( IWK3 ), 1, VL, 1 )
+*
+      RETURN
+*
+*     End of SLASD9
+*
+      END
diff --git a/costa/native/external/lapack/slasda.f b/costa/native/external/lapack/slasda.f
new file mode 100644
index 000000000..d00eb0cff
--- /dev/null
+++ b/costa/native/external/lapack/slasda.f
@@ -0,0 +1,391 @@
+      SUBROUTINE SLASDA( ICOMPQ, SMLSIZ, N, SQRE, D, E, U, LDU, VT, K,
+     $                   DIFL, DIFR, Z, POLES, GIVPTR, GIVCOL, LDGCOL,
+     $                   PERM, GIVNUM, C, S, WORK, IWORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            ICOMPQ, INFO, LDGCOL, LDU, N, SMLSIZ, SQRE
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ),
+     $                   K( * ), PERM( LDGCOL, * )
+      REAL               C( * ), D( * ), DIFL( LDU, * ), DIFR( LDU, * ),
+     $                   E( * ), GIVNUM( LDU, * ), POLES( LDU, * ),
+     $                   S( * ), U( LDU, * ), VT( LDU, * ), WORK( * ),
+     $                   Z( LDU, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  Using a divide and conquer approach, SLASDA computes the singular
+*  value decomposition (SVD) of a real upper bidiagonal N-by-M matrix
+*  B with diagonal D and offdiagonal E, where M = N + SQRE. The
+*  algorithm computes the singular values in the SVD B = U * S * VT.
+*  The orthogonal matrices U and VT are optionally computed in
+*  compact form.
+*
+*  A related subroutine, SLASD0, computes the singular values and
+*  the singular vectors in explicit form.
+*
+*  Arguments
+*  =========
+*
+*  ICOMPQ (input) INTEGER
+*         Specifies whether singular vectors are to be computed
+*         in compact form, as follows
+*         = 0: Compute singular values only.
+*         = 1: Compute singular vectors of upper bidiagonal
+*              matrix in compact form.
+*
+*  SMLSIZ (input) INTEGER
+*         The maximum size of the subproblems at the bottom of the
+*         computation tree.
+*
+*  N      (input) INTEGER
+*         The row dimension of the upper bidiagonal matrix. This is
+*         also the dimension of the main diagonal array D.
+*
+*  SQRE   (input) INTEGER
+*         Specifies the column dimension of the bidiagonal matrix.
+*         = 0: The bidiagonal matrix has column dimension M = N;
+*         = 1: The bidiagonal matrix has column dimension M = N + 1.
+*
+*  D      (input/output) REAL array, dimension ( N )
+*         On entry D contains the main diagonal of the bidiagonal
+*         matrix. On exit D, if INFO = 0, contains its singular values.
+*
+*  E      (input) REAL array, dimension ( M-1 )
+*         Contains the subdiagonal entries of the bidiagonal matrix.
+*         On exit, E has been destroyed.
+*
+*  U      (output) REAL array,
+*         dimension ( LDU, SMLSIZ ) if ICOMPQ = 1, and not referenced
+*         if ICOMPQ = 0. If ICOMPQ = 1, on exit, U contains the left
+*         singular vector matrices of all subproblems at the bottom
+*         level.
+*
+*  LDU    (input) INTEGER, LDU = > N.
+*         The leading dimension of arrays U, VT, DIFL, DIFR, POLES,
+*         GIVNUM, and Z.
+*
+*  VT     (output) REAL array,
+*         dimension ( LDU, SMLSIZ+1 ) if ICOMPQ = 1, and not referenced
+*         if ICOMPQ = 0. If ICOMPQ = 1, on exit, VT' contains the right
+*         singular vector matrices of all subproblems at the bottom
+*         level.
+*
+*  K      (output) INTEGER array,
+*         dimension ( N ) if ICOMPQ = 1 and dimension 1 if ICOMPQ = 0.
+*         If ICOMPQ = 1, on exit, K(I) is the dimension of the I-th
+*         secular equation on the computation tree.
+*
+*  DIFL   (output) REAL array, dimension ( LDU, NLVL ),
+*         where NLVL = floor(log_2 (N/SMLSIZ))).
+*
+*  DIFR   (output) REAL array,
+*                  dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1 and
+*                  dimension ( N ) if ICOMPQ = 0.
+*         If ICOMPQ = 1, on exit, DIFL(1:N, I) and DIFR(1:N, 2 * I - 1)
+*         record distances between singular values on the I-th
+*         level and singular values on the (I -1)-th level, and
+*         DIFR(1:N, 2 * I ) contains the normalizing factors for
+*         the right singular vector matrix. See SLASD8 for details.
+*
+*  Z      (output) REAL array,
+*                  dimension ( LDU, NLVL ) if ICOMPQ = 1 and
+*                  dimension ( N ) if ICOMPQ = 0.
+*         The first K elements of Z(1, I) contain the components of
+*         the deflation-adjusted updating row vector for subproblems
+*         on the I-th level.
+*
+*  POLES  (output) REAL array,
+*         dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced
+*         if ICOMPQ = 0. If ICOMPQ = 1, on exit, POLES(1, 2*I - 1) and
+*         POLES(1, 2*I) contain  the new and old singular values
+*         involved in the secular equations on the I-th level.
+*
+*  GIVPTR (output) INTEGER array,
+*         dimension ( N ) if ICOMPQ = 1, and not referenced if
+*         ICOMPQ = 0. If ICOMPQ = 1, on exit, GIVPTR( I ) records
+*         the number of Givens rotations performed on the I-th
+*         problem on the computation tree.
+*
+*  GIVCOL (output) INTEGER array,
+*         dimension ( LDGCOL, 2 * NLVL ) if ICOMPQ = 1, and not
+*         referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I,
+*         GIVCOL(1, 2 *I - 1) and GIVCOL(1, 2 *I) record the locations
+*         of Givens rotations performed on the I-th level on the
+*         computation tree.
+*
+*  LDGCOL (input) INTEGER, LDGCOL = > N.
+*         The leading dimension of arrays GIVCOL and PERM.
+*
+*  PERM   (output) INTEGER array,
+*         dimension ( LDGCOL, NLVL ) if ICOMPQ = 1, and not referenced
+*         if ICOMPQ = 0. If ICOMPQ = 1, on exit, PERM(1, I) records
+*         permutations done on the I-th level of the computation tree.
+*
+*  GIVNUM (output) REAL array,
+*         dimension ( LDU,  2 * NLVL ) if ICOMPQ = 1, and not
+*         referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I,
+*         GIVNUM(1, 2 *I - 1) and GIVNUM(1, 2 *I) record the C- and S-
+*         values of Givens rotations performed on the I-th level on
+*         the computation tree.
+*
+*  C      (output) REAL array,
+*         dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0.
+*         If ICOMPQ = 1 and the I-th subproblem is not square, on exit,
+*         C( I ) contains the C-value of a Givens rotation related to
+*         the right null space of the I-th subproblem.
+*
+*  S      (output) REAL array, dimension ( N ) if
+*         ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1
+*         and the I-th subproblem is not square, on exit, S( I )
+*         contains the S-value of a Givens rotation related to
+*         the right null space of the I-th subproblem.
+*
+*  WORK   (workspace) REAL array, dimension
+*         (6 * N + (SMLSIZ + 1)*(SMLSIZ + 1)).
+*
+*  IWORK  (workspace) INTEGER array.
+*         Dimension must be at least (7 * N).
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = 1, an singular value did not converge
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, I1, IC, IDXQ, IDXQI, IM1, INODE, ITEMP, IWK,
+     $                   J, LF, LL, LVL, LVL2, M, NCC, ND, NDB1, NDIML,
+     $                   NDIMR, NL, NLF, NLP1, NLVL, NR, NRF, NRP1, NRU,
+     $                   NWORK1, NWORK2, SMLSZP, SQREI, VF, VFI, VL, VLI
+      REAL               ALPHA, BETA
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SLASD6, SLASDQ, SLASDT, SLASET, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( SMLSIZ.LT.3 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN
+         INFO = -4
+      ELSE IF( LDU.LT.( N+SQRE ) ) THEN
+         INFO = -8
+      ELSE IF( LDGCOL.LT.N ) THEN
+         INFO = -17
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLASDA', -INFO )
+         RETURN
+      END IF
+*
+      M = N + SQRE
+*
+*     If the input matrix is too small, call SLASDQ to find the SVD.
+*
+      IF( N.LE.SMLSIZ ) THEN
+         IF( ICOMPQ.EQ.0 ) THEN
+            CALL SLASDQ( 'U', SQRE, N, 0, 0, 0, D, E, VT, LDU, U, LDU,
+     $                   U, LDU, WORK, INFO )
+         ELSE
+            CALL SLASDQ( 'U', SQRE, N, M, N, 0, D, E, VT, LDU, U, LDU,
+     $                   U, LDU, WORK, INFO )
+         END IF
+         RETURN
+      END IF
+*
+*     Book-keeping and  set up the computation tree.
+*
+      INODE = 1
+      NDIML = INODE + N
+      NDIMR = NDIML + N
+      IDXQ = NDIMR + N
+      IWK = IDXQ + N
+*
+      NCC = 0
+      NRU = 0
+*
+      SMLSZP = SMLSIZ + 1
+      VF = 1
+      VL = VF + M
+      NWORK1 = VL + M
+      NWORK2 = NWORK1 + SMLSZP*SMLSZP
+*
+      CALL SLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ),
+     $             IWORK( NDIMR ), SMLSIZ )
+*
+*     for the nodes on bottom level of the tree, solve
+*     their subproblems by SLASDQ.
+*
+      NDB1 = ( ND+1 ) / 2
+      DO 30 I = NDB1, ND
+*
+*        IC : center row of each node
+*        NL : number of rows of left  subproblem
+*        NR : number of rows of right subproblem
+*        NLF: starting row of the left   subproblem
+*        NRF: starting row of the right  subproblem
+*
+         I1 = I - 1
+         IC = IWORK( INODE+I1 )
+         NL = IWORK( NDIML+I1 )
+         NLP1 = NL + 1
+         NR = IWORK( NDIMR+I1 )
+         NLF = IC - NL
+         NRF = IC + 1
+         IDXQI = IDXQ + NLF - 2
+         VFI = VF + NLF - 1
+         VLI = VL + NLF - 1
+         SQREI = 1
+         IF( ICOMPQ.EQ.0 ) THEN
+            CALL SLASET( 'A', NLP1, NLP1, ZERO, ONE, WORK( NWORK1 ),
+     $                   SMLSZP )
+            CALL SLASDQ( 'U', SQREI, NL, NLP1, NRU, NCC, D( NLF ),
+     $                   E( NLF ), WORK( NWORK1 ), SMLSZP,
+     $                   WORK( NWORK2 ), NL, WORK( NWORK2 ), NL,
+     $                   WORK( NWORK2 ), INFO )
+            ITEMP = NWORK1 + NL*SMLSZP
+            CALL SCOPY( NLP1, WORK( NWORK1 ), 1, WORK( VFI ), 1 )
+            CALL SCOPY( NLP1, WORK( ITEMP ), 1, WORK( VLI ), 1 )
+         ELSE
+            CALL SLASET( 'A', NL, NL, ZERO, ONE, U( NLF, 1 ), LDU )
+            CALL SLASET( 'A', NLP1, NLP1, ZERO, ONE, VT( NLF, 1 ), LDU )
+            CALL SLASDQ( 'U', SQREI, NL, NLP1, NL, NCC, D( NLF ),
+     $                   E( NLF ), VT( NLF, 1 ), LDU, U( NLF, 1 ), LDU,
+     $                   U( NLF, 1 ), LDU, WORK( NWORK1 ), INFO )
+            CALL SCOPY( NLP1, VT( NLF, 1 ), 1, WORK( VFI ), 1 )
+            CALL SCOPY( NLP1, VT( NLF, NLP1 ), 1, WORK( VLI ), 1 )
+         END IF
+         IF( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+         DO 10 J = 1, NL
+            IWORK( IDXQI+J ) = J
+   10    CONTINUE
+         IF( ( I.EQ.ND ) .AND. ( SQRE.EQ.0 ) ) THEN
+            SQREI = 0
+         ELSE
+            SQREI = 1
+         END IF
+         IDXQI = IDXQI + NLP1
+         VFI = VFI + NLP1
+         VLI = VLI + NLP1
+         NRP1 = NR + SQREI
+         IF( ICOMPQ.EQ.0 ) THEN
+            CALL SLASET( 'A', NRP1, NRP1, ZERO, ONE, WORK( NWORK1 ),
+     $                   SMLSZP )
+            CALL SLASDQ( 'U', SQREI, NR, NRP1, NRU, NCC, D( NRF ),
+     $                   E( NRF ), WORK( NWORK1 ), SMLSZP,
+     $                   WORK( NWORK2 ), NR, WORK( NWORK2 ), NR,
+     $                   WORK( NWORK2 ), INFO )
+            ITEMP = NWORK1 + ( NRP1-1 )*SMLSZP
+            CALL SCOPY( NRP1, WORK( NWORK1 ), 1, WORK( VFI ), 1 )
+            CALL SCOPY( NRP1, WORK( ITEMP ), 1, WORK( VLI ), 1 )
+         ELSE
+            CALL SLASET( 'A', NR, NR, ZERO, ONE, U( NRF, 1 ), LDU )
+            CALL SLASET( 'A', NRP1, NRP1, ZERO, ONE, VT( NRF, 1 ), LDU )
+            CALL SLASDQ( 'U', SQREI, NR, NRP1, NR, NCC, D( NRF ),
+     $                   E( NRF ), VT( NRF, 1 ), LDU, U( NRF, 1 ), LDU,
+     $                   U( NRF, 1 ), LDU, WORK( NWORK1 ), INFO )
+            CALL SCOPY( NRP1, VT( NRF, 1 ), 1, WORK( VFI ), 1 )
+            CALL SCOPY( NRP1, VT( NRF, NRP1 ), 1, WORK( VLI ), 1 )
+         END IF
+         IF( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+         DO 20 J = 1, NR
+            IWORK( IDXQI+J ) = J
+   20    CONTINUE
+   30 CONTINUE
+*
+*     Now conquer each subproblem bottom-up.
+*
+      J = 2**NLVL
+      DO 50 LVL = NLVL, 1, -1
+         LVL2 = LVL*2 - 1
+*
+*        Find the first node LF and last node LL on
+*        the current level LVL.
+*
+         IF( LVL.EQ.1 ) THEN
+            LF = 1
+            LL = 1
+         ELSE
+            LF = 2**( LVL-1 )
+            LL = 2*LF - 1
+         END IF
+         DO 40 I = LF, LL
+            IM1 = I - 1
+            IC = IWORK( INODE+IM1 )
+            NL = IWORK( NDIML+IM1 )
+            NR = IWORK( NDIMR+IM1 )
+            NLF = IC - NL
+            NRF = IC + 1
+            IF( I.EQ.LL ) THEN
+               SQREI = SQRE
+            ELSE
+               SQREI = 1
+            END IF
+            VFI = VF + NLF - 1
+            VLI = VL + NLF - 1
+            IDXQI = IDXQ + NLF - 1
+            ALPHA = D( IC )
+            BETA = E( IC )
+            IF( ICOMPQ.EQ.0 ) THEN
+               CALL SLASD6( ICOMPQ, NL, NR, SQREI, D( NLF ),
+     $                      WORK( VFI ), WORK( VLI ), ALPHA, BETA,
+     $                      IWORK( IDXQI ), PERM, GIVPTR( 1 ), GIVCOL,
+     $                      LDGCOL, GIVNUM, LDU, POLES, DIFL, DIFR, Z,
+     $                      K( 1 ), C( 1 ), S( 1 ), WORK( NWORK1 ),
+     $                      IWORK( IWK ), INFO )
+            ELSE
+               J = J - 1
+               CALL SLASD6( ICOMPQ, NL, NR, SQREI, D( NLF ),
+     $                      WORK( VFI ), WORK( VLI ), ALPHA, BETA,
+     $                      IWORK( IDXQI ), PERM( NLF, LVL ),
+     $                      GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
+     $                      GIVNUM( NLF, LVL2 ), LDU,
+     $                      POLES( NLF, LVL2 ), DIFL( NLF, LVL ),
+     $                      DIFR( NLF, LVL2 ), Z( NLF, LVL ), K( J ),
+     $                      C( J ), S( J ), WORK( NWORK1 ),
+     $                      IWORK( IWK ), INFO )
+            END IF
+            IF( INFO.NE.0 ) THEN
+               RETURN
+            END IF
+   40    CONTINUE
+   50 CONTINUE
+*
+      RETURN
+*
+*     End of SLASDA
+*
+      END
diff --git a/costa/native/external/lapack/slasdq.f b/costa/native/external/lapack/slasdq.f
new file mode 100644
index 000000000..b1a689e6d
--- /dev/null
+++ b/costa/native/external/lapack/slasdq.f
@@ -0,0 +1,317 @@
+      SUBROUTINE SLASDQ( UPLO, SQRE, N, NCVT, NRU, NCC, D, E, VT, LDVT,
+     $                   U, LDU, C, LDC, WORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU, SQRE
+*     ..
+*     .. Array Arguments ..
+      REAL               C( LDC, * ), D( * ), E( * ), U( LDU, * ),
+     $                   VT( LDVT, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLASDQ computes the singular value decomposition (SVD) of a real
+*  (upper or lower) bidiagonal matrix with diagonal D and offdiagonal
+*  E, accumulating the transformations if desired. Letting B denote
+*  the input bidiagonal matrix, the algorithm computes orthogonal
+*  matrices Q and P such that B = Q * S * P' (P' denotes the transpose
+*  of P). The singular values S are overwritten on D.
+*
+*  The input matrix U  is changed to U  * Q  if desired.
+*  The input matrix VT is changed to P' * VT if desired.
+*  The input matrix C  is changed to Q' * C  if desired.
+*
+*  See "Computing  Small Singular Values of Bidiagonal Matrices With
+*  Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan,
+*  LAPACK Working Note #3, for a detailed description of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  UPLO  (input) CHARACTER*1
+*        On entry, UPLO specifies whether the input bidiagonal matrix
+*        is upper or lower bidiagonal, and wether it is square are
+*        not.
+*           UPLO = 'U' or 'u'   B is upper bidiagonal.
+*           UPLO = 'L' or 'l'   B is lower bidiagonal.
+*
+*  SQRE  (input) INTEGER
+*        = 0: then the input matrix is N-by-N.
+*        = 1: then the input matrix is N-by-(N+1) if UPLU = 'U' and
+*             (N+1)-by-N if UPLU = 'L'.
+*
+*        The bidiagonal matrix has
+*        N = NL + NR + 1 rows and
+*        M = N + SQRE >= N columns.
+*
+*  N     (input) INTEGER
+*        On entry, N specifies the number of rows and columns
+*        in the matrix. N must be at least 0.
+*
+*  NCVT  (input) INTEGER
+*        On entry, NCVT specifies the number of columns of
+*        the matrix VT. NCVT must be at least 0.
+*
+*  NRU   (input) INTEGER
+*        On entry, NRU specifies the number of rows of
+*        the matrix U. NRU must be at least 0.
+*
+*  NCC   (input) INTEGER
+*        On entry, NCC specifies the number of columns of
+*        the matrix C. NCC must be at least 0.
+*
+*  D     (input/output) REAL array, dimension (N)
+*        On entry, D contains the diagonal entries of the
+*        bidiagonal matrix whose SVD is desired. On normal exit,
+*        D contains the singular values in ascending order.
+*
+*  E     (input/output) REAL array.
+*        dimension is (N-1) if SQRE = 0 and N if SQRE = 1.
+*        On entry, the entries of E contain the offdiagonal entries
+*        of the bidiagonal matrix whose SVD is desired. On normal
+*        exit, E will contain 0. If the algorithm does not converge,
+*        D and E will contain the diagonal and superdiagonal entries
+*        of a bidiagonal matrix orthogonally equivalent to the one
+*        given as input.
+*
+*  VT    (input/output) REAL array, dimension (LDVT, NCVT)
+*        On entry, contains a matrix which on exit has been
+*        premultiplied by P', dimension N-by-NCVT if SQRE = 0
+*        and (N+1)-by-NCVT if SQRE = 1 (not referenced if NCVT=0).
+*
+*  LDVT  (input) INTEGER
+*        On entry, LDVT specifies the leading dimension of VT as
+*        declared in the calling (sub) program. LDVT must be at
+*        least 1. If NCVT is nonzero LDVT must also be at least N.
+*
+*  U     (input/output) REAL array, dimension (LDU, N)
+*        On entry, contains a  matrix which on exit has been
+*        postmultiplied by Q, dimension NRU-by-N if SQRE = 0
+*        and NRU-by-(N+1) if SQRE = 1 (not referenced if NRU=0).
+*
+*  LDU   (input) INTEGER
+*        On entry, LDU  specifies the leading dimension of U as
+*        declared in the calling (sub) program. LDU must be at
+*        least max( 1, NRU ) .
+*
+*  C     (input/output) REAL array, dimension (LDC, NCC)
+*        On entry, contains an N-by-NCC matrix which on exit
+*        has been premultiplied by Q'  dimension N-by-NCC if SQRE = 0
+*        and (N+1)-by-NCC if SQRE = 1 (not referenced if NCC=0).
+*
+*  LDC   (input) INTEGER
+*        On entry, LDC  specifies the leading dimension of C as
+*        declared in the calling (sub) program. LDC must be at
+*        least 1. If NCC is nonzero, LDC must also be at least N.
+*
+*  WORK  (workspace) REAL array, dimension (4*N)
+*        Workspace. Only referenced if one of NCVT, NRU, or NCC is
+*        nonzero, and if N is at least 2.
+*
+*  INFO  (output) INTEGER
+*        On exit, a value of 0 indicates a successful exit.
+*        If INFO < 0, argument number -INFO is illegal.
+*        If INFO > 0, the algorithm did not converge, and INFO
+*        specifies how many superdiagonals did not converge.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ROTATE
+      INTEGER            I, ISUB, IUPLO, J, NP1, SQRE1
+      REAL               CS, R, SMIN, SN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SBDSQR, SLARTG, SLASR, SSWAP, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IUPLO = 0
+      IF( LSAME( UPLO, 'U' ) )
+     $   IUPLO = 1
+      IF( LSAME( UPLO, 'L' ) )
+     $   IUPLO = 2
+      IF( IUPLO.EQ.0 ) THEN
+         INFO = -1
+      ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NCVT.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRU.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( NCC.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( ( NCVT.EQ.0 .AND. LDVT.LT.1 ) .OR.
+     $         ( NCVT.GT.0 .AND. LDVT.LT.MAX( 1, N ) ) ) THEN
+         INFO = -10
+      ELSE IF( LDU.LT.MAX( 1, NRU ) ) THEN
+         INFO = -12
+      ELSE IF( ( NCC.EQ.0 .AND. LDC.LT.1 ) .OR.
+     $         ( NCC.GT.0 .AND. LDC.LT.MAX( 1, N ) ) ) THEN
+         INFO = -14
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLASDQ', -INFO )
+         RETURN
+      END IF
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     ROTATE is true if any singular vectors desired, false otherwise
+*
+      ROTATE = ( NCVT.GT.0 ) .OR. ( NRU.GT.0 ) .OR. ( NCC.GT.0 )
+      NP1 = N + 1
+      SQRE1 = SQRE
+*
+*     If matrix non-square upper bidiagonal, rotate to be lower
+*     bidiagonal.  The rotations are on the right.
+*
+      IF( ( IUPLO.EQ.1 ) .AND. ( SQRE1.EQ.1 ) ) THEN
+         DO 10 I = 1, N - 1
+            CALL SLARTG( D( I ), E( I ), CS, SN, R )
+            D( I ) = R
+            E( I ) = SN*D( I+1 )
+            D( I+1 ) = CS*D( I+1 )
+            IF( ROTATE ) THEN
+               WORK( I ) = CS
+               WORK( N+I ) = SN
+            END IF
+   10    CONTINUE
+         CALL SLARTG( D( N ), E( N ), CS, SN, R )
+         D( N ) = R
+         E( N ) = ZERO
+         IF( ROTATE ) THEN
+            WORK( N ) = CS
+            WORK( N+N ) = SN
+         END IF
+         IUPLO = 2
+         SQRE1 = 0
+*
+*        Update singular vectors if desired.
+*
+         IF( NCVT.GT.0 )
+     $      CALL SLASR( 'L', 'V', 'F', NP1, NCVT, WORK( 1 ),
+     $                  WORK( NP1 ), VT, LDVT )
+      END IF
+*
+*     If matrix lower bidiagonal, rotate to be upper bidiagonal
+*     by applying Givens rotations on the left.
+*
+      IF( IUPLO.EQ.2 ) THEN
+         DO 20 I = 1, N - 1
+            CALL SLARTG( D( I ), E( I ), CS, SN, R )
+            D( I ) = R
+            E( I ) = SN*D( I+1 )
+            D( I+1 ) = CS*D( I+1 )
+            IF( ROTATE ) THEN
+               WORK( I ) = CS
+               WORK( N+I ) = SN
+            END IF
+   20    CONTINUE
+*
+*        If matrix (N+1)-by-N lower bidiagonal, one additional
+*        rotation is needed.
+*
+         IF( SQRE1.EQ.1 ) THEN
+            CALL SLARTG( D( N ), E( N ), CS, SN, R )
+            D( N ) = R
+            IF( ROTATE ) THEN
+               WORK( N ) = CS
+               WORK( N+N ) = SN
+            END IF
+         END IF
+*
+*        Update singular vectors if desired.
+*
+         IF( NRU.GT.0 ) THEN
+            IF( SQRE1.EQ.0 ) THEN
+               CALL SLASR( 'R', 'V', 'F', NRU, N, WORK( 1 ),
+     $                     WORK( NP1 ), U, LDU )
+            ELSE
+               CALL SLASR( 'R', 'V', 'F', NRU, NP1, WORK( 1 ),
+     $                     WORK( NP1 ), U, LDU )
+            END IF
+         END IF
+         IF( NCC.GT.0 ) THEN
+            IF( SQRE1.EQ.0 ) THEN
+               CALL SLASR( 'L', 'V', 'F', N, NCC, WORK( 1 ),
+     $                     WORK( NP1 ), C, LDC )
+            ELSE
+               CALL SLASR( 'L', 'V', 'F', NP1, NCC, WORK( 1 ),
+     $                     WORK( NP1 ), C, LDC )
+            END IF
+         END IF
+      END IF
+*
+*     Call SBDSQR to compute the SVD of the reduced real
+*     N-by-N upper bidiagonal matrix.
+*
+      CALL SBDSQR( 'U', N, NCVT, NRU, NCC, D, E, VT, LDVT, U, LDU, C,
+     $             LDC, WORK, INFO )
+*
+*     Sort the singular values into ascending order (insertion sort on
+*     singular values, but only one transposition per singular vector)
+*
+      DO 40 I = 1, N
+*
+*        Scan for smallest D(I).
+*
+         ISUB = I
+         SMIN = D( I )
+         DO 30 J = I + 1, N
+            IF( D( J ).LT.SMIN ) THEN
+               ISUB = J
+               SMIN = D( J )
+            END IF
+   30    CONTINUE
+         IF( ISUB.NE.I ) THEN
+*
+*           Swap singular values and vectors.
+*
+            D( ISUB ) = D( I )
+            D( I ) = SMIN
+            IF( NCVT.GT.0 )
+     $         CALL SSWAP( NCVT, VT( ISUB, 1 ), LDVT, VT( I, 1 ), LDVT )
+            IF( NRU.GT.0 )
+     $         CALL SSWAP( NRU, U( 1, ISUB ), 1, U( 1, I ), 1 )
+            IF( NCC.GT.0 )
+     $         CALL SSWAP( NCC, C( ISUB, 1 ), LDC, C( I, 1 ), LDC )
+         END IF
+   40 CONTINUE
+*
+      RETURN
+*
+*     End of SLASDQ
+*
+      END
diff --git a/costa/native/external/lapack/slasdt.f b/costa/native/external/lapack/slasdt.f
new file mode 100644
index 000000000..116e1d0ac
--- /dev/null
+++ b/costa/native/external/lapack/slasdt.f
@@ -0,0 +1,106 @@
+      SUBROUTINE SLASDT( N, LVL, ND, INODE, NDIML, NDIMR, MSUB )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            LVL, MSUB, N, ND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            INODE( * ), NDIML( * ), NDIMR( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLASDT creates a tree of subproblems for bidiagonal divide and
+*  conquer.
+*
+*  Arguments
+*  =========
+*
+*   N      (input) INTEGER
+*          On entry, the number of diagonal elements of the
+*          bidiagonal matrix.
+*
+*   LVL    (output) INTEGER
+*          On exit, the number of levels on the computation tree.
+*
+*   ND     (output) INTEGER
+*          On exit, the number of nodes on the tree.
+*
+*   INODE  (output) INTEGER array, dimension ( N )
+*          On exit, centers of subproblems.
+*
+*   NDIML  (output) INTEGER array, dimension ( N )
+*          On exit, row dimensions of left children.
+*
+*   NDIMR  (output) INTEGER array, dimension ( N )
+*          On exit, row dimensions of right children.
+*
+*   MSUB   (input) INTEGER.
+*          On entry, the maximum row dimension each subproblem at the
+*          bottom of the tree can be of.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               TWO
+      PARAMETER          ( TWO = 2.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IL, IR, LLST, MAXN, NCRNT, NLVL
+      REAL               TEMP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, LOG, MAX, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Find the number of levels on the tree.
+*
+      MAXN = MAX( 1, N )
+      TEMP = LOG( REAL( MAXN ) / REAL( MSUB+1 ) ) / LOG( TWO )
+      LVL = INT( TEMP ) + 1
+*
+      I = N / 2
+      INODE( 1 ) = I + 1
+      NDIML( 1 ) = I
+      NDIMR( 1 ) = N - I - 1
+      IL = 0
+      IR = 1
+      LLST = 1
+      DO 20 NLVL = 1, LVL - 1
+*
+*        Constructing the tree at (NLVL+1)-st level. The number of
+*        nodes created on this level is LLST * 2.
+*
+         DO 10 I = 0, LLST - 1
+            IL = IL + 2
+            IR = IR + 2
+            NCRNT = LLST + I
+            NDIML( IL ) = NDIML( NCRNT ) / 2
+            NDIMR( IL ) = NDIML( NCRNT ) - NDIML( IL ) - 1
+            INODE( IL ) = INODE( NCRNT ) - NDIMR( IL ) - 1
+            NDIML( IR ) = NDIMR( NCRNT ) / 2
+            NDIMR( IR ) = NDIMR( NCRNT ) - NDIML( IR ) - 1
+            INODE( IR ) = INODE( NCRNT ) + NDIML( IR ) + 1
+   10    CONTINUE
+         LLST = LLST*2
+   20 CONTINUE
+      ND = LLST*2 - 1
+*
+      RETURN
+*
+*     End of SLASDT
+*
+      END
diff --git a/costa/native/external/lapack/slaset.f b/costa/native/external/lapack/slaset.f
new file mode 100644
index 000000000..af596ecf9
--- /dev/null
+++ b/costa/native/external/lapack/slaset.f
@@ -0,0 +1,115 @@
+      SUBROUTINE SLASET( UPLO, M, N, ALPHA, BETA, A, LDA )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            LDA, M, N
+      REAL               ALPHA, BETA
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLASET initializes an m-by-n matrix A to BETA on the diagonal and
+*  ALPHA on the offdiagonals.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies the part of the matrix A to be set.
+*          = 'U':      Upper triangular part is set; the strictly lower
+*                      triangular part of A is not changed.
+*          = 'L':      Lower triangular part is set; the strictly upper
+*                      triangular part of A is not changed.
+*          Otherwise:  All of the matrix A is set.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  ALPHA   (input) REAL
+*          The constant to which the offdiagonal elements are to be set.
+*
+*  BETA    (input) REAL
+*          The constant to which the diagonal elements are to be set.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On exit, the leading m-by-n submatrix of A is set as follows:
+*
+*          if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n,
+*          if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n,
+*          otherwise,     A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j,
+*
+*          and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+* =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+      IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*        Set the strictly upper triangular or trapezoidal part of the
+*        array to ALPHA.
+*
+         DO 20 J = 2, N
+            DO 10 I = 1, MIN( J-1, M )
+               A( I, J ) = ALPHA
+   10       CONTINUE
+   20    CONTINUE
+*
+      ELSE IF( LSAME( UPLO, 'L' ) ) THEN
+*
+*        Set the strictly lower triangular or trapezoidal part of the
+*        array to ALPHA.
+*
+         DO 40 J = 1, MIN( M, N )
+            DO 30 I = J + 1, M
+               A( I, J ) = ALPHA
+   30       CONTINUE
+   40    CONTINUE
+*
+      ELSE
+*
+*        Set the leading m-by-n submatrix to ALPHA.
+*
+         DO 60 J = 1, N
+            DO 50 I = 1, M
+               A( I, J ) = ALPHA
+   50       CONTINUE
+   60    CONTINUE
+      END IF
+*
+*     Set the first min(M,N) diagonal elements to BETA.
+*
+      DO 70 I = 1, MIN( M, N )
+         A( I, I ) = BETA
+   70 CONTINUE
+*
+      RETURN
+*
+*     End of SLASET
+*
+      END
diff --git a/costa/native/external/lapack/slasq1.f b/costa/native/external/lapack/slasq1.f
new file mode 100644
index 000000000..4a119381d
--- /dev/null
+++ b/costa/native/external/lapack/slasq1.f
@@ -0,0 +1,149 @@
+      SUBROUTINE SLASQ1( N, D, E, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999 
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * ), E( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLASQ1 computes the singular values of a real N-by-N bidiagonal
+*  matrix with diagonal D and off-diagonal E. The singular values
+*  are computed to high relative accuracy, in the absence of
+*  denormalization, underflow and overflow. The algorithm was first
+*  presented in
+*
+*  "Accurate singular values and differential qd algorithms" by K. V.
+*  Fernando and B. N. Parlett, Numer. Math., Vol-67, No. 2, pp. 191-230,
+*  1994,
+*
+*  and the present implementation is described in "An implementation of
+*  the dqds Algorithm (Positive Case)", LAPACK Working Note.
+*
+*  Arguments
+*  =========
+*
+*  N     (input) INTEGER
+*        The number of rows and columns in the matrix. N >= 0.
+*
+*  D     (input/output) REAL array, dimension (N)
+*        On entry, D contains the diagonal elements of the
+*        bidiagonal matrix whose SVD is desired. On normal exit,
+*        D contains the singular values in decreasing order.
+*
+*  E     (input/output) REAL array, dimension (N)
+*        On entry, elements E(1:N-1) contain the off-diagonal elements
+*        of the bidiagonal matrix whose SVD is desired.
+*        On exit, E is overwritten.
+*
+*  WORK  (workspace) REAL array, dimension (4*N)
+*
+*  INFO  (output) INTEGER
+*        = 0: successful exit
+*        < 0: if INFO = -i, the i-th argument had an illegal value
+*        > 0: the algorithm failed
+*             = 1, a split was marked by a positive value in E
+*             = 2, current block of Z not diagonalized after 30*N
+*                  iterations (in inner while loop)
+*             = 3, termination criterion of outer while loop not met 
+*                  (program created more than N unreduced blocks)
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IINFO
+      REAL               EPS, SCALE, SAFMIN, SIGMN, SIGMX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLAS2, SLASQ2, SLASRT, XERBLA
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -2
+         CALL XERBLA( 'SLASQ1', -INFO )
+         RETURN
+      ELSE IF( N.EQ.0 ) THEN
+         RETURN
+      ELSE IF( N.EQ.1 ) THEN
+         D( 1 ) = ABS( D( 1 ) )
+         RETURN
+      ELSE IF( N.EQ.2 ) THEN
+         CALL SLAS2( D( 1 ), E( 1 ), D( 2 ), SIGMN, SIGMX )
+         D( 1 ) = SIGMX
+         D( 2 ) = SIGMN
+         RETURN
+      END IF
+*
+*     Estimate the largest singular value.
+*
+      SIGMX = ZERO
+      DO 10 I = 1, N - 1
+         D( I ) = ABS( D( I ) )
+         SIGMX = MAX( SIGMX, ABS( E( I ) ) )
+   10 CONTINUE
+      D( N ) = ABS( D( N ) )
+*
+*     Early return if SIGMX is zero (matrix is already diagonal).
+*
+      IF( SIGMX.EQ.ZERO ) THEN
+         CALL SLASRT( 'D', N, D, IINFO )
+         RETURN
+      END IF
+*
+      DO 20 I = 1, N
+         SIGMX = MAX( SIGMX, D( I ) )
+   20 CONTINUE
+*
+*     Copy D and E into WORK (in the Z format) and scale (squaring the
+*     input data makes scaling by a power of the radix pointless).
+*
+      EPS = SLAMCH( 'Precision' )
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      SCALE = SQRT( EPS / SAFMIN )
+      CALL SCOPY( N, D, 1, WORK( 1 ), 2 )
+      CALL SCOPY( N-1, E, 1, WORK( 2 ), 2 )
+      CALL SLASCL( 'G', 0, 0, SIGMX, SCALE, 2*N-1, 1, WORK, 2*N-1,
+     $             IINFO )
+*         
+*     Compute the q's and e's.
+*
+      DO 30 I = 1, 2*N - 1
+         WORK( I ) = WORK( I )**2
+   30 CONTINUE
+      WORK( 2*N ) = ZERO
+*
+      CALL SLASQ2( N, WORK, INFO )
+*
+      IF( INFO.EQ.0 ) THEN
+         DO 40 I = 1, N
+            D( I ) = SQRT( WORK( I ) )
+   40    CONTINUE
+         CALL SLASCL( 'G', 0, 0, SCALE, SIGMX, N, 1, D, N, IINFO )
+      END IF
+*
+      RETURN
+*
+*     End of SLASQ1
+*
+      END
diff --git a/costa/native/external/lapack/slasq2.f b/costa/native/external/lapack/slasq2.f
new file mode 100644
index 000000000..560826cbf
--- /dev/null
+++ b/costa/native/external/lapack/slasq2.f
@@ -0,0 +1,436 @@
+      SUBROUTINE SLASQ2( N, Z, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999 
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      REAL               Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLASQ2 computes all the eigenvalues of the symmetric positive 
+*  definite tridiagonal matrix associated with the qd array Z to high
+*  relative accuracy are computed to high relative accuracy, in the
+*  absence of denormalization, underflow and overflow.
+*
+*  To see the relation of Z to the tridiagonal matrix, let L be a
+*  unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and
+*  let U be an upper bidiagonal matrix with 1's above and diagonal
+*  Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the
+*  symmetric tridiagonal to which it is similar.
+*
+*  Note : SLASQ2 defines a logical variable, IEEE, which is true
+*  on machines which follow ieee-754 floating-point standard in their
+*  handling of infinities and NaNs, and false otherwise. This variable
+*  is passed to SLASQ3.
+*
+*  Arguments
+*  =========
+*
+*  N     (input) INTEGER
+*        The number of rows and columns in the matrix. N >= 0.
+*
+*  Z     (workspace) REAL array, dimension ( 4*N )
+*        On entry Z holds the qd array. On exit, entries 1 to N hold
+*        the eigenvalues in decreasing order, Z( 2*N+1 ) holds the
+*        trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If
+*        N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 )
+*        holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of
+*        shifts that failed.
+*
+*  INFO  (output) INTEGER
+*        = 0: successful exit
+*        < 0: if the i-th argument is a scalar and had an illegal
+*             value, then INFO = -i, if the i-th argument is an
+*             array and the j-entry had an illegal value, then
+*             INFO = -(i*100+j)
+*        > 0: the algorithm failed
+*              = 1, a split was marked by a positive value in E
+*              = 2, current block of Z not diagonalized after 30*N
+*                   iterations (in inner while loop)
+*              = 3, termination criterion of outer while loop not met 
+*                   (program created more than N unreduced blocks)
+*
+*  Further Details
+*  ===============
+*  Local Variables: I0:N0 defines a current unreduced segment of Z.
+*  The shifts are accumulated in SIGMA. Iteration count is in ITER.
+*  Ping-pong is controlled by PP (alternates between 0 and 1).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               CBIAS
+      PARAMETER          ( CBIAS = 1.50E0 )
+      REAL               ZERO, HALF, ONE, TWO, FOUR, HUNDRD
+      PARAMETER          ( ZERO = 0.0E0, HALF = 0.5E0, ONE = 1.0E0,
+     $                     TWO = 2.0E0, FOUR = 4.0E0, HUNDRD = 100.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            IEEE
+      INTEGER            I0, I4, IINFO, IPN4, ITER, IWHILA, IWHILB, K, 
+     $                   N0, NBIG, NDIV, NFAIL, PP, SPLT
+      REAL               D, DESIG, DMIN, E, EMAX, EMIN, EPS, OLDEMN, 
+     $                   QMAX, QMIN, S, SAFMIN, SIGMA, T, TEMP, TOL, 
+     $                   TOL2, TRACE, ZMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLASQ3, SLASRT, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      REAL               SLAMCH
+      EXTERNAL           ILAENV, SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*      
+*     Test the input arguments.
+*     (in case SLASQ2 is not called by SLASQ1)
+*
+      INFO = 0
+      EPS = SLAMCH( 'Precision' )
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      TOL = EPS*HUNDRD
+      TOL2 = TOL**2
+*
+      IF( N.LT.0 ) THEN
+         INFO = -1
+         CALL XERBLA( 'SLASQ2', 1 )
+         RETURN
+      ELSE IF( N.EQ.0 ) THEN
+         RETURN
+      ELSE IF( N.EQ.1 ) THEN
+*
+*        1-by-1 case.
+*
+         IF( Z( 1 ).LT.ZERO ) THEN
+            INFO = -201
+            CALL XERBLA( 'SLASQ2', 2 )
+         END IF
+         RETURN
+      ELSE IF( N.EQ.2 ) THEN
+*
+*        2-by-2 case.
+*
+         IF( Z( 2 ).LT.ZERO .OR. Z( 3 ).LT.ZERO ) THEN
+            INFO = -2
+            CALL XERBLA( 'SLASQ2', 2 )
+            RETURN
+         ELSE IF( Z( 3 ).GT.Z( 1 ) ) THEN
+            D = Z( 3 )
+            Z( 3 ) = Z( 1 )
+            Z( 1 ) = D
+         END IF
+         Z( 5 ) = Z( 1 ) + Z( 2 ) + Z( 3 )
+         IF( Z( 2 ).GT.Z( 3 )*TOL2 ) THEN
+            T = HALF*( ( Z( 1 )-Z( 3 ) )+Z( 2 ) ) 
+            S = Z( 3 )*( Z( 2 ) / T )
+            IF( S.LE.T ) THEN
+               S = Z( 3 )*( Z( 2 ) / ( T*( ONE+SQRT( ONE+S / T ) ) ) )
+            ELSE
+               S = Z( 3 )*( Z( 2 ) / ( T+SQRT( T )*SQRT( T+S ) ) )
+            END IF
+            T = Z( 1 ) + ( S+Z( 2 ) )
+            Z( 3 ) = Z( 3 )*( Z( 1 ) / T )
+            Z( 1 ) = T
+         END IF
+         Z( 2 ) = Z( 3 )
+         Z( 6 ) = Z( 2 ) + Z( 1 )
+         RETURN
+      END IF
+*
+*     Check for negative data and compute sums of q's and e's.
+*
+      Z( 2*N ) = ZERO
+      EMIN = Z( 2 )
+      QMAX = ZERO
+      ZMAX = ZERO
+      D = ZERO
+      E = ZERO
+*
+      DO 10 K = 1, 2*( N-1 ), 2
+         IF( Z( K ).LT.ZERO ) THEN
+            INFO = -( 200+K )
+            CALL XERBLA( 'SLASQ2', 2 )
+            RETURN
+         ELSE IF( Z( K+1 ).LT.ZERO ) THEN
+            INFO = -( 200+K+1 )
+            CALL XERBLA( 'SLASQ2', 2 )
+            RETURN
+         END IF
+         D = D + Z( K )
+         E = E + Z( K+1 )
+         QMAX = MAX( QMAX, Z( K ) )
+         EMIN = MIN( EMIN, Z( K+1 ) )
+         ZMAX = MAX( QMAX, ZMAX, Z( K+1 ) )
+   10 CONTINUE
+      IF( Z( 2*N-1 ).LT.ZERO ) THEN
+         INFO = -( 200+2*N-1 )
+         CALL XERBLA( 'SLASQ2', 2 )
+         RETURN
+      END IF
+      D = D + Z( 2*N-1 )
+      QMAX = MAX( QMAX, Z( 2*N-1 ) )
+      ZMAX = MAX( QMAX, ZMAX )
+*
+*     Check for diagonality.
+*
+      IF( E.EQ.ZERO ) THEN
+         DO 20 K = 2, N
+            Z( K ) = Z( 2*K-1 )
+   20    CONTINUE
+         CALL SLASRT( 'D', N, Z, IINFO )
+         Z( 2*N-1 ) = D
+         RETURN
+      END IF
+*
+      TRACE = D + E
+*
+*     Check for zero data.
+*
+      IF( TRACE.EQ.ZERO ) THEN
+         Z( 2*N-1 ) = ZERO
+         RETURN
+      END IF
+*         
+*     Check whether the machine is IEEE conformable.
+*         
+      IEEE = ILAENV( 10, 'SLASQ2', 'N', 1, 2, 3, 4 ).EQ.1 .AND.
+     $       ILAENV( 11, 'SLASQ2', 'N', 1, 2, 3, 4 ).EQ.1      
+*         
+*     Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...).
+*
+      DO 30 K = 2*N, 2, -2
+         Z( 2*K ) = ZERO 
+         Z( 2*K-1 ) = Z( K ) 
+         Z( 2*K-2 ) = ZERO 
+         Z( 2*K-3 ) = Z( K-1 ) 
+   30 CONTINUE
+*
+      I0 = 1
+      N0 = N
+*
+*     Reverse the qd-array, if warranted.
+*
+      IF( CBIAS*Z( 4*I0-3 ).LT.Z( 4*N0-3 ) ) THEN
+         IPN4 = 4*( I0+N0 )
+         DO 40 I4 = 4*I0, 2*( I0+N0-1 ), 4
+            TEMP = Z( I4-3 )
+            Z( I4-3 ) = Z( IPN4-I4-3 )
+            Z( IPN4-I4-3 ) = TEMP
+            TEMP = Z( I4-1 )
+            Z( I4-1 ) = Z( IPN4-I4-5 )
+            Z( IPN4-I4-5 ) = TEMP
+   40    CONTINUE
+      END IF
+*
+*     Initial split checking via dqd and Li's test.
+*
+      PP = 0
+*
+      DO 80 K = 1, 2
+*
+         D = Z( 4*N0+PP-3 )
+         DO 50 I4 = 4*( N0-1 ) + PP, 4*I0 + PP, -4
+            IF( Z( I4-1 ).LE.TOL2*D ) THEN
+               Z( I4-1 ) = -ZERO
+               D = Z( I4-3 )
+            ELSE
+               D = Z( I4-3 )*( D / ( D+Z( I4-1 ) ) )
+            END IF
+   50    CONTINUE
+*
+*        dqd maps Z to ZZ plus Li's test.
+*
+         EMIN = Z( 4*I0+PP+1 )
+         D = Z( 4*I0+PP-3 )
+         DO 60 I4 = 4*I0 + PP, 4*( N0-1 ) + PP, 4
+            Z( I4-2*PP-2 ) = D + Z( I4-1 )
+            IF( Z( I4-1 ).LE.TOL2*D ) THEN
+               Z( I4-1 ) = -ZERO
+               Z( I4-2*PP-2 ) = D
+               Z( I4-2*PP ) = ZERO
+               D = Z( I4+1 )
+            ELSE IF( SAFMIN*Z( I4+1 ).LT.Z( I4-2*PP-2 ) .AND.
+     $               SAFMIN*Z( I4-2*PP-2 ).LT.Z( I4+1 ) ) THEN
+               TEMP = Z( I4+1 ) / Z( I4-2*PP-2 )
+               Z( I4-2*PP ) = Z( I4-1 )*TEMP
+               D = D*TEMP
+            ELSE
+               Z( I4-2*PP ) = Z( I4+1 )*( Z( I4-1 ) / Z( I4-2*PP-2 ) )
+               D = Z( I4+1 )*( D / Z( I4-2*PP-2 ) )
+            END IF
+            EMIN = MIN( EMIN, Z( I4-2*PP ) )
+   60    CONTINUE 
+         Z( 4*N0-PP-2 ) = D
+*
+*        Now find qmax.
+*
+         QMAX = Z( 4*I0-PP-2 )
+         DO 70 I4 = 4*I0 - PP + 2, 4*N0 - PP - 2, 4
+            QMAX = MAX( QMAX, Z( I4 ) )
+   70    CONTINUE
+*
+*        Prepare for the next iteration on K.
+*
+         PP = 1 - PP
+   80 CONTINUE
+*
+      ITER = 2
+      NFAIL = 0
+      NDIV = 2*( N0-I0 )
+*
+      DO 140 IWHILA = 1, N + 1
+         IF( N0.LT.1 ) 
+     $      GO TO 150
+*
+*        While array unfinished do 
+*
+*        E(N0) holds the value of SIGMA when submatrix in I0:N0
+*        splits from the rest of the array, but is negated.
+*      
+         DESIG = ZERO
+         IF( N0.EQ.N ) THEN
+            SIGMA = ZERO
+         ELSE
+            SIGMA = -Z( 4*N0-1 )
+         END IF
+         IF( SIGMA.LT.ZERO ) THEN
+            INFO = 1
+            RETURN
+         END IF
+*
+*        Find last unreduced submatrix's top index I0, find QMAX and
+*        EMIN. Find Gershgorin-type bound if Q's much greater than E's.
+*
+         EMAX = ZERO 
+         IF( N0.GT.I0 ) THEN
+            EMIN = ABS( Z( 4*N0-5 ) )
+         ELSE
+            EMIN = ZERO
+         END IF
+         QMIN = Z( 4*N0-3 )
+         QMAX = QMIN
+         DO 90 I4 = 4*N0, 8, -4
+            IF( Z( I4-5 ).LE.ZERO )
+     $         GO TO 100
+            IF( QMIN.GE.FOUR*EMAX ) THEN
+               QMIN = MIN( QMIN, Z( I4-3 ) )
+               EMAX = MAX( EMAX, Z( I4-5 ) )
+            END IF
+            QMAX = MAX( QMAX, Z( I4-7 )+Z( I4-5 ) )
+            EMIN = MIN( EMIN, Z( I4-5 ) )
+   90    CONTINUE
+         I4 = 4 
+*
+  100    CONTINUE
+         I0 = I4 / 4
+*
+*        Store EMIN for passing to SLASQ3.
+*
+         Z( 4*N0-1 ) = EMIN
+*
+*        Put -(initial shift) into DMIN.
+*
+         DMIN = -MAX( ZERO, QMIN-TWO*SQRT( QMIN )*SQRT( EMAX ) )
+*
+*        Now I0:N0 is unreduced. PP = 0 for ping, PP = 1 for pong.
+*
+         PP = 0 
+*
+         NBIG = 30*( N0-I0+1 )
+         DO 120 IWHILB = 1, NBIG
+            IF( I0.GT.N0 ) 
+     $         GO TO 130
+*
+*           While submatrix unfinished take a good dqds step.
+*
+            CALL SLASQ3( I0, N0, Z, PP, DMIN, SIGMA, DESIG, QMAX, NFAIL,
+     $                   ITER, NDIV, IEEE )
+*
+            PP = 1 - PP
+*
+*           When EMIN is very small check for splits.
+*
+            IF( PP.EQ.0 .AND. N0-I0.GE.3 ) THEN
+               IF( Z( 4*N0 ).LE.TOL2*QMAX .OR.
+     $             Z( 4*N0-1 ).LE.TOL2*SIGMA ) THEN
+                  SPLT = I0 - 1
+                  QMAX = Z( 4*I0-3 )
+                  EMIN = Z( 4*I0-1 )
+                  OLDEMN = Z( 4*I0 )
+                  DO 110 I4 = 4*I0, 4*( N0-3 ), 4
+                     IF( Z( I4 ).LE.TOL2*Z( I4-3 ) .OR.
+     $                   Z( I4-1 ).LE.TOL2*SIGMA ) THEN
+                        Z( I4-1 ) = -SIGMA
+                        SPLT = I4 / 4
+                        QMAX = ZERO
+                        EMIN = Z( I4+3 )
+                        OLDEMN = Z( I4+4 )
+                     ELSE
+                        QMAX = MAX( QMAX, Z( I4+1 ) )
+                        EMIN = MIN( EMIN, Z( I4-1 ) )
+                        OLDEMN = MIN( OLDEMN, Z( I4 ) )
+                     END IF
+  110             CONTINUE
+                  Z( 4*N0-1 ) = EMIN
+                  Z( 4*N0 ) = OLDEMN
+                  I0 = SPLT + 1
+               END IF
+            END IF
+*
+  120    CONTINUE
+*
+         INFO = 2
+         RETURN
+*
+*        end IWHILB
+*
+  130    CONTINUE
+*
+  140 CONTINUE
+*
+      INFO = 3
+      RETURN
+*
+*     end IWHILA   
+*
+  150 CONTINUE
+*      
+*     Move q's to the front.
+*      
+      DO 160 K = 2, N
+         Z( K ) = Z( 4*K-3 )
+  160 CONTINUE
+*      
+*     Sort and compute sum of eigenvalues.
+*
+      CALL SLASRT( 'D', N, Z, IINFO )
+*
+      E = ZERO
+      DO 170 K = N, 1, -1
+         E = E + Z( K )
+  170 CONTINUE
+*
+*     Store trace, sum(eigenvalues) and information on performance.
+*
+      Z( 2*N+1 ) = TRACE 
+      Z( 2*N+2 ) = E
+      Z( 2*N+3 ) = REAL( ITER )
+      Z( 2*N+4 ) = REAL( NDIV ) / REAL( N**2 )
+      Z( 2*N+5 ) = HUNDRD*NFAIL / REAL( ITER )
+      RETURN
+*
+*     End of SLASQ2
+*
+      END
diff --git a/costa/native/external/lapack/slasq3.f b/costa/native/external/lapack/slasq3.f
new file mode 100644
index 000000000..d7c495eb0
--- /dev/null
+++ b/costa/native/external/lapack/slasq3.f
@@ -0,0 +1,298 @@
+      SUBROUTINE SLASQ3( I0, N0, Z, PP, DMIN, SIGMA, DESIG, QMAX, NFAIL,
+     $                   ITER, NDIV, IEEE )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     May 17, 2000
+*
+*     .. Scalar Arguments ..
+      LOGICAL            IEEE
+      INTEGER            I0, ITER, N0, NDIV, NFAIL, PP
+      REAL               DESIG, DMIN, QMAX, SIGMA
+*     ..
+*     .. Array Arguments ..
+      REAL               Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLASQ3 checks for deflation, computes a shift (TAU) and calls dqds.
+*  In case of failure it changes shifts, and tries again until output
+*  is positive.
+*
+*  Arguments
+*  =========
+*
+*  I0     (input) INTEGER
+*         First index.
+*
+*  N0     (input) INTEGER
+*         Last index.
+*
+*  Z      (input) REAL array, dimension ( 4*N )
+*         Z holds the qd array.
+*
+*  PP     (input) INTEGER
+*         PP=0 for ping, PP=1 for pong.
+*
+*  DMIN   (output) REAL
+*         Minimum value of d.
+*
+*  SIGMA  (output) REAL
+*         Sum of shifts used in current segment.
+*
+*  DESIG  (input/output) REAL
+*         Lower order part of SIGMA
+*
+*  QMAX   (input) REAL
+*         Maximum value of q.
+*
+*  NFAIL  (output) INTEGER
+*         Number of times shift was too big.
+*
+*  ITER   (output) INTEGER
+*         Number of iterations.
+*
+*  NDIV   (output) INTEGER
+*         Number of divisions.
+*
+*  TTYPE  (output) INTEGER
+*         Shift type.
+*
+*  IEEE   (input) LOGICAL
+*         Flag for IEEE or non IEEE arithmetic (passed to SLASQ5).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               CBIAS
+      PARAMETER          ( CBIAS = 1.50E0 )
+      REAL               ZERO, QURTR, HALF, ONE, TWO, HUNDRD
+      PARAMETER          ( ZERO = 0.0E0, QURTR = 0.250E0, HALF = 0.5E0,
+     $                     ONE = 1.0E0, TWO = 2.0E0, HUNDRD = 100.0E0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            IPN4, J4, N0IN, NN, TTYPE
+      REAL               DMIN1, DMIN2, DN, DN1, DN2, EPS, S, SAFMIN, T,
+     $                   TAU, TEMP, TOL, TOL2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLASQ4, SLASQ5, SLASQ6
+*     ..
+*     .. External Function ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MIN, SQRT
+*     ..
+*     .. Save statement ..
+      SAVE               TTYPE
+      SAVE               DMIN1, DMIN2, DN, DN1, DN2, TAU
+*     ..
+*     .. Data statement ..
+      DATA               TTYPE / 0 /
+      DATA               DMIN1 / ZERO /, DMIN2 / ZERO /, DN / ZERO /,
+     $                   DN1 / ZERO /, DN2 / ZERO /, TAU / ZERO /
+*     ..
+*     .. Executable Statements ..
+*
+      N0IN = N0
+      EPS = SLAMCH( 'Precision' )
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      TOL = EPS*HUNDRD
+      TOL2 = TOL**2
+*
+*     Check for deflation.
+*
+   10 CONTINUE
+*
+      IF( N0.LT.I0 )
+     $   RETURN
+      IF( N0.EQ.I0 )
+     $   GO TO 20
+      NN = 4*N0 + PP
+      IF( N0.EQ.( I0+1 ) )
+     $   GO TO 40
+*
+*     Check whether E(N0-1) is negligible, 1 eigenvalue.
+*
+      IF( Z( NN-5 ).GT.TOL2*( SIGMA+Z( NN-3 ) ) .AND.
+     $    Z( NN-2*PP-4 ).GT.TOL2*Z( NN-7 ) )
+     $   GO TO 30
+*
+   20 CONTINUE
+*
+      Z( 4*N0-3 ) = Z( 4*N0+PP-3 ) + SIGMA
+      N0 = N0 - 1
+      GO TO 10
+*
+*     Check  whether E(N0-2) is negligible, 2 eigenvalues.
+*
+   30 CONTINUE
+*
+      IF( Z( NN-9 ).GT.TOL2*SIGMA .AND.
+     $    Z( NN-2*PP-8 ).GT.TOL2*Z( NN-11 ) )
+     $   GO TO 50
+*
+   40 CONTINUE
+*
+      IF( Z( NN-3 ).GT.Z( NN-7 ) ) THEN
+         S = Z( NN-3 )
+         Z( NN-3 ) = Z( NN-7 )
+         Z( NN-7 ) = S
+      END IF
+      IF( Z( NN-5 ).GT.Z( NN-3 )*TOL2 ) THEN
+         T = HALF*( ( Z( NN-7 )-Z( NN-3 ) )+Z( NN-5 ) )
+         S = Z( NN-3 )*( Z( NN-5 ) / T )
+         IF( S.LE.T ) THEN
+            S = Z( NN-3 )*( Z( NN-5 ) /
+     $          ( T*( ONE+SQRT( ONE+S / T ) ) ) )
+         ELSE
+            S = Z( NN-3 )*( Z( NN-5 ) / ( T+SQRT( T )*SQRT( T+S ) ) )
+         END IF
+         T = Z( NN-7 ) + ( S+Z( NN-5 ) )
+         Z( NN-3 ) = Z( NN-3 )*( Z( NN-7 ) / T )
+         Z( NN-7 ) = T
+      END IF
+      Z( 4*N0-7 ) = Z( NN-7 ) + SIGMA
+      Z( 4*N0-3 ) = Z( NN-3 ) + SIGMA
+      N0 = N0 - 2
+      GO TO 10
+*
+   50 CONTINUE
+*
+*     Reverse the qd-array, if warranted.
+*
+      IF( DMIN.LE.ZERO .OR. N0.LT.N0IN ) THEN
+         IF( CBIAS*Z( 4*I0+PP-3 ).LT.Z( 4*N0+PP-3 ) ) THEN
+            IPN4 = 4*( I0+N0 )
+            DO 60 J4 = 4*I0, 2*( I0+N0-1 ), 4
+               TEMP = Z( J4-3 )
+               Z( J4-3 ) = Z( IPN4-J4-3 )
+               Z( IPN4-J4-3 ) = TEMP
+               TEMP = Z( J4-2 )
+               Z( J4-2 ) = Z( IPN4-J4-2 )
+               Z( IPN4-J4-2 ) = TEMP
+               TEMP = Z( J4-1 )
+               Z( J4-1 ) = Z( IPN4-J4-5 )
+               Z( IPN4-J4-5 ) = TEMP
+               TEMP = Z( J4 )
+               Z( J4 ) = Z( IPN4-J4-4 )
+               Z( IPN4-J4-4 ) = TEMP
+   60       CONTINUE
+            IF( N0-I0.LE.4 ) THEN
+               Z( 4*N0+PP-1 ) = Z( 4*I0+PP-1 )
+               Z( 4*N0-PP ) = Z( 4*I0-PP )
+            END IF
+            DMIN2 = MIN( DMIN2, Z( 4*N0+PP-1 ) )
+            Z( 4*N0+PP-1 ) = MIN( Z( 4*N0+PP-1 ), Z( 4*I0+PP-1 ),
+     $                            Z( 4*I0+PP+3 ) )
+            Z( 4*N0-PP ) = MIN( Z( 4*N0-PP ), Z( 4*I0-PP ),
+     $                          Z( 4*I0-PP+4 ) )
+            QMAX = MAX( QMAX, Z( 4*I0+PP-3 ), Z( 4*I0+PP+1 ) )
+            DMIN = -ZERO
+         END IF
+      END IF
+*
+   70 CONTINUE
+*
+      IF( DMIN.LT.ZERO .OR. SAFMIN*QMAX.LT.MIN( Z( 4*N0+PP-1 ),
+     $    Z( 4*N0+PP-9 ), DMIN2+Z( 4*N0-PP ) ) ) THEN
+*
+*        Choose a shift.
+*
+         CALL SLASQ4( I0, N0, Z, PP, N0IN, DMIN, DMIN1, DMIN2, DN, DN1,
+     $                DN2, TAU, TTYPE )
+*
+*        Call dqds until DMIN > 0.
+*
+   80    CONTINUE
+*
+         CALL SLASQ5( I0, N0, Z, PP, TAU, DMIN, DMIN1, DMIN2, DN,
+     $                DN1, DN2, IEEE )
+*
+         NDIV = NDIV + ( N0-I0+2 )
+         ITER = ITER + 1
+*
+*        Check status.
+*
+         IF( DMIN.GE.ZERO .AND. DMIN1.GT.ZERO ) THEN
+*
+*           Success.
+*
+            GO TO 100
+*
+         ELSE IF( DMIN.LT.ZERO .AND. DMIN1.GT.ZERO .AND.
+     $            Z( 4*( N0-1 )-PP ).LT.TOL*( SIGMA+DN1 ) .AND.
+     $            ABS( DN ).LT.TOL*SIGMA ) THEN
+*
+*           Convergence hidden by negative DN.
+*
+            Z( 4*( N0-1 )-PP+2 ) = ZERO
+            DMIN = ZERO
+            GO TO 100
+         ELSE IF( DMIN.LT.ZERO ) THEN
+*
+*           TAU too big. Select new TAU and try again.
+*
+            NFAIL = NFAIL + 1
+            IF( TTYPE.LT.-22 ) THEN
+*
+*              Failed twice. Play it safe.
+*
+               TAU = ZERO
+            ELSE IF( DMIN1.GT.ZERO ) THEN
+*
+*              Late failure. Gives excellent shift.
+*
+               TAU = ( TAU+DMIN )*( ONE-TWO*EPS )
+               TTYPE = TTYPE - 11
+            ELSE
+*
+*              Early failure. Divide by 4.
+*
+               TAU = QURTR*TAU
+               TTYPE = TTYPE - 12
+            END IF
+            GO TO 80
+         ELSE IF( DMIN.NE.DMIN ) THEN
+*
+*           NaN.
+*
+            TAU = ZERO
+            GO TO 80
+         ELSE
+*
+*           Possible underflow. Play it safe.
+*
+            GO TO 90
+         END IF
+      END IF
+*
+*     Risk of underflow.
+*
+   90 CONTINUE
+      CALL SLASQ6( I0, N0, Z, PP, DMIN, DMIN1, DMIN2, DN, DN1, DN2 )
+      NDIV = NDIV + ( N0-I0+2 )
+      ITER = ITER + 1
+      TAU = ZERO
+*
+  100 CONTINUE
+      IF( TAU.LT.SIGMA ) THEN
+         DESIG = DESIG + TAU
+         T = SIGMA + DESIG
+         DESIG = DESIG - ( T-SIGMA )
+      ELSE
+         T = SIGMA + TAU
+         DESIG = SIGMA - ( T-TAU ) + DESIG
+      END IF
+      SIGMA = T
+*
+      RETURN
+*
+*     End of SLASQ3
+*
+      END
diff --git a/costa/native/external/lapack/slasq4.f b/costa/native/external/lapack/slasq4.f
new file mode 100644
index 000000000..2e7d3ece8
--- /dev/null
+++ b/costa/native/external/lapack/slasq4.f
@@ -0,0 +1,330 @@
+      SUBROUTINE SLASQ4( I0, N0, Z, PP, N0IN, DMIN, DMIN1, DMIN2, DN,
+     $                   DN1, DN2, TAU, TTYPE )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            I0, N0, N0IN, PP, TTYPE
+      REAL               DMIN, DMIN1, DMIN2, DN, DN1, DN2, TAU
+*     ..
+*     .. Array Arguments ..
+      REAL               Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLASQ4 computes an approximation TAU to the smallest eigenvalue 
+*  using values of d from the previous transform.
+*
+*  I0    (input) INTEGER
+*        First index.
+*
+*  N0    (input) INTEGER
+*        Last index.
+*
+*  Z     (input) REAL array, dimension ( 4*N )
+*        Z holds the qd array.
+*
+*  PP    (input) INTEGER
+*        PP=0 for ping, PP=1 for pong.
+*
+*  NOIN  (input) INTEGER
+*        The value of N0 at start of EIGTEST.
+*
+*  DMIN  (input) REAL
+*        Minimum value of d.
+*
+*  DMIN1 (input) REAL
+*        Minimum value of d, excluding D( N0 ).
+*
+*  DMIN2 (input) REAL
+*        Minimum value of d, excluding D( N0 ) and D( N0-1 ).
+*
+*  DN    (input) REAL
+*        d(N)
+*
+*  DN1   (input) REAL
+*        d(N-1)
+*
+*  DN2   (input) REAL
+*        d(N-2)
+*
+*  TAU   (output) REAL
+*        This is the shift.
+*
+*  TTYPE (output) INTEGER
+*        Shift type.
+*
+*  Further Details
+*  ===============
+*  CNST1 = 9/16
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               CNST1, CNST2, CNST3
+      PARAMETER          ( CNST1 = 0.5630E0, CNST2 = 1.010E0,
+     $                   CNST3 = 1.050E0 )
+      REAL               QURTR, THIRD, HALF, ZERO, ONE, TWO, HUNDRD
+      PARAMETER          ( QURTR = 0.250E0, THIRD = 0.3330E0,
+     $                   HALF = 0.50E0, ZERO = 0.0E0, ONE = 1.0E0,
+     $                   TWO = 2.0E0, HUNDRD = 100.0E0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I4, NN, NP
+      REAL               A2, B1, B2, G, GAM, GAP1, GAP2, S
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Save statement ..
+      SAVE               G
+*     ..
+*     .. Data statement ..
+      DATA               G / ZERO /
+*     ..
+*     .. Executable Statements ..
+*
+*     A negative DMIN forces the shift to take that absolute value
+*     TTYPE records the type of shift.
+*
+      IF( DMIN.LE.ZERO ) THEN
+         TAU = -DMIN
+         TTYPE = -1
+         RETURN
+      END IF
+*       
+      NN = 4*N0 + PP
+      IF( N0IN.EQ.N0 ) THEN
+*
+*        No eigenvalues deflated.
+*
+         IF( DMIN.EQ.DN .OR. DMIN.EQ.DN1 ) THEN
+*
+            B1 = SQRT( Z( NN-3 ) )*SQRT( Z( NN-5 ) )
+            B2 = SQRT( Z( NN-7 ) )*SQRT( Z( NN-9 ) )
+            A2 = Z( NN-7 ) + Z( NN-5 )
+*
+*           Cases 2 and 3.
+*
+            IF( DMIN.EQ.DN .AND. DMIN1.EQ.DN1 ) THEN
+               GAP2 = DMIN2 - A2 - DMIN2*QURTR
+               IF( GAP2.GT.ZERO .AND. GAP2.GT.B2 ) THEN
+                  GAP1 = A2 - DN - ( B2 / GAP2 )*B2
+               ELSE
+                  GAP1 = A2 - DN - ( B1+B2 )
+               END IF
+               IF( GAP1.GT.ZERO .AND. GAP1.GT.B1 ) THEN
+                  S = MAX( DN-( B1 / GAP1 )*B1, HALF*DMIN )
+                  TTYPE = -2
+               ELSE
+                  S = ZERO
+                  IF( DN.GT.B1 )
+     $               S = DN - B1
+                  IF( A2.GT.( B1+B2 ) )
+     $               S = MIN( S, A2-( B1+B2 ) )
+                  S = MAX( S, THIRD*DMIN )
+                  TTYPE = -3
+               END IF
+            ELSE
+*
+*              Case 4.
+*
+               TTYPE = -4
+               S = QURTR*DMIN
+               IF( DMIN.EQ.DN ) THEN
+                  GAM = DN
+                  A2 = ZERO
+                  IF( Z( NN-5 ) .GT. Z( NN-7 ) )
+     $               RETURN
+                  B2 = Z( NN-5 ) / Z( NN-7 )
+                  NP = NN - 9
+               ELSE
+                  NP = NN - 2*PP
+                  B2 = Z( NP-2 )
+                  GAM = DN1
+                  IF( Z( NP-4 ) .GT. Z( NP-2 ) )
+     $               RETURN
+                  A2 = Z( NP-4 ) / Z( NP-2 )
+                  IF( Z( NN-9 ) .GT. Z( NN-11 ) )
+     $               RETURN
+                  B2 = Z( NN-9 ) / Z( NN-11 )
+                  NP = NN - 13
+               END IF
+*
+*              Approximate contribution to norm squared from I < NN-1.
+*
+               A2 = A2 + B2
+               DO 10 I4 = NP, 4*I0 - 1 + PP, -4
+                  IF( B2.EQ.ZERO )
+     $               GO TO 20
+                  B1 = B2
+                  IF( Z( I4 ) .GT. Z( I4-2 ) )
+     $               RETURN
+                  B2 = B2*( Z( I4 ) / Z( I4-2 ) )
+                  A2 = A2 + B2
+                  IF( HUNDRD*MAX( B2, B1 ).LT.A2 .OR. CNST1.LT.A2 ) 
+     $               GO TO 20
+   10          CONTINUE
+   20          CONTINUE
+               A2 = CNST3*A2
+*
+*              Rayleigh quotient residual bound.
+*
+               IF( A2.LT.CNST1 )
+     $            S = GAM*( ONE-SQRT( A2 ) ) / ( ONE+A2 )
+            END IF
+         ELSE IF( DMIN.EQ.DN2 ) THEN
+*
+*           Case 5.
+*
+            TTYPE = -5
+            S = QURTR*DMIN
+*
+*           Compute contribution to norm squared from I > NN-2.
+*
+            NP = NN - 2*PP
+            B1 = Z( NP-2 )
+            B2 = Z( NP-6 )
+            GAM = DN2
+            IF( Z( NP-8 ).GT.B2 .OR. Z( NP-4 ).GT.B1 )
+     $         RETURN
+            A2 = ( Z( NP-8 ) / B2 )*( ONE+Z( NP-4 ) / B1 )
+*
+*           Approximate contribution to norm squared from I < NN-2.
+*
+            IF( N0-I0.GT.2 ) THEN
+               B2 = Z( NN-13 ) / Z( NN-15 )
+               A2 = A2 + B2
+               DO 30 I4 = NN - 17, 4*I0 - 1 + PP, -4
+                  IF( B2.EQ.ZERO )
+     $               GO TO 40
+                  B1 = B2
+                  IF( Z( I4 ) .GT. Z( I4-2 ) )
+     $               RETURN
+                  B2 = B2*( Z( I4 ) / Z( I4-2 ) )
+                  A2 = A2 + B2
+                  IF( HUNDRD*MAX( B2, B1 ).LT.A2 .OR. CNST1.LT.A2 ) 
+     $               GO TO 40
+   30          CONTINUE
+   40          CONTINUE
+               A2 = CNST3*A2
+            END IF
+*
+            IF( A2.LT.CNST1 )
+     $         S = GAM*( ONE-SQRT( A2 ) ) / ( ONE+A2 )
+         ELSE
+*
+*           Case 6, no information to guide us.
+*
+            IF( TTYPE.EQ.-6 ) THEN
+               G = G + THIRD*( ONE-G )
+            ELSE IF( TTYPE.EQ.-18 ) THEN
+               G = QURTR*THIRD
+            ELSE
+               G = QURTR
+            END IF
+            S = G*DMIN
+            TTYPE = -6
+         END IF
+*
+      ELSE IF( N0IN.EQ.( N0+1 ) ) THEN
+*
+*        One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN.
+*
+         IF( DMIN1.EQ.DN1 .AND. DMIN2.EQ.DN2 ) THEN 
+*
+*           Cases 7 and 8.
+*
+            TTYPE = -7
+            S = THIRD*DMIN1
+            IF( Z( NN-5 ).GT.Z( NN-7 ) )
+     $         RETURN
+            B1 = Z( NN-5 ) / Z( NN-7 )
+            B2 = B1
+            IF( B2.EQ.ZERO )
+     $         GO TO 60
+            DO 50 I4 = 4*N0 - 9 + PP, 4*I0 - 1 + PP, -4
+               A2 = B1
+               IF( Z( I4 ).GT.Z( I4-2 ) )
+     $            RETURN
+               B1 = B1*( Z( I4 ) / Z( I4-2 ) )
+               B2 = B2 + B1
+               IF( HUNDRD*MAX( B1, A2 ).LT.B2 ) 
+     $            GO TO 60
+   50       CONTINUE
+   60       CONTINUE
+            B2 = SQRT( CNST3*B2 )
+            A2 = DMIN1 / ( ONE+B2**2 )
+            GAP2 = HALF*DMIN2 - A2
+            IF( GAP2.GT.ZERO .AND. GAP2.GT.B2*A2 ) THEN
+               S = MAX( S, A2*( ONE-CNST2*A2*( B2 / GAP2 )*B2 ) )
+            ELSE 
+               S = MAX( S, A2*( ONE-CNST2*B2 ) )
+               TTYPE = -8
+            END IF
+         ELSE
+*
+*           Case 9.
+*
+            S = QURTR*DMIN1
+            IF( DMIN1.EQ.DN1 )
+     $         S = HALF*DMIN1
+            TTYPE = -9
+         END IF
+*
+      ELSE IF( N0IN.EQ.( N0+2 ) ) THEN
+*
+*        Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN.
+*
+*        Cases 10 and 11.
+*
+         IF( DMIN2.EQ.DN2 .AND. TWO*Z( NN-5 ).LT.Z( NN-7 ) ) THEN 
+            TTYPE = -10
+            S = THIRD*DMIN2
+            IF( Z( NN-5 ).GT.Z( NN-7 ) )
+     $         RETURN
+            B1 = Z( NN-5 ) / Z( NN-7 )
+            B2 = B1
+            IF( B2.EQ.ZERO )
+     $         GO TO 80
+            DO 70 I4 = 4*N0 - 9 + PP, 4*I0 - 1 + PP, -4
+               IF( Z( I4 ).GT.Z( I4-2 ) )
+     $            RETURN
+               B1 = B1*( Z( I4 ) / Z( I4-2 ) )
+               B2 = B2 + B1
+               IF( HUNDRD*B1.LT.B2 )
+     $            GO TO 80
+   70       CONTINUE
+   80       CONTINUE
+            B2 = SQRT( CNST3*B2 )
+            A2 = DMIN2 / ( ONE+B2**2 )
+            GAP2 = Z( NN-7 ) + Z( NN-9 ) -
+     $             SQRT( Z( NN-11 ) )*SQRT( Z( NN-9 ) ) - A2
+            IF( GAP2.GT.ZERO .AND. GAP2.GT.B2*A2 ) THEN
+               S = MAX( S, A2*( ONE-CNST2*A2*( B2 / GAP2 )*B2 ) )
+            ELSE 
+               S = MAX( S, A2*( ONE-CNST2*B2 ) )
+            END IF
+         ELSE
+            S = QURTR*DMIN2
+            TTYPE = -11
+         END IF
+      ELSE IF( N0IN.GT.( N0+2 ) ) THEN
+*
+*        Case 12, more than two eigenvalues deflated. No information.
+*
+         S = ZERO 
+         TTYPE = -12
+      END IF
+*
+      TAU = S
+      RETURN
+*
+*     End of SLASQ4
+*
+      END
diff --git a/costa/native/external/lapack/slasq5.f b/costa/native/external/lapack/slasq5.f
new file mode 100644
index 000000000..7e5e00719
--- /dev/null
+++ b/costa/native/external/lapack/slasq5.f
@@ -0,0 +1,196 @@
+      SUBROUTINE SLASQ5( I0, N0, Z, PP, TAU, DMIN, DMIN1, DMIN2, DN,
+     $                   DNM1, DNM2, IEEE )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     May 17, 2000
+*
+*     .. Scalar Arguments ..
+      LOGICAL            IEEE
+      INTEGER            I0, N0, PP
+      REAL               DMIN, DMIN1, DMIN2, DN, DNM1, DNM2, TAU
+*     ..
+*     .. Array Arguments ..
+      REAL               Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLASQ5 computes one dqds transform in ping-pong form, one
+*  version for IEEE machines another for non IEEE machines.
+*
+*  Arguments
+*  =========
+*
+*  I0    (input) INTEGER
+*        First index.
+*
+*  N0    (input) INTEGER
+*        Last index.
+*
+*  Z     (input) REAL array, dimension ( 4*N )
+*        Z holds the qd array. EMIN is stored in Z(4*N0) to avoid
+*        an extra argument.
+*
+*  PP    (input) INTEGER
+*        PP=0 for ping, PP=1 for pong.
+*
+*  TAU   (input) REAL
+*        This is the shift.
+*
+*  DMIN  (output) REAL
+*        Minimum value of d.
+*
+*  DMIN1 (output) REAL
+*        Minimum value of d, excluding D( N0 ).
+*
+*  DMIN2 (output) REAL
+*        Minimum value of d, excluding D( N0 ) and D( N0-1 ).
+*
+*  DN    (output) REAL
+*        d(N0), the last value of d.
+*
+*  DNM1  (output) REAL
+*        d(N0-1).
+*
+*  DNM2  (output) REAL
+*        d(N0-2).
+*
+*  IEEE  (input) LOGICAL
+*        Flag for IEEE or non IEEE arithmetic.
+*
+*  =====================================================================
+*
+*     .. Parameter ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            J4, J4P2
+      REAL               D, EMIN, TEMP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+      IF( ( N0-I0-1 ).LE.0 )
+     $   RETURN
+*
+      J4 = 4*I0 + PP - 3
+      EMIN = Z( J4+4 )
+      D = Z( J4 ) - TAU
+      DMIN = D
+      DMIN1 = -Z( J4 )
+*
+      IF( IEEE ) THEN
+*
+*        Code for IEEE arithmetic.
+*
+         IF( PP.EQ.0 ) THEN
+            DO 10 J4 = 4*I0, 4*( N0-3 ), 4
+               Z( J4-2 ) = D + Z( J4-1 )
+               TEMP = Z( J4+1 ) / Z( J4-2 )
+               D = D*TEMP - TAU
+               DMIN = MIN( DMIN, D )
+               Z( J4 ) = Z( J4-1 )*TEMP
+               EMIN = MIN( Z( J4 ), EMIN )
+   10       CONTINUE
+         ELSE
+            DO 20 J4 = 4*I0, 4*( N0-3 ), 4
+               Z( J4-3 ) = D + Z( J4 )
+               TEMP = Z( J4+2 ) / Z( J4-3 )
+               D = D*TEMP - TAU
+               DMIN = MIN( DMIN, D )
+               Z( J4-1 ) = Z( J4 )*TEMP
+               EMIN = MIN( Z( J4-1 ), EMIN )
+   20       CONTINUE
+         END IF
+*
+*        Unroll last two steps.
+*
+         DNM2 = D
+         DMIN2 = DMIN
+         J4 = 4*( N0-2 ) - PP
+         J4P2 = J4 + 2*PP - 1
+         Z( J4-2 ) = DNM2 + Z( J4P2 )
+         Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) )
+         DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) ) - TAU
+         DMIN = MIN( DMIN, DNM1 )
+*
+         DMIN1 = DMIN
+         J4 = J4 + 4
+         J4P2 = J4 + 2*PP - 1
+         Z( J4-2 ) = DNM1 + Z( J4P2 )
+         Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) )
+         DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) ) - TAU
+         DMIN = MIN( DMIN, DN )
+*
+      ELSE
+*
+*        Code for non IEEE arithmetic.
+*
+         IF( PP.EQ.0 ) THEN
+            DO 30 J4 = 4*I0, 4*( N0-3 ), 4
+               Z( J4-2 ) = D + Z( J4-1 )
+               IF( D.LT.ZERO ) THEN
+                  RETURN
+               ELSE
+                  Z( J4 ) = Z( J4+1 )*( Z( J4-1 ) / Z( J4-2 ) )
+                  D = Z( J4+1 )*( D / Z( J4-2 ) ) - TAU
+               END IF
+               DMIN = MIN( DMIN, D )
+               EMIN = MIN( EMIN, Z( J4 ) )
+   30       CONTINUE
+         ELSE
+            DO 40 J4 = 4*I0, 4*( N0-3 ), 4
+               Z( J4-3 ) = D + Z( J4 )
+               IF( D.LT.ZERO ) THEN
+                  RETURN
+               ELSE
+                  Z( J4-1 ) = Z( J4+2 )*( Z( J4 ) / Z( J4-3 ) )
+                  D = Z( J4+2 )*( D / Z( J4-3 ) ) - TAU
+               END IF
+               DMIN = MIN( DMIN, D )
+               EMIN = MIN( EMIN, Z( J4-1 ) )
+   40       CONTINUE
+         END IF
+*
+*        Unroll last two steps.
+*
+         DNM2 = D
+         DMIN2 = DMIN
+         J4 = 4*( N0-2 ) - PP
+         J4P2 = J4 + 2*PP - 1
+         Z( J4-2 ) = DNM2 + Z( J4P2 )
+         IF( DNM2.LT.ZERO ) THEN
+            RETURN
+         ELSE
+            Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) )
+            DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) ) - TAU
+         END IF
+         DMIN = MIN( DMIN, DNM1 )
+*
+         DMIN1 = DMIN
+         J4 = J4 + 4
+         J4P2 = J4 + 2*PP - 1
+         Z( J4-2 ) = DNM1 + Z( J4P2 )
+         IF( DNM1.LT.ZERO ) THEN
+            RETURN
+         ELSE
+            Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) )
+            DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) ) - TAU
+         END IF
+         DMIN = MIN( DMIN, DN )
+*
+      END IF
+*
+      Z( J4+2 ) = DN
+      Z( 4*N0-PP ) = EMIN
+      RETURN
+*
+*     End of SLASQ5
+*
+      END
diff --git a/costa/native/external/lapack/slasq6.f b/costa/native/external/lapack/slasq6.f
new file mode 100644
index 000000000..1543a3c79
--- /dev/null
+++ b/costa/native/external/lapack/slasq6.f
@@ -0,0 +1,176 @@
+      SUBROUTINE SLASQ6( I0, N0, Z, PP, DMIN, DMIN1, DMIN2, DN,
+     $                   DNM1, DNM2 )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            I0, N0, PP
+      REAL               DMIN, DMIN1, DMIN2, DN, DNM1, DNM2
+*     ..
+*     .. Array Arguments ..
+      REAL               Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLASQ6 computes one dqd (shift equal to zero) transform in
+*  ping-pong form, with protection against underflow and overflow.
+*
+*  Arguments
+*  =========
+*
+*  I0    (input) INTEGER
+*        First index.
+*
+*  N0    (input) INTEGER
+*        Last index.
+*
+*  Z     (input) REAL array, dimension ( 4*N )
+*        Z holds the qd array. EMIN is stored in Z(4*N0) to avoid
+*        an extra argument.
+*
+*  PP    (input) INTEGER
+*        PP=0 for ping, PP=1 for pong.
+*
+*  DMIN  (output) REAL
+*        Minimum value of d.
+*
+*  DMIN1 (output) REAL
+*        Minimum value of d, excluding D( N0 ).
+*
+*  DMIN2 (output) REAL
+*        Minimum value of d, excluding D( N0 ) and D( N0-1 ).
+*
+*  DN    (output) REAL
+*        d(N0), the last value of d.
+*
+*  DNM1  (output) REAL
+*        d(N0-1).
+*
+*  DNM2  (output) REAL
+*        d(N0-2).
+*
+*  =====================================================================
+*
+*     .. Parameter ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            J4, J4P2
+      REAL               D, EMIN, SAFMIN, TEMP
+*     ..
+*     .. External Function ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+      IF( ( N0-I0-1 ).LE.0 )
+     $   RETURN
+*
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      J4 = 4*I0 + PP - 3
+      EMIN = Z( J4+4 ) 
+      D = Z( J4 )
+      DMIN = D
+*
+      IF( PP.EQ.0 ) THEN
+         DO 10 J4 = 4*I0, 4*( N0-3 ), 4
+            Z( J4-2 ) = D + Z( J4-1 ) 
+            IF( Z( J4-2 ).EQ.ZERO ) THEN
+               Z( J4 ) = ZERO
+               D = Z( J4+1 )
+               DMIN = D
+               EMIN = ZERO
+            ELSE IF( SAFMIN*Z( J4+1 ).LT.Z( J4-2 ) .AND.
+     $               SAFMIN*Z( J4-2 ).LT.Z( J4+1 ) ) THEN
+               TEMP = Z( J4+1 ) / Z( J4-2 )
+               Z( J4 ) = Z( J4-1 )*TEMP
+               D = D*TEMP
+            ELSE 
+               Z( J4 ) = Z( J4+1 )*( Z( J4-1 ) / Z( J4-2 ) )
+               D = Z( J4+1 )*( D / Z( J4-2 ) )
+            END IF
+            DMIN = MIN( DMIN, D )
+            EMIN = MIN( EMIN, Z( J4 ) )
+   10    CONTINUE
+      ELSE
+         DO 20 J4 = 4*I0, 4*( N0-3 ), 4
+            Z( J4-3 ) = D + Z( J4 ) 
+            IF( Z( J4-3 ).EQ.ZERO ) THEN
+               Z( J4-1 ) = ZERO
+               D = Z( J4+2 )
+               DMIN = D
+               EMIN = ZERO
+            ELSE IF( SAFMIN*Z( J4+2 ).LT.Z( J4-3 ) .AND.
+     $               SAFMIN*Z( J4-3 ).LT.Z( J4+2 ) ) THEN
+               TEMP = Z( J4+2 ) / Z( J4-3 )
+               Z( J4-1 ) = Z( J4 )*TEMP
+               D = D*TEMP
+            ELSE 
+               Z( J4-1 ) = Z( J4+2 )*( Z( J4 ) / Z( J4-3 ) )
+               D = Z( J4+2 )*( D / Z( J4-3 ) )
+            END IF
+            DMIN = MIN( DMIN, D )
+            EMIN = MIN( EMIN, Z( J4-1 ) )
+   20    CONTINUE
+      END IF
+*
+*     Unroll last two steps. 
+*
+      DNM2 = D
+      DMIN2 = DMIN
+      J4 = 4*( N0-2 ) - PP
+      J4P2 = J4 + 2*PP - 1
+      Z( J4-2 ) = DNM2 + Z( J4P2 )
+      IF( Z( J4-2 ).EQ.ZERO ) THEN
+         Z( J4 ) = ZERO
+         DNM1 = Z( J4P2+2 )
+         DMIN = DNM1
+         EMIN = ZERO
+      ELSE IF( SAFMIN*Z( J4P2+2 ).LT.Z( J4-2 ) .AND.
+     $         SAFMIN*Z( J4-2 ).LT.Z( J4P2+2 ) ) THEN
+         TEMP = Z( J4P2+2 ) / Z( J4-2 )
+         Z( J4 ) = Z( J4P2 )*TEMP
+         DNM1 = DNM2*TEMP
+      ELSE
+         Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) )
+         DNM1 = Z( J4P2+2 )*( DNM2 / Z( J4-2 ) )
+      END IF
+      DMIN = MIN( DMIN, DNM1 )
+*
+      DMIN1 = DMIN
+      J4 = J4 + 4
+      J4P2 = J4 + 2*PP - 1
+      Z( J4-2 ) = DNM1 + Z( J4P2 )
+      IF( Z( J4-2 ).EQ.ZERO ) THEN
+         Z( J4 ) = ZERO
+         DN = Z( J4P2+2 )
+         DMIN = DN
+         EMIN = ZERO
+      ELSE IF( SAFMIN*Z( J4P2+2 ).LT.Z( J4-2 ) .AND.
+     $         SAFMIN*Z( J4-2 ).LT.Z( J4P2+2 ) ) THEN
+         TEMP = Z( J4P2+2 ) / Z( J4-2 )
+         Z( J4 ) = Z( J4P2 )*TEMP
+         DN = DNM1*TEMP
+      ELSE
+         Z( J4 ) = Z( J4P2+2 )*( Z( J4P2 ) / Z( J4-2 ) )
+         DN = Z( J4P2+2 )*( DNM1 / Z( J4-2 ) )
+      END IF
+      DMIN = MIN( DMIN, DN )
+*
+      Z( J4+2 ) = DN
+      Z( 4*N0-PP ) = EMIN
+      RETURN
+*
+*     End of SLASQ6
+*
+      END
diff --git a/costa/native/external/lapack/slasr.f b/costa/native/external/lapack/slasr.f
new file mode 100644
index 000000000..5f3d6f340
--- /dev/null
+++ b/costa/native/external/lapack/slasr.f
@@ -0,0 +1,325 @@
+      SUBROUTINE SLASR( SIDE, PIVOT, DIRECT, M, N, C, S, A, LDA )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIRECT, PIVOT, SIDE
+      INTEGER            LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), C( * ), S( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLASR   performs the transformation
+*
+*     A := P*A,   when SIDE = 'L' or 'l'  (  Left-hand side )
+*
+*     A := A*P',  when SIDE = 'R' or 'r'  ( Right-hand side )
+*
+*  where A is an m by n real matrix and P is an orthogonal matrix,
+*  consisting of a sequence of plane rotations determined by the
+*  parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l'
+*  and z = n when SIDE = 'R' or 'r' ):
+*
+*  When  DIRECT = 'F' or 'f'  ( Forward sequence ) then
+*
+*     P = P( z - 1 )*...*P( 2 )*P( 1 ),
+*
+*  and when DIRECT = 'B' or 'b'  ( Backward sequence ) then
+*
+*     P = P( 1 )*P( 2 )*...*P( z - 1 ),
+*
+*  where  P( k ) is a plane rotation matrix for the following planes:
+*
+*     when  PIVOT = 'V' or 'v'  ( Variable pivot ),
+*        the plane ( k, k + 1 )
+*
+*     when  PIVOT = 'T' or 't'  ( Top pivot ),
+*        the plane ( 1, k + 1 )
+*
+*     when  PIVOT = 'B' or 'b'  ( Bottom pivot ),
+*        the plane ( k, z )
+*
+*  c( k ) and s( k )  must contain the  cosine and sine that define the
+*  matrix  P( k ).  The two by two plane rotation part of the matrix
+*  P( k ), R( k ), is assumed to be of the form
+*
+*     R( k ) = (  c( k )  s( k ) ).
+*              ( -s( k )  c( k ) )
+*
+*  This version vectorises across rows of the array A when SIDE = 'L'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          Specifies whether the plane rotation matrix P is applied to
+*          A on the left or the right.
+*          = 'L':  Left, compute A := P*A
+*          = 'R':  Right, compute A:= A*P'
+*
+*  DIRECT  (input) CHARACTER*1
+*          Specifies whether P is a forward or backward sequence of
+*          plane rotations.
+*          = 'F':  Forward, P = P( z - 1 )*...*P( 2 )*P( 1 )
+*          = 'B':  Backward, P = P( 1 )*P( 2 )*...*P( z - 1 )
+*
+*  PIVOT   (input) CHARACTER*1
+*          Specifies the plane for which P(k) is a plane rotation
+*          matrix.
+*          = 'V':  Variable pivot, the plane (k,k+1)
+*          = 'T':  Top pivot, the plane (1,k+1)
+*          = 'B':  Bottom pivot, the plane (k,z)
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  If m <= 1, an immediate
+*          return is effected.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  If n <= 1, an
+*          immediate return is effected.
+*
+*  C, S    (input) REAL arrays, dimension
+*                  (M-1) if SIDE = 'L'
+*                  (N-1) if SIDE = 'R'
+*          c(k) and s(k) contain the cosine and sine that define the
+*          matrix P(k).  The two by two plane rotation part of the
+*          matrix P(k), R(k), is assumed to be of the form
+*          R( k ) = (  c( k )  s( k ) ).
+*                   ( -s( k )  c( k ) )
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          The m by n matrix A.  On exit, A is overwritten by P*A if
+*          SIDE = 'R' or by A*P' if SIDE = 'L'.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, INFO, J
+      REAL               CTEMP, STEMP, TEMP
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      IF( .NOT.( LSAME( SIDE, 'L' ) .OR. LSAME( SIDE, 'R' ) ) ) THEN
+         INFO = 1
+      ELSE IF( .NOT.( LSAME( PIVOT, 'V' ) .OR. LSAME( PIVOT,
+     $         'T' ) .OR. LSAME( PIVOT, 'B' ) ) ) THEN
+         INFO = 2
+      ELSE IF( .NOT.( LSAME( DIRECT, 'F' ) .OR. LSAME( DIRECT, 'B' ) ) )
+     $          THEN
+         INFO = 3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = 4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = 5
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLASR ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) )
+     $   RETURN
+      IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*        Form  P * A
+*
+         IF( LSAME( PIVOT, 'V' ) ) THEN
+            IF( LSAME( DIRECT, 'F' ) ) THEN
+               DO 20 J = 1, M - 1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 10 I = 1, N
+                        TEMP = A( J+1, I )
+                        A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I )
+                        A( J, I ) = STEMP*TEMP + CTEMP*A( J, I )
+   10                CONTINUE
+                  END IF
+   20          CONTINUE
+            ELSE IF( LSAME( DIRECT, 'B' ) ) THEN
+               DO 40 J = M - 1, 1, -1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 30 I = 1, N
+                        TEMP = A( J+1, I )
+                        A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I )
+                        A( J, I ) = STEMP*TEMP + CTEMP*A( J, I )
+   30                CONTINUE
+                  END IF
+   40          CONTINUE
+            END IF
+         ELSE IF( LSAME( PIVOT, 'T' ) ) THEN
+            IF( LSAME( DIRECT, 'F' ) ) THEN
+               DO 60 J = 2, M
+                  CTEMP = C( J-1 )
+                  STEMP = S( J-1 )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 50 I = 1, N
+                        TEMP = A( J, I )
+                        A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I )
+                        A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I )
+   50                CONTINUE
+                  END IF
+   60          CONTINUE
+            ELSE IF( LSAME( DIRECT, 'B' ) ) THEN
+               DO 80 J = M, 2, -1
+                  CTEMP = C( J-1 )
+                  STEMP = S( J-1 )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 70 I = 1, N
+                        TEMP = A( J, I )
+                        A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I )
+                        A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I )
+   70                CONTINUE
+                  END IF
+   80          CONTINUE
+            END IF
+         ELSE IF( LSAME( PIVOT, 'B' ) ) THEN
+            IF( LSAME( DIRECT, 'F' ) ) THEN
+               DO 100 J = 1, M - 1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 90 I = 1, N
+                        TEMP = A( J, I )
+                        A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP
+                        A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP
+   90                CONTINUE
+                  END IF
+  100          CONTINUE
+            ELSE IF( LSAME( DIRECT, 'B' ) ) THEN
+               DO 120 J = M - 1, 1, -1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 110 I = 1, N
+                        TEMP = A( J, I )
+                        A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP
+                        A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP
+  110                CONTINUE
+                  END IF
+  120          CONTINUE
+            END IF
+         END IF
+      ELSE IF( LSAME( SIDE, 'R' ) ) THEN
+*
+*        Form A * P'
+*
+         IF( LSAME( PIVOT, 'V' ) ) THEN
+            IF( LSAME( DIRECT, 'F' ) ) THEN
+               DO 140 J = 1, N - 1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 130 I = 1, M
+                        TEMP = A( I, J+1 )
+                        A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J )
+                        A( I, J ) = STEMP*TEMP + CTEMP*A( I, J )
+  130                CONTINUE
+                  END IF
+  140          CONTINUE
+            ELSE IF( LSAME( DIRECT, 'B' ) ) THEN
+               DO 160 J = N - 1, 1, -1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 150 I = 1, M
+                        TEMP = A( I, J+1 )
+                        A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J )
+                        A( I, J ) = STEMP*TEMP + CTEMP*A( I, J )
+  150                CONTINUE
+                  END IF
+  160          CONTINUE
+            END IF
+         ELSE IF( LSAME( PIVOT, 'T' ) ) THEN
+            IF( LSAME( DIRECT, 'F' ) ) THEN
+               DO 180 J = 2, N
+                  CTEMP = C( J-1 )
+                  STEMP = S( J-1 )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 170 I = 1, M
+                        TEMP = A( I, J )
+                        A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 )
+                        A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 )
+  170                CONTINUE
+                  END IF
+  180          CONTINUE
+            ELSE IF( LSAME( DIRECT, 'B' ) ) THEN
+               DO 200 J = N, 2, -1
+                  CTEMP = C( J-1 )
+                  STEMP = S( J-1 )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 190 I = 1, M
+                        TEMP = A( I, J )
+                        A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 )
+                        A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 )
+  190                CONTINUE
+                  END IF
+  200          CONTINUE
+            END IF
+         ELSE IF( LSAME( PIVOT, 'B' ) ) THEN
+            IF( LSAME( DIRECT, 'F' ) ) THEN
+               DO 220 J = 1, N - 1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 210 I = 1, M
+                        TEMP = A( I, J )
+                        A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP
+                        A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP
+  210                CONTINUE
+                  END IF
+  220          CONTINUE
+            ELSE IF( LSAME( DIRECT, 'B' ) ) THEN
+               DO 240 J = N - 1, 1, -1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 230 I = 1, M
+                        TEMP = A( I, J )
+                        A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP
+                        A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP
+  230                CONTINUE
+                  END IF
+  240          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SLASR
+*
+      END
diff --git a/costa/native/external/lapack/slasrt.f b/costa/native/external/lapack/slasrt.f
new file mode 100644
index 000000000..debdd820d
--- /dev/null
+++ b/costa/native/external/lapack/slasrt.f
@@ -0,0 +1,244 @@
+      SUBROUTINE SLASRT( ID, N, D, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          ID
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  Sort the numbers in D in increasing order (if ID = 'I') or
+*  in decreasing order (if ID = 'D' ).
+*
+*  Use Quick Sort, reverting to Insertion sort on arrays of
+*  size <= 20. Dimension of STACK limits N to about 2**32.
+*
+*  Arguments
+*  =========
+*
+*  ID      (input) CHARACTER*1
+*          = 'I': sort D in increasing order;
+*          = 'D': sort D in decreasing order.
+*
+*  N       (input) INTEGER
+*          The length of the array D.
+*
+*  D       (input/output) REAL array, dimension (N)
+*          On entry, the array to be sorted.
+*          On exit, D has been sorted into increasing order
+*          (D(1) <= ... <= D(N) ) or into decreasing order
+*          (D(1) >= ... >= D(N) ), depending on ID.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            SELECT
+      PARAMETER          ( SELECT = 20 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            DIR, ENDD, I, J, START, STKPNT
+      REAL               D1, D2, D3, DMNMX, TMP
+*     ..
+*     .. Local Arrays ..
+      INTEGER            STACK( 2, 32 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input paramters.
+*
+      INFO = 0
+      DIR = -1
+      IF( LSAME( ID, 'D' ) ) THEN
+         DIR = 0
+      ELSE IF( LSAME( ID, 'I' ) ) THEN
+         DIR = 1
+      END IF
+      IF( DIR.EQ.-1 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLASRT', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.1 )
+     $   RETURN
+*
+      STKPNT = 1
+      STACK( 1, 1 ) = 1
+      STACK( 2, 1 ) = N
+   10 CONTINUE
+      START = STACK( 1, STKPNT )
+      ENDD = STACK( 2, STKPNT )
+      STKPNT = STKPNT - 1
+      IF( ENDD-START.LE.SELECT .AND. ENDD-START.GT.0 ) THEN
+*
+*        Do Insertion sort on D( START:ENDD )
+*
+         IF( DIR.EQ.0 ) THEN
+*
+*           Sort into decreasing order
+*
+            DO 30 I = START + 1, ENDD
+               DO 20 J = I, START + 1, -1
+                  IF( D( J ).GT.D( J-1 ) ) THEN
+                     DMNMX = D( J )
+                     D( J ) = D( J-1 )
+                     D( J-1 ) = DMNMX
+                  ELSE
+                     GO TO 30
+                  END IF
+   20          CONTINUE
+   30       CONTINUE
+*
+         ELSE
+*
+*           Sort into increasing order
+*
+            DO 50 I = START + 1, ENDD
+               DO 40 J = I, START + 1, -1
+                  IF( D( J ).LT.D( J-1 ) ) THEN
+                     DMNMX = D( J )
+                     D( J ) = D( J-1 )
+                     D( J-1 ) = DMNMX
+                  ELSE
+                     GO TO 50
+                  END IF
+   40          CONTINUE
+   50       CONTINUE
+*
+         END IF
+*
+      ELSE IF( ENDD-START.GT.SELECT ) THEN
+*
+*        Partition D( START:ENDD ) and stack parts, largest one first
+*
+*        Choose partition entry as median of 3
+*
+         D1 = D( START )
+         D2 = D( ENDD )
+         I = ( START+ENDD ) / 2
+         D3 = D( I )
+         IF( D1.LT.D2 ) THEN
+            IF( D3.LT.D1 ) THEN
+               DMNMX = D1
+            ELSE IF( D3.LT.D2 ) THEN
+               DMNMX = D3
+            ELSE
+               DMNMX = D2
+            END IF
+         ELSE
+            IF( D3.LT.D2 ) THEN
+               DMNMX = D2
+            ELSE IF( D3.LT.D1 ) THEN
+               DMNMX = D3
+            ELSE
+               DMNMX = D1
+            END IF
+         END IF
+*
+         IF( DIR.EQ.0 ) THEN
+*
+*           Sort into decreasing order
+*
+            I = START - 1
+            J = ENDD + 1
+   60       CONTINUE
+   70       CONTINUE
+            J = J - 1
+            IF( D( J ).LT.DMNMX )
+     $         GO TO 70
+   80       CONTINUE
+            I = I + 1
+            IF( D( I ).GT.DMNMX )
+     $         GO TO 80
+            IF( I.LT.J ) THEN
+               TMP = D( I )
+               D( I ) = D( J )
+               D( J ) = TMP
+               GO TO 60
+            END IF
+            IF( J-START.GT.ENDD-J-1 ) THEN
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = START
+               STACK( 2, STKPNT ) = J
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = J + 1
+               STACK( 2, STKPNT ) = ENDD
+            ELSE
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = J + 1
+               STACK( 2, STKPNT ) = ENDD
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = START
+               STACK( 2, STKPNT ) = J
+            END IF
+         ELSE
+*
+*           Sort into increasing order
+*
+            I = START - 1
+            J = ENDD + 1
+   90       CONTINUE
+  100       CONTINUE
+            J = J - 1
+            IF( D( J ).GT.DMNMX )
+     $         GO TO 100
+  110       CONTINUE
+            I = I + 1
+            IF( D( I ).LT.DMNMX )
+     $         GO TO 110
+            IF( I.LT.J ) THEN
+               TMP = D( I )
+               D( I ) = D( J )
+               D( J ) = TMP
+               GO TO 90
+            END IF
+            IF( J-START.GT.ENDD-J-1 ) THEN
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = START
+               STACK( 2, STKPNT ) = J
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = J + 1
+               STACK( 2, STKPNT ) = ENDD
+            ELSE
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = J + 1
+               STACK( 2, STKPNT ) = ENDD
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = START
+               STACK( 2, STKPNT ) = J
+            END IF
+         END IF
+      END IF
+      IF( STKPNT.GT.0 )
+     $   GO TO 10
+      RETURN
+*
+*     End of SLASRT
+*
+      END
diff --git a/costa/native/external/lapack/slassq.f b/costa/native/external/lapack/slassq.f
new file mode 100644
index 000000000..c58ddb43d
--- /dev/null
+++ b/costa/native/external/lapack/slassq.f
@@ -0,0 +1,89 @@
+      SUBROUTINE SLASSQ( N, X, INCX, SCALE, SUMSQ )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+      REAL               SCALE, SUMSQ
+*     ..
+*     .. Array Arguments ..
+      REAL               X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLASSQ  returns the values  scl  and  smsq  such that
+*
+*     ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq,
+*
+*  where  x( i ) = X( 1 + ( i - 1 )*INCX ). The value of  sumsq  is
+*  assumed to be non-negative and  scl  returns the value
+*
+*     scl = max( scale, abs( x( i ) ) ).
+*
+*  scale and sumsq must be supplied in SCALE and SUMSQ and
+*  scl and smsq are overwritten on SCALE and SUMSQ respectively.
+*
+*  The routine makes only one pass through the vector x.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of elements to be used from the vector X.
+*
+*  X       (input) REAL array, dimension (N)
+*          The vector for which a scaled sum of squares is computed.
+*             x( i )  = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n.
+*
+*  INCX    (input) INTEGER
+*          The increment between successive values of the vector X.
+*          INCX > 0.
+*
+*  SCALE   (input/output) REAL
+*          On entry, the value  scale  in the equation above.
+*          On exit, SCALE is overwritten with  scl , the scaling factor
+*          for the sum of squares.
+*
+*  SUMSQ   (input/output) REAL
+*          On entry, the value  sumsq  in the equation above.
+*          On exit, SUMSQ is overwritten with  smsq , the basic sum of
+*          squares from which  scl  has been factored out.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            IX
+      REAL               ABSXI
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.GT.0 ) THEN
+         DO 10 IX = 1, 1 + ( N-1 )*INCX, INCX
+            IF( X( IX ).NE.ZERO ) THEN
+               ABSXI = ABS( X( IX ) )
+               IF( SCALE.LT.ABSXI ) THEN
+                  SUMSQ = 1 + SUMSQ*( SCALE / ABSXI )**2
+                  SCALE = ABSXI
+               ELSE
+                  SUMSQ = SUMSQ + ( ABSXI / SCALE )**2
+               END IF
+            END IF
+   10    CONTINUE
+      END IF
+      RETURN
+*
+*     End of SLASSQ
+*
+      END
diff --git a/costa/native/external/lapack/slasv2.f b/costa/native/external/lapack/slasv2.f
new file mode 100644
index 000000000..7073d9e58
--- /dev/null
+++ b/costa/native/external/lapack/slasv2.f
@@ -0,0 +1,250 @@
+      SUBROUTINE SLASV2( F, G, H, SSMIN, SSMAX, SNR, CSR, SNL, CSL )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      REAL               CSL, CSR, F, G, H, SNL, SNR, SSMAX, SSMIN
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLASV2 computes the singular value decomposition of a 2-by-2
+*  triangular matrix
+*     [  F   G  ]
+*     [  0   H  ].
+*  On return, abs(SSMAX) is the larger singular value, abs(SSMIN) is the
+*  smaller singular value, and (CSL,SNL) and (CSR,SNR) are the left and
+*  right singular vectors for abs(SSMAX), giving the decomposition
+*
+*     [ CSL  SNL ] [  F   G  ] [ CSR -SNR ]  =  [ SSMAX   0   ]
+*     [-SNL  CSL ] [  0   H  ] [ SNR  CSR ]     [  0    SSMIN ].
+*
+*  Arguments
+*  =========
+*
+*  F       (input) REAL
+*          The (1,1) element of the 2-by-2 matrix.
+*
+*  G       (input) REAL
+*          The (1,2) element of the 2-by-2 matrix.
+*
+*  H       (input) REAL
+*          The (2,2) element of the 2-by-2 matrix.
+*
+*  SSMIN   (output) REAL
+*          abs(SSMIN) is the smaller singular value.
+*
+*  SSMAX   (output) REAL
+*          abs(SSMAX) is the larger singular value.
+*
+*  SNL     (output) REAL
+*  CSL     (output) REAL
+*          The vector (CSL, SNL) is a unit left singular vector for the
+*          singular value abs(SSMAX).
+*
+*  SNR     (output) REAL
+*  CSR     (output) REAL
+*          The vector (CSR, SNR) is a unit right singular vector for the
+*          singular value abs(SSMAX).
+*
+*  Further Details
+*  ===============
+*
+*  Any input parameter may be aliased with any output parameter.
+*
+*  Barring over/underflow and assuming a guard digit in subtraction, all
+*  output quantities are correct to within a few units in the last
+*  place (ulps).
+*
+*  In IEEE arithmetic, the code works correctly if one matrix element is
+*  infinite.
+*
+*  Overflow will not occur unless the largest singular value itself
+*  overflows or is within a few ulps of overflow. (On machines with
+*  partial overflow, like the Cray, overflow may occur if the largest
+*  singular value is within a factor of 2 of overflow.)
+*
+*  Underflow is harmless if underflow is gradual. Otherwise, results
+*  may correspond to a matrix modified by perturbations of size near
+*  the underflow threshold.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E0 )
+      REAL               HALF
+      PARAMETER          ( HALF = 0.5E0 )
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E0 )
+      REAL               TWO
+      PARAMETER          ( TWO = 2.0E0 )
+      REAL               FOUR
+      PARAMETER          ( FOUR = 4.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            GASMAL, SWAP
+      INTEGER            PMAX
+      REAL               A, CLT, CRT, D, FA, FT, GA, GT, HA, HT, L, M,
+     $                   MM, R, S, SLT, SRT, T, TEMP, TSIGN, TT
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, SIGN, SQRT
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+      FT = F
+      FA = ABS( FT )
+      HT = H
+      HA = ABS( H )
+*
+*     PMAX points to the maximum absolute element of matrix
+*       PMAX = 1 if F largest in absolute values
+*       PMAX = 2 if G largest in absolute values
+*       PMAX = 3 if H largest in absolute values
+*
+      PMAX = 1
+      SWAP = ( HA.GT.FA )
+      IF( SWAP ) THEN
+         PMAX = 3
+         TEMP = FT
+         FT = HT
+         HT = TEMP
+         TEMP = FA
+         FA = HA
+         HA = TEMP
+*
+*        Now FA .ge. HA
+*
+      END IF
+      GT = G
+      GA = ABS( GT )
+      IF( GA.EQ.ZERO ) THEN
+*
+*        Diagonal matrix
+*
+         SSMIN = HA
+         SSMAX = FA
+         CLT = ONE
+         CRT = ONE
+         SLT = ZERO
+         SRT = ZERO
+      ELSE
+         GASMAL = .TRUE.
+         IF( GA.GT.FA ) THEN
+            PMAX = 2
+            IF( ( FA / GA ).LT.SLAMCH( 'EPS' ) ) THEN
+*
+*              Case of very large GA
+*
+               GASMAL = .FALSE.
+               SSMAX = GA
+               IF( HA.GT.ONE ) THEN
+                  SSMIN = FA / ( GA / HA )
+               ELSE
+                  SSMIN = ( FA / GA )*HA
+               END IF
+               CLT = ONE
+               SLT = HT / GT
+               SRT = ONE
+               CRT = FT / GT
+            END IF
+         END IF
+         IF( GASMAL ) THEN
+*
+*           Normal case
+*
+            D = FA - HA
+            IF( D.EQ.FA ) THEN
+*
+*              Copes with infinite F or H
+*
+               L = ONE
+            ELSE
+               L = D / FA
+            END IF
+*
+*           Note that 0 .le. L .le. 1
+*
+            M = GT / FT
+*
+*           Note that abs(M) .le. 1/macheps
+*
+            T = TWO - L
+*
+*           Note that T .ge. 1
+*
+            MM = M*M
+            TT = T*T
+            S = SQRT( TT+MM )
+*
+*           Note that 1 .le. S .le. 1 + 1/macheps
+*
+            IF( L.EQ.ZERO ) THEN
+               R = ABS( M )
+            ELSE
+               R = SQRT( L*L+MM )
+            END IF
+*
+*           Note that 0 .le. R .le. 1 + 1/macheps
+*
+            A = HALF*( S+R )
+*
+*           Note that 1 .le. A .le. 1 + abs(M)
+*
+            SSMIN = HA / A
+            SSMAX = FA*A
+            IF( MM.EQ.ZERO ) THEN
+*
+*              Note that M is very tiny
+*
+               IF( L.EQ.ZERO ) THEN
+                  T = SIGN( TWO, FT )*SIGN( ONE, GT )
+               ELSE
+                  T = GT / SIGN( D, FT ) + M / T
+               END IF
+            ELSE
+               T = ( M / ( S+T )+M / ( R+L ) )*( ONE+A )
+            END IF
+            L = SQRT( T*T+FOUR )
+            CRT = TWO / L
+            SRT = T / L
+            CLT = ( CRT+SRT*M ) / A
+            SLT = ( HT / FT )*SRT / A
+         END IF
+      END IF
+      IF( SWAP ) THEN
+         CSL = SRT
+         SNL = CRT
+         CSR = SLT
+         SNR = CLT
+      ELSE
+         CSL = CLT
+         SNL = SLT
+         CSR = CRT
+         SNR = SRT
+      END IF
+*
+*     Correct signs of SSMAX and SSMIN
+*
+      IF( PMAX.EQ.1 )
+     $   TSIGN = SIGN( ONE, CSR )*SIGN( ONE, CSL )*SIGN( ONE, F )
+      IF( PMAX.EQ.2 )
+     $   TSIGN = SIGN( ONE, SNR )*SIGN( ONE, CSL )*SIGN( ONE, G )
+      IF( PMAX.EQ.3 )
+     $   TSIGN = SIGN( ONE, SNR )*SIGN( ONE, SNL )*SIGN( ONE, H )
+      SSMAX = SIGN( SSMAX, TSIGN )
+      SSMIN = SIGN( SSMIN, TSIGN*SIGN( ONE, F )*SIGN( ONE, H ) )
+      RETURN
+*
+*     End of SLASV2
+*
+      END
diff --git a/costa/native/external/lapack/slaswp.f b/costa/native/external/lapack/slaswp.f
new file mode 100644
index 000000000..2240badec
--- /dev/null
+++ b/costa/native/external/lapack/slaswp.f
@@ -0,0 +1,120 @@
+      SUBROUTINE SLASWP( N, A, LDA, K1, K2, IPIV, INCX )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, K1, K2, LDA, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLASWP performs a series of row interchanges on the matrix A.
+*  One row interchange is initiated for each of rows K1 through K2 of A.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the matrix of column dimension N to which the row
+*          interchanges will be applied.
+*          On exit, the permuted matrix.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*
+*  K1      (input) INTEGER
+*          The first element of IPIV for which a row interchange will
+*          be done.
+*
+*  K2      (input) INTEGER
+*          The last element of IPIV for which a row interchange will
+*          be done.
+*
+*  IPIV    (input) INTEGER array, dimension (M*abs(INCX))
+*          The vector of pivot indices.  Only the elements in positions
+*          K1 through K2 of IPIV are accessed.
+*          IPIV(K) = L implies rows K and L are to be interchanged.
+*
+*  INCX    (input) INTEGER
+*          The increment between successive values of IPIV.  If IPIV
+*          is negative, the pivots are applied in reverse order.
+*
+*  Further Details
+*  ===============
+*
+*  Modified by
+*   R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+* =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, I1, I2, INC, IP, IX, IX0, J, K, N32
+      REAL               TEMP
+*     ..
+*     .. Executable Statements ..
+*
+*     Interchange row I with row IPIV(I) for each of rows K1 through K2.
+*
+      IF( INCX.GT.0 ) THEN
+         IX0 = K1
+         I1 = K1
+         I2 = K2
+         INC = 1
+      ELSE IF( INCX.LT.0 ) THEN
+         IX0 = 1 + ( 1-K2 )*INCX
+         I1 = K2
+         I2 = K1
+         INC = -1
+      ELSE
+         RETURN
+      END IF
+*
+      N32 = ( N / 32 )*32
+      IF( N32.NE.0 ) THEN
+         DO 30 J = 1, N32, 32
+            IX = IX0
+            DO 20 I = I1, I2, INC
+               IP = IPIV( IX )
+               IF( IP.NE.I ) THEN
+                  DO 10 K = J, J + 31
+                     TEMP = A( I, K )
+                     A( I, K ) = A( IP, K )
+                     A( IP, K ) = TEMP
+   10             CONTINUE
+               END IF
+               IX = IX + INCX
+   20       CONTINUE
+   30    CONTINUE
+      END IF
+      IF( N32.NE.N ) THEN
+         N32 = N32 + 1
+         IX = IX0
+         DO 50 I = I1, I2, INC
+            IP = IPIV( IX )
+            IF( IP.NE.I ) THEN
+               DO 40 K = N32, N
+                  TEMP = A( I, K )
+                  A( I, K ) = A( IP, K )
+                  A( IP, K ) = TEMP
+   40          CONTINUE
+            END IF
+            IX = IX + INCX
+   50    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of SLASWP
+*
+      END
diff --git a/costa/native/external/lapack/slasy2.f b/costa/native/external/lapack/slasy2.f
new file mode 100644
index 000000000..43e8427fb
--- /dev/null
+++ b/costa/native/external/lapack/slasy2.f
@@ -0,0 +1,382 @@
+      SUBROUTINE SLASY2( LTRANL, LTRANR, ISGN, N1, N2, TL, LDTL, TR,
+     $                   LDTR, B, LDB, SCALE, X, LDX, XNORM, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      LOGICAL            LTRANL, LTRANR
+      INTEGER            INFO, ISGN, LDB, LDTL, LDTR, LDX, N1, N2
+      REAL               SCALE, XNORM
+*     ..
+*     .. Array Arguments ..
+      REAL               B( LDB, * ), TL( LDTL, * ), TR( LDTR, * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLASY2 solves for the N1 by N2 matrix X, 1 <= N1,N2 <= 2, in
+*
+*         op(TL)*X + ISGN*X*op(TR) = SCALE*B,
+*
+*  where TL is N1 by N1, TR is N2 by N2, B is N1 by N2, and ISGN = 1 or
+*  -1.  op(T) = T or T', where T' denotes the transpose of T.
+*
+*  Arguments
+*  =========
+*
+*  LTRANL  (input) LOGICAL
+*          On entry, LTRANL specifies the op(TL):
+*             = .FALSE., op(TL) = TL,
+*             = .TRUE., op(TL) = TL'.
+*
+*  LTRANR  (input) LOGICAL
+*          On entry, LTRANR specifies the op(TR):
+*            = .FALSE., op(TR) = TR,
+*            = .TRUE., op(TR) = TR'.
+*
+*  ISGN    (input) INTEGER
+*          On entry, ISGN specifies the sign of the equation
+*          as described before. ISGN may only be 1 or -1.
+*
+*  N1      (input) INTEGER
+*          On entry, N1 specifies the order of matrix TL.
+*          N1 may only be 0, 1 or 2.
+*
+*  N2      (input) INTEGER
+*          On entry, N2 specifies the order of matrix TR.
+*          N2 may only be 0, 1 or 2.
+*
+*  TL      (input) REAL array, dimension (LDTL,2)
+*          On entry, TL contains an N1 by N1 matrix.
+*
+*  LDTL    (input) INTEGER
+*          The leading dimension of the matrix TL. LDTL >= max(1,N1).
+*
+*  TR      (input) REAL array, dimension (LDTR,2)
+*          On entry, TR contains an N2 by N2 matrix.
+*
+*  LDTR    (input) INTEGER
+*          The leading dimension of the matrix TR. LDTR >= max(1,N2).
+*
+*  B       (input) REAL array, dimension (LDB,2)
+*          On entry, the N1 by N2 matrix B contains the right-hand
+*          side of the equation.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the matrix B. LDB >= max(1,N1).
+*
+*  SCALE   (output) REAL
+*          On exit, SCALE contains the scale factor. SCALE is chosen
+*          less than or equal to 1 to prevent the solution overflowing.
+*
+*  X       (output) REAL array, dimension (LDX,2)
+*          On exit, X contains the N1 by N2 solution.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the matrix X. LDX >= max(1,N1).
+*
+*  XNORM   (output) REAL
+*          On exit, XNORM is the infinity-norm of the solution.
+*
+*  INFO    (output) INTEGER
+*          On exit, INFO is set to
+*             0: successful exit.
+*             1: TL and TR have too close eigenvalues, so TL or
+*                TR is perturbed to get a nonsingular equation.
+*          NOTE: In the interests of speed, this routine does not
+*                check the inputs for errors.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+      REAL               TWO, HALF, EIGHT
+      PARAMETER          ( TWO = 2.0E+0, HALF = 0.5E+0, EIGHT = 8.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            BSWAP, XSWAP
+      INTEGER            I, IP, IPIV, IPSV, J, JP, JPSV, K
+      REAL               BET, EPS, GAM, L21, SGN, SMIN, SMLNUM, TAU1,
+     $                   TEMP, U11, U12, U22, XMAX
+*     ..
+*     .. Local Arrays ..
+      LOGICAL            BSWPIV( 4 ), XSWPIV( 4 )
+      INTEGER            JPIV( 4 ), LOCL21( 4 ), LOCU12( 4 ),
+     $                   LOCU22( 4 )
+      REAL               BTMP( 4 ), T16( 4, 4 ), TMP( 4 ), X2( 2 )
+*     ..
+*     .. External Functions ..
+      INTEGER            ISAMAX
+      REAL               SLAMCH
+      EXTERNAL           ISAMAX, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Data statements ..
+      DATA               LOCU12 / 3, 4, 1, 2 / , LOCL21 / 2, 1, 4, 3 / ,
+     $                   LOCU22 / 4, 3, 2, 1 /
+      DATA               XSWPIV / .FALSE., .FALSE., .TRUE., .TRUE. /
+      DATA               BSWPIV / .FALSE., .TRUE., .FALSE., .TRUE. /
+*     ..
+*     .. Executable Statements ..
+*
+*     Do not check the input parameters for errors
+*
+      INFO = 0
+*
+*     Quick return if possible
+*
+      IF( N1.EQ.0 .OR. N2.EQ.0 )
+     $   RETURN
+*
+*     Set constants to control overflow
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SLAMCH( 'S' ) / EPS
+      SGN = ISGN
+*
+      K = N1 + N1 + N2 - 2
+      GO TO ( 10, 20, 30, 50 )K
+*
+*     1 by 1: TL11*X + SGN*X*TR11 = B11
+*
+   10 CONTINUE
+      TAU1 = TL( 1, 1 ) + SGN*TR( 1, 1 )
+      BET = ABS( TAU1 )
+      IF( BET.LE.SMLNUM ) THEN
+         TAU1 = SMLNUM
+         BET = SMLNUM
+         INFO = 1
+      END IF
+*
+      SCALE = ONE
+      GAM = ABS( B( 1, 1 ) )
+      IF( SMLNUM*GAM.GT.BET )
+     $   SCALE = ONE / GAM
+*
+      X( 1, 1 ) = ( B( 1, 1 )*SCALE ) / TAU1
+      XNORM = ABS( X( 1, 1 ) )
+      RETURN
+*
+*     1 by 2:
+*     TL11*[X11 X12] + ISGN*[X11 X12]*op[TR11 TR12]  = [B11 B12]
+*                                       [TR21 TR22]
+*
+   20 CONTINUE
+*
+      SMIN = MAX( EPS*MAX( ABS( TL( 1, 1 ) ), ABS( TR( 1, 1 ) ),
+     $       ABS( TR( 1, 2 ) ), ABS( TR( 2, 1 ) ), ABS( TR( 2, 2 ) ) ),
+     $       SMLNUM )
+      TMP( 1 ) = TL( 1, 1 ) + SGN*TR( 1, 1 )
+      TMP( 4 ) = TL( 1, 1 ) + SGN*TR( 2, 2 )
+      IF( LTRANR ) THEN
+         TMP( 2 ) = SGN*TR( 2, 1 )
+         TMP( 3 ) = SGN*TR( 1, 2 )
+      ELSE
+         TMP( 2 ) = SGN*TR( 1, 2 )
+         TMP( 3 ) = SGN*TR( 2, 1 )
+      END IF
+      BTMP( 1 ) = B( 1, 1 )
+      BTMP( 2 ) = B( 1, 2 )
+      GO TO 40
+*
+*     2 by 1:
+*          op[TL11 TL12]*[X11] + ISGN* [X11]*TR11  = [B11]
+*            [TL21 TL22] [X21]         [X21]         [B21]
+*
+   30 CONTINUE
+      SMIN = MAX( EPS*MAX( ABS( TR( 1, 1 ) ), ABS( TL( 1, 1 ) ),
+     $       ABS( TL( 1, 2 ) ), ABS( TL( 2, 1 ) ), ABS( TL( 2, 2 ) ) ),
+     $       SMLNUM )
+      TMP( 1 ) = TL( 1, 1 ) + SGN*TR( 1, 1 )
+      TMP( 4 ) = TL( 2, 2 ) + SGN*TR( 1, 1 )
+      IF( LTRANL ) THEN
+         TMP( 2 ) = TL( 1, 2 )
+         TMP( 3 ) = TL( 2, 1 )
+      ELSE
+         TMP( 2 ) = TL( 2, 1 )
+         TMP( 3 ) = TL( 1, 2 )
+      END IF
+      BTMP( 1 ) = B( 1, 1 )
+      BTMP( 2 ) = B( 2, 1 )
+   40 CONTINUE
+*
+*     Solve 2 by 2 system using complete pivoting.
+*     Set pivots less than SMIN to SMIN.
+*
+      IPIV = ISAMAX( 4, TMP, 1 )
+      U11 = TMP( IPIV )
+      IF( ABS( U11 ).LE.SMIN ) THEN
+         INFO = 1
+         U11 = SMIN
+      END IF
+      U12 = TMP( LOCU12( IPIV ) )
+      L21 = TMP( LOCL21( IPIV ) ) / U11
+      U22 = TMP( LOCU22( IPIV ) ) - U12*L21
+      XSWAP = XSWPIV( IPIV )
+      BSWAP = BSWPIV( IPIV )
+      IF( ABS( U22 ).LE.SMIN ) THEN
+         INFO = 1
+         U22 = SMIN
+      END IF
+      IF( BSWAP ) THEN
+         TEMP = BTMP( 2 )
+         BTMP( 2 ) = BTMP( 1 ) - L21*TEMP
+         BTMP( 1 ) = TEMP
+      ELSE
+         BTMP( 2 ) = BTMP( 2 ) - L21*BTMP( 1 )
+      END IF
+      SCALE = ONE
+      IF( ( TWO*SMLNUM )*ABS( BTMP( 2 ) ).GT.ABS( U22 ) .OR.
+     $    ( TWO*SMLNUM )*ABS( BTMP( 1 ) ).GT.ABS( U11 ) ) THEN
+         SCALE = HALF / MAX( ABS( BTMP( 1 ) ), ABS( BTMP( 2 ) ) )
+         BTMP( 1 ) = BTMP( 1 )*SCALE
+         BTMP( 2 ) = BTMP( 2 )*SCALE
+      END IF
+      X2( 2 ) = BTMP( 2 ) / U22
+      X2( 1 ) = BTMP( 1 ) / U11 - ( U12 / U11 )*X2( 2 )
+      IF( XSWAP ) THEN
+         TEMP = X2( 2 )
+         X2( 2 ) = X2( 1 )
+         X2( 1 ) = TEMP
+      END IF
+      X( 1, 1 ) = X2( 1 )
+      IF( N1.EQ.1 ) THEN
+         X( 1, 2 ) = X2( 2 )
+         XNORM = ABS( X( 1, 1 ) ) + ABS( X( 1, 2 ) )
+      ELSE
+         X( 2, 1 ) = X2( 2 )
+         XNORM = MAX( ABS( X( 1, 1 ) ), ABS( X( 2, 1 ) ) )
+      END IF
+      RETURN
+*
+*     2 by 2:
+*     op[TL11 TL12]*[X11 X12] +ISGN* [X11 X12]*op[TR11 TR12] = [B11 B12]
+*       [TL21 TL22] [X21 X22]        [X21 X22]   [TR21 TR22]   [B21 B22]
+*
+*     Solve equivalent 4 by 4 system using complete pivoting.
+*     Set pivots less than SMIN to SMIN.
+*
+   50 CONTINUE
+      SMIN = MAX( ABS( TR( 1, 1 ) ), ABS( TR( 1, 2 ) ),
+     $       ABS( TR( 2, 1 ) ), ABS( TR( 2, 2 ) ) )
+      SMIN = MAX( SMIN, ABS( TL( 1, 1 ) ), ABS( TL( 1, 2 ) ),
+     $       ABS( TL( 2, 1 ) ), ABS( TL( 2, 2 ) ) )
+      SMIN = MAX( EPS*SMIN, SMLNUM )
+      BTMP( 1 ) = ZERO
+      CALL SCOPY( 16, BTMP, 0, T16, 1 )
+      T16( 1, 1 ) = TL( 1, 1 ) + SGN*TR( 1, 1 )
+      T16( 2, 2 ) = TL( 2, 2 ) + SGN*TR( 1, 1 )
+      T16( 3, 3 ) = TL( 1, 1 ) + SGN*TR( 2, 2 )
+      T16( 4, 4 ) = TL( 2, 2 ) + SGN*TR( 2, 2 )
+      IF( LTRANL ) THEN
+         T16( 1, 2 ) = TL( 2, 1 )
+         T16( 2, 1 ) = TL( 1, 2 )
+         T16( 3, 4 ) = TL( 2, 1 )
+         T16( 4, 3 ) = TL( 1, 2 )
+      ELSE
+         T16( 1, 2 ) = TL( 1, 2 )
+         T16( 2, 1 ) = TL( 2, 1 )
+         T16( 3, 4 ) = TL( 1, 2 )
+         T16( 4, 3 ) = TL( 2, 1 )
+      END IF
+      IF( LTRANR ) THEN
+         T16( 1, 3 ) = SGN*TR( 1, 2 )
+         T16( 2, 4 ) = SGN*TR( 1, 2 )
+         T16( 3, 1 ) = SGN*TR( 2, 1 )
+         T16( 4, 2 ) = SGN*TR( 2, 1 )
+      ELSE
+         T16( 1, 3 ) = SGN*TR( 2, 1 )
+         T16( 2, 4 ) = SGN*TR( 2, 1 )
+         T16( 3, 1 ) = SGN*TR( 1, 2 )
+         T16( 4, 2 ) = SGN*TR( 1, 2 )
+      END IF
+      BTMP( 1 ) = B( 1, 1 )
+      BTMP( 2 ) = B( 2, 1 )
+      BTMP( 3 ) = B( 1, 2 )
+      BTMP( 4 ) = B( 2, 2 )
+*
+*     Perform elimination
+*
+      DO 100 I = 1, 3
+         XMAX = ZERO
+         DO 70 IP = I, 4
+            DO 60 JP = I, 4
+               IF( ABS( T16( IP, JP ) ).GE.XMAX ) THEN
+                  XMAX = ABS( T16( IP, JP ) )
+                  IPSV = IP
+                  JPSV = JP
+               END IF
+   60       CONTINUE
+   70    CONTINUE
+         IF( IPSV.NE.I ) THEN
+            CALL SSWAP( 4, T16( IPSV, 1 ), 4, T16( I, 1 ), 4 )
+            TEMP = BTMP( I )
+            BTMP( I ) = BTMP( IPSV )
+            BTMP( IPSV ) = TEMP
+         END IF
+         IF( JPSV.NE.I )
+     $      CALL SSWAP( 4, T16( 1, JPSV ), 1, T16( 1, I ), 1 )
+         JPIV( I ) = JPSV
+         IF( ABS( T16( I, I ) ).LT.SMIN ) THEN
+            INFO = 1
+            T16( I, I ) = SMIN
+         END IF
+         DO 90 J = I + 1, 4
+            T16( J, I ) = T16( J, I ) / T16( I, I )
+            BTMP( J ) = BTMP( J ) - T16( J, I )*BTMP( I )
+            DO 80 K = I + 1, 4
+               T16( J, K ) = T16( J, K ) - T16( J, I )*T16( I, K )
+   80       CONTINUE
+   90    CONTINUE
+  100 CONTINUE
+      IF( ABS( T16( 4, 4 ) ).LT.SMIN )
+     $   T16( 4, 4 ) = SMIN
+      SCALE = ONE
+      IF( ( EIGHT*SMLNUM )*ABS( BTMP( 1 ) ).GT.ABS( T16( 1, 1 ) ) .OR.
+     $    ( EIGHT*SMLNUM )*ABS( BTMP( 2 ) ).GT.ABS( T16( 2, 2 ) ) .OR.
+     $    ( EIGHT*SMLNUM )*ABS( BTMP( 3 ) ).GT.ABS( T16( 3, 3 ) ) .OR.
+     $    ( EIGHT*SMLNUM )*ABS( BTMP( 4 ) ).GT.ABS( T16( 4, 4 ) ) ) THEN
+         SCALE = ( ONE / EIGHT ) / MAX( ABS( BTMP( 1 ) ),
+     $           ABS( BTMP( 2 ) ), ABS( BTMP( 3 ) ), ABS( BTMP( 4 ) ) )
+         BTMP( 1 ) = BTMP( 1 )*SCALE
+         BTMP( 2 ) = BTMP( 2 )*SCALE
+         BTMP( 3 ) = BTMP( 3 )*SCALE
+         BTMP( 4 ) = BTMP( 4 )*SCALE
+      END IF
+      DO 120 I = 1, 4
+         K = 5 - I
+         TEMP = ONE / T16( K, K )
+         TMP( K ) = BTMP( K )*TEMP
+         DO 110 J = K + 1, 4
+            TMP( K ) = TMP( K ) - ( TEMP*T16( K, J ) )*TMP( J )
+  110    CONTINUE
+  120 CONTINUE
+      DO 130 I = 1, 3
+         IF( JPIV( 4-I ).NE.4-I ) THEN
+            TEMP = TMP( 4-I )
+            TMP( 4-I ) = TMP( JPIV( 4-I ) )
+            TMP( JPIV( 4-I ) ) = TEMP
+         END IF
+  130 CONTINUE
+      X( 1, 1 ) = TMP( 1 )
+      X( 2, 1 ) = TMP( 2 )
+      X( 1, 2 ) = TMP( 3 )
+      X( 2, 2 ) = TMP( 4 )
+      XNORM = MAX( ABS( TMP( 1 ) )+ABS( TMP( 3 ) ),
+     $        ABS( TMP( 2 ) )+ABS( TMP( 4 ) ) )
+      RETURN
+*
+*     End of SLASY2
+*
+      END
diff --git a/costa/native/external/lapack/slasyf.f b/costa/native/external/lapack/slasyf.f
new file mode 100644
index 000000000..a5e4dd171
--- /dev/null
+++ b/costa/native/external/lapack/slasyf.f
@@ -0,0 +1,588 @@
+      SUBROUTINE SLASYF( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KB, LDA, LDW, N, NB
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               A( LDA, * ), W( LDW, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLASYF computes a partial factorization of a real symmetric matrix A
+*  using the Bunch-Kaufman diagonal pivoting method. The partial
+*  factorization has the form:
+*
+*  A  =  ( I  U12 ) ( A11  0  ) (  I    0   )  if UPLO = 'U', or:
+*        ( 0  U22 ) (  0   D  ) ( U12' U22' )
+*
+*  A  =  ( L11  0 ) (  D   0  ) ( L11' L21' )  if UPLO = 'L'
+*        ( L21  I ) (  0  A22 ) (  0    I   )
+*
+*  where the order of D is at most NB. The actual order is returned in
+*  the argument KB, and is either NB or NB-1, or N if N <= NB.
+*
+*  SLASYF is an auxiliary routine called by SSYTRF. It uses blocked code
+*  (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or
+*  A22 (if UPLO = 'L').
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored:
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NB      (input) INTEGER
+*          The maximum number of columns of the matrix A that should be
+*          factored.  NB should be at least 2 to allow for 2-by-2 pivot
+*          blocks.
+*
+*  KB      (output) INTEGER
+*          The number of columns of A that were actually factored.
+*          KB is either NB-1 or NB, or N if N <= NB.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          n-by-n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n-by-n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*          On exit, A contains details of the partial factorization.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D.
+*          If UPLO = 'U', only the last KB elements of IPIV are set;
+*          if UPLO = 'L', only the first KB elements are set.
+*
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*  W       (workspace) REAL array, dimension (LDW,NB)
+*
+*  LDW     (input) INTEGER
+*          The leading dimension of the array W.  LDW >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          > 0: if INFO = k, D(k,k) is exactly zero.  The factorization
+*               has been completed, but the block diagonal matrix D is
+*               exactly singular.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+      REAL               EIGHT, SEVTEN
+      PARAMETER          ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            IMAX, J, JB, JJ, JMAX, JP, K, KK, KKW, KP,
+     $                   KSTEP, KW
+      REAL               ABSAKK, ALPHA, COLMAX, D11, D21, D22, R1,
+     $                   ROWMAX, T
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ISAMAX
+      EXTERNAL           LSAME, ISAMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SGEMM, SGEMV, SSCAL, SSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+*
+*     Initialize ALPHA for use in choosing pivot block size.
+*
+      ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT
+*
+      IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*        Factorize the trailing columns of A using the upper triangle
+*        of A and working backwards, and compute the matrix W = U12*D
+*        for use in updating A11
+*
+*        K is the main loop index, decreasing from N in steps of 1 or 2
+*
+*        KW is the column of W which corresponds to column K of A
+*
+         K = N
+   10    CONTINUE
+         KW = NB + K - N
+*
+*        Exit from loop
+*
+         IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 )
+     $      GO TO 30
+*
+*        Copy column K of A to column KW of W and update it
+*
+         CALL SCOPY( K, A( 1, K ), 1, W( 1, KW ), 1 )
+         IF( K.LT.N )
+     $      CALL SGEMV( 'No transpose', K, N-K, -ONE, A( 1, K+1 ), LDA,
+     $                  W( K, KW+1 ), LDW, ONE, W( 1, KW ), 1 )
+*
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = ABS( W( K, KW ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.GT.1 ) THEN
+            IMAX = ISAMAX( K-1, W( 1, KW ), 1 )
+            COLMAX = ABS( W( IMAX, KW ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              Copy column IMAX to column KW-1 of W and update it
+*
+               CALL SCOPY( IMAX, A( 1, IMAX ), 1, W( 1, KW-1 ), 1 )
+               CALL SCOPY( K-IMAX, A( IMAX, IMAX+1 ), LDA,
+     $                     W( IMAX+1, KW-1 ), 1 )
+               IF( K.LT.N )
+     $            CALL SGEMV( 'No transpose', K, N-K, -ONE, A( 1, K+1 ),
+     $                        LDA, W( IMAX, KW+1 ), LDW, ONE,
+     $                        W( 1, KW-1 ), 1 )
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               JMAX = IMAX + ISAMAX( K-IMAX, W( IMAX+1, KW-1 ), 1 )
+               ROWMAX = ABS( W( JMAX, KW-1 ) )
+               IF( IMAX.GT.1 ) THEN
+                  JMAX = ISAMAX( IMAX-1, W( 1, KW-1 ), 1 )
+                  ROWMAX = MAX( ROWMAX, ABS( W( JMAX, KW-1 ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( ABS( W( IMAX, KW-1 ) ).GE.ALPHA*ROWMAX ) THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+*
+*                 copy column KW-1 of W to column KW
+*
+                  CALL SCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 )
+               ELSE
+*
+*                 interchange rows and columns K-1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K - KSTEP + 1
+            KKW = NB + KK - N
+*
+*           Updated column KP is already stored in column KKW of W
+*
+            IF( KP.NE.KK ) THEN
+*
+*              Copy non-updated column KK to column KP
+*
+               A( KP, K ) = A( KK, K )
+               CALL SCOPY( K-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ),
+     $                     LDA )
+               CALL SCOPY( KP, A( 1, KK ), 1, A( 1, KP ), 1 )
+*
+*              Interchange rows KK and KP in last KK columns of A and W
+*
+               CALL SSWAP( N-KK+1, A( KK, KK ), LDA, A( KP, KK ), LDA )
+               CALL SSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ),
+     $                     LDW )
+            END IF
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column KW of W now holds
+*
+*              W(k) = U(k)*D(k)
+*
+*              where U(k) is the k-th column of U
+*
+*              Store U(k) in column k of A
+*
+               CALL SCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 )
+               R1 = ONE / A( K, K )
+               CALL SSCAL( K-1, R1, A( 1, K ), 1 )
+            ELSE
+*
+*              2-by-2 pivot block D(k): columns KW and KW-1 of W now
+*              hold
+*
+*              ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k)
+*
+*              where U(k) and U(k-1) are the k-th and (k-1)-th columns
+*              of U
+*
+               IF( K.GT.2 ) THEN
+*
+*                 Store U(k) and U(k-1) in columns k and k-1 of A
+*
+                  D21 = W( K-1, KW )
+                  D11 = W( K, KW ) / D21
+                  D22 = W( K-1, KW-1 ) / D21
+                  T = ONE / ( D11*D22-ONE )
+                  D21 = T / D21
+                  DO 20 J = 1, K - 2
+                     A( J, K-1 ) = D21*( D11*W( J, KW-1 )-W( J, KW ) )
+                     A( J, K ) = D21*( D22*W( J, KW )-W( J, KW-1 ) )
+   20             CONTINUE
+               END IF
+*
+*              Copy D(k) to A
+*
+               A( K-1, K-1 ) = W( K-1, KW-1 )
+               A( K-1, K ) = W( K-1, KW )
+               A( K, K ) = W( K, KW )
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K-1 ) = -KP
+         END IF
+*
+*        Decrease K and return to the start of the main loop
+*
+         K = K - KSTEP
+         GO TO 10
+*
+   30    CONTINUE
+*
+*        Update the upper triangle of A11 (= A(1:k,1:k)) as
+*
+*        A11 := A11 - U12*D*U12' = A11 - U12*W'
+*
+*        computing blocks of NB columns at a time
+*
+         DO 50 J = ( ( K-1 ) / NB )*NB + 1, 1, -NB
+            JB = MIN( NB, K-J+1 )
+*
+*           Update the upper triangle of the diagonal block
+*
+            DO 40 JJ = J, J + JB - 1
+               CALL SGEMV( 'No transpose', JJ-J+1, N-K, -ONE,
+     $                     A( J, K+1 ), LDA, W( JJ, KW+1 ), LDW, ONE,
+     $                     A( J, JJ ), 1 )
+   40       CONTINUE
+*
+*           Update the rectangular superdiagonal block
+*
+            CALL SGEMM( 'No transpose', 'Transpose', J-1, JB, N-K, -ONE,
+     $                  A( 1, K+1 ), LDA, W( J, KW+1 ), LDW, ONE,
+     $                  A( 1, J ), LDA )
+   50    CONTINUE
+*
+*        Put U12 in standard form by partially undoing the interchanges
+*        in columns k+1:n
+*
+         J = K + 1
+   60    CONTINUE
+         JJ = J
+         JP = IPIV( J )
+         IF( JP.LT.0 ) THEN
+            JP = -JP
+            J = J + 1
+         END IF
+         J = J + 1
+         IF( JP.NE.JJ .AND. J.LE.N )
+     $      CALL SSWAP( N-J+1, A( JP, J ), LDA, A( JJ, J ), LDA )
+         IF( J.LE.N )
+     $      GO TO 60
+*
+*        Set KB to the number of columns factorized
+*
+         KB = N - K
+*
+      ELSE
+*
+*        Factorize the leading columns of A using the lower triangle
+*        of A and working forwards, and compute the matrix W = L21*D
+*        for use in updating A22
+*
+*        K is the main loop index, increasing from 1 in steps of 1 or 2
+*
+         K = 1
+   70    CONTINUE
+*
+*        Exit from loop
+*
+         IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N )
+     $      GO TO 90
+*
+*        Copy column K of A to column K of W and update it
+*
+         CALL SCOPY( N-K+1, A( K, K ), 1, W( K, K ), 1 )
+         CALL SGEMV( 'No transpose', N-K+1, K-1, -ONE, A( K, 1 ), LDA,
+     $               W( K, 1 ), LDW, ONE, W( K, K ), 1 )
+*
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = ABS( W( K, K ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.LT.N ) THEN
+            IMAX = K + ISAMAX( N-K, W( K+1, K ), 1 )
+            COLMAX = ABS( W( IMAX, K ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              Copy column IMAX to column K+1 of W and update it
+*
+               CALL SCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1 )
+               CALL SCOPY( N-IMAX+1, A( IMAX, IMAX ), 1, W( IMAX, K+1 ),
+     $                     1 )
+               CALL SGEMV( 'No transpose', N-K+1, K-1, -ONE, A( K, 1 ),
+     $                     LDA, W( IMAX, 1 ), LDW, ONE, W( K, K+1 ), 1 )
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               JMAX = K - 1 + ISAMAX( IMAX-K, W( K, K+1 ), 1 )
+               ROWMAX = ABS( W( JMAX, K+1 ) )
+               IF( IMAX.LT.N ) THEN
+                  JMAX = IMAX + ISAMAX( N-IMAX, W( IMAX+1, K+1 ), 1 )
+                  ROWMAX = MAX( ROWMAX, ABS( W( JMAX, K+1 ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( ABS( W( IMAX, K+1 ) ).GE.ALPHA*ROWMAX ) THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+*
+*                 copy column K+1 of W to column K
+*
+                  CALL SCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 )
+               ELSE
+*
+*                 interchange rows and columns K+1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K + KSTEP - 1
+*
+*           Updated column KP is already stored in column KK of W
+*
+            IF( KP.NE.KK ) THEN
+*
+*              Copy non-updated column KK to column KP
+*
+               A( KP, K ) = A( KK, K )
+               CALL SCOPY( KP-K-1, A( K+1, KK ), 1, A( KP, K+1 ), LDA )
+               CALL SCOPY( N-KP+1, A( KP, KK ), 1, A( KP, KP ), 1 )
+*
+*              Interchange rows KK and KP in first KK columns of A and W
+*
+               CALL SSWAP( KK, A( KK, 1 ), LDA, A( KP, 1 ), LDA )
+               CALL SSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW )
+            END IF
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column k of W now holds
+*
+*              W(k) = L(k)*D(k)
+*
+*              where L(k) is the k-th column of L
+*
+*              Store L(k) in column k of A
+*
+               CALL SCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 )
+               IF( K.LT.N ) THEN
+                  R1 = ONE / A( K, K )
+                  CALL SSCAL( N-K, R1, A( K+1, K ), 1 )
+               END IF
+            ELSE
+*
+*              2-by-2 pivot block D(k): columns k and k+1 of W now hold
+*
+*              ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k)
+*
+*              where L(k) and L(k+1) are the k-th and (k+1)-th columns
+*              of L
+*
+               IF( K.LT.N-1 ) THEN
+*
+*                 Store L(k) and L(k+1) in columns k and k+1 of A
+*
+                  D21 = W( K+1, K )
+                  D11 = W( K+1, K+1 ) / D21
+                  D22 = W( K, K ) / D21
+                  T = ONE / ( D11*D22-ONE )
+                  D21 = T / D21
+                  DO 80 J = K + 2, N
+                     A( J, K ) = D21*( D11*W( J, K )-W( J, K+1 ) )
+                     A( J, K+1 ) = D21*( D22*W( J, K+1 )-W( J, K ) )
+   80             CONTINUE
+               END IF
+*
+*              Copy D(k) to A
+*
+               A( K, K ) = W( K, K )
+               A( K+1, K ) = W( K+1, K )
+               A( K+1, K+1 ) = W( K+1, K+1 )
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K+1 ) = -KP
+         END IF
+*
+*        Increase K and return to the start of the main loop
+*
+         K = K + KSTEP
+         GO TO 70
+*
+   90    CONTINUE
+*
+*        Update the lower triangle of A22 (= A(k:n,k:n)) as
+*
+*        A22 := A22 - L21*D*L21' = A22 - L21*W'
+*
+*        computing blocks of NB columns at a time
+*
+         DO 110 J = K, N, NB
+            JB = MIN( NB, N-J+1 )
+*
+*           Update the lower triangle of the diagonal block
+*
+            DO 100 JJ = J, J + JB - 1
+               CALL SGEMV( 'No transpose', J+JB-JJ, K-1, -ONE,
+     $                     A( JJ, 1 ), LDA, W( JJ, 1 ), LDW, ONE,
+     $                     A( JJ, JJ ), 1 )
+  100       CONTINUE
+*
+*           Update the rectangular subdiagonal block
+*
+            IF( J+JB.LE.N )
+     $         CALL SGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB,
+     $                     K-1, -ONE, A( J+JB, 1 ), LDA, W( J, 1 ), LDW,
+     $                     ONE, A( J+JB, J ), LDA )
+  110    CONTINUE
+*
+*        Put L21 in standard form by partially undoing the interchanges
+*        in columns 1:k-1
+*
+         J = K - 1
+  120    CONTINUE
+         JJ = J
+         JP = IPIV( J )
+         IF( JP.LT.0 ) THEN
+            JP = -JP
+            J = J - 1
+         END IF
+         J = J - 1
+         IF( JP.NE.JJ .AND. J.GE.1 )
+     $      CALL SSWAP( J, A( JP, 1 ), LDA, A( JJ, 1 ), LDA )
+         IF( J.GE.1 )
+     $      GO TO 120
+*
+*        Set KB to the number of columns factorized
+*
+         KB = K - 1
+*
+      END IF
+      RETURN
+*
+*     End of SLASYF
+*
+      END
diff --git a/costa/native/external/lapack/slatbs.f b/costa/native/external/lapack/slatbs.f
new file mode 100644
index 000000000..8d37a9e56
--- /dev/null
+++ b/costa/native/external/lapack/slatbs.f
@@ -0,0 +1,724 @@
+      SUBROUTINE SLATBS( UPLO, TRANS, DIAG, NORMIN, N, KD, AB, LDAB, X,
+     $                   SCALE, CNORM, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORMIN, TRANS, UPLO
+      INTEGER            INFO, KD, LDAB, N
+      REAL               SCALE
+*     ..
+*     .. Array Arguments ..
+      REAL               AB( LDAB, * ), CNORM( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLATBS solves one of the triangular systems
+*
+*     A *x = s*b  or  A'*x = s*b
+*
+*  with scaling to prevent overflow, where A is an upper or lower
+*  triangular band matrix.  Here A' denotes the transpose of A, x and b
+*  are n-element vectors, and s is a scaling factor, usually less than
+*  or equal to 1, chosen so that the components of x will be less than
+*  the overflow threshold.  If the unscaled problem will not cause
+*  overflow, the Level 2 BLAS routine STBSV is called.  If the matrix A
+*  is singular (A(j,j) = 0 for some j), then s is set to 0 and a
+*  non-trivial solution to A*x = 0 is returned.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the matrix A is upper or lower triangular.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the operation applied to A.
+*          = 'N':  Solve A * x = s*b  (No transpose)
+*          = 'T':  Solve A'* x = s*b  (Transpose)
+*          = 'C':  Solve A'* x = s*b  (Conjugate transpose = Transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          Specifies whether or not the matrix A is unit triangular.
+*          = 'N':  Non-unit triangular
+*          = 'U':  Unit triangular
+*
+*  NORMIN  (input) CHARACTER*1
+*          Specifies whether CNORM has been set or not.
+*          = 'Y':  CNORM contains the column norms on entry
+*          = 'N':  CNORM is not set on entry.  On exit, the norms will
+*                  be computed and stored in CNORM.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of subdiagonals or superdiagonals in the
+*          triangular matrix A.  KD >= 0.
+*
+*  AB      (input) REAL array, dimension (LDAB,N)
+*          The upper or lower triangular band matrix A, stored in the
+*          first KD+1 rows of the array. The j-th column of A is stored
+*          in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  X       (input/output) REAL array, dimension (N)
+*          On entry, the right hand side b of the triangular system.
+*          On exit, X is overwritten by the solution vector x.
+*
+*  SCALE   (output) REAL
+*          The scaling factor s for the triangular system
+*             A * x = s*b  or  A'* x = s*b.
+*          If SCALE = 0, the matrix A is singular or badly scaled, and
+*          the vector x is an exact or approximate solution to A*x = 0.
+*
+*  CNORM   (input or output) REAL array, dimension (N)
+*
+*          If NORMIN = 'Y', CNORM is an input argument and CNORM(j)
+*          contains the norm of the off-diagonal part of the j-th column
+*          of A.  If TRANS = 'N', CNORM(j) must be greater than or equal
+*          to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)
+*          must be greater than or equal to the 1-norm.
+*
+*          If NORMIN = 'N', CNORM is an output argument and CNORM(j)
+*          returns the 1-norm of the offdiagonal part of the j-th column
+*          of A.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -k, the k-th argument had an illegal value
+*
+*  Further Details
+*  ======= =======
+*
+*  A rough bound on x is computed; if that is less than overflow, STBSV
+*  is called, otherwise, specific code is used which checks for possible
+*  overflow or divide-by-zero at every operation.
+*
+*  A columnwise scheme is used for solving A*x = b.  The basic algorithm
+*  if A is lower triangular is
+*
+*       x[1:n] := b[1:n]
+*       for j = 1, ..., n
+*            x(j) := x(j) / A(j,j)
+*            x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j]
+*       end
+*
+*  Define bounds on the components of x after j iterations of the loop:
+*     M(j) = bound on x[1:j]
+*     G(j) = bound on x[j+1:n]
+*  Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}.
+*
+*  Then for iteration j+1 we have
+*     M(j+1) <= G(j) / | A(j+1,j+1) |
+*     G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] |
+*            <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | )
+*
+*  where CNORM(j+1) is greater than or equal to the infinity-norm of
+*  column j+1 of A, not counting the diagonal.  Hence
+*
+*     G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | )
+*                  1<=i<=j
+*  and
+*
+*     |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| )
+*                                   1<=i< j
+*
+*  Since |x(j)| <= M(j), we use the Level 2 BLAS routine STBSV if the
+*  reciprocal of the largest M(j), j=1,..,n, is larger than
+*  max(underflow, 1/overflow).
+*
+*  The bound on x(j) is also used to determine when a step in the
+*  columnwise method can be performed without fear of overflow.  If
+*  the computed bound is greater than a large constant, x is scaled to
+*  prevent overflow, but if the bound overflows, x is set to 0, x(j) to
+*  1, and scale to 0, and a non-trivial solution to A*x = 0 is found.
+*
+*  Similarly, a row-wise scheme is used to solve A'*x = b.  The basic
+*  algorithm for A upper triangular is
+*
+*       for j = 1, ..., n
+*            x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j)
+*       end
+*
+*  We simultaneously compute two bounds
+*       G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j
+*       M(j) = bound on x(i), 1<=i<=j
+*
+*  The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we
+*  add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1.
+*  Then the bound on x(j) is
+*
+*       M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) |
+*
+*            <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| )
+*                      1<=i<=j
+*
+*  and we can safely call STBSV if 1/M(n) and 1/G(n) are both greater
+*  than max(underflow, 1/overflow).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, HALF, ONE
+      PARAMETER          ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN, NOUNIT, UPPER
+      INTEGER            I, IMAX, J, JFIRST, JINC, JLAST, JLEN, MAIND
+      REAL               BIGNUM, GROW, REC, SMLNUM, SUMJ, TJJ, TJJS,
+     $                   TMAX, TSCAL, USCAL, XBND, XJ, XMAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ISAMAX
+      REAL               SASUM, SDOT, SLAMCH
+      EXTERNAL           LSAME, ISAMAX, SASUM, SDOT, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SSCAL, STBSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Test the input parameters.
+*
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT.
+     $         LSAME( NORMIN, 'N' ) ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLATBS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine machine dependent parameters to control overflow.
+*
+      SMLNUM = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' )
+      BIGNUM = ONE / SMLNUM
+      SCALE = ONE
+*
+      IF( LSAME( NORMIN, 'N' ) ) THEN
+*
+*        Compute the 1-norm of each column, not including the diagonal.
+*
+         IF( UPPER ) THEN
+*
+*           A is upper triangular.
+*
+            DO 10 J = 1, N
+               JLEN = MIN( KD, J-1 )
+               CNORM( J ) = SASUM( JLEN, AB( KD+1-JLEN, J ), 1 )
+   10       CONTINUE
+         ELSE
+*
+*           A is lower triangular.
+*
+            DO 20 J = 1, N
+               JLEN = MIN( KD, N-J )
+               IF( JLEN.GT.0 ) THEN
+                  CNORM( J ) = SASUM( JLEN, AB( 2, J ), 1 )
+               ELSE
+                  CNORM( J ) = ZERO
+               END IF
+   20       CONTINUE
+         END IF
+      END IF
+*
+*     Scale the column norms by TSCAL if the maximum element in CNORM is
+*     greater than BIGNUM.
+*
+      IMAX = ISAMAX( N, CNORM, 1 )
+      TMAX = CNORM( IMAX )
+      IF( TMAX.LE.BIGNUM ) THEN
+         TSCAL = ONE
+      ELSE
+         TSCAL = ONE / ( SMLNUM*TMAX )
+         CALL SSCAL( N, TSCAL, CNORM, 1 )
+      END IF
+*
+*     Compute a bound on the computed solution vector to see if the
+*     Level 2 BLAS routine STBSV can be used.
+*
+      J = ISAMAX( N, X, 1 )
+      XMAX = ABS( X( J ) )
+      XBND = XMAX
+      IF( NOTRAN ) THEN
+*
+*        Compute the growth in A * x = b.
+*
+         IF( UPPER ) THEN
+            JFIRST = N
+            JLAST = 1
+            JINC = -1
+            MAIND = KD + 1
+         ELSE
+            JFIRST = 1
+            JLAST = N
+            JINC = 1
+            MAIND = 1
+         END IF
+*
+         IF( TSCAL.NE.ONE ) THEN
+            GROW = ZERO
+            GO TO 50
+         END IF
+*
+         IF( NOUNIT ) THEN
+*
+*           A is non-unit triangular.
+*
+*           Compute GROW = 1/G(j) and XBND = 1/M(j).
+*           Initially, G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = ONE / MAX( XBND, SMLNUM )
+            XBND = GROW
+            DO 30 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 50
+*
+*              M(j) = G(j-1) / abs(A(j,j))
+*
+               TJJ = ABS( AB( MAIND, J ) )
+               XBND = MIN( XBND, MIN( ONE, TJJ )*GROW )
+               IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN
+*
+*                 G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) )
+*
+                  GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) )
+               ELSE
+*
+*                 G(j) could overflow, set GROW to 0.
+*
+                  GROW = ZERO
+               END IF
+   30       CONTINUE
+            GROW = XBND
+         ELSE
+*
+*           A is unit triangular.
+*
+*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) )
+            DO 40 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 50
+*
+*              G(j) = G(j-1)*( 1 + CNORM(j) )
+*
+               GROW = GROW*( ONE / ( ONE+CNORM( J ) ) )
+   40       CONTINUE
+         END IF
+   50    CONTINUE
+*
+      ELSE
+*
+*        Compute the growth in A' * x = b.
+*
+         IF( UPPER ) THEN
+            JFIRST = 1
+            JLAST = N
+            JINC = 1
+            MAIND = KD + 1
+         ELSE
+            JFIRST = N
+            JLAST = 1
+            JINC = -1
+            MAIND = 1
+         END IF
+*
+         IF( TSCAL.NE.ONE ) THEN
+            GROW = ZERO
+            GO TO 80
+         END IF
+*
+         IF( NOUNIT ) THEN
+*
+*           A is non-unit triangular.
+*
+*           Compute GROW = 1/G(j) and XBND = 1/M(j).
+*           Initially, M(0) = max{x(i), i=1,...,n}.
+*
+            GROW = ONE / MAX( XBND, SMLNUM )
+            XBND = GROW
+            DO 60 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 80
+*
+*              G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) )
+*
+               XJ = ONE + CNORM( J )
+               GROW = MIN( GROW, XBND / XJ )
+*
+*              M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j))
+*
+               TJJ = ABS( AB( MAIND, J ) )
+               IF( XJ.GT.TJJ )
+     $            XBND = XBND*( TJJ / XJ )
+   60       CONTINUE
+            GROW = MIN( GROW, XBND )
+         ELSE
+*
+*           A is unit triangular.
+*
+*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) )
+            DO 70 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 80
+*
+*              G(j) = ( 1 + CNORM(j) )*G(j-1)
+*
+               XJ = ONE + CNORM( J )
+               GROW = GROW / XJ
+   70       CONTINUE
+         END IF
+   80    CONTINUE
+      END IF
+*
+      IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN
+*
+*        Use the Level 2 BLAS solve if the reciprocal of the bound on
+*        elements of X is not too small.
+*
+         CALL STBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, X, 1 )
+      ELSE
+*
+*        Use a Level 1 BLAS solve, scaling intermediate results.
+*
+         IF( XMAX.GT.BIGNUM ) THEN
+*
+*           Scale X so that its components are less than or equal to
+*           BIGNUM in absolute value.
+*
+            SCALE = BIGNUM / XMAX
+            CALL SSCAL( N, SCALE, X, 1 )
+            XMAX = BIGNUM
+         END IF
+*
+         IF( NOTRAN ) THEN
+*
+*           Solve A * x = b
+*
+            DO 100 J = JFIRST, JLAST, JINC
+*
+*              Compute x(j) = b(j) / A(j,j), scaling x if necessary.
+*
+               XJ = ABS( X( J ) )
+               IF( NOUNIT ) THEN
+                  TJJS = AB( MAIND, J )*TSCAL
+               ELSE
+                  TJJS = TSCAL
+                  IF( TSCAL.EQ.ONE )
+     $               GO TO 95
+               END IF
+                  TJJ = ABS( TJJS )
+                  IF( TJJ.GT.SMLNUM ) THEN
+*
+*                    abs(A(j,j)) > SMLNUM:
+*
+                     IF( TJJ.LT.ONE ) THEN
+                        IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                          Scale x by 1/b(j).
+*
+                           REC = ONE / XJ
+                           CALL SSCAL( N, REC, X, 1 )
+                           SCALE = SCALE*REC
+                           XMAX = XMAX*REC
+                        END IF
+                     END IF
+                     X( J ) = X( J ) / TJJS
+                     XJ = ABS( X( J ) )
+                  ELSE IF( TJJ.GT.ZERO ) THEN
+*
+*                    0 < abs(A(j,j)) <= SMLNUM:
+*
+                     IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                       Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM
+*                       to avoid overflow when dividing by A(j,j).
+*
+                        REC = ( TJJ*BIGNUM ) / XJ
+                        IF( CNORM( J ).GT.ONE ) THEN
+*
+*                          Scale by 1/CNORM(j) to avoid overflow when
+*                          multiplying x(j) times column j.
+*
+                           REC = REC / CNORM( J )
+                        END IF
+                        CALL SSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                     X( J ) = X( J ) / TJJS
+                     XJ = ABS( X( J ) )
+                  ELSE
+*
+*                    A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+*                    scale = 0, and compute a solution to A*x = 0.
+*
+                     DO 90 I = 1, N
+                        X( I ) = ZERO
+   90                CONTINUE
+                     X( J ) = ONE
+                     XJ = ONE
+                     SCALE = ZERO
+                     XMAX = ZERO
+                  END IF
+   95          CONTINUE
+*
+*              Scale x if necessary to avoid overflow when adding a
+*              multiple of column j of A.
+*
+               IF( XJ.GT.ONE ) THEN
+                  REC = ONE / XJ
+                  IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN
+*
+*                    Scale x by 1/(2*abs(x(j))).
+*
+                     REC = REC*HALF
+                     CALL SSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                  END IF
+               ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN
+*
+*                 Scale x by 1/2.
+*
+                  CALL SSCAL( N, HALF, X, 1 )
+                  SCALE = SCALE*HALF
+               END IF
+*
+               IF( UPPER ) THEN
+                  IF( J.GT.1 ) THEN
+*
+*                    Compute the update
+*                       x(max(1,j-kd):j-1) := x(max(1,j-kd):j-1) -
+*                                             x(j)* A(max(1,j-kd):j-1,j)
+*
+                     JLEN = MIN( KD, J-1 )
+                     CALL SAXPY( JLEN, -X( J )*TSCAL,
+     $                           AB( KD+1-JLEN, J ), 1, X( J-JLEN ), 1 )
+                     I = ISAMAX( J-1, X, 1 )
+                     XMAX = ABS( X( I ) )
+                  END IF
+               ELSE IF( J.LT.N ) THEN
+*
+*                 Compute the update
+*                    x(j+1:min(j+kd,n)) := x(j+1:min(j+kd,n)) -
+*                                          x(j) * A(j+1:min(j+kd,n),j)
+*
+                  JLEN = MIN( KD, N-J )
+                  IF( JLEN.GT.0 )
+     $               CALL SAXPY( JLEN, -X( J )*TSCAL, AB( 2, J ), 1,
+     $                           X( J+1 ), 1 )
+                  I = J + ISAMAX( N-J, X( J+1 ), 1 )
+                  XMAX = ABS( X( I ) )
+               END IF
+  100       CONTINUE
+*
+         ELSE
+*
+*           Solve A' * x = b
+*
+            DO 140 J = JFIRST, JLAST, JINC
+*
+*              Compute x(j) = b(j) - sum A(k,j)*x(k).
+*                                    k<>j
+*
+               XJ = ABS( X( J ) )
+               USCAL = TSCAL
+               REC = ONE / MAX( XMAX, ONE )
+               IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN
+*
+*                 If x(j) could overflow, scale x by 1/(2*XMAX).
+*
+                  REC = REC*HALF
+                  IF( NOUNIT ) THEN
+                     TJJS = AB( MAIND, J )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                  END IF
+                     TJJ = ABS( TJJS )
+                     IF( TJJ.GT.ONE ) THEN
+*
+*                       Divide by A(j,j) when scaling x if A(j,j) > 1.
+*
+                        REC = MIN( ONE, REC*TJJ )
+                        USCAL = USCAL / TJJS
+                     END IF
+                  IF( REC.LT.ONE ) THEN
+                     CALL SSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                     XMAX = XMAX*REC
+                  END IF
+               END IF
+*
+               SUMJ = ZERO
+               IF( USCAL.EQ.ONE ) THEN
+*
+*                 If the scaling needed for A in the dot product is 1,
+*                 call SDOT to perform the dot product.
+*
+                  IF( UPPER ) THEN
+                     JLEN = MIN( KD, J-1 )
+                     SUMJ = SDOT( JLEN, AB( KD+1-JLEN, J ), 1,
+     $                      X( J-JLEN ), 1 )
+                  ELSE
+                     JLEN = MIN( KD, N-J )
+                     IF( JLEN.GT.0 )
+     $                  SUMJ = SDOT( JLEN, AB( 2, J ), 1, X( J+1 ), 1 )
+                  END IF
+               ELSE
+*
+*                 Otherwise, use in-line code for the dot product.
+*
+                  IF( UPPER ) THEN
+                     JLEN = MIN( KD, J-1 )
+                     DO 110 I = 1, JLEN
+                        SUMJ = SUMJ + ( AB( KD+I-JLEN, J )*USCAL )*
+     $                         X( J-JLEN-1+I )
+  110                CONTINUE
+                  ELSE
+                     JLEN = MIN( KD, N-J )
+                     DO 120 I = 1, JLEN
+                        SUMJ = SUMJ + ( AB( I+1, J )*USCAL )*X( J+I )
+  120                CONTINUE
+                  END IF
+               END IF
+*
+               IF( USCAL.EQ.TSCAL ) THEN
+*
+*                 Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j)
+*                 was not used to scale the dotproduct.
+*
+                  X( J ) = X( J ) - SUMJ
+                  XJ = ABS( X( J ) )
+                  IF( NOUNIT ) THEN
+*
+*                    Compute x(j) = x(j) / A(j,j), scaling if necessary.
+*
+                     TJJS = AB( MAIND, J )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                     IF( TSCAL.EQ.ONE )
+     $                  GO TO 135
+                  END IF
+                     TJJ = ABS( TJJS )
+                     IF( TJJ.GT.SMLNUM ) THEN
+*
+*                       abs(A(j,j)) > SMLNUM:
+*
+                        IF( TJJ.LT.ONE ) THEN
+                           IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                             Scale X by 1/abs(x(j)).
+*
+                              REC = ONE / XJ
+                              CALL SSCAL( N, REC, X, 1 )
+                              SCALE = SCALE*REC
+                              XMAX = XMAX*REC
+                           END IF
+                        END IF
+                        X( J ) = X( J ) / TJJS
+                     ELSE IF( TJJ.GT.ZERO ) THEN
+*
+*                       0 < abs(A(j,j)) <= SMLNUM:
+*
+                        IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM.
+*
+                           REC = ( TJJ*BIGNUM ) / XJ
+                           CALL SSCAL( N, REC, X, 1 )
+                           SCALE = SCALE*REC
+                           XMAX = XMAX*REC
+                        END IF
+                        X( J ) = X( J ) / TJJS
+                     ELSE
+*
+*                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+*                       scale = 0, and compute a solution to A'*x = 0.
+*
+                        DO 130 I = 1, N
+                           X( I ) = ZERO
+  130                   CONTINUE
+                        X( J ) = ONE
+                        SCALE = ZERO
+                        XMAX = ZERO
+                     END IF
+  135             CONTINUE
+               ELSE
+*
+*                 Compute x(j) := x(j) / A(j,j) - sumj if the dot
+*                 product has already been divided by 1/A(j,j).
+*
+                  X( J ) = X( J ) / TJJS - SUMJ
+               END IF
+               XMAX = MAX( XMAX, ABS( X( J ) ) )
+  140       CONTINUE
+         END IF
+         SCALE = SCALE / TSCAL
+      END IF
+*
+*     Scale the column norms by 1/TSCAL for return.
+*
+      IF( TSCAL.NE.ONE ) THEN
+         CALL SSCAL( N, ONE / TSCAL, CNORM, 1 )
+      END IF
+*
+      RETURN
+*
+*     End of SLATBS
+*
+      END
diff --git a/costa/native/external/lapack/slatdf.f b/costa/native/external/lapack/slatdf.f
new file mode 100644
index 000000000..9bae7a1a1
--- /dev/null
+++ b/costa/native/external/lapack/slatdf.f
@@ -0,0 +1,238 @@
+      SUBROUTINE SLATDF( IJOB, N, Z, LDZ, RHS, RDSUM, RDSCAL, IPIV,
+     $                   JPIV )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            IJOB, LDZ, N
+      REAL               RDSCAL, RDSUM
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), JPIV( * )
+      REAL               RHS( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLATDF uses the LU factorization of the n-by-n matrix Z computed by
+*  SGETC2 and computes a contribution to the reciprocal Dif-estimate
+*  by solving Z * x = b for x, and choosing the r.h.s. b such that
+*  the norm of x is as large as possible. On entry RHS = b holds the
+*  contribution from earlier solved sub-systems, and on return RHS = x.
+*
+*  The factorization of Z returned by SGETC2 has the form Z = P*L*U*Q,
+*  where P and Q are permutation matrices. L is lower triangular with
+*  unit diagonal elements and U is upper triangular.
+*
+*  Arguments
+*  =========
+*
+*  IJOB    (input) INTEGER
+*          IJOB = 2: First compute an approximative null-vector e
+*              of Z using SGECON, e is normalized and solve for
+*              Zx = +-e - f with the sign giving the greater value
+*              of 2-norm(x). About 5 times as expensive as Default.
+*          IJOB .ne. 2: Local look ahead strategy where all entries of
+*              the r.h.s. b is choosen as either +1 or -1 (Default).
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Z.
+*
+*  Z       (input) REAL array, dimension (LDZ, N)
+*          On entry, the LU part of the factorization of the n-by-n
+*          matrix Z computed by SGETC2:  Z = P * L * U * Q
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDA >= max(1, N).
+*
+*  RHS     (input/output) REAL array, dimension N.
+*          On entry, RHS contains contributions from other subsystems.
+*          On exit, RHS contains the solution of the subsystem with
+*          entries acoording to the value of IJOB (see above).
+*
+*  RDSUM   (input/output) REAL
+*          On entry, the sum of squares of computed contributions to
+*          the Dif-estimate under computation by STGSYL, where the
+*          scaling factor RDSCAL (see below) has been factored out.
+*          On exit, the corresponding sum of squares updated with the
+*          contributions from the current sub-system.
+*          If TRANS = 'T' RDSUM is not touched.
+*          NOTE: RDSUM only makes sense when STGSY2 is called by STGSYL.
+*
+*  RDSCAL  (input/output) REAL
+*          On entry, scaling factor used to prevent overflow in RDSUM.
+*          On exit, RDSCAL is updated w.r.t. the current contributions
+*          in RDSUM.
+*          If TRANS = 'T', RDSCAL is not touched.
+*          NOTE: RDSCAL only makes sense when STGSY2 is called by
+*                STGSYL.
+*
+*  IPIV    (input) INTEGER array, dimension (N).
+*          The pivot indices; for 1 <= i <= N, row i of the
+*          matrix has been interchanged with row IPIV(i).
+*
+*  JPIV    (input) INTEGER array, dimension (N).
+*          The pivot indices; for 1 <= j <= N, column j of the
+*          matrix has been interchanged with column JPIV(j).
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  This routine is a further developed implementation of algorithm
+*  BSOLVE in [1] using complete pivoting in the LU factorization.
+*
+*  [1] Bo Kagstrom and Lars Westin,
+*      Generalized Schur Methods with Condition Estimators for
+*      Solving the Generalized Sylvester Equation, IEEE Transactions
+*      on Automatic Control, Vol. 34, No. 7, July 1989, pp 745-751.
+*
+*  [2] Peter Poromaa,
+*      On Efficient and Robust Estimators for the Separation
+*      between two Regular Matrix Pairs with Applications in
+*      Condition Estimation. Report IMINF-95.05, Departement of
+*      Computing Science, Umea University, S-901 87 Umea, Sweden, 1995.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            MAXDIM
+      PARAMETER          ( MAXDIM = 8 )
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, INFO, J, K
+      REAL               BM, BP, PMONE, SMINU, SPLUS, TEMP
+*     ..
+*     .. Local Arrays ..
+      INTEGER            IWORK( MAXDIM )
+      REAL               WORK( 4*MAXDIM ), XM( MAXDIM ), XP( MAXDIM )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SCOPY, SGECON, SGESC2, SLASSQ, SLASWP,
+     $                   SSCAL
+*     ..
+*     .. External Functions ..
+      REAL               SASUM, SDOT
+      EXTERNAL           SASUM, SDOT
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( IJOB.NE.2 ) THEN
+*
+*        Apply permutations IPIV to RHS
+*
+         CALL SLASWP( 1, RHS, LDZ, 1, N-1, IPIV, 1 )
+*
+*        Solve for L-part choosing RHS either to +1 or -1.
+*
+         PMONE = -ONE
+*
+         DO 10 J = 1, N - 1
+            BP = RHS( J ) + ONE
+            BM = RHS( J ) - ONE
+            SPLUS = ONE
+*
+*           Look-ahead for L-part RHS(1:N-1) = + or -1, SPLUS and
+*           SMIN computed more efficiently than in BSOLVE [1].
+*
+            SPLUS = SPLUS + SDOT( N-J, Z( J+1, J ), 1, Z( J+1, J ), 1 )
+            SMINU = SDOT( N-J, Z( J+1, J ), 1, RHS( J+1 ), 1 )
+            SPLUS = SPLUS*RHS( J )
+            IF( SPLUS.GT.SMINU ) THEN
+               RHS( J ) = BP
+            ELSE IF( SMINU.GT.SPLUS ) THEN
+               RHS( J ) = BM
+            ELSE
+*
+*              In this case the updating sums are equal and we can
+*              choose RHS(J) +1 or -1. The first time this happens
+*              we choose -1, thereafter +1. This is a simple way to
+*              get good estimates of matrices like Byers well-known
+*              example (see [1]). (Not done in BSOLVE.)
+*
+               RHS( J ) = RHS( J ) + PMONE
+               PMONE = ONE
+            END IF
+*
+*           Compute the remaining r.h.s.
+*
+            TEMP = -RHS( J )
+            CALL SAXPY( N-J, TEMP, Z( J+1, J ), 1, RHS( J+1 ), 1 )
+*
+   10    CONTINUE
+*
+*        Solve for U-part, look-ahead for RHS(N) = +-1. This is not done
+*        in BSOLVE and will hopefully give us a better estimate because
+*        any ill-conditioning of the original matrix is transfered to U
+*        and not to L. U(N, N) is an approximation to sigma_min(LU).
+*
+         CALL SCOPY( N-1, RHS, 1, XP, 1 )
+         XP( N ) = RHS( N ) + ONE
+         RHS( N ) = RHS( N ) - ONE
+         SPLUS = ZERO
+         SMINU = ZERO
+         DO 30 I = N, 1, -1
+            TEMP = ONE / Z( I, I )
+            XP( I ) = XP( I )*TEMP
+            RHS( I ) = RHS( I )*TEMP
+            DO 20 K = I + 1, N
+               XP( I ) = XP( I ) - XP( K )*( Z( I, K )*TEMP )
+               RHS( I ) = RHS( I ) - RHS( K )*( Z( I, K )*TEMP )
+   20       CONTINUE
+            SPLUS = SPLUS + ABS( XP( I ) )
+            SMINU = SMINU + ABS( RHS( I ) )
+   30    CONTINUE
+         IF( SPLUS.GT.SMINU )
+     $      CALL SCOPY( N, XP, 1, RHS, 1 )
+*
+*        Apply the permutations JPIV to the computed solution (RHS)
+*
+         CALL SLASWP( 1, RHS, LDZ, 1, N-1, JPIV, -1 )
+*
+*        Compute the sum of squares
+*
+         CALL SLASSQ( N, RHS, 1, RDSCAL, RDSUM )
+*
+      ELSE
+*
+*        IJOB = 2, Compute approximate nullvector XM of Z
+*
+         CALL SGECON( 'I', N, Z, LDZ, ONE, TEMP, WORK, IWORK, INFO )
+         CALL SCOPY( N, WORK( N+1 ), 1, XM, 1 )
+*
+*        Compute RHS
+*
+         CALL SLASWP( 1, XM, LDZ, 1, N-1, IPIV, -1 )
+         TEMP = ONE / SQRT( SDOT( N, XM, 1, XM, 1 ) )
+         CALL SSCAL( N, TEMP, XM, 1 )
+         CALL SCOPY( N, XM, 1, XP, 1 )
+         CALL SAXPY( N, ONE, RHS, 1, XP, 1 )
+         CALL SAXPY( N, -ONE, XM, 1, RHS, 1 )
+         CALL SGESC2( N, Z, LDZ, RHS, IPIV, JPIV, TEMP )
+         CALL SGESC2( N, Z, LDZ, XP, IPIV, JPIV, TEMP )
+         IF( SASUM( N, XP, 1 ).GT.SASUM( N, RHS, 1 ) )
+     $      CALL SCOPY( N, XP, 1, RHS, 1 )
+*
+*        Compute the sum of squares
+*
+         CALL SLASSQ( N, RHS, 1, RDSCAL, RDSUM )
+*
+      END IF
+*
+      RETURN
+*
+*     End of SLATDF
+*
+      END
diff --git a/costa/native/external/lapack/slatps.f b/costa/native/external/lapack/slatps.f
new file mode 100644
index 000000000..a71f99845
--- /dev/null
+++ b/costa/native/external/lapack/slatps.f
@@ -0,0 +1,713 @@
+      SUBROUTINE SLATPS( UPLO, TRANS, DIAG, NORMIN, N, AP, X, SCALE,
+     $                   CNORM, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORMIN, TRANS, UPLO
+      INTEGER            INFO, N
+      REAL               SCALE
+*     ..
+*     .. Array Arguments ..
+      REAL               AP( * ), CNORM( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLATPS solves one of the triangular systems
+*
+*     A *x = s*b  or  A'*x = s*b
+*
+*  with scaling to prevent overflow, where A is an upper or lower
+*  triangular matrix stored in packed form.  Here A' denotes the
+*  transpose of A, x and b are n-element vectors, and s is a scaling
+*  factor, usually less than or equal to 1, chosen so that the
+*  components of x will be less than the overflow threshold.  If the
+*  unscaled problem will not cause overflow, the Level 2 BLAS routine
+*  STPSV is called. If the matrix A is singular (A(j,j) = 0 for some j),
+*  then s is set to 0 and a non-trivial solution to A*x = 0 is returned.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the matrix A is upper or lower triangular.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the operation applied to A.
+*          = 'N':  Solve A * x = s*b  (No transpose)
+*          = 'T':  Solve A'* x = s*b  (Transpose)
+*          = 'C':  Solve A'* x = s*b  (Conjugate transpose = Transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          Specifies whether or not the matrix A is unit triangular.
+*          = 'N':  Non-unit triangular
+*          = 'U':  Unit triangular
+*
+*  NORMIN  (input) CHARACTER*1
+*          Specifies whether CNORM has been set or not.
+*          = 'Y':  CNORM contains the column norms on entry
+*          = 'N':  CNORM is not set on entry.  On exit, the norms will
+*                  be computed and stored in CNORM.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input) REAL array, dimension (N*(N+1)/2)
+*          The upper or lower triangular matrix A, packed columnwise in
+*          a linear array.  The j-th column of A is stored in the array
+*          AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*  X       (input/output) REAL array, dimension (N)
+*          On entry, the right hand side b of the triangular system.
+*          On exit, X is overwritten by the solution vector x.
+*
+*  SCALE   (output) REAL
+*          The scaling factor s for the triangular system
+*             A * x = s*b  or  A'* x = s*b.
+*          If SCALE = 0, the matrix A is singular or badly scaled, and
+*          the vector x is an exact or approximate solution to A*x = 0.
+*
+*  CNORM   (input or output) REAL array, dimension (N)
+*
+*          If NORMIN = 'Y', CNORM is an input argument and CNORM(j)
+*          contains the norm of the off-diagonal part of the j-th column
+*          of A.  If TRANS = 'N', CNORM(j) must be greater than or equal
+*          to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)
+*          must be greater than or equal to the 1-norm.
+*
+*          If NORMIN = 'N', CNORM is an output argument and CNORM(j)
+*          returns the 1-norm of the offdiagonal part of the j-th column
+*          of A.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -k, the k-th argument had an illegal value
+*
+*  Further Details
+*  ======= =======
+*
+*  A rough bound on x is computed; if that is less than overflow, STPSV
+*  is called, otherwise, specific code is used which checks for possible
+*  overflow or divide-by-zero at every operation.
+*
+*  A columnwise scheme is used for solving A*x = b.  The basic algorithm
+*  if A is lower triangular is
+*
+*       x[1:n] := b[1:n]
+*       for j = 1, ..., n
+*            x(j) := x(j) / A(j,j)
+*            x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j]
+*       end
+*
+*  Define bounds on the components of x after j iterations of the loop:
+*     M(j) = bound on x[1:j]
+*     G(j) = bound on x[j+1:n]
+*  Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}.
+*
+*  Then for iteration j+1 we have
+*     M(j+1) <= G(j) / | A(j+1,j+1) |
+*     G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] |
+*            <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | )
+*
+*  where CNORM(j+1) is greater than or equal to the infinity-norm of
+*  column j+1 of A, not counting the diagonal.  Hence
+*
+*     G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | )
+*                  1<=i<=j
+*  and
+*
+*     |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| )
+*                                   1<=i< j
+*
+*  Since |x(j)| <= M(j), we use the Level 2 BLAS routine STPSV if the
+*  reciprocal of the largest M(j), j=1,..,n, is larger than
+*  max(underflow, 1/overflow).
+*
+*  The bound on x(j) is also used to determine when a step in the
+*  columnwise method can be performed without fear of overflow.  If
+*  the computed bound is greater than a large constant, x is scaled to
+*  prevent overflow, but if the bound overflows, x is set to 0, x(j) to
+*  1, and scale to 0, and a non-trivial solution to A*x = 0 is found.
+*
+*  Similarly, a row-wise scheme is used to solve A'*x = b.  The basic
+*  algorithm for A upper triangular is
+*
+*       for j = 1, ..., n
+*            x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j)
+*       end
+*
+*  We simultaneously compute two bounds
+*       G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j
+*       M(j) = bound on x(i), 1<=i<=j
+*
+*  The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we
+*  add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1.
+*  Then the bound on x(j) is
+*
+*       M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) |
+*
+*            <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| )
+*                      1<=i<=j
+*
+*  and we can safely call STPSV if 1/M(n) and 1/G(n) are both greater
+*  than max(underflow, 1/overflow).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, HALF, ONE
+      PARAMETER          ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN, NOUNIT, UPPER
+      INTEGER            I, IMAX, IP, J, JFIRST, JINC, JLAST, JLEN
+      REAL               BIGNUM, GROW, REC, SMLNUM, SUMJ, TJJ, TJJS,
+     $                   TMAX, TSCAL, USCAL, XBND, XJ, XMAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ISAMAX
+      REAL               SASUM, SDOT, SLAMCH
+      EXTERNAL           LSAME, ISAMAX, SASUM, SDOT, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SSCAL, STPSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Test the input parameters.
+*
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT.
+     $         LSAME( NORMIN, 'N' ) ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLATPS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine machine dependent parameters to control overflow.
+*
+      SMLNUM = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' )
+      BIGNUM = ONE / SMLNUM
+      SCALE = ONE
+*
+      IF( LSAME( NORMIN, 'N' ) ) THEN
+*
+*        Compute the 1-norm of each column, not including the diagonal.
+*
+         IF( UPPER ) THEN
+*
+*           A is upper triangular.
+*
+            IP = 1
+            DO 10 J = 1, N
+               CNORM( J ) = SASUM( J-1, AP( IP ), 1 )
+               IP = IP + J
+   10       CONTINUE
+         ELSE
+*
+*           A is lower triangular.
+*
+            IP = 1
+            DO 20 J = 1, N - 1
+               CNORM( J ) = SASUM( N-J, AP( IP+1 ), 1 )
+               IP = IP + N - J + 1
+   20       CONTINUE
+            CNORM( N ) = ZERO
+         END IF
+      END IF
+*
+*     Scale the column norms by TSCAL if the maximum element in CNORM is
+*     greater than BIGNUM.
+*
+      IMAX = ISAMAX( N, CNORM, 1 )
+      TMAX = CNORM( IMAX )
+      IF( TMAX.LE.BIGNUM ) THEN
+         TSCAL = ONE
+      ELSE
+         TSCAL = ONE / ( SMLNUM*TMAX )
+         CALL SSCAL( N, TSCAL, CNORM, 1 )
+      END IF
+*
+*     Compute a bound on the computed solution vector to see if the
+*     Level 2 BLAS routine STPSV can be used.
+*
+      J = ISAMAX( N, X, 1 )
+      XMAX = ABS( X( J ) )
+      XBND = XMAX
+      IF( NOTRAN ) THEN
+*
+*        Compute the growth in A * x = b.
+*
+         IF( UPPER ) THEN
+            JFIRST = N
+            JLAST = 1
+            JINC = -1
+         ELSE
+            JFIRST = 1
+            JLAST = N
+            JINC = 1
+         END IF
+*
+         IF( TSCAL.NE.ONE ) THEN
+            GROW = ZERO
+            GO TO 50
+         END IF
+*
+         IF( NOUNIT ) THEN
+*
+*           A is non-unit triangular.
+*
+*           Compute GROW = 1/G(j) and XBND = 1/M(j).
+*           Initially, G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = ONE / MAX( XBND, SMLNUM )
+            XBND = GROW
+            IP = JFIRST*( JFIRST+1 ) / 2
+            JLEN = N
+            DO 30 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 50
+*
+*              M(j) = G(j-1) / abs(A(j,j))
+*
+               TJJ = ABS( AP( IP ) )
+               XBND = MIN( XBND, MIN( ONE, TJJ )*GROW )
+               IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN
+*
+*                 G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) )
+*
+                  GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) )
+               ELSE
+*
+*                 G(j) could overflow, set GROW to 0.
+*
+                  GROW = ZERO
+               END IF
+               IP = IP + JINC*JLEN
+               JLEN = JLEN - 1
+   30       CONTINUE
+            GROW = XBND
+         ELSE
+*
+*           A is unit triangular.
+*
+*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) )
+            DO 40 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 50
+*
+*              G(j) = G(j-1)*( 1 + CNORM(j) )
+*
+               GROW = GROW*( ONE / ( ONE+CNORM( J ) ) )
+   40       CONTINUE
+         END IF
+   50    CONTINUE
+*
+      ELSE
+*
+*        Compute the growth in A' * x = b.
+*
+         IF( UPPER ) THEN
+            JFIRST = 1
+            JLAST = N
+            JINC = 1
+         ELSE
+            JFIRST = N
+            JLAST = 1
+            JINC = -1
+         END IF
+*
+         IF( TSCAL.NE.ONE ) THEN
+            GROW = ZERO
+            GO TO 80
+         END IF
+*
+         IF( NOUNIT ) THEN
+*
+*           A is non-unit triangular.
+*
+*           Compute GROW = 1/G(j) and XBND = 1/M(j).
+*           Initially, M(0) = max{x(i), i=1,...,n}.
+*
+            GROW = ONE / MAX( XBND, SMLNUM )
+            XBND = GROW
+            IP = JFIRST*( JFIRST+1 ) / 2
+            JLEN = 1
+            DO 60 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 80
+*
+*              G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) )
+*
+               XJ = ONE + CNORM( J )
+               GROW = MIN( GROW, XBND / XJ )
+*
+*              M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j))
+*
+               TJJ = ABS( AP( IP ) )
+               IF( XJ.GT.TJJ )
+     $            XBND = XBND*( TJJ / XJ )
+               JLEN = JLEN + 1
+               IP = IP + JINC*JLEN
+   60       CONTINUE
+            GROW = MIN( GROW, XBND )
+         ELSE
+*
+*           A is unit triangular.
+*
+*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) )
+            DO 70 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 80
+*
+*              G(j) = ( 1 + CNORM(j) )*G(j-1)
+*
+               XJ = ONE + CNORM( J )
+               GROW = GROW / XJ
+   70       CONTINUE
+         END IF
+   80    CONTINUE
+      END IF
+*
+      IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN
+*
+*        Use the Level 2 BLAS solve if the reciprocal of the bound on
+*        elements of X is not too small.
+*
+         CALL STPSV( UPLO, TRANS, DIAG, N, AP, X, 1 )
+      ELSE
+*
+*        Use a Level 1 BLAS solve, scaling intermediate results.
+*
+         IF( XMAX.GT.BIGNUM ) THEN
+*
+*           Scale X so that its components are less than or equal to
+*           BIGNUM in absolute value.
+*
+            SCALE = BIGNUM / XMAX
+            CALL SSCAL( N, SCALE, X, 1 )
+            XMAX = BIGNUM
+         END IF
+*
+         IF( NOTRAN ) THEN
+*
+*           Solve A * x = b
+*
+            IP = JFIRST*( JFIRST+1 ) / 2
+            DO 100 J = JFIRST, JLAST, JINC
+*
+*              Compute x(j) = b(j) / A(j,j), scaling x if necessary.
+*
+               XJ = ABS( X( J ) )
+               IF( NOUNIT ) THEN
+                  TJJS = AP( IP )*TSCAL
+               ELSE
+                  TJJS = TSCAL
+                  IF( TSCAL.EQ.ONE )
+     $               GO TO 95
+               END IF
+                  TJJ = ABS( TJJS )
+                  IF( TJJ.GT.SMLNUM ) THEN
+*
+*                    abs(A(j,j)) > SMLNUM:
+*
+                     IF( TJJ.LT.ONE ) THEN
+                        IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                          Scale x by 1/b(j).
+*
+                           REC = ONE / XJ
+                           CALL SSCAL( N, REC, X, 1 )
+                           SCALE = SCALE*REC
+                           XMAX = XMAX*REC
+                        END IF
+                     END IF
+                     X( J ) = X( J ) / TJJS
+                     XJ = ABS( X( J ) )
+                  ELSE IF( TJJ.GT.ZERO ) THEN
+*
+*                    0 < abs(A(j,j)) <= SMLNUM:
+*
+                     IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                       Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM
+*                       to avoid overflow when dividing by A(j,j).
+*
+                        REC = ( TJJ*BIGNUM ) / XJ
+                        IF( CNORM( J ).GT.ONE ) THEN
+*
+*                          Scale by 1/CNORM(j) to avoid overflow when
+*                          multiplying x(j) times column j.
+*
+                           REC = REC / CNORM( J )
+                        END IF
+                        CALL SSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                     X( J ) = X( J ) / TJJS
+                     XJ = ABS( X( J ) )
+                  ELSE
+*
+*                    A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+*                    scale = 0, and compute a solution to A*x = 0.
+*
+                     DO 90 I = 1, N
+                        X( I ) = ZERO
+   90                CONTINUE
+                     X( J ) = ONE
+                     XJ = ONE
+                     SCALE = ZERO
+                     XMAX = ZERO
+                  END IF
+   95          CONTINUE
+*
+*              Scale x if necessary to avoid overflow when adding a
+*              multiple of column j of A.
+*
+               IF( XJ.GT.ONE ) THEN
+                  REC = ONE / XJ
+                  IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN
+*
+*                    Scale x by 1/(2*abs(x(j))).
+*
+                     REC = REC*HALF
+                     CALL SSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                  END IF
+               ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN
+*
+*                 Scale x by 1/2.
+*
+                  CALL SSCAL( N, HALF, X, 1 )
+                  SCALE = SCALE*HALF
+               END IF
+*
+               IF( UPPER ) THEN
+                  IF( J.GT.1 ) THEN
+*
+*                    Compute the update
+*                       x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j)
+*
+                     CALL SAXPY( J-1, -X( J )*TSCAL, AP( IP-J+1 ), 1, X,
+     $                           1 )
+                     I = ISAMAX( J-1, X, 1 )
+                     XMAX = ABS( X( I ) )
+                  END IF
+                  IP = IP - J
+               ELSE
+                  IF( J.LT.N ) THEN
+*
+*                    Compute the update
+*                       x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j)
+*
+                     CALL SAXPY( N-J, -X( J )*TSCAL, AP( IP+1 ), 1,
+     $                           X( J+1 ), 1 )
+                     I = J + ISAMAX( N-J, X( J+1 ), 1 )
+                     XMAX = ABS( X( I ) )
+                  END IF
+                  IP = IP + N - J + 1
+               END IF
+  100       CONTINUE
+*
+         ELSE
+*
+*           Solve A' * x = b
+*
+            IP = JFIRST*( JFIRST+1 ) / 2
+            JLEN = 1
+            DO 140 J = JFIRST, JLAST, JINC
+*
+*              Compute x(j) = b(j) - sum A(k,j)*x(k).
+*                                    k<>j
+*
+               XJ = ABS( X( J ) )
+               USCAL = TSCAL
+               REC = ONE / MAX( XMAX, ONE )
+               IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN
+*
+*                 If x(j) could overflow, scale x by 1/(2*XMAX).
+*
+                  REC = REC*HALF
+                  IF( NOUNIT ) THEN
+                     TJJS = AP( IP )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                  END IF
+                     TJJ = ABS( TJJS )
+                     IF( TJJ.GT.ONE ) THEN
+*
+*                       Divide by A(j,j) when scaling x if A(j,j) > 1.
+*
+                        REC = MIN( ONE, REC*TJJ )
+                        USCAL = USCAL / TJJS
+                     END IF
+                  IF( REC.LT.ONE ) THEN
+                     CALL SSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                     XMAX = XMAX*REC
+                  END IF
+               END IF
+*
+               SUMJ = ZERO
+               IF( USCAL.EQ.ONE ) THEN
+*
+*                 If the scaling needed for A in the dot product is 1,
+*                 call SDOT to perform the dot product.
+*
+                  IF( UPPER ) THEN
+                     SUMJ = SDOT( J-1, AP( IP-J+1 ), 1, X, 1 )
+                  ELSE IF( J.LT.N ) THEN
+                     SUMJ = SDOT( N-J, AP( IP+1 ), 1, X( J+1 ), 1 )
+                  END IF
+               ELSE
+*
+*                 Otherwise, use in-line code for the dot product.
+*
+                  IF( UPPER ) THEN
+                     DO 110 I = 1, J - 1
+                        SUMJ = SUMJ + ( AP( IP-J+I )*USCAL )*X( I )
+  110                CONTINUE
+                  ELSE IF( J.LT.N ) THEN
+                     DO 120 I = 1, N - J
+                        SUMJ = SUMJ + ( AP( IP+I )*USCAL )*X( J+I )
+  120                CONTINUE
+                  END IF
+               END IF
+*
+               IF( USCAL.EQ.TSCAL ) THEN
+*
+*                 Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j)
+*                 was not used to scale the dotproduct.
+*
+                  X( J ) = X( J ) - SUMJ
+                  XJ = ABS( X( J ) )
+                  IF( NOUNIT ) THEN
+*
+*                    Compute x(j) = x(j) / A(j,j), scaling if necessary.
+*
+                     TJJS = AP( IP )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                     IF( TSCAL.EQ.ONE )
+     $                  GO TO 135
+                  END IF
+                     TJJ = ABS( TJJS )
+                     IF( TJJ.GT.SMLNUM ) THEN
+*
+*                       abs(A(j,j)) > SMLNUM:
+*
+                        IF( TJJ.LT.ONE ) THEN
+                           IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                             Scale X by 1/abs(x(j)).
+*
+                              REC = ONE / XJ
+                              CALL SSCAL( N, REC, X, 1 )
+                              SCALE = SCALE*REC
+                              XMAX = XMAX*REC
+                           END IF
+                        END IF
+                        X( J ) = X( J ) / TJJS
+                     ELSE IF( TJJ.GT.ZERO ) THEN
+*
+*                       0 < abs(A(j,j)) <= SMLNUM:
+*
+                        IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM.
+*
+                           REC = ( TJJ*BIGNUM ) / XJ
+                           CALL SSCAL( N, REC, X, 1 )
+                           SCALE = SCALE*REC
+                           XMAX = XMAX*REC
+                        END IF
+                        X( J ) = X( J ) / TJJS
+                     ELSE
+*
+*                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+*                       scale = 0, and compute a solution to A'*x = 0.
+*
+                        DO 130 I = 1, N
+                           X( I ) = ZERO
+  130                   CONTINUE
+                        X( J ) = ONE
+                        SCALE = ZERO
+                        XMAX = ZERO
+                     END IF
+  135             CONTINUE
+               ELSE
+*
+*                 Compute x(j) := x(j) / A(j,j)  - sumj if the dot
+*                 product has already been divided by 1/A(j,j).
+*
+                  X( J ) = X( J ) / TJJS - SUMJ
+               END IF
+               XMAX = MAX( XMAX, ABS( X( J ) ) )
+               JLEN = JLEN + 1
+               IP = IP + JINC*JLEN
+  140       CONTINUE
+         END IF
+         SCALE = SCALE / TSCAL
+      END IF
+*
+*     Scale the column norms by 1/TSCAL for return.
+*
+      IF( TSCAL.NE.ONE ) THEN
+         CALL SSCAL( N, ONE / TSCAL, CNORM, 1 )
+      END IF
+*
+      RETURN
+*
+*     End of SLATPS
+*
+      END
diff --git a/costa/native/external/lapack/slatrd.f b/costa/native/external/lapack/slatrd.f
new file mode 100644
index 000000000..065814879
--- /dev/null
+++ b/costa/native/external/lapack/slatrd.f
@@ -0,0 +1,259 @@
+      SUBROUTINE SLATRD( UPLO, N, NB, A, LDA, E, TAU, W, LDW )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            LDA, LDW, N, NB
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), E( * ), TAU( * ), W( LDW, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLATRD reduces NB rows and columns of a real symmetric matrix A to
+*  symmetric tridiagonal form by an orthogonal similarity
+*  transformation Q' * A * Q, and returns the matrices V and W which are
+*  needed to apply the transformation to the unreduced part of A.
+*
+*  If UPLO = 'U', SLATRD reduces the last NB rows and columns of a
+*  matrix, of which the upper triangle is supplied;
+*  if UPLO = 'L', SLATRD reduces the first NB rows and columns of a
+*  matrix, of which the lower triangle is supplied.
+*
+*  This is an auxiliary routine called by SSYTRD.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored:
+*          = 'U': Upper triangular
+*          = 'L': Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.
+*
+*  NB      (input) INTEGER
+*          The number of rows and columns to be reduced.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          n-by-n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n-by-n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*          On exit:
+*          if UPLO = 'U', the last NB columns have been reduced to
+*            tridiagonal form, with the diagonal elements overwriting
+*            the diagonal elements of A; the elements above the diagonal
+*            with the array TAU, represent the orthogonal matrix Q as a
+*            product of elementary reflectors;
+*          if UPLO = 'L', the first NB columns have been reduced to
+*            tridiagonal form, with the diagonal elements overwriting
+*            the diagonal elements of A; the elements below the diagonal
+*            with the array TAU, represent the  orthogonal matrix Q as a
+*            product of elementary reflectors.
+*          See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= (1,N).
+*
+*  E       (output) REAL array, dimension (N-1)
+*          If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal
+*          elements of the last NB columns of the reduced matrix;
+*          if UPLO = 'L', E(1:nb) contains the subdiagonal elements of
+*          the first NB columns of the reduced matrix.
+*
+*  TAU     (output) REAL array, dimension (N-1)
+*          The scalar factors of the elementary reflectors, stored in
+*          TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'.
+*          See Further Details.
+*
+*  W       (output) REAL array, dimension (LDW,NB)
+*          The n-by-nb matrix W required to update the unreduced part
+*          of A.
+*
+*  LDW     (input) INTEGER
+*          The leading dimension of the array W. LDW >= max(1,N).
+*
+*  Further Details
+*  ===============
+*
+*  If UPLO = 'U', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(n) H(n-1) . . . H(n-nb+1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i),
+*  and tau in TAU(i-1).
+*
+*  If UPLO = 'L', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(1) H(2) . . . H(nb).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i),
+*  and tau in TAU(i).
+*
+*  The elements of the vectors v together form the n-by-nb matrix V
+*  which is needed, with W, to apply the transformation to the unreduced
+*  part of the matrix, using a symmetric rank-2k update of the form:
+*  A := A - V*W' - W*V'.
+*
+*  The contents of A on exit are illustrated by the following examples
+*  with n = 5 and nb = 2:
+*
+*  if UPLO = 'U':                       if UPLO = 'L':
+*
+*    (  a   a   a   v4  v5 )              (  d                  )
+*    (      a   a   v4  v5 )              (  1   d              )
+*    (          a   1   v5 )              (  v1  1   a          )
+*    (              d   1  )              (  v1  v2  a   a      )
+*    (                  d  )              (  v1  v2  a   a   a  )
+*
+*  where d denotes a diagonal element of the reduced matrix, a denotes
+*  an element of the original matrix that is unchanged, and vi denotes
+*  an element of the vector defining H(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE, HALF
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0, HALF = 0.5E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IW
+      REAL               ALPHA
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SGEMV, SLARFG, SSCAL, SSYMV
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SDOT
+      EXTERNAL           LSAME, SDOT
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 )
+     $   RETURN
+*
+      IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*        Reduce last NB columns of upper triangle
+*
+         DO 10 I = N, N - NB + 1, -1
+            IW = I - N + NB
+            IF( I.LT.N ) THEN
+*
+*              Update A(1:i,i)
+*
+               CALL SGEMV( 'No transpose', I, N-I, -ONE, A( 1, I+1 ),
+     $                     LDA, W( I, IW+1 ), LDW, ONE, A( 1, I ), 1 )
+               CALL SGEMV( 'No transpose', I, N-I, -ONE, W( 1, IW+1 ),
+     $                     LDW, A( I, I+1 ), LDA, ONE, A( 1, I ), 1 )
+            END IF
+            IF( I.GT.1 ) THEN
+*
+*              Generate elementary reflector H(i) to annihilate
+*              A(1:i-2,i)
+*
+               CALL SLARFG( I-1, A( I-1, I ), A( 1, I ), 1, TAU( I-1 ) )
+               E( I-1 ) = A( I-1, I )
+               A( I-1, I ) = ONE
+*
+*              Compute W(1:i-1,i)
+*
+               CALL SSYMV( 'Upper', I-1, ONE, A, LDA, A( 1, I ), 1,
+     $                     ZERO, W( 1, IW ), 1 )
+               IF( I.LT.N ) THEN
+                  CALL SGEMV( 'Transpose', I-1, N-I, ONE, W( 1, IW+1 ),
+     $                        LDW, A( 1, I ), 1, ZERO, W( I+1, IW ), 1 )
+                  CALL SGEMV( 'No transpose', I-1, N-I, -ONE,
+     $                        A( 1, I+1 ), LDA, W( I+1, IW ), 1, ONE,
+     $                        W( 1, IW ), 1 )
+                  CALL SGEMV( 'Transpose', I-1, N-I, ONE, A( 1, I+1 ),
+     $                        LDA, A( 1, I ), 1, ZERO, W( I+1, IW ), 1 )
+                  CALL SGEMV( 'No transpose', I-1, N-I, -ONE,
+     $                        W( 1, IW+1 ), LDW, W( I+1, IW ), 1, ONE,
+     $                        W( 1, IW ), 1 )
+               END IF
+               CALL SSCAL( I-1, TAU( I-1 ), W( 1, IW ), 1 )
+               ALPHA = -HALF*TAU( I-1 )*SDOT( I-1, W( 1, IW ), 1,
+     $                 A( 1, I ), 1 )
+               CALL SAXPY( I-1, ALPHA, A( 1, I ), 1, W( 1, IW ), 1 )
+            END IF
+*
+   10    CONTINUE
+      ELSE
+*
+*        Reduce first NB columns of lower triangle
+*
+         DO 20 I = 1, NB
+*
+*           Update A(i:n,i)
+*
+            CALL SGEMV( 'No transpose', N-I+1, I-1, -ONE, A( I, 1 ),
+     $                  LDA, W( I, 1 ), LDW, ONE, A( I, I ), 1 )
+            CALL SGEMV( 'No transpose', N-I+1, I-1, -ONE, W( I, 1 ),
+     $                  LDW, A( I, 1 ), LDA, ONE, A( I, I ), 1 )
+            IF( I.LT.N ) THEN
+*
+*              Generate elementary reflector H(i) to annihilate
+*              A(i+2:n,i)
+*
+               CALL SLARFG( N-I, A( I+1, I ), A( MIN( I+2, N ), I ), 1,
+     $                      TAU( I ) )
+               E( I ) = A( I+1, I )
+               A( I+1, I ) = ONE
+*
+*              Compute W(i+1:n,i)
+*
+               CALL SSYMV( 'Lower', N-I, ONE, A( I+1, I+1 ), LDA,
+     $                     A( I+1, I ), 1, ZERO, W( I+1, I ), 1 )
+               CALL SGEMV( 'Transpose', N-I, I-1, ONE, W( I+1, 1 ), LDW,
+     $                     A( I+1, I ), 1, ZERO, W( 1, I ), 1 )
+               CALL SGEMV( 'No transpose', N-I, I-1, -ONE, A( I+1, 1 ),
+     $                     LDA, W( 1, I ), 1, ONE, W( I+1, I ), 1 )
+               CALL SGEMV( 'Transpose', N-I, I-1, ONE, A( I+1, 1 ), LDA,
+     $                     A( I+1, I ), 1, ZERO, W( 1, I ), 1 )
+               CALL SGEMV( 'No transpose', N-I, I-1, -ONE, W( I+1, 1 ),
+     $                     LDW, W( 1, I ), 1, ONE, W( I+1, I ), 1 )
+               CALL SSCAL( N-I, TAU( I ), W( I+1, I ), 1 )
+               ALPHA = -HALF*TAU( I )*SDOT( N-I, W( I+1, I ), 1,
+     $                 A( I+1, I ), 1 )
+               CALL SAXPY( N-I, ALPHA, A( I+1, I ), 1, W( I+1, I ), 1 )
+            END IF
+*
+   20    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of SLATRD
+*
+      END
diff --git a/costa/native/external/lapack/slatrs.f b/costa/native/external/lapack/slatrs.f
new file mode 100644
index 000000000..aa4961a0c
--- /dev/null
+++ b/costa/native/external/lapack/slatrs.f
@@ -0,0 +1,702 @@
+      SUBROUTINE SLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X, SCALE,
+     $                   CNORM, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORMIN, TRANS, UPLO
+      INTEGER            INFO, LDA, N
+      REAL               SCALE
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), CNORM( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLATRS solves one of the triangular systems
+*
+*     A *x = s*b  or  A'*x = s*b
+*
+*  with scaling to prevent overflow.  Here A is an upper or lower
+*  triangular matrix, A' denotes the transpose of A, x and b are
+*  n-element vectors, and s is a scaling factor, usually less than
+*  or equal to 1, chosen so that the components of x will be less than
+*  the overflow threshold.  If the unscaled problem will not cause
+*  overflow, the Level 2 BLAS routine STRSV is called.  If the matrix A
+*  is singular (A(j,j) = 0 for some j), then s is set to 0 and a
+*  non-trivial solution to A*x = 0 is returned.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the matrix A is upper or lower triangular.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the operation applied to A.
+*          = 'N':  Solve A * x = s*b  (No transpose)
+*          = 'T':  Solve A'* x = s*b  (Transpose)
+*          = 'C':  Solve A'* x = s*b  (Conjugate transpose = Transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          Specifies whether or not the matrix A is unit triangular.
+*          = 'N':  Non-unit triangular
+*          = 'U':  Unit triangular
+*
+*  NORMIN  (input) CHARACTER*1
+*          Specifies whether CNORM has been set or not.
+*          = 'Y':  CNORM contains the column norms on entry
+*          = 'N':  CNORM is not set on entry.  On exit, the norms will
+*                  be computed and stored in CNORM.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input) REAL array, dimension (LDA,N)
+*          The triangular matrix A.  If UPLO = 'U', the leading n by n
+*          upper triangular part of the array A contains the upper
+*          triangular matrix, and the strictly lower triangular part of
+*          A is not referenced.  If UPLO = 'L', the leading n by n lower
+*          triangular part of the array A contains the lower triangular
+*          matrix, and the strictly upper triangular part of A is not
+*          referenced.  If DIAG = 'U', the diagonal elements of A are
+*          also not referenced and are assumed to be 1.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max (1,N).
+*
+*  X       (input/output) REAL array, dimension (N)
+*          On entry, the right hand side b of the triangular system.
+*          On exit, X is overwritten by the solution vector x.
+*
+*  SCALE   (output) REAL
+*          The scaling factor s for the triangular system
+*             A * x = s*b  or  A'* x = s*b.
+*          If SCALE = 0, the matrix A is singular or badly scaled, and
+*          the vector x is an exact or approximate solution to A*x = 0.
+*
+*  CNORM   (input or output) REAL array, dimension (N)
+*
+*          If NORMIN = 'Y', CNORM is an input argument and CNORM(j)
+*          contains the norm of the off-diagonal part of the j-th column
+*          of A.  If TRANS = 'N', CNORM(j) must be greater than or equal
+*          to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)
+*          must be greater than or equal to the 1-norm.
+*
+*          If NORMIN = 'N', CNORM is an output argument and CNORM(j)
+*          returns the 1-norm of the offdiagonal part of the j-th column
+*          of A.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -k, the k-th argument had an illegal value
+*
+*  Further Details
+*  ======= =======
+*
+*  A rough bound on x is computed; if that is less than overflow, STRSV
+*  is called, otherwise, specific code is used which checks for possible
+*  overflow or divide-by-zero at every operation.
+*
+*  A columnwise scheme is used for solving A*x = b.  The basic algorithm
+*  if A is lower triangular is
+*
+*       x[1:n] := b[1:n]
+*       for j = 1, ..., n
+*            x(j) := x(j) / A(j,j)
+*            x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j]
+*       end
+*
+*  Define bounds on the components of x after j iterations of the loop:
+*     M(j) = bound on x[1:j]
+*     G(j) = bound on x[j+1:n]
+*  Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}.
+*
+*  Then for iteration j+1 we have
+*     M(j+1) <= G(j) / | A(j+1,j+1) |
+*     G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] |
+*            <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | )
+*
+*  where CNORM(j+1) is greater than or equal to the infinity-norm of
+*  column j+1 of A, not counting the diagonal.  Hence
+*
+*     G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | )
+*                  1<=i<=j
+*  and
+*
+*     |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| )
+*                                   1<=i< j
+*
+*  Since |x(j)| <= M(j), we use the Level 2 BLAS routine STRSV if the
+*  reciprocal of the largest M(j), j=1,..,n, is larger than
+*  max(underflow, 1/overflow).
+*
+*  The bound on x(j) is also used to determine when a step in the
+*  columnwise method can be performed without fear of overflow.  If
+*  the computed bound is greater than a large constant, x is scaled to
+*  prevent overflow, but if the bound overflows, x is set to 0, x(j) to
+*  1, and scale to 0, and a non-trivial solution to A*x = 0 is found.
+*
+*  Similarly, a row-wise scheme is used to solve A'*x = b.  The basic
+*  algorithm for A upper triangular is
+*
+*       for j = 1, ..., n
+*            x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j)
+*       end
+*
+*  We simultaneously compute two bounds
+*       G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j
+*       M(j) = bound on x(i), 1<=i<=j
+*
+*  The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we
+*  add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1.
+*  Then the bound on x(j) is
+*
+*       M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) |
+*
+*            <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| )
+*                      1<=i<=j
+*
+*  and we can safely call STRSV if 1/M(n) and 1/G(n) are both greater
+*  than max(underflow, 1/overflow).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, HALF, ONE
+      PARAMETER          ( ZERO = 0.0E+0, HALF = 0.5E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN, NOUNIT, UPPER
+      INTEGER            I, IMAX, J, JFIRST, JINC, JLAST
+      REAL               BIGNUM, GROW, REC, SMLNUM, SUMJ, TJJ, TJJS,
+     $                   TMAX, TSCAL, USCAL, XBND, XJ, XMAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ISAMAX
+      REAL               SASUM, SDOT, SLAMCH
+      EXTERNAL           LSAME, ISAMAX, SASUM, SDOT, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SSCAL, STRSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Test the input parameters.
+*
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT.
+     $         LSAME( NORMIN, 'N' ) ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLATRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine machine dependent parameters to control overflow.
+*
+      SMLNUM = SLAMCH( 'Safe minimum' ) / SLAMCH( 'Precision' )
+      BIGNUM = ONE / SMLNUM
+      SCALE = ONE
+*
+      IF( LSAME( NORMIN, 'N' ) ) THEN
+*
+*        Compute the 1-norm of each column, not including the diagonal.
+*
+         IF( UPPER ) THEN
+*
+*           A is upper triangular.
+*
+            DO 10 J = 1, N
+               CNORM( J ) = SASUM( J-1, A( 1, J ), 1 )
+   10       CONTINUE
+         ELSE
+*
+*           A is lower triangular.
+*
+            DO 20 J = 1, N - 1
+               CNORM( J ) = SASUM( N-J, A( J+1, J ), 1 )
+   20       CONTINUE
+            CNORM( N ) = ZERO
+         END IF
+      END IF
+*
+*     Scale the column norms by TSCAL if the maximum element in CNORM is
+*     greater than BIGNUM.
+*
+      IMAX = ISAMAX( N, CNORM, 1 )
+      TMAX = CNORM( IMAX )
+      IF( TMAX.LE.BIGNUM ) THEN
+         TSCAL = ONE
+      ELSE
+         TSCAL = ONE / ( SMLNUM*TMAX )
+         CALL SSCAL( N, TSCAL, CNORM, 1 )
+      END IF
+*
+*     Compute a bound on the computed solution vector to see if the
+*     Level 2 BLAS routine STRSV can be used.
+*
+      J = ISAMAX( N, X, 1 )
+      XMAX = ABS( X( J ) )
+      XBND = XMAX
+      IF( NOTRAN ) THEN
+*
+*        Compute the growth in A * x = b.
+*
+         IF( UPPER ) THEN
+            JFIRST = N
+            JLAST = 1
+            JINC = -1
+         ELSE
+            JFIRST = 1
+            JLAST = N
+            JINC = 1
+         END IF
+*
+         IF( TSCAL.NE.ONE ) THEN
+            GROW = ZERO
+            GO TO 50
+         END IF
+*
+         IF( NOUNIT ) THEN
+*
+*           A is non-unit triangular.
+*
+*           Compute GROW = 1/G(j) and XBND = 1/M(j).
+*           Initially, G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = ONE / MAX( XBND, SMLNUM )
+            XBND = GROW
+            DO 30 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 50
+*
+*              M(j) = G(j-1) / abs(A(j,j))
+*
+               TJJ = ABS( A( J, J ) )
+               XBND = MIN( XBND, MIN( ONE, TJJ )*GROW )
+               IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN
+*
+*                 G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) )
+*
+                  GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) )
+               ELSE
+*
+*                 G(j) could overflow, set GROW to 0.
+*
+                  GROW = ZERO
+               END IF
+   30       CONTINUE
+            GROW = XBND
+         ELSE
+*
+*           A is unit triangular.
+*
+*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) )
+            DO 40 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 50
+*
+*              G(j) = G(j-1)*( 1 + CNORM(j) )
+*
+               GROW = GROW*( ONE / ( ONE+CNORM( J ) ) )
+   40       CONTINUE
+         END IF
+   50    CONTINUE
+*
+      ELSE
+*
+*        Compute the growth in A' * x = b.
+*
+         IF( UPPER ) THEN
+            JFIRST = 1
+            JLAST = N
+            JINC = 1
+         ELSE
+            JFIRST = N
+            JLAST = 1
+            JINC = -1
+         END IF
+*
+         IF( TSCAL.NE.ONE ) THEN
+            GROW = ZERO
+            GO TO 80
+         END IF
+*
+         IF( NOUNIT ) THEN
+*
+*           A is non-unit triangular.
+*
+*           Compute GROW = 1/G(j) and XBND = 1/M(j).
+*           Initially, M(0) = max{x(i), i=1,...,n}.
+*
+            GROW = ONE / MAX( XBND, SMLNUM )
+            XBND = GROW
+            DO 60 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 80
+*
+*              G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) )
+*
+               XJ = ONE + CNORM( J )
+               GROW = MIN( GROW, XBND / XJ )
+*
+*              M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j))
+*
+               TJJ = ABS( A( J, J ) )
+               IF( XJ.GT.TJJ )
+     $            XBND = XBND*( TJJ / XJ )
+   60       CONTINUE
+            GROW = MIN( GROW, XBND )
+         ELSE
+*
+*           A is unit triangular.
+*
+*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = MIN( ONE, ONE / MAX( XBND, SMLNUM ) )
+            DO 70 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 80
+*
+*              G(j) = ( 1 + CNORM(j) )*G(j-1)
+*
+               XJ = ONE + CNORM( J )
+               GROW = GROW / XJ
+   70       CONTINUE
+         END IF
+   80    CONTINUE
+      END IF
+*
+      IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN
+*
+*        Use the Level 2 BLAS solve if the reciprocal of the bound on
+*        elements of X is not too small.
+*
+         CALL STRSV( UPLO, TRANS, DIAG, N, A, LDA, X, 1 )
+      ELSE
+*
+*        Use a Level 1 BLAS solve, scaling intermediate results.
+*
+         IF( XMAX.GT.BIGNUM ) THEN
+*
+*           Scale X so that its components are less than or equal to
+*           BIGNUM in absolute value.
+*
+            SCALE = BIGNUM / XMAX
+            CALL SSCAL( N, SCALE, X, 1 )
+            XMAX = BIGNUM
+         END IF
+*
+         IF( NOTRAN ) THEN
+*
+*           Solve A * x = b
+*
+            DO 100 J = JFIRST, JLAST, JINC
+*
+*              Compute x(j) = b(j) / A(j,j), scaling x if necessary.
+*
+               XJ = ABS( X( J ) )
+               IF( NOUNIT ) THEN
+                  TJJS = A( J, J )*TSCAL
+               ELSE
+                  TJJS = TSCAL
+                  IF( TSCAL.EQ.ONE )
+     $               GO TO 95
+               END IF
+                  TJJ = ABS( TJJS )
+                  IF( TJJ.GT.SMLNUM ) THEN
+*
+*                    abs(A(j,j)) > SMLNUM:
+*
+                     IF( TJJ.LT.ONE ) THEN
+                        IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                          Scale x by 1/b(j).
+*
+                           REC = ONE / XJ
+                           CALL SSCAL( N, REC, X, 1 )
+                           SCALE = SCALE*REC
+                           XMAX = XMAX*REC
+                        END IF
+                     END IF
+                     X( J ) = X( J ) / TJJS
+                     XJ = ABS( X( J ) )
+                  ELSE IF( TJJ.GT.ZERO ) THEN
+*
+*                    0 < abs(A(j,j)) <= SMLNUM:
+*
+                     IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                       Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM
+*                       to avoid overflow when dividing by A(j,j).
+*
+                        REC = ( TJJ*BIGNUM ) / XJ
+                        IF( CNORM( J ).GT.ONE ) THEN
+*
+*                          Scale by 1/CNORM(j) to avoid overflow when
+*                          multiplying x(j) times column j.
+*
+                           REC = REC / CNORM( J )
+                        END IF
+                        CALL SSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                     X( J ) = X( J ) / TJJS
+                     XJ = ABS( X( J ) )
+                  ELSE
+*
+*                    A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+*                    scale = 0, and compute a solution to A*x = 0.
+*
+                     DO 90 I = 1, N
+                        X( I ) = ZERO
+   90                CONTINUE
+                     X( J ) = ONE
+                     XJ = ONE
+                     SCALE = ZERO
+                     XMAX = ZERO
+                  END IF
+   95          CONTINUE
+*
+*              Scale x if necessary to avoid overflow when adding a
+*              multiple of column j of A.
+*
+               IF( XJ.GT.ONE ) THEN
+                  REC = ONE / XJ
+                  IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN
+*
+*                    Scale x by 1/(2*abs(x(j))).
+*
+                     REC = REC*HALF
+                     CALL SSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                  END IF
+               ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN
+*
+*                 Scale x by 1/2.
+*
+                  CALL SSCAL( N, HALF, X, 1 )
+                  SCALE = SCALE*HALF
+               END IF
+*
+               IF( UPPER ) THEN
+                  IF( J.GT.1 ) THEN
+*
+*                    Compute the update
+*                       x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j)
+*
+                     CALL SAXPY( J-1, -X( J )*TSCAL, A( 1, J ), 1, X,
+     $                           1 )
+                     I = ISAMAX( J-1, X, 1 )
+                     XMAX = ABS( X( I ) )
+                  END IF
+               ELSE
+                  IF( J.LT.N ) THEN
+*
+*                    Compute the update
+*                       x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j)
+*
+                     CALL SAXPY( N-J, -X( J )*TSCAL, A( J+1, J ), 1,
+     $                           X( J+1 ), 1 )
+                     I = J + ISAMAX( N-J, X( J+1 ), 1 )
+                     XMAX = ABS( X( I ) )
+                  END IF
+               END IF
+  100       CONTINUE
+*
+         ELSE
+*
+*           Solve A' * x = b
+*
+            DO 140 J = JFIRST, JLAST, JINC
+*
+*              Compute x(j) = b(j) - sum A(k,j)*x(k).
+*                                    k<>j
+*
+               XJ = ABS( X( J ) )
+               USCAL = TSCAL
+               REC = ONE / MAX( XMAX, ONE )
+               IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN
+*
+*                 If x(j) could overflow, scale x by 1/(2*XMAX).
+*
+                  REC = REC*HALF
+                  IF( NOUNIT ) THEN
+                     TJJS = A( J, J )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                  END IF
+                     TJJ = ABS( TJJS )
+                     IF( TJJ.GT.ONE ) THEN
+*
+*                       Divide by A(j,j) when scaling x if A(j,j) > 1.
+*
+                        REC = MIN( ONE, REC*TJJ )
+                        USCAL = USCAL / TJJS
+                     END IF
+                  IF( REC.LT.ONE ) THEN
+                     CALL SSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                     XMAX = XMAX*REC
+                  END IF
+               END IF
+*
+               SUMJ = ZERO
+               IF( USCAL.EQ.ONE ) THEN
+*
+*                 If the scaling needed for A in the dot product is 1,
+*                 call SDOT to perform the dot product.
+*
+                  IF( UPPER ) THEN
+                     SUMJ = SDOT( J-1, A( 1, J ), 1, X, 1 )
+                  ELSE IF( J.LT.N ) THEN
+                     SUMJ = SDOT( N-J, A( J+1, J ), 1, X( J+1 ), 1 )
+                  END IF
+               ELSE
+*
+*                 Otherwise, use in-line code for the dot product.
+*
+                  IF( UPPER ) THEN
+                     DO 110 I = 1, J - 1
+                        SUMJ = SUMJ + ( A( I, J )*USCAL )*X( I )
+  110                CONTINUE
+                  ELSE IF( J.LT.N ) THEN
+                     DO 120 I = J + 1, N
+                        SUMJ = SUMJ + ( A( I, J )*USCAL )*X( I )
+  120                CONTINUE
+                  END IF
+               END IF
+*
+               IF( USCAL.EQ.TSCAL ) THEN
+*
+*                 Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j)
+*                 was not used to scale the dotproduct.
+*
+                  X( J ) = X( J ) - SUMJ
+                  XJ = ABS( X( J ) )
+                  IF( NOUNIT ) THEN
+                     TJJS = A( J, J )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                     IF( TSCAL.EQ.ONE )
+     $                  GO TO 135
+                  END IF
+*
+*                    Compute x(j) = x(j) / A(j,j), scaling if necessary.
+*
+                     TJJ = ABS( TJJS )
+                     IF( TJJ.GT.SMLNUM ) THEN
+*
+*                       abs(A(j,j)) > SMLNUM:
+*
+                        IF( TJJ.LT.ONE ) THEN
+                           IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                             Scale X by 1/abs(x(j)).
+*
+                              REC = ONE / XJ
+                              CALL SSCAL( N, REC, X, 1 )
+                              SCALE = SCALE*REC
+                              XMAX = XMAX*REC
+                           END IF
+                        END IF
+                        X( J ) = X( J ) / TJJS
+                     ELSE IF( TJJ.GT.ZERO ) THEN
+*
+*                       0 < abs(A(j,j)) <= SMLNUM:
+*
+                        IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM.
+*
+                           REC = ( TJJ*BIGNUM ) / XJ
+                           CALL SSCAL( N, REC, X, 1 )
+                           SCALE = SCALE*REC
+                           XMAX = XMAX*REC
+                        END IF
+                        X( J ) = X( J ) / TJJS
+                     ELSE
+*
+*                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+*                       scale = 0, and compute a solution to A'*x = 0.
+*
+                        DO 130 I = 1, N
+                           X( I ) = ZERO
+  130                   CONTINUE
+                        X( J ) = ONE
+                        SCALE = ZERO
+                        XMAX = ZERO
+                     END IF
+  135             CONTINUE
+               ELSE
+*
+*                 Compute x(j) := x(j) / A(j,j)  - sumj if the dot
+*                 product has already been divided by 1/A(j,j).
+*
+                  X( J ) = X( J ) / TJJS - SUMJ
+               END IF
+               XMAX = MAX( XMAX, ABS( X( J ) ) )
+  140       CONTINUE
+         END IF
+         SCALE = SCALE / TSCAL
+      END IF
+*
+*     Scale the column norms by 1/TSCAL for return.
+*
+      IF( TSCAL.NE.ONE ) THEN
+         CALL SSCAL( N, ONE / TSCAL, CNORM, 1 )
+      END IF
+*
+      RETURN
+*
+*     End of SLATRS
+*
+      END
diff --git a/costa/native/external/lapack/slatrz.f b/costa/native/external/lapack/slatrz.f
new file mode 100644
index 000000000..70cb31e93
--- /dev/null
+++ b/costa/native/external/lapack/slatrz.f
@@ -0,0 +1,128 @@
+      SUBROUTINE SLATRZ( M, N, L, A, LDA, TAU, WORK )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            L, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLATRZ factors the M-by-(M+L) real upper trapezoidal matrix
+*  [ A1 A2 ] = [ A(1:M,1:M) A(1:M,N-L+1:N) ] as ( R  0 ) * Z, by means
+*  of orthogonal transformations.  Z is an (M+L)-by-(M+L) orthogonal
+*  matrix and, R and A1 are M-by-M upper triangular matrices.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  L       (input) INTEGER
+*          The number of columns of the matrix A containing the
+*          meaningful part of the Householder vectors. N-M >= L >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the leading M-by-N upper trapezoidal part of the
+*          array A must contain the matrix to be factorized.
+*          On exit, the leading M-by-M upper triangular part of A
+*          contains the upper triangular matrix R, and elements N-L+1 to
+*          N of the first M rows of A, with the array TAU, represent the
+*          orthogonal matrix Z as a product of M elementary reflectors.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) REAL array, dimension (M)
+*          The scalar factors of the elementary reflectors.
+*
+*  WORK    (workspace) REAL array, dimension (M)
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+*  The factorization is obtained by Householder's method.  The kth
+*  transformation matrix, Z( k ), which is used to introduce zeros into
+*  the ( m - k + 1 )th row of A, is given in the form
+*
+*     Z( k ) = ( I     0   ),
+*              ( 0  T( k ) )
+*
+*  where
+*
+*     T( k ) = I - tau*u( k )*u( k )',   u( k ) = (   1    ),
+*                                                 (   0    )
+*                                                 ( z( k ) )
+*
+*  tau is a scalar and z( k ) is an l element vector. tau and z( k )
+*  are chosen to annihilate the elements of the kth row of A2.
+*
+*  The scalar tau is returned in the kth element of TAU and the vector
+*  u( k ) in the kth row of A2, such that the elements of z( k ) are
+*  in  a( k, l + 1 ), ..., a( k, n ). The elements of R are returned in
+*  the upper triangular part of A1.
+*
+*  Z is given by
+*
+*     Z =  Z( 1 ) * Z( 2 ) * ... * Z( m ).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARFG, SLARZ
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 ) THEN
+         RETURN
+      ELSE IF( M.EQ.N ) THEN
+         DO 10 I = 1, N
+            TAU( I ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      DO 20 I = M, 1, -1
+*
+*        Generate elementary reflector H(i) to annihilate
+*        [ A(i,i) A(i,n-l+1:n) ]
+*
+         CALL SLARFG( L+1, A( I, I ), A( I, N-L+1 ), LDA, TAU( I ) )
+*
+*        Apply H(i) to A(1:i-1,i:n) from the right
+*
+         CALL SLARZ( 'Right', I-1, N-I+1, L, A( I, N-L+1 ), LDA,
+     $               TAU( I ), A( 1, I ), LDA, WORK )
+*
+   20 CONTINUE
+*
+      RETURN
+*
+*     End of SLATRZ
+*
+      END
diff --git a/costa/native/external/lapack/slatzm.f b/costa/native/external/lapack/slatzm.f
new file mode 100644
index 000000000..6f6886bf2
--- /dev/null
+++ b/costa/native/external/lapack/slatzm.f
@@ -0,0 +1,143 @@
+      SUBROUTINE SLATZM( SIDE, M, N, V, INCV, TAU, C1, C2, LDC, WORK )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE
+      INTEGER            INCV, LDC, M, N
+      REAL               TAU
+*     ..
+*     .. Array Arguments ..
+      REAL               C1( LDC, * ), C2( LDC, * ), V( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This routine is deprecated and has been replaced by routine SORMRZ.
+*
+*  SLATZM applies a Householder matrix generated by STZRQF to a matrix.
+*
+*  Let P = I - tau*u*u',   u = ( 1 ),
+*                              ( v )
+*  where v is an (m-1) vector if SIDE = 'L', or a (n-1) vector if
+*  SIDE = 'R'.
+*
+*  If SIDE equals 'L', let
+*         C = [ C1 ] 1
+*             [ C2 ] m-1
+*               n
+*  Then C is overwritten by P*C.
+*
+*  If SIDE equals 'R', let
+*         C = [ C1, C2 ] m
+*                1  n-1
+*  Then C is overwritten by C*P.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': form P * C
+*          = 'R': form C * P
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C.
+*
+*  V       (input) REAL array, dimension
+*                  (1 + (M-1)*abs(INCV)) if SIDE = 'L'
+*                  (1 + (N-1)*abs(INCV)) if SIDE = 'R'
+*          The vector v in the representation of P. V is not used
+*          if TAU = 0.
+*
+*  INCV    (input) INTEGER
+*          The increment between elements of v. INCV <> 0
+*
+*  TAU     (input) REAL
+*          The value tau in the representation of P.
+*
+*  C1      (input/output) REAL array, dimension
+*                         (LDC,N) if SIDE = 'L'
+*                         (M,1)   if SIDE = 'R'
+*          On entry, the n-vector C1 if SIDE = 'L', or the m-vector C1
+*          if SIDE = 'R'.
+*
+*          On exit, the first row of P*C if SIDE = 'L', or the first
+*          column of C*P if SIDE = 'R'.
+*
+*  C2      (input/output) REAL array, dimension
+*                         (LDC, N)   if SIDE = 'L'
+*                         (LDC, N-1) if SIDE = 'R'
+*          On entry, the (m - 1) x n matrix C2 if SIDE = 'L', or the
+*          m x (n - 1) matrix C2 if SIDE = 'R'.
+*
+*          On exit, rows 2:m of P*C if SIDE = 'L', or columns 2:m of C*P
+*          if SIDE = 'R'.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the arrays C1 and C2. LDC >= (1,M).
+*
+*  WORK    (workspace) REAL array, dimension
+*                      (N) if SIDE = 'L'
+*                      (M) if SIDE = 'R'
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SCOPY, SGEMV, SGER
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+      IF( ( MIN( M, N ).EQ.0 ) .OR. ( TAU.EQ.ZERO ) )
+     $   RETURN
+*
+      IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*        w := C1 + v' * C2
+*
+         CALL SCOPY( N, C1, LDC, WORK, 1 )
+         CALL SGEMV( 'Transpose', M-1, N, ONE, C2, LDC, V, INCV, ONE,
+     $               WORK, 1 )
+*
+*        [ C1 ] := [ C1 ] - tau* [ 1 ] * w'
+*        [ C2 ]    [ C2 ]        [ v ]
+*
+         CALL SAXPY( N, -TAU, WORK, 1, C1, LDC )
+         CALL SGER( M-1, N, -TAU, V, INCV, WORK, 1, C2, LDC )
+*
+      ELSE IF( LSAME( SIDE, 'R' ) ) THEN
+*
+*        w := C1 + C2 * v
+*
+         CALL SCOPY( M, C1, 1, WORK, 1 )
+         CALL SGEMV( 'No transpose', M, N-1, ONE, C2, LDC, V, INCV, ONE,
+     $               WORK, 1 )
+*
+*        [ C1, C2 ] := [ C1, C2 ] - tau* w * [ 1 , v']
+*
+         CALL SAXPY( M, -TAU, WORK, 1, C1, 1 )
+         CALL SGER( M, N-1, -TAU, WORK, 1, V, INCV, C2, LDC )
+      END IF
+*
+      RETURN
+*
+*     End of SLATZM
+*
+      END
diff --git a/costa/native/external/lapack/slauu2.f b/costa/native/external/lapack/slauu2.f
new file mode 100644
index 000000000..b666f50f1
--- /dev/null
+++ b/costa/native/external/lapack/slauu2.f
@@ -0,0 +1,136 @@
+      SUBROUTINE SLAUU2( UPLO, N, A, LDA, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAUU2 computes the product U * U' or L' * L, where the triangular
+*  factor U or L is stored in the upper or lower triangular part of
+*  the array A.
+*
+*  If UPLO = 'U' or 'u' then the upper triangle of the result is stored,
+*  overwriting the factor U in A.
+*  If UPLO = 'L' or 'l' then the lower triangle of the result is stored,
+*  overwriting the factor L in A.
+*
+*  This is the unblocked form of the algorithm, calling Level 2 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the triangular factor stored in the array A
+*          is upper or lower triangular:
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the triangular factor U or L.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the triangular factor U or L.
+*          On exit, if UPLO = 'U', the upper triangle of A is
+*          overwritten with the upper triangle of the product U * U';
+*          if UPLO = 'L', the lower triangle of A is overwritten with
+*          the lower triangle of the product L' * L.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I
+      REAL               AII
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SDOT
+      EXTERNAL           LSAME, SDOT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMV, SSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLAUU2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Compute the product U * U'.
+*
+         DO 10 I = 1, N
+            AII = A( I, I )
+            IF( I.LT.N ) THEN
+               A( I, I ) = SDOT( N-I+1, A( I, I ), LDA, A( I, I ), LDA )
+               CALL SGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ),
+     $                     LDA, A( I, I+1 ), LDA, AII, A( 1, I ), 1 )
+            ELSE
+               CALL SSCAL( I, AII, A( 1, I ), 1 )
+            END IF
+   10    CONTINUE
+*
+      ELSE
+*
+*        Compute the product L' * L.
+*
+         DO 20 I = 1, N
+            AII = A( I, I )
+            IF( I.LT.N ) THEN
+               A( I, I ) = SDOT( N-I+1, A( I, I ), 1, A( I, I ), 1 )
+               CALL SGEMV( 'Transpose', N-I, I-1, ONE, A( I+1, 1 ), LDA,
+     $                     A( I+1, I ), 1, AII, A( I, 1 ), LDA )
+            ELSE
+               CALL SSCAL( I, AII, A( I, 1 ), LDA )
+            END IF
+   20    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of SLAUU2
+*
+      END
diff --git a/costa/native/external/lapack/slauum.f b/costa/native/external/lapack/slauum.f
new file mode 100644
index 000000000..646f34e0c
--- /dev/null
+++ b/costa/native/external/lapack/slauum.f
@@ -0,0 +1,156 @@
+      SUBROUTINE SLAUUM( UPLO, N, A, LDA, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SLAUUM computes the product U * U' or L' * L, where the triangular
+*  factor U or L is stored in the upper or lower triangular part of
+*  the array A.
+*
+*  If UPLO = 'U' or 'u' then the upper triangle of the result is stored,
+*  overwriting the factor U in A.
+*  If UPLO = 'L' or 'l' then the lower triangle of the result is stored,
+*  overwriting the factor L in A.
+*
+*  This is the blocked form of the algorithm, calling Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the triangular factor stored in the array A
+*          is upper or lower triangular:
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the triangular factor U or L.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the triangular factor U or L.
+*          On exit, if UPLO = 'U', the upper triangle of A is
+*          overwritten with the upper triangle of the product U * U';
+*          if UPLO = 'L', the lower triangle of A is overwritten with
+*          the lower triangle of the product L' * L.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, IB, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMM, SLAUU2, SSYRK, STRMM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLAUUM', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'SLAUUM', UPLO, N, -1, -1, -1 )
+*
+      IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+*        Use unblocked code
+*
+         CALL SLAUU2( UPLO, N, A, LDA, INFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( UPPER ) THEN
+*
+*           Compute the product U * U'.
+*
+            DO 10 I = 1, N, NB
+               IB = MIN( NB, N-I+1 )
+               CALL STRMM( 'Right', 'Upper', 'Transpose', 'Non-unit',
+     $                     I-1, IB, ONE, A( I, I ), LDA, A( 1, I ),
+     $                     LDA )
+               CALL SLAUU2( 'Upper', IB, A( I, I ), LDA, INFO )
+               IF( I+IB.LE.N ) THEN
+                  CALL SGEMM( 'No transpose', 'Transpose', I-1, IB,
+     $                        N-I-IB+1, ONE, A( 1, I+IB ), LDA,
+     $                        A( I, I+IB ), LDA, ONE, A( 1, I ), LDA )
+                  CALL SSYRK( 'Upper', 'No transpose', IB, N-I-IB+1,
+     $                        ONE, A( I, I+IB ), LDA, ONE, A( I, I ),
+     $                        LDA )
+               END IF
+   10       CONTINUE
+         ELSE
+*
+*           Compute the product L' * L.
+*
+            DO 20 I = 1, N, NB
+               IB = MIN( NB, N-I+1 )
+               CALL STRMM( 'Left', 'Lower', 'Transpose', 'Non-unit', IB,
+     $                     I-1, ONE, A( I, I ), LDA, A( I, 1 ), LDA )
+               CALL SLAUU2( 'Lower', IB, A( I, I ), LDA, INFO )
+               IF( I+IB.LE.N ) THEN
+                  CALL SGEMM( 'Transpose', 'No transpose', IB, I-1,
+     $                        N-I-IB+1, ONE, A( I+IB, I ), LDA,
+     $                        A( I+IB, 1 ), LDA, ONE, A( I, 1 ), LDA )
+                  CALL SSYRK( 'Lower', 'Transpose', IB, N-I-IB+1, ONE,
+     $                        A( I+IB, I ), LDA, ONE, A( I, I ), LDA )
+               END IF
+   20       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SLAUUM
+*
+      END
diff --git a/costa/native/external/lapack/sopgtr.f b/costa/native/external/lapack/sopgtr.f
new file mode 100644
index 000000000..4b98a5a9b
--- /dev/null
+++ b/costa/native/external/lapack/sopgtr.f
@@ -0,0 +1,161 @@
+      SUBROUTINE SOPGTR( UPLO, N, AP, TAU, Q, LDQ, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDQ, N
+*     ..
+*     .. Array Arguments ..
+      REAL               AP( * ), Q( LDQ, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SOPGTR generates a real orthogonal matrix Q which is defined as the
+*  product of n-1 elementary reflectors H(i) of order n, as returned by
+*  SSPTRD using packed storage:
+*
+*  if UPLO = 'U', Q = H(n-1) . . . H(2) H(1),
+*
+*  if UPLO = 'L', Q = H(1) H(2) . . . H(n-1).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U': Upper triangular packed storage used in previous
+*                 call to SSPTRD;
+*          = 'L': Lower triangular packed storage used in previous
+*                 call to SSPTRD.
+*
+*  N       (input) INTEGER
+*          The order of the matrix Q. N >= 0.
+*
+*  AP      (input) REAL array, dimension (N*(N+1)/2)
+*          The vectors which define the elementary reflectors, as
+*          returned by SSPTRD.
+*
+*  TAU     (input) REAL array, dimension (N-1)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by SSPTRD.
+*
+*  Q       (output) REAL array, dimension (LDQ,N)
+*          The N-by-N orthogonal matrix Q.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q. LDQ >= max(1,N).
+*
+*  WORK    (workspace) REAL array, dimension (N-1)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, IINFO, IJ, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SORG2L, SORG2R, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SOPGTR', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Q was determined by a call to SSPTRD with UPLO = 'U'
+*
+*        Unpack the vectors which define the elementary reflectors and
+*        set the last row and column of Q equal to those of the unit
+*        matrix
+*
+         IJ = 2
+         DO 20 J = 1, N - 1
+            DO 10 I = 1, J - 1
+               Q( I, J ) = AP( IJ )
+               IJ = IJ + 1
+   10       CONTINUE
+            IJ = IJ + 2
+            Q( N, J ) = ZERO
+   20    CONTINUE
+         DO 30 I = 1, N - 1
+            Q( I, N ) = ZERO
+   30    CONTINUE
+         Q( N, N ) = ONE
+*
+*        Generate Q(1:n-1,1:n-1)
+*
+         CALL SORG2L( N-1, N-1, N-1, Q, LDQ, TAU, WORK, IINFO )
+*
+      ELSE
+*
+*        Q was determined by a call to SSPTRD with UPLO = 'L'.
+*
+*        Unpack the vectors which define the elementary reflectors and
+*        set the first row and column of Q equal to those of the unit
+*        matrix
+*
+         Q( 1, 1 ) = ONE
+         DO 40 I = 2, N
+            Q( I, 1 ) = ZERO
+   40    CONTINUE
+         IJ = 3
+         DO 60 J = 2, N
+            Q( 1, J ) = ZERO
+            DO 50 I = J + 1, N
+               Q( I, J ) = AP( IJ )
+               IJ = IJ + 1
+   50       CONTINUE
+            IJ = IJ + 2
+   60    CONTINUE
+         IF( N.GT.1 ) THEN
+*
+*           Generate Q(2:n,2:n)
+*
+            CALL SORG2R( N-1, N-1, N-1, Q( 2, 2 ), LDQ, TAU, WORK,
+     $                   IINFO )
+         END IF
+      END IF
+      RETURN
+*
+*     End of SOPGTR
+*
+      END
diff --git a/costa/native/external/lapack/sopmtr.f b/costa/native/external/lapack/sopmtr.f
new file mode 100644
index 000000000..a089d8f78
--- /dev/null
+++ b/costa/native/external/lapack/sopmtr.f
@@ -0,0 +1,258 @@
+      SUBROUTINE SOPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS, UPLO
+      INTEGER            INFO, LDC, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               AP( * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SOPMTR overwrites the general real M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'T':      Q**T * C       C * Q**T
+*
+*  where Q is a real orthogonal matrix of order nq, with nq = m if
+*  SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of
+*  nq-1 elementary reflectors, as returned by SSPTRD using packed
+*  storage:
+*
+*  if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1);
+*
+*  if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1).
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**T from the Left;
+*          = 'R': apply Q or Q**T from the Right.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U': Upper triangular packed storage used in previous
+*                 call to SSPTRD;
+*          = 'L': Lower triangular packed storage used in previous
+*                 call to SSPTRD.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q;
+*          = 'T':  Transpose, apply Q**T.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  AP      (input) REAL array, dimension
+*                               (M*(M+1)/2) if SIDE = 'L'
+*                               (N*(N+1)/2) if SIDE = 'R'
+*          The vectors which define the elementary reflectors, as
+*          returned by SSPTRD.  AP is modified by the routine but
+*          restored on exit.
+*
+*  TAU     (input) REAL array, dimension (M-1) if SIDE = 'L'
+*                                     or (N-1) if SIDE = 'R'
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by SSPTRD.
+*
+*  C       (input/output) REAL array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) REAL array, dimension
+*                                   (N) if SIDE = 'L'
+*                                   (M) if SIDE = 'R'
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            FORWRD, LEFT, NOTRAN, UPPER
+      INTEGER            I, I1, I2, I3, IC, II, JC, MI, NI, NQ
+      REAL               AII
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      UPPER = LSAME( UPLO, 'U' )
+*
+*     NQ is the order of Q
+*
+      IF( LEFT ) THEN
+         NQ = M
+      ELSE
+         NQ = N
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SOPMTR', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Q was determined by a call to SSPTRD with UPLO = 'U'
+*
+         FORWRD = ( LEFT .AND. NOTRAN ) .OR.
+     $            ( .NOT.LEFT .AND. .NOT.NOTRAN )
+*
+         IF( FORWRD ) THEN
+            I1 = 1
+            I2 = NQ - 1
+            I3 = 1
+            II = 2
+         ELSE
+            I1 = NQ - 1
+            I2 = 1
+            I3 = -1
+            II = NQ*( NQ+1 ) / 2 - 1
+         END IF
+*
+         IF( LEFT ) THEN
+            NI = N
+         ELSE
+            MI = M
+         END IF
+*
+         DO 10 I = I1, I2, I3
+            IF( LEFT ) THEN
+*
+*              H(i) is applied to C(1:i,1:n)
+*
+               MI = I
+            ELSE
+*
+*              H(i) is applied to C(1:m,1:i)
+*
+               NI = I
+            END IF
+*
+*           Apply H(i)
+*
+            AII = AP( II )
+            AP( II ) = ONE
+            CALL SLARF( SIDE, MI, NI, AP( II-I+1 ), 1, TAU( I ), C, LDC,
+     $                  WORK )
+            AP( II ) = AII
+*
+            IF( FORWRD ) THEN
+               II = II + I + 2
+            ELSE
+               II = II - I - 1
+            END IF
+   10    CONTINUE
+      ELSE
+*
+*        Q was determined by a call to SSPTRD with UPLO = 'L'.
+*
+         FORWRD = ( LEFT .AND. .NOT.NOTRAN ) .OR.
+     $            ( .NOT.LEFT .AND. NOTRAN )
+*
+         IF( FORWRD ) THEN
+            I1 = 1
+            I2 = NQ - 1
+            I3 = 1
+            II = 2
+         ELSE
+            I1 = NQ - 1
+            I2 = 1
+            I3 = -1
+            II = NQ*( NQ+1 ) / 2 - 1
+         END IF
+*
+         IF( LEFT ) THEN
+            NI = N
+            JC = 1
+         ELSE
+            MI = M
+            IC = 1
+         END IF
+*
+         DO 20 I = I1, I2, I3
+            AII = AP( II )
+            AP( II ) = ONE
+            IF( LEFT ) THEN
+*
+*              H(i) is applied to C(i+1:m,1:n)
+*
+               MI = M - I
+               IC = I + 1
+            ELSE
+*
+*              H(i) is applied to C(1:m,i+1:n)
+*
+               NI = N - I
+               JC = I + 1
+            END IF
+*
+*           Apply H(i)
+*
+            CALL SLARF( SIDE, MI, NI, AP( II ), 1, TAU( I ),
+     $                  C( IC, JC ), LDC, WORK )
+            AP( II ) = AII
+*
+            IF( FORWRD ) THEN
+               II = II + NQ - I + 1
+            ELSE
+               II = II - NQ + I - 2
+            END IF
+   20    CONTINUE
+      END IF
+      RETURN
+*
+*     End of SOPMTR
+*
+      END
diff --git a/costa/native/external/lapack/sorg2l.f b/costa/native/external/lapack/sorg2l.f
new file mode 100644
index 000000000..7a21a6740
--- /dev/null
+++ b/costa/native/external/lapack/sorg2l.f
@@ -0,0 +1,128 @@
+      SUBROUTINE SORG2L( M, N, K, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SORG2L generates an m by n real matrix Q with orthonormal columns,
+*  which is defined as the last n columns of a product of k elementary
+*  reflectors of order m
+*
+*        Q  =  H(k) . . . H(2) H(1)
+*
+*  as returned by SGEQLF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. M >= N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. N >= K >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the (n-k+i)-th column must contain the vector which
+*          defines the elementary reflector H(i), for i = 1,2,...,k, as
+*          returned by SGEQLF in the last k columns of its array
+*          argument A.
+*          On exit, the m by n matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) REAL array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by SGEQLF.
+*
+*  WORK    (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, II, J, L
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARF, SSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 .OR. N.GT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.N ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SORG2L', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 )
+     $   RETURN
+*
+*     Initialise columns 1:n-k to columns of the unit matrix
+*
+      DO 20 J = 1, N - K
+         DO 10 L = 1, M
+            A( L, J ) = ZERO
+   10    CONTINUE
+         A( M-N+J, J ) = ONE
+   20 CONTINUE
+*
+      DO 40 I = 1, K
+         II = N - K + I
+*
+*        Apply H(i) to A(1:m-k+i,1:n-k+i) from the left
+*
+         A( M-N+II, II ) = ONE
+         CALL SLARF( 'Left', M-N+II, II-1, A( 1, II ), 1, TAU( I ), A,
+     $               LDA, WORK )
+         CALL SSCAL( M-N+II-1, -TAU( I ), A( 1, II ), 1 )
+         A( M-N+II, II ) = ONE - TAU( I )
+*
+*        Set A(m-k+i+1:m,n-k+i) to zero
+*
+         DO 30 L = M - N + II + 1, M
+            A( L, II ) = ZERO
+   30    CONTINUE
+   40 CONTINUE
+      RETURN
+*
+*     End of SORG2L
+*
+      END
diff --git a/costa/native/external/lapack/sorg2r.f b/costa/native/external/lapack/sorg2r.f
new file mode 100644
index 000000000..4af5506fc
--- /dev/null
+++ b/costa/native/external/lapack/sorg2r.f
@@ -0,0 +1,130 @@
+      SUBROUTINE SORG2R( M, N, K, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SORG2R generates an m by n real matrix Q with orthonormal columns,
+*  which is defined as the first n columns of a product of k elementary
+*  reflectors of order m
+*
+*        Q  =  H(1) H(2) . . . H(k)
+*
+*  as returned by SGEQRF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. M >= N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. N >= K >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the i-th column must contain the vector which
+*          defines the elementary reflector H(i), for i = 1,2,...,k, as
+*          returned by SGEQRF in the first k columns of its array
+*          argument A.
+*          On exit, the m-by-n matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) REAL array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by SGEQRF.
+*
+*  WORK    (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, L
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARF, SSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 .OR. N.GT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.N ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SORG2R', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 )
+     $   RETURN
+*
+*     Initialise columns k+1:n to columns of the unit matrix
+*
+      DO 20 J = K + 1, N
+         DO 10 L = 1, M
+            A( L, J ) = ZERO
+   10    CONTINUE
+         A( J, J ) = ONE
+   20 CONTINUE
+*
+      DO 40 I = K, 1, -1
+*
+*        Apply H(i) to A(i:m,i:n) from the left
+*
+         IF( I.LT.N ) THEN
+            A( I, I ) = ONE
+            CALL SLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ),
+     $                  A( I, I+1 ), LDA, WORK )
+         END IF
+         IF( I.LT.M )
+     $      CALL SSCAL( M-I, -TAU( I ), A( I+1, I ), 1 )
+         A( I, I ) = ONE - TAU( I )
+*
+*        Set A(1:i-1,i) to zero
+*
+         DO 30 L = 1, I - 1
+            A( L, I ) = ZERO
+   30    CONTINUE
+   40 CONTINUE
+      RETURN
+*
+*     End of SORG2R
+*
+      END
diff --git a/costa/native/external/lapack/sorgbr.f b/costa/native/external/lapack/sorgbr.f
new file mode 100644
index 000000000..2f7052283
--- /dev/null
+++ b/costa/native/external/lapack/sorgbr.f
@@ -0,0 +1,245 @@
+      SUBROUTINE SORGBR( VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          VECT
+      INTEGER            INFO, K, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SORGBR generates one of the real orthogonal matrices Q or P**T
+*  determined by SGEBRD when reducing a real matrix A to bidiagonal
+*  form: A = Q * B * P**T.  Q and P**T are defined as products of
+*  elementary reflectors H(i) or G(i) respectively.
+*
+*  If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q
+*  is of order M:
+*  if m >= k, Q = H(1) H(2) . . . H(k) and SORGBR returns the first n
+*  columns of Q, where m >= n >= k;
+*  if m < k, Q = H(1) H(2) . . . H(m-1) and SORGBR returns Q as an
+*  M-by-M matrix.
+*
+*  If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T
+*  is of order N:
+*  if k < n, P**T = G(k) . . . G(2) G(1) and SORGBR returns the first m
+*  rows of P**T, where n >= m >= k;
+*  if k >= n, P**T = G(n-1) . . . G(2) G(1) and SORGBR returns P**T as
+*  an N-by-N matrix.
+*
+*  Arguments
+*  =========
+*
+*  VECT    (input) CHARACTER*1
+*          Specifies whether the matrix Q or the matrix P**T is
+*          required, as defined in the transformation applied by SGEBRD:
+*          = 'Q':  generate Q;
+*          = 'P':  generate P**T.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q or P**T to be returned.
+*          M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q or P**T to be returned.
+*          N >= 0.
+*          If VECT = 'Q', M >= N >= min(M,K);
+*          if VECT = 'P', N >= M >= min(N,K).
+*
+*  K       (input) INTEGER
+*          If VECT = 'Q', the number of columns in the original M-by-K
+*          matrix reduced by SGEBRD.
+*          If VECT = 'P', the number of rows in the original K-by-N
+*          matrix reduced by SGEBRD.
+*          K >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the vectors which define the elementary reflectors,
+*          as returned by SGEBRD.
+*          On exit, the M-by-N matrix Q or P**T.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) REAL array, dimension
+*                                (min(M,K)) if VECT = 'Q'
+*                                (min(N,K)) if VECT = 'P'
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i) or G(i), which determines Q or P**T, as
+*          returned by SGEBRD in its array argument TAUQ or TAUP.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,min(M,N)).
+*          For optimum performance LWORK >= min(M,N)*NB, where NB
+*          is the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, WANTQ
+      INTEGER            I, IINFO, J, LWKOPT, MN, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV, LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SORGLQ, SORGQR, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      WANTQ = LSAME( VECT, 'Q' )
+      MN = MIN( M, N )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.WANTQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN
+         INFO = -1
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 .OR. ( WANTQ .AND. ( N.GT.M .OR. N.LT.MIN( M,
+     $         K ) ) ) .OR. ( .NOT.WANTQ .AND. ( M.GT.N .OR. M.LT.
+     $         MIN( N, K ) ) ) ) THEN
+         INFO = -3
+      ELSE IF( K.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -6
+      ELSE IF( LWORK.LT.MAX( 1, MN ) .AND. .NOT.LQUERY ) THEN
+         INFO = -9
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         IF( WANTQ ) THEN
+            NB = ILAENV( 1, 'SORGQR', ' ', M, N, K, -1 )
+         ELSE
+            NB = ILAENV( 1, 'SORGLQ', ' ', M, N, K, -1 )
+         END IF
+         LWKOPT = MAX( 1, MN )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SORGBR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( WANTQ ) THEN
+*
+*        Form Q, determined by a call to SGEBRD to reduce an m-by-k
+*        matrix
+*
+         IF( M.GE.K ) THEN
+*
+*           If m >= k, assume m >= n >= k
+*
+            CALL SORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, IINFO )
+*
+         ELSE
+*
+*           If m < k, assume m = n
+*
+*           Shift the vectors which define the elementary reflectors one
+*           column to the right, and set the first row and column of Q
+*           to those of the unit matrix
+*
+            DO 20 J = M, 2, -1
+               A( 1, J ) = ZERO
+               DO 10 I = J + 1, M
+                  A( I, J ) = A( I, J-1 )
+   10          CONTINUE
+   20       CONTINUE
+            A( 1, 1 ) = ONE
+            DO 30 I = 2, M
+               A( I, 1 ) = ZERO
+   30       CONTINUE
+            IF( M.GT.1 ) THEN
+*
+*              Form Q(2:m,2:m)
+*
+               CALL SORGQR( M-1, M-1, M-1, A( 2, 2 ), LDA, TAU, WORK,
+     $                      LWORK, IINFO )
+            END IF
+         END IF
+      ELSE
+*
+*        Form P', determined by a call to SGEBRD to reduce a k-by-n
+*        matrix
+*
+         IF( K.LT.N ) THEN
+*
+*           If k < n, assume k <= m <= n
+*
+            CALL SORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, IINFO )
+*
+         ELSE
+*
+*           If k >= n, assume m = n
+*
+*           Shift the vectors which define the elementary reflectors one
+*           row downward, and set the first row and column of P' to
+*           those of the unit matrix
+*
+            A( 1, 1 ) = ONE
+            DO 40 I = 2, N
+               A( I, 1 ) = ZERO
+   40       CONTINUE
+            DO 60 J = 2, N
+               DO 50 I = J - 1, 2, -1
+                  A( I, J ) = A( I-1, J )
+   50          CONTINUE
+               A( 1, J ) = ZERO
+   60       CONTINUE
+            IF( N.GT.1 ) THEN
+*
+*              Form P'(2:n,2:n)
+*
+               CALL SORGLQ( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK,
+     $                      LWORK, IINFO )
+            END IF
+         END IF
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of SORGBR
+*
+      END
diff --git a/costa/native/external/lapack/sorghr.f b/costa/native/external/lapack/sorghr.f
new file mode 100644
index 000000000..01ab48489
--- /dev/null
+++ b/costa/native/external/lapack/sorghr.f
@@ -0,0 +1,165 @@
+      SUBROUTINE SORGHR( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            IHI, ILO, INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SORGHR generates a real orthogonal matrix Q which is defined as the
+*  product of IHI-ILO elementary reflectors of order N, as returned by
+*  SGEHRD:
+*
+*  Q = H(ilo) H(ilo+1) . . . H(ihi-1).
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix Q. N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          ILO and IHI must have the same values as in the previous call
+*          of SGEHRD. Q is equal to the unit matrix except in the
+*          submatrix Q(ilo+1:ihi,ilo+1:ihi).
+*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the vectors which define the elementary reflectors,
+*          as returned by SGEHRD.
+*          On exit, the N-by-N orthogonal matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  TAU     (input) REAL array, dimension (N-1)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by SGEHRD.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= IHI-ILO.
+*          For optimum performance LWORK >= (IHI-ILO)*NB, where NB is
+*          the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IINFO, J, LWKOPT, NB, NH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SORGQR, XERBLA
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV 
+      EXTERNAL           ILAENV 
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NH = IHI - ILO
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN
+         INFO = -2
+      ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.MAX( 1, NH ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'SORGQR', ' ', NH, NH, NH, -1 )
+         LWKOPT = MAX( 1, NH )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SORGHR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+*     Shift the vectors which define the elementary reflectors one
+*     column to the right, and set the first ilo and the last n-ihi
+*     rows and columns to those of the unit matrix
+*
+      DO 40 J = IHI, ILO + 1, -1
+         DO 10 I = 1, J - 1
+            A( I, J ) = ZERO
+   10    CONTINUE
+         DO 20 I = J + 1, IHI
+            A( I, J ) = A( I, J-1 )
+   20    CONTINUE
+         DO 30 I = IHI + 1, N
+            A( I, J ) = ZERO
+   30    CONTINUE
+   40 CONTINUE
+      DO 60 J = 1, ILO
+         DO 50 I = 1, N
+            A( I, J ) = ZERO
+   50    CONTINUE
+         A( J, J ) = ONE
+   60 CONTINUE
+      DO 80 J = IHI + 1, N
+         DO 70 I = 1, N
+            A( I, J ) = ZERO
+   70    CONTINUE
+         A( J, J ) = ONE
+   80 CONTINUE
+*
+      IF( NH.GT.0 ) THEN
+*
+*        Generate Q(ilo+1:ihi,ilo+1:ihi)
+*
+         CALL SORGQR( NH, NH, NH, A( ILO+1, ILO+1 ), LDA, TAU( ILO ),
+     $                WORK, LWORK, IINFO )
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of SORGHR
+*
+      END
diff --git a/costa/native/external/lapack/sorgl2.f b/costa/native/external/lapack/sorgl2.f
new file mode 100644
index 000000000..8f4505aa8
--- /dev/null
+++ b/costa/native/external/lapack/sorgl2.f
@@ -0,0 +1,134 @@
+      SUBROUTINE SORGL2( M, N, K, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SORGL2 generates an m by n real matrix Q with orthonormal rows,
+*  which is defined as the first m rows of a product of k elementary
+*  reflectors of order n
+*
+*        Q  =  H(k) . . . H(2) H(1)
+*
+*  as returned by SGELQF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. N >= M.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. M >= K >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the i-th row must contain the vector which defines
+*          the elementary reflector H(i), for i = 1,2,...,k, as returned
+*          by SGELQF in the first k rows of its array argument A.
+*          On exit, the m-by-n matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) REAL array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by SGELQF.
+*
+*  WORK    (workspace) REAL array, dimension (M)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, L
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARF, SSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.M ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SORGL2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 )
+     $   RETURN
+*
+      IF( K.LT.M ) THEN
+*
+*        Initialise rows k+1:m to rows of the unit matrix
+*
+         DO 20 J = 1, N
+            DO 10 L = K + 1, M
+               A( L, J ) = ZERO
+   10       CONTINUE
+            IF( J.GT.K .AND. J.LE.M )
+     $         A( J, J ) = ONE
+   20    CONTINUE
+      END IF
+*
+      DO 40 I = K, 1, -1
+*
+*        Apply H(i) to A(i:m,i:n) from the right
+*
+         IF( I.LT.N ) THEN
+            IF( I.LT.M ) THEN
+               A( I, I ) = ONE
+               CALL SLARF( 'Right', M-I, N-I+1, A( I, I ), LDA,
+     $                     TAU( I ), A( I+1, I ), LDA, WORK )
+            END IF
+            CALL SSCAL( N-I, -TAU( I ), A( I, I+1 ), LDA )
+         END IF
+         A( I, I ) = ONE - TAU( I )
+*
+*        Set A(i,1:i-1) to zero
+*
+         DO 30 L = 1, I - 1
+            A( I, L ) = ZERO
+   30    CONTINUE
+   40 CONTINUE
+      RETURN
+*
+*     End of SORGL2
+*
+      END
diff --git a/costa/native/external/lapack/sorglq.f b/costa/native/external/lapack/sorglq.f
new file mode 100644
index 000000000..816894aa0
--- /dev/null
+++ b/costa/native/external/lapack/sorglq.f
@@ -0,0 +1,216 @@
+      SUBROUTINE SORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SORGLQ generates an M-by-N real matrix Q with orthonormal rows,
+*  which is defined as the first M rows of a product of K elementary
+*  reflectors of order N
+*
+*        Q  =  H(k) . . . H(2) H(1)
+*
+*  as returned by SGELQF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. N >= M.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. M >= K >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the i-th row must contain the vector which defines
+*          the elementary reflector H(i), for i = 1,2,...,k, as returned
+*          by SGELQF in the first k rows of its array argument A.
+*          On exit, the M-by-N matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) REAL array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by SGELQF.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,M).
+*          For optimum performance LWORK >= M*NB, where NB is
+*          the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, J, KI, KK, L, LDWORK,
+     $                   LWKOPT, NB, NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARFB, SLARFT, SORGL2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'SORGLQ', ' ', M, N, K, -1 )
+      LWKOPT = MAX( 1, M )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.M ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SORGLQ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 0
+      IWS = M
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'SORGLQ', ' ', M, N, K, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = M
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'SORGLQ', ' ', M, N, K, -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code after the last block.
+*        The first kk rows are handled by the block method.
+*
+         KI = ( ( K-NX-1 ) / NB )*NB
+         KK = MIN( K, KI+NB )
+*
+*        Set A(kk+1:m,1:kk) to zero.
+*
+         DO 20 J = 1, KK
+            DO 10 I = KK + 1, M
+               A( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+      ELSE
+         KK = 0
+      END IF
+*
+*     Use unblocked code for the last or only block.
+*
+      IF( KK.LT.M )
+     $   CALL SORGL2( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA,
+     $                TAU( KK+1 ), WORK, IINFO )
+*
+      IF( KK.GT.0 ) THEN
+*
+*        Use blocked code
+*
+         DO 50 I = KI + 1, 1, -NB
+            IB = MIN( NB, K-I+1 )
+            IF( I+IB.LE.M ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i) H(i+1) . . . H(i+ib-1)
+*
+               CALL SLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ),
+     $                      LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H' to A(i+ib:m,i:n) from the right
+*
+               CALL SLARFB( 'Right', 'Transpose', 'Forward', 'Rowwise',
+     $                      M-I-IB+1, N-I+1, IB, A( I, I ), LDA, WORK,
+     $                      LDWORK, A( I+IB, I ), LDA, WORK( IB+1 ),
+     $                      LDWORK )
+            END IF
+*
+*           Apply H' to columns i:n of current block
+*
+            CALL SORGL2( IB, N-I+1, IB, A( I, I ), LDA, TAU( I ), WORK,
+     $                   IINFO )
+*
+*           Set columns 1:i-1 of current block to zero
+*
+            DO 40 J = 1, I - 1
+               DO 30 L = I, I + IB - 1
+                  A( L, J ) = ZERO
+   30          CONTINUE
+   40       CONTINUE
+   50    CONTINUE
+      END IF
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of SORGLQ
+*
+      END
diff --git a/costa/native/external/lapack/sorgql.f b/costa/native/external/lapack/sorgql.f
new file mode 100644
index 000000000..b4610856a
--- /dev/null
+++ b/costa/native/external/lapack/sorgql.f
@@ -0,0 +1,214 @@
+      SUBROUTINE SORGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SORGQL generates an M-by-N real matrix Q with orthonormal columns,
+*  which is defined as the last N columns of a product of K elementary
+*  reflectors of order M
+*
+*        Q  =  H(k) . . . H(2) H(1)
+*
+*  as returned by SGEQLF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. M >= N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. N >= K >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the (n-k+i)-th column must contain the vector which
+*          defines the elementary reflector H(i), for i = 1,2,...,k, as
+*          returned by SGEQLF in the last k columns of its array
+*          argument A.
+*          On exit, the M-by-N matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) REAL array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by SGEQLF.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,N).
+*          For optimum performance LWORK >= N*NB, where NB is the
+*          optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, J, KK, L, LDWORK, LWKOPT,
+     $                   NB, NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARFB, SLARFT, SORG2L, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'SORGQL', ' ', M, N, K, -1 )
+      LWKOPT = MAX( 1, N )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 .OR. N.GT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.N ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SORGQL', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 0
+      IWS = N
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'SORGQL', ' ', M, N, K, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = N
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'SORGQL', ' ', M, N, K, -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code after the first block.
+*        The last kk columns are handled by the block method.
+*
+         KK = MIN( K, ( ( K-NX+NB-1 ) / NB )*NB )
+*
+*        Set A(m-kk+1:m,1:n-kk) to zero.
+*
+         DO 20 J = 1, N - KK
+            DO 10 I = M - KK + 1, M
+               A( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+      ELSE
+         KK = 0
+      END IF
+*
+*     Use unblocked code for the first or only block.
+*
+      CALL SORG2L( M-KK, N-KK, K-KK, A, LDA, TAU, WORK, IINFO )
+*
+      IF( KK.GT.0 ) THEN
+*
+*        Use blocked code
+*
+         DO 50 I = K - KK + 1, K, NB
+            IB = MIN( NB, K-I+1 )
+            IF( N-K+I.GT.1 ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i+ib-1) . . . H(i+1) H(i)
+*
+               CALL SLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB,
+     $                      A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left
+*
+               CALL SLARFB( 'Left', 'No transpose', 'Backward',
+     $                      'Columnwise', M-K+I+IB-1, N-K+I-1, IB,
+     $                      A( 1, N-K+I ), LDA, WORK, LDWORK, A, LDA,
+     $                      WORK( IB+1 ), LDWORK )
+            END IF
+*
+*           Apply H to rows 1:m-k+i+ib-1 of current block
+*
+            CALL SORG2L( M-K+I+IB-1, IB, IB, A( 1, N-K+I ), LDA,
+     $                   TAU( I ), WORK, IINFO )
+*
+*           Set rows m-k+i+ib:m of current block to zero
+*
+            DO 40 J = N - K + I, N - K + I + IB - 1
+               DO 30 L = M - K + I + IB, M
+                  A( L, J ) = ZERO
+   30          CONTINUE
+   40       CONTINUE
+   50    CONTINUE
+      END IF
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of SORGQL
+*
+      END
diff --git a/costa/native/external/lapack/sorgqr.f b/costa/native/external/lapack/sorgqr.f
new file mode 100644
index 000000000..70de749a8
--- /dev/null
+++ b/costa/native/external/lapack/sorgqr.f
@@ -0,0 +1,217 @@
+      SUBROUTINE SORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SORGQR generates an M-by-N real matrix Q with orthonormal columns,
+*  which is defined as the first N columns of a product of K elementary
+*  reflectors of order M
+*
+*        Q  =  H(1) H(2) . . . H(k)
+*
+*  as returned by SGEQRF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. M >= N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. N >= K >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the i-th column must contain the vector which
+*          defines the elementary reflector H(i), for i = 1,2,...,k, as
+*          returned by SGEQRF in the first k columns of its array
+*          argument A.
+*          On exit, the M-by-N matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) REAL array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by SGEQRF.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,N).
+*          For optimum performance LWORK >= N*NB, where NB is the
+*          optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, J, KI, KK, L, LDWORK,
+     $                   LWKOPT, NB, NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARFB, SLARFT, SORG2R, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'SORGQR', ' ', M, N, K, -1 )
+      LWKOPT = MAX( 1, N )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 .OR. N.GT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.N ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SORGQR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 0
+      IWS = N
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'SORGQR', ' ', M, N, K, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = N
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'SORGQR', ' ', M, N, K, -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code after the last block.
+*        The first kk columns are handled by the block method.
+*
+         KI = ( ( K-NX-1 ) / NB )*NB
+         KK = MIN( K, KI+NB )
+*
+*        Set A(1:kk,kk+1:n) to zero.
+*
+         DO 20 J = KK + 1, N
+            DO 10 I = 1, KK
+               A( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+      ELSE
+         KK = 0
+      END IF
+*
+*     Use unblocked code for the last or only block.
+*
+      IF( KK.LT.N )
+     $   CALL SORG2R( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA,
+     $                TAU( KK+1 ), WORK, IINFO )
+*
+      IF( KK.GT.0 ) THEN
+*
+*        Use blocked code
+*
+         DO 50 I = KI + 1, 1, -NB
+            IB = MIN( NB, K-I+1 )
+            IF( I+IB.LE.N ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i) H(i+1) . . . H(i+ib-1)
+*
+               CALL SLARFT( 'Forward', 'Columnwise', M-I+1, IB,
+     $                      A( I, I ), LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H to A(i:m,i+ib:n) from the left
+*
+               CALL SLARFB( 'Left', 'No transpose', 'Forward',
+     $                      'Columnwise', M-I+1, N-I-IB+1, IB,
+     $                      A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ),
+     $                      LDA, WORK( IB+1 ), LDWORK )
+            END IF
+*
+*           Apply H to rows i:m of current block
+*
+            CALL SORG2R( M-I+1, IB, IB, A( I, I ), LDA, TAU( I ), WORK,
+     $                   IINFO )
+*
+*           Set rows 1:i-1 of current block to zero
+*
+            DO 40 J = I, I + IB - 1
+               DO 30 L = 1, I - 1
+                  A( L, J ) = ZERO
+   30          CONTINUE
+   40       CONTINUE
+   50    CONTINUE
+      END IF
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of SORGQR
+*
+      END
diff --git a/costa/native/external/lapack/sorgr2.f b/costa/native/external/lapack/sorgr2.f
new file mode 100644
index 000000000..7498ec376
--- /dev/null
+++ b/costa/native/external/lapack/sorgr2.f
@@ -0,0 +1,132 @@
+      SUBROUTINE SORGR2( M, N, K, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SORGR2 generates an m by n real matrix Q with orthonormal rows,
+*  which is defined as the last m rows of a product of k elementary
+*  reflectors of order n
+*
+*        Q  =  H(1) H(2) . . . H(k)
+*
+*  as returned by SGERQF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. N >= M.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. M >= K >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the (m-k+i)-th row must contain the vector which
+*          defines the elementary reflector H(i), for i = 1,2,...,k, as
+*          returned by SGERQF in the last k rows of its array argument
+*          A.
+*          On exit, the m by n matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) REAL array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by SGERQF.
+*
+*  WORK    (workspace) REAL array, dimension (M)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, II, J, L
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARF, SSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.M ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SORGR2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 )
+     $   RETURN
+*
+      IF( K.LT.M ) THEN
+*
+*        Initialise rows 1:m-k to rows of the unit matrix
+*
+         DO 20 J = 1, N
+            DO 10 L = 1, M - K
+               A( L, J ) = ZERO
+   10       CONTINUE
+            IF( J.GT.N-M .AND. J.LE.N-K )
+     $         A( M-N+J, J ) = ONE
+   20    CONTINUE
+      END IF
+*
+      DO 40 I = 1, K
+         II = M - K + I
+*
+*        Apply H(i) to A(1:m-k+i,1:n-k+i) from the right
+*
+         A( II, N-M+II ) = ONE
+         CALL SLARF( 'Right', II-1, N-M+II, A( II, 1 ), LDA, TAU( I ),
+     $               A, LDA, WORK )
+         CALL SSCAL( N-M+II-1, -TAU( I ), A( II, 1 ), LDA )
+         A( II, N-M+II ) = ONE - TAU( I )
+*
+*        Set A(m-k+i,n-k+i+1:n) to zero
+*
+         DO 30 L = N - M + II + 1, N
+            A( II, L ) = ZERO
+   30    CONTINUE
+   40 CONTINUE
+      RETURN
+*
+*     End of SORGR2
+*
+      END
diff --git a/costa/native/external/lapack/sorgrq.f b/costa/native/external/lapack/sorgrq.f
new file mode 100644
index 000000000..bdd3c7e5c
--- /dev/null
+++ b/costa/native/external/lapack/sorgrq.f
@@ -0,0 +1,214 @@
+      SUBROUTINE SORGRQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SORGRQ generates an M-by-N real matrix Q with orthonormal rows,
+*  which is defined as the last M rows of a product of K elementary
+*  reflectors of order N
+*
+*        Q  =  H(1) H(2) . . . H(k)
+*
+*  as returned by SGERQF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. N >= M.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. M >= K >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the (m-k+i)-th row must contain the vector which
+*          defines the elementary reflector H(i), for i = 1,2,...,k, as
+*          returned by SGERQF in the last k rows of its array argument
+*          A.
+*          On exit, the M-by-N matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) REAL array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by SGERQF.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,M).
+*          For optimum performance LWORK >= M*NB, where NB is the
+*          optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, II, IINFO, IWS, J, KK, L, LDWORK,
+     $                   LWKOPT, NB, NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARFB, SLARFT, SORGR2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'SORGRQ', ' ', M, N, K, -1 )
+      LWKOPT = MAX( 1, M )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.M ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SORGRQ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 0
+      IWS = M
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'SORGRQ', ' ', M, N, K, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = M
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'SORGRQ', ' ', M, N, K, -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code after the first block.
+*        The last kk rows are handled by the block method.
+*
+         KK = MIN( K, ( ( K-NX+NB-1 ) / NB )*NB )
+*
+*        Set A(1:m-kk,n-kk+1:n) to zero.
+*
+         DO 20 J = N - KK + 1, N
+            DO 10 I = 1, M - KK
+               A( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+      ELSE
+         KK = 0
+      END IF
+*
+*     Use unblocked code for the first or only block.
+*
+      CALL SORGR2( M-KK, N-KK, K-KK, A, LDA, TAU, WORK, IINFO )
+*
+      IF( KK.GT.0 ) THEN
+*
+*        Use blocked code
+*
+         DO 50 I = K - KK + 1, K, NB
+            IB = MIN( NB, K-I+1 )
+            II = M - K + I
+            IF( II.GT.1 ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i+ib-1) . . . H(i+1) H(i)
+*
+               CALL SLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB,
+     $                      A( II, 1 ), LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H' to A(1:m-k+i-1,1:n-k+i+ib-1) from the right
+*
+               CALL SLARFB( 'Right', 'Transpose', 'Backward', 'Rowwise',
+     $                      II-1, N-K+I+IB-1, IB, A( II, 1 ), LDA, WORK,
+     $                      LDWORK, A, LDA, WORK( IB+1 ), LDWORK )
+            END IF
+*
+*           Apply H' to columns 1:n-k+i+ib-1 of current block
+*
+            CALL SORGR2( IB, N-K+I+IB-1, IB, A( II, 1 ), LDA, TAU( I ),
+     $                   WORK, IINFO )
+*
+*           Set columns n-k+i+ib:n of current block to zero
+*
+            DO 40 L = N - K + I + IB, N
+               DO 30 J = II, II + IB - 1
+                  A( J, L ) = ZERO
+   30          CONTINUE
+   40       CONTINUE
+   50    CONTINUE
+      END IF
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of SORGRQ
+*
+      END
diff --git a/costa/native/external/lapack/sorgtr.f b/costa/native/external/lapack/sorgtr.f
new file mode 100644
index 000000000..d827c93f9
--- /dev/null
+++ b/costa/native/external/lapack/sorgtr.f
@@ -0,0 +1,184 @@
+      SUBROUTINE SORGTR( UPLO, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SORGTR generates a real orthogonal matrix Q which is defined as the
+*  product of n-1 elementary reflectors of order N, as returned by
+*  SSYTRD:
+*
+*  if UPLO = 'U', Q = H(n-1) . . . H(2) H(1),
+*
+*  if UPLO = 'L', Q = H(1) H(2) . . . H(n-1).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U': Upper triangle of A contains elementary reflectors
+*                 from SSYTRD;
+*          = 'L': Lower triangle of A contains elementary reflectors
+*                 from SSYTRD.
+*
+*  N       (input) INTEGER
+*          The order of the matrix Q. N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the vectors which define the elementary reflectors,
+*          as returned by SSYTRD.
+*          On exit, the N-by-N orthogonal matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  TAU     (input) REAL array, dimension (N-1)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by SSYTRD.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,N-1).
+*          For optimum performance LWORK >= (N-1)*NB, where NB is
+*          the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, UPPER
+      INTEGER            I, IINFO, J, LWKOPT, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV, LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SORGQL, SORGQR, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, N-1 ) .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         IF ( UPPER ) THEN
+           NB = ILAENV( 1, 'SORGQL', ' ', N-1, N-1, N-1, -1 )
+         ELSE
+           NB = ILAENV( 1, 'SORGQR', ' ', N-1, N-1, N-1, -1 )
+         END IF
+         LWKOPT = MAX( 1, N-1 )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*    
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SORGTR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( UPPER ) THEN
+*
+*        Q was determined by a call to SSYTRD with UPLO = 'U'
+*
+*        Shift the vectors which define the elementary reflectors one
+*        column to the left, and set the last row and column of Q to
+*        those of the unit matrix
+*
+         DO 20 J = 1, N - 1
+            DO 10 I = 1, J - 1
+               A( I, J ) = A( I, J+1 )
+   10       CONTINUE
+            A( N, J ) = ZERO
+   20    CONTINUE
+         DO 30 I = 1, N - 1
+            A( I, N ) = ZERO
+   30    CONTINUE
+         A( N, N ) = ONE
+*
+*        Generate Q(1:n-1,1:n-1)
+*
+         CALL SORGQL( N-1, N-1, N-1, A, LDA, TAU, WORK, LWORK, IINFO )
+*
+      ELSE
+*
+*        Q was determined by a call to SSYTRD with UPLO = 'L'.
+*
+*        Shift the vectors which define the elementary reflectors one
+*        column to the right, and set the first row and column of Q to
+*        those of the unit matrix
+*
+         DO 50 J = N, 2, -1
+            A( 1, J ) = ZERO
+            DO 40 I = J + 1, N
+               A( I, J ) = A( I, J-1 )
+   40       CONTINUE
+   50    CONTINUE
+         A( 1, 1 ) = ONE
+         DO 60 I = 2, N
+            A( I, 1 ) = ZERO
+   60    CONTINUE
+         IF( N.GT.1 ) THEN
+*
+*           Generate Q(2:n,2:n)
+*
+            CALL SORGQR( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK,
+     $                   LWORK, IINFO )
+         END IF
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of SORGTR
+*
+      END
diff --git a/costa/native/external/lapack/sorm2l.f b/costa/native/external/lapack/sorm2l.f
new file mode 100644
index 000000000..a27ed443e
--- /dev/null
+++ b/costa/native/external/lapack/sorm2l.f
@@ -0,0 +1,194 @@
+      SUBROUTINE SORM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SORM2L overwrites the general real m by n matrix C with
+*
+*        Q * C  if SIDE = 'L' and TRANS = 'N', or
+*
+*        Q'* C  if SIDE = 'L' and TRANS = 'T', or
+*
+*        C * Q  if SIDE = 'R' and TRANS = 'N', or
+*
+*        C * Q' if SIDE = 'R' and TRANS = 'T',
+*
+*  where Q is a real orthogonal matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(k) . . . H(2) H(1)
+*
+*  as returned by SGEQLF. Q is of order m if SIDE = 'L' and of order n
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q' from the Left
+*          = 'R': apply Q or Q' from the Right
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': apply Q  (No transpose)
+*          = 'T': apply Q' (Transpose)
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) REAL array, dimension (LDA,K)
+*          The i-th column must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          SGEQLF in the last k columns of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*          If SIDE = 'L', LDA >= max(1,M);
+*          if SIDE = 'R', LDA >= max(1,N).
+*
+*  TAU     (input) REAL array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by SGEQLF.
+*
+*  C       (input/output) REAL array, dimension (LDC,N)
+*          On entry, the m by n matrix C.
+*          On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) REAL array, dimension
+*                                   (N) if SIDE = 'L',
+*                                   (M) if SIDE = 'R'
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, NOTRAN
+      INTEGER            I, I1, I2, I3, MI, NI, NQ
+      REAL               AII
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+*
+*     NQ is the order of Q
+*
+      IF( LEFT ) THEN
+         NQ = M
+      ELSE
+         NQ = N
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SORM2L', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 )
+     $   RETURN
+*
+      IF( ( LEFT .AND. NOTRAN ) .OR. ( .NOT.LEFT .AND. .NOT.NOTRAN ) )
+     $     THEN
+         I1 = 1
+         I2 = K
+         I3 = 1
+      ELSE
+         I1 = K
+         I2 = 1
+         I3 = -1
+      END IF
+*
+      IF( LEFT ) THEN
+         NI = N
+      ELSE
+         MI = M
+      END IF
+*
+      DO 10 I = I1, I2, I3
+         IF( LEFT ) THEN
+*
+*           H(i) is applied to C(1:m-k+i,1:n)
+*
+            MI = M - K + I
+         ELSE
+*
+*           H(i) is applied to C(1:m,1:n-k+i)
+*
+            NI = N - K + I
+         END IF
+*
+*        Apply H(i)
+*
+         AII = A( NQ-K+I, I )
+         A( NQ-K+I, I ) = ONE
+         CALL SLARF( SIDE, MI, NI, A( 1, I ), 1, TAU( I ), C, LDC,
+     $               WORK )
+         A( NQ-K+I, I ) = AII
+   10 CONTINUE
+      RETURN
+*
+*     End of SORM2L
+*
+      END
diff --git a/costa/native/external/lapack/sorm2r.f b/costa/native/external/lapack/sorm2r.f
new file mode 100644
index 000000000..8b971b32c
--- /dev/null
+++ b/costa/native/external/lapack/sorm2r.f
@@ -0,0 +1,198 @@
+      SUBROUTINE SORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SORM2R overwrites the general real m by n matrix C with
+*
+*        Q * C  if SIDE = 'L' and TRANS = 'N', or
+*
+*        Q'* C  if SIDE = 'L' and TRANS = 'T', or
+*
+*        C * Q  if SIDE = 'R' and TRANS = 'N', or
+*
+*        C * Q' if SIDE = 'R' and TRANS = 'T',
+*
+*  where Q is a real orthogonal matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(1) H(2) . . . H(k)
+*
+*  as returned by SGEQRF. Q is of order m if SIDE = 'L' and of order n
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q' from the Left
+*          = 'R': apply Q or Q' from the Right
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': apply Q  (No transpose)
+*          = 'T': apply Q' (Transpose)
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) REAL array, dimension (LDA,K)
+*          The i-th column must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          SGEQRF in the first k columns of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*          If SIDE = 'L', LDA >= max(1,M);
+*          if SIDE = 'R', LDA >= max(1,N).
+*
+*  TAU     (input) REAL array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by SGEQRF.
+*
+*  C       (input/output) REAL array, dimension (LDC,N)
+*          On entry, the m by n matrix C.
+*          On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) REAL array, dimension
+*                                   (N) if SIDE = 'L',
+*                                   (M) if SIDE = 'R'
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, NOTRAN
+      INTEGER            I, I1, I2, I3, IC, JC, MI, NI, NQ
+      REAL               AII
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+*
+*     NQ is the order of Q
+*
+      IF( LEFT ) THEN
+         NQ = M
+      ELSE
+         NQ = N
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SORM2R', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 )
+     $   RETURN
+*
+      IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. ( .NOT.LEFT .AND. NOTRAN ) )
+     $     THEN
+         I1 = 1
+         I2 = K
+         I3 = 1
+      ELSE
+         I1 = K
+         I2 = 1
+         I3 = -1
+      END IF
+*
+      IF( LEFT ) THEN
+         NI = N
+         JC = 1
+      ELSE
+         MI = M
+         IC = 1
+      END IF
+*
+      DO 10 I = I1, I2, I3
+         IF( LEFT ) THEN
+*
+*           H(i) is applied to C(i:m,1:n)
+*
+            MI = M - I + 1
+            IC = I
+         ELSE
+*
+*           H(i) is applied to C(1:m,i:n)
+*
+            NI = N - I + 1
+            JC = I
+         END IF
+*
+*        Apply H(i)
+*
+         AII = A( I, I )
+         A( I, I ) = ONE
+         CALL SLARF( SIDE, MI, NI, A( I, I ), 1, TAU( I ), C( IC, JC ),
+     $               LDC, WORK )
+         A( I, I ) = AII
+   10 CONTINUE
+      RETURN
+*
+*     End of SORM2R
+*
+      END
diff --git a/costa/native/external/lapack/sormbr.f b/costa/native/external/lapack/sormbr.f
new file mode 100644
index 000000000..e36906c24
--- /dev/null
+++ b/costa/native/external/lapack/sormbr.f
@@ -0,0 +1,283 @@
+      SUBROUTINE SORMBR( VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C,
+     $                   LDC, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS, VECT
+      INTEGER            INFO, K, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), C( LDC, * ), TAU( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  If VECT = 'Q', SORMBR overwrites the general real M-by-N matrix C
+*  with
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'T':      Q**T * C       C * Q**T
+*
+*  If VECT = 'P', SORMBR overwrites the general real M-by-N matrix C
+*  with
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      P * C          C * P
+*  TRANS = 'T':      P**T * C       C * P**T
+*
+*  Here Q and P**T are the orthogonal matrices determined by SGEBRD when
+*  reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and
+*  P**T are defined as products of elementary reflectors H(i) and G(i)
+*  respectively.
+*
+*  Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the
+*  order of the orthogonal matrix Q or P**T that is applied.
+*
+*  If VECT = 'Q', A is assumed to have been an NQ-by-K matrix:
+*  if nq >= k, Q = H(1) H(2) . . . H(k);
+*  if nq < k, Q = H(1) H(2) . . . H(nq-1).
+*
+*  If VECT = 'P', A is assumed to have been a K-by-NQ matrix:
+*  if k < nq, P = G(1) G(2) . . . G(k);
+*  if k >= nq, P = G(1) G(2) . . . G(nq-1).
+*
+*  Arguments
+*  =========
+*
+*  VECT    (input) CHARACTER*1
+*          = 'Q': apply Q or Q**T;
+*          = 'P': apply P or P**T.
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q, Q**T, P or P**T from the Left;
+*          = 'R': apply Q, Q**T, P or P**T from the Right.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q  or P;
+*          = 'T':  Transpose, apply Q**T or P**T.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          If VECT = 'Q', the number of columns in the original
+*          matrix reduced by SGEBRD.
+*          If VECT = 'P', the number of rows in the original
+*          matrix reduced by SGEBRD.
+*          K >= 0.
+*
+*  A       (input) REAL array, dimension
+*                                (LDA,min(nq,K)) if VECT = 'Q'
+*                                (LDA,nq)        if VECT = 'P'
+*          The vectors which define the elementary reflectors H(i) and
+*          G(i), whose products determine the matrices Q and P, as
+*          returned by SGEBRD.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*          If VECT = 'Q', LDA >= max(1,nq);
+*          if VECT = 'P', LDA >= max(1,min(nq,K)).
+*
+*  TAU     (input) REAL array, dimension (min(nq,K))
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i) or G(i) which determines Q or P, as returned
+*          by SGEBRD in the array argument TAUQ or TAUP.
+*
+*  C       (input/output) REAL array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q
+*          or P*C or P**T*C or C*P or C*P**T.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE = 'L', and
+*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            APPLYQ, LEFT, LQUERY, NOTRAN
+      CHARACTER          TRANST
+      INTEGER            I1, I2, IINFO, LWKOPT, MI, NB, NI, NQ, NW
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV, LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SORMLQ, SORMQR, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      APPLYQ = LSAME( VECT, 'Q' )
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q or P and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.APPLYQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( K.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( ( APPLYQ .AND. LDA.LT.MAX( 1, NQ ) ) .OR.
+     $         ( .NOT.APPLYQ .AND. LDA.LT.MAX( 1, MIN( NQ, K ) ) ) )
+     $          THEN
+         INFO = -8
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -11
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         IF( APPLYQ ) THEN
+            IF( LEFT ) THEN
+               NB = ILAENV( 1, 'SORMQR', SIDE // TRANS, M-1, N, M-1,
+     $                      -1 )
+            ELSE
+               NB = ILAENV( 1, 'SORMQR', SIDE // TRANS, M, N-1, N-1,
+     $                      -1 )
+            END IF   
+         ELSE
+            IF( LEFT ) THEN
+               NB = ILAENV( 1, 'SORMLQ', SIDE // TRANS, M-1, N, M-1,
+     $                      -1 ) 
+            ELSE
+               NB = ILAENV( 1, 'SORMLQ', SIDE // TRANS, M, N-1, N-1,
+     $                      -1 )
+            END IF
+         END IF
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT 
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SORMBR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      WORK( 1 ) = 1
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+      IF( APPLYQ ) THEN
+*
+*        Apply Q
+*
+         IF( NQ.GE.K ) THEN
+*
+*           Q was determined by a call to SGEBRD with nq >= k
+*
+            CALL SORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, IINFO )
+         ELSE IF( NQ.GT.1 ) THEN
+*
+*           Q was determined by a call to SGEBRD with nq < k
+*
+            IF( LEFT ) THEN
+               MI = M - 1
+               NI = N
+               I1 = 2
+               I2 = 1
+            ELSE
+               MI = M
+               NI = N - 1
+               I1 = 1
+               I2 = 2
+            END IF
+            CALL SORMQR( SIDE, TRANS, MI, NI, NQ-1, A( 2, 1 ), LDA, TAU,
+     $                   C( I1, I2 ), LDC, WORK, LWORK, IINFO )
+         END IF
+      ELSE
+*
+*        Apply P
+*
+         IF( NOTRAN ) THEN
+            TRANST = 'T'
+         ELSE
+            TRANST = 'N'
+         END IF
+         IF( NQ.GT.K ) THEN
+*
+*           P was determined by a call to SGEBRD with nq > k
+*
+            CALL SORMLQ( SIDE, TRANST, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, IINFO )
+         ELSE IF( NQ.GT.1 ) THEN
+*
+*           P was determined by a call to SGEBRD with nq <= k
+*
+            IF( LEFT ) THEN
+               MI = M - 1
+               NI = N
+               I1 = 2
+               I2 = 1
+            ELSE
+               MI = M
+               NI = N - 1
+               I1 = 1
+               I2 = 2
+            END IF
+            CALL SORMLQ( SIDE, TRANST, MI, NI, NQ-1, A( 1, 2 ), LDA,
+     $                   TAU, C( I1, I2 ), LDC, WORK, LWORK, IINFO )
+         END IF
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of SORMBR
+*
+      END
diff --git a/costa/native/external/lapack/sormhr.f b/costa/native/external/lapack/sormhr.f
new file mode 100644
index 000000000..605433987
--- /dev/null
+++ b/costa/native/external/lapack/sormhr.f
@@ -0,0 +1,203 @@
+      SUBROUTINE SORMHR( SIDE, TRANS, M, N, ILO, IHI, A, LDA, TAU, C,
+     $                   LDC, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            IHI, ILO, INFO, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), C( LDC, * ), TAU( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SORMHR overwrites the general real M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'T':      Q**T * C       C * Q**T
+*
+*  where Q is a real orthogonal matrix of order nq, with nq = m if
+*  SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of
+*  IHI-ILO elementary reflectors, as returned by SGEHRD:
+*
+*  Q = H(ilo) H(ilo+1) . . . H(ihi-1).
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**T from the Left;
+*          = 'R': apply Q or Q**T from the Right.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q;
+*          = 'T':  Transpose, apply Q**T.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          ILO and IHI must have the same values as in the previous call
+*          of SGEHRD. Q is equal to the unit matrix except in the
+*          submatrix Q(ilo+1:ihi,ilo+1:ihi).
+*          If SIDE = 'L', then 1 <= ILO <= IHI <= M, if M > 0, and
+*          ILO = 1 and IHI = 0, if M = 0;
+*          if SIDE = 'R', then 1 <= ILO <= IHI <= N, if N > 0, and
+*          ILO = 1 and IHI = 0, if N = 0.
+*
+*  A       (input) REAL array, dimension
+*                               (LDA,M) if SIDE = 'L'
+*                               (LDA,N) if SIDE = 'R'
+*          The vectors which define the elementary reflectors, as
+*          returned by SGEHRD.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*          LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'.
+*
+*  TAU     (input) REAL array, dimension
+*                               (M-1) if SIDE = 'L'
+*                               (N-1) if SIDE = 'R'
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by SGEHRD.
+*
+*  C       (input/output) REAL array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE = 'L', and
+*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LEFT, LQUERY
+      INTEGER            I1, I2, IINFO, LWKOPT, MI, NB, NH, NI, NQ, NW
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV, LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SORMQR, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NH = IHI - ILO
+      LEFT = LSAME( SIDE, 'L' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.LSAME( TRANS, 'T' ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, NQ ) ) THEN
+         INFO = -5
+      ELSE IF( IHI.LT.MIN( ILO, NQ ) .OR. IHI.GT.NQ ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN
+         INFO = -8
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -11
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         IF( LEFT ) THEN
+            NB = ILAENV( 1, 'SORMQR', SIDE // TRANS, NH, N, NH, -1 )
+         ELSE
+            NB = ILAENV( 1, 'SORMQR', SIDE // TRANS, M, NH, NH, -1 ) 
+         END IF
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SORMHR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. NH.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( LEFT ) THEN
+         MI = NH
+         NI = N
+         I1 = ILO + 1
+         I2 = 1
+      ELSE
+         MI = M
+         NI = NH
+         I1 = 1
+         I2 = ILO + 1
+      END IF
+*
+      CALL SORMQR( SIDE, TRANS, MI, NI, NH, A( ILO+1, ILO ), LDA,
+     $             TAU( ILO ), C( I1, I2 ), LDC, WORK, LWORK, IINFO )
+*
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of SORMHR
+*
+      END
diff --git a/costa/native/external/lapack/sorml2.f b/costa/native/external/lapack/sorml2.f
new file mode 100644
index 000000000..094be1283
--- /dev/null
+++ b/costa/native/external/lapack/sorml2.f
@@ -0,0 +1,198 @@
+      SUBROUTINE SORML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SORML2 overwrites the general real m by n matrix C with
+*
+*        Q * C  if SIDE = 'L' and TRANS = 'N', or
+*
+*        Q'* C  if SIDE = 'L' and TRANS = 'T', or
+*
+*        C * Q  if SIDE = 'R' and TRANS = 'N', or
+*
+*        C * Q' if SIDE = 'R' and TRANS = 'T',
+*
+*  where Q is a real orthogonal matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(k) . . . H(2) H(1)
+*
+*  as returned by SGELQF. Q is of order m if SIDE = 'L' and of order n
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q' from the Left
+*          = 'R': apply Q or Q' from the Right
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': apply Q  (No transpose)
+*          = 'T': apply Q' (Transpose)
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) REAL array, dimension
+*                               (LDA,M) if SIDE = 'L',
+*                               (LDA,N) if SIDE = 'R'
+*          The i-th row must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          SGELQF in the first k rows of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,K).
+*
+*  TAU     (input) REAL array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by SGELQF.
+*
+*  C       (input/output) REAL array, dimension (LDC,N)
+*          On entry, the m by n matrix C.
+*          On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) REAL array, dimension
+*                                   (N) if SIDE = 'L',
+*                                   (M) if SIDE = 'R'
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, NOTRAN
+      INTEGER            I, I1, I2, I3, IC, JC, MI, NI, NQ
+      REAL               AII
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+*
+*     NQ is the order of Q
+*
+      IF( LEFT ) THEN
+         NQ = M
+      ELSE
+         NQ = N
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, K ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SORML2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 )
+     $   RETURN
+*
+      IF( ( LEFT .AND. NOTRAN ) .OR. ( .NOT.LEFT .AND. .NOT.NOTRAN ) )
+     $     THEN
+         I1 = 1
+         I2 = K
+         I3 = 1
+      ELSE
+         I1 = K
+         I2 = 1
+         I3 = -1
+      END IF
+*
+      IF( LEFT ) THEN
+         NI = N
+         JC = 1
+      ELSE
+         MI = M
+         IC = 1
+      END IF
+*
+      DO 10 I = I1, I2, I3
+         IF( LEFT ) THEN
+*
+*           H(i) is applied to C(i:m,1:n)
+*
+            MI = M - I + 1
+            IC = I
+         ELSE
+*
+*           H(i) is applied to C(1:m,i:n)
+*
+            NI = N - I + 1
+            JC = I
+         END IF
+*
+*        Apply H(i)
+*
+         AII = A( I, I )
+         A( I, I ) = ONE
+         CALL SLARF( SIDE, MI, NI, A( I, I ), LDA, TAU( I ),
+     $               C( IC, JC ), LDC, WORK )
+         A( I, I ) = AII
+   10 CONTINUE
+      RETURN
+*
+*     End of SORML2
+*
+      END
diff --git a/costa/native/external/lapack/sormlq.f b/costa/native/external/lapack/sormlq.f
new file mode 100644
index 000000000..611e2f2d1
--- /dev/null
+++ b/costa/native/external/lapack/sormlq.f
@@ -0,0 +1,269 @@
+      SUBROUTINE SORMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), C( LDC, * ), TAU( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SORMLQ overwrites the general real M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'T':      Q**T * C       C * Q**T
+*
+*  where Q is a real orthogonal matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(k) . . . H(2) H(1)
+*
+*  as returned by SGELQF. Q is of order M if SIDE = 'L' and of order N
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**T from the Left;
+*          = 'R': apply Q or Q**T from the Right.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q;
+*          = 'T':  Transpose, apply Q**T.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) REAL array, dimension
+*                               (LDA,M) if SIDE = 'L',
+*                               (LDA,N) if SIDE = 'R'
+*          The i-th row must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          SGELQF in the first k rows of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,K).
+*
+*  TAU     (input) REAL array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by SGELQF.
+*
+*  C       (input/output) REAL array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE = 'L', and
+*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            NBMAX, LDT
+      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, LQUERY, NOTRAN
+      CHARACTER          TRANST
+      INTEGER            I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK,
+     $                   LWKOPT, MI, NB, NBMIN, NI, NQ, NW
+*     ..
+*     .. Local Arrays ..
+      REAL               T( LDT, NBMAX )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARFB, SLARFT, SORML2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, K ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size.  NB may be at most NBMAX, where NBMAX
+*        is used to define the local array T.
+*
+         NB = MIN( NBMAX, ILAENV( 1, 'SORMLQ', SIDE // TRANS, M, N, K,
+     $             -1 ) )
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF 
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SORMLQ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      LDWORK = NW
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+         IWS = NW*NB
+         IF( LWORK.LT.IWS ) THEN
+            NB = LWORK / LDWORK
+            NBMIN = MAX( 2, ILAENV( 2, 'SORMLQ', SIDE // TRANS, M, N, K,
+     $              -1 ) )
+         END IF
+      ELSE
+         IWS = NW
+      END IF
+*
+      IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
+*
+*        Use unblocked code
+*
+         CALL SORML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK,
+     $                IINFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( ( LEFT .AND. NOTRAN ) .OR.
+     $       ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN
+            I1 = 1
+            I2 = K
+            I3 = NB
+         ELSE
+            I1 = ( ( K-1 ) / NB )*NB + 1
+            I2 = 1
+            I3 = -NB
+         END IF
+*
+         IF( LEFT ) THEN
+            NI = N
+            JC = 1
+         ELSE
+            MI = M
+            IC = 1
+         END IF
+*
+         IF( NOTRAN ) THEN
+            TRANST = 'T'
+         ELSE
+            TRANST = 'N'
+         END IF
+*
+         DO 10 I = I1, I2, I3
+            IB = MIN( NB, K-I+1 )
+*
+*           Form the triangular factor of the block reflector
+*           H = H(i) H(i+1) . . . H(i+ib-1)
+*
+            CALL SLARFT( 'Forward', 'Rowwise', NQ-I+1, IB, A( I, I ),
+     $                   LDA, TAU( I ), T, LDT )
+            IF( LEFT ) THEN
+*
+*              H or H' is applied to C(i:m,1:n)
+*
+               MI = M - I + 1
+               IC = I
+            ELSE
+*
+*              H or H' is applied to C(1:m,i:n)
+*
+               NI = N - I + 1
+               JC = I
+            END IF
+*
+*           Apply H or H'
+*
+            CALL SLARFB( SIDE, TRANST, 'Forward', 'Rowwise', MI, NI, IB,
+     $                   A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, WORK,
+     $                   LDWORK )
+   10    CONTINUE
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of SORMLQ
+*
+      END
diff --git a/costa/native/external/lapack/sormql.f b/costa/native/external/lapack/sormql.f
new file mode 100644
index 000000000..e52e99fb7
--- /dev/null
+++ b/costa/native/external/lapack/sormql.f
@@ -0,0 +1,258 @@
+      SUBROUTINE SORMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), C( LDC, * ), TAU( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SORMQL overwrites the general real M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'T':      Q**T * C       C * Q**T
+*
+*  where Q is a real orthogonal matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(k) . . . H(2) H(1)
+*
+*  as returned by SGEQLF. Q is of order M if SIDE = 'L' and of order N
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**T from the Left;
+*          = 'R': apply Q or Q**T from the Right.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q;
+*          = 'T':  Transpose, apply Q**T.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) REAL array, dimension (LDA,K)
+*          The i-th column must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          SGEQLF in the last k columns of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*          If SIDE = 'L', LDA >= max(1,M);
+*          if SIDE = 'R', LDA >= max(1,N).
+*
+*  TAU     (input) REAL array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by SGEQLF.
+*
+*  C       (input/output) REAL array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE = 'L', and
+*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            NBMAX, LDT
+      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, LQUERY, NOTRAN
+      INTEGER            I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT,
+     $                   MI, NB, NBMIN, NI, NQ, NW
+*     ..
+*     .. Local Arrays ..
+      REAL               T( LDT, NBMAX )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARFB, SLARFT, SORM2L, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size.  NB may be at most NBMAX, where NBMAX
+*        is used to define the local array T.
+*
+         NB = MIN( NBMAX, ILAENV( 1, 'SORMQL', SIDE // TRANS, M, N, K,
+     $             -1 ) )
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SORMQL', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      LDWORK = NW
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+         IWS = NW*NB
+         IF( LWORK.LT.IWS ) THEN
+            NB = LWORK / LDWORK
+            NBMIN = MAX( 2, ILAENV( 2, 'SORMQL', SIDE // TRANS, M, N, K,
+     $              -1 ) )
+         END IF
+      ELSE
+         IWS = NW
+      END IF
+*
+      IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
+*
+*        Use unblocked code
+*
+         CALL SORM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK,
+     $                IINFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( ( LEFT .AND. NOTRAN ) .OR.
+     $       ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN
+            I1 = 1
+            I2 = K
+            I3 = NB
+         ELSE
+            I1 = ( ( K-1 ) / NB )*NB + 1
+            I2 = 1
+            I3 = -NB
+         END IF
+*
+         IF( LEFT ) THEN
+            NI = N
+         ELSE
+            MI = M
+         END IF
+*
+         DO 10 I = I1, I2, I3
+            IB = MIN( NB, K-I+1 )
+*
+*           Form the triangular factor of the block reflector
+*           H = H(i+ib-1) . . . H(i+1) H(i)
+*
+            CALL SLARFT( 'Backward', 'Columnwise', NQ-K+I+IB-1, IB,
+     $                   A( 1, I ), LDA, TAU( I ), T, LDT )
+            IF( LEFT ) THEN
+*
+*              H or H' is applied to C(1:m-k+i+ib-1,1:n)
+*
+               MI = M - K + I + IB - 1
+            ELSE
+*
+*              H or H' is applied to C(1:m,1:n-k+i+ib-1)
+*
+               NI = N - K + I + IB - 1
+            END IF
+*
+*           Apply H or H'
+*
+            CALL SLARFB( SIDE, TRANS, 'Backward', 'Columnwise', MI, NI,
+     $                   IB, A( 1, I ), LDA, T, LDT, C, LDC, WORK,
+     $                   LDWORK )
+   10    CONTINUE
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of SORMQL
+*
+      END
diff --git a/costa/native/external/lapack/sormqr.f b/costa/native/external/lapack/sormqr.f
new file mode 100644
index 000000000..976fa098d
--- /dev/null
+++ b/costa/native/external/lapack/sormqr.f
@@ -0,0 +1,262 @@
+      SUBROUTINE SORMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), C( LDC, * ), TAU( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SORMQR overwrites the general real M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'T':      Q**T * C       C * Q**T
+*
+*  where Q is a real orthogonal matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(1) H(2) . . . H(k)
+*
+*  as returned by SGEQRF. Q is of order M if SIDE = 'L' and of order N
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**T from the Left;
+*          = 'R': apply Q or Q**T from the Right.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q;
+*          = 'T':  Transpose, apply Q**T.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) REAL array, dimension (LDA,K)
+*          The i-th column must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          SGEQRF in the first k columns of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*          If SIDE = 'L', LDA >= max(1,M);
+*          if SIDE = 'R', LDA >= max(1,N).
+*
+*  TAU     (input) REAL array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by SGEQRF.
+*
+*  C       (input/output) REAL array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE = 'L', and
+*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            NBMAX, LDT
+      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, LQUERY, NOTRAN
+      INTEGER            I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK,
+     $                   LWKOPT, MI, NB, NBMIN, NI, NQ, NW
+*     ..
+*     .. Local Arrays ..
+      REAL               T( LDT, NBMAX )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARFB, SLARFT, SORM2R, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size.  NB may be at most NBMAX, where NBMAX
+*        is used to define the local array T.
+*
+         NB = MIN( NBMAX, ILAENV( 1, 'SORMQR', SIDE // TRANS, M, N, K,
+     $        -1 ) )
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SORMQR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      LDWORK = NW
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+         IWS = NW*NB
+         IF( LWORK.LT.IWS ) THEN
+            NB = LWORK / LDWORK
+            NBMIN = MAX( 2, ILAENV( 2, 'SORMQR', SIDE // TRANS, M, N, K,
+     $              -1 ) )
+         END IF
+      ELSE
+         IWS = NW
+      END IF
+*
+      IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
+*
+*        Use unblocked code
+*
+         CALL SORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK,
+     $                IINFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( ( LEFT .AND. .NOT.NOTRAN ) .OR.
+     $       ( .NOT.LEFT .AND. NOTRAN ) ) THEN
+            I1 = 1
+            I2 = K
+            I3 = NB
+         ELSE
+            I1 = ( ( K-1 ) / NB )*NB + 1
+            I2 = 1
+            I3 = -NB
+         END IF
+*
+         IF( LEFT ) THEN
+            NI = N
+            JC = 1
+         ELSE
+            MI = M
+            IC = 1
+         END IF
+*
+         DO 10 I = I1, I2, I3
+            IB = MIN( NB, K-I+1 )
+*
+*           Form the triangular factor of the block reflector
+*           H = H(i) H(i+1) . . . H(i+ib-1)
+*
+            CALL SLARFT( 'Forward', 'Columnwise', NQ-I+1, IB, A( I, I ),
+     $                   LDA, TAU( I ), T, LDT )
+            IF( LEFT ) THEN
+*
+*              H or H' is applied to C(i:m,1:n)
+*
+               MI = M - I + 1
+               IC = I
+            ELSE
+*
+*              H or H' is applied to C(1:m,i:n)
+*
+               NI = N - I + 1
+               JC = I
+            END IF
+*
+*           Apply H or H'
+*
+            CALL SLARFB( SIDE, TRANS, 'Forward', 'Columnwise', MI, NI,
+     $                   IB, A( I, I ), LDA, T, LDT, C( IC, JC ), LDC,
+     $                   WORK, LDWORK )
+   10    CONTINUE
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of SORMQR
+*
+      END
diff --git a/costa/native/external/lapack/sormr2.f b/costa/native/external/lapack/sormr2.f
new file mode 100644
index 000000000..dc9b250e0
--- /dev/null
+++ b/costa/native/external/lapack/sormr2.f
@@ -0,0 +1,194 @@
+      SUBROUTINE SORMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SORMR2 overwrites the general real m by n matrix C with
+*
+*        Q * C  if SIDE = 'L' and TRANS = 'N', or
+*
+*        Q'* C  if SIDE = 'L' and TRANS = 'T', or
+*
+*        C * Q  if SIDE = 'R' and TRANS = 'N', or
+*
+*        C * Q' if SIDE = 'R' and TRANS = 'T',
+*
+*  where Q is a real orthogonal matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(1) H(2) . . . H(k)
+*
+*  as returned by SGERQF. Q is of order m if SIDE = 'L' and of order n
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q' from the Left
+*          = 'R': apply Q or Q' from the Right
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': apply Q  (No transpose)
+*          = 'T': apply Q' (Transpose)
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) REAL array, dimension
+*                               (LDA,M) if SIDE = 'L',
+*                               (LDA,N) if SIDE = 'R'
+*          The i-th row must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          SGERQF in the last k rows of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,K).
+*
+*  TAU     (input) REAL array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by SGERQF.
+*
+*  C       (input/output) REAL array, dimension (LDC,N)
+*          On entry, the m by n matrix C.
+*          On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) REAL array, dimension
+*                                   (N) if SIDE = 'L',
+*                                   (M) if SIDE = 'R'
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, NOTRAN
+      INTEGER            I, I1, I2, I3, MI, NI, NQ
+      REAL               AII
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+*
+*     NQ is the order of Q
+*
+      IF( LEFT ) THEN
+         NQ = M
+      ELSE
+         NQ = N
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, K ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SORMR2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 )
+     $   RETURN
+*
+      IF( ( LEFT .AND. .NOT.NOTRAN ) .OR. ( .NOT.LEFT .AND. NOTRAN ) )
+     $     THEN
+         I1 = 1
+         I2 = K
+         I3 = 1
+      ELSE
+         I1 = K
+         I2 = 1
+         I3 = -1
+      END IF
+*
+      IF( LEFT ) THEN
+         NI = N
+      ELSE
+         MI = M
+      END IF
+*
+      DO 10 I = I1, I2, I3
+         IF( LEFT ) THEN
+*
+*           H(i) is applied to C(1:m-k+i,1:n)
+*
+            MI = M - K + I
+         ELSE
+*
+*           H(i) is applied to C(1:m,1:n-k+i)
+*
+            NI = N - K + I
+         END IF
+*
+*        Apply H(i)
+*
+         AII = A( I, NQ-K+I )
+         A( I, NQ-K+I ) = ONE
+         CALL SLARF( SIDE, MI, NI, A( I, 1 ), LDA, TAU( I ), C, LDC,
+     $               WORK )
+         A( I, NQ-K+I ) = AII
+   10 CONTINUE
+      RETURN
+*
+*     End of SORMR2
+*
+      END
diff --git a/costa/native/external/lapack/sormr3.f b/costa/native/external/lapack/sormr3.f
new file mode 100644
index 000000000..d17d54540
--- /dev/null
+++ b/costa/native/external/lapack/sormr3.f
@@ -0,0 +1,207 @@
+      SUBROUTINE SORMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC,
+     $                   WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, L, LDA, LDC, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SORMR3 overwrites the general real m by n matrix C with
+*
+*        Q * C  if SIDE = 'L' and TRANS = 'N', or
+*
+*        Q'* C  if SIDE = 'L' and TRANS = 'T', or
+*
+*        C * Q  if SIDE = 'R' and TRANS = 'N', or
+*
+*        C * Q' if SIDE = 'R' and TRANS = 'T',
+*
+*  where Q is a real orthogonal matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(1) H(2) . . . H(k)
+*
+*  as returned by STZRZF. Q is of order m if SIDE = 'L' and of order n
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q' from the Left
+*          = 'R': apply Q or Q' from the Right
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': apply Q  (No transpose)
+*          = 'T': apply Q' (Transpose)
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  L       (input) INTEGER
+*          The number of columns of the matrix A containing
+*          the meaningful part of the Householder reflectors.
+*          If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0.
+*
+*  A       (input) REAL array, dimension
+*                               (LDA,M) if SIDE = 'L',
+*                               (LDA,N) if SIDE = 'R'
+*          The i-th row must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          STZRZF in the last k rows of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,K).
+*
+*  TAU     (input) REAL array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by STZRZF.
+*
+*  C       (input/output) REAL array, dimension (LDC,N)
+*          On entry, the m-by-n matrix C.
+*          On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) REAL array, dimension
+*                                   (N) if SIDE = 'L',
+*                                   (M) if SIDE = 'R'
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LEFT, NOTRAN
+      INTEGER            I, I1, I2, I3, IC, JA, JC, MI, NI, NQ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARZ, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+*
+*     NQ is the order of Q
+*
+      IF( LEFT ) THEN
+         NQ = M
+      ELSE
+         NQ = N
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( L.LT.0 .OR. ( LEFT .AND. ( L.GT.M ) ) .OR.
+     $         ( .NOT.LEFT .AND. ( L.GT.N ) ) ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, K ) ) THEN
+         INFO = -8
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SORMR3', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 )
+     $   RETURN
+*
+      IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN
+         I1 = 1
+         I2 = K
+         I3 = 1
+      ELSE
+         I1 = K
+         I2 = 1
+         I3 = -1
+      END IF
+*
+      IF( LEFT ) THEN
+         NI = N
+         JA = M - L + 1
+         JC = 1
+      ELSE
+         MI = M
+         JA = N - L + 1
+         IC = 1
+      END IF
+*
+      DO 10 I = I1, I2, I3
+         IF( LEFT ) THEN
+*
+*           H(i) or H(i)' is applied to C(i:m,1:n)
+*
+            MI = M - I + 1
+            IC = I
+         ELSE
+*
+*           H(i) or H(i)' is applied to C(1:m,i:n)
+*
+            NI = N - I + 1
+            JC = I
+         END IF
+*
+*        Apply H(i) or H(i)'
+*
+         CALL SLARZ( SIDE, MI, NI, L, A( I, JA ), LDA, TAU( I ),
+     $               C( IC, JC ), LDC, WORK )
+*
+   10 CONTINUE
+*
+      RETURN
+*
+*     End of SORMR3
+*
+      END
diff --git a/costa/native/external/lapack/sormrq.f b/costa/native/external/lapack/sormrq.f
new file mode 100644
index 000000000..cb22b1cbf
--- /dev/null
+++ b/costa/native/external/lapack/sormrq.f
@@ -0,0 +1,265 @@
+      SUBROUTINE SORMRQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), C( LDC, * ), TAU( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SORMRQ overwrites the general real M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'T':      Q**T * C       C * Q**T
+*
+*  where Q is a real orthogonal matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(1) H(2) . . . H(k)
+*
+*  as returned by SGERQF. Q is of order M if SIDE = 'L' and of order N
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**T from the Left;
+*          = 'R': apply Q or Q**T from the Right.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q;
+*          = 'T':  Transpose, apply Q**T.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) REAL array, dimension
+*                               (LDA,M) if SIDE = 'L',
+*                               (LDA,N) if SIDE = 'R'
+*          The i-th row must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          SGERQF in the last k rows of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,K).
+*
+*  TAU     (input) REAL array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by SGERQF.
+*
+*  C       (input/output) REAL array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE = 'L', and
+*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            NBMAX, LDT
+      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, LQUERY, NOTRAN
+      CHARACTER          TRANST
+      INTEGER            I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT,
+     $                   MI, NB, NBMIN, NI, NQ, NW
+*     ..
+*     .. Local Arrays ..
+      REAL               T( LDT, NBMAX )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARFB, SLARFT, SORMR2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, K ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size.  NB may be at most NBMAX, where NBMAX
+*        is used to define the local array T.
+*
+         NB = MIN( NBMAX, ILAENV( 1, 'SORMRQ', SIDE // TRANS, M, N, K,
+     $        -1 ) )
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SORMRQ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      LDWORK = NW
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+         IWS = NW*NB
+         IF( LWORK.LT.IWS ) THEN
+            NB = LWORK / LDWORK
+            NBMIN = MAX( 2, ILAENV( 2, 'SORMRQ', SIDE // TRANS, M, N, K,
+     $              -1 ) )
+         END IF
+      ELSE
+         IWS = NW
+      END IF
+*
+      IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
+*
+*        Use unblocked code
+*
+         CALL SORMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK,
+     $                IINFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( ( LEFT .AND. .NOT.NOTRAN ) .OR.
+     $       ( .NOT.LEFT .AND. NOTRAN ) ) THEN
+            I1 = 1
+            I2 = K
+            I3 = NB
+         ELSE
+            I1 = ( ( K-1 ) / NB )*NB + 1
+            I2 = 1
+            I3 = -NB
+         END IF
+*
+         IF( LEFT ) THEN
+            NI = N
+         ELSE
+            MI = M
+         END IF
+*
+         IF( NOTRAN ) THEN
+            TRANST = 'T'
+         ELSE
+            TRANST = 'N'
+         END IF
+*
+         DO 10 I = I1, I2, I3
+            IB = MIN( NB, K-I+1 )
+*
+*           Form the triangular factor of the block reflector
+*           H = H(i+ib-1) . . . H(i+1) H(i)
+*
+            CALL SLARFT( 'Backward', 'Rowwise', NQ-K+I+IB-1, IB,
+     $                   A( I, 1 ), LDA, TAU( I ), T, LDT )
+            IF( LEFT ) THEN
+*
+*              H or H' is applied to C(1:m-k+i+ib-1,1:n)
+*
+               MI = M - K + I + IB - 1
+            ELSE
+*
+*              H or H' is applied to C(1:m,1:n-k+i+ib-1)
+*
+               NI = N - K + I + IB - 1
+            END IF
+*
+*           Apply H or H'
+*
+            CALL SLARFB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI,
+     $                   IB, A( I, 1 ), LDA, T, LDT, C, LDC, WORK,
+     $                   LDWORK )
+   10    CONTINUE
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of SORMRQ
+*
+      END
diff --git a/costa/native/external/lapack/sormrz.f b/costa/native/external/lapack/sormrz.f
new file mode 100644
index 000000000..e33b24bf0
--- /dev/null
+++ b/costa/native/external/lapack/sormrz.f
@@ -0,0 +1,289 @@
+      SUBROUTINE SORMRZ( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, L, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), C( LDC, * ), TAU( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SORMRZ overwrites the general real M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'T':      Q**T * C       C * Q**T
+*
+*  where Q is a real orthogonal matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(1) H(2) . . . H(k)
+*
+*  as returned by STZRZF. Q is of order M if SIDE = 'L' and of order N
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**T from the Left;
+*          = 'R': apply Q or Q**T from the Right.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q;
+*          = 'T':  Transpose, apply Q**T.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  L       (input) INTEGER
+*          The number of columns of the matrix A containing
+*          the meaningful part of the Householder reflectors.
+*          If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0.
+*
+*  A       (input) REAL array, dimension
+*                               (LDA,M) if SIDE = 'L',
+*                               (LDA,N) if SIDE = 'R'
+*          The i-th row must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          STZRZF in the last k rows of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,K).
+*
+*  TAU     (input) REAL array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by STZRZF.
+*
+*  C       (input/output) REAL array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE = 'L', and
+*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            NBMAX, LDT
+      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, LQUERY, NOTRAN
+      CHARACTER          TRANST
+      INTEGER            I, I1, I2, I3, IB, IC, IINFO, IWS, JA, JC,
+     $                   LDWORK, LWKOPT, MI, NB, NBMIN, NI, NQ, NW
+*     ..
+*     .. Local Arrays ..
+      REAL               T( LDT, NBMAX )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARZB, SLARZT, SORMR3, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( L.LT.0 .OR. ( LEFT .AND. ( L.GT.M ) ) .OR.
+     $         ( .NOT.LEFT .AND. ( L.GT.N ) ) ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, K ) ) THEN
+         INFO = -8
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -11
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size.  NB may be at most NBMAX, where NBMAX
+*        is used to define the local array T.
+*
+         NB = MIN( NBMAX, ILAENV( 1, 'SORMRQ', SIDE // TRANS, M, N, K,
+     $        -1 ) )
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SORMRZ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      LDWORK = NW
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+         IWS = NW*NB
+         IF( LWORK.LT.IWS ) THEN
+            NB = LWORK / LDWORK
+            NBMIN = MAX( 2, ILAENV( 2, 'SORMRQ', SIDE // TRANS, M, N, K,
+     $              -1 ) )
+         END IF
+      ELSE
+         IWS = NW
+      END IF
+*
+      IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
+*
+*        Use unblocked code
+*
+         CALL SORMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC,
+     $                WORK, IINFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( ( LEFT .AND. .NOT.NOTRAN ) .OR.
+     $       ( .NOT.LEFT .AND. NOTRAN ) ) THEN
+            I1 = 1
+            I2 = K
+            I3 = NB
+         ELSE
+            I1 = ( ( K-1 ) / NB )*NB + 1
+            I2 = 1
+            I3 = -NB
+         END IF
+*
+         IF( LEFT ) THEN
+            NI = N
+            JC = 1
+            JA = M - L + 1
+         ELSE
+            MI = M
+            IC = 1
+            JA = N - L + 1
+         END IF
+*
+         IF( NOTRAN ) THEN
+            TRANST = 'T'
+         ELSE
+            TRANST = 'N'
+         END IF
+*
+         DO 10 I = I1, I2, I3
+            IB = MIN( NB, K-I+1 )
+*
+*           Form the triangular factor of the block reflector
+*           H = H(i+ib-1) . . . H(i+1) H(i)
+*
+            CALL SLARZT( 'Backward', 'Rowwise', L, IB, A( I, JA ), LDA,
+     $                   TAU( I ), T, LDT )
+*
+            IF( LEFT ) THEN
+*
+*              H or H' is applied to C(i:m,1:n)
+*
+               MI = M - I + 1
+               IC = I
+            ELSE
+*
+*              H or H' is applied to C(1:m,i:n)
+*
+               NI = N - I + 1
+               JC = I
+            END IF
+*
+*           Apply H or H'
+*
+            CALL SLARZB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI,
+     $                   IB, L, A( I, JA ), LDA, T, LDT, C( IC, JC ),
+     $                   LDC, WORK, LDWORK )
+   10    CONTINUE
+*
+      END IF
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of SORMRZ
+*
+      END
diff --git a/costa/native/external/lapack/sormtr.f b/costa/native/external/lapack/sormtr.f
new file mode 100644
index 000000000..e3f2156f5
--- /dev/null
+++ b/costa/native/external/lapack/sormtr.f
@@ -0,0 +1,224 @@
+      SUBROUTINE SORMTR( SIDE, UPLO, TRANS, M, N, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS, UPLO
+      INTEGER            INFO, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), C( LDC, * ), TAU( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SORMTR overwrites the general real M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'T':      Q**T * C       C * Q**T
+*
+*  where Q is a real orthogonal matrix of order nq, with nq = m if
+*  SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of
+*  nq-1 elementary reflectors, as returned by SSYTRD:
+*
+*  if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1);
+*
+*  if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1).
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**T from the Left;
+*          = 'R': apply Q or Q**T from the Right.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U': Upper triangle of A contains elementary reflectors
+*                 from SSYTRD;
+*          = 'L': Lower triangle of A contains elementary reflectors
+*                 from SSYTRD.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q;
+*          = 'T':  Transpose, apply Q**T.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  A       (input) REAL array, dimension
+*                               (LDA,M) if SIDE = 'L'
+*                               (LDA,N) if SIDE = 'R'
+*          The vectors which define the elementary reflectors, as
+*          returned by SSYTRD.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*          LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'.
+*
+*  TAU     (input) REAL array, dimension
+*                               (M-1) if SIDE = 'L'
+*                               (N-1) if SIDE = 'R'
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by SSYTRD.
+*
+*  C       (input/output) REAL array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE = 'L', and
+*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LEFT, LQUERY, UPPER
+      INTEGER            I1, I2, IINFO, LWKOPT, MI, NI, NB, NQ, NW
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV, LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SORMQL, SORMQR, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      UPPER = LSAME( UPLO, 'U' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.LSAME( TRANS, 'T' ) )
+     $          THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         IF( UPPER ) THEN
+            IF( LEFT ) THEN
+               NB = ILAENV( 1, 'SORMQL', SIDE // TRANS, M-1, N, M-1,
+     $                      -1 )
+            ELSE
+               NB = ILAENV( 1, 'SORMQL', SIDE // TRANS, M, N-1, N-1,
+     $                      -1 )
+            END IF
+         ELSE
+            IF( LEFT ) THEN
+               NB = ILAENV( 1, 'SORMQR', SIDE // TRANS, M-1, N, M-1,
+     $                      -1 )
+            ELSE
+               NB = ILAENV( 1, 'SORMQR', SIDE // TRANS, M, N-1, N-1,
+     $                      -1 )
+            END IF
+         END IF
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SORMTR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. NQ.EQ.1 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( LEFT ) THEN
+         MI = M - 1
+         NI = N
+      ELSE
+         MI = M
+         NI = N - 1
+      END IF
+*
+      IF( UPPER ) THEN
+*
+*        Q was determined by a call to SSYTRD with UPLO = 'U'
+*
+         CALL SORMQL( SIDE, TRANS, MI, NI, NQ-1, A( 1, 2 ), LDA, TAU, C,
+     $                LDC, WORK, LWORK, IINFO )
+      ELSE
+*
+*        Q was determined by a call to SSYTRD with UPLO = 'L'
+*
+         IF( LEFT ) THEN
+            I1 = 2
+            I2 = 1
+         ELSE
+            I1 = 1
+            I2 = 2
+         END IF
+         CALL SORMQR( SIDE, TRANS, MI, NI, NQ-1, A( 2, 1 ), LDA, TAU,
+     $                C( I1, I2 ), LDC, WORK, LWORK, IINFO )
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of SORMTR
+*
+      END
diff --git a/costa/native/external/lapack/spbcon.f b/costa/native/external/lapack/spbcon.f
new file mode 100644
index 000000000..a7e8fc936
--- /dev/null
+++ b/costa/native/external/lapack/spbcon.f
@@ -0,0 +1,188 @@
+      SUBROUTINE SPBCON( UPLO, N, KD, AB, LDAB, ANORM, RCOND, WORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, N
+      REAL               ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               AB( LDAB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPBCON estimates the reciprocal of the condition number (in the
+*  1-norm) of a real symmetric positive definite band matrix using the
+*  Cholesky factorization A = U**T*U or A = L*L**T computed by SPBTRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangular factor stored in AB;
+*          = 'L':  Lower triangular factor stored in AB.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input) REAL array, dimension (LDAB,N)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**T*U or A = L*L**T of the band matrix A, stored in the
+*          first KD+1 rows of the array.  The j-th column of U or L is
+*          stored in the j-th column of the array AB as follows:
+*          if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO ='L', AB(1+i-j,j)    = L(i,j) for j<=i<=min(n,j+kd).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  ANORM   (input) REAL
+*          The 1-norm (or infinity-norm) of the symmetric band matrix A.
+*
+*  RCOND   (output) REAL
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an
+*          estimate of the 1-norm of inv(A) computed in this routine.
+*
+*  WORK    (workspace) REAL array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      CHARACTER          NORMIN
+      INTEGER            IX, KASE
+      REAL               AINVNM, SCALE, SCALEL, SCALEU, SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ISAMAX
+      REAL               SLAMCH
+      EXTERNAL           LSAME, ISAMAX, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLACON, SLATBS, SRSCL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -5
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPBCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.EQ.ZERO ) THEN
+         RETURN
+      END IF
+*
+      SMLNUM = SLAMCH( 'Safe minimum' )
+*
+*     Estimate the 1-norm of the inverse.
+*
+      KASE = 0
+      NORMIN = 'N'
+   10 CONTINUE
+      CALL SLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+         IF( UPPER ) THEN
+*
+*           Multiply by inv(U').
+*
+            CALL SLATBS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N,
+     $                   KD, AB, LDAB, WORK, SCALEL, WORK( 2*N+1 ),
+     $                   INFO )
+            NORMIN = 'Y'
+*
+*           Multiply by inv(U).
+*
+            CALL SLATBS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   KD, AB, LDAB, WORK, SCALEU, WORK( 2*N+1 ),
+     $                   INFO )
+         ELSE
+*
+*           Multiply by inv(L).
+*
+            CALL SLATBS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   KD, AB, LDAB, WORK, SCALEL, WORK( 2*N+1 ),
+     $                   INFO )
+            NORMIN = 'Y'
+*
+*           Multiply by inv(L').
+*
+            CALL SLATBS( 'Lower', 'Transpose', 'Non-unit', NORMIN, N,
+     $                   KD, AB, LDAB, WORK, SCALEU, WORK( 2*N+1 ),
+     $                   INFO )
+         END IF
+*
+*        Multiply by 1/SCALE if doing so will not cause overflow.
+*
+         SCALE = SCALEL*SCALEU
+         IF( SCALE.NE.ONE ) THEN
+            IX = ISAMAX( N, WORK, 1 )
+            IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO )
+     $         GO TO 20
+            CALL SRSCL( N, SCALE, WORK, 1 )
+         END IF
+         GO TO 10
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+   20 CONTINUE
+*
+      RETURN
+*
+*     End of SPBCON
+*
+      END
diff --git a/costa/native/external/lapack/spbequ.f b/costa/native/external/lapack/spbequ.f
new file mode 100644
index 000000000..474d9b9c0
--- /dev/null
+++ b/costa/native/external/lapack/spbequ.f
@@ -0,0 +1,167 @@
+      SUBROUTINE SPBEQU( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, N
+      REAL               AMAX, SCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               AB( LDAB, * ), S( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPBEQU computes row and column scalings intended to equilibrate a
+*  symmetric positive definite band matrix A and reduce its condition
+*  number (with respect to the two-norm).  S contains the scale factors,
+*  S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with
+*  elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal.  This
+*  choice of S puts the condition number of B within a factor N of the
+*  smallest possible condition number over all possible diagonal
+*  scalings.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangular of A is stored;
+*          = 'L':  Lower triangular of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input) REAL array, dimension (LDAB,N)
+*          The upper or lower triangle of the symmetric band matrix A,
+*          stored in the first KD+1 rows of the array.  The j-th column
+*          of A is stored in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*  LDAB     (input) INTEGER
+*          The leading dimension of the array A.  LDAB >= KD+1.
+*
+*  S       (output) REAL array, dimension (N)
+*          If INFO = 0, S contains the scale factors for A.
+*
+*  SCOND   (output) REAL
+*          If INFO = 0, S contains the ratio of the smallest S(i) to
+*          the largest S(i).  If SCOND >= 0.1 and AMAX is neither too
+*          large nor too small, it is not worth scaling by S.
+*
+*  AMAX    (output) REAL
+*          Absolute value of largest matrix element.  If AMAX is very
+*          close to overflow or very close to underflow, the matrix
+*          should be scaled.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = i, the i-th diagonal element is nonpositive.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, J
+      REAL               SMIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPBEQU', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         SCOND = ONE
+         AMAX = ZERO
+         RETURN
+      END IF
+*
+      IF( UPPER ) THEN
+         J = KD + 1
+      ELSE
+         J = 1
+      END IF
+*
+*     Initialize SMIN and AMAX.
+*
+      S( 1 ) = AB( J, 1 )
+      SMIN = S( 1 )
+      AMAX = S( 1 )
+*
+*     Find the minimum and maximum diagonal elements.
+*
+      DO 10 I = 2, N
+         S( I ) = AB( J, I )
+         SMIN = MIN( SMIN, S( I ) )
+         AMAX = MAX( AMAX, S( I ) )
+   10 CONTINUE
+*
+      IF( SMIN.LE.ZERO ) THEN
+*
+*        Find the first non-positive diagonal element and return.
+*
+         DO 20 I = 1, N
+            IF( S( I ).LE.ZERO ) THEN
+               INFO = I
+               RETURN
+            END IF
+   20    CONTINUE
+      ELSE
+*
+*        Set the scale factors to the reciprocals
+*        of the diagonal elements.
+*
+         DO 30 I = 1, N
+            S( I ) = ONE / SQRT( S( I ) )
+   30    CONTINUE
+*
+*        Compute SCOND = min(S(I)) / max(S(I))
+*
+         SCOND = SQRT( SMIN ) / SQRT( AMAX )
+      END IF
+      RETURN
+*
+*     End of SPBEQU
+*
+      END
diff --git a/costa/native/external/lapack/spbrfs.f b/costa/native/external/lapack/spbrfs.f
new file mode 100644
index 000000000..aafa620ef
--- /dev/null
+++ b/costa/native/external/lapack/spbrfs.f
@@ -0,0 +1,337 @@
+      SUBROUTINE SPBRFS( UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, B,
+     $                   LDB, X, LDX, FERR, BERR, WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ),
+     $                   BERR( * ), FERR( * ), WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPBRFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is symmetric positive definite
+*  and banded, and provides error bounds and backward error estimates
+*  for the solution.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AB      (input) REAL array, dimension (LDAB,N)
+*          The upper or lower triangle of the symmetric band matrix A,
+*          stored in the first KD+1 rows of the array.  The j-th column
+*          of A is stored in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  AFB     (input) REAL array, dimension (LDAFB,N)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**T*U or A = L*L**T of the band matrix A as computed by
+*          SPBTRF, in the same storage format as A (see AB).
+*
+*  LDAFB   (input) INTEGER
+*          The leading dimension of the array AFB.  LDAFB >= KD+1.
+*
+*  B       (input) REAL array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) REAL array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by SPBTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) REAL array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+      REAL               TWO
+      PARAMETER          ( TWO = 2.0E+0 )
+      REAL               THREE
+      PARAMETER          ( THREE = 3.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            COUNT, I, J, K, KASE, L, NZ
+      REAL               EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SCOPY, SLACON, SPBTRS, SSBMV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -6
+      ELSE IF( LDAFB.LT.KD+1 ) THEN
+         INFO = -8
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPBRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = MIN( N+1, 2*KD+2 )
+      EPS = SLAMCH( 'Epsilon' )
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - A * X
+*
+         CALL SCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 )
+         CALL SSBMV( UPLO, N, KD, -ONE, AB, LDAB, X( 1, J ), 1, ONE,
+     $               WORK( N+1 ), 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            WORK( I ) = ABS( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(A)*abs(X) + abs(B).
+*
+         IF( UPPER ) THEN
+            DO 50 K = 1, N
+               S = ZERO
+               XK = ABS( X( K, J ) )
+               L = KD + 1 - K
+               DO 40 I = MAX( 1, K-KD ), K - 1
+                  WORK( I ) = WORK( I ) + ABS( AB( L+I, K ) )*XK
+                  S = S + ABS( AB( L+I, K ) )*ABS( X( I, J ) )
+   40          CONTINUE
+               WORK( K ) = WORK( K ) + ABS( AB( KD+1, K ) )*XK + S
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               XK = ABS( X( K, J ) )
+               WORK( K ) = WORK( K ) + ABS( AB( 1, K ) )*XK
+               L = 1 - K
+               DO 60 I = K + 1, MIN( N, K+KD )
+                  WORK( I ) = WORK( I ) + ABS( AB( L+I, K ) )*XK
+                  S = S + ABS( AB( L+I, K ) )*ABS( X( I, J ) )
+   60          CONTINUE
+               WORK( K ) = WORK( K ) + S
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) )
+            ELSE
+               S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) /
+     $             ( WORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL SPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK( N+1 ), N,
+     $                   INFO )
+            CALL SAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(A))*
+*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(A) is the inverse of A
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(A)*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use SLACON to estimate the infinity-norm of the matrix
+*           inv(A) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I )
+            ELSE
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL SLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ),
+     $                KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(A').
+*
+               CALL SPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK( N+1 ), N,
+     $                      INFO )
+               DO 110 I = 1, N
+                  WORK( N+I ) = WORK( N+I )*WORK( I )
+  110          CONTINUE
+            ELSE IF( KASE.EQ.2 ) THEN
+*
+*              Multiply by inv(A)*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( N+I ) = WORK( N+I )*WORK( I )
+  120          CONTINUE
+               CALL SPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK( N+1 ), N,
+     $                      INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, ABS( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of SPBRFS
+*
+      END
diff --git a/costa/native/external/lapack/spbstf.f b/costa/native/external/lapack/spbstf.f
new file mode 100644
index 000000000..3cd7a68a7
--- /dev/null
+++ b/costa/native/external/lapack/spbstf.f
@@ -0,0 +1,251 @@
+      SUBROUTINE SPBSTF( UPLO, N, KD, AB, LDAB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, N
+*     ..
+*     .. Array Arguments ..
+      REAL               AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPBSTF computes a split Cholesky factorization of a real
+*  symmetric positive definite band matrix A.
+*
+*  This routine is designed to be used in conjunction with SSBGST.
+*
+*  The factorization has the form  A = S**T*S  where S is a band matrix
+*  of the same bandwidth as A and the following structure:
+*
+*    S = ( U    )
+*        ( M  L )
+*
+*  where U is upper triangular of order m = (n+kd)/2, and L is lower
+*  triangular of order n-m.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) REAL array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first kd+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*          On exit, if INFO = 0, the factor S from the split Cholesky
+*          factorization A = S**T*S. See Further Details.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, the factorization could not be completed,
+*               because the updated element a(i,i) was negative; the
+*               matrix A is not positive definite.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  N = 7, KD = 2:
+*
+*  S = ( s11  s12  s13                     )
+*      (      s22  s23  s24                )
+*      (           s33  s34                )
+*      (                s44                )
+*      (           s53  s54  s55           )
+*      (                s64  s65  s66      )
+*      (                     s75  s76  s77 )
+*
+*  If UPLO = 'U', the array AB holds:
+*
+*  on entry:                          on exit:
+*
+*   *    *   a13  a24  a35  a46  a57   *    *   s13  s24  s53  s64  s75
+*   *   a12  a23  a34  a45  a56  a67   *   s12  s23  s34  s54  s65  s76
+*  a11  a22  a33  a44  a55  a66  a77  s11  s22  s33  s44  s55  s66  s77
+*
+*  If UPLO = 'L', the array AB holds:
+*
+*  on entry:                          on exit:
+*
+*  a11  a22  a33  a44  a55  a66  a77  s11  s22  s33  s44  s55  s66  s77
+*  a21  a32  a43  a54  a65  a76   *   s12  s23  s34  s54  s65  s76   *
+*  a31  a42  a53  a64  a64   *    *   s13  s24  s53  s64  s75   *    *
+*
+*  Array elements marked * are not used by the routine.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, KLD, KM, M
+      REAL               AJJ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SSCAL, SSYR, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPBSTF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      KLD = MAX( 1, LDAB-1 )
+*
+*     Set the splitting point m.
+*
+      M = ( N+KD ) / 2
+*
+      IF( UPPER ) THEN
+*
+*        Factorize A(m+1:n,m+1:n) as L**T*L, and update A(1:m,1:m).
+*
+         DO 10 J = N, M + 1, -1
+*
+*           Compute s(j,j) and test for non-positive-definiteness.
+*
+            AJJ = AB( KD+1, J )
+            IF( AJJ.LE.ZERO )
+     $         GO TO 50
+            AJJ = SQRT( AJJ )
+            AB( KD+1, J ) = AJJ
+            KM = MIN( J-1, KD )
+*
+*           Compute elements j-km:j-1 of the j-th column and update the
+*           the leading submatrix within the band.
+*
+            CALL SSCAL( KM, ONE / AJJ, AB( KD+1-KM, J ), 1 )
+            CALL SSYR( 'Upper', KM, -ONE, AB( KD+1-KM, J ), 1,
+     $                 AB( KD+1, J-KM ), KLD )
+   10    CONTINUE
+*
+*        Factorize the updated submatrix A(1:m,1:m) as U**T*U.
+*
+         DO 20 J = 1, M
+*
+*           Compute s(j,j) and test for non-positive-definiteness.
+*
+            AJJ = AB( KD+1, J )
+            IF( AJJ.LE.ZERO )
+     $         GO TO 50
+            AJJ = SQRT( AJJ )
+            AB( KD+1, J ) = AJJ
+            KM = MIN( KD, M-J )
+*
+*           Compute elements j+1:j+km of the j-th row and update the
+*           trailing submatrix within the band.
+*
+            IF( KM.GT.0 ) THEN
+               CALL SSCAL( KM, ONE / AJJ, AB( KD, J+1 ), KLD )
+               CALL SSYR( 'Upper', KM, -ONE, AB( KD, J+1 ), KLD,
+     $                    AB( KD+1, J+1 ), KLD )
+            END IF
+   20    CONTINUE
+      ELSE
+*
+*        Factorize A(m+1:n,m+1:n) as L**T*L, and update A(1:m,1:m).
+*
+         DO 30 J = N, M + 1, -1
+*
+*           Compute s(j,j) and test for non-positive-definiteness.
+*
+            AJJ = AB( 1, J )
+            IF( AJJ.LE.ZERO )
+     $         GO TO 50
+            AJJ = SQRT( AJJ )
+            AB( 1, J ) = AJJ
+            KM = MIN( J-1, KD )
+*
+*           Compute elements j-km:j-1 of the j-th row and update the
+*           trailing submatrix within the band.
+*
+            CALL SSCAL( KM, ONE / AJJ, AB( KM+1, J-KM ), KLD )
+            CALL SSYR( 'Lower', KM, -ONE, AB( KM+1, J-KM ), KLD,
+     $                 AB( 1, J-KM ), KLD )
+   30    CONTINUE
+*
+*        Factorize the updated submatrix A(1:m,1:m) as U**T*U.
+*
+         DO 40 J = 1, M
+*
+*           Compute s(j,j) and test for non-positive-definiteness.
+*
+            AJJ = AB( 1, J )
+            IF( AJJ.LE.ZERO )
+     $         GO TO 50
+            AJJ = SQRT( AJJ )
+            AB( 1, J ) = AJJ
+            KM = MIN( KD, M-J )
+*
+*           Compute elements j+1:j+km of the j-th column and update the
+*           trailing submatrix within the band.
+*
+            IF( KM.GT.0 ) THEN
+               CALL SSCAL( KM, ONE / AJJ, AB( 2, J ), 1 )
+               CALL SSYR( 'Lower', KM, -ONE, AB( 2, J ), 1,
+     $                    AB( 1, J+1 ), KLD )
+            END IF
+   40    CONTINUE
+      END IF
+      RETURN
+*
+   50 CONTINUE
+      INFO = J
+      RETURN
+*
+*     End of SPBSTF
+*
+      END
diff --git a/costa/native/external/lapack/spbsv.f b/costa/native/external/lapack/spbsv.f
new file mode 100644
index 000000000..ee08b73f3
--- /dev/null
+++ b/costa/native/external/lapack/spbsv.f
@@ -0,0 +1,152 @@
+      SUBROUTINE SPBSV( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      REAL               AB( LDAB, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPBSV computes the solution to a real system of linear equations
+*     A * X = B,
+*  where A is an N-by-N symmetric positive definite band matrix and X
+*  and B are N-by-NRHS matrices.
+*
+*  The Cholesky decomposition is used to factor A as
+*     A = U**T * U,  if UPLO = 'U', or
+*     A = L * L**T,  if UPLO = 'L',
+*  where U is an upper triangular band matrix, and L is a lower
+*  triangular band matrix, with the same number of superdiagonals or
+*  subdiagonals as A.  The factored form of A is then used to solve the
+*  system of equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AB      (input/output) REAL array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(KD+1+i-j,j) = A(i,j) for max(1,j-KD)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(N,j+KD).
+*          See below for further details.
+*
+*          On exit, if INFO = 0, the triangular factor U or L from the
+*          Cholesky factorization A = U**T*U or A = L*L**T of the band
+*          matrix A, in the same storage format as A.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i of A is not
+*                positive definite, so the factorization could not be
+*                completed, and the solution has not been computed.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  N = 6, KD = 2, and UPLO = 'U':
+*
+*  On entry:                       On exit:
+*
+*      *    *   a13  a24  a35  a46      *    *   u13  u24  u35  u46
+*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
+*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
+*
+*  Similarly, if UPLO = 'L' the format of A is as follows:
+*
+*  On entry:                       On exit:
+*
+*     a11  a22  a33  a44  a55  a66     l11  l22  l33  l44  l55  l66
+*     a21  a32  a43  a54  a65   *      l21  l32  l43  l54  l65   *
+*     a31  a42  a53  a64   *    *      l31  l42  l53  l64   *    *
+*
+*  Array elements marked * are not used by the routine.
+*
+*  =====================================================================
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SPBTRF, SPBTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPBSV ', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the Cholesky factorization A = U'*U or A = L*L'.
+*
+      CALL SPBTRF( UPLO, N, KD, AB, LDAB, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL SPBTRS( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO )
+*
+      END IF
+      RETURN
+*
+*     End of SPBSV
+*
+      END
diff --git a/costa/native/external/lapack/spbsvx.f b/costa/native/external/lapack/spbsvx.f
new file mode 100644
index 000000000..8b75d8dad
--- /dev/null
+++ b/costa/native/external/lapack/spbsvx.f
@@ -0,0 +1,424 @@
+      SUBROUTINE SPBSVX( FACT, UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB,
+     $                   EQUED, S, B, LDB, X, LDX, RCOND, FERR, BERR,
+     $                   WORK, IWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, FACT, UPLO
+      INTEGER            INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ),
+     $                   BERR( * ), FERR( * ), S( * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPBSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to
+*  compute the solution to a real system of linear equations
+*     A * X = B,
+*  where A is an N-by-N symmetric positive definite band matrix and X
+*  and B are N-by-NRHS matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'E', real scaling factors are computed to equilibrate
+*     the system:
+*        diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B
+*     Whether or not the system will be equilibrated depends on the
+*     scaling of the matrix A, but if equilibration is used, A is
+*     overwritten by diag(S)*A*diag(S) and B by diag(S)*B.
+*
+*  2. If FACT = 'N' or 'E', the Cholesky decomposition is used to
+*     factor the matrix A (after equilibration if FACT = 'E') as
+*        A = U**T * U,  if UPLO = 'U', or
+*        A = L * L**T,  if UPLO = 'L',
+*     where U is an upper triangular band matrix, and L is a lower
+*     triangular band matrix.
+*
+*  3. If the leading i-by-i principal minor is not positive definite,
+*     then the routine returns with INFO = i. Otherwise, the factored
+*     form of A is used to estimate the condition number of the matrix
+*     A.  If the reciprocal of the condition number is less than machine
+*     precision, INFO = N+1 is returned as a warning, but the routine
+*     still goes on to solve for X and compute error bounds as
+*     described below.
+*
+*  4. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  5. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  6. If equilibration was used, the matrix X is premultiplied by
+*     diag(S) so that it solves the original system before
+*     equilibration.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of the matrix A is
+*          supplied on entry, and if not, whether the matrix A should be
+*          equilibrated before it is factored.
+*          = 'F':  On entry, AFB contains the factored form of A.
+*                  If EQUED = 'Y', the matrix A has been equilibrated
+*                  with scaling factors given by S.  AB and AFB will not
+*                  be modified.
+*          = 'N':  The matrix A will be copied to AFB and factored.
+*          = 'E':  The matrix A will be equilibrated if necessary, then
+*                  copied to AFB and factored.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right-hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AB      (input/output) REAL array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first KD+1 rows of the array, except
+*          if FACT = 'F' and EQUED = 'Y', then A must contain the
+*          equilibrated matrix diag(S)*A*diag(S).  The j-th column of A
+*          is stored in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(KD+1+i-j,j) = A(i,j) for max(1,j-KD)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(N,j+KD).
+*          See below for further details.
+*
+*          On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by
+*          diag(S)*A*diag(S).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array A.  LDAB >= KD+1.
+*
+*  AFB     (input or output) REAL array, dimension (LDAFB,N)
+*          If FACT = 'F', then AFB is an input argument and on entry
+*          contains the triangular factor U or L from the Cholesky
+*          factorization A = U**T*U or A = L*L**T of the band matrix
+*          A, in the same storage format as A (see AB).  If EQUED = 'Y',
+*          then AFB is the factored form of the equilibrated matrix A.
+*
+*          If FACT = 'N', then AFB is an output argument and on exit
+*          returns the triangular factor U or L from the Cholesky
+*          factorization A = U**T*U or A = L*L**T.
+*
+*          If FACT = 'E', then AFB is an output argument and on exit
+*          returns the triangular factor U or L from the Cholesky
+*          factorization A = U**T*U or A = L*L**T of the equilibrated
+*          matrix A (see the description of A for the form of the
+*          equilibrated matrix).
+*
+*  LDAFB   (input) INTEGER
+*          The leading dimension of the array AFB.  LDAFB >= KD+1.
+*
+*  EQUED   (input or output) CHARACTER*1
+*          Specifies the form of equilibration that was done.
+*          = 'N':  No equilibration (always true if FACT = 'N').
+*          = 'Y':  Equilibration was done, i.e., A has been replaced by
+*                  diag(S) * A * diag(S).
+*          EQUED is an input argument if FACT = 'F'; otherwise, it is an
+*          output argument.
+*
+*  S       (input or output) REAL array, dimension (N)
+*          The scale factors for A; not accessed if EQUED = 'N'.  S is
+*          an input argument if FACT = 'F'; otherwise, S is an output
+*          argument.  If FACT = 'F' and EQUED = 'Y', each element of S
+*          must be positive.
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y',
+*          B is overwritten by diag(S) * B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) REAL array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to
+*          the original system of equations.  Note that if EQUED = 'Y',
+*          A and B are modified on exit, and the solution to the
+*          equilibrated system is inv(diag(S))*X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) REAL
+*          The estimate of the reciprocal condition number of the matrix
+*          A after equilibration (if done).  If RCOND is less than the
+*          machine precision (in particular, if RCOND = 0), the matrix
+*          is singular to working precision.  This condition is
+*          indicated by a return code of INFO > 0.
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) REAL array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= N:  the leading minor of order i of A is
+*                       not positive definite, so the factorization
+*                       could not be completed, and the solution has not
+*                       been computed. RCOND = 0 is returned.
+*                = N+1: U is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  N = 6, KD = 2, and UPLO = 'U':
+*
+*  Two-dimensional storage of the symmetric matrix A:
+*
+*     a11  a12  a13
+*          a22  a23  a24
+*               a33  a34  a35
+*                    a44  a45  a46
+*                         a55  a56
+*     (aij=conjg(aji))         a66
+*
+*  Band storage of the upper triangle of A:
+*
+*      *    *   a13  a24  a35  a46
+*      *   a12  a23  a34  a45  a56
+*     a11  a22  a33  a44  a55  a66
+*
+*  Similarly, if UPLO = 'L' the format of A is as follows:
+*
+*     a11  a22  a33  a44  a55  a66
+*     a21  a32  a43  a54  a65   *
+*     a31  a42  a53  a64   *    *
+*
+*  Array elements marked * are not used by the routine.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            EQUIL, NOFACT, RCEQU, UPPER
+      INTEGER            I, INFEQU, J, J1, J2
+      REAL               AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH, SLANSB
+      EXTERNAL           LSAME, SLAMCH, SLANSB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SLACPY, SLAQSB, SPBCON, SPBEQU, SPBRFS,
+     $                   SPBTRF, SPBTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      EQUIL = LSAME( FACT, 'E' )
+      UPPER = LSAME( UPLO, 'U' )
+      IF( NOFACT .OR. EQUIL ) THEN
+         EQUED = 'N'
+         RCEQU = .FALSE.
+      ELSE
+         RCEQU = LSAME( EQUED, 'Y' )
+         SMLNUM = SLAMCH( 'Safe minimum' )
+         BIGNUM = ONE / SMLNUM
+      END IF
+*
+*     Test the input parameters.
+*
+      IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) )
+     $     THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -7
+      ELSE IF( LDAFB.LT.KD+1 ) THEN
+         INFO = -9
+      ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT.
+     $         ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN
+         INFO = -10
+      ELSE
+         IF( RCEQU ) THEN
+            SMIN = BIGNUM
+            SMAX = ZERO
+            DO 10 J = 1, N
+               SMIN = MIN( SMIN, S( J ) )
+               SMAX = MAX( SMAX, S( J ) )
+   10       CONTINUE
+            IF( SMIN.LE.ZERO ) THEN
+               INFO = -11
+            ELSE IF( N.GT.0 ) THEN
+               SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM )
+            ELSE
+               SCOND = ONE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            IF( LDB.LT.MAX( 1, N ) ) THEN
+               INFO = -13
+            ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+               INFO = -15
+            END IF
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPBSVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( EQUIL ) THEN
+*
+*        Compute row and column scalings to equilibrate the matrix A.
+*
+         CALL SPBEQU( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, INFEQU )
+         IF( INFEQU.EQ.0 ) THEN
+*
+*           Equilibrate the matrix.
+*
+            CALL SLAQSB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED )
+            RCEQU = LSAME( EQUED, 'Y' )
+         END IF
+      END IF
+*
+*     Scale the right-hand side.
+*
+      IF( RCEQU ) THEN
+         DO 30 J = 1, NRHS
+            DO 20 I = 1, N
+               B( I, J ) = S( I )*B( I, J )
+   20       CONTINUE
+   30    CONTINUE
+      END IF
+*
+      IF( NOFACT .OR. EQUIL ) THEN
+*
+*        Compute the Cholesky factorization A = U'*U or A = L*L'.
+*
+         IF( UPPER ) THEN
+            DO 40 J = 1, N
+               J1 = MAX( J-KD, 1 )
+               CALL SCOPY( J-J1+1, AB( KD+1-J+J1, J ), 1,
+     $                     AFB( KD+1-J+J1, J ), 1 )
+   40       CONTINUE
+         ELSE
+            DO 50 J = 1, N
+               J2 = MIN( J+KD, N )
+               CALL SCOPY( J2-J+1, AB( 1, J ), 1, AFB( 1, J ), 1 )
+   50       CONTINUE
+         END IF
+*
+         CALL SPBTRF( UPLO, N, KD, AFB, LDAFB, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      ANORM = SLANSB( '1', UPLO, N, KD, AB, LDAB, WORK )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL SPBCON( UPLO, N, KD, AFB, LDAFB, ANORM, RCOND, WORK, IWORK,
+     $             INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.SLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution matrix X.
+*
+      CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL SPBTRS( UPLO, N, KD, NRHS, AFB, LDAFB, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solution and
+*     compute error bounds and backward error estimates for it.
+*
+      CALL SPBRFS( UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, B, LDB, X,
+     $             LDX, FERR, BERR, WORK, IWORK, INFO )
+*
+*     Transform the solution matrix X to a solution of the original
+*     system.
+*
+      IF( RCEQU ) THEN
+         DO 70 J = 1, NRHS
+            DO 60 I = 1, N
+               X( I, J ) = S( I )*X( I, J )
+   60       CONTINUE
+   70    CONTINUE
+         DO 80 J = 1, NRHS
+            FERR( J ) = FERR( J ) / SCOND
+   80    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of SPBSVX
+*
+      END
diff --git a/costa/native/external/lapack/spbtf2.f b/costa/native/external/lapack/spbtf2.f
new file mode 100644
index 000000000..ba5e389b0
--- /dev/null
+++ b/costa/native/external/lapack/spbtf2.f
@@ -0,0 +1,195 @@
+      SUBROUTINE SPBTF2( UPLO, N, KD, AB, LDAB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, N
+*     ..
+*     .. Array Arguments ..
+      REAL               AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPBTF2 computes the Cholesky factorization of a real symmetric
+*  positive definite band matrix A.
+*
+*  The factorization has the form
+*     A = U' * U ,  if UPLO = 'U', or
+*     A = L  * L',  if UPLO = 'L',
+*  where U is an upper triangular matrix, U' is the transpose of U, and
+*  L is lower triangular.
+*
+*  This is the unblocked version of the algorithm, calling Level 2 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored:
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of super-diagonals of the matrix A if UPLO = 'U',
+*          or the number of sub-diagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) REAL array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*          On exit, if INFO = 0, the triangular factor U or L from the
+*          Cholesky factorization A = U'*U or A = L*L' of the band
+*          matrix A, in the same storage format as A.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*          > 0: if INFO = k, the leading minor of order k is not
+*               positive definite, and the factorization could not be
+*               completed.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  N = 6, KD = 2, and UPLO = 'U':
+*
+*  On entry:                       On exit:
+*
+*      *    *   a13  a24  a35  a46      *    *   u13  u24  u35  u46
+*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
+*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
+*
+*  Similarly, if UPLO = 'L' the format of A is as follows:
+*
+*  On entry:                       On exit:
+*
+*     a11  a22  a33  a44  a55  a66     l11  l22  l33  l44  l55  l66
+*     a21  a32  a43  a54  a65   *      l21  l32  l43  l54  l65   *
+*     a31  a42  a53  a64   *    *      l31  l42  l53  l64   *    *
+*
+*  Array elements marked * are not used by the routine.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, KLD, KN
+      REAL               AJJ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SSCAL, SSYR, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPBTF2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      KLD = MAX( 1, LDAB-1 )
+*
+      IF( UPPER ) THEN
+*
+*        Compute the Cholesky factorization A = U'*U.
+*
+         DO 10 J = 1, N
+*
+*           Compute U(J,J) and test for non-positive-definiteness.
+*
+            AJJ = AB( KD+1, J )
+            IF( AJJ.LE.ZERO )
+     $         GO TO 30
+            AJJ = SQRT( AJJ )
+            AB( KD+1, J ) = AJJ
+*
+*           Compute elements J+1:J+KN of row J and update the
+*           trailing submatrix within the band.
+*
+            KN = MIN( KD, N-J )
+            IF( KN.GT.0 ) THEN
+               CALL SSCAL( KN, ONE / AJJ, AB( KD, J+1 ), KLD )
+               CALL SSYR( 'Upper', KN, -ONE, AB( KD, J+1 ), KLD,
+     $                    AB( KD+1, J+1 ), KLD )
+            END IF
+   10    CONTINUE
+      ELSE
+*
+*        Compute the Cholesky factorization A = L*L'.
+*
+         DO 20 J = 1, N
+*
+*           Compute L(J,J) and test for non-positive-definiteness.
+*
+            AJJ = AB( 1, J )
+            IF( AJJ.LE.ZERO )
+     $         GO TO 30
+            AJJ = SQRT( AJJ )
+            AB( 1, J ) = AJJ
+*
+*           Compute elements J+1:J+KN of column J and update the
+*           trailing submatrix within the band.
+*
+            KN = MIN( KD, N-J )
+            IF( KN.GT.0 ) THEN
+               CALL SSCAL( KN, ONE / AJJ, AB( 2, J ), 1 )
+               CALL SSYR( 'Lower', KN, -ONE, AB( 2, J ), 1,
+     $                    AB( 1, J+1 ), KLD )
+            END IF
+   20    CONTINUE
+      END IF
+      RETURN
+*
+   30 CONTINUE
+      INFO = J
+      RETURN
+*
+*     End of SPBTF2
+*
+      END
diff --git a/costa/native/external/lapack/spbtrf.f b/costa/native/external/lapack/spbtrf.f
new file mode 100644
index 000000000..584b09f07
--- /dev/null
+++ b/costa/native/external/lapack/spbtrf.f
@@ -0,0 +1,365 @@
+      SUBROUTINE SPBTRF( UPLO, N, KD, AB, LDAB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, N
+*     ..
+*     .. Array Arguments ..
+      REAL               AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPBTRF computes the Cholesky factorization of a real symmetric
+*  positive definite band matrix A.
+*
+*  The factorization has the form
+*     A = U**T * U,  if UPLO = 'U', or
+*     A = L  * L**T,  if UPLO = 'L',
+*  where U is an upper triangular matrix and L is lower triangular.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) REAL array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*          On exit, if INFO = 0, the triangular factor U or L from the
+*          Cholesky factorization A = U**T*U or A = L*L**T of the band
+*          matrix A, in the same storage format as A.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i is not
+*                positive definite, and the factorization could not be
+*                completed.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  N = 6, KD = 2, and UPLO = 'U':
+*
+*  On entry:                       On exit:
+*
+*      *    *   a13  a24  a35  a46      *    *   u13  u24  u35  u46
+*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
+*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
+*
+*  Similarly, if UPLO = 'L' the format of A is as follows:
+*
+*  On entry:                       On exit:
+*
+*     a11  a22  a33  a44  a55  a66     l11  l22  l33  l44  l55  l66
+*     a21  a32  a43  a54  a65   *      l21  l32  l43  l54  l65   *
+*     a31  a42  a53  a64   *    *      l31  l42  l53  l64   *    *
+*
+*  Array elements marked * are not used by the routine.
+*
+*  Contributed by
+*  Peter Mayes and Giuseppe Radicati, IBM ECSEC, Rome, March 23, 1989
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+      INTEGER            NBMAX, LDWORK
+      PARAMETER          ( NBMAX = 32, LDWORK = NBMAX+1 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, I2, I3, IB, II, J, JJ, NB
+*     ..
+*     .. Local Arrays ..
+      REAL               WORK( LDWORK, NBMAX )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMM, SPBTF2, SPOTF2, SSYRK, STRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( ( .NOT.LSAME( UPLO, 'U' ) ) .AND.
+     $    ( .NOT.LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPBTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment
+*
+      NB = ILAENV( 1, 'SPBTRF', UPLO, N, KD, -1, -1 )
+*
+*     The block size must not exceed the semi-bandwidth KD, and must not
+*     exceed the limit set by the size of the local array WORK.
+*
+      NB = MIN( NB, NBMAX )
+*
+      IF( NB.LE.1 .OR. NB.GT.KD ) THEN
+*
+*        Use unblocked code
+*
+         CALL SPBTF2( UPLO, N, KD, AB, LDAB, INFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*           Compute the Cholesky factorization of a symmetric band
+*           matrix, given the upper triangle of the matrix in band
+*           storage.
+*
+*           Zero the upper triangle of the work array.
+*
+            DO 20 J = 1, NB
+               DO 10 I = 1, J - 1
+                  WORK( I, J ) = ZERO
+   10          CONTINUE
+   20       CONTINUE
+*
+*           Process the band matrix one diagonal block at a time.
+*
+            DO 70 I = 1, N, NB
+               IB = MIN( NB, N-I+1 )
+*
+*              Factorize the diagonal block
+*
+               CALL SPOTF2( UPLO, IB, AB( KD+1, I ), LDAB-1, II )
+               IF( II.NE.0 ) THEN
+                  INFO = I + II - 1
+                  GO TO 150
+               END IF
+               IF( I+IB.LE.N ) THEN
+*
+*                 Update the relevant part of the trailing submatrix.
+*                 If A11 denotes the diagonal block which has just been
+*                 factorized, then we need to update the remaining
+*                 blocks in the diagram:
+*
+*                    A11   A12   A13
+*                          A22   A23
+*                                A33
+*
+*                 The numbers of rows and columns in the partitioning
+*                 are IB, I2, I3 respectively. The blocks A12, A22 and
+*                 A23 are empty if IB = KD. The upper triangle of A13
+*                 lies outside the band.
+*
+                  I2 = MIN( KD-IB, N-I-IB+1 )
+                  I3 = MIN( IB, N-I-KD+1 )
+*
+                  IF( I2.GT.0 ) THEN
+*
+*                    Update A12
+*
+                     CALL STRSM( 'Left', 'Upper', 'Transpose',
+     $                           'Non-unit', IB, I2, ONE, AB( KD+1, I ),
+     $                           LDAB-1, AB( KD+1-IB, I+IB ), LDAB-1 )
+*
+*                    Update A22
+*
+                     CALL SSYRK( 'Upper', 'Transpose', I2, IB, -ONE,
+     $                           AB( KD+1-IB, I+IB ), LDAB-1, ONE,
+     $                           AB( KD+1, I+IB ), LDAB-1 )
+                  END IF
+*
+                  IF( I3.GT.0 ) THEN
+*
+*                    Copy the lower triangle of A13 into the work array.
+*
+                     DO 40 JJ = 1, I3
+                        DO 30 II = JJ, IB
+                           WORK( II, JJ ) = AB( II-JJ+1, JJ+I+KD-1 )
+   30                   CONTINUE
+   40                CONTINUE
+*
+*                    Update A13 (in the work array).
+*
+                     CALL STRSM( 'Left', 'Upper', 'Transpose',
+     $                           'Non-unit', IB, I3, ONE, AB( KD+1, I ),
+     $                           LDAB-1, WORK, LDWORK )
+*
+*                    Update A23
+*
+                     IF( I2.GT.0 )
+     $                  CALL SGEMM( 'Transpose', 'No Transpose', I2, I3,
+     $                              IB, -ONE, AB( KD+1-IB, I+IB ),
+     $                              LDAB-1, WORK, LDWORK, ONE,
+     $                              AB( 1+IB, I+KD ), LDAB-1 )
+*
+*                    Update A33
+*
+                     CALL SSYRK( 'Upper', 'Transpose', I3, IB, -ONE,
+     $                           WORK, LDWORK, ONE, AB( KD+1, I+KD ),
+     $                           LDAB-1 )
+*
+*                    Copy the lower triangle of A13 back into place.
+*
+                     DO 60 JJ = 1, I3
+                        DO 50 II = JJ, IB
+                           AB( II-JJ+1, JJ+I+KD-1 ) = WORK( II, JJ )
+   50                   CONTINUE
+   60                CONTINUE
+                  END IF
+               END IF
+   70       CONTINUE
+         ELSE
+*
+*           Compute the Cholesky factorization of a symmetric band
+*           matrix, given the lower triangle of the matrix in band
+*           storage.
+*
+*           Zero the lower triangle of the work array.
+*
+            DO 90 J = 1, NB
+               DO 80 I = J + 1, NB
+                  WORK( I, J ) = ZERO
+   80          CONTINUE
+   90       CONTINUE
+*
+*           Process the band matrix one diagonal block at a time.
+*
+            DO 140 I = 1, N, NB
+               IB = MIN( NB, N-I+1 )
+*
+*              Factorize the diagonal block
+*
+               CALL SPOTF2( UPLO, IB, AB( 1, I ), LDAB-1, II )
+               IF( II.NE.0 ) THEN
+                  INFO = I + II - 1
+                  GO TO 150
+               END IF
+               IF( I+IB.LE.N ) THEN
+*
+*                 Update the relevant part of the trailing submatrix.
+*                 If A11 denotes the diagonal block which has just been
+*                 factorized, then we need to update the remaining
+*                 blocks in the diagram:
+*
+*                    A11
+*                    A21   A22
+*                    A31   A32   A33
+*
+*                 The numbers of rows and columns in the partitioning
+*                 are IB, I2, I3 respectively. The blocks A21, A22 and
+*                 A32 are empty if IB = KD. The lower triangle of A31
+*                 lies outside the band.
+*
+                  I2 = MIN( KD-IB, N-I-IB+1 )
+                  I3 = MIN( IB, N-I-KD+1 )
+*
+                  IF( I2.GT.0 ) THEN
+*
+*                    Update A21
+*
+                     CALL STRSM( 'Right', 'Lower', 'Transpose',
+     $                           'Non-unit', I2, IB, ONE, AB( 1, I ),
+     $                           LDAB-1, AB( 1+IB, I ), LDAB-1 )
+*
+*                    Update A22
+*
+                     CALL SSYRK( 'Lower', 'No Transpose', I2, IB, -ONE,
+     $                           AB( 1+IB, I ), LDAB-1, ONE,
+     $                           AB( 1, I+IB ), LDAB-1 )
+                  END IF
+*
+                  IF( I3.GT.0 ) THEN
+*
+*                    Copy the upper triangle of A31 into the work array.
+*
+                     DO 110 JJ = 1, IB
+                        DO 100 II = 1, MIN( JJ, I3 )
+                           WORK( II, JJ ) = AB( KD+1-JJ+II, JJ+I-1 )
+  100                   CONTINUE
+  110                CONTINUE
+*
+*                    Update A31 (in the work array).
+*
+                     CALL STRSM( 'Right', 'Lower', 'Transpose',
+     $                           'Non-unit', I3, IB, ONE, AB( 1, I ),
+     $                           LDAB-1, WORK, LDWORK )
+*
+*                    Update A32
+*
+                     IF( I2.GT.0 )
+     $                  CALL SGEMM( 'No transpose', 'Transpose', I3, I2,
+     $                              IB, -ONE, WORK, LDWORK,
+     $                              AB( 1+IB, I ), LDAB-1, ONE,
+     $                              AB( 1+KD-IB, I+IB ), LDAB-1 )
+*
+*                    Update A33
+*
+                     CALL SSYRK( 'Lower', 'No Transpose', I3, IB, -ONE,
+     $                           WORK, LDWORK, ONE, AB( 1, I+KD ),
+     $                           LDAB-1 )
+*
+*                    Copy the upper triangle of A31 back into place.
+*
+                     DO 130 JJ = 1, IB
+                        DO 120 II = 1, MIN( JJ, I3 )
+                           AB( KD+1-JJ+II, JJ+I-1 ) = WORK( II, JJ )
+  120                   CONTINUE
+  130                CONTINUE
+                  END IF
+               END IF
+  140       CONTINUE
+         END IF
+      END IF
+      RETURN
+*
+  150 CONTINUE
+      RETURN
+*
+*     End of SPBTRF
+*
+      END
diff --git a/costa/native/external/lapack/spbtrs.f b/costa/native/external/lapack/spbtrs.f
new file mode 100644
index 000000000..a97776da1
--- /dev/null
+++ b/costa/native/external/lapack/spbtrs.f
@@ -0,0 +1,146 @@
+      SUBROUTINE SPBTRS( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      REAL               AB( LDAB, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPBTRS solves a system of linear equations A*X = B with a symmetric
+*  positive definite band matrix A using the Cholesky factorization
+*  A = U**T*U or A = L*L**T computed by SPBTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangular factor stored in AB;
+*          = 'L':  Lower triangular factor stored in AB.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AB      (input) REAL array, dimension (LDAB,N)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**T*U or A = L*L**T of the band matrix A, stored in the
+*          first KD+1 rows of the array.  The j-th column of U or L is
+*          stored in the j-th column of the array AB as follows:
+*          if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO ='L', AB(1+i-j,j)    = L(i,j) for j<=i<=min(n,j+kd).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           STBSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPBTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Solve A*X = B where A = U'*U.
+*
+         DO 10 J = 1, NRHS
+*
+*           Solve U'*X = B, overwriting B with X.
+*
+            CALL STBSV( 'Upper', 'Transpose', 'Non-unit', N, KD, AB,
+     $                  LDAB, B( 1, J ), 1 )
+*
+*           Solve U*X = B, overwriting B with X.
+*
+            CALL STBSV( 'Upper', 'No transpose', 'Non-unit', N, KD, AB,
+     $                  LDAB, B( 1, J ), 1 )
+   10    CONTINUE
+      ELSE
+*
+*        Solve A*X = B where A = L*L'.
+*
+         DO 20 J = 1, NRHS
+*
+*           Solve L*X = B, overwriting B with X.
+*
+            CALL STBSV( 'Lower', 'No transpose', 'Non-unit', N, KD, AB,
+     $                  LDAB, B( 1, J ), 1 )
+*
+*           Solve L'*X = B, overwriting B with X.
+*
+            CALL STBSV( 'Lower', 'Transpose', 'Non-unit', N, KD, AB,
+     $                  LDAB, B( 1, J ), 1 )
+   20    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of SPBTRS
+*
+      END
diff --git a/costa/native/external/lapack/spocon.f b/costa/native/external/lapack/spocon.f
new file mode 100644
index 000000000..db4cb775d
--- /dev/null
+++ b/costa/native/external/lapack/spocon.f
@@ -0,0 +1,173 @@
+      SUBROUTINE SPOCON( UPLO, N, A, LDA, ANORM, RCOND, WORK, IWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+      REAL               ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPOCON estimates the reciprocal of the condition number (in the 
+*  1-norm) of a real symmetric positive definite matrix using the
+*  Cholesky factorization A = U**T*U or A = L*L**T computed by SPOTRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input) REAL array, dimension (LDA,N)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**T*U or A = L*L**T, as computed by SPOTRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  ANORM   (input) REAL
+*          The 1-norm (or infinity-norm) of the symmetric matrix A.
+*
+*  RCOND   (output) REAL
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an
+*          estimate of the 1-norm of inv(A) computed in this routine.
+*
+*  WORK    (workspace) REAL array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      CHARACTER          NORMIN
+      INTEGER            IX, KASE
+      REAL               AINVNM, SCALE, SCALEL, SCALEU, SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ISAMAX
+      REAL               SLAMCH
+      EXTERNAL           LSAME, ISAMAX, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLACON, SLATRS, SRSCL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPOCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.EQ.ZERO ) THEN
+         RETURN
+      END IF
+*
+      SMLNUM = SLAMCH( 'Safe minimum' )
+*
+*     Estimate the 1-norm of inv(A).
+*
+      KASE = 0
+      NORMIN = 'N'
+   10 CONTINUE
+      CALL SLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+         IF( UPPER ) THEN
+*
+*           Multiply by inv(U').
+*
+            CALL SLATRS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N, A,
+     $                   LDA, WORK, SCALEL, WORK( 2*N+1 ), INFO )
+            NORMIN = 'Y'
+*
+*           Multiply by inv(U).
+*
+            CALL SLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   A, LDA, WORK, SCALEU, WORK( 2*N+1 ), INFO )
+         ELSE
+*
+*           Multiply by inv(L).
+*
+            CALL SLATRS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   A, LDA, WORK, SCALEL, WORK( 2*N+1 ), INFO )
+            NORMIN = 'Y'
+*
+*           Multiply by inv(L').
+*
+            CALL SLATRS( 'Lower', 'Transpose', 'Non-unit', NORMIN, N, A,
+     $                   LDA, WORK, SCALEU, WORK( 2*N+1 ), INFO )
+         END IF
+*
+*        Multiply by 1/SCALE if doing so will not cause overflow.
+*
+         SCALE = SCALEL*SCALEU
+         IF( SCALE.NE.ONE ) THEN
+            IX = ISAMAX( N, WORK, 1 )
+            IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO )
+     $         GO TO 20
+            CALL SRSCL( N, SCALE, WORK, 1 )
+         END IF
+         GO TO 10
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+   20 CONTINUE
+      RETURN
+*
+*     End of SPOCON
+*
+      END
diff --git a/costa/native/external/lapack/spoequ.f b/costa/native/external/lapack/spoequ.f
new file mode 100644
index 000000000..eef4f5b66
--- /dev/null
+++ b/costa/native/external/lapack/spoequ.f
@@ -0,0 +1,137 @@
+      SUBROUTINE SPOEQU( N, A, LDA, S, SCOND, AMAX, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, N
+      REAL               AMAX, SCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), S( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPOEQU computes row and column scalings intended to equilibrate a
+*  symmetric positive definite matrix A and reduce its condition number
+*  (with respect to the two-norm).  S contains the scale factors,
+*  S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with
+*  elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal.  This
+*  choice of S puts the condition number of B within a factor N of the
+*  smallest possible condition number over all possible diagonal
+*  scalings.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input) REAL array, dimension (LDA,N)
+*          The N-by-N symmetric positive definite matrix whose scaling
+*          factors are to be computed.  Only the diagonal elements of A
+*          are referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  S       (output) REAL array, dimension (N)
+*          If INFO = 0, S contains the scale factors for A.
+*
+*  SCOND   (output) REAL
+*          If INFO = 0, S contains the ratio of the smallest S(i) to
+*          the largest S(i).  If SCOND >= 0.1 and AMAX is neither too
+*          large nor too small, it is not worth scaling by S.
+*
+*  AMAX    (output) REAL
+*          Absolute value of largest matrix element.  If AMAX is very
+*          close to overflow or very close to underflow, the matrix
+*          should be scaled.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the i-th diagonal element is nonpositive.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      REAL               SMIN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -3
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPOEQU', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         SCOND = ONE
+         AMAX = ZERO
+         RETURN
+      END IF
+*
+*     Find the minimum and maximum diagonal elements.
+*
+      S( 1 ) = A( 1, 1 )
+      SMIN = S( 1 )
+      AMAX = S( 1 )
+      DO 10 I = 2, N
+         S( I ) = A( I, I )
+         SMIN = MIN( SMIN, S( I ) )
+         AMAX = MAX( AMAX, S( I ) )
+   10 CONTINUE
+*
+      IF( SMIN.LE.ZERO ) THEN
+*
+*        Find the first non-positive diagonal element and return.
+*
+         DO 20 I = 1, N
+            IF( S( I ).LE.ZERO ) THEN
+               INFO = I
+               RETURN
+            END IF
+   20    CONTINUE
+      ELSE
+*
+*        Set the scale factors to the reciprocals
+*        of the diagonal elements.
+*
+         DO 30 I = 1, N
+            S( I ) = ONE / SQRT( S( I ) )
+   30    CONTINUE
+*
+*        Compute SCOND = min(S(I)) / max(S(I))
+*
+         SCOND = SQRT( SMIN ) / SQRT( AMAX )
+      END IF
+      RETURN
+*
+*     End of SPOEQU
+*
+      END
diff --git a/costa/native/external/lapack/sporfs.f b/costa/native/external/lapack/sporfs.f
new file mode 100644
index 000000000..46e8021fa
--- /dev/null
+++ b/costa/native/external/lapack/sporfs.f
@@ -0,0 +1,327 @@
+      SUBROUTINE SPORFS( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X,
+     $                   LDX, FERR, BERR, WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
+     $                   BERR( * ), FERR( * ), WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPORFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is symmetric positive definite,
+*  and provides error bounds and backward error estimates for the
+*  solution.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input) REAL array, dimension (LDA,N)
+*          The symmetric matrix A.  If UPLO = 'U', the leading N-by-N
+*          upper triangular part of A contains the upper triangular part
+*          of the matrix A, and the strictly lower triangular part of A
+*          is not referenced.  If UPLO = 'L', the leading N-by-N lower
+*          triangular part of A contains the lower triangular part of
+*          the matrix A, and the strictly upper triangular part of A is
+*          not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  AF      (input) REAL array, dimension (LDAF,N)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**T*U or A = L*L**T, as computed by SPOTRF.
+*
+*  LDAF    (input) INTEGER
+*          The leading dimension of the array AF.  LDAF >= max(1,N).
+*
+*  B       (input) REAL array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) REAL array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by SPOTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) REAL array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+      REAL               TWO
+      PARAMETER          ( TWO = 2.0E+0 )
+      REAL               THREE
+      PARAMETER          ( THREE = 3.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            COUNT, I, J, K, KASE, NZ
+      REAL               EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SCOPY, SLACON, SPOTRS, SSYMV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPORFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = SLAMCH( 'Epsilon' )
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - A * X
+*
+         CALL SCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 )
+         CALL SSYMV( UPLO, N, -ONE, A, LDA, X( 1, J ), 1, ONE,
+     $               WORK( N+1 ), 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            WORK( I ) = ABS( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(A)*abs(X) + abs(B).
+*
+         IF( UPPER ) THEN
+            DO 50 K = 1, N
+               S = ZERO
+               XK = ABS( X( K, J ) )
+               DO 40 I = 1, K - 1
+                  WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK
+                  S = S + ABS( A( I, K ) )*ABS( X( I, J ) )
+   40          CONTINUE
+               WORK( K ) = WORK( K ) + ABS( A( K, K ) )*XK + S
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               XK = ABS( X( K, J ) )
+               WORK( K ) = WORK( K ) + ABS( A( K, K ) )*XK
+               DO 60 I = K + 1, N
+                  WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK
+                  S = S + ABS( A( I, K ) )*ABS( X( I, J ) )
+   60          CONTINUE
+               WORK( K ) = WORK( K ) + S
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) )
+            ELSE
+               S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) /
+     $             ( WORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL SPOTRS( UPLO, N, 1, AF, LDAF, WORK( N+1 ), N, INFO )
+            CALL SAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(A))*
+*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(A) is the inverse of A
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(A)*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use SLACON to estimate the infinity-norm of the matrix
+*           inv(A) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I )
+            ELSE
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL SLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ),
+     $                KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(A').
+*
+               CALL SPOTRS( UPLO, N, 1, AF, LDAF, WORK( N+1 ), N, INFO )
+               DO 110 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  110          CONTINUE
+            ELSE IF( KASE.EQ.2 ) THEN
+*
+*              Multiply by inv(A)*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  120          CONTINUE
+               CALL SPOTRS( UPLO, N, 1, AF, LDAF, WORK( N+1 ), N, INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, ABS( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of SPORFS
+*
+      END
diff --git a/costa/native/external/lapack/sposv.f b/costa/native/external/lapack/sposv.f
new file mode 100644
index 000000000..cd06e2f29
--- /dev/null
+++ b/costa/native/external/lapack/sposv.f
@@ -0,0 +1,122 @@
+      SUBROUTINE SPOSV( UPLO, N, NRHS, A, LDA, B, LDB, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPOSV computes the solution to a real system of linear equations
+*     A * X = B,
+*  where A is an N-by-N symmetric positive definite matrix and X and B
+*  are N-by-NRHS matrices.
+*
+*  The Cholesky decomposition is used to factor A as
+*     A = U**T* U,  if UPLO = 'U', or
+*     A = L * L**T,  if UPLO = 'L',
+*  where U is an upper triangular matrix and L is a lower triangular
+*  matrix.  The factored form of A is then used to solve the system of
+*  equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the factor U or L from the Cholesky
+*          factorization A = U**T*U or A = L*L**T.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i of A is not
+*                positive definite, so the factorization could not be
+*                completed, and the solution has not been computed.
+*
+*  =====================================================================
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SPOTRF, SPOTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPOSV ', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the Cholesky factorization A = U'*U or A = L*L'.
+*
+      CALL SPOTRF( UPLO, N, A, LDA, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL SPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO )
+*
+      END IF
+      RETURN
+*
+*     End of SPOSV
+*
+      END
diff --git a/costa/native/external/lapack/sposvx.f b/costa/native/external/lapack/sposvx.f
new file mode 100644
index 000000000..ecd4b8d3a
--- /dev/null
+++ b/costa/native/external/lapack/sposvx.f
@@ -0,0 +1,379 @@
+      SUBROUTINE SPOSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, EQUED,
+     $                   S, B, LDB, X, LDX, RCOND, FERR, BERR, WORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, FACT, UPLO
+      INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
+     $                   BERR( * ), FERR( * ), S( * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPOSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to
+*  compute the solution to a real system of linear equations
+*     A * X = B,
+*  where A is an N-by-N symmetric positive definite matrix and X and B
+*  are N-by-NRHS matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'E', real scaling factors are computed to equilibrate
+*     the system:
+*        diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B
+*     Whether or not the system will be equilibrated depends on the
+*     scaling of the matrix A, but if equilibration is used, A is
+*     overwritten by diag(S)*A*diag(S) and B by diag(S)*B.
+*
+*  2. If FACT = 'N' or 'E', the Cholesky decomposition is used to
+*     factor the matrix A (after equilibration if FACT = 'E') as
+*        A = U**T* U,  if UPLO = 'U', or
+*        A = L * L**T,  if UPLO = 'L',
+*     where U is an upper triangular matrix and L is a lower triangular
+*     matrix.
+*
+*  3. If the leading i-by-i principal minor is not positive definite,
+*     then the routine returns with INFO = i. Otherwise, the factored
+*     form of A is used to estimate the condition number of the matrix
+*     A.  If the reciprocal of the condition number is less than machine
+*     precision, INFO = N+1 is returned as a warning, but the routine
+*     still goes on to solve for X and compute error bounds as
+*     described below.
+*
+*  4. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  5. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  6. If equilibration was used, the matrix X is premultiplied by
+*     diag(S) so that it solves the original system before
+*     equilibration.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of the matrix A is
+*          supplied on entry, and if not, whether the matrix A should be
+*          equilibrated before it is factored.
+*          = 'F':  On entry, AF contains the factored form of A.
+*                  If EQUED = 'Y', the matrix A has been equilibrated
+*                  with scaling factors given by S.  A and AF will not
+*                  be modified.
+*          = 'N':  The matrix A will be copied to AF and factored.
+*          = 'E':  The matrix A will be equilibrated if necessary, then
+*                  copied to AF and factored.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the symmetric matrix A, except if FACT = 'F' and
+*          EQUED = 'Y', then A must contain the equilibrated matrix
+*          diag(S)*A*diag(S).  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.  A is not modified if
+*          FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit.
+*
+*          On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by
+*          diag(S)*A*diag(S).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  AF      (input or output) REAL array, dimension (LDAF,N)
+*          If FACT = 'F', then AF is an input argument and on entry
+*          contains the triangular factor U or L from the Cholesky
+*          factorization A = U**T*U or A = L*L**T, in the same storage
+*          format as A.  If EQUED .ne. 'N', then AF is the factored form
+*          of the equilibrated matrix diag(S)*A*diag(S).
+*
+*          If FACT = 'N', then AF is an output argument and on exit
+*          returns the triangular factor U or L from the Cholesky
+*          factorization A = U**T*U or A = L*L**T of the original
+*          matrix A.
+*
+*          If FACT = 'E', then AF is an output argument and on exit
+*          returns the triangular factor U or L from the Cholesky
+*          factorization A = U**T*U or A = L*L**T of the equilibrated
+*          matrix A (see the description of A for the form of the
+*          equilibrated matrix).
+*
+*  LDAF    (input) INTEGER
+*          The leading dimension of the array AF.  LDAF >= max(1,N).
+*
+*  EQUED   (input or output) CHARACTER*1
+*          Specifies the form of equilibration that was done.
+*          = 'N':  No equilibration (always true if FACT = 'N').
+*          = 'Y':  Equilibration was done, i.e., A has been replaced by
+*                  diag(S) * A * diag(S).
+*          EQUED is an input argument if FACT = 'F'; otherwise, it is an
+*          output argument.
+*
+*  S       (input or output) REAL array, dimension (N)
+*          The scale factors for A; not accessed if EQUED = 'N'.  S is
+*          an input argument if FACT = 'F'; otherwise, S is an output
+*          argument.  If FACT = 'F' and EQUED = 'Y', each element of S
+*          must be positive.
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y',
+*          B is overwritten by diag(S) * B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) REAL array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to
+*          the original system of equations.  Note that if EQUED = 'Y',
+*          A and B are modified on exit, and the solution to the
+*          equilibrated system is inv(diag(S))*X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) REAL
+*          The estimate of the reciprocal condition number of the matrix
+*          A after equilibration (if done).  If RCOND is less than the
+*          machine precision (in particular, if RCOND = 0), the matrix
+*          is singular to working precision.  This condition is
+*          indicated by a return code of INFO > 0.
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) REAL array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, and i is
+*                <= N:  the leading minor of order i of A is
+*                       not positive definite, so the factorization
+*                       could not be completed, and the solution has not
+*                       been computed. RCOND = 0 is returned.
+*                = N+1: U is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            EQUIL, NOFACT, RCEQU
+      INTEGER            I, INFEQU, J
+      REAL               AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH, SLANSY
+      EXTERNAL           LSAME, SLAMCH, SLANSY
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLACPY, SLAQSY, SPOCON, SPOEQU, SPORFS, SPOTRF,
+     $                   SPOTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      EQUIL = LSAME( FACT, 'E' )
+      IF( NOFACT .OR. EQUIL ) THEN
+         EQUED = 'N'
+         RCEQU = .FALSE.
+      ELSE
+         RCEQU = LSAME( EQUED, 'Y' )
+         SMLNUM = SLAMCH( 'Safe minimum' )
+         BIGNUM = ONE / SMLNUM
+      END IF
+*
+*     Test the input parameters.
+*
+      IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) )
+     $     THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT.
+     $         ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN
+         INFO = -9
+      ELSE
+         IF( RCEQU ) THEN
+            SMIN = BIGNUM
+            SMAX = ZERO
+            DO 10 J = 1, N
+               SMIN = MIN( SMIN, S( J ) )
+               SMAX = MAX( SMAX, S( J ) )
+   10       CONTINUE
+            IF( SMIN.LE.ZERO ) THEN
+               INFO = -10
+            ELSE IF( N.GT.0 ) THEN
+               SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM )
+            ELSE
+               SCOND = ONE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            IF( LDB.LT.MAX( 1, N ) ) THEN
+               INFO = -12
+            ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+               INFO = -14
+            END IF
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPOSVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( EQUIL ) THEN
+*
+*        Compute row and column scalings to equilibrate the matrix A.
+*
+         CALL SPOEQU( N, A, LDA, S, SCOND, AMAX, INFEQU )
+         IF( INFEQU.EQ.0 ) THEN
+*
+*           Equilibrate the matrix.
+*
+            CALL SLAQSY( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED )
+            RCEQU = LSAME( EQUED, 'Y' )
+         END IF
+      END IF
+*
+*     Scale the right hand side.
+*
+      IF( RCEQU ) THEN
+         DO 30 J = 1, NRHS
+            DO 20 I = 1, N
+               B( I, J ) = S( I )*B( I, J )
+   20       CONTINUE
+   30    CONTINUE
+      END IF
+*
+      IF( NOFACT .OR. EQUIL ) THEN
+*
+*        Compute the Cholesky factorization A = U'*U or A = L*L'.
+*
+         CALL SLACPY( UPLO, N, N, A, LDA, AF, LDAF )
+         CALL SPOTRF( UPLO, N, AF, LDAF, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      ANORM = SLANSY( '1', UPLO, N, A, LDA, WORK )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL SPOCON( UPLO, N, AF, LDAF, ANORM, RCOND, WORK, IWORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.SLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution matrix X.
+*
+      CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL SPOTRS( UPLO, N, NRHS, AF, LDAF, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solution and
+*     compute error bounds and backward error estimates for it.
+*
+      CALL SPORFS( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X, LDX,
+     $             FERR, BERR, WORK, IWORK, INFO )
+*
+*     Transform the solution matrix X to a solution of the original
+*     system.
+*
+      IF( RCEQU ) THEN
+         DO 50 J = 1, NRHS
+            DO 40 I = 1, N
+               X( I, J ) = S( I )*X( I, J )
+   40       CONTINUE
+   50    CONTINUE
+         DO 60 J = 1, NRHS
+            FERR( J ) = FERR( J ) / SCOND
+   60    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of SPOSVX
+*
+      END
diff --git a/costa/native/external/lapack/spotf2.f b/costa/native/external/lapack/spotf2.f
new file mode 100644
index 000000000..34fb5af51
--- /dev/null
+++ b/costa/native/external/lapack/spotf2.f
@@ -0,0 +1,168 @@
+      SUBROUTINE SPOTF2( UPLO, N, A, LDA, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPOTF2 computes the Cholesky factorization of a real symmetric
+*  positive definite matrix A.
+*
+*  The factorization has the form
+*     A = U' * U ,  if UPLO = 'U', or
+*     A = L  * L',  if UPLO = 'L',
+*  where U is an upper triangular matrix and L is lower triangular.
+*
+*  This is the unblocked version of the algorithm, calling Level 2 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          n by n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n by n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the factor U or L from the Cholesky
+*          factorization A = U'*U  or A = L*L'.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*          > 0: if INFO = k, the leading minor of order k is not
+*               positive definite, and the factorization could not be
+*               completed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J
+      REAL               AJJ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SDOT
+      EXTERNAL           LSAME, SDOT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMV, SSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPOTF2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Compute the Cholesky factorization A = U'*U.
+*
+         DO 10 J = 1, N
+*
+*           Compute U(J,J) and test for non-positive-definiteness.
+*
+            AJJ = A( J, J ) - SDOT( J-1, A( 1, J ), 1, A( 1, J ), 1 )
+            IF( AJJ.LE.ZERO ) THEN
+               A( J, J ) = AJJ
+               GO TO 30
+            END IF
+            AJJ = SQRT( AJJ )
+            A( J, J ) = AJJ
+*
+*           Compute elements J+1:N of row J.
+*
+            IF( J.LT.N ) THEN
+               CALL SGEMV( 'Transpose', J-1, N-J, -ONE, A( 1, J+1 ),
+     $                     LDA, A( 1, J ), 1, ONE, A( J, J+1 ), LDA )
+               CALL SSCAL( N-J, ONE / AJJ, A( J, J+1 ), LDA )
+            END IF
+   10    CONTINUE
+      ELSE
+*
+*        Compute the Cholesky factorization A = L*L'.
+*
+         DO 20 J = 1, N
+*
+*           Compute L(J,J) and test for non-positive-definiteness.
+*
+            AJJ = A( J, J ) - SDOT( J-1, A( J, 1 ), LDA, A( J, 1 ),
+     $            LDA )
+            IF( AJJ.LE.ZERO ) THEN
+               A( J, J ) = AJJ
+               GO TO 30
+            END IF
+            AJJ = SQRT( AJJ )
+            A( J, J ) = AJJ
+*
+*           Compute elements J+1:N of column J.
+*
+            IF( J.LT.N ) THEN
+               CALL SGEMV( 'No transpose', N-J, J-1, -ONE, A( J+1, 1 ),
+     $                     LDA, A( J, 1 ), LDA, ONE, A( J+1, J ), 1 )
+               CALL SSCAL( N-J, ONE / AJJ, A( J+1, J ), 1 )
+            END IF
+   20    CONTINUE
+      END IF
+      GO TO 40
+*
+   30 CONTINUE
+      INFO = J
+*
+   40 CONTINUE
+      RETURN
+*
+*     End of SPOTF2
+*
+      END
diff --git a/costa/native/external/lapack/spotrf.f b/costa/native/external/lapack/spotrf.f
new file mode 100644
index 000000000..c97642532
--- /dev/null
+++ b/costa/native/external/lapack/spotrf.f
@@ -0,0 +1,184 @@
+      SUBROUTINE SPOTRF( UPLO, N, A, LDA, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPOTRF computes the Cholesky factorization of a real symmetric
+*  positive definite matrix A.
+*
+*  The factorization has the form
+*     A = U**T * U,  if UPLO = 'U', or
+*     A = L  * L**T,  if UPLO = 'L',
+*  where U is an upper triangular matrix and L is lower triangular.
+*
+*  This is the block version of the algorithm, calling Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the factor U or L from the Cholesky
+*          factorization A = U**T*U or A = L*L**T.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i is not
+*                positive definite, and the factorization could not be
+*                completed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, JB, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMM, SPOTF2, SSYRK, STRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPOTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'SPOTRF', UPLO, N, -1, -1, -1 )
+      IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+*        Use unblocked code.
+*
+         CALL SPOTF2( UPLO, N, A, LDA, INFO )
+      ELSE
+*
+*        Use blocked code.
+*
+         IF( UPPER ) THEN
+*
+*           Compute the Cholesky factorization A = U'*U.
+*
+            DO 10 J = 1, N, NB
+*
+*              Update and factorize the current diagonal block and test
+*              for non-positive-definiteness.
+*
+               JB = MIN( NB, N-J+1 )
+               CALL SSYRK( 'Upper', 'Transpose', JB, J-1, -ONE,
+     $                     A( 1, J ), LDA, ONE, A( J, J ), LDA )
+               CALL SPOTF2( 'Upper', JB, A( J, J ), LDA, INFO )
+               IF( INFO.NE.0 )
+     $            GO TO 30
+               IF( J+JB.LE.N ) THEN
+*
+*                 Compute the current block row.
+*
+                  CALL SGEMM( 'Transpose', 'No transpose', JB, N-J-JB+1,
+     $                        J-1, -ONE, A( 1, J ), LDA, A( 1, J+JB ),
+     $                        LDA, ONE, A( J, J+JB ), LDA )
+                  CALL STRSM( 'Left', 'Upper', 'Transpose', 'Non-unit',
+     $                        JB, N-J-JB+1, ONE, A( J, J ), LDA,
+     $                        A( J, J+JB ), LDA )
+               END IF
+   10       CONTINUE
+*
+         ELSE
+*
+*           Compute the Cholesky factorization A = L*L'.
+*
+            DO 20 J = 1, N, NB
+*
+*              Update and factorize the current diagonal block and test
+*              for non-positive-definiteness.
+*
+               JB = MIN( NB, N-J+1 )
+               CALL SSYRK( 'Lower', 'No transpose', JB, J-1, -ONE,
+     $                     A( J, 1 ), LDA, ONE, A( J, J ), LDA )
+               CALL SPOTF2( 'Lower', JB, A( J, J ), LDA, INFO )
+               IF( INFO.NE.0 )
+     $            GO TO 30
+               IF( J+JB.LE.N ) THEN
+*
+*                 Compute the current block column.
+*
+                  CALL SGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB,
+     $                        J-1, -ONE, A( J+JB, 1 ), LDA, A( J, 1 ),
+     $                        LDA, ONE, A( J+JB, J ), LDA )
+                  CALL STRSM( 'Right', 'Lower', 'Transpose', 'Non-unit',
+     $                        N-J-JB+1, JB, ONE, A( J, J ), LDA,
+     $                        A( J+JB, J ), LDA )
+               END IF
+   20       CONTINUE
+         END IF
+      END IF
+      GO TO 40
+*
+   30 CONTINUE
+      INFO = INFO + J - 1
+*
+   40 CONTINUE
+      RETURN
+*
+*     End of SPOTRF
+*
+      END
diff --git a/costa/native/external/lapack/spotri.f b/costa/native/external/lapack/spotri.f
new file mode 100644
index 000000000..44acd05c1
--- /dev/null
+++ b/costa/native/external/lapack/spotri.f
@@ -0,0 +1,97 @@
+      SUBROUTINE SPOTRI( UPLO, N, A, LDA, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPOTRI computes the inverse of a real symmetric positive definite
+*  matrix A using the Cholesky factorization A = U**T*U or A = L*L**T
+*  computed by SPOTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the triangular factor U or L from the Cholesky
+*          factorization A = U**T*U or A = L*L**T, as computed by
+*          SPOTRF.
+*          On exit, the upper or lower triangle of the (symmetric)
+*          inverse of A, overwriting the input factor U or L.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the (i,i) element of the factor U or L is
+*                zero, and the inverse could not be computed.
+*
+*  =====================================================================
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLAUUM, STRTRI, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPOTRI', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Invert the triangular Cholesky factor U or L.
+*
+      CALL STRTRI( UPLO, 'Non-unit', N, A, LDA, INFO )
+      IF( INFO.GT.0 )
+     $   RETURN
+*
+*     Form inv(U)*inv(U)' or inv(L)'*inv(L).
+*
+      CALL SLAUUM( UPLO, N, A, LDA, INFO )
+*
+      RETURN
+*
+*     End of SPOTRI
+*
+      END
diff --git a/costa/native/external/lapack/spotrs.f b/costa/native/external/lapack/spotrs.f
new file mode 100644
index 000000000..4c1fc3665
--- /dev/null
+++ b/costa/native/external/lapack/spotrs.f
@@ -0,0 +1,133 @@
+      SUBROUTINE SPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPOTRS solves a system of linear equations A*X = B with a symmetric
+*  positive definite matrix A using the Cholesky factorization
+*  A = U**T*U or A = L*L**T computed by SPOTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input) REAL array, dimension (LDA,N)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**T*U or A = L*L**T, as computed by SPOTRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           STRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPOTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Solve A*X = B where A = U'*U.
+*
+*        Solve U'*X = B, overwriting B with X.
+*
+         CALL STRSM( 'Left', 'Upper', 'Transpose', 'Non-unit', N, NRHS,
+     $               ONE, A, LDA, B, LDB )
+*
+*        Solve U*X = B, overwriting B with X.
+*
+         CALL STRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N,
+     $               NRHS, ONE, A, LDA, B, LDB )
+      ELSE
+*
+*        Solve A*X = B where A = L*L'.
+*
+*        Solve L*X = B, overwriting B with X.
+*
+         CALL STRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', N,
+     $               NRHS, ONE, A, LDA, B, LDB )
+*
+*        Solve L'*X = B, overwriting B with X.
+*
+         CALL STRSM( 'Left', 'Lower', 'Transpose', 'Non-unit', N, NRHS,
+     $               ONE, A, LDA, B, LDB )
+      END IF
+*
+      RETURN
+*
+*     End of SPOTRS
+*
+      END
diff --git a/costa/native/external/lapack/sppcon.f b/costa/native/external/lapack/sppcon.f
new file mode 100644
index 000000000..c2360875f
--- /dev/null
+++ b/costa/native/external/lapack/sppcon.f
@@ -0,0 +1,172 @@
+      SUBROUTINE SPPCON( UPLO, N, AP, ANORM, RCOND, WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+      REAL               ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               AP( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPPCON estimates the reciprocal of the condition number (in the
+*  1-norm) of a real symmetric positive definite packed matrix using
+*  the Cholesky factorization A = U**T*U or A = L*L**T computed by
+*  SPPTRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input) REAL array, dimension (N*(N+1)/2)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**T*U or A = L*L**T, packed columnwise in a linear
+*          array.  The j-th column of U or L is stored in the array AP
+*          as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n.
+*
+*  ANORM   (input) REAL
+*          The 1-norm (or infinity-norm) of the symmetric matrix A.
+*
+*  RCOND   (output) REAL
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an
+*          estimate of the 1-norm of inv(A) computed in this routine.
+*
+*  WORK    (workspace) REAL array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      CHARACTER          NORMIN
+      INTEGER            IX, KASE
+      REAL               AINVNM, SCALE, SCALEL, SCALEU, SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ISAMAX
+      REAL               SLAMCH
+      EXTERNAL           LSAME, ISAMAX, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLACON, SLATPS, SRSCL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPPCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.EQ.ZERO ) THEN
+         RETURN
+      END IF
+*
+      SMLNUM = SLAMCH( 'Safe minimum' )
+*
+*     Estimate the 1-norm of the inverse.
+*
+      KASE = 0
+      NORMIN = 'N'
+   10 CONTINUE
+      CALL SLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+         IF( UPPER ) THEN
+*
+*           Multiply by inv(U').
+*
+            CALL SLATPS( 'Upper', 'Transpose', 'Non-unit', NORMIN, N,
+     $                   AP, WORK, SCALEL, WORK( 2*N+1 ), INFO )
+            NORMIN = 'Y'
+*
+*           Multiply by inv(U).
+*
+            CALL SLATPS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   AP, WORK, SCALEU, WORK( 2*N+1 ), INFO )
+         ELSE
+*
+*           Multiply by inv(L).
+*
+            CALL SLATPS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   AP, WORK, SCALEL, WORK( 2*N+1 ), INFO )
+            NORMIN = 'Y'
+*
+*           Multiply by inv(L').
+*
+            CALL SLATPS( 'Lower', 'Transpose', 'Non-unit', NORMIN, N,
+     $                   AP, WORK, SCALEU, WORK( 2*N+1 ), INFO )
+         END IF
+*
+*        Multiply by 1/SCALE if doing so will not cause overflow.
+*
+         SCALE = SCALEL*SCALEU
+         IF( SCALE.NE.ONE ) THEN
+            IX = ISAMAX( N, WORK, 1 )
+            IF( SCALE.LT.ABS( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO )
+     $         GO TO 20
+            CALL SRSCL( N, SCALE, WORK, 1 )
+         END IF
+         GO TO 10
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+   20 CONTINUE
+      RETURN
+*
+*     End of SPPCON
+*
+      END
diff --git a/costa/native/external/lapack/sppequ.f b/costa/native/external/lapack/sppequ.f
new file mode 100644
index 000000000..925df64b4
--- /dev/null
+++ b/costa/native/external/lapack/sppequ.f
@@ -0,0 +1,169 @@
+      SUBROUTINE SPPEQU( UPLO, N, AP, S, SCOND, AMAX, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+      REAL               AMAX, SCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               AP( * ), S( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPPEQU computes row and column scalings intended to equilibrate a
+*  symmetric positive definite matrix A in packed storage and reduce
+*  its condition number (with respect to the two-norm).  S contains the
+*  scale factors, S(i)=1/sqrt(A(i,i)), chosen so that the scaled matrix
+*  B with elements B(i,j)=S(i)*A(i,j)*S(j) has ones on the diagonal.
+*  This choice of S puts the condition number of B within a factor N of
+*  the smallest possible condition number over all possible diagonal
+*  scalings.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input) REAL array, dimension (N*(N+1)/2)
+*          The upper or lower triangle of the symmetric matrix A, packed
+*          columnwise in a linear array.  The j-th column of A is stored
+*          in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*  S       (output) REAL array, dimension (N)
+*          If INFO = 0, S contains the scale factors for A.
+*
+*  SCOND   (output) REAL
+*          If INFO = 0, S contains the ratio of the smallest S(i) to
+*          the largest S(i).  If SCOND >= 0.1 and AMAX is neither too
+*          large nor too small, it is not worth scaling by S.
+*
+*  AMAX    (output) REAL
+*          Absolute value of largest matrix element.  If AMAX is very
+*          close to overflow or very close to underflow, the matrix
+*          should be scaled.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the i-th diagonal element is nonpositive.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, JJ
+      REAL               SMIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPPEQU', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         SCOND = ONE
+         AMAX = ZERO
+         RETURN
+      END IF
+*
+*     Initialize SMIN and AMAX.
+*
+      S( 1 ) = AP( 1 )
+      SMIN = S( 1 )
+      AMAX = S( 1 )
+*
+      IF( UPPER ) THEN
+*
+*        UPLO = 'U':  Upper triangle of A is stored.
+*        Find the minimum and maximum diagonal elements.
+*
+         JJ = 1
+         DO 10 I = 2, N
+            JJ = JJ + I
+            S( I ) = AP( JJ )
+            SMIN = MIN( SMIN, S( I ) )
+            AMAX = MAX( AMAX, S( I ) )
+   10    CONTINUE
+*
+      ELSE
+*
+*        UPLO = 'L':  Lower triangle of A is stored.
+*        Find the minimum and maximum diagonal elements.
+*
+         JJ = 1
+         DO 20 I = 2, N
+            JJ = JJ + N - I + 2
+            S( I ) = AP( JJ )
+            SMIN = MIN( SMIN, S( I ) )
+            AMAX = MAX( AMAX, S( I ) )
+   20    CONTINUE
+      END IF
+*
+      IF( SMIN.LE.ZERO ) THEN
+*
+*        Find the first non-positive diagonal element and return.
+*
+         DO 30 I = 1, N
+            IF( S( I ).LE.ZERO ) THEN
+               INFO = I
+               RETURN
+            END IF
+   30    CONTINUE
+      ELSE
+*
+*        Set the scale factors to the reciprocals
+*        of the diagonal elements.
+*
+         DO 40 I = 1, N
+            S( I ) = ONE / SQRT( S( I ) )
+   40    CONTINUE
+*
+*        Compute SCOND = min(S(I)) / max(S(I))
+*
+         SCOND = SQRT( SMIN ) / SQRT( AMAX )
+      END IF
+      RETURN
+*
+*     End of SPPEQU
+*
+      END
diff --git a/costa/native/external/lapack/spprfs.f b/costa/native/external/lapack/spprfs.f
new file mode 100644
index 000000000..cc3663888
--- /dev/null
+++ b/costa/native/external/lapack/spprfs.f
@@ -0,0 +1,324 @@
+      SUBROUTINE SPPRFS( UPLO, N, NRHS, AP, AFP, B, LDB, X, LDX, FERR,
+     $                   BERR, WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               AFP( * ), AP( * ), B( LDB, * ), BERR( * ),
+     $                   FERR( * ), WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPPRFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is symmetric positive definite
+*  and packed, and provides error bounds and backward error estimates
+*  for the solution.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AP      (input) REAL array, dimension (N*(N+1)/2)
+*          The upper or lower triangle of the symmetric matrix A, packed
+*          columnwise in a linear array.  The j-th column of A is stored
+*          in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*  AFP     (input) REAL array, dimension (N*(N+1)/2)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**T*U or A = L*L**T, as computed by SPPTRF/CPPTRF,
+*          packed columnwise in a linear array in the same format as A
+*          (see AP).
+*
+*  B       (input) REAL array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) REAL array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by SPPTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) REAL array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+      REAL               TWO
+      PARAMETER          ( TWO = 2.0E+0 )
+      REAL               THREE
+      PARAMETER          ( THREE = 3.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            COUNT, I, IK, J, K, KASE, KK, NZ
+      REAL               EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SCOPY, SLACON, SPPTRS, SSPMV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPPRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = SLAMCH( 'Epsilon' )
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - A * X
+*
+         CALL SCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 )
+         CALL SSPMV( UPLO, N, -ONE, AP, X( 1, J ), 1, ONE, WORK( N+1 ),
+     $               1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            WORK( I ) = ABS( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(A)*abs(X) + abs(B).
+*
+         KK = 1
+         IF( UPPER ) THEN
+            DO 50 K = 1, N
+               S = ZERO
+               XK = ABS( X( K, J ) )
+               IK = KK
+               DO 40 I = 1, K - 1
+                  WORK( I ) = WORK( I ) + ABS( AP( IK ) )*XK
+                  S = S + ABS( AP( IK ) )*ABS( X( I, J ) )
+                  IK = IK + 1
+   40          CONTINUE
+               WORK( K ) = WORK( K ) + ABS( AP( KK+K-1 ) )*XK + S
+               KK = KK + K
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               XK = ABS( X( K, J ) )
+               WORK( K ) = WORK( K ) + ABS( AP( KK ) )*XK
+               IK = KK + 1
+               DO 60 I = K + 1, N
+                  WORK( I ) = WORK( I ) + ABS( AP( IK ) )*XK
+                  S = S + ABS( AP( IK ) )*ABS( X( I, J ) )
+                  IK = IK + 1
+   60          CONTINUE
+               WORK( K ) = WORK( K ) + S
+               KK = KK + ( N-K+1 )
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) )
+            ELSE
+               S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) /
+     $             ( WORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL SPPTRS( UPLO, N, 1, AFP, WORK( N+1 ), N, INFO )
+            CALL SAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(A))*
+*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(A) is the inverse of A
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(A)*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use SLACON to estimate the infinity-norm of the matrix
+*           inv(A) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I )
+            ELSE
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL SLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ),
+     $                KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(A').
+*
+               CALL SPPTRS( UPLO, N, 1, AFP, WORK( N+1 ), N, INFO )
+               DO 110 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  110          CONTINUE
+            ELSE IF( KASE.EQ.2 ) THEN
+*
+*              Multiply by inv(A)*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  120          CONTINUE
+               CALL SPPTRS( UPLO, N, 1, AFP, WORK( N+1 ), N, INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, ABS( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of SPPRFS
+*
+      END
diff --git a/costa/native/external/lapack/sppsv.f b/costa/native/external/lapack/sppsv.f
new file mode 100644
index 000000000..ea8ed7ed5
--- /dev/null
+++ b/costa/native/external/lapack/sppsv.f
@@ -0,0 +1,134 @@
+      SUBROUTINE SPPSV( UPLO, N, NRHS, AP, B, LDB, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      REAL               AP( * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPPSV computes the solution to a real system of linear equations
+*     A * X = B,
+*  where A is an N-by-N symmetric positive definite matrix stored in
+*  packed format and X and B are N-by-NRHS matrices.
+*
+*  The Cholesky decomposition is used to factor A as
+*     A = U**T* U,  if UPLO = 'U', or
+*     A = L * L**T,  if UPLO = 'L',
+*  where U is an upper triangular matrix and L is a lower triangular
+*  matrix.  The factored form of A is then used to solve the system of
+*  equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AP      (input/output) REAL array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*          See below for further details.  
+*
+*          On exit, if INFO = 0, the factor U or L from the Cholesky
+*          factorization A = U**T*U or A = L*L**T, in the same storage
+*          format as A.
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i of A is not
+*                positive definite, so the factorization could not be
+*                completed, and the solution has not been computed.
+*
+*  Further Details
+*  ===============
+*
+*  The packed storage scheme is illustrated by the following example
+*  when N = 4, UPLO = 'U':
+*
+*  Two-dimensional storage of the symmetric matrix A:
+*
+*     a11 a12 a13 a14
+*         a22 a23 a24
+*             a33 a34     (aij = conjg(aji))
+*                 a44
+*
+*  Packed storage of the upper triangle of A:
+*
+*  AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ]
+*
+*  =====================================================================
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SPPTRF, SPPTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPPSV ', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the Cholesky factorization A = U'*U or A = L*L'.
+*
+      CALL SPPTRF( UPLO, N, AP, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL SPPTRS( UPLO, N, NRHS, AP, B, LDB, INFO )
+*
+      END IF
+      RETURN
+*
+*     End of SPPSV
+*
+      END
diff --git a/costa/native/external/lapack/sppsvx.f b/costa/native/external/lapack/sppsvx.f
new file mode 100644
index 000000000..8d06890a2
--- /dev/null
+++ b/costa/native/external/lapack/sppsvx.f
@@ -0,0 +1,383 @@
+      SUBROUTINE SPPSVX( FACT, UPLO, N, NRHS, AP, AFP, EQUED, S, B, LDB,
+     $                   X, LDX, RCOND, FERR, BERR, WORK, IWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, FACT, UPLO
+      INTEGER            INFO, LDB, LDX, N, NRHS
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               AFP( * ), AP( * ), B( LDB, * ), BERR( * ),
+     $                   FERR( * ), S( * ), WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPPSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to
+*  compute the solution to a real system of linear equations
+*     A * X = B,
+*  where A is an N-by-N symmetric positive definite matrix stored in
+*  packed format and X and B are N-by-NRHS matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'E', real scaling factors are computed to equilibrate
+*     the system:
+*        diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B
+*     Whether or not the system will be equilibrated depends on the
+*     scaling of the matrix A, but if equilibration is used, A is
+*     overwritten by diag(S)*A*diag(S) and B by diag(S)*B.
+*
+*  2. If FACT = 'N' or 'E', the Cholesky decomposition is used to
+*     factor the matrix A (after equilibration if FACT = 'E') as
+*        A = U**T* U,  if UPLO = 'U', or
+*        A = L * L**T,  if UPLO = 'L',
+*     where U is an upper triangular matrix and L is a lower triangular
+*     matrix.
+*
+*  3. If the leading i-by-i principal minor is not positive definite,
+*     then the routine returns with INFO = i. Otherwise, the factored
+*     form of A is used to estimate the condition number of the matrix
+*     A.  If the reciprocal of the condition number is less than machine
+*     precision, INFO = N+1 is returned as a warning, but the routine
+*     still goes on to solve for X and compute error bounds as
+*     described below.
+*
+*  4. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  5. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  6. If equilibration was used, the matrix X is premultiplied by
+*     diag(S) so that it solves the original system before
+*     equilibration.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of the matrix A is
+*          supplied on entry, and if not, whether the matrix A should be
+*          equilibrated before it is factored.
+*          = 'F':  On entry, AFP contains the factored form of A.
+*                  If EQUED = 'Y', the matrix A has been equilibrated
+*                  with scaling factors given by S.  AP and AFP will not
+*                  be modified.
+*          = 'N':  The matrix A will be copied to AFP and factored.
+*          = 'E':  The matrix A will be equilibrated if necessary, then
+*                  copied to AFP and factored.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AP      (input/output) REAL array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array, except if FACT = 'F'
+*          and EQUED = 'Y', then A must contain the equilibrated matrix
+*          diag(S)*A*diag(S).  The j-th column of A is stored in the
+*          array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*          See below for further details.  A is not modified if
+*          FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit.
+*
+*          On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by
+*          diag(S)*A*diag(S).
+*
+*  AFP     (input or output) REAL array, dimension
+*                            (N*(N+1)/2)
+*          If FACT = 'F', then AFP is an input argument and on entry
+*          contains the triangular factor U or L from the Cholesky
+*          factorization A = U'*U or A = L*L', in the same storage
+*          format as A.  If EQUED .ne. 'N', then AFP is the factored
+*          form of the equilibrated matrix A.
+*
+*          If FACT = 'N', then AFP is an output argument and on exit
+*          returns the triangular factor U or L from the Cholesky
+*          factorization A = U'*U or A = L*L' of the original matrix A.
+*
+*          If FACT = 'E', then AFP is an output argument and on exit
+*          returns the triangular factor U or L from the Cholesky
+*          factorization A = U'*U or A = L*L' of the equilibrated
+*          matrix A (see the description of AP for the form of the
+*          equilibrated matrix).
+*
+*  EQUED   (input or output) CHARACTER*1
+*          Specifies the form of equilibration that was done.
+*          = 'N':  No equilibration (always true if FACT = 'N').
+*          = 'Y':  Equilibration was done, i.e., A has been replaced by
+*                  diag(S) * A * diag(S).
+*          EQUED is an input argument if FACT = 'F'; otherwise, it is an
+*          output argument.
+*
+*  S       (input or output) REAL array, dimension (N)
+*          The scale factors for A; not accessed if EQUED = 'N'.  S is
+*          an input argument if FACT = 'F'; otherwise, S is an output
+*          argument.  If FACT = 'F' and EQUED = 'Y', each element of S
+*          must be positive.
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y',
+*          B is overwritten by diag(S) * B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) REAL array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to
+*          the original system of equations.  Note that if EQUED = 'Y',
+*          A and B are modified on exit, and the solution to the
+*          equilibrated system is inv(diag(S))*X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) REAL
+*          The estimate of the reciprocal condition number of the matrix
+*          A after equilibration (if done).  If RCOND is less than the
+*          machine precision (in particular, if RCOND = 0), the matrix
+*          is singular to working precision.  This condition is
+*          indicated by a return code of INFO > 0.
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) REAL array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= N:  the leading minor of order i of A is
+*                       not positive definite, so the factorization
+*                       could not be completed, and the solution has not
+*                       been computed. RCOND = 0 is returned.
+*                = N+1: U is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  Further Details
+*  ===============
+*
+*  The packed storage scheme is illustrated by the following example
+*  when N = 4, UPLO = 'U':
+*
+*  Two-dimensional storage of the symmetric matrix A:
+*
+*     a11 a12 a13 a14
+*         a22 a23 a24
+*             a33 a34     (aij = conjg(aji))
+*                 a44
+*
+*  Packed storage of the upper triangle of A:
+*
+*  AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ]
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            EQUIL, NOFACT, RCEQU
+      INTEGER            I, INFEQU, J
+      REAL               AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH, SLANSP
+      EXTERNAL           LSAME, SLAMCH, SLANSP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SLACPY, SLAQSP, SPPCON, SPPEQU, SPPRFS,
+     $                   SPPTRF, SPPTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      EQUIL = LSAME( FACT, 'E' )
+      IF( NOFACT .OR. EQUIL ) THEN
+         EQUED = 'N'
+         RCEQU = .FALSE.
+      ELSE
+         RCEQU = LSAME( EQUED, 'Y' )
+         SMLNUM = SLAMCH( 'Safe minimum' )
+         BIGNUM = ONE / SMLNUM
+      END IF
+*
+*     Test the input parameters.
+*
+      IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) )
+     $     THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT.
+     $         ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN
+         INFO = -7
+      ELSE
+         IF( RCEQU ) THEN
+            SMIN = BIGNUM
+            SMAX = ZERO
+            DO 10 J = 1, N
+               SMIN = MIN( SMIN, S( J ) )
+               SMAX = MAX( SMAX, S( J ) )
+   10       CONTINUE
+            IF( SMIN.LE.ZERO ) THEN
+               INFO = -8
+            ELSE IF( N.GT.0 ) THEN
+               SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM )
+            ELSE
+               SCOND = ONE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            IF( LDB.LT.MAX( 1, N ) ) THEN
+               INFO = -10
+            ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+               INFO = -12
+            END IF
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPPSVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( EQUIL ) THEN
+*
+*        Compute row and column scalings to equilibrate the matrix A.
+*
+         CALL SPPEQU( UPLO, N, AP, S, SCOND, AMAX, INFEQU )
+         IF( INFEQU.EQ.0 ) THEN
+*
+*           Equilibrate the matrix.
+*
+            CALL SLAQSP( UPLO, N, AP, S, SCOND, AMAX, EQUED )
+            RCEQU = LSAME( EQUED, 'Y' )
+         END IF
+      END IF
+*
+*     Scale the right-hand side.
+*
+      IF( RCEQU ) THEN
+         DO 30 J = 1, NRHS
+            DO 20 I = 1, N
+               B( I, J ) = S( I )*B( I, J )
+   20       CONTINUE
+   30    CONTINUE
+      END IF
+*
+      IF( NOFACT .OR. EQUIL ) THEN
+*
+*        Compute the Cholesky factorization A = U'*U or A = L*L'.
+*
+         CALL SCOPY( N*( N+1 ) / 2, AP, 1, AFP, 1 )
+         CALL SPPTRF( UPLO, N, AFP, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      ANORM = SLANSP( 'I', UPLO, N, AP, WORK )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL SPPCON( UPLO, N, AFP, ANORM, RCOND, WORK, IWORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.SLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution matrix X.
+*
+      CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL SPPTRS( UPLO, N, NRHS, AFP, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solution and
+*     compute error bounds and backward error estimates for it.
+*
+      CALL SPPRFS( UPLO, N, NRHS, AP, AFP, B, LDB, X, LDX, FERR, BERR,
+     $             WORK, IWORK, INFO )
+*
+*     Transform the solution matrix X to a solution of the original
+*     system.
+*
+      IF( RCEQU ) THEN
+         DO 50 J = 1, NRHS
+            DO 40 I = 1, N
+               X( I, J ) = S( I )*X( I, J )
+   40       CONTINUE
+   50    CONTINUE
+         DO 60 J = 1, NRHS
+            FERR( J ) = FERR( J ) / SCOND
+   60    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of SPPSVX
+*
+      END
diff --git a/costa/native/external/lapack/spptrf.f b/costa/native/external/lapack/spptrf.f
new file mode 100644
index 000000000..035e0352d
--- /dev/null
+++ b/costa/native/external/lapack/spptrf.f
@@ -0,0 +1,178 @@
+      SUBROUTINE SPPTRF( UPLO, N, AP, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      REAL               AP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPPTRF computes the Cholesky factorization of a real symmetric
+*  positive definite matrix A stored in packed format.
+*
+*  The factorization has the form
+*     A = U**T * U,  if UPLO = 'U', or
+*     A = L  * L**T,  if UPLO = 'L',
+*  where U is an upper triangular matrix and L is lower triangular.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) REAL array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*          See below for further details.
+*
+*          On exit, if INFO = 0, the triangular factor U or L from the
+*          Cholesky factorization A = U**T*U or A = L*L**T, in the same
+*          storage format as A.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i is not
+*                positive definite, and the factorization could not be
+*                completed.
+*
+*  Further Details
+*  ======= =======
+*
+*  The packed storage scheme is illustrated by the following example
+*  when N = 4, UPLO = 'U':
+*
+*  Two-dimensional storage of the symmetric matrix A:
+*
+*     a11 a12 a13 a14
+*         a22 a23 a24
+*             a33 a34     (aij = aji)
+*                 a44
+*
+*  Packed storage of the upper triangle of A:
+*
+*  AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ]
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, JC, JJ
+      REAL               AJJ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SDOT
+      EXTERNAL           LSAME, SDOT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SSCAL, SSPR, STPSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPPTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Compute the Cholesky factorization A = U'*U.
+*
+         JJ = 0
+         DO 10 J = 1, N
+            JC = JJ + 1
+            JJ = JJ + J
+*
+*           Compute elements 1:J-1 of column J.
+*
+            IF( J.GT.1 )
+     $         CALL STPSV( 'Upper', 'Transpose', 'Non-unit', J-1, AP,
+     $                     AP( JC ), 1 )
+*
+*           Compute U(J,J) and test for non-positive-definiteness.
+*
+            AJJ = AP( JJ ) - SDOT( J-1, AP( JC ), 1, AP( JC ), 1 )
+            IF( AJJ.LE.ZERO ) THEN
+               AP( JJ ) = AJJ
+               GO TO 30
+            END IF
+            AP( JJ ) = SQRT( AJJ )
+   10    CONTINUE
+      ELSE
+*
+*        Compute the Cholesky factorization A = L*L'.
+*
+         JJ = 1
+         DO 20 J = 1, N
+*
+*           Compute L(J,J) and test for non-positive-definiteness.
+*
+            AJJ = AP( JJ )
+            IF( AJJ.LE.ZERO ) THEN
+               AP( JJ ) = AJJ
+               GO TO 30
+            END IF
+            AJJ = SQRT( AJJ )
+            AP( JJ ) = AJJ
+*
+*           Compute elements J+1:N of column J and update the trailing
+*           submatrix.
+*
+            IF( J.LT.N ) THEN
+               CALL SSCAL( N-J, ONE / AJJ, AP( JJ+1 ), 1 )
+               CALL SSPR( 'Lower', N-J, -ONE, AP( JJ+1 ), 1,
+     $                    AP( JJ+N-J+1 ) )
+               JJ = JJ + N - J + 1
+            END IF
+   20    CONTINUE
+      END IF
+      GO TO 40
+*
+   30 CONTINUE
+      INFO = J
+*
+   40 CONTINUE
+      RETURN
+*
+*     End of SPPTRF
+*
+      END
diff --git a/costa/native/external/lapack/spptri.f b/costa/native/external/lapack/spptri.f
new file mode 100644
index 000000000..6d376cd96
--- /dev/null
+++ b/costa/native/external/lapack/spptri.f
@@ -0,0 +1,129 @@
+      SUBROUTINE SPPTRI( UPLO, N, AP, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      REAL               AP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPPTRI computes the inverse of a real symmetric positive definite
+*  matrix A using the Cholesky factorization A = U**T*U or A = L*L**T
+*  computed by SPPTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangular factor is stored in AP;
+*          = 'L':  Lower triangular factor is stored in AP.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) REAL array, dimension (N*(N+1)/2)
+*          On entry, the triangular factor U or L from the Cholesky
+*          factorization A = U**T*U or A = L*L**T, packed columnwise as
+*          a linear array.  The j-th column of U or L is stored in the
+*          array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n.
+*
+*          On exit, the upper or lower triangle of the (symmetric)
+*          inverse of A, overwriting the input factor U or L.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the (i,i) element of the factor U or L is
+*                zero, and the inverse could not be computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, JC, JJ, JJN
+      REAL               AJJ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SDOT
+      EXTERNAL           LSAME, SDOT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SSCAL, SSPR, STPMV, STPTRI, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPPTRI', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Invert the triangular Cholesky factor U or L.
+*
+      CALL STPTRI( UPLO, 'Non-unit', N, AP, INFO )
+      IF( INFO.GT.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Compute the product inv(U) * inv(U)'.
+*
+         JJ = 0
+         DO 10 J = 1, N
+            JC = JJ + 1
+            JJ = JJ + J
+            IF( J.GT.1 )
+     $         CALL SSPR( 'Upper', J-1, ONE, AP( JC ), 1, AP )
+            AJJ = AP( JJ )
+            CALL SSCAL( J, AJJ, AP( JC ), 1 )
+   10    CONTINUE
+*
+      ELSE
+*
+*        Compute the product inv(L)' * inv(L).
+*
+         JJ = 1
+         DO 20 J = 1, N
+            JJN = JJ + N - J + 1
+            AP( JJ ) = SDOT( N-J+1, AP( JJ ), 1, AP( JJ ), 1 )
+            IF( J.LT.N )
+     $         CALL STPMV( 'Lower', 'Transpose', 'Non-unit', N-J,
+     $                     AP( JJN ), AP( JJ+1 ), 1 )
+            JJ = JJN
+   20    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of SPPTRI
+*
+      END
diff --git a/costa/native/external/lapack/spptrs.f b/costa/native/external/lapack/spptrs.f
new file mode 100644
index 000000000..8f4fcf5b8
--- /dev/null
+++ b/costa/native/external/lapack/spptrs.f
@@ -0,0 +1,135 @@
+      SUBROUTINE SPPTRS( UPLO, N, NRHS, AP, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      REAL               AP( * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPPTRS solves a system of linear equations A*X = B with a symmetric
+*  positive definite matrix A in packed storage using the Cholesky
+*  factorization A = U**T*U or A = L*L**T computed by SPPTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AP      (input) REAL array, dimension (N*(N+1)/2)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**T*U or A = L*L**T, packed columnwise in a linear
+*          array.  The j-th column of U or L is stored in the array AP
+*          as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n.
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           STPSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPPTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Solve A*X = B where A = U'*U.
+*
+         DO 10 I = 1, NRHS
+*
+*           Solve U'*X = B, overwriting B with X.
+*
+            CALL STPSV( 'Upper', 'Transpose', 'Non-unit', N, AP,
+     $                  B( 1, I ), 1 )
+*
+*           Solve U*X = B, overwriting B with X.
+*
+            CALL STPSV( 'Upper', 'No transpose', 'Non-unit', N, AP,
+     $                  B( 1, I ), 1 )
+   10    CONTINUE
+      ELSE
+*
+*        Solve A*X = B where A = L*L'.
+*
+         DO 20 I = 1, NRHS
+*
+*           Solve L*Y = B, overwriting B with X.
+*
+            CALL STPSV( 'Lower', 'No transpose', 'Non-unit', N, AP,
+     $                  B( 1, I ), 1 )
+*
+*           Solve L'*X = Y, overwriting B with X.
+*
+            CALL STPSV( 'Lower', 'Transpose', 'Non-unit', N, AP,
+     $                  B( 1, I ), 1 )
+   20    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of SPPTRS
+*
+      END
diff --git a/costa/native/external/lapack/sptcon.f b/costa/native/external/lapack/sptcon.f
new file mode 100644
index 000000000..2ea0c4f4e
--- /dev/null
+++ b/costa/native/external/lapack/sptcon.f
@@ -0,0 +1,150 @@
+      SUBROUTINE SPTCON( N, D, E, ANORM, RCOND, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, N
+      REAL               ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * ), E( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPTCON computes the reciprocal of the condition number (in the
+*  1-norm) of a real symmetric positive definite tridiagonal matrix
+*  using the factorization A = L*D*L**T or A = U**T*D*U computed by
+*  SPTTRF.
+*
+*  Norm(inv(A)) is computed by a direct method, and the reciprocal of
+*  the condition number is computed as
+*               RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  D       (input) REAL array, dimension (N)
+*          The n diagonal elements of the diagonal matrix D from the
+*          factorization of A, as computed by SPTTRF.
+*
+*  E       (input) REAL array, dimension (N-1)
+*          The (n-1) off-diagonal elements of the unit bidiagonal factor
+*          U or L from the factorization of A,  as computed by SPTTRF.
+*
+*  ANORM   (input) REAL
+*          The 1-norm of the original matrix A.
+*
+*  RCOND   (output) REAL
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is the
+*          1-norm of inv(A) computed in this routine.
+*
+*  WORK    (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The method used is described in Nicholas J. Higham, "Efficient
+*  Algorithms for Computing the Condition Number of a Tridiagonal
+*  Matrix", SIAM J. Sci. Stat. Comput., Vol. 7, No. 1, January 1986.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IX
+      REAL               AINVNM
+*     ..
+*     .. External Functions ..
+      INTEGER            ISAMAX
+      EXTERNAL           ISAMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments.
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPTCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.EQ.ZERO ) THEN
+         RETURN
+      END IF
+*
+*     Check that D(1:N) is positive.
+*
+      DO 10 I = 1, N
+         IF( D( I ).LE.ZERO )
+     $      RETURN
+   10 CONTINUE
+*
+*     Solve M(A) * x = e, where M(A) = (m(i,j)) is given by
+*
+*        m(i,j) =  abs(A(i,j)), i = j,
+*        m(i,j) = -abs(A(i,j)), i .ne. j,
+*
+*     and e = [ 1, 1, ..., 1 ]'.  Note M(A) = M(L)*D*M(L)'.
+*
+*     Solve M(L) * x = e.
+*
+      WORK( 1 ) = ONE
+      DO 20 I = 2, N
+         WORK( I ) = ONE + WORK( I-1 )*ABS( E( I-1 ) )
+   20 CONTINUE
+*
+*     Solve D * M(L)' * x = b.
+*
+      WORK( N ) = WORK( N ) / D( N )
+      DO 30 I = N - 1, 1, -1
+         WORK( I ) = WORK( I ) / D( I ) + WORK( I+1 )*ABS( E( I ) )
+   30 CONTINUE
+*
+*     Compute AINVNM = max(x(i)), 1<=i<=n.
+*
+      IX = ISAMAX( N, WORK, 1 )
+      AINVNM = ABS( WORK( IX ) )
+*
+*     Compute the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+      RETURN
+*
+*     End of SPTCON
+*
+      END
diff --git a/costa/native/external/lapack/spteqr.f b/costa/native/external/lapack/spteqr.f
new file mode 100644
index 000000000..b8df9060c
--- /dev/null
+++ b/costa/native/external/lapack/spteqr.f
@@ -0,0 +1,190 @@
+      SUBROUTINE SPTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPZ
+      INTEGER            INFO, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * ), E( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPTEQR computes all eigenvalues and, optionally, eigenvectors of a
+*  symmetric positive definite tridiagonal matrix by first factoring the
+*  matrix using SPTTRF, and then calling SBDSQR to compute the singular
+*  values of the bidiagonal factor.
+*
+*  This routine computes the eigenvalues of the positive definite
+*  tridiagonal matrix to high relative accuracy.  This means that if the
+*  eigenvalues range over many orders of magnitude in size, then the
+*  small eigenvalues and corresponding eigenvectors will be computed
+*  more accurately than, for example, with the standard QR method.
+*
+*  The eigenvectors of a full or band symmetric positive definite matrix
+*  can also be found if SSYTRD, SSPTRD, or SSBTRD has been used to
+*  reduce this matrix to tridiagonal form. (The reduction to tridiagonal
+*  form, however, may preclude the possibility of obtaining high
+*  relative accuracy in the small eigenvalues of the original matrix, if
+*  these eigenvalues range over many orders of magnitude.)
+*
+*  Arguments
+*  =========
+*
+*  COMPZ   (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only.
+*          = 'V':  Compute eigenvectors of original symmetric
+*                  matrix also.  Array Z contains the orthogonal
+*                  matrix used to reduce the original matrix to
+*                  tridiagonal form.
+*          = 'I':  Compute eigenvectors of tridiagonal matrix also.
+*
+*  N       (input) INTEGER
+*          The order of the matrix.  N >= 0.
+*
+*  D       (input/output) REAL array, dimension (N)
+*          On entry, the n diagonal elements of the tridiagonal
+*          matrix.
+*          On normal exit, D contains the eigenvalues, in descending
+*          order.
+*
+*  E       (input/output) REAL array, dimension (N-1)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix.
+*          On exit, E has been destroyed.
+*
+*  Z       (input/output) REAL array, dimension (LDZ, N)
+*          On entry, if COMPZ = 'V', the orthogonal matrix used in the
+*          reduction to tridiagonal form.
+*          On exit, if COMPZ = 'V', the orthonormal eigenvectors of the
+*          original symmetric matrix;
+*          if COMPZ = 'I', the orthonormal eigenvectors of the
+*          tridiagonal matrix.
+*          If INFO > 0 on exit, Z contains the eigenvectors associated
+*          with only the stored eigenvalues.
+*          If  COMPZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          COMPZ = 'V' or 'I', LDZ >= max(1,N).
+*
+*  WORK    (workspace) REAL array, dimension (4*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = i, and i is:
+*                <= N  the Cholesky factorization of the matrix could
+*                      not be performed because the i-th principal minor
+*                      was not positive definite.
+*                > N   the SVD algorithm failed to converge;
+*                      if INFO = N+i, i off-diagonal elements of the
+*                      bidiagonal factor did not converge to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SBDSQR, SLASET, SPTTRF, XERBLA
+*     ..
+*     .. Local Arrays ..
+      REAL               C( 1, 1 ), VT( 1, 1 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, ICOMPZ, NRU
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( LSAME( COMPZ, 'N' ) ) THEN
+         ICOMPZ = 0
+      ELSE IF( LSAME( COMPZ, 'V' ) ) THEN
+         ICOMPZ = 1
+      ELSE IF( LSAME( COMPZ, 'I' ) ) THEN
+         ICOMPZ = 2
+      ELSE
+         ICOMPZ = -1
+      END IF
+      IF( ICOMPZ.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1,
+     $         N ) ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPTEQR', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( ICOMPZ.GT.0 )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+      IF( ICOMPZ.EQ.2 )
+     $   CALL SLASET( 'Full', N, N, ZERO, ONE, Z, LDZ )
+*
+*     Call SPTTRF to factor the matrix.
+*
+      CALL SPTTRF( N, D, E, INFO )
+      IF( INFO.NE.0 )
+     $   RETURN
+      DO 10 I = 1, N
+         D( I ) = SQRT( D( I ) )
+   10 CONTINUE
+      DO 20 I = 1, N - 1
+         E( I ) = E( I )*D( I )
+   20 CONTINUE
+*
+*     Call SBDSQR to compute the singular values/vectors of the
+*     bidiagonal factor.
+*
+      IF( ICOMPZ.GT.0 ) THEN
+         NRU = N
+      ELSE
+         NRU = 0
+      END IF
+      CALL SBDSQR( 'Lower', N, 0, NRU, 0, D, E, VT, 1, Z, LDZ, C, 1,
+     $             WORK, INFO )
+*
+*     Square the singular values.
+*
+      IF( INFO.EQ.0 ) THEN
+         DO 30 I = 1, N
+            D( I ) = D( I )*D( I )
+   30    CONTINUE
+      ELSE
+         INFO = N + INFO
+      END IF
+*
+      RETURN
+*
+*     End of SPTEQR
+*
+      END
diff --git a/costa/native/external/lapack/sptrfs.f b/costa/native/external/lapack/sptrfs.f
new file mode 100644
index 000000000..96dccabb4
--- /dev/null
+++ b/costa/native/external/lapack/sptrfs.f
@@ -0,0 +1,302 @@
+      SUBROUTINE SPTRFS( N, NRHS, D, E, DF, EF, B, LDB, X, LDX, FERR,
+     $                   BERR, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      REAL               B( LDB, * ), BERR( * ), D( * ), DF( * ),
+     $                   E( * ), EF( * ), FERR( * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPTRFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is symmetric positive definite
+*  and tridiagonal, and provides error bounds and backward error
+*  estimates for the solution.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  D       (input) REAL array, dimension (N)
+*          The n diagonal elements of the tridiagonal matrix A.
+*
+*  E       (input) REAL array, dimension (N-1)
+*          The (n-1) subdiagonal elements of the tridiagonal matrix A.
+*
+*  DF      (input) REAL array, dimension (N)
+*          The n diagonal elements of the diagonal matrix D from the
+*          factorization computed by SPTTRF.
+*
+*  EF      (input) REAL array, dimension (N-1)
+*          The (n-1) subdiagonal elements of the unit bidiagonal factor
+*          L from the factorization computed by SPTTRF.
+*
+*  B       (input) REAL array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) REAL array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by SPTTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) REAL array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+      REAL               TWO
+      PARAMETER          ( TWO = 2.0E+0 )
+      REAL               THREE
+      PARAMETER          ( THREE = 3.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            COUNT, I, IX, J, NZ
+      REAL               BI, CX, DX, EPS, EX, LSTRES, S, SAFE1, SAFE2,
+     $                   SAFMIN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SPTTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. External Functions ..
+      INTEGER            ISAMAX
+      REAL               SLAMCH
+      EXTERNAL           ISAMAX, SLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPTRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = 4
+      EPS = SLAMCH( 'Epsilon' )
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 90 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - A * X.  Also compute
+*        abs(A)*abs(x) + abs(b) for use in the backward error bound.
+*
+         IF( N.EQ.1 ) THEN
+            BI = B( 1, J )
+            DX = D( 1 )*X( 1, J )
+            WORK( N+1 ) = BI - DX
+            WORK( 1 ) = ABS( BI ) + ABS( DX )
+         ELSE
+            BI = B( 1, J )
+            DX = D( 1 )*X( 1, J )
+            EX = E( 1 )*X( 2, J )
+            WORK( N+1 ) = BI - DX - EX
+            WORK( 1 ) = ABS( BI ) + ABS( DX ) + ABS( EX )
+            DO 30 I = 2, N - 1
+               BI = B( I, J )
+               CX = E( I-1 )*X( I-1, J )
+               DX = D( I )*X( I, J )
+               EX = E( I )*X( I+1, J )
+               WORK( N+I ) = BI - CX - DX - EX
+               WORK( I ) = ABS( BI ) + ABS( CX ) + ABS( DX ) + ABS( EX )
+   30       CONTINUE
+            BI = B( N, J )
+            CX = E( N-1 )*X( N-1, J )
+            DX = D( N )*X( N, J )
+            WORK( N+N ) = BI - CX - DX
+            WORK( N ) = ABS( BI ) + ABS( CX ) + ABS( DX )
+         END IF
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         S = ZERO
+         DO 40 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) )
+            ELSE
+               S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) /
+     $             ( WORK( I )+SAFE1 ) )
+            END IF
+   40    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL SPTTRS( N, 1, DF, EF, WORK( N+1 ), N, INFO )
+            CALL SAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(A))*
+*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(A) is the inverse of A
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(A)*abs(X) + abs(B) is less than SAFE2.
+*
+         DO 50 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I )
+            ELSE
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1
+            END IF
+   50    CONTINUE
+         IX = ISAMAX( N, WORK, 1 )
+         FERR( J ) = WORK( IX )
+*
+*        Estimate the norm of inv(A).
+*
+*        Solve M(A) * x = e, where M(A) = (m(i,j)) is given by
+*
+*           m(i,j) =  abs(A(i,j)), i = j,
+*           m(i,j) = -abs(A(i,j)), i .ne. j,
+*
+*        and e = [ 1, 1, ..., 1 ]'.  Note M(A) = M(L)*D*M(L)'.
+*
+*        Solve M(L) * x = e.
+*
+         WORK( 1 ) = ONE
+         DO 60 I = 2, N
+            WORK( I ) = ONE + WORK( I-1 )*ABS( EF( I-1 ) )
+   60    CONTINUE
+*
+*        Solve D * M(L)' * x = b.
+*
+         WORK( N ) = WORK( N ) / DF( N )
+         DO 70 I = N - 1, 1, -1
+            WORK( I ) = WORK( I ) / DF( I ) + WORK( I+1 )*ABS( EF( I ) )
+   70    CONTINUE
+*
+*        Compute norm(inv(A)) = max(x(i)), 1<=i<=n.
+*
+         IX = ISAMAX( N, WORK, 1 )
+         FERR( J ) = FERR( J )*ABS( WORK( IX ) )
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 80 I = 1, N
+            LSTRES = MAX( LSTRES, ABS( X( I, J ) ) )
+   80    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+   90 CONTINUE
+*
+      RETURN
+*
+*     End of SPTRFS
+*
+      END
diff --git a/costa/native/external/lapack/sptsv.f b/costa/native/external/lapack/sptsv.f
new file mode 100644
index 000000000..01942c1d4
--- /dev/null
+++ b/costa/native/external/lapack/sptsv.f
@@ -0,0 +1,100 @@
+      SUBROUTINE SPTSV( N, NRHS, D, E, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 25, 1997
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      REAL               B( LDB, * ), D( * ), E( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPTSV computes the solution to a real system of linear equations
+*  A*X = B, where A is an N-by-N symmetric positive definite tridiagonal
+*  matrix, and X and B are N-by-NRHS matrices.
+*
+*  A is factored as A = L*D*L**T, and the factored form of A is then
+*  used to solve the system of equations.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  D       (input/output) REAL array, dimension (N)
+*          On entry, the n diagonal elements of the tridiagonal matrix
+*          A.  On exit, the n diagonal elements of the diagonal matrix
+*          D from the factorization A = L*D*L**T.
+*
+*  E       (input/output) REAL array, dimension (N-1)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix A.  On exit, the (n-1) subdiagonal elements of the
+*          unit bidiagonal factor L from the L*D*L**T factorization of
+*          A.  (E can also be regarded as the superdiagonal of the unit
+*          bidiagonal factor U from the U**T*D*U factorization of A.)
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i is not
+*                positive definite, and the solution has not been
+*                computed.  The factorization has not been completed
+*                unless i = N.
+*
+*  =====================================================================
+*
+*     .. External Subroutines ..
+      EXTERNAL           SPTTRF, SPTTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPTSV ', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the L*D*L' (or U'*D*U) factorization of A.
+*
+      CALL SPTTRF( N, D, E, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL SPTTRS( N, NRHS, D, E, B, LDB, INFO )
+      END IF
+      RETURN
+*
+*     End of SPTSV
+*
+      END
diff --git a/costa/native/external/lapack/sptsvx.f b/costa/native/external/lapack/sptsvx.f
new file mode 100644
index 000000000..278ccbfc7
--- /dev/null
+++ b/costa/native/external/lapack/sptsvx.f
@@ -0,0 +1,235 @@
+      SUBROUTINE SPTSVX( FACT, N, NRHS, D, E, DF, EF, B, LDB, X, LDX,
+     $                   RCOND, FERR, BERR, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          FACT
+      INTEGER            INFO, LDB, LDX, N, NRHS
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      REAL               B( LDB, * ), BERR( * ), D( * ), DF( * ),
+     $                   E( * ), EF( * ), FERR( * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPTSVX uses the factorization A = L*D*L**T to compute the solution
+*  to a real system of linear equations A*X = B, where A is an N-by-N
+*  symmetric positive definite tridiagonal matrix and X and B are
+*  N-by-NRHS matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'N', the matrix A is factored as A = L*D*L**T, where L
+*     is a unit lower bidiagonal matrix and D is diagonal.  The
+*     factorization can also be regarded as having the form
+*     A = U**T*D*U.
+*
+*  2. If the leading i-by-i principal minor is not positive definite,
+*     then the routine returns with INFO = i. Otherwise, the factored
+*     form of A is used to estimate the condition number of the matrix
+*     A.  If the reciprocal of the condition number is less than machine
+*     precision, INFO = N+1 is returned as a warning, but the routine
+*     still goes on to solve for X and compute error bounds as
+*     described below.
+*
+*  3. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  4. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of A has been
+*          supplied on entry.
+*          = 'F':  On entry, DF and EF contain the factored form of A.
+*                  D, E, DF, and EF will not be modified.
+*          = 'N':  The matrix A will be copied to DF and EF and
+*                  factored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  D       (input) REAL array, dimension (N)
+*          The n diagonal elements of the tridiagonal matrix A.
+*
+*  E       (input) REAL array, dimension (N-1)
+*          The (n-1) subdiagonal elements of the tridiagonal matrix A.
+*
+*  DF      (input or output) REAL array, dimension (N)
+*          If FACT = 'F', then DF is an input argument and on entry
+*          contains the n diagonal elements of the diagonal matrix D
+*          from the L*D*L**T factorization of A.
+*          If FACT = 'N', then DF is an output argument and on exit
+*          contains the n diagonal elements of the diagonal matrix D
+*          from the L*D*L**T factorization of A.
+*
+*  EF      (input or output) REAL array, dimension (N-1)
+*          If FACT = 'F', then EF is an input argument and on entry
+*          contains the (n-1) subdiagonal elements of the unit
+*          bidiagonal factor L from the L*D*L**T factorization of A.
+*          If FACT = 'N', then EF is an output argument and on exit
+*          contains the (n-1) subdiagonal elements of the unit
+*          bidiagonal factor L from the L*D*L**T factorization of A.
+*
+*  B       (input) REAL array, dimension (LDB,NRHS)
+*          The N-by-NRHS right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) REAL array, dimension (LDX,NRHS)
+*          If INFO = 0 of INFO = N+1, the N-by-NRHS solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) REAL
+*          The reciprocal condition number of the matrix A.  If RCOND
+*          is less than the machine precision (in particular, if
+*          RCOND = 0), the matrix is singular to working precision.
+*          This condition is indicated by a return code of INFO > 0.
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in any
+*          element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) REAL array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= N:  the leading minor of order i of A is
+*                       not positive definite, so the factorization
+*                       could not be completed, and the solution has not
+*                       been computed. RCOND = 0 is returned.
+*                = N+1: U is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOFACT
+      REAL               ANORM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH, SLANST
+      EXTERNAL           LSAME, SLAMCH, SLANST
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SLACPY, SPTCON, SPTRFS, SPTTRF, SPTTRS,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPTSVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( NOFACT ) THEN
+*
+*        Compute the L*D*L' (or U'*D*U) factorization of A.
+*
+         CALL SCOPY( N, D, 1, DF, 1 )
+         IF( N.GT.1 )
+     $      CALL SCOPY( N-1, E, 1, EF, 1 )
+         CALL SPTTRF( N, DF, EF, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      ANORM = SLANST( '1', N, D, E )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL SPTCON( N, DF, EF, ANORM, RCOND, WORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.SLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution vectors X.
+*
+      CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL SPTTRS( N, NRHS, DF, EF, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solutions and
+*     compute error bounds and backward error estimates for them.
+*
+      CALL SPTRFS( N, NRHS, D, E, DF, EF, B, LDB, X, LDX, FERR, BERR,
+     $             WORK, INFO )
+*
+      RETURN
+*
+*     End of SPTSVX
+*
+      END
diff --git a/costa/native/external/lapack/spttrf.f b/costa/native/external/lapack/spttrf.f
new file mode 100644
index 000000000..978ad2b88
--- /dev/null
+++ b/costa/native/external/lapack/spttrf.f
@@ -0,0 +1,153 @@
+      SUBROUTINE SPTTRF( N, D, E, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * ), E( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPTTRF computes the L*D*L' factorization of a real symmetric
+*  positive definite tridiagonal matrix A.  The factorization may also
+*  be regarded as having the form A = U'*D*U.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  D       (input/output) REAL array, dimension (N)
+*          On entry, the n diagonal elements of the tridiagonal matrix
+*          A.  On exit, the n diagonal elements of the diagonal matrix
+*          D from the L*D*L' factorization of A.
+*
+*  E       (input/output) REAL array, dimension (N-1)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix A.  On exit, the (n-1) subdiagonal elements of the
+*          unit bidiagonal factor L from the L*D*L' factorization of A.
+*          E can also be regarded as the superdiagonal of the unit
+*          bidiagonal factor U from the U'*D*U factorization of A.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*          > 0: if INFO = k, the leading minor of order k is not
+*               positive definite; if k < N, the factorization could not
+*               be completed, while if k = N, the factorization was
+*               completed, but D(N) = 0.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, I4
+      REAL               EI
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MOD
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+         CALL XERBLA( 'SPTTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Compute the L*D*L' (or U'*D*U) factorization of A.
+*
+      I4 = MOD( N-1, 4 )
+      DO 10 I = 1, I4
+         IF( D( I ).LE.ZERO ) THEN
+            INFO = I
+            GO TO 30
+         END IF
+         EI = E( I )
+         E( I ) = EI / D( I )
+         D( I+1 ) = D( I+1 ) - E( I )*EI
+   10 CONTINUE
+*
+      DO 20 I = I4 + 1, N - 4, 4
+*
+*        Drop out of the loop if d(i) <= 0: the matrix is not positive
+*        definite.
+*
+         IF( D( I ).LE.ZERO ) THEN
+            INFO = I
+            GO TO 30
+         END IF
+*
+*        Solve for e(i) and d(i+1).
+*
+         EI = E( I )
+         E( I ) = EI / D( I )
+         D( I+1 ) = D( I+1 ) - E( I )*EI
+*
+         IF( D( I+1 ).LE.ZERO ) THEN
+            INFO = I + 1
+            GO TO 30
+         END IF
+*
+*        Solve for e(i+1) and d(i+2).
+*
+         EI = E( I+1 )
+         E( I+1 ) = EI / D( I+1 )
+         D( I+2 ) = D( I+2 ) - E( I+1 )*EI
+*
+         IF( D( I+2 ).LE.ZERO ) THEN
+            INFO = I + 2
+            GO TO 30
+         END IF
+*
+*        Solve for e(i+2) and d(i+3).
+*
+         EI = E( I+2 )
+         E( I+2 ) = EI / D( I+2 )
+         D( I+3 ) = D( I+3 ) - E( I+2 )*EI
+*
+         IF( D( I+3 ).LE.ZERO ) THEN
+            INFO = I + 3
+            GO TO 30
+         END IF
+*
+*        Solve for e(i+3) and d(i+4).
+*
+         EI = E( I+3 )
+         E( I+3 ) = EI / D( I+3 )
+         D( I+4 ) = D( I+4 ) - E( I+3 )*EI
+   20 CONTINUE
+*
+*     Check d(n) for positive definiteness.
+*
+      IF( D( N ).LE.ZERO )
+     $   INFO = N
+*
+   30 CONTINUE
+      RETURN
+*
+*     End of SPTTRF
+*
+      END
diff --git a/costa/native/external/lapack/spttrs.f b/costa/native/external/lapack/spttrs.f
new file mode 100644
index 000000000..e6b08432f
--- /dev/null
+++ b/costa/native/external/lapack/spttrs.f
@@ -0,0 +1,115 @@
+      SUBROUTINE SPTTRS( N, NRHS, D, E, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      REAL               B( LDB, * ), D( * ), E( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPTTRS solves a tridiagonal system of the form
+*     A * X = B
+*  using the L*D*L' factorization of A computed by SPTTRF.  D is a
+*  diagonal matrix specified in the vector D, L is a unit bidiagonal
+*  matrix whose subdiagonal is specified in the vector E, and X and B
+*  are N by NRHS matrices.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the tridiagonal matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  D       (input) REAL array, dimension (N)
+*          The n diagonal elements of the diagonal matrix D from the
+*          L*D*L' factorization of A.
+*
+*  E       (input) REAL array, dimension (N-1)
+*          The (n-1) subdiagonal elements of the unit bidiagonal factor
+*          L from the L*D*L' factorization of A.  E can also be regarded
+*          as the superdiagonal of the unit bidiagonal factor U from the
+*          factorization A = U'*D*U.
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the right hand side vectors B for the system of
+*          linear equations.
+*          On exit, the solution vectors, X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            J, JB, NB
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SPTTS2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments.
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SPTTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+*     Determine the number of right-hand sides to solve at a time.
+*
+      IF( NRHS.EQ.1 ) THEN
+         NB = 1
+      ELSE
+         NB = MAX( 1, ILAENV( 1, 'SPTTRS', ' ', N, NRHS, -1, -1 ) )
+      END IF
+*
+      IF( NB.GE.NRHS ) THEN
+         CALL SPTTS2( N, NRHS, D, E, B, LDB )
+      ELSE
+         DO 10 J = 1, NRHS, NB
+            JB = MIN( NRHS-J+1, NB )
+            CALL SPTTS2( N, JB, D, E, B( 1, J ), LDB )
+   10    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of SPTTRS
+*
+      END
diff --git a/costa/native/external/lapack/sptts2.f b/costa/native/external/lapack/sptts2.f
new file mode 100644
index 000000000..df5244515
--- /dev/null
+++ b/costa/native/external/lapack/sptts2.f
@@ -0,0 +1,94 @@
+      SUBROUTINE SPTTS2( N, NRHS, D, E, B, LDB )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      REAL               B( LDB, * ), D( * ), E( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SPTTS2 solves a tridiagonal system of the form
+*     A * X = B
+*  using the L*D*L' factorization of A computed by SPTTRF.  D is a
+*  diagonal matrix specified in the vector D, L is a unit bidiagonal
+*  matrix whose subdiagonal is specified in the vector E, and X and B
+*  are N by NRHS matrices.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the tridiagonal matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  D       (input) REAL array, dimension (N)
+*          The n diagonal elements of the diagonal matrix D from the
+*          L*D*L' factorization of A.
+*
+*  E       (input) REAL array, dimension (N-1)
+*          The (n-1) subdiagonal elements of the unit bidiagonal factor
+*          L from the L*D*L' factorization of A.  E can also be regarded
+*          as the superdiagonal of the unit bidiagonal factor U from the
+*          factorization A = U'*D*U.
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the right hand side vectors B for the system of
+*          linear equations.
+*          On exit, the solution vectors, X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, J
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SSCAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.1 ) THEN
+         IF( N.EQ.1 )
+     $      CALL SSCAL( NRHS, 1. / D( 1 ), B, LDB )
+         RETURN
+      END IF
+*
+*     Solve A * X = B using the factorization A = L*D*L',
+*     overwriting each right hand side vector with its solution.
+*
+      DO 30 J = 1, NRHS
+*
+*           Solve L * x = b.
+*
+         DO 10 I = 2, N
+            B( I, J ) = B( I, J ) - B( I-1, J )*E( I-1 )
+   10    CONTINUE
+*
+*           Solve D * L' * x = b.
+*
+         B( N, J ) = B( N, J ) / D( N )
+         DO 20 I = N - 1, 1, -1
+            B( I, J ) = B( I, J ) / D( I ) - B( I+1, J )*E( I )
+   20    CONTINUE
+   30 CONTINUE
+*
+      RETURN
+*
+*     End of SPTTS2
+*
+      END
diff --git a/costa/native/external/lapack/srscl.f b/costa/native/external/lapack/srscl.f
new file mode 100644
index 000000000..a61c0021b
--- /dev/null
+++ b/costa/native/external/lapack/srscl.f
@@ -0,0 +1,115 @@
+      SUBROUTINE SRSCL( N, SA, SX, INCX )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+      REAL               SA
+*     ..
+*     .. Array Arguments ..
+      REAL               SX( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SRSCL multiplies an n-element real vector x by the real scalar 1/a.
+*  This is done without overflow or underflow as long as
+*  the final result x/a does not overflow or underflow.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of components of the vector x.
+*
+*  SA      (input) REAL
+*          The scalar a which is used to divide each component of x.
+*          SA must be >= 0, or the subroutine will divide by zero.
+*
+*  SX      (input/output) REAL array, dimension
+*                         (1+(N-1)*abs(INCX))
+*          The n-element vector x.
+*
+*  INCX    (input) INTEGER
+*          The increment between successive values of the vector SX.
+*          > 0:  SX(1) = X(1) and SX(1+(i-1)*INCX) = x(i),     1< i<= n
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            DONE
+      REAL               BIGNUM, CDEN, CDEN1, CNUM, CNUM1, MUL, SMLNUM
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLABAD, SSCAL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 )
+     $   RETURN
+*
+*     Get machine parameters
+*
+      SMLNUM = SLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+*
+*     Initialize the denominator to SA and the numerator to 1.
+*
+      CDEN = SA
+      CNUM = ONE
+*
+   10 CONTINUE
+      CDEN1 = CDEN*SMLNUM
+      CNUM1 = CNUM / BIGNUM
+      IF( ABS( CDEN1 ).GT.ABS( CNUM ) .AND. CNUM.NE.ZERO ) THEN
+*
+*        Pre-multiply X by SMLNUM if CDEN is large compared to CNUM.
+*
+         MUL = SMLNUM
+         DONE = .FALSE.
+         CDEN = CDEN1
+      ELSE IF( ABS( CNUM1 ).GT.ABS( CDEN ) ) THEN
+*
+*        Pre-multiply X by BIGNUM if CDEN is small compared to CNUM.
+*
+         MUL = BIGNUM
+         DONE = .FALSE.
+         CNUM = CNUM1
+      ELSE
+*
+*        Multiply X by CNUM / CDEN and return.
+*
+         MUL = CNUM / CDEN
+         DONE = .TRUE.
+      END IF
+*
+*     Scale the vector X by MUL
+*
+      CALL SSCAL( N, MUL, SX, INCX )
+*
+      IF( .NOT.DONE )
+     $   GO TO 10
+*
+      RETURN
+*
+*     End of SRSCL
+*
+      END
diff --git a/costa/native/external/lapack/ssbev.f b/costa/native/external/lapack/ssbev.f
new file mode 100644
index 000000000..694adf023
--- /dev/null
+++ b/costa/native/external/lapack/ssbev.f
@@ -0,0 +1,206 @@
+      SUBROUTINE SSBEV( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK,
+     $                  INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, KD, LDAB, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      REAL               AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSBEV computes all the eigenvalues and, optionally, eigenvectors of
+*  a real symmetric band matrix A.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) REAL array, dimension (LDAB, N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*          On exit, AB is overwritten by values generated during the
+*          reduction to tridiagonal form.  If UPLO = 'U', the first
+*          superdiagonal and the diagonal of the tridiagonal matrix T
+*          are returned in rows KD and KD+1 of AB, and if UPLO = 'L',
+*          the diagonal and first subdiagonal of T are returned in the
+*          first two rows of AB.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD + 1.
+*
+*  W       (output) REAL array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) REAL array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal
+*          eigenvectors of the matrix A, with the i-th column of Z
+*          holding the eigenvector associated with W(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace) REAL array, dimension (max(1,3*N-2))
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the algorithm failed to converge; i
+*                off-diagonal elements of an intermediate tridiagonal
+*                form did not converge to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LOWER, WANTZ
+      INTEGER            IINFO, IMAX, INDE, INDWRK, ISCALE
+      REAL               ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
+     $                   SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH, SLANSB
+      EXTERNAL           LSAME, SLAMCH, SLANSB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLASCL, SSBTRD, SSCAL, SSTEQR, SSTERF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      LOWER = LSAME( UPLO, 'L' )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -6
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -9
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSBEV ', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( LOWER ) THEN
+            W( 1 ) = AB( 1, 1 )
+         ELSE
+            W( 1 ) = AB( KD+1, 1 )
+         END IF
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      EPS = SLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = SQRT( BIGNUM )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ANRM = SLANSB( 'M', UPLO, N, KD, AB, LDAB, WORK )
+      ISCALE = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         IF( LOWER ) THEN
+            CALL SLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO )
+         ELSE
+            CALL SLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO )
+         END IF
+      END IF
+*
+*     Call SSBTRD to reduce symmetric band matrix to tridiagonal form.
+*
+      INDE = 1
+      INDWRK = INDE + N
+      CALL SSBTRD( JOBZ, UPLO, N, KD, AB, LDAB, W, WORK( INDE ), Z, LDZ,
+     $             WORK( INDWRK ), IINFO )
+*
+*     For eigenvalues only, call SSTERF.  For eigenvectors, call SSTEQR.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL SSTERF( N, W, WORK( INDE ), INFO )
+      ELSE
+         CALL SSTEQR( JOBZ, N, W, WORK( INDE ), Z, LDZ, WORK( INDWRK ),
+     $                INFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = N
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL SSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+      RETURN
+*
+*     End of SSBEV
+*
+      END
diff --git a/costa/native/external/lapack/ssbevd.f b/costa/native/external/lapack/ssbevd.f
new file mode 100644
index 000000000..d217793d6
--- /dev/null
+++ b/costa/native/external/lapack/ssbevd.f
@@ -0,0 +1,265 @@
+      SUBROUTINE SSBEVD( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK,
+     $                   LWORK, IWORK, LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, KD, LDAB, LDZ, LIWORK, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               AB( LDAB, * ), W( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSBEVD computes all the eigenvalues and, optionally, eigenvectors of
+*  a real symmetric band matrix A. If eigenvectors are desired, it uses
+*  a divide and conquer algorithm.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) REAL array, dimension (LDAB, N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*          On exit, AB is overwritten by values generated during the
+*          reduction to tridiagonal form.  If UPLO = 'U', the first
+*          superdiagonal and the diagonal of the tridiagonal matrix T
+*          are returned in rows KD and KD+1 of AB, and if UPLO = 'L',
+*          the diagonal and first subdiagonal of T are returned in the
+*          first two rows of AB.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD + 1.
+*
+*  W       (output) REAL array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) REAL array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal
+*          eigenvectors of the matrix A, with the i-th column of Z
+*          holding the eigenvector associated with W(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace/output) REAL array,
+*                                         dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          IF N <= 1,                LWORK must be at least 1.
+*          If JOBZ  = 'N' and N > 2, LWORK must be at least 2*N.
+*          If JOBZ  = 'V' and N > 2, LWORK must be at least
+*                         ( 1 + 5*N + 2*N**2 ).
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array LIWORK.
+*          If JOBZ  = 'N' or N <= 1, LIWORK must be at least 1.
+*          If JOBZ  = 'V' and N > 2, LIWORK must be at least 3 + 5*N.
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the algorithm failed to converge; i
+*                off-diagonal elements of an intermediate tridiagonal
+*                form did not converge to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LOWER, LQUERY, WANTZ
+      INTEGER            IINFO, INDE, INDWK2, INDWRK, ISCALE, LIWMIN,
+     $                   LLWRK2, LWMIN
+      REAL               ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
+     $                   SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH, SLANSB
+      EXTERNAL           LSAME, SLAMCH, SLANSB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMM, SLACPY, SLASCL, SSBTRD, SSCAL, SSTEDC,
+     $                   SSTERF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      LOWER = LSAME( UPLO, 'L' )
+      LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( N.LE.1 ) THEN
+         LIWMIN = 1
+         LWMIN = 1
+      ELSE
+         IF( WANTZ ) THEN
+            LIWMIN = 3 + 5*N
+            LWMIN = 1 + 5*N + 2*N**2
+         ELSE
+            LIWMIN = 1
+            LWMIN = 2*N
+         END IF
+      END IF
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -6
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -9
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -11
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSBEVD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN 
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN 
+*
+      IF( N.EQ.1 ) THEN
+         W( 1 ) = AB( 1, 1 )
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN 
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      EPS = SLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = SQRT( BIGNUM )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ANRM = SLANSB( 'M', UPLO, N, KD, AB, LDAB, WORK )
+      ISCALE = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         IF( LOWER ) THEN
+            CALL SLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO )
+         ELSE
+            CALL SLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO )
+         END IF
+      END IF
+*
+*     Call SSBTRD to reduce symmetric band matrix to tridiagonal form.
+*
+      INDE = 1
+      INDWRK = INDE + N
+      INDWK2 = INDWRK + N*N
+      LLWRK2 = LWORK - INDWK2 + 1
+      CALL SSBTRD( JOBZ, UPLO, N, KD, AB, LDAB, W, WORK( INDE ), Z, LDZ,
+     $             WORK( INDWRK ), IINFO )
+*
+*     For eigenvalues only, call SSTERF.  For eigenvectors, call SSTEDC.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL SSTERF( N, W, WORK( INDE ), INFO )
+      ELSE
+         CALL SSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N,
+     $                WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO )
+         CALL SGEMM( 'N', 'N', N, N, N, ONE, Z, LDZ, WORK( INDWRK ), N,
+     $               ZERO, WORK( INDWK2 ), N )
+         CALL SLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( ISCALE.EQ.1 )
+     $   CALL SSCAL( N, ONE / SIGMA, W, 1 )
+*
+      WORK( 1 ) = LWMIN
+      IWORK( 1 ) = LIWMIN
+      RETURN
+*
+*     End of SSBEVD
+*
+      END
diff --git a/costa/native/external/lapack/ssbevx.f b/costa/native/external/lapack/ssbevx.f
new file mode 100644
index 000000000..333e44592
--- /dev/null
+++ b/costa/native/external/lapack/ssbevx.f
@@ -0,0 +1,411 @@
+      SUBROUTINE SSBEVX( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, Q, LDQ, VL,
+     $                   VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, IWORK,
+     $                   IFAIL, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE, UPLO
+      INTEGER            IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N
+      REAL               ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IFAIL( * ), IWORK( * )
+      REAL               AB( LDAB, * ), Q( LDQ, * ), W( * ), WORK( * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSBEVX computes selected eigenvalues and, optionally, eigenvectors
+*  of a real symmetric band matrix A.  Eigenvalues and eigenvectors can
+*  be selected by specifying either a range of values or a range of
+*  indices for the desired eigenvalues.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found;
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found;
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) REAL array, dimension (LDAB, N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*          On exit, AB is overwritten by values generated during the
+*          reduction to tridiagonal form.  If UPLO = 'U', the first
+*          superdiagonal and the diagonal of the tridiagonal matrix T
+*          are returned in rows KD and KD+1 of AB, and if UPLO = 'L',
+*          the diagonal and first subdiagonal of T are returned in the
+*          first two rows of AB.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD + 1.
+*
+*  Q       (output) REAL array, dimension (LDQ, N)
+*          If JOBZ = 'V', the N-by-N orthogonal matrix used in the
+*                         reduction to tridiagonal form.
+*          If JOBZ = 'N', the array Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.  If JOBZ = 'V', then
+*          LDQ >= max(1,N).
+*
+*  VL      (input) REAL
+*  VU      (input) REAL
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) REAL
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less than
+*          or equal to zero, then  EPS*|T|  will be used in its place,
+*          where |T| is the 1-norm of the tridiagonal matrix obtained
+*          by reducing AB to tridiagonal form.
+*
+*          Eigenvalues will be computed most accurately when ABSTOL is
+*          set to twice the underflow threshold 2*SLAMCH('S'), not zero.
+*          If this routine returns with INFO>0, indicating that some
+*          eigenvectors did not converge, try setting ABSTOL to
+*          2*SLAMCH('S').
+*
+*          See "Computing Small Singular Values of Bidiagonal Matrices
+*          with Guaranteed High Relative Accuracy," by Demmel and
+*          Kahan, LAPACK Working Note #3.
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) REAL array, dimension (N)
+*          The first M elements contain the selected eigenvalues in
+*          ascending order.
+*
+*  Z       (output) REAL array, dimension (LDZ, max(1,M))
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix A
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          If an eigenvector fails to converge, then that column of Z
+*          contains the latest approximation to the eigenvector, and the
+*          index of the eigenvector is returned in IFAIL.
+*          If JOBZ = 'N', then Z is not referenced.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace) REAL array, dimension (7*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (5*N)
+*
+*  IFAIL   (output) INTEGER array, dimension (N)
+*          If JOBZ = 'V', then if INFO = 0, the first M elements of
+*          IFAIL are zero.  If INFO > 0, then IFAIL contains the
+*          indices of the eigenvectors that failed to converge.
+*          If JOBZ = 'N', then IFAIL is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = i, then i eigenvectors failed to converge.
+*                Their indices are stored in array IFAIL.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, LOWER, VALEIG, WANTZ
+      CHARACTER          ORDER
+      INTEGER            I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL,
+     $                   INDISP, INDIWO, INDWRK, ISCALE, ITMP1, J, JJ,
+     $                   NSPLIT
+      REAL               ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN,
+     $                   SIGMA, SMLNUM, TMP1, VLL, VUU
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH, SLANSB
+      EXTERNAL           LSAME, SLAMCH, SLANSB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SGEMV, SLACPY, SLASCL, SSBTRD, SSCAL,
+     $                   SSTEBZ, SSTEIN, SSTEQR, SSTERF, SSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+      LOWER = LSAME( UPLO, 'L' )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -7
+      ELSE IF( WANTZ .AND. LDQ.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE
+         IF( VALEIG ) THEN
+            IF( N.GT.0 .AND. VU.LE.VL )
+     $         INFO = -11
+         ELSE IF( INDEIG ) THEN
+            IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN
+               INFO = -12
+            ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN
+               INFO = -13
+            END IF
+         END IF
+      END IF
+      IF( INFO.EQ.0 ) THEN
+         IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) )
+     $     INFO = -18
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSBEVX', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         M = 1
+         IF( LOWER ) THEN
+            TMP1 = AB( 1, 1 )
+         ELSE
+            TMP1 = AB( KD+1, 1 )
+         END IF
+         IF( VALEIG ) THEN
+            IF( .NOT.( VL.LT.TMP1 .AND. VU.GE.TMP1 ) )
+     $         M = 0
+         END IF
+         IF( M.EQ.1 ) THEN
+            W( 1 ) = TMP1
+            IF( WANTZ )
+     $         Z( 1, 1 ) = ONE
+         END IF
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      EPS = SLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ISCALE = 0
+      ABSTLL = ABSTOL
+      IF ( VALEIG ) THEN
+         VLL = VL
+         VUU = VU
+      ELSE
+         VLL = ZERO
+         VUU = ZERO
+      ENDIF
+      ANRM = SLANSB( 'M', UPLO, N, KD, AB, LDAB, WORK )
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         IF( LOWER ) THEN
+            CALL SLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO )
+         ELSE
+            CALL SLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO )
+         END IF
+         IF( ABSTOL.GT.0 )
+     $      ABSTLL = ABSTOL*SIGMA
+         IF( VALEIG ) THEN
+            VLL = VL*SIGMA
+            VUU = VU*SIGMA
+         END IF
+      END IF
+*
+*     Call SSBTRD to reduce symmetric band matrix to tridiagonal form.
+*
+      INDD = 1
+      INDE = INDD + N
+      INDWRK = INDE + N
+      CALL SSBTRD( JOBZ, UPLO, N, KD, AB, LDAB, WORK( INDD ),
+     $             WORK( INDE ), Q, LDQ, WORK( INDWRK ), IINFO )
+*
+*     If all eigenvalues are desired and ABSTOL is less than or equal
+*     to zero, then call SSTERF or SSTEQR.  If this fails for some
+*     eigenvalue, then try SSTEBZ.
+*
+      IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND.
+     $    ( ABSTOL.LE.ZERO ) ) THEN
+         CALL SCOPY( N, WORK( INDD ), 1, W, 1 )
+         INDEE = INDWRK + 2*N
+         IF( .NOT.WANTZ ) THEN
+            CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 )
+            CALL SSTERF( N, W, WORK( INDEE ), INFO )
+         ELSE
+            CALL SLACPY( 'A', N, N, Q, LDQ, Z, LDZ )
+            CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 )
+            CALL SSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ,
+     $                   WORK( INDWRK ), INFO )
+            IF( INFO.EQ.0 ) THEN
+               DO 10 I = 1, N
+                  IFAIL( I ) = 0
+   10          CONTINUE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            M = N
+            GO TO 30
+         END IF
+         INFO = 0
+      END IF
+*
+*     Otherwise, call SSTEBZ and, if eigenvectors are desired, SSTEIN.
+*
+      IF( WANTZ ) THEN
+         ORDER = 'B'
+      ELSE
+         ORDER = 'E'
+      END IF
+      INDIBL = 1
+      INDISP = INDIBL + N
+      INDIWO = INDISP + N
+      CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL,
+     $             WORK( INDD ), WORK( INDE ), M, NSPLIT, W,
+     $             IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ),
+     $             IWORK( INDIWO ), INFO )
+*
+      IF( WANTZ ) THEN
+         CALL SSTEIN( N, WORK( INDD ), WORK( INDE ), M, W,
+     $                IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ,
+     $                WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO )
+*
+*        Apply orthogonal matrix used in reduction to tridiagonal
+*        form to eigenvectors returned by SSTEIN.
+*
+         DO 20 J = 1, M
+            CALL SCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 )
+            CALL SGEMV( 'N', N, N, ONE, Q, LDQ, WORK, 1, ZERO,
+     $                  Z( 1, J ), 1 )
+   20    CONTINUE
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+   30 CONTINUE
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = M
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL SSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+*     If eigenvalues are not in order, then sort them, along with
+*     eigenvectors.
+*
+      IF( WANTZ ) THEN
+         DO 50 J = 1, M - 1
+            I = 0
+            TMP1 = W( J )
+            DO 40 JJ = J + 1, M
+               IF( W( JJ ).LT.TMP1 ) THEN
+                  I = JJ
+                  TMP1 = W( JJ )
+               END IF
+   40       CONTINUE
+*
+            IF( I.NE.0 ) THEN
+               ITMP1 = IWORK( INDIBL+I-1 )
+               W( I ) = W( J )
+               IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 )
+               W( J ) = TMP1
+               IWORK( INDIBL+J-1 ) = ITMP1
+               CALL SSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 )
+               IF( INFO.NE.0 ) THEN
+                  ITMP1 = IFAIL( I )
+                  IFAIL( I ) = IFAIL( J )
+                  IFAIL( J ) = ITMP1
+               END IF
+            END IF
+   50    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of SSBEVX
+*
+      END
diff --git a/costa/native/external/lapack/ssbgst.f b/costa/native/external/lapack/ssbgst.f
new file mode 100644
index 000000000..915a30aee
--- /dev/null
+++ b/costa/native/external/lapack/ssbgst.f
@@ -0,0 +1,1346 @@
+      SUBROUTINE SSBGST( VECT, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, X,
+     $                   LDX, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO, VECT
+      INTEGER            INFO, KA, KB, LDAB, LDBB, LDX, N
+*     ..
+*     .. Array Arguments ..
+      REAL               AB( LDAB, * ), BB( LDBB, * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSBGST reduces a real symmetric-definite banded generalized
+*  eigenproblem  A*x = lambda*B*x  to standard form  C*y = lambda*y,
+*  such that C has the same bandwidth as A.
+*
+*  B must have been previously factorized as S**T*S by SPBSTF, using a
+*  split Cholesky factorization. A is overwritten by C = X**T*A*X, where
+*  X = S**(-1)*Q and Q is an orthogonal matrix chosen to preserve the
+*  bandwidth of A.
+*
+*  Arguments
+*  =========
+*
+*  VECT    (input) CHARACTER*1
+*          = 'N':  do not form the transformation matrix X;
+*          = 'V':  form X.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  KA      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KA >= 0.
+*
+*  KB      (input) INTEGER
+*          The number of superdiagonals of the matrix B if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KA >= KB >= 0.
+*
+*  AB      (input/output) REAL array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first ka+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+ka).
+*
+*          On exit, the transformed matrix X**T*A*X, stored in the same
+*          format as A.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KA+1.
+*
+*  BB      (input) REAL array, dimension (LDBB,N)
+*          The banded factor S from the split Cholesky factorization of
+*          B, as returned by SPBSTF, stored in the first KB+1 rows of
+*          the array.
+*
+*  LDBB    (input) INTEGER
+*          The leading dimension of the array BB.  LDBB >= KB+1.
+*
+*  X       (output) REAL array, dimension (LDX,N)
+*          If VECT = 'V', the n-by-n matrix X.
+*          If VECT = 'N', the array X is not referenced.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.
+*          LDX >= max(1,N) if VECT = 'V'; LDX >= 1 otherwise.
+*
+*  WORK    (workspace) REAL array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPDATE, UPPER, WANTX
+      INTEGER            I, I0, I1, I2, INCA, J, J1, J1T, J2, J2T, K,
+     $                   KA1, KB1, KBT, L, M, NR, NRT, NX
+      REAL               BII, RA, RA1, T
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGER, SLAR2V, SLARGV, SLARTG, SLARTV, SLASET,
+     $                   SROT, SSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      WANTX = LSAME( VECT, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+      KA1 = KA + 1
+      KB1 = KB + 1
+      INFO = 0
+      IF( .NOT.WANTX .AND. .NOT.LSAME( VECT, 'N' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KA.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KB.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.KA+1 ) THEN
+         INFO = -7
+      ELSE IF( LDBB.LT.KB+1 ) THEN
+         INFO = -9
+      ELSE IF( LDX.LT.1 .OR. WANTX .AND. LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSBGST', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      INCA = LDAB*KA1
+*
+*     Initialize X to the unit matrix, if needed
+*
+      IF( WANTX )
+     $   CALL SLASET( 'Full', N, N, ZERO, ONE, X, LDX )
+*
+*     Set M to the splitting point m. It must be the same value as is
+*     used in SPBSTF. The chosen value allows the arrays WORK and RWORK
+*     to be of dimension (N).
+*
+      M = ( N+KB ) / 2
+*
+*     The routine works in two phases, corresponding to the two halves
+*     of the split Cholesky factorization of B as S**T*S where
+*
+*     S = ( U    )
+*         ( M  L )
+*
+*     with U upper triangular of order m, and L lower triangular of
+*     order n-m. S has the same bandwidth as B.
+*
+*     S is treated as a product of elementary matrices:
+*
+*     S = S(m)*S(m-1)*...*S(2)*S(1)*S(m+1)*S(m+2)*...*S(n-1)*S(n)
+*
+*     where S(i) is determined by the i-th row of S.
+*
+*     In phase 1, the index i takes the values n, n-1, ... , m+1;
+*     in phase 2, it takes the values 1, 2, ... , m.
+*
+*     For each value of i, the current matrix A is updated by forming
+*     inv(S(i))**T*A*inv(S(i)). This creates a triangular bulge outside
+*     the band of A. The bulge is then pushed down toward the bottom of
+*     A in phase 1, and up toward the top of A in phase 2, by applying
+*     plane rotations.
+*
+*     There are kb*(kb+1)/2 elements in the bulge, but at most 2*kb-1
+*     of them are linearly independent, so annihilating a bulge requires
+*     only 2*kb-1 plane rotations. The rotations are divided into a 1st
+*     set of kb-1 rotations, and a 2nd set of kb rotations.
+*
+*     Wherever possible, rotations are generated and applied in vector
+*     operations of length NR between the indices J1 and J2 (sometimes
+*     replaced by modified values NRT, J1T or J2T).
+*
+*     The cosines and sines of the rotations are stored in the array
+*     WORK. The cosines of the 1st set of rotations are stored in
+*     elements n+2:n+m-kb-1 and the sines of the 1st set in elements
+*     2:m-kb-1; the cosines of the 2nd set are stored in elements
+*     n+m-kb+1:2*n and the sines of the second set in elements m-kb+1:n.
+*
+*     The bulges are not formed explicitly; nonzero elements outside the
+*     band are created only when they are required for generating new
+*     rotations; they are stored in the array WORK, in positions where
+*     they are later overwritten by the sines of the rotations which
+*     annihilate them.
+*
+*     **************************** Phase 1 *****************************
+*
+*     The logical structure of this phase is:
+*
+*     UPDATE = .TRUE.
+*     DO I = N, M + 1, -1
+*        use S(i) to update A and create a new bulge
+*        apply rotations to push all bulges KA positions downward
+*     END DO
+*     UPDATE = .FALSE.
+*     DO I = M + KA + 1, N - 1
+*        apply rotations to push all bulges KA positions downward
+*     END DO
+*
+*     To avoid duplicating code, the two loops are merged.
+*
+      UPDATE = .TRUE.
+      I = N + 1
+   10 CONTINUE
+      IF( UPDATE ) THEN
+         I = I - 1
+         KBT = MIN( KB, I-1 )
+         I0 = I - 1
+         I1 = MIN( N, I+KA )
+         I2 = I - KBT + KA1
+         IF( I.LT.M+1 ) THEN
+            UPDATE = .FALSE.
+            I = I + 1
+            I0 = M
+            IF( KA.EQ.0 )
+     $         GO TO 480
+            GO TO 10
+         END IF
+      ELSE
+         I = I + KA
+         IF( I.GT.N-1 )
+     $      GO TO 480
+      END IF
+*
+      IF( UPPER ) THEN
+*
+*        Transform A, working with the upper triangle
+*
+         IF( UPDATE ) THEN
+*
+*           Form  inv(S(i))**T * A * inv(S(i))
+*
+            BII = BB( KB1, I )
+            DO 20 J = I, I1
+               AB( I-J+KA1, J ) = AB( I-J+KA1, J ) / BII
+   20       CONTINUE
+            DO 30 J = MAX( 1, I-KA ), I
+               AB( J-I+KA1, I ) = AB( J-I+KA1, I ) / BII
+   30       CONTINUE
+            DO 60 K = I - KBT, I - 1
+               DO 40 J = I - KBT, K
+                  AB( J-K+KA1, K ) = AB( J-K+KA1, K ) -
+     $                               BB( J-I+KB1, I )*AB( K-I+KA1, I ) -
+     $                               BB( K-I+KB1, I )*AB( J-I+KA1, I ) +
+     $                               AB( KA1, I )*BB( J-I+KB1, I )*
+     $                               BB( K-I+KB1, I )
+   40          CONTINUE
+               DO 50 J = MAX( 1, I-KA ), I - KBT - 1
+                  AB( J-K+KA1, K ) = AB( J-K+KA1, K ) -
+     $                               BB( K-I+KB1, I )*AB( J-I+KA1, I )
+   50          CONTINUE
+   60       CONTINUE
+            DO 80 J = I, I1
+               DO 70 K = MAX( J-KA, I-KBT ), I - 1
+                  AB( K-J+KA1, J ) = AB( K-J+KA1, J ) -
+     $                               BB( K-I+KB1, I )*AB( I-J+KA1, J )
+   70          CONTINUE
+   80       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by inv(S(i))
+*
+               CALL SSCAL( N-M, ONE / BII, X( M+1, I ), 1 )
+               IF( KBT.GT.0 )
+     $            CALL SGER( N-M, KBT, -ONE, X( M+1, I ), 1,
+     $                       BB( KB1-KBT, I ), 1, X( M+1, I-KBT ), LDX )
+            END IF
+*
+*           store a(i,i1) in RA1 for use in next loop over K
+*
+            RA1 = AB( I-I1+KA1, I1 )
+         END IF
+*
+*        Generate and apply vectors of rotations to chase all the
+*        existing bulges KA positions down toward the bottom of the
+*        band
+*
+         DO 130 K = 1, KB - 1
+            IF( UPDATE ) THEN
+*
+*              Determine the rotations which would annihilate the bulge
+*              which has in theory just been created
+*
+               IF( I-K+KA.LT.N .AND. I-K.GT.1 ) THEN
+*
+*                 generate rotation to annihilate a(i,i-k+ka+1)
+*
+                  CALL SLARTG( AB( K+1, I-K+KA ), RA1,
+     $                         WORK( N+I-K+KA-M ), WORK( I-K+KA-M ),
+     $                         RA )
+*
+*                 create nonzero element a(i-k,i-k+ka+1) outside the
+*                 band and store it in WORK(i-k)
+*
+                  T = -BB( KB1-K, I )*RA1
+                  WORK( I-K ) = WORK( N+I-K+KA-M )*T -
+     $                          WORK( I-K+KA-M )*AB( 1, I-K+KA )
+                  AB( 1, I-K+KA ) = WORK( I-K+KA-M )*T +
+     $                              WORK( N+I-K+KA-M )*AB( 1, I-K+KA )
+                  RA1 = RA
+               END IF
+            END IF
+            J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1
+            NR = ( N-J2+KA ) / KA1
+            J1 = J2 + ( NR-1 )*KA1
+            IF( UPDATE ) THEN
+               J2T = MAX( J2, I+2*KA-K+1 )
+            ELSE
+               J2T = J2
+            END IF
+            NRT = ( N-J2T+KA ) / KA1
+            DO 90 J = J2T, J1, KA1
+*
+*              create nonzero element a(j-ka,j+1) outside the band
+*              and store it in WORK(j-m)
+*
+               WORK( J-M ) = WORK( J-M )*AB( 1, J+1 )
+               AB( 1, J+1 ) = WORK( N+J-M )*AB( 1, J+1 )
+   90       CONTINUE
+*
+*           generate rotations in 1st set to annihilate elements which
+*           have been created outside the band
+*
+            IF( NRT.GT.0 )
+     $         CALL SLARGV( NRT, AB( 1, J2T ), INCA, WORK( J2T-M ), KA1,
+     $                      WORK( N+J2T-M ), KA1 )
+            IF( NR.GT.0 ) THEN
+*
+*              apply rotations in 1st set from the right
+*
+               DO 100 L = 1, KA - 1
+                  CALL SLARTV( NR, AB( KA1-L, J2 ), INCA,
+     $                         AB( KA-L, J2+1 ), INCA, WORK( N+J2-M ),
+     $                         WORK( J2-M ), KA1 )
+  100          CONTINUE
+*
+*              apply rotations in 1st set from both sides to diagonal
+*              blocks
+*
+               CALL SLAR2V( NR, AB( KA1, J2 ), AB( KA1, J2+1 ),
+     $                      AB( KA, J2+1 ), INCA, WORK( N+J2-M ),
+     $                      WORK( J2-M ), KA1 )
+*
+            END IF
+*
+*           start applying rotations in 1st set from the left
+*
+            DO 110 L = KA - 1, KB - K + 1, -1
+               NRT = ( N-J2+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL SLARTV( NRT, AB( L, J2+KA1-L ), INCA,
+     $                         AB( L+1, J2+KA1-L ), INCA,
+     $                         WORK( N+J2-M ), WORK( J2-M ), KA1 )
+  110       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 1st set
+*
+               DO 120 J = J2, J1, KA1
+                  CALL SROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1,
+     $                       WORK( N+J-M ), WORK( J-M ) )
+  120          CONTINUE
+            END IF
+  130    CONTINUE
+*
+         IF( UPDATE ) THEN
+            IF( I2.LE.N .AND. KBT.GT.0 ) THEN
+*
+*              create nonzero element a(i-kbt,i-kbt+ka+1) outside the
+*              band and store it in WORK(i-kbt)
+*
+               WORK( I-KBT ) = -BB( KB1-KBT, I )*RA1
+            END IF
+         END IF
+*
+         DO 170 K = KB, 1, -1
+            IF( UPDATE ) THEN
+               J2 = I - K - 1 + MAX( 2, K-I0+1 )*KA1
+            ELSE
+               J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1
+            END IF
+*
+*           finish applying rotations in 2nd set from the left
+*
+            DO 140 L = KB - K, 1, -1
+               NRT = ( N-J2+KA+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL SLARTV( NRT, AB( L, J2-L+1 ), INCA,
+     $                         AB( L+1, J2-L+1 ), INCA, WORK( N+J2-KA ),
+     $                         WORK( J2-KA ), KA1 )
+  140       CONTINUE
+            NR = ( N-J2+KA ) / KA1
+            J1 = J2 + ( NR-1 )*KA1
+            DO 150 J = J1, J2, -KA1
+               WORK( J ) = WORK( J-KA )
+               WORK( N+J ) = WORK( N+J-KA )
+  150       CONTINUE
+            DO 160 J = J2, J1, KA1
+*
+*              create nonzero element a(j-ka,j+1) outside the band
+*              and store it in WORK(j)
+*
+               WORK( J ) = WORK( J )*AB( 1, J+1 )
+               AB( 1, J+1 ) = WORK( N+J )*AB( 1, J+1 )
+  160       CONTINUE
+            IF( UPDATE ) THEN
+               IF( I-K.LT.N-KA .AND. K.LE.KBT )
+     $            WORK( I-K+KA ) = WORK( I-K )
+            END IF
+  170    CONTINUE
+*
+         DO 210 K = KB, 1, -1
+            J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1
+            NR = ( N-J2+KA ) / KA1
+            J1 = J2 + ( NR-1 )*KA1
+            IF( NR.GT.0 ) THEN
+*
+*              generate rotations in 2nd set to annihilate elements
+*              which have been created outside the band
+*
+               CALL SLARGV( NR, AB( 1, J2 ), INCA, WORK( J2 ), KA1,
+     $                      WORK( N+J2 ), KA1 )
+*
+*              apply rotations in 2nd set from the right
+*
+               DO 180 L = 1, KA - 1
+                  CALL SLARTV( NR, AB( KA1-L, J2 ), INCA,
+     $                         AB( KA-L, J2+1 ), INCA, WORK( N+J2 ),
+     $                         WORK( J2 ), KA1 )
+  180          CONTINUE
+*
+*              apply rotations in 2nd set from both sides to diagonal
+*              blocks
+*
+               CALL SLAR2V( NR, AB( KA1, J2 ), AB( KA1, J2+1 ),
+     $                      AB( KA, J2+1 ), INCA, WORK( N+J2 ),
+     $                      WORK( J2 ), KA1 )
+*
+            END IF
+*
+*           start applying rotations in 2nd set from the left
+*
+            DO 190 L = KA - 1, KB - K + 1, -1
+               NRT = ( N-J2+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL SLARTV( NRT, AB( L, J2+KA1-L ), INCA,
+     $                         AB( L+1, J2+KA1-L ), INCA, WORK( N+J2 ),
+     $                         WORK( J2 ), KA1 )
+  190       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 2nd set
+*
+               DO 200 J = J2, J1, KA1
+                  CALL SROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1,
+     $                       WORK( N+J ), WORK( J ) )
+  200          CONTINUE
+            END IF
+  210    CONTINUE
+*
+         DO 230 K = 1, KB - 1
+            J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1
+*
+*           finish applying rotations in 1st set from the left
+*
+            DO 220 L = KB - K, 1, -1
+               NRT = ( N-J2+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL SLARTV( NRT, AB( L, J2+KA1-L ), INCA,
+     $                         AB( L+1, J2+KA1-L ), INCA,
+     $                         WORK( N+J2-M ), WORK( J2-M ), KA1 )
+  220       CONTINUE
+  230    CONTINUE
+*
+         IF( KB.GT.1 ) THEN
+            DO 240 J = N - 1, I - KB + 2*KA + 1, -1
+               WORK( N+J-M ) = WORK( N+J-KA-M )
+               WORK( J-M ) = WORK( J-KA-M )
+  240       CONTINUE
+         END IF
+*
+      ELSE
+*
+*        Transform A, working with the lower triangle
+*
+         IF( UPDATE ) THEN
+*
+*           Form  inv(S(i))**T * A * inv(S(i))
+*
+            BII = BB( 1, I )
+            DO 250 J = I, I1
+               AB( J-I+1, I ) = AB( J-I+1, I ) / BII
+  250       CONTINUE
+            DO 260 J = MAX( 1, I-KA ), I
+               AB( I-J+1, J ) = AB( I-J+1, J ) / BII
+  260       CONTINUE
+            DO 290 K = I - KBT, I - 1
+               DO 270 J = I - KBT, K
+                  AB( K-J+1, J ) = AB( K-J+1, J ) -
+     $                             BB( I-J+1, J )*AB( I-K+1, K ) -
+     $                             BB( I-K+1, K )*AB( I-J+1, J ) +
+     $                             AB( 1, I )*BB( I-J+1, J )*
+     $                             BB( I-K+1, K )
+  270          CONTINUE
+               DO 280 J = MAX( 1, I-KA ), I - KBT - 1
+                  AB( K-J+1, J ) = AB( K-J+1, J ) -
+     $                             BB( I-K+1, K )*AB( I-J+1, J )
+  280          CONTINUE
+  290       CONTINUE
+            DO 310 J = I, I1
+               DO 300 K = MAX( J-KA, I-KBT ), I - 1
+                  AB( J-K+1, K ) = AB( J-K+1, K ) -
+     $                             BB( I-K+1, K )*AB( J-I+1, I )
+  300          CONTINUE
+  310       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by inv(S(i))
+*
+               CALL SSCAL( N-M, ONE / BII, X( M+1, I ), 1 )
+               IF( KBT.GT.0 )
+     $            CALL SGER( N-M, KBT, -ONE, X( M+1, I ), 1,
+     $                       BB( KBT+1, I-KBT ), LDBB-1,
+     $                       X( M+1, I-KBT ), LDX )
+            END IF
+*
+*           store a(i1,i) in RA1 for use in next loop over K
+*
+            RA1 = AB( I1-I+1, I )
+         END IF
+*
+*        Generate and apply vectors of rotations to chase all the
+*        existing bulges KA positions down toward the bottom of the
+*        band
+*
+         DO 360 K = 1, KB - 1
+            IF( UPDATE ) THEN
+*
+*              Determine the rotations which would annihilate the bulge
+*              which has in theory just been created
+*
+               IF( I-K+KA.LT.N .AND. I-K.GT.1 ) THEN
+*
+*                 generate rotation to annihilate a(i-k+ka+1,i)
+*
+                  CALL SLARTG( AB( KA1-K, I ), RA1, WORK( N+I-K+KA-M ),
+     $                         WORK( I-K+KA-M ), RA )
+*
+*                 create nonzero element a(i-k+ka+1,i-k) outside the
+*                 band and store it in WORK(i-k)
+*
+                  T = -BB( K+1, I-K )*RA1
+                  WORK( I-K ) = WORK( N+I-K+KA-M )*T -
+     $                          WORK( I-K+KA-M )*AB( KA1, I-K )
+                  AB( KA1, I-K ) = WORK( I-K+KA-M )*T +
+     $                             WORK( N+I-K+KA-M )*AB( KA1, I-K )
+                  RA1 = RA
+               END IF
+            END IF
+            J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1
+            NR = ( N-J2+KA ) / KA1
+            J1 = J2 + ( NR-1 )*KA1
+            IF( UPDATE ) THEN
+               J2T = MAX( J2, I+2*KA-K+1 )
+            ELSE
+               J2T = J2
+            END IF
+            NRT = ( N-J2T+KA ) / KA1
+            DO 320 J = J2T, J1, KA1
+*
+*              create nonzero element a(j+1,j-ka) outside the band
+*              and store it in WORK(j-m)
+*
+               WORK( J-M ) = WORK( J-M )*AB( KA1, J-KA+1 )
+               AB( KA1, J-KA+1 ) = WORK( N+J-M )*AB( KA1, J-KA+1 )
+  320       CONTINUE
+*
+*           generate rotations in 1st set to annihilate elements which
+*           have been created outside the band
+*
+            IF( NRT.GT.0 )
+     $         CALL SLARGV( NRT, AB( KA1, J2T-KA ), INCA, WORK( J2T-M ),
+     $                      KA1, WORK( N+J2T-M ), KA1 )
+            IF( NR.GT.0 ) THEN
+*
+*              apply rotations in 1st set from the left
+*
+               DO 330 L = 1, KA - 1
+                  CALL SLARTV( NR, AB( L+1, J2-L ), INCA,
+     $                         AB( L+2, J2-L ), INCA, WORK( N+J2-M ),
+     $                         WORK( J2-M ), KA1 )
+  330          CONTINUE
+*
+*              apply rotations in 1st set from both sides to diagonal
+*              blocks
+*
+               CALL SLAR2V( NR, AB( 1, J2 ), AB( 1, J2+1 ), AB( 2, J2 ),
+     $                      INCA, WORK( N+J2-M ), WORK( J2-M ), KA1 )
+*
+            END IF
+*
+*           start applying rotations in 1st set from the right
+*
+            DO 340 L = KA - 1, KB - K + 1, -1
+               NRT = ( N-J2+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL SLARTV( NRT, AB( KA1-L+1, J2 ), INCA,
+     $                         AB( KA1-L, J2+1 ), INCA, WORK( N+J2-M ),
+     $                         WORK( J2-M ), KA1 )
+  340       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 1st set
+*
+               DO 350 J = J2, J1, KA1
+                  CALL SROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1,
+     $                       WORK( N+J-M ), WORK( J-M ) )
+  350          CONTINUE
+            END IF
+  360    CONTINUE
+*
+         IF( UPDATE ) THEN
+            IF( I2.LE.N .AND. KBT.GT.0 ) THEN
+*
+*              create nonzero element a(i-kbt+ka+1,i-kbt) outside the
+*              band and store it in WORK(i-kbt)
+*
+               WORK( I-KBT ) = -BB( KBT+1, I-KBT )*RA1
+            END IF
+         END IF
+*
+         DO 400 K = KB, 1, -1
+            IF( UPDATE ) THEN
+               J2 = I - K - 1 + MAX( 2, K-I0+1 )*KA1
+            ELSE
+               J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1
+            END IF
+*
+*           finish applying rotations in 2nd set from the right
+*
+            DO 370 L = KB - K, 1, -1
+               NRT = ( N-J2+KA+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL SLARTV( NRT, AB( KA1-L+1, J2-KA ), INCA,
+     $                         AB( KA1-L, J2-KA+1 ), INCA,
+     $                         WORK( N+J2-KA ), WORK( J2-KA ), KA1 )
+  370       CONTINUE
+            NR = ( N-J2+KA ) / KA1
+            J1 = J2 + ( NR-1 )*KA1
+            DO 380 J = J1, J2, -KA1
+               WORK( J ) = WORK( J-KA )
+               WORK( N+J ) = WORK( N+J-KA )
+  380       CONTINUE
+            DO 390 J = J2, J1, KA1
+*
+*              create nonzero element a(j+1,j-ka) outside the band
+*              and store it in WORK(j)
+*
+               WORK( J ) = WORK( J )*AB( KA1, J-KA+1 )
+               AB( KA1, J-KA+1 ) = WORK( N+J )*AB( KA1, J-KA+1 )
+  390       CONTINUE
+            IF( UPDATE ) THEN
+               IF( I-K.LT.N-KA .AND. K.LE.KBT )
+     $            WORK( I-K+KA ) = WORK( I-K )
+            END IF
+  400    CONTINUE
+*
+         DO 440 K = KB, 1, -1
+            J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1
+            NR = ( N-J2+KA ) / KA1
+            J1 = J2 + ( NR-1 )*KA1
+            IF( NR.GT.0 ) THEN
+*
+*              generate rotations in 2nd set to annihilate elements
+*              which have been created outside the band
+*
+               CALL SLARGV( NR, AB( KA1, J2-KA ), INCA, WORK( J2 ), KA1,
+     $                      WORK( N+J2 ), KA1 )
+*
+*              apply rotations in 2nd set from the left
+*
+               DO 410 L = 1, KA - 1
+                  CALL SLARTV( NR, AB( L+1, J2-L ), INCA,
+     $                         AB( L+2, J2-L ), INCA, WORK( N+J2 ),
+     $                         WORK( J2 ), KA1 )
+  410          CONTINUE
+*
+*              apply rotations in 2nd set from both sides to diagonal
+*              blocks
+*
+               CALL SLAR2V( NR, AB( 1, J2 ), AB( 1, J2+1 ), AB( 2, J2 ),
+     $                      INCA, WORK( N+J2 ), WORK( J2 ), KA1 )
+*
+            END IF
+*
+*           start applying rotations in 2nd set from the right
+*
+            DO 420 L = KA - 1, KB - K + 1, -1
+               NRT = ( N-J2+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL SLARTV( NRT, AB( KA1-L+1, J2 ), INCA,
+     $                         AB( KA1-L, J2+1 ), INCA, WORK( N+J2 ),
+     $                         WORK( J2 ), KA1 )
+  420       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 2nd set
+*
+               DO 430 J = J2, J1, KA1
+                  CALL SROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1,
+     $                       WORK( N+J ), WORK( J ) )
+  430          CONTINUE
+            END IF
+  440    CONTINUE
+*
+         DO 460 K = 1, KB - 1
+            J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1
+*
+*           finish applying rotations in 1st set from the right
+*
+            DO 450 L = KB - K, 1, -1
+               NRT = ( N-J2+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL SLARTV( NRT, AB( KA1-L+1, J2 ), INCA,
+     $                         AB( KA1-L, J2+1 ), INCA, WORK( N+J2-M ),
+     $                         WORK( J2-M ), KA1 )
+  450       CONTINUE
+  460    CONTINUE
+*
+         IF( KB.GT.1 ) THEN
+            DO 470 J = N - 1, I - KB + 2*KA + 1, -1
+               WORK( N+J-M ) = WORK( N+J-KA-M )
+               WORK( J-M ) = WORK( J-KA-M )
+  470       CONTINUE
+         END IF
+*
+      END IF
+*
+      GO TO 10
+*
+  480 CONTINUE
+*
+*     **************************** Phase 2 *****************************
+*
+*     The logical structure of this phase is:
+*
+*     UPDATE = .TRUE.
+*     DO I = 1, M
+*        use S(i) to update A and create a new bulge
+*        apply rotations to push all bulges KA positions upward
+*     END DO
+*     UPDATE = .FALSE.
+*     DO I = M - KA - 1, 2, -1
+*        apply rotations to push all bulges KA positions upward
+*     END DO
+*
+*     To avoid duplicating code, the two loops are merged.
+*
+      UPDATE = .TRUE.
+      I = 0
+  490 CONTINUE
+      IF( UPDATE ) THEN
+         I = I + 1
+         KBT = MIN( KB, M-I )
+         I0 = I + 1
+         I1 = MAX( 1, I-KA )
+         I2 = I + KBT - KA1
+         IF( I.GT.M ) THEN
+            UPDATE = .FALSE.
+            I = I - 1
+            I0 = M + 1
+            IF( KA.EQ.0 )
+     $         RETURN
+            GO TO 490
+         END IF
+      ELSE
+         I = I - KA
+         IF( I.LT.2 )
+     $      RETURN
+      END IF
+*
+      IF( I.LT.M-KBT ) THEN
+         NX = M
+      ELSE
+         NX = N
+      END IF
+*
+      IF( UPPER ) THEN
+*
+*        Transform A, working with the upper triangle
+*
+         IF( UPDATE ) THEN
+*
+*           Form  inv(S(i))**T * A * inv(S(i))
+*
+            BII = BB( KB1, I )
+            DO 500 J = I1, I
+               AB( J-I+KA1, I ) = AB( J-I+KA1, I ) / BII
+  500       CONTINUE
+            DO 510 J = I, MIN( N, I+KA )
+               AB( I-J+KA1, J ) = AB( I-J+KA1, J ) / BII
+  510       CONTINUE
+            DO 540 K = I + 1, I + KBT
+               DO 520 J = K, I + KBT
+                  AB( K-J+KA1, J ) = AB( K-J+KA1, J ) -
+     $                               BB( I-J+KB1, J )*AB( I-K+KA1, K ) -
+     $                               BB( I-K+KB1, K )*AB( I-J+KA1, J ) +
+     $                               AB( KA1, I )*BB( I-J+KB1, J )*
+     $                               BB( I-K+KB1, K )
+  520          CONTINUE
+               DO 530 J = I + KBT + 1, MIN( N, I+KA )
+                  AB( K-J+KA1, J ) = AB( K-J+KA1, J ) -
+     $                               BB( I-K+KB1, K )*AB( I-J+KA1, J )
+  530          CONTINUE
+  540       CONTINUE
+            DO 560 J = I1, I
+               DO 550 K = I + 1, MIN( J+KA, I+KBT )
+                  AB( J-K+KA1, K ) = AB( J-K+KA1, K ) -
+     $                               BB( I-K+KB1, K )*AB( J-I+KA1, I )
+  550          CONTINUE
+  560       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by inv(S(i))
+*
+               CALL SSCAL( NX, ONE / BII, X( 1, I ), 1 )
+               IF( KBT.GT.0 )
+     $            CALL SGER( NX, KBT, -ONE, X( 1, I ), 1, BB( KB, I+1 ),
+     $                       LDBB-1, X( 1, I+1 ), LDX )
+            END IF
+*
+*           store a(i1,i) in RA1 for use in next loop over K
+*
+            RA1 = AB( I1-I+KA1, I )
+         END IF
+*
+*        Generate and apply vectors of rotations to chase all the
+*        existing bulges KA positions up toward the top of the band
+*
+         DO 610 K = 1, KB - 1
+            IF( UPDATE ) THEN
+*
+*              Determine the rotations which would annihilate the bulge
+*              which has in theory just been created
+*
+               IF( I+K-KA1.GT.0 .AND. I+K.LT.M ) THEN
+*
+*                 generate rotation to annihilate a(i+k-ka-1,i)
+*
+                  CALL SLARTG( AB( K+1, I ), RA1, WORK( N+I+K-KA ),
+     $                         WORK( I+K-KA ), RA )
+*
+*                 create nonzero element a(i+k-ka-1,i+k) outside the
+*                 band and store it in WORK(m-kb+i+k)
+*
+                  T = -BB( KB1-K, I+K )*RA1
+                  WORK( M-KB+I+K ) = WORK( N+I+K-KA )*T -
+     $                               WORK( I+K-KA )*AB( 1, I+K )
+                  AB( 1, I+K ) = WORK( I+K-KA )*T +
+     $                           WORK( N+I+K-KA )*AB( 1, I+K )
+                  RA1 = RA
+               END IF
+            END IF
+            J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1
+            NR = ( J2+KA-1 ) / KA1
+            J1 = J2 - ( NR-1 )*KA1
+            IF( UPDATE ) THEN
+               J2T = MIN( J2, I-2*KA+K-1 )
+            ELSE
+               J2T = J2
+            END IF
+            NRT = ( J2T+KA-1 ) / KA1
+            DO 570 J = J1, J2T, KA1
+*
+*              create nonzero element a(j-1,j+ka) outside the band
+*              and store it in WORK(j)
+*
+               WORK( J ) = WORK( J )*AB( 1, J+KA-1 )
+               AB( 1, J+KA-1 ) = WORK( N+J )*AB( 1, J+KA-1 )
+  570       CONTINUE
+*
+*           generate rotations in 1st set to annihilate elements which
+*           have been created outside the band
+*
+            IF( NRT.GT.0 )
+     $         CALL SLARGV( NRT, AB( 1, J1+KA ), INCA, WORK( J1 ), KA1,
+     $                      WORK( N+J1 ), KA1 )
+            IF( NR.GT.0 ) THEN
+*
+*              apply rotations in 1st set from the left
+*
+               DO 580 L = 1, KA - 1
+                  CALL SLARTV( NR, AB( KA1-L, J1+L ), INCA,
+     $                         AB( KA-L, J1+L ), INCA, WORK( N+J1 ),
+     $                         WORK( J1 ), KA1 )
+  580          CONTINUE
+*
+*              apply rotations in 1st set from both sides to diagonal
+*              blocks
+*
+               CALL SLAR2V( NR, AB( KA1, J1 ), AB( KA1, J1-1 ),
+     $                      AB( KA, J1 ), INCA, WORK( N+J1 ),
+     $                      WORK( J1 ), KA1 )
+*
+            END IF
+*
+*           start applying rotations in 1st set from the right
+*
+            DO 590 L = KA - 1, KB - K + 1, -1
+               NRT = ( J2+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL SLARTV( NRT, AB( L, J1T ), INCA,
+     $                         AB( L+1, J1T-1 ), INCA, WORK( N+J1T ),
+     $                         WORK( J1T ), KA1 )
+  590       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 1st set
+*
+               DO 600 J = J1, J2, KA1
+                  CALL SROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1,
+     $                       WORK( N+J ), WORK( J ) )
+  600          CONTINUE
+            END IF
+  610    CONTINUE
+*
+         IF( UPDATE ) THEN
+            IF( I2.GT.0 .AND. KBT.GT.0 ) THEN
+*
+*              create nonzero element a(i+kbt-ka-1,i+kbt) outside the
+*              band and store it in WORK(m-kb+i+kbt)
+*
+               WORK( M-KB+I+KBT ) = -BB( KB1-KBT, I+KBT )*RA1
+            END IF
+         END IF
+*
+         DO 650 K = KB, 1, -1
+            IF( UPDATE ) THEN
+               J2 = I + K + 1 - MAX( 2, K+I0-M )*KA1
+            ELSE
+               J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1
+            END IF
+*
+*           finish applying rotations in 2nd set from the right
+*
+            DO 620 L = KB - K, 1, -1
+               NRT = ( J2+KA+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL SLARTV( NRT, AB( L, J1T+KA ), INCA,
+     $                         AB( L+1, J1T+KA-1 ), INCA,
+     $                         WORK( N+M-KB+J1T+KA ),
+     $                         WORK( M-KB+J1T+KA ), KA1 )
+  620       CONTINUE
+            NR = ( J2+KA-1 ) / KA1
+            J1 = J2 - ( NR-1 )*KA1
+            DO 630 J = J1, J2, KA1
+               WORK( M-KB+J ) = WORK( M-KB+J+KA )
+               WORK( N+M-KB+J ) = WORK( N+M-KB+J+KA )
+  630       CONTINUE
+            DO 640 J = J1, J2, KA1
+*
+*              create nonzero element a(j-1,j+ka) outside the band
+*              and store it in WORK(m-kb+j)
+*
+               WORK( M-KB+J ) = WORK( M-KB+J )*AB( 1, J+KA-1 )
+               AB( 1, J+KA-1 ) = WORK( N+M-KB+J )*AB( 1, J+KA-1 )
+  640       CONTINUE
+            IF( UPDATE ) THEN
+               IF( I+K.GT.KA1 .AND. K.LE.KBT )
+     $            WORK( M-KB+I+K-KA ) = WORK( M-KB+I+K )
+            END IF
+  650    CONTINUE
+*
+         DO 690 K = KB, 1, -1
+            J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1
+            NR = ( J2+KA-1 ) / KA1
+            J1 = J2 - ( NR-1 )*KA1
+            IF( NR.GT.0 ) THEN
+*
+*              generate rotations in 2nd set to annihilate elements
+*              which have been created outside the band
+*
+               CALL SLARGV( NR, AB( 1, J1+KA ), INCA, WORK( M-KB+J1 ),
+     $                      KA1, WORK( N+M-KB+J1 ), KA1 )
+*
+*              apply rotations in 2nd set from the left
+*
+               DO 660 L = 1, KA - 1
+                  CALL SLARTV( NR, AB( KA1-L, J1+L ), INCA,
+     $                         AB( KA-L, J1+L ), INCA,
+     $                         WORK( N+M-KB+J1 ), WORK( M-KB+J1 ), KA1 )
+  660          CONTINUE
+*
+*              apply rotations in 2nd set from both sides to diagonal
+*              blocks
+*
+               CALL SLAR2V( NR, AB( KA1, J1 ), AB( KA1, J1-1 ),
+     $                      AB( KA, J1 ), INCA, WORK( N+M-KB+J1 ),
+     $                      WORK( M-KB+J1 ), KA1 )
+*
+            END IF
+*
+*           start applying rotations in 2nd set from the right
+*
+            DO 670 L = KA - 1, KB - K + 1, -1
+               NRT = ( J2+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL SLARTV( NRT, AB( L, J1T ), INCA,
+     $                         AB( L+1, J1T-1 ), INCA,
+     $                         WORK( N+M-KB+J1T ), WORK( M-KB+J1T ),
+     $                         KA1 )
+  670       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 2nd set
+*
+               DO 680 J = J1, J2, KA1
+                  CALL SROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1,
+     $                       WORK( N+M-KB+J ), WORK( M-KB+J ) )
+  680          CONTINUE
+            END IF
+  690    CONTINUE
+*
+         DO 710 K = 1, KB - 1
+            J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1
+*
+*           finish applying rotations in 1st set from the right
+*
+            DO 700 L = KB - K, 1, -1
+               NRT = ( J2+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL SLARTV( NRT, AB( L, J1T ), INCA,
+     $                         AB( L+1, J1T-1 ), INCA, WORK( N+J1T ),
+     $                         WORK( J1T ), KA1 )
+  700       CONTINUE
+  710    CONTINUE
+*
+         IF( KB.GT.1 ) THEN
+            DO 720 J = 2, MIN( I+KB, M ) - 2*KA - 1
+               WORK( N+J ) = WORK( N+J+KA )
+               WORK( J ) = WORK( J+KA )
+  720       CONTINUE
+         END IF
+*
+      ELSE
+*
+*        Transform A, working with the lower triangle
+*
+         IF( UPDATE ) THEN
+*
+*           Form  inv(S(i))**T * A * inv(S(i))
+*
+            BII = BB( 1, I )
+            DO 730 J = I1, I
+               AB( I-J+1, J ) = AB( I-J+1, J ) / BII
+  730       CONTINUE
+            DO 740 J = I, MIN( N, I+KA )
+               AB( J-I+1, I ) = AB( J-I+1, I ) / BII
+  740       CONTINUE
+            DO 770 K = I + 1, I + KBT
+               DO 750 J = K, I + KBT
+                  AB( J-K+1, K ) = AB( J-K+1, K ) -
+     $                             BB( J-I+1, I )*AB( K-I+1, I ) -
+     $                             BB( K-I+1, I )*AB( J-I+1, I ) +
+     $                             AB( 1, I )*BB( J-I+1, I )*
+     $                             BB( K-I+1, I )
+  750          CONTINUE
+               DO 760 J = I + KBT + 1, MIN( N, I+KA )
+                  AB( J-K+1, K ) = AB( J-K+1, K ) -
+     $                             BB( K-I+1, I )*AB( J-I+1, I )
+  760          CONTINUE
+  770       CONTINUE
+            DO 790 J = I1, I
+               DO 780 K = I + 1, MIN( J+KA, I+KBT )
+                  AB( K-J+1, J ) = AB( K-J+1, J ) -
+     $                             BB( K-I+1, I )*AB( I-J+1, J )
+  780          CONTINUE
+  790       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by inv(S(i))
+*
+               CALL SSCAL( NX, ONE / BII, X( 1, I ), 1 )
+               IF( KBT.GT.0 )
+     $            CALL SGER( NX, KBT, -ONE, X( 1, I ), 1, BB( 2, I ), 1,
+     $                       X( 1, I+1 ), LDX )
+            END IF
+*
+*           store a(i,i1) in RA1 for use in next loop over K
+*
+            RA1 = AB( I-I1+1, I1 )
+         END IF
+*
+*        Generate and apply vectors of rotations to chase all the
+*        existing bulges KA positions up toward the top of the band
+*
+         DO 840 K = 1, KB - 1
+            IF( UPDATE ) THEN
+*
+*              Determine the rotations which would annihilate the bulge
+*              which has in theory just been created
+*
+               IF( I+K-KA1.GT.0 .AND. I+K.LT.M ) THEN
+*
+*                 generate rotation to annihilate a(i,i+k-ka-1)
+*
+                  CALL SLARTG( AB( KA1-K, I+K-KA ), RA1,
+     $                         WORK( N+I+K-KA ), WORK( I+K-KA ), RA )
+*
+*                 create nonzero element a(i+k,i+k-ka-1) outside the
+*                 band and store it in WORK(m-kb+i+k)
+*
+                  T = -BB( K+1, I )*RA1
+                  WORK( M-KB+I+K ) = WORK( N+I+K-KA )*T -
+     $                               WORK( I+K-KA )*AB( KA1, I+K-KA )
+                  AB( KA1, I+K-KA ) = WORK( I+K-KA )*T +
+     $                                WORK( N+I+K-KA )*AB( KA1, I+K-KA )
+                  RA1 = RA
+               END IF
+            END IF
+            J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1
+            NR = ( J2+KA-1 ) / KA1
+            J1 = J2 - ( NR-1 )*KA1
+            IF( UPDATE ) THEN
+               J2T = MIN( J2, I-2*KA+K-1 )
+            ELSE
+               J2T = J2
+            END IF
+            NRT = ( J2T+KA-1 ) / KA1
+            DO 800 J = J1, J2T, KA1
+*
+*              create nonzero element a(j+ka,j-1) outside the band
+*              and store it in WORK(j)
+*
+               WORK( J ) = WORK( J )*AB( KA1, J-1 )
+               AB( KA1, J-1 ) = WORK( N+J )*AB( KA1, J-1 )
+  800       CONTINUE
+*
+*           generate rotations in 1st set to annihilate elements which
+*           have been created outside the band
+*
+            IF( NRT.GT.0 )
+     $         CALL SLARGV( NRT, AB( KA1, J1 ), INCA, WORK( J1 ), KA1,
+     $                      WORK( N+J1 ), KA1 )
+            IF( NR.GT.0 ) THEN
+*
+*              apply rotations in 1st set from the right
+*
+               DO 810 L = 1, KA - 1
+                  CALL SLARTV( NR, AB( L+1, J1 ), INCA, AB( L+2, J1-1 ),
+     $                         INCA, WORK( N+J1 ), WORK( J1 ), KA1 )
+  810          CONTINUE
+*
+*              apply rotations in 1st set from both sides to diagonal
+*              blocks
+*
+               CALL SLAR2V( NR, AB( 1, J1 ), AB( 1, J1-1 ),
+     $                      AB( 2, J1-1 ), INCA, WORK( N+J1 ),
+     $                      WORK( J1 ), KA1 )
+*
+            END IF
+*
+*           start applying rotations in 1st set from the left
+*
+            DO 820 L = KA - 1, KB - K + 1, -1
+               NRT = ( J2+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL SLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA,
+     $                         AB( KA1-L, J1T-KA1+L ), INCA,
+     $                         WORK( N+J1T ), WORK( J1T ), KA1 )
+  820       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 1st set
+*
+               DO 830 J = J1, J2, KA1
+                  CALL SROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1,
+     $                       WORK( N+J ), WORK( J ) )
+  830          CONTINUE
+            END IF
+  840    CONTINUE
+*
+         IF( UPDATE ) THEN
+            IF( I2.GT.0 .AND. KBT.GT.0 ) THEN
+*
+*              create nonzero element a(i+kbt,i+kbt-ka-1) outside the
+*              band and store it in WORK(m-kb+i+kbt)
+*
+               WORK( M-KB+I+KBT ) = -BB( KBT+1, I )*RA1
+            END IF
+         END IF
+*
+         DO 880 K = KB, 1, -1
+            IF( UPDATE ) THEN
+               J2 = I + K + 1 - MAX( 2, K+I0-M )*KA1
+            ELSE
+               J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1
+            END IF
+*
+*           finish applying rotations in 2nd set from the left
+*
+            DO 850 L = KB - K, 1, -1
+               NRT = ( J2+KA+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL SLARTV( NRT, AB( KA1-L+1, J1T+L-1 ), INCA,
+     $                         AB( KA1-L, J1T+L-1 ), INCA,
+     $                         WORK( N+M-KB+J1T+KA ),
+     $                         WORK( M-KB+J1T+KA ), KA1 )
+  850       CONTINUE
+            NR = ( J2+KA-1 ) / KA1
+            J1 = J2 - ( NR-1 )*KA1
+            DO 860 J = J1, J2, KA1
+               WORK( M-KB+J ) = WORK( M-KB+J+KA )
+               WORK( N+M-KB+J ) = WORK( N+M-KB+J+KA )
+  860       CONTINUE
+            DO 870 J = J1, J2, KA1
+*
+*              create nonzero element a(j+ka,j-1) outside the band
+*              and store it in WORK(m-kb+j)
+*
+               WORK( M-KB+J ) = WORK( M-KB+J )*AB( KA1, J-1 )
+               AB( KA1, J-1 ) = WORK( N+M-KB+J )*AB( KA1, J-1 )
+  870       CONTINUE
+            IF( UPDATE ) THEN
+               IF( I+K.GT.KA1 .AND. K.LE.KBT )
+     $            WORK( M-KB+I+K-KA ) = WORK( M-KB+I+K )
+            END IF
+  880    CONTINUE
+*
+         DO 920 K = KB, 1, -1
+            J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1
+            NR = ( J2+KA-1 ) / KA1
+            J1 = J2 - ( NR-1 )*KA1
+            IF( NR.GT.0 ) THEN
+*
+*              generate rotations in 2nd set to annihilate elements
+*              which have been created outside the band
+*
+               CALL SLARGV( NR, AB( KA1, J1 ), INCA, WORK( M-KB+J1 ),
+     $                      KA1, WORK( N+M-KB+J1 ), KA1 )
+*
+*              apply rotations in 2nd set from the right
+*
+               DO 890 L = 1, KA - 1
+                  CALL SLARTV( NR, AB( L+1, J1 ), INCA, AB( L+2, J1-1 ),
+     $                         INCA, WORK( N+M-KB+J1 ), WORK( M-KB+J1 ),
+     $                         KA1 )
+  890          CONTINUE
+*
+*              apply rotations in 2nd set from both sides to diagonal
+*              blocks
+*
+               CALL SLAR2V( NR, AB( 1, J1 ), AB( 1, J1-1 ),
+     $                      AB( 2, J1-1 ), INCA, WORK( N+M-KB+J1 ),
+     $                      WORK( M-KB+J1 ), KA1 )
+*
+            END IF
+*
+*           start applying rotations in 2nd set from the left
+*
+            DO 900 L = KA - 1, KB - K + 1, -1
+               NRT = ( J2+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL SLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA,
+     $                         AB( KA1-L, J1T-KA1+L ), INCA,
+     $                         WORK( N+M-KB+J1T ), WORK( M-KB+J1T ),
+     $                         KA1 )
+  900       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 2nd set
+*
+               DO 910 J = J1, J2, KA1
+                  CALL SROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1,
+     $                       WORK( N+M-KB+J ), WORK( M-KB+J ) )
+  910          CONTINUE
+            END IF
+  920    CONTINUE
+*
+         DO 940 K = 1, KB - 1
+            J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1
+*
+*           finish applying rotations in 1st set from the left
+*
+            DO 930 L = KB - K, 1, -1
+               NRT = ( J2+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL SLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA,
+     $                         AB( KA1-L, J1T-KA1+L ), INCA,
+     $                         WORK( N+J1T ), WORK( J1T ), KA1 )
+  930       CONTINUE
+  940    CONTINUE
+*
+         IF( KB.GT.1 ) THEN
+            DO 950 J = 2, MIN( I+KB, M ) - 2*KA - 1
+               WORK( N+J ) = WORK( N+J+KA )
+               WORK( J ) = WORK( J+KA )
+  950       CONTINUE
+         END IF
+*
+      END IF
+*
+      GO TO 490
+*
+*     End of SSBGST
+*
+      END
diff --git a/costa/native/external/lapack/ssbgv.f b/costa/native/external/lapack/ssbgv.f
new file mode 100644
index 000000000..00624a091
--- /dev/null
+++ b/costa/native/external/lapack/ssbgv.f
@@ -0,0 +1,189 @@
+      SUBROUTINE SSBGV( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, Z,
+     $                  LDZ, WORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, KA, KB, LDAB, LDBB, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      REAL               AB( LDAB, * ), BB( LDBB, * ), W( * ),
+     $                   WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSBGV computes all the eigenvalues, and optionally, the eigenvectors
+*  of a real generalized symmetric-definite banded eigenproblem, of
+*  the form A*x=(lambda)*B*x. Here A and B are assumed to be symmetric
+*  and banded, and B is also positive definite.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  KA      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'. KA >= 0.
+*
+*  KB      (input) INTEGER
+*          The number of superdiagonals of the matrix B if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'. KB >= 0.
+*
+*  AB      (input/output) REAL array, dimension (LDAB, N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first ka+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+ka).
+*
+*          On exit, the contents of AB are destroyed.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KA+1.
+*
+*  BB      (input/output) REAL array, dimension (LDBB, N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix B, stored in the first kb+1 rows of the array.  The
+*          j-th column of B is stored in the j-th column of the array BB
+*          as follows:
+*          if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j;
+*          if UPLO = 'L', BB(1+i-j,j)    = B(i,j) for j<=i<=min(n,j+kb).
+*
+*          On exit, the factor S from the split Cholesky factorization
+*          B = S**T*S, as returned by SPBSTF.
+*
+*  LDBB    (input) INTEGER
+*          The leading dimension of the array BB.  LDBB >= KB+1.
+*
+*  W       (output) REAL array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) REAL array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of
+*          eigenvectors, with the i-th column of Z holding the
+*          eigenvector associated with W(i). The eigenvectors are
+*          normalized so that Z**T*B*Z = I.
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= N.
+*
+*  WORK    (workspace) REAL array, dimension (3*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is:
+*             <= N:  the algorithm failed to converge:
+*                    i off-diagonal elements of an intermediate
+*                    tridiagonal form did not converge to zero;
+*             > N:   if INFO = N + i, for 1 <= i <= N, then SPBSTF
+*                    returned INFO = i: B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            UPPER, WANTZ
+      CHARACTER          VECT
+      INTEGER            IINFO, INDE, INDWRK
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SPBSTF, SSBGST, SSBTRD, SSTEQR, SSTERF, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KA.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.KA+1 ) THEN
+         INFO = -7
+      ELSE IF( LDBB.LT.KB+1 ) THEN
+         INFO = -9
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSBGV ', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form a split Cholesky factorization of B.
+*
+      CALL SPBSTF( UPLO, N, KB, BB, LDBB, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem.
+*
+      INDE = 1
+      INDWRK = INDE + N
+      CALL SSBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Z, LDZ,
+     $             WORK( INDWRK ), IINFO )
+*
+*     Reduce to tridiagonal form.
+*
+      IF( WANTZ ) THEN
+         VECT = 'U'
+      ELSE
+         VECT = 'N'
+      END IF
+      CALL SSBTRD( VECT, UPLO, N, KA, AB, LDAB, W, WORK( INDE ), Z, LDZ,
+     $             WORK( INDWRK ), IINFO )
+*
+*     For eigenvalues only, call SSTERF.  For eigenvectors, call SSTEQR.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL SSTERF( N, W, WORK( INDE ), INFO )
+      ELSE
+         CALL SSTEQR( JOBZ, N, W, WORK( INDE ), Z, LDZ, WORK( INDWRK ),
+     $                INFO )
+      END IF
+      RETURN
+*
+*     End of SSBGV
+*
+      END
diff --git a/costa/native/external/lapack/ssbgvd.f b/costa/native/external/lapack/ssbgvd.f
new file mode 100644
index 000000000..bc185e3aa
--- /dev/null
+++ b/costa/native/external/lapack/ssbgvd.f
@@ -0,0 +1,270 @@
+      SUBROUTINE SSBGVD( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W,
+     $                   Z, LDZ, WORK, LWORK, IWORK, LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, KA, KB, LDAB, LDBB, LDZ, LIWORK, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               AB( LDAB, * ), BB( LDBB, * ), W( * ),
+     $                   WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSBGVD computes all the eigenvalues, and optionally, the eigenvectors
+*  of a real generalized symmetric-definite banded eigenproblem, of the
+*  form A*x=(lambda)*B*x.  Here A and B are assumed to be symmetric and
+*  banded, and B is also positive definite.  If eigenvectors are
+*  desired, it uses a divide and conquer algorithm.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  KA      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KA >= 0.
+*
+*  KB      (input) INTEGER
+*          The number of superdiagonals of the matrix B if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KB >= 0.
+*
+*  AB      (input/output) REAL array, dimension (LDAB, N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first ka+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+ka).
+*
+*          On exit, the contents of AB are destroyed.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KA+1.
+*
+*  BB      (input/output) REAL array, dimension (LDBB, N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix B, stored in the first kb+1 rows of the array.  The
+*          j-th column of B is stored in the j-th column of the array BB
+*          as follows:
+*          if UPLO = 'U', BB(ka+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j;
+*          if UPLO = 'L', BB(1+i-j,j)    = B(i,j) for j<=i<=min(n,j+kb).
+*
+*          On exit, the factor S from the split Cholesky factorization
+*          B = S**T*S, as returned by SPBSTF.
+*
+*  LDBB    (input) INTEGER
+*          The leading dimension of the array BB.  LDBB >= KB+1.
+*
+*  W       (output) REAL array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) REAL array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of
+*          eigenvectors, with the i-th column of Z holding the
+*          eigenvector associated with W(i).  The eigenvectors are
+*          normalized so Z**T*B*Z = I.
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If N <= 1,               LWORK >= 1.
+*          If JOBZ = 'N' and N > 1, LWORK >= 3*N.
+*          If JOBZ = 'V' and N > 1, LWORK >= 1 + 5*N + 2*N**2.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if LIWORK > 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.
+*          If JOBZ  = 'N' or N <= 1, LIWORK >= 1.
+*          If JOBZ  = 'V' and N > 1, LIWORK >= 3 + 5*N.
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is:
+*             <= N:  the algorithm failed to converge:
+*                    i off-diagonal elements of an intermediate
+*                    tridiagonal form did not converge to zero;
+*             > N:   if INFO = N + i, for 1 <= i <= N, then SPBSTF
+*                    returned INFO = i: B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, UPPER, WANTZ
+      CHARACTER          VECT
+      INTEGER            IINFO, INDE, INDWK2, INDWRK, LIWMIN, LLWRK2,
+     $                   LWMIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMM, SLACPY, SPBSTF, SSBGST, SSBTRD, SSTEDC,
+     $                   SSTERF, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+      LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( N.LE.1 ) THEN
+         LIWMIN = 1
+         LWMIN = 1
+      ELSE
+         IF( WANTZ ) THEN
+            LIWMIN = 3 + 5*N
+            LWMIN = 1 + 5*N + 2*N**2
+         ELSE
+            LIWMIN = 1
+            LWMIN = 2*N
+         END IF
+      END IF
+*
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KA.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.KA+1 ) THEN
+         INFO = -7
+      ELSE IF( LDBB.LT.KB+1 ) THEN
+         INFO = -9
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -12
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -14
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -16
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSBGVD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form a split Cholesky factorization of B.
+*
+      CALL SPBSTF( UPLO, N, KB, BB, LDBB, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem.
+*
+      INDE = 1
+      INDWRK = INDE + N
+      INDWK2 = INDWRK + N*N
+      LLWRK2 = LWORK - INDWK2 + 1
+      CALL SSBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Z, LDZ,
+     $             WORK( INDWRK ), IINFO )
+*
+*     Reduce to tridiagonal form.
+*
+      IF( WANTZ ) THEN
+         VECT = 'U'
+      ELSE
+         VECT = 'N'
+      END IF
+      CALL SSBTRD( VECT, UPLO, N, KA, AB, LDAB, W, WORK( INDE ), Z, LDZ,
+     $             WORK( INDWRK ), IINFO )
+*
+*     For eigenvalues only, call SSTERF. For eigenvectors, call SSTEDC.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL SSTERF( N, W, WORK( INDE ), INFO )
+      ELSE
+         CALL SSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N,
+     $                WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO )
+         CALL SGEMM( 'N', 'N', N, N, N, ONE, Z, LDZ, WORK( INDWRK ), N,
+     $               ZERO, WORK( INDWK2 ), N )
+         CALL SLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ )
+      END IF
+*
+      WORK( 1 ) = LWMIN
+      IWORK( 1 ) = LIWMIN
+*
+      RETURN
+*
+*     End of SSBGVD
+*
+      END
diff --git a/costa/native/external/lapack/ssbgvx.f b/costa/native/external/lapack/ssbgvx.f
new file mode 100644
index 000000000..a4317e0ab
--- /dev/null
+++ b/costa/native/external/lapack/ssbgvx.f
@@ -0,0 +1,371 @@
+      SUBROUTINE SSBGVX( JOBZ, RANGE, UPLO, N, KA, KB, AB, LDAB, BB,
+     $                   LDBB, Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, Z,
+     $                   LDZ, WORK, IWORK, IFAIL, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE, UPLO
+      INTEGER            IL, INFO, IU, KA, KB, LDAB, LDBB, LDQ, LDZ, M,
+     $                   N
+      REAL               ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IFAIL( * ), IWORK( * )
+      REAL               AB( LDAB, * ), BB( LDBB, * ), Q( LDQ, * ),
+     $                   W( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSBGVX computes selected eigenvalues, and optionally, eigenvectors
+*  of a real generalized symmetric-definite banded eigenproblem, of
+*  the form A*x=(lambda)*B*x.  Here A and B are assumed to be symmetric
+*  and banded, and B is also positive definite.  Eigenvalues and
+*  eigenvectors can be selected by specifying either all eigenvalues,
+*  a range of values or a range of indices for the desired eigenvalues.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found.
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found.
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  KA      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KA >= 0.
+*
+*  KB      (input) INTEGER
+*          The number of superdiagonals of the matrix B if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KB >= 0.
+*
+*  AB      (input/output) REAL array, dimension (LDAB, N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first ka+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+ka).
+*
+*          On exit, the contents of AB are destroyed.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KA+1.
+*
+*  BB      (input/output) REAL array, dimension (LDBB, N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix B, stored in the first kb+1 rows of the array.  The
+*          j-th column of B is stored in the j-th column of the array BB
+*          as follows:
+*          if UPLO = 'U', BB(ka+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j;
+*          if UPLO = 'L', BB(1+i-j,j)    = B(i,j) for j<=i<=min(n,j+kb).
+*
+*          On exit, the factor S from the split Cholesky factorization
+*          B = S**T*S, as returned by SPBSTF.
+*
+*  LDBB    (input) INTEGER
+*          The leading dimension of the array BB.  LDBB >= KB+1.
+*
+*  Q       (output) REAL array, dimension (LDQ, N)
+*          If JOBZ = 'V', the n-by-n matrix used in the reduction of
+*          A*x = (lambda)*B*x to standard form, i.e. C*x = (lambda)*x,
+*          and consequently C to tridiagonal form.
+*          If JOBZ = 'N', the array Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.  If JOBZ = 'N',
+*          LDQ >= 1. If JOBZ = 'V', LDQ >= max(1,N).
+*
+*  VL      (input) REAL
+*  VU      (input) REAL
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) REAL
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less than
+*          or equal to zero, then  EPS*|T|  will be used in its place,
+*          where |T| is the 1-norm of the tridiagonal matrix obtained
+*          by reducing A to tridiagonal form.
+*
+*          Eigenvalues will be computed most accurately when ABSTOL is
+*          set to twice the underflow threshold 2*SLAMCH('S'), not zero.
+*          If this routine returns with INFO>0, indicating that some
+*          eigenvectors did not converge, try setting ABSTOL to
+*          2*SLAMCH('S').
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) REAL array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) REAL array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of
+*          eigenvectors, with the i-th column of Z holding the
+*          eigenvector associated with W(i).  The eigenvectors are
+*          normalized so Z**T*B*Z = I.
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace/output) REAL array, dimension (7N)
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (5N)
+*
+*  IFAIL   (input) INTEGER array, dimension (M)
+*          If JOBZ = 'V', then if INFO = 0, the first M elements of
+*          IFAIL are zero.  If INFO > 0, then IFAIL contains the
+*          indices of the eigenvalues that failed to converge.
+*          If JOBZ = 'N', then IFAIL is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0 : successful exit
+*          < 0 : if INFO = -i, the i-th argument had an illegal value
+*          <= N: if INFO = i, then i eigenvectors failed to converge.
+*                  Their indices are stored in IFAIL.
+*          > N : SPBSTF returned an error code; i.e.,
+*                if INFO = N + i, for 1 <= i <= N, then the leading
+*                minor of order i of B is not positive definite.
+*                The factorization of B could not be completed and
+*                no eigenvalues or eigenvectors were computed.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, UPPER, VALEIG, WANTZ
+      CHARACTER          ORDER, VECT
+      INTEGER            I, IINFO, INDD, INDE, INDEE, INDIBL, INDISP,
+     $                   INDIWO, INDWRK, ITMP1, J, JJ, NSPLIT
+      REAL               TMP1
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SGEMV, SLACPY, SPBSTF, SSBGST, SSBTRD,
+     $                   SSTEBZ, SSTEIN, SSTEQR, SSTERF, SSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KA.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN
+         INFO = -6
+      ELSE IF( LDAB.LT.KA+1 ) THEN
+         INFO = -8
+      ELSE IF( LDBB.LT.KB+1 ) THEN
+         INFO = -10
+      ELSE IF( LDQ.LT.1 ) THEN
+         INFO = -12
+      ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN
+         INFO = -14
+      ELSE IF( INDEIG .AND. IL.LT.1 ) THEN
+         INFO = -15
+      ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN
+         INFO = -16
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -21
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSBGVX', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+*     Form a split Cholesky factorization of B.
+*
+      CALL SPBSTF( UPLO, N, KB, BB, LDBB, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem.
+*
+      CALL SSBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Q, LDQ,
+     $             WORK, IINFO )
+*
+*     Reduce symmetric band matrix to tridiagonal form.
+*
+      INDD = 1
+      INDE = INDD + N
+      INDWRK = INDE + N
+      IF( WANTZ ) THEN
+         VECT = 'U'
+      ELSE
+         VECT = 'N'
+      END IF
+      CALL SSBTRD( VECT, UPLO, N, KA, AB, LDAB, WORK( INDD ),
+     $             WORK( INDE ), Q, LDQ, WORK( INDWRK ), IINFO )
+*
+*     If all eigenvalues are desired and ABSTOL is less than or equal
+*     to zero, then call SSTERF or SSTEQR.  If this fails for some
+*     eigenvalue, then try SSTEBZ.
+*
+      IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND.
+     $    ( ABSTOL.LE.ZERO ) ) THEN
+         CALL SCOPY( N, WORK( INDD ), 1, W, 1 )
+         INDEE = INDWRK + 2*N
+         CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 )
+         IF( .NOT.WANTZ ) THEN
+            CALL SSTERF( N, W, WORK( INDEE ), INFO )
+         ELSE
+            CALL SLACPY( 'A', N, N, Q, LDQ, Z, LDZ )
+            CALL SSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ,
+     $                   WORK( INDWRK ), INFO )
+            IF( INFO.EQ.0 ) THEN
+               DO 10 I = 1, N
+                  IFAIL( I ) = 0
+   10          CONTINUE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            M = N
+            GO TO 30
+         END IF
+         INFO = 0
+      END IF
+*
+*     Otherwise, call SSTEBZ and, if eigenvectors are desired,
+*     call SSTEIN.
+*
+      IF( WANTZ ) THEN
+         ORDER = 'B'
+      ELSE
+         ORDER = 'E'
+      END IF
+      INDIBL = 1
+      INDISP = INDIBL + N
+      INDIWO = INDISP + N
+      CALL SSTEBZ( RANGE, ORDER, N, VL, VU, IL, IU, ABSTOL,
+     $             WORK( INDD ), WORK( INDE ), M, NSPLIT, W,
+     $             IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ),
+     $             IWORK( INDIWO ), INFO )
+*
+      IF( WANTZ ) THEN
+         CALL SSTEIN( N, WORK( INDD ), WORK( INDE ), M, W,
+     $                IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ,
+     $                WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO )
+*
+*        Apply transformation matrix used in reduction to tridiagonal
+*        form to eigenvectors returned by SSTEIN.
+*
+         DO 20 J = 1, M
+            CALL SCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 )
+            CALL SGEMV( 'N', N, N, ONE, Q, LDQ, WORK, 1, ZERO,
+     $                  Z( 1, J ), 1 )
+   20    CONTINUE
+      END IF
+*
+   30 CONTINUE
+*
+*     If eigenvalues are not in order, then sort them, along with
+*     eigenvectors.
+*
+      IF( WANTZ ) THEN
+         DO 50 J = 1, M - 1
+            I = 0
+            TMP1 = W( J )
+            DO 40 JJ = J + 1, M
+               IF( W( JJ ).LT.TMP1 ) THEN
+                  I = JJ
+                  TMP1 = W( JJ )
+               END IF
+   40       CONTINUE
+*
+            IF( I.NE.0 ) THEN
+               ITMP1 = IWORK( INDIBL+I-1 )
+               W( I ) = W( J )
+               IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 )
+               W( J ) = TMP1
+               IWORK( INDIBL+J-1 ) = ITMP1
+               CALL SSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 )
+               IF( INFO.NE.0 ) THEN
+                  ITMP1 = IFAIL( I )
+                  IFAIL( I ) = IFAIL( J )
+                  IFAIL( J ) = ITMP1
+               END IF
+            END IF
+   50    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of SSBGVX
+*
+      END
diff --git a/costa/native/external/lapack/ssbtrd.f b/costa/native/external/lapack/ssbtrd.f
new file mode 100644
index 000000000..1f36133e7
--- /dev/null
+++ b/costa/native/external/lapack/ssbtrd.f
@@ -0,0 +1,553 @@
+      SUBROUTINE SSBTRD( VECT, UPLO, N, KD, AB, LDAB, D, E, Q, LDQ,
+     $                   WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO, VECT
+      INTEGER            INFO, KD, LDAB, LDQ, N
+*     ..
+*     .. Array Arguments ..
+      REAL               AB( LDAB, * ), D( * ), E( * ), Q( LDQ, * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSBTRD reduces a real symmetric band matrix A to symmetric
+*  tridiagonal form T by an orthogonal similarity transformation:
+*  Q**T * A * Q = T.
+*
+*  Arguments
+*  =========
+*
+*  VECT    (input) CHARACTER*1
+*          = 'N':  do not form Q;
+*          = 'V':  form Q;
+*          = 'U':  update a matrix X, by forming X*Q.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) REAL array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*          On exit, the diagonal elements of AB are overwritten by the
+*          diagonal elements of the tridiagonal matrix T; if KD > 0, the
+*          elements on the first superdiagonal (if UPLO = 'U') or the
+*          first subdiagonal (if UPLO = 'L') are overwritten by the
+*          off-diagonal elements of T; the rest of AB is overwritten by
+*          values generated during the reduction.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  D       (output) REAL array, dimension (N)
+*          The diagonal elements of the tridiagonal matrix T.
+*
+*  E       (output) REAL array, dimension (N-1)
+*          The off-diagonal elements of the tridiagonal matrix T:
+*          E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'.
+*
+*  Q       (input/output) REAL array, dimension (LDQ,N)
+*          On entry, if VECT = 'U', then Q must contain an N-by-N
+*          matrix X; if VECT = 'N' or 'V', then Q need not be set.
+*
+*          On exit:
+*          if VECT = 'V', Q contains the N-by-N orthogonal matrix Q;
+*          if VECT = 'U', Q contains the product X*Q;
+*          if VECT = 'N', the array Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.
+*          LDQ >= 1, and LDQ >= N if VECT = 'V' or 'U'.
+*
+*  WORK    (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  Modified by Linda Kaufman, Bell Labs.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            INITQ, UPPER, WANTQ
+      INTEGER            I, I2, IBL, INCA, INCX, IQAEND, IQB, IQEND, J,
+     $                   J1, J1END, J1INC, J2, JEND, JIN, JINC, K, KD1,
+     $                   KDM1, KDN, L, LAST, LEND, NQ, NR, NRT
+      REAL               TEMP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLAR2V, SLARGV, SLARTG, SLARTV, SLASET, SROT,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INITQ = LSAME( VECT, 'V' )
+      WANTQ = INITQ .OR. LSAME( VECT, 'U' )
+      UPPER = LSAME( UPLO, 'U' )
+      KD1 = KD + 1
+      KDM1 = KD - 1
+      INCX = LDAB - 1
+      IQEND = 1
+*
+      INFO = 0
+      IF( .NOT.WANTQ .AND. .NOT.LSAME( VECT, 'N' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KD1 ) THEN
+         INFO = -6
+      ELSE IF( LDQ.LT.MAX( 1, N ) .AND. WANTQ ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSBTRD', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Initialize Q to the unit matrix, if needed
+*
+      IF( INITQ )
+     $   CALL SLASET( 'Full', N, N, ZERO, ONE, Q, LDQ )
+*
+*     Wherever possible, plane rotations are generated and applied in
+*     vector operations of length NR over the index set J1:J2:KD1.
+*
+*     The cosines and sines of the plane rotations are stored in the
+*     arrays D and WORK.
+*
+      INCA = KD1*LDAB
+      KDN = MIN( N-1, KD )
+      IF( UPPER ) THEN
+*
+         IF( KD.GT.1 ) THEN
+*
+*           Reduce to tridiagonal form, working with upper triangle
+*
+            NR = 0
+            J1 = KDN + 2
+            J2 = 1
+*
+            DO 90 I = 1, N - 2
+*
+*              Reduce i-th row of matrix to tridiagonal form
+*
+               DO 80 K = KDN + 1, 2, -1
+                  J1 = J1 + KDN
+                  J2 = J2 + KDN
+*
+                  IF( NR.GT.0 ) THEN
+*
+*                    generate plane rotations to annihilate nonzero
+*                    elements which have been created outside the band
+*
+                     CALL SLARGV( NR, AB( 1, J1-1 ), INCA, WORK( J1 ),
+     $                            KD1, D( J1 ), KD1 )
+*
+*                    apply rotations from the right
+*
+*
+*                    Dependent on the the number of diagonals either
+*                    SLARTV or SROT is used
+*
+                     IF( NR.GE.2*KD-1 ) THEN
+                        DO 10 L = 1, KD - 1
+                           CALL SLARTV( NR, AB( L+1, J1-1 ), INCA,
+     $                                  AB( L, J1 ), INCA, D( J1 ),
+     $                                  WORK( J1 ), KD1 )
+   10                   CONTINUE
+*
+                     ELSE
+                        JEND = J1 + ( NR-1 )*KD1
+                        DO 20 JINC = J1, JEND, KD1
+                           CALL SROT( KDM1, AB( 2, JINC-1 ), 1,
+     $                                AB( 1, JINC ), 1, D( JINC ),
+     $                                WORK( JINC ) )
+   20                   CONTINUE
+                     END IF
+                  END IF
+*
+*
+                  IF( K.GT.2 ) THEN
+                     IF( K.LE.N-I+1 ) THEN
+*
+*                       generate plane rotation to annihilate a(i,i+k-1)
+*                       within the band
+*
+                        CALL SLARTG( AB( KD-K+3, I+K-2 ),
+     $                               AB( KD-K+2, I+K-1 ), D( I+K-1 ),
+     $                               WORK( I+K-1 ), TEMP )
+                        AB( KD-K+3, I+K-2 ) = TEMP
+*
+*                       apply rotation from the right
+*
+                        CALL SROT( K-3, AB( KD-K+4, I+K-2 ), 1,
+     $                             AB( KD-K+3, I+K-1 ), 1, D( I+K-1 ),
+     $                             WORK( I+K-1 ) )
+                     END IF
+                     NR = NR + 1
+                     J1 = J1 - KDN - 1
+                  END IF
+*
+*                 apply plane rotations from both sides to diagonal
+*                 blocks
+*
+                  IF( NR.GT.0 )
+     $               CALL SLAR2V( NR, AB( KD1, J1-1 ), AB( KD1, J1 ),
+     $                            AB( KD, J1 ), INCA, D( J1 ),
+     $                            WORK( J1 ), KD1 )
+*
+*                 apply plane rotations from the left
+*
+                  IF( NR.GT.0 ) THEN
+                     IF( 2*KD-1.LT.NR ) THEN
+*
+*                    Dependent on the the number of diagonals either
+*                    SLARTV or SROT is used
+*
+                        DO 30 L = 1, KD - 1
+                           IF( J2+L.GT.N ) THEN
+                              NRT = NR - 1
+                           ELSE
+                              NRT = NR
+                           END IF
+                           IF( NRT.GT.0 )
+     $                        CALL SLARTV( NRT, AB( KD-L, J1+L ), INCA,
+     $                                     AB( KD-L+1, J1+L ), INCA,
+     $                                     D( J1 ), WORK( J1 ), KD1 )
+   30                   CONTINUE
+                     ELSE
+                        J1END = J1 + KD1*( NR-2 )
+                        IF( J1END.GE.J1 ) THEN
+                           DO 40 JIN = J1, J1END, KD1
+                              CALL SROT( KD-1, AB( KD-1, JIN+1 ), INCX,
+     $                                   AB( KD, JIN+1 ), INCX,
+     $                                   D( JIN ), WORK( JIN ) )
+   40                      CONTINUE
+                        END IF
+                        LEND = MIN( KDM1, N-J2 )
+                        LAST = J1END + KD1
+                        IF( LEND.GT.0 )
+     $                     CALL SROT( LEND, AB( KD-1, LAST+1 ), INCX,
+     $                                AB( KD, LAST+1 ), INCX, D( LAST ),
+     $                                WORK( LAST ) )
+                     END IF
+                  END IF
+*
+                  IF( WANTQ ) THEN
+*
+*                    accumulate product of plane rotations in Q
+*
+                     IF( INITQ ) THEN
+*
+*                 take advantage of the fact that Q was
+*                 initially the Identity matrix
+*
+                        IQEND = MAX( IQEND, J2 )
+                        I2 = MAX( 0, K-3 )
+                        IQAEND = 1 + I*KD
+                        IF( K.EQ.2 )
+     $                     IQAEND = IQAEND + KD
+                        IQAEND = MIN( IQAEND, IQEND )
+                        DO 50 J = J1, J2, KD1
+                           IBL = I - I2 / KDM1
+                           I2 = I2 + 1
+                           IQB = MAX( 1, J-IBL )
+                           NQ = 1 + IQAEND - IQB
+                           IQAEND = MIN( IQAEND+KD, IQEND )
+                           CALL SROT( NQ, Q( IQB, J-1 ), 1, Q( IQB, J ),
+     $                                1, D( J ), WORK( J ) )
+   50                   CONTINUE
+                     ELSE
+*
+                        DO 60 J = J1, J2, KD1
+                           CALL SROT( N, Q( 1, J-1 ), 1, Q( 1, J ), 1,
+     $                                D( J ), WORK( J ) )
+   60                   CONTINUE
+                     END IF
+*
+                  END IF
+*
+                  IF( J2+KDN.GT.N ) THEN
+*
+*                    adjust J2 to keep within the bounds of the matrix
+*
+                     NR = NR - 1
+                     J2 = J2 - KDN - 1
+                  END IF
+*
+                  DO 70 J = J1, J2, KD1
+*
+*                    create nonzero element a(j-1,j+kd) outside the band
+*                    and store it in WORK
+*
+                     WORK( J+KD ) = WORK( J )*AB( 1, J+KD )
+                     AB( 1, J+KD ) = D( J )*AB( 1, J+KD )
+   70             CONTINUE
+   80          CONTINUE
+   90       CONTINUE
+         END IF
+*
+         IF( KD.GT.0 ) THEN
+*
+*           copy off-diagonal elements to E
+*
+            DO 100 I = 1, N - 1
+               E( I ) = AB( KD, I+1 )
+  100       CONTINUE
+         ELSE
+*
+*           set E to zero if original matrix was diagonal
+*
+            DO 110 I = 1, N - 1
+               E( I ) = ZERO
+  110       CONTINUE
+         END IF
+*
+*        copy diagonal elements to D
+*
+         DO 120 I = 1, N
+            D( I ) = AB( KD1, I )
+  120    CONTINUE
+*
+      ELSE
+*
+         IF( KD.GT.1 ) THEN
+*
+*           Reduce to tridiagonal form, working with lower triangle
+*
+            NR = 0
+            J1 = KDN + 2
+            J2 = 1
+*
+            DO 210 I = 1, N - 2
+*
+*              Reduce i-th column of matrix to tridiagonal form
+*
+               DO 200 K = KDN + 1, 2, -1
+                  J1 = J1 + KDN
+                  J2 = J2 + KDN
+*
+                  IF( NR.GT.0 ) THEN
+*
+*                    generate plane rotations to annihilate nonzero
+*                    elements which have been created outside the band
+*
+                     CALL SLARGV( NR, AB( KD1, J1-KD1 ), INCA,
+     $                            WORK( J1 ), KD1, D( J1 ), KD1 )
+*
+*                    apply plane rotations from one side
+*
+*
+*                    Dependent on the the number of diagonals either
+*                    SLARTV or SROT is used
+*
+                     IF( NR.GT.2*KD-1 ) THEN
+                        DO 130 L = 1, KD - 1
+                           CALL SLARTV( NR, AB( KD1-L, J1-KD1+L ), INCA,
+     $                                  AB( KD1-L+1, J1-KD1+L ), INCA,
+     $                                  D( J1 ), WORK( J1 ), KD1 )
+  130                   CONTINUE
+                     ELSE
+                        JEND = J1 + KD1*( NR-1 )
+                        DO 140 JINC = J1, JEND, KD1
+                           CALL SROT( KDM1, AB( KD, JINC-KD ), INCX,
+     $                                AB( KD1, JINC-KD ), INCX,
+     $                                D( JINC ), WORK( JINC ) )
+  140                   CONTINUE
+                     END IF
+*
+                  END IF
+*
+                  IF( K.GT.2 ) THEN
+                     IF( K.LE.N-I+1 ) THEN
+*
+*                       generate plane rotation to annihilate a(i+k-1,i)
+*                       within the band
+*
+                        CALL SLARTG( AB( K-1, I ), AB( K, I ),
+     $                               D( I+K-1 ), WORK( I+K-1 ), TEMP )
+                        AB( K-1, I ) = TEMP
+*
+*                       apply rotation from the left
+*
+                        CALL SROT( K-3, AB( K-2, I+1 ), LDAB-1,
+     $                             AB( K-1, I+1 ), LDAB-1, D( I+K-1 ),
+     $                             WORK( I+K-1 ) )
+                     END IF
+                     NR = NR + 1
+                     J1 = J1 - KDN - 1
+                  END IF
+*
+*                 apply plane rotations from both sides to diagonal
+*                 blocks
+*
+                  IF( NR.GT.0 )
+     $               CALL SLAR2V( NR, AB( 1, J1-1 ), AB( 1, J1 ),
+     $                            AB( 2, J1-1 ), INCA, D( J1 ),
+     $                            WORK( J1 ), KD1 )
+*
+*                 apply plane rotations from the right
+*
+*
+*                    Dependent on the the number of diagonals either
+*                    SLARTV or SROT is used
+*
+                  IF( NR.GT.0 ) THEN
+                     IF( NR.GT.2*KD-1 ) THEN
+                        DO 150 L = 1, KD - 1
+                           IF( J2+L.GT.N ) THEN
+                              NRT = NR - 1
+                           ELSE
+                              NRT = NR
+                           END IF
+                           IF( NRT.GT.0 )
+     $                        CALL SLARTV( NRT, AB( L+2, J1-1 ), INCA,
+     $                                     AB( L+1, J1 ), INCA, D( J1 ),
+     $                                     WORK( J1 ), KD1 )
+  150                   CONTINUE
+                     ELSE
+                        J1END = J1 + KD1*( NR-2 )
+                        IF( J1END.GE.J1 ) THEN
+                           DO 160 J1INC = J1, J1END, KD1
+                              CALL SROT( KDM1, AB( 3, J1INC-1 ), 1,
+     $                                   AB( 2, J1INC ), 1, D( J1INC ),
+     $                                   WORK( J1INC ) )
+  160                      CONTINUE
+                        END IF
+                        LEND = MIN( KDM1, N-J2 )
+                        LAST = J1END + KD1
+                        IF( LEND.GT.0 )
+     $                     CALL SROT( LEND, AB( 3, LAST-1 ), 1,
+     $                                AB( 2, LAST ), 1, D( LAST ),
+     $                                WORK( LAST ) )
+                     END IF
+                  END IF
+*
+*
+*
+                  IF( WANTQ ) THEN
+*
+*                    accumulate product of plane rotations in Q
+*
+                     IF( INITQ ) THEN
+*
+*                 take advantage of the fact that Q was
+*                 initially the Identity matrix
+*
+                        IQEND = MAX( IQEND, J2 )
+                        I2 = MAX( 0, K-3 )
+                        IQAEND = 1 + I*KD
+                        IF( K.EQ.2 )
+     $                     IQAEND = IQAEND + KD
+                        IQAEND = MIN( IQAEND, IQEND )
+                        DO 170 J = J1, J2, KD1
+                           IBL = I - I2 / KDM1
+                           I2 = I2 + 1
+                           IQB = MAX( 1, J-IBL )
+                           NQ = 1 + IQAEND - IQB
+                           IQAEND = MIN( IQAEND+KD, IQEND )
+                           CALL SROT( NQ, Q( IQB, J-1 ), 1, Q( IQB, J ),
+     $                                1, D( J ), WORK( J ) )
+  170                   CONTINUE
+                     ELSE
+*
+                        DO 180 J = J1, J2, KD1
+                           CALL SROT( N, Q( 1, J-1 ), 1, Q( 1, J ), 1,
+     $                                D( J ), WORK( J ) )
+  180                   CONTINUE
+                     END IF
+                  END IF
+*
+                  IF( J2+KDN.GT.N ) THEN
+*
+*                    adjust J2 to keep within the bounds of the matrix
+*
+                     NR = NR - 1
+                     J2 = J2 - KDN - 1
+                  END IF
+*
+                  DO 190 J = J1, J2, KD1
+*
+*                    create nonzero element a(j+kd,j-1) outside the
+*                    band and store it in WORK
+*
+                     WORK( J+KD ) = WORK( J )*AB( KD1, J )
+                     AB( KD1, J ) = D( J )*AB( KD1, J )
+  190             CONTINUE
+  200          CONTINUE
+  210       CONTINUE
+         END IF
+*
+         IF( KD.GT.0 ) THEN
+*
+*           copy off-diagonal elements to E
+*
+            DO 220 I = 1, N - 1
+               E( I ) = AB( 2, I )
+  220       CONTINUE
+         ELSE
+*
+*           set E to zero if original matrix was diagonal
+*
+            DO 230 I = 1, N - 1
+               E( I ) = ZERO
+  230       CONTINUE
+         END IF
+*
+*        copy diagonal elements to D
+*
+         DO 240 I = 1, N
+            D( I ) = AB( 1, I )
+  240    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of SSBTRD
+*
+      END
diff --git a/costa/native/external/lapack/sspcon.f b/costa/native/external/lapack/sspcon.f
new file mode 100644
index 000000000..65617c912
--- /dev/null
+++ b/costa/native/external/lapack/sspcon.f
@@ -0,0 +1,158 @@
+      SUBROUTINE SSPCON( UPLO, N, AP, IPIV, ANORM, RCOND, WORK, IWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+      REAL               ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), IWORK( * )
+      REAL               AP( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSPCON estimates the reciprocal of the condition number (in the
+*  1-norm) of a real symmetric packed matrix A using the factorization
+*  A = U*D*U**T or A = L*D*L**T computed by SSPTRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**T;
+*          = 'L':  Lower triangular, form is A = L*D*L**T.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input) REAL array, dimension (N*(N+1)/2)
+*          The block diagonal matrix D and the multipliers used to
+*          obtain the factor U or L as computed by SSPTRF, stored as a
+*          packed triangular matrix.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by SSPTRF.
+*
+*  ANORM   (input) REAL
+*          The 1-norm of the original matrix A.
+*
+*  RCOND   (output) REAL
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an
+*          estimate of the 1-norm of inv(A) computed in this routine.
+*
+*  WORK    (workspace) REAL array, dimension (2*N)
+*
+*  IWORK    (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, IP, KASE
+      REAL               AINVNM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLACON, SSPTRS, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSPCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.LE.ZERO ) THEN
+         RETURN
+      END IF
+*
+*     Check that the diagonal matrix D is nonsingular.
+*
+      IF( UPPER ) THEN
+*
+*        Upper triangular storage: examine D from bottom to top
+*
+         IP = N*( N+1 ) / 2
+         DO 10 I = N, 1, -1
+            IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO )
+     $         RETURN
+            IP = IP - I
+   10    CONTINUE
+      ELSE
+*
+*        Lower triangular storage: examine D from top to bottom.
+*
+         IP = 1
+         DO 20 I = 1, N
+            IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO )
+     $         RETURN
+            IP = IP + N - I + 1
+   20    CONTINUE
+      END IF
+*
+*     Estimate the 1-norm of the inverse.
+*
+      KASE = 0
+   30 CONTINUE
+      CALL SLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+*
+*        Multiply by inv(L*D*L') or inv(U*D*U').
+*
+         CALL SSPTRS( UPLO, N, 1, AP, IPIV, WORK, N, INFO )
+         GO TO 30
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+      RETURN
+*
+*     End of SSPCON
+*
+      END
diff --git a/costa/native/external/lapack/sspev.f b/costa/native/external/lapack/sspev.f
new file mode 100644
index 000000000..3871abe04
--- /dev/null
+++ b/costa/native/external/lapack/sspev.f
@@ -0,0 +1,188 @@
+      SUBROUTINE SSPEV( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      REAL               AP( * ), W( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSPEV computes all the eigenvalues and, optionally, eigenvectors of a
+*  real symmetric matrix A in packed storage.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) REAL array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, AP is overwritten by values generated during the
+*          reduction to tridiagonal form.  If UPLO = 'U', the diagonal
+*          and first superdiagonal of the tridiagonal matrix T overwrite
+*          the corresponding elements of A, and if UPLO = 'L', the
+*          diagonal and first subdiagonal of T overwrite the
+*          corresponding elements of A.
+*
+*  W       (output) REAL array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) REAL array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal
+*          eigenvectors of the matrix A, with the i-th column of Z
+*          holding the eigenvector associated with W(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace) REAL array, dimension (3*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = i, the algorithm failed to converge; i
+*                off-diagonal elements of an intermediate tridiagonal
+*                form did not converge to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            WANTZ
+      INTEGER            IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE
+      REAL               ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
+     $                   SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH, SLANSP
+      EXTERNAL           LSAME, SLAMCH, SLANSP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SOPGTR, SSCAL, SSPTRD, SSTEQR, SSTERF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( LSAME( UPLO, 'U' ) .OR. LSAME( UPLO, 'L' ) ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -7
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSPEV ', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         W( 1 ) = AP( 1 )
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      EPS = SLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = SQRT( BIGNUM )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ANRM = SLANSP( 'M', UPLO, N, AP, WORK )
+      ISCALE = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         CALL SSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 )
+      END IF
+*
+*     Call SSPTRD to reduce symmetric packed matrix to tridiagonal form.
+*
+      INDE = 1
+      INDTAU = INDE + N
+      CALL SSPTRD( UPLO, N, AP, W, WORK( INDE ), WORK( INDTAU ), IINFO )
+*
+*     For eigenvalues only, call SSTERF.  For eigenvectors, first call
+*     SOPGTR to generate the orthogonal matrix, then call SSTEQR.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL SSTERF( N, W, WORK( INDE ), INFO )
+      ELSE
+         INDWRK = INDTAU + N
+         CALL SOPGTR( UPLO, N, AP, WORK( INDTAU ), Z, LDZ,
+     $                WORK( INDWRK ), IINFO )
+         CALL SSTEQR( JOBZ, N, W, WORK( INDE ), Z, LDZ, WORK( INDTAU ),
+     $                INFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = N
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL SSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+      RETURN
+*
+*     End of SSPEV
+*
+      END
diff --git a/costa/native/external/lapack/sspevd.f b/costa/native/external/lapack/sspevd.f
new file mode 100644
index 000000000..a407a6ed8
--- /dev/null
+++ b/costa/native/external/lapack/sspevd.f
@@ -0,0 +1,249 @@
+      SUBROUTINE SSPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK,
+     $                   IWORK, LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, LDZ, LIWORK, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               AP( * ), W( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSPEVD computes all the eigenvalues and, optionally, eigenvectors
+*  of a real symmetric matrix A in packed storage. If eigenvectors are
+*  desired, it uses a divide and conquer algorithm.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) REAL array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, AP is overwritten by values generated during the
+*          reduction to tridiagonal form.  If UPLO = 'U', the diagonal
+*          and first superdiagonal of the tridiagonal matrix T overwrite
+*          the corresponding elements of A, and if UPLO = 'L', the
+*          diagonal and first subdiagonal of T overwrite the
+*          corresponding elements of A.
+*
+*  W       (output) REAL array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) REAL array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal
+*          eigenvectors of the matrix A, with the i-th column of Z
+*          holding the eigenvector associated with W(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace/output) REAL array,
+*                                         dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If N <= 1,               LWORK must be at least 1.
+*          If JOBZ = 'N' and N > 1, LWORK must be at least 2*N.
+*          If JOBZ = 'V' and N > 1, LWORK must be at least
+*                                                 1 + 6*N + N**2.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.
+*          If JOBZ  = 'N' or N <= 1, LIWORK must be at least 1.
+*          If JOBZ  = 'V' and N > 1, LIWORK must be at least 3 + 5*N.
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = i, the algorithm failed to converge; i
+*                off-diagonal elements of an intermediate tridiagonal
+*                form did not converge to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, WANTZ
+      INTEGER            IINFO, INDE, INDTAU, INDWRK, ISCALE, LIWMIN,
+     $                   LLWORK, LWMIN
+      REAL               ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
+     $                   SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH, SLANSP
+      EXTERNAL           LSAME, SLAMCH, SLANSP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SOPMTR, SSCAL, SSPTRD, SSTEDC, SSTERF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( N.LE.1 ) THEN
+         LIWMIN = 1
+         LWMIN = 1
+      ELSE
+         IF( WANTZ ) THEN
+            LIWMIN = 3 + 5*N
+            LWMIN = 1 + 6*N + N**2
+         ELSE
+            LIWMIN = 1
+            LWMIN = 2*N
+         END IF
+      END IF
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( LSAME( UPLO, 'U' ) .OR. LSAME( UPLO, 'L' ) ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -7
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -9
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -11
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSPEVD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN 
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN 
+*
+      IF( N.EQ.1 ) THEN
+         W( 1 ) = AP( 1 )
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN 
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      EPS = SLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = SQRT( BIGNUM )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ANRM = SLANSP( 'M', UPLO, N, AP, WORK )
+      ISCALE = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         CALL SSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 )
+      END IF
+*
+*     Call SSPTRD to reduce symmetric packed matrix to tridiagonal form.
+*
+      INDE = 1
+      INDTAU = INDE + N
+      CALL SSPTRD( UPLO, N, AP, W, WORK( INDE ), WORK( INDTAU ), IINFO )
+*
+*     For eigenvalues only, call SSTERF.  For eigenvectors, first call
+*     SSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the
+*     tridiagonal matrix, then call SOPMTR to multiply it by the
+*     Householder transformations represented in AP.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL SSTERF( N, W, WORK( INDE ), INFO )
+      ELSE
+         INDWRK = INDTAU + N
+         LLWORK = LWORK - INDWRK + 1
+         CALL SSTEDC( 'I', N, W, WORK( INDE ), Z, LDZ, WORK( INDWRK ),
+     $                LLWORK, IWORK, LIWORK, INFO )
+         CALL SOPMTR( 'L', UPLO, 'N', N, N, AP, WORK( INDTAU ), Z, LDZ,
+     $                WORK( INDWRK ), IINFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( ISCALE.EQ.1 )
+     $   CALL SSCAL( N, ONE / SIGMA, W, 1 )
+*
+      WORK( 1 ) = LWMIN
+      IWORK( 1 ) = LIWMIN
+      RETURN
+*
+*     End of SSPEVD
+*
+      END
diff --git a/costa/native/external/lapack/sspevx.f b/costa/native/external/lapack/sspevx.f
new file mode 100644
index 000000000..05d350e69
--- /dev/null
+++ b/costa/native/external/lapack/sspevx.f
@@ -0,0 +1,377 @@
+      SUBROUTINE SSPEVX( JOBZ, RANGE, UPLO, N, AP, VL, VU, IL, IU,
+     $                   ABSTOL, M, W, Z, LDZ, WORK, IWORK, IFAIL,
+     $                   INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE, UPLO
+      INTEGER            IL, INFO, IU, LDZ, M, N
+      REAL               ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IFAIL( * ), IWORK( * )
+      REAL               AP( * ), W( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSPEVX computes selected eigenvalues and, optionally, eigenvectors
+*  of a real symmetric matrix A in packed storage.  Eigenvalues/vectors
+*  can be selected by specifying either a range of values or a range of
+*  indices for the desired eigenvalues.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found;
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found;
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) REAL array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, AP is overwritten by values generated during the
+*          reduction to tridiagonal form.  If UPLO = 'U', the diagonal
+*          and first superdiagonal of the tridiagonal matrix T overwrite
+*          the corresponding elements of A, and if UPLO = 'L', the
+*          diagonal and first subdiagonal of T overwrite the
+*          corresponding elements of A.
+*
+*  VL      (input) REAL
+*  VU      (input) REAL
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) REAL
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less than
+*          or equal to zero, then  EPS*|T|  will be used in its place,
+*          where |T| is the 1-norm of the tridiagonal matrix obtained
+*          by reducing AP to tridiagonal form.
+*
+*          Eigenvalues will be computed most accurately when ABSTOL is
+*          set to twice the underflow threshold 2*SLAMCH('S'), not zero.
+*          If this routine returns with INFO>0, indicating that some
+*          eigenvectors did not converge, try setting ABSTOL to
+*          2*SLAMCH('S').
+*
+*          See "Computing Small Singular Values of Bidiagonal Matrices
+*          with Guaranteed High Relative Accuracy," by Demmel and
+*          Kahan, LAPACK Working Note #3.
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) REAL array, dimension (N)
+*          If INFO = 0, the selected eigenvalues in ascending order.
+*
+*  Z       (output) REAL array, dimension (LDZ, max(1,M))
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix A
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          If an eigenvector fails to converge, then that column of Z
+*          contains the latest approximation to the eigenvector, and the
+*          index of the eigenvector is returned in IFAIL.
+*          If JOBZ = 'N', then Z is not referenced.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace) REAL array, dimension (8*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (5*N)
+*
+*  IFAIL   (output) INTEGER array, dimension (N)
+*          If JOBZ = 'V', then if INFO = 0, the first M elements of
+*          IFAIL are zero.  If INFO > 0, then IFAIL contains the
+*          indices of the eigenvectors that failed to converge.
+*          If JOBZ = 'N', then IFAIL is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, then i eigenvectors failed to converge.
+*                Their indices are stored in array IFAIL.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, VALEIG, WANTZ
+      CHARACTER          ORDER
+      INTEGER            I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL,
+     $                   INDISP, INDIWO, INDTAU, INDWRK, ISCALE, ITMP1,
+     $                   J, JJ, NSPLIT
+      REAL               ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN,
+     $                   SIGMA, SMLNUM, TMP1, VLL, VUU
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH, SLANSP
+      EXTERNAL           LSAME, SLAMCH, SLANSP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SOPGTR, SOPMTR, SSCAL, SSPTRD, SSTEBZ,
+     $                   SSTEIN, SSTEQR, SSTERF, SSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( LSAME( UPLO, 'L' ) .OR. LSAME( UPLO, 'U' ) ) )
+     $          THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE
+         IF( VALEIG ) THEN
+            IF( N.GT.0 .AND. VU.LE.VL )
+     $         INFO = -7
+         ELSE IF( INDEIG ) THEN
+            IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN
+               INFO = -8
+            ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN
+               INFO = -9
+            END IF
+         END IF
+      END IF
+      IF( INFO.EQ.0 ) THEN
+         IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) )
+     $      INFO = -14
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSPEVX', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( ALLEIG .OR. INDEIG ) THEN
+            M = 1
+            W( 1 ) = AP( 1 )
+         ELSE
+            IF( VL.LT.AP( 1 ) .AND. VU.GE.AP( 1 ) ) THEN
+               M = 1
+               W( 1 ) = AP( 1 )
+            END IF
+         END IF
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      EPS = SLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ISCALE = 0
+      ABSTLL = ABSTOL
+      IF ( VALEIG ) THEN
+         VLL = VL
+         VUU = VU
+      ELSE
+         VLL = ZERO
+         VUU = ZERO
+      ENDIF
+      ANRM = SLANSP( 'M', UPLO, N, AP, WORK )
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         CALL SSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 )
+         IF( ABSTOL.GT.0 )
+     $      ABSTLL = ABSTOL*SIGMA
+         IF( VALEIG ) THEN
+            VLL = VL*SIGMA
+            VUU = VU*SIGMA
+         END IF
+      END IF
+*
+*     Call SSPTRD to reduce symmetric packed matrix to tridiagonal form.
+*
+      INDTAU = 1
+      INDE = INDTAU + N
+      INDD = INDE + N
+      INDWRK = INDD + N
+      CALL SSPTRD( UPLO, N, AP, WORK( INDD ), WORK( INDE ),
+     $             WORK( INDTAU ), IINFO )
+*
+*     If all eigenvalues are desired and ABSTOL is less than or equal
+*     to zero, then call SSTERF or SOPGTR and SSTEQR.  If this fails
+*     for some eigenvalue, then try SSTEBZ.
+*
+      IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND.
+     $    ( ABSTOL.LE.ZERO ) ) THEN
+         CALL SCOPY( N, WORK( INDD ), 1, W, 1 )
+         INDEE = INDWRK + 2*N
+         IF( .NOT.WANTZ ) THEN
+            CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 )
+            CALL SSTERF( N, W, WORK( INDEE ), INFO )
+         ELSE
+            CALL SOPGTR( UPLO, N, AP, WORK( INDTAU ), Z, LDZ,
+     $                   WORK( INDWRK ), IINFO )
+            CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 )
+            CALL SSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ,
+     $                   WORK( INDWRK ), INFO )
+            IF( INFO.EQ.0 ) THEN
+               DO 10 I = 1, N
+                  IFAIL( I ) = 0
+   10          CONTINUE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            M = N
+            GO TO 20
+         END IF
+         INFO = 0
+      END IF
+*
+*     Otherwise, call SSTEBZ and, if eigenvectors are desired, SSTEIN.
+*
+      IF( WANTZ ) THEN
+         ORDER = 'B'
+      ELSE
+         ORDER = 'E'
+      END IF
+      INDIBL = 1
+      INDISP = INDIBL + N
+      INDIWO = INDISP + N
+      CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL,
+     $             WORK( INDD ), WORK( INDE ), M, NSPLIT, W,
+     $             IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ),
+     $             IWORK( INDIWO ), INFO )
+*
+      IF( WANTZ ) THEN
+         CALL SSTEIN( N, WORK( INDD ), WORK( INDE ), M, W,
+     $                IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ,
+     $                WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO )
+*
+*        Apply orthogonal matrix used in reduction to tridiagonal
+*        form to eigenvectors returned by SSTEIN.
+*
+         CALL SOPMTR( 'L', UPLO, 'N', N, M, AP, WORK( INDTAU ), Z, LDZ,
+     $                WORK( INDWRK ), INFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+   20 CONTINUE
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = M
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL SSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+*     If eigenvalues are not in order, then sort them, along with
+*     eigenvectors.
+*
+      IF( WANTZ ) THEN
+         DO 40 J = 1, M - 1
+            I = 0
+            TMP1 = W( J )
+            DO 30 JJ = J + 1, M
+               IF( W( JJ ).LT.TMP1 ) THEN
+                  I = JJ
+                  TMP1 = W( JJ )
+               END IF
+   30       CONTINUE
+*
+            IF( I.NE.0 ) THEN
+               ITMP1 = IWORK( INDIBL+I-1 )
+               W( I ) = W( J )
+               IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 )
+               W( J ) = TMP1
+               IWORK( INDIBL+J-1 ) = ITMP1
+               CALL SSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 )
+               IF( INFO.NE.0 ) THEN
+                  ITMP1 = IFAIL( I )
+                  IFAIL( I ) = IFAIL( J )
+                  IFAIL( J ) = ITMP1
+               END IF
+            END IF
+   40    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of SSPEVX
+*
+      END
diff --git a/costa/native/external/lapack/sspgst.f b/costa/native/external/lapack/sspgst.f
new file mode 100644
index 000000000..b8d386c13
--- /dev/null
+++ b/costa/native/external/lapack/sspgst.f
@@ -0,0 +1,209 @@
+      SUBROUTINE SSPGST( ITYPE, UPLO, N, AP, BP, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, ITYPE, N
+*     ..
+*     .. Array Arguments ..
+      REAL               AP( * ), BP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSPGST reduces a real symmetric-definite generalized eigenproblem
+*  to standard form, using packed storage.
+*
+*  If ITYPE = 1, the problem is A*x = lambda*B*x,
+*  and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T)
+*
+*  If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
+*  B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L.
+*
+*  B must have been previously factorized as U**T*U or L*L**T by SPPTRF.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T);
+*          = 2 or 3: compute U*A*U**T or L**T*A*L.
+*
+*  UPLO    (input) CHARACTER
+*          = 'U':  Upper triangle of A is stored and B is factored as
+*                  U**T*U;
+*          = 'L':  Lower triangle of A is stored and B is factored as
+*                  L*L**T.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  AP      (input/output) REAL array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, if INFO = 0, the transformed matrix, stored in the
+*          same format as A.
+*
+*  BP      (input) REAL array, dimension (N*(N+1)/2)
+*          The triangular factor from the Cholesky factorization of B,
+*          stored in the same format as A, as returned by SPPTRF.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, HALF
+      PARAMETER          ( ONE = 1.0, HALF = 0.5 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, J1, J1J1, JJ, K, K1, K1K1, KK
+      REAL               AJJ, AKK, BJJ, BKK, CT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SSCAL, SSPMV, SSPR2, STPMV, STPSV,
+     $                   XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SDOT
+      EXTERNAL           LSAME, SDOT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSPGST', -INFO )
+         RETURN
+      END IF
+*
+      IF( ITYPE.EQ.1 ) THEN
+         IF( UPPER ) THEN
+*
+*           Compute inv(U')*A*inv(U)
+*
+*           J1 and JJ are the indices of A(1,j) and A(j,j)
+*
+            JJ = 0
+            DO 10 J = 1, N
+               J1 = JJ + 1
+               JJ = JJ + J
+*
+*              Compute the j-th column of the upper triangle of A
+*
+               BJJ = BP( JJ )
+               CALL STPSV( UPLO, 'Transpose', 'Nonunit', J, BP,
+     $                     AP( J1 ), 1 )
+               CALL SSPMV( UPLO, J-1, -ONE, AP, BP( J1 ), 1, ONE,
+     $                     AP( J1 ), 1 )
+               CALL SSCAL( J-1, ONE / BJJ, AP( J1 ), 1 )
+               AP( JJ ) = ( AP( JJ )-SDOT( J-1, AP( J1 ), 1, BP( J1 ),
+     $                    1 ) ) / BJJ
+   10       CONTINUE
+         ELSE
+*
+*           Compute inv(L)*A*inv(L')
+*
+*           KK and K1K1 are the indices of A(k,k) and A(k+1,k+1)
+*
+            KK = 1
+            DO 20 K = 1, N
+               K1K1 = KK + N - K + 1
+*
+*              Update the lower triangle of A(k:n,k:n)
+*
+               AKK = AP( KK )
+               BKK = BP( KK )
+               AKK = AKK / BKK**2
+               AP( KK ) = AKK
+               IF( K.LT.N ) THEN
+                  CALL SSCAL( N-K, ONE / BKK, AP( KK+1 ), 1 )
+                  CT = -HALF*AKK
+                  CALL SAXPY( N-K, CT, BP( KK+1 ), 1, AP( KK+1 ), 1 )
+                  CALL SSPR2( UPLO, N-K, -ONE, AP( KK+1 ), 1,
+     $                        BP( KK+1 ), 1, AP( K1K1 ) )
+                  CALL SAXPY( N-K, CT, BP( KK+1 ), 1, AP( KK+1 ), 1 )
+                  CALL STPSV( UPLO, 'No transpose', 'Non-unit', N-K,
+     $                        BP( K1K1 ), AP( KK+1 ), 1 )
+               END IF
+               KK = K1K1
+   20       CONTINUE
+         END IF
+      ELSE
+         IF( UPPER ) THEN
+*
+*           Compute U*A*U'
+*
+*           K1 and KK are the indices of A(1,k) and A(k,k)
+*
+            KK = 0
+            DO 30 K = 1, N
+               K1 = KK + 1
+               KK = KK + K
+*
+*              Update the upper triangle of A(1:k,1:k)
+*
+               AKK = AP( KK )
+               BKK = BP( KK )
+               CALL STPMV( UPLO, 'No transpose', 'Non-unit', K-1, BP,
+     $                     AP( K1 ), 1 )
+               CT = HALF*AKK
+               CALL SAXPY( K-1, CT, BP( K1 ), 1, AP( K1 ), 1 )
+               CALL SSPR2( UPLO, K-1, ONE, AP( K1 ), 1, BP( K1 ), 1,
+     $                     AP )
+               CALL SAXPY( K-1, CT, BP( K1 ), 1, AP( K1 ), 1 )
+               CALL SSCAL( K-1, BKK, AP( K1 ), 1 )
+               AP( KK ) = AKK*BKK**2
+   30       CONTINUE
+         ELSE
+*
+*           Compute L'*A*L
+*
+*           JJ and J1J1 are the indices of A(j,j) and A(j+1,j+1)
+*
+            JJ = 1
+            DO 40 J = 1, N
+               J1J1 = JJ + N - J + 1
+*
+*              Compute the j-th column of the lower triangle of A
+*
+               AJJ = AP( JJ )
+               BJJ = BP( JJ )
+               AP( JJ ) = AJJ*BJJ + SDOT( N-J, AP( JJ+1 ), 1,
+     $                    BP( JJ+1 ), 1 )
+               CALL SSCAL( N-J, BJJ, AP( JJ+1 ), 1 )
+               CALL SSPMV( UPLO, N-J, ONE, AP( J1J1 ), BP( JJ+1 ), 1,
+     $                     ONE, AP( JJ+1 ), 1 )
+               CALL STPMV( UPLO, 'Transpose', 'Non-unit', N-J+1,
+     $                     BP( JJ ), AP( JJ ), 1 )
+               JJ = J1J1
+   40       CONTINUE
+         END IF
+      END IF
+      RETURN
+*
+*     End of SSPGST
+*
+      END
diff --git a/costa/native/external/lapack/sspgv.f b/costa/native/external/lapack/sspgv.f
new file mode 100644
index 000000000..15594c9d7
--- /dev/null
+++ b/costa/native/external/lapack/sspgv.f
@@ -0,0 +1,196 @@
+      SUBROUTINE SSPGV( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK,
+     $                  INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, ITYPE, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      REAL               AP( * ), BP( * ), W( * ), WORK( * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSPGV computes all the eigenvalues and, optionally, the eigenvectors
+*  of a real generalized symmetric-definite eigenproblem, of the form
+*  A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.
+*  Here A and B are assumed to be symmetric, stored in packed format,
+*  and B is also positive definite.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          Specifies the problem type to be solved:
+*          = 1:  A*x = (lambda)*B*x
+*          = 2:  A*B*x = (lambda)*x
+*          = 3:  B*A*x = (lambda)*x
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  AP      (input/output) REAL array, dimension
+*                            (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, the contents of AP are destroyed.
+*
+*  BP      (input/output) REAL array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          B, packed columnwise in a linear array.  The j-th column of B
+*          is stored in the array BP as follows:
+*          if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j;
+*          if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n.
+*
+*          On exit, the triangular factor U or L from the Cholesky
+*          factorization B = U**T*U or B = L*L**T, in the same storage
+*          format as B.
+*
+*  W       (output) REAL array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) REAL array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of
+*          eigenvectors.  The eigenvectors are normalized as follows:
+*          if ITYPE = 1 or 2, Z**T*B*Z = I;
+*          if ITYPE = 3, Z**T*inv(B)*Z = I.
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace) REAL array, dimension (3*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  SPPTRF or SSPEV returned an error code:
+*             <= N:  if INFO = i, SSPEV failed to converge;
+*                    i off-diagonal elements of an intermediate
+*                    tridiagonal form did not converge to zero.
+*             > N:   if INFO = n + i, for 1 <= i <= n, then the leading
+*                    minor of order i of B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            UPPER, WANTZ
+      CHARACTER          TRANS
+      INTEGER            J, NEIG
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SPPTRF, SSPEV, SSPGST, STPMV, STPSV, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+*
+      INFO = 0
+      IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSPGV ', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form a Cholesky factorization of B.
+*
+      CALL SPPTRF( UPLO, N, BP, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem and solve.
+*
+      CALL SSPGST( ITYPE, UPLO, N, AP, BP, INFO )
+      CALL SSPEV( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, INFO )
+*
+      IF( WANTZ ) THEN
+*
+*        Backtransform eigenvectors to the original problem.
+*
+         NEIG = N
+         IF( INFO.GT.0 )
+     $      NEIG = INFO - 1
+         IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN
+*
+*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
+*           backtransform eigenvectors: x = inv(L)'*y or inv(U)*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'N'
+            ELSE
+               TRANS = 'T'
+            END IF
+*
+            DO 10 J = 1, NEIG
+               CALL STPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ),
+     $                     1 )
+   10       CONTINUE
+*
+         ELSE IF( ITYPE.EQ.3 ) THEN
+*
+*           For B*A*x=(lambda)*x;
+*           backtransform eigenvectors: x = L*y or U'*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'T'
+            ELSE
+               TRANS = 'N'
+            END IF
+*
+            DO 20 J = 1, NEIG
+               CALL STPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ),
+     $                     1 )
+   20       CONTINUE
+         END IF
+      END IF
+      RETURN
+*
+*     End of SSPGV
+*
+      END
diff --git a/costa/native/external/lapack/sspgvd.f b/costa/native/external/lapack/sspgvd.f
new file mode 100644
index 000000000..5f8d46ee1
--- /dev/null
+++ b/costa/native/external/lapack/sspgvd.f
@@ -0,0 +1,281 @@
+      SUBROUTINE SSPGVD( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK,
+     $                   LWORK, IWORK, LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, ITYPE, LDZ, LIWORK, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               AP( * ), BP( * ), W( * ), WORK( * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSPGVD computes all the eigenvalues, and optionally, the eigenvectors
+*  of a real generalized symmetric-definite eigenproblem, of the form
+*  A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.  Here A and
+*  B are assumed to be symmetric, stored in packed format, and B is also
+*  positive definite.
+*  If eigenvectors are desired, it uses a divide and conquer algorithm.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          Specifies the problem type to be solved:
+*          = 1:  A*x = (lambda)*B*x
+*          = 2:  A*B*x = (lambda)*x
+*          = 3:  B*A*x = (lambda)*x
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  AP      (input/output) REAL array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, the contents of AP are destroyed.
+*
+*  BP      (input/output) REAL array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          B, packed columnwise in a linear array.  The j-th column of B
+*          is stored in the array BP as follows:
+*          if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j;
+*          if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n.
+*
+*          On exit, the triangular factor U or L from the Cholesky
+*          factorization B = U**T*U or B = L*L**T, in the same storage
+*          format as B.
+*
+*  W       (output) REAL array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) REAL array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of
+*          eigenvectors.  The eigenvectors are normalized as follows:
+*          if ITYPE = 1 or 2, Z**T*B*Z = I;
+*          if ITYPE = 3, Z**T*inv(B)*Z = I.
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If N <= 1,               LWORK >= 1.
+*          If JOBZ = 'N' and N > 1, LWORK >= 2*N.
+*          If JOBZ = 'V' and N > 1, LWORK >= 1 + 6*N + 2*N**2.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.
+*          If JOBZ  = 'N' or N <= 1, LIWORK >= 1.
+*          If JOBZ  = 'V' and N > 1, LIWORK >= 3 + 5*N.
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  SPPTRF or SSPEVD returned an error code:
+*             <= N:  if INFO = i, SSPEVD failed to converge;
+*                    i off-diagonal elements of an intermediate
+*                    tridiagonal form did not converge to zero;
+*             > N:   if INFO = N + i, for 1 <= i <= N, then the leading
+*                    minor of order i of B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               TWO
+      PARAMETER          ( TWO = 2.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, UPPER, WANTZ
+      CHARACTER          TRANS
+      INTEGER            J, LGN, LIWMIN, LWMIN, NEIG
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SPPTRF, SSPEVD, SSPGST, STPMV, STPSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, LOG, MAX, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+      LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( N.LE.1 ) THEN
+         LGN = 0
+         LIWMIN = 1
+         LWMIN = 1
+      ELSE
+         LGN = INT( LOG( REAL( N ) ) / LOG( TWO ) )
+         IF( 2**LGN.LT.N )
+     $      LGN = LGN + 1
+         IF( 2**LGN.LT.N )
+     $      LGN = LGN + 1
+         IF( WANTZ ) THEN
+            LIWMIN = 3 + 5*N
+            LWMIN = 1 + 5*N + 2*N*LGN + 2*N**2
+         ELSE
+            LIWMIN = 1
+            LWMIN = 2*N
+         END IF
+      END IF
+*
+      IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDZ.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -11
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSPGVD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form a Cholesky factorization of BP.
+*
+      CALL SPPTRF( UPLO, N, BP, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem and solve.
+*
+      CALL SSPGST( ITYPE, UPLO, N, AP, BP, INFO )
+      CALL SSPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, IWORK,
+     $             LIWORK, INFO )
+      LWMIN = MAX( REAL( LWMIN ), REAL( WORK( 1 ) ) )
+      LIWMIN = MAX( REAL( LIWMIN ), REAL( IWORK( 1 ) ) )
+*
+      IF( WANTZ ) THEN
+*
+*        Backtransform eigenvectors to the original problem.
+*
+         NEIG = N
+         IF( INFO.GT.0 )
+     $      NEIG = INFO - 1
+         IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN
+*
+*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
+*           backtransform eigenvectors: x = inv(L)'*y or inv(U)*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'N'
+            ELSE
+               TRANS = 'T'
+            END IF
+*
+            DO 10 J = 1, NEIG
+               CALL STPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ),
+     $                     1 )
+   10       CONTINUE
+*
+         ELSE IF( ITYPE.EQ.3 ) THEN
+*
+*           For B*A*x=(lambda)*x;
+*           backtransform eigenvectors: x = L*y or U'*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'T'
+            ELSE
+               TRANS = 'N'
+            END IF
+*
+            DO 20 J = 1, NEIG
+               CALL STPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ),
+     $                     1 )
+   20       CONTINUE
+         END IF
+      END IF
+*
+      WORK( 1 ) = LWMIN
+      IWORK( 1 ) = LIWMIN
+*
+      RETURN
+*
+*     End of SSPGVD
+*
+      END
diff --git a/costa/native/external/lapack/sspgvx.f b/costa/native/external/lapack/sspgvx.f
new file mode 100644
index 000000000..5b374fd3c
--- /dev/null
+++ b/costa/native/external/lapack/sspgvx.f
@@ -0,0 +1,286 @@
+      SUBROUTINE SSPGVX( ITYPE, JOBZ, RANGE, UPLO, N, AP, BP, VL, VU,
+     $                   IL, IU, ABSTOL, M, W, Z, LDZ, WORK, IWORK,
+     $                   IFAIL, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE, UPLO
+      INTEGER            IL, INFO, ITYPE, IU, LDZ, M, N
+      REAL               ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IFAIL( * ), IWORK( * )
+      REAL               AP( * ), BP( * ), W( * ), WORK( * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSPGVX computes selected eigenvalues, and optionally, eigenvectors
+*  of a real generalized symmetric-definite eigenproblem, of the form
+*  A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.  Here A
+*  and B are assumed to be symmetric, stored in packed storage, and B
+*  is also positive definite.  Eigenvalues and eigenvectors can be
+*  selected by specifying either a range of values or a range of indices
+*  for the desired eigenvalues.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          Specifies the problem type to be solved:
+*          = 1:  A*x = (lambda)*B*x
+*          = 2:  A*B*x = (lambda)*x
+*          = 3:  B*A*x = (lambda)*x
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found.
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found.
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A and B are stored;
+*          = 'L':  Lower triangle of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix pencil (A,B).  N >= 0.
+*
+*  AP      (input/output) REAL array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, the contents of AP are destroyed.
+*
+*  BP      (input/output) REAL array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          B, packed columnwise in a linear array.  The j-th column of B
+*          is stored in the array BP as follows:
+*          if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j;
+*          if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n.
+*
+*          On exit, the triangular factor U or L from the Cholesky
+*          factorization B = U**T*U or B = L*L**T, in the same storage
+*          format as B.
+*
+*  VL      (input) REAL
+*  VU      (input) REAL
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) REAL
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less than
+*          or equal to zero, then  EPS*|T|  will be used in its place,
+*          where |T| is the 1-norm of the tridiagonal matrix obtained
+*          by reducing A to tridiagonal form.
+*
+*          Eigenvalues will be computed most accurately when ABSTOL is
+*          set to twice the underflow threshold 2*SLAMCH('S'), not zero.
+*          If this routine returns with INFO>0, indicating that some
+*          eigenvectors did not converge, try setting ABSTOL to
+*          2*SLAMCH('S').
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) REAL array, dimension (N)
+*          On normal exit, the first M elements contain the selected
+*          eigenvalues in ascending order.
+*
+*  Z       (output) REAL array, dimension (LDZ, max(1,M))
+*          If JOBZ = 'N', then Z is not referenced.
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix A
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          The eigenvectors are normalized as follows:
+*          if ITYPE = 1 or 2, Z**T*B*Z = I;
+*          if ITYPE = 3, Z**T*inv(B)*Z = I.
+*
+*          If an eigenvector fails to converge, then that column of Z
+*          contains the latest approximation to the eigenvector, and the
+*          index of the eigenvector is returned in IFAIL.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace) REAL array, dimension (8*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (5*N)
+*
+*  IFAIL   (output) INTEGER array, dimension (N)
+*          If JOBZ = 'V', then if INFO = 0, the first M elements of
+*          IFAIL are zero.  If INFO > 0, then IFAIL contains the
+*          indices of the eigenvectors that failed to converge.
+*          If JOBZ = 'N', then IFAIL is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  SPPTRF or SSPEVX returned an error code:
+*             <= N:  if INFO = i, SSPEVX failed to converge;
+*                    i eigenvectors failed to converge.  Their indices
+*                    are stored in array IFAIL.
+*             > N:   if INFO = N + i, for 1 <= i <= N, then the leading
+*                    minor of order i of B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
+*
+* =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, UPPER, VALEIG, WANTZ
+      CHARACTER          TRANS
+      INTEGER            J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SPPTRF, SSPEVX, SSPGST, STPMV, STPSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      UPPER = LSAME( UPLO, 'U' )
+      WANTZ = LSAME( JOBZ, 'V' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+*
+      INFO = 0
+      IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN
+         INFO = -9
+      ELSE IF( INDEIG .AND. IL.LT.1 ) THEN
+         INFO = -10
+      ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN
+         INFO = -11
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -16
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSPGVX', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+*     Form a Cholesky factorization of B.
+*
+      CALL SPPTRF( UPLO, N, BP, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem and solve.
+*
+      CALL SSPGST( ITYPE, UPLO, N, AP, BP, INFO )
+      CALL SSPEVX( JOBZ, RANGE, UPLO, N, AP, VL, VU, IL, IU, ABSTOL, M,
+     $             W, Z, LDZ, WORK, IWORK, IFAIL, INFO )
+*
+      IF( WANTZ ) THEN
+*
+*        Backtransform eigenvectors to the original problem.
+*
+         IF( INFO.GT.0 )
+     $      M = INFO - 1
+         IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN
+*
+*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
+*           backtransform eigenvectors: x = inv(L)'*y or inv(U)*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'N'
+            ELSE
+               TRANS = 'T'
+            END IF
+*
+            DO 10 J = 1, M
+               CALL STPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ),
+     $                     1 )
+   10       CONTINUE
+*
+         ELSE IF( ITYPE.EQ.3 ) THEN
+*
+*           For B*A*x=(lambda)*x;
+*           backtransform eigenvectors: x = L*y or U'*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'T'
+            ELSE
+               TRANS = 'N'
+            END IF
+*
+            DO 20 J = 1, M
+               CALL STPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ),
+     $                     1 )
+   20       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of SSPGVX
+*
+      END
diff --git a/costa/native/external/lapack/ssprfs.f b/costa/native/external/lapack/ssprfs.f
new file mode 100644
index 000000000..0dff85913
--- /dev/null
+++ b/costa/native/external/lapack/ssprfs.f
@@ -0,0 +1,331 @@
+      SUBROUTINE SSPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX,
+     $                   FERR, BERR, WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), IWORK( * )
+      REAL               AFP( * ), AP( * ), B( LDB, * ), BERR( * ),
+     $                   FERR( * ), WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSPRFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is symmetric indefinite
+*  and packed, and provides error bounds and backward error estimates
+*  for the solution.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AP      (input) REAL array, dimension (N*(N+1)/2)
+*          The upper or lower triangle of the symmetric matrix A, packed
+*          columnwise in a linear array.  The j-th column of A is stored
+*          in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*  AFP     (input) REAL array, dimension (N*(N+1)/2)
+*          The factored form of the matrix A.  AFP contains the block
+*          diagonal matrix D and the multipliers used to obtain the
+*          factor U or L from the factorization A = U*D*U**T or
+*          A = L*D*L**T as computed by SSPTRF, stored as a packed
+*          triangular matrix.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by SSPTRF.
+*
+*  B       (input) REAL array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) REAL array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by SSPTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) REAL array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+      REAL               TWO
+      PARAMETER          ( TWO = 2.0E+0 )
+      REAL               THREE
+      PARAMETER          ( THREE = 3.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            COUNT, I, IK, J, K, KASE, KK, NZ
+      REAL               EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SCOPY, SLACON, SSPMV, SSPTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSPRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = SLAMCH( 'Epsilon' )
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - A * X
+*
+         CALL SCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 )
+         CALL SSPMV( UPLO, N, -ONE, AP, X( 1, J ), 1, ONE, WORK( N+1 ),
+     $               1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            WORK( I ) = ABS( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(A)*abs(X) + abs(B).
+*
+         KK = 1
+         IF( UPPER ) THEN
+            DO 50 K = 1, N
+               S = ZERO
+               XK = ABS( X( K, J ) )
+               IK = KK
+               DO 40 I = 1, K - 1
+                  WORK( I ) = WORK( I ) + ABS( AP( IK ) )*XK
+                  S = S + ABS( AP( IK ) )*ABS( X( I, J ) )
+                  IK = IK + 1
+   40          CONTINUE
+               WORK( K ) = WORK( K ) + ABS( AP( KK+K-1 ) )*XK + S
+               KK = KK + K
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               XK = ABS( X( K, J ) )
+               WORK( K ) = WORK( K ) + ABS( AP( KK ) )*XK
+               IK = KK + 1
+               DO 60 I = K + 1, N
+                  WORK( I ) = WORK( I ) + ABS( AP( IK ) )*XK
+                  S = S + ABS( AP( IK ) )*ABS( X( I, J ) )
+                  IK = IK + 1
+   60          CONTINUE
+               WORK( K ) = WORK( K ) + S
+               KK = KK + ( N-K+1 )
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) )
+            ELSE
+               S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) /
+     $             ( WORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL SSPTRS( UPLO, N, 1, AFP, IPIV, WORK( N+1 ), N, INFO )
+            CALL SAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(A))*
+*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(A) is the inverse of A
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(A)*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use SLACON to estimate the infinity-norm of the matrix
+*           inv(A) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I )
+            ELSE
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL SLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ),
+     $                KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(A').
+*
+               CALL SSPTRS( UPLO, N, 1, AFP, IPIV, WORK( N+1 ), N,
+     $                      INFO )
+               DO 110 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  110          CONTINUE
+            ELSE IF( KASE.EQ.2 ) THEN
+*
+*              Multiply by inv(A)*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  120          CONTINUE
+               CALL SSPTRS( UPLO, N, 1, AFP, IPIV, WORK( N+1 ), N,
+     $                      INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, ABS( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of SSPRFS
+*
+      END
diff --git a/costa/native/external/lapack/sspsv.f b/costa/native/external/lapack/sspsv.f
new file mode 100644
index 000000000..eef02a4c9
--- /dev/null
+++ b/costa/native/external/lapack/sspsv.f
@@ -0,0 +1,149 @@
+      SUBROUTINE SSPSV( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               AP( * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSPSV computes the solution to a real system of linear equations
+*     A * X = B,
+*  where A is an N-by-N symmetric matrix stored in packed format and X
+*  and B are N-by-NRHS matrices.
+*
+*  The diagonal pivoting method is used to factor A as
+*     A = U * D * U**T,  if UPLO = 'U', or
+*     A = L * D * L**T,  if UPLO = 'L',
+*  where U (or L) is a product of permutation and unit upper (lower)
+*  triangular matrices, D is symmetric and block diagonal with 1-by-1
+*  and 2-by-2 diagonal blocks.  The factored form of A is then used to
+*  solve the system of equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AP      (input/output) REAL array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*          See below for further details.
+*
+*          On exit, the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L from the factorization
+*          A = U*D*U**T or A = L*D*L**T as computed by SSPTRF, stored as
+*          a packed triangular matrix in the same storage format as A.
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D, as
+*          determined by SSPTRF.  If IPIV(k) > 0, then rows and columns
+*          k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1
+*          diagonal block.  If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0,
+*          then rows and columns k-1 and -IPIV(k) were interchanged and
+*          D(k-1:k,k-1:k) is a 2-by-2 diagonal block.  If UPLO = 'L' and
+*          IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and
+*          -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2
+*          diagonal block.
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, D(i,i) is exactly zero.  The factorization
+*                has been completed, but the block diagonal matrix D is
+*                exactly singular, so the solution could not be
+*                computed.
+*
+*  Further Details
+*  ===============
+*
+*  The packed storage scheme is illustrated by the following example
+*  when N = 4, UPLO = 'U':
+*
+*  Two-dimensional storage of the symmetric matrix A:
+*
+*     a11 a12 a13 a14
+*         a22 a23 a24
+*             a33 a34     (aij = aji)
+*                 a44
+*
+*  Packed storage of the upper triangle of A:
+*
+*  AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ]
+*
+*  =====================================================================
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SSPTRF, SSPTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSPSV ', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the factorization A = U*D*U' or A = L*D*L'.
+*
+      CALL SSPTRF( UPLO, N, AP, IPIV, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL SSPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO )
+*
+      END IF
+      RETURN
+*
+*     End of SSPSV
+*
+      END
diff --git a/costa/native/external/lapack/sspsvx.f b/costa/native/external/lapack/sspsvx.f
new file mode 100644
index 000000000..d2f0f9fa0
--- /dev/null
+++ b/costa/native/external/lapack/sspsvx.f
@@ -0,0 +1,279 @@
+      SUBROUTINE SSPSVX( FACT, UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X,
+     $                   LDX, RCOND, FERR, BERR, WORK, IWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          FACT, UPLO
+      INTEGER            INFO, LDB, LDX, N, NRHS
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), IWORK( * )
+      REAL               AFP( * ), AP( * ), B( LDB, * ), BERR( * ),
+     $                   FERR( * ), WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSPSVX uses the diagonal pivoting factorization A = U*D*U**T or
+*  A = L*D*L**T to compute the solution to a real system of linear
+*  equations A * X = B, where A is an N-by-N symmetric matrix stored
+*  in packed format and X and B are N-by-NRHS matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'N', the diagonal pivoting method is used to factor A as
+*        A = U * D * U**T,  if UPLO = 'U', or
+*        A = L * D * L**T,  if UPLO = 'L',
+*     where U (or L) is a product of permutation and unit upper (lower)
+*     triangular matrices and D is symmetric and block diagonal with
+*     1-by-1 and 2-by-2 diagonal blocks.
+*
+*  2. If some D(i,i)=0, so that D is exactly singular, then the routine
+*     returns with INFO = i. Otherwise, the factored form of A is used
+*     to estimate the condition number of the matrix A.  If the
+*     reciprocal of the condition number is less than machine precision,
+*     INFO = N+1 is returned as a warning, but the routine still goes on
+*     to solve for X and compute error bounds as described below.
+*
+*  3. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  4. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of A has been
+*          supplied on entry.
+*          = 'F':  On entry, AFP and IPIV contain the factored form of
+*                  A.  AP, AFP and IPIV will not be modified.
+*          = 'N':  The matrix A will be copied to AFP and factored.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AP      (input) REAL array, dimension (N*(N+1)/2)
+*          The upper or lower triangle of the symmetric matrix A, packed
+*          columnwise in a linear array.  The j-th column of A is stored
+*          in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*          See below for further details.
+*
+*  AFP     (input or output) REAL array, dimension
+*                            (N*(N+1)/2)
+*          If FACT = 'F', then AFP is an input argument and on entry
+*          contains the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L from the factorization
+*          A = U*D*U**T or A = L*D*L**T as computed by SSPTRF, stored as
+*          a packed triangular matrix in the same storage format as A.
+*
+*          If FACT = 'N', then AFP is an output argument and on exit
+*          contains the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L from the factorization
+*          A = U*D*U**T or A = L*D*L**T as computed by SSPTRF, stored as
+*          a packed triangular matrix in the same storage format as A.
+*
+*  IPIV    (input or output) INTEGER array, dimension (N)
+*          If FACT = 'F', then IPIV is an input argument and on entry
+*          contains details of the interchanges and the block structure
+*          of D, as determined by SSPTRF.
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*          If FACT = 'N', then IPIV is an output argument and on exit
+*          contains details of the interchanges and the block structure
+*          of D, as determined by SSPTRF.
+*
+*  B       (input) REAL array, dimension (LDB,NRHS)
+*          The N-by-NRHS right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) REAL array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) REAL
+*          The estimate of the reciprocal condition number of the matrix
+*          A.  If RCOND is less than the machine precision (in
+*          particular, if RCOND = 0), the matrix is singular to working
+*          precision.  This condition is indicated by a return code of
+*          INFO > 0.
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) REAL array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= N:  D(i,i) is exactly zero.  The factorization
+*                       has been completed but the factor D is exactly
+*                       singular, so the solution and error bounds could
+*                       not be computed. RCOND = 0 is returned.
+*                = N+1: D is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  Further Details
+*  ===============
+*
+*  The packed storage scheme is illustrated by the following example
+*  when N = 4, UPLO = 'U':
+*
+*  Two-dimensional storage of the symmetric matrix A:
+*
+*     a11 a12 a13 a14
+*         a22 a23 a24
+*             a33 a34     (aij = aji)
+*                 a44
+*
+*  Packed storage of the upper triangle of A:
+*
+*  AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ]
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOFACT
+      REAL               ANORM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH, SLANSP
+      EXTERNAL           LSAME, SLAMCH, SLANSP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SLACPY, SSPCON, SSPRFS, SSPTRF, SSPTRS,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSPSVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( NOFACT ) THEN
+*
+*        Compute the factorization A = U*D*U' or A = L*D*L'.
+*
+         CALL SCOPY( N*( N+1 ) / 2, AP, 1, AFP, 1 )
+         CALL SSPTRF( UPLO, N, AFP, IPIV, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      ANORM = SLANSP( 'I', UPLO, N, AP, WORK )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL SSPCON( UPLO, N, AFP, IPIV, ANORM, RCOND, WORK, IWORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.SLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution vectors X.
+*
+      CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL SSPTRS( UPLO, N, NRHS, AFP, IPIV, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solutions and
+*     compute error bounds and backward error estimates for them.
+*
+      CALL SSPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, FERR,
+     $             BERR, WORK, IWORK, INFO )
+*
+      RETURN
+*
+*     End of SSPSVX
+*
+      END
diff --git a/costa/native/external/lapack/ssptrd.f b/costa/native/external/lapack/ssptrd.f
new file mode 100644
index 000000000..a998c72a1
--- /dev/null
+++ b/costa/native/external/lapack/ssptrd.f
@@ -0,0 +1,228 @@
+      SUBROUTINE SSPTRD( UPLO, N, AP, D, E, TAU, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      REAL               AP( * ), D( * ), E( * ), TAU( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSPTRD reduces a real symmetric matrix A stored in packed form to
+*  symmetric tridiagonal form T by an orthogonal similarity
+*  transformation: Q**T * A * Q = T.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) REAL array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*          On exit, if UPLO = 'U', the diagonal and first superdiagonal
+*          of A are overwritten by the corresponding elements of the
+*          tridiagonal matrix T, and the elements above the first
+*          superdiagonal, with the array TAU, represent the orthogonal
+*          matrix Q as a product of elementary reflectors; if UPLO
+*          = 'L', the diagonal and first subdiagonal of A are over-
+*          written by the corresponding elements of the tridiagonal
+*          matrix T, and the elements below the first subdiagonal, with
+*          the array TAU, represent the orthogonal matrix Q as a product
+*          of elementary reflectors. See Further Details.
+*
+*  D       (output) REAL array, dimension (N)
+*          The diagonal elements of the tridiagonal matrix T:
+*          D(i) = A(i,i).
+*
+*  E       (output) REAL array, dimension (N-1)
+*          The off-diagonal elements of the tridiagonal matrix T:
+*          E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.
+*
+*  TAU     (output) REAL array, dimension (N-1)
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  If UPLO = 'U', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(n-1) . . . H(2) H(1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in AP,
+*  overwriting A(1:i-1,i+1), and tau is stored in TAU(i).
+*
+*  If UPLO = 'L', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(1) H(2) . . . H(n-1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in AP,
+*  overwriting A(i+2:n,i), and tau is stored in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO, HALF
+      PARAMETER          ( ONE = 1.0, ZERO = 0.0, HALF = 1.0 / 2.0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, I1, I1I1, II
+      REAL               ALPHA, TAUI
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SLARFG, SSPMV, SSPR2, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SDOT
+      EXTERNAL           LSAME, SDOT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSPTRD', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Reduce the upper triangle of A.
+*        I1 is the index in AP of A(1,I+1).
+*
+         I1 = N*( N-1 ) / 2 + 1
+         DO 10 I = N - 1, 1, -1
+*
+*           Generate elementary reflector H(i) = I - tau * v * v'
+*           to annihilate A(1:i-1,i+1)
+*
+            CALL SLARFG( I, AP( I1+I-1 ), AP( I1 ), 1, TAUI )
+            E( I ) = AP( I1+I-1 )
+*
+            IF( TAUI.NE.ZERO ) THEN
+*
+*              Apply H(i) from both sides to A(1:i,1:i)
+*
+               AP( I1+I-1 ) = ONE
+*
+*              Compute  y := tau * A * v  storing y in TAU(1:i)
+*
+               CALL SSPMV( UPLO, I, TAUI, AP, AP( I1 ), 1, ZERO, TAU,
+     $                     1 )
+*
+*              Compute  w := y - 1/2 * tau * (y'*v) * v
+*
+               ALPHA = -HALF*TAUI*SDOT( I, TAU, 1, AP( I1 ), 1 )
+               CALL SAXPY( I, ALPHA, AP( I1 ), 1, TAU, 1 )
+*
+*              Apply the transformation as a rank-2 update:
+*                 A := A - v * w' - w * v'
+*
+               CALL SSPR2( UPLO, I, -ONE, AP( I1 ), 1, TAU, 1, AP )
+*
+               AP( I1+I-1 ) = E( I )
+            END IF
+            D( I+1 ) = AP( I1+I )
+            TAU( I ) = TAUI
+            I1 = I1 - I
+   10    CONTINUE
+         D( 1 ) = AP( 1 )
+      ELSE
+*
+*        Reduce the lower triangle of A. II is the index in AP of
+*        A(i,i) and I1I1 is the index of A(i+1,i+1).
+*
+         II = 1
+         DO 20 I = 1, N - 1
+            I1I1 = II + N - I + 1
+*
+*           Generate elementary reflector H(i) = I - tau * v * v'
+*           to annihilate A(i+2:n,i)
+*
+            CALL SLARFG( N-I, AP( II+1 ), AP( II+2 ), 1, TAUI )
+            E( I ) = AP( II+1 )
+*
+            IF( TAUI.NE.ZERO ) THEN
+*
+*              Apply H(i) from both sides to A(i+1:n,i+1:n)
+*
+               AP( II+1 ) = ONE
+*
+*              Compute  y := tau * A * v  storing y in TAU(i:n-1)
+*
+               CALL SSPMV( UPLO, N-I, TAUI, AP( I1I1 ), AP( II+1 ), 1,
+     $                     ZERO, TAU( I ), 1 )
+*
+*              Compute  w := y - 1/2 * tau * (y'*v) * v
+*
+               ALPHA = -HALF*TAUI*SDOT( N-I, TAU( I ), 1, AP( II+1 ),
+     $                 1 )
+               CALL SAXPY( N-I, ALPHA, AP( II+1 ), 1, TAU( I ), 1 )
+*
+*              Apply the transformation as a rank-2 update:
+*                 A := A - v * w' - w * v'
+*
+               CALL SSPR2( UPLO, N-I, -ONE, AP( II+1 ), 1, TAU( I ), 1,
+     $                     AP( I1I1 ) )
+*
+               AP( II+1 ) = E( I )
+            END IF
+            D( I ) = AP( II )
+            TAU( I ) = TAUI
+            II = I1I1
+   20    CONTINUE
+         D( N ) = AP( II )
+      END IF
+*
+      RETURN
+*
+*     End of SSPTRD
+*
+      END
diff --git a/costa/native/external/lapack/ssptrf.f b/costa/native/external/lapack/ssptrf.f
new file mode 100644
index 000000000..d2a4bba9e
--- /dev/null
+++ b/costa/native/external/lapack/ssptrf.f
@@ -0,0 +1,548 @@
+      SUBROUTINE SSPTRF( UPLO, N, AP, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               AP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSPTRF computes the factorization of a real symmetric matrix A stored
+*  in packed format using the Bunch-Kaufman diagonal pivoting method:
+*
+*     A = U*D*U**T  or  A = L*D*L**T
+*
+*  where U (or L) is a product of permutation and unit upper (lower)
+*  triangular matrices, and D is symmetric and block diagonal with
+*  1-by-1 and 2-by-2 diagonal blocks.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) REAL array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L, stored as a packed triangular
+*          matrix overwriting A (see below for further details).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D.
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, D(i,i) is exactly zero.  The factorization
+*               has been completed, but the block diagonal matrix D is
+*               exactly singular, and division by zero will occur if it
+*               is used to solve a system of equations.
+*
+*  Further Details
+*  ===============
+*
+*  5-96 - Based on modifications by J. Lewis, Boeing Computer Services
+*         Company
+*
+*  If UPLO = 'U', then A = U*D*U', where
+*     U = P(n)*U(n)* ... *P(k)U(k)* ...,
+*  i.e., U is a product of terms P(k)*U(k), where k decreases from n to
+*  1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and U(k) is a unit upper triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    v    0   )   k-s
+*     U(k) =  (   0    I    0   )   s
+*             (   0    0    I   )   n-k
+*                k-s   s   n-k
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k).
+*  If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k),
+*  and A(k,k), and v overwrites A(1:k-2,k-1:k).
+*
+*  If UPLO = 'L', then A = L*D*L', where
+*     L = P(1)*L(1)* ... *P(k)*L(k)* ...,
+*  i.e., L is a product of terms P(k)*L(k), where k increases from 1 to
+*  n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and L(k) is a unit lower triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    0     0   )  k-1
+*     L(k) =  (   0    I     0   )  s
+*             (   0    v     I   )  n-k-s+1
+*                k-1   s  n-k-s+1
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k).
+*  If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k),
+*  and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+      REAL               EIGHT, SEVTEN
+      PARAMETER          ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, IMAX, J, JMAX, K, KC, KK, KNC, KP, KPC,
+     $                   KSTEP, KX, NPP
+      REAL               ABSAKK, ALPHA, COLMAX, D11, D12, D21, D22, R1,
+     $                   ROWMAX, T, WK, WKM1, WKP1
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ISAMAX
+      EXTERNAL           LSAME, ISAMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SSCAL, SSPR, SSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSPTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Initialize ALPHA for use in choosing pivot block size.
+*
+      ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT
+*
+      IF( UPPER ) THEN
+*
+*        Factorize A as U*D*U' using the upper triangle of A
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2
+*
+         K = N
+         KC = ( N-1 )*N / 2 + 1
+   10    CONTINUE
+         KNC = KC
+*
+*        If K < 1, exit from loop
+*
+         IF( K.LT.1 )
+     $      GO TO 110
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = ABS( AP( KC+K-1 ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.GT.1 ) THEN
+            IMAX = ISAMAX( K-1, AP( KC ), 1 )
+            COLMAX = ABS( AP( KC+IMAX-1 ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               ROWMAX = ZERO
+               JMAX = IMAX
+               KX = IMAX*( IMAX+1 ) / 2 + IMAX
+               DO 20 J = IMAX + 1, K
+                  IF( ABS( AP( KX ) ).GT.ROWMAX ) THEN
+                     ROWMAX = ABS( AP( KX ) )
+                     JMAX = J
+                  END IF
+                  KX = KX + J
+   20          CONTINUE
+               KPC = ( IMAX-1 )*IMAX / 2 + 1
+               IF( IMAX.GT.1 ) THEN
+                  JMAX = ISAMAX( IMAX-1, AP( KPC ), 1 )
+                  ROWMAX = MAX( ROWMAX, ABS( AP( KPC+JMAX-1 ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( ABS( AP( KPC+IMAX-1 ) ).GE.ALPHA*ROWMAX ) THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+               ELSE
+*
+*                 interchange rows and columns K-1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K - KSTEP + 1
+            IF( KSTEP.EQ.2 )
+     $         KNC = KNC - K + 1
+            IF( KP.NE.KK ) THEN
+*
+*              Interchange rows and columns KK and KP in the leading
+*              submatrix A(1:k,1:k)
+*
+               CALL SSWAP( KP-1, AP( KNC ), 1, AP( KPC ), 1 )
+               KX = KPC + KP - 1
+               DO 30 J = KP + 1, KK - 1
+                  KX = KX + J - 1
+                  T = AP( KNC+J-1 )
+                  AP( KNC+J-1 ) = AP( KX )
+                  AP( KX ) = T
+   30          CONTINUE
+               T = AP( KNC+KK-1 )
+               AP( KNC+KK-1 ) = AP( KPC+KP-1 )
+               AP( KPC+KP-1 ) = T
+               IF( KSTEP.EQ.2 ) THEN
+                  T = AP( KC+K-2 )
+                  AP( KC+K-2 ) = AP( KC+KP-1 )
+                  AP( KC+KP-1 ) = T
+               END IF
+            END IF
+*
+*           Update the leading submatrix
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column k now holds
+*
+*              W(k) = U(k)*D(k)
+*
+*              where U(k) is the k-th column of U
+*
+*              Perform a rank-1 update of A(1:k-1,1:k-1) as
+*
+*              A := A - U(k)*D(k)*U(k)' = A - W(k)*1/D(k)*W(k)'
+*
+               R1 = ONE / AP( KC+K-1 )
+               CALL SSPR( UPLO, K-1, -R1, AP( KC ), 1, AP )
+*
+*              Store U(k) in column k
+*
+               CALL SSCAL( K-1, R1, AP( KC ), 1 )
+            ELSE
+*
+*              2-by-2 pivot block D(k): columns k and k-1 now hold
+*
+*              ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k)
+*
+*              where U(k) and U(k-1) are the k-th and (k-1)-th columns
+*              of U
+*
+*              Perform a rank-2 update of A(1:k-2,1:k-2) as
+*
+*              A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )'
+*                 = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )'
+*
+               IF( K.GT.2 ) THEN
+*
+                  D12 = AP( K-1+( K-1 )*K / 2 )
+                  D22 = AP( K-1+( K-2 )*( K-1 ) / 2 ) / D12
+                  D11 = AP( K+( K-1 )*K / 2 ) / D12
+                  T = ONE / ( D11*D22-ONE )
+                  D12 = T / D12
+*
+                  DO 50 J = K - 2, 1, -1
+                     WKM1 = D12*( D11*AP( J+( K-2 )*( K-1 ) / 2 )-
+     $                      AP( J+( K-1 )*K / 2 ) )
+                     WK = D12*( D22*AP( J+( K-1 )*K / 2 )-
+     $                    AP( J+( K-2 )*( K-1 ) / 2 ) )
+                     DO 40 I = J, 1, -1
+                        AP( I+( J-1 )*J / 2 ) = AP( I+( J-1 )*J / 2 ) -
+     $                     AP( I+( K-1 )*K / 2 )*WK -
+     $                     AP( I+( K-2 )*( K-1 ) / 2 )*WKM1
+   40                CONTINUE
+                     AP( J+( K-1 )*K / 2 ) = WK
+                     AP( J+( K-2 )*( K-1 ) / 2 ) = WKM1
+   50             CONTINUE
+*
+               END IF
+*
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K-1 ) = -KP
+         END IF
+*
+*        Decrease K and return to the start of the main loop
+*
+         K = K - KSTEP
+         KC = KNC - K
+         GO TO 10
+*
+      ELSE
+*
+*        Factorize A as L*D*L' using the lower triangle of A
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2
+*
+         K = 1
+         KC = 1
+         NPP = N*( N+1 ) / 2
+   60    CONTINUE
+         KNC = KC
+*
+*        If K > N, exit from loop
+*
+         IF( K.GT.N )
+     $      GO TO 110
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = ABS( AP( KC ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.LT.N ) THEN
+            IMAX = K + ISAMAX( N-K, AP( KC+1 ), 1 )
+            COLMAX = ABS( AP( KC+IMAX-K ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               ROWMAX = ZERO
+               KX = KC + IMAX - K
+               DO 70 J = K, IMAX - 1
+                  IF( ABS( AP( KX ) ).GT.ROWMAX ) THEN
+                     ROWMAX = ABS( AP( KX ) )
+                     JMAX = J
+                  END IF
+                  KX = KX + N - J
+   70          CONTINUE
+               KPC = NPP - ( N-IMAX+1 )*( N-IMAX+2 ) / 2 + 1
+               IF( IMAX.LT.N ) THEN
+                  JMAX = IMAX + ISAMAX( N-IMAX, AP( KPC+1 ), 1 )
+                  ROWMAX = MAX( ROWMAX, ABS( AP( KPC+JMAX-IMAX ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( ABS( AP( KPC ) ).GE.ALPHA*ROWMAX ) THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+               ELSE
+*
+*                 interchange rows and columns K+1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K + KSTEP - 1
+            IF( KSTEP.EQ.2 )
+     $         KNC = KNC + N - K + 1
+            IF( KP.NE.KK ) THEN
+*
+*              Interchange rows and columns KK and KP in the trailing
+*              submatrix A(k:n,k:n)
+*
+               IF( KP.LT.N )
+     $            CALL SSWAP( N-KP, AP( KNC+KP-KK+1 ), 1, AP( KPC+1 ),
+     $                        1 )
+               KX = KNC + KP - KK
+               DO 80 J = KK + 1, KP - 1
+                  KX = KX + N - J + 1
+                  T = AP( KNC+J-KK )
+                  AP( KNC+J-KK ) = AP( KX )
+                  AP( KX ) = T
+   80          CONTINUE
+               T = AP( KNC )
+               AP( KNC ) = AP( KPC )
+               AP( KPC ) = T
+               IF( KSTEP.EQ.2 ) THEN
+                  T = AP( KC+1 )
+                  AP( KC+1 ) = AP( KC+KP-K )
+                  AP( KC+KP-K ) = T
+               END IF
+            END IF
+*
+*           Update the trailing submatrix
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column k now holds
+*
+*              W(k) = L(k)*D(k)
+*
+*              where L(k) is the k-th column of L
+*
+               IF( K.LT.N ) THEN
+*
+*                 Perform a rank-1 update of A(k+1:n,k+1:n) as
+*
+*                 A := A - L(k)*D(k)*L(k)' = A - W(k)*(1/D(k))*W(k)'
+*
+                  R1 = ONE / AP( KC )
+                  CALL SSPR( UPLO, N-K, -R1, AP( KC+1 ), 1,
+     $                       AP( KC+N-K+1 ) )
+*
+*                 Store L(k) in column K
+*
+                  CALL SSCAL( N-K, R1, AP( KC+1 ), 1 )
+               END IF
+            ELSE
+*
+*              2-by-2 pivot block D(k): columns K and K+1 now hold
+*
+*              ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k)
+*
+*              where L(k) and L(k+1) are the k-th and (k+1)-th columns
+*              of L
+*
+               IF( K.LT.N-1 ) THEN
+*
+*                 Perform a rank-2 update of A(k+2:n,k+2:n) as
+*
+*                 A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )'
+*                    = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )'
+*
+                  D21 = AP( K+1+( K-1 )*( 2*N-K ) / 2 )
+                  D11 = AP( K+1+K*( 2*N-K-1 ) / 2 ) / D21
+                  D22 = AP( K+( K-1 )*( 2*N-K ) / 2 ) / D21
+                  T = ONE / ( D11*D22-ONE )
+                  D21 = T / D21
+*
+                  DO 100 J = K + 2, N
+                     WK = D21*( D11*AP( J+( K-1 )*( 2*N-K ) / 2 )-
+     $                    AP( J+K*( 2*N-K-1 ) / 2 ) )
+                     WKP1 = D21*( D22*AP( J+K*( 2*N-K-1 ) / 2 )-
+     $                      AP( J+( K-1 )*( 2*N-K ) / 2 ) )
+*
+                     DO 90 I = J, N
+                        AP( I+( J-1 )*( 2*N-J ) / 2 ) = AP( I+( J-1 )*
+     $                     ( 2*N-J ) / 2 ) - AP( I+( K-1 )*( 2*N-K ) /
+     $                     2 )*WK - AP( I+K*( 2*N-K-1 ) / 2 )*WKP1
+   90                CONTINUE
+*
+                     AP( J+( K-1 )*( 2*N-K ) / 2 ) = WK
+                     AP( J+K*( 2*N-K-1 ) / 2 ) = WKP1
+*
+  100             CONTINUE
+               END IF
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K+1 ) = -KP
+         END IF
+*
+*        Increase K and return to the start of the main loop
+*
+         K = K + KSTEP
+         KC = KNC + N - K + 2
+         GO TO 60
+*
+      END IF
+*
+  110 CONTINUE
+      RETURN
+*
+*     End of SSPTRF
+*
+      END
diff --git a/costa/native/external/lapack/ssptri.f b/costa/native/external/lapack/ssptri.f
new file mode 100644
index 000000000..b562cc2c4
--- /dev/null
+++ b/costa/native/external/lapack/ssptri.f
@@ -0,0 +1,335 @@
+      SUBROUTINE SSPTRI( UPLO, N, AP, IPIV, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               AP( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSPTRI computes the inverse of a real symmetric indefinite matrix
+*  A in packed storage using the factorization A = U*D*U**T or
+*  A = L*D*L**T computed by SSPTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**T;
+*          = 'L':  Lower triangular, form is A = L*D*L**T.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) REAL array, dimension (N*(N+1)/2)
+*          On entry, the block diagonal matrix D and the multipliers
+*          used to obtain the factor U or L as computed by SSPTRF,
+*          stored as a packed triangular matrix.
+*
+*          On exit, if INFO = 0, the (symmetric) inverse of the original
+*          matrix, stored as a packed triangular matrix. The j-th column
+*          of inv(A) is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = inv(A)(i,j) for 1<=i<=j;
+*          if UPLO = 'L',
+*             AP(i + (j-1)*(2n-j)/2) = inv(A)(i,j) for j<=i<=n.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by SSPTRF.
+*
+*  WORK    (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its
+*               inverse could not be computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, K, KC, KCNEXT, KP, KPC, KSTEP, KX, NPP
+      REAL               AK, AKKP1, AKP1, D, T, TEMP
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SDOT
+      EXTERNAL           LSAME, SDOT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SSPMV, SSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSPTRI', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Check that the diagonal matrix D is nonsingular.
+*
+      IF( UPPER ) THEN
+*
+*        Upper triangular storage: examine D from bottom to top
+*
+         KP = N*( N+1 ) / 2
+         DO 10 INFO = N, 1, -1
+            IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO )
+     $         RETURN
+            KP = KP - INFO
+   10    CONTINUE
+      ELSE
+*
+*        Lower triangular storage: examine D from top to bottom.
+*
+         KP = 1
+         DO 20 INFO = 1, N
+            IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO )
+     $         RETURN
+            KP = KP + N - INFO + 1
+   20    CONTINUE
+      END IF
+      INFO = 0
+*
+      IF( UPPER ) THEN
+*
+*        Compute inv(A) from the factorization A = U*D*U'.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+         KC = 1
+   30    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 50
+*
+         KCNEXT = KC + K
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Invert the diagonal block.
+*
+            AP( KC+K-1 ) = ONE / AP( KC+K-1 )
+*
+*           Compute column K of the inverse.
+*
+            IF( K.GT.1 ) THEN
+               CALL SCOPY( K-1, AP( KC ), 1, WORK, 1 )
+               CALL SSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, AP( KC ),
+     $                     1 )
+               AP( KC+K-1 ) = AP( KC+K-1 ) -
+     $                        SDOT( K-1, WORK, 1, AP( KC ), 1 )
+            END IF
+            KSTEP = 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Invert the diagonal block.
+*
+            T = ABS( AP( KCNEXT+K-1 ) )
+            AK = AP( KC+K-1 ) / T
+            AKP1 = AP( KCNEXT+K ) / T
+            AKKP1 = AP( KCNEXT+K-1 ) / T
+            D = T*( AK*AKP1-ONE )
+            AP( KC+K-1 ) = AKP1 / D
+            AP( KCNEXT+K ) = AK / D
+            AP( KCNEXT+K-1 ) = -AKKP1 / D
+*
+*           Compute columns K and K+1 of the inverse.
+*
+            IF( K.GT.1 ) THEN
+               CALL SCOPY( K-1, AP( KC ), 1, WORK, 1 )
+               CALL SSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, AP( KC ),
+     $                     1 )
+               AP( KC+K-1 ) = AP( KC+K-1 ) -
+     $                        SDOT( K-1, WORK, 1, AP( KC ), 1 )
+               AP( KCNEXT+K-1 ) = AP( KCNEXT+K-1 ) -
+     $                            SDOT( K-1, AP( KC ), 1, AP( KCNEXT ),
+     $                            1 )
+               CALL SCOPY( K-1, AP( KCNEXT ), 1, WORK, 1 )
+               CALL SSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO,
+     $                     AP( KCNEXT ), 1 )
+               AP( KCNEXT+K ) = AP( KCNEXT+K ) -
+     $                          SDOT( K-1, WORK, 1, AP( KCNEXT ), 1 )
+            END IF
+            KSTEP = 2
+            KCNEXT = KCNEXT + K + 1
+         END IF
+*
+         KP = ABS( IPIV( K ) )
+         IF( KP.NE.K ) THEN
+*
+*           Interchange rows and columns K and KP in the leading
+*           submatrix A(1:k+1,1:k+1)
+*
+            KPC = ( KP-1 )*KP / 2 + 1
+            CALL SSWAP( KP-1, AP( KC ), 1, AP( KPC ), 1 )
+            KX = KPC + KP - 1
+            DO 40 J = KP + 1, K - 1
+               KX = KX + J - 1
+               TEMP = AP( KC+J-1 )
+               AP( KC+J-1 ) = AP( KX )
+               AP( KX ) = TEMP
+   40       CONTINUE
+            TEMP = AP( KC+K-1 )
+            AP( KC+K-1 ) = AP( KPC+KP-1 )
+            AP( KPC+KP-1 ) = TEMP
+            IF( KSTEP.EQ.2 ) THEN
+               TEMP = AP( KC+K+K-1 )
+               AP( KC+K+K-1 ) = AP( KC+K+KP-1 )
+               AP( KC+K+KP-1 ) = TEMP
+            END IF
+         END IF
+*
+         K = K + KSTEP
+         KC = KCNEXT
+         GO TO 30
+   50    CONTINUE
+*
+      ELSE
+*
+*        Compute inv(A) from the factorization A = L*D*L'.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         NPP = N*( N+1 ) / 2
+         K = N
+         KC = NPP
+   60    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 80
+*
+         KCNEXT = KC - ( N-K+2 )
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Invert the diagonal block.
+*
+            AP( KC ) = ONE / AP( KC )
+*
+*           Compute column K of the inverse.
+*
+            IF( K.LT.N ) THEN
+               CALL SCOPY( N-K, AP( KC+1 ), 1, WORK, 1 )
+               CALL SSPMV( UPLO, N-K, -ONE, AP( KC+N-K+1 ), WORK, 1,
+     $                     ZERO, AP( KC+1 ), 1 )
+               AP( KC ) = AP( KC ) - SDOT( N-K, WORK, 1, AP( KC+1 ), 1 )
+            END IF
+            KSTEP = 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Invert the diagonal block.
+*
+            T = ABS( AP( KCNEXT+1 ) )
+            AK = AP( KCNEXT ) / T
+            AKP1 = AP( KC ) / T
+            AKKP1 = AP( KCNEXT+1 ) / T
+            D = T*( AK*AKP1-ONE )
+            AP( KCNEXT ) = AKP1 / D
+            AP( KC ) = AK / D
+            AP( KCNEXT+1 ) = -AKKP1 / D
+*
+*           Compute columns K-1 and K of the inverse.
+*
+            IF( K.LT.N ) THEN
+               CALL SCOPY( N-K, AP( KC+1 ), 1, WORK, 1 )
+               CALL SSPMV( UPLO, N-K, -ONE, AP( KC+( N-K+1 ) ), WORK, 1,
+     $                     ZERO, AP( KC+1 ), 1 )
+               AP( KC ) = AP( KC ) - SDOT( N-K, WORK, 1, AP( KC+1 ), 1 )
+               AP( KCNEXT+1 ) = AP( KCNEXT+1 ) -
+     $                          SDOT( N-K, AP( KC+1 ), 1,
+     $                          AP( KCNEXT+2 ), 1 )
+               CALL SCOPY( N-K, AP( KCNEXT+2 ), 1, WORK, 1 )
+               CALL SSPMV( UPLO, N-K, -ONE, AP( KC+( N-K+1 ) ), WORK, 1,
+     $                     ZERO, AP( KCNEXT+2 ), 1 )
+               AP( KCNEXT ) = AP( KCNEXT ) -
+     $                        SDOT( N-K, WORK, 1, AP( KCNEXT+2 ), 1 )
+            END IF
+            KSTEP = 2
+            KCNEXT = KCNEXT - ( N-K+3 )
+         END IF
+*
+         KP = ABS( IPIV( K ) )
+         IF( KP.NE.K ) THEN
+*
+*           Interchange rows and columns K and KP in the trailing
+*           submatrix A(k-1:n,k-1:n)
+*
+            KPC = NPP - ( N-KP+1 )*( N-KP+2 ) / 2 + 1
+            IF( KP.LT.N )
+     $         CALL SSWAP( N-KP, AP( KC+KP-K+1 ), 1, AP( KPC+1 ), 1 )
+            KX = KC + KP - K
+            DO 70 J = K + 1, KP - 1
+               KX = KX + N - J + 1
+               TEMP = AP( KC+J-K )
+               AP( KC+J-K ) = AP( KX )
+               AP( KX ) = TEMP
+   70       CONTINUE
+            TEMP = AP( KC )
+            AP( KC ) = AP( KPC )
+            AP( KPC ) = TEMP
+            IF( KSTEP.EQ.2 ) THEN
+               TEMP = AP( KC-N+K-1 )
+               AP( KC-N+K-1 ) = AP( KC-N+KP-1 )
+               AP( KC-N+KP-1 ) = TEMP
+            END IF
+         END IF
+*
+         K = K - KSTEP
+         KC = KCNEXT
+         GO TO 60
+   80    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of SSPTRI
+*
+      END
diff --git a/costa/native/external/lapack/ssptrs.f b/costa/native/external/lapack/ssptrs.f
new file mode 100644
index 000000000..394940dc6
--- /dev/null
+++ b/costa/native/external/lapack/ssptrs.f
@@ -0,0 +1,378 @@
+      SUBROUTINE SSPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               AP( * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSPTRS solves a system of linear equations A*X = B with a real
+*  symmetric matrix A stored in packed format using the factorization
+*  A = U*D*U**T or A = L*D*L**T computed by SSPTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**T;
+*          = 'L':  Lower triangular, form is A = L*D*L**T.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AP      (input) REAL array, dimension (N*(N+1)/2)
+*          The block diagonal matrix D and the multipliers used to
+*          obtain the factor U or L as computed by SSPTRF, stored as a
+*          packed triangular matrix.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by SSPTRF.
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, K, KC, KP
+      REAL               AK, AKM1, AKM1K, BK, BKM1, DENOM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMV, SGER, SSCAL, SSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSPTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Solve A*X = B, where A = U*D*U'.
+*
+*        First solve U*D*X = B, overwriting B with X.
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = N
+         KC = N*( N+1 ) / 2 + 1
+   10    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 30
+*
+         KC = KC - K
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(U(K)), where U(K) is the transformation
+*           stored in column K of A.
+*
+            CALL SGER( K-1, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB,
+     $                 B( 1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            CALL SSCAL( NRHS, ONE / AP( KC+K-1 ), B( K, 1 ), LDB )
+            K = K - 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Interchange rows K-1 and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K-1 )
+     $         CALL SSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(U(K)), where U(K) is the transformation
+*           stored in columns K-1 and K of A.
+*
+            CALL SGER( K-2, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB,
+     $                 B( 1, 1 ), LDB )
+            CALL SGER( K-2, NRHS, -ONE, AP( KC-( K-1 ) ), 1,
+     $                 B( K-1, 1 ), LDB, B( 1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            AKM1K = AP( KC+K-2 )
+            AKM1 = AP( KC-1 ) / AKM1K
+            AK = AP( KC+K-1 ) / AKM1K
+            DENOM = AKM1*AK - ONE
+            DO 20 J = 1, NRHS
+               BKM1 = B( K-1, J ) / AKM1K
+               BK = B( K, J ) / AKM1K
+               B( K-1, J ) = ( AK*BKM1-BK ) / DENOM
+               B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM
+   20       CONTINUE
+            KC = KC - K + 1
+            K = K - 2
+         END IF
+*
+         GO TO 10
+   30    CONTINUE
+*
+*        Next solve U'*X = B, overwriting B with X.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+         KC = 1
+   40    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 50
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Multiply by inv(U'(K)), where U(K) is the transformation
+*           stored in column K of A.
+*
+            CALL SGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, AP( KC ),
+     $                  1, ONE, B( K, 1 ), LDB )
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            KC = KC + K
+            K = K + 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Multiply by inv(U'(K+1)), where U(K+1) is the transformation
+*           stored in columns K and K+1 of A.
+*
+            CALL SGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, AP( KC ),
+     $                  1, ONE, B( K, 1 ), LDB )
+            CALL SGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB,
+     $                  AP( KC+K ), 1, ONE, B( K+1, 1 ), LDB )
+*
+*           Interchange rows K and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K )
+     $         CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            KC = KC + 2*K + 1
+            K = K + 2
+         END IF
+*
+         GO TO 40
+   50    CONTINUE
+*
+      ELSE
+*
+*        Solve A*X = B, where A = L*D*L'.
+*
+*        First solve L*D*X = B, overwriting B with X.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+         KC = 1
+   60    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 80
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(L(K)), where L(K) is the transformation
+*           stored in column K of A.
+*
+            IF( K.LT.N )
+     $         CALL SGER( N-K, NRHS, -ONE, AP( KC+1 ), 1, B( K, 1 ),
+     $                    LDB, B( K+1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            CALL SSCAL( NRHS, ONE / AP( KC ), B( K, 1 ), LDB )
+            KC = KC + N - K + 1
+            K = K + 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Interchange rows K+1 and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K+1 )
+     $         CALL SSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(L(K)), where L(K) is the transformation
+*           stored in columns K and K+1 of A.
+*
+            IF( K.LT.N-1 ) THEN
+               CALL SGER( N-K-1, NRHS, -ONE, AP( KC+2 ), 1, B( K, 1 ),
+     $                    LDB, B( K+2, 1 ), LDB )
+               CALL SGER( N-K-1, NRHS, -ONE, AP( KC+N-K+2 ), 1,
+     $                    B( K+1, 1 ), LDB, B( K+2, 1 ), LDB )
+            END IF
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            AKM1K = AP( KC+1 )
+            AKM1 = AP( KC ) / AKM1K
+            AK = AP( KC+N-K+1 ) / AKM1K
+            DENOM = AKM1*AK - ONE
+            DO 70 J = 1, NRHS
+               BKM1 = B( K, J ) / AKM1K
+               BK = B( K+1, J ) / AKM1K
+               B( K, J ) = ( AK*BKM1-BK ) / DENOM
+               B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM
+   70       CONTINUE
+            KC = KC + 2*( N-K ) + 1
+            K = K + 2
+         END IF
+*
+         GO TO 60
+   80    CONTINUE
+*
+*        Next solve L'*X = B, overwriting B with X.
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = N
+         KC = N*( N+1 ) / 2 + 1
+   90    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 100
+*
+         KC = KC - ( N-K+1 )
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Multiply by inv(L'(K)), where L(K) is the transformation
+*           stored in column K of A.
+*
+            IF( K.LT.N )
+     $         CALL SGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ),
+     $                     LDB, AP( KC+1 ), 1, ONE, B( K, 1 ), LDB )
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            K = K - 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Multiply by inv(L'(K-1)), where L(K-1) is the transformation
+*           stored in columns K-1 and K of A.
+*
+            IF( K.LT.N ) THEN
+               CALL SGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ),
+     $                     LDB, AP( KC+1 ), 1, ONE, B( K, 1 ), LDB )
+               CALL SGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ),
+     $                     LDB, AP( KC-( N-K ) ), 1, ONE, B( K-1, 1 ),
+     $                     LDB )
+            END IF
+*
+*           Interchange rows K and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K )
+     $         CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            KC = KC - ( N-K+2 )
+            K = K - 2
+         END IF
+*
+         GO TO 90
+  100    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of SSPTRS
+*
+      END
diff --git a/costa/native/external/lapack/sstebz.f b/costa/native/external/lapack/sstebz.f
new file mode 100644
index 000000000..7da2f4404
--- /dev/null
+++ b/costa/native/external/lapack/sstebz.f
@@ -0,0 +1,651 @@
+      SUBROUTINE SSTEBZ( RANGE, ORDER, N, VL, VU, IL, IU, ABSTOL, D, E,
+     $                   M, NSPLIT, W, IBLOCK, ISPLIT, WORK, IWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          ORDER, RANGE
+      INTEGER            IL, INFO, IU, M, N, NSPLIT
+      REAL               ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IBLOCK( * ), ISPLIT( * ), IWORK( * )
+      REAL               D( * ), E( * ), W( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSTEBZ computes the eigenvalues of a symmetric tridiagonal
+*  matrix T.  The user may ask for all eigenvalues, all eigenvalues
+*  in the half-open interval (VL, VU], or the IL-th through IU-th
+*  eigenvalues.
+*
+*  To avoid overflow, the matrix must be scaled so that its
+*  largest element is no greater than overflow**(1/2) *
+*  underflow**(1/4) in absolute value, and for greatest
+*  accuracy, it should not be much smaller than that.
+*
+*  See W. Kahan "Accurate Eigenvalues of a Symmetric Tridiagonal
+*  Matrix", Report CS41, Computer Science Dept., Stanford
+*  University, July 21, 1966.
+*
+*  Arguments
+*  =========
+*
+*  RANGE   (input) CHARACTER
+*          = 'A': ("All")   all eigenvalues will be found.
+*          = 'V': ("Value") all eigenvalues in the half-open interval
+*                           (VL, VU] will be found.
+*          = 'I': ("Index") the IL-th through IU-th eigenvalues (of the
+*                           entire matrix) will be found.
+*
+*  ORDER   (input) CHARACTER
+*          = 'B': ("By Block") the eigenvalues will be grouped by
+*                              split-off block (see IBLOCK, ISPLIT) and
+*                              ordered from smallest to largest within
+*                              the block.
+*          = 'E': ("Entire matrix")
+*                              the eigenvalues for the entire matrix
+*                              will be ordered from smallest to
+*                              largest.
+*
+*  N       (input) INTEGER
+*          The order of the tridiagonal matrix T.  N >= 0.
+*
+*  VL      (input) REAL
+*  VU      (input) REAL
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues.  Eigenvalues less than or equal
+*          to VL, or greater than VU, will not be returned.  VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) REAL
+*          The absolute tolerance for the eigenvalues.  An eigenvalue
+*          (or cluster) is considered to be located if it has been
+*          determined to lie in an interval whose width is ABSTOL or
+*          less.  If ABSTOL is less than or equal to zero, then ULP*|T|
+*          will be used, where |T| means the 1-norm of T.
+*
+*          Eigenvalues will be computed most accurately when ABSTOL is
+*          set to twice the underflow threshold 2*SLAMCH('S'), not zero.
+*
+*  D       (input) REAL array, dimension (N)
+*          The n diagonal elements of the tridiagonal matrix T.
+*
+*  E       (input) REAL array, dimension (N-1)
+*          The (n-1) off-diagonal elements of the tridiagonal matrix T.
+*
+*  M       (output) INTEGER
+*          The actual number of eigenvalues found. 0 <= M <= N.
+*          (See also the description of INFO=2,3.)
+*
+*  NSPLIT  (output) INTEGER
+*          The number of diagonal blocks in the matrix T.
+*          1 <= NSPLIT <= N.
+*
+*  W       (output) REAL array, dimension (N)
+*          On exit, the first M elements of W will contain the
+*          eigenvalues.  (SSTEBZ may use the remaining N-M elements as
+*          workspace.)
+*
+*  IBLOCK  (output) INTEGER array, dimension (N)
+*          At each row/column j where E(j) is zero or small, the
+*          matrix T is considered to split into a block diagonal
+*          matrix.  On exit, if INFO = 0, IBLOCK(i) specifies to which
+*          block (from 1 to the number of blocks) the eigenvalue W(i)
+*          belongs.  (SSTEBZ may use the remaining N-M elements as
+*          workspace.)
+*
+*  ISPLIT  (output) INTEGER array, dimension (N)
+*          The splitting points, at which T breaks up into submatrices.
+*          The first submatrix consists of rows/columns 1 to ISPLIT(1),
+*          the second of rows/columns ISPLIT(1)+1 through ISPLIT(2),
+*          etc., and the NSPLIT-th consists of rows/columns
+*          ISPLIT(NSPLIT-1)+1 through ISPLIT(NSPLIT)=N.
+*          (Only the first NSPLIT elements will actually be used, but
+*          since the user cannot know a priori what value NSPLIT will
+*          have, N words must be reserved for ISPLIT.)
+*
+*  WORK    (workspace) REAL array, dimension (4*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (3*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  some or all of the eigenvalues failed to converge or
+*                were not computed:
+*                =1 or 3: Bisection failed to converge for some
+*                        eigenvalues; these eigenvalues are flagged by a
+*                        negative block number.  The effect is that the
+*                        eigenvalues may not be as accurate as the
+*                        absolute and relative tolerances.  This is
+*                        generally caused by unexpectedly inaccurate
+*                        arithmetic.
+*                =2 or 3: RANGE='I' only: Not all of the eigenvalues
+*                        IL:IU were found.
+*                        Effect: M < IU+1-IL
+*                        Cause:  non-monotonic arithmetic, causing the
+*                                Sturm sequence to be non-monotonic.
+*                        Cure:   recalculate, using RANGE='A', and pick
+*                                out eigenvalues IL:IU.  In some cases,
+*                                increasing the PARAMETER "FUDGE" may
+*                                make things work.
+*                = 4:    RANGE='I', and the Gershgorin interval
+*                        initially used was too small.  No eigenvalues
+*                        were computed.
+*                        Probable cause: your machine has sloppy
+*                                        floating-point arithmetic.
+*                        Cure: Increase the PARAMETER "FUDGE",
+*                              recompile, and try again.
+*
+*  Internal Parameters
+*  ===================
+*
+*  RELFAC  REAL, default = 2.0e0
+*          The relative tolerance.  An interval (a,b] lies within
+*          "relative tolerance" if  b-a < RELFAC*ulp*max(|a|,|b|),
+*          where "ulp" is the machine precision (distance from 1 to
+*          the next larger floating point number.)
+*
+*  FUDGE   REAL, default = 2
+*          A "fudge factor" to widen the Gershgorin intervals.  Ideally,
+*          a value of 1 should work, but on machines with sloppy
+*          arithmetic, this needs to be larger.  The default for
+*          publicly released versions should be large enough to handle
+*          the worst machine around.  Note that this has no effect
+*          on accuracy of the solution.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE, TWO, HALF
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0,
+     $                   HALF = 1.0E0 / TWO )
+      REAL               FUDGE, RELFAC
+      PARAMETER          ( FUDGE = 2.0E0, RELFAC = 2.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NCNVRG, TOOFEW
+      INTEGER            IB, IBEGIN, IDISCL, IDISCU, IE, IEND, IINFO,
+     $                   IM, IN, IOFF, IORDER, IOUT, IRANGE, ITMAX,
+     $                   ITMP1, IW, IWOFF, J, JB, JDISC, JE, NB, NWL,
+     $                   NWU
+      REAL               ATOLI, BNORM, GL, GU, PIVMIN, RTOLI, SAFEMN,
+     $                   TMP1, TMP2, TNORM, ULP, WKILL, WL, WLU, WU, WUL
+*     ..
+*     .. Local Arrays ..
+      INTEGER            IDUMMA( 1 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               SLAMCH
+      EXTERNAL           LSAME, ILAENV, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLAEBZ, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, INT, LOG, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+*
+*     Decode RANGE
+*
+      IF( LSAME( RANGE, 'A' ) ) THEN
+         IRANGE = 1
+      ELSE IF( LSAME( RANGE, 'V' ) ) THEN
+         IRANGE = 2
+      ELSE IF( LSAME( RANGE, 'I' ) ) THEN
+         IRANGE = 3
+      ELSE
+         IRANGE = 0
+      END IF
+*
+*     Decode ORDER
+*
+      IF( LSAME( ORDER, 'B' ) ) THEN
+         IORDER = 2
+      ELSE IF( LSAME( ORDER, 'E' ) ) THEN
+         IORDER = 1
+      ELSE
+         IORDER = 0
+      END IF
+*
+*     Check for Errors
+*
+      IF( IRANGE.LE.0 ) THEN
+         INFO = -1
+      ELSE IF( IORDER.LE.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( IRANGE.EQ.2 ) THEN
+         IF( VL.GE.VU ) INFO = -5
+      ELSE IF( IRANGE.EQ.3 .AND. ( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) )
+     $          THEN
+         INFO = -6
+      ELSE IF( IRANGE.EQ.3 .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) )
+     $          THEN
+         INFO = -7
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSTEBZ', -INFO )
+         RETURN
+      END IF
+*
+*     Initialize error flags
+*
+      INFO = 0
+      NCNVRG = .FALSE.
+      TOOFEW = .FALSE.
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Simplifications:
+*
+      IF( IRANGE.EQ.3 .AND. IL.EQ.1 .AND. IU.EQ.N )
+     $   IRANGE = 1
+*
+*     Get machine constants
+*     NB is the minimum vector length for vector bisection, or 0
+*     if only scalar is to be done.
+*
+      SAFEMN = SLAMCH( 'S' )
+      ULP = SLAMCH( 'P' )
+      RTOLI = ULP*RELFAC
+      NB = ILAENV( 1, 'SSTEBZ', ' ', N, -1, -1, -1 )
+      IF( NB.LE.1 )
+     $   NB = 0
+*
+*     Special Case when N=1
+*
+      IF( N.EQ.1 ) THEN
+         NSPLIT = 1
+         ISPLIT( 1 ) = 1
+         IF( IRANGE.EQ.2 .AND. ( VL.GE.D( 1 ) .OR. VU.LT.D( 1 ) ) ) THEN
+            M = 0
+         ELSE
+            W( 1 ) = D( 1 )
+            IBLOCK( 1 ) = 1
+            M = 1
+         END IF
+         RETURN
+      END IF
+*
+*     Compute Splitting Points
+*
+      NSPLIT = 1
+      WORK( N ) = ZERO
+      PIVMIN = ONE
+*
+CDIR$ NOVECTOR
+      DO 10 J = 2, N
+         TMP1 = E( J-1 )**2
+         IF( ABS( D( J )*D( J-1 ) )*ULP**2+SAFEMN.GT.TMP1 ) THEN
+            ISPLIT( NSPLIT ) = J - 1
+            NSPLIT = NSPLIT + 1
+            WORK( J-1 ) = ZERO
+         ELSE
+            WORK( J-1 ) = TMP1
+            PIVMIN = MAX( PIVMIN, TMP1 )
+         END IF
+   10 CONTINUE
+      ISPLIT( NSPLIT ) = N
+      PIVMIN = PIVMIN*SAFEMN
+*
+*     Compute Interval and ATOLI
+*
+      IF( IRANGE.EQ.3 ) THEN
+*
+*        RANGE='I': Compute the interval containing eigenvalues
+*                   IL through IU.
+*
+*        Compute Gershgorin interval for entire (split) matrix
+*        and use it as the initial interval
+*
+         GU = D( 1 )
+         GL = D( 1 )
+         TMP1 = ZERO
+*
+         DO 20 J = 1, N - 1
+            TMP2 = SQRT( WORK( J ) )
+            GU = MAX( GU, D( J )+TMP1+TMP2 )
+            GL = MIN( GL, D( J )-TMP1-TMP2 )
+            TMP1 = TMP2
+   20    CONTINUE
+*
+         GU = MAX( GU, D( N )+TMP1 )
+         GL = MIN( GL, D( N )-TMP1 )
+         TNORM = MAX( ABS( GL ), ABS( GU ) )
+         GL = GL - FUDGE*TNORM*ULP*N - FUDGE*TWO*PIVMIN
+         GU = GU + FUDGE*TNORM*ULP*N + FUDGE*PIVMIN
+*
+*        Compute Iteration parameters
+*
+         ITMAX = INT( ( LOG( TNORM+PIVMIN )-LOG( PIVMIN ) ) /
+     $           LOG( TWO ) ) + 2
+         IF( ABSTOL.LE.ZERO ) THEN
+            ATOLI = ULP*TNORM
+         ELSE
+            ATOLI = ABSTOL
+         END IF
+*
+         WORK( N+1 ) = GL
+         WORK( N+2 ) = GL
+         WORK( N+3 ) = GU
+         WORK( N+4 ) = GU
+         WORK( N+5 ) = GL
+         WORK( N+6 ) = GU
+         IWORK( 1 ) = -1
+         IWORK( 2 ) = -1
+         IWORK( 3 ) = N + 1
+         IWORK( 4 ) = N + 1
+         IWORK( 5 ) = IL - 1
+         IWORK( 6 ) = IU
+*
+         CALL SLAEBZ( 3, ITMAX, N, 2, 2, NB, ATOLI, RTOLI, PIVMIN, D, E,
+     $                WORK, IWORK( 5 ), WORK( N+1 ), WORK( N+5 ), IOUT,
+     $                IWORK, W, IBLOCK, IINFO )
+*
+         IF( IWORK( 6 ).EQ.IU ) THEN
+            WL = WORK( N+1 )
+            WLU = WORK( N+3 )
+            NWL = IWORK( 1 )
+            WU = WORK( N+4 )
+            WUL = WORK( N+2 )
+            NWU = IWORK( 4 )
+         ELSE
+            WL = WORK( N+2 )
+            WLU = WORK( N+4 )
+            NWL = IWORK( 2 )
+            WU = WORK( N+3 )
+            WUL = WORK( N+1 )
+            NWU = IWORK( 3 )
+         END IF
+*
+         IF( NWL.LT.0 .OR. NWL.GE.N .OR. NWU.LT.1 .OR. NWU.GT.N ) THEN
+            INFO = 4
+            RETURN
+         END IF
+      ELSE
+*
+*        RANGE='A' or 'V' -- Set ATOLI
+*
+         TNORM = MAX( ABS( D( 1 ) )+ABS( E( 1 ) ),
+     $           ABS( D( N ) )+ABS( E( N-1 ) ) )
+*
+         DO 30 J = 2, N - 1
+            TNORM = MAX( TNORM, ABS( D( J ) )+ABS( E( J-1 ) )+
+     $              ABS( E( J ) ) )
+   30    CONTINUE
+*
+         IF( ABSTOL.LE.ZERO ) THEN
+            ATOLI = ULP*TNORM
+         ELSE
+            ATOLI = ABSTOL
+         END IF
+*
+         IF( IRANGE.EQ.2 ) THEN
+            WL = VL
+            WU = VU
+         ELSE
+            WL = ZERO
+            WU = ZERO
+         END IF
+      END IF
+*
+*     Find Eigenvalues -- Loop Over Blocks and recompute NWL and NWU.
+*     NWL accumulates the number of eigenvalues .le. WL,
+*     NWU accumulates the number of eigenvalues .le. WU
+*
+      M = 0
+      IEND = 0
+      INFO = 0
+      NWL = 0
+      NWU = 0
+*
+      DO 70 JB = 1, NSPLIT
+         IOFF = IEND
+         IBEGIN = IOFF + 1
+         IEND = ISPLIT( JB )
+         IN = IEND - IOFF
+*
+         IF( IN.EQ.1 ) THEN
+*
+*           Special Case -- IN=1
+*
+            IF( IRANGE.EQ.1 .OR. WL.GE.D( IBEGIN )-PIVMIN )
+     $         NWL = NWL + 1
+            IF( IRANGE.EQ.1 .OR. WU.GE.D( IBEGIN )-PIVMIN )
+     $         NWU = NWU + 1
+            IF( IRANGE.EQ.1 .OR. ( WL.LT.D( IBEGIN )-PIVMIN .AND. WU.GE.
+     $          D( IBEGIN )-PIVMIN ) ) THEN
+               M = M + 1
+               W( M ) = D( IBEGIN )
+               IBLOCK( M ) = JB
+            END IF
+         ELSE
+*
+*           General Case -- IN > 1
+*
+*           Compute Gershgorin Interval
+*           and use it as the initial interval
+*
+            GU = D( IBEGIN )
+            GL = D( IBEGIN )
+            TMP1 = ZERO
+*
+            DO 40 J = IBEGIN, IEND - 1
+               TMP2 = ABS( E( J ) )
+               GU = MAX( GU, D( J )+TMP1+TMP2 )
+               GL = MIN( GL, D( J )-TMP1-TMP2 )
+               TMP1 = TMP2
+   40       CONTINUE
+*
+            GU = MAX( GU, D( IEND )+TMP1 )
+            GL = MIN( GL, D( IEND )-TMP1 )
+            BNORM = MAX( ABS( GL ), ABS( GU ) )
+            GL = GL - FUDGE*BNORM*ULP*IN - FUDGE*PIVMIN
+            GU = GU + FUDGE*BNORM*ULP*IN + FUDGE*PIVMIN
+*
+*           Compute ATOLI for the current submatrix
+*
+            IF( ABSTOL.LE.ZERO ) THEN
+               ATOLI = ULP*MAX( ABS( GL ), ABS( GU ) )
+            ELSE
+               ATOLI = ABSTOL
+            END IF
+*
+            IF( IRANGE.GT.1 ) THEN
+               IF( GU.LT.WL ) THEN
+                  NWL = NWL + IN
+                  NWU = NWU + IN
+                  GO TO 70
+               END IF
+               GL = MAX( GL, WL )
+               GU = MIN( GU, WU )
+               IF( GL.GE.GU )
+     $            GO TO 70
+            END IF
+*
+*           Set Up Initial Interval
+*
+            WORK( N+1 ) = GL
+            WORK( N+IN+1 ) = GU
+            CALL SLAEBZ( 1, 0, IN, IN, 1, NB, ATOLI, RTOLI, PIVMIN,
+     $                   D( IBEGIN ), E( IBEGIN ), WORK( IBEGIN ),
+     $                   IDUMMA, WORK( N+1 ), WORK( N+2*IN+1 ), IM,
+     $                   IWORK, W( M+1 ), IBLOCK( M+1 ), IINFO )
+*
+            NWL = NWL + IWORK( 1 )
+            NWU = NWU + IWORK( IN+1 )
+            IWOFF = M - IWORK( 1 )
+*
+*           Compute Eigenvalues
+*
+            ITMAX = INT( ( LOG( GU-GL+PIVMIN )-LOG( PIVMIN ) ) /
+     $              LOG( TWO ) ) + 2
+            CALL SLAEBZ( 2, ITMAX, IN, IN, 1, NB, ATOLI, RTOLI, PIVMIN,
+     $                   D( IBEGIN ), E( IBEGIN ), WORK( IBEGIN ),
+     $                   IDUMMA, WORK( N+1 ), WORK( N+2*IN+1 ), IOUT,
+     $                   IWORK, W( M+1 ), IBLOCK( M+1 ), IINFO )
+*
+*           Copy Eigenvalues Into W and IBLOCK
+*           Use -JB for block number for unconverged eigenvalues.
+*
+            DO 60 J = 1, IOUT
+               TMP1 = HALF*( WORK( J+N )+WORK( J+IN+N ) )
+*
+*              Flag non-convergence.
+*
+               IF( J.GT.IOUT-IINFO ) THEN
+                  NCNVRG = .TRUE.
+                  IB = -JB
+               ELSE
+                  IB = JB
+               END IF
+               DO 50 JE = IWORK( J ) + 1 + IWOFF,
+     $                 IWORK( J+IN ) + IWOFF
+                  W( JE ) = TMP1
+                  IBLOCK( JE ) = IB
+   50          CONTINUE
+   60       CONTINUE
+*
+            M = M + IM
+         END IF
+   70 CONTINUE
+*
+*     If RANGE='I', then (WL,WU) contains eigenvalues NWL+1,...,NWU
+*     If NWL+1 < IL or NWU > IU, discard extra eigenvalues.
+*
+      IF( IRANGE.EQ.3 ) THEN
+         IM = 0
+         IDISCL = IL - 1 - NWL
+         IDISCU = NWU - IU
+*
+         IF( IDISCL.GT.0 .OR. IDISCU.GT.0 ) THEN
+            DO 80 JE = 1, M
+               IF( W( JE ).LE.WLU .AND. IDISCL.GT.0 ) THEN
+                  IDISCL = IDISCL - 1
+               ELSE IF( W( JE ).GE.WUL .AND. IDISCU.GT.0 ) THEN
+                  IDISCU = IDISCU - 1
+               ELSE
+                  IM = IM + 1
+                  W( IM ) = W( JE )
+                  IBLOCK( IM ) = IBLOCK( JE )
+               END IF
+   80       CONTINUE
+            M = IM
+         END IF
+         IF( IDISCL.GT.0 .OR. IDISCU.GT.0 ) THEN
+*
+*           Code to deal with effects of bad arithmetic:
+*           Some low eigenvalues to be discarded are not in (WL,WLU],
+*           or high eigenvalues to be discarded are not in (WUL,WU]
+*           so just kill off the smallest IDISCL/largest IDISCU
+*           eigenvalues, by simply finding the smallest/largest
+*           eigenvalue(s).
+*
+*           (If N(w) is monotone non-decreasing, this should never
+*               happen.)
+*
+            IF( IDISCL.GT.0 ) THEN
+               WKILL = WU
+               DO 100 JDISC = 1, IDISCL
+                  IW = 0
+                  DO 90 JE = 1, M
+                     IF( IBLOCK( JE ).NE.0 .AND.
+     $                   ( W( JE ).LT.WKILL .OR. IW.EQ.0 ) ) THEN
+                        IW = JE
+                        WKILL = W( JE )
+                     END IF
+   90             CONTINUE
+                  IBLOCK( IW ) = 0
+  100          CONTINUE
+            END IF
+            IF( IDISCU.GT.0 ) THEN
+*
+               WKILL = WL
+               DO 120 JDISC = 1, IDISCU
+                  IW = 0
+                  DO 110 JE = 1, M
+                     IF( IBLOCK( JE ).NE.0 .AND.
+     $                   ( W( JE ).GT.WKILL .OR. IW.EQ.0 ) ) THEN
+                        IW = JE
+                        WKILL = W( JE )
+                     END IF
+  110             CONTINUE
+                  IBLOCK( IW ) = 0
+  120          CONTINUE
+            END IF
+            IM = 0
+            DO 130 JE = 1, M
+               IF( IBLOCK( JE ).NE.0 ) THEN
+                  IM = IM + 1
+                  W( IM ) = W( JE )
+                  IBLOCK( IM ) = IBLOCK( JE )
+               END IF
+  130       CONTINUE
+            M = IM
+         END IF
+         IF( IDISCL.LT.0 .OR. IDISCU.LT.0 ) THEN
+            TOOFEW = .TRUE.
+         END IF
+      END IF
+*
+*     If ORDER='B', do nothing -- the eigenvalues are already sorted
+*        by block.
+*     If ORDER='E', sort the eigenvalues from smallest to largest
+*
+      IF( IORDER.EQ.1 .AND. NSPLIT.GT.1 ) THEN
+         DO 150 JE = 1, M - 1
+            IE = 0
+            TMP1 = W( JE )
+            DO 140 J = JE + 1, M
+               IF( W( J ).LT.TMP1 ) THEN
+                  IE = J
+                  TMP1 = W( J )
+               END IF
+  140       CONTINUE
+*
+            IF( IE.NE.0 ) THEN
+               ITMP1 = IBLOCK( IE )
+               W( IE ) = W( JE )
+               IBLOCK( IE ) = IBLOCK( JE )
+               W( JE ) = TMP1
+               IBLOCK( JE ) = ITMP1
+            END IF
+  150    CONTINUE
+      END IF
+*
+      INFO = 0
+      IF( NCNVRG )
+     $   INFO = INFO + 1
+      IF( TOOFEW )
+     $   INFO = INFO + 2
+      RETURN
+*
+*     End of SSTEBZ
+*
+      END
diff --git a/costa/native/external/lapack/sstedc.f b/costa/native/external/lapack/sstedc.f
new file mode 100644
index 000000000..5d60e9da1
--- /dev/null
+++ b/costa/native/external/lapack/sstedc.f
@@ -0,0 +1,397 @@
+      SUBROUTINE SSTEDC( COMPZ, N, D, E, Z, LDZ, WORK, LWORK, IWORK,
+     $                   LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPZ
+      INTEGER            INFO, LDZ, LIWORK, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               D( * ), E( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSTEDC computes all eigenvalues and, optionally, eigenvectors of a
+*  symmetric tridiagonal matrix using the divide and conquer method.
+*  The eigenvectors of a full or band real symmetric matrix can also be
+*  found if SSYTRD or SSPTRD or SSBTRD has been used to reduce this
+*  matrix to tridiagonal form.
+*
+*  This code makes very mild assumptions about floating point
+*  arithmetic. It will work on machines with a guard digit in
+*  add/subtract, or on those binary machines without guard digits
+*  which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
+*  It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.  See SLAED3 for details.
+*
+*  Arguments
+*  =========
+*
+*  COMPZ   (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only.
+*          = 'I':  Compute eigenvectors of tridiagonal matrix also.
+*          = 'V':  Compute eigenvectors of original dense symmetric
+*                  matrix also.  On entry, Z contains the orthogonal
+*                  matrix used to reduce the original matrix to
+*                  tridiagonal form.
+*
+*  N       (input) INTEGER
+*          The dimension of the symmetric tridiagonal matrix.  N >= 0.
+*
+*  D       (input/output) REAL array, dimension (N)
+*          On entry, the diagonal elements of the tridiagonal matrix.
+*          On exit, if INFO = 0, the eigenvalues in ascending order.
+*
+*  E       (input/output) REAL array, dimension (N-1)
+*          On entry, the subdiagonal elements of the tridiagonal matrix.
+*          On exit, E has been destroyed.
+*
+*  Z       (input/output) REAL array, dimension (LDZ,N)
+*          On entry, if COMPZ = 'V', then Z contains the orthogonal
+*          matrix used in the reduction to tridiagonal form.
+*          On exit, if INFO = 0, then if COMPZ = 'V', Z contains the
+*          orthonormal eigenvectors of the original symmetric matrix,
+*          and if COMPZ = 'I', Z contains the orthonormal eigenvectors
+*          of the symmetric tridiagonal matrix.
+*          If  COMPZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1.
+*          If eigenvectors are desired, then LDZ >= max(1,N).
+*
+*  WORK    (workspace/output) REAL array,
+*                                         dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If COMPZ = 'N' or N <= 1 then LWORK must be at least 1.
+*          If COMPZ = 'V' and N > 1 then LWORK must be at least
+*                         ( 1 + 3*N + 2*N*lg N + 3*N**2 ),
+*                         where lg( N ) = smallest integer k such
+*                         that 2**k >= N.
+*          If COMPZ = 'I' and N > 1 then LWORK must be at least
+*                         ( 1 + 4*N + N**2 ).
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.
+*          If COMPZ = 'N' or N <= 1 then LIWORK must be at least 1.
+*          If COMPZ = 'V' and N > 1 then LIWORK must be at least
+*                         ( 6 + 6*N + 5*N*lg N ).
+*          If COMPZ = 'I' and N > 1 then LIWORK must be at least
+*                         ( 3 + 5*N ).
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  The algorithm failed to compute an eigenvalue while
+*                working on the submatrix lying in rows and columns
+*                INFO/(N+1) through mod(INFO,N+1).
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Jeff Rutter, Computer Science Division, University of California
+*     at Berkeley, USA
+*  Modified by Francoise Tisseur, University of Tennessee.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            END, I, ICOMPZ, II, J, K, LGN, LIWMIN, LWMIN,
+     $                   M, SMLSIZ, START, STOREZ, STRTRW
+      REAL               EPS, ORGNRM, P, TINY
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               SLAMCH, SLANST
+      EXTERNAL           ILAENV, LSAME, SLAMCH, SLANST
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMM, SLACPY, SLAED0, SLASCL, SLASET, SLASRT,
+     $                   SSTEQR, SSTERF, SSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, INT, LOG, MAX, MOD, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      IF( LSAME( COMPZ, 'N' ) ) THEN
+         ICOMPZ = 0
+      ELSE IF( LSAME( COMPZ, 'V' ) ) THEN
+         ICOMPZ = 1
+      ELSE IF( LSAME( COMPZ, 'I' ) ) THEN
+         ICOMPZ = 2
+      ELSE
+         ICOMPZ = -1
+      END IF
+      IF( N.LE.1 .OR. ICOMPZ.LE.0 ) THEN
+         LIWMIN = 1
+         LWMIN = 1
+      ELSE
+         LGN = INT( LOG( REAL( N ) ) / LOG( TWO ) )
+         IF( 2**LGN.LT.N )
+     $      LGN = LGN + 1
+         IF( 2**LGN.LT.N )
+     $      LGN = LGN + 1
+         IF( ICOMPZ.EQ.1 ) THEN
+            LWMIN = 1 + 3*N + 2*N*LGN + 3*N**2
+            LIWMIN = 6 + 6*N + 5*N*LGN
+         ELSE IF( ICOMPZ.EQ.2 ) THEN
+            LWMIN = 1 + 4*N + N**2
+            LIWMIN = 3 + 5*N
+         END IF
+      END IF
+      IF( ICOMPZ.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1,
+     $         N ) ) ) THEN
+         INFO = -6
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -10
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSTEDC', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN 
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN 
+      IF( N.EQ.1 ) THEN
+         IF( ICOMPZ.NE.0 )
+     $      Z( 1, 1 ) = ONE
+         RETURN 
+      END IF
+*
+      SMLSIZ = ILAENV( 9, 'SSTEDC', ' ', 0, 0, 0, 0 )
+*
+*     If the following conditional clause is removed, then the routine
+*     will use the Divide and Conquer routine to compute only the
+*     eigenvalues, which requires (3N + 3N**2) real workspace and
+*     (2 + 5N + 2N lg(N)) integer workspace.
+*     Since on many architectures SSTERF is much faster than any other
+*     algorithm for finding eigenvalues only, it is used here
+*     as the default.
+*
+*     If COMPZ = 'N', use SSTERF to compute the eigenvalues.
+*
+      IF( ICOMPZ.EQ.0 ) THEN
+         CALL SSTERF( N, D, E, INFO )
+         RETURN 
+      END IF
+*
+*     If N is smaller than the minimum divide size (SMLSIZ+1), then
+*     solve the problem with another solver.
+*
+      IF( N.LE.SMLSIZ ) THEN
+         IF( ICOMPZ.EQ.0 ) THEN
+            CALL SSTERF( N, D, E, INFO )
+            RETURN 
+         ELSE IF( ICOMPZ.EQ.2 ) THEN
+            CALL SSTEQR( 'I', N, D, E, Z, LDZ, WORK, INFO )
+            RETURN
+         ELSE
+            CALL SSTEQR( 'V', N, D, E, Z, LDZ, WORK, INFO )
+            RETURN 
+         END IF
+      END IF
+*
+*     If COMPZ = 'V', the Z matrix must be stored elsewhere for later
+*     use.
+*
+      IF( ICOMPZ.EQ.1 ) THEN
+         STOREZ = 1 + N*N
+      ELSE
+         STOREZ = 1
+      END IF
+*
+      IF( ICOMPZ.EQ.2 ) THEN
+         CALL SLASET( 'Full', N, N, ZERO, ONE, Z, LDZ )
+      END IF
+*
+*     Scale.
+*
+      ORGNRM = SLANST( 'M', N, D, E )
+      IF( ORGNRM.EQ.ZERO )
+     $   RETURN
+*
+      EPS = SLAMCH( 'Epsilon' )
+*
+      START = 1
+*
+*     while ( START <= N )
+*
+   10 CONTINUE
+      IF( START.LE.N ) THEN
+*
+*     Let END be the position of the next subdiagonal entry such that
+*     E( END ) <= TINY or END = N if no such subdiagonal exists.  The
+*     matrix identified by the elements between START and END
+*     constitutes an independent sub-problem.
+*
+         END = START
+   20    CONTINUE
+         IF( END.LT.N ) THEN
+            TINY = EPS*SQRT( ABS( D( END ) ) )*SQRT( ABS( D( END+1 ) ) )
+            IF( ABS( E( END ) ).GT.TINY ) THEN
+               END = END + 1
+               GO TO 20
+            END IF
+         END IF
+*
+*        (Sub) Problem determined.  Compute its size and solve it.
+*
+         M = END - START + 1
+         IF( M.EQ.1 ) THEN
+            START = END + 1
+            GO TO 10
+         END IF
+         IF( M.GT.SMLSIZ ) THEN
+            INFO = SMLSIZ
+*
+*           Scale.
+*
+            ORGNRM = SLANST( 'M', M, D( START ), E( START ) )
+            CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, M, 1, D( START ), M,
+     $                   INFO )
+            CALL SLASCL( 'G', 0, 0, ORGNRM, ONE, M-1, 1, E( START ),
+     $                   M-1, INFO )
+*
+            IF( ICOMPZ.EQ.1 ) THEN
+               STRTRW = 1
+            ELSE
+               STRTRW = START
+            END IF
+            CALL SLAED0( ICOMPZ, N, M, D( START ), E( START ),
+     $                   Z( STRTRW, START ), LDZ, WORK( 1 ), N,
+     $                   WORK( STOREZ ), IWORK, INFO )
+            IF( INFO.NE.0 ) THEN
+               INFO = ( INFO / ( M+1 )+START-1 )*( N+1 ) +
+     $                MOD( INFO, ( M+1 ) ) + START - 1
+               RETURN 
+            END IF
+*
+*           Scale back.
+*
+            CALL SLASCL( 'G', 0, 0, ONE, ORGNRM, M, 1, D( START ), M,
+     $                   INFO )
+*
+         ELSE
+            IF( ICOMPZ.EQ.1 ) THEN
+*
+*     Since QR won't update a Z matrix which is larger than the
+*     length of D, we must solve the sub-problem in a workspace and
+*     then multiply back into Z.
+*
+               CALL SSTEQR( 'I', M, D( START ), E( START ), WORK, M,
+     $                      WORK( M*M+1 ), INFO )
+               CALL SLACPY( 'A', N, M, Z( 1, START ), LDZ,
+     $                      WORK( STOREZ ), N )
+               CALL SGEMM( 'N', 'N', N, M, M, ONE, WORK( STOREZ ), LDZ,
+     $                     WORK, M, ZERO, Z( 1, START ), LDZ )
+            ELSE IF( ICOMPZ.EQ.2 ) THEN
+               CALL SSTEQR( 'I', M, D( START ), E( START ),
+     $                      Z( START, START ), LDZ, WORK, INFO )
+            ELSE
+               CALL SSTERF( M, D( START ), E( START ), INFO )
+            END IF
+            IF( INFO.NE.0 ) THEN
+               INFO = START*( N+1 ) + END
+               RETURN 
+            END IF
+         END IF
+*
+         START = END + 1
+         GO TO 10
+      END IF
+*
+*     endwhile
+*
+*     If the problem split any number of times, then the eigenvalues
+*     will not be properly ordered.  Here we permute the eigenvalues
+*     (and the associated eigenvectors) into ascending order.
+*
+      IF( M.NE.N ) THEN
+         IF( ICOMPZ.EQ.0 ) THEN
+*
+*        Use Quick Sort
+*
+            CALL SLASRT( 'I', N, D, INFO )
+*
+         ELSE
+*
+*        Use Selection Sort to minimize swaps of eigenvectors
+*
+            DO 40 II = 2, N
+               I = II - 1
+               K = I
+               P = D( I )
+               DO 30 J = II, N
+                  IF( D( J ).LT.P ) THEN
+                     K = J
+                     P = D( J )
+                  END IF
+   30          CONTINUE
+               IF( K.NE.I ) THEN
+                  D( K ) = D( I )
+                  D( I ) = P
+                  CALL SSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 )
+               END IF
+   40       CONTINUE
+         END IF
+      END IF
+*
+      WORK( 1 ) = LWMIN
+      IWORK( 1 ) = LIWMIN
+*
+      RETURN
+*
+*     End of SSTEDC
+*
+      END
diff --git a/costa/native/external/lapack/sstegr.f b/costa/native/external/lapack/sstegr.f
new file mode 100644
index 000000000..3df5627e0
--- /dev/null
+++ b/costa/native/external/lapack/sstegr.f
@@ -0,0 +1,400 @@
+      SUBROUTINE SSTEGR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, ABSTOL,
+     $                   M, W, Z, LDZ, ISUPPZ, WORK, LWORK, IWORK,
+     $                   LIWORK, INFO )
+*
+*  -- LAPACK computational routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE
+      INTEGER            IL, INFO, IU, LDZ, LIWORK, LWORK, M, N
+      REAL               ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            ISUPPZ( * ), IWORK( * )
+      REAL               D( * ), E( * ), W( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+* SSTEGR computes selected eigenvalues and, optionally, eigenvectors
+* of a real symmetric tridiagonal matrix T.  Eigenvalues and
+* eigenvectors can be selected by specifying either a range of values
+* or a range of indices for the desired eigenvalues. The eigenvalues
+* are computed by the dqds algorithm, while orthogonal eigenvectors are
+* computed from various ``good'' L D L^T representations (also known as
+* Relatively Robust Representations). Gram-Schmidt orthogonalization is
+* avoided as far as possible. More specifically, the various steps of
+* the algorithm are as follows. For the i-th unreduced block of T,
+*     (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T
+*         is a relatively robust representation,
+*     (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high
+*         relative accuracy by the dqds algorithm,
+*     (c) If there is a cluster of close eigenvalues, "choose" sigma_i
+*         close to the cluster, and go to step (a),
+*     (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T,
+*         compute the corresponding eigenvector by forming a
+*         rank-revealing twisted factorization.
+*  The desired accuracy of the output can be specified by the input
+*  parameter ABSTOL.
+*
+*  For more details, see "A new O(n^2) algorithm for the symmetric
+*  tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon,
+*  Computer Science Division Technical Report No. UCB/CSD-97-971,
+*  UC Berkeley, May 1997.
+*
+*  Note 1 : Currently SSTEGR is only set up to find ALL the n
+*  eigenvalues and eigenvectors of T in O(n^2) time
+*  Note 2 : Currently the routine SSTEIN is called when an appropriate
+*  sigma_i cannot be chosen in step (c) above. SSTEIN invokes modified
+*  Gram-Schmidt when eigenvalues are close.
+*  Note 3 : SSTEGR works only on machines which follow ieee-754
+*  floating-point standard in their handling of infinities and NaNs.
+*  Normal execution of SSTEGR may create NaNs and infinities and hence
+*  may abort due to a floating point exception in environments which
+*  do not conform to the ieee standard.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found.
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found.
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+********** Only RANGE = 'A' is currently supported *********************
+*
+*  N       (input) INTEGER
+*          The order of the matrix.  N >= 0.
+*
+*  D       (input/output) REAL array, dimension (N)
+*          On entry, the n diagonal elements of the tridiagonal matrix
+*          T. On exit, D is overwritten.
+*
+*  E       (input/output) REAL array, dimension (N)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix T in elements 1 to N-1 of E; E(N) need not be set.
+*          On exit, E is overwritten.
+*
+*  VL      (input) REAL
+*  VU      (input) REAL
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) REAL
+*          The absolute error tolerance for the
+*          eigenvalues/eigenvectors. IF JOBZ = 'V', the eigenvalues and
+*          eigenvectors output have residual norms bounded by ABSTOL,
+*          and the dot products between different eigenvectors are
+*          bounded by ABSTOL. If ABSTOL is less than N*EPS*|T|, then
+*          N*EPS*|T| will be used in its place, where EPS is the
+*          machine precision and |T| is the 1-norm of the tridiagonal
+*          matrix. The eigenvalues are computed to an accuracy of
+*          EPS*|T| irrespective of ABSTOL. If high relative accuracy
+*          is important, set ABSTOL to DLAMCH( 'Safe minimum' ).
+*          See Barlow and Demmel "Computing Accurate Eigensystems of
+*          Scaled Diagonally Dominant Matrices", LAPACK Working Note #7
+*          for a discussion of which matrices define their eigenvalues
+*          to high relative accuracy.
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) REAL array, dimension (N)
+*          The first M elements contain the selected eigenvalues in
+*          ascending order.
+*
+*  Z       (output) REAL array, dimension (LDZ, max(1,M) )
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix T
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  ISUPPZ  (output) INTEGER ARRAY, dimension ( 2*max(1,M) )
+*          The support of the eigenvectors in Z, i.e., the indices
+*          indicating the nonzero elements in Z. The i-th eigenvector
+*          is nonzero only in elements ISUPPZ( 2*i-1 ) through
+*          ISUPPZ( 2*i ).
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal
+*          (and minimal) LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,18*N)
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.  LIWORK >= max(1,10*N)
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = 1, internal error in SLARRE,
+*                if INFO = 2, internal error in SLARRV.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Inderjit Dhillon, IBM Almaden, USA
+*     Osni Marques, LBNL/NERSC, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ
+      INTEGER            I, IBEGIN, IEND, IINDBL, IINDWK, IINFO, IINSPL, 
+     $                   INDGRS, INDWOF, INDWRK, ITMP, J, JJ, LIWMIN, 
+     $                   LWMIN, NSPLIT
+      REAL               BIGNUM, EPS, RMAX, RMIN, SAFMIN, SCALE, SMLNUM,
+     $                   THRESH, TMP, TNRM, TOL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH, SLANST
+      EXTERNAL           LSAME, SLAMCH, SLANST
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARRE, SLARRV, SLASET, SSCAL, SSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+*
+      LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LIWORK.EQ.-1 ) )
+      LWMIN = 18*N
+      LIWMIN = 10*N
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -2
+*
+*     The following two lines need to be removed once the 
+*     RANGE = 'V' and RANGE = 'I' options are provided.
+*
+      ELSE IF( VALEIG .OR. INDEIG ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN
+         INFO = -7
+      ELSE IF( INDEIG .AND. IL.LT.1 ) THEN
+         INFO = -8
+*     The following change should be made in DSTEVX also, otherwise
+*     IL can be specified as N+1 and IU as N.
+*     ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN
+      ELSE IF( INDEIG .AND. ( IU.LT.IL .OR. IU.GT.N ) ) THEN
+         INFO = -9
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -14
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -17
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -19
+      END IF
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSTEGR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( ALLEIG .OR. INDEIG ) THEN
+            M = 1
+            W( 1 ) = D( 1 )
+         ELSE
+            IF( VL.LT.D( 1 ) .AND. VU.GE.D( 1 ) ) THEN
+               M = 1
+               W( 1 ) = D( 1 )
+            END IF
+         END IF
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      EPS = SLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      SCALE = ONE
+      TNRM = SLANST( 'M', N, D, E )
+      IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN
+         SCALE = RMIN / TNRM
+      ELSE IF( TNRM.GT.RMAX ) THEN
+         SCALE = RMAX / TNRM
+      END IF
+      IF( SCALE.NE.ONE ) THEN
+         CALL SSCAL( N, SCALE, D, 1 )
+         CALL SSCAL( N-1, SCALE, E, 1 )
+         TNRM = TNRM*SCALE
+      END IF
+      INDGRS = 1
+      INDWOF = 2*N + 1
+      INDWRK = 3*N + 1
+*
+      IINSPL = 1
+      IINDBL = N + 1
+      IINDWK = 2*N + 1
+*
+      CALL SLASET( 'Full', N, N, ZERO, ZERO, Z, LDZ )
+*
+*     Compute the desired eigenvalues of the tridiagonal after splitting
+*     into smaller subblocks if the corresponding of-diagonal elements
+*     are small
+*
+      THRESH = EPS*TNRM
+      CALL SLARRE( N, D, E, THRESH, NSPLIT, IWORK( IINSPL ), M, W,
+     $             WORK( INDWOF ), WORK( INDGRS ), WORK( INDWRK ),
+     $             IINFO )
+      IF( IINFO.NE.0 ) THEN
+         INFO = 1
+         RETURN
+      END IF
+*
+      IF( WANTZ ) THEN
+*
+*        Compute the desired eigenvectors corresponding to the computed
+*        eigenvalues
+*
+         TOL = MAX( ABSTOL, REAL( N )*THRESH )
+         IBEGIN = 1
+         DO 20 I = 1, NSPLIT
+            IEND = IWORK( IINSPL+I-1 )
+            DO 10 J = IBEGIN, IEND
+               IWORK( IINDBL+J-1 ) = I
+   10       CONTINUE
+            IBEGIN = IEND + 1
+   20    CONTINUE
+*
+         CALL SLARRV( N, D, E, IWORK( IINSPL ), M, W, IWORK( IINDBL ),
+     $                WORK( INDGRS ), TOL, Z, LDZ, ISUPPZ,
+     $                WORK( INDWRK ), IWORK( IINDWK ), IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = 2
+            RETURN
+         END IF
+*
+      END IF
+*
+      IBEGIN = 1
+      DO 40 I = 1, NSPLIT
+         IEND = IWORK( IINSPL+I-1 )
+         DO 30 J = IBEGIN, IEND
+            W( J ) = W( J ) + WORK( INDWOF+I-1 )
+   30    CONTINUE
+         IBEGIN = IEND + 1
+   40 CONTINUE
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( SCALE.NE.ONE ) THEN
+         CALL SSCAL( M, ONE / SCALE, W, 1 )
+      END IF
+*
+*     If eigenvalues are not in order, then sort them, along with
+*     eigenvectors.
+*
+      IF( NSPLIT.GT.1 ) THEN
+         DO 60 J = 1, M - 1
+            I = 0
+            TMP = W( J )
+            DO 50 JJ = J + 1, M
+               IF( W( JJ ).LT.TMP ) THEN
+                  I = JJ
+                  TMP = W( JJ )
+               END IF
+   50       CONTINUE
+            IF( I.NE.0 ) THEN
+               W( I ) = W( J )
+               W( J ) = TMP
+               IF( WANTZ ) THEN
+                  CALL SSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 )
+                  ITMP = ISUPPZ( 2*I-1 )
+                  ISUPPZ( 2*I-1 ) = ISUPPZ( 2*J-1 )
+                  ISUPPZ( 2*J-1 ) = ITMP
+                  ITMP = ISUPPZ( 2*I )
+                  ISUPPZ( 2*I ) = ISUPPZ( 2*J )
+                  ISUPPZ( 2*J ) = ITMP
+               END IF
+            END IF
+   60    CONTINUE
+      END IF
+*
+      WORK( 1 ) = LWMIN
+      IWORK( 1 ) = LIWMIN
+      RETURN
+*
+*     End of SSTEGR
+*
+      END
diff --git a/costa/native/external/lapack/sstein.f b/costa/native/external/lapack/sstein.f
new file mode 100644
index 000000000..fa6fd7ee0
--- /dev/null
+++ b/costa/native/external/lapack/sstein.f
@@ -0,0 +1,362 @@
+      SUBROUTINE SSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, Z, LDZ, WORK,
+     $                   IWORK, IFAIL, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDZ, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IBLOCK( * ), IFAIL( * ), ISPLIT( * ),
+     $                   IWORK( * )
+      REAL               D( * ), E( * ), W( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSTEIN computes the eigenvectors of a real symmetric tridiagonal
+*  matrix T corresponding to specified eigenvalues, using inverse
+*  iteration.
+*
+*  The maximum number of iterations allowed for each eigenvector is
+*  specified by an internal parameter MAXITS (currently set to 5).
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix.  N >= 0.
+*
+*  D       (input) REAL array, dimension (N)
+*          The n diagonal elements of the tridiagonal matrix T.
+*
+*  E       (input) REAL array, dimension (N)
+*          The (n-1) subdiagonal elements of the tridiagonal matrix
+*          T, in elements 1 to N-1.  E(N) need not be set.
+*
+*  M       (input) INTEGER
+*          The number of eigenvectors to be found.  0 <= M <= N.
+*
+*  W       (input) REAL array, dimension (N)
+*          The first M elements of W contain the eigenvalues for
+*          which eigenvectors are to be computed.  The eigenvalues
+*          should be grouped by split-off block and ordered from
+*          smallest to largest within the block.  ( The output array
+*          W from SSTEBZ with ORDER = 'B' is expected here. )
+*
+*  IBLOCK  (input) INTEGER array, dimension (N)
+*          The submatrix indices associated with the corresponding
+*          eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to
+*          the first submatrix from the top, =2 if W(i) belongs to
+*          the second submatrix, etc.  ( The output array IBLOCK
+*          from SSTEBZ is expected here. )
+*
+*  ISPLIT  (input) INTEGER array, dimension (N)
+*          The splitting points, at which T breaks up into submatrices.
+*          The first submatrix consists of rows/columns 1 to
+*          ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1
+*          through ISPLIT( 2 ), etc.
+*          ( The output array ISPLIT from SSTEBZ is expected here. )
+*
+*  Z       (output) REAL array, dimension (LDZ, M)
+*          The computed eigenvectors.  The eigenvector associated
+*          with the eigenvalue W(i) is stored in the i-th column of
+*          Z.  Any vector which fails to converge is set to its current
+*          iterate after MAXITS iterations.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= max(1,N).
+*
+*  WORK    (workspace) REAL array, dimension (5*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  IFAIL   (output) INTEGER array, dimension (M)
+*          On normal exit, all elements of IFAIL are zero.
+*          If one or more eigenvectors fail to converge after
+*          MAXITS iterations, then their indices are stored in
+*          array IFAIL.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit.
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, then i eigenvectors failed to converge
+*               in MAXITS iterations.  Their indices are stored in
+*               array IFAIL.
+*
+*  Internal Parameters
+*  ===================
+*
+*  MAXITS  INTEGER, default = 5
+*          The maximum number of iterations performed.
+*
+*  EXTRA   INTEGER, default = 2
+*          The number of iterations performed after norm growth
+*          criterion is satisfied, should be at least 1.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE, TEN, ODM3, ODM1
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0, TEN = 1.0E+1,
+     $                   ODM3 = 1.0E-3, ODM1 = 1.0E-1 )
+      INTEGER            MAXITS, EXTRA
+      PARAMETER          ( MAXITS = 5, EXTRA = 2 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            B1, BLKSIZ, BN, GPIND, I, IINFO, INDRV1,
+     $                   INDRV2, INDRV3, INDRV4, INDRV5, ITS, J, J1,
+     $                   JBLK, JMAX, NBLK, NRMCHK
+      REAL               CTR, EPS, EPS1, NRM, ONENRM, ORTOL, PERTOL,
+     $                   SCL, SEP, STPCRT, TOL, XJ, XJM
+*     ..
+*     .. Local Arrays ..
+      INTEGER            ISEED( 4 )
+*     ..
+*     .. External Functions ..
+      INTEGER            ISAMAX
+      REAL               SASUM, SDOT, SLAMCH, SNRM2
+      EXTERNAL           ISAMAX, SASUM, SDOT, SLAMCH, SNRM2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SCOPY, SLAGTF, SLAGTS, SLARNV, SSCAL,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      DO 10 I = 1, M
+         IFAIL( I ) = 0
+   10 CONTINUE
+*
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( M.LT.0 .OR. M.GT.N ) THEN
+         INFO = -4
+      ELSE IF( LDZ.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE
+         DO 20 J = 2, M
+            IF( IBLOCK( J ).LT.IBLOCK( J-1 ) ) THEN
+               INFO = -6
+               GO TO 30
+            END IF
+            IF( IBLOCK( J ).EQ.IBLOCK( J-1 ) .AND. W( J ).LT.W( J-1 ) )
+     $           THEN
+               INFO = -5
+               GO TO 30
+            END IF
+   20    CONTINUE
+   30    CONTINUE
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSTEIN', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. M.EQ.0 ) THEN
+         RETURN
+      ELSE IF( N.EQ.1 ) THEN
+         Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      EPS = SLAMCH( 'Precision' )
+*
+*     Initialize seed for random number generator SLARNV.
+*
+      DO 40 I = 1, 4
+         ISEED( I ) = 1
+   40 CONTINUE
+*
+*     Initialize pointers.
+*
+      INDRV1 = 0
+      INDRV2 = INDRV1 + N
+      INDRV3 = INDRV2 + N
+      INDRV4 = INDRV3 + N
+      INDRV5 = INDRV4 + N
+*
+*     Compute eigenvectors of matrix blocks.
+*
+      J1 = 1
+      DO 160 NBLK = 1, IBLOCK( M )
+*
+*        Find starting and ending indices of block nblk.
+*
+         IF( NBLK.EQ.1 ) THEN
+            B1 = 1
+         ELSE
+            B1 = ISPLIT( NBLK-1 ) + 1
+         END IF
+         BN = ISPLIT( NBLK )
+         BLKSIZ = BN - B1 + 1
+         IF( BLKSIZ.EQ.1 )
+     $      GO TO 60
+         GPIND = B1
+*
+*        Compute reorthogonalization criterion and stopping criterion.
+*
+         ONENRM = ABS( D( B1 ) ) + ABS( E( B1 ) )
+         ONENRM = MAX( ONENRM, ABS( D( BN ) )+ABS( E( BN-1 ) ) )
+         DO 50 I = B1 + 1, BN - 1
+            ONENRM = MAX( ONENRM, ABS( D( I ) )+ABS( E( I-1 ) )+
+     $               ABS( E( I ) ) )
+   50    CONTINUE
+         ORTOL = ODM3*ONENRM
+*
+         STPCRT = SQRT( ODM1 / BLKSIZ )
+*
+*        Loop through eigenvalues of block nblk.
+*
+   60    CONTINUE
+         JBLK = 0
+         DO 150 J = J1, M
+            IF( IBLOCK( J ).NE.NBLK ) THEN
+               J1 = J
+               GO TO 160
+            END IF
+            JBLK = JBLK + 1
+            XJ = W( J )
+*
+*           Skip all the work if the block size is one.
+*
+            IF( BLKSIZ.EQ.1 ) THEN
+               WORK( INDRV1+1 ) = ONE
+               GO TO 120
+            END IF
+*
+*           If eigenvalues j and j-1 are too close, add a relatively
+*           small perturbation.
+*
+            IF( JBLK.GT.1 ) THEN
+               EPS1 = ABS( EPS*XJ )
+               PERTOL = TEN*EPS1
+               SEP = XJ - XJM
+               IF( SEP.LT.PERTOL )
+     $            XJ = XJM + PERTOL
+            END IF
+*
+            ITS = 0
+            NRMCHK = 0
+*
+*           Get random starting vector.
+*
+            CALL SLARNV( 2, ISEED, BLKSIZ, WORK( INDRV1+1 ) )
+*
+*           Copy the matrix T so it won't be destroyed in factorization.
+*
+            CALL SCOPY( BLKSIZ, D( B1 ), 1, WORK( INDRV4+1 ), 1 )
+            CALL SCOPY( BLKSIZ-1, E( B1 ), 1, WORK( INDRV2+2 ), 1 )
+            CALL SCOPY( BLKSIZ-1, E( B1 ), 1, WORK( INDRV3+1 ), 1 )
+*
+*           Compute LU factors with partial pivoting  ( PT = LU )
+*
+            TOL = ZERO
+            CALL SLAGTF( BLKSIZ, WORK( INDRV4+1 ), XJ, WORK( INDRV2+2 ),
+     $                   WORK( INDRV3+1 ), TOL, WORK( INDRV5+1 ), IWORK,
+     $                   IINFO )
+*
+*           Update iteration count.
+*
+   70       CONTINUE
+            ITS = ITS + 1
+            IF( ITS.GT.MAXITS )
+     $         GO TO 100
+*
+*           Normalize and scale the righthand side vector Pb.
+*
+            SCL = BLKSIZ*ONENRM*MAX( EPS,
+     $            ABS( WORK( INDRV4+BLKSIZ ) ) ) /
+     $            SASUM( BLKSIZ, WORK( INDRV1+1 ), 1 )
+            CALL SSCAL( BLKSIZ, SCL, WORK( INDRV1+1 ), 1 )
+*
+*           Solve the system LU = Pb.
+*
+            CALL SLAGTS( -1, BLKSIZ, WORK( INDRV4+1 ), WORK( INDRV2+2 ),
+     $                   WORK( INDRV3+1 ), WORK( INDRV5+1 ), IWORK,
+     $                   WORK( INDRV1+1 ), TOL, IINFO )
+*
+*           Reorthogonalize by modified Gram-Schmidt if eigenvalues are
+*           close enough.
+*
+            IF( JBLK.EQ.1 )
+     $         GO TO 90
+            IF( ABS( XJ-XJM ).GT.ORTOL )
+     $         GPIND = J
+            IF( GPIND.NE.J ) THEN
+               DO 80 I = GPIND, J - 1
+                  CTR = -SDOT( BLKSIZ, WORK( INDRV1+1 ), 1, Z( B1, I ),
+     $                  1 )
+                  CALL SAXPY( BLKSIZ, CTR, Z( B1, I ), 1,
+     $                        WORK( INDRV1+1 ), 1 )
+   80          CONTINUE
+            END IF
+*
+*           Check the infinity norm of the iterate.
+*
+   90       CONTINUE
+            JMAX = ISAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 )
+            NRM = ABS( WORK( INDRV1+JMAX ) )
+*
+*           Continue for additional iterations after norm reaches
+*           stopping criterion.
+*
+            IF( NRM.LT.STPCRT )
+     $         GO TO 70
+            NRMCHK = NRMCHK + 1
+            IF( NRMCHK.LT.EXTRA+1 )
+     $         GO TO 70
+*
+            GO TO 110
+*
+*           If stopping criterion was not satisfied, update info and
+*           store eigenvector number in array ifail.
+*
+  100       CONTINUE
+            INFO = INFO + 1
+            IFAIL( INFO ) = J
+*
+*           Accept iterate as jth eigenvector.
+*
+  110       CONTINUE
+            SCL = ONE / SNRM2( BLKSIZ, WORK( INDRV1+1 ), 1 )
+            JMAX = ISAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 )
+            IF( WORK( INDRV1+JMAX ).LT.ZERO )
+     $         SCL = -SCL
+            CALL SSCAL( BLKSIZ, SCL, WORK( INDRV1+1 ), 1 )
+  120       CONTINUE
+            DO 130 I = 1, N
+               Z( I, J ) = ZERO
+  130       CONTINUE
+            DO 140 I = 1, BLKSIZ
+               Z( B1+I-1, J ) = WORK( INDRV1+I )
+  140       CONTINUE
+*
+*           Save the shift to check eigenvalue spacing at next
+*           iteration.
+*
+            XJM = XJ
+*
+  150    CONTINUE
+  160 CONTINUE
+*
+      RETURN
+*
+*     End of SSTEIN
+*
+      END
diff --git a/costa/native/external/lapack/ssteqr.f b/costa/native/external/lapack/ssteqr.f
new file mode 100644
index 000000000..c6bb03ac4
--- /dev/null
+++ b/costa/native/external/lapack/ssteqr.f
@@ -0,0 +1,501 @@
+      SUBROUTINE SSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPZ
+      INTEGER            INFO, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * ), E( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSTEQR computes all eigenvalues and, optionally, eigenvectors of a
+*  symmetric tridiagonal matrix using the implicit QL or QR method.
+*  The eigenvectors of a full or band symmetric matrix can also be found
+*  if SSYTRD or SSPTRD or SSBTRD has been used to reduce this matrix to
+*  tridiagonal form.
+*
+*  Arguments
+*  =========
+*
+*  COMPZ   (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only.
+*          = 'V':  Compute eigenvalues and eigenvectors of the original
+*                  symmetric matrix.  On entry, Z must contain the
+*                  orthogonal matrix used to reduce the original matrix
+*                  to tridiagonal form.
+*          = 'I':  Compute eigenvalues and eigenvectors of the
+*                  tridiagonal matrix.  Z is initialized to the identity
+*                  matrix.
+*
+*  N       (input) INTEGER
+*          The order of the matrix.  N >= 0.
+*
+*  D       (input/output) REAL array, dimension (N)
+*          On entry, the diagonal elements of the tridiagonal matrix.
+*          On exit, if INFO = 0, the eigenvalues in ascending order.
+*
+*  E       (input/output) REAL array, dimension (N-1)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix.
+*          On exit, E has been destroyed.
+*
+*  Z       (input/output) REAL array, dimension (LDZ, N)
+*          On entry, if  COMPZ = 'V', then Z contains the orthogonal
+*          matrix used in the reduction to tridiagonal form.
+*          On exit, if INFO = 0, then if  COMPZ = 'V', Z contains the
+*          orthonormal eigenvectors of the original symmetric matrix,
+*          and if COMPZ = 'I', Z contains the orthonormal eigenvectors
+*          of the symmetric tridiagonal matrix.
+*          If COMPZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          eigenvectors are desired, then  LDZ >= max(1,N).
+*
+*  WORK    (workspace) REAL array, dimension (max(1,2*N-2))
+*          If COMPZ = 'N', then WORK is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  the algorithm has failed to find all the eigenvalues in
+*                a total of 30*N iterations; if INFO = i, then i
+*                elements of E have not converged to zero; on exit, D
+*                and E contain the elements of a symmetric tridiagonal
+*                matrix which is orthogonally similar to the original
+*                matrix.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE, TWO, THREE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0,
+     $                   THREE = 3.0E0 )
+      INTEGER            MAXIT
+      PARAMETER          ( MAXIT = 30 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, ICOMPZ, II, ISCALE, J, JTOT, K, L, L1, LEND,
+     $                   LENDM1, LENDP1, LENDSV, LM1, LSV, M, MM, MM1,
+     $                   NM1, NMAXIT
+      REAL               ANORM, B, C, EPS, EPS2, F, G, P, R, RT1, RT2,
+     $                   S, SAFMAX, SAFMIN, SSFMAX, SSFMIN, TST
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH, SLANST, SLAPY2
+      EXTERNAL           LSAME, SLAMCH, SLANST, SLAPY2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLAE2, SLAEV2, SLARTG, SLASCL, SLASET, SLASR,
+     $                   SLASRT, SSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SIGN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( LSAME( COMPZ, 'N' ) ) THEN
+         ICOMPZ = 0
+      ELSE IF( LSAME( COMPZ, 'V' ) ) THEN
+         ICOMPZ = 1
+      ELSE IF( LSAME( COMPZ, 'I' ) ) THEN
+         ICOMPZ = 2
+      ELSE
+         ICOMPZ = -1
+      END IF
+      IF( ICOMPZ.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1,
+     $         N ) ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSTEQR', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( ICOMPZ.EQ.2 )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Determine the unit roundoff and over/underflow thresholds.
+*
+      EPS = SLAMCH( 'E' )
+      EPS2 = EPS**2
+      SAFMIN = SLAMCH( 'S' )
+      SAFMAX = ONE / SAFMIN
+      SSFMAX = SQRT( SAFMAX ) / THREE
+      SSFMIN = SQRT( SAFMIN ) / EPS2
+*
+*     Compute the eigenvalues and eigenvectors of the tridiagonal
+*     matrix.
+*
+      IF( ICOMPZ.EQ.2 )
+     $   CALL SLASET( 'Full', N, N, ZERO, ONE, Z, LDZ )
+*
+      NMAXIT = N*MAXIT
+      JTOT = 0
+*
+*     Determine where the matrix splits and choose QL or QR iteration
+*     for each block, according to whether top or bottom diagonal
+*     element is smaller.
+*
+      L1 = 1
+      NM1 = N - 1
+*
+   10 CONTINUE
+      IF( L1.GT.N )
+     $   GO TO 160
+      IF( L1.GT.1 )
+     $   E( L1-1 ) = ZERO
+      IF( L1.LE.NM1 ) THEN
+         DO 20 M = L1, NM1
+            TST = ABS( E( M ) )
+            IF( TST.EQ.ZERO )
+     $         GO TO 30
+            IF( TST.LE.( SQRT( ABS( D( M ) ) )*SQRT( ABS( D( M+
+     $          1 ) ) ) )*EPS ) THEN
+               E( M ) = ZERO
+               GO TO 30
+            END IF
+   20    CONTINUE
+      END IF
+      M = N
+*
+   30 CONTINUE
+      L = L1
+      LSV = L
+      LEND = M
+      LENDSV = LEND
+      L1 = M + 1
+      IF( LEND.EQ.L )
+     $   GO TO 10
+*
+*     Scale submatrix in rows and columns L to LEND
+*
+      ANORM = SLANST( 'I', LEND-L+1, D( L ), E( L ) )
+      ISCALE = 0
+      IF( ANORM.EQ.ZERO )
+     $   GO TO 10
+      IF( ANORM.GT.SSFMAX ) THEN
+         ISCALE = 1
+         CALL SLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N,
+     $                INFO )
+         CALL SLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L, 1, E( L ), N,
+     $                INFO )
+      ELSE IF( ANORM.LT.SSFMIN ) THEN
+         ISCALE = 2
+         CALL SLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L+1, 1, D( L ), N,
+     $                INFO )
+         CALL SLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L, 1, E( L ), N,
+     $                INFO )
+      END IF
+*
+*     Choose between QL and QR iteration
+*
+      IF( ABS( D( LEND ) ).LT.ABS( D( L ) ) ) THEN
+         LEND = LSV
+         L = LENDSV
+      END IF
+*
+      IF( LEND.GT.L ) THEN
+*
+*        QL Iteration
+*
+*        Look for small subdiagonal element.
+*
+   40    CONTINUE
+         IF( L.NE.LEND ) THEN
+            LENDM1 = LEND - 1
+            DO 50 M = L, LENDM1
+               TST = ABS( E( M ) )**2
+               IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M+1 ) )+
+     $             SAFMIN )GO TO 60
+   50       CONTINUE
+         END IF
+*
+         M = LEND
+*
+   60    CONTINUE
+         IF( M.LT.LEND )
+     $      E( M ) = ZERO
+         P = D( L )
+         IF( M.EQ.L )
+     $      GO TO 80
+*
+*        If remaining matrix is 2-by-2, use SLAE2 or SLAEV2
+*        to compute its eigensystem.
+*
+         IF( M.EQ.L+1 ) THEN
+            IF( ICOMPZ.GT.0 ) THEN
+               CALL SLAEV2( D( L ), E( L ), D( L+1 ), RT1, RT2, C, S )
+               WORK( L ) = C
+               WORK( N-1+L ) = S
+               CALL SLASR( 'R', 'V', 'B', N, 2, WORK( L ),
+     $                     WORK( N-1+L ), Z( 1, L ), LDZ )
+            ELSE
+               CALL SLAE2( D( L ), E( L ), D( L+1 ), RT1, RT2 )
+            END IF
+            D( L ) = RT1
+            D( L+1 ) = RT2
+            E( L ) = ZERO
+            L = L + 2
+            IF( L.LE.LEND )
+     $         GO TO 40
+            GO TO 140
+         END IF
+*
+         IF( JTOT.EQ.NMAXIT )
+     $      GO TO 140
+         JTOT = JTOT + 1
+*
+*        Form shift.
+*
+         G = ( D( L+1 )-P ) / ( TWO*E( L ) )
+         R = SLAPY2( G, ONE )
+         G = D( M ) - P + ( E( L ) / ( G+SIGN( R, G ) ) )
+*
+         S = ONE
+         C = ONE
+         P = ZERO
+*
+*        Inner loop
+*
+         MM1 = M - 1
+         DO 70 I = MM1, L, -1
+            F = S*E( I )
+            B = C*E( I )
+            CALL SLARTG( G, F, C, S, R )
+            IF( I.NE.M-1 )
+     $         E( I+1 ) = R
+            G = D( I+1 ) - P
+            R = ( D( I )-G )*S + TWO*C*B
+            P = S*R
+            D( I+1 ) = G + P
+            G = C*R - B
+*
+*           If eigenvectors are desired, then save rotations.
+*
+            IF( ICOMPZ.GT.0 ) THEN
+               WORK( I ) = C
+               WORK( N-1+I ) = -S
+            END IF
+*
+   70    CONTINUE
+*
+*        If eigenvectors are desired, then apply saved rotations.
+*
+         IF( ICOMPZ.GT.0 ) THEN
+            MM = M - L + 1
+            CALL SLASR( 'R', 'V', 'B', N, MM, WORK( L ), WORK( N-1+L ),
+     $                  Z( 1, L ), LDZ )
+         END IF
+*
+         D( L ) = D( L ) - P
+         E( L ) = G
+         GO TO 40
+*
+*        Eigenvalue found.
+*
+   80    CONTINUE
+         D( L ) = P
+*
+         L = L + 1
+         IF( L.LE.LEND )
+     $      GO TO 40
+         GO TO 140
+*
+      ELSE
+*
+*        QR Iteration
+*
+*        Look for small superdiagonal element.
+*
+   90    CONTINUE
+         IF( L.NE.LEND ) THEN
+            LENDP1 = LEND + 1
+            DO 100 M = L, LENDP1, -1
+               TST = ABS( E( M-1 ) )**2
+               IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M-1 ) )+
+     $             SAFMIN )GO TO 110
+  100       CONTINUE
+         END IF
+*
+         M = LEND
+*
+  110    CONTINUE
+         IF( M.GT.LEND )
+     $      E( M-1 ) = ZERO
+         P = D( L )
+         IF( M.EQ.L )
+     $      GO TO 130
+*
+*        If remaining matrix is 2-by-2, use SLAE2 or SLAEV2
+*        to compute its eigensystem.
+*
+         IF( M.EQ.L-1 ) THEN
+            IF( ICOMPZ.GT.0 ) THEN
+               CALL SLAEV2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2, C, S )
+               WORK( M ) = C
+               WORK( N-1+M ) = S
+               CALL SLASR( 'R', 'V', 'F', N, 2, WORK( M ),
+     $                     WORK( N-1+M ), Z( 1, L-1 ), LDZ )
+            ELSE
+               CALL SLAE2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2 )
+            END IF
+            D( L-1 ) = RT1
+            D( L ) = RT2
+            E( L-1 ) = ZERO
+            L = L - 2
+            IF( L.GE.LEND )
+     $         GO TO 90
+            GO TO 140
+         END IF
+*
+         IF( JTOT.EQ.NMAXIT )
+     $      GO TO 140
+         JTOT = JTOT + 1
+*
+*        Form shift.
+*
+         G = ( D( L-1 )-P ) / ( TWO*E( L-1 ) )
+         R = SLAPY2( G, ONE )
+         G = D( M ) - P + ( E( L-1 ) / ( G+SIGN( R, G ) ) )
+*
+         S = ONE
+         C = ONE
+         P = ZERO
+*
+*        Inner loop
+*
+         LM1 = L - 1
+         DO 120 I = M, LM1
+            F = S*E( I )
+            B = C*E( I )
+            CALL SLARTG( G, F, C, S, R )
+            IF( I.NE.M )
+     $         E( I-1 ) = R
+            G = D( I ) - P
+            R = ( D( I+1 )-G )*S + TWO*C*B
+            P = S*R
+            D( I ) = G + P
+            G = C*R - B
+*
+*           If eigenvectors are desired, then save rotations.
+*
+            IF( ICOMPZ.GT.0 ) THEN
+               WORK( I ) = C
+               WORK( N-1+I ) = S
+            END IF
+*
+  120    CONTINUE
+*
+*        If eigenvectors are desired, then apply saved rotations.
+*
+         IF( ICOMPZ.GT.0 ) THEN
+            MM = L - M + 1
+            CALL SLASR( 'R', 'V', 'F', N, MM, WORK( M ), WORK( N-1+M ),
+     $                  Z( 1, M ), LDZ )
+         END IF
+*
+         D( L ) = D( L ) - P
+         E( LM1 ) = G
+         GO TO 90
+*
+*        Eigenvalue found.
+*
+  130    CONTINUE
+         D( L ) = P
+*
+         L = L - 1
+         IF( L.GE.LEND )
+     $      GO TO 90
+         GO TO 140
+*
+      END IF
+*
+*     Undo scaling if necessary
+*
+  140 CONTINUE
+      IF( ISCALE.EQ.1 ) THEN
+         CALL SLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV+1, 1,
+     $                D( LSV ), N, INFO )
+         CALL SLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV, 1, E( LSV ),
+     $                N, INFO )
+      ELSE IF( ISCALE.EQ.2 ) THEN
+         CALL SLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV+1, 1,
+     $                D( LSV ), N, INFO )
+         CALL SLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV, 1, E( LSV ),
+     $                N, INFO )
+      END IF
+*
+*     Check for no convergence to an eigenvalue after a total
+*     of N*MAXIT iterations.
+*
+      IF( JTOT.LT.NMAXIT )
+     $   GO TO 10
+      DO 150 I = 1, N - 1
+         IF( E( I ).NE.ZERO )
+     $      INFO = INFO + 1
+  150 CONTINUE
+      GO TO 190
+*
+*     Order eigenvalues and eigenvectors.
+*
+  160 CONTINUE
+      IF( ICOMPZ.EQ.0 ) THEN
+*
+*        Use Quick Sort
+*
+         CALL SLASRT( 'I', N, D, INFO )
+*
+      ELSE
+*
+*        Use Selection Sort to minimize swaps of eigenvectors
+*
+         DO 180 II = 2, N
+            I = II - 1
+            K = I
+            P = D( I )
+            DO 170 J = II, N
+               IF( D( J ).LT.P ) THEN
+                  K = J
+                  P = D( J )
+               END IF
+  170       CONTINUE
+            IF( K.NE.I ) THEN
+               D( K ) = D( I )
+               D( I ) = P
+               CALL SSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 )
+            END IF
+  180    CONTINUE
+      END IF
+*
+  190 CONTINUE
+      RETURN
+*
+*     End of SSTEQR
+*
+      END
diff --git a/costa/native/external/lapack/ssterf.f b/costa/native/external/lapack/ssterf.f
new file mode 100644
index 000000000..439c81e68
--- /dev/null
+++ b/costa/native/external/lapack/ssterf.f
@@ -0,0 +1,365 @@
+      SUBROUTINE SSTERF( N, D, E, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * ), E( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSTERF computes all eigenvalues of a symmetric tridiagonal matrix
+*  using the Pal-Walker-Kahan variant of the QL or QR algorithm.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix.  N >= 0.
+*
+*  D       (input/output) REAL array, dimension (N)
+*          On entry, the n diagonal elements of the tridiagonal matrix.
+*          On exit, if INFO = 0, the eigenvalues in ascending order.
+*
+*  E       (input/output) REAL array, dimension (N-1)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix.
+*          On exit, E has been destroyed.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  the algorithm failed to find all of the eigenvalues in
+*                a total of 30*N iterations; if INFO = i, then i
+*                elements of E have not converged to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE, TWO, THREE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0, TWO = 2.0E0,
+     $                   THREE = 3.0E0 )
+      INTEGER            MAXIT
+      PARAMETER          ( MAXIT = 30 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, ISCALE, JTOT, L, L1, LEND, LENDSV, LSV, M,
+     $                   NMAXIT
+      REAL               ALPHA, ANORM, BB, C, EPS, EPS2, GAMMA, OLDC,
+     $                   OLDGAM, P, R, RT1, RT2, RTE, S, SAFMAX, SAFMIN,
+     $                   SIGMA, SSFMAX, SSFMIN
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH, SLANST, SLAPY2
+      EXTERNAL           SLAMCH, SLANST, SLAPY2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLAE2, SLASCL, SLASRT, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, SIGN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+*     Quick return if possible
+*
+      IF( N.LT.0 ) THEN
+         INFO = -1
+         CALL XERBLA( 'SSTERF', -INFO )
+         RETURN
+      END IF
+      IF( N.LE.1 )
+     $   RETURN
+*
+*     Determine the unit roundoff for this environment.
+*
+      EPS = SLAMCH( 'E' )
+      EPS2 = EPS**2
+      SAFMIN = SLAMCH( 'S' )
+      SAFMAX = ONE / SAFMIN
+      SSFMAX = SQRT( SAFMAX ) / THREE
+      SSFMIN = SQRT( SAFMIN ) / EPS2
+*
+*     Compute the eigenvalues of the tridiagonal matrix.
+*
+      NMAXIT = N*MAXIT
+      SIGMA = ZERO
+      JTOT = 0
+*
+*     Determine where the matrix splits and choose QL or QR iteration
+*     for each block, according to whether top or bottom diagonal
+*     element is smaller.
+*
+      L1 = 1
+*
+   10 CONTINUE
+      IF( L1.GT.N )
+     $   GO TO 170
+      IF( L1.GT.1 )
+     $   E( L1-1 ) = ZERO
+      DO 20 M = L1, N - 1
+         IF( ABS( E( M ) ).LE.( SQRT( ABS( D( M ) ) )*
+     $       SQRT( ABS( D( M+1 ) ) ) )*EPS ) THEN
+            E( M ) = ZERO
+            GO TO 30
+         END IF
+   20 CONTINUE
+      M = N
+*
+   30 CONTINUE
+      L = L1
+      LSV = L
+      LEND = M
+      LENDSV = LEND
+      L1 = M + 1
+      IF( LEND.EQ.L )
+     $   GO TO 10
+*
+*     Scale submatrix in rows and columns L to LEND
+*
+      ANORM = SLANST( 'I', LEND-L+1, D( L ), E( L ) )
+      ISCALE = 0
+      IF( ANORM.GT.SSFMAX ) THEN
+         ISCALE = 1
+         CALL SLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N,
+     $                INFO )
+         CALL SLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L, 1, E( L ), N,
+     $                INFO )
+      ELSE IF( ANORM.LT.SSFMIN ) THEN
+         ISCALE = 2
+         CALL SLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L+1, 1, D( L ), N,
+     $                INFO )
+         CALL SLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L, 1, E( L ), N,
+     $                INFO )
+      END IF
+*
+      DO 40 I = L, LEND - 1
+         E( I ) = E( I )**2
+   40 CONTINUE
+*
+*     Choose between QL and QR iteration
+*
+      IF( ABS( D( LEND ) ).LT.ABS( D( L ) ) ) THEN
+         LEND = LSV
+         L = LENDSV
+      END IF
+*
+      IF( LEND.GE.L ) THEN
+*
+*        QL Iteration
+*
+*        Look for small subdiagonal element.
+*
+   50    CONTINUE
+         IF( L.NE.LEND ) THEN
+            DO 60 M = L, LEND - 1
+               IF( ABS( E( M ) ).LE.EPS2*ABS( D( M )*D( M+1 ) ) )
+     $            GO TO 70
+   60       CONTINUE
+         END IF
+         M = LEND
+*
+   70    CONTINUE
+         IF( M.LT.LEND )
+     $      E( M ) = ZERO
+         P = D( L )
+         IF( M.EQ.L )
+     $      GO TO 90
+*
+*        If remaining matrix is 2 by 2, use SLAE2 to compute its
+*        eigenvalues.
+*
+         IF( M.EQ.L+1 ) THEN
+            RTE = SQRT( E( L ) )
+            CALL SLAE2( D( L ), RTE, D( L+1 ), RT1, RT2 )
+            D( L ) = RT1
+            D( L+1 ) = RT2
+            E( L ) = ZERO
+            L = L + 2
+            IF( L.LE.LEND )
+     $         GO TO 50
+            GO TO 150
+         END IF
+*
+         IF( JTOT.EQ.NMAXIT )
+     $      GO TO 150
+         JTOT = JTOT + 1
+*
+*        Form shift.
+*
+         RTE = SQRT( E( L ) )
+         SIGMA = ( D( L+1 )-P ) / ( TWO*RTE )
+         R = SLAPY2( SIGMA, ONE )
+         SIGMA = P - ( RTE / ( SIGMA+SIGN( R, SIGMA ) ) )
+*
+         C = ONE
+         S = ZERO
+         GAMMA = D( M ) - SIGMA
+         P = GAMMA*GAMMA
+*
+*        Inner loop
+*
+         DO 80 I = M - 1, L, -1
+            BB = E( I )
+            R = P + BB
+            IF( I.NE.M-1 )
+     $         E( I+1 ) = S*R
+            OLDC = C
+            C = P / R
+            S = BB / R
+            OLDGAM = GAMMA
+            ALPHA = D( I )
+            GAMMA = C*( ALPHA-SIGMA ) - S*OLDGAM
+            D( I+1 ) = OLDGAM + ( ALPHA-GAMMA )
+            IF( C.NE.ZERO ) THEN
+               P = ( GAMMA*GAMMA ) / C
+            ELSE
+               P = OLDC*BB
+            END IF
+   80    CONTINUE
+*
+         E( L ) = S*P
+         D( L ) = SIGMA + GAMMA
+         GO TO 50
+*
+*        Eigenvalue found.
+*
+   90    CONTINUE
+         D( L ) = P
+*
+         L = L + 1
+         IF( L.LE.LEND )
+     $      GO TO 50
+         GO TO 150
+*
+      ELSE
+*
+*        QR Iteration
+*
+*        Look for small superdiagonal element.
+*
+  100    CONTINUE
+         DO 110 M = L, LEND + 1, -1
+            IF( ABS( E( M-1 ) ).LE.EPS2*ABS( D( M )*D( M-1 ) ) )
+     $         GO TO 120
+  110    CONTINUE
+         M = LEND
+*
+  120    CONTINUE
+         IF( M.GT.LEND )
+     $      E( M-1 ) = ZERO
+         P = D( L )
+         IF( M.EQ.L )
+     $      GO TO 140
+*
+*        If remaining matrix is 2 by 2, use SLAE2 to compute its
+*        eigenvalues.
+*
+         IF( M.EQ.L-1 ) THEN
+            RTE = SQRT( E( L-1 ) )
+            CALL SLAE2( D( L ), RTE, D( L-1 ), RT1, RT2 )
+            D( L ) = RT1
+            D( L-1 ) = RT2
+            E( L-1 ) = ZERO
+            L = L - 2
+            IF( L.GE.LEND )
+     $         GO TO 100
+            GO TO 150
+         END IF
+*
+         IF( JTOT.EQ.NMAXIT )
+     $      GO TO 150
+         JTOT = JTOT + 1
+*
+*        Form shift.
+*
+         RTE = SQRT( E( L-1 ) )
+         SIGMA = ( D( L-1 )-P ) / ( TWO*RTE )
+         R = SLAPY2( SIGMA, ONE )
+         SIGMA = P - ( RTE / ( SIGMA+SIGN( R, SIGMA ) ) )
+*
+         C = ONE
+         S = ZERO
+         GAMMA = D( M ) - SIGMA
+         P = GAMMA*GAMMA
+*
+*        Inner loop
+*
+         DO 130 I = M, L - 1
+            BB = E( I )
+            R = P + BB
+            IF( I.NE.M )
+     $         E( I-1 ) = S*R
+            OLDC = C
+            C = P / R
+            S = BB / R
+            OLDGAM = GAMMA
+            ALPHA = D( I+1 )
+            GAMMA = C*( ALPHA-SIGMA ) - S*OLDGAM
+            D( I ) = OLDGAM + ( ALPHA-GAMMA )
+            IF( C.NE.ZERO ) THEN
+               P = ( GAMMA*GAMMA ) / C
+            ELSE
+               P = OLDC*BB
+            END IF
+  130    CONTINUE
+*
+         E( L-1 ) = S*P
+         D( L ) = SIGMA + GAMMA
+         GO TO 100
+*
+*        Eigenvalue found.
+*
+  140    CONTINUE
+         D( L ) = P
+*
+         L = L - 1
+         IF( L.GE.LEND )
+     $      GO TO 100
+         GO TO 150
+*
+      END IF
+*
+*     Undo scaling if necessary
+*
+  150 CONTINUE
+      IF( ISCALE.EQ.1 )
+     $   CALL SLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV+1, 1,
+     $                D( LSV ), N, INFO )
+      IF( ISCALE.EQ.2 )
+     $   CALL SLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV+1, 1,
+     $                D( LSV ), N, INFO )
+*
+*     Check for no convergence to an eigenvalue after a total
+*     of N*MAXIT iterations.
+*
+      IF( JTOT.LT.NMAXIT )
+     $   GO TO 10
+      DO 160 I = 1, N - 1
+         IF( E( I ).NE.ZERO )
+     $      INFO = INFO + 1
+  160 CONTINUE
+      GO TO 180
+*
+*     Sort eigenvalues in increasing order.
+*
+  170 CONTINUE
+      CALL SLASRT( 'I', N, D, INFO )
+*
+  180 CONTINUE
+      RETURN
+*
+*     End of SSTERF
+*
+      END
diff --git a/costa/native/external/lapack/sstev.f b/costa/native/external/lapack/sstev.f
new file mode 100644
index 000000000..c53d2596b
--- /dev/null
+++ b/costa/native/external/lapack/sstev.f
@@ -0,0 +1,165 @@
+      SUBROUTINE SSTEV( JOBZ, N, D, E, Z, LDZ, WORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ
+      INTEGER            INFO, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      REAL               D( * ), E( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSTEV computes all eigenvalues and, optionally, eigenvectors of a
+*  real symmetric tridiagonal matrix A.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  N       (input) INTEGER
+*          The order of the matrix.  N >= 0.
+*
+*  D       (input/output) REAL array, dimension (N)
+*          On entry, the n diagonal elements of the tridiagonal matrix
+*          A.
+*          On exit, if INFO = 0, the eigenvalues in ascending order.
+*
+*  E       (input/output) REAL array, dimension (N)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix A, stored in elements 1 to N-1 of E; E(N) need not
+*          be set, but is used by the routine.
+*          On exit, the contents of E are destroyed.
+*
+*  Z       (output) REAL array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal
+*          eigenvectors of the matrix A, with the i-th column of Z
+*          holding the eigenvector associated with D(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace) REAL array, dimension (max(1,2*N-2))
+*          If JOBZ = 'N', WORK is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the algorithm failed to converge; i
+*                off-diagonal elements of E did not converge to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            WANTZ
+      INTEGER            IMAX, ISCALE
+      REAL               BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, SMLNUM,
+     $                   TNRM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH, SLANST
+      EXTERNAL           LSAME, SLAMCH, SLANST
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SSCAL, SSTEQR, SSTERF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -6
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSTEV ', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      EPS = SLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = SQRT( BIGNUM )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ISCALE = 0
+      TNRM = SLANST( 'M', N, D, E )
+      IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / TNRM
+      ELSE IF( TNRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / TNRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         CALL SSCAL( N, SIGMA, D, 1 )
+         CALL SSCAL( N-1, SIGMA, E( 1 ), 1 )
+      END IF
+*
+*     For eigenvalues only, call SSTERF.  For eigenvalues and
+*     eigenvectors, call SSTEQR.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL SSTERF( N, D, E, INFO )
+      ELSE
+         CALL SSTEQR( 'I', N, D, E, Z, LDZ, WORK, INFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = N
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL SSCAL( IMAX, ONE / SIGMA, D, 1 )
+      END IF
+*
+      RETURN
+*
+*     End of SSTEV
+*
+      END
diff --git a/costa/native/external/lapack/sstevd.f b/costa/native/external/lapack/sstevd.f
new file mode 100644
index 000000000..49530a1f0
--- /dev/null
+++ b/costa/native/external/lapack/sstevd.f
@@ -0,0 +1,217 @@
+      SUBROUTINE SSTEVD( JOBZ, N, D, E, Z, LDZ, WORK, LWORK, IWORK,
+     $                   LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ
+      INTEGER            INFO, LDZ, LIWORK, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               D( * ), E( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSTEVD computes all eigenvalues and, optionally, eigenvectors of a
+*  real symmetric tridiagonal matrix. If eigenvectors are desired, it
+*  uses a divide and conquer algorithm.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  N       (input) INTEGER
+*          The order of the matrix.  N >= 0.
+*
+*  D       (input/output) REAL array, dimension (N)
+*          On entry, the n diagonal elements of the tridiagonal matrix
+*          A.
+*          On exit, if INFO = 0, the eigenvalues in ascending order.
+*
+*  E       (input/output) REAL array, dimension (N)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix A, stored in elements 1 to N-1 of E; E(N) need not
+*          be set, but is used by the routine.
+*          On exit, the contents of E are destroyed.
+*
+*  Z       (output) REAL array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal
+*          eigenvectors of the matrix A, with the i-th column of Z
+*          holding the eigenvector associated with D(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace/output) REAL array,
+*                                         dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If JOBZ  = 'N' or N <= 1 then LWORK must be at least 1.
+*          If JOBZ  = 'V' and N > 1 then LWORK must be at least
+*                         ( 1 + 4*N + N**2 ).
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.
+*          If JOBZ  = 'N' or N <= 1 then LIWORK must be at least 1.
+*          If JOBZ  = 'V' and N > 1 then LIWORK must be at least 3+5*N.
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the algorithm failed to converge; i
+*                off-diagonal elements of E did not converge to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, WANTZ
+      INTEGER            ISCALE, LIWMIN, LWMIN
+      REAL               BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, SMLNUM,
+     $                   TNRM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH, SLANST
+      EXTERNAL           LSAME, SLAMCH, SLANST
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SSCAL, SSTEDC, SSTERF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      INFO = 0
+      LIWMIN = 1
+      LWMIN = 1
+      IF( N.GT.1 .AND. WANTZ ) THEN
+         LWMIN = 1 + 4*N + N**2
+         LIWMIN = 3 + 5*N
+      END IF
+*
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -6
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -10
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSTEVD', -INFO )
+         RETURN 
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN 
+*
+      IF( N.EQ.1 ) THEN
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN 
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      EPS = SLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = SQRT( BIGNUM )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ISCALE = 0
+      TNRM = SLANST( 'M', N, D, E )
+      IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / TNRM
+      ELSE IF( TNRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / TNRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         CALL SSCAL( N, SIGMA, D, 1 )
+         CALL SSCAL( N-1, SIGMA, E( 1 ), 1 )
+      END IF
+*
+*     For eigenvalues only, call SSTERF.  For eigenvalues and
+*     eigenvectors, call SSTEDC.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL SSTERF( N, D, E, INFO )
+      ELSE
+         CALL SSTEDC( 'I', N, D, E, Z, LDZ, WORK, LWORK, IWORK, LIWORK,
+     $                INFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( ISCALE.EQ.1 )
+     $   CALL SSCAL( N, ONE / SIGMA, D, 1 )
+*
+      WORK( 1 ) = LWMIN
+      IWORK( 1 ) = LIWMIN
+*
+      RETURN
+*
+*     End of SSTEVD
+*
+      END
diff --git a/costa/native/external/lapack/sstevr.f b/costa/native/external/lapack/sstevr.f
new file mode 100644
index 000000000..047b418f9
--- /dev/null
+++ b/costa/native/external/lapack/sstevr.f
@@ -0,0 +1,434 @@
+      SUBROUTINE SSTEVR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, ABSTOL,
+     $                   M, W, Z, LDZ, ISUPPZ, WORK, LWORK, IWORK,
+     $                   LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 20, 2000
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE
+      INTEGER            IL, INFO, IU, LDZ, LIWORK, LWORK, M, N
+      REAL               ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            ISUPPZ( * ), IWORK( * )
+      REAL               D( * ), E( * ), W( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSTEVR computes selected eigenvalues and, optionally, eigenvectors
+*  of a real symmetric tridiagonal matrix T.  Eigenvalues and
+*  eigenvectors can be selected by specifying either a range of values
+*  or a range of indices for the desired eigenvalues.
+*
+*  Whenever possible, SSTEVR calls SSTEGR to compute the
+*  eigenspectrum using Relatively Robust Representations.  SSTEGR
+*  computes eigenvalues by the dqds algorithm, while orthogonal
+*  eigenvectors are computed from various "good" L D L^T representations
+*  (also known as Relatively Robust Representations). Gram-Schmidt
+*  orthogonalization is avoided as far as possible. More specifically,
+*  the various steps of the algorithm are as follows. For the i-th
+*  unreduced block of T,
+*     (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T
+*          is a relatively robust representation,
+*     (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high
+*         relative accuracy by the dqds algorithm,
+*     (c) If there is a cluster of close eigenvalues, "choose" sigma_i
+*         close to the cluster, and go to step (a),
+*     (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T,
+*         compute the corresponding eigenvector by forming a
+*         rank-revealing twisted factorization.
+*  The desired accuracy of the output can be specified by the input
+*  parameter ABSTOL.
+*
+*  For more details, see "A new O(n^2) algorithm for the symmetric
+*  tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon,
+*  Computer Science Division Technical Report No. UCB//CSD-97-971,
+*  UC Berkeley, May 1997.
+*
+*
+*  Note 1 : SSTEVR calls SSTEGR when the full spectrum is requested
+*  on machines which conform to the ieee-754 floating point standard.
+*  SSTEVR calls SSTEBZ and SSTEIN on non-ieee machines and
+*  when partial spectrum requests are made.
+*
+*  Normal execution of SSTEGR may create NaNs and infinities and
+*  hence may abort due to a floating point exception in environments
+*  which do not handle NaNs and infinities in the ieee standard default
+*  manner.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found.
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found.
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+********** For RANGE = 'V' or 'I' and IU - IL < N - 1, SSTEBZ and
+********** SSTEIN are called
+*
+*  N       (input) INTEGER
+*          The order of the matrix.  N >= 0.
+*
+*  D       (input/output) REAL array, dimension (N)
+*          On entry, the n diagonal elements of the tridiagonal matrix
+*          A.
+*          On exit, D may be multiplied by a constant factor chosen
+*          to avoid over/underflow in computing the eigenvalues.
+*
+*  E       (input/output) REAL array, dimension (N)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix A in elements 1 to N-1 of E; E(N) need not be set.
+*          On exit, E may be multiplied by a constant factor chosen
+*          to avoid over/underflow in computing the eigenvalues.
+*
+*  VL      (input) REAL
+*  VU      (input) REAL
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) REAL
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less than
+*          or equal to zero, then  EPS*|T|  will be used in its place,
+*          where |T| is the 1-norm of the tridiagonal matrix obtained
+*          by reducing A to tridiagonal form.
+*
+*          See "Computing Small Singular Values of Bidiagonal Matrices
+*          with Guaranteed High Relative Accuracy," by Demmel and
+*          Kahan, LAPACK Working Note #3.
+*
+*          If high relative accuracy is important, set ABSTOL to
+*          SLAMCH( 'Safe minimum' ).  Doing so will guarantee that
+*          eigenvalues are computed to high relative accuracy when
+*          possible in future releases.  The current code does not
+*          make any guarantees about high relative accuracy, but
+*          future releases will. See J. Barlow and J. Demmel,
+*          "Computing Accurate Eigensystems of Scaled Diagonally
+*          Dominant Matrices", LAPACK Working Note #7, for a discussion
+*          of which matrices define their eigenvalues to high relative
+*          accuracy.
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) REAL array, dimension (N)
+*          The first M elements contain the selected eigenvalues in
+*          ascending order.
+*
+*  Z       (output) REAL array, dimension (LDZ, max(1,M) )
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix A
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  ISUPPZ  (output) INTEGER array, dimension ( 2*max(1,M) )
+*          The support of the eigenvectors in Z, i.e., the indices
+*          indicating the nonzero elements in Z. The i-th eigenvector
+*          is nonzero only in elements ISUPPZ( 2*i-1 ) through
+*          ISUPPZ( 2*i ).
+********** Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal (and
+*          minimal) LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= 20*N.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal (and
+*          minimal) LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.  LIWORK >= 10*N.
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  Internal error
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Inderjit Dhillon, IBM Almaden, USA
+*     Osni Marques, LBNL/NERSC, USA
+*     Ken Stanley, Computer Science Division, University of
+*       California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ
+      CHARACTER          ORDER
+      INTEGER            I, IEEEOK, IMAX, INDIBL, INDIFL, INDISP,
+     $                   INDIWO, ISCALE, ITMP1, J, JJ, LIWMIN, LWMIN,
+     $                   NSPLIT
+      REAL               BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, SMLNUM,
+     $                   TMP1, TNRM, VLL, VUU
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               SLAMCH, SLANST
+      EXTERNAL           LSAME, ILAENV, SLAMCH, SLANST
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SSCAL, SSTEBZ, SSTEGR, SSTEIN, SSTERF,
+     $                   SSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*
+*     Test the input parameters.
+*
+      IEEEOK = ILAENV( 10, 'SSTEVR', 'N', 1, 2, 3, 4 )
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+*
+      LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LIWORK.EQ.-1 ) )
+      LWMIN = 20*N
+      LIWMIN = 10*N
+*
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE
+         IF( VALEIG ) THEN
+            IF( N.GT.0 .AND. VU.LE.VL )
+     $         INFO = -7
+         ELSE IF( INDEIG ) THEN
+            IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN
+               INFO = -8
+            ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN
+               INFO = -9
+            END IF
+         END IF
+      END IF
+      IF( INFO.EQ.0 ) THEN
+         IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+            INFO = -14
+         ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+            INFO = -17
+         ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+            INFO = -19
+         END IF
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSTEVR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( ALLEIG .OR. INDEIG ) THEN
+            M = 1
+            W( 1 ) = D( 1 )
+         ELSE
+            IF( VL.LT.D( 1 ) .AND. VU.GE.D( 1 ) ) THEN
+               M = 1
+               W( 1 ) = D( 1 )
+            END IF
+         END IF
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      EPS = SLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) )
+*
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ISCALE = 0
+      VLL = VL
+      VUU = VU
+*
+      TNRM = SLANST( 'M', N, D, E )
+      IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / TNRM
+      ELSE IF( TNRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / TNRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         CALL SSCAL( N, SIGMA, D, 1 )
+         CALL SSCAL( N-1, SIGMA, E( 1 ), 1 )
+         IF( VALEIG ) THEN
+            VLL = VL*SIGMA
+            VUU = VU*SIGMA
+         END IF
+      END IF
+*
+*     If all eigenvalues are desired, then
+*     call SSTERF or SSTEGR.  If this fails for some eigenvalue, then
+*     try SSTEBZ.
+*
+*
+      IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND.
+     $    IEEEOK.EQ.1 ) THEN
+         CALL SCOPY( N-1, E( 1 ), 1, WORK( 1 ), 1 )
+         IF( .NOT.WANTZ ) THEN
+            CALL SCOPY( N, D, 1, W, 1 )
+            CALL SSTERF( N, W, WORK, INFO )
+         ELSE
+            CALL SCOPY( N, D, 1, WORK( N+1 ), 1 )
+            CALL SSTEGR( JOBZ, 'A', N, WORK( N+1 ), WORK, VL, VU, IL,
+     $                   IU, ABSTOL, M, W, Z, LDZ, ISUPPZ,
+     $                   WORK( 2*N+1 ), LWORK-2*N, IWORK, LIWORK, INFO )
+*
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            M = N
+            GO TO 10
+         END IF
+         INFO = 0
+      END IF
+*
+*     Otherwise, call SSTEBZ and, if eigenvectors are desired, SSTEIN.
+*
+      IF( WANTZ ) THEN
+         ORDER = 'B'
+      ELSE
+         ORDER = 'E'
+      END IF
+      INDIBL = 1
+      INDISP = INDIBL + N
+      INDIFL = INDISP + N
+      INDIWO = INDIFL + N
+      CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTOL, D, E, M,
+     $             NSPLIT, W, IWORK( INDIBL ), IWORK( INDISP ), WORK,
+     $             IWORK( INDIWO ), INFO )
+*
+      IF( WANTZ ) THEN
+         CALL SSTEIN( N, D, E, M, W, IWORK( INDIBL ), IWORK( INDISP ),
+     $                Z, LDZ, WORK, IWORK( INDIWO ), IWORK( INDIFL ),
+     $                INFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+   10 CONTINUE
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = M
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL SSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+*     If eigenvalues are not in order, then sort them, along with
+*     eigenvectors.
+*
+      IF( WANTZ ) THEN
+         DO 30 J = 1, M - 1
+            I = 0
+            TMP1 = W( J )
+            DO 20 JJ = J + 1, M
+               IF( W( JJ ).LT.TMP1 ) THEN
+                  I = JJ
+                  TMP1 = W( JJ )
+               END IF
+   20       CONTINUE
+*
+            IF( I.NE.0 ) THEN
+               ITMP1 = IWORK( I )
+               W( I ) = W( J )
+               IWORK( I ) = IWORK( J )
+               W( J ) = TMP1
+               IWORK( J ) = ITMP1
+               CALL SSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 )
+            END IF
+   30    CONTINUE
+      END IF
+*
+*      Causes problems with tests 19 & 20:
+*      IF (wantz .and. INDEIG ) Z( 1,1) = Z(1,1) / 1.002 + .002
+*
+*
+      WORK( 1 ) = LWMIN
+      IWORK( 1 ) = LIWMIN
+      RETURN
+*
+*     End of SSTEVR
+*
+      END
diff --git a/costa/native/external/lapack/sstevx.f b/costa/native/external/lapack/sstevx.f
new file mode 100644
index 000000000..316ea7e59
--- /dev/null
+++ b/costa/native/external/lapack/sstevx.f
@@ -0,0 +1,346 @@
+      SUBROUTINE SSTEVX( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, ABSTOL,
+     $                   M, W, Z, LDZ, WORK, IWORK, IFAIL, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE
+      INTEGER            IL, INFO, IU, LDZ, M, N
+      REAL               ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IFAIL( * ), IWORK( * )
+      REAL               D( * ), E( * ), W( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSTEVX computes selected eigenvalues and, optionally, eigenvectors
+*  of a real symmetric tridiagonal matrix A.  Eigenvalues and
+*  eigenvectors can be selected by specifying either a range of values
+*  or a range of indices for the desired eigenvalues.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found.
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found.
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+*
+*  N       (input) INTEGER
+*          The order of the matrix.  N >= 0.
+*
+*  D       (input/output) REAL array, dimension (N)
+*          On entry, the n diagonal elements of the tridiagonal matrix
+*          A.
+*          On exit, D may be multiplied by a constant factor chosen
+*          to avoid over/underflow in computing the eigenvalues.
+*
+*  E       (input/output) REAL array, dimension (N)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix A in elements 1 to N-1 of E; E(N) need not be set.
+*          On exit, E may be multiplied by a constant factor chosen
+*          to avoid over/underflow in computing the eigenvalues.
+*
+*  VL      (input) REAL
+*  VU      (input) REAL
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) REAL
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less
+*          than or equal to zero, then  EPS*|T|  will be used in
+*          its place, where |T| is the 1-norm of the tridiagonal
+*          matrix.
+*
+*          Eigenvalues will be computed most accurately when ABSTOL is
+*          set to twice the underflow threshold 2*SLAMCH('S'), not zero.
+*          If this routine returns with INFO>0, indicating that some
+*          eigenvectors did not converge, try setting ABSTOL to
+*          2*SLAMCH('S').
+*
+*          See "Computing Small Singular Values of Bidiagonal Matrices
+*          with Guaranteed High Relative Accuracy," by Demmel and
+*          Kahan, LAPACK Working Note #3.
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) REAL array, dimension (N)
+*          The first M elements contain the selected eigenvalues in
+*          ascending order.
+*
+*  Z       (output) REAL array, dimension (LDZ, max(1,M) )
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix A
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          If an eigenvector fails to converge (INFO > 0), then that
+*          column of Z contains the latest approximation to the
+*          eigenvector, and the index of the eigenvector is returned
+*          in IFAIL.  If JOBZ = 'N', then Z is not referenced.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace) REAL array, dimension (5*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (5*N)
+*
+*  IFAIL   (output) INTEGER array, dimension (N)
+*          If JOBZ = 'V', then if INFO = 0, the first M elements of
+*          IFAIL are zero.  If INFO > 0, then IFAIL contains the
+*          indices of the eigenvectors that failed to converge.
+*          If JOBZ = 'N', then IFAIL is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, then i eigenvectors failed to converge.
+*                Their indices are stored in array IFAIL.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, VALEIG, WANTZ
+      CHARACTER          ORDER
+      INTEGER            I, IMAX, INDIBL, INDISP, INDIWO, INDWRK,
+     $                   ISCALE, ITMP1, J, JJ, NSPLIT
+      REAL               BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA, SMLNUM,
+     $                   TMP1, TNRM, VLL, VUU
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH, SLANST
+      EXTERNAL           LSAME, SLAMCH, SLANST
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SSCAL, SSTEBZ, SSTEIN, SSTEQR, SSTERF,
+     $                   SSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE
+         IF( VALEIG ) THEN
+            IF( N.GT.0 .AND. VU.LE.VL )
+     $         INFO = -7
+         ELSE IF( INDEIG ) THEN
+            IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN
+               INFO = -8
+            ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN
+               INFO = -9
+            END IF
+         END IF
+      END IF
+      IF( INFO.EQ.0 ) THEN
+         IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) )
+     $      INFO = -14
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSTEVX', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( ALLEIG .OR. INDEIG ) THEN
+            M = 1
+            W( 1 ) = D( 1 )
+         ELSE
+            IF( VL.LT.D( 1 ) .AND. VU.GE.D( 1 ) ) THEN
+               M = 1
+               W( 1 ) = D( 1 )
+            END IF
+         END IF
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      EPS = SLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ISCALE = 0
+      IF ( VALEIG ) THEN
+         VLL = VL
+         VUU = VU
+      ELSE
+         VLL = ZERO
+         VUU = ZERO
+      ENDIF
+      TNRM = SLANST( 'M', N, D, E )
+      IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / TNRM
+      ELSE IF( TNRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / TNRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         CALL SSCAL( N, SIGMA, D, 1 )
+         CALL SSCAL( N-1, SIGMA, E( 1 ), 1 )
+         IF( VALEIG ) THEN
+            VLL = VL*SIGMA
+            VUU = VU*SIGMA
+         END IF
+      END IF
+*
+*     If all eigenvalues are desired and ABSTOL is less than zero, then
+*     call SSTERF or SSTEQR.  If this fails for some eigenvalue, then
+*     try SSTEBZ.
+*
+      IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND.
+     $    ( ABSTOL.LE.ZERO ) ) THEN
+         CALL SCOPY( N, D, 1, W, 1 )
+         CALL SCOPY( N-1, E( 1 ), 1, WORK( 1 ), 1 )
+         INDWRK = N + 1
+         IF( .NOT.WANTZ ) THEN
+            CALL SSTERF( N, W, WORK, INFO )
+         ELSE
+            CALL SSTEQR( 'I', N, W, WORK, Z, LDZ, WORK( INDWRK ), INFO )
+            IF( INFO.EQ.0 ) THEN
+               DO 10 I = 1, N
+                  IFAIL( I ) = 0
+   10          CONTINUE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            M = N
+            GO TO 20
+         END IF
+         INFO = 0
+      END IF
+*
+*     Otherwise, call SSTEBZ and, if eigenvectors are desired, SSTEIN.
+*
+      IF( WANTZ ) THEN
+         ORDER = 'B'
+      ELSE
+         ORDER = 'E'
+      END IF
+      INDWRK = 1
+      INDIBL = 1
+      INDISP = INDIBL + N
+      INDIWO = INDISP + N
+      CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTOL, D, E, M,
+     $             NSPLIT, W, IWORK( INDIBL ), IWORK( INDISP ),
+     $             WORK( INDWRK ), IWORK( INDIWO ), INFO )
+*
+      IF( WANTZ ) THEN
+         CALL SSTEIN( N, D, E, M, W, IWORK( INDIBL ), IWORK( INDISP ),
+     $                Z, LDZ, WORK( INDWRK ), IWORK( INDIWO ), IFAIL,
+     $                INFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+   20 CONTINUE
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = M
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL SSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+*     If eigenvalues are not in order, then sort them, along with
+*     eigenvectors.
+*
+      IF( WANTZ ) THEN
+         DO 40 J = 1, M - 1
+            I = 0
+            TMP1 = W( J )
+            DO 30 JJ = J + 1, M
+               IF( W( JJ ).LT.TMP1 ) THEN
+                  I = JJ
+                  TMP1 = W( JJ )
+               END IF
+   30       CONTINUE
+*
+            IF( I.NE.0 ) THEN
+               ITMP1 = IWORK( INDIBL+I-1 )
+               W( I ) = W( J )
+               IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 )
+               W( J ) = TMP1
+               IWORK( INDIBL+J-1 ) = ITMP1
+               CALL SSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 )
+               IF( INFO.NE.0 ) THEN
+                  ITMP1 = IFAIL( I )
+                  IFAIL( I ) = IFAIL( J )
+                  IFAIL( J ) = ITMP1
+               END IF
+            END IF
+   40    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of SSTEVX
+*
+      END
diff --git a/costa/native/external/lapack/ssycon.f b/costa/native/external/lapack/ssycon.f
new file mode 100644
index 000000000..95409e786
--- /dev/null
+++ b/costa/native/external/lapack/ssycon.f
@@ -0,0 +1,161 @@
+      SUBROUTINE SSYCON( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+      REAL               ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), IWORK( * )
+      REAL               A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYCON estimates the reciprocal of the condition number (in the
+*  1-norm) of a real symmetric matrix A using the factorization
+*  A = U*D*U**T or A = L*D*L**T computed by SSYTRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**T;
+*          = 'L':  Lower triangular, form is A = L*D*L**T.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input) REAL array, dimension (LDA,N)
+*          The block diagonal matrix D and the multipliers used to
+*          obtain the factor U or L as computed by SSYTRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by SSYTRF.
+*
+*  ANORM   (input) REAL
+*          The 1-norm of the original matrix A.
+*
+*  RCOND   (output) REAL
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an
+*          estimate of the 1-norm of inv(A) computed in this routine.
+*
+*  WORK    (workspace) REAL array, dimension (2*N)
+*
+*  IWORK    (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, KASE
+      REAL               AINVNM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLACON, SSYTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSYCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.LE.ZERO ) THEN
+         RETURN
+      END IF
+*
+*     Check that the diagonal matrix D is nonsingular.
+*
+      IF( UPPER ) THEN
+*
+*        Upper triangular storage: examine D from bottom to top
+*
+         DO 10 I = N, 1, -1
+            IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO )
+     $         RETURN
+   10    CONTINUE
+      ELSE
+*
+*        Lower triangular storage: examine D from top to bottom.
+*
+         DO 20 I = 1, N
+            IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO )
+     $         RETURN
+   20    CONTINUE
+      END IF
+*
+*     Estimate the 1-norm of the inverse.
+*
+      KASE = 0
+   30 CONTINUE
+      CALL SLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+*
+*        Multiply by inv(L*D*L') or inv(U*D*U').
+*
+         CALL SSYTRS( UPLO, N, 1, A, LDA, IPIV, WORK, N, INFO )
+         GO TO 30
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+      RETURN
+*
+*     End of SSYCON
+*
+      END
diff --git a/costa/native/external/lapack/ssyev.f b/costa/native/external/lapack/ssyev.f
new file mode 100644
index 000000000..19cbee06b
--- /dev/null
+++ b/costa/native/external/lapack/ssyev.f
@@ -0,0 +1,213 @@
+      SUBROUTINE SSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), W( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYEV computes all eigenvalues and, optionally, eigenvectors of a
+*  real symmetric matrix A.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA, N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of A contains the
+*          upper triangular part of the matrix A.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of A contains
+*          the lower triangular part of the matrix A.
+*          On exit, if JOBZ = 'V', then if INFO = 0, A contains the
+*          orthonormal eigenvectors of the matrix A.
+*          If JOBZ = 'N', then on exit the lower triangle (if UPLO='L')
+*          or the upper triangle (if UPLO='U') of A, including the
+*          diagonal, is destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  W       (output) REAL array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.  LWORK >= max(1,3*N-1).
+*          For optimal efficiency, LWORK >= (NB+2)*N,
+*          where NB is the blocksize for SSYTRD returned by ILAENV.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the algorithm failed to converge; i
+*                off-diagonal elements of an intermediate tridiagonal
+*                form did not converge to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E0, ONE = 1.0E0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LOWER, LQUERY, WANTZ
+      INTEGER            IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE,
+     $                   LLWORK, LOPT, LWKOPT, NB
+      REAL               ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
+     $                   SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               SLAMCH, SLANSY
+      EXTERNAL           ILAENV, LSAME, SLAMCH, SLANSY
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLASCL, SORGTR, SSCAL, SSTEQR, SSTERF, SSYTRD,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      LOWER = LSAME( UPLO, 'L' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.MAX( 1, 3*N-1 ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'SSYTRD', UPLO, N, -1, -1, -1 )
+         LWKOPT = MAX( 1, ( NB+2 )*N )
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSYEV ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( N.EQ.1 ) THEN
+         W( 1 ) = A( 1, 1 )
+         WORK( 1 ) = 3
+         IF( WANTZ )
+     $      A( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      EPS = SLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = SQRT( BIGNUM )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ANRM = SLANSY( 'M', UPLO, N, A, LDA, WORK )
+      ISCALE = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 )
+     $   CALL SLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO )
+*
+*     Call SSYTRD to reduce symmetric matrix to tridiagonal form.
+*
+      INDE = 1
+      INDTAU = INDE + N
+      INDWRK = INDTAU + N
+      LLWORK = LWORK - INDWRK + 1
+      CALL SSYTRD( UPLO, N, A, LDA, W, WORK( INDE ), WORK( INDTAU ),
+     $             WORK( INDWRK ), LLWORK, IINFO )
+      LOPT = 2*N + WORK( INDWRK )
+*
+*     For eigenvalues only, call SSTERF.  For eigenvectors, first call
+*     SORGTR to generate the orthogonal matrix, then call SSTEQR.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL SSTERF( N, W, WORK( INDE ), INFO )
+      ELSE
+         CALL SORGTR( UPLO, N, A, LDA, WORK( INDTAU ), WORK( INDWRK ),
+     $                LLWORK, IINFO )
+         CALL SSTEQR( JOBZ, N, W, WORK( INDE ), A, LDA, WORK( INDTAU ),
+     $                INFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = N
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL SSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+*     Set WORK(1) to optimal workspace size.
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of SSYEV
+*
+      END
diff --git a/costa/native/external/lapack/ssyevd.f b/costa/native/external/lapack/ssyevd.f
new file mode 100644
index 000000000..39e918b00
--- /dev/null
+++ b/costa/native/external/lapack/ssyevd.f
@@ -0,0 +1,265 @@
+      SUBROUTINE SSYEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, IWORK,
+     $                   LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, LDA, LIWORK, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               A( LDA, * ), W( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYEVD computes all eigenvalues and, optionally, eigenvectors of a
+*  real symmetric matrix A. If eigenvectors are desired, it uses a
+*  divide and conquer algorithm.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Because of large use of BLAS of level 3, SSYEVD needs N**2 more
+*  workspace than SSYEVX.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA, N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of A contains the
+*          upper triangular part of the matrix A.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of A contains
+*          the lower triangular part of the matrix A.
+*          On exit, if JOBZ = 'V', then if INFO = 0, A contains the
+*          orthonormal eigenvectors of the matrix A.
+*          If JOBZ = 'N', then on exit the lower triangle (if UPLO='L')
+*          or the upper triangle (if UPLO='U') of A, including the
+*          diagonal, is destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  W       (output) REAL array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  WORK    (workspace/output) REAL array,
+*                                         dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If N <= 1,               LWORK must be at least 1.
+*          If JOBZ = 'N' and N > 1, LWORK must be at least 2*N+1.
+*          If JOBZ = 'V' and N > 1, LWORK must be at least 
+*                                                1 + 6*N + 2*N**2.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.
+*          If N <= 1,                LIWORK must be at least 1.
+*          If JOBZ  = 'N' and N > 1, LIWORK must be at least 1.
+*          If JOBZ  = 'V' and N > 1, LIWORK must be at least 3 + 5*N.
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the algorithm failed to converge; i
+*                off-diagonal elements of an intermediate tridiagonal
+*                form did not converge to zero.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Jeff Rutter, Computer Science Division, University of California
+*     at Berkeley, USA
+*  Modified by Francoise Tisseur, University of Tennessee.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+*
+      LOGICAL            LOWER, LQUERY, WANTZ
+      INTEGER            IINFO, INDE, INDTAU, INDWK2, INDWRK, ISCALE,
+     $                   LIOPT, LIWMIN, LLWORK, LLWRK2, LOPT, LWMIN
+      REAL               ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
+     $                   SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH, SLANSY
+      EXTERNAL           LSAME, SLAMCH, SLANSY
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLACPY, SLASCL, SORMTR, SSCAL, SSTEDC, SSTERF,
+     $                   SSYTRD, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      LOWER = LSAME( UPLO, 'L' )
+      LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( N.LE.1 ) THEN
+         LIWMIN = 1
+         LWMIN = 1
+         LOPT = LWMIN
+         LIOPT = LIWMIN
+      ELSE
+         IF( WANTZ ) THEN
+            LIWMIN = 3 + 5*N
+            LWMIN = 1 + 6*N + 2*N**2
+         ELSE
+            LIWMIN = 1
+            LWMIN = 2*N + 1
+         END IF
+         LOPT = LWMIN
+         LIOPT = LIWMIN
+      END IF
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -10
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LOPT
+         IWORK( 1 ) = LIOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSYEVD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN 
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         W( 1 ) = A( 1, 1 )
+         IF( WANTZ )
+     $      A( 1, 1 ) = ONE
+         RETURN 
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      EPS = SLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = SQRT( BIGNUM )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ANRM = SLANSY( 'M', UPLO, N, A, LDA, WORK )
+      ISCALE = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 )
+     $   CALL SLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO )
+*
+*     Call SSYTRD to reduce symmetric matrix to tridiagonal form.
+*
+      INDE = 1
+      INDTAU = INDE + N
+      INDWRK = INDTAU + N
+      LLWORK = LWORK - INDWRK + 1
+      INDWK2 = INDWRK + N*N
+      LLWRK2 = LWORK - INDWK2 + 1
+*
+      CALL SSYTRD( UPLO, N, A, LDA, W, WORK( INDE ), WORK( INDTAU ),
+     $             WORK( INDWRK ), LLWORK, IINFO )
+      LOPT = 2*N + WORK( INDWRK )
+*
+*     For eigenvalues only, call SSTERF.  For eigenvectors, first call
+*     SSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the
+*     tridiagonal matrix, then call SORMTR to multiply it by the
+*     Householder transformations stored in A.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL SSTERF( N, W, WORK( INDE ), INFO )
+      ELSE
+         CALL SSTEDC( 'I', N, W, WORK( INDE ), WORK( INDWRK ), N,
+     $                WORK( INDWK2 ), LLWRK2, IWORK, LIWORK, INFO )
+         CALL SORMTR( 'L', UPLO, 'N', N, N, A, LDA, WORK( INDTAU ),
+     $                WORK( INDWRK ), N, WORK( INDWK2 ), LLWRK2, IINFO )
+         CALL SLACPY( 'A', N, N, WORK( INDWRK ), N, A, LDA )
+         LOPT = MAX( LOPT, 1+6*N+2*N**2 )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( ISCALE.EQ.1 )
+     $   CALL SSCAL( N, ONE / SIGMA, W, 1 )
+*
+      WORK( 1 ) = LOPT
+      IWORK( 1 ) = LIOPT
+*
+      RETURN
+*
+*     End of SSYEVD
+*
+      END
diff --git a/costa/native/external/lapack/ssyevr.f b/costa/native/external/lapack/ssyevr.f
new file mode 100644
index 000000000..fa2befa83
--- /dev/null
+++ b/costa/native/external/lapack/ssyevr.f
@@ -0,0 +1,497 @@
+      SUBROUTINE SSYEVR( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU,
+     $                   ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK,
+     $                   IWORK, LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 20, 2000
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE, UPLO
+      INTEGER            IL, INFO, IU, LDA, LDZ, LIWORK, LWORK, M, N
+      REAL               ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            ISUPPZ( * ), IWORK( * )
+      REAL               A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYEVR computes selected eigenvalues and, optionally, eigenvectors
+*  of a real symmetric matrix T.  Eigenvalues and eigenvectors can be
+*  selected by specifying either a range of values or a range of
+*  indices for the desired eigenvalues.
+*
+*  Whenever possible, SSYEVR calls SSTEGR to compute the
+*  eigenspectrum using Relatively Robust Representations.  SSTEGR
+*  computes eigenvalues by the dqds algorithm, while orthogonal
+*  eigenvectors are computed from various "good" L D L^T representations
+*  (also known as Relatively Robust Representations). Gram-Schmidt
+*  orthogonalization is avoided as far as possible. More specifically,
+*  the various steps of the algorithm are as follows. For the i-th
+*  unreduced block of T,
+*     (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T
+*          is a relatively robust representation,
+*     (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high
+*         relative accuracy by the dqds algorithm,
+*     (c) If there is a cluster of close eigenvalues, "choose" sigma_i
+*         close to the cluster, and go to step (a),
+*     (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T,
+*         compute the corresponding eigenvector by forming a
+*         rank-revealing twisted factorization.
+*  The desired accuracy of the output can be specified by the input
+*  parameter ABSTOL.
+*
+*  For more details, see "A new O(n^2) algorithm for the symmetric
+*  tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon,
+*  Computer Science Division Technical Report No. UCB//CSD-97-971,
+*  UC Berkeley, May 1997.
+*
+*
+*  Note 1 : SSYEVR calls SSTEGR when the full spectrum is requested
+*  on machines which conform to the ieee-754 floating point standard.
+*  SSYEVR calls SSTEBZ and SSTEIN on non-ieee machines and
+*  when partial spectrum requests are made.
+*
+*  Normal execution of SSTEGR may create NaNs and infinities and
+*  hence may abort due to a floating point exception in environments
+*  which do not handle NaNs and infinities in the ieee standard default
+*  manner.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found.
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found.
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+********** For RANGE = 'V' or 'I' and IU - IL < N - 1, SSTEBZ and
+********** SSTEIN are called
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA, N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of A contains the
+*          upper triangular part of the matrix A.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of A contains
+*          the lower triangular part of the matrix A.
+*          On exit, the lower triangle (if UPLO='L') or the upper
+*          triangle (if UPLO='U') of A, including the diagonal, is
+*          destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  VL      (input) REAL
+*  VU      (input) REAL
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) REAL
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less than
+*          or equal to zero, then  EPS*|T|  will be used in its place,
+*          where |T| is the 1-norm of the tridiagonal matrix obtained
+*          by reducing A to tridiagonal form.
+*
+*          See "Computing Small Singular Values of Bidiagonal Matrices
+*          with Guaranteed High Relative Accuracy," by Demmel and
+*          Kahan, LAPACK Working Note #3.
+*
+*          If high relative accuracy is important, set ABSTOL to
+*          SLAMCH( 'Safe minimum' ).  Doing so will guarantee that
+*          eigenvalues are computed to high relative accuracy when
+*          possible in future releases.  The current code does not
+*          make any guarantees about high relative accuracy, but
+*          furutre releases will. See J. Barlow and J. Demmel,
+*          "Computing Accurate Eigensystems of Scaled Diagonally
+*          Dominant Matrices", LAPACK Working Note #7, for a discussion
+*          of which matrices define their eigenvalues to high relative
+*          accuracy.
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) REAL array, dimension (N)
+*          The first M elements contain the selected eigenvalues in
+*          ascending order.
+*
+*  Z       (output) REAL array, dimension (LDZ, max(1,M))
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix A
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  ISUPPZ  (output) INTEGER array, dimension ( 2*max(1,M) )
+*          The support of the eigenvectors in Z, i.e., the indices
+*          indicating the nonzero elements in Z. The i-th eigenvector
+*          is nonzero only in elements ISUPPZ( 2*i-1 ) through
+*          ISUPPZ( 2*i ).
+********** Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,26*N).
+*          For optimal efficiency, LWORK >= (NB+6)*N,
+*          where NB is the max of the blocksize for SSYTRD and SORMTR
+*          returned by ILAENV.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.  LIWORK >= max(1,10*N).
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  Internal error
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Inderjit Dhillon, IBM Almaden, USA
+*     Osni Marques, LBNL/NERSC, USA
+*     Ken Stanley, Computer Science Division, University of
+*       California at Berkeley, USA
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, LOWER, LQUERY, VALEIG, WANTZ
+      CHARACTER          ORDER
+      INTEGER            I, IEEEOK, IINFO, IMAX, INDD, INDDD, INDE,
+     $                   INDEE, INDIBL, INDIFL, INDISP, INDIWO, INDTAU,
+     $                   INDWK, INDWKN, ISCALE, ITMP1, J, JJ, LIWMIN,
+     $                   LLWORK, LLWRKN, LWKOPT, LWMIN, NB, NSPLIT
+      REAL               ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN,
+     $                   SIGMA, SMLNUM, TMP1, VLL, VUU
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               SLAMCH, SLANSY
+      EXTERNAL           LSAME, ILAENV, SLAMCH, SLANSY
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SORMTR, SSCAL, SSTEBZ, SSTEGR, SSTEIN,
+     $                   SSTERF, SSWAP, SSYTRD, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      IEEEOK = ILAENV( 10, 'SSYEVR', 'N', 1, 2, 3, 4 )
+*
+      LOWER = LSAME( UPLO, 'L' )
+      WANTZ = LSAME( JOBZ, 'V' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+*
+      LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LIWORK.EQ.-1 ) )
+*
+      LWMIN = MAX( 1, 26*N )
+      LIWMIN = MAX( 1, 10*N )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE
+         IF( VALEIG ) THEN
+            IF( N.GT.0 .AND. VU.LE.VL )
+     $         INFO = -8
+         ELSE IF( INDEIG ) THEN
+            IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN
+               INFO = -9
+            ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN
+               INFO = -10
+            END IF
+         END IF
+      END IF
+      IF( INFO.EQ.0 ) THEN
+         IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+            INFO = -15
+         ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+            INFO = -18
+         ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+            INFO = -20
+         END IF
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'CHETRD', UPLO, N, -1, -1, -1 )
+         NB = MAX( NB, ILAENV( 1, 'CUNMTR', UPLO, N, -1, -1, -1 ) )
+         LWKOPT = MAX( ( NB+1 )*N, LWMIN )
+         WORK( 1 ) = LWKOPT
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSYEVR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( N.EQ.1 ) THEN
+         WORK( 1 ) = 7
+         IF( ALLEIG .OR. INDEIG ) THEN
+            M = 1
+            W( 1 ) = A( 1, 1 )
+         ELSE
+            IF( VL.LT.A( 1, 1 ) .AND. VU.GE.A( 1, 1 ) ) THEN
+               M = 1
+               W( 1 ) = A( 1, 1 )
+            END IF
+         END IF
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      EPS = SLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ISCALE = 0
+      ABSTLL = ABSTOL
+      VLL = VL
+      VUU = VU
+      ANRM = SLANSY( 'M', UPLO, N, A, LDA, WORK )
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         IF( LOWER ) THEN
+            DO 10 J = 1, N
+               CALL SSCAL( N-J+1, SIGMA, A( J, J ), 1 )
+   10       CONTINUE
+         ELSE
+            DO 20 J = 1, N
+               CALL SSCAL( J, SIGMA, A( 1, J ), 1 )
+   20       CONTINUE
+         END IF
+         IF( ABSTOL.GT.0 )
+     $      ABSTLL = ABSTOL*SIGMA
+         IF( VALEIG ) THEN
+            VLL = VL*SIGMA
+            VUU = VU*SIGMA
+         END IF
+      END IF
+*
+*     Call SSYTRD to reduce symmetric matrix to tridiagonal form.
+*
+      INDTAU = 1
+      INDE = INDTAU + N
+      INDD = INDE + N
+      INDEE = INDD + N
+      INDDD = INDEE + N
+      INDIFL = INDDD + N
+      INDWK = INDIFL + N
+      LLWORK = LWORK - INDWK + 1
+      CALL SSYTRD( UPLO, N, A, LDA, WORK( INDD ), WORK( INDE ),
+     $             WORK( INDTAU ), WORK( INDWK ), LLWORK, IINFO )
+*
+*     If all eigenvalues are desired
+*     then call SSTERF or SSTEGR and SORMTR.
+*
+      IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND.
+     $    IEEEOK.EQ.1 ) THEN
+         IF( .NOT.WANTZ ) THEN
+            CALL SCOPY( N, WORK( INDD ), 1, W, 1 )
+            CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 )
+            CALL SSTERF( N, W, WORK( INDEE ), INFO )
+         ELSE
+            CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 )
+            CALL SCOPY( N, WORK( INDD ), 1, WORK( INDDD ), 1 )
+*
+            CALL SSTEGR( JOBZ, 'A', N, WORK( INDDD ), WORK( INDEE ),
+     $                   VL, VU, IL, IU, ABSTOL, M, W, Z, LDZ, ISUPPZ,
+     $                   WORK( INDWK ), LWORK, IWORK, LIWORK, INFO )
+*
+*
+*
+*        Apply orthogonal matrix used in reduction to tridiagonal
+*        form to eigenvectors returned by SSTEIN.
+*
+            IF( WANTZ .AND. INFO.EQ.0 ) THEN
+               INDWKN = INDE
+               LLWRKN = LWORK - INDWKN + 1
+               CALL SORMTR( 'L', UPLO, 'N', N, M, A, LDA,
+     $                      WORK( INDTAU ), Z, LDZ, WORK( INDWKN ),
+     $                      LLWRKN, IINFO )
+            END IF
+         END IF
+*
+*
+         IF( INFO.EQ.0 ) THEN
+            M = N
+            GO TO 30
+         END IF
+         INFO = 0
+      END IF
+*
+*     Otherwise, call SSTEBZ and, if eigenvectors are desired, SSTEIN.
+*     Also call SSTEBZ and SSTEIN if SSTEGR fails.
+*
+      IF( WANTZ ) THEN
+         ORDER = 'B'
+      ELSE
+         ORDER = 'E'
+      END IF
+      INDIFL = 1
+      INDIBL = INDIFL + N
+      INDISP = INDIBL + N
+      INDIWO = INDISP + N
+      CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL,
+     $             WORK( INDD ), WORK( INDE ), M, NSPLIT, W,
+     $             IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWK ),
+     $             IWORK( INDIWO ), INFO )
+*
+      IF( WANTZ ) THEN
+         CALL SSTEIN( N, WORK( INDD ), WORK( INDE ), M, W,
+     $                IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ,
+     $                WORK( INDWK ), IWORK( INDIWO ), IWORK( INDIFL ),
+     $                INFO )
+*
+*        Apply orthogonal matrix used in reduction to tridiagonal
+*        form to eigenvectors returned by SSTEIN.
+*
+         INDWKN = INDE
+         LLWRKN = LWORK - INDWKN + 1
+         CALL SORMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z,
+     $                LDZ, WORK( INDWKN ), LLWRKN, IINFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+   30 CONTINUE
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = M
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL SSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+*     If eigenvalues are not in order, then sort them, along with
+*     eigenvectors.
+*
+      IF( WANTZ ) THEN
+         DO 50 J = 1, M - 1
+            I = 0
+            TMP1 = W( J )
+            DO 40 JJ = J + 1, M
+               IF( W( JJ ).LT.TMP1 ) THEN
+                  I = JJ
+                  TMP1 = W( JJ )
+               END IF
+   40       CONTINUE
+*
+            IF( I.NE.0 ) THEN
+               ITMP1 = IWORK( INDIBL+I-1 )
+               W( I ) = W( J )
+               IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 )
+               W( J ) = TMP1
+               IWORK( INDIBL+J-1 ) = ITMP1
+               CALL SSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 )
+            END IF
+   50    CONTINUE
+      END IF
+*
+*     Set WORK(1) to optimal workspace size.
+*
+      WORK( 1 ) = LWKOPT
+      IWORK( 1 ) = LIWMIN
+*
+      RETURN
+*
+*     End of SSYEVR
+*
+      END
diff --git a/costa/native/external/lapack/ssyevx.f b/costa/native/external/lapack/ssyevx.f
new file mode 100644
index 000000000..390d06b66
--- /dev/null
+++ b/costa/native/external/lapack/ssyevx.f
@@ -0,0 +1,421 @@
+      SUBROUTINE SSYEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU,
+     $                   ABSTOL, M, W, Z, LDZ, WORK, LWORK, IWORK,
+     $                   IFAIL, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE, UPLO
+      INTEGER            IL, INFO, IU, LDA, LDZ, LWORK, M, N
+      REAL               ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IFAIL( * ), IWORK( * )
+      REAL               A( LDA, * ), W( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYEVX computes selected eigenvalues and, optionally, eigenvectors
+*  of a real symmetric matrix A.  Eigenvalues and eigenvectors can be
+*  selected by specifying either a range of values or a range of indices
+*  for the desired eigenvalues.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found.
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found.
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA, N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of A contains the
+*          upper triangular part of the matrix A.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of A contains
+*          the lower triangular part of the matrix A.
+*          On exit, the lower triangle (if UPLO='L') or the upper
+*          triangle (if UPLO='U') of A, including the diagonal, is
+*          destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  VL      (input) REAL
+*  VU      (input) REAL
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) REAL
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less than
+*          or equal to zero, then  EPS*|T|  will be used in its place,
+*          where |T| is the 1-norm of the tridiagonal matrix obtained
+*          by reducing A to tridiagonal form.
+*
+*          Eigenvalues will be computed most accurately when ABSTOL is
+*          set to twice the underflow threshold 2*SLAMCH('S'), not zero.
+*          If this routine returns with INFO>0, indicating that some
+*          eigenvectors did not converge, try setting ABSTOL to
+*          2*SLAMCH('S').
+*
+*          See "Computing Small Singular Values of Bidiagonal Matrices
+*          with Guaranteed High Relative Accuracy," by Demmel and
+*          Kahan, LAPACK Working Note #3.
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) REAL array, dimension (N)
+*          On normal exit, the first M elements contain the selected
+*          eigenvalues in ascending order.
+*
+*  Z       (output) REAL array, dimension (LDZ, max(1,M))
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix A
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          If an eigenvector fails to converge, then that column of Z
+*          contains the latest approximation to the eigenvector, and the
+*          index of the eigenvector is returned in IFAIL.
+*          If JOBZ = 'N', then Z is not referenced.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.  LWORK >= max(1,8*N).
+*          For optimal efficiency, LWORK >= (NB+3)*N,
+*          where NB is the max of the blocksize for SSYTRD and SORMTR
+*          returned by ILAENV.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace) INTEGER array, dimension (5*N)
+*
+*  IFAIL   (output) INTEGER array, dimension (N)
+*          If JOBZ = 'V', then if INFO = 0, the first M elements of
+*          IFAIL are zero.  If INFO > 0, then IFAIL contains the
+*          indices of the eigenvectors that failed to converge.
+*          If JOBZ = 'N', then IFAIL is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, then i eigenvectors failed to converge.
+*                Their indices are stored in array IFAIL.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, LOWER, LQUERY, VALEIG, WANTZ
+      CHARACTER          ORDER
+      INTEGER            I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL,
+     $                   INDISP, INDIWO, INDTAU, INDWKN, INDWRK, ISCALE,
+     $                   ITMP1, J, JJ, LLWORK, LLWRKN, LOPT, LWKOPT, NB,
+     $                   NSPLIT
+      REAL               ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN,
+     $                   SIGMA, SMLNUM, TMP1, VLL, VUU
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               SLAMCH, SLANSY
+      EXTERNAL           LSAME, ILAENV, SLAMCH, SLANSY
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SLACPY, SORGTR, SORMTR, SSCAL, SSTEBZ,
+     $                   SSTEIN, SSTEQR, SSTERF, SSWAP, SSYTRD, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      LOWER = LSAME( UPLO, 'L' )
+      WANTZ = LSAME( JOBZ, 'V' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE
+         IF( VALEIG ) THEN
+            IF( N.GT.0 .AND. VU.LE.VL )
+     $         INFO = -8
+         ELSE IF( INDEIG ) THEN
+            IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN
+               INFO = -9
+            ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN
+               INFO = -10
+            END IF
+         END IF
+      END IF
+      IF( INFO.EQ.0 ) THEN
+         IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+            INFO = -15
+         ELSE IF( LWORK.LT.MAX( 1, 8*N ) .AND. .NOT.LQUERY ) THEN
+            INFO = -17
+         END IF
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'SSYTRD', UPLO, N, -1, -1, -1 )
+         NB = MAX( NB, ILAENV( 1, 'SORMTR', UPLO, N, -1, -1, -1 ) )
+         LWKOPT = ( NB+3 )*N
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSYEVX', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( N.EQ.1 ) THEN
+         WORK( 1 ) = 7
+         IF( ALLEIG .OR. INDEIG ) THEN
+            M = 1
+            W( 1 ) = A( 1, 1 )
+         ELSE
+            IF( VL.LT.A( 1, 1 ) .AND. VU.GE.A( 1, 1 ) ) THEN
+               M = 1
+               W( 1 ) = A( 1, 1 )
+            END IF
+         END IF
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      EPS = SLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ISCALE = 0
+      ABSTLL = ABSTOL
+      VLL = VL
+      VUU = VU
+      ANRM = SLANSY( 'M', UPLO, N, A, LDA, WORK )
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         IF( LOWER ) THEN
+            DO 10 J = 1, N
+               CALL SSCAL( N-J+1, SIGMA, A( J, J ), 1 )
+   10       CONTINUE
+         ELSE
+            DO 20 J = 1, N
+               CALL SSCAL( J, SIGMA, A( 1, J ), 1 )
+   20       CONTINUE
+         END IF
+         IF( ABSTOL.GT.0 )
+     $      ABSTLL = ABSTOL*SIGMA
+         IF( VALEIG ) THEN
+            VLL = VL*SIGMA
+            VUU = VU*SIGMA
+         END IF
+      END IF
+*
+*     Call SSYTRD to reduce symmetric matrix to tridiagonal form.
+*
+      INDTAU = 1
+      INDE = INDTAU + N
+      INDD = INDE + N
+      INDWRK = INDD + N
+      LLWORK = LWORK - INDWRK + 1
+      CALL SSYTRD( UPLO, N, A, LDA, WORK( INDD ), WORK( INDE ),
+     $             WORK( INDTAU ), WORK( INDWRK ), LLWORK, IINFO )
+      LOPT = 3*N + WORK( INDWRK )
+*
+*     If all eigenvalues are desired and ABSTOL is less than or equal to
+*     zero, then call SSTERF or SORGTR and SSTEQR.  If this fails for
+*     some eigenvalue, then try SSTEBZ.
+*
+      IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND.
+     $    ( ABSTOL.LE.ZERO ) ) THEN
+         CALL SCOPY( N, WORK( INDD ), 1, W, 1 )
+         INDEE = INDWRK + 2*N
+         IF( .NOT.WANTZ ) THEN
+            CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 )
+            CALL SSTERF( N, W, WORK( INDEE ), INFO )
+         ELSE
+            CALL SLACPY( 'A', N, N, A, LDA, Z, LDZ )
+            CALL SORGTR( UPLO, N, Z, LDZ, WORK( INDTAU ),
+     $                   WORK( INDWRK ), LLWORK, IINFO )
+            CALL SCOPY( N-1, WORK( INDE ), 1, WORK( INDEE ), 1 )
+            CALL SSTEQR( JOBZ, N, W, WORK( INDEE ), Z, LDZ,
+     $                   WORK( INDWRK ), INFO )
+            IF( INFO.EQ.0 ) THEN
+               DO 30 I = 1, N
+                  IFAIL( I ) = 0
+   30          CONTINUE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            M = N
+            GO TO 40
+         END IF
+         INFO = 0
+      END IF
+*
+*     Otherwise, call SSTEBZ and, if eigenvectors are desired, SSTEIN.
+*
+      IF( WANTZ ) THEN
+         ORDER = 'B'
+      ELSE
+         ORDER = 'E'
+      END IF
+      INDIBL = 1
+      INDISP = INDIBL + N
+      INDIWO = INDISP + N
+      CALL SSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL,
+     $             WORK( INDD ), WORK( INDE ), M, NSPLIT, W,
+     $             IWORK( INDIBL ), IWORK( INDISP ), WORK( INDWRK ),
+     $             IWORK( INDIWO ), INFO )
+*
+      IF( WANTZ ) THEN
+         CALL SSTEIN( N, WORK( INDD ), WORK( INDE ), M, W,
+     $                IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ,
+     $                WORK( INDWRK ), IWORK( INDIWO ), IFAIL, INFO )
+*
+*        Apply orthogonal matrix used in reduction to tridiagonal
+*        form to eigenvectors returned by SSTEIN.
+*
+         INDWKN = INDE
+         LLWRKN = LWORK - INDWKN + 1
+         CALL SORMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z,
+     $                LDZ, WORK( INDWKN ), LLWRKN, IINFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+   40 CONTINUE
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = M
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL SSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+*     If eigenvalues are not in order, then sort them, along with
+*     eigenvectors.
+*
+      IF( WANTZ ) THEN
+         DO 60 J = 1, M - 1
+            I = 0
+            TMP1 = W( J )
+            DO 50 JJ = J + 1, M
+               IF( W( JJ ).LT.TMP1 ) THEN
+                  I = JJ
+                  TMP1 = W( JJ )
+               END IF
+   50       CONTINUE
+*
+            IF( I.NE.0 ) THEN
+               ITMP1 = IWORK( INDIBL+I-1 )
+               W( I ) = W( J )
+               IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 )
+               W( J ) = TMP1
+               IWORK( INDIBL+J-1 ) = ITMP1
+               CALL SSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 )
+               IF( INFO.NE.0 ) THEN
+                  ITMP1 = IFAIL( I )
+                  IFAIL( I ) = IFAIL( J )
+                  IFAIL( J ) = ITMP1
+               END IF
+            END IF
+   60    CONTINUE
+      END IF
+*
+*     Set WORK(1) to optimal workspace size.
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of SSYEVX
+*
+      END
diff --git a/costa/native/external/lapack/ssygs2.f b/costa/native/external/lapack/ssygs2.f
new file mode 100644
index 000000000..4c33df116
--- /dev/null
+++ b/costa/native/external/lapack/ssygs2.f
@@ -0,0 +1,212 @@
+      SUBROUTINE SSYGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, ITYPE, LDA, LDB, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYGS2 reduces a real symmetric-definite generalized eigenproblem
+*  to standard form.
+*
+*  If ITYPE = 1, the problem is A*x = lambda*B*x,
+*  and A is overwritten by inv(U')*A*inv(U) or inv(L)*A*inv(L')
+*
+*  If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
+*  B*A*x = lambda*x, and A is overwritten by U*A*U` or L'*A*L.
+*
+*  B must have been previously factorized as U'*U or L*L' by SPOTRF.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          = 1: compute inv(U')*A*inv(U) or inv(L)*A*inv(L');
+*          = 2 or 3: compute U*A*U' or L'*A*L.
+*
+*  UPLO    (input) CHARACTER
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored, and how B has been factorized.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          n by n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n by n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the transformed matrix, stored in the
+*          same format as A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input) REAL array, dimension (LDB,N)
+*          The triangular factor from the Cholesky factorization of B,
+*          as returned by SPOTRF.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, HALF
+      PARAMETER          ( ONE = 1.0, HALF = 0.5 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            K
+      REAL               AKK, BKK, CT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SSCAL, SSYR2, STRMV, STRSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSYGS2', -INFO )
+         RETURN
+      END IF
+*
+      IF( ITYPE.EQ.1 ) THEN
+         IF( UPPER ) THEN
+*
+*           Compute inv(U')*A*inv(U)
+*
+            DO 10 K = 1, N
+*
+*              Update the upper triangle of A(k:n,k:n)
+*
+               AKK = A( K, K )
+               BKK = B( K, K )
+               AKK = AKK / BKK**2
+               A( K, K ) = AKK
+               IF( K.LT.N ) THEN
+                  CALL SSCAL( N-K, ONE / BKK, A( K, K+1 ), LDA )
+                  CT = -HALF*AKK
+                  CALL SAXPY( N-K, CT, B( K, K+1 ), LDB, A( K, K+1 ),
+     $                        LDA )
+                  CALL SSYR2( UPLO, N-K, -ONE, A( K, K+1 ), LDA,
+     $                        B( K, K+1 ), LDB, A( K+1, K+1 ), LDA )
+                  CALL SAXPY( N-K, CT, B( K, K+1 ), LDB, A( K, K+1 ),
+     $                        LDA )
+                  CALL STRSV( UPLO, 'Transpose', 'Non-unit', N-K,
+     $                        B( K+1, K+1 ), LDB, A( K, K+1 ), LDA )
+               END IF
+   10       CONTINUE
+         ELSE
+*
+*           Compute inv(L)*A*inv(L')
+*
+            DO 20 K = 1, N
+*
+*              Update the lower triangle of A(k:n,k:n)
+*
+               AKK = A( K, K )
+               BKK = B( K, K )
+               AKK = AKK / BKK**2
+               A( K, K ) = AKK
+               IF( K.LT.N ) THEN
+                  CALL SSCAL( N-K, ONE / BKK, A( K+1, K ), 1 )
+                  CT = -HALF*AKK
+                  CALL SAXPY( N-K, CT, B( K+1, K ), 1, A( K+1, K ), 1 )
+                  CALL SSYR2( UPLO, N-K, -ONE, A( K+1, K ), 1,
+     $                        B( K+1, K ), 1, A( K+1, K+1 ), LDA )
+                  CALL SAXPY( N-K, CT, B( K+1, K ), 1, A( K+1, K ), 1 )
+                  CALL STRSV( UPLO, 'No transpose', 'Non-unit', N-K,
+     $                        B( K+1, K+1 ), LDB, A( K+1, K ), 1 )
+               END IF
+   20       CONTINUE
+         END IF
+      ELSE
+         IF( UPPER ) THEN
+*
+*           Compute U*A*U'
+*
+            DO 30 K = 1, N
+*
+*              Update the upper triangle of A(1:k,1:k)
+*
+               AKK = A( K, K )
+               BKK = B( K, K )
+               CALL STRMV( UPLO, 'No transpose', 'Non-unit', K-1, B,
+     $                     LDB, A( 1, K ), 1 )
+               CT = HALF*AKK
+               CALL SAXPY( K-1, CT, B( 1, K ), 1, A( 1, K ), 1 )
+               CALL SSYR2( UPLO, K-1, ONE, A( 1, K ), 1, B( 1, K ), 1,
+     $                     A, LDA )
+               CALL SAXPY( K-1, CT, B( 1, K ), 1, A( 1, K ), 1 )
+               CALL SSCAL( K-1, BKK, A( 1, K ), 1 )
+               A( K, K ) = AKK*BKK**2
+   30       CONTINUE
+         ELSE
+*
+*           Compute L'*A*L
+*
+            DO 40 K = 1, N
+*
+*              Update the lower triangle of A(1:k,1:k)
+*
+               AKK = A( K, K )
+               BKK = B( K, K )
+               CALL STRMV( UPLO, 'Transpose', 'Non-unit', K-1, B, LDB,
+     $                     A( K, 1 ), LDA )
+               CT = HALF*AKK
+               CALL SAXPY( K-1, CT, B( K, 1 ), LDB, A( K, 1 ), LDA )
+               CALL SSYR2( UPLO, K-1, ONE, A( K, 1 ), LDA, B( K, 1 ),
+     $                     LDB, A, LDA )
+               CALL SAXPY( K-1, CT, B( K, 1 ), LDB, A( K, 1 ), LDA )
+               CALL SSCAL( K-1, BKK, A( K, 1 ), LDA )
+               A( K, K ) = AKK*BKK**2
+   40       CONTINUE
+         END IF
+      END IF
+      RETURN
+*
+*     End of SSYGS2
+*
+      END
diff --git a/costa/native/external/lapack/ssygst.f b/costa/native/external/lapack/ssygst.f
new file mode 100644
index 000000000..1151b8945
--- /dev/null
+++ b/costa/native/external/lapack/ssygst.f
@@ -0,0 +1,250 @@
+      SUBROUTINE SSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, ITYPE, LDA, LDB, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYGST reduces a real symmetric-definite generalized eigenproblem
+*  to standard form.
+*
+*  If ITYPE = 1, the problem is A*x = lambda*B*x,
+*  and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T)
+*
+*  If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
+*  B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L.
+*
+*  B must have been previously factorized as U**T*U or L*L**T by SPOTRF.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T);
+*          = 2 or 3: compute U*A*U**T or L**T*A*L.
+*
+*  UPLO    (input) CHARACTER
+*          = 'U':  Upper triangle of A is stored and B is factored as
+*                  U**T*U;
+*          = 'L':  Lower triangle of A is stored and B is factored as
+*                  L*L**T.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the transformed matrix, stored in the
+*          same format as A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input) REAL array, dimension (LDB,N)
+*          The triangular factor from the Cholesky factorization of B,
+*          as returned by SPOTRF.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, HALF
+      PARAMETER          ( ONE = 1.0, HALF = 0.5 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            K, KB, NB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SSYGS2, SSYMM, SSYR2K, STRMM, STRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSYGST', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'SSYGST', UPLO, N, -1, -1, -1 )
+*
+      IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+*        Use unblocked code
+*
+         CALL SSYGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( ITYPE.EQ.1 ) THEN
+            IF( UPPER ) THEN
+*
+*              Compute inv(U')*A*inv(U)
+*
+               DO 10 K = 1, N, NB
+                  KB = MIN( N-K+1, NB )
+*
+*                 Update the upper triangle of A(k:n,k:n)
+*
+                  CALL SSYGS2( ITYPE, UPLO, KB, A( K, K ), LDA,
+     $                         B( K, K ), LDB, INFO )
+                  IF( K+KB.LE.N ) THEN
+                     CALL STRSM( 'Left', UPLO, 'Transpose', 'Non-unit',
+     $                           KB, N-K-KB+1, ONE, B( K, K ), LDB,
+     $                           A( K, K+KB ), LDA )
+                     CALL SSYMM( 'Left', UPLO, KB, N-K-KB+1, -HALF,
+     $                           A( K, K ), LDA, B( K, K+KB ), LDB, ONE,
+     $                           A( K, K+KB ), LDA )
+                     CALL SSYR2K( UPLO, 'Transpose', N-K-KB+1, KB, -ONE,
+     $                            A( K, K+KB ), LDA, B( K, K+KB ), LDB,
+     $                            ONE, A( K+KB, K+KB ), LDA )
+                     CALL SSYMM( 'Left', UPLO, KB, N-K-KB+1, -HALF,
+     $                           A( K, K ), LDA, B( K, K+KB ), LDB, ONE,
+     $                           A( K, K+KB ), LDA )
+                     CALL STRSM( 'Right', UPLO, 'No transpose',
+     $                           'Non-unit', KB, N-K-KB+1, ONE,
+     $                           B( K+KB, K+KB ), LDB, A( K, K+KB ),
+     $                           LDA )
+                  END IF
+   10          CONTINUE
+            ELSE
+*
+*              Compute inv(L)*A*inv(L')
+*
+               DO 20 K = 1, N, NB
+                  KB = MIN( N-K+1, NB )
+*
+*                 Update the lower triangle of A(k:n,k:n)
+*
+                  CALL SSYGS2( ITYPE, UPLO, KB, A( K, K ), LDA,
+     $                         B( K, K ), LDB, INFO )
+                  IF( K+KB.LE.N ) THEN
+                     CALL STRSM( 'Right', UPLO, 'Transpose', 'Non-unit',
+     $                           N-K-KB+1, KB, ONE, B( K, K ), LDB,
+     $                           A( K+KB, K ), LDA )
+                     CALL SSYMM( 'Right', UPLO, N-K-KB+1, KB, -HALF,
+     $                           A( K, K ), LDA, B( K+KB, K ), LDB, ONE,
+     $                           A( K+KB, K ), LDA )
+                     CALL SSYR2K( UPLO, 'No transpose', N-K-KB+1, KB,
+     $                            -ONE, A( K+KB, K ), LDA, B( K+KB, K ),
+     $                            LDB, ONE, A( K+KB, K+KB ), LDA )
+                     CALL SSYMM( 'Right', UPLO, N-K-KB+1, KB, -HALF,
+     $                           A( K, K ), LDA, B( K+KB, K ), LDB, ONE,
+     $                           A( K+KB, K ), LDA )
+                     CALL STRSM( 'Left', UPLO, 'No transpose',
+     $                           'Non-unit', N-K-KB+1, KB, ONE,
+     $                           B( K+KB, K+KB ), LDB, A( K+KB, K ),
+     $                           LDA )
+                  END IF
+   20          CONTINUE
+            END IF
+         ELSE
+            IF( UPPER ) THEN
+*
+*              Compute U*A*U'
+*
+               DO 30 K = 1, N, NB
+                  KB = MIN( N-K+1, NB )
+*
+*                 Update the upper triangle of A(1:k+kb-1,1:k+kb-1)
+*
+                  CALL STRMM( 'Left', UPLO, 'No transpose', 'Non-unit',
+     $                        K-1, KB, ONE, B, LDB, A( 1, K ), LDA )
+                  CALL SSYMM( 'Right', UPLO, K-1, KB, HALF, A( K, K ),
+     $                        LDA, B( 1, K ), LDB, ONE, A( 1, K ), LDA )
+                  CALL SSYR2K( UPLO, 'No transpose', K-1, KB, ONE,
+     $                         A( 1, K ), LDA, B( 1, K ), LDB, ONE, A,
+     $                         LDA )
+                  CALL SSYMM( 'Right', UPLO, K-1, KB, HALF, A( K, K ),
+     $                        LDA, B( 1, K ), LDB, ONE, A( 1, K ), LDA )
+                  CALL STRMM( 'Right', UPLO, 'Transpose', 'Non-unit',
+     $                        K-1, KB, ONE, B( K, K ), LDB, A( 1, K ),
+     $                        LDA )
+                  CALL SSYGS2( ITYPE, UPLO, KB, A( K, K ), LDA,
+     $                         B( K, K ), LDB, INFO )
+   30          CONTINUE
+            ELSE
+*
+*              Compute L'*A*L
+*
+               DO 40 K = 1, N, NB
+                  KB = MIN( N-K+1, NB )
+*
+*                 Update the lower triangle of A(1:k+kb-1,1:k+kb-1)
+*
+                  CALL STRMM( 'Right', UPLO, 'No transpose', 'Non-unit',
+     $                        KB, K-1, ONE, B, LDB, A( K, 1 ), LDA )
+                  CALL SSYMM( 'Left', UPLO, KB, K-1, HALF, A( K, K ),
+     $                        LDA, B( K, 1 ), LDB, ONE, A( K, 1 ), LDA )
+                  CALL SSYR2K( UPLO, 'Transpose', K-1, KB, ONE,
+     $                         A( K, 1 ), LDA, B( K, 1 ), LDB, ONE, A,
+     $                         LDA )
+                  CALL SSYMM( 'Left', UPLO, KB, K-1, HALF, A( K, K ),
+     $                        LDA, B( K, 1 ), LDB, ONE, A( K, 1 ), LDA )
+                  CALL STRMM( 'Left', UPLO, 'Transpose', 'Non-unit', KB,
+     $                        K-1, ONE, B( K, K ), LDB, A( K, 1 ), LDA )
+                  CALL SSYGS2( ITYPE, UPLO, KB, A( K, K ), LDA,
+     $                         B( K, K ), LDB, INFO )
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      RETURN
+*
+*     End of SSYGST
+*
+      END
diff --git a/costa/native/external/lapack/ssygv.f b/costa/native/external/lapack/ssygv.f
new file mode 100644
index 000000000..444d8bca7
--- /dev/null
+++ b/costa/native/external/lapack/ssygv.f
@@ -0,0 +1,227 @@
+      SUBROUTINE SSYGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK,
+     $                  LWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, ITYPE, LDA, LDB, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), B( LDB, * ), W( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYGV computes all the eigenvalues, and optionally, the eigenvectors
+*  of a real generalized symmetric-definite eigenproblem, of the form
+*  A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.
+*  Here A and B are assumed to be symmetric and B is also
+*  positive definite.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          Specifies the problem type to be solved:
+*          = 1:  A*x = (lambda)*B*x
+*          = 2:  A*B*x = (lambda)*x
+*          = 3:  B*A*x = (lambda)*x
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA, N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of A contains the
+*          upper triangular part of the matrix A.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of A contains
+*          the lower triangular part of the matrix A.
+*
+*          On exit, if JOBZ = 'V', then if INFO = 0, A contains the
+*          matrix Z of eigenvectors.  The eigenvectors are normalized
+*          as follows:
+*          if ITYPE = 1 or 2, Z**T*B*Z = I;
+*          if ITYPE = 3, Z**T*inv(B)*Z = I.
+*          If JOBZ = 'N', then on exit the upper triangle (if UPLO='U')
+*          or the lower triangle (if UPLO='L') of A, including the
+*          diagonal, is destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input/output) REAL array, dimension (LDB, N)
+*          On entry, the symmetric positive definite matrix B.
+*          If UPLO = 'U', the leading N-by-N upper triangular part of B
+*          contains the upper triangular part of the matrix B.
+*          If UPLO = 'L', the leading N-by-N lower triangular part of B
+*          contains the lower triangular part of the matrix B.
+*
+*          On exit, if INFO <= N, the part of B containing the matrix is
+*          overwritten by the triangular factor U or L from the Cholesky
+*          factorization B = U**T*U or B = L*L**T.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  W       (output) REAL array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.  LWORK >= max(1,3*N-1).
+*          For optimal efficiency, LWORK >= (NB+2)*N,
+*          where NB is the blocksize for SSYTRD returned by ILAENV.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  SPOTRF or SSYEV returned an error code:
+*             <= N:  if INFO = i, SSYEV failed to converge;
+*                    i off-diagonal elements of an intermediate
+*                    tridiagonal form did not converge to zero;
+*             > N:   if INFO = N + i, for 1 <= i <= N, then the leading
+*                    minor of order i of B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, UPPER, WANTZ
+      CHARACTER          TRANS
+      INTEGER            LWKOPT, NB, NEIG
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV, LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SPOTRF, SSYEV, SSYGST, STRMM, STRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LWORK.LT.MAX( 1, 3*N-1 ) .AND. .NOT.LQUERY ) THEN
+         INFO = -11
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'SSYTRD', UPLO, N, -1, -1, -1 )
+         LWKOPT = ( NB+2 )*N
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSYGV ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form a Cholesky factorization of B.
+*
+      CALL SPOTRF( UPLO, N, B, LDB, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem and solve.
+*
+      CALL SSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
+      CALL SSYEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, INFO )
+*
+      IF( WANTZ ) THEN
+*
+*        Backtransform eigenvectors to the original problem.
+*
+         NEIG = N
+         IF( INFO.GT.0 )
+     $      NEIG = INFO - 1
+         IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN
+*
+*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
+*           backtransform eigenvectors: x = inv(L)'*y or inv(U)*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'N'
+            ELSE
+               TRANS = 'T'
+            END IF
+*
+            CALL STRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE,
+     $                  B, LDB, A, LDA )
+*
+         ELSE IF( ITYPE.EQ.3 ) THEN
+*
+*           For B*A*x=(lambda)*x;
+*           backtransform eigenvectors: x = L*y or U'*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'T'
+            ELSE
+               TRANS = 'N'
+            END IF
+*
+            CALL STRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE,
+     $                  B, LDB, A, LDA )
+         END IF
+      END IF
+*
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of SSYGV
+*
+      END
diff --git a/costa/native/external/lapack/ssygvd.f b/costa/native/external/lapack/ssygvd.f
new file mode 100644
index 000000000..5ca95032f
--- /dev/null
+++ b/costa/native/external/lapack/ssygvd.f
@@ -0,0 +1,277 @@
+      SUBROUTINE SSYGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK,
+     $                   LWORK, IWORK, LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, ITYPE, LDA, LDB, LIWORK, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               A( LDA, * ), B( LDB, * ), W( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYGVD computes all the eigenvalues, and optionally, the eigenvectors
+*  of a real generalized symmetric-definite eigenproblem, of the form
+*  A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.  Here A and
+*  B are assumed to be symmetric and B is also positive definite.
+*  If eigenvectors are desired, it uses a divide and conquer algorithm.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          Specifies the problem type to be solved:
+*          = 1:  A*x = (lambda)*B*x
+*          = 2:  A*B*x = (lambda)*x
+*          = 3:  B*A*x = (lambda)*x
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA, N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of A contains the
+*          upper triangular part of the matrix A.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of A contains
+*          the lower triangular part of the matrix A.
+*
+*          On exit, if JOBZ = 'V', then if INFO = 0, A contains the
+*          matrix Z of eigenvectors.  The eigenvectors are normalized
+*          as follows:
+*          if ITYPE = 1 or 2, Z**T*B*Z = I;
+*          if ITYPE = 3, Z**T*inv(B)*Z = I.
+*          If JOBZ = 'N', then on exit the upper triangle (if UPLO='U')
+*          or the lower triangle (if UPLO='L') of A, including the
+*          diagonal, is destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input/output) REAL array, dimension (LDB, N)
+*          On entry, the symmetric matrix B.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of B contains the
+*          upper triangular part of the matrix B.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of B contains
+*          the lower triangular part of the matrix B.
+*
+*          On exit, if INFO <= N, the part of B containing the matrix is
+*          overwritten by the triangular factor U or L from the Cholesky
+*          factorization B = U**T*U or B = L*L**T.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  W       (output) REAL array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If N <= 1,               LWORK >= 1.
+*          If JOBZ = 'N' and N > 1, LWORK >= 2*N+1.
+*          If JOBZ = 'V' and N > 1, LWORK >= 1 + 6*N + 2*N**2.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.
+*          If N <= 1,                LIWORK >= 1.
+*          If JOBZ  = 'N' and N > 1, LIWORK >= 1.
+*          If JOBZ  = 'V' and N > 1, LIWORK >= 3 + 5*N.
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  SPOTRF or SSYEVD returned an error code:
+*             <= N:  if INFO = i, SSYEVD failed to converge;
+*                    i off-diagonal elements of an intermediate
+*                    tridiagonal form did not converge to zero;
+*             > N:   if INFO = N + i, for 1 <= i <= N, then the leading
+*                    minor of order i of B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, UPPER, WANTZ
+      CHARACTER          TRANS
+      INTEGER            LIOPT, LIWMIN, LOPT, LWMIN, NEIG
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SPOTRF, SSYEVD, SSYGST, STRMM, STRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+      LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( N.LE.1 ) THEN
+         LIWMIN = 1
+         LWMIN = 1
+         LOPT = LWMIN
+         LIOPT = LIWMIN
+      ELSE
+         IF( WANTZ ) THEN
+            LIWMIN = 3 + 5*N
+            LWMIN = 1 + 6*N + 2*N**2
+         ELSE
+            LIWMIN = 1
+            LWMIN = 2*N + 1
+         END IF
+         LOPT = LWMIN
+         LIOPT = LIWMIN
+      END IF
+      IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -11
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LOPT
+         IWORK( 1 ) = LIOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSYGVD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form a Cholesky factorization of B.
+*
+      CALL SPOTRF( UPLO, N, B, LDB, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem and solve.
+*
+      CALL SSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
+      CALL SSYEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, IWORK, LIWORK,
+     $             INFO )
+      LOPT = MAX( REAL( LOPT ), REAL( WORK( 1 ) ) )
+      LIOPT = MAX( REAL( LIOPT ), REAL( IWORK( 1 ) ) )
+*
+      IF( WANTZ ) THEN
+*
+*        Backtransform eigenvectors to the original problem.
+*
+         NEIG = N
+         IF( INFO.GT.0 )
+     $      NEIG = INFO - 1
+         IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN
+*
+*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
+*           backtransform eigenvectors: x = inv(L)'*y or inv(U)*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'N'
+            ELSE
+               TRANS = 'T'
+            END IF
+*
+            CALL STRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE,
+     $                  B, LDB, A, LDA )
+*
+         ELSE IF( ITYPE.EQ.3 ) THEN
+*
+*           For B*A*x=(lambda)*x;
+*           backtransform eigenvectors: x = L*y or U'*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'T'
+            ELSE
+               TRANS = 'N'
+            END IF
+*
+            CALL STRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE,
+     $                  B, LDB, A, LDA )
+         END IF
+      END IF
+*
+      WORK( 1 ) = LOPT
+      IWORK( 1 ) = LIOPT
+*
+      RETURN
+*
+*     End of SSYGVD
+*
+      END
diff --git a/costa/native/external/lapack/ssygvx.f b/costa/native/external/lapack/ssygvx.f
new file mode 100644
index 000000000..97f8bf503
--- /dev/null
+++ b/costa/native/external/lapack/ssygvx.f
@@ -0,0 +1,325 @@
+      SUBROUTINE SSYGVX( ITYPE, JOBZ, RANGE, UPLO, N, A, LDA, B, LDB,
+     $                   VL, VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK,
+     $                   LWORK, IWORK, IFAIL, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE, UPLO
+      INTEGER            IL, INFO, ITYPE, IU, LDA, LDB, LDZ, LWORK, M, N
+      REAL               ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IFAIL( * ), IWORK( * )
+      REAL               A( LDA, * ), B( LDB, * ), W( * ), WORK( * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYGVX computes selected eigenvalues, and optionally, eigenvectors
+*  of a real generalized symmetric-definite eigenproblem, of the form
+*  A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.  Here A
+*  and B are assumed to be symmetric and B is also positive definite.
+*  Eigenvalues and eigenvectors can be selected by specifying either a
+*  range of values or a range of indices for the desired eigenvalues.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          Specifies the problem type to be solved:
+*          = 1:  A*x = (lambda)*B*x
+*          = 2:  A*B*x = (lambda)*x
+*          = 3:  B*A*x = (lambda)*x
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found.
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found.
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A and B are stored;
+*          = 'L':  Lower triangle of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix pencil (A,B).  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA, N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of A contains the
+*          upper triangular part of the matrix A.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of A contains
+*          the lower triangular part of the matrix A.
+*
+*          On exit, the lower triangle (if UPLO='L') or the upper
+*          triangle (if UPLO='U') of A, including the diagonal, is
+*          destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input/output) REAL array, dimension (LDA, N)
+*          On entry, the symmetric matrix B.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of B contains the
+*          upper triangular part of the matrix B.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of B contains
+*          the lower triangular part of the matrix B.
+*
+*          On exit, if INFO <= N, the part of B containing the matrix is
+*          overwritten by the triangular factor U or L from the Cholesky
+*          factorization B = U**T*U or B = L*L**T.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  VL      (input) REAL
+*  VU      (input) REAL
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) REAL
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less than
+*          or equal to zero, then  EPS*|T|  will be used in its place,
+*          where |T| is the 1-norm of the tridiagonal matrix obtained
+*          by reducing A to tridiagonal form.
+*
+*          Eigenvalues will be computed most accurately when ABSTOL is
+*          set to twice the underflow threshold 2*DLAMCH('S'), not zero.
+*          If this routine returns with INFO>0, indicating that some
+*          eigenvectors did not converge, try setting ABSTOL to
+*          2*SLAMCH('S').
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) REAL array, dimension (N)
+*          On normal exit, the first M elements contain the selected
+*          eigenvalues in ascending order.
+*
+*  Z       (output) REAL array, dimension (LDZ, max(1,M))
+*          If JOBZ = 'N', then Z is not referenced.
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix A
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          The eigenvectors are normalized as follows:
+*          if ITYPE = 1 or 2, Z**T*B*Z = I;
+*          if ITYPE = 3, Z**T*inv(B)*Z = I.
+*
+*          If an eigenvector fails to converge, then that column of Z
+*          contains the latest approximation to the eigenvector, and the
+*          index of the eigenvector is returned in IFAIL.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.  LWORK >= max(1,8*N).
+*          For optimal efficiency, LWORK >= (NB+3)*N,
+*          where NB is the blocksize for SSYTRD returned by ILAENV.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace) INTEGER array, dimension (5*N)
+*
+*  IFAIL   (output) INTEGER array, dimension (N)
+*          If JOBZ = 'V', then if INFO = 0, the first M elements of
+*          IFAIL are zero.  If INFO > 0, then IFAIL contains the
+*          indices of the eigenvectors that failed to converge.
+*          If JOBZ = 'N', then IFAIL is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  SPOTRF or SSYEVX returned an error code:
+*             <= N:  if INFO = i, SSYEVX failed to converge;
+*                    i eigenvectors failed to converge.  Their indices
+*                    are stored in array IFAIL.
+*             > N:   if INFO = N + i, for 1 <= i <= N, then the leading
+*                    minor of order i of B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, LQUERY, UPPER, VALEIG, WANTZ
+      CHARACTER          TRANS
+      INTEGER            LOPT, LWKOPT, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV, LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SPOTRF, SSYEVX, SSYGST, STRMM, STRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      UPPER = LSAME( UPLO, 'U' )
+      WANTZ = LSAME( JOBZ, 'V' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( VALEIG .AND. N.GT.0 ) THEN
+         IF( VU.LE.VL ) INFO = -11
+      ELSE IF( INDEIG .AND. IL.LT.1 ) THEN
+         INFO = -12
+      ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN
+         INFO = -13
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -18
+      ELSE IF( LWORK.LT.MAX( 1, 8*N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -20
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'SSYTRD', UPLO, N, -1, -1, -1 )
+         LWKOPT = ( NB+3 )*N
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSYGVX', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+*     Form a Cholesky factorization of B.
+*
+      CALL SPOTRF( UPLO, N, B, LDB, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem and solve.
+*
+      CALL SSYGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
+      CALL SSYEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, ABSTOL,
+     $             M, W, Z, LDZ, WORK, LWORK, IWORK, IFAIL, INFO )
+      LOPT = WORK( 1 )
+*
+      IF( WANTZ ) THEN
+*
+*        Backtransform eigenvectors to the original problem.
+*
+         IF( INFO.GT.0 )
+     $      M = INFO - 1
+         IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN
+*
+*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
+*           backtransform eigenvectors: x = inv(L)'*y or inv(U)*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'N'
+            ELSE
+               TRANS = 'T'
+            END IF
+*
+            CALL STRSM( 'Left', UPLO, TRANS, 'Non-unit', N, M, ONE, B,
+     $                  LDB, Z, LDZ )
+*
+         ELSE IF( ITYPE.EQ.3 ) THEN
+*
+*           For B*A*x=(lambda)*x;
+*           backtransform eigenvectors: x = L*y or U'*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'T'
+            ELSE
+               TRANS = 'N'
+            END IF
+*
+            CALL STRMM( 'Left', UPLO, TRANS, 'Non-unit', N, M, ONE, B,
+     $                  LDB, Z, LDZ )
+         END IF
+      END IF
+*
+*     Set WORK(1) to optimal workspace size.
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of SSYGVX
+*
+      END
diff --git a/costa/native/external/lapack/ssyrfs.f b/costa/native/external/lapack/ssyrfs.f
new file mode 100644
index 000000000..77572f0ad
--- /dev/null
+++ b/costa/native/external/lapack/ssyrfs.f
@@ -0,0 +1,335 @@
+      SUBROUTINE SSYRFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB,
+     $                   X, LDX, FERR, BERR, WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), IWORK( * )
+      REAL               A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
+     $                   BERR( * ), FERR( * ), WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYRFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is symmetric indefinite, and
+*  provides error bounds and backward error estimates for the solution.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input) REAL array, dimension (LDA,N)
+*          The symmetric matrix A.  If UPLO = 'U', the leading N-by-N
+*          upper triangular part of A contains the upper triangular part
+*          of the matrix A, and the strictly lower triangular part of A
+*          is not referenced.  If UPLO = 'L', the leading N-by-N lower
+*          triangular part of A contains the lower triangular part of
+*          the matrix A, and the strictly upper triangular part of A is
+*          not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  AF      (input) REAL array, dimension (LDAF,N)
+*          The factored form of the matrix A.  AF contains the block
+*          diagonal matrix D and the multipliers used to obtain the
+*          factor U or L from the factorization A = U*D*U**T or
+*          A = L*D*L**T as computed by SSYTRF.
+*
+*  LDAF    (input) INTEGER
+*          The leading dimension of the array AF.  LDAF >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by SSYTRF.
+*
+*  B       (input) REAL array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) REAL array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by SSYTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) REAL array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+      REAL               TWO
+      PARAMETER          ( TWO = 2.0E+0 )
+      REAL               THREE
+      PARAMETER          ( THREE = 3.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            COUNT, I, J, K, KASE, NZ
+      REAL               EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SCOPY, SLACON, SSYMV, SSYTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSYRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = SLAMCH( 'Epsilon' )
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - A * X
+*
+         CALL SCOPY( N, B( 1, J ), 1, WORK( N+1 ), 1 )
+         CALL SSYMV( UPLO, N, -ONE, A, LDA, X( 1, J ), 1, ONE,
+     $               WORK( N+1 ), 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            WORK( I ) = ABS( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(A)*abs(X) + abs(B).
+*
+         IF( UPPER ) THEN
+            DO 50 K = 1, N
+               S = ZERO
+               XK = ABS( X( K, J ) )
+               DO 40 I = 1, K - 1
+                  WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK
+                  S = S + ABS( A( I, K ) )*ABS( X( I, J ) )
+   40          CONTINUE
+               WORK( K ) = WORK( K ) + ABS( A( K, K ) )*XK + S
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               XK = ABS( X( K, J ) )
+               WORK( K ) = WORK( K ) + ABS( A( K, K ) )*XK
+               DO 60 I = K + 1, N
+                  WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK
+                  S = S + ABS( A( I, K ) )*ABS( X( I, J ) )
+   60          CONTINUE
+               WORK( K ) = WORK( K ) + S
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) )
+            ELSE
+               S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) /
+     $             ( WORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL SSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK( N+1 ), N,
+     $                   INFO )
+            CALL SAXPY( N, ONE, WORK( N+1 ), 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(A))*
+*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(A) is the inverse of A
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(A)*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use SLACON to estimate the infinity-norm of the matrix
+*           inv(A) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I )
+            ELSE
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL SLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ),
+     $                KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(A').
+*
+               CALL SSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK( N+1 ), N,
+     $                      INFO )
+               DO 110 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  110          CONTINUE
+            ELSE IF( KASE.EQ.2 ) THEN
+*
+*              Multiply by inv(A)*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  120          CONTINUE
+               CALL SSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK( N+1 ), N,
+     $                      INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, ABS( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of SSYRFS
+*
+      END
diff --git a/costa/native/external/lapack/ssysv.f b/costa/native/external/lapack/ssysv.f
new file mode 100644
index 000000000..5da1fa0f0
--- /dev/null
+++ b/costa/native/external/lapack/ssysv.f
@@ -0,0 +1,171 @@
+      SUBROUTINE SSYSV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK,
+     $                  LWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LDB, LWORK, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               A( LDA, * ), B( LDB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYSV computes the solution to a real system of linear equations
+*     A * X = B,
+*  where A is an N-by-N symmetric matrix and X and B are N-by-NRHS
+*  matrices.
+*
+*  The diagonal pivoting method is used to factor A as
+*     A = U * D * U**T,  if UPLO = 'U', or
+*     A = L * D * L**T,  if UPLO = 'L',
+*  where U (or L) is a product of permutation and unit upper (lower)
+*  triangular matrices, and D is symmetric and block diagonal with 
+*  1-by-1 and 2-by-2 diagonal blocks.  The factored form of A is then
+*  used to solve the system of equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the block diagonal matrix D and the
+*          multipliers used to obtain the factor U or L from the
+*          factorization A = U*D*U**T or A = L*D*L**T as computed by
+*          SSYTRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D, as
+*          determined by SSYTRF.  If IPIV(k) > 0, then rows and columns
+*          k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1
+*          diagonal block.  If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0,
+*          then rows and columns k-1 and -IPIV(k) were interchanged and
+*          D(k-1:k,k-1:k) is a 2-by-2 diagonal block.  If UPLO = 'L' and
+*          IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and
+*          -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2
+*          diagonal block.
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of WORK.  LWORK >= 1, and for best performance
+*          LWORK >= N*NB, where NB is the optimal blocksize for
+*          SSYTRF.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, D(i,i) is exactly zero.  The factorization
+*               has been completed, but the block diagonal matrix D is
+*               exactly singular, so the solution could not be computed.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            LWKOPT, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV, LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SSYTRF, SSYTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN
+         INFO = -10
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'SSYTRF', UPLO, N, -1, -1, -1 )
+         LWKOPT = N*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSYSV ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Compute the factorization A = U*D*U' or A = L*D*L'.
+*
+      CALL SSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL SSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
+*
+      END IF
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of SSYSV
+*
+      END
diff --git a/costa/native/external/lapack/ssysvx.f b/costa/native/external/lapack/ssysvx.f
new file mode 100644
index 000000000..eaab12ed2
--- /dev/null
+++ b/costa/native/external/lapack/ssysvx.f
@@ -0,0 +1,297 @@
+      SUBROUTINE SSYSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B,
+     $                   LDB, X, LDX, RCOND, FERR, BERR, WORK, LWORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          FACT, UPLO
+      INTEGER            INFO, LDA, LDAF, LDB, LDX, LWORK, N, NRHS
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), IWORK( * )
+      REAL               A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
+     $                   BERR( * ), FERR( * ), WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYSVX uses the diagonal pivoting factorization to compute the
+*  solution to a real system of linear equations A * X = B,
+*  where A is an N-by-N symmetric matrix and X and B are N-by-NRHS
+*  matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'N', the diagonal pivoting method is used to factor A.
+*     The form of the factorization is
+*        A = U * D * U**T,  if UPLO = 'U', or
+*        A = L * D * L**T,  if UPLO = 'L',
+*     where U (or L) is a product of permutation and unit upper (lower)
+*     triangular matrices, and D is symmetric and block diagonal with
+*     1-by-1 and 2-by-2 diagonal blocks.
+*
+*  2. If some D(i,i)=0, so that D is exactly singular, then the routine
+*     returns with INFO = i. Otherwise, the factored form of A is used
+*     to estimate the condition number of the matrix A.  If the
+*     reciprocal of the condition number is less than machine precision,
+*     INFO = N+1 is returned as a warning, but the routine still goes on
+*     to solve for X and compute error bounds as described below.
+*
+*  3. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  4. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of A has been
+*          supplied on entry.
+*          = 'F':  On entry, AF and IPIV contain the factored form of
+*                  A.  AF and IPIV will not be modified.
+*          = 'N':  The matrix A will be copied to AF and factored.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input) REAL array, dimension (LDA,N)
+*          The symmetric matrix A.  If UPLO = 'U', the leading N-by-N
+*          upper triangular part of A contains the upper triangular part
+*          of the matrix A, and the strictly lower triangular part of A
+*          is not referenced.  If UPLO = 'L', the leading N-by-N lower
+*          triangular part of A contains the lower triangular part of
+*          the matrix A, and the strictly upper triangular part of A is
+*          not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  AF      (input or output) REAL array, dimension (LDAF,N)
+*          If FACT = 'F', then AF is an input argument and on entry
+*          contains the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L from the factorization
+*          A = U*D*U**T or A = L*D*L**T as computed by SSYTRF.
+*
+*          If FACT = 'N', then AF is an output argument and on exit
+*          returns the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L from the factorization
+*          A = U*D*U**T or A = L*D*L**T.
+*
+*  LDAF    (input) INTEGER
+*          The leading dimension of the array AF.  LDAF >= max(1,N).
+*
+*  IPIV    (input or output) INTEGER array, dimension (N)
+*          If FACT = 'F', then IPIV is an input argument and on entry
+*          contains details of the interchanges and the block structure
+*          of D, as determined by SSYTRF.
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*          If FACT = 'N', then IPIV is an output argument and on exit
+*          contains details of the interchanges and the block structure
+*          of D, as determined by SSYTRF.
+*
+*  B       (input) REAL array, dimension (LDB,NRHS)
+*          The N-by-NRHS right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) REAL array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) REAL
+*          The estimate of the reciprocal condition number of the matrix
+*          A.  If RCOND is less than the machine precision (in
+*          particular, if RCOND = 0), the matrix is singular to working
+*          precision.  This condition is indicated by a return code of
+*          INFO > 0.
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of WORK.  LWORK >= 3*N, and for best performance
+*          LWORK >= N*NB, where NB is the optimal blocksize for
+*          SSYTRF.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, and i is
+*                <= N:  D(i,i) is exactly zero.  The factorization
+*                       has been completed but the factor D is exactly
+*                       singular, so the solution and error bounds could
+*                       not be computed. RCOND = 0 is returned.
+*                = N+1: D is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, NOFACT
+      INTEGER            LWKOPT, NB
+      REAL               ANORM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      REAL               SLAMCH, SLANSY
+      EXTERNAL           ILAENV, LSAME, SLAMCH, SLANSY
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLACPY, SSYCON, SSYRFS, SSYTRF, SSYTRS, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -13
+      ELSE IF( LWORK.LT.MAX( 1, 3*N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -18
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'SSYTRF', UPLO, N, -1, -1, -1 )
+         LWKOPT = N*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSYSVX', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+      IF( NOFACT ) THEN
+*
+*        Compute the factorization A = U*D*U' or A = L*D*L'.
+*
+         CALL SLACPY( UPLO, N, N, A, LDA, AF, LDAF )
+         CALL SSYTRF( UPLO, N, AF, LDAF, IPIV, WORK, LWORK, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      ANORM = SLANSY( 'I', UPLO, N, A, LDA, WORK )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL SSYCON( UPLO, N, AF, LDAF, IPIV, ANORM, RCOND, WORK, IWORK,
+     $             INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.SLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution vectors X.
+*
+      CALL SLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL SSYTRS( UPLO, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solutions and
+*     compute error bounds and backward error estimates for them.
+*
+      CALL SSYRFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, X,
+     $             LDX, FERR, BERR, WORK, IWORK, INFO )
+*
+      RETURN
+*
+*     End of SSYSVX
+*
+      END
diff --git a/costa/native/external/lapack/ssytd2.f b/costa/native/external/lapack/ssytd2.f
new file mode 100644
index 000000000..4e2dc293f
--- /dev/null
+++ b/costa/native/external/lapack/ssytd2.f
@@ -0,0 +1,248 @@
+      SUBROUTINE SSYTD2( UPLO, N, A, LDA, D, E, TAU, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), D( * ), E( * ), TAU( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYTD2 reduces a real symmetric matrix A to symmetric tridiagonal
+*  form T by an orthogonal similarity transformation: Q' * A * Q = T.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored:
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          n-by-n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n-by-n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*          On exit, if UPLO = 'U', the diagonal and first superdiagonal
+*          of A are overwritten by the corresponding elements of the
+*          tridiagonal matrix T, and the elements above the first
+*          superdiagonal, with the array TAU, represent the orthogonal
+*          matrix Q as a product of elementary reflectors; if UPLO
+*          = 'L', the diagonal and first subdiagonal of A are over-
+*          written by the corresponding elements of the tridiagonal
+*          matrix T, and the elements below the first subdiagonal, with
+*          the array TAU, represent the orthogonal matrix Q as a product
+*          of elementary reflectors. See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  D       (output) REAL array, dimension (N)
+*          The diagonal elements of the tridiagonal matrix T:
+*          D(i) = A(i,i).
+*
+*  E       (output) REAL array, dimension (N-1)
+*          The off-diagonal elements of the tridiagonal matrix T:
+*          E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.
+*
+*  TAU     (output) REAL array, dimension (N-1)
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  If UPLO = 'U', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(n-1) . . . H(2) H(1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
+*  A(1:i-1,i+1), and tau in TAU(i).
+*
+*  If UPLO = 'L', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(1) H(2) . . . H(n-1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i),
+*  and tau in TAU(i).
+*
+*  The contents of A on exit are illustrated by the following examples
+*  with n = 5:
+*
+*  if UPLO = 'U':                       if UPLO = 'L':
+*
+*    (  d   e   v2  v3  v4 )              (  d                  )
+*    (      d   e   v3  v4 )              (  e   d              )
+*    (          d   e   v4 )              (  v1  e   d          )
+*    (              d   e  )              (  v1  v2  e   d      )
+*    (                  d  )              (  v1  v2  v3  e   d  )
+*
+*  where d and e denote diagonal and off-diagonal elements of T, and vi
+*  denotes an element of the vector defining H(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO, HALF
+      PARAMETER          ( ONE = 1.0, ZERO = 0.0, HALF = 1.0 / 2.0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I
+      REAL               ALPHA, TAUI
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SLARFG, SSYMV, SSYR2, XERBLA
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SDOT
+      EXTERNAL           LSAME, SDOT
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSYTD2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Reduce the upper triangle of A
+*
+         DO 10 I = N - 1, 1, -1
+*
+*           Generate elementary reflector H(i) = I - tau * v * v'
+*           to annihilate A(1:i-1,i+1)
+*
+            CALL SLARFG( I, A( I, I+1 ), A( 1, I+1 ), 1, TAUI )
+            E( I ) = A( I, I+1 )
+*
+            IF( TAUI.NE.ZERO ) THEN
+*
+*              Apply H(i) from both sides to A(1:i,1:i)
+*
+               A( I, I+1 ) = ONE
+*
+*              Compute  x := tau * A * v  storing x in TAU(1:i)
+*
+               CALL SSYMV( UPLO, I, TAUI, A, LDA, A( 1, I+1 ), 1, ZERO,
+     $                     TAU, 1 )
+*
+*              Compute  w := x - 1/2 * tau * (x'*v) * v
+*
+               ALPHA = -HALF*TAUI*SDOT( I, TAU, 1, A( 1, I+1 ), 1 )
+               CALL SAXPY( I, ALPHA, A( 1, I+1 ), 1, TAU, 1 )
+*
+*              Apply the transformation as a rank-2 update:
+*                 A := A - v * w' - w * v'
+*
+               CALL SSYR2( UPLO, I, -ONE, A( 1, I+1 ), 1, TAU, 1, A,
+     $                     LDA )
+*
+               A( I, I+1 ) = E( I )
+            END IF
+            D( I+1 ) = A( I+1, I+1 )
+            TAU( I ) = TAUI
+   10    CONTINUE
+         D( 1 ) = A( 1, 1 )
+      ELSE
+*
+*        Reduce the lower triangle of A
+*
+         DO 20 I = 1, N - 1
+*
+*           Generate elementary reflector H(i) = I - tau * v * v'
+*           to annihilate A(i+2:n,i)
+*
+            CALL SLARFG( N-I, A( I+1, I ), A( MIN( I+2, N ), I ), 1,
+     $                   TAUI )
+            E( I ) = A( I+1, I )
+*
+            IF( TAUI.NE.ZERO ) THEN
+*
+*              Apply H(i) from both sides to A(i+1:n,i+1:n)
+*
+               A( I+1, I ) = ONE
+*
+*              Compute  x := tau * A * v  storing y in TAU(i:n-1)
+*
+               CALL SSYMV( UPLO, N-I, TAUI, A( I+1, I+1 ), LDA,
+     $                     A( I+1, I ), 1, ZERO, TAU( I ), 1 )
+*
+*              Compute  w := x - 1/2 * tau * (x'*v) * v
+*
+               ALPHA = -HALF*TAUI*SDOT( N-I, TAU( I ), 1, A( I+1, I ),
+     $                 1 )
+               CALL SAXPY( N-I, ALPHA, A( I+1, I ), 1, TAU( I ), 1 )
+*
+*              Apply the transformation as a rank-2 update:
+*                 A := A - v * w' - w * v'
+*
+               CALL SSYR2( UPLO, N-I, -ONE, A( I+1, I ), 1, TAU( I ), 1,
+     $                     A( I+1, I+1 ), LDA )
+*
+               A( I+1, I ) = E( I )
+            END IF
+            D( I ) = A( I, I )
+            TAU( I ) = TAUI
+   20    CONTINUE
+         D( N ) = A( N, N )
+      END IF
+*
+      RETURN
+*
+*     End of SSYTD2
+*
+      END
diff --git a/costa/native/external/lapack/ssytf2.f b/costa/native/external/lapack/ssytf2.f
new file mode 100644
index 000000000..c5cdaf82c
--- /dev/null
+++ b/costa/native/external/lapack/ssytf2.f
@@ -0,0 +1,511 @@
+      SUBROUTINE SSYTF2( UPLO, N, A, LDA, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYTF2 computes the factorization of a real symmetric matrix A using
+*  the Bunch-Kaufman diagonal pivoting method:
+*
+*     A = U*D*U'  or  A = L*D*L'
+*
+*  where U (or L) is a product of permutation and unit upper (lower)
+*  triangular matrices, U' is the transpose of U, and D is symmetric and
+*  block diagonal with 1-by-1 and 2-by-2 diagonal blocks.
+*
+*  This is the unblocked version of the algorithm, calling Level 2 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored:
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          n-by-n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n-by-n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L (see below for further details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D.
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*          > 0: if INFO = k, D(k,k) is exactly zero.  The factorization
+*               has been completed, but the block diagonal matrix D is
+*               exactly singular, and division by zero will occur if it
+*               is used to solve a system of equations.
+*
+*  Further Details
+*  ===============
+*
+*  1-96 - Based on modifications by J. Lewis, Boeing Computer Services
+*         Company
+*
+*  If UPLO = 'U', then A = U*D*U', where
+*     U = P(n)*U(n)* ... *P(k)U(k)* ...,
+*  i.e., U is a product of terms P(k)*U(k), where k decreases from n to
+*  1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and U(k) is a unit upper triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    v    0   )   k-s
+*     U(k) =  (   0    I    0   )   s
+*             (   0    0    I   )   n-k
+*                k-s   s   n-k
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k).
+*  If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k),
+*  and A(k,k), and v overwrites A(1:k-2,k-1:k).
+*
+*  If UPLO = 'L', then A = L*D*L', where
+*     L = P(1)*L(1)* ... *P(k)*L(k)* ...,
+*  i.e., L is a product of terms P(k)*L(k), where k increases from 1 to
+*  n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and L(k) is a unit lower triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    0     0   )  k-1
+*     L(k) =  (   0    I     0   )  s
+*             (   0    v     I   )  n-k-s+1
+*                k-1   s  n-k-s+1
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k).
+*  If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k),
+*  and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+      REAL               EIGHT, SEVTEN
+      PARAMETER          ( EIGHT = 8.0E+0, SEVTEN = 17.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, IMAX, J, JMAX, K, KK, KP, KSTEP
+      REAL               ABSAKK, ALPHA, COLMAX, D11, D12, D21, D22, R1,
+     $                   ROWMAX, T, WK, WKM1, WKP1
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ISAMAX
+      EXTERNAL           LSAME, ISAMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SSCAL, SSWAP, SSYR, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSYTF2', -INFO )
+         RETURN
+      END IF
+*
+*     Initialize ALPHA for use in choosing pivot block size.
+*
+      ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT
+*
+      IF( UPPER ) THEN
+*
+*        Factorize A as U*D*U' using the upper triangle of A
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2
+*
+         K = N
+   10    CONTINUE
+*
+*        If K < 1, exit from loop
+*
+         IF( K.LT.1 )
+     $      GO TO 70
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = ABS( A( K, K ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.GT.1 ) THEN
+            IMAX = ISAMAX( K-1, A( 1, K ), 1 )
+            COLMAX = ABS( A( IMAX, K ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               JMAX = IMAX + ISAMAX( K-IMAX, A( IMAX, IMAX+1 ), LDA )
+               ROWMAX = ABS( A( IMAX, JMAX ) )
+               IF( IMAX.GT.1 ) THEN
+                  JMAX = ISAMAX( IMAX-1, A( 1, IMAX ), 1 )
+                  ROWMAX = MAX( ROWMAX, ABS( A( JMAX, IMAX ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( ABS( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX ) THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+               ELSE
+*
+*                 interchange rows and columns K-1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K - KSTEP + 1
+            IF( KP.NE.KK ) THEN
+*
+*              Interchange rows and columns KK and KP in the leading
+*              submatrix A(1:k,1:k)
+*
+               CALL SSWAP( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 )
+               CALL SSWAP( KK-KP-1, A( KP+1, KK ), 1, A( KP, KP+1 ),
+     $                     LDA )
+               T = A( KK, KK )
+               A( KK, KK ) = A( KP, KP )
+               A( KP, KP ) = T
+               IF( KSTEP.EQ.2 ) THEN
+                  T = A( K-1, K )
+                  A( K-1, K ) = A( KP, K )
+                  A( KP, K ) = T
+               END IF
+            END IF
+*
+*           Update the leading submatrix
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column k now holds
+*
+*              W(k) = U(k)*D(k)
+*
+*              where U(k) is the k-th column of U
+*
+*              Perform a rank-1 update of A(1:k-1,1:k-1) as
+*
+*              A := A - U(k)*D(k)*U(k)' = A - W(k)*1/D(k)*W(k)'
+*
+               R1 = ONE / A( K, K )
+               CALL SSYR( UPLO, K-1, -R1, A( 1, K ), 1, A, LDA )
+*
+*              Store U(k) in column k
+*
+               CALL SSCAL( K-1, R1, A( 1, K ), 1 )
+            ELSE
+*
+*              2-by-2 pivot block D(k): columns k and k-1 now hold
+*
+*              ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k)
+*
+*              where U(k) and U(k-1) are the k-th and (k-1)-th columns
+*              of U
+*
+*              Perform a rank-2 update of A(1:k-2,1:k-2) as
+*
+*              A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )'
+*                 = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )'
+*
+               IF( K.GT.2 ) THEN
+*
+                  D12 = A( K-1, K )
+                  D22 = A( K-1, K-1 ) / D12
+                  D11 = A( K, K ) / D12
+                  T = ONE / ( D11*D22-ONE )
+                  D12 = T / D12
+*
+                  DO 30 J = K - 2, 1, -1
+                     WKM1 = D12*( D11*A( J, K-1 )-A( J, K ) )
+                     WK = D12*( D22*A( J, K )-A( J, K-1 ) )
+                     DO 20 I = J, 1, -1
+                        A( I, J ) = A( I, J ) - A( I, K )*WK -
+     $                              A( I, K-1 )*WKM1
+   20                CONTINUE
+                     A( J, K ) = WK
+                     A( J, K-1 ) = WKM1
+   30             CONTINUE
+*
+               END IF
+*
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K-1 ) = -KP
+         END IF
+*
+*        Decrease K and return to the start of the main loop
+*
+         K = K - KSTEP
+         GO TO 10
+*
+      ELSE
+*
+*        Factorize A as L*D*L' using the lower triangle of A
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2
+*
+         K = 1
+   40    CONTINUE
+*
+*        If K > N, exit from loop
+*
+         IF( K.GT.N )
+     $      GO TO 70
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = ABS( A( K, K ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.LT.N ) THEN
+            IMAX = K + ISAMAX( N-K, A( K+1, K ), 1 )
+            COLMAX = ABS( A( IMAX, K ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               JMAX = K - 1 + ISAMAX( IMAX-K, A( IMAX, K ), LDA )
+               ROWMAX = ABS( A( IMAX, JMAX ) )
+               IF( IMAX.LT.N ) THEN
+                  JMAX = IMAX + ISAMAX( N-IMAX, A( IMAX+1, IMAX ), 1 )
+                  ROWMAX = MAX( ROWMAX, ABS( A( JMAX, IMAX ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( ABS( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX ) THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+               ELSE
+*
+*                 interchange rows and columns K+1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K + KSTEP - 1
+            IF( KP.NE.KK ) THEN
+*
+*              Interchange rows and columns KK and KP in the trailing
+*              submatrix A(k:n,k:n)
+*
+               IF( KP.LT.N )
+     $            CALL SSWAP( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 )
+               CALL SSWAP( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ),
+     $                     LDA )
+               T = A( KK, KK )
+               A( KK, KK ) = A( KP, KP )
+               A( KP, KP ) = T
+               IF( KSTEP.EQ.2 ) THEN
+                  T = A( K+1, K )
+                  A( K+1, K ) = A( KP, K )
+                  A( KP, K ) = T
+               END IF
+            END IF
+*
+*           Update the trailing submatrix
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column k now holds
+*
+*              W(k) = L(k)*D(k)
+*
+*              where L(k) is the k-th column of L
+*
+               IF( K.LT.N ) THEN
+*
+*                 Perform a rank-1 update of A(k+1:n,k+1:n) as
+*
+*                 A := A - L(k)*D(k)*L(k)' = A - W(k)*(1/D(k))*W(k)'
+*
+                  D11 = ONE / A( K, K )
+                  CALL SSYR( UPLO, N-K, -D11, A( K+1, K ), 1,
+     $                       A( K+1, K+1 ), LDA )
+*
+*                 Store L(k) in column K
+*
+                  CALL SSCAL( N-K, D11, A( K+1, K ), 1 )
+               END IF
+            ELSE
+*
+*              2-by-2 pivot block D(k)
+*
+               IF( K.LT.N-1 ) THEN
+*
+*                 Perform a rank-2 update of A(k+2:n,k+2:n) as
+*
+*                 A := A - ( (A(k) A(k+1))*D(k)**(-1) ) * (A(k) A(k+1))'
+*
+*                 where L(k) and L(k+1) are the k-th and (k+1)-th
+*                 columns of L
+*
+                  D21 = A( K+1, K )
+                  D11 = A( K+1, K+1 ) / D21
+                  D22 = A( K, K ) / D21
+                  T = ONE / ( D11*D22-ONE )
+                  D21 = T / D21
+*
+                  DO 60 J = K + 2, N
+*
+                     WK = D21*( D11*A( J, K )-A( J, K+1 ) )
+                     WKP1 = D21*( D22*A( J, K+1 )-A( J, K ) )
+*
+                     DO 50 I = J, N
+                        A( I, J ) = A( I, J ) - A( I, K )*WK -
+     $                              A( I, K+1 )*WKP1
+   50                CONTINUE
+*
+                     A( J, K ) = WK
+                     A( J, K+1 ) = WKP1
+*
+   60             CONTINUE
+               END IF
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K+1 ) = -KP
+         END IF
+*
+*        Increase K and return to the start of the main loop
+*
+         K = K + KSTEP
+         GO TO 40
+*
+      END IF
+*
+   70 CONTINUE
+*
+      RETURN
+*
+*     End of SSYTF2
+*
+      END
diff --git a/costa/native/external/lapack/ssytrd.f b/costa/native/external/lapack/ssytrd.f
new file mode 100644
index 000000000..fab61591a
--- /dev/null
+++ b/costa/native/external/lapack/ssytrd.f
@@ -0,0 +1,295 @@
+      SUBROUTINE SSYTRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), D( * ), E( * ), TAU( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYTRD reduces a real symmetric matrix A to real symmetric
+*  tridiagonal form T by an orthogonal similarity transformation:
+*  Q**T * A * Q = T.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*          On exit, if UPLO = 'U', the diagonal and first superdiagonal
+*          of A are overwritten by the corresponding elements of the
+*          tridiagonal matrix T, and the elements above the first
+*          superdiagonal, with the array TAU, represent the orthogonal
+*          matrix Q as a product of elementary reflectors; if UPLO
+*          = 'L', the diagonal and first subdiagonal of A are over-
+*          written by the corresponding elements of the tridiagonal
+*          matrix T, and the elements below the first subdiagonal, with
+*          the array TAU, represent the orthogonal matrix Q as a product
+*          of elementary reflectors. See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  D       (output) REAL array, dimension (N)
+*          The diagonal elements of the tridiagonal matrix T:
+*          D(i) = A(i,i).
+*
+*  E       (output) REAL array, dimension (N-1)
+*          The off-diagonal elements of the tridiagonal matrix T:
+*          E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.
+*
+*  TAU     (output) REAL array, dimension (N-1)
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= 1.
+*          For optimum performance LWORK >= N*NB, where NB is the
+*          optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  If UPLO = 'U', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(n-1) . . . H(2) H(1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
+*  A(1:i-1,i+1), and tau in TAU(i).
+*
+*  If UPLO = 'L', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(1) H(2) . . . H(n-1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real scalar, and v is a real vector with
+*  v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i),
+*  and tau in TAU(i).
+*
+*  The contents of A on exit are illustrated by the following examples
+*  with n = 5:
+*
+*  if UPLO = 'U':                       if UPLO = 'L':
+*
+*    (  d   e   v2  v3  v4 )              (  d                  )
+*    (      d   e   v3  v4 )              (  e   d              )
+*    (          d   e   v4 )              (  v1  e   d          )
+*    (              d   e  )              (  v1  v2  e   d      )
+*    (                  d  )              (  v1  v2  v3  e   d  )
+*
+*  where d and e denote diagonal and off-diagonal elements of T, and vi
+*  denotes an element of the vector defining H(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, UPPER
+      INTEGER            I, IINFO, IWS, J, KK, LDWORK, LWKOPT, NB,
+     $                   NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLATRD, SSYR2K, SSYTD2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN
+         INFO = -9
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size.
+*
+         NB = ILAENV( 1, 'SSYTRD', UPLO, N, -1, -1, -1 )
+         LWKOPT = N*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSYTRD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NX = N
+      IWS = 1
+      IF( NB.GT.1 .AND. NB.LT.N ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code
+*        (last block is always handled by unblocked code).
+*
+         NX = MAX( NB, ILAENV( 3, 'SSYTRD', UPLO, N, -1, -1, -1 ) )
+         IF( NX.LT.N ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = N
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  determine the
+*              minimum value of NB, and reduce NB or force use of
+*              unblocked code by setting NX = N.
+*
+               NB = MAX( LWORK / LDWORK, 1 )
+               NBMIN = ILAENV( 2, 'SSYTRD', UPLO, N, -1, -1, -1 )
+               IF( NB.LT.NBMIN )
+     $            NX = N
+            END IF
+         ELSE
+            NX = N
+         END IF
+      ELSE
+         NB = 1
+      END IF
+*
+      IF( UPPER ) THEN
+*
+*        Reduce the upper triangle of A.
+*        Columns 1:kk are handled by the unblocked method.
+*
+         KK = N - ( ( N-NX+NB-1 ) / NB )*NB
+         DO 20 I = N - NB + 1, KK + 1, -NB
+*
+*           Reduce columns i:i+nb-1 to tridiagonal form and form the
+*           matrix W which is needed to update the unreduced part of
+*           the matrix
+*
+            CALL SLATRD( UPLO, I+NB-1, NB, A, LDA, E, TAU, WORK,
+     $                   LDWORK )
+*
+*           Update the unreduced submatrix A(1:i-1,1:i-1), using an
+*           update of the form:  A := A - V*W' - W*V'
+*
+            CALL SSYR2K( UPLO, 'No transpose', I-1, NB, -ONE, A( 1, I ),
+     $                   LDA, WORK, LDWORK, ONE, A, LDA )
+*
+*           Copy superdiagonal elements back into A, and diagonal
+*           elements into D
+*
+            DO 10 J = I, I + NB - 1
+               A( J-1, J ) = E( J-1 )
+               D( J ) = A( J, J )
+   10       CONTINUE
+   20    CONTINUE
+*
+*        Use unblocked code to reduce the last or only block
+*
+         CALL SSYTD2( UPLO, KK, A, LDA, D, E, TAU, IINFO )
+      ELSE
+*
+*        Reduce the lower triangle of A
+*
+         DO 40 I = 1, N - NX, NB
+*
+*           Reduce columns i:i+nb-1 to tridiagonal form and form the
+*           matrix W which is needed to update the unreduced part of
+*           the matrix
+*
+            CALL SLATRD( UPLO, N-I+1, NB, A( I, I ), LDA, E( I ),
+     $                   TAU( I ), WORK, LDWORK )
+*
+*           Update the unreduced submatrix A(i+ib:n,i+ib:n), using
+*           an update of the form:  A := A - V*W' - W*V'
+*
+            CALL SSYR2K( UPLO, 'No transpose', N-I-NB+1, NB, -ONE,
+     $                   A( I+NB, I ), LDA, WORK( NB+1 ), LDWORK, ONE,
+     $                   A( I+NB, I+NB ), LDA )
+*
+*           Copy subdiagonal elements back into A, and diagonal
+*           elements into D
+*
+            DO 30 J = I, I + NB - 1
+               A( J+1, J ) = E( J )
+               D( J ) = A( J, J )
+   30       CONTINUE
+   40    CONTINUE
+*
+*        Use unblocked code to reduce the last or only block
+*
+         CALL SSYTD2( UPLO, N-I+1, A( I, I ), LDA, D( I ), E( I ),
+     $                TAU( I ), IINFO )
+      END IF
+*
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of SSYTRD
+*
+      END
diff --git a/costa/native/external/lapack/ssytrf.f b/costa/native/external/lapack/ssytrf.f
new file mode 100644
index 000000000..10743da81
--- /dev/null
+++ b/costa/native/external/lapack/ssytrf.f
@@ -0,0 +1,288 @@
+      SUBROUTINE SSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYTRF computes the factorization of a real symmetric matrix A using
+*  the Bunch-Kaufman diagonal pivoting method.  The form of the
+*  factorization is
+*
+*     A = U*D*U**T  or  A = L*D*L**T
+*
+*  where U (or L) is a product of permutation and unit upper (lower)
+*  triangular matrices, and D is symmetric and block diagonal with 
+*  1-by-1 and 2-by-2 diagonal blocks.
+*
+*  This is the blocked version of the algorithm, calling Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L (see below for further details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D.
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of WORK.  LWORK >=1.  For best performance
+*          LWORK >= N*NB, where NB is the block size returned by ILAENV.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, D(i,i) is exactly zero.  The factorization
+*                has been completed, but the block diagonal matrix D is
+*                exactly singular, and division by zero will occur if it
+*                is used to solve a system of equations.
+*
+*  Further Details
+*  ===============
+*
+*  If UPLO = 'U', then A = U*D*U', where
+*     U = P(n)*U(n)* ... *P(k)U(k)* ...,
+*  i.e., U is a product of terms P(k)*U(k), where k decreases from n to
+*  1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and U(k) is a unit upper triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    v    0   )   k-s
+*     U(k) =  (   0    I    0   )   s
+*             (   0    0    I   )   n-k
+*                k-s   s   n-k
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k).
+*  If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k),
+*  and A(k,k), and v overwrites A(1:k-2,k-1:k).
+*
+*  If UPLO = 'L', then A = L*D*L', where
+*     L = P(1)*L(1)* ... *P(k)*L(k)* ...,
+*  i.e., L is a product of terms P(k)*L(k), where k increases from 1 to
+*  n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and L(k) is a unit lower triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    0     0   )  k-1
+*     L(k) =  (   0    I     0   )  s
+*             (   0    v     I   )  n-k-s+1
+*                k-1   s  n-k-s+1
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k).
+*  If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k),
+*  and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, UPPER
+      INTEGER            IINFO, IWS, J, K, KB, LDWORK, LWKOPT, NB, NBMIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLASYF, SSYTF2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size
+*
+         NB = ILAENV( 1, 'SSYTRF', UPLO, N, -1, -1, -1 )
+         LWKOPT = N*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSYTRF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      LDWORK = N
+      IF( NB.GT.1 .AND. NB.LT.N ) THEN
+         IWS = LDWORK*NB
+         IF( LWORK.LT.IWS ) THEN
+            NB = MAX( LWORK / LDWORK, 1 )
+            NBMIN = MAX( 2, ILAENV( 2, 'SSYTRF', UPLO, N, -1, -1, -1 ) )
+         END IF
+      ELSE
+         IWS = 1
+      END IF
+      IF( NB.LT.NBMIN )
+     $   NB = N
+*
+      IF( UPPER ) THEN
+*
+*        Factorize A as U*D*U' using the upper triangle of A
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        KB, where KB is the number of columns factorized by SLASYF;
+*        KB is either NB or NB-1, or K for the last block
+*
+         K = N
+   10    CONTINUE
+*
+*        If K < 1, exit from loop
+*
+         IF( K.LT.1 )
+     $      GO TO 40
+*
+         IF( K.GT.NB ) THEN
+*
+*           Factorize columns k-kb+1:k of A and use blocked code to
+*           update columns 1:k-kb
+*
+            CALL SLASYF( UPLO, K, NB, KB, A, LDA, IPIV, WORK, LDWORK,
+     $                   IINFO )
+         ELSE
+*
+*           Use unblocked code to factorize columns 1:k of A
+*
+            CALL SSYTF2( UPLO, K, A, LDA, IPIV, IINFO )
+            KB = K
+         END IF
+*
+*        Set INFO on the first occurrence of a zero pivot
+*
+         IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+     $      INFO = IINFO
+*
+*        Decrease K and return to the start of the main loop
+*
+         K = K - KB
+         GO TO 10
+*
+      ELSE
+*
+*        Factorize A as L*D*L' using the lower triangle of A
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        KB, where KB is the number of columns factorized by SLASYF;
+*        KB is either NB or NB-1, or N-K+1 for the last block
+*
+         K = 1
+   20    CONTINUE
+*
+*        If K > N, exit from loop
+*
+         IF( K.GT.N )
+     $      GO TO 40
+*
+         IF( K.LE.N-NB ) THEN
+*
+*           Factorize columns k:k+kb-1 of A and use blocked code to
+*           update columns k+kb:n
+*
+            CALL SLASYF( UPLO, N-K+1, NB, KB, A( K, K ), LDA, IPIV( K ),
+     $                   WORK, LDWORK, IINFO )
+         ELSE
+*
+*           Use unblocked code to factorize columns k:n of A
+*
+            CALL SSYTF2( UPLO, N-K+1, A( K, K ), LDA, IPIV( K ), IINFO )
+            KB = N - K + 1
+         END IF
+*
+*        Set INFO on the first occurrence of a zero pivot
+*
+         IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+     $      INFO = IINFO + K - 1
+*
+*        Adjust IPIV
+*
+         DO 30 J = K, K + KB - 1
+            IF( IPIV( J ).GT.0 ) THEN
+               IPIV( J ) = IPIV( J ) + K - 1
+            ELSE
+               IPIV( J ) = IPIV( J ) - K + 1
+            END IF
+   30    CONTINUE
+*
+*        Increase K and return to the start of the main loop
+*
+         K = K + KB
+         GO TO 20
+*
+      END IF
+*
+   40 CONTINUE
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of SSYTRF
+*
+      END
diff --git a/costa/native/external/lapack/ssytri.f b/costa/native/external/lapack/ssytri.f
new file mode 100644
index 000000000..ad8edc74d
--- /dev/null
+++ b/costa/native/external/lapack/ssytri.f
@@ -0,0 +1,313 @@
+      SUBROUTINE SSYTRI( UPLO, N, A, LDA, IPIV, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYTRI computes the inverse of a real symmetric indefinite matrix
+*  A using the factorization A = U*D*U**T or A = L*D*L**T computed by
+*  SSYTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**T;
+*          = 'L':  Lower triangular, form is A = L*D*L**T.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the block diagonal matrix D and the multipliers
+*          used to obtain the factor U or L as computed by SSYTRF.
+*
+*          On exit, if INFO = 0, the (symmetric) inverse of the original
+*          matrix.  If UPLO = 'U', the upper triangular part of the
+*          inverse is formed and the part of A below the diagonal is not
+*          referenced; if UPLO = 'L' the lower triangular part of the
+*          inverse is formed and the part of A above the diagonal is
+*          not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by SSYTRF.
+*
+*  WORK    (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its
+*               inverse could not be computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            K, KP, KSTEP
+      REAL               AK, AKKP1, AKP1, D, T, TEMP
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SDOT
+      EXTERNAL           LSAME, SDOT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SSWAP, SSYMV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSYTRI', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Check that the diagonal matrix D is nonsingular.
+*
+      IF( UPPER ) THEN
+*
+*        Upper triangular storage: examine D from bottom to top
+*
+         DO 10 INFO = N, 1, -1
+            IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO )
+     $         RETURN
+   10    CONTINUE
+      ELSE
+*
+*        Lower triangular storage: examine D from top to bottom.
+*
+         DO 20 INFO = 1, N
+            IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO )
+     $         RETURN
+   20    CONTINUE
+      END IF
+      INFO = 0
+*
+      IF( UPPER ) THEN
+*
+*        Compute inv(A) from the factorization A = U*D*U'.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+   30    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 40
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Invert the diagonal block.
+*
+            A( K, K ) = ONE / A( K, K )
+*
+*           Compute column K of the inverse.
+*
+            IF( K.GT.1 ) THEN
+               CALL SCOPY( K-1, A( 1, K ), 1, WORK, 1 )
+               CALL SSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO,
+     $                     A( 1, K ), 1 )
+               A( K, K ) = A( K, K ) - SDOT( K-1, WORK, 1, A( 1, K ),
+     $                     1 )
+            END IF
+            KSTEP = 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Invert the diagonal block.
+*
+            T = ABS( A( K, K+1 ) )
+            AK = A( K, K ) / T
+            AKP1 = A( K+1, K+1 ) / T
+            AKKP1 = A( K, K+1 ) / T
+            D = T*( AK*AKP1-ONE )
+            A( K, K ) = AKP1 / D
+            A( K+1, K+1 ) = AK / D
+            A( K, K+1 ) = -AKKP1 / D
+*
+*           Compute columns K and K+1 of the inverse.
+*
+            IF( K.GT.1 ) THEN
+               CALL SCOPY( K-1, A( 1, K ), 1, WORK, 1 )
+               CALL SSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO,
+     $                     A( 1, K ), 1 )
+               A( K, K ) = A( K, K ) - SDOT( K-1, WORK, 1, A( 1, K ),
+     $                     1 )
+               A( K, K+1 ) = A( K, K+1 ) -
+     $                       SDOT( K-1, A( 1, K ), 1, A( 1, K+1 ), 1 )
+               CALL SCOPY( K-1, A( 1, K+1 ), 1, WORK, 1 )
+               CALL SSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO,
+     $                     A( 1, K+1 ), 1 )
+               A( K+1, K+1 ) = A( K+1, K+1 ) -
+     $                         SDOT( K-1, WORK, 1, A( 1, K+1 ), 1 )
+            END IF
+            KSTEP = 2
+         END IF
+*
+         KP = ABS( IPIV( K ) )
+         IF( KP.NE.K ) THEN
+*
+*           Interchange rows and columns K and KP in the leading
+*           submatrix A(1:k+1,1:k+1)
+*
+            CALL SSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 )
+            CALL SSWAP( K-KP-1, A( KP+1, K ), 1, A( KP, KP+1 ), LDA )
+            TEMP = A( K, K )
+            A( K, K ) = A( KP, KP )
+            A( KP, KP ) = TEMP
+            IF( KSTEP.EQ.2 ) THEN
+               TEMP = A( K, K+1 )
+               A( K, K+1 ) = A( KP, K+1 )
+               A( KP, K+1 ) = TEMP
+            END IF
+         END IF
+*
+         K = K + KSTEP
+         GO TO 30
+   40    CONTINUE
+*
+      ELSE
+*
+*        Compute inv(A) from the factorization A = L*D*L'.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = N
+   50    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 60
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Invert the diagonal block.
+*
+            A( K, K ) = ONE / A( K, K )
+*
+*           Compute column K of the inverse.
+*
+            IF( K.LT.N ) THEN
+               CALL SCOPY( N-K, A( K+1, K ), 1, WORK, 1 )
+               CALL SSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1,
+     $                     ZERO, A( K+1, K ), 1 )
+               A( K, K ) = A( K, K ) - SDOT( N-K, WORK, 1, A( K+1, K ),
+     $                     1 )
+            END IF
+            KSTEP = 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Invert the diagonal block.
+*
+            T = ABS( A( K, K-1 ) )
+            AK = A( K-1, K-1 ) / T
+            AKP1 = A( K, K ) / T
+            AKKP1 = A( K, K-1 ) / T
+            D = T*( AK*AKP1-ONE )
+            A( K-1, K-1 ) = AKP1 / D
+            A( K, K ) = AK / D
+            A( K, K-1 ) = -AKKP1 / D
+*
+*           Compute columns K-1 and K of the inverse.
+*
+            IF( K.LT.N ) THEN
+               CALL SCOPY( N-K, A( K+1, K ), 1, WORK, 1 )
+               CALL SSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1,
+     $                     ZERO, A( K+1, K ), 1 )
+               A( K, K ) = A( K, K ) - SDOT( N-K, WORK, 1, A( K+1, K ),
+     $                     1 )
+               A( K, K-1 ) = A( K, K-1 ) -
+     $                       SDOT( N-K, A( K+1, K ), 1, A( K+1, K-1 ),
+     $                       1 )
+               CALL SCOPY( N-K, A( K+1, K-1 ), 1, WORK, 1 )
+               CALL SSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1,
+     $                     ZERO, A( K+1, K-1 ), 1 )
+               A( K-1, K-1 ) = A( K-1, K-1 ) -
+     $                         SDOT( N-K, WORK, 1, A( K+1, K-1 ), 1 )
+            END IF
+            KSTEP = 2
+         END IF
+*
+         KP = ABS( IPIV( K ) )
+         IF( KP.NE.K ) THEN
+*
+*           Interchange rows and columns K and KP in the trailing
+*           submatrix A(k-1:n,k-1:n)
+*
+            IF( KP.LT.N )
+     $         CALL SSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 )
+            CALL SSWAP( KP-K-1, A( K+1, K ), 1, A( KP, K+1 ), LDA )
+            TEMP = A( K, K )
+            A( K, K ) = A( KP, KP )
+            A( KP, KP ) = TEMP
+            IF( KSTEP.EQ.2 ) THEN
+               TEMP = A( K, K-1 )
+               A( K, K-1 ) = A( KP, K-1 )
+               A( KP, K-1 ) = TEMP
+            END IF
+         END IF
+*
+         K = K - KSTEP
+         GO TO 50
+   60    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of SSYTRI
+*
+      END
diff --git a/costa/native/external/lapack/ssytrs.f b/costa/native/external/lapack/ssytrs.f
new file mode 100644
index 000000000..d7525cd24
--- /dev/null
+++ b/costa/native/external/lapack/ssytrs.f
@@ -0,0 +1,370 @@
+      SUBROUTINE SSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      REAL               A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  SSYTRS solves a system of linear equations A*X = B with a real
+*  symmetric matrix A using the factorization A = U*D*U**T or
+*  A = L*D*L**T computed by SSYTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**T;
+*          = 'L':  Lower triangular, form is A = L*D*L**T.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input) REAL array, dimension (LDA,N)
+*          The block diagonal matrix D and the multipliers used to
+*          obtain the factor U or L as computed by SSYTRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by SSYTRF.
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, K, KP
+      REAL               AK, AKM1, AKM1K, BK, BKM1, DENOM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMV, SGER, SSCAL, SSWAP, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SSYTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Solve A*X = B, where A = U*D*U'.
+*
+*        First solve U*D*X = B, overwriting B with X.
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = N
+   10    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 30
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(U(K)), where U(K) is the transformation
+*           stored in column K of A.
+*
+            CALL SGER( K-1, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB,
+     $                 B( 1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            CALL SSCAL( NRHS, ONE / A( K, K ), B( K, 1 ), LDB )
+            K = K - 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Interchange rows K-1 and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K-1 )
+     $         CALL SSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(U(K)), where U(K) is the transformation
+*           stored in columns K-1 and K of A.
+*
+            CALL SGER( K-2, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB,
+     $                 B( 1, 1 ), LDB )
+            CALL SGER( K-2, NRHS, -ONE, A( 1, K-1 ), 1, B( K-1, 1 ),
+     $                 LDB, B( 1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            AKM1K = A( K-1, K )
+            AKM1 = A( K-1, K-1 ) / AKM1K
+            AK = A( K, K ) / AKM1K
+            DENOM = AKM1*AK - ONE
+            DO 20 J = 1, NRHS
+               BKM1 = B( K-1, J ) / AKM1K
+               BK = B( K, J ) / AKM1K
+               B( K-1, J ) = ( AK*BKM1-BK ) / DENOM
+               B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM
+   20       CONTINUE
+            K = K - 2
+         END IF
+*
+         GO TO 10
+   30    CONTINUE
+*
+*        Next solve U'*X = B, overwriting B with X.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+   40    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 50
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Multiply by inv(U'(K)), where U(K) is the transformation
+*           stored in column K of A.
+*
+            CALL SGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, A( 1, K ),
+     $                  1, ONE, B( K, 1 ), LDB )
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            K = K + 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Multiply by inv(U'(K+1)), where U(K+1) is the transformation
+*           stored in columns K and K+1 of A.
+*
+            CALL SGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, A( 1, K ),
+     $                  1, ONE, B( K, 1 ), LDB )
+            CALL SGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB,
+     $                  A( 1, K+1 ), 1, ONE, B( K+1, 1 ), LDB )
+*
+*           Interchange rows K and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K )
+     $         CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            K = K + 2
+         END IF
+*
+         GO TO 40
+   50    CONTINUE
+*
+      ELSE
+*
+*        Solve A*X = B, where A = L*D*L'.
+*
+*        First solve L*D*X = B, overwriting B with X.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+   60    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 80
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(L(K)), where L(K) is the transformation
+*           stored in column K of A.
+*
+            IF( K.LT.N )
+     $         CALL SGER( N-K, NRHS, -ONE, A( K+1, K ), 1, B( K, 1 ),
+     $                    LDB, B( K+1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            CALL SSCAL( NRHS, ONE / A( K, K ), B( K, 1 ), LDB )
+            K = K + 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Interchange rows K+1 and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K+1 )
+     $         CALL SSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(L(K)), where L(K) is the transformation
+*           stored in columns K and K+1 of A.
+*
+            IF( K.LT.N-1 ) THEN
+               CALL SGER( N-K-1, NRHS, -ONE, A( K+2, K ), 1, B( K, 1 ),
+     $                    LDB, B( K+2, 1 ), LDB )
+               CALL SGER( N-K-1, NRHS, -ONE, A( K+2, K+1 ), 1,
+     $                    B( K+1, 1 ), LDB, B( K+2, 1 ), LDB )
+            END IF
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            AKM1K = A( K+1, K )
+            AKM1 = A( K, K ) / AKM1K
+            AK = A( K+1, K+1 ) / AKM1K
+            DENOM = AKM1*AK - ONE
+            DO 70 J = 1, NRHS
+               BKM1 = B( K, J ) / AKM1K
+               BK = B( K+1, J ) / AKM1K
+               B( K, J ) = ( AK*BKM1-BK ) / DENOM
+               B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM
+   70       CONTINUE
+            K = K + 2
+         END IF
+*
+         GO TO 60
+   80    CONTINUE
+*
+*        Next solve L'*X = B, overwriting B with X.
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = N
+   90    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 100
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Multiply by inv(L'(K)), where L(K) is the transformation
+*           stored in column K of A.
+*
+            IF( K.LT.N )
+     $         CALL SGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ),
+     $                     LDB, A( K+1, K ), 1, ONE, B( K, 1 ), LDB )
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            K = K - 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Multiply by inv(L'(K-1)), where L(K-1) is the transformation
+*           stored in columns K-1 and K of A.
+*
+            IF( K.LT.N ) THEN
+               CALL SGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ),
+     $                     LDB, A( K+1, K ), 1, ONE, B( K, 1 ), LDB )
+               CALL SGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ),
+     $                     LDB, A( K+1, K-1 ), 1, ONE, B( K-1, 1 ),
+     $                     LDB )
+            END IF
+*
+*           Interchange rows K and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K )
+     $         CALL SSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            K = K - 2
+         END IF
+*
+         GO TO 90
+  100    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of SSYTRS
+*
+      END
diff --git a/costa/native/external/lapack/stbcon.f b/costa/native/external/lapack/stbcon.f
new file mode 100644
index 000000000..650424eff
--- /dev/null
+++ b/costa/native/external/lapack/stbcon.f
@@ -0,0 +1,198 @@
+      SUBROUTINE STBCON( NORM, UPLO, DIAG, N, KD, AB, LDAB, RCOND, WORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORM, UPLO
+      INTEGER            INFO, KD, LDAB, N
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               AB( LDAB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STBCON estimates the reciprocal of the condition number of a
+*  triangular band matrix A, in either the 1-norm or the infinity-norm.
+*
+*  The norm of A is computed and an estimate is obtained for
+*  norm(inv(A)), then the reciprocal of the condition number is
+*  computed as
+*     RCOND = 1 / ( norm(A) * norm(inv(A)) ).
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies whether the 1-norm condition number or the
+*          infinity-norm condition number is required:
+*          = '1' or 'O':  1-norm;
+*          = 'I':         Infinity-norm.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals or subdiagonals of the
+*          triangular band matrix A.  KD >= 0.
+*
+*  AB      (input) REAL array, dimension (LDAB,N)
+*          The upper or lower triangular band matrix A, stored in the
+*          first kd+1 rows of the array. The j-th column of A is stored
+*          in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*          If DIAG = 'U', the diagonal elements of A are not referenced
+*          and are assumed to be 1.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  RCOND   (output) REAL
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(norm(A) * norm(inv(A))).
+*
+*  WORK    (workspace) REAL array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, ONENRM, UPPER
+      CHARACTER          NORMIN
+      INTEGER            IX, KASE, KASE1
+      REAL               AINVNM, ANORM, SCALE, SMLNUM, XNORM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ISAMAX
+      REAL               SLAMCH, SLANTB
+      EXTERNAL           LSAME, ISAMAX, SLAMCH, SLANTB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLACON, SLATBS, SRSCL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+      IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'STBCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      END IF
+*
+      RCOND = ZERO
+      SMLNUM = SLAMCH( 'Safe minimum' )*REAL( MAX( 1, N ) )
+*
+*     Compute the norm of the triangular matrix A.
+*
+      ANORM = SLANTB( NORM, UPLO, DIAG, N, KD, AB, LDAB, WORK )
+*
+*     Continue only if ANORM > 0.
+*
+      IF( ANORM.GT.ZERO ) THEN
+*
+*        Estimate the norm of the inverse of A.
+*
+         AINVNM = ZERO
+         NORMIN = 'N'
+         IF( ONENRM ) THEN
+            KASE1 = 1
+         ELSE
+            KASE1 = 2
+         END IF
+         KASE = 0
+   10    CONTINUE
+         CALL SLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.KASE1 ) THEN
+*
+*              Multiply by inv(A).
+*
+               CALL SLATBS( UPLO, 'No transpose', DIAG, NORMIN, N, KD,
+     $                      AB, LDAB, WORK, SCALE, WORK( 2*N+1 ), INFO )
+            ELSE
+*
+*              Multiply by inv(A').
+*
+               CALL SLATBS( UPLO, 'Transpose', DIAG, NORMIN, N, KD, AB,
+     $                      LDAB, WORK, SCALE, WORK( 2*N+1 ), INFO )
+            END IF
+            NORMIN = 'Y'
+*
+*           Multiply by 1/SCALE if doing so will not cause overflow.
+*
+            IF( SCALE.NE.ONE ) THEN
+               IX = ISAMAX( N, WORK, 1 )
+               XNORM = ABS( WORK( IX ) )
+               IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO )
+     $            GO TO 20
+               CALL SRSCL( N, SCALE, WORK, 1 )
+            END IF
+            GO TO 10
+         END IF
+*
+*        Compute the estimate of the reciprocal condition number.
+*
+         IF( AINVNM.NE.ZERO )
+     $      RCOND = ( ONE / ANORM ) / AINVNM
+      END IF
+*
+   20 CONTINUE
+      RETURN
+*
+*     End of STBCON
+*
+      END
diff --git a/costa/native/external/lapack/stbrfs.f b/costa/native/external/lapack/stbrfs.f
new file mode 100644
index 000000000..00a50967d
--- /dev/null
+++ b/costa/native/external/lapack/stbrfs.f
@@ -0,0 +1,381 @@
+      SUBROUTINE STBRFS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B,
+     $                   LDB, X, LDX, FERR, BERR, WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, TRANS, UPLO
+      INTEGER            INFO, KD, LDAB, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               AB( LDAB, * ), B( LDB, * ), BERR( * ),
+     $                   FERR( * ), WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STBRFS provides error bounds and backward error estimates for the
+*  solution to a system of linear equations with a triangular band
+*  coefficient matrix.
+*
+*  The solution matrix X must be computed by STBTRS or some other
+*  means before entering this routine.  STBRFS does not do iterative
+*  refinement because doing so cannot improve the backward error.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B  (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose = Transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals or subdiagonals of the
+*          triangular band matrix A.  KD >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AB      (input) REAL array, dimension (LDAB,N)
+*          The upper or lower triangular band matrix A, stored in the
+*          first kd+1 rows of the array. The j-th column of A is stored
+*          in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*          If DIAG = 'U', the diagonal elements of A are not referenced
+*          and are assumed to be 1.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  B       (input) REAL array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input) REAL array, dimension (LDX,NRHS)
+*          The solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) REAL array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN, NOUNIT, UPPER
+      CHARACTER          TRANST
+      INTEGER            I, J, K, KASE, NZ
+      REAL               EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SCOPY, SLACON, STBMV, STBSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -8
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'STBRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      IF( NOTRAN ) THEN
+         TRANST = 'T'
+      ELSE
+         TRANST = 'N'
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = KD + 2
+      EPS = SLAMCH( 'Epsilon' )
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 250 J = 1, NRHS
+*
+*        Compute residual R = B - op(A) * X,
+*        where op(A) = A or A', depending on TRANS.
+*
+         CALL SCOPY( N, X( 1, J ), 1, WORK( N+1 ), 1 )
+         CALL STBMV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, WORK( N+1 ),
+     $               1 )
+         CALL SAXPY( N, -ONE, B( 1, J ), 1, WORK( N+1 ), 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 20 I = 1, N
+            WORK( I ) = ABS( B( I, J ) )
+   20    CONTINUE
+*
+         IF( NOTRAN ) THEN
+*
+*           Compute abs(A)*abs(X) + abs(B).
+*
+            IF( UPPER ) THEN
+               IF( NOUNIT ) THEN
+                  DO 40 K = 1, N
+                     XK = ABS( X( K, J ) )
+                     DO 30 I = MAX( 1, K-KD ), K
+                        WORK( I ) = WORK( I ) +
+     $                              ABS( AB( KD+1+I-K, K ) )*XK
+   30                CONTINUE
+   40             CONTINUE
+               ELSE
+                  DO 60 K = 1, N
+                     XK = ABS( X( K, J ) )
+                     DO 50 I = MAX( 1, K-KD ), K - 1
+                        WORK( I ) = WORK( I ) +
+     $                              ABS( AB( KD+1+I-K, K ) )*XK
+   50                CONTINUE
+                     WORK( K ) = WORK( K ) + XK
+   60             CONTINUE
+               END IF
+            ELSE
+               IF( NOUNIT ) THEN
+                  DO 80 K = 1, N
+                     XK = ABS( X( K, J ) )
+                     DO 70 I = K, MIN( N, K+KD )
+                        WORK( I ) = WORK( I ) + ABS( AB( 1+I-K, K ) )*XK
+   70                CONTINUE
+   80             CONTINUE
+               ELSE
+                  DO 100 K = 1, N
+                     XK = ABS( X( K, J ) )
+                     DO 90 I = K + 1, MIN( N, K+KD )
+                        WORK( I ) = WORK( I ) + ABS( AB( 1+I-K, K ) )*XK
+   90                CONTINUE
+                     WORK( K ) = WORK( K ) + XK
+  100             CONTINUE
+               END IF
+            END IF
+         ELSE
+*
+*           Compute abs(A')*abs(X) + abs(B).
+*
+            IF( UPPER ) THEN
+               IF( NOUNIT ) THEN
+                  DO 120 K = 1, N
+                     S = ZERO
+                     DO 110 I = MAX( 1, K-KD ), K
+                        S = S + ABS( AB( KD+1+I-K, K ) )*
+     $                      ABS( X( I, J ) )
+  110                CONTINUE
+                     WORK( K ) = WORK( K ) + S
+  120             CONTINUE
+               ELSE
+                  DO 140 K = 1, N
+                     S = ABS( X( K, J ) )
+                     DO 130 I = MAX( 1, K-KD ), K - 1
+                        S = S + ABS( AB( KD+1+I-K, K ) )*
+     $                      ABS( X( I, J ) )
+  130                CONTINUE
+                     WORK( K ) = WORK( K ) + S
+  140             CONTINUE
+               END IF
+            ELSE
+               IF( NOUNIT ) THEN
+                  DO 160 K = 1, N
+                     S = ZERO
+                     DO 150 I = K, MIN( N, K+KD )
+                        S = S + ABS( AB( 1+I-K, K ) )*ABS( X( I, J ) )
+  150                CONTINUE
+                     WORK( K ) = WORK( K ) + S
+  160             CONTINUE
+               ELSE
+                  DO 180 K = 1, N
+                     S = ABS( X( K, J ) )
+                     DO 170 I = K + 1, MIN( N, K+KD )
+                        S = S + ABS( AB( 1+I-K, K ) )*ABS( X( I, J ) )
+  170                CONTINUE
+                     WORK( K ) = WORK( K ) + S
+  180             CONTINUE
+               END IF
+            END IF
+         END IF
+         S = ZERO
+         DO 190 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) )
+            ELSE
+               S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) /
+     $             ( WORK( I )+SAFE1 ) )
+            END IF
+  190    CONTINUE
+         BERR( J ) = S
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(op(A)))*
+*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(op(A)) is the inverse of op(A)
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(op(A))*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use SLACON to estimate the infinity-norm of the matrix
+*           inv(op(A)) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) )))
+*
+         DO 200 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I )
+            ELSE
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1
+            END IF
+  200    CONTINUE
+*
+         KASE = 0
+  210    CONTINUE
+         CALL SLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ),
+     $                KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(op(A)').
+*
+               CALL STBSV( UPLO, TRANST, DIAG, N, KD, AB, LDAB,
+     $                     WORK( N+1 ), 1 )
+               DO 220 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  220          CONTINUE
+            ELSE
+*
+*              Multiply by inv(op(A))*diag(W).
+*
+               DO 230 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  230          CONTINUE
+               CALL STBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB,
+     $                     WORK( N+1 ), 1 )
+            END IF
+            GO TO 210
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 240 I = 1, N
+            LSTRES = MAX( LSTRES, ABS( X( I, J ) ) )
+  240    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  250 CONTINUE
+*
+      RETURN
+*
+*     End of STBRFS
+*
+      END
diff --git a/costa/native/external/lapack/stbtrs.f b/costa/native/external/lapack/stbtrs.f
new file mode 100644
index 000000000..e13ed7040
--- /dev/null
+++ b/costa/native/external/lapack/stbtrs.f
@@ -0,0 +1,163 @@
+      SUBROUTINE STBTRS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B,
+     $                   LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, TRANS, UPLO
+      INTEGER            INFO, KD, LDAB, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      REAL               AB( LDAB, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STBTRS solves a triangular system of the form
+*
+*     A * X = B  or  A**T * X = B,
+*
+*  where A is a triangular band matrix of order N, and B is an
+*  N-by NRHS matrix.  A check is made to verify that A is nonsingular.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form the system of equations:
+*          = 'N':  A * X = B  (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose = Transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals or subdiagonals of the
+*          triangular band matrix A.  KD >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AB      (input) REAL array, dimension (LDAB,N)
+*          The upper or lower triangular band matrix A, stored in the
+*          first kd+1 rows of AB.  The j-th column of A is stored
+*          in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*          If DIAG = 'U', the diagonal elements of A are not referenced
+*          and are assumed to be 1.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, if INFO = 0, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the i-th diagonal element of A is zero,
+*                indicating that the matrix is singular and the
+*                solutions X have not been computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, UPPER
+      INTEGER            J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           STBSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOUNIT = LSAME( DIAG, 'N' )
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.
+     $         LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -8
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'STBTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Check for singularity.
+*
+      IF( NOUNIT ) THEN
+         IF( UPPER ) THEN
+            DO 10 INFO = 1, N
+               IF( AB( KD+1, INFO ).EQ.ZERO )
+     $            RETURN
+   10       CONTINUE
+         ELSE
+            DO 20 INFO = 1, N
+               IF( AB( 1, INFO ).EQ.ZERO )
+     $            RETURN
+   20       CONTINUE
+         END IF
+      END IF
+      INFO = 0
+*
+*     Solve A * X = B  or  A' * X = B.
+*
+      DO 30 J = 1, NRHS
+         CALL STBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, B( 1, J ), 1 )
+   30 CONTINUE
+*
+      RETURN
+*
+*     End of STBTRS
+*
+      END
diff --git a/costa/native/external/lapack/stgevc.f b/costa/native/external/lapack/stgevc.f
new file mode 100644
index 000000000..b361a5ecc
--- /dev/null
+++ b/costa/native/external/lapack/stgevc.f
@@ -0,0 +1,1146 @@
+      SUBROUTINE STGEVC( SIDE, HOWMNY, SELECT, N, A, LDA, B, LDB, VL,
+     $                   LDVL, VR, LDVR, MM, M, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          HOWMNY, SIDE
+      INTEGER            INFO, LDA, LDB, LDVL, LDVR, M, MM, N
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            SELECT( * )
+      REAL               A( LDA, * ), B( LDB, * ), VL( LDVL, * ),
+     $                   VR( LDVR, * ), WORK( * )
+*     ..
+*
+*
+*  Purpose
+*  =======
+*
+*  STGEVC computes some or all of the right and/or left generalized
+*  eigenvectors of a pair of real upper triangular matrices (A,B).
+*
+*  The right generalized eigenvector x and the left generalized 
+*  eigenvector y of (A,B) corresponding to a generalized eigenvalue
+*  w are defined by:
+*
+*          (A - wB) * x = 0  and  y**H * (A - wB) = 0
+*
+*  where y**H denotes the conjugate tranpose of y.
+*
+*  If an eigenvalue w is determined by zero diagonal elements of both A
+*  and B, a unit vector is returned as the corresponding eigenvector.
+*
+*  If all eigenvectors are requested, the routine may either return
+*  the matrices X and/or Y of right or left eigenvectors of (A,B), or
+*  the products Z*X and/or Q*Y, where Z and Q are input orthogonal
+*  matrices.  If (A,B) was obtained from the generalized real-Schur
+*  factorization of an original pair of matrices
+*     (A0,B0) = (Q*A*Z**H,Q*B*Z**H),
+*  then Z*X and Q*Y are the matrices of right or left eigenvectors of
+*  A.
+*
+*  A must be block upper triangular, with 1-by-1 and 2-by-2 diagonal
+*  blocks.  Corresponding to each 2-by-2 diagonal block is a complex
+*  conjugate pair of eigenvalues and eigenvectors; only one
+*  eigenvector of the pair is computed, namely the one corresponding
+*  to the eigenvalue with positive imaginary part.
+* 
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'R': compute right eigenvectors only;
+*          = 'L': compute left eigenvectors only;
+*          = 'B': compute both right and left eigenvectors.
+*
+*  HOWMNY  (input) CHARACTER*1
+*          = 'A': compute all right and/or left eigenvectors;
+*          = 'B': compute all right and/or left eigenvectors, and
+*                 backtransform them using the input matrices supplied
+*                 in VR and/or VL; 
+*          = 'S': compute selected right and/or left eigenvectors,
+*                 specified by the logical array SELECT.
+*
+*  SELECT  (input) LOGICAL array, dimension (N)
+*          If HOWMNY='S', SELECT specifies the eigenvectors to be
+*          computed.
+*          If HOWMNY='A' or 'B', SELECT is not referenced.
+*          To select the real eigenvector corresponding to the real
+*          eigenvalue w(j), SELECT(j) must be set to .TRUE.  To select
+*          the complex eigenvector corresponding to a complex conjugate
+*          pair w(j) and w(j+1), either SELECT(j) or SELECT(j+1) must
+*          be set to .TRUE..
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  A       (input) REAL array, dimension (LDA,N)
+*          The upper quasi-triangular matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of array A.  LDA >= max(1, N).
+*
+*  B       (input) REAL array, dimension (LDB,N)
+*          The upper triangular matrix B.  If A has a 2-by-2 diagonal
+*          block, then the corresponding 2-by-2 block of B must be
+*          diagonal with positive elements.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of array B.  LDB >= max(1,N).
+*
+*  VL      (input/output) REAL array, dimension (LDVL,MM)
+*          On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must
+*          contain an N-by-N matrix Q (usually the orthogonal matrix Q
+*          of left Schur vectors returned by SHGEQZ).
+*          On exit, if SIDE = 'L' or 'B', VL contains:
+*          if HOWMNY = 'A', the matrix Y of left eigenvectors of (A,B);
+*          if HOWMNY = 'B', the matrix Q*Y;
+*          if HOWMNY = 'S', the left eigenvectors of (A,B) specified by
+*                      SELECT, stored consecutively in the columns of
+*                      VL, in the same order as their eigenvalues.
+*          If SIDE = 'R', VL is not referenced.
+*
+*          A complex eigenvector corresponding to a complex eigenvalue
+*          is stored in two consecutive columns, the first holding the
+*          real part, and the second the imaginary part.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of array VL.
+*          LDVL >= max(1,N) if SIDE = 'L' or 'B'; LDVL >= 1 otherwise.
+*
+*  VR      (input/output) REAL array, dimension (LDVR,MM)
+*          On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must
+*          contain an N-by-N matrix Q (usually the orthogonal matrix Z
+*          of right Schur vectors returned by SHGEQZ).
+*          On exit, if SIDE = 'R' or 'B', VR contains:
+*          if HOWMNY = 'A', the matrix X of right eigenvectors of (A,B);
+*          if HOWMNY = 'B', the matrix Z*X;
+*          if HOWMNY = 'S', the right eigenvectors of (A,B) specified by
+*                      SELECT, stored consecutively in the columns of
+*                      VR, in the same order as their eigenvalues.
+*          If SIDE = 'L', VR is not referenced.
+*
+*          A complex eigenvector corresponding to a complex eigenvalue
+*          is stored in two consecutive columns, the first holding the
+*          real part and the second the imaginary part.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the array VR.
+*          LDVR >= max(1,N) if SIDE = 'R' or 'B'; LDVR >= 1 otherwise.
+*
+*  MM      (input) INTEGER
+*          The number of columns in the arrays VL and/or VR. MM >= M.
+*
+*  M       (output) INTEGER
+*          The number of columns in the arrays VL and/or VR actually
+*          used to store the eigenvectors.  If HOWMNY = 'A' or 'B', M
+*          is set to N.  Each selected real eigenvector occupies one
+*          column and each selected complex eigenvector occupies two
+*          columns.
+*
+*  WORK    (workspace) REAL array, dimension (6*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  the 2-by-2 block (INFO:INFO+1) does not have a complex
+*                eigenvalue.
+*
+*  Further Details
+*  ===============
+*
+*  Allocation of workspace:
+*  ---------- -- ---------
+*
+*     WORK( j ) = 1-norm of j-th column of A, above the diagonal
+*     WORK( N+j ) = 1-norm of j-th column of B, above the diagonal
+*     WORK( 2*N+1:3*N ) = real part of eigenvector
+*     WORK( 3*N+1:4*N ) = imaginary part of eigenvector
+*     WORK( 4*N+1:5*N ) = real part of back-transformed eigenvector
+*     WORK( 5*N+1:6*N ) = imaginary part of back-transformed eigenvector
+*
+*  Rowwise vs. columnwise solution methods:
+*  ------- --  ---------- -------- -------
+*
+*  Finding a generalized eigenvector consists basically of solving the
+*  singular triangular system
+*
+*   (A - w B) x = 0     (for right) or:   (A - w B)**H y = 0  (for left)
+*
+*  Consider finding the i-th right eigenvector (assume all eigenvalues
+*  are real). The equation to be solved is:
+*       n                   i
+*  0 = sum  C(j,k) v(k)  = sum  C(j,k) v(k)     for j = i,. . .,1
+*      k=j                 k=j
+*
+*  where  C = (A - w B)  (The components v(i+1:n) are 0.)
+*
+*  The "rowwise" method is:
+*
+*  (1)  v(i) := 1
+*  for j = i-1,. . .,1:
+*                          i
+*      (2) compute  s = - sum C(j,k) v(k)   and
+*                        k=j+1
+*
+*      (3) v(j) := s / C(j,j)
+*
+*  Step 2 is sometimes called the "dot product" step, since it is an
+*  inner product between the j-th row and the portion of the eigenvector
+*  that has been computed so far.
+*
+*  The "columnwise" method consists basically in doing the sums
+*  for all the rows in parallel.  As each v(j) is computed, the
+*  contribution of v(j) times the j-th column of C is added to the
+*  partial sums.  Since FORTRAN arrays are stored columnwise, this has
+*  the advantage that at each step, the elements of C that are accessed
+*  are adjacent to one another, whereas with the rowwise method, the
+*  elements accessed at a step are spaced LDA (and LDB) words apart.
+*
+*  When finding left eigenvectors, the matrix in question is the
+*  transpose of the one in storage, so the rowwise method then
+*  actually accesses columns of A and B at each step, and so is the
+*  preferred method.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE, SAFETY
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0,
+     $                   SAFETY = 1.0E+2 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            COMPL, COMPR, IL2BY2, ILABAD, ILALL, ILBACK,
+     $                   ILBBAD, ILCOMP, ILCPLX, LSA, LSB
+      INTEGER            I, IBEG, IEIG, IEND, IHWMNY, IINFO, IM, ISIDE,
+     $                   J, JA, JC, JE, JR, JW, NA, NW
+      REAL               ACOEF, ACOEFA, ANORM, ASCALE, BCOEFA, BCOEFI,
+     $                   BCOEFR, BIG, BIGNUM, BNORM, BSCALE, CIM2A,
+     $                   CIM2B, CIMAGA, CIMAGB, CRE2A, CRE2B, CREALA,
+     $                   CREALB, DMIN, SAFMIN, SALFAR, SBETA, SCALE,
+     $                   SMALL, TEMP, TEMP2, TEMP2I, TEMP2R, ULP, XMAX,
+     $                   XSCALE
+*     ..
+*     .. Local Arrays ..
+      REAL               BDIAG( 2 ), SUM( 2, 2 ), SUMA( 2, 2 ),
+     $                   SUMB( 2, 2 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMV, SLABAD, SLACPY, SLAG2, SLALN2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and Test the input parameters
+*
+      IF( LSAME( HOWMNY, 'A' ) ) THEN
+         IHWMNY = 1
+         ILALL = .TRUE.
+         ILBACK = .FALSE.
+      ELSE IF( LSAME( HOWMNY, 'S' ) ) THEN
+         IHWMNY = 2
+         ILALL = .FALSE.
+         ILBACK = .FALSE.
+      ELSE IF( LSAME( HOWMNY, 'B' ) .OR. LSAME( HOWMNY, 'T' ) ) THEN
+         IHWMNY = 3
+         ILALL = .TRUE.
+         ILBACK = .TRUE.
+      ELSE
+         IHWMNY = -1
+         ILALL = .TRUE.
+      END IF
+*
+      IF( LSAME( SIDE, 'R' ) ) THEN
+         ISIDE = 1
+         COMPL = .FALSE.
+         COMPR = .TRUE.
+      ELSE IF( LSAME( SIDE, 'L' ) ) THEN
+         ISIDE = 2
+         COMPL = .TRUE.
+         COMPR = .FALSE.
+      ELSE IF( LSAME( SIDE, 'B' ) ) THEN
+         ISIDE = 3
+         COMPL = .TRUE.
+         COMPR = .TRUE.
+      ELSE
+         ISIDE = -1
+      END IF
+*
+      INFO = 0
+      IF( ISIDE.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( IHWMNY.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'STGEVC', -INFO )
+         RETURN
+      END IF
+*
+*     Count the number of eigenvectors to be computed
+*
+      IF( .NOT.ILALL ) THEN
+         IM = 0
+         ILCPLX = .FALSE.
+         DO 10 J = 1, N
+            IF( ILCPLX ) THEN
+               ILCPLX = .FALSE.
+               GO TO 10
+            END IF
+            IF( J.LT.N ) THEN
+               IF( A( J+1, J ).NE.ZERO )
+     $            ILCPLX = .TRUE.
+            END IF
+            IF( ILCPLX ) THEN
+               IF( SELECT( J ) .OR. SELECT( J+1 ) )
+     $            IM = IM + 2
+            ELSE
+               IF( SELECT( J ) )
+     $            IM = IM + 1
+            END IF
+   10    CONTINUE
+      ELSE
+         IM = N
+      END IF
+*
+*     Check 2-by-2 diagonal blocks of A, B
+*
+      ILABAD = .FALSE.
+      ILBBAD = .FALSE.
+      DO 20 J = 1, N - 1
+         IF( A( J+1, J ).NE.ZERO ) THEN
+            IF( B( J, J ).EQ.ZERO .OR. B( J+1, J+1 ).EQ.ZERO .OR.
+     $          B( J, J+1 ).NE.ZERO )ILBBAD = .TRUE.
+            IF( J.LT.N-1 ) THEN
+               IF( A( J+2, J+1 ).NE.ZERO )
+     $            ILABAD = .TRUE.
+            END IF
+         END IF
+   20 CONTINUE
+*
+      IF( ILABAD ) THEN
+         INFO = -5
+      ELSE IF( ILBBAD ) THEN
+         INFO = -7
+      ELSE IF( COMPL .AND. LDVL.LT.N .OR. LDVL.LT.1 ) THEN
+         INFO = -10
+      ELSE IF( COMPR .AND. LDVR.LT.N .OR. LDVR.LT.1 ) THEN
+         INFO = -12
+      ELSE IF( MM.LT.IM ) THEN
+         INFO = -13
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'STGEVC', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = IM
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Machine Constants
+*
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      BIG = ONE / SAFMIN
+      CALL SLABAD( SAFMIN, BIG )
+      ULP = SLAMCH( 'Epsilon' )*SLAMCH( 'Base' )
+      SMALL = SAFMIN*N / ULP
+      BIG = ONE / SMALL
+      BIGNUM = ONE / ( SAFMIN*N )
+*
+*     Compute the 1-norm of each column of the strictly upper triangular
+*     part (i.e., excluding all elements belonging to the diagonal
+*     blocks) of A and B to check for possible overflow in the
+*     triangular solver.
+*
+      ANORM = ABS( A( 1, 1 ) )
+      IF( N.GT.1 )
+     $   ANORM = ANORM + ABS( A( 2, 1 ) )
+      BNORM = ABS( B( 1, 1 ) )
+      WORK( 1 ) = ZERO
+      WORK( N+1 ) = ZERO
+*
+      DO 50 J = 2, N
+         TEMP = ZERO
+         TEMP2 = ZERO
+         IF( A( J, J-1 ).EQ.ZERO ) THEN
+            IEND = J - 1
+         ELSE
+            IEND = J - 2
+         END IF
+         DO 30 I = 1, IEND
+            TEMP = TEMP + ABS( A( I, J ) )
+            TEMP2 = TEMP2 + ABS( B( I, J ) )
+   30    CONTINUE
+         WORK( J ) = TEMP
+         WORK( N+J ) = TEMP2
+         DO 40 I = IEND + 1, MIN( J+1, N )
+            TEMP = TEMP + ABS( A( I, J ) )
+            TEMP2 = TEMP2 + ABS( B( I, J ) )
+   40    CONTINUE
+         ANORM = MAX( ANORM, TEMP )
+         BNORM = MAX( BNORM, TEMP2 )
+   50 CONTINUE
+*
+      ASCALE = ONE / MAX( ANORM, SAFMIN )
+      BSCALE = ONE / MAX( BNORM, SAFMIN )
+*
+*     Left eigenvectors
+*
+      IF( COMPL ) THEN
+         IEIG = 0
+*
+*        Main loop over eigenvalues
+*
+         ILCPLX = .FALSE.
+         DO 220 JE = 1, N
+*
+*           Skip this iteration if (a) HOWMNY='S' and SELECT=.FALSE., or
+*           (b) this would be the second of a complex pair.
+*           Check for complex eigenvalue, so as to be sure of which
+*           entry(-ies) of SELECT to look at.
+*
+            IF( ILCPLX ) THEN
+               ILCPLX = .FALSE.
+               GO TO 220
+            END IF
+            NW = 1
+            IF( JE.LT.N ) THEN
+               IF( A( JE+1, JE ).NE.ZERO ) THEN
+                  ILCPLX = .TRUE.
+                  NW = 2
+               END IF
+            END IF
+            IF( ILALL ) THEN
+               ILCOMP = .TRUE.
+            ELSE IF( ILCPLX ) THEN
+               ILCOMP = SELECT( JE ) .OR. SELECT( JE+1 )
+            ELSE
+               ILCOMP = SELECT( JE )
+            END IF
+            IF( .NOT.ILCOMP )
+     $         GO TO 220
+*
+*           Decide if (a) singular pencil, (b) real eigenvalue, or
+*           (c) complex eigenvalue.
+*
+            IF( .NOT.ILCPLX ) THEN
+               IF( ABS( A( JE, JE ) ).LE.SAFMIN .AND.
+     $             ABS( B( JE, JE ) ).LE.SAFMIN ) THEN
+*
+*                 Singular matrix pencil -- return unit eigenvector
+*
+                  IEIG = IEIG + 1
+                  DO 60 JR = 1, N
+                     VL( JR, IEIG ) = ZERO
+   60             CONTINUE
+                  VL( IEIG, IEIG ) = ONE
+                  GO TO 220
+               END IF
+            END IF
+*
+*           Clear vector
+*
+            DO 70 JR = 1, NW*N
+               WORK( 2*N+JR ) = ZERO
+   70       CONTINUE
+*                                                 T
+*           Compute coefficients in  ( a A - b B )  y = 0
+*              a  is  ACOEF
+*              b  is  BCOEFR + i*BCOEFI
+*
+            IF( .NOT.ILCPLX ) THEN
+*
+*              Real eigenvalue
+*
+               TEMP = ONE / MAX( ABS( A( JE, JE ) )*ASCALE,
+     $                ABS( B( JE, JE ) )*BSCALE, SAFMIN )
+               SALFAR = ( TEMP*A( JE, JE ) )*ASCALE
+               SBETA = ( TEMP*B( JE, JE ) )*BSCALE
+               ACOEF = SBETA*ASCALE
+               BCOEFR = SALFAR*BSCALE
+               BCOEFI = ZERO
+*
+*              Scale to avoid underflow
+*
+               SCALE = ONE
+               LSA = ABS( SBETA ).GE.SAFMIN .AND. ABS( ACOEF ).LT.SMALL
+               LSB = ABS( SALFAR ).GE.SAFMIN .AND. ABS( BCOEFR ).LT.
+     $               SMALL
+               IF( LSA )
+     $            SCALE = ( SMALL / ABS( SBETA ) )*MIN( ANORM, BIG )
+               IF( LSB )
+     $            SCALE = MAX( SCALE, ( SMALL / ABS( SALFAR ) )*
+     $                    MIN( BNORM, BIG ) )
+               IF( LSA .OR. LSB ) THEN
+                  SCALE = MIN( SCALE, ONE /
+     $                    ( SAFMIN*MAX( ONE, ABS( ACOEF ),
+     $                    ABS( BCOEFR ) ) ) )
+                  IF( LSA ) THEN
+                     ACOEF = ASCALE*( SCALE*SBETA )
+                  ELSE
+                     ACOEF = SCALE*ACOEF
+                  END IF
+                  IF( LSB ) THEN
+                     BCOEFR = BSCALE*( SCALE*SALFAR )
+                  ELSE
+                     BCOEFR = SCALE*BCOEFR
+                  END IF
+               END IF
+               ACOEFA = ABS( ACOEF )
+               BCOEFA = ABS( BCOEFR )
+*
+*              First component is 1
+*
+               WORK( 2*N+JE ) = ONE
+               XMAX = ONE
+            ELSE
+*
+*              Complex eigenvalue
+*
+               CALL SLAG2( A( JE, JE ), LDA, B( JE, JE ), LDB,
+     $                     SAFMIN*SAFETY, ACOEF, TEMP, BCOEFR, TEMP2,
+     $                     BCOEFI )
+               BCOEFI = -BCOEFI
+               IF( BCOEFI.EQ.ZERO ) THEN
+                  INFO = JE
+                  RETURN
+               END IF
+*
+*              Scale to avoid over/underflow
+*
+               ACOEFA = ABS( ACOEF )
+               BCOEFA = ABS( BCOEFR ) + ABS( BCOEFI )
+               SCALE = ONE
+               IF( ACOEFA*ULP.LT.SAFMIN .AND. ACOEFA.GE.SAFMIN )
+     $            SCALE = ( SAFMIN / ULP ) / ACOEFA
+               IF( BCOEFA*ULP.LT.SAFMIN .AND. BCOEFA.GE.SAFMIN )
+     $            SCALE = MAX( SCALE, ( SAFMIN / ULP ) / BCOEFA )
+               IF( SAFMIN*ACOEFA.GT.ASCALE )
+     $            SCALE = ASCALE / ( SAFMIN*ACOEFA )
+               IF( SAFMIN*BCOEFA.GT.BSCALE )
+     $            SCALE = MIN( SCALE, BSCALE / ( SAFMIN*BCOEFA ) )
+               IF( SCALE.NE.ONE ) THEN
+                  ACOEF = SCALE*ACOEF
+                  ACOEFA = ABS( ACOEF )
+                  BCOEFR = SCALE*BCOEFR
+                  BCOEFI = SCALE*BCOEFI
+                  BCOEFA = ABS( BCOEFR ) + ABS( BCOEFI )
+               END IF
+*
+*              Compute first two components of eigenvector
+*
+               TEMP = ACOEF*A( JE+1, JE )
+               TEMP2R = ACOEF*A( JE, JE ) - BCOEFR*B( JE, JE )
+               TEMP2I = -BCOEFI*B( JE, JE )
+               IF( ABS( TEMP ).GT.ABS( TEMP2R )+ABS( TEMP2I ) ) THEN
+                  WORK( 2*N+JE ) = ONE
+                  WORK( 3*N+JE ) = ZERO
+                  WORK( 2*N+JE+1 ) = -TEMP2R / TEMP
+                  WORK( 3*N+JE+1 ) = -TEMP2I / TEMP
+               ELSE
+                  WORK( 2*N+JE+1 ) = ONE
+                  WORK( 3*N+JE+1 ) = ZERO
+                  TEMP = ACOEF*A( JE, JE+1 )
+                  WORK( 2*N+JE ) = ( BCOEFR*B( JE+1, JE+1 )-ACOEF*
+     $                             A( JE+1, JE+1 ) ) / TEMP
+                  WORK( 3*N+JE ) = BCOEFI*B( JE+1, JE+1 ) / TEMP
+               END IF
+               XMAX = MAX( ABS( WORK( 2*N+JE ) )+ABS( WORK( 3*N+JE ) ),
+     $                ABS( WORK( 2*N+JE+1 ) )+ABS( WORK( 3*N+JE+1 ) ) )
+            END IF
+*
+            DMIN = MAX( ULP*ACOEFA*ANORM, ULP*BCOEFA*BNORM, SAFMIN )
+*
+*                                           T
+*           Triangular solve of  (a A - b B)  y = 0
+*
+*                                   T
+*           (rowwise in  (a A - b B) , or columnwise in (a A - b B) )
+*
+            IL2BY2 = .FALSE.
+*
+            DO 160 J = JE + NW, N
+               IF( IL2BY2 ) THEN
+                  IL2BY2 = .FALSE.
+                  GO TO 160
+               END IF
+*
+               NA = 1
+               BDIAG( 1 ) = B( J, J )
+               IF( J.LT.N ) THEN
+                  IF( A( J+1, J ).NE.ZERO ) THEN
+                     IL2BY2 = .TRUE.
+                     BDIAG( 2 ) = B( J+1, J+1 )
+                     NA = 2
+                  END IF
+               END IF
+*
+*              Check whether scaling is necessary for dot products
+*
+               XSCALE = ONE / MAX( ONE, XMAX )
+               TEMP = MAX( WORK( J ), WORK( N+J ),
+     $                ACOEFA*WORK( J )+BCOEFA*WORK( N+J ) )
+               IF( IL2BY2 )
+     $            TEMP = MAX( TEMP, WORK( J+1 ), WORK( N+J+1 ),
+     $                   ACOEFA*WORK( J+1 )+BCOEFA*WORK( N+J+1 ) )
+               IF( TEMP.GT.BIGNUM*XSCALE ) THEN
+                  DO 90 JW = 0, NW - 1
+                     DO 80 JR = JE, J - 1
+                        WORK( ( JW+2 )*N+JR ) = XSCALE*
+     $                     WORK( ( JW+2 )*N+JR )
+   80                CONTINUE
+   90             CONTINUE
+                  XMAX = XMAX*XSCALE
+               END IF
+*
+*              Compute dot products
+*
+*                    j-1
+*              SUM = sum  conjg( a*A(k,j) - b*B(k,j) )*x(k)
+*                    k=je
+*
+*              To reduce the op count, this is done as
+*
+*              _        j-1                  _        j-1
+*              a*conjg( sum  A(k,j)*x(k) ) - b*conjg( sum  B(k,j)*x(k) )
+*                       k=je                          k=je
+*
+*              which may cause underflow problems if A or B are close
+*              to underflow.  (E.g., less than SMALL.)
+*
+*
+*              A series of compiler directives to defeat vectorization
+*              for the next loop
+*
+*$PL$ CMCHAR=' '
+CDIR$          NEXTSCALAR
+C$DIR          SCALAR
+CDIR$          NEXT SCALAR
+CVD$L          NOVECTOR
+CDEC$          NOVECTOR
+CVD$           NOVECTOR
+*VDIR          NOVECTOR
+*VOCL          LOOP,SCALAR
+CIBM           PREFER SCALAR
+*$PL$ CMCHAR='*'
+*
+               DO 120 JW = 1, NW
+*
+*$PL$ CMCHAR=' '
+CDIR$             NEXTSCALAR
+C$DIR             SCALAR
+CDIR$             NEXT SCALAR
+CVD$L             NOVECTOR
+CDEC$             NOVECTOR
+CVD$              NOVECTOR
+*VDIR             NOVECTOR
+*VOCL             LOOP,SCALAR
+CIBM              PREFER SCALAR
+*$PL$ CMCHAR='*'
+*
+                  DO 110 JA = 1, NA
+                     SUMA( JA, JW ) = ZERO
+                     SUMB( JA, JW ) = ZERO
+*
+                     DO 100 JR = JE, J - 1
+                        SUMA( JA, JW ) = SUMA( JA, JW ) +
+     $                                   A( JR, J+JA-1 )*
+     $                                   WORK( ( JW+1 )*N+JR )
+                        SUMB( JA, JW ) = SUMB( JA, JW ) +
+     $                                   B( JR, J+JA-1 )*
+     $                                   WORK( ( JW+1 )*N+JR )
+  100                CONTINUE
+  110             CONTINUE
+  120          CONTINUE
+*
+*$PL$ CMCHAR=' '
+CDIR$          NEXTSCALAR
+C$DIR          SCALAR
+CDIR$          NEXT SCALAR
+CVD$L          NOVECTOR
+CDEC$          NOVECTOR
+CVD$           NOVECTOR
+*VDIR          NOVECTOR
+*VOCL          LOOP,SCALAR
+CIBM           PREFER SCALAR
+*$PL$ CMCHAR='*'
+*
+               DO 130 JA = 1, NA
+                  IF( ILCPLX ) THEN
+                     SUM( JA, 1 ) = -ACOEF*SUMA( JA, 1 ) +
+     $                              BCOEFR*SUMB( JA, 1 ) -
+     $                              BCOEFI*SUMB( JA, 2 )
+                     SUM( JA, 2 ) = -ACOEF*SUMA( JA, 2 ) +
+     $                              BCOEFR*SUMB( JA, 2 ) +
+     $                              BCOEFI*SUMB( JA, 1 )
+                  ELSE
+                     SUM( JA, 1 ) = -ACOEF*SUMA( JA, 1 ) +
+     $                              BCOEFR*SUMB( JA, 1 )
+                  END IF
+  130          CONTINUE
+*
+*                                  T
+*              Solve  ( a A - b B )  y = SUM(,)
+*              with scaling and perturbation of the denominator
+*
+               CALL SLALN2( .TRUE., NA, NW, DMIN, ACOEF, A( J, J ), LDA,
+     $                      BDIAG( 1 ), BDIAG( 2 ), SUM, 2, BCOEFR,
+     $                      BCOEFI, WORK( 2*N+J ), N, SCALE, TEMP,
+     $                      IINFO )
+               IF( SCALE.LT.ONE ) THEN
+                  DO 150 JW = 0, NW - 1
+                     DO 140 JR = JE, J - 1
+                        WORK( ( JW+2 )*N+JR ) = SCALE*
+     $                     WORK( ( JW+2 )*N+JR )
+  140                CONTINUE
+  150             CONTINUE
+                  XMAX = SCALE*XMAX
+               END IF
+               XMAX = MAX( XMAX, TEMP )
+  160       CONTINUE
+*
+*           Copy eigenvector to VL, back transforming if
+*           HOWMNY='B'.
+*
+            IEIG = IEIG + 1
+            IF( ILBACK ) THEN
+               DO 170 JW = 0, NW - 1
+                  CALL SGEMV( 'N', N, N+1-JE, ONE, VL( 1, JE ), LDVL,
+     $                        WORK( ( JW+2 )*N+JE ), 1, ZERO,
+     $                        WORK( ( JW+4 )*N+1 ), 1 )
+  170          CONTINUE
+               CALL SLACPY( ' ', N, NW, WORK( 4*N+1 ), N, VL( 1, JE ),
+     $                      LDVL )
+               IBEG = 1
+            ELSE
+               CALL SLACPY( ' ', N, NW, WORK( 2*N+1 ), N, VL( 1, IEIG ),
+     $                      LDVL )
+               IBEG = JE
+            END IF
+*
+*           Scale eigenvector
+*
+            XMAX = ZERO
+            IF( ILCPLX ) THEN
+               DO 180 J = IBEG, N
+                  XMAX = MAX( XMAX, ABS( VL( J, IEIG ) )+
+     $                   ABS( VL( J, IEIG+1 ) ) )
+  180          CONTINUE
+            ELSE
+               DO 190 J = IBEG, N
+                  XMAX = MAX( XMAX, ABS( VL( J, IEIG ) ) )
+  190          CONTINUE
+            END IF
+*
+            IF( XMAX.GT.SAFMIN ) THEN
+               XSCALE = ONE / XMAX
+*
+               DO 210 JW = 0, NW - 1
+                  DO 200 JR = IBEG, N
+                     VL( JR, IEIG+JW ) = XSCALE*VL( JR, IEIG+JW )
+  200             CONTINUE
+  210          CONTINUE
+            END IF
+            IEIG = IEIG + NW - 1
+*
+  220    CONTINUE
+      END IF
+*
+*     Right eigenvectors
+*
+      IF( COMPR ) THEN
+         IEIG = IM + 1
+*
+*        Main loop over eigenvalues
+*
+         ILCPLX = .FALSE.
+         DO 500 JE = N, 1, -1
+*
+*           Skip this iteration if (a) HOWMNY='S' and SELECT=.FALSE., or
+*           (b) this would be the second of a complex pair.
+*           Check for complex eigenvalue, so as to be sure of which
+*           entry(-ies) of SELECT to look at -- if complex, SELECT(JE)
+*           or SELECT(JE-1).
+*           If this is a complex pair, the 2-by-2 diagonal block
+*           corresponding to the eigenvalue is in rows/columns JE-1:JE
+*
+            IF( ILCPLX ) THEN
+               ILCPLX = .FALSE.
+               GO TO 500
+            END IF
+            NW = 1
+            IF( JE.GT.1 ) THEN
+               IF( A( JE, JE-1 ).NE.ZERO ) THEN
+                  ILCPLX = .TRUE.
+                  NW = 2
+               END IF
+            END IF
+            IF( ILALL ) THEN
+               ILCOMP = .TRUE.
+            ELSE IF( ILCPLX ) THEN
+               ILCOMP = SELECT( JE ) .OR. SELECT( JE-1 )
+            ELSE
+               ILCOMP = SELECT( JE )
+            END IF
+            IF( .NOT.ILCOMP )
+     $         GO TO 500
+*
+*           Decide if (a) singular pencil, (b) real eigenvalue, or
+*           (c) complex eigenvalue.
+*
+            IF( .NOT.ILCPLX ) THEN
+               IF( ABS( A( JE, JE ) ).LE.SAFMIN .AND.
+     $             ABS( B( JE, JE ) ).LE.SAFMIN ) THEN
+*
+*                 Singular matrix pencil -- unit eigenvector
+*
+                  IEIG = IEIG - 1
+                  DO 230 JR = 1, N
+                     VR( JR, IEIG ) = ZERO
+  230             CONTINUE
+                  VR( IEIG, IEIG ) = ONE
+                  GO TO 500
+               END IF
+            END IF
+*
+*           Clear vector
+*
+            DO 250 JW = 0, NW - 1
+               DO 240 JR = 1, N
+                  WORK( ( JW+2 )*N+JR ) = ZERO
+  240          CONTINUE
+  250       CONTINUE
+*
+*           Compute coefficients in  ( a A - b B ) x = 0
+*              a  is  ACOEF
+*              b  is  BCOEFR + i*BCOEFI
+*
+            IF( .NOT.ILCPLX ) THEN
+*
+*              Real eigenvalue
+*
+               TEMP = ONE / MAX( ABS( A( JE, JE ) )*ASCALE,
+     $                ABS( B( JE, JE ) )*BSCALE, SAFMIN )
+               SALFAR = ( TEMP*A( JE, JE ) )*ASCALE
+               SBETA = ( TEMP*B( JE, JE ) )*BSCALE
+               ACOEF = SBETA*ASCALE
+               BCOEFR = SALFAR*BSCALE
+               BCOEFI = ZERO
+*
+*              Scale to avoid underflow
+*
+               SCALE = ONE
+               LSA = ABS( SBETA ).GE.SAFMIN .AND. ABS( ACOEF ).LT.SMALL
+               LSB = ABS( SALFAR ).GE.SAFMIN .AND. ABS( BCOEFR ).LT.
+     $               SMALL
+               IF( LSA )
+     $            SCALE = ( SMALL / ABS( SBETA ) )*MIN( ANORM, BIG )
+               IF( LSB )
+     $            SCALE = MAX( SCALE, ( SMALL / ABS( SALFAR ) )*
+     $                    MIN( BNORM, BIG ) )
+               IF( LSA .OR. LSB ) THEN
+                  SCALE = MIN( SCALE, ONE /
+     $                    ( SAFMIN*MAX( ONE, ABS( ACOEF ),
+     $                    ABS( BCOEFR ) ) ) )
+                  IF( LSA ) THEN
+                     ACOEF = ASCALE*( SCALE*SBETA )
+                  ELSE
+                     ACOEF = SCALE*ACOEF
+                  END IF
+                  IF( LSB ) THEN
+                     BCOEFR = BSCALE*( SCALE*SALFAR )
+                  ELSE
+                     BCOEFR = SCALE*BCOEFR
+                  END IF
+               END IF
+               ACOEFA = ABS( ACOEF )
+               BCOEFA = ABS( BCOEFR )
+*
+*              First component is 1
+*
+               WORK( 2*N+JE ) = ONE
+               XMAX = ONE
+*
+*              Compute contribution from column JE of A and B to sum
+*              (See "Further Details", above.)
+*
+               DO 260 JR = 1, JE - 1
+                  WORK( 2*N+JR ) = BCOEFR*B( JR, JE ) -
+     $                             ACOEF*A( JR, JE )
+  260          CONTINUE
+            ELSE
+*
+*              Complex eigenvalue
+*
+               CALL SLAG2( A( JE-1, JE-1 ), LDA, B( JE-1, JE-1 ), LDB,
+     $                     SAFMIN*SAFETY, ACOEF, TEMP, BCOEFR, TEMP2,
+     $                     BCOEFI )
+               IF( BCOEFI.EQ.ZERO ) THEN
+                  INFO = JE - 1
+                  RETURN
+               END IF
+*
+*              Scale to avoid over/underflow
+*
+               ACOEFA = ABS( ACOEF )
+               BCOEFA = ABS( BCOEFR ) + ABS( BCOEFI )
+               SCALE = ONE
+               IF( ACOEFA*ULP.LT.SAFMIN .AND. ACOEFA.GE.SAFMIN )
+     $            SCALE = ( SAFMIN / ULP ) / ACOEFA
+               IF( BCOEFA*ULP.LT.SAFMIN .AND. BCOEFA.GE.SAFMIN )
+     $            SCALE = MAX( SCALE, ( SAFMIN / ULP ) / BCOEFA )
+               IF( SAFMIN*ACOEFA.GT.ASCALE )
+     $            SCALE = ASCALE / ( SAFMIN*ACOEFA )
+               IF( SAFMIN*BCOEFA.GT.BSCALE )
+     $            SCALE = MIN( SCALE, BSCALE / ( SAFMIN*BCOEFA ) )
+               IF( SCALE.NE.ONE ) THEN
+                  ACOEF = SCALE*ACOEF
+                  ACOEFA = ABS( ACOEF )
+                  BCOEFR = SCALE*BCOEFR
+                  BCOEFI = SCALE*BCOEFI
+                  BCOEFA = ABS( BCOEFR ) + ABS( BCOEFI )
+               END IF
+*
+*              Compute first two components of eigenvector
+*              and contribution to sums
+*
+               TEMP = ACOEF*A( JE, JE-1 )
+               TEMP2R = ACOEF*A( JE, JE ) - BCOEFR*B( JE, JE )
+               TEMP2I = -BCOEFI*B( JE, JE )
+               IF( ABS( TEMP ).GE.ABS( TEMP2R )+ABS( TEMP2I ) ) THEN
+                  WORK( 2*N+JE ) = ONE
+                  WORK( 3*N+JE ) = ZERO
+                  WORK( 2*N+JE-1 ) = -TEMP2R / TEMP
+                  WORK( 3*N+JE-1 ) = -TEMP2I / TEMP
+               ELSE
+                  WORK( 2*N+JE-1 ) = ONE
+                  WORK( 3*N+JE-1 ) = ZERO
+                  TEMP = ACOEF*A( JE-1, JE )
+                  WORK( 2*N+JE ) = ( BCOEFR*B( JE-1, JE-1 )-ACOEF*
+     $                             A( JE-1, JE-1 ) ) / TEMP
+                  WORK( 3*N+JE ) = BCOEFI*B( JE-1, JE-1 ) / TEMP
+               END IF
+*
+               XMAX = MAX( ABS( WORK( 2*N+JE ) )+ABS( WORK( 3*N+JE ) ),
+     $                ABS( WORK( 2*N+JE-1 ) )+ABS( WORK( 3*N+JE-1 ) ) )
+*
+*              Compute contribution from columns JE and JE-1
+*              of A and B to the sums.
+*
+               CREALA = ACOEF*WORK( 2*N+JE-1 )
+               CIMAGA = ACOEF*WORK( 3*N+JE-1 )
+               CREALB = BCOEFR*WORK( 2*N+JE-1 ) -
+     $                  BCOEFI*WORK( 3*N+JE-1 )
+               CIMAGB = BCOEFI*WORK( 2*N+JE-1 ) +
+     $                  BCOEFR*WORK( 3*N+JE-1 )
+               CRE2A = ACOEF*WORK( 2*N+JE )
+               CIM2A = ACOEF*WORK( 3*N+JE )
+               CRE2B = BCOEFR*WORK( 2*N+JE ) - BCOEFI*WORK( 3*N+JE )
+               CIM2B = BCOEFI*WORK( 2*N+JE ) + BCOEFR*WORK( 3*N+JE )
+               DO 270 JR = 1, JE - 2
+                  WORK( 2*N+JR ) = -CREALA*A( JR, JE-1 ) +
+     $                             CREALB*B( JR, JE-1 ) -
+     $                             CRE2A*A( JR, JE ) + CRE2B*B( JR, JE )
+                  WORK( 3*N+JR ) = -CIMAGA*A( JR, JE-1 ) +
+     $                             CIMAGB*B( JR, JE-1 ) -
+     $                             CIM2A*A( JR, JE ) + CIM2B*B( JR, JE )
+  270          CONTINUE
+            END IF
+*
+            DMIN = MAX( ULP*ACOEFA*ANORM, ULP*BCOEFA*BNORM, SAFMIN )
+*
+*           Columnwise triangular solve of  (a A - b B)  x = 0
+*
+            IL2BY2 = .FALSE.
+            DO 370 J = JE - NW, 1, -1
+*
+*              If a 2-by-2 block, is in position j-1:j, wait until
+*              next iteration to process it (when it will be j:j+1)
+*
+               IF( .NOT.IL2BY2 .AND. J.GT.1 ) THEN
+                  IF( A( J, J-1 ).NE.ZERO ) THEN
+                     IL2BY2 = .TRUE.
+                     GO TO 370
+                  END IF
+               END IF
+               BDIAG( 1 ) = B( J, J )
+               IF( IL2BY2 ) THEN
+                  NA = 2
+                  BDIAG( 2 ) = B( J+1, J+1 )
+               ELSE
+                  NA = 1
+               END IF
+*
+*              Compute x(j) (and x(j+1), if 2-by-2 block)
+*
+               CALL SLALN2( .FALSE., NA, NW, DMIN, ACOEF, A( J, J ),
+     $                      LDA, BDIAG( 1 ), BDIAG( 2 ), WORK( 2*N+J ),
+     $                      N, BCOEFR, BCOEFI, SUM, 2, SCALE, TEMP,
+     $                      IINFO )
+               IF( SCALE.LT.ONE ) THEN
+*
+                  DO 290 JW = 0, NW - 1
+                     DO 280 JR = 1, JE
+                        WORK( ( JW+2 )*N+JR ) = SCALE*
+     $                     WORK( ( JW+2 )*N+JR )
+  280                CONTINUE
+  290             CONTINUE
+               END IF
+               XMAX = MAX( SCALE*XMAX, TEMP )
+*
+               DO 310 JW = 1, NW
+                  DO 300 JA = 1, NA
+                     WORK( ( JW+1 )*N+J+JA-1 ) = SUM( JA, JW )
+  300             CONTINUE
+  310          CONTINUE
+*
+*              w = w + x(j)*(a A(*,j) - b B(*,j) ) with scaling
+*
+               IF( J.GT.1 ) THEN
+*
+*                 Check whether scaling is necessary for sum.
+*
+                  XSCALE = ONE / MAX( ONE, XMAX )
+                  TEMP = ACOEFA*WORK( J ) + BCOEFA*WORK( N+J )
+                  IF( IL2BY2 )
+     $               TEMP = MAX( TEMP, ACOEFA*WORK( J+1 )+BCOEFA*
+     $                      WORK( N+J+1 ) )
+                  TEMP = MAX( TEMP, ACOEFA, BCOEFA )
+                  IF( TEMP.GT.BIGNUM*XSCALE ) THEN
+*
+                     DO 330 JW = 0, NW - 1
+                        DO 320 JR = 1, JE
+                           WORK( ( JW+2 )*N+JR ) = XSCALE*
+     $                        WORK( ( JW+2 )*N+JR )
+  320                   CONTINUE
+  330                CONTINUE
+                     XMAX = XMAX*XSCALE
+                  END IF
+*
+*                 Compute the contributions of the off-diagonals of
+*                 column j (and j+1, if 2-by-2 block) of A and B to the
+*                 sums.
+*
+*
+                  DO 360 JA = 1, NA
+                     IF( ILCPLX ) THEN
+                        CREALA = ACOEF*WORK( 2*N+J+JA-1 )
+                        CIMAGA = ACOEF*WORK( 3*N+J+JA-1 )
+                        CREALB = BCOEFR*WORK( 2*N+J+JA-1 ) -
+     $                           BCOEFI*WORK( 3*N+J+JA-1 )
+                        CIMAGB = BCOEFI*WORK( 2*N+J+JA-1 ) +
+     $                           BCOEFR*WORK( 3*N+J+JA-1 )
+                        DO 340 JR = 1, J - 1
+                           WORK( 2*N+JR ) = WORK( 2*N+JR ) -
+     $                                      CREALA*A( JR, J+JA-1 ) +
+     $                                      CREALB*B( JR, J+JA-1 )
+                           WORK( 3*N+JR ) = WORK( 3*N+JR ) -
+     $                                      CIMAGA*A( JR, J+JA-1 ) +
+     $                                      CIMAGB*B( JR, J+JA-1 )
+  340                   CONTINUE
+                     ELSE
+                        CREALA = ACOEF*WORK( 2*N+J+JA-1 )
+                        CREALB = BCOEFR*WORK( 2*N+J+JA-1 )
+                        DO 350 JR = 1, J - 1
+                           WORK( 2*N+JR ) = WORK( 2*N+JR ) -
+     $                                      CREALA*A( JR, J+JA-1 ) +
+     $                                      CREALB*B( JR, J+JA-1 )
+  350                   CONTINUE
+                     END IF
+  360             CONTINUE
+               END IF
+*
+               IL2BY2 = .FALSE.
+  370       CONTINUE
+*
+*           Copy eigenvector to VR, back transforming if
+*           HOWMNY='B'.
+*
+            IEIG = IEIG - NW
+            IF( ILBACK ) THEN
+*
+               DO 410 JW = 0, NW - 1
+                  DO 380 JR = 1, N
+                     WORK( ( JW+4 )*N+JR ) = WORK( ( JW+2 )*N+1 )*
+     $                                       VR( JR, 1 )
+  380             CONTINUE
+*
+*                 A series of compiler directives to defeat
+*                 vectorization for the next loop
+*
+*
+                  DO 400 JC = 2, JE
+                     DO 390 JR = 1, N
+                        WORK( ( JW+4 )*N+JR ) = WORK( ( JW+4 )*N+JR ) +
+     $                     WORK( ( JW+2 )*N+JC )*VR( JR, JC )
+  390                CONTINUE
+  400             CONTINUE
+  410          CONTINUE
+*
+               DO 430 JW = 0, NW - 1
+                  DO 420 JR = 1, N
+                     VR( JR, IEIG+JW ) = WORK( ( JW+4 )*N+JR )
+  420             CONTINUE
+  430          CONTINUE
+*
+               IEND = N
+            ELSE
+               DO 450 JW = 0, NW - 1
+                  DO 440 JR = 1, N
+                     VR( JR, IEIG+JW ) = WORK( ( JW+2 )*N+JR )
+  440             CONTINUE
+  450          CONTINUE
+*
+               IEND = JE
+            END IF
+*
+*           Scale eigenvector
+*
+            XMAX = ZERO
+            IF( ILCPLX ) THEN
+               DO 460 J = 1, IEND
+                  XMAX = MAX( XMAX, ABS( VR( J, IEIG ) )+
+     $                   ABS( VR( J, IEIG+1 ) ) )
+  460          CONTINUE
+            ELSE
+               DO 470 J = 1, IEND
+                  XMAX = MAX( XMAX, ABS( VR( J, IEIG ) ) )
+  470          CONTINUE
+            END IF
+*
+            IF( XMAX.GT.SAFMIN ) THEN
+               XSCALE = ONE / XMAX
+               DO 490 JW = 0, NW - 1
+                  DO 480 JR = 1, IEND
+                     VR( JR, IEIG+JW ) = XSCALE*VR( JR, IEIG+JW )
+  480             CONTINUE
+  490          CONTINUE
+            END IF
+  500    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of STGEVC
+*
+      END
diff --git a/costa/native/external/lapack/stgex2.f b/costa/native/external/lapack/stgex2.f
new file mode 100644
index 000000000..b92ea0a46
--- /dev/null
+++ b/costa/native/external/lapack/stgex2.f
@@ -0,0 +1,582 @@
+      SUBROUTINE STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
+     $                   LDZ, J1, N1, N2, WORK, LWORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      LOGICAL            WANTQ, WANTZ
+      INTEGER            INFO, J1, LDA, LDB, LDQ, LDZ, LWORK, N, N1, N2
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
+     $                   WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STGEX2 swaps adjacent diagonal blocks (A11, B11) and (A22, B22)
+*  of size 1-by-1 or 2-by-2 in an upper (quasi) triangular matrix pair
+*  (A, B) by an orthogonal equivalence transformation.
+*
+*  (A, B) must be in generalized real Schur canonical form (as returned
+*  by SGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2
+*  diagonal blocks. B is upper triangular.
+*
+*  Optionally, the matrices Q and Z of generalized Schur vectors are
+*  updated.
+*
+*         Q(in) * A(in) * Z(in)' = Q(out) * A(out) * Z(out)'
+*         Q(in) * B(in) * Z(in)' = Q(out) * B(out) * Z(out)'
+*
+*
+*  Arguments
+*  =========
+*
+*  WANTQ   (input) LOGICAL
+*          .TRUE. : update the left transformation matrix Q;
+*          .FALSE.: do not update Q.
+*
+*  WANTZ   (input) LOGICAL
+*          .TRUE. : update the right transformation matrix Z;
+*          .FALSE.: do not update Z.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B. N >= 0.
+*
+*  A      (input/output) REAL arrays, dimensions (LDA,N)
+*          On entry, the matrix A in the pair (A, B).
+*          On exit, the updated matrix A.
+*
+*  LDA     (input)  INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  B      (input/output) REAL arrays, dimensions (LDB,N)
+*          On entry, the matrix B in the pair (A, B).
+*          On exit, the updated matrix B.
+*
+*  LDB     (input)  INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  Q       (input/output) REAL array, dimension (LDZ,N)
+*          On entry, if WANTQ = .TRUE., the orthogonal matrix Q.
+*          On exit, the updated matrix Q.
+*          Not referenced if WANTQ = .FALSE..
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q. LDQ >= 1.
+*          If WANTQ = .TRUE., LDQ >= N.
+*
+*  Z       (input/output) REAL array, dimension (LDZ,N)
+*          On entry, if WANTZ =.TRUE., the orthogonal matrix Z.
+*          On exit, the updated matrix Z.
+*          Not referenced if WANTZ = .FALSE..
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z. LDZ >= 1.
+*          If WANTZ = .TRUE., LDZ >= N.
+*
+*  J1      (input) INTEGER
+*          The index to the first block (A11, B11). 1 <= J1 <= N.
+*
+*  N1      (input) INTEGER
+*          The order of the first block (A11, B11). N1 = 0, 1 or 2.
+*
+*  N2      (input) INTEGER
+*          The order of the second block (A22, B22). N2 = 0, 1 or 2.
+*
+*  WORK    (workspace) REAL array, dimension (LWORK).
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          LWORK >=  MAX( N*(N2+N1), (N2+N1)*(N2+N1)*2 )
+*
+*  INFO    (output) INTEGER
+*            =0: Successful exit
+*            >0: If INFO = 1, the transformed matrix (A, B) would be
+*                too far from generalized Schur form; the blocks are
+*                not swapped and (A, B) and (Q, Z) are unchanged.
+*                The problem of swapping is too ill-conditioned.
+*            <0: If INFO = -16: LWORK is too small. Appropriate value
+*                for LWORK is returned in WORK(1).
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  In the current code both weak and strong stability tests are
+*  performed. The user can omit the strong stability test by changing
+*  the internal logical parameter WANDS to .FALSE.. See ref. [2] for
+*  details.
+*
+*  [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the
+*      Generalized Real Schur Form of a Regular Matrix Pair (A, B), in
+*      M.S. Moonen et al (eds), Linear Algebra for Large Scale and
+*      Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218.
+*
+*  [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified
+*      Eigenvalues of a Regular Matrix Pair (A, B) and Condition
+*      Estimation: Theory, Algorithms and Software,
+*      Report UMINF - 94.04, Department of Computing Science, Umea
+*      University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working
+*      Note 87. To appear in Numerical Algorithms, 1996.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+      REAL               TEN
+      PARAMETER          ( TEN = 1.0E+01 )
+      INTEGER            LDST
+      PARAMETER          ( LDST = 4 )
+      LOGICAL            WANDS
+      PARAMETER          ( WANDS = .TRUE. )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            STRONG, WEAK
+      INTEGER            I, IDUM, LINFO, M
+      REAL               BQRA21, BRQA21, DDUM, DNORM, DSCALE, DSUM, EPS,
+     $                   F, G, SA, SB, SCALE, SMLNUM, SS, THRESH, WS
+*     ..
+*     .. Local Arrays ..
+      INTEGER            IWORK( LDST )
+      REAL               AI( 2 ), AR( 2 ), BE( 2 ), IR( LDST, LDST ),
+     $                   IRCOP( LDST, LDST ), LI( LDST, LDST ),
+     $                   LICOP( LDST, LDST ), S( LDST, LDST ),
+     $                   SCPY( LDST, LDST ), T( LDST, LDST ),
+     $                   TAUL( LDST ), TAUR( LDST ), TCPY( LDST, LDST )
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SGEMM, SGEQR2, SGERQ2, SLACPY, SLAGV2,
+     $                   SLARTG, SLASSQ, SORG2R, SORGR2, SORM2R, SORMR2,
+     $                   SROT, SSCAL, STGSY2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+*
+*     Quick return if possible
+*
+      IF( N.LE.1 .OR. N1.LE.0 .OR. N2.LE.0 )
+     $   RETURN
+      IF( N1.GT.N .OR. ( J1+N1 ).GT.N )
+     $   RETURN
+      M = N1 + N2
+      IF( LWORK.LT.MAX( N*M, M*M*2 ) ) THEN
+         INFO = -16
+         WORK( 1 ) = MAX( N*M, M*M*2 )
+         RETURN
+      END IF
+*
+      WEAK = .FALSE.
+      STRONG = .FALSE.
+*
+*     Make a local copy of selected block
+*
+      CALL SCOPY( LDST*LDST, ZERO, 0, LI, 1 )
+      CALL SCOPY( LDST*LDST, ZERO, 0, IR, 1 )
+      CALL SLACPY( 'Full', M, M, A( J1, J1 ), LDA, S, LDST )
+      CALL SLACPY( 'Full', M, M, B( J1, J1 ), LDB, T, LDST )
+*
+*     Compute threshold for testing acceptance of swapping.
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SLAMCH( 'S' ) / EPS
+      DSCALE = ZERO
+      DSUM = ONE
+      CALL SLACPY( 'Full', M, M, S, LDST, WORK, M )
+      CALL SLASSQ( M*M, WORK, 1, DSCALE, DSUM )
+      CALL SLACPY( 'Full', M, M, T, LDST, WORK, M )
+      CALL SLASSQ( M*M, WORK, 1, DSCALE, DSUM )
+      DNORM = DSCALE*SQRT( DSUM )
+      THRESH = MAX( TEN*EPS*DNORM, SMLNUM )
+*
+      IF( M.EQ.2 ) THEN
+*
+*        CASE 1: Swap 1-by-1 and 1-by-1 blocks.
+*
+*        Compute orthogonal QL and RQ that swap 1-by-1 and 1-by-1 blocks
+*        using Givens rotations and perform the swap tentatively.
+*
+         F = S( 2, 2 )*T( 1, 1 ) - T( 2, 2 )*S( 1, 1 )
+         G = S( 2, 2 )*T( 1, 2 ) - T( 2, 2 )*S( 1, 2 )
+         SB = ABS( T( 2, 2 ) )
+         SA = ABS( S( 2, 2 ) )
+         CALL SLARTG( F, G, IR( 1, 2 ), IR( 1, 1 ), DDUM )
+         IR( 2, 1 ) = -IR( 1, 2 )
+         IR( 2, 2 ) = IR( 1, 1 )
+         CALL SROT( 2, S( 1, 1 ), 1, S( 1, 2 ), 1, IR( 1, 1 ),
+     $              IR( 2, 1 ) )
+         CALL SROT( 2, T( 1, 1 ), 1, T( 1, 2 ), 1, IR( 1, 1 ),
+     $              IR( 2, 1 ) )
+         IF( SA.GE.SB ) THEN
+            CALL SLARTG( S( 1, 1 ), S( 2, 1 ), LI( 1, 1 ), LI( 2, 1 ),
+     $                   DDUM )
+         ELSE
+            CALL SLARTG( T( 1, 1 ), T( 2, 1 ), LI( 1, 1 ), LI( 2, 1 ),
+     $                   DDUM )
+         END IF
+         CALL SROT( 2, S( 1, 1 ), LDST, S( 2, 1 ), LDST, LI( 1, 1 ),
+     $              LI( 2, 1 ) )
+         CALL SROT( 2, T( 1, 1 ), LDST, T( 2, 1 ), LDST, LI( 1, 1 ),
+     $              LI( 2, 1 ) )
+         LI( 2, 2 ) = LI( 1, 1 )
+         LI( 1, 2 ) = -LI( 2, 1 )
+*
+*        Weak stability test:
+*           |S21| + |T21| <= O(EPS * F-norm((S, T)))
+*
+         WS = ABS( S( 2, 1 ) ) + ABS( T( 2, 1 ) )
+         WEAK = WS.LE.THRESH
+         IF( .NOT.WEAK )
+     $      GO TO 70
+*
+         IF( WANDS ) THEN
+*
+*           Strong stability test:
+*             F-norm((A-QL'*S*QR, B-QL'*T*QR)) <= O(EPS*F-norm((A,B)))
+*
+            CALL SLACPY( 'Full', M, M, A( J1, J1 ), LDA, WORK( M*M+1 ),
+     $                   M )
+            CALL SGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, S, LDST, ZERO,
+     $                  WORK, M )
+            CALL SGEMM( 'N', 'T', M, M, M, -ONE, WORK, M, IR, LDST, ONE,
+     $                  WORK( M*M+1 ), M )
+            DSCALE = ZERO
+            DSUM = ONE
+            CALL SLASSQ( M*M, WORK( M*M+1 ), 1, DSCALE, DSUM )
+*
+            CALL SLACPY( 'Full', M, M, B( J1, J1 ), LDB, WORK( M*M+1 ),
+     $                   M )
+            CALL SGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, T, LDST, ZERO,
+     $                  WORK, M )
+            CALL SGEMM( 'N', 'T', M, M, M, -ONE, WORK, M, IR, LDST, ONE,
+     $                  WORK( M*M+1 ), M )
+            CALL SLASSQ( M*M, WORK( M*M+1 ), 1, DSCALE, DSUM )
+            SS = DSCALE*SQRT( DSUM )
+            STRONG = SS.LE.THRESH
+            IF( .NOT.STRONG )
+     $         GO TO 70
+         END IF
+*
+*        Update (A(J1:J1+M-1, M+J1:N), B(J1:J1+M-1, M+J1:N)) and
+*               (A(1:J1-1, J1:J1+M), B(1:J1-1, J1:J1+M)).
+*
+         CALL SROT( J1+1, A( 1, J1 ), 1, A( 1, J1+1 ), 1, IR( 1, 1 ),
+     $              IR( 2, 1 ) )
+         CALL SROT( J1+1, B( 1, J1 ), 1, B( 1, J1+1 ), 1, IR( 1, 1 ),
+     $              IR( 2, 1 ) )
+         CALL SROT( N-J1+1, A( J1, J1 ), LDA, A( J1+1, J1 ), LDA,
+     $              LI( 1, 1 ), LI( 2, 1 ) )
+         CALL SROT( N-J1+1, B( J1, J1 ), LDB, B( J1+1, J1 ), LDB,
+     $              LI( 1, 1 ), LI( 2, 1 ) )
+*
+*        Set  N1-by-N2 (2,1) - blocks to ZERO.
+*
+         A( J1+1, J1 ) = ZERO
+         B( J1+1, J1 ) = ZERO
+*
+*        Accumulate transformations into Q and Z if requested.
+*
+         IF( WANTZ )
+     $      CALL SROT( N, Z( 1, J1 ), 1, Z( 1, J1+1 ), 1, IR( 1, 1 ),
+     $                 IR( 2, 1 ) )
+         IF( WANTQ )
+     $      CALL SROT( N, Q( 1, J1 ), 1, Q( 1, J1+1 ), 1, LI( 1, 1 ),
+     $                 LI( 2, 1 ) )
+*
+*        Exit with INFO = 0 if swap was successfully performed.
+*
+         RETURN
+*
+      ELSE
+*
+*        CASE 2: Swap 1-by-1 and 2-by-2 blocks, or 2-by-2
+*                and 2-by-2 blocks.
+*
+*        Solve the generalized Sylvester equation
+*                 S11 * R - L * S22 = SCALE * S12
+*                 T11 * R - L * T22 = SCALE * T12
+*        for R and L. Solutions in LI and IR.
+*
+         CALL SLACPY( 'Full', N1, N2, T( 1, N1+1 ), LDST, LI, LDST )
+         CALL SLACPY( 'Full', N1, N2, S( 1, N1+1 ), LDST,
+     $                IR( N2+1, N1+1 ), LDST )
+         CALL STGSY2( 'N', 0, N1, N2, S, LDST, S( N1+1, N1+1 ), LDST,
+     $                IR( N2+1, N1+1 ), LDST, T, LDST, T( N1+1, N1+1 ),
+     $                LDST, LI, LDST, SCALE, DSUM, DSCALE, IWORK, IDUM,
+     $                LINFO )
+*
+*        Compute orthogonal matrix QL:
+*
+*                    QL' * LI = [ TL ]
+*                               [ 0  ]
+*        where
+*                    LI =  [      -L              ]
+*                          [ SCALE * identity(N2) ]
+*
+         DO 10 I = 1, N2
+            CALL SSCAL( N1, -ONE, LI( 1, I ), 1 )
+            LI( N1+I, I ) = SCALE
+   10    CONTINUE
+         CALL SGEQR2( M, N2, LI, LDST, TAUL, WORK, LINFO )
+         IF( LINFO.NE.0 )
+     $      GO TO 70
+         CALL SORG2R( M, M, N2, LI, LDST, TAUL, WORK, LINFO )
+         IF( LINFO.NE.0 )
+     $      GO TO 70
+*
+*        Compute orthogonal matrix RQ:
+*
+*                    IR * RQ' =   [ 0  TR],
+*
+*         where IR = [ SCALE * identity(N1), R ]
+*
+         DO 20 I = 1, N1
+            IR( N2+I, I ) = SCALE
+   20    CONTINUE
+         CALL SGERQ2( N1, M, IR( N2+1, 1 ), LDST, TAUR, WORK, LINFO )
+         IF( LINFO.NE.0 )
+     $      GO TO 70
+         CALL SORGR2( M, M, N1, IR, LDST, TAUR, WORK, LINFO )
+         IF( LINFO.NE.0 )
+     $      GO TO 70
+*
+*        Perform the swapping tentatively:
+*
+         CALL SGEMM( 'T', 'N', M, M, M, ONE, LI, LDST, S, LDST, ZERO,
+     $               WORK, M )
+         CALL SGEMM( 'N', 'T', M, M, M, ONE, WORK, M, IR, LDST, ZERO, S,
+     $               LDST )
+         CALL SGEMM( 'T', 'N', M, M, M, ONE, LI, LDST, T, LDST, ZERO,
+     $               WORK, M )
+         CALL SGEMM( 'N', 'T', M, M, M, ONE, WORK, M, IR, LDST, ZERO, T,
+     $               LDST )
+         CALL SLACPY( 'F', M, M, S, LDST, SCPY, LDST )
+         CALL SLACPY( 'F', M, M, T, LDST, TCPY, LDST )
+         CALL SLACPY( 'F', M, M, IR, LDST, IRCOP, LDST )
+         CALL SLACPY( 'F', M, M, LI, LDST, LICOP, LDST )
+*
+*        Triangularize the B-part by an RQ factorization.
+*        Apply transformation (from left) to A-part, giving S.
+*
+         CALL SGERQ2( M, M, T, LDST, TAUR, WORK, LINFO )
+         IF( LINFO.NE.0 )
+     $      GO TO 70
+         CALL SORMR2( 'R', 'T', M, M, M, T, LDST, TAUR, S, LDST, WORK,
+     $                LINFO )
+         IF( LINFO.NE.0 )
+     $      GO TO 70
+         CALL SORMR2( 'L', 'N', M, M, M, T, LDST, TAUR, IR, LDST, WORK,
+     $                LINFO )
+         IF( LINFO.NE.0 )
+     $      GO TO 70
+*
+*        Compute F-norm(S21) in BRQA21. (T21 is 0.)
+*
+         DSCALE = ZERO
+         DSUM = ONE
+         DO 30 I = 1, N2
+            CALL SLASSQ( N1, S( N2+1, I ), 1, DSCALE, DSUM )
+   30    CONTINUE
+         BRQA21 = DSCALE*SQRT( DSUM )
+*
+*        Triangularize the B-part by a QR factorization.
+*        Apply transformation (from right) to A-part, giving S.
+*
+         CALL SGEQR2( M, M, TCPY, LDST, TAUL, WORK, LINFO )
+         IF( LINFO.NE.0 )
+     $      GO TO 70
+         CALL SORM2R( 'L', 'T', M, M, M, TCPY, LDST, TAUL, SCPY, LDST,
+     $                WORK, INFO )
+         CALL SORM2R( 'R', 'N', M, M, M, TCPY, LDST, TAUL, LICOP, LDST,
+     $                WORK, INFO )
+         IF( LINFO.NE.0 )
+     $      GO TO 70
+*
+*        Compute F-norm(S21) in BQRA21. (T21 is 0.)
+*
+         DSCALE = ZERO
+         DSUM = ONE
+         DO 40 I = 1, N2
+            CALL SLASSQ( N1, SCPY( N2+1, I ), 1, DSCALE, DSUM )
+   40    CONTINUE
+         BQRA21 = DSCALE*SQRT( DSUM )
+*
+*        Decide which method to use.
+*          Weak stability test:
+*             F-norm(S21) <= O(EPS * F-norm((S, T)))
+*
+         IF( BQRA21.LE.BRQA21 .AND. BQRA21.LE.THRESH ) THEN
+            CALL SLACPY( 'F', M, M, SCPY, LDST, S, LDST )
+            CALL SLACPY( 'F', M, M, TCPY, LDST, T, LDST )
+            CALL SLACPY( 'F', M, M, IRCOP, LDST, IR, LDST )
+            CALL SLACPY( 'F', M, M, LICOP, LDST, LI, LDST )
+         ELSE IF( BRQA21.GE.THRESH ) THEN
+            GO TO 70
+         END IF
+*
+*        Set lower triangle of B-part to zero
+*
+         DO 50 I = 2, M
+            CALL SCOPY( M-I+1, ZERO, 0, T( I, I-1 ), 1 )
+   50    CONTINUE
+*
+         IF( WANDS ) THEN
+*
+*           Strong stability test:
+*              F-norm((A-QL*S*QR', B-QL*T*QR')) <= O(EPS*F-norm((A,B)))
+*
+            CALL SLACPY( 'Full', M, M, A( J1, J1 ), LDA, WORK( M*M+1 ),
+     $                   M )
+            CALL SGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, S, LDST, ZERO,
+     $                  WORK, M )
+            CALL SGEMM( 'N', 'N', M, M, M, -ONE, WORK, M, IR, LDST, ONE,
+     $                  WORK( M*M+1 ), M )
+            DSCALE = ZERO
+            DSUM = ONE
+            CALL SLASSQ( M*M, WORK( M*M+1 ), 1, DSCALE, DSUM )
+*
+            CALL SLACPY( 'Full', M, M, B( J1, J1 ), LDB, WORK( M*M+1 ),
+     $                   M )
+            CALL SGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, T, LDST, ZERO,
+     $                  WORK, M )
+            CALL SGEMM( 'N', 'N', M, M, M, -ONE, WORK, M, IR, LDST, ONE,
+     $                  WORK( M*M+1 ), M )
+            CALL SLASSQ( M*M, WORK( M*M+1 ), 1, DSCALE, DSUM )
+            SS = DSCALE*SQRT( DSUM )
+            STRONG = ( SS.LE.THRESH )
+            IF( .NOT.STRONG )
+     $         GO TO 70
+*
+         END IF
+*
+*        If the swap is accepted ("weakly" and "strongly"), apply the
+*        transformations and set N1-by-N2 (2,1)-block to zero.
+*
+         DO 60 I = 1, N2
+            CALL SCOPY( N1, ZERO, 0, S( N2+1, I ), 1 )
+   60    CONTINUE
+*
+*        copy back M-by-M diagonal block starting at index J1 of (A, B)
+*
+         CALL SLACPY( 'F', M, M, S, LDST, A( J1, J1 ), LDA )
+         CALL SLACPY( 'F', M, M, T, LDST, B( J1, J1 ), LDB )
+         CALL SCOPY( LDST*LDST, ZERO, 0, T, 1 )
+*
+*        Standardize existing 2-by-2 blocks.
+*
+         CALL SCOPY( M*M, ZERO, 0, WORK, 1 )
+         WORK( 1 ) = ONE
+         T( 1, 1 ) = ONE
+         IDUM = LWORK - M*M - 2
+         IF( N2.GT.1 ) THEN
+            CALL SLAGV2( A( J1, J1 ), LDA, B( J1, J1 ), LDB, AR, AI, BE,
+     $                   WORK( 1 ), WORK( 2 ), T( 1, 1 ), T( 2, 1 ) )
+            WORK( M+1 ) = -WORK( 2 )
+            WORK( M+2 ) = WORK( 1 )
+            T( N2, N2 ) = T( 1, 1 )
+            T( 1, 2 ) = -T( 2, 1 )
+         END IF
+         WORK( M*M ) = ONE
+         T( M, M ) = ONE
+*
+         IF( N1.GT.1 ) THEN
+            CALL SLAGV2( A( J1+N2, J1+N2 ), LDA, B( J1+N2, J1+N2 ), LDB,
+     $                   TAUR, TAUL, WORK( M*M+1 ), WORK( N2*M+N2+1 ),
+     $                   WORK( N2*M+N2+2 ), T( N2+1, N2+1 ),
+     $                   T( M, M-1 ) )
+            WORK( M*M ) = WORK( N2*M+N2+1 )
+            WORK( M*M-1 ) = -WORK( N2*M+N2+2 )
+            T( M, M ) = T( N2+1, N2+1 )
+            T( M-1, M ) = -T( M, M-1 )
+         END IF
+         CALL SGEMM( 'T', 'N', N2, N1, N2, ONE, WORK, M, A( J1, J1+N2 ),
+     $               LDA, ZERO, WORK( M*M+1 ), N2 )
+         CALL SLACPY( 'Full', N2, N1, WORK( M*M+1 ), N2, A( J1, J1+N2 ),
+     $                LDA )
+         CALL SGEMM( 'T', 'N', N2, N1, N2, ONE, WORK, M, B( J1, J1+N2 ),
+     $               LDB, ZERO, WORK( M*M+1 ), N2 )
+         CALL SLACPY( 'Full', N2, N1, WORK( M*M+1 ), N2, B( J1, J1+N2 ),
+     $                LDB )
+         CALL SGEMM( 'N', 'N', M, M, M, ONE, LI, LDST, WORK, M, ZERO,
+     $               WORK( M*M+1 ), M )
+         CALL SLACPY( 'Full', M, M, WORK( M*M+1 ), M, LI, LDST )
+         CALL SGEMM( 'N', 'N', N2, N1, N1, ONE, A( J1, J1+N2 ), LDA,
+     $               T( N2+1, N2+1 ), LDST, ZERO, WORK, N2 )
+         CALL SLACPY( 'Full', N2, N1, WORK, N2, A( J1, J1+N2 ), LDA )
+         CALL SGEMM( 'N', 'N', N2, N1, N1, ONE, B( J1, J1+N2 ), LDA,
+     $               T( N2+1, N2+1 ), LDST, ZERO, WORK, N2 )
+         CALL SLACPY( 'Full', N2, N1, WORK, N2, B( J1, J1+N2 ), LDB )
+         CALL SGEMM( 'T', 'N', M, M, M, ONE, IR, LDST, T, LDST, ZERO,
+     $               WORK, M )
+         CALL SLACPY( 'Full', M, M, WORK, M, IR, LDST )
+*
+*        Accumulate transformations into Q and Z if requested.
+*
+         IF( WANTQ ) THEN
+            CALL SGEMM( 'N', 'N', N, M, M, ONE, Q( 1, J1 ), LDQ, LI,
+     $                  LDST, ZERO, WORK, N )
+            CALL SLACPY( 'Full', N, M, WORK, N, Q( 1, J1 ), LDQ )
+*
+         END IF
+*
+         IF( WANTZ ) THEN
+            CALL SGEMM( 'N', 'N', N, M, M, ONE, Z( 1, J1 ), LDZ, IR,
+     $                  LDST, ZERO, WORK, N )
+            CALL SLACPY( 'Full', N, M, WORK, N, Z( 1, J1 ), LDZ )
+*
+         END IF
+*
+*        Update (A(J1:J1+M-1, M+J1:N), B(J1:J1+M-1, M+J1:N)) and
+*                (A(1:J1-1, J1:J1+M), B(1:J1-1, J1:J1+M)).
+*
+         I = J1 + M
+         IF( I.LE.N ) THEN
+            CALL SGEMM( 'T', 'N', M, N-I+1, M, ONE, LI, LDST,
+     $                  A( J1, I ), LDA, ZERO, WORK, M )
+            CALL SLACPY( 'Full', M, N-I+1, WORK, M, A( J1, I ), LDA )
+            CALL SGEMM( 'T', 'N', M, N-I+1, M, ONE, LI, LDST,
+     $                  B( J1, I ), LDA, ZERO, WORK, M )
+            CALL SLACPY( 'Full', M, N-I+1, WORK, M, B( J1, I ), LDA )
+         END IF
+         I = J1 - 1
+         IF( I.GT.0 ) THEN
+            CALL SGEMM( 'N', 'N', I, M, M, ONE, A( 1, J1 ), LDA, IR,
+     $                  LDST, ZERO, WORK, I )
+            CALL SLACPY( 'Full', I, M, WORK, I, A( 1, J1 ), LDA )
+            CALL SGEMM( 'N', 'N', I, M, M, ONE, B( 1, J1 ), LDB, IR,
+     $                  LDST, ZERO, WORK, I )
+            CALL SLACPY( 'Full', I, M, WORK, I, B( 1, J1 ), LDB )
+         END IF
+*
+*        Exit with INFO = 0 if swap was successfully performed.
+*
+         RETURN
+*
+      END IF
+*
+*     Exit with INFO = 1 if swap was rejected.
+*
+   70 CONTINUE
+*
+      INFO = 1
+      RETURN
+*
+*     End of STGEX2
+*
+      END
diff --git a/costa/native/external/lapack/stgexc.f b/costa/native/external/lapack/stgexc.f
new file mode 100644
index 000000000..703446009
--- /dev/null
+++ b/costa/native/external/lapack/stgexc.f
@@ -0,0 +1,434 @@
+      SUBROUTINE STGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
+     $                   LDZ, IFST, ILST, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      LOGICAL            WANTQ, WANTZ
+      INTEGER            IFST, ILST, INFO, LDA, LDB, LDQ, LDZ, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
+     $                   WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STGEXC reorders the generalized real Schur decomposition of a real
+*  matrix pair (A,B) using an orthogonal equivalence transformation
+*
+*                 (A, B) = Q * (A, B) * Z',
+*
+*  so that the diagonal block of (A, B) with row index IFST is moved
+*  to row ILST.
+*
+*  (A, B) must be in generalized real Schur canonical form (as returned
+*  by SGGES), i.e. A is block upper triangular with 1-by-1 and 2-by-2
+*  diagonal blocks. B is upper triangular.
+*
+*  Optionally, the matrices Q and Z of generalized Schur vectors are
+*  updated.
+*
+*         Q(in) * A(in) * Z(in)' = Q(out) * A(out) * Z(out)'
+*         Q(in) * B(in) * Z(in)' = Q(out) * B(out) * Z(out)'
+*
+*
+*  Arguments
+*  =========
+*
+*  WANTQ   (input) LOGICAL
+*          .TRUE. : update the left transformation matrix Q;
+*          .FALSE.: do not update Q.
+*
+*  WANTZ   (input) LOGICAL
+*          .TRUE. : update the right transformation matrix Z;
+*          .FALSE.: do not update Z.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B. N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the matrix A in generalized real Schur canonical
+*          form.
+*          On exit, the updated matrix A, again in generalized
+*          real Schur canonical form.
+*
+*  LDA     (input)  INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  B       (input/output) REAL array, dimension (LDB,N)
+*          On entry, the matrix B in generalized real Schur canonical
+*          form (A,B).
+*          On exit, the updated matrix B, again in generalized
+*          real Schur canonical form (A,B).
+*
+*  LDB     (input)  INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  Q       (input/output) REAL array, dimension (LDZ,N)
+*          On entry, if WANTQ = .TRUE., the orthogonal matrix Q.
+*          On exit, the updated matrix Q.
+*          If WANTQ = .FALSE., Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q. LDQ >= 1.
+*          If WANTQ = .TRUE., LDQ >= N.
+*
+*  Z       (input/output) REAL array, dimension (LDZ,N)
+*          On entry, if WANTZ = .TRUE., the orthogonal matrix Z.
+*          On exit, the updated matrix Z.
+*          If WANTZ = .FALSE., Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z. LDZ >= 1.
+*          If WANTZ = .TRUE., LDZ >= N.
+*
+*  IFST    (input/output) INTEGER
+*  ILST    (input/output) INTEGER
+*          Specify the reordering of the diagonal blocks of (A, B).
+*          The block with row index IFST is moved to row ILST, by a
+*          sequence of swapping between adjacent blocks.
+*          On exit, if IFST pointed on entry to the second row of
+*          a 2-by-2 block, it is changed to point to the first row;
+*          ILST always points to the first row of the block in its
+*          final position (which may differ from its input value by
+*          +1 or -1). 1 <= IFST, ILST <= N.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= 4*N + 16.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*           =0:  successful exit.
+*           <0:  if INFO = -i, the i-th argument had an illegal value.
+*           =1:  The transformed matrix pair (A, B) would be too far
+*                from generalized Schur form; the problem is ill-
+*                conditioned. (A, B) may have been partially reordered,
+*                and ILST points to the first row of the current
+*                position of the block being moved.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the
+*      Generalized Real Schur Form of a Regular Matrix Pair (A, B), in
+*      M.S. Moonen et al (eds), Linear Algebra for Large Scale and
+*      Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            HERE, LWMIN, NBF, NBL, NBNEXT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           STGEX2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test input arguments.
+*
+      INFO = 0
+      LWMIN = MAX( 1, 4*N+16 )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDQ.LT.1 .OR. WANTQ .AND. ( LDQ.LT.MAX( 1, N ) ) ) THEN
+         INFO = -9
+      ELSE IF( LDZ.LT.1 .OR. WANTZ .AND. ( LDZ.LT.MAX( 1, N ) ) ) THEN
+         INFO = -11
+      ELSE IF( IFST.LT.1 .OR. IFST.GT.N ) THEN
+         INFO = -12
+      ELSE IF( ILST.LT.1 .OR. ILST.GT.N ) THEN
+         INFO = -13
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -15
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'STGEXC', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.1 )
+     $   RETURN
+*
+*     Determine the first row of the specified block and find out
+*     if it is 1-by-1 or 2-by-2.
+*
+      IF( IFST.GT.1 ) THEN
+         IF( A( IFST, IFST-1 ).NE.ZERO )
+     $      IFST = IFST - 1
+      END IF
+      NBF = 1
+      IF( IFST.LT.N ) THEN
+         IF( A( IFST+1, IFST ).NE.ZERO )
+     $      NBF = 2
+      END IF
+*
+*     Determine the first row of the final block
+*     and find out if it is 1-by-1 or 2-by-2.
+*
+      IF( ILST.GT.1 ) THEN
+         IF( A( ILST, ILST-1 ).NE.ZERO )
+     $      ILST = ILST - 1
+      END IF
+      NBL = 1
+      IF( ILST.LT.N ) THEN
+         IF( A( ILST+1, ILST ).NE.ZERO )
+     $      NBL = 2
+      END IF
+      IF( IFST.EQ.ILST )
+     $   RETURN
+*
+      IF( IFST.LT.ILST ) THEN
+*
+*        Update ILST.
+*
+         IF( NBF.EQ.2 .AND. NBL.EQ.1 )
+     $      ILST = ILST - 1
+         IF( NBF.EQ.1 .AND. NBL.EQ.2 )
+     $      ILST = ILST + 1
+*
+         HERE = IFST
+*
+   10    CONTINUE
+*
+*        Swap with next one below.
+*
+         IF( NBF.EQ.1 .OR. NBF.EQ.2 ) THEN
+*
+*           Current block either 1-by-1 or 2-by-2.
+*
+            NBNEXT = 1
+            IF( HERE+NBF+1.LE.N ) THEN
+               IF( A( HERE+NBF+1, HERE+NBF ).NE.ZERO )
+     $            NBNEXT = 2
+            END IF
+            CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
+     $                   LDZ, HERE, NBF, NBNEXT, WORK, LWORK, INFO )
+            IF( INFO.NE.0 ) THEN
+               ILST = HERE
+               RETURN
+            END IF
+            HERE = HERE + NBNEXT
+*
+*           Test if 2-by-2 block breaks into two 1-by-1 blocks.
+*
+            IF( NBF.EQ.2 ) THEN
+               IF( A( HERE+1, HERE ).EQ.ZERO )
+     $            NBF = 3
+            END IF
+*
+         ELSE
+*
+*           Current block consists of two 1-by-1 blocks, each of which
+*           must be swapped individually.
+*
+            NBNEXT = 1
+            IF( HERE+3.LE.N ) THEN
+               IF( A( HERE+3, HERE+2 ).NE.ZERO )
+     $            NBNEXT = 2
+            END IF
+            CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
+     $                   LDZ, HERE+1, 1, NBNEXT, WORK, LWORK, INFO )
+            IF( INFO.NE.0 ) THEN
+               ILST = HERE
+               RETURN
+            END IF
+            IF( NBNEXT.EQ.1 ) THEN
+*
+*              Swap two 1-by-1 blocks.
+*
+               CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
+     $                      LDZ, HERE, 1, 1, WORK, LWORK, INFO )
+               IF( INFO.NE.0 ) THEN
+                  ILST = HERE
+                  RETURN
+               END IF
+               HERE = HERE + 1
+*
+            ELSE
+*
+*              Recompute NBNEXT in case of 2-by-2 split.
+*
+               IF( A( HERE+2, HERE+1 ).EQ.ZERO )
+     $            NBNEXT = 1
+               IF( NBNEXT.EQ.2 ) THEN
+*
+*                 2-by-2 block did not split.
+*
+                  CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ,
+     $                         Z, LDZ, HERE, 1, NBNEXT, WORK, LWORK,
+     $                         INFO )
+                  IF( INFO.NE.0 ) THEN
+                     ILST = HERE
+                     RETURN
+                  END IF
+                  HERE = HERE + 2
+               ELSE
+*
+*                 2-by-2 block did split.
+*
+                  CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ,
+     $                         Z, LDZ, HERE, 1, 1, WORK, LWORK, INFO )
+                  IF( INFO.NE.0 ) THEN
+                     ILST = HERE
+                     RETURN
+                  END IF
+                  HERE = HERE + 1
+                  CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ,
+     $                         Z, LDZ, HERE, 1, 1, WORK, LWORK, INFO )
+                  IF( INFO.NE.0 ) THEN
+                     ILST = HERE
+                     RETURN
+                  END IF
+                  HERE = HERE + 1
+               END IF
+*
+            END IF
+         END IF
+         IF( HERE.LT.ILST )
+     $      GO TO 10
+      ELSE
+         HERE = IFST
+*
+   20    CONTINUE
+*
+*        Swap with next one below.
+*
+         IF( NBF.EQ.1 .OR. NBF.EQ.2 ) THEN
+*
+*           Current block either 1-by-1 or 2-by-2.
+*
+            NBNEXT = 1
+            IF( HERE.GE.3 ) THEN
+               IF( A( HERE-1, HERE-2 ).NE.ZERO )
+     $            NBNEXT = 2
+            END IF
+            CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
+     $                   LDZ, HERE-NBNEXT, NBNEXT, NBF, WORK, LWORK,
+     $                   INFO )
+            IF( INFO.NE.0 ) THEN
+               ILST = HERE
+               RETURN
+            END IF
+            HERE = HERE - NBNEXT
+*
+*           Test if 2-by-2 block breaks into two 1-by-1 blocks.
+*
+            IF( NBF.EQ.2 ) THEN
+               IF( A( HERE+1, HERE ).EQ.ZERO )
+     $            NBF = 3
+            END IF
+*
+         ELSE
+*
+*           Current block consists of two 1-by-1 blocks, each of which
+*           must be swapped individually.
+*
+            NBNEXT = 1
+            IF( HERE.GE.3 ) THEN
+               IF( A( HERE-1, HERE-2 ).NE.ZERO )
+     $            NBNEXT = 2
+            END IF
+            CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
+     $                   LDZ, HERE-NBNEXT, NBNEXT, 1, WORK, LWORK,
+     $                   INFO )
+            IF( INFO.NE.0 ) THEN
+               ILST = HERE
+               RETURN
+            END IF
+            IF( NBNEXT.EQ.1 ) THEN
+*
+*              Swap two 1-by-1 blocks.
+*
+               CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
+     $                      LDZ, HERE, NBNEXT, 1, WORK, LWORK, INFO )
+               IF( INFO.NE.0 ) THEN
+                  ILST = HERE
+                  RETURN
+               END IF
+               HERE = HERE - 1
+            ELSE
+*
+*             Recompute NBNEXT in case of 2-by-2 split.
+*
+               IF( A( HERE, HERE-1 ).EQ.ZERO )
+     $            NBNEXT = 1
+               IF( NBNEXT.EQ.2 ) THEN
+*
+*                 2-by-2 block did not split.
+*
+                  CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ,
+     $                         Z, LDZ, HERE-1, 2, 1, WORK, LWORK, INFO )
+                  IF( INFO.NE.0 ) THEN
+                     ILST = HERE
+                     RETURN
+                  END IF
+                  HERE = HERE - 2
+               ELSE
+*
+*                 2-by-2 block did split.
+*
+                  CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ,
+     $                         Z, LDZ, HERE, 1, 1, WORK, LWORK, INFO )
+                  IF( INFO.NE.0 ) THEN
+                     ILST = HERE
+                     RETURN
+                  END IF
+                  HERE = HERE - 1
+                  CALL STGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ,
+     $                         Z, LDZ, HERE, 1, 1, WORK, LWORK, INFO )
+                  IF( INFO.NE.0 ) THEN
+                     ILST = HERE
+                     RETURN
+                  END IF
+                  HERE = HERE - 1
+               END IF
+            END IF
+         END IF
+         IF( HERE.GT.ILST )
+     $      GO TO 20
+      END IF
+      ILST = HERE
+      WORK( 1 ) = LWMIN
+      RETURN
+*
+*     End of STGEXC
+*
+      END
diff --git a/costa/native/external/lapack/stgsen.f b/costa/native/external/lapack/stgsen.f
new file mode 100644
index 000000000..5970bd1c0
--- /dev/null
+++ b/costa/native/external/lapack/stgsen.f
@@ -0,0 +1,718 @@
+      SUBROUTINE STGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB,
+     $                   ALPHAR, ALPHAI, BETA, Q, LDQ, Z, LDZ, M, PL,
+     $                   PR, DIF, WORK, LWORK, IWORK, LIWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      LOGICAL            WANTQ, WANTZ
+      INTEGER            IJOB, INFO, LDA, LDB, LDQ, LDZ, LIWORK, LWORK,
+     $                   M, N
+      REAL               PL, PR
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            SELECT( * )
+      INTEGER            IWORK( * )
+      REAL               A( LDA, * ), ALPHAI( * ), ALPHAR( * ),
+     $                   B( LDB, * ), BETA( * ), DIF( * ), Q( LDQ, * ),
+     $                   WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STGSEN reorders the generalized real Schur decomposition of a real
+*  matrix pair (A, B) (in terms of an orthonormal equivalence trans-
+*  formation Q' * (A, B) * Z), so that a selected cluster of eigenvalues
+*  appears in the leading diagonal blocks of the upper quasi-triangular
+*  matrix A and the upper triangular B. The leading columns of Q and
+*  Z form orthonormal bases of the corresponding left and right eigen-
+*  spaces (deflating subspaces). (A, B) must be in generalized real
+*  Schur canonical form (as returned by SGGES), i.e. A is block upper
+*  triangular with 1-by-1 and 2-by-2 diagonal blocks. B is upper
+*  triangular.
+*
+*  STGSEN also computes the generalized eigenvalues
+*
+*              w(j) = (ALPHAR(j) + i*ALPHAI(j))/BETA(j)
+*
+*  of the reordered matrix pair (A, B).
+*
+*  Optionally, STGSEN computes the estimates of reciprocal condition
+*  numbers for eigenvalues and eigenspaces. These are Difu[(A11,B11),
+*  (A22,B22)] and Difl[(A11,B11), (A22,B22)], i.e. the separation(s)
+*  between the matrix pairs (A11, B11) and (A22,B22) that correspond to
+*  the selected cluster and the eigenvalues outside the cluster, resp.,
+*  and norms of "projections" onto left and right eigenspaces w.r.t.
+*  the selected cluster in the (1,1)-block.
+*
+*  Arguments
+*  =========
+*
+*  IJOB    (input) INTEGER
+*          Specifies whether condition numbers are required for the
+*          cluster of eigenvalues (PL and PR) or the deflating subspaces
+*          (Difu and Difl):
+*           =0: Only reorder w.r.t. SELECT. No extras.
+*           =1: Reciprocal of norms of "projections" onto left and right
+*               eigenspaces w.r.t. the selected cluster (PL and PR).
+*           =2: Upper bounds on Difu and Difl. F-norm-based estimate
+*               (DIF(1:2)).
+*           =3: Estimate of Difu and Difl. 1-norm-based estimate
+*               (DIF(1:2)).
+*               About 5 times as expensive as IJOB = 2.
+*           =4: Compute PL, PR and DIF (i.e. 0, 1 and 2 above): Economic
+*               version to get it all.
+*           =5: Compute PL, PR and DIF (i.e. 0, 1 and 3 above)
+*
+*  WANTQ   (input) LOGICAL
+*          .TRUE. : update the left transformation matrix Q;
+*          .FALSE.: do not update Q.
+*
+*  WANTZ   (input) LOGICAL
+*          .TRUE. : update the right transformation matrix Z;
+*          .FALSE.: do not update Z.
+*
+*  SELECT  (input) LOGICAL array, dimension (N)
+*          SELECT specifies the eigenvalues in the selected cluster.
+*          To select a real eigenvalue w(j), SELECT(j) must be set to
+*          .TRUE.. To select a complex conjugate pair of eigenvalues
+*          w(j) and w(j+1), corresponding to a 2-by-2 diagonal block,
+*          either SELECT(j) or SELECT(j+1) or both must be set to
+*          .TRUE.; a complex conjugate pair of eigenvalues must be
+*          either both included in the cluster or both excluded.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B. N >= 0.
+*
+*  A       (input/output) REAL array, dimension(LDA,N)
+*          On entry, the upper quasi-triangular matrix A, with (A, B) in
+*          generalized real Schur canonical form.
+*          On exit, A is overwritten by the reordered matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  B       (input/output) REAL array, dimension(LDB,N)
+*          On entry, the upper triangular matrix B, with (A, B) in
+*          generalized real Schur canonical form.
+*          On exit, B is overwritten by the reordered matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  ALPHAR  (output) REAL array, dimension (N)
+*  ALPHAI  (output) REAL array, dimension (N)
+*  BETA    (output) REAL array, dimension (N)
+*          On exit, (ALPHAR(j) + ALPHAI(j)*i)/BETA(j), j=1,...,N, will
+*          be the generalized eigenvalues.  ALPHAR(j) + ALPHAI(j)*i
+*          and BETA(j),j=1,...,N  are the diagonals of the complex Schur
+*          form (S,T) that would result if the 2-by-2 diagonal blocks of
+*          the real generalized Schur form of (A,B) were further reduced
+*          to triangular form using complex unitary transformations.
+*          If ALPHAI(j) is zero, then the j-th eigenvalue is real; if
+*          positive, then the j-th and (j+1)-st eigenvalues are a
+*          complex conjugate pair, with ALPHAI(j+1) negative.
+*
+*  Q       (input/output) REAL array, dimension (LDQ,N)
+*          On entry, if WANTQ = .TRUE., Q is an N-by-N matrix.
+*          On exit, Q has been postmultiplied by the left orthogonal
+*          transformation matrix which reorder (A, B); The leading M
+*          columns of Q form orthonormal bases for the specified pair of
+*          left eigenspaces (deflating subspaces).
+*          If WANTQ = .FALSE., Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.  LDQ >= 1;
+*          and if WANTQ = .TRUE., LDQ >= N.
+*
+*  Z       (input/output) REAL array, dimension (LDZ,N)
+*          On entry, if WANTZ = .TRUE., Z is an N-by-N matrix.
+*          On exit, Z has been postmultiplied by the left orthogonal
+*          transformation matrix which reorder (A, B); The leading M
+*          columns of Z form orthonormal bases for the specified pair of
+*          left eigenspaces (deflating subspaces).
+*          If WANTZ = .FALSE., Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z. LDZ >= 1;
+*          If WANTZ = .TRUE., LDZ >= N.
+*
+*  M       (output) INTEGER
+*          The dimension of the specified pair of left and right eigen-
+*          spaces (deflating subspaces). 0 <= M <= N.
+*
+*  PL, PR  (output) REAL
+*          If IJOB = 1, 4 or 5, PL, PR are lower bounds on the
+*          reciprocal of the norm of "projections" onto left and right
+*          eigenspaces with respect to the selected cluster.
+*          0 < PL, PR <= 1.
+*          If M = 0 or M = N, PL = PR  = 1.
+*          If IJOB = 0, 2 or 3, PL and PR are not referenced.
+*
+*  DIF     (output) REAL array, dimension (2).
+*          If IJOB >= 2, DIF(1:2) store the estimates of Difu and Difl.
+*          If IJOB = 2 or 4, DIF(1:2) are F-norm-based upper bounds on
+*          Difu and Difl. If IJOB = 3 or 5, DIF(1:2) are 1-norm-based
+*          estimates of Difu and Difl.
+*          If M = 0 or N, DIF(1:2) = F-norm([A, B]).
+*          If IJOB = 0 or 1, DIF is not referenced.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          IF IJOB = 0, WORK is not referenced.  Otherwise,
+*          on exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >=  4*N+16.
+*          If IJOB = 1, 2 or 4, LWORK >= MAX(4*N+16, 2*M*(N-M)).
+*          If IJOB = 3 or 5, LWORK >= MAX(4*N+16, 4*M*(N-M)).
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          IF IJOB = 0, IWORK is not referenced.  Otherwise,
+*          on exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK. LIWORK >= 1.
+*          If IJOB = 1, 2 or 4, LIWORK >=  N+6.
+*          If IJOB = 3 or 5, LIWORK >= MAX(2*M*(N-M), N+6).
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*            =0: Successful exit.
+*            <0: If INFO = -i, the i-th argument had an illegal value.
+*            =1: Reordering of (A, B) failed because the transformed
+*                matrix pair (A, B) would be too far from generalized
+*                Schur form; the problem is very ill-conditioned.
+*                (A, B) may have been partially reordered.
+*                If requested, 0 is returned in DIF(*), PL and PR.
+*
+*  Further Details
+*  ===============
+*
+*  STGSEN first collects the selected eigenvalues by computing
+*  orthogonal U and W that move them to the top left corner of (A, B).
+*  In other words, the selected eigenvalues are the eigenvalues of
+*  (A11, B11) in:
+*
+*                U'*(A, B)*W = (A11 A12) (B11 B12) n1
+*                              ( 0  A22),( 0  B22) n2
+*                                n1  n2    n1  n2
+*
+*  where N = n1+n2 and U' means the transpose of U. The first n1 columns
+*  of U and W span the specified pair of left and right eigenspaces
+*  (deflating subspaces) of (A, B).
+*
+*  If (A, B) has been obtained from the generalized real Schur
+*  decomposition of a matrix pair (C, D) = Q*(A, B)*Z', then the
+*  reordered generalized real Schur form of (C, D) is given by
+*
+*           (C, D) = (Q*U)*(U'*(A, B)*W)*(Z*W)',
+*
+*  and the first n1 columns of Q*U and Z*W span the corresponding
+*  deflating subspaces of (C, D) (Q and Z store Q*U and Z*W, resp.).
+*
+*  Note that if the selected eigenvalue is sufficiently ill-conditioned,
+*  then its value may differ significantly from its value before
+*  reordering.
+*
+*  The reciprocal condition numbers of the left and right eigenspaces
+*  spanned by the first n1 columns of U and W (or Q*U and Z*W) may
+*  be returned in DIF(1:2), corresponding to Difu and Difl, resp.
+*
+*  The Difu and Difl are defined as:
+*
+*       Difu[(A11, B11), (A22, B22)] = sigma-min( Zu )
+*  and
+*       Difl[(A11, B11), (A22, B22)] = Difu[(A22, B22), (A11, B11)],
+*
+*  where sigma-min(Zu) is the smallest singular value of the
+*  (2*n1*n2)-by-(2*n1*n2) matrix
+*
+*       Zu = [ kron(In2, A11)  -kron(A22', In1) ]
+*            [ kron(In2, B11)  -kron(B22', In1) ].
+*
+*  Here, Inx is the identity matrix of size nx and A22' is the
+*  transpose of A22. kron(X, Y) is the Kronecker product between
+*  the matrices X and Y.
+*
+*  When DIF(2) is small, small changes in (A, B) can cause large changes
+*  in the deflating subspace. An approximate (asymptotic) bound on the
+*  maximum angular error in the computed deflating subspaces is
+*
+*       EPS * norm((A, B)) / DIF(2),
+*
+*  where EPS is the machine precision.
+*
+*  The reciprocal norm of the projectors on the left and right
+*  eigenspaces associated with (A11, B11) may be returned in PL and PR.
+*  They are computed as follows. First we compute L and R so that
+*  P*(A, B)*Q is block diagonal, where
+*
+*       P = ( I -L ) n1           Q = ( I R ) n1
+*           ( 0  I ) n2    and        ( 0 I ) n2
+*             n1 n2                    n1 n2
+*
+*  and (L, R) is the solution to the generalized Sylvester equation
+*
+*       A11*R - L*A22 = -A12
+*       B11*R - L*B22 = -B12
+*
+*  Then PL = (F-norm(L)**2+1)**(-1/2) and PR = (F-norm(R)**2+1)**(-1/2).
+*  An approximate (asymptotic) bound on the average absolute error of
+*  the selected eigenvalues is
+*
+*       EPS * norm((A, B)) / PL.
+*
+*  There are also global error bounds which valid for perturbations up
+*  to a certain restriction:  A lower bound (x) on the smallest
+*  F-norm(E,F) for which an eigenvalue of (A11, B11) may move and
+*  coalesce with an eigenvalue of (A22, B22) under perturbation (E,F),
+*  (i.e. (A + E, B + F), is
+*
+*   x = min(Difu,Difl)/((1/(PL*PL)+1/(PR*PR))**(1/2)+2*max(1/PL,1/PR)).
+*
+*  An approximate bound on x can be computed from DIF(1:2), PL and PR.
+*
+*  If y = ( F-norm(E,F) / x) <= 1, the angles between the perturbed
+*  (L', R') and unperturbed (L, R) left and right deflating subspaces
+*  associated with the selected cluster in the (1,1)-blocks can be
+*  bounded as
+*
+*   max-angle(L, L') <= arctan( y * PL / (1 - y * (1 - PL * PL)**(1/2))
+*   max-angle(R, R') <= arctan( y * PR / (1 - y * (1 - PR * PR)**(1/2))
+*
+*  See LAPACK User's Guide section 4.11 or the following references
+*  for more information.
+*
+*  Note that if the default method for computing the Frobenius-norm-
+*  based estimate DIF is not wanted (see SLATDF), then the parameter
+*  IDIFJB (see below) should be changed from 3 to 4 (routine SLATDF
+*  (IJOB = 2 will be used)). See STGSYL for more details.
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  References
+*  ==========
+*
+*  [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the
+*      Generalized Real Schur Form of a Regular Matrix Pair (A, B), in
+*      M.S. Moonen et al (eds), Linear Algebra for Large Scale and
+*      Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218.
+*
+*  [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified
+*      Eigenvalues of a Regular Matrix Pair (A, B) and Condition
+*      Estimation: Theory, Algorithms and Software,
+*      Report UMINF - 94.04, Department of Computing Science, Umea
+*      University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working
+*      Note 87. To appear in Numerical Algorithms, 1996.
+*
+*  [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software
+*      for Solving the Generalized Sylvester Equation and Estimating the
+*      Separation between Regular Matrix Pairs, Report UMINF - 93.23,
+*      Department of Computing Science, Umea University, S-901 87 Umea,
+*      Sweden, December 1993, Revised April 1994, Also as LAPACK Working
+*      Note 75. To appear in ACM Trans. on Math. Software, Vol 22, No 1,
+*      1996.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            IDIFJB
+      PARAMETER          ( IDIFJB = 3 )
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, PAIR, SWAP, WANTD, WANTD1, WANTD2,
+     $                   WANTP
+      INTEGER            I, IERR, IJB, K, KASE, KK, KS, LIWMIN, LWMIN,
+     $                   MN2, N1, N2
+      REAL               DSCALE, DSUM, EPS, RDSCAL, SMLNUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLACON, SLACPY, SLAG2, SLASSQ, STGEXC, STGSYL,
+     $                   XERBLA
+*     ..
+*     .. External Functions ..
+      REAL               SLAMCH
+      EXTERNAL           SLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, SIGN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      IF( IJOB.LT.0 .OR. IJOB.GT.5 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN
+         INFO = -14
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -16
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'STGSEN', -INFO )
+         RETURN
+      END IF
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SLAMCH( 'S' ) / EPS
+      IERR = 0
+*
+      WANTP = IJOB.EQ.1 .OR. IJOB.GE.4
+      WANTD1 = IJOB.EQ.2 .OR. IJOB.EQ.4
+      WANTD2 = IJOB.EQ.3 .OR. IJOB.EQ.5
+      WANTD = WANTD1 .OR. WANTD2
+*
+*     Set M to the dimension of the specified pair of deflating
+*     subspaces.
+*
+      M = 0
+      PAIR = .FALSE.
+      DO 10 K = 1, N
+         IF( PAIR ) THEN
+            PAIR = .FALSE.
+         ELSE
+            IF( K.LT.N ) THEN
+               IF( A( K+1, K ).EQ.ZERO ) THEN
+                  IF( SELECT( K ) )
+     $               M = M + 1
+               ELSE
+                  PAIR = .TRUE.
+                  IF( SELECT( K ) .OR. SELECT( K+1 ) )
+     $               M = M + 2
+               END IF
+            ELSE
+               IF( SELECT( N ) )
+     $            M = M + 1
+            END IF
+         END IF
+   10 CONTINUE
+*
+      IF( IJOB.EQ.1 .OR. IJOB.EQ.2 .OR. IJOB.EQ.4 ) THEN
+         LWMIN = MAX( 1, 4*N+16, 2*M*(N-M) )
+         LIWMIN = MAX( 1, N+6 )
+      ELSE IF( IJOB.EQ.3 .OR. IJOB.EQ.5 ) THEN
+         LWMIN = MAX( 1, 4*N+16, 4*M*(N-M) )
+         LIWMIN = MAX( 1, 2*M*(N-M), N+6 )
+      ELSE
+         LWMIN = MAX( 1, 4*N+16 )
+         LIWMIN = 1
+      END IF
+*
+      WORK( 1 ) = LWMIN
+      IWORK( 1 ) = LIWMIN
+*
+      IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -22
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -24
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'STGSEN', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( M.EQ.N .OR. M.EQ.0 ) THEN
+         IF( WANTP ) THEN
+            PL = ONE
+            PR = ONE
+         END IF
+         IF( WANTD ) THEN
+            DSCALE = ZERO
+            DSUM = ONE
+            DO 20 I = 1, N
+               CALL SLASSQ( N, A( 1, I ), 1, DSCALE, DSUM )
+               CALL SLASSQ( N, B( 1, I ), 1, DSCALE, DSUM )
+   20       CONTINUE
+            DIF( 1 ) = DSCALE*SQRT( DSUM )
+            DIF( 2 ) = DIF( 1 )
+         END IF
+         GO TO 60
+      END IF
+*
+*     Collect the selected blocks at the top-left corner of (A, B).
+*
+      KS = 0
+      PAIR = .FALSE.
+      DO 30 K = 1, N
+         IF( PAIR ) THEN
+            PAIR = .FALSE.
+         ELSE
+*
+            SWAP = SELECT( K )
+            IF( K.LT.N ) THEN
+               IF( A( K+1, K ).NE.ZERO ) THEN
+                  PAIR = .TRUE.
+                  SWAP = SWAP .OR. SELECT( K+1 )
+               END IF
+            END IF
+*
+            IF( SWAP ) THEN
+               KS = KS + 1
+*
+*              Swap the K-th block to position KS.
+*              Perform the reordering of diagonal blocks in (A, B)
+*              by orthogonal transformation matrices and update
+*              Q and Z accordingly (if requested):
+*
+               KK = K
+               IF( K.NE.KS )
+     $            CALL STGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ,
+     $                         Z, LDZ, KK, KS, WORK, LWORK, IERR )
+*
+               IF( IERR.GT.0 ) THEN
+*
+*                 Swap is rejected: exit.
+*
+                  INFO = 1
+                  IF( WANTP ) THEN
+                     PL = ZERO
+                     PR = ZERO
+                  END IF
+                  IF( WANTD ) THEN
+                     DIF( 1 ) = ZERO
+                     DIF( 2 ) = ZERO
+                  END IF
+                  GO TO 60
+               END IF
+*
+               IF( PAIR )
+     $            KS = KS + 1
+            END IF
+         END IF
+   30 CONTINUE
+      IF( WANTP ) THEN
+*
+*        Solve generalized Sylvester equation for R and L
+*        and compute PL and PR.
+*
+         N1 = M
+         N2 = N - M
+         I = N1 + 1
+         IJB = 0
+         CALL SLACPY( 'Full', N1, N2, A( 1, I ), LDA, WORK, N1 )
+         CALL SLACPY( 'Full', N1, N2, B( 1, I ), LDB, WORK( N1*N2+1 ),
+     $                N1 )
+         CALL STGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK,
+     $                N1, B, LDB, B( I, I ), LDB, WORK( N1*N2+1 ), N1,
+     $                DSCALE, DIF( 1 ), WORK( N1*N2*2+1 ),
+     $                LWORK-2*N1*N2, IWORK, IERR )
+*
+*        Estimate the reciprocal of norms of "projections" onto left
+*        and right eigenspaces.
+*
+         RDSCAL = ZERO
+         DSUM = ONE
+         CALL SLASSQ( N1*N2, WORK, 1, RDSCAL, DSUM )
+         PL = RDSCAL*SQRT( DSUM )
+         IF( PL.EQ.ZERO ) THEN
+            PL = ONE
+         ELSE
+            PL = DSCALE / ( SQRT( DSCALE*DSCALE / PL+PL )*SQRT( PL ) )
+         END IF
+         RDSCAL = ZERO
+         DSUM = ONE
+         CALL SLASSQ( N1*N2, WORK( N1*N2+1 ), 1, RDSCAL, DSUM )
+         PR = RDSCAL*SQRT( DSUM )
+         IF( PR.EQ.ZERO ) THEN
+            PR = ONE
+         ELSE
+            PR = DSCALE / ( SQRT( DSCALE*DSCALE / PR+PR )*SQRT( PR ) )
+         END IF
+      END IF
+*
+      IF( WANTD ) THEN
+*
+*        Compute estimates of Difu and Difl.
+*
+         IF( WANTD1 ) THEN
+            N1 = M
+            N2 = N - M
+            I = N1 + 1
+            IJB = IDIFJB
+*
+*           Frobenius norm-based Difu-estimate.
+*
+            CALL STGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK,
+     $                   N1, B, LDB, B( I, I ), LDB, WORK( N1*N2+1 ),
+     $                   N1, DSCALE, DIF( 1 ), WORK( 2*N1*N2+1 ),
+     $                   LWORK-2*N1*N2, IWORK, IERR )
+*
+*           Frobenius norm-based Difl-estimate.
+*
+            CALL STGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, WORK,
+     $                   N2, B( I, I ), LDB, B, LDB, WORK( N1*N2+1 ),
+     $                   N2, DSCALE, DIF( 2 ), WORK( 2*N1*N2+1 ),
+     $                   LWORK-2*N1*N2, IWORK, IERR )
+         ELSE
+*
+*
+*           Compute 1-norm-based estimates of Difu and Difl using
+*           reversed communication with SLACON. In each step a
+*           generalized Sylvester equation or a transposed variant
+*           is solved.
+*
+            KASE = 0
+            N1 = M
+            N2 = N - M
+            I = N1 + 1
+            IJB = 0
+            MN2 = 2*N1*N2
+*
+*           1-norm-based estimate of Difu.
+*
+   40       CONTINUE
+            CALL SLACON( MN2, WORK( MN2+1 ), WORK, IWORK, DIF( 1 ),
+     $                   KASE )
+            IF( KASE.NE.0 ) THEN
+               IF( KASE.EQ.1 ) THEN
+*
+*                 Solve generalized Sylvester equation.
+*
+                  CALL STGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA,
+     $                         WORK, N1, B, LDB, B( I, I ), LDB,
+     $                         WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ),
+     $                         WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK,
+     $                         IERR )
+               ELSE
+*
+*                 Solve the transposed variant.
+*
+                  CALL STGSYL( 'T', IJB, N1, N2, A, LDA, A( I, I ), LDA,
+     $                         WORK, N1, B, LDB, B( I, I ), LDB,
+     $                         WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ),
+     $                         WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK,
+     $                         IERR )
+               END IF
+               GO TO 40
+            END IF
+            DIF( 1 ) = DSCALE / DIF( 1 )
+*
+*           1-norm-based estimate of Difl.
+*
+   50       CONTINUE
+            CALL SLACON( MN2, WORK( MN2+1 ), WORK, IWORK, DIF( 2 ),
+     $                   KASE )
+            IF( KASE.NE.0 ) THEN
+               IF( KASE.EQ.1 ) THEN
+*
+*                 Solve generalized Sylvester equation.
+*
+                  CALL STGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA,
+     $                         WORK, N2, B( I, I ), LDB, B, LDB,
+     $                         WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ),
+     $                         WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK,
+     $                         IERR )
+               ELSE
+*
+*                 Solve the transposed variant.
+*
+                  CALL STGSYL( 'T', IJB, N2, N1, A( I, I ), LDA, A, LDA,
+     $                         WORK, N2, B( I, I ), LDB, B, LDB,
+     $                         WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ),
+     $                         WORK( 2*N1*N2+1 ), LWORK-2*N1*N2, IWORK,
+     $                         IERR )
+               END IF
+               GO TO 50
+            END IF
+            DIF( 2 ) = DSCALE / DIF( 2 )
+*
+         END IF
+      END IF
+*
+   60 CONTINUE
+*
+*     Compute generalized eigenvalues of reordered pair (A, B) and 
+*     normalize the generalized Schur form.
+*
+      PAIR = .FALSE.
+      DO 70 K = 1, N
+         IF( PAIR ) THEN
+            PAIR = .FALSE.
+         ELSE
+*
+            IF( K.LT.N ) THEN
+               IF( A( K+1, K ).NE.ZERO ) THEN
+                  PAIR = .TRUE.
+               END IF
+            END IF
+*
+            IF( PAIR ) THEN
+*
+*             Compute the eigenvalue(s) at position K.
+*
+               WORK( 1 ) = A( K, K )
+               WORK( 2 ) = A( K+1, K )
+               WORK( 3 ) = A( K, K+1 )
+               WORK( 4 ) = A( K+1, K+1 )
+               WORK( 5 ) = B( K, K )
+               WORK( 6 ) = B( K+1, K )
+               WORK( 7 ) = B( K, K+1 )
+               WORK( 8 ) = B( K+1, K+1 )
+               CALL SLAG2( WORK, 2, WORK( 5 ), 2, SMLNUM*EPS, BETA( K ),
+     $                     BETA( K+1 ), ALPHAR( K ), ALPHAR( K+1 ),
+     $                     ALPHAI( K ) )
+               ALPHAI( K+1 ) = -ALPHAI( K )
+*
+            ELSE
+*
+               IF( SIGN( ONE, B( K, K ) ).LT.ZERO ) THEN
+*
+*                 If B(K,K) is negative, make it positive
+*
+                  DO 80 I = 1, N
+                     A( K, I ) = -A( K, I )
+                     B( K, I ) = -B( K, I )
+                     Q( I, K ) = -Q( I, K )
+   80             CONTINUE
+               END IF
+*
+               ALPHAR( K ) = A( K, K )
+               ALPHAI( K ) = ZERO
+               BETA( K ) = B( K, K )
+*
+            END IF
+         END IF
+   70 CONTINUE
+*
+      WORK( 1 ) = LWMIN
+      IWORK( 1 ) = LIWMIN
+*
+      RETURN
+*
+*     End of STGSEN
+*
+      END
diff --git a/costa/native/external/lapack/stgsja.f b/costa/native/external/lapack/stgsja.f
new file mode 100644
index 000000000..646bb48f2
--- /dev/null
+++ b/costa/native/external/lapack/stgsja.f
@@ -0,0 +1,516 @@
+      SUBROUTINE STGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B,
+     $                   LDB, TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV,
+     $                   Q, LDQ, WORK, NCYCLE, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBQ, JOBU, JOBV
+      INTEGER            INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N,
+     $                   NCYCLE, P
+      REAL               TOLA, TOLB
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), ALPHA( * ), B( LDB, * ),
+     $                   BETA( * ), Q( LDQ, * ), U( LDU, * ),
+     $                   V( LDV, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STGSJA computes the generalized singular value decomposition (GSVD)
+*  of two real upper triangular (or trapezoidal) matrices A and B.
+*
+*  On entry, it is assumed that matrices A and B have the following
+*  forms, which may be obtained by the preprocessing subroutine SGGSVP
+*  from a general M-by-N matrix A and P-by-N matrix B:
+*
+*               N-K-L  K    L
+*     A =    K ( 0    A12  A13 ) if M-K-L >= 0;
+*            L ( 0     0   A23 )
+*        M-K-L ( 0     0    0  )
+*
+*             N-K-L  K    L
+*     A =  K ( 0    A12  A13 ) if M-K-L < 0;
+*        M-K ( 0     0   A23 )
+*
+*             N-K-L  K    L
+*     B =  L ( 0     0   B13 )
+*        P-L ( 0     0    0  )
+*
+*  where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular
+*  upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0,
+*  otherwise A23 is (M-K)-by-L upper trapezoidal.
+*
+*  On exit,
+*
+*              U'*A*Q = D1*( 0 R ),    V'*B*Q = D2*( 0 R ),
+*
+*  where U, V and Q are orthogonal matrices, Z' denotes the transpose
+*  of Z, R is a nonsingular upper triangular matrix, and D1 and D2 are
+*  ``diagonal'' matrices, which are of the following structures:
+*
+*  If M-K-L >= 0,
+*
+*                      K  L
+*         D1 =     K ( I  0 )
+*                  L ( 0  C )
+*              M-K-L ( 0  0 )
+*
+*                    K  L
+*         D2 = L   ( 0  S )
+*              P-L ( 0  0 )
+*
+*                 N-K-L  K    L
+*    ( 0 R ) = K (  0   R11  R12 ) K
+*              L (  0    0   R22 ) L
+*
+*  where
+*
+*    C = diag( ALPHA(K+1), ... , ALPHA(K+L) ),
+*    S = diag( BETA(K+1),  ... , BETA(K+L) ),
+*    C**2 + S**2 = I.
+*
+*    R is stored in A(1:K+L,N-K-L+1:N) on exit.
+*
+*  If M-K-L < 0,
+*
+*                 K M-K K+L-M
+*      D1 =   K ( I  0    0   )
+*           M-K ( 0  C    0   )
+*
+*                   K M-K K+L-M
+*      D2 =   M-K ( 0  S    0   )
+*           K+L-M ( 0  0    I   )
+*             P-L ( 0  0    0   )
+*
+*                 N-K-L  K   M-K  K+L-M
+* ( 0 R ) =    K ( 0    R11  R12  R13  )
+*            M-K ( 0     0   R22  R23  )
+*          K+L-M ( 0     0    0   R33  )
+*
+*  where
+*  C = diag( ALPHA(K+1), ... , ALPHA(M) ),
+*  S = diag( BETA(K+1),  ... , BETA(M) ),
+*  C**2 + S**2 = I.
+*
+*  R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored
+*      (  0  R22 R23 )
+*  in B(M-K+1:L,N+M-K-L+1:N) on exit.
+*
+*  The computation of the orthogonal transformation matrices U, V or Q
+*  is optional.  These matrices may either be formed explicitly, or they
+*  may be postmultiplied into input matrices U1, V1, or Q1.
+*
+*  Arguments
+*  =========
+*
+*  JOBU    (input) CHARACTER*1
+*          = 'U':  U must contain an orthogonal matrix U1 on entry, and
+*                  the product U1*U is returned;
+*          = 'I':  U is initialized to the unit matrix, and the
+*                  orthogonal matrix U is returned;
+*          = 'N':  U is not computed.
+*
+*  JOBV    (input) CHARACTER*1
+*          = 'V':  V must contain an orthogonal matrix V1 on entry, and
+*                  the product V1*V is returned;
+*          = 'I':  V is initialized to the unit matrix, and the
+*                  orthogonal matrix V is returned;
+*          = 'N':  V is not computed.
+*
+*  JOBQ    (input) CHARACTER*1
+*          = 'Q':  Q must contain an orthogonal matrix Q1 on entry, and
+*                  the product Q1*Q is returned;
+*          = 'I':  Q is initialized to the unit matrix, and the
+*                  orthogonal matrix Q is returned;
+*          = 'N':  Q is not computed.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  P       (input) INTEGER
+*          The number of rows of the matrix B.  P >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrices A and B.  N >= 0.
+*
+*  K       (input) INTEGER
+*  L       (input) INTEGER
+*          K and L specify the subblocks in the input matrices A and B:
+*          A23 = A(K+1:MIN(K+L,M),N-L+1:N) and B13 = B(1:L,N-L+1:N)
+*          of A and B, whose GSVD is going to be computed by STGSJA.
+*          See Further details.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, A(N-K+1:N,1:MIN(K+L,M) ) contains the triangular
+*          matrix R or part of R.  See Purpose for details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  B       (input/output) REAL array, dimension (LDB,N)
+*          On entry, the P-by-N matrix B.
+*          On exit, if necessary, B(M-K+1:L,N+M-K-L+1:N) contains
+*          a part of R.  See Purpose for details.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,P).
+*
+*  TOLA    (input) REAL
+*  TOLB    (input) REAL
+*          TOLA and TOLB are the convergence criteria for the Jacobi-
+*          Kogbetliantz iteration procedure. Generally, they are the
+*          same as used in the preprocessing step, say
+*              TOLA = max(M,N)*norm(A)*MACHEPS,
+*              TOLB = max(P,N)*norm(B)*MACHEPS.
+*
+*  ALPHA   (output) REAL array, dimension (N)
+*  BETA    (output) REAL array, dimension (N)
+*          On exit, ALPHA and BETA contain the generalized singular
+*          value pairs of A and B;
+*            ALPHA(1:K) = 1,
+*            BETA(1:K)  = 0,
+*          and if M-K-L >= 0,
+*            ALPHA(K+1:K+L) = diag(C),
+*            BETA(K+1:K+L)  = diag(S),
+*          or if M-K-L < 0,
+*            ALPHA(K+1:M)= C, ALPHA(M+1:K+L)= 0
+*            BETA(K+1:M) = S, BETA(M+1:K+L) = 1.
+*          Furthermore, if K+L < N,
+*            ALPHA(K+L+1:N) = 0 and
+*            BETA(K+L+1:N)  = 0.
+*
+*  U       (input/output) REAL array, dimension (LDU,M)
+*          On entry, if JOBU = 'U', U must contain a matrix U1 (usually
+*          the orthogonal matrix returned by SGGSVP).
+*          On exit,
+*          if JOBU = 'I', U contains the orthogonal matrix U;
+*          if JOBU = 'U', U contains the product U1*U.
+*          If JOBU = 'N', U is not referenced.
+*
+*  LDU     (input) INTEGER
+*          The leading dimension of the array U. LDU >= max(1,M) if
+*          JOBU = 'U'; LDU >= 1 otherwise.
+*
+*  V       (input/output) REAL array, dimension (LDV,P)
+*          On entry, if JOBV = 'V', V must contain a matrix V1 (usually
+*          the orthogonal matrix returned by SGGSVP).
+*          On exit,
+*          if JOBV = 'I', V contains the orthogonal matrix V;
+*          if JOBV = 'V', V contains the product V1*V.
+*          If JOBV = 'N', V is not referenced.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V. LDV >= max(1,P) if
+*          JOBV = 'V'; LDV >= 1 otherwise.
+*
+*  Q       (input/output) REAL array, dimension (LDQ,N)
+*          On entry, if JOBQ = 'Q', Q must contain a matrix Q1 (usually
+*          the orthogonal matrix returned by SGGSVP).
+*          On exit,
+*          if JOBQ = 'I', Q contains the orthogonal matrix Q;
+*          if JOBQ = 'Q', Q contains the product Q1*Q.
+*          If JOBQ = 'N', Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q. LDQ >= max(1,N) if
+*          JOBQ = 'Q'; LDQ >= 1 otherwise.
+*
+*  WORK    (workspace) REAL array, dimension (2*N)
+*
+*  NCYCLE  (output) INTEGER
+*          The number of cycles required for convergence.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          = 1:  the procedure does not converge after MAXIT cycles.
+*
+*  Internal Parameters
+*  ===================
+*
+*  MAXIT   INTEGER
+*          MAXIT specifies the total loops that the iterative procedure
+*          may take. If after MAXIT cycles, the routine fails to
+*          converge, we return INFO = 1.
+*
+*  Further Details
+*  ===============
+*
+*  STGSJA essentially uses a variant of Kogbetliantz algorithm to reduce
+*  min(L,M-K)-by-L triangular (or trapezoidal) matrix A23 and L-by-L
+*  matrix B13 to the form:
+*
+*           U1'*A13*Q1 = C1*R1; V1'*B13*Q1 = S1*R1,
+*
+*  where U1, V1 and Q1 are orthogonal matrix, and Z' is the transpose
+*  of Z.  C1 and S1 are diagonal matrices satisfying
+*
+*                C1**2 + S1**2 = I,
+*
+*  and R1 is an L-by-L nonsingular upper triangular matrix.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            MAXIT
+      PARAMETER          ( MAXIT = 40 )
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+*
+      LOGICAL            INITQ, INITU, INITV, UPPER, WANTQ, WANTU, WANTV
+      INTEGER            I, J, KCYCLE
+      REAL               A1, A2, A3, B1, B2, B3, CSQ, CSU, CSV, ERROR,
+     $                   GAMMA, RWK, SNQ, SNU, SNV, SSMIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SLAGS2, SLAPLL, SLARTG, SLASET, SROT,
+     $                   SSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      INITU = LSAME( JOBU, 'I' )
+      WANTU = INITU .OR. LSAME( JOBU, 'U' )
+*
+      INITV = LSAME( JOBV, 'I' )
+      WANTV = INITV .OR. LSAME( JOBV, 'V' )
+*
+      INITQ = LSAME( JOBQ, 'I' )
+      WANTQ = INITQ .OR. LSAME( JOBQ, 'Q' )
+*
+      INFO = 0
+      IF( .NOT.( INITU .OR. WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( INITV .OR. WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( INITQ .OR. WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( P.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LDB.LT.MAX( 1, P ) ) THEN
+         INFO = -12
+      ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN
+         INFO = -18
+      ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN
+         INFO = -20
+      ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN
+         INFO = -22
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'STGSJA', -INFO )
+         RETURN
+      END IF
+*
+*     Initialize U, V and Q, if necessary
+*
+      IF( INITU )
+     $   CALL SLASET( 'Full', M, M, ZERO, ONE, U, LDU )
+      IF( INITV )
+     $   CALL SLASET( 'Full', P, P, ZERO, ONE, V, LDV )
+      IF( INITQ )
+     $   CALL SLASET( 'Full', N, N, ZERO, ONE, Q, LDQ )
+*
+*     Loop until convergence
+*
+      UPPER = .FALSE.
+      DO 40 KCYCLE = 1, MAXIT
+*
+         UPPER = .NOT.UPPER
+*
+         DO 20 I = 1, L - 1
+            DO 10 J = I + 1, L
+*
+               A1 = ZERO
+               A2 = ZERO
+               A3 = ZERO
+               IF( K+I.LE.M )
+     $            A1 = A( K+I, N-L+I )
+               IF( K+J.LE.M )
+     $            A3 = A( K+J, N-L+J )
+*
+               B1 = B( I, N-L+I )
+               B3 = B( J, N-L+J )
+*
+               IF( UPPER ) THEN
+                  IF( K+I.LE.M )
+     $               A2 = A( K+I, N-L+J )
+                  B2 = B( I, N-L+J )
+               ELSE
+                  IF( K+J.LE.M )
+     $               A2 = A( K+J, N-L+I )
+                  B2 = B( J, N-L+I )
+               END IF
+*
+               CALL SLAGS2( UPPER, A1, A2, A3, B1, B2, B3, CSU, SNU,
+     $                      CSV, SNV, CSQ, SNQ )
+*
+*              Update (K+I)-th and (K+J)-th rows of matrix A: U'*A
+*
+               IF( K+J.LE.M )
+     $            CALL SROT( L, A( K+J, N-L+1 ), LDA, A( K+I, N-L+1 ),
+     $                       LDA, CSU, SNU )
+*
+*              Update I-th and J-th rows of matrix B: V'*B
+*
+               CALL SROT( L, B( J, N-L+1 ), LDB, B( I, N-L+1 ), LDB,
+     $                    CSV, SNV )
+*
+*              Update (N-L+I)-th and (N-L+J)-th columns of matrices
+*              A and B: A*Q and B*Q
+*
+               CALL SROT( MIN( K+L, M ), A( 1, N-L+J ), 1,
+     $                    A( 1, N-L+I ), 1, CSQ, SNQ )
+*
+               CALL SROT( L, B( 1, N-L+J ), 1, B( 1, N-L+I ), 1, CSQ,
+     $                    SNQ )
+*
+               IF( UPPER ) THEN
+                  IF( K+I.LE.M )
+     $               A( K+I, N-L+J ) = ZERO
+                  B( I, N-L+J ) = ZERO
+               ELSE
+                  IF( K+J.LE.M )
+     $               A( K+J, N-L+I ) = ZERO
+                  B( J, N-L+I ) = ZERO
+               END IF
+*
+*              Update orthogonal matrices U, V, Q, if desired.
+*
+               IF( WANTU .AND. K+J.LE.M )
+     $            CALL SROT( M, U( 1, K+J ), 1, U( 1, K+I ), 1, CSU,
+     $                       SNU )
+*
+               IF( WANTV )
+     $            CALL SROT( P, V( 1, J ), 1, V( 1, I ), 1, CSV, SNV )
+*
+               IF( WANTQ )
+     $            CALL SROT( N, Q( 1, N-L+J ), 1, Q( 1, N-L+I ), 1, CSQ,
+     $                       SNQ )
+*
+   10       CONTINUE
+   20    CONTINUE
+*
+         IF( .NOT.UPPER ) THEN
+*
+*           The matrices A13 and B13 were lower triangular at the start
+*           of the cycle, and are now upper triangular.
+*
+*           Convergence test: test the parallelism of the corresponding
+*           rows of A and B.
+*
+            ERROR = ZERO
+            DO 30 I = 1, MIN( L, M-K )
+               CALL SCOPY( L-I+1, A( K+I, N-L+I ), LDA, WORK, 1 )
+               CALL SCOPY( L-I+1, B( I, N-L+I ), LDB, WORK( L+1 ), 1 )
+               CALL SLAPLL( L-I+1, WORK, 1, WORK( L+1 ), 1, SSMIN )
+               ERROR = MAX( ERROR, SSMIN )
+   30       CONTINUE
+*
+            IF( ABS( ERROR ).LE.MIN( TOLA, TOLB ) )
+     $         GO TO 50
+         END IF
+*
+*        End of cycle loop
+*
+   40 CONTINUE
+*
+*     The algorithm has not converged after MAXIT cycles.
+*
+      INFO = 1
+      GO TO 100
+*
+   50 CONTINUE
+*
+*     If ERROR <= MIN(TOLA,TOLB), then the algorithm has converged.
+*     Compute the generalized singular value pairs (ALPHA, BETA), and
+*     set the triangular matrix R to array A.
+*
+      DO 60 I = 1, K
+         ALPHA( I ) = ONE
+         BETA( I ) = ZERO
+   60 CONTINUE
+*
+      DO 70 I = 1, MIN( L, M-K )
+*
+         A1 = A( K+I, N-L+I )
+         B1 = B( I, N-L+I )
+*
+         IF( A1.NE.ZERO ) THEN
+            GAMMA = B1 / A1
+*
+*           change sign if necessary
+*
+            IF( GAMMA.LT.ZERO ) THEN
+               CALL SSCAL( L-I+1, -ONE, B( I, N-L+I ), LDB )
+               IF( WANTV )
+     $            CALL SSCAL( P, -ONE, V( 1, I ), 1 )
+            END IF
+*
+            CALL SLARTG( ABS( GAMMA ), ONE, BETA( K+I ), ALPHA( K+I ),
+     $                   RWK )
+*
+            IF( ALPHA( K+I ).GE.BETA( K+I ) ) THEN
+               CALL SSCAL( L-I+1, ONE / ALPHA( K+I ), A( K+I, N-L+I ),
+     $                     LDA )
+            ELSE
+               CALL SSCAL( L-I+1, ONE / BETA( K+I ), B( I, N-L+I ),
+     $                     LDB )
+               CALL SCOPY( L-I+1, B( I, N-L+I ), LDB, A( K+I, N-L+I ),
+     $                     LDA )
+            END IF
+*
+         ELSE
+*
+            ALPHA( K+I ) = ZERO
+            BETA( K+I ) = ONE
+            CALL SCOPY( L-I+1, B( I, N-L+I ), LDB, A( K+I, N-L+I ),
+     $                  LDA )
+*
+         END IF
+*
+   70 CONTINUE
+*
+*     Post-assignment
+*
+      DO 80 I = M + 1, K + L
+         ALPHA( I ) = ZERO
+         BETA( I ) = ONE
+   80 CONTINUE
+*
+      IF( K+L.LT.N ) THEN
+         DO 90 I = K + L + 1, N
+            ALPHA( I ) = ZERO
+            BETA( I ) = ZERO
+   90    CONTINUE
+      END IF
+*
+  100 CONTINUE
+      NCYCLE = KCYCLE
+      RETURN
+*
+*     End of STGSJA
+*
+      END
diff --git a/costa/native/external/lapack/stgsna.f b/costa/native/external/lapack/stgsna.f
new file mode 100644
index 000000000..f6c03ed25
--- /dev/null
+++ b/costa/native/external/lapack/stgsna.f
@@ -0,0 +1,585 @@
+      SUBROUTINE STGSNA( JOB, HOWMNY, SELECT, N, A, LDA, B, LDB, VL,
+     $                   LDVL, VR, LDVR, S, DIF, MM, M, WORK, LWORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          HOWMNY, JOB
+      INTEGER            INFO, LDA, LDB, LDVL, LDVR, LWORK, M, MM, N
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            SELECT( * )
+      INTEGER            IWORK( * )
+      REAL               A( LDA, * ), B( LDB, * ), DIF( * ), S( * ),
+     $                   VL( LDVL, * ), VR( LDVR, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STGSNA estimates reciprocal condition numbers for specified
+*  eigenvalues and/or eigenvectors of a matrix pair (A, B) in
+*  generalized real Schur canonical form (or of any matrix pair
+*  (Q*A*Z', Q*B*Z') with orthogonal matrices Q and Z, where
+*  Z' denotes the transpose of Z.
+*
+*  (A, B) must be in generalized real Schur form (as returned by SGGES),
+*  i.e. A is block upper triangular with 1-by-1 and 2-by-2 diagonal
+*  blocks. B is upper triangular.
+*
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          Specifies whether condition numbers are required for
+*          eigenvalues (S) or eigenvectors (DIF):
+*          = 'E': for eigenvalues only (S);
+*          = 'V': for eigenvectors only (DIF);
+*          = 'B': for both eigenvalues and eigenvectors (S and DIF).
+*
+*  HOWMNY  (input) CHARACTER*1
+*          = 'A': compute condition numbers for all eigenpairs;
+*          = 'S': compute condition numbers for selected eigenpairs
+*                 specified by the array SELECT.
+*
+*  SELECT  (input) LOGICAL array, dimension (N)
+*          If HOWMNY = 'S', SELECT specifies the eigenpairs for which
+*          condition numbers are required. To select condition numbers
+*          for the eigenpair corresponding to a real eigenvalue w(j),
+*          SELECT(j) must be set to .TRUE.. To select condition numbers
+*          corresponding to a complex conjugate pair of eigenvalues w(j)
+*          and w(j+1), either SELECT(j) or SELECT(j+1) or both, must be
+*          set to .TRUE..
+*          If HOWMNY = 'A', SELECT is not referenced.
+*
+*  N       (input) INTEGER
+*          The order of the square matrix pair (A, B). N >= 0.
+*
+*  A       (input) REAL array, dimension (LDA,N)
+*          The upper quasi-triangular matrix A in the pair (A,B).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  B       (input) REAL array, dimension (LDB,N)
+*          The upper triangular matrix B in the pair (A,B).
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  VL      (input) REAL array, dimension (LDVL,M)
+*          If JOB = 'E' or 'B', VL must contain left eigenvectors of
+*          (A, B), corresponding to the eigenpairs specified by HOWMNY
+*          and SELECT. The eigenvectors must be stored in consecutive
+*          columns of VL, as returned by STGEVC.
+*          If JOB = 'V', VL is not referenced.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the array VL. LDVL >= 1.
+*          If JOB = 'E' or 'B', LDVL >= N.
+*
+*  VR      (input) REAL array, dimension (LDVR,M)
+*          If JOB = 'E' or 'B', VR must contain right eigenvectors of
+*          (A, B), corresponding to the eigenpairs specified by HOWMNY
+*          and SELECT. The eigenvectors must be stored in consecutive
+*          columns ov VR, as returned by STGEVC.
+*          If JOB = 'V', VR is not referenced.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the array VR. LDVR >= 1.
+*          If JOB = 'E' or 'B', LDVR >= N.
+*
+*  S       (output) REAL array, dimension (MM)
+*          If JOB = 'E' or 'B', the reciprocal condition numbers of the
+*          selected eigenvalues, stored in consecutive elements of the
+*          array. For a complex conjugate pair of eigenvalues two
+*          consecutive elements of S are set to the same value. Thus
+*          S(j), DIF(j), and the j-th columns of VL and VR all
+*          correspond to the same eigenpair (but not in general the
+*          j-th eigenpair, unless all eigenpairs are selected).
+*          If JOB = 'V', S is not referenced.
+*
+*  DIF     (output) REAL array, dimension (MM)
+*          If JOB = 'V' or 'B', the estimated reciprocal condition
+*          numbers of the selected eigenvectors, stored in consecutive
+*          elements of the array. For a complex eigenvector two
+*          consecutive elements of DIF are set to the same value. If
+*          the eigenvalues cannot be reordered to compute DIF(j), DIF(j)
+*          is set to 0; this can only occur when the true value would be
+*          very small anyway.
+*          If JOB = 'E', DIF is not referenced.
+*
+*  MM      (input) INTEGER
+*          The number of elements in the arrays S and DIF. MM >= M.
+*
+*  M       (output) INTEGER
+*          The number of elements of the arrays S and DIF used to store
+*          the specified condition numbers; for each selected real
+*          eigenvalue one element is used, and for each selected complex
+*          conjugate pair of eigenvalues, two elements are used.
+*          If HOWMNY = 'A', M is set to N.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          If JOB = 'E', WORK is not referenced.  Otherwise,
+*          on exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= N.
+*          If JOB = 'V' or 'B' LWORK >= 2*N*(N+2)+16.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace) INTEGER array, dimension (N + 6)
+*          If JOB = 'E', IWORK is not referenced.
+*
+*  INFO    (output) INTEGER
+*          =0: Successful exit
+*          <0: If INFO = -i, the i-th argument had an illegal value
+*
+*
+*  Further Details
+*  ===============
+*
+*  The reciprocal of the condition number of a generalized eigenvalue
+*  w = (a, b) is defined as
+*
+*       S(w) = (|u'Av|**2 + |u'Bv|**2)**(1/2) / (norm(u)*norm(v))
+*
+*  where u and v are the left and right eigenvectors of (A, B)
+*  corresponding to w; |z| denotes the absolute value of the complex
+*  number, and norm(u) denotes the 2-norm of the vector u.
+*  The pair (a, b) corresponds to an eigenvalue w = a/b (= u'Av/u'Bv)
+*  of the matrix pair (A, B). If both a and b equal zero, then (A B) is
+*  singular and S(I) = -1 is returned.
+*
+*  An approximate error bound on the chordal distance between the i-th
+*  computed generalized eigenvalue w and the corresponding exact
+*  eigenvalue lambda is
+*
+*       chord(w, lambda) <= EPS * norm(A, B) / S(I)
+*
+*  where EPS is the machine precision.
+*
+*  The reciprocal of the condition number DIF(i) of right eigenvector u
+*  and left eigenvector v corresponding to the generalized eigenvalue w
+*  is defined as follows:
+*
+*  a) If the i-th eigenvalue w = (a,b) is real
+*
+*     Suppose U and V are orthogonal transformations such that
+*
+*                U'*(A, B)*V  = (S, T) = ( a   *  ) ( b  *  )  1
+*                                        ( 0  S22 ),( 0 T22 )  n-1
+*                                          1  n-1     1 n-1
+*
+*     Then the reciprocal condition number DIF(i) is
+*
+*                Difl((a, b), (S22, T22)) = sigma-min( Zl ),
+*
+*     where sigma-min(Zl) denotes the smallest singular value of the
+*     2(n-1)-by-2(n-1) matrix
+*
+*         Zl = [ kron(a, In-1)  -kron(1, S22) ]
+*              [ kron(b, In-1)  -kron(1, T22) ] .
+*
+*     Here In-1 is the identity matrix of size n-1. kron(X, Y) is the
+*     Kronecker product between the matrices X and Y.
+*
+*     Note that if the default method for computing DIF(i) is wanted
+*     (see SLATDF), then the parameter DIFDRI (see below) should be
+*     changed from 3 to 4 (routine SLATDF(IJOB = 2 will be used)).
+*     See STGSYL for more details.
+*
+*  b) If the i-th and (i+1)-th eigenvalues are complex conjugate pair,
+*
+*     Suppose U and V are orthogonal transformations such that
+*
+*                U'*(A, B)*V = (S, T) = ( S11  *   ) ( T11  *  )  2
+*                                       ( 0    S22 ),( 0    T22) n-2
+*                                         2    n-2     2    n-2
+*
+*     and (S11, T11) corresponds to the complex conjugate eigenvalue
+*     pair (w, conjg(w)). There exist unitary matrices U1 and V1 such
+*     that
+*
+*         U1'*S11*V1 = ( s11 s12 )   and U1'*T11*V1 = ( t11 t12 )
+*                      (  0  s22 )                    (  0  t22 )
+*
+*     where the generalized eigenvalues w = s11/t11 and
+*     conjg(w) = s22/t22.
+*
+*     Then the reciprocal condition number DIF(i) is bounded by
+*
+*         min( d1, max( 1, |real(s11)/real(s22)| )*d2 )
+*
+*     where, d1 = Difl((s11, t11), (s22, t22)) = sigma-min(Z1), where
+*     Z1 is the complex 2-by-2 matrix
+*
+*              Z1 =  [ s11  -s22 ]
+*                    [ t11  -t22 ],
+*
+*     This is done by computing (using real arithmetic) the
+*     roots of the characteristical polynomial det(Z1' * Z1 - lambda I),
+*     where Z1' denotes the conjugate transpose of Z1 and det(X) denotes
+*     the determinant of X.
+*
+*     and d2 is an upper bound on Difl((S11, T11), (S22, T22)), i.e. an
+*     upper bound on sigma-min(Z2), where Z2 is (2n-2)-by-(2n-2)
+*
+*              Z2 = [ kron(S11', In-2)  -kron(I2, S22) ]
+*                   [ kron(T11', In-2)  -kron(I2, T22) ]
+*
+*     Note that if the default method for computing DIF is wanted (see
+*     SLATDF), then the parameter DIFDRI (see below) should be changed
+*     from 3 to 4 (routine SLATDF(IJOB = 2 will be used)). See STGSYL
+*     for more details.
+*
+*  For each eigenvalue/vector specified by SELECT, DIF stores a
+*  Frobenius norm-based estimate of Difl.
+*
+*  An approximate error bound for the i-th computed eigenvector VL(i) or
+*  VR(i) is given by
+*
+*             EPS * norm(A, B) / DIF(i).
+*
+*  See ref. [2-3] for more details and further references.
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  References
+*  ==========
+*
+*  [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the
+*      Generalized Real Schur Form of a Regular Matrix Pair (A, B), in
+*      M.S. Moonen et al (eds), Linear Algebra for Large Scale and
+*      Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218.
+*
+*  [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified
+*      Eigenvalues of a Regular Matrix Pair (A, B) and Condition
+*      Estimation: Theory, Algorithms and Software,
+*      Report UMINF - 94.04, Department of Computing Science, Umea
+*      University, S-901 87 Umea, Sweden, 1994. Also as LAPACK Working
+*      Note 87. To appear in Numerical Algorithms, 1996.
+*
+*  [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software
+*      for Solving the Generalized Sylvester Equation and Estimating the
+*      Separation between Regular Matrix Pairs, Report UMINF - 93.23,
+*      Department of Computing Science, Umea University, S-901 87 Umea,
+*      Sweden, December 1993, Revised April 1994, Also as LAPACK Working
+*      Note 75.  To appear in ACM Trans. on Math. Software, Vol 22,
+*      No 1, 1996.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            DIFDRI
+      PARAMETER          ( DIFDRI = 3 )
+      REAL               ZERO, ONE, TWO, FOUR
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0,
+     $                   FOUR = 4.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, PAIR, SOMCON, WANTBH, WANTDF, WANTS
+      INTEGER            I, IERR, IFST, ILST, IZ, K, KS, LWMIN, N1, N2
+      REAL               ALPHAI, ALPHAR, ALPRQT, BETA, C1, C2, COND,
+     $                   EPS, LNRM, RNRM, ROOT1, ROOT2, SCALE, SMLNUM,
+     $                   TMPII, TMPIR, TMPRI, TMPRR, UHAV, UHAVI, UHBV,
+     $                   UHBVI
+*     ..
+*     .. Local Arrays ..
+      REAL               DUMMY( 1 ), DUMMY1( 1 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SDOT, SLAMCH, SLAPY2, SNRM2
+      EXTERNAL           LSAME, SDOT, SLAMCH, SLAPY2, SNRM2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMV, SLACPY, SLAG2, STGEXC, STGSYL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      WANTBH = LSAME( JOB, 'B' )
+      WANTS = LSAME( JOB, 'E' ) .OR. WANTBH
+      WANTDF = LSAME( JOB, 'V' ) .OR. WANTBH
+*
+      SOMCON = LSAME( HOWMNY, 'S' )
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      IF( LSAME( JOB, 'V' ) .OR. LSAME( JOB, 'B' ) ) THEN
+         LWMIN = MAX( 1, 2*N*(N+2)+16 )
+      ELSE
+         LWMIN = 1
+      END IF
+*
+      IF( .NOT.WANTS .AND. .NOT.WANTDF ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( HOWMNY, 'A' ) .AND. .NOT.SOMCON ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( WANTS .AND. LDVL.LT.N ) THEN
+         INFO = -10
+      ELSE IF( WANTS .AND. LDVR.LT.N ) THEN
+         INFO = -12
+      ELSE
+*
+*        Set M to the number of eigenpairs for which condition numbers
+*        are required, and test MM.
+*
+         IF( SOMCON ) THEN
+            M = 0
+            PAIR = .FALSE.
+            DO 10 K = 1, N
+               IF( PAIR ) THEN
+                  PAIR = .FALSE.
+               ELSE
+                  IF( K.LT.N ) THEN
+                     IF( A( K+1, K ).EQ.ZERO ) THEN
+                        IF( SELECT( K ) )
+     $                     M = M + 1
+                     ELSE
+                        PAIR = .TRUE.
+                        IF( SELECT( K ) .OR. SELECT( K+1 ) )
+     $                     M = M + 2
+                     END IF
+                  ELSE
+                     IF( SELECT( N ) )
+     $                  M = M + 1
+                  END IF
+               END IF
+   10       CONTINUE
+         ELSE
+            M = N
+         END IF
+*
+         IF( MM.LT.M ) THEN
+            INFO = -15
+         ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+            INFO = -18
+c        ELSE IF( WANTDF .AND. LWORK.LT.2*N*( N+2 )+16 ) THEN
+c           INFO = -18
+         END IF
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'STGSNA', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SLAMCH( 'S' ) / EPS
+      KS = 0
+      PAIR = .FALSE.
+*
+      DO 20 K = 1, N
+*
+*        Determine whether A(k,k) begins a 1-by-1 or 2-by-2 block.
+*
+         IF( PAIR ) THEN
+            PAIR = .FALSE.
+            GO TO 20
+         ELSE
+            IF( K.LT.N )
+     $         PAIR = A( K+1, K ).NE.ZERO
+         END IF
+*
+*        Determine whether condition numbers are required for the k-th
+*        eigenpair.
+*
+         IF( SOMCON ) THEN
+            IF( PAIR ) THEN
+               IF( .NOT.SELECT( K ) .AND. .NOT.SELECT( K+1 ) )
+     $            GO TO 20
+            ELSE
+               IF( .NOT.SELECT( K ) )
+     $            GO TO 20
+            END IF
+         END IF
+*
+         KS = KS + 1
+*
+         IF( WANTS ) THEN
+*
+*           Compute the reciprocal condition number of the k-th
+*           eigenvalue.
+*
+            IF( PAIR ) THEN
+*
+*              Complex eigenvalue pair.
+*
+               RNRM = SLAPY2( SNRM2( N, VR( 1, KS ), 1 ),
+     $                SNRM2( N, VR( 1, KS+1 ), 1 ) )
+               LNRM = SLAPY2( SNRM2( N, VL( 1, KS ), 1 ),
+     $                SNRM2( N, VL( 1, KS+1 ), 1 ) )
+               CALL SGEMV( 'N', N, N, ONE, A, LDA, VR( 1, KS ), 1, ZERO,
+     $                     WORK, 1 )
+               TMPRR = SDOT( N, WORK, 1, VL( 1, KS ), 1 )
+               TMPRI = SDOT( N, WORK, 1, VL( 1, KS+1 ), 1 )
+               CALL SGEMV( 'N', N, N, ONE, A, LDA, VR( 1, KS+1 ), 1,
+     $                     ZERO, WORK, 1 )
+               TMPII = SDOT( N, WORK, 1, VL( 1, KS+1 ), 1 )
+               TMPIR = SDOT( N, WORK, 1, VL( 1, KS ), 1 )
+               UHAV = TMPRR + TMPII
+               UHAVI = TMPIR - TMPRI
+               CALL SGEMV( 'N', N, N, ONE, B, LDB, VR( 1, KS ), 1, ZERO,
+     $                     WORK, 1 )
+               TMPRR = SDOT( N, WORK, 1, VL( 1, KS ), 1 )
+               TMPRI = SDOT( N, WORK, 1, VL( 1, KS+1 ), 1 )
+               CALL SGEMV( 'N', N, N, ONE, B, LDB, VR( 1, KS+1 ), 1,
+     $                     ZERO, WORK, 1 )
+               TMPII = SDOT( N, WORK, 1, VL( 1, KS+1 ), 1 )
+               TMPIR = SDOT( N, WORK, 1, VL( 1, KS ), 1 )
+               UHBV = TMPRR + TMPII
+               UHBVI = TMPIR - TMPRI
+               UHAV = SLAPY2( UHAV, UHAVI )
+               UHBV = SLAPY2( UHBV, UHBVI )
+               COND = SLAPY2( UHAV, UHBV )
+               S( KS ) = COND / ( RNRM*LNRM )
+               S( KS+1 ) = S( KS )
+*
+            ELSE
+*
+*              Real eigenvalue.
+*
+               RNRM = SNRM2( N, VR( 1, KS ), 1 )
+               LNRM = SNRM2( N, VL( 1, KS ), 1 )
+               CALL SGEMV( 'N', N, N, ONE, A, LDA, VR( 1, KS ), 1, ZERO,
+     $                     WORK, 1 )
+               UHAV = SDOT( N, WORK, 1, VL( 1, KS ), 1 )
+               CALL SGEMV( 'N', N, N, ONE, B, LDB, VR( 1, KS ), 1, ZERO,
+     $                     WORK, 1 )
+               UHBV = SDOT( N, WORK, 1, VL( 1, KS ), 1 )
+               COND = SLAPY2( UHAV, UHBV )
+               IF( COND.EQ.ZERO ) THEN
+                  S( KS ) = -ONE
+               ELSE
+                  S( KS ) = COND / ( RNRM*LNRM )
+               END IF
+            END IF
+         END IF
+*
+         IF( WANTDF ) THEN
+            IF( N.EQ.1 ) THEN
+               DIF( KS ) = SLAPY2( A( 1, 1 ), B( 1, 1 ) )
+               GO TO 20
+            END IF
+*
+*           Estimate the reciprocal condition number of the k-th
+*           eigenvectors.
+            IF( PAIR ) THEN
+*
+*              Copy the  2-by 2 pencil beginning at (A(k,k), B(k, k)).
+*              Compute the eigenvalue(s) at position K.
+*
+               WORK( 1 ) = A( K, K )
+               WORK( 2 ) = A( K+1, K )
+               WORK( 3 ) = A( K, K+1 )
+               WORK( 4 ) = A( K+1, K+1 )
+               WORK( 5 ) = B( K, K )
+               WORK( 6 ) = B( K+1, K )
+               WORK( 7 ) = B( K, K+1 )
+               WORK( 8 ) = B( K+1, K+1 )
+               CALL SLAG2( WORK, 2, WORK( 5 ), 2, SMLNUM*EPS, BETA,
+     $                     DUMMY1( 1 ), ALPHAR, DUMMY( 1 ), ALPHAI )
+               ALPRQT = ONE
+               C1 = TWO*( ALPHAR*ALPHAR+ALPHAI*ALPHAI+BETA*BETA )
+               C2 = FOUR*BETA*BETA*ALPHAI*ALPHAI
+               ROOT1 = C1 + SQRT( C1*C1-4.0*C2 )
+               ROOT2 = C2 / ROOT1
+               ROOT1 = ROOT1 / TWO
+               COND = MIN( SQRT( ROOT1 ), SQRT( ROOT2 ) )
+            END IF
+*
+*           Copy the matrix (A, B) to the array WORK and swap the
+*           diagonal block beginning at A(k,k) to the (1,1) position.
+*
+            CALL SLACPY( 'Full', N, N, A, LDA, WORK, N )
+            CALL SLACPY( 'Full', N, N, B, LDB, WORK( N*N+1 ), N )
+            IFST = K
+            ILST = 1
+*
+            CALL STGEXC( .FALSE., .FALSE., N, WORK, N, WORK( N*N+1 ), N,
+     $                   DUMMY, 1, DUMMY1, 1, IFST, ILST,
+     $                   WORK( N*N*2+1 ), LWORK-2*N*N, IERR )
+*
+            IF( IERR.GT.0 ) THEN
+*
+*              Ill-conditioned problem - swap rejected.
+*
+               DIF( KS ) = ZERO
+            ELSE
+*
+*              Reordering successful, solve generalized Sylvester
+*              equation for R and L,
+*                         A22 * R - L * A11 = A12
+*                         B22 * R - L * B11 = B12,
+*              and compute estimate of Difl((A11,B11), (A22, B22)).
+*
+               N1 = 1
+               IF( WORK( 2 ).NE.ZERO )
+     $            N1 = 2
+               N2 = N - N1
+               IF( N2.EQ.0 ) THEN
+                  DIF( KS ) = COND
+               ELSE
+                  I = N*N + 1
+                  IZ = 2*N*N + 1
+                  CALL STGSYL( 'N', DIFDRI, N2, N1, WORK( N*N1+N1+1 ),
+     $                         N, WORK, N, WORK( N1+1 ), N,
+     $                         WORK( N*N1+N1+I ), N, WORK( I ), N,
+     $                         WORK( N1+I ), N, SCALE, DIF( KS ),
+     $                         WORK( IZ+1 ), LWORK-2*N*N, IWORK, IERR )
+*
+                  IF( PAIR )
+     $               DIF( KS ) = MIN( MAX( ONE, ALPRQT )*DIF( KS ),
+     $                           COND )
+               END IF
+            END IF
+            IF( PAIR )
+     $         DIF( KS+1 ) = DIF( KS )
+         END IF
+         IF( PAIR )
+     $      KS = KS + 1
+*
+   20 CONTINUE
+      WORK( 1 ) = LWMIN
+      RETURN
+*
+*     End of STGSNA
+*
+      END
diff --git a/costa/native/external/lapack/stgsy2.f b/costa/native/external/lapack/stgsy2.f
new file mode 100644
index 000000000..e2f59b9cb
--- /dev/null
+++ b/costa/native/external/lapack/stgsy2.f
@@ -0,0 +1,950 @@
+      SUBROUTINE STGSY2( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D,
+     $                   LDD, E, LDE, F, LDF, SCALE, RDSUM, RDSCAL,
+     $                   IWORK, PQ, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF, M, N,
+     $                   PQ
+      REAL               RDSCAL, RDSUM, SCALE
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               A( LDA, * ), B( LDB, * ), C( LDC, * ),
+     $                   D( LDD, * ), E( LDE, * ), F( LDF, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STGSY2 solves the generalized Sylvester equation:
+*
+*              A * R - L * B = scale * C                (1)
+*              D * R - L * E = scale * F,
+*
+*  using Level 1 and 2 BLAS. where R and L are unknown M-by-N matrices,
+*  (A, D), (B, E) and (C, F) are given matrix pairs of size M-by-M,
+*  N-by-N and M-by-N, respectively, with real entries. (A, D) and (B, E)
+*  must be in generalized Schur canonical form, i.e. A, B are upper
+*  quasi triangular and D, E are upper triangular. The solution (R, L)
+*  overwrites (C, F). 0 <= SCALE <= 1 is an output scaling factor
+*  chosen to avoid overflow.
+*
+*  In matrix notation solving equation (1) corresponds to solve
+*  Z*x = scale*b, where Z is defined as
+*
+*         Z = [ kron(In, A)  -kron(B', Im) ]             (2)
+*             [ kron(In, D)  -kron(E', Im) ],
+*
+*  Ik is the identity matrix of size k and X' is the transpose of X.
+*  kron(X, Y) is the Kronecker product between the matrices X and Y.
+*  In the process of solving (1), we solve a number of such systems
+*  where Dim(In), Dim(In) = 1 or 2.
+*
+*  If TRANS = 'T', solve the transposed system Z'*y = scale*b for y,
+*  which is equivalent to solve for R and L in
+*
+*              A' * R  + D' * L   = scale *  C           (3)
+*              R  * B' + L  * E'  = scale * -F
+*
+*  This case is used to compute an estimate of Dif[(A, D), (B, E)] =
+*  sigma_min(Z) using reverse communicaton with SLACON.
+*
+*  STGSY2 also (IJOB >= 1) contributes to the computation in STGSYL
+*  of an upper bound on the separation between to matrix pairs. Then
+*  the input (A, D), (B, E) are sub-pencils of the matrix pair in
+*  STGSYL. See STGSYL for details.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER
+*          = 'N', solve the generalized Sylvester equation (1).
+*          = 'T': solve the 'transposed' system (3).
+*
+*  IJOB    (input) INTEGER
+*          Specifies what kind of functionality to be performed.
+*          = 0: solve (1) only.
+*          = 1: A contribution from this subsystem to a Frobenius
+*               norm-based estimate of the separation between two matrix
+*               pairs is computed. (look ahead strategy is used).
+*          = 2: A contribution from this subsystem to a Frobenius
+*               norm-based estimate of the separation between two matrix
+*               pairs is computed. (SGECON on sub-systems is used.)
+*          Not referenced if TRANS = 'T'.
+*
+*  M       (input) INTEGER
+*          On entry, M specifies the order of A and D, and the row
+*          dimension of C, F, R and L.
+*
+*  N       (input) INTEGER
+*          On entry, N specifies the order of B and E, and the column
+*          dimension of C, F, R and L.
+*
+*  A       (input) REAL array, dimension (LDA, M)
+*          On entry, A contains an upper quasi triangular matrix.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the matrix A. LDA >= max(1, M).
+*
+*  B       (input) REAL array, dimension (LDB, N)
+*          On entry, B contains an upper quasi triangular matrix.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the matrix B. LDB >= max(1, N).
+*
+*  C       (input/ output) REAL array, dimension (LDC, N)
+*          On entry, C contains the right-hand-side of the first matrix
+*          equation in (1).
+*          On exit, if IJOB = 0, C has been overwritten by the
+*          solution R.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the matrix C. LDC >= max(1, M).
+*
+*  D       (input) REAL array, dimension (LDD, M)
+*          On entry, D contains an upper triangular matrix.
+*
+*  LDD     (input) INTEGER
+*          The leading dimension of the matrix D. LDD >= max(1, M).
+*
+*  E       (input) REAL array, dimension (LDE, N)
+*          On entry, E contains an upper triangular matrix.
+*
+*  LDE     (input) INTEGER
+*          The leading dimension of the matrix E. LDE >= max(1, N).
+*
+*  F       (input/ output) REAL array, dimension (LDF, N)
+*          On entry, F contains the right-hand-side of the second matrix
+*          equation in (1).
+*          On exit, if IJOB = 0, F has been overwritten by the
+*          solution L.
+*
+*  LDF     (input) INTEGER
+*          The leading dimension of the matrix F. LDF >= max(1, M).
+*
+*  SCALE   (output) REAL
+*          On exit, 0 <= SCALE <= 1. If 0 < SCALE < 1, the solutions
+*          R and L (C and F on entry) will hold the solutions to a
+*          slightly perturbed system but the input matrices A, B, D and
+*          E have not been changed. If SCALE = 0, R and L will hold the
+*          solutions to the homogeneous system with C = F = 0. Normally,
+*          SCALE = 1.
+*
+*  RDSUM   (input/output) REAL
+*          On entry, the sum of squares of computed contributions to
+*          the Dif-estimate under computation by STGSYL, where the
+*          scaling factor RDSCAL (see below) has been factored out.
+*          On exit, the corresponding sum of squares updated with the
+*          contributions from the current sub-system.
+*          If TRANS = 'T' RDSUM is not touched.
+*          NOTE: RDSUM only makes sense when STGSY2 is called by STGSYL.
+*
+*  RDSCAL  (input/output) REAL
+*          On entry, scaling factor used to prevent overflow in RDSUM.
+*          On exit, RDSCAL is updated w.r.t. the current contributions
+*          in RDSUM.
+*          If TRANS = 'T', RDSCAL is not touched.
+*          NOTE: RDSCAL only makes sense when STGSY2 is called by
+*                STGSYL.
+*
+*  IWORK   (workspace) INTEGER array, dimension (M+N+2)
+*
+*  PQ      (output) INTEGER
+*          On exit, the number of subsystems (of size 2-by-2, 4-by-4 and
+*          8-by-8) solved by this routine.
+*
+*  INFO    (output) INTEGER
+*          On exit, if INFO is set to
+*            =0: Successful exit
+*            <0: If INFO = -i, the i-th argument had an illegal value.
+*            >0: The matrix pairs (A, D) and (B, E) have common or very
+*                close eigenvalues.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            LDZ
+      PARAMETER          ( LDZ = 8 )
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN
+      INTEGER            I, IE, IERR, II, IS, ISP1, J, JE, JJ, JS, JSP1,
+     $                   K, MB, NB, P, Q, ZDIM
+      REAL               ALPHA, SCALOC
+*     ..
+*     .. Local Arrays ..
+      INTEGER            IPIV( LDZ ), JPIV( LDZ )
+      REAL               RHS( LDZ ), Z( LDZ, LDZ )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SCOPY, SGEMM, SGEMV, SGER, SGESC2,
+     $                   SGETC2, SSCAL, SLATDF, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test input parameters
+*
+      INFO = 0
+      IERR = 0
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN
+         INFO = -1
+      ELSE IF( ( IJOB.LT.0 ) .OR. ( IJOB.GT.2 ) ) THEN
+         INFO = -2
+      ELSE IF( M.LE.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LE.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LDD.LT.MAX( 1, M ) ) THEN
+         INFO = -12
+      ELSE IF( LDE.LT.MAX( 1, N ) ) THEN
+         INFO = -14
+      ELSE IF( LDF.LT.MAX( 1, M ) ) THEN
+         INFO = -16
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'STGSY2', -INFO )
+         RETURN
+      END IF
+*
+*     Determine block structure of A
+*
+      PQ = 0
+      P = 0
+      I = 1
+   10 CONTINUE
+      IF( I.GT.M )
+     $   GO TO 20
+      P = P + 1
+      IWORK( P ) = I
+      IF( I.EQ.M )
+     $   GO TO 20
+      IF( A( I+1, I ).NE.ZERO ) THEN
+         I = I + 2
+      ELSE
+         I = I + 1
+      END IF
+      GO TO 10
+   20 CONTINUE
+      IWORK( P+1 ) = M + 1
+*
+*     Determine block structure of B
+*
+      Q = P + 1
+      J = 1
+   30 CONTINUE
+      IF( J.GT.N )
+     $   GO TO 40
+      Q = Q + 1
+      IWORK( Q ) = J
+      IF( J.EQ.N )
+     $   GO TO 40
+      IF( B( J+1, J ).NE.ZERO ) THEN
+         J = J + 2
+      ELSE
+         J = J + 1
+      END IF
+      GO TO 30
+   40 CONTINUE
+      IWORK( Q+1 ) = N + 1
+      PQ = P*( Q-P-1 )
+*
+      IF( NOTRAN ) THEN
+*
+*        Solve (I, J) - subsystem
+*           A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J)
+*           D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J)
+*        for I = P, P - 1, ..., 1; J = 1, 2, ..., Q
+*
+         SCALE = ONE
+         SCALOC = ONE
+         DO 120 J = P + 2, Q
+            JS = IWORK( J )
+            JSP1 = JS + 1
+            JE = IWORK( J+1 ) - 1
+            NB = JE - JS + 1
+            DO 110 I = P, 1, -1
+*
+               IS = IWORK( I )
+               ISP1 = IS + 1
+               IE = IWORK( I+1 ) - 1
+               MB = IE - IS + 1
+               ZDIM = MB*NB*2
+*
+               IF( ( MB.EQ.1 ) .AND. ( NB.EQ.1 ) ) THEN
+*
+*                 Build a 2-by-2 system Z * x = RHS
+*
+                  Z( 1, 1 ) = A( IS, IS )
+                  Z( 2, 1 ) = D( IS, IS )
+                  Z( 1, 2 ) = -B( JS, JS )
+                  Z( 2, 2 ) = -E( JS, JS )
+*
+*                 Set up right hand side(s)
+*
+                  RHS( 1 ) = C( IS, JS )
+                  RHS( 2 ) = F( IS, JS )
+*
+*                 Solve Z * x = RHS
+*
+                  CALL SGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR )
+                  IF( IERR.GT.0 )
+     $               INFO = IERR
+*
+                  IF( IJOB.EQ.0 ) THEN
+                     CALL SGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV,
+     $                            SCALOC )
+                     IF( SCALOC.NE.ONE ) THEN
+                        DO 50 K = 1, N
+                           CALL SSCAL( M, SCALOC, C( 1, K ), 1 )
+                           CALL SSCAL( M, SCALOC, F( 1, K ), 1 )
+   50                   CONTINUE
+                        SCALE = SCALE*SCALOC
+                     END IF
+                  ELSE
+                     CALL SLATDF( IJOB, ZDIM, Z, LDZ, RHS, RDSUM,
+     $                            RDSCAL, IPIV, JPIV )
+                  END IF
+*
+*                 Unpack solution vector(s)
+*
+                  C( IS, JS ) = RHS( 1 )
+                  F( IS, JS ) = RHS( 2 )
+*
+*                 Substitute R(I, J) and L(I, J) into remaining
+*                 equation.
+*
+                  IF( I.GT.1 ) THEN
+                     ALPHA = -RHS( 1 )
+                     CALL SAXPY( IS-1, ALPHA, A( 1, IS ), 1, C( 1, JS ),
+     $                           1 )
+                     CALL SAXPY( IS-1, ALPHA, D( 1, IS ), 1, F( 1, JS ),
+     $                           1 )
+                  END IF
+                  IF( J.LT.Q ) THEN
+                     CALL SAXPY( N-JE, RHS( 2 ), B( JS, JE+1 ), LDB,
+     $                           C( IS, JE+1 ), LDC )
+                     CALL SAXPY( N-JE, RHS( 2 ), E( JS, JE+1 ), LDE,
+     $                           F( IS, JE+1 ), LDF )
+                  END IF
+*
+               ELSE IF( ( MB.EQ.1 ) .AND. ( NB.EQ.2 ) ) THEN
+*
+*                 Build a 4-by-4 system Z * x = RHS
+*
+                  Z( 1, 1 ) = A( IS, IS )
+                  Z( 2, 1 ) = ZERO
+                  Z( 3, 1 ) = D( IS, IS )
+                  Z( 4, 1 ) = ZERO
+*
+                  Z( 1, 2 ) = ZERO
+                  Z( 2, 2 ) = A( IS, IS )
+                  Z( 3, 2 ) = ZERO
+                  Z( 4, 2 ) = D( IS, IS )
+*
+                  Z( 1, 3 ) = -B( JS, JS )
+                  Z( 2, 3 ) = -B( JS, JSP1 )
+                  Z( 3, 3 ) = -E( JS, JS )
+                  Z( 4, 3 ) = -E( JS, JSP1 )
+*
+                  Z( 1, 4 ) = -B( JSP1, JS )
+                  Z( 2, 4 ) = -B( JSP1, JSP1 )
+                  Z( 3, 4 ) = ZERO
+                  Z( 4, 4 ) = -E( JSP1, JSP1 )
+*
+*                 Set up right hand side(s)
+*
+                  RHS( 1 ) = C( IS, JS )
+                  RHS( 2 ) = C( IS, JSP1 )
+                  RHS( 3 ) = F( IS, JS )
+                  RHS( 4 ) = F( IS, JSP1 )
+*
+*                 Solve Z * x = RHS
+*
+                  CALL SGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR )
+                  IF( IERR.GT.0 )
+     $               INFO = IERR
+*
+                  IF( IJOB.EQ.0 ) THEN
+                     CALL SGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV,
+     $                            SCALOC )
+                     IF( SCALOC.NE.ONE ) THEN
+                        DO 60 K = 1, N
+                           CALL SSCAL( M, SCALOC, C( 1, K ), 1 )
+                           CALL SSCAL( M, SCALOC, F( 1, K ), 1 )
+   60                   CONTINUE
+                        SCALE = SCALE*SCALOC
+                     END IF
+                  ELSE
+                     CALL SLATDF( IJOB, ZDIM, Z, LDZ, RHS, RDSUM,
+     $                            RDSCAL, IPIV, JPIV )
+                  END IF
+*
+*                 Unpack solution vector(s)
+*
+                  C( IS, JS ) = RHS( 1 )
+                  C( IS, JSP1 ) = RHS( 2 )
+                  F( IS, JS ) = RHS( 3 )
+                  F( IS, JSP1 ) = RHS( 4 )
+*
+*                 Substitute R(I, J) and L(I, J) into remaining
+*                 equation.
+*
+                  IF( I.GT.1 ) THEN
+                     CALL SGER( IS-1, NB, -ONE, A( 1, IS ), 1, RHS( 1 ),
+     $                          1, C( 1, JS ), LDC )
+                     CALL SGER( IS-1, NB, -ONE, D( 1, IS ), 1, RHS( 1 ),
+     $                          1, F( 1, JS ), LDF )
+                  END IF
+                  IF( J.LT.Q ) THEN
+                     CALL SAXPY( N-JE, RHS( 3 ), B( JS, JE+1 ), LDB,
+     $                           C( IS, JE+1 ), LDC )
+                     CALL SAXPY( N-JE, RHS( 3 ), E( JS, JE+1 ), LDE,
+     $                           F( IS, JE+1 ), LDF )
+                     CALL SAXPY( N-JE, RHS( 4 ), B( JSP1, JE+1 ), LDB,
+     $                           C( IS, JE+1 ), LDC )
+                     CALL SAXPY( N-JE, RHS( 4 ), E( JSP1, JE+1 ), LDE,
+     $                           F( IS, JE+1 ), LDF )
+                  END IF
+*
+               ELSE IF( ( MB.EQ.2 ) .AND. ( NB.EQ.1 ) ) THEN
+*
+*                 Build a 4-by-4 system Z * x = RHS
+*
+                  Z( 1, 1 ) = A( IS, IS )
+                  Z( 2, 1 ) = A( ISP1, IS )
+                  Z( 3, 1 ) = D( IS, IS )
+                  Z( 4, 1 ) = ZERO
+*
+                  Z( 1, 2 ) = A( IS, ISP1 )
+                  Z( 2, 2 ) = A( ISP1, ISP1 )
+                  Z( 3, 2 ) = D( IS, ISP1 )
+                  Z( 4, 2 ) = D( ISP1, ISP1 )
+*
+                  Z( 1, 3 ) = -B( JS, JS )
+                  Z( 2, 3 ) = ZERO
+                  Z( 3, 3 ) = -E( JS, JS )
+                  Z( 4, 3 ) = ZERO
+*
+                  Z( 1, 4 ) = ZERO
+                  Z( 2, 4 ) = -B( JS, JS )
+                  Z( 3, 4 ) = ZERO
+                  Z( 4, 4 ) = -E( JS, JS )
+*
+*                 Set up right hand side(s)
+*
+                  RHS( 1 ) = C( IS, JS )
+                  RHS( 2 ) = C( ISP1, JS )
+                  RHS( 3 ) = F( IS, JS )
+                  RHS( 4 ) = F( ISP1, JS )
+*
+*                 Solve Z * x = RHS
+*
+                  CALL SGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR )
+                  IF( IERR.GT.0 )
+     $               INFO = IERR
+                  IF( IJOB.EQ.0 ) THEN
+                     CALL SGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV,
+     $                            SCALOC )
+                     IF( SCALOC.NE.ONE ) THEN
+                        DO 70 K = 1, N
+                           CALL SSCAL( M, SCALOC, C( 1, K ), 1 )
+                           CALL SSCAL( M, SCALOC, F( 1, K ), 1 )
+   70                   CONTINUE
+                        SCALE = SCALE*SCALOC
+                     END IF
+                  ELSE
+                     CALL SLATDF( IJOB, ZDIM, Z, LDZ, RHS, RDSUM,
+     $                            RDSCAL, IPIV, JPIV )
+                  END IF
+*
+*                 Unpack solution vector(s)
+*
+                  C( IS, JS ) = RHS( 1 )
+                  C( ISP1, JS ) = RHS( 2 )
+                  F( IS, JS ) = RHS( 3 )
+                  F( ISP1, JS ) = RHS( 4 )
+*
+*                 Substitute R(I, J) and L(I, J) into remaining
+*                 equation.
+*
+                  IF( I.GT.1 ) THEN
+                     CALL SGEMV( 'N', IS-1, MB, -ONE, A( 1, IS ), LDA,
+     $                           RHS( 1 ), 1, ONE, C( 1, JS ), 1 )
+                     CALL SGEMV( 'N', IS-1, MB, -ONE, D( 1, IS ), LDD,
+     $                           RHS( 1 ), 1, ONE, F( 1, JS ), 1 )
+                  END IF
+                  IF( J.LT.Q ) THEN
+                     CALL SGER( MB, N-JE, ONE, RHS( 3 ), 1,
+     $                          B( JS, JE+1 ), LDB, C( IS, JE+1 ), LDC )
+                     CALL SGER( MB, N-JE, ONE, RHS( 3 ), 1,
+     $                          E( JS, JE+1 ), LDB, F( IS, JE+1 ), LDC )
+                  END IF
+*
+               ELSE IF( ( MB.EQ.2 ) .AND. ( NB.EQ.2 ) ) THEN
+*
+*                 Build an 8-by-8 system Z * x = RHS
+*
+                  CALL SCOPY( LDZ*LDZ, ZERO, 0, Z, 1 )
+*
+                  Z( 1, 1 ) = A( IS, IS )
+                  Z( 2, 1 ) = A( ISP1, IS )
+                  Z( 5, 1 ) = D( IS, IS )
+*
+                  Z( 1, 2 ) = A( IS, ISP1 )
+                  Z( 2, 2 ) = A( ISP1, ISP1 )
+                  Z( 5, 2 ) = D( IS, ISP1 )
+                  Z( 6, 2 ) = D( ISP1, ISP1 )
+*
+                  Z( 3, 3 ) = A( IS, IS )
+                  Z( 4, 3 ) = A( ISP1, IS )
+                  Z( 7, 3 ) = D( IS, IS )
+*
+                  Z( 3, 4 ) = A( IS, ISP1 )
+                  Z( 4, 4 ) = A( ISP1, ISP1 )
+                  Z( 7, 4 ) = D( IS, ISP1 )
+                  Z( 8, 4 ) = D( ISP1, ISP1 )
+*
+                  Z( 1, 5 ) = -B( JS, JS )
+                  Z( 3, 5 ) = -B( JS, JSP1 )
+                  Z( 5, 5 ) = -E( JS, JS )
+                  Z( 7, 5 ) = -E( JS, JSP1 )
+*
+                  Z( 2, 6 ) = -B( JS, JS )
+                  Z( 4, 6 ) = -B( JS, JSP1 )
+                  Z( 6, 6 ) = -E( JS, JS )
+                  Z( 8, 6 ) = -E( JS, JSP1 )
+*
+                  Z( 1, 7 ) = -B( JSP1, JS )
+                  Z( 3, 7 ) = -B( JSP1, JSP1 )
+                  Z( 7, 7 ) = -E( JSP1, JSP1 )
+*
+                  Z( 2, 8 ) = -B( JSP1, JS )
+                  Z( 4, 8 ) = -B( JSP1, JSP1 )
+                  Z( 8, 8 ) = -E( JSP1, JSP1 )
+*
+*                 Set up right hand side(s)
+*
+                  K = 1
+                  II = MB*NB + 1
+                  DO 80 JJ = 0, NB - 1
+                     CALL SCOPY( MB, C( IS, JS+JJ ), 1, RHS( K ), 1 )
+                     CALL SCOPY( MB, F( IS, JS+JJ ), 1, RHS( II ), 1 )
+                     K = K + MB
+                     II = II + MB
+   80             CONTINUE
+*
+*                 Solve Z * x = RHS
+*
+                  CALL SGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR )
+                  IF( IERR.GT.0 )
+     $               INFO = IERR
+                  IF( IJOB.EQ.0 ) THEN
+                     CALL SGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV,
+     $                            SCALOC )
+                     IF( SCALOC.NE.ONE ) THEN
+                        DO 90 K = 1, N
+                           CALL SSCAL( M, SCALOC, C( 1, K ), 1 )
+                           CALL SSCAL( M, SCALOC, F( 1, K ), 1 )
+   90                   CONTINUE
+                        SCALE = SCALE*SCALOC
+                     END IF
+                  ELSE
+                     CALL SLATDF( IJOB, ZDIM, Z, LDZ, RHS, RDSUM,
+     $                            RDSCAL, IPIV, JPIV )
+                  END IF
+*
+*                 Unpack solution vector(s)
+*
+                  K = 1
+                  II = MB*NB + 1
+                  DO 100 JJ = 0, NB - 1
+                     CALL SCOPY( MB, RHS( K ), 1, C( IS, JS+JJ ), 1 )
+                     CALL SCOPY( MB, RHS( II ), 1, F( IS, JS+JJ ), 1 )
+                     K = K + MB
+                     II = II + MB
+  100             CONTINUE
+*
+*                 Substitute R(I, J) and L(I, J) into remaining
+*                 equation.
+*
+                  IF( I.GT.1 ) THEN
+                     CALL SGEMM( 'N', 'N', IS-1, NB, MB, -ONE,
+     $                           A( 1, IS ), LDA, RHS( 1 ), MB, ONE,
+     $                           C( 1, JS ), LDC )
+                     CALL SGEMM( 'N', 'N', IS-1, NB, MB, -ONE,
+     $                           D( 1, IS ), LDD, RHS( 1 ), MB, ONE,
+     $                           F( 1, JS ), LDF )
+                  END IF
+                  IF( J.LT.Q ) THEN
+                     K = MB*NB + 1
+                     CALL SGEMM( 'N', 'N', MB, N-JE, NB, ONE, RHS( K ),
+     $                           MB, B( JS, JE+1 ), LDB, ONE,
+     $                           C( IS, JE+1 ), LDC )
+                     CALL SGEMM( 'N', 'N', MB, N-JE, NB, ONE, RHS( K ),
+     $                           MB, E( JS, JE+1 ), LDE, ONE,
+     $                           F( IS, JE+1 ), LDF )
+                  END IF
+*
+               END IF
+*
+  110       CONTINUE
+  120    CONTINUE
+      ELSE
+*
+*        Solve (I, J) - subsystem
+*             A(I, I)' * R(I, J) + D(I, I)' * L(J, J)  =  C(I, J)
+*             R(I, I)  * B(J, J) + L(I, J)  * E(J, J)  = -F(I, J)
+*        for I = 1, 2, ..., P, J = Q, Q - 1, ..., 1
+*
+         SCALE = ONE
+         SCALOC = ONE
+         DO 200 I = 1, P
+*
+            IS = IWORK( I )
+            ISP1 = IS + 1
+            IE = IWORK( I+1 ) - 1
+            MB = IE - IS + 1
+            DO 190 J = Q, P + 2, -1
+*
+               JS = IWORK( J )
+               JSP1 = JS + 1
+               JE = IWORK( J+1 ) - 1
+               NB = JE - JS + 1
+               ZDIM = MB*NB*2
+               IF( ( MB.EQ.1 ) .AND. ( NB.EQ.1 ) ) THEN
+*
+*                 Build a 2-by-2 system Z' * x = RHS
+*
+                  Z( 1, 1 ) = A( IS, IS )
+                  Z( 2, 1 ) = -B( JS, JS )
+                  Z( 1, 2 ) = D( IS, IS )
+                  Z( 2, 2 ) = -E( JS, JS )
+*
+*                 Set up right hand side(s)
+*
+                  RHS( 1 ) = C( IS, JS )
+                  RHS( 2 ) = F( IS, JS )
+*
+*                 Solve Z' * x = RHS
+*
+                  CALL SGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR )
+                  IF( IERR.GT.0 )
+     $               INFO = IERR
+*
+                  CALL SGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, SCALOC )
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 130 K = 1, N
+                        CALL SSCAL( M, SCALOC, C( 1, K ), 1 )
+                        CALL SSCAL( M, SCALOC, F( 1, K ), 1 )
+  130                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+*
+*                 Unpack solution vector(s)
+*
+                  C( IS, JS ) = RHS( 1 )
+                  F( IS, JS ) = RHS( 2 )
+*
+*                 Substitute R(I, J) and L(I, J) into remaining
+*                 equation.
+*
+                  IF( J.GT.P+2 ) THEN
+                     ALPHA = RHS( 1 )
+                     CALL SAXPY( JS-1, ALPHA, B( 1, JS ), 1, F( IS, 1 ),
+     $                           LDF )
+                     ALPHA = RHS( 2 )
+                     CALL SAXPY( JS-1, ALPHA, E( 1, JS ), 1, F( IS, 1 ),
+     $                           LDF )
+                  END IF
+                  IF( I.LT.P ) THEN
+                     ALPHA = -RHS( 1 )
+                     CALL SAXPY( M-IE, ALPHA, A( IS, IE+1 ), LDA,
+     $                           C( IE+1, JS ), 1 )
+                     ALPHA = -RHS( 2 )
+                     CALL SAXPY( M-IE, ALPHA, D( IS, IE+1 ), LDD,
+     $                           C( IE+1, JS ), 1 )
+                  END IF
+*
+               ELSE IF( ( MB.EQ.1 ) .AND. ( NB.EQ.2 ) ) THEN
+*
+*                 Build a 4-by-4 system Z' * x = RHS
+*
+                  Z( 1, 1 ) = A( IS, IS )
+                  Z( 2, 1 ) = ZERO
+                  Z( 3, 1 ) = -B( JS, JS )
+                  Z( 4, 1 ) = -B( JSP1, JS )
+*
+                  Z( 1, 2 ) = ZERO
+                  Z( 2, 2 ) = A( IS, IS )
+                  Z( 3, 2 ) = -B( JS, JSP1 )
+                  Z( 4, 2 ) = -B( JSP1, JSP1 )
+*
+                  Z( 1, 3 ) = D( IS, IS )
+                  Z( 2, 3 ) = ZERO
+                  Z( 3, 3 ) = -E( JS, JS )
+                  Z( 4, 3 ) = ZERO
+*
+                  Z( 1, 4 ) = ZERO
+                  Z( 2, 4 ) = D( IS, IS )
+                  Z( 3, 4 ) = -E( JS, JSP1 )
+                  Z( 4, 4 ) = -E( JSP1, JSP1 )
+*
+*                 Set up right hand side(s)
+*
+                  RHS( 1 ) = C( IS, JS )
+                  RHS( 2 ) = C( IS, JSP1 )
+                  RHS( 3 ) = F( IS, JS )
+                  RHS( 4 ) = F( IS, JSP1 )
+*
+*                 Solve Z' * x = RHS
+*
+                  CALL SGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR )
+                  IF( IERR.GT.0 )
+     $               INFO = IERR
+                  CALL SGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, SCALOC )
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 140 K = 1, N
+                        CALL SSCAL( M, SCALOC, C( 1, K ), 1 )
+                        CALL SSCAL( M, SCALOC, F( 1, K ), 1 )
+  140                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+*
+*                 Unpack solution vector(s)
+*
+                  C( IS, JS ) = RHS( 1 )
+                  C( IS, JSP1 ) = RHS( 2 )
+                  F( IS, JS ) = RHS( 3 )
+                  F( IS, JSP1 ) = RHS( 4 )
+*
+*                 Substitute R(I, J) and L(I, J) into remaining
+*                 equation.
+*
+                  IF( J.GT.P+2 ) THEN
+                     CALL SAXPY( JS-1, RHS( 1 ), B( 1, JS ), 1,
+     $                           F( IS, 1 ), LDF )
+                     CALL SAXPY( JS-1, RHS( 2 ), B( 1, JSP1 ), 1,
+     $                           F( IS, 1 ), LDF )
+                     CALL SAXPY( JS-1, RHS( 3 ), E( 1, JS ), 1,
+     $                           F( IS, 1 ), LDF )
+                     CALL SAXPY( JS-1, RHS( 4 ), E( 1, JSP1 ), 1,
+     $                           F( IS, 1 ), LDF )
+                  END IF
+                  IF( I.LT.P ) THEN
+                     CALL SGER( M-IE, NB, -ONE, A( IS, IE+1 ), LDA,
+     $                          RHS( 1 ), 1, C( IE+1, JS ), LDC )
+                     CALL SGER( M-IE, NB, -ONE, D( IS, IE+1 ), LDD,
+     $                          RHS( 3 ), 1, C( IE+1, JS ), LDC )
+                  END IF
+*
+               ELSE IF( ( MB.EQ.2 ) .AND. ( NB.EQ.1 ) ) THEN
+*
+*                 Build a 4-by-4 system Z' * x = RHS
+*
+                  Z( 1, 1 ) = A( IS, IS )
+                  Z( 2, 1 ) = A( IS, ISP1 )
+                  Z( 3, 1 ) = -B( JS, JS )
+                  Z( 4, 1 ) = ZERO
+*
+                  Z( 1, 2 ) = A( ISP1, IS )
+                  Z( 2, 2 ) = A( ISP1, ISP1 )
+                  Z( 3, 2 ) = ZERO
+                  Z( 4, 2 ) = -B( JS, JS )
+*
+                  Z( 1, 3 ) = D( IS, IS )
+                  Z( 2, 3 ) = D( IS, ISP1 )
+                  Z( 3, 3 ) = -E( JS, JS )
+                  Z( 4, 3 ) = ZERO
+*
+                  Z( 1, 4 ) = ZERO
+                  Z( 2, 4 ) = D( ISP1, ISP1 )
+                  Z( 3, 4 ) = ZERO
+                  Z( 4, 4 ) = -E( JS, JS )
+*
+*                 Set up right hand side(s)
+*
+                  RHS( 1 ) = C( IS, JS )
+                  RHS( 2 ) = C( ISP1, JS )
+                  RHS( 3 ) = F( IS, JS )
+                  RHS( 4 ) = F( ISP1, JS )
+*
+*                 Solve Z' * x = RHS
+*
+                  CALL SGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR )
+                  IF( IERR.GT.0 )
+     $               INFO = IERR
+*
+                  CALL SGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, SCALOC )
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 150 K = 1, N
+                        CALL SSCAL( M, SCALOC, C( 1, K ), 1 )
+                        CALL SSCAL( M, SCALOC, F( 1, K ), 1 )
+  150                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+*
+*                 Unpack solution vector(s)
+*
+                  C( IS, JS ) = RHS( 1 )
+                  C( ISP1, JS ) = RHS( 2 )
+                  F( IS, JS ) = RHS( 3 )
+                  F( ISP1, JS ) = RHS( 4 )
+*
+*                 Substitute R(I, J) and L(I, J) into remaining
+*                 equation.
+*
+                  IF( J.GT.P+2 ) THEN
+                     CALL SGER( MB, JS-1, ONE, RHS( 1 ), 1, B( 1, JS ),
+     $                          1, F( IS, 1 ), LDF )
+                     CALL SGER( MB, JS-1, ONE, RHS( 3 ), 1, E( 1, JS ),
+     $                          1, F( IS, 1 ), LDF )
+                  END IF
+                  IF( I.LT.P ) THEN
+                     CALL SGEMV( 'T', MB, M-IE, -ONE, A( IS, IE+1 ),
+     $                           LDA, RHS( 1 ), 1, ONE, C( IE+1, JS ),
+     $                           1 )
+                     CALL SGEMV( 'T', MB, M-IE, -ONE, D( IS, IE+1 ),
+     $                           LDD, RHS( 3 ), 1, ONE, C( IE+1, JS ),
+     $                           1 )
+                  END IF
+*
+               ELSE IF( ( MB.EQ.2 ) .AND. ( NB.EQ.2 ) ) THEN
+*
+*                 Build an 8-by-8 system Z' * x = RHS
+*
+                  CALL SCOPY( LDZ*LDZ, ZERO, 0, Z, 1 )
+*
+                  Z( 1, 1 ) = A( IS, IS )
+                  Z( 2, 1 ) = A( IS, ISP1 )
+                  Z( 5, 1 ) = -B( JS, JS )
+                  Z( 7, 1 ) = -B( JSP1, JS )
+*
+                  Z( 1, 2 ) = A( ISP1, IS )
+                  Z( 2, 2 ) = A( ISP1, ISP1 )
+                  Z( 6, 2 ) = -B( JS, JS )
+                  Z( 8, 2 ) = -B( JSP1, JS )
+*
+                  Z( 3, 3 ) = A( IS, IS )
+                  Z( 4, 3 ) = A( IS, ISP1 )
+                  Z( 5, 3 ) = -B( JS, JSP1 )
+                  Z( 7, 3 ) = -B( JSP1, JSP1 )
+*
+                  Z( 3, 4 ) = A( ISP1, IS )
+                  Z( 4, 4 ) = A( ISP1, ISP1 )
+                  Z( 6, 4 ) = -B( JS, JSP1 )
+                  Z( 8, 4 ) = -B( JSP1, JSP1 )
+*
+                  Z( 1, 5 ) = D( IS, IS )
+                  Z( 2, 5 ) = D( IS, ISP1 )
+                  Z( 5, 5 ) = -E( JS, JS )
+*
+                  Z( 2, 6 ) = D( ISP1, ISP1 )
+                  Z( 6, 6 ) = -E( JS, JS )
+*
+                  Z( 3, 7 ) = D( IS, IS )
+                  Z( 4, 7 ) = D( IS, ISP1 )
+                  Z( 5, 7 ) = -E( JS, JSP1 )
+                  Z( 7, 7 ) = -E( JSP1, JSP1 )
+*
+                  Z( 4, 8 ) = D( ISP1, ISP1 )
+                  Z( 6, 8 ) = -E( JS, JSP1 )
+                  Z( 8, 8 ) = -E( JSP1, JSP1 )
+*
+*                 Set up right hand side(s)
+*
+                  K = 1
+                  II = MB*NB + 1
+                  DO 160 JJ = 0, NB - 1
+                     CALL SCOPY( MB, C( IS, JS+JJ ), 1, RHS( K ), 1 )
+                     CALL SCOPY( MB, F( IS, JS+JJ ), 1, RHS( II ), 1 )
+                     K = K + MB
+                     II = II + MB
+  160             CONTINUE
+*
+*
+*                 Solve Z' * x = RHS
+*
+                  CALL SGETC2( ZDIM, Z, LDZ, IPIV, JPIV, IERR )
+                  IF( IERR.GT.0 )
+     $               INFO = IERR
+*
+                  CALL SGESC2( ZDIM, Z, LDZ, RHS, IPIV, JPIV, SCALOC )
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 170 K = 1, N
+                        CALL SSCAL( M, SCALOC, C( 1, K ), 1 )
+                        CALL SSCAL( M, SCALOC, F( 1, K ), 1 )
+  170                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+*
+*                 Unpack solution vector(s)
+*
+                  K = 1
+                  II = MB*NB + 1
+                  DO 180 JJ = 0, NB - 1
+                     CALL SCOPY( MB, RHS( K ), 1, C( IS, JS+JJ ), 1 )
+                     CALL SCOPY( MB, RHS( II ), 1, F( IS, JS+JJ ), 1 )
+                     K = K + MB
+                     II = II + MB
+  180             CONTINUE
+*
+*                 Substitute R(I, J) and L(I, J) into remaining
+*                 equation.
+*
+                  IF( J.GT.P+2 ) THEN
+                     CALL SGEMM( 'N', 'T', MB, JS-1, NB, ONE,
+     $                           C( IS, JS ), LDC, B( 1, JS ), LDB, ONE,
+     $                           F( IS, 1 ), LDF )
+                     CALL SGEMM( 'N', 'T', MB, JS-1, NB, ONE,
+     $                           F( IS, JS ), LDF, E( 1, JS ), LDE, ONE,
+     $                           F( IS, 1 ), LDF )
+                  END IF
+                  IF( I.LT.P ) THEN
+                     CALL SGEMM( 'T', 'N', M-IE, NB, MB, -ONE,
+     $                           A( IS, IE+1 ), LDA, C( IS, JS ), LDC,
+     $                           ONE, C( IE+1, JS ), LDC )
+                     CALL SGEMM( 'T', 'N', M-IE, NB, MB, -ONE,
+     $                           D( IS, IE+1 ), LDD, F( IS, JS ), LDF,
+     $                           ONE, C( IE+1, JS ), LDC )
+                  END IF
+*
+               END IF
+*
+  190       CONTINUE
+  200    CONTINUE
+*
+      END IF
+      RETURN
+*
+*     End of STGSY2
+*
+      END
diff --git a/costa/native/external/lapack/stgsyl.f b/costa/native/external/lapack/stgsyl.f
new file mode 100644
index 000000000..d87f6b548
--- /dev/null
+++ b/costa/native/external/lapack/stgsyl.f
@@ -0,0 +1,534 @@
+      SUBROUTINE STGSYL( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D,
+     $                   LDD, E, LDE, F, LDF, SCALE, DIF, WORK, LWORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF,
+     $                   LWORK, M, N
+      REAL               DIF, SCALE
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               A( LDA, * ), B( LDB, * ), C( LDC, * ),
+     $                   D( LDD, * ), E( LDE, * ), F( LDF, * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STGSYL solves the generalized Sylvester equation:
+*
+*              A * R - L * B = scale * C                 (1)
+*              D * R - L * E = scale * F
+*
+*  where R and L are unknown m-by-n matrices, (A, D), (B, E) and
+*  (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n,
+*  respectively, with real entries. (A, D) and (B, E) must be in
+*  generalized (real) Schur canonical form, i.e. A, B are upper quasi
+*  triangular and D, E are upper triangular.
+*
+*  The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 is an output
+*  scaling factor chosen to avoid overflow.
+*
+*  In matrix notation (1) is equivalent to solve  Zx = scale b, where
+*  Z is defined as
+*
+*             Z = [ kron(In, A)  -kron(B', Im) ]         (2)
+*                 [ kron(In, D)  -kron(E', Im) ].
+*
+*  Here Ik is the identity matrix of size k and X' is the transpose of
+*  X. kron(X, Y) is the Kronecker product between the matrices X and Y.
+*
+*  If TRANS = 'T', STGSYL solves the transposed system Z'*y = scale*b,
+*  which is equivalent to solve for R and L in
+*
+*              A' * R  + D' * L   = scale *  C           (3)
+*              R  * B' + L  * E'  = scale * (-F)
+*
+*  This case (TRANS = 'T') is used to compute an one-norm-based estimate
+*  of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D)
+*  and (B,E), using SLACON.
+*
+*  If IJOB >= 1, STGSYL computes a Frobenius norm-based estimate
+*  of Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the
+*  reciprocal of the smallest singular value of Z. See [1-2] for more
+*  information.
+*
+*  This is a level 3 BLAS algorithm.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N', solve the generalized Sylvester equation (1).
+*          = 'T', solve the 'transposed' system (3).
+*
+*  IJOB    (input) INTEGER
+*          Specifies what kind of functionality to be performed.
+*           =0: solve (1) only.
+*           =1: The functionality of 0 and 3.
+*           =2: The functionality of 0 and 4.
+*           =3: Only an estimate of Dif[(A,D), (B,E)] is computed.
+*               (look ahead strategy IJOB  = 1 is used).
+*           =4: Only an estimate of Dif[(A,D), (B,E)] is computed.
+*               ( SGECON on sub-systems is used ).
+*          Not referenced if TRANS = 'T'.
+*
+*  M       (input) INTEGER
+*          The order of the matrices A and D, and the row dimension of
+*          the matrices C, F, R and L.
+*
+*  N       (input) INTEGER
+*          The order of the matrices B and E, and the column dimension
+*          of the matrices C, F, R and L.
+*
+*  A       (input) REAL array, dimension (LDA, M)
+*          The upper quasi triangular matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1, M).
+*
+*  B       (input) REAL array, dimension (LDB, N)
+*          The upper quasi triangular matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1, N).
+*
+*  C       (input/output) REAL array, dimension (LDC, N)
+*          On entry, C contains the right-hand-side of the first matrix
+*          equation in (1) or (3).
+*          On exit, if IJOB = 0, 1 or 2, C has been overwritten by
+*          the solution R. If IJOB = 3 or 4 and TRANS = 'N', C holds R,
+*          the solution achieved during the computation of the
+*          Dif-estimate.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1, M).
+*
+*  D       (input) REAL array, dimension (LDD, M)
+*          The upper triangular matrix D.
+*
+*  LDD     (input) INTEGER
+*          The leading dimension of the array D. LDD >= max(1, M).
+*
+*  E       (input) REAL array, dimension (LDE, N)
+*          The upper triangular matrix E.
+*
+*  LDE     (input) INTEGER
+*          The leading dimension of the array E. LDE >= max(1, N).
+*
+*  F       (input/output) REAL array, dimension (LDF, N)
+*          On entry, F contains the right-hand-side of the second matrix
+*          equation in (1) or (3).
+*          On exit, if IJOB = 0, 1 or 2, F has been overwritten by
+*          the solution L. If IJOB = 3 or 4 and TRANS = 'N', F holds L,
+*          the solution achieved during the computation of the
+*          Dif-estimate.
+*
+*  LDF     (input) INTEGER
+*          The leading dimension of the array F. LDF >= max(1, M).
+*
+*  DIF     (output) REAL
+*          On exit DIF is the reciprocal of a lower bound of the
+*          reciprocal of the Dif-function, i.e. DIF is an upper bound of
+*          Dif[(A,D), (B,E)] = sigma_min(Z), where Z as in (2).
+*          IF IJOB = 0 or TRANS = 'T', DIF is not touched.
+*
+*  SCALE   (output) REAL
+*          On exit SCALE is the scaling factor in (1) or (3).
+*          If 0 < SCALE < 1, C and F hold the solutions R and L, resp.,
+*          to a slightly perturbed system but the input matrices A, B, D
+*          and E have not been changed. If SCALE = 0, C and F hold the
+*          solutions R and L, respectively, to the homogeneous system
+*          with C = F = 0. Normally, SCALE = 1.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          If IJOB = 0, WORK is not referenced.  Otherwise,
+*          on exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK > = 1.
+*          If IJOB = 1 or 2 and TRANS = 'N', LWORK >= 2*M*N.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace) INTEGER array, dimension (M+N+6)
+*
+*  INFO    (output) INTEGER
+*            =0: successful exit
+*            <0: If INFO = -i, the i-th argument had an illegal value.
+*            >0: (A, D) and (B, E) have common or close eigenvalues.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  [1] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software
+*      for Solving the Generalized Sylvester Equation and Estimating the
+*      Separation between Regular Matrix Pairs, Report UMINF - 93.23,
+*      Department of Computing Science, Umea University, S-901 87 Umea,
+*      Sweden, December 1993, Revised April 1994, Also as LAPACK Working
+*      Note 75.  To appear in ACM Trans. on Math. Software, Vol 22,
+*      No 1, 1996.
+*
+*  [2] B. Kagstrom, A Perturbation Analysis of the Generalized Sylvester
+*      Equation (AR - LB, DR - LE ) = (C, F), SIAM J. Matrix Anal.
+*      Appl., 15(4):1045-1060, 1994
+*
+*  [3] B. Kagstrom and L. Westin, Generalized Schur Methods with
+*      Condition Estimators for Solving the Generalized Sylvester
+*      Equation, IEEE Transactions on Automatic Control, Vol. 34, No. 7,
+*      July 1989, pp 745-751.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, NOTRAN
+      INTEGER            I, IE, IFUNC, IROUND, IS, ISOLVE, J, JE, JS, K,
+     $                   LINFO, LWMIN, MB, NB, P, PPQQ, PQ, Q
+      REAL               DSCALE, DSUM, SCALE2, SCALOC
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SCOPY, SGEMM, SLACPY, SSCAL, STGSY2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, REAL, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test input parameters
+*
+      INFO = 0
+      NOTRAN = LSAME( TRANS, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      IF( ( IJOB.EQ.1 .OR. IJOB.EQ.2 ) .AND. NOTRAN ) THEN
+         LWMIN = MAX( 1, 2*M*N )
+      ELSE
+         LWMIN = 1
+      END IF
+*
+      IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) ) THEN
+         INFO = -1
+      ELSE IF( ( IJOB.LT.0 ) .OR. ( IJOB.GT.4 ) ) THEN
+         INFO = -2
+      ELSE IF( M.LE.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LE.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LDD.LT.MAX( 1, M ) ) THEN
+         INFO = -12
+      ELSE IF( LDE.LT.MAX( 1, N ) ) THEN
+         INFO = -14
+      ELSE IF( LDF.LT.MAX( 1, M ) ) THEN
+         INFO = -16
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -20
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'STGSYL', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Determine optimal block sizes MB and NB
+*
+      MB = ILAENV( 2, 'STGSYL', TRANS, M, N, -1, -1 )
+      NB = ILAENV( 5, 'STGSYL', TRANS, M, N, -1, -1 )
+*
+      ISOLVE = 1
+      IFUNC = 0
+      IF( IJOB.GE.3 .AND. NOTRAN ) THEN
+         IFUNC = IJOB - 2
+         DO 10 J = 1, N
+            CALL SCOPY( M, ZERO, 0, C( 1, J ), 1 )
+            CALL SCOPY( M, ZERO, 0, F( 1, J ), 1 )
+   10    CONTINUE
+      ELSE IF( IJOB.GE.1 .AND. NOTRAN ) THEN
+         ISOLVE = 2
+      END IF
+*
+      IF( ( MB.LE.1 .AND. NB.LE.1 ) .OR. ( MB.GE.M .AND. NB.GE.N ) )
+     $     THEN
+*
+         DO 30 IROUND = 1, ISOLVE
+*
+*           Use unblocked Level 2 solver
+*
+            DSCALE = ZERO
+            DSUM = ONE
+            PQ = 0
+            CALL STGSY2( TRANS, IFUNC, M, N, A, LDA, B, LDB, C, LDC, D,
+     $                   LDD, E, LDE, F, LDF, SCALE, DSUM, DSCALE,
+     $                   IWORK, PQ, INFO )
+            IF( DSCALE.NE.ZERO ) THEN
+               IF( IJOB.EQ.1 .OR. IJOB.EQ.3 ) THEN
+                  DIF = SQRT( REAL( 2*M*N ) ) / ( DSCALE*SQRT( DSUM ) )
+               ELSE
+                  DIF = SQRT( REAL( PQ ) ) / ( DSCALE*SQRT( DSUM ) )
+               END IF
+            END IF
+*
+            IF( ISOLVE.EQ.2 .AND. IROUND.EQ.1 ) THEN
+               IFUNC = IJOB
+               SCALE2 = SCALE
+               CALL SLACPY( 'F', M, N, C, LDC, WORK, M )
+               CALL SLACPY( 'F', M, N, F, LDF, WORK( M*N+1 ), M )
+               DO 20 J = 1, N
+                  CALL SCOPY( M, ZERO, 0, C( 1, J ), 1 )
+                  CALL SCOPY( M, ZERO, 0, F( 1, J ), 1 )
+   20          CONTINUE
+            ELSE IF( ISOLVE.EQ.2 .AND. IROUND.EQ.2 ) THEN
+               CALL SLACPY( 'F', M, N, WORK, M, C, LDC )
+               CALL SLACPY( 'F', M, N, WORK( M*N+1 ), M, F, LDF )
+               SCALE = SCALE2
+            END IF
+   30    CONTINUE
+*
+         RETURN
+      END IF
+*
+*     Determine block structure of A
+*
+      P = 0
+      I = 1
+   40 CONTINUE
+      IF( I.GT.M )
+     $   GO TO 50
+      P = P + 1
+      IWORK( P ) = I
+      I = I + MB
+      IF( I.GE.M )
+     $   GO TO 50
+      IF( A( I, I-1 ).NE.ZERO )
+     $   I = I + 1
+      GO TO 40
+   50 CONTINUE
+*
+      IWORK( P+1 ) = M + 1
+      IF( IWORK( P ).EQ.IWORK( P+1 ) )
+     $   P = P - 1
+*
+*     Determine block structure of B
+*
+      Q = P + 1
+      J = 1
+   60 CONTINUE
+      IF( J.GT.N )
+     $   GO TO 70
+      Q = Q + 1
+      IWORK( Q ) = J
+      J = J + NB
+      IF( J.GE.N )
+     $   GO TO 70
+      IF( B( J, J-1 ).NE.ZERO )
+     $   J = J + 1
+      GO TO 60
+   70 CONTINUE
+*
+      IWORK( Q+1 ) = N + 1
+      IF( IWORK( Q ).EQ.IWORK( Q+1 ) )
+     $   Q = Q - 1
+*
+      IF( NOTRAN ) THEN
+*
+         DO 150 IROUND = 1, ISOLVE
+*
+*           Solve (I, J)-subsystem
+*               A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J)
+*               D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J)
+*           for I = P, P - 1,..., 1; J = 1, 2,..., Q
+*
+            DSCALE = ZERO
+            DSUM = ONE
+            PQ = 0
+            SCALE = ONE
+            DO 130 J = P + 2, Q
+               JS = IWORK( J )
+               JE = IWORK( J+1 ) - 1
+               NB = JE - JS + 1
+               DO 120 I = P, 1, -1
+                  IS = IWORK( I )
+                  IE = IWORK( I+1 ) - 1
+                  MB = IE - IS + 1
+                  PPQQ = 0
+                  CALL STGSY2( TRANS, IFUNC, MB, NB, A( IS, IS ), LDA,
+     $                         B( JS, JS ), LDB, C( IS, JS ), LDC,
+     $                         D( IS, IS ), LDD, E( JS, JS ), LDE,
+     $                         F( IS, JS ), LDF, SCALOC, DSUM, DSCALE,
+     $                         IWORK( Q+2 ), PPQQ, LINFO )
+                  IF( LINFO.GT.0 )
+     $               INFO = LINFO
+*
+                  PQ = PQ + PPQQ
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 80 K = 1, JS - 1
+                        CALL SSCAL( M, SCALOC, C( 1, K ), 1 )
+                        CALL SSCAL( M, SCALOC, F( 1, K ), 1 )
+   80                CONTINUE
+                     DO 90 K = JS, JE
+                        CALL SSCAL( IS-1, SCALOC, C( 1, K ), 1 )
+                        CALL SSCAL( IS-1, SCALOC, F( 1, K ), 1 )
+   90                CONTINUE
+                     DO 100 K = JS, JE
+                        CALL SSCAL( M-IE, SCALOC, C( IE+1, K ), 1 )
+                        CALL SSCAL( M-IE, SCALOC, F( IE+1, K ), 1 )
+  100                CONTINUE
+                     DO 110 K = JE + 1, N
+                        CALL SSCAL( M, SCALOC, C( 1, K ), 1 )
+                        CALL SSCAL( M, SCALOC, F( 1, K ), 1 )
+  110                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+*
+*                 Substitute R(I, J) and L(I, J) into remaining
+*                 equation.
+*
+                  IF( I.GT.1 ) THEN
+                     CALL SGEMM( 'N', 'N', IS-1, NB, MB, -ONE,
+     $                           A( 1, IS ), LDA, C( IS, JS ), LDC, ONE,
+     $                           C( 1, JS ), LDC )
+                     CALL SGEMM( 'N', 'N', IS-1, NB, MB, -ONE,
+     $                           D( 1, IS ), LDD, C( IS, JS ), LDC, ONE,
+     $                           F( 1, JS ), LDF )
+                  END IF
+                  IF( J.LT.Q ) THEN
+                     CALL SGEMM( 'N', 'N', MB, N-JE, NB, ONE,
+     $                           F( IS, JS ), LDF, B( JS, JE+1 ), LDB,
+     $                           ONE, C( IS, JE+1 ), LDC )
+                     CALL SGEMM( 'N', 'N', MB, N-JE, NB, ONE,
+     $                           F( IS, JS ), LDF, E( JS, JE+1 ), LDE,
+     $                           ONE, F( IS, JE+1 ), LDF )
+                  END IF
+  120          CONTINUE
+  130       CONTINUE
+            IF( DSCALE.NE.ZERO ) THEN
+               IF( IJOB.EQ.1 .OR. IJOB.EQ.3 ) THEN
+                  DIF = SQRT( REAL( 2*M*N ) ) / ( DSCALE*SQRT( DSUM ) )
+               ELSE
+                  DIF = SQRT( REAL( PQ ) ) / ( DSCALE*SQRT( DSUM ) )
+               END IF
+            END IF
+            IF( ISOLVE.EQ.2 .AND. IROUND.EQ.1 ) THEN
+               IFUNC = IJOB
+               SCALE2 = SCALE
+               CALL SLACPY( 'F', M, N, C, LDC, WORK, M )
+               CALL SLACPY( 'F', M, N, F, LDF, WORK( M*N+1 ), M )
+               DO 140 J = 1, N
+                  CALL SCOPY( M, ZERO, 0, C( 1, J ), 1 )
+                  CALL SCOPY( M, ZERO, 0, F( 1, J ), 1 )
+  140          CONTINUE
+            ELSE IF( ISOLVE.EQ.2 .AND. IROUND.EQ.2 ) THEN
+               CALL SLACPY( 'F', M, N, WORK, M, C, LDC )
+               CALL SLACPY( 'F', M, N, WORK( M*N+1 ), M, F, LDF )
+               SCALE = SCALE2
+            END IF
+  150    CONTINUE
+*
+      ELSE
+*
+*        Solve transposed (I, J)-subsystem
+*             A(I, I)' * R(I, J)  + D(I, I)' * L(I, J)  =  C(I, J)
+*             R(I, J)  * B(J, J)' + L(I, J)  * E(J, J)' = -F(I, J)
+*        for I = 1,2,..., P; J = Q, Q-1,..., 1
+*
+         SCALE = ONE
+         DO 210 I = 1, P
+            IS = IWORK( I )
+            IE = IWORK( I+1 ) - 1
+            MB = IE - IS + 1
+            DO 200 J = Q, P + 2, -1
+               JS = IWORK( J )
+               JE = IWORK( J+1 ) - 1
+               NB = JE - JS + 1
+               CALL STGSY2( TRANS, IFUNC, MB, NB, A( IS, IS ), LDA,
+     $                      B( JS, JS ), LDB, C( IS, JS ), LDC,
+     $                      D( IS, IS ), LDD, E( JS, JS ), LDE,
+     $                      F( IS, JS ), LDF, SCALOC, DSUM, DSCALE,
+     $                      IWORK( Q+2 ), PPQQ, LINFO )
+               IF( LINFO.GT.0 )
+     $            INFO = LINFO
+               IF( SCALOC.NE.ONE ) THEN
+                  DO 160 K = 1, JS - 1
+                     CALL SSCAL( M, SCALOC, C( 1, K ), 1 )
+                     CALL SSCAL( M, SCALOC, F( 1, K ), 1 )
+  160             CONTINUE
+                  DO 170 K = JS, JE
+                     CALL SSCAL( IS-1, SCALOC, C( 1, K ), 1 )
+                     CALL SSCAL( IS-1, SCALOC, F( 1, K ), 1 )
+  170             CONTINUE
+                  DO 180 K = JS, JE
+                     CALL SSCAL( M-IE, SCALOC, C( IE+1, K ), 1 )
+                     CALL SSCAL( M-IE, SCALOC, F( IE+1, K ), 1 )
+  180             CONTINUE
+                  DO 190 K = JE + 1, N
+                     CALL SSCAL( M, SCALOC, C( 1, K ), 1 )
+                     CALL SSCAL( M, SCALOC, F( 1, K ), 1 )
+  190             CONTINUE
+                  SCALE = SCALE*SCALOC
+               END IF
+*
+*              Substitute R(I, J) and L(I, J) into remaining equation.
+*
+               IF( J.GT.P+2 ) THEN
+                  CALL SGEMM( 'N', 'T', MB, JS-1, NB, ONE, C( IS, JS ),
+     $                        LDC, B( 1, JS ), LDB, ONE, F( IS, 1 ),
+     $                        LDF )
+                  CALL SGEMM( 'N', 'T', MB, JS-1, NB, ONE, F( IS, JS ),
+     $                        LDF, E( 1, JS ), LDE, ONE, F( IS, 1 ),
+     $                        LDF )
+               END IF
+               IF( I.LT.P ) THEN
+                  CALL SGEMM( 'T', 'N', M-IE, NB, MB, -ONE,
+     $                        A( IS, IE+1 ), LDA, C( IS, JS ), LDC, ONE,
+     $                        C( IE+1, JS ), LDC )
+                  CALL SGEMM( 'T', 'N', M-IE, NB, MB, -ONE,
+     $                        D( IS, IE+1 ), LDD, F( IS, JS ), LDF, ONE,
+     $                        C( IE+1, JS ), LDC )
+               END IF
+  200       CONTINUE
+  210    CONTINUE
+*
+      END IF
+*
+      WORK( 1 ) = LWMIN
+*
+      RETURN
+*
+*     End of STGSYL
+*
+      END
diff --git a/costa/native/external/lapack/stpcon.f b/costa/native/external/lapack/stpcon.f
new file mode 100644
index 000000000..8adbadb4f
--- /dev/null
+++ b/costa/native/external/lapack/stpcon.f
@@ -0,0 +1,187 @@
+      SUBROUTINE STPCON( NORM, UPLO, DIAG, N, AP, RCOND, WORK, IWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORM, UPLO
+      INTEGER            INFO, N
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               AP( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STPCON estimates the reciprocal of the condition number of a packed
+*  triangular matrix A, in either the 1-norm or the infinity-norm.
+*
+*  The norm of A is computed and an estimate is obtained for
+*  norm(inv(A)), then the reciprocal of the condition number is
+*  computed as
+*     RCOND = 1 / ( norm(A) * norm(inv(A)) ).
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies whether the 1-norm condition number or the
+*          infinity-norm condition number is required:
+*          = '1' or 'O':  1-norm;
+*          = 'I':         Infinity-norm.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input) REAL array, dimension (N*(N+1)/2)
+*          The upper or lower triangular matrix A, packed columnwise in
+*          a linear array.  The j-th column of A is stored in the array
+*          AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*          If DIAG = 'U', the diagonal elements of A are not referenced
+*          and are assumed to be 1.
+*
+*  RCOND   (output) REAL
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(norm(A) * norm(inv(A))).
+*
+*  WORK    (workspace) REAL array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, ONENRM, UPPER
+      CHARACTER          NORMIN
+      INTEGER            IX, KASE, KASE1
+      REAL               AINVNM, ANORM, SCALE, SMLNUM, XNORM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ISAMAX
+      REAL               SLAMCH, SLANTP
+      EXTERNAL           LSAME, ISAMAX, SLAMCH, SLANTP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLACON, SLATPS, SRSCL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+      IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'STPCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      END IF
+*
+      RCOND = ZERO
+      SMLNUM = SLAMCH( 'Safe minimum' )*REAL( MAX( 1, N ) )
+*
+*     Compute the norm of the triangular matrix A.
+*
+      ANORM = SLANTP( NORM, UPLO, DIAG, N, AP, WORK )
+*
+*     Continue only if ANORM > 0.
+*
+      IF( ANORM.GT.ZERO ) THEN
+*
+*        Estimate the norm of the inverse of A.
+*
+         AINVNM = ZERO
+         NORMIN = 'N'
+         IF( ONENRM ) THEN
+            KASE1 = 1
+         ELSE
+            KASE1 = 2
+         END IF
+         KASE = 0
+   10    CONTINUE
+         CALL SLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.KASE1 ) THEN
+*
+*              Multiply by inv(A).
+*
+               CALL SLATPS( UPLO, 'No transpose', DIAG, NORMIN, N, AP,
+     $                      WORK, SCALE, WORK( 2*N+1 ), INFO )
+            ELSE
+*
+*              Multiply by inv(A').
+*
+               CALL SLATPS( UPLO, 'Transpose', DIAG, NORMIN, N, AP,
+     $                      WORK, SCALE, WORK( 2*N+1 ), INFO )
+            END IF
+            NORMIN = 'Y'
+*
+*           Multiply by 1/SCALE if doing so will not cause overflow.
+*
+            IF( SCALE.NE.ONE ) THEN
+               IX = ISAMAX( N, WORK, 1 )
+               XNORM = ABS( WORK( IX ) )
+               IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO )
+     $            GO TO 20
+               CALL SRSCL( N, SCALE, WORK, 1 )
+            END IF
+            GO TO 10
+         END IF
+*
+*        Compute the estimate of the reciprocal condition number.
+*
+         IF( AINVNM.NE.ZERO )
+     $      RCOND = ( ONE / ANORM ) / AINVNM
+      END IF
+*
+   20 CONTINUE
+      RETURN
+*
+*     End of STPCON
+*
+      END
diff --git a/costa/native/external/lapack/stprfs.f b/costa/native/external/lapack/stprfs.f
new file mode 100644
index 000000000..b4ad7564c
--- /dev/null
+++ b/costa/native/external/lapack/stprfs.f
@@ -0,0 +1,375 @@
+      SUBROUTINE STPRFS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, X, LDX,
+     $                   FERR, BERR, WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, TRANS, UPLO
+      INTEGER            INFO, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               AP( * ), B( LDB, * ), BERR( * ), FERR( * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STPRFS provides error bounds and backward error estimates for the
+*  solution to a system of linear equations with a triangular packed
+*  coefficient matrix.
+*
+*  The solution matrix X must be computed by STPTRS or some other
+*  means before entering this routine.  STPRFS does not do iterative
+*  refinement because doing so cannot improve the backward error.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B  (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose = Transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AP      (input) REAL array, dimension (N*(N+1)/2)
+*          The upper or lower triangular matrix A, packed columnwise in
+*          a linear array.  The j-th column of A is stored in the array
+*          AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*          If DIAG = 'U', the diagonal elements of A are not referenced
+*          and are assumed to be 1.
+*
+*  B       (input) REAL array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input) REAL array, dimension (LDX,NRHS)
+*          The solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) REAL array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN, NOUNIT, UPPER
+      CHARACTER          TRANST
+      INTEGER            I, J, K, KASE, KC, NZ
+      REAL               EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SCOPY, SLACON, STPMV, STPSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'STPRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      IF( NOTRAN ) THEN
+         TRANST = 'T'
+      ELSE
+         TRANST = 'N'
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = SLAMCH( 'Epsilon' )
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 250 J = 1, NRHS
+*
+*        Compute residual R = B - op(A) * X,
+*        where op(A) = A or A', depending on TRANS.
+*
+         CALL SCOPY( N, X( 1, J ), 1, WORK( N+1 ), 1 )
+         CALL STPMV( UPLO, TRANS, DIAG, N, AP, WORK( N+1 ), 1 )
+         CALL SAXPY( N, -ONE, B( 1, J ), 1, WORK( N+1 ), 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 20 I = 1, N
+            WORK( I ) = ABS( B( I, J ) )
+   20    CONTINUE
+*
+         IF( NOTRAN ) THEN
+*
+*           Compute abs(A)*abs(X) + abs(B).
+*
+            IF( UPPER ) THEN
+               KC = 1
+               IF( NOUNIT ) THEN
+                  DO 40 K = 1, N
+                     XK = ABS( X( K, J ) )
+                     DO 30 I = 1, K
+                        WORK( I ) = WORK( I ) + ABS( AP( KC+I-1 ) )*XK
+   30                CONTINUE
+                     KC = KC + K
+   40             CONTINUE
+               ELSE
+                  DO 60 K = 1, N
+                     XK = ABS( X( K, J ) )
+                     DO 50 I = 1, K - 1
+                        WORK( I ) = WORK( I ) + ABS( AP( KC+I-1 ) )*XK
+   50                CONTINUE
+                     WORK( K ) = WORK( K ) + XK
+                     KC = KC + K
+   60             CONTINUE
+               END IF
+            ELSE
+               KC = 1
+               IF( NOUNIT ) THEN
+                  DO 80 K = 1, N
+                     XK = ABS( X( K, J ) )
+                     DO 70 I = K, N
+                        WORK( I ) = WORK( I ) + ABS( AP( KC+I-K ) )*XK
+   70                CONTINUE
+                     KC = KC + N - K + 1
+   80             CONTINUE
+               ELSE
+                  DO 100 K = 1, N
+                     XK = ABS( X( K, J ) )
+                     DO 90 I = K + 1, N
+                        WORK( I ) = WORK( I ) + ABS( AP( KC+I-K ) )*XK
+   90                CONTINUE
+                     WORK( K ) = WORK( K ) + XK
+                     KC = KC + N - K + 1
+  100             CONTINUE
+               END IF
+            END IF
+         ELSE
+*
+*           Compute abs(A')*abs(X) + abs(B).
+*
+            IF( UPPER ) THEN
+               KC = 1
+               IF( NOUNIT ) THEN
+                  DO 120 K = 1, N
+                     S = ZERO
+                     DO 110 I = 1, K
+                        S = S + ABS( AP( KC+I-1 ) )*ABS( X( I, J ) )
+  110                CONTINUE
+                     WORK( K ) = WORK( K ) + S
+                     KC = KC + K
+  120             CONTINUE
+               ELSE
+                  DO 140 K = 1, N
+                     S = ABS( X( K, J ) )
+                     DO 130 I = 1, K - 1
+                        S = S + ABS( AP( KC+I-1 ) )*ABS( X( I, J ) )
+  130                CONTINUE
+                     WORK( K ) = WORK( K ) + S
+                     KC = KC + K
+  140             CONTINUE
+               END IF
+            ELSE
+               KC = 1
+               IF( NOUNIT ) THEN
+                  DO 160 K = 1, N
+                     S = ZERO
+                     DO 150 I = K, N
+                        S = S + ABS( AP( KC+I-K ) )*ABS( X( I, J ) )
+  150                CONTINUE
+                     WORK( K ) = WORK( K ) + S
+                     KC = KC + N - K + 1
+  160             CONTINUE
+               ELSE
+                  DO 180 K = 1, N
+                     S = ABS( X( K, J ) )
+                     DO 170 I = K + 1, N
+                        S = S + ABS( AP( KC+I-K ) )*ABS( X( I, J ) )
+  170                CONTINUE
+                     WORK( K ) = WORK( K ) + S
+                     KC = KC + N - K + 1
+  180             CONTINUE
+               END IF
+            END IF
+         END IF
+         S = ZERO
+         DO 190 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) )
+            ELSE
+               S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) /
+     $             ( WORK( I )+SAFE1 ) )
+            END IF
+  190    CONTINUE
+         BERR( J ) = S
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(op(A)))*
+*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(op(A)) is the inverse of op(A)
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(op(A))*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use SLACON to estimate the infinity-norm of the matrix
+*           inv(op(A)) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) )))
+*
+         DO 200 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I )
+            ELSE
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1
+            END IF
+  200    CONTINUE
+*
+         KASE = 0
+  210    CONTINUE
+         CALL SLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ),
+     $                KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(op(A)').
+*
+               CALL STPSV( UPLO, TRANST, DIAG, N, AP, WORK( N+1 ), 1 )
+               DO 220 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  220          CONTINUE
+            ELSE
+*
+*              Multiply by inv(op(A))*diag(W).
+*
+               DO 230 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  230          CONTINUE
+               CALL STPSV( UPLO, TRANS, DIAG, N, AP, WORK( N+1 ), 1 )
+            END IF
+            GO TO 210
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 240 I = 1, N
+            LSTRES = MAX( LSTRES, ABS( X( I, J ) ) )
+  240    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  250 CONTINUE
+*
+      RETURN
+*
+*     End of STPRFS
+*
+      END
diff --git a/costa/native/external/lapack/stptri.f b/costa/native/external/lapack/stptri.f
new file mode 100644
index 000000000..4809d64cf
--- /dev/null
+++ b/costa/native/external/lapack/stptri.f
@@ -0,0 +1,176 @@
+      SUBROUTINE STPTRI( UPLO, DIAG, N, AP, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, UPLO
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      REAL               AP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STPTRI computes the inverse of a real upper or lower triangular
+*  matrix A stored in packed format.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) REAL array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangular matrix A, stored
+*          columnwise in a linear array.  The j-th column of A is stored
+*          in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*((2*n-j)/2) = A(i,j) for j<=i<=n.
+*          See below for further details.
+*          On exit, the (triangular) inverse of the original matrix, in
+*          the same packed storage format.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, A(i,i) is exactly zero.  The triangular
+*                matrix is singular and its inverse can not be computed.
+*
+*  Further Details
+*  ===============
+*
+*  A triangular matrix A can be transferred to packed storage using one
+*  of the following program segments:
+*
+*  UPLO = 'U':                      UPLO = 'L':
+*
+*        JC = 1                           JC = 1
+*        DO 2 J = 1, N                    DO 2 J = 1, N
+*           DO 1 I = 1, J                    DO 1 I = J, N
+*              AP(JC+I-1) = A(I,J)              AP(JC+I-J) = A(I,J)
+*      1    CONTINUE                    1    CONTINUE
+*           JC = JC + J                      JC = JC + N - J + 1
+*      2 CONTINUE                       2 CONTINUE
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, UPPER
+      INTEGER            J, JC, JCLAST, JJ
+      REAL               AJJ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SSCAL, STPMV, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOUNIT = LSAME( DIAG, 'N' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'STPTRI', -INFO )
+         RETURN
+      END IF
+*
+*     Check for singularity if non-unit.
+*
+      IF( NOUNIT ) THEN
+         IF( UPPER ) THEN
+            JJ = 0
+            DO 10 INFO = 1, N
+               JJ = JJ + INFO
+               IF( AP( JJ ).EQ.ZERO )
+     $            RETURN
+   10       CONTINUE
+         ELSE
+            JJ = 1
+            DO 20 INFO = 1, N
+               IF( AP( JJ ).EQ.ZERO )
+     $            RETURN
+               JJ = JJ + N - INFO + 1
+   20       CONTINUE
+         END IF
+         INFO = 0
+      END IF
+*
+      IF( UPPER ) THEN
+*
+*        Compute inverse of upper triangular matrix.
+*
+         JC = 1
+         DO 30 J = 1, N
+            IF( NOUNIT ) THEN
+               AP( JC+J-1 ) = ONE / AP( JC+J-1 )
+               AJJ = -AP( JC+J-1 )
+            ELSE
+               AJJ = -ONE
+            END IF
+*
+*           Compute elements 1:j-1 of j-th column.
+*
+            CALL STPMV( 'Upper', 'No transpose', DIAG, J-1, AP,
+     $                  AP( JC ), 1 )
+            CALL SSCAL( J-1, AJJ, AP( JC ), 1 )
+            JC = JC + J
+   30    CONTINUE
+*
+      ELSE
+*
+*        Compute inverse of lower triangular matrix.
+*
+         JC = N*( N+1 ) / 2
+         DO 40 J = N, 1, -1
+            IF( NOUNIT ) THEN
+               AP( JC ) = ONE / AP( JC )
+               AJJ = -AP( JC )
+            ELSE
+               AJJ = -ONE
+            END IF
+            IF( J.LT.N ) THEN
+*
+*              Compute elements j+1:n of j-th column.
+*
+               CALL STPMV( 'Lower', 'No transpose', DIAG, N-J,
+     $                     AP( JCLAST ), AP( JC+1 ), 1 )
+               CALL SSCAL( N-J, AJJ, AP( JC+1 ), 1 )
+            END IF
+            JCLAST = JC
+            JC = JC - N + J - 2
+   40    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of STPTRI
+*
+      END
diff --git a/costa/native/external/lapack/stptrs.f b/costa/native/external/lapack/stptrs.f
new file mode 100644
index 000000000..8e8ab9de0
--- /dev/null
+++ b/costa/native/external/lapack/stptrs.f
@@ -0,0 +1,154 @@
+      SUBROUTINE STPTRS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, TRANS, UPLO
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      REAL               AP( * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STPTRS solves a triangular system of the form
+*
+*     A * X = B  or  A**T * X = B,
+*
+*  where A is a triangular matrix of order N stored in packed format,
+*  and B is an N-by-NRHS matrix.  A check is made to verify that A is
+*  nonsingular.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B  (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose = Transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AP      (input) REAL array, dimension (N*(N+1)/2)
+*          The upper or lower triangular matrix A, packed columnwise in
+*          a linear array.  The j-th column of A is stored in the array
+*          AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, if INFO = 0, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the i-th diagonal element of A is zero,
+*                indicating that the matrix is singular and the
+*                solutions X have not been computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, UPPER
+      INTEGER            J, JC
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           STPSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOUNIT = LSAME( DIAG, 'N' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.
+     $         LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'STPTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Check for singularity.
+*
+      IF( NOUNIT ) THEN
+         IF( UPPER ) THEN
+            JC = 1
+            DO 10 INFO = 1, N
+               IF( AP( JC+INFO-1 ).EQ.ZERO )
+     $            RETURN
+               JC = JC + INFO
+   10       CONTINUE
+         ELSE
+            JC = 1
+            DO 20 INFO = 1, N
+               IF( AP( JC ).EQ.ZERO )
+     $            RETURN
+               JC = JC + N - INFO + 1
+   20       CONTINUE
+         END IF
+      END IF
+      INFO = 0
+*
+*     Solve A * x = b  or  A' * x = b.
+*
+      DO 30 J = 1, NRHS
+         CALL STPSV( UPLO, TRANS, DIAG, N, AP, B( 1, J ), 1 )
+   30 CONTINUE
+*
+      RETURN
+*
+*     End of STPTRS
+*
+      END
diff --git a/costa/native/external/lapack/strcon.f b/costa/native/external/lapack/strcon.f
new file mode 100644
index 000000000..1e31a2ed3
--- /dev/null
+++ b/costa/native/external/lapack/strcon.f
@@ -0,0 +1,193 @@
+      SUBROUTINE STRCON( NORM, UPLO, DIAG, N, A, LDA, RCOND, WORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORM, UPLO
+      INTEGER            INFO, LDA, N
+      REAL               RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STRCON estimates the reciprocal of the condition number of a
+*  triangular matrix A, in either the 1-norm or the infinity-norm.
+*
+*  The norm of A is computed and an estimate is obtained for
+*  norm(inv(A)), then the reciprocal of the condition number is
+*  computed as
+*     RCOND = 1 / ( norm(A) * norm(inv(A)) ).
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies whether the 1-norm condition number or the
+*          infinity-norm condition number is required:
+*          = '1' or 'O':  1-norm;
+*          = 'I':         Infinity-norm.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input) REAL array, dimension (LDA,N)
+*          The triangular matrix A.  If UPLO = 'U', the leading N-by-N
+*          upper triangular part of the array A contains the upper
+*          triangular matrix, and the strictly lower triangular part of
+*          A is not referenced.  If UPLO = 'L', the leading N-by-N lower
+*          triangular part of the array A contains the lower triangular
+*          matrix, and the strictly upper triangular part of A is not
+*          referenced.  If DIAG = 'U', the diagonal elements of A are
+*          also not referenced and are assumed to be 1.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  RCOND   (output) REAL
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(norm(A) * norm(inv(A))).
+*
+*  WORK    (workspace) REAL array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, ONENRM, UPPER
+      CHARACTER          NORMIN
+      INTEGER            IX, KASE, KASE1
+      REAL               AINVNM, ANORM, SCALE, SMLNUM, XNORM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ISAMAX
+      REAL               SLAMCH, SLANTR
+      EXTERNAL           LSAME, ISAMAX, SLAMCH, SLANTR
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLACON, SLATRS, SRSCL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+      IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'STRCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      END IF
+*
+      RCOND = ZERO
+      SMLNUM = SLAMCH( 'Safe minimum' )*REAL( MAX( 1, N ) )
+*
+*     Compute the norm of the triangular matrix A.
+*
+      ANORM = SLANTR( NORM, UPLO, DIAG, N, N, A, LDA, WORK )
+*
+*     Continue only if ANORM > 0.
+*
+      IF( ANORM.GT.ZERO ) THEN
+*
+*        Estimate the norm of the inverse of A.
+*
+         AINVNM = ZERO
+         NORMIN = 'N'
+         IF( ONENRM ) THEN
+            KASE1 = 1
+         ELSE
+            KASE1 = 2
+         END IF
+         KASE = 0
+   10    CONTINUE
+         CALL SLACON( N, WORK( N+1 ), WORK, IWORK, AINVNM, KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.KASE1 ) THEN
+*
+*              Multiply by inv(A).
+*
+               CALL SLATRS( UPLO, 'No transpose', DIAG, NORMIN, N, A,
+     $                      LDA, WORK, SCALE, WORK( 2*N+1 ), INFO )
+            ELSE
+*
+*              Multiply by inv(A').
+*
+               CALL SLATRS( UPLO, 'Transpose', DIAG, NORMIN, N, A, LDA,
+     $                      WORK, SCALE, WORK( 2*N+1 ), INFO )
+            END IF
+            NORMIN = 'Y'
+*
+*           Multiply by 1/SCALE if doing so will not cause overflow.
+*
+            IF( SCALE.NE.ONE ) THEN
+               IX = ISAMAX( N, WORK, 1 )
+               XNORM = ABS( WORK( IX ) )
+               IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO )
+     $            GO TO 20
+               CALL SRSCL( N, SCALE, WORK, 1 )
+            END IF
+            GO TO 10
+         END IF
+*
+*        Compute the estimate of the reciprocal condition number.
+*
+         IF( AINVNM.NE.ZERO )
+     $      RCOND = ( ONE / ANORM ) / AINVNM
+      END IF
+*
+   20 CONTINUE
+      RETURN
+*
+*     End of STRCON
+*
+      END
diff --git a/costa/native/external/lapack/strevc.f b/costa/native/external/lapack/strevc.f
new file mode 100644
index 000000000..83a0f3c4f
--- /dev/null
+++ b/costa/native/external/lapack/strevc.f
@@ -0,0 +1,1006 @@
+      SUBROUTINE STREVC( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR,
+     $                   LDVR, MM, M, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          HOWMNY, SIDE
+      INTEGER            INFO, LDT, LDVL, LDVR, M, MM, N
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            SELECT( * )
+      REAL               T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STREVC computes some or all of the right and/or left eigenvectors of
+*  a real upper quasi-triangular matrix T.
+*
+*  The right eigenvector x and the left eigenvector y of T corresponding
+*  to an eigenvalue w are defined by:
+*
+*               T*x = w*x,     y'*T = w*y'
+*
+*  where y' denotes the conjugate transpose of the vector y.
+*
+*  If all eigenvectors are requested, the routine may either return the
+*  matrices X and/or Y of right or left eigenvectors of T, or the
+*  products Q*X and/or Q*Y, where Q is an input orthogonal
+*  matrix. If T was obtained from the real-Schur factorization of an
+*  original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of
+*  right or left eigenvectors of A.
+*
+*  T must be in Schur canonical form (as returned by SHSEQR), that is,
+*  block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each
+*  2-by-2 diagonal block has its diagonal elements equal and its
+*  off-diagonal elements of opposite sign.  Corresponding to each 2-by-2
+*  diagonal block is a complex conjugate pair of eigenvalues and
+*  eigenvectors; only one eigenvector of the pair is computed, namely
+*  the one corresponding to the eigenvalue with positive imaginary part.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'R':  compute right eigenvectors only;
+*          = 'L':  compute left eigenvectors only;
+*          = 'B':  compute both right and left eigenvectors.
+*
+*  HOWMNY  (input) CHARACTER*1
+*          = 'A':  compute all right and/or left eigenvectors;
+*          = 'B':  compute all right and/or left eigenvectors,
+*                  and backtransform them using the input matrices
+*                  supplied in VR and/or VL;
+*          = 'S':  compute selected right and/or left eigenvectors,
+*                  specified by the logical array SELECT.
+*
+*  SELECT  (input/output) LOGICAL array, dimension (N)
+*          If HOWMNY = 'S', SELECT specifies the eigenvectors to be
+*          computed.
+*          If HOWMNY = 'A' or 'B', SELECT is not referenced.
+*          To select the real eigenvector corresponding to a real
+*          eigenvalue w(j), SELECT(j) must be set to .TRUE..  To select
+*          the complex eigenvector corresponding to a complex conjugate
+*          pair w(j) and w(j+1), either SELECT(j) or SELECT(j+1) must be
+*          set to .TRUE.; then on exit SELECT(j) is .TRUE. and
+*          SELECT(j+1) is .FALSE..
+*
+*  N       (input) INTEGER
+*          The order of the matrix T. N >= 0.
+*
+*  T       (input) REAL array, dimension (LDT,N)
+*          The upper quasi-triangular matrix T in Schur canonical form.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= max(1,N).
+*
+*  VL      (input/output) REAL array, dimension (LDVL,MM)
+*          On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must
+*          contain an N-by-N matrix Q (usually the orthogonal matrix Q
+*          of Schur vectors returned by SHSEQR).
+*          On exit, if SIDE = 'L' or 'B', VL contains:
+*          if HOWMNY = 'A', the matrix Y of left eigenvectors of T;
+*                           VL has the same quasi-lower triangular form
+*                           as T'. If T(i,i) is a real eigenvalue, then
+*                           the i-th column VL(i) of VL  is its
+*                           corresponding eigenvector. If T(i:i+1,i:i+1)
+*                           is a 2-by-2 block whose eigenvalues are
+*                           complex-conjugate eigenvalues of T, then
+*                           VL(i)+sqrt(-1)*VL(i+1) is the complex
+*                           eigenvector corresponding to the eigenvalue
+*                           with positive real part.
+*          if HOWMNY = 'B', the matrix Q*Y;
+*          if HOWMNY = 'S', the left eigenvectors of T specified by
+*                           SELECT, stored consecutively in the columns
+*                           of VL, in the same order as their
+*                           eigenvalues.
+*          A complex eigenvector corresponding to a complex eigenvalue
+*          is stored in two consecutive columns, the first holding the
+*          real part, and the second the imaginary part.
+*          If SIDE = 'R', VL is not referenced.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the array VL.  LDVL >= max(1,N) if
+*          SIDE = 'L' or 'B'; LDVL >= 1 otherwise.
+*
+*  VR      (input/output) REAL array, dimension (LDVR,MM)
+*          On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must
+*          contain an N-by-N matrix Q (usually the orthogonal matrix Q
+*          of Schur vectors returned by SHSEQR).
+*          On exit, if SIDE = 'R' or 'B', VR contains:
+*          if HOWMNY = 'A', the matrix X of right eigenvectors of T;
+*                           VR has the same quasi-upper triangular form
+*                           as T. If T(i,i) is a real eigenvalue, then
+*                           the i-th column VR(i) of VR  is its
+*                           corresponding eigenvector. If T(i:i+1,i:i+1)
+*                           is a 2-by-2 block whose eigenvalues are
+*                           complex-conjugate eigenvalues of T, then
+*                           VR(i)+sqrt(-1)*VR(i+1) is the complex
+*                           eigenvector corresponding to the eigenvalue
+*                           with positive real part.
+*          if HOWMNY = 'B', the matrix Q*X;
+*          if HOWMNY = 'S', the right eigenvectors of T specified by
+*                           SELECT, stored consecutively in the columns
+*                           of VR, in the same order as their
+*                           eigenvalues.
+*          A complex eigenvector corresponding to a complex eigenvalue
+*          is stored in two consecutive columns, the first holding the
+*          real part and the second the imaginary part.
+*          If SIDE = 'L', VR is not referenced.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the array VR.  LDVR >= max(1,N) if
+*          SIDE = 'R' or 'B'; LDVR >= 1 otherwise.
+*
+*  MM      (input) INTEGER
+*          The number of columns in the arrays VL and/or VR. MM >= M.
+*
+*  M       (output) INTEGER
+*          The number of columns in the arrays VL and/or VR actually
+*          used to store the eigenvectors.
+*          If HOWMNY = 'A' or 'B', M is set to N.
+*          Each selected real eigenvector occupies one column and each
+*          selected complex eigenvector occupies two columns.
+*
+*  WORK    (workspace) REAL array, dimension (3*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The algorithm used in this program is basically backward (forward)
+*  substitution, with scaling to make the the code robust against
+*  possible overflow.
+*
+*  Each eigenvector is normalized so that the element of largest
+*  magnitude has magnitude 1; here the magnitude of a complex number
+*  (x,y) is taken to be |x| + |y|.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLV, BOTHV, LEFTV, OVER, PAIR, RIGHTV, SOMEV
+      INTEGER            I, IERR, II, IP, IS, J, J1, J2, JNXT, K, KI, N2
+      REAL               BETA, BIGNUM, EMAX, OVFL, REC, REMAX, SCALE,
+     $                   SMIN, SMLNUM, ULP, UNFL, VCRIT, VMAX, WI, WR,
+     $                   XNORM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ISAMAX
+      REAL               SDOT, SLAMCH
+      EXTERNAL           LSAME, ISAMAX, SDOT, SLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SCOPY, SGEMV, SLABAD, SLALN2, SSCAL,
+     $                   XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Local Arrays ..
+      REAL               X( 2, 2 )
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      BOTHV = LSAME( SIDE, 'B' )
+      RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV
+      LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV
+*
+      ALLV = LSAME( HOWMNY, 'A' )
+      OVER = LSAME( HOWMNY, 'B' )
+      SOMEV = LSAME( HOWMNY, 'S' )
+*
+      INFO = 0
+      IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN
+         INFO = -1
+      ELSE IF( .NOT.ALLV .AND. .NOT.OVER .AND. .NOT.SOMEV ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDT.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDVL.LT.1 .OR. ( LEFTV .AND. LDVL.LT.N ) ) THEN
+         INFO = -8
+      ELSE IF( LDVR.LT.1 .OR. ( RIGHTV .AND. LDVR.LT.N ) ) THEN
+         INFO = -10
+      ELSE
+*
+*        Set M to the number of columns required to store the selected
+*        eigenvectors, standardize the array SELECT if necessary, and
+*        test MM.
+*
+         IF( SOMEV ) THEN
+            M = 0
+            PAIR = .FALSE.
+            DO 10 J = 1, N
+               IF( PAIR ) THEN
+                  PAIR = .FALSE.
+                  SELECT( J ) = .FALSE.
+               ELSE
+                  IF( J.LT.N ) THEN
+                     IF( T( J+1, J ).EQ.ZERO ) THEN
+                        IF( SELECT( J ) )
+     $                     M = M + 1
+                     ELSE
+                        PAIR = .TRUE.
+                        IF( SELECT( J ) .OR. SELECT( J+1 ) ) THEN
+                           SELECT( J ) = .TRUE.
+                           M = M + 2
+                        END IF
+                     END IF
+                  ELSE
+                     IF( SELECT( N ) )
+     $                  M = M + 1
+                  END IF
+               END IF
+   10       CONTINUE
+         ELSE
+            M = N
+         END IF
+*
+         IF( MM.LT.M ) THEN
+            INFO = -11
+         END IF
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'STREVC', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Set the constants to control overflow.
+*
+      UNFL = SLAMCH( 'Safe minimum' )
+      OVFL = ONE / UNFL
+      CALL SLABAD( UNFL, OVFL )
+      ULP = SLAMCH( 'Precision' )
+      SMLNUM = UNFL*( N / ULP )
+      BIGNUM = ( ONE-ULP ) / SMLNUM
+*
+*     Compute 1-norm of each column of strictly upper triangular
+*     part of T to control overflow in triangular solver.
+*
+      WORK( 1 ) = ZERO
+      DO 30 J = 2, N
+         WORK( J ) = ZERO
+         DO 20 I = 1, J - 1
+            WORK( J ) = WORK( J ) + ABS( T( I, J ) )
+   20    CONTINUE
+   30 CONTINUE
+*
+*     Index IP is used to specify the real or complex eigenvalue:
+*       IP = 0, real eigenvalue,
+*            1, first of conjugate complex pair: (wr,wi)
+*           -1, second of conjugate complex pair: (wr,wi)
+*
+      N2 = 2*N
+*
+      IF( RIGHTV ) THEN
+*
+*        Compute right eigenvectors.
+*
+         IP = 0
+         IS = M
+         DO 140 KI = N, 1, -1
+*
+            IF( IP.EQ.1 )
+     $         GO TO 130
+            IF( KI.EQ.1 )
+     $         GO TO 40
+            IF( T( KI, KI-1 ).EQ.ZERO )
+     $         GO TO 40
+            IP = -1
+*
+   40       CONTINUE
+            IF( SOMEV ) THEN
+               IF( IP.EQ.0 ) THEN
+                  IF( .NOT.SELECT( KI ) )
+     $               GO TO 130
+               ELSE
+                  IF( .NOT.SELECT( KI-1 ) )
+     $               GO TO 130
+               END IF
+            END IF
+*
+*           Compute the KI-th eigenvalue (WR,WI).
+*
+            WR = T( KI, KI )
+            WI = ZERO
+            IF( IP.NE.0 )
+     $         WI = SQRT( ABS( T( KI, KI-1 ) ) )*
+     $              SQRT( ABS( T( KI-1, KI ) ) )
+            SMIN = MAX( ULP*( ABS( WR )+ABS( WI ) ), SMLNUM )
+*
+            IF( IP.EQ.0 ) THEN
+*
+*              Real right eigenvector
+*
+               WORK( KI+N ) = ONE
+*
+*              Form right-hand side
+*
+               DO 50 K = 1, KI - 1
+                  WORK( K+N ) = -T( K, KI )
+   50          CONTINUE
+*
+*              Solve the upper quasi-triangular system:
+*                 (T(1:KI-1,1:KI-1) - WR)*X = SCALE*WORK.
+*
+               JNXT = KI - 1
+               DO 60 J = KI - 1, 1, -1
+                  IF( J.GT.JNXT )
+     $               GO TO 60
+                  J1 = J
+                  J2 = J
+                  JNXT = J - 1
+                  IF( J.GT.1 ) THEN
+                     IF( T( J, J-1 ).NE.ZERO ) THEN
+                        J1 = J - 1
+                        JNXT = J - 2
+                     END IF
+                  END IF
+*
+                  IF( J1.EQ.J2 ) THEN
+*
+*                    1-by-1 diagonal block
+*
+                     CALL SLALN2( .FALSE., 1, 1, SMIN, ONE, T( J, J ),
+     $                            LDT, ONE, ONE, WORK( J+N ), N, WR,
+     $                            ZERO, X, 2, SCALE, XNORM, IERR )
+*
+*                    Scale X(1,1) to avoid overflow when updating
+*                    the right-hand side.
+*
+                     IF( XNORM.GT.ONE ) THEN
+                        IF( WORK( J ).GT.BIGNUM / XNORM ) THEN
+                           X( 1, 1 ) = X( 1, 1 ) / XNORM
+                           SCALE = SCALE / XNORM
+                        END IF
+                     END IF
+*
+*                    Scale if necessary
+*
+                     IF( SCALE.NE.ONE )
+     $                  CALL SSCAL( KI, SCALE, WORK( 1+N ), 1 )
+                     WORK( J+N ) = X( 1, 1 )
+*
+*                    Update right-hand side
+*
+                     CALL SAXPY( J-1, -X( 1, 1 ), T( 1, J ), 1,
+     $                           WORK( 1+N ), 1 )
+*
+                  ELSE
+*
+*                    2-by-2 diagonal block
+*
+                     CALL SLALN2( .FALSE., 2, 1, SMIN, ONE,
+     $                            T( J-1, J-1 ), LDT, ONE, ONE,
+     $                            WORK( J-1+N ), N, WR, ZERO, X, 2,
+     $                            SCALE, XNORM, IERR )
+*
+*                    Scale X(1,1) and X(2,1) to avoid overflow when
+*                    updating the right-hand side.
+*
+                     IF( XNORM.GT.ONE ) THEN
+                        BETA = MAX( WORK( J-1 ), WORK( J ) )
+                        IF( BETA.GT.BIGNUM / XNORM ) THEN
+                           X( 1, 1 ) = X( 1, 1 ) / XNORM
+                           X( 2, 1 ) = X( 2, 1 ) / XNORM
+                           SCALE = SCALE / XNORM
+                        END IF
+                     END IF
+*
+*                    Scale if necessary
+*
+                     IF( SCALE.NE.ONE )
+     $                  CALL SSCAL( KI, SCALE, WORK( 1+N ), 1 )
+                     WORK( J-1+N ) = X( 1, 1 )
+                     WORK( J+N ) = X( 2, 1 )
+*
+*                    Update right-hand side
+*
+                     CALL SAXPY( J-2, -X( 1, 1 ), T( 1, J-1 ), 1,
+     $                           WORK( 1+N ), 1 )
+                     CALL SAXPY( J-2, -X( 2, 1 ), T( 1, J ), 1,
+     $                           WORK( 1+N ), 1 )
+                  END IF
+   60          CONTINUE
+*
+*              Copy the vector x or Q*x to VR and normalize.
+*
+               IF( .NOT.OVER ) THEN
+                  CALL SCOPY( KI, WORK( 1+N ), 1, VR( 1, IS ), 1 )
+*
+                  II = ISAMAX( KI, VR( 1, IS ), 1 )
+                  REMAX = ONE / ABS( VR( II, IS ) )
+                  CALL SSCAL( KI, REMAX, VR( 1, IS ), 1 )
+*
+                  DO 70 K = KI + 1, N
+                     VR( K, IS ) = ZERO
+   70             CONTINUE
+               ELSE
+                  IF( KI.GT.1 )
+     $               CALL SGEMV( 'N', N, KI-1, ONE, VR, LDVR,
+     $                           WORK( 1+N ), 1, WORK( KI+N ),
+     $                           VR( 1, KI ), 1 )
+*
+                  II = ISAMAX( N, VR( 1, KI ), 1 )
+                  REMAX = ONE / ABS( VR( II, KI ) )
+                  CALL SSCAL( N, REMAX, VR( 1, KI ), 1 )
+               END IF
+*
+            ELSE
+*
+*              Complex right eigenvector.
+*
+*              Initial solve
+*                [ (T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I* WI)]*X = 0.
+*                [ (T(KI,KI-1)   T(KI,KI)   )               ]
+*
+               IF( ABS( T( KI-1, KI ) ).GE.ABS( T( KI, KI-1 ) ) ) THEN
+                  WORK( KI-1+N ) = ONE
+                  WORK( KI+N2 ) = WI / T( KI-1, KI )
+               ELSE
+                  WORK( KI-1+N ) = -WI / T( KI, KI-1 )
+                  WORK( KI+N2 ) = ONE
+               END IF
+               WORK( KI+N ) = ZERO
+               WORK( KI-1+N2 ) = ZERO
+*
+*              Form right-hand side
+*
+               DO 80 K = 1, KI - 2
+                  WORK( K+N ) = -WORK( KI-1+N )*T( K, KI-1 )
+                  WORK( K+N2 ) = -WORK( KI+N2 )*T( K, KI )
+   80          CONTINUE
+*
+*              Solve upper quasi-triangular system:
+*              (T(1:KI-2,1:KI-2) - (WR+i*WI))*X = SCALE*(WORK+i*WORK2)
+*
+               JNXT = KI - 2
+               DO 90 J = KI - 2, 1, -1
+                  IF( J.GT.JNXT )
+     $               GO TO 90
+                  J1 = J
+                  J2 = J
+                  JNXT = J - 1
+                  IF( J.GT.1 ) THEN
+                     IF( T( J, J-1 ).NE.ZERO ) THEN
+                        J1 = J - 1
+                        JNXT = J - 2
+                     END IF
+                  END IF
+*
+                  IF( J1.EQ.J2 ) THEN
+*
+*                    1-by-1 diagonal block
+*
+                     CALL SLALN2( .FALSE., 1, 2, SMIN, ONE, T( J, J ),
+     $                            LDT, ONE, ONE, WORK( J+N ), N, WR, WI,
+     $                            X, 2, SCALE, XNORM, IERR )
+*
+*                    Scale X(1,1) and X(1,2) to avoid overflow when
+*                    updating the right-hand side.
+*
+                     IF( XNORM.GT.ONE ) THEN
+                        IF( WORK( J ).GT.BIGNUM / XNORM ) THEN
+                           X( 1, 1 ) = X( 1, 1 ) / XNORM
+                           X( 1, 2 ) = X( 1, 2 ) / XNORM
+                           SCALE = SCALE / XNORM
+                        END IF
+                     END IF
+*
+*                    Scale if necessary
+*
+                     IF( SCALE.NE.ONE ) THEN
+                        CALL SSCAL( KI, SCALE, WORK( 1+N ), 1 )
+                        CALL SSCAL( KI, SCALE, WORK( 1+N2 ), 1 )
+                     END IF
+                     WORK( J+N ) = X( 1, 1 )
+                     WORK( J+N2 ) = X( 1, 2 )
+*
+*                    Update the right-hand side
+*
+                     CALL SAXPY( J-1, -X( 1, 1 ), T( 1, J ), 1,
+     $                           WORK( 1+N ), 1 )
+                     CALL SAXPY( J-1, -X( 1, 2 ), T( 1, J ), 1,
+     $                           WORK( 1+N2 ), 1 )
+*
+                  ELSE
+*
+*                    2-by-2 diagonal block
+*
+                     CALL SLALN2( .FALSE., 2, 2, SMIN, ONE,
+     $                            T( J-1, J-1 ), LDT, ONE, ONE,
+     $                            WORK( J-1+N ), N, WR, WI, X, 2, SCALE,
+     $                            XNORM, IERR )
+*
+*                    Scale X to avoid overflow when updating
+*                    the right-hand side.
+*
+                     IF( XNORM.GT.ONE ) THEN
+                        BETA = MAX( WORK( J-1 ), WORK( J ) )
+                        IF( BETA.GT.BIGNUM / XNORM ) THEN
+                           REC = ONE / XNORM
+                           X( 1, 1 ) = X( 1, 1 )*REC
+                           X( 1, 2 ) = X( 1, 2 )*REC
+                           X( 2, 1 ) = X( 2, 1 )*REC
+                           X( 2, 2 ) = X( 2, 2 )*REC
+                           SCALE = SCALE*REC
+                        END IF
+                     END IF
+*
+*                    Scale if necessary
+*
+                     IF( SCALE.NE.ONE ) THEN
+                        CALL SSCAL( KI, SCALE, WORK( 1+N ), 1 )
+                        CALL SSCAL( KI, SCALE, WORK( 1+N2 ), 1 )
+                     END IF
+                     WORK( J-1+N ) = X( 1, 1 )
+                     WORK( J+N ) = X( 2, 1 )
+                     WORK( J-1+N2 ) = X( 1, 2 )
+                     WORK( J+N2 ) = X( 2, 2 )
+*
+*                    Update the right-hand side
+*
+                     CALL SAXPY( J-2, -X( 1, 1 ), T( 1, J-1 ), 1,
+     $                           WORK( 1+N ), 1 )
+                     CALL SAXPY( J-2, -X( 2, 1 ), T( 1, J ), 1,
+     $                           WORK( 1+N ), 1 )
+                     CALL SAXPY( J-2, -X( 1, 2 ), T( 1, J-1 ), 1,
+     $                           WORK( 1+N2 ), 1 )
+                     CALL SAXPY( J-2, -X( 2, 2 ), T( 1, J ), 1,
+     $                           WORK( 1+N2 ), 1 )
+                  END IF
+   90          CONTINUE
+*
+*              Copy the vector x or Q*x to VR and normalize.
+*
+               IF( .NOT.OVER ) THEN
+                  CALL SCOPY( KI, WORK( 1+N ), 1, VR( 1, IS-1 ), 1 )
+                  CALL SCOPY( KI, WORK( 1+N2 ), 1, VR( 1, IS ), 1 )
+*
+                  EMAX = ZERO
+                  DO 100 K = 1, KI
+                     EMAX = MAX( EMAX, ABS( VR( K, IS-1 ) )+
+     $                      ABS( VR( K, IS ) ) )
+  100             CONTINUE
+*
+                  REMAX = ONE / EMAX
+                  CALL SSCAL( KI, REMAX, VR( 1, IS-1 ), 1 )
+                  CALL SSCAL( KI, REMAX, VR( 1, IS ), 1 )
+*
+                  DO 110 K = KI + 1, N
+                     VR( K, IS-1 ) = ZERO
+                     VR( K, IS ) = ZERO
+  110             CONTINUE
+*
+               ELSE
+*
+                  IF( KI.GT.2 ) THEN
+                     CALL SGEMV( 'N', N, KI-2, ONE, VR, LDVR,
+     $                           WORK( 1+N ), 1, WORK( KI-1+N ),
+     $                           VR( 1, KI-1 ), 1 )
+                     CALL SGEMV( 'N', N, KI-2, ONE, VR, LDVR,
+     $                           WORK( 1+N2 ), 1, WORK( KI+N2 ),
+     $                           VR( 1, KI ), 1 )
+                  ELSE
+                     CALL SSCAL( N, WORK( KI-1+N ), VR( 1, KI-1 ), 1 )
+                     CALL SSCAL( N, WORK( KI+N2 ), VR( 1, KI ), 1 )
+                  END IF
+*
+                  EMAX = ZERO
+                  DO 120 K = 1, N
+                     EMAX = MAX( EMAX, ABS( VR( K, KI-1 ) )+
+     $                      ABS( VR( K, KI ) ) )
+  120             CONTINUE
+                  REMAX = ONE / EMAX
+                  CALL SSCAL( N, REMAX, VR( 1, KI-1 ), 1 )
+                  CALL SSCAL( N, REMAX, VR( 1, KI ), 1 )
+               END IF
+            END IF
+*
+            IS = IS - 1
+            IF( IP.NE.0 )
+     $         IS = IS - 1
+  130       CONTINUE
+            IF( IP.EQ.1 )
+     $         IP = 0
+            IF( IP.EQ.-1 )
+     $         IP = 1
+  140    CONTINUE
+      END IF
+*
+      IF( LEFTV ) THEN
+*
+*        Compute left eigenvectors.
+*
+         IP = 0
+         IS = 1
+         DO 260 KI = 1, N
+*
+            IF( IP.EQ.-1 )
+     $         GO TO 250
+            IF( KI.EQ.N )
+     $         GO TO 150
+            IF( T( KI+1, KI ).EQ.ZERO )
+     $         GO TO 150
+            IP = 1
+*
+  150       CONTINUE
+            IF( SOMEV ) THEN
+               IF( .NOT.SELECT( KI ) )
+     $            GO TO 250
+            END IF
+*
+*           Compute the KI-th eigenvalue (WR,WI).
+*
+            WR = T( KI, KI )
+            WI = ZERO
+            IF( IP.NE.0 )
+     $         WI = SQRT( ABS( T( KI, KI+1 ) ) )*
+     $              SQRT( ABS( T( KI+1, KI ) ) )
+            SMIN = MAX( ULP*( ABS( WR )+ABS( WI ) ), SMLNUM )
+*
+            IF( IP.EQ.0 ) THEN
+*
+*              Real left eigenvector.
+*
+               WORK( KI+N ) = ONE
+*
+*              Form right-hand side
+*
+               DO 160 K = KI + 1, N
+                  WORK( K+N ) = -T( KI, K )
+  160          CONTINUE
+*
+*              Solve the quasi-triangular system:
+*                 (T(KI+1:N,KI+1:N) - WR)'*X = SCALE*WORK
+*
+               VMAX = ONE
+               VCRIT = BIGNUM
+*
+               JNXT = KI + 1
+               DO 170 J = KI + 1, N
+                  IF( J.LT.JNXT )
+     $               GO TO 170
+                  J1 = J
+                  J2 = J
+                  JNXT = J + 1
+                  IF( J.LT.N ) THEN
+                     IF( T( J+1, J ).NE.ZERO ) THEN
+                        J2 = J + 1
+                        JNXT = J + 2
+                     END IF
+                  END IF
+*
+                  IF( J1.EQ.J2 ) THEN
+*
+*                    1-by-1 diagonal block
+*
+*                    Scale if necessary to avoid overflow when forming
+*                    the right-hand side.
+*
+                     IF( WORK( J ).GT.VCRIT ) THEN
+                        REC = ONE / VMAX
+                        CALL SSCAL( N-KI+1, REC, WORK( KI+N ), 1 )
+                        VMAX = ONE
+                        VCRIT = BIGNUM
+                     END IF
+*
+                     WORK( J+N ) = WORK( J+N ) -
+     $                             SDOT( J-KI-1, T( KI+1, J ), 1,
+     $                             WORK( KI+1+N ), 1 )
+*
+*                    Solve (T(J,J)-WR)'*X = WORK
+*
+                     CALL SLALN2( .FALSE., 1, 1, SMIN, ONE, T( J, J ),
+     $                            LDT, ONE, ONE, WORK( J+N ), N, WR,
+     $                            ZERO, X, 2, SCALE, XNORM, IERR )
+*
+*                    Scale if necessary
+*
+                     IF( SCALE.NE.ONE )
+     $                  CALL SSCAL( N-KI+1, SCALE, WORK( KI+N ), 1 )
+                     WORK( J+N ) = X( 1, 1 )
+                     VMAX = MAX( ABS( WORK( J+N ) ), VMAX )
+                     VCRIT = BIGNUM / VMAX
+*
+                  ELSE
+*
+*                    2-by-2 diagonal block
+*
+*                    Scale if necessary to avoid overflow when forming
+*                    the right-hand side.
+*
+                     BETA = MAX( WORK( J ), WORK( J+1 ) )
+                     IF( BETA.GT.VCRIT ) THEN
+                        REC = ONE / VMAX
+                        CALL SSCAL( N-KI+1, REC, WORK( KI+N ), 1 )
+                        VMAX = ONE
+                        VCRIT = BIGNUM
+                     END IF
+*
+                     WORK( J+N ) = WORK( J+N ) -
+     $                             SDOT( J-KI-1, T( KI+1, J ), 1,
+     $                             WORK( KI+1+N ), 1 )
+*
+                     WORK( J+1+N ) = WORK( J+1+N ) -
+     $                               SDOT( J-KI-1, T( KI+1, J+1 ), 1,
+     $                               WORK( KI+1+N ), 1 )
+*
+*                    Solve
+*                      [T(J,J)-WR   T(J,J+1)     ]'* X = SCALE*( WORK1 )
+*                      [T(J+1,J)    T(J+1,J+1)-WR]             ( WORK2 )
+*
+                     CALL SLALN2( .TRUE., 2, 1, SMIN, ONE, T( J, J ),
+     $                            LDT, ONE, ONE, WORK( J+N ), N, WR,
+     $                            ZERO, X, 2, SCALE, XNORM, IERR )
+*
+*                    Scale if necessary
+*
+                     IF( SCALE.NE.ONE )
+     $                  CALL SSCAL( N-KI+1, SCALE, WORK( KI+N ), 1 )
+                     WORK( J+N ) = X( 1, 1 )
+                     WORK( J+1+N ) = X( 2, 1 )
+*
+                     VMAX = MAX( ABS( WORK( J+N ) ),
+     $                      ABS( WORK( J+1+N ) ), VMAX )
+                     VCRIT = BIGNUM / VMAX
+*
+                  END IF
+  170          CONTINUE
+*
+*              Copy the vector x or Q*x to VL and normalize.
+*
+               IF( .NOT.OVER ) THEN
+                  CALL SCOPY( N-KI+1, WORK( KI+N ), 1, VL( KI, IS ), 1 )
+*
+                  II = ISAMAX( N-KI+1, VL( KI, IS ), 1 ) + KI - 1
+                  REMAX = ONE / ABS( VL( II, IS ) )
+                  CALL SSCAL( N-KI+1, REMAX, VL( KI, IS ), 1 )
+*
+                  DO 180 K = 1, KI - 1
+                     VL( K, IS ) = ZERO
+  180             CONTINUE
+*
+               ELSE
+*
+                  IF( KI.LT.N )
+     $               CALL SGEMV( 'N', N, N-KI, ONE, VL( 1, KI+1 ), LDVL,
+     $                           WORK( KI+1+N ), 1, WORK( KI+N ),
+     $                           VL( 1, KI ), 1 )
+*
+                  II = ISAMAX( N, VL( 1, KI ), 1 )
+                  REMAX = ONE / ABS( VL( II, KI ) )
+                  CALL SSCAL( N, REMAX, VL( 1, KI ), 1 )
+*
+               END IF
+*
+            ELSE
+*
+*              Complex left eigenvector.
+*
+*               Initial solve:
+*                 ((T(KI,KI)    T(KI,KI+1) )' - (WR - I* WI))*X = 0.
+*                 ((T(KI+1,KI) T(KI+1,KI+1))                )
+*
+               IF( ABS( T( KI, KI+1 ) ).GE.ABS( T( KI+1, KI ) ) ) THEN
+                  WORK( KI+N ) = WI / T( KI, KI+1 )
+                  WORK( KI+1+N2 ) = ONE
+               ELSE
+                  WORK( KI+N ) = ONE
+                  WORK( KI+1+N2 ) = -WI / T( KI+1, KI )
+               END IF
+               WORK( KI+1+N ) = ZERO
+               WORK( KI+N2 ) = ZERO
+*
+*              Form right-hand side
+*
+               DO 190 K = KI + 2, N
+                  WORK( K+N ) = -WORK( KI+N )*T( KI, K )
+                  WORK( K+N2 ) = -WORK( KI+1+N2 )*T( KI+1, K )
+  190          CONTINUE
+*
+*              Solve complex quasi-triangular system:
+*              ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2
+*
+               VMAX = ONE
+               VCRIT = BIGNUM
+*
+               JNXT = KI + 2
+               DO 200 J = KI + 2, N
+                  IF( J.LT.JNXT )
+     $               GO TO 200
+                  J1 = J
+                  J2 = J
+                  JNXT = J + 1
+                  IF( J.LT.N ) THEN
+                     IF( T( J+1, J ).NE.ZERO ) THEN
+                        J2 = J + 1
+                        JNXT = J + 2
+                     END IF
+                  END IF
+*
+                  IF( J1.EQ.J2 ) THEN
+*
+*                    1-by-1 diagonal block
+*
+*                    Scale if necessary to avoid overflow when
+*                    forming the right-hand side elements.
+*
+                     IF( WORK( J ).GT.VCRIT ) THEN
+                        REC = ONE / VMAX
+                        CALL SSCAL( N-KI+1, REC, WORK( KI+N ), 1 )
+                        CALL SSCAL( N-KI+1, REC, WORK( KI+N2 ), 1 )
+                        VMAX = ONE
+                        VCRIT = BIGNUM
+                     END IF
+*
+                     WORK( J+N ) = WORK( J+N ) -
+     $                             SDOT( J-KI-2, T( KI+2, J ), 1,
+     $                             WORK( KI+2+N ), 1 )
+                     WORK( J+N2 ) = WORK( J+N2 ) -
+     $                              SDOT( J-KI-2, T( KI+2, J ), 1,
+     $                              WORK( KI+2+N2 ), 1 )
+*
+*                    Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2
+*
+                     CALL SLALN2( .FALSE., 1, 2, SMIN, ONE, T( J, J ),
+     $                            LDT, ONE, ONE, WORK( J+N ), N, WR,
+     $                            -WI, X, 2, SCALE, XNORM, IERR )
+*
+*                    Scale if necessary
+*
+                     IF( SCALE.NE.ONE ) THEN
+                        CALL SSCAL( N-KI+1, SCALE, WORK( KI+N ), 1 )
+                        CALL SSCAL( N-KI+1, SCALE, WORK( KI+N2 ), 1 )
+                     END IF
+                     WORK( J+N ) = X( 1, 1 )
+                     WORK( J+N2 ) = X( 1, 2 )
+                     VMAX = MAX( ABS( WORK( J+N ) ),
+     $                      ABS( WORK( J+N2 ) ), VMAX )
+                     VCRIT = BIGNUM / VMAX
+*
+                  ELSE
+*
+*                    2-by-2 diagonal block
+*
+*                    Scale if necessary to avoid overflow when forming
+*                    the right-hand side elements.
+*
+                     BETA = MAX( WORK( J ), WORK( J+1 ) )
+                     IF( BETA.GT.VCRIT ) THEN
+                        REC = ONE / VMAX
+                        CALL SSCAL( N-KI+1, REC, WORK( KI+N ), 1 )
+                        CALL SSCAL( N-KI+1, REC, WORK( KI+N2 ), 1 )
+                        VMAX = ONE
+                        VCRIT = BIGNUM
+                     END IF
+*
+                     WORK( J+N ) = WORK( J+N ) -
+     $                             SDOT( J-KI-2, T( KI+2, J ), 1,
+     $                             WORK( KI+2+N ), 1 )
+*
+                     WORK( J+N2 ) = WORK( J+N2 ) -
+     $                              SDOT( J-KI-2, T( KI+2, J ), 1,
+     $                              WORK( KI+2+N2 ), 1 )
+*
+                     WORK( J+1+N ) = WORK( J+1+N ) -
+     $                               SDOT( J-KI-2, T( KI+2, J+1 ), 1,
+     $                               WORK( KI+2+N ), 1 )
+*
+                     WORK( J+1+N2 ) = WORK( J+1+N2 ) -
+     $                                SDOT( J-KI-2, T( KI+2, J+1 ), 1,
+     $                                WORK( KI+2+N2 ), 1 )
+*
+*                    Solve 2-by-2 complex linear equation
+*                      ([T(j,j)   T(j,j+1)  ]'-(wr-i*wi)*I)*X = SCALE*B
+*                      ([T(j+1,j) T(j+1,j+1)]             )
+*
+                     CALL SLALN2( .TRUE., 2, 2, SMIN, ONE, T( J, J ),
+     $                            LDT, ONE, ONE, WORK( J+N ), N, WR,
+     $                            -WI, X, 2, SCALE, XNORM, IERR )
+*
+*                    Scale if necessary
+*
+                     IF( SCALE.NE.ONE ) THEN
+                        CALL SSCAL( N-KI+1, SCALE, WORK( KI+N ), 1 )
+                        CALL SSCAL( N-KI+1, SCALE, WORK( KI+N2 ), 1 )
+                     END IF
+                     WORK( J+N ) = X( 1, 1 )
+                     WORK( J+N2 ) = X( 1, 2 )
+                     WORK( J+1+N ) = X( 2, 1 )
+                     WORK( J+1+N2 ) = X( 2, 2 )
+                     VMAX = MAX( ABS( X( 1, 1 ) ), ABS( X( 1, 2 ) ),
+     $                      ABS( X( 2, 1 ) ), ABS( X( 2, 2 ) ), VMAX )
+                     VCRIT = BIGNUM / VMAX
+*
+                  END IF
+  200          CONTINUE
+*
+*              Copy the vector x or Q*x to VL and normalize.
+*
+  210          CONTINUE
+               IF( .NOT.OVER ) THEN
+                  CALL SCOPY( N-KI+1, WORK( KI+N ), 1, VL( KI, IS ), 1 )
+                  CALL SCOPY( N-KI+1, WORK( KI+N2 ), 1, VL( KI, IS+1 ),
+     $                        1 )
+*
+                  EMAX = ZERO
+                  DO 220 K = KI, N
+                     EMAX = MAX( EMAX, ABS( VL( K, IS ) )+
+     $                      ABS( VL( K, IS+1 ) ) )
+  220             CONTINUE
+                  REMAX = ONE / EMAX
+                  CALL SSCAL( N-KI+1, REMAX, VL( KI, IS ), 1 )
+                  CALL SSCAL( N-KI+1, REMAX, VL( KI, IS+1 ), 1 )
+*
+                  DO 230 K = 1, KI - 1
+                     VL( K, IS ) = ZERO
+                     VL( K, IS+1 ) = ZERO
+  230             CONTINUE
+               ELSE
+                  IF( KI.LT.N-1 ) THEN
+                     CALL SGEMV( 'N', N, N-KI-1, ONE, VL( 1, KI+2 ),
+     $                           LDVL, WORK( KI+2+N ), 1, WORK( KI+N ),
+     $                           VL( 1, KI ), 1 )
+                     CALL SGEMV( 'N', N, N-KI-1, ONE, VL( 1, KI+2 ),
+     $                           LDVL, WORK( KI+2+N2 ), 1,
+     $                           WORK( KI+1+N2 ), VL( 1, KI+1 ), 1 )
+                  ELSE
+                     CALL SSCAL( N, WORK( KI+N ), VL( 1, KI ), 1 )
+                     CALL SSCAL( N, WORK( KI+1+N2 ), VL( 1, KI+1 ), 1 )
+                  END IF
+*
+                  EMAX = ZERO
+                  DO 240 K = 1, N
+                     EMAX = MAX( EMAX, ABS( VL( K, KI ) )+
+     $                      ABS( VL( K, KI+1 ) ) )
+  240             CONTINUE
+                  REMAX = ONE / EMAX
+                  CALL SSCAL( N, REMAX, VL( 1, KI ), 1 )
+                  CALL SSCAL( N, REMAX, VL( 1, KI+1 ), 1 )
+*
+               END IF
+*
+            END IF
+*
+            IS = IS + 1
+            IF( IP.NE.0 )
+     $         IS = IS + 1
+  250       CONTINUE
+            IF( IP.EQ.-1 )
+     $         IP = 0
+            IF( IP.EQ.1 )
+     $         IP = -1
+*
+  260    CONTINUE
+*
+      END IF
+*
+      RETURN
+*
+*     End of STREVC
+*
+      END
diff --git a/costa/native/external/lapack/strexc.f b/costa/native/external/lapack/strexc.f
new file mode 100644
index 000000000..c8c36f8da
--- /dev/null
+++ b/costa/native/external/lapack/strexc.f
@@ -0,0 +1,346 @@
+      SUBROUTINE STREXC( COMPQ, N, T, LDT, Q, LDQ, IFST, ILST, WORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPQ
+      INTEGER            IFST, ILST, INFO, LDQ, LDT, N
+*     ..
+*     .. Array Arguments ..
+      REAL               Q( LDQ, * ), T( LDT, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STREXC reorders the real Schur factorization of a real matrix
+*  A = Q*T*Q**T, so that the diagonal block of T with row index IFST is
+*  moved to row ILST.
+*
+*  The real Schur form T is reordered by an orthogonal similarity
+*  transformation Z**T*T*Z, and optionally the matrix Q of Schur vectors
+*  is updated by postmultiplying it with Z.
+*
+*  T must be in Schur canonical form (as returned by SHSEQR), that is,
+*  block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each
+*  2-by-2 diagonal block has its diagonal elements equal and its
+*  off-diagonal elements of opposite sign.
+*
+*  Arguments
+*  =========
+*
+*  COMPQ   (input) CHARACTER*1
+*          = 'V':  update the matrix Q of Schur vectors;
+*          = 'N':  do not update Q.
+*
+*  N       (input) INTEGER
+*          The order of the matrix T. N >= 0.
+*
+*  T       (input/output) REAL array, dimension (LDT,N)
+*          On entry, the upper quasi-triangular matrix T, in Schur
+*          Schur canonical form.
+*          On exit, the reordered upper quasi-triangular matrix, again
+*          in Schur canonical form.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= max(1,N).
+*
+*  Q       (input/output) REAL array, dimension (LDQ,N)
+*          On entry, if COMPQ = 'V', the matrix Q of Schur vectors.
+*          On exit, if COMPQ = 'V', Q has been postmultiplied by the
+*          orthogonal transformation matrix Z which reorders T.
+*          If COMPQ = 'N', Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.  LDQ >= max(1,N).
+*
+*  IFST    (input/output) INTEGER
+*  ILST    (input/output) INTEGER
+*          Specify the reordering of the diagonal blocks of T.
+*          The block with row index IFST is moved to row ILST, by a
+*          sequence of transpositions between adjacent blocks.
+*          On exit, if IFST pointed on entry to the second row of a
+*          2-by-2 block, it is changed to point to the first row; ILST
+*          always points to the first row of the block in its final
+*          position (which may differ from its input value by +1 or -1).
+*          1 <= IFST <= N; 1 <= ILST <= N.
+*
+*  WORK    (workspace) REAL array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          = 1:  two adjacent blocks were too close to swap (the problem
+*                is very ill-conditioned); T may have been partially
+*                reordered, and ILST points to the first row of the
+*                current position of the block being moved.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            WANTQ
+      INTEGER            HERE, NBF, NBL, NBNEXT
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLAEXC, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input arguments.
+*
+      INFO = 0
+      WANTQ = LSAME( COMPQ, 'V' )
+      IF( .NOT.WANTQ .AND. .NOT.LSAME( COMPQ, 'N' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDT.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.MAX( 1, N ) ) ) THEN
+         INFO = -6
+      ELSE IF( IFST.LT.1 .OR. IFST.GT.N ) THEN
+         INFO = -7
+      ELSE IF( ILST.LT.1 .OR. ILST.GT.N ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'STREXC', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.1 )
+     $   RETURN
+*
+*     Determine the first row of specified block
+*     and find out it is 1 by 1 or 2 by 2.
+*
+      IF( IFST.GT.1 ) THEN
+         IF( T( IFST, IFST-1 ).NE.ZERO )
+     $      IFST = IFST - 1
+      END IF
+      NBF = 1
+      IF( IFST.LT.N ) THEN
+         IF( T( IFST+1, IFST ).NE.ZERO )
+     $      NBF = 2
+      END IF
+*
+*     Determine the first row of the final block
+*     and find out it is 1 by 1 or 2 by 2.
+*
+      IF( ILST.GT.1 ) THEN
+         IF( T( ILST, ILST-1 ).NE.ZERO )
+     $      ILST = ILST - 1
+      END IF
+      NBL = 1
+      IF( ILST.LT.N ) THEN
+         IF( T( ILST+1, ILST ).NE.ZERO )
+     $      NBL = 2
+      END IF
+*
+      IF( IFST.EQ.ILST )
+     $   RETURN
+*
+      IF( IFST.LT.ILST ) THEN
+*
+*        Update ILST
+*
+         IF( NBF.EQ.2 .AND. NBL.EQ.1 )
+     $      ILST = ILST - 1
+         IF( NBF.EQ.1 .AND. NBL.EQ.2 )
+     $      ILST = ILST + 1
+*
+         HERE = IFST
+*
+   10    CONTINUE
+*
+*        Swap block with next one below
+*
+         IF( NBF.EQ.1 .OR. NBF.EQ.2 ) THEN
+*
+*           Current block either 1 by 1 or 2 by 2
+*
+            NBNEXT = 1
+            IF( HERE+NBF+1.LE.N ) THEN
+               IF( T( HERE+NBF+1, HERE+NBF ).NE.ZERO )
+     $            NBNEXT = 2
+            END IF
+            CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, NBF, NBNEXT,
+     $                   WORK, INFO )
+            IF( INFO.NE.0 ) THEN
+               ILST = HERE
+               RETURN
+            END IF
+            HERE = HERE + NBNEXT
+*
+*           Test if 2 by 2 block breaks into two 1 by 1 blocks
+*
+            IF( NBF.EQ.2 ) THEN
+               IF( T( HERE+1, HERE ).EQ.ZERO )
+     $            NBF = 3
+            END IF
+*
+         ELSE
+*
+*           Current block consists of two 1 by 1 blocks each of which
+*           must be swapped individually
+*
+            NBNEXT = 1
+            IF( HERE+3.LE.N ) THEN
+               IF( T( HERE+3, HERE+2 ).NE.ZERO )
+     $            NBNEXT = 2
+            END IF
+            CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE+1, 1, NBNEXT,
+     $                   WORK, INFO )
+            IF( INFO.NE.0 ) THEN
+               ILST = HERE
+               RETURN
+            END IF
+            IF( NBNEXT.EQ.1 ) THEN
+*
+*              Swap two 1 by 1 blocks, no problems possible
+*
+               CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, 1, NBNEXT,
+     $                      WORK, INFO )
+               HERE = HERE + 1
+            ELSE
+*
+*              Recompute NBNEXT in case 2 by 2 split
+*
+               IF( T( HERE+2, HERE+1 ).EQ.ZERO )
+     $            NBNEXT = 1
+               IF( NBNEXT.EQ.2 ) THEN
+*
+*                 2 by 2 Block did not split
+*
+                  CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, 1,
+     $                         NBNEXT, WORK, INFO )
+                  IF( INFO.NE.0 ) THEN
+                     ILST = HERE
+                     RETURN
+                  END IF
+                  HERE = HERE + 2
+               ELSE
+*
+*                 2 by 2 Block did split
+*
+                  CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, 1, 1,
+     $                         WORK, INFO )
+                  CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE+1, 1, 1,
+     $                         WORK, INFO )
+                  HERE = HERE + 2
+               END IF
+            END IF
+         END IF
+         IF( HERE.LT.ILST )
+     $      GO TO 10
+*
+      ELSE
+*
+         HERE = IFST
+   20    CONTINUE
+*
+*        Swap block with next one above
+*
+         IF( NBF.EQ.1 .OR. NBF.EQ.2 ) THEN
+*
+*           Current block either 1 by 1 or 2 by 2
+*
+            NBNEXT = 1
+            IF( HERE.GE.3 ) THEN
+               IF( T( HERE-1, HERE-2 ).NE.ZERO )
+     $            NBNEXT = 2
+            END IF
+            CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE-NBNEXT, NBNEXT,
+     $                   NBF, WORK, INFO )
+            IF( INFO.NE.0 ) THEN
+               ILST = HERE
+               RETURN
+            END IF
+            HERE = HERE - NBNEXT
+*
+*           Test if 2 by 2 block breaks into two 1 by 1 blocks
+*
+            IF( NBF.EQ.2 ) THEN
+               IF( T( HERE+1, HERE ).EQ.ZERO )
+     $            NBF = 3
+            END IF
+*
+         ELSE
+*
+*           Current block consists of two 1 by 1 blocks each of which
+*           must be swapped individually
+*
+            NBNEXT = 1
+            IF( HERE.GE.3 ) THEN
+               IF( T( HERE-1, HERE-2 ).NE.ZERO )
+     $            NBNEXT = 2
+            END IF
+            CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE-NBNEXT, NBNEXT,
+     $                   1, WORK, INFO )
+            IF( INFO.NE.0 ) THEN
+               ILST = HERE
+               RETURN
+            END IF
+            IF( NBNEXT.EQ.1 ) THEN
+*
+*              Swap two 1 by 1 blocks, no problems possible
+*
+               CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, NBNEXT, 1,
+     $                      WORK, INFO )
+               HERE = HERE - 1
+            ELSE
+*
+*              Recompute NBNEXT in case 2 by 2 split
+*
+               IF( T( HERE, HERE-1 ).EQ.ZERO )
+     $            NBNEXT = 1
+               IF( NBNEXT.EQ.2 ) THEN
+*
+*                 2 by 2 Block did not split
+*
+                  CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE-1, 2, 1,
+     $                         WORK, INFO )
+                  IF( INFO.NE.0 ) THEN
+                     ILST = HERE
+                     RETURN
+                  END IF
+                  HERE = HERE - 2
+               ELSE
+*
+*                 2 by 2 Block did split
+*
+                  CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE, 1, 1,
+     $                         WORK, INFO )
+                  CALL SLAEXC( WANTQ, N, T, LDT, Q, LDQ, HERE-1, 1, 1,
+     $                         WORK, INFO )
+                  HERE = HERE - 2
+               END IF
+            END IF
+         END IF
+         IF( HERE.GT.ILST )
+     $      GO TO 20
+      END IF
+      ILST = HERE
+*
+      RETURN
+*
+*     End of STREXC
+*
+      END
diff --git a/costa/native/external/lapack/strrfs.f b/costa/native/external/lapack/strrfs.f
new file mode 100644
index 000000000..528a3c348
--- /dev/null
+++ b/costa/native/external/lapack/strrfs.f
@@ -0,0 +1,371 @@
+      SUBROUTINE STRRFS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, X,
+     $                   LDX, FERR, BERR, WORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, TRANS, UPLO
+      INTEGER            INFO, LDA, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               A( LDA, * ), B( LDB, * ), BERR( * ), FERR( * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STRRFS provides error bounds and backward error estimates for the
+*  solution to a system of linear equations with a triangular
+*  coefficient matrix.
+*
+*  The solution matrix X must be computed by STRTRS or some other
+*  means before entering this routine.  STRRFS does not do iterative
+*  refinement because doing so cannot improve the backward error.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B  (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose = Transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input) REAL array, dimension (LDA,N)
+*          The triangular matrix A.  If UPLO = 'U', the leading N-by-N
+*          upper triangular part of the array A contains the upper
+*          triangular matrix, and the strictly lower triangular part of
+*          A is not referenced.  If UPLO = 'L', the leading N-by-N lower
+*          triangular part of the array A contains the lower triangular
+*          matrix, and the strictly upper triangular part of A is not
+*          referenced.  If DIAG = 'U', the diagonal elements of A are
+*          also not referenced and are assumed to be 1.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input) REAL array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input) REAL array, dimension (LDX,NRHS)
+*          The solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) REAL array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) REAL array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) REAL array, dimension (3*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN, NOUNIT, UPPER
+      CHARACTER          TRANST
+      INTEGER            I, J, K, KASE, NZ
+      REAL               EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SCOPY, SLACON, STRMV, STRSV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH
+      EXTERNAL           LSAME, SLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'STRRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      IF( NOTRAN ) THEN
+         TRANST = 'T'
+      ELSE
+         TRANST = 'N'
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = SLAMCH( 'Epsilon' )
+      SAFMIN = SLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 250 J = 1, NRHS
+*
+*        Compute residual R = B - op(A) * X,
+*        where op(A) = A or A', depending on TRANS.
+*
+         CALL SCOPY( N, X( 1, J ), 1, WORK( N+1 ), 1 )
+         CALL STRMV( UPLO, TRANS, DIAG, N, A, LDA, WORK( N+1 ), 1 )
+         CALL SAXPY( N, -ONE, B( 1, J ), 1, WORK( N+1 ), 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 20 I = 1, N
+            WORK( I ) = ABS( B( I, J ) )
+   20    CONTINUE
+*
+         IF( NOTRAN ) THEN
+*
+*           Compute abs(A)*abs(X) + abs(B).
+*
+            IF( UPPER ) THEN
+               IF( NOUNIT ) THEN
+                  DO 40 K = 1, N
+                     XK = ABS( X( K, J ) )
+                     DO 30 I = 1, K
+                        WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK
+   30                CONTINUE
+   40             CONTINUE
+               ELSE
+                  DO 60 K = 1, N
+                     XK = ABS( X( K, J ) )
+                     DO 50 I = 1, K - 1
+                        WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK
+   50                CONTINUE
+                     WORK( K ) = WORK( K ) + XK
+   60             CONTINUE
+               END IF
+            ELSE
+               IF( NOUNIT ) THEN
+                  DO 80 K = 1, N
+                     XK = ABS( X( K, J ) )
+                     DO 70 I = K, N
+                        WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK
+   70                CONTINUE
+   80             CONTINUE
+               ELSE
+                  DO 100 K = 1, N
+                     XK = ABS( X( K, J ) )
+                     DO 90 I = K + 1, N
+                        WORK( I ) = WORK( I ) + ABS( A( I, K ) )*XK
+   90                CONTINUE
+                     WORK( K ) = WORK( K ) + XK
+  100             CONTINUE
+               END IF
+            END IF
+         ELSE
+*
+*           Compute abs(A')*abs(X) + abs(B).
+*
+            IF( UPPER ) THEN
+               IF( NOUNIT ) THEN
+                  DO 120 K = 1, N
+                     S = ZERO
+                     DO 110 I = 1, K
+                        S = S + ABS( A( I, K ) )*ABS( X( I, J ) )
+  110                CONTINUE
+                     WORK( K ) = WORK( K ) + S
+  120             CONTINUE
+               ELSE
+                  DO 140 K = 1, N
+                     S = ABS( X( K, J ) )
+                     DO 130 I = 1, K - 1
+                        S = S + ABS( A( I, K ) )*ABS( X( I, J ) )
+  130                CONTINUE
+                     WORK( K ) = WORK( K ) + S
+  140             CONTINUE
+               END IF
+            ELSE
+               IF( NOUNIT ) THEN
+                  DO 160 K = 1, N
+                     S = ZERO
+                     DO 150 I = K, N
+                        S = S + ABS( A( I, K ) )*ABS( X( I, J ) )
+  150                CONTINUE
+                     WORK( K ) = WORK( K ) + S
+  160             CONTINUE
+               ELSE
+                  DO 180 K = 1, N
+                     S = ABS( X( K, J ) )
+                     DO 170 I = K + 1, N
+                        S = S + ABS( A( I, K ) )*ABS( X( I, J ) )
+  170                CONTINUE
+                     WORK( K ) = WORK( K ) + S
+  180             CONTINUE
+               END IF
+            END IF
+         END IF
+         S = ZERO
+         DO 190 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, ABS( WORK( N+I ) ) / WORK( I ) )
+            ELSE
+               S = MAX( S, ( ABS( WORK( N+I ) )+SAFE1 ) /
+     $             ( WORK( I )+SAFE1 ) )
+            END IF
+  190    CONTINUE
+         BERR( J ) = S
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(op(A)))*
+*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(op(A)) is the inverse of op(A)
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(op(A))*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use SLACON to estimate the infinity-norm of the matrix
+*           inv(op(A)) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) )))
+*
+         DO 200 I = 1, N
+            IF( WORK( I ).GT.SAFE2 ) THEN
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I )
+            ELSE
+               WORK( I ) = ABS( WORK( N+I ) ) + NZ*EPS*WORK( I ) + SAFE1
+            END IF
+  200    CONTINUE
+*
+         KASE = 0
+  210    CONTINUE
+         CALL SLACON( N, WORK( 2*N+1 ), WORK( N+1 ), IWORK, FERR( J ),
+     $                KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(op(A)').
+*
+               CALL STRSV( UPLO, TRANST, DIAG, N, A, LDA, WORK( N+1 ),
+     $                     1 )
+               DO 220 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  220          CONTINUE
+            ELSE
+*
+*              Multiply by inv(op(A))*diag(W).
+*
+               DO 230 I = 1, N
+                  WORK( N+I ) = WORK( I )*WORK( N+I )
+  230          CONTINUE
+               CALL STRSV( UPLO, TRANS, DIAG, N, A, LDA, WORK( N+1 ),
+     $                     1 )
+            END IF
+            GO TO 210
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 240 I = 1, N
+            LSTRES = MAX( LSTRES, ABS( X( I, J ) ) )
+  240    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  250 CONTINUE
+*
+      RETURN
+*
+*     End of STRRFS
+*
+      END
diff --git a/costa/native/external/lapack/strsen.f b/costa/native/external/lapack/strsen.f
new file mode 100644
index 000000000..e56557944
--- /dev/null
+++ b/costa/native/external/lapack/strsen.f
@@ -0,0 +1,457 @@
+      SUBROUTINE STRSEN( JOB, COMPQ, SELECT, N, T, LDT, Q, LDQ, WR, WI,
+     $                   M, S, SEP, WORK, LWORK, IWORK, LIWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPQ, JOB
+      INTEGER            INFO, LDQ, LDT, LIWORK, LWORK, M, N
+      REAL               S, SEP
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            SELECT( * )
+      INTEGER            IWORK( * )
+      REAL               Q( LDQ, * ), T( LDT, * ), WI( * ), WORK( * ),
+     $                   WR( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STRSEN reorders the real Schur factorization of a real matrix
+*  A = Q*T*Q**T, so that a selected cluster of eigenvalues appears in
+*  the leading diagonal blocks of the upper quasi-triangular matrix T,
+*  and the leading columns of Q form an orthonormal basis of the
+*  corresponding right invariant subspace.
+*
+*  Optionally the routine computes the reciprocal condition numbers of
+*  the cluster of eigenvalues and/or the invariant subspace.
+*
+*  T must be in Schur canonical form (as returned by SHSEQR), that is,
+*  block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each
+*  2-by-2 diagonal block has its diagonal elemnts equal and its
+*  off-diagonal elements of opposite sign.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          Specifies whether condition numbers are required for the
+*          cluster of eigenvalues (S) or the invariant subspace (SEP):
+*          = 'N': none;
+*          = 'E': for eigenvalues only (S);
+*          = 'V': for invariant subspace only (SEP);
+*          = 'B': for both eigenvalues and invariant subspace (S and
+*                 SEP).
+*
+*  COMPQ   (input) CHARACTER*1
+*          = 'V': update the matrix Q of Schur vectors;
+*          = 'N': do not update Q.
+*
+*  SELECT  (input) LOGICAL array, dimension (N)
+*          SELECT specifies the eigenvalues in the selected cluster. To
+*          select a real eigenvalue w(j), SELECT(j) must be set to
+*          .TRUE.. To select a complex conjugate pair of eigenvalues
+*          w(j) and w(j+1), corresponding to a 2-by-2 diagonal block,
+*          either SELECT(j) or SELECT(j+1) or both must be set to
+*          .TRUE.; a complex conjugate pair of eigenvalues must be
+*          either both included in the cluster or both excluded.
+*
+*  N       (input) INTEGER
+*          The order of the matrix T. N >= 0.
+*
+*  T       (input/output) REAL array, dimension (LDT,N)
+*          On entry, the upper quasi-triangular matrix T, in Schur
+*          canonical form.
+*          On exit, T is overwritten by the reordered matrix T, again in
+*          Schur canonical form, with the selected eigenvalues in the
+*          leading diagonal blocks.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= max(1,N).
+*
+*  Q       (input/output) REAL array, dimension (LDQ,N)
+*          On entry, if COMPQ = 'V', the matrix Q of Schur vectors.
+*          On exit, if COMPQ = 'V', Q has been postmultiplied by the
+*          orthogonal transformation matrix which reorders T; the
+*          leading M columns of Q form an orthonormal basis for the
+*          specified invariant subspace.
+*          If COMPQ = 'N', Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.
+*          LDQ >= 1; and if COMPQ = 'V', LDQ >= N.
+*
+*  WR      (output) REAL array, dimension (N)
+*  WI      (output) REAL array, dimension (N)
+*          The real and imaginary parts, respectively, of the reordered
+*          eigenvalues of T. The eigenvalues are stored in the same
+*          order as on the diagonal of T, with WR(i) = T(i,i) and, if
+*          T(i:i+1,i:i+1) is a 2-by-2 diagonal block, WI(i) > 0 and
+*          WI(i+1) = -WI(i). Note that if a complex eigenvalue is
+*          sufficiently ill-conditioned, then its value may differ
+*          significantly from its value before reordering.
+*
+*  M       (output) INTEGER
+*          The dimension of the specified invariant subspace.
+*          0 < = M <= N.
+*
+*  S       (output) REAL
+*          If JOB = 'E' or 'B', S is a lower bound on the reciprocal
+*          condition number for the selected cluster of eigenvalues.
+*          S cannot underestimate the true reciprocal condition number
+*          by more than a factor of sqrt(N). If M = 0 or N, S = 1.
+*          If JOB = 'N' or 'V', S is not referenced.
+*
+*  SEP     (output) REAL
+*          If JOB = 'V' or 'B', SEP is the estimated reciprocal
+*          condition number of the specified invariant subspace. If
+*          M = 0 or N, SEP = norm(T).
+*          If JOB = 'N' or 'E', SEP is not referenced.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If JOB = 'N', LWORK >= max(1,N);
+*          if JOB = 'E', LWORK >= M*(N-M);
+*          if JOB = 'V' or 'B', LWORK >= 2*M*(N-M).
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace) INTEGER array, dimension (LIWORK)
+*          IF JOB = 'N' or 'E', IWORK is not referenced.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.
+*          If JOB = 'N' or 'E', LIWORK >= 1;
+*          if JOB = 'V' or 'B', LIWORK >= M*(N-M).
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          = 1: reordering of T failed because some eigenvalues are too
+*               close to separate (the problem is very ill-conditioned);
+*               T may have been partially reordered, and WR and WI
+*               contain the eigenvalues in the same order as in T; S and
+*               SEP (if requested) are set to zero.
+*
+*  Further Details
+*  ===============
+*
+*  STRSEN first collects the selected eigenvalues by computing an
+*  orthogonal transformation Z to move them to the top left corner of T.
+*  In other words, the selected eigenvalues are the eigenvalues of T11
+*  in:
+*
+*                Z'*T*Z = ( T11 T12 ) n1
+*                         (  0  T22 ) n2
+*                            n1  n2
+*
+*  where N = n1+n2 and Z' means the transpose of Z. The first n1 columns
+*  of Z span the specified invariant subspace of T.
+*
+*  If T has been obtained from the real Schur factorization of a matrix
+*  A = Q*T*Q', then the reordered real Schur factorization of A is given
+*  by A = (Q*Z)*(Z'*T*Z)*(Q*Z)', and the first n1 columns of Q*Z span
+*  the corresponding invariant subspace of A.
+*
+*  The reciprocal condition number of the average of the eigenvalues of
+*  T11 may be returned in S. S lies between 0 (very badly conditioned)
+*  and 1 (very well conditioned). It is computed as follows. First we
+*  compute R so that
+*
+*                         P = ( I  R ) n1
+*                             ( 0  0 ) n2
+*                               n1 n2
+*
+*  is the projector on the invariant subspace associated with T11.
+*  R is the solution of the Sylvester equation:
+*
+*                        T11*R - R*T22 = T12.
+*
+*  Let F-norm(M) denote the Frobenius-norm of M and 2-norm(M) denote
+*  the two-norm of M. Then S is computed as the lower bound
+*
+*                      (1 + F-norm(R)**2)**(-1/2)
+*
+*  on the reciprocal of 2-norm(P), the true reciprocal condition number.
+*  S cannot underestimate 1 / 2-norm(P) by more than a factor of
+*  sqrt(N).
+*
+*  An approximate error bound for the computed average of the
+*  eigenvalues of T11 is
+*
+*                         EPS * norm(T) / S
+*
+*  where EPS is the machine precision.
+*
+*  The reciprocal condition number of the right invariant subspace
+*  spanned by the first n1 columns of Z (or of Q*Z) is returned in SEP.
+*  SEP is defined as the separation of T11 and T22:
+*
+*                     sep( T11, T22 ) = sigma-min( C )
+*
+*  where sigma-min(C) is the smallest singular value of the
+*  n1*n2-by-n1*n2 matrix
+*
+*     C  = kprod( I(n2), T11 ) - kprod( transpose(T22), I(n1) )
+*
+*  I(m) is an m by m identity matrix, and kprod denotes the Kronecker
+*  product. We estimate sigma-min(C) by the reciprocal of an estimate of
+*  the 1-norm of inverse(C). The true reciprocal 1-norm of inverse(C)
+*  cannot differ from sigma-min(C) by more than a factor of sqrt(n1*n2).
+*
+*  When SEP is small, small changes in T can cause large changes in
+*  the invariant subspace. An approximate bound on the maximum angular
+*  error in the computed right invariant subspace is
+*
+*                      EPS * norm(T) / SEP
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, PAIR, SWAP, WANTBH, WANTQ, WANTS,
+     $                    WANTSP
+      INTEGER            IERR, K, KASE, KK, KS, LIWMIN, LWMIN, N1, N2,
+     $                   NN
+      REAL               EST, RNORM, SCALE
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLANGE
+      EXTERNAL           LSAME, SLANGE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLACON, SLACPY, STREXC, STRSYL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      WANTBH = LSAME( JOB, 'B' )
+      WANTS = LSAME( JOB, 'E' ) .OR. WANTBH
+      WANTSP = LSAME( JOB, 'V' ) .OR. WANTBH
+      WANTQ = LSAME( COMPQ, 'V' )
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.WANTS .AND. .NOT.WANTSP )
+     $     THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( COMPQ, 'N' ) .AND. .NOT.WANTQ ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDT.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN
+         INFO = -8
+      ELSE
+*
+*        Set M to the dimension of the specified invariant subspace,
+*        and test LWORK and LIWORK.
+*
+         M = 0
+         PAIR = .FALSE.
+         DO 10 K = 1, N
+            IF( PAIR ) THEN
+               PAIR = .FALSE.
+            ELSE
+               IF( K.LT.N ) THEN
+                  IF( T( K+1, K ).EQ.ZERO ) THEN
+                     IF( SELECT( K ) )
+     $                  M = M + 1
+                  ELSE
+                     PAIR = .TRUE.
+                     IF( SELECT( K ) .OR. SELECT( K+1 ) )
+     $                  M = M + 2
+                  END IF
+               ELSE
+                  IF( SELECT( N ) )
+     $               M = M + 1
+               END IF
+            END IF
+   10    CONTINUE
+*
+         N1 = M
+         N2 = N - M
+         NN = N1*N2
+*
+         IF(  WANTSP ) THEN
+            LWMIN = MAX( 1, 2*NN )
+            LIWMIN = MAX( 1, NN )
+         ELSE IF( LSAME( JOB, 'N' ) ) THEN
+            LWMIN = MAX( 1, N )
+            LIWMIN = 1
+         ELSE IF( LSAME( JOB, 'E' ) ) THEN
+            LWMIN = MAX( 1, NN )
+            LIWMIN = 1
+         END IF
+*
+         IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+            INFO = -15
+         ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+            INFO = -17
+         END IF
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'STRSEN', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( M.EQ.N .OR. M.EQ.0 ) THEN
+         IF( WANTS )
+     $      S = ONE
+         IF( WANTSP )
+     $      SEP = SLANGE( '1', N, N, T, LDT, WORK )
+         GO TO 40
+      END IF
+*
+*     Collect the selected blocks at the top-left corner of T.
+*
+      KS = 0
+      PAIR = .FALSE.
+      DO 20 K = 1, N
+         IF( PAIR ) THEN
+            PAIR = .FALSE.
+         ELSE
+            SWAP = SELECT( K )
+            IF( K.LT.N ) THEN
+               IF( T( K+1, K ).NE.ZERO ) THEN
+                  PAIR = .TRUE.
+                  SWAP = SWAP .OR. SELECT( K+1 )
+               END IF
+            END IF
+            IF( SWAP ) THEN
+               KS = KS + 1
+*
+*              Swap the K-th block to position KS.
+*
+               IERR = 0
+               KK = K
+               IF( K.NE.KS )
+     $            CALL STREXC( COMPQ, N, T, LDT, Q, LDQ, KK, KS, WORK,
+     $                         IERR )
+               IF( IERR.EQ.1 .OR. IERR.EQ.2 ) THEN
+*
+*                 Blocks too close to swap: exit.
+*
+                  INFO = 1
+                  IF( WANTS )
+     $               S = ZERO
+                  IF( WANTSP )
+     $               SEP = ZERO
+                  GO TO 40
+               END IF
+               IF( PAIR )
+     $            KS = KS + 1
+            END IF
+         END IF
+   20 CONTINUE
+*
+      IF( WANTS ) THEN
+*
+*        Solve Sylvester equation for R:
+*
+*           T11*R - R*T22 = scale*T12
+*
+         CALL SLACPY( 'F', N1, N2, T( 1, N1+1 ), LDT, WORK, N1 )
+         CALL STRSYL( 'N', 'N', -1, N1, N2, T, LDT, T( N1+1, N1+1 ),
+     $                LDT, WORK, N1, SCALE, IERR )
+*
+*        Estimate the reciprocal of the condition number of the cluster
+*        of eigenvalues.
+*
+         RNORM = SLANGE( 'F', N1, N2, WORK, N1, WORK )
+         IF( RNORM.EQ.ZERO ) THEN
+            S = ONE
+         ELSE
+            S = SCALE / ( SQRT( SCALE*SCALE / RNORM+RNORM )*
+     $          SQRT( RNORM ) )
+         END IF
+      END IF
+*
+      IF( WANTSP ) THEN
+*
+*        Estimate sep(T11,T22).
+*
+         EST = ZERO
+         KASE = 0
+   30    CONTINUE
+         CALL SLACON( NN, WORK( NN+1 ), WORK, IWORK, EST, KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Solve  T11*R - R*T22 = scale*X.
+*
+               CALL STRSYL( 'N', 'N', -1, N1, N2, T, LDT,
+     $                      T( N1+1, N1+1 ), LDT, WORK, N1, SCALE,
+     $                      IERR )
+            ELSE
+*
+*              Solve  T11'*R - R*T22' = scale*X.
+*
+               CALL STRSYL( 'T', 'T', -1, N1, N2, T, LDT,
+     $                      T( N1+1, N1+1 ), LDT, WORK, N1, SCALE,
+     $                      IERR )
+            END IF
+            GO TO 30
+         END IF
+*
+         SEP = SCALE / EST
+      END IF
+*
+   40 CONTINUE
+*
+*     Store the output eigenvalues in WR and WI.
+*
+      DO 50 K = 1, N
+         WR( K ) = T( K, K )
+         WI( K ) = ZERO
+   50 CONTINUE
+      DO 60 K = 1, N - 1
+         IF( T( K+1, K ).NE.ZERO ) THEN
+            WI( K ) = SQRT( ABS( T( K, K+1 ) ) )*
+     $                SQRT( ABS( T( K+1, K ) ) )
+            WI( K+1 ) = -WI( K )
+         END IF
+   60 CONTINUE
+*
+      WORK( 1 ) = LWMIN
+      IWORK( 1 ) = LIWMIN
+*
+      RETURN
+*
+*     End of STRSEN
+*
+      END
diff --git a/costa/native/external/lapack/strsna.f b/costa/native/external/lapack/strsna.f
new file mode 100644
index 000000000..f9d801796
--- /dev/null
+++ b/costa/native/external/lapack/strsna.f
@@ -0,0 +1,493 @@
+      SUBROUTINE STRSNA( JOB, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR,
+     $                   LDVR, S, SEP, MM, M, WORK, LDWORK, IWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          HOWMNY, JOB
+      INTEGER            INFO, LDT, LDVL, LDVR, LDWORK, M, MM, N
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            SELECT( * )
+      INTEGER            IWORK( * )
+      REAL               S( * ), SEP( * ), T( LDT, * ), VL( LDVL, * ),
+     $                   VR( LDVR, * ), WORK( LDWORK, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STRSNA estimates reciprocal condition numbers for specified
+*  eigenvalues and/or right eigenvectors of a real upper
+*  quasi-triangular matrix T (or of any matrix Q*T*Q**T with Q
+*  orthogonal).
+*
+*  T must be in Schur canonical form (as returned by SHSEQR), that is,
+*  block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each
+*  2-by-2 diagonal block has its diagonal elements equal and its
+*  off-diagonal elements of opposite sign.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          Specifies whether condition numbers are required for
+*          eigenvalues (S) or eigenvectors (SEP):
+*          = 'E': for eigenvalues only (S);
+*          = 'V': for eigenvectors only (SEP);
+*          = 'B': for both eigenvalues and eigenvectors (S and SEP).
+*
+*  HOWMNY  (input) CHARACTER*1
+*          = 'A': compute condition numbers for all eigenpairs;
+*          = 'S': compute condition numbers for selected eigenpairs
+*                 specified by the array SELECT.
+*
+*  SELECT  (input) LOGICAL array, dimension (N)
+*          If HOWMNY = 'S', SELECT specifies the eigenpairs for which
+*          condition numbers are required. To select condition numbers
+*          for the eigenpair corresponding to a real eigenvalue w(j),
+*          SELECT(j) must be set to .TRUE.. To select condition numbers
+*          corresponding to a complex conjugate pair of eigenvalues w(j)
+*          and w(j+1), either SELECT(j) or SELECT(j+1) or both, must be
+*          set to .TRUE..
+*          If HOWMNY = 'A', SELECT is not referenced.
+*
+*  N       (input) INTEGER
+*          The order of the matrix T. N >= 0.
+*
+*  T       (input) REAL array, dimension (LDT,N)
+*          The upper quasi-triangular matrix T, in Schur canonical form.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= max(1,N).
+*
+*  VL      (input) REAL array, dimension (LDVL,M)
+*          If JOB = 'E' or 'B', VL must contain left eigenvectors of T
+*          (or of any Q*T*Q**T with Q orthogonal), corresponding to the
+*          eigenpairs specified by HOWMNY and SELECT. The eigenvectors
+*          must be stored in consecutive columns of VL, as returned by
+*          SHSEIN or STREVC.
+*          If JOB = 'V', VL is not referenced.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the array VL.
+*          LDVL >= 1; and if JOB = 'E' or 'B', LDVL >= N.
+*
+*  VR      (input) REAL array, dimension (LDVR,M)
+*          If JOB = 'E' or 'B', VR must contain right eigenvectors of T
+*          (or of any Q*T*Q**T with Q orthogonal), corresponding to the
+*          eigenpairs specified by HOWMNY and SELECT. The eigenvectors
+*          must be stored in consecutive columns of VR, as returned by
+*          SHSEIN or STREVC.
+*          If JOB = 'V', VR is not referenced.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the array VR.
+*          LDVR >= 1; and if JOB = 'E' or 'B', LDVR >= N.
+*
+*  S       (output) REAL array, dimension (MM)
+*          If JOB = 'E' or 'B', the reciprocal condition numbers of the
+*          selected eigenvalues, stored in consecutive elements of the
+*          array. For a complex conjugate pair of eigenvalues two
+*          consecutive elements of S are set to the same value. Thus
+*          S(j), SEP(j), and the j-th columns of VL and VR all
+*          correspond to the same eigenpair (but not in general the
+*          j-th eigenpair, unless all eigenpairs are selected).
+*          If JOB = 'V', S is not referenced.
+*
+*  SEP     (output) REAL array, dimension (MM)
+*          If JOB = 'V' or 'B', the estimated reciprocal condition
+*          numbers of the selected eigenvectors, stored in consecutive
+*          elements of the array. For a complex eigenvector two
+*          consecutive elements of SEP are set to the same value. If
+*          the eigenvalues cannot be reordered to compute SEP(j), SEP(j)
+*          is set to 0; this can only occur when the true value would be
+*          very small anyway.
+*          If JOB = 'E', SEP is not referenced.
+*
+*  MM      (input) INTEGER
+*          The number of elements in the arrays S (if JOB = 'E' or 'B')
+*           and/or SEP (if JOB = 'V' or 'B'). MM >= M.
+*
+*  M       (output) INTEGER
+*          The number of elements of the arrays S and/or SEP actually
+*          used to store the estimated condition numbers.
+*          If HOWMNY = 'A', M is set to N.
+*
+*  WORK    (workspace) REAL array, dimension (LDWORK,N+1)
+*          If JOB = 'E', WORK is not referenced.
+*
+*  LDWORK  (input) INTEGER
+*          The leading dimension of the array WORK.
+*          LDWORK >= 1; and if JOB = 'V' or 'B', LDWORK >= N.
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*          If JOB = 'E', IWORK is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The reciprocal of the condition number of an eigenvalue lambda is
+*  defined as
+*
+*          S(lambda) = |v'*u| / (norm(u)*norm(v))
+*
+*  where u and v are the right and left eigenvectors of T corresponding
+*  to lambda; v' denotes the conjugate-transpose of v, and norm(u)
+*  denotes the Euclidean norm. These reciprocal condition numbers always
+*  lie between zero (very badly conditioned) and one (very well
+*  conditioned). If n = 1, S(lambda) is defined to be 1.
+*
+*  An approximate error bound for a computed eigenvalue W(i) is given by
+*
+*                      EPS * norm(T) / S(i)
+*
+*  where EPS is the machine precision.
+*
+*  The reciprocal of the condition number of the right eigenvector u
+*  corresponding to lambda is defined as follows. Suppose
+*
+*              T = ( lambda  c  )
+*                  (   0    T22 )
+*
+*  Then the reciprocal condition number is
+*
+*          SEP( lambda, T22 ) = sigma-min( T22 - lambda*I )
+*
+*  where sigma-min denotes the smallest singular value. We approximate
+*  the smallest singular value by the reciprocal of an estimate of the
+*  one-norm of the inverse of T22 - lambda*I. If n = 1, SEP(1) is
+*  defined to be abs(T(1,1)).
+*
+*  An approximate error bound for a computed right eigenvector VR(i)
+*  is given by
+*
+*                      EPS * norm(T) / SEP(i)
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0, TWO = 2.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            PAIR, SOMCON, WANTBH, WANTS, WANTSP
+      INTEGER            I, IERR, IFST, ILST, J, K, KASE, KS, N2, NN
+      REAL               BIGNUM, COND, CS, DELTA, DUMM, EPS, EST, LNRM,
+     $                   MU, PROD, PROD1, PROD2, RNRM, SCALE, SMLNUM, SN
+*     ..
+*     .. Local Arrays ..
+      REAL               DUMMY( 1 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SDOT, SLAMCH, SLAPY2, SNRM2
+      EXTERNAL           LSAME, SDOT, SLAMCH, SLAPY2, SNRM2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLABAD, SLACON, SLACPY, SLAQTR, STREXC, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      WANTBH = LSAME( JOB, 'B' )
+      WANTS = LSAME( JOB, 'E' ) .OR. WANTBH
+      WANTSP = LSAME( JOB, 'V' ) .OR. WANTBH
+*
+      SOMCON = LSAME( HOWMNY, 'S' )
+*
+      INFO = 0
+      IF( .NOT.WANTS .AND. .NOT.WANTSP ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( HOWMNY, 'A' ) .AND. .NOT.SOMCON ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDT.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDVL.LT.1 .OR. ( WANTS .AND. LDVL.LT.N ) ) THEN
+         INFO = -8
+      ELSE IF( LDVR.LT.1 .OR. ( WANTS .AND. LDVR.LT.N ) ) THEN
+         INFO = -10
+      ELSE
+*
+*        Set M to the number of eigenpairs for which condition numbers
+*        are required, and test MM.
+*
+         IF( SOMCON ) THEN
+            M = 0
+            PAIR = .FALSE.
+            DO 10 K = 1, N
+               IF( PAIR ) THEN
+                  PAIR = .FALSE.
+               ELSE
+                  IF( K.LT.N ) THEN
+                     IF( T( K+1, K ).EQ.ZERO ) THEN
+                        IF( SELECT( K ) )
+     $                     M = M + 1
+                     ELSE
+                        PAIR = .TRUE.
+                        IF( SELECT( K ) .OR. SELECT( K+1 ) )
+     $                     M = M + 2
+                     END IF
+                  ELSE
+                     IF( SELECT( N ) )
+     $                  M = M + 1
+                  END IF
+               END IF
+   10       CONTINUE
+         ELSE
+            M = N
+         END IF
+*
+         IF( MM.LT.M ) THEN
+            INFO = -13
+         ELSE IF( LDWORK.LT.1 .OR. ( WANTSP .AND. LDWORK.LT.N ) ) THEN
+            INFO = -16
+         END IF
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'STRSNA', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( SOMCON ) THEN
+            IF( .NOT.SELECT( 1 ) )
+     $         RETURN
+         END IF
+         IF( WANTS )
+     $      S( 1 ) = ONE
+         IF( WANTSP )
+     $      SEP( 1 ) = ABS( T( 1, 1 ) )
+         RETURN
+      END IF
+*
+*     Get machine constants
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SLAMCH( 'S' ) / EPS
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+*
+      KS = 0
+      PAIR = .FALSE.
+      DO 60 K = 1, N
+*
+*        Determine whether T(k,k) begins a 1-by-1 or 2-by-2 block.
+*
+         IF( PAIR ) THEN
+            PAIR = .FALSE.
+            GO TO 60
+         ELSE
+            IF( K.LT.N )
+     $         PAIR = T( K+1, K ).NE.ZERO
+         END IF
+*
+*        Determine whether condition numbers are required for the k-th
+*        eigenpair.
+*
+         IF( SOMCON ) THEN
+            IF( PAIR ) THEN
+               IF( .NOT.SELECT( K ) .AND. .NOT.SELECT( K+1 ) )
+     $            GO TO 60
+            ELSE
+               IF( .NOT.SELECT( K ) )
+     $            GO TO 60
+            END IF
+         END IF
+*
+         KS = KS + 1
+*
+         IF( WANTS ) THEN
+*
+*           Compute the reciprocal condition number of the k-th
+*           eigenvalue.
+*
+            IF( .NOT.PAIR ) THEN
+*
+*              Real eigenvalue.
+*
+               PROD = SDOT( N, VR( 1, KS ), 1, VL( 1, KS ), 1 )
+               RNRM = SNRM2( N, VR( 1, KS ), 1 )
+               LNRM = SNRM2( N, VL( 1, KS ), 1 )
+               S( KS ) = ABS( PROD ) / ( RNRM*LNRM )
+            ELSE
+*
+*              Complex eigenvalue.
+*
+               PROD1 = SDOT( N, VR( 1, KS ), 1, VL( 1, KS ), 1 )
+               PROD1 = PROD1 + SDOT( N, VR( 1, KS+1 ), 1, VL( 1, KS+1 ),
+     $                 1 )
+               PROD2 = SDOT( N, VL( 1, KS ), 1, VR( 1, KS+1 ), 1 )
+               PROD2 = PROD2 - SDOT( N, VL( 1, KS+1 ), 1, VR( 1, KS ),
+     $                 1 )
+               RNRM = SLAPY2( SNRM2( N, VR( 1, KS ), 1 ),
+     $                SNRM2( N, VR( 1, KS+1 ), 1 ) )
+               LNRM = SLAPY2( SNRM2( N, VL( 1, KS ), 1 ),
+     $                SNRM2( N, VL( 1, KS+1 ), 1 ) )
+               COND = SLAPY2( PROD1, PROD2 ) / ( RNRM*LNRM )
+               S( KS ) = COND
+               S( KS+1 ) = COND
+            END IF
+         END IF
+*
+         IF( WANTSP ) THEN
+*
+*           Estimate the reciprocal condition number of the k-th
+*           eigenvector.
+*
+*           Copy the matrix T to the array WORK and swap the diagonal
+*           block beginning at T(k,k) to the (1,1) position.
+*
+            CALL SLACPY( 'Full', N, N, T, LDT, WORK, LDWORK )
+            IFST = K
+            ILST = 1
+            CALL STREXC( 'No Q', N, WORK, LDWORK, DUMMY, 1, IFST, ILST,
+     $                   WORK( 1, N+1 ), IERR )
+*
+            IF( IERR.EQ.1 .OR. IERR.EQ.2 ) THEN
+*
+*              Could not swap because blocks not well separated
+*
+               SCALE = ONE
+               EST = BIGNUM
+            ELSE
+*
+*              Reordering successful
+*
+               IF( WORK( 2, 1 ).EQ.ZERO ) THEN
+*
+*                 Form C = T22 - lambda*I in WORK(2:N,2:N).
+*
+                  DO 20 I = 2, N
+                     WORK( I, I ) = WORK( I, I ) - WORK( 1, 1 )
+   20             CONTINUE
+                  N2 = 1
+                  NN = N - 1
+               ELSE
+*
+*                 Triangularize the 2 by 2 block by unitary
+*                 transformation U = [  cs   i*ss ]
+*                                    [ i*ss   cs  ].
+*                 such that the (1,1) position of WORK is complex
+*                 eigenvalue lambda with positive imaginary part. (2,2)
+*                 position of WORK is the complex eigenvalue lambda
+*                 with negative imaginary  part.
+*
+                  MU = SQRT( ABS( WORK( 1, 2 ) ) )*
+     $                 SQRT( ABS( WORK( 2, 1 ) ) )
+                  DELTA = SLAPY2( MU, WORK( 2, 1 ) )
+                  CS = MU / DELTA
+                  SN = -WORK( 2, 1 ) / DELTA
+*
+*                 Form
+*
+*                 C' = WORK(2:N,2:N) + i*[rwork(1) ..... rwork(n-1) ]
+*                                        [   mu                     ]
+*                                        [         ..               ]
+*                                        [             ..           ]
+*                                        [                  mu      ]
+*                 where C' is conjugate transpose of complex matrix C,
+*                 and RWORK is stored starting in the N+1-st column of
+*                 WORK.
+*
+                  DO 30 J = 3, N
+                     WORK( 2, J ) = CS*WORK( 2, J )
+                     WORK( J, J ) = WORK( J, J ) - WORK( 1, 1 )
+   30             CONTINUE
+                  WORK( 2, 2 ) = ZERO
+*
+                  WORK( 1, N+1 ) = TWO*MU
+                  DO 40 I = 2, N - 1
+                     WORK( I, N+1 ) = SN*WORK( 1, I+1 )
+   40             CONTINUE
+                  N2 = 2
+                  NN = 2*( N-1 )
+               END IF
+*
+*              Estimate norm(inv(C'))
+*
+               EST = ZERO
+               KASE = 0
+   50          CONTINUE
+               CALL SLACON( NN, WORK( 1, N+2 ), WORK( 1, N+4 ), IWORK,
+     $                      EST, KASE )
+               IF( KASE.NE.0 ) THEN
+                  IF( KASE.EQ.1 ) THEN
+                     IF( N2.EQ.1 ) THEN
+*
+*                       Real eigenvalue: solve C'*x = scale*c.
+*
+                        CALL SLAQTR( .TRUE., .TRUE., N-1, WORK( 2, 2 ),
+     $                               LDWORK, DUMMY, DUMM, SCALE,
+     $                               WORK( 1, N+4 ), WORK( 1, N+6 ),
+     $                               IERR )
+                     ELSE
+*
+*                       Complex eigenvalue: solve
+*                       C'*(p+iq) = scale*(c+id) in real arithmetic.
+*
+                        CALL SLAQTR( .TRUE., .FALSE., N-1, WORK( 2, 2 ),
+     $                               LDWORK, WORK( 1, N+1 ), MU, SCALE,
+     $                               WORK( 1, N+4 ), WORK( 1, N+6 ),
+     $                               IERR )
+                     END IF
+                  ELSE
+                     IF( N2.EQ.1 ) THEN
+*
+*                       Real eigenvalue: solve C*x = scale*c.
+*
+                        CALL SLAQTR( .FALSE., .TRUE., N-1, WORK( 2, 2 ),
+     $                               LDWORK, DUMMY, DUMM, SCALE,
+     $                               WORK( 1, N+4 ), WORK( 1, N+6 ),
+     $                               IERR )
+                     ELSE
+*
+*                       Complex eigenvalue: solve
+*                       C*(p+iq) = scale*(c+id) in real arithmetic.
+*
+                        CALL SLAQTR( .FALSE., .FALSE., N-1,
+     $                               WORK( 2, 2 ), LDWORK,
+     $                               WORK( 1, N+1 ), MU, SCALE,
+     $                               WORK( 1, N+4 ), WORK( 1, N+6 ),
+     $                               IERR )
+*
+                     END IF
+                  END IF
+*
+                  GO TO 50
+               END IF
+            END IF
+*
+            SEP( KS ) = SCALE / MAX( EST, SMLNUM )
+            IF( PAIR )
+     $         SEP( KS+1 ) = SEP( KS )
+         END IF
+*
+         IF( PAIR )
+     $      KS = KS + 1
+*
+   60 CONTINUE
+      RETURN
+*
+*     End of STRSNA
+*
+      END
diff --git a/costa/native/external/lapack/strsyl.f b/costa/native/external/lapack/strsyl.f
new file mode 100644
index 000000000..5d8e3301f
--- /dev/null
+++ b/costa/native/external/lapack/strsyl.f
@@ -0,0 +1,914 @@
+      SUBROUTINE STRSYL( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C,
+     $                   LDC, SCALE, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANA, TRANB
+      INTEGER            INFO, ISGN, LDA, LDB, LDC, M, N
+      REAL               SCALE
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STRSYL solves the real Sylvester matrix equation:
+*
+*     op(A)*X + X*op(B) = scale*C or
+*     op(A)*X - X*op(B) = scale*C,
+*
+*  where op(A) = A or A**T, and  A and B are both upper quasi-
+*  triangular. A is M-by-M and B is N-by-N; the right hand side C and
+*  the solution X are M-by-N; and scale is an output scale factor, set
+*  <= 1 to avoid overflow in X.
+*
+*  A and B must be in Schur canonical form (as returned by SHSEQR), that
+*  is, block upper triangular with 1-by-1 and 2-by-2 diagonal blocks;
+*  each 2-by-2 diagonal block has its diagonal elements equal and its
+*  off-diagonal elements of opposite sign.
+*
+*  Arguments
+*  =========
+*
+*  TRANA   (input) CHARACTER*1
+*          Specifies the option op(A):
+*          = 'N': op(A) = A    (No transpose)
+*          = 'T': op(A) = A**T (Transpose)
+*          = 'C': op(A) = A**H (Conjugate transpose = Transpose)
+*
+*  TRANB   (input) CHARACTER*1
+*          Specifies the option op(B):
+*          = 'N': op(B) = B    (No transpose)
+*          = 'T': op(B) = B**T (Transpose)
+*          = 'C': op(B) = B**H (Conjugate transpose = Transpose)
+*
+*  ISGN    (input) INTEGER
+*          Specifies the sign in the equation:
+*          = +1: solve op(A)*X + X*op(B) = scale*C
+*          = -1: solve op(A)*X - X*op(B) = scale*C
+*
+*  M       (input) INTEGER
+*          The order of the matrix A, and the number of rows in the
+*          matrices X and C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The order of the matrix B, and the number of columns in the
+*          matrices X and C. N >= 0.
+*
+*  A       (input) REAL array, dimension (LDA,M)
+*          The upper quasi-triangular matrix A, in Schur canonical form.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  B       (input) REAL array, dimension (LDB,N)
+*          The upper quasi-triangular matrix B, in Schur canonical form.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  C       (input/output) REAL array, dimension (LDC,N)
+*          On entry, the M-by-N right hand side matrix C.
+*          On exit, C is overwritten by the solution matrix X.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M)
+*
+*  SCALE   (output) REAL
+*          The scale factor, scale, set <= 1 to avoid overflow in X.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          = 1: A and B have common or very close eigenvalues; perturbed
+*               values were used to solve the equation (but the matrices
+*               A and B are unchanged).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRNA, NOTRNB
+      INTEGER            IERR, J, K, K1, K2, KNEXT, L, L1, L2, LNEXT
+      REAL               A11, BIGNUM, DA11, DB, EPS, SCALOC, SGN, SMIN,
+     $                   SMLNUM, SUML, SUMR, XNORM
+*     ..
+*     .. Local Arrays ..
+      REAL               DUM( 1 ), VEC( 2, 2 ), X( 2, 2 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SDOT, SLAMCH, SLANGE
+      EXTERNAL           LSAME, SDOT, SLAMCH, SLANGE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLABAD, SLALN2, SLASY2, SSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, REAL
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and Test input parameters
+*
+      NOTRNA = LSAME( TRANA, 'N' )
+      NOTRNB = LSAME( TRANB, 'N' )
+*
+      INFO = 0
+      IF( .NOT.NOTRNA .AND. .NOT.LSAME( TRANA, 'T' ) .AND. .NOT.
+     $    LSAME( TRANA, 'C' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRNB .AND. .NOT.LSAME( TRANB, 'T' ) .AND. .NOT.
+     $         LSAME( TRANB, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( ISGN.NE.1 .AND. ISGN.NE.-1 ) THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'STRSYL', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     Set constants to control overflow
+*
+      EPS = SLAMCH( 'P' )
+      SMLNUM = SLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL SLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SMLNUM*REAL( M*N ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+      SMIN = MAX( SMLNUM, EPS*SLANGE( 'M', M, M, A, LDA, DUM ),
+     $       EPS*SLANGE( 'M', N, N, B, LDB, DUM ) )
+*
+      SCALE = ONE
+      SGN = ISGN
+*
+      IF( NOTRNA .AND. NOTRNB ) THEN
+*
+*        Solve    A*X + ISGN*X*B = scale*C.
+*
+*        The (K,L)th block of X is determined starting from
+*        bottom-left corner column by column by
+*
+*         A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L)
+*
+*        Where
+*                  M                         L-1
+*        R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)].
+*                I=K+1                       J=1
+*
+*        Start column loop (index = L)
+*        L1 (L2) : column index of the first (first) row of X(K,L).
+*
+         LNEXT = 1
+         DO 70 L = 1, N
+            IF( L.LT.LNEXT )
+     $         GO TO 70
+            IF( L.EQ.N ) THEN
+               L1 = L
+               L2 = L
+            ELSE
+               IF( B( L+1, L ).NE.ZERO ) THEN
+                  L1 = L
+                  L2 = L + 1
+                  LNEXT = L + 2
+               ELSE
+                  L1 = L
+                  L2 = L
+                  LNEXT = L + 1
+               END IF
+            END IF
+*
+*           Start row loop (index = K)
+*           K1 (K2): row index of the first (last) row of X(K,L).
+*
+            KNEXT = M
+            DO 60 K = M, 1, -1
+               IF( K.GT.KNEXT )
+     $            GO TO 60
+               IF( K.EQ.1 ) THEN
+                  K1 = K
+                  K2 = K
+               ELSE
+                  IF( A( K, K-1 ).NE.ZERO ) THEN
+                     K1 = K - 1
+                     K2 = K
+                     KNEXT = K - 2
+                  ELSE
+                     K1 = K
+                     K2 = K
+                     KNEXT = K - 1
+                  END IF
+               END IF
+*
+               IF( L1.EQ.L2 .AND. K1.EQ.K2 ) THEN
+                  SUML = SDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA,
+     $                         C( MIN( K1+1, M ), L1 ), 1 )
+                  SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 )
+                  VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR )
+                  SCALOC = ONE
+*
+                  A11 = A( K1, K1 ) + SGN*B( L1, L1 )
+                  DA11 = ABS( A11 )
+                  IF( DA11.LE.SMIN ) THEN
+                     A11 = SMIN
+                     DA11 = SMIN
+                     INFO = 1
+                  END IF
+                  DB = ABS( VEC( 1, 1 ) )
+                  IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN
+                     IF( DB.GT.BIGNUM*DA11 )
+     $                  SCALOC = ONE / DB
+                  END IF
+                  X( 1, 1 ) = ( VEC( 1, 1 )*SCALOC ) / A11
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 10 J = 1, N
+                        CALL SSCAL( M, SCALOC, C( 1, J ), 1 )
+   10                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+*
+               ELSE IF( L1.EQ.L2 .AND. K1.NE.K2 ) THEN
+*
+                  SUML = SDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA,
+     $                         C( MIN( K2+1, M ), L1 ), 1 )
+                  SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 )
+                  VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = SDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA,
+     $                         C( MIN( K2+1, M ), L1 ), 1 )
+                  SUMR = SDOT( L1-1, C( K2, 1 ), LDC, B( 1, L1 ), 1 )
+                  VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR )
+*
+                  CALL SLALN2( .FALSE., 2, 1, SMIN, ONE, A( K1, K1 ),
+     $                         LDA, ONE, ONE, VEC, 2, -SGN*B( L1, L1 ),
+     $                         ZERO, X, 2, SCALOC, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 1
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 20 J = 1, N
+                        CALL SSCAL( M, SCALOC, C( 1, J ), 1 )
+   20                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+                  C( K2, L1 ) = X( 2, 1 )
+*
+               ELSE IF( L1.NE.L2 .AND. K1.EQ.K2 ) THEN
+*
+                  SUML = SDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA,
+     $                         C( MIN( K1+1, M ), L1 ), 1 )
+                  SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 )
+                  VEC( 1, 1 ) = SGN*( C( K1, L1 )-( SUML+SGN*SUMR ) )
+*
+                  SUML = SDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA,
+     $                         C( MIN( K1+1, M ), L2 ), 1 )
+                  SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L2 ), 1 )
+                  VEC( 2, 1 ) = SGN*( C( K1, L2 )-( SUML+SGN*SUMR ) )
+*
+                  CALL SLALN2( .TRUE., 2, 1, SMIN, ONE, B( L1, L1 ),
+     $                         LDB, ONE, ONE, VEC, 2, -SGN*A( K1, K1 ),
+     $                         ZERO, X, 2, SCALOC, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 1
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 40 J = 1, N
+                        CALL SSCAL( M, SCALOC, C( 1, J ), 1 )
+   40                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+                  C( K1, L2 ) = X( 2, 1 )
+*
+               ELSE IF( L1.NE.L2 .AND. K1.NE.K2 ) THEN
+*
+                  SUML = SDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA,
+     $                         C( MIN( K2+1, M ), L1 ), 1 )
+                  SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 )
+                  VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = SDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA,
+     $                         C( MIN( K2+1, M ), L2 ), 1 )
+                  SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L2 ), 1 )
+                  VEC( 1, 2 ) = C( K1, L2 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = SDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA,
+     $                         C( MIN( K2+1, M ), L1 ), 1 )
+                  SUMR = SDOT( L1-1, C( K2, 1 ), LDC, B( 1, L1 ), 1 )
+                  VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = SDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA,
+     $                         C( MIN( K2+1, M ), L2 ), 1 )
+                  SUMR = SDOT( L1-1, C( K2, 1 ), LDC, B( 1, L2 ), 1 )
+                  VEC( 2, 2 ) = C( K2, L2 ) - ( SUML+SGN*SUMR )
+*
+                  CALL SLASY2( .FALSE., .FALSE., ISGN, 2, 2,
+     $                         A( K1, K1 ), LDA, B( L1, L1 ), LDB, VEC,
+     $                         2, SCALOC, X, 2, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 1
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 50 J = 1, N
+                        CALL SSCAL( M, SCALOC, C( 1, J ), 1 )
+   50                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+                  C( K1, L2 ) = X( 1, 2 )
+                  C( K2, L1 ) = X( 2, 1 )
+                  C( K2, L2 ) = X( 2, 2 )
+               END IF
+*
+   60       CONTINUE
+*
+   70    CONTINUE
+*
+      ELSE IF( .NOT.NOTRNA .AND. NOTRNB ) THEN
+*
+*        Solve    A' *X + ISGN*X*B = scale*C.
+*
+*        The (K,L)th block of X is determined starting from
+*        upper-left corner column by column by
+*
+*          A(K,K)'*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L)
+*
+*        Where
+*                   K-1                        L-1
+*          R(K,L) = SUM [A(I,K)'*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)]
+*                   I=1                        J=1
+*
+*        Start column loop (index = L)
+*        L1 (L2): column index of the first (last) row of X(K,L)
+*
+         LNEXT = 1
+         DO 130 L = 1, N
+            IF( L.LT.LNEXT )
+     $         GO TO 130
+            IF( L.EQ.N ) THEN
+               L1 = L
+               L2 = L
+            ELSE
+               IF( B( L+1, L ).NE.ZERO ) THEN
+                  L1 = L
+                  L2 = L + 1
+                  LNEXT = L + 2
+               ELSE
+                  L1 = L
+                  L2 = L
+                  LNEXT = L + 1
+               END IF
+            END IF
+*
+*           Start row loop (index = K)
+*           K1 (K2): row index of the first (last) row of X(K,L)
+*
+            KNEXT = 1
+            DO 120 K = 1, M
+               IF( K.LT.KNEXT )
+     $            GO TO 120
+               IF( K.EQ.M ) THEN
+                  K1 = K
+                  K2 = K
+               ELSE
+                  IF( A( K+1, K ).NE.ZERO ) THEN
+                     K1 = K
+                     K2 = K + 1
+                     KNEXT = K + 2
+                  ELSE
+                     K1 = K
+                     K2 = K
+                     KNEXT = K + 1
+                  END IF
+               END IF
+*
+               IF( L1.EQ.L2 .AND. K1.EQ.K2 ) THEN
+                  SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 )
+                  SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 )
+                  VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR )
+                  SCALOC = ONE
+*
+                  A11 = A( K1, K1 ) + SGN*B( L1, L1 )
+                  DA11 = ABS( A11 )
+                  IF( DA11.LE.SMIN ) THEN
+                     A11 = SMIN
+                     DA11 = SMIN
+                     INFO = 1
+                  END IF
+                  DB = ABS( VEC( 1, 1 ) )
+                  IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN
+                     IF( DB.GT.BIGNUM*DA11 )
+     $                  SCALOC = ONE / DB
+                  END IF
+                  X( 1, 1 ) = ( VEC( 1, 1 )*SCALOC ) / A11
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 80 J = 1, N
+                        CALL SSCAL( M, SCALOC, C( 1, J ), 1 )
+   80                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+*
+               ELSE IF( L1.EQ.L2 .AND. K1.NE.K2 ) THEN
+*
+                  SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 )
+                  SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 )
+                  VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = SDOT( K1-1, A( 1, K2 ), 1, C( 1, L1 ), 1 )
+                  SUMR = SDOT( L1-1, C( K2, 1 ), LDC, B( 1, L1 ), 1 )
+                  VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR )
+*
+                  CALL SLALN2( .TRUE., 2, 1, SMIN, ONE, A( K1, K1 ),
+     $                         LDA, ONE, ONE, VEC, 2, -SGN*B( L1, L1 ),
+     $                         ZERO, X, 2, SCALOC, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 1
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 90 J = 1, N
+                        CALL SSCAL( M, SCALOC, C( 1, J ), 1 )
+   90                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+                  C( K2, L1 ) = X( 2, 1 )
+*
+               ELSE IF( L1.NE.L2 .AND. K1.EQ.K2 ) THEN
+*
+                  SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 )
+                  SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 )
+                  VEC( 1, 1 ) = SGN*( C( K1, L1 )-( SUML+SGN*SUMR ) )
+*
+                  SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L2 ), 1 )
+                  SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L2 ), 1 )
+                  VEC( 2, 1 ) = SGN*( C( K1, L2 )-( SUML+SGN*SUMR ) )
+*
+                  CALL SLALN2( .TRUE., 2, 1, SMIN, ONE, B( L1, L1 ),
+     $                         LDB, ONE, ONE, VEC, 2, -SGN*A( K1, K1 ),
+     $                         ZERO, X, 2, SCALOC, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 1
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 100 J = 1, N
+                        CALL SSCAL( M, SCALOC, C( 1, J ), 1 )
+  100                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+                  C( K1, L2 ) = X( 2, 1 )
+*
+               ELSE IF( L1.NE.L2 .AND. K1.NE.K2 ) THEN
+*
+                  SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 )
+                  SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L1 ), 1 )
+                  VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L2 ), 1 )
+                  SUMR = SDOT( L1-1, C( K1, 1 ), LDC, B( 1, L2 ), 1 )
+                  VEC( 1, 2 ) = C( K1, L2 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = SDOT( K1-1, A( 1, K2 ), 1, C( 1, L1 ), 1 )
+                  SUMR = SDOT( L1-1, C( K2, 1 ), LDC, B( 1, L1 ), 1 )
+                  VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = SDOT( K1-1, A( 1, K2 ), 1, C( 1, L2 ), 1 )
+                  SUMR = SDOT( L1-1, C( K2, 1 ), LDC, B( 1, L2 ), 1 )
+                  VEC( 2, 2 ) = C( K2, L2 ) - ( SUML+SGN*SUMR )
+*
+                  CALL SLASY2( .TRUE., .FALSE., ISGN, 2, 2, A( K1, K1 ),
+     $                         LDA, B( L1, L1 ), LDB, VEC, 2, SCALOC, X,
+     $                         2, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 1
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 110 J = 1, N
+                        CALL SSCAL( M, SCALOC, C( 1, J ), 1 )
+  110                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+                  C( K1, L2 ) = X( 1, 2 )
+                  C( K2, L1 ) = X( 2, 1 )
+                  C( K2, L2 ) = X( 2, 2 )
+               END IF
+*
+  120       CONTINUE
+  130    CONTINUE
+*
+      ELSE IF( .NOT.NOTRNA .AND. .NOT.NOTRNB ) THEN
+*
+*        Solve    A'*X + ISGN*X*B' = scale*C.
+*
+*        The (K,L)th block of X is determined starting from
+*        top-right corner column by column by
+*
+*           A(K,K)'*X(K,L) + ISGN*X(K,L)*B(L,L)' = C(K,L) - R(K,L)
+*
+*        Where
+*                     K-1                          N
+*            R(K,L) = SUM [A(I,K)'*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)'].
+*                     I=1                        J=L+1
+*
+*        Start column loop (index = L)
+*        L1 (L2): column index of the first (last) row of X(K,L)
+*
+         LNEXT = N
+         DO 190 L = N, 1, -1
+            IF( L.GT.LNEXT )
+     $         GO TO 190
+            IF( L.EQ.1 ) THEN
+               L1 = L
+               L2 = L
+            ELSE
+               IF( B( L, L-1 ).NE.ZERO ) THEN
+                  L1 = L - 1
+                  L2 = L
+                  LNEXT = L - 2
+               ELSE
+                  L1 = L
+                  L2 = L
+                  LNEXT = L - 1
+               END IF
+            END IF
+*
+*           Start row loop (index = K)
+*           K1 (K2): row index of the first (last) row of X(K,L)
+*
+            KNEXT = 1
+            DO 180 K = 1, M
+               IF( K.LT.KNEXT )
+     $            GO TO 180
+               IF( K.EQ.M ) THEN
+                  K1 = K
+                  K2 = K
+               ELSE
+                  IF( A( K+1, K ).NE.ZERO ) THEN
+                     K1 = K
+                     K2 = K + 1
+                     KNEXT = K + 2
+                  ELSE
+                     K1 = K
+                     K2 = K
+                     KNEXT = K + 1
+                  END IF
+               END IF
+*
+               IF( L1.EQ.L2 .AND. K1.EQ.K2 ) THEN
+                  SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 )
+                  SUMR = SDOT( N-L1, C( K1, MIN( L1+1, N ) ), LDC,
+     $                         B( L1, MIN( L1+1, N ) ), LDB )
+                  VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR )
+                  SCALOC = ONE
+*
+                  A11 = A( K1, K1 ) + SGN*B( L1, L1 )
+                  DA11 = ABS( A11 )
+                  IF( DA11.LE.SMIN ) THEN
+                     A11 = SMIN
+                     DA11 = SMIN
+                     INFO = 1
+                  END IF
+                  DB = ABS( VEC( 1, 1 ) )
+                  IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN
+                     IF( DB.GT.BIGNUM*DA11 )
+     $                  SCALOC = ONE / DB
+                  END IF
+                  X( 1, 1 ) = ( VEC( 1, 1 )*SCALOC ) / A11
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 140 J = 1, N
+                        CALL SSCAL( M, SCALOC, C( 1, J ), 1 )
+  140                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+*
+               ELSE IF( L1.EQ.L2 .AND. K1.NE.K2 ) THEN
+*
+                  SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 )
+                  SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC,
+     $                         B( L1, MIN( L2+1, N ) ), LDB )
+                  VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = SDOT( K1-1, A( 1, K2 ), 1, C( 1, L1 ), 1 )
+                  SUMR = SDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC,
+     $                         B( L1, MIN( L2+1, N ) ), LDB )
+                  VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR )
+*
+                  CALL SLALN2( .TRUE., 2, 1, SMIN, ONE, A( K1, K1 ),
+     $                         LDA, ONE, ONE, VEC, 2, -SGN*B( L1, L1 ),
+     $                         ZERO, X, 2, SCALOC, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 1
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 150 J = 1, N
+                        CALL SSCAL( M, SCALOC, C( 1, J ), 1 )
+  150                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+                  C( K2, L1 ) = X( 2, 1 )
+*
+               ELSE IF( L1.NE.L2 .AND. K1.EQ.K2 ) THEN
+*
+                  SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 )
+                  SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC,
+     $                         B( L1, MIN( L2+1, N ) ), LDB )
+                  VEC( 1, 1 ) = SGN*( C( K1, L1 )-( SUML+SGN*SUMR ) )
+*
+                  SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L2 ), 1 )
+                  SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC,
+     $                         B( L2, MIN( L2+1, N ) ), LDB )
+                  VEC( 2, 1 ) = SGN*( C( K1, L2 )-( SUML+SGN*SUMR ) )
+*
+                  CALL SLALN2( .FALSE., 2, 1, SMIN, ONE, B( L1, L1 ),
+     $                         LDB, ONE, ONE, VEC, 2, -SGN*A( K1, K1 ),
+     $                         ZERO, X, 2, SCALOC, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 1
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 160 J = 1, N
+                        CALL SSCAL( M, SCALOC, C( 1, J ), 1 )
+  160                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+                  C( K1, L2 ) = X( 2, 1 )
+*
+               ELSE IF( L1.NE.L2 .AND. K1.NE.K2 ) THEN
+*
+                  SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L1 ), 1 )
+                  SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC,
+     $                         B( L1, MIN( L2+1, N ) ), LDB )
+                  VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = SDOT( K1-1, A( 1, K1 ), 1, C( 1, L2 ), 1 )
+                  SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC,
+     $                         B( L2, MIN( L2+1, N ) ), LDB )
+                  VEC( 1, 2 ) = C( K1, L2 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = SDOT( K1-1, A( 1, K2 ), 1, C( 1, L1 ), 1 )
+                  SUMR = SDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC,
+     $                         B( L1, MIN( L2+1, N ) ), LDB )
+                  VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = SDOT( K1-1, A( 1, K2 ), 1, C( 1, L2 ), 1 )
+                  SUMR = SDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC,
+     $                         B( L2, MIN(L2+1, N ) ), LDB )
+                  VEC( 2, 2 ) = C( K2, L2 ) - ( SUML+SGN*SUMR )
+*
+                  CALL SLASY2( .TRUE., .TRUE., ISGN, 2, 2, A( K1, K1 ),
+     $                         LDA, B( L1, L1 ), LDB, VEC, 2, SCALOC, X,
+     $                         2, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 1
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 170 J = 1, N
+                        CALL SSCAL( M, SCALOC, C( 1, J ), 1 )
+  170                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+                  C( K1, L2 ) = X( 1, 2 )
+                  C( K2, L1 ) = X( 2, 1 )
+                  C( K2, L2 ) = X( 2, 2 )
+               END IF
+*
+  180       CONTINUE
+  190    CONTINUE
+*
+      ELSE IF( NOTRNA .AND. .NOT.NOTRNB ) THEN
+*
+*        Solve    A*X + ISGN*X*B' = scale*C.
+*
+*        The (K,L)th block of X is determined starting from
+*        bottom-right corner column by column by
+*
+*            A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)' = C(K,L) - R(K,L)
+*
+*        Where
+*                      M                          N
+*            R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)'].
+*                    I=K+1                      J=L+1
+*
+*        Start column loop (index = L)
+*        L1 (L2): column index of the first (last) row of X(K,L)
+*
+         LNEXT = N
+         DO 250 L = N, 1, -1
+            IF( L.GT.LNEXT )
+     $         GO TO 250
+            IF( L.EQ.1 ) THEN
+               L1 = L
+               L2 = L
+            ELSE
+               IF( B( L, L-1 ).NE.ZERO ) THEN
+                  L1 = L - 1
+                  L2 = L
+                  LNEXT = L - 2
+               ELSE
+                  L1 = L
+                  L2 = L
+                  LNEXT = L - 1
+               END IF
+            END IF
+*
+*           Start row loop (index = K)
+*           K1 (K2): row index of the first (last) row of X(K,L)
+*
+            KNEXT = M
+            DO 240 K = M, 1, -1
+               IF( K.GT.KNEXT )
+     $            GO TO 240
+               IF( K.EQ.1 ) THEN
+                  K1 = K
+                  K2 = K
+               ELSE
+                  IF( A( K, K-1 ).NE.ZERO ) THEN
+                     K1 = K - 1
+                     K2 = K
+                     KNEXT = K - 2
+                  ELSE
+                     K1 = K
+                     K2 = K
+                     KNEXT = K - 1
+                  END IF
+               END IF
+*
+               IF( L1.EQ.L2 .AND. K1.EQ.K2 ) THEN
+                  SUML = SDOT( M-K1, A( K1, MIN(K1+1, M ) ), LDA,
+     $                   C( MIN( K1+1, M ), L1 ), 1 )
+                  SUMR = SDOT( N-L1, C( K1, MIN( L1+1, N ) ), LDC,
+     $                         B( L1, MIN( L1+1, N ) ), LDB )
+                  VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR )
+                  SCALOC = ONE
+*
+                  A11 = A( K1, K1 ) + SGN*B( L1, L1 )
+                  DA11 = ABS( A11 )
+                  IF( DA11.LE.SMIN ) THEN
+                     A11 = SMIN
+                     DA11 = SMIN
+                     INFO = 1
+                  END IF
+                  DB = ABS( VEC( 1, 1 ) )
+                  IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN
+                     IF( DB.GT.BIGNUM*DA11 )
+     $                  SCALOC = ONE / DB
+                  END IF
+                  X( 1, 1 ) = ( VEC( 1, 1 )*SCALOC ) / A11
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 200 J = 1, N
+                        CALL SSCAL( M, SCALOC, C( 1, J ), 1 )
+  200                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+*
+               ELSE IF( L1.EQ.L2 .AND. K1.NE.K2 ) THEN
+*
+                  SUML = SDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA,
+     $                         C( MIN( K2+1, M ), L1 ), 1 )
+                  SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC,
+     $                         B( L1, MIN( L2+1, N ) ), LDB )
+                  VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = SDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA,
+     $                         C( MIN( K2+1, M ), L1 ), 1 )
+                  SUMR = SDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC,
+     $                         B( L1, MIN( L2+1, N ) ), LDB )
+                  VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR )
+*
+                  CALL SLALN2( .FALSE., 2, 1, SMIN, ONE, A( K1, K1 ),
+     $                         LDA, ONE, ONE, VEC, 2, -SGN*B( L1, L1 ),
+     $                         ZERO, X, 2, SCALOC, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 1
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 210 J = 1, N
+                        CALL SSCAL( M, SCALOC, C( 1, J ), 1 )
+  210                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+                  C( K2, L1 ) = X( 2, 1 )
+*
+               ELSE IF( L1.NE.L2 .AND. K1.EQ.K2 ) THEN
+*
+                  SUML = SDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA,
+     $                         C( MIN( K1+1, M ), L1 ), 1 )
+                  SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC,
+     $                         B( L1, MIN( L2+1, N ) ), LDB )
+                  VEC( 1, 1 ) = SGN*( C( K1, L1 )-( SUML+SGN*SUMR ) )
+*
+                  SUML = SDOT( M-K1, A( K1, MIN( K1+1, M ) ), LDA,
+     $                         C( MIN( K1+1, M ), L2 ), 1 )
+                  SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC,
+     $                         B( L2, MIN( L2+1, N ) ), LDB )
+                  VEC( 2, 1 ) = SGN*( C( K1, L2 )-( SUML+SGN*SUMR ) )
+*
+                  CALL SLALN2( .FALSE., 2, 1, SMIN, ONE, B( L1, L1 ),
+     $                         LDB, ONE, ONE, VEC, 2, -SGN*A( K1, K1 ),
+     $                         ZERO, X, 2, SCALOC, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 1
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 220 J = 1, N
+                        CALL SSCAL( M, SCALOC, C( 1, J ), 1 )
+  220                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+                  C( K1, L2 ) = X( 2, 1 )
+*
+               ELSE IF( L1.NE.L2 .AND. K1.NE.K2 ) THEN
+*
+                  SUML = SDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA,
+     $                         C( MIN( K2+1, M ), L1 ), 1 )
+                  SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC,
+     $                         B( L1, MIN( L2+1, N ) ), LDB )
+                  VEC( 1, 1 ) = C( K1, L1 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = SDOT( M-K2, A( K1, MIN( K2+1, M ) ), LDA,
+     $                         C( MIN( K2+1, M ), L2 ), 1 )
+                  SUMR = SDOT( N-L2, C( K1, MIN( L2+1, N ) ), LDC,
+     $                         B( L2, MIN( L2+1, N ) ), LDB )
+                  VEC( 1, 2 ) = C( K1, L2 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = SDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA,
+     $                         C( MIN( K2+1, M ), L1 ), 1 )
+                  SUMR = SDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC,
+     $                         B( L1, MIN( L2+1, N ) ), LDB )
+                  VEC( 2, 1 ) = C( K2, L1 ) - ( SUML+SGN*SUMR )
+*
+                  SUML = SDOT( M-K2, A( K2, MIN( K2+1, M ) ), LDA,
+     $                         C( MIN( K2+1, M ), L2 ), 1 )
+                  SUMR = SDOT( N-L2, C( K2, MIN( L2+1, N ) ), LDC,
+     $                         B( L2, MIN( L2+1, N ) ), LDB )
+                  VEC( 2, 2 ) = C( K2, L2 ) - ( SUML+SGN*SUMR )
+*
+                  CALL SLASY2( .FALSE., .TRUE., ISGN, 2, 2, A( K1, K1 ),
+     $                         LDA, B( L1, L1 ), LDB, VEC, 2, SCALOC, X,
+     $                         2, XNORM, IERR )
+                  IF( IERR.NE.0 )
+     $               INFO = 1
+*
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 230 J = 1, N
+                        CALL SSCAL( M, SCALOC, C( 1, J ), 1 )
+  230                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+                  C( K1, L1 ) = X( 1, 1 )
+                  C( K1, L2 ) = X( 1, 2 )
+                  C( K2, L1 ) = X( 2, 1 )
+                  C( K2, L2 ) = X( 2, 2 )
+               END IF
+*
+  240       CONTINUE
+  250    CONTINUE
+*
+      END IF
+*
+      RETURN
+*
+*     End of STRSYL
+*
+      END
diff --git a/costa/native/external/lapack/strti2.f b/costa/native/external/lapack/strti2.f
new file mode 100644
index 000000000..56d7d6a93
--- /dev/null
+++ b/costa/native/external/lapack/strti2.f
@@ -0,0 +1,147 @@
+      SUBROUTINE STRTI2( UPLO, DIAG, N, A, LDA, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STRTI2 computes the inverse of a real upper or lower triangular
+*  matrix.
+*
+*  This is the Level 2 BLAS version of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the matrix A is upper or lower triangular.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  DIAG    (input) CHARACTER*1
+*          Specifies whether or not the matrix A is unit triangular.
+*          = 'N':  Non-unit triangular
+*          = 'U':  Unit triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the triangular matrix A.  If UPLO = 'U', the
+*          leading n by n upper triangular part of the array A contains
+*          the upper triangular matrix, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n by n lower triangular part of the array A contains
+*          the lower triangular matrix, and the strictly upper
+*          triangular part of A is not referenced.  If DIAG = 'U', the
+*          diagonal elements of A are also not referenced and are
+*          assumed to be 1.
+*
+*          On exit, the (triangular) inverse of the original matrix, in
+*          the same storage format.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE
+      PARAMETER          ( ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, UPPER
+      INTEGER            J
+      REAL               AJJ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SSCAL, STRMV, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOUNIT = LSAME( DIAG, 'N' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'STRTI2', -INFO )
+         RETURN
+      END IF
+*
+      IF( UPPER ) THEN
+*
+*        Compute inverse of upper triangular matrix.
+*
+         DO 10 J = 1, N
+            IF( NOUNIT ) THEN
+               A( J, J ) = ONE / A( J, J )
+               AJJ = -A( J, J )
+            ELSE
+               AJJ = -ONE
+            END IF
+*
+*           Compute elements 1:j-1 of j-th column.
+*
+            CALL STRMV( 'Upper', 'No transpose', DIAG, J-1, A, LDA,
+     $                  A( 1, J ), 1 )
+            CALL SSCAL( J-1, AJJ, A( 1, J ), 1 )
+   10    CONTINUE
+      ELSE
+*
+*        Compute inverse of lower triangular matrix.
+*
+         DO 20 J = N, 1, -1
+            IF( NOUNIT ) THEN
+               A( J, J ) = ONE / A( J, J )
+               AJJ = -A( J, J )
+            ELSE
+               AJJ = -ONE
+            END IF
+            IF( J.LT.N ) THEN
+*
+*              Compute elements j+1:n of j-th column.
+*
+               CALL STRMV( 'Lower', 'No transpose', DIAG, N-J,
+     $                     A( J+1, J+1 ), LDA, A( J+1, J ), 1 )
+               CALL SSCAL( N-J, AJJ, A( J+1, J ), 1 )
+            END IF
+   20    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of STRTI2
+*
+      END
diff --git a/costa/native/external/lapack/strtri.f b/costa/native/external/lapack/strtri.f
new file mode 100644
index 000000000..f8dfac142
--- /dev/null
+++ b/costa/native/external/lapack/strtri.f
@@ -0,0 +1,177 @@
+      SUBROUTINE STRTRI( UPLO, DIAG, N, A, LDA, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STRTRI computes the inverse of a real upper or lower triangular
+*  matrix A.
+*
+*  This is the Level 3 BLAS version of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the triangular matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of the array A contains
+*          the upper triangular matrix, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of the array A contains
+*          the lower triangular matrix, and the strictly upper
+*          triangular part of A is not referenced.  If DIAG = 'U', the
+*          diagonal elements of A are also not referenced and are
+*          assumed to be 1.
+*          On exit, the (triangular) inverse of the original matrix, in
+*          the same storage format.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, A(i,i) is exactly zero.  The triangular
+*               matrix is singular and its inverse can not be computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, UPPER
+      INTEGER            J, JB, NB, NN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           STRMM, STRSM, STRTI2, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOUNIT = LSAME( DIAG, 'N' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'STRTRI', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Check for singularity if non-unit.
+*
+      IF( NOUNIT ) THEN
+         DO 10 INFO = 1, N
+            IF( A( INFO, INFO ).EQ.ZERO )
+     $         RETURN
+   10    CONTINUE
+         INFO = 0
+      END IF
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 )
+      IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+*        Use unblocked code
+*
+         CALL STRTI2( UPLO, DIAG, N, A, LDA, INFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( UPPER ) THEN
+*
+*           Compute inverse of upper triangular matrix
+*
+            DO 20 J = 1, N, NB
+               JB = MIN( NB, N-J+1 )
+*
+*              Compute rows 1:j-1 of current block column
+*
+               CALL STRMM( 'Left', 'Upper', 'No transpose', DIAG, J-1,
+     $                     JB, ONE, A, LDA, A( 1, J ), LDA )
+               CALL STRSM( 'Right', 'Upper', 'No transpose', DIAG, J-1,
+     $                     JB, -ONE, A( J, J ), LDA, A( 1, J ), LDA )
+*
+*              Compute inverse of current diagonal block
+*
+               CALL STRTI2( 'Upper', DIAG, JB, A( J, J ), LDA, INFO )
+   20       CONTINUE
+         ELSE
+*
+*           Compute inverse of lower triangular matrix
+*
+            NN = ( ( N-1 ) / NB )*NB + 1
+            DO 30 J = NN, 1, -NB
+               JB = MIN( NB, N-J+1 )
+               IF( J+JB.LE.N ) THEN
+*
+*                 Compute rows j+jb:n of current block column
+*
+                  CALL STRMM( 'Left', 'Lower', 'No transpose', DIAG,
+     $                        N-J-JB+1, JB, ONE, A( J+JB, J+JB ), LDA,
+     $                        A( J+JB, J ), LDA )
+                  CALL STRSM( 'Right', 'Lower', 'No transpose', DIAG,
+     $                        N-J-JB+1, JB, -ONE, A( J, J ), LDA,
+     $                        A( J+JB, J ), LDA )
+               END IF
+*
+*              Compute inverse of current diagonal block
+*
+               CALL STRTI2( 'Lower', DIAG, JB, A( J, J ), LDA, INFO )
+   30       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of STRTRI
+*
+      END
diff --git a/costa/native/external/lapack/strtrs.f b/costa/native/external/lapack/strtrs.f
new file mode 100644
index 000000000..9be345ccf
--- /dev/null
+++ b/costa/native/external/lapack/strtrs.f
@@ -0,0 +1,148 @@
+      SUBROUTINE STRTRS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993 
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, TRANS, UPLO
+      INTEGER            INFO, LDA, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STRTRS solves a triangular system of the form
+*
+*     A * X = B  or  A**T * X = B,
+*
+*  where A is a triangular matrix of order N, and B is an N-by-NRHS
+*  matrix.  A check is made to verify that A is nonsingular.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B  (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose = Transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input) REAL array, dimension (LDA,N)
+*          The triangular matrix A.  If UPLO = 'U', the leading N-by-N
+*          upper triangular part of the array A contains the upper
+*          triangular matrix, and the strictly lower triangular part of
+*          A is not referenced.  If UPLO = 'L', the leading N-by-N lower
+*          triangular part of the array A contains the lower triangular
+*          matrix, and the strictly upper triangular part of A is not
+*          referenced.  If DIAG = 'U', the diagonal elements of A are
+*          also not referenced and are assumed to be 1.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input/output) REAL array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, if INFO = 0, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, the i-th diagonal element of A is zero,
+*               indicating that the matrix is singular and the solutions
+*               X have not been computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO, ONE
+      PARAMETER          ( ZERO = 0.0E+0, ONE = 1.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           STRSM, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOUNIT = LSAME( DIAG, 'N' )
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.
+     $         LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'STRTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Check for singularity.
+*
+      IF( NOUNIT ) THEN
+         DO 10 INFO = 1, N
+            IF( A( INFO, INFO ).EQ.ZERO )
+     $         RETURN
+   10    CONTINUE
+      END IF
+      INFO = 0
+*
+*     Solve A * x = b  or  A' * x = b.
+*
+      CALL STRSM( 'Left', UPLO, TRANS, DIAG, N, NRHS, ONE, A, LDA, B,
+     $            LDB )
+*
+      RETURN
+*
+*     End of STRTRS
+*
+      END
diff --git a/costa/native/external/lapack/stzrqf.f b/costa/native/external/lapack/stzrqf.f
new file mode 100644
index 000000000..3de793103
--- /dev/null
+++ b/costa/native/external/lapack/stzrqf.f
@@ -0,0 +1,165 @@
+      SUBROUTINE STZRQF( M, N, A, LDA, TAU, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), TAU( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This routine is deprecated and has been replaced by routine STZRZF.
+*
+*  STZRQF reduces the M-by-N ( M<=N ) real upper trapezoidal matrix A
+*  to upper triangular form by means of orthogonal transformations.
+*
+*  The upper trapezoidal matrix A is factored as
+*
+*     A = ( R  0 ) * Z,
+*
+*  where Z is an N-by-N orthogonal matrix and R is an M-by-M upper
+*  triangular matrix.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= M.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the leading M-by-N upper trapezoidal part of the
+*          array A must contain the matrix to be factorized.
+*          On exit, the leading M-by-M upper triangular part of A
+*          contains the upper triangular matrix R, and elements M+1 to
+*          N of the first M rows of A, with the array TAU, represent the
+*          orthogonal matrix Z as a product of M elementary reflectors.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) REAL array, dimension (M)
+*          The scalar factors of the elementary reflectors.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The factorization is obtained by Householder's method.  The kth
+*  transformation matrix, Z( k ), which is used to introduce zeros into
+*  the ( m - k + 1 )th row of A, is given in the form
+*
+*     Z( k ) = ( I     0   ),
+*              ( 0  T( k ) )
+*
+*  where
+*
+*     T( k ) = I - tau*u( k )*u( k )',   u( k ) = (   1    ),
+*                                                 (   0    )
+*                                                 ( z( k ) )
+*
+*  tau is a scalar and z( k ) is an ( n - m ) element vector.
+*  tau and z( k ) are chosen to annihilate the elements of the kth row
+*  of X.
+*
+*  The scalar tau is returned in the kth element of TAU and the vector
+*  u( k ) in the kth row of A, such that the elements of z( k ) are
+*  in  a( k, m + 1 ), ..., a( k, n ). The elements of R are returned in
+*  the upper triangular part of A.
+*
+*  Z is given by
+*
+*     Z =  Z( 1 ) * Z( 2 ) * ... * Z( m ).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ONE, ZERO
+      PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, K, M1
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SAXPY, SCOPY, SGEMV, SGER, SLARFG, XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.M ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'STZRQF', -INFO )
+         RETURN
+      END IF
+*
+*     Perform the factorization.
+*
+      IF( M.EQ.0 )
+     $   RETURN
+      IF( M.EQ.N ) THEN
+         DO 10 I = 1, N
+            TAU( I ) = ZERO
+   10    CONTINUE
+      ELSE
+         M1 = MIN( M+1, N )
+         DO 20 K = M, 1, -1
+*
+*           Use a Householder reflection to zero the kth row of A.
+*           First set up the reflection.
+*
+            CALL SLARFG( N-M+1, A( K, K ), A( K, M1 ), LDA, TAU( K ) )
+*
+            IF( ( TAU( K ).NE.ZERO ) .AND. ( K.GT.1 ) ) THEN
+*
+*              We now perform the operation  A := A*P( k ).
+*
+*              Use the first ( k - 1 ) elements of TAU to store  a( k ),
+*              where  a( k ) consists of the first ( k - 1 ) elements of
+*              the  kth column  of  A.  Also  let  B  denote  the  first
+*              ( k - 1 ) rows of the last ( n - m ) columns of A.
+*
+               CALL SCOPY( K-1, A( 1, K ), 1, TAU, 1 )
+*
+*              Form   w = a( k ) + B*z( k )  in TAU.
+*
+               CALL SGEMV( 'No transpose', K-1, N-M, ONE, A( 1, M1 ),
+     $                     LDA, A( K, M1 ), LDA, ONE, TAU, 1 )
+*
+*              Now form  a( k ) := a( k ) - tau*w
+*              and       B      := B      - tau*w*z( k )'.
+*
+               CALL SAXPY( K-1, -TAU( K ), TAU, 1, A( 1, K ), 1 )
+               CALL SGER( K-1, N-M, -TAU( K ), TAU, 1, A( K, M1 ), LDA,
+     $                    A( 1, M1 ), LDA )
+            END IF
+   20    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of STZRQF
+*
+      END
diff --git a/costa/native/external/lapack/stzrzf.f b/costa/native/external/lapack/stzrzf.f
new file mode 100644
index 000000000..eb198bccb
--- /dev/null
+++ b/costa/native/external/lapack/stzrzf.f
@@ -0,0 +1,241 @@
+      SUBROUTINE STZRZF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  STZRZF reduces the M-by-N ( M<=N ) real upper trapezoidal matrix A
+*  to upper triangular form by means of orthogonal transformations.
+*
+*  The upper trapezoidal matrix A is factored as
+*
+*     A = ( R  0 ) * Z,
+*
+*  where Z is an N-by-N orthogonal matrix and R is an M-by-M upper
+*  triangular matrix.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) REAL array, dimension (LDA,N)
+*          On entry, the leading M-by-N upper trapezoidal part of the
+*          array A must contain the matrix to be factorized.
+*          On exit, the leading M-by-M upper triangular part of A
+*          contains the upper triangular matrix R, and elements M+1 to
+*          N of the first M rows of A, with the array TAU, represent the
+*          orthogonal matrix Z as a product of M elementary reflectors.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) REAL array, dimension (M)
+*          The scalar factors of the elementary reflectors.
+*
+*  WORK    (workspace/output) REAL array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,M).
+*          For optimum performance LWORK >= M*NB, where NB is
+*          the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+*  The factorization is obtained by Householder's method.  The kth
+*  transformation matrix, Z( k ), which is used to introduce zeros into
+*  the ( m - k + 1 )th row of A, is given in the form
+*
+*     Z( k ) = ( I     0   ),
+*              ( 0  T( k ) )
+*
+*  where
+*
+*     T( k ) = I - tau*u( k )*u( k )',   u( k ) = (   1    ),
+*                                                 (   0    )
+*                                                 ( z( k ) )
+*
+*  tau is a scalar and z( k ) is an ( n - m ) element vector.
+*  tau and z( k ) are chosen to annihilate the elements of the kth row
+*  of X.
+*
+*  The scalar tau is returned in the kth element of TAU and the vector
+*  u( k ) in the kth row of A, such that the elements of z( k ) are
+*  in  a( k, m + 1 ), ..., a( k, n ). The elements of R are returned in
+*  the upper triangular part of A.
+*
+*  Z is given by
+*
+*     Z =  Z( 1 ) * Z( 2 ) * ... * Z( m ).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      REAL               ZERO
+      PARAMETER          ( ZERO = 0.0E+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IWS, KI, KK, LDWORK, LWKOPT, M1, MU, NB,
+     $                   NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SLARZB, SLARZT, SLATRZ, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.M ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size.
+*
+         NB = ILAENV( 1, 'SGERQF', ' ', M, N, -1, -1 )
+         LWKOPT = M*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'STZRZF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      ELSE IF( M.EQ.N ) THEN
+         DO 10 I = 1, N
+            TAU( I ) = ZERO
+   10    CONTINUE
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 1
+      IWS = M
+      IF( NB.GT.1 .AND. NB.LT.M ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'SGERQF', ' ', M, N, -1, -1 ) )
+         IF( NX.LT.M ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = M
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'SGERQF', ' ', M, N, -1,
+     $                 -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.M .AND. NX.LT.M ) THEN
+*
+*        Use blocked code initially.
+*        The last kk rows are handled by the block method.
+*
+         M1 = MIN( M+1, N )
+         KI = ( ( M-NX-1 ) / NB )*NB
+         KK = MIN( M, KI+NB )
+*
+         DO 20 I = M - KK + KI + 1, M - KK + 1, -NB
+            IB = MIN( M-I+1, NB )
+*
+*           Compute the TZ factorization of the current block
+*           A(i:i+ib-1,i:n)
+*
+            CALL SLATRZ( IB, N-I+1, N-M, A( I, I ), LDA, TAU( I ),
+     $                   WORK )
+            IF( I.GT.1 ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i+ib-1) . . . H(i+1) H(i)
+*
+               CALL SLARZT( 'Backward', 'Rowwise', N-M, IB, A( I, M1 ),
+     $                      LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H to A(1:i-1,i:n) from the right
+*
+               CALL SLARZB( 'Right', 'No transpose', 'Backward',
+     $                      'Rowwise', I-1, N-I+1, IB, N-M, A( I, M1 ),
+     $                      LDA, WORK, LDWORK, A( 1, I ), LDA,
+     $                      WORK( IB+1 ), LDWORK )
+            END IF
+   20    CONTINUE
+         MU = I + NB - 1
+      ELSE
+         MU = M
+      END IF
+*
+*     Use unblocked code to factor the last or only block
+*
+      IF( MU.GT.0 )
+     $   CALL SLATRZ( MU, N, N-M, A, LDA, TAU, WORK )
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of STZRZF
+*
+      END
diff --git a/costa/native/external/lapack/xerbla.f b/costa/native/external/lapack/xerbla.f
new file mode 100644
index 000000000..6e11175f3
--- /dev/null
+++ b/costa/native/external/lapack/xerbla.f
@@ -0,0 +1,46 @@
+      SUBROUTINE XERBLA( SRNAME, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER*6        SRNAME
+      INTEGER            INFO
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  XERBLA  is an error handler for the LAPACK routines.
+*  It is called by an LAPACK routine if an input parameter has an
+*  invalid value.  A message is printed and execution stops.
+*
+*  Installers may consider modifying the STOP statement in order to
+*  call system-specific exception-handling facilities.
+*
+*  Arguments
+*  =========
+*
+*  SRNAME  (input) CHARACTER*6
+*          The name of the routine which called XERBLA.
+*
+*  INFO    (input) INTEGER
+*          The position of the invalid parameter in the parameter list
+*          of the calling routine.
+*
+* =====================================================================
+*
+*     .. Executable Statements ..
+*
+      WRITE( *, FMT = 9999 )SRNAME, INFO
+*
+      STOP
+*
+ 9999 FORMAT( ' ** On entry to ', A6, ' parameter number ', I2, ' had ',
+     $      'an illegal value' )
+*
+*     End of XERBLA
+*
+      END
diff --git a/costa/native/external/lapack/zbdsqr.f b/costa/native/external/lapack/zbdsqr.f
new file mode 100644
index 000000000..41f809263
--- /dev/null
+++ b/costa/native/external/lapack/zbdsqr.f
@@ -0,0 +1,733 @@
+      SUBROUTINE ZBDSQR( UPLO, N, NCVT, NRU, NCC, D, E, VT, LDVT, U,
+     $                   LDU, C, LDC, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDC, LDU, LDVT, N, NCC, NCVT, NRU
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * ), E( * ), RWORK( * )
+      COMPLEX*16         C( LDC, * ), U( LDU, * ), VT( LDVT, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZBDSQR computes the singular value decomposition (SVD) of a real
+*  N-by-N (upper or lower) bidiagonal matrix B:  B = Q * S * P' (P'
+*  denotes the transpose of P), where S is a diagonal matrix with
+*  non-negative diagonal elements (the singular values of B), and Q
+*  and P are orthogonal matrices.
+*
+*  The routine computes S, and optionally computes U * Q, P' * VT,
+*  or Q' * C, for given complex input matrices U, VT, and C.
+*
+*  See "Computing  Small Singular Values of Bidiagonal Matrices With
+*  Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan,
+*  LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11,
+*  no. 5, pp. 873-912, Sept 1990) and
+*  "Accurate singular values and differential qd algorithms," by
+*  B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics
+*  Department, University of California at Berkeley, July 1992
+*  for a detailed description of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  B is upper bidiagonal;
+*          = 'L':  B is lower bidiagonal.
+*
+*  N       (input) INTEGER
+*          The order of the matrix B.  N >= 0.
+*
+*  NCVT    (input) INTEGER
+*          The number of columns of the matrix VT. NCVT >= 0.
+*
+*  NRU     (input) INTEGER
+*          The number of rows of the matrix U. NRU >= 0.
+*
+*  NCC     (input) INTEGER
+*          The number of columns of the matrix C. NCC >= 0.
+*
+*  D       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the n diagonal elements of the bidiagonal matrix B.
+*          On exit, if INFO=0, the singular values of B in decreasing
+*          order.
+*
+*  E       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the elements of E contain the
+*          offdiagonal elements of of the bidiagonal matrix whose SVD
+*          is desired. On normal exit (INFO = 0), E is destroyed.
+*          If the algorithm does not converge (INFO > 0), D and E
+*          will contain the diagonal and superdiagonal elements of a
+*          bidiagonal matrix orthogonally equivalent to the one given
+*          as input. E(N) is used for workspace.
+*
+*  VT      (input/output) COMPLEX*16 array, dimension (LDVT, NCVT)
+*          On entry, an N-by-NCVT matrix VT.
+*          On exit, VT is overwritten by P' * VT.
+*          VT is not referenced if NCVT = 0.
+*
+*  LDVT    (input) INTEGER
+*          The leading dimension of the array VT.
+*          LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0.
+*
+*  U       (input/output) COMPLEX*16 array, dimension (LDU, N)
+*          On entry, an NRU-by-N matrix U.
+*          On exit, U is overwritten by U * Q.
+*          U is not referenced if NRU = 0.
+*
+*  LDU     (input) INTEGER
+*          The leading dimension of the array U.  LDU >= max(1,NRU).
+*
+*  C       (input/output) COMPLEX*16 array, dimension (LDC, NCC)
+*          On entry, an N-by-NCC matrix C.
+*          On exit, C is overwritten by Q' * C.
+*          C is not referenced if NCC = 0.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C.
+*          LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0.
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (4*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  If INFO = -i, the i-th argument had an illegal value
+*          > 0:  the algorithm did not converge; D and E contain the
+*                elements of a bidiagonal matrix which is orthogonally
+*                similar to the input matrix B;  if INFO = i, i
+*                elements of E have not converged to zero.
+*
+*  Internal Parameters
+*  ===================
+*
+*  TOLMUL  DOUBLE PRECISION, default = max(10,min(100,EPS**(-1/8)))
+*          TOLMUL controls the convergence criterion of the QR loop.
+*          If it is positive, TOLMUL*EPS is the desired relative
+*             precision in the computed singular values.
+*          If it is negative, abs(TOLMUL*EPS*sigma_max) is the
+*             desired absolute accuracy in the computed singular
+*             values (corresponds to relative accuracy
+*             abs(TOLMUL*EPS) in the largest singular value.
+*          abs(TOLMUL) should be between 1 and 1/EPS, and preferably
+*             between 10 (for fast convergence) and .1/EPS
+*             (for there to be some accuracy in the results).
+*          Default is to lose at either one eighth or 2 of the
+*             available decimal digits in each computed singular value
+*             (whichever is smaller).
+*
+*  MAXITR  INTEGER, default = 6
+*          MAXITR controls the maximum number of passes of the
+*          algorithm through its inner loop. The algorithms stops
+*          (and so fails to converge) if the number of passes
+*          through the inner loop exceeds MAXITR*N**2.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D0 )
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D0 )
+      DOUBLE PRECISION   NEGONE
+      PARAMETER          ( NEGONE = -1.0D0 )
+      DOUBLE PRECISION   HNDRTH
+      PARAMETER          ( HNDRTH = 0.01D0 )
+      DOUBLE PRECISION   TEN
+      PARAMETER          ( TEN = 10.0D0 )
+      DOUBLE PRECISION   HNDRD
+      PARAMETER          ( HNDRD = 100.0D0 )
+      DOUBLE PRECISION   MEIGTH
+      PARAMETER          ( MEIGTH = -0.125D0 )
+      INTEGER            MAXITR
+      PARAMETER          ( MAXITR = 6 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LOWER, ROTATE
+      INTEGER            I, IDIR, ISUB, ITER, J, LL, LLL, M, MAXIT, NM1,
+     $                   NM12, NM13, OLDLL, OLDM
+      DOUBLE PRECISION   ABSE, ABSS, COSL, COSR, CS, EPS, F, G, H, MU,
+     $                   OLDCS, OLDSN, R, SHIFT, SIGMN, SIGMX, SINL,
+     $                   SINR, SLL, SMAX, SMIN, SMINL, SMINLO, SMINOA,
+     $                   SN, THRESH, TOL, TOLMUL, UNFL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARTG, DLAS2, DLASQ1, DLASV2, XERBLA, ZDROT,
+     $                   ZDSCAL, ZLASR, ZSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, MAX, MIN, SIGN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      LOWER = LSAME( UPLO, 'L' )
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LOWER ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NCVT.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRU.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NCC.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( ( NCVT.EQ.0 .AND. LDVT.LT.1 ) .OR.
+     $         ( NCVT.GT.0 .AND. LDVT.LT.MAX( 1, N ) ) ) THEN
+         INFO = -9
+      ELSE IF( LDU.LT.MAX( 1, NRU ) ) THEN
+         INFO = -11
+      ELSE IF( ( NCC.EQ.0 .AND. LDC.LT.1 ) .OR.
+     $         ( NCC.GT.0 .AND. LDC.LT.MAX( 1, N ) ) ) THEN
+         INFO = -13
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZBDSQR', -INFO )
+         RETURN
+      END IF
+      IF( N.EQ.0 )
+     $   RETURN
+      IF( N.EQ.1 )
+     $   GO TO 160
+*
+*     ROTATE is true if any singular vectors desired, false otherwise
+*
+      ROTATE = ( NCVT.GT.0 ) .OR. ( NRU.GT.0 ) .OR. ( NCC.GT.0 )
+*
+*     If no singular vectors desired, use qd algorithm
+*
+      IF( .NOT.ROTATE ) THEN
+         CALL DLASQ1( N, D, E, RWORK, INFO )
+         RETURN
+      END IF
+*
+      NM1 = N - 1
+      NM12 = NM1 + NM1
+      NM13 = NM12 + NM1
+      IDIR = 0
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'Epsilon' )
+      UNFL = DLAMCH( 'Safe minimum' )
+*
+*     If matrix lower bidiagonal, rotate to be upper bidiagonal
+*     by applying Givens rotations on the left
+*
+      IF( LOWER ) THEN
+         DO 10 I = 1, N - 1
+            CALL DLARTG( D( I ), E( I ), CS, SN, R )
+            D( I ) = R
+            E( I ) = SN*D( I+1 )
+            D( I+1 ) = CS*D( I+1 )
+            RWORK( I ) = CS
+            RWORK( NM1+I ) = SN
+   10    CONTINUE
+*
+*        Update singular vectors if desired
+*
+         IF( NRU.GT.0 )
+     $      CALL ZLASR( 'R', 'V', 'F', NRU, N, RWORK( 1 ), RWORK( N ),
+     $                  U, LDU )
+         IF( NCC.GT.0 )
+     $      CALL ZLASR( 'L', 'V', 'F', N, NCC, RWORK( 1 ), RWORK( N ),
+     $                  C, LDC )
+      END IF
+*
+*     Compute singular values to relative accuracy TOL
+*     (By setting TOL to be negative, algorithm will compute
+*     singular values to absolute accuracy ABS(TOL)*norm(input matrix))
+*
+      TOLMUL = MAX( TEN, MIN( HNDRD, EPS**MEIGTH ) )
+      TOL = TOLMUL*EPS
+*
+*     Compute approximate maximum, minimum singular values
+*
+      SMAX = ZERO
+      DO 20 I = 1, N
+         SMAX = MAX( SMAX, ABS( D( I ) ) )
+   20 CONTINUE
+      DO 30 I = 1, N - 1
+         SMAX = MAX( SMAX, ABS( E( I ) ) )
+   30 CONTINUE
+      SMINL = ZERO
+      IF( TOL.GE.ZERO ) THEN
+*
+*        Relative accuracy desired
+*
+         SMINOA = ABS( D( 1 ) )
+         IF( SMINOA.EQ.ZERO )
+     $      GO TO 50
+         MU = SMINOA
+         DO 40 I = 2, N
+            MU = ABS( D( I ) )*( MU / ( MU+ABS( E( I-1 ) ) ) )
+            SMINOA = MIN( SMINOA, MU )
+            IF( SMINOA.EQ.ZERO )
+     $         GO TO 50
+   40    CONTINUE
+   50    CONTINUE
+         SMINOA = SMINOA / SQRT( DBLE( N ) )
+         THRESH = MAX( TOL*SMINOA, MAXITR*N*N*UNFL )
+      ELSE
+*
+*        Absolute accuracy desired
+*
+         THRESH = MAX( ABS( TOL )*SMAX, MAXITR*N*N*UNFL )
+      END IF
+*
+*     Prepare for main iteration loop for the singular values
+*     (MAXIT is the maximum number of passes through the inner
+*     loop permitted before nonconvergence signalled.)
+*
+      MAXIT = MAXITR*N*N
+      ITER = 0
+      OLDLL = -1
+      OLDM = -1
+*
+*     M points to last element of unconverged part of matrix
+*
+      M = N
+*
+*     Begin main iteration loop
+*
+   60 CONTINUE
+*
+*     Check for convergence or exceeding iteration count
+*
+      IF( M.LE.1 )
+     $   GO TO 160
+      IF( ITER.GT.MAXIT )
+     $   GO TO 200
+*
+*     Find diagonal block of matrix to work on
+*
+      IF( TOL.LT.ZERO .AND. ABS( D( M ) ).LE.THRESH )
+     $   D( M ) = ZERO
+      SMAX = ABS( D( M ) )
+      SMIN = SMAX
+      DO 70 LLL = 1, M - 1
+         LL = M - LLL
+         ABSS = ABS( D( LL ) )
+         ABSE = ABS( E( LL ) )
+         IF( TOL.LT.ZERO .AND. ABSS.LE.THRESH )
+     $      D( LL ) = ZERO
+         IF( ABSE.LE.THRESH )
+     $      GO TO 80
+         SMIN = MIN( SMIN, ABSS )
+         SMAX = MAX( SMAX, ABSS, ABSE )
+   70 CONTINUE
+      LL = 0
+      GO TO 90
+   80 CONTINUE
+      E( LL ) = ZERO
+*
+*     Matrix splits since E(LL) = 0
+*
+      IF( LL.EQ.M-1 ) THEN
+*
+*        Convergence of bottom singular value, return to top of loop
+*
+         M = M - 1
+         GO TO 60
+      END IF
+   90 CONTINUE
+      LL = LL + 1
+*
+*     E(LL) through E(M-1) are nonzero, E(LL-1) is zero
+*
+      IF( LL.EQ.M-1 ) THEN
+*
+*        2 by 2 block, handle separately
+*
+         CALL DLASV2( D( M-1 ), E( M-1 ), D( M ), SIGMN, SIGMX, SINR,
+     $                COSR, SINL, COSL )
+         D( M-1 ) = SIGMX
+         E( M-1 ) = ZERO
+         D( M ) = SIGMN
+*
+*        Compute singular vectors, if desired
+*
+         IF( NCVT.GT.0 )
+     $      CALL ZDROT( NCVT, VT( M-1, 1 ), LDVT, VT( M, 1 ), LDVT,
+     $                  COSR, SINR )
+         IF( NRU.GT.0 )
+     $      CALL ZDROT( NRU, U( 1, M-1 ), 1, U( 1, M ), 1, COSL, SINL )
+         IF( NCC.GT.0 )
+     $      CALL ZDROT( NCC, C( M-1, 1 ), LDC, C( M, 1 ), LDC, COSL,
+     $                  SINL )
+         M = M - 2
+         GO TO 60
+      END IF
+*
+*     If working on new submatrix, choose shift direction
+*     (from larger end diagonal element towards smaller)
+*
+      IF( LL.GT.OLDM .OR. M.LT.OLDLL ) THEN
+         IF( ABS( D( LL ) ).GE.ABS( D( M ) ) ) THEN
+*
+*           Chase bulge from top (big end) to bottom (small end)
+*
+            IDIR = 1
+         ELSE
+*
+*           Chase bulge from bottom (big end) to top (small end)
+*
+            IDIR = 2
+         END IF
+      END IF
+*
+*     Apply convergence tests
+*
+      IF( IDIR.EQ.1 ) THEN
+*
+*        Run convergence test in forward direction
+*        First apply standard test to bottom of matrix
+*
+         IF( ABS( E( M-1 ) ).LE.ABS( TOL )*ABS( D( M ) ) .OR.
+     $       ( TOL.LT.ZERO .AND. ABS( E( M-1 ) ).LE.THRESH ) ) THEN
+            E( M-1 ) = ZERO
+            GO TO 60
+         END IF
+*
+         IF( TOL.GE.ZERO ) THEN
+*
+*           If relative accuracy desired,
+*           apply convergence criterion forward
+*
+            MU = ABS( D( LL ) )
+            SMINL = MU
+            DO 100 LLL = LL, M - 1
+               IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN
+                  E( LLL ) = ZERO
+                  GO TO 60
+               END IF
+               SMINLO = SMINL
+               MU = ABS( D( LLL+1 ) )*( MU / ( MU+ABS( E( LLL ) ) ) )
+               SMINL = MIN( SMINL, MU )
+  100       CONTINUE
+         END IF
+*
+      ELSE
+*
+*        Run convergence test in backward direction
+*        First apply standard test to top of matrix
+*
+         IF( ABS( E( LL ) ).LE.ABS( TOL )*ABS( D( LL ) ) .OR.
+     $       ( TOL.LT.ZERO .AND. ABS( E( LL ) ).LE.THRESH ) ) THEN
+            E( LL ) = ZERO
+            GO TO 60
+         END IF
+*
+         IF( TOL.GE.ZERO ) THEN
+*
+*           If relative accuracy desired,
+*           apply convergence criterion backward
+*
+            MU = ABS( D( M ) )
+            SMINL = MU
+            DO 110 LLL = M - 1, LL, -1
+               IF( ABS( E( LLL ) ).LE.TOL*MU ) THEN
+                  E( LLL ) = ZERO
+                  GO TO 60
+               END IF
+               SMINLO = SMINL
+               MU = ABS( D( LLL ) )*( MU / ( MU+ABS( E( LLL ) ) ) )
+               SMINL = MIN( SMINL, MU )
+  110       CONTINUE
+         END IF
+      END IF
+      OLDLL = LL
+      OLDM = M
+*
+*     Compute shift.  First, test if shifting would ruin relative
+*     accuracy, and if so set the shift to zero.
+*
+      IF( TOL.GE.ZERO .AND. N*TOL*( SMINL / SMAX ).LE.
+     $    MAX( EPS, HNDRTH*TOL ) ) THEN
+*
+*        Use a zero shift to avoid loss of relative accuracy
+*
+         SHIFT = ZERO
+      ELSE
+*
+*        Compute the shift from 2-by-2 block at end of matrix
+*
+         IF( IDIR.EQ.1 ) THEN
+            SLL = ABS( D( LL ) )
+            CALL DLAS2( D( M-1 ), E( M-1 ), D( M ), SHIFT, R )
+         ELSE
+            SLL = ABS( D( M ) )
+            CALL DLAS2( D( LL ), E( LL ), D( LL+1 ), SHIFT, R )
+         END IF
+*
+*        Test if shift negligible, and if so set to zero
+*
+         IF( SLL.GT.ZERO ) THEN
+            IF( ( SHIFT / SLL )**2.LT.EPS )
+     $         SHIFT = ZERO
+         END IF
+      END IF
+*
+*     Increment iteration count
+*
+      ITER = ITER + M - LL
+*
+*     If SHIFT = 0, do simplified QR iteration
+*
+      IF( SHIFT.EQ.ZERO ) THEN
+         IF( IDIR.EQ.1 ) THEN
+*
+*           Chase bulge from top to bottom
+*           Save cosines and sines for later singular vector updates
+*
+            CS = ONE
+            OLDCS = ONE
+            DO 120 I = LL, M - 1
+               CALL DLARTG( D( I )*CS, E( I ), CS, SN, R )
+               IF( I.GT.LL )
+     $            E( I-1 ) = OLDSN*R
+               CALL DLARTG( OLDCS*R, D( I+1 )*SN, OLDCS, OLDSN, D( I ) )
+               RWORK( I-LL+1 ) = CS
+               RWORK( I-LL+1+NM1 ) = SN
+               RWORK( I-LL+1+NM12 ) = OLDCS
+               RWORK( I-LL+1+NM13 ) = OLDSN
+  120       CONTINUE
+            H = D( M )*CS
+            D( M ) = H*OLDCS
+            E( M-1 ) = H*OLDSN
+*
+*           Update singular vectors
+*
+            IF( NCVT.GT.0 )
+     $         CALL ZLASR( 'L', 'V', 'F', M-LL+1, NCVT, RWORK( 1 ),
+     $                     RWORK( N ), VT( LL, 1 ), LDVT )
+            IF( NRU.GT.0 )
+     $         CALL ZLASR( 'R', 'V', 'F', NRU, M-LL+1, RWORK( NM12+1 ),
+     $                     RWORK( NM13+1 ), U( 1, LL ), LDU )
+            IF( NCC.GT.0 )
+     $         CALL ZLASR( 'L', 'V', 'F', M-LL+1, NCC, RWORK( NM12+1 ),
+     $                     RWORK( NM13+1 ), C( LL, 1 ), LDC )
+*
+*           Test convergence
+*
+            IF( ABS( E( M-1 ) ).LE.THRESH )
+     $         E( M-1 ) = ZERO
+*
+         ELSE
+*
+*           Chase bulge from bottom to top
+*           Save cosines and sines for later singular vector updates
+*
+            CS = ONE
+            OLDCS = ONE
+            DO 130 I = M, LL + 1, -1
+               CALL DLARTG( D( I )*CS, E( I-1 ), CS, SN, R )
+               IF( I.LT.M )
+     $            E( I ) = OLDSN*R
+               CALL DLARTG( OLDCS*R, D( I-1 )*SN, OLDCS, OLDSN, D( I ) )
+               RWORK( I-LL ) = CS
+               RWORK( I-LL+NM1 ) = -SN
+               RWORK( I-LL+NM12 ) = OLDCS
+               RWORK( I-LL+NM13 ) = -OLDSN
+  130       CONTINUE
+            H = D( LL )*CS
+            D( LL ) = H*OLDCS
+            E( LL ) = H*OLDSN
+*
+*           Update singular vectors
+*
+            IF( NCVT.GT.0 )
+     $         CALL ZLASR( 'L', 'V', 'B', M-LL+1, NCVT, RWORK( NM12+1 ),
+     $                     RWORK( NM13+1 ), VT( LL, 1 ), LDVT )
+            IF( NRU.GT.0 )
+     $         CALL ZLASR( 'R', 'V', 'B', NRU, M-LL+1, RWORK( 1 ),
+     $                     RWORK( N ), U( 1, LL ), LDU )
+            IF( NCC.GT.0 )
+     $         CALL ZLASR( 'L', 'V', 'B', M-LL+1, NCC, RWORK( 1 ),
+     $                     RWORK( N ), C( LL, 1 ), LDC )
+*
+*           Test convergence
+*
+            IF( ABS( E( LL ) ).LE.THRESH )
+     $         E( LL ) = ZERO
+         END IF
+      ELSE
+*
+*        Use nonzero shift
+*
+         IF( IDIR.EQ.1 ) THEN
+*
+*           Chase bulge from top to bottom
+*           Save cosines and sines for later singular vector updates
+*
+            F = ( ABS( D( LL ) )-SHIFT )*
+     $          ( SIGN( ONE, D( LL ) )+SHIFT / D( LL ) )
+            G = E( LL )
+            DO 140 I = LL, M - 1
+               CALL DLARTG( F, G, COSR, SINR, R )
+               IF( I.GT.LL )
+     $            E( I-1 ) = R
+               F = COSR*D( I ) + SINR*E( I )
+               E( I ) = COSR*E( I ) - SINR*D( I )
+               G = SINR*D( I+1 )
+               D( I+1 ) = COSR*D( I+1 )
+               CALL DLARTG( F, G, COSL, SINL, R )
+               D( I ) = R
+               F = COSL*E( I ) + SINL*D( I+1 )
+               D( I+1 ) = COSL*D( I+1 ) - SINL*E( I )
+               IF( I.LT.M-1 ) THEN
+                  G = SINL*E( I+1 )
+                  E( I+1 ) = COSL*E( I+1 )
+               END IF
+               RWORK( I-LL+1 ) = COSR
+               RWORK( I-LL+1+NM1 ) = SINR
+               RWORK( I-LL+1+NM12 ) = COSL
+               RWORK( I-LL+1+NM13 ) = SINL
+  140       CONTINUE
+            E( M-1 ) = F
+*
+*           Update singular vectors
+*
+            IF( NCVT.GT.0 )
+     $         CALL ZLASR( 'L', 'V', 'F', M-LL+1, NCVT, RWORK( 1 ),
+     $                     RWORK( N ), VT( LL, 1 ), LDVT )
+            IF( NRU.GT.0 )
+     $         CALL ZLASR( 'R', 'V', 'F', NRU, M-LL+1, RWORK( NM12+1 ),
+     $                     RWORK( NM13+1 ), U( 1, LL ), LDU )
+            IF( NCC.GT.0 )
+     $         CALL ZLASR( 'L', 'V', 'F', M-LL+1, NCC, RWORK( NM12+1 ),
+     $                     RWORK( NM13+1 ), C( LL, 1 ), LDC )
+*
+*           Test convergence
+*
+            IF( ABS( E( M-1 ) ).LE.THRESH )
+     $         E( M-1 ) = ZERO
+*
+         ELSE
+*
+*           Chase bulge from bottom to top
+*           Save cosines and sines for later singular vector updates
+*
+            F = ( ABS( D( M ) )-SHIFT )*( SIGN( ONE, D( M ) )+SHIFT /
+     $          D( M ) )
+            G = E( M-1 )
+            DO 150 I = M, LL + 1, -1
+               CALL DLARTG( F, G, COSR, SINR, R )
+               IF( I.LT.M )
+     $            E( I ) = R
+               F = COSR*D( I ) + SINR*E( I-1 )
+               E( I-1 ) = COSR*E( I-1 ) - SINR*D( I )
+               G = SINR*D( I-1 )
+               D( I-1 ) = COSR*D( I-1 )
+               CALL DLARTG( F, G, COSL, SINL, R )
+               D( I ) = R
+               F = COSL*E( I-1 ) + SINL*D( I-1 )
+               D( I-1 ) = COSL*D( I-1 ) - SINL*E( I-1 )
+               IF( I.GT.LL+1 ) THEN
+                  G = SINL*E( I-2 )
+                  E( I-2 ) = COSL*E( I-2 )
+               END IF
+               RWORK( I-LL ) = COSR
+               RWORK( I-LL+NM1 ) = -SINR
+               RWORK( I-LL+NM12 ) = COSL
+               RWORK( I-LL+NM13 ) = -SINL
+  150       CONTINUE
+            E( LL ) = F
+*
+*           Test convergence
+*
+            IF( ABS( E( LL ) ).LE.THRESH )
+     $         E( LL ) = ZERO
+*
+*           Update singular vectors if desired
+*
+            IF( NCVT.GT.0 )
+     $         CALL ZLASR( 'L', 'V', 'B', M-LL+1, NCVT, RWORK( NM12+1 ),
+     $                     RWORK( NM13+1 ), VT( LL, 1 ), LDVT )
+            IF( NRU.GT.0 )
+     $         CALL ZLASR( 'R', 'V', 'B', NRU, M-LL+1, RWORK( 1 ),
+     $                     RWORK( N ), U( 1, LL ), LDU )
+            IF( NCC.GT.0 )
+     $         CALL ZLASR( 'L', 'V', 'B', M-LL+1, NCC, RWORK( 1 ),
+     $                     RWORK( N ), C( LL, 1 ), LDC )
+         END IF
+      END IF
+*
+*     QR iteration finished, go back and check convergence
+*
+      GO TO 60
+*
+*     All singular values converged, so make them positive
+*
+  160 CONTINUE
+      DO 170 I = 1, N
+         IF( D( I ).LT.ZERO ) THEN
+            D( I ) = -D( I )
+*
+*           Change sign of singular vectors, if desired
+*
+            IF( NCVT.GT.0 )
+     $         CALL ZDSCAL( NCVT, NEGONE, VT( I, 1 ), LDVT )
+         END IF
+  170 CONTINUE
+*
+*     Sort the singular values into decreasing order (insertion sort on
+*     singular values, but only one transposition per singular vector)
+*
+      DO 190 I = 1, N - 1
+*
+*        Scan for smallest D(I)
+*
+         ISUB = 1
+         SMIN = D( 1 )
+         DO 180 J = 2, N + 1 - I
+            IF( D( J ).LE.SMIN ) THEN
+               ISUB = J
+               SMIN = D( J )
+            END IF
+  180    CONTINUE
+         IF( ISUB.NE.N+1-I ) THEN
+*
+*           Swap singular values and vectors
+*
+            D( ISUB ) = D( N+1-I )
+            D( N+1-I ) = SMIN
+            IF( NCVT.GT.0 )
+     $         CALL ZSWAP( NCVT, VT( ISUB, 1 ), LDVT, VT( N+1-I, 1 ),
+     $                     LDVT )
+            IF( NRU.GT.0 )
+     $         CALL ZSWAP( NRU, U( 1, ISUB ), 1, U( 1, N+1-I ), 1 )
+            IF( NCC.GT.0 )
+     $         CALL ZSWAP( NCC, C( ISUB, 1 ), LDC, C( N+1-I, 1 ), LDC )
+         END IF
+  190 CONTINUE
+      GO TO 220
+*
+*     Maximum number of iterations exceeded, failure to converge
+*
+  200 CONTINUE
+      INFO = 0
+      DO 210 I = 1, N - 1
+         IF( E( I ).NE.ZERO )
+     $      INFO = INFO + 1
+  210 CONTINUE
+  220 CONTINUE
+      RETURN
+*
+*     End of ZBDSQR
+*
+      END
diff --git a/costa/native/external/lapack/zdrot.f b/costa/native/external/lapack/zdrot.f
new file mode 100644
index 000000000..6c451becf
--- /dev/null
+++ b/costa/native/external/lapack/zdrot.f
@@ -0,0 +1,54 @@
+      SUBROUTINE ZDROT( N, CX, INCX, CY, INCY, C, S )
+*
+*     applies a plane rotation, where the cos and sin (c and s) are real
+*     and the vectors cx and cy are complex.
+*     jack dongarra, linpack, 3/11/78.
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, INCY, N
+      DOUBLE PRECISION   C, S
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         CX( * ), CY( * )
+*
+* =====================================================================
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IX, IY
+      COMPLEX*16         CTEMP
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.LE.0 )
+     $   RETURN
+      IF( INCX.EQ.1 .AND. INCY.EQ.1 )
+     $   GO TO 20
+*
+*        code for unequal increments or equal increments not equal
+*          to 1
+*
+      IX = 1
+      IY = 1
+      IF( INCX.LT.0 )
+     $   IX = ( -N+1 )*INCX + 1
+      IF( INCY.LT.0 )
+     $   IY = ( -N+1 )*INCY + 1
+      DO 10 I = 1, N
+         CTEMP = C*CX( IX ) + S*CY( IY )
+         CY( IY ) = C*CY( IY ) - S*CX( IX )
+         CX( IX ) = CTEMP
+         IX = IX + INCX
+         IY = IY + INCY
+   10 CONTINUE
+      RETURN
+*
+*        code for both increments equal to 1
+*
+   20 CONTINUE
+      DO 30 I = 1, N
+         CTEMP = C*CX( I ) + S*CY( I )
+         CY( I ) = C*CY( I ) - S*CX( I )
+         CX( I ) = CTEMP
+   30 CONTINUE
+      RETURN
+      END
diff --git a/costa/native/external/lapack/zdrscl.f b/costa/native/external/lapack/zdrscl.f
new file mode 100644
index 000000000..c44e96d65
--- /dev/null
+++ b/costa/native/external/lapack/zdrscl.f
@@ -0,0 +1,115 @@
+      SUBROUTINE ZDRSCL( N, SA, SX, INCX )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+      DOUBLE PRECISION   SA
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         SX( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZDRSCL multiplies an n-element complex vector x by the real scalar
+*  1/a.  This is done without overflow or underflow as long as
+*  the final result x/a does not overflow or underflow.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of components of the vector x.
+*
+*  SA      (input) DOUBLE PRECISION
+*          The scalar a which is used to divide each component of x.
+*          SA must be >= 0, or the subroutine will divide by zero.
+*
+*  SX      (input/output) COMPLEX*16 array, dimension
+*                         (1+(N-1)*abs(INCX))
+*          The n-element vector x.
+*
+*  INCX    (input) INTEGER
+*          The increment between successive values of the vector SX.
+*          > 0:  SX(1) = X(1) and SX(1+(i-1)*INCX) = x(i),     1< i<= n
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            DONE
+      DOUBLE PRECISION   BIGNUM, CDEN, CDEN1, CNUM, CNUM1, MUL, SMLNUM
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLABAD, ZDSCAL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 )
+     $   RETURN
+*
+*     Get machine parameters
+*
+      SMLNUM = DLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+*
+*     Initialize the denominator to SA and the numerator to 1.
+*
+      CDEN = SA
+      CNUM = ONE
+*
+   10 CONTINUE
+      CDEN1 = CDEN*SMLNUM
+      CNUM1 = CNUM / BIGNUM
+      IF( ABS( CDEN1 ).GT.ABS( CNUM ) .AND. CNUM.NE.ZERO ) THEN
+*
+*        Pre-multiply X by SMLNUM if CDEN is large compared to CNUM.
+*
+         MUL = SMLNUM
+         DONE = .FALSE.
+         CDEN = CDEN1
+      ELSE IF( ABS( CNUM1 ).GT.ABS( CDEN ) ) THEN
+*
+*        Pre-multiply X by BIGNUM if CDEN is small compared to CNUM.
+*
+         MUL = BIGNUM
+         DONE = .FALSE.
+         CNUM = CNUM1
+      ELSE
+*
+*        Multiply X by CNUM / CDEN and return.
+*
+         MUL = CNUM / CDEN
+         DONE = .TRUE.
+      END IF
+*
+*     Scale the vector X by MUL
+*
+      CALL ZDSCAL( N, MUL, SX, INCX )
+*
+      IF( .NOT.DONE )
+     $   GO TO 10
+*
+      RETURN
+*
+*     End of ZDRSCL
+*
+      END
diff --git a/costa/native/external/lapack/zgbbrd.f b/costa/native/external/lapack/zgbbrd.f
new file mode 100644
index 000000000..3e24d5eb6
--- /dev/null
+++ b/costa/native/external/lapack/zgbbrd.f
@@ -0,0 +1,466 @@
+      SUBROUTINE ZGBBRD( VECT, M, N, NCC, KL, KU, AB, LDAB, D, E, Q,
+     $                   LDQ, PT, LDPT, C, LDC, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          VECT
+      INTEGER            INFO, KL, KU, LDAB, LDC, LDPT, LDQ, M, N, NCC
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * ), E( * ), RWORK( * )
+      COMPLEX*16         AB( LDAB, * ), C( LDC, * ), PT( LDPT, * ),
+     $                   Q( LDQ, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGBBRD reduces a complex general m-by-n band matrix A to real upper
+*  bidiagonal form B by a unitary transformation: Q' * A * P = B.
+*
+*  The routine computes B, and optionally forms Q or P', or computes
+*  Q'*C for a given matrix C.
+*
+*  Arguments
+*  =========
+*
+*  VECT    (input) CHARACTER*1
+*          Specifies whether or not the matrices Q and P' are to be
+*          formed.
+*          = 'N': do not form Q or P';
+*          = 'Q': form Q only;
+*          = 'P': form P' only;
+*          = 'B': form both.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  NCC     (input) INTEGER
+*          The number of columns of the matrix C.  NCC >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals of the matrix A. KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals of the matrix A. KU >= 0.
+*
+*  AB      (input/output) COMPLEX*16 array, dimension (LDAB,N)
+*          On entry, the m-by-n band matrix A, stored in rows 1 to
+*          KL+KU+1. The j-th column of A is stored in the j-th column of
+*          the array AB as follows:
+*          AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl).
+*          On exit, A is overwritten by values generated during the
+*          reduction.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array A. LDAB >= KL+KU+1.
+*
+*  D       (output) DOUBLE PRECISION array, dimension (min(M,N))
+*          The diagonal elements of the bidiagonal matrix B.
+*
+*  E       (output) DOUBLE PRECISION array, dimension (min(M,N)-1)
+*          The superdiagonal elements of the bidiagonal matrix B.
+*
+*  Q       (output) COMPLEX*16 array, dimension (LDQ,M)
+*          If VECT = 'Q' or 'B', the m-by-m unitary matrix Q.
+*          If VECT = 'N' or 'P', the array Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.
+*          LDQ >= max(1,M) if VECT = 'Q' or 'B'; LDQ >= 1 otherwise.
+*
+*  PT      (output) COMPLEX*16 array, dimension (LDPT,N)
+*          If VECT = 'P' or 'B', the n-by-n unitary matrix P'.
+*          If VECT = 'N' or 'Q', the array PT is not referenced.
+*
+*  LDPT    (input) INTEGER
+*          The leading dimension of the array PT.
+*          LDPT >= max(1,N) if VECT = 'P' or 'B'; LDPT >= 1 otherwise.
+*
+*  C       (input/output) COMPLEX*16 array, dimension (LDC,NCC)
+*          On entry, an m-by-ncc matrix C.
+*          On exit, C is overwritten by Q'*C.
+*          C is not referenced if NCC = 0.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C.
+*          LDC >= max(1,M) if NCC > 0; LDC >= 1 if NCC = 0.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (max(M,N))
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (max(M,N))
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+      COMPLEX*16         CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            WANTB, WANTC, WANTPT, WANTQ
+      INTEGER            I, INCA, J, J1, J2, KB, KB1, KK, KLM, KLU1,
+     $                   KUN, L, MINMN, ML, ML0, MU, MU0, NR, NRT
+      DOUBLE PRECISION   ABST, RC
+      COMPLEX*16         RA, RB, RS, T
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLARGV, ZLARTG, ZLARTV, ZLASET, ZROT,
+     $                   ZSCAL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DCONJG, MAX, MIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      WANTB = LSAME( VECT, 'B' )
+      WANTQ = LSAME( VECT, 'Q' ) .OR. WANTB
+      WANTPT = LSAME( VECT, 'P' ) .OR. WANTB
+      WANTC = NCC.GT.0
+      KLU1 = KL + KU + 1
+      INFO = 0
+      IF( .NOT.WANTQ .AND. .NOT.WANTPT .AND. .NOT.LSAME( VECT, 'N' ) )
+     $     THEN
+         INFO = -1
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NCC.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDAB.LT.KLU1 ) THEN
+         INFO = -8
+      ELSE IF( LDQ.LT.1 .OR. WANTQ .AND. LDQ.LT.MAX( 1, M ) ) THEN
+         INFO = -12
+      ELSE IF( LDPT.LT.1 .OR. WANTPT .AND. LDPT.LT.MAX( 1, N ) ) THEN
+         INFO = -14
+      ELSE IF( LDC.LT.1 .OR. WANTC .AND. LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -16
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGBBRD', -INFO )
+         RETURN
+      END IF
+*
+*     Initialize Q and P' to the unit matrix, if needed
+*
+      IF( WANTQ )
+     $   CALL ZLASET( 'Full', M, M, CZERO, CONE, Q, LDQ )
+      IF( WANTPT )
+     $   CALL ZLASET( 'Full', N, N, CZERO, CONE, PT, LDPT )
+*
+*     Quick return if possible.
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+      MINMN = MIN( M, N )
+*
+      IF( KL+KU.GT.1 ) THEN
+*
+*        Reduce to upper bidiagonal form if KU > 0; if KU = 0, reduce
+*        first to lower bidiagonal form and then transform to upper
+*        bidiagonal
+*
+         IF( KU.GT.0 ) THEN
+            ML0 = 1
+            MU0 = 2
+         ELSE
+            ML0 = 2
+            MU0 = 1
+         END IF
+*
+*        Wherever possible, plane rotations are generated and applied in
+*        vector operations of length NR over the index set J1:J2:KLU1.
+*
+*        The complex sines of the plane rotations are stored in WORK,
+*        and the real cosines in RWORK.
+*
+         KLM = MIN( M-1, KL )
+         KUN = MIN( N-1, KU )
+         KB = KLM + KUN
+         KB1 = KB + 1
+         INCA = KB1*LDAB
+         NR = 0
+         J1 = KLM + 2
+         J2 = 1 - KUN
+*
+         DO 90 I = 1, MINMN
+*
+*           Reduce i-th column and i-th row of matrix to bidiagonal form
+*
+            ML = KLM + 1
+            MU = KUN + 1
+            DO 80 KK = 1, KB
+               J1 = J1 + KB
+               J2 = J2 + KB
+*
+*              generate plane rotations to annihilate nonzero elements
+*              which have been created below the band
+*
+               IF( NR.GT.0 )
+     $            CALL ZLARGV( NR, AB( KLU1, J1-KLM-1 ), INCA,
+     $                         WORK( J1 ), KB1, RWORK( J1 ), KB1 )
+*
+*              apply plane rotations from the left
+*
+               DO 10 L = 1, KB
+                  IF( J2-KLM+L-1.GT.N ) THEN
+                     NRT = NR - 1
+                  ELSE
+                     NRT = NR
+                  END IF
+                  IF( NRT.GT.0 )
+     $               CALL ZLARTV( NRT, AB( KLU1-L, J1-KLM+L-1 ), INCA,
+     $                            AB( KLU1-L+1, J1-KLM+L-1 ), INCA,
+     $                            RWORK( J1 ), WORK( J1 ), KB1 )
+   10          CONTINUE
+*
+               IF( ML.GT.ML0 ) THEN
+                  IF( ML.LE.M-I+1 ) THEN
+*
+*                    generate plane rotation to annihilate a(i+ml-1,i)
+*                    within the band, and apply rotation from the left
+*
+                     CALL ZLARTG( AB( KU+ML-1, I ), AB( KU+ML, I ),
+     $                            RWORK( I+ML-1 ), WORK( I+ML-1 ), RA )
+                     AB( KU+ML-1, I ) = RA
+                     IF( I.LT.N )
+     $                  CALL ZROT( MIN( KU+ML-2, N-I ),
+     $                             AB( KU+ML-2, I+1 ), LDAB-1,
+     $                             AB( KU+ML-1, I+1 ), LDAB-1,
+     $                             RWORK( I+ML-1 ), WORK( I+ML-1 ) )
+                  END IF
+                  NR = NR + 1
+                  J1 = J1 - KB1
+               END IF
+*
+               IF( WANTQ ) THEN
+*
+*                 accumulate product of plane rotations in Q
+*
+                  DO 20 J = J1, J2, KB1
+                     CALL ZROT( M, Q( 1, J-1 ), 1, Q( 1, J ), 1,
+     $                          RWORK( J ), DCONJG( WORK( J ) ) )
+   20             CONTINUE
+               END IF
+*
+               IF( WANTC ) THEN
+*
+*                 apply plane rotations to C
+*
+                  DO 30 J = J1, J2, KB1
+                     CALL ZROT( NCC, C( J-1, 1 ), LDC, C( J, 1 ), LDC,
+     $                          RWORK( J ), WORK( J ) )
+   30             CONTINUE
+               END IF
+*
+               IF( J2+KUN.GT.N ) THEN
+*
+*                 adjust J2 to keep within the bounds of the matrix
+*
+                  NR = NR - 1
+                  J2 = J2 - KB1
+               END IF
+*
+               DO 40 J = J1, J2, KB1
+*
+*                 create nonzero element a(j-1,j+ku) above the band
+*                 and store it in WORK(n+1:2*n)
+*
+                  WORK( J+KUN ) = WORK( J )*AB( 1, J+KUN )
+                  AB( 1, J+KUN ) = RWORK( J )*AB( 1, J+KUN )
+   40          CONTINUE
+*
+*              generate plane rotations to annihilate nonzero elements
+*              which have been generated above the band
+*
+               IF( NR.GT.0 )
+     $            CALL ZLARGV( NR, AB( 1, J1+KUN-1 ), INCA,
+     $                         WORK( J1+KUN ), KB1, RWORK( J1+KUN ),
+     $                         KB1 )
+*
+*              apply plane rotations from the right
+*
+               DO 50 L = 1, KB
+                  IF( J2+L-1.GT.M ) THEN
+                     NRT = NR - 1
+                  ELSE
+                     NRT = NR
+                  END IF
+                  IF( NRT.GT.0 )
+     $               CALL ZLARTV( NRT, AB( L+1, J1+KUN-1 ), INCA,
+     $                            AB( L, J1+KUN ), INCA,
+     $                            RWORK( J1+KUN ), WORK( J1+KUN ), KB1 )
+   50          CONTINUE
+*
+               IF( ML.EQ.ML0 .AND. MU.GT.MU0 ) THEN
+                  IF( MU.LE.N-I+1 ) THEN
+*
+*                    generate plane rotation to annihilate a(i,i+mu-1)
+*                    within the band, and apply rotation from the right
+*
+                     CALL ZLARTG( AB( KU-MU+3, I+MU-2 ),
+     $                            AB( KU-MU+2, I+MU-1 ),
+     $                            RWORK( I+MU-1 ), WORK( I+MU-1 ), RA )
+                     AB( KU-MU+3, I+MU-2 ) = RA
+                     CALL ZROT( MIN( KL+MU-2, M-I ),
+     $                          AB( KU-MU+4, I+MU-2 ), 1,
+     $                          AB( KU-MU+3, I+MU-1 ), 1,
+     $                          RWORK( I+MU-1 ), WORK( I+MU-1 ) )
+                  END IF
+                  NR = NR + 1
+                  J1 = J1 - KB1
+               END IF
+*
+               IF( WANTPT ) THEN
+*
+*                 accumulate product of plane rotations in P'
+*
+                  DO 60 J = J1, J2, KB1
+                     CALL ZROT( N, PT( J+KUN-1, 1 ), LDPT,
+     $                          PT( J+KUN, 1 ), LDPT, RWORK( J+KUN ),
+     $                          DCONJG( WORK( J+KUN ) ) )
+   60             CONTINUE
+               END IF
+*
+               IF( J2+KB.GT.M ) THEN
+*
+*                 adjust J2 to keep within the bounds of the matrix
+*
+                  NR = NR - 1
+                  J2 = J2 - KB1
+               END IF
+*
+               DO 70 J = J1, J2, KB1
+*
+*                 create nonzero element a(j+kl+ku,j+ku-1) below the
+*                 band and store it in WORK(1:n)
+*
+                  WORK( J+KB ) = WORK( J+KUN )*AB( KLU1, J+KUN )
+                  AB( KLU1, J+KUN ) = RWORK( J+KUN )*AB( KLU1, J+KUN )
+   70          CONTINUE
+*
+               IF( ML.GT.ML0 ) THEN
+                  ML = ML - 1
+               ELSE
+                  MU = MU - 1
+               END IF
+   80       CONTINUE
+   90    CONTINUE
+      END IF
+*
+      IF( KU.EQ.0 .AND. KL.GT.0 ) THEN
+*
+*        A has been reduced to complex lower bidiagonal form
+*
+*        Transform lower bidiagonal form to upper bidiagonal by applying
+*        plane rotations from the left, overwriting superdiagonal
+*        elements on subdiagonal elements
+*
+         DO 100 I = 1, MIN( M-1, N )
+            CALL ZLARTG( AB( 1, I ), AB( 2, I ), RC, RS, RA )
+            AB( 1, I ) = RA
+            IF( I.LT.N ) THEN
+               AB( 2, I ) = RS*AB( 1, I+1 )
+               AB( 1, I+1 ) = RC*AB( 1, I+1 )
+            END IF
+            IF( WANTQ )
+     $         CALL ZROT( M, Q( 1, I ), 1, Q( 1, I+1 ), 1, RC,
+     $                    DCONJG( RS ) )
+            IF( WANTC )
+     $         CALL ZROT( NCC, C( I, 1 ), LDC, C( I+1, 1 ), LDC, RC,
+     $                    RS )
+  100    CONTINUE
+      ELSE
+*
+*        A has been reduced to complex upper bidiagonal form or is
+*        diagonal
+*
+         IF( KU.GT.0 .AND. M.LT.N ) THEN
+*
+*           Annihilate a(m,m+1) by applying plane rotations from the
+*           right
+*
+            RB = AB( KU, M+1 )
+            DO 110 I = M, 1, -1
+               CALL ZLARTG( AB( KU+1, I ), RB, RC, RS, RA )
+               AB( KU+1, I ) = RA
+               IF( I.GT.1 ) THEN
+                  RB = -DCONJG( RS )*AB( KU, I )
+                  AB( KU, I ) = RC*AB( KU, I )
+               END IF
+               IF( WANTPT )
+     $            CALL ZROT( N, PT( I, 1 ), LDPT, PT( M+1, 1 ), LDPT,
+     $                       RC, DCONJG( RS ) )
+  110       CONTINUE
+         END IF
+      END IF
+*
+*     Make diagonal and superdiagonal elements real, storing them in D
+*     and E
+*
+      T = AB( KU+1, 1 )
+      DO 120 I = 1, MINMN
+         ABST = ABS( T )
+         D( I ) = ABST
+         IF( ABST.NE.ZERO ) THEN
+            T = T / ABST
+         ELSE
+            T = CONE
+         END IF
+         IF( WANTQ )
+     $      CALL ZSCAL( M, T, Q( 1, I ), 1 )
+         IF( WANTC )
+     $      CALL ZSCAL( NCC, DCONJG( T ), C( I, 1 ), LDC )
+         IF( I.LT.MINMN ) THEN
+            IF( KU.EQ.0 .AND. KL.EQ.0 ) THEN
+               E( I ) = ZERO
+               T = AB( 1, I+1 )
+            ELSE
+               IF( KU.EQ.0 ) THEN
+                  T = AB( 2, I )*DCONJG( T )
+               ELSE
+                  T = AB( KU, I+1 )*DCONJG( T )
+               END IF
+               ABST = ABS( T )
+               E( I ) = ABST
+               IF( ABST.NE.ZERO ) THEN
+                  T = T / ABST
+               ELSE
+                  T = CONE
+               END IF
+               IF( WANTPT )
+     $            CALL ZSCAL( N, T, PT( I+1, 1 ), LDPT )
+               T = AB( KU+1, I+1 )*DCONJG( T )
+            END IF
+         END IF
+  120 CONTINUE
+      RETURN
+*
+*     End of ZGBBRD
+*
+      END
diff --git a/costa/native/external/lapack/zgbcon.f b/costa/native/external/lapack/zgbcon.f
new file mode 100644
index 000000000..e8b829688
--- /dev/null
+++ b/costa/native/external/lapack/zgbcon.f
@@ -0,0 +1,230 @@
+      SUBROUTINE ZGBCON( NORM, N, KL, KU, AB, LDAB, IPIV, ANORM, RCOND,
+     $                   WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            INFO, KL, KU, LDAB, N
+      DOUBLE PRECISION   ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   RWORK( * )
+      COMPLEX*16         AB( LDAB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGBCON estimates the reciprocal of the condition number of a complex
+*  general band matrix A, in either the 1-norm or the infinity-norm,
+*  using the LU factorization computed by ZGBTRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as
+*     RCOND = 1 / ( norm(A) * norm(inv(A)) ).
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies whether the 1-norm condition number or the
+*          infinity-norm condition number is required:
+*          = '1' or 'O':  1-norm;
+*          = 'I':         Infinity-norm.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  AB      (input) COMPLEX*16 array, dimension (LDAB,N)
+*          Details of the LU factorization of the band matrix A, as
+*          computed by ZGBTRF.  U is stored as an upper triangular band
+*          matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and
+*          the multipliers used during the factorization are stored in
+*          rows KL+KU+2 to 2*KL+KU+1.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices; for 1 <= i <= N, row i of the matrix was
+*          interchanged with row IPIV(i).
+*
+*  ANORM   (input) DOUBLE PRECISION
+*          If NORM = '1' or 'O', the 1-norm of the original matrix A.
+*          If NORM = 'I', the infinity-norm of the original matrix A.
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(norm(A) * norm(inv(A))).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LNOTI, ONENRM
+      CHARACTER          NORMIN
+      INTEGER            IX, J, JP, KASE, KASE1, KD, LM
+      DOUBLE PRECISION   AINVNM, SCALE, SMLNUM
+      COMPLEX*16         T, ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IZAMAX
+      DOUBLE PRECISION   DLAMCH
+      COMPLEX*16         ZDOTC
+      EXTERNAL           LSAME, IZAMAX, DLAMCH, ZDOTC
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZAXPY, ZDRSCL, ZLACON, ZLATBS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MIN
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' )
+      IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.2*KL+KU+1 ) THEN
+         INFO = -6
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGBCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.EQ.ZERO ) THEN
+         RETURN
+      END IF
+*
+      SMLNUM = DLAMCH( 'Safe minimum' )
+*
+*     Estimate the norm of inv(A).
+*
+      AINVNM = ZERO
+      NORMIN = 'N'
+      IF( ONENRM ) THEN
+         KASE1 = 1
+      ELSE
+         KASE1 = 2
+      END IF
+      KD = KL + KU + 1
+      LNOTI = KL.GT.0
+      KASE = 0
+   10 CONTINUE
+      CALL ZLACON( N, WORK( N+1 ), WORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+         IF( KASE.EQ.KASE1 ) THEN
+*
+*           Multiply by inv(L).
+*
+            IF( LNOTI ) THEN
+               DO 20 J = 1, N - 1
+                  LM = MIN( KL, N-J )
+                  JP = IPIV( J )
+                  T = WORK( JP )
+                  IF( JP.NE.J ) THEN
+                     WORK( JP ) = WORK( J )
+                     WORK( J ) = T
+                  END IF
+                  CALL ZAXPY( LM, -T, AB( KD+1, J ), 1, WORK( J+1 ), 1 )
+   20          CONTINUE
+            END IF
+*
+*           Multiply by inv(U).
+*
+            CALL ZLATBS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   KL+KU, AB, LDAB, WORK, SCALE, RWORK, INFO )
+         ELSE
+*
+*           Multiply by inv(U').
+*
+            CALL ZLATBS( 'Upper', 'Conjugate transpose', 'Non-unit',
+     $                   NORMIN, N, KL+KU, AB, LDAB, WORK, SCALE, RWORK,
+     $                   INFO )
+*
+*           Multiply by inv(L').
+*
+            IF( LNOTI ) THEN
+               DO 30 J = N - 1, 1, -1
+                  LM = MIN( KL, N-J )
+                  WORK( J ) = WORK( J ) - ZDOTC( LM, AB( KD+1, J ), 1,
+     $                        WORK( J+1 ), 1 )
+                  JP = IPIV( J )
+                  IF( JP.NE.J ) THEN
+                     T = WORK( JP )
+                     WORK( JP ) = WORK( J )
+                     WORK( J ) = T
+                  END IF
+   30          CONTINUE
+            END IF
+         END IF
+*
+*        Divide X by 1/SCALE if doing so will not cause overflow.
+*
+         NORMIN = 'Y'
+         IF( SCALE.NE.ONE ) THEN
+            IX = IZAMAX( N, WORK, 1 )
+            IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO )
+     $         GO TO 40
+            CALL ZDRSCL( N, SCALE, WORK, 1 )
+         END IF
+         GO TO 10
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+   40 CONTINUE
+      RETURN
+*
+*     End of ZGBCON
+*
+      END
diff --git a/costa/native/external/lapack/zgbequ.f b/costa/native/external/lapack/zgbequ.f
new file mode 100644
index 000000000..d3e821fe5
--- /dev/null
+++ b/costa/native/external/lapack/zgbequ.f
@@ -0,0 +1,248 @@
+      SUBROUTINE ZGBEQU( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND,
+     $                   AMAX, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, KL, KU, LDAB, M, N
+      DOUBLE PRECISION   AMAX, COLCND, ROWCND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   C( * ), R( * )
+      COMPLEX*16         AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGBEQU computes row and column scalings intended to equilibrate an
+*  M-by-N band matrix A and reduce its condition number.  R returns the
+*  row scale factors and C the column scale factors, chosen to try to
+*  make the largest element in each row and column of the matrix B with
+*  elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1.
+*
+*  R(i) and C(j) are restricted to be between SMLNUM = smallest safe
+*  number and BIGNUM = largest safe number.  Use of these scaling
+*  factors is not guaranteed to reduce the condition number of A but
+*  works well in practice.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  AB      (input) COMPLEX*16 array, dimension (LDAB,N)
+*          The band matrix A, stored in rows 1 to KL+KU+1.  The j-th
+*          column of A is stored in the j-th column of the array AB as
+*          follows:
+*          AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KL+KU+1.
+*
+*  R       (output) DOUBLE PRECISION array, dimension (M)
+*          If INFO = 0, or INFO > M, R contains the row scale factors
+*          for A.
+*
+*  C       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, C contains the column scale factors for A.
+*
+*  ROWCND  (output) DOUBLE PRECISION
+*          If INFO = 0 or INFO > M, ROWCND contains the ratio of the
+*          smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and
+*          AMAX is neither too large nor too small, it is not worth
+*          scaling by R.
+*
+*  COLCND  (output) DOUBLE PRECISION
+*          If INFO = 0, COLCND contains the ratio of the smallest
+*          C(i) to the largest C(i).  If COLCND >= 0.1, it is not
+*          worth scaling by C.
+*
+*  AMAX    (output) DOUBLE PRECISION
+*          Absolute value of largest matrix element.  If AMAX is very
+*          close to overflow or very close to underflow, the matrix
+*          should be scaled.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= M:  the i-th row of A is exactly zero
+*                >  M:  the (i-M)-th column of A is exactly zero
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, KD
+      DOUBLE PRECISION   BIGNUM, RCMAX, RCMIN, SMLNUM
+      COMPLEX*16         ZDUM
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX, MIN
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KL+KU+1 ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGBEQU', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         ROWCND = ONE
+         COLCND = ONE
+         AMAX = ZERO
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SMLNUM = DLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+*
+*     Compute row scale factors.
+*
+      DO 10 I = 1, M
+         R( I ) = ZERO
+   10 CONTINUE
+*
+*     Find the maximum element in each row.
+*
+      KD = KU + 1
+      DO 30 J = 1, N
+         DO 20 I = MAX( J-KU, 1 ), MIN( J+KL, M )
+            R( I ) = MAX( R( I ), CABS1( AB( KD+I-J, J ) ) )
+   20    CONTINUE
+   30 CONTINUE
+*
+*     Find the maximum and minimum scale factors.
+*
+      RCMIN = BIGNUM
+      RCMAX = ZERO
+      DO 40 I = 1, M
+         RCMAX = MAX( RCMAX, R( I ) )
+         RCMIN = MIN( RCMIN, R( I ) )
+   40 CONTINUE
+      AMAX = RCMAX
+*
+      IF( RCMIN.EQ.ZERO ) THEN
+*
+*        Find the first zero scale factor and return an error code.
+*
+         DO 50 I = 1, M
+            IF( R( I ).EQ.ZERO ) THEN
+               INFO = I
+               RETURN
+            END IF
+   50    CONTINUE
+      ELSE
+*
+*        Invert the scale factors.
+*
+         DO 60 I = 1, M
+            R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM )
+   60    CONTINUE
+*
+*        Compute ROWCND = min(R(I)) / max(R(I))
+*
+         ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+      END IF
+*
+*     Compute column scale factors
+*
+      DO 70 J = 1, N
+         C( J ) = ZERO
+   70 CONTINUE
+*
+*     Find the maximum element in each column,
+*     assuming the row scaling computed above.
+*
+      KD = KU + 1
+      DO 90 J = 1, N
+         DO 80 I = MAX( J-KU, 1 ), MIN( J+KL, M )
+            C( J ) = MAX( C( J ), CABS1( AB( KD+I-J, J ) )*R( I ) )
+   80    CONTINUE
+   90 CONTINUE
+*
+*     Find the maximum and minimum scale factors.
+*
+      RCMIN = BIGNUM
+      RCMAX = ZERO
+      DO 100 J = 1, N
+         RCMIN = MIN( RCMIN, C( J ) )
+         RCMAX = MAX( RCMAX, C( J ) )
+  100 CONTINUE
+*
+      IF( RCMIN.EQ.ZERO ) THEN
+*
+*        Find the first zero scale factor and return an error code.
+*
+         DO 110 J = 1, N
+            IF( C( J ).EQ.ZERO ) THEN
+               INFO = M + J
+               RETURN
+            END IF
+  110    CONTINUE
+      ELSE
+*
+*        Invert the scale factors.
+*
+         DO 120 J = 1, N
+            C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM )
+  120    CONTINUE
+*
+*        Compute COLCND = min(C(J)) / max(C(J))
+*
+         COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+      END IF
+*
+      RETURN
+*
+*     End of ZGBEQU
+*
+      END
diff --git a/costa/native/external/lapack/zgbrfs.f b/costa/native/external/lapack/zgbrfs.f
new file mode 100644
index 000000000..df93efce8
--- /dev/null
+++ b/costa/native/external/lapack/zgbrfs.f
@@ -0,0 +1,361 @@
+      SUBROUTINE ZGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB,
+     $                   IPIV, B, LDB, X, LDX, FERR, BERR, WORK, RWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX*16         AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGBRFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is banded, and provides
+*  error bounds and backward error estimates for the solution.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AB      (input) COMPLEX*16 array, dimension (LDAB,N)
+*          The original band matrix A, stored in rows 1 to KL+KU+1.
+*          The j-th column of A is stored in the j-th column of the
+*          array AB as follows:
+*          AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KL+KU+1.
+*
+*  AFB     (input) COMPLEX*16 array, dimension (LDAFB,N)
+*          Details of the LU factorization of the band matrix A, as
+*          computed by ZGBTRF.  U is stored as an upper triangular band
+*          matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and
+*          the multipliers used during the factorization are stored in
+*          rows KL+KU+2 to 2*KL+KU+1.
+*
+*  LDAFB   (input) INTEGER
+*          The leading dimension of the array AFB.  LDAFB >= 2*KL*KU+1.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices from ZGBTRF; for 1<=i<=N, row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  B       (input) COMPLEX*16 array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) COMPLEX*16 array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by ZGBTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+      COMPLEX*16         CONE
+      PARAMETER          ( CONE = ( 1.0D+0, 0.0D+0 ) )
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D+0 )
+      DOUBLE PRECISION   THREE
+      PARAMETER          ( THREE = 3.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN
+      CHARACTER          TRANSN, TRANST
+      INTEGER            COUNT, I, J, K, KASE, KK, NZ
+      DOUBLE PRECISION   EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+      COMPLEX*16         ZDUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZAXPY, ZCOPY, ZGBMV, ZGBTRS, ZLACON
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX, MIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $    LSAME( TRANS, 'C' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.KL+KU+1 ) THEN
+         INFO = -7
+      ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN
+         INFO = -9
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -12
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -14
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGBRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      IF( NOTRAN ) THEN
+         TRANSN = 'N'
+         TRANST = 'C'
+      ELSE
+         TRANSN = 'C'
+         TRANST = 'N'
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = MIN( KL+KU+2, N+1 )
+      EPS = DLAMCH( 'Epsilon' )
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - op(A) * X,
+*        where op(A) = A, A**T, or A**H, depending on TRANS.
+*
+         CALL ZCOPY( N, B( 1, J ), 1, WORK, 1 )
+         CALL ZGBMV( TRANS, N, N, KL, KU, -CONE, AB, LDAB, X( 1, J ), 1,
+     $               CONE, WORK, 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            RWORK( I ) = CABS1( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(op(A))*abs(X) + abs(B).
+*
+         IF( NOTRAN ) THEN
+            DO 50 K = 1, N
+               KK = KU + 1 - K
+               XK = CABS1( X( K, J ) )
+               DO 40 I = MAX( 1, K-KU ), MIN( N, K+KL )
+                  RWORK( I ) = RWORK( I ) + CABS1( AB( KK+I, K ) )*XK
+   40          CONTINUE
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               KK = KU + 1 - K
+               DO 60 I = MAX( 1, K-KU ), MIN( N, K+KL )
+                  S = S + CABS1( AB( KK+I, K ) )*CABS1( X( I, J ) )
+   60          CONTINUE
+               RWORK( K ) = RWORK( K ) + S
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) )
+            ELSE
+               S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) /
+     $             ( RWORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL ZGBTRS( TRANS, N, KL, KU, 1, AFB, LDAFB, IPIV, WORK, N,
+     $                   INFO )
+            CALL ZAXPY( N, CONE, WORK, 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(op(A)))*
+*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(op(A)) is the inverse of op(A)
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(op(A))*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use ZLACON to estimate the infinity-norm of the matrix
+*           inv(op(A)) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I )
+            ELSE
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) +
+     $                      SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL ZLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(op(A)**H).
+*
+               CALL ZGBTRS( TRANST, N, KL, KU, 1, AFB, LDAFB, IPIV,
+     $                      WORK, N, INFO )
+               DO 110 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  110          CONTINUE
+            ELSE
+*
+*              Multiply by inv(op(A))*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  120          CONTINUE
+               CALL ZGBTRS( TRANSN, N, KL, KU, 1, AFB, LDAFB, IPIV,
+     $                      WORK, N, INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of ZGBRFS
+*
+      END
diff --git a/costa/native/external/lapack/zgbsv.f b/costa/native/external/lapack/zgbsv.f
new file mode 100644
index 000000000..aef9dd3da
--- /dev/null
+++ b/costa/native/external/lapack/zgbsv.f
@@ -0,0 +1,143 @@
+      SUBROUTINE ZGBSV( N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, KL, KU, LDAB, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         AB( LDAB, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGBSV computes the solution to a complex system of linear equations
+*  A * X = B, where A is a band matrix of order N with KL subdiagonals
+*  and KU superdiagonals, and X and B are N-by-NRHS matrices.
+*
+*  The LU decomposition with partial pivoting and row interchanges is
+*  used to factor A as A = L * U, where L is a product of permutation
+*  and unit lower triangular matrices with KL subdiagonals, and U is
+*  upper triangular with KL+KU superdiagonals.  The factored form of A
+*  is then used to solve the system of equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AB      (input/output) COMPLEX*16 array, dimension (LDAB,N)
+*          On entry, the matrix A in band storage, in rows KL+1 to
+*          2*KL+KU+1; rows 1 to KL of the array need not be set.
+*          The j-th column of A is stored in the j-th column of the
+*          array AB as follows:
+*          AB(KL+KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+KL)
+*          On exit, details of the factorization: U is stored as an
+*          upper triangular band matrix with KL+KU superdiagonals in
+*          rows 1 to KL+KU+1, and the multipliers used during the
+*          factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
+*          See below for further details.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          The pivot indices that define the permutation matrix P;
+*          row i of the matrix was interchanged with row IPIV(i).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, U(i,i) is exactly zero.  The factorization
+*                has been completed, but the factor U is exactly
+*                singular, and the solution has not been computed.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  M = N = 6, KL = 2, KU = 1:
+*
+*  On entry:                       On exit:
+*
+*      *    *    *    +    +    +       *    *    *   u14  u25  u36
+*      *    *    +    +    +    +       *    *   u13  u24  u35  u46
+*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
+*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
+*     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
+*     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *
+*
+*  Array elements marked * are not used by the routine; elements marked
+*  + need not be set on entry, but are required by the routine to store
+*  elements of U because of fill-in resulting from the row interchanges.
+*
+*  =====================================================================
+*
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGBTRF, ZGBTRS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.2*KL+KU+1 ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( N, 1 ) ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGBSV ', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the LU factorization of the band matrix A.
+*
+      CALL ZGBTRF( N, N, KL, KU, AB, LDAB, IPIV, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL ZGBTRS( 'No transpose', N, KL, KU, NRHS, AB, LDAB, IPIV,
+     $                B, LDB, INFO )
+      END IF
+      RETURN
+*
+*     End of ZGBSV
+*
+      END
diff --git a/costa/native/external/lapack/zgbsvx.f b/costa/native/external/lapack/zgbsvx.f
new file mode 100644
index 000000000..1fa863285
--- /dev/null
+++ b/costa/native/external/lapack/zgbsvx.f
@@ -0,0 +1,518 @@
+      SUBROUTINE ZGBSVX( FACT, TRANS, N, KL, KU, NRHS, AB, LDAB, AFB,
+     $                   LDAFB, IPIV, EQUED, R, C, B, LDB, X, LDX,
+     $                   RCOND, FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, FACT, TRANS
+      INTEGER            INFO, KL, KU, LDAB, LDAFB, LDB, LDX, N, NRHS
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   BERR( * ), C( * ), FERR( * ), R( * ),
+     $                   RWORK( * )
+      COMPLEX*16         AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGBSVX uses the LU factorization to compute the solution to a complex
+*  system of linear equations A * X = B, A**T * X = B, or A**H * X = B,
+*  where A is a band matrix of order N with KL subdiagonals and KU
+*  superdiagonals, and X and B are N-by-NRHS matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed by this subroutine:
+*
+*  1. If FACT = 'E', real scaling factors are computed to equilibrate
+*     the system:
+*        TRANS = 'N':  diag(R)*A*diag(C)     *inv(diag(C))*X = diag(R)*B
+*        TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
+*        TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
+*     Whether or not the system will be equilibrated depends on the
+*     scaling of the matrix A, but if equilibration is used, A is
+*     overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N')
+*     or diag(C)*B (if TRANS = 'T' or 'C').
+*
+*  2. If FACT = 'N' or 'E', the LU decomposition is used to factor the
+*     matrix A (after equilibration if FACT = 'E') as
+*        A = L * U,
+*     where L is a product of permutation and unit lower triangular
+*     matrices with KL subdiagonals, and U is upper triangular with
+*     KL+KU superdiagonals.
+*
+*  3. If some U(i,i)=0, so that U is exactly singular, then the routine
+*     returns with INFO = i. Otherwise, the factored form of A is used
+*     to estimate the condition number of the matrix A.  If the
+*     reciprocal of the condition number is less than machine precision,
+*     INFO = N+1 is returned as a warning, but the routine still goes on
+*     to solve for X and compute error bounds as described below.
+*
+*  4. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  5. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  6. If equilibration was used, the matrix X is premultiplied by
+*     diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so
+*     that it solves the original system before equilibration.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of the matrix A is
+*          supplied on entry, and if not, whether the matrix A should be
+*          equilibrated before it is factored.
+*          = 'F':  On entry, AFB and IPIV contain the factored form of
+*                  A.  If EQUED is not 'N', the matrix A has been
+*                  equilibrated with scaling factors given by R and C.
+*                  AB, AFB, and IPIV are not modified.
+*          = 'N':  The matrix A will be copied to AFB and factored.
+*          = 'E':  The matrix A will be equilibrated if necessary, then
+*                  copied to AFB and factored.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations.
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose)
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AB      (input/output) COMPLEX*16 array, dimension (LDAB,N)
+*          On entry, the matrix A in band storage, in rows 1 to KL+KU+1.
+*          The j-th column of A is stored in the j-th column of the
+*          array AB as follows:
+*          AB(KU+1+i-j,j) = A(i,j) for max(1,j-KU)<=i<=min(N,j+kl)
+*
+*          If FACT = 'F' and EQUED is not 'N', then A must have been
+*          equilibrated by the scaling factors in R and/or C.  AB is not
+*          modified if FACT = 'F' or 'N', or if FACT = 'E' and
+*          EQUED = 'N' on exit.
+*
+*          On exit, if EQUED .ne. 'N', A is scaled as follows:
+*          EQUED = 'R':  A := diag(R) * A
+*          EQUED = 'C':  A := A * diag(C)
+*          EQUED = 'B':  A := diag(R) * A * diag(C).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KL+KU+1.
+*
+*  AFB     (input or output) COMPLEX*16 array, dimension (LDAFB,N)
+*          If FACT = 'F', then AFB is an input argument and on entry
+*          contains details of the LU factorization of the band matrix
+*          A, as computed by ZGBTRF.  U is stored as an upper triangular
+*          band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1,
+*          and the multipliers used during the factorization are stored
+*          in rows KL+KU+2 to 2*KL+KU+1.  If EQUED .ne. 'N', then AFB is
+*          the factored form of the equilibrated matrix A.
+*
+*          If FACT = 'N', then AFB is an output argument and on exit
+*          returns details of the LU factorization of A.
+*
+*          If FACT = 'E', then AFB is an output argument and on exit
+*          returns details of the LU factorization of the equilibrated
+*          matrix A (see the description of AB for the form of the
+*          equilibrated matrix).
+*
+*  LDAFB   (input) INTEGER
+*          The leading dimension of the array AFB.  LDAFB >= 2*KL+KU+1.
+*
+*  IPIV    (input or output) INTEGER array, dimension (N)
+*          If FACT = 'F', then IPIV is an input argument and on entry
+*          contains the pivot indices from the factorization A = L*U
+*          as computed by ZGBTRF; row i of the matrix was interchanged
+*          with row IPIV(i).
+*
+*          If FACT = 'N', then IPIV is an output argument and on exit
+*          contains the pivot indices from the factorization A = L*U
+*          of the original matrix A.
+*
+*          If FACT = 'E', then IPIV is an output argument and on exit
+*          contains the pivot indices from the factorization A = L*U
+*          of the equilibrated matrix A.
+*
+*  EQUED   (input or output) CHARACTER*1
+*          Specifies the form of equilibration that was done.
+*          = 'N':  No equilibration (always true if FACT = 'N').
+*          = 'R':  Row equilibration, i.e., A has been premultiplied by
+*                  diag(R).
+*          = 'C':  Column equilibration, i.e., A has been postmultiplied
+*                  by diag(C).
+*          = 'B':  Both row and column equilibration, i.e., A has been
+*                  replaced by diag(R) * A * diag(C).
+*          EQUED is an input argument if FACT = 'F'; otherwise, it is an
+*          output argument.
+*
+*  R       (input or output) DOUBLE PRECISION array, dimension (N)
+*          The row scale factors for A.  If EQUED = 'R' or 'B', A is
+*          multiplied on the left by diag(R); if EQUED = 'N' or 'C', R
+*          is not accessed.  R is an input argument if FACT = 'F';
+*          otherwise, R is an output argument.  If FACT = 'F' and
+*          EQUED = 'R' or 'B', each element of R must be positive.
+*
+*  C       (input or output) DOUBLE PRECISION array, dimension (N)
+*          The column scale factors for A.  If EQUED = 'C' or 'B', A is
+*          multiplied on the right by diag(C); if EQUED = 'N' or 'R', C
+*          is not accessed.  C is an input argument if FACT = 'F';
+*          otherwise, C is an output argument.  If FACT = 'F' and
+*          EQUED = 'C' or 'B', each element of C must be positive.
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit,
+*          if EQUED = 'N', B is not modified;
+*          if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by
+*          diag(R)*B;
+*          if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is
+*          overwritten by diag(C)*B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) COMPLEX*16 array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X
+*          to the original system of equations.  Note that A and B are
+*          modified on exit if EQUED .ne. 'N', and the solution to the
+*          equilibrated system is inv(diag(C))*X if TRANS = 'N' and
+*          EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C'
+*          and EQUED = 'R' or 'B'.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The estimate of the reciprocal condition number of the matrix
+*          A after equilibration (if done).  If RCOND is less than the
+*          machine precision (in particular, if RCOND = 0), the matrix
+*          is singular to working precision.  This condition is
+*          indicated by a return code of INFO > 0.
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace/output) DOUBLE PRECISION array, dimension (N)
+*          On exit, RWORK(1) contains the reciprocal pivot growth
+*          factor norm(A)/norm(U). The "max absolute element" norm is
+*          used. If RWORK(1) is much less than 1, then the stability
+*          of the LU factorization of the (equilibrated) matrix A
+*          could be poor. This also means that the solution X, condition
+*          estimator RCOND, and forward error bound FERR could be
+*          unreliable. If factorization fails with 0<INFO<=N, then
+*          RWORK(1) contains the reciprocal pivot growth factor for the
+*          leading INFO columns of A.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= N:  U(i,i) is exactly zero.  The factorization
+*                       has been completed, but the factor U is exactly
+*                       singular, so the solution and error bounds
+*                       could not be computed. RCOND = 0 is returned.
+*                = N+1: U is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU
+      CHARACTER          NORM
+      INTEGER            I, INFEQU, J, J1, J2
+      DOUBLE PRECISION   AMAX, ANORM, BIGNUM, COLCND, RCMAX, RCMIN,
+     $                   ROWCND, RPVGRW, SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, ZLANGB, ZLANTB
+      EXTERNAL           LSAME, DLAMCH, ZLANGB, ZLANTB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZCOPY, ZGBCON, ZGBEQU, ZGBRFS, ZGBTRF,
+     $                   ZGBTRS, ZLACPY, ZLAQGB
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      EQUIL = LSAME( FACT, 'E' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( NOFACT .OR. EQUIL ) THEN
+         EQUED = 'N'
+         ROWEQU = .FALSE.
+         COLEQU = .FALSE.
+      ELSE
+         ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' )
+         COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' )
+         SMLNUM = DLAMCH( 'Safe minimum' )
+         BIGNUM = ONE / SMLNUM
+      END IF
+*
+*     Test the input parameters.
+*
+      IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) )
+     $     THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDAB.LT.KL+KU+1 ) THEN
+         INFO = -8
+      ELSE IF( LDAFB.LT.2*KL+KU+1 ) THEN
+         INFO = -10
+      ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT.
+     $         ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN
+         INFO = -12
+      ELSE
+         IF( ROWEQU ) THEN
+            RCMIN = BIGNUM
+            RCMAX = ZERO
+            DO 10 J = 1, N
+               RCMIN = MIN( RCMIN, R( J ) )
+               RCMAX = MAX( RCMAX, R( J ) )
+   10       CONTINUE
+            IF( RCMIN.LE.ZERO ) THEN
+               INFO = -13
+            ELSE IF( N.GT.0 ) THEN
+               ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+            ELSE
+               ROWCND = ONE
+            END IF
+         END IF
+         IF( COLEQU .AND. INFO.EQ.0 ) THEN
+            RCMIN = BIGNUM
+            RCMAX = ZERO
+            DO 20 J = 1, N
+               RCMIN = MIN( RCMIN, C( J ) )
+               RCMAX = MAX( RCMAX, C( J ) )
+   20       CONTINUE
+            IF( RCMIN.LE.ZERO ) THEN
+               INFO = -14
+            ELSE IF( N.GT.0 ) THEN
+               COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+            ELSE
+               COLCND = ONE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            IF( LDB.LT.MAX( 1, N ) ) THEN
+               INFO = -16
+            ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+               INFO = -18
+            END IF
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGBSVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( EQUIL ) THEN
+*
+*        Compute row and column scalings to equilibrate the matrix A.
+*
+         CALL ZGBEQU( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND,
+     $                AMAX, INFEQU )
+         IF( INFEQU.EQ.0 ) THEN
+*
+*           Equilibrate the matrix.
+*
+            CALL ZLAQGB( N, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND,
+     $                   AMAX, EQUED )
+            ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' )
+            COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' )
+         END IF
+      END IF
+*
+*     Scale the right hand side.
+*
+      IF( NOTRAN ) THEN
+         IF( ROWEQU ) THEN
+            DO 40 J = 1, NRHS
+               DO 30 I = 1, N
+                  B( I, J ) = R( I )*B( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+      ELSE IF( COLEQU ) THEN
+         DO 60 J = 1, NRHS
+            DO 50 I = 1, N
+               B( I, J ) = C( I )*B( I, J )
+   50       CONTINUE
+   60    CONTINUE
+      END IF
+*
+      IF( NOFACT .OR. EQUIL ) THEN
+*
+*        Compute the LU factorization of the band matrix A.
+*
+         DO 70 J = 1, N
+            J1 = MAX( J-KU, 1 )
+            J2 = MIN( J+KL, N )
+            CALL ZCOPY( J2-J1+1, AB( KU+1-J+J1, J ), 1,
+     $                  AFB( KL+KU+1-J+J1, J ), 1 )
+   70    CONTINUE
+*
+         CALL ZGBTRF( N, N, KL, KU, AFB, LDAFB, IPIV, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 ) THEN
+*
+*              Compute the reciprocal pivot growth factor of the
+*              leading rank-deficient INFO columns of A.
+*
+               ANORM = ZERO
+               DO 90 J = 1, INFO
+                  DO 80 I = MAX( KU+2-J, 1 ),
+     $                    MIN( N+KU+1-J, KL+KU+1 )
+                     ANORM = MAX( ANORM, ABS( AB( I, J ) ) )
+   80             CONTINUE
+   90          CONTINUE
+               RPVGRW = ZLANTB( 'M', 'U', 'N', INFO,
+     $                  MIN( INFO-1, KL+KU ), AFB( MAX( 1,
+     $                  KL+KU+2-INFO ), 1 ), LDAFB, RWORK )
+               IF( RPVGRW.EQ.ZERO ) THEN
+                  RPVGRW = ONE
+               ELSE
+                  RPVGRW = ANORM / RPVGRW
+               END IF
+               RWORK( 1 ) = RPVGRW
+               RCOND = ZERO
+            END IF
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A and the
+*     reciprocal pivot growth factor RPVGRW.
+*
+      IF( NOTRAN ) THEN
+         NORM = '1'
+      ELSE
+         NORM = 'I'
+      END IF
+      ANORM = ZLANGB( NORM, N, KL, KU, AB, LDAB, RWORK )
+      RPVGRW = ZLANTB( 'M', 'U', 'N', N, KL+KU, AFB, LDAFB, RWORK )
+      IF( RPVGRW.EQ.ZERO ) THEN
+         RPVGRW = ONE
+      ELSE
+         RPVGRW = ZLANGB( 'M', N, KL, KU, AB, LDAB, RWORK ) / RPVGRW
+      END IF
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL ZGBCON( NORM, N, KL, KU, AFB, LDAFB, IPIV, ANORM, RCOND,
+     $             WORK, RWORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.DLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution matrix X.
+*
+      CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL ZGBTRS( TRANS, N, KL, KU, NRHS, AFB, LDAFB, IPIV, X, LDX,
+     $             INFO )
+*
+*     Use iterative refinement to improve the computed solution and
+*     compute error bounds and backward error estimates for it.
+*
+      CALL ZGBRFS( TRANS, N, KL, KU, NRHS, AB, LDAB, AFB, LDAFB, IPIV,
+     $             B, LDB, X, LDX, FERR, BERR, WORK, RWORK, INFO )
+*
+*     Transform the solution matrix X to a solution of the original
+*     system.
+*
+      IF( NOTRAN ) THEN
+         IF( COLEQU ) THEN
+            DO 110 J = 1, NRHS
+               DO 100 I = 1, N
+                  X( I, J ) = C( I )*X( I, J )
+  100          CONTINUE
+  110       CONTINUE
+            DO 120 J = 1, NRHS
+               FERR( J ) = FERR( J ) / COLCND
+  120       CONTINUE
+         END IF
+      ELSE IF( ROWEQU ) THEN
+         DO 140 J = 1, NRHS
+            DO 130 I = 1, N
+               X( I, J ) = R( I )*X( I, J )
+  130       CONTINUE
+  140    CONTINUE
+         DO 150 J = 1, NRHS
+            FERR( J ) = FERR( J ) / ROWCND
+  150    CONTINUE
+      END IF
+*
+      RWORK( 1 ) = RPVGRW
+      RETURN
+*
+*     End of ZGBSVX
+*
+      END
diff --git a/costa/native/external/lapack/zgbtf2.f b/costa/native/external/lapack/zgbtf2.f
new file mode 100644
index 000000000..ca7fe23ad
--- /dev/null
+++ b/costa/native/external/lapack/zgbtf2.f
@@ -0,0 +1,203 @@
+      SUBROUTINE ZGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, KL, KU, LDAB, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGBTF2 computes an LU factorization of a complex m-by-n band matrix
+*  A using partial pivoting with row interchanges.
+*
+*  This is the unblocked version of the algorithm, calling Level 2 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  AB      (input/output) COMPLEX*16 array, dimension (LDAB,N)
+*          On entry, the matrix A in band storage, in rows KL+1 to
+*          2*KL+KU+1; rows 1 to KL of the array need not be set.
+*          The j-th column of A is stored in the j-th column of the
+*          array AB as follows:
+*          AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
+*
+*          On exit, details of the factorization: U is stored as an
+*          upper triangular band matrix with KL+KU superdiagonals in
+*          rows 1 to KL+KU+1, and the multipliers used during the
+*          factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
+*          See below for further details.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
+*
+*  IPIV    (output) INTEGER array, dimension (min(M,N))
+*          The pivot indices; for 1 <= i <= min(M,N), row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
+*               has been completed, but the factor U is exactly
+*               singular, and division by zero will occur if it is used
+*               to solve a system of equations.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  M = N = 6, KL = 2, KU = 1:
+*
+*  On entry:                       On exit:
+*
+*      *    *    *    +    +    +       *    *    *   u14  u25  u36
+*      *    *    +    +    +    +       *    *   u13  u24  u35  u46
+*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
+*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
+*     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
+*     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *
+*
+*  Array elements marked * are not used by the routine; elements marked
+*  + need not be set on entry, but are required by the routine to store
+*  elements of U, because of fill-in resulting from the row
+*  interchanges.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ),
+     $                   ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, JP, JU, KM, KV
+*     ..
+*     .. External Functions ..
+      INTEGER            IZAMAX
+      EXTERNAL           IZAMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGERU, ZSCAL, ZSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     KV is the number of superdiagonals in the factor U, allowing for
+*     fill-in.
+*
+      KV = KU + KL
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KL+KV+1 ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGBTF2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     Gaussian elimination with partial pivoting
+*
+*     Set fill-in elements in columns KU+2 to KV to zero.
+*
+      DO 20 J = KU + 2, MIN( KV, N )
+         DO 10 I = KV - J + 2, KL
+            AB( I, J ) = ZERO
+   10    CONTINUE
+   20 CONTINUE
+*
+*     JU is the index of the last column affected by the current stage
+*     of the factorization.
+*
+      JU = 1
+*
+      DO 40 J = 1, MIN( M, N )
+*
+*        Set fill-in elements in column J+KV to zero.
+*
+         IF( J+KV.LE.N ) THEN
+            DO 30 I = 1, KL
+               AB( I, J+KV ) = ZERO
+   30       CONTINUE
+         END IF
+*
+*        Find pivot and test for singularity. KM is the number of
+*        subdiagonal elements in the current column.
+*
+         KM = MIN( KL, M-J )
+         JP = IZAMAX( KM+1, AB( KV+1, J ), 1 )
+         IPIV( J ) = JP + J - 1
+         IF( AB( KV+JP, J ).NE.ZERO ) THEN
+            JU = MAX( JU, MIN( J+KU+JP-1, N ) )
+*
+*           Apply interchange to columns J to JU.
+*
+            IF( JP.NE.1 )
+     $         CALL ZSWAP( JU-J+1, AB( KV+JP, J ), LDAB-1,
+     $                     AB( KV+1, J ), LDAB-1 )
+            IF( KM.GT.0 ) THEN
+*
+*              Compute multipliers.
+*
+               CALL ZSCAL( KM, ONE / AB( KV+1, J ), AB( KV+2, J ), 1 )
+*
+*              Update trailing submatrix within the band.
+*
+               IF( JU.GT.J )
+     $            CALL ZGERU( KM, JU-J, -ONE, AB( KV+2, J ), 1,
+     $                        AB( KV, J+1 ), LDAB-1, AB( KV+1, J+1 ),
+     $                        LDAB-1 )
+            END IF
+         ELSE
+*
+*           If pivot is zero, set INFO to the index of the pivot
+*           unless a zero pivot has already been found.
+*
+            IF( INFO.EQ.0 )
+     $         INFO = J
+         END IF
+   40 CONTINUE
+      RETURN
+*
+*     End of ZGBTF2
+*
+      END
diff --git a/costa/native/external/lapack/zgbtrf.f b/costa/native/external/lapack/zgbtrf.f
new file mode 100644
index 000000000..b76a481c5
--- /dev/null
+++ b/costa/native/external/lapack/zgbtrf.f
@@ -0,0 +1,443 @@
+      SUBROUTINE ZGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, KL, KU, LDAB, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGBTRF computes an LU factorization of a complex m-by-n band matrix A
+*  using partial pivoting with row interchanges.
+*
+*  This is the blocked version of the algorithm, calling Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  AB      (input/output) COMPLEX*16 array, dimension (LDAB,N)
+*          On entry, the matrix A in band storage, in rows KL+1 to
+*          2*KL+KU+1; rows 1 to KL of the array need not be set.
+*          The j-th column of A is stored in the j-th column of the
+*          array AB as follows:
+*          AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
+*
+*          On exit, details of the factorization: U is stored as an
+*          upper triangular band matrix with KL+KU superdiagonals in
+*          rows 1 to KL+KU+1, and the multipliers used during the
+*          factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
+*          See below for further details.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
+*
+*  IPIV    (output) INTEGER array, dimension (min(M,N))
+*          The pivot indices; for 1 <= i <= min(M,N), row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
+*               has been completed, but the factor U is exactly
+*               singular, and division by zero will occur if it is used
+*               to solve a system of equations.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  M = N = 6, KL = 2, KU = 1:
+*
+*  On entry:                       On exit:
+*
+*      *    *    *    +    +    +       *    *    *   u14  u25  u36
+*      *    *    +    +    +    +       *    *   u13  u24  u35  u46
+*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
+*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
+*     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
+*     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *
+*
+*  Array elements marked * are not used by the routine; elements marked
+*  + need not be set on entry, but are required by the routine to store
+*  elements of U because of fill-in resulting from the row interchanges.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ),
+     $                   ZERO = ( 0.0D+0, 0.0D+0 ) )
+      INTEGER            NBMAX, LDWORK
+      PARAMETER          ( NBMAX = 64, LDWORK = NBMAX+1 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
+     $                   JU, K2, KM, KV, NB, NW
+      COMPLEX*16         TEMP
+*     ..
+*     .. Local Arrays ..
+      COMPLEX*16         WORK13( LDWORK, NBMAX ),
+     $                   WORK31( LDWORK, NBMAX )
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV, IZAMAX
+      EXTERNAL           ILAENV, IZAMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZCOPY, ZGBTF2, ZGEMM, ZGERU, ZLASWP,
+     $                   ZSCAL, ZSWAP, ZTRSM
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     KV is the number of superdiagonals in the factor U, allowing for
+*     fill-in
+*
+      KV = KU + KL
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KL+KV+1 ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGBTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment
+*
+      NB = ILAENV( 1, 'ZGBTRF', ' ', M, N, KL, KU )
+*
+*     The block size must not exceed the limit set by the size of the
+*     local arrays WORK13 and WORK31.
+*
+      NB = MIN( NB, NBMAX )
+*
+      IF( NB.LE.1 .OR. NB.GT.KL ) THEN
+*
+*        Use unblocked code
+*
+         CALL ZGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
+      ELSE
+*
+*        Use blocked code
+*
+*        Zero the superdiagonal elements of the work array WORK13
+*
+         DO 20 J = 1, NB
+            DO 10 I = 1, J - 1
+               WORK13( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+*
+*        Zero the subdiagonal elements of the work array WORK31
+*
+         DO 40 J = 1, NB
+            DO 30 I = J + 1, NB
+               WORK31( I, J ) = ZERO
+   30       CONTINUE
+   40    CONTINUE
+*
+*        Gaussian elimination with partial pivoting
+*
+*        Set fill-in elements in columns KU+2 to KV to zero
+*
+         DO 60 J = KU + 2, MIN( KV, N )
+            DO 50 I = KV - J + 2, KL
+               AB( I, J ) = ZERO
+   50       CONTINUE
+   60    CONTINUE
+*
+*        JU is the index of the last column affected by the current
+*        stage of the factorization
+*
+         JU = 1
+*
+         DO 180 J = 1, MIN( M, N ), NB
+            JB = MIN( NB, MIN( M, N )-J+1 )
+*
+*           The active part of the matrix is partitioned
+*
+*              A11   A12   A13
+*              A21   A22   A23
+*              A31   A32   A33
+*
+*           Here A11, A21 and A31 denote the current block of JB columns
+*           which is about to be factorized. The number of rows in the
+*           partitioning are JB, I2, I3 respectively, and the numbers
+*           of columns are JB, J2, J3. The superdiagonal elements of A13
+*           and the subdiagonal elements of A31 lie outside the band.
+*
+            I2 = MIN( KL-JB, M-J-JB+1 )
+            I3 = MIN( JB, M-J-KL+1 )
+*
+*           J2 and J3 are computed after JU has been updated.
+*
+*           Factorize the current block of JB columns
+*
+            DO 80 JJ = J, J + JB - 1
+*
+*              Set fill-in elements in column JJ+KV to zero
+*
+               IF( JJ+KV.LE.N ) THEN
+                  DO 70 I = 1, KL
+                     AB( I, JJ+KV ) = ZERO
+   70             CONTINUE
+               END IF
+*
+*              Find pivot and test for singularity. KM is the number of
+*              subdiagonal elements in the current column.
+*
+               KM = MIN( KL, M-JJ )
+               JP = IZAMAX( KM+1, AB( KV+1, JJ ), 1 )
+               IPIV( JJ ) = JP + JJ - J
+               IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
+                  JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
+                  IF( JP.NE.1 ) THEN
+*
+*                    Apply interchange to columns J to J+JB-1
+*
+                     IF( JP+JJ-1.LT.J+KL ) THEN
+*
+                        CALL ZSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
+     $                              AB( KV+JP+JJ-J, J ), LDAB-1 )
+                     ELSE
+*
+*                       The interchange affects columns J to JJ-1 of A31
+*                       which are stored in the work array WORK31
+*
+                        CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
+     $                              WORK31( JP+JJ-J-KL, 1 ), LDWORK )
+                        CALL ZSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
+     $                              AB( KV+JP, JJ ), LDAB-1 )
+                     END IF
+                  END IF
+*
+*                 Compute multipliers
+*
+                  CALL ZSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
+     $                        1 )
+*
+*                 Update trailing submatrix within the band and within
+*                 the current block. JM is the index of the last column
+*                 which needs to be updated.
+*
+                  JM = MIN( JU, J+JB-1 )
+                  IF( JM.GT.JJ )
+     $               CALL ZGERU( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
+     $                           AB( KV, JJ+1 ), LDAB-1,
+     $                           AB( KV+1, JJ+1 ), LDAB-1 )
+               ELSE
+*
+*                 If pivot is zero, set INFO to the index of the pivot
+*                 unless a zero pivot has already been found.
+*
+                  IF( INFO.EQ.0 )
+     $               INFO = JJ
+               END IF
+*
+*              Copy current column of A31 into the work array WORK31
+*
+               NW = MIN( JJ-J+1, I3 )
+               IF( NW.GT.0 )
+     $            CALL ZCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
+     $                        WORK31( 1, JJ-J+1 ), 1 )
+   80       CONTINUE
+            IF( J+JB.LE.N ) THEN
+*
+*              Apply the row interchanges to the other blocks.
+*
+               J2 = MIN( JU-J+1, KV ) - JB
+               J3 = MAX( 0, JU-J-KV+1 )
+*
+*              Use ZLASWP to apply the row interchanges to A12, A22, and
+*              A32.
+*
+               CALL ZLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB,
+     $                      IPIV( J ), 1 )
+*
+*              Adjust the pivot indices.
+*
+               DO 90 I = J, J + JB - 1
+                  IPIV( I ) = IPIV( I ) + J - 1
+   90          CONTINUE
+*
+*              Apply the row interchanges to A13, A23, and A33
+*              columnwise.
+*
+               K2 = J - 1 + JB + J2
+               DO 110 I = 1, J3
+                  JJ = K2 + I
+                  DO 100 II = J + I - 1, J + JB - 1
+                     IP = IPIV( II )
+                     IF( IP.NE.II ) THEN
+                        TEMP = AB( KV+1+II-JJ, JJ )
+                        AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
+                        AB( KV+1+IP-JJ, JJ ) = TEMP
+                     END IF
+  100             CONTINUE
+  110          CONTINUE
+*
+*              Update the relevant part of the trailing submatrix
+*
+               IF( J2.GT.0 ) THEN
+*
+*                 Update A12
+*
+                  CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+     $                        JB, J2, ONE, AB( KV+1, J ), LDAB-1,
+     $                        AB( KV+1-JB, J+JB ), LDAB-1 )
+*
+                  IF( I2.GT.0 ) THEN
+*
+*                    Update A22
+*
+                     CALL ZGEMM( 'No transpose', 'No transpose', I2, J2,
+     $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
+     $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
+     $                           AB( KV+1, J+JB ), LDAB-1 )
+                  END IF
+*
+                  IF( I3.GT.0 ) THEN
+*
+*                    Update A32
+*
+                     CALL ZGEMM( 'No transpose', 'No transpose', I3, J2,
+     $                           JB, -ONE, WORK31, LDWORK,
+     $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
+     $                           AB( KV+KL+1-JB, J+JB ), LDAB-1 )
+                  END IF
+               END IF
+*
+               IF( J3.GT.0 ) THEN
+*
+*                 Copy the lower triangle of A13 into the work array
+*                 WORK13
+*
+                  DO 130 JJ = 1, J3
+                     DO 120 II = JJ, JB
+                        WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
+  120                CONTINUE
+  130             CONTINUE
+*
+*                 Update A13 in the work array
+*
+                  CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
+     $                        JB, J3, ONE, AB( KV+1, J ), LDAB-1,
+     $                        WORK13, LDWORK )
+*
+                  IF( I2.GT.0 ) THEN
+*
+*                    Update A23
+*
+                     CALL ZGEMM( 'No transpose', 'No transpose', I2, J3,
+     $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
+     $                           WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
+     $                           LDAB-1 )
+                  END IF
+*
+                  IF( I3.GT.0 ) THEN
+*
+*                    Update A33
+*
+                     CALL ZGEMM( 'No transpose', 'No transpose', I3, J3,
+     $                           JB, -ONE, WORK31, LDWORK, WORK13,
+     $                           LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
+                  END IF
+*
+*                 Copy the lower triangle of A13 back into place
+*
+                  DO 150 JJ = 1, J3
+                     DO 140 II = JJ, JB
+                        AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
+  140                CONTINUE
+  150             CONTINUE
+               END IF
+            ELSE
+*
+*              Adjust the pivot indices.
+*
+               DO 160 I = J, J + JB - 1
+                  IPIV( I ) = IPIV( I ) + J - 1
+  160          CONTINUE
+            END IF
+*
+*           Partially undo the interchanges in the current block to
+*           restore the upper triangular form of A31 and copy the upper
+*           triangle of A31 back into place
+*
+            DO 170 JJ = J + JB - 1, J, -1
+               JP = IPIV( JJ ) - JJ + 1
+               IF( JP.NE.1 ) THEN
+*
+*                 Apply interchange to columns J to JJ-1
+*
+                  IF( JP+JJ-1.LT.J+KL ) THEN
+*
+*                    The interchange does not affect A31
+*
+                     CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
+     $                           AB( KV+JP+JJ-J, J ), LDAB-1 )
+                  ELSE
+*
+*                    The interchange does affect A31
+*
+                     CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
+     $                           WORK31( JP+JJ-J-KL, 1 ), LDWORK )
+                  END IF
+               END IF
+*
+*              Copy the current column of A31 back into place
+*
+               NW = MIN( I3, JJ-J+1 )
+               IF( NW.GT.0 )
+     $            CALL ZCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
+     $                        AB( KV+KL+1-JJ+J, JJ ), 1 )
+  170       CONTINUE
+  180    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZGBTRF
+*
+      END
diff --git a/costa/native/external/lapack/zgbtrs.f b/costa/native/external/lapack/zgbtrs.f
new file mode 100644
index 000000000..49984e89e
--- /dev/null
+++ b/costa/native/external/lapack/zgbtrs.f
@@ -0,0 +1,215 @@
+      SUBROUTINE ZGBTRS( TRANS, N, KL, KU, NRHS, AB, LDAB, IPIV, B, LDB,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            INFO, KL, KU, LDAB, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         AB( LDAB, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGBTRS solves a system of linear equations
+*     A * X = B,  A**T * X = B,  or  A**H * X = B
+*  with a general band matrix A using the LU factorization computed
+*  by ZGBTRF.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations.
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AB      (input) COMPLEX*16 array, dimension (LDAB,N)
+*          Details of the LU factorization of the band matrix A, as
+*          computed by ZGBTRF.  U is stored as an upper triangular band
+*          matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and
+*          the multipliers used during the factorization are stored in
+*          rows KL+KU+2 to 2*KL+KU+1.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices; for 1 <= i <= N, row i of the matrix was
+*          interchanged with row IPIV(i).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LNOTI, NOTRAN
+      INTEGER            I, J, KD, L, LM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGEMV, ZGERU, ZLACGV, ZSWAP, ZTBSV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $    LSAME( TRANS, 'C' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KL.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KU.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.( 2*KL+KU+1 ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGBTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      KD = KU + KL + 1
+      LNOTI = KL.GT.0
+*
+      IF( NOTRAN ) THEN
+*
+*        Solve  A*X = B.
+*
+*        Solve L*X = B, overwriting B with X.
+*
+*        L is represented as a product of permutations and unit lower
+*        triangular matrices L = P(1) * L(1) * ... * P(n-1) * L(n-1),
+*        where each transformation L(i) is a rank-one modification of
+*        the identity matrix.
+*
+         IF( LNOTI ) THEN
+            DO 10 J = 1, N - 1
+               LM = MIN( KL, N-J )
+               L = IPIV( J )
+               IF( L.NE.J )
+     $            CALL ZSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB )
+               CALL ZGERU( LM, NRHS, -ONE, AB( KD+1, J ), 1, B( J, 1 ),
+     $                     LDB, B( J+1, 1 ), LDB )
+   10       CONTINUE
+         END IF
+*
+         DO 20 I = 1, NRHS
+*
+*           Solve U*X = B, overwriting B with X.
+*
+            CALL ZTBSV( 'Upper', 'No transpose', 'Non-unit', N, KL+KU,
+     $                  AB, LDAB, B( 1, I ), 1 )
+   20    CONTINUE
+*
+      ELSE IF( LSAME( TRANS, 'T' ) ) THEN
+*
+*        Solve A**T * X = B.
+*
+         DO 30 I = 1, NRHS
+*
+*           Solve U**T * X = B, overwriting B with X.
+*
+            CALL ZTBSV( 'Upper', 'Transpose', 'Non-unit', N, KL+KU, AB,
+     $                  LDAB, B( 1, I ), 1 )
+   30    CONTINUE
+*
+*        Solve L**T * X = B, overwriting B with X.
+*
+         IF( LNOTI ) THEN
+            DO 40 J = N - 1, 1, -1
+               LM = MIN( KL, N-J )
+               CALL ZGEMV( 'Transpose', LM, NRHS, -ONE, B( J+1, 1 ),
+     $                     LDB, AB( KD+1, J ), 1, ONE, B( J, 1 ), LDB )
+               L = IPIV( J )
+               IF( L.NE.J )
+     $            CALL ZSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB )
+   40       CONTINUE
+         END IF
+*
+      ELSE
+*
+*        Solve A**H * X = B.
+*
+         DO 50 I = 1, NRHS
+*
+*           Solve U**H * X = B, overwriting B with X.
+*
+            CALL ZTBSV( 'Upper', 'Conjugate transpose', 'Non-unit', N,
+     $                  KL+KU, AB, LDAB, B( 1, I ), 1 )
+   50    CONTINUE
+*
+*        Solve L**H * X = B, overwriting B with X.
+*
+         IF( LNOTI ) THEN
+            DO 60 J = N - 1, 1, -1
+               LM = MIN( KL, N-J )
+               CALL ZLACGV( NRHS, B( J, 1 ), LDB )
+               CALL ZGEMV( 'Conjugate transpose', LM, NRHS, -ONE,
+     $                     B( J+1, 1 ), LDB, AB( KD+1, J ), 1, ONE,
+     $                     B( J, 1 ), LDB )
+               CALL ZLACGV( NRHS, B( J, 1 ), LDB )
+               L = IPIV( J )
+               IF( L.NE.J )
+     $            CALL ZSWAP( NRHS, B( L, 1 ), LDB, B( J, 1 ), LDB )
+   60       CONTINUE
+         END IF
+      END IF
+      RETURN
+*
+*     End of ZGBTRS
+*
+      END
diff --git a/costa/native/external/lapack/zgebak.f b/costa/native/external/lapack/zgebak.f
new file mode 100644
index 000000000..e87616905
--- /dev/null
+++ b/costa/native/external/lapack/zgebak.f
@@ -0,0 +1,190 @@
+      SUBROUTINE ZGEBAK( JOB, SIDE, N, ILO, IHI, SCALE, M, V, LDV,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOB, SIDE
+      INTEGER            IHI, ILO, INFO, LDV, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   SCALE( * )
+      COMPLEX*16         V( LDV, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGEBAK forms the right or left eigenvectors of a complex general
+*  matrix by backward transformation on the computed eigenvectors of the
+*  balanced matrix output by ZGEBAL.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          Specifies the type of backward transformation required:
+*          = 'N', do nothing, return immediately;
+*          = 'P', do backward transformation for permutation only;
+*          = 'S', do backward transformation for scaling only;
+*          = 'B', do backward transformations for both permutation and
+*                 scaling.
+*          JOB must be the same as the argument JOB supplied to ZGEBAL.
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'R':  V contains right eigenvectors;
+*          = 'L':  V contains left eigenvectors.
+*
+*  N       (input) INTEGER
+*          The number of rows of the matrix V.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          The integers ILO and IHI determined by ZGEBAL.
+*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+*
+*  SCALE   (input) DOUBLE PRECISION array, dimension (N)
+*          Details of the permutation and scaling factors, as returned
+*          by ZGEBAL.
+*
+*  M       (input) INTEGER
+*          The number of columns of the matrix V.  M >= 0.
+*
+*  V       (input/output) COMPLEX*16 array, dimension (LDV,M)
+*          On entry, the matrix of right or left eigenvectors to be
+*          transformed, as returned by ZHSEIN or ZTREVC.
+*          On exit, V is overwritten by the transformed eigenvectors.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V. LDV >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFTV, RIGHTV
+      INTEGER            I, II, K
+      DOUBLE PRECISION   S
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZDSCAL, ZSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and Test the input parameters
+*
+      RIGHTV = LSAME( SIDE, 'R' )
+      LEFTV = LSAME( SIDE, 'L' )
+*
+      INFO = 0
+      IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND.
+     $    .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN
+         INFO = -5
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -7
+      ELSE IF( LDV.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGEBAK', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+      IF( M.EQ.0 )
+     $   RETURN
+      IF( LSAME( JOB, 'N' ) )
+     $   RETURN
+*
+      IF( ILO.EQ.IHI )
+     $   GO TO 30
+*
+*     Backward balance
+*
+      IF( LSAME( JOB, 'S' ) .OR. LSAME( JOB, 'B' ) ) THEN
+*
+         IF( RIGHTV ) THEN
+            DO 10 I = ILO, IHI
+               S = SCALE( I )
+               CALL ZDSCAL( M, S, V( I, 1 ), LDV )
+   10       CONTINUE
+         END IF
+*
+         IF( LEFTV ) THEN
+            DO 20 I = ILO, IHI
+               S = ONE / SCALE( I )
+               CALL ZDSCAL( M, S, V( I, 1 ), LDV )
+   20       CONTINUE
+         END IF
+*
+      END IF
+*
+*     Backward permutation
+*
+*     For  I = ILO-1 step -1 until 1,
+*              IHI+1 step 1 until N do --
+*
+   30 CONTINUE
+      IF( LSAME( JOB, 'P' ) .OR. LSAME( JOB, 'B' ) ) THEN
+         IF( RIGHTV ) THEN
+            DO 40 II = 1, N
+               I = II
+               IF( I.GE.ILO .AND. I.LE.IHI )
+     $            GO TO 40
+               IF( I.LT.ILO )
+     $            I = ILO - II
+               K = SCALE( I )
+               IF( K.EQ.I )
+     $            GO TO 40
+               CALL ZSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV )
+   40       CONTINUE
+         END IF
+*
+         IF( LEFTV ) THEN
+            DO 50 II = 1, N
+               I = II
+               IF( I.GE.ILO .AND. I.LE.IHI )
+     $            GO TO 50
+               IF( I.LT.ILO )
+     $            I = ILO - II
+               K = SCALE( I )
+               IF( K.EQ.I )
+     $            GO TO 50
+               CALL ZSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV )
+   50       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZGEBAK
+*
+      END
diff --git a/costa/native/external/lapack/zgebal.f b/costa/native/external/lapack/zgebal.f
new file mode 100644
index 000000000..562abc7e3
--- /dev/null
+++ b/costa/native/external/lapack/zgebal.f
@@ -0,0 +1,331 @@
+      SUBROUTINE ZGEBAL( JOB, N, A, LDA, ILO, IHI, SCALE, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOB
+      INTEGER            IHI, ILO, INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   SCALE( * )
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGEBAL balances a general complex matrix A.  This involves, first,
+*  permuting A by a similarity transformation to isolate eigenvalues
+*  in the first 1 to ILO-1 and last IHI+1 to N elements on the
+*  diagonal; and second, applying a diagonal similarity transformation
+*  to rows and columns ILO to IHI to make the rows and columns as
+*  close in norm as possible.  Both steps are optional.
+*
+*  Balancing may reduce the 1-norm of the matrix, and improve the
+*  accuracy of the computed eigenvalues and/or eigenvectors.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          Specifies the operations to be performed on A:
+*          = 'N':  none:  simply set ILO = 1, IHI = N, SCALE(I) = 1.0
+*                  for i = 1,...,N;
+*          = 'P':  permute only;
+*          = 'S':  scale only;
+*          = 'B':  both permute and scale.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the input matrix A.
+*          On exit,  A is overwritten by the balanced matrix.
+*          If JOB = 'N', A is not referenced.
+*          See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  ILO     (output) INTEGER
+*  IHI     (output) INTEGER
+*          ILO and IHI are set to integers such that on exit
+*          A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N.
+*          If JOB = 'N' or 'S', ILO = 1 and IHI = N.
+*
+*  SCALE   (output) DOUBLE PRECISION array, dimension (N)
+*          Details of the permutations and scaling factors applied to
+*          A.  If P(j) is the index of the row and column interchanged
+*          with row and column j and D(j) is the scaling factor
+*          applied to row and column j, then
+*          SCALE(j) = P(j)    for j = 1,...,ILO-1
+*                   = D(j)    for j = ILO,...,IHI
+*                   = P(j)    for j = IHI+1,...,N.
+*          The order in which the interchanges are made is N to IHI+1,
+*          then 1 to ILO-1.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The permutations consist of row and column interchanges which put
+*  the matrix in the form
+*
+*             ( T1   X   Y  )
+*     P A P = (  0   B   Z  )
+*             (  0   0   T2 )
+*
+*  where T1 and T2 are upper triangular matrices whose eigenvalues lie
+*  along the diagonal.  The column indices ILO and IHI mark the starting
+*  and ending columns of the submatrix B. Balancing consists of applying
+*  a diagonal similarity transformation inv(D) * B * D to make the
+*  1-norms of each row of B and its corresponding column nearly equal.
+*  The output matrix is
+*
+*     ( T1     X*D          Y    )
+*     (  0  inv(D)*B*D  inv(D)*Z ).
+*     (  0      0           T2   )
+*
+*  Information about the permutations P and the diagonal matrix D is
+*  returned in the vector SCALE.
+*
+*  This subroutine is based on the EISPACK routine CBAL.
+*
+*  Modified by Tzu-Yi Chen, Computer Science Division, University of
+*    California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+      DOUBLE PRECISION   SCLFAC
+      PARAMETER          ( SCLFAC = 0.8D+1 )
+      DOUBLE PRECISION   FACTOR
+      PARAMETER          ( FACTOR = 0.95D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOCONV
+      INTEGER            I, ICA, IEXC, IRA, J, K, L, M
+      DOUBLE PRECISION   C, CA, F, G, R, RA, S, SFMAX1, SFMAX2, SFMIN1,
+     $                   SFMIN2
+      COMPLEX*16         CDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IZAMAX
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, IZAMAX, DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZDSCAL, ZSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX, MIN
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND.
+     $    .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGEBAL', -INFO )
+         RETURN
+      END IF
+*
+      K = 1
+      L = N
+*
+      IF( N.EQ.0 )
+     $   GO TO 210
+*
+      IF( LSAME( JOB, 'N' ) ) THEN
+         DO 10 I = 1, N
+            SCALE( I ) = ONE
+   10    CONTINUE
+         GO TO 210
+      END IF
+*
+      IF( LSAME( JOB, 'S' ) )
+     $   GO TO 120
+*
+*     Permutation to isolate eigenvalues if possible
+*
+      GO TO 50
+*
+*     Row and column exchange.
+*
+   20 CONTINUE
+      SCALE( M ) = J
+      IF( J.EQ.M )
+     $   GO TO 30
+*
+      CALL ZSWAP( L, A( 1, J ), 1, A( 1, M ), 1 )
+      CALL ZSWAP( N-K+1, A( J, K ), LDA, A( M, K ), LDA )
+*
+   30 CONTINUE
+      GO TO ( 40, 80 )IEXC
+*
+*     Search for rows isolating an eigenvalue and push them down.
+*
+   40 CONTINUE
+      IF( L.EQ.1 )
+     $   GO TO 210
+      L = L - 1
+*
+   50 CONTINUE
+      DO 70 J = L, 1, -1
+*
+         DO 60 I = 1, L
+            IF( I.EQ.J )
+     $         GO TO 60
+            IF( DBLE( A( J, I ) ).NE.ZERO .OR. DIMAG( A( J, I ) ).NE.
+     $          ZERO )GO TO 70
+   60    CONTINUE
+*
+         M = L
+         IEXC = 1
+         GO TO 20
+   70 CONTINUE
+*
+      GO TO 90
+*
+*     Search for columns isolating an eigenvalue and push them left.
+*
+   80 CONTINUE
+      K = K + 1
+*
+   90 CONTINUE
+      DO 110 J = K, L
+*
+         DO 100 I = K, L
+            IF( I.EQ.J )
+     $         GO TO 100
+            IF( DBLE( A( I, J ) ).NE.ZERO .OR. DIMAG( A( I, J ) ).NE.
+     $          ZERO )GO TO 110
+  100    CONTINUE
+*
+         M = K
+         IEXC = 2
+         GO TO 20
+  110 CONTINUE
+*
+  120 CONTINUE
+      DO 130 I = K, L
+         SCALE( I ) = ONE
+  130 CONTINUE
+*
+      IF( LSAME( JOB, 'P' ) )
+     $   GO TO 210
+*
+*     Balance the submatrix in rows K to L.
+*
+*     Iterative loop for norm reduction
+*
+      SFMIN1 = DLAMCH( 'S' ) / DLAMCH( 'P' )
+      SFMAX1 = ONE / SFMIN1
+      SFMIN2 = SFMIN1*SCLFAC
+      SFMAX2 = ONE / SFMIN2
+  140 CONTINUE
+      NOCONV = .FALSE.
+*
+      DO 200 I = K, L
+         C = ZERO
+         R = ZERO
+*
+         DO 150 J = K, L
+            IF( J.EQ.I )
+     $         GO TO 150
+            C = C + CABS1( A( J, I ) )
+            R = R + CABS1( A( I, J ) )
+  150    CONTINUE
+         ICA = IZAMAX( L, A( 1, I ), 1 )
+         CA = ABS( A( ICA, I ) )
+         IRA = IZAMAX( N-K+1, A( I, K ), LDA )
+         RA = ABS( A( I, IRA+K-1 ) )
+*
+*        Guard against zero C or R due to underflow.
+*
+         IF( C.EQ.ZERO .OR. R.EQ.ZERO )
+     $      GO TO 200
+         G = R / SCLFAC
+         F = ONE
+         S = C + R
+  160    CONTINUE
+         IF( C.GE.G .OR. MAX( F, C, CA ).GE.SFMAX2 .OR.
+     $       MIN( R, G, RA ).LE.SFMIN2 )GO TO 170
+         F = F*SCLFAC
+         C = C*SCLFAC
+         CA = CA*SCLFAC
+         R = R / SCLFAC
+         G = G / SCLFAC
+         RA = RA / SCLFAC
+         GO TO 160
+*
+  170    CONTINUE
+         G = C / SCLFAC
+  180    CONTINUE
+         IF( G.LT.R .OR. MAX( R, RA ).GE.SFMAX2 .OR.
+     $       MIN( F, C, G, CA ).LE.SFMIN2 )GO TO 190
+         F = F / SCLFAC
+         C = C / SCLFAC
+         G = G / SCLFAC
+         CA = CA / SCLFAC
+         R = R*SCLFAC
+         RA = RA*SCLFAC
+         GO TO 180
+*
+*        Now balance.
+*
+  190    CONTINUE
+         IF( ( C+R ).GE.FACTOR*S )
+     $      GO TO 200
+         IF( F.LT.ONE .AND. SCALE( I ).LT.ONE ) THEN
+            IF( F*SCALE( I ).LE.SFMIN1 )
+     $         GO TO 200
+         END IF
+         IF( F.GT.ONE .AND. SCALE( I ).GT.ONE ) THEN
+            IF( SCALE( I ).GE.SFMAX1 / F )
+     $         GO TO 200
+         END IF
+         G = ONE / F
+         SCALE( I ) = SCALE( I )*F
+         NOCONV = .TRUE.
+*
+         CALL ZDSCAL( N-K+1, G, A( I, K ), LDA )
+         CALL ZDSCAL( L, F, A( 1, I ), 1 )
+*
+  200 CONTINUE
+*
+      IF( NOCONV )
+     $   GO TO 140
+*
+  210 CONTINUE
+      ILO = K
+      IHI = L
+*
+      RETURN
+*
+*     End of ZGEBAL
+*
+      END
diff --git a/costa/native/external/lapack/zgebd2.f b/costa/native/external/lapack/zgebd2.f
new file mode 100644
index 000000000..5552fcf17
--- /dev/null
+++ b/costa/native/external/lapack/zgebd2.f
@@ -0,0 +1,249 @@
+      SUBROUTINE ZGEBD2( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * ), E( * )
+      COMPLEX*16         A( LDA, * ), TAUP( * ), TAUQ( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGEBD2 reduces a complex general m by n matrix A to upper or lower
+*  real bidiagonal form B by a unitary transformation: Q' * A * P = B.
+*
+*  If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows in the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns in the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the m by n general matrix to be reduced.
+*          On exit,
+*          if m >= n, the diagonal and the first superdiagonal are
+*            overwritten with the upper bidiagonal matrix B; the
+*            elements below the diagonal, with the array TAUQ, represent
+*            the unitary matrix Q as a product of elementary
+*            reflectors, and the elements above the first superdiagonal,
+*            with the array TAUP, represent the unitary matrix P as
+*            a product of elementary reflectors;
+*          if m < n, the diagonal and the first subdiagonal are
+*            overwritten with the lower bidiagonal matrix B; the
+*            elements below the first subdiagonal, with the array TAUQ,
+*            represent the unitary matrix Q as a product of
+*            elementary reflectors, and the elements above the diagonal,
+*            with the array TAUP, represent the unitary matrix P as
+*            a product of elementary reflectors.
+*          See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  D       (output) DOUBLE PRECISION array, dimension (min(M,N))
+*          The diagonal elements of the bidiagonal matrix B:
+*          D(i) = A(i,i).
+*
+*  E       (output) DOUBLE PRECISION array, dimension (min(M,N)-1)
+*          The off-diagonal elements of the bidiagonal matrix B:
+*          if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
+*          if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
+*
+*  TAUQ    (output) COMPLEX*16 array dimension (min(M,N))
+*          The scalar factors of the elementary reflectors which
+*          represent the unitary matrix Q. See Further Details.
+*
+*  TAUP    (output) COMPLEX*16 array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors which
+*          represent the unitary matrix P. See Further Details.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (max(M,N))
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The matrices Q and P are represented as products of elementary
+*  reflectors:
+*
+*  If m >= n,
+*
+*     Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1)
+*
+*  Each H(i) and G(i) has the form:
+*
+*     H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+*
+*  where tauq and taup are complex scalars, and v and u are complex
+*  vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in
+*  A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in
+*  A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
+*
+*  If m < n,
+*
+*     Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m)
+*
+*  Each H(i) and G(i) has the form:
+*
+*     H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+*
+*  where tauq and taup are complex scalars, v and u are complex vectors;
+*  v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
+*  u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
+*  tauq is stored in TAUQ(i) and taup in TAUP(i).
+*
+*  The contents of A on exit are illustrated by the following examples:
+*
+*  m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
+*
+*    (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 )
+*    (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 )
+*    (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 )
+*    (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 )
+*    (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 )
+*    (  v1  v2  v3  v4  v5 )
+*
+*  where d and e denote diagonal and off-diagonal elements of B, vi
+*  denotes an element of the vector defining H(i), and ui an element of
+*  the vector defining G(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO, ONE
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      COMPLEX*16         ALPHA
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLACGV, ZLARF, ZLARFG
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.LT.0 ) THEN
+         CALL XERBLA( 'ZGEBD2', -INFO )
+         RETURN
+      END IF
+*
+      IF( M.GE.N ) THEN
+*
+*        Reduce to upper bidiagonal form
+*
+         DO 10 I = 1, N
+*
+*           Generate elementary reflector H(i) to annihilate A(i+1:m,i)
+*
+            ALPHA = A( I, I )
+            CALL ZLARFG( M-I+1, ALPHA, A( MIN( I+1, M ), I ), 1,
+     $                   TAUQ( I ) )
+            D( I ) = ALPHA
+            A( I, I ) = ONE
+*
+*           Apply H(i)' to A(i:m,i+1:n) from the left
+*
+            CALL ZLARF( 'Left', M-I+1, N-I, A( I, I ), 1,
+     $                  DCONJG( TAUQ( I ) ), A( I, I+1 ), LDA, WORK )
+            A( I, I ) = D( I )
+*
+            IF( I.LT.N ) THEN
+*
+*              Generate elementary reflector G(i) to annihilate
+*              A(i,i+2:n)
+*
+               CALL ZLACGV( N-I, A( I, I+1 ), LDA )
+               ALPHA = A( I, I+1 )
+               CALL ZLARFG( N-I, ALPHA, A( I, MIN( I+2, N ) ), LDA,
+     $                      TAUP( I ) )
+               E( I ) = ALPHA
+               A( I, I+1 ) = ONE
+*
+*              Apply G(i) to A(i+1:m,i+1:n) from the right
+*
+               CALL ZLARF( 'Right', M-I, N-I, A( I, I+1 ), LDA,
+     $                     TAUP( I ), A( I+1, I+1 ), LDA, WORK )
+               CALL ZLACGV( N-I, A( I, I+1 ), LDA )
+               A( I, I+1 ) = E( I )
+            ELSE
+               TAUP( I ) = ZERO
+            END IF
+   10    CONTINUE
+      ELSE
+*
+*        Reduce to lower bidiagonal form
+*
+         DO 20 I = 1, M
+*
+*           Generate elementary reflector G(i) to annihilate A(i,i+1:n)
+*
+            CALL ZLACGV( N-I+1, A( I, I ), LDA )
+            ALPHA = A( I, I )
+            CALL ZLARFG( N-I+1, ALPHA, A( I, MIN( I+1, N ) ), LDA,
+     $                   TAUP( I ) )
+            D( I ) = ALPHA
+            A( I, I ) = ONE
+*
+*           Apply G(i) to A(i+1:m,i:n) from the right
+*
+            CALL ZLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAUP( I ),
+     $                  A( MIN( I+1, M ), I ), LDA, WORK )
+            CALL ZLACGV( N-I+1, A( I, I ), LDA )
+            A( I, I ) = D( I )
+*
+            IF( I.LT.M ) THEN
+*
+*              Generate elementary reflector H(i) to annihilate
+*              A(i+2:m,i)
+*
+               ALPHA = A( I+1, I )
+               CALL ZLARFG( M-I, ALPHA, A( MIN( I+2, M ), I ), 1,
+     $                      TAUQ( I ) )
+               E( I ) = ALPHA
+               A( I+1, I ) = ONE
+*
+*              Apply H(i)' to A(i+1:m,i+1:n) from the left
+*
+               CALL ZLARF( 'Left', M-I, N-I, A( I+1, I ), 1,
+     $                     DCONJG( TAUQ( I ) ), A( I+1, I+1 ), LDA,
+     $                     WORK )
+               A( I+1, I ) = E( I )
+            ELSE
+               TAUQ( I ) = ZERO
+            END IF
+   20    CONTINUE
+      END IF
+      RETURN
+*
+*     End of ZGEBD2
+*
+      END
diff --git a/costa/native/external/lapack/zgebrd.f b/costa/native/external/lapack/zgebrd.f
new file mode 100644
index 000000000..34b4ba6be
--- /dev/null
+++ b/costa/native/external/lapack/zgebrd.f
@@ -0,0 +1,269 @@
+      SUBROUTINE ZGEBRD( M, N, A, LDA, D, E, TAUQ, TAUP, WORK, LWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * ), E( * )
+      COMPLEX*16         A( LDA, * ), TAUP( * ), TAUQ( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGEBRD reduces a general complex M-by-N matrix A to upper or lower
+*  bidiagonal form B by a unitary transformation: Q**H * A * P = B.
+*
+*  If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows in the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns in the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the M-by-N general matrix to be reduced.
+*          On exit,
+*          if m >= n, the diagonal and the first superdiagonal are
+*            overwritten with the upper bidiagonal matrix B; the
+*            elements below the diagonal, with the array TAUQ, represent
+*            the unitary matrix Q as a product of elementary
+*            reflectors, and the elements above the first superdiagonal,
+*            with the array TAUP, represent the unitary matrix P as
+*            a product of elementary reflectors;
+*          if m < n, the diagonal and the first subdiagonal are
+*            overwritten with the lower bidiagonal matrix B; the
+*            elements below the first subdiagonal, with the array TAUQ,
+*            represent the unitary matrix Q as a product of
+*            elementary reflectors, and the elements above the diagonal,
+*            with the array TAUP, represent the unitary matrix P as
+*            a product of elementary reflectors.
+*          See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  D       (output) DOUBLE PRECISION array, dimension (min(M,N))
+*          The diagonal elements of the bidiagonal matrix B:
+*          D(i) = A(i,i).
+*
+*  E       (output) DOUBLE PRECISION array, dimension (min(M,N)-1)
+*          The off-diagonal elements of the bidiagonal matrix B:
+*          if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
+*          if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
+*
+*  TAUQ    (output) COMPLEX*16 array dimension (min(M,N))
+*          The scalar factors of the elementary reflectors which
+*          represent the unitary matrix Q. See Further Details.
+*
+*  TAUP    (output) COMPLEX*16 array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors which
+*          represent the unitary matrix P. See Further Details.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.  LWORK >= max(1,M,N).
+*          For optimum performance LWORK >= (M+N)*NB, where NB
+*          is the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The matrices Q and P are represented as products of elementary
+*  reflectors:
+*
+*  If m >= n,
+*
+*     Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1)
+*
+*  Each H(i) and G(i) has the form:
+*
+*     H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+*
+*  where tauq and taup are complex scalars, and v and u are complex
+*  vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in
+*  A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in
+*  A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
+*
+*  If m < n,
+*
+*     Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m)
+*
+*  Each H(i) and G(i) has the form:
+*
+*     H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+*
+*  where tauq and taup are complex scalars, and v and u are complex
+*  vectors; v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in
+*  A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in
+*  A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
+*
+*  The contents of A on exit are illustrated by the following examples:
+*
+*  m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
+*
+*    (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 )
+*    (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 )
+*    (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 )
+*    (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 )
+*    (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 )
+*    (  v1  v2  v3  v4  v5 )
+*
+*  where d and e denote diagonal and off-diagonal elements of B, vi
+*  denotes an element of the vector defining H(i), and ui an element of
+*  the vector defining G(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IINFO, J, LDWRKX, LDWRKY, LWKOPT, MINMN, NB,
+     $                   NBMIN, NX
+      DOUBLE PRECISION   WS
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGEBD2, ZGEMM, ZLABRD
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      NB = MAX( 1, ILAENV( 1, 'ZGEBRD', ' ', M, N, -1, -1 ) )
+      LWKOPT = ( M+N )*NB
+      WORK( 1 ) = DBLE( LWKOPT )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, M, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.LT.0 ) THEN
+         CALL XERBLA( 'ZGEBRD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      MINMN = MIN( M, N )
+      IF( MINMN.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      WS = MAX( M, N )
+      LDWRKX = M
+      LDWRKY = N
+*
+      IF( NB.GT.1 .AND. NB.LT.MINMN ) THEN
+*
+*        Set the crossover point NX.
+*
+         NX = MAX( NB, ILAENV( 3, 'ZGEBRD', ' ', M, N, -1, -1 ) )
+*
+*        Determine when to switch from blocked to unblocked code.
+*
+         IF( NX.LT.MINMN ) THEN
+            WS = ( M+N )*NB
+            IF( LWORK.LT.WS ) THEN
+*
+*              Not enough work space for the optimal NB, consider using
+*              a smaller block size.
+*
+               NBMIN = ILAENV( 2, 'ZGEBRD', ' ', M, N, -1, -1 )
+               IF( LWORK.GE.( M+N )*NBMIN ) THEN
+                  NB = LWORK / ( M+N )
+               ELSE
+                  NB = 1
+                  NX = MINMN
+               END IF
+            END IF
+         END IF
+      ELSE
+         NX = MINMN
+      END IF
+*
+      DO 30 I = 1, MINMN - NX, NB
+*
+*        Reduce rows and columns i:i+ib-1 to bidiagonal form and return
+*        the matrices X and Y which are needed to update the unreduced
+*        part of the matrix
+*
+         CALL ZLABRD( M-I+1, N-I+1, NB, A( I, I ), LDA, D( I ), E( I ),
+     $                TAUQ( I ), TAUP( I ), WORK, LDWRKX,
+     $                WORK( LDWRKX*NB+1 ), LDWRKY )
+*
+*        Update the trailing submatrix A(i+ib:m,i+ib:n), using
+*        an update of the form  A := A - V*Y' - X*U'
+*
+         CALL ZGEMM( 'No transpose', 'Conjugate transpose', M-I-NB+1,
+     $               N-I-NB+1, NB, -ONE, A( I+NB, I ), LDA,
+     $               WORK( LDWRKX*NB+NB+1 ), LDWRKY, ONE,
+     $               A( I+NB, I+NB ), LDA )
+         CALL ZGEMM( 'No transpose', 'No transpose', M-I-NB+1, N-I-NB+1,
+     $               NB, -ONE, WORK( NB+1 ), LDWRKX, A( I, I+NB ), LDA,
+     $               ONE, A( I+NB, I+NB ), LDA )
+*
+*        Copy diagonal and off-diagonal elements of B back into A
+*
+         IF( M.GE.N ) THEN
+            DO 10 J = I, I + NB - 1
+               A( J, J ) = D( J )
+               A( J, J+1 ) = E( J )
+   10       CONTINUE
+         ELSE
+            DO 20 J = I, I + NB - 1
+               A( J, J ) = D( J )
+               A( J+1, J ) = E( J )
+   20       CONTINUE
+         END IF
+   30 CONTINUE
+*
+*     Use unblocked code to reduce the remainder of the matrix
+*
+      CALL ZGEBD2( M-I+1, N-I+1, A( I, I ), LDA, D( I ), E( I ),
+     $             TAUQ( I ), TAUP( I ), WORK, IINFO )
+      WORK( 1 ) = WS
+      RETURN
+*
+*     End of ZGEBRD
+*
+      END
diff --git a/costa/native/external/lapack/zgecon.f b/costa/native/external/lapack/zgecon.f
new file mode 100644
index 000000000..0a09b0b11
--- /dev/null
+++ b/costa/native/external/lapack/zgecon.f
@@ -0,0 +1,189 @@
+      SUBROUTINE ZGECON( NORM, N, A, LDA, ANORM, RCOND, WORK, RWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            INFO, LDA, N
+      DOUBLE PRECISION   ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   RWORK( * )
+      COMPLEX*16         A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGECON estimates the reciprocal of the condition number of a general
+*  complex matrix A, in either the 1-norm or the infinity-norm, using
+*  the LU factorization computed by ZGETRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as
+*     RCOND = 1 / ( norm(A) * norm(inv(A)) ).
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies whether the 1-norm condition number or the
+*          infinity-norm condition number is required:
+*          = '1' or 'O':  1-norm;
+*          = 'I':         Infinity-norm.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,N)
+*          The factors L and U from the factorization A = P*L*U
+*          as computed by ZGETRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  ANORM   (input) DOUBLE PRECISION
+*          If NORM = '1' or 'O', the 1-norm of the original matrix A.
+*          If NORM = 'I', the infinity-norm of the original matrix A.
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(norm(A) * norm(inv(A))).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ONENRM
+      CHARACTER          NORMIN
+      INTEGER            IX, KASE, KASE1
+      DOUBLE PRECISION   AINVNM, SCALE, SL, SMLNUM, SU
+      COMPLEX*16         ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IZAMAX
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, IZAMAX, DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZDRSCL, ZLACON, ZLATRS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' )
+      IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGECON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.EQ.ZERO ) THEN
+         RETURN
+      END IF
+*
+      SMLNUM = DLAMCH( 'Safe minimum' )
+*
+*     Estimate the norm of inv(A).
+*
+      AINVNM = ZERO
+      NORMIN = 'N'
+      IF( ONENRM ) THEN
+         KASE1 = 1
+      ELSE
+         KASE1 = 2
+      END IF
+      KASE = 0
+   10 CONTINUE
+      CALL ZLACON( N, WORK( N+1 ), WORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+         IF( KASE.EQ.KASE1 ) THEN
+*
+*           Multiply by inv(L).
+*
+            CALL ZLATRS( 'Lower', 'No transpose', 'Unit', NORMIN, N, A,
+     $                   LDA, WORK, SL, RWORK, INFO )
+*
+*           Multiply by inv(U).
+*
+            CALL ZLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   A, LDA, WORK, SU, RWORK( N+1 ), INFO )
+         ELSE
+*
+*           Multiply by inv(U').
+*
+            CALL ZLATRS( 'Upper', 'Conjugate transpose', 'Non-unit',
+     $                   NORMIN, N, A, LDA, WORK, SU, RWORK( N+1 ),
+     $                   INFO )
+*
+*           Multiply by inv(L').
+*
+            CALL ZLATRS( 'Lower', 'Conjugate transpose', 'Unit', NORMIN,
+     $                   N, A, LDA, WORK, SL, RWORK, INFO )
+         END IF
+*
+*        Divide X by 1/(SL*SU) if doing so will not cause overflow.
+*
+         SCALE = SL*SU
+         NORMIN = 'Y'
+         IF( SCALE.NE.ONE ) THEN
+            IX = IZAMAX( N, WORK, 1 )
+            IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO )
+     $         GO TO 20
+            CALL ZDRSCL( N, SCALE, WORK, 1 )
+         END IF
+         GO TO 10
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+   20 CONTINUE
+      RETURN
+*
+*     End of ZGECON
+*
+      END
diff --git a/costa/native/external/lapack/zgeequ.f b/costa/native/external/lapack/zgeequ.f
new file mode 100644
index 000000000..809e5f095
--- /dev/null
+++ b/costa/native/external/lapack/zgeequ.f
@@ -0,0 +1,234 @@
+      SUBROUTINE ZGEEQU( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+      DOUBLE PRECISION   AMAX, COLCND, ROWCND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   C( * ), R( * )
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGEEQU computes row and column scalings intended to equilibrate an
+*  M-by-N matrix A and reduce its condition number.  R returns the row
+*  scale factors and C the column scale factors, chosen to try to make
+*  the largest element in each row and column of the matrix B with
+*  elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1.
+*
+*  R(i) and C(j) are restricted to be between SMLNUM = smallest safe
+*  number and BIGNUM = largest safe number.  Use of these scaling
+*  factors is not guaranteed to reduce the condition number of A but
+*  works well in practice.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,N)
+*          The M-by-N matrix whose equilibration factors are
+*          to be computed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  R       (output) DOUBLE PRECISION array, dimension (M)
+*          If INFO = 0 or INFO > M, R contains the row scale factors
+*          for A.
+*
+*  C       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0,  C contains the column scale factors for A.
+*
+*  ROWCND  (output) DOUBLE PRECISION
+*          If INFO = 0 or INFO > M, ROWCND contains the ratio of the
+*          smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and
+*          AMAX is neither too large nor too small, it is not worth
+*          scaling by R.
+*
+*  COLCND  (output) DOUBLE PRECISION
+*          If INFO = 0, COLCND contains the ratio of the smallest
+*          C(i) to the largest C(i).  If COLCND >= 0.1, it is not
+*          worth scaling by C.
+*
+*  AMAX    (output) DOUBLE PRECISION
+*          Absolute value of largest matrix element.  If AMAX is very
+*          close to overflow or very close to underflow, the matrix
+*          should be scaled.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i,  and i is
+*                <= M:  the i-th row of A is exactly zero
+*                >  M:  the (i-M)-th column of A is exactly zero
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      DOUBLE PRECISION   BIGNUM, RCMAX, RCMIN, SMLNUM
+      COMPLEX*16         ZDUM
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX, MIN
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGEEQU', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         ROWCND = ONE
+         COLCND = ONE
+         AMAX = ZERO
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SMLNUM = DLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+*
+*     Compute row scale factors.
+*
+      DO 10 I = 1, M
+         R( I ) = ZERO
+   10 CONTINUE
+*
+*     Find the maximum element in each row.
+*
+      DO 30 J = 1, N
+         DO 20 I = 1, M
+            R( I ) = MAX( R( I ), CABS1( A( I, J ) ) )
+   20    CONTINUE
+   30 CONTINUE
+*
+*     Find the maximum and minimum scale factors.
+*
+      RCMIN = BIGNUM
+      RCMAX = ZERO
+      DO 40 I = 1, M
+         RCMAX = MAX( RCMAX, R( I ) )
+         RCMIN = MIN( RCMIN, R( I ) )
+   40 CONTINUE
+      AMAX = RCMAX
+*
+      IF( RCMIN.EQ.ZERO ) THEN
+*
+*        Find the first zero scale factor and return an error code.
+*
+         DO 50 I = 1, M
+            IF( R( I ).EQ.ZERO ) THEN
+               INFO = I
+               RETURN
+            END IF
+   50    CONTINUE
+      ELSE
+*
+*        Invert the scale factors.
+*
+         DO 60 I = 1, M
+            R( I ) = ONE / MIN( MAX( R( I ), SMLNUM ), BIGNUM )
+   60    CONTINUE
+*
+*        Compute ROWCND = min(R(I)) / max(R(I))
+*
+         ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+      END IF
+*
+*     Compute column scale factors
+*
+      DO 70 J = 1, N
+         C( J ) = ZERO
+   70 CONTINUE
+*
+*     Find the maximum element in each column,
+*     assuming the row scaling computed above.
+*
+      DO 90 J = 1, N
+         DO 80 I = 1, M
+            C( J ) = MAX( C( J ), CABS1( A( I, J ) )*R( I ) )
+   80    CONTINUE
+   90 CONTINUE
+*
+*     Find the maximum and minimum scale factors.
+*
+      RCMIN = BIGNUM
+      RCMAX = ZERO
+      DO 100 J = 1, N
+         RCMIN = MIN( RCMIN, C( J ) )
+         RCMAX = MAX( RCMAX, C( J ) )
+  100 CONTINUE
+*
+      IF( RCMIN.EQ.ZERO ) THEN
+*
+*        Find the first zero scale factor and return an error code.
+*
+         DO 110 J = 1, N
+            IF( C( J ).EQ.ZERO ) THEN
+               INFO = M + J
+               RETURN
+            END IF
+  110    CONTINUE
+      ELSE
+*
+*        Invert the scale factors.
+*
+         DO 120 J = 1, N
+            C( J ) = ONE / MIN( MAX( C( J ), SMLNUM ), BIGNUM )
+  120    CONTINUE
+*
+*        Compute COLCND = min(C(J)) / max(C(J))
+*
+         COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+      END IF
+*
+      RETURN
+*
+*     End of ZGEEQU
+*
+      END
diff --git a/costa/native/external/lapack/zgees.f b/costa/native/external/lapack/zgees.f
new file mode 100644
index 000000000..0f5d0b64b
--- /dev/null
+++ b/costa/native/external/lapack/zgees.f
@@ -0,0 +1,322 @@
+      SUBROUTINE ZGEES( JOBVS, SORT, SELECT, N, A, LDA, SDIM, W, VS,
+     $                  LDVS, WORK, LWORK, RWORK, BWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVS, SORT
+      INTEGER            INFO, LDA, LDVS, LWORK, N, SDIM
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            BWORK( * )
+      DOUBLE PRECISION   RWORK( * )
+      COMPLEX*16         A( LDA, * ), VS( LDVS, * ), W( * ), WORK( * )
+*     ..
+*     .. Function Arguments ..
+      LOGICAL            SELECT
+      EXTERNAL           SELECT
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGEES computes for an N-by-N complex nonsymmetric matrix A, the
+*  eigenvalues, the Schur form T, and, optionally, the matrix of Schur
+*  vectors Z.  This gives the Schur factorization A = Z*T*(Z**H).
+*
+*  Optionally, it also orders the eigenvalues on the diagonal of the
+*  Schur form so that selected eigenvalues are at the top left.
+*  The leading columns of Z then form an orthonormal basis for the
+*  invariant subspace corresponding to the selected eigenvalues.
+*
+*  A complex matrix is in Schur form if it is upper triangular.
+*
+*  Arguments
+*  =========
+*
+*  JOBVS   (input) CHARACTER*1
+*          = 'N': Schur vectors are not computed;
+*          = 'V': Schur vectors are computed.
+*
+*  SORT    (input) CHARACTER*1
+*          Specifies whether or not to order the eigenvalues on the
+*          diagonal of the Schur form.
+*          = 'N': Eigenvalues are not ordered:
+*          = 'S': Eigenvalues are ordered (see SELECT).
+*
+*  SELECT  (input) LOGICAL FUNCTION of one COMPLEX*16 argument
+*          SELECT must be declared EXTERNAL in the calling subroutine.
+*          If SORT = 'S', SELECT is used to select eigenvalues to order
+*          to the top left of the Schur form.
+*          IF SORT = 'N', SELECT is not referenced.
+*          The eigenvalue W(j) is selected if SELECT(W(j)) is true.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A. N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the N-by-N matrix A.
+*          On exit, A has been overwritten by its Schur form T.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  SDIM    (output) INTEGER
+*          If SORT = 'N', SDIM = 0.
+*          If SORT = 'S', SDIM = number of eigenvalues for which
+*                         SELECT is true.
+*
+*  W       (output) COMPLEX*16 array, dimension (N)
+*          W contains the computed eigenvalues, in the same order that
+*          they appear on the diagonal of the output Schur form T.
+*
+*  VS      (output) COMPLEX*16 array, dimension (LDVS,N)
+*          If JOBVS = 'V', VS contains the unitary matrix Z of Schur
+*          vectors.
+*          If JOBVS = 'N', VS is not referenced.
+*
+*  LDVS    (input) INTEGER
+*          The leading dimension of the array VS.  LDVS >= 1; if
+*          JOBVS = 'V', LDVS >= N.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,2*N).
+*          For good performance, LWORK must generally be larger.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  BWORK   (workspace) LOGICAL array, dimension (N)
+*          Not referenced if SORT = 'N'.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value.
+*          > 0: if INFO = i, and i is
+*               <= N:  the QR algorithm failed to compute all the
+*                      eigenvalues; elements 1:ILO-1 and i+1:N of W
+*                      contain those eigenvalues which have converged;
+*                      if JOBVS = 'V', VS contains the matrix which
+*                      reduces A to its partially converged Schur form.
+*               = N+1: the eigenvalues could not be reordered because
+*                      some eigenvalues were too close to separate (the
+*                      problem is very ill-conditioned);
+*               = N+2: after reordering, roundoff changed values of
+*                      some complex eigenvalues so that leading
+*                      eigenvalues in the Schur form no longer satisfy
+*                      SELECT = .TRUE..  This could also be caused by
+*                      underflow due to scaling.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, SCALEA, WANTST, WANTVS
+      INTEGER            HSWORK, I, IBAL, ICOND, IERR, IEVAL, IHI, ILO,
+     $                   ITAU, IWRK, K, MAXB, MAXWRK, MINWRK
+      DOUBLE PRECISION   ANRM, BIGNUM, CSCALE, EPS, S, SEP, SMLNUM
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   DUM( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZCOPY, ZGEBAK, ZGEBAL, ZGEHRD, ZHSEQR,
+     $                   ZLACPY, ZLASCL, ZTRSEN, ZUNGHR
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, ZLANGE
+      EXTERNAL           LSAME, ILAENV, DLAMCH, ZLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      WANTVS = LSAME( JOBVS, 'V' )
+      WANTST = LSAME( SORT, 'S' )
+      IF( ( .NOT.WANTVS ) .AND. ( .NOT.LSAME( JOBVS, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDVS.LT.1 .OR. ( WANTVS .AND. LDVS.LT.N ) ) THEN
+         INFO = -10
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       CWorkspace refers to complex workspace, and RWorkspace to real
+*       workspace. NB refers to the optimal block size for the
+*       immediately following subroutine, as returned by ILAENV.
+*       HSWORK refers to the workspace preferred by ZHSEQR, as
+*       calculated below. HSWORK is computed assuming ILO=1 and IHI=N,
+*       the worst case.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN
+         MAXWRK = N + N*ILAENV( 1, 'ZGEHRD', ' ', N, 1, N, 0 )
+         MINWRK = MAX( 1, 2*N )
+         IF( .NOT.WANTVS ) THEN
+            MAXB = MAX( ILAENV( 8, 'ZHSEQR', 'SN', N, 1, N, -1 ), 2 )
+            K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'ZHSEQR', 'SN', N, 1,
+     $          N, -1 ) ) )
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, HSWORK, 1 )
+         ELSE
+            MAXWRK = MAX( MAXWRK, N+( N-1 )*
+     $               ILAENV( 1, 'ZUNGHR', ' ', N, 1, N, -1 ) )
+            MAXB = MAX( ILAENV( 8, 'ZHSEQR', 'EN', N, 1, N, -1 ), 2 )
+            K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'ZHSEQR', 'EN', N, 1,
+     $          N, -1 ) ) )
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, HSWORK, 1 )
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGEES ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         SDIM = 0
+         RETURN
+      END IF
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'P' )
+      SMLNUM = DLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SQRT( SMLNUM ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = ZLANGE( 'M', N, N, A, LDA, DUM )
+      SCALEA = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = SMLNUM
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = BIGNUM
+      END IF
+      IF( SCALEA )
+     $   CALL ZLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR )
+*
+*     Permute the matrix to make it more nearly triangular
+*     (CWorkspace: none)
+*     (RWorkspace: need N)
+*
+      IBAL = 1
+      CALL ZGEBAL( 'P', N, A, LDA, ILO, IHI, RWORK( IBAL ), IERR )
+*
+*     Reduce to upper Hessenberg form
+*     (CWorkspace: need 2*N, prefer N+N*NB)
+*     (RWorkspace: none)
+*
+      ITAU = 1
+      IWRK = N + ITAU
+      CALL ZGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ),
+     $             LWORK-IWRK+1, IERR )
+*
+      IF( WANTVS ) THEN
+*
+*        Copy Householder vectors to VS
+*
+         CALL ZLACPY( 'L', N, N, A, LDA, VS, LDVS )
+*
+*        Generate unitary matrix in VS
+*        (CWorkspace: need 2*N-1, prefer N+(N-1)*NB)
+*        (RWorkspace: none)
+*
+         CALL ZUNGHR( N, ILO, IHI, VS, LDVS, WORK( ITAU ), WORK( IWRK ),
+     $                LWORK-IWRK+1, IERR )
+      END IF
+*
+      SDIM = 0
+*
+*     Perform QR iteration, accumulating Schur vectors in VS if desired
+*     (CWorkspace: need 1, prefer HSWORK (see comments) )
+*     (RWorkspace: none)
+*
+      IWRK = ITAU
+      CALL ZHSEQR( 'S', JOBVS, N, ILO, IHI, A, LDA, W, VS, LDVS,
+     $             WORK( IWRK ), LWORK-IWRK+1, IEVAL )
+      IF( IEVAL.GT.0 )
+     $   INFO = IEVAL
+*
+*     Sort eigenvalues if desired
+*
+      IF( WANTST .AND. INFO.EQ.0 ) THEN
+         IF( SCALEA )
+     $      CALL ZLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, W, N, IERR )
+         DO 10 I = 1, N
+            BWORK( I ) = SELECT( W( I ) )
+   10    CONTINUE
+*
+*        Reorder eigenvalues and transform Schur vectors
+*        (CWorkspace: none)
+*        (RWorkspace: none)
+*
+         CALL ZTRSEN( 'N', JOBVS, BWORK, N, A, LDA, VS, LDVS, W, SDIM,
+     $                S, SEP, WORK( IWRK ), LWORK-IWRK+1, ICOND )
+      END IF
+*
+      IF( WANTVS ) THEN
+*
+*        Undo balancing
+*        (CWorkspace: none)
+*        (RWorkspace: need N)
+*
+         CALL ZGEBAK( 'P', 'R', N, ILO, IHI, RWORK( IBAL ), N, VS, LDVS,
+     $                IERR )
+      END IF
+*
+      IF( SCALEA ) THEN
+*
+*        Undo scaling for the Schur form of A
+*
+         CALL ZLASCL( 'U', 0, 0, CSCALE, ANRM, N, N, A, LDA, IERR )
+         CALL ZCOPY( N, A, LDA+1, W, 1 )
+      END IF
+*
+      WORK( 1 ) = MAXWRK
+      RETURN
+*
+*     End of ZGEES
+*
+      END
diff --git a/costa/native/external/lapack/zgeesx.f b/costa/native/external/lapack/zgeesx.f
new file mode 100644
index 000000000..1b9482ccf
--- /dev/null
+++ b/costa/native/external/lapack/zgeesx.f
@@ -0,0 +1,371 @@
+      SUBROUTINE ZGEESX( JOBVS, SORT, SELECT, SENSE, N, A, LDA, SDIM, W,
+     $                   VS, LDVS, RCONDE, RCONDV, WORK, LWORK, RWORK,
+     $                   BWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVS, SENSE, SORT
+      INTEGER            INFO, LDA, LDVS, LWORK, N, SDIM
+      DOUBLE PRECISION   RCONDE, RCONDV
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            BWORK( * )
+      DOUBLE PRECISION   RWORK( * )
+      COMPLEX*16         A( LDA, * ), VS( LDVS, * ), W( * ), WORK( * )
+*     ..
+*     .. Function Arguments ..
+      LOGICAL            SELECT
+      EXTERNAL           SELECT
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGEESX computes for an N-by-N complex nonsymmetric matrix A, the
+*  eigenvalues, the Schur form T, and, optionally, the matrix of Schur
+*  vectors Z.  This gives the Schur factorization A = Z*T*(Z**H).
+*
+*  Optionally, it also orders the eigenvalues on the diagonal of the
+*  Schur form so that selected eigenvalues are at the top left;
+*  computes a reciprocal condition number for the average of the
+*  selected eigenvalues (RCONDE); and computes a reciprocal condition
+*  number for the right invariant subspace corresponding to the
+*  selected eigenvalues (RCONDV).  The leading columns of Z form an
+*  orthonormal basis for this invariant subspace.
+*
+*  For further explanation of the reciprocal condition numbers RCONDE
+*  and RCONDV, see Section 4.10 of the LAPACK Users' Guide (where
+*  these quantities are called s and sep respectively).
+*
+*  A complex matrix is in Schur form if it is upper triangular.
+*
+*  Arguments
+*  =========
+*
+*  JOBVS   (input) CHARACTER*1
+*          = 'N': Schur vectors are not computed;
+*          = 'V': Schur vectors are computed.
+*
+*  SORT    (input) CHARACTER*1
+*          Specifies whether or not to order the eigenvalues on the
+*          diagonal of the Schur form.
+*          = 'N': Eigenvalues are not ordered;
+*          = 'S': Eigenvalues are ordered (see SELECT).
+*
+*  SELECT  (input) LOGICAL FUNCTION of one COMPLEX*16 argument
+*          SELECT must be declared EXTERNAL in the calling subroutine.
+*          If SORT = 'S', SELECT is used to select eigenvalues to order
+*          to the top left of the Schur form.
+*          If SORT = 'N', SELECT is not referenced.
+*          An eigenvalue W(j) is selected if SELECT(W(j)) is true.
+*
+*  SENSE   (input) CHARACTER*1
+*          Determines which reciprocal condition numbers are computed.
+*          = 'N': None are computed;
+*          = 'E': Computed for average of selected eigenvalues only;
+*          = 'V': Computed for selected right invariant subspace only;
+*          = 'B': Computed for both.
+*          If SENSE = 'E', 'V' or 'B', SORT must equal 'S'.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A. N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA, N)
+*          On entry, the N-by-N matrix A.
+*          On exit, A is overwritten by its Schur form T.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  SDIM    (output) INTEGER
+*          If SORT = 'N', SDIM = 0.
+*          If SORT = 'S', SDIM = number of eigenvalues for which
+*                         SELECT is true.
+*
+*  W       (output) COMPLEX*16 array, dimension (N)
+*          W contains the computed eigenvalues, in the same order
+*          that they appear on the diagonal of the output Schur form T.
+*
+*  VS      (output) COMPLEX*16 array, dimension (LDVS,N)
+*          If JOBVS = 'V', VS contains the unitary matrix Z of Schur
+*          vectors.
+*          If JOBVS = 'N', VS is not referenced.
+*
+*  LDVS    (input) INTEGER
+*          The leading dimension of the array VS.  LDVS >= 1, and if
+*          JOBVS = 'V', LDVS >= N.
+*
+*  RCONDE  (output) DOUBLE PRECISION
+*          If SENSE = 'E' or 'B', RCONDE contains the reciprocal
+*          condition number for the average of the selected eigenvalues.
+*          Not referenced if SENSE = 'N' or 'V'.
+*
+*  RCONDV  (output) DOUBLE PRECISION
+*          If SENSE = 'V' or 'B', RCONDV contains the reciprocal
+*          condition number for the selected right invariant subspace.
+*          Not referenced if SENSE = 'N' or 'E'.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,2*N).
+*          Also, if SENSE = 'E' or 'V' or 'B', LWORK >= 2*SDIM*(N-SDIM),
+*          where SDIM is the number of selected eigenvalues computed by
+*          this routine.  Note that 2*SDIM*(N-SDIM) <= N*N/2.
+*          For good performance, LWORK must generally be larger.
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  BWORK   (workspace) LOGICAL array, dimension (N)
+*          Not referenced if SORT = 'N'.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value.
+*          > 0: if INFO = i, and i is
+*             <= N: the QR algorithm failed to compute all the
+*                   eigenvalues; elements 1:ILO-1 and i+1:N of W
+*                   contain those eigenvalues which have converged; if
+*                   JOBVS = 'V', VS contains the transformation which
+*                   reduces A to its partially converged Schur form.
+*             = N+1: the eigenvalues could not be reordered because some
+*                   eigenvalues were too close to separate (the problem
+*                   is very ill-conditioned);
+*             = N+2: after reordering, roundoff changed values of some
+*                   complex eigenvalues so that leading eigenvalues in
+*                   the Schur form no longer satisfy SELECT=.TRUE.  This
+*                   could also be caused by underflow due to scaling.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            SCALEA, WANTSB, WANTSE, WANTSN, WANTST, WANTSV,
+     $                   WANTVS
+      INTEGER            HSWORK, I, IBAL, ICOND, IERR, IEVAL, IHI, ILO,
+     $                   ITAU, IWRK, K, MAXB, MAXWRK, MINWRK
+      DOUBLE PRECISION   ANRM, BIGNUM, CSCALE, EPS, SMLNUM
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   DUM( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASCL, XERBLA, ZCOPY, ZGEBAK, ZGEBAL, ZGEHRD,
+     $                   ZHSEQR, ZLACPY, ZLASCL, ZTRSEN, ZUNGHR
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, ZLANGE
+      EXTERNAL           LSAME, ILAENV, DLAMCH, ZLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      WANTVS = LSAME( JOBVS, 'V' )
+      WANTST = LSAME( SORT, 'S' )
+      WANTSN = LSAME( SENSE, 'N' )
+      WANTSE = LSAME( SENSE, 'E' )
+      WANTSV = LSAME( SENSE, 'V' )
+      WANTSB = LSAME( SENSE, 'B' )
+      IF( ( .NOT.WANTVS ) .AND. ( .NOT.LSAME( JOBVS, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSV .OR. WANTSB ) .OR.
+     $         ( .NOT.WANTST .AND. .NOT.WANTSN ) ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDVS.LT.1 .OR. ( WANTVS .AND. LDVS.LT.N ) ) THEN
+         INFO = -11
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of real workspace needed at that point in the
+*       code, as well as the preferred amount for good performance.
+*       CWorkspace refers to complex workspace, and RWorkspace to real
+*       workspace. NB refers to the optimal block size for the
+*       immediately following subroutine, as returned by ILAENV.
+*       HSWORK refers to the workspace preferred by ZHSEQR, as
+*       calculated below. HSWORK is computed assuming ILO=1 and IHI=N,
+*       the worst case.
+*       If SENSE = 'E', 'V' or 'B', then the amount of workspace needed
+*       depends on SDIM, which is computed by the routine ZTRSEN later
+*       in the code.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 ) ) THEN
+         MAXWRK = N + N*ILAENV( 1, 'ZGEHRD', ' ', N, 1, N, 0 )
+         MINWRK = MAX( 1, 2*N )
+         IF( .NOT.WANTVS ) THEN
+            MAXB = MAX( ILAENV( 8, 'ZHSEQR', 'SN', N, 1, N, -1 ), 2 )
+            K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'ZHSEQR', 'SN', N, 1,
+     $          N, -1 ) ) )
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, HSWORK, 1 )
+         ELSE
+            MAXWRK = MAX( MAXWRK, N+( N-1 )*
+     $               ILAENV( 1, 'ZUNGHR', ' ', N, 1, N, -1 ) )
+            MAXB = MAX( ILAENV( 8, 'ZHSEQR', 'SV', N, 1, N, -1 ), 2 )
+            K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'ZHSEQR', 'SV', N, 1,
+     $          N, -1 ) ) )
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, HSWORK, 1 )
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+      IF( LWORK.LT.MINWRK ) THEN
+         INFO = -15
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGEESX', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         SDIM = 0
+         RETURN
+      END IF
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'P' )
+      SMLNUM = DLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SQRT( SMLNUM ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = ZLANGE( 'M', N, N, A, LDA, DUM )
+      SCALEA = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = SMLNUM
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = BIGNUM
+      END IF
+      IF( SCALEA )
+     $   CALL ZLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR )
+*
+*
+*     Permute the matrix to make it more nearly triangular
+*     (CWorkspace: none)
+*     (RWorkspace: need N)
+*
+      IBAL = 1
+      CALL ZGEBAL( 'P', N, A, LDA, ILO, IHI, RWORK( IBAL ), IERR )
+*
+*     Reduce to upper Hessenberg form
+*     (CWorkspace: need 2*N, prefer N+N*NB)
+*     (RWorkspace: none)
+*
+      ITAU = 1
+      IWRK = N + ITAU
+      CALL ZGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ),
+     $             LWORK-IWRK+1, IERR )
+*
+      IF( WANTVS ) THEN
+*
+*        Copy Householder vectors to VS
+*
+         CALL ZLACPY( 'L', N, N, A, LDA, VS, LDVS )
+*
+*        Generate unitary matrix in VS
+*        (CWorkspace: need 2*N-1, prefer N+(N-1)*NB)
+*        (RWorkspace: none)
+*
+         CALL ZUNGHR( N, ILO, IHI, VS, LDVS, WORK( ITAU ), WORK( IWRK ),
+     $                LWORK-IWRK+1, IERR )
+      END IF
+*
+      SDIM = 0
+*
+*     Perform QR iteration, accumulating Schur vectors in VS if desired
+*     (CWorkspace: need 1, prefer HSWORK (see comments) )
+*     (RWorkspace: none)
+*
+      IWRK = ITAU
+      CALL ZHSEQR( 'S', JOBVS, N, ILO, IHI, A, LDA, W, VS, LDVS,
+     $             WORK( IWRK ), LWORK-IWRK+1, IEVAL )
+      IF( IEVAL.GT.0 )
+     $   INFO = IEVAL
+*
+*     Sort eigenvalues if desired
+*
+      IF( WANTST .AND. INFO.EQ.0 ) THEN
+         IF( SCALEA )
+     $      CALL ZLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, W, N, IERR )
+         DO 10 I = 1, N
+            BWORK( I ) = SELECT( W( I ) )
+   10    CONTINUE
+*
+*        Reorder eigenvalues, transform Schur vectors, and compute
+*        reciprocal condition numbers
+*        (CWorkspace: if SENSE is not 'N', need 2*SDIM*(N-SDIM)
+*                     otherwise, need none )
+*        (RWorkspace: none)
+*
+         CALL ZTRSEN( SENSE, JOBVS, BWORK, N, A, LDA, VS, LDVS, W, SDIM,
+     $                RCONDE, RCONDV, WORK( IWRK ), LWORK-IWRK+1,
+     $                ICOND )
+         IF( .NOT.WANTSN )
+     $      MAXWRK = MAX( MAXWRK, 2*SDIM*( N-SDIM ) )
+         IF( ICOND.EQ.-14 ) THEN
+*
+*           Not enough complex workspace
+*
+            INFO = -15
+         END IF
+      END IF
+*
+      IF( WANTVS ) THEN
+*
+*        Undo balancing
+*        (CWorkspace: none)
+*        (RWorkspace: need N)
+*
+         CALL ZGEBAK( 'P', 'R', N, ILO, IHI, RWORK( IBAL ), N, VS, LDVS,
+     $                IERR )
+      END IF
+*
+      IF( SCALEA ) THEN
+*
+*        Undo scaling for the Schur form of A
+*
+         CALL ZLASCL( 'U', 0, 0, CSCALE, ANRM, N, N, A, LDA, IERR )
+         CALL ZCOPY( N, A, LDA+1, W, 1 )
+         IF( ( WANTSV .OR. WANTSB ) .AND. INFO.EQ.0 ) THEN
+            DUM( 1 ) = RCONDV
+            CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, 1, 1, DUM, 1, IERR )
+            RCONDV = DUM( 1 )
+         END IF
+      END IF
+*
+      WORK( 1 ) = MAXWRK
+      RETURN
+*
+*     End of ZGEESX
+*
+      END
diff --git a/costa/native/external/lapack/zgeev.f b/costa/native/external/lapack/zgeev.f
new file mode 100644
index 000000000..42aa63368
--- /dev/null
+++ b/costa/native/external/lapack/zgeev.f
@@ -0,0 +1,391 @@
+      SUBROUTINE ZGEEV( JOBVL, JOBVR, N, A, LDA, W, VL, LDVL, VR, LDVR,
+     $                  WORK, LWORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVL, JOBVR
+      INTEGER            INFO, LDA, LDVL, LDVR, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   RWORK( * )
+      COMPLEX*16         A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ),
+     $                   W( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGEEV computes for an N-by-N complex nonsymmetric matrix A, the
+*  eigenvalues and, optionally, the left and/or right eigenvectors.
+*
+*  The right eigenvector v(j) of A satisfies
+*                   A * v(j) = lambda(j) * v(j)
+*  where lambda(j) is its eigenvalue.
+*  The left eigenvector u(j) of A satisfies
+*                u(j)**H * A = lambda(j) * u(j)**H
+*  where u(j)**H denotes the conjugate transpose of u(j).
+*
+*  The computed eigenvectors are normalized to have Euclidean norm
+*  equal to 1 and largest component real.
+*
+*  Arguments
+*  =========
+*
+*  JOBVL   (input) CHARACTER*1
+*          = 'N': left eigenvectors of A are not computed;
+*          = 'V': left eigenvectors of are computed.
+*
+*  JOBVR   (input) CHARACTER*1
+*          = 'N': right eigenvectors of A are not computed;
+*          = 'V': right eigenvectors of A are computed.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A. N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the N-by-N matrix A.
+*          On exit, A has been overwritten.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  W       (output) COMPLEX*16 array, dimension (N)
+*          W contains the computed eigenvalues.
+*
+*  VL      (output) COMPLEX*16 array, dimension (LDVL,N)
+*          If JOBVL = 'V', the left eigenvectors u(j) are stored one
+*          after another in the columns of VL, in the same order
+*          as their eigenvalues.
+*          If JOBVL = 'N', VL is not referenced.
+*          u(j) = VL(:,j), the j-th column of VL.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the array VL.  LDVL >= 1; if
+*          JOBVL = 'V', LDVL >= N.
+*
+*  VR      (output) COMPLEX*16 array, dimension (LDVR,N)
+*          If JOBVR = 'V', the right eigenvectors v(j) are stored one
+*          after another in the columns of VR, in the same order
+*          as their eigenvalues.
+*          If JOBVR = 'N', VR is not referenced.
+*          v(j) = VR(:,j), the j-th column of VR.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the array VR.  LDVR >= 1; if
+*          JOBVR = 'V', LDVR >= N.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,2*N).
+*          For good performance, LWORK must generally be larger.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = i, the QR algorithm failed to compute all the
+*                eigenvalues, and no eigenvectors have been computed;
+*                elements and i+1:N of W contain eigenvalues which have
+*                converged.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, SCALEA, WANTVL, WANTVR
+      CHARACTER          SIDE
+      INTEGER            HSWORK, I, IBAL, IERR, IHI, ILO, IRWORK, ITAU,
+     $                   IWRK, K, MAXB, MAXWRK, MINWRK, NOUT
+      DOUBLE PRECISION   ANRM, BIGNUM, CSCALE, EPS, SCL, SMLNUM
+      COMPLEX*16         TMP
+*     ..
+*     .. Local Arrays ..
+      LOGICAL            SELECT( 1 )
+      DOUBLE PRECISION   DUM( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZDSCAL, ZGEBAK, ZGEBAL, ZGEHRD, ZHSEQR,
+     $                   ZLACPY, ZLASCL, ZSCAL, ZTREVC, ZUNGHR
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IDAMAX, ILAENV
+      DOUBLE PRECISION   DLAMCH, DZNRM2, ZLANGE
+      EXTERNAL           LSAME, IDAMAX, ILAENV, DLAMCH, DZNRM2, ZLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, DCMPLX, DCONJG, DIMAG, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      WANTVL = LSAME( JOBVL, 'V' )
+      WANTVR = LSAME( JOBVR, 'V' )
+      IF( ( .NOT.WANTVL ) .AND. ( .NOT.LSAME( JOBVL, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( ( .NOT.WANTVR ) .AND. ( .NOT.LSAME( JOBVR, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDVL.LT.1 .OR. ( WANTVL .AND. LDVL.LT.N ) ) THEN
+         INFO = -8
+      ELSE IF( LDVR.LT.1 .OR. ( WANTVR .AND. LDVR.LT.N ) ) THEN
+         INFO = -10
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       CWorkspace refers to complex workspace, and RWorkspace to real
+*       workspace. NB refers to the optimal block size for the
+*       immediately following subroutine, as returned by ILAENV.
+*       HSWORK refers to the workspace preferred by ZHSEQR, as
+*       calculated below. HSWORK is computed assuming ILO=1 and IHI=N,
+*       the worst case.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN
+         MAXWRK = N + N*ILAENV( 1, 'ZGEHRD', ' ', N, 1, N, 0 )
+         IF( ( .NOT.WANTVL ) .AND. ( .NOT.WANTVR ) ) THEN
+            MINWRK = MAX( 1, 2*N )
+            MAXB = MAX( ILAENV( 8, 'ZHSEQR', 'EN', N, 1, N, -1 ), 2 )
+            K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'ZHSEQR', 'EN', N, 1,
+     $          N, -1 ) ) )
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, HSWORK )
+         ELSE
+            MINWRK = MAX( 1, 2*N )
+            MAXWRK = MAX( MAXWRK, N+( N-1 )*
+     $               ILAENV( 1, 'ZUNGHR', ' ', N, 1, N, -1 ) )
+            MAXB = MAX( ILAENV( 8, 'ZHSEQR', 'SV', N, 1, N, -1 ), 2 )
+            K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'ZHSEQR', 'SV', N, 1,
+     $          N, -1 ) ) )
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, HSWORK, 2*N )
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGEEV ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'P' )
+      SMLNUM = DLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SQRT( SMLNUM ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = ZLANGE( 'M', N, N, A, LDA, DUM )
+      SCALEA = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = SMLNUM
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = BIGNUM
+      END IF
+      IF( SCALEA )
+     $   CALL ZLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR )
+*
+*     Balance the matrix
+*     (CWorkspace: none)
+*     (RWorkspace: need N)
+*
+      IBAL = 1
+      CALL ZGEBAL( 'B', N, A, LDA, ILO, IHI, RWORK( IBAL ), IERR )
+*
+*     Reduce to upper Hessenberg form
+*     (CWorkspace: need 2*N, prefer N+N*NB)
+*     (RWorkspace: none)
+*
+      ITAU = 1
+      IWRK = ITAU + N
+      CALL ZGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ),
+     $             LWORK-IWRK+1, IERR )
+*
+      IF( WANTVL ) THEN
+*
+*        Want left eigenvectors
+*        Copy Householder vectors to VL
+*
+         SIDE = 'L'
+         CALL ZLACPY( 'L', N, N, A, LDA, VL, LDVL )
+*
+*        Generate unitary matrix in VL
+*        (CWorkspace: need 2*N-1, prefer N+(N-1)*NB)
+*        (RWorkspace: none)
+*
+         CALL ZUNGHR( N, ILO, IHI, VL, LDVL, WORK( ITAU ), WORK( IWRK ),
+     $                LWORK-IWRK+1, IERR )
+*
+*        Perform QR iteration, accumulating Schur vectors in VL
+*        (CWorkspace: need 1, prefer HSWORK (see comments) )
+*        (RWorkspace: none)
+*
+         IWRK = ITAU
+         CALL ZHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, W, VL, LDVL,
+     $                WORK( IWRK ), LWORK-IWRK+1, INFO )
+*
+         IF( WANTVR ) THEN
+*
+*           Want left and right eigenvectors
+*           Copy Schur vectors to VR
+*
+            SIDE = 'B'
+            CALL ZLACPY( 'F', N, N, VL, LDVL, VR, LDVR )
+         END IF
+*
+      ELSE IF( WANTVR ) THEN
+*
+*        Want right eigenvectors
+*        Copy Householder vectors to VR
+*
+         SIDE = 'R'
+         CALL ZLACPY( 'L', N, N, A, LDA, VR, LDVR )
+*
+*        Generate unitary matrix in VR
+*        (CWorkspace: need 2*N-1, prefer N+(N-1)*NB)
+*        (RWorkspace: none)
+*
+         CALL ZUNGHR( N, ILO, IHI, VR, LDVR, WORK( ITAU ), WORK( IWRK ),
+     $                LWORK-IWRK+1, IERR )
+*
+*        Perform QR iteration, accumulating Schur vectors in VR
+*        (CWorkspace: need 1, prefer HSWORK (see comments) )
+*        (RWorkspace: none)
+*
+         IWRK = ITAU
+         CALL ZHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, W, VR, LDVR,
+     $                WORK( IWRK ), LWORK-IWRK+1, INFO )
+*
+      ELSE
+*
+*        Compute eigenvalues only
+*        (CWorkspace: need 1, prefer HSWORK (see comments) )
+*        (RWorkspace: none)
+*
+         IWRK = ITAU
+         CALL ZHSEQR( 'E', 'N', N, ILO, IHI, A, LDA, W, VR, LDVR,
+     $                WORK( IWRK ), LWORK-IWRK+1, INFO )
+      END IF
+*
+*     If INFO > 0 from ZHSEQR, then quit
+*
+      IF( INFO.GT.0 )
+     $   GO TO 50
+*
+      IF( WANTVL .OR. WANTVR ) THEN
+*
+*        Compute left and/or right eigenvectors
+*        (CWorkspace: need 2*N)
+*        (RWorkspace: need 2*N)
+*
+         IRWORK = IBAL + N
+         CALL ZTREVC( SIDE, 'B', SELECT, N, A, LDA, VL, LDVL, VR, LDVR,
+     $                N, NOUT, WORK( IWRK ), RWORK( IRWORK ), IERR )
+      END IF
+*
+      IF( WANTVL ) THEN
+*
+*        Undo balancing of left eigenvectors
+*        (CWorkspace: none)
+*        (RWorkspace: need N)
+*
+         CALL ZGEBAK( 'B', 'L', N, ILO, IHI, RWORK( IBAL ), N, VL, LDVL,
+     $                IERR )
+*
+*        Normalize left eigenvectors and make largest component real
+*
+         DO 20 I = 1, N
+            SCL = ONE / DZNRM2( N, VL( 1, I ), 1 )
+            CALL ZDSCAL( N, SCL, VL( 1, I ), 1 )
+            DO 10 K = 1, N
+               RWORK( IRWORK+K-1 ) = DBLE( VL( K, I ) )**2 +
+     $                               DIMAG( VL( K, I ) )**2
+   10       CONTINUE
+            K = IDAMAX( N, RWORK( IRWORK ), 1 )
+            TMP = DCONJG( VL( K, I ) ) / SQRT( RWORK( IRWORK+K-1 ) )
+            CALL ZSCAL( N, TMP, VL( 1, I ), 1 )
+            VL( K, I ) = DCMPLX( DBLE( VL( K, I ) ), ZERO )
+   20    CONTINUE
+      END IF
+*
+      IF( WANTVR ) THEN
+*
+*        Undo balancing of right eigenvectors
+*        (CWorkspace: none)
+*        (RWorkspace: need N)
+*
+         CALL ZGEBAK( 'B', 'R', N, ILO, IHI, RWORK( IBAL ), N, VR, LDVR,
+     $                IERR )
+*
+*        Normalize right eigenvectors and make largest component real
+*
+         DO 40 I = 1, N
+            SCL = ONE / DZNRM2( N, VR( 1, I ), 1 )
+            CALL ZDSCAL( N, SCL, VR( 1, I ), 1 )
+            DO 30 K = 1, N
+               RWORK( IRWORK+K-1 ) = DBLE( VR( K, I ) )**2 +
+     $                               DIMAG( VR( K, I ) )**2
+   30       CONTINUE
+            K = IDAMAX( N, RWORK( IRWORK ), 1 )
+            TMP = DCONJG( VR( K, I ) ) / SQRT( RWORK( IRWORK+K-1 ) )
+            CALL ZSCAL( N, TMP, VR( 1, I ), 1 )
+            VR( K, I ) = DCMPLX( DBLE( VR( K, I ) ), ZERO )
+   40    CONTINUE
+      END IF
+*
+*     Undo scaling if necessary
+*
+   50 CONTINUE
+      IF( SCALEA ) THEN
+         CALL ZLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, W( INFO+1 ),
+     $                MAX( N-INFO, 1 ), IERR )
+         IF( INFO.GT.0 ) THEN
+            CALL ZLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, W, N, IERR )
+         END IF
+      END IF
+*
+      WORK( 1 ) = MAXWRK
+      RETURN
+*
+*     End of ZGEEV
+*
+      END
diff --git a/costa/native/external/lapack/zgeevx.f b/costa/native/external/lapack/zgeevx.f
new file mode 100644
index 000000000..d8b0f45d2
--- /dev/null
+++ b/costa/native/external/lapack/zgeevx.f
@@ -0,0 +1,521 @@
+      SUBROUTINE ZGEEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, W, VL,
+     $                   LDVL, VR, LDVR, ILO, IHI, SCALE, ABNRM, RCONDE,
+     $                   RCONDV, WORK, LWORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          BALANC, JOBVL, JOBVR, SENSE
+      INTEGER            IHI, ILO, INFO, LDA, LDVL, LDVR, LWORK, N
+      DOUBLE PRECISION   ABNRM
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   RCONDE( * ), RCONDV( * ), RWORK( * ),
+     $                   SCALE( * )
+      COMPLEX*16         A( LDA, * ), VL( LDVL, * ), VR( LDVR, * ),
+     $                   W( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGEEVX computes for an N-by-N complex nonsymmetric matrix A, the
+*  eigenvalues and, optionally, the left and/or right eigenvectors.
+*
+*  Optionally also, it computes a balancing transformation to improve
+*  the conditioning of the eigenvalues and eigenvectors (ILO, IHI,
+*  SCALE, and ABNRM), reciprocal condition numbers for the eigenvalues
+*  (RCONDE), and reciprocal condition numbers for the right
+*  eigenvectors (RCONDV).
+*
+*  The right eigenvector v(j) of A satisfies
+*                   A * v(j) = lambda(j) * v(j)
+*  where lambda(j) is its eigenvalue.
+*  The left eigenvector u(j) of A satisfies
+*                u(j)**H * A = lambda(j) * u(j)**H
+*  where u(j)**H denotes the conjugate transpose of u(j).
+*
+*  The computed eigenvectors are normalized to have Euclidean norm
+*  equal to 1 and largest component real.
+*
+*  Balancing a matrix means permuting the rows and columns to make it
+*  more nearly upper triangular, and applying a diagonal similarity
+*  transformation D * A * D**(-1), where D is a diagonal matrix, to
+*  make its rows and columns closer in norm and the condition numbers
+*  of its eigenvalues and eigenvectors smaller.  The computed
+*  reciprocal condition numbers correspond to the balanced matrix.
+*  Permuting rows and columns will not change the condition numbers
+*  (in exact arithmetic) but diagonal scaling will.  For further
+*  explanation of balancing, see section 4.10.2 of the LAPACK
+*  Users' Guide.
+*
+*  Arguments
+*  =========
+*
+*  BALANC  (input) CHARACTER*1
+*          Indicates how the input matrix should be diagonally scaled
+*          and/or permuted to improve the conditioning of its
+*          eigenvalues.
+*          = 'N': Do not diagonally scale or permute;
+*          = 'P': Perform permutations to make the matrix more nearly
+*                 upper triangular. Do not diagonally scale;
+*          = 'S': Diagonally scale the matrix, ie. replace A by
+*                 D*A*D**(-1), where D is a diagonal matrix chosen
+*                 to make the rows and columns of A more equal in
+*                 norm. Do not permute;
+*          = 'B': Both diagonally scale and permute A.
+*
+*          Computed reciprocal condition numbers will be for the matrix
+*          after balancing and/or permuting. Permuting does not change
+*          condition numbers (in exact arithmetic), but balancing does.
+*
+*  JOBVL   (input) CHARACTER*1
+*          = 'N': left eigenvectors of A are not computed;
+*          = 'V': left eigenvectors of A are computed.
+*          If SENSE = 'E' or 'B', JOBVL must = 'V'.
+*
+*  JOBVR   (input) CHARACTER*1
+*          = 'N': right eigenvectors of A are not computed;
+*          = 'V': right eigenvectors of A are computed.
+*          If SENSE = 'E' or 'B', JOBVR must = 'V'.
+*
+*  SENSE   (input) CHARACTER*1
+*          Determines which reciprocal condition numbers are computed.
+*          = 'N': None are computed;
+*          = 'E': Computed for eigenvalues only;
+*          = 'V': Computed for right eigenvectors only;
+*          = 'B': Computed for eigenvalues and right eigenvectors.
+*
+*          If SENSE = 'E' or 'B', both left and right eigenvectors
+*          must also be computed (JOBVL = 'V' and JOBVR = 'V').
+*
+*  N       (input) INTEGER
+*          The order of the matrix A. N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the N-by-N matrix A.
+*          On exit, A has been overwritten.  If JOBVL = 'V' or
+*          JOBVR = 'V', A contains the Schur form of the balanced
+*          version of the matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  W       (output) COMPLEX*16 array, dimension (N)
+*          W contains the computed eigenvalues.
+*
+*  VL      (output) COMPLEX*16 array, dimension (LDVL,N)
+*          If JOBVL = 'V', the left eigenvectors u(j) are stored one
+*          after another in the columns of VL, in the same order
+*          as their eigenvalues.
+*          If JOBVL = 'N', VL is not referenced.
+*          u(j) = VL(:,j), the j-th column of VL.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the array VL.  LDVL >= 1; if
+*          JOBVL = 'V', LDVL >= N.
+*
+*  VR      (output) COMPLEX*16 array, dimension (LDVR,N)
+*          If JOBVR = 'V', the right eigenvectors v(j) are stored one
+*          after another in the columns of VR, in the same order
+*          as their eigenvalues.
+*          If JOBVR = 'N', VR is not referenced.
+*          v(j) = VR(:,j), the j-th column of VR.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the array VR.  LDVR >= 1; if
+*          JOBVR = 'V', LDVR >= N.
+*
+*  ILO,IHI (output) INTEGER
+*          ILO and IHI are integer values determined when A was
+*          balanced.  The balanced A(i,j) = 0 if I > J and
+*          J = 1,...,ILO-1 or I = IHI+1,...,N.
+*
+*  SCALE   (output) DOUBLE PRECISION array, dimension (N)
+*          Details of the permutations and scaling factors applied
+*          when balancing A.  If P(j) is the index of the row and column
+*          interchanged with row and column j, and D(j) is the scaling
+*          factor applied to row and column j, then
+*          SCALE(J) = P(J),    for J = 1,...,ILO-1
+*                   = D(J),    for J = ILO,...,IHI
+*                   = P(J)     for J = IHI+1,...,N.
+*          The order in which the interchanges are made is N to IHI+1,
+*          then 1 to ILO-1.
+*
+*  ABNRM   (output) DOUBLE PRECISION
+*          The one-norm of the balanced matrix (the maximum
+*          of the sum of absolute values of elements of any column).
+*
+*  RCONDE  (output) DOUBLE PRECISION array, dimension (N)
+*          RCONDE(j) is the reciprocal condition number of the j-th
+*          eigenvalue.
+*
+*  RCONDV  (output) DOUBLE PRECISION array, dimension (N)
+*          RCONDV(j) is the reciprocal condition number of the j-th
+*          right eigenvector.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  If SENSE = 'N' or 'E',
+*          LWORK >= max(1,2*N), and if SENSE = 'V' or 'B',
+*          LWORK >= N*N+2*N.
+*          For good performance, LWORK must generally be larger.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = i, the QR algorithm failed to compute all the
+*                eigenvalues, and no eigenvectors or condition numbers
+*                have been computed; elements 1:ILO-1 and i+1:N of W
+*                contain eigenvalues which have converged.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, SCALEA, WANTVL, WANTVR, WNTSNB, WNTSNE,
+     $                   WNTSNN, WNTSNV
+      CHARACTER          JOB, SIDE
+      INTEGER            HSWORK, I, ICOND, IERR, ITAU, IWRK, K, MAXB,
+     $                   MAXWRK, MINWRK, NOUT
+      DOUBLE PRECISION   ANRM, BIGNUM, CSCALE, EPS, SCL, SMLNUM
+      COMPLEX*16         TMP
+*     ..
+*     .. Local Arrays ..
+      LOGICAL            SELECT( 1 )
+      DOUBLE PRECISION   DUM( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASCL, XERBLA, ZDSCAL, ZGEBAK, ZGEBAL, ZGEHRD,
+     $                   ZHSEQR, ZLACPY, ZLASCL, ZSCAL, ZTREVC, ZTRSNA,
+     $                   ZUNGHR
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IDAMAX, ILAENV
+      DOUBLE PRECISION   DLAMCH, DZNRM2, ZLANGE
+      EXTERNAL           LSAME, IDAMAX, ILAENV, DLAMCH, DZNRM2, ZLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, DCMPLX, DCONJG, DIMAG, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      WANTVL = LSAME( JOBVL, 'V' )
+      WANTVR = LSAME( JOBVR, 'V' )
+      WNTSNN = LSAME( SENSE, 'N' )
+      WNTSNE = LSAME( SENSE, 'E' )
+      WNTSNV = LSAME( SENSE, 'V' )
+      WNTSNB = LSAME( SENSE, 'B' )
+      IF( .NOT.( LSAME( BALANC, 'N' ) .OR. LSAME( BALANC,
+     $    'S' ) .OR. LSAME( BALANC, 'P' ) .OR. LSAME( BALANC, 'B' ) ) )
+     $     THEN
+         INFO = -1
+      ELSE IF( ( .NOT.WANTVL ) .AND. ( .NOT.LSAME( JOBVL, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( ( .NOT.WANTVR ) .AND. ( .NOT.LSAME( JOBVR, 'N' ) ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.( WNTSNN .OR. WNTSNE .OR. WNTSNB .OR. WNTSNV ) .OR.
+     $         ( ( WNTSNE .OR. WNTSNB ) .AND. .NOT.( WANTVL .AND.
+     $         WANTVR ) ) ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDVL.LT.1 .OR. ( WANTVL .AND. LDVL.LT.N ) ) THEN
+         INFO = -10
+      ELSE IF( LDVR.LT.1 .OR. ( WANTVR .AND. LDVR.LT.N ) ) THEN
+         INFO = -12
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       CWorkspace refers to complex workspace, and RWorkspace to real
+*       workspace. NB refers to the optimal block size for the
+*       immediately following subroutine, as returned by ILAENV.
+*       HSWORK refers to the workspace preferred by ZHSEQR, as
+*       calculated below. HSWORK is computed assuming ILO=1 and IHI=N,
+*       the worst case.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN
+         MAXWRK = N + N*ILAENV( 1, 'ZGEHRD', ' ', N, 1, N, 0 )
+         IF( ( .NOT.WANTVL ) .AND. ( .NOT.WANTVR ) ) THEN
+            MINWRK = MAX( 1, 2*N )
+            IF( .NOT.( WNTSNN .OR. WNTSNE ) )
+     $         MINWRK = MAX( MINWRK, N*N+2*N )
+            MAXB = MAX( ILAENV( 8, 'ZHSEQR', 'SN', N, 1, N, -1 ), 2 )
+            IF( WNTSNN ) THEN
+               K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'ZHSEQR', 'EN', N,
+     $             1, N, -1 ) ) )
+            ELSE
+               K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'ZHSEQR', 'SN', N,
+     $             1, N, -1 ) ) )
+            END IF
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, 1, HSWORK )
+            IF( .NOT.( WNTSNN .OR. WNTSNE ) )
+     $         MAXWRK = MAX( MAXWRK, N*N+2*N )
+         ELSE
+            MINWRK = MAX( 1, 2*N )
+            IF( .NOT.( WNTSNN .OR. WNTSNE ) )
+     $         MINWRK = MAX( MINWRK, N*N+2*N )
+            MAXB = MAX( ILAENV( 8, 'ZHSEQR', 'SN', N, 1, N, -1 ), 2 )
+            K = MIN( MAXB, N, MAX( 2, ILAENV( 4, 'ZHSEQR', 'EN', N, 1,
+     $          N, -1 ) ) )
+            HSWORK = MAX( K*( K+2 ), 2*N )
+            MAXWRK = MAX( MAXWRK, 1, HSWORK )
+            MAXWRK = MAX( MAXWRK, N+( N-1 )*
+     $               ILAENV( 1, 'ZUNGHR', ' ', N, 1, N, -1 ) )
+            IF( .NOT.( WNTSNN .OR. WNTSNE ) )
+     $         MAXWRK = MAX( MAXWRK, N*N+2*N )
+            MAXWRK = MAX( MAXWRK, 2*N, 1 )
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
+         INFO = -20
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGEEVX', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'P' )
+      SMLNUM = DLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SQRT( SMLNUM ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ICOND = 0
+      ANRM = ZLANGE( 'M', N, N, A, LDA, DUM )
+      SCALEA = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = SMLNUM
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         SCALEA = .TRUE.
+         CSCALE = BIGNUM
+      END IF
+      IF( SCALEA )
+     $   CALL ZLASCL( 'G', 0, 0, ANRM, CSCALE, N, N, A, LDA, IERR )
+*
+*     Balance the matrix and compute ABNRM
+*
+      CALL ZGEBAL( BALANC, N, A, LDA, ILO, IHI, SCALE, IERR )
+      ABNRM = ZLANGE( '1', N, N, A, LDA, DUM )
+      IF( SCALEA ) THEN
+         DUM( 1 ) = ABNRM
+         CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, 1, 1, DUM, 1, IERR )
+         ABNRM = DUM( 1 )
+      END IF
+*
+*     Reduce to upper Hessenberg form
+*     (CWorkspace: need 2*N, prefer N+N*NB)
+*     (RWorkspace: none)
+*
+      ITAU = 1
+      IWRK = ITAU + N
+      CALL ZGEHRD( N, ILO, IHI, A, LDA, WORK( ITAU ), WORK( IWRK ),
+     $             LWORK-IWRK+1, IERR )
+*
+      IF( WANTVL ) THEN
+*
+*        Want left eigenvectors
+*        Copy Householder vectors to VL
+*
+         SIDE = 'L'
+         CALL ZLACPY( 'L', N, N, A, LDA, VL, LDVL )
+*
+*        Generate unitary matrix in VL
+*        (CWorkspace: need 2*N-1, prefer N+(N-1)*NB)
+*        (RWorkspace: none)
+*
+         CALL ZUNGHR( N, ILO, IHI, VL, LDVL, WORK( ITAU ), WORK( IWRK ),
+     $                LWORK-IWRK+1, IERR )
+*
+*        Perform QR iteration, accumulating Schur vectors in VL
+*        (CWorkspace: need 1, prefer HSWORK (see comments) )
+*        (RWorkspace: none)
+*
+         IWRK = ITAU
+         CALL ZHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, W, VL, LDVL,
+     $                WORK( IWRK ), LWORK-IWRK+1, INFO )
+*
+         IF( WANTVR ) THEN
+*
+*           Want left and right eigenvectors
+*           Copy Schur vectors to VR
+*
+            SIDE = 'B'
+            CALL ZLACPY( 'F', N, N, VL, LDVL, VR, LDVR )
+         END IF
+*
+      ELSE IF( WANTVR ) THEN
+*
+*        Want right eigenvectors
+*        Copy Householder vectors to VR
+*
+         SIDE = 'R'
+         CALL ZLACPY( 'L', N, N, A, LDA, VR, LDVR )
+*
+*        Generate unitary matrix in VR
+*        (CWorkspace: need 2*N-1, prefer N+(N-1)*NB)
+*        (RWorkspace: none)
+*
+         CALL ZUNGHR( N, ILO, IHI, VR, LDVR, WORK( ITAU ), WORK( IWRK ),
+     $                LWORK-IWRK+1, IERR )
+*
+*        Perform QR iteration, accumulating Schur vectors in VR
+*        (CWorkspace: need 1, prefer HSWORK (see comments) )
+*        (RWorkspace: none)
+*
+         IWRK = ITAU
+         CALL ZHSEQR( 'S', 'V', N, ILO, IHI, A, LDA, W, VR, LDVR,
+     $                WORK( IWRK ), LWORK-IWRK+1, INFO )
+*
+      ELSE
+*
+*        Compute eigenvalues only
+*        If condition numbers desired, compute Schur form
+*
+         IF( WNTSNN ) THEN
+            JOB = 'E'
+         ELSE
+            JOB = 'S'
+         END IF
+*
+*        (CWorkspace: need 1, prefer HSWORK (see comments) )
+*        (RWorkspace: none)
+*
+         IWRK = ITAU
+         CALL ZHSEQR( JOB, 'N', N, ILO, IHI, A, LDA, W, VR, LDVR,
+     $                WORK( IWRK ), LWORK-IWRK+1, INFO )
+      END IF
+*
+*     If INFO > 0 from ZHSEQR, then quit
+*
+      IF( INFO.GT.0 )
+     $   GO TO 50
+*
+      IF( WANTVL .OR. WANTVR ) THEN
+*
+*        Compute left and/or right eigenvectors
+*        (CWorkspace: need 2*N)
+*        (RWorkspace: need N)
+*
+         CALL ZTREVC( SIDE, 'B', SELECT, N, A, LDA, VL, LDVL, VR, LDVR,
+     $                N, NOUT, WORK( IWRK ), RWORK, IERR )
+      END IF
+*
+*     Compute condition numbers if desired
+*     (CWorkspace: need N*N+2*N unless SENSE = 'E')
+*     (RWorkspace: need 2*N unless SENSE = 'E')
+*
+      IF( .NOT.WNTSNN ) THEN
+         CALL ZTRSNA( SENSE, 'A', SELECT, N, A, LDA, VL, LDVL, VR, LDVR,
+     $                RCONDE, RCONDV, N, NOUT, WORK( IWRK ), N, RWORK,
+     $                ICOND )
+      END IF
+*
+      IF( WANTVL ) THEN
+*
+*        Undo balancing of left eigenvectors
+*
+         CALL ZGEBAK( BALANC, 'L', N, ILO, IHI, SCALE, N, VL, LDVL,
+     $                IERR )
+*
+*        Normalize left eigenvectors and make largest component real
+*
+         DO 20 I = 1, N
+            SCL = ONE / DZNRM2( N, VL( 1, I ), 1 )
+            CALL ZDSCAL( N, SCL, VL( 1, I ), 1 )
+            DO 10 K = 1, N
+               RWORK( K ) = DBLE( VL( K, I ) )**2 +
+     $                      DIMAG( VL( K, I ) )**2
+   10       CONTINUE
+            K = IDAMAX( N, RWORK, 1 )
+            TMP = DCONJG( VL( K, I ) ) / SQRT( RWORK( K ) )
+            CALL ZSCAL( N, TMP, VL( 1, I ), 1 )
+            VL( K, I ) = DCMPLX( DBLE( VL( K, I ) ), ZERO )
+   20    CONTINUE
+      END IF
+*
+      IF( WANTVR ) THEN
+*
+*        Undo balancing of right eigenvectors
+*
+         CALL ZGEBAK( BALANC, 'R', N, ILO, IHI, SCALE, N, VR, LDVR,
+     $                IERR )
+*
+*        Normalize right eigenvectors and make largest component real
+*
+         DO 40 I = 1, N
+            SCL = ONE / DZNRM2( N, VR( 1, I ), 1 )
+            CALL ZDSCAL( N, SCL, VR( 1, I ), 1 )
+            DO 30 K = 1, N
+               RWORK( K ) = DBLE( VR( K, I ) )**2 +
+     $                      DIMAG( VR( K, I ) )**2
+   30       CONTINUE
+            K = IDAMAX( N, RWORK, 1 )
+            TMP = DCONJG( VR( K, I ) ) / SQRT( RWORK( K ) )
+            CALL ZSCAL( N, TMP, VR( 1, I ), 1 )
+            VR( K, I ) = DCMPLX( DBLE( VR( K, I ) ), ZERO )
+   40    CONTINUE
+      END IF
+*
+*     Undo scaling if necessary
+*
+   50 CONTINUE
+      IF( SCALEA ) THEN
+         CALL ZLASCL( 'G', 0, 0, CSCALE, ANRM, N-INFO, 1, W( INFO+1 ),
+     $                MAX( N-INFO, 1 ), IERR )
+         IF( INFO.EQ.0 ) THEN
+            IF( ( WNTSNV .OR. WNTSNB ) .AND. ICOND.EQ.0 )
+     $         CALL DLASCL( 'G', 0, 0, CSCALE, ANRM, N, 1, RCONDV, N,
+     $                      IERR )
+         ELSE
+            CALL ZLASCL( 'G', 0, 0, CSCALE, ANRM, ILO-1, 1, W, N, IERR )
+         END IF
+      END IF
+*
+      WORK( 1 ) = MAXWRK
+      RETURN
+*
+*     End of ZGEEVX
+*
+      END
diff --git a/costa/native/external/lapack/zgegs.f b/costa/native/external/lapack/zgegs.f
new file mode 100644
index 000000000..b5c53101a
--- /dev/null
+++ b/costa/native/external/lapack/zgegs.f
@@ -0,0 +1,442 @@
+      SUBROUTINE ZGEGS( JOBVSL, JOBVSR, N, A, LDA, B, LDB, ALPHA, BETA,
+     $                  VSL, LDVSL, VSR, LDVSR, WORK, LWORK, RWORK,
+     $                  INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVSL, JOBVSR
+      INTEGER            INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   RWORK( * )
+      COMPLEX*16         A( LDA, * ), ALPHA( * ), B( LDB, * ),
+     $                   BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This routine is deprecated and has been replaced by routine ZGGES.
+*
+*  ZGEGS computes for a pair of N-by-N complex nonsymmetric matrices A,
+*  B:  the generalized eigenvalues (alpha, beta), the complex Schur
+*  form (A, B), and optionally left and/or right Schur vectors
+*  (VSL and VSR).
+*
+*  (If only the generalized eigenvalues are needed, use the driver ZGEGV
+*  instead.)
+*
+*  A generalized eigenvalue for a pair of matrices (A,B) is, roughly
+*  speaking, a scalar w or a ratio  alpha/beta = w, such that  A - w*B
+*  is singular.  It is usually represented as the pair (alpha,beta),
+*  as there is a reasonable interpretation for beta=0, and even for
+*  both being zero.  A good beginning reference is the book, "Matrix
+*  Computations", by G. Golub & C. van Loan (Johns Hopkins U. Press)
+*
+*  The (generalized) Schur form of a pair of matrices is the result of
+*  multiplying both matrices on the left by one unitary matrix and
+*  both on the right by another unitary matrix, these two unitary
+*  matrices being chosen so as to bring the pair of matrices into
+*  upper triangular form with the diagonal elements of B being
+*  non-negative real numbers (this is also called complex Schur form.)
+*
+*  The left and right Schur vectors are the columns of VSL and VSR,
+*  respectively, where VSL and VSR are the unitary matrices
+*  which reduce A and B to Schur form:
+*
+*  Schur form of (A,B) = ( (VSL)**H A (VSR), (VSL)**H B (VSR) )
+*
+*  Arguments
+*  =========
+*
+*  JOBVSL   (input) CHARACTER*1
+*          = 'N':  do not compute the left Schur vectors;
+*          = 'V':  compute the left Schur vectors.
+*
+*  JOBVSR   (input) CHARACTER*1
+*          = 'N':  do not compute the right Schur vectors;
+*          = 'V':  compute the right Schur vectors.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A, B, VSL, and VSR.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA, N)
+*          On entry, the first of the pair of matrices whose generalized
+*          eigenvalues and (optionally) Schur vectors are to be
+*          computed.
+*          On exit, the generalized Schur form of A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of A.  LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB, N)
+*          On entry, the second of the pair of matrices whose
+*          generalized eigenvalues and (optionally) Schur vectors are
+*          to be computed.
+*          On exit, the generalized Schur form of B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of B.  LDB >= max(1,N).
+*
+*  ALPHA   (output) COMPLEX*16 array, dimension (N)
+*  BETA    (output) COMPLEX*16 array, dimension (N)
+*          On exit,  ALPHA(j)/BETA(j), j=1,...,N, will be the
+*          generalized eigenvalues.  ALPHA(j), j=1,...,N  and  BETA(j),
+*          j=1,...,N  are the diagonals of the complex Schur form (A,B)
+*          output by ZGEGS.  The  BETA(j) will be non-negative real.
+*
+*          Note: the quotients ALPHA(j)/BETA(j) may easily over- or
+*          underflow, and BETA(j) may even be zero.  Thus, the user
+*          should avoid naively computing the ratio alpha/beta.
+*          However, ALPHA will be always less than and usually
+*          comparable with norm(A) in magnitude, and BETA always less
+*          than and usually comparable with norm(B).
+*
+*  VSL     (output) COMPLEX*16 array, dimension (LDVSL,N)
+*          If JOBVSL = 'V', VSL will contain the left Schur vectors.
+*          (See "Purpose", above.)
+*          Not referenced if JOBVSL = 'N'.
+*
+*  LDVSL   (input) INTEGER
+*          The leading dimension of the matrix VSL. LDVSL >= 1, and
+*          if JOBVSL = 'V', LDVSL >= N.
+*
+*  VSR     (output) COMPLEX*16 array, dimension (LDVSR,N)
+*          If JOBVSR = 'V', VSR will contain the right Schur vectors.
+*          (See "Purpose", above.)
+*          Not referenced if JOBVSR = 'N'.
+*
+*  LDVSR   (input) INTEGER
+*          The leading dimension of the matrix VSR. LDVSR >= 1, and
+*          if JOBVSR = 'V', LDVSR >= N.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,2*N).
+*          For good performance, LWORK must generally be larger.
+*          To compute the optimal value of LWORK, call ILAENV to get
+*          blocksizes (for ZGEQRF, ZUNMQR, and CUNGQR.)  Then compute:
+*          NB  -- MAX of the blocksizes for ZGEQRF, ZUNMQR, and CUNGQR;
+*          the optimal LWORK is N*(NB+1).
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (3*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          =1,...,N:
+*                The QZ iteration failed.  (A,B) are not in Schur
+*                form, but ALPHA(j) and BETA(j) should be correct for
+*                j=INFO+1,...,N.
+*          > N:  errors that usually indicate LAPACK problems:
+*                =N+1: error return from ZGGBAL
+*                =N+2: error return from ZGEQRF
+*                =N+3: error return from ZUNMQR
+*                =N+4: error return from ZUNGQR
+*                =N+5: error return from ZGGHRD
+*                =N+6: error return from ZHGEQZ (other than failed
+*                                               iteration)
+*                =N+7: error return from ZGGBAK (computing VSL)
+*                =N+8: error return from ZGGBAK (computing VSR)
+*                =N+9: error return from ZLASCL (various places)
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+      COMPLEX*16         CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0D0, 0.0D0 ),
+     $                   CONE = ( 1.0D0, 0.0D0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ILASCL, ILBSCL, ILVSL, ILVSR, LQUERY
+      INTEGER            ICOLS, IHI, IINFO, IJOBVL, IJOBVR, ILEFT, ILO,
+     $                   IRIGHT, IROWS, IRWORK, ITAU, IWORK, LOPT,
+     $                   LWKMIN, LWKOPT, NB, NB1, NB2, NB3
+      DOUBLE PRECISION   ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS,
+     $                   SAFMIN, SMLNUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGEQRF, ZGGBAK, ZGGBAL, ZGGHRD, ZHGEQZ,
+     $                   ZLACPY, ZLASCL, ZLASET, ZUNGQR, ZUNMQR
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, ZLANGE
+      EXTERNAL           LSAME, ILAENV, DLAMCH, ZLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode the input arguments
+*
+      IF( LSAME( JOBVSL, 'N' ) ) THEN
+         IJOBVL = 1
+         ILVSL = .FALSE.
+      ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN
+         IJOBVL = 2
+         ILVSL = .TRUE.
+      ELSE
+         IJOBVL = -1
+         ILVSL = .FALSE.
+      END IF
+*
+      IF( LSAME( JOBVSR, 'N' ) ) THEN
+         IJOBVR = 1
+         ILVSR = .FALSE.
+      ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN
+         IJOBVR = 2
+         ILVSR = .TRUE.
+      ELSE
+         IJOBVR = -1
+         ILVSR = .FALSE.
+      END IF
+*
+*     Test the input arguments
+*
+      LWKMIN = MAX( 2*N, 1 )
+      LWKOPT = LWKMIN
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      INFO = 0
+      IF( IJOBVL.LE.0 ) THEN
+         INFO = -1
+      ELSE IF( IJOBVR.LE.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN
+         INFO = -11
+      ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN
+         INFO = -13
+      ELSE IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -15
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB1 = ILAENV( 1, 'ZGEQRF', ' ', N, N, -1, -1 )
+         NB2 = ILAENV( 1, 'ZUNMQR', ' ', N, N, N, -1 )
+         NB3 = ILAENV( 1, 'ZUNGQR', ' ', N, N, N, -1 )
+         NB = MAX( NB1, NB2, NB3 )
+         LOPT = N*( NB+1 )
+         WORK( 1 ) = LOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGEGS ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'E' )*DLAMCH( 'B' )
+      SAFMIN = DLAMCH( 'S' )
+      SMLNUM = N*SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = ZLANGE( 'M', N, N, A, LDA, RWORK )
+      ILASCL = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         ANRMTO = SMLNUM
+         ILASCL = .TRUE.
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         ANRMTO = BIGNUM
+         ILASCL = .TRUE.
+      END IF
+*
+      IF( ILASCL ) THEN
+         CALL ZLASCL( 'G', -1, -1, ANRM, ANRMTO, N, N, A, LDA, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 9
+            RETURN
+         END IF
+      END IF
+*
+*     Scale B if max element outside range [SMLNUM,BIGNUM]
+*
+      BNRM = ZLANGE( 'M', N, N, B, LDB, RWORK )
+      ILBSCL = .FALSE.
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+         BNRMTO = SMLNUM
+         ILBSCL = .TRUE.
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+         BNRMTO = BIGNUM
+         ILBSCL = .TRUE.
+      END IF
+*
+      IF( ILBSCL ) THEN
+         CALL ZLASCL( 'G', -1, -1, BNRM, BNRMTO, N, N, B, LDB, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 9
+            RETURN
+         END IF
+      END IF
+*
+*     Permute the matrix to make it more nearly triangular
+*
+      ILEFT = 1
+      IRIGHT = N + 1
+      IRWORK = IRIGHT + N
+      IWORK = 1
+      CALL ZGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ),
+     $             RWORK( IRIGHT ), RWORK( IRWORK ), IINFO )
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 1
+         GO TO 10
+      END IF
+*
+*     Reduce B to triangular form, and initialize VSL and/or VSR
+*
+      IROWS = IHI + 1 - ILO
+      ICOLS = N + 1 - ILO
+      ITAU = IWORK
+      IWORK = ITAU + IROWS
+      CALL ZGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ),
+     $             WORK( IWORK ), LWORK+1-IWORK, IINFO )
+      IF( IINFO.GE.0 )
+     $   LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 2
+         GO TO 10
+      END IF
+*
+      CALL ZUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB,
+     $             WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWORK ),
+     $             LWORK+1-IWORK, IINFO )
+      IF( IINFO.GE.0 )
+     $   LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 3
+         GO TO 10
+      END IF
+*
+      IF( ILVSL ) THEN
+         CALL ZLASET( 'Full', N, N, CZERO, CONE, VSL, LDVSL )
+         CALL ZLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB,
+     $                VSL( ILO+1, ILO ), LDVSL )
+         CALL ZUNGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL,
+     $                WORK( ITAU ), WORK( IWORK ), LWORK+1-IWORK,
+     $                IINFO )
+         IF( IINFO.GE.0 )
+     $      LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 4
+            GO TO 10
+         END IF
+      END IF
+*
+      IF( ILVSR )
+     $   CALL ZLASET( 'Full', N, N, CZERO, CONE, VSR, LDVSR )
+*
+*     Reduce to generalized Hessenberg form
+*
+      CALL ZGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL,
+     $             LDVSL, VSR, LDVSR, IINFO )
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 5
+         GO TO 10
+      END IF
+*
+*     Perform QZ algorithm, computing Schur vectors if desired
+*
+      IWORK = ITAU
+      CALL ZHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB,
+     $             ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK( IWORK ),
+     $             LWORK+1-IWORK, RWORK( IRWORK ), IINFO )
+      IF( IINFO.GE.0 )
+     $   LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+      IF( IINFO.NE.0 ) THEN
+         IF( IINFO.GT.0 .AND. IINFO.LE.N ) THEN
+            INFO = IINFO
+         ELSE IF( IINFO.GT.N .AND. IINFO.LE.2*N ) THEN
+            INFO = IINFO - N
+         ELSE
+            INFO = N + 6
+         END IF
+         GO TO 10
+      END IF
+*
+*     Apply permutation to VSL and VSR
+*
+      IF( ILVSL ) THEN
+         CALL ZGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ),
+     $                RWORK( IRIGHT ), N, VSL, LDVSL, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 7
+            GO TO 10
+         END IF
+      END IF
+      IF( ILVSR ) THEN
+         CALL ZGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ),
+     $                RWORK( IRIGHT ), N, VSR, LDVSR, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 8
+            GO TO 10
+         END IF
+      END IF
+*
+*     Undo scaling
+*
+      IF( ILASCL ) THEN
+         CALL ZLASCL( 'U', -1, -1, ANRMTO, ANRM, N, N, A, LDA, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 9
+            RETURN
+         END IF
+         CALL ZLASCL( 'G', -1, -1, ANRMTO, ANRM, N, 1, ALPHA, N, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 9
+            RETURN
+         END IF
+      END IF
+*
+      IF( ILBSCL ) THEN
+         CALL ZLASCL( 'U', -1, -1, BNRMTO, BNRM, N, N, B, LDB, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 9
+            RETURN
+         END IF
+         CALL ZLASCL( 'G', -1, -1, BNRMTO, BNRM, N, 1, BETA, N, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 9
+            RETURN
+         END IF
+      END IF
+*
+   10 CONTINUE
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of ZGEGS
+*
+      END
diff --git a/costa/native/external/lapack/zgegv.f b/costa/native/external/lapack/zgegv.f
new file mode 100644
index 000000000..ce0d56594
--- /dev/null
+++ b/costa/native/external/lapack/zgegv.f
@@ -0,0 +1,591 @@
+      SUBROUTINE ZGEGV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA,
+     $                  VL, LDVL, VR, LDVR, WORK, LWORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVL, JOBVR
+      INTEGER            INFO, LDA, LDB, LDVL, LDVR, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   RWORK( * )
+      COMPLEX*16         A( LDA, * ), ALPHA( * ), B( LDB, * ),
+     $                   BETA( * ), VL( LDVL, * ), VR( LDVR, * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This routine is deprecated and has been replaced by routine ZGGEV.
+*
+*  ZGEGV computes for a pair of N-by-N complex nonsymmetric matrices A
+*  and B, the generalized eigenvalues (alpha, beta), and optionally,
+*  the left and/or right generalized eigenvectors (VL and VR).
+*
+*  A generalized eigenvalue for a pair of matrices (A,B) is, roughly
+*  speaking, a scalar w or a ratio  alpha/beta = w, such that  A - w*B
+*  is singular.  It is usually represented as the pair (alpha,beta),
+*  as there is a reasonable interpretation for beta=0, and even for
+*  both being zero.  A good beginning reference is the book, "Matrix
+*  Computations", by G. Golub & C. van Loan (Johns Hopkins U. Press)
+*
+*  A right generalized eigenvector corresponding to a generalized
+*  eigenvalue  w  for a pair of matrices (A,B) is a vector  r  such
+*  that  (A - w B) r = 0 .  A left generalized eigenvector is a vector
+*  l such that l**H * (A - w B) = 0, where l**H is the
+*  conjugate-transpose of l.
+*
+*  Note: this routine performs "full balancing" on A and B -- see
+*  "Further Details", below.
+*
+*  Arguments
+*  =========
+*
+*  JOBVL   (input) CHARACTER*1
+*          = 'N':  do not compute the left generalized eigenvectors;
+*          = 'V':  compute the left generalized eigenvectors.
+*
+*  JOBVR   (input) CHARACTER*1
+*          = 'N':  do not compute the right generalized eigenvectors;
+*          = 'V':  compute the right generalized eigenvectors.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A, B, VL, and VR.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA, N)
+*          On entry, the first of the pair of matrices whose
+*          generalized eigenvalues and (optionally) generalized
+*          eigenvectors are to be computed.
+*          On exit, the contents will have been destroyed.  (For a
+*          description of the contents of A on exit, see "Further
+*          Details", below.)
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of A.  LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB, N)
+*          On entry, the second of the pair of matrices whose
+*          generalized eigenvalues and (optionally) generalized
+*          eigenvectors are to be computed.
+*          On exit, the contents will have been destroyed.  (For a
+*          description of the contents of B on exit, see "Further
+*          Details", below.)
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of B.  LDB >= max(1,N).
+*
+*  ALPHA   (output) COMPLEX*16 array, dimension (N)
+*  BETA    (output) COMPLEX*16 array, dimension (N)
+*          On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the
+*          generalized eigenvalues.
+*
+*          Note: the quotients ALPHA(j)/BETA(j) may easily over- or
+*          underflow, and BETA(j) may even be zero.  Thus, the user
+*          should avoid naively computing the ratio alpha/beta.
+*          However, ALPHA will be always less than and usually
+*          comparable with norm(A) in magnitude, and BETA always less
+*          than and usually comparable with norm(B).
+*
+*  VL      (output) COMPLEX*16 array, dimension (LDVL,N)
+*          If JOBVL = 'V', the left generalized eigenvectors.  (See
+*          "Purpose", above.)
+*          Each eigenvector will be scaled so the largest component
+*          will have abs(real part) + abs(imag. part) = 1, *except*
+*          that for eigenvalues with alpha=beta=0, a zero vector will
+*          be returned as the corresponding eigenvector.
+*          Not referenced if JOBVL = 'N'.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the matrix VL. LDVL >= 1, and
+*          if JOBVL = 'V', LDVL >= N.
+*
+*  VR      (output) COMPLEX*16 array, dimension (LDVR,N)
+*          If JOBVR = 'V', the right generalized eigenvectors.  (See
+*          "Purpose", above.)
+*          Each eigenvector will be scaled so the largest component
+*          will have abs(real part) + abs(imag. part) = 1, *except*
+*          that for eigenvalues with alpha=beta=0, a zero vector will
+*          be returned as the corresponding eigenvector.
+*          Not referenced if JOBVR = 'N'.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the matrix VR. LDVR >= 1, and
+*          if JOBVR = 'V', LDVR >= N.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,2*N).
+*          For good performance, LWORK must generally be larger.
+*          To compute the optimal value of LWORK, call ILAENV to get
+*          blocksizes (for ZGEQRF, ZUNMQR, and CUNGQR.)  Then compute:
+*          NB  -- MAX of the blocksizes for ZGEQRF, ZUNMQR, and CUNGQR;
+*          The optimal LWORK is  MAX( 2*N, N*(NB+1) ).
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace/output) DOUBLE PRECISION array, dimension (8*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          =1,...,N:
+*                The QZ iteration failed.  No eigenvectors have been
+*                calculated, but ALPHA(j) and BETA(j) should be
+*                correct for j=INFO+1,...,N.
+*          > N:  errors that usually indicate LAPACK problems:
+*                =N+1: error return from ZGGBAL
+*                =N+2: error return from ZGEQRF
+*                =N+3: error return from ZUNMQR
+*                =N+4: error return from ZUNGQR
+*                =N+5: error return from ZGGHRD
+*                =N+6: error return from ZHGEQZ (other than failed
+*                                               iteration)
+*                =N+7: error return from ZTGEVC
+*                =N+8: error return from ZGGBAK (computing VL)
+*                =N+9: error return from ZGGBAK (computing VR)
+*                =N+10: error return from ZLASCL (various calls)
+*
+*  Further Details
+*  ===============
+*
+*  Balancing
+*  ---------
+*
+*  This driver calls ZGGBAL to both permute and scale rows and columns
+*  of A and B.  The permutations PL and PR are chosen so that PL*A*PR
+*  and PL*B*R will be upper triangular except for the diagonal blocks
+*  A(i:j,i:j) and B(i:j,i:j), with i and j as close together as
+*  possible.  The diagonal scaling matrices DL and DR are chosen so
+*  that the pair  DL*PL*A*PR*DR, DL*PL*B*PR*DR have elements close to
+*  one (except for the elements that start out zero.)
+*
+*  After the eigenvalues and eigenvectors of the balanced matrices
+*  have been computed, ZGGBAK transforms the eigenvectors back to what
+*  they would have been (in perfect arithmetic) if they had not been
+*  balanced.
+*
+*  Contents of A and B on Exit
+*  -------- -- - --- - -- ----
+*
+*  If any eigenvectors are computed (either JOBVL='V' or JOBVR='V' or
+*  both), then on exit the arrays A and B will contain the complex Schur
+*  form[*] of the "balanced" versions of A and B.  If no eigenvectors
+*  are computed, then only the diagonal blocks will be correct.
+*
+*  [*] In other words, upper triangular form.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+      COMPLEX*16         CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0D0, 0.0D0 ),
+     $                   CONE = ( 1.0D0, 0.0D0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ILIMIT, ILV, ILVL, ILVR, LQUERY
+      CHARACTER          CHTEMP
+      INTEGER            ICOLS, IHI, IINFO, IJOBVL, IJOBVR, ILEFT, ILO,
+     $                   IN, IRIGHT, IROWS, IRWORK, ITAU, IWORK, JC, JR,
+     $                   LOPT, LWKMIN, LWKOPT, NB, NB1, NB2, NB3
+      DOUBLE PRECISION   ABSAI, ABSAR, ABSB, ANRM, ANRM1, ANRM2, BNRM,
+     $                   BNRM1, BNRM2, EPS, SAFMAX, SAFMIN, SALFAI,
+     $                   SALFAR, SBETA, SCALE, TEMP
+      COMPLEX*16         X
+*     ..
+*     .. Local Arrays ..
+      LOGICAL            LDUMMA( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGEQRF, ZGGBAK, ZGGBAL, ZGGHRD, ZHGEQZ,
+     $                   ZLACPY, ZLASCL, ZLASET, ZTGEVC, ZUNGQR, ZUNMQR
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, ZLANGE
+      EXTERNAL           LSAME, ILAENV, DLAMCH, ZLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCMPLX, DIMAG, INT, MAX
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   ABS1
+*     ..
+*     .. Statement Function definitions ..
+      ABS1( X ) = ABS( DBLE( X ) ) + ABS( DIMAG( X ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode the input arguments
+*
+      IF( LSAME( JOBVL, 'N' ) ) THEN
+         IJOBVL = 1
+         ILVL = .FALSE.
+      ELSE IF( LSAME( JOBVL, 'V' ) ) THEN
+         IJOBVL = 2
+         ILVL = .TRUE.
+      ELSE
+         IJOBVL = -1
+         ILVL = .FALSE.
+      END IF
+*
+      IF( LSAME( JOBVR, 'N' ) ) THEN
+         IJOBVR = 1
+         ILVR = .FALSE.
+      ELSE IF( LSAME( JOBVR, 'V' ) ) THEN
+         IJOBVR = 2
+         ILVR = .TRUE.
+      ELSE
+         IJOBVR = -1
+         ILVR = .FALSE.
+      END IF
+      ILV = ILVL .OR. ILVR
+*
+*     Test the input arguments
+*
+      LWKMIN = MAX( 2*N, 1 )
+      LWKOPT = LWKMIN
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      INFO = 0
+      IF( IJOBVL.LE.0 ) THEN
+         INFO = -1
+      ELSE IF( IJOBVR.LE.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN
+         INFO = -11
+      ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN
+         INFO = -13
+      ELSE IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -15
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB1 = ILAENV( 1, 'ZGEQRF', ' ', N, N, -1, -1 )
+         NB2 = ILAENV( 1, 'ZUNMQR', ' ', N, N, N, -1 )
+         NB3 = ILAENV( 1, 'ZUNGQR', ' ', N, N, N, -1 )
+         NB = MAX( NB1, NB2, NB3 )
+         LOPT = MAX( 2*N, N*( NB+1 ) )
+         WORK( 1 ) = LOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGEGV ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'E' )*DLAMCH( 'B' )
+      SAFMIN = DLAMCH( 'S' )
+      SAFMIN = SAFMIN + SAFMIN
+      SAFMAX = ONE / SAFMIN
+*
+*     Scale A
+*
+      ANRM = ZLANGE( 'M', N, N, A, LDA, RWORK )
+      ANRM1 = ANRM
+      ANRM2 = ONE
+      IF( ANRM.LT.ONE ) THEN
+         IF( SAFMAX*ANRM.LT.ONE ) THEN
+            ANRM1 = SAFMIN
+            ANRM2 = SAFMAX*ANRM
+         END IF
+      END IF
+*
+      IF( ANRM.GT.ZERO ) THEN
+         CALL ZLASCL( 'G', -1, -1, ANRM, ONE, N, N, A, LDA, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 10
+            RETURN
+         END IF
+      END IF
+*
+*     Scale B
+*
+      BNRM = ZLANGE( 'M', N, N, B, LDB, RWORK )
+      BNRM1 = BNRM
+      BNRM2 = ONE
+      IF( BNRM.LT.ONE ) THEN
+         IF( SAFMAX*BNRM.LT.ONE ) THEN
+            BNRM1 = SAFMIN
+            BNRM2 = SAFMAX*BNRM
+         END IF
+      END IF
+*
+      IF( BNRM.GT.ZERO ) THEN
+         CALL ZLASCL( 'G', -1, -1, BNRM, ONE, N, N, B, LDB, IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 10
+            RETURN
+         END IF
+      END IF
+*
+*     Permute the matrix to make it more nearly triangular
+*     Also "balance" the matrix.
+*
+      ILEFT = 1
+      IRIGHT = N + 1
+      IRWORK = IRIGHT + N
+      CALL ZGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ),
+     $             RWORK( IRIGHT ), RWORK( IRWORK ), IINFO )
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 1
+         GO TO 80
+      END IF
+*
+*     Reduce B to triangular form, and initialize VL and/or VR
+*
+      IROWS = IHI + 1 - ILO
+      IF( ILV ) THEN
+         ICOLS = N + 1 - ILO
+      ELSE
+         ICOLS = IROWS
+      END IF
+      ITAU = 1
+      IWORK = ITAU + IROWS
+      CALL ZGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ),
+     $             WORK( IWORK ), LWORK+1-IWORK, IINFO )
+      IF( IINFO.GE.0 )
+     $   LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 2
+         GO TO 80
+      END IF
+*
+      CALL ZUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB,
+     $             WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWORK ),
+     $             LWORK+1-IWORK, IINFO )
+      IF( IINFO.GE.0 )
+     $   LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 3
+         GO TO 80
+      END IF
+*
+      IF( ILVL ) THEN
+         CALL ZLASET( 'Full', N, N, CZERO, CONE, VL, LDVL )
+         CALL ZLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB,
+     $                VL( ILO+1, ILO ), LDVL )
+         CALL ZUNGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL,
+     $                WORK( ITAU ), WORK( IWORK ), LWORK+1-IWORK,
+     $                IINFO )
+         IF( IINFO.GE.0 )
+     $      LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 4
+            GO TO 80
+         END IF
+      END IF
+*
+      IF( ILVR )
+     $   CALL ZLASET( 'Full', N, N, CZERO, CONE, VR, LDVR )
+*
+*     Reduce to generalized Hessenberg form
+*
+      IF( ILV ) THEN
+*
+*        Eigenvectors requested -- work on whole matrix.
+*
+         CALL ZGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL,
+     $                LDVL, VR, LDVR, IINFO )
+      ELSE
+         CALL ZGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA,
+     $                B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IINFO )
+      END IF
+      IF( IINFO.NE.0 ) THEN
+         INFO = N + 5
+         GO TO 80
+      END IF
+*
+*     Perform QZ algorithm
+*
+      IWORK = ITAU
+      IF( ILV ) THEN
+         CHTEMP = 'S'
+      ELSE
+         CHTEMP = 'E'
+      END IF
+      CALL ZHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB,
+     $             ALPHA, BETA, VL, LDVL, VR, LDVR, WORK( IWORK ),
+     $             LWORK+1-IWORK, RWORK( IRWORK ), IINFO )
+      IF( IINFO.GE.0 )
+     $   LWKOPT = MAX( LWKOPT, INT( WORK( IWORK ) )+IWORK-1 )
+      IF( IINFO.NE.0 ) THEN
+         IF( IINFO.GT.0 .AND. IINFO.LE.N ) THEN
+            INFO = IINFO
+         ELSE IF( IINFO.GT.N .AND. IINFO.LE.2*N ) THEN
+            INFO = IINFO - N
+         ELSE
+            INFO = N + 6
+         END IF
+         GO TO 80
+      END IF
+*
+      IF( ILV ) THEN
+*
+*        Compute Eigenvectors
+*
+         IF( ILVL ) THEN
+            IF( ILVR ) THEN
+               CHTEMP = 'B'
+            ELSE
+               CHTEMP = 'L'
+            END IF
+         ELSE
+            CHTEMP = 'R'
+         END IF
+*
+         CALL ZTGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, LDVL,
+     $                VR, LDVR, N, IN, WORK( IWORK ), RWORK( IRWORK ),
+     $                IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = N + 7
+            GO TO 80
+         END IF
+*
+*        Undo balancing on VL and VR, rescale
+*
+         IF( ILVL ) THEN
+            CALL ZGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ),
+     $                   RWORK( IRIGHT ), N, VL, LDVL, IINFO )
+            IF( IINFO.NE.0 ) THEN
+               INFO = N + 8
+               GO TO 80
+            END IF
+            DO 30 JC = 1, N
+               TEMP = ZERO
+               DO 10 JR = 1, N
+                  TEMP = MAX( TEMP, ABS1( VL( JR, JC ) ) )
+   10          CONTINUE
+               IF( TEMP.LT.SAFMIN )
+     $            GO TO 30
+               TEMP = ONE / TEMP
+               DO 20 JR = 1, N
+                  VL( JR, JC ) = VL( JR, JC )*TEMP
+   20          CONTINUE
+   30       CONTINUE
+         END IF
+         IF( ILVR ) THEN
+            CALL ZGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ),
+     $                   RWORK( IRIGHT ), N, VR, LDVR, IINFO )
+            IF( IINFO.NE.0 ) THEN
+               INFO = N + 9
+               GO TO 80
+            END IF
+            DO 60 JC = 1, N
+               TEMP = ZERO
+               DO 40 JR = 1, N
+                  TEMP = MAX( TEMP, ABS1( VR( JR, JC ) ) )
+   40          CONTINUE
+               IF( TEMP.LT.SAFMIN )
+     $            GO TO 60
+               TEMP = ONE / TEMP
+               DO 50 JR = 1, N
+                  VR( JR, JC ) = VR( JR, JC )*TEMP
+   50          CONTINUE
+   60       CONTINUE
+         END IF
+*
+*        End of eigenvector calculation
+*
+      END IF
+*
+*     Undo scaling in alpha, beta
+*
+*     Note: this does not give the alpha and beta for the unscaled
+*     problem.
+*
+*     Un-scaling is limited to avoid underflow in alpha and beta
+*     if they are significant.
+*
+      DO 70 JC = 1, N
+         ABSAR = ABS( DBLE( ALPHA( JC ) ) )
+         ABSAI = ABS( DIMAG( ALPHA( JC ) ) )
+         ABSB = ABS( DBLE( BETA( JC ) ) )
+         SALFAR = ANRM*DBLE( ALPHA( JC ) )
+         SALFAI = ANRM*DIMAG( ALPHA( JC ) )
+         SBETA = BNRM*DBLE( BETA( JC ) )
+         ILIMIT = .FALSE.
+         SCALE = ONE
+*
+*        Check for significant underflow in imaginary part of ALPHA
+*
+         IF( ABS( SALFAI ).LT.SAFMIN .AND. ABSAI.GE.
+     $       MAX( SAFMIN, EPS*ABSAR, EPS*ABSB ) ) THEN
+            ILIMIT = .TRUE.
+            SCALE = ( SAFMIN / ANRM1 ) / MAX( SAFMIN, ANRM2*ABSAI )
+         END IF
+*
+*        Check for significant underflow in real part of ALPHA
+*
+         IF( ABS( SALFAR ).LT.SAFMIN .AND. ABSAR.GE.
+     $       MAX( SAFMIN, EPS*ABSAI, EPS*ABSB ) ) THEN
+            ILIMIT = .TRUE.
+            SCALE = MAX( SCALE, ( SAFMIN / ANRM1 ) /
+     $              MAX( SAFMIN, ANRM2*ABSAR ) )
+         END IF
+*
+*        Check for significant underflow in BETA
+*
+         IF( ABS( SBETA ).LT.SAFMIN .AND. ABSB.GE.
+     $       MAX( SAFMIN, EPS*ABSAR, EPS*ABSAI ) ) THEN
+            ILIMIT = .TRUE.
+            SCALE = MAX( SCALE, ( SAFMIN / BNRM1 ) /
+     $              MAX( SAFMIN, BNRM2*ABSB ) )
+         END IF
+*
+*        Check for possible overflow when limiting scaling
+*
+         IF( ILIMIT ) THEN
+            TEMP = ( SCALE*SAFMIN )*MAX( ABS( SALFAR ), ABS( SALFAI ),
+     $             ABS( SBETA ) )
+            IF( TEMP.GT.ONE )
+     $         SCALE = SCALE / TEMP
+            IF( SCALE.LT.ONE )
+     $         ILIMIT = .FALSE.
+         END IF
+*
+*        Recompute un-scaled ALPHA, BETA if necessary.
+*
+         IF( ILIMIT ) THEN
+            SALFAR = ( SCALE*DBLE( ALPHA( JC ) ) )*ANRM
+            SALFAI = ( SCALE*DIMAG( ALPHA( JC ) ) )*ANRM
+            SBETA = ( SCALE*BETA( JC ) )*BNRM
+         END IF
+         ALPHA( JC ) = DCMPLX( SALFAR, SALFAI )
+         BETA( JC ) = SBETA
+   70 CONTINUE
+*
+   80 CONTINUE
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of ZGEGV
+*
+      END
diff --git a/costa/native/external/lapack/zgehd2.f b/costa/native/external/lapack/zgehd2.f
new file mode 100644
index 000000000..c2505b36d
--- /dev/null
+++ b/costa/native/external/lapack/zgehd2.f
@@ -0,0 +1,149 @@
+      SUBROUTINE ZGEHD2( N, ILO, IHI, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            IHI, ILO, INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGEHD2 reduces a complex general matrix A to upper Hessenberg form H
+*  by a unitary similarity transformation:  Q' * A * Q = H .
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          It is assumed that A is already upper triangular in rows
+*          and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
+*          set by a previous call to ZGEBAL; otherwise they should be
+*          set to 1 and N respectively. See Further Details.
+*          1 <= ILO <= IHI <= max(1,N).
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the n by n general matrix to be reduced.
+*          On exit, the upper triangle and the first subdiagonal of A
+*          are overwritten with the upper Hessenberg matrix H, and the
+*          elements below the first subdiagonal, with the array TAU,
+*          represent the unitary matrix Q as a product of elementary
+*          reflectors. See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  TAU     (output) COMPLEX*16 array, dimension (N-1)
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of (ihi-ilo) elementary
+*  reflectors
+*
+*     Q = H(ilo) H(ilo+1) . . . H(ihi-1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
+*  exit in A(i+2:ihi,i), and tau in TAU(i).
+*
+*  The contents of A are illustrated by the following example, with
+*  n = 7, ilo = 2 and ihi = 6:
+*
+*  on entry,                        on exit,
+*
+*  ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a )
+*  (     a   a   a   a   a   a )    (      a   h   h   h   h   a )
+*  (     a   a   a   a   a   a )    (      h   h   h   h   h   h )
+*  (     a   a   a   a   a   a )    (      v2  h   h   h   h   h )
+*  (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h )
+*  (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h )
+*  (                         a )    (                          a )
+*
+*  where a denotes an element of the original matrix A, h denotes a
+*  modified element of the upper Hessenberg matrix H, and vi denotes an
+*  element of the vector defining H(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      COMPLEX*16         ALPHA
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLARF, ZLARFG
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN
+         INFO = -2
+      ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGEHD2', -INFO )
+         RETURN
+      END IF
+*
+      DO 10 I = ILO, IHI - 1
+*
+*        Compute elementary reflector H(i) to annihilate A(i+2:ihi,i)
+*
+         ALPHA = A( I+1, I )
+         CALL ZLARFG( IHI-I, ALPHA, A( MIN( I+2, N ), I ), 1, TAU( I ) )
+         A( I+1, I ) = ONE
+*
+*        Apply H(i) to A(1:ihi,i+1:ihi) from the right
+*
+         CALL ZLARF( 'Right', IHI, IHI-I, A( I+1, I ), 1, TAU( I ),
+     $               A( 1, I+1 ), LDA, WORK )
+*
+*        Apply H(i)' to A(i+1:ihi,i+1:n) from the left
+*
+         CALL ZLARF( 'Left', IHI-I, N-I, A( I+1, I ), 1,
+     $               DCONJG( TAU( I ) ), A( I+1, I+1 ), LDA, WORK )
+*
+         A( I+1, I ) = ALPHA
+   10 CONTINUE
+*
+      RETURN
+*
+*     End of ZGEHD2
+*
+      END
diff --git a/costa/native/external/lapack/zgehrd.f b/costa/native/external/lapack/zgehrd.f
new file mode 100644
index 000000000..d1c02bb20
--- /dev/null
+++ b/costa/native/external/lapack/zgehrd.f
@@ -0,0 +1,254 @@
+      SUBROUTINE ZGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            IHI, ILO, INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGEHRD reduces a complex general matrix A to upper Hessenberg form H
+*  by a unitary similarity transformation:  Q' * A * Q = H .
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          It is assumed that A is already upper triangular in rows
+*          and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
+*          set by a previous call to ZGEBAL; otherwise they should be
+*          set to 1 and N respectively. See Further Details.
+*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the N-by-N general matrix to be reduced.
+*          On exit, the upper triangle and the first subdiagonal of A
+*          are overwritten with the upper Hessenberg matrix H, and the
+*          elements below the first subdiagonal, with the array TAU,
+*          represent the unitary matrix Q as a product of elementary
+*          reflectors. See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  TAU     (output) COMPLEX*16 array, dimension (N-1)
+*          The scalar factors of the elementary reflectors (see Further
+*          Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to
+*          zero.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.  LWORK >= max(1,N).
+*          For optimum performance LWORK >= N*NB, where NB is the
+*          optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of (ihi-ilo) elementary
+*  reflectors
+*
+*     Q = H(ilo) H(ilo+1) . . . H(ihi-1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
+*  exit in A(i+2:ihi,i), and tau in TAU(i).
+*
+*  The contents of A are illustrated by the following example, with
+*  n = 7, ilo = 2 and ihi = 6:
+*
+*  on entry,                        on exit,
+*
+*  ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a )
+*  (     a   a   a   a   a   a )    (      a   h   h   h   h   a )
+*  (     a   a   a   a   a   a )    (      h   h   h   h   h   h )
+*  (     a   a   a   a   a   a )    (      v2  h   h   h   h   h )
+*  (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h )
+*  (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h )
+*  (                         a )    (                          a )
+*
+*  where a denotes an element of the original matrix A, h denotes a
+*  modified element of the upper Hessenberg matrix H, and vi denotes an
+*  element of the vector defining H(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            NBMAX, LDT
+      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
+      COMPLEX*16         ZERO, ONE
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, LDWORK, LWKOPT, NB, NBMIN,
+     $                   NH, NX
+      COMPLEX*16         EI
+*     ..
+*     .. Local Arrays ..
+      COMPLEX*16         T( LDT, NBMAX )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGEHD2, ZGEMM, ZLAHRD, ZLARFB
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      NB = MIN( NBMAX, ILAENV( 1, 'ZGEHRD', ' ', N, ILO, IHI, -1 ) )
+      LWKOPT = N*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN
+         INFO = -2
+      ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGEHRD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Set elements 1:ILO-1 and IHI:N-1 of TAU to zero
+*
+      DO 10 I = 1, ILO - 1
+         TAU( I ) = ZERO
+   10 CONTINUE
+      DO 20 I = MAX( 1, IHI ), N - 1
+         TAU( I ) = ZERO
+   20 CONTINUE
+*
+*     Quick return if possible
+*
+      NH = IHI - ILO + 1
+      IF( NH.LE.1 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      IWS = 1
+      IF( NB.GT.1 .AND. NB.LT.NH ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code
+*        (last block is always handled by unblocked code).
+*
+         NX = MAX( NB, ILAENV( 3, 'ZGEHRD', ' ', N, ILO, IHI, -1 ) )
+         IF( NX.LT.NH ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            IWS = N*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  determine the
+*              minimum value of NB, and reduce NB or force use of
+*              unblocked code.
+*
+               NBMIN = MAX( 2, ILAENV( 2, 'ZGEHRD', ' ', N, ILO, IHI,
+     $                 -1 ) )
+               IF( LWORK.GE.N*NBMIN ) THEN
+                  NB = LWORK / N
+               ELSE
+                  NB = 1
+               END IF
+            END IF
+         END IF
+      END IF
+      LDWORK = N
+*
+      IF( NB.LT.NBMIN .OR. NB.GE.NH ) THEN
+*
+*        Use unblocked code below
+*
+         I = ILO
+*
+      ELSE
+*
+*        Use blocked code
+*
+         DO 30 I = ILO, IHI - 1 - NX, NB
+            IB = MIN( NB, IHI-I )
+*
+*           Reduce columns i:i+ib-1 to Hessenberg form, returning the
+*           matrices V and T of the block reflector H = I - V*T*V'
+*           which performs the reduction, and also the matrix Y = A*V*T
+*
+            CALL ZLAHRD( IHI, I, IB, A( 1, I ), LDA, TAU( I ), T, LDT,
+     $                   WORK, LDWORK )
+*
+*           Apply the block reflector H to A(1:ihi,i+ib:ihi) from the
+*           right, computing  A := A - Y * V'. V(i+ib,ib-1) must be set
+*           to 1.
+*
+            EI = A( I+IB, I+IB-1 )
+            A( I+IB, I+IB-1 ) = ONE
+            CALL ZGEMM( 'No transpose', 'Conjugate transpose', IHI,
+     $                  IHI-I-IB+1, IB, -ONE, WORK, LDWORK,
+     $                  A( I+IB, I ), LDA, ONE, A( 1, I+IB ), LDA )
+            A( I+IB, I+IB-1 ) = EI
+*
+*           Apply the block reflector H to A(i+1:ihi,i+ib:n) from the
+*           left
+*
+            CALL ZLARFB( 'Left', 'Conjugate transpose', 'Forward',
+     $                   'Columnwise', IHI-I, N-I-IB+1, IB, A( I+1, I ),
+     $                   LDA, T, LDT, A( I+1, I+IB ), LDA, WORK,
+     $                   LDWORK )
+   30    CONTINUE
+      END IF
+*
+*     Use unblocked code to reduce the rest of the matrix
+*
+      CALL ZGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO )
+      WORK( 1 ) = IWS
+*
+      RETURN
+*
+*     End of ZGEHRD
+*
+      END
diff --git a/costa/native/external/lapack/zgelq2.f b/costa/native/external/lapack/zgelq2.f
new file mode 100644
index 000000000..fd516f373
--- /dev/null
+++ b/costa/native/external/lapack/zgelq2.f
@@ -0,0 +1,124 @@
+      SUBROUTINE ZGELQ2( M, N, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGELQ2 computes an LQ factorization of a complex m by n matrix A:
+*  A = L * Q.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the m by n matrix A.
+*          On exit, the elements on and below the diagonal of the array
+*          contain the m by min(m,n) lower trapezoidal matrix L (L is
+*          lower triangular if m <= n); the elements above the diagonal,
+*          with the array TAU, represent the unitary matrix Q as a
+*          product of elementary reflectors (see Further Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) COMPLEX*16 array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (M)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(k)' . . . H(2)' H(1)', where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in
+*  A(i,i+1:n), and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, K
+      COMPLEX*16         ALPHA
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLACGV, ZLARF, ZLARFG
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGELQ2', -INFO )
+         RETURN
+      END IF
+*
+      K = MIN( M, N )
+*
+      DO 10 I = 1, K
+*
+*        Generate elementary reflector H(i) to annihilate A(i,i+1:n)
+*
+         CALL ZLACGV( N-I+1, A( I, I ), LDA )
+         ALPHA = A( I, I )
+         CALL ZLARFG( N-I+1, ALPHA, A( I, MIN( I+1, N ) ), LDA,
+     $                TAU( I ) )
+         IF( I.LT.M ) THEN
+*
+*           Apply H(i) to A(i+1:m,i:n) from the right
+*
+            A( I, I ) = ONE
+            CALL ZLARF( 'Right', M-I, N-I+1, A( I, I ), LDA, TAU( I ),
+     $                  A( I+1, I ), LDA, WORK )
+         END IF
+         A( I, I ) = ALPHA
+         CALL ZLACGV( N-I+1, A( I, I ), LDA )
+   10 CONTINUE
+      RETURN
+*
+*     End of ZGELQ2
+*
+      END
diff --git a/costa/native/external/lapack/zgelqf.f b/costa/native/external/lapack/zgelqf.f
new file mode 100644
index 000000000..0ee3476be
--- /dev/null
+++ b/costa/native/external/lapack/zgelqf.f
@@ -0,0 +1,196 @@
+      SUBROUTINE ZGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGELQF computes an LQ factorization of a complex M-by-N matrix A:
+*  A = L * Q.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, the elements on and below the diagonal of the array
+*          contain the m-by-min(m,n) lower trapezoidal matrix L (L is
+*          lower triangular if m <= n); the elements above the diagonal,
+*          with the array TAU, represent the unitary matrix Q as a
+*          product of elementary reflectors (see Further Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) COMPLEX*16 array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,M).
+*          For optimum performance LWORK >= M*NB, where NB is the
+*          optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(k)' . . . H(2)' H(1)', where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in
+*  A(i,i+1:n), and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB,
+     $                   NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGELQ2, ZLARFB, ZLARFT
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 )
+      LWKOPT = M*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGELQF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      K = MIN( M, N )
+      IF( K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 0
+      IWS = M
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'ZGELQF', ' ', M, N, -1, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = M
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'ZGELQF', ' ', M, N, -1,
+     $                 -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code initially
+*
+         DO 10 I = 1, K - NX, NB
+            IB = MIN( K-I+1, NB )
+*
+*           Compute the LQ factorization of the current block
+*           A(i:i+ib-1,i:n)
+*
+            CALL ZGELQ2( IB, N-I+1, A( I, I ), LDA, TAU( I ), WORK,
+     $                   IINFO )
+            IF( I+IB.LE.M ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i) H(i+1) . . . H(i+ib-1)
+*
+               CALL ZLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ),
+     $                      LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H to A(i+ib:m,i:n) from the right
+*
+               CALL ZLARFB( 'Right', 'No transpose', 'Forward',
+     $                      'Rowwise', M-I-IB+1, N-I+1, IB, A( I, I ),
+     $                      LDA, WORK, LDWORK, A( I+IB, I ), LDA,
+     $                      WORK( IB+1 ), LDWORK )
+            END IF
+   10    CONTINUE
+      ELSE
+         I = 1
+      END IF
+*
+*     Use unblocked code to factor the last or only block.
+*
+      IF( I.LE.K )
+     $   CALL ZGELQ2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK,
+     $                IINFO )
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of ZGELQF
+*
+      END
diff --git a/costa/native/external/lapack/zgels.f b/costa/native/external/lapack/zgels.f
new file mode 100644
index 000000000..c3b56d630
--- /dev/null
+++ b/costa/native/external/lapack/zgels.f
@@ -0,0 +1,405 @@
+      SUBROUTINE ZGELS( TRANS, M, N, NRHS, A, LDA, B, LDB, WORK, LWORK,
+     $                  INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGELS solves overdetermined or underdetermined complex linear systems
+*  involving an M-by-N matrix A, or its conjugate-transpose, using a QR
+*  or LQ factorization of A.  It is assumed that A has full rank.
+*
+*  The following options are provided:
+*
+*  1. If TRANS = 'N' and m >= n:  find the least squares solution of
+*     an overdetermined system, i.e., solve the least squares problem
+*                  minimize || B - A*X ||.
+*
+*  2. If TRANS = 'N' and m < n:  find the minimum norm solution of
+*     an underdetermined system A * X = B.
+*
+*  3. If TRANS = 'C' and m >= n:  find the minimum norm solution of
+*     an undetermined system A**H * X = B.
+*
+*  4. If TRANS = 'C' and m < n:  find the least squares solution of
+*     an overdetermined system, i.e., solve the least squares problem
+*                  minimize || B - A**H * X ||.
+*
+*  Several right hand side vectors b and solution vectors x can be
+*  handled in a single call; they are stored as the columns of the
+*  M-by-NRHS right hand side matrix B and the N-by-NRHS solution
+*  matrix X.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER
+*          = 'N': the linear system involves A;
+*          = 'C': the linear system involves A**H.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of
+*          columns of the matrices B and X. NRHS >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*            if M >= N, A is overwritten by details of its QR
+*                       factorization as returned by ZGEQRF;
+*            if M <  N, A is overwritten by details of its LQ
+*                       factorization as returned by ZGELQF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the matrix B of right hand side vectors, stored
+*          columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS
+*          if TRANS = 'C'.
+*          On exit, B is overwritten by the solution vectors, stored
+*          columnwise:
+*          if TRANS = 'N' and m >= n, rows 1 to n of B contain the least
+*          squares solution vectors; the residual sum of squares for the
+*          solution in each column is given by the sum of squares of
+*          elements N+1 to M in that column;
+*          if TRANS = 'N' and m < n, rows 1 to N of B contain the
+*          minimum norm solution vectors;
+*          if TRANS = 'C' and m >= n, rows 1 to M of B contain the
+*          minimum norm solution vectors;
+*          if TRANS = 'C' and m < n, rows 1 to M of B contain the
+*          least squares solution vectors; the residual sum of squares
+*          for the solution in each column is given by the sum of
+*          squares of elements M+1 to N in that column.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= MAX(1,M,N).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          LWORK >= max( 1, MN + max( MN, NRHS ) ).
+*          For optimal performance,
+*          LWORK >= max( 1, MN + max( MN, NRHS )*NB ).
+*          where MN = min(M,N) and NB is the optimum block size.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+      COMPLEX*16         CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, TPSD
+      INTEGER            BROW, I, IASCL, IBSCL, J, MN, NB, SCLLEN, WSIZE
+      DOUBLE PRECISION   ANRM, BIGNUM, BNRM, SMLNUM
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   RWORK( 1 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, ZLANGE
+      EXTERNAL           LSAME, ILAENV, DLAMCH, ZLANGE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGELQF, ZGEQRF, ZLASCL, ZLASET, ZTRSM,
+     $                   ZUNMLQ, ZUNMQR
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments.
+*
+      INFO = 0
+      MN = MIN( M, N )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.( LSAME( TRANS, 'N' ) .OR. LSAME( TRANS, 'C' ) ) ) THEN
+         INFO = -1
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN
+         INFO = -8
+      ELSE IF( LWORK.LT.MAX( 1, MN+MAX( MN, NRHS ) ) .AND. .NOT.LQUERY )
+     $          THEN
+         INFO = -10
+      END IF
+*
+*     Figure out optimal block size
+*
+      IF( INFO.EQ.0 .OR. INFO.EQ.-10 ) THEN
+*
+         TPSD = .TRUE.
+         IF( LSAME( TRANS, 'N' ) )
+     $      TPSD = .FALSE.
+*
+         IF( M.GE.N ) THEN
+            NB = ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 )
+            IF( TPSD ) THEN
+               NB = MAX( NB, ILAENV( 1, 'ZUNMQR', 'LN', M, NRHS, N,
+     $              -1 ) )
+            ELSE
+               NB = MAX( NB, ILAENV( 1, 'ZUNMQR', 'LC', M, NRHS, N,
+     $              -1 ) )
+            END IF
+         ELSE
+            NB = ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 )
+            IF( TPSD ) THEN
+               NB = MAX( NB, ILAENV( 1, 'ZUNMLQ', 'LC', N, NRHS, M,
+     $              -1 ) )
+            ELSE
+               NB = MAX( NB, ILAENV( 1, 'ZUNMLQ', 'LN', N, NRHS, M,
+     $              -1 ) )
+            END IF
+         END IF
+*
+         WSIZE = MAX( 1, MN+MAX( MN, NRHS )*NB )
+         WORK( 1 ) = DBLE( WSIZE )
+*
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGELS ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( MIN( M, N, NRHS ).EQ.0 ) THEN
+         CALL ZLASET( 'Full', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB )
+         RETURN
+      END IF
+*
+*     Get machine parameters
+*
+      SMLNUM = DLAMCH( 'S' ) / DLAMCH( 'P' )
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+*
+*     Scale A, B if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = ZLANGE( 'M', M, N, A, LDA, RWORK )
+      IASCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
+         IASCL = 1
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
+         IASCL = 2
+      ELSE IF( ANRM.EQ.ZERO ) THEN
+*
+*        Matrix all zero. Return zero solution.
+*
+         CALL ZLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB )
+         GO TO 50
+      END IF
+*
+      BROW = M
+      IF( TPSD )
+     $   BROW = N
+      BNRM = ZLANGE( 'M', BROW, NRHS, B, LDB, RWORK )
+      IBSCL = 0
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL ZLASCL( 'G', 0, 0, BNRM, SMLNUM, BROW, NRHS, B, LDB,
+     $                INFO )
+         IBSCL = 1
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL ZLASCL( 'G', 0, 0, BNRM, BIGNUM, BROW, NRHS, B, LDB,
+     $                INFO )
+         IBSCL = 2
+      END IF
+*
+      IF( M.GE.N ) THEN
+*
+*        compute QR factorization of A
+*
+         CALL ZGEQRF( M, N, A, LDA, WORK( 1 ), WORK( MN+1 ), LWORK-MN,
+     $                INFO )
+*
+*        workspace at least N, optimally N*NB
+*
+         IF( .NOT.TPSD ) THEN
+*
+*           Least-Squares Problem min || A * X - B ||
+*
+*           B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS)
+*
+            CALL ZUNMQR( 'Left', 'Conjugate transpose', M, NRHS, N, A,
+     $                   LDA, WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN,
+     $                   INFO )
+*
+*           workspace at least NRHS, optimally NRHS*NB
+*
+*           B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS)
+*
+            CALL ZTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N,
+     $                  NRHS, CONE, A, LDA, B, LDB )
+*
+            SCLLEN = N
+*
+         ELSE
+*
+*           Overdetermined system of equations A' * X = B
+*
+*           B(1:N,1:NRHS) := inv(R') * B(1:N,1:NRHS)
+*
+            CALL ZTRSM( 'Left', 'Upper', 'Conjugate transpose',
+     $                  'Non-unit', N, NRHS, CONE, A, LDA, B, LDB )
+*
+*           B(N+1:M,1:NRHS) = ZERO
+*
+            DO 20 J = 1, NRHS
+               DO 10 I = N + 1, M
+                  B( I, J ) = CZERO
+   10          CONTINUE
+   20       CONTINUE
+*
+*           B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS)
+*
+            CALL ZUNMQR( 'Left', 'No transpose', M, NRHS, N, A, LDA,
+     $                   WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN,
+     $                   INFO )
+*
+*           workspace at least NRHS, optimally NRHS*NB
+*
+            SCLLEN = M
+*
+         END IF
+*
+      ELSE
+*
+*        Compute LQ factorization of A
+*
+         CALL ZGELQF( M, N, A, LDA, WORK( 1 ), WORK( MN+1 ), LWORK-MN,
+     $                INFO )
+*
+*        workspace at least M, optimally M*NB.
+*
+         IF( .NOT.TPSD ) THEN
+*
+*           underdetermined system of equations A * X = B
+*
+*           B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS)
+*
+            CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', M,
+     $                  NRHS, CONE, A, LDA, B, LDB )
+*
+*           B(M+1:N,1:NRHS) = 0
+*
+            DO 40 J = 1, NRHS
+               DO 30 I = M + 1, N
+                  B( I, J ) = CZERO
+   30          CONTINUE
+   40       CONTINUE
+*
+*           B(1:N,1:NRHS) := Q(1:N,:)' * B(1:M,1:NRHS)
+*
+            CALL ZUNMLQ( 'Left', 'Conjugate transpose', N, NRHS, M, A,
+     $                   LDA, WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN,
+     $                   INFO )
+*
+*           workspace at least NRHS, optimally NRHS*NB
+*
+            SCLLEN = N
+*
+         ELSE
+*
+*           overdetermined system min || A' * X - B ||
+*
+*           B(1:N,1:NRHS) := Q * B(1:N,1:NRHS)
+*
+            CALL ZUNMLQ( 'Left', 'No transpose', N, NRHS, M, A, LDA,
+     $                   WORK( 1 ), B, LDB, WORK( MN+1 ), LWORK-MN,
+     $                   INFO )
+*
+*           workspace at least NRHS, optimally NRHS*NB
+*
+*           B(1:M,1:NRHS) := inv(L') * B(1:M,1:NRHS)
+*
+            CALL ZTRSM( 'Left', 'Lower', 'Conjugate transpose',
+     $                  'Non-unit', M, NRHS, CONE, A, LDA, B, LDB )
+*
+            SCLLEN = M
+*
+         END IF
+*
+      END IF
+*
+*     Undo scaling
+*
+      IF( IASCL.EQ.1 ) THEN
+         CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, SCLLEN, NRHS, B, LDB,
+     $                INFO )
+      ELSE IF( IASCL.EQ.2 ) THEN
+         CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, SCLLEN, NRHS, B, LDB,
+     $                INFO )
+      END IF
+      IF( IBSCL.EQ.1 ) THEN
+         CALL ZLASCL( 'G', 0, 0, SMLNUM, BNRM, SCLLEN, NRHS, B, LDB,
+     $                INFO )
+      ELSE IF( IBSCL.EQ.2 ) THEN
+         CALL ZLASCL( 'G', 0, 0, BIGNUM, BNRM, SCLLEN, NRHS, B, LDB,
+     $                INFO )
+      END IF
+*
+   50 CONTINUE
+      WORK( 1 ) = DBLE( WSIZE )
+*
+      RETURN
+*
+*     End of ZGELS
+*
+      END
diff --git a/costa/native/external/lapack/zgelsd.f b/costa/native/external/lapack/zgelsd.f
new file mode 100644
index 000000000..890964a74
--- /dev/null
+++ b/costa/native/external/lapack/zgelsd.f
@@ -0,0 +1,545 @@
+      SUBROUTINE ZGELSD( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
+     $                   WORK, LWORK, RWORK, IWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   RWORK( * ), S( * )
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGELSD computes the minimum-norm solution to a real linear least
+*  squares problem:
+*      minimize 2-norm(| b - A*x |)
+*  using the singular value decomposition (SVD) of A. A is an M-by-N
+*  matrix which may be rank-deficient.
+*
+*  Several right hand side vectors b and solution vectors x can be
+*  handled in a single call; they are stored as the columns of the
+*  M-by-NRHS right hand side matrix B and the N-by-NRHS solution
+*  matrix X.
+*
+*  The problem is solved in three steps:
+*  (1) Reduce the coefficient matrix A to bidiagonal form with
+*      Householder tranformations, reducing the original problem
+*      into a "bidiagonal least squares problem" (BLS)
+*  (2) Solve the BLS using a divide and conquer approach.
+*  (3) Apply back all the Householder tranformations to solve
+*      the original least squares problem.
+*
+*  The effective rank of A is determined by treating as zero those
+*  singular values which are less than RCOND times the largest singular
+*  value.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A. N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X. NRHS >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, A has been destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the M-by-NRHS right hand side matrix B.
+*          On exit, B is overwritten by the N-by-NRHS solution matrix X.
+*          If m >= n and RANK = n, the residual sum-of-squares for
+*          the solution in the i-th column is given by the sum of
+*          squares of elements n+1:m in that column.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,M,N).
+*
+*  S       (output) DOUBLE PRECISION array, dimension (min(M,N))
+*          The singular values of A in decreasing order.
+*          The condition number of A in the 2-norm = S(1)/S(min(m,n)).
+*
+*  RCOND   (input) DOUBLE PRECISION
+*          RCOND is used to determine the effective rank of A.
+*          Singular values S(i) <= RCOND*S(1) are treated as zero.
+*          If RCOND < 0, machine precision is used instead.
+*
+*  RANK    (output) INTEGER
+*          The effective rank of A, i.e., the number of singular values
+*          which are greater than RCOND*S(1).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK must be at least 1.
+*          The exact minimum amount of workspace needed depends on M,
+*          N and NRHS. As long as LWORK is at least
+*              2 * N + N * NRHS
+*          if M is greater than or equal to N or
+*              2 * M + M * NRHS
+*          if M is less than N, the code will execute correctly.
+*          For good performance, LWORK should generally be larger.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension at least
+*             10*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS +
+*             (SMLSIZ+1)**2
+*          if M is greater than or equal to N or
+*             10*M + 2*M*SMLSIZ + 8*M*NLVL + 3*SMLSIZ*NRHS +
+*             (SMLSIZ+1)**2
+*          if M is less than N, the code will execute correctly.
+*          SMLSIZ is returned by ILAENV and is equal to the maximum
+*          size of the subproblems at the bottom of the computation
+*          tree (usually about 25), and
+*             NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 )
+*
+*  IWORK   (workspace) INTEGER array, dimension (LIWORK)
+*          LIWORK >= 3 * MINMN * NLVL + 11 * MINMN,
+*          where MINMN = MIN( M,N ).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  the algorithm for computing the SVD failed to converge;
+*                if INFO = i, i off-diagonal elements of an intermediate
+*                bidiagonal form did not converge to zero.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Ren-Cang Li, Computer Science Division, University of
+*       California at Berkeley, USA
+*     Osni Marques, LBNL/NERSC, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+      COMPLEX*16         CZERO
+      PARAMETER          ( CZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            IASCL, IBSCL, IE, IL, ITAU, ITAUP, ITAUQ,
+     $                   LDWORK, MAXMN, MAXWRK, MINMN, MINWRK, MM,
+     $                   MNTHR, NRWORK, NWORK, SMLSIZ
+      DOUBLE PRECISION   ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLABAD, DLASCL, DLASET, XERBLA, ZGEBRD, ZGELQF,
+     $                   ZGEQRF, ZLACPY, ZLALSD, ZLASCL, ZLASET, ZUNMBR,
+     $                   ZUNMLQ, ZUNMQR
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, ZLANGE
+      EXTERNAL           ILAENV, DLAMCH, ZLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCMPLX, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments.
+*
+      INFO = 0
+      MINMN = MIN( M, N )
+      MAXMN = MAX( M, N )
+      MNTHR = ILAENV( 6, 'ZGELSD', ' ', M, N, NRHS, -1 )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN
+         INFO = -7
+      END IF
+*
+      SMLSIZ = ILAENV( 9, 'ZGELSD', ' ', 0, 0, 0, 0 )
+*
+*     Compute workspace.
+*     (Note: Comments in the code beginning "Workspace:" describe the
+*     minimal amount of workspace needed at that point in the code,
+*     as well as the preferred amount for good performance.
+*     NB refers to the optimal block size for the immediately
+*     following subroutine, as returned by ILAENV.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 ) THEN
+         MAXWRK = 0
+         MM = M
+         IF( M.GE.N .AND. M.GE.MNTHR ) THEN
+*
+*           Path 1a - overdetermined, with many more rows than columns.
+*
+            MM = N
+            MAXWRK = MAX( MAXWRK, N*ILAENV( 1, 'ZGEQRF', ' ', M, N, -1,
+     $               -1 ) )
+            MAXWRK = MAX( MAXWRK, NRHS*ILAENV( 1, 'ZUNMQR', 'LC', M,
+     $               NRHS, N, -1 ) )
+         END IF
+         IF( M.GE.N ) THEN
+*
+*           Path 1 - overdetermined or exactly determined.
+*
+            MAXWRK = MAX( MAXWRK, 2*N+( MM+N )*
+     $               ILAENV( 1, 'ZGEBRD', ' ', MM, N, -1, -1 ) )
+            MAXWRK = MAX( MAXWRK, 2*N+NRHS*
+     $               ILAENV( 1, 'ZUNMBR', 'QLC', MM, NRHS, N, -1 ) )
+            MAXWRK = MAX( MAXWRK, 2*N+( N-1 )*
+     $               ILAENV( 1, 'ZUNMBR', 'PLN', N, NRHS, N, -1 ) )
+            MAXWRK = MAX( MAXWRK, 2*N+N*NRHS )
+            MINWRK = MAX( 2*N+MM, 2*N+N*NRHS )
+         END IF
+         IF( N.GT.M ) THEN
+            IF( N.GE.MNTHR ) THEN
+*
+*              Path 2a - underdetermined, with many more columns
+*              than rows.
+*
+               MAXWRK = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 )
+               MAXWRK = MAX( MAXWRK, M*M+4*M+2*M*
+     $                  ILAENV( 1, 'ZGEBRD', ' ', M, M, -1, -1 ) )
+               MAXWRK = MAX( MAXWRK, M*M+4*M+NRHS*
+     $                  ILAENV( 1, 'ZUNMBR', 'QLC', M, NRHS, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, M*M+4*M+( M-1 )*
+     $                  ILAENV( 1, 'ZUNMLQ', 'LC', N, NRHS, M, -1 ) )
+               IF( NRHS.GT.1 ) THEN
+                  MAXWRK = MAX( MAXWRK, M*M+M+M*NRHS )
+               ELSE
+                  MAXWRK = MAX( MAXWRK, M*M+2*M )
+               END IF
+               MAXWRK = MAX( MAXWRK, M*M+4*M+M*NRHS )
+            ELSE
+*
+*              Path 2 - underdetermined.
+*
+               MAXWRK = 2*M + ( N+M )*ILAENV( 1, 'ZGEBRD', ' ', M, N,
+     $                  -1, -1 )
+               MAXWRK = MAX( MAXWRK, 2*M+NRHS*
+     $                  ILAENV( 1, 'ZUNMBR', 'QLC', M, NRHS, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, 2*M+M*
+     $                  ILAENV( 1, 'ZUNMBR', 'PLN', N, NRHS, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, 2*M+M*NRHS )
+            END IF
+            MINWRK = MAX( 2*M+N, 2*M+M*NRHS )
+         END IF
+         MINWRK = MIN( MINWRK, MAXWRK )
+         WORK( 1 ) = DCMPLX( MAXWRK, 0 )
+         IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
+            INFO = -12
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGELSD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         GO TO 10
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         RANK = 0
+         RETURN
+      END IF
+*
+*     Get machine parameters.
+*
+      EPS = DLAMCH( 'P' )
+      SFMIN = DLAMCH( 'S' )
+      SMLNUM = SFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+*
+*     Scale A if max entry outside range [SMLNUM,BIGNUM].
+*
+      ANRM = ZLANGE( 'M', M, N, A, LDA, RWORK )
+      IASCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
+         IASCL = 1
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM.
+*
+         CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
+         IASCL = 2
+      ELSE IF( ANRM.EQ.ZERO ) THEN
+*
+*        Matrix all zero. Return zero solution.
+*
+         CALL ZLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB )
+         CALL DLASET( 'F', MINMN, 1, ZERO, ZERO, S, 1 )
+         RANK = 0
+         GO TO 10
+      END IF
+*
+*     Scale B if max entry outside range [SMLNUM,BIGNUM].
+*
+      BNRM = ZLANGE( 'M', M, NRHS, B, LDB, RWORK )
+      IBSCL = 0
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM.
+*
+         CALL ZLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 1
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM.
+*
+         CALL ZLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 2
+      END IF
+*
+*     If M < N make sure B(M+1:N,:) = 0
+*
+      IF( M.LT.N )
+     $   CALL ZLASET( 'F', N-M, NRHS, CZERO, CZERO, B( M+1, 1 ), LDB )
+*
+*     Overdetermined case.
+*
+      IF( M.GE.N ) THEN
+*
+*        Path 1 - overdetermined or exactly determined.
+*
+         MM = M
+         IF( M.GE.MNTHR ) THEN
+*
+*           Path 1a - overdetermined, with many more rows than columns
+*
+            MM = N
+            ITAU = 1
+            NWORK = ITAU + N
+*
+*           Compute A=Q*R.
+*           (RWorkspace: need N)
+*           (CWorkspace: need N, prefer N*NB)
+*
+            CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                   LWORK-NWORK+1, INFO )
+*
+*           Multiply B by transpose(Q).
+*           (RWorkspace: need N)
+*           (CWorkspace: need NRHS, prefer NRHS*NB)
+*
+            CALL ZUNMQR( 'L', 'C', M, NRHS, N, A, LDA, WORK( ITAU ), B,
+     $                   LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+*           Zero out below R.
+*
+            IF( N.GT.1 ) THEN
+               CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ),
+     $                      LDA )
+            END IF
+         END IF
+*
+         ITAUQ = 1
+         ITAUP = ITAUQ + N
+         NWORK = ITAUP + N
+         IE = 1
+         NRWORK = IE + N
+*
+*        Bidiagonalize R in A.
+*        (RWorkspace: need N)
+*        (CWorkspace: need 2*N+MM, prefer 2*N+(MM+N)*NB)
+*
+         CALL ZGEBRD( MM, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors of R.
+*        (CWorkspace: need 2*N+NRHS, prefer 2*N+NRHS*NB)
+*
+         CALL ZUNMBR( 'Q', 'L', 'C', MM, NRHS, N, A, LDA, WORK( ITAUQ ),
+     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+*        Solve the bidiagonal least squares problem.
+*
+         CALL ZLALSD( 'U', SMLSIZ, N, NRHS, S, RWORK( IE ), B, LDB,
+     $                RCOND, RANK, WORK( NWORK ), RWORK( NRWORK ),
+     $                IWORK, INFO )
+         IF( INFO.NE.0 ) THEN
+            GO TO 10
+         END IF
+*
+*        Multiply B by right bidiagonalizing vectors of R.
+*
+         CALL ZUNMBR( 'P', 'L', 'N', N, NRHS, N, A, LDA, WORK( ITAUP ),
+     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+      ELSE IF( N.GE.MNTHR .AND. LWORK.GE.4*M+M*M+
+     $         MAX( M, 2*M-4, NRHS, N-3*M ) ) THEN
+*
+*        Path 2a - underdetermined, with many more columns than rows
+*        and sufficient workspace for an efficient algorithm.
+*
+         LDWORK = M
+         IF( LWORK.GE.MAX( 4*M+M*LDA+MAX( M, 2*M-4, NRHS, N-3*M ),
+     $       M*LDA+M+M*NRHS ) )LDWORK = LDA
+         ITAU = 1
+         NWORK = M + 1
+*
+*        Compute A=L*Q.
+*        (CWorkspace: need 2*M, prefer M+M*NB)
+*
+         CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                LWORK-NWORK+1, INFO )
+         IL = NWORK
+*
+*        Copy L to WORK(IL), zeroing out above its diagonal.
+*
+         CALL ZLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK )
+         CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, WORK( IL+LDWORK ),
+     $                LDWORK )
+         ITAUQ = IL + LDWORK*M
+         ITAUP = ITAUQ + M
+         NWORK = ITAUP + M
+         IE = 1
+         NRWORK = IE + M
+*
+*        Bidiagonalize L in WORK(IL).
+*        (RWorkspace: need M)
+*        (CWorkspace: need M*M+4*M, prefer M*M+4*M+2*M*NB)
+*
+         CALL ZGEBRD( M, M, WORK( IL ), LDWORK, S, RWORK( IE ),
+     $                WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
+     $                LWORK-NWORK+1, INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors of L.
+*        (CWorkspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB)
+*
+         CALL ZUNMBR( 'Q', 'L', 'C', M, NRHS, M, WORK( IL ), LDWORK,
+     $                WORK( ITAUQ ), B, LDB, WORK( NWORK ),
+     $                LWORK-NWORK+1, INFO )
+*
+*        Solve the bidiagonal least squares problem.
+*
+         CALL ZLALSD( 'U', SMLSIZ, M, NRHS, S, RWORK( IE ), B, LDB,
+     $                RCOND, RANK, WORK( NWORK ), RWORK( NRWORK ),
+     $                IWORK, INFO )
+         IF( INFO.NE.0 ) THEN
+            GO TO 10
+         END IF
+*
+*        Multiply B by right bidiagonalizing vectors of L.
+*
+         CALL ZUNMBR( 'P', 'L', 'N', M, NRHS, M, WORK( IL ), LDWORK,
+     $                WORK( ITAUP ), B, LDB, WORK( NWORK ),
+     $                LWORK-NWORK+1, INFO )
+*
+*        Zero out below first M rows of B.
+*
+         CALL ZLASET( 'F', N-M, NRHS, CZERO, CZERO, B( M+1, 1 ), LDB )
+         NWORK = ITAU + M
+*
+*        Multiply transpose(Q) by B.
+*        (CWorkspace: need NRHS, prefer NRHS*NB)
+*
+         CALL ZUNMLQ( 'L', 'C', N, NRHS, M, A, LDA, WORK( ITAU ), B,
+     $                LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+      ELSE
+*
+*        Path 2 - remaining underdetermined cases.
+*
+         ITAUQ = 1
+         ITAUP = ITAUQ + M
+         NWORK = ITAUP + M
+         IE = 1
+         NRWORK = IE + M
+*
+*        Bidiagonalize A.
+*        (RWorkspace: need M)
+*        (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
+*
+         CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors.
+*        (CWorkspace: need 2*M+NRHS, prefer 2*M+NRHS*NB)
+*
+         CALL ZUNMBR( 'Q', 'L', 'C', M, NRHS, N, A, LDA, WORK( ITAUQ ),
+     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+*        Solve the bidiagonal least squares problem.
+*
+         CALL ZLALSD( 'L', SMLSIZ, M, NRHS, S, RWORK( IE ), B, LDB,
+     $                RCOND, RANK, WORK( NWORK ), RWORK( NRWORK ),
+     $                IWORK, INFO )
+         IF( INFO.NE.0 ) THEN
+            GO TO 10
+         END IF
+*
+*        Multiply B by right bidiagonalizing vectors of A.
+*
+         CALL ZUNMBR( 'P', 'L', 'N', N, NRHS, M, A, LDA, WORK( ITAUP ),
+     $                B, LDB, WORK( NWORK ), LWORK-NWORK+1, INFO )
+*
+      END IF
+*
+*     Undo scaling.
+*
+      IF( IASCL.EQ.1 ) THEN
+         CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
+         CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
+     $                INFO )
+      ELSE IF( IASCL.EQ.2 ) THEN
+         CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
+         CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
+     $                INFO )
+      END IF
+      IF( IBSCL.EQ.1 ) THEN
+         CALL ZLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
+      ELSE IF( IBSCL.EQ.2 ) THEN
+         CALL ZLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
+      END IF
+*
+   10 CONTINUE
+      WORK( 1 ) = DCMPLX( MAXWRK, 0 )
+      RETURN
+*
+*     End of ZGELSD
+*
+      END
diff --git a/costa/native/external/lapack/zgelss.f b/costa/native/external/lapack/zgelss.f
new file mode 100644
index 000000000..aa75b5081
--- /dev/null
+++ b/costa/native/external/lapack/zgelss.f
@@ -0,0 +1,639 @@
+      SUBROUTINE ZGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
+     $                   WORK, LWORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   RWORK( * ), S( * )
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGELSS computes the minimum norm solution to a complex linear
+*  least squares problem:
+*
+*  Minimize 2-norm(| b - A*x |).
+*
+*  using the singular value decomposition (SVD) of A. A is an M-by-N
+*  matrix which may be rank-deficient.
+*
+*  Several right hand side vectors b and solution vectors x can be
+*  handled in a single call; they are stored as the columns of the
+*  M-by-NRHS right hand side matrix B and the N-by-NRHS solution matrix
+*  X.
+*
+*  The effective rank of A is determined by treating as zero those
+*  singular values which are less than RCOND times the largest singular
+*  value.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A. N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X. NRHS >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, the first min(m,n) rows of A are overwritten with
+*          its right singular vectors, stored rowwise.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the M-by-NRHS right hand side matrix B.
+*          On exit, B is overwritten by the N-by-NRHS solution matrix X.
+*          If m >= n and RANK = n, the residual sum-of-squares for
+*          the solution in the i-th column is given by the sum of
+*          squares of elements n+1:m in that column.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,M,N).
+*
+*  S       (output) DOUBLE PRECISION array, dimension (min(M,N))
+*          The singular values of A in decreasing order.
+*          The condition number of A in the 2-norm = S(1)/S(min(m,n)).
+*
+*  RCOND   (input) DOUBLE PRECISION
+*          RCOND is used to determine the effective rank of A.
+*          Singular values S(i) <= RCOND*S(1) are treated as zero.
+*          If RCOND < 0, machine precision is used instead.
+*
+*  RANK    (output) INTEGER
+*          The effective rank of A, i.e., the number of singular values
+*          which are greater than RCOND*S(1).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= 1, and also:
+*          LWORK >=  2*min(M,N) + max(M,N,NRHS)
+*          For good performance, LWORK should generally be larger.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (5*min(M,N))
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  the algorithm for computing the SVD failed to converge;
+*                if INFO = i, i off-diagonal elements of an intermediate
+*                bidiagonal form did not converge to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+      COMPLEX*16         CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            BL, CHUNK, I, IASCL, IBSCL, IE, IL, IRWORK,
+     $                   ITAU, ITAUP, ITAUQ, IWORK, LDWORK, MAXMN,
+     $                   MAXWRK, MINMN, MINWRK, MM, MNTHR
+      DOUBLE PRECISION   ANRM, BIGNUM, BNRM, EPS, SFMIN, SMLNUM, THR
+*     ..
+*     .. Local Arrays ..
+      COMPLEX*16         VDUM( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLABAD, DLASCL, DLASET, XERBLA, ZBDSQR, ZCOPY,
+     $                   ZDRSCL, ZGEBRD, ZGELQF, ZGEMM, ZGEMV, ZGEQRF,
+     $                   ZLACPY, ZLASCL, ZLASET, ZUNGBR, ZUNMBR, ZUNMLQ,
+     $                   ZUNMQR
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, ZLANGE
+      EXTERNAL           ILAENV, DLAMCH, ZLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      MINMN = MIN( M, N )
+      MAXMN = MAX( M, N )
+      MNTHR = ILAENV( 6, 'ZGELSS', ' ', M, N, NRHS, -1 )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, MAXMN ) ) THEN
+         INFO = -7
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       CWorkspace refers to complex workspace, and RWorkspace refers
+*       to real workspace. NB refers to the optimal block size for the
+*       immediately following subroutine, as returned by ILAENV.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN
+         MAXWRK = 0
+         MM = M
+         IF( M.GE.N .AND. M.GE.MNTHR ) THEN
+*
+*           Path 1a - overdetermined, with many more rows than columns
+*
+*           Space needed for ZBDSQR is BDSPAC = 5*N
+*
+            MM = N
+            MAXWRK = MAX( MAXWRK, N+N*ILAENV( 1, 'ZGEQRF', ' ', M, N,
+     $               -1, -1 ) )
+            MAXWRK = MAX( MAXWRK, N+NRHS*
+     $               ILAENV( 1, 'ZUNMQR', 'LC', M, NRHS, N, -1 ) )
+         END IF
+         IF( M.GE.N ) THEN
+*
+*           Path 1 - overdetermined or exactly determined
+*
+*           Space needed for ZBDSQR is BDSPC = 7*N+12
+*
+            MAXWRK = MAX( MAXWRK, 2*N+( MM+N )*
+     $               ILAENV( 1, 'ZGEBRD', ' ', MM, N, -1, -1 ) )
+            MAXWRK = MAX( MAXWRK, 2*N+NRHS*
+     $               ILAENV( 1, 'ZUNMBR', 'QLC', MM, NRHS, N, -1 ) )
+            MAXWRK = MAX( MAXWRK, 2*N+( N-1 )*
+     $               ILAENV( 1, 'ZUNGBR', 'P', N, N, N, -1 ) )
+            MAXWRK = MAX( MAXWRK, N*NRHS )
+            MINWRK = 2*N + MAX( NRHS, M )
+         END IF
+         IF( N.GT.M ) THEN
+            MINWRK = 2*M + MAX( NRHS, N )
+            IF( N.GE.MNTHR ) THEN
+*
+*              Path 2a - underdetermined, with many more columns
+*              than rows
+*
+*              Space needed for ZBDSQR is BDSPAC = 5*M
+*
+               MAXWRK = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 )
+               MAXWRK = MAX( MAXWRK, 3*M+M*M+2*M*
+     $                  ILAENV( 1, 'ZGEBRD', ' ', M, M, -1, -1 ) )
+               MAXWRK = MAX( MAXWRK, 3*M+M*M+NRHS*
+     $                  ILAENV( 1, 'ZUNMBR', 'QLC', M, NRHS, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, 3*M+M*M+( M-1 )*
+     $                  ILAENV( 1, 'ZUNGBR', 'P', M, M, M, -1 ) )
+               IF( NRHS.GT.1 ) THEN
+                  MAXWRK = MAX( MAXWRK, M*M+M+M*NRHS )
+               ELSE
+                  MAXWRK = MAX( MAXWRK, M*M+2*M )
+               END IF
+               MAXWRK = MAX( MAXWRK, M+NRHS*
+     $                  ILAENV( 1, 'ZUNMLQ', 'LC', N, NRHS, M, -1 ) )
+            ELSE
+*
+*              Path 2 - underdetermined
+*
+*              Space needed for ZBDSQR is BDSPAC = 5*M
+*
+               MAXWRK = 2*M + ( N+M )*ILAENV( 1, 'ZGEBRD', ' ', M, N,
+     $                  -1, -1 )
+               MAXWRK = MAX( MAXWRK, 2*M+NRHS*
+     $                  ILAENV( 1, 'ZUNMBR', 'QLC', M, NRHS, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, 2*M+M*
+     $                  ILAENV( 1, 'ZUNGBR', 'P', M, N, M, -1 ) )
+               MAXWRK = MAX( MAXWRK, N*NRHS )
+            END IF
+         END IF
+         MINWRK = MAX( MINWRK, 1 )
+         MAXWRK = MAX( MINWRK, MAXWRK )
+         WORK( 1 ) = MAXWRK
+      END IF
+*
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY )
+     $   INFO = -12
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGELSS', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         RANK = 0
+         RETURN
+      END IF
+*
+*     Get machine parameters
+*
+      EPS = DLAMCH( 'P' )
+      SFMIN = DLAMCH( 'S' )
+      SMLNUM = SFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = ZLANGE( 'M', M, N, A, LDA, RWORK )
+      IASCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
+         IASCL = 1
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
+         IASCL = 2
+      ELSE IF( ANRM.EQ.ZERO ) THEN
+*
+*        Matrix all zero. Return zero solution.
+*
+         CALL ZLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB )
+         CALL DLASET( 'F', MINMN, 1, ZERO, ZERO, S, MINMN )
+         RANK = 0
+         GO TO 70
+      END IF
+*
+*     Scale B if max element outside range [SMLNUM,BIGNUM]
+*
+      BNRM = ZLANGE( 'M', M, NRHS, B, LDB, RWORK )
+      IBSCL = 0
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL ZLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 1
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL ZLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 2
+      END IF
+*
+*     Overdetermined case
+*
+      IF( M.GE.N ) THEN
+*
+*        Path 1 - overdetermined or exactly determined
+*
+         MM = M
+         IF( M.GE.MNTHR ) THEN
+*
+*           Path 1a - overdetermined, with many more rows than columns
+*
+            MM = N
+            ITAU = 1
+            IWORK = ITAU + N
+*
+*           Compute A=Q*R
+*           (CWorkspace: need 2*N, prefer N+N*NB)
+*           (RWorkspace: none)
+*
+            CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
+     $                   LWORK-IWORK+1, INFO )
+*
+*           Multiply B by transpose(Q)
+*           (CWorkspace: need N+NRHS, prefer N+NRHS*NB)
+*           (RWorkspace: none)
+*
+            CALL ZUNMQR( 'L', 'C', M, NRHS, N, A, LDA, WORK( ITAU ), B,
+     $                   LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
+*
+*           Zero out below R
+*
+            IF( N.GT.1 )
+     $         CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ),
+     $                      LDA )
+         END IF
+*
+         IE = 1
+         ITAUQ = 1
+         ITAUP = ITAUQ + N
+         IWORK = ITAUP + N
+*
+*        Bidiagonalize R in A
+*        (CWorkspace: need 2*N+MM, prefer 2*N+(MM+N)*NB)
+*        (RWorkspace: need N)
+*
+         CALL ZGEBRD( MM, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
+     $                INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors of R
+*        (CWorkspace: need 2*N+NRHS, prefer 2*N+NRHS*NB)
+*        (RWorkspace: none)
+*
+         CALL ZUNMBR( 'Q', 'L', 'C', MM, NRHS, N, A, LDA, WORK( ITAUQ ),
+     $                B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
+*
+*        Generate right bidiagonalizing vectors of R in A
+*        (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
+*        (RWorkspace: none)
+*
+         CALL ZUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
+     $                WORK( IWORK ), LWORK-IWORK+1, INFO )
+         IRWORK = IE + N
+*
+*        Perform bidiagonal QR iteration
+*          multiply B by transpose of left singular vectors
+*          compute right singular vectors in A
+*        (CWorkspace: none)
+*        (RWorkspace: need BDSPAC)
+*
+         CALL ZBDSQR( 'U', N, N, 0, NRHS, S, RWORK( IE ), A, LDA, VDUM,
+     $                1, B, LDB, RWORK( IRWORK ), INFO )
+         IF( INFO.NE.0 )
+     $      GO TO 70
+*
+*        Multiply B by reciprocals of singular values
+*
+         THR = MAX( RCOND*S( 1 ), SFMIN )
+         IF( RCOND.LT.ZERO )
+     $      THR = MAX( EPS*S( 1 ), SFMIN )
+         RANK = 0
+         DO 10 I = 1, N
+            IF( S( I ).GT.THR ) THEN
+               CALL ZDRSCL( NRHS, S( I ), B( I, 1 ), LDB )
+               RANK = RANK + 1
+            ELSE
+               CALL ZLASET( 'F', 1, NRHS, CZERO, CZERO, B( I, 1 ), LDB )
+            END IF
+   10    CONTINUE
+*
+*        Multiply B by right singular vectors
+*        (CWorkspace: need N, prefer N*NRHS)
+*        (RWorkspace: none)
+*
+         IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN
+            CALL ZGEMM( 'C', 'N', N, NRHS, N, CONE, A, LDA, B, LDB,
+     $                  CZERO, WORK, LDB )
+            CALL ZLACPY( 'G', N, NRHS, WORK, LDB, B, LDB )
+         ELSE IF( NRHS.GT.1 ) THEN
+            CHUNK = LWORK / N
+            DO 20 I = 1, NRHS, CHUNK
+               BL = MIN( NRHS-I+1, CHUNK )
+               CALL ZGEMM( 'C', 'N', N, BL, N, CONE, A, LDA, B( 1, I ),
+     $                     LDB, CZERO, WORK, N )
+               CALL ZLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB )
+   20       CONTINUE
+         ELSE
+            CALL ZGEMV( 'C', N, N, CONE, A, LDA, B, 1, CZERO, WORK, 1 )
+            CALL ZCOPY( N, WORK, 1, B, 1 )
+         END IF
+*
+      ELSE IF( N.GE.MNTHR .AND. LWORK.GE.3*M+M*M+MAX( M, NRHS, N-2*M ) )
+     $          THEN
+*
+*        Underdetermined case, M much less than N
+*
+*        Path 2a - underdetermined, with many more columns than rows
+*        and sufficient workspace for an efficient algorithm
+*
+         LDWORK = M
+         IF( LWORK.GE.3*M+M*LDA+MAX( M, NRHS, N-2*M ) )
+     $      LDWORK = LDA
+         ITAU = 1
+         IWORK = M + 1
+*
+*        Compute A=L*Q
+*        (CWorkspace: need 2*M, prefer M+M*NB)
+*        (RWorkspace: none)
+*
+         CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
+     $                LWORK-IWORK+1, INFO )
+         IL = IWORK
+*
+*        Copy L to WORK(IL), zeroing out above it
+*
+         CALL ZLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWORK )
+         CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, WORK( IL+LDWORK ),
+     $                LDWORK )
+         IE = 1
+         ITAUQ = IL + LDWORK*M
+         ITAUP = ITAUQ + M
+         IWORK = ITAUP + M
+*
+*        Bidiagonalize L in WORK(IL)
+*        (CWorkspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*        (RWorkspace: need M)
+*
+         CALL ZGEBRD( M, M, WORK( IL ), LDWORK, S, RWORK( IE ),
+     $                WORK( ITAUQ ), WORK( ITAUP ), WORK( IWORK ),
+     $                LWORK-IWORK+1, INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors of L
+*        (CWorkspace: need M*M+3*M+NRHS, prefer M*M+3*M+NRHS*NB)
+*        (RWorkspace: none)
+*
+         CALL ZUNMBR( 'Q', 'L', 'C', M, NRHS, M, WORK( IL ), LDWORK,
+     $                WORK( ITAUQ ), B, LDB, WORK( IWORK ),
+     $                LWORK-IWORK+1, INFO )
+*
+*        Generate right bidiagonalizing vectors of R in WORK(IL)
+*        (CWorkspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
+*        (RWorkspace: none)
+*
+         CALL ZUNGBR( 'P', M, M, M, WORK( IL ), LDWORK, WORK( ITAUP ),
+     $                WORK( IWORK ), LWORK-IWORK+1, INFO )
+         IRWORK = IE + M
+*
+*        Perform bidiagonal QR iteration, computing right singular
+*        vectors of L in WORK(IL) and multiplying B by transpose of
+*        left singular vectors
+*        (CWorkspace: need M*M)
+*        (RWorkspace: need BDSPAC)
+*
+         CALL ZBDSQR( 'U', M, M, 0, NRHS, S, RWORK( IE ), WORK( IL ),
+     $                LDWORK, A, LDA, B, LDB, RWORK( IRWORK ), INFO )
+         IF( INFO.NE.0 )
+     $      GO TO 70
+*
+*        Multiply B by reciprocals of singular values
+*
+         THR = MAX( RCOND*S( 1 ), SFMIN )
+         IF( RCOND.LT.ZERO )
+     $      THR = MAX( EPS*S( 1 ), SFMIN )
+         RANK = 0
+         DO 30 I = 1, M
+            IF( S( I ).GT.THR ) THEN
+               CALL ZDRSCL( NRHS, S( I ), B( I, 1 ), LDB )
+               RANK = RANK + 1
+            ELSE
+               CALL ZLASET( 'F', 1, NRHS, CZERO, CZERO, B( I, 1 ), LDB )
+            END IF
+   30    CONTINUE
+         IWORK = IL + M*LDWORK
+*
+*        Multiply B by right singular vectors of L in WORK(IL)
+*        (CWorkspace: need M*M+2*M, prefer M*M+M+M*NRHS)
+*        (RWorkspace: none)
+*
+         IF( LWORK.GE.LDB*NRHS+IWORK-1 .AND. NRHS.GT.1 ) THEN
+            CALL ZGEMM( 'C', 'N', M, NRHS, M, CONE, WORK( IL ), LDWORK,
+     $                  B, LDB, CZERO, WORK( IWORK ), LDB )
+            CALL ZLACPY( 'G', M, NRHS, WORK( IWORK ), LDB, B, LDB )
+         ELSE IF( NRHS.GT.1 ) THEN
+            CHUNK = ( LWORK-IWORK+1 ) / M
+            DO 40 I = 1, NRHS, CHUNK
+               BL = MIN( NRHS-I+1, CHUNK )
+               CALL ZGEMM( 'C', 'N', M, BL, M, CONE, WORK( IL ), LDWORK,
+     $                     B( 1, I ), LDB, CZERO, WORK( IWORK ), N )
+               CALL ZLACPY( 'G', M, BL, WORK( IWORK ), N, B( 1, I ),
+     $                      LDB )
+   40       CONTINUE
+         ELSE
+            CALL ZGEMV( 'C', M, M, CONE, WORK( IL ), LDWORK, B( 1, 1 ),
+     $                  1, CZERO, WORK( IWORK ), 1 )
+            CALL ZCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 )
+         END IF
+*
+*        Zero out below first M rows of B
+*
+         CALL ZLASET( 'F', N-M, NRHS, CZERO, CZERO, B( M+1, 1 ), LDB )
+         IWORK = ITAU + M
+*
+*        Multiply transpose(Q) by B
+*        (CWorkspace: need M+NRHS, prefer M+NHRS*NB)
+*        (RWorkspace: none)
+*
+         CALL ZUNMLQ( 'L', 'C', N, NRHS, M, A, LDA, WORK( ITAU ), B,
+     $                LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
+*
+      ELSE
+*
+*        Path 2 - remaining underdetermined cases
+*
+         IE = 1
+         ITAUQ = 1
+         ITAUP = ITAUQ + M
+         IWORK = ITAUP + M
+*
+*        Bidiagonalize A
+*        (CWorkspace: need 3*M, prefer 2*M+(M+N)*NB)
+*        (RWorkspace: need N)
+*
+         CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
+     $                INFO )
+*
+*        Multiply B by transpose of left bidiagonalizing vectors
+*        (CWorkspace: need 2*M+NRHS, prefer 2*M+NRHS*NB)
+*        (RWorkspace: none)
+*
+         CALL ZUNMBR( 'Q', 'L', 'C', M, NRHS, N, A, LDA, WORK( ITAUQ ),
+     $                B, LDB, WORK( IWORK ), LWORK-IWORK+1, INFO )
+*
+*        Generate right bidiagonalizing vectors in A
+*        (CWorkspace: need 3*M, prefer 2*M+M*NB)
+*        (RWorkspace: none)
+*
+         CALL ZUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
+     $                WORK( IWORK ), LWORK-IWORK+1, INFO )
+         IRWORK = IE + M
+*
+*        Perform bidiagonal QR iteration,
+*           computing right singular vectors of A in A and
+*           multiplying B by transpose of left singular vectors
+*        (CWorkspace: none)
+*        (RWorkspace: need BDSPAC)
+*
+         CALL ZBDSQR( 'L', M, N, 0, NRHS, S, RWORK( IE ), A, LDA, VDUM,
+     $                1, B, LDB, RWORK( IRWORK ), INFO )
+         IF( INFO.NE.0 )
+     $      GO TO 70
+*
+*        Multiply B by reciprocals of singular values
+*
+         THR = MAX( RCOND*S( 1 ), SFMIN )
+         IF( RCOND.LT.ZERO )
+     $      THR = MAX( EPS*S( 1 ), SFMIN )
+         RANK = 0
+         DO 50 I = 1, M
+            IF( S( I ).GT.THR ) THEN
+               CALL ZDRSCL( NRHS, S( I ), B( I, 1 ), LDB )
+               RANK = RANK + 1
+            ELSE
+               CALL ZLASET( 'F', 1, NRHS, CZERO, CZERO, B( I, 1 ), LDB )
+            END IF
+   50    CONTINUE
+*
+*        Multiply B by right singular vectors of A
+*        (CWorkspace: need N, prefer N*NRHS)
+*        (RWorkspace: none)
+*
+         IF( LWORK.GE.LDB*NRHS .AND. NRHS.GT.1 ) THEN
+            CALL ZGEMM( 'C', 'N', N, NRHS, M, CONE, A, LDA, B, LDB,
+     $                  CZERO, WORK, LDB )
+            CALL ZLACPY( 'G', N, NRHS, WORK, LDB, B, LDB )
+         ELSE IF( NRHS.GT.1 ) THEN
+            CHUNK = LWORK / N
+            DO 60 I = 1, NRHS, CHUNK
+               BL = MIN( NRHS-I+1, CHUNK )
+               CALL ZGEMM( 'C', 'N', N, BL, M, CONE, A, LDA, B( 1, I ),
+     $                     LDB, CZERO, WORK, N )
+               CALL ZLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB )
+   60       CONTINUE
+         ELSE
+            CALL ZGEMV( 'C', M, N, CONE, A, LDA, B, 1, CZERO, WORK, 1 )
+            CALL ZCOPY( N, WORK, 1, B, 1 )
+         END IF
+      END IF
+*
+*     Undo scaling
+*
+      IF( IASCL.EQ.1 ) THEN
+         CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
+         CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
+     $                INFO )
+      ELSE IF( IASCL.EQ.2 ) THEN
+         CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
+         CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
+     $                INFO )
+      END IF
+      IF( IBSCL.EQ.1 ) THEN
+         CALL ZLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
+      ELSE IF( IBSCL.EQ.2 ) THEN
+         CALL ZLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
+      END IF
+   70 CONTINUE
+      WORK( 1 ) = MAXWRK
+      RETURN
+*
+*     End of ZGELSS
+*
+      END
diff --git a/costa/native/external/lapack/zgelsx.f b/costa/native/external/lapack/zgelsx.f
new file mode 100644
index 000000000..dd324d652
--- /dev/null
+++ b/costa/native/external/lapack/zgelsx.f
@@ -0,0 +1,358 @@
+      SUBROUTINE ZGELSX( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK,
+     $                   WORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, M, N, NRHS, RANK
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            JPVT( * )
+      DOUBLE PRECISION   RWORK( * )
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This routine is deprecated and has been replaced by routine ZGELSY.
+*
+*  ZGELSX computes the minimum-norm solution to a complex linear least
+*  squares problem:
+*      minimize || A * X - B ||
+*  using a complete orthogonal factorization of A.  A is an M-by-N
+*  matrix which may be rank-deficient.
+*
+*  Several right hand side vectors b and solution vectors x can be
+*  handled in a single call; they are stored as the columns of the
+*  M-by-NRHS right hand side matrix B and the N-by-NRHS solution
+*  matrix X.
+*
+*  The routine first computes a QR factorization with column pivoting:
+*      A * P = Q * [ R11 R12 ]
+*                  [  0  R22 ]
+*  with R11 defined as the largest leading submatrix whose estimated
+*  condition number is less than 1/RCOND.  The order of R11, RANK,
+*  is the effective rank of A.
+*
+*  Then, R22 is considered to be negligible, and R12 is annihilated
+*  by unitary transformations from the right, arriving at the
+*  complete orthogonal factorization:
+*     A * P = Q * [ T11 0 ] * Z
+*                 [  0  0 ]
+*  The minimum-norm solution is then
+*     X = P * Z' [ inv(T11)*Q1'*B ]
+*                [        0       ]
+*  where Q1 consists of the first RANK columns of Q.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of
+*          columns of matrices B and X. NRHS >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, A has been overwritten by details of its
+*          complete orthogonal factorization.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the M-by-NRHS right hand side matrix B.
+*          On exit, the N-by-NRHS solution matrix X.
+*          If m >= n and RANK = n, the residual sum-of-squares for
+*          the solution in the i-th column is given by the sum of
+*          squares of elements N+1:M in that column.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,M,N).
+*
+*  JPVT    (input/output) INTEGER array, dimension (N)
+*          On entry, if JPVT(i) .ne. 0, the i-th column of A is an
+*          initial column, otherwise it is a free column.  Before
+*          the QR factorization of A, all initial columns are
+*          permuted to the leading positions; only the remaining
+*          free columns are moved as a result of column pivoting
+*          during the factorization.
+*          On exit, if JPVT(i) = k, then the i-th column of A*P
+*          was the k-th column of A.
+*
+*  RCOND   (input) DOUBLE PRECISION
+*          RCOND is used to determine the effective rank of A, which
+*          is defined as the order of the largest leading triangular
+*          submatrix R11 in the QR factorization with pivoting of A,
+*          whose estimated condition number < 1/RCOND.
+*
+*  RANK    (output) INTEGER
+*          The effective rank of A, i.e., the order of the submatrix
+*          R11.  This is the same as the order of the submatrix T11
+*          in the complete orthogonal factorization of A.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension
+*                      (min(M,N) + max( N, 2*min(M,N)+NRHS )),
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            IMAX, IMIN
+      PARAMETER          ( IMAX = 1, IMIN = 2 )
+      DOUBLE PRECISION   ZERO, ONE, DONE, NTDONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, DONE = ZERO,
+     $                   NTDONE = ONE )
+      COMPLEX*16         CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IASCL, IBSCL, ISMAX, ISMIN, J, K, MN
+      DOUBLE PRECISION   ANRM, BIGNUM, BNRM, SMAX, SMAXPR, SMIN, SMINPR,
+     $                   SMLNUM
+      COMPLEX*16         C1, C2, S1, S2, T1, T2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGEQPF, ZLAIC1, ZLASCL, ZLASET, ZLATZM,
+     $                   ZTRSM, ZTZRQF, ZUNM2R
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH, ZLANGE
+      EXTERNAL           DLAMCH, ZLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DCONJG, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      MN = MIN( M, N )
+      ISMIN = MN + 1
+      ISMAX = 2*MN + 1
+*
+*     Test the input arguments.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN
+         INFO = -7
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGELSX', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( MIN( M, N, NRHS ).EQ.0 ) THEN
+         RANK = 0
+         RETURN
+      END IF
+*
+*     Get machine parameters
+*
+      SMLNUM = DLAMCH( 'S' ) / DLAMCH( 'P' )
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+*
+*     Scale A, B if max elements outside range [SMLNUM,BIGNUM]
+*
+      ANRM = ZLANGE( 'M', M, N, A, LDA, RWORK )
+      IASCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
+         IASCL = 1
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
+         IASCL = 2
+      ELSE IF( ANRM.EQ.ZERO ) THEN
+*
+*        Matrix all zero. Return zero solution.
+*
+         CALL ZLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB )
+         RANK = 0
+         GO TO 100
+      END IF
+*
+      BNRM = ZLANGE( 'M', M, NRHS, B, LDB, RWORK )
+      IBSCL = 0
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL ZLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 1
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL ZLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 2
+      END IF
+*
+*     Compute QR factorization with column pivoting of A:
+*        A * P = Q * R
+*
+      CALL ZGEQPF( M, N, A, LDA, JPVT, WORK( 1 ), WORK( MN+1 ), RWORK,
+     $             INFO )
+*
+*     complex workspace MN+N. Real workspace 2*N. Details of Householder
+*     rotations stored in WORK(1:MN).
+*
+*     Determine RANK using incremental condition estimation
+*
+      WORK( ISMIN ) = CONE
+      WORK( ISMAX ) = CONE
+      SMAX = ABS( A( 1, 1 ) )
+      SMIN = SMAX
+      IF( ABS( A( 1, 1 ) ).EQ.ZERO ) THEN
+         RANK = 0
+         CALL ZLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB )
+         GO TO 100
+      ELSE
+         RANK = 1
+      END IF
+*
+   10 CONTINUE
+      IF( RANK.LT.MN ) THEN
+         I = RANK + 1
+         CALL ZLAIC1( IMIN, RANK, WORK( ISMIN ), SMIN, A( 1, I ),
+     $                A( I, I ), SMINPR, S1, C1 )
+         CALL ZLAIC1( IMAX, RANK, WORK( ISMAX ), SMAX, A( 1, I ),
+     $                A( I, I ), SMAXPR, S2, C2 )
+*
+         IF( SMAXPR*RCOND.LE.SMINPR ) THEN
+            DO 20 I = 1, RANK
+               WORK( ISMIN+I-1 ) = S1*WORK( ISMIN+I-1 )
+               WORK( ISMAX+I-1 ) = S2*WORK( ISMAX+I-1 )
+   20       CONTINUE
+            WORK( ISMIN+RANK ) = C1
+            WORK( ISMAX+RANK ) = C2
+            SMIN = SMINPR
+            SMAX = SMAXPR
+            RANK = RANK + 1
+            GO TO 10
+         END IF
+      END IF
+*
+*     Logically partition R = [ R11 R12 ]
+*                             [  0  R22 ]
+*     where R11 = R(1:RANK,1:RANK)
+*
+*     [R11,R12] = [ T11, 0 ] * Y
+*
+      IF( RANK.LT.N )
+     $   CALL ZTZRQF( RANK, N, A, LDA, WORK( MN+1 ), INFO )
+*
+*     Details of Householder rotations stored in WORK(MN+1:2*MN)
+*
+*     B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS)
+*
+      CALL ZUNM2R( 'Left', 'Conjugate transpose', M, NRHS, MN, A, LDA,
+     $             WORK( 1 ), B, LDB, WORK( 2*MN+1 ), INFO )
+*
+*     workspace NRHS
+*
+*      B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS)
+*
+      CALL ZTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', RANK,
+     $            NRHS, CONE, A, LDA, B, LDB )
+*
+      DO 40 I = RANK + 1, N
+         DO 30 J = 1, NRHS
+            B( I, J ) = CZERO
+   30    CONTINUE
+   40 CONTINUE
+*
+*     B(1:N,1:NRHS) := Y' * B(1:N,1:NRHS)
+*
+      IF( RANK.LT.N ) THEN
+         DO 50 I = 1, RANK
+            CALL ZLATZM( 'Left', N-RANK+1, NRHS, A( I, RANK+1 ), LDA,
+     $                   DCONJG( WORK( MN+I ) ), B( I, 1 ),
+     $                   B( RANK+1, 1 ), LDB, WORK( 2*MN+1 ) )
+   50    CONTINUE
+      END IF
+*
+*     workspace NRHS
+*
+*     B(1:N,1:NRHS) := P * B(1:N,1:NRHS)
+*
+      DO 90 J = 1, NRHS
+         DO 60 I = 1, N
+            WORK( 2*MN+I ) = NTDONE
+   60    CONTINUE
+         DO 80 I = 1, N
+            IF( WORK( 2*MN+I ).EQ.NTDONE ) THEN
+               IF( JPVT( I ).NE.I ) THEN
+                  K = I
+                  T1 = B( K, J )
+                  T2 = B( JPVT( K ), J )
+   70             CONTINUE
+                  B( JPVT( K ), J ) = T1
+                  WORK( 2*MN+K ) = DONE
+                  T1 = T2
+                  K = JPVT( K )
+                  T2 = B( JPVT( K ), J )
+                  IF( JPVT( K ).NE.I )
+     $               GO TO 70
+                  B( I, J ) = T1
+                  WORK( 2*MN+K ) = DONE
+               END IF
+            END IF
+   80    CONTINUE
+   90 CONTINUE
+*
+*     Undo scaling
+*
+      IF( IASCL.EQ.1 ) THEN
+         CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
+         CALL ZLASCL( 'U', 0, 0, SMLNUM, ANRM, RANK, RANK, A, LDA,
+     $                INFO )
+      ELSE IF( IASCL.EQ.2 ) THEN
+         CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
+         CALL ZLASCL( 'U', 0, 0, BIGNUM, ANRM, RANK, RANK, A, LDA,
+     $                INFO )
+      END IF
+      IF( IBSCL.EQ.1 ) THEN
+         CALL ZLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
+      ELSE IF( IBSCL.EQ.2 ) THEN
+         CALL ZLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
+      END IF
+*
+  100 CONTINUE
+*
+      RETURN
+*
+*     End of ZGELSX
+*
+      END
diff --git a/costa/native/external/lapack/zgelsy.f b/costa/native/external/lapack/zgelsy.f
new file mode 100644
index 000000000..28c84971d
--- /dev/null
+++ b/costa/native/external/lapack/zgelsy.f
@@ -0,0 +1,386 @@
+      SUBROUTINE ZGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK,
+     $                   WORK, LWORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            JPVT( * )
+      DOUBLE PRECISION   RWORK( * )
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGELSY computes the minimum-norm solution to a complex linear least
+*  squares problem:
+*      minimize || A * X - B ||
+*  using a complete orthogonal factorization of A.  A is an M-by-N
+*  matrix which may be rank-deficient.
+*
+*  Several right hand side vectors b and solution vectors x can be
+*  handled in a single call; they are stored as the columns of the
+*  M-by-NRHS right hand side matrix B and the N-by-NRHS solution
+*  matrix X.
+*
+*  The routine first computes a QR factorization with column pivoting:
+*      A * P = Q * [ R11 R12 ]
+*                  [  0  R22 ]
+*  with R11 defined as the largest leading submatrix whose estimated
+*  condition number is less than 1/RCOND.  The order of R11, RANK,
+*  is the effective rank of A.
+*
+*  Then, R22 is considered to be negligible, and R12 is annihilated
+*  by unitary transformations from the right, arriving at the
+*  complete orthogonal factorization:
+*     A * P = Q * [ T11 0 ] * Z
+*                 [  0  0 ]
+*  The minimum-norm solution is then
+*     X = P * Z' [ inv(T11)*Q1'*B ]
+*                [        0       ]
+*  where Q1 consists of the first RANK columns of Q.
+*
+*  This routine is basically identical to the original xGELSX except
+*  three differences:
+*    o The permutation of matrix B (the right hand side) is faster and
+*      more simple.
+*    o The call to the subroutine xGEQPF has been substituted by the
+*      the call to the subroutine xGEQP3. This subroutine is a Blas-3
+*      version of the QR factorization with column pivoting.
+*    o Matrix B (the right hand side) is updated with Blas-3.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of
+*          columns of matrices B and X. NRHS >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, A has been overwritten by details of its
+*          complete orthogonal factorization.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the M-by-NRHS right hand side matrix B.
+*          On exit, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,M,N).
+*
+*  JPVT    (input/output) INTEGER array, dimension (N)
+*          On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted
+*          to the front of AP, otherwise column i is a free column.
+*          On exit, if JPVT(i) = k, then the i-th column of A*P
+*          was the k-th column of A.
+*
+*  RCOND   (input) DOUBLE PRECISION
+*          RCOND is used to determine the effective rank of A, which
+*          is defined as the order of the largest leading triangular
+*          submatrix R11 in the QR factorization with pivoting of A,
+*          whose estimated condition number < 1/RCOND.
+*
+*  RANK    (output) INTEGER
+*          The effective rank of A, i.e., the order of the submatrix
+*          R11.  This is the same as the order of the submatrix T11
+*          in the complete orthogonal factorization of A.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          The unblocked strategy requires that:
+*            LWORK >= MN + MAX( 2*MN, N+1, MN+NRHS )
+*          where MN = min(M,N).
+*          The block algorithm requires that:
+*            LWORK >= MN + MAX( 2*MN, NB*(N+1), MN+MN*NB, MN+NB*NRHS )
+*          where NB is an upper bound on the blocksize returned
+*          by ILAENV for the routines ZGEQP3, ZTZRZF, CTZRQF, ZUNMQR,
+*          and ZUNMRZ.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*    E. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain
+*    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            IMAX, IMIN
+      PARAMETER          ( IMAX = 1, IMIN = 2 )
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+      COMPLEX*16         CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IASCL, IBSCL, ISMAX, ISMIN, J, LWKOPT, MN,
+     $                   NB, NB1, NB2, NB3, NB4
+      DOUBLE PRECISION   ANRM, BIGNUM, BNRM, SMAX, SMAXPR, SMIN, SMINPR,
+     $                   SMLNUM, WSIZE
+      COMPLEX*16         C1, C2, S1, S2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLABAD, XERBLA, ZCOPY, ZGEQP3, ZLAIC1, ZLASCL,
+     $                   ZLASET, ZTRSM, ZTZRZF, ZUNMQR, ZUNMRZ
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, ZLANGE
+      EXTERNAL           ILAENV, DLAMCH, ZLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCMPLX, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      MN = MIN( M, N )
+      ISMIN = MN + 1
+      ISMAX = 2*MN + 1
+*
+*     Test the input arguments.
+*
+      INFO = 0
+      NB1 = ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 )
+      NB2 = ILAENV( 1, 'ZGERQF', ' ', M, N, -1, -1 )
+      NB3 = ILAENV( 1, 'ZUNMQR', ' ', M, N, NRHS, -1 )
+      NB4 = ILAENV( 1, 'ZUNMRQ', ' ', M, N, NRHS, -1 )
+      NB = MAX( NB1, NB2, NB3, NB4 )
+      LWKOPT = MAX( 1, MN+2*N+NB*( N+1 ), 2*MN+NB*NRHS )
+      WORK( 1 ) = DCMPLX( LWKOPT )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, M, N ) ) THEN
+         INFO = -7
+      ELSE IF( LWORK.LT.( MN+MAX( 2*MN, N+1, MN+NRHS ) ) .AND. .NOT.
+     $         LQUERY ) THEN
+         INFO = -12
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGELSY', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( MIN( M, N, NRHS ).EQ.0 ) THEN
+         RANK = 0
+         RETURN
+      END IF
+*
+*     Get machine parameters
+*
+      SMLNUM = DLAMCH( 'S' ) / DLAMCH( 'P' )
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+*
+*     Scale A, B if max entries outside range [SMLNUM,BIGNUM]
+*
+      ANRM = ZLANGE( 'M', M, N, A, LDA, RWORK )
+      IASCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, INFO )
+         IASCL = 1
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, INFO )
+         IASCL = 2
+      ELSE IF( ANRM.EQ.ZERO ) THEN
+*
+*        Matrix all zero. Return zero solution.
+*
+         CALL ZLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB )
+         RANK = 0
+         GO TO 70
+      END IF
+*
+      BNRM = ZLANGE( 'M', M, NRHS, B, LDB, RWORK )
+      IBSCL = 0
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+*
+*        Scale matrix norm up to SMLNUM
+*
+         CALL ZLASCL( 'G', 0, 0, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 1
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+*
+*        Scale matrix norm down to BIGNUM
+*
+         CALL ZLASCL( 'G', 0, 0, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
+         IBSCL = 2
+      END IF
+*
+*     Compute QR factorization with column pivoting of A:
+*        A * P = Q * R
+*
+      CALL ZGEQP3( M, N, A, LDA, JPVT, WORK( 1 ), WORK( MN+1 ),
+     $             LWORK-MN, RWORK, INFO )
+      WSIZE = MN + DBLE( WORK( MN+1 ) )
+*
+*     complex workspace: MN+NB*(N+1). real workspace 2*N.
+*     Details of Householder rotations stored in WORK(1:MN).
+*
+*     Determine RANK using incremental condition estimation
+*
+      WORK( ISMIN ) = CONE
+      WORK( ISMAX ) = CONE
+      SMAX = ABS( A( 1, 1 ) )
+      SMIN = SMAX
+      IF( ABS( A( 1, 1 ) ).EQ.ZERO ) THEN
+         RANK = 0
+         CALL ZLASET( 'F', MAX( M, N ), NRHS, CZERO, CZERO, B, LDB )
+         GO TO 70
+      ELSE
+         RANK = 1
+      END IF
+*
+   10 CONTINUE
+      IF( RANK.LT.MN ) THEN
+         I = RANK + 1
+         CALL ZLAIC1( IMIN, RANK, WORK( ISMIN ), SMIN, A( 1, I ),
+     $                A( I, I ), SMINPR, S1, C1 )
+         CALL ZLAIC1( IMAX, RANK, WORK( ISMAX ), SMAX, A( 1, I ),
+     $                A( I, I ), SMAXPR, S2, C2 )
+*
+         IF( SMAXPR*RCOND.LE.SMINPR ) THEN
+            DO 20 I = 1, RANK
+               WORK( ISMIN+I-1 ) = S1*WORK( ISMIN+I-1 )
+               WORK( ISMAX+I-1 ) = S2*WORK( ISMAX+I-1 )
+   20       CONTINUE
+            WORK( ISMIN+RANK ) = C1
+            WORK( ISMAX+RANK ) = C2
+            SMIN = SMINPR
+            SMAX = SMAXPR
+            RANK = RANK + 1
+            GO TO 10
+         END IF
+      END IF
+*
+*     complex workspace: 3*MN.
+*
+*     Logically partition R = [ R11 R12 ]
+*                             [  0  R22 ]
+*     where R11 = R(1:RANK,1:RANK)
+*
+*     [R11,R12] = [ T11, 0 ] * Y
+*
+      IF( RANK.LT.N )
+     $   CALL ZTZRZF( RANK, N, A, LDA, WORK( MN+1 ), WORK( 2*MN+1 ),
+     $                LWORK-2*MN, INFO )
+*
+*     complex workspace: 2*MN.
+*     Details of Householder rotations stored in WORK(MN+1:2*MN)
+*
+*     B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS)
+*
+      CALL ZUNMQR( 'Left', 'Conjugate transpose', M, NRHS, MN, A, LDA,
+     $             WORK( 1 ), B, LDB, WORK( 2*MN+1 ), LWORK-2*MN, INFO )
+      WSIZE = MAX( WSIZE, 2*MN+DBLE( WORK( 2*MN+1 ) ) )
+*
+*     complex workspace: 2*MN+NB*NRHS.
+*
+*     B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS)
+*
+      CALL ZTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', RANK,
+     $            NRHS, CONE, A, LDA, B, LDB )
+*
+      DO 40 J = 1, NRHS
+         DO 30 I = RANK + 1, N
+            B( I, J ) = CZERO
+   30    CONTINUE
+   40 CONTINUE
+*
+*     B(1:N,1:NRHS) := Y' * B(1:N,1:NRHS)
+*
+      IF( RANK.LT.N ) THEN
+         CALL ZUNMRZ( 'Left', 'Conjugate transpose', N, NRHS, RANK,
+     $                N-RANK, A, LDA, WORK( MN+1 ), B, LDB,
+     $                WORK( 2*MN+1 ), LWORK-2*MN, INFO )
+      END IF
+*
+*     complex workspace: 2*MN+NRHS.
+*
+*     B(1:N,1:NRHS) := P * B(1:N,1:NRHS)
+*
+      DO 60 J = 1, NRHS
+         DO 50 I = 1, N
+            WORK( JPVT( I ) ) = B( I, J )
+   50    CONTINUE
+         CALL ZCOPY( N, WORK( 1 ), 1, B( 1, J ), 1 )
+   60 CONTINUE
+*
+*     complex workspace: N.
+*
+*     Undo scaling
+*
+      IF( IASCL.EQ.1 ) THEN
+         CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
+         CALL ZLASCL( 'U', 0, 0, SMLNUM, ANRM, RANK, RANK, A, LDA,
+     $                INFO )
+      ELSE IF( IASCL.EQ.2 ) THEN
+         CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
+         CALL ZLASCL( 'U', 0, 0, BIGNUM, ANRM, RANK, RANK, A, LDA,
+     $                INFO )
+      END IF
+      IF( IBSCL.EQ.1 ) THEN
+         CALL ZLASCL( 'G', 0, 0, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
+      ELSE IF( IBSCL.EQ.2 ) THEN
+         CALL ZLASCL( 'G', 0, 0, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
+      END IF
+*
+   70 CONTINUE
+      WORK( 1 ) = DCMPLX( LWKOPT )
+*
+      RETURN
+*
+*     End of ZGELSY
+*
+      END
diff --git a/costa/native/external/lapack/zgeql2.f b/costa/native/external/lapack/zgeql2.f
new file mode 100644
index 000000000..e408312ea
--- /dev/null
+++ b/costa/native/external/lapack/zgeql2.f
@@ -0,0 +1,122 @@
+      SUBROUTINE ZGEQL2( M, N, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGEQL2 computes a QL factorization of a complex m by n matrix A:
+*  A = Q * L.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the m by n matrix A.
+*          On exit, if m >= n, the lower triangle of the subarray
+*          A(m-n+1:m,1:n) contains the n by n lower triangular matrix L;
+*          if m <= n, the elements on and below the (n-m)-th
+*          superdiagonal contain the m by n lower trapezoidal matrix L;
+*          the remaining elements, with the array TAU, represent the
+*          unitary matrix Q as a product of elementary reflectors
+*          (see Further Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) COMPLEX*16 array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(k) . . . H(2) H(1), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in
+*  A(1:m-k+i-1,n-k+i), and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, K
+      COMPLEX*16         ALPHA
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLARF, ZLARFG
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGEQL2', -INFO )
+         RETURN
+      END IF
+*
+      K = MIN( M, N )
+*
+      DO 10 I = K, 1, -1
+*
+*        Generate elementary reflector H(i) to annihilate
+*        A(1:m-k+i-1,n-k+i)
+*
+         ALPHA = A( M-K+I, N-K+I )
+         CALL ZLARFG( M-K+I, ALPHA, A( 1, N-K+I ), 1, TAU( I ) )
+*
+*        Apply H(i)' to A(1:m-k+i,1:n-k+i-1) from the left
+*
+         A( M-K+I, N-K+I ) = ONE
+         CALL ZLARF( 'Left', M-K+I, N-K+I-1, A( 1, N-K+I ), 1,
+     $               DCONJG( TAU( I ) ), A, LDA, WORK )
+         A( M-K+I, N-K+I ) = ALPHA
+   10 CONTINUE
+      RETURN
+*
+*     End of ZGEQL2
+*
+      END
diff --git a/costa/native/external/lapack/zgeqlf.f b/costa/native/external/lapack/zgeqlf.f
new file mode 100644
index 000000000..f57dd8d32
--- /dev/null
+++ b/costa/native/external/lapack/zgeqlf.f
@@ -0,0 +1,205 @@
+      SUBROUTINE ZGEQLF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGEQLF computes a QL factorization of a complex M-by-N matrix A:
+*  A = Q * L.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit,
+*          if m >= n, the lower triangle of the subarray
+*          A(m-n+1:m,1:n) contains the N-by-N lower triangular matrix L;
+*          if m <= n, the elements on and below the (n-m)-th
+*          superdiagonal contain the M-by-N lower trapezoidal matrix L;
+*          the remaining elements, with the array TAU, represent the
+*          unitary matrix Q as a product of elementary reflectors
+*          (see Further Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) COMPLEX*16 array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,N).
+*          For optimum performance LWORK >= N*NB, where NB is
+*          the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(k) . . . H(2) H(1), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in
+*  A(1:m-k+i-1,n-k+i), and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, K, KI, KK, LDWORK, LWKOPT,
+     $                   MU, NB, NBMIN, NU, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGEQL2, ZLARFB, ZLARFT
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'ZGEQLF', ' ', M, N, -1, -1 )
+      LWKOPT = N*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGEQLF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      K = MIN( M, N )
+      IF( K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 1
+      IWS = N
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'ZGEQLF', ' ', M, N, -1, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = N
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'ZGEQLF', ' ', M, N, -1,
+     $                 -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code initially.
+*        The last kk columns are handled by the block method.
+*
+         KI = ( ( K-NX-1 ) / NB )*NB
+         KK = MIN( K, KI+NB )
+*
+         DO 10 I = K - KK + KI + 1, K - KK + 1, -NB
+            IB = MIN( K-I+1, NB )
+*
+*           Compute the QL factorization of the current block
+*           A(1:m-k+i+ib-1,n-k+i:n-k+i+ib-1)
+*
+            CALL ZGEQL2( M-K+I+IB-1, IB, A( 1, N-K+I ), LDA, TAU( I ),
+     $                   WORK, IINFO )
+            IF( N-K+I.GT.1 ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i+ib-1) . . . H(i+1) H(i)
+*
+               CALL ZLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB,
+     $                      A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H' to A(1:m-k+i+ib-1,1:n-k+i-1) from the left
+*
+               CALL ZLARFB( 'Left', 'Conjugate transpose', 'Backward',
+     $                      'Columnwise', M-K+I+IB-1, N-K+I-1, IB,
+     $                      A( 1, N-K+I ), LDA, WORK, LDWORK, A, LDA,
+     $                      WORK( IB+1 ), LDWORK )
+            END IF
+   10    CONTINUE
+         MU = M - K + I + NB - 1
+         NU = N - K + I + NB - 1
+      ELSE
+         MU = M
+         NU = N
+      END IF
+*
+*     Use unblocked code to factor the last or only block
+*
+      IF( MU.GT.0 .AND. NU.GT.0 )
+     $   CALL ZGEQL2( MU, NU, A, LDA, TAU, WORK, IINFO )
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of ZGEQLF
+*
+      END
diff --git a/costa/native/external/lapack/zgeqp3.f b/costa/native/external/lapack/zgeqp3.f
new file mode 100644
index 000000000..ffb709a5d
--- /dev/null
+++ b/costa/native/external/lapack/zgeqp3.f
@@ -0,0 +1,285 @@
+      SUBROUTINE ZGEQP3( M, N, A, LDA, JPVT, TAU, WORK, LWORK, RWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            JPVT( * )
+      DOUBLE PRECISION   RWORK( * )
+      COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGEQP3 computes a QR factorization with column pivoting of a
+*  matrix A:  A*P = Q*R  using Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, the upper triangle of the array contains the
+*          min(M,N)-by-N upper trapezoidal matrix R; the elements below
+*          the diagonal, together with the array TAU, represent the
+*          unitary matrix Q as a product of min(M,N) elementary
+*          reflectors.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  JPVT    (input/output) INTEGER array, dimension (N)
+*          On entry, if JPVT(J).ne.0, the J-th column of A is permuted
+*          to the front of A*P (a leading column); if JPVT(J)=0,
+*          the J-th column of A is a free column.
+*          On exit, if JPVT(J)=K, then the J-th column of A*P was the
+*          the K-th column of A.
+*
+*  TAU     (output) COMPLEX*16 array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO=0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= N+1.
+*          For optimal performance LWORK >= ( N+1 )*NB, where NB
+*          is the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit.
+*          < 0: if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(k), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a real/complex scalar, and v is a real/complex vector
+*  with v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in
+*  A(i+1:m,i), and tau in TAU(i).
+*
+*  Based on contributions by
+*    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain
+*    X. Sun, Computer Science Dept., Duke University, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            INB, INBMIN, IXOVER
+      PARAMETER          ( INB = 1, INBMIN = 2, IXOVER = 3 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            FJB, IWS, J, JB, LWKOPT, MINMN, MINWS, NA, NB,
+     $                   NBMIN, NFXD, NX, SM, SMINMN, SN, TOPBMN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGEQRF, ZLAQP2, ZLAQPS, ZSWAP, ZUNMQR
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DZNRM2
+      EXTERNAL           ILAENV, DZNRM2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      IWS = N + 1
+      MINMN = MIN( M, N )
+*
+*     Test input arguments
+*     ====================
+*
+      INFO = 0
+      NB = ILAENV( INB, 'ZGEQRF', ' ', M, N, -1, -1 )
+      LWKOPT = ( N+1 )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( ( LWORK.LT.IWS ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGEQP3', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( MINMN.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+*     Move initial columns up front.
+*
+      NFXD = 1
+      DO 10 J = 1, N
+         IF( JPVT( J ).NE.0 ) THEN
+            IF( J.NE.NFXD ) THEN
+               CALL ZSWAP( M, A( 1, J ), 1, A( 1, NFXD ), 1 )
+               JPVT( J ) = JPVT( NFXD )
+               JPVT( NFXD ) = J
+            ELSE
+               JPVT( J ) = J
+            END IF
+            NFXD = NFXD + 1
+         ELSE
+            JPVT( J ) = J
+         END IF
+   10 CONTINUE
+      NFXD = NFXD - 1
+*
+*     Factorize fixed columns
+*     =======================
+*
+*     Compute the QR factorization of fixed columns and update
+*     remaining columns.
+*
+      IF( NFXD.GT.0 ) THEN
+         NA = MIN( M, NFXD )
+*CC      CALL ZGEQR2( M, NA, A, LDA, TAU, WORK, INFO )
+         CALL ZGEQRF( M, NA, A, LDA, TAU, WORK, LWORK, INFO )
+         IWS = MAX( IWS, INT( WORK( 1 ) ) )
+         IF( NA.LT.N ) THEN
+*CC         CALL ZUNM2R( 'Left', 'Conjugate Transpose', M, N-NA,
+*CC  $                   NA, A, LDA, TAU, A( 1, NA+1 ), LDA, WORK,
+*CC  $                   INFO )
+            CALL ZUNMQR( 'Left', 'Conjugate Transpose', M, N-NA, NA, A,
+     $                   LDA, TAU, A( 1, NA+1 ), LDA, WORK, LWORK,
+     $                   INFO )
+            IWS = MAX( IWS, INT( WORK( 1 ) ) )
+         END IF
+      END IF
+*
+*     Factorize free columns
+*     ======================
+*
+      IF( NFXD.LT.MINMN ) THEN
+*
+         SM = M - NFXD
+         SN = N - NFXD
+         SMINMN = MINMN - NFXD
+*
+*        Determine the block size.
+*
+         NB = ILAENV( INB, 'ZGEQRF', ' ', SM, SN, -1, -1 )
+         NBMIN = 2
+         NX = 0
+*
+         IF( ( NB.GT.1 ) .AND. ( NB.LT.SMINMN ) ) THEN
+*
+*           Determine when to cross over from blocked to unblocked code.
+*
+            NX = MAX( 0, ILAENV( IXOVER, 'ZGEQRF', ' ', SM, SN, -1,
+     $           -1 ) )
+*
+*
+            IF( NX.LT.SMINMN ) THEN
+*
+*              Determine if workspace is large enough for blocked code.
+*
+               MINWS = ( SN+1 )*NB
+               IWS = MAX( IWS, MINWS )
+               IF( LWORK.LT.MINWS ) THEN
+*
+*                 Not enough workspace to use optimal NB: Reduce NB and
+*                 determine the minimum value of NB.
+*
+                  NB = LWORK / ( SN+1 )
+                  NBMIN = MAX( 2, ILAENV( INBMIN, 'ZGEQRF', ' ', SM, SN,
+     $                    -1, -1 ) )
+*
+*
+               END IF
+            END IF
+         END IF
+*
+*        Initialize partial column norms. The first N elements of work
+*        store the exact column norms.
+*
+         DO 20 J = NFXD + 1, N
+            RWORK( J ) = DZNRM2( SM, A( NFXD+1, J ), 1 )
+            RWORK( N+J ) = RWORK( J )
+   20    CONTINUE
+*
+         IF( ( NB.GE.NBMIN ) .AND. ( NB.LT.SMINMN ) .AND.
+     $       ( NX.LT.SMINMN ) ) THEN
+*
+*           Use blocked code initially.
+*
+            J = NFXD + 1
+*
+*           Compute factorization: while loop.
+*
+*
+            TOPBMN = MINMN - NX
+   30       CONTINUE
+            IF( J.LE.TOPBMN ) THEN
+               JB = MIN( NB, TOPBMN-J+1 )
+*
+*              Factorize JB columns among columns J:N.
+*
+               CALL ZLAQPS( M, N-J+1, J-1, JB, FJB, A( 1, J ), LDA,
+     $                      JPVT( J ), TAU( J ), RWORK( J ),
+     $                      RWORK( N+J ), WORK( 1 ), WORK( JB+1 ),
+     $                      N-J+1 )
+*
+               J = J + FJB
+               GO TO 30
+            END IF
+         ELSE
+            J = NFXD + 1
+         END IF
+*
+*        Use unblocked code to factor the last or only block.
+*
+*
+         IF( J.LE.MINMN )
+     $      CALL ZLAQP2( M, N-J+1, J-1, A( 1, J ), LDA, JPVT( J ),
+     $                   TAU( J ), RWORK( J ), RWORK( N+J ), WORK( 1 ) )
+*
+      END IF
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of ZGEQP3
+*
+      END
diff --git a/costa/native/external/lapack/zgeqpf.f b/costa/native/external/lapack/zgeqpf.f
new file mode 100644
index 000000000..fa1ed602a
--- /dev/null
+++ b/costa/native/external/lapack/zgeqpf.f
@@ -0,0 +1,225 @@
+      SUBROUTINE ZGEQPF( M, N, A, LDA, JPVT, TAU, WORK, RWORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            JPVT( * )
+      DOUBLE PRECISION   RWORK( * )
+      COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This routine is deprecated and has been replaced by routine ZGEQP3.
+*
+*  ZGEQPF computes a QR factorization with column pivoting of a
+*  complex M-by-N matrix A: A*P = Q*R.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A. N >= 0
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, the upper triangle of the array contains the
+*          min(M,N)-by-N upper triangular matrix R; the elements
+*          below the diagonal, together with the array TAU,
+*          represent the unitary matrix Q as a product of
+*          min(m,n) elementary reflectors.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  JPVT    (input/output) INTEGER array, dimension (N)
+*          On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted
+*          to the front of A*P (a leading column); if JPVT(i) = 0,
+*          the i-th column of A is a free column.
+*          On exit, if JPVT(i) = k, then the i-th column of A*P
+*          was the k-th column of A.
+*
+*  TAU     (output) COMPLEX*16 array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(n)
+*
+*  Each H(i) has the form
+*
+*     H = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i).
+*
+*  The matrix P is represented in jpvt as follows: If
+*     jpvt(j) = i
+*  then the jth column of P is the ith canonical unit vector.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, ITEMP, J, MA, MN, PVT
+      DOUBLE PRECISION   TEMP, TEMP2
+      COMPLEX*16         AII
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGEQR2, ZLARF, ZLARFG, ZSWAP, ZUNM2R
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DCMPLX, DCONJG, MAX, MIN, SQRT
+*     ..
+*     .. External Functions ..
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DZNRM2
+      EXTERNAL           IDAMAX, DZNRM2
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGEQPF', -INFO )
+         RETURN
+      END IF
+*
+      MN = MIN( M, N )
+*
+*     Move initial columns up front
+*
+      ITEMP = 1
+      DO 10 I = 1, N
+         IF( JPVT( I ).NE.0 ) THEN
+            IF( I.NE.ITEMP ) THEN
+               CALL ZSWAP( M, A( 1, I ), 1, A( 1, ITEMP ), 1 )
+               JPVT( I ) = JPVT( ITEMP )
+               JPVT( ITEMP ) = I
+            ELSE
+               JPVT( I ) = I
+            END IF
+            ITEMP = ITEMP + 1
+         ELSE
+            JPVT( I ) = I
+         END IF
+   10 CONTINUE
+      ITEMP = ITEMP - 1
+*
+*     Compute the QR factorization and update remaining columns
+*
+      IF( ITEMP.GT.0 ) THEN
+         MA = MIN( ITEMP, M )
+         CALL ZGEQR2( M, MA, A, LDA, TAU, WORK, INFO )
+         IF( MA.LT.N ) THEN
+            CALL ZUNM2R( 'Left', 'Conjugate transpose', M, N-MA, MA, A,
+     $                   LDA, TAU, A( 1, MA+1 ), LDA, WORK, INFO )
+         END IF
+      END IF
+*
+      IF( ITEMP.LT.MN ) THEN
+*
+*        Initialize partial column norms. The first n elements of
+*        work store the exact column norms.
+*
+         DO 20 I = ITEMP + 1, N
+            RWORK( I ) = DZNRM2( M-ITEMP, A( ITEMP+1, I ), 1 )
+            RWORK( N+I ) = RWORK( I )
+   20    CONTINUE
+*
+*        Compute factorization
+*
+         DO 40 I = ITEMP + 1, MN
+*
+*           Determine ith pivot column and swap if necessary
+*
+            PVT = ( I-1 ) + IDAMAX( N-I+1, RWORK( I ), 1 )
+*
+            IF( PVT.NE.I ) THEN
+               CALL ZSWAP( M, A( 1, PVT ), 1, A( 1, I ), 1 )
+               ITEMP = JPVT( PVT )
+               JPVT( PVT ) = JPVT( I )
+               JPVT( I ) = ITEMP
+               RWORK( PVT ) = RWORK( I )
+               RWORK( N+PVT ) = RWORK( N+I )
+            END IF
+*
+*           Generate elementary reflector H(i)
+*
+            AII = A( I, I )
+            CALL ZLARFG( M-I+1, AII, A( MIN( I+1, M ), I ), 1,
+     $                   TAU( I ) )
+            A( I, I ) = AII
+*
+            IF( I.LT.N ) THEN
+*
+*              Apply H(i) to A(i:m,i+1:n) from the left
+*
+               AII = A( I, I )
+               A( I, I ) = DCMPLX( ONE )
+               CALL ZLARF( 'Left', M-I+1, N-I, A( I, I ), 1,
+     $                     DCONJG( TAU( I ) ), A( I, I+1 ), LDA, WORK )
+               A( I, I ) = AII
+            END IF
+*
+*           Update partial column norms
+*
+            DO 30 J = I + 1, N
+               IF( RWORK( J ).NE.ZERO ) THEN
+                  TEMP = ONE - ( ABS( A( I, J ) ) / RWORK( J ) )**2
+                  TEMP = MAX( TEMP, ZERO )
+                  TEMP2 = ONE + 0.05D0*TEMP*
+     $                    ( RWORK( J ) / RWORK( N+J ) )**2
+                  IF( TEMP2.EQ.ONE ) THEN
+                     IF( M-I.GT.0 ) THEN
+                        RWORK( J ) = DZNRM2( M-I, A( I+1, J ), 1 )
+                        RWORK( N+J ) = RWORK( J )
+                     ELSE
+                        RWORK( J ) = ZERO
+                        RWORK( N+J ) = ZERO
+                     END IF
+                  ELSE
+                     RWORK( J ) = RWORK( J )*SQRT( TEMP )
+                  END IF
+               END IF
+   30       CONTINUE
+*
+   40    CONTINUE
+      END IF
+      RETURN
+*
+*     End of ZGEQPF
+*
+      END
diff --git a/costa/native/external/lapack/zgeqr2.f b/costa/native/external/lapack/zgeqr2.f
new file mode 100644
index 000000000..a4f226eca
--- /dev/null
+++ b/costa/native/external/lapack/zgeqr2.f
@@ -0,0 +1,122 @@
+      SUBROUTINE ZGEQR2( M, N, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGEQR2 computes a QR factorization of a complex m by n matrix A:
+*  A = Q * R.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the m by n matrix A.
+*          On exit, the elements on and above the diagonal of the array
+*          contain the min(m,n) by n upper trapezoidal matrix R (R is
+*          upper triangular if m >= n); the elements below the diagonal,
+*          with the array TAU, represent the unitary matrix Q as a
+*          product of elementary reflectors (see Further Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) COMPLEX*16 array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(k), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
+*  and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, K
+      COMPLEX*16         ALPHA
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLARF, ZLARFG
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGEQR2', -INFO )
+         RETURN
+      END IF
+*
+      K = MIN( M, N )
+*
+      DO 10 I = 1, K
+*
+*        Generate elementary reflector H(i) to annihilate A(i+1:m,i)
+*
+         CALL ZLARFG( M-I+1, A( I, I ), A( MIN( I+1, M ), I ), 1,
+     $                TAU( I ) )
+         IF( I.LT.N ) THEN
+*
+*           Apply H(i)' to A(i:m,i+1:n) from the left
+*
+            ALPHA = A( I, I )
+            A( I, I ) = ONE
+            CALL ZLARF( 'Left', M-I+1, N-I, A( I, I ), 1,
+     $                  DCONJG( TAU( I ) ), A( I, I+1 ), LDA, WORK )
+            A( I, I ) = ALPHA
+         END IF
+   10 CONTINUE
+      RETURN
+*
+*     End of ZGEQR2
+*
+      END
diff --git a/costa/native/external/lapack/zgeqrf.f b/costa/native/external/lapack/zgeqrf.f
new file mode 100644
index 000000000..d621c1e0d
--- /dev/null
+++ b/costa/native/external/lapack/zgeqrf.f
@@ -0,0 +1,197 @@
+      SUBROUTINE ZGEQRF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGEQRF computes a QR factorization of a complex M-by-N matrix A:
+*  A = Q * R.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, the elements on and above the diagonal of the array
+*          contain the min(M,N)-by-N upper trapezoidal matrix R (R is
+*          upper triangular if m >= n); the elements below the diagonal,
+*          with the array TAU, represent the unitary matrix Q as a
+*          product of min(m,n) elementary reflectors (see Further
+*          Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) COMPLEX*16 array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,N).
+*          For optimum performance LWORK >= N*NB, where NB is
+*          the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(k), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
+*  and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, K, LDWORK, LWKOPT, NB,
+     $                   NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGEQR2, ZLARFB, ZLARFT
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 )
+      LWKOPT = N*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGEQRF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      K = MIN( M, N )
+      IF( K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 0
+      IWS = N
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'ZGEQRF', ' ', M, N, -1, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = N
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'ZGEQRF', ' ', M, N, -1,
+     $                 -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code initially
+*
+         DO 10 I = 1, K - NX, NB
+            IB = MIN( K-I+1, NB )
+*
+*           Compute the QR factorization of the current block
+*           A(i:m,i:i+ib-1)
+*
+            CALL ZGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ), WORK,
+     $                   IINFO )
+            IF( I+IB.LE.N ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i) H(i+1) . . . H(i+ib-1)
+*
+               CALL ZLARFT( 'Forward', 'Columnwise', M-I+1, IB,
+     $                      A( I, I ), LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H' to A(i:m,i+ib:n) from the left
+*
+               CALL ZLARFB( 'Left', 'Conjugate transpose', 'Forward',
+     $                      'Columnwise', M-I+1, N-I-IB+1, IB,
+     $                      A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ),
+     $                      LDA, WORK( IB+1 ), LDWORK )
+            END IF
+   10    CONTINUE
+      ELSE
+         I = 1
+      END IF
+*
+*     Use unblocked code to factor the last or only block.
+*
+      IF( I.LE.K )
+     $   CALL ZGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ), WORK,
+     $                IINFO )
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of ZGEQRF
+*
+      END
diff --git a/costa/native/external/lapack/zgerfs.f b/costa/native/external/lapack/zgerfs.f
new file mode 100644
index 000000000..aa2f0418d
--- /dev/null
+++ b/costa/native/external/lapack/zgerfs.f
@@ -0,0 +1,341 @@
+      SUBROUTINE ZGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB,
+     $                   X, LDX, FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX*16         A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGERFS improves the computed solution to a system of linear
+*  equations and provides error bounds and backward error estimates for
+*  the solution.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,N)
+*          The original N-by-N matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  AF      (input) COMPLEX*16 array, dimension (LDAF,N)
+*          The factors L and U from the factorization A = P*L*U
+*          as computed by ZGETRF.
+*
+*  LDAF    (input) INTEGER
+*          The leading dimension of the array AF.  LDAF >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices from ZGETRF; for 1<=i<=N, row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  B       (input) COMPLEX*16 array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) COMPLEX*16 array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by ZGETRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D+0 )
+      DOUBLE PRECISION   THREE
+      PARAMETER          ( THREE = 3.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN
+      CHARACTER          TRANSN, TRANST
+      INTEGER            COUNT, I, J, K, KASE, NZ
+      DOUBLE PRECISION   EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+      COMPLEX*16         ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZAXPY, ZCOPY, ZGEMV, ZGETRS, ZLACON
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $    LSAME( TRANS, 'C' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGERFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      IF( NOTRAN ) THEN
+         TRANSN = 'N'
+         TRANST = 'C'
+      ELSE
+         TRANSN = 'C'
+         TRANST = 'N'
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = DLAMCH( 'Epsilon' )
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - op(A) * X,
+*        where op(A) = A, A**T, or A**H, depending on TRANS.
+*
+         CALL ZCOPY( N, B( 1, J ), 1, WORK, 1 )
+         CALL ZGEMV( TRANS, N, N, -ONE, A, LDA, X( 1, J ), 1, ONE, WORK,
+     $               1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            RWORK( I ) = CABS1( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(op(A))*abs(X) + abs(B).
+*
+         IF( NOTRAN ) THEN
+            DO 50 K = 1, N
+               XK = CABS1( X( K, J ) )
+               DO 40 I = 1, N
+                  RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK
+   40          CONTINUE
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               DO 60 I = 1, N
+                  S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) )
+   60          CONTINUE
+               RWORK( K ) = RWORK( K ) + S
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) )
+            ELSE
+               S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) /
+     $             ( RWORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL ZGETRS( TRANS, N, 1, AF, LDAF, IPIV, WORK, N, INFO )
+            CALL ZAXPY( N, ONE, WORK, 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(op(A)))*
+*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(op(A)) is the inverse of op(A)
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(op(A))*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use ZLACON to estimate the infinity-norm of the matrix
+*           inv(op(A)) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I )
+            ELSE
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) +
+     $                      SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL ZLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(op(A)**H).
+*
+               CALL ZGETRS( TRANST, N, 1, AF, LDAF, IPIV, WORK, N,
+     $                      INFO )
+               DO 110 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  110          CONTINUE
+            ELSE
+*
+*              Multiply by inv(op(A))*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  120          CONTINUE
+               CALL ZGETRS( TRANSN, N, 1, AF, LDAF, IPIV, WORK, N,
+     $                      INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of ZGERFS
+*
+      END
diff --git a/costa/native/external/lapack/zgerq2.f b/costa/native/external/lapack/zgerq2.f
new file mode 100644
index 000000000..6e709f69f
--- /dev/null
+++ b/costa/native/external/lapack/zgerq2.f
@@ -0,0 +1,124 @@
+      SUBROUTINE ZGERQ2( M, N, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGERQ2 computes an RQ factorization of a complex m by n matrix A:
+*  A = R * Q.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the m by n matrix A.
+*          On exit, if m <= n, the upper triangle of the subarray
+*          A(1:m,n-m+1:n) contains the m by m upper triangular matrix R;
+*          if m >= n, the elements on and above the (m-n)-th subdiagonal
+*          contain the m by n upper trapezoidal matrix R; the remaining
+*          elements, with the array TAU, represent the unitary matrix
+*          Q as a product of elementary reflectors (see Further
+*          Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) COMPLEX*16 array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (M)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1)' H(2)' . . . H(k)', where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(n-k+i+1:n) = 0 and v(n-k+i) = 1; conjg(v(1:n-k+i-1)) is stored on
+*  exit in A(m-k+i,1:n-k+i-1), and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, K
+      COMPLEX*16         ALPHA
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLACGV, ZLARF, ZLARFG
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGERQ2', -INFO )
+         RETURN
+      END IF
+*
+      K = MIN( M, N )
+*
+      DO 10 I = K, 1, -1
+*
+*        Generate elementary reflector H(i) to annihilate
+*        A(m-k+i,1:n-k+i-1)
+*
+         CALL ZLACGV( N-K+I, A( M-K+I, 1 ), LDA )
+         ALPHA = A( M-K+I, N-K+I )
+         CALL ZLARFG( N-K+I, ALPHA, A( M-K+I, 1 ), LDA, TAU( I ) )
+*
+*        Apply H(i) to A(1:m-k+i-1,1:n-k+i) from the right
+*
+         A( M-K+I, N-K+I ) = ONE
+         CALL ZLARF( 'Right', M-K+I-1, N-K+I, A( M-K+I, 1 ), LDA,
+     $               TAU( I ), A, LDA, WORK )
+         A( M-K+I, N-K+I ) = ALPHA
+         CALL ZLACGV( N-K+I-1, A( M-K+I, 1 ), LDA )
+   10 CONTINUE
+      RETURN
+*
+*     End of ZGERQ2
+*
+      END
diff --git a/costa/native/external/lapack/zgerqf.f b/costa/native/external/lapack/zgerqf.f
new file mode 100644
index 000000000..e213e8cb3
--- /dev/null
+++ b/costa/native/external/lapack/zgerqf.f
@@ -0,0 +1,205 @@
+      SUBROUTINE ZGERQF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGERQF computes an RQ factorization of a complex M-by-N matrix A:
+*  A = R * Q.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit,
+*          if m <= n, the upper triangle of the subarray
+*          A(1:m,n-m+1:n) contains the M-by-M upper triangular matrix R;
+*          if m >= n, the elements on and above the (m-n)-th subdiagonal
+*          contain the M-by-N upper trapezoidal matrix R;
+*          the remaining elements, with the array TAU, represent the
+*          unitary matrix Q as a product of min(m,n) elementary
+*          reflectors (see Further Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) COMPLEX*16 array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,M).
+*          For optimum performance LWORK >= M*NB, where NB is
+*          the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1)' H(2)' . . . H(k)', where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(n-k+i+1:n) = 0 and v(n-k+i) = 1; conjg(v(1:n-k+i-1)) is stored on
+*  exit in A(m-k+i,1:n-k+i-1), and tau in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, K, KI, KK, LDWORK, LWKOPT,
+     $                   MU, NB, NBMIN, NU, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGERQ2, ZLARFB, ZLARFT
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'ZGERQF', ' ', M, N, -1, -1 )
+      LWKOPT = M*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGERQF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      K = MIN( M, N )
+      IF( K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 1
+      IWS = M
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'ZGERQF', ' ', M, N, -1, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = M
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'ZGERQF', ' ', M, N, -1,
+     $                 -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code initially.
+*        The last kk rows are handled by the block method.
+*
+         KI = ( ( K-NX-1 ) / NB )*NB
+         KK = MIN( K, KI+NB )
+*
+         DO 10 I = K - KK + KI + 1, K - KK + 1, -NB
+            IB = MIN( K-I+1, NB )
+*
+*           Compute the RQ factorization of the current block
+*           A(m-k+i:m-k+i+ib-1,1:n-k+i+ib-1)
+*
+            CALL ZGERQ2( IB, N-K+I+IB-1, A( M-K+I, 1 ), LDA, TAU( I ),
+     $                   WORK, IINFO )
+            IF( M-K+I.GT.1 ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i+ib-1) . . . H(i+1) H(i)
+*
+               CALL ZLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB,
+     $                      A( M-K+I, 1 ), LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H to A(1:m-k+i-1,1:n-k+i+ib-1) from the right
+*
+               CALL ZLARFB( 'Right', 'No transpose', 'Backward',
+     $                      'Rowwise', M-K+I-1, N-K+I+IB-1, IB,
+     $                      A( M-K+I, 1 ), LDA, WORK, LDWORK, A, LDA,
+     $                      WORK( IB+1 ), LDWORK )
+            END IF
+   10    CONTINUE
+         MU = M - K + I + NB - 1
+         NU = N - K + I + NB - 1
+      ELSE
+         MU = M
+         NU = N
+      END IF
+*
+*     Use unblocked code to factor the last or only block
+*
+      IF( MU.GT.0 .AND. NU.GT.0 )
+     $   CALL ZGERQ2( MU, NU, A, LDA, TAU, WORK, IINFO )
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of ZGERQF
+*
+      END
diff --git a/costa/native/external/lapack/zgesc2.f b/costa/native/external/lapack/zgesc2.f
new file mode 100644
index 000000000..ce4375287
--- /dev/null
+++ b/costa/native/external/lapack/zgesc2.f
@@ -0,0 +1,134 @@
+      SUBROUTINE ZGESC2( N, A, LDA, RHS, IPIV, JPIV, SCALE )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            LDA, N
+      DOUBLE PRECISION   SCALE
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), JPIV( * )
+      COMPLEX*16         A( LDA, * ), RHS( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGESC2 solves a system of linear equations
+*
+*            A * X = scale* RHS
+*
+*  with a general N-by-N matrix A using the LU factorization with
+*  complete pivoting computed by ZGETC2.
+*
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA, N)
+*          On entry, the  LU part of the factorization of the n-by-n
+*          matrix A computed by ZGETC2:  A = P * L * U * Q
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1, N).
+*
+*  RHS     (input/output) COMPLEX*16 array, dimension N.
+*          On entry, the right hand side vector b.
+*          On exit, the solution vector X.
+*
+*  IPIV    (iput) INTEGER array, dimension (N).
+*          The pivot indices; for 1 <= i <= N, row i of the
+*          matrix has been interchanged with row IPIV(i).
+*
+*  JPIV    (iput) INTEGER array, dimension (N).
+*          The pivot indices; for 1 <= j <= N, column j of the
+*          matrix has been interchanged with column JPIV(j).
+*
+*  SCALE    (output) DOUBLE PRECISION
+*           On exit, SCALE contains the scale factor. SCALE is chosen
+*           0 <= SCALE <= 1 to prevent owerflow in the solution.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      DOUBLE PRECISION   BIGNUM, EPS, SMLNUM
+      COMPLEX*16         TEMP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZLASWP, ZSCAL
+*     ..
+*     .. External Functions ..
+      INTEGER            IZAMAX
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           IZAMAX, DLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCMPLX
+*     ..
+*     .. Executable Statements ..
+*
+*     Set constant to control overflow
+*
+      EPS = DLAMCH( 'P' )
+      SMLNUM = DLAMCH( 'S' ) / EPS
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+*
+*     Apply permutations IPIV to RHS
+*
+      CALL ZLASWP( 1, RHS, LDA, 1, N-1, IPIV, 1 )
+*
+*     Solve for L part
+*
+      DO 20 I = 1, N - 1
+         DO 10 J = I + 1, N
+            RHS( J ) = RHS( J ) - A( J, I )*RHS( I )
+   10    CONTINUE
+   20 CONTINUE
+*
+*     Solve for U part
+*
+      SCALE = ONE
+*
+*     Check for scaling
+*
+      I = IZAMAX( N, RHS, 1 )
+      IF( TWO*SMLNUM*ABS( RHS( I ) ).GT.ABS( A( N, N ) ) ) THEN
+         TEMP = DCMPLX( ONE / TWO, ZERO ) / ABS( RHS( I ) )
+         CALL ZSCAL( N, TEMP, RHS( 1 ), 1 )
+         SCALE = SCALE*DBLE( TEMP )
+      END IF
+      DO 40 I = N, 1, -1
+         TEMP = DCMPLX( ONE, ZERO ) / A( I, I )
+         RHS( I ) = RHS( I )*TEMP
+         DO 30 J = I + 1, N
+            RHS( I ) = RHS( I ) - RHS( J )*( A( I, J )*TEMP )
+   30    CONTINUE
+   40 CONTINUE
+*
+*     Apply permutations JPIV to the solution (RHS)
+*
+      CALL ZLASWP( 1, RHS, LDA, 1, N-1, JPIV, -1 )
+      RETURN
+*
+*     End of ZGESC2
+*
+      END
diff --git a/costa/native/external/lapack/zgesdd.f b/costa/native/external/lapack/zgesdd.f
new file mode 100644
index 000000000..8954ce496
--- /dev/null
+++ b/costa/native/external/lapack/zgesdd.f
@@ -0,0 +1,1950 @@
+      SUBROUTINE ZGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK,
+     $                   LWORK, RWORK, IWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ
+      INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   RWORK( * ), S( * )
+      COMPLEX*16         A( LDA, * ), U( LDU, * ), VT( LDVT, * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGESDD computes the singular value decomposition (SVD) of a complex
+*  M-by-N matrix A, optionally computing the left and/or right singular
+*  vectors, by using divide-and-conquer method. The SVD is written
+*
+*       A = U * SIGMA * conjugate-transpose(V)
+*
+*  where SIGMA is an M-by-N matrix which is zero except for its
+*  min(m,n) diagonal elements, U is an M-by-M unitary matrix, and
+*  V is an N-by-N unitary matrix.  The diagonal elements of SIGMA
+*  are the singular values of A; they are real and non-negative, and
+*  are returned in descending order.  The first min(m,n) columns of
+*  U and V are the left and right singular vectors of A.
+*
+*  Note that the routine returns VT = V**H, not V.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          Specifies options for computing all or part of the matrix U:
+*          = 'A':  all M columns of U and all N rows of V**H are
+*                  returned in the arrays U and VT;
+*          = 'S':  the first min(M,N) columns of U and the first
+*                  min(M,N) rows of V**H are returned in the arrays U
+*                  and VT;
+*          = 'O':  If M >= N, the first N columns of U are overwritten
+*                  on the array A and all rows of V**H are returned in
+*                  the array VT;
+*                  otherwise, all columns of U are returned in the
+*                  array U and the first M rows of V**H are overwritten
+*                  in the array VT;
+*          = 'N':  no columns of U or rows of V**H are computed.
+*
+*  M       (input) INTEGER
+*          The number of rows of the input matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the input matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit,
+*          if JOBZ = 'O',  A is overwritten with the first N columns
+*                          of U (the left singular vectors, stored
+*                          columnwise) if M >= N;
+*                          A is overwritten with the first M rows
+*                          of V**H (the right singular vectors, stored
+*                          rowwise) otherwise.
+*          if JOBZ .ne. 'O', the contents of A are destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  S       (output) DOUBLE PRECISION array, dimension (min(M,N))
+*          The singular values of A, sorted so that S(i) >= S(i+1).
+*
+*  U       (output) COMPLEX*16 array, dimension (LDU,UCOL)
+*          UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N;
+*          UCOL = min(M,N) if JOBZ = 'S'.
+*          If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M
+*          unitary matrix U;
+*          if JOBZ = 'S', U contains the first min(M,N) columns of U
+*          (the left singular vectors, stored columnwise);
+*          if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced.
+*
+*  LDU     (input) INTEGER
+*          The leading dimension of the array U.  LDU >= 1; if
+*          JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M.
+*
+*  VT      (output) COMPLEX*16 array, dimension (LDVT,N)
+*          If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the
+*          N-by-N unitary matrix V**H;
+*          if JOBZ = 'S', VT contains the first min(M,N) rows of
+*          V**H (the right singular vectors, stored rowwise);
+*          if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced.
+*
+*  LDVT    (input) INTEGER
+*          The leading dimension of the array VT.  LDVT >= 1; if
+*          JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N;
+*          if JOBZ = 'S', LDVT >= min(M,N).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= 1.
+*          if JOBZ = 'N', LWORK >= 2*min(M,N)+max(M,N).
+*          if JOBZ = 'O',
+*                LWORK >= 2*min(M,N)*min(M,N)+2*min(M,N)+max(M,N).
+*          if JOBZ = 'S' or 'A',
+*                LWORK >= min(M,N)*min(M,N)+2*min(M,N)+max(M,N).
+*          For good performance, LWORK should generally be larger.
+*          If LWORK < 0 but other input arguments are legal, WORK(1)
+*          returns the optimal LWORK.
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (LRWORK)
+*          If JOBZ = 'N', LRWORK >= 7*min(M,N).
+*          Otherwise, LRWORK >= 5*min(M,N)*min(M,N) + 5*min(M,N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (8*min(M,N))
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  The updating process of DBDSDC did not converge.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0D0, 0.0D0 ),
+     $                   CONE = ( 1.0D0, 0.0D0 ) )
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, WNTQA, WNTQAS, WNTQN, WNTQO, WNTQS
+      INTEGER            BLK, CHUNK, I, IE, IERR, IL, IR, IRU, IRVT,
+     $                   ISCL, ITAU, ITAUP, ITAUQ, IU, IVT, LDWKVT,
+     $                   LDWRKL, LDWRKR, LDWRKU, MAXWRK, MINMN, MINWRK,
+     $                   MNTHR1, MNTHR2, NRWORK, NWORK, WRKBL
+      DOUBLE PRECISION   ANRM, BIGNUM, EPS, SMLNUM
+*     ..
+*     .. Local Arrays ..
+      INTEGER            IDUM( 1 )
+      DOUBLE PRECISION   DUM( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DBDSDC, DLASCL, XERBLA, ZGEBRD, ZGELQF, ZGEMM, 
+     $                   ZGEQRF, ZLACP2, ZLACPY, ZLACRM, ZLARCM, ZLASCL, 
+     $                   ZLASET, ZUNGBR, ZUNGLQ, ZUNGQR, ZUNMBR
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, ZLANGE
+      EXTERNAL           DLAMCH, ILAENV, LSAME, ZLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      MINMN = MIN( M, N )
+      MNTHR1 = INT( MINMN*17.0D0 / 9.0D0 )
+      MNTHR2 = INT( MINMN*5.0D0 / 3.0D0 )
+      WNTQA = LSAME( JOBZ, 'A' )
+      WNTQS = LSAME( JOBZ, 'S' )
+      WNTQAS = WNTQA .OR. WNTQS
+      WNTQO = LSAME( JOBZ, 'O' )
+      WNTQN = LSAME( JOBZ, 'N' )
+      MINWRK = 1
+      MAXWRK = 1
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      IF( .NOT.( WNTQA .OR. WNTQS .OR. WNTQO .OR. WNTQN ) ) THEN
+         INFO = -1
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDU.LT.1 .OR. ( WNTQAS .AND. LDU.LT.M ) .OR.
+     $         ( WNTQO .AND. M.LT.N .AND. LDU.LT.M ) ) THEN
+         INFO = -8
+      ELSE IF( LDVT.LT.1 .OR. ( WNTQA .AND. LDVT.LT.N ) .OR.
+     $         ( WNTQS .AND. LDVT.LT.MINMN ) .OR.
+     $         ( WNTQO .AND. M.GE.N .AND. LDVT.LT.N ) ) THEN
+         INFO = -10
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       CWorkspace refers to complex workspace, and RWorkspace to
+*       real workspace. NB refers to the optimal block size for the
+*       immediately following subroutine, as returned by ILAENV.)
+*
+      IF( INFO.EQ.0 .AND. M.GT.0 .AND. N.GT.0 ) THEN
+         IF( M.GE.N ) THEN
+*
+*           There is no complex work space needed for bidiagonal SVD
+*           The real work space needed for bidiagonal SVD is BDSPAC,
+*           BDSPAC = 3*N*N + 4*N
+*
+            IF( M.GE.MNTHR1 ) THEN
+               IF( WNTQN ) THEN
+*
+*                 Path 1 (M much larger than N, JOBZ='N')
+*
+                  WRKBL = N + N*ILAENV( 1, 'ZGEQRF', ' ', M, N, -1,
+     $                    -1 )
+                  WRKBL = MAX( WRKBL, 2*N+2*N*
+     $                    ILAENV( 1, 'ZGEBRD', ' ', N, N, -1, -1 ) )
+                  MAXWRK = WRKBL
+                  MINWRK = 3*N
+               ELSE IF( WNTQO ) THEN
+*
+*                 Path 2 (M much larger than N, JOBZ='O')
+*
+                  WRKBL = N + N*ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'ZUNGQR', ' ', M,
+     $                    N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+2*N*
+     $                    ILAENV( 1, 'ZGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+N*
+     $                    ILAENV( 1, 'ZUNMBR', 'QLN', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+N*
+     $                    ILAENV( 1, 'ZUNMBR', 'PRC', N, N, N, -1 ) )
+                  MAXWRK = M*N + N*N + WRKBL
+                  MINWRK = 2*N*N + 3*N
+               ELSE IF( WNTQS ) THEN
+*
+*                 Path 3 (M much larger than N, JOBZ='S')
+*
+                  WRKBL = N + N*ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'ZUNGQR', ' ', M,
+     $                    N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+2*N*
+     $                    ILAENV( 1, 'ZGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+N*
+     $                    ILAENV( 1, 'ZUNMBR', 'QLN', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+N*
+     $                    ILAENV( 1, 'ZUNMBR', 'PRC', N, N, N, -1 ) )
+                  MAXWRK = N*N + WRKBL
+                  MINWRK = N*N + 3*N
+               ELSE IF( WNTQA ) THEN
+*
+*                 Path 4 (M much larger than N, JOBZ='A')
+*
+                  WRKBL = N + N*ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'ZUNGQR', ' ', M,
+     $                    M, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+2*N*
+     $                    ILAENV( 1, 'ZGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+N*
+     $                    ILAENV( 1, 'ZUNMBR', 'QLN', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+N*
+     $                    ILAENV( 1, 'ZUNMBR', 'PRC', N, N, N, -1 ) )
+                  MAXWRK = N*N + WRKBL
+                  MINWRK = N*N + 2*N + M
+               END IF
+            ELSE IF( M.GE.MNTHR2 ) THEN
+*
+*              Path 5 (M much larger than N, but not as much as MNTHR1)
+*
+               MAXWRK = 2*N + ( M+N )*ILAENV( 1, 'ZGEBRD', ' ', M, N,
+     $                  -1, -1 )
+               MINWRK = 2*N + M
+               IF( WNTQO ) THEN
+                  MAXWRK = MAX( MAXWRK, 2*N+N*
+     $                     ILAENV( 1, 'ZUNGBR', 'P', N, N, N, -1 ) )
+                  MAXWRK = MAX( MAXWRK, 2*N+N*
+     $                     ILAENV( 1, 'ZUNGBR', 'Q', M, N, N, -1 ) )
+                  MAXWRK = MAXWRK + M*N
+                  MINWRK = MINWRK + N*N
+               ELSE IF( WNTQS ) THEN
+                  MAXWRK = MAX( MAXWRK, 2*N+N*
+     $                     ILAENV( 1, 'ZUNGBR', 'P', N, N, N, -1 ) )
+                  MAXWRK = MAX( MAXWRK, 2*N+N*
+     $                     ILAENV( 1, 'ZUNGBR', 'Q', M, N, N, -1 ) )
+               ELSE IF( WNTQA ) THEN
+                  MAXWRK = MAX( MAXWRK, 2*N+N*
+     $                     ILAENV( 1, 'ZUNGBR', 'P', N, N, N, -1 ) )
+                  MAXWRK = MAX( MAXWRK, 2*N+M*
+     $                     ILAENV( 1, 'ZUNGBR', 'Q', M, M, N, -1 ) )
+               END IF
+            ELSE
+*
+*              Path 6 (M at least N, but not much larger)
+*
+               MAXWRK = 2*N + ( M+N )*ILAENV( 1, 'ZGEBRD', ' ', M, N,
+     $                  -1, -1 )
+               MINWRK = 2*N + M
+               IF( WNTQO ) THEN
+                  MAXWRK = MAX( MAXWRK, 2*N+N*
+     $                     ILAENV( 1, 'ZUNMBR', 'PRC', N, N, N, -1 ) )
+                  MAXWRK = MAX( MAXWRK, 2*N+N*
+     $                     ILAENV( 1, 'ZUNMBR', 'QLN', M, N, N, -1 ) )
+                  MAXWRK = MAXWRK + M*N
+                  MINWRK = MINWRK + N*N
+               ELSE IF( WNTQS ) THEN
+                  MAXWRK = MAX( MAXWRK, 2*N+N*
+     $                     ILAENV( 1, 'ZUNMBR', 'PRC', N, N, N, -1 ) )
+                  MAXWRK = MAX( MAXWRK, 2*N+N*
+     $                     ILAENV( 1, 'ZUNMBR', 'QLN', M, N, N, -1 ) )
+               ELSE IF( WNTQA ) THEN
+                  MAXWRK = MAX( MAXWRK, 2*N+N*
+     $                     ILAENV( 1, 'ZUNGBR', 'PRC', N, N, N, -1 ) )
+                  MAXWRK = MAX( MAXWRK, 2*N+M*
+     $                     ILAENV( 1, 'ZUNGBR', 'QLN', M, M, N, -1 ) )
+               END IF
+            END IF
+         ELSE
+*
+*           There is no complex work space needed for bidiagonal SVD
+*           The real work space needed for bidiagonal SVD is BDSPAC,
+*           BDSPAC = 3*M*M + 4*M
+*
+            IF( N.GE.MNTHR1 ) THEN
+               IF( WNTQN ) THEN
+*
+*                 Path 1t (N much larger than M, JOBZ='N')
+*
+                  MAXWRK = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1,
+     $                     -1 )
+                  MAXWRK = MAX( MAXWRK, 2*M+2*M*
+     $                     ILAENV( 1, 'ZGEBRD', ' ', M, M, -1, -1 ) )
+                  MINWRK = 3*M
+               ELSE IF( WNTQO ) THEN
+*
+*                 Path 2t (N much larger than M, JOBZ='O')
+*
+                  WRKBL = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'ZUNGLQ', ' ', M,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+2*M*
+     $                    ILAENV( 1, 'ZGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+M*
+     $                    ILAENV( 1, 'ZUNMBR', 'PRC', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+M*
+     $                    ILAENV( 1, 'ZUNMBR', 'QLN', M, M, M, -1 ) )
+                  MAXWRK = M*N + M*M + WRKBL
+                  MINWRK = 2*M*M + 3*M
+               ELSE IF( WNTQS ) THEN
+*
+*                 Path 3t (N much larger than M, JOBZ='S')
+*
+                  WRKBL = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'ZUNGLQ', ' ', M,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+2*M*
+     $                    ILAENV( 1, 'ZGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+M*
+     $                    ILAENV( 1, 'ZUNMBR', 'PRC', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+M*
+     $                    ILAENV( 1, 'ZUNMBR', 'QLN', M, M, M, -1 ) )
+                  MAXWRK = M*M + WRKBL
+                  MINWRK = M*M + 3*M
+               ELSE IF( WNTQA ) THEN
+*
+*                 Path 4t (N much larger than M, JOBZ='A')
+*
+                  WRKBL = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'ZUNGLQ', ' ', N,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+2*M*
+     $                    ILAENV( 1, 'ZGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+M*
+     $                    ILAENV( 1, 'ZUNMBR', 'PRC', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+M*
+     $                    ILAENV( 1, 'ZUNMBR', 'QLN', M, M, M, -1 ) )
+                  MAXWRK = M*M + WRKBL
+                  MINWRK = M*M + 2*M + N
+               END IF
+            ELSE IF( N.GE.MNTHR2 ) THEN
+*
+*              Path 5t (N much larger than M, but not as much as MNTHR1)
+*
+               MAXWRK = 2*M + ( M+N )*ILAENV( 1, 'ZGEBRD', ' ', M, N,
+     $                  -1, -1 )
+               MINWRK = 2*M + N
+               IF( WNTQO ) THEN
+                  MAXWRK = MAX( MAXWRK, 2*M+M*
+     $                     ILAENV( 1, 'ZUNGBR', 'P', M, N, M, -1 ) )
+                  MAXWRK = MAX( MAXWRK, 2*M+M*
+     $                     ILAENV( 1, 'ZUNGBR', 'Q', M, M, N, -1 ) )
+                  MAXWRK = MAXWRK + M*N
+                  MINWRK = MINWRK + M*M
+               ELSE IF( WNTQS ) THEN
+                  MAXWRK = MAX( MAXWRK, 2*M+M*
+     $                     ILAENV( 1, 'ZUNGBR', 'P', M, N, M, -1 ) )
+                  MAXWRK = MAX( MAXWRK, 2*M+M*
+     $                     ILAENV( 1, 'ZUNGBR', 'Q', M, M, N, -1 ) )
+               ELSE IF( WNTQA ) THEN
+                  MAXWRK = MAX( MAXWRK, 2*M+N*
+     $                     ILAENV( 1, 'ZUNGBR', 'P', N, N, M, -1 ) )
+                  MAXWRK = MAX( MAXWRK, 2*M+M*
+     $                     ILAENV( 1, 'ZUNGBR', 'Q', M, M, N, -1 ) )
+               END IF
+            ELSE
+*
+*              Path 6t (N greater than M, but not much larger)
+*
+               MAXWRK = 2*M + ( M+N )*ILAENV( 1, 'ZGEBRD', ' ', M, N,
+     $                  -1, -1 )
+               MINWRK = 2*M + N
+               IF( WNTQO ) THEN
+                  MAXWRK = MAX( MAXWRK, 2*M+M*
+     $                     ILAENV( 1, 'ZUNMBR', 'PRC', M, N, M, -1 ) )
+                  MAXWRK = MAX( MAXWRK, 2*M+M*
+     $                     ILAENV( 1, 'ZUNMBR', 'QLN', M, M, N, -1 ) )
+                  MAXWRK = MAXWRK + M*N
+                  MINWRK = MINWRK + M*M
+               ELSE IF( WNTQS ) THEN
+                  MAXWRK = MAX( MAXWRK, 2*M+M*
+     $                     ILAENV( 1, 'ZUNGBR', 'PRC', M, N, M, -1 ) )
+                  MAXWRK = MAX( MAXWRK, 2*M+M*
+     $                     ILAENV( 1, 'ZUNGBR', 'QLN', M, M, N, -1 ) )
+               ELSE IF( WNTQA ) THEN
+                  MAXWRK = MAX( MAXWRK, 2*M+N*
+     $                     ILAENV( 1, 'ZUNGBR', 'PRC', N, N, M, -1 ) )
+                  MAXWRK = MAX( MAXWRK, 2*M+M*
+     $                     ILAENV( 1, 'ZUNGBR', 'QLN', M, M, N, -1 ) )
+               END IF
+            END IF
+         END IF
+         MAXWRK = MAX( MAXWRK, MINWRK )
+         WORK( 1 ) = MAXWRK
+      END IF
+*
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGESDD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         IF( LWORK.GE.1 )
+     $      WORK( 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'P' )
+      SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = ZLANGE( 'M', M, N, A, LDA, DUM )
+      ISCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         ISCL = 1
+         CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         ISCL = 1
+         CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
+      END IF
+*
+      IF( M.GE.N ) THEN
+*
+*        A has at least as many rows as columns. If A has sufficiently
+*        more rows than columns, first reduce using the QR
+*        decomposition (if sufficient workspace available)
+*
+         IF( M.GE.MNTHR1 ) THEN
+*
+            IF( WNTQN ) THEN
+*
+*              Path 1 (M much larger than N, JOBZ='N')
+*              No singular vectors to be computed
+*
+               ITAU = 1
+               NWORK = ITAU + N
+*
+*              Compute A=Q*R
+*              (CWorkspace: need 2*N, prefer N+N*NB)
+*              (RWorkspace: need 0)
+*
+               CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Zero out below R
+*
+               CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ),
+     $                      LDA )
+               IE = 1
+               ITAUQ = 1
+               ITAUP = ITAUQ + N
+               NWORK = ITAUP + N
+*
+*              Bidiagonalize R in A
+*              (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
+*              (RWorkspace: need N)
+*
+               CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                      IERR )
+               NRWORK = IE + N
+*
+*              Perform bidiagonal SVD, compute singular values only
+*              (CWorkspace: 0)
+*              (RWorkspace: need BDSPAC)
+*
+               CALL DBDSDC( 'U', 'N', N, S, RWORK( IE ), DUM, 1, DUM, 1,
+     $                      DUM, IDUM, RWORK( NRWORK ), IWORK, INFO )
+*
+            ELSE IF( WNTQO ) THEN
+*
+*              Path 2 (M much larger than N, JOBZ='O')
+*              N left singular vectors to be overwritten on A and
+*              N right singular vectors to be computed in VT
+*
+               IU = 1
+*
+*              WORK(IU) is N by N
+*
+               LDWRKU = N
+               IR = IU + LDWRKU*N
+               IF( LWORK.GE.M*N+N*N+3*N ) THEN
+*
+*                 WORK(IR) is M by N
+*
+                  LDWRKR = M
+               ELSE
+                  LDWRKR = ( LWORK-N*N-3*N ) / N
+               END IF
+               ITAU = IR + LDWRKR*N
+               NWORK = ITAU + N
+*
+*              Compute A=Q*R
+*              (CWorkspace: need N*N+2*N, prefer M*N+N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy R to WORK( IR ), zeroing out below it
+*
+               CALL ZLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
+               CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, WORK( IR+1 ),
+     $                      LDWRKR )
+*
+*              Generate Q in A
+*              (CWorkspace: need 2*N, prefer N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+               IE = 1
+               ITAUQ = ITAU
+               ITAUP = ITAUQ + N
+               NWORK = ITAUP + N
+*
+*              Bidiagonalize R in WORK(IR)
+*              (CWorkspace: need N*N+3*N, prefer M*N+2*N+2*N*NB)
+*              (RWorkspace: need N)
+*
+               CALL ZGEBRD( N, N, WORK( IR ), LDWRKR, S, RWORK( IE ),
+     $                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of R in WORK(IRU) and computing right singular vectors
+*              of R in WORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               IRU = IE + N
+               IRVT = IRU + N*N
+               NRWORK = IRVT + N*N
+               CALL DBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ),
+     $                      N, RWORK( IRVT ), N, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
+*              Overwrite WORK(IU) by the left singular vectors of R
+*              (CWorkspace: need 2*N*N+3*N, prefer M*N+N*N+2*N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZLACP2( 'F', N, N, RWORK( IRU ), N, WORK( IU ),
+     $                      LDWRKU )
+               CALL ZUNMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR,
+     $                      WORK( ITAUQ ), WORK( IU ), LDWRKU,
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Copy real matrix RWORK(IRVT) to complex matrix VT
+*              Overwrite VT by the right singular vectors of R
+*              (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT )
+               CALL ZUNMBR( 'P', 'R', 'C', N, N, N, WORK( IR ), LDWRKR,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Multiply Q in A by left singular vectors of R in
+*              WORK(IU), storing result in WORK(IR) and copying to A
+*              (CWorkspace: need 2*N*N, prefer N*N+M*N)
+*              (RWorkspace: 0)
+*
+               DO 10 I = 1, M, LDWRKR
+                  CHUNK = MIN( M-I+1, LDWRKR )
+                  CALL ZGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ),
+     $                        LDA, WORK( IU ), LDWRKU, CZERO,
+     $                        WORK( IR ), LDWRKR )
+                  CALL ZLACPY( 'F', CHUNK, N, WORK( IR ), LDWRKR,
+     $                         A( I, 1 ), LDA )
+   10          CONTINUE
+*
+            ELSE IF( WNTQS ) THEN
+*
+*              Path 3 (M much larger than N, JOBZ='S')
+*              N left singular vectors to be computed in U and
+*              N right singular vectors to be computed in VT
+*
+               IR = 1
+*
+*              WORK(IR) is N by N
+*
+               LDWRKR = N
+               ITAU = IR + LDWRKR*N
+               NWORK = ITAU + N
+*
+*              Compute A=Q*R
+*              (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy R to WORK(IR), zeroing out below it
+*
+               CALL ZLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
+               CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, WORK( IR+1 ),
+     $                      LDWRKR )
+*
+*              Generate Q in A
+*              (CWorkspace: need 2*N, prefer N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+               IE = 1
+               ITAUQ = ITAU
+               ITAUP = ITAUQ + N
+               NWORK = ITAUP + N
+*
+*              Bidiagonalize R in WORK(IR)
+*              (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
+*              (RWorkspace: need N)
+*
+               CALL ZGEBRD( N, N, WORK( IR ), LDWRKR, S, RWORK( IE ),
+     $                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               IRU = IE + N
+               IRVT = IRU + N*N
+               NRWORK = IRVT + N*N
+               CALL DBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ),
+     $                      N, RWORK( IRVT ), N, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Copy real matrix RWORK(IRU) to complex matrix U
+*              Overwrite U by left singular vectors of R
+*              (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZLACP2( 'F', N, N, RWORK( IRU ), N, U, LDU )
+               CALL ZUNMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy real matrix RWORK(IRVT) to complex matrix VT
+*              Overwrite VT by right singular vectors of R
+*              (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT )
+               CALL ZUNMBR( 'P', 'R', 'C', N, N, N, WORK( IR ), LDWRKR,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Multiply Q in A by left singular vectors of R in
+*              WORK(IR), storing result in U
+*              (CWorkspace: need N*N)
+*              (RWorkspace: 0)
+*
+               CALL ZLACPY( 'F', N, N, U, LDU, WORK( IR ), LDWRKR )
+               CALL ZGEMM( 'N', 'N', M, N, N, CONE, A, LDA, WORK( IR ),
+     $                     LDWRKR, CZERO, U, LDU )
+*
+            ELSE IF( WNTQA ) THEN
+*
+*              Path 4 (M much larger than N, JOBZ='A')
+*              M left singular vectors to be computed in U and
+*              N right singular vectors to be computed in VT
+*
+               IU = 1
+*
+*              WORK(IU) is N by N
+*
+               LDWRKU = N
+               ITAU = IU + LDWRKU*N
+               NWORK = ITAU + N
+*
+*              Compute A=Q*R, copying result to U
+*              (CWorkspace: need 2*N, prefer N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+               CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*              Generate Q in U
+*              (CWorkspace: need N+M, prefer N+M*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Produce R in A, zeroing out below it
+*
+               CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ),
+     $                      LDA )
+               IE = 1
+               ITAUQ = ITAU
+               ITAUP = ITAUQ + N
+               NWORK = ITAUP + N
+*
+*              Bidiagonalize R in A
+*              (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
+*              (RWorkspace: need N)
+*
+               CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                      IERR )
+               IRU = IE + N
+               IRVT = IRU + N*N
+               NRWORK = IRVT + N*N
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               CALL DBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ),
+     $                      N, RWORK( IRVT ), N, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
+*              Overwrite WORK(IU) by left singular vectors of R
+*              (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZLACP2( 'F', N, N, RWORK( IRU ), N, WORK( IU ),
+     $                      LDWRKU )
+               CALL ZUNMBR( 'Q', 'L', 'N', N, N, N, A, LDA,
+     $                      WORK( ITAUQ ), WORK( IU ), LDWRKU,
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Copy real matrix RWORK(IRVT) to complex matrix VT
+*              Overwrite VT by right singular vectors of R
+*              (CWorkspace: need 3*N, prefer 2*N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT )
+               CALL ZUNMBR( 'P', 'R', 'C', N, N, N, A, LDA,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Multiply Q in U by left singular vectors of R in
+*              WORK(IU), storing result in A
+*              (CWorkspace: need N*N)
+*              (RWorkspace: 0)
+*
+               CALL ZGEMM( 'N', 'N', M, N, N, CONE, U, LDU, WORK( IU ),
+     $                     LDWRKU, CZERO, A, LDA )
+*
+*              Copy left singular vectors of A from A to U
+*
+               CALL ZLACPY( 'F', M, N, A, LDA, U, LDU )
+*
+            END IF
+*
+         ELSE IF( M.GE.MNTHR2 ) THEN
+*
+*           MNTHR2 <= M < MNTHR1
+*
+*           Path 5 (M much larger than N, but not as much as MNTHR1)
+*           Reduce to bidiagonal form without QR decomposition, use
+*           ZUNGBR and matrix multiplication to compute singular vectors
+*
+            IE = 1
+            NRWORK = IE + N
+            ITAUQ = 1
+            ITAUP = ITAUQ + N
+            NWORK = ITAUP + N
+*
+*           Bidiagonalize A
+*           (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
+*           (RWorkspace: need N)
+*
+            CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                   WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                   IERR )
+            IF( WNTQN ) THEN
+*
+*              Compute singular values only
+*              (Cworkspace: 0)
+*              (Rworkspace: need BDSPAC)
+*
+               CALL DBDSDC( 'U', 'N', N, S, RWORK( IE ), DUM, 1, DUM, 1,
+     $                      DUM, IDUM, RWORK( NRWORK ), IWORK, INFO )
+            ELSE IF( WNTQO ) THEN
+               IU = NWORK
+               IRU = NRWORK
+               IRVT = IRU + N*N
+               NRWORK = IRVT + N*N
+*
+*              Copy A to VT, generate P**H
+*              (Cworkspace: need 2*N, prefer N+N*NB)
+*              (Rworkspace: 0)
+*
+               CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT )
+               CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Generate Q in A
+*              (CWorkspace: need 2*N, prefer N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+               IF( LWORK.GE.M*N+3*N ) THEN
+*
+*                 WORK( IU ) is M by N
+*
+                  LDWRKU = M
+               ELSE
+*
+*                 WORK(IU) is LDWRKU by N
+*
+                  LDWRKU = ( LWORK-3*N ) / N
+               END IF
+               NWORK = IU + LDWRKU*N
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               CALL DBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ),
+     $                      N, RWORK( IRVT ), N, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Multiply real matrix RWORK(IRVT) by P**H in VT,
+*              storing the result in WORK(IU), copying to VT
+*              (Cworkspace: need 0)
+*              (Rworkspace: need 3*N*N)
+*
+               CALL ZLARCM( N, N, RWORK( IRVT ), N, VT, LDVT,
+     $                      WORK( IU ), LDWRKU, RWORK( NRWORK ) )
+               CALL ZLACPY( 'F', N, N, WORK( IU ), LDWRKU, VT, LDVT )
+*
+*              Multiply Q in A by real matrix RWORK(IRU), storing the
+*              result in WORK(IU), copying to A
+*              (CWorkspace: need N*N, prefer M*N)
+*              (Rworkspace: need 3*N*N, prefer N*N+2*M*N)
+*
+               NRWORK = IRVT
+               DO 20 I = 1, M, LDWRKU
+                  CHUNK = MIN( M-I+1, LDWRKU )
+                  CALL ZLACRM( CHUNK, N, A( I, 1 ), LDA, RWORK( IRU ),
+     $                         N, WORK( IU ), LDWRKU, RWORK( NRWORK ) )
+                  CALL ZLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
+     $                         A( I, 1 ), LDA )
+   20          CONTINUE
+*
+            ELSE IF( WNTQS ) THEN
+*
+*              Copy A to VT, generate P**H
+*              (Cworkspace: need 2*N, prefer N+N*NB)
+*              (Rworkspace: 0)
+*
+               CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT )
+               CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Copy A to U, generate Q
+*              (Cworkspace: need 2*N, prefer N+N*NB)
+*              (Rworkspace: 0)
+*
+               CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
+               CALL ZUNGBR( 'Q', M, N, N, U, LDU, WORK( ITAUQ ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               IRU = NRWORK
+               IRVT = IRU + N*N
+               NRWORK = IRVT + N*N
+               CALL DBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ),
+     $                      N, RWORK( IRVT ), N, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Multiply real matrix RWORK(IRVT) by P**H in VT,
+*              storing the result in A, copying to VT
+*              (Cworkspace: need 0)
+*              (Rworkspace: need 3*N*N)
+*
+               CALL ZLARCM( N, N, RWORK( IRVT ), N, VT, LDVT, A, LDA,
+     $                      RWORK( NRWORK ) )
+               CALL ZLACPY( 'F', N, N, A, LDA, VT, LDVT )
+*
+*              Multiply Q in U by real matrix RWORK(IRU), storing the
+*              result in A, copying to U
+*              (CWorkspace: need 0)
+*              (Rworkspace: need N*N+2*M*N)
+*
+               NRWORK = IRVT
+               CALL ZLACRM( M, N, U, LDU, RWORK( IRU ), N, A, LDA,
+     $                      RWORK( NRWORK ) )
+               CALL ZLACPY( 'F', M, N, A, LDA, U, LDU )
+            ELSE
+*
+*              Copy A to VT, generate P**H
+*              (Cworkspace: need 2*N, prefer N+N*NB)
+*              (Rworkspace: 0)
+*
+               CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT )
+               CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Copy A to U, generate Q
+*              (Cworkspace: need 2*N, prefer N+N*NB)
+*              (Rworkspace: 0)
+*
+               CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
+               CALL ZUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               IRU = NRWORK
+               IRVT = IRU + N*N
+               NRWORK = IRVT + N*N
+               CALL DBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ),
+     $                      N, RWORK( IRVT ), N, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Multiply real matrix RWORK(IRVT) by P**H in VT,
+*              storing the result in A, copying to VT
+*              (Cworkspace: need 0)
+*              (Rworkspace: need 3*N*N)
+*
+               CALL ZLARCM( N, N, RWORK( IRVT ), N, VT, LDVT, A, LDA,
+     $                      RWORK( NRWORK ) )
+               CALL ZLACPY( 'F', N, N, A, LDA, VT, LDVT )
+*
+*              Multiply Q in U by real matrix RWORK(IRU), storing the
+*              result in A, copying to U
+*              (CWorkspace: 0)
+*              (Rworkspace: need 3*N*N)
+*
+               NRWORK = IRVT
+               CALL ZLACRM( M, N, U, LDU, RWORK( IRU ), N, A, LDA,
+     $                      RWORK( NRWORK ) )
+               CALL ZLACPY( 'F', M, N, A, LDA, U, LDU )
+            END IF
+*
+         ELSE
+*
+*           M .LT. MNTHR2
+*
+*           Path 6 (M at least N, but not much larger)
+*           Reduce to bidiagonal form without QR decomposition
+*           Use ZUNMBR to compute singular vectors
+*
+            IE = 1
+            NRWORK = IE + N
+            ITAUQ = 1
+            ITAUP = ITAUQ + N
+            NWORK = ITAUP + N
+*
+*           Bidiagonalize A
+*           (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
+*           (RWorkspace: need N)
+*
+            CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                   WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                   IERR )
+            IF( WNTQN ) THEN
+*
+*              Compute singular values only
+*              (Cworkspace: 0)
+*              (Rworkspace: need BDSPAC)
+*
+               CALL DBDSDC( 'U', 'N', N, S, RWORK( IE ), DUM, 1, DUM, 1,
+     $                      DUM, IDUM, RWORK( NRWORK ), IWORK, INFO )
+            ELSE IF( WNTQO ) THEN
+               IU = NWORK
+               IRU = NRWORK
+               IRVT = IRU + N*N
+               NRWORK = IRVT + N*N
+               IF( LWORK.GE.M*N+3*N ) THEN
+*
+*                 WORK( IU ) is M by N
+*
+                  LDWRKU = M
+               ELSE
+*
+*                 WORK( IU ) is LDWRKU by N
+*
+                  LDWRKU = ( LWORK-3*N ) / N
+               END IF
+               NWORK = IU + LDWRKU*N
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               CALL DBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ),
+     $                      N, RWORK( IRVT ), N, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Copy real matrix RWORK(IRVT) to complex matrix VT
+*              Overwrite VT by right singular vectors of A
+*              (Cworkspace: need 2*N, prefer N+N*NB)
+*              (Rworkspace: need 0)
+*
+               CALL ZLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT )
+               CALL ZUNMBR( 'P', 'R', 'C', N, N, N, A, LDA,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+               IF( LWORK.GE.M*N+3*N ) THEN
+*
+*              Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
+*              Overwrite WORK(IU) by left singular vectors of A, copying
+*              to A
+*              (Cworkspace: need M*N+2*N, prefer M*N+N+N*NB)
+*              (Rworkspace: need 0)
+*
+                  CALL ZLASET( 'F', M, N, CZERO, CZERO, WORK( IU ),
+     $                         LDWRKU )
+                  CALL ZLACP2( 'F', N, N, RWORK( IRU ), N, WORK( IU ),
+     $                         LDWRKU )
+                  CALL ZUNMBR( 'Q', 'L', 'N', M, N, N, A, LDA,
+     $                         WORK( ITAUQ ), WORK( IU ), LDWRKU,
+     $                         WORK( NWORK ), LWORK-NWORK+1, IERR )
+                  CALL ZLACPY( 'F', M, N, WORK( IU ), LDWRKU, A, LDA )
+               ELSE
+*
+*                 Generate Q in A
+*                 (Cworkspace: need 2*N, prefer N+N*NB)
+*                 (Rworkspace: need 0)
+*
+                  CALL ZUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
+     $                         WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*                 Multiply Q in A by real matrix RWORK(IRU), storing the
+*                 result in WORK(IU), copying to A
+*                 (CWorkspace: need N*N, prefer M*N)
+*                 (Rworkspace: need 3*N*N, prefer N*N+2*M*N)
+*
+                  NRWORK = IRVT
+                  DO 30 I = 1, M, LDWRKU
+                     CHUNK = MIN( M-I+1, LDWRKU )
+                     CALL ZLACRM( CHUNK, N, A( I, 1 ), LDA,
+     $                            RWORK( IRU ), N, WORK( IU ), LDWRKU,
+     $                            RWORK( NRWORK ) )
+                     CALL ZLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
+     $                            A( I, 1 ), LDA )
+   30             CONTINUE
+               END IF
+*
+            ELSE IF( WNTQS ) THEN
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               IRU = NRWORK
+               IRVT = IRU + N*N
+               NRWORK = IRVT + N*N
+               CALL DBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ),
+     $                      N, RWORK( IRVT ), N, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Copy real matrix RWORK(IRU) to complex matrix U
+*              Overwrite U by left singular vectors of A
+*              (CWorkspace: need 3*N, prefer 2*N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZLASET( 'F', M, N, CZERO, CZERO, U, LDU )
+               CALL ZLACP2( 'F', N, N, RWORK( IRU ), N, U, LDU )
+               CALL ZUNMBR( 'Q', 'L', 'N', M, N, N, A, LDA,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy real matrix RWORK(IRVT) to complex matrix VT
+*              Overwrite VT by right singular vectors of A
+*              (CWorkspace: need 3*N, prefer 2*N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT )
+               CALL ZUNMBR( 'P', 'R', 'C', N, N, N, A, LDA,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+            ELSE
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               IRU = NRWORK
+               IRVT = IRU + N*N
+               NRWORK = IRVT + N*N
+               CALL DBDSDC( 'U', 'I', N, S, RWORK( IE ), RWORK( IRU ),
+     $                      N, RWORK( IRVT ), N, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Set the right corner of U to identity matrix
+*
+               CALL ZLASET( 'F', M, M, CZERO, CZERO, U, LDU )
+               CALL ZLASET( 'F', M-N, M-N, CZERO, CONE, U( N+1, N+1 ),
+     $                      LDU )
+*
+*              Copy real matrix RWORK(IRU) to complex matrix U
+*              Overwrite U by left singular vectors of A
+*              (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZLACP2( 'F', N, N, RWORK( IRU ), N, U, LDU )
+               CALL ZUNMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy real matrix RWORK(IRVT) to complex matrix VT
+*              Overwrite VT by right singular vectors of A
+*              (CWorkspace: need 3*N, prefer 2*N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZLACP2( 'F', N, N, RWORK( IRVT ), N, VT, LDVT )
+               CALL ZUNMBR( 'P', 'R', 'C', N, N, N, A, LDA,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+            END IF
+*
+         END IF
+*
+      ELSE
+*
+*        A has more columns than rows. If A has sufficiently more
+*        columns than rows, first reduce using the LQ decomposition
+*        (if sufficient workspace available)
+*
+         IF( N.GE.MNTHR1 ) THEN
+*
+            IF( WNTQN ) THEN
+*
+*              Path 1t (N much larger than M, JOBZ='N')
+*              No singular vectors to be computed
+*
+               ITAU = 1
+               NWORK = ITAU + M
+*
+*              Compute A=L*Q
+*              (CWorkspace: need 2*M, prefer M+M*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Zero out above L
+*
+               CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, A( 1, 2 ),
+     $                      LDA )
+               IE = 1
+               ITAUQ = 1
+               ITAUP = ITAUQ + M
+               NWORK = ITAUP + M
+*
+*              Bidiagonalize L in A
+*              (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
+*              (RWorkspace: need M)
+*
+               CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                      IERR )
+               NRWORK = IE + M
+*
+*              Perform bidiagonal SVD, compute singular values only
+*              (CWorkspace: 0)
+*              (RWorkspace: need BDSPAC)
+*
+               CALL DBDSDC( 'U', 'N', M, S, RWORK( IE ), DUM, 1, DUM, 1,
+     $                      DUM, IDUM, RWORK( NRWORK ), IWORK, INFO )
+*
+            ELSE IF( WNTQO ) THEN
+*
+*              Path 2t (N much larger than M, JOBZ='O')
+*              M right singular vectors to be overwritten on A and
+*              M left singular vectors to be computed in U
+*
+               IVT = 1
+               LDWKVT = M
+*
+*              WORK(IVT) is M by M
+*
+               IL = IVT + LDWKVT*M
+               IF( LWORK.GE.M*N+M*M+3*M ) THEN
+*
+*                 WORK(IL) M by N
+*
+                  LDWRKL = M
+                  CHUNK = N
+               ELSE
+*
+*                 WORK(IL) is M by CHUNK
+*
+                  LDWRKL = M
+                  CHUNK = ( LWORK-M*M-3*M ) / M
+               END IF
+               ITAU = IL + LDWRKL*CHUNK
+               NWORK = ITAU + M
+*
+*              Compute A=L*Q
+*              (CWorkspace: need 2*M, prefer M+M*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy L to WORK(IL), zeroing about above it
+*
+               CALL ZLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL )
+               CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
+     $                      WORK( IL+LDWRKL ), LDWRKL )
+*
+*              Generate Q in A
+*              (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+               IE = 1
+               ITAUQ = ITAU
+               ITAUP = ITAUQ + M
+               NWORK = ITAUP + M
+*
+*              Bidiagonalize L in WORK(IL)
+*              (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
+*              (RWorkspace: need M)
+*
+               CALL ZGEBRD( M, M, WORK( IL ), LDWRKL, S, RWORK( IE ),
+     $                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               IRU = IE + M
+               IRVT = IRU + M*M
+               NRWORK = IRVT + M*M
+               CALL DBDSDC( 'U', 'I', M, S, RWORK( IE ), RWORK( IRU ),
+     $                      M, RWORK( IRVT ), M, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
+*              Overwrite WORK(IU) by the left singular vectors of L
+*              (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU )
+               CALL ZUNMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT)
+*              Overwrite WORK(IVT) by the right singular vectors of L
+*              (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZLACP2( 'F', M, M, RWORK( IRVT ), M, WORK( IVT ),
+     $                      LDWKVT )
+               CALL ZUNMBR( 'P', 'R', 'C', M, M, M, WORK( IL ), LDWRKL,
+     $                      WORK( ITAUP ), WORK( IVT ), LDWKVT,
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Multiply right singular vectors of L in WORK(IL) by Q
+*              in A, storing result in WORK(IL) and copying to A
+*              (CWorkspace: need 2*M*M, prefer M*M+M*N))
+*              (RWorkspace: 0)
+*
+               DO 40 I = 1, N, CHUNK
+                  BLK = MIN( N-I+1, CHUNK )
+                  CALL ZGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IVT ), M,
+     $                        A( 1, I ), LDA, CZERO, WORK( IL ),
+     $                        LDWRKL )
+                  CALL ZLACPY( 'F', M, BLK, WORK( IL ), LDWRKL,
+     $                         A( 1, I ), LDA )
+   40          CONTINUE
+*
+            ELSE IF( WNTQS ) THEN
+*
+*             Path 3t (N much larger than M, JOBZ='S')
+*             M right singular vectors to be computed in VT and
+*             M left singular vectors to be computed in U
+*
+               IL = 1
+*
+*              WORK(IL) is M by M
+*
+               LDWRKL = M
+               ITAU = IL + LDWRKL*M
+               NWORK = ITAU + M
+*
+*              Compute A=L*Q
+*              (CWorkspace: need 2*M, prefer M+M*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy L to WORK(IL), zeroing out above it
+*
+               CALL ZLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL )
+               CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
+     $                      WORK( IL+LDWRKL ), LDWRKL )
+*
+*              Generate Q in A
+*              (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+               IE = 1
+               ITAUQ = ITAU
+               ITAUP = ITAUQ + M
+               NWORK = ITAUP + M
+*
+*              Bidiagonalize L in WORK(IL)
+*              (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
+*              (RWorkspace: need M)
+*
+               CALL ZGEBRD( M, M, WORK( IL ), LDWRKL, S, RWORK( IE ),
+     $                      WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               IRU = IE + M
+               IRVT = IRU + M*M
+               NRWORK = IRVT + M*M
+               CALL DBDSDC( 'U', 'I', M, S, RWORK( IE ), RWORK( IRU ),
+     $                      M, RWORK( IRVT ), M, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Copy real matrix RWORK(IRU) to complex matrix U
+*              Overwrite U by left singular vectors of L
+*              (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU )
+               CALL ZUNMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy real matrix RWORK(IRVT) to complex matrix VT
+*              Overwrite VT by left singular vectors of L
+*              (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZLACP2( 'F', M, M, RWORK( IRVT ), M, VT, LDVT )
+               CALL ZUNMBR( 'P', 'R', 'C', M, M, M, WORK( IL ), LDWRKL,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy VT to WORK(IL), multiply right singular vectors of L
+*              in WORK(IL) by Q in A, storing result in VT
+*              (CWorkspace: need M*M)
+*              (RWorkspace: 0)
+*
+               CALL ZLACPY( 'F', M, M, VT, LDVT, WORK( IL ), LDWRKL )
+               CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IL ), LDWRKL,
+     $                     A, LDA, CZERO, VT, LDVT )
+*
+            ELSE IF( WNTQA ) THEN
+*
+*              Path 9t (N much larger than M, JOBZ='A')
+*              N right singular vectors to be computed in VT and
+*              M left singular vectors to be computed in U
+*
+               IVT = 1
+*
+*              WORK(IVT) is M by M
+*
+               LDWKVT = M
+               ITAU = IVT + LDWKVT*M
+               NWORK = ITAU + M
+*
+*              Compute A=L*Q, copying result to VT
+*              (CWorkspace: need 2*M, prefer M+M*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+               CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*              Generate Q in VT
+*              (CWorkspace: need M+N, prefer M+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Produce L in A, zeroing out above it
+*
+               CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, A( 1, 2 ),
+     $                      LDA )
+               IE = 1
+               ITAUQ = ITAU
+               ITAUP = ITAUQ + M
+               NWORK = ITAUP + M
+*
+*              Bidiagonalize L in A
+*              (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
+*              (RWorkspace: need M)
+*
+               CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                      WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                      IERR )
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               IRU = IE + M
+               IRVT = IRU + M*M
+               NRWORK = IRVT + M*M
+               CALL DBDSDC( 'U', 'I', M, S, RWORK( IE ), RWORK( IRU ),
+     $                      M, RWORK( IRVT ), M, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Copy real matrix RWORK(IRU) to complex matrix U
+*              Overwrite U by left singular vectors of L
+*              (CWorkspace: need 3*M, prefer 2*M+M*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU )
+               CALL ZUNMBR( 'Q', 'L', 'N', M, M, M, A, LDA,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT)
+*              Overwrite WORK(IVT) by right singular vectors of L
+*              (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZLACP2( 'F', M, M, RWORK( IRVT ), M, WORK( IVT ),
+     $                      LDWKVT )
+               CALL ZUNMBR( 'P', 'R', 'C', M, M, M, A, LDA,
+     $                      WORK( ITAUP ), WORK( IVT ), LDWKVT,
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Multiply right singular vectors of L in WORK(IVT) by
+*              Q in VT, storing result in A
+*              (CWorkspace: need M*M)
+*              (RWorkspace: 0)
+*
+               CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IVT ), LDWKVT,
+     $                     VT, LDVT, CZERO, A, LDA )
+*
+*              Copy right singular vectors of A from A to VT
+*
+               CALL ZLACPY( 'F', M, N, A, LDA, VT, LDVT )
+*
+            END IF
+*
+         ELSE IF( N.GE.MNTHR2 ) THEN
+*
+*           MNTHR2 <= N < MNTHR1
+*
+*           Path 5t (N much larger than M, but not as much as MNTHR1)
+*           Reduce to bidiagonal form without QR decomposition, use
+*           ZUNGBR and matrix multiplication to compute singular vectors
+*
+*
+            IE = 1
+            NRWORK = IE + M
+            ITAUQ = 1
+            ITAUP = ITAUQ + M
+            NWORK = ITAUP + M
+*
+*           Bidiagonalize A
+*           (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
+*           (RWorkspace: M)
+*
+            CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                   WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                   IERR )
+*
+            IF( WNTQN ) THEN
+*
+*              Compute singular values only
+*              (Cworkspace: 0)
+*              (Rworkspace: need BDSPAC)
+*
+               CALL DBDSDC( 'L', 'N', M, S, RWORK( IE ), DUM, 1, DUM, 1,
+     $                      DUM, IDUM, RWORK( NRWORK ), IWORK, INFO )
+            ELSE IF( WNTQO ) THEN
+               IRVT = NRWORK
+               IRU = IRVT + M*M
+               NRWORK = IRU + M*M
+               IVT = NWORK
+*
+*              Copy A to U, generate Q
+*              (Cworkspace: need 2*M, prefer M+M*NB)
+*              (Rworkspace: 0)
+*
+               CALL ZLACPY( 'L', M, M, A, LDA, U, LDU )
+               CALL ZUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Generate P**H in A
+*              (Cworkspace: need 2*M, prefer M+M*NB)
+*              (Rworkspace: 0)
+*
+               CALL ZUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+               LDWKVT = M
+               IF( LWORK.GE.M*N+3*M ) THEN
+*
+*                 WORK( IVT ) is M by N
+*
+                  NWORK = IVT + LDWKVT*N
+                  CHUNK = N
+               ELSE
+*
+*                 WORK( IVT ) is M by CHUNK
+*
+                  CHUNK = ( LWORK-3*M ) / M
+                  NWORK = IVT + LDWKVT*CHUNK
+               END IF
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               CALL DBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ),
+     $                      M, RWORK( IRVT ), M, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Multiply Q in U by real matrix RWORK(IRVT)
+*              storing the result in WORK(IVT), copying to U
+*              (Cworkspace: need 0)
+*              (Rworkspace: need 2*M*M)
+*
+               CALL ZLACRM( M, M, U, LDU, RWORK( IRU ), M, WORK( IVT ),
+     $                      LDWKVT, RWORK( NRWORK ) )
+               CALL ZLACPY( 'F', M, M, WORK( IVT ), LDWKVT, U, LDU )
+*
+*              Multiply RWORK(IRVT) by P**H in A, storing the
+*              result in WORK(IVT), copying to A
+*              (CWorkspace: need M*M, prefer M*N)
+*              (Rworkspace: need 2*M*M, prefer 2*M*N)
+*
+               NRWORK = IRU
+               DO 50 I = 1, N, CHUNK
+                  BLK = MIN( N-I+1, CHUNK )
+                  CALL ZLARCM( M, BLK, RWORK( IRVT ), M, A( 1, I ), LDA,
+     $                         WORK( IVT ), LDWKVT, RWORK( NRWORK ) )
+                  CALL ZLACPY( 'F', M, BLK, WORK( IVT ), LDWKVT,
+     $                         A( 1, I ), LDA )
+   50          CONTINUE
+            ELSE IF( WNTQS ) THEN
+*
+*              Copy A to U, generate Q
+*              (Cworkspace: need 2*M, prefer M+M*NB)
+*              (Rworkspace: 0)
+*
+               CALL ZLACPY( 'L', M, M, A, LDA, U, LDU )
+               CALL ZUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Copy A to VT, generate P**H
+*              (Cworkspace: need 2*M, prefer M+M*NB)
+*              (Rworkspace: 0)
+*
+               CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
+               CALL ZUNGBR( 'P', M, N, M, VT, LDVT, WORK( ITAUP ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               IRVT = NRWORK
+               IRU = IRVT + M*M
+               NRWORK = IRU + M*M
+               CALL DBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ),
+     $                      M, RWORK( IRVT ), M, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Multiply Q in U by real matrix RWORK(IRU), storing the
+*              result in A, copying to U
+*              (CWorkspace: need 0)
+*              (Rworkspace: need 3*M*M)
+*
+               CALL ZLACRM( M, M, U, LDU, RWORK( IRU ), M, A, LDA,
+     $                      RWORK( NRWORK ) )
+               CALL ZLACPY( 'F', M, M, A, LDA, U, LDU )
+*
+*              Multiply real matrix RWORK(IRVT) by P**H in VT,
+*              storing the result in A, copying to VT
+*              (Cworkspace: need 0)
+*              (Rworkspace: need M*M+2*M*N)
+*
+               NRWORK = IRU
+               CALL ZLARCM( M, N, RWORK( IRVT ), M, VT, LDVT, A, LDA,
+     $                      RWORK( NRWORK ) )
+               CALL ZLACPY( 'F', M, N, A, LDA, VT, LDVT )
+            ELSE
+*
+*              Copy A to U, generate Q
+*              (Cworkspace: need 2*M, prefer M+M*NB)
+*              (Rworkspace: 0)
+*
+               CALL ZLACPY( 'L', M, M, A, LDA, U, LDU )
+               CALL ZUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Copy A to VT, generate P**H
+*              (Cworkspace: need 2*M, prefer M+M*NB)
+*              (Rworkspace: 0)
+*
+               CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
+               CALL ZUNGBR( 'P', N, N, M, VT, LDVT, WORK( ITAUP ),
+     $                      WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               IRVT = NRWORK
+               IRU = IRVT + M*M
+               NRWORK = IRU + M*M
+               CALL DBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ),
+     $                      M, RWORK( IRVT ), M, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Multiply Q in U by real matrix RWORK(IRU), storing the
+*              result in A, copying to U
+*              (CWorkspace: need 0)
+*              (Rworkspace: need 3*M*M)
+*
+               CALL ZLACRM( M, M, U, LDU, RWORK( IRU ), M, A, LDA,
+     $                      RWORK( NRWORK ) )
+               CALL ZLACPY( 'F', M, M, A, LDA, U, LDU )
+*
+*              Multiply real matrix RWORK(IRVT) by P**H in VT,
+*              storing the result in A, copying to VT
+*              (Cworkspace: need 0)
+*              (Rworkspace: need M*M+2*M*N)
+*
+               CALL ZLARCM( M, N, RWORK( IRVT ), M, VT, LDVT, A, LDA,
+     $                      RWORK( NRWORK ) )
+               CALL ZLACPY( 'F', M, N, A, LDA, VT, LDVT )
+            END IF
+*
+         ELSE
+*
+*           N .LT. MNTHR2
+*
+*           Path 6t (N greater than M, but not much larger)
+*           Reduce to bidiagonal form without LQ decomposition
+*           Use ZUNMBR to compute singular vectors
+*
+            IE = 1
+            NRWORK = IE + M
+            ITAUQ = 1
+            ITAUP = ITAUQ + M
+            NWORK = ITAUP + M
+*
+*           Bidiagonalize A
+*           (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
+*           (RWorkspace: M)
+*
+            CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                   WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
+     $                   IERR )
+            IF( WNTQN ) THEN
+*
+*              Compute singular values only
+*              (Cworkspace: 0)
+*              (Rworkspace: need BDSPAC)
+*
+               CALL DBDSDC( 'L', 'N', M, S, RWORK( IE ), DUM, 1, DUM, 1,
+     $                      DUM, IDUM, RWORK( NRWORK ), IWORK, INFO )
+            ELSE IF( WNTQO ) THEN
+               LDWKVT = M
+               IVT = NWORK
+               IF( LWORK.GE.M*N+3*M ) THEN
+*
+*                 WORK( IVT ) is M by N
+*
+                  CALL ZLASET( 'F', M, N, CZERO, CZERO, WORK( IVT ),
+     $                         LDWKVT )
+                  NWORK = IVT + LDWKVT*N
+               ELSE
+*
+*                 WORK( IVT ) is M by CHUNK
+*
+                  CHUNK = ( LWORK-3*M ) / M
+                  NWORK = IVT + LDWKVT*CHUNK
+               END IF
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               IRVT = NRWORK
+               IRU = IRVT + M*M
+               NRWORK = IRU + M*M
+               CALL DBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ),
+     $                      M, RWORK( IRVT ), M, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Copy real matrix RWORK(IRU) to complex matrix U
+*              Overwrite U by left singular vectors of A
+*              (Cworkspace: need 2*M, prefer M+M*NB)
+*              (Rworkspace: need 0)
+*
+               CALL ZLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU )
+               CALL ZUNMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+               IF( LWORK.GE.M*N+3*M ) THEN
+*
+*              Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT)
+*              Overwrite WORK(IVT) by right singular vectors of A,
+*              copying to A
+*              (Cworkspace: need M*N+2*M, prefer M*N+M+M*NB)
+*              (Rworkspace: need 0)
+*
+                  CALL ZLACP2( 'F', M, M, RWORK( IRVT ), M, WORK( IVT ),
+     $                         LDWKVT )
+                  CALL ZUNMBR( 'P', 'R', 'C', M, N, M, A, LDA,
+     $                         WORK( ITAUP ), WORK( IVT ), LDWKVT,
+     $                         WORK( NWORK ), LWORK-NWORK+1, IERR )
+                  CALL ZLACPY( 'F', M, N, WORK( IVT ), LDWKVT, A, LDA )
+               ELSE
+*
+*                 Generate P**H in A
+*                 (Cworkspace: need 2*M, prefer M+M*NB)
+*                 (Rworkspace: need 0)
+*
+                  CALL ZUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
+     $                         WORK( NWORK ), LWORK-NWORK+1, IERR )
+*
+*                 Multiply Q in A by real matrix RWORK(IRU), storing the
+*                 result in WORK(IU), copying to A
+*                 (CWorkspace: need M*M, prefer M*N)
+*                 (Rworkspace: need 3*M*M, prefer M*M+2*M*N)
+*
+                  NRWORK = IRU
+                  DO 60 I = 1, N, CHUNK
+                     BLK = MIN( N-I+1, CHUNK )
+                     CALL ZLARCM( M, BLK, RWORK( IRVT ), M, A( 1, I ),
+     $                            LDA, WORK( IVT ), LDWKVT,
+     $                            RWORK( NRWORK ) )
+                     CALL ZLACPY( 'F', M, BLK, WORK( IVT ), LDWKVT,
+     $                            A( 1, I ), LDA )
+   60             CONTINUE
+               END IF
+            ELSE IF( WNTQS ) THEN
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               IRVT = NRWORK
+               IRU = IRVT + M*M
+               NRWORK = IRU + M*M
+               CALL DBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ),
+     $                      M, RWORK( IRVT ), M, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Copy real matrix RWORK(IRU) to complex matrix U
+*              Overwrite U by left singular vectors of A
+*              (CWorkspace: need 3*M, prefer 2*M+M*NB)
+*              (RWorkspace: M*M)
+*
+               CALL ZLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU )
+               CALL ZUNMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Copy real matrix RWORK(IRVT) to complex matrix VT
+*              Overwrite VT by right singular vectors of A
+*              (CWorkspace: need 3*M, prefer 2*M+M*NB)
+*              (RWorkspace: M*M)
+*
+               CALL ZLASET( 'F', M, N, CZERO, CZERO, VT, LDVT )
+               CALL ZLACP2( 'F', M, M, RWORK( IRVT ), M, VT, LDVT )
+               CALL ZUNMBR( 'P', 'R', 'C', M, N, M, A, LDA,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+            ELSE
+*
+*              Perform bidiagonal SVD, computing left singular vectors
+*              of bidiagonal matrix in RWORK(IRU) and computing right
+*              singular vectors of bidiagonal matrix in RWORK(IRVT)
+*              (CWorkspace: need 0)
+*              (RWorkspace: need BDSPAC)
+*
+               IRVT = NRWORK
+               IRU = IRVT + M*M
+               NRWORK = IRU + M*M
+*
+               CALL DBDSDC( 'L', 'I', M, S, RWORK( IE ), RWORK( IRU ),
+     $                      M, RWORK( IRVT ), M, DUM, IDUM,
+     $                      RWORK( NRWORK ), IWORK, INFO )
+*
+*              Copy real matrix RWORK(IRU) to complex matrix U
+*              Overwrite U by left singular vectors of A
+*              (CWorkspace: need 3*M, prefer 2*M+M*NB)
+*              (RWorkspace: M*M)
+*
+               CALL ZLACP2( 'F', M, M, RWORK( IRU ), M, U, LDU )
+               CALL ZUNMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
+     $                      WORK( ITAUQ ), U, LDU, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+*
+*              Set the right corner of VT to identity matrix
+*
+               CALL ZLASET( 'F', N-M, N-M, CZERO, CONE, VT( M+1, M+1 ),
+     $                      LDVT )
+*
+*              Copy real matrix RWORK(IRVT) to complex matrix VT
+*              Overwrite VT by right singular vectors of A
+*              (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
+*              (RWorkspace: M*M)
+*
+               CALL ZLASET( 'F', N, N, CZERO, CZERO, VT, LDVT )
+               CALL ZLACP2( 'F', M, M, RWORK( IRVT ), M, VT, LDVT )
+               CALL ZUNMBR( 'P', 'R', 'C', N, N, M, A, LDA,
+     $                      WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
+     $                      LWORK-NWORK+1, IERR )
+            END IF
+*
+         END IF
+*
+      END IF
+*
+*     Undo scaling if necessary
+*
+      IF( ISCL.EQ.1 ) THEN
+         IF( ANRM.GT.BIGNUM )
+     $      CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
+     $                   IERR )
+         IF( ANRM.LT.SMLNUM )
+     $      CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
+     $                   IERR )
+      END IF
+*
+*     Return optimal workspace in WORK(1)
+*
+      WORK( 1 ) = MAXWRK
+*
+      RETURN
+*
+*     End of ZGESDD
+*
+      END
diff --git a/costa/native/external/lapack/zgesv.f b/costa/native/external/lapack/zgesv.f
new file mode 100644
index 000000000..4eeb90d4c
--- /dev/null
+++ b/costa/native/external/lapack/zgesv.f
@@ -0,0 +1,108 @@
+      SUBROUTINE ZGESV( N, NRHS, A, LDA, IPIV, B, LDB, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGESV computes the solution to a complex system of linear equations
+*     A * X = B,
+*  where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
+*
+*  The LU decomposition with partial pivoting and row interchanges is
+*  used to factor A as
+*     A = P * L * U,
+*  where P is a permutation matrix, L is unit lower triangular, and U is
+*  upper triangular.  The factored form of A is then used to solve the
+*  system of equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the N-by-N coefficient matrix A.
+*          On exit, the factors L and U from the factorization
+*          A = P*L*U; the unit diagonal elements of L are not stored.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          The pivot indices that define the permutation matrix P;
+*          row i of the matrix was interchanged with row IPIV(i).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS matrix of right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, U(i,i) is exactly zero.  The factorization
+*                has been completed, but the factor U is exactly
+*                singular, so the solution could not be computed.
+*
+*  =====================================================================
+*
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGETRF, ZGETRS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGESV ', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the LU factorization of A.
+*
+      CALL ZGETRF( N, N, A, LDA, IPIV, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL ZGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, B, LDB,
+     $                INFO )
+      END IF
+      RETURN
+*
+*     End of ZGESV
+*
+      END
diff --git a/costa/native/external/lapack/zgesvd.f b/costa/native/external/lapack/zgesvd.f
new file mode 100644
index 000000000..181be23ab
--- /dev/null
+++ b/costa/native/external/lapack/zgesvd.f
@@ -0,0 +1,3618 @@
+      SUBROUTINE ZGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
+     $                   WORK, LWORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBU, JOBVT
+      INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   RWORK( * ), S( * )
+      COMPLEX*16         A( LDA, * ), U( LDU, * ), VT( LDVT, * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGESVD computes the singular value decomposition (SVD) of a complex
+*  M-by-N matrix A, optionally computing the left and/or right singular
+*  vectors. The SVD is written
+*
+*       A = U * SIGMA * conjugate-transpose(V)
+*
+*  where SIGMA is an M-by-N matrix which is zero except for its
+*  min(m,n) diagonal elements, U is an M-by-M unitary matrix, and
+*  V is an N-by-N unitary matrix.  The diagonal elements of SIGMA
+*  are the singular values of A; they are real and non-negative, and
+*  are returned in descending order.  The first min(m,n) columns of
+*  U and V are the left and right singular vectors of A.
+*
+*  Note that the routine returns V**H, not V.
+*
+*  Arguments
+*  =========
+*
+*  JOBU    (input) CHARACTER*1
+*          Specifies options for computing all or part of the matrix U:
+*          = 'A':  all M columns of U are returned in array U:
+*          = 'S':  the first min(m,n) columns of U (the left singular
+*                  vectors) are returned in the array U;
+*          = 'O':  the first min(m,n) columns of U (the left singular
+*                  vectors) are overwritten on the array A;
+*          = 'N':  no columns of U (no left singular vectors) are
+*                  computed.
+*
+*  JOBVT   (input) CHARACTER*1
+*          Specifies options for computing all or part of the matrix
+*          V**H:
+*          = 'A':  all N rows of V**H are returned in the array VT;
+*          = 'S':  the first min(m,n) rows of V**H (the right singular
+*                  vectors) are returned in the array VT;
+*          = 'O':  the first min(m,n) rows of V**H (the right singular
+*                  vectors) are overwritten on the array A;
+*          = 'N':  no rows of V**H (no right singular vectors) are
+*                  computed.
+*
+*          JOBVT and JOBU cannot both be 'O'.
+*
+*  M       (input) INTEGER
+*          The number of rows of the input matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the input matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit,
+*          if JOBU = 'O',  A is overwritten with the first min(m,n)
+*                          columns of U (the left singular vectors,
+*                          stored columnwise);
+*          if JOBVT = 'O', A is overwritten with the first min(m,n)
+*                          rows of V**H (the right singular vectors,
+*                          stored rowwise);
+*          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
+*                          are destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  S       (output) DOUBLE PRECISION array, dimension (min(M,N))
+*          The singular values of A, sorted so that S(i) >= S(i+1).
+*
+*  U       (output) COMPLEX*16 array, dimension (LDU,UCOL)
+*          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
+*          If JOBU = 'A', U contains the M-by-M unitary matrix U;
+*          if JOBU = 'S', U contains the first min(m,n) columns of U
+*          (the left singular vectors, stored columnwise);
+*          if JOBU = 'N' or 'O', U is not referenced.
+*
+*  LDU     (input) INTEGER
+*          The leading dimension of the array U.  LDU >= 1; if
+*          JOBU = 'S' or 'A', LDU >= M.
+*
+*  VT      (output) COMPLEX*16 array, dimension (LDVT,N)
+*          If JOBVT = 'A', VT contains the N-by-N unitary matrix
+*          V**H;
+*          if JOBVT = 'S', VT contains the first min(m,n) rows of
+*          V**H (the right singular vectors, stored rowwise);
+*          if JOBVT = 'N' or 'O', VT is not referenced.
+*
+*  LDVT    (input) INTEGER
+*          The leading dimension of the array VT.  LDVT >= 1; if
+*          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= 1.
+*          LWORK >=  2*MIN(M,N)+MAX(M,N).
+*          For good performance, LWORK should generally be larger.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (5*min(M,N))
+*          On exit, if INFO > 0, RWORK(1:MIN(M,N)-1) contains the
+*          unconverged superdiagonal elements of an upper bidiagonal
+*          matrix B whose diagonal is in S (not necessarily sorted).
+*          B satisfies A = U * B * VT, so it has the same singular
+*          values as A, and singular vectors related by U and VT.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if ZBDSQR did not converge, INFO specifies how many
+*                superdiagonals of an intermediate bidiagonal form B
+*                did not converge to zero. See the description of RWORK
+*                above for details.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0D0, 0.0D0 ),
+     $                   CONE = ( 1.0D0, 0.0D0 ) )
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
+     $                   WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
+      INTEGER            BLK, CHUNK, I, IE, IERR, IR, IRWORK, ISCL,
+     $                   ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
+     $                   MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
+     $                   NRVT, WRKBL
+      DOUBLE PRECISION   ANRM, BIGNUM, EPS, SMLNUM
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   DUM( 1 )
+      COMPLEX*16         CDUM( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASCL, XERBLA, ZBDSQR, ZGEBRD, ZGELQF, ZGEMM,
+     $                   ZGEQRF, ZLACPY, ZLASCL, ZLASET, ZUNGBR, ZUNGLQ,
+     $                   ZUNGQR, ZUNMBR
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, ZLANGE
+      EXTERNAL           LSAME, ILAENV, DLAMCH, ZLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      MINMN = MIN( M, N )
+      MNTHR = ILAENV( 6, 'ZGESVD', JOBU // JOBVT, M, N, 0, 0 )
+      WNTUA = LSAME( JOBU, 'A' )
+      WNTUS = LSAME( JOBU, 'S' )
+      WNTUAS = WNTUA .OR. WNTUS
+      WNTUO = LSAME( JOBU, 'O' )
+      WNTUN = LSAME( JOBU, 'N' )
+      WNTVA = LSAME( JOBVT, 'A' )
+      WNTVS = LSAME( JOBVT, 'S' )
+      WNTVAS = WNTVA .OR. WNTVS
+      WNTVO = LSAME( JOBVT, 'O' )
+      WNTVN = LSAME( JOBVT, 'N' )
+      MINWRK = 1
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR.
+     $         ( WNTVO .AND. WNTUO ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -6
+      ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN
+         INFO = -9
+      ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR.
+     $         ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN
+         INFO = -11
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       CWorkspace refers to complex workspace, and RWorkspace to
+*       real workspace. NB refers to the optimal block size for the
+*       immediately following subroutine, as returned by ILAENV.)
+*
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) .AND. M.GT.0 .AND.
+     $    N.GT.0 ) THEN
+         IF( M.GE.N ) THEN
+*
+*           Space needed for ZBDSQR is BDSPAC = 5*N
+*
+            IF( M.GE.MNTHR ) THEN
+               IF( WNTUN ) THEN
+*
+*                 Path 1 (M much larger than N, JOBU='N')
+*
+                  MAXWRK = N + N*ILAENV( 1, 'ZGEQRF', ' ', M, N, -1,
+     $                     -1 )
+                  MAXWRK = MAX( MAXWRK, 2*N+2*N*
+     $                     ILAENV( 1, 'ZGEBRD', ' ', N, N, -1, -1 ) )
+                  IF( WNTVO .OR. WNTVAS )
+     $               MAXWRK = MAX( MAXWRK, 2*N+( N-1 )*
+     $                        ILAENV( 1, 'ZUNGBR', 'P', N, N, N, -1 ) )
+                  MINWRK = 3*N
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTUO .AND. WNTVN ) THEN
+*
+*                 Path 2 (M much larger than N, JOBU='O', JOBVT='N')
+*
+                  WRKBL = N + N*ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'ZUNGQR', ' ', M,
+     $                    N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+2*N*
+     $                    ILAENV( 1, 'ZGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+N*
+     $                    ILAENV( 1, 'ZUNGBR', 'Q', N, N, N, -1 ) )
+                  MAXWRK = MAX( N*N+WRKBL, N*N+M*N )
+                  MINWRK = 2*N + M
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTUO .AND. WNTVAS ) THEN
+*
+*                 Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
+*                 'A')
+*
+                  WRKBL = N + N*ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'ZUNGQR', ' ', M,
+     $                    N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+2*N*
+     $                    ILAENV( 1, 'ZGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+N*
+     $                    ILAENV( 1, 'ZUNGBR', 'Q', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+( N-1 )*
+     $                    ILAENV( 1, 'ZUNGBR', 'P', N, N, N, -1 ) )
+                  MAXWRK = MAX( N*N+WRKBL, N*N+M*N )
+                  MINWRK = 2*N + M
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTUS .AND. WNTVN ) THEN
+*
+*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
+*
+                  WRKBL = N + N*ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'ZUNGQR', ' ', M,
+     $                    N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+2*N*
+     $                    ILAENV( 1, 'ZGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+N*
+     $                    ILAENV( 1, 'ZUNGBR', 'Q', N, N, N, -1 ) )
+                  MAXWRK = N*N + WRKBL
+                  MINWRK = 2*N + M
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTUS .AND. WNTVO ) THEN
+*
+*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
+*
+                  WRKBL = N + N*ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'ZUNGQR', ' ', M,
+     $                    N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+2*N*
+     $                    ILAENV( 1, 'ZGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+N*
+     $                    ILAENV( 1, 'ZUNGBR', 'Q', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+( N-1 )*
+     $                    ILAENV( 1, 'ZUNGBR', 'P', N, N, N, -1 ) )
+                  MAXWRK = 2*N*N + WRKBL
+                  MINWRK = 2*N + M
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTUS .AND. WNTVAS ) THEN
+*
+*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
+*                 'A')
+*
+                  WRKBL = N + N*ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+N*ILAENV( 1, 'ZUNGQR', ' ', M,
+     $                    N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+2*N*
+     $                    ILAENV( 1, 'ZGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+N*
+     $                    ILAENV( 1, 'ZUNGBR', 'Q', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+( N-1 )*
+     $                    ILAENV( 1, 'ZUNGBR', 'P', N, N, N, -1 ) )
+                  MAXWRK = N*N + WRKBL
+                  MINWRK = 2*N + M
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTUA .AND. WNTVN ) THEN
+*
+*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
+*
+                  WRKBL = N + N*ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'ZUNGQR', ' ', M,
+     $                    M, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+2*N*
+     $                    ILAENV( 1, 'ZGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+N*
+     $                    ILAENV( 1, 'ZUNGBR', 'Q', N, N, N, -1 ) )
+                  MAXWRK = N*N + WRKBL
+                  MINWRK = 2*N + M
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTUA .AND. WNTVO ) THEN
+*
+*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
+*
+                  WRKBL = N + N*ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'ZUNGQR', ' ', M,
+     $                    M, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+2*N*
+     $                    ILAENV( 1, 'ZGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+N*
+     $                    ILAENV( 1, 'ZUNGBR', 'Q', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+( N-1 )*
+     $                    ILAENV( 1, 'ZUNGBR', 'P', N, N, N, -1 ) )
+                  MAXWRK = 2*N*N + WRKBL
+                  MINWRK = 2*N + M
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTUA .AND. WNTVAS ) THEN
+*
+*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
+*                 'A')
+*
+                  WRKBL = N + N*ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, N+M*ILAENV( 1, 'ZUNGQR', ' ', M,
+     $                    M, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+2*N*
+     $                    ILAENV( 1, 'ZGEBRD', ' ', N, N, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+N*
+     $                    ILAENV( 1, 'ZUNGBR', 'Q', N, N, N, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*N+( N-1 )*
+     $                    ILAENV( 1, 'ZUNGBR', 'P', N, N, N, -1 ) )
+                  MAXWRK = N*N + WRKBL
+                  MINWRK = 2*N + M
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               END IF
+            ELSE
+*
+*              Path 10 (M at least N, but not much larger)
+*
+               MAXWRK = 2*N + ( M+N )*ILAENV( 1, 'ZGEBRD', ' ', M, N,
+     $                  -1, -1 )
+               IF( WNTUS .OR. WNTUO )
+     $            MAXWRK = MAX( MAXWRK, 2*N+N*
+     $                     ILAENV( 1, 'ZUNGBR', 'Q', M, N, N, -1 ) )
+               IF( WNTUA )
+     $            MAXWRK = MAX( MAXWRK, 2*N+M*
+     $                     ILAENV( 1, 'ZUNGBR', 'Q', M, M, N, -1 ) )
+               IF( .NOT.WNTVN )
+     $            MAXWRK = MAX( MAXWRK, 2*N+( N-1 )*
+     $                     ILAENV( 1, 'ZUNGBR', 'P', N, N, N, -1 ) )
+               MINWRK = 2*N + M
+               MAXWRK = MAX( MINWRK, MAXWRK )
+            END IF
+         ELSE
+*
+*           Space needed for ZBDSQR is BDSPAC = 5*M
+*
+            IF( N.GE.MNTHR ) THEN
+               IF( WNTVN ) THEN
+*
+*                 Path 1t(N much larger than M, JOBVT='N')
+*
+                  MAXWRK = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1,
+     $                     -1 )
+                  MAXWRK = MAX( MAXWRK, 2*M+2*M*
+     $                     ILAENV( 1, 'ZGEBRD', ' ', M, M, -1, -1 ) )
+                  IF( WNTUO .OR. WNTUAS )
+     $               MAXWRK = MAX( MAXWRK, 2*M+M*
+     $                        ILAENV( 1, 'ZUNGBR', 'Q', M, M, M, -1 ) )
+                  MINWRK = 3*M
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTVO .AND. WNTUN ) THEN
+*
+*                 Path 2t(N much larger than M, JOBU='N', JOBVT='O')
+*
+                  WRKBL = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'ZUNGLQ', ' ', M,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+2*M*
+     $                    ILAENV( 1, 'ZGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+( M-1 )*
+     $                    ILAENV( 1, 'ZUNGBR', 'P', M, M, M, -1 ) )
+                  MAXWRK = MAX( M*M+WRKBL, M*M+M*N )
+                  MINWRK = 2*M + N
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTVO .AND. WNTUAS ) THEN
+*
+*                 Path 3t(N much larger than M, JOBU='S' or 'A',
+*                 JOBVT='O')
+*
+                  WRKBL = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'ZUNGLQ', ' ', M,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+2*M*
+     $                    ILAENV( 1, 'ZGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+( M-1 )*
+     $                    ILAENV( 1, 'ZUNGBR', 'P', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+M*
+     $                    ILAENV( 1, 'ZUNGBR', 'Q', M, M, M, -1 ) )
+                  MAXWRK = MAX( M*M+WRKBL, M*M+M*N )
+                  MINWRK = 2*M + N
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTVS .AND. WNTUN ) THEN
+*
+*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
+*
+                  WRKBL = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'ZUNGLQ', ' ', M,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+2*M*
+     $                    ILAENV( 1, 'ZGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+( M-1 )*
+     $                    ILAENV( 1, 'ZUNGBR', 'P', M, M, M, -1 ) )
+                  MAXWRK = M*M + WRKBL
+                  MINWRK = 2*M + N
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTVS .AND. WNTUO ) THEN
+*
+*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
+*
+                  WRKBL = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'ZUNGLQ', ' ', M,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+2*M*
+     $                    ILAENV( 1, 'ZGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+( M-1 )*
+     $                    ILAENV( 1, 'ZUNGBR', 'P', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+M*
+     $                    ILAENV( 1, 'ZUNGBR', 'Q', M, M, M, -1 ) )
+                  MAXWRK = 2*M*M + WRKBL
+                  MINWRK = 2*M + N
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTVS .AND. WNTUAS ) THEN
+*
+*                 Path 6t(N much larger than M, JOBU='S' or 'A',
+*                 JOBVT='S')
+*
+                  WRKBL = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+M*ILAENV( 1, 'ZUNGLQ', ' ', M,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+2*M*
+     $                    ILAENV( 1, 'ZGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+( M-1 )*
+     $                    ILAENV( 1, 'ZUNGBR', 'P', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+M*
+     $                    ILAENV( 1, 'ZUNGBR', 'Q', M, M, M, -1 ) )
+                  MAXWRK = M*M + WRKBL
+                  MINWRK = 2*M + N
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTVA .AND. WNTUN ) THEN
+*
+*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
+*
+                  WRKBL = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'ZUNGLQ', ' ', N,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+2*M*
+     $                    ILAENV( 1, 'ZGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+( M-1 )*
+     $                    ILAENV( 1, 'ZUNGBR', 'P', M, M, M, -1 ) )
+                  MAXWRK = M*M + WRKBL
+                  MINWRK = 2*M + N
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTVA .AND. WNTUO ) THEN
+*
+*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
+*
+                  WRKBL = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'ZUNGLQ', ' ', N,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+2*M*
+     $                    ILAENV( 1, 'ZGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+( M-1 )*
+     $                    ILAENV( 1, 'ZUNGBR', 'P', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+M*
+     $                    ILAENV( 1, 'ZUNGBR', 'Q', M, M, M, -1 ) )
+                  MAXWRK = 2*M*M + WRKBL
+                  MINWRK = 2*M + N
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               ELSE IF( WNTVA .AND. WNTUAS ) THEN
+*
+*                 Path 9t(N much larger than M, JOBU='S' or 'A',
+*                 JOBVT='A')
+*
+                  WRKBL = M + M*ILAENV( 1, 'ZGELQF', ' ', M, N, -1, -1 )
+                  WRKBL = MAX( WRKBL, M+N*ILAENV( 1, 'ZUNGLQ', ' ', N,
+     $                    N, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+2*M*
+     $                    ILAENV( 1, 'ZGEBRD', ' ', M, M, -1, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+( M-1 )*
+     $                    ILAENV( 1, 'ZUNGBR', 'P', M, M, M, -1 ) )
+                  WRKBL = MAX( WRKBL, 2*M+M*
+     $                    ILAENV( 1, 'ZUNGBR', 'Q', M, M, M, -1 ) )
+                  MAXWRK = M*M + WRKBL
+                  MINWRK = 2*M + N
+                  MAXWRK = MAX( MINWRK, MAXWRK )
+               END IF
+            ELSE
+*
+*              Path 10t(N greater than M, but not much larger)
+*
+               MAXWRK = 2*M + ( M+N )*ILAENV( 1, 'ZGEBRD', ' ', M, N,
+     $                  -1, -1 )
+               IF( WNTVS .OR. WNTVO )
+     $            MAXWRK = MAX( MAXWRK, 2*M+M*
+     $                     ILAENV( 1, 'ZUNGBR', 'P', M, N, M, -1 ) )
+               IF( WNTVA )
+     $            MAXWRK = MAX( MAXWRK, 2*M+N*
+     $                     ILAENV( 1, 'ZUNGBR', 'P', N, N, M, -1 ) )
+               IF( .NOT.WNTUN )
+     $            MAXWRK = MAX( MAXWRK, 2*M+( M-1 )*
+     $                     ILAENV( 1, 'ZUNGBR', 'Q', M, M, M, -1 ) )
+               MINWRK = 2*M + N
+               MAXWRK = MAX( MINWRK, MAXWRK )
+            END IF
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+*
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGESVD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         IF( LWORK.GE.1 )
+     $      WORK( 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'P' )
+      SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = ZLANGE( 'M', M, N, A, LDA, DUM )
+      ISCL = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         ISCL = 1
+         CALL ZLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         ISCL = 1
+         CALL ZLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
+      END IF
+*
+      IF( M.GE.N ) THEN
+*
+*        A has at least as many rows as columns. If A has sufficiently
+*        more rows than columns, first reduce using the QR
+*        decomposition (if sufficient workspace available)
+*
+         IF( M.GE.MNTHR ) THEN
+*
+            IF( WNTUN ) THEN
+*
+*              Path 1 (M much larger than N, JOBU='N')
+*              No left singular vectors to be computed
+*
+               ITAU = 1
+               IWORK = ITAU + N
+*
+*              Compute A=Q*R
+*              (CWorkspace: need 2*N, prefer N+N*NB)
+*              (RWorkspace: need 0)
+*
+               CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
+     $                      LWORK-IWORK+1, IERR )
+*
+*              Zero out below R
+*
+               CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, A( 2, 1 ),
+     $                      LDA )
+               IE = 1
+               ITAUQ = 1
+               ITAUP = ITAUQ + N
+               IWORK = ITAUP + N
+*
+*              Bidiagonalize R in A
+*              (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
+*              (RWorkspace: need N)
+*
+               CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
+     $                      IERR )
+               NCVT = 0
+               IF( WNTVO .OR. WNTVAS ) THEN
+*
+*                 If right singular vectors desired, generate P'.
+*                 (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL ZUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  NCVT = N
+               END IF
+               IRWORK = IE + N
+*
+*              Perform bidiagonal QR iteration, computing right
+*              singular vectors of A in A if desired
+*              (CWorkspace: 0)
+*              (RWorkspace: need BDSPAC)
+*
+               CALL ZBDSQR( 'U', N, NCVT, 0, 0, S, RWORK( IE ), A, LDA,
+     $                      CDUM, 1, CDUM, 1, RWORK( IRWORK ), INFO )
+*
+*              If right singular vectors desired in VT, copy them there
+*
+               IF( WNTVAS )
+     $            CALL ZLACPY( 'F', N, N, A, LDA, VT, LDVT )
+*
+            ELSE IF( WNTUO .AND. WNTVN ) THEN
+*
+*              Path 2 (M much larger than N, JOBU='O', JOBVT='N')
+*              N left singular vectors to be overwritten on A and
+*              no right singular vectors to be computed
+*
+               IF( LWORK.GE.N*N+3*N ) THEN
+*
+*                 Sufficient workspace for a fast algorithm
+*
+                  IR = 1
+                  IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*N ) THEN
+*
+*                    WORK(IU) is LDA by N, WORK(IR) is LDA by N
+*
+                     LDWRKU = LDA
+                     LDWRKR = LDA
+                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+N*N ) THEN
+*
+*                    WORK(IU) is LDA by N, WORK(IR) is N by N
+*
+                     LDWRKU = LDA
+                     LDWRKR = N
+                  ELSE
+*
+*                    WORK(IU) is LDWRKU by N, WORK(IR) is N by N
+*
+                     LDWRKU = ( LWORK-N*N ) / N
+                     LDWRKR = N
+                  END IF
+                  ITAU = IR + LDWRKR*N
+                  IWORK = ITAU + N
+*
+*                 Compute A=Q*R
+*                 (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Copy R to WORK(IR) and zero out below it
+*
+                  CALL ZLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
+                  CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
+     $                         WORK( IR+1 ), LDWRKR )
+*
+*                 Generate Q in A
+*                 (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IE = 1
+                  ITAUQ = ITAU
+                  ITAUP = ITAUQ + N
+                  IWORK = ITAUP + N
+*
+*                 Bidiagonalize R in WORK(IR)
+*                 (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
+*                 (RWorkspace: need N)
+*
+                  CALL ZGEBRD( N, N, WORK( IR ), LDWRKR, S, RWORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Generate left vectors bidiagonalizing R
+*                 (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+*                 (RWorkspace: need 0)
+*
+                  CALL ZUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
+     $                         WORK( ITAUQ ), WORK( IWORK ),
+     $                         LWORK-IWORK+1, IERR )
+                  IRWORK = IE + N
+*
+*                 Perform bidiagonal QR iteration, computing left
+*                 singular vectors of R in WORK(IR)
+*                 (CWorkspace: need N*N)
+*                 (RWorkspace: need BDSPAC)
+*
+                  CALL ZBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM, 1,
+     $                         WORK( IR ), LDWRKR, CDUM, 1,
+     $                         RWORK( IRWORK ), INFO )
+                  IU = ITAUQ
+*
+*                 Multiply Q in A by left singular vectors of R in
+*                 WORK(IR), storing result in WORK(IU) and copying to A
+*                 (CWorkspace: need N*N+N, prefer N*N+M*N)
+*                 (RWorkspace: 0)
+*
+                  DO 10 I = 1, M, LDWRKU
+                     CHUNK = MIN( M-I+1, LDWRKU )
+                     CALL ZGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ),
+     $                           LDA, WORK( IR ), LDWRKR, CZERO,
+     $                           WORK( IU ), LDWRKU )
+                     CALL ZLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
+     $                            A( I, 1 ), LDA )
+   10             CONTINUE
+*
+               ELSE
+*
+*                 Insufficient workspace for a fast algorithm
+*
+                  IE = 1
+                  ITAUQ = 1
+                  ITAUP = ITAUQ + N
+                  IWORK = ITAUP + N
+*
+*                 Bidiagonalize A
+*                 (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
+*                 (RWorkspace: N)
+*
+                  CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Generate left vectors bidiagonalizing A
+*                 (CWorkspace: need 3*N, prefer 2*N+N*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL ZUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IRWORK = IE + N
+*
+*                 Perform bidiagonal QR iteration, computing left
+*                 singular vectors of A in A
+*                 (CWorkspace: need 0)
+*                 (RWorkspace: need BDSPAC)
+*
+                  CALL ZBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM, 1,
+     $                         A, LDA, CDUM, 1, RWORK( IRWORK ), INFO )
+*
+               END IF
+*
+            ELSE IF( WNTUO .AND. WNTVAS ) THEN
+*
+*              Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
+*              N left singular vectors to be overwritten on A and
+*              N right singular vectors to be computed in VT
+*
+               IF( LWORK.GE.N*N+3*N ) THEN
+*
+*                 Sufficient workspace for a fast algorithm
+*
+                  IR = 1
+                  IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*N ) THEN
+*
+*                    WORK(IU) is LDA by N and WORK(IR) is LDA by N
+*
+                     LDWRKU = LDA
+                     LDWRKR = LDA
+                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+N*N ) THEN
+*
+*                    WORK(IU) is LDA by N and WORK(IR) is N by N
+*
+                     LDWRKU = LDA
+                     LDWRKR = N
+                  ELSE
+*
+*                    WORK(IU) is LDWRKU by N and WORK(IR) is N by N
+*
+                     LDWRKU = ( LWORK-N*N ) / N
+                     LDWRKR = N
+                  END IF
+                  ITAU = IR + LDWRKR*N
+                  IWORK = ITAU + N
+*
+*                 Compute A=Q*R
+*                 (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Copy R to VT, zeroing out below it
+*
+                  CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT )
+                  CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, VT( 2, 1 ),
+     $                         LDVT )
+*
+*                 Generate Q in A
+*                 (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IE = 1
+                  ITAUQ = ITAU
+                  ITAUP = ITAUQ + N
+                  IWORK = ITAUP + N
+*
+*                 Bidiagonalize R in VT, copying result to WORK(IR)
+*                 (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
+*                 (RWorkspace: need N)
+*
+                  CALL ZGEBRD( N, N, VT, LDVT, S, RWORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  CALL ZLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR )
+*
+*                 Generate left vectors bidiagonalizing R in WORK(IR)
+*                 (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL ZUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
+     $                         WORK( ITAUQ ), WORK( IWORK ),
+     $                         LWORK-IWORK+1, IERR )
+*
+*                 Generate right vectors bidiagonalizing R in VT
+*                 (CWorkspace: need N*N+3*N-1, prefer N*N+2*N+(N-1)*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IRWORK = IE + N
+*
+*                 Perform bidiagonal QR iteration, computing left
+*                 singular vectors of R in WORK(IR) and computing right
+*                 singular vectors of R in VT
+*                 (CWorkspace: need N*N)
+*                 (RWorkspace: need BDSPAC)
+*
+                  CALL ZBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT,
+     $                         LDVT, WORK( IR ), LDWRKR, CDUM, 1,
+     $                         RWORK( IRWORK ), INFO )
+                  IU = ITAUQ
+*
+*                 Multiply Q in A by left singular vectors of R in
+*                 WORK(IR), storing result in WORK(IU) and copying to A
+*                 (CWorkspace: need N*N+N, prefer N*N+M*N)
+*                 (RWorkspace: 0)
+*
+                  DO 20 I = 1, M, LDWRKU
+                     CHUNK = MIN( M-I+1, LDWRKU )
+                     CALL ZGEMM( 'N', 'N', CHUNK, N, N, CONE, A( I, 1 ),
+     $                           LDA, WORK( IR ), LDWRKR, CZERO,
+     $                           WORK( IU ), LDWRKU )
+                     CALL ZLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
+     $                            A( I, 1 ), LDA )
+   20             CONTINUE
+*
+               ELSE
+*
+*                 Insufficient workspace for a fast algorithm
+*
+                  ITAU = 1
+                  IWORK = ITAU + N
+*
+*                 Compute A=Q*R
+*                 (CWorkspace: need 2*N, prefer N+N*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Copy R to VT, zeroing out below it
+*
+                  CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT )
+                  CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO, VT( 2, 1 ),
+     $                         LDVT )
+*
+*                 Generate Q in A
+*                 (CWorkspace: need 2*N, prefer N+N*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IE = 1
+                  ITAUQ = ITAU
+                  ITAUP = ITAUQ + N
+                  IWORK = ITAUP + N
+*
+*                 Bidiagonalize R in VT
+*                 (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
+*                 (RWorkspace: N)
+*
+                  CALL ZGEBRD( N, N, VT, LDVT, S, RWORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Multiply Q in A by left vectors bidiagonalizing R
+*                 (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
+     $                         WORK( ITAUQ ), A, LDA, WORK( IWORK ),
+     $                         LWORK-IWORK+1, IERR )
+*
+*                 Generate right vectors bidiagonalizing R in VT
+*                 (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IRWORK = IE + N
+*
+*                 Perform bidiagonal QR iteration, computing left
+*                 singular vectors of A in A and computing right
+*                 singular vectors of A in VT
+*                 (CWorkspace: 0)
+*                 (RWorkspace: need BDSPAC)
+*
+                  CALL ZBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT,
+     $                         LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ),
+     $                         INFO )
+*
+               END IF
+*
+            ELSE IF( WNTUS ) THEN
+*
+               IF( WNTVN ) THEN
+*
+*                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
+*                 N left singular vectors to be computed in U and
+*                 no right singular vectors to be computed
+*
+                  IF( LWORK.GE.N*N+3*N ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IR = 1
+                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
+*
+*                       WORK(IR) is LDA by N
+*
+                        LDWRKR = LDA
+                     ELSE
+*
+*                       WORK(IR) is N by N
+*
+                        LDWRKR = N
+                     END IF
+                     ITAU = IR + LDWRKR*N
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R
+*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy R to WORK(IR), zeroing out below it
+*
+                     CALL ZLACPY( 'U', N, N, A, LDA, WORK( IR ),
+     $                            LDWRKR )
+                     CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
+     $                            WORK( IR+1 ), LDWRKR )
+*
+*                    Generate Q in A
+*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in WORK(IR)
+*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
+*                    (RWorkspace: need N)
+*
+                     CALL ZGEBRD( N, N, WORK( IR ), LDWRKR, S,
+     $                            RWORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate left vectors bidiagonalizing R in WORK(IR)
+*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IRWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of R in WORK(IR)
+*                    (CWorkspace: need N*N)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL ZBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM,
+     $                            1, WORK( IR ), LDWRKR, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+*                    Multiply Q in A by left singular vectors of R in
+*                    WORK(IR), storing result in U
+*                    (CWorkspace: need N*N)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGEMM( 'N', 'N', M, N, N, CONE, A, LDA,
+     $                           WORK( IR ), LDWRKR, CZERO, U, LDU )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (CWorkspace: need 2*N, prefer N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (CWorkspace: need 2*N, prefer N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGQR( M, N, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Zero out below R in A
+*
+                     CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
+     $                            A( 2, 1 ), LDA )
+*
+*                    Bidiagonalize R in A
+*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
+*                    (RWorkspace: need N)
+*
+                     CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply Q in U by left vectors bidiagonalizing R
+*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
+     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IRWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U
+*                    (CWorkspace: 0)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL ZBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM,
+     $                            1, U, LDU, CDUM, 1, RWORK( IRWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTVO ) THEN
+*
+*                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
+*                 N left singular vectors to be computed in U and
+*                 N right singular vectors to be overwritten on A
+*
+                  IF( LWORK.GE.2*N*N+3*N ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
+*
+*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*N
+                        LDWRKR = LDA
+                     ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
+*
+*                       WORK(IU) is LDA by N and WORK(IR) is N by N
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*N
+                        LDWRKR = N
+                     ELSE
+*
+*                       WORK(IU) is N by N and WORK(IR) is N by N
+*
+                        LDWRKU = N
+                        IR = IU + LDWRKU*N
+                        LDWRKR = N
+                     END IF
+                     ITAU = IR + LDWRKR*N
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R
+*                    (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy R to WORK(IU), zeroing out below it
+*
+                     CALL ZLACPY( 'U', N, N, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
+     $                            WORK( IU+1 ), LDWRKU )
+*
+*                    Generate Q in A
+*                    (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in WORK(IU), copying result to
+*                    WORK(IR)
+*                    (CWorkspace: need   2*N*N+3*N,
+*                                 prefer 2*N*N+2*N+2*N*NB)
+*                    (RWorkspace: need   N)
+*
+                     CALL ZGEBRD( N, N, WORK( IU ), LDWRKU, S,
+     $                            RWORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL ZLACPY( 'U', N, N, WORK( IU ), LDWRKU,
+     $                            WORK( IR ), LDWRKR )
+*
+*                    Generate left bidiagonalizing vectors in WORK(IU)
+*                    (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in WORK(IR)
+*                    (CWorkspace: need   2*N*N+3*N-1,
+*                                 prefer 2*N*N+2*N+(N-1)*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IRWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of R in WORK(IU) and computing
+*                    right singular vectors of R in WORK(IR)
+*                    (CWorkspace: need 2*N*N)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL ZBDSQR( 'U', N, N, N, 0, S, RWORK( IE ),
+     $                            WORK( IR ), LDWRKR, WORK( IU ),
+     $                            LDWRKU, CDUM, 1, RWORK( IRWORK ),
+     $                            INFO )
+*
+*                    Multiply Q in A by left singular vectors of R in
+*                    WORK(IU), storing result in U
+*                    (CWorkspace: need N*N)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGEMM( 'N', 'N', M, N, N, CONE, A, LDA,
+     $                           WORK( IU ), LDWRKU, CZERO, U, LDU )
+*
+*                    Copy right singular vectors of R to A
+*                    (CWorkspace: need N*N)
+*                    (RWorkspace: 0)
+*
+                     CALL ZLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
+     $                            LDA )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (CWorkspace: need 2*N, prefer N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (CWorkspace: need 2*N, prefer N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGQR( M, N, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Zero out below R in A
+*
+                     CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
+     $                            A( 2, 1 ), LDA )
+*
+*                    Bidiagonalize R in A
+*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
+*                    (RWorkspace: need N)
+*
+                     CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply Q in U by left vectors bidiagonalizing R
+*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
+     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right vectors bidiagonalizing R in A
+*                    (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IRWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U and computing right
+*                    singular vectors of A in A
+*                    (CWorkspace: 0)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL ZBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), A,
+     $                            LDA, U, LDU, CDUM, 1, RWORK( IRWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTVAS ) THEN
+*
+*                 Path 6 (M much larger than N, JOBU='S', JOBVT='S'
+*                         or 'A')
+*                 N left singular vectors to be computed in U and
+*                 N right singular vectors to be computed in VT
+*
+                  IF( LWORK.GE.N*N+3*N ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
+*
+*                       WORK(IU) is LDA by N
+*
+                        LDWRKU = LDA
+                     ELSE
+*
+*                       WORK(IU) is N by N
+*
+                        LDWRKU = N
+                     END IF
+                     ITAU = IU + LDWRKU*N
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R
+*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy R to WORK(IU), zeroing out below it
+*
+                     CALL ZLACPY( 'U', N, N, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
+     $                            WORK( IU+1 ), LDWRKU )
+*
+*                    Generate Q in A
+*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGQR( M, N, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in WORK(IU), copying result to VT
+*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
+*                    (RWorkspace: need N)
+*
+                     CALL ZGEBRD( N, N, WORK( IU ), LDWRKU, S,
+     $                            RWORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL ZLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
+     $                            LDVT )
+*
+*                    Generate left bidiagonalizing vectors in WORK(IU)
+*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in VT
+*                    (CWorkspace: need   N*N+3*N-1,
+*                                 prefer N*N+2*N+(N-1)*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IRWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of R in WORK(IU) and computing
+*                    right singular vectors of R in VT
+*                    (CWorkspace: need N*N)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL ZBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT,
+     $                            LDVT, WORK( IU ), LDWRKU, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+*                    Multiply Q in A by left singular vectors of R in
+*                    WORK(IU), storing result in U
+*                    (CWorkspace: need N*N)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGEMM( 'N', 'N', M, N, N, CONE, A, LDA,
+     $                           WORK( IU ), LDWRKU, CZERO, U, LDU )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (CWorkspace: need 2*N, prefer N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (CWorkspace: need 2*N, prefer N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGQR( M, N, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy R to VT, zeroing out below it
+*
+                     CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT )
+                     CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
+     $                            VT( 2, 1 ), LDVT )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in VT
+*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
+*                    (RWorkspace: need N)
+*
+                     CALL ZGEBRD( N, N, VT, LDVT, S, RWORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply Q in U by left bidiagonalizing vectors
+*                    in VT
+*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
+     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in VT
+*                    (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IRWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U and computing right
+*                    singular vectors of A in VT
+*                    (CWorkspace: 0)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL ZBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT,
+     $                            LDVT, U, LDU, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+                  END IF
+*
+               END IF
+*
+            ELSE IF( WNTUA ) THEN
+*
+               IF( WNTVN ) THEN
+*
+*                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
+*                 M left singular vectors to be computed in U and
+*                 no right singular vectors to be computed
+*
+                  IF( LWORK.GE.N*N+MAX( N+M, 3*N ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IR = 1
+                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
+*
+*                       WORK(IR) is LDA by N
+*
+                        LDWRKR = LDA
+                     ELSE
+*
+*                       WORK(IR) is N by N
+*
+                        LDWRKR = N
+                     END IF
+                     ITAU = IR + LDWRKR*N
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Copy R to WORK(IR), zeroing out below it
+*
+                     CALL ZLACPY( 'U', N, N, A, LDA, WORK( IR ),
+     $                            LDWRKR )
+                     CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
+     $                            WORK( IR+1 ), LDWRKR )
+*
+*                    Generate Q in U
+*                    (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in WORK(IR)
+*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
+*                    (RWorkspace: need N)
+*
+                     CALL ZGEBRD( N, N, WORK( IR ), LDWRKR, S,
+     $                            RWORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in WORK(IR)
+*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IRWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of R in WORK(IR)
+*                    (CWorkspace: need N*N)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL ZBDSQR( 'U', N, 0, N, 0, S, RWORK( IE ), CDUM,
+     $                            1, WORK( IR ), LDWRKR, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+*                    Multiply Q in U by left singular vectors of R in
+*                    WORK(IR), storing result in A
+*                    (CWorkspace: need N*N)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGEMM( 'N', 'N', M, N, N, CONE, U, LDU,
+     $                           WORK( IR ), LDWRKR, CZERO, A, LDA )
+*
+*                    Copy left singular vectors of A from A to U
+*
+                     CALL ZLACPY( 'F', M, N, A, LDA, U, LDU )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (CWorkspace: need 2*N, prefer N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (CWorkspace: need N+M, prefer N+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Zero out below R in A
+*
+                     CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
+     $                            A( 2, 1 ), LDA )
+*
+*                    Bidiagonalize R in A
+*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
+*                    (RWorkspace: need N)
+*
+                     CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply Q in U by left bidiagonalizing vectors
+*                    in A
+*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
+     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IRWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U
+*                    (CWorkspace: 0)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL ZBDSQR( 'U', N, 0, M, 0, S, RWORK( IE ), CDUM,
+     $                            1, U, LDU, CDUM, 1, RWORK( IRWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTVO ) THEN
+*
+*                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
+*                 M left singular vectors to be computed in U and
+*                 N right singular vectors to be overwritten on A
+*
+                  IF( LWORK.GE.2*N*N+MAX( N+M, 3*N ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
+*
+*                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*N
+                        LDWRKR = LDA
+                     ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
+*
+*                       WORK(IU) is LDA by N and WORK(IR) is N by N
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*N
+                        LDWRKR = N
+                     ELSE
+*
+*                       WORK(IU) is N by N and WORK(IR) is N by N
+*
+                        LDWRKU = N
+                        IR = IU + LDWRKU*N
+                        LDWRKR = N
+                     END IF
+                     ITAU = IR + LDWRKR*N
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (CWorkspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (CWorkspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy R to WORK(IU), zeroing out below it
+*
+                     CALL ZLACPY( 'U', N, N, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
+     $                            WORK( IU+1 ), LDWRKU )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in WORK(IU), copying result to
+*                    WORK(IR)
+*                    (CWorkspace: need   2*N*N+3*N,
+*                                 prefer 2*N*N+2*N+2*N*NB)
+*                    (RWorkspace: need   N)
+*
+                     CALL ZGEBRD( N, N, WORK( IU ), LDWRKU, S,
+     $                            RWORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL ZLACPY( 'U', N, N, WORK( IU ), LDWRKU,
+     $                            WORK( IR ), LDWRKR )
+*
+*                    Generate left bidiagonalizing vectors in WORK(IU)
+*                    (CWorkspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in WORK(IR)
+*                    (CWorkspace: need   2*N*N+3*N-1,
+*                                 prefer 2*N*N+2*N+(N-1)*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IRWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of R in WORK(IU) and computing
+*                    right singular vectors of R in WORK(IR)
+*                    (CWorkspace: need 2*N*N)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL ZBDSQR( 'U', N, N, N, 0, S, RWORK( IE ),
+     $                            WORK( IR ), LDWRKR, WORK( IU ),
+     $                            LDWRKU, CDUM, 1, RWORK( IRWORK ),
+     $                            INFO )
+*
+*                    Multiply Q in U by left singular vectors of R in
+*                    WORK(IU), storing result in A
+*                    (CWorkspace: need N*N)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGEMM( 'N', 'N', M, N, N, CONE, U, LDU,
+     $                           WORK( IU ), LDWRKU, CZERO, A, LDA )
+*
+*                    Copy left singular vectors of A from A to U
+*
+                     CALL ZLACPY( 'F', M, N, A, LDA, U, LDU )
+*
+*                    Copy right singular vectors of R from WORK(IR) to A
+*
+                     CALL ZLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
+     $                            LDA )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (CWorkspace: need 2*N, prefer N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (CWorkspace: need N+M, prefer N+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Zero out below R in A
+*
+                     CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
+     $                            A( 2, 1 ), LDA )
+*
+*                    Bidiagonalize R in A
+*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
+*                    (RWorkspace: need N)
+*
+                     CALL ZGEBRD( N, N, A, LDA, S, RWORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply Q in U by left bidiagonalizing vectors
+*                    in A
+*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
+     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in A
+*                    (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IRWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U and computing right
+*                    singular vectors of A in A
+*                    (CWorkspace: 0)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL ZBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), A,
+     $                            LDA, U, LDU, CDUM, 1, RWORK( IRWORK ),
+     $                            INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTVAS ) THEN
+*
+*                 Path 9 (M much larger than N, JOBU='A', JOBVT='S'
+*                         or 'A')
+*                 M left singular vectors to be computed in U and
+*                 N right singular vectors to be computed in VT
+*
+                  IF( LWORK.GE.N*N+MAX( N+M, 3*N ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+LDA*N ) THEN
+*
+*                       WORK(IU) is LDA by N
+*
+                        LDWRKU = LDA
+                     ELSE
+*
+*                       WORK(IU) is N by N
+*
+                        LDWRKU = N
+                     END IF
+                     ITAU = IU + LDWRKU*N
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (CWorkspace: need N*N+N+M, prefer N*N+N+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy R to WORK(IU), zeroing out below it
+*
+                     CALL ZLACPY( 'U', N, N, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
+     $                            WORK( IU+1 ), LDWRKU )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in WORK(IU), copying result to VT
+*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
+*                    (RWorkspace: need N)
+*
+                     CALL ZGEBRD( N, N, WORK( IU ), LDWRKU, S,
+     $                            RWORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL ZLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
+     $                            LDVT )
+*
+*                    Generate left bidiagonalizing vectors in WORK(IU)
+*                    (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in VT
+*                    (CWorkspace: need   N*N+3*N-1,
+*                                 prefer N*N+2*N+(N-1)*NB)
+*                    (RWorkspace: need   0)
+*
+                     CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IRWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of R in WORK(IU) and computing
+*                    right singular vectors of R in VT
+*                    (CWorkspace: need N*N)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL ZBDSQR( 'U', N, N, N, 0, S, RWORK( IE ), VT,
+     $                            LDVT, WORK( IU ), LDWRKU, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+*                    Multiply Q in U by left singular vectors of R in
+*                    WORK(IU), storing result in A
+*                    (CWorkspace: need N*N)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGEMM( 'N', 'N', M, N, N, CONE, U, LDU,
+     $                           WORK( IU ), LDWRKU, CZERO, A, LDA )
+*
+*                    Copy left singular vectors of A from A to U
+*
+                     CALL ZLACPY( 'F', M, N, A, LDA, U, LDU )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + N
+*
+*                    Compute A=Q*R, copying result to U
+*                    (CWorkspace: need 2*N, prefer N+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGEQRF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
+*
+*                    Generate Q in U
+*                    (CWorkspace: need N+M, prefer N+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGQR( M, M, N, U, LDU, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy R from A to VT, zeroing out below it
+*
+                     CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT )
+                     CALL ZLASET( 'L', N-1, N-1, CZERO, CZERO,
+     $                            VT( 2, 1 ), LDVT )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + N
+                     IWORK = ITAUP + N
+*
+*                    Bidiagonalize R in VT
+*                    (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
+*                    (RWorkspace: need N)
+*
+                     CALL ZGEBRD( N, N, VT, LDVT, S, RWORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply Q in U by left bidiagonalizing vectors
+*                    in VT
+*                    (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
+     $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in VT
+*                    (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IRWORK = IE + N
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U and computing right
+*                    singular vectors of A in VT
+*                    (CWorkspace: 0)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL ZBDSQR( 'U', N, N, M, 0, S, RWORK( IE ), VT,
+     $                            LDVT, U, LDU, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+                  END IF
+*
+               END IF
+*
+            END IF
+*
+         ELSE
+*
+*           M .LT. MNTHR
+*
+*           Path 10 (M at least N, but not much larger)
+*           Reduce to bidiagonal form without QR decomposition
+*
+            IE = 1
+            ITAUQ = 1
+            ITAUP = ITAUQ + N
+            IWORK = ITAUP + N
+*
+*           Bidiagonalize A
+*           (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
+*           (RWorkspace: need N)
+*
+            CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
+     $                   IERR )
+            IF( WNTUAS ) THEN
+*
+*              If left singular vectors desired in U, copy result to U
+*              and generate left bidiagonalizing vectors in U
+*              (CWorkspace: need 2*N+NCU, prefer 2*N+NCU*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZLACPY( 'L', M, N, A, LDA, U, LDU )
+               IF( WNTUS )
+     $            NCU = N
+               IF( WNTUA )
+     $            NCU = M
+               CALL ZUNGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IF( WNTVAS ) THEN
+*
+*              If right singular vectors desired in VT, copy result to
+*              VT and generate right bidiagonalizing vectors in VT
+*              (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZLACPY( 'U', N, N, A, LDA, VT, LDVT )
+               CALL ZUNGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IF( WNTUO ) THEN
+*
+*              If left singular vectors desired in A, generate left
+*              bidiagonalizing vectors in A
+*              (CWorkspace: need 3*N, prefer 2*N+N*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZUNGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IF( WNTVO ) THEN
+*
+*              If right singular vectors desired in A, generate right
+*              bidiagonalizing vectors in A
+*              (CWorkspace: need 3*N-1, prefer 2*N+(N-1)*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZUNGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IRWORK = IE + N
+            IF( WNTUAS .OR. WNTUO )
+     $         NRU = M
+            IF( WNTUN )
+     $         NRU = 0
+            IF( WNTVAS .OR. WNTVO )
+     $         NCVT = N
+            IF( WNTVN )
+     $         NCVT = 0
+            IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
+*
+*              Perform bidiagonal QR iteration, if desired, computing
+*              left singular vectors in U and computing right singular
+*              vectors in VT
+*              (CWorkspace: 0)
+*              (RWorkspace: need BDSPAC)
+*
+               CALL ZBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), VT,
+     $                      LDVT, U, LDU, CDUM, 1, RWORK( IRWORK ),
+     $                      INFO )
+            ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
+*
+*              Perform bidiagonal QR iteration, if desired, computing
+*              left singular vectors in U and computing right singular
+*              vectors in A
+*              (CWorkspace: 0)
+*              (RWorkspace: need BDSPAC)
+*
+               CALL ZBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), A,
+     $                      LDA, U, LDU, CDUM, 1, RWORK( IRWORK ),
+     $                      INFO )
+            ELSE
+*
+*              Perform bidiagonal QR iteration, if desired, computing
+*              left singular vectors in A and computing right singular
+*              vectors in VT
+*              (CWorkspace: 0)
+*              (RWorkspace: need BDSPAC)
+*
+               CALL ZBDSQR( 'U', N, NCVT, NRU, 0, S, RWORK( IE ), VT,
+     $                      LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ),
+     $                      INFO )
+            END IF
+*
+         END IF
+*
+      ELSE
+*
+*        A has more columns than rows. If A has sufficiently more
+*        columns than rows, first reduce using the LQ decomposition (if
+*        sufficient workspace available)
+*
+         IF( N.GE.MNTHR ) THEN
+*
+            IF( WNTVN ) THEN
+*
+*              Path 1t(N much larger than M, JOBVT='N')
+*              No right singular vectors to be computed
+*
+               ITAU = 1
+               IWORK = ITAU + M
+*
+*              Compute A=L*Q
+*              (CWorkspace: need 2*M, prefer M+M*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
+     $                      LWORK-IWORK+1, IERR )
+*
+*              Zero out above L
+*
+               CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, A( 1, 2 ),
+     $                      LDA )
+               IE = 1
+               ITAUQ = 1
+               ITAUP = ITAUQ + M
+               IWORK = ITAUP + M
+*
+*              Bidiagonalize L in A
+*              (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
+*              (RWorkspace: need M)
+*
+               CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
+     $                      IERR )
+               IF( WNTUO .OR. WNTUAS ) THEN
+*
+*                 If left singular vectors desired, generate Q
+*                 (CWorkspace: need 3*M, prefer 2*M+M*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL ZUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+               END IF
+               IRWORK = IE + M
+               NRU = 0
+               IF( WNTUO .OR. WNTUAS )
+     $            NRU = M
+*
+*              Perform bidiagonal QR iteration, computing left singular
+*              vectors of A in A if desired
+*              (CWorkspace: 0)
+*              (RWorkspace: need BDSPAC)
+*
+               CALL ZBDSQR( 'U', M, 0, NRU, 0, S, RWORK( IE ), CDUM, 1,
+     $                      A, LDA, CDUM, 1, RWORK( IRWORK ), INFO )
+*
+*              If left singular vectors desired in U, copy them there
+*
+               IF( WNTUAS )
+     $            CALL ZLACPY( 'F', M, M, A, LDA, U, LDU )
+*
+            ELSE IF( WNTVO .AND. WNTUN ) THEN
+*
+*              Path 2t(N much larger than M, JOBU='N', JOBVT='O')
+*              M right singular vectors to be overwritten on A and
+*              no left singular vectors to be computed
+*
+               IF( LWORK.GE.M*M+3*M ) THEN
+*
+*                 Sufficient workspace for a fast algorithm
+*
+                  IR = 1
+                  IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*M ) THEN
+*
+*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
+*
+                     LDWRKU = LDA
+                     CHUNK = N
+                     LDWRKR = LDA
+                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+M*M ) THEN
+*
+*                    WORK(IU) is LDA by N and WORK(IR) is M by M
+*
+                     LDWRKU = LDA
+                     CHUNK = N
+                     LDWRKR = M
+                  ELSE
+*
+*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
+*
+                     LDWRKU = M
+                     CHUNK = ( LWORK-M*M ) / M
+                     LDWRKR = M
+                  END IF
+                  ITAU = IR + LDWRKR*M
+                  IWORK = ITAU + M
+*
+*                 Compute A=L*Q
+*                 (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Copy L to WORK(IR) and zero out above it
+*
+                  CALL ZLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR )
+                  CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
+     $                         WORK( IR+LDWRKR ), LDWRKR )
+*
+*                 Generate Q in A
+*                 (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IE = 1
+                  ITAUQ = ITAU
+                  ITAUP = ITAUQ + M
+                  IWORK = ITAUP + M
+*
+*                 Bidiagonalize L in WORK(IR)
+*                 (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
+*                 (RWorkspace: need M)
+*
+                  CALL ZGEBRD( M, M, WORK( IR ), LDWRKR, S, RWORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Generate right vectors bidiagonalizing L
+*                 (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL ZUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
+     $                         WORK( ITAUP ), WORK( IWORK ),
+     $                         LWORK-IWORK+1, IERR )
+                  IRWORK = IE + M
+*
+*                 Perform bidiagonal QR iteration, computing right
+*                 singular vectors of L in WORK(IR)
+*                 (CWorkspace: need M*M)
+*                 (RWorkspace: need BDSPAC)
+*
+                  CALL ZBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ),
+     $                         WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1,
+     $                         RWORK( IRWORK ), INFO )
+                  IU = ITAUQ
+*
+*                 Multiply right singular vectors of L in WORK(IR) by Q
+*                 in A, storing result in WORK(IU) and copying to A
+*                 (CWorkspace: need M*M+M, prefer M*M+M*N)
+*                 (RWorkspace: 0)
+*
+                  DO 30 I = 1, N, CHUNK
+                     BLK = MIN( N-I+1, CHUNK )
+                     CALL ZGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IR ),
+     $                           LDWRKR, A( 1, I ), LDA, CZERO,
+     $                           WORK( IU ), LDWRKU )
+                     CALL ZLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
+     $                            A( 1, I ), LDA )
+   30             CONTINUE
+*
+               ELSE
+*
+*                 Insufficient workspace for a fast algorithm
+*
+                  IE = 1
+                  ITAUQ = 1
+                  ITAUP = ITAUQ + M
+                  IWORK = ITAUP + M
+*
+*                 Bidiagonalize A
+*                 (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
+*                 (RWorkspace: need M)
+*
+                  CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Generate right vectors bidiagonalizing A
+*                 (CWorkspace: need 3*M, prefer 2*M+M*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL ZUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IRWORK = IE + M
+*
+*                 Perform bidiagonal QR iteration, computing right
+*                 singular vectors of A in A
+*                 (CWorkspace: 0)
+*                 (RWorkspace: need BDSPAC)
+*
+                  CALL ZBDSQR( 'L', M, N, 0, 0, S, RWORK( IE ), A, LDA,
+     $                         CDUM, 1, CDUM, 1, RWORK( IRWORK ), INFO )
+*
+               END IF
+*
+            ELSE IF( WNTVO .AND. WNTUAS ) THEN
+*
+*              Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
+*              M right singular vectors to be overwritten on A and
+*              M left singular vectors to be computed in U
+*
+               IF( LWORK.GE.M*M+3*M ) THEN
+*
+*                 Sufficient workspace for a fast algorithm
+*
+                  IR = 1
+                  IF( LWORK.GE.MAX( WRKBL, LDA*N )+LDA*M ) THEN
+*
+*                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
+*
+                     LDWRKU = LDA
+                     CHUNK = N
+                     LDWRKR = LDA
+                  ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N )+M*M ) THEN
+*
+*                    WORK(IU) is LDA by N and WORK(IR) is M by M
+*
+                     LDWRKU = LDA
+                     CHUNK = N
+                     LDWRKR = M
+                  ELSE
+*
+*                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
+*
+                     LDWRKU = M
+                     CHUNK = ( LWORK-M*M ) / M
+                     LDWRKR = M
+                  END IF
+                  ITAU = IR + LDWRKR*M
+                  IWORK = ITAU + M
+*
+*                 Compute A=L*Q
+*                 (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Copy L to U, zeroing about above it
+*
+                  CALL ZLACPY( 'L', M, M, A, LDA, U, LDU )
+                  CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, U( 1, 2 ),
+     $                         LDU )
+*
+*                 Generate Q in A
+*                 (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IE = 1
+                  ITAUQ = ITAU
+                  ITAUP = ITAUQ + M
+                  IWORK = ITAUP + M
+*
+*                 Bidiagonalize L in U, copying result to WORK(IR)
+*                 (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
+*                 (RWorkspace: need M)
+*
+                  CALL ZGEBRD( M, M, U, LDU, S, RWORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  CALL ZLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR )
+*
+*                 Generate right vectors bidiagonalizing L in WORK(IR)
+*                 (CWorkspace: need M*M+3*M-1, prefer M*M+2*M+(M-1)*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL ZUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
+     $                         WORK( ITAUP ), WORK( IWORK ),
+     $                         LWORK-IWORK+1, IERR )
+*
+*                 Generate left vectors bidiagonalizing L in U
+*                 (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL ZUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IRWORK = IE + M
+*
+*                 Perform bidiagonal QR iteration, computing left
+*                 singular vectors of L in U, and computing right
+*                 singular vectors of L in WORK(IR)
+*                 (CWorkspace: need M*M)
+*                 (RWorkspace: need BDSPAC)
+*
+                  CALL ZBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
+     $                         WORK( IR ), LDWRKR, U, LDU, CDUM, 1,
+     $                         RWORK( IRWORK ), INFO )
+                  IU = ITAUQ
+*
+*                 Multiply right singular vectors of L in WORK(IR) by Q
+*                 in A, storing result in WORK(IU) and copying to A
+*                 (CWorkspace: need M*M+M, prefer M*M+M*N))
+*                 (RWorkspace: 0)
+*
+                  DO 40 I = 1, N, CHUNK
+                     BLK = MIN( N-I+1, CHUNK )
+                     CALL ZGEMM( 'N', 'N', M, BLK, M, CONE, WORK( IR ),
+     $                           LDWRKR, A( 1, I ), LDA, CZERO,
+     $                           WORK( IU ), LDWRKU )
+                     CALL ZLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
+     $                            A( 1, I ), LDA )
+   40             CONTINUE
+*
+               ELSE
+*
+*                 Insufficient workspace for a fast algorithm
+*
+                  ITAU = 1
+                  IWORK = ITAU + M
+*
+*                 Compute A=L*Q
+*                 (CWorkspace: need 2*M, prefer M+M*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Copy L to U, zeroing out above it
+*
+                  CALL ZLACPY( 'L', M, M, A, LDA, U, LDU )
+                  CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO, U( 1, 2 ),
+     $                         LDU )
+*
+*                 Generate Q in A
+*                 (CWorkspace: need 2*M, prefer M+M*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IE = 1
+                  ITAUQ = ITAU
+                  ITAUP = ITAUQ + M
+                  IWORK = ITAUP + M
+*
+*                 Bidiagonalize L in U
+*                 (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
+*                 (RWorkspace: need M)
+*
+                  CALL ZGEBRD( M, M, U, LDU, S, RWORK( IE ),
+     $                         WORK( ITAUQ ), WORK( ITAUP ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                 Multiply right vectors bidiagonalizing L by Q in A
+*                 (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL ZUNMBR( 'P', 'L', 'C', M, N, M, U, LDU,
+     $                         WORK( ITAUP ), A, LDA, WORK( IWORK ),
+     $                         LWORK-IWORK+1, IERR )
+*
+*                 Generate left vectors bidiagonalizing L in U
+*                 (CWorkspace: need 3*M, prefer 2*M+M*NB)
+*                 (RWorkspace: 0)
+*
+                  CALL ZUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
+     $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
+                  IRWORK = IE + M
+*
+*                 Perform bidiagonal QR iteration, computing left
+*                 singular vectors of A in U and computing right
+*                 singular vectors of A in A
+*                 (CWorkspace: 0)
+*                 (RWorkspace: need BDSPAC)
+*
+                  CALL ZBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), A, LDA,
+     $                         U, LDU, CDUM, 1, RWORK( IRWORK ), INFO )
+*
+               END IF
+*
+            ELSE IF( WNTVS ) THEN
+*
+               IF( WNTUN ) THEN
+*
+*                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
+*                 M right singular vectors to be computed in VT and
+*                 no left singular vectors to be computed
+*
+                  IF( LWORK.GE.M*M+3*M ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IR = 1
+                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
+*
+*                       WORK(IR) is LDA by M
+*
+                        LDWRKR = LDA
+                     ELSE
+*
+*                       WORK(IR) is M by M
+*
+                        LDWRKR = M
+                     END IF
+                     ITAU = IR + LDWRKR*M
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q
+*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy L to WORK(IR), zeroing out above it
+*
+                     CALL ZLACPY( 'L', M, M, A, LDA, WORK( IR ),
+     $                            LDWRKR )
+                     CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
+     $                            WORK( IR+LDWRKR ), LDWRKR )
+*
+*                    Generate Q in A
+*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in WORK(IR)
+*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
+*                    (RWorkspace: need M)
+*
+                     CALL ZGEBRD( M, M, WORK( IR ), LDWRKR, S,
+     $                            RWORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right vectors bidiagonalizing L in
+*                    WORK(IR)
+*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IRWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing right
+*                    singular vectors of L in WORK(IR)
+*                    (CWorkspace: need M*M)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL ZBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ),
+     $                            WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+*                    Multiply right singular vectors of L in WORK(IR) by
+*                    Q in A, storing result in VT
+*                    (CWorkspace: need M*M)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IR ),
+     $                           LDWRKR, A, LDA, CZERO, VT, LDVT )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q
+*                    (CWorkspace: need 2*M, prefer M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy result to VT
+*
+                     CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (CWorkspace: need 2*M, prefer M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Zero out above L in A
+*
+                     CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
+     $                            A( 1, 2 ), LDA )
+*
+*                    Bidiagonalize L in A
+*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
+*                    (RWorkspace: need M)
+*
+                     CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply right vectors bidiagonalizing L by Q in VT
+*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNMBR( 'P', 'L', 'C', M, N, M, A, LDA,
+     $                            WORK( ITAUP ), VT, LDVT,
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IRWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing right
+*                    singular vectors of A in VT
+*                    (CWorkspace: 0)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL ZBDSQR( 'U', M, N, 0, 0, S, RWORK( IE ), VT,
+     $                            LDVT, CDUM, 1, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTUO ) THEN
+*
+*                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
+*                 M right singular vectors to be computed in VT and
+*                 M left singular vectors to be overwritten on A
+*
+                  IF( LWORK.GE.2*M*M+3*M ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
+*
+*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*M
+                        LDWRKR = LDA
+                     ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
+*
+*                       WORK(IU) is LDA by M and WORK(IR) is M by M
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*M
+                        LDWRKR = M
+                     ELSE
+*
+*                       WORK(IU) is M by M and WORK(IR) is M by M
+*
+                        LDWRKU = M
+                        IR = IU + LDWRKU*M
+                        LDWRKR = M
+                     END IF
+                     ITAU = IR + LDWRKR*M
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q
+*                    (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy L to WORK(IU), zeroing out below it
+*
+                     CALL ZLACPY( 'L', M, M, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
+     $                            WORK( IU+LDWRKU ), LDWRKU )
+*
+*                    Generate Q in A
+*                    (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in WORK(IU), copying result to
+*                    WORK(IR)
+*                    (CWorkspace: need   2*M*M+3*M,
+*                                 prefer 2*M*M+2*M+2*M*NB)
+*                    (RWorkspace: need   M)
+*
+                     CALL ZGEBRD( M, M, WORK( IU ), LDWRKU, S,
+     $                            RWORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL ZLACPY( 'L', M, M, WORK( IU ), LDWRKU,
+     $                            WORK( IR ), LDWRKR )
+*
+*                    Generate right bidiagonalizing vectors in WORK(IU)
+*                    (CWorkspace: need   2*M*M+3*M-1,
+*                                 prefer 2*M*M+2*M+(M-1)*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in WORK(IR)
+*                    (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IRWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of L in WORK(IR) and computing
+*                    right singular vectors of L in WORK(IU)
+*                    (CWorkspace: need 2*M*M)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL ZBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
+     $                            WORK( IU ), LDWRKU, WORK( IR ),
+     $                            LDWRKR, CDUM, 1, RWORK( IRWORK ),
+     $                            INFO )
+*
+*                    Multiply right singular vectors of L in WORK(IU) by
+*                    Q in A, storing result in VT
+*                    (CWorkspace: need M*M)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ),
+     $                           LDWRKU, A, LDA, CZERO, VT, LDVT )
+*
+*                    Copy left singular vectors of L to A
+*                    (CWorkspace: need M*M)
+*                    (RWorkspace: 0)
+*
+                     CALL ZLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
+     $                            LDA )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (CWorkspace: need 2*M, prefer M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (CWorkspace: need 2*M, prefer M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Zero out above L in A
+*
+                     CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
+     $                            A( 1, 2 ), LDA )
+*
+*                    Bidiagonalize L in A
+*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
+*                    (RWorkspace: need M)
+*
+                     CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply right vectors bidiagonalizing L by Q in VT
+*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNMBR( 'P', 'L', 'C', M, N, M, A, LDA,
+     $                            WORK( ITAUP ), VT, LDVT,
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors of L in A
+*                    (CWorkspace: need 3*M, prefer 2*M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IRWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in A and computing right
+*                    singular vectors of A in VT
+*                    (CWorkspace: 0)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL ZBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT,
+     $                            LDVT, A, LDA, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTUAS ) THEN
+*
+*                 Path 6t(N much larger than M, JOBU='S' or 'A',
+*                         JOBVT='S')
+*                 M right singular vectors to be computed in VT and
+*                 M left singular vectors to be computed in U
+*
+                  IF( LWORK.GE.M*M+3*M ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
+*
+*                       WORK(IU) is LDA by N
+*
+                        LDWRKU = LDA
+                     ELSE
+*
+*                       WORK(IU) is LDA by M
+*
+                        LDWRKU = M
+                     END IF
+                     ITAU = IU + LDWRKU*M
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q
+*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy L to WORK(IU), zeroing out above it
+*
+                     CALL ZLACPY( 'L', M, M, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
+     $                            WORK( IU+LDWRKU ), LDWRKU )
+*
+*                    Generate Q in A
+*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGLQ( M, N, M, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in WORK(IU), copying result to U
+*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
+*                    (RWorkspace: need M)
+*
+                     CALL ZGEBRD( M, M, WORK( IU ), LDWRKU, S,
+     $                            RWORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL ZLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
+     $                            LDU )
+*
+*                    Generate right bidiagonalizing vectors in WORK(IU)
+*                    (CWorkspace: need   M*M+3*M-1,
+*                                 prefer M*M+2*M+(M-1)*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in U
+*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IRWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of L in U and computing right
+*                    singular vectors of L in WORK(IU)
+*                    (CWorkspace: need M*M)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL ZBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
+     $                            WORK( IU ), LDWRKU, U, LDU, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+*                    Multiply right singular vectors of L in WORK(IU) by
+*                    Q in A, storing result in VT
+*                    (CWorkspace: need M*M)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ),
+     $                           LDWRKU, A, LDA, CZERO, VT, LDVT )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (CWorkspace: need 2*M, prefer M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (CWorkspace: need 2*M, prefer M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy L to U, zeroing out above it
+*
+                     CALL ZLACPY( 'L', M, M, A, LDA, U, LDU )
+                     CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
+     $                            U( 1, 2 ), LDU )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in U
+*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
+*                    (RWorkspace: need M)
+*
+                     CALL ZGEBRD( M, M, U, LDU, S, RWORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply right bidiagonalizing vectors in U by Q
+*                    in VT
+*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNMBR( 'P', 'L', 'C', M, N, M, U, LDU,
+     $                            WORK( ITAUP ), VT, LDVT,
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in U
+*                    (CWorkspace: need 3*M, prefer 2*M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IRWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U and computing right
+*                    singular vectors of A in VT
+*                    (CWorkspace: 0)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL ZBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT,
+     $                            LDVT, U, LDU, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+                  END IF
+*
+               END IF
+*
+            ELSE IF( WNTVA ) THEN
+*
+               IF( WNTUN ) THEN
+*
+*                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
+*                 N right singular vectors to be computed in VT and
+*                 no left singular vectors to be computed
+*
+                  IF( LWORK.GE.M*M+MAX( N+M, 3*M ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IR = 1
+                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
+*
+*                       WORK(IR) is LDA by M
+*
+                        LDWRKR = LDA
+                     ELSE
+*
+*                       WORK(IR) is M by M
+*
+                        LDWRKR = M
+                     END IF
+                     ITAU = IR + LDWRKR*M
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Copy L to WORK(IR), zeroing out above it
+*
+                     CALL ZLACPY( 'L', M, M, A, LDA, WORK( IR ),
+     $                            LDWRKR )
+                     CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
+     $                            WORK( IR+LDWRKR ), LDWRKR )
+*
+*                    Generate Q in VT
+*                    (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in WORK(IR)
+*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
+*                    (RWorkspace: need M)
+*
+                     CALL ZGEBRD( M, M, WORK( IR ), LDWRKR, S,
+     $                            RWORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate right bidiagonalizing vectors in WORK(IR)
+*                    (CWorkspace: need   M*M+3*M-1,
+*                                 prefer M*M+2*M+(M-1)*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IRWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing right
+*                    singular vectors of L in WORK(IR)
+*                    (CWorkspace: need M*M)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL ZBDSQR( 'U', M, M, 0, 0, S, RWORK( IE ),
+     $                            WORK( IR ), LDWRKR, CDUM, 1, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+*                    Multiply right singular vectors of L in WORK(IR) by
+*                    Q in VT, storing result in A
+*                    (CWorkspace: need M*M)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IR ),
+     $                           LDWRKR, VT, LDVT, CZERO, A, LDA )
+*
+*                    Copy right singular vectors of A from A to VT
+*
+                     CALL ZLACPY( 'F', M, N, A, LDA, VT, LDVT )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (CWorkspace: need 2*M, prefer M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (CWorkspace: need M+N, prefer M+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Zero out above L in A
+*
+                     CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
+     $                            A( 1, 2 ), LDA )
+*
+*                    Bidiagonalize L in A
+*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
+*                    (RWorkspace: need M)
+*
+                     CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply right bidiagonalizing vectors in A by Q
+*                    in VT
+*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNMBR( 'P', 'L', 'C', M, N, M, A, LDA,
+     $                            WORK( ITAUP ), VT, LDVT,
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IRWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing right
+*                    singular vectors of A in VT
+*                    (CWorkspace: 0)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL ZBDSQR( 'U', M, N, 0, 0, S, RWORK( IE ), VT,
+     $                            LDVT, CDUM, 1, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTUO ) THEN
+*
+*                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
+*                 N right singular vectors to be computed in VT and
+*                 M left singular vectors to be overwritten on A
+*
+                  IF( LWORK.GE.2*M*M+MAX( N+M, 3*M ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
+*
+*                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*M
+                        LDWRKR = LDA
+                     ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
+*
+*                       WORK(IU) is LDA by M and WORK(IR) is M by M
+*
+                        LDWRKU = LDA
+                        IR = IU + LDWRKU*M
+                        LDWRKR = M
+                     ELSE
+*
+*                       WORK(IU) is M by M and WORK(IR) is M by M
+*
+                        LDWRKU = M
+                        IR = IU + LDWRKU*M
+                        LDWRKR = M
+                     END IF
+                     ITAU = IR + LDWRKR*M
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (CWorkspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (CWorkspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy L to WORK(IU), zeroing out above it
+*
+                     CALL ZLACPY( 'L', M, M, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
+     $                            WORK( IU+LDWRKU ), LDWRKU )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in WORK(IU), copying result to
+*                    WORK(IR)
+*                    (CWorkspace: need   2*M*M+3*M,
+*                                 prefer 2*M*M+2*M+2*M*NB)
+*                    (RWorkspace: need   M)
+*
+                     CALL ZGEBRD( M, M, WORK( IU ), LDWRKU, S,
+     $                            RWORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL ZLACPY( 'L', M, M, WORK( IU ), LDWRKU,
+     $                            WORK( IR ), LDWRKR )
+*
+*                    Generate right bidiagonalizing vectors in WORK(IU)
+*                    (CWorkspace: need   2*M*M+3*M-1,
+*                                 prefer 2*M*M+2*M+(M-1)*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in WORK(IR)
+*                    (CWorkspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
+     $                            WORK( ITAUQ ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     IRWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of L in WORK(IR) and computing
+*                    right singular vectors of L in WORK(IU)
+*                    (CWorkspace: need 2*M*M)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL ZBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
+     $                            WORK( IU ), LDWRKU, WORK( IR ),
+     $                            LDWRKR, CDUM, 1, RWORK( IRWORK ),
+     $                            INFO )
+*
+*                    Multiply right singular vectors of L in WORK(IU) by
+*                    Q in VT, storing result in A
+*                    (CWorkspace: need M*M)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ),
+     $                           LDWRKU, VT, LDVT, CZERO, A, LDA )
+*
+*                    Copy right singular vectors of A from A to VT
+*
+                     CALL ZLACPY( 'F', M, N, A, LDA, VT, LDVT )
+*
+*                    Copy left singular vectors of A from WORK(IR) to A
+*
+                     CALL ZLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
+     $                            LDA )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (CWorkspace: need 2*M, prefer M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (CWorkspace: need M+N, prefer M+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Zero out above L in A
+*
+                     CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
+     $                            A( 1, 2 ), LDA )
+*
+*                    Bidiagonalize L in A
+*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
+*                    (RWorkspace: need M)
+*
+                     CALL ZGEBRD( M, M, A, LDA, S, RWORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply right bidiagonalizing vectors in A by Q
+*                    in VT
+*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNMBR( 'P', 'L', 'C', M, N, M, A, LDA,
+     $                            WORK( ITAUP ), VT, LDVT,
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in A
+*                    (CWorkspace: need 3*M, prefer 2*M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IRWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in A and computing right
+*                    singular vectors of A in VT
+*                    (CWorkspace: 0)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL ZBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT,
+     $                            LDVT, A, LDA, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+                  END IF
+*
+               ELSE IF( WNTUAS ) THEN
+*
+*                 Path 9t(N much larger than M, JOBU='S' or 'A',
+*                         JOBVT='A')
+*                 N right singular vectors to be computed in VT and
+*                 M left singular vectors to be computed in U
+*
+                  IF( LWORK.GE.M*M+MAX( N+M, 3*M ) ) THEN
+*
+*                    Sufficient workspace for a fast algorithm
+*
+                     IU = 1
+                     IF( LWORK.GE.WRKBL+LDA*M ) THEN
+*
+*                       WORK(IU) is LDA by M
+*
+                        LDWRKU = LDA
+                     ELSE
+*
+*                       WORK(IU) is M by M
+*
+                        LDWRKU = M
+                     END IF
+                     ITAU = IU + LDWRKU*M
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (CWorkspace: need M*M+M+N, prefer M*M+M+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy L to WORK(IU), zeroing out above it
+*
+                     CALL ZLACPY( 'L', M, M, A, LDA, WORK( IU ),
+     $                            LDWRKU )
+                     CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
+     $                            WORK( IU+LDWRKU ), LDWRKU )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in WORK(IU), copying result to U
+*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
+*                    (RWorkspace: need M)
+*
+                     CALL ZGEBRD( M, M, WORK( IU ), LDWRKU, S,
+     $                            RWORK( IE ), WORK( ITAUQ ),
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+                     CALL ZLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
+     $                            LDU )
+*
+*                    Generate right bidiagonalizing vectors in WORK(IU)
+*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+(M-1)*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
+     $                            WORK( ITAUP ), WORK( IWORK ),
+     $                            LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in U
+*                    (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IRWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of L in U and computing right
+*                    singular vectors of L in WORK(IU)
+*                    (CWorkspace: need M*M)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL ZBDSQR( 'U', M, M, M, 0, S, RWORK( IE ),
+     $                            WORK( IU ), LDWRKU, U, LDU, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+*                    Multiply right singular vectors of L in WORK(IU) by
+*                    Q in VT, storing result in A
+*                    (CWorkspace: need M*M)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGEMM( 'N', 'N', M, N, M, CONE, WORK( IU ),
+     $                           LDWRKU, VT, LDVT, CZERO, A, LDA )
+*
+*                    Copy right singular vectors of A from A to VT
+*
+                     CALL ZLACPY( 'F', M, N, A, LDA, VT, LDVT )
+*
+                  ELSE
+*
+*                    Insufficient workspace for a fast algorithm
+*
+                     ITAU = 1
+                     IWORK = ITAU + M
+*
+*                    Compute A=L*Q, copying result to VT
+*                    (CWorkspace: need 2*M, prefer M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZGELQF( M, N, A, LDA, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
+*
+*                    Generate Q in VT
+*                    (CWorkspace: need M+N, prefer M+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Copy L to U, zeroing out above it
+*
+                     CALL ZLACPY( 'L', M, M, A, LDA, U, LDU )
+                     CALL ZLASET( 'U', M-1, M-1, CZERO, CZERO,
+     $                            U( 1, 2 ), LDU )
+                     IE = 1
+                     ITAUQ = ITAU
+                     ITAUP = ITAUQ + M
+                     IWORK = ITAUP + M
+*
+*                    Bidiagonalize L in U
+*                    (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
+*                    (RWorkspace: need M)
+*
+                     CALL ZGEBRD( M, M, U, LDU, S, RWORK( IE ),
+     $                            WORK( ITAUQ ), WORK( ITAUP ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Multiply right bidiagonalizing vectors in U by Q
+*                    in VT
+*                    (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNMBR( 'P', 'L', 'C', M, N, M, U, LDU,
+     $                            WORK( ITAUP ), VT, LDVT,
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+*
+*                    Generate left bidiagonalizing vectors in U
+*                    (CWorkspace: need 3*M, prefer 2*M+M*NB)
+*                    (RWorkspace: 0)
+*
+                     CALL ZUNGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
+     $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
+                     IRWORK = IE + M
+*
+*                    Perform bidiagonal QR iteration, computing left
+*                    singular vectors of A in U and computing right
+*                    singular vectors of A in VT
+*                    (CWorkspace: 0)
+*                    (RWorkspace: need BDSPAC)
+*
+                     CALL ZBDSQR( 'U', M, N, M, 0, S, RWORK( IE ), VT,
+     $                            LDVT, U, LDU, CDUM, 1,
+     $                            RWORK( IRWORK ), INFO )
+*
+                  END IF
+*
+               END IF
+*
+            END IF
+*
+         ELSE
+*
+*           N .LT. MNTHR
+*
+*           Path 10t(N greater than M, but not much larger)
+*           Reduce to bidiagonal form without LQ decomposition
+*
+            IE = 1
+            ITAUQ = 1
+            ITAUP = ITAUQ + M
+            IWORK = ITAUP + M
+*
+*           Bidiagonalize A
+*           (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
+*           (RWorkspace: M)
+*
+            CALL ZGEBRD( M, N, A, LDA, S, RWORK( IE ), WORK( ITAUQ ),
+     $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
+     $                   IERR )
+            IF( WNTUAS ) THEN
+*
+*              If left singular vectors desired in U, copy result to U
+*              and generate left bidiagonalizing vectors in U
+*              (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZLACPY( 'L', M, M, A, LDA, U, LDU )
+               CALL ZUNGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IF( WNTVAS ) THEN
+*
+*              If right singular vectors desired in VT, copy result to
+*              VT and generate right bidiagonalizing vectors in VT
+*              (CWorkspace: need 2*M+NRVT, prefer 2*M+NRVT*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZLACPY( 'U', M, N, A, LDA, VT, LDVT )
+               IF( WNTVA )
+     $            NRVT = N
+               IF( WNTVS )
+     $            NRVT = M
+               CALL ZUNGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IF( WNTUO ) THEN
+*
+*              If left singular vectors desired in A, generate left
+*              bidiagonalizing vectors in A
+*              (CWorkspace: need 3*M-1, prefer 2*M+(M-1)*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZUNGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IF( WNTVO ) THEN
+*
+*              If right singular vectors desired in A, generate right
+*              bidiagonalizing vectors in A
+*              (CWorkspace: need 3*M, prefer 2*M+M*NB)
+*              (RWorkspace: 0)
+*
+               CALL ZUNGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
+     $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
+            END IF
+            IRWORK = IE + M
+            IF( WNTUAS .OR. WNTUO )
+     $         NRU = M
+            IF( WNTUN )
+     $         NRU = 0
+            IF( WNTVAS .OR. WNTVO )
+     $         NCVT = N
+            IF( WNTVN )
+     $         NCVT = 0
+            IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
+*
+*              Perform bidiagonal QR iteration, if desired, computing
+*              left singular vectors in U and computing right singular
+*              vectors in VT
+*              (CWorkspace: 0)
+*              (RWorkspace: need BDSPAC)
+*
+               CALL ZBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), VT,
+     $                      LDVT, U, LDU, CDUM, 1, RWORK( IRWORK ),
+     $                      INFO )
+            ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
+*
+*              Perform bidiagonal QR iteration, if desired, computing
+*              left singular vectors in U and computing right singular
+*              vectors in A
+*              (CWorkspace: 0)
+*              (RWorkspace: need BDSPAC)
+*
+               CALL ZBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), A,
+     $                      LDA, U, LDU, CDUM, 1, RWORK( IRWORK ),
+     $                      INFO )
+            ELSE
+*
+*              Perform bidiagonal QR iteration, if desired, computing
+*              left singular vectors in A and computing right singular
+*              vectors in VT
+*              (CWorkspace: 0)
+*              (RWorkspace: need BDSPAC)
+*
+               CALL ZBDSQR( 'L', M, NCVT, NRU, 0, S, RWORK( IE ), VT,
+     $                      LDVT, A, LDA, CDUM, 1, RWORK( IRWORK ),
+     $                      INFO )
+            END IF
+*
+         END IF
+*
+      END IF
+*
+*     Undo scaling if necessary
+*
+      IF( ISCL.EQ.1 ) THEN
+         IF( ANRM.GT.BIGNUM )
+     $      CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
+     $                   IERR )
+         IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM )
+     $      CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1,
+     $                   RWORK( IE ), MINMN, IERR )
+         IF( ANRM.LT.SMLNUM )
+     $      CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
+     $                   IERR )
+         IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM )
+     $      CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1,
+     $                   RWORK( IE ), MINMN, IERR )
+      END IF
+*
+*     Return optimal workspace in WORK(1)
+*
+      WORK( 1 ) = MAXWRK
+*
+      RETURN
+*
+*     End of ZGESVD
+*
+      END
diff --git a/costa/native/external/lapack/zgesvx.f b/costa/native/external/lapack/zgesvx.f
new file mode 100644
index 000000000..0df63b3f3
--- /dev/null
+++ b/costa/native/external/lapack/zgesvx.f
@@ -0,0 +1,484 @@
+      SUBROUTINE ZGESVX( FACT, TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV,
+     $                   EQUED, R, C, B, LDB, X, LDX, RCOND, FERR, BERR,
+     $                   WORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, FACT, TRANS
+      INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   BERR( * ), C( * ), FERR( * ), R( * ),
+     $                   RWORK( * )
+      COMPLEX*16         A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGESVX uses the LU factorization to compute the solution to a complex
+*  system of linear equations
+*     A * X = B,
+*  where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'E', real scaling factors are computed to equilibrate
+*     the system:
+*        TRANS = 'N':  diag(R)*A*diag(C)     *inv(diag(C))*X = diag(R)*B
+*        TRANS = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
+*        TRANS = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
+*     Whether or not the system will be equilibrated depends on the
+*     scaling of the matrix A, but if equilibration is used, A is
+*     overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if TRANS='N')
+*     or diag(C)*B (if TRANS = 'T' or 'C').
+*
+*  2. If FACT = 'N' or 'E', the LU decomposition is used to factor the
+*     matrix A (after equilibration if FACT = 'E') as
+*        A = P * L * U,
+*     where P is a permutation matrix, L is a unit lower triangular
+*     matrix, and U is upper triangular.
+*
+*  3. If some U(i,i)=0, so that U is exactly singular, then the routine
+*     returns with INFO = i. Otherwise, the factored form of A is used
+*     to estimate the condition number of the matrix A.  If the
+*     reciprocal of the condition number is less than machine precision,
+*     INFO = N+1 is returned as a warning, but the routine still goes on
+*     to solve for X and compute error bounds as described below.
+*
+*  4. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  5. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  6. If equilibration was used, the matrix X is premultiplied by
+*     diag(C) (if TRANS = 'N') or diag(R) (if TRANS = 'T' or 'C') so
+*     that it solves the original system before equilibration.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of the matrix A is
+*          supplied on entry, and if not, whether the matrix A should be
+*          equilibrated before it is factored.
+*          = 'F':  On entry, AF and IPIV contain the factored form of A.
+*                  If EQUED is not 'N', the matrix A has been
+*                  equilibrated with scaling factors given by R and C.
+*                  A, AF, and IPIV are not modified.
+*          = 'N':  The matrix A will be copied to AF and factored.
+*          = 'E':  The matrix A will be equilibrated if necessary, then
+*                  copied to AF and factored.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose)
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the N-by-N matrix A.  If FACT = 'F' and EQUED is
+*          not 'N', then A must have been equilibrated by the scaling
+*          factors in R and/or C.  A is not modified if FACT = 'F' or
+*          'N', or if FACT = 'E' and EQUED = 'N' on exit.
+*
+*          On exit, if EQUED .ne. 'N', A is scaled as follows:
+*          EQUED = 'R':  A := diag(R) * A
+*          EQUED = 'C':  A := A * diag(C)
+*          EQUED = 'B':  A := diag(R) * A * diag(C).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  AF      (input or output) COMPLEX*16 array, dimension (LDAF,N)
+*          If FACT = 'F', then AF is an input argument and on entry
+*          contains the factors L and U from the factorization
+*          A = P*L*U as computed by ZGETRF.  If EQUED .ne. 'N', then
+*          AF is the factored form of the equilibrated matrix A.
+*
+*          If FACT = 'N', then AF is an output argument and on exit
+*          returns the factors L and U from the factorization A = P*L*U
+*          of the original matrix A.
+*
+*          If FACT = 'E', then AF is an output argument and on exit
+*          returns the factors L and U from the factorization A = P*L*U
+*          of the equilibrated matrix A (see the description of A for
+*          the form of the equilibrated matrix).
+*
+*  LDAF    (input) INTEGER
+*          The leading dimension of the array AF.  LDAF >= max(1,N).
+*
+*  IPIV    (input or output) INTEGER array, dimension (N)
+*          If FACT = 'F', then IPIV is an input argument and on entry
+*          contains the pivot indices from the factorization A = P*L*U
+*          as computed by ZGETRF; row i of the matrix was interchanged
+*          with row IPIV(i).
+*
+*          If FACT = 'N', then IPIV is an output argument and on exit
+*          contains the pivot indices from the factorization A = P*L*U
+*          of the original matrix A.
+*
+*          If FACT = 'E', then IPIV is an output argument and on exit
+*          contains the pivot indices from the factorization A = P*L*U
+*          of the equilibrated matrix A.
+*
+*  EQUED   (input or output) CHARACTER*1
+*          Specifies the form of equilibration that was done.
+*          = 'N':  No equilibration (always true if FACT = 'N').
+*          = 'R':  Row equilibration, i.e., A has been premultiplied by
+*                  diag(R).
+*          = 'C':  Column equilibration, i.e., A has been postmultiplied
+*                  by diag(C).
+*          = 'B':  Both row and column equilibration, i.e., A has been
+*                  replaced by diag(R) * A * diag(C).
+*          EQUED is an input argument if FACT = 'F'; otherwise, it is an
+*          output argument.
+*
+*  R       (input or output) DOUBLE PRECISION array, dimension (N)
+*          The row scale factors for A.  If EQUED = 'R' or 'B', A is
+*          multiplied on the left by diag(R); if EQUED = 'N' or 'C', R
+*          is not accessed.  R is an input argument if FACT = 'F';
+*          otherwise, R is an output argument.  If FACT = 'F' and
+*          EQUED = 'R' or 'B', each element of R must be positive.
+*
+*  C       (input or output) DOUBLE PRECISION array, dimension (N)
+*          The column scale factors for A.  If EQUED = 'C' or 'B', A is
+*          multiplied on the right by diag(C); if EQUED = 'N' or 'R', C
+*          is not accessed.  C is an input argument if FACT = 'F';
+*          otherwise, C is an output argument.  If FACT = 'F' and
+*          EQUED = 'C' or 'B', each element of C must be positive.
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit,
+*          if EQUED = 'N', B is not modified;
+*          if TRANS = 'N' and EQUED = 'R' or 'B', B is overwritten by
+*          diag(R)*B;
+*          if TRANS = 'T' or 'C' and EQUED = 'C' or 'B', B is
+*          overwritten by diag(C)*B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) COMPLEX*16 array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X
+*          to the original system of equations.  Note that A and B are
+*          modified on exit if EQUED .ne. 'N', and the solution to the
+*          equilibrated system is inv(diag(C))*X if TRANS = 'N' and
+*          EQUED = 'C' or 'B', or inv(diag(R))*X if TRANS = 'T' or 'C'
+*          and EQUED = 'R' or 'B'.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The estimate of the reciprocal condition number of the matrix
+*          A after equilibration (if done).  If RCOND is less than the
+*          machine precision (in particular, if RCOND = 0), the matrix
+*          is singular to working precision.  This condition is
+*          indicated by a return code of INFO > 0.
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace/output) DOUBLE PRECISION array, dimension (2*N)
+*          On exit, RWORK(1) contains the reciprocal pivot growth
+*          factor norm(A)/norm(U). The "max absolute element" norm is
+*          used. If RWORK(1) is much less than 1, then the stability
+*          of the LU factorization of the (equilibrated) matrix A
+*          could be poor. This also means that the solution X, condition
+*          estimator RCOND, and forward error bound FERR could be
+*          unreliable. If factorization fails with 0<INFO<=N, then
+*          RWORK(1) contains the reciprocal pivot growth factor for the
+*          leading INFO columns of A.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= N:  U(i,i) is exactly zero.  The factorization has
+*                       been completed, but the factor U is exactly
+*                       singular, so the solution and error bounds
+*                       could not be computed. RCOND = 0 is returned.
+*                = N+1: U is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            COLEQU, EQUIL, NOFACT, NOTRAN, ROWEQU
+      CHARACTER          NORM
+      INTEGER            I, INFEQU, J
+      DOUBLE PRECISION   AMAX, ANORM, BIGNUM, COLCND, RCMAX, RCMIN,
+     $                   ROWCND, RPVGRW, SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, ZLANGE, ZLANTR
+      EXTERNAL           LSAME, DLAMCH, ZLANGE, ZLANTR
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGECON, ZGEEQU, ZGERFS, ZGETRF, ZGETRS,
+     $                   ZLACPY, ZLAQGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      EQUIL = LSAME( FACT, 'E' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( NOFACT .OR. EQUIL ) THEN
+         EQUED = 'N'
+         ROWEQU = .FALSE.
+         COLEQU = .FALSE.
+      ELSE
+         ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' )
+         COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' )
+         SMLNUM = DLAMCH( 'Safe minimum' )
+         BIGNUM = ONE / SMLNUM
+      END IF
+*
+*     Test the input parameters.
+*
+      IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) )
+     $     THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT.
+     $         ( ROWEQU .OR. COLEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN
+         INFO = -10
+      ELSE
+         IF( ROWEQU ) THEN
+            RCMIN = BIGNUM
+            RCMAX = ZERO
+            DO 10 J = 1, N
+               RCMIN = MIN( RCMIN, R( J ) )
+               RCMAX = MAX( RCMAX, R( J ) )
+   10       CONTINUE
+            IF( RCMIN.LE.ZERO ) THEN
+               INFO = -11
+            ELSE IF( N.GT.0 ) THEN
+               ROWCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+            ELSE
+               ROWCND = ONE
+            END IF
+         END IF
+         IF( COLEQU .AND. INFO.EQ.0 ) THEN
+            RCMIN = BIGNUM
+            RCMAX = ZERO
+            DO 20 J = 1, N
+               RCMIN = MIN( RCMIN, C( J ) )
+               RCMAX = MAX( RCMAX, C( J ) )
+   20       CONTINUE
+            IF( RCMIN.LE.ZERO ) THEN
+               INFO = -12
+            ELSE IF( N.GT.0 ) THEN
+               COLCND = MAX( RCMIN, SMLNUM ) / MIN( RCMAX, BIGNUM )
+            ELSE
+               COLCND = ONE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            IF( LDB.LT.MAX( 1, N ) ) THEN
+               INFO = -14
+            ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+               INFO = -16
+            END IF
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGESVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( EQUIL ) THEN
+*
+*        Compute row and column scalings to equilibrate the matrix A.
+*
+         CALL ZGEEQU( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX, INFEQU )
+         IF( INFEQU.EQ.0 ) THEN
+*
+*           Equilibrate the matrix.
+*
+            CALL ZLAQGE( N, N, A, LDA, R, C, ROWCND, COLCND, AMAX,
+     $                   EQUED )
+            ROWEQU = LSAME( EQUED, 'R' ) .OR. LSAME( EQUED, 'B' )
+            COLEQU = LSAME( EQUED, 'C' ) .OR. LSAME( EQUED, 'B' )
+         END IF
+      END IF
+*
+*     Scale the right hand side.
+*
+      IF( NOTRAN ) THEN
+         IF( ROWEQU ) THEN
+            DO 40 J = 1, NRHS
+               DO 30 I = 1, N
+                  B( I, J ) = R( I )*B( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+      ELSE IF( COLEQU ) THEN
+         DO 60 J = 1, NRHS
+            DO 50 I = 1, N
+               B( I, J ) = C( I )*B( I, J )
+   50       CONTINUE
+   60    CONTINUE
+      END IF
+*
+      IF( NOFACT .OR. EQUIL ) THEN
+*
+*        Compute the LU factorization of A.
+*
+         CALL ZLACPY( 'Full', N, N, A, LDA, AF, LDAF )
+         CALL ZGETRF( N, N, AF, LDAF, IPIV, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 ) THEN
+*
+*              Compute the reciprocal pivot growth factor of the
+*              leading rank-deficient INFO columns of A.
+*
+               RPVGRW = ZLANTR( 'M', 'U', 'N', INFO, INFO, AF, LDAF,
+     $                  RWORK )
+               IF( RPVGRW.EQ.ZERO ) THEN
+                  RPVGRW = ONE
+               ELSE
+                  RPVGRW = ZLANGE( 'M', N, INFO, A, LDA, RWORK ) /
+     $                     RPVGRW
+               END IF
+               RWORK( 1 ) = RPVGRW
+               RCOND = ZERO
+            END IF
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A and the
+*     reciprocal pivot growth factor RPVGRW.
+*
+      IF( NOTRAN ) THEN
+         NORM = '1'
+      ELSE
+         NORM = 'I'
+      END IF
+      ANORM = ZLANGE( NORM, N, N, A, LDA, RWORK )
+      RPVGRW = ZLANTR( 'M', 'U', 'N', N, N, AF, LDAF, RWORK )
+      IF( RPVGRW.EQ.ZERO ) THEN
+         RPVGRW = ONE
+      ELSE
+         RPVGRW = ZLANGE( 'M', N, N, A, LDA, RWORK ) / RPVGRW
+      END IF
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL ZGECON( NORM, N, AF, LDAF, ANORM, RCOND, WORK, RWORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.DLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution matrix X.
+*
+      CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL ZGETRS( TRANS, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solution and
+*     compute error bounds and backward error estimates for it.
+*
+      CALL ZGERFS( TRANS, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, X,
+     $             LDX, FERR, BERR, WORK, RWORK, INFO )
+*
+*     Transform the solution matrix X to a solution of the original
+*     system.
+*
+      IF( NOTRAN ) THEN
+         IF( COLEQU ) THEN
+            DO 80 J = 1, NRHS
+               DO 70 I = 1, N
+                  X( I, J ) = C( I )*X( I, J )
+   70          CONTINUE
+   80       CONTINUE
+            DO 90 J = 1, NRHS
+               FERR( J ) = FERR( J ) / COLCND
+   90       CONTINUE
+         END IF
+      ELSE IF( ROWEQU ) THEN
+         DO 110 J = 1, NRHS
+            DO 100 I = 1, N
+               X( I, J ) = R( I )*X( I, J )
+  100       CONTINUE
+  110    CONTINUE
+         DO 120 J = 1, NRHS
+            FERR( J ) = FERR( J ) / ROWCND
+  120    CONTINUE
+      END IF
+*
+      RWORK( 1 ) = RPVGRW
+      RETURN
+*
+*     End of ZGESVX
+*
+      END
diff --git a/costa/native/external/lapack/zgetc2.f b/costa/native/external/lapack/zgetc2.f
new file mode 100644
index 000000000..645d8a1a0
--- /dev/null
+++ b/costa/native/external/lapack/zgetc2.f
@@ -0,0 +1,146 @@
+      SUBROUTINE ZGETC2( N, A, LDA, IPIV, JPIV, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), JPIV( * )
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGETC2 computes an LU factorization, using complete pivoting, of the
+*  n-by-n matrix A. The factorization has the form A = P * L * U * Q,
+*  where P and Q are permutation matrices, L is lower triangular with
+*  unit diagonal elements and U is upper triangular.
+*
+*  This is a level 1 BLAS version of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A. N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA, N)
+*          On entry, the n-by-n matrix to be factored.
+*          On exit, the factors L and U from the factorization
+*          A = P*L*U*Q; the unit diagonal elements of L are not stored.
+*          If U(k, k) appears to be less than SMIN, U(k, k) is given the
+*          value of SMIN, giving a nonsingular perturbed system.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1, N).
+*
+*  IPIV    (output) INTEGER array, dimension (N).
+*          The pivot indices; for 1 <= i <= N, row i of the
+*          matrix has been interchanged with row IPIV(i).
+*
+*  JPIV    (output) INTEGER array, dimension (N).
+*          The pivot indices; for 1 <= j <= N, column j of the
+*          matrix has been interchanged with column JPIV(j).
+*
+*  INFO    (output) INTEGER
+*           = 0: successful exit
+*           > 0: if INFO = k, U(k, k) is likely to produce overflow if
+*                one tries to solve for x in Ax = b. So U is perturbed
+*                to avoid the overflow.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IP, IPV, J, JP, JPV
+      DOUBLE PRECISION   BIGNUM, EPS, SMIN, SMLNUM, XMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZGERU, ZSWAP
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DCMPLX, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Set constants to control overflow
+*
+      INFO = 0
+      EPS = DLAMCH( 'P' )
+      SMLNUM = DLAMCH( 'S' ) / EPS
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+*
+*     Factorize A using complete pivoting.
+*     Set pivots less than SMIN to SMIN
+*
+      DO 40 I = 1, N - 1
+*
+*        Find max element in matrix A
+*
+         XMAX = ZERO
+         DO 20 IP = I, N
+            DO 10 JP = I, N
+               IF( ABS( A( IP, JP ) ).GE.XMAX ) THEN
+                  XMAX = ABS( A( IP, JP ) )
+                  IPV = IP
+                  JPV = JP
+               END IF
+   10       CONTINUE
+   20    CONTINUE
+         IF( I.EQ.1 )
+     $      SMIN = MAX( EPS*XMAX, SMLNUM )
+*
+*        Swap rows
+*
+         IF( IPV.NE.I )
+     $      CALL ZSWAP( N, A( IPV, 1 ), LDA, A( I, 1 ), LDA )
+         IPIV( I ) = IPV
+*
+*        Swap columns
+*
+         IF( JPV.NE.I )
+     $      CALL ZSWAP( N, A( 1, JPV ), 1, A( 1, I ), 1 )
+         JPIV( I ) = JPV
+*
+*        Check for singularity
+*
+         IF( ABS( A( I, I ) ).LT.SMIN ) THEN
+            INFO = I
+            A( I, I ) = DCMPLX( SMIN, ZERO )
+         END IF
+         DO 30 J = I + 1, N
+            A( J, I ) = A( J, I ) / A( I, I )
+   30    CONTINUE
+         CALL ZGERU( N-I, N-I, -DCMPLX( ONE ), A( I+1, I ), 1,
+     $               A( I, I+1 ), LDA, A( I+1, I+1 ), LDA )
+   40 CONTINUE
+*
+      IF( ABS( A( N, N ) ).LT.SMIN ) THEN
+         INFO = N
+         A( N, N ) = DCMPLX( SMIN, ZERO )
+      END IF
+      RETURN
+*
+*     End of ZGETC2
+*
+      END
diff --git a/costa/native/external/lapack/zgetf2.f b/costa/native/external/lapack/zgetf2.f
new file mode 100644
index 000000000..4cbdb446b
--- /dev/null
+++ b/costa/native/external/lapack/zgetf2.f
@@ -0,0 +1,136 @@
+      SUBROUTINE ZGETF2( M, N, A, LDA, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGETF2 computes an LU factorization of a general m-by-n matrix A
+*  using partial pivoting with row interchanges.
+*
+*  The factorization has the form
+*     A = P * L * U
+*  where P is a permutation matrix, L is lower triangular with unit
+*  diagonal elements (lower trapezoidal if m > n), and U is upper
+*  triangular (upper trapezoidal if m < n).
+*
+*  This is the right-looking Level 2 BLAS version of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the m by n matrix to be factored.
+*          On exit, the factors L and U from the factorization
+*          A = P*L*U; the unit diagonal elements of L are not stored.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  IPIV    (output) INTEGER array, dimension (min(M,N))
+*          The pivot indices; for 1 <= i <= min(M,N), row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*          > 0: if INFO = k, U(k,k) is exactly zero. The factorization
+*               has been completed, but the factor U is exactly
+*               singular, and division by zero will occur if it is used
+*               to solve a system of equations.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ),
+     $                   ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            J, JP
+*     ..
+*     .. External Functions ..
+      INTEGER            IZAMAX
+      EXTERNAL           IZAMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGERU, ZSCAL, ZSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGETF2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+      DO 10 J = 1, MIN( M, N )
+*
+*        Find pivot and test for singularity.
+*
+         JP = J - 1 + IZAMAX( M-J+1, A( J, J ), 1 )
+         IPIV( J ) = JP
+         IF( A( JP, J ).NE.ZERO ) THEN
+*
+*           Apply the interchange to columns 1:N.
+*
+            IF( JP.NE.J )
+     $         CALL ZSWAP( N, A( J, 1 ), LDA, A( JP, 1 ), LDA )
+*
+*           Compute elements J+1:M of J-th column.
+*
+            IF( J.LT.M )
+     $         CALL ZSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 )
+*
+         ELSE IF( INFO.EQ.0 ) THEN
+*
+            INFO = J
+         END IF
+*
+         IF( J.LT.MIN( M, N ) ) THEN
+*
+*           Update trailing submatrix.
+*
+            CALL ZGERU( M-J, N-J, -ONE, A( J+1, J ), 1, A( J, J+1 ),
+     $                  LDA, A( J+1, J+1 ), LDA )
+         END IF
+   10 CONTINUE
+      RETURN
+*
+*     End of ZGETF2
+*
+      END
diff --git a/costa/native/external/lapack/zgetrf.f b/costa/native/external/lapack/zgetrf.f
new file mode 100644
index 000000000..6f48c8d56
--- /dev/null
+++ b/costa/native/external/lapack/zgetrf.f
@@ -0,0 +1,160 @@
+      SUBROUTINE ZGETRF( M, N, A, LDA, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGETRF computes an LU factorization of a general M-by-N matrix A
+*  using partial pivoting with row interchanges.
+*
+*  The factorization has the form
+*     A = P * L * U
+*  where P is a permutation matrix, L is lower triangular with unit
+*  diagonal elements (lower trapezoidal if m > n), and U is upper
+*  triangular (upper trapezoidal if m < n).
+*
+*  This is the right-looking Level 3 BLAS version of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the M-by-N matrix to be factored.
+*          On exit, the factors L and U from the factorization
+*          A = P*L*U; the unit diagonal elements of L are not stored.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  IPIV    (output) INTEGER array, dimension (min(M,N))
+*          The pivot indices; for 1 <= i <= min(M,N), row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
+*                has been completed, but the factor U is exactly
+*                singular, and division by zero will occur if it is used
+*                to solve a system of equations.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IINFO, J, JB, NB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGEMM, ZGETF2, ZLASWP, ZTRSM
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGETRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'ZGETRF', ' ', M, N, -1, -1 )
+      IF( NB.LE.1 .OR. NB.GE.MIN( M, N ) ) THEN
+*
+*        Use unblocked code.
+*
+         CALL ZGETF2( M, N, A, LDA, IPIV, INFO )
+      ELSE
+*
+*        Use blocked code.
+*
+         DO 20 J = 1, MIN( M, N ), NB
+            JB = MIN( MIN( M, N )-J+1, NB )
+*
+*           Factor diagonal and subdiagonal blocks and test for exact
+*           singularity.
+*
+            CALL ZGETF2( M-J+1, JB, A( J, J ), LDA, IPIV( J ), IINFO )
+*
+*           Adjust INFO and the pivot indices.
+*
+            IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+     $         INFO = IINFO + J - 1
+            DO 10 I = J, MIN( M, J+JB-1 )
+               IPIV( I ) = J - 1 + IPIV( I )
+   10       CONTINUE
+*
+*           Apply interchanges to columns 1:J-1.
+*
+            CALL ZLASWP( J-1, A, LDA, J, J+JB-1, IPIV, 1 )
+*
+            IF( J+JB.LE.N ) THEN
+*
+*              Apply interchanges to columns J+JB:N.
+*
+               CALL ZLASWP( N-J-JB+1, A( 1, J+JB ), LDA, J, J+JB-1,
+     $                      IPIV, 1 )
+*
+*              Compute block row of U.
+*
+               CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit', JB,
+     $                     N-J-JB+1, ONE, A( J, J ), LDA, A( J, J+JB ),
+     $                     LDA )
+               IF( J+JB.LE.M ) THEN
+*
+*                 Update trailing submatrix.
+*
+                  CALL ZGEMM( 'No transpose', 'No transpose', M-J-JB+1,
+     $                        N-J-JB+1, JB, -ONE, A( J+JB, J ), LDA,
+     $                        A( J, J+JB ), LDA, ONE, A( J+JB, J+JB ),
+     $                        LDA )
+               END IF
+            END IF
+   20    CONTINUE
+      END IF
+      RETURN
+*
+*     End of ZGETRF
+*
+      END
diff --git a/costa/native/external/lapack/zgetri.f b/costa/native/external/lapack/zgetri.f
new file mode 100644
index 000000000..1eb4eb7f1
--- /dev/null
+++ b/costa/native/external/lapack/zgetri.f
@@ -0,0 +1,194 @@
+      SUBROUTINE ZGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGETRI computes the inverse of a matrix using the LU factorization
+*  computed by ZGETRF.
+*
+*  This method inverts U and then computes inv(A) by solving the system
+*  inv(A)*L = inv(U) for inv(A).
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the factors L and U from the factorization
+*          A = P*L*U as computed by ZGETRF.
+*          On exit, if INFO = 0, the inverse of the original matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices from ZGETRF; for 1<=i<=N, row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO=0, then WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,N).
+*          For optimal performance LWORK >= N*NB, where NB is
+*          the optimal blocksize returned by ILAENV.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, U(i,i) is exactly zero; the matrix is
+*                singular and its inverse could not be computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO, ONE
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB,
+     $                   NBMIN, NN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGEMM, ZGEMV, ZSWAP, ZTRSM, ZTRTRI
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NB = ILAENV( 1, 'ZGETRI', ' ', N, -1, -1, -1 )
+      LWKOPT = N*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -3
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGETRI', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form inv(U).  If INFO > 0 from ZTRTRI, then U is singular,
+*     and the inverse is not computed.
+*
+      CALL ZTRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO )
+      IF( INFO.GT.0 )
+     $   RETURN
+*
+      NBMIN = 2
+      LDWORK = N
+      IF( NB.GT.1 .AND. NB.LT.N ) THEN
+         IWS = MAX( LDWORK*NB, 1 )
+         IF( LWORK.LT.IWS ) THEN
+            NB = LWORK / LDWORK
+            NBMIN = MAX( 2, ILAENV( 2, 'ZGETRI', ' ', N, -1, -1, -1 ) )
+         END IF
+      ELSE
+         IWS = N
+      END IF
+*
+*     Solve the equation inv(A)*L = inv(U) for inv(A).
+*
+      IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN
+*
+*        Use unblocked code.
+*
+         DO 20 J = N, 1, -1
+*
+*           Copy current column of L to WORK and replace with zeros.
+*
+            DO 10 I = J + 1, N
+               WORK( I ) = A( I, J )
+               A( I, J ) = ZERO
+   10       CONTINUE
+*
+*           Compute current column of inv(A).
+*
+            IF( J.LT.N )
+     $         CALL ZGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ),
+     $                     LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 )
+   20    CONTINUE
+      ELSE
+*
+*        Use blocked code.
+*
+         NN = ( ( N-1 ) / NB )*NB + 1
+         DO 50 J = NN, 1, -NB
+            JB = MIN( NB, N-J+1 )
+*
+*           Copy current block column of L to WORK and replace with
+*           zeros.
+*
+            DO 40 JJ = J, J + JB - 1
+               DO 30 I = JJ + 1, N
+                  WORK( I+( JJ-J )*LDWORK ) = A( I, JJ )
+                  A( I, JJ ) = ZERO
+   30          CONTINUE
+   40       CONTINUE
+*
+*           Compute current block column of inv(A).
+*
+            IF( J+JB.LE.N )
+     $         CALL ZGEMM( 'No transpose', 'No transpose', N, JB,
+     $                     N-J-JB+1, -ONE, A( 1, J+JB ), LDA,
+     $                     WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA )
+            CALL ZTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB,
+     $                  ONE, WORK( J ), LDWORK, A( 1, J ), LDA )
+   50    CONTINUE
+      END IF
+*
+*     Apply column interchanges.
+*
+      DO 60 J = N - 1, 1, -1
+         JP = IPIV( J )
+         IF( JP.NE.J )
+     $      CALL ZSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 )
+   60 CONTINUE
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of ZGETRI
+*
+      END
diff --git a/costa/native/external/lapack/zgetrs.f b/costa/native/external/lapack/zgetrs.f
new file mode 100644
index 000000000..508484618
--- /dev/null
+++ b/costa/native/external/lapack/zgetrs.f
@@ -0,0 +1,150 @@
+      SUBROUTINE ZGETRS( TRANS, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            INFO, LDA, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGETRS solves a system of linear equations
+*     A * X = B,  A**T * X = B,  or  A**H * X = B
+*  with a general N-by-N matrix A using the LU factorization computed
+*  by ZGETRF.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,N)
+*          The factors L and U from the factorization A = P*L*U
+*          as computed by ZGETRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices from ZGETRF; for 1<=i<=N, row i of the
+*          matrix was interchanged with row IPIV(i).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLASWP, ZTRSM
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $    LSAME( TRANS, 'C' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGETRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( NOTRAN ) THEN
+*
+*        Solve A * X = B.
+*
+*        Apply row interchanges to the right hand sides.
+*
+         CALL ZLASWP( NRHS, B, LDB, 1, N, IPIV, 1 )
+*
+*        Solve L*X = B, overwriting B with X.
+*
+         CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Unit', N, NRHS,
+     $               ONE, A, LDA, B, LDB )
+*
+*        Solve U*X = B, overwriting B with X.
+*
+         CALL ZTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N,
+     $               NRHS, ONE, A, LDA, B, LDB )
+      ELSE
+*
+*        Solve A**T * X = B  or A**H * X = B.
+*
+*        Solve U'*X = B, overwriting B with X.
+*
+         CALL ZTRSM( 'Left', 'Upper', TRANS, 'Non-unit', N, NRHS, ONE,
+     $               A, LDA, B, LDB )
+*
+*        Solve L'*X = B, overwriting B with X.
+*
+         CALL ZTRSM( 'Left', 'Lower', TRANS, 'Unit', N, NRHS, ONE, A,
+     $               LDA, B, LDB )
+*
+*        Apply row interchanges to the solution vectors.
+*
+         CALL ZLASWP( NRHS, B, LDB, 1, N, IPIV, -1 )
+      END IF
+*
+      RETURN
+*
+*     End of ZGETRS
+*
+      END
diff --git a/costa/native/external/lapack/zggbak.f b/costa/native/external/lapack/zggbak.f
new file mode 100644
index 000000000..4e5e5dca4
--- /dev/null
+++ b/costa/native/external/lapack/zggbak.f
@@ -0,0 +1,216 @@
+      SUBROUTINE ZGGBAK( JOB, SIDE, N, ILO, IHI, LSCALE, RSCALE, M, V,
+     $                   LDV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOB, SIDE
+      INTEGER            IHI, ILO, INFO, LDV, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   LSCALE( * ), RSCALE( * )
+      COMPLEX*16         V( LDV, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGGBAK forms the right or left eigenvectors of a complex generalized
+*  eigenvalue problem A*x = lambda*B*x, by backward transformation on
+*  the computed eigenvectors of the balanced pair of matrices output by
+*  ZGGBAL.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          Specifies the type of backward transformation required:
+*          = 'N':  do nothing, return immediately;
+*          = 'P':  do backward transformation for permutation only;
+*          = 'S':  do backward transformation for scaling only;
+*          = 'B':  do backward transformations for both permutation and
+*                  scaling.
+*          JOB must be the same as the argument JOB supplied to ZGGBAL.
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'R':  V contains right eigenvectors;
+*          = 'L':  V contains left eigenvectors.
+*
+*  N       (input) INTEGER
+*          The number of rows of the matrix V.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          The integers ILO and IHI determined by ZGGBAL.
+*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+*
+*  LSCALE  (input) DOUBLE PRECISION array, dimension (N)
+*          Details of the permutations and/or scaling factors applied
+*          to the left side of A and B, as returned by ZGGBAL.
+*
+*  RSCALE  (input) DOUBLE PRECISION array, dimension (N)
+*          Details of the permutations and/or scaling factors applied
+*          to the right side of A and B, as returned by ZGGBAL.
+*
+*  M       (input) INTEGER
+*          The number of columns of the matrix V.  M >= 0.
+*
+*  V       (input/output) COMPLEX*16 array, dimension (LDV,M)
+*          On entry, the matrix of right or left eigenvectors to be
+*          transformed, as returned by ZTGEVC.
+*          On exit, V is overwritten by the transformed eigenvectors.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the matrix V. LDV >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  See R.C. Ward, Balancing the generalized eigenvalue problem,
+*                 SIAM J. Sci. Stat. Comp. 2 (1981), 141-152.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LEFTV, RIGHTV
+      INTEGER            I, K
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZDSCAL, ZSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      RIGHTV = LSAME( SIDE, 'R' )
+      LEFTV = LSAME( SIDE, 'L' )
+*
+      INFO = 0
+      IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND.
+     $    .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( ILO.LT.1 ) THEN
+         INFO = -4
+      ELSE IF( IHI.LT.ILO .OR. IHI.GT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDV.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGGBAK', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+      IF( M.EQ.0 )
+     $   RETURN
+      IF( LSAME( JOB, 'N' ) )
+     $   RETURN
+*
+      IF( ILO.EQ.IHI )
+     $   GO TO 30
+*
+*     Backward balance
+*
+      IF( LSAME( JOB, 'S' ) .OR. LSAME( JOB, 'B' ) ) THEN
+*
+*        Backward transformation on right eigenvectors
+*
+         IF( RIGHTV ) THEN
+            DO 10 I = ILO, IHI
+               CALL ZDSCAL( M, RSCALE( I ), V( I, 1 ), LDV )
+   10       CONTINUE
+         END IF
+*
+*        Backward transformation on left eigenvectors
+*
+         IF( LEFTV ) THEN
+            DO 20 I = ILO, IHI
+               CALL ZDSCAL( M, LSCALE( I ), V( I, 1 ), LDV )
+   20       CONTINUE
+         END IF
+      END IF
+*
+*     Backward permutation
+*
+   30 CONTINUE
+      IF( LSAME( JOB, 'P' ) .OR. LSAME( JOB, 'B' ) ) THEN
+*
+*        Backward permutation on right eigenvectors
+*
+         IF( RIGHTV ) THEN
+            IF( ILO.EQ.1 )
+     $         GO TO 50
+            DO 40 I = ILO - 1, 1, -1
+               K = RSCALE( I )
+               IF( K.EQ.I )
+     $            GO TO 40
+               CALL ZSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV )
+   40       CONTINUE
+*
+   50       CONTINUE
+            IF( IHI.EQ.N )
+     $         GO TO 70
+            DO 60 I = IHI + 1, N
+               K = RSCALE( I )
+               IF( K.EQ.I )
+     $            GO TO 60
+               CALL ZSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV )
+   60       CONTINUE
+         END IF
+*
+*        Backward permutation on left eigenvectors
+*
+   70    CONTINUE
+         IF( LEFTV ) THEN
+            IF( ILO.EQ.1 )
+     $         GO TO 90
+            DO 80 I = ILO - 1, 1, -1
+               K = LSCALE( I )
+               IF( K.EQ.I )
+     $            GO TO 80
+               CALL ZSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV )
+   80       CONTINUE
+*
+   90       CONTINUE
+            IF( IHI.EQ.N )
+     $         GO TO 110
+            DO 100 I = IHI + 1, N
+               K = LSCALE( I )
+               IF( K.EQ.I )
+     $            GO TO 100
+               CALL ZSWAP( M, V( I, 1 ), LDV, V( K, 1 ), LDV )
+  100       CONTINUE
+         END IF
+      END IF
+*
+  110 CONTINUE
+*
+      RETURN
+*
+*     End of ZGGBAK
+*
+      END
diff --git a/costa/native/external/lapack/zggbal.f b/costa/native/external/lapack/zggbal.f
new file mode 100644
index 000000000..f557e4425
--- /dev/null
+++ b/costa/native/external/lapack/zggbal.f
@@ -0,0 +1,474 @@
+      SUBROUTINE ZGGBAL( JOB, N, A, LDA, B, LDB, ILO, IHI, LSCALE,
+     $                   RSCALE, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOB
+      INTEGER            IHI, ILO, INFO, LDA, LDB, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   LSCALE( * ), RSCALE( * ), WORK( * )
+      COMPLEX*16         A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGGBAL balances a pair of general complex matrices (A,B).  This
+*  involves, first, permuting A and B by similarity transformations to
+*  isolate eigenvalues in the first 1 to ILO$-$1 and last IHI+1 to N
+*  elements on the diagonal; and second, applying a diagonal similarity
+*  transformation to rows and columns ILO to IHI to make the rows
+*  and columns as close in norm as possible. Both steps are optional.
+*
+*  Balancing may reduce the 1-norm of the matrices, and improve the
+*  accuracy of the computed eigenvalues and/or eigenvectors in the
+*  generalized eigenvalue problem A*x = lambda*B*x.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          Specifies the operations to be performed on A and B:
+*          = 'N':  none:  simply set ILO = 1, IHI = N, LSCALE(I) = 1.0
+*                  and RSCALE(I) = 1.0 for i=1,...,N;
+*          = 'P':  permute only;
+*          = 'S':  scale only;
+*          = 'B':  both permute and scale.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the input matrix A.
+*          On exit, A is overwritten by the balanced matrix.
+*          If JOB = 'N', A is not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,N)
+*          On entry, the input matrix B.
+*          On exit, B is overwritten by the balanced matrix.
+*          If JOB = 'N', B is not referenced.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  ILO     (output) INTEGER
+*  IHI     (output) INTEGER
+*          ILO and IHI are set to integers such that on exit
+*          A(i,j) = 0 and B(i,j) = 0 if i > j and
+*          j = 1,...,ILO-1 or i = IHI+1,...,N.
+*          If JOB = 'N' or 'S', ILO = 1 and IHI = N.
+*
+*  LSCALE  (output) DOUBLE PRECISION array, dimension (N)
+*          Details of the permutations and scaling factors applied
+*          to the left side of A and B.  If P(j) is the index of the
+*          row interchanged with row j, and D(j) is the scaling factor
+*          applied to row j, then
+*            LSCALE(j) = P(j)    for J = 1,...,ILO-1
+*                      = D(j)    for J = ILO,...,IHI
+*                      = P(j)    for J = IHI+1,...,N.
+*          The order in which the interchanges are made is N to IHI+1,
+*          then 1 to ILO-1.
+*
+*  RSCALE  (output) DOUBLE PRECISION array, dimension (N)
+*          Details of the permutations and scaling factors applied
+*          to the right side of A and B.  If P(j) is the index of the
+*          column interchanged with column j, and D(j) is the scaling
+*          factor applied to column j, then
+*            RSCALE(j) = P(j)    for J = 1,...,ILO-1
+*                      = D(j)    for J = ILO,...,IHI
+*                      = P(j)    for J = IHI+1,...,N.
+*          The order in which the interchanges are made is N to IHI+1,
+*          then 1 to ILO-1.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (6*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  See R.C. WARD, Balancing the generalized eigenvalue problem,
+*                 SIAM J. Sci. Stat. Comp. 2 (1981), 141-152.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, HALF, ONE
+      PARAMETER          ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0 )
+      DOUBLE PRECISION   THREE, SCLFAC
+      PARAMETER          ( THREE = 3.0D+0, SCLFAC = 1.0D+1 )
+      COMPLEX*16         CZERO
+      PARAMETER          ( CZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, ICAB, IFLOW, IP1, IR, IRAB, IT, J, JC, JP1,
+     $                   K, KOUNT, L, LCAB, LM1, LRAB, LSFMAX, LSFMIN,
+     $                   M, NR, NRP2
+      DOUBLE PRECISION   ALPHA, BASL, BETA, CAB, CMAX, COEF, COEF2,
+     $                   COEF5, COR, EW, EWC, GAMMA, PGAMMA, RAB, SFMAX,
+     $                   SFMIN, SUM, T, TA, TB, TC
+      COMPLEX*16         CDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IZAMAX
+      DOUBLE PRECISION   DDOT, DLAMCH
+      EXTERNAL           LSAME, IZAMAX, DDOT, DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DAXPY, DSCAL, XERBLA, ZDSCAL, ZSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, INT, LOG10, MAX, MIN, SIGN
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.LSAME( JOB, 'P' ) .AND.
+     $    .NOT.LSAME( JOB, 'S' ) .AND. .NOT.LSAME( JOB, 'B' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGGBAL', -INFO )
+         RETURN
+      END IF
+*
+      K = 1
+      L = N
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( LSAME( JOB, 'N' ) ) THEN
+         ILO = 1
+         IHI = N
+         DO 10 I = 1, N
+            LSCALE( I ) = ONE
+            RSCALE( I ) = ONE
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      IF( K.EQ.L ) THEN
+         ILO = 1
+         IHI = 1
+         LSCALE( 1 ) = ONE
+         RSCALE( 1 ) = ONE
+         RETURN
+      END IF
+*
+      IF( LSAME( JOB, 'S' ) )
+     $   GO TO 190
+*
+      GO TO 30
+*
+*     Permute the matrices A and B to isolate the eigenvalues.
+*
+*     Find row with one nonzero in columns 1 through L
+*
+   20 CONTINUE
+      L = LM1
+      IF( L.NE.1 )
+     $   GO TO 30
+*
+      RSCALE( 1 ) = 1
+      LSCALE( 1 ) = 1
+      GO TO 190
+*
+   30 CONTINUE
+      LM1 = L - 1
+      DO 80 I = L, 1, -1
+         DO 40 J = 1, LM1
+            JP1 = J + 1
+            IF( A( I, J ).NE.CZERO .OR. B( I, J ).NE.CZERO )
+     $         GO TO 50
+   40    CONTINUE
+         J = L
+         GO TO 70
+*
+   50    CONTINUE
+         DO 60 J = JP1, L
+            IF( A( I, J ).NE.CZERO .OR. B( I, J ).NE.CZERO )
+     $         GO TO 80
+   60    CONTINUE
+         J = JP1 - 1
+*
+   70    CONTINUE
+         M = L
+         IFLOW = 1
+         GO TO 160
+   80 CONTINUE
+      GO TO 100
+*
+*     Find column with one nonzero in rows K through N
+*
+   90 CONTINUE
+      K = K + 1
+*
+  100 CONTINUE
+      DO 150 J = K, L
+         DO 110 I = K, LM1
+            IP1 = I + 1
+            IF( A( I, J ).NE.CZERO .OR. B( I, J ).NE.CZERO )
+     $         GO TO 120
+  110    CONTINUE
+         I = L
+         GO TO 140
+  120    CONTINUE
+         DO 130 I = IP1, L
+            IF( A( I, J ).NE.CZERO .OR. B( I, J ).NE.CZERO )
+     $         GO TO 150
+  130    CONTINUE
+         I = IP1 - 1
+  140    CONTINUE
+         M = K
+         IFLOW = 2
+         GO TO 160
+  150 CONTINUE
+      GO TO 190
+*
+*     Permute rows M and I
+*
+  160 CONTINUE
+      LSCALE( M ) = I
+      IF( I.EQ.M )
+     $   GO TO 170
+      CALL ZSWAP( N-K+1, A( I, K ), LDA, A( M, K ), LDA )
+      CALL ZSWAP( N-K+1, B( I, K ), LDB, B( M, K ), LDB )
+*
+*     Permute columns M and J
+*
+  170 CONTINUE
+      RSCALE( M ) = J
+      IF( J.EQ.M )
+     $   GO TO 180
+      CALL ZSWAP( L, A( 1, J ), 1, A( 1, M ), 1 )
+      CALL ZSWAP( L, B( 1, J ), 1, B( 1, M ), 1 )
+*
+  180 CONTINUE
+      GO TO ( 20, 90 )IFLOW
+*
+  190 CONTINUE
+      ILO = K
+      IHI = L
+*
+      IF( ILO.EQ.IHI )
+     $   RETURN
+*
+      IF( LSAME( JOB, 'P' ) )
+     $   RETURN
+*
+*     Balance the submatrix in rows ILO to IHI.
+*
+      NR = IHI - ILO + 1
+      DO 200 I = ILO, IHI
+         RSCALE( I ) = ZERO
+         LSCALE( I ) = ZERO
+*
+         WORK( I ) = ZERO
+         WORK( I+N ) = ZERO
+         WORK( I+2*N ) = ZERO
+         WORK( I+3*N ) = ZERO
+         WORK( I+4*N ) = ZERO
+         WORK( I+5*N ) = ZERO
+  200 CONTINUE
+*
+*     Compute right side vector in resulting linear equations
+*
+      BASL = LOG10( SCLFAC )
+      DO 240 I = ILO, IHI
+         DO 230 J = ILO, IHI
+            IF( A( I, J ).EQ.CZERO ) THEN
+               TA = ZERO
+               GO TO 210
+            END IF
+            TA = LOG10( CABS1( A( I, J ) ) ) / BASL
+*
+  210       CONTINUE
+            IF( B( I, J ).EQ.CZERO ) THEN
+               TB = ZERO
+               GO TO 220
+            END IF
+            TB = LOG10( CABS1( B( I, J ) ) ) / BASL
+*
+  220       CONTINUE
+            WORK( I+4*N ) = WORK( I+4*N ) - TA - TB
+            WORK( J+5*N ) = WORK( J+5*N ) - TA - TB
+  230    CONTINUE
+  240 CONTINUE
+*
+      COEF = ONE / DBLE( 2*NR )
+      COEF2 = COEF*COEF
+      COEF5 = HALF*COEF2
+      NRP2 = NR + 2
+      BETA = ZERO
+      IT = 1
+*
+*     Start generalized conjugate gradient iteration
+*
+  250 CONTINUE
+*
+      GAMMA = DDOT( NR, WORK( ILO+4*N ), 1, WORK( ILO+4*N ), 1 ) +
+     $        DDOT( NR, WORK( ILO+5*N ), 1, WORK( ILO+5*N ), 1 )
+*
+      EW = ZERO
+      EWC = ZERO
+      DO 260 I = ILO, IHI
+         EW = EW + WORK( I+4*N )
+         EWC = EWC + WORK( I+5*N )
+  260 CONTINUE
+*
+      GAMMA = COEF*GAMMA - COEF2*( EW**2+EWC**2 ) - COEF5*( EW-EWC )**2
+      IF( GAMMA.EQ.ZERO )
+     $   GO TO 350
+      IF( IT.NE.1 )
+     $   BETA = GAMMA / PGAMMA
+      T = COEF5*( EWC-THREE*EW )
+      TC = COEF5*( EW-THREE*EWC )
+*
+      CALL DSCAL( NR, BETA, WORK( ILO ), 1 )
+      CALL DSCAL( NR, BETA, WORK( ILO+N ), 1 )
+*
+      CALL DAXPY( NR, COEF, WORK( ILO+4*N ), 1, WORK( ILO+N ), 1 )
+      CALL DAXPY( NR, COEF, WORK( ILO+5*N ), 1, WORK( ILO ), 1 )
+*
+      DO 270 I = ILO, IHI
+         WORK( I ) = WORK( I ) + TC
+         WORK( I+N ) = WORK( I+N ) + T
+  270 CONTINUE
+*
+*     Apply matrix to vector
+*
+      DO 300 I = ILO, IHI
+         KOUNT = 0
+         SUM = ZERO
+         DO 290 J = ILO, IHI
+            IF( A( I, J ).EQ.CZERO )
+     $         GO TO 280
+            KOUNT = KOUNT + 1
+            SUM = SUM + WORK( J )
+  280       CONTINUE
+            IF( B( I, J ).EQ.CZERO )
+     $         GO TO 290
+            KOUNT = KOUNT + 1
+            SUM = SUM + WORK( J )
+  290    CONTINUE
+         WORK( I+2*N ) = DBLE( KOUNT )*WORK( I+N ) + SUM
+  300 CONTINUE
+*
+      DO 330 J = ILO, IHI
+         KOUNT = 0
+         SUM = ZERO
+         DO 320 I = ILO, IHI
+            IF( A( I, J ).EQ.CZERO )
+     $         GO TO 310
+            KOUNT = KOUNT + 1
+            SUM = SUM + WORK( I+N )
+  310       CONTINUE
+            IF( B( I, J ).EQ.CZERO )
+     $         GO TO 320
+            KOUNT = KOUNT + 1
+            SUM = SUM + WORK( I+N )
+  320    CONTINUE
+         WORK( J+3*N ) = DBLE( KOUNT )*WORK( J ) + SUM
+  330 CONTINUE
+*
+      SUM = DDOT( NR, WORK( ILO+N ), 1, WORK( ILO+2*N ), 1 ) +
+     $      DDOT( NR, WORK( ILO ), 1, WORK( ILO+3*N ), 1 )
+      ALPHA = GAMMA / SUM
+*
+*     Determine correction to current iteration
+*
+      CMAX = ZERO
+      DO 340 I = ILO, IHI
+         COR = ALPHA*WORK( I+N )
+         IF( ABS( COR ).GT.CMAX )
+     $      CMAX = ABS( COR )
+         LSCALE( I ) = LSCALE( I ) + COR
+         COR = ALPHA*WORK( I )
+         IF( ABS( COR ).GT.CMAX )
+     $      CMAX = ABS( COR )
+         RSCALE( I ) = RSCALE( I ) + COR
+  340 CONTINUE
+      IF( CMAX.LT.HALF )
+     $   GO TO 350
+*
+      CALL DAXPY( NR, -ALPHA, WORK( ILO+2*N ), 1, WORK( ILO+4*N ), 1 )
+      CALL DAXPY( NR, -ALPHA, WORK( ILO+3*N ), 1, WORK( ILO+5*N ), 1 )
+*
+      PGAMMA = GAMMA
+      IT = IT + 1
+      IF( IT.LE.NRP2 )
+     $   GO TO 250
+*
+*     End generalized conjugate gradient iteration
+*
+  350 CONTINUE
+      SFMIN = DLAMCH( 'S' )
+      SFMAX = ONE / SFMIN
+      LSFMIN = INT( LOG10( SFMIN ) / BASL+ONE )
+      LSFMAX = INT( LOG10( SFMAX ) / BASL )
+      DO 360 I = ILO, IHI
+         IRAB = IZAMAX( N-ILO+1, A( I, ILO ), LDA )
+         RAB = ABS( A( I, IRAB+ILO-1 ) )
+         IRAB = IZAMAX( N-ILO+1, B( I, ILO ), LDA )
+         RAB = MAX( RAB, ABS( B( I, IRAB+ILO-1 ) ) )
+         LRAB = INT( LOG10( RAB+SFMIN ) / BASL+ONE )
+         IR = LSCALE( I ) + SIGN( HALF, LSCALE( I ) )
+         IR = MIN( MAX( IR, LSFMIN ), LSFMAX, LSFMAX-LRAB )
+         LSCALE( I ) = SCLFAC**IR
+         ICAB = IZAMAX( IHI, A( 1, I ), 1 )
+         CAB = ABS( A( ICAB, I ) )
+         ICAB = IZAMAX( IHI, B( 1, I ), 1 )
+         CAB = MAX( CAB, ABS( B( ICAB, I ) ) )
+         LCAB = INT( LOG10( CAB+SFMIN ) / BASL+ONE )
+         JC = RSCALE( I ) + SIGN( HALF, RSCALE( I ) )
+         JC = MIN( MAX( JC, LSFMIN ), LSFMAX, LSFMAX-LCAB )
+         RSCALE( I ) = SCLFAC**JC
+  360 CONTINUE
+*
+*     Row scaling of matrices A and B
+*
+      DO 370 I = ILO, IHI
+         CALL ZDSCAL( N-ILO+1, LSCALE( I ), A( I, ILO ), LDA )
+         CALL ZDSCAL( N-ILO+1, LSCALE( I ), B( I, ILO ), LDB )
+  370 CONTINUE
+*
+*     Column scaling of matrices A and B
+*
+      DO 380 J = ILO, IHI
+         CALL ZDSCAL( IHI, RSCALE( J ), A( 1, J ), 1 )
+         CALL ZDSCAL( IHI, RSCALE( J ), B( 1, J ), 1 )
+  380 CONTINUE
+*
+      RETURN
+*
+*     End of ZGGBAL
+*
+      END
diff --git a/costa/native/external/lapack/zgges.f b/costa/native/external/lapack/zgges.f
new file mode 100644
index 000000000..b10ff4231
--- /dev/null
+++ b/costa/native/external/lapack/zgges.f
@@ -0,0 +1,475 @@
+      SUBROUTINE ZGGES( JOBVSL, JOBVSR, SORT, DELCTG, N, A, LDA, B, LDB,
+     $                  SDIM, ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK,
+     $                  LWORK, RWORK, BWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVSL, JOBVSR, SORT
+      INTEGER            INFO, LDA, LDB, LDVSL, LDVSR, LWORK, N, SDIM
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            BWORK( * )
+      DOUBLE PRECISION   RWORK( * )
+      COMPLEX*16         A( LDA, * ), ALPHA( * ), B( LDB, * ),
+     $                   BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ),
+     $                   WORK( * )
+*     ..
+*     .. Function Arguments ..
+      LOGICAL            DELCTG
+      EXTERNAL           DELCTG
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGGES computes for a pair of N-by-N complex nonsymmetric matrices
+*  (A,B), the generalized eigenvalues, the generalized complex Schur
+*  form (S, T), and optionally left and/or right Schur vectors (VSL
+*  and VSR). This gives the generalized Schur factorization
+*
+*          (A,B) = ( (VSL)*S*(VSR)**H, (VSL)*T*(VSR)**H )
+*
+*  where (VSR)**H is the conjugate-transpose of VSR.
+*
+*  Optionally, it also orders the eigenvalues so that a selected cluster
+*  of eigenvalues appears in the leading diagonal blocks of the upper
+*  triangular matrix S and the upper triangular matrix T. The leading
+*  columns of VSL and VSR then form an unitary basis for the
+*  corresponding left and right eigenspaces (deflating subspaces).
+*
+*  (If only the generalized eigenvalues are needed, use the driver
+*  ZGGEV instead, which is faster.)
+*
+*  A generalized eigenvalue for a pair of matrices (A,B) is a scalar w
+*  or a ratio alpha/beta = w, such that  A - w*B is singular.  It is
+*  usually represented as the pair (alpha,beta), as there is a
+*  reasonable interpretation for beta=0, and even for both being zero.
+*
+*  A pair of matrices (S,T) is in generalized complex Schur form if S
+*  and T are upper triangular and, in addition, the diagonal elements
+*  of T are non-negative real numbers.
+*
+*  Arguments
+*  =========
+*
+*  JOBVSL  (input) CHARACTER*1
+*          = 'N':  do not compute the left Schur vectors;
+*          = 'V':  compute the left Schur vectors.
+*
+*  JOBVSR  (input) CHARACTER*1
+*          = 'N':  do not compute the right Schur vectors;
+*          = 'V':  compute the right Schur vectors.
+*
+*  SORT    (input) CHARACTER*1
+*          Specifies whether or not to order the eigenvalues on the
+*          diagonal of the generalized Schur form.
+*          = 'N':  Eigenvalues are not ordered;
+*          = 'S':  Eigenvalues are ordered (see DELZTG).
+*
+*  DELZTG  (input) LOGICAL FUNCTION of two COMPLEX*16 arguments
+*          DELZTG must be declared EXTERNAL in the calling subroutine.
+*          If SORT = 'N', DELZTG is not referenced.
+*          If SORT = 'S', DELZTG is used to select eigenvalues to sort
+*          to the top left of the Schur form.
+*          An eigenvalue ALPHA(j)/BETA(j) is selected if
+*          DELZTG(ALPHA(j),BETA(j)) is true.
+*
+*          Note that a selected complex eigenvalue may no longer satisfy
+*          DELZTG(ALPHA(j),BETA(j)) = .TRUE. after ordering, since
+*          ordering may change the value of complex eigenvalues
+*          (especially if the eigenvalue is ill-conditioned), in this
+*          case INFO is set to N+2 (See INFO below).
+*
+*  N       (input) INTEGER
+*          The order of the matrices A, B, VSL, and VSR.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA, N)
+*          On entry, the first of the pair of matrices.
+*          On exit, A has been overwritten by its generalized Schur
+*          form S.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of A.  LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB, N)
+*          On entry, the second of the pair of matrices.
+*          On exit, B has been overwritten by its generalized Schur
+*          form T.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of B.  LDB >= max(1,N).
+*
+*  SDIM    (output) INTEGER
+*          If SORT = 'N', SDIM = 0.
+*          If SORT = 'S', SDIM = number of eigenvalues (after sorting)
+*          for which DELZTG is true.
+*
+*  ALPHA   (output) COMPLEX*16 array, dimension (N)
+*  BETA    (output) COMPLEX*16 array, dimension (N)
+*          On exit,  ALPHA(j)/BETA(j), j=1,...,N, will be the
+*          generalized eigenvalues.  ALPHA(j), j=1,...,N  and  BETA(j),
+*          j=1,...,N  are the diagonals of the complex Schur form (A,B)
+*          output by ZGGES. The  BETA(j) will be non-negative real.
+*
+*          Note: the quotients ALPHA(j)/BETA(j) may easily over- or
+*          underflow, and BETA(j) may even be zero.  Thus, the user
+*          should avoid naively computing the ratio alpha/beta.
+*          However, ALPHA will be always less than and usually
+*          comparable with norm(A) in magnitude, and BETA always less
+*          than and usually comparable with norm(B).
+*
+*  VSL     (output) COMPLEX*16 array, dimension (LDVSL,N)
+*          If JOBVSL = 'V', VSL will contain the left Schur vectors.
+*          Not referenced if JOBVSL = 'N'.
+*
+*  LDVSL   (input) INTEGER
+*          The leading dimension of the matrix VSL. LDVSL >= 1, and
+*          if JOBVSL = 'V', LDVSL >= N.
+*
+*  VSR     (output) COMPLEX*16 array, dimension (LDVSR,N)
+*          If JOBVSR = 'V', VSR will contain the right Schur vectors.
+*          Not referenced if JOBVSR = 'N'.
+*
+*  LDVSR   (input) INTEGER
+*          The leading dimension of the matrix VSR. LDVSR >= 1, and
+*          if JOBVSR = 'V', LDVSR >= N.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,2*N).
+*          For good performance, LWORK must generally be larger.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (8*N)
+*
+*  BWORK   (workspace) LOGICAL array, dimension (N)
+*          Not referenced if SORT = 'N'.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          =1,...,N:
+*                The QZ iteration failed.  (A,B) are not in Schur
+*                form, but ALPHA(j) and BETA(j) should be correct for
+*                j=INFO+1,...,N.
+*          > N:  =N+1: other than QZ iteration failed in ZHGEQZ
+*                =N+2: after reordering, roundoff changed values of
+*                      some complex eigenvalues so that leading
+*                      eigenvalues in the Generalized Schur form no
+*                      longer satisfy DELZTG=.TRUE.  This could also
+*                      be caused due to scaling.
+*                =N+3: reordering falied in ZTGSEN.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+      COMPLEX*16         CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0D0, 0.0D0 ),
+     $                   CONE = ( 1.0D0, 0.0D0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            CURSL, ILASCL, ILBSCL, ILVSL, ILVSR, LASTSL,
+     $                   LQUERY, WANTST
+      INTEGER            I, ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT,
+     $                   ILO, IRIGHT, IROWS, IRWRK, ITAU, IWRK, LWKMIN,
+     $                   LWKOPT
+      DOUBLE PRECISION   ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, PVSL,
+     $                   PVSR, SMLNUM
+*     ..
+*     .. Local Arrays ..
+      INTEGER            IDUM( 1 )
+      DOUBLE PRECISION   DIF( 2 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGEQRF, ZGGBAK, ZGGBAL, ZGGHRD, ZHGEQZ,
+     $                   ZLACPY, ZLASCL, ZLASET, ZTGSEN, ZUNGQR, ZUNMQR
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, ZLANGE
+      EXTERNAL           LSAME, ILAENV, DLAMCH, ZLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode the input arguments
+*
+      IF( LSAME( JOBVSL, 'N' ) ) THEN
+         IJOBVL = 1
+         ILVSL = .FALSE.
+      ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN
+         IJOBVL = 2
+         ILVSL = .TRUE.
+      ELSE
+         IJOBVL = -1
+         ILVSL = .FALSE.
+      END IF
+*
+      IF( LSAME( JOBVSR, 'N' ) ) THEN
+         IJOBVR = 1
+         ILVSR = .FALSE.
+      ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN
+         IJOBVR = 2
+         ILVSR = .TRUE.
+      ELSE
+         IJOBVR = -1
+         ILVSR = .FALSE.
+      END IF
+*
+      WANTST = LSAME( SORT, 'S' )
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( IJOBVL.LE.0 ) THEN
+         INFO = -1
+      ELSE IF( IJOBVR.LE.0 ) THEN
+         INFO = -2
+      ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN
+         INFO = -14
+      ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN
+         INFO = -16
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV.)
+*
+      LWKMIN = 1
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN
+         LWKMIN = MAX( 1, 2*N )
+         LWKOPT = N + N*ILAENV( 1, 'ZGEQRF', ' ', N, 1, N, 0 )
+         IF( ILVSL ) THEN
+            LWKOPT = MAX( LWKOPT, N+N*ILAENV( 1, 'ZUNGQR', ' ', N, 1, N,
+     $               -1 ) )
+         END IF
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY )
+     $   INFO = -18
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGGES ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      WORK( 1 ) = LWKOPT
+      IF( N.EQ.0 ) THEN
+         SDIM = 0
+         RETURN
+      END IF
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'P' )
+      SMLNUM = DLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SQRT( SMLNUM ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = ZLANGE( 'M', N, N, A, LDA, RWORK )
+      ILASCL = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         ANRMTO = SMLNUM
+         ILASCL = .TRUE.
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         ANRMTO = BIGNUM
+         ILASCL = .TRUE.
+      END IF
+*
+      IF( ILASCL )
+     $   CALL ZLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR )
+*
+*     Scale B if max element outside range [SMLNUM,BIGNUM]
+*
+      BNRM = ZLANGE( 'M', N, N, B, LDB, RWORK )
+      ILBSCL = .FALSE.
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+         BNRMTO = SMLNUM
+         ILBSCL = .TRUE.
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+         BNRMTO = BIGNUM
+         ILBSCL = .TRUE.
+      END IF
+*
+      IF( ILBSCL )
+     $   CALL ZLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR )
+*
+*     Permute the matrix to make it more nearly triangular
+*     (Real Workspace: need 6*N)
+*
+      ILEFT = 1
+      IRIGHT = N + 1
+      IRWRK = IRIGHT + N
+      CALL ZGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ),
+     $             RWORK( IRIGHT ), RWORK( IRWRK ), IERR )
+*
+*     Reduce B to triangular form (QR decomposition of B)
+*     (Complex Workspace: need N, prefer N*NB)
+*
+      IROWS = IHI + 1 - ILO
+      ICOLS = N + 1 - ILO
+      ITAU = 1
+      IWRK = ITAU + IROWS
+      CALL ZGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ),
+     $             WORK( IWRK ), LWORK+1-IWRK, IERR )
+*
+*     Apply the orthogonal transformation to matrix A
+*     (Complex Workspace: need N, prefer N*NB)
+*
+      CALL ZUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB,
+     $             WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ),
+     $             LWORK+1-IWRK, IERR )
+*
+*     Initialize VSL
+*     (Complex Workspace: need N, prefer N*NB)
+*
+      IF( ILVSL ) THEN
+         CALL ZLASET( 'Full', N, N, CZERO, CONE, VSL, LDVSL )
+         CALL ZLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB,
+     $                VSL( ILO+1, ILO ), LDVSL )
+         CALL ZUNGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL,
+     $                WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR )
+      END IF
+*
+*     Initialize VSR
+*
+      IF( ILVSR )
+     $   CALL ZLASET( 'Full', N, N, CZERO, CONE, VSR, LDVSR )
+*
+*     Reduce to generalized Hessenberg form
+*     (Workspace: none needed)
+*
+      CALL ZGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL,
+     $             LDVSL, VSR, LDVSR, IERR )
+*
+      SDIM = 0
+*
+*     Perform QZ algorithm, computing Schur vectors if desired
+*     (Complex Workspace: need N)
+*     (Real Workspace: need N)
+*
+      IWRK = ITAU
+      CALL ZHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB,
+     $             ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK( IWRK ),
+     $             LWORK+1-IWRK, RWORK( IRWRK ), IERR )
+      IF( IERR.NE.0 ) THEN
+         IF( IERR.GT.0 .AND. IERR.LE.N ) THEN
+            INFO = IERR
+         ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN
+            INFO = IERR - N
+         ELSE
+            INFO = N + 1
+         END IF
+         GO TO 30
+      END IF
+*
+*     Sort eigenvalues ALPHA/BETA if desired
+*     (Workspace: none needed)
+*
+      IF( WANTST ) THEN
+*
+*        Undo scaling on eigenvalues before selecting
+*
+         IF( ILASCL )
+     $      CALL ZLASCL( 'G', 0, 0, ANRM, ANRMTO, N, 1, ALPHA, N, IERR )
+         IF( ILBSCL )
+     $      CALL ZLASCL( 'G', 0, 0, BNRM, BNRMTO, N, 1, BETA, N, IERR )
+*
+*        Select eigenvalues
+*
+         DO 10 I = 1, N
+            BWORK( I ) = DELCTG( ALPHA( I ), BETA( I ) )
+   10    CONTINUE
+*
+         CALL ZTGSEN( 0, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB, ALPHA,
+     $                BETA, VSL, LDVSL, VSR, LDVSR, SDIM, PVSL, PVSR,
+     $                DIF, WORK( IWRK ), LWORK-IWRK+1, IDUM, 1, IERR )
+         IF( IERR.EQ.1 )
+     $      INFO = N + 3
+*
+      END IF
+*
+*     Apply back-permutation to VSL and VSR
+*     (Workspace: none needed)
+*
+      IF( ILVSL )
+     $   CALL ZGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ),
+     $                RWORK( IRIGHT ), N, VSL, LDVSL, IERR )
+      IF( ILVSR )
+     $   CALL ZGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ),
+     $                RWORK( IRIGHT ), N, VSR, LDVSR, IERR )
+*
+*     Undo scaling
+*
+      IF( ILASCL ) THEN
+         CALL ZLASCL( 'U', 0, 0, ANRMTO, ANRM, N, N, A, LDA, IERR )
+         CALL ZLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR )
+      END IF
+*
+      IF( ILBSCL ) THEN
+         CALL ZLASCL( 'U', 0, 0, BNRMTO, BNRM, N, N, B, LDB, IERR )
+         CALL ZLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR )
+      END IF
+*
+      IF( WANTST ) THEN
+*
+*        Check if reordering is correct
+*
+         LASTSL = .TRUE.
+         SDIM = 0
+         DO 20 I = 1, N
+            CURSL = DELCTG( ALPHA( I ), BETA( I ) )
+            IF( CURSL )
+     $         SDIM = SDIM + 1
+            IF( CURSL .AND. .NOT.LASTSL )
+     $         INFO = N + 2
+            LASTSL = CURSL
+   20    CONTINUE
+*
+      END IF
+*
+   30 CONTINUE
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of ZGGES
+*
+      END
diff --git a/costa/native/external/lapack/zggesx.f b/costa/native/external/lapack/zggesx.f
new file mode 100644
index 000000000..89c22433d
--- /dev/null
+++ b/costa/native/external/lapack/zggesx.f
@@ -0,0 +1,544 @@
+      SUBROUTINE ZGGESX( JOBVSL, JOBVSR, SORT, DELCTG, SENSE, N, A, LDA,
+     $                   B, LDB, SDIM, ALPHA, BETA, VSL, LDVSL, VSR,
+     $                   LDVSR, RCONDE, RCONDV, WORK, LWORK, RWORK,
+     $                   IWORK, LIWORK, BWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVSL, JOBVSR, SENSE, SORT
+      INTEGER            INFO, LDA, LDB, LDVSL, LDVSR, LIWORK, LWORK, N,
+     $                   SDIM
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            BWORK( * )
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   RCONDE( 2 ), RCONDV( 2 ), RWORK( * )
+      COMPLEX*16         A( LDA, * ), ALPHA( * ), B( LDB, * ),
+     $                   BETA( * ), VSL( LDVSL, * ), VSR( LDVSR, * ),
+     $                   WORK( * )
+*     ..
+*     .. Function Arguments ..
+      LOGICAL            DELCTG
+      EXTERNAL           DELCTG
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGGESX computes for a pair of N-by-N complex nonsymmetric matrices
+*  (A,B), the generalized eigenvalues, the complex Schur form (S,T),
+*  and, optionally, the left and/or right matrices of Schur vectors (VSL
+*  and VSR).  This gives the generalized Schur factorization
+*
+*       (A,B) = ( (VSL) S (VSR)**H, (VSL) T (VSR)**H )
+*
+*  where (VSR)**H is the conjugate-transpose of VSR.
+*
+*  Optionally, it also orders the eigenvalues so that a selected cluster
+*  of eigenvalues appears in the leading diagonal blocks of the upper
+*  triangular matrix S and the upper triangular matrix T; computes
+*  a reciprocal condition number for the average of the selected
+*  eigenvalues (RCONDE); and computes a reciprocal condition number for
+*  the right and left deflating subspaces corresponding to the selected
+*  eigenvalues (RCONDV). The leading columns of VSL and VSR then form
+*  an orthonormal basis for the corresponding left and right eigenspaces
+*  (deflating subspaces).
+*
+*  A generalized eigenvalue for a pair of matrices (A,B) is a scalar w
+*  or a ratio alpha/beta = w, such that  A - w*B is singular.  It is
+*  usually represented as the pair (alpha,beta), as there is a
+*  reasonable interpretation for beta=0 or for both being zero.
+*
+*  A pair of matrices (S,T) is in generalized complex Schur form if T is
+*  upper triangular with non-negative diagonal and S is upper
+*  triangular.
+*
+*  Arguments
+*  =========
+*
+*  JOBVSL  (input) CHARACTER*1
+*          = 'N':  do not compute the left Schur vectors;
+*          = 'V':  compute the left Schur vectors.
+*
+*  JOBVSR  (input) CHARACTER*1
+*          = 'N':  do not compute the right Schur vectors;
+*          = 'V':  compute the right Schur vectors.
+*
+*  SORT    (input) CHARACTER*1
+*          Specifies whether or not to order the eigenvalues on the
+*          diagonal of the generalized Schur form.
+*          = 'N':  Eigenvalues are not ordered;
+*          = 'S':  Eigenvalues are ordered (see DELZTG).
+*
+*  DELZTG  (input) LOGICAL FUNCTION of two COMPLEX*16 arguments
+*          DELZTG must be declared EXTERNAL in the calling subroutine.
+*          If SORT = 'N', DELZTG is not referenced.
+*          If SORT = 'S', DELZTG is used to select eigenvalues to sort
+*          to the top left of the Schur form.
+*          Note that a selected complex eigenvalue may no longer satisfy
+*          DELZTG(ALPHA(j),BETA(j)) = .TRUE. after ordering, since
+*          ordering may change the value of complex eigenvalues
+*          (especially if the eigenvalue is ill-conditioned), in this
+*          case INFO is set to N+3 see INFO below).
+*
+*  SENSE   (input) CHARACTER
+*          Determines which reciprocal condition numbers are computed.
+*          = 'N' : None are computed;
+*          = 'E' : Computed for average of selected eigenvalues only;
+*          = 'V' : Computed for selected deflating subspaces only;
+*          = 'B' : Computed for both.
+*          If SENSE = 'E', 'V', or 'B', SORT must equal 'S'.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A, B, VSL, and VSR.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA, N)
+*          On entry, the first of the pair of matrices.
+*          On exit, A has been overwritten by its generalized Schur
+*          form S.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of A.  LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB, N)
+*          On entry, the second of the pair of matrices.
+*          On exit, B has been overwritten by its generalized Schur
+*          form T.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of B.  LDB >= max(1,N).
+*
+*  SDIM    (output) INTEGER
+*          If SORT = 'N', SDIM = 0.
+*          If SORT = 'S', SDIM = number of eigenvalues (after sorting)
+*          for which DELZTG is true.
+*
+*  ALPHA   (output) COMPLEX*16 array, dimension (N)
+*  BETA    (output) COMPLEX*16 array, dimension (N)
+*          On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the
+*          generalized eigenvalues.  ALPHA(j) and BETA(j),j=1,...,N  are
+*          the diagonals of the complex Schur form (S,T).  BETA(j) will
+*          be non-negative real.
+*
+*          Note: the quotients ALPHA(j)/BETA(j) may easily over- or
+*          underflow, and BETA(j) may even be zero.  Thus, the user
+*          should avoid naively computing the ratio alpha/beta.
+*          However, ALPHA will be always less than and usually
+*          comparable with norm(A) in magnitude, and BETA always less
+*          than and usually comparable with norm(B).
+*
+*  VSL     (output) COMPLEX*16 array, dimension (LDVSL,N)
+*          If JOBVSL = 'V', VSL will contain the left Schur vectors.
+*          Not referenced if JOBVSL = 'N'.
+*
+*  LDVSL   (input) INTEGER
+*          The leading dimension of the matrix VSL. LDVSL >=1, and
+*          if JOBVSL = 'V', LDVSL >= N.
+*
+*  VSR     (output) COMPLEX*16 array, dimension (LDVSR,N)
+*          If JOBVSR = 'V', VSR will contain the right Schur vectors.
+*          Not referenced if JOBVSR = 'N'.
+*
+*  LDVSR   (input) INTEGER
+*          The leading dimension of the matrix VSR. LDVSR >= 1, and
+*          if JOBVSR = 'V', LDVSR >= N.
+*
+*  RCONDE  (output) DOUBLE PRECISION array, dimension ( 2 )
+*          If SENSE = 'E' or 'B', RCONDE(1) and RCONDE(2) contain the
+*          reciprocal condition numbers for the average of the selected
+*          eigenvalues.
+*          Not referenced if SENSE = 'N' or 'V'.
+*
+*  RCONDV  (output) DOUBLE PRECISION array, dimension ( 2 )
+*          If SENSE = 'V' or 'B', RCONDV(1) and RCONDV(2) contain the
+*          reciprocal condition number for the selected deflating
+*          subspaces.
+*          Not referenced if SENSE = 'N' or 'E'.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= 2*N.
+*          If SENSE = 'E', 'V', or 'B',
+*          LWORK >= MAX(2*N, 2*SDIM*(N-SDIM)).
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension ( 8*N )
+*          Real workspace.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          Not referenced if SENSE = 'N'.
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array WORK. LIWORK >= N+2.
+*
+*  BWORK   (workspace) LOGICAL array, dimension (N)
+*          Not referenced if SORT = 'N'.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          = 1,...,N:
+*                The QZ iteration failed.  (A,B) are not in Schur
+*                form, but ALPHA(j) and BETA(j) should be correct for
+*                j=INFO+1,...,N.
+*          > N:  =N+1: other than QZ iteration failed in ZHGEQZ
+*                =N+2: after reordering, roundoff changed values of
+*                      some complex eigenvalues so that leading
+*                      eigenvalues in the Generalized Schur form no
+*                      longer satisfy DELZTG=.TRUE.  This could also
+*                      be caused due to scaling.
+*                =N+3: reordering failed in ZTGSEN.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+      COMPLEX*16         CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            CURSL, ILASCL, ILBSCL, ILVSL, ILVSR, LASTSL,
+     $                   WANTSB, WANTSE, WANTSN, WANTST, WANTSV
+      INTEGER            I, ICOLS, IERR, IHI, IJOB, IJOBVL, IJOBVR,
+     $                   ILEFT, ILO, IRIGHT, IROWS, IRWRK, ITAU, IWRK,
+     $                   LIWMIN, MAXWRK, MINWRK
+      DOUBLE PRECISION   ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS, PL,
+     $                   PR, SMLNUM
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   DIF( 2 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGEQRF, ZGGBAK, ZGGBAL, ZGGHRD, ZHGEQZ,
+     $                   ZLACPY, ZLASCL, ZLASET, ZTGSEN, ZUNGQR, ZUNMQR
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, ZLANGE
+      EXTERNAL           LSAME, ILAENV, DLAMCH, ZLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode the input arguments
+*
+      IF( LSAME( JOBVSL, 'N' ) ) THEN
+         IJOBVL = 1
+         ILVSL = .FALSE.
+      ELSE IF( LSAME( JOBVSL, 'V' ) ) THEN
+         IJOBVL = 2
+         ILVSL = .TRUE.
+      ELSE
+         IJOBVL = -1
+         ILVSL = .FALSE.
+      END IF
+*
+      IF( LSAME( JOBVSR, 'N' ) ) THEN
+         IJOBVR = 1
+         ILVSR = .FALSE.
+      ELSE IF( LSAME( JOBVSR, 'V' ) ) THEN
+         IJOBVR = 2
+         ILVSR = .TRUE.
+      ELSE
+         IJOBVR = -1
+         ILVSR = .FALSE.
+      END IF
+*
+      WANTST = LSAME( SORT, 'S' )
+      WANTSN = LSAME( SENSE, 'N' )
+      WANTSE = LSAME( SENSE, 'E' )
+      WANTSV = LSAME( SENSE, 'V' )
+      WANTSB = LSAME( SENSE, 'B' )
+      IF( WANTSN ) THEN
+         IJOB = 0
+         IWORK( 1 ) = 1
+      ELSE IF( WANTSE ) THEN
+         IJOB = 1
+      ELSE IF( WANTSV ) THEN
+         IJOB = 2
+      ELSE IF( WANTSB ) THEN
+         IJOB = 4
+      END IF
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( IJOBVL.LE.0 ) THEN
+         INFO = -1
+      ELSE IF( IJOBVR.LE.0 ) THEN
+         INFO = -2
+      ELSE IF( ( .NOT.WANTST ) .AND. ( .NOT.LSAME( SORT, 'N' ) ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSV .OR. WANTSB ) .OR.
+     $         ( .NOT.WANTST .AND. .NOT.WANTSN ) ) THEN
+         INFO = -5
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      ELSE IF( LDVSL.LT.1 .OR. ( ILVSL .AND. LDVSL.LT.N ) ) THEN
+         INFO = -15
+      ELSE IF( LDVSR.LT.1 .OR. ( ILVSR .AND. LDVSR.LT.N ) ) THEN
+         INFO = -17
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. LWORK.GE.1 ) THEN
+         MINWRK = MAX( 1, 2*N )
+         MAXWRK = N + N*ILAENV( 1, 'ZGEQRF', ' ', N, 1, N, 0 )
+         IF( ILVSL ) THEN
+            MAXWRK = MAX( MAXWRK, N+N*ILAENV( 1, 'ZUNGQR', ' ', N, 1, N,
+     $               -1 ) )
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+      IF( .NOT.WANTSN ) THEN
+         LIWMIN = N + 2
+      ELSE
+         LIWMIN = 1
+      END IF
+      IWORK( 1 ) = LIWMIN
+*
+      IF( INFO.EQ.0 .AND. LWORK.LT.MINWRK ) THEN
+         INFO = -21
+      ELSE IF( INFO.EQ.0 .AND. IJOB.GE.1 ) THEN
+         IF( LIWORK.LT.LIWMIN )
+     $      INFO = -24
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGGESX', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         SDIM = 0
+         RETURN
+      END IF
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'P' )
+      SMLNUM = DLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SQRT( SMLNUM ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = ZLANGE( 'M', N, N, A, LDA, RWORK )
+      ILASCL = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         ANRMTO = SMLNUM
+         ILASCL = .TRUE.
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         ANRMTO = BIGNUM
+         ILASCL = .TRUE.
+      END IF
+      IF( ILASCL )
+     $   CALL ZLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR )
+*
+*     Scale B if max element outside range [SMLNUM,BIGNUM]
+*
+      BNRM = ZLANGE( 'M', N, N, B, LDB, RWORK )
+      ILBSCL = .FALSE.
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+         BNRMTO = SMLNUM
+         ILBSCL = .TRUE.
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+         BNRMTO = BIGNUM
+         ILBSCL = .TRUE.
+      END IF
+      IF( ILBSCL )
+     $   CALL ZLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR )
+*
+*     Permute the matrix to make it more nearly triangular
+*     (Real Workspace: need 6*N)
+*
+      ILEFT = 1
+      IRIGHT = N + 1
+      IRWRK = IRIGHT + N
+      CALL ZGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ),
+     $             RWORK( IRIGHT ), RWORK( IRWRK ), IERR )
+*
+*     Reduce B to triangular form (QR decomposition of B)
+*     (Complex Workspace: need N, prefer N*NB)
+*
+      IROWS = IHI + 1 - ILO
+      ICOLS = N + 1 - ILO
+      ITAU = 1
+      IWRK = ITAU + IROWS
+      CALL ZGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ),
+     $             WORK( IWRK ), LWORK+1-IWRK, IERR )
+*
+*     Apply the unitary transformation to matrix A
+*     (Complex Workspace: need N, prefer N*NB)
+*
+      CALL ZUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB,
+     $             WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ),
+     $             LWORK+1-IWRK, IERR )
+*
+*     Initialize VSL
+*     (Complex Workspace: need N, prefer N*NB)
+*
+      IF( ILVSL ) THEN
+         CALL ZLASET( 'Full', N, N, CZERO, CONE, VSL, LDVSL )
+         CALL ZLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB,
+     $                VSL( ILO+1, ILO ), LDVSL )
+         CALL ZUNGQR( IROWS, IROWS, IROWS, VSL( ILO, ILO ), LDVSL,
+     $                WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR )
+      END IF
+*
+*     Initialize VSR
+*
+      IF( ILVSR )
+     $   CALL ZLASET( 'Full', N, N, CZERO, CONE, VSR, LDVSR )
+*
+*     Reduce to generalized Hessenberg form
+*     (Workspace: none needed)
+*
+      CALL ZGGHRD( JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB, VSL,
+     $             LDVSL, VSR, LDVSR, IERR )
+*
+      SDIM = 0
+*
+*     Perform QZ algorithm, computing Schur vectors if desired
+*     (Complex Workspace: need N)
+*     (Real Workspace:    need N)
+*
+      IWRK = ITAU
+      CALL ZHGEQZ( 'S', JOBVSL, JOBVSR, N, ILO, IHI, A, LDA, B, LDB,
+     $             ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, WORK( IWRK ),
+     $             LWORK+1-IWRK, RWORK( IRWRK ), IERR )
+      IF( IERR.NE.0 ) THEN
+         IF( IERR.GT.0 .AND. IERR.LE.N ) THEN
+            INFO = IERR
+         ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN
+            INFO = IERR - N
+         ELSE
+            INFO = N + 1
+         END IF
+         GO TO 40
+      END IF
+*
+*     Sort eigenvalues ALPHA/BETA and compute the reciprocal of
+*     condition number(s)
+*
+      IF( WANTST ) THEN
+*
+*        Undo scaling on eigenvalues before DELZTGing
+*
+         IF( ILASCL )
+     $      CALL ZLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR )
+         IF( ILBSCL )
+     $      CALL ZLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR )
+*
+*        Select eigenvalues
+*
+         DO 10 I = 1, N
+            BWORK( I ) = DELCTG( ALPHA( I ), BETA( I ) )
+   10    CONTINUE
+*
+*        Reorder eigenvalues, transform Generalized Schur vectors, and
+*        compute reciprocal condition numbers
+*        (Complex Workspace: If IJOB >= 1, need MAX(1, 2*SDIM*(N-SDIM))
+*                            otherwise, need 1 )
+*
+         CALL ZTGSEN( IJOB, ILVSL, ILVSR, BWORK, N, A, LDA, B, LDB,
+     $                ALPHA, BETA, VSL, LDVSL, VSR, LDVSR, SDIM, PL, PR,
+     $                DIF, WORK( IWRK ), LWORK-IWRK+1, IWORK, LIWORK,
+     $                IERR )
+*
+         IF( IJOB.GE.1 )
+     $      MAXWRK = MAX( MAXWRK, 2*SDIM*( N-SDIM ) )
+         IF( IERR.EQ.-21 ) THEN
+*
+*            not enough complex workspace
+*
+            INFO = -21
+         ELSE
+            RCONDE( 1 ) = PL
+            RCONDE( 2 ) = PL
+            RCONDV( 1 ) = DIF( 1 )
+            RCONDV( 2 ) = DIF( 2 )
+            IF( IERR.EQ.1 )
+     $         INFO = N + 3
+         END IF
+*
+      END IF
+*
+*     Apply permutation to VSL and VSR
+*     (Workspace: none needed)
+*
+      IF( ILVSL )
+     $   CALL ZGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ),
+     $                RWORK( IRIGHT ), N, VSL, LDVSL, IERR )
+*
+      IF( ILVSR )
+     $   CALL ZGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ),
+     $                RWORK( IRIGHT ), N, VSR, LDVSR, IERR )
+*
+*     Undo scaling
+*
+      IF( ILASCL ) THEN
+         CALL ZLASCL( 'U', 0, 0, ANRMTO, ANRM, N, N, A, LDA, IERR )
+         CALL ZLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR )
+      END IF
+*
+      IF( ILBSCL ) THEN
+         CALL ZLASCL( 'U', 0, 0, BNRMTO, BNRM, N, N, B, LDB, IERR )
+         CALL ZLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR )
+      END IF
+*
+   20 CONTINUE
+*
+      IF( WANTST ) THEN
+*
+*        Check if reordering is correct
+*
+         LASTSL = .TRUE.
+         SDIM = 0
+         DO 30 I = 1, N
+            CURSL = DELCTG( ALPHA( I ), BETA( I ) )
+            IF( CURSL )
+     $         SDIM = SDIM + 1
+            IF( CURSL .AND. .NOT.LASTSL )
+     $         INFO = N + 2
+            LASTSL = CURSL
+   30    CONTINUE
+*
+      END IF
+*
+   40 CONTINUE
+*
+      WORK( 1 ) = MAXWRK
+      IWORK( 1 ) = LIWMIN
+*
+      RETURN
+*
+*     End of ZGGESX
+*
+      END
diff --git a/costa/native/external/lapack/zggev.f b/costa/native/external/lapack/zggev.f
new file mode 100644
index 000000000..33228cb99
--- /dev/null
+++ b/costa/native/external/lapack/zggev.f
@@ -0,0 +1,448 @@
+      SUBROUTINE ZGGEV( JOBVL, JOBVR, N, A, LDA, B, LDB, ALPHA, BETA,
+     $                  VL, LDVL, VR, LDVR, WORK, LWORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBVL, JOBVR
+      INTEGER            INFO, LDA, LDB, LDVL, LDVR, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   RWORK( * )
+      COMPLEX*16         A( LDA, * ), ALPHA( * ), B( LDB, * ),
+     $                   BETA( * ), VL( LDVL, * ), VR( LDVR, * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGGEV computes for a pair of N-by-N complex nonsymmetric matrices
+*  (A,B), the generalized eigenvalues, and optionally, the left and/or
+*  right generalized eigenvectors.
+*
+*  A generalized eigenvalue for a pair of matrices (A,B) is a scalar
+*  lambda or a ratio alpha/beta = lambda, such that A - lambda*B is
+*  singular. It is usually represented as the pair (alpha,beta), as
+*  there is a reasonable interpretation for beta=0, and even for both
+*  being zero.
+*
+*  The right generalized eigenvector v(j) corresponding to the
+*  generalized eigenvalue lambda(j) of (A,B) satisfies
+*
+*               A * v(j) = lambda(j) * B * v(j).
+*
+*  The left generalized eigenvector u(j) corresponding to the
+*  generalized eigenvalues lambda(j) of (A,B) satisfies
+*
+*               u(j)**H * A = lambda(j) * u(j)**H * B
+*
+*  where u(j)**H is the conjugate-transpose of u(j).
+*
+*  Arguments
+*  =========
+*
+*  JOBVL   (input) CHARACTER*1
+*          = 'N':  do not compute the left generalized eigenvectors;
+*          = 'V':  compute the left generalized eigenvectors.
+*
+*  JOBVR   (input) CHARACTER*1
+*          = 'N':  do not compute the right generalized eigenvectors;
+*          = 'V':  compute the right generalized eigenvectors.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A, B, VL, and VR.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA, N)
+*          On entry, the matrix A in the pair (A,B).
+*          On exit, A has been overwritten.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of A.  LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB, N)
+*          On entry, the matrix B in the pair (A,B).
+*          On exit, B has been overwritten.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of B.  LDB >= max(1,N).
+*
+*  ALPHA   (output) COMPLEX*16 array, dimension (N)
+*  BETA    (output) COMPLEX*16 array, dimension (N)
+*          On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the
+*          generalized eigenvalues.
+*
+*          Note: the quotients ALPHA(j)/BETA(j) may easily over- or
+*          underflow, and BETA(j) may even be zero.  Thus, the user
+*          should avoid naively computing the ratio alpha/beta.
+*          However, ALPHA will be always less than and usually
+*          comparable with norm(A) in magnitude, and BETA always less
+*          than and usually comparable with norm(B).
+*
+*  VL      (output) COMPLEX*16 array, dimension (LDVL,N)
+*          If JOBVL = 'V', the left generalized eigenvectors u(j) are
+*          stored one after another in the columns of VL, in the same
+*          order as their eigenvalues.
+*          Each eigenvector will be scaled so the largest component
+*          will have abs(real part) + abs(imag. part) = 1.
+*          Not referenced if JOBVL = 'N'.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the matrix VL. LDVL >= 1, and
+*          if JOBVL = 'V', LDVL >= N.
+*
+*  VR      (output) COMPLEX*16 array, dimension (LDVR,N)
+*          If JOBVR = 'V', the right generalized eigenvectors v(j) are
+*          stored one after another in the columns of VR, in the same
+*          order as their eigenvalues.
+*          Each eigenvector will be scaled so the largest component
+*          will have abs(real part) + abs(imag. part) = 1.
+*          Not referenced if JOBVR = 'N'.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the matrix VR. LDVR >= 1, and
+*          if JOBVR = 'V', LDVR >= N.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,2*N).
+*          For good performance, LWORK must generally be larger.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace/output) DOUBLE PRECISION array, dimension (8*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          =1,...,N:
+*                The QZ iteration failed.  No eigenvectors have been
+*                calculated, but ALPHA(j) and BETA(j) should be
+*                correct for j=INFO+1,...,N.
+*          > N:  =N+1: other then QZ iteration failed in DHGEQZ,
+*                =N+2: error return from DTGEVC.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+      COMPLEX*16         CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0D0, 0.0D0 ),
+     $                   CONE = ( 1.0D0, 0.0D0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ILASCL, ILBSCL, ILV, ILVL, ILVR, LQUERY
+      CHARACTER          CHTEMP
+      INTEGER            ICOLS, IERR, IHI, IJOBVL, IJOBVR, ILEFT, ILO,
+     $                   IN, IRIGHT, IROWS, IRWRK, ITAU, IWRK, JC, JR,
+     $                   LWKMIN, LWKOPT
+      DOUBLE PRECISION   ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS,
+     $                   SMLNUM, TEMP
+      COMPLEX*16         X
+*     ..
+*     .. Local Arrays ..
+      LOGICAL            LDUMMA( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGEQRF, ZGGBAK, ZGGBAL, ZGGHRD, ZHGEQZ,
+     $                   ZLACPY, ZLASCL, ZLASET, ZTGEVC, ZUNGQR, ZUNMQR
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, ZLANGE
+      EXTERNAL           LSAME, ILAENV, DLAMCH, ZLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX, SQRT
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   ABS1
+*     ..
+*     .. Statement Function definitions ..
+      ABS1( X ) = ABS( DBLE( X ) ) + ABS( DIMAG( X ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode the input arguments
+*
+      IF( LSAME( JOBVL, 'N' ) ) THEN
+         IJOBVL = 1
+         ILVL = .FALSE.
+      ELSE IF( LSAME( JOBVL, 'V' ) ) THEN
+         IJOBVL = 2
+         ILVL = .TRUE.
+      ELSE
+         IJOBVL = -1
+         ILVL = .FALSE.
+      END IF
+*
+      IF( LSAME( JOBVR, 'N' ) ) THEN
+         IJOBVR = 1
+         ILVR = .FALSE.
+      ELSE IF( LSAME( JOBVR, 'V' ) ) THEN
+         IJOBVR = 2
+         ILVR = .TRUE.
+      ELSE
+         IJOBVR = -1
+         ILVR = .FALSE.
+      END IF
+      ILV = ILVL .OR. ILVR
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( IJOBVL.LE.0 ) THEN
+         INFO = -1
+      ELSE IF( IJOBVR.LE.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN
+         INFO = -11
+      ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN
+         INFO = -13
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV. The workspace is
+*       computed assuming ILO = 1 and IHI = N, the worst case.)
+*
+      LWKMIN = 1
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN
+         LWKOPT = N + N*ILAENV( 1, 'ZGEQRF', ' ', N, 1, N, 0 )
+         LWKMIN = MAX( 1, 2*N )
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY )
+     $   INFO = -15
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGGEV ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      WORK( 1 ) = LWKOPT
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'E' )*DLAMCH( 'B' )
+      SMLNUM = DLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SQRT( SMLNUM ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = ZLANGE( 'M', N, N, A, LDA, RWORK )
+      ILASCL = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         ANRMTO = SMLNUM
+         ILASCL = .TRUE.
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         ANRMTO = BIGNUM
+         ILASCL = .TRUE.
+      END IF
+      IF( ILASCL )
+     $   CALL ZLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR )
+*
+*     Scale B if max element outside range [SMLNUM,BIGNUM]
+*
+      BNRM = ZLANGE( 'M', N, N, B, LDB, RWORK )
+      ILBSCL = .FALSE.
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+         BNRMTO = SMLNUM
+         ILBSCL = .TRUE.
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+         BNRMTO = BIGNUM
+         ILBSCL = .TRUE.
+      END IF
+      IF( ILBSCL )
+     $   CALL ZLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR )
+*
+*     Permute the matrices A, B to isolate eigenvalues if possible
+*     (Real Workspace: need 6*N)
+*
+      ILEFT = 1
+      IRIGHT = N + 1
+      IRWRK = IRIGHT + N
+      CALL ZGGBAL( 'P', N, A, LDA, B, LDB, ILO, IHI, RWORK( ILEFT ),
+     $             RWORK( IRIGHT ), RWORK( IRWRK ), IERR )
+*
+*     Reduce B to triangular form (QR decomposition of B)
+*     (Complex Workspace: need N, prefer N*NB)
+*
+      IROWS = IHI + 1 - ILO
+      IF( ILV ) THEN
+         ICOLS = N + 1 - ILO
+      ELSE
+         ICOLS = IROWS
+      END IF
+      ITAU = 1
+      IWRK = ITAU + IROWS
+      CALL ZGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ),
+     $             WORK( IWRK ), LWORK+1-IWRK, IERR )
+*
+*     Apply the orthogonal transformation to matrix A
+*     (Complex Workspace: need N, prefer N*NB)
+*
+      CALL ZUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB,
+     $             WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ),
+     $             LWORK+1-IWRK, IERR )
+*
+*     Initialize VL
+*     (Complex Workspace: need N, prefer N*NB)
+*
+      IF( ILVL ) THEN
+         CALL ZLASET( 'Full', N, N, CZERO, CONE, VL, LDVL )
+         CALL ZLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB,
+     $                VL( ILO+1, ILO ), LDVL )
+         CALL ZUNGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL,
+     $                WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR )
+      END IF
+*
+*     Initialize VR
+*
+      IF( ILVR )
+     $   CALL ZLASET( 'Full', N, N, CZERO, CONE, VR, LDVR )
+*
+*     Reduce to generalized Hessenberg form
+*
+      IF( ILV ) THEN
+*
+*        Eigenvectors requested -- work on whole matrix.
+*
+         CALL ZGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL,
+     $                LDVL, VR, LDVR, IERR )
+      ELSE
+         CALL ZGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA,
+     $                B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IERR )
+      END IF
+*
+*     Perform QZ algorithm (Compute eigenvalues, and optionally, the
+*     Schur form and Schur vectors)
+*     (Complex Workspace: need N)
+*     (Real Workspace: need N)
+*
+      IWRK = ITAU
+      IF( ILV ) THEN
+         CHTEMP = 'S'
+      ELSE
+         CHTEMP = 'E'
+      END IF
+      CALL ZHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB,
+     $             ALPHA, BETA, VL, LDVL, VR, LDVR, WORK( IWRK ),
+     $             LWORK+1-IWRK, RWORK( IRWRK ), IERR )
+      IF( IERR.NE.0 ) THEN
+         IF( IERR.GT.0 .AND. IERR.LE.N ) THEN
+            INFO = IERR
+         ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN
+            INFO = IERR - N
+         ELSE
+            INFO = N + 1
+         END IF
+         GO TO 70
+      END IF
+*
+*     Compute Eigenvectors
+*     (Real Workspace: need 2*N)
+*     (Complex Workspace: need 2*N)
+*
+      IF( ILV ) THEN
+         IF( ILVL ) THEN
+            IF( ILVR ) THEN
+               CHTEMP = 'B'
+            ELSE
+               CHTEMP = 'L'
+            END IF
+         ELSE
+            CHTEMP = 'R'
+         END IF
+*
+         CALL ZTGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL, LDVL,
+     $                VR, LDVR, N, IN, WORK( IWRK ), RWORK( IRWRK ),
+     $                IERR )
+         IF( IERR.NE.0 ) THEN
+            INFO = N + 2
+            GO TO 70
+         END IF
+*
+*        Undo balancing on VL and VR and normalization
+*        (Workspace: none needed)
+*
+         IF( ILVL ) THEN
+            CALL ZGGBAK( 'P', 'L', N, ILO, IHI, RWORK( ILEFT ),
+     $                   RWORK( IRIGHT ), N, VL, LDVL, IERR )
+            DO 30 JC = 1, N
+               TEMP = ZERO
+               DO 10 JR = 1, N
+                  TEMP = MAX( TEMP, ABS1( VL( JR, JC ) ) )
+   10          CONTINUE
+               IF( TEMP.LT.SMLNUM )
+     $            GO TO 30
+               TEMP = ONE / TEMP
+               DO 20 JR = 1, N
+                  VL( JR, JC ) = VL( JR, JC )*TEMP
+   20          CONTINUE
+   30       CONTINUE
+         END IF
+         IF( ILVR ) THEN
+            CALL ZGGBAK( 'P', 'R', N, ILO, IHI, RWORK( ILEFT ),
+     $                   RWORK( IRIGHT ), N, VR, LDVR, IERR )
+            DO 60 JC = 1, N
+               TEMP = ZERO
+               DO 40 JR = 1, N
+                  TEMP = MAX( TEMP, ABS1( VR( JR, JC ) ) )
+   40          CONTINUE
+               IF( TEMP.LT.SMLNUM )
+     $            GO TO 60
+               TEMP = ONE / TEMP
+               DO 50 JR = 1, N
+                  VR( JR, JC ) = VR( JR, JC )*TEMP
+   50          CONTINUE
+   60       CONTINUE
+         END IF
+      END IF
+*
+*     Undo scaling if necessary
+*
+      IF( ILASCL )
+     $   CALL ZLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR )
+*
+      IF( ILBSCL )
+     $   CALL ZLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR )
+*
+   70 CONTINUE
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of ZGGEV
+*
+      END
diff --git a/costa/native/external/lapack/zggevx.f b/costa/native/external/lapack/zggevx.f
new file mode 100644
index 000000000..e98e7bee3
--- /dev/null
+++ b/costa/native/external/lapack/zggevx.f
@@ -0,0 +1,640 @@
+      SUBROUTINE ZGGEVX( BALANC, JOBVL, JOBVR, SENSE, N, A, LDA, B, LDB,
+     $                   ALPHA, BETA, VL, LDVL, VR, LDVR, ILO, IHI,
+     $                   LSCALE, RSCALE, ABNRM, BBNRM, RCONDE, RCONDV,
+     $                   WORK, LWORK, RWORK, IWORK, BWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          BALANC, JOBVL, JOBVR, SENSE
+      INTEGER            IHI, ILO, INFO, LDA, LDB, LDVL, LDVR, LWORK, N
+      DOUBLE PRECISION   ABNRM, BBNRM
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            BWORK( * )
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   LSCALE( * ), RCONDE( * ), RCONDV( * ),
+     $                   RSCALE( * ), RWORK( * )
+      COMPLEX*16         A( LDA, * ), ALPHA( * ), B( LDB, * ),
+     $                   BETA( * ), VL( LDVL, * ), VR( LDVR, * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGGEVX computes for a pair of N-by-N complex nonsymmetric matrices
+*  (A,B) the generalized eigenvalues, and optionally, the left and/or
+*  right generalized eigenvectors.
+*
+*  Optionally, it also computes a balancing transformation to improve
+*  the conditioning of the eigenvalues and eigenvectors (ILO, IHI,
+*  LSCALE, RSCALE, ABNRM, and BBNRM), reciprocal condition numbers for
+*  the eigenvalues (RCONDE), and reciprocal condition numbers for the
+*  right eigenvectors (RCONDV).
+*
+*  A generalized eigenvalue for a pair of matrices (A,B) is a scalar
+*  lambda or a ratio alpha/beta = lambda, such that A - lambda*B is
+*  singular. It is usually represented as the pair (alpha,beta), as
+*  there is a reasonable interpretation for beta=0, and even for both
+*  being zero.
+*
+*  The right eigenvector v(j) corresponding to the eigenvalue lambda(j)
+*  of (A,B) satisfies
+*                   A * v(j) = lambda(j) * B * v(j) .
+*  The left eigenvector u(j) corresponding to the eigenvalue lambda(j)
+*  of (A,B) satisfies
+*                   u(j)**H * A  = lambda(j) * u(j)**H * B.
+*  where u(j)**H is the conjugate-transpose of u(j).
+*
+*
+*  Arguments
+*  =========
+*
+*  BALANC  (input) CHARACTER*1
+*          Specifies the balance option to be performed:
+*          = 'N':  do not diagonally scale or permute;
+*          = 'P':  permute only;
+*          = 'S':  scale only;
+*          = 'B':  both permute and scale.
+*          Computed reciprocal condition numbers will be for the
+*          matrices after permuting and/or balancing. Permuting does
+*          not change condition numbers (in exact arithmetic), but
+*          balancing does.
+*
+*  JOBVL   (input) CHARACTER*1
+*          = 'N':  do not compute the left generalized eigenvectors;
+*          = 'V':  compute the left generalized eigenvectors.
+*
+*  JOBVR   (input) CHARACTER*1
+*          = 'N':  do not compute the right generalized eigenvectors;
+*          = 'V':  compute the right generalized eigenvectors.
+*
+*  SENSE   (input) CHARACTER*1
+*          Determines which reciprocal condition numbers are computed.
+*          = 'N': none are computed;
+*          = 'E': computed for eigenvalues only;
+*          = 'V': computed for eigenvectors only;
+*          = 'B': computed for eigenvalues and eigenvectors.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A, B, VL, and VR.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA, N)
+*          On entry, the matrix A in the pair (A,B).
+*          On exit, A has been overwritten. If JOBVL='V' or JOBVR='V'
+*          or both, then A contains the first part of the complex Schur
+*          form of the "balanced" versions of the input A and B.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of A.  LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB, N)
+*          On entry, the matrix B in the pair (A,B).
+*          On exit, B has been overwritten. If JOBVL='V' or JOBVR='V'
+*          or both, then B contains the second part of the complex
+*          Schur form of the "balanced" versions of the input A and B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of B.  LDB >= max(1,N).
+*
+*  ALPHA   (output) COMPLEX*16 array, dimension (N)
+*  BETA    (output) COMPLEX*16 array, dimension (N)
+*          On exit, ALPHA(j)/BETA(j), j=1,...,N, will be the generalized
+*          eigenvalues.
+*
+*          Note: the quotient ALPHA(j)/BETA(j) ) may easily over- or
+*          underflow, and BETA(j) may even be zero.  Thus, the user
+*          should avoid naively computing the ratio ALPHA/BETA.
+*          However, ALPHA will be always less than and usually
+*          comparable with norm(A) in magnitude, and BETA always less
+*          than and usually comparable with norm(B).
+*
+*  VL      (output) COMPLEX*16 array, dimension (LDVL,N)
+*          If JOBVL = 'V', the left generalized eigenvectors u(j) are
+*          stored one after another in the columns of VL, in the same
+*          order as their eigenvalues.
+*          Each eigenvector will be scaled so the largest component
+*          will have abs(real part) + abs(imag. part) = 1.
+*          Not referenced if JOBVL = 'N'.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the matrix VL. LDVL >= 1, and
+*          if JOBVL = 'V', LDVL >= N.
+*
+*  VR      (output) COMPLEX*16 array, dimension (LDVR,N)
+*          If JOBVR = 'V', the right generalized eigenvectors v(j) are
+*          stored one after another in the columns of VR, in the same
+*          order as their eigenvalues.
+*          Each eigenvector will be scaled so the largest component
+*          will have abs(real part) + abs(imag. part) = 1.
+*          Not referenced if JOBVR = 'N'.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the matrix VR. LDVR >= 1, and
+*          if JOBVR = 'V', LDVR >= N.
+*
+*  ILO,IHI (output) INTEGER
+*          ILO and IHI are integer values such that on exit
+*          A(i,j) = 0 and B(i,j) = 0 if i > j and
+*          j = 1,...,ILO-1 or i = IHI+1,...,N.
+*          If BALANC = 'N' or 'S', ILO = 1 and IHI = N.
+*
+*  LSCALE  (output) DOUBLE PRECISION array, dimension (N)
+*          Details of the permutations and scaling factors applied
+*          to the left side of A and B.  If PL(j) is the index of the
+*          row interchanged with row j, and DL(j) is the scaling
+*          factor applied to row j, then
+*            LSCALE(j) = PL(j)  for j = 1,...,ILO-1
+*                      = DL(j)  for j = ILO,...,IHI
+*                      = PL(j)  for j = IHI+1,...,N.
+*          The order in which the interchanges are made is N to IHI+1,
+*          then 1 to ILO-1.
+*
+*  RSCALE  (output) DOUBLE PRECISION array, dimension (N)
+*          Details of the permutations and scaling factors applied
+*          to the right side of A and B.  If PR(j) is the index of the
+*          column interchanged with column j, and DR(j) is the scaling
+*          factor applied to column j, then
+*            RSCALE(j) = PR(j)  for j = 1,...,ILO-1
+*                      = DR(j)  for j = ILO,...,IHI
+*                      = PR(j)  for j = IHI+1,...,N
+*          The order in which the interchanges are made is N to IHI+1,
+*          then 1 to ILO-1.
+*
+*  ABNRM   (output) DOUBLE PRECISION
+*          The one-norm of the balanced matrix A.
+*
+*  BBNRM   (output) DOUBLE PRECISION
+*          The one-norm of the balanced matrix B.
+*
+*  RCONDE  (output) DOUBLE PRECISION array, dimension (N)
+*          If SENSE = 'E' or 'B', the reciprocal condition numbers of
+*          the selected eigenvalues, stored in consecutive elements of
+*          the array.
+*          If SENSE = 'V', RCONDE is not referenced.
+*
+*  RCONDV  (output) DOUBLE PRECISION array, dimension (N)
+*          If JOB = 'V' or 'B', the estimated reciprocal condition
+*          numbers of the selected eigenvectors, stored in consecutive
+*          elements of the array. If the eigenvalues cannot be reordered
+*          to compute RCONDV(j), RCONDV(j) is set to 0; this can only
+*          occur when the true value would be very small anyway.
+*          If SENSE = 'E', RCONDV is not referenced.
+*          Not referenced if JOB = 'E'.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,2*N).
+*          If SENSE = 'N' or 'E', LWORK >= 2*N.
+*          If SENSE = 'V' or 'B', LWORK >= 2*N*N+2*N.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (6*N)
+*          Real workspace.
+*
+*  IWORK   (workspace) INTEGER array, dimension (N+2)
+*          If SENSE = 'E', IWORK is not referenced.
+*
+*  BWORK   (workspace) LOGICAL array, dimension (N)
+*          If SENSE = 'N', BWORK is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          = 1,...,N:
+*                The QZ iteration failed.  No eigenvectors have been
+*                calculated, but ALPHA(j) and BETA(j) should be correct
+*                for j=INFO+1,...,N.
+*          > N:  =N+1: other than QZ iteration failed in ZHGEQZ.
+*                =N+2: error return from ZTGEVC.
+*
+*  Further Details
+*  ===============
+*
+*  Balancing a matrix pair (A,B) includes, first, permuting rows and
+*  columns to isolate eigenvalues, second, applying diagonal similarity
+*  transformation to the rows and columns to make the rows and columns
+*  as close in norm as possible. The computed reciprocal condition
+*  numbers correspond to the balanced matrix. Permuting rows and columns
+*  will not change the condition numbers (in exact arithmetic) but
+*  diagonal scaling will.  For further explanation of balancing, see
+*  section 4.11.1.2 of LAPACK Users' Guide.
+*
+*  An approximate error bound on the chordal distance between the i-th
+*  computed generalized eigenvalue w and the corresponding exact
+*  eigenvalue lambda is
+*
+*       chord(w, lambda) <= EPS * norm(ABNRM, BBNRM) / RCONDE(I)
+*
+*  An approximate error bound for the angle between the i-th computed
+*  eigenvector VL(i) or VR(i) is given by
+*
+*       EPS * norm(ABNRM, BBNRM) / DIF(i).
+*
+*  For further explanation of the reciprocal condition numbers RCONDE
+*  and RCONDV, see section 4.11 of LAPACK User's Guide.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+      COMPLEX*16         CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ILASCL, ILBSCL, ILV, ILVL, ILVR, LQUERY,
+     $                   WANTSB, WANTSE, WANTSN, WANTSV
+      CHARACTER          CHTEMP
+      INTEGER            I, ICOLS, IERR, IJOBVL, IJOBVR, IN, IROWS,
+     $                   ITAU, IWRK, IWRK1, J, JC, JR, M, MAXWRK, MINWRK
+      DOUBLE PRECISION   ANRM, ANRMTO, BIGNUM, BNRM, BNRMTO, EPS,
+     $                   SMLNUM, TEMP
+      COMPLEX*16         X
+*     ..
+*     .. Local Arrays ..
+      LOGICAL            LDUMMA( 1 )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASCL, XERBLA, ZGEQRF, ZGGBAK, ZGGBAL, ZGGHRD,
+     $                   ZHGEQZ, ZLACPY, ZLASCL, ZLASET, ZTGEVC, ZTGSNA,
+     $                   ZUNGQR, ZUNMQR
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, ZLANGE
+      EXTERNAL           LSAME, ILAENV, DLAMCH, ZLANGE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX, SQRT
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   ABS1
+*     ..
+*     .. Statement Function definitions ..
+      ABS1( X ) = ABS( DBLE( X ) ) + ABS( DIMAG( X ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode the input arguments
+*
+      IF( LSAME( JOBVL, 'N' ) ) THEN
+         IJOBVL = 1
+         ILVL = .FALSE.
+      ELSE IF( LSAME( JOBVL, 'V' ) ) THEN
+         IJOBVL = 2
+         ILVL = .TRUE.
+      ELSE
+         IJOBVL = -1
+         ILVL = .FALSE.
+      END IF
+*
+      IF( LSAME( JOBVR, 'N' ) ) THEN
+         IJOBVR = 1
+         ILVR = .FALSE.
+      ELSE IF( LSAME( JOBVR, 'V' ) ) THEN
+         IJOBVR = 2
+         ILVR = .TRUE.
+      ELSE
+         IJOBVR = -1
+         ILVR = .FALSE.
+      END IF
+      ILV = ILVL .OR. ILVR
+*
+      WANTSN = LSAME( SENSE, 'N' )
+      WANTSE = LSAME( SENSE, 'E' )
+      WANTSV = LSAME( SENSE, 'V' )
+      WANTSB = LSAME( SENSE, 'B' )
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.( LSAME( BALANC, 'N' ) .OR. LSAME( BALANC,
+     $    'S' ) .OR. LSAME( BALANC, 'P' ) .OR. LSAME( BALANC, 'B' ) ) )
+     $     THEN
+         INFO = -1
+      ELSE IF( IJOBVL.LE.0 ) THEN
+         INFO = -2
+      ELSE IF( IJOBVR.LE.0 ) THEN
+         INFO = -3
+      ELSE IF( .NOT.( WANTSN .OR. WANTSE .OR. WANTSB .OR. WANTSV ) )
+     $          THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDVL.LT.1 .OR. ( ILVL .AND. LDVL.LT.N ) ) THEN
+         INFO = -13
+      ELSE IF( LDVR.LT.1 .OR. ( ILVR .AND. LDVR.LT.N ) ) THEN
+         INFO = -15
+      END IF
+*
+*     Compute workspace
+*      (Note: Comments in the code beginning "Workspace:" describe the
+*       minimal amount of workspace needed at that point in the code,
+*       as well as the preferred amount for good performance.
+*       NB refers to the optimal block size for the immediately
+*       following subroutine, as returned by ILAENV. The workspace is
+*       computed assuming ILO = 1 and IHI = N, the worst case.)
+*
+      MINWRK = 1
+      IF( INFO.EQ.0 .AND. ( LWORK.GE.1 .OR. LQUERY ) ) THEN
+         MAXWRK = N + N*ILAENV( 1, 'ZGEQRF', ' ', N, 1, N, 0 )
+         IF( WANTSE ) THEN
+            MINWRK = MAX( 1, 2*N )
+         ELSE IF( WANTSV .OR. WANTSB ) THEN
+            MINWRK = 2*N*N + 2*N
+            MAXWRK = MAX( MAXWRK, 2*N*N+2*N )
+         END IF
+         WORK( 1 ) = MAXWRK
+      END IF
+*
+      IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
+         INFO = -25
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGGEVX', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'P' )
+      SMLNUM = DLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SQRT( SMLNUM ) / EPS
+      BIGNUM = ONE / SMLNUM
+*
+*     Scale A if max element outside range [SMLNUM,BIGNUM]
+*
+      ANRM = ZLANGE( 'M', N, N, A, LDA, RWORK )
+      ILASCL = .FALSE.
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
+         ANRMTO = SMLNUM
+         ILASCL = .TRUE.
+      ELSE IF( ANRM.GT.BIGNUM ) THEN
+         ANRMTO = BIGNUM
+         ILASCL = .TRUE.
+      END IF
+      IF( ILASCL )
+     $   CALL ZLASCL( 'G', 0, 0, ANRM, ANRMTO, N, N, A, LDA, IERR )
+*
+*     Scale B if max element outside range [SMLNUM,BIGNUM]
+*
+      BNRM = ZLANGE( 'M', N, N, B, LDB, RWORK )
+      ILBSCL = .FALSE.
+      IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
+         BNRMTO = SMLNUM
+         ILBSCL = .TRUE.
+      ELSE IF( BNRM.GT.BIGNUM ) THEN
+         BNRMTO = BIGNUM
+         ILBSCL = .TRUE.
+      END IF
+      IF( ILBSCL )
+     $   CALL ZLASCL( 'G', 0, 0, BNRM, BNRMTO, N, N, B, LDB, IERR )
+*
+*     Permute and/or balance the matrix pair (A,B)
+*     (Real Workspace: need 6*N)
+*
+      CALL ZGGBAL( BALANC, N, A, LDA, B, LDB, ILO, IHI, LSCALE, RSCALE,
+     $             RWORK, IERR )
+*
+*     Compute ABNRM and BBNRM
+*
+      ABNRM = ZLANGE( '1', N, N, A, LDA, RWORK( 1 ) )
+      IF( ILASCL ) THEN
+         RWORK( 1 ) = ABNRM
+         CALL DLASCL( 'G', 0, 0, ANRMTO, ANRM, 1, 1, RWORK( 1 ), 1,
+     $                IERR )
+         ABNRM = RWORK( 1 )
+      END IF
+*
+      BBNRM = ZLANGE( '1', N, N, B, LDB, RWORK( 1 ) )
+      IF( ILBSCL ) THEN
+         RWORK( 1 ) = BBNRM
+         CALL DLASCL( 'G', 0, 0, BNRMTO, BNRM, 1, 1, RWORK( 1 ), 1,
+     $                IERR )
+         BBNRM = RWORK( 1 )
+      END IF
+*
+*     Reduce B to triangular form (QR decomposition of B)
+*     (Complex Workspace: need N, prefer N*NB )
+*
+      IROWS = IHI + 1 - ILO
+      IF( ILV .OR. .NOT.WANTSN ) THEN
+         ICOLS = N + 1 - ILO
+      ELSE
+         ICOLS = IROWS
+      END IF
+      ITAU = 1
+      IWRK = ITAU + IROWS
+      CALL ZGEQRF( IROWS, ICOLS, B( ILO, ILO ), LDB, WORK( ITAU ),
+     $             WORK( IWRK ), LWORK+1-IWRK, IERR )
+*
+*     Apply the unitary transformation to A
+*     (Complex Workspace: need N, prefer N*NB)
+*
+      CALL ZUNMQR( 'L', 'C', IROWS, ICOLS, IROWS, B( ILO, ILO ), LDB,
+     $             WORK( ITAU ), A( ILO, ILO ), LDA, WORK( IWRK ),
+     $             LWORK+1-IWRK, IERR )
+*
+*     Initialize VL and/or VR
+*     (Workspace: need N, prefer N*NB)
+*
+      IF( ILVL ) THEN
+         CALL ZLASET( 'Full', N, N, CZERO, CONE, VL, LDVL )
+         CALL ZLACPY( 'L', IROWS-1, IROWS-1, B( ILO+1, ILO ), LDB,
+     $                VL( ILO+1, ILO ), LDVL )
+         CALL ZUNGQR( IROWS, IROWS, IROWS, VL( ILO, ILO ), LDVL,
+     $                WORK( ITAU ), WORK( IWRK ), LWORK+1-IWRK, IERR )
+      END IF
+*
+      IF( ILVR )
+     $   CALL ZLASET( 'Full', N, N, CZERO, CONE, VR, LDVR )
+*
+*     Reduce to generalized Hessenberg form
+*     (Workspace: none needed)
+*
+      IF( ILV .OR. .NOT.WANTSN ) THEN
+*
+*        Eigenvectors requested -- work on whole matrix.
+*
+         CALL ZGGHRD( JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB, VL,
+     $                LDVL, VR, LDVR, IERR )
+      ELSE
+         CALL ZGGHRD( 'N', 'N', IROWS, 1, IROWS, A( ILO, ILO ), LDA,
+     $                B( ILO, ILO ), LDB, VL, LDVL, VR, LDVR, IERR )
+      END IF
+*
+*     Perform QZ algorithm (Compute eigenvalues, and optionally, the
+*     Schur forms and Schur vectors)
+*     (Complex Workspace: need N)
+*     (Real Workspace: need N)
+*
+      IWRK = ITAU
+      IF( ILV .OR. .NOT.WANTSN ) THEN
+         CHTEMP = 'S'
+      ELSE
+         CHTEMP = 'E'
+      END IF
+*
+      CALL ZHGEQZ( CHTEMP, JOBVL, JOBVR, N, ILO, IHI, A, LDA, B, LDB,
+     $             ALPHA, BETA, VL, LDVL, VR, LDVR, WORK( IWRK ),
+     $             LWORK+1-IWRK, RWORK, IERR )
+      IF( IERR.NE.0 ) THEN
+         IF( IERR.GT.0 .AND. IERR.LE.N ) THEN
+            INFO = IERR
+         ELSE IF( IERR.GT.N .AND. IERR.LE.2*N ) THEN
+            INFO = IERR - N
+         ELSE
+            INFO = N + 1
+         END IF
+         GO TO 90
+      END IF
+*
+*     Compute Eigenvectors and estimate condition numbers if desired
+*     ZTGEVC: (Complex Workspace: need 2*N )
+*             (Real Workspace:    need 2*N )
+*     ZTGSNA: (Complex Workspace: need 2*N*N if SENSE='V' or 'B')
+*             (Integer Workspace: need N+2 )
+*
+      IF( ILV .OR. .NOT.WANTSN ) THEN
+         IF( ILV ) THEN
+            IF( ILVL ) THEN
+               IF( ILVR ) THEN
+                  CHTEMP = 'B'
+               ELSE
+                  CHTEMP = 'L'
+               END IF
+            ELSE
+               CHTEMP = 'R'
+            END IF
+*
+            CALL ZTGEVC( CHTEMP, 'B', LDUMMA, N, A, LDA, B, LDB, VL,
+     $                   LDVL, VR, LDVR, N, IN, WORK( IWRK ), RWORK,
+     $                   IERR )
+            IF( IERR.NE.0 ) THEN
+               INFO = N + 2
+               GO TO 90
+            END IF
+         END IF
+*
+         IF( .NOT.WANTSN ) THEN
+*
+*           compute eigenvectors (DTGEVC) and estimate condition
+*           numbers (DTGSNA). Note that the definition of the condition
+*           number is not invariant under transformation (u,v) to
+*           (Q*u, Z*v), where (u,v) are eigenvectors of the generalized
+*           Schur form (S,T), Q and Z are orthogonal matrices. In order
+*           to avoid using extra 2*N*N workspace, we have to
+*           re-calculate eigenvectors and estimate the condition numbers
+*           one at a time.
+*
+            DO 20 I = 1, N
+*
+               DO 10 J = 1, N
+                  BWORK( J ) = .FALSE.
+   10          CONTINUE
+               BWORK( I ) = .TRUE.
+*
+               IWRK = N + 1
+               IWRK1 = IWRK + N
+*
+               IF( WANTSE .OR. WANTSB ) THEN
+                  CALL ZTGEVC( 'B', 'S', BWORK, N, A, LDA, B, LDB,
+     $                         WORK( 1 ), N, WORK( IWRK ), N, 1, M,
+     $                         WORK( IWRK1 ), RWORK, IERR )
+                  IF( IERR.NE.0 ) THEN
+                     INFO = N + 2
+                     GO TO 90
+                  END IF
+               END IF
+*
+               CALL ZTGSNA( SENSE, 'S', BWORK, N, A, LDA, B, LDB,
+     $                      WORK( 1 ), N, WORK( IWRK ), N, RCONDE( I ),
+     $                      RCONDV( I ), 1, M, WORK( IWRK1 ),
+     $                      LWORK-IWRK1+1, IWORK, IERR )
+*
+   20       CONTINUE
+         END IF
+      END IF
+*
+*     Undo balancing on VL and VR and normalization
+*     (Workspace: none needed)
+*
+      IF( ILVL ) THEN
+         CALL ZGGBAK( BALANC, 'L', N, ILO, IHI, LSCALE, RSCALE, N, VL,
+     $                LDVL, IERR )
+*
+         DO 50 JC = 1, N
+            TEMP = ZERO
+            DO 30 JR = 1, N
+               TEMP = MAX( TEMP, ABS1( VL( JR, JC ) ) )
+   30       CONTINUE
+            IF( TEMP.LT.SMLNUM )
+     $         GO TO 50
+            TEMP = ONE / TEMP
+            DO 40 JR = 1, N
+               VL( JR, JC ) = VL( JR, JC )*TEMP
+   40       CONTINUE
+   50    CONTINUE
+      END IF
+*
+      IF( ILVR ) THEN
+         CALL ZGGBAK( BALANC, 'R', N, ILO, IHI, LSCALE, RSCALE, N, VR,
+     $                LDVR, IERR )
+         DO 80 JC = 1, N
+            TEMP = ZERO
+            DO 60 JR = 1, N
+               TEMP = MAX( TEMP, ABS1( VR( JR, JC ) ) )
+   60       CONTINUE
+            IF( TEMP.LT.SMLNUM )
+     $         GO TO 80
+            TEMP = ONE / TEMP
+            DO 70 JR = 1, N
+               VR( JR, JC ) = VR( JR, JC )*TEMP
+   70       CONTINUE
+   80    CONTINUE
+      END IF
+*
+*     Undo scaling if necessary
+*
+      IF( ILASCL )
+     $   CALL ZLASCL( 'G', 0, 0, ANRMTO, ANRM, N, 1, ALPHA, N, IERR )
+*
+      IF( ILBSCL )
+     $   CALL ZLASCL( 'G', 0, 0, BNRMTO, BNRM, N, 1, BETA, N, IERR )
+*
+   90 CONTINUE
+      WORK( 1 ) = MAXWRK
+*
+      RETURN
+*
+*     End of ZGGEVX
+*
+      END
diff --git a/costa/native/external/lapack/zggglm.f b/costa/native/external/lapack/zggglm.f
new file mode 100644
index 000000000..42b91c16a
--- /dev/null
+++ b/costa/native/external/lapack/zggglm.f
@@ -0,0 +1,213 @@
+      SUBROUTINE ZGGGLM( N, M, P, A, LDA, B, LDB, D, X, Y, WORK, LWORK,
+     $                   INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, P
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), D( * ), WORK( * ),
+     $                   X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGGGLM solves a general Gauss-Markov linear model (GLM) problem:
+*
+*          minimize || y ||_2   subject to   d = A*x + B*y
+*              x
+*
+*  where A is an N-by-M matrix, B is an N-by-P matrix, and d is a
+*  given N-vector. It is assumed that M <= N <= M+P, and
+*
+*             rank(A) = M    and    rank( A B ) = N.
+*
+*  Under these assumptions, the constrained equation is always
+*  consistent, and there is a unique solution x and a minimal 2-norm
+*  solution y, which is obtained using a generalized QR factorization
+*  of A and B.
+*
+*  In particular, if matrix B is square nonsingular, then the problem
+*  GLM is equivalent to the following weighted linear least squares
+*  problem
+*
+*               minimize || inv(B)*(d-A*x) ||_2
+*                   x
+*
+*  where inv(B) denotes the inverse of B.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of rows of the matrices A and B.  N >= 0.
+*
+*  M       (input) INTEGER
+*          The number of columns of the matrix A.  0 <= M <= N.
+*
+*  P       (input) INTEGER
+*          The number of columns of the matrix B.  P >= N-M.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,M)
+*          On entry, the N-by-M matrix A.
+*          On exit, A is destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,P)
+*          On entry, the N-by-P matrix B.
+*          On exit, B is destroyed.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  D       (input/output) COMPLEX*16 array, dimension (N)
+*          On entry, D is the left hand side of the GLM equation.
+*          On exit, D is destroyed.
+*
+*  X       (output) COMPLEX*16 array, dimension (M)
+*  Y       (output) COMPLEX*16 array, dimension (P)
+*          On exit, X and Y are the solutions of the GLM problem.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,N+M+P).
+*          For optimum performance, LWORK >= M+min(N,P)+max(N,P)*NB,
+*          where NB is an upper bound for the optimal blocksizes for
+*          ZGEQRF, CGERQF, ZUNMQR and CUNMRQ.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  ===================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, LOPT, LWKOPT, NB, NB1, NB2, NB3, NB4, NP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZCOPY, ZGEMV, ZGGQRF, ZTRSV, ZUNMQR,
+     $                   ZUNMRQ
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      NP = MIN( N, P )
+      NB1 = ILAENV( 1, 'ZGEQRF', ' ', N, M, -1, -1 )
+      NB2 = ILAENV( 1, 'ZGERQF', ' ', N, M, -1, -1 )
+      NB3 = ILAENV( 1, 'ZUNMQR', ' ', N, M, P, -1 )
+      NB4 = ILAENV( 1, 'ZUNMRQ', ' ', N, M, P, -1 )
+      NB = MAX( NB1, NB2, NB3, NB4 )
+      LWKOPT = M + NP + MAX( N, P )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( M.LT.0 .OR. M.GT.N ) THEN
+         INFO = -2
+      ELSE IF( P.LT.0 .OR. P.LT.N-M ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LWORK.LT.MAX( 1, N+M+P ) .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGGGLM', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Compute the GQR factorization of matrices A and B:
+*
+*            Q'*A = ( R11 ) M,    Q'*B*Z' = ( T11   T12 ) M
+*                   (  0  ) N-M             (  0    T22 ) N-M
+*                      M                     M+P-N  N-M
+*
+*     where R11 and T22 are upper triangular, and Q and Z are
+*     unitary.
+*
+      CALL ZGGQRF( N, M, P, A, LDA, WORK, B, LDB, WORK( M+1 ),
+     $             WORK( M+NP+1 ), LWORK-M-NP, INFO )
+      LOPT = WORK( M+NP+1 )
+*
+*     Update left-hand-side vector d = Q'*d = ( d1 ) M
+*                                             ( d2 ) N-M
+*
+      CALL ZUNMQR( 'Left', 'Conjugate transpose', N, 1, M, A, LDA, WORK,
+     $             D, MAX( 1, N ), WORK( M+NP+1 ), LWORK-M-NP, INFO )
+      LOPT = MAX( LOPT, INT( WORK( M+NP+1 ) ) )
+*
+*     Solve T22*y2 = d2 for y2
+*
+      CALL ZTRSV( 'Upper', 'No transpose', 'Non unit', N-M,
+     $            B( M+1, M+P-N+1 ), LDB, D( M+1 ), 1 )
+      CALL ZCOPY( N-M, D( M+1 ), 1, Y( M+P-N+1 ), 1 )
+*
+*     Set y1 = 0
+*
+      DO 10 I = 1, M + P - N
+         Y( I ) = CZERO
+   10 CONTINUE
+*
+*     Update d1 = d1 - T12*y2
+*
+      CALL ZGEMV( 'No transpose', M, N-M, -CONE, B( 1, M+P-N+1 ), LDB,
+     $            Y( M+P-N+1 ), 1, CONE, D, 1 )
+*
+*     Solve triangular system: R11*x = d1
+*
+      CALL ZTRSV( 'Upper', 'No Transpose', 'Non unit', M, A, LDA, D, 1 )
+*
+*     Copy D to X
+*
+      CALL ZCOPY( M, D, 1, X, 1 )
+*
+*     Backward transformation y = Z'*y
+*
+      CALL ZUNMRQ( 'Left', 'Conjugate transpose', P, 1, NP,
+     $             B( MAX( 1, N-P+1 ), 1 ), LDB, WORK( M+1 ), Y,
+     $             MAX( 1, P ), WORK( M+NP+1 ), LWORK-M-NP, INFO )
+      WORK( 1 ) = M + NP + MAX( LOPT, INT( WORK( M+NP+1 ) ) )
+*
+      RETURN
+*
+*     End of ZGGGLM
+*
+      END
diff --git a/costa/native/external/lapack/zgghrd.f b/costa/native/external/lapack/zgghrd.f
new file mode 100644
index 000000000..fdaf5187e
--- /dev/null
+++ b/costa/native/external/lapack/zgghrd.f
@@ -0,0 +1,256 @@
+      SUBROUTINE ZGGHRD( COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB, Q,
+     $                   LDQ, Z, LDZ, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPQ, COMPZ
+      INTEGER            IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGGHRD reduces a pair of complex matrices (A,B) to generalized upper
+*  Hessenberg form using unitary transformations, where A is a
+*  general matrix and B is upper triangular:  Q' * A * Z = H and
+*  Q' * B * Z = T, where H is upper Hessenberg, T is upper triangular,
+*  and Q and Z are unitary, and ' means conjugate transpose.
+*
+*  The unitary matrices Q and Z are determined as products of Givens
+*  rotations.  They may either be formed explicitly, or they may be
+*  postmultiplied into input matrices Q1 and Z1, so that
+*
+*       Q1 * A * Z1' = (Q1*Q) * H * (Z1*Z)'
+*       Q1 * B * Z1' = (Q1*Q) * T * (Z1*Z)'
+*
+*  Arguments
+*  =========
+*
+*  COMPQ   (input) CHARACTER*1
+*          = 'N': do not compute Q;
+*          = 'I': Q is initialized to the unit matrix, and the
+*                 unitary matrix Q is returned;
+*          = 'V': Q must contain a unitary matrix Q1 on entry,
+*                 and the product Q1*Q is returned.
+*
+*  COMPZ   (input) CHARACTER*1
+*          = 'N': do not compute Q;
+*          = 'I': Q is initialized to the unit matrix, and the
+*                 unitary matrix Q is returned;
+*          = 'V': Q must contain a unitary matrix Q1 on entry,
+*                 and the product Q1*Q is returned.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          It is assumed that A is already upper triangular in rows and
+*          columns 1:ILO-1 and IHI+1:N.  ILO and IHI are normally set
+*          by a previous call to ZGGBAL; otherwise they should be set
+*          to 1 and N respectively.
+*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA, N)
+*          On entry, the N-by-N general matrix to be reduced.
+*          On exit, the upper triangle and the first subdiagonal of A
+*          are overwritten with the upper Hessenberg matrix H, and the
+*          rest is set to zero.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB, N)
+*          On entry, the N-by-N upper triangular matrix B.
+*          On exit, the upper triangular matrix T = Q' B Z.  The
+*          elements below the diagonal are set to zero.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  Q       (input/output) COMPLEX*16 array, dimension (LDQ, N)
+*          If COMPQ='N':  Q is not referenced.
+*          If COMPQ='I':  on entry, Q need not be set, and on exit it
+*                         contains the unitary matrix Q, where Q'
+*                         is the product of the Givens transformations
+*                         which are applied to A and B on the left.
+*          If COMPQ='V':  on entry, Q must contain a unitary matrix
+*                         Q1, and on exit this is overwritten by Q1*Q.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.
+*          LDQ >= N if COMPQ='V' or 'I'; LDQ >= 1 otherwise.
+*
+*  Z       (input/output) COMPLEX*16 array, dimension (LDZ, N)
+*          If COMPZ='N':  Z is not referenced.
+*          If COMPZ='I':  on entry, Z need not be set, and on exit it
+*                         contains the unitary matrix Z, which is
+*                         the product of the Givens transformations
+*                         which are applied to A and B on the right.
+*          If COMPZ='V':  on entry, Z must contain a unitary matrix
+*                         Z1, and on exit this is overwritten by Z1*Z.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.
+*          LDZ >= N if COMPZ='V' or 'I'; LDZ >= 1 otherwise.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  This routine reduces A to Hessenberg and B to triangular form by
+*  an unblocked reduction, as described in _Matrix_Computations_,
+*  by Golub and van Loan (Johns Hopkins Press).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         CONE, CZERO
+      PARAMETER          ( CONE = ( 1.0D+0, 0.0D+0 ),
+     $                   CZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ILQ, ILZ
+      INTEGER            ICOMPQ, ICOMPZ, JCOL, JROW
+      DOUBLE PRECISION   C
+      COMPLEX*16         CTEMP, S
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLARTG, ZLASET, ZROT
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode COMPQ
+*
+      IF( LSAME( COMPQ, 'N' ) ) THEN
+         ILQ = .FALSE.
+         ICOMPQ = 1
+      ELSE IF( LSAME( COMPQ, 'V' ) ) THEN
+         ILQ = .TRUE.
+         ICOMPQ = 2
+      ELSE IF( LSAME( COMPQ, 'I' ) ) THEN
+         ILQ = .TRUE.
+         ICOMPQ = 3
+      ELSE
+         ICOMPQ = 0
+      END IF
+*
+*     Decode COMPZ
+*
+      IF( LSAME( COMPZ, 'N' ) ) THEN
+         ILZ = .FALSE.
+         ICOMPZ = 1
+      ELSE IF( LSAME( COMPZ, 'V' ) ) THEN
+         ILZ = .TRUE.
+         ICOMPZ = 2
+      ELSE IF( LSAME( COMPZ, 'I' ) ) THEN
+         ILZ = .TRUE.
+         ICOMPZ = 3
+      ELSE
+         ICOMPZ = 0
+      END IF
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( ICOMPQ.LE.0 ) THEN
+         INFO = -1
+      ELSE IF( ICOMPZ.LE.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( ILO.LT.1 ) THEN
+         INFO = -4
+      ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( ( ILQ .AND. LDQ.LT.N ) .OR. LDQ.LT.1 ) THEN
+         INFO = -11
+      ELSE IF( ( ILZ .AND. LDZ.LT.N ) .OR. LDZ.LT.1 ) THEN
+         INFO = -13
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGGHRD', -INFO )
+         RETURN
+      END IF
+*
+*     Initialize Q and Z if desired.
+*
+      IF( ICOMPQ.EQ.3 )
+     $   CALL ZLASET( 'Full', N, N, CZERO, CONE, Q, LDQ )
+      IF( ICOMPZ.EQ.3 )
+     $   CALL ZLASET( 'Full', N, N, CZERO, CONE, Z, LDZ )
+*
+*     Quick return if possible
+*
+      IF( N.LE.1 )
+     $   RETURN
+*
+*     Zero out lower triangle of B
+*
+      DO 20 JCOL = 1, N - 1
+         DO 10 JROW = JCOL + 1, N
+            B( JROW, JCOL ) = CZERO
+   10    CONTINUE
+   20 CONTINUE
+*
+*     Reduce A and B
+*
+      DO 40 JCOL = ILO, IHI - 2
+*
+         DO 30 JROW = IHI, JCOL + 2, -1
+*
+*           Step 1: rotate rows JROW-1, JROW to kill A(JROW,JCOL)
+*
+            CTEMP = A( JROW-1, JCOL )
+            CALL ZLARTG( CTEMP, A( JROW, JCOL ), C, S,
+     $                   A( JROW-1, JCOL ) )
+            A( JROW, JCOL ) = CZERO
+            CALL ZROT( N-JCOL, A( JROW-1, JCOL+1 ), LDA,
+     $                 A( JROW, JCOL+1 ), LDA, C, S )
+            CALL ZROT( N+2-JROW, B( JROW-1, JROW-1 ), LDB,
+     $                 B( JROW, JROW-1 ), LDB, C, S )
+            IF( ILQ )
+     $         CALL ZROT( N, Q( 1, JROW-1 ), 1, Q( 1, JROW ), 1, C,
+     $                    DCONJG( S ) )
+*
+*           Step 2: rotate columns JROW, JROW-1 to kill B(JROW,JROW-1)
+*
+            CTEMP = B( JROW, JROW )
+            CALL ZLARTG( CTEMP, B( JROW, JROW-1 ), C, S,
+     $                   B( JROW, JROW ) )
+            B( JROW, JROW-1 ) = CZERO
+            CALL ZROT( IHI, A( 1, JROW ), 1, A( 1, JROW-1 ), 1, C, S )
+            CALL ZROT( JROW-1, B( 1, JROW ), 1, B( 1, JROW-1 ), 1, C,
+     $                 S )
+            IF( ILZ )
+     $         CALL ZROT( N, Z( 1, JROW ), 1, Z( 1, JROW-1 ), 1, C, S )
+   30    CONTINUE
+   40 CONTINUE
+*
+      RETURN
+*
+*     End of ZGGHRD
+*
+      END
diff --git a/costa/native/external/lapack/zgglse.f b/costa/native/external/lapack/zgglse.f
new file mode 100644
index 000000000..b8787a80f
--- /dev/null
+++ b/costa/native/external/lapack/zgglse.f
@@ -0,0 +1,218 @@
+      SUBROUTINE ZGGLSE( M, N, P, A, LDA, B, LDB, C, D, X, WORK, LWORK,
+     $                   INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, P
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), C( * ), D( * ),
+     $                   WORK( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGGLSE solves the linear equality-constrained least squares (LSE)
+*  problem:
+*
+*          minimize || c - A*x ||_2   subject to   B*x = d
+*
+*  where A is an M-by-N matrix, B is a P-by-N matrix, c is a given
+*  M-vector, and d is a given P-vector. It is assumed that
+*  P <= N <= M+P, and
+*
+*           rank(B) = P and  rank( ( A ) ) = N.
+*                                ( ( B ) )
+*
+*  These conditions ensure that the LSE problem has a unique solution,
+*  which is obtained using a GRQ factorization of the matrices B and A.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrices A and B. N >= 0.
+*
+*  P       (input) INTEGER
+*          The number of rows of the matrix B. 0 <= P <= N <= M+P.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, A is destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,N)
+*          On entry, the P-by-N matrix B.
+*          On exit, B is destroyed.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,P).
+*
+*  C       (input/output) COMPLEX*16 array, dimension (M)
+*          On entry, C contains the right hand side vector for the
+*          least squares part of the LSE problem.
+*          On exit, the residual sum of squares for the solution
+*          is given by the sum of squares of elements N-P+1 to M of
+*          vector C.
+*
+*  D       (input/output) COMPLEX*16 array, dimension (P)
+*          On entry, D contains the right hand side vector for the
+*          constrained equation.
+*          On exit, D is destroyed.
+*
+*  X       (output) COMPLEX*16 array, dimension (N)
+*          On exit, X is the solution of the LSE problem.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,M+N+P).
+*          For optimum performance LWORK >= P+min(M,N)+max(M,N)*NB,
+*          where NB is an upper bound for the optimal blocksizes for
+*          ZGEQRF, CGERQF, ZUNMQR and CUNMRQ.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         CONE
+      PARAMETER          ( CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            LOPT, LWKOPT, MN, NB, NB1, NB2, NB3, NB4, NR
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZAXPY, ZCOPY, ZGEMV, ZGGRQF, ZTRMV,
+     $                   ZTRSV, ZUNMQR, ZUNMRQ
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      MN = MIN( M, N )
+      NB1 = ILAENV( 1, 'ZGEQRF', ' ', M, N, -1, -1 )
+      NB2 = ILAENV( 1, 'ZGERQF', ' ', M, N, -1, -1 )
+      NB3 = ILAENV( 1, 'ZUNMQR', ' ', M, N, P, -1 )
+      NB4 = ILAENV( 1, 'ZUNMRQ', ' ', M, N, P, -1 )
+      NB = MAX( NB1, NB2, NB3, NB4 )
+      LWKOPT = P + MN + MAX( M, N )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( P.LT.0 .OR. P.GT.N .OR. P.LT.N-M ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, P ) ) THEN
+         INFO = -7
+      ELSE IF( LWORK.LT.MAX( 1, M+N+P ) .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGGLSE', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Compute the GRQ factorization of matrices B and A:
+*
+*            B*Q' = (  0  T12 ) P   Z'*A*Q' = ( R11 R12 ) N-P
+*                     N-P  P                  (  0  R22 ) M+P-N
+*                                               N-P  P
+*
+*     where T12 and R11 are upper triangular, and Q and Z are
+*     unitary.
+*
+      CALL ZGGRQF( P, M, N, B, LDB, WORK, A, LDA, WORK( P+1 ),
+     $             WORK( P+MN+1 ), LWORK-P-MN, INFO )
+      LOPT = WORK( P+MN+1 )
+*
+*     Update c = Z'*c = ( c1 ) N-P
+*                       ( c2 ) M+P-N
+*
+      CALL ZUNMQR( 'Left', 'Conjugate Transpose', M, 1, MN, A, LDA,
+     $             WORK( P+1 ), C, MAX( 1, M ), WORK( P+MN+1 ),
+     $             LWORK-P-MN, INFO )
+      LOPT = MAX( LOPT, INT( WORK( P+MN+1 ) ) )
+*
+*     Solve T12*x2 = d for x2
+*
+      CALL ZTRSV( 'Upper', 'No transpose', 'Non unit', P, B( 1, N-P+1 ),
+     $            LDB, D, 1 )
+*
+*     Update c1
+*
+      CALL ZGEMV( 'No transpose', N-P, P, -CONE, A( 1, N-P+1 ), LDA, D,
+     $            1, CONE, C, 1 )
+*
+*     Sovle R11*x1 = c1 for x1
+*
+      CALL ZTRSV( 'Upper', 'No transpose', 'Non unit', N-P, A, LDA, C,
+     $            1 )
+*
+*     Put the solutions in X
+*
+      CALL ZCOPY( N-P, C, 1, X, 1 )
+      CALL ZCOPY( P, D, 1, X( N-P+1 ), 1 )
+*
+*     Compute the residual vector:
+*
+      IF( M.LT.N ) THEN
+         NR = M + P - N
+         CALL ZGEMV( 'No transpose', NR, N-M, -CONE, A( N-P+1, M+1 ),
+     $               LDA, D( NR+1 ), 1, CONE, C( N-P+1 ), 1 )
+      ELSE
+         NR = P
+      END IF
+      CALL ZTRMV( 'Upper', 'No transpose', 'Non unit', NR,
+     $            A( N-P+1, N-P+1 ), LDA, D, 1 )
+      CALL ZAXPY( NR, -CONE, D, 1, C( N-P+1 ), 1 )
+*
+*     Backward transformation x = Q'*x
+*
+      CALL ZUNMRQ( 'Left', 'Conjugate Transpose', N, 1, P, B, LDB,
+     $             WORK( 1 ), X, N, WORK( P+MN+1 ), LWORK-P-MN, INFO )
+      WORK( 1 ) = P + MN + MAX( LOPT, INT( WORK( P+MN+1 ) ) )
+*
+      RETURN
+*
+*     End of ZGGLSE
+*
+      END
diff --git a/costa/native/external/lapack/zggqrf.f b/costa/native/external/lapack/zggqrf.f
new file mode 100644
index 000000000..43bfdb7e7
--- /dev/null
+++ b/costa/native/external/lapack/zggqrf.f
@@ -0,0 +1,212 @@
+      SUBROUTINE ZGGQRF( N, M, P, A, LDA, TAUA, B, LDB, TAUB, WORK,
+     $                   LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, P
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGGQRF computes a generalized QR factorization of an N-by-M matrix A
+*  and an N-by-P matrix B:
+*
+*              A = Q*R,        B = Q*T*Z,
+*
+*  where Q is an N-by-N unitary matrix, Z is a P-by-P unitary matrix,
+*  and R and T assume one of the forms:
+*
+*  if N >= M,  R = ( R11 ) M  ,   or if N < M,  R = ( R11  R12 ) N,
+*                  (  0  ) N-M                         N   M-N
+*                     M
+*
+*  where R11 is upper triangular, and
+*
+*  if N <= P,  T = ( 0  T12 ) N,   or if N > P,  T = ( T11 ) N-P,
+*                   P-N  N                           ( T21 ) P
+*                                                       P
+*
+*  where T12 or T21 is upper triangular.
+*
+*  In particular, if B is square and nonsingular, the GQR factorization
+*  of A and B implicitly gives the QR factorization of inv(B)*A:
+*
+*               inv(B)*A = Z'*(inv(T)*R)
+*
+*  where inv(B) denotes the inverse of the matrix B, and Z' denotes the
+*  conjugate transpose of matrix Z.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of rows of the matrices A and B. N >= 0.
+*
+*  M       (input) INTEGER
+*          The number of columns of the matrix A.  M >= 0.
+*
+*  P       (input) INTEGER
+*          The number of columns of the matrix B.  P >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,M)
+*          On entry, the N-by-M matrix A.
+*          On exit, the elements on and above the diagonal of the array
+*          contain the min(N,M)-by-M upper trapezoidal matrix R (R is
+*          upper triangular if N >= M); the elements below the diagonal,
+*          with the array TAUA, represent the unitary matrix Q as a
+*          product of min(N,M) elementary reflectors (see Further
+*          Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  TAUA    (output) COMPLEX*16 array, dimension (min(N,M))
+*          The scalar factors of the elementary reflectors which
+*          represent the unitary matrix Q (see Further Details).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,P)
+*          On entry, the N-by-P matrix B.
+*          On exit, if N <= P, the upper triangle of the subarray
+*          B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T;
+*          if N > P, the elements on and above the (N-P)-th subdiagonal
+*          contain the N-by-P upper trapezoidal matrix T; the remaining
+*          elements, with the array TAUB, represent the unitary
+*          matrix Z as a product of elementary reflectors (see Further
+*          Details).
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  TAUB    (output) COMPLEX*16 array, dimension (min(N,P))
+*          The scalar factors of the elementary reflectors which
+*          represent the unitary matrix Z (see Further Details).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,N,M,P).
+*          For optimum performance LWORK >= max(N,M,P)*max(NB1,NB2,NB3),
+*          where NB1 is the optimal blocksize for the QR factorization
+*          of an N-by-M matrix, NB2 is the optimal blocksize for the
+*          RQ factorization of an N-by-P matrix, and NB3 is the optimal
+*          blocksize for a call of ZUNMQR.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*           = 0:  successful exit
+*           < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(k), where k = min(n,m).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - taua * v * v'
+*
+*  where taua is a complex scalar, and v is a complex vector with
+*  v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i+1:n,i),
+*  and taua in TAUA(i).
+*  To form Q explicitly, use LAPACK subroutine ZUNGQR.
+*  To use Q to update another matrix, use LAPACK subroutine ZUNMQR.
+*
+*  The matrix Z is represented as a product of elementary reflectors
+*
+*     Z = H(1) H(2) . . . H(k), where k = min(n,p).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - taub * v * v'
+*
+*  where taub is a complex scalar, and v is a complex vector with
+*  v(p-k+i+1:p) = 0 and v(p-k+i) = 1; v(1:p-k+i-1) is stored on exit in
+*  B(n-k+i,1:p-k+i-1), and taub in TAUB(i).
+*  To form Z explicitly, use LAPACK subroutine ZUNGRQ.
+*  To use Z to update another matrix, use LAPACK subroutine ZUNMRQ.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            LOPT, LWKOPT, NB, NB1, NB2, NB3
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGEQRF, ZGERQF, ZUNMQR
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      NB1 = ILAENV( 1, 'ZGEQRF', ' ', N, M, -1, -1 )
+      NB2 = ILAENV( 1, 'ZGERQF', ' ', N, P, -1, -1 )
+      NB3 = ILAENV( 1, 'ZUNMQR', ' ', N, M, P, -1 )
+      NB = MAX( NB1, NB2, NB3 )
+      LWKOPT = MAX( N, M, P )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( P.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LWORK.LT.MAX( 1, N, M, P ) .AND. .NOT.LQUERY ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGGQRF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     QR factorization of N-by-M matrix A: A = Q*R
+*
+      CALL ZGEQRF( N, M, A, LDA, TAUA, WORK, LWORK, INFO )
+      LOPT = WORK( 1 )
+*
+*     Update B := Q'*B.
+*
+      CALL ZUNMQR( 'Left', 'Conjugate Transpose', N, P, MIN( N, M ), A,
+     $             LDA, TAUA, B, LDB, WORK, LWORK, INFO )
+      LOPT = MAX( LOPT, INT( WORK( 1 ) ) )
+*
+*     RQ factorization of N-by-P matrix B: B = T*Z.
+*
+      CALL ZGERQF( N, P, B, LDB, TAUB, WORK, LWORK, INFO )
+      WORK( 1 ) = MAX( LOPT, INT( WORK( 1 ) ) )
+*
+      RETURN
+*
+*     End of ZGGQRF
+*
+      END
diff --git a/costa/native/external/lapack/zggrqf.f b/costa/native/external/lapack/zggrqf.f
new file mode 100644
index 000000000..8323a80f1
--- /dev/null
+++ b/costa/native/external/lapack/zggrqf.f
@@ -0,0 +1,212 @@
+      SUBROUTINE ZGGRQF( M, P, N, A, LDA, TAUA, B, LDB, TAUB, WORK,
+     $                   LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LDB, LWORK, M, N, P
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), TAUA( * ), TAUB( * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGGRQF computes a generalized RQ factorization of an M-by-N matrix A
+*  and a P-by-N matrix B:
+*
+*              A = R*Q,        B = Z*T*Q,
+*
+*  where Q is an N-by-N unitary matrix, Z is a P-by-P unitary
+*  matrix, and R and T assume one of the forms:
+*
+*  if M <= N,  R = ( 0  R12 ) M,   or if M > N,  R = ( R11 ) M-N,
+*                   N-M  M                           ( R21 ) N
+*                                                       N
+*
+*  where R12 or R21 is upper triangular, and
+*
+*  if P >= N,  T = ( T11 ) N  ,   or if P < N,  T = ( T11  T12 ) P,
+*                  (  0  ) P-N                         P   N-P
+*                     N
+*
+*  where T11 is upper triangular.
+*
+*  In particular, if B is square and nonsingular, the GRQ factorization
+*  of A and B implicitly gives the RQ factorization of A*inv(B):
+*
+*               A*inv(B) = (R*inv(T))*Z'
+*
+*  where inv(B) denotes the inverse of the matrix B, and Z' denotes the
+*  conjugate transpose of the matrix Z.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  P       (input) INTEGER
+*          The number of rows of the matrix B.  P >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrices A and B. N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, if M <= N, the upper triangle of the subarray
+*          A(1:M,N-M+1:N) contains the M-by-M upper triangular matrix R;
+*          if M > N, the elements on and above the (M-N)-th subdiagonal
+*          contain the M-by-N upper trapezoidal matrix R; the remaining
+*          elements, with the array TAUA, represent the unitary
+*          matrix Q as a product of elementary reflectors (see Further
+*          Details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  TAUA    (output) COMPLEX*16 array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors which
+*          represent the unitary matrix Q (see Further Details).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,N)
+*          On entry, the P-by-N matrix B.
+*          On exit, the elements on and above the diagonal of the array
+*          contain the min(P,N)-by-N upper trapezoidal matrix T (T is
+*          upper triangular if P >= N); the elements below the diagonal,
+*          with the array TAUB, represent the unitary matrix Z as a
+*          product of elementary reflectors (see Further Details).
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,P).
+*
+*  TAUB    (output) COMPLEX*16 array, dimension (min(P,N))
+*          The scalar factors of the elementary reflectors which
+*          represent the unitary matrix Z (see Further Details).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,N,M,P).
+*          For optimum performance LWORK >= max(N,M,P)*max(NB1,NB2,NB3),
+*          where NB1 is the optimal blocksize for the RQ factorization
+*          of an M-by-N matrix, NB2 is the optimal blocksize for the
+*          QR factorization of a P-by-N matrix, and NB3 is the optimal
+*          blocksize for a call of ZUNMRQ.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO=-i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(k), where k = min(m,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - taua * v * v'
+*
+*  where taua is a complex scalar, and v is a complex vector with
+*  v(n-k+i+1:n) = 0 and v(n-k+i) = 1; v(1:n-k+i-1) is stored on exit in
+*  A(m-k+i,1:n-k+i-1), and taua in TAUA(i).
+*  To form Q explicitly, use LAPACK subroutine ZUNGRQ.
+*  To use Q to update another matrix, use LAPACK subroutine ZUNMRQ.
+*
+*  The matrix Z is represented as a product of elementary reflectors
+*
+*     Z = H(1) H(2) . . . H(k), where k = min(p,n).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - taub * v * v'
+*
+*  where taub is a complex scalar, and v is a complex vector with
+*  v(1:i-1) = 0 and v(i) = 1; v(i+1:p) is stored on exit in B(i+1:p,i),
+*  and taub in TAUB(i).
+*  To form Z explicitly, use LAPACK subroutine ZUNGQR.
+*  To use Z to update another matrix, use LAPACK subroutine ZUNMQR.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            LOPT, LWKOPT, NB, NB1, NB2, NB3
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGEQRF, ZGERQF, ZUNMRQ
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          INT, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      NB1 = ILAENV( 1, 'ZGERQF', ' ', M, N, -1, -1 )
+      NB2 = ILAENV( 1, 'ZGEQRF', ' ', P, N, -1, -1 )
+      NB3 = ILAENV( 1, 'ZUNMRQ', ' ', M, N, P, -1 )
+      NB = MAX( NB1, NB2, NB3 )
+      LWKOPT = MAX( N, M, P )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( P.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, P ) ) THEN
+         INFO = -8
+      ELSE IF( LWORK.LT.MAX( 1, M, P, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGGRQF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     RQ factorization of M-by-N matrix A: A = R*Q
+*
+      CALL ZGERQF( M, N, A, LDA, TAUA, WORK, LWORK, INFO )
+      LOPT = WORK( 1 )
+*
+*     Update B := B*Q'
+*
+      CALL ZUNMRQ( 'Right', 'Conjugate Transpose', P, N, MIN( M, N ),
+     $             A( MAX( 1, M-N+1 ), 1 ), LDA, TAUA, B, LDB, WORK,
+     $             LWORK, INFO )
+      LOPT = MAX( LOPT, INT( WORK( 1 ) ) )
+*
+*     QR factorization of P-by-N matrix B: B = Z*T
+*
+      CALL ZGEQRF( P, N, B, LDB, TAUB, WORK, LWORK, INFO )
+      WORK( 1 ) = MAX( LOPT, INT( WORK( 1 ) ) )
+*
+      RETURN
+*
+*     End of ZGGRQF
+*
+      END
diff --git a/costa/native/external/lapack/zggsvd.f b/costa/native/external/lapack/zggsvd.f
new file mode 100644
index 000000000..41a0368ad
--- /dev/null
+++ b/costa/native/external/lapack/zggsvd.f
@@ -0,0 +1,334 @@
+      SUBROUTINE ZGGSVD( JOBU, JOBV, JOBQ, M, N, P, K, L, A, LDA, B,
+     $                   LDB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ, WORK,
+     $                   RWORK, IWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBQ, JOBU, JOBV
+      INTEGER            INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   ALPHA( * ), BETA( * ), RWORK( * )
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
+     $                   U( LDU, * ), V( LDV, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGGSVD computes the generalized singular value decomposition (GSVD)
+*  of an M-by-N complex matrix A and P-by-N complex matrix B:
+*
+*        U'*A*Q = D1*( 0 R ),    V'*B*Q = D2*( 0 R )
+*
+*  where U, V and Q are unitary matrices, and Z' means the conjugate
+*  transpose of Z.  Let K+L = the effective numerical rank of the
+*  matrix (A',B')', then R is a (K+L)-by-(K+L) nonsingular upper
+*  triangular matrix, D1 and D2 are M-by-(K+L) and P-by-(K+L) "diagonal"
+*  matrices and of the following structures, respectively:
+*
+*  If M-K-L >= 0,
+*
+*                      K  L
+*         D1 =     K ( I  0 )
+*                  L ( 0  C )
+*              M-K-L ( 0  0 )
+*
+*                    K  L
+*         D2 =   L ( 0  S )
+*              P-L ( 0  0 )
+*
+*                  N-K-L  K    L
+*    ( 0 R ) = K (  0   R11  R12 )
+*              L (  0    0   R22 )
+*  where
+*
+*    C = diag( ALPHA(K+1), ... , ALPHA(K+L) ),
+*    S = diag( BETA(K+1),  ... , BETA(K+L) ),
+*    C**2 + S**2 = I.
+*
+*    R is stored in A(1:K+L,N-K-L+1:N) on exit.
+*
+*  If M-K-L < 0,
+*
+*                    K M-K K+L-M
+*         D1 =   K ( I  0    0   )
+*              M-K ( 0  C    0   )
+*
+*                      K M-K K+L-M
+*         D2 =   M-K ( 0  S    0  )
+*              K+L-M ( 0  0    I  )
+*                P-L ( 0  0    0  )
+*
+*                     N-K-L  K   M-K  K+L-M
+*    ( 0 R ) =     K ( 0    R11  R12  R13  )
+*                M-K ( 0     0   R22  R23  )
+*              K+L-M ( 0     0    0   R33  )
+*
+*  where
+*
+*    C = diag( ALPHA(K+1), ... , ALPHA(M) ),
+*    S = diag( BETA(K+1),  ... , BETA(M) ),
+*    C**2 + S**2 = I.
+*
+*    (R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N), and R33 is stored
+*    ( 0  R22 R23 )
+*    in B(M-K+1:L,N+M-K-L+1:N) on exit.
+*
+*  The routine computes C, S, R, and optionally the unitary
+*  transformation matrices U, V and Q.
+*
+*  In particular, if B is an N-by-N nonsingular matrix, then the GSVD of
+*  A and B implicitly gives the SVD of A*inv(B):
+*                       A*inv(B) = U*(D1*inv(D2))*V'.
+*  If ( A',B')' has orthnormal columns, then the GSVD of A and B is also
+*  equal to the CS decomposition of A and B. Furthermore, the GSVD can
+*  be used to derive the solution of the eigenvalue problem:
+*                       A'*A x = lambda* B'*B x.
+*  In some literature, the GSVD of A and B is presented in the form
+*                   U'*A*X = ( 0 D1 ),   V'*B*X = ( 0 D2 )
+*  where U and V are orthogonal and X is nonsingular, and D1 and D2 are
+*  ``diagonal''.  The former GSVD form can be converted to the latter
+*  form by taking the nonsingular matrix X as
+*
+*                        X = Q*(  I   0    )
+*                              (  0 inv(R) )
+*
+*  Arguments
+*  =========
+*
+*  JOBU    (input) CHARACTER*1
+*          = 'U':  Unitary matrix U is computed;
+*          = 'N':  U is not computed.
+*
+*  JOBV    (input) CHARACTER*1
+*          = 'V':  Unitary matrix V is computed;
+*          = 'N':  V is not computed.
+*
+*  JOBQ    (input) CHARACTER*1
+*          = 'Q':  Unitary matrix Q is computed;
+*          = 'N':  Q is not computed.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrices A and B.  N >= 0.
+*
+*  P       (input) INTEGER
+*          The number of rows of the matrix B.  P >= 0.
+*
+*  K       (output) INTEGER
+*  L       (output) INTEGER
+*          On exit, K and L specify the dimension of the subblocks
+*          described in Purpose.
+*          K + L = effective numerical rank of (A',B')'.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, A contains the triangular matrix R, or part of R.
+*          See Purpose for details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,N)
+*          On entry, the P-by-N matrix B.
+*          On exit, B contains part of the triangular matrix R if
+*          M-K-L < 0.  See Purpose for details.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,P).
+*
+*  ALPHA   (output) DOUBLE PRECISION array, dimension (N)
+*  BETA    (output) DOUBLE PRECISION array, dimension (N)
+*          On exit, ALPHA and BETA contain the generalized singular
+*          value pairs of A and B;
+*            ALPHA(1:K) = 1,
+*            BETA(1:K)  = 0,
+*          and if M-K-L >= 0,
+*            ALPHA(K+1:K+L) = C,
+*            BETA(K+1:K+L)  = S,
+*          or if M-K-L < 0,
+*            ALPHA(K+1:M)= C, ALPHA(M+1:K+L)= 0
+*            BETA(K+1:M) = S, BETA(M+1:K+L) = 1
+*          and
+*            ALPHA(K+L+1:N) = 0
+*            BETA(K+L+1:N)  = 0
+*
+*  U       (output) COMPLEX*16 array, dimension (LDU,M)
+*          If JOBU = 'U', U contains the M-by-M unitary matrix U.
+*          If JOBU = 'N', U is not referenced.
+*
+*  LDU     (input) INTEGER
+*          The leading dimension of the array U. LDU >= max(1,M) if
+*          JOBU = 'U'; LDU >= 1 otherwise.
+*
+*  V       (output) COMPLEX*16 array, dimension (LDV,P)
+*          If JOBV = 'V', V contains the P-by-P unitary matrix V.
+*          If JOBV = 'N', V is not referenced.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V. LDV >= max(1,P) if
+*          JOBV = 'V'; LDV >= 1 otherwise.
+*
+*  Q       (output) COMPLEX*16 array, dimension (LDQ,N)
+*          If JOBQ = 'Q', Q contains the N-by-N unitary matrix Q.
+*          If JOBQ = 'N', Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q. LDQ >= max(1,N) if
+*          JOBQ = 'Q'; LDQ >= 1 otherwise.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (max(3*N,M,P)+N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (2*N)
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (N)
+*          On exit, IWORK stores the sorting information. More
+*          precisely, the following loop will sort ALPHA
+*             for I = K+1, min(M,K+L)
+*                 swap ALPHA(I) and ALPHA(IWORK(I))
+*             endfor
+*          such that ALPHA(1) >= ALPHA(2) >= ... >= ALPHA(N).
+*
+*  INFO    (output)INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = 1, the Jacobi-type procedure failed to
+*                converge.  For further details, see subroutine ZTGSJA.
+*
+*  Internal Parameters
+*  ===================
+*
+*  TOLA    DOUBLE PRECISION
+*  TOLB    DOUBLE PRECISION
+*          TOLA and TOLB are the thresholds to determine the effective
+*          rank of (A',B')'. Generally, they are set to
+*                   TOLA = MAX(M,N)*norm(A)*MAZHEPS,
+*                   TOLB = MAX(P,N)*norm(B)*MAZHEPS.
+*          The size of TOLA and TOLB may affect the size of backward
+*          errors of the decomposition.
+*
+*  Further Details
+*  ===============
+*
+*  2-96 Based on modifications by
+*     Ming Gu and Huan Ren, Computer Science Division, University of
+*     California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            WANTQ, WANTU, WANTV
+      INTEGER            I, IBND, ISUB, J, NCYCLE
+      DOUBLE PRECISION   ANORM, BNORM, SMAX, TEMP, TOLA, TOLB, ULP, UNFL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, ZLANGE
+      EXTERNAL           LSAME, DLAMCH, ZLANGE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, XERBLA, ZGGSVP, ZTGSJA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      WANTU = LSAME( JOBU, 'U' )
+      WANTV = LSAME( JOBV, 'V' )
+      WANTQ = LSAME( JOBQ, 'Q' )
+*
+      INFO = 0
+      IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( P.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LDB.LT.MAX( 1, P ) ) THEN
+         INFO = -12
+      ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN
+         INFO = -16
+      ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN
+         INFO = -18
+      ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN
+         INFO = -20
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGGSVD', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the Frobenius norm of matrices A and B
+*
+      ANORM = ZLANGE( '1', M, N, A, LDA, RWORK )
+      BNORM = ZLANGE( '1', P, N, B, LDB, RWORK )
+*
+*     Get machine precision and set up threshold for determining
+*     the effective numerical rank of the matrices A and B.
+*
+      ULP = DLAMCH( 'Precision' )
+      UNFL = DLAMCH( 'Safe Minimum' )
+      TOLA = MAX( M, N )*MAX( ANORM, UNFL )*ULP
+      TOLB = MAX( P, N )*MAX( BNORM, UNFL )*ULP
+*
+      CALL ZGGSVP( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB, TOLA,
+     $             TOLB, K, L, U, LDU, V, LDV, Q, LDQ, IWORK, RWORK,
+     $             WORK, WORK( N+1 ), INFO )
+*
+*     Compute the GSVD of two upper "triangular" matrices
+*
+      CALL ZTGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B, LDB,
+     $             TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV, Q, LDQ,
+     $             WORK, NCYCLE, INFO )
+*
+*     Sort the singular values and store the pivot indices in IWORK
+*     Copy ALPHA to RWORK, then sort ALPHA in RWORK
+*
+      CALL DCOPY( N, ALPHA, 1, RWORK, 1 )
+      IBND = MIN( L, M-K )
+      DO 20 I = 1, IBND
+*
+*        Scan for largest ALPHA(K+I)
+*
+         ISUB = I
+         SMAX = RWORK( K+I )
+         DO 10 J = I + 1, IBND
+            TEMP = RWORK( K+J )
+            IF( TEMP.GT.SMAX ) THEN
+               ISUB = J
+               SMAX = TEMP
+            END IF
+   10    CONTINUE
+         IF( ISUB.NE.I ) THEN
+            RWORK( K+ISUB ) = RWORK( K+I )
+            RWORK( K+I ) = SMAX
+            IWORK( K+I ) = K + ISUB
+         ELSE
+            IWORK( K+I ) = K + I
+         END IF
+   20 CONTINUE
+*
+      RETURN
+*
+*     End of ZGGSVD
+*
+      END
diff --git a/costa/native/external/lapack/zggsvp.f b/costa/native/external/lapack/zggsvp.f
new file mode 100644
index 000000000..843d1e9f9
--- /dev/null
+++ b/costa/native/external/lapack/zggsvp.f
@@ -0,0 +1,403 @@
+      SUBROUTINE ZGGSVP( JOBU, JOBV, JOBQ, M, P, N, A, LDA, B, LDB,
+     $                   TOLA, TOLB, K, L, U, LDU, V, LDV, Q, LDQ,
+     $                   IWORK, RWORK, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBQ, JOBU, JOBV
+      INTEGER            INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N, P
+      DOUBLE PRECISION   TOLA, TOLB
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   RWORK( * )
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
+     $                   TAU( * ), U( LDU, * ), V( LDV, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGGSVP computes unitary matrices U, V and Q such that
+*
+*                   N-K-L  K    L
+*   U'*A*Q =     K ( 0    A12  A13 )  if M-K-L >= 0;
+*                L ( 0     0   A23 )
+*            M-K-L ( 0     0    0  )
+*
+*                   N-K-L  K    L
+*          =     K ( 0    A12  A13 )  if M-K-L < 0;
+*              M-K ( 0     0   A23 )
+*
+*                 N-K-L  K    L
+*   V'*B*Q =   L ( 0     0   B13 )
+*            P-L ( 0     0    0  )
+*
+*  where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular
+*  upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0,
+*  otherwise A23 is (M-K)-by-L upper trapezoidal.  K+L = the effective
+*  numerical rank of the (M+P)-by-N matrix (A',B')'.  Z' denotes the
+*  conjugate transpose of Z.
+*
+*  This decomposition is the preprocessing step for computing the
+*  Generalized Singular Value Decomposition (GSVD), see subroutine
+*  ZGGSVD.
+*
+*  Arguments
+*  =========
+*
+*  JOBU    (input) CHARACTER*1
+*          = 'U':  Unitary matrix U is computed;
+*          = 'N':  U is not computed.
+*
+*  JOBV    (input) CHARACTER*1
+*          = 'V':  Unitary matrix V is computed;
+*          = 'N':  V is not computed.
+*
+*  JOBQ    (input) CHARACTER*1
+*          = 'Q':  Unitary matrix Q is computed;
+*          = 'N':  Q is not computed.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  P       (input) INTEGER
+*          The number of rows of the matrix B.  P >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrices A and B.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, A contains the triangular (or trapezoidal) matrix
+*          described in the Purpose section.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,N)
+*          On entry, the P-by-N matrix B.
+*          On exit, B contains the triangular matrix described in
+*          the Purpose section.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,P).
+*
+*  TOLA    (input) DOUBLE PRECISION
+*  TOLB    (input) DOUBLE PRECISION
+*          TOLA and TOLB are the thresholds to determine the effective
+*          numerical rank of matrix B and a subblock of A. Generally,
+*          they are set to
+*             TOLA = MAX(M,N)*norm(A)*MAZHEPS,
+*             TOLB = MAX(P,N)*norm(B)*MAZHEPS.
+*          The size of TOLA and TOLB may affect the size of backward
+*          errors of the decomposition.
+*
+*  K       (output) INTEGER
+*  L       (output) INTEGER
+*          On exit, K and L specify the dimension of the subblocks
+*          described in Purpose section.
+*          K + L = effective numerical rank of (A',B')'.
+*
+*  U       (output) COMPLEX*16 array, dimension (LDU,M)
+*          If JOBU = 'U', U contains the unitary matrix U.
+*          If JOBU = 'N', U is not referenced.
+*
+*  LDU     (input) INTEGER
+*          The leading dimension of the array U. LDU >= max(1,M) if
+*          JOBU = 'U'; LDU >= 1 otherwise.
+*
+*  V       (output) COMPLEX*16 array, dimension (LDV,M)
+*          If JOBV = 'V', V contains the unitary matrix V.
+*          If JOBV = 'N', V is not referenced.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V. LDV >= max(1,P) if
+*          JOBV = 'V'; LDV >= 1 otherwise.
+*
+*  Q       (output) COMPLEX*16 array, dimension (LDQ,N)
+*          If JOBQ = 'Q', Q contains the unitary matrix Q.
+*          If JOBQ = 'N', Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q. LDQ >= max(1,N) if
+*          JOBQ = 'Q'; LDQ >= 1 otherwise.
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (2*N)
+*
+*  TAU     (workspace) COMPLEX*16 array, dimension (N)
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (max(3*N,M,P))
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  The subroutine uses LAPACK subroutine ZGEQPF for the QR factorization
+*  with column pivoting to detect the effective numerical rank of the
+*  a matrix. It may be replaced by a better rank determination strategy.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            FORWRD, WANTQ, WANTU, WANTV
+      INTEGER            I, J
+      COMPLEX*16         T
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGEQPF, ZGEQR2, ZGERQ2, ZLACPY, ZLAPMT,
+     $                   ZLASET, ZUNG2R, ZUNM2R, ZUNMR2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX, MIN
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( T ) = ABS( DBLE( T ) ) + ABS( DIMAG( T ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      WANTU = LSAME( JOBU, 'U' )
+      WANTV = LSAME( JOBV, 'V' )
+      WANTQ = LSAME( JOBQ, 'Q' )
+      FORWRD = .TRUE.
+*
+      INFO = 0
+      IF( .NOT.( WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( P.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -8
+      ELSE IF( LDB.LT.MAX( 1, P ) ) THEN
+         INFO = -10
+      ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN
+         INFO = -16
+      ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN
+         INFO = -18
+      ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN
+         INFO = -20
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGGSVP', -INFO )
+         RETURN
+      END IF
+*
+*     QR with column pivoting of B: B*P = V*( S11 S12 )
+*                                           (  0   0  )
+*
+      DO 10 I = 1, N
+         IWORK( I ) = 0
+   10 CONTINUE
+      CALL ZGEQPF( P, N, B, LDB, IWORK, TAU, WORK, RWORK, INFO )
+*
+*     Update A := A*P
+*
+      CALL ZLAPMT( FORWRD, M, N, A, LDA, IWORK )
+*
+*     Determine the effective rank of matrix B.
+*
+      L = 0
+      DO 20 I = 1, MIN( P, N )
+         IF( CABS1( B( I, I ) ).GT.TOLB )
+     $      L = L + 1
+   20 CONTINUE
+*
+      IF( WANTV ) THEN
+*
+*        Copy the details of V, and form V.
+*
+         CALL ZLASET( 'Full', P, P, CZERO, CZERO, V, LDV )
+         IF( P.GT.1 )
+     $      CALL ZLACPY( 'Lower', P-1, N, B( 2, 1 ), LDB, V( 2, 1 ),
+     $                   LDV )
+         CALL ZUNG2R( P, P, MIN( P, N ), V, LDV, TAU, WORK, INFO )
+      END IF
+*
+*     Clean up B
+*
+      DO 40 J = 1, L - 1
+         DO 30 I = J + 1, L
+            B( I, J ) = CZERO
+   30    CONTINUE
+   40 CONTINUE
+      IF( P.GT.L )
+     $   CALL ZLASET( 'Full', P-L, N, CZERO, CZERO, B( L+1, 1 ), LDB )
+*
+      IF( WANTQ ) THEN
+*
+*        Set Q = I and Update Q := Q*P
+*
+         CALL ZLASET( 'Full', N, N, CZERO, CONE, Q, LDQ )
+         CALL ZLAPMT( FORWRD, N, N, Q, LDQ, IWORK )
+      END IF
+*
+      IF( P.GE.L .AND. N.NE.L ) THEN
+*
+*        RQ factorization of ( S11 S12 ) = ( 0 S12 )*Z
+*
+         CALL ZGERQ2( L, N, B, LDB, TAU, WORK, INFO )
+*
+*        Update A := A*Z'
+*
+         CALL ZUNMR2( 'Right', 'Conjugate transpose', M, N, L, B, LDB,
+     $                TAU, A, LDA, WORK, INFO )
+         IF( WANTQ ) THEN
+*
+*           Update Q := Q*Z'
+*
+            CALL ZUNMR2( 'Right', 'Conjugate transpose', N, N, L, B,
+     $                   LDB, TAU, Q, LDQ, WORK, INFO )
+         END IF
+*
+*        Clean up B
+*
+         CALL ZLASET( 'Full', L, N-L, CZERO, CZERO, B, LDB )
+         DO 60 J = N - L + 1, N
+            DO 50 I = J - N + L + 1, L
+               B( I, J ) = CZERO
+   50       CONTINUE
+   60    CONTINUE
+*
+      END IF
+*
+*     Let              N-L     L
+*                A = ( A11    A12 ) M,
+*
+*     then the following does the complete QR decomposition of A11:
+*
+*              A11 = U*(  0  T12 )*P1'
+*                      (  0   0  )
+*
+      DO 70 I = 1, N - L
+         IWORK( I ) = 0
+   70 CONTINUE
+      CALL ZGEQPF( M, N-L, A, LDA, IWORK, TAU, WORK, RWORK, INFO )
+*
+*     Determine the effective rank of A11
+*
+      K = 0
+      DO 80 I = 1, MIN( M, N-L )
+         IF( CABS1( A( I, I ) ).GT.TOLA )
+     $      K = K + 1
+   80 CONTINUE
+*
+*     Update A12 := U'*A12, where A12 = A( 1:M, N-L+1:N )
+*
+      CALL ZUNM2R( 'Left', 'Conjugate transpose', M, L, MIN( M, N-L ),
+     $             A, LDA, TAU, A( 1, N-L+1 ), LDA, WORK, INFO )
+*
+      IF( WANTU ) THEN
+*
+*        Copy the details of U, and form U
+*
+         CALL ZLASET( 'Full', M, M, CZERO, CZERO, U, LDU )
+         IF( M.GT.1 )
+     $      CALL ZLACPY( 'Lower', M-1, N-L, A( 2, 1 ), LDA, U( 2, 1 ),
+     $                   LDU )
+         CALL ZUNG2R( M, M, MIN( M, N-L ), U, LDU, TAU, WORK, INFO )
+      END IF
+*
+      IF( WANTQ ) THEN
+*
+*        Update Q( 1:N, 1:N-L )  = Q( 1:N, 1:N-L )*P1
+*
+         CALL ZLAPMT( FORWRD, N, N-L, Q, LDQ, IWORK )
+      END IF
+*
+*     Clean up A: set the strictly lower triangular part of
+*     A(1:K, 1:K) = 0, and A( K+1:M, 1:N-L ) = 0.
+*
+      DO 100 J = 1, K - 1
+         DO 90 I = J + 1, K
+            A( I, J ) = CZERO
+   90    CONTINUE
+  100 CONTINUE
+      IF( M.GT.K )
+     $   CALL ZLASET( 'Full', M-K, N-L, CZERO, CZERO, A( K+1, 1 ), LDA )
+*
+      IF( N-L.GT.K ) THEN
+*
+*        RQ factorization of ( T11 T12 ) = ( 0 T12 )*Z1
+*
+         CALL ZGERQ2( K, N-L, A, LDA, TAU, WORK, INFO )
+*
+         IF( WANTQ ) THEN
+*
+*           Update Q( 1:N,1:N-L ) = Q( 1:N,1:N-L )*Z1'
+*
+            CALL ZUNMR2( 'Right', 'Conjugate transpose', N, N-L, K, A,
+     $                   LDA, TAU, Q, LDQ, WORK, INFO )
+         END IF
+*
+*        Clean up A
+*
+         CALL ZLASET( 'Full', K, N-L-K, CZERO, CZERO, A, LDA )
+         DO 120 J = N - L - K + 1, N - L
+            DO 110 I = J - N + L + K + 1, K
+               A( I, J ) = CZERO
+  110       CONTINUE
+  120    CONTINUE
+*
+      END IF
+*
+      IF( M.GT.K ) THEN
+*
+*        QR factorization of A( K+1:M,N-L+1:N )
+*
+         CALL ZGEQR2( M-K, L, A( K+1, N-L+1 ), LDA, TAU, WORK, INFO )
+*
+         IF( WANTU ) THEN
+*
+*           Update U(:,K+1:M) := U(:,K+1:M)*U1
+*
+            CALL ZUNM2R( 'Right', 'No transpose', M, M-K, MIN( M-K, L ),
+     $                   A( K+1, N-L+1 ), LDA, TAU, U( 1, K+1 ), LDU,
+     $                   WORK, INFO )
+         END IF
+*
+*        Clean up
+*
+         DO 140 J = N - L + 1, N
+            DO 130 I = J - N + K + L + 1, M
+               A( I, J ) = CZERO
+  130       CONTINUE
+  140    CONTINUE
+*
+      END IF
+*
+      RETURN
+*
+*     End of ZGGSVP
+*
+      END
diff --git a/costa/native/external/lapack/zgtcon.f b/costa/native/external/lapack/zgtcon.f
new file mode 100644
index 000000000..a66f5b0d9
--- /dev/null
+++ b/costa/native/external/lapack/zgtcon.f
@@ -0,0 +1,167 @@
+      SUBROUTINE ZGTCON( NORM, N, DL, D, DU, DU2, IPIV, ANORM, RCOND,
+     $                   WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            INFO, N
+      DOUBLE PRECISION   ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         D( * ), DL( * ), DU( * ), DU2( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGTCON estimates the reciprocal of the condition number of a complex
+*  tridiagonal matrix A using the LU factorization as computed by
+*  ZGTTRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies whether the 1-norm condition number or the
+*          infinity-norm condition number is required:
+*          = '1' or 'O':  1-norm;
+*          = 'I':         Infinity-norm.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  DL      (input) COMPLEX*16 array, dimension (N-1)
+*          The (n-1) multipliers that define the matrix L from the
+*          LU factorization of A as computed by ZGTTRF.
+*
+*  D       (input) COMPLEX*16 array, dimension (N)
+*          The n diagonal elements of the upper triangular matrix U from
+*          the LU factorization of A.
+*
+*  DU      (input) COMPLEX*16 array, dimension (N-1)
+*          The (n-1) elements of the first superdiagonal of U.
+*
+*  DU2     (input) COMPLEX*16 array, dimension (N-2)
+*          The (n-2) elements of the second superdiagonal of U.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices; for 1 <= i <= n, row i of the matrix was
+*          interchanged with row IPIV(i).  IPIV(i) will always be either
+*          i or i+1; IPIV(i) = i indicates a row interchange was not
+*          required.
+*
+*  ANORM   (input) DOUBLE PRECISION
+*          If NORM = '1' or 'O', the 1-norm of the original matrix A.
+*          If NORM = 'I', the infinity-norm of the original matrix A.
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an
+*          estimate of the 1-norm of inv(A) computed in this routine.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ONENRM
+      INTEGER            I, KASE, KASE1
+      DOUBLE PRECISION   AINVNM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGTTRS, ZLACON
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCMPLX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments.
+*
+      INFO = 0
+      ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' )
+      IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGTCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.EQ.ZERO ) THEN
+         RETURN
+      END IF
+*
+*     Check that D(1:N) is non-zero.
+*
+      DO 10 I = 1, N
+         IF( D( I ).EQ.DCMPLX( ZERO ) )
+     $      RETURN
+   10 CONTINUE
+*
+      AINVNM = ZERO
+      IF( ONENRM ) THEN
+         KASE1 = 1
+      ELSE
+         KASE1 = 2
+      END IF
+      KASE = 0
+   20 CONTINUE
+      CALL ZLACON( N, WORK( N+1 ), WORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+         IF( KASE.EQ.KASE1 ) THEN
+*
+*           Multiply by inv(U)*inv(L).
+*
+            CALL ZGTTRS( 'No transpose', N, 1, DL, D, DU, DU2, IPIV,
+     $                   WORK, N, INFO )
+         ELSE
+*
+*           Multiply by inv(L')*inv(U').
+*
+            CALL ZGTTRS( 'Conjugate transpose', N, 1, DL, D, DU, DU2,
+     $                   IPIV, WORK, N, INFO )
+         END IF
+         GO TO 20
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+      RETURN
+*
+*     End of ZGTCON
+*
+      END
diff --git a/costa/native/external/lapack/zgtrfs.f b/costa/native/external/lapack/zgtrfs.f
new file mode 100644
index 000000000..abf97ae47
--- /dev/null
+++ b/costa/native/external/lapack/zgtrfs.f
@@ -0,0 +1,369 @@
+      SUBROUTINE ZGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2,
+     $                   IPIV, B, LDB, X, LDX, FERR, BERR, WORK, RWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            INFO, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX*16         B( LDB, * ), D( * ), DF( * ), DL( * ),
+     $                   DLF( * ), DU( * ), DU2( * ), DUF( * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGTRFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is tridiagonal, and provides
+*  error bounds and backward error estimates for the solution.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  DL      (input) COMPLEX*16 array, dimension (N-1)
+*          The (n-1) subdiagonal elements of A.
+*
+*  D       (input) COMPLEX*16 array, dimension (N)
+*          The diagonal elements of A.
+*
+*  DU      (input) COMPLEX*16 array, dimension (N-1)
+*          The (n-1) superdiagonal elements of A.
+*
+*  DLF     (input) COMPLEX*16 array, dimension (N-1)
+*          The (n-1) multipliers that define the matrix L from the
+*          LU factorization of A as computed by ZGTTRF.
+*
+*  DF      (input) COMPLEX*16 array, dimension (N)
+*          The n diagonal elements of the upper triangular matrix U from
+*          the LU factorization of A.
+*
+*  DUF     (input) COMPLEX*16 array, dimension (N-1)
+*          The (n-1) elements of the first superdiagonal of U.
+*
+*  DU2     (input) COMPLEX*16 array, dimension (N-2)
+*          The (n-2) elements of the second superdiagonal of U.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices; for 1 <= i <= n, row i of the matrix was
+*          interchanged with row IPIV(i).  IPIV(i) will always be either
+*          i or i+1; IPIV(i) = i indicates a row interchange was not
+*          required.
+*
+*  B       (input) COMPLEX*16 array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) COMPLEX*16 array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by ZGTTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D+0 )
+      DOUBLE PRECISION   THREE
+      PARAMETER          ( THREE = 3.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN
+      CHARACTER          TRANSN, TRANST
+      INTEGER            COUNT, I, J, KASE, NZ
+      DOUBLE PRECISION   EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN
+      COMPLEX*16         ZDUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZAXPY, ZCOPY, ZGTTRS, ZLACON, ZLAGTM
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCMPLX, DIMAG, MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $    LSAME( TRANS, 'C' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -13
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -15
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGTRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      IF( NOTRAN ) THEN
+         TRANSN = 'N'
+         TRANST = 'C'
+      ELSE
+         TRANSN = 'C'
+         TRANST = 'N'
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = 4
+      EPS = DLAMCH( 'Epsilon' )
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 110 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - op(A) * X,
+*        where op(A) = A, A**T, or A**H, depending on TRANS.
+*
+         CALL ZCOPY( N, B( 1, J ), 1, WORK, 1 )
+         CALL ZLAGTM( TRANS, N, 1, -ONE, DL, D, DU, X( 1, J ), LDX, ONE,
+     $                WORK, N )
+*
+*        Compute abs(op(A))*abs(x) + abs(b) for use in the backward
+*        error bound.
+*
+         IF( NOTRAN ) THEN
+            IF( N.EQ.1 ) THEN
+               RWORK( 1 ) = CABS1( B( 1, J ) ) +
+     $                      CABS1( D( 1 ) )*CABS1( X( 1, J ) )
+            ELSE
+               RWORK( 1 ) = CABS1( B( 1, J ) ) +
+     $                      CABS1( D( 1 ) )*CABS1( X( 1, J ) ) +
+     $                      CABS1( DU( 1 ) )*CABS1( X( 2, J ) )
+               DO 30 I = 2, N - 1
+                  RWORK( I ) = CABS1( B( I, J ) ) +
+     $                         CABS1( DL( I-1 ) )*CABS1( X( I-1, J ) ) +
+     $                         CABS1( D( I ) )*CABS1( X( I, J ) ) +
+     $                         CABS1( DU( I ) )*CABS1( X( I+1, J ) )
+   30          CONTINUE
+               RWORK( N ) = CABS1( B( N, J ) ) +
+     $                      CABS1( DL( N-1 ) )*CABS1( X( N-1, J ) ) +
+     $                      CABS1( D( N ) )*CABS1( X( N, J ) )
+            END IF
+         ELSE
+            IF( N.EQ.1 ) THEN
+               RWORK( 1 ) = CABS1( B( 1, J ) ) +
+     $                      CABS1( D( 1 ) )*CABS1( X( 1, J ) )
+            ELSE
+               RWORK( 1 ) = CABS1( B( 1, J ) ) +
+     $                      CABS1( D( 1 ) )*CABS1( X( 1, J ) ) +
+     $                      CABS1( DL( 1 ) )*CABS1( X( 2, J ) )
+               DO 40 I = 2, N - 1
+                  RWORK( I ) = CABS1( B( I, J ) ) +
+     $                         CABS1( DU( I-1 ) )*CABS1( X( I-1, J ) ) +
+     $                         CABS1( D( I ) )*CABS1( X( I, J ) ) +
+     $                         CABS1( DL( I ) )*CABS1( X( I+1, J ) )
+   40          CONTINUE
+               RWORK( N ) = CABS1( B( N, J ) ) +
+     $                      CABS1( DU( N-1 ) )*CABS1( X( N-1, J ) ) +
+     $                      CABS1( D( N ) )*CABS1( X( N, J ) )
+            END IF
+         END IF
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         S = ZERO
+         DO 50 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) )
+            ELSE
+               S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) /
+     $             ( RWORK( I )+SAFE1 ) )
+            END IF
+   50    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL ZGTTRS( TRANS, N, 1, DLF, DF, DUF, DU2, IPIV, WORK, N,
+     $                   INFO )
+            CALL ZAXPY( N, DCMPLX( ONE ), WORK, 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(op(A)))*
+*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(op(A)) is the inverse of op(A)
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(op(A))*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use ZLACON to estimate the infinity-norm of the matrix
+*           inv(op(A)) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) )))
+*
+         DO 60 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I )
+            ELSE
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) +
+     $                      SAFE1
+            END IF
+   60    CONTINUE
+*
+         KASE = 0
+   70    CONTINUE
+         CALL ZLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(op(A)**H).
+*
+               CALL ZGTTRS( TRANST, N, 1, DLF, DF, DUF, DU2, IPIV, WORK,
+     $                      N, INFO )
+               DO 80 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+   80          CONTINUE
+            ELSE
+*
+*              Multiply by inv(op(A))*diag(W).
+*
+               DO 90 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+   90          CONTINUE
+               CALL ZGTTRS( TRANSN, N, 1, DLF, DF, DUF, DU2, IPIV, WORK,
+     $                      N, INFO )
+            END IF
+            GO TO 70
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 100 I = 1, N
+            LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) )
+  100    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  110 CONTINUE
+*
+      RETURN
+*
+*     End of ZGTRFS
+*
+      END
diff --git a/costa/native/external/lapack/zgtsv.f b/costa/native/external/lapack/zgtsv.f
new file mode 100644
index 000000000..f6223f32f
--- /dev/null
+++ b/costa/native/external/lapack/zgtsv.f
@@ -0,0 +1,174 @@
+      SUBROUTINE ZGTSV( N, NRHS, DL, D, DU, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         B( LDB, * ), D( * ), DL( * ), DU( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGTSV  solves the equation
+*
+*     A*X = B,
+*
+*  where A is an N-by-N tridiagonal matrix, by Gaussian elimination with
+*  partial pivoting.
+*
+*  Note that the equation  A'*X = B  may be solved by interchanging the
+*  order of the arguments DU and DL.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  DL      (input/output) COMPLEX*16 array, dimension (N-1)
+*          On entry, DL must contain the (n-1) subdiagonal elements of
+*          A.
+*          On exit, DL is overwritten by the (n-2) elements of the
+*          second superdiagonal of the upper triangular matrix U from
+*          the LU factorization of A, in DL(1), ..., DL(n-2).
+*
+*  D       (input/output) COMPLEX*16 array, dimension (N)
+*          On entry, D must contain the diagonal elements of A.
+*          On exit, D is overwritten by the n diagonal elements of U.
+*
+*  DU      (input/output) COMPLEX*16 array, dimension (N-1)
+*          On entry, DU must contain the (n-1) superdiagonal elements
+*          of A.
+*          On exit, DU is overwritten by the (n-1) elements of the first
+*          superdiagonal of U.
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, U(i,i) is exactly zero, and the solution
+*                has not been computed.  The factorization has not been
+*                completed unless i = N.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            J, K
+      COMPLEX*16         MULT, TEMP, ZDUM
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGTSV ', -INFO )
+         RETURN
+      END IF
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      DO 30 K = 1, N - 1
+         IF( DL( K ).EQ.ZERO ) THEN
+*
+*           Subdiagonal is zero, no elimination is required.
+*
+            IF( D( K ).EQ.ZERO ) THEN
+*
+*              Diagonal is zero: set INFO = K and return; a unique
+*              solution can not be found.
+*
+               INFO = K
+               RETURN
+            END IF
+         ELSE IF( CABS1( D( K ) ).GE.CABS1( DL( K ) ) ) THEN
+*
+*           No row interchange required
+*
+            MULT = DL( K ) / D( K )
+            D( K+1 ) = D( K+1 ) - MULT*DU( K )
+            DO 10 J = 1, NRHS
+               B( K+1, J ) = B( K+1, J ) - MULT*B( K, J )
+   10       CONTINUE
+            IF( K.LT.( N-1 ) )
+     $         DL( K ) = ZERO
+         ELSE
+*
+*           Interchange rows K and K+1
+*
+            MULT = D( K ) / DL( K )
+            D( K ) = DL( K )
+            TEMP = D( K+1 )
+            D( K+1 ) = DU( K ) - MULT*TEMP
+            IF( K.LT.( N-1 ) ) THEN
+               DL( K ) = DU( K+1 )
+               DU( K+1 ) = -MULT*DL( K )
+            END IF
+            DU( K ) = TEMP
+            DO 20 J = 1, NRHS
+               TEMP = B( K, J )
+               B( K, J ) = B( K+1, J )
+               B( K+1, J ) = TEMP - MULT*B( K+1, J )
+   20       CONTINUE
+         END IF
+   30 CONTINUE
+      IF( D( N ).EQ.ZERO ) THEN
+         INFO = N
+         RETURN
+      END IF
+*
+*     Back solve with the matrix U from the factorization.
+*
+      DO 50 J = 1, NRHS
+         B( N, J ) = B( N, J ) / D( N )
+         IF( N.GT.1 )
+     $      B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) / D( N-1 )
+         DO 40 K = N - 2, 1, -1
+            B( K, J ) = ( B( K, J )-DU( K )*B( K+1, J )-DL( K )*
+     $                  B( K+2, J ) ) / D( K )
+   40    CONTINUE
+   50 CONTINUE
+*
+      RETURN
+*
+*     End of ZGTSV
+*
+      END
diff --git a/costa/native/external/lapack/zgtsvx.f b/costa/native/external/lapack/zgtsvx.f
new file mode 100644
index 000000000..94a483b82
--- /dev/null
+++ b/costa/native/external/lapack/zgtsvx.f
@@ -0,0 +1,294 @@
+      SUBROUTINE ZGTSVX( FACT, TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF,
+     $                   DU2, IPIV, B, LDB, X, LDX, RCOND, FERR, BERR,
+     $                   WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          FACT, TRANS
+      INTEGER            INFO, LDB, LDX, N, NRHS
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX*16         B( LDB, * ), D( * ), DF( * ), DL( * ),
+     $                   DLF( * ), DU( * ), DU2( * ), DUF( * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGTSVX uses the LU factorization to compute the solution to a complex
+*  system of linear equations A * X = B, A**T * X = B, or A**H * X = B,
+*  where A is a tridiagonal matrix of order N and X and B are N-by-NRHS
+*  matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'N', the LU decomposition is used to factor the matrix A
+*     as A = L * U, where L is a product of permutation and unit lower
+*     bidiagonal matrices and U is upper triangular with nonzeros in
+*     only the main diagonal and first two superdiagonals.
+*
+*  2. If some U(i,i)=0, so that U is exactly singular, then the routine
+*     returns with INFO = i. Otherwise, the factored form of A is used
+*     to estimate the condition number of the matrix A.  If the
+*     reciprocal of the condition number is less than machine precision,
+*     INFO = N+1 is returned as a warning, but the routine still goes on
+*     to solve for X and compute error bounds as described below.
+*
+*  3. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  4. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of A has been
+*          supplied on entry.
+*          = 'F':  DLF, DF, DUF, DU2, and IPIV contain the factored form
+*                  of A; DL, D, DU, DLF, DF, DUF, DU2 and IPIV will not
+*                  be modified.
+*          = 'N':  The matrix will be copied to DLF, DF, and DUF
+*                  and factored.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  DL      (input) COMPLEX*16 array, dimension (N-1)
+*          The (n-1) subdiagonal elements of A.
+*
+*  D       (input) COMPLEX*16 array, dimension (N)
+*          The n diagonal elements of A.
+*
+*  DU      (input) COMPLEX*16 array, dimension (N-1)
+*          The (n-1) superdiagonal elements of A.
+*
+*  DLF     (input or output) COMPLEX*16 array, dimension (N-1)
+*          If FACT = 'F', then DLF is an input argument and on entry
+*          contains the (n-1) multipliers that define the matrix L from
+*          the LU factorization of A as computed by ZGTTRF.
+*
+*          If FACT = 'N', then DLF is an output argument and on exit
+*          contains the (n-1) multipliers that define the matrix L from
+*          the LU factorization of A.
+*
+*  DF      (input or output) COMPLEX*16 array, dimension (N)
+*          If FACT = 'F', then DF is an input argument and on entry
+*          contains the n diagonal elements of the upper triangular
+*          matrix U from the LU factorization of A.
+*
+*          If FACT = 'N', then DF is an output argument and on exit
+*          contains the n diagonal elements of the upper triangular
+*          matrix U from the LU factorization of A.
+*
+*  DUF     (input or output) COMPLEX*16 array, dimension (N-1)
+*          If FACT = 'F', then DUF is an input argument and on entry
+*          contains the (n-1) elements of the first superdiagonal of U.
+*
+*          If FACT = 'N', then DUF is an output argument and on exit
+*          contains the (n-1) elements of the first superdiagonal of U.
+*
+*  DU2     (input or output) COMPLEX*16 array, dimension (N-2)
+*          If FACT = 'F', then DU2 is an input argument and on entry
+*          contains the (n-2) elements of the second superdiagonal of
+*          U.
+*
+*          If FACT = 'N', then DU2 is an output argument and on exit
+*          contains the (n-2) elements of the second superdiagonal of
+*          U.
+*
+*  IPIV    (input or output) INTEGER array, dimension (N)
+*          If FACT = 'F', then IPIV is an input argument and on entry
+*          contains the pivot indices from the LU factorization of A as
+*          computed by ZGTTRF.
+*
+*          If FACT = 'N', then IPIV is an output argument and on exit
+*          contains the pivot indices from the LU factorization of A;
+*          row i of the matrix was interchanged with row IPIV(i).
+*          IPIV(i) will always be either i or i+1; IPIV(i) = i indicates
+*          a row interchange was not required.
+*
+*  B       (input) COMPLEX*16 array, dimension (LDB,NRHS)
+*          The N-by-NRHS right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) COMPLEX*16 array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The estimate of the reciprocal condition number of the matrix
+*          A.  If RCOND is less than the machine precision (in
+*          particular, if RCOND = 0), the matrix is singular to working
+*          precision.  This condition is indicated by a return code of
+*          INFO > 0.
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= N:  U(i,i) is exactly zero.  The factorization
+*                       has not been completed unless i = N, but the
+*                       factor U is exactly singular, so the solution
+*                       and error bounds could not be computed.
+*                       RCOND = 0 is returned.
+*                = N+1: U is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOFACT, NOTRAN
+      CHARACTER          NORM
+      DOUBLE PRECISION   ANORM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, ZLANGT
+      EXTERNAL           LSAME, DLAMCH, ZLANGT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZCOPY, ZGTCON, ZGTRFS, ZGTTRF, ZGTTRS,
+     $                   ZLACPY
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -14
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -16
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGTSVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( NOFACT ) THEN
+*
+*        Compute the LU factorization of A.
+*
+         CALL ZCOPY( N, D, 1, DF, 1 )
+         IF( N.GT.1 ) THEN
+            CALL ZCOPY( N-1, DL, 1, DLF, 1 )
+            CALL ZCOPY( N-1, DU, 1, DUF, 1 )
+         END IF
+         CALL ZGTTRF( N, DLF, DF, DUF, DU2, IPIV, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      IF( NOTRAN ) THEN
+         NORM = '1'
+      ELSE
+         NORM = 'I'
+      END IF
+      ANORM = ZLANGT( NORM, N, DL, D, DU )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL ZGTCON( NORM, N, DLF, DF, DUF, DU2, IPIV, ANORM, RCOND, WORK,
+     $             INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.DLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution vectors X.
+*
+      CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL ZGTTRS( TRANS, N, NRHS, DLF, DF, DUF, DU2, IPIV, X, LDX,
+     $             INFO )
+*
+*     Use iterative refinement to improve the computed solutions and
+*     compute error bounds and backward error estimates for them.
+*
+      CALL ZGTRFS( TRANS, N, NRHS, DL, D, DU, DLF, DF, DUF, DU2, IPIV,
+     $             B, LDB, X, LDX, FERR, BERR, WORK, RWORK, INFO )
+*
+      RETURN
+*
+*     End of ZGTSVX
+*
+      END
diff --git a/costa/native/external/lapack/zgttrf.f b/costa/native/external/lapack/zgttrf.f
new file mode 100644
index 000000000..0bc59b6e5
--- /dev/null
+++ b/costa/native/external/lapack/zgttrf.f
@@ -0,0 +1,175 @@
+      SUBROUTINE ZGTTRF( N, DL, D, DU, DU2, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         D( * ), DL( * ), DU( * ), DU2( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGTTRF computes an LU factorization of a complex tridiagonal matrix A
+*  using elimination with partial pivoting and row interchanges.
+*
+*  The factorization has the form
+*     A = L * U
+*  where L is a product of permutation and unit lower bidiagonal
+*  matrices and U is upper triangular with nonzeros in only the main
+*  diagonal and first two superdiagonals.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.
+*
+*  DL      (input/output) COMPLEX*16 array, dimension (N-1)
+*          On entry, DL must contain the (n-1) sub-diagonal elements of
+*          A.
+*
+*          On exit, DL is overwritten by the (n-1) multipliers that
+*          define the matrix L from the LU factorization of A.
+*
+*  D       (input/output) COMPLEX*16 array, dimension (N)
+*          On entry, D must contain the diagonal elements of A.
+*
+*          On exit, D is overwritten by the n diagonal elements of the
+*          upper triangular matrix U from the LU factorization of A.
+*
+*  DU      (input/output) COMPLEX*16 array, dimension (N-1)
+*          On entry, DU must contain the (n-1) super-diagonal elements
+*          of A.
+*
+*          On exit, DU is overwritten by the (n-1) elements of the first
+*          super-diagonal of U.
+*
+*  DU2     (output) COMPLEX*16 array, dimension (N-2)
+*          On exit, DU2 is overwritten by the (n-2) elements of the
+*          second super-diagonal of U.
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          The pivot indices; for 1 <= i <= n, row i of the matrix was
+*          interchanged with row IPIV(i).  IPIV(i) will always be either
+*          i or i+1; IPIV(i) = i indicates a row interchange was not
+*          required.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -k, the k-th argument had an illegal value
+*          > 0:  if INFO = k, U(k,k) is exactly zero. The factorization
+*                has been completed, but the factor U is exactly
+*                singular, and division by zero will occur if it is used
+*                to solve a system of equations.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      COMPLEX*16         FACT, TEMP, ZDUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+         CALL XERBLA( 'ZGTTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Initialize IPIV(i) = i and DU2(i) = 0
+*
+      DO 10 I = 1, N
+         IPIV( I ) = I
+   10 CONTINUE
+      DO 20 I = 1, N - 2
+         DU2( I ) = ZERO
+   20 CONTINUE
+*
+      DO 30 I = 1, N - 2
+         IF( CABS1( D( I ) ).GE.CABS1( DL( I ) ) ) THEN
+*
+*           No row interchange required, eliminate DL(I)
+*
+            IF( CABS1( D( I ) ).NE.ZERO ) THEN
+               FACT = DL( I ) / D( I )
+               DL( I ) = FACT
+               D( I+1 ) = D( I+1 ) - FACT*DU( I )
+            END IF
+         ELSE
+*
+*           Interchange rows I and I+1, eliminate DL(I)
+*
+            FACT = D( I ) / DL( I )
+            D( I ) = DL( I )
+            DL( I ) = FACT
+            TEMP = DU( I )
+            DU( I ) = D( I+1 )
+            D( I+1 ) = TEMP - FACT*D( I+1 )
+            DU2( I ) = DU( I+1 )
+            DU( I+1 ) = -FACT*DU( I+1 )
+            IPIV( I ) = I + 1
+         END IF
+   30 CONTINUE
+      IF( N.GT.1 ) THEN
+         I = N - 1
+         IF( CABS1( D( I ) ).GE.CABS1( DL( I ) ) ) THEN
+            IF( CABS1( D( I ) ).NE.ZERO ) THEN
+               FACT = DL( I ) / D( I )
+               DL( I ) = FACT
+               D( I+1 ) = D( I+1 ) - FACT*DU( I )
+            END IF
+         ELSE
+            FACT = D( I ) / DL( I )
+            D( I ) = DL( I )
+            DL( I ) = FACT
+            TEMP = DU( I )
+            DU( I ) = D( I+1 )
+            D( I+1 ) = TEMP - FACT*D( I+1 )
+            IPIV( I ) = I + 1
+         END IF
+      END IF
+*
+*     Check for a zero on the diagonal of U.
+*
+      DO 40 I = 1, N
+         IF( CABS1( D( I ) ).EQ.ZERO ) THEN
+            INFO = I
+            GO TO 50
+         END IF
+   40 CONTINUE
+   50 CONTINUE
+*
+      RETURN
+*
+*     End of ZGTTRF
+*
+      END
diff --git a/costa/native/external/lapack/zgttrs.f b/costa/native/external/lapack/zgttrs.f
new file mode 100644
index 000000000..e62367c9b
--- /dev/null
+++ b/costa/native/external/lapack/zgttrs.f
@@ -0,0 +1,143 @@
+      SUBROUTINE ZGTTRS( TRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGTTRS solves one of the systems of equations
+*     A * X = B,  A**T * X = B,  or  A**H * X = B,
+*  with a tridiagonal matrix A using the LU factorization computed
+*  by ZGTTRF.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER
+*          Specifies the form of the system of equations.
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  DL      (input) COMPLEX*16 array, dimension (N-1)
+*          The (n-1) multipliers that define the matrix L from the
+*          LU factorization of A.
+*
+*  D       (input) COMPLEX*16 array, dimension (N)
+*          The n diagonal elements of the upper triangular matrix U from
+*          the LU factorization of A.
+*
+*  DU      (input) COMPLEX*16 array, dimension (N-1)
+*          The (n-1) elements of the first super-diagonal of U.
+*
+*  DU2     (input) COMPLEX*16 array, dimension (N-2)
+*          The (n-2) elements of the second super-diagonal of U.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices; for 1 <= i <= n, row i of the matrix was
+*          interchanged with row IPIV(i).  IPIV(i) will always be either
+*          i or i+1; IPIV(i) = i indicates a row interchange was not
+*          required.
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the matrix of right hand side vectors B.
+*          On exit, B is overwritten by the solution vectors X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -k, the k-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN
+      INTEGER            ITRANS, J, JB, NB
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGTTS2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      NOTRAN = ( TRANS.EQ.'N' .OR. TRANS.EQ.'n' )
+      IF( .NOT.NOTRAN .AND. .NOT.( TRANS.EQ.'T' .OR. TRANS.EQ.
+     $    't' ) .AND. .NOT.( TRANS.EQ.'C' .OR. TRANS.EQ.'c' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( N, 1 ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGTTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+*     Decode TRANS
+*
+      IF( NOTRAN ) THEN
+         ITRANS = 0
+      ELSE IF( TRANS.EQ.'T' .OR. TRANS.EQ.'t' ) THEN
+         ITRANS = 1
+      ELSE
+         ITRANS = 2
+      END IF
+*
+*     Determine the number of right-hand sides to solve at a time.
+*
+      IF( NRHS.EQ.1 ) THEN
+         NB = 1
+      ELSE
+         NB = MAX( 1, ILAENV( 1, 'ZGTTRS', TRANS, N, NRHS, -1, -1 ) )
+      END IF
+*
+      IF( NB.GE.NRHS ) THEN
+         CALL ZGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB )
+      ELSE
+         DO 10 J = 1, NRHS, NB
+            JB = MIN( NRHS-J+1, NB )
+            CALL ZGTTS2( ITRANS, N, JB, DL, D, DU, DU2, IPIV, B( 1, J ),
+     $                   LDB )
+   10    CONTINUE
+      END IF
+*
+*     End of ZGTTRS
+*
+      END
diff --git a/costa/native/external/lapack/zgtts2.f b/costa/native/external/lapack/zgtts2.f
new file mode 100644
index 000000000..adf7077b7
--- /dev/null
+++ b/costa/native/external/lapack/zgtts2.f
@@ -0,0 +1,272 @@
+      SUBROUTINE ZGTTS2( ITRANS, N, NRHS, DL, D, DU, DU2, IPIV, B, LDB )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            ITRANS, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         B( LDB, * ), D( * ), DL( * ), DU( * ), DU2( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZGTTS2 solves one of the systems of equations
+*     A * X = B,  A**T * X = B,  or  A**H * X = B,
+*  with a tridiagonal matrix A using the LU factorization computed
+*  by ZGTTRF.
+*
+*  Arguments
+*  =========
+*
+*  ITRANS  (input) INTEGER
+*          Specifies the form of the system of equations.
+*          = 0:  A * X = B     (No transpose)
+*          = 1:  A**T * X = B  (Transpose)
+*          = 2:  A**H * X = B  (Conjugate transpose)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  DL      (input) COMPLEX*16 array, dimension (N-1)
+*          The (n-1) multipliers that define the matrix L from the
+*          LU factorization of A.
+*
+*  D       (input) COMPLEX*16 array, dimension (N)
+*          The n diagonal elements of the upper triangular matrix U from
+*          the LU factorization of A.
+*
+*  DU      (input) COMPLEX*16 array, dimension (N-1)
+*          The (n-1) elements of the first super-diagonal of U.
+*
+*  DU2     (input) COMPLEX*16 array, dimension (N-2)
+*          The (n-2) elements of the second super-diagonal of U.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          The pivot indices; for 1 <= i <= n, row i of the matrix was
+*          interchanged with row IPIV(i).  IPIV(i) will always be either
+*          i or i+1; IPIV(i) = i indicates a row interchange was not
+*          required.
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the matrix of right hand side vectors B.
+*          On exit, B is overwritten by the solution vectors X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, J
+      COMPLEX*16         TEMP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( ITRANS.EQ.0 ) THEN
+*
+*        Solve A*X = B using the LU factorization of A,
+*        overwriting each right hand side vector with its solution.
+*
+         IF( NRHS.LE.1 ) THEN
+            J = 1
+   10       CONTINUE
+*
+*           Solve L*x = b.
+*
+            DO 20 I = 1, N - 1
+               IF( IPIV( I ).EQ.I ) THEN
+                  B( I+1, J ) = B( I+1, J ) - DL( I )*B( I, J )
+               ELSE
+                  TEMP = B( I, J )
+                  B( I, J ) = B( I+1, J )
+                  B( I+1, J ) = TEMP - DL( I )*B( I, J )
+               END IF
+   20       CONTINUE
+*
+*           Solve U*x = b.
+*
+            B( N, J ) = B( N, J ) / D( N )
+            IF( N.GT.1 )
+     $         B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) /
+     $                       D( N-1 )
+            DO 30 I = N - 2, 1, -1
+               B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DU2( I )*
+     $                     B( I+2, J ) ) / D( I )
+   30       CONTINUE
+            IF( J.LT.NRHS ) THEN
+               J = J + 1
+               GO TO 10
+            END IF
+         ELSE
+            DO 60 J = 1, NRHS
+*
+*           Solve L*x = b.
+*
+               DO 40 I = 1, N - 1
+                  IF( IPIV( I ).EQ.I ) THEN
+                     B( I+1, J ) = B( I+1, J ) - DL( I )*B( I, J )
+                  ELSE
+                     TEMP = B( I, J )
+                     B( I, J ) = B( I+1, J )
+                     B( I+1, J ) = TEMP - DL( I )*B( I, J )
+                  END IF
+   40          CONTINUE
+*
+*           Solve U*x = b.
+*
+               B( N, J ) = B( N, J ) / D( N )
+               IF( N.GT.1 )
+     $            B( N-1, J ) = ( B( N-1, J )-DU( N-1 )*B( N, J ) ) /
+     $                          D( N-1 )
+               DO 50 I = N - 2, 1, -1
+                  B( I, J ) = ( B( I, J )-DU( I )*B( I+1, J )-DU2( I )*
+     $                        B( I+2, J ) ) / D( I )
+   50          CONTINUE
+   60       CONTINUE
+         END IF
+      ELSE IF( ITRANS.EQ.1 ) THEN
+*
+*        Solve A**T * X = B.
+*
+         IF( NRHS.LE.1 ) THEN
+            J = 1
+   70       CONTINUE
+*
+*           Solve U**T * x = b.
+*
+            B( 1, J ) = B( 1, J ) / D( 1 )
+            IF( N.GT.1 )
+     $         B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 )
+            DO 80 I = 3, N
+               B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J )-DU2( I-2 )*
+     $                     B( I-2, J ) ) / D( I )
+   80       CONTINUE
+*
+*           Solve L**T * x = b.
+*
+            DO 90 I = N - 1, 1, -1
+               IF( IPIV( I ).EQ.I ) THEN
+                  B( I, J ) = B( I, J ) - DL( I )*B( I+1, J )
+               ELSE
+                  TEMP = B( I+1, J )
+                  B( I+1, J ) = B( I, J ) - DL( I )*TEMP
+                  B( I, J ) = TEMP
+               END IF
+   90       CONTINUE
+            IF( J.LT.NRHS ) THEN
+               J = J + 1
+               GO TO 70
+            END IF
+         ELSE
+            DO 120 J = 1, NRHS
+*
+*           Solve U**T * x = b.
+*
+               B( 1, J ) = B( 1, J ) / D( 1 )
+               IF( N.GT.1 )
+     $            B( 2, J ) = ( B( 2, J )-DU( 1 )*B( 1, J ) ) / D( 2 )
+               DO 100 I = 3, N
+                  B( I, J ) = ( B( I, J )-DU( I-1 )*B( I-1, J )-
+     $                        DU2( I-2 )*B( I-2, J ) ) / D( I )
+  100          CONTINUE
+*
+*           Solve L**T * x = b.
+*
+               DO 110 I = N - 1, 1, -1
+                  IF( IPIV( I ).EQ.I ) THEN
+                     B( I, J ) = B( I, J ) - DL( I )*B( I+1, J )
+                  ELSE
+                     TEMP = B( I+1, J )
+                     B( I+1, J ) = B( I, J ) - DL( I )*TEMP
+                     B( I, J ) = TEMP
+                  END IF
+  110          CONTINUE
+  120       CONTINUE
+         END IF
+      ELSE
+*
+*        Solve A**H * X = B.
+*
+         IF( NRHS.LE.1 ) THEN
+            J = 1
+  130       CONTINUE
+*
+*           Solve U**H * x = b.
+*
+            B( 1, J ) = B( 1, J ) / DCONJG( D( 1 ) )
+            IF( N.GT.1 )
+     $         B( 2, J ) = ( B( 2, J )-DCONJG( DU( 1 ) )*B( 1, J ) ) /
+     $                     DCONJG( D( 2 ) )
+            DO 140 I = 3, N
+               B( I, J ) = ( B( I, J )-DCONJG( DU( I-1 ) )*B( I-1, J )-
+     $                     DCONJG( DU2( I-2 ) )*B( I-2, J ) ) /
+     $                     DCONJG( D( I ) )
+  140       CONTINUE
+*
+*           Solve L**H * x = b.
+*
+            DO 150 I = N - 1, 1, -1
+               IF( IPIV( I ).EQ.I ) THEN
+                  B( I, J ) = B( I, J ) - DCONJG( DL( I ) )*B( I+1, J )
+               ELSE
+                  TEMP = B( I+1, J )
+                  B( I+1, J ) = B( I, J ) - DCONJG( DL( I ) )*TEMP
+                  B( I, J ) = TEMP
+               END IF
+  150       CONTINUE
+            IF( J.LT.NRHS ) THEN
+               J = J + 1
+               GO TO 130
+            END IF
+         ELSE
+            DO 180 J = 1, NRHS
+*
+*           Solve U**H * x = b.
+*
+               B( 1, J ) = B( 1, J ) / DCONJG( D( 1 ) )
+               IF( N.GT.1 )
+     $            B( 2, J ) = ( B( 2, J )-DCONJG( DU( 1 ) )*B( 1, J ) )
+     $                         / DCONJG( D( 2 ) )
+               DO 160 I = 3, N
+                  B( I, J ) = ( B( I, J )-DCONJG( DU( I-1 ) )*
+     $                        B( I-1, J )-DCONJG( DU2( I-2 ) )*
+     $                        B( I-2, J ) ) / DCONJG( D( I ) )
+  160          CONTINUE
+*
+*           Solve L**H * x = b.
+*
+               DO 170 I = N - 1, 1, -1
+                  IF( IPIV( I ).EQ.I ) THEN
+                     B( I, J ) = B( I, J ) - DCONJG( DL( I ) )*
+     $                           B( I+1, J )
+                  ELSE
+                     TEMP = B( I+1, J )
+                     B( I+1, J ) = B( I, J ) - DCONJG( DL( I ) )*TEMP
+                     B( I, J ) = TEMP
+                  END IF
+  170          CONTINUE
+  180       CONTINUE
+         END IF
+      END IF
+*
+*     End of ZGTTS2
+*
+      END
diff --git a/costa/native/external/lapack/zhbev.f b/costa/native/external/lapack/zhbev.f
new file mode 100644
index 000000000..70672c9b3
--- /dev/null
+++ b/costa/native/external/lapack/zhbev.f
@@ -0,0 +1,209 @@
+      SUBROUTINE ZHBEV( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK,
+     $                  RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, KD, LDAB, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   RWORK( * ), W( * )
+      COMPLEX*16         AB( LDAB, * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHBEV computes all the eigenvalues and, optionally, eigenvectors of
+*  a complex Hermitian band matrix A.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) COMPLEX*16 array, dimension (LDAB, N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*          On exit, AB is overwritten by values generated during the
+*          reduction to tridiagonal form.  If UPLO = 'U', the first
+*          superdiagonal and the diagonal of the tridiagonal matrix T
+*          are returned in rows KD and KD+1 of AB, and if UPLO = 'L',
+*          the diagonal and first subdiagonal of T are returned in the
+*          first two rows of AB.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD + 1.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) COMPLEX*16 array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal
+*          eigenvectors of the matrix A, with the i-th column of Z
+*          holding the eigenvector associated with W(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (max(1,3*N-2))
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = i, the algorithm failed to converge; i
+*                off-diagonal elements of an intermediate tridiagonal
+*                form did not converge to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LOWER, WANTZ
+      INTEGER            IINFO, IMAX, INDE, INDRWK, ISCALE
+      DOUBLE PRECISION   ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
+     $                   SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, ZLANHB
+      EXTERNAL           LSAME, DLAMCH, ZLANHB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSCAL, DSTERF, XERBLA, ZHBTRD, ZLASCL, ZSTEQR
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      LOWER = LSAME( UPLO, 'L' )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -6
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -9
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHBEV ', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( LOWER ) THEN
+            W( 1 ) = AB( 1, 1 )
+         ELSE
+            W( 1 ) = AB( KD+1, 1 )
+         END IF
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      EPS = DLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = SQRT( BIGNUM )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ANRM = ZLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK )
+      ISCALE = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         IF( LOWER ) THEN
+            CALL ZLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO )
+         ELSE
+            CALL ZLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO )
+         END IF
+      END IF
+*
+*     Call ZHBTRD to reduce Hermitian band matrix to tridiagonal form.
+*
+      INDE = 1
+      CALL ZHBTRD( JOBZ, UPLO, N, KD, AB, LDAB, W, RWORK( INDE ), Z,
+     $             LDZ, WORK, IINFO )
+*
+*     For eigenvalues only, call DSTERF.  For eigenvectors, call ZSTEQR.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL DSTERF( N, W, RWORK( INDE ), INFO )
+      ELSE
+         INDRWK = INDE + N
+         CALL ZSTEQR( JOBZ, N, W, RWORK( INDE ), Z, LDZ,
+     $                RWORK( INDRWK ), INFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = N
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL DSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+      RETURN
+*
+*     End of ZHBEV
+*
+      END
diff --git a/costa/native/external/lapack/zhbevd.f b/costa/native/external/lapack/zhbevd.f
new file mode 100644
index 000000000..f9dcce634
--- /dev/null
+++ b/costa/native/external/lapack/zhbevd.f
@@ -0,0 +1,298 @@
+      SUBROUTINE ZHBEVD( JOBZ, UPLO, N, KD, AB, LDAB, W, Z, LDZ, WORK,
+     $                   LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, KD, LDAB, LDZ, LIWORK, LRWORK, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   RWORK( * ), W( * )
+      COMPLEX*16         AB( LDAB, * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHBEVD computes all the eigenvalues and, optionally, eigenvectors of
+*  a complex Hermitian band matrix A.  If eigenvectors are desired, it
+*  uses a divide and conquer algorithm.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) COMPLEX*16 array, dimension (LDAB, N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*          On exit, AB is overwritten by values generated during the
+*          reduction to tridiagonal form.  If UPLO = 'U', the first
+*          superdiagonal and the diagonal of the tridiagonal matrix T
+*          are returned in rows KD and KD+1 of AB, and if UPLO = 'L',
+*          the diagonal and first subdiagonal of T are returned in the
+*          first two rows of AB.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD + 1.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) COMPLEX*16 array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal
+*          eigenvectors of the matrix A, with the i-th column of Z
+*          holding the eigenvector associated with W(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If N <= 1,               LWORK must be at least 1.
+*          If JOBZ = 'N' and N > 1, LWORK must be at least N.
+*          If JOBZ = 'V' and N > 1, LWORK must be at least 2*N**2.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace/output) DOUBLE PRECISION array,
+*                                         dimension (LRWORK)
+*          On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
+*
+*  LRWORK  (input) INTEGER
+*          The dimension of array RWORK.
+*          If N <= 1,               LRWORK must be at least 1.
+*          If JOBZ = 'N' and N > 1, LRWORK must be at least N.
+*          If JOBZ = 'V' and N > 1, LRWORK must be at least
+*                        1 + 5*N + 2*N**2.
+*
+*          If LRWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the RWORK array,
+*          returns this value as the first entry of the RWORK array, and
+*          no error message related to LRWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of array IWORK.
+*          If JOBZ = 'N' or N <= 1, LIWORK must be at least 1.
+*          If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N .
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = i, the algorithm failed to converge; i
+*                off-diagonal elements of an intermediate tridiagonal
+*                form did not converge to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+      COMPLEX*16         CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0D0, 0.0D0 ),
+     $                   CONE = ( 1.0D0, 0.0D0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LOWER, LQUERY, WANTZ
+      INTEGER            IINFO, IMAX, INDE, INDWK2, INDWRK, ISCALE,
+     $                   LIWMIN, LLRWK, LLWK2, LRWMIN, LWMIN
+      DOUBLE PRECISION   ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
+     $                   SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, ZLANHB
+      EXTERNAL           LSAME, DLAMCH, ZLANHB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSCAL, DSTERF, XERBLA, ZGEMM, ZHBTRD, ZLACPY,
+     $                   ZLASCL, ZSTEDC
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      LOWER = LSAME( UPLO, 'L' )
+      LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 .OR. LRWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( N.LE.1 ) THEN
+         LWMIN = 1
+         LRWMIN = 1
+         LIWMIN = 1
+      ELSE
+         IF( WANTZ ) THEN
+            LWMIN = 2*N**2
+            LRWMIN = 1 + 5*N + 2*N**2
+            LIWMIN = 3 + 5*N
+         ELSE
+            LWMIN = N
+            LRWMIN = N
+            LIWMIN = 1
+         END IF
+      END IF
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -6
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -9
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -11
+      ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -15
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+         RWORK( 1 ) = LRWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHBEVD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         W( 1 ) = AB( 1, 1 )
+         IF( WANTZ )
+     $      Z( 1, 1 ) = CONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      EPS = DLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = SQRT( BIGNUM )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ANRM = ZLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK )
+      ISCALE = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         IF( LOWER ) THEN
+            CALL ZLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO )
+         ELSE
+            CALL ZLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO )
+         END IF
+      END IF
+*
+*     Call ZHBTRD to reduce Hermitian band matrix to tridiagonal form.
+*
+      INDE = 1
+      INDWRK = INDE + N
+      INDWK2 = 1 + N*N
+      LLWK2 = LWORK - INDWK2 + 1
+      LLRWK = LRWORK - INDWRK + 1
+      CALL ZHBTRD( JOBZ, UPLO, N, KD, AB, LDAB, W, RWORK( INDE ), Z,
+     $             LDZ, WORK, IINFO )
+*
+*     For eigenvalues only, call DSTERF.  For eigenvectors, call ZSTEDC.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL DSTERF( N, W, RWORK( INDE ), INFO )
+      ELSE
+         CALL ZSTEDC( 'I', N, W, RWORK( INDE ), WORK, N, WORK( INDWK2 ),
+     $                LLWK2, RWORK( INDWRK ), LLRWK, IWORK, LIWORK,
+     $                INFO )
+         CALL ZGEMM( 'N', 'N', N, N, N, CONE, Z, LDZ, WORK, N, CZERO,
+     $               WORK( INDWK2 ), N )
+         CALL ZLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = N
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL DSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+      WORK( 1 ) = LWMIN
+      RWORK( 1 ) = LRWMIN
+      IWORK( 1 ) = LIWMIN
+      RETURN
+*
+*     End of ZHBEVD
+*
+      END
diff --git a/costa/native/external/lapack/zhbevx.f b/costa/native/external/lapack/zhbevx.f
new file mode 100644
index 000000000..3905a980a
--- /dev/null
+++ b/costa/native/external/lapack/zhbevx.f
@@ -0,0 +1,417 @@
+      SUBROUTINE ZHBEVX( JOBZ, RANGE, UPLO, N, KD, AB, LDAB, Q, LDQ, VL,
+     $                   VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK, RWORK,
+     $                   IWORK, IFAIL, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE, UPLO
+      INTEGER            IL, INFO, IU, KD, LDAB, LDQ, LDZ, M, N
+      DOUBLE PRECISION   ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IFAIL( * ), IWORK( * )
+      DOUBLE PRECISION   RWORK( * ), W( * )
+      COMPLEX*16         AB( LDAB, * ), Q( LDQ, * ), WORK( * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHBEVX computes selected eigenvalues and, optionally, eigenvectors
+*  of a complex Hermitian band matrix A.  Eigenvalues and eigenvectors
+*  can be selected by specifying either a range of values or a range of
+*  indices for the desired eigenvalues.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found;
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found;
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) COMPLEX*16 array, dimension (LDAB, N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*          On exit, AB is overwritten by values generated during the
+*          reduction to tridiagonal form.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD + 1.
+*
+*  Q       (output) COMPLEX*16 array, dimension (LDQ, N)
+*          If JOBZ = 'V', the N-by-N unitary matrix used in the
+*                          reduction to tridiagonal form.
+*          If JOBZ = 'N', the array Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.  If JOBZ = 'V', then
+*          LDQ >= max(1,N).
+*
+*  VL      (input) DOUBLE PRECISION
+*  VU      (input) DOUBLE PRECISION
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) DOUBLE PRECISION
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less than
+*          or equal to zero, then  EPS*|T|  will be used in its place,
+*          where |T| is the 1-norm of the tridiagonal matrix obtained
+*          by reducing AB to tridiagonal form.
+*
+*          Eigenvalues will be computed most accurately when ABSTOL is
+*          set to twice the underflow threshold 2*DLAMCH('S'), not zero.
+*          If this routine returns with INFO>0, indicating that some
+*          eigenvectors did not converge, try setting ABSTOL to
+*          2*DLAMCH('S').
+*
+*          See "Computing Small Singular Values of Bidiagonal Matrices
+*          with Guaranteed High Relative Accuracy," by Demmel and
+*          Kahan, LAPACK Working Note #3.
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          The first M elements contain the selected eigenvalues in
+*          ascending order.
+*
+*  Z       (output) COMPLEX*16 array, dimension (LDZ, max(1,M))
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix A
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          If an eigenvector fails to converge, then that column of Z
+*          contains the latest approximation to the eigenvector, and the
+*          index of the eigenvector is returned in IFAIL.
+*          If JOBZ = 'N', then Z is not referenced.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (7*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (5*N)
+*
+*  IFAIL   (output) INTEGER array, dimension (N)
+*          If JOBZ = 'V', then if INFO = 0, the first M elements of
+*          IFAIL are zero.  If INFO > 0, then IFAIL contains the
+*          indices of the eigenvectors that failed to converge.
+*          If JOBZ = 'N', then IFAIL is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, then i eigenvectors failed to converge.
+*                Their indices are stored in array IFAIL.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+      COMPLEX*16         CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0D0, 0.0D0 ),
+     $                   CONE = ( 1.0D0, 0.0D0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, LOWER, VALEIG, WANTZ
+      CHARACTER          ORDER
+      INTEGER            I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL,
+     $                   INDISP, INDIWK, INDRWK, INDWRK, ISCALE, ITMP1,
+     $                   J, JJ, NSPLIT
+      DOUBLE PRECISION   ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN,
+     $                   SIGMA, SMLNUM, TMP1, VLL, VUU
+      COMPLEX*16         CTMP1
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, ZLANHB
+      EXTERNAL           LSAME, DLAMCH, ZLANHB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DSCAL, DSTEBZ, DSTERF, XERBLA, ZCOPY,
+     $                   ZGEMV, ZHBTRD, ZLACPY, ZLASCL, ZSTEIN, ZSTEQR,
+     $                   ZSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+      LOWER = LSAME( UPLO, 'L' )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -7
+      ELSE IF( WANTZ .AND. LDQ.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE
+         IF( VALEIG ) THEN
+            IF( N.GT.0 .AND. VU.LE.VL )
+     $         INFO = -11
+         ELSE IF( INDEIG ) THEN
+            IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN
+               INFO = -12
+            ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN
+               INFO = -13
+            END IF
+         END IF
+      END IF
+      IF( INFO.EQ.0 ) THEN
+         IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) )
+     $      INFO = -18
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHBEVX', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         M = 1
+         IF( LOWER ) THEN
+            CTMP1 = AB( 1, 1 )
+         ELSE
+            CTMP1 = AB( KD+1, 1 )
+         END IF
+         TMP1 = DBLE( CTMP1 )
+         IF( VALEIG ) THEN
+            IF( .NOT.( VL.LT.TMP1 .AND. VU.GE.TMP1 ) )
+     $         M = 0
+         END IF
+         IF( M.EQ.1 ) THEN
+            W( 1 ) = CTMP1
+            IF( WANTZ )
+     $         Z( 1, 1 ) = CONE
+         END IF
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      EPS = DLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ISCALE = 0
+      ABSTLL = ABSTOL
+      IF( VALEIG ) THEN
+         VLL = VL
+         VUU = VU
+      ELSE
+         VLL = ZERO
+         VUU = ZERO
+      END IF
+      ANRM = ZLANHB( 'M', UPLO, N, KD, AB, LDAB, RWORK )
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         IF( LOWER ) THEN
+            CALL ZLASCL( 'B', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO )
+         ELSE
+            CALL ZLASCL( 'Q', KD, KD, ONE, SIGMA, N, N, AB, LDAB, INFO )
+         END IF
+         IF( ABSTOL.GT.0 )
+     $      ABSTLL = ABSTOL*SIGMA
+         IF( VALEIG ) THEN
+            VLL = VL*SIGMA
+            VUU = VU*SIGMA
+         END IF
+      END IF
+*
+*     Call ZHBTRD to reduce Hermitian band matrix to tridiagonal form.
+*
+      INDD = 1
+      INDE = INDD + N
+      INDRWK = INDE + N
+      INDWRK = 1
+      CALL ZHBTRD( JOBZ, UPLO, N, KD, AB, LDAB, RWORK( INDD ),
+     $             RWORK( INDE ), Q, LDQ, WORK( INDWRK ), IINFO )
+*
+*     If all eigenvalues are desired and ABSTOL is less than or equal
+*     to zero, then call DSTERF or ZSTEQR.  If this fails for some
+*     eigenvalue, then try DSTEBZ.
+*
+      IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND.
+     $    ( ABSTOL.LE.ZERO ) ) THEN
+         CALL DCOPY( N, RWORK( INDD ), 1, W, 1 )
+         INDEE = INDRWK + 2*N
+         IF( .NOT.WANTZ ) THEN
+            CALL DCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 )
+            CALL DSTERF( N, W, RWORK( INDEE ), INFO )
+         ELSE
+            CALL ZLACPY( 'A', N, N, Q, LDQ, Z, LDZ )
+            CALL DCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 )
+            CALL ZSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ,
+     $                   RWORK( INDRWK ), INFO )
+            IF( INFO.EQ.0 ) THEN
+               DO 10 I = 1, N
+                  IFAIL( I ) = 0
+   10          CONTINUE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            M = N
+            GO TO 30
+         END IF
+         INFO = 0
+      END IF
+*
+*     Otherwise, call DSTEBZ and, if eigenvectors are desired, ZSTEIN.
+*
+      IF( WANTZ ) THEN
+         ORDER = 'B'
+      ELSE
+         ORDER = 'E'
+      END IF
+      INDIBL = 1
+      INDISP = INDIBL + N
+      INDIWK = INDISP + N
+      CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL,
+     $             RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W,
+     $             IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ),
+     $             IWORK( INDIWK ), INFO )
+*
+      IF( WANTZ ) THEN
+         CALL ZSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W,
+     $                IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ,
+     $                RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO )
+*
+*        Apply unitary matrix used in reduction to tridiagonal
+*        form to eigenvectors returned by ZSTEIN.
+*
+         DO 20 J = 1, M
+            CALL ZCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 )
+            CALL ZGEMV( 'N', N, N, CONE, Q, LDQ, WORK, 1, CZERO,
+     $                  Z( 1, J ), 1 )
+   20    CONTINUE
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+   30 CONTINUE
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = M
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL DSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+*     If eigenvalues are not in order, then sort them, along with
+*     eigenvectors.
+*
+      IF( WANTZ ) THEN
+         DO 50 J = 1, M - 1
+            I = 0
+            TMP1 = W( J )
+            DO 40 JJ = J + 1, M
+               IF( W( JJ ).LT.TMP1 ) THEN
+                  I = JJ
+                  TMP1 = W( JJ )
+               END IF
+   40       CONTINUE
+*
+            IF( I.NE.0 ) THEN
+               ITMP1 = IWORK( INDIBL+I-1 )
+               W( I ) = W( J )
+               IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 )
+               W( J ) = TMP1
+               IWORK( INDIBL+J-1 ) = ITMP1
+               CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 )
+               IF( INFO.NE.0 ) THEN
+                  ITMP1 = IFAIL( I )
+                  IFAIL( I ) = IFAIL( J )
+                  IFAIL( J ) = ITMP1
+               END IF
+            END IF
+   50    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZHBEVX
+*
+      END
diff --git a/costa/native/external/lapack/zhbgst.f b/costa/native/external/lapack/zhbgst.f
new file mode 100644
index 000000000..d0ddf2915
--- /dev/null
+++ b/costa/native/external/lapack/zhbgst.f
@@ -0,0 +1,1378 @@
+      SUBROUTINE ZHBGST( VECT, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, X,
+     $                   LDX, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO, VECT
+      INTEGER            INFO, KA, KB, LDAB, LDBB, LDX, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   RWORK( * )
+      COMPLEX*16         AB( LDAB, * ), BB( LDBB, * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHBGST reduces a complex Hermitian-definite banded generalized
+*  eigenproblem  A*x = lambda*B*x  to standard form  C*y = lambda*y,
+*  such that C has the same bandwidth as A.
+*
+*  B must have been previously factorized as S**H*S by ZPBSTF, using a
+*  split Cholesky factorization. A is overwritten by C = X**H*A*X, where
+*  X = S**(-1)*Q and Q is a unitary matrix chosen to preserve the
+*  bandwidth of A.
+*
+*  Arguments
+*  =========
+*
+*  VECT    (input) CHARACTER*1
+*          = 'N':  do not form the transformation matrix X;
+*          = 'V':  form X.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  KA      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KA >= 0.
+*
+*  KB      (input) INTEGER
+*          The number of superdiagonals of the matrix B if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KA >= KB >= 0.
+*
+*  AB      (input/output) COMPLEX*16 array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix A, stored in the first ka+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+ka).
+*
+*          On exit, the transformed matrix X**H*A*X, stored in the same
+*          format as A.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KA+1.
+*
+*  BB      (input) COMPLEX*16 array, dimension (LDBB,N)
+*          The banded factor S from the split Cholesky factorization of
+*          B, as returned by ZPBSTF, stored in the first kb+1 rows of
+*          the array.
+*
+*  LDBB    (input) INTEGER
+*          The leading dimension of the array BB.  LDBB >= KB+1.
+*
+*  X       (output) COMPLEX*16 array, dimension (LDX,N)
+*          If VECT = 'V', the n-by-n matrix X.
+*          If VECT = 'N', the array X is not referenced.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.
+*          LDX >= max(1,N) if VECT = 'V'; LDX >= 1 otherwise.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         CZERO, CONE
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( CZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   CONE = ( 1.0D+0, 0.0D+0 ), ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPDATE, UPPER, WANTX
+      INTEGER            I, I0, I1, I2, INCA, J, J1, J1T, J2, J2T, K,
+     $                   KA1, KB1, KBT, L, M, NR, NRT, NX
+      DOUBLE PRECISION   BII
+      COMPLEX*16         RA, RA1, T
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZDSCAL, ZGERC, ZGERU, ZLACGV, ZLAR2V,
+     $                   ZLARGV, ZLARTG, ZLARTV, ZLASET, ZROT
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, DCONJG, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      WANTX = LSAME( VECT, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+      KA1 = KA + 1
+      KB1 = KB + 1
+      INFO = 0
+      IF( .NOT.WANTX .AND. .NOT.LSAME( VECT, 'N' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KA.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KB.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.KA+1 ) THEN
+         INFO = -7
+      ELSE IF( LDBB.LT.KB+1 ) THEN
+         INFO = -9
+      ELSE IF( LDX.LT.1 .OR. WANTX .AND. LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHBGST', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      INCA = LDAB*KA1
+*
+*     Initialize X to the unit matrix, if needed
+*
+      IF( WANTX )
+     $   CALL ZLASET( 'Full', N, N, CZERO, CONE, X, LDX )
+*
+*     Set M to the splitting point m. It must be the same value as is
+*     used in ZPBSTF. The chosen value allows the arrays WORK and RWORK
+*     to be of dimension (N).
+*
+      M = ( N+KB ) / 2
+*
+*     The routine works in two phases, corresponding to the two halves
+*     of the split Cholesky factorization of B as S**H*S where
+*
+*     S = ( U    )
+*         ( M  L )
+*
+*     with U upper triangular of order m, and L lower triangular of
+*     order n-m. S has the same bandwidth as B.
+*
+*     S is treated as a product of elementary matrices:
+*
+*     S = S(m)*S(m-1)*...*S(2)*S(1)*S(m+1)*S(m+2)*...*S(n-1)*S(n)
+*
+*     where S(i) is determined by the i-th row of S.
+*
+*     In phase 1, the index i takes the values n, n-1, ... , m+1;
+*     in phase 2, it takes the values 1, 2, ... , m.
+*
+*     For each value of i, the current matrix A is updated by forming
+*     inv(S(i))**H*A*inv(S(i)). This creates a triangular bulge outside
+*     the band of A. The bulge is then pushed down toward the bottom of
+*     A in phase 1, and up toward the top of A in phase 2, by applying
+*     plane rotations.
+*
+*     There are kb*(kb+1)/2 elements in the bulge, but at most 2*kb-1
+*     of them are linearly independent, so annihilating a bulge requires
+*     only 2*kb-1 plane rotations. The rotations are divided into a 1st
+*     set of kb-1 rotations, and a 2nd set of kb rotations.
+*
+*     Wherever possible, rotations are generated and applied in vector
+*     operations of length NR between the indices J1 and J2 (sometimes
+*     replaced by modified values NRT, J1T or J2T).
+*
+*     The real cosines and complex sines of the rotations are stored in
+*     the arrays RWORK and WORK, those of the 1st set in elements
+*     2:m-kb-1, and those of the 2nd set in elements m-kb+1:n.
+*
+*     The bulges are not formed explicitly; nonzero elements outside the
+*     band are created only when they are required for generating new
+*     rotations; they are stored in the array WORK, in positions where
+*     they are later overwritten by the sines of the rotations which
+*     annihilate them.
+*
+*     **************************** Phase 1 *****************************
+*
+*     The logical structure of this phase is:
+*
+*     UPDATE = .TRUE.
+*     DO I = N, M + 1, -1
+*        use S(i) to update A and create a new bulge
+*        apply rotations to push all bulges KA positions downward
+*     END DO
+*     UPDATE = .FALSE.
+*     DO I = M + KA + 1, N - 1
+*        apply rotations to push all bulges KA positions downward
+*     END DO
+*
+*     To avoid duplicating code, the two loops are merged.
+*
+      UPDATE = .TRUE.
+      I = N + 1
+   10 CONTINUE
+      IF( UPDATE ) THEN
+         I = I - 1
+         KBT = MIN( KB, I-1 )
+         I0 = I - 1
+         I1 = MIN( N, I+KA )
+         I2 = I - KBT + KA1
+         IF( I.LT.M+1 ) THEN
+            UPDATE = .FALSE.
+            I = I + 1
+            I0 = M
+            IF( KA.EQ.0 )
+     $         GO TO 480
+            GO TO 10
+         END IF
+      ELSE
+         I = I + KA
+         IF( I.GT.N-1 )
+     $      GO TO 480
+      END IF
+*
+      IF( UPPER ) THEN
+*
+*        Transform A, working with the upper triangle
+*
+         IF( UPDATE ) THEN
+*
+*           Form  inv(S(i))**H * A * inv(S(i))
+*
+            BII = DBLE( BB( KB1, I ) )
+            AB( KA1, I ) = ( DBLE( AB( KA1, I ) ) / BII ) / BII
+            DO 20 J = I + 1, I1
+               AB( I-J+KA1, J ) = AB( I-J+KA1, J ) / BII
+   20       CONTINUE
+            DO 30 J = MAX( 1, I-KA ), I - 1
+               AB( J-I+KA1, I ) = AB( J-I+KA1, I ) / BII
+   30       CONTINUE
+            DO 60 K = I - KBT, I - 1
+               DO 40 J = I - KBT, K
+                  AB( J-K+KA1, K ) = AB( J-K+KA1, K ) -
+     $                               BB( J-I+KB1, I )*
+     $                               DCONJG( AB( K-I+KA1, I ) ) -
+     $                               DCONJG( BB( K-I+KB1, I ) )*
+     $                               AB( J-I+KA1, I ) +
+     $                               DBLE( AB( KA1, I ) )*
+     $                               BB( J-I+KB1, I )*
+     $                               DCONJG( BB( K-I+KB1, I ) )
+   40          CONTINUE
+               DO 50 J = MAX( 1, I-KA ), I - KBT - 1
+                  AB( J-K+KA1, K ) = AB( J-K+KA1, K ) -
+     $                               DCONJG( BB( K-I+KB1, I ) )*
+     $                               AB( J-I+KA1, I )
+   50          CONTINUE
+   60       CONTINUE
+            DO 80 J = I, I1
+               DO 70 K = MAX( J-KA, I-KBT ), I - 1
+                  AB( K-J+KA1, J ) = AB( K-J+KA1, J ) -
+     $                               BB( K-I+KB1, I )*AB( I-J+KA1, J )
+   70          CONTINUE
+   80       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by inv(S(i))
+*
+               CALL ZDSCAL( N-M, ONE / BII, X( M+1, I ), 1 )
+               IF( KBT.GT.0 )
+     $            CALL ZGERC( N-M, KBT, -CONE, X( M+1, I ), 1,
+     $                        BB( KB1-KBT, I ), 1, X( M+1, I-KBT ),
+     $                        LDX )
+            END IF
+*
+*           store a(i,i1) in RA1 for use in next loop over K
+*
+            RA1 = AB( I-I1+KA1, I1 )
+         END IF
+*
+*        Generate and apply vectors of rotations to chase all the
+*        existing bulges KA positions down toward the bottom of the
+*        band
+*
+         DO 130 K = 1, KB - 1
+            IF( UPDATE ) THEN
+*
+*              Determine the rotations which would annihilate the bulge
+*              which has in theory just been created
+*
+               IF( I-K+KA.LT.N .AND. I-K.GT.1 ) THEN
+*
+*                 generate rotation to annihilate a(i,i-k+ka+1)
+*
+                  CALL ZLARTG( AB( K+1, I-K+KA ), RA1,
+     $                         RWORK( I-K+KA-M ), WORK( I-K+KA-M ), RA )
+*
+*                 create nonzero element a(i-k,i-k+ka+1) outside the
+*                 band and store it in WORK(i-k)
+*
+                  T = -BB( KB1-K, I )*RA1
+                  WORK( I-K ) = RWORK( I-K+KA-M )*T -
+     $                          DCONJG( WORK( I-K+KA-M ) )*
+     $                          AB( 1, I-K+KA )
+                  AB( 1, I-K+KA ) = WORK( I-K+KA-M )*T +
+     $                              RWORK( I-K+KA-M )*AB( 1, I-K+KA )
+                  RA1 = RA
+               END IF
+            END IF
+            J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1
+            NR = ( N-J2+KA ) / KA1
+            J1 = J2 + ( NR-1 )*KA1
+            IF( UPDATE ) THEN
+               J2T = MAX( J2, I+2*KA-K+1 )
+            ELSE
+               J2T = J2
+            END IF
+            NRT = ( N-J2T+KA ) / KA1
+            DO 90 J = J2T, J1, KA1
+*
+*              create nonzero element a(j-ka,j+1) outside the band
+*              and store it in WORK(j-m)
+*
+               WORK( J-M ) = WORK( J-M )*AB( 1, J+1 )
+               AB( 1, J+1 ) = RWORK( J-M )*AB( 1, J+1 )
+   90       CONTINUE
+*
+*           generate rotations in 1st set to annihilate elements which
+*           have been created outside the band
+*
+            IF( NRT.GT.0 )
+     $         CALL ZLARGV( NRT, AB( 1, J2T ), INCA, WORK( J2T-M ), KA1,
+     $                      RWORK( J2T-M ), KA1 )
+            IF( NR.GT.0 ) THEN
+*
+*              apply rotations in 1st set from the right
+*
+               DO 100 L = 1, KA - 1
+                  CALL ZLARTV( NR, AB( KA1-L, J2 ), INCA,
+     $                         AB( KA-L, J2+1 ), INCA, RWORK( J2-M ),
+     $                         WORK( J2-M ), KA1 )
+  100          CONTINUE
+*
+*              apply rotations in 1st set from both sides to diagonal
+*              blocks
+*
+               CALL ZLAR2V( NR, AB( KA1, J2 ), AB( KA1, J2+1 ),
+     $                      AB( KA, J2+1 ), INCA, RWORK( J2-M ),
+     $                      WORK( J2-M ), KA1 )
+*
+               CALL ZLACGV( NR, WORK( J2-M ), KA1 )
+            END IF
+*
+*           start applying rotations in 1st set from the left
+*
+            DO 110 L = KA - 1, KB - K + 1, -1
+               NRT = ( N-J2+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL ZLARTV( NRT, AB( L, J2+KA1-L ), INCA,
+     $                         AB( L+1, J2+KA1-L ), INCA, RWORK( J2-M ),
+     $                         WORK( J2-M ), KA1 )
+  110       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 1st set
+*
+               DO 120 J = J2, J1, KA1
+                  CALL ZROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1,
+     $                       RWORK( J-M ), DCONJG( WORK( J-M ) ) )
+  120          CONTINUE
+            END IF
+  130    CONTINUE
+*
+         IF( UPDATE ) THEN
+            IF( I2.LE.N .AND. KBT.GT.0 ) THEN
+*
+*              create nonzero element a(i-kbt,i-kbt+ka+1) outside the
+*              band and store it in WORK(i-kbt)
+*
+               WORK( I-KBT ) = -BB( KB1-KBT, I )*RA1
+            END IF
+         END IF
+*
+         DO 170 K = KB, 1, -1
+            IF( UPDATE ) THEN
+               J2 = I - K - 1 + MAX( 2, K-I0+1 )*KA1
+            ELSE
+               J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1
+            END IF
+*
+*           finish applying rotations in 2nd set from the left
+*
+            DO 140 L = KB - K, 1, -1
+               NRT = ( N-J2+KA+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL ZLARTV( NRT, AB( L, J2-L+1 ), INCA,
+     $                         AB( L+1, J2-L+1 ), INCA, RWORK( J2-KA ),
+     $                         WORK( J2-KA ), KA1 )
+  140       CONTINUE
+            NR = ( N-J2+KA ) / KA1
+            J1 = J2 + ( NR-1 )*KA1
+            DO 150 J = J1, J2, -KA1
+               WORK( J ) = WORK( J-KA )
+               RWORK( J ) = RWORK( J-KA )
+  150       CONTINUE
+            DO 160 J = J2, J1, KA1
+*
+*              create nonzero element a(j-ka,j+1) outside the band
+*              and store it in WORK(j)
+*
+               WORK( J ) = WORK( J )*AB( 1, J+1 )
+               AB( 1, J+1 ) = RWORK( J )*AB( 1, J+1 )
+  160       CONTINUE
+            IF( UPDATE ) THEN
+               IF( I-K.LT.N-KA .AND. K.LE.KBT )
+     $            WORK( I-K+KA ) = WORK( I-K )
+            END IF
+  170    CONTINUE
+*
+         DO 210 K = KB, 1, -1
+            J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1
+            NR = ( N-J2+KA ) / KA1
+            J1 = J2 + ( NR-1 )*KA1
+            IF( NR.GT.0 ) THEN
+*
+*              generate rotations in 2nd set to annihilate elements
+*              which have been created outside the band
+*
+               CALL ZLARGV( NR, AB( 1, J2 ), INCA, WORK( J2 ), KA1,
+     $                      RWORK( J2 ), KA1 )
+*
+*              apply rotations in 2nd set from the right
+*
+               DO 180 L = 1, KA - 1
+                  CALL ZLARTV( NR, AB( KA1-L, J2 ), INCA,
+     $                         AB( KA-L, J2+1 ), INCA, RWORK( J2 ),
+     $                         WORK( J2 ), KA1 )
+  180          CONTINUE
+*
+*              apply rotations in 2nd set from both sides to diagonal
+*              blocks
+*
+               CALL ZLAR2V( NR, AB( KA1, J2 ), AB( KA1, J2+1 ),
+     $                      AB( KA, J2+1 ), INCA, RWORK( J2 ),
+     $                      WORK( J2 ), KA1 )
+*
+               CALL ZLACGV( NR, WORK( J2 ), KA1 )
+            END IF
+*
+*           start applying rotations in 2nd set from the left
+*
+            DO 190 L = KA - 1, KB - K + 1, -1
+               NRT = ( N-J2+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL ZLARTV( NRT, AB( L, J2+KA1-L ), INCA,
+     $                         AB( L+1, J2+KA1-L ), INCA, RWORK( J2 ),
+     $                         WORK( J2 ), KA1 )
+  190       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 2nd set
+*
+               DO 200 J = J2, J1, KA1
+                  CALL ZROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1,
+     $                       RWORK( J ), DCONJG( WORK( J ) ) )
+  200          CONTINUE
+            END IF
+  210    CONTINUE
+*
+         DO 230 K = 1, KB - 1
+            J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1
+*
+*           finish applying rotations in 1st set from the left
+*
+            DO 220 L = KB - K, 1, -1
+               NRT = ( N-J2+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL ZLARTV( NRT, AB( L, J2+KA1-L ), INCA,
+     $                         AB( L+1, J2+KA1-L ), INCA, RWORK( J2-M ),
+     $                         WORK( J2-M ), KA1 )
+  220       CONTINUE
+  230    CONTINUE
+*
+         IF( KB.GT.1 ) THEN
+            DO 240 J = N - 1, I2 + KA, -1
+               RWORK( J-M ) = RWORK( J-KA-M )
+               WORK( J-M ) = WORK( J-KA-M )
+  240       CONTINUE
+         END IF
+*
+      ELSE
+*
+*        Transform A, working with the lower triangle
+*
+         IF( UPDATE ) THEN
+*
+*           Form  inv(S(i))**H * A * inv(S(i))
+*
+            BII = DBLE( BB( 1, I ) )
+            AB( 1, I ) = ( DBLE( AB( 1, I ) ) / BII ) / BII
+            DO 250 J = I + 1, I1
+               AB( J-I+1, I ) = AB( J-I+1, I ) / BII
+  250       CONTINUE
+            DO 260 J = MAX( 1, I-KA ), I - 1
+               AB( I-J+1, J ) = AB( I-J+1, J ) / BII
+  260       CONTINUE
+            DO 290 K = I - KBT, I - 1
+               DO 270 J = I - KBT, K
+                  AB( K-J+1, J ) = AB( K-J+1, J ) -
+     $                             BB( I-J+1, J )*DCONJG( AB( I-K+1,
+     $                             K ) ) - DCONJG( BB( I-K+1, K ) )*
+     $                             AB( I-J+1, J ) + DBLE( AB( 1, I ) )*
+     $                             BB( I-J+1, J )*DCONJG( BB( I-K+1,
+     $                             K ) )
+  270          CONTINUE
+               DO 280 J = MAX( 1, I-KA ), I - KBT - 1
+                  AB( K-J+1, J ) = AB( K-J+1, J ) -
+     $                             DCONJG( BB( I-K+1, K ) )*
+     $                             AB( I-J+1, J )
+  280          CONTINUE
+  290       CONTINUE
+            DO 310 J = I, I1
+               DO 300 K = MAX( J-KA, I-KBT ), I - 1
+                  AB( J-K+1, K ) = AB( J-K+1, K ) -
+     $                             BB( I-K+1, K )*AB( J-I+1, I )
+  300          CONTINUE
+  310       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by inv(S(i))
+*
+               CALL ZDSCAL( N-M, ONE / BII, X( M+1, I ), 1 )
+               IF( KBT.GT.0 )
+     $            CALL ZGERU( N-M, KBT, -CONE, X( M+1, I ), 1,
+     $                        BB( KBT+1, I-KBT ), LDBB-1,
+     $                        X( M+1, I-KBT ), LDX )
+            END IF
+*
+*           store a(i1,i) in RA1 for use in next loop over K
+*
+            RA1 = AB( I1-I+1, I )
+         END IF
+*
+*        Generate and apply vectors of rotations to chase all the
+*        existing bulges KA positions down toward the bottom of the
+*        band
+*
+         DO 360 K = 1, KB - 1
+            IF( UPDATE ) THEN
+*
+*              Determine the rotations which would annihilate the bulge
+*              which has in theory just been created
+*
+               IF( I-K+KA.LT.N .AND. I-K.GT.1 ) THEN
+*
+*                 generate rotation to annihilate a(i-k+ka+1,i)
+*
+                  CALL ZLARTG( AB( KA1-K, I ), RA1, RWORK( I-K+KA-M ),
+     $                         WORK( I-K+KA-M ), RA )
+*
+*                 create nonzero element a(i-k+ka+1,i-k) outside the
+*                 band and store it in WORK(i-k)
+*
+                  T = -BB( K+1, I-K )*RA1
+                  WORK( I-K ) = RWORK( I-K+KA-M )*T -
+     $                          DCONJG( WORK( I-K+KA-M ) )*
+     $                          AB( KA1, I-K )
+                  AB( KA1, I-K ) = WORK( I-K+KA-M )*T +
+     $                             RWORK( I-K+KA-M )*AB( KA1, I-K )
+                  RA1 = RA
+               END IF
+            END IF
+            J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1
+            NR = ( N-J2+KA ) / KA1
+            J1 = J2 + ( NR-1 )*KA1
+            IF( UPDATE ) THEN
+               J2T = MAX( J2, I+2*KA-K+1 )
+            ELSE
+               J2T = J2
+            END IF
+            NRT = ( N-J2T+KA ) / KA1
+            DO 320 J = J2T, J1, KA1
+*
+*              create nonzero element a(j+1,j-ka) outside the band
+*              and store it in WORK(j-m)
+*
+               WORK( J-M ) = WORK( J-M )*AB( KA1, J-KA+1 )
+               AB( KA1, J-KA+1 ) = RWORK( J-M )*AB( KA1, J-KA+1 )
+  320       CONTINUE
+*
+*           generate rotations in 1st set to annihilate elements which
+*           have been created outside the band
+*
+            IF( NRT.GT.0 )
+     $         CALL ZLARGV( NRT, AB( KA1, J2T-KA ), INCA, WORK( J2T-M ),
+     $                      KA1, RWORK( J2T-M ), KA1 )
+            IF( NR.GT.0 ) THEN
+*
+*              apply rotations in 1st set from the left
+*
+               DO 330 L = 1, KA - 1
+                  CALL ZLARTV( NR, AB( L+1, J2-L ), INCA,
+     $                         AB( L+2, J2-L ), INCA, RWORK( J2-M ),
+     $                         WORK( J2-M ), KA1 )
+  330          CONTINUE
+*
+*              apply rotations in 1st set from both sides to diagonal
+*              blocks
+*
+               CALL ZLAR2V( NR, AB( 1, J2 ), AB( 1, J2+1 ), AB( 2, J2 ),
+     $                      INCA, RWORK( J2-M ), WORK( J2-M ), KA1 )
+*
+               CALL ZLACGV( NR, WORK( J2-M ), KA1 )
+            END IF
+*
+*           start applying rotations in 1st set from the right
+*
+            DO 340 L = KA - 1, KB - K + 1, -1
+               NRT = ( N-J2+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL ZLARTV( NRT, AB( KA1-L+1, J2 ), INCA,
+     $                         AB( KA1-L, J2+1 ), INCA, RWORK( J2-M ),
+     $                         WORK( J2-M ), KA1 )
+  340       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 1st set
+*
+               DO 350 J = J2, J1, KA1
+                  CALL ZROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1,
+     $                       RWORK( J-M ), WORK( J-M ) )
+  350          CONTINUE
+            END IF
+  360    CONTINUE
+*
+         IF( UPDATE ) THEN
+            IF( I2.LE.N .AND. KBT.GT.0 ) THEN
+*
+*              create nonzero element a(i-kbt+ka+1,i-kbt) outside the
+*              band and store it in WORK(i-kbt)
+*
+               WORK( I-KBT ) = -BB( KBT+1, I-KBT )*RA1
+            END IF
+         END IF
+*
+         DO 400 K = KB, 1, -1
+            IF( UPDATE ) THEN
+               J2 = I - K - 1 + MAX( 2, K-I0+1 )*KA1
+            ELSE
+               J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1
+            END IF
+*
+*           finish applying rotations in 2nd set from the right
+*
+            DO 370 L = KB - K, 1, -1
+               NRT = ( N-J2+KA+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL ZLARTV( NRT, AB( KA1-L+1, J2-KA ), INCA,
+     $                         AB( KA1-L, J2-KA+1 ), INCA,
+     $                         RWORK( J2-KA ), WORK( J2-KA ), KA1 )
+  370       CONTINUE
+            NR = ( N-J2+KA ) / KA1
+            J1 = J2 + ( NR-1 )*KA1
+            DO 380 J = J1, J2, -KA1
+               WORK( J ) = WORK( J-KA )
+               RWORK( J ) = RWORK( J-KA )
+  380       CONTINUE
+            DO 390 J = J2, J1, KA1
+*
+*              create nonzero element a(j+1,j-ka) outside the band
+*              and store it in WORK(j)
+*
+               WORK( J ) = WORK( J )*AB( KA1, J-KA+1 )
+               AB( KA1, J-KA+1 ) = RWORK( J )*AB( KA1, J-KA+1 )
+  390       CONTINUE
+            IF( UPDATE ) THEN
+               IF( I-K.LT.N-KA .AND. K.LE.KBT )
+     $            WORK( I-K+KA ) = WORK( I-K )
+            END IF
+  400    CONTINUE
+*
+         DO 440 K = KB, 1, -1
+            J2 = I - K - 1 + MAX( 1, K-I0+1 )*KA1
+            NR = ( N-J2+KA ) / KA1
+            J1 = J2 + ( NR-1 )*KA1
+            IF( NR.GT.0 ) THEN
+*
+*              generate rotations in 2nd set to annihilate elements
+*              which have been created outside the band
+*
+               CALL ZLARGV( NR, AB( KA1, J2-KA ), INCA, WORK( J2 ), KA1,
+     $                      RWORK( J2 ), KA1 )
+*
+*              apply rotations in 2nd set from the left
+*
+               DO 410 L = 1, KA - 1
+                  CALL ZLARTV( NR, AB( L+1, J2-L ), INCA,
+     $                         AB( L+2, J2-L ), INCA, RWORK( J2 ),
+     $                         WORK( J2 ), KA1 )
+  410          CONTINUE
+*
+*              apply rotations in 2nd set from both sides to diagonal
+*              blocks
+*
+               CALL ZLAR2V( NR, AB( 1, J2 ), AB( 1, J2+1 ), AB( 2, J2 ),
+     $                      INCA, RWORK( J2 ), WORK( J2 ), KA1 )
+*
+               CALL ZLACGV( NR, WORK( J2 ), KA1 )
+            END IF
+*
+*           start applying rotations in 2nd set from the right
+*
+            DO 420 L = KA - 1, KB - K + 1, -1
+               NRT = ( N-J2+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL ZLARTV( NRT, AB( KA1-L+1, J2 ), INCA,
+     $                         AB( KA1-L, J2+1 ), INCA, RWORK( J2 ),
+     $                         WORK( J2 ), KA1 )
+  420       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 2nd set
+*
+               DO 430 J = J2, J1, KA1
+                  CALL ZROT( N-M, X( M+1, J ), 1, X( M+1, J+1 ), 1,
+     $                       RWORK( J ), WORK( J ) )
+  430          CONTINUE
+            END IF
+  440    CONTINUE
+*
+         DO 460 K = 1, KB - 1
+            J2 = I - K - 1 + MAX( 1, K-I0+2 )*KA1
+*
+*           finish applying rotations in 1st set from the right
+*
+            DO 450 L = KB - K, 1, -1
+               NRT = ( N-J2+L ) / KA1
+               IF( NRT.GT.0 )
+     $            CALL ZLARTV( NRT, AB( KA1-L+1, J2 ), INCA,
+     $                         AB( KA1-L, J2+1 ), INCA, RWORK( J2-M ),
+     $                         WORK( J2-M ), KA1 )
+  450       CONTINUE
+  460    CONTINUE
+*
+         IF( KB.GT.1 ) THEN
+            DO 470 J = N - 1, I2 + KA, -1
+               RWORK( J-M ) = RWORK( J-KA-M )
+               WORK( J-M ) = WORK( J-KA-M )
+  470       CONTINUE
+         END IF
+*
+      END IF
+*
+      GO TO 10
+*
+  480 CONTINUE
+*
+*     **************************** Phase 2 *****************************
+*
+*     The logical structure of this phase is:
+*
+*     UPDATE = .TRUE.
+*     DO I = 1, M
+*        use S(i) to update A and create a new bulge
+*        apply rotations to push all bulges KA positions upward
+*     END DO
+*     UPDATE = .FALSE.
+*     DO I = M - KA - 1, 2, -1
+*        apply rotations to push all bulges KA positions upward
+*     END DO
+*
+*     To avoid duplicating code, the two loops are merged.
+*
+      UPDATE = .TRUE.
+      I = 0
+  490 CONTINUE
+      IF( UPDATE ) THEN
+         I = I + 1
+         KBT = MIN( KB, M-I )
+         I0 = I + 1
+         I1 = MAX( 1, I-KA )
+         I2 = I + KBT - KA1
+         IF( I.GT.M ) THEN
+            UPDATE = .FALSE.
+            I = I - 1
+            I0 = M + 1
+            IF( KA.EQ.0 )
+     $         RETURN
+            GO TO 490
+         END IF
+      ELSE
+         I = I - KA
+         IF( I.LT.2 )
+     $      RETURN
+      END IF
+*
+      IF( I.LT.M-KBT ) THEN
+         NX = M
+      ELSE
+         NX = N
+      END IF
+*
+      IF( UPPER ) THEN
+*
+*        Transform A, working with the upper triangle
+*
+         IF( UPDATE ) THEN
+*
+*           Form  inv(S(i))**H * A * inv(S(i))
+*
+            BII = DBLE( BB( KB1, I ) )
+            AB( KA1, I ) = ( DBLE( AB( KA1, I ) ) / BII ) / BII
+            DO 500 J = I1, I - 1
+               AB( J-I+KA1, I ) = AB( J-I+KA1, I ) / BII
+  500       CONTINUE
+            DO 510 J = I + 1, MIN( N, I+KA )
+               AB( I-J+KA1, J ) = AB( I-J+KA1, J ) / BII
+  510       CONTINUE
+            DO 540 K = I + 1, I + KBT
+               DO 520 J = K, I + KBT
+                  AB( K-J+KA1, J ) = AB( K-J+KA1, J ) -
+     $                               BB( I-J+KB1, J )*
+     $                               DCONJG( AB( I-K+KA1, K ) ) -
+     $                               DCONJG( BB( I-K+KB1, K ) )*
+     $                               AB( I-J+KA1, J ) +
+     $                               DBLE( AB( KA1, I ) )*
+     $                               BB( I-J+KB1, J )*
+     $                               DCONJG( BB( I-K+KB1, K ) )
+  520          CONTINUE
+               DO 530 J = I + KBT + 1, MIN( N, I+KA )
+                  AB( K-J+KA1, J ) = AB( K-J+KA1, J ) -
+     $                               DCONJG( BB( I-K+KB1, K ) )*
+     $                               AB( I-J+KA1, J )
+  530          CONTINUE
+  540       CONTINUE
+            DO 560 J = I1, I
+               DO 550 K = I + 1, MIN( J+KA, I+KBT )
+                  AB( J-K+KA1, K ) = AB( J-K+KA1, K ) -
+     $                               BB( I-K+KB1, K )*AB( J-I+KA1, I )
+  550          CONTINUE
+  560       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by inv(S(i))
+*
+               CALL ZDSCAL( NX, ONE / BII, X( 1, I ), 1 )
+               IF( KBT.GT.0 )
+     $            CALL ZGERU( NX, KBT, -CONE, X( 1, I ), 1,
+     $                        BB( KB, I+1 ), LDBB-1, X( 1, I+1 ), LDX )
+            END IF
+*
+*           store a(i1,i) in RA1 for use in next loop over K
+*
+            RA1 = AB( I1-I+KA1, I )
+         END IF
+*
+*        Generate and apply vectors of rotations to chase all the
+*        existing bulges KA positions up toward the top of the band
+*
+         DO 610 K = 1, KB - 1
+            IF( UPDATE ) THEN
+*
+*              Determine the rotations which would annihilate the bulge
+*              which has in theory just been created
+*
+               IF( I+K-KA1.GT.0 .AND. I+K.LT.M ) THEN
+*
+*                 generate rotation to annihilate a(i+k-ka-1,i)
+*
+                  CALL ZLARTG( AB( K+1, I ), RA1, RWORK( I+K-KA ),
+     $                         WORK( I+K-KA ), RA )
+*
+*                 create nonzero element a(i+k-ka-1,i+k) outside the
+*                 band and store it in WORK(m-kb+i+k)
+*
+                  T = -BB( KB1-K, I+K )*RA1
+                  WORK( M-KB+I+K ) = RWORK( I+K-KA )*T -
+     $                               DCONJG( WORK( I+K-KA ) )*
+     $                               AB( 1, I+K )
+                  AB( 1, I+K ) = WORK( I+K-KA )*T +
+     $                           RWORK( I+K-KA )*AB( 1, I+K )
+                  RA1 = RA
+               END IF
+            END IF
+            J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1
+            NR = ( J2+KA-1 ) / KA1
+            J1 = J2 - ( NR-1 )*KA1
+            IF( UPDATE ) THEN
+               J2T = MIN( J2, I-2*KA+K-1 )
+            ELSE
+               J2T = J2
+            END IF
+            NRT = ( J2T+KA-1 ) / KA1
+            DO 570 J = J1, J2T, KA1
+*
+*              create nonzero element a(j-1,j+ka) outside the band
+*              and store it in WORK(j)
+*
+               WORK( J ) = WORK( J )*AB( 1, J+KA-1 )
+               AB( 1, J+KA-1 ) = RWORK( J )*AB( 1, J+KA-1 )
+  570       CONTINUE
+*
+*           generate rotations in 1st set to annihilate elements which
+*           have been created outside the band
+*
+            IF( NRT.GT.0 )
+     $         CALL ZLARGV( NRT, AB( 1, J1+KA ), INCA, WORK( J1 ), KA1,
+     $                      RWORK( J1 ), KA1 )
+            IF( NR.GT.0 ) THEN
+*
+*              apply rotations in 1st set from the left
+*
+               DO 580 L = 1, KA - 1
+                  CALL ZLARTV( NR, AB( KA1-L, J1+L ), INCA,
+     $                         AB( KA-L, J1+L ), INCA, RWORK( J1 ),
+     $                         WORK( J1 ), KA1 )
+  580          CONTINUE
+*
+*              apply rotations in 1st set from both sides to diagonal
+*              blocks
+*
+               CALL ZLAR2V( NR, AB( KA1, J1 ), AB( KA1, J1-1 ),
+     $                      AB( KA, J1 ), INCA, RWORK( J1 ), WORK( J1 ),
+     $                      KA1 )
+*
+               CALL ZLACGV( NR, WORK( J1 ), KA1 )
+            END IF
+*
+*           start applying rotations in 1st set from the right
+*
+            DO 590 L = KA - 1, KB - K + 1, -1
+               NRT = ( J2+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL ZLARTV( NRT, AB( L, J1T ), INCA,
+     $                         AB( L+1, J1T-1 ), INCA, RWORK( J1T ),
+     $                         WORK( J1T ), KA1 )
+  590       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 1st set
+*
+               DO 600 J = J1, J2, KA1
+                  CALL ZROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1,
+     $                       RWORK( J ), WORK( J ) )
+  600          CONTINUE
+            END IF
+  610    CONTINUE
+*
+         IF( UPDATE ) THEN
+            IF( I2.GT.0 .AND. KBT.GT.0 ) THEN
+*
+*              create nonzero element a(i+kbt-ka-1,i+kbt) outside the
+*              band and store it in WORK(m-kb+i+kbt)
+*
+               WORK( M-KB+I+KBT ) = -BB( KB1-KBT, I+KBT )*RA1
+            END IF
+         END IF
+*
+         DO 650 K = KB, 1, -1
+            IF( UPDATE ) THEN
+               J2 = I + K + 1 - MAX( 2, K+I0-M )*KA1
+            ELSE
+               J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1
+            END IF
+*
+*           finish applying rotations in 2nd set from the right
+*
+            DO 620 L = KB - K, 1, -1
+               NRT = ( J2+KA+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL ZLARTV( NRT, AB( L, J1T+KA ), INCA,
+     $                         AB( L+1, J1T+KA-1 ), INCA,
+     $                         RWORK( M-KB+J1T+KA ),
+     $                         WORK( M-KB+J1T+KA ), KA1 )
+  620       CONTINUE
+            NR = ( J2+KA-1 ) / KA1
+            J1 = J2 - ( NR-1 )*KA1
+            DO 630 J = J1, J2, KA1
+               WORK( M-KB+J ) = WORK( M-KB+J+KA )
+               RWORK( M-KB+J ) = RWORK( M-KB+J+KA )
+  630       CONTINUE
+            DO 640 J = J1, J2, KA1
+*
+*              create nonzero element a(j-1,j+ka) outside the band
+*              and store it in WORK(m-kb+j)
+*
+               WORK( M-KB+J ) = WORK( M-KB+J )*AB( 1, J+KA-1 )
+               AB( 1, J+KA-1 ) = RWORK( M-KB+J )*AB( 1, J+KA-1 )
+  640       CONTINUE
+            IF( UPDATE ) THEN
+               IF( I+K.GT.KA1 .AND. K.LE.KBT )
+     $            WORK( M-KB+I+K-KA ) = WORK( M-KB+I+K )
+            END IF
+  650    CONTINUE
+*
+         DO 690 K = KB, 1, -1
+            J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1
+            NR = ( J2+KA-1 ) / KA1
+            J1 = J2 - ( NR-1 )*KA1
+            IF( NR.GT.0 ) THEN
+*
+*              generate rotations in 2nd set to annihilate elements
+*              which have been created outside the band
+*
+               CALL ZLARGV( NR, AB( 1, J1+KA ), INCA, WORK( M-KB+J1 ),
+     $                      KA1, RWORK( M-KB+J1 ), KA1 )
+*
+*              apply rotations in 2nd set from the left
+*
+               DO 660 L = 1, KA - 1
+                  CALL ZLARTV( NR, AB( KA1-L, J1+L ), INCA,
+     $                         AB( KA-L, J1+L ), INCA, RWORK( M-KB+J1 ),
+     $                         WORK( M-KB+J1 ), KA1 )
+  660          CONTINUE
+*
+*              apply rotations in 2nd set from both sides to diagonal
+*              blocks
+*
+               CALL ZLAR2V( NR, AB( KA1, J1 ), AB( KA1, J1-1 ),
+     $                      AB( KA, J1 ), INCA, RWORK( M-KB+J1 ),
+     $                      WORK( M-KB+J1 ), KA1 )
+*
+               CALL ZLACGV( NR, WORK( M-KB+J1 ), KA1 )
+            END IF
+*
+*           start applying rotations in 2nd set from the right
+*
+            DO 670 L = KA - 1, KB - K + 1, -1
+               NRT = ( J2+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL ZLARTV( NRT, AB( L, J1T ), INCA,
+     $                         AB( L+1, J1T-1 ), INCA,
+     $                         RWORK( M-KB+J1T ), WORK( M-KB+J1T ),
+     $                         KA1 )
+  670       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 2nd set
+*
+               DO 680 J = J1, J2, KA1
+                  CALL ZROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1,
+     $                       RWORK( M-KB+J ), WORK( M-KB+J ) )
+  680          CONTINUE
+            END IF
+  690    CONTINUE
+*
+         DO 710 K = 1, KB - 1
+            J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1
+*
+*           finish applying rotations in 1st set from the right
+*
+            DO 700 L = KB - K, 1, -1
+               NRT = ( J2+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL ZLARTV( NRT, AB( L, J1T ), INCA,
+     $                         AB( L+1, J1T-1 ), INCA, RWORK( J1T ),
+     $                         WORK( J1T ), KA1 )
+  700       CONTINUE
+  710    CONTINUE
+*
+         IF( KB.GT.1 ) THEN
+            DO 720 J = 2, I2 - KA
+               RWORK( J ) = RWORK( J+KA )
+               WORK( J ) = WORK( J+KA )
+  720       CONTINUE
+         END IF
+*
+      ELSE
+*
+*        Transform A, working with the lower triangle
+*
+         IF( UPDATE ) THEN
+*
+*           Form  inv(S(i))**H * A * inv(S(i))
+*
+            BII = DBLE( BB( 1, I ) )
+            AB( 1, I ) = ( DBLE( AB( 1, I ) ) / BII ) / BII
+            DO 730 J = I1, I - 1
+               AB( I-J+1, J ) = AB( I-J+1, J ) / BII
+  730       CONTINUE
+            DO 740 J = I + 1, MIN( N, I+KA )
+               AB( J-I+1, I ) = AB( J-I+1, I ) / BII
+  740       CONTINUE
+            DO 770 K = I + 1, I + KBT
+               DO 750 J = K, I + KBT
+                  AB( J-K+1, K ) = AB( J-K+1, K ) -
+     $                             BB( J-I+1, I )*DCONJG( AB( K-I+1,
+     $                             I ) ) - DCONJG( BB( K-I+1, I ) )*
+     $                             AB( J-I+1, I ) + DBLE( AB( 1, I ) )*
+     $                             BB( J-I+1, I )*DCONJG( BB( K-I+1,
+     $                             I ) )
+  750          CONTINUE
+               DO 760 J = I + KBT + 1, MIN( N, I+KA )
+                  AB( J-K+1, K ) = AB( J-K+1, K ) -
+     $                             DCONJG( BB( K-I+1, I ) )*
+     $                             AB( J-I+1, I )
+  760          CONTINUE
+  770       CONTINUE
+            DO 790 J = I1, I
+               DO 780 K = I + 1, MIN( J+KA, I+KBT )
+                  AB( K-J+1, J ) = AB( K-J+1, J ) -
+     $                             BB( K-I+1, I )*AB( I-J+1, J )
+  780          CONTINUE
+  790       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by inv(S(i))
+*
+               CALL ZDSCAL( NX, ONE / BII, X( 1, I ), 1 )
+               IF( KBT.GT.0 )
+     $            CALL ZGERC( NX, KBT, -CONE, X( 1, I ), 1, BB( 2, I ),
+     $                        1, X( 1, I+1 ), LDX )
+            END IF
+*
+*           store a(i,i1) in RA1 for use in next loop over K
+*
+            RA1 = AB( I-I1+1, I1 )
+         END IF
+*
+*        Generate and apply vectors of rotations to chase all the
+*        existing bulges KA positions up toward the top of the band
+*
+         DO 840 K = 1, KB - 1
+            IF( UPDATE ) THEN
+*
+*              Determine the rotations which would annihilate the bulge
+*              which has in theory just been created
+*
+               IF( I+K-KA1.GT.0 .AND. I+K.LT.M ) THEN
+*
+*                 generate rotation to annihilate a(i,i+k-ka-1)
+*
+                  CALL ZLARTG( AB( KA1-K, I+K-KA ), RA1,
+     $                         RWORK( I+K-KA ), WORK( I+K-KA ), RA )
+*
+*                 create nonzero element a(i+k,i+k-ka-1) outside the
+*                 band and store it in WORK(m-kb+i+k)
+*
+                  T = -BB( K+1, I )*RA1
+                  WORK( M-KB+I+K ) = RWORK( I+K-KA )*T -
+     $                               DCONJG( WORK( I+K-KA ) )*
+     $                               AB( KA1, I+K-KA )
+                  AB( KA1, I+K-KA ) = WORK( I+K-KA )*T +
+     $                                RWORK( I+K-KA )*AB( KA1, I+K-KA )
+                  RA1 = RA
+               END IF
+            END IF
+            J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1
+            NR = ( J2+KA-1 ) / KA1
+            J1 = J2 - ( NR-1 )*KA1
+            IF( UPDATE ) THEN
+               J2T = MIN( J2, I-2*KA+K-1 )
+            ELSE
+               J2T = J2
+            END IF
+            NRT = ( J2T+KA-1 ) / KA1
+            DO 800 J = J1, J2T, KA1
+*
+*              create nonzero element a(j+ka,j-1) outside the band
+*              and store it in WORK(j)
+*
+               WORK( J ) = WORK( J )*AB( KA1, J-1 )
+               AB( KA1, J-1 ) = RWORK( J )*AB( KA1, J-1 )
+  800       CONTINUE
+*
+*           generate rotations in 1st set to annihilate elements which
+*           have been created outside the band
+*
+            IF( NRT.GT.0 )
+     $         CALL ZLARGV( NRT, AB( KA1, J1 ), INCA, WORK( J1 ), KA1,
+     $                      RWORK( J1 ), KA1 )
+            IF( NR.GT.0 ) THEN
+*
+*              apply rotations in 1st set from the right
+*
+               DO 810 L = 1, KA - 1
+                  CALL ZLARTV( NR, AB( L+1, J1 ), INCA, AB( L+2, J1-1 ),
+     $                         INCA, RWORK( J1 ), WORK( J1 ), KA1 )
+  810          CONTINUE
+*
+*              apply rotations in 1st set from both sides to diagonal
+*              blocks
+*
+               CALL ZLAR2V( NR, AB( 1, J1 ), AB( 1, J1-1 ),
+     $                      AB( 2, J1-1 ), INCA, RWORK( J1 ),
+     $                      WORK( J1 ), KA1 )
+*
+               CALL ZLACGV( NR, WORK( J1 ), KA1 )
+            END IF
+*
+*           start applying rotations in 1st set from the left
+*
+            DO 820 L = KA - 1, KB - K + 1, -1
+               NRT = ( J2+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL ZLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA,
+     $                         AB( KA1-L, J1T-KA1+L ), INCA,
+     $                         RWORK( J1T ), WORK( J1T ), KA1 )
+  820       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 1st set
+*
+               DO 830 J = J1, J2, KA1
+                  CALL ZROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1,
+     $                       RWORK( J ), DCONJG( WORK( J ) ) )
+  830          CONTINUE
+            END IF
+  840    CONTINUE
+*
+         IF( UPDATE ) THEN
+            IF( I2.GT.0 .AND. KBT.GT.0 ) THEN
+*
+*              create nonzero element a(i+kbt,i+kbt-ka-1) outside the
+*              band and store it in WORK(m-kb+i+kbt)
+*
+               WORK( M-KB+I+KBT ) = -BB( KBT+1, I )*RA1
+            END IF
+         END IF
+*
+         DO 880 K = KB, 1, -1
+            IF( UPDATE ) THEN
+               J2 = I + K + 1 - MAX( 2, K+I0-M )*KA1
+            ELSE
+               J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1
+            END IF
+*
+*           finish applying rotations in 2nd set from the left
+*
+            DO 850 L = KB - K, 1, -1
+               NRT = ( J2+KA+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL ZLARTV( NRT, AB( KA1-L+1, J1T+L-1 ), INCA,
+     $                         AB( KA1-L, J1T+L-1 ), INCA,
+     $                         RWORK( M-KB+J1T+KA ),
+     $                         WORK( M-KB+J1T+KA ), KA1 )
+  850       CONTINUE
+            NR = ( J2+KA-1 ) / KA1
+            J1 = J2 - ( NR-1 )*KA1
+            DO 860 J = J1, J2, KA1
+               WORK( M-KB+J ) = WORK( M-KB+J+KA )
+               RWORK( M-KB+J ) = RWORK( M-KB+J+KA )
+  860       CONTINUE
+            DO 870 J = J1, J2, KA1
+*
+*              create nonzero element a(j+ka,j-1) outside the band
+*              and store it in WORK(m-kb+j)
+*
+               WORK( M-KB+J ) = WORK( M-KB+J )*AB( KA1, J-1 )
+               AB( KA1, J-1 ) = RWORK( M-KB+J )*AB( KA1, J-1 )
+  870       CONTINUE
+            IF( UPDATE ) THEN
+               IF( I+K.GT.KA1 .AND. K.LE.KBT )
+     $            WORK( M-KB+I+K-KA ) = WORK( M-KB+I+K )
+            END IF
+  880    CONTINUE
+*
+         DO 920 K = KB, 1, -1
+            J2 = I + K + 1 - MAX( 1, K+I0-M )*KA1
+            NR = ( J2+KA-1 ) / KA1
+            J1 = J2 - ( NR-1 )*KA1
+            IF( NR.GT.0 ) THEN
+*
+*              generate rotations in 2nd set to annihilate elements
+*              which have been created outside the band
+*
+               CALL ZLARGV( NR, AB( KA1, J1 ), INCA, WORK( M-KB+J1 ),
+     $                      KA1, RWORK( M-KB+J1 ), KA1 )
+*
+*              apply rotations in 2nd set from the right
+*
+               DO 890 L = 1, KA - 1
+                  CALL ZLARTV( NR, AB( L+1, J1 ), INCA, AB( L+2, J1-1 ),
+     $                         INCA, RWORK( M-KB+J1 ), WORK( M-KB+J1 ),
+     $                         KA1 )
+  890          CONTINUE
+*
+*              apply rotations in 2nd set from both sides to diagonal
+*              blocks
+*
+               CALL ZLAR2V( NR, AB( 1, J1 ), AB( 1, J1-1 ),
+     $                      AB( 2, J1-1 ), INCA, RWORK( M-KB+J1 ),
+     $                      WORK( M-KB+J1 ), KA1 )
+*
+               CALL ZLACGV( NR, WORK( M-KB+J1 ), KA1 )
+            END IF
+*
+*           start applying rotations in 2nd set from the left
+*
+            DO 900 L = KA - 1, KB - K + 1, -1
+               NRT = ( J2+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL ZLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA,
+     $                         AB( KA1-L, J1T-KA1+L ), INCA,
+     $                         RWORK( M-KB+J1T ), WORK( M-KB+J1T ),
+     $                         KA1 )
+  900       CONTINUE
+*
+            IF( WANTX ) THEN
+*
+*              post-multiply X by product of rotations in 2nd set
+*
+               DO 910 J = J1, J2, KA1
+                  CALL ZROT( NX, X( 1, J ), 1, X( 1, J-1 ), 1,
+     $                       RWORK( M-KB+J ), DCONJG( WORK( M-KB+J ) ) )
+  910          CONTINUE
+            END IF
+  920    CONTINUE
+*
+         DO 940 K = 1, KB - 1
+            J2 = I + K + 1 - MAX( 1, K+I0-M+1 )*KA1
+*
+*           finish applying rotations in 1st set from the left
+*
+            DO 930 L = KB - K, 1, -1
+               NRT = ( J2+L-1 ) / KA1
+               J1T = J2 - ( NRT-1 )*KA1
+               IF( NRT.GT.0 )
+     $            CALL ZLARTV( NRT, AB( KA1-L+1, J1T-KA1+L ), INCA,
+     $                         AB( KA1-L, J1T-KA1+L ), INCA,
+     $                         RWORK( J1T ), WORK( J1T ), KA1 )
+  930       CONTINUE
+  940    CONTINUE
+*
+         IF( KB.GT.1 ) THEN
+            DO 950 J = 2, I2 - KA
+               RWORK( J ) = RWORK( J+KA )
+               WORK( J ) = WORK( J+KA )
+  950       CONTINUE
+         END IF
+*
+      END IF
+*
+      GO TO 490
+*
+*     End of ZHBGST
+*
+      END
diff --git a/costa/native/external/lapack/zhbgv.f b/costa/native/external/lapack/zhbgv.f
new file mode 100644
index 000000000..9fc18ace7
--- /dev/null
+++ b/costa/native/external/lapack/zhbgv.f
@@ -0,0 +1,192 @@
+      SUBROUTINE ZHBGV( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W, Z,
+     $                  LDZ, WORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, KA, KB, LDAB, LDBB, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   RWORK( * ), W( * )
+      COMPLEX*16         AB( LDAB, * ), BB( LDBB, * ), WORK( * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHBGV computes all the eigenvalues, and optionally, the eigenvectors
+*  of a complex generalized Hermitian-definite banded eigenproblem, of
+*  the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian
+*  and banded, and B is also positive definite.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  KA      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'. KA >= 0.
+*
+*  KB      (input) INTEGER
+*          The number of superdiagonals of the matrix B if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'. KB >= 0.
+*
+*  AB      (input/output) COMPLEX*16 array, dimension (LDAB, N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix A, stored in the first ka+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+ka).
+*
+*          On exit, the contents of AB are destroyed.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KA+1.
+*
+*  BB      (input/output) COMPLEX*16 array, dimension (LDBB, N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix B, stored in the first kb+1 rows of the array.  The
+*          j-th column of B is stored in the j-th column of the array BB
+*          as follows:
+*          if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j;
+*          if UPLO = 'L', BB(1+i-j,j)    = B(i,j) for j<=i<=min(n,j+kb).
+*
+*          On exit, the factor S from the split Cholesky factorization
+*          B = S**H*S, as returned by ZPBSTF.
+*
+*  LDBB    (input) INTEGER
+*          The leading dimension of the array BB.  LDBB >= KB+1.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) COMPLEX*16 array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of
+*          eigenvectors, with the i-th column of Z holding the
+*          eigenvector associated with W(i). The eigenvectors are
+*          normalized so that Z**H*B*Z = I.
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= N.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (3*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is:
+*             <= N:  the algorithm failed to converge:
+*                    i off-diagonal elements of an intermediate
+*                    tridiagonal form did not converge to zero;
+*             > N:   if INFO = N + i, for 1 <= i <= N, then ZPBSTF
+*                    returned INFO = i: B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            UPPER, WANTZ
+      CHARACTER          VECT
+      INTEGER            IINFO, INDE, INDWRK
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSTERF, XERBLA, ZHBGST, ZHBTRD, ZPBSTF, ZSTEQR
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KA.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.KA+1 ) THEN
+         INFO = -7
+      ELSE IF( LDBB.LT.KB+1 ) THEN
+         INFO = -9
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHBGV ', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form a split Cholesky factorization of B.
+*
+      CALL ZPBSTF( UPLO, N, KB, BB, LDBB, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem.
+*
+      INDE = 1
+      INDWRK = INDE + N
+      CALL ZHBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Z, LDZ,
+     $             WORK, RWORK( INDWRK ), IINFO )
+*
+*     Reduce to tridiagonal form.
+*
+      IF( WANTZ ) THEN
+         VECT = 'U'
+      ELSE
+         VECT = 'N'
+      END IF
+      CALL ZHBTRD( VECT, UPLO, N, KA, AB, LDAB, W, RWORK( INDE ), Z,
+     $             LDZ, WORK, IINFO )
+*
+*     For eigenvalues only, call DSTERF.  For eigenvectors, call ZSTEQR.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL DSTERF( N, W, RWORK( INDE ), INFO )
+      ELSE
+         CALL ZSTEQR( JOBZ, N, W, RWORK( INDE ), Z, LDZ,
+     $                RWORK( INDWRK ), INFO )
+      END IF
+      RETURN
+*
+*     End of ZHBGV
+*
+      END
diff --git a/costa/native/external/lapack/zhbgvd.f b/costa/native/external/lapack/zhbgvd.f
new file mode 100644
index 000000000..446b61408
--- /dev/null
+++ b/costa/native/external/lapack/zhbgvd.f
@@ -0,0 +1,295 @@
+      SUBROUTINE ZHBGVD( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, W,
+     $                   Z, LDZ, WORK, LWORK, RWORK, LRWORK, IWORK,
+     $                   LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, KA, KB, LDAB, LDBB, LDZ, LIWORK, LRWORK,
+     $                   LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   RWORK( * ), W( * )
+      COMPLEX*16         AB( LDAB, * ), BB( LDBB, * ), WORK( * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHBGVD computes all the eigenvalues, and optionally, the eigenvectors
+*  of a complex generalized Hermitian-definite banded eigenproblem, of
+*  the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian
+*  and banded, and B is also positive definite.  If eigenvectors are
+*  desired, it uses a divide and conquer algorithm.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  KA      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'. KA >= 0.
+*
+*  KB      (input) INTEGER
+*          The number of superdiagonals of the matrix B if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'. KB >= 0.
+*
+*  AB      (input/output) COMPLEX*16 array, dimension (LDAB, N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix A, stored in the first ka+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+ka).
+*
+*          On exit, the contents of AB are destroyed.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KA+1.
+*
+*  BB      (input/output) COMPLEX*16 array, dimension (LDBB, N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix B, stored in the first kb+1 rows of the array.  The
+*          j-th column of B is stored in the j-th column of the array BB
+*          as follows:
+*          if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j;
+*          if UPLO = 'L', BB(1+i-j,j)    = B(i,j) for j<=i<=min(n,j+kb).
+*
+*          On exit, the factor S from the split Cholesky factorization
+*          B = S**H*S, as returned by ZPBSTF.
+*
+*  LDBB    (input) INTEGER
+*          The leading dimension of the array BB.  LDBB >= KB+1.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) COMPLEX*16 array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of
+*          eigenvectors, with the i-th column of Z holding the
+*          eigenvector associated with W(i). The eigenvectors are
+*          normalized so that Z**H*B*Z = I.
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= N.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO=0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If N <= 1,               LWORK >= 1.
+*          If JOBZ = 'N' and N > 1, LWORK >= N.
+*          If JOBZ = 'V' and N > 1, LWORK >= 2*N**2.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace/output) DOUBLE PRECISION array, dimension (LRWORK)
+*          On exit, if INFO=0, RWORK(1) returns the optimal LRWORK.
+*
+*  LRWORK  (input) INTEGER
+*          The dimension of array RWORK.
+*          If N <= 1,               LRWORK >= 1.
+*          If JOBZ = 'N' and N > 1, LRWORK >= N.
+*          If JOBZ = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2.
+*
+*          If LRWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the RWORK array,
+*          returns this value as the first entry of the RWORK array, and
+*          no error message related to LRWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO=0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of array IWORK.
+*          If JOBZ = 'N' or N <= 1, LIWORK >= 1.
+*          If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N.
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is:
+*             <= N:  the algorithm failed to converge:
+*                    i off-diagonal elements of an intermediate
+*                    tridiagonal form did not converge to zero;
+*             > N:   if INFO = N + i, for 1 <= i <= N, then ZPBSTF
+*                    returned INFO = i: B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         CONE, CZERO
+      PARAMETER          ( CONE = ( 1.0D+0, 0.0D+0 ),
+     $                   CZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, UPPER, WANTZ
+      CHARACTER          VECT
+      INTEGER            IINFO, INDE, INDWK2, INDWRK, LIWMIN, LLRWK,
+     $                   LLWK2, LRWMIN, LWMIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSTERF, XERBLA, ZGEMM, ZHBGST, ZHBTRD, ZLACPY,
+     $                   ZPBSTF, ZSTEDC
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+      LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( N.LE.1 ) THEN
+         LWMIN = 1
+         LRWMIN = 1
+         LIWMIN = 1
+      ELSE
+         IF( WANTZ ) THEN
+            LWMIN = 2*N**2
+            LRWMIN = 1 + 5*N + 2*N**2
+            LIWMIN = 3 + 5*N
+         ELSE
+            LWMIN = N
+            LRWMIN = N
+            LIWMIN = 1
+         END IF
+      END IF
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KA.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.KA+1 ) THEN
+         INFO = -7
+      ELSE IF( LDBB.LT.KB+1 ) THEN
+         INFO = -9
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -12
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -14
+      ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -16
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -18
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+         RWORK( 1 ) = LRWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHBGVD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form a split Cholesky factorization of B.
+*
+      CALL ZPBSTF( UPLO, N, KB, BB, LDBB, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem.
+*
+      INDE = 1
+      INDWRK = INDE + N
+      INDWK2 = 1 + N*N
+      LLWK2 = LWORK - INDWK2 + 2
+      LLRWK = LRWORK - INDWRK + 2
+      CALL ZHBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Z, LDZ,
+     $             WORK, RWORK( INDWRK ), IINFO )
+*
+*     Reduce Hermitian band matrix to tridiagonal form.
+*
+      IF( WANTZ ) THEN
+         VECT = 'U'
+      ELSE
+         VECT = 'N'
+      END IF
+      CALL ZHBTRD( VECT, UPLO, N, KA, AB, LDAB, W, RWORK( INDE ), Z,
+     $             LDZ, WORK, IINFO )
+*
+*     For eigenvalues only, call DSTERF.  For eigenvectors, call ZSTEDC.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL DSTERF( N, W, RWORK( INDE ), INFO )
+      ELSE
+         CALL ZSTEDC( 'I', N, W, RWORK( INDE ), WORK, N, WORK( INDWK2 ),
+     $                LLWK2, RWORK( INDWRK ), LLRWK, IWORK, LIWORK,
+     $                INFO )
+         CALL ZGEMM( 'N', 'N', N, N, N, CONE, Z, LDZ, WORK, N, CZERO,
+     $               WORK( INDWK2 ), N )
+         CALL ZLACPY( 'A', N, N, WORK( INDWK2 ), N, Z, LDZ )
+      END IF
+*
+      WORK( 1 ) = LWMIN
+      RWORK( 1 ) = LRWMIN
+      IWORK( 1 ) = LIWMIN
+      RETURN
+*
+*     End of ZHBGVD
+*
+      END
diff --git a/costa/native/external/lapack/zhbgvx.f b/costa/native/external/lapack/zhbgvx.f
new file mode 100644
index 000000000..d12fbd994
--- /dev/null
+++ b/costa/native/external/lapack/zhbgvx.f
@@ -0,0 +1,374 @@
+      SUBROUTINE ZHBGVX( JOBZ, RANGE, UPLO, N, KA, KB, AB, LDAB, BB,
+     $                   LDBB, Q, LDQ, VL, VU, IL, IU, ABSTOL, M, W, Z,
+     $                   LDZ, WORK, RWORK, IWORK, IFAIL, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE, UPLO
+      INTEGER            IL, INFO, IU, KA, KB, LDAB, LDBB, LDQ, LDZ, M,
+     $                   N
+      DOUBLE PRECISION   ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IFAIL( * ), IWORK( * )
+      DOUBLE PRECISION   RWORK( * ), W( * )
+      COMPLEX*16         AB( LDAB, * ), BB( LDBB, * ), Q( LDQ, * ),
+     $                   WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHBGVX computes all the eigenvalues, and optionally, the eigenvectors
+*  of a complex generalized Hermitian-definite banded eigenproblem, of
+*  the form A*x=(lambda)*B*x. Here A and B are assumed to be Hermitian
+*  and banded, and B is also positive definite.  Eigenvalues and
+*  eigenvectors can be selected by specifying either all eigenvalues,
+*  a range of values or a range of indices for the desired eigenvalues.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found;
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found;
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  KA      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'. KA >= 0.
+*
+*  KB      (input) INTEGER
+*          The number of superdiagonals of the matrix B if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'. KB >= 0.
+*
+*  AB      (input/output) COMPLEX*16 array, dimension (LDAB, N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix A, stored in the first ka+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(ka+1+i-j,j) = A(i,j) for max(1,j-ka)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+ka).
+*
+*          On exit, the contents of AB are destroyed.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KA+1.
+*
+*  BB      (input/output) COMPLEX*16 array, dimension (LDBB, N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix B, stored in the first kb+1 rows of the array.  The
+*          j-th column of B is stored in the j-th column of the array BB
+*          as follows:
+*          if UPLO = 'U', BB(kb+1+i-j,j) = B(i,j) for max(1,j-kb)<=i<=j;
+*          if UPLO = 'L', BB(1+i-j,j)    = B(i,j) for j<=i<=min(n,j+kb).
+*
+*          On exit, the factor S from the split Cholesky factorization
+*          B = S**H*S, as returned by ZPBSTF.
+*
+*  LDBB    (input) INTEGER
+*          The leading dimension of the array BB.  LDBB >= KB+1.
+*
+*  Q       (output) COMPLEX*16 array, dimension (LDQ, N)
+*          If JOBZ = 'V', the n-by-n matrix used in the reduction of
+*          A*x = (lambda)*B*x to standard form, i.e. C*x = (lambda)*x,
+*          and consequently C to tridiagonal form.
+*          If JOBZ = 'N', the array Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.  If JOBZ = 'N',
+*          LDQ >= 1. If JOBZ = 'V', LDQ >= max(1,N).
+*
+*  VL      (input) DOUBLE PRECISION
+*  VU      (input) DOUBLE PRECISION
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) DOUBLE PRECISION
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less than
+*          or equal to zero, then  EPS*|T|  will be used in its place,
+*          where |T| is the 1-norm of the tridiagonal matrix obtained
+*          by reducing AP to tridiagonal form.
+*
+*          Eigenvalues will be computed most accurately when ABSTOL is
+*          set to twice the underflow threshold 2*DLAMCH('S'), not zero.
+*          If this routine returns with INFO>0, indicating that some
+*          eigenvectors did not converge, try setting ABSTOL to
+*          2*DLAMCH('S').
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) COMPLEX*16 array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of
+*          eigenvectors, with the i-th column of Z holding the
+*          eigenvector associated with W(i). The eigenvectors are
+*          normalized so that Z**H*B*Z = I.
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= N.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (7*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (5*N)
+*
+*  IFAIL   (output) INTEGER array, dimension (N)
+*          If JOBZ = 'V', then if INFO = 0, the first M elements of
+*          IFAIL are zero.  If INFO > 0, then IFAIL contains the
+*          indices of the eigenvectors that failed to converge.
+*          If JOBZ = 'N', then IFAIL is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is:
+*             <= N:  then i eigenvectors failed to converge.  Their
+*                    indices are stored in array IFAIL.
+*             > N:   if INFO = N + i, for 1 <= i <= N, then ZPBSTF
+*                    returned INFO = i: B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+      COMPLEX*16         CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, UPPER, VALEIG, WANTZ
+      CHARACTER          ORDER, VECT
+      INTEGER            I, IINFO, INDD, INDE, INDEE, INDIBL, INDISP,
+     $                   INDIWK, INDRWK, INDWRK, ITMP1, J, JJ, NSPLIT
+      DOUBLE PRECISION   TMP1
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DSTEBZ, DSTERF, XERBLA, ZCOPY, ZGEMV,
+     $                   ZHBGST, ZHBTRD, ZLACPY, ZPBSTF, ZSTEIN, ZSTEQR,
+     $                   ZSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KA.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( KB.LT.0 .OR. KB.GT.KA ) THEN
+         INFO = -6
+      ELSE IF( LDAB.LT.KA+1 ) THEN
+         INFO = -8
+      ELSE IF( LDBB.LT.KB+1 ) THEN
+         INFO = -10
+      ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN
+         INFO = -12
+      ELSE IF( INDEIG .AND. IL.LT.1 ) THEN
+         INFO = -13
+      ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN
+         INFO = -14
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -19
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHBGVX', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form a split Cholesky factorization of B.
+*
+      CALL ZPBSTF( UPLO, N, KB, BB, LDBB, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem.
+*
+      CALL ZHBGST( JOBZ, UPLO, N, KA, KB, AB, LDAB, BB, LDBB, Q, LDQ,
+     $             WORK, RWORK, IINFO )
+*
+*     Solve the standard eigenvalue problem.
+*     Reduce Hermitian band matrix to tridiagonal form.
+*
+      INDD = 1
+      INDE = INDD + N
+      INDRWK = INDE + N
+      INDWRK = 1
+      IF( WANTZ ) THEN
+         VECT = 'U'
+      ELSE
+         VECT = 'N'
+      END IF
+      CALL ZHBTRD( VECT, UPLO, N, KA, AB, LDAB, RWORK( INDD ),
+     $             RWORK( INDE ), Q, LDQ, WORK( INDWRK ), IINFO )
+*
+*     If all eigenvalues are desired and ABSTOL is less than or equal
+*     to zero, then call DSTERF or ZSTEQR.  If this fails for some
+*     eigenvalue, then try DSTEBZ.
+*
+      IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND.
+     $    ( ABSTOL.LE.ZERO ) ) THEN
+         CALL DCOPY( N, RWORK( INDD ), 1, W, 1 )
+         INDEE = INDRWK + 2*N
+         CALL DCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 )
+         IF( .NOT.WANTZ ) THEN
+            CALL DSTERF( N, W, RWORK( INDEE ), INFO )
+         ELSE
+            CALL ZLACPY( 'A', N, N, Q, LDQ, Z, LDZ )
+            CALL ZSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ,
+     $                   RWORK( INDRWK ), INFO )
+            IF( INFO.EQ.0 ) THEN
+               DO 10 I = 1, N
+                  IFAIL( I ) = 0
+   10          CONTINUE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            M = N
+            GO TO 30
+         END IF
+         INFO = 0
+      END IF
+*
+*     Otherwise, call DSTEBZ and, if eigenvectors are desired,
+*     call ZSTEIN.
+*
+      IF( WANTZ ) THEN
+         ORDER = 'B'
+      ELSE
+         ORDER = 'E'
+      END IF
+      INDIBL = 1
+      INDISP = INDIBL + N
+      INDIWK = INDISP + N
+      CALL DSTEBZ( RANGE, ORDER, N, VL, VU, IL, IU, ABSTOL,
+     $             RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W,
+     $             IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ),
+     $             IWORK( INDIWK ), INFO )
+*
+      IF( WANTZ ) THEN
+         CALL ZSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W,
+     $                IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ,
+     $                RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO )
+*
+*        Apply unitary matrix used in reduction to tridiagonal
+*        form to eigenvectors returned by ZSTEIN.
+*
+         DO 20 J = 1, M
+            CALL ZCOPY( N, Z( 1, J ), 1, WORK( 1 ), 1 )
+            CALL ZGEMV( 'N', N, N, CONE, Q, LDQ, WORK, 1, CZERO,
+     $                  Z( 1, J ), 1 )
+   20    CONTINUE
+      END IF
+*
+   30 CONTINUE
+*
+*     If eigenvalues are not in order, then sort them, along with
+*     eigenvectors.
+*
+      IF( WANTZ ) THEN
+         DO 50 J = 1, M - 1
+            I = 0
+            TMP1 = W( J )
+            DO 40 JJ = J + 1, M
+               IF( W( JJ ).LT.TMP1 ) THEN
+                  I = JJ
+                  TMP1 = W( JJ )
+               END IF
+   40       CONTINUE
+*
+            IF( I.NE.0 ) THEN
+               ITMP1 = IWORK( INDIBL+I-1 )
+               W( I ) = W( J )
+               IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 )
+               W( J ) = TMP1
+               IWORK( INDIBL+J-1 ) = ITMP1
+               CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 )
+               IF( INFO.NE.0 ) THEN
+                  ITMP1 = IFAIL( I )
+                  IFAIL( I ) = IFAIL( J )
+                  IFAIL( J ) = ITMP1
+               END IF
+            END IF
+   50    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZHBGVX
+*
+      END
diff --git a/costa/native/external/lapack/zhbtrd.f b/costa/native/external/lapack/zhbtrd.f
new file mode 100644
index 000000000..a4743c280
--- /dev/null
+++ b/costa/native/external/lapack/zhbtrd.f
@@ -0,0 +1,589 @@
+      SUBROUTINE ZHBTRD( VECT, UPLO, N, KD, AB, LDAB, D, E, Q, LDQ,
+     $                   WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO, VECT
+      INTEGER            INFO, KD, LDAB, LDQ, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * ), E( * )
+      COMPLEX*16         AB( LDAB, * ), Q( LDQ, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHBTRD reduces a complex Hermitian band matrix A to real symmetric
+*  tridiagonal form T by a unitary similarity transformation:
+*  Q**H * A * Q = T.
+*
+*  Arguments
+*  =========
+*
+*  VECT    (input) CHARACTER*1
+*          = 'N':  do not form Q;
+*          = 'V':  form Q;
+*          = 'U':  update a matrix X, by forming X*Q.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) COMPLEX*16 array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*          On exit, the diagonal elements of AB are overwritten by the
+*          diagonal elements of the tridiagonal matrix T; if KD > 0, the
+*          elements on the first superdiagonal (if UPLO = 'U') or the
+*          first subdiagonal (if UPLO = 'L') are overwritten by the
+*          off-diagonal elements of T; the rest of AB is overwritten by
+*          values generated during the reduction.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  D       (output) DOUBLE PRECISION array, dimension (N)
+*          The diagonal elements of the tridiagonal matrix T.
+*
+*  E       (output) DOUBLE PRECISION array, dimension (N-1)
+*          The off-diagonal elements of the tridiagonal matrix T:
+*          E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'.
+*
+*  Q       (input/output) COMPLEX*16 array, dimension (LDQ,N)
+*          On entry, if VECT = 'U', then Q must contain an N-by-N
+*          matrix X; if VECT = 'N' or 'V', then Q need not be set.
+*
+*          On exit:
+*          if VECT = 'V', Q contains the N-by-N unitary matrix Q;
+*          if VECT = 'U', Q contains the product X*Q;
+*          if VECT = 'N', the array Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.
+*          LDQ >= 1, and LDQ >= N if VECT = 'V' or 'U'.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  Modified by Linda Kaufman, Bell Labs.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+      COMPLEX*16         CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            INITQ, UPPER, WANTQ
+      INTEGER            I, I2, IBL, INCA, INCX, IQAEND, IQB, IQEND, J,
+     $                   J1, J1END, J1INC, J2, JEND, JIN, JINC, K, KD1,
+     $                   KDM1, KDN, L, LAST, LEND, NQ, NR, NRT
+      DOUBLE PRECISION   ABST
+      COMPLEX*16         T, TEMP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLACGV, ZLAR2V, ZLARGV, ZLARTG, ZLARTV,
+     $                   ZLASET, ZROT, ZSCAL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCONJG, MAX, MIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INITQ = LSAME( VECT, 'V' )
+      WANTQ = INITQ .OR. LSAME( VECT, 'U' )
+      UPPER = LSAME( UPLO, 'U' )
+      KD1 = KD + 1
+      KDM1 = KD - 1
+      INCX = LDAB - 1
+      IQEND = 1
+*
+      INFO = 0
+      IF( .NOT.WANTQ .AND. .NOT.LSAME( VECT, 'N' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KD1 ) THEN
+         INFO = -6
+      ELSE IF( LDQ.LT.MAX( 1, N ) .AND. WANTQ ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHBTRD', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Initialize Q to the unit matrix, if needed
+*
+      IF( INITQ )
+     $   CALL ZLASET( 'Full', N, N, CZERO, CONE, Q, LDQ )
+*
+*     Wherever possible, plane rotations are generated and applied in
+*     vector operations of length NR over the index set J1:J2:KD1.
+*
+*     The real cosines and complex sines of the plane rotations are
+*     stored in the arrays D and WORK.
+*
+      INCA = KD1*LDAB
+      KDN = MIN( N-1, KD )
+      IF( UPPER ) THEN
+*
+         IF( KD.GT.1 ) THEN
+*
+*           Reduce to complex Hermitian tridiagonal form, working with
+*           the upper triangle
+*
+            NR = 0
+            J1 = KDN + 2
+            J2 = 1
+*
+            AB( KD1, 1 ) = DBLE( AB( KD1, 1 ) )
+            DO 90 I = 1, N - 2
+*
+*              Reduce i-th row of matrix to tridiagonal form
+*
+               DO 80 K = KDN + 1, 2, -1
+                  J1 = J1 + KDN
+                  J2 = J2 + KDN
+*
+                  IF( NR.GT.0 ) THEN
+*
+*                    generate plane rotations to annihilate nonzero
+*                    elements which have been created outside the band
+*
+                     CALL ZLARGV( NR, AB( 1, J1-1 ), INCA, WORK( J1 ),
+     $                            KD1, D( J1 ), KD1 )
+*
+*                    apply rotations from the right
+*
+*
+*                    Dependent on the the number of diagonals either
+*                    ZLARTV or ZROT is used
+*
+                     IF( NR.GE.2*KD-1 ) THEN
+                        DO 10 L = 1, KD - 1
+                           CALL ZLARTV( NR, AB( L+1, J1-1 ), INCA,
+     $                                  AB( L, J1 ), INCA, D( J1 ),
+     $                                  WORK( J1 ), KD1 )
+   10                   CONTINUE
+*
+                     ELSE
+                        JEND = J1 + ( NR-1 )*KD1
+                        DO 20 JINC = J1, JEND, KD1
+                           CALL ZROT( KDM1, AB( 2, JINC-1 ), 1,
+     $                                AB( 1, JINC ), 1, D( JINC ),
+     $                                WORK( JINC ) )
+   20                   CONTINUE
+                     END IF
+                  END IF
+*
+*
+                  IF( K.GT.2 ) THEN
+                     IF( K.LE.N-I+1 ) THEN
+*
+*                       generate plane rotation to annihilate a(i,i+k-1)
+*                       within the band
+*
+                        CALL ZLARTG( AB( KD-K+3, I+K-2 ),
+     $                               AB( KD-K+2, I+K-1 ), D( I+K-1 ),
+     $                               WORK( I+K-1 ), TEMP )
+                        AB( KD-K+3, I+K-2 ) = TEMP
+*
+*                       apply rotation from the right
+*
+                        CALL ZROT( K-3, AB( KD-K+4, I+K-2 ), 1,
+     $                             AB( KD-K+3, I+K-1 ), 1, D( I+K-1 ),
+     $                             WORK( I+K-1 ) )
+                     END IF
+                     NR = NR + 1
+                     J1 = J1 - KDN - 1
+                  END IF
+*
+*                 apply plane rotations from both sides to diagonal
+*                 blocks
+*
+                  IF( NR.GT.0 )
+     $               CALL ZLAR2V( NR, AB( KD1, J1-1 ), AB( KD1, J1 ),
+     $                            AB( KD, J1 ), INCA, D( J1 ),
+     $                            WORK( J1 ), KD1 )
+*
+*                 apply plane rotations from the left
+*
+                  CALL ZLACGV( NR, WORK( J1 ), KD1 )
+                  IF( NR.GT.0 ) THEN
+                     IF( 2*KD-1.LT.NR ) THEN
+*
+*                    Dependent on the the number of diagonals either
+*                    ZLARTV or ZROT is used
+*
+                        DO 30 L = 1, KD - 1
+                           IF( J2+L.GT.N ) THEN
+                              NRT = NR - 1
+                           ELSE
+                              NRT = NR
+                           END IF
+                           IF( NRT.GT.0 )
+     $                        CALL ZLARTV( NRT, AB( KD-L, J1+L ), INCA,
+     $                                     AB( KD-L+1, J1+L ), INCA,
+     $                                     D( J1 ), WORK( J1 ), KD1 )
+   30                   CONTINUE
+                     ELSE
+                        J1END = J1 + KD1*( NR-2 )
+                        IF( J1END.GE.J1 ) THEN
+                           DO 40 JIN = J1, J1END, KD1
+                              CALL ZROT( KD-1, AB( KD-1, JIN+1 ), INCX,
+     $                                   AB( KD, JIN+1 ), INCX,
+     $                                   D( JIN ), WORK( JIN ) )
+   40                      CONTINUE
+                        END IF
+                        LEND = MIN( KDM1, N-J2 )
+                        LAST = J1END + KD1
+                        IF( LEND.GT.0 )
+     $                     CALL ZROT( LEND, AB( KD-1, LAST+1 ), INCX,
+     $                                AB( KD, LAST+1 ), INCX, D( LAST ),
+     $                                WORK( LAST ) )
+                     END IF
+                  END IF
+*
+                  IF( WANTQ ) THEN
+*
+*                    accumulate product of plane rotations in Q
+*
+                     IF( INITQ ) THEN
+*
+*                 take advantage of the fact that Q was
+*                 initially the Identity matrix
+*
+                        IQEND = MAX( IQEND, J2 )
+                        I2 = MAX( 0, K-3 )
+                        IQAEND = 1 + I*KD
+                        IF( K.EQ.2 )
+     $                     IQAEND = IQAEND + KD
+                        IQAEND = MIN( IQAEND, IQEND )
+                        DO 50 J = J1, J2, KD1
+                           IBL = I - I2 / KDM1
+                           I2 = I2 + 1
+                           IQB = MAX( 1, J-IBL )
+                           NQ = 1 + IQAEND - IQB
+                           IQAEND = MIN( IQAEND+KD, IQEND )
+                           CALL ZROT( NQ, Q( IQB, J-1 ), 1, Q( IQB, J ),
+     $                                1, D( J ), DCONJG( WORK( J ) ) )
+   50                   CONTINUE
+                     ELSE
+*
+                        DO 60 J = J1, J2, KD1
+                           CALL ZROT( N, Q( 1, J-1 ), 1, Q( 1, J ), 1,
+     $                                D( J ), DCONJG( WORK( J ) ) )
+   60                   CONTINUE
+                     END IF
+*
+                  END IF
+*
+                  IF( J2+KDN.GT.N ) THEN
+*
+*                    adjust J2 to keep within the bounds of the matrix
+*
+                     NR = NR - 1
+                     J2 = J2 - KDN - 1
+                  END IF
+*
+                  DO 70 J = J1, J2, KD1
+*
+*                    create nonzero element a(j-1,j+kd) outside the band
+*                    and store it in WORK
+*
+                     WORK( J+KD ) = WORK( J )*AB( 1, J+KD )
+                     AB( 1, J+KD ) = D( J )*AB( 1, J+KD )
+   70             CONTINUE
+   80          CONTINUE
+   90       CONTINUE
+         END IF
+*
+         IF( KD.GT.0 ) THEN
+*
+*           make off-diagonal elements real and copy them to E
+*
+            DO 100 I = 1, N - 1
+               T = AB( KD, I+1 )
+               ABST = ABS( T )
+               AB( KD, I+1 ) = ABST
+               E( I ) = ABST
+               IF( ABST.NE.ZERO ) THEN
+                  T = T / ABST
+               ELSE
+                  T = CONE
+               END IF
+               IF( I.LT.N-1 )
+     $            AB( KD, I+2 ) = AB( KD, I+2 )*T
+               IF( WANTQ ) THEN
+                  CALL ZSCAL( N, DCONJG( T ), Q( 1, I+1 ), 1 )
+               END IF
+  100       CONTINUE
+         ELSE
+*
+*           set E to zero if original matrix was diagonal
+*
+            DO 110 I = 1, N - 1
+               E( I ) = ZERO
+  110       CONTINUE
+         END IF
+*
+*        copy diagonal elements to D
+*
+         DO 120 I = 1, N
+            D( I ) = AB( KD1, I )
+  120    CONTINUE
+*
+      ELSE
+*
+         IF( KD.GT.1 ) THEN
+*
+*           Reduce to complex Hermitian tridiagonal form, working with
+*           the lower triangle
+*
+            NR = 0
+            J1 = KDN + 2
+            J2 = 1
+*
+            AB( 1, 1 ) = DBLE( AB( 1, 1 ) )
+            DO 210 I = 1, N - 2
+*
+*              Reduce i-th column of matrix to tridiagonal form
+*
+               DO 200 K = KDN + 1, 2, -1
+                  J1 = J1 + KDN
+                  J2 = J2 + KDN
+*
+                  IF( NR.GT.0 ) THEN
+*
+*                    generate plane rotations to annihilate nonzero
+*                    elements which have been created outside the band
+*
+                     CALL ZLARGV( NR, AB( KD1, J1-KD1 ), INCA,
+     $                            WORK( J1 ), KD1, D( J1 ), KD1 )
+*
+*                    apply plane rotations from one side
+*
+*
+*                    Dependent on the the number of diagonals either
+*                    ZLARTV or ZROT is used
+*
+                     IF( NR.GT.2*KD-1 ) THEN
+                        DO 130 L = 1, KD - 1
+                           CALL ZLARTV( NR, AB( KD1-L, J1-KD1+L ), INCA,
+     $                                  AB( KD1-L+1, J1-KD1+L ), INCA,
+     $                                  D( J1 ), WORK( J1 ), KD1 )
+  130                   CONTINUE
+                     ELSE
+                        JEND = J1 + KD1*( NR-1 )
+                        DO 140 JINC = J1, JEND, KD1
+                           CALL ZROT( KDM1, AB( KD, JINC-KD ), INCX,
+     $                                AB( KD1, JINC-KD ), INCX,
+     $                                D( JINC ), WORK( JINC ) )
+  140                   CONTINUE
+                     END IF
+*
+                  END IF
+*
+                  IF( K.GT.2 ) THEN
+                     IF( K.LE.N-I+1 ) THEN
+*
+*                       generate plane rotation to annihilate a(i+k-1,i)
+*                       within the band
+*
+                        CALL ZLARTG( AB( K-1, I ), AB( K, I ),
+     $                               D( I+K-1 ), WORK( I+K-1 ), TEMP )
+                        AB( K-1, I ) = TEMP
+*
+*                       apply rotation from the left
+*
+                        CALL ZROT( K-3, AB( K-2, I+1 ), LDAB-1,
+     $                             AB( K-1, I+1 ), LDAB-1, D( I+K-1 ),
+     $                             WORK( I+K-1 ) )
+                     END IF
+                     NR = NR + 1
+                     J1 = J1 - KDN - 1
+                  END IF
+*
+*                 apply plane rotations from both sides to diagonal
+*                 blocks
+*
+                  IF( NR.GT.0 )
+     $               CALL ZLAR2V( NR, AB( 1, J1-1 ), AB( 1, J1 ),
+     $                            AB( 2, J1-1 ), INCA, D( J1 ),
+     $                            WORK( J1 ), KD1 )
+*
+*                 apply plane rotations from the right
+*
+*
+*                    Dependent on the the number of diagonals either
+*                    ZLARTV or ZROT is used
+*
+                  CALL ZLACGV( NR, WORK( J1 ), KD1 )
+                  IF( NR.GT.0 ) THEN
+                     IF( NR.GT.2*KD-1 ) THEN
+                        DO 150 L = 1, KD - 1
+                           IF( J2+L.GT.N ) THEN
+                              NRT = NR - 1
+                           ELSE
+                              NRT = NR
+                           END IF
+                           IF( NRT.GT.0 )
+     $                        CALL ZLARTV( NRT, AB( L+2, J1-1 ), INCA,
+     $                                     AB( L+1, J1 ), INCA, D( J1 ),
+     $                                     WORK( J1 ), KD1 )
+  150                   CONTINUE
+                     ELSE
+                        J1END = J1 + KD1*( NR-2 )
+                        IF( J1END.GE.J1 ) THEN
+                           DO 160 J1INC = J1, J1END, KD1
+                              CALL ZROT( KDM1, AB( 3, J1INC-1 ), 1,
+     $                                   AB( 2, J1INC ), 1, D( J1INC ),
+     $                                   WORK( J1INC ) )
+  160                      CONTINUE
+                        END IF
+                        LEND = MIN( KDM1, N-J2 )
+                        LAST = J1END + KD1
+                        IF( LEND.GT.0 )
+     $                     CALL ZROT( LEND, AB( 3, LAST-1 ), 1,
+     $                                AB( 2, LAST ), 1, D( LAST ),
+     $                                WORK( LAST ) )
+                     END IF
+                  END IF
+*
+*
+*
+                  IF( WANTQ ) THEN
+*
+*                    accumulate product of plane rotations in Q
+*
+                     IF( INITQ ) THEN
+*
+*                 take advantage of the fact that Q was
+*                 initially the Identity matrix
+*
+                        IQEND = MAX( IQEND, J2 )
+                        I2 = MAX( 0, K-3 )
+                        IQAEND = 1 + I*KD
+                        IF( K.EQ.2 )
+     $                     IQAEND = IQAEND + KD
+                        IQAEND = MIN( IQAEND, IQEND )
+                        DO 170 J = J1, J2, KD1
+                           IBL = I - I2 / KDM1
+                           I2 = I2 + 1
+                           IQB = MAX( 1, J-IBL )
+                           NQ = 1 + IQAEND - IQB
+                           IQAEND = MIN( IQAEND+KD, IQEND )
+                           CALL ZROT( NQ, Q( IQB, J-1 ), 1, Q( IQB, J ),
+     $                                1, D( J ), WORK( J ) )
+  170                   CONTINUE
+                     ELSE
+*
+                        DO 180 J = J1, J2, KD1
+                           CALL ZROT( N, Q( 1, J-1 ), 1, Q( 1, J ), 1,
+     $                                D( J ), WORK( J ) )
+  180                   CONTINUE
+                     END IF
+                  END IF
+*
+                  IF( J2+KDN.GT.N ) THEN
+*
+*                    adjust J2 to keep within the bounds of the matrix
+*
+                     NR = NR - 1
+                     J2 = J2 - KDN - 1
+                  END IF
+*
+                  DO 190 J = J1, J2, KD1
+*
+*                    create nonzero element a(j+kd,j-1) outside the
+*                    band and store it in WORK
+*
+                     WORK( J+KD ) = WORK( J )*AB( KD1, J )
+                     AB( KD1, J ) = D( J )*AB( KD1, J )
+  190             CONTINUE
+  200          CONTINUE
+  210       CONTINUE
+         END IF
+*
+         IF( KD.GT.0 ) THEN
+*
+*           make off-diagonal elements real and copy them to E
+*
+            DO 220 I = 1, N - 1
+               T = AB( 2, I )
+               ABST = ABS( T )
+               AB( 2, I ) = ABST
+               E( I ) = ABST
+               IF( ABST.NE.ZERO ) THEN
+                  T = T / ABST
+               ELSE
+                  T = CONE
+               END IF
+               IF( I.LT.N-1 )
+     $            AB( 2, I+1 ) = AB( 2, I+1 )*T
+               IF( WANTQ ) THEN
+                  CALL ZSCAL( N, T, Q( 1, I+1 ), 1 )
+               END IF
+  220       CONTINUE
+         ELSE
+*
+*           set E to zero if original matrix was diagonal
+*
+            DO 230 I = 1, N - 1
+               E( I ) = ZERO
+  230       CONTINUE
+         END IF
+*
+*        copy diagonal elements to D
+*
+         DO 240 I = 1, N
+            D( I ) = AB( 1, I )
+  240    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZHBTRD
+*
+      END
diff --git a/costa/native/external/lapack/zhecon.f b/costa/native/external/lapack/zhecon.f
new file mode 100644
index 000000000..d65ee3a2e
--- /dev/null
+++ b/costa/native/external/lapack/zhecon.f
@@ -0,0 +1,159 @@
+      SUBROUTINE ZHECON( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+      DOUBLE PRECISION   ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHECON estimates the reciprocal of the condition number of a complex
+*  Hermitian matrix A using the factorization A = U*D*U**H or
+*  A = L*D*L**H computed by ZHETRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**H;
+*          = 'L':  Lower triangular, form is A = L*D*L**H.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,N)
+*          The block diagonal matrix D and the multipliers used to
+*          obtain the factor U or L as computed by ZHETRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by ZHETRF.
+*
+*  ANORM   (input) DOUBLE PRECISION
+*          The 1-norm of the original matrix A.
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an
+*          estimate of the 1-norm of inv(A) computed in this routine.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, KASE
+      DOUBLE PRECISION   AINVNM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZHETRS, ZLACON
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHECON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.LE.ZERO ) THEN
+         RETURN
+      END IF
+*
+*     Check that the diagonal matrix D is nonsingular.
+*
+      IF( UPPER ) THEN
+*
+*        Upper triangular storage: examine D from bottom to top
+*
+         DO 10 I = N, 1, -1
+            IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO )
+     $         RETURN
+   10    CONTINUE
+      ELSE
+*
+*        Lower triangular storage: examine D from top to bottom.
+*
+         DO 20 I = 1, N
+            IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO )
+     $         RETURN
+   20    CONTINUE
+      END IF
+*
+*     Estimate the 1-norm of the inverse.
+*
+      KASE = 0
+   30 CONTINUE
+      CALL ZLACON( N, WORK( N+1 ), WORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+*
+*        Multiply by inv(L*D*L') or inv(U*D*U').
+*
+         CALL ZHETRS( UPLO, N, 1, A, LDA, IPIV, WORK, N, INFO )
+         GO TO 30
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+      RETURN
+*
+*     End of ZHECON
+*
+      END
diff --git a/costa/native/external/lapack/zheev.f b/costa/native/external/lapack/zheev.f
new file mode 100644
index 000000000..240758636
--- /dev/null
+++ b/costa/native/external/lapack/zheev.f
@@ -0,0 +1,220 @@
+      SUBROUTINE ZHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK,
+     $                  INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   RWORK( * ), W( * )
+      COMPLEX*16         A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHEEV computes all eigenvalues and, optionally, eigenvectors of a
+*  complex Hermitian matrix A.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA, N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of A contains the
+*          upper triangular part of the matrix A.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of A contains
+*          the lower triangular part of the matrix A.
+*          On exit, if JOBZ = 'V', then if INFO = 0, A contains the
+*          orthonormal eigenvectors of the matrix A.
+*          If JOBZ = 'N', then on exit the lower triangle (if UPLO='L')
+*          or the upper triangle (if UPLO='U') of A, including the
+*          diagonal, is destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.  LWORK >= max(1,2*N-1).
+*          For optimal efficiency, LWORK >= (NB+1)*N,
+*          where NB is the blocksize for ZHETRD returned by ILAENV.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (max(1, 3*N-2))
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the algorithm failed to converge; i
+*                off-diagonal elements of an intermediate tridiagonal
+*                form did not converge to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+      COMPLEX*16         CONE
+      PARAMETER          ( CONE = ( 1.0D0, 0.0D0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LOWER, LQUERY, WANTZ
+      INTEGER            IINFO, IMAX, INDE, INDTAU, INDWRK, ISCALE,
+     $                   LLWORK, LOPT, LWKOPT, NB
+      DOUBLE PRECISION   ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
+     $                   SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, ZLANHE
+      EXTERNAL           LSAME, ILAENV, DLAMCH, ZLANHE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSCAL, DSTERF, XERBLA, ZHETRD, ZLASCL, ZSTEQR,
+     $                   ZUNGTR
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      LOWER = LSAME( UPLO, 'L' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.MAX( 1, 2*N-1 ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'ZHETRD', UPLO, N, -1, -1, -1 )
+         LWKOPT = MAX( 1, ( NB+1 )*N )
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHEEV ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( N.EQ.1 ) THEN
+         W( 1 ) = A( 1, 1 )
+         WORK( 1 ) = 3
+         IF( WANTZ )
+     $      A( 1, 1 ) = CONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      EPS = DLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = SQRT( BIGNUM )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ANRM = ZLANHE( 'M', UPLO, N, A, LDA, RWORK )
+      ISCALE = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 )
+     $   CALL ZLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO )
+*
+*     Call ZHETRD to reduce Hermitian matrix to tridiagonal form.
+*
+      INDE = 1
+      INDTAU = 1
+      INDWRK = INDTAU + N
+      LLWORK = LWORK - INDWRK + 1
+      CALL ZHETRD( UPLO, N, A, LDA, W, RWORK( INDE ), WORK( INDTAU ),
+     $             WORK( INDWRK ), LLWORK, IINFO )
+      LOPT = N + WORK( INDWRK )
+*
+*     For eigenvalues only, call DSTERF.  For eigenvectors, first call
+*     ZUNGTR to generate the unitary matrix, then call ZSTEQR.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL DSTERF( N, W, RWORK( INDE ), INFO )
+      ELSE
+         CALL ZUNGTR( UPLO, N, A, LDA, WORK( INDTAU ), WORK( INDWRK ),
+     $                LLWORK, IINFO )
+         INDWRK = INDE + N
+         CALL ZSTEQR( JOBZ, N, W, RWORK( INDE ), A, LDA,
+     $                RWORK( INDWRK ), INFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = N
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL DSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+*     Set WORK(1) to optimal complex workspace size.
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of ZHEEV
+*
+      END
diff --git a/costa/native/external/lapack/zheevd.f b/costa/native/external/lapack/zheevd.f
new file mode 100644
index 000000000..c8b5bfb1a
--- /dev/null
+++ b/costa/native/external/lapack/zheevd.f
@@ -0,0 +1,296 @@
+      SUBROUTINE ZHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK,
+     $                   LRWORK, IWORK, LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, LDA, LIWORK, LRWORK, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   RWORK( * ), W( * )
+      COMPLEX*16         A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHEEVD computes all eigenvalues and, optionally, eigenvectors of a
+*  complex Hermitian matrix A.  If eigenvectors are desired, it uses a
+*  divide and conquer algorithm.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA, N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of A contains the
+*          upper triangular part of the matrix A.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of A contains
+*          the lower triangular part of the matrix A.
+*          On exit, if JOBZ = 'V', then if INFO = 0, A contains the
+*          orthonormal eigenvectors of the matrix A.
+*          If JOBZ = 'N', then on exit the lower triangle (if UPLO='L')
+*          or the upper triangle (if UPLO='U') of A, including the
+*          diagonal, is destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.
+*          If N <= 1,                LWORK must be at least 1.
+*          If JOBZ  = 'N' and N > 1, LWORK must be at least N + 1.
+*          If JOBZ  = 'V' and N > 1, LWORK must be at least 2*N + N**2.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace/output) DOUBLE PRECISION array,
+*                                         dimension (LRWORK)
+*          On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
+*
+*  LRWORK  (input) INTEGER
+*          The dimension of the array RWORK.
+*          If N <= 1,                LRWORK must be at least 1.
+*          If JOBZ  = 'N' and N > 1, LRWORK must be at least N.
+*          If JOBZ  = 'V' and N > 1, LRWORK must be at least
+*                         1 + 5*N + 2*N**2.
+*
+*          If LRWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the RWORK array,
+*          returns this value as the first entry of the RWORK array, and
+*          no error message related to LRWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.
+*          If N <= 1,                LIWORK must be at least 1.
+*          If JOBZ  = 'N' and N > 1, LIWORK must be at least 1.
+*          If JOBZ  = 'V' and N > 1, LIWORK must be at least 3 + 5*N.
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the algorithm failed to converge; i
+*                off-diagonal elements of an intermediate tridiagonal
+*                form did not converge to zero.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Jeff Rutter, Computer Science Division, University of California
+*     at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+      COMPLEX*16         CONE
+      PARAMETER          ( CONE = ( 1.0D0, 0.0D0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LOWER, LQUERY, WANTZ
+      INTEGER            IINFO, IMAX, INDE, INDRWK, INDTAU, INDWK2,
+     $                   INDWRK, ISCALE, LIOPT, LIWMIN, LLRWK, LLWORK,
+     $                   LLWRK2, LOPT, LROPT, LRWMIN, LWMIN
+      DOUBLE PRECISION   ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
+     $                   SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, ZLANHE
+      EXTERNAL           LSAME, DLAMCH, ZLANHE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSCAL, DSTERF, XERBLA, ZHETRD, ZLACPY, ZLASCL,
+     $                   ZSTEDC, ZUNMTR
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, INT, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      LOWER = LSAME( UPLO, 'L' )
+      LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( N.LE.1 ) THEN
+         LWMIN = 1
+         LRWMIN = 1
+         LIWMIN = 1
+         LOPT = LWMIN
+         LROPT = LRWMIN
+         LIOPT = LIWMIN
+      ELSE
+         IF( WANTZ ) THEN
+            LWMIN = 2*N + N*N
+            LRWMIN = 1 + 5*N + 2*N**2
+            LIWMIN = 3 + 5*N
+         ELSE
+            LWMIN = N + 1
+            LRWMIN = N
+            LIWMIN = 1
+         END IF
+         LOPT = LWMIN
+         LROPT = LRWMIN
+         LIOPT = LIWMIN
+      END IF
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -10
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LOPT
+         RWORK( 1 ) = LROPT
+         IWORK( 1 ) = LIOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHEEVD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         W( 1 ) = A( 1, 1 )
+         IF( WANTZ )
+     $      A( 1, 1 ) = CONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      EPS = DLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = SQRT( BIGNUM )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ANRM = ZLANHE( 'M', UPLO, N, A, LDA, RWORK )
+      ISCALE = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 )
+     $   CALL ZLASCL( UPLO, 0, 0, ONE, SIGMA, N, N, A, LDA, INFO )
+*
+*     Call ZHETRD to reduce Hermitian matrix to tridiagonal form.
+*
+      INDE = 1
+      INDTAU = 1
+      INDWRK = INDTAU + N
+      INDRWK = INDE + N
+      INDWK2 = INDWRK + N*N
+      LLWORK = LWORK - INDWRK + 1
+      LLWRK2 = LWORK - INDWK2 + 1
+      LLRWK = LRWORK - INDRWK + 1
+      CALL ZHETRD( UPLO, N, A, LDA, W, RWORK( INDE ), WORK( INDTAU ),
+     $             WORK( INDWRK ), LLWORK, IINFO )
+      LOPT = MAX( DBLE( LOPT ), DBLE( N )+DBLE( WORK( INDWRK ) ) )
+*
+*     For eigenvalues only, call DSTERF.  For eigenvectors, first call
+*     ZSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the
+*     tridiagonal matrix, then call ZUNMTR to multiply it to the
+*     Householder transformations represented as Householder vectors in
+*     A.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL DSTERF( N, W, RWORK( INDE ), INFO )
+      ELSE
+         CALL ZSTEDC( 'I', N, W, RWORK( INDE ), WORK( INDWRK ), N,
+     $                WORK( INDWK2 ), LLWRK2, RWORK( INDRWK ), LLRWK,
+     $                IWORK, LIWORK, INFO )
+         CALL ZUNMTR( 'L', UPLO, 'N', N, N, A, LDA, WORK( INDTAU ),
+     $                WORK( INDWRK ), N, WORK( INDWK2 ), LLWRK2, IINFO )
+         CALL ZLACPY( 'A', N, N, WORK( INDWRK ), N, A, LDA )
+         LOPT = MAX( LOPT, N+N**2+INT( WORK( INDWK2 ) ) )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = N
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL DSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+      WORK( 1 ) = LOPT
+      RWORK( 1 ) = LROPT
+      IWORK( 1 ) = LIOPT
+*
+      RETURN
+*
+*     End of ZHEEVD
+*
+      END
diff --git a/costa/native/external/lapack/zheevr.f b/costa/native/external/lapack/zheevr.f
new file mode 100644
index 000000000..557aedac5
--- /dev/null
+++ b/costa/native/external/lapack/zheevr.f
@@ -0,0 +1,522 @@
+      SUBROUTINE ZHEEVR( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU,
+     $                   ABSTOL, M, W, Z, LDZ, ISUPPZ, WORK, LWORK,
+     $                   RWORK, LRWORK, IWORK, LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 20, 2000
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE, UPLO
+      INTEGER            IL, INFO, IU, LDA, LDZ, LIWORK, LRWORK, LWORK,
+     $                   M, N
+      DOUBLE PRECISION   ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            ISUPPZ( * ), IWORK( * )
+      DOUBLE PRECISION   RWORK( * ), W( * )
+      COMPLEX*16         A( LDA, * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHEEVR computes selected eigenvalues and, optionally, eigenvectors
+*  of a complex Hermitian matrix T.  Eigenvalues and eigenvectors can
+*  be selected by specifying either a range of values or a range of
+*  indices for the desired eigenvalues.
+*
+*  Whenever possible, ZHEEVR calls ZSTEGR to compute the
+*  eigenspectrum using Relatively Robust Representations.  ZSTEGR
+*  computes eigenvalues by the dqds algorithm, while orthogonal
+*  eigenvectors are computed from various "good" L D L^T representations
+*  (also known as Relatively Robust Representations). Gram-Schmidt
+*  orthogonalization is avoided as far as possible. More specifically,
+*  the various steps of the algorithm are as follows. For the i-th
+*  unreduced block of T,
+*     (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T
+*          is a relatively robust representation,
+*     (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high
+*         relative accuracy by the dqds algorithm,
+*     (c) If there is a cluster of close eigenvalues, "choose" sigma_i
+*         close to the cluster, and go to step (a),
+*     (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T,
+*         compute the corresponding eigenvector by forming a
+*         rank-revealing twisted factorization.
+*  The desired accuracy of the output can be specified by the input
+*  parameter ABSTOL.
+*
+*  For more details, see "A new O(n^2) algorithm for the symmetric
+*  tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon,
+*  Computer Science Division Technical Report No. UCB//CSD-97-971,
+*  UC Berkeley, May 1997.
+*
+*
+*  Note 1 : ZHEEVR calls ZSTEGR when the full spectrum is requested
+*  on machines which conform to the ieee-754 floating point standard.
+*  ZHEEVR calls DSTEBZ and ZSTEIN on non-ieee machines and
+*  when partial spectrum requests are made.
+*
+*  Normal execution of ZSTEGR may create NaNs and infinities and
+*  hence may abort due to a floating point exception in environments
+*  which do not handle NaNs and infinities in the ieee standard default
+*  manner.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found.
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found.
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+********** For RANGE = 'V' or 'I' and IU - IL < N - 1, DSTEBZ and
+********** ZSTEIN are called
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA, N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of A contains the
+*          upper triangular part of the matrix A.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of A contains
+*          the lower triangular part of the matrix A.
+*          On exit, the lower triangle (if UPLO='L') or the upper
+*          triangle (if UPLO='U') of A, including the diagonal, is
+*          destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  VL      (input) DOUBLE PRECISION
+*  VU      (input) DOUBLE PRECISION
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) DOUBLE PRECISION
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less than
+*          or equal to zero, then  EPS*|T|  will be used in its place,
+*          where |T| is the 1-norm of the tridiagonal matrix obtained
+*          by reducing A to tridiagonal form.
+*
+*          See "Computing Small Singular Values of Bidiagonal Matrices
+*          with Guaranteed High Relative Accuracy," by Demmel and
+*          Kahan, LAPACK Working Note #3.
+*
+*          If high relative accuracy is important, set ABSTOL to
+*          DLAMCH( 'Safe minimum' ).  Doing so will guarantee that
+*          eigenvalues are computed to high relative accuracy when
+*          possible in future releases.  The current code does not
+*          make any guarantees about high relative accuracy, but
+*          furutre releases will. See J. Barlow and J. Demmel,
+*          "Computing Accurate Eigensystems of Scaled Diagonally
+*          Dominant Matrices", LAPACK Working Note #7, for a discussion
+*          of which matrices define their eigenvalues to high relative
+*          accuracy.
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          The first M elements contain the selected eigenvalues in
+*          ascending order.
+*
+*  Z       (output) COMPLEX*16 array, dimension (LDZ, max(1,M))
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix A
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  ISUPPZ  (output) INTEGER ARRAY, dimension ( 2*max(1,M) )
+*          The support of the eigenvectors in Z, i.e., the indices
+*          indicating the nonzero elements in Z. The i-th eigenvector
+*          is nonzero only in elements ISUPPZ( 2*i-1 ) through
+*          ISUPPZ( 2*i ).
+********** Implemented only for RANGE = 'A' or 'I' and IU - IL = N - 1
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.  LWORK >= max(1,2*N).
+*          For optimal efficiency, LWORK >= (NB+1)*N,
+*          where NB is the max of the blocksize for ZHETRD and for
+*          ZUNMTR as returned by ILAENV.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace/output) DOUBLE PRECISION array, dimension (LRWORK)
+*          On exit, if INFO = 0, RWORK(1) returns the optimal
+*          (and minimal) LRWORK.
+*
+* LRWORK  (input) INTEGER
+*         The length of the array RWORK.  LRWORK >= max(1,24*N).
+*
+*         If LRWORK = -1, then a workspace query is assumed; the routine
+*         only calculates the optimal size of the RWORK array, returns
+*         this value as the first entry of the RWORK array, and no error
+*         message related to LRWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal
+*          (and minimal) LIWORK.
+*
+* LIWORK  (input) INTEGER
+*         The dimension of the array IWORK.  LIWORK >= max(1,10*N).
+*
+*         If LIWORK = -1, then a workspace query is assumed; the
+*         routine only calculates the optimal size of the IWORK array,
+*         returns this value as the first entry of the IWORK array, and
+*         no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  Internal error
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Inderjit Dhillon, IBM Almaden, USA
+*     Osni Marques, LBNL/NERSC, USA
+*     Ken Stanley, Computer Science Division, University of
+*       California at Berkeley, USA
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, LOWER, LQUERY, VALEIG, WANTZ
+      CHARACTER          ORDER
+      INTEGER            I, IEEEOK, IINFO, IMAX, INDIBL, INDIFL, INDISP,
+     $                   INDIWO, INDRD, INDRDD, INDRE, INDREE, INDRWK,
+     $                   INDTAU, INDWK, INDWKN, ISCALE, ITMP1, J, JJ,
+     $                   LIWMIN, LLWORK, LLWRKN, LRWMIN, LWKOPT, LWMIN,
+     $                   NB, NSPLIT
+      DOUBLE PRECISION   ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN,
+     $                   SIGMA, SMLNUM, TMP1, VLL, VUU
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, ZLANSY
+      EXTERNAL           LSAME, ILAENV, DLAMCH, ZLANSY
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DSCAL, DSTEBZ, DSTERF, XERBLA, ZDSCAL,
+     $                   ZHETRD, ZSTEGR, ZSTEIN, ZSWAP, ZUNMTR
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      IEEEOK = ILAENV( 10, 'ZHEEVR', 'N', 1, 2, 3, 4 )
+*
+      LOWER = LSAME( UPLO, 'L' )
+      WANTZ = LSAME( JOBZ, 'V' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+*
+      LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LRWORK.EQ.-1 ) .OR.
+     $         ( LIWORK.EQ.-1 ) )
+*
+      LRWMIN = MAX( 1, 24*N )
+      LIWMIN = MAX( 1, 10*N )
+      LWMIN = MAX( 1, 2*N )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE
+         IF( VALEIG ) THEN
+            IF( N.GT.0 .AND. VU.LE.VL )
+     $         INFO = -8
+         ELSE IF( INDEIG ) THEN
+            IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN
+               INFO = -9
+            ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN
+               INFO = -10
+            END IF
+         END IF
+      END IF
+      IF( INFO.EQ.0 ) THEN
+         IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+            INFO = -15
+         ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+            INFO = -18
+         ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN
+            INFO = -20
+         ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+            INFO = -22
+         END IF
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'ZHETRD', UPLO, N, -1, -1, -1 )
+         NB = MAX( NB, ILAENV( 1, 'ZUNMTR', UPLO, N, -1, -1, -1 ) )
+         LWKOPT = MAX( ( NB+1 )*N, LWMIN )
+         WORK( 1 ) = LWKOPT
+         RWORK( 1 ) = LRWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHEEVR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( N.EQ.1 ) THEN
+         WORK( 1 ) = 7
+         IF( ALLEIG .OR. INDEIG ) THEN
+            M = 1
+            W( 1 ) = DBLE( A( 1, 1 ) )
+         ELSE
+            IF( VL.LT.DBLE( A( 1, 1 ) ) .AND. VU.GE.DBLE( A( 1, 1 ) ) )
+     $           THEN
+               M = 1
+               W( 1 ) = DBLE( A( 1, 1 ) )
+            END IF
+         END IF
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      EPS = DLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ISCALE = 0
+      ABSTLL = ABSTOL
+      VLL = VL
+      VUU = VU
+      ANRM = ZLANSY( 'M', UPLO, N, A, LDA, RWORK )
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         IF( LOWER ) THEN
+            DO 10 J = 1, N
+               CALL ZDSCAL( N-J+1, SIGMA, A( J, J ), 1 )
+   10       CONTINUE
+         ELSE
+            DO 20 J = 1, N
+               CALL ZDSCAL( J, SIGMA, A( 1, J ), 1 )
+   20       CONTINUE
+         END IF
+         IF( ABSTOL.GT.0 )
+     $      ABSTLL = ABSTOL*SIGMA
+         IF( VALEIG ) THEN
+            VLL = VL*SIGMA
+            VUU = VU*SIGMA
+         END IF
+      END IF
+*
+*     Call ZHETRD to reduce Hermitian matrix to tridiagonal form.
+*
+      INDTAU = 1
+      INDWK = INDTAU + N
+*
+      INDRE = 1
+      INDRD = INDRE + N
+      INDREE = INDRD + N
+      INDRDD = INDREE + N
+      INDRWK = INDRDD + N
+      LLWORK = LWORK - INDWK + 1
+      CALL ZHETRD( UPLO, N, A, LDA, RWORK( INDRD ), RWORK( INDRE ),
+     $             WORK( INDTAU ), WORK( INDWK ), LLWORK, IINFO )
+*
+*     If all eigenvalues are desired
+*     then call DSTERF or ZSTEGR and ZUNMTR.
+*
+      IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND.
+     $    IEEEOK.EQ.1 ) THEN
+         IF( .NOT.WANTZ ) THEN
+            CALL DCOPY( N, RWORK( INDRD ), 1, W, 1 )
+            CALL DCOPY( N-1, RWORK( INDRE ), 1, RWORK( INDREE ), 1 )
+            CALL DSTERF( N, W, RWORK( INDREE ), INFO )
+         ELSE
+            CALL DCOPY( N-1, RWORK( INDRE ), 1, RWORK( INDREE ), 1 )
+            CALL DCOPY( N, RWORK( INDRD ), 1, RWORK( INDRDD ), 1 )
+*
+            CALL ZSTEGR( JOBZ, 'A', N, RWORK( INDRDD ),
+     $                   RWORK( INDREE ), VL, VU, IL, IU, ABSTOL, M, W,
+     $                   Z, LDZ, ISUPPZ, RWORK( INDRWK ), LWORK, IWORK,
+     $                   LIWORK, INFO )
+*
+*
+*
+*        Apply unitary matrix used in reduction to tridiagonal
+*        form to eigenvectors returned by ZSTEIN.
+*
+            IF( WANTZ .AND. INFO.EQ.0 ) THEN
+               INDWKN = INDWK
+               LLWRKN = LWORK - INDWKN + 1
+               CALL ZUNMTR( 'L', UPLO, 'N', N, M, A, LDA,
+     $                      WORK( INDTAU ), Z, LDZ, WORK( INDWKN ),
+     $                      LLWRKN, IINFO )
+            END IF
+         END IF
+*
+*
+         IF( INFO.EQ.0 ) THEN
+            M = N
+            GO TO 30
+         END IF
+         INFO = 0
+      END IF
+*
+*     Otherwise, call DSTEBZ and, if eigenvectors are desired, ZSTEIN.
+*     Also call DSTEBZ and ZSTEIN if CSTEGR fails.
+*
+      IF( WANTZ ) THEN
+         ORDER = 'B'
+      ELSE
+         ORDER = 'E'
+      END IF
+      INDIFL = 1
+      INDIBL = INDIFL + N
+      INDISP = INDIBL + N
+      INDIWO = INDISP + N
+      CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL,
+     $             RWORK( INDRD ), RWORK( INDRE ), M, NSPLIT, W,
+     $             IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ),
+     $             IWORK( INDIWO ), INFO )
+*
+      IF( WANTZ ) THEN
+         CALL ZSTEIN( N, RWORK( INDRD ), RWORK( INDRE ), M, W,
+     $                IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ,
+     $                RWORK( INDRWK ), IWORK( INDIWO ), IWORK( INDIFL ),
+     $                INFO )
+*
+*        Apply unitary matrix used in reduction to tridiagonal
+*        form to eigenvectors returned by ZSTEIN.
+*
+         INDWKN = INDWK
+         LLWRKN = LWORK - INDWKN + 1
+         CALL ZUNMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z,
+     $                LDZ, WORK( INDWKN ), LLWRKN, IINFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+   30 CONTINUE
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = M
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL DSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+*     If eigenvalues are not in order, then sort them, along with
+*     eigenvectors.
+*
+      IF( WANTZ ) THEN
+         DO 50 J = 1, M - 1
+            I = 0
+            TMP1 = W( J )
+            DO 40 JJ = J + 1, M
+               IF( W( JJ ).LT.TMP1 ) THEN
+                  I = JJ
+                  TMP1 = W( JJ )
+               END IF
+   40       CONTINUE
+*
+            IF( I.NE.0 ) THEN
+               ITMP1 = IWORK( INDIBL+I-1 )
+               W( I ) = W( J )
+               IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 )
+               W( J ) = TMP1
+               IWORK( INDIBL+J-1 ) = ITMP1
+               CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 )
+            END IF
+   50    CONTINUE
+      END IF
+*
+*     Set WORK(1) to optimal workspace size.
+*
+      WORK( 1 ) = LWKOPT
+      RWORK( 1 ) = LRWMIN
+      IWORK( 1 ) = LIWMIN
+*
+      RETURN
+*
+*     End of ZHEEVR
+*
+      END
diff --git a/costa/native/external/lapack/zheevx.f b/costa/native/external/lapack/zheevx.f
new file mode 100644
index 000000000..6797df556
--- /dev/null
+++ b/costa/native/external/lapack/zheevx.f
@@ -0,0 +1,426 @@
+      SUBROUTINE ZHEEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU,
+     $                   ABSTOL, M, W, Z, LDZ, WORK, LWORK, RWORK,
+     $                   IWORK, IFAIL, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE, UPLO
+      INTEGER            IL, INFO, IU, LDA, LDZ, LWORK, M, N
+      DOUBLE PRECISION   ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IFAIL( * ), IWORK( * )
+      DOUBLE PRECISION   RWORK( * ), W( * )
+      COMPLEX*16         A( LDA, * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHEEVX computes selected eigenvalues and, optionally, eigenvectors
+*  of a complex Hermitian matrix A.  Eigenvalues and eigenvectors can
+*  be selected by specifying either a range of values or a range of
+*  indices for the desired eigenvalues.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found.
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found.
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA, N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of A contains the
+*          upper triangular part of the matrix A.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of A contains
+*          the lower triangular part of the matrix A.
+*          On exit, the lower triangle (if UPLO='L') or the upper
+*          triangle (if UPLO='U') of A, including the diagonal, is
+*          destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  VL      (input) DOUBLE PRECISION
+*  VU      (input) DOUBLE PRECISION
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) DOUBLE PRECISION
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less than
+*          or equal to zero, then  EPS*|T|  will be used in its place,
+*          where |T| is the 1-norm of the tridiagonal matrix obtained
+*          by reducing A to tridiagonal form.
+*
+*          Eigenvalues will be computed most accurately when ABSTOL is
+*          set to twice the underflow threshold 2*DLAMCH('S'), not zero.
+*          If this routine returns with INFO>0, indicating that some
+*          eigenvectors did not converge, try setting ABSTOL to
+*          2*DLAMCH('S').
+*
+*          See "Computing Small Singular Values of Bidiagonal Matrices
+*          with Guaranteed High Relative Accuracy," by Demmel and
+*          Kahan, LAPACK Working Note #3.
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          On normal exit, the first M elements contain the selected
+*          eigenvalues in ascending order.
+*
+*  Z       (output) COMPLEX*16 array, dimension (LDZ, max(1,M))
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix A
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          If an eigenvector fails to converge, then that column of Z
+*          contains the latest approximation to the eigenvector, and the
+*          index of the eigenvector is returned in IFAIL.
+*          If JOBZ = 'N', then Z is not referenced.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.  LWORK >= max(1,2*N-1).
+*          For optimal efficiency, LWORK >= (NB+1)*N,
+*          where NB is the max of the blocksize for ZHETRD and for
+*          ZUNMTR as returned by ILAENV.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (7*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (5*N)
+*
+*  IFAIL   (output) INTEGER array, dimension (N)
+*          If JOBZ = 'V', then if INFO = 0, the first M elements of
+*          IFAIL are zero.  If INFO > 0, then IFAIL contains the
+*          indices of the eigenvectors that failed to converge.
+*          If JOBZ = 'N', then IFAIL is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, then i eigenvectors failed to converge.
+*                Their indices are stored in array IFAIL.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+      COMPLEX*16         CONE
+      PARAMETER          ( CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, LOWER, LQUERY, VALEIG, WANTZ
+      CHARACTER          ORDER
+      INTEGER            I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL,
+     $                   INDISP, INDIWK, INDRWK, INDTAU, INDWRK, ISCALE,
+     $                   ITMP1, J, JJ, LLWORK, LOPT, LWKOPT, NB, NSPLIT
+      DOUBLE PRECISION   ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN,
+     $                   SIGMA, SMLNUM, TMP1, VLL, VUU
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, ZLANHE
+      EXTERNAL           LSAME, ILAENV, DLAMCH, ZLANHE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DSCAL, DSTEBZ, DSTERF, XERBLA, ZDSCAL,
+     $                   ZHETRD, ZLACPY, ZSTEIN, ZSTEQR, ZSWAP, ZUNGTR,
+     $                   ZUNMTR
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      LOWER = LSAME( UPLO, 'L' )
+      WANTZ = LSAME( JOBZ, 'V' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( LOWER .OR. LSAME( UPLO, 'U' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE
+         IF( VALEIG ) THEN
+            IF( N.GT.0 .AND. VU.LE.VL )
+     $         INFO = -8
+         ELSE IF( INDEIG ) THEN
+            IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN
+               INFO = -9
+            ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN
+               INFO = -10
+            END IF
+         END IF
+      END IF
+      IF( INFO.EQ.0 ) THEN
+         IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+            INFO = -15
+         ELSE IF( LWORK.LT.MAX( 1, 2*N-1 ) .AND. .NOT.LQUERY ) THEN
+            INFO = -17
+         END IF
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'ZHETRD', UPLO, N, -1, -1, -1 )
+         NB = MAX( NB, ILAENV( 1, 'ZUNMTR', UPLO, N, -1, -1, -1 ) )
+         LWKOPT = ( NB+1 )*N
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHEEVX', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( N.EQ.1 ) THEN
+         WORK( 1 ) = 1
+         IF( ALLEIG .OR. INDEIG ) THEN
+            M = 1
+            W( 1 ) = A( 1, 1 )
+         ELSE IF( VALEIG ) THEN
+            IF( VL.LT.DBLE( A( 1, 1 ) ) .AND. VU.GE.DBLE( A( 1, 1 ) ) )
+     $           THEN
+               M = 1
+               W( 1 ) = A( 1, 1 )
+            END IF
+         END IF
+         IF( WANTZ )
+     $      Z( 1, 1 ) = CONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      EPS = DLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ISCALE = 0
+      ABSTLL = ABSTOL
+      VLL = VL
+      VUU = VU
+      ANRM = ZLANHE( 'M', UPLO, N, A, LDA, RWORK )
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         IF( LOWER ) THEN
+            DO 10 J = 1, N
+               CALL ZDSCAL( N-J+1, SIGMA, A( J, J ), 1 )
+   10       CONTINUE
+         ELSE
+            DO 20 J = 1, N
+               CALL ZDSCAL( J, SIGMA, A( 1, J ), 1 )
+   20       CONTINUE
+         END IF
+         IF( ABSTOL.GT.0 )
+     $      ABSTLL = ABSTOL*SIGMA
+         IF( VALEIG ) THEN
+            VLL = VL*SIGMA
+            VUU = VU*SIGMA
+         END IF
+      END IF
+*
+*     Call ZHETRD to reduce Hermitian matrix to tridiagonal form.
+*
+      INDD = 1
+      INDE = INDD + N
+      INDRWK = INDE + N
+      INDTAU = 1
+      INDWRK = INDTAU + N
+      LLWORK = LWORK - INDWRK + 1
+      CALL ZHETRD( UPLO, N, A, LDA, RWORK( INDD ), RWORK( INDE ),
+     $             WORK( INDTAU ), WORK( INDWRK ), LLWORK, IINFO )
+      LOPT = N + WORK( INDWRK )
+*
+*     If all eigenvalues are desired and ABSTOL is less than or equal to
+*     zero, then call DSTERF or ZUNGTR and ZSTEQR.  If this fails for
+*     some eigenvalue, then try DSTEBZ.
+*
+      IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND.
+     $    ( ABSTOL.LE.ZERO ) ) THEN
+         CALL DCOPY( N, RWORK( INDD ), 1, W, 1 )
+         INDEE = INDRWK + 2*N
+         IF( .NOT.WANTZ ) THEN
+            CALL DCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 )
+            CALL DSTERF( N, W, RWORK( INDEE ), INFO )
+         ELSE
+            CALL ZLACPY( 'A', N, N, A, LDA, Z, LDZ )
+            CALL ZUNGTR( UPLO, N, Z, LDZ, WORK( INDTAU ),
+     $                   WORK( INDWRK ), LLWORK, IINFO )
+            CALL DCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 )
+            CALL ZSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ,
+     $                   RWORK( INDRWK ), INFO )
+            IF( INFO.EQ.0 ) THEN
+               DO 30 I = 1, N
+                  IFAIL( I ) = 0
+   30          CONTINUE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            M = N
+            GO TO 40
+         END IF
+         INFO = 0
+      END IF
+*
+*     Otherwise, call DSTEBZ and, if eigenvectors are desired, ZSTEIN.
+*
+      IF( WANTZ ) THEN
+         ORDER = 'B'
+      ELSE
+         ORDER = 'E'
+      END IF
+      INDIBL = 1
+      INDISP = INDIBL + N
+      INDIWK = INDISP + N
+      CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL,
+     $             RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W,
+     $             IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ),
+     $             IWORK( INDIWK ), INFO )
+*
+      IF( WANTZ ) THEN
+         CALL ZSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W,
+     $                IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ,
+     $                RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO )
+*
+*        Apply unitary matrix used in reduction to tridiagonal
+*        form to eigenvectors returned by ZSTEIN.
+*
+         CALL ZUNMTR( 'L', UPLO, 'N', N, M, A, LDA, WORK( INDTAU ), Z,
+     $                LDZ, WORK( INDWRK ), LLWORK, IINFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+   40 CONTINUE
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = M
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL DSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+*     If eigenvalues are not in order, then sort them, along with
+*     eigenvectors.
+*
+      IF( WANTZ ) THEN
+         DO 60 J = 1, M - 1
+            I = 0
+            TMP1 = W( J )
+            DO 50 JJ = J + 1, M
+               IF( W( JJ ).LT.TMP1 ) THEN
+                  I = JJ
+                  TMP1 = W( JJ )
+               END IF
+   50       CONTINUE
+*
+            IF( I.NE.0 ) THEN
+               ITMP1 = IWORK( INDIBL+I-1 )
+               W( I ) = W( J )
+               IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 )
+               W( J ) = TMP1
+               IWORK( INDIBL+J-1 ) = ITMP1
+               CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 )
+               IF( INFO.NE.0 ) THEN
+                  ITMP1 = IFAIL( I )
+                  IFAIL( I ) = IFAIL( J )
+                  IFAIL( J ) = ITMP1
+               END IF
+            END IF
+   60    CONTINUE
+      END IF
+*
+*     Set WORK(1) to optimal complex workspace size.
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of ZHEEVX
+*
+      END
diff --git a/costa/native/external/lapack/zhegs2.f b/costa/native/external/lapack/zhegs2.f
new file mode 100644
index 000000000..63a5ef387
--- /dev/null
+++ b/costa/native/external/lapack/zhegs2.f
@@ -0,0 +1,225 @@
+      SUBROUTINE ZHEGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, ITYPE, LDA, LDB, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHEGS2 reduces a complex Hermitian-definite generalized
+*  eigenproblem to standard form.
+*
+*  If ITYPE = 1, the problem is A*x = lambda*B*x,
+*  and A is overwritten by inv(U')*A*inv(U) or inv(L)*A*inv(L')
+*
+*  If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
+*  B*A*x = lambda*x, and A is overwritten by U*A*U` or L'*A*L.
+*
+*  B must have been previously factorized as U'*U or L*L' by ZPOTRF.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          = 1: compute inv(U')*A*inv(U) or inv(L)*A*inv(L');
+*          = 2 or 3: compute U*A*U' or L'*A*L.
+*
+*  UPLO    (input) CHARACTER
+*          Specifies whether the upper or lower triangular part of the
+*          Hermitian matrix A is stored, and how B has been factorized.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+*          n by n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n by n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the transformed matrix, stored in the
+*          same format as A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input) COMPLEX*16 array, dimension (LDB,N)
+*          The triangular factor from the Cholesky factorization of B,
+*          as returned by ZPOTRF.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, HALF
+      PARAMETER          ( ONE = 1.0D+0, HALF = 0.5D+0 )
+      COMPLEX*16         CONE
+      PARAMETER          ( CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            K
+      DOUBLE PRECISION   AKK, BKK
+      COMPLEX*16         CT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZAXPY, ZDSCAL, ZHER2, ZLACGV, ZTRMV,
+     $                   ZTRSV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHEGS2', -INFO )
+         RETURN
+      END IF
+*
+      IF( ITYPE.EQ.1 ) THEN
+         IF( UPPER ) THEN
+*
+*           Compute inv(U')*A*inv(U)
+*
+            DO 10 K = 1, N
+*
+*              Update the upper triangle of A(k:n,k:n)
+*
+               AKK = A( K, K )
+               BKK = B( K, K )
+               AKK = AKK / BKK**2
+               A( K, K ) = AKK
+               IF( K.LT.N ) THEN
+                  CALL ZDSCAL( N-K, ONE / BKK, A( K, K+1 ), LDA )
+                  CT = -HALF*AKK
+                  CALL ZLACGV( N-K, A( K, K+1 ), LDA )
+                  CALL ZLACGV( N-K, B( K, K+1 ), LDB )
+                  CALL ZAXPY( N-K, CT, B( K, K+1 ), LDB, A( K, K+1 ),
+     $                        LDA )
+                  CALL ZHER2( UPLO, N-K, -CONE, A( K, K+1 ), LDA,
+     $                        B( K, K+1 ), LDB, A( K+1, K+1 ), LDA )
+                  CALL ZAXPY( N-K, CT, B( K, K+1 ), LDB, A( K, K+1 ),
+     $                        LDA )
+                  CALL ZLACGV( N-K, B( K, K+1 ), LDB )
+                  CALL ZTRSV( UPLO, 'Conjugate transpose', 'Non-unit',
+     $                        N-K, B( K+1, K+1 ), LDB, A( K, K+1 ),
+     $                        LDA )
+                  CALL ZLACGV( N-K, A( K, K+1 ), LDA )
+               END IF
+   10       CONTINUE
+         ELSE
+*
+*           Compute inv(L)*A*inv(L')
+*
+            DO 20 K = 1, N
+*
+*              Update the lower triangle of A(k:n,k:n)
+*
+               AKK = A( K, K )
+               BKK = B( K, K )
+               AKK = AKK / BKK**2
+               A( K, K ) = AKK
+               IF( K.LT.N ) THEN
+                  CALL ZDSCAL( N-K, ONE / BKK, A( K+1, K ), 1 )
+                  CT = -HALF*AKK
+                  CALL ZAXPY( N-K, CT, B( K+1, K ), 1, A( K+1, K ), 1 )
+                  CALL ZHER2( UPLO, N-K, -CONE, A( K+1, K ), 1,
+     $                        B( K+1, K ), 1, A( K+1, K+1 ), LDA )
+                  CALL ZAXPY( N-K, CT, B( K+1, K ), 1, A( K+1, K ), 1 )
+                  CALL ZTRSV( UPLO, 'No transpose', 'Non-unit', N-K,
+     $                        B( K+1, K+1 ), LDB, A( K+1, K ), 1 )
+               END IF
+   20       CONTINUE
+         END IF
+      ELSE
+         IF( UPPER ) THEN
+*
+*           Compute U*A*U'
+*
+            DO 30 K = 1, N
+*
+*              Update the upper triangle of A(1:k,1:k)
+*
+               AKK = A( K, K )
+               BKK = B( K, K )
+               CALL ZTRMV( UPLO, 'No transpose', 'Non-unit', K-1, B,
+     $                     LDB, A( 1, K ), 1 )
+               CT = HALF*AKK
+               CALL ZAXPY( K-1, CT, B( 1, K ), 1, A( 1, K ), 1 )
+               CALL ZHER2( UPLO, K-1, CONE, A( 1, K ), 1, B( 1, K ), 1,
+     $                     A, LDA )
+               CALL ZAXPY( K-1, CT, B( 1, K ), 1, A( 1, K ), 1 )
+               CALL ZDSCAL( K-1, BKK, A( 1, K ), 1 )
+               A( K, K ) = AKK*BKK**2
+   30       CONTINUE
+         ELSE
+*
+*           Compute L'*A*L
+*
+            DO 40 K = 1, N
+*
+*              Update the lower triangle of A(1:k,1:k)
+*
+               AKK = A( K, K )
+               BKK = B( K, K )
+               CALL ZLACGV( K-1, A( K, 1 ), LDA )
+               CALL ZTRMV( UPLO, 'Conjugate transpose', 'Non-unit', K-1,
+     $                     B, LDB, A( K, 1 ), LDA )
+               CT = HALF*AKK
+               CALL ZLACGV( K-1, B( K, 1 ), LDB )
+               CALL ZAXPY( K-1, CT, B( K, 1 ), LDB, A( K, 1 ), LDA )
+               CALL ZHER2( UPLO, K-1, CONE, A( K, 1 ), LDA, B( K, 1 ),
+     $                     LDB, A, LDA )
+               CALL ZAXPY( K-1, CT, B( K, 1 ), LDB, A( K, 1 ), LDA )
+               CALL ZLACGV( K-1, B( K, 1 ), LDB )
+               CALL ZDSCAL( K-1, BKK, A( K, 1 ), LDA )
+               CALL ZLACGV( K-1, A( K, 1 ), LDA )
+               A( K, K ) = AKK*BKK**2
+   40       CONTINUE
+         END IF
+      END IF
+      RETURN
+*
+*     End of ZHEGS2
+*
+      END
diff --git a/costa/native/external/lapack/zhegst.f b/costa/native/external/lapack/zhegst.f
new file mode 100644
index 000000000..dfaa47af3
--- /dev/null
+++ b/costa/native/external/lapack/zhegst.f
@@ -0,0 +1,260 @@
+      SUBROUTINE ZHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, ITYPE, LDA, LDB, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHEGST reduces a complex Hermitian-definite generalized
+*  eigenproblem to standard form.
+*
+*  If ITYPE = 1, the problem is A*x = lambda*B*x,
+*  and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H)
+*
+*  If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
+*  B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L.
+*
+*  B must have been previously factorized as U**H*U or L*L**H by ZPOTRF.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          = 1: compute inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H);
+*          = 2 or 3: compute U*A*U**H or L**H*A*L.
+*
+*  UPLO    (input) CHARACTER
+*          = 'U':  Upper triangle of A is stored and B is factored as
+*                  U**H*U;
+*          = 'L':  Lower triangle of A is stored and B is factored as
+*                  L*L**H.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the transformed matrix, stored in the
+*          same format as A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input) COMPLEX*16 array, dimension (LDB,N)
+*          The triangular factor from the Cholesky factorization of B,
+*          as returned by ZPOTRF.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+      COMPLEX*16         CONE, HALF
+      PARAMETER          ( CONE = ( 1.0D+0, 0.0D+0 ),
+     $                   HALF = ( 0.5D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            K, KB, NB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZHEGS2, ZHEMM, ZHER2K, ZTRMM, ZTRSM
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHEGST', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'ZHEGST', UPLO, N, -1, -1, -1 )
+*
+      IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+*        Use unblocked code
+*
+         CALL ZHEGS2( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( ITYPE.EQ.1 ) THEN
+            IF( UPPER ) THEN
+*
+*              Compute inv(U')*A*inv(U)
+*
+               DO 10 K = 1, N, NB
+                  KB = MIN( N-K+1, NB )
+*
+*                 Update the upper triangle of A(k:n,k:n)
+*
+                  CALL ZHEGS2( ITYPE, UPLO, KB, A( K, K ), LDA,
+     $                         B( K, K ), LDB, INFO )
+                  IF( K+KB.LE.N ) THEN
+                     CALL ZTRSM( 'Left', UPLO, 'Conjugate transpose',
+     $                           'Non-unit', KB, N-K-KB+1, CONE,
+     $                           B( K, K ), LDB, A( K, K+KB ), LDA )
+                     CALL ZHEMM( 'Left', UPLO, KB, N-K-KB+1, -HALF,
+     $                           A( K, K ), LDA, B( K, K+KB ), LDB,
+     $                           CONE, A( K, K+KB ), LDA )
+                     CALL ZHER2K( UPLO, 'Conjugate transpose', N-K-KB+1,
+     $                            KB, -CONE, A( K, K+KB ), LDA,
+     $                            B( K, K+KB ), LDB, ONE,
+     $                            A( K+KB, K+KB ), LDA )
+                     CALL ZHEMM( 'Left', UPLO, KB, N-K-KB+1, -HALF,
+     $                           A( K, K ), LDA, B( K, K+KB ), LDB,
+     $                           CONE, A( K, K+KB ), LDA )
+                     CALL ZTRSM( 'Right', UPLO, 'No transpose',
+     $                           'Non-unit', KB, N-K-KB+1, CONE,
+     $                           B( K+KB, K+KB ), LDB, A( K, K+KB ),
+     $                           LDA )
+                  END IF
+   10          CONTINUE
+            ELSE
+*
+*              Compute inv(L)*A*inv(L')
+*
+               DO 20 K = 1, N, NB
+                  KB = MIN( N-K+1, NB )
+*
+*                 Update the lower triangle of A(k:n,k:n)
+*
+                  CALL ZHEGS2( ITYPE, UPLO, KB, A( K, K ), LDA,
+     $                         B( K, K ), LDB, INFO )
+                  IF( K+KB.LE.N ) THEN
+                     CALL ZTRSM( 'Right', UPLO, 'Conjugate transpose',
+     $                           'Non-unit', N-K-KB+1, KB, CONE,
+     $                           B( K, K ), LDB, A( K+KB, K ), LDA )
+                     CALL ZHEMM( 'Right', UPLO, N-K-KB+1, KB, -HALF,
+     $                           A( K, K ), LDA, B( K+KB, K ), LDB,
+     $                           CONE, A( K+KB, K ), LDA )
+                     CALL ZHER2K( UPLO, 'No transpose', N-K-KB+1, KB,
+     $                            -CONE, A( K+KB, K ), LDA,
+     $                            B( K+KB, K ), LDB, ONE,
+     $                            A( K+KB, K+KB ), LDA )
+                     CALL ZHEMM( 'Right', UPLO, N-K-KB+1, KB, -HALF,
+     $                           A( K, K ), LDA, B( K+KB, K ), LDB,
+     $                           CONE, A( K+KB, K ), LDA )
+                     CALL ZTRSM( 'Left', UPLO, 'No transpose',
+     $                           'Non-unit', N-K-KB+1, KB, CONE,
+     $                           B( K+KB, K+KB ), LDB, A( K+KB, K ),
+     $                           LDA )
+                  END IF
+   20          CONTINUE
+            END IF
+         ELSE
+            IF( UPPER ) THEN
+*
+*              Compute U*A*U'
+*
+               DO 30 K = 1, N, NB
+                  KB = MIN( N-K+1, NB )
+*
+*                 Update the upper triangle of A(1:k+kb-1,1:k+kb-1)
+*
+                  CALL ZTRMM( 'Left', UPLO, 'No transpose', 'Non-unit',
+     $                        K-1, KB, CONE, B, LDB, A( 1, K ), LDA )
+                  CALL ZHEMM( 'Right', UPLO, K-1, KB, HALF, A( K, K ),
+     $                        LDA, B( 1, K ), LDB, CONE, A( 1, K ),
+     $                        LDA )
+                  CALL ZHER2K( UPLO, 'No transpose', K-1, KB, CONE,
+     $                         A( 1, K ), LDA, B( 1, K ), LDB, ONE, A,
+     $                         LDA )
+                  CALL ZHEMM( 'Right', UPLO, K-1, KB, HALF, A( K, K ),
+     $                        LDA, B( 1, K ), LDB, CONE, A( 1, K ),
+     $                        LDA )
+                  CALL ZTRMM( 'Right', UPLO, 'Conjugate transpose',
+     $                        'Non-unit', K-1, KB, CONE, B( K, K ), LDB,
+     $                        A( 1, K ), LDA )
+                  CALL ZHEGS2( ITYPE, UPLO, KB, A( K, K ), LDA,
+     $                         B( K, K ), LDB, INFO )
+   30          CONTINUE
+            ELSE
+*
+*              Compute L'*A*L
+*
+               DO 40 K = 1, N, NB
+                  KB = MIN( N-K+1, NB )
+*
+*                 Update the lower triangle of A(1:k+kb-1,1:k+kb-1)
+*
+                  CALL ZTRMM( 'Right', UPLO, 'No transpose', 'Non-unit',
+     $                        KB, K-1, CONE, B, LDB, A( K, 1 ), LDA )
+                  CALL ZHEMM( 'Left', UPLO, KB, K-1, HALF, A( K, K ),
+     $                        LDA, B( K, 1 ), LDB, CONE, A( K, 1 ),
+     $                        LDA )
+                  CALL ZHER2K( UPLO, 'Conjugate transpose', K-1, KB,
+     $                         CONE, A( K, 1 ), LDA, B( K, 1 ), LDB,
+     $                         ONE, A, LDA )
+                  CALL ZHEMM( 'Left', UPLO, KB, K-1, HALF, A( K, K ),
+     $                        LDA, B( K, 1 ), LDB, CONE, A( K, 1 ),
+     $                        LDA )
+                  CALL ZTRMM( 'Left', UPLO, 'Conjugate transpose',
+     $                        'Non-unit', KB, K-1, CONE, B( K, K ), LDB,
+     $                        A( K, 1 ), LDA )
+                  CALL ZHEGS2( ITYPE, UPLO, KB, A( K, K ), LDA,
+     $                         B( K, K ), LDB, INFO )
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      RETURN
+*
+*     End of ZHEGST
+*
+      END
diff --git a/costa/native/external/lapack/zhegv.f b/costa/native/external/lapack/zhegv.f
new file mode 100644
index 000000000..3bfc95b61
--- /dev/null
+++ b/costa/native/external/lapack/zhegv.f
@@ -0,0 +1,229 @@
+      SUBROUTINE ZHEGV( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK,
+     $                  LWORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, ITYPE, LDA, LDB, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   RWORK( * ), W( * )
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHEGV computes all the eigenvalues, and optionally, the eigenvectors
+*  of a complex generalized Hermitian-definite eigenproblem, of the form
+*  A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.
+*  Here A and B are assumed to be Hermitian and B is also
+*  positive definite.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          Specifies the problem type to be solved:
+*          = 1:  A*x = (lambda)*B*x
+*          = 2:  A*B*x = (lambda)*x
+*          = 3:  B*A*x = (lambda)*x
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA, N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of A contains the
+*          upper triangular part of the matrix A.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of A contains
+*          the lower triangular part of the matrix A.
+*
+*          On exit, if JOBZ = 'V', then if INFO = 0, A contains the
+*          matrix Z of eigenvectors.  The eigenvectors are normalized
+*          as follows:
+*          if ITYPE = 1 or 2, Z**H*B*Z = I;
+*          if ITYPE = 3, Z**H*inv(B)*Z = I.
+*          If JOBZ = 'N', then on exit the upper triangle (if UPLO='U')
+*          or the lower triangle (if UPLO='L') of A, including the
+*          diagonal, is destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB, N)
+*          On entry, the Hermitian positive definite matrix B.
+*          If UPLO = 'U', the leading N-by-N upper triangular part of B
+*          contains the upper triangular part of the matrix B.
+*          If UPLO = 'L', the leading N-by-N lower triangular part of B
+*          contains the lower triangular part of the matrix B.
+*
+*          On exit, if INFO <= N, the part of B containing the matrix is
+*          overwritten by the triangular factor U or L from the Cholesky
+*          factorization B = U**H*U or B = L*L**H.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.  LWORK >= max(1,2*N-1).
+*          For optimal efficiency, LWORK >= (NB+1)*N,
+*          where NB is the blocksize for ZHETRD returned by ILAENV.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (max(1, 3*N-2))
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  ZPOTRF or ZHEEV returned an error code:
+*             <= N:  if INFO = i, ZHEEV failed to converge;
+*                    i off-diagonal elements of an intermediate
+*                    tridiagonal form did not converge to zero;
+*             > N:   if INFO = N + i, for 1 <= i <= N, then the leading
+*                    minor of order i of B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, UPPER, WANTZ
+      CHARACTER          TRANS
+      INTEGER            LWKOPT, NB, NEIG
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZHEEV, ZHEGST, ZPOTRF, ZTRMM, ZTRSM
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LWORK.LT.MAX( 1, 2*N-1 ) .AND. .NOT.LQUERY ) THEN
+         INFO = -11
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'ZHETRD', UPLO, N, -1, -1, -1 )
+         LWKOPT = ( NB+1 )*N
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHEGV ', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form a Cholesky factorization of B.
+*
+      CALL ZPOTRF( UPLO, N, B, LDB, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem and solve.
+*
+      CALL ZHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
+      CALL ZHEEV( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, INFO )
+*
+      IF( WANTZ ) THEN
+*
+*        Backtransform eigenvectors to the original problem.
+*
+         NEIG = N
+         IF( INFO.GT.0 )
+     $      NEIG = INFO - 1
+         IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN
+*
+*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
+*           backtransform eigenvectors: x = inv(L)'*y or inv(U)*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'N'
+            ELSE
+               TRANS = 'C'
+            END IF
+*
+            CALL ZTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE,
+     $                  B, LDB, A, LDA )
+*
+         ELSE IF( ITYPE.EQ.3 ) THEN
+*
+*           For B*A*x=(lambda)*x;
+*           backtransform eigenvectors: x = L*y or U'*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'C'
+            ELSE
+               TRANS = 'N'
+            END IF
+*
+            CALL ZTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE,
+     $                  B, LDB, A, LDA )
+         END IF
+      END IF
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of ZHEGV
+*
+      END
diff --git a/costa/native/external/lapack/zhegvd.f b/costa/native/external/lapack/zhegvd.f
new file mode 100644
index 000000000..5362a33e8
--- /dev/null
+++ b/costa/native/external/lapack/zhegvd.f
@@ -0,0 +1,297 @@
+      SUBROUTINE ZHEGVD( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W, WORK,
+     $                   LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, ITYPE, LDA, LDB, LIWORK, LRWORK, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   RWORK( * ), W( * )
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHEGVD computes all the eigenvalues, and optionally, the eigenvectors
+*  of a complex generalized Hermitian-definite eigenproblem, of the form
+*  A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.  Here A and
+*  B are assumed to be Hermitian and B is also positive definite.
+*  If eigenvectors are desired, it uses a divide and conquer algorithm.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          Specifies the problem type to be solved:
+*          = 1:  A*x = (lambda)*B*x
+*          = 2:  A*B*x = (lambda)*x
+*          = 3:  B*A*x = (lambda)*x
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA, N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of A contains the
+*          upper triangular part of the matrix A.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of A contains
+*          the lower triangular part of the matrix A.
+*
+*          On exit, if JOBZ = 'V', then if INFO = 0, A contains the
+*          matrix Z of eigenvectors.  The eigenvectors are normalized
+*          as follows:
+*          if ITYPE = 1 or 2, Z**H*B*Z = I;
+*          if ITYPE = 3, Z**H*inv(B)*Z = I.
+*          If JOBZ = 'N', then on exit the upper triangle (if UPLO='U')
+*          or the lower triangle (if UPLO='L') of A, including the
+*          diagonal, is destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB, N)
+*          On entry, the Hermitian matrix B.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of B contains the
+*          upper triangular part of the matrix B.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of B contains
+*          the lower triangular part of the matrix B.
+*
+*          On exit, if INFO <= N, the part of B containing the matrix is
+*          overwritten by the triangular factor U or L from the Cholesky
+*          factorization B = U**H*U or B = L*L**H.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.
+*          If N <= 1,                LWORK >= 1.
+*          If JOBZ  = 'N' and N > 1, LWORK >= N + 1.
+*          If JOBZ  = 'V' and N > 1, LWORK >= 2*N + N**2.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace/output) DOUBLE PRECISION array, dimension (LRWORK)
+*          On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
+*
+*  LRWORK  (input) INTEGER
+*          The dimension of the array RWORK.
+*          If N <= 1,                LRWORK >= 1.
+*          If JOBZ  = 'N' and N > 1, LRWORK >= N.
+*          If JOBZ  = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2.
+*
+*          If LRWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the RWORK array,
+*          returns this value as the first entry of the RWORK array, and
+*          no error message related to LRWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.
+*          If N <= 1,                LIWORK >= 1.
+*          If JOBZ  = 'N' and N > 1, LIWORK >= 1.
+*          If JOBZ  = 'V' and N > 1, LIWORK >= 3 + 5*N.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  ZPOTRF or ZHEEVD returned an error code:
+*             <= N:  if INFO = i, ZHEEVD failed to converge;
+*                    i off-diagonal elements of an intermediate
+*                    tridiagonal form did not converge to zero;
+*             > N:   if INFO = N + i, for 1 <= i <= N, then the leading
+*                    minor of order i of B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         CONE
+      PARAMETER          ( CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, UPPER, WANTZ
+      CHARACTER          TRANS
+      INTEGER            LIOPT, LIWMIN, LOPT, LROPT, LRWMIN, LWMIN, NEIG
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZHEEVD, ZHEGST, ZPOTRF, ZTRMM, ZTRSM
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+      LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( N.LE.1 ) THEN
+         LWMIN = 1
+         LRWMIN = 1
+         LIWMIN = 1
+         LOPT = LWMIN
+         LROPT = LRWMIN
+         LIOPT = LIWMIN
+      ELSE
+         IF( WANTZ ) THEN
+            LWMIN = 2*N + N*N
+            LRWMIN = 1 + 5*N + 2*N*N
+            LIWMIN = 3 + 5*N
+         ELSE
+            LWMIN = N + 1
+            LRWMIN = N
+            LIWMIN = 1
+         END IF
+         LOPT = LWMIN
+         LROPT = LRWMIN
+         LIOPT = LIWMIN
+      END IF
+      IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -11
+      ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -15
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LOPT
+         RWORK( 1 ) = LROPT
+         IWORK( 1 ) = LIOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHEGVD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form a Cholesky factorization of B.
+*
+      CALL ZPOTRF( UPLO, N, B, LDB, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem and solve.
+*
+      CALL ZHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
+      CALL ZHEEVD( JOBZ, UPLO, N, A, LDA, W, WORK, LWORK, RWORK, LRWORK,
+     $             IWORK, LIWORK, INFO )
+      LOPT = MAX( DBLE( LOPT ), DBLE( WORK( 1 ) ) )
+      LROPT = MAX( DBLE( LROPT ), DBLE( RWORK( 1 ) ) )
+      LIOPT = MAX( DBLE( LIOPT ), DBLE( IWORK( 1 ) ) )
+*
+      IF( WANTZ ) THEN
+*
+*        Backtransform eigenvectors to the original problem.
+*
+         NEIG = N
+         IF( INFO.GT.0 )
+     $      NEIG = INFO - 1
+         IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN
+*
+*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
+*           backtransform eigenvectors: x = inv(L)'*y or inv(U)*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'N'
+            ELSE
+               TRANS = 'C'
+            END IF
+*
+            CALL ZTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, CONE,
+     $                  B, LDB, A, LDA )
+*
+         ELSE IF( ITYPE.EQ.3 ) THEN
+*
+*           For B*A*x=(lambda)*x;
+*           backtransform eigenvectors: x = L*y or U'*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'C'
+            ELSE
+               TRANS = 'N'
+            END IF
+*
+            CALL ZTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, CONE,
+     $                  B, LDB, A, LDA )
+         END IF
+      END IF
+*
+      WORK( 1 ) = LOPT
+      RWORK( 1 ) = LROPT
+      IWORK( 1 ) = LIOPT
+*
+      RETURN
+*
+*     End of ZHEGVD
+*
+      END
diff --git a/costa/native/external/lapack/zhegvx.f b/costa/native/external/lapack/zhegvx.f
new file mode 100644
index 000000000..c5b67aec1
--- /dev/null
+++ b/costa/native/external/lapack/zhegvx.f
@@ -0,0 +1,330 @@
+      SUBROUTINE ZHEGVX( ITYPE, JOBZ, RANGE, UPLO, N, A, LDA, B, LDB,
+     $                   VL, VU, IL, IU, ABSTOL, M, W, Z, LDZ, WORK,
+     $                   LWORK, RWORK, IWORK, IFAIL, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE, UPLO
+      INTEGER            IL, INFO, ITYPE, IU, LDA, LDB, LDZ, LWORK, M, N
+      DOUBLE PRECISION   ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IFAIL( * ), IWORK( * )
+      DOUBLE PRECISION   RWORK( * ), W( * )
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), WORK( * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHEGVX computes selected eigenvalues, and optionally, eigenvectors
+*  of a complex generalized Hermitian-definite eigenproblem, of the form
+*  A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.  Here A and
+*  B are assumed to be Hermitian and B is also positive definite.
+*  Eigenvalues and eigenvectors can be selected by specifying either a
+*  range of values or a range of indices for the desired eigenvalues.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          Specifies the problem type to be solved:
+*          = 1:  A*x = (lambda)*B*x
+*          = 2:  A*B*x = (lambda)*x
+*          = 3:  B*A*x = (lambda)*x
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found.
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found.
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+**
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA, N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of A contains the
+*          upper triangular part of the matrix A.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of A contains
+*          the lower triangular part of the matrix A.
+*
+*          On exit,  the lower triangle (if UPLO='L') or the upper
+*          triangle (if UPLO='U') of A, including the diagonal, is
+*          destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB, N)
+*          On entry, the Hermitian matrix B.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of B contains the
+*          upper triangular part of the matrix B.  If UPLO = 'L',
+*          the leading N-by-N lower triangular part of B contains
+*          the lower triangular part of the matrix B.
+*
+*          On exit, if INFO <= N, the part of B containing the matrix is
+*          overwritten by the triangular factor U or L from the Cholesky
+*          factorization B = U**H*U or B = L*L**H.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  VL      (input) DOUBLE PRECISION
+*  VU      (input) DOUBLE PRECISION
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) DOUBLE PRECISION
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less than
+*          or equal to zero, then  EPS*|T|  will be used in its place,
+*          where |T| is the 1-norm of the tridiagonal matrix obtained
+*          by reducing A to tridiagonal form.
+*
+*          Eigenvalues will be computed most accurately when ABSTOL is
+*          set to twice the underflow threshold 2*DLAMCH('S'), not zero.
+*          If this routine returns with INFO>0, indicating that some
+*          eigenvectors did not converge, try setting ABSTOL to
+*          2*DLAMCH('S').
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          The first M elements contain the selected
+*          eigenvalues in ascending order.
+*
+*  Z       (output) COMPLEX*16 array, dimension (LDZ, max(1,M))
+*          If JOBZ = 'N', then Z is not referenced.
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix A
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          The eigenvectors are normalized as follows:
+*          if ITYPE = 1 or 2, Z**T*B*Z = I;
+*          if ITYPE = 3, Z**T*inv(B)*Z = I.
+*
+*          If an eigenvector fails to converge, then that column of Z
+*          contains the latest approximation to the eigenvector, and the
+*          index of the eigenvector is returned in IFAIL.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of the array WORK.  LWORK >= max(1,2*N-1).
+*          For optimal efficiency, LWORK >= (NB+1)*N,
+*          where NB is the blocksize for ZHETRD returned by ILAENV.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (7*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (5*N)
+*
+*  IFAIL   (output) INTEGER array, dimension (N)
+*          If JOBZ = 'V', then if INFO = 0, the first M elements of
+*          IFAIL are zero.  If INFO > 0, then IFAIL contains the
+*          indices of the eigenvectors that failed to converge.
+*          If JOBZ = 'N', then IFAIL is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  ZPOTRF or ZHEEVX returned an error code:
+*             <= N:  if INFO = i, ZHEEVX failed to converge;
+*                    i eigenvectors failed to converge.  Their indices
+*                    are stored in array IFAIL.
+*             > N:   if INFO = N + i, for 1 <= i <= N, then the leading
+*                    minor of order i of B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         CONE
+      PARAMETER          ( CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, LQUERY, UPPER, VALEIG, WANTZ
+      CHARACTER          TRANS
+      INTEGER            LOPT, LWKOPT, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZHEEVX, ZHEGST, ZPOTRF, ZTRMM, ZTRSM
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( VALEIG .AND. N.GT.0 ) THEN
+         IF( VU.LE.VL )
+     $      INFO = -11
+      ELSE IF( INDEIG .AND. IL.LT.1 ) THEN
+         INFO = -12
+      ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN
+         INFO = -13
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -18
+      ELSE IF( LWORK.LT.MAX( 1, 2*N-1 ) .AND. .NOT.LQUERY ) THEN
+         INFO = -20
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'ZHETRD', UPLO, N, -1, -1, -1 )
+         LWKOPT = ( NB+1 )*N
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHEGVX', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+*     Form a Cholesky factorization of B.
+*
+      CALL ZPOTRF( UPLO, N, B, LDB, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem and solve.
+*
+      CALL ZHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
+      CALL ZHEEVX( JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, ABSTOL,
+     $             M, W, Z, LDZ, WORK, LWORK, RWORK, IWORK, IFAIL,
+     $             INFO )
+      LOPT = WORK( 1 )
+*
+      IF( WANTZ ) THEN
+*
+*        Backtransform eigenvectors to the original problem.
+*
+         IF( INFO.GT.0 )
+     $      M = INFO - 1
+         IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN
+*
+*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
+*           backtransform eigenvectors: x = inv(L)'*y or inv(U)*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'N'
+            ELSE
+               TRANS = 'C'
+            END IF
+*
+            CALL ZTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, M, CONE, B,
+     $                  LDB, Z, LDZ )
+*
+         ELSE IF( ITYPE.EQ.3 ) THEN
+*
+*           For B*A*x=(lambda)*x;
+*           backtransform eigenvectors: x = L*y or U'*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'C'
+            ELSE
+               TRANS = 'N'
+            END IF
+*
+            CALL ZTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, M, CONE, B,
+     $                  LDB, Z, LDZ )
+         END IF
+      END IF
+*
+*     Set WORK(1) to optimal complex workspace size.
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of ZHEGVX
+*
+      END
diff --git a/costa/native/external/lapack/zherfs.f b/costa/native/external/lapack/zherfs.f
new file mode 100644
index 000000000..456841f1d
--- /dev/null
+++ b/costa/native/external/lapack/zherfs.f
@@ -0,0 +1,339 @@
+      SUBROUTINE ZHERFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB,
+     $                   X, LDX, FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX*16         A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHERFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is Hermitian indefinite, and
+*  provides error bounds and backward error estimates for the solution.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,N)
+*          The Hermitian matrix A.  If UPLO = 'U', the leading N-by-N
+*          upper triangular part of A contains the upper triangular part
+*          of the matrix A, and the strictly lower triangular part of A
+*          is not referenced.  If UPLO = 'L', the leading N-by-N lower
+*          triangular part of A contains the lower triangular part of
+*          the matrix A, and the strictly upper triangular part of A is
+*          not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  AF      (input) COMPLEX*16 array, dimension (LDAF,N)
+*          The factored form of the matrix A.  AF contains the block
+*          diagonal matrix D and the multipliers used to obtain the
+*          factor U or L from the factorization A = U*D*U**H or
+*          A = L*D*L**H as computed by ZHETRF.
+*
+*  LDAF    (input) INTEGER
+*          The leading dimension of the array AF.  LDAF >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by ZHETRF.
+*
+*  B       (input) COMPLEX*16 array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) COMPLEX*16 array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by ZHETRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D+0 )
+      DOUBLE PRECISION   THREE
+      PARAMETER          ( THREE = 3.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            COUNT, I, J, K, KASE, NZ
+      DOUBLE PRECISION   EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+      COMPLEX*16         ZDUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZAXPY, ZCOPY, ZHEMV, ZHETRS, ZLACON
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHERFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = DLAMCH( 'Epsilon' )
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - A * X
+*
+         CALL ZCOPY( N, B( 1, J ), 1, WORK, 1 )
+         CALL ZHEMV( UPLO, N, -ONE, A, LDA, X( 1, J ), 1, ONE, WORK, 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            RWORK( I ) = CABS1( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(A)*abs(X) + abs(B).
+*
+         IF( UPPER ) THEN
+            DO 50 K = 1, N
+               S = ZERO
+               XK = CABS1( X( K, J ) )
+               DO 40 I = 1, K - 1
+                  RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK
+                  S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) )
+   40          CONTINUE
+               RWORK( K ) = RWORK( K ) + ABS( DBLE( A( K, K ) ) )*XK + S
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               XK = CABS1( X( K, J ) )
+               RWORK( K ) = RWORK( K ) + ABS( DBLE( A( K, K ) ) )*XK
+               DO 60 I = K + 1, N
+                  RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK
+                  S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) )
+   60          CONTINUE
+               RWORK( K ) = RWORK( K ) + S
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) )
+            ELSE
+               S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) /
+     $             ( RWORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL ZHETRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO )
+            CALL ZAXPY( N, ONE, WORK, 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(A))*
+*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(A) is the inverse of A
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(A)*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use ZLACON to estimate the infinity-norm of the matrix
+*           inv(A) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I )
+            ELSE
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) +
+     $                      SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL ZLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(A').
+*
+               CALL ZHETRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO )
+               DO 110 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  110          CONTINUE
+            ELSE IF( KASE.EQ.2 ) THEN
+*
+*              Multiply by inv(A)*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  120          CONTINUE
+               CALL ZHETRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of ZHERFS
+*
+      END
diff --git a/costa/native/external/lapack/zhesv.f b/costa/native/external/lapack/zhesv.f
new file mode 100644
index 000000000..18fa775c8
--- /dev/null
+++ b/costa/native/external/lapack/zhesv.f
@@ -0,0 +1,171 @@
+      SUBROUTINE ZHESV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK,
+     $                  LWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LDB, LWORK, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHESV computes the solution to a complex system of linear equations
+*     A * X = B,
+*  where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS
+*  matrices.
+*
+*  The diagonal pivoting method is used to factor A as
+*     A = U * D * U**H,  if UPLO = 'U', or
+*     A = L * D * L**H,  if UPLO = 'L',
+*  where U (or L) is a product of permutation and unit upper (lower)
+*  triangular matrices, and D is Hermitian and block diagonal with
+*  1-by-1 and 2-by-2 diagonal blocks.  The factored form of A is then
+*  used to solve the system of equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the block diagonal matrix D and the
+*          multipliers used to obtain the factor U or L from the
+*          factorization A = U*D*U**H or A = L*D*L**H as computed by
+*          ZHETRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D, as
+*          determined by ZHETRF.  If IPIV(k) > 0, then rows and columns
+*          k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1
+*          diagonal block.  If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0,
+*          then rows and columns k-1 and -IPIV(k) were interchanged and
+*          D(k-1:k,k-1:k) is a 2-by-2 diagonal block.  If UPLO = 'L' and
+*          IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and
+*          -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2
+*          diagonal block.
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of WORK.  LWORK >= 1, and for best performance
+*          LWORK >= N*NB, where NB is the optimal blocksize for
+*          ZHETRF.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, D(i,i) is exactly zero.  The factorization
+*               has been completed, but the block diagonal matrix D is
+*               exactly singular, so the solution could not be computed.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            LWKOPT, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZHETRF, ZHETRS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN
+         INFO = -10
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'ZHETRF', UPLO, N, -1, -1, -1 )
+         LWKOPT = N*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHESV ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Compute the factorization A = U*D*U' or A = L*D*L'.
+*
+      CALL ZHETRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL ZHETRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
+*
+      END IF
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of ZHESV
+*
+      END
diff --git a/costa/native/external/lapack/zhesvx.f b/costa/native/external/lapack/zhesvx.f
new file mode 100644
index 000000000..399f14864
--- /dev/null
+++ b/costa/native/external/lapack/zhesvx.f
@@ -0,0 +1,299 @@
+      SUBROUTINE ZHESVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B,
+     $                   LDB, X, LDX, RCOND, FERR, BERR, WORK, LWORK,
+     $                   RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          FACT, UPLO
+      INTEGER            INFO, LDA, LDAF, LDB, LDX, LWORK, N, NRHS
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX*16         A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHESVX uses the diagonal pivoting factorization to compute the
+*  solution to a complex system of linear equations A * X = B,
+*  where A is an N-by-N Hermitian matrix and X and B are N-by-NRHS
+*  matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'N', the diagonal pivoting method is used to factor A.
+*     The form of the factorization is
+*        A = U * D * U**H,  if UPLO = 'U', or
+*        A = L * D * L**H,  if UPLO = 'L',
+*     where U (or L) is a product of permutation and unit upper (lower)
+*     triangular matrices, and D is Hermitian and block diagonal with
+*     1-by-1 and 2-by-2 diagonal blocks.
+*
+*  2. If some D(i,i)=0, so that D is exactly singular, then the routine
+*     returns with INFO = i. Otherwise, the factored form of A is used
+*     to estimate the condition number of the matrix A.  If the
+*     reciprocal of the condition number is less than machine precision,
+*     INFO = N+1 is returned as a warning, but the routine still goes on
+*     to solve for X and compute error bounds as described below.
+*
+*  3. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  4. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of A has been
+*          supplied on entry.
+*          = 'F':  On entry, AF and IPIV contain the factored form
+*                  of A.  A, AF and IPIV will not be modified.
+*          = 'N':  The matrix A will be copied to AF and factored.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,N)
+*          The Hermitian matrix A.  If UPLO = 'U', the leading N-by-N
+*          upper triangular part of A contains the upper triangular part
+*          of the matrix A, and the strictly lower triangular part of A
+*          is not referenced.  If UPLO = 'L', the leading N-by-N lower
+*          triangular part of A contains the lower triangular part of
+*          the matrix A, and the strictly upper triangular part of A is
+*          not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  AF      (input or output) COMPLEX*16 array, dimension (LDAF,N)
+*          If FACT = 'F', then AF is an input argument and on entry
+*          contains the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L from the factorization
+*          A = U*D*U**H or A = L*D*L**H as computed by ZHETRF.
+*
+*          If FACT = 'N', then AF is an output argument and on exit
+*          returns the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L from the factorization
+*          A = U*D*U**H or A = L*D*L**H.
+*
+*  LDAF    (input) INTEGER
+*          The leading dimension of the array AF.  LDAF >= max(1,N).
+*
+*  IPIV    (input or output) INTEGER array, dimension (N)
+*          If FACT = 'F', then IPIV is an input argument and on entry
+*          contains details of the interchanges and the block structure
+*          of D, as determined by ZHETRF.
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*          If FACT = 'N', then IPIV is an output argument and on exit
+*          contains details of the interchanges and the block structure
+*          of D, as determined by ZHETRF.
+*
+*  B       (input) COMPLEX*16 array, dimension (LDB,NRHS)
+*          The N-by-NRHS right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) COMPLEX*16 array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The estimate of the reciprocal condition number of the matrix
+*          A.  If RCOND is less than the machine precision (in
+*          particular, if RCOND = 0), the matrix is singular to working
+*          precision.  This condition is indicated by a return code of
+*          INFO > 0.
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of WORK.  LWORK >= 2*N, and for best performance
+*          LWORK >= N*NB, where NB is the optimal blocksize for
+*          ZHETRF.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, and i is
+*                <= N:  D(i,i) is exactly zero.  The factorization
+*                       has been completed but the factor D is exactly
+*                       singular, so the solution and error bounds could
+*                       not be computed. RCOND = 0 is returned.
+*                = N+1: D is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, NOFACT
+      INTEGER            LWKOPT, NB
+      DOUBLE PRECISION   ANORM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, ZLANHE
+      EXTERNAL           LSAME, ILAENV, DLAMCH, ZLANHE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZHECON, ZHERFS, ZHETRF, ZHETRS, ZLACPY
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -13
+      ELSE IF( LWORK.LT.MAX( 1, 2*N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -18
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'ZHETRF', UPLO, N, -1, -1, -1 )
+         LWKOPT = N*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHESVX', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+      IF( NOFACT ) THEN
+*
+*        Compute the factorization A = U*D*U' or A = L*D*L'.
+*
+         CALL ZLACPY( UPLO, N, N, A, LDA, AF, LDAF )
+         CALL ZHETRF( UPLO, N, AF, LDAF, IPIV, WORK, LWORK, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      ANORM = ZLANHE( 'I', UPLO, N, A, LDA, RWORK )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL ZHECON( UPLO, N, AF, LDAF, IPIV, ANORM, RCOND, WORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.DLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution vectors X.
+*
+      CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL ZHETRS( UPLO, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solutions and
+*     compute error bounds and backward error estimates for them.
+*
+      CALL ZHERFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, X,
+     $             LDX, FERR, BERR, WORK, RWORK, INFO )
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of ZHESVX
+*
+      END
diff --git a/costa/native/external/lapack/zhetd2.f b/costa/native/external/lapack/zhetd2.f
new file mode 100644
index 000000000..cbf4b3187
--- /dev/null
+++ b/costa/native/external/lapack/zhetd2.f
@@ -0,0 +1,259 @@
+      SUBROUTINE ZHETD2( UPLO, N, A, LDA, D, E, TAU, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * ), E( * )
+      COMPLEX*16         A( LDA, * ), TAU( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHETD2 reduces a complex Hermitian matrix A to real symmetric
+*  tridiagonal form T by a unitary similarity transformation:
+*  Q' * A * Q = T.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          Hermitian matrix A is stored:
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+*          n-by-n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n-by-n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*          On exit, if UPLO = 'U', the diagonal and first superdiagonal
+*          of A are overwritten by the corresponding elements of the
+*          tridiagonal matrix T, and the elements above the first
+*          superdiagonal, with the array TAU, represent the unitary
+*          matrix Q as a product of elementary reflectors; if UPLO
+*          = 'L', the diagonal and first subdiagonal of A are over-
+*          written by the corresponding elements of the tridiagonal
+*          matrix T, and the elements below the first subdiagonal, with
+*          the array TAU, represent the unitary matrix Q as a product
+*          of elementary reflectors. See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  D       (output) DOUBLE PRECISION array, dimension (N)
+*          The diagonal elements of the tridiagonal matrix T:
+*          D(i) = A(i,i).
+*
+*  E       (output) DOUBLE PRECISION array, dimension (N-1)
+*          The off-diagonal elements of the tridiagonal matrix T:
+*          E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.
+*
+*  TAU     (output) COMPLEX*16 array, dimension (N-1)
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  If UPLO = 'U', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(n-1) . . . H(2) H(1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
+*  A(1:i-1,i+1), and tau in TAU(i).
+*
+*  If UPLO = 'L', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(1) H(2) . . . H(n-1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i),
+*  and tau in TAU(i).
+*
+*  The contents of A on exit are illustrated by the following examples
+*  with n = 5:
+*
+*  if UPLO = 'U':                       if UPLO = 'L':
+*
+*    (  d   e   v2  v3  v4 )              (  d                  )
+*    (      d   e   v3  v4 )              (  e   d              )
+*    (          d   e   v4 )              (  v1  e   d          )
+*    (              d   e  )              (  v1  v2  e   d      )
+*    (                  d  )              (  v1  v2  v3  e   d  )
+*
+*  where d and e denote diagonal and off-diagonal elements of T, and vi
+*  denotes an element of the vector defining H(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE, ZERO, HALF
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ),
+     $                   ZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   HALF = ( 0.5D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I
+      COMPLEX*16         ALPHA, TAUI
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZAXPY, ZHEMV, ZHER2, ZLARFG
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      COMPLEX*16         ZDOTC
+      EXTERNAL           LSAME, ZDOTC
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHETD2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Reduce the upper triangle of A
+*
+         A( N, N ) = DBLE( A( N, N ) )
+         DO 10 I = N - 1, 1, -1
+*
+*           Generate elementary reflector H(i) = I - tau * v * v'
+*           to annihilate A(1:i-1,i+1)
+*
+            ALPHA = A( I, I+1 )
+            CALL ZLARFG( I, ALPHA, A( 1, I+1 ), 1, TAUI )
+            E( I ) = ALPHA
+*
+            IF( TAUI.NE.ZERO ) THEN
+*
+*              Apply H(i) from both sides to A(1:i,1:i)
+*
+               A( I, I+1 ) = ONE
+*
+*              Compute  x := tau * A * v  storing x in TAU(1:i)
+*
+               CALL ZHEMV( UPLO, I, TAUI, A, LDA, A( 1, I+1 ), 1, ZERO,
+     $                     TAU, 1 )
+*
+*              Compute  w := x - 1/2 * tau * (x'*v) * v
+*
+               ALPHA = -HALF*TAUI*ZDOTC( I, TAU, 1, A( 1, I+1 ), 1 )
+               CALL ZAXPY( I, ALPHA, A( 1, I+1 ), 1, TAU, 1 )
+*
+*              Apply the transformation as a rank-2 update:
+*                 A := A - v * w' - w * v'
+*
+               CALL ZHER2( UPLO, I, -ONE, A( 1, I+1 ), 1, TAU, 1, A,
+     $                     LDA )
+*
+            ELSE
+               A( I, I ) = DBLE( A( I, I ) )
+            END IF
+            A( I, I+1 ) = E( I )
+            D( I+1 ) = A( I+1, I+1 )
+            TAU( I ) = TAUI
+   10    CONTINUE
+         D( 1 ) = A( 1, 1 )
+      ELSE
+*
+*        Reduce the lower triangle of A
+*
+         A( 1, 1 ) = DBLE( A( 1, 1 ) )
+         DO 20 I = 1, N - 1
+*
+*           Generate elementary reflector H(i) = I - tau * v * v'
+*           to annihilate A(i+2:n,i)
+*
+            ALPHA = A( I+1, I )
+            CALL ZLARFG( N-I, ALPHA, A( MIN( I+2, N ), I ), 1, TAUI )
+            E( I ) = ALPHA
+*
+            IF( TAUI.NE.ZERO ) THEN
+*
+*              Apply H(i) from both sides to A(i+1:n,i+1:n)
+*
+               A( I+1, I ) = ONE
+*
+*              Compute  x := tau * A * v  storing y in TAU(i:n-1)
+*
+               CALL ZHEMV( UPLO, N-I, TAUI, A( I+1, I+1 ), LDA,
+     $                     A( I+1, I ), 1, ZERO, TAU( I ), 1 )
+*
+*              Compute  w := x - 1/2 * tau * (x'*v) * v
+*
+               ALPHA = -HALF*TAUI*ZDOTC( N-I, TAU( I ), 1, A( I+1, I ),
+     $                 1 )
+               CALL ZAXPY( N-I, ALPHA, A( I+1, I ), 1, TAU( I ), 1 )
+*
+*              Apply the transformation as a rank-2 update:
+*                 A := A - v * w' - w * v'
+*
+               CALL ZHER2( UPLO, N-I, -ONE, A( I+1, I ), 1, TAU( I ), 1,
+     $                     A( I+1, I+1 ), LDA )
+*
+            ELSE
+               A( I+1, I+1 ) = DBLE( A( I+1, I+1 ) )
+            END IF
+            A( I+1, I ) = E( I )
+            D( I ) = A( I, I )
+            TAU( I ) = TAUI
+   20    CONTINUE
+         D( N ) = A( N, N )
+      END IF
+*
+      RETURN
+*
+*     End of ZHETD2
+*
+      END
diff --git a/costa/native/external/lapack/zhetf2.f b/costa/native/external/lapack/zhetf2.f
new file mode 100644
index 000000000..7a0fb8b11
--- /dev/null
+++ b/costa/native/external/lapack/zhetf2.f
@@ -0,0 +1,546 @@
+      SUBROUTINE ZHETF2( UPLO, N, A, LDA, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHETF2 computes the factorization of a complex Hermitian matrix A
+*  using the Bunch-Kaufman diagonal pivoting method:
+*
+*     A = U*D*U'  or  A = L*D*L'
+*
+*  where U (or L) is a product of permutation and unit upper (lower)
+*  triangular matrices, U' is the conjugate transpose of U, and D is
+*  Hermitian and block diagonal with 1-by-1 and 2-by-2 diagonal blocks.
+*
+*  This is the unblocked version of the algorithm, calling Level 2 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          Hermitian matrix A is stored:
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+*          n-by-n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n-by-n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L (see below for further details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D.
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*          > 0: if INFO = k, D(k,k) is exactly zero.  The factorization
+*               has been completed, but the block diagonal matrix D is
+*               exactly singular, and division by zero will occur if it
+*               is used to solve a system of equations.
+*
+*  Further Details
+*  ===============
+*
+*  1-96 - Based on modifications by
+*    J. Lewis, Boeing Computer Services Company
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+*  If UPLO = 'U', then A = U*D*U', where
+*     U = P(n)*U(n)* ... *P(k)U(k)* ...,
+*  i.e., U is a product of terms P(k)*U(k), where k decreases from n to
+*  1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and U(k) is a unit upper triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    v    0   )   k-s
+*     U(k) =  (   0    I    0   )   s
+*             (   0    0    I   )   n-k
+*                k-s   s   n-k
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k).
+*  If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k),
+*  and A(k,k), and v overwrites A(1:k-2,k-1:k).
+*
+*  If UPLO = 'L', then A = L*D*L', where
+*     L = P(1)*L(1)* ... *P(k)*L(k)* ...,
+*  i.e., L is a product of terms P(k)*L(k), where k increases from 1 to
+*  n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and L(k) is a unit lower triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    0     0   )  k-1
+*     L(k) =  (   0    I     0   )  s
+*             (   0    v     I   )  n-k-s+1
+*                k-1   s  n-k-s+1
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k).
+*  If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k),
+*  and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+      DOUBLE PRECISION   EIGHT, SEVTEN
+      PARAMETER          ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, IMAX, J, JMAX, K, KK, KP, KSTEP
+      DOUBLE PRECISION   ABSAKK, ALPHA, COLMAX, D, D11, D22, R1, ROWMAX,
+     $                   TT
+      COMPLEX*16         D12, D21, T, WK, WKM1, WKP1, ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IZAMAX
+      DOUBLE PRECISION   DLAPY2
+      EXTERNAL           LSAME, IZAMAX, DLAPY2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZDSCAL, ZHER, ZSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, SQRT
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHETF2', -INFO )
+         RETURN
+      END IF
+*
+*     Initialize ALPHA for use in choosing pivot block size.
+*
+      ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT
+*
+      IF( UPPER ) THEN
+*
+*        Factorize A as U*D*U' using the upper triangle of A
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2
+*
+         K = N
+   10    CONTINUE
+*
+*        If K < 1, exit from loop
+*
+         IF( K.LT.1 )
+     $      GO TO 90
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = ABS( DBLE( A( K, K ) ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.GT.1 ) THEN
+            IMAX = IZAMAX( K-1, A( 1, K ), 1 )
+            COLMAX = CABS1( A( IMAX, K ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+            A( K, K ) = DBLE( A( K, K ) )
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               JMAX = IMAX + IZAMAX( K-IMAX, A( IMAX, IMAX+1 ), LDA )
+               ROWMAX = CABS1( A( IMAX, JMAX ) )
+               IF( IMAX.GT.1 ) THEN
+                  JMAX = IZAMAX( IMAX-1, A( 1, IMAX ), 1 )
+                  ROWMAX = MAX( ROWMAX, CABS1( A( JMAX, IMAX ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( ABS( DBLE( A( IMAX, IMAX ) ) ).GE.ALPHA*ROWMAX )
+     $                   THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+               ELSE
+*
+*                 interchange rows and columns K-1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K - KSTEP + 1
+            IF( KP.NE.KK ) THEN
+*
+*              Interchange rows and columns KK and KP in the leading
+*              submatrix A(1:k,1:k)
+*
+               CALL ZSWAP( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 )
+               DO 20 J = KP + 1, KK - 1
+                  T = DCONJG( A( J, KK ) )
+                  A( J, KK ) = DCONJG( A( KP, J ) )
+                  A( KP, J ) = T
+   20          CONTINUE
+               A( KP, KK ) = DCONJG( A( KP, KK ) )
+               R1 = DBLE( A( KK, KK ) )
+               A( KK, KK ) = DBLE( A( KP, KP ) )
+               A( KP, KP ) = R1
+               IF( KSTEP.EQ.2 ) THEN
+                  A( K, K ) = DBLE( A( K, K ) )
+                  T = A( K-1, K )
+                  A( K-1, K ) = A( KP, K )
+                  A( KP, K ) = T
+               END IF
+            ELSE
+               A( K, K ) = DBLE( A( K, K ) )
+               IF( KSTEP.EQ.2 )
+     $            A( K-1, K-1 ) = DBLE( A( K-1, K-1 ) )
+            END IF
+*
+*           Update the leading submatrix
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column k now holds
+*
+*              W(k) = U(k)*D(k)
+*
+*              where U(k) is the k-th column of U
+*
+*              Perform a rank-1 update of A(1:k-1,1:k-1) as
+*
+*              A := A - U(k)*D(k)*U(k)' = A - W(k)*1/D(k)*W(k)'
+*
+               R1 = ONE / DBLE( A( K, K ) )
+               CALL ZHER( UPLO, K-1, -R1, A( 1, K ), 1, A, LDA )
+*
+*              Store U(k) in column k
+*
+               CALL ZDSCAL( K-1, R1, A( 1, K ), 1 )
+            ELSE
+*
+*              2-by-2 pivot block D(k): columns k and k-1 now hold
+*
+*              ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k)
+*
+*              where U(k) and U(k-1) are the k-th and (k-1)-th columns
+*              of U
+*
+*              Perform a rank-2 update of A(1:k-2,1:k-2) as
+*
+*              A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )'
+*                 = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )'
+*
+               IF( K.GT.2 ) THEN
+*
+                  D = DLAPY2( DBLE( A( K-1, K ) ),
+     $                DIMAG( A( K-1, K ) ) )
+                  D22 = DBLE( A( K-1, K-1 ) ) / D
+                  D11 = DBLE( A( K, K ) ) / D
+                  TT = ONE / ( D11*D22-ONE )
+                  D12 = A( K-1, K ) / D
+                  D = TT / D
+*
+                  DO 40 J = K - 2, 1, -1
+                     WKM1 = D*( D11*A( J, K-1 )-DCONJG( D12 )*
+     $                      A( J, K ) )
+                     WK = D*( D22*A( J, K )-D12*A( J, K-1 ) )
+                     DO 30 I = J, 1, -1
+                        A( I, J ) = A( I, J ) - A( I, K )*DCONJG( WK ) -
+     $                              A( I, K-1 )*DCONJG( WKM1 )
+   30                CONTINUE
+                     A( J, K ) = WK
+                     A( J, K-1 ) = WKM1
+                     A( J, J ) = DCMPLX( DBLE( A( J, J ) ), 0.0D+0 )
+   40             CONTINUE
+*
+               END IF
+*
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K-1 ) = -KP
+         END IF
+*
+*        Decrease K and return to the start of the main loop
+*
+         K = K - KSTEP
+         GO TO 10
+*
+      ELSE
+*
+*        Factorize A as L*D*L' using the lower triangle of A
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2
+*
+         K = 1
+   50    CONTINUE
+*
+*        If K > N, exit from loop
+*
+         IF( K.GT.N )
+     $      GO TO 90
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = ABS( DBLE( A( K, K ) ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.LT.N ) THEN
+            IMAX = K + IZAMAX( N-K, A( K+1, K ), 1 )
+            COLMAX = CABS1( A( IMAX, K ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+            A( K, K ) = DBLE( A( K, K ) )
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               JMAX = K - 1 + IZAMAX( IMAX-K, A( IMAX, K ), LDA )
+               ROWMAX = CABS1( A( IMAX, JMAX ) )
+               IF( IMAX.LT.N ) THEN
+                  JMAX = IMAX + IZAMAX( N-IMAX, A( IMAX+1, IMAX ), 1 )
+                  ROWMAX = MAX( ROWMAX, CABS1( A( JMAX, IMAX ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( ABS( DBLE( A( IMAX, IMAX ) ) ).GE.ALPHA*ROWMAX )
+     $                   THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+               ELSE
+*
+*                 interchange rows and columns K+1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K + KSTEP - 1
+            IF( KP.NE.KK ) THEN
+*
+*              Interchange rows and columns KK and KP in the trailing
+*              submatrix A(k:n,k:n)
+*
+               IF( KP.LT.N )
+     $            CALL ZSWAP( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 )
+               DO 60 J = KK + 1, KP - 1
+                  T = DCONJG( A( J, KK ) )
+                  A( J, KK ) = DCONJG( A( KP, J ) )
+                  A( KP, J ) = T
+   60          CONTINUE
+               A( KP, KK ) = DCONJG( A( KP, KK ) )
+               R1 = DBLE( A( KK, KK ) )
+               A( KK, KK ) = DBLE( A( KP, KP ) )
+               A( KP, KP ) = R1
+               IF( KSTEP.EQ.2 ) THEN
+                  A( K, K ) = DBLE( A( K, K ) )
+                  T = A( K+1, K )
+                  A( K+1, K ) = A( KP, K )
+                  A( KP, K ) = T
+               END IF
+            ELSE
+               A( K, K ) = DBLE( A( K, K ) )
+               IF( KSTEP.EQ.2 )
+     $            A( K+1, K+1 ) = DBLE( A( K+1, K+1 ) )
+            END IF
+*
+*           Update the trailing submatrix
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column k now holds
+*
+*              W(k) = L(k)*D(k)
+*
+*              where L(k) is the k-th column of L
+*
+               IF( K.LT.N ) THEN
+*
+*                 Perform a rank-1 update of A(k+1:n,k+1:n) as
+*
+*                 A := A - L(k)*D(k)*L(k)' = A - W(k)*(1/D(k))*W(k)'
+*
+                  R1 = ONE / DBLE( A( K, K ) )
+                  CALL ZHER( UPLO, N-K, -R1, A( K+1, K ), 1,
+     $                       A( K+1, K+1 ), LDA )
+*
+*                 Store L(k) in column K
+*
+                  CALL ZDSCAL( N-K, R1, A( K+1, K ), 1 )
+               END IF
+            ELSE
+*
+*              2-by-2 pivot block D(k)
+*
+               IF( K.LT.N-1 ) THEN
+*
+*                 Perform a rank-2 update of A(k+2:n,k+2:n) as
+*
+*                 A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )'
+*                    = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )'
+*
+*                 where L(k) and L(k+1) are the k-th and (k+1)-th
+*                 columns of L
+*
+                  D = DLAPY2( DBLE( A( K+1, K ) ),
+     $                DIMAG( A( K+1, K ) ) )
+                  D11 = DBLE( A( K+1, K+1 ) ) / D
+                  D22 = DBLE( A( K, K ) ) / D
+                  TT = ONE / ( D11*D22-ONE )
+                  D21 = A( K+1, K ) / D
+                  D = TT / D
+*
+                  DO 80 J = K + 2, N
+                     WK = D*( D11*A( J, K )-D21*A( J, K+1 ) )
+                     WKP1 = D*( D22*A( J, K+1 )-DCONJG( D21 )*
+     $                      A( J, K ) )
+                     DO 70 I = J, N
+                        A( I, J ) = A( I, J ) - A( I, K )*DCONJG( WK ) -
+     $                              A( I, K+1 )*DCONJG( WKP1 )
+   70                CONTINUE
+                     A( J, K ) = WK
+                     A( J, K+1 ) = WKP1
+                     A( J, J ) = DCMPLX( DBLE( A( J, J ) ), 0.0D+0 )
+   80             CONTINUE
+               END IF
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K+1 ) = -KP
+         END IF
+*
+*        Increase K and return to the start of the main loop
+*
+         K = K + KSTEP
+         GO TO 50
+*
+      END IF
+*
+   90 CONTINUE
+      RETURN
+*
+*     End of ZHETF2
+*
+      END
diff --git a/costa/native/external/lapack/zhetrd.f b/costa/native/external/lapack/zhetrd.f
new file mode 100644
index 000000000..bc310c090
--- /dev/null
+++ b/costa/native/external/lapack/zhetrd.f
@@ -0,0 +1,297 @@
+      SUBROUTINE ZHETRD( UPLO, N, A, LDA, D, E, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * ), E( * )
+      COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHETRD reduces a complex Hermitian matrix A to real symmetric
+*  tridiagonal form T by a unitary similarity transformation:
+*  Q**H * A * Q = T.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*          On exit, if UPLO = 'U', the diagonal and first superdiagonal
+*          of A are overwritten by the corresponding elements of the
+*          tridiagonal matrix T, and the elements above the first
+*          superdiagonal, with the array TAU, represent the unitary
+*          matrix Q as a product of elementary reflectors; if UPLO
+*          = 'L', the diagonal and first subdiagonal of A are over-
+*          written by the corresponding elements of the tridiagonal
+*          matrix T, and the elements below the first subdiagonal, with
+*          the array TAU, represent the unitary matrix Q as a product
+*          of elementary reflectors. See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  D       (output) DOUBLE PRECISION array, dimension (N)
+*          The diagonal elements of the tridiagonal matrix T:
+*          D(i) = A(i,i).
+*
+*  E       (output) DOUBLE PRECISION array, dimension (N-1)
+*          The off-diagonal elements of the tridiagonal matrix T:
+*          E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.
+*
+*  TAU     (output) COMPLEX*16 array, dimension (N-1)
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= 1.
+*          For optimum performance LWORK >= N*NB, where NB is the
+*          optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  If UPLO = 'U', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(n-1) . . . H(2) H(1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
+*  A(1:i-1,i+1), and tau in TAU(i).
+*
+*  If UPLO = 'L', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(1) H(2) . . . H(n-1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i),
+*  and tau in TAU(i).
+*
+*  The contents of A on exit are illustrated by the following examples
+*  with n = 5:
+*
+*  if UPLO = 'U':                       if UPLO = 'L':
+*
+*    (  d   e   v2  v3  v4 )              (  d                  )
+*    (      d   e   v3  v4 )              (  e   d              )
+*    (          d   e   v4 )              (  v1  e   d          )
+*    (              d   e  )              (  v1  v2  e   d      )
+*    (                  d  )              (  v1  v2  v3  e   d  )
+*
+*  where d and e denote diagonal and off-diagonal elements of T, and vi
+*  denotes an element of the vector defining H(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+      COMPLEX*16         CONE
+      PARAMETER          ( CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, UPPER
+      INTEGER            I, IINFO, IWS, J, KK, LDWORK, LWKOPT, NB,
+     $                   NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZHER2K, ZHETD2, ZLATRD
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN
+         INFO = -9
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size.
+*
+         NB = ILAENV( 1, 'ZHETRD', UPLO, N, -1, -1, -1 )
+         LWKOPT = N*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHETRD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NX = N
+      IWS = 1
+      IF( NB.GT.1 .AND. NB.LT.N ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code
+*        (last block is always handled by unblocked code).
+*
+         NX = MAX( NB, ILAENV( 3, 'ZHETRD', UPLO, N, -1, -1, -1 ) )
+         IF( NX.LT.N ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = N
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  determine the
+*              minimum value of NB, and reduce NB or force use of
+*              unblocked code by setting NX = N.
+*
+               NB = MAX( LWORK / LDWORK, 1 )
+               NBMIN = ILAENV( 2, 'ZHETRD', UPLO, N, -1, -1, -1 )
+               IF( NB.LT.NBMIN )
+     $            NX = N
+            END IF
+         ELSE
+            NX = N
+         END IF
+      ELSE
+         NB = 1
+      END IF
+*
+      IF( UPPER ) THEN
+*
+*        Reduce the upper triangle of A.
+*        Columns 1:kk are handled by the unblocked method.
+*
+         KK = N - ( ( N-NX+NB-1 ) / NB )*NB
+         DO 20 I = N - NB + 1, KK + 1, -NB
+*
+*           Reduce columns i:i+nb-1 to tridiagonal form and form the
+*           matrix W which is needed to update the unreduced part of
+*           the matrix
+*
+            CALL ZLATRD( UPLO, I+NB-1, NB, A, LDA, E, TAU, WORK,
+     $                   LDWORK )
+*
+*           Update the unreduced submatrix A(1:i-1,1:i-1), using an
+*           update of the form:  A := A - V*W' - W*V'
+*
+            CALL ZHER2K( UPLO, 'No transpose', I-1, NB, -CONE,
+     $                   A( 1, I ), LDA, WORK, LDWORK, ONE, A, LDA )
+*
+*           Copy superdiagonal elements back into A, and diagonal
+*           elements into D
+*
+            DO 10 J = I, I + NB - 1
+               A( J-1, J ) = E( J-1 )
+               D( J ) = A( J, J )
+   10       CONTINUE
+   20    CONTINUE
+*
+*        Use unblocked code to reduce the last or only block
+*
+         CALL ZHETD2( UPLO, KK, A, LDA, D, E, TAU, IINFO )
+      ELSE
+*
+*        Reduce the lower triangle of A
+*
+         DO 40 I = 1, N - NX, NB
+*
+*           Reduce columns i:i+nb-1 to tridiagonal form and form the
+*           matrix W which is needed to update the unreduced part of
+*           the matrix
+*
+            CALL ZLATRD( UPLO, N-I+1, NB, A( I, I ), LDA, E( I ),
+     $                   TAU( I ), WORK, LDWORK )
+*
+*           Update the unreduced submatrix A(i+nb:n,i+nb:n), using
+*           an update of the form:  A := A - V*W' - W*V'
+*
+            CALL ZHER2K( UPLO, 'No transpose', N-I-NB+1, NB, -CONE,
+     $                   A( I+NB, I ), LDA, WORK( NB+1 ), LDWORK, ONE,
+     $                   A( I+NB, I+NB ), LDA )
+*
+*           Copy subdiagonal elements back into A, and diagonal
+*           elements into D
+*
+            DO 30 J = I, I + NB - 1
+               A( J+1, J ) = E( J )
+               D( J ) = A( J, J )
+   30       CONTINUE
+   40    CONTINUE
+*
+*        Use unblocked code to reduce the last or only block
+*
+         CALL ZHETD2( UPLO, N-I+1, A( I, I ), LDA, D( I ), E( I ),
+     $                TAU( I ), IINFO )
+      END IF
+*
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of ZHETRD
+*
+      END
diff --git a/costa/native/external/lapack/zhetrf.f b/costa/native/external/lapack/zhetrf.f
new file mode 100644
index 000000000..562655b11
--- /dev/null
+++ b/costa/native/external/lapack/zhetrf.f
@@ -0,0 +1,282 @@
+      SUBROUTINE ZHETRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHETRF computes the factorization of a complex Hermitian matrix A
+*  using the Bunch-Kaufman diagonal pivoting method.  The form of the
+*  factorization is
+*
+*     A = U*D*U**H  or  A = L*D*L**H
+*
+*  where U (or L) is a product of permutation and unit upper (lower)
+*  triangular matrices, and D is Hermitian and block diagonal with
+*  1-by-1 and 2-by-2 diagonal blocks.
+*
+*  This is the blocked version of the algorithm, calling Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L (see below for further details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D.
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of WORK.  LWORK >=1.  For best performance
+*          LWORK >= N*NB, where NB is the block size returned by ILAENV.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, D(i,i) is exactly zero.  The factorization
+*                has been completed, but the block diagonal matrix D is
+*                exactly singular, and division by zero will occur if it
+*                is used to solve a system of equations.
+*
+*  Further Details
+*  ===============
+*
+*  If UPLO = 'U', then A = U*D*U', where
+*     U = P(n)*U(n)* ... *P(k)U(k)* ...,
+*  i.e., U is a product of terms P(k)*U(k), where k decreases from n to
+*  1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and U(k) is a unit upper triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    v    0   )   k-s
+*     U(k) =  (   0    I    0   )   s
+*             (   0    0    I   )   n-k
+*                k-s   s   n-k
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k).
+*  If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k),
+*  and A(k,k), and v overwrites A(1:k-2,k-1:k).
+*
+*  If UPLO = 'L', then A = L*D*L', where
+*     L = P(1)*L(1)* ... *P(k)*L(k)* ...,
+*  i.e., L is a product of terms P(k)*L(k), where k increases from 1 to
+*  n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and L(k) is a unit lower triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    0     0   )  k-1
+*     L(k) =  (   0    I     0   )  s
+*             (   0    v     I   )  n-k-s+1
+*                k-1   s  n-k-s+1
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k).
+*  If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k),
+*  and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, UPPER
+      INTEGER            IINFO, IWS, J, K, KB, LDWORK, LWKOPT, NB, NBMIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZHETF2, ZLAHEF
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size
+*
+         NB = ILAENV( 1, 'ZHETRF', UPLO, N, -1, -1, -1 )
+         LWKOPT = N*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHETRF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      LDWORK = N
+      IF( NB.GT.1 .AND. NB.LT.N ) THEN
+         IWS = LDWORK*NB
+         IF( LWORK.LT.IWS ) THEN
+            NB = MAX( LWORK / LDWORK, 1 )
+            NBMIN = MAX( 2, ILAENV( 2, 'ZHETRF', UPLO, N, -1, -1, -1 ) )
+         END IF
+      ELSE
+         IWS = 1
+      END IF
+      IF( NB.LT.NBMIN )
+     $   NB = N
+*
+      IF( UPPER ) THEN
+*
+*        Factorize A as U*D*U' using the upper triangle of A
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        KB, where KB is the number of columns factorized by ZLAHEF;
+*        KB is either NB or NB-1, or K for the last block
+*
+         K = N
+   10    CONTINUE
+*
+*        If K < 1, exit from loop
+*
+         IF( K.LT.1 )
+     $      GO TO 40
+*
+         IF( K.GT.NB ) THEN
+*
+*           Factorize columns k-kb+1:k of A and use blocked code to
+*           update columns 1:k-kb
+*
+            CALL ZLAHEF( UPLO, K, NB, KB, A, LDA, IPIV, WORK, N, IINFO )
+         ELSE
+*
+*           Use unblocked code to factorize columns 1:k of A
+*
+            CALL ZHETF2( UPLO, K, A, LDA, IPIV, IINFO )
+            KB = K
+         END IF
+*
+*        Set INFO on the first occurrence of a zero pivot
+*
+         IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+     $      INFO = IINFO
+*
+*        Decrease K and return to the start of the main loop
+*
+         K = K - KB
+         GO TO 10
+*
+      ELSE
+*
+*        Factorize A as L*D*L' using the lower triangle of A
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        KB, where KB is the number of columns factorized by ZLAHEF;
+*        KB is either NB or NB-1, or N-K+1 for the last block
+*
+         K = 1
+   20    CONTINUE
+*
+*        If K > N, exit from loop
+*
+         IF( K.GT.N )
+     $      GO TO 40
+*
+         IF( K.LE.N-NB ) THEN
+*
+*           Factorize columns k:k+kb-1 of A and use blocked code to
+*           update columns k+kb:n
+*
+            CALL ZLAHEF( UPLO, N-K+1, NB, KB, A( K, K ), LDA, IPIV( K ),
+     $                   WORK, N, IINFO )
+         ELSE
+*
+*           Use unblocked code to factorize columns k:n of A
+*
+            CALL ZHETF2( UPLO, N-K+1, A( K, K ), LDA, IPIV( K ), IINFO )
+            KB = N - K + 1
+         END IF
+*
+*        Set INFO on the first occurrence of a zero pivot
+*
+         IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+     $      INFO = IINFO + K - 1
+*
+*        Adjust IPIV
+*
+         DO 30 J = K, K + KB - 1
+            IF( IPIV( J ).GT.0 ) THEN
+               IPIV( J ) = IPIV( J ) + K - 1
+            ELSE
+               IPIV( J ) = IPIV( J ) - K + 1
+            END IF
+   30    CONTINUE
+*
+*        Increase K and return to the start of the main loop
+*
+         K = K + KB
+         GO TO 20
+*
+      END IF
+*
+   40 CONTINUE
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of ZHETRF
+*
+      END
diff --git a/costa/native/external/lapack/zhetri.f b/costa/native/external/lapack/zhetri.f
new file mode 100644
index 000000000..4115937fb
--- /dev/null
+++ b/costa/native/external/lapack/zhetri.f
@@ -0,0 +1,328 @@
+      SUBROUTINE ZHETRI( UPLO, N, A, LDA, IPIV, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHETRI computes the inverse of a complex Hermitian indefinite matrix
+*  A using the factorization A = U*D*U**H or A = L*D*L**H computed by
+*  ZHETRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**H;
+*          = 'L':  Lower triangular, form is A = L*D*L**H.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the block diagonal matrix D and the multipliers
+*          used to obtain the factor U or L as computed by ZHETRF.
+*
+*          On exit, if INFO = 0, the (Hermitian) inverse of the original
+*          matrix.  If UPLO = 'U', the upper triangular part of the
+*          inverse is formed and the part of A below the diagonal is not
+*          referenced; if UPLO = 'L' the lower triangular part of the
+*          inverse is formed and the part of A above the diagonal is
+*          not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by ZHETRF.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its
+*               inverse could not be computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      COMPLEX*16         CONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, CONE = ( 1.0D+0, 0.0D+0 ),
+     $                   ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, K, KP, KSTEP
+      DOUBLE PRECISION   AK, AKP1, D, T
+      COMPLEX*16         AKKP1, TEMP
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      COMPLEX*16         ZDOTC
+      EXTERNAL           LSAME, ZDOTC
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZCOPY, ZHEMV, ZSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHETRI', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Check that the diagonal matrix D is nonsingular.
+*
+      IF( UPPER ) THEN
+*
+*        Upper triangular storage: examine D from bottom to top
+*
+         DO 10 INFO = N, 1, -1
+            IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO )
+     $         RETURN
+   10    CONTINUE
+      ELSE
+*
+*        Lower triangular storage: examine D from top to bottom.
+*
+         DO 20 INFO = 1, N
+            IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO )
+     $         RETURN
+   20    CONTINUE
+      END IF
+      INFO = 0
+*
+      IF( UPPER ) THEN
+*
+*        Compute inv(A) from the factorization A = U*D*U'.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+   30    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 50
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Invert the diagonal block.
+*
+            A( K, K ) = ONE / DBLE( A( K, K ) )
+*
+*           Compute column K of the inverse.
+*
+            IF( K.GT.1 ) THEN
+               CALL ZCOPY( K-1, A( 1, K ), 1, WORK, 1 )
+               CALL ZHEMV( UPLO, K-1, -CONE, A, LDA, WORK, 1, ZERO,
+     $                     A( 1, K ), 1 )
+               A( K, K ) = A( K, K ) - DBLE( ZDOTC( K-1, WORK, 1, A( 1,
+     $                     K ), 1 ) )
+            END IF
+            KSTEP = 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Invert the diagonal block.
+*
+            T = ABS( A( K, K+1 ) )
+            AK = DBLE( A( K, K ) ) / T
+            AKP1 = DBLE( A( K+1, K+1 ) ) / T
+            AKKP1 = A( K, K+1 ) / T
+            D = T*( AK*AKP1-ONE )
+            A( K, K ) = AKP1 / D
+            A( K+1, K+1 ) = AK / D
+            A( K, K+1 ) = -AKKP1 / D
+*
+*           Compute columns K and K+1 of the inverse.
+*
+            IF( K.GT.1 ) THEN
+               CALL ZCOPY( K-1, A( 1, K ), 1, WORK, 1 )
+               CALL ZHEMV( UPLO, K-1, -CONE, A, LDA, WORK, 1, ZERO,
+     $                     A( 1, K ), 1 )
+               A( K, K ) = A( K, K ) - DBLE( ZDOTC( K-1, WORK, 1, A( 1,
+     $                     K ), 1 ) )
+               A( K, K+1 ) = A( K, K+1 ) -
+     $                       ZDOTC( K-1, A( 1, K ), 1, A( 1, K+1 ), 1 )
+               CALL ZCOPY( K-1, A( 1, K+1 ), 1, WORK, 1 )
+               CALL ZHEMV( UPLO, K-1, -CONE, A, LDA, WORK, 1, ZERO,
+     $                     A( 1, K+1 ), 1 )
+               A( K+1, K+1 ) = A( K+1, K+1 ) -
+     $                         DBLE( ZDOTC( K-1, WORK, 1, A( 1, K+1 ),
+     $                         1 ) )
+            END IF
+            KSTEP = 2
+         END IF
+*
+         KP = ABS( IPIV( K ) )
+         IF( KP.NE.K ) THEN
+*
+*           Interchange rows and columns K and KP in the leading
+*           submatrix A(1:k+1,1:k+1)
+*
+            CALL ZSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 )
+            DO 40 J = KP + 1, K - 1
+               TEMP = DCONJG( A( J, K ) )
+               A( J, K ) = DCONJG( A( KP, J ) )
+               A( KP, J ) = TEMP
+   40       CONTINUE
+            A( KP, K ) = DCONJG( A( KP, K ) )
+            TEMP = A( K, K )
+            A( K, K ) = A( KP, KP )
+            A( KP, KP ) = TEMP
+            IF( KSTEP.EQ.2 ) THEN
+               TEMP = A( K, K+1 )
+               A( K, K+1 ) = A( KP, K+1 )
+               A( KP, K+1 ) = TEMP
+            END IF
+         END IF
+*
+         K = K + KSTEP
+         GO TO 30
+   50    CONTINUE
+*
+      ELSE
+*
+*        Compute inv(A) from the factorization A = L*D*L'.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = N
+   60    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 80
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Invert the diagonal block.
+*
+            A( K, K ) = ONE / DBLE( A( K, K ) )
+*
+*           Compute column K of the inverse.
+*
+            IF( K.LT.N ) THEN
+               CALL ZCOPY( N-K, A( K+1, K ), 1, WORK, 1 )
+               CALL ZHEMV( UPLO, N-K, -CONE, A( K+1, K+1 ), LDA, WORK,
+     $                     1, ZERO, A( K+1, K ), 1 )
+               A( K, K ) = A( K, K ) - DBLE( ZDOTC( N-K, WORK, 1,
+     $                     A( K+1, K ), 1 ) )
+            END IF
+            KSTEP = 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Invert the diagonal block.
+*
+            T = ABS( A( K, K-1 ) )
+            AK = DBLE( A( K-1, K-1 ) ) / T
+            AKP1 = DBLE( A( K, K ) ) / T
+            AKKP1 = A( K, K-1 ) / T
+            D = T*( AK*AKP1-ONE )
+            A( K-1, K-1 ) = AKP1 / D
+            A( K, K ) = AK / D
+            A( K, K-1 ) = -AKKP1 / D
+*
+*           Compute columns K-1 and K of the inverse.
+*
+            IF( K.LT.N ) THEN
+               CALL ZCOPY( N-K, A( K+1, K ), 1, WORK, 1 )
+               CALL ZHEMV( UPLO, N-K, -CONE, A( K+1, K+1 ), LDA, WORK,
+     $                     1, ZERO, A( K+1, K ), 1 )
+               A( K, K ) = A( K, K ) - DBLE( ZDOTC( N-K, WORK, 1,
+     $                     A( K+1, K ), 1 ) )
+               A( K, K-1 ) = A( K, K-1 ) -
+     $                       ZDOTC( N-K, A( K+1, K ), 1, A( K+1, K-1 ),
+     $                       1 )
+               CALL ZCOPY( N-K, A( K+1, K-1 ), 1, WORK, 1 )
+               CALL ZHEMV( UPLO, N-K, -CONE, A( K+1, K+1 ), LDA, WORK,
+     $                     1, ZERO, A( K+1, K-1 ), 1 )
+               A( K-1, K-1 ) = A( K-1, K-1 ) -
+     $                         DBLE( ZDOTC( N-K, WORK, 1, A( K+1, K-1 ),
+     $                         1 ) )
+            END IF
+            KSTEP = 2
+         END IF
+*
+         KP = ABS( IPIV( K ) )
+         IF( KP.NE.K ) THEN
+*
+*           Interchange rows and columns K and KP in the trailing
+*           submatrix A(k-1:n,k-1:n)
+*
+            IF( KP.LT.N )
+     $         CALL ZSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 )
+            DO 70 J = K + 1, KP - 1
+               TEMP = DCONJG( A( J, K ) )
+               A( J, K ) = DCONJG( A( KP, J ) )
+               A( KP, J ) = TEMP
+   70       CONTINUE
+            A( KP, K ) = DCONJG( A( KP, K ) )
+            TEMP = A( K, K )
+            A( K, K ) = A( KP, KP )
+            A( KP, KP ) = TEMP
+            IF( KSTEP.EQ.2 ) THEN
+               TEMP = A( K, K-1 )
+               A( K, K-1 ) = A( KP, K-1 )
+               A( KP, K-1 ) = TEMP
+            END IF
+         END IF
+*
+         K = K - KSTEP
+         GO TO 60
+   80    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZHETRI
+*
+      END
diff --git a/costa/native/external/lapack/zhetrs.f b/costa/native/external/lapack/zhetrs.f
new file mode 100644
index 000000000..dd416f35d
--- /dev/null
+++ b/costa/native/external/lapack/zhetrs.f
@@ -0,0 +1,394 @@
+      SUBROUTINE ZHETRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHETRS solves a system of linear equations A*X = B with a complex
+*  Hermitian matrix A using the factorization A = U*D*U**H or
+*  A = L*D*L**H computed by ZHETRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**H;
+*          = 'L':  Lower triangular, form is A = L*D*L**H.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,N)
+*          The block diagonal matrix D and the multipliers used to
+*          obtain the factor U or L as computed by ZHETRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by ZHETRF.
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, K, KP
+      DOUBLE PRECISION   S
+      COMPLEX*16         AK, AKM1, AKM1K, BK, BKM1, DENOM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZDSCAL, ZGEMV, ZGERU, ZLACGV, ZSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, DCONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHETRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Solve A*X = B, where A = U*D*U'.
+*
+*        First solve U*D*X = B, overwriting B with X.
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = N
+   10    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 30
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(U(K)), where U(K) is the transformation
+*           stored in column K of A.
+*
+            CALL ZGERU( K-1, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB,
+     $                  B( 1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            S = DBLE( ONE ) / DBLE( A( K, K ) )
+            CALL ZDSCAL( NRHS, S, B( K, 1 ), LDB )
+            K = K - 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Interchange rows K-1 and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K-1 )
+     $         CALL ZSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(U(K)), where U(K) is the transformation
+*           stored in columns K-1 and K of A.
+*
+            CALL ZGERU( K-2, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB,
+     $                  B( 1, 1 ), LDB )
+            CALL ZGERU( K-2, NRHS, -ONE, A( 1, K-1 ), 1, B( K-1, 1 ),
+     $                  LDB, B( 1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            AKM1K = A( K-1, K )
+            AKM1 = A( K-1, K-1 ) / AKM1K
+            AK = A( K, K ) / DCONJG( AKM1K )
+            DENOM = AKM1*AK - ONE
+            DO 20 J = 1, NRHS
+               BKM1 = B( K-1, J ) / AKM1K
+               BK = B( K, J ) / DCONJG( AKM1K )
+               B( K-1, J ) = ( AK*BKM1-BK ) / DENOM
+               B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM
+   20       CONTINUE
+            K = K - 2
+         END IF
+*
+         GO TO 10
+   30    CONTINUE
+*
+*        Next solve U'*X = B, overwriting B with X.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+   40    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 50
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Multiply by inv(U'(K)), where U(K) is the transformation
+*           stored in column K of A.
+*
+            IF( K.GT.1 ) THEN
+               CALL ZLACGV( NRHS, B( K, 1 ), LDB )
+               CALL ZGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B,
+     $                     LDB, A( 1, K ), 1, ONE, B( K, 1 ), LDB )
+               CALL ZLACGV( NRHS, B( K, 1 ), LDB )
+            END IF
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            K = K + 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Multiply by inv(U'(K+1)), where U(K+1) is the transformation
+*           stored in columns K and K+1 of A.
+*
+            IF( K.GT.1 ) THEN
+               CALL ZLACGV( NRHS, B( K, 1 ), LDB )
+               CALL ZGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B,
+     $                     LDB, A( 1, K ), 1, ONE, B( K, 1 ), LDB )
+               CALL ZLACGV( NRHS, B( K, 1 ), LDB )
+*
+               CALL ZLACGV( NRHS, B( K+1, 1 ), LDB )
+               CALL ZGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B,
+     $                     LDB, A( 1, K+1 ), 1, ONE, B( K+1, 1 ), LDB )
+               CALL ZLACGV( NRHS, B( K+1, 1 ), LDB )
+            END IF
+*
+*           Interchange rows K and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K )
+     $         CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            K = K + 2
+         END IF
+*
+         GO TO 40
+   50    CONTINUE
+*
+      ELSE
+*
+*        Solve A*X = B, where A = L*D*L'.
+*
+*        First solve L*D*X = B, overwriting B with X.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+   60    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 80
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(L(K)), where L(K) is the transformation
+*           stored in column K of A.
+*
+            IF( K.LT.N )
+     $         CALL ZGERU( N-K, NRHS, -ONE, A( K+1, K ), 1, B( K, 1 ),
+     $                     LDB, B( K+1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            S = DBLE( ONE ) / DBLE( A( K, K ) )
+            CALL ZDSCAL( NRHS, S, B( K, 1 ), LDB )
+            K = K + 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Interchange rows K+1 and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K+1 )
+     $         CALL ZSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(L(K)), where L(K) is the transformation
+*           stored in columns K and K+1 of A.
+*
+            IF( K.LT.N-1 ) THEN
+               CALL ZGERU( N-K-1, NRHS, -ONE, A( K+2, K ), 1, B( K, 1 ),
+     $                     LDB, B( K+2, 1 ), LDB )
+               CALL ZGERU( N-K-1, NRHS, -ONE, A( K+2, K+1 ), 1,
+     $                     B( K+1, 1 ), LDB, B( K+2, 1 ), LDB )
+            END IF
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            AKM1K = A( K+1, K )
+            AKM1 = A( K, K ) / DCONJG( AKM1K )
+            AK = A( K+1, K+1 ) / AKM1K
+            DENOM = AKM1*AK - ONE
+            DO 70 J = 1, NRHS
+               BKM1 = B( K, J ) / DCONJG( AKM1K )
+               BK = B( K+1, J ) / AKM1K
+               B( K, J ) = ( AK*BKM1-BK ) / DENOM
+               B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM
+   70       CONTINUE
+            K = K + 2
+         END IF
+*
+         GO TO 60
+   80    CONTINUE
+*
+*        Next solve L'*X = B, overwriting B with X.
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = N
+   90    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 100
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Multiply by inv(L'(K)), where L(K) is the transformation
+*           stored in column K of A.
+*
+            IF( K.LT.N ) THEN
+               CALL ZLACGV( NRHS, B( K, 1 ), LDB )
+               CALL ZGEMV( 'Conjugate transpose', N-K, NRHS, -ONE,
+     $                     B( K+1, 1 ), LDB, A( K+1, K ), 1, ONE,
+     $                     B( K, 1 ), LDB )
+               CALL ZLACGV( NRHS, B( K, 1 ), LDB )
+            END IF
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            K = K - 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Multiply by inv(L'(K-1)), where L(K-1) is the transformation
+*           stored in columns K-1 and K of A.
+*
+            IF( K.LT.N ) THEN
+               CALL ZLACGV( NRHS, B( K, 1 ), LDB )
+               CALL ZGEMV( 'Conjugate transpose', N-K, NRHS, -ONE,
+     $                     B( K+1, 1 ), LDB, A( K+1, K ), 1, ONE,
+     $                     B( K, 1 ), LDB )
+               CALL ZLACGV( NRHS, B( K, 1 ), LDB )
+*
+               CALL ZLACGV( NRHS, B( K-1, 1 ), LDB )
+               CALL ZGEMV( 'Conjugate transpose', N-K, NRHS, -ONE,
+     $                     B( K+1, 1 ), LDB, A( K+1, K-1 ), 1, ONE,
+     $                     B( K-1, 1 ), LDB )
+               CALL ZLACGV( NRHS, B( K-1, 1 ), LDB )
+            END IF
+*
+*           Interchange rows K and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K )
+     $         CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            K = K - 2
+         END IF
+*
+         GO TO 90
+  100    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZHETRS
+*
+      END
diff --git a/costa/native/external/lapack/zhgeqz.f b/costa/native/external/lapack/zhgeqz.f
new file mode 100644
index 000000000..1e29d9882
--- /dev/null
+++ b/costa/native/external/lapack/zhgeqz.f
@@ -0,0 +1,735 @@
+      SUBROUTINE ZHGEQZ( JOB, COMPQ, COMPZ, N, ILO, IHI, A, LDA, B, LDB,
+     $                   ALPHA, BETA, Q, LDQ, Z, LDZ, WORK, LWORK,
+     $                   RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPQ, COMPZ, JOB
+      INTEGER            IHI, ILO, INFO, LDA, LDB, LDQ, LDZ, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   RWORK( * )
+      COMPLEX*16         A( LDA, * ), ALPHA( * ), B( LDB, * ),
+     $                   BETA( * ), Q( LDQ, * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHGEQZ implements a single-shift version of the QZ
+*  method for finding the generalized eigenvalues w(i)=ALPHA(i)/BETA(i)
+*  of the equation
+*
+*       det( A - w(i) B ) = 0
+*
+*  If JOB='S', then the pair (A,B) is simultaneously
+*  reduced to Schur form (i.e., A and B are both upper triangular) by
+*  applying one unitary tranformation (usually called Q) on the left and
+*  another (usually called Z) on the right.  The diagonal elements of
+*  A are then ALPHA(1),...,ALPHA(N), and of B are BETA(1),...,BETA(N).
+*
+*  If JOB='S' and COMPQ and COMPZ are 'V' or 'I', then the unitary
+*  transformations used to reduce (A,B) are accumulated into the arrays
+*  Q and Z s.t.:
+*
+*       Q(in) A(in) Z(in)* = Q(out) A(out) Z(out)*
+*       Q(in) B(in) Z(in)* = Q(out) B(out) Z(out)*
+*
+*  Ref: C.B. Moler & G.W. Stewart, "An Algorithm for Generalized Matrix
+*       Eigenvalue Problems", SIAM J. Numer. Anal., 10(1973),
+*       pp. 241--256.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          = 'E': compute only ALPHA and BETA.  A and B will not
+*                 necessarily be put into generalized Schur form.
+*          = 'S': put A and B into generalized Schur form, as well
+*                 as computing ALPHA and BETA.
+*
+*  COMPQ   (input) CHARACTER*1
+*          = 'N': do not modify Q.
+*          = 'V': multiply the array Q on the right by the conjugate
+*                 transpose of the unitary tranformation that is
+*                 applied to the left side of A and B to reduce them
+*                 to Schur form.
+*          = 'I': like COMPQ='V', except that Q will be initialized to
+*                 the identity first.
+*
+*  COMPZ   (input) CHARACTER*1
+*          = 'N': do not modify Z.
+*          = 'V': multiply the array Z on the right by the unitary
+*                 tranformation that is applied to the right side of
+*                 A and B to reduce them to Schur form.
+*          = 'I': like COMPZ='V', except that Z will be initialized to
+*                 the identity first.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A, B, Q, and Z.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          It is assumed that A is already upper triangular in rows and
+*          columns 1:ILO-1 and IHI+1:N.
+*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA, N)
+*          On entry, the N-by-N upper Hessenberg matrix A.  Elements
+*          below the subdiagonal must be zero.
+*          If JOB='S', then on exit A and B will have been
+*             simultaneously reduced to upper triangular form.
+*          If JOB='E', then on exit A will have been destroyed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max( 1, N ).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB, N)
+*          On entry, the N-by-N upper triangular matrix B.  Elements
+*          below the diagonal must be zero.
+*          If JOB='S', then on exit A and B will have been
+*             simultaneously reduced to upper triangular form.
+*          If JOB='E', then on exit B will have been destroyed.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max( 1, N ).
+*
+*  ALPHA   (output) COMPLEX*16 array, dimension (N)
+*          The diagonal elements of A when the pair (A,B) has been
+*          reduced to Schur form.  ALPHA(i)/BETA(i) i=1,...,N
+*          are the generalized eigenvalues.
+*
+*  BETA    (output) COMPLEX*16 array, dimension (N)
+*          The diagonal elements of B when the pair (A,B) has been
+*          reduced to Schur form.  ALPHA(i)/BETA(i) i=1,...,N
+*          are the generalized eigenvalues.  A and B are normalized
+*          so that BETA(1),...,BETA(N) are non-negative real numbers.
+*
+*  Q       (input/output) COMPLEX*16 array, dimension (LDQ, N)
+*          If COMPQ='N', then Q will not be referenced.
+*          If COMPQ='V' or 'I', then the conjugate transpose of the
+*             unitary transformations which are applied to A and B on
+*             the left will be applied to the array Q on the right.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.  LDQ >= 1.
+*          If COMPQ='V' or 'I', then LDQ >= N.
+*
+*  Z       (input/output) COMPLEX*16 array, dimension (LDZ, N)
+*          If COMPZ='N', then Z will not be referenced.
+*          If COMPZ='V' or 'I', then the unitary transformations which
+*             are applied to A and B on the right will be applied to the
+*             array Z on the right.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1.
+*          If COMPZ='V' or 'I', then LDZ >= N.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO >= 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,N).
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          = 1,...,N: the QZ iteration did not converge.  (A,B) is not
+*                     in Schur form, but ALPHA(i) and BETA(i),
+*                     i=INFO+1,...,N should be correct.
+*          = N+1,...,2*N: the shift calculation failed.  (A,B) is not
+*                     in Schur form, but ALPHA(i) and BETA(i),
+*                     i=INFO-N+1,...,N should be correct.
+*          > 2*N:     various "impossible" errors.
+*
+*  Further Details
+*  ===============
+*
+*  We assume that complex ABS works as long as its value is less than
+*  overflow.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   CONE = ( 1.0D+0, 0.0D+0 ) )
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+      DOUBLE PRECISION   HALF
+      PARAMETER          ( HALF = 0.5D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ILAZR2, ILAZRO, ILQ, ILSCHR, ILZ, LQUERY
+      INTEGER            ICOMPQ, ICOMPZ, IFIRST, IFRSTM, IITER, ILAST,
+     $                   ILASTM, IN, ISCHUR, ISTART, J, JC, JCH, JITER,
+     $                   JR, MAXIT
+      DOUBLE PRECISION   ABSB, ANORM, ASCALE, ATOL, BNORM, BSCALE, BTOL,
+     $                   C, SAFMIN, TEMP, TEMP2, TEMPR, ULP
+      COMPLEX*16         ABI22, AD11, AD12, AD21, AD22, CTEMP, CTEMP2,
+     $                   CTEMP3, ESHIFT, RTDISC, S, SHIFT, SIGNBC, T,
+     $                   U12, X
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, ZLANHS
+      EXTERNAL           LSAME, DLAMCH, ZLANHS
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLARTG, ZLASET, ZROT, ZSCAL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, MIN,
+     $                   SQRT
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   ABS1
+*     ..
+*     .. Statement Function definitions ..
+      ABS1( X ) = ABS( DBLE( X ) ) + ABS( DIMAG( X ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode JOB, COMPQ, COMPZ
+*
+      IF( LSAME( JOB, 'E' ) ) THEN
+         ILSCHR = .FALSE.
+         ISCHUR = 1
+      ELSE IF( LSAME( JOB, 'S' ) ) THEN
+         ILSCHR = .TRUE.
+         ISCHUR = 2
+      ELSE
+         ISCHUR = 0
+      END IF
+*
+      IF( LSAME( COMPQ, 'N' ) ) THEN
+         ILQ = .FALSE.
+         ICOMPQ = 1
+      ELSE IF( LSAME( COMPQ, 'V' ) ) THEN
+         ILQ = .TRUE.
+         ICOMPQ = 2
+      ELSE IF( LSAME( COMPQ, 'I' ) ) THEN
+         ILQ = .TRUE.
+         ICOMPQ = 3
+      ELSE
+         ICOMPQ = 0
+      END IF
+*
+      IF( LSAME( COMPZ, 'N' ) ) THEN
+         ILZ = .FALSE.
+         ICOMPZ = 1
+      ELSE IF( LSAME( COMPZ, 'V' ) ) THEN
+         ILZ = .TRUE.
+         ICOMPZ = 2
+      ELSE IF( LSAME( COMPZ, 'I' ) ) THEN
+         ILZ = .TRUE.
+         ICOMPZ = 3
+      ELSE
+         ICOMPZ = 0
+      END IF
+*
+*     Check Argument Values
+*
+      INFO = 0
+      WORK( 1 ) = MAX( 1, N )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( ISCHUR.EQ.0 ) THEN
+         INFO = -1
+      ELSE IF( ICOMPQ.EQ.0 ) THEN
+         INFO = -2
+      ELSE IF( ICOMPZ.EQ.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( ILO.LT.1 ) THEN
+         INFO = -5
+      ELSE IF( IHI.GT.N .OR. IHI.LT.ILO-1 ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.N ) THEN
+         INFO = -8
+      ELSE IF( LDB.LT.N ) THEN
+         INFO = -10
+      ELSE IF( LDQ.LT.1 .OR. ( ILQ .AND. LDQ.LT.N ) ) THEN
+         INFO = -14
+      ELSE IF( LDZ.LT.1 .OR. ( ILZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -16
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -18
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHGEQZ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+*     WORK( 1 ) = CMPLX( 1 )
+      IF( N.LE.0 ) THEN
+         WORK( 1 ) = DCMPLX( 1 )
+         RETURN
+      END IF
+*
+*     Initialize Q and Z
+*
+      IF( ICOMPQ.EQ.3 )
+     $   CALL ZLASET( 'Full', N, N, CZERO, CONE, Q, LDQ )
+      IF( ICOMPZ.EQ.3 )
+     $   CALL ZLASET( 'Full', N, N, CZERO, CONE, Z, LDZ )
+*
+*     Machine Constants
+*
+      IN = IHI + 1 - ILO
+      SAFMIN = DLAMCH( 'S' )
+      ULP = DLAMCH( 'E' )*DLAMCH( 'B' )
+      ANORM = ZLANHS( 'F', IN, A( ILO, ILO ), LDA, RWORK )
+      BNORM = ZLANHS( 'F', IN, B( ILO, ILO ), LDB, RWORK )
+      ATOL = MAX( SAFMIN, ULP*ANORM )
+      BTOL = MAX( SAFMIN, ULP*BNORM )
+      ASCALE = ONE / MAX( SAFMIN, ANORM )
+      BSCALE = ONE / MAX( SAFMIN, BNORM )
+*
+*
+*     Set Eigenvalues IHI+1:N
+*
+      DO 10 J = IHI + 1, N
+         ABSB = ABS( B( J, J ) )
+         IF( ABSB.GT.SAFMIN ) THEN
+            SIGNBC = DCONJG( B( J, J ) / ABSB )
+            B( J, J ) = ABSB
+            IF( ILSCHR ) THEN
+               CALL ZSCAL( J-1, SIGNBC, B( 1, J ), 1 )
+               CALL ZSCAL( J, SIGNBC, A( 1, J ), 1 )
+            ELSE
+               A( J, J ) = A( J, J )*SIGNBC
+            END IF
+            IF( ILZ )
+     $         CALL ZSCAL( N, SIGNBC, Z( 1, J ), 1 )
+         ELSE
+            B( J, J ) = CZERO
+         END IF
+         ALPHA( J ) = A( J, J )
+         BETA( J ) = B( J, J )
+   10 CONTINUE
+*
+*     If IHI < ILO, skip QZ steps
+*
+      IF( IHI.LT.ILO )
+     $   GO TO 190
+*
+*     MAIN QZ ITERATION LOOP
+*
+*     Initialize dynamic indices
+*
+*     Eigenvalues ILAST+1:N have been found.
+*        Column operations modify rows IFRSTM:whatever
+*        Row operations modify columns whatever:ILASTM
+*
+*     If only eigenvalues are being computed, then
+*        IFRSTM is the row of the last splitting row above row ILAST;
+*        this is always at least ILO.
+*     IITER counts iterations since the last eigenvalue was found,
+*        to tell when to use an extraordinary shift.
+*     MAXIT is the maximum number of QZ sweeps allowed.
+*
+      ILAST = IHI
+      IF( ILSCHR ) THEN
+         IFRSTM = 1
+         ILASTM = N
+      ELSE
+         IFRSTM = ILO
+         ILASTM = IHI
+      END IF
+      IITER = 0
+      ESHIFT = CZERO
+      MAXIT = 30*( IHI-ILO+1 )
+*
+      DO 170 JITER = 1, MAXIT
+*
+*        Check for too many iterations.
+*
+         IF( JITER.GT.MAXIT )
+     $      GO TO 180
+*
+*        Split the matrix if possible.
+*
+*        Two tests:
+*           1: A(j,j-1)=0  or  j=ILO
+*           2: B(j,j)=0
+*
+*        Special case: j=ILAST
+*
+         IF( ILAST.EQ.ILO ) THEN
+            GO TO 60
+         ELSE
+            IF( ABS1( A( ILAST, ILAST-1 ) ).LE.ATOL ) THEN
+               A( ILAST, ILAST-1 ) = CZERO
+               GO TO 60
+            END IF
+         END IF
+*
+         IF( ABS( B( ILAST, ILAST ) ).LE.BTOL ) THEN
+            B( ILAST, ILAST ) = CZERO
+            GO TO 50
+         END IF
+*
+*        General case: j<ILAST
+*
+         DO 40 J = ILAST - 1, ILO, -1
+*
+*           Test 1: for A(j,j-1)=0 or j=ILO
+*
+            IF( J.EQ.ILO ) THEN
+               ILAZRO = .TRUE.
+            ELSE
+               IF( ABS1( A( J, J-1 ) ).LE.ATOL ) THEN
+                  A( J, J-1 ) = CZERO
+                  ILAZRO = .TRUE.
+               ELSE
+                  ILAZRO = .FALSE.
+               END IF
+            END IF
+*
+*           Test 2: for B(j,j)=0
+*
+            IF( ABS( B( J, J ) ).LT.BTOL ) THEN
+               B( J, J ) = CZERO
+*
+*              Test 1a: Check for 2 consecutive small subdiagonals in A
+*
+               ILAZR2 = .FALSE.
+               IF( .NOT.ILAZRO ) THEN
+                  IF( ABS1( A( J, J-1 ) )*( ASCALE*ABS1( A( J+1,
+     $                J ) ) ).LE.ABS1( A( J, J ) )*( ASCALE*ATOL ) )
+     $                ILAZR2 = .TRUE.
+               END IF
+*
+*              If both tests pass (1 & 2), i.e., the leading diagonal
+*              element of B in the block is zero, split a 1x1 block off
+*              at the top. (I.e., at the J-th row/column) The leading
+*              diagonal element of the remainder can also be zero, so
+*              this may have to be done repeatedly.
+*
+               IF( ILAZRO .OR. ILAZR2 ) THEN
+                  DO 20 JCH = J, ILAST - 1
+                     CTEMP = A( JCH, JCH )
+                     CALL ZLARTG( CTEMP, A( JCH+1, JCH ), C, S,
+     $                            A( JCH, JCH ) )
+                     A( JCH+1, JCH ) = CZERO
+                     CALL ZROT( ILASTM-JCH, A( JCH, JCH+1 ), LDA,
+     $                          A( JCH+1, JCH+1 ), LDA, C, S )
+                     CALL ZROT( ILASTM-JCH, B( JCH, JCH+1 ), LDB,
+     $                          B( JCH+1, JCH+1 ), LDB, C, S )
+                     IF( ILQ )
+     $                  CALL ZROT( N, Q( 1, JCH ), 1, Q( 1, JCH+1 ), 1,
+     $                             C, DCONJG( S ) )
+                     IF( ILAZR2 )
+     $                  A( JCH, JCH-1 ) = A( JCH, JCH-1 )*C
+                     ILAZR2 = .FALSE.
+                     IF( ABS1( B( JCH+1, JCH+1 ) ).GE.BTOL ) THEN
+                        IF( JCH+1.GE.ILAST ) THEN
+                           GO TO 60
+                        ELSE
+                           IFIRST = JCH + 1
+                           GO TO 70
+                        END IF
+                     END IF
+                     B( JCH+1, JCH+1 ) = CZERO
+   20             CONTINUE
+                  GO TO 50
+               ELSE
+*
+*                 Only test 2 passed -- chase the zero to B(ILAST,ILAST)
+*                 Then process as in the case B(ILAST,ILAST)=0
+*
+                  DO 30 JCH = J, ILAST - 1
+                     CTEMP = B( JCH, JCH+1 )
+                     CALL ZLARTG( CTEMP, B( JCH+1, JCH+1 ), C, S,
+     $                            B( JCH, JCH+1 ) )
+                     B( JCH+1, JCH+1 ) = CZERO
+                     IF( JCH.LT.ILASTM-1 )
+     $                  CALL ZROT( ILASTM-JCH-1, B( JCH, JCH+2 ), LDB,
+     $                             B( JCH+1, JCH+2 ), LDB, C, S )
+                     CALL ZROT( ILASTM-JCH+2, A( JCH, JCH-1 ), LDA,
+     $                          A( JCH+1, JCH-1 ), LDA, C, S )
+                     IF( ILQ )
+     $                  CALL ZROT( N, Q( 1, JCH ), 1, Q( 1, JCH+1 ), 1,
+     $                             C, DCONJG( S ) )
+                     CTEMP = A( JCH+1, JCH )
+                     CALL ZLARTG( CTEMP, A( JCH+1, JCH-1 ), C, S,
+     $                            A( JCH+1, JCH ) )
+                     A( JCH+1, JCH-1 ) = CZERO
+                     CALL ZROT( JCH+1-IFRSTM, A( IFRSTM, JCH ), 1,
+     $                          A( IFRSTM, JCH-1 ), 1, C, S )
+                     CALL ZROT( JCH-IFRSTM, B( IFRSTM, JCH ), 1,
+     $                          B( IFRSTM, JCH-1 ), 1, C, S )
+                     IF( ILZ )
+     $                  CALL ZROT( N, Z( 1, JCH ), 1, Z( 1, JCH-1 ), 1,
+     $                             C, S )
+   30             CONTINUE
+                  GO TO 50
+               END IF
+            ELSE IF( ILAZRO ) THEN
+*
+*              Only test 1 passed -- work on J:ILAST
+*
+               IFIRST = J
+               GO TO 70
+            END IF
+*
+*           Neither test passed -- try next J
+*
+   40    CONTINUE
+*
+*        (Drop-through is "impossible")
+*
+         INFO = 2*N + 1
+         GO TO 210
+*
+*        B(ILAST,ILAST)=0 -- clear A(ILAST,ILAST-1) to split off a
+*        1x1 block.
+*
+   50    CONTINUE
+         CTEMP = A( ILAST, ILAST )
+         CALL ZLARTG( CTEMP, A( ILAST, ILAST-1 ), C, S,
+     $                A( ILAST, ILAST ) )
+         A( ILAST, ILAST-1 ) = CZERO
+         CALL ZROT( ILAST-IFRSTM, A( IFRSTM, ILAST ), 1,
+     $              A( IFRSTM, ILAST-1 ), 1, C, S )
+         CALL ZROT( ILAST-IFRSTM, B( IFRSTM, ILAST ), 1,
+     $              B( IFRSTM, ILAST-1 ), 1, C, S )
+         IF( ILZ )
+     $      CALL ZROT( N, Z( 1, ILAST ), 1, Z( 1, ILAST-1 ), 1, C, S )
+*
+*        A(ILAST,ILAST-1)=0 -- Standardize B, set ALPHA and BETA
+*
+   60    CONTINUE
+         ABSB = ABS( B( ILAST, ILAST ) )
+         IF( ABSB.GT.SAFMIN ) THEN
+            SIGNBC = DCONJG( B( ILAST, ILAST ) / ABSB )
+            B( ILAST, ILAST ) = ABSB
+            IF( ILSCHR ) THEN
+               CALL ZSCAL( ILAST-IFRSTM, SIGNBC, B( IFRSTM, ILAST ), 1 )
+               CALL ZSCAL( ILAST+1-IFRSTM, SIGNBC, A( IFRSTM, ILAST ),
+     $                     1 )
+            ELSE
+               A( ILAST, ILAST ) = A( ILAST, ILAST )*SIGNBC
+            END IF
+            IF( ILZ )
+     $         CALL ZSCAL( N, SIGNBC, Z( 1, ILAST ), 1 )
+         ELSE
+            B( ILAST, ILAST ) = CZERO
+         END IF
+         ALPHA( ILAST ) = A( ILAST, ILAST )
+         BETA( ILAST ) = B( ILAST, ILAST )
+*
+*        Go to next block -- exit if finished.
+*
+         ILAST = ILAST - 1
+         IF( ILAST.LT.ILO )
+     $      GO TO 190
+*
+*        Reset counters
+*
+         IITER = 0
+         ESHIFT = CZERO
+         IF( .NOT.ILSCHR ) THEN
+            ILASTM = ILAST
+            IF( IFRSTM.GT.ILAST )
+     $         IFRSTM = ILO
+         END IF
+         GO TO 160
+*
+*        QZ step
+*
+*        This iteration only involves rows/columns IFIRST:ILAST.  We
+*        assume IFIRST < ILAST, and that the diagonal of B is non-zero.
+*
+   70    CONTINUE
+         IITER = IITER + 1
+         IF( .NOT.ILSCHR ) THEN
+            IFRSTM = IFIRST
+         END IF
+*
+*        Compute the Shift.
+*
+*        At this point, IFIRST < ILAST, and the diagonal elements of
+*        B(IFIRST:ILAST,IFIRST,ILAST) are larger than BTOL (in
+*        magnitude)
+*
+         IF( ( IITER / 10 )*10.NE.IITER ) THEN
+*
+*           The Wilkinson shift (AEP p.512), i.e., the eigenvalue of
+*           the bottom-right 2x2 block of A inv(B) which is nearest to
+*           the bottom-right element.
+*
+*           We factor B as U*D, where U has unit diagonals, and
+*           compute (A*inv(D))*inv(U).
+*
+            U12 = ( BSCALE*B( ILAST-1, ILAST ) ) /
+     $            ( BSCALE*B( ILAST, ILAST ) )
+            AD11 = ( ASCALE*A( ILAST-1, ILAST-1 ) ) /
+     $             ( BSCALE*B( ILAST-1, ILAST-1 ) )
+            AD21 = ( ASCALE*A( ILAST, ILAST-1 ) ) /
+     $             ( BSCALE*B( ILAST-1, ILAST-1 ) )
+            AD12 = ( ASCALE*A( ILAST-1, ILAST ) ) /
+     $             ( BSCALE*B( ILAST, ILAST ) )
+            AD22 = ( ASCALE*A( ILAST, ILAST ) ) /
+     $             ( BSCALE*B( ILAST, ILAST ) )
+            ABI22 = AD22 - U12*AD21
+*
+            T = HALF*( AD11+ABI22 )
+            RTDISC = SQRT( T**2+AD12*AD21-AD11*AD22 )
+            TEMP = DBLE( T-ABI22 )*DBLE( RTDISC ) +
+     $             DIMAG( T-ABI22 )*DIMAG( RTDISC )
+            IF( TEMP.LE.ZERO ) THEN
+               SHIFT = T + RTDISC
+            ELSE
+               SHIFT = T - RTDISC
+            END IF
+         ELSE
+*
+*           Exceptional shift.  Chosen for no particularly good reason.
+*
+            ESHIFT = ESHIFT + DCONJG( ( ASCALE*A( ILAST-1, ILAST ) ) /
+     $               ( BSCALE*B( ILAST-1, ILAST-1 ) ) )
+            SHIFT = ESHIFT
+         END IF
+*
+*        Now check for two consecutive small subdiagonals.
+*
+         DO 80 J = ILAST - 1, IFIRST + 1, -1
+            ISTART = J
+            CTEMP = ASCALE*A( J, J ) - SHIFT*( BSCALE*B( J, J ) )
+            TEMP = ABS1( CTEMP )
+            TEMP2 = ASCALE*ABS1( A( J+1, J ) )
+            TEMPR = MAX( TEMP, TEMP2 )
+            IF( TEMPR.LT.ONE .AND. TEMPR.NE.ZERO ) THEN
+               TEMP = TEMP / TEMPR
+               TEMP2 = TEMP2 / TEMPR
+            END IF
+            IF( ABS1( A( J, J-1 ) )*TEMP2.LE.TEMP*ATOL )
+     $         GO TO 90
+   80    CONTINUE
+*
+         ISTART = IFIRST
+         CTEMP = ASCALE*A( IFIRST, IFIRST ) -
+     $           SHIFT*( BSCALE*B( IFIRST, IFIRST ) )
+   90    CONTINUE
+*
+*        Do an implicit-shift QZ sweep.
+*
+*        Initial Q
+*
+         CTEMP2 = ASCALE*A( ISTART+1, ISTART )
+         CALL ZLARTG( CTEMP, CTEMP2, C, S, CTEMP3 )
+*
+*        Sweep
+*
+         DO 150 J = ISTART, ILAST - 1
+            IF( J.GT.ISTART ) THEN
+               CTEMP = A( J, J-1 )
+               CALL ZLARTG( CTEMP, A( J+1, J-1 ), C, S, A( J, J-1 ) )
+               A( J+1, J-1 ) = CZERO
+            END IF
+*
+            DO 100 JC = J, ILASTM
+               CTEMP = C*A( J, JC ) + S*A( J+1, JC )
+               A( J+1, JC ) = -DCONJG( S )*A( J, JC ) + C*A( J+1, JC )
+               A( J, JC ) = CTEMP
+               CTEMP2 = C*B( J, JC ) + S*B( J+1, JC )
+               B( J+1, JC ) = -DCONJG( S )*B( J, JC ) + C*B( J+1, JC )
+               B( J, JC ) = CTEMP2
+  100       CONTINUE
+            IF( ILQ ) THEN
+               DO 110 JR = 1, N
+                  CTEMP = C*Q( JR, J ) + DCONJG( S )*Q( JR, J+1 )
+                  Q( JR, J+1 ) = -S*Q( JR, J ) + C*Q( JR, J+1 )
+                  Q( JR, J ) = CTEMP
+  110          CONTINUE
+            END IF
+*
+            CTEMP = B( J+1, J+1 )
+            CALL ZLARTG( CTEMP, B( J+1, J ), C, S, B( J+1, J+1 ) )
+            B( J+1, J ) = CZERO
+*
+            DO 120 JR = IFRSTM, MIN( J+2, ILAST )
+               CTEMP = C*A( JR, J+1 ) + S*A( JR, J )
+               A( JR, J ) = -DCONJG( S )*A( JR, J+1 ) + C*A( JR, J )
+               A( JR, J+1 ) = CTEMP
+  120       CONTINUE
+            DO 130 JR = IFRSTM, J
+               CTEMP = C*B( JR, J+1 ) + S*B( JR, J )
+               B( JR, J ) = -DCONJG( S )*B( JR, J+1 ) + C*B( JR, J )
+               B( JR, J+1 ) = CTEMP
+  130       CONTINUE
+            IF( ILZ ) THEN
+               DO 140 JR = 1, N
+                  CTEMP = C*Z( JR, J+1 ) + S*Z( JR, J )
+                  Z( JR, J ) = -DCONJG( S )*Z( JR, J+1 ) + C*Z( JR, J )
+                  Z( JR, J+1 ) = CTEMP
+  140          CONTINUE
+            END IF
+  150    CONTINUE
+*
+  160    CONTINUE
+*
+  170 CONTINUE
+*
+*     Drop-through = non-convergence
+*
+  180 CONTINUE
+      INFO = ILAST
+      GO TO 210
+*
+*     Successful completion of all QZ steps
+*
+  190 CONTINUE
+*
+*     Set Eigenvalues 1:ILO-1
+*
+      DO 200 J = 1, ILO - 1
+         ABSB = ABS( B( J, J ) )
+         IF( ABSB.GT.SAFMIN ) THEN
+            SIGNBC = DCONJG( B( J, J ) / ABSB )
+            B( J, J ) = ABSB
+            IF( ILSCHR ) THEN
+               CALL ZSCAL( J-1, SIGNBC, B( 1, J ), 1 )
+               CALL ZSCAL( J, SIGNBC, A( 1, J ), 1 )
+            ELSE
+               A( J, J ) = A( J, J )*SIGNBC
+            END IF
+            IF( ILZ )
+     $         CALL ZSCAL( N, SIGNBC, Z( 1, J ), 1 )
+         ELSE
+            B( J, J ) = CZERO
+         END IF
+         ALPHA( J ) = A( J, J )
+         BETA( J ) = B( J, J )
+  200 CONTINUE
+*
+*     Normal Termination
+*
+      INFO = 0
+*
+*     Exit (other than argument error) -- return optimal workspace size
+*
+  210 CONTINUE
+      WORK( 1 ) = DCMPLX( N )
+      RETURN
+*
+*     End of ZHGEQZ
+*
+      END
diff --git a/costa/native/external/lapack/zhpcon.f b/costa/native/external/lapack/zhpcon.f
new file mode 100644
index 000000000..028a9b10c
--- /dev/null
+++ b/costa/native/external/lapack/zhpcon.f
@@ -0,0 +1,155 @@
+      SUBROUTINE ZHPCON( UPLO, N, AP, IPIV, ANORM, RCOND, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+      DOUBLE PRECISION   ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         AP( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHPCON estimates the reciprocal of the condition number of a complex
+*  Hermitian packed matrix A using the factorization A = U*D*U**H or
+*  A = L*D*L**H computed by ZHPTRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**H;
+*          = 'L':  Lower triangular, form is A = L*D*L**H.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          The block diagonal matrix D and the multipliers used to
+*          obtain the factor U or L as computed by ZHPTRF, stored as a
+*          packed triangular matrix.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by ZHPTRF.
+*
+*  ANORM   (input) DOUBLE PRECISION
+*          The 1-norm of the original matrix A.
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an
+*          estimate of the 1-norm of inv(A) computed in this routine.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, IP, KASE
+      DOUBLE PRECISION   AINVNM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZHPTRS, ZLACON
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHPCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.LE.ZERO ) THEN
+         RETURN
+      END IF
+*
+*     Check that the diagonal matrix D is nonsingular.
+*
+      IF( UPPER ) THEN
+*
+*        Upper triangular storage: examine D from bottom to top
+*
+         IP = N*( N+1 ) / 2
+         DO 10 I = N, 1, -1
+            IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO )
+     $         RETURN
+            IP = IP - I
+   10    CONTINUE
+      ELSE
+*
+*        Lower triangular storage: examine D from top to bottom.
+*
+         IP = 1
+         DO 20 I = 1, N
+            IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO )
+     $         RETURN
+            IP = IP + N - I + 1
+   20    CONTINUE
+      END IF
+*
+*     Estimate the 1-norm of the inverse.
+*
+      KASE = 0
+   30 CONTINUE
+      CALL ZLACON( N, WORK( N+1 ), WORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+*
+*        Multiply by inv(L*D*L') or inv(U*D*U').
+*
+         CALL ZHPTRS( UPLO, N, 1, AP, IPIV, WORK, N, INFO )
+         GO TO 30
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+      RETURN
+*
+*     End of ZHPCON
+*
+      END
diff --git a/costa/native/external/lapack/zhpev.f b/costa/native/external/lapack/zhpev.f
new file mode 100644
index 000000000..110af19ef
--- /dev/null
+++ b/costa/native/external/lapack/zhpev.f
@@ -0,0 +1,197 @@
+      SUBROUTINE ZHPEV( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, RWORK,
+     $                  INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   RWORK( * ), W( * )
+      COMPLEX*16         AP( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHPEV computes all the eigenvalues and, optionally, eigenvectors of a
+*  complex Hermitian matrix in packed storage.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, AP is overwritten by values generated during the
+*          reduction to tridiagonal form.  If UPLO = 'U', the diagonal
+*          and first superdiagonal of the tridiagonal matrix T overwrite
+*          the corresponding elements of A, and if UPLO = 'L', the
+*          diagonal and first subdiagonal of T overwrite the
+*          corresponding elements of A.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) COMPLEX*16 array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal
+*          eigenvectors of the matrix A, with the i-th column of Z
+*          holding the eigenvector associated with W(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (max(1, 2*N-1))
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (max(1, 3*N-2))
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = i, the algorithm failed to converge; i
+*                off-diagonal elements of an intermediate tridiagonal
+*                form did not converge to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            WANTZ
+      INTEGER            IINFO, IMAX, INDE, INDRWK, INDTAU, INDWRK,
+     $                   ISCALE
+      DOUBLE PRECISION   ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
+     $                   SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, ZLANHP
+      EXTERNAL           LSAME, DLAMCH, ZLANHP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSCAL, DSTERF, XERBLA, ZDSCAL, ZHPTRD, ZSTEQR,
+     $                   ZUPGTR
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( LSAME( UPLO, 'L' ) .OR. LSAME( UPLO, 'U' ) ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -7
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHPEV ', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         W( 1 ) = AP( 1 )
+         RWORK( 1 ) = 1
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      EPS = DLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = SQRT( BIGNUM )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ANRM = ZLANHP( 'M', UPLO, N, AP, RWORK )
+      ISCALE = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         CALL ZDSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 )
+      END IF
+*
+*     Call ZHPTRD to reduce Hermitian packed matrix to tridiagonal form.
+*
+      INDE = 1
+      INDTAU = 1
+      CALL ZHPTRD( UPLO, N, AP, W, RWORK( INDE ), WORK( INDTAU ),
+     $             IINFO )
+*
+*     For eigenvalues only, call DSTERF.  For eigenvectors, first call
+*     ZUPGTR to generate the orthogonal matrix, then call ZSTEQR.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL DSTERF( N, W, RWORK( INDE ), INFO )
+      ELSE
+         INDWRK = INDTAU + N
+         CALL ZUPGTR( UPLO, N, AP, WORK( INDTAU ), Z, LDZ,
+     $                WORK( INDWRK ), IINFO )
+         INDRWK = INDE + N
+         CALL ZSTEQR( JOBZ, N, W, RWORK( INDE ), Z, LDZ,
+     $                RWORK( INDRWK ), INFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = N
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL DSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+      RETURN
+*
+*     End of ZHPEV
+*
+      END
diff --git a/costa/native/external/lapack/zhpevd.f b/costa/native/external/lapack/zhpevd.f
new file mode 100644
index 000000000..e4dbf766e
--- /dev/null
+++ b/costa/native/external/lapack/zhpevd.f
@@ -0,0 +1,280 @@
+      SUBROUTINE ZHPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK,
+     $                   RWORK, LRWORK, IWORK, LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, LDZ, LIWORK, LRWORK, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   RWORK( * ), W( * )
+      COMPLEX*16         AP( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHPEVD computes all the eigenvalues and, optionally, eigenvectors of
+*  a complex Hermitian matrix A in packed storage.  If eigenvectors are
+*  desired, it uses a divide and conquer algorithm.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, AP is overwritten by values generated during the
+*          reduction to tridiagonal form.  If UPLO = 'U', the diagonal
+*          and first superdiagonal of the tridiagonal matrix T overwrite
+*          the corresponding elements of A, and if UPLO = 'L', the
+*          diagonal and first subdiagonal of T overwrite the
+*          corresponding elements of A.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) COMPLEX*16 array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the orthonormal
+*          eigenvectors of the matrix A, with the i-th column of Z
+*          holding the eigenvector associated with W(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of array WORK.
+*          If N <= 1,               LWORK must be at least 1.
+*          If JOBZ = 'N' and N > 1, LWORK must be at least N.
+*          If JOBZ = 'V' and N > 1, LWORK must be at least 2*N.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace/output) DOUBLE PRECISION array,
+*                                         dimension (LRWORK)
+*          On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
+*
+*  LRWORK  (input) INTEGER
+*          The dimension of array RWORK.
+*          If N <= 1,               LRWORK must be at least 1.
+*          If JOBZ = 'N' and N > 1, LRWORK must be at least N.
+*          If JOBZ = 'V' and N > 1, LRWORK must be at least
+*                    1 + 5*N + 2*N**2.
+*
+*          If LRWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the RWORK array,
+*          returns this value as the first entry of the RWORK array, and
+*          no error message related to LRWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of array IWORK.
+*          If JOBZ  = 'N' or N <= 1, LIWORK must be at least 1.
+*          If JOBZ  = 'V' and N > 1, LIWORK must be at least 3 + 5*N.
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = i, the algorithm failed to converge; i
+*                off-diagonal elements of an intermediate tridiagonal
+*                form did not converge to zero.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+      COMPLEX*16         CONE
+      PARAMETER          ( CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, WANTZ
+      INTEGER            IINFO, IMAX, INDE, INDRWK, INDTAU, INDWRK,
+     $                   ISCALE, LIWMIN, LLRWK, LLWRK, LRWMIN, LWMIN
+      DOUBLE PRECISION   ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN, SIGMA,
+     $                   SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, ZLANHP
+      EXTERNAL           LSAME, DLAMCH, ZLANHP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSCAL, DSTERF, XERBLA, ZDSCAL, ZHPTRD, ZSTEDC,
+     $                   ZUPMTR
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( N.LE.1 ) THEN
+         LWMIN = 1
+         LIWMIN = 1
+         LRWMIN = 1
+      ELSE
+         IF( WANTZ ) THEN
+            LWMIN = 2*N
+            LRWMIN = 1 + 5*N + 2*N**2
+            LIWMIN = 3 + 5*N
+         ELSE
+            LWMIN = N
+            LRWMIN = N
+            LIWMIN = 1
+         END IF
+      END IF
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( LSAME( UPLO, 'L' ) .OR. LSAME( UPLO, 'U' ) ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -7
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -9
+      ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -11
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+         RWORK( 1 ) = LRWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHPEVD', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         W( 1 ) = AP( 1 )
+         IF( WANTZ )
+     $      Z( 1, 1 ) = CONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      EPS = DLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = SQRT( BIGNUM )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ANRM = ZLANHP( 'M', UPLO, N, AP, RWORK )
+      ISCALE = 0
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         CALL ZDSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 )
+      END IF
+*
+*     Call ZHPTRD to reduce Hermitian packed matrix to tridiagonal form.
+*
+      INDE = 1
+      INDTAU = 1
+      INDRWK = INDE + N
+      INDWRK = INDTAU + N
+      LLWRK = LWORK - INDWRK + 1
+      LLRWK = LRWORK - INDRWK + 1
+      CALL ZHPTRD( UPLO, N, AP, W, RWORK( INDE ), WORK( INDTAU ),
+     $             IINFO )
+*
+*     For eigenvalues only, call DSTERF.  For eigenvectors, first call
+*     ZUPGTR to generate the orthogonal matrix, then call ZSTEDC.
+*
+      IF( .NOT.WANTZ ) THEN
+         CALL DSTERF( N, W, RWORK( INDE ), INFO )
+      ELSE
+         CALL ZSTEDC( 'I', N, W, RWORK( INDE ), Z, LDZ, WORK( INDWRK ),
+     $                LLWRK, RWORK( INDRWK ), LLRWK, IWORK, LIWORK,
+     $                INFO )
+         CALL ZUPMTR( 'L', UPLO, 'N', N, N, AP, WORK( INDTAU ), Z, LDZ,
+     $                WORK( INDWRK ), IINFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = N
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL DSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+      WORK( 1 ) = LWMIN
+      RWORK( 1 ) = LRWMIN
+      IWORK( 1 ) = LIWMIN
+      RETURN
+*
+*     End of ZHPEVD
+*
+      END
diff --git a/costa/native/external/lapack/zhpevx.f b/costa/native/external/lapack/zhpevx.f
new file mode 100644
index 000000000..ffda9fb7a
--- /dev/null
+++ b/costa/native/external/lapack/zhpevx.f
@@ -0,0 +1,384 @@
+      SUBROUTINE ZHPEVX( JOBZ, RANGE, UPLO, N, AP, VL, VU, IL, IU,
+     $                   ABSTOL, M, W, Z, LDZ, WORK, RWORK, IWORK,
+     $                   IFAIL, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE, UPLO
+      INTEGER            IL, INFO, IU, LDZ, M, N
+      DOUBLE PRECISION   ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IFAIL( * ), IWORK( * )
+      DOUBLE PRECISION   RWORK( * ), W( * )
+      COMPLEX*16         AP( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHPEVX computes selected eigenvalues and, optionally, eigenvectors
+*  of a complex Hermitian matrix A in packed storage.
+*  Eigenvalues/vectors can be selected by specifying either a range of
+*  values or a range of indices for the desired eigenvalues.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found;
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found;
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, AP is overwritten by values generated during the
+*          reduction to tridiagonal form.  If UPLO = 'U', the diagonal
+*          and first superdiagonal of the tridiagonal matrix T overwrite
+*          the corresponding elements of A, and if UPLO = 'L', the
+*          diagonal and first subdiagonal of T overwrite the
+*          corresponding elements of A.
+*
+*  VL      (input) DOUBLE PRECISION
+*  VU      (input) DOUBLE PRECISION
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) DOUBLE PRECISION
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less than
+*          or equal to zero, then  EPS*|T|  will be used in its place,
+*          where |T| is the 1-norm of the tridiagonal matrix obtained
+*          by reducing AP to tridiagonal form.
+*
+*          Eigenvalues will be computed most accurately when ABSTOL is
+*          set to twice the underflow threshold 2*DLAMCH('S'), not zero.
+*          If this routine returns with INFO>0, indicating that some
+*          eigenvectors did not converge, try setting ABSTOL to
+*          2*DLAMCH('S').
+*
+*          See "Computing Small Singular Values of Bidiagonal Matrices
+*          with Guaranteed High Relative Accuracy," by Demmel and
+*          Kahan, LAPACK Working Note #3.
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, the selected eigenvalues in ascending order.
+*
+*  Z       (output) COMPLEX*16 array, dimension (LDZ, max(1,M))
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix A
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          If an eigenvector fails to converge, then that column of Z
+*          contains the latest approximation to the eigenvector, and
+*          the index of the eigenvector is returned in IFAIL.
+*          If JOBZ = 'N', then Z is not referenced.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (7*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (5*N)
+*
+*  IFAIL   (output) INTEGER array, dimension (N)
+*          If JOBZ = 'V', then if INFO = 0, the first M elements of
+*          IFAIL are zero.  If INFO > 0, then IFAIL contains the
+*          indices of the eigenvectors that failed to converge.
+*          If JOBZ = 'N', then IFAIL is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, then i eigenvectors failed to converge.
+*                Their indices are stored in array IFAIL.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+      COMPLEX*16         CONE
+      PARAMETER          ( CONE = ( 1.0D0, 0.0D0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, VALEIG, WANTZ
+      CHARACTER          ORDER
+      INTEGER            I, IINFO, IMAX, INDD, INDE, INDEE, INDIBL,
+     $                   INDISP, INDIWK, INDRWK, INDTAU, INDWRK, ISCALE,
+     $                   ITMP1, J, JJ, NSPLIT
+      DOUBLE PRECISION   ABSTLL, ANRM, BIGNUM, EPS, RMAX, RMIN, SAFMIN,
+     $                   SIGMA, SMLNUM, TMP1, VLL, VUU
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, ZLANHP
+      EXTERNAL           LSAME, DLAMCH, ZLANHP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DSCAL, DSTEBZ, DSTERF, XERBLA, ZDSCAL,
+     $                   ZHPTRD, ZSTEIN, ZSTEQR, ZSWAP, ZUPGTR, ZUPMTR
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( LSAME( UPLO, 'L' ) .OR. LSAME( UPLO, 'U' ) ) )
+     $          THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE
+         IF( VALEIG ) THEN
+            IF( N.GT.0 .AND. VU.LE.VL )
+     $         INFO = -7
+         ELSE IF( INDEIG ) THEN
+            IF( IL.LT.1 .OR. IL.GT.MAX( 1, N ) ) THEN
+               INFO = -8
+            ELSE IF( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) THEN
+               INFO = -9
+            END IF
+         END IF
+      END IF
+      IF( INFO.EQ.0 ) THEN
+         IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) )
+     $      INFO = -14
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHPEVX', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( ALLEIG .OR. INDEIG ) THEN
+            M = 1
+            W( 1 ) = AP( 1 )
+         ELSE
+            IF( VL.LT.DBLE( AP( 1 ) ) .AND. VU.GE.DBLE( AP( 1 ) ) ) THEN
+               M = 1
+               W( 1 ) = AP( 1 )
+            END IF
+         END IF
+         IF( WANTZ )
+     $      Z( 1, 1 ) = CONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      EPS = DLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      ISCALE = 0
+      ABSTLL = ABSTOL
+      IF( VALEIG ) THEN
+         VLL = VL
+         VUU = VU
+      ELSE
+         VLL = ZERO
+         VUU = ZERO
+      END IF
+      ANRM = ZLANHP( 'M', UPLO, N, AP, RWORK )
+      IF( ANRM.GT.ZERO .AND. ANRM.LT.RMIN ) THEN
+         ISCALE = 1
+         SIGMA = RMIN / ANRM
+      ELSE IF( ANRM.GT.RMAX ) THEN
+         ISCALE = 1
+         SIGMA = RMAX / ANRM
+      END IF
+      IF( ISCALE.EQ.1 ) THEN
+         CALL ZDSCAL( ( N*( N+1 ) ) / 2, SIGMA, AP, 1 )
+         IF( ABSTOL.GT.0 )
+     $      ABSTLL = ABSTOL*SIGMA
+         IF( VALEIG ) THEN
+            VLL = VL*SIGMA
+            VUU = VU*SIGMA
+         END IF
+      END IF
+*
+*     Call ZHPTRD to reduce Hermitian packed matrix to tridiagonal form.
+*
+      INDD = 1
+      INDE = INDD + N
+      INDRWK = INDE + N
+      INDTAU = 1
+      INDWRK = INDTAU + N
+      CALL ZHPTRD( UPLO, N, AP, RWORK( INDD ), RWORK( INDE ),
+     $             WORK( INDTAU ), IINFO )
+*
+*     If all eigenvalues are desired and ABSTOL is less than or equal
+*     to zero, then call DSTERF or ZUPGTR and ZSTEQR.  If this fails
+*     for some eigenvalue, then try DSTEBZ.
+*
+      IF( ( ALLEIG .OR. ( INDEIG .AND. IL.EQ.1 .AND. IU.EQ.N ) ) .AND.
+     $    ( ABSTOL.LE.ZERO ) ) THEN
+         CALL DCOPY( N, RWORK( INDD ), 1, W, 1 )
+         INDEE = INDRWK + 2*N
+         IF( .NOT.WANTZ ) THEN
+            CALL DCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 )
+            CALL DSTERF( N, W, RWORK( INDEE ), INFO )
+         ELSE
+            CALL ZUPGTR( UPLO, N, AP, WORK( INDTAU ), Z, LDZ,
+     $                   WORK( INDWRK ), IINFO )
+            CALL DCOPY( N-1, RWORK( INDE ), 1, RWORK( INDEE ), 1 )
+            CALL ZSTEQR( JOBZ, N, W, RWORK( INDEE ), Z, LDZ,
+     $                   RWORK( INDRWK ), INFO )
+            IF( INFO.EQ.0 ) THEN
+               DO 10 I = 1, N
+                  IFAIL( I ) = 0
+   10          CONTINUE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            M = N
+            GO TO 20
+         END IF
+         INFO = 0
+      END IF
+*
+*     Otherwise, call DSTEBZ and, if eigenvectors are desired, ZSTEIN.
+*
+      IF( WANTZ ) THEN
+         ORDER = 'B'
+      ELSE
+         ORDER = 'E'
+      END IF
+      INDIBL = 1
+      INDISP = INDIBL + N
+      INDIWK = INDISP + N
+      CALL DSTEBZ( RANGE, ORDER, N, VLL, VUU, IL, IU, ABSTLL,
+     $             RWORK( INDD ), RWORK( INDE ), M, NSPLIT, W,
+     $             IWORK( INDIBL ), IWORK( INDISP ), RWORK( INDRWK ),
+     $             IWORK( INDIWK ), INFO )
+*
+      IF( WANTZ ) THEN
+         CALL ZSTEIN( N, RWORK( INDD ), RWORK( INDE ), M, W,
+     $                IWORK( INDIBL ), IWORK( INDISP ), Z, LDZ,
+     $                RWORK( INDRWK ), IWORK( INDIWK ), IFAIL, INFO )
+*
+*        Apply unitary matrix used in reduction to tridiagonal
+*        form to eigenvectors returned by ZSTEIN.
+*
+         INDWRK = INDTAU + N
+         CALL ZUPMTR( 'L', UPLO, 'N', N, M, AP, WORK( INDTAU ), Z, LDZ,
+     $                WORK( INDWRK ), INFO )
+      END IF
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+   20 CONTINUE
+      IF( ISCALE.EQ.1 ) THEN
+         IF( INFO.EQ.0 ) THEN
+            IMAX = M
+         ELSE
+            IMAX = INFO - 1
+         END IF
+         CALL DSCAL( IMAX, ONE / SIGMA, W, 1 )
+      END IF
+*
+*     If eigenvalues are not in order, then sort them, along with
+*     eigenvectors.
+*
+      IF( WANTZ ) THEN
+         DO 40 J = 1, M - 1
+            I = 0
+            TMP1 = W( J )
+            DO 30 JJ = J + 1, M
+               IF( W( JJ ).LT.TMP1 ) THEN
+                  I = JJ
+                  TMP1 = W( JJ )
+               END IF
+   30       CONTINUE
+*
+            IF( I.NE.0 ) THEN
+               ITMP1 = IWORK( INDIBL+I-1 )
+               W( I ) = W( J )
+               IWORK( INDIBL+I-1 ) = IWORK( INDIBL+J-1 )
+               W( J ) = TMP1
+               IWORK( INDIBL+J-1 ) = ITMP1
+               CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 )
+               IF( INFO.NE.0 ) THEN
+                  ITMP1 = IFAIL( I )
+                  IFAIL( I ) = IFAIL( J )
+                  IFAIL( J ) = ITMP1
+               END IF
+            END IF
+   40    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZHPEVX
+*
+      END
diff --git a/costa/native/external/lapack/zhpgst.f b/costa/native/external/lapack/zhpgst.f
new file mode 100644
index 000000000..69aeec055
--- /dev/null
+++ b/costa/native/external/lapack/zhpgst.f
@@ -0,0 +1,216 @@
+      SUBROUTINE ZHPGST( ITYPE, UPLO, N, AP, BP, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, ITYPE, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         AP( * ), BP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHPGST reduces a complex Hermitian-definite generalized
+*  eigenproblem to standard form, using packed storage.
+*
+*  If ITYPE = 1, the problem is A*x = lambda*B*x,
+*  and A is overwritten by inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H)
+*
+*  If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or
+*  B*A*x = lambda*x, and A is overwritten by U*A*U**H or L**H*A*L.
+*
+*  B must have been previously factorized as U**H*U or L*L**H by ZPPTRF.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          = 1: compute inv(U**H)*A*inv(U) or inv(L)*A*inv(L**H);
+*          = 2 or 3: compute U*A*U**H or L**H*A*L.
+*
+*  UPLO    (input) CHARACTER
+*          = 'U':  Upper triangle of A is stored and B is factored as
+*                  U**H*U;
+*          = 'L':  Lower triangle of A is stored and B is factored as
+*                  L*L**H.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  AP      (input/output) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, if INFO = 0, the transformed matrix, stored in the
+*          same format as A.
+*
+*  BP      (input) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          The triangular factor from the Cholesky factorization of B,
+*          stored in the same format as A, as returned by ZPPTRF.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, HALF
+      PARAMETER          ( ONE = 1.0D+0, HALF = 0.5D+0 )
+      COMPLEX*16         CONE
+      PARAMETER          ( CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, J1, J1J1, JJ, K, K1, K1K1, KK
+      DOUBLE PRECISION   AJJ, AKK, BJJ, BKK
+      COMPLEX*16         CT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZAXPY, ZDSCAL, ZHPMV, ZHPR2, ZTPMV,
+     $                   ZTPSV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      COMPLEX*16         ZDOTC
+      EXTERNAL           LSAME, ZDOTC
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHPGST', -INFO )
+         RETURN
+      END IF
+*
+      IF( ITYPE.EQ.1 ) THEN
+         IF( UPPER ) THEN
+*
+*           Compute inv(U')*A*inv(U)
+*
+*           J1 and JJ are the indices of A(1,j) and A(j,j)
+*
+            JJ = 0
+            DO 10 J = 1, N
+               J1 = JJ + 1
+               JJ = JJ + J
+*
+*              Compute the j-th column of the upper triangle of A
+*
+               AP( JJ ) = DBLE( AP( JJ ) )
+               BJJ = BP( JJ )
+               CALL ZTPSV( UPLO, 'Conjugate transpose', 'Non-unit', J,
+     $                     BP, AP( J1 ), 1 )
+               CALL ZHPMV( UPLO, J-1, -CONE, AP, BP( J1 ), 1, CONE,
+     $                     AP( J1 ), 1 )
+               CALL ZDSCAL( J-1, ONE / BJJ, AP( J1 ), 1 )
+               AP( JJ ) = ( AP( JJ )-ZDOTC( J-1, AP( J1 ), 1, BP( J1 ),
+     $                    1 ) ) / BJJ
+   10       CONTINUE
+         ELSE
+*
+*           Compute inv(L)*A*inv(L')
+*
+*           KK and K1K1 are the indices of A(k,k) and A(k+1,k+1)
+*
+            KK = 1
+            DO 20 K = 1, N
+               K1K1 = KK + N - K + 1
+*
+*              Update the lower triangle of A(k:n,k:n)
+*
+               AKK = AP( KK )
+               BKK = BP( KK )
+               AKK = AKK / BKK**2
+               AP( KK ) = AKK
+               IF( K.LT.N ) THEN
+                  CALL ZDSCAL( N-K, ONE / BKK, AP( KK+1 ), 1 )
+                  CT = -HALF*AKK
+                  CALL ZAXPY( N-K, CT, BP( KK+1 ), 1, AP( KK+1 ), 1 )
+                  CALL ZHPR2( UPLO, N-K, -CONE, AP( KK+1 ), 1,
+     $                        BP( KK+1 ), 1, AP( K1K1 ) )
+                  CALL ZAXPY( N-K, CT, BP( KK+1 ), 1, AP( KK+1 ), 1 )
+                  CALL ZTPSV( UPLO, 'No transpose', 'Non-unit', N-K,
+     $                        BP( K1K1 ), AP( KK+1 ), 1 )
+               END IF
+               KK = K1K1
+   20       CONTINUE
+         END IF
+      ELSE
+         IF( UPPER ) THEN
+*
+*           Compute U*A*U'
+*
+*           K1 and KK are the indices of A(1,k) and A(k,k)
+*
+            KK = 0
+            DO 30 K = 1, N
+               K1 = KK + 1
+               KK = KK + K
+*
+*              Update the upper triangle of A(1:k,1:k)
+*
+               AKK = AP( KK )
+               BKK = BP( KK )
+               CALL ZTPMV( UPLO, 'No transpose', 'Non-unit', K-1, BP,
+     $                     AP( K1 ), 1 )
+               CT = HALF*AKK
+               CALL ZAXPY( K-1, CT, BP( K1 ), 1, AP( K1 ), 1 )
+               CALL ZHPR2( UPLO, K-1, CONE, AP( K1 ), 1, BP( K1 ), 1,
+     $                     AP )
+               CALL ZAXPY( K-1, CT, BP( K1 ), 1, AP( K1 ), 1 )
+               CALL ZDSCAL( K-1, BKK, AP( K1 ), 1 )
+               AP( KK ) = AKK*BKK**2
+   30       CONTINUE
+         ELSE
+*
+*           Compute L'*A*L
+*
+*           JJ and J1J1 are the indices of A(j,j) and A(j+1,j+1)
+*
+            JJ = 1
+            DO 40 J = 1, N
+               J1J1 = JJ + N - J + 1
+*
+*              Compute the j-th column of the lower triangle of A
+*
+               AJJ = AP( JJ )
+               BJJ = BP( JJ )
+               AP( JJ ) = AJJ*BJJ + ZDOTC( N-J, AP( JJ+1 ), 1,
+     $                    BP( JJ+1 ), 1 )
+               CALL ZDSCAL( N-J, BJJ, AP( JJ+1 ), 1 )
+               CALL ZHPMV( UPLO, N-J, CONE, AP( J1J1 ), BP( JJ+1 ), 1,
+     $                     CONE, AP( JJ+1 ), 1 )
+               CALL ZTPMV( UPLO, 'Conjugate transpose', 'Non-unit',
+     $                     N-J+1, BP( JJ ), AP( JJ ), 1 )
+               JJ = J1J1
+   40       CONTINUE
+         END IF
+      END IF
+      RETURN
+*
+*     End of ZHPGST
+*
+      END
diff --git a/costa/native/external/lapack/zhpgv.f b/costa/native/external/lapack/zhpgv.f
new file mode 100644
index 000000000..ed5589ad1
--- /dev/null
+++ b/costa/native/external/lapack/zhpgv.f
@@ -0,0 +1,197 @@
+      SUBROUTINE ZHPGV( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK,
+     $                  RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, ITYPE, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   RWORK( * ), W( * )
+      COMPLEX*16         AP( * ), BP( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHPGV computes all the eigenvalues and, optionally, the eigenvectors
+*  of a complex generalized Hermitian-definite eigenproblem, of the form
+*  A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.
+*  Here A and B are assumed to be Hermitian, stored in packed format,
+*  and B is also positive definite.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          Specifies the problem type to be solved:
+*          = 1:  A*x = (lambda)*B*x
+*          = 2:  A*B*x = (lambda)*x
+*          = 3:  B*A*x = (lambda)*x
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  AP      (input/output) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, the contents of AP are destroyed.
+*
+*  BP      (input/output) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          B, packed columnwise in a linear array.  The j-th column of B
+*          is stored in the array BP as follows:
+*          if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j;
+*          if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n.
+*
+*          On exit, the triangular factor U or L from the Cholesky
+*          factorization B = U**H*U or B = L*L**H, in the same storage
+*          format as B.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) COMPLEX*16 array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of
+*          eigenvectors.  The eigenvectors are normalized as follows:
+*          if ITYPE = 1 or 2, Z**H*B*Z = I;
+*          if ITYPE = 3, Z**H*inv(B)*Z = I.
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (max(1, 2*N-1))
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (max(1, 3*N-2))
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  ZPPTRF or ZHPEV returned an error code:
+*             <= N:  if INFO = i, ZHPEV failed to converge;
+*                    i off-diagonal elements of an intermediate
+*                    tridiagonal form did not convergeto zero;
+*             > N:   if INFO = N + i, for 1 <= i <= n, then the leading
+*                    minor of order i of B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            UPPER, WANTZ
+      CHARACTER          TRANS
+      INTEGER            J, NEIG
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZHPEV, ZHPGST, ZPPTRF, ZTPMV, ZTPSV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+*
+      INFO = 0
+      IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHPGV ', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form a Cholesky factorization of B.
+*
+      CALL ZPPTRF( UPLO, N, BP, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem and solve.
+*
+      CALL ZHPGST( ITYPE, UPLO, N, AP, BP, INFO )
+      CALL ZHPEV( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, RWORK, INFO )
+*
+      IF( WANTZ ) THEN
+*
+*        Backtransform eigenvectors to the original problem.
+*
+         NEIG = N
+         IF( INFO.GT.0 )
+     $      NEIG = INFO - 1
+         IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN
+*
+*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
+*           backtransform eigenvectors: x = inv(L)'*y or inv(U)*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'N'
+            ELSE
+               TRANS = 'C'
+            END IF
+*
+            DO 10 J = 1, NEIG
+               CALL ZTPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ),
+     $                     1 )
+   10       CONTINUE
+*
+         ELSE IF( ITYPE.EQ.3 ) THEN
+*
+*           For B*A*x=(lambda)*x;
+*           backtransform eigenvectors: x = L*y or U'*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'C'
+            ELSE
+               TRANS = 'N'
+            END IF
+*
+            DO 20 J = 1, NEIG
+               CALL ZTPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ),
+     $                     1 )
+   20       CONTINUE
+         END IF
+      END IF
+      RETURN
+*
+*     End of ZHPGV
+*
+      END
diff --git a/costa/native/external/lapack/zhpgvd.f b/costa/native/external/lapack/zhpgvd.f
new file mode 100644
index 000000000..56878c3e4
--- /dev/null
+++ b/costa/native/external/lapack/zhpgvd.f
@@ -0,0 +1,291 @@
+      SUBROUTINE ZHPGVD( ITYPE, JOBZ, UPLO, N, AP, BP, W, Z, LDZ, WORK,
+     $                   LWORK, RWORK, LRWORK, IWORK, LIWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, UPLO
+      INTEGER            INFO, ITYPE, LDZ, LIWORK, LRWORK, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   RWORK( * ), W( * )
+      COMPLEX*16         AP( * ), BP( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHPGVD computes all the eigenvalues and, optionally, the eigenvectors
+*  of a complex generalized Hermitian-definite eigenproblem, of the form
+*  A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.  Here A and
+*  B are assumed to be Hermitian, stored in packed format, and B is also
+*  positive definite.
+*  If eigenvectors are desired, it uses a divide and conquer algorithm.
+*
+*  The divide and conquer algorithm makes very mild assumptions about
+*  floating point arithmetic. It will work on machines with a guard
+*  digit in add/subtract, or on those binary machines without guard
+*  digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+*  Cray-2. It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          Specifies the problem type to be solved:
+*          = 1:  A*x = (lambda)*B*x
+*          = 2:  A*B*x = (lambda)*x
+*          = 3:  B*A*x = (lambda)*x
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  AP      (input/output) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, the contents of AP are destroyed.
+*
+*  BP      (input/output) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          B, packed columnwise in a linear array.  The j-th column of B
+*          is stored in the array BP as follows:
+*          if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j;
+*          if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n.
+*
+*          On exit, the triangular factor U or L from the Cholesky
+*          factorization B = U**H*U or B = L*L**H, in the same storage
+*          format as B.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, the eigenvalues in ascending order.
+*
+*  Z       (output) COMPLEX*16 array, dimension (LDZ, N)
+*          If JOBZ = 'V', then if INFO = 0, Z contains the matrix Z of
+*          eigenvectors.  The eigenvectors are normalized as follows:
+*          if ITYPE = 1 or 2, Z**H*B*Z = I;
+*          if ITYPE = 3, Z**H*inv(B)*Z = I.
+*          If JOBZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of array WORK.
+*          If N <= 1,               LWORK >= 1.
+*          If JOBZ = 'N' and N > 1, LWORK >= N.
+*          If JOBZ = 'V' and N > 1, LWORK >= 2*N.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (LRWORK)
+*          On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
+*
+*  LRWORK  (input) INTEGER
+*          The dimension of array RWORK.
+*          If N <= 1,               LRWORK >= 1.
+*          If JOBZ = 'N' and N > 1, LRWORK >= N.
+*          If JOBZ = 'V' and N > 1, LRWORK >= 1 + 5*N + 2*N**2.
+*
+*          If LRWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the RWORK array,
+*          returns this value as the first entry of the RWORK array, and
+*          no error message related to LRWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of array IWORK.
+*          If JOBZ  = 'N' or N <= 1, LIWORK >= 1.
+*          If JOBZ  = 'V' and N > 1, LIWORK >= 3 + 5*N.
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  ZPPTRF or ZHPEVD returned an error code:
+*             <= N:  if INFO = i, ZHPEVD failed to converge;
+*                    i off-diagonal elements of an intermediate
+*                    tridiagonal form did not convergeto zero;
+*             > N:   if INFO = N + i, for 1 <= i <= n, then the leading
+*                    minor of order i of B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, UPPER, WANTZ
+      CHARACTER          TRANS
+      INTEGER            J, LIWMIN, LRWMIN, LWMIN, NEIG
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZHPEVD, ZHPGST, ZPPTRF, ZTPMV, ZTPSV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+      LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      INFO = 0
+      IF( N.LE.1 ) THEN
+         LWMIN = 1
+         LIWMIN = 1
+         LRWMIN = 1
+      ELSE
+         IF( WANTZ ) THEN
+            LWMIN = 2*N
+            LRWMIN = 1 + 5*N + 2*N**2
+            LIWMIN = 3 + 5*N
+         ELSE
+            LWMIN = N
+            LRWMIN = N
+            LIWMIN = 1
+         END IF
+      END IF
+      IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -9
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -11
+      ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -15
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+         RWORK( 1 ) = LRWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHPGVD', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form a Cholesky factorization of B.
+*
+      CALL ZPPTRF( UPLO, N, BP, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem and solve.
+*
+      CALL ZHPGST( ITYPE, UPLO, N, AP, BP, INFO )
+      CALL ZHPEVD( JOBZ, UPLO, N, AP, W, Z, LDZ, WORK, LWORK, RWORK,
+     $             LRWORK, IWORK, LIWORK, INFO )
+      LWMIN = MAX( DBLE( LWMIN ), DBLE( WORK( 1 ) ) )
+      LRWMIN = MAX( DBLE( LRWMIN ), DBLE( RWORK( 1 ) ) )
+      LIWMIN = MAX( DBLE( LIWMIN ), DBLE( IWORK( 1 ) ) )
+*
+      IF( WANTZ ) THEN
+*
+*        Backtransform eigenvectors to the original problem.
+*
+         NEIG = N
+         IF( INFO.GT.0 )
+     $      NEIG = INFO - 1
+         IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN
+*
+*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
+*           backtransform eigenvectors: x = inv(L)'*y or inv(U)*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'N'
+            ELSE
+               TRANS = 'C'
+            END IF
+*
+            DO 10 J = 1, NEIG
+               CALL ZTPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ),
+     $                     1 )
+   10       CONTINUE
+*
+         ELSE IF( ITYPE.EQ.3 ) THEN
+*
+*           For B*A*x=(lambda)*x;
+*           backtransform eigenvectors: x = L*y or U'*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'C'
+            ELSE
+               TRANS = 'N'
+            END IF
+*
+            DO 20 J = 1, NEIG
+               CALL ZTPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ),
+     $                     1 )
+   20       CONTINUE
+         END IF
+      END IF
+*
+      WORK( 1 ) = LWMIN
+      RWORK( 1 ) = LRWMIN
+      IWORK( 1 ) = LIWMIN
+      RETURN
+*
+*     End of ZHPGVD
+*
+      END
diff --git a/costa/native/external/lapack/zhpgvx.f b/costa/native/external/lapack/zhpgvx.f
new file mode 100644
index 000000000..f3c9d8b3c
--- /dev/null
+++ b/costa/native/external/lapack/zhpgvx.f
@@ -0,0 +1,284 @@
+      SUBROUTINE ZHPGVX( ITYPE, JOBZ, RANGE, UPLO, N, AP, BP, VL, VU,
+     $                   IL, IU, ABSTOL, M, W, Z, LDZ, WORK, RWORK,
+     $                   IWORK, IFAIL, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE, UPLO
+      INTEGER            IL, INFO, ITYPE, IU, LDZ, M, N
+      DOUBLE PRECISION   ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IFAIL( * ), IWORK( * )
+      DOUBLE PRECISION   RWORK( * ), W( * )
+      COMPLEX*16         AP( * ), BP( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHPGVX computes selected eigenvalues and, optionally, eigenvectors
+*  of a complex generalized Hermitian-definite eigenproblem, of the form
+*  A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.  Here A and
+*  B are assumed to be Hermitian, stored in packed format, and B is also
+*  positive definite.  Eigenvalues and eigenvectors can be selected by
+*  specifying either a range of values or a range of indices for the
+*  desired eigenvalues.
+*
+*  Arguments
+*  =========
+*
+*  ITYPE   (input) INTEGER
+*          Specifies the problem type to be solved:
+*          = 1:  A*x = (lambda)*B*x
+*          = 2:  A*B*x = (lambda)*x
+*          = 3:  B*A*x = (lambda)*x
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found;
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found;
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangles of A and B are stored;
+*          = 'L':  Lower triangles of A and B are stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  AP      (input/output) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, the contents of AP are destroyed.
+*
+*  BP      (input/output) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          B, packed columnwise in a linear array.  The j-th column of B
+*          is stored in the array BP as follows:
+*          if UPLO = 'U', BP(i + (j-1)*j/2) = B(i,j) for 1<=i<=j;
+*          if UPLO = 'L', BP(i + (j-1)*(2*n-j)/2) = B(i,j) for j<=i<=n.
+*
+*          On exit, the triangular factor U or L from the Cholesky
+*          factorization B = U**H*U or B = L*L**H, in the same storage
+*          format as B.
+*
+*  VL      (input) DOUBLE PRECISION
+*  VU      (input) DOUBLE PRECISION
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) DOUBLE PRECISION
+*          The absolute error tolerance for the eigenvalues.
+*          An approximate eigenvalue is accepted as converged
+*          when it is determined to lie in an interval [a,b]
+*          of width less than or equal to
+*
+*                  ABSTOL + EPS *   max( |a|,|b| ) ,
+*
+*          where EPS is the machine precision.  If ABSTOL is less than
+*          or equal to zero, then  EPS*|T|  will be used in its place,
+*          where |T| is the 1-norm of the tridiagonal matrix obtained
+*          by reducing AP to tridiagonal form.
+*
+*          Eigenvalues will be computed most accurately when ABSTOL is
+*          set to twice the underflow threshold 2*DLAMCH('S'), not zero.
+*          If this routine returns with INFO>0, indicating that some
+*          eigenvectors did not converge, try setting ABSTOL to
+*          2*DLAMCH('S').
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          On normal exit, the first M elements contain the selected
+*          eigenvalues in ascending order.
+*
+*  Z       (output) COMPLEX*16 array, dimension (LDZ, N)
+*          If JOBZ = 'N', then Z is not referenced.
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix A
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          The eigenvectors are normalized as follows:
+*          if ITYPE = 1 or 2, Z**H*B*Z = I;
+*          if ITYPE = 3, Z**H*inv(B)*Z = I.
+*
+*          If an eigenvector fails to converge, then that column of Z
+*          contains the latest approximation to the eigenvector, and the
+*          index of the eigenvector is returned in IFAIL.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (7*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (5*N)
+*
+*  IFAIL   (output) INTEGER array, dimension (N)
+*          If JOBZ = 'V', then if INFO = 0, the first M elements of
+*          IFAIL are zero.  If INFO > 0, then IFAIL contains the
+*          indices of the eigenvectors that failed to converge.
+*          If JOBZ = 'N', then IFAIL is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  ZPPTRF or ZHPEVX returned an error code:
+*             <= N:  if INFO = i, ZHPEVX failed to converge;
+*                    i eigenvectors failed to converge.  Their indices
+*                    are stored in array IFAIL.
+*             > N:   if INFO = N + i, for 1 <= i <= n, then the leading
+*                    minor of order i of B is not positive definite.
+*                    The factorization of B could not be completed and
+*                    no eigenvalues or eigenvectors were computed.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Mark Fahey, Department of Mathematics, Univ. of Kentucky, USA
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, UPPER, VALEIG, WANTZ
+      CHARACTER          TRANS
+      INTEGER            J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZHPEVX, ZHPGST, ZPPTRF, ZTPMV, ZTPSV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      UPPER = LSAME( UPLO, 'U' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+*
+      INFO = 0
+      IF( ITYPE.LT.0 .OR. ITYPE.GT.3 ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN
+         INFO = -9
+      ELSE IF( INDEIG .AND. IL.LT.1 ) THEN
+         INFO = -10
+      ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN
+         INFO = -11
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -16
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHPGVX', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Form a Cholesky factorization of B.
+*
+      CALL ZPPTRF( UPLO, N, BP, INFO )
+      IF( INFO.NE.0 ) THEN
+         INFO = N + INFO
+         RETURN
+      END IF
+*
+*     Transform problem to standard eigenvalue problem and solve.
+*
+      CALL ZHPGST( ITYPE, UPLO, N, AP, BP, INFO )
+      CALL ZHPEVX( JOBZ, RANGE, UPLO, N, AP, VL, VU, IL, IU, ABSTOL, M,
+     $             W, Z, LDZ, WORK, RWORK, IWORK, IFAIL, INFO )
+*
+      IF( WANTZ ) THEN
+*
+*        Backtransform eigenvectors to the original problem.
+*
+         IF( INFO.GT.0 )
+     $      M = INFO - 1
+         IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN
+*
+*           For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
+*           backtransform eigenvectors: x = inv(L)'*y or inv(U)*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'N'
+            ELSE
+               TRANS = 'C'
+            END IF
+*
+            DO 10 J = 1, M
+               CALL ZTPSV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ),
+     $                     1 )
+   10       CONTINUE
+*
+         ELSE IF( ITYPE.EQ.3 ) THEN
+*
+*           For B*A*x=(lambda)*x;
+*           backtransform eigenvectors: x = L*y or U'*y
+*
+            IF( UPPER ) THEN
+               TRANS = 'C'
+            ELSE
+               TRANS = 'N'
+            END IF
+*
+            DO 20 J = 1, M
+               CALL ZTPMV( UPLO, TRANS, 'Non-unit', N, BP, Z( 1, J ),
+     $                     1 )
+   20       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZHPGVX
+*
+      END
diff --git a/costa/native/external/lapack/zhprfs.f b/costa/native/external/lapack/zhprfs.f
new file mode 100644
index 000000000..4e822cfaa
--- /dev/null
+++ b/costa/native/external/lapack/zhprfs.f
@@ -0,0 +1,337 @@
+      SUBROUTINE ZHPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX,
+     $                   FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX*16         AFP( * ), AP( * ), B( LDB, * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHPRFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is Hermitian indefinite
+*  and packed, and provides error bounds and backward error estimates
+*  for the solution.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AP      (input) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          The upper or lower triangle of the Hermitian matrix A, packed
+*          columnwise in a linear array.  The j-th column of A is stored
+*          in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*  AFP     (input) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          The factored form of the matrix A.  AFP contains the block
+*          diagonal matrix D and the multipliers used to obtain the
+*          factor U or L from the factorization A = U*D*U**H or
+*          A = L*D*L**H as computed by ZHPTRF, stored as a packed
+*          triangular matrix.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by ZHPTRF.
+*
+*  B       (input) COMPLEX*16 array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) COMPLEX*16 array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by ZHPTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D+0 )
+      DOUBLE PRECISION   THREE
+      PARAMETER          ( THREE = 3.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            COUNT, I, IK, J, K, KASE, KK, NZ
+      DOUBLE PRECISION   EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+      COMPLEX*16         ZDUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZAXPY, ZCOPY, ZHPMV, ZHPTRS, ZLACON
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHPRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = DLAMCH( 'Epsilon' )
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - A * X
+*
+         CALL ZCOPY( N, B( 1, J ), 1, WORK, 1 )
+         CALL ZHPMV( UPLO, N, -ONE, AP, X( 1, J ), 1, ONE, WORK, 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            RWORK( I ) = CABS1( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(A)*abs(X) + abs(B).
+*
+         KK = 1
+         IF( UPPER ) THEN
+            DO 50 K = 1, N
+               S = ZERO
+               XK = CABS1( X( K, J ) )
+               IK = KK
+               DO 40 I = 1, K - 1
+                  RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK
+                  S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) )
+                  IK = IK + 1
+   40          CONTINUE
+               RWORK( K ) = RWORK( K ) + ABS( DBLE( AP( KK+K-1 ) ) )*
+     $                      XK + S
+               KK = KK + K
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               XK = CABS1( X( K, J ) )
+               RWORK( K ) = RWORK( K ) + ABS( DBLE( AP( KK ) ) )*XK
+               IK = KK + 1
+               DO 60 I = K + 1, N
+                  RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK
+                  S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) )
+                  IK = IK + 1
+   60          CONTINUE
+               RWORK( K ) = RWORK( K ) + S
+               KK = KK + ( N-K+1 )
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) )
+            ELSE
+               S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) /
+     $             ( RWORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL ZHPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO )
+            CALL ZAXPY( N, ONE, WORK, 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(A))*
+*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(A) is the inverse of A
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(A)*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use ZLACON to estimate the infinity-norm of the matrix
+*           inv(A) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I )
+            ELSE
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) +
+     $                      SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL ZLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(A').
+*
+               CALL ZHPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO )
+               DO 110 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  110          CONTINUE
+            ELSE IF( KASE.EQ.2 ) THEN
+*
+*              Multiply by inv(A)*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  120          CONTINUE
+               CALL ZHPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of ZHPRFS
+*
+      END
diff --git a/costa/native/external/lapack/zhpsv.f b/costa/native/external/lapack/zhpsv.f
new file mode 100644
index 000000000..7c06670cd
--- /dev/null
+++ b/costa/native/external/lapack/zhpsv.f
@@ -0,0 +1,149 @@
+      SUBROUTINE ZHPSV( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         AP( * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHPSV computes the solution to a complex system of linear equations
+*     A * X = B,
+*  where A is an N-by-N Hermitian matrix stored in packed format and X
+*  and B are N-by-NRHS matrices.
+*
+*  The diagonal pivoting method is used to factor A as
+*     A = U * D * U**H,  if UPLO = 'U', or
+*     A = L * D * L**H,  if UPLO = 'L',
+*  where U (or L) is a product of permutation and unit upper (lower)
+*  triangular matrices, D is Hermitian and block diagonal with 1-by-1
+*  and 2-by-2 diagonal blocks.  The factored form of A is then used to
+*  solve the system of equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AP      (input/output) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*          See below for further details.
+*
+*          On exit, the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L from the factorization
+*          A = U*D*U**H or A = L*D*L**H as computed by ZHPTRF, stored as
+*          a packed triangular matrix in the same storage format as A.
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D, as
+*          determined by ZHPTRF.  If IPIV(k) > 0, then rows and columns
+*          k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1
+*          diagonal block.  If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0,
+*          then rows and columns k-1 and -IPIV(k) were interchanged and
+*          D(k-1:k,k-1:k) is a 2-by-2 diagonal block.  If UPLO = 'L' and
+*          IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and
+*          -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2
+*          diagonal block.
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, D(i,i) is exactly zero.  The factorization
+*                has been completed, but the block diagonal matrix D is
+*                exactly singular, so the solution could not be
+*                computed.
+*
+*  Further Details
+*  ===============
+*
+*  The packed storage scheme is illustrated by the following example
+*  when N = 4, UPLO = 'U':
+*
+*  Two-dimensional storage of the Hermitian matrix A:
+*
+*     a11 a12 a13 a14
+*         a22 a23 a24
+*             a33 a34     (aij = conjg(aji))
+*                 a44
+*
+*  Packed storage of the upper triangle of A:
+*
+*  AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ]
+*
+*  =====================================================================
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZHPTRF, ZHPTRS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHPSV ', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the factorization A = U*D*U' or A = L*D*L'.
+*
+      CALL ZHPTRF( UPLO, N, AP, IPIV, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL ZHPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO )
+*
+      END IF
+      RETURN
+*
+*     End of ZHPSV
+*
+      END
diff --git a/costa/native/external/lapack/zhpsvx.f b/costa/native/external/lapack/zhpsvx.f
new file mode 100644
index 000000000..54303daff
--- /dev/null
+++ b/costa/native/external/lapack/zhpsvx.f
@@ -0,0 +1,279 @@
+      SUBROUTINE ZHPSVX( FACT, UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X,
+     $                   LDX, RCOND, FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          FACT, UPLO
+      INTEGER            INFO, LDB, LDX, N, NRHS
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX*16         AFP( * ), AP( * ), B( LDB, * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHPSVX uses the diagonal pivoting factorization A = U*D*U**H or
+*  A = L*D*L**H to compute the solution to a complex system of linear
+*  equations A * X = B, where A is an N-by-N Hermitian matrix stored
+*  in packed format and X and B are N-by-NRHS matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'N', the diagonal pivoting method is used to factor A as
+*        A = U * D * U**H,  if UPLO = 'U', or
+*        A = L * D * L**H,  if UPLO = 'L',
+*     where U (or L) is a product of permutation and unit upper (lower)
+*     triangular matrices and D is Hermitian and block diagonal with
+*     1-by-1 and 2-by-2 diagonal blocks.
+*
+*  2. If some D(i,i)=0, so that D is exactly singular, then the routine
+*     returns with INFO = i. Otherwise, the factored form of A is used
+*     to estimate the condition number of the matrix A.  If the
+*     reciprocal of the condition number is less than machine precision,
+*     INFO = N+1 is returned as a warning, but the routine still goes on
+*     to solve for X and compute error bounds as described below.
+*
+*  3. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  4. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of A has been
+*          supplied on entry.
+*          = 'F':  On entry, AFP and IPIV contain the factored form of
+*                  A.  AFP and IPIV will not be modified.
+*          = 'N':  The matrix A will be copied to AFP and factored.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AP      (input) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          The upper or lower triangle of the Hermitian matrix A, packed
+*          columnwise in a linear array.  The j-th column of A is stored
+*          in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*          See below for further details.
+*
+*  AFP     (input or output) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          If FACT = 'F', then AFP is an input argument and on entry
+*          contains the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L from the factorization
+*          A = U*D*U**H or A = L*D*L**H as computed by ZHPTRF, stored as
+*          a packed triangular matrix in the same storage format as A.
+*
+*          If FACT = 'N', then AFP is an output argument and on exit
+*          contains the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L from the factorization
+*          A = U*D*U**H or A = L*D*L**H as computed by ZHPTRF, stored as
+*          a packed triangular matrix in the same storage format as A.
+*
+*  IPIV    (input or output) INTEGER array, dimension (N)
+*          If FACT = 'F', then IPIV is an input argument and on entry
+*          contains details of the interchanges and the block structure
+*          of D, as determined by ZHPTRF.
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*          If FACT = 'N', then IPIV is an output argument and on exit
+*          contains details of the interchanges and the block structure
+*          of D, as determined by ZHPTRF.
+*
+*  B       (input) COMPLEX*16 array, dimension (LDB,NRHS)
+*          The N-by-NRHS right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) COMPLEX*16 array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The estimate of the reciprocal condition number of the matrix
+*          A.  If RCOND is less than the machine precision (in
+*          particular, if RCOND = 0), the matrix is singular to working
+*          precision.  This condition is indicated by a return code of
+*          INFO > 0.
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= N:  D(i,i) is exactly zero.  The factorization
+*                       has been completed but the factor D is exactly
+*                       singular, so the solution and error bounds could
+*                       not be computed. RCOND = 0 is returned.
+*                = N+1: D is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  Further Details
+*  ===============
+*
+*  The packed storage scheme is illustrated by the following example
+*  when N = 4, UPLO = 'U':
+*
+*  Two-dimensional storage of the Hermitian matrix A:
+*
+*     a11 a12 a13 a14
+*         a22 a23 a24
+*             a33 a34     (aij = conjg(aji))
+*                 a44
+*
+*  Packed storage of the upper triangle of A:
+*
+*  AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ]
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOFACT
+      DOUBLE PRECISION   ANORM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, ZLANHP
+      EXTERNAL           LSAME, DLAMCH, ZLANHP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZCOPY, ZHPCON, ZHPRFS, ZHPTRF, ZHPTRS,
+     $                   ZLACPY
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHPSVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( NOFACT ) THEN
+*
+*        Compute the factorization A = U*D*U' or A = L*D*L'.
+*
+         CALL ZCOPY( N*( N+1 ) / 2, AP, 1, AFP, 1 )
+         CALL ZHPTRF( UPLO, N, AFP, IPIV, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      ANORM = ZLANHP( 'I', UPLO, N, AP, RWORK )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL ZHPCON( UPLO, N, AFP, IPIV, ANORM, RCOND, WORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.DLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution vectors X.
+*
+      CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL ZHPTRS( UPLO, N, NRHS, AFP, IPIV, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solutions and
+*     compute error bounds and backward error estimates for them.
+*
+      CALL ZHPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, FERR,
+     $             BERR, WORK, RWORK, INFO )
+*
+      RETURN
+*
+*     End of ZHPSVX
+*
+      END
diff --git a/costa/native/external/lapack/zhptrd.f b/costa/native/external/lapack/zhptrd.f
new file mode 100644
index 000000000..00536f6b5
--- /dev/null
+++ b/costa/native/external/lapack/zhptrd.f
@@ -0,0 +1,238 @@
+      SUBROUTINE ZHPTRD( UPLO, N, AP, D, E, TAU, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * ), E( * )
+      COMPLEX*16         AP( * ), TAU( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHPTRD reduces a complex Hermitian matrix A stored in packed form to
+*  real symmetric tridiagonal form T by a unitary similarity
+*  transformation: Q**H * A * Q = T.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*          On exit, if UPLO = 'U', the diagonal and first superdiagonal
+*          of A are overwritten by the corresponding elements of the
+*          tridiagonal matrix T, and the elements above the first
+*          superdiagonal, with the array TAU, represent the unitary
+*          matrix Q as a product of elementary reflectors; if UPLO
+*          = 'L', the diagonal and first subdiagonal of A are over-
+*          written by the corresponding elements of the tridiagonal
+*          matrix T, and the elements below the first subdiagonal, with
+*          the array TAU, represent the unitary matrix Q as a product
+*          of elementary reflectors. See Further Details.
+*
+*  D       (output) DOUBLE PRECISION array, dimension (N)
+*          The diagonal elements of the tridiagonal matrix T:
+*          D(i) = A(i,i).
+*
+*  E       (output) DOUBLE PRECISION array, dimension (N-1)
+*          The off-diagonal elements of the tridiagonal matrix T:
+*          E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.
+*
+*  TAU     (output) COMPLEX*16 array, dimension (N-1)
+*          The scalar factors of the elementary reflectors (see Further
+*          Details).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  If UPLO = 'U', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(n-1) . . . H(2) H(1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in AP,
+*  overwriting A(1:i-1,i+1), and tau is stored in TAU(i).
+*
+*  If UPLO = 'L', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(1) H(2) . . . H(n-1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in AP,
+*  overwriting A(i+2:n,i), and tau is stored in TAU(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE, ZERO, HALF
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ),
+     $                   ZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   HALF = ( 0.5D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, I1, I1I1, II
+      COMPLEX*16         ALPHA, TAUI
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZAXPY, ZHPMV, ZHPR2, ZLARFG
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      COMPLEX*16         ZDOTC
+      EXTERNAL           LSAME, ZDOTC
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHPTRD', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Reduce the upper triangle of A.
+*        I1 is the index in AP of A(1,I+1).
+*
+         I1 = N*( N-1 ) / 2 + 1
+         AP( I1+N-1 ) = DBLE( AP( I1+N-1 ) )
+         DO 10 I = N - 1, 1, -1
+*
+*           Generate elementary reflector H(i) = I - tau * v * v'
+*           to annihilate A(1:i-1,i+1)
+*
+            ALPHA = AP( I1+I-1 )
+            CALL ZLARFG( I, ALPHA, AP( I1 ), 1, TAUI )
+            E( I ) = ALPHA
+*
+            IF( TAUI.NE.ZERO ) THEN
+*
+*              Apply H(i) from both sides to A(1:i,1:i)
+*
+               AP( I1+I-1 ) = ONE
+*
+*              Compute  y := tau * A * v  storing y in TAU(1:i)
+*
+               CALL ZHPMV( UPLO, I, TAUI, AP, AP( I1 ), 1, ZERO, TAU,
+     $                     1 )
+*
+*              Compute  w := y - 1/2 * tau * (y'*v) * v
+*
+               ALPHA = -HALF*TAUI*ZDOTC( I, TAU, 1, AP( I1 ), 1 )
+               CALL ZAXPY( I, ALPHA, AP( I1 ), 1, TAU, 1 )
+*
+*              Apply the transformation as a rank-2 update:
+*                 A := A - v * w' - w * v'
+*
+               CALL ZHPR2( UPLO, I, -ONE, AP( I1 ), 1, TAU, 1, AP )
+*
+            END IF
+            AP( I1+I-1 ) = E( I )
+            D( I+1 ) = AP( I1+I )
+            TAU( I ) = TAUI
+            I1 = I1 - I
+   10    CONTINUE
+         D( 1 ) = AP( 1 )
+      ELSE
+*
+*        Reduce the lower triangle of A. II is the index in AP of
+*        A(i,i) and I1I1 is the index of A(i+1,i+1).
+*
+         II = 1
+         AP( 1 ) = DBLE( AP( 1 ) )
+         DO 20 I = 1, N - 1
+            I1I1 = II + N - I + 1
+*
+*           Generate elementary reflector H(i) = I - tau * v * v'
+*           to annihilate A(i+2:n,i)
+*
+            ALPHA = AP( II+1 )
+            CALL ZLARFG( N-I, ALPHA, AP( II+2 ), 1, TAUI )
+            E( I ) = ALPHA
+*
+            IF( TAUI.NE.ZERO ) THEN
+*
+*              Apply H(i) from both sides to A(i+1:n,i+1:n)
+*
+               AP( II+1 ) = ONE
+*
+*              Compute  y := tau * A * v  storing y in TAU(i:n-1)
+*
+               CALL ZHPMV( UPLO, N-I, TAUI, AP( I1I1 ), AP( II+1 ), 1,
+     $                     ZERO, TAU( I ), 1 )
+*
+*              Compute  w := y - 1/2 * tau * (y'*v) * v
+*
+               ALPHA = -HALF*TAUI*ZDOTC( N-I, TAU( I ), 1, AP( II+1 ),
+     $                 1 )
+               CALL ZAXPY( N-I, ALPHA, AP( II+1 ), 1, TAU( I ), 1 )
+*
+*              Apply the transformation as a rank-2 update:
+*                 A := A - v * w' - w * v'
+*
+               CALL ZHPR2( UPLO, N-I, -ONE, AP( II+1 ), 1, TAU( I ), 1,
+     $                     AP( I1I1 ) )
+*
+            END IF
+            AP( II+1 ) = E( I )
+            D( I ) = AP( II )
+            TAU( I ) = TAUI
+            II = I1I1
+   20    CONTINUE
+         D( N ) = AP( II )
+      END IF
+*
+      RETURN
+*
+*     End of ZHPTRD
+*
+      END
diff --git a/costa/native/external/lapack/zhptrf.f b/costa/native/external/lapack/zhptrf.f
new file mode 100644
index 000000000..08ff6b66d
--- /dev/null
+++ b/costa/native/external/lapack/zhptrf.f
@@ -0,0 +1,582 @@
+      SUBROUTINE ZHPTRF( UPLO, N, AP, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         AP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHPTRF computes the factorization of a complex Hermitian packed
+*  matrix A using the Bunch-Kaufman diagonal pivoting method:
+*
+*     A = U*D*U**H  or  A = L*D*L**H
+*
+*  where U (or L) is a product of permutation and unit upper (lower)
+*  triangular matrices, and D is Hermitian and block diagonal with
+*  1-by-1 and 2-by-2 diagonal blocks.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L, stored as a packed triangular
+*          matrix overwriting A (see below for further details).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D.
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, D(i,i) is exactly zero.  The factorization
+*               has been completed, but the block diagonal matrix D is
+*               exactly singular, and division by zero will occur if it
+*               is used to solve a system of equations.
+*
+*  Further Details
+*  ===============
+*
+*  5-96 - Based on modifications by J. Lewis, Boeing Computer Services
+*         Company
+*
+*  If UPLO = 'U', then A = U*D*U', where
+*     U = P(n)*U(n)* ... *P(k)U(k)* ...,
+*  i.e., U is a product of terms P(k)*U(k), where k decreases from n to
+*  1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and U(k) is a unit upper triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    v    0   )   k-s
+*     U(k) =  (   0    I    0   )   s
+*             (   0    0    I   )   n-k
+*                k-s   s   n-k
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k).
+*  If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k),
+*  and A(k,k), and v overwrites A(1:k-2,k-1:k).
+*
+*  If UPLO = 'L', then A = L*D*L', where
+*     L = P(1)*L(1)* ... *P(k)*L(k)* ...,
+*  i.e., L is a product of terms P(k)*L(k), where k increases from 1 to
+*  n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and L(k) is a unit lower triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    0     0   )  k-1
+*     L(k) =  (   0    I     0   )  s
+*             (   0    v     I   )  n-k-s+1
+*                k-1   s  n-k-s+1
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k).
+*  If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k),
+*  and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+      DOUBLE PRECISION   EIGHT, SEVTEN
+      PARAMETER          ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, IMAX, J, JMAX, K, KC, KK, KNC, KP, KPC,
+     $                   KSTEP, KX, NPP
+      DOUBLE PRECISION   ABSAKK, ALPHA, COLMAX, D, D11, D22, R1, ROWMAX,
+     $                   TT
+      COMPLEX*16         D12, D21, T, WK, WKM1, WKP1, ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IZAMAX
+      DOUBLE PRECISION   DLAPY2
+      EXTERNAL           LSAME, IZAMAX, DLAPY2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZDSCAL, ZHPR, ZSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, SQRT
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHPTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Initialize ALPHA for use in choosing pivot block size.
+*
+      ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT
+*
+      IF( UPPER ) THEN
+*
+*        Factorize A as U*D*U' using the upper triangle of A
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2
+*
+         K = N
+         KC = ( N-1 )*N / 2 + 1
+   10    CONTINUE
+         KNC = KC
+*
+*        If K < 1, exit from loop
+*
+         IF( K.LT.1 )
+     $      GO TO 110
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = ABS( DBLE( AP( KC+K-1 ) ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.GT.1 ) THEN
+            IMAX = IZAMAX( K-1, AP( KC ), 1 )
+            COLMAX = CABS1( AP( KC+IMAX-1 ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+            AP( KC+K-1 ) = DBLE( AP( KC+K-1 ) )
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               ROWMAX = ZERO
+               JMAX = IMAX
+               KX = IMAX*( IMAX+1 ) / 2 + IMAX
+               DO 20 J = IMAX + 1, K
+                  IF( CABS1( AP( KX ) ).GT.ROWMAX ) THEN
+                     ROWMAX = CABS1( AP( KX ) )
+                     JMAX = J
+                  END IF
+                  KX = KX + J
+   20          CONTINUE
+               KPC = ( IMAX-1 )*IMAX / 2 + 1
+               IF( IMAX.GT.1 ) THEN
+                  JMAX = IZAMAX( IMAX-1, AP( KPC ), 1 )
+                  ROWMAX = MAX( ROWMAX, CABS1( AP( KPC+JMAX-1 ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( ABS( DBLE( AP( KPC+IMAX-1 ) ) ).GE.ALPHA*
+     $                  ROWMAX ) THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+               ELSE
+*
+*                 interchange rows and columns K-1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K - KSTEP + 1
+            IF( KSTEP.EQ.2 )
+     $         KNC = KNC - K + 1
+            IF( KP.NE.KK ) THEN
+*
+*              Interchange rows and columns KK and KP in the leading
+*              submatrix A(1:k,1:k)
+*
+               CALL ZSWAP( KP-1, AP( KNC ), 1, AP( KPC ), 1 )
+               KX = KPC + KP - 1
+               DO 30 J = KP + 1, KK - 1
+                  KX = KX + J - 1
+                  T = DCONJG( AP( KNC+J-1 ) )
+                  AP( KNC+J-1 ) = DCONJG( AP( KX ) )
+                  AP( KX ) = T
+   30          CONTINUE
+               AP( KX+KK-1 ) = DCONJG( AP( KX+KK-1 ) )
+               R1 = DBLE( AP( KNC+KK-1 ) )
+               AP( KNC+KK-1 ) = DBLE( AP( KPC+KP-1 ) )
+               AP( KPC+KP-1 ) = R1
+               IF( KSTEP.EQ.2 ) THEN
+                  AP( KC+K-1 ) = DBLE( AP( KC+K-1 ) )
+                  T = AP( KC+K-2 )
+                  AP( KC+K-2 ) = AP( KC+KP-1 )
+                  AP( KC+KP-1 ) = T
+               END IF
+            ELSE
+               AP( KC+K-1 ) = DBLE( AP( KC+K-1 ) )
+               IF( KSTEP.EQ.2 )
+     $            AP( KC-1 ) = DBLE( AP( KC-1 ) )
+            END IF
+*
+*           Update the leading submatrix
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column k now holds
+*
+*              W(k) = U(k)*D(k)
+*
+*              where U(k) is the k-th column of U
+*
+*              Perform a rank-1 update of A(1:k-1,1:k-1) as
+*
+*              A := A - U(k)*D(k)*U(k)' = A - W(k)*1/D(k)*W(k)'
+*
+               R1 = ONE / DBLE( AP( KC+K-1 ) )
+               CALL ZHPR( UPLO, K-1, -R1, AP( KC ), 1, AP )
+*
+*              Store U(k) in column k
+*
+               CALL ZDSCAL( K-1, R1, AP( KC ), 1 )
+            ELSE
+*
+*              2-by-2 pivot block D(k): columns k and k-1 now hold
+*
+*              ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k)
+*
+*              where U(k) and U(k-1) are the k-th and (k-1)-th columns
+*              of U
+*
+*              Perform a rank-2 update of A(1:k-2,1:k-2) as
+*
+*              A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )'
+*                 = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )'
+*
+               IF( K.GT.2 ) THEN
+*
+                  D = DLAPY2( DBLE( AP( K-1+( K-1 )*K / 2 ) ),
+     $                DIMAG( AP( K-1+( K-1 )*K / 2 ) ) )
+                  D22 = DBLE( AP( K-1+( K-2 )*( K-1 ) / 2 ) ) / D
+                  D11 = DBLE( AP( K+( K-1 )*K / 2 ) ) / D
+                  TT = ONE / ( D11*D22-ONE )
+                  D12 = AP( K-1+( K-1 )*K / 2 ) / D
+                  D = TT / D
+*
+                  DO 50 J = K - 2, 1, -1
+                     WKM1 = D*( D11*AP( J+( K-2 )*( K-1 ) / 2 )-
+     $                      DCONJG( D12 )*AP( J+( K-1 )*K / 2 ) )
+                     WK = D*( D22*AP( J+( K-1 )*K / 2 )-D12*
+     $                    AP( J+( K-2 )*( K-1 ) / 2 ) )
+                     DO 40 I = J, 1, -1
+                        AP( I+( J-1 )*J / 2 ) = AP( I+( J-1 )*J / 2 ) -
+     $                     AP( I+( K-1 )*K / 2 )*DCONJG( WK ) -
+     $                     AP( I+( K-2 )*( K-1 ) / 2 )*DCONJG( WKM1 )
+   40                CONTINUE
+                     AP( J+( K-1 )*K / 2 ) = WK
+                     AP( J+( K-2 )*( K-1 ) / 2 ) = WKM1
+                     AP( J+( J-1 )*J / 2 ) = DCMPLX( DBLE( AP( J+( J-
+     $                                       1 )*J / 2 ) ), 0.0D+0 )
+   50             CONTINUE
+*
+               END IF
+*
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K-1 ) = -KP
+         END IF
+*
+*        Decrease K and return to the start of the main loop
+*
+         K = K - KSTEP
+         KC = KNC - K
+         GO TO 10
+*
+      ELSE
+*
+*        Factorize A as L*D*L' using the lower triangle of A
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2
+*
+         K = 1
+         KC = 1
+         NPP = N*( N+1 ) / 2
+   60    CONTINUE
+         KNC = KC
+*
+*        If K > N, exit from loop
+*
+         IF( K.GT.N )
+     $      GO TO 110
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = ABS( DBLE( AP( KC ) ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.LT.N ) THEN
+            IMAX = K + IZAMAX( N-K, AP( KC+1 ), 1 )
+            COLMAX = CABS1( AP( KC+IMAX-K ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+            AP( KC ) = DBLE( AP( KC ) )
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               ROWMAX = ZERO
+               KX = KC + IMAX - K
+               DO 70 J = K, IMAX - 1
+                  IF( CABS1( AP( KX ) ).GT.ROWMAX ) THEN
+                     ROWMAX = CABS1( AP( KX ) )
+                     JMAX = J
+                  END IF
+                  KX = KX + N - J
+   70          CONTINUE
+               KPC = NPP - ( N-IMAX+1 )*( N-IMAX+2 ) / 2 + 1
+               IF( IMAX.LT.N ) THEN
+                  JMAX = IMAX + IZAMAX( N-IMAX, AP( KPC+1 ), 1 )
+                  ROWMAX = MAX( ROWMAX, CABS1( AP( KPC+JMAX-IMAX ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( ABS( DBLE( AP( KPC ) ) ).GE.ALPHA*ROWMAX ) THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+               ELSE
+*
+*                 interchange rows and columns K+1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K + KSTEP - 1
+            IF( KSTEP.EQ.2 )
+     $         KNC = KNC + N - K + 1
+            IF( KP.NE.KK ) THEN
+*
+*              Interchange rows and columns KK and KP in the trailing
+*              submatrix A(k:n,k:n)
+*
+               IF( KP.LT.N )
+     $            CALL ZSWAP( N-KP, AP( KNC+KP-KK+1 ), 1, AP( KPC+1 ),
+     $                        1 )
+               KX = KNC + KP - KK
+               DO 80 J = KK + 1, KP - 1
+                  KX = KX + N - J + 1
+                  T = DCONJG( AP( KNC+J-KK ) )
+                  AP( KNC+J-KK ) = DCONJG( AP( KX ) )
+                  AP( KX ) = T
+   80          CONTINUE
+               AP( KNC+KP-KK ) = DCONJG( AP( KNC+KP-KK ) )
+               R1 = DBLE( AP( KNC ) )
+               AP( KNC ) = DBLE( AP( KPC ) )
+               AP( KPC ) = R1
+               IF( KSTEP.EQ.2 ) THEN
+                  AP( KC ) = DBLE( AP( KC ) )
+                  T = AP( KC+1 )
+                  AP( KC+1 ) = AP( KC+KP-K )
+                  AP( KC+KP-K ) = T
+               END IF
+            ELSE
+               AP( KC ) = DBLE( AP( KC ) )
+               IF( KSTEP.EQ.2 )
+     $            AP( KNC ) = DBLE( AP( KNC ) )
+            END IF
+*
+*           Update the trailing submatrix
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column k now holds
+*
+*              W(k) = L(k)*D(k)
+*
+*              where L(k) is the k-th column of L
+*
+               IF( K.LT.N ) THEN
+*
+*                 Perform a rank-1 update of A(k+1:n,k+1:n) as
+*
+*                 A := A - L(k)*D(k)*L(k)' = A - W(k)*(1/D(k))*W(k)'
+*
+                  R1 = ONE / DBLE( AP( KC ) )
+                  CALL ZHPR( UPLO, N-K, -R1, AP( KC+1 ), 1,
+     $                       AP( KC+N-K+1 ) )
+*
+*                 Store L(k) in column K
+*
+                  CALL ZDSCAL( N-K, R1, AP( KC+1 ), 1 )
+               END IF
+            ELSE
+*
+*              2-by-2 pivot block D(k): columns K and K+1 now hold
+*
+*              ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k)
+*
+*              where L(k) and L(k+1) are the k-th and (k+1)-th columns
+*              of L
+*
+               IF( K.LT.N-1 ) THEN
+*
+*                 Perform a rank-2 update of A(k+2:n,k+2:n) as
+*
+*                 A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )'
+*                    = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )'
+*
+*                 where L(k) and L(k+1) are the k-th and (k+1)-th
+*                 columns of L
+*
+                  D = DLAPY2( DBLE( AP( K+1+( K-1 )*( 2*N-K ) / 2 ) ),
+     $                DIMAG( AP( K+1+( K-1 )*( 2*N-K ) / 2 ) ) )
+                  D11 = DBLE( AP( K+1+K*( 2*N-K-1 ) / 2 ) ) / D
+                  D22 = DBLE( AP( K+( K-1 )*( 2*N-K ) / 2 ) ) / D
+                  TT = ONE / ( D11*D22-ONE )
+                  D21 = AP( K+1+( K-1 )*( 2*N-K ) / 2 ) / D
+                  D = TT / D
+*
+                  DO 100 J = K + 2, N
+                     WK = D*( D11*AP( J+( K-1 )*( 2*N-K ) / 2 )-D21*
+     $                    AP( J+K*( 2*N-K-1 ) / 2 ) )
+                     WKP1 = D*( D22*AP( J+K*( 2*N-K-1 ) / 2 )-
+     $                      DCONJG( D21 )*AP( J+( K-1 )*( 2*N-K ) /
+     $                      2 ) )
+                     DO 90 I = J, N
+                        AP( I+( J-1 )*( 2*N-J ) / 2 ) = AP( I+( J-1 )*
+     $                     ( 2*N-J ) / 2 ) - AP( I+( K-1 )*( 2*N-K ) /
+     $                     2 )*DCONJG( WK ) - AP( I+K*( 2*N-K-1 ) / 2 )*
+     $                     DCONJG( WKP1 )
+   90                CONTINUE
+                     AP( J+( K-1 )*( 2*N-K ) / 2 ) = WK
+                     AP( J+K*( 2*N-K-1 ) / 2 ) = WKP1
+                     AP( J+( J-1 )*( 2*N-J ) / 2 )
+     $                  = DCMPLX( DBLE( AP( J+( J-1 )*( 2*N-J ) / 2 ) ),
+     $                  0.0D+0 )
+  100             CONTINUE
+               END IF
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K+1 ) = -KP
+         END IF
+*
+*        Increase K and return to the start of the main loop
+*
+         K = K + KSTEP
+         KC = KNC + N - K + 2
+         GO TO 60
+*
+      END IF
+*
+  110 CONTINUE
+      RETURN
+*
+*     End of ZHPTRF
+*
+      END
diff --git a/costa/native/external/lapack/zhptri.f b/costa/native/external/lapack/zhptri.f
new file mode 100644
index 000000000..5873b61b1
--- /dev/null
+++ b/costa/native/external/lapack/zhptri.f
@@ -0,0 +1,344 @@
+      SUBROUTINE ZHPTRI( UPLO, N, AP, IPIV, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         AP( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHPTRI computes the inverse of a complex Hermitian indefinite matrix
+*  A in packed storage using the factorization A = U*D*U**H or
+*  A = L*D*L**H computed by ZHPTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**H;
+*          = 'L':  Lower triangular, form is A = L*D*L**H.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          On entry, the block diagonal matrix D and the multipliers
+*          used to obtain the factor U or L as computed by ZHPTRF,
+*          stored as a packed triangular matrix.
+*
+*          On exit, if INFO = 0, the (Hermitian) inverse of the original
+*          matrix, stored as a packed triangular matrix. The j-th column
+*          of inv(A) is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = inv(A)(i,j) for 1<=i<=j;
+*          if UPLO = 'L',
+*             AP(i + (j-1)*(2n-j)/2) = inv(A)(i,j) for j<=i<=n.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by ZHPTRF.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its
+*               inverse could not be computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      COMPLEX*16         CONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, CONE = ( 1.0D+0, 0.0D+0 ),
+     $                   ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, K, KC, KCNEXT, KP, KPC, KSTEP, KX, NPP
+      DOUBLE PRECISION   AK, AKP1, D, T
+      COMPLEX*16         AKKP1, TEMP
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      COMPLEX*16         ZDOTC
+      EXTERNAL           LSAME, ZDOTC
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZCOPY, ZHPMV, ZSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCONJG
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHPTRI', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Check that the diagonal matrix D is nonsingular.
+*
+      IF( UPPER ) THEN
+*
+*        Upper triangular storage: examine D from bottom to top
+*
+         KP = N*( N+1 ) / 2
+         DO 10 INFO = N, 1, -1
+            IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO )
+     $         RETURN
+            KP = KP - INFO
+   10    CONTINUE
+      ELSE
+*
+*        Lower triangular storage: examine D from top to bottom.
+*
+         KP = 1
+         DO 20 INFO = 1, N
+            IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO )
+     $         RETURN
+            KP = KP + N - INFO + 1
+   20    CONTINUE
+      END IF
+      INFO = 0
+*
+      IF( UPPER ) THEN
+*
+*        Compute inv(A) from the factorization A = U*D*U'.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+         KC = 1
+   30    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 50
+*
+         KCNEXT = KC + K
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Invert the diagonal block.
+*
+            AP( KC+K-1 ) = ONE / DBLE( AP( KC+K-1 ) )
+*
+*           Compute column K of the inverse.
+*
+            IF( K.GT.1 ) THEN
+               CALL ZCOPY( K-1, AP( KC ), 1, WORK, 1 )
+               CALL ZHPMV( UPLO, K-1, -CONE, AP, WORK, 1, ZERO,
+     $                     AP( KC ), 1 )
+               AP( KC+K-1 ) = AP( KC+K-1 ) -
+     $                        DBLE( ZDOTC( K-1, WORK, 1, AP( KC ), 1 ) )
+            END IF
+            KSTEP = 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Invert the diagonal block.
+*
+            T = ABS( AP( KCNEXT+K-1 ) )
+            AK = DBLE( AP( KC+K-1 ) ) / T
+            AKP1 = DBLE( AP( KCNEXT+K ) ) / T
+            AKKP1 = AP( KCNEXT+K-1 ) / T
+            D = T*( AK*AKP1-ONE )
+            AP( KC+K-1 ) = AKP1 / D
+            AP( KCNEXT+K ) = AK / D
+            AP( KCNEXT+K-1 ) = -AKKP1 / D
+*
+*           Compute columns K and K+1 of the inverse.
+*
+            IF( K.GT.1 ) THEN
+               CALL ZCOPY( K-1, AP( KC ), 1, WORK, 1 )
+               CALL ZHPMV( UPLO, K-1, -CONE, AP, WORK, 1, ZERO,
+     $                     AP( KC ), 1 )
+               AP( KC+K-1 ) = AP( KC+K-1 ) -
+     $                        DBLE( ZDOTC( K-1, WORK, 1, AP( KC ), 1 ) )
+               AP( KCNEXT+K-1 ) = AP( KCNEXT+K-1 ) -
+     $                            ZDOTC( K-1, AP( KC ), 1, AP( KCNEXT ),
+     $                            1 )
+               CALL ZCOPY( K-1, AP( KCNEXT ), 1, WORK, 1 )
+               CALL ZHPMV( UPLO, K-1, -CONE, AP, WORK, 1, ZERO,
+     $                     AP( KCNEXT ), 1 )
+               AP( KCNEXT+K ) = AP( KCNEXT+K ) -
+     $                          DBLE( ZDOTC( K-1, WORK, 1, AP( KCNEXT ),
+     $                          1 ) )
+            END IF
+            KSTEP = 2
+            KCNEXT = KCNEXT + K + 1
+         END IF
+*
+         KP = ABS( IPIV( K ) )
+         IF( KP.NE.K ) THEN
+*
+*           Interchange rows and columns K and KP in the leading
+*           submatrix A(1:k+1,1:k+1)
+*
+            KPC = ( KP-1 )*KP / 2 + 1
+            CALL ZSWAP( KP-1, AP( KC ), 1, AP( KPC ), 1 )
+            KX = KPC + KP - 1
+            DO 40 J = KP + 1, K - 1
+               KX = KX + J - 1
+               TEMP = DCONJG( AP( KC+J-1 ) )
+               AP( KC+J-1 ) = DCONJG( AP( KX ) )
+               AP( KX ) = TEMP
+   40       CONTINUE
+            AP( KC+KP-1 ) = DCONJG( AP( KC+KP-1 ) )
+            TEMP = AP( KC+K-1 )
+            AP( KC+K-1 ) = AP( KPC+KP-1 )
+            AP( KPC+KP-1 ) = TEMP
+            IF( KSTEP.EQ.2 ) THEN
+               TEMP = AP( KC+K+K-1 )
+               AP( KC+K+K-1 ) = AP( KC+K+KP-1 )
+               AP( KC+K+KP-1 ) = TEMP
+            END IF
+         END IF
+*
+         K = K + KSTEP
+         KC = KCNEXT
+         GO TO 30
+   50    CONTINUE
+*
+      ELSE
+*
+*        Compute inv(A) from the factorization A = L*D*L'.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         NPP = N*( N+1 ) / 2
+         K = N
+         KC = NPP
+   60    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 80
+*
+         KCNEXT = KC - ( N-K+2 )
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Invert the diagonal block.
+*
+            AP( KC ) = ONE / DBLE( AP( KC ) )
+*
+*           Compute column K of the inverse.
+*
+            IF( K.LT.N ) THEN
+               CALL ZCOPY( N-K, AP( KC+1 ), 1, WORK, 1 )
+               CALL ZHPMV( UPLO, N-K, -CONE, AP( KC+N-K+1 ), WORK, 1,
+     $                     ZERO, AP( KC+1 ), 1 )
+               AP( KC ) = AP( KC ) - DBLE( ZDOTC( N-K, WORK, 1,
+     $                    AP( KC+1 ), 1 ) )
+            END IF
+            KSTEP = 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Invert the diagonal block.
+*
+            T = ABS( AP( KCNEXT+1 ) )
+            AK = DBLE( AP( KCNEXT ) ) / T
+            AKP1 = DBLE( AP( KC ) ) / T
+            AKKP1 = AP( KCNEXT+1 ) / T
+            D = T*( AK*AKP1-ONE )
+            AP( KCNEXT ) = AKP1 / D
+            AP( KC ) = AK / D
+            AP( KCNEXT+1 ) = -AKKP1 / D
+*
+*           Compute columns K-1 and K of the inverse.
+*
+            IF( K.LT.N ) THEN
+               CALL ZCOPY( N-K, AP( KC+1 ), 1, WORK, 1 )
+               CALL ZHPMV( UPLO, N-K, -CONE, AP( KC+( N-K+1 ) ), WORK,
+     $                     1, ZERO, AP( KC+1 ), 1 )
+               AP( KC ) = AP( KC ) - DBLE( ZDOTC( N-K, WORK, 1,
+     $                    AP( KC+1 ), 1 ) )
+               AP( KCNEXT+1 ) = AP( KCNEXT+1 ) -
+     $                          ZDOTC( N-K, AP( KC+1 ), 1,
+     $                          AP( KCNEXT+2 ), 1 )
+               CALL ZCOPY( N-K, AP( KCNEXT+2 ), 1, WORK, 1 )
+               CALL ZHPMV( UPLO, N-K, -CONE, AP( KC+( N-K+1 ) ), WORK,
+     $                     1, ZERO, AP( KCNEXT+2 ), 1 )
+               AP( KCNEXT ) = AP( KCNEXT ) -
+     $                        DBLE( ZDOTC( N-K, WORK, 1, AP( KCNEXT+2 ),
+     $                        1 ) )
+            END IF
+            KSTEP = 2
+            KCNEXT = KCNEXT - ( N-K+3 )
+         END IF
+*
+         KP = ABS( IPIV( K ) )
+         IF( KP.NE.K ) THEN
+*
+*           Interchange rows and columns K and KP in the trailing
+*           submatrix A(k-1:n,k-1:n)
+*
+            KPC = NPP - ( N-KP+1 )*( N-KP+2 ) / 2 + 1
+            IF( KP.LT.N )
+     $         CALL ZSWAP( N-KP, AP( KC+KP-K+1 ), 1, AP( KPC+1 ), 1 )
+            KX = KC + KP - K
+            DO 70 J = K + 1, KP - 1
+               KX = KX + N - J + 1
+               TEMP = DCONJG( AP( KC+J-K ) )
+               AP( KC+J-K ) = DCONJG( AP( KX ) )
+               AP( KX ) = TEMP
+   70       CONTINUE
+            AP( KC+KP-K ) = DCONJG( AP( KC+KP-K ) )
+            TEMP = AP( KC )
+            AP( KC ) = AP( KPC )
+            AP( KPC ) = TEMP
+            IF( KSTEP.EQ.2 ) THEN
+               TEMP = AP( KC-N+K-1 )
+               AP( KC-N+K-1 ) = AP( KC-N+KP-1 )
+               AP( KC-N+KP-1 ) = TEMP
+            END IF
+         END IF
+*
+         K = K - KSTEP
+         KC = KCNEXT
+         GO TO 60
+   80    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZHPTRI
+*
+      END
diff --git a/costa/native/external/lapack/zhptrs.f b/costa/native/external/lapack/zhptrs.f
new file mode 100644
index 000000000..c2ed8233e
--- /dev/null
+++ b/costa/native/external/lapack/zhptrs.f
@@ -0,0 +1,402 @@
+      SUBROUTINE ZHPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         AP( * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHPTRS solves a system of linear equations A*X = B with a complex
+*  Hermitian matrix A stored in packed format using the factorization
+*  A = U*D*U**H or A = L*D*L**H computed by ZHPTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**H;
+*          = 'L':  Lower triangular, form is A = L*D*L**H.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AP      (input) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          The block diagonal matrix D and the multipliers used to
+*          obtain the factor U or L as computed by ZHPTRF, stored as a
+*          packed triangular matrix.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by ZHPTRF.
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, K, KC, KP
+      DOUBLE PRECISION   S
+      COMPLEX*16         AK, AKM1, AKM1K, BK, BKM1, DENOM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZDSCAL, ZGEMV, ZGERU, ZLACGV, ZSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, DCONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHPTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Solve A*X = B, where A = U*D*U'.
+*
+*        First solve U*D*X = B, overwriting B with X.
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = N
+         KC = N*( N+1 ) / 2 + 1
+   10    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 30
+*
+         KC = KC - K
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(U(K)), where U(K) is the transformation
+*           stored in column K of A.
+*
+            CALL ZGERU( K-1, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB,
+     $                  B( 1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            S = DBLE( ONE ) / DBLE( AP( KC+K-1 ) )
+            CALL ZDSCAL( NRHS, S, B( K, 1 ), LDB )
+            K = K - 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Interchange rows K-1 and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K-1 )
+     $         CALL ZSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(U(K)), where U(K) is the transformation
+*           stored in columns K-1 and K of A.
+*
+            CALL ZGERU( K-2, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB,
+     $                  B( 1, 1 ), LDB )
+            CALL ZGERU( K-2, NRHS, -ONE, AP( KC-( K-1 ) ), 1,
+     $                  B( K-1, 1 ), LDB, B( 1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            AKM1K = AP( KC+K-2 )
+            AKM1 = AP( KC-1 ) / AKM1K
+            AK = AP( KC+K-1 ) / DCONJG( AKM1K )
+            DENOM = AKM1*AK - ONE
+            DO 20 J = 1, NRHS
+               BKM1 = B( K-1, J ) / AKM1K
+               BK = B( K, J ) / DCONJG( AKM1K )
+               B( K-1, J ) = ( AK*BKM1-BK ) / DENOM
+               B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM
+   20       CONTINUE
+            KC = KC - K + 1
+            K = K - 2
+         END IF
+*
+         GO TO 10
+   30    CONTINUE
+*
+*        Next solve U'*X = B, overwriting B with X.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+         KC = 1
+   40    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 50
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Multiply by inv(U'(K)), where U(K) is the transformation
+*           stored in column K of A.
+*
+            IF( K.GT.1 ) THEN
+               CALL ZLACGV( NRHS, B( K, 1 ), LDB )
+               CALL ZGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B,
+     $                     LDB, AP( KC ), 1, ONE, B( K, 1 ), LDB )
+               CALL ZLACGV( NRHS, B( K, 1 ), LDB )
+            END IF
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            KC = KC + K
+            K = K + 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Multiply by inv(U'(K+1)), where U(K+1) is the transformation
+*           stored in columns K and K+1 of A.
+*
+            IF( K.GT.1 ) THEN
+               CALL ZLACGV( NRHS, B( K, 1 ), LDB )
+               CALL ZGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B,
+     $                     LDB, AP( KC ), 1, ONE, B( K, 1 ), LDB )
+               CALL ZLACGV( NRHS, B( K, 1 ), LDB )
+*
+               CALL ZLACGV( NRHS, B( K+1, 1 ), LDB )
+               CALL ZGEMV( 'Conjugate transpose', K-1, NRHS, -ONE, B,
+     $                     LDB, AP( KC+K ), 1, ONE, B( K+1, 1 ), LDB )
+               CALL ZLACGV( NRHS, B( K+1, 1 ), LDB )
+            END IF
+*
+*           Interchange rows K and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K )
+     $         CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            KC = KC + 2*K + 1
+            K = K + 2
+         END IF
+*
+         GO TO 40
+   50    CONTINUE
+*
+      ELSE
+*
+*        Solve A*X = B, where A = L*D*L'.
+*
+*        First solve L*D*X = B, overwriting B with X.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+         KC = 1
+   60    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 80
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(L(K)), where L(K) is the transformation
+*           stored in column K of A.
+*
+            IF( K.LT.N )
+     $         CALL ZGERU( N-K, NRHS, -ONE, AP( KC+1 ), 1, B( K, 1 ),
+     $                     LDB, B( K+1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            S = DBLE( ONE ) / DBLE( AP( KC ) )
+            CALL ZDSCAL( NRHS, S, B( K, 1 ), LDB )
+            KC = KC + N - K + 1
+            K = K + 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Interchange rows K+1 and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K+1 )
+     $         CALL ZSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(L(K)), where L(K) is the transformation
+*           stored in columns K and K+1 of A.
+*
+            IF( K.LT.N-1 ) THEN
+               CALL ZGERU( N-K-1, NRHS, -ONE, AP( KC+2 ), 1, B( K, 1 ),
+     $                     LDB, B( K+2, 1 ), LDB )
+               CALL ZGERU( N-K-1, NRHS, -ONE, AP( KC+N-K+2 ), 1,
+     $                     B( K+1, 1 ), LDB, B( K+2, 1 ), LDB )
+            END IF
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            AKM1K = AP( KC+1 )
+            AKM1 = AP( KC ) / DCONJG( AKM1K )
+            AK = AP( KC+N-K+1 ) / AKM1K
+            DENOM = AKM1*AK - ONE
+            DO 70 J = 1, NRHS
+               BKM1 = B( K, J ) / DCONJG( AKM1K )
+               BK = B( K+1, J ) / AKM1K
+               B( K, J ) = ( AK*BKM1-BK ) / DENOM
+               B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM
+   70       CONTINUE
+            KC = KC + 2*( N-K ) + 1
+            K = K + 2
+         END IF
+*
+         GO TO 60
+   80    CONTINUE
+*
+*        Next solve L'*X = B, overwriting B with X.
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = N
+         KC = N*( N+1 ) / 2 + 1
+   90    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 100
+*
+         KC = KC - ( N-K+1 )
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Multiply by inv(L'(K)), where L(K) is the transformation
+*           stored in column K of A.
+*
+            IF( K.LT.N ) THEN
+               CALL ZLACGV( NRHS, B( K, 1 ), LDB )
+               CALL ZGEMV( 'Conjugate transpose', N-K, NRHS, -ONE,
+     $                     B( K+1, 1 ), LDB, AP( KC+1 ), 1, ONE,
+     $                     B( K, 1 ), LDB )
+               CALL ZLACGV( NRHS, B( K, 1 ), LDB )
+            END IF
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            K = K - 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Multiply by inv(L'(K-1)), where L(K-1) is the transformation
+*           stored in columns K-1 and K of A.
+*
+            IF( K.LT.N ) THEN
+               CALL ZLACGV( NRHS, B( K, 1 ), LDB )
+               CALL ZGEMV( 'Conjugate transpose', N-K, NRHS, -ONE,
+     $                     B( K+1, 1 ), LDB, AP( KC+1 ), 1, ONE,
+     $                     B( K, 1 ), LDB )
+               CALL ZLACGV( NRHS, B( K, 1 ), LDB )
+*
+               CALL ZLACGV( NRHS, B( K-1, 1 ), LDB )
+               CALL ZGEMV( 'Conjugate transpose', N-K, NRHS, -ONE,
+     $                     B( K+1, 1 ), LDB, AP( KC-( N-K ) ), 1, ONE,
+     $                     B( K-1, 1 ), LDB )
+               CALL ZLACGV( NRHS, B( K-1, 1 ), LDB )
+            END IF
+*
+*           Interchange rows K and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K )
+     $         CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            KC = KC - ( N-K+2 )
+            K = K - 2
+         END IF
+*
+         GO TO 90
+  100    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZHPTRS
+*
+      END
diff --git a/costa/native/external/lapack/zhsein.f b/costa/native/external/lapack/zhsein.f
new file mode 100644
index 000000000..c6f38150c
--- /dev/null
+++ b/costa/native/external/lapack/zhsein.f
@@ -0,0 +1,351 @@
+      SUBROUTINE ZHSEIN( SIDE, EIGSRC, INITV, SELECT, N, H, LDH, W, VL,
+     $                   LDVL, VR, LDVR, MM, M, WORK, RWORK, IFAILL,
+     $                   IFAILR, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EIGSRC, INITV, SIDE
+      INTEGER            INFO, LDH, LDVL, LDVR, M, MM, N
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            SELECT( * )
+      INTEGER            IFAILL( * ), IFAILR( * )
+      DOUBLE PRECISION   RWORK( * )
+      COMPLEX*16         H( LDH, * ), VL( LDVL, * ), VR( LDVR, * ),
+     $                   W( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHSEIN uses inverse iteration to find specified right and/or left
+*  eigenvectors of a complex upper Hessenberg matrix H.
+*
+*  The right eigenvector x and the left eigenvector y of the matrix H
+*  corresponding to an eigenvalue w are defined by:
+*
+*               H * x = w * x,     y**h * H = w * y**h
+*
+*  where y**h denotes the conjugate transpose of the vector y.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'R': compute right eigenvectors only;
+*          = 'L': compute left eigenvectors only;
+*          = 'B': compute both right and left eigenvectors.
+*
+*  EIGSRC  (input) CHARACTER*1
+*          Specifies the source of eigenvalues supplied in W:
+*          = 'Q': the eigenvalues were found using ZHSEQR; thus, if
+*                 H has zero subdiagonal elements, and so is
+*                 block-triangular, then the j-th eigenvalue can be
+*                 assumed to be an eigenvalue of the block containing
+*                 the j-th row/column.  This property allows ZHSEIN to
+*                 perform inverse iteration on just one diagonal block.
+*          = 'N': no assumptions are made on the correspondence
+*                 between eigenvalues and diagonal blocks.  In this
+*                 case, ZHSEIN must always perform inverse iteration
+*                 using the whole matrix H.
+*
+*  INITV   (input) CHARACTER*1
+*          = 'N': no initial vectors are supplied;
+*          = 'U': user-supplied initial vectors are stored in the arrays
+*                 VL and/or VR.
+*
+*  SELECT  (input) LOGICAL array, dimension (N)
+*          Specifies the eigenvectors to be computed. To select the
+*          eigenvector corresponding to the eigenvalue W(j),
+*          SELECT(j) must be set to .TRUE..
+*
+*  N       (input) INTEGER
+*          The order of the matrix H.  N >= 0.
+*
+*  H       (input) COMPLEX*16 array, dimension (LDH,N)
+*          The upper Hessenberg matrix H.
+*
+*  LDH     (input) INTEGER
+*          The leading dimension of the array H.  LDH >= max(1,N).
+*
+*  W       (input/output) COMPLEX*16 array, dimension (N)
+*          On entry, the eigenvalues of H.
+*          On exit, the real parts of W may have been altered since
+*          close eigenvalues are perturbed slightly in searching for
+*          independent eigenvectors.
+*
+*  VL      (input/output) COMPLEX*16 array, dimension (LDVL,MM)
+*          On entry, if INITV = 'U' and SIDE = 'L' or 'B', VL must
+*          contain starting vectors for the inverse iteration for the
+*          left eigenvectors; the starting vector for each eigenvector
+*          must be in the same column in which the eigenvector will be
+*          stored.
+*          On exit, if SIDE = 'L' or 'B', the left eigenvectors
+*          specified by SELECT will be stored consecutively in the
+*          columns of VL, in the same order as their eigenvalues.
+*          If SIDE = 'R', VL is not referenced.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the array VL.
+*          LDVL >= max(1,N) if SIDE = 'L' or 'B'; LDVL >= 1 otherwise.
+*
+*  VR      (input/output) COMPLEX*16 array, dimension (LDVR,MM)
+*          On entry, if INITV = 'U' and SIDE = 'R' or 'B', VR must
+*          contain starting vectors for the inverse iteration for the
+*          right eigenvectors; the starting vector for each eigenvector
+*          must be in the same column in which the eigenvector will be
+*          stored.
+*          On exit, if SIDE = 'R' or 'B', the right eigenvectors
+*          specified by SELECT will be stored consecutively in the
+*          columns of VR, in the same order as their eigenvalues.
+*          If SIDE = 'L', VR is not referenced.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the array VR.
+*          LDVR >= max(1,N) if SIDE = 'R' or 'B'; LDVR >= 1 otherwise.
+*
+*  MM      (input) INTEGER
+*          The number of columns in the arrays VL and/or VR. MM >= M.
+*
+*  M       (output) INTEGER
+*          The number of columns in the arrays VL and/or VR required to
+*          store the eigenvectors (= the number of .TRUE. elements in
+*          SELECT).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (N*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  IFAILL  (output) INTEGER array, dimension (MM)
+*          If SIDE = 'L' or 'B', IFAILL(i) = j > 0 if the left
+*          eigenvector in the i-th column of VL (corresponding to the
+*          eigenvalue w(j)) failed to converge; IFAILL(i) = 0 if the
+*          eigenvector converged satisfactorily.
+*          If SIDE = 'R', IFAILL is not referenced.
+*
+*  IFAILR  (output) INTEGER array, dimension (MM)
+*          If SIDE = 'R' or 'B', IFAILR(i) = j > 0 if the right
+*          eigenvector in the i-th column of VR (corresponding to the
+*          eigenvalue w(j)) failed to converge; IFAILR(i) = 0 if the
+*          eigenvector converged satisfactorily.
+*          If SIDE = 'L', IFAILR is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, i is the number of eigenvectors which
+*                failed to converge; see IFAILL and IFAILR for further
+*                details.
+*
+*  Further Details
+*  ===============
+*
+*  Each eigenvector is normalized so that the element of largest
+*  magnitude has magnitude 1; here the magnitude of a complex number
+*  (x,y) is taken to be |x|+|y|.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+      DOUBLE PRECISION   RZERO
+      PARAMETER          ( RZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            BOTHV, FROMQR, LEFTV, NOINIT, RIGHTV
+      INTEGER            I, IINFO, K, KL, KLN, KR, KS, LDWORK
+      DOUBLE PRECISION   EPS3, HNORM, SMLNUM, ULP, UNFL
+      COMPLEX*16         CDUM, WK
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, ZLANHS
+      EXTERNAL           LSAME, DLAMCH, ZLANHS
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLAEIN
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters.
+*
+      BOTHV = LSAME( SIDE, 'B' )
+      RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV
+      LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV
+*
+      FROMQR = LSAME( EIGSRC, 'Q' )
+*
+      NOINIT = LSAME( INITV, 'N' )
+*
+*     Set M to the number of columns required to store the selected
+*     eigenvectors.
+*
+      M = 0
+      DO 10 K = 1, N
+         IF( SELECT( K ) )
+     $      M = M + 1
+   10 CONTINUE
+*
+      INFO = 0
+      IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN
+         INFO = -1
+      ELSE IF( .NOT.FROMQR .AND. .NOT.LSAME( EIGSRC, 'N' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOINIT .AND. .NOT.LSAME( INITV, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDH.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDVL.LT.1 .OR. ( LEFTV .AND. LDVL.LT.N ) ) THEN
+         INFO = -10
+      ELSE IF( LDVR.LT.1 .OR. ( RIGHTV .AND. LDVR.LT.N ) ) THEN
+         INFO = -12
+      ELSE IF( MM.LT.M ) THEN
+         INFO = -13
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHSEIN', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Set machine-dependent constants.
+*
+      UNFL = DLAMCH( 'Safe minimum' )
+      ULP = DLAMCH( 'Precision' )
+      SMLNUM = UNFL*( N / ULP )
+*
+      LDWORK = N
+*
+      KL = 1
+      KLN = 0
+      IF( FROMQR ) THEN
+         KR = 0
+      ELSE
+         KR = N
+      END IF
+      KS = 1
+*
+      DO 100 K = 1, N
+         IF( SELECT( K ) ) THEN
+*
+*           Compute eigenvector(s) corresponding to W(K).
+*
+            IF( FROMQR ) THEN
+*
+*              If affiliation of eigenvalues is known, check whether
+*              the matrix splits.
+*
+*              Determine KL and KR such that 1 <= KL <= K <= KR <= N
+*              and H(KL,KL-1) and H(KR+1,KR) are zero (or KL = 1 or
+*              KR = N).
+*
+*              Then inverse iteration can be performed with the
+*              submatrix H(KL:N,KL:N) for a left eigenvector, and with
+*              the submatrix H(1:KR,1:KR) for a right eigenvector.
+*
+               DO 20 I = K, KL + 1, -1
+                  IF( H( I, I-1 ).EQ.ZERO )
+     $               GO TO 30
+   20          CONTINUE
+   30          CONTINUE
+               KL = I
+               IF( K.GT.KR ) THEN
+                  DO 40 I = K, N - 1
+                     IF( H( I+1, I ).EQ.ZERO )
+     $                  GO TO 50
+   40             CONTINUE
+   50             CONTINUE
+                  KR = I
+               END IF
+            END IF
+*
+            IF( KL.NE.KLN ) THEN
+               KLN = KL
+*
+*              Compute infinity-norm of submatrix H(KL:KR,KL:KR) if it
+*              has not ben computed before.
+*
+               HNORM = ZLANHS( 'I', KR-KL+1, H( KL, KL ), LDH, RWORK )
+               IF( HNORM.GT.RZERO ) THEN
+                  EPS3 = HNORM*ULP
+               ELSE
+                  EPS3 = SMLNUM
+               END IF
+            END IF
+*
+*           Perturb eigenvalue if it is close to any previous
+*           selected eigenvalues affiliated to the submatrix
+*           H(KL:KR,KL:KR). Close roots are modified by EPS3.
+*
+            WK = W( K )
+   60       CONTINUE
+            DO 70 I = K - 1, KL, -1
+               IF( SELECT( I ) .AND. CABS1( W( I )-WK ).LT.EPS3 ) THEN
+                  WK = WK + EPS3
+                  GO TO 60
+               END IF
+   70       CONTINUE
+            W( K ) = WK
+*
+            IF( LEFTV ) THEN
+*
+*              Compute left eigenvector.
+*
+               CALL ZLAEIN( .FALSE., NOINIT, N-KL+1, H( KL, KL ), LDH,
+     $                      WK, VL( KL, KS ), WORK, LDWORK, RWORK, EPS3,
+     $                      SMLNUM, IINFO )
+               IF( IINFO.GT.0 ) THEN
+                  INFO = INFO + 1
+                  IFAILL( KS ) = K
+               ELSE
+                  IFAILL( KS ) = 0
+               END IF
+               DO 80 I = 1, KL - 1
+                  VL( I, KS ) = ZERO
+   80          CONTINUE
+            END IF
+            IF( RIGHTV ) THEN
+*
+*              Compute right eigenvector.
+*
+               CALL ZLAEIN( .TRUE., NOINIT, KR, H, LDH, WK, VR( 1, KS ),
+     $                      WORK, LDWORK, RWORK, EPS3, SMLNUM, IINFO )
+               IF( IINFO.GT.0 ) THEN
+                  INFO = INFO + 1
+                  IFAILR( KS ) = K
+               ELSE
+                  IFAILR( KS ) = 0
+               END IF
+               DO 90 I = KR + 1, N
+                  VR( I, KS ) = ZERO
+   90          CONTINUE
+            END IF
+            KS = KS + 1
+         END IF
+  100 CONTINUE
+*
+      RETURN
+*
+*     End of ZHSEIN
+*
+      END
diff --git a/costa/native/external/lapack/zhseqr.f b/costa/native/external/lapack/zhseqr.f
new file mode 100644
index 000000000..17274a8f6
--- /dev/null
+++ b/costa/native/external/lapack/zhseqr.f
@@ -0,0 +1,474 @@
+      SUBROUTINE ZHSEQR( JOB, COMPZ, N, ILO, IHI, H, LDH, W, Z, LDZ,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPZ, JOB
+      INTEGER            IHI, ILO, INFO, LDH, LDZ, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         H( LDH, * ), W( * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZHSEQR computes the eigenvalues of a complex upper Hessenberg
+*  matrix H, and, optionally, the matrices T and Z from the Schur
+*  decomposition H = Z T Z**H, where T is an upper triangular matrix
+*  (the Schur form), and Z is the unitary matrix of Schur vectors.
+*
+*  Optionally Z may be postmultiplied into an input unitary matrix Q,
+*  so that this routine can give the Schur factorization of a matrix A
+*  which has been reduced to the Hessenberg form H by the unitary
+*  matrix Q:  A = Q*H*Q**H = (QZ)*T*(QZ)**H.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          = 'E': compute eigenvalues only;
+*          = 'S': compute eigenvalues and the Schur form T.
+*
+*  COMPZ   (input) CHARACTER*1
+*          = 'N': no Schur vectors are computed;
+*          = 'I': Z is initialized to the unit matrix and the matrix Z
+*                 of Schur vectors of H is returned;
+*          = 'V': Z must contain an unitary matrix Q on entry, and
+*                 the product Q*Z is returned.
+*
+*  N       (input) INTEGER
+*          The order of the matrix H.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          It is assumed that H is already upper triangular in rows
+*          and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
+*          set by a previous call to ZGEBAL, and then passed to CGEHRD
+*          when the matrix output by ZGEBAL is reduced to Hessenberg
+*          form. Otherwise ILO and IHI should be set to 1 and N
+*          respectively.
+*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+*
+*  H       (input/output) COMPLEX*16 array, dimension (LDH,N)
+*          On entry, the upper Hessenberg matrix H.
+*          On exit, if JOB = 'S', H contains the upper triangular matrix
+*          T from the Schur decomposition (the Schur form). If
+*          JOB = 'E', the contents of H are unspecified on exit.
+*
+*  LDH     (input) INTEGER
+*          The leading dimension of the array H. LDH >= max(1,N).
+*
+*  W       (output) COMPLEX*16 array, dimension (N)
+*          The computed eigenvalues. If JOB = 'S', the eigenvalues are
+*          stored in the same order as on the diagonal of the Schur form
+*          returned in H, with W(i) = H(i,i).
+*
+*  Z       (input/output) COMPLEX*16 array, dimension (LDZ,N)
+*          If COMPZ = 'N': Z is not referenced.
+*          If COMPZ = 'I': on entry, Z need not be set, and on exit, Z
+*          contains the unitary matrix Z of the Schur vectors of H.
+*          If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q,
+*          which is assumed to be equal to the unit matrix except for
+*          the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z.
+*          Normally Q is the unitary matrix generated by ZUNGHR after
+*          the call to ZGEHRD which formed the Hessenberg matrix H.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.
+*          LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,N).
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, ZHSEQR failed to compute all the
+*                eigenvalues in a total of 30*(IHI-ILO+1) iterations;
+*                elements 1:ilo-1 and i+1:n of W contain those
+*                eigenvalues which have been successfully computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO, ONE
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   ONE = ( 1.0D+0, 0.0D+0 ) )
+      DOUBLE PRECISION   RZERO, RONE, CONST
+      PARAMETER          ( RZERO = 0.0D+0, RONE = 1.0D+0,
+     $                   CONST = 1.5D+0 )
+      INTEGER            NSMAX, LDS
+      PARAMETER          ( NSMAX = 15, LDS = NSMAX )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            INITZ, LQUERY, WANTT, WANTZ
+      INTEGER            I, I1, I2, IERR, II, ITEMP, ITN, ITS, J, K, L,
+     $                   MAXB, NH, NR, NS, NV
+      DOUBLE PRECISION   OVFL, RTEMP, SMLNUM, TST1, ULP, UNFL
+      COMPLEX*16         CDUM, TAU, TEMP
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   RWORK( 1 )
+      COMPLEX*16         S( LDS, NSMAX ), V( NSMAX+1 ), VV( NSMAX+1 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV, IZAMAX
+      DOUBLE PRECISION   DLAMCH, DLAPY2, ZLANHS
+      EXTERNAL           LSAME, ILAENV, IZAMAX, DLAMCH, DLAPY2, ZLANHS
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZCOPY, ZDSCAL, ZGEMV, ZLACPY, ZLAHQR,
+     $                   ZLARFG, ZLARFX, ZLASET, ZSCAL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCONJG, DIMAG, MAX, MIN
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      WANTT = LSAME( JOB, 'S' )
+      INITZ = LSAME( COMPZ, 'I' )
+      WANTZ = INITZ .OR. LSAME( COMPZ, 'V' )
+*
+      INFO = 0
+      WORK( 1 ) = MAX( 1, N )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.LSAME( JOB, 'E' ) .AND. .NOT.WANTT ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( COMPZ, 'N' ) .AND. .NOT.WANTZ ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN
+         INFO = -5
+      ELSE IF( LDH.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDZ.LT.1 .OR. WANTZ .AND. LDZ.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZHSEQR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Initialize Z, if necessary
+*
+      IF( INITZ )
+     $   CALL ZLASET( 'Full', N, N, ZERO, ONE, Z, LDZ )
+*
+*     Store the eigenvalues isolated by ZGEBAL.
+*
+      DO 10 I = 1, ILO - 1
+         W( I ) = H( I, I )
+   10 CONTINUE
+      DO 20 I = IHI + 1, N
+         W( I ) = H( I, I )
+   20 CONTINUE
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+      IF( ILO.EQ.IHI ) THEN
+         W( ILO ) = H( ILO, ILO )
+         RETURN
+      END IF
+*
+*     Set rows and columns ILO to IHI to zero below the first
+*     subdiagonal.
+*
+      DO 40 J = ILO, IHI - 2
+         DO 30 I = J + 2, N
+            H( I, J ) = ZERO
+   30    CONTINUE
+   40 CONTINUE
+      NH = IHI - ILO + 1
+*
+*     I1 and I2 are the indices of the first row and last column of H
+*     to which transformations must be applied. If eigenvalues only are
+*     being computed, I1 and I2 are re-set inside the main loop.
+*
+      IF( WANTT ) THEN
+         I1 = 1
+         I2 = N
+      ELSE
+         I1 = ILO
+         I2 = IHI
+      END IF
+*
+*     Ensure that the subdiagonal elements are real.
+*
+      DO 50 I = ILO + 1, IHI
+         TEMP = H( I, I-1 )
+         IF( DIMAG( TEMP ).NE.RZERO ) THEN
+            RTEMP = DLAPY2( DBLE( TEMP ), DIMAG( TEMP ) )
+            H( I, I-1 ) = RTEMP
+            TEMP = TEMP / RTEMP
+            IF( I2.GT.I )
+     $         CALL ZSCAL( I2-I, DCONJG( TEMP ), H( I, I+1 ), LDH )
+            CALL ZSCAL( I-I1, TEMP, H( I1, I ), 1 )
+            IF( I.LT.IHI )
+     $         H( I+1, I ) = TEMP*H( I+1, I )
+            IF( WANTZ )
+     $         CALL ZSCAL( NH, TEMP, Z( ILO, I ), 1 )
+         END IF
+   50 CONTINUE
+*
+*     Determine the order of the multi-shift QR algorithm to be used.
+*
+      NS = ILAENV( 4, 'ZHSEQR', JOB // COMPZ, N, ILO, IHI, -1 )
+      MAXB = ILAENV( 8, 'ZHSEQR', JOB // COMPZ, N, ILO, IHI, -1 )
+      IF( NS.LE.1 .OR. NS.GT.NH .OR. MAXB.GE.NH ) THEN
+*
+*        Use the standard double-shift algorithm
+*
+         CALL ZLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILO, IHI, Z,
+     $                LDZ, INFO )
+         RETURN
+      END IF
+      MAXB = MAX( 2, MAXB )
+      NS = MIN( NS, MAXB, NSMAX )
+*
+*     Now 1 < NS <= MAXB < NH.
+*
+*     Set machine-dependent constants for the stopping criterion.
+*     If norm(H) <= sqrt(OVFL), overflow should not occur.
+*
+      UNFL = DLAMCH( 'Safe minimum' )
+      OVFL = RONE / UNFL
+      CALL DLABAD( UNFL, OVFL )
+      ULP = DLAMCH( 'Precision' )
+      SMLNUM = UNFL*( NH / ULP )
+*
+*     ITN is the total number of multiple-shift QR iterations allowed.
+*
+      ITN = 30*NH
+*
+*     The main loop begins here. I is the loop index and decreases from
+*     IHI to ILO in steps of at most MAXB. Each iteration of the loop
+*     works with the active submatrix in rows and columns L to I.
+*     Eigenvalues I+1 to IHI have already converged. Either L = ILO, or
+*     H(L,L-1) is negligible so that the matrix splits.
+*
+      I = IHI
+   60 CONTINUE
+      IF( I.LT.ILO )
+     $   GO TO 180
+*
+*     Perform multiple-shift QR iterations on rows and columns ILO to I
+*     until a submatrix of order at most MAXB splits off at the bottom
+*     because a subdiagonal element has become negligible.
+*
+      L = ILO
+      DO 160 ITS = 0, ITN
+*
+*        Look for a single small subdiagonal element.
+*
+         DO 70 K = I, L + 1, -1
+            TST1 = CABS1( H( K-1, K-1 ) ) + CABS1( H( K, K ) )
+            IF( TST1.EQ.RZERO )
+     $         TST1 = ZLANHS( '1', I-L+1, H( L, L ), LDH, RWORK )
+            IF( ABS( DBLE( H( K, K-1 ) ) ).LE.MAX( ULP*TST1, SMLNUM ) )
+     $         GO TO 80
+   70    CONTINUE
+   80    CONTINUE
+         L = K
+         IF( L.GT.ILO ) THEN
+*
+*           H(L,L-1) is negligible.
+*
+            H( L, L-1 ) = ZERO
+         END IF
+*
+*        Exit from loop if a submatrix of order <= MAXB has split off.
+*
+         IF( L.GE.I-MAXB+1 )
+     $      GO TO 170
+*
+*        Now the active submatrix is in rows and columns L to I. If
+*        eigenvalues only are being computed, only the active submatrix
+*        need be transformed.
+*
+         IF( .NOT.WANTT ) THEN
+            I1 = L
+            I2 = I
+         END IF
+*
+         IF( ITS.EQ.20 .OR. ITS.EQ.30 ) THEN
+*
+*           Exceptional shifts.
+*
+            DO 90 II = I - NS + 1, I
+               W( II ) = CONST*( ABS( DBLE( H( II, II-1 ) ) )+
+     $                   ABS( DBLE( H( II, II ) ) ) )
+   90       CONTINUE
+         ELSE
+*
+*           Use eigenvalues of trailing submatrix of order NS as shifts.
+*
+            CALL ZLACPY( 'Full', NS, NS, H( I-NS+1, I-NS+1 ), LDH, S,
+     $                   LDS )
+            CALL ZLAHQR( .FALSE., .FALSE., NS, 1, NS, S, LDS,
+     $                   W( I-NS+1 ), 1, NS, Z, LDZ, IERR )
+            IF( IERR.GT.0 ) THEN
+*
+*              If ZLAHQR failed to compute all NS eigenvalues, use the
+*              unconverged diagonal elements as the remaining shifts.
+*
+               DO 100 II = 1, IERR
+                  W( I-NS+II ) = S( II, II )
+  100          CONTINUE
+            END IF
+         END IF
+*
+*        Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns))
+*        where G is the Hessenberg submatrix H(L:I,L:I) and w is
+*        the vector of shifts (stored in W). The result is
+*        stored in the local array V.
+*
+         V( 1 ) = ONE
+         DO 110 II = 2, NS + 1
+            V( II ) = ZERO
+  110    CONTINUE
+         NV = 1
+         DO 130 J = I - NS + 1, I
+            CALL ZCOPY( NV+1, V, 1, VV, 1 )
+            CALL ZGEMV( 'No transpose', NV+1, NV, ONE, H( L, L ), LDH,
+     $                  VV, 1, -W( J ), V, 1 )
+            NV = NV + 1
+*
+*           Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero,
+*           reset it to the unit vector.
+*
+            ITEMP = IZAMAX( NV, V, 1 )
+            RTEMP = CABS1( V( ITEMP ) )
+            IF( RTEMP.EQ.RZERO ) THEN
+               V( 1 ) = ONE
+               DO 120 II = 2, NV
+                  V( II ) = ZERO
+  120          CONTINUE
+            ELSE
+               RTEMP = MAX( RTEMP, SMLNUM )
+               CALL ZDSCAL( NV, RONE / RTEMP, V, 1 )
+            END IF
+  130    CONTINUE
+*
+*        Multiple-shift QR step
+*
+         DO 150 K = L, I - 1
+*
+*           The first iteration of this loop determines a reflection G
+*           from the vector V and applies it from left and right to H,
+*           thus creating a nonzero bulge below the subdiagonal.
+*
+*           Each subsequent iteration determines a reflection G to
+*           restore the Hessenberg form in the (K-1)th column, and thus
+*           chases the bulge one step toward the bottom of the active
+*           submatrix. NR is the order of G.
+*
+            NR = MIN( NS+1, I-K+1 )
+            IF( K.GT.L )
+     $         CALL ZCOPY( NR, H( K, K-1 ), 1, V, 1 )
+            CALL ZLARFG( NR, V( 1 ), V( 2 ), 1, TAU )
+            IF( K.GT.L ) THEN
+               H( K, K-1 ) = V( 1 )
+               DO 140 II = K + 1, I
+                  H( II, K-1 ) = ZERO
+  140          CONTINUE
+            END IF
+            V( 1 ) = ONE
+*
+*           Apply G' from the left to transform the rows of the matrix
+*           in columns K to I2.
+*
+            CALL ZLARFX( 'Left', NR, I2-K+1, V, DCONJG( TAU ),
+     $                   H( K, K ), LDH, WORK )
+*
+*           Apply G from the right to transform the columns of the
+*           matrix in rows I1 to min(K+NR,I).
+*
+            CALL ZLARFX( 'Right', MIN( K+NR, I )-I1+1, NR, V, TAU,
+     $                   H( I1, K ), LDH, WORK )
+*
+            IF( WANTZ ) THEN
+*
+*              Accumulate transformations in the matrix Z
+*
+               CALL ZLARFX( 'Right', NH, NR, V, TAU, Z( ILO, K ), LDZ,
+     $                      WORK )
+            END IF
+  150    CONTINUE
+*
+*        Ensure that H(I,I-1) is real.
+*
+         TEMP = H( I, I-1 )
+         IF( DIMAG( TEMP ).NE.RZERO ) THEN
+            RTEMP = DLAPY2( DBLE( TEMP ), DIMAG( TEMP ) )
+            H( I, I-1 ) = RTEMP
+            TEMP = TEMP / RTEMP
+            IF( I2.GT.I )
+     $         CALL ZSCAL( I2-I, DCONJG( TEMP ), H( I, I+1 ), LDH )
+            CALL ZSCAL( I-I1, TEMP, H( I1, I ), 1 )
+            IF( WANTZ ) THEN
+               CALL ZSCAL( NH, TEMP, Z( ILO, I ), 1 )
+            END IF
+         END IF
+*
+  160 CONTINUE
+*
+*     Failure to converge in remaining number of iterations
+*
+      INFO = I
+      RETURN
+*
+  170 CONTINUE
+*
+*     A submatrix of order <= MAXB in rows and columns L to I has split
+*     off. Use the double-shift QR algorithm to handle it.
+*
+      CALL ZLAHQR( WANTT, WANTZ, N, L, I, H, LDH, W, ILO, IHI, Z, LDZ,
+     $             INFO )
+      IF( INFO.GT.0 )
+     $   RETURN
+*
+*     Decrement number of remaining iterations, and return to start of
+*     the main loop with a new value of I.
+*
+      ITN = ITN - ITS
+      I = L - 1
+      GO TO 60
+*
+  180 CONTINUE
+      WORK( 1 ) = MAX( 1, N )
+      RETURN
+*
+*     End of ZHSEQR
+*
+      END
diff --git a/costa/native/external/lapack/zlabrd.f b/costa/native/external/lapack/zlabrd.f
new file mode 100644
index 000000000..45ee265ec
--- /dev/null
+++ b/costa/native/external/lapack/zlabrd.f
@@ -0,0 +1,329 @@
+      SUBROUTINE ZLABRD( M, N, NB, A, LDA, D, E, TAUQ, TAUP, X, LDX, Y,
+     $                   LDY )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            LDA, LDX, LDY, M, N, NB
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * ), E( * )
+      COMPLEX*16         A( LDA, * ), TAUP( * ), TAUQ( * ), X( LDX, * ),
+     $                   Y( LDY, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLABRD reduces the first NB rows and columns of a complex general
+*  m by n matrix A to upper or lower real bidiagonal form by a unitary
+*  transformation Q' * A * P, and returns the matrices X and Y which
+*  are needed to apply the transformation to the unreduced part of A.
+*
+*  If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower
+*  bidiagonal form.
+*
+*  This is an auxiliary routine called by ZGEBRD
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows in the matrix A.
+*
+*  N       (input) INTEGER
+*          The number of columns in the matrix A.
+*
+*  NB      (input) INTEGER
+*          The number of leading rows and columns of A to be reduced.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the m by n general matrix to be reduced.
+*          On exit, the first NB rows and columns of the matrix are
+*          overwritten; the rest of the array is unchanged.
+*          If m >= n, elements on and below the diagonal in the first NB
+*            columns, with the array TAUQ, represent the unitary
+*            matrix Q as a product of elementary reflectors; and
+*            elements above the diagonal in the first NB rows, with the
+*            array TAUP, represent the unitary matrix P as a product
+*            of elementary reflectors.
+*          If m < n, elements below the diagonal in the first NB
+*            columns, with the array TAUQ, represent the unitary
+*            matrix Q as a product of elementary reflectors, and
+*            elements on and above the diagonal in the first NB rows,
+*            with the array TAUP, represent the unitary matrix P as
+*            a product of elementary reflectors.
+*          See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  D       (output) DOUBLE PRECISION array, dimension (NB)
+*          The diagonal elements of the first NB rows and columns of
+*          the reduced matrix.  D(i) = A(i,i).
+*
+*  E       (output) DOUBLE PRECISION array, dimension (NB)
+*          The off-diagonal elements of the first NB rows and columns of
+*          the reduced matrix.
+*
+*  TAUQ    (output) COMPLEX*16 array dimension (NB)
+*          The scalar factors of the elementary reflectors which
+*          represent the unitary matrix Q. See Further Details.
+*
+*  TAUP    (output) COMPLEX*16 array, dimension (NB)
+*          The scalar factors of the elementary reflectors which
+*          represent the unitary matrix P. See Further Details.
+*
+*  X       (output) COMPLEX*16 array, dimension (LDX,NB)
+*          The m-by-nb matrix X required to update the unreduced part
+*          of A.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X. LDX >= max(1,M).
+*
+*  Y       (output) COMPLEX*16 array, dimension (LDY,NB)
+*          The n-by-nb matrix Y required to update the unreduced part
+*          of A.
+*
+*  LDY     (output) INTEGER
+*          The leading dimension of the array Y. LDY >= max(1,N).
+*
+*  Further Details
+*  ===============
+*
+*  The matrices Q and P are represented as products of elementary
+*  reflectors:
+*
+*     Q = H(1) H(2) . . . H(nb)  and  P = G(1) G(2) . . . G(nb)
+*
+*  Each H(i) and G(i) has the form:
+*
+*     H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+*
+*  where tauq and taup are complex scalars, and v and u are complex
+*  vectors.
+*
+*  If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in
+*  A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in
+*  A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
+*
+*  If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in
+*  A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in
+*  A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
+*
+*  The elements of the vectors v and u together form the m-by-nb matrix
+*  V and the nb-by-n matrix U' which are needed, with X and Y, to apply
+*  the transformation to the unreduced part of the matrix, using a block
+*  update of the form:  A := A - V*Y' - X*U'.
+*
+*  The contents of A on exit are illustrated by the following examples
+*  with nb = 2:
+*
+*  m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
+*
+*    (  1   1   u1  u1  u1 )           (  1   u1  u1  u1  u1  u1 )
+*    (  v1  1   1   u2  u2 )           (  1   1   u2  u2  u2  u2 )
+*    (  v1  v2  a   a   a  )           (  v1  1   a   a   a   a  )
+*    (  v1  v2  a   a   a  )           (  v1  v2  a   a   a   a  )
+*    (  v1  v2  a   a   a  )           (  v1  v2  a   a   a   a  )
+*    (  v1  v2  a   a   a  )
+*
+*  where a denotes an element of the original matrix which is unchanged,
+*  vi denotes an element of the vector defining H(i), and ui an element
+*  of the vector defining G(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO, ONE
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      COMPLEX*16         ALPHA
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZGEMV, ZLACGV, ZLARFG, ZSCAL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 .OR. N.LE.0 )
+     $   RETURN
+*
+      IF( M.GE.N ) THEN
+*
+*        Reduce to upper bidiagonal form
+*
+         DO 10 I = 1, NB
+*
+*           Update A(i:m,i)
+*
+            CALL ZLACGV( I-1, Y( I, 1 ), LDY )
+            CALL ZGEMV( 'No transpose', M-I+1, I-1, -ONE, A( I, 1 ),
+     $                  LDA, Y( I, 1 ), LDY, ONE, A( I, I ), 1 )
+            CALL ZLACGV( I-1, Y( I, 1 ), LDY )
+            CALL ZGEMV( 'No transpose', M-I+1, I-1, -ONE, X( I, 1 ),
+     $                  LDX, A( 1, I ), 1, ONE, A( I, I ), 1 )
+*
+*           Generate reflection Q(i) to annihilate A(i+1:m,i)
+*
+            ALPHA = A( I, I )
+            CALL ZLARFG( M-I+1, ALPHA, A( MIN( I+1, M ), I ), 1,
+     $                   TAUQ( I ) )
+            D( I ) = ALPHA
+            IF( I.LT.N ) THEN
+               A( I, I ) = ONE
+*
+*              Compute Y(i+1:n,i)
+*
+               CALL ZGEMV( 'Conjugate transpose', M-I+1, N-I, ONE,
+     $                     A( I, I+1 ), LDA, A( I, I ), 1, ZERO,
+     $                     Y( I+1, I ), 1 )
+               CALL ZGEMV( 'Conjugate transpose', M-I+1, I-1, ONE,
+     $                     A( I, 1 ), LDA, A( I, I ), 1, ZERO,
+     $                     Y( 1, I ), 1 )
+               CALL ZGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ),
+     $                     LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
+               CALL ZGEMV( 'Conjugate transpose', M-I+1, I-1, ONE,
+     $                     X( I, 1 ), LDX, A( I, I ), 1, ZERO,
+     $                     Y( 1, I ), 1 )
+               CALL ZGEMV( 'Conjugate transpose', I-1, N-I, -ONE,
+     $                     A( 1, I+1 ), LDA, Y( 1, I ), 1, ONE,
+     $                     Y( I+1, I ), 1 )
+               CALL ZSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 )
+*
+*              Update A(i,i+1:n)
+*
+               CALL ZLACGV( N-I, A( I, I+1 ), LDA )
+               CALL ZLACGV( I, A( I, 1 ), LDA )
+               CALL ZGEMV( 'No transpose', N-I, I, -ONE, Y( I+1, 1 ),
+     $                     LDY, A( I, 1 ), LDA, ONE, A( I, I+1 ), LDA )
+               CALL ZLACGV( I, A( I, 1 ), LDA )
+               CALL ZLACGV( I-1, X( I, 1 ), LDX )
+               CALL ZGEMV( 'Conjugate transpose', I-1, N-I, -ONE,
+     $                     A( 1, I+1 ), LDA, X( I, 1 ), LDX, ONE,
+     $                     A( I, I+1 ), LDA )
+               CALL ZLACGV( I-1, X( I, 1 ), LDX )
+*
+*              Generate reflection P(i) to annihilate A(i,i+2:n)
+*
+               ALPHA = A( I, I+1 )
+               CALL ZLARFG( N-I, ALPHA, A( I, MIN( I+2, N ) ), LDA,
+     $                      TAUP( I ) )
+               E( I ) = ALPHA
+               A( I, I+1 ) = ONE
+*
+*              Compute X(i+1:m,i)
+*
+               CALL ZGEMV( 'No transpose', M-I, N-I, ONE, A( I+1, I+1 ),
+     $                     LDA, A( I, I+1 ), LDA, ZERO, X( I+1, I ), 1 )
+               CALL ZGEMV( 'Conjugate transpose', N-I, I, ONE,
+     $                     Y( I+1, 1 ), LDY, A( I, I+1 ), LDA, ZERO,
+     $                     X( 1, I ), 1 )
+               CALL ZGEMV( 'No transpose', M-I, I, -ONE, A( I+1, 1 ),
+     $                     LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
+               CALL ZGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ),
+     $                     LDA, A( I, I+1 ), LDA, ZERO, X( 1, I ), 1 )
+               CALL ZGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ),
+     $                     LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
+               CALL ZSCAL( M-I, TAUP( I ), X( I+1, I ), 1 )
+               CALL ZLACGV( N-I, A( I, I+1 ), LDA )
+            END IF
+   10    CONTINUE
+      ELSE
+*
+*        Reduce to lower bidiagonal form
+*
+         DO 20 I = 1, NB
+*
+*           Update A(i,i:n)
+*
+            CALL ZLACGV( N-I+1, A( I, I ), LDA )
+            CALL ZLACGV( I-1, A( I, 1 ), LDA )
+            CALL ZGEMV( 'No transpose', N-I+1, I-1, -ONE, Y( I, 1 ),
+     $                  LDY, A( I, 1 ), LDA, ONE, A( I, I ), LDA )
+            CALL ZLACGV( I-1, A( I, 1 ), LDA )
+            CALL ZLACGV( I-1, X( I, 1 ), LDX )
+            CALL ZGEMV( 'Conjugate transpose', I-1, N-I+1, -ONE,
+     $                  A( 1, I ), LDA, X( I, 1 ), LDX, ONE, A( I, I ),
+     $                  LDA )
+            CALL ZLACGV( I-1, X( I, 1 ), LDX )
+*
+*           Generate reflection P(i) to annihilate A(i,i+1:n)
+*
+            ALPHA = A( I, I )
+            CALL ZLARFG( N-I+1, ALPHA, A( I, MIN( I+1, N ) ), LDA,
+     $                   TAUP( I ) )
+            D( I ) = ALPHA
+            IF( I.LT.M ) THEN
+               A( I, I ) = ONE
+*
+*              Compute X(i+1:m,i)
+*
+               CALL ZGEMV( 'No transpose', M-I, N-I+1, ONE, A( I+1, I ),
+     $                     LDA, A( I, I ), LDA, ZERO, X( I+1, I ), 1 )
+               CALL ZGEMV( 'Conjugate transpose', N-I+1, I-1, ONE,
+     $                     Y( I, 1 ), LDY, A( I, I ), LDA, ZERO,
+     $                     X( 1, I ), 1 )
+               CALL ZGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ),
+     $                     LDA, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
+               CALL ZGEMV( 'No transpose', I-1, N-I+1, ONE, A( 1, I ),
+     $                     LDA, A( I, I ), LDA, ZERO, X( 1, I ), 1 )
+               CALL ZGEMV( 'No transpose', M-I, I-1, -ONE, X( I+1, 1 ),
+     $                     LDX, X( 1, I ), 1, ONE, X( I+1, I ), 1 )
+               CALL ZSCAL( M-I, TAUP( I ), X( I+1, I ), 1 )
+               CALL ZLACGV( N-I+1, A( I, I ), LDA )
+*
+*              Update A(i+1:m,i)
+*
+               CALL ZLACGV( I-1, Y( I, 1 ), LDY )
+               CALL ZGEMV( 'No transpose', M-I, I-1, -ONE, A( I+1, 1 ),
+     $                     LDA, Y( I, 1 ), LDY, ONE, A( I+1, I ), 1 )
+               CALL ZLACGV( I-1, Y( I, 1 ), LDY )
+               CALL ZGEMV( 'No transpose', M-I, I, -ONE, X( I+1, 1 ),
+     $                     LDX, A( 1, I ), 1, ONE, A( I+1, I ), 1 )
+*
+*              Generate reflection Q(i) to annihilate A(i+2:m,i)
+*
+               ALPHA = A( I+1, I )
+               CALL ZLARFG( M-I, ALPHA, A( MIN( I+2, M ), I ), 1,
+     $                      TAUQ( I ) )
+               E( I ) = ALPHA
+               A( I+1, I ) = ONE
+*
+*              Compute Y(i+1:n,i)
+*
+               CALL ZGEMV( 'Conjugate transpose', M-I, N-I, ONE,
+     $                     A( I+1, I+1 ), LDA, A( I+1, I ), 1, ZERO,
+     $                     Y( I+1, I ), 1 )
+               CALL ZGEMV( 'Conjugate transpose', M-I, I-1, ONE,
+     $                     A( I+1, 1 ), LDA, A( I+1, I ), 1, ZERO,
+     $                     Y( 1, I ), 1 )
+               CALL ZGEMV( 'No transpose', N-I, I-1, -ONE, Y( I+1, 1 ),
+     $                     LDY, Y( 1, I ), 1, ONE, Y( I+1, I ), 1 )
+               CALL ZGEMV( 'Conjugate transpose', M-I, I, ONE,
+     $                     X( I+1, 1 ), LDX, A( I+1, I ), 1, ZERO,
+     $                     Y( 1, I ), 1 )
+               CALL ZGEMV( 'Conjugate transpose', I, N-I, -ONE,
+     $                     A( 1, I+1 ), LDA, Y( 1, I ), 1, ONE,
+     $                     Y( I+1, I ), 1 )
+               CALL ZSCAL( N-I, TAUQ( I ), Y( I+1, I ), 1 )
+            ELSE
+               CALL ZLACGV( N-I+1, A( I, I ), LDA )
+            END IF
+   20    CONTINUE
+      END IF
+      RETURN
+*
+*     End of ZLABRD
+*
+      END
diff --git a/costa/native/external/lapack/zlacgv.f b/costa/native/external/lapack/zlacgv.f
new file mode 100644
index 000000000..032f8c69c
--- /dev/null
+++ b/costa/native/external/lapack/zlacgv.f
@@ -0,0 +1,61 @@
+      SUBROUTINE ZLACGV( N, X, INCX )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLACGV conjugates a complex vector of length N.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The length of the vector X.  N >= 0.
+*
+*  X       (input/output) COMPLEX*16 array, dimension
+*                         (1+(N-1)*abs(INCX))
+*          On entry, the vector of length N to be conjugated.
+*          On exit, X is overwritten with conjg(X).
+*
+*  INCX    (input) INTEGER
+*          The spacing between successive elements of X.
+*
+* =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, IOFF
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG
+*     ..
+*     .. Executable Statements ..
+*
+      IF( INCX.EQ.1 ) THEN
+         DO 10 I = 1, N
+            X( I ) = DCONJG( X( I ) )
+   10    CONTINUE
+      ELSE
+         IOFF = 1
+         IF( INCX.LT.0 )
+     $      IOFF = 1 - ( N-1 )*INCX
+         DO 20 I = 1, N
+            X( IOFF ) = DCONJG( X( IOFF ) )
+            IOFF = IOFF + INCX
+   20    CONTINUE
+      END IF
+      RETURN
+*
+*     End of ZLACGV
+*
+      END
diff --git a/costa/native/external/lapack/zlacon.f b/costa/native/external/lapack/zlacon.f
new file mode 100644
index 000000000..32c3c6788
--- /dev/null
+++ b/costa/native/external/lapack/zlacon.f
@@ -0,0 +1,211 @@
+      SUBROUTINE ZLACON( N, V, X, EST, KASE )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            KASE, N
+      DOUBLE PRECISION   EST
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         V( N ), X( N )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLACON estimates the 1-norm of a square, complex matrix A.
+*  Reverse communication is used for evaluating matrix-vector products.
+*
+*  Arguments
+*  =========
+*
+*  N      (input) INTEGER
+*         The order of the matrix.  N >= 1.
+*
+*  V      (workspace) COMPLEX*16 array, dimension (N)
+*         On the final return, V = A*W,  where  EST = norm(V)/norm(W)
+*         (W is not returned).
+*
+*  X      (input/output) COMPLEX*16 array, dimension (N)
+*         On an intermediate return, X should be overwritten by
+*               A * X,   if KASE=1,
+*               A' * X,  if KASE=2,
+*         where A' is the conjugate transpose of A, and ZLACON must be
+*         re-called with all the other parameters unchanged.
+*
+*  EST    (output) DOUBLE PRECISION
+*         An estimate (a lower bound) for norm(A).
+*
+*  KASE   (input/output) INTEGER
+*         On the initial call to ZLACON, KASE should be 0.
+*         On an intermediate return, KASE will be 1 or 2, indicating
+*         whether X should be overwritten by A * X  or A' * X.
+*         On the final return from ZLACON, KASE will again be 0.
+*
+*  Further Details
+*  ======= =======
+*
+*  Contributed by Nick Higham, University of Manchester.
+*  Originally named CONEST, dated March 16, 1988.
+*
+*  Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of
+*  a real or complex matrix, with applications to condition estimation",
+*  ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988.
+*
+*  Last modified:  April, 1999
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      DOUBLE PRECISION   ONE, TWO
+      PARAMETER          ( ONE = 1.0D0, TWO = 2.0D0 )
+      COMPLEX*16         CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0D0, 0.0D0 ),
+     $                   CONE = ( 1.0D0, 0.0D0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, ITER, J, JLAST, JUMP
+      DOUBLE PRECISION   ABSXI, ALTSGN, ESTOLD, SAFMIN, TEMP
+*     ..
+*     .. External Functions ..
+      INTEGER            IZMAX1
+      DOUBLE PRECISION   DLAMCH, DZSUM1
+      EXTERNAL           IZMAX1, DLAMCH, DZSUM1
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZCOPY
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCMPLX, DIMAG
+*     ..
+*     .. Save statement ..
+      SAVE
+*     ..
+*     .. Executable Statements ..
+*
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      IF( KASE.EQ.0 ) THEN
+         DO 10 I = 1, N
+            X( I ) = DCMPLX( ONE / DBLE( N ) )
+   10    CONTINUE
+         KASE = 1
+         JUMP = 1
+         RETURN
+      END IF
+*
+      GO TO ( 20, 40, 70, 90, 120 )JUMP
+*
+*     ................ ENTRY   (JUMP = 1)
+*     FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY A*X.
+*
+   20 CONTINUE
+      IF( N.EQ.1 ) THEN
+         V( 1 ) = X( 1 )
+         EST = ABS( V( 1 ) )
+*        ... QUIT
+         GO TO 130
+      END IF
+      EST = DZSUM1( N, X, 1 )
+*
+      DO 30 I = 1, N
+         ABSXI = ABS( X( I ) )
+         IF( ABSXI.GT.SAFMIN ) THEN
+            X( I ) = DCMPLX( DBLE( X( I ) ) / ABSXI,
+     $               DIMAG( X( I ) ) / ABSXI )
+         ELSE
+            X( I ) = CONE
+         END IF
+   30 CONTINUE
+      KASE = 2
+      JUMP = 2
+      RETURN
+*
+*     ................ ENTRY   (JUMP = 2)
+*     FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY ZTRANS(A)*X.
+*
+   40 CONTINUE
+      J = IZMAX1( N, X, 1 )
+      ITER = 2
+*
+*     MAIN LOOP - ITERATIONS 2,3,...,ITMAX.
+*
+   50 CONTINUE
+      DO 60 I = 1, N
+         X( I ) = CZERO
+   60 CONTINUE
+      X( J ) = CONE
+      KASE = 1
+      JUMP = 3
+      RETURN
+*
+*     ................ ENTRY   (JUMP = 3)
+*     X HAS BEEN OVERWRITTEN BY A*X.
+*
+   70 CONTINUE
+      CALL ZCOPY( N, X, 1, V, 1 )
+      ESTOLD = EST
+      EST = DZSUM1( N, V, 1 )
+*
+*     TEST FOR CYCLING.
+      IF( EST.LE.ESTOLD )
+     $   GO TO 100
+*
+      DO 80 I = 1, N
+         ABSXI = ABS( X( I ) )
+         IF( ABSXI.GT.SAFMIN ) THEN
+            X( I ) = DCMPLX( DBLE( X( I ) ) / ABSXI,
+     $               DIMAG( X( I ) ) / ABSXI )
+         ELSE
+            X( I ) = CONE
+         END IF
+   80 CONTINUE
+      KASE = 2
+      JUMP = 4
+      RETURN
+*
+*     ................ ENTRY   (JUMP = 4)
+*     X HAS BEEN OVERWRITTEN BY ZTRANS(A)*X.
+*
+   90 CONTINUE
+      JLAST = J
+      J = IZMAX1( N, X, 1 )
+      IF( ( ABS( X( JLAST ) ).NE.ABS( X( J ) ) ) .AND.
+     $    ( ITER.LT.ITMAX ) ) THEN
+         ITER = ITER + 1
+         GO TO 50
+      END IF
+*
+*     ITERATION COMPLETE.  FINAL STAGE.
+*
+  100 CONTINUE
+      ALTSGN = ONE
+      DO 110 I = 1, N
+         X( I ) = DCMPLX( ALTSGN*( ONE+DBLE( I-1 ) / DBLE( N-1 ) ) )
+         ALTSGN = -ALTSGN
+  110 CONTINUE
+      KASE = 1
+      JUMP = 5
+      RETURN
+*
+*     ................ ENTRY   (JUMP = 5)
+*     X HAS BEEN OVERWRITTEN BY A*X.
+*
+  120 CONTINUE
+      TEMP = TWO*( DZSUM1( N, X, 1 ) / DBLE( 3*N ) )
+      IF( TEMP.GT.EST ) THEN
+         CALL ZCOPY( N, X, 1, V, 1 )
+         EST = TEMP
+      END IF
+*
+  130 CONTINUE
+      KASE = 0
+      RETURN
+*
+*     End of ZLACON
+*
+      END
diff --git a/costa/native/external/lapack/zlacp2.f b/costa/native/external/lapack/zlacp2.f
new file mode 100644
index 000000000..a01057fb1
--- /dev/null
+++ b/costa/native/external/lapack/zlacp2.f
@@ -0,0 +1,92 @@
+      SUBROUTINE ZLACP2( UPLO, M, N, A, LDA, B, LDB )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            LDA, LDB, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * )
+      COMPLEX*16         B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLACP2 copies all or part of a real two-dimensional matrix A to a
+*  complex matrix B.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies the part of the matrix A to be copied to B.
+*          = 'U':      Upper triangular part
+*          = 'L':      Lower triangular part
+*          Otherwise:  All of the matrix A
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+*          The m by n matrix A.  If UPLO = 'U', only the upper trapezium
+*          is accessed; if UPLO = 'L', only the lower trapezium is
+*          accessed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  B       (output) COMPLEX*16 array, dimension (LDB,N)
+*          On exit, B = A in the locations specified by UPLO.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,M).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+      IF( LSAME( UPLO, 'U' ) ) THEN
+         DO 20 J = 1, N
+            DO 10 I = 1, MIN( J, M )
+               B( I, J ) = A( I, J )
+   10       CONTINUE
+   20    CONTINUE
+*
+      ELSE IF( LSAME( UPLO, 'L' ) ) THEN
+         DO 40 J = 1, N
+            DO 30 I = J, M
+               B( I, J ) = A( I, J )
+   30       CONTINUE
+   40    CONTINUE
+*
+      ELSE
+         DO 60 J = 1, N
+            DO 50 I = 1, M
+               B( I, J ) = A( I, J )
+   50       CONTINUE
+   60    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZLACP2
+*
+      END
diff --git a/costa/native/external/lapack/zlacpy.f b/costa/native/external/lapack/zlacpy.f
new file mode 100644
index 000000000..b0d264ce2
--- /dev/null
+++ b/costa/native/external/lapack/zlacpy.f
@@ -0,0 +1,91 @@
+      SUBROUTINE ZLACPY( UPLO, M, N, A, LDA, B, LDB )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            LDA, LDB, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLACPY copies all or part of a two-dimensional matrix A to another
+*  matrix B.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies the part of the matrix A to be copied to B.
+*          = 'U':      Upper triangular part
+*          = 'L':      Lower triangular part
+*          Otherwise:  All of the matrix A
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,N)
+*          The m by n matrix A.  If UPLO = 'U', only the upper trapezium
+*          is accessed; if UPLO = 'L', only the lower trapezium is
+*          accessed.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  B       (output) COMPLEX*16 array, dimension (LDB,N)
+*          On exit, B = A in the locations specified by UPLO.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,M).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+      IF( LSAME( UPLO, 'U' ) ) THEN
+         DO 20 J = 1, N
+            DO 10 I = 1, MIN( J, M )
+               B( I, J ) = A( I, J )
+   10       CONTINUE
+   20    CONTINUE
+*
+      ELSE IF( LSAME( UPLO, 'L' ) ) THEN
+         DO 40 J = 1, N
+            DO 30 I = J, M
+               B( I, J ) = A( I, J )
+   30       CONTINUE
+   40    CONTINUE
+*
+      ELSE
+         DO 60 J = 1, N
+            DO 50 I = 1, M
+               B( I, J ) = A( I, J )
+   50       CONTINUE
+   60    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZLACPY
+*
+      END
diff --git a/costa/native/external/lapack/zlacrm.f b/costa/native/external/lapack/zlacrm.f
new file mode 100644
index 000000000..951916541
--- /dev/null
+++ b/costa/native/external/lapack/zlacrm.f
@@ -0,0 +1,111 @@
+      SUBROUTINE ZLACRM( M, N, A, LDA, B, LDB, C, LDC, RWORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            LDA, LDB, LDC, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   B( LDB, * ), RWORK( * )
+      COMPLEX*16         A( LDA, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLACRM performs a very simple matrix-matrix multiplication:
+*           C := A * B,
+*  where A is M by N and complex; B is N by N and real;
+*  C is M by N and complex.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A and of the matrix C.
+*          M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns and rows of the matrix B and
+*          the number of columns of the matrix C.
+*          N >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA, N)
+*          A contains the M by N matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >=max(1,M).
+*
+*  B       (input) DOUBLE PRECISION array, dimension (LDB, N)
+*          B contains the N by N matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >=max(1,N).
+*
+*  C       (input) COMPLEX*16 array, dimension (LDC, N)
+*          C contains the M by N matrix C.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >=max(1,N).
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (2*M*N)
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D0, ZERO = 0.0D0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, L
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, DCMPLX, DIMAG
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMM
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible.
+*
+      IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) )
+     $   RETURN
+*
+      DO 20 J = 1, N
+         DO 10 I = 1, M
+            RWORK( ( J-1 )*M+I ) = DBLE( A( I, J ) )
+   10    CONTINUE
+   20 CONTINUE
+*
+      L = M*N + 1
+      CALL DGEMM( 'N', 'N', M, N, N, ONE, RWORK, M, B, LDB, ZERO,
+     $            RWORK( L ), M )
+      DO 40 J = 1, N
+         DO 30 I = 1, M
+            C( I, J ) = RWORK( L+( J-1 )*M+I-1 )
+   30    CONTINUE
+   40 CONTINUE
+*
+      DO 60 J = 1, N
+         DO 50 I = 1, M
+            RWORK( ( J-1 )*M+I ) = DIMAG( A( I, J ) )
+   50    CONTINUE
+   60 CONTINUE
+      CALL DGEMM( 'N', 'N', M, N, N, ONE, RWORK, M, B, LDB, ZERO,
+     $            RWORK( L ), M )
+      DO 80 J = 1, N
+         DO 70 I = 1, M
+            C( I, J ) = DCMPLX( DBLE( C( I, J ) ),
+     $                  RWORK( L+( J-1 )*M+I-1 ) )
+   70    CONTINUE
+   80 CONTINUE
+*
+      RETURN
+*
+*     End of ZLACRM
+*
+      END
diff --git a/costa/native/external/lapack/zlacrt.f b/costa/native/external/lapack/zlacrt.f
new file mode 100644
index 000000000..174683600
--- /dev/null
+++ b/costa/native/external/lapack/zlacrt.f
@@ -0,0 +1,91 @@
+      SUBROUTINE ZLACRT( N, CX, INCX, CY, INCY, C, S )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, INCY, N
+      COMPLEX*16         C, S
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         CX( * ), CY( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLACRT performs the operation
+*
+*     (  c  s )( x )  ==> ( x )
+*     ( -s  c )( y )      ( y )
+*
+*  where c and s are complex and the vectors x and y are complex.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of elements in the vectors CX and CY.
+*
+*  CX      (input/output) COMPLEX*16 array, dimension (N)
+*          On input, the vector x.
+*          On output, CX is overwritten with c*x + s*y.
+*
+*  INCX    (input) INTEGER
+*          The increment between successive values of CX.  INCX <> 0.
+*
+*  CY      (input/output) COMPLEX*16 array, dimension (N)
+*          On input, the vector y.
+*          On output, CY is overwritten with -s*x + c*y.
+*
+*  INCY    (input) INTEGER
+*          The increment between successive values of CY.  INCY <> 0.
+*
+*  C       (input) COMPLEX*16
+*  S       (input) COMPLEX*16
+*          C and S define the matrix
+*             [  C   S  ].
+*             [ -S   C  ]
+*
+* =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, IX, IY
+      COMPLEX*16         CTEMP
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.LE.0 )
+     $   RETURN
+      IF( INCX.EQ.1 .AND. INCY.EQ.1 )
+     $   GO TO 20
+*
+*     Code for unequal increments or equal increments not equal to 1
+*
+      IX = 1
+      IY = 1
+      IF( INCX.LT.0 )
+     $   IX = ( -N+1 )*INCX + 1
+      IF( INCY.LT.0 )
+     $   IY = ( -N+1 )*INCY + 1
+      DO 10 I = 1, N
+         CTEMP = C*CX( IX ) + S*CY( IY )
+         CY( IY ) = C*CY( IY ) - S*CX( IX )
+         CX( IX ) = CTEMP
+         IX = IX + INCX
+         IY = IY + INCY
+   10 CONTINUE
+      RETURN
+*
+*     Code for both increments equal to 1
+*
+   20 CONTINUE
+      DO 30 I = 1, N
+         CTEMP = C*CX( I ) + S*CY( I )
+         CY( I ) = C*CY( I ) - S*CX( I )
+         CX( I ) = CTEMP
+   30 CONTINUE
+      RETURN
+      END
diff --git a/costa/native/external/lapack/zladiv.f b/costa/native/external/lapack/zladiv.f
new file mode 100644
index 000000000..31f3aacb9
--- /dev/null
+++ b/costa/native/external/lapack/zladiv.f
@@ -0,0 +1,47 @@
+      DOUBLE COMPLEX   FUNCTION ZLADIV( X, Y )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      COMPLEX*16         X, Y
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLADIV := X / Y, where X and Y are complex.  The computation of X / Y
+*  will not overflow on an intermediary step unless the results
+*  overflows.
+*
+*  Arguments
+*  =========
+*
+*  X       (input) COMPLEX*16
+*  Y       (input) COMPLEX*16
+*          The complex scalars X and Y.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      DOUBLE PRECISION   ZI, ZR
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLADIV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, DCMPLX, DIMAG
+*     ..
+*     .. Executable Statements ..
+*
+      CALL DLADIV( DBLE( X ), DIMAG( X ), DBLE( Y ), DIMAG( Y ), ZR,
+     $             ZI )
+      ZLADIV = DCMPLX( ZR, ZI )
+*
+      RETURN
+*
+*     End of ZLADIV
+*
+      END
diff --git a/costa/native/external/lapack/zlaed0.f b/costa/native/external/lapack/zlaed0.f
new file mode 100644
index 000000000..68ecbd8e5
--- /dev/null
+++ b/costa/native/external/lapack/zlaed0.f
@@ -0,0 +1,289 @@
+      SUBROUTINE ZLAED0( QSIZ, N, D, E, Q, LDQ, QSTORE, LDQS, RWORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDQ, LDQS, N, QSIZ
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   D( * ), E( * ), RWORK( * )
+      COMPLEX*16         Q( LDQ, * ), QSTORE( LDQS, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  Using the divide and conquer method, ZLAED0 computes all eigenvalues
+*  of a symmetric tridiagonal matrix which is one diagonal block of
+*  those from reducing a dense or band Hermitian matrix and
+*  corresponding eigenvectors of the dense or band matrix.
+*
+*  Arguments
+*  =========
+*
+*  QSIZ   (input) INTEGER
+*         The dimension of the unitary matrix used to reduce
+*         the full matrix to tridiagonal form.  QSIZ >= N if ICOMPQ = 1.
+*
+*  N      (input) INTEGER
+*         The dimension of the symmetric tridiagonal matrix.  N >= 0.
+*
+*  D      (input/output) DOUBLE PRECISION array, dimension (N)
+*         On entry, the diagonal elements of the tridiagonal matrix.
+*         On exit, the eigenvalues in ascending order.
+*
+*  E      (input/output) DOUBLE PRECISION array, dimension (N-1)
+*         On entry, the off-diagonal elements of the tridiagonal matrix.
+*         On exit, E has been destroyed.
+*
+*  Q      (input/output) COMPLEX*16 array, dimension (LDQ,N)
+*         On entry, Q must contain an QSIZ x N matrix whose columns
+*         unitarily orthonormal. It is a part of the unitary matrix
+*         that reduces the full dense Hermitian matrix to a
+*         (reducible) symmetric tridiagonal matrix.
+*
+*  LDQ    (input) INTEGER
+*         The leading dimension of the array Q.  LDQ >= max(1,N).
+*
+*  IWORK  (workspace) INTEGER array,
+*         the dimension of IWORK must be at least
+*                      6 + 6*N + 5*N*lg N
+*                      ( lg( N ) = smallest integer k
+*                                  such that 2^k >= N )
+*
+*  RWORK  (workspace) DOUBLE PRECISION array,
+*                               dimension (1 + 3*N + 2*N*lg N + 3*N**2)
+*                        ( lg( N ) = smallest integer k
+*                                    such that 2^k >= N )
+*
+*  QSTORE (workspace) COMPLEX*16 array, dimension (LDQS, N)
+*         Used to store parts of
+*         the eigenvector matrix when the updating matrix multiplies
+*         take place.
+*
+*  LDQS   (input) INTEGER
+*         The leading dimension of the array QSTORE.
+*         LDQS >= max(1,N).
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  The algorithm failed to compute an eigenvalue while
+*                working on the submatrix lying in rows and columns
+*                INFO/(N+1) through mod(INFO,N+1).
+*
+*  =====================================================================
+*
+*  Warning:      N could be as big as QSIZ!
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            CURLVL, CURPRB, CURR, I, IGIVCL, IGIVNM,
+     $                   IGIVPT, INDXQ, IPERM, IPRMPT, IQ, IQPTR, IWREM,
+     $                   J, K, LGN, LL, MATSIZ, MSD2, SMLSIZ, SMM1,
+     $                   SPM1, SPM2, SUBMAT, SUBPBS, TLVLS
+      DOUBLE PRECISION   TEMP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DSTEQR, XERBLA, ZCOPY, ZLACRM, ZLAED7
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, INT, LOG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+*     IF( ICOMPQ .LT. 0 .OR. ICOMPQ .GT. 2 ) THEN
+*        INFO = -1
+*     ELSE IF( ( ICOMPQ .EQ. 1 ) .AND. ( QSIZ .LT. MAX( 0, N ) ) )
+*    $        THEN
+      IF( QSIZ.LT.MAX( 0, N ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDQS.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZLAED0', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      SMLSIZ = ILAENV( 9, 'ZLAED0', ' ', 0, 0, 0, 0 )
+*
+*     Determine the size and placement of the submatrices, and save in
+*     the leading elements of IWORK.
+*
+      IWORK( 1 ) = N
+      SUBPBS = 1
+      TLVLS = 0
+   10 CONTINUE
+      IF( IWORK( SUBPBS ).GT.SMLSIZ ) THEN
+         DO 20 J = SUBPBS, 1, -1
+            IWORK( 2*J ) = ( IWORK( J )+1 ) / 2
+            IWORK( 2*J-1 ) = IWORK( J ) / 2
+   20    CONTINUE
+         TLVLS = TLVLS + 1
+         SUBPBS = 2*SUBPBS
+         GO TO 10
+      END IF
+      DO 30 J = 2, SUBPBS
+         IWORK( J ) = IWORK( J ) + IWORK( J-1 )
+   30 CONTINUE
+*
+*     Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1
+*     using rank-1 modifications (cuts).
+*
+      SPM1 = SUBPBS - 1
+      DO 40 I = 1, SPM1
+         SUBMAT = IWORK( I ) + 1
+         SMM1 = SUBMAT - 1
+         D( SMM1 ) = D( SMM1 ) - ABS( E( SMM1 ) )
+         D( SUBMAT ) = D( SUBMAT ) - ABS( E( SMM1 ) )
+   40 CONTINUE
+*
+      INDXQ = 4*N + 3
+*
+*     Set up workspaces for eigenvalues only/accumulate new vectors
+*     routine
+*
+      TEMP = LOG( DBLE( N ) ) / LOG( TWO )
+      LGN = INT( TEMP )
+      IF( 2**LGN.LT.N )
+     $   LGN = LGN + 1
+      IF( 2**LGN.LT.N )
+     $   LGN = LGN + 1
+      IPRMPT = INDXQ + N + 1
+      IPERM = IPRMPT + N*LGN
+      IQPTR = IPERM + N*LGN
+      IGIVPT = IQPTR + N + 2
+      IGIVCL = IGIVPT + N*LGN
+*
+      IGIVNM = 1
+      IQ = IGIVNM + 2*N*LGN
+      IWREM = IQ + N**2 + 1
+*     Initialize pointers
+      DO 50 I = 0, SUBPBS
+         IWORK( IPRMPT+I ) = 1
+         IWORK( IGIVPT+I ) = 1
+   50 CONTINUE
+      IWORK( IQPTR ) = 1
+*
+*     Solve each submatrix eigenproblem at the bottom of the divide and
+*     conquer tree.
+*
+      CURR = 0
+      DO 70 I = 0, SPM1
+         IF( I.EQ.0 ) THEN
+            SUBMAT = 1
+            MATSIZ = IWORK( 1 )
+         ELSE
+            SUBMAT = IWORK( I ) + 1
+            MATSIZ = IWORK( I+1 ) - IWORK( I )
+         END IF
+         LL = IQ - 1 + IWORK( IQPTR+CURR )
+         CALL DSTEQR( 'I', MATSIZ, D( SUBMAT ), E( SUBMAT ),
+     $                RWORK( LL ), MATSIZ, RWORK, INFO )
+         CALL ZLACRM( QSIZ, MATSIZ, Q( 1, SUBMAT ), LDQ, RWORK( LL ),
+     $                MATSIZ, QSTORE( 1, SUBMAT ), LDQS,
+     $                RWORK( IWREM ) )
+         IWORK( IQPTR+CURR+1 ) = IWORK( IQPTR+CURR ) + MATSIZ**2
+         CURR = CURR + 1
+         IF( INFO.GT.0 ) THEN
+            INFO = SUBMAT*( N+1 ) + SUBMAT + MATSIZ - 1
+            RETURN
+         END IF
+         K = 1
+         DO 60 J = SUBMAT, IWORK( I+1 )
+            IWORK( INDXQ+J ) = K
+            K = K + 1
+   60    CONTINUE
+   70 CONTINUE
+*
+*     Successively merge eigensystems of adjacent submatrices
+*     into eigensystem for the corresponding larger matrix.
+*
+*     while ( SUBPBS > 1 )
+*
+      CURLVL = 1
+   80 CONTINUE
+      IF( SUBPBS.GT.1 ) THEN
+         SPM2 = SUBPBS - 2
+         DO 90 I = 0, SPM2, 2
+            IF( I.EQ.0 ) THEN
+               SUBMAT = 1
+               MATSIZ = IWORK( 2 )
+               MSD2 = IWORK( 1 )
+               CURPRB = 0
+            ELSE
+               SUBMAT = IWORK( I ) + 1
+               MATSIZ = IWORK( I+2 ) - IWORK( I )
+               MSD2 = MATSIZ / 2
+               CURPRB = CURPRB + 1
+            END IF
+*
+*     Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2)
+*     into an eigensystem of size MATSIZ.  ZLAED7 handles the case
+*     when the eigenvectors of a full or band Hermitian matrix (which
+*     was reduced to tridiagonal form) are desired.
+*
+*     I am free to use Q as a valuable working space until Loop 150.
+*
+            CALL ZLAED7( MATSIZ, MSD2, QSIZ, TLVLS, CURLVL, CURPRB,
+     $                   D( SUBMAT ), QSTORE( 1, SUBMAT ), LDQS,
+     $                   E( SUBMAT+MSD2-1 ), IWORK( INDXQ+SUBMAT ),
+     $                   RWORK( IQ ), IWORK( IQPTR ), IWORK( IPRMPT ),
+     $                   IWORK( IPERM ), IWORK( IGIVPT ),
+     $                   IWORK( IGIVCL ), RWORK( IGIVNM ),
+     $                   Q( 1, SUBMAT ), RWORK( IWREM ),
+     $                   IWORK( SUBPBS+1 ), INFO )
+            IF( INFO.GT.0 ) THEN
+               INFO = SUBMAT*( N+1 ) + SUBMAT + MATSIZ - 1
+               RETURN
+            END IF
+            IWORK( I / 2+1 ) = IWORK( I+2 )
+   90    CONTINUE
+         SUBPBS = SUBPBS / 2
+         CURLVL = CURLVL + 1
+         GO TO 80
+      END IF
+*
+*     end while
+*
+*     Re-merge the eigenvalues/vectors which were deflated at the final
+*     merge step.
+*
+      DO 100 I = 1, N
+         J = IWORK( INDXQ+I )
+         RWORK( I ) = D( J )
+         CALL ZCOPY( QSIZ, QSTORE( 1, J ), 1, Q( 1, I ), 1 )
+  100 CONTINUE
+      CALL DCOPY( N, RWORK, 1, D, 1 )
+*
+      RETURN
+*
+*     End of ZLAED0
+*
+      END
diff --git a/costa/native/external/lapack/zlaed7.f b/costa/native/external/lapack/zlaed7.f
new file mode 100644
index 000000000..86f5b1111
--- /dev/null
+++ b/costa/native/external/lapack/zlaed7.f
@@ -0,0 +1,267 @@
+      SUBROUTINE ZLAED7( N, CUTPNT, QSIZ, TLVLS, CURLVL, CURPBM, D, Q,
+     $                   LDQ, RHO, INDXQ, QSTORE, QPTR, PRMPTR, PERM,
+     $                   GIVPTR, GIVCOL, GIVNUM, WORK, RWORK, IWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            CURLVL, CURPBM, CUTPNT, INFO, LDQ, N, QSIZ,
+     $                   TLVLS
+      DOUBLE PRECISION   RHO
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( 2, * ), GIVPTR( * ), INDXQ( * ),
+     $                   IWORK( * ), PERM( * ), PRMPTR( * ), QPTR( * )
+      DOUBLE PRECISION   D( * ), GIVNUM( 2, * ), QSTORE( * ), RWORK( * )
+      COMPLEX*16         Q( LDQ, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLAED7 computes the updated eigensystem of a diagonal
+*  matrix after modification by a rank-one symmetric matrix. This
+*  routine is used only for the eigenproblem which requires all
+*  eigenvalues and optionally eigenvectors of a dense or banded
+*  Hermitian matrix that has been reduced to tridiagonal form.
+*
+*    T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out)
+*
+*    where Z = Q'u, u is a vector of length N with ones in the
+*    CUTPNT and CUTPNT + 1 th elements and zeros elsewhere.
+*
+*     The eigenvectors of the original matrix are stored in Q, and the
+*     eigenvalues are in D.  The algorithm consists of three stages:
+*
+*        The first stage consists of deflating the size of the problem
+*        when there are multiple eigenvalues or if there is a zero in
+*        the Z vector.  For each such occurence the dimension of the
+*        secular equation problem is reduced by one.  This stage is
+*        performed by the routine DLAED2.
+*
+*        The second stage consists of calculating the updated
+*        eigenvalues. This is done by finding the roots of the secular
+*        equation via the routine DLAED4 (as called by SLAED3).
+*        This routine also calculates the eigenvectors of the current
+*        problem.
+*
+*        The final stage consists of computing the updated eigenvectors
+*        directly using the updated eigenvalues.  The eigenvectors for
+*        the current problem are multiplied with the eigenvectors from
+*        the overall problem.
+*
+*  Arguments
+*  =========
+*
+*  N      (input) INTEGER
+*         The dimension of the symmetric tridiagonal matrix.  N >= 0.
+*
+*  CUTPNT (input) INTEGER
+*         Contains the location of the last eigenvalue in the leading
+*         sub-matrix.  min(1,N) <= CUTPNT <= N.
+*
+*  QSIZ   (input) INTEGER
+*         The dimension of the unitary matrix used to reduce
+*         the full matrix to tridiagonal form.  QSIZ >= N.
+*
+*  TLVLS  (input) INTEGER
+*         The total number of merging levels in the overall divide and
+*         conquer tree.
+*
+*  CURLVL (input) INTEGER
+*         The current level in the overall merge routine,
+*         0 <= curlvl <= tlvls.
+*
+*  CURPBM (input) INTEGER
+*         The current problem in the current level in the overall
+*         merge routine (counting from upper left to lower right).
+*
+*  D      (input/output) DOUBLE PRECISION array, dimension (N)
+*         On entry, the eigenvalues of the rank-1-perturbed matrix.
+*         On exit, the eigenvalues of the repaired matrix.
+*
+*  Q      (input/output) COMPLEX*16 array, dimension (LDQ,N)
+*         On entry, the eigenvectors of the rank-1-perturbed matrix.
+*         On exit, the eigenvectors of the repaired tridiagonal matrix.
+*
+*  LDQ    (input) INTEGER
+*         The leading dimension of the array Q.  LDQ >= max(1,N).
+*
+*  RHO    (input) DOUBLE PRECISION
+*         Contains the subdiagonal element used to create the rank-1
+*         modification.
+*
+*  INDXQ  (output) INTEGER array, dimension (N)
+*         This contains the permutation which will reintegrate the
+*         subproblem just solved back into sorted order,
+*         ie. D( INDXQ( I = 1, N ) ) will be in ascending order.
+*
+*  IWORK  (workspace) INTEGER array, dimension (4*N)
+*
+*  RWORK  (workspace) DOUBLE PRECISION array,
+*                                 dimension (3*N+2*QSIZ*N)
+*
+*  WORK   (workspace) COMPLEX*16 array, dimension (QSIZ*N)
+*
+*  QSTORE (input/output) DOUBLE PRECISION array, dimension (N**2+1)
+*         Stores eigenvectors of submatrices encountered during
+*         divide and conquer, packed together. QPTR points to
+*         beginning of the submatrices.
+*
+*  QPTR   (input/output) INTEGER array, dimension (N+2)
+*         List of indices pointing to beginning of submatrices stored
+*         in QSTORE. The submatrices are numbered starting at the
+*         bottom left of the divide and conquer tree, from left to
+*         right and bottom to top.
+*
+*  PRMPTR (input) INTEGER array, dimension (N lg N)
+*         Contains a list of pointers which indicate where in PERM a
+*         level's permutation is stored.  PRMPTR(i+1) - PRMPTR(i)
+*         indicates the size of the permutation and also the size of
+*         the full, non-deflated problem.
+*
+*  PERM   (input) INTEGER array, dimension (N lg N)
+*         Contains the permutations (from deflation and sorting) to be
+*         applied to each eigenblock.
+*
+*  GIVPTR (input) INTEGER array, dimension (N lg N)
+*         Contains a list of pointers which indicate where in GIVCOL a
+*         level's Givens rotations are stored.  GIVPTR(i+1) - GIVPTR(i)
+*         indicates the number of Givens rotations.
+*
+*  GIVCOL (input) INTEGER array, dimension (2, N lg N)
+*         Each pair of numbers indicates a pair of columns to take place
+*         in a Givens rotation.
+*
+*  GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N)
+*         Each number indicates the S value to be used in the
+*         corresponding Givens rotation.
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = 1, an eigenvalue did not converge
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            COLTYP, CURR, I, IDLMDA, IND1, IND2, INDX,
+     $                   INDXC, INDXP, IQ, IW, IZ, K, N1, N2, PTR
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLAED9, DLAEDA, DLAMRG, XERBLA, ZLACRM, ZLAED8
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+*     IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.1 ) THEN
+*        INFO = -1
+*     ELSE IF( N.LT.0 ) THEN
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( MIN( 1, N ).GT.CUTPNT .OR. N.LT.CUTPNT ) THEN
+         INFO = -2
+      ELSE IF( QSIZ.LT.N ) THEN
+         INFO = -3
+      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZLAED7', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     The following values are for bookkeeping purposes only.  They are
+*     integer pointers which indicate the portion of the workspace
+*     used by a particular array in DLAED2 and SLAED3.
+*
+      IZ = 1
+      IDLMDA = IZ + N
+      IW = IDLMDA + N
+      IQ = IW + N
+*
+      INDX = 1
+      INDXC = INDX + N
+      COLTYP = INDXC + N
+      INDXP = COLTYP + N
+*
+*     Form the z-vector which consists of the last row of Q_1 and the
+*     first row of Q_2.
+*
+      PTR = 1 + 2**TLVLS
+      DO 10 I = 1, CURLVL - 1
+         PTR = PTR + 2**( TLVLS-I )
+   10 CONTINUE
+      CURR = PTR + CURPBM
+      CALL DLAEDA( N, TLVLS, CURLVL, CURPBM, PRMPTR, PERM, GIVPTR,
+     $             GIVCOL, GIVNUM, QSTORE, QPTR, RWORK( IZ ),
+     $             RWORK( IZ+N ), INFO )
+*
+*     When solving the final problem, we no longer need the stored data,
+*     so we will overwrite the data from this level onto the previously
+*     used storage space.
+*
+      IF( CURLVL.EQ.TLVLS ) THEN
+         QPTR( CURR ) = 1
+         PRMPTR( CURR ) = 1
+         GIVPTR( CURR ) = 1
+      END IF
+*
+*     Sort and Deflate eigenvalues.
+*
+      CALL ZLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, RWORK( IZ ),
+     $             RWORK( IDLMDA ), WORK, QSIZ, RWORK( IW ),
+     $             IWORK( INDXP ), IWORK( INDX ), INDXQ,
+     $             PERM( PRMPTR( CURR ) ), GIVPTR( CURR+1 ),
+     $             GIVCOL( 1, GIVPTR( CURR ) ),
+     $             GIVNUM( 1, GIVPTR( CURR ) ), INFO )
+      PRMPTR( CURR+1 ) = PRMPTR( CURR ) + N
+      GIVPTR( CURR+1 ) = GIVPTR( CURR+1 ) + GIVPTR( CURR )
+*
+*     Solve Secular Equation.
+*
+      IF( K.NE.0 ) THEN
+         CALL DLAED9( K, 1, K, N, D, RWORK( IQ ), K, RHO,
+     $                RWORK( IDLMDA ), RWORK( IW ),
+     $                QSTORE( QPTR( CURR ) ), K, INFO )
+         CALL ZLACRM( QSIZ, K, WORK, QSIZ, QSTORE( QPTR( CURR ) ), K, Q,
+     $                LDQ, RWORK( IQ ) )
+         QPTR( CURR+1 ) = QPTR( CURR ) + K**2
+         IF( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+*
+*     Prepare the INDXQ sorting premutation.
+*
+         N1 = K
+         N2 = N - K
+         IND1 = 1
+         IND2 = N
+         CALL DLAMRG( N1, N2, D, 1, -1, INDXQ )
+      ELSE
+         QPTR( CURR+1 ) = QPTR( CURR )
+         DO 20 I = 1, N
+            INDXQ( I ) = I
+   20    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZLAED7
+*
+      END
diff --git a/costa/native/external/lapack/zlaed8.f b/costa/native/external/lapack/zlaed8.f
new file mode 100644
index 000000000..c485d5729
--- /dev/null
+++ b/costa/native/external/lapack/zlaed8.f
@@ -0,0 +1,364 @@
+      SUBROUTINE ZLAED8( K, N, QSIZ, Q, LDQ, D, RHO, CUTPNT, Z, DLAMDA,
+     $                   Q2, LDQ2, W, INDXP, INDX, INDXQ, PERM, GIVPTR,
+     $                   GIVCOL, GIVNUM, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+*     Courant Institute, NAG Ltd., and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            CUTPNT, GIVPTR, INFO, K, LDQ, LDQ2, N, QSIZ
+      DOUBLE PRECISION   RHO
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( 2, * ), INDX( * ), INDXP( * ),
+     $                   INDXQ( * ), PERM( * )
+      DOUBLE PRECISION   D( * ), DLAMDA( * ), GIVNUM( 2, * ), W( * ),
+     $                   Z( * )
+      COMPLEX*16         Q( LDQ, * ), Q2( LDQ2, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLAED8 merges the two sets of eigenvalues together into a single
+*  sorted set.  Then it tries to deflate the size of the problem.
+*  There are two ways in which deflation can occur:  when two or more
+*  eigenvalues are close together or if there is a tiny element in the
+*  Z vector.  For each such occurrence the order of the related secular
+*  equation problem is reduced by one.
+*
+*  Arguments
+*  =========
+*
+*  K      (output) INTEGER
+*         Contains the number of non-deflated eigenvalues.
+*         This is the order of the related secular equation.
+*
+*  N      (input) INTEGER
+*         The dimension of the symmetric tridiagonal matrix.  N >= 0.
+*
+*  QSIZ   (input) INTEGER
+*         The dimension of the unitary matrix used to reduce
+*         the dense or band matrix to tridiagonal form.
+*         QSIZ >= N if ICOMPQ = 1.
+*
+*  Q      (input/output) COMPLEX*16 array, dimension (LDQ,N)
+*         On entry, Q contains the eigenvectors of the partially solved
+*         system which has been previously updated in matrix
+*         multiplies with other partially solved eigensystems.
+*         On exit, Q contains the trailing (N-K) updated eigenvectors
+*         (those which were deflated) in its last N-K columns.
+*
+*  LDQ    (input) INTEGER
+*         The leading dimension of the array Q.  LDQ >= max( 1, N ).
+*
+*  D      (input/output) DOUBLE PRECISION array, dimension (N)
+*         On entry, D contains the eigenvalues of the two submatrices to
+*         be combined.  On exit, D contains the trailing (N-K) updated
+*         eigenvalues (those which were deflated) sorted into increasing
+*         order.
+*
+*  RHO    (input/output) DOUBLE PRECISION
+*         Contains the off diagonal element associated with the rank-1
+*         cut which originally split the two submatrices which are now
+*         being recombined. RHO is modified during the computation to
+*         the value required by DLAED3.
+*
+*  CUTPNT (input) INTEGER
+*         Contains the location of the last eigenvalue in the leading
+*         sub-matrix.  MIN(1,N) <= CUTPNT <= N.
+*
+*  Z      (input) DOUBLE PRECISION array, dimension (N)
+*         On input this vector contains the updating vector (the last
+*         row of the first sub-eigenvector matrix and the first row of
+*         the second sub-eigenvector matrix).  The contents of Z are
+*         destroyed during the updating process.
+*
+*  DLAMDA (output) DOUBLE PRECISION array, dimension (N)
+*         Contains a copy of the first K eigenvalues which will be used
+*         by DLAED3 to form the secular equation.
+*
+*  Q2     (output) COMPLEX*16 array, dimension (LDQ2,N)
+*         If ICOMPQ = 0, Q2 is not referenced.  Otherwise,
+*         Contains a copy of the first K eigenvectors which will be used
+*         by DLAED7 in a matrix multiply (DGEMM) to update the new
+*         eigenvectors.
+*
+*  LDQ2   (input) INTEGER
+*         The leading dimension of the array Q2.  LDQ2 >= max( 1, N ).
+*
+*  W      (output) DOUBLE PRECISION array, dimension (N)
+*         This will hold the first k values of the final
+*         deflation-altered z-vector and will be passed to DLAED3.
+*
+*  INDXP  (workspace) INTEGER array, dimension (N)
+*         This will contain the permutation used to place deflated
+*         values of D at the end of the array. On output INDXP(1:K)
+*         points to the nondeflated D-values and INDXP(K+1:N)
+*         points to the deflated eigenvalues.
+*
+*  INDX   (workspace) INTEGER array, dimension (N)
+*         This will contain the permutation used to sort the contents of
+*         D into ascending order.
+*
+*  INDXQ  (input) INTEGER array, dimension (N)
+*         This contains the permutation which separately sorts the two
+*         sub-problems in D into ascending order.  Note that elements in
+*         the second half of this permutation must first have CUTPNT
+*         added to their values in order to be accurate.
+*
+*  PERM   (output) INTEGER array, dimension (N)
+*         Contains the permutations (from deflation and sorting) to be
+*         applied to each eigenblock.
+*
+*  GIVPTR (output) INTEGER
+*         Contains the number of Givens rotations which took place in
+*         this subproblem.
+*
+*  GIVCOL (output) INTEGER array, dimension (2, N)
+*         Each pair of numbers indicates a pair of columns to take place
+*         in a Givens rotation.
+*
+*  GIVNUM (output) DOUBLE PRECISION array, dimension (2, N)
+*         Each number indicates the S value to be used in the
+*         corresponding Givens rotation.
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   MONE, ZERO, ONE, TWO, EIGHT
+      PARAMETER          ( MONE = -1.0D0, ZERO = 0.0D0, ONE = 1.0D0,
+     $                   TWO = 2.0D0, EIGHT = 8.0D0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IMAX, J, JLAM, JMAX, JP, K2, N1, N1P1, N2
+      DOUBLE PRECISION   C, EPS, S, T, TAU, TOL
+*     ..
+*     .. External Functions ..
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DLAMCH, DLAPY2
+      EXTERNAL           IDAMAX, DLAMCH, DLAPY2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLAMRG, DSCAL, XERBLA, ZCOPY, ZDROT,
+     $                   ZLACPY
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( QSIZ.LT.N ) THEN
+         INFO = -3
+      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( CUTPNT.LT.MIN( 1, N ) .OR. CUTPNT.GT.N ) THEN
+         INFO = -8
+      ELSE IF( LDQ2.LT.MAX( 1, N ) ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZLAED8', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      N1 = CUTPNT
+      N2 = N - N1
+      N1P1 = N1 + 1
+*
+      IF( RHO.LT.ZERO ) THEN
+         CALL DSCAL( N2, MONE, Z( N1P1 ), 1 )
+      END IF
+*
+*     Normalize z so that norm(z) = 1
+*
+      T = ONE / SQRT( TWO )
+      DO 10 J = 1, N
+         INDX( J ) = J
+   10 CONTINUE
+      CALL DSCAL( N, T, Z, 1 )
+      RHO = ABS( TWO*RHO )
+*
+*     Sort the eigenvalues into increasing order
+*
+      DO 20 I = CUTPNT + 1, N
+         INDXQ( I ) = INDXQ( I ) + CUTPNT
+   20 CONTINUE
+      DO 30 I = 1, N
+         DLAMDA( I ) = D( INDXQ( I ) )
+         W( I ) = Z( INDXQ( I ) )
+   30 CONTINUE
+      I = 1
+      J = CUTPNT + 1
+      CALL DLAMRG( N1, N2, DLAMDA, 1, 1, INDX )
+      DO 40 I = 1, N
+         D( I ) = DLAMDA( INDX( I ) )
+         Z( I ) = W( INDX( I ) )
+   40 CONTINUE
+*
+*     Calculate the allowable deflation tolerance
+*
+      IMAX = IDAMAX( N, Z, 1 )
+      JMAX = IDAMAX( N, D, 1 )
+      EPS = DLAMCH( 'Epsilon' )
+      TOL = EIGHT*EPS*ABS( D( JMAX ) )
+*
+*     If the rank-1 modifier is small enough, no more needs to be done
+*     -- except to reorganize Q so that its columns correspond with the
+*     elements in D.
+*
+      IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN
+         K = 0
+         DO 50 J = 1, N
+            PERM( J ) = INDXQ( INDX( J ) )
+            CALL ZCOPY( QSIZ, Q( 1, PERM( J ) ), 1, Q2( 1, J ), 1 )
+   50    CONTINUE
+         CALL ZLACPY( 'A', QSIZ, N, Q2( 1, 1 ), LDQ2, Q( 1, 1 ), LDQ )
+         RETURN
+      END IF
+*
+*     If there are multiple eigenvalues then the problem deflates.  Here
+*     the number of equal eigenvalues are found.  As each equal
+*     eigenvalue is found, an elementary reflector is computed to rotate
+*     the corresponding eigensubspace so that the corresponding
+*     components of Z are zero in this new basis.
+*
+      K = 0
+      GIVPTR = 0
+      K2 = N + 1
+      DO 60 J = 1, N
+         IF( RHO*ABS( Z( J ) ).LE.TOL ) THEN
+*
+*           Deflate due to small z component.
+*
+            K2 = K2 - 1
+            INDXP( K2 ) = J
+            IF( J.EQ.N )
+     $         GO TO 100
+         ELSE
+            JLAM = J
+            GO TO 70
+         END IF
+   60 CONTINUE
+   70 CONTINUE
+      J = J + 1
+      IF( J.GT.N )
+     $   GO TO 90
+      IF( RHO*ABS( Z( J ) ).LE.TOL ) THEN
+*
+*        Deflate due to small z component.
+*
+         K2 = K2 - 1
+         INDXP( K2 ) = J
+      ELSE
+*
+*        Check if eigenvalues are close enough to allow deflation.
+*
+         S = Z( JLAM )
+         C = Z( J )
+*
+*        Find sqrt(a**2+b**2) without overflow or
+*        destructive underflow.
+*
+         TAU = DLAPY2( C, S )
+         T = D( J ) - D( JLAM )
+         C = C / TAU
+         S = -S / TAU
+         IF( ABS( T*C*S ).LE.TOL ) THEN
+*
+*           Deflation is possible.
+*
+            Z( J ) = TAU
+            Z( JLAM ) = ZERO
+*
+*           Record the appropriate Givens rotation
+*
+            GIVPTR = GIVPTR + 1
+            GIVCOL( 1, GIVPTR ) = INDXQ( INDX( JLAM ) )
+            GIVCOL( 2, GIVPTR ) = INDXQ( INDX( J ) )
+            GIVNUM( 1, GIVPTR ) = C
+            GIVNUM( 2, GIVPTR ) = S
+            CALL ZDROT( QSIZ, Q( 1, INDXQ( INDX( JLAM ) ) ), 1,
+     $                  Q( 1, INDXQ( INDX( J ) ) ), 1, C, S )
+            T = D( JLAM )*C*C + D( J )*S*S
+            D( J ) = D( JLAM )*S*S + D( J )*C*C
+            D( JLAM ) = T
+            K2 = K2 - 1
+            I = 1
+   80       CONTINUE
+            IF( K2+I.LE.N ) THEN
+               IF( D( JLAM ).LT.D( INDXP( K2+I ) ) ) THEN
+                  INDXP( K2+I-1 ) = INDXP( K2+I )
+                  INDXP( K2+I ) = JLAM
+                  I = I + 1
+                  GO TO 80
+               ELSE
+                  INDXP( K2+I-1 ) = JLAM
+               END IF
+            ELSE
+               INDXP( K2+I-1 ) = JLAM
+            END IF
+            JLAM = J
+         ELSE
+            K = K + 1
+            W( K ) = Z( JLAM )
+            DLAMDA( K ) = D( JLAM )
+            INDXP( K ) = JLAM
+            JLAM = J
+         END IF
+      END IF
+      GO TO 70
+   90 CONTINUE
+*
+*     Record the last eigenvalue.
+*
+      K = K + 1
+      W( K ) = Z( JLAM )
+      DLAMDA( K ) = D( JLAM )
+      INDXP( K ) = JLAM
+*
+  100 CONTINUE
+*
+*     Sort the eigenvalues and corresponding eigenvectors into DLAMDA
+*     and Q2 respectively.  The eigenvalues/vectors which were not
+*     deflated go into the first K slots of DLAMDA and Q2 respectively,
+*     while those which were deflated go into the last N - K slots.
+*
+      DO 110 J = 1, N
+         JP = INDXP( J )
+         DLAMDA( J ) = D( JP )
+         PERM( J ) = INDXQ( INDX( JP ) )
+         CALL ZCOPY( QSIZ, Q( 1, PERM( J ) ), 1, Q2( 1, J ), 1 )
+  110 CONTINUE
+*
+*     The deflated eigenvalues and their corresponding vectors go back
+*     into the last N - K slots of D and Q respectively.
+*
+      IF( K.LT.N ) THEN
+         CALL DCOPY( N-K, DLAMDA( K+1 ), 1, D( K+1 ), 1 )
+         CALL ZLACPY( 'A', QSIZ, N-K, Q2( 1, K+1 ), LDQ2, Q( 1, K+1 ),
+     $                LDQ )
+      END IF
+*
+      RETURN
+*
+*     End of ZLAED8
+*
+      END
diff --git a/costa/native/external/lapack/zlaein.f b/costa/native/external/lapack/zlaein.f
new file mode 100644
index 000000000..577ec1186
--- /dev/null
+++ b/costa/native/external/lapack/zlaein.f
@@ -0,0 +1,264 @@
+      SUBROUTINE ZLAEIN( RIGHTV, NOINIT, N, H, LDH, W, V, B, LDB, RWORK,
+     $                   EPS3, SMLNUM, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      LOGICAL            NOINIT, RIGHTV
+      INTEGER            INFO, LDB, LDH, N
+      DOUBLE PRECISION   EPS3, SMLNUM
+      COMPLEX*16         W
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   RWORK( * )
+      COMPLEX*16         B( LDB, * ), H( LDH, * ), V( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLAEIN uses inverse iteration to find a right or left eigenvector
+*  corresponding to the eigenvalue W of a complex upper Hessenberg
+*  matrix H.
+*
+*  Arguments
+*  =========
+*
+*  RIGHTV   (input) LOGICAL
+*          = .TRUE. : compute right eigenvector;
+*          = .FALSE.: compute left eigenvector.
+*
+*  NOINIT   (input) LOGICAL
+*          = .TRUE. : no initial vector supplied in V
+*          = .FALSE.: initial vector supplied in V.
+*
+*  N       (input) INTEGER
+*          The order of the matrix H.  N >= 0.
+*
+*  H       (input) COMPLEX*16 array, dimension (LDH,N)
+*          The upper Hessenberg matrix H.
+*
+*  LDH     (input) INTEGER
+*          The leading dimension of the array H.  LDH >= max(1,N).
+*
+*  W       (input) COMPLEX*16
+*          The eigenvalue of H whose corresponding right or left
+*          eigenvector is to be computed.
+*
+*  V       (input/output) COMPLEX*16 array, dimension (N)
+*          On entry, if NOINIT = .FALSE., V must contain a starting
+*          vector for inverse iteration; otherwise V need not be set.
+*          On exit, V contains the computed eigenvector, normalized so
+*          that the component of largest magnitude has magnitude 1; here
+*          the magnitude of a complex number (x,y) is taken to be
+*          |x| + |y|.
+*
+*  B       (workspace) COMPLEX*16 array, dimension (LDB,N)
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  EPS3    (input) DOUBLE PRECISION
+*          A small machine-dependent value which is used to perturb
+*          close eigenvalues, and to replace zero pivots.
+*
+*  SMLNUM  (input) DOUBLE PRECISION
+*          A machine-dependent value close to the underflow threshold.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          = 1:  inverse iteration did not converge; V is set to the
+*                last iterate.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, TENTH
+      PARAMETER          ( ONE = 1.0D+0, TENTH = 1.0D-1 )
+      COMPLEX*16         ZERO
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      CHARACTER          NORMIN, TRANS
+      INTEGER            I, IERR, ITS, J
+      DOUBLE PRECISION   GROWTO, NRMSML, ROOTN, RTEMP, SCALE, VNORM
+      COMPLEX*16         CDUM, EI, EJ, TEMP, X
+*     ..
+*     .. External Functions ..
+      INTEGER            IZAMAX
+      DOUBLE PRECISION   DZASUM, DZNRM2
+      COMPLEX*16         ZLADIV
+      EXTERNAL           IZAMAX, DZASUM, DZNRM2, ZLADIV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZDSCAL, ZLATRS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX, SQRT
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+*
+*     GROWTO is the threshold used in the acceptance test for an
+*     eigenvector.
+*
+      ROOTN = SQRT( DBLE( N ) )
+      GROWTO = TENTH / ROOTN
+      NRMSML = MAX( ONE, EPS3*ROOTN )*SMLNUM
+*
+*     Form B = H - W*I (except that the subdiagonal elements are not
+*     stored).
+*
+      DO 20 J = 1, N
+         DO 10 I = 1, J - 1
+            B( I, J ) = H( I, J )
+   10    CONTINUE
+         B( J, J ) = H( J, J ) - W
+   20 CONTINUE
+*
+      IF( NOINIT ) THEN
+*
+*        Initialize V.
+*
+         DO 30 I = 1, N
+            V( I ) = EPS3
+   30    CONTINUE
+      ELSE
+*
+*        Scale supplied initial vector.
+*
+         VNORM = DZNRM2( N, V, 1 )
+         CALL ZDSCAL( N, ( EPS3*ROOTN ) / MAX( VNORM, NRMSML ), V, 1 )
+      END IF
+*
+      IF( RIGHTV ) THEN
+*
+*        LU decomposition with partial pivoting of B, replacing zero
+*        pivots by EPS3.
+*
+         DO 60 I = 1, N - 1
+            EI = H( I+1, I )
+            IF( CABS1( B( I, I ) ).LT.CABS1( EI ) ) THEN
+*
+*              Interchange rows and eliminate.
+*
+               X = ZLADIV( B( I, I ), EI )
+               B( I, I ) = EI
+               DO 40 J = I + 1, N
+                  TEMP = B( I+1, J )
+                  B( I+1, J ) = B( I, J ) - X*TEMP
+                  B( I, J ) = TEMP
+   40          CONTINUE
+            ELSE
+*
+*              Eliminate without interchange.
+*
+               IF( B( I, I ).EQ.ZERO )
+     $            B( I, I ) = EPS3
+               X = ZLADIV( EI, B( I, I ) )
+               IF( X.NE.ZERO ) THEN
+                  DO 50 J = I + 1, N
+                     B( I+1, J ) = B( I+1, J ) - X*B( I, J )
+   50             CONTINUE
+               END IF
+            END IF
+   60    CONTINUE
+         IF( B( N, N ).EQ.ZERO )
+     $      B( N, N ) = EPS3
+*
+         TRANS = 'N'
+*
+      ELSE
+*
+*        UL decomposition with partial pivoting of B, replacing zero
+*        pivots by EPS3.
+*
+         DO 90 J = N, 2, -1
+            EJ = H( J, J-1 )
+            IF( CABS1( B( J, J ) ).LT.CABS1( EJ ) ) THEN
+*
+*              Interchange columns and eliminate.
+*
+               X = ZLADIV( B( J, J ), EJ )
+               B( J, J ) = EJ
+               DO 70 I = 1, J - 1
+                  TEMP = B( I, J-1 )
+                  B( I, J-1 ) = B( I, J ) - X*TEMP
+                  B( I, J ) = TEMP
+   70          CONTINUE
+            ELSE
+*
+*              Eliminate without interchange.
+*
+               IF( B( J, J ).EQ.ZERO )
+     $            B( J, J ) = EPS3
+               X = ZLADIV( EJ, B( J, J ) )
+               IF( X.NE.ZERO ) THEN
+                  DO 80 I = 1, J - 1
+                     B( I, J-1 ) = B( I, J-1 ) - X*B( I, J )
+   80             CONTINUE
+               END IF
+            END IF
+   90    CONTINUE
+         IF( B( 1, 1 ).EQ.ZERO )
+     $      B( 1, 1 ) = EPS3
+*
+         TRANS = 'C'
+*
+      END IF
+*
+      NORMIN = 'N'
+      DO 110 ITS = 1, N
+*
+*        Solve U*x = scale*v for a right eigenvector
+*          or U'*x = scale*v for a left eigenvector,
+*        overwriting x on v.
+*
+         CALL ZLATRS( 'Upper', TRANS, 'Nonunit', NORMIN, N, B, LDB, V,
+     $                SCALE, RWORK, IERR )
+         NORMIN = 'Y'
+*
+*        Test for sufficient growth in the norm of v.
+*
+         VNORM = DZASUM( N, V, 1 )
+         IF( VNORM.GE.GROWTO*SCALE )
+     $      GO TO 120
+*
+*        Choose new orthogonal starting vector and try again.
+*
+         RTEMP = EPS3 / ( ROOTN+ONE )
+         V( 1 ) = EPS3
+         DO 100 I = 2, N
+            V( I ) = RTEMP
+  100    CONTINUE
+         V( N-ITS+1 ) = V( N-ITS+1 ) - EPS3*ROOTN
+  110 CONTINUE
+*
+*     Failure to find eigenvector in N iterations.
+*
+      INFO = 1
+*
+  120 CONTINUE
+*
+*     Normalize eigenvector.
+*
+      I = IZAMAX( N, V, 1 )
+      CALL ZDSCAL( N, ONE / CABS1( V( I ) ), V, 1 )
+*
+      RETURN
+*
+*     End of ZLAEIN
+*
+      END
diff --git a/costa/native/external/lapack/zlaesy.f b/costa/native/external/lapack/zlaesy.f
new file mode 100644
index 000000000..d947e150d
--- /dev/null
+++ b/costa/native/external/lapack/zlaesy.f
@@ -0,0 +1,153 @@
+      SUBROUTINE ZLAESY( A, B, C, RT1, RT2, EVSCAL, CS1, SN1 )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      COMPLEX*16         A, B, C, CS1, EVSCAL, RT1, RT2, SN1
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLAESY computes the eigendecomposition of a 2-by-2 symmetric matrix
+*     ( ( A, B );( B, C ) )
+*  provided the norm of the matrix of eigenvectors is larger than
+*  some threshold value.
+*
+*  RT1 is the eigenvalue of larger absolute value, and RT2 of
+*  smaller absolute value.  If the eigenvectors are computed, then
+*  on return ( CS1, SN1 ) is the unit eigenvector for RT1, hence
+*
+*  [  CS1     SN1   ] . [ A  B ] . [ CS1    -SN1   ] = [ RT1  0  ]
+*  [ -SN1     CS1   ]   [ B  C ]   [ SN1     CS1   ]   [  0  RT2 ]
+*
+*  Arguments
+*  =========
+*
+*  A       (input) COMPLEX*16
+*          The ( 1, 1 ) element of input matrix.
+*
+*  B       (input) COMPLEX*16
+*          The ( 1, 2 ) element of input matrix.  The ( 2, 1 ) element
+*          is also given by B, since the 2-by-2 matrix is symmetric.
+*
+*  C       (input) COMPLEX*16
+*          The ( 2, 2 ) element of input matrix.
+*
+*  RT1     (output) COMPLEX*16
+*          The eigenvalue of larger modulus.
+*
+*  RT2     (output) COMPLEX*16
+*          The eigenvalue of smaller modulus.
+*
+*  EVSCAL  (output) COMPLEX*16
+*          The complex value by which the eigenvector matrix was scaled
+*          to make it orthonormal.  If EVSCAL is zero, the eigenvectors
+*          were not computed.  This means one of two things:  the 2-by-2
+*          matrix could not be diagonalized, or the norm of the matrix
+*          of eigenvectors before scaling was larger than the threshold
+*          value THRESH (set below).
+*
+*  CS1     (output) COMPLEX*16
+*  SN1     (output) COMPLEX*16
+*          If EVSCAL .NE. 0,  ( CS1, SN1 ) is the unit right eigenvector
+*          for RT1.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D0 )
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D0 )
+      COMPLEX*16         CONE
+      PARAMETER          ( CONE = ( 1.0D0, 0.0D0 ) )
+      DOUBLE PRECISION   HALF
+      PARAMETER          ( HALF = 0.5D0 )
+      DOUBLE PRECISION   THRESH
+      PARAMETER          ( THRESH = 0.1D0 )
+*     ..
+*     .. Local Scalars ..
+      DOUBLE PRECISION   BABS, EVNORM, TABS, Z
+      COMPLEX*16         S, T, TMP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*
+*     Special case:  The matrix is actually diagonal.
+*     To avoid divide by zero later, we treat this case separately.
+*
+      IF( ABS( B ).EQ.ZERO ) THEN
+         RT1 = A
+         RT2 = C
+         IF( ABS( RT1 ).LT.ABS( RT2 ) ) THEN
+            TMP = RT1
+            RT1 = RT2
+            RT2 = TMP
+            CS1 = ZERO
+            SN1 = ONE
+         ELSE
+            CS1 = ONE
+            SN1 = ZERO
+         END IF
+      ELSE
+*
+*        Compute the eigenvalues and eigenvectors.
+*        The characteristic equation is
+*           lambda **2 - (A+C) lambda + (A*C - B*B)
+*        and we solve it using the quadratic formula.
+*
+         S = ( A+C )*HALF
+         T = ( A-C )*HALF
+*
+*        Take the square root carefully to avoid over/under flow.
+*
+         BABS = ABS( B )
+         TABS = ABS( T )
+         Z = MAX( BABS, TABS )
+         IF( Z.GT.ZERO )
+     $      T = Z*SQRT( ( T / Z )**2+( B / Z )**2 )
+*
+*        Compute the two eigenvalues.  RT1 and RT2 are exchanged
+*        if necessary so that RT1 will have the greater magnitude.
+*
+         RT1 = S + T
+         RT2 = S - T
+         IF( ABS( RT1 ).LT.ABS( RT2 ) ) THEN
+            TMP = RT1
+            RT1 = RT2
+            RT2 = TMP
+         END IF
+*
+*        Choose CS1 = 1 and SN1 to satisfy the first equation, then
+*        scale the components of this eigenvector so that the matrix
+*        of eigenvectors X satisfies  X * X' = I .  (No scaling is
+*        done if the norm of the eigenvalue matrix is less than THRESH.)
+*
+         SN1 = ( RT1-A ) / B
+         TABS = ABS( SN1 )
+         IF( TABS.GT.ONE ) THEN
+            T = TABS*SQRT( ( ONE / TABS )**2+( SN1 / TABS )**2 )
+         ELSE
+            T = SQRT( CONE+SN1*SN1 )
+         END IF
+         EVNORM = ABS( T )
+         IF( EVNORM.GE.THRESH ) THEN
+            EVSCAL = CONE / T
+            CS1 = EVSCAL
+            SN1 = SN1*EVSCAL
+         ELSE
+            EVSCAL = ZERO
+         END IF
+      END IF
+      RETURN
+*
+*     End of ZLAESY
+*
+      END
diff --git a/costa/native/external/lapack/zlaev2.f b/costa/native/external/lapack/zlaev2.f
new file mode 100644
index 000000000..372566512
--- /dev/null
+++ b/costa/native/external/lapack/zlaev2.f
@@ -0,0 +1,96 @@
+      SUBROUTINE ZLAEV2( A, B, C, RT1, RT2, CS1, SN1 )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   CS1, RT1, RT2
+      COMPLEX*16         A, B, C, SN1
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLAEV2 computes the eigendecomposition of a 2-by-2 Hermitian matrix
+*     [  A         B  ]
+*     [  CONJG(B)  C  ].
+*  On return, RT1 is the eigenvalue of larger absolute value, RT2 is the
+*  eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right
+*  eigenvector for RT1, giving the decomposition
+*
+*  [ CS1  CONJG(SN1) ] [    A     B ] [ CS1 -CONJG(SN1) ] = [ RT1  0  ]
+*  [-SN1     CS1     ] [ CONJG(B) C ] [ SN1     CS1     ]   [  0  RT2 ].
+*
+*  Arguments
+*  =========
+*
+*  A      (input) COMPLEX*16
+*         The (1,1) element of the 2-by-2 matrix.
+*
+*  B      (input) COMPLEX*16
+*         The (1,2) element and the conjugate of the (2,1) element of
+*         the 2-by-2 matrix.
+*
+*  C      (input) COMPLEX*16
+*         The (2,2) element of the 2-by-2 matrix.
+*
+*  RT1    (output) DOUBLE PRECISION
+*         The eigenvalue of larger absolute value.
+*
+*  RT2    (output) DOUBLE PRECISION
+*         The eigenvalue of smaller absolute value.
+*
+*  CS1    (output) DOUBLE PRECISION
+*  SN1    (output) COMPLEX*16
+*         The vector (CS1, SN1) is a unit right eigenvector for RT1.
+*
+*  Further Details
+*  ===============
+*
+*  RT1 is accurate to a few ulps barring over/underflow.
+*
+*  RT2 may be inaccurate if there is massive cancellation in the
+*  determinant A*C-B*B; higher precision or correctly rounded or
+*  correctly truncated arithmetic would be needed to compute RT2
+*  accurately in all cases.
+*
+*  CS1 and SN1 are accurate to a few ulps barring over/underflow.
+*
+*  Overflow is possible only if RT1 is within a factor of 5 of overflow.
+*  Underflow is harmless if the input data is 0 or exceeds
+*     underflow_threshold / macheps.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D0 )
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      DOUBLE PRECISION   T
+      COMPLEX*16         W
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLAEV2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCONJG
+*     ..
+*     .. Executable Statements ..
+*
+      IF( ABS( B ).EQ.ZERO ) THEN
+         W = ONE
+      ELSE
+         W = DCONJG( B ) / ABS( B )
+      END IF
+      CALL DLAEV2( DBLE( A ), ABS( B ), DBLE( C ), RT1, RT2, CS1, T )
+      SN1 = W*T
+      RETURN
+*
+*     End of ZLAEV2
+*
+      END
diff --git a/costa/native/external/lapack/zlags2.f b/costa/native/external/lapack/zlags2.f
new file mode 100644
index 000000000..ba3f96c0d
--- /dev/null
+++ b/costa/native/external/lapack/zlags2.f
@@ -0,0 +1,309 @@
+      SUBROUTINE ZLAGS2( UPPER, A1, A2, A3, B1, B2, B3, CSU, SNU, CSV,
+     $                   SNV, CSQ, SNQ )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      LOGICAL            UPPER
+      DOUBLE PRECISION   A1, A3, B1, B3, CSQ, CSU, CSV
+      COMPLEX*16         A2, B2, SNQ, SNU, SNV
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLAGS2 computes 2-by-2 unitary matrices U, V and Q, such
+*  that if ( UPPER ) then
+*
+*            U'*A*Q = U'*( A1 A2 )*Q = ( x  0  )
+*                        ( 0  A3 )     ( x  x  )
+*  and
+*            V'*B*Q = V'*( B1 B2 )*Q = ( x  0  )
+*                        ( 0  B3 )     ( x  x  )
+*
+*  or if ( .NOT.UPPER ) then
+*
+*            U'*A*Q = U'*( A1 0  )*Q = ( x  x  )
+*                        ( A2 A3 )     ( 0  x  )
+*  and
+*            V'*B*Q = V'*( B1 0  )*Q = ( x  x  )
+*                        ( B2 B3 )     ( 0  x  )
+*  where
+*
+*    U = (     CSU      SNU ), V = (     CSV     SNV ),
+*        ( -CONJG(SNU)  CSU )      ( -CONJG(SNV) CSV )
+*
+*    Q = (     CSQ      SNQ )
+*        ( -CONJG(SNQ)  CSQ )
+*
+*  Z' denotes the conjugate transpose of Z.
+*
+*  The rows of the transformed A and B are parallel. Moreover, if the
+*  input 2-by-2 matrix A is not zero, then the transformed (1,1) entry
+*  of A is not zero. If the input matrices A and B are both not zero,
+*  then the transformed (2,2) element of B is not zero, except when the
+*  first rows of input A and B are parallel and the second rows are
+*  zero.
+*
+*  Arguments
+*  =========
+*
+*  UPPER   (input) LOGICAL
+*          = .TRUE.: the input matrices A and B are upper triangular.
+*          = .FALSE.: the input matrices A and B are lower triangular.
+*
+*  A1      (input) DOUBLE PRECISION
+*  A2      (input) COMPLEX*16
+*  A3      (input) DOUBLE PRECISION
+*          On entry, A1, A2 and A3 are elements of the input 2-by-2
+*          upper (lower) triangular matrix A.
+*
+*  B1      (input) DOUBLE PRECISION
+*  B2      (input) COMPLEX*16
+*  B3      (input) DOUBLE PRECISION
+*          On entry, B1, B2 and B3 are elements of the input 2-by-2
+*          upper (lower) triangular matrix B.
+*
+*  CSU     (output) DOUBLE PRECISION
+*  SNU     (output) COMPLEX*16
+*          The desired unitary matrix U.
+*
+*  CSV     (output) DOUBLE PRECISION
+*  SNV     (output) COMPLEX*16
+*          The desired unitary matrix V.
+*
+*  CSQ     (output) DOUBLE PRECISION
+*  SNQ     (output) COMPLEX*16
+*          The desired unitary matrix Q.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      DOUBLE PRECISION   A, AUA11, AUA12, AUA21, AUA22, AVB12, AVB11, 
+     $                   AVB21, AVB22, CSL, CSR, D, FB, FC, S1, S2, 
+     $                   SNL, SNR, UA11R, UA22R, VB11R, VB22R
+      COMPLEX*16         B, C, D1, R, T, UA11, UA12, UA21, UA22, VB11,
+     $                   VB12, VB21, VB22
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASV2, ZLARTG
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCMPLX, DCONJG, DIMAG
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   ABS1
+*     ..
+*     .. Statement Function definitions ..
+      ABS1( T ) = ABS( DBLE( T ) ) + ABS( DIMAG( T ) )
+*     ..
+*     .. Executable Statements ..
+*
+      IF( UPPER ) THEN
+*
+*        Input matrices A and B are upper triangular matrices
+*
+*        Form matrix C = A*adj(B) = ( a b )
+*                                   ( 0 d )
+*
+         A = A1*B3
+         D = A3*B1
+         B = A2*B1 - A1*B2
+         FB = ABS( B )
+*
+*        Transform complex 2-by-2 matrix C to real matrix by unitary
+*        diagonal matrix diag(1,D1).
+*
+         D1 = ONE
+         IF( FB.NE.ZERO )
+     $      D1 = B / FB
+*
+*        The SVD of real 2 by 2 triangular C
+*
+*         ( CSL -SNL )*( A B )*(  CSR  SNR ) = ( R 0 )
+*         ( SNL  CSL ) ( 0 D ) ( -SNR  CSR )   ( 0 T )
+*
+         CALL DLASV2( A, FB, D, S1, S2, SNR, CSR, SNL, CSL )
+*
+         IF( ABS( CSL ).GE.ABS( SNL ) .OR. ABS( CSR ).GE.ABS( SNR ) )
+     $        THEN
+*
+*           Compute the (1,1) and (1,2) elements of U'*A and V'*B,
+*           and (1,2) element of |U|'*|A| and |V|'*|B|.
+*
+            UA11R = CSL*A1
+            UA12 = CSL*A2 + D1*SNL*A3
+*
+            VB11R = CSR*B1
+            VB12 = CSR*B2 + D1*SNR*B3
+*
+            AUA12 = ABS( CSL )*ABS1( A2 ) + ABS( SNL )*ABS( A3 )
+            AVB12 = ABS( CSR )*ABS1( B2 ) + ABS( SNR )*ABS( B3 )
+*
+*           zero (1,2) elements of U'*A and V'*B
+*
+            IF( ( ABS( UA11R )+ABS1( UA12 ) ).EQ.ZERO ) THEN
+               CALL ZLARTG( -DCMPLX( VB11R ), DCONJG( VB12 ), CSQ, SNQ,
+     $                      R )
+            ELSE IF( ( ABS( VB11R )+ABS1( VB12 ) ).EQ.ZERO ) THEN
+               CALL ZLARTG( -DCMPLX( UA11R ), DCONJG( UA12 ), CSQ, SNQ,
+     $                      R )
+            ELSE IF( AUA12 / ( ABS( UA11R )+ABS1( UA12 ) ).LE.AVB12 /
+     $               ( ABS( VB11R )+ABS1( VB12 ) ) ) THEN
+               CALL ZLARTG( -DCMPLX( UA11R ), DCONJG( UA12 ), CSQ, SNQ,
+     $                      R )
+            ELSE
+               CALL ZLARTG( -DCMPLX( VB11R ), DCONJG( VB12 ), CSQ, SNQ,
+     $                      R )
+            END IF
+*
+            CSU = CSL
+            SNU = -D1*SNL
+            CSV = CSR
+            SNV = -D1*SNR
+*
+         ELSE
+*
+*           Compute the (2,1) and (2,2) elements of U'*A and V'*B,
+*           and (2,2) element of |U|'*|A| and |V|'*|B|.
+*
+            UA21 = -DCONJG( D1 )*SNL*A1
+            UA22 = -DCONJG( D1 )*SNL*A2 + CSL*A3
+*
+            VB21 = -DCONJG( D1 )*SNR*B1
+            VB22 = -DCONJG( D1 )*SNR*B2 + CSR*B3
+*
+            AUA22 = ABS( SNL )*ABS1( A2 ) + ABS( CSL )*ABS( A3 )
+            AVB22 = ABS( SNR )*ABS1( B2 ) + ABS( CSR )*ABS( B3 )
+*
+*           zero (2,2) elements of U'*A and V'*B, and then swap.
+*
+            IF( ( ABS1( UA21 )+ABS1( UA22 ) ).EQ.ZERO ) THEN
+               CALL ZLARTG( -DCONJG( VB21 ), DCONJG( VB22 ), CSQ, SNQ,
+     $                      R )
+            ELSE IF( ( ABS1( VB21 )+ABS( VB22 ) ).EQ.ZERO ) THEN
+               CALL ZLARTG( -DCONJG( UA21 ), DCONJG( UA22 ), CSQ, SNQ,
+     $                      R )
+            ELSE IF( AUA22 / ( ABS1( UA21 )+ABS1( UA22 ) ).LE.AVB22 /
+     $               ( ABS1( VB21 )+ABS1( VB22 ) ) ) THEN
+               CALL ZLARTG( -DCONJG( UA21 ), DCONJG( UA22 ), CSQ, SNQ,
+     $                      R )
+            ELSE
+               CALL ZLARTG( -DCONJG( VB21 ), DCONJG( VB22 ), CSQ, SNQ,
+     $                      R )
+            END IF
+*
+            CSU = SNL
+            SNU = D1*CSL
+            CSV = SNR
+            SNV = D1*CSR
+*
+         END IF
+*
+      ELSE
+*
+*        Input matrices A and B are lower triangular matrices
+*
+*        Form matrix C = A*adj(B) = ( a 0 )
+*                                   ( c d )
+*
+         A = A1*B3
+         D = A3*B1
+         C = A2*B3 - A3*B2
+         FC = ABS( C )
+*
+*        Transform complex 2-by-2 matrix C to real matrix by unitary
+*        diagonal matrix diag(d1,1).
+*
+         D1 = ONE
+         IF( FC.NE.ZERO )
+     $      D1 = C / FC
+*
+*        The SVD of real 2 by 2 triangular C
+*
+*         ( CSL -SNL )*( A 0 )*(  CSR  SNR ) = ( R 0 )
+*         ( SNL  CSL ) ( C D ) ( -SNR  CSR )   ( 0 T )
+*
+         CALL DLASV2( A, FC, D, S1, S2, SNR, CSR, SNL, CSL )
+*
+         IF( ABS( CSR ).GE.ABS( SNR ) .OR. ABS( CSL ).GE.ABS( SNL ) )
+     $        THEN
+*
+*           Compute the (2,1) and (2,2) elements of U'*A and V'*B,
+*           and (2,1) element of |U|'*|A| and |V|'*|B|.
+*
+            UA21 = -D1*SNR*A1 + CSR*A2
+            UA22R = CSR*A3
+*
+            VB21 = -D1*SNL*B1 + CSL*B2
+            VB22R = CSL*B3
+*
+            AUA21 = ABS( SNR )*ABS( A1 ) + ABS( CSR )*ABS1( A2 )
+            AVB21 = ABS( SNL )*ABS( B1 ) + ABS( CSL )*ABS1( B2 )
+*
+*           zero (2,1) elements of U'*A and V'*B.
+*
+            IF( ( ABS1( UA21 )+ABS( UA22R ) ).EQ.ZERO ) THEN
+               CALL ZLARTG( DCMPLX( VB22R ), VB21, CSQ, SNQ, R )
+            ELSE IF( ( ABS1( VB21 )+ABS( VB22R ) ).EQ.ZERO ) THEN
+               CALL ZLARTG( DCMPLX( UA22R ), UA21, CSQ, SNQ, R )
+            ELSE IF( AUA21 / ( ABS1( UA21 )+ABS( UA22R ) ).LE.AVB21 /
+     $               ( ABS1( VB21 )+ABS( VB22R ) ) ) THEN
+               CALL ZLARTG( DCMPLX( UA22R ), UA21, CSQ, SNQ, R )
+            ELSE
+               CALL ZLARTG( DCMPLX( VB22R ), VB21, CSQ, SNQ, R )
+            END IF
+*
+            CSU = CSR
+            SNU = -DCONJG( D1 )*SNR
+            CSV = CSL
+            SNV = -DCONJG( D1 )*SNL
+*
+         ELSE
+*
+*           Compute the (1,1) and (1,2) elements of U'*A and V'*B,
+*           and (1,1) element of |U|'*|A| and |V|'*|B|.
+*
+            UA11 = CSR*A1 + DCONJG( D1 )*SNR*A2
+            UA12 = DCONJG( D1 )*SNR*A3
+*
+            VB11 = CSL*B1 + DCONJG( D1 )*SNL*B2
+            VB12 = DCONJG( D1 )*SNL*B3
+*
+            AUA11 = ABS( CSR )*ABS( A1 ) + ABS( SNR )*ABS1( A2 )
+            AVB11 = ABS( CSL )*ABS( B1 ) + ABS( SNL )*ABS1( B2 )
+*
+*           zero (1,1) elements of U'*A and V'*B, and then swap.
+*
+            IF( ( ABS1( UA11 )+ABS1( UA12 ) ).EQ.ZERO ) THEN
+               CALL ZLARTG( VB12, VB11, CSQ, SNQ, R )
+            ELSE IF( ( ABS1( VB11 )+ABS1( VB12 ) ).EQ.ZERO ) THEN
+               CALL ZLARTG( UA12, UA11, CSQ, SNQ, R )
+            ELSE IF( AUA11 / ( ABS1( UA11 )+ABS1( UA12 ) ).LE.AVB11 /
+     $               ( ABS1( VB11 )+ABS1( VB12 ) ) ) THEN
+               CALL ZLARTG( UA12, UA11, CSQ, SNQ, R )
+            ELSE
+               CALL ZLARTG( VB12, VB11, CSQ, SNQ, R )
+            END IF
+*
+            CSU = SNR
+            SNU = DCONJG( D1 )*CSR
+            CSV = SNL
+            SNV = DCONJG( D1 )*CSL
+*
+         END IF
+*
+      END IF
+*
+      RETURN
+*
+*     End of ZLAGS2
+*
+      END
diff --git a/costa/native/external/lapack/zlagtm.f b/costa/native/external/lapack/zlagtm.f
new file mode 100644
index 000000000..2a2fb6d68
--- /dev/null
+++ b/costa/native/external/lapack/zlagtm.f
@@ -0,0 +1,234 @@
+      SUBROUTINE ZLAGTM( TRANS, N, NRHS, ALPHA, DL, D, DU, X, LDX, BETA,
+     $                   B, LDB )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            LDB, LDX, N, NRHS
+      DOUBLE PRECISION   ALPHA, BETA
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         B( LDB, * ), D( * ), DL( * ), DU( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLAGTM performs a matrix-vector product of the form
+*
+*     B := alpha * A * X + beta * B
+*
+*  where A is a tridiagonal matrix of order N, B and X are N by NRHS
+*  matrices, and alpha and beta are real scalars, each of which may be
+*  0., 1., or -1.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER
+*          Specifies the operation applied to A.
+*          = 'N':  No transpose, B := alpha * A * X + beta * B
+*          = 'T':  Transpose,    B := alpha * A**T * X + beta * B
+*          = 'C':  Conjugate transpose, B := alpha * A**H * X + beta * B
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices X and B.
+*
+*  ALPHA   (input) DOUBLE PRECISION
+*          The scalar alpha.  ALPHA must be 0., 1., or -1.; otherwise,
+*          it is assumed to be 0.
+*
+*  DL      (input) COMPLEX*16 array, dimension (N-1)
+*          The (n-1) sub-diagonal elements of T.
+*
+*  D       (input) COMPLEX*16 array, dimension (N)
+*          The diagonal elements of T.
+*
+*  DU      (input) COMPLEX*16 array, dimension (N-1)
+*          The (n-1) super-diagonal elements of T.
+*
+*  X       (input) COMPLEX*16 array, dimension (LDX,NRHS)
+*          The N by NRHS matrix X.
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(N,1).
+*
+*  BETA    (input) DOUBLE PRECISION
+*          The scalar beta.  BETA must be 0., 1., or -1.; otherwise,
+*          it is assumed to be 1.
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the N by NRHS matrix B.
+*          On exit, B is overwritten by the matrix expression
+*          B := alpha * A * X + beta * B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(N,1).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Multiply B by BETA if BETA.NE.1.
+*
+      IF( BETA.EQ.ZERO ) THEN
+         DO 20 J = 1, NRHS
+            DO 10 I = 1, N
+               B( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+      ELSE IF( BETA.EQ.-ONE ) THEN
+         DO 40 J = 1, NRHS
+            DO 30 I = 1, N
+               B( I, J ) = -B( I, J )
+   30       CONTINUE
+   40    CONTINUE
+      END IF
+*
+      IF( ALPHA.EQ.ONE ) THEN
+         IF( LSAME( TRANS, 'N' ) ) THEN
+*
+*           Compute B := B + A*X
+*
+            DO 60 J = 1, NRHS
+               IF( N.EQ.1 ) THEN
+                  B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J )
+               ELSE
+                  B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) +
+     $                        DU( 1 )*X( 2, J )
+                  B( N, J ) = B( N, J ) + DL( N-1 )*X( N-1, J ) +
+     $                        D( N )*X( N, J )
+                  DO 50 I = 2, N - 1
+                     B( I, J ) = B( I, J ) + DL( I-1 )*X( I-1, J ) +
+     $                           D( I )*X( I, J ) + DU( I )*X( I+1, J )
+   50             CONTINUE
+               END IF
+   60       CONTINUE
+         ELSE IF( LSAME( TRANS, 'T' ) ) THEN
+*
+*           Compute B := B + A**T * X
+*
+            DO 80 J = 1, NRHS
+               IF( N.EQ.1 ) THEN
+                  B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J )
+               ELSE
+                  B( 1, J ) = B( 1, J ) + D( 1 )*X( 1, J ) +
+     $                        DL( 1 )*X( 2, J )
+                  B( N, J ) = B( N, J ) + DU( N-1 )*X( N-1, J ) +
+     $                        D( N )*X( N, J )
+                  DO 70 I = 2, N - 1
+                     B( I, J ) = B( I, J ) + DU( I-1 )*X( I-1, J ) +
+     $                           D( I )*X( I, J ) + DL( I )*X( I+1, J )
+   70             CONTINUE
+               END IF
+   80       CONTINUE
+         ELSE IF( LSAME( TRANS, 'C' ) ) THEN
+*
+*           Compute B := B + A**H * X
+*
+            DO 100 J = 1, NRHS
+               IF( N.EQ.1 ) THEN
+                  B( 1, J ) = B( 1, J ) + DCONJG( D( 1 ) )*X( 1, J )
+               ELSE
+                  B( 1, J ) = B( 1, J ) + DCONJG( D( 1 ) )*X( 1, J ) +
+     $                        DCONJG( DL( 1 ) )*X( 2, J )
+                  B( N, J ) = B( N, J ) + DCONJG( DU( N-1 ) )*
+     $                        X( N-1, J ) + DCONJG( D( N ) )*X( N, J )
+                  DO 90 I = 2, N - 1
+                     B( I, J ) = B( I, J ) + DCONJG( DU( I-1 ) )*
+     $                           X( I-1, J ) + DCONJG( D( I ) )*
+     $                           X( I, J ) + DCONJG( DL( I ) )*
+     $                           X( I+1, J )
+   90             CONTINUE
+               END IF
+  100       CONTINUE
+         END IF
+      ELSE IF( ALPHA.EQ.-ONE ) THEN
+         IF( LSAME( TRANS, 'N' ) ) THEN
+*
+*           Compute B := B - A*X
+*
+            DO 120 J = 1, NRHS
+               IF( N.EQ.1 ) THEN
+                  B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J )
+               ELSE
+                  B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) -
+     $                        DU( 1 )*X( 2, J )
+                  B( N, J ) = B( N, J ) - DL( N-1 )*X( N-1, J ) -
+     $                        D( N )*X( N, J )
+                  DO 110 I = 2, N - 1
+                     B( I, J ) = B( I, J ) - DL( I-1 )*X( I-1, J ) -
+     $                           D( I )*X( I, J ) - DU( I )*X( I+1, J )
+  110             CONTINUE
+               END IF
+  120       CONTINUE
+         ELSE IF( LSAME( TRANS, 'T' ) ) THEN
+*
+*           Compute B := B - A'*X
+*
+            DO 140 J = 1, NRHS
+               IF( N.EQ.1 ) THEN
+                  B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J )
+               ELSE
+                  B( 1, J ) = B( 1, J ) - D( 1 )*X( 1, J ) -
+     $                        DL( 1 )*X( 2, J )
+                  B( N, J ) = B( N, J ) - DU( N-1 )*X( N-1, J ) -
+     $                        D( N )*X( N, J )
+                  DO 130 I = 2, N - 1
+                     B( I, J ) = B( I, J ) - DU( I-1 )*X( I-1, J ) -
+     $                           D( I )*X( I, J ) - DL( I )*X( I+1, J )
+  130             CONTINUE
+               END IF
+  140       CONTINUE
+         ELSE IF( LSAME( TRANS, 'C' ) ) THEN
+*
+*           Compute B := B - A'*X
+*
+            DO 160 J = 1, NRHS
+               IF( N.EQ.1 ) THEN
+                  B( 1, J ) = B( 1, J ) - DCONJG( D( 1 ) )*X( 1, J )
+               ELSE
+                  B( 1, J ) = B( 1, J ) - DCONJG( D( 1 ) )*X( 1, J ) -
+     $                        DCONJG( DL( 1 ) )*X( 2, J )
+                  B( N, J ) = B( N, J ) - DCONJG( DU( N-1 ) )*
+     $                        X( N-1, J ) - DCONJG( D( N ) )*X( N, J )
+                  DO 150 I = 2, N - 1
+                     B( I, J ) = B( I, J ) - DCONJG( DU( I-1 ) )*
+     $                           X( I-1, J ) - DCONJG( D( I ) )*
+     $                           X( I, J ) - DCONJG( DL( I ) )*
+     $                           X( I+1, J )
+  150             CONTINUE
+               END IF
+  160       CONTINUE
+         END IF
+      END IF
+      RETURN
+*
+*     End of ZLAGTM
+*
+      END
diff --git a/costa/native/external/lapack/zlahef.f b/costa/native/external/lapack/zlahef.f
new file mode 100644
index 000000000..7520c57b4
--- /dev/null
+++ b/costa/native/external/lapack/zlahef.f
@@ -0,0 +1,648 @@
+      SUBROUTINE ZLAHEF( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KB, LDA, LDW, N, NB
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         A( LDA, * ), W( LDW, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLAHEF computes a partial factorization of a complex Hermitian
+*  matrix A using the Bunch-Kaufman diagonal pivoting method. The
+*  partial factorization has the form:
+*
+*  A  =  ( I  U12 ) ( A11  0  ) (  I    0   )  if UPLO = 'U', or:
+*        ( 0  U22 ) (  0   D  ) ( U12' U22' )
+*
+*  A  =  ( L11  0 ) (  D   0  ) ( L11' L21' )  if UPLO = 'L'
+*        ( L21  I ) (  0  A22 ) (  0    I   )
+*
+*  where the order of D is at most NB. The actual order is returned in
+*  the argument KB, and is either NB or NB-1, or N if N <= NB.
+*  Note that U' denotes the conjugate transpose of U.
+*
+*  ZLAHEF is an auxiliary routine called by ZHETRF. It uses blocked code
+*  (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or
+*  A22 (if UPLO = 'L').
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          Hermitian matrix A is stored:
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NB      (input) INTEGER
+*          The maximum number of columns of the matrix A that should be
+*          factored.  NB should be at least 2 to allow for 2-by-2 pivot
+*          blocks.
+*
+*  KB      (output) INTEGER
+*          The number of columns of A that were actually factored.
+*          KB is either NB-1 or NB, or N if N <= NB.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+*          n-by-n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n-by-n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*          On exit, A contains details of the partial factorization.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D.
+*          If UPLO = 'U', only the last KB elements of IPIV are set;
+*          if UPLO = 'L', only the first KB elements are set.
+*
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*  W       (workspace) COMPLEX*16 array, dimension (LDW,NB)
+*
+*  LDW     (input) INTEGER
+*          The leading dimension of the array W.  LDW >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          > 0: if INFO = k, D(k,k) is exactly zero.  The factorization
+*               has been completed, but the block diagonal matrix D is
+*               exactly singular.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+      COMPLEX*16         CONE
+      PARAMETER          ( CONE = ( 1.0D+0, 0.0D+0 ) )
+      DOUBLE PRECISION   EIGHT, SEVTEN
+      PARAMETER          ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            IMAX, J, JB, JJ, JMAX, JP, K, KK, KKW, KP,
+     $                   KSTEP, KW
+      DOUBLE PRECISION   ABSAKK, ALPHA, COLMAX, R1, ROWMAX, T
+      COMPLEX*16         D11, D21, D22, Z
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IZAMAX
+      EXTERNAL           LSAME, IZAMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZCOPY, ZDSCAL, ZGEMM, ZGEMV, ZLACGV, ZSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCONJG, DIMAG, MAX, MIN, SQRT
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( Z ) = ABS( DBLE( Z ) ) + ABS( DIMAG( Z ) )
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+*
+*     Initialize ALPHA for use in choosing pivot block size.
+*
+      ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT
+*
+      IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*        Factorize the trailing columns of A using the upper triangle
+*        of A and working backwards, and compute the matrix W = U12*D
+*        for use in updating A11 (note that conjg(W) is actually stored)
+*
+*        K is the main loop index, decreasing from N in steps of 1 or 2
+*
+*        KW is the column of W which corresponds to column K of A
+*
+         K = N
+   10    CONTINUE
+         KW = NB + K - N
+*
+*        Exit from loop
+*
+         IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 )
+     $      GO TO 30
+*
+*        Copy column K of A to column KW of W and update it
+*
+         CALL ZCOPY( K-1, A( 1, K ), 1, W( 1, KW ), 1 )
+         W( K, KW ) = DBLE( A( K, K ) )
+         IF( K.LT.N ) THEN
+            CALL ZGEMV( 'No transpose', K, N-K, -CONE, A( 1, K+1 ), LDA,
+     $                  W( K, KW+1 ), LDW, CONE, W( 1, KW ), 1 )
+            W( K, KW ) = DBLE( W( K, KW ) )
+         END IF
+*
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = ABS( DBLE( W( K, KW ) ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.GT.1 ) THEN
+            IMAX = IZAMAX( K-1, W( 1, KW ), 1 )
+            COLMAX = CABS1( W( IMAX, KW ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+            A( K, K ) = DBLE( A( K, K ) )
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              Copy column IMAX to column KW-1 of W and update it
+*
+               CALL ZCOPY( IMAX-1, A( 1, IMAX ), 1, W( 1, KW-1 ), 1 )
+               W( IMAX, KW-1 ) = DBLE( A( IMAX, IMAX ) )
+               CALL ZCOPY( K-IMAX, A( IMAX, IMAX+1 ), LDA,
+     $                     W( IMAX+1, KW-1 ), 1 )
+               CALL ZLACGV( K-IMAX, W( IMAX+1, KW-1 ), 1 )
+               IF( K.LT.N ) THEN
+                  CALL ZGEMV( 'No transpose', K, N-K, -CONE,
+     $                        A( 1, K+1 ), LDA, W( IMAX, KW+1 ), LDW,
+     $                        CONE, W( 1, KW-1 ), 1 )
+                  W( IMAX, KW-1 ) = DBLE( W( IMAX, KW-1 ) )
+               END IF
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               JMAX = IMAX + IZAMAX( K-IMAX, W( IMAX+1, KW-1 ), 1 )
+               ROWMAX = CABS1( W( JMAX, KW-1 ) )
+               IF( IMAX.GT.1 ) THEN
+                  JMAX = IZAMAX( IMAX-1, W( 1, KW-1 ), 1 )
+                  ROWMAX = MAX( ROWMAX, CABS1( W( JMAX, KW-1 ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( ABS( DBLE( W( IMAX, KW-1 ) ) ).GE.ALPHA*ROWMAX )
+     $                   THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+*
+*                 copy column KW-1 of W to column KW
+*
+                  CALL ZCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 )
+               ELSE
+*
+*                 interchange rows and columns K-1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K - KSTEP + 1
+            KKW = NB + KK - N
+*
+*           Updated column KP is already stored in column KKW of W
+*
+            IF( KP.NE.KK ) THEN
+*
+*              Copy non-updated column KK to column KP
+*
+               A( KP, KP ) = DBLE( A( KK, KK ) )
+               CALL ZCOPY( KK-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ),
+     $                     LDA )
+               CALL ZLACGV( KK-1-KP, A( KP, KP+1 ), LDA )
+               CALL ZCOPY( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 )
+*
+*              Interchange rows KK and KP in last KK columns of A and W
+*
+               IF( KK.LT.N )
+     $            CALL ZSWAP( N-KK, A( KK, KK+1 ), LDA, A( KP, KK+1 ),
+     $                        LDA )
+               CALL ZSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ),
+     $                     LDW )
+            END IF
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column KW of W now holds
+*
+*              W(k) = U(k)*D(k)
+*
+*              where U(k) is the k-th column of U
+*
+*              Store U(k) in column k of A
+*
+               CALL ZCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 )
+               R1 = ONE / DBLE( A( K, K ) )
+               CALL ZDSCAL( K-1, R1, A( 1, K ), 1 )
+*
+*              Conjugate W(k)
+*
+               CALL ZLACGV( K-1, W( 1, KW ), 1 )
+            ELSE
+*
+*              2-by-2 pivot block D(k): columns KW and KW-1 of W now
+*              hold
+*
+*              ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k)
+*
+*              where U(k) and U(k-1) are the k-th and (k-1)-th columns
+*              of U
+*
+               IF( K.GT.2 ) THEN
+*
+*                 Store U(k) and U(k-1) in columns k and k-1 of A
+*
+                  D21 = W( K-1, KW )
+                  D11 = W( K, KW ) / DCONJG( D21 )
+                  D22 = W( K-1, KW-1 ) / D21
+                  T = ONE / ( DBLE( D11*D22 )-ONE )
+                  D21 = T / D21
+                  DO 20 J = 1, K - 2
+                     A( J, K-1 ) = D21*( D11*W( J, KW-1 )-W( J, KW ) )
+                     A( J, K ) = DCONJG( D21 )*
+     $                           ( D22*W( J, KW )-W( J, KW-1 ) )
+   20             CONTINUE
+               END IF
+*
+*              Copy D(k) to A
+*
+               A( K-1, K-1 ) = W( K-1, KW-1 )
+               A( K-1, K ) = W( K-1, KW )
+               A( K, K ) = W( K, KW )
+*
+*              Conjugate W(k) and W(k-1)
+*
+               CALL ZLACGV( K-1, W( 1, KW ), 1 )
+               CALL ZLACGV( K-2, W( 1, KW-1 ), 1 )
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K-1 ) = -KP
+         END IF
+*
+*        Decrease K and return to the start of the main loop
+*
+         K = K - KSTEP
+         GO TO 10
+*
+   30    CONTINUE
+*
+*        Update the upper triangle of A11 (= A(1:k,1:k)) as
+*
+*        A11 := A11 - U12*D*U12' = A11 - U12*W'
+*
+*        computing blocks of NB columns at a time (note that conjg(W) is
+*        actually stored)
+*
+         DO 50 J = ( ( K-1 ) / NB )*NB + 1, 1, -NB
+            JB = MIN( NB, K-J+1 )
+*
+*           Update the upper triangle of the diagonal block
+*
+            DO 40 JJ = J, J + JB - 1
+               A( JJ, JJ ) = DBLE( A( JJ, JJ ) )
+               CALL ZGEMV( 'No transpose', JJ-J+1, N-K, -CONE,
+     $                     A( J, K+1 ), LDA, W( JJ, KW+1 ), LDW, CONE,
+     $                     A( J, JJ ), 1 )
+               A( JJ, JJ ) = DBLE( A( JJ, JJ ) )
+   40       CONTINUE
+*
+*           Update the rectangular superdiagonal block
+*
+            CALL ZGEMM( 'No transpose', 'Transpose', J-1, JB, N-K,
+     $                  -CONE, A( 1, K+1 ), LDA, W( J, KW+1 ), LDW,
+     $                  CONE, A( 1, J ), LDA )
+   50    CONTINUE
+*
+*        Put U12 in standard form by partially undoing the interchanges
+*        in columns k+1:n
+*
+         J = K + 1
+   60    CONTINUE
+         JJ = J
+         JP = IPIV( J )
+         IF( JP.LT.0 ) THEN
+            JP = -JP
+            J = J + 1
+         END IF
+         J = J + 1
+         IF( JP.NE.JJ .AND. J.LE.N )
+     $      CALL ZSWAP( N-J+1, A( JP, J ), LDA, A( JJ, J ), LDA )
+         IF( J.LE.N )
+     $      GO TO 60
+*
+*        Set KB to the number of columns factorized
+*
+         KB = N - K
+*
+      ELSE
+*
+*        Factorize the leading columns of A using the lower triangle
+*        of A and working forwards, and compute the matrix W = L21*D
+*        for use in updating A22 (note that conjg(W) is actually stored)
+*
+*        K is the main loop index, increasing from 1 in steps of 1 or 2
+*
+         K = 1
+   70    CONTINUE
+*
+*        Exit from loop
+*
+         IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N )
+     $      GO TO 90
+*
+*        Copy column K of A to column K of W and update it
+*
+         W( K, K ) = DBLE( A( K, K ) )
+         IF( K.LT.N )
+     $      CALL ZCOPY( N-K, A( K+1, K ), 1, W( K+1, K ), 1 )
+         CALL ZGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ), LDA,
+     $               W( K, 1 ), LDW, CONE, W( K, K ), 1 )
+         W( K, K ) = DBLE( W( K, K ) )
+*
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = ABS( DBLE( W( K, K ) ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.LT.N ) THEN
+            IMAX = K + IZAMAX( N-K, W( K+1, K ), 1 )
+            COLMAX = CABS1( W( IMAX, K ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+            A( K, K ) = DBLE( A( K, K ) )
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              Copy column IMAX to column K+1 of W and update it
+*
+               CALL ZCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1 )
+               CALL ZLACGV( IMAX-K, W( K, K+1 ), 1 )
+               W( IMAX, K+1 ) = DBLE( A( IMAX, IMAX ) )
+               IF( IMAX.LT.N )
+     $            CALL ZCOPY( N-IMAX, A( IMAX+1, IMAX ), 1,
+     $                        W( IMAX+1, K+1 ), 1 )
+               CALL ZGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ),
+     $                     LDA, W( IMAX, 1 ), LDW, CONE, W( K, K+1 ),
+     $                     1 )
+               W( IMAX, K+1 ) = DBLE( W( IMAX, K+1 ) )
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               JMAX = K - 1 + IZAMAX( IMAX-K, W( K, K+1 ), 1 )
+               ROWMAX = CABS1( W( JMAX, K+1 ) )
+               IF( IMAX.LT.N ) THEN
+                  JMAX = IMAX + IZAMAX( N-IMAX, W( IMAX+1, K+1 ), 1 )
+                  ROWMAX = MAX( ROWMAX, CABS1( W( JMAX, K+1 ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( ABS( DBLE( W( IMAX, K+1 ) ) ).GE.ALPHA*ROWMAX )
+     $                   THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+*
+*                 copy column K+1 of W to column K
+*
+                  CALL ZCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 )
+               ELSE
+*
+*                 interchange rows and columns K+1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K + KSTEP - 1
+*
+*           Updated column KP is already stored in column KK of W
+*
+            IF( KP.NE.KK ) THEN
+*
+*              Copy non-updated column KK to column KP
+*
+               A( KP, KP ) = DBLE( A( KK, KK ) )
+               CALL ZCOPY( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ),
+     $                     LDA )
+               CALL ZLACGV( KP-KK-1, A( KP, KK+1 ), LDA )
+               IF( KP.LT.N )
+     $            CALL ZCOPY( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 )
+*
+*              Interchange rows KK and KP in first KK columns of A and W
+*
+               CALL ZSWAP( KK-1, A( KK, 1 ), LDA, A( KP, 1 ), LDA )
+               CALL ZSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW )
+            END IF
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column k of W now holds
+*
+*              W(k) = L(k)*D(k)
+*
+*              where L(k) is the k-th column of L
+*
+*              Store L(k) in column k of A
+*
+               CALL ZCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 )
+               IF( K.LT.N ) THEN
+                  R1 = ONE / DBLE( A( K, K ) )
+                  CALL ZDSCAL( N-K, R1, A( K+1, K ), 1 )
+*
+*                 Conjugate W(k)
+*
+                  CALL ZLACGV( N-K, W( K+1, K ), 1 )
+               END IF
+            ELSE
+*
+*              2-by-2 pivot block D(k): columns k and k+1 of W now hold
+*
+*              ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k)
+*
+*              where L(k) and L(k+1) are the k-th and (k+1)-th columns
+*              of L
+*
+               IF( K.LT.N-1 ) THEN
+*
+*                 Store L(k) and L(k+1) in columns k and k+1 of A
+*
+                  D21 = W( K+1, K )
+                  D11 = W( K+1, K+1 ) / D21
+                  D22 = W( K, K ) / DCONJG( D21 )
+                  T = ONE / ( DBLE( D11*D22 )-ONE )
+                  D21 = T / D21
+                  DO 80 J = K + 2, N
+                     A( J, K ) = DCONJG( D21 )*
+     $                           ( D11*W( J, K )-W( J, K+1 ) )
+                     A( J, K+1 ) = D21*( D22*W( J, K+1 )-W( J, K ) )
+   80             CONTINUE
+               END IF
+*
+*              Copy D(k) to A
+*
+               A( K, K ) = W( K, K )
+               A( K+1, K ) = W( K+1, K )
+               A( K+1, K+1 ) = W( K+1, K+1 )
+*
+*              Conjugate W(k) and W(k+1)
+*
+               CALL ZLACGV( N-K, W( K+1, K ), 1 )
+               CALL ZLACGV( N-K-1, W( K+2, K+1 ), 1 )
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K+1 ) = -KP
+         END IF
+*
+*        Increase K and return to the start of the main loop
+*
+         K = K + KSTEP
+         GO TO 70
+*
+   90    CONTINUE
+*
+*        Update the lower triangle of A22 (= A(k:n,k:n)) as
+*
+*        A22 := A22 - L21*D*L21' = A22 - L21*W'
+*
+*        computing blocks of NB columns at a time (note that conjg(W) is
+*        actually stored)
+*
+         DO 110 J = K, N, NB
+            JB = MIN( NB, N-J+1 )
+*
+*           Update the lower triangle of the diagonal block
+*
+            DO 100 JJ = J, J + JB - 1
+               A( JJ, JJ ) = DBLE( A( JJ, JJ ) )
+               CALL ZGEMV( 'No transpose', J+JB-JJ, K-1, -CONE,
+     $                     A( JJ, 1 ), LDA, W( JJ, 1 ), LDW, CONE,
+     $                     A( JJ, JJ ), 1 )
+               A( JJ, JJ ) = DBLE( A( JJ, JJ ) )
+  100       CONTINUE
+*
+*           Update the rectangular subdiagonal block
+*
+            IF( J+JB.LE.N )
+     $         CALL ZGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB,
+     $                     K-1, -CONE, A( J+JB, 1 ), LDA, W( J, 1 ),
+     $                     LDW, CONE, A( J+JB, J ), LDA )
+  110    CONTINUE
+*
+*        Put L21 in standard form by partially undoing the interchanges
+*        in columns 1:k-1
+*
+         J = K - 1
+  120    CONTINUE
+         JJ = J
+         JP = IPIV( J )
+         IF( JP.LT.0 ) THEN
+            JP = -JP
+            J = J - 1
+         END IF
+         J = J - 1
+         IF( JP.NE.JJ .AND. J.GE.1 )
+     $      CALL ZSWAP( J, A( JP, 1 ), LDA, A( JJ, 1 ), LDA )
+         IF( J.GE.1 )
+     $      GO TO 120
+*
+*        Set KB to the number of columns factorized
+*
+         KB = K - 1
+*
+      END IF
+      RETURN
+*
+*     End of ZLAHEF
+*
+      END
diff --git a/costa/native/external/lapack/zlahqr.f b/costa/native/external/lapack/zlahqr.f
new file mode 100644
index 000000000..5fbd9e55c
--- /dev/null
+++ b/costa/native/external/lapack/zlahqr.f
@@ -0,0 +1,384 @@
+      SUBROUTINE ZLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, W, ILOZ,
+     $                   IHIZ, Z, LDZ, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      LOGICAL            WANTT, WANTZ
+      INTEGER            IHI, IHIZ, ILO, ILOZ, INFO, LDH, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         H( LDH, * ), W( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLAHQR is an auxiliary routine called by ZHSEQR to update the
+*  eigenvalues and Schur decomposition already computed by ZHSEQR, by
+*  dealing with the Hessenberg submatrix in rows and columns ILO to IHI.
+*
+*  Arguments
+*  =========
+*
+*  WANTT   (input) LOGICAL
+*          = .TRUE. : the full Schur form T is required;
+*          = .FALSE.: only eigenvalues are required.
+*
+*  WANTZ   (input) LOGICAL
+*          = .TRUE. : the matrix of Schur vectors Z is required;
+*          = .FALSE.: Schur vectors are not required.
+*
+*  N       (input) INTEGER
+*          The order of the matrix H.  N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          It is assumed that H is already upper triangular in rows and
+*          columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless ILO = 1).
+*          ZLAHQR works primarily with the Hessenberg submatrix in rows
+*          and columns ILO to IHI, but applies transformations to all of
+*          H if WANTT is .TRUE..
+*          1 <= ILO <= max(1,IHI); IHI <= N.
+*
+*  H       (input/output) COMPLEX*16 array, dimension (LDH,N)
+*          On entry, the upper Hessenberg matrix H.
+*          On exit, if WANTT is .TRUE., H is upper triangular in rows
+*          and columns ILO:IHI, with any 2-by-2 diagonal blocks in
+*          standard form. If WANTT is .FALSE., the contents of H are
+*          unspecified on exit.
+*
+*  LDH     (input) INTEGER
+*          The leading dimension of the array H. LDH >= max(1,N).
+*
+*  W       (output) COMPLEX*16 array, dimension (N)
+*          The computed eigenvalues ILO to IHI are stored in the
+*          corresponding elements of W. If WANTT is .TRUE., the
+*          eigenvalues are stored in the same order as on the diagonal
+*          of the Schur form returned in H, with W(i) = H(i,i).
+*
+*  ILOZ    (input) INTEGER
+*  IHIZ    (input) INTEGER
+*          Specify the rows of Z to which transformations must be
+*          applied if WANTZ is .TRUE..
+*          1 <= ILOZ <= ILO; IHI <= IHIZ <= N.
+*
+*  Z       (input/output) COMPLEX*16 array, dimension (LDZ,N)
+*          If WANTZ is .TRUE., on entry Z must contain the current
+*          matrix Z of transformations accumulated by ZHSEQR, and on
+*          exit Z has been updated; transformations are applied only to
+*          the submatrix Z(ILOZ:IHIZ,ILO:IHI).
+*          If WANTZ is .FALSE., Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z. LDZ >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          > 0: if INFO = i, ZLAHQR failed to compute all the
+*               eigenvalues ILO to IHI in a total of 30*(IHI-ILO+1)
+*               iterations; elements i+1:ihi of W contain those
+*               eigenvalues which have been successfully computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO, ONE
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   ONE = ( 1.0D+0, 0.0D+0 ) )
+      DOUBLE PRECISION   RZERO, HALF
+      PARAMETER          ( RZERO = 0.0D+0, HALF = 0.5D+0 )
+      DOUBLE PRECISION   DAT1
+      PARAMETER          ( DAT1 = 0.75D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, I1, I2, ITN, ITS, J, K, L, M, NH, NZ
+      DOUBLE PRECISION   H10, H21, RTEMP, S, SMLNUM, T2, TST1, ULP
+      COMPLEX*16         CDUM, H11, H11S, H22, SUM, T, T1, TEMP, U, V2,
+     $                   X, Y
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   RWORK( 1 )
+      COMPLEX*16         V( 2 )
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH, ZLANHS
+      COMPLEX*16         ZLADIV
+      EXTERNAL           DLAMCH, ZLANHS, ZLADIV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZCOPY, ZLARFG, ZSCAL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCONJG, DIMAG, MAX, MIN, SQRT
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+      IF( ILO.EQ.IHI ) THEN
+         W( ILO ) = H( ILO, ILO )
+         RETURN
+      END IF
+*
+      NH = IHI - ILO + 1
+      NZ = IHIZ - ILOZ + 1
+*
+*     Set machine-dependent constants for the stopping criterion.
+*     If norm(H) <= sqrt(OVFL), overflow should not occur.
+*
+      ULP = DLAMCH( 'Precision' )
+      SMLNUM = DLAMCH( 'Safe minimum' ) / ULP
+*
+*     I1 and I2 are the indices of the first row and last column of H
+*     to which transformations must be applied. If eigenvalues only are
+*     being computed, I1 and I2 are set inside the main loop.
+*
+      IF( WANTT ) THEN
+         I1 = 1
+         I2 = N
+      END IF
+*
+*     ITN is the total number of QR iterations allowed.
+*
+      ITN = 30*NH
+*
+*     The main loop begins here. I is the loop index and decreases from
+*     IHI to ILO in steps of 1. Each iteration of the loop works
+*     with the active submatrix in rows and columns L to I.
+*     Eigenvalues I+1 to IHI have already converged. Either L = ILO, or
+*     H(L,L-1) is negligible so that the matrix splits.
+*
+      I = IHI
+   10 CONTINUE
+      IF( I.LT.ILO )
+     $   GO TO 130
+*
+*     Perform QR iterations on rows and columns ILO to I until a
+*     submatrix of order 1 splits off at the bottom because a
+*     subdiagonal element has become negligible.
+*
+      L = ILO
+      DO 110 ITS = 0, ITN
+*
+*        Look for a single small subdiagonal element.
+*
+         DO 20 K = I, L + 1, -1
+            TST1 = CABS1( H( K-1, K-1 ) ) + CABS1( H( K, K ) )
+            IF( TST1.EQ.RZERO )
+     $         TST1 = ZLANHS( '1', I-L+1, H( L, L ), LDH, RWORK )
+            IF( ABS( DBLE( H( K, K-1 ) ) ).LE.MAX( ULP*TST1, SMLNUM ) )
+     $         GO TO 30
+   20    CONTINUE
+   30    CONTINUE
+         L = K
+         IF( L.GT.ILO ) THEN
+*
+*           H(L,L-1) is negligible
+*
+            H( L, L-1 ) = ZERO
+         END IF
+*
+*        Exit from loop if a submatrix of order 1 has split off.
+*
+         IF( L.GE.I )
+     $      GO TO 120
+*
+*        Now the active submatrix is in rows and columns L to I. If
+*        eigenvalues only are being computed, only the active submatrix
+*        need be transformed.
+*
+         IF( .NOT.WANTT ) THEN
+            I1 = L
+            I2 = I
+         END IF
+*
+         IF( ITS.EQ.10 .OR. ITS.EQ.20 ) THEN
+*
+*           Exceptional shift.
+*
+            S = DAT1*ABS( DBLE( H( I, I-1 ) ) )
+            T = S + H( I, I )
+         ELSE
+*
+*           Wilkinson's shift.
+*
+            T = H( I, I )
+            U = H( I-1, I )*DBLE( H( I, I-1 ) )
+            IF( U.NE.ZERO ) THEN
+               X = HALF*( H( I-1, I-1 )-T )
+               Y = SQRT( X*X+U )
+               IF( DBLE( X )*DBLE( Y )+DIMAG( X )*DIMAG( Y ).LT.RZERO )
+     $            Y = -Y
+               T = T - ZLADIV( U, ( X+Y ) )
+            END IF
+         END IF
+*
+*        Look for two consecutive small subdiagonal elements.
+*
+         DO 40 M = I - 1, L + 1, -1
+*
+*           Determine the effect of starting the single-shift QR
+*           iteration at row M, and see if this would make H(M,M-1)
+*           negligible.
+*
+            H11 = H( M, M )
+            H22 = H( M+1, M+1 )
+            H11S = H11 - T
+            H21 = H( M+1, M )
+            S = CABS1( H11S ) + ABS( H21 )
+            H11S = H11S / S
+            H21 = H21 / S
+            V( 1 ) = H11S
+            V( 2 ) = H21
+            H10 = H( M, M-1 )
+            TST1 = CABS1( H11S )*( CABS1( H11 )+CABS1( H22 ) )
+            IF( ABS( H10*H21 ).LE.ULP*TST1 )
+     $         GO TO 50
+   40    CONTINUE
+         H11 = H( L, L )
+         H22 = H( L+1, L+1 )
+         H11S = H11 - T
+         H21 = H( L+1, L )
+         S = CABS1( H11S ) + ABS( H21 )
+         H11S = H11S / S
+         H21 = H21 / S
+         V( 1 ) = H11S
+         V( 2 ) = H21
+   50    CONTINUE
+*
+*        Single-shift QR step
+*
+         DO 100 K = M, I - 1
+*
+*           The first iteration of this loop determines a reflection G
+*           from the vector V and applies it from left and right to H,
+*           thus creating a nonzero bulge below the subdiagonal.
+*
+*           Each subsequent iteration determines a reflection G to
+*           restore the Hessenberg form in the (K-1)th column, and thus
+*           chases the bulge one step toward the bottom of the active
+*           submatrix.
+*
+*           V(2) is always real before the call to ZLARFG, and hence
+*           after the call T2 ( = T1*V(2) ) is also real.
+*
+            IF( K.GT.M )
+     $         CALL ZCOPY( 2, H( K, K-1 ), 1, V, 1 )
+            CALL ZLARFG( 2, V( 1 ), V( 2 ), 1, T1 )
+            IF( K.GT.M ) THEN
+               H( K, K-1 ) = V( 1 )
+               H( K+1, K-1 ) = ZERO
+            END IF
+            V2 = V( 2 )
+            T2 = DBLE( T1*V2 )
+*
+*           Apply G from the left to transform the rows of the matrix
+*           in columns K to I2.
+*
+            DO 60 J = K, I2
+               SUM = DCONJG( T1 )*H( K, J ) + T2*H( K+1, J )
+               H( K, J ) = H( K, J ) - SUM
+               H( K+1, J ) = H( K+1, J ) - SUM*V2
+   60       CONTINUE
+*
+*           Apply G from the right to transform the columns of the
+*           matrix in rows I1 to min(K+2,I).
+*
+            DO 70 J = I1, MIN( K+2, I )
+               SUM = T1*H( J, K ) + T2*H( J, K+1 )
+               H( J, K ) = H( J, K ) - SUM
+               H( J, K+1 ) = H( J, K+1 ) - SUM*DCONJG( V2 )
+   70       CONTINUE
+*
+            IF( WANTZ ) THEN
+*
+*              Accumulate transformations in the matrix Z
+*
+               DO 80 J = ILOZ, IHIZ
+                  SUM = T1*Z( J, K ) + T2*Z( J, K+1 )
+                  Z( J, K ) = Z( J, K ) - SUM
+                  Z( J, K+1 ) = Z( J, K+1 ) - SUM*DCONJG( V2 )
+   80          CONTINUE
+            END IF
+*
+            IF( K.EQ.M .AND. M.GT.L ) THEN
+*
+*              If the QR step was started at row M > L because two
+*              consecutive small subdiagonals were found, then extra
+*              scaling must be performed to ensure that H(M,M-1) remains
+*              real.
+*
+               TEMP = ONE - T1
+               TEMP = TEMP / ABS( TEMP )
+               H( M+1, M ) = H( M+1, M )*DCONJG( TEMP )
+               IF( M+2.LE.I )
+     $            H( M+2, M+1 ) = H( M+2, M+1 )*TEMP
+               DO 90 J = M, I
+                  IF( J.NE.M+1 ) THEN
+                     IF( I2.GT.J )
+     $                  CALL ZSCAL( I2-J, TEMP, H( J, J+1 ), LDH )
+                     CALL ZSCAL( J-I1, DCONJG( TEMP ), H( I1, J ), 1 )
+                     IF( WANTZ ) THEN
+                        CALL ZSCAL( NZ, DCONJG( TEMP ), Z( ILOZ, J ),
+     $                              1 )
+                     END IF
+                  END IF
+   90          CONTINUE
+            END IF
+  100    CONTINUE
+*
+*        Ensure that H(I,I-1) is real.
+*
+         TEMP = H( I, I-1 )
+         IF( DIMAG( TEMP ).NE.RZERO ) THEN
+            RTEMP = ABS( TEMP )
+            H( I, I-1 ) = RTEMP
+            TEMP = TEMP / RTEMP
+            IF( I2.GT.I )
+     $         CALL ZSCAL( I2-I, DCONJG( TEMP ), H( I, I+1 ), LDH )
+            CALL ZSCAL( I-I1, TEMP, H( I1, I ), 1 )
+            IF( WANTZ ) THEN
+               CALL ZSCAL( NZ, TEMP, Z( ILOZ, I ), 1 )
+            END IF
+         END IF
+*
+  110 CONTINUE
+*
+*     Failure to converge in remaining number of iterations
+*
+      INFO = I
+      RETURN
+*
+  120 CONTINUE
+*
+*     H(I,I-1) is negligible: one eigenvalue has converged.
+*
+      W( I ) = H( I, I )
+*
+*     Decrement number of remaining iterations, and return to start of
+*     the main loop with new value of I.
+*
+      ITN = ITN - ITS
+      I = L - 1
+      GO TO 10
+*
+  130 CONTINUE
+      RETURN
+*
+*     End of ZLAHQR
+*
+      END
diff --git a/costa/native/external/lapack/zlahrd.f b/costa/native/external/lapack/zlahrd.f
new file mode 100644
index 000000000..4f39378c2
--- /dev/null
+++ b/costa/native/external/lapack/zlahrd.f
@@ -0,0 +1,212 @@
+      SUBROUTINE ZLAHRD( N, K, NB, A, LDA, TAU, T, LDT, Y, LDY )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            K, LDA, LDT, LDY, N, NB
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), T( LDT, NB ), TAU( NB ),
+     $                   Y( LDY, NB )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLAHRD reduces the first NB columns of a complex general n-by-(n-k+1)
+*  matrix A so that elements below the k-th subdiagonal are zero. The
+*  reduction is performed by a unitary similarity transformation
+*  Q' * A * Q. The routine returns the matrices V and T which determine
+*  Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T.
+*
+*  This is an auxiliary routine called by ZGEHRD.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.
+*
+*  K       (input) INTEGER
+*          The offset for the reduction. Elements below the k-th
+*          subdiagonal in the first NB columns are reduced to zero.
+*
+*  NB      (input) INTEGER
+*          The number of columns to be reduced.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N-K+1)
+*          On entry, the n-by-(n-k+1) general matrix A.
+*          On exit, the elements on and above the k-th subdiagonal in
+*          the first NB columns are overwritten with the corresponding
+*          elements of the reduced matrix; the elements below the k-th
+*          subdiagonal, with the array TAU, represent the matrix Q as a
+*          product of elementary reflectors. The other columns of A are
+*          unchanged. See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  TAU     (output) COMPLEX*16 array, dimension (NB)
+*          The scalar factors of the elementary reflectors. See Further
+*          Details.
+*
+*  T       (output) COMPLEX*16 array, dimension (LDT,NB)
+*          The upper triangular matrix T.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T.  LDT >= NB.
+*
+*  Y       (output) COMPLEX*16 array, dimension (LDY,NB)
+*          The n-by-nb matrix Y.
+*
+*  LDY     (input) INTEGER
+*          The leading dimension of the array Y. LDY >= max(1,N).
+*
+*  Further Details
+*  ===============
+*
+*  The matrix Q is represented as a product of nb elementary reflectors
+*
+*     Q = H(1) H(2) . . . H(nb).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in
+*  A(i+k+1:n,i), and tau in TAU(i).
+*
+*  The elements of the vectors v together form the (n-k+1)-by-nb matrix
+*  V which is needed, with T and Y, to apply the transformation to the
+*  unreduced part of the matrix, using an update of the form:
+*  A := (I - V*T*V') * (A - Y*V').
+*
+*  The contents of A on exit are illustrated by the following example
+*  with n = 7, k = 3 and nb = 2:
+*
+*     ( a   h   a   a   a )
+*     ( a   h   a   a   a )
+*     ( a   h   a   a   a )
+*     ( h   h   a   a   a )
+*     ( v1  h   a   a   a )
+*     ( v1  v2  a   a   a )
+*     ( v1  v2  a   a   a )
+*
+*  where a denotes an element of the original matrix A, h denotes a
+*  modified element of the upper Hessenberg matrix H, and vi denotes an
+*  element of the vector defining H(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO, ONE
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      COMPLEX*16         EI
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZAXPY, ZCOPY, ZGEMV, ZLACGV, ZLARFG, ZSCAL,
+     $                   ZTRMV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.1 )
+     $   RETURN
+*
+      DO 10 I = 1, NB
+         IF( I.GT.1 ) THEN
+*
+*           Update A(1:n,i)
+*
+*           Compute i-th column of A - Y * V'
+*
+            CALL ZLACGV( I-1, A( K+I-1, 1 ), LDA )
+            CALL ZGEMV( 'No transpose', N, I-1, -ONE, Y, LDY,
+     $                  A( K+I-1, 1 ), LDA, ONE, A( 1, I ), 1 )
+            CALL ZLACGV( I-1, A( K+I-1, 1 ), LDA )
+*
+*           Apply I - V * T' * V' to this column (call it b) from the
+*           left, using the last column of T as workspace
+*
+*           Let  V = ( V1 )   and   b = ( b1 )   (first I-1 rows)
+*                    ( V2 )             ( b2 )
+*
+*           where V1 is unit lower triangular
+*
+*           w := V1' * b1
+*
+            CALL ZCOPY( I-1, A( K+1, I ), 1, T( 1, NB ), 1 )
+            CALL ZTRMV( 'Lower', 'Conjugate transpose', 'Unit', I-1,
+     $                  A( K+1, 1 ), LDA, T( 1, NB ), 1 )
+*
+*           w := w + V2'*b2
+*
+            CALL ZGEMV( 'Conjugate transpose', N-K-I+1, I-1, ONE,
+     $                  A( K+I, 1 ), LDA, A( K+I, I ), 1, ONE,
+     $                  T( 1, NB ), 1 )
+*
+*           w := T'*w
+*
+            CALL ZTRMV( 'Upper', 'Conjugate transpose', 'Non-unit', I-1,
+     $                  T, LDT, T( 1, NB ), 1 )
+*
+*           b2 := b2 - V2*w
+*
+            CALL ZGEMV( 'No transpose', N-K-I+1, I-1, -ONE, A( K+I, 1 ),
+     $                  LDA, T( 1, NB ), 1, ONE, A( K+I, I ), 1 )
+*
+*           b1 := b1 - V1*w
+*
+            CALL ZTRMV( 'Lower', 'No transpose', 'Unit', I-1,
+     $                  A( K+1, 1 ), LDA, T( 1, NB ), 1 )
+            CALL ZAXPY( I-1, -ONE, T( 1, NB ), 1, A( K+1, I ), 1 )
+*
+            A( K+I-1, I-1 ) = EI
+         END IF
+*
+*        Generate the elementary reflector H(i) to annihilate
+*        A(k+i+1:n,i)
+*
+         EI = A( K+I, I )
+         CALL ZLARFG( N-K-I+1, EI, A( MIN( K+I+1, N ), I ), 1,
+     $                TAU( I ) )
+         A( K+I, I ) = ONE
+*
+*        Compute  Y(1:n,i)
+*
+         CALL ZGEMV( 'No transpose', N, N-K-I+1, ONE, A( 1, I+1 ), LDA,
+     $               A( K+I, I ), 1, ZERO, Y( 1, I ), 1 )
+         CALL ZGEMV( 'Conjugate transpose', N-K-I+1, I-1, ONE,
+     $               A( K+I, 1 ), LDA, A( K+I, I ), 1, ZERO, T( 1, I ),
+     $               1 )
+         CALL ZGEMV( 'No transpose', N, I-1, -ONE, Y, LDY, T( 1, I ), 1,
+     $               ONE, Y( 1, I ), 1 )
+         CALL ZSCAL( N, TAU( I ), Y( 1, I ), 1 )
+*
+*        Compute T(1:i,i)
+*
+         CALL ZSCAL( I-1, -TAU( I ), T( 1, I ), 1 )
+         CALL ZTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T, LDT,
+     $               T( 1, I ), 1 )
+         T( I, I ) = TAU( I )
+*
+   10 CONTINUE
+      A( K+NB, NB ) = EI
+*
+      RETURN
+*
+*     End of ZLAHRD
+*
+      END
diff --git a/costa/native/external/lapack/zlaic1.f b/costa/native/external/lapack/zlaic1.f
new file mode 100644
index 000000000..297445d3b
--- /dev/null
+++ b/costa/native/external/lapack/zlaic1.f
@@ -0,0 +1,296 @@
+      SUBROUTINE ZLAIC1( JOB, J, X, SEST, W, GAMMA, SESTPR, S, C )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            J, JOB
+      DOUBLE PRECISION   SEST, SESTPR
+      COMPLEX*16         C, GAMMA, S
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         W( J ), X( J )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLAIC1 applies one step of incremental condition estimation in
+*  its simplest version:
+*
+*  Let x, twonorm(x) = 1, be an approximate singular vector of an j-by-j
+*  lower triangular matrix L, such that
+*           twonorm(L*x) = sest
+*  Then ZLAIC1 computes sestpr, s, c such that
+*  the vector
+*                  [ s*x ]
+*           xhat = [  c  ]
+*  is an approximate singular vector of
+*                  [ L     0  ]
+*           Lhat = [ w' gamma ]
+*  in the sense that
+*           twonorm(Lhat*xhat) = sestpr.
+*
+*  Depending on JOB, an estimate for the largest or smallest singular
+*  value is computed.
+*
+*  Note that [s c]' and sestpr**2 is an eigenpair of the system
+*
+*      diag(sest*sest, 0) + [alpha  gamma] * [ conjg(alpha) ]
+*                                            [ conjg(gamma) ]
+*
+*  where  alpha =  conjg(x)'*w.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) INTEGER
+*          = 1: an estimate for the largest singular value is computed.
+*          = 2: an estimate for the smallest singular value is computed.
+*
+*  J       (input) INTEGER
+*          Length of X and W
+*
+*  X       (input) COMPLEX*16 array, dimension (J)
+*          The j-vector x.
+*
+*  SEST    (input) DOUBLE PRECISION
+*          Estimated singular value of j by j matrix L
+*
+*  W       (input) COMPLEX*16 array, dimension (J)
+*          The j-vector w.
+*
+*  GAMMA   (input) COMPLEX*16
+*          The diagonal element gamma.
+*
+*  SESTPR  (output) DOUBLE PRECISION
+*          Estimated singular value of (j+1) by (j+1) matrix Lhat.
+*
+*  S       (output) COMPLEX*16
+*          Sine needed in forming xhat.
+*
+*  C       (output) COMPLEX*16
+*          Cosine needed in forming xhat.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 )
+      DOUBLE PRECISION   HALF, FOUR
+      PARAMETER          ( HALF = 0.5D0, FOUR = 4.0D0 )
+*     ..
+*     .. Local Scalars ..
+      DOUBLE PRECISION   ABSALP, ABSEST, ABSGAM, B, EPS, NORMA, S1, S2,
+     $                   SCL, T, TEST, TMP, ZETA1, ZETA2
+      COMPLEX*16         ALPHA, COSINE, SINE
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DCONJG, MAX, SQRT
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      COMPLEX*16         ZDOTC
+      EXTERNAL           DLAMCH, ZDOTC
+*     ..
+*     .. Executable Statements ..
+*
+      EPS = DLAMCH( 'Epsilon' )
+      ALPHA = ZDOTC( J, X, 1, W, 1 )
+*
+      ABSALP = ABS( ALPHA )
+      ABSGAM = ABS( GAMMA )
+      ABSEST = ABS( SEST )
+*
+      IF( JOB.EQ.1 ) THEN
+*
+*        Estimating largest singular value
+*
+*        special cases
+*
+         IF( SEST.EQ.ZERO ) THEN
+            S1 = MAX( ABSGAM, ABSALP )
+            IF( S1.EQ.ZERO ) THEN
+               S = ZERO
+               C = ONE
+               SESTPR = ZERO
+            ELSE
+               S = ALPHA / S1
+               C = GAMMA / S1
+               TMP = SQRT( S*DCONJG( S )+C*DCONJG( C ) )
+               S = S / TMP
+               C = C / TMP
+               SESTPR = S1*TMP
+            END IF
+            RETURN
+         ELSE IF( ABSGAM.LE.EPS*ABSEST ) THEN
+            S = ONE
+            C = ZERO
+            TMP = MAX( ABSEST, ABSALP )
+            S1 = ABSEST / TMP
+            S2 = ABSALP / TMP
+            SESTPR = TMP*SQRT( S1*S1+S2*S2 )
+            RETURN
+         ELSE IF( ABSALP.LE.EPS*ABSEST ) THEN
+            S1 = ABSGAM
+            S2 = ABSEST
+            IF( S1.LE.S2 ) THEN
+               S = ONE
+               C = ZERO
+               SESTPR = S2
+            ELSE
+               S = ZERO
+               C = ONE
+               SESTPR = S1
+            END IF
+            RETURN
+         ELSE IF( ABSEST.LE.EPS*ABSALP .OR. ABSEST.LE.EPS*ABSGAM ) THEN
+            S1 = ABSGAM
+            S2 = ABSALP
+            IF( S1.LE.S2 ) THEN
+               TMP = S1 / S2
+               SCL = SQRT( ONE+TMP*TMP )
+               SESTPR = S2*SCL
+               S = ( ALPHA / S2 ) / SCL
+               C = ( GAMMA / S2 ) / SCL
+            ELSE
+               TMP = S2 / S1
+               SCL = SQRT( ONE+TMP*TMP )
+               SESTPR = S1*SCL
+               S = ( ALPHA / S1 ) / SCL
+               C = ( GAMMA / S1 ) / SCL
+            END IF
+            RETURN
+         ELSE
+*
+*           normal case
+*
+            ZETA1 = ABSALP / ABSEST
+            ZETA2 = ABSGAM / ABSEST
+*
+            B = ( ONE-ZETA1*ZETA1-ZETA2*ZETA2 )*HALF
+            C = ZETA1*ZETA1
+            IF( B.GT.ZERO ) THEN
+               T = C / ( B+SQRT( B*B+C ) )
+            ELSE
+               T = SQRT( B*B+C ) - B
+            END IF
+*
+            SINE = -( ALPHA / ABSEST ) / T
+            COSINE = -( GAMMA / ABSEST ) / ( ONE+T )
+            TMP = SQRT( SINE*DCONJG( SINE )+COSINE*DCONJG( COSINE ) )
+            S = SINE / TMP
+            C = COSINE / TMP
+            SESTPR = SQRT( T+ONE )*ABSEST
+            RETURN
+         END IF
+*
+      ELSE IF( JOB.EQ.2 ) THEN
+*
+*        Estimating smallest singular value
+*
+*        special cases
+*
+         IF( SEST.EQ.ZERO ) THEN
+            SESTPR = ZERO
+            IF( MAX( ABSGAM, ABSALP ).EQ.ZERO ) THEN
+               SINE = ONE
+               COSINE = ZERO
+            ELSE
+               SINE = -DCONJG( GAMMA )
+               COSINE = DCONJG( ALPHA )
+            END IF
+            S1 = MAX( ABS( SINE ), ABS( COSINE ) )
+            S = SINE / S1
+            C = COSINE / S1
+            TMP = SQRT( S*DCONJG( S )+C*DCONJG( C ) )
+            S = S / TMP
+            C = C / TMP
+            RETURN
+         ELSE IF( ABSGAM.LE.EPS*ABSEST ) THEN
+            S = ZERO
+            C = ONE
+            SESTPR = ABSGAM
+            RETURN
+         ELSE IF( ABSALP.LE.EPS*ABSEST ) THEN
+            S1 = ABSGAM
+            S2 = ABSEST
+            IF( S1.LE.S2 ) THEN
+               S = ZERO
+               C = ONE
+               SESTPR = S1
+            ELSE
+               S = ONE
+               C = ZERO
+               SESTPR = S2
+            END IF
+            RETURN
+         ELSE IF( ABSEST.LE.EPS*ABSALP .OR. ABSEST.LE.EPS*ABSGAM ) THEN
+            S1 = ABSGAM
+            S2 = ABSALP
+            IF( S1.LE.S2 ) THEN
+               TMP = S1 / S2
+               SCL = SQRT( ONE+TMP*TMP )
+               SESTPR = ABSEST*( TMP / SCL )
+               S = -( DCONJG( GAMMA ) / S2 ) / SCL
+               C = ( DCONJG( ALPHA ) / S2 ) / SCL
+            ELSE
+               TMP = S2 / S1
+               SCL = SQRT( ONE+TMP*TMP )
+               SESTPR = ABSEST / SCL
+               S = -( DCONJG( GAMMA ) / S1 ) / SCL
+               C = ( DCONJG( ALPHA ) / S1 ) / SCL
+            END IF
+            RETURN
+         ELSE
+*
+*           normal case
+*
+            ZETA1 = ABSALP / ABSEST
+            ZETA2 = ABSGAM / ABSEST
+*
+            NORMA = MAX( ONE+ZETA1*ZETA1+ZETA1*ZETA2,
+     $              ZETA1*ZETA2+ZETA2*ZETA2 )
+*
+*           See if root is closer to zero or to ONE
+*
+            TEST = ONE + TWO*( ZETA1-ZETA2 )*( ZETA1+ZETA2 )
+            IF( TEST.GE.ZERO ) THEN
+*
+*              root is close to zero, compute directly
+*
+               B = ( ZETA1*ZETA1+ZETA2*ZETA2+ONE )*HALF
+               C = ZETA2*ZETA2
+               T = C / ( B+SQRT( ABS( B*B-C ) ) )
+               SINE = ( ALPHA / ABSEST ) / ( ONE-T )
+               COSINE = -( GAMMA / ABSEST ) / T
+               SESTPR = SQRT( T+FOUR*EPS*EPS*NORMA )*ABSEST
+            ELSE
+*
+*              root is closer to ONE, shift by that amount
+*
+               B = ( ZETA2*ZETA2+ZETA1*ZETA1-ONE )*HALF
+               C = ZETA1*ZETA1
+               IF( B.GE.ZERO ) THEN
+                  T = -C / ( B+SQRT( B*B+C ) )
+               ELSE
+                  T = B - SQRT( B*B+C )
+               END IF
+               SINE = -( ALPHA / ABSEST ) / T
+               COSINE = -( GAMMA / ABSEST ) / ( ONE+T )
+               SESTPR = SQRT( ONE+T+FOUR*EPS*EPS*NORMA )*ABSEST
+            END IF
+            TMP = SQRT( SINE*DCONJG( SINE )+COSINE*DCONJG( COSINE ) )
+            S = SINE / TMP
+            C = COSINE / TMP
+            RETURN
+*
+         END IF
+      END IF
+      RETURN
+*
+*     End of ZLAIC1
+*
+      END
diff --git a/costa/native/external/lapack/zlals0.f b/costa/native/external/lapack/zlals0.f
new file mode 100644
index 000000000..2abee26bb
--- /dev/null
+++ b/costa/native/external/lapack/zlals0.f
@@ -0,0 +1,434 @@
+      SUBROUTINE ZLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B, LDB, BX, LDBX,
+     $                   PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM,
+     $                   POLES, DIFL, DIFR, Z, K, C, S, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     December 1, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            GIVPTR, ICOMPQ, INFO, K, LDB, LDBX, LDGCOL,
+     $                   LDGNUM, NL, NR, NRHS, SQRE
+      DOUBLE PRECISION   C, S
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( LDGCOL, * ), PERM( * )
+      DOUBLE PRECISION   DIFL( * ), DIFR( LDGNUM, * ),
+     $                   GIVNUM( LDGNUM, * ), POLES( LDGNUM, * ),
+     $                   RWORK( * ), Z( * )
+      COMPLEX*16         B( LDB, * ), BX( LDBX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLALS0 applies back the multiplying factors of either the left or the
+*  right singular vector matrix of a diagonal matrix appended by a row
+*  to the right hand side matrix B in solving the least squares problem
+*  using the divide-and-conquer SVD approach.
+*
+*  For the left singular vector matrix, three types of orthogonal
+*  matrices are involved:
+*
+*  (1L) Givens rotations: the number of such rotations is GIVPTR; the
+*       pairs of columns/rows they were applied to are stored in GIVCOL;
+*       and the C- and S-values of these rotations are stored in GIVNUM.
+*
+*  (2L) Permutation. The (NL+1)-st row of B is to be moved to the first
+*       row, and for J=2:N, PERM(J)-th row of B is to be moved to the
+*       J-th row.
+*
+*  (3L) The left singular vector matrix of the remaining matrix.
+*
+*  For the right singular vector matrix, four types of orthogonal
+*  matrices are involved:
+*
+*  (1R) The right singular vector matrix of the remaining matrix.
+*
+*  (2R) If SQRE = 1, one extra Givens rotation to generate the right
+*       null space.
+*
+*  (3R) The inverse transformation of (2L).
+*
+*  (4R) The inverse transformation of (1L).
+*
+*  Arguments
+*  =========
+*
+*  ICOMPQ (input) INTEGER
+*         Specifies whether singular vectors are to be computed in
+*         factored form:
+*         = 0: Left singular vector matrix.
+*         = 1: Right singular vector matrix.
+*
+*  NL     (input) INTEGER
+*         The row dimension of the upper block. NL >= 1.
+*
+*  NR     (input) INTEGER
+*         The row dimension of the lower block. NR >= 1.
+*
+*  SQRE   (input) INTEGER
+*         = 0: the lower block is an NR-by-NR square matrix.
+*         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+*
+*         The bidiagonal matrix has row dimension N = NL + NR + 1,
+*         and column dimension M = N + SQRE.
+*
+*  NRHS   (input) INTEGER
+*         The number of columns of B and BX. NRHS must be at least 1.
+*
+*  B      (input/output) COMPLEX*16 array, dimension ( LDB, NRHS )
+*         On input, B contains the right hand sides of the least
+*         squares problem in rows 1 through M. On output, B contains
+*         the solution X in rows 1 through N.
+*
+*  LDB    (input) INTEGER
+*         The leading dimension of B. LDB must be at least
+*         max(1,MAX( M, N ) ).
+*
+*  BX     (workspace) COMPLEX*16 array, dimension ( LDBX, NRHS )
+*
+*  LDBX   (input) INTEGER
+*         The leading dimension of BX.
+*
+*  PERM   (input) INTEGER array, dimension ( N )
+*         The permutations (from deflation and sorting) applied
+*         to the two blocks.
+*
+*  GIVPTR (input) INTEGER
+*         The number of Givens rotations which took place in this
+*         subproblem.
+*
+*  GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 )
+*         Each pair of numbers indicates a pair of rows/columns
+*         involved in a Givens rotation.
+*
+*  LDGCOL (input) INTEGER
+*         The leading dimension of GIVCOL, must be at least N.
+*
+*  GIVNUM (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 )
+*         Each number indicates the C or S value used in the
+*         corresponding Givens rotation.
+*
+*  LDGNUM (input) INTEGER
+*         The leading dimension of arrays DIFR, POLES and
+*         GIVNUM, must be at least K.
+*
+*  POLES  (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 )
+*         On entry, POLES(1:K, 1) contains the new singular
+*         values obtained from solving the secular equation, and
+*         POLES(1:K, 2) is an array containing the poles in the secular
+*         equation.
+*
+*  DIFL   (input) DOUBLE PRECISION array, dimension ( K ).
+*         On entry, DIFL(I) is the distance between I-th updated
+*         (undeflated) singular value and the I-th (undeflated) old
+*         singular value.
+*
+*  DIFR   (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ).
+*         On entry, DIFR(I, 1) contains the distances between I-th
+*         updated (undeflated) singular value and the I+1-th
+*         (undeflated) old singular value. And DIFR(I, 2) is the
+*         normalizing factor for the I-th right singular vector.
+*
+*  Z      (input) DOUBLE PRECISION array, dimension ( K )
+*         Contain the components of the deflation-adjusted updating row
+*         vector.
+*
+*  K      (input) INTEGER
+*         Contains the dimension of the non-deflated matrix,
+*         This is the order of the related secular equation. 1 <= K <=N.
+*
+*  C      (input) DOUBLE PRECISION
+*         C contains garbage if SQRE =0 and the C-value of a Givens
+*         rotation related to the right null space if SQRE = 1.
+*
+*  S      (input) DOUBLE PRECISION
+*         S contains garbage if SQRE =0 and the S-value of a Givens
+*         rotation related to the right null space if SQRE = 1.
+*
+*  RWORK  (workspace) DOUBLE PRECISION array, dimension
+*         ( K*(1+NRHS) + 2*NRHS )
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Ren-Cang Li, Computer Science Division, University of
+*       California at Berkeley, USA
+*     Osni Marques, LBNL/NERSC, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO, NEGONE
+      PARAMETER          ( ONE = 1.0D0, ZERO = 0.0D0, NEGONE = -1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, JCOL, JROW, M, N, NLP1
+      DOUBLE PRECISION   DIFLJ, DIFRJ, DJ, DSIGJ, DSIGJP, TEMP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMV, XERBLA, ZCOPY, ZDROT, ZDSCAL, ZLACPY,
+     $                   ZLASCL
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMC3, DNRM2
+      EXTERNAL           DLAMC3, DNRM2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, DCMPLX, DIMAG
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( NL.LT.1 ) THEN
+         INFO = -2
+      ELSE IF( NR.LT.1 ) THEN
+         INFO = -3
+      ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN
+         INFO = -4
+      END IF
+*
+      N = NL + NR + 1
+*
+      IF( NRHS.LT.1 ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.N ) THEN
+         INFO = -7
+      ELSE IF( LDBX.LT.N ) THEN
+         INFO = -9
+      ELSE IF( GIVPTR.LT.0 ) THEN
+         INFO = -11
+      ELSE IF( LDGCOL.LT.N ) THEN
+         INFO = -13
+      ELSE IF( LDGNUM.LT.N ) THEN
+         INFO = -15
+      ELSE IF( K.LT.1 ) THEN
+         INFO = -20
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZLALS0', -INFO )
+         RETURN
+      END IF
+*
+      M = N + SQRE
+      NLP1 = NL + 1
+*
+      IF( ICOMPQ.EQ.0 ) THEN
+*
+*        Apply back orthogonal transformations from the left.
+*
+*        Step (1L): apply back the Givens rotations performed.
+*
+         DO 10 I = 1, GIVPTR
+            CALL ZDROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB,
+     $                  B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ),
+     $                  GIVNUM( I, 1 ) )
+   10    CONTINUE
+*
+*        Step (2L): permute rows of B.
+*
+         CALL ZCOPY( NRHS, B( NLP1, 1 ), LDB, BX( 1, 1 ), LDBX )
+         DO 20 I = 2, N
+            CALL ZCOPY( NRHS, B( PERM( I ), 1 ), LDB, BX( I, 1 ), LDBX )
+   20    CONTINUE
+*
+*        Step (3L): apply the inverse of the left singular vector
+*        matrix to BX.
+*
+         IF( K.EQ.1 ) THEN
+            CALL ZCOPY( NRHS, BX, LDBX, B, LDB )
+            IF( Z( 1 ).LT.ZERO ) THEN
+               CALL ZDSCAL( NRHS, NEGONE, B, LDB )
+            END IF
+         ELSE
+            DO 100 J = 1, K
+               DIFLJ = DIFL( J )
+               DJ = POLES( J, 1 )
+               DSIGJ = -POLES( J, 2 )
+               IF( J.LT.K ) THEN
+                  DIFRJ = -DIFR( J, 1 )
+                  DSIGJP = -POLES( J+1, 2 )
+               END IF
+               IF( ( Z( J ).EQ.ZERO ) .OR. ( POLES( J, 2 ).EQ.ZERO ) )
+     $              THEN
+                  RWORK( J ) = ZERO
+               ELSE
+                  RWORK( J ) = -POLES( J, 2 )*Z( J ) / DIFLJ /
+     $                         ( POLES( J, 2 )+DJ )
+               END IF
+               DO 30 I = 1, J - 1
+                  IF( ( Z( I ).EQ.ZERO ) .OR.
+     $                ( POLES( I, 2 ).EQ.ZERO ) ) THEN
+                     RWORK( I ) = ZERO
+                  ELSE
+                     RWORK( I ) = POLES( I, 2 )*Z( I ) /
+     $                            ( DLAMC3( POLES( I, 2 ), DSIGJ )-
+     $                            DIFLJ ) / ( POLES( I, 2 )+DJ )
+                  END IF
+   30          CONTINUE
+               DO 40 I = J + 1, K
+                  IF( ( Z( I ).EQ.ZERO ) .OR.
+     $                ( POLES( I, 2 ).EQ.ZERO ) ) THEN
+                     RWORK( I ) = ZERO
+                  ELSE
+                     RWORK( I ) = POLES( I, 2 )*Z( I ) /
+     $                            ( DLAMC3( POLES( I, 2 ), DSIGJP )+
+     $                            DIFRJ ) / ( POLES( I, 2 )+DJ )
+                  END IF
+   40          CONTINUE
+               RWORK( 1 ) = NEGONE
+               TEMP = DNRM2( K, RWORK, 1 )
+*
+*              Since B and BX are complex, the following call to DGEMV
+*              is performed in two steps (real and imaginary parts).
+*
+*              CALL DGEMV( 'T', K, NRHS, ONE, BX, LDBX, WORK, 1, ZERO,
+*    $                     B( J, 1 ), LDB )
+*
+               I = K + NRHS*2
+               DO 60 JCOL = 1, NRHS
+                  DO 50 JROW = 1, K
+                     I = I + 1
+                     RWORK( I ) = DBLE( BX( JROW, JCOL ) )
+   50             CONTINUE
+   60          CONTINUE
+               CALL DGEMV( 'T', K, NRHS, ONE, RWORK( 1+K+NRHS*2 ), K,
+     $                     RWORK( 1 ), 1, ZERO, RWORK( 1+K ), 1 )
+               I = K + NRHS*2
+               DO 80 JCOL = 1, NRHS
+                  DO 70 JROW = 1, K
+                     I = I + 1
+                     RWORK( I ) = DIMAG( BX( JROW, JCOL ) )
+   70             CONTINUE
+   80          CONTINUE
+               CALL DGEMV( 'T', K, NRHS, ONE, RWORK( 1+K+NRHS*2 ), K,
+     $                     RWORK( 1 ), 1, ZERO, RWORK( 1+K+NRHS ), 1 )
+               DO 90 JCOL = 1, NRHS
+                  B( J, JCOL ) = DCMPLX( RWORK( JCOL+K ),
+     $                           RWORK( JCOL+K+NRHS ) )
+   90          CONTINUE
+               CALL ZLASCL( 'G', 0, 0, TEMP, ONE, 1, NRHS, B( J, 1 ),
+     $                      LDB, INFO )
+  100       CONTINUE
+         END IF
+*
+*        Move the deflated rows of BX to B also.
+*
+         IF( K.LT.MAX( M, N ) )
+     $      CALL ZLACPY( 'A', N-K, NRHS, BX( K+1, 1 ), LDBX,
+     $                   B( K+1, 1 ), LDB )
+      ELSE
+*
+*        Apply back the right orthogonal transformations.
+*
+*        Step (1R): apply back the new right singular vector matrix
+*        to B.
+*
+         IF( K.EQ.1 ) THEN
+            CALL ZCOPY( NRHS, B, LDB, BX, LDBX )
+         ELSE
+            DO 180 J = 1, K
+               DSIGJ = POLES( J, 2 )
+               IF( Z( J ).EQ.ZERO ) THEN
+                  RWORK( J ) = ZERO
+               ELSE
+                  RWORK( J ) = -Z( J ) / DIFL( J ) /
+     $                         ( DSIGJ+POLES( J, 1 ) ) / DIFR( J, 2 )
+               END IF
+               DO 110 I = 1, J - 1
+                  IF( Z( J ).EQ.ZERO ) THEN
+                     RWORK( I ) = ZERO
+                  ELSE
+                     RWORK( I ) = Z( J ) / ( DLAMC3( DSIGJ, -POLES( I+1,
+     $                            2 ) )-DIFR( I, 1 ) ) /
+     $                            ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 )
+                  END IF
+  110          CONTINUE
+               DO 120 I = J + 1, K
+                  IF( Z( J ).EQ.ZERO ) THEN
+                     RWORK( I ) = ZERO
+                  ELSE
+                     RWORK( I ) = Z( J ) / ( DLAMC3( DSIGJ, -POLES( I,
+     $                            2 ) )-DIFL( I ) ) /
+     $                            ( DSIGJ+POLES( I, 1 ) ) / DIFR( I, 2 )
+                  END IF
+  120          CONTINUE
+*
+*              Since B and BX are complex, the following call to DGEMV
+*              is performed in two steps (real and imaginary parts).
+*
+*              CALL DGEMV( 'T', K, NRHS, ONE, B, LDB, WORK, 1, ZERO,
+*    $                     BX( J, 1 ), LDBX )
+*
+               I = K + NRHS*2
+               DO 140 JCOL = 1, NRHS
+                  DO 130 JROW = 1, K
+                     I = I + 1
+                     RWORK( I ) = DBLE( B( JROW, JCOL ) )
+  130             CONTINUE
+  140          CONTINUE
+               CALL DGEMV( 'T', K, NRHS, ONE, RWORK( 1+K+NRHS*2 ), K,
+     $                     RWORK( 1 ), 1, ZERO, RWORK( 1+K ), 1 )
+               I = K + NRHS*2
+               DO 160 JCOL = 1, NRHS
+                  DO 150 JROW = 1, K
+                     I = I + 1
+                     RWORK( I ) = DIMAG( B( JROW, JCOL ) )
+  150             CONTINUE
+  160          CONTINUE
+               CALL DGEMV( 'T', K, NRHS, ONE, RWORK( 1+K+NRHS*2 ), K,
+     $                     RWORK( 1 ), 1, ZERO, RWORK( 1+K+NRHS ), 1 )
+               DO 170 JCOL = 1, NRHS
+                  BX( J, JCOL ) = DCMPLX( RWORK( JCOL+K ),
+     $                            RWORK( JCOL+K+NRHS ) )
+  170          CONTINUE
+  180       CONTINUE
+         END IF
+*
+*        Step (2R): if SQRE = 1, apply back the rotation that is
+*        related to the right null space of the subproblem.
+*
+         IF( SQRE.EQ.1 ) THEN
+            CALL ZCOPY( NRHS, B( M, 1 ), LDB, BX( M, 1 ), LDBX )
+            CALL ZDROT( NRHS, BX( 1, 1 ), LDBX, BX( M, 1 ), LDBX, C, S )
+         END IF
+         IF( K.LT.MAX( M, N ) )
+     $      CALL ZLACPY( 'A', N-K, NRHS, B( K+1, 1 ), LDB, BX( K+1, 1 ),
+     $                   LDBX )
+*
+*        Step (3R): permute rows of B.
+*
+         CALL ZCOPY( NRHS, BX( 1, 1 ), LDBX, B( NLP1, 1 ), LDB )
+         IF( SQRE.EQ.1 ) THEN
+            CALL ZCOPY( NRHS, BX( M, 1 ), LDBX, B( M, 1 ), LDB )
+         END IF
+         DO 190 I = 2, N
+            CALL ZCOPY( NRHS, BX( I, 1 ), LDBX, B( PERM( I ), 1 ), LDB )
+  190    CONTINUE
+*
+*        Step (4R): apply back the Givens rotations performed.
+*
+         DO 200 I = GIVPTR, 1, -1
+            CALL ZDROT( NRHS, B( GIVCOL( I, 2 ), 1 ), LDB,
+     $                  B( GIVCOL( I, 1 ), 1 ), LDB, GIVNUM( I, 2 ),
+     $                  -GIVNUM( I, 1 ) )
+  200    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZLALS0
+*
+      END
diff --git a/costa/native/external/lapack/zlalsa.f b/costa/native/external/lapack/zlalsa.f
new file mode 100644
index 000000000..79d796da4
--- /dev/null
+++ b/costa/native/external/lapack/zlalsa.f
@@ -0,0 +1,504 @@
+      SUBROUTINE ZLALSA( ICOMPQ, SMLSIZ, N, NRHS, B, LDB, BX, LDBX, U,
+     $                   LDU, VT, K, DIFL, DIFR, Z, POLES, GIVPTR,
+     $                   GIVCOL, LDGCOL, PERM, GIVNUM, C, S, RWORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            ICOMPQ, INFO, LDB, LDBX, LDGCOL, LDU, N, NRHS,
+     $                   SMLSIZ
+*     ..
+*     .. Array Arguments ..
+      INTEGER            GIVCOL( LDGCOL, * ), GIVPTR( * ), IWORK( * ),
+     $                   K( * ), PERM( LDGCOL, * )
+      DOUBLE PRECISION   C( * ), DIFL( LDU, * ), DIFR( LDU, * ),
+     $                   GIVNUM( LDU, * ), POLES( LDU, * ), RWORK( * ),
+     $                   S( * ), U( LDU, * ), VT( LDU, * ), Z( LDU, * )
+      COMPLEX*16         B( LDB, * ), BX( LDBX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLALSA is an itermediate step in solving the least squares problem
+*  by computing the SVD of the coefficient matrix in compact form (The
+*  singular vectors are computed as products of simple orthorgonal
+*  matrices.).
+*
+*  If ICOMPQ = 0, ZLALSA applies the inverse of the left singular vector
+*  matrix of an upper bidiagonal matrix to the right hand side; and if
+*  ICOMPQ = 1, ZLALSA applies the right singular vector matrix to the
+*  right hand side. The singular vector matrices were generated in
+*  compact form by ZLALSA.
+*
+*  Arguments
+*  =========
+*
+*  ICOMPQ (input) INTEGER
+*         Specifies whether the left or the right singular vector
+*         matrix is involved.
+*         = 0: Left singular vector matrix
+*         = 1: Right singular vector matrix
+*
+*  SMLSIZ (input) INTEGER
+*         The maximum size of the subproblems at the bottom of the
+*         computation tree.
+*
+*  N      (input) INTEGER
+*         The row and column dimensions of the upper bidiagonal matrix.
+*
+*  NRHS   (input) INTEGER
+*         The number of columns of B and BX. NRHS must be at least 1.
+*
+*  B      (input) COMPLEX*16 array, dimension ( LDB, NRHS )
+*         On input, B contains the right hand sides of the least
+*         squares problem in rows 1 through M. On output, B contains
+*         the solution X in rows 1 through N.
+*
+*  LDB    (input) INTEGER
+*         The leading dimension of B in the calling subprogram.
+*         LDB must be at least max(1,MAX( M, N ) ).
+*
+*  BX     (output) COMPLEX*16 array, dimension ( LDBX, NRHS )
+*         On exit, the result of applying the left or right singular
+*         vector matrix to B.
+*
+*  LDBX   (input) INTEGER
+*         The leading dimension of BX.
+*
+*  U      (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ).
+*         On entry, U contains the left singular vector matrices of all
+*         subproblems at the bottom level.
+*
+*  LDU    (input) INTEGER, LDU = > N.
+*         The leading dimension of arrays U, VT, DIFL, DIFR,
+*         POLES, GIVNUM, and Z.
+*
+*  VT     (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ).
+*         On entry, VT' contains the right singular vector matrices of
+*         all subproblems at the bottom level.
+*
+*  K      (input) INTEGER array, dimension ( N ).
+*
+*  DIFL   (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ).
+*         where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1.
+*
+*  DIFR   (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).
+*         On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record
+*         distances between singular values on the I-th level and
+*         singular values on the (I -1)-th level, and DIFR(*, 2 * I)
+*         record the normalizing factors of the right singular vectors
+*         matrices of subproblems on I-th level.
+*
+*  Z      (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ).
+*         On entry, Z(1, I) contains the components of the deflation-
+*         adjusted updating row vector for subproblems on the I-th
+*         level.
+*
+*  POLES  (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).
+*         On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old
+*         singular values involved in the secular equations on the I-th
+*         level.
+*
+*  GIVPTR (input) INTEGER array, dimension ( N ).
+*         On entry, GIVPTR( I ) records the number of Givens
+*         rotations performed on the I-th problem on the computation
+*         tree.
+*
+*  GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ).
+*         On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the
+*         locations of Givens rotations performed on the I-th level on
+*         the computation tree.
+*
+*  LDGCOL (input) INTEGER, LDGCOL = > N.
+*         The leading dimension of arrays GIVCOL and PERM.
+*
+*  PERM   (input) INTEGER array, dimension ( LDGCOL, NLVL ).
+*         On entry, PERM(*, I) records permutations done on the I-th
+*         level of the computation tree.
+*
+*  GIVNUM (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).
+*         On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S-
+*         values of Givens rotations performed on the I-th level on the
+*         computation tree.
+*
+*  C      (input) DOUBLE PRECISION array, dimension ( N ).
+*         On entry, if the I-th subproblem is not square,
+*         C( I ) contains the C-value of a Givens rotation related to
+*         the right null space of the I-th subproblem.
+*
+*  S      (input) DOUBLE PRECISION array, dimension ( N ).
+*         On entry, if the I-th subproblem is not square,
+*         S( I ) contains the S-value of a Givens rotation related to
+*         the right null space of the I-th subproblem.
+*
+*  RWORK  (workspace) DOUBLE PRECISION array, dimension at least
+*         max ( N, (SMLSZ+1)*NRHS*3 ).
+*
+*  IWORK  (workspace) INTEGER array.
+*         The dimension must be at least 3 * N
+*
+*  INFO   (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Ren-Cang Li, Computer Science Division, University of
+*       California at Berkeley, USA
+*     Osni Marques, LBNL/NERSC, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, I1, IC, IM1, INODE, J, JCOL, JIMAG, JREAL,
+     $                   JROW, LF, LL, LVL, LVL2, ND, NDB1, NDIML,
+     $                   NDIMR, NL, NLF, NLP1, NLVL, NR, NRF, NRP1, SQRE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMM, DLASDT, XERBLA, ZCOPY, ZLALS0
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, DCMPLX, DIMAG
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
+         INFO = -1
+      ELSE IF( SMLSIZ.LT.3 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.SMLSIZ ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.1 ) THEN
+         INFO = -4
+      ELSE IF( LDB.LT.N ) THEN
+         INFO = -6
+      ELSE IF( LDBX.LT.N ) THEN
+         INFO = -8
+      ELSE IF( LDU.LT.N ) THEN
+         INFO = -10
+      ELSE IF( LDGCOL.LT.N ) THEN
+         INFO = -19
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZLALSA', -INFO )
+         RETURN
+      END IF
+*
+*     Book-keeping and  setting up the computation tree.
+*
+      INODE = 1
+      NDIML = INODE + N
+      NDIMR = NDIML + N
+*
+      CALL DLASDT( N, NLVL, ND, IWORK( INODE ), IWORK( NDIML ),
+     $             IWORK( NDIMR ), SMLSIZ )
+*
+*     The following code applies back the left singular vector factors.
+*     For applying back the right singular vector factors, go to 170.
+*
+      IF( ICOMPQ.EQ.1 ) THEN
+         GO TO 170
+      END IF
+*
+*     The nodes on the bottom level of the tree were solved
+*     by DLASDQ. The corresponding left and right singular vector
+*     matrices are in explicit form. First apply back the left
+*     singular vector matrices.
+*
+      NDB1 = ( ND+1 ) / 2
+      DO 130 I = NDB1, ND
+*
+*        IC : center row of each node
+*        NL : number of rows of left  subproblem
+*        NR : number of rows of right subproblem
+*        NLF: starting row of the left   subproblem
+*        NRF: starting row of the right  subproblem
+*
+         I1 = I - 1
+         IC = IWORK( INODE+I1 )
+         NL = IWORK( NDIML+I1 )
+         NR = IWORK( NDIMR+I1 )
+         NLF = IC - NL
+         NRF = IC + 1
+*
+*        Since B and BX are complex, the following call to DGEMM
+*        is performed in two steps (real and imaginary parts).
+*
+*        CALL DGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
+*     $               B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
+*
+         J = NL*NRHS*2
+         DO 20 JCOL = 1, NRHS
+            DO 10 JROW = NLF, NLF + NL - 1
+               J = J + 1
+               RWORK( J ) = DBLE( B( JROW, JCOL ) )
+   10       CONTINUE
+   20    CONTINUE
+         CALL DGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
+     $               RWORK( 1+NL*NRHS*2 ), NL, ZERO, RWORK( 1 ), NL )
+         J = NL*NRHS*2
+         DO 40 JCOL = 1, NRHS
+            DO 30 JROW = NLF, NLF + NL - 1
+               J = J + 1
+               RWORK( J ) = DIMAG( B( JROW, JCOL ) )
+   30       CONTINUE
+   40    CONTINUE
+         CALL DGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
+     $               RWORK( 1+NL*NRHS*2 ), NL, ZERO, RWORK( 1+NL*NRHS ),
+     $               NL )
+         JREAL = 0
+         JIMAG = NL*NRHS
+         DO 60 JCOL = 1, NRHS
+            DO 50 JROW = NLF, NLF + NL - 1
+               JREAL = JREAL + 1
+               JIMAG = JIMAG + 1
+               BX( JROW, JCOL ) = DCMPLX( RWORK( JREAL ),
+     $                            RWORK( JIMAG ) )
+   50       CONTINUE
+   60    CONTINUE
+*
+*        Since B and BX are complex, the following call to DGEMM
+*        is performed in two steps (real and imaginary parts).
+*
+*        CALL DGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
+*    $               B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
+*
+         J = NR*NRHS*2
+         DO 80 JCOL = 1, NRHS
+            DO 70 JROW = NRF, NRF + NR - 1
+               J = J + 1
+               RWORK( J ) = DBLE( B( JROW, JCOL ) )
+   70       CONTINUE
+   80    CONTINUE
+         CALL DGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
+     $               RWORK( 1+NR*NRHS*2 ), NR, ZERO, RWORK( 1 ), NR )
+         J = NR*NRHS*2
+         DO 100 JCOL = 1, NRHS
+            DO 90 JROW = NRF, NRF + NR - 1
+               J = J + 1
+               RWORK( J ) = DIMAG( B( JROW, JCOL ) )
+   90       CONTINUE
+  100    CONTINUE
+         CALL DGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
+     $               RWORK( 1+NR*NRHS*2 ), NR, ZERO, RWORK( 1+NR*NRHS ),
+     $               NR )
+         JREAL = 0
+         JIMAG = NR*NRHS
+         DO 120 JCOL = 1, NRHS
+            DO 110 JROW = NRF, NRF + NR - 1
+               JREAL = JREAL + 1
+               JIMAG = JIMAG + 1
+               BX( JROW, JCOL ) = DCMPLX( RWORK( JREAL ),
+     $                            RWORK( JIMAG ) )
+  110       CONTINUE
+  120    CONTINUE
+*
+  130 CONTINUE
+*
+*     Next copy the rows of B that correspond to unchanged rows
+*     in the bidiagonal matrix to BX.
+*
+      DO 140 I = 1, ND
+         IC = IWORK( INODE+I-1 )
+         CALL ZCOPY( NRHS, B( IC, 1 ), LDB, BX( IC, 1 ), LDBX )
+  140 CONTINUE
+*
+*     Finally go through the left singular vector matrices of all
+*     the other subproblems bottom-up on the tree.
+*
+      J = 2**NLVL
+      SQRE = 0
+*
+      DO 160 LVL = NLVL, 1, -1
+         LVL2 = 2*LVL - 1
+*
+*        find the first node LF and last node LL on
+*        the current level LVL
+*
+         IF( LVL.EQ.1 ) THEN
+            LF = 1
+            LL = 1
+         ELSE
+            LF = 2**( LVL-1 )
+            LL = 2*LF - 1
+         END IF
+         DO 150 I = LF, LL
+            IM1 = I - 1
+            IC = IWORK( INODE+IM1 )
+            NL = IWORK( NDIML+IM1 )
+            NR = IWORK( NDIMR+IM1 )
+            NLF = IC - NL
+            NRF = IC + 1
+            J = J - 1
+            CALL ZLALS0( ICOMPQ, NL, NR, SQRE, NRHS, BX( NLF, 1 ), LDBX,
+     $                   B( NLF, 1 ), LDB, PERM( NLF, LVL ),
+     $                   GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
+     $                   GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ),
+     $                   DIFL( NLF, LVL ), DIFR( NLF, LVL2 ),
+     $                   Z( NLF, LVL ), K( J ), C( J ), S( J ), RWORK,
+     $                   INFO )
+  150    CONTINUE
+  160 CONTINUE
+      GO TO 330
+*
+*     ICOMPQ = 1: applying back the right singular vector factors.
+*
+  170 CONTINUE
+*
+*     First now go through the right singular vector matrices of all
+*     the tree nodes top-down.
+*
+      J = 0
+      DO 190 LVL = 1, NLVL
+         LVL2 = 2*LVL - 1
+*
+*        Find the first node LF and last node LL on
+*        the current level LVL.
+*
+         IF( LVL.EQ.1 ) THEN
+            LF = 1
+            LL = 1
+         ELSE
+            LF = 2**( LVL-1 )
+            LL = 2*LF - 1
+         END IF
+         DO 180 I = LL, LF, -1
+            IM1 = I - 1
+            IC = IWORK( INODE+IM1 )
+            NL = IWORK( NDIML+IM1 )
+            NR = IWORK( NDIMR+IM1 )
+            NLF = IC - NL
+            NRF = IC + 1
+            IF( I.EQ.LL ) THEN
+               SQRE = 0
+            ELSE
+               SQRE = 1
+            END IF
+            J = J + 1
+            CALL ZLALS0( ICOMPQ, NL, NR, SQRE, NRHS, B( NLF, 1 ), LDB,
+     $                   BX( NLF, 1 ), LDBX, PERM( NLF, LVL ),
+     $                   GIVPTR( J ), GIVCOL( NLF, LVL2 ), LDGCOL,
+     $                   GIVNUM( NLF, LVL2 ), LDU, POLES( NLF, LVL2 ),
+     $                   DIFL( NLF, LVL ), DIFR( NLF, LVL2 ),
+     $                   Z( NLF, LVL ), K( J ), C( J ), S( J ), RWORK,
+     $                   INFO )
+  180    CONTINUE
+  190 CONTINUE
+*
+*     The nodes on the bottom level of the tree were solved
+*     by DLASDQ. The corresponding right singular vector
+*     matrices are in explicit form. Apply them back.
+*
+      NDB1 = ( ND+1 ) / 2
+      DO 320 I = NDB1, ND
+         I1 = I - 1
+         IC = IWORK( INODE+I1 )
+         NL = IWORK( NDIML+I1 )
+         NR = IWORK( NDIMR+I1 )
+         NLP1 = NL + 1
+         IF( I.EQ.ND ) THEN
+            NRP1 = NR
+         ELSE
+            NRP1 = NR + 1
+         END IF
+         NLF = IC - NL
+         NRF = IC + 1
+*
+*        Since B and BX are complex, the following call to DGEMM is
+*        performed in two steps (real and imaginary parts).
+*
+*        CALL DGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
+*    $               B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
+*
+         J = NLP1*NRHS*2
+         DO 210 JCOL = 1, NRHS
+            DO 200 JROW = NLF, NLF + NLP1 - 1
+               J = J + 1
+               RWORK( J ) = DBLE( B( JROW, JCOL ) )
+  200       CONTINUE
+  210    CONTINUE
+         CALL DGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
+     $               RWORK( 1+NLP1*NRHS*2 ), NLP1, ZERO, RWORK( 1 ),
+     $               NLP1 )
+         J = NLP1*NRHS*2
+         DO 230 JCOL = 1, NRHS
+            DO 220 JROW = NLF, NLF + NLP1 - 1
+               J = J + 1
+               RWORK( J ) = DIMAG( B( JROW, JCOL ) )
+  220       CONTINUE
+  230    CONTINUE
+         CALL DGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
+     $               RWORK( 1+NLP1*NRHS*2 ), NLP1, ZERO,
+     $               RWORK( 1+NLP1*NRHS ), NLP1 )
+         JREAL = 0
+         JIMAG = NLP1*NRHS
+         DO 250 JCOL = 1, NRHS
+            DO 240 JROW = NLF, NLF + NLP1 - 1
+               JREAL = JREAL + 1
+               JIMAG = JIMAG + 1
+               BX( JROW, JCOL ) = DCMPLX( RWORK( JREAL ),
+     $                            RWORK( JIMAG ) )
+  240       CONTINUE
+  250    CONTINUE
+*
+*        Since B and BX are complex, the following call to DGEMM is
+*        performed in two steps (real and imaginary parts).
+*
+*        CALL DGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
+*    $               B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
+*
+         J = NRP1*NRHS*2
+         DO 270 JCOL = 1, NRHS
+            DO 260 JROW = NRF, NRF + NRP1 - 1
+               J = J + 1
+               RWORK( J ) = DBLE( B( JROW, JCOL ) )
+  260       CONTINUE
+  270    CONTINUE
+         CALL DGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
+     $               RWORK( 1+NRP1*NRHS*2 ), NRP1, ZERO, RWORK( 1 ),
+     $               NRP1 )
+         J = NRP1*NRHS*2
+         DO 290 JCOL = 1, NRHS
+            DO 280 JROW = NRF, NRF + NRP1 - 1
+               J = J + 1
+               RWORK( J ) = DIMAG( B( JROW, JCOL ) )
+  280       CONTINUE
+  290    CONTINUE
+         CALL DGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
+     $               RWORK( 1+NRP1*NRHS*2 ), NRP1, ZERO,
+     $               RWORK( 1+NRP1*NRHS ), NRP1 )
+         JREAL = 0
+         JIMAG = NRP1*NRHS
+         DO 310 JCOL = 1, NRHS
+            DO 300 JROW = NRF, NRF + NRP1 - 1
+               JREAL = JREAL + 1
+               JIMAG = JIMAG + 1
+               BX( JROW, JCOL ) = DCMPLX( RWORK( JREAL ),
+     $                            RWORK( JIMAG ) )
+  300       CONTINUE
+  310    CONTINUE
+*
+  320 CONTINUE
+*
+  330 CONTINUE
+*
+      RETURN
+*
+*     End of ZLALSA
+*
+      END
diff --git a/costa/native/external/lapack/zlalsd.f b/costa/native/external/lapack/zlalsd.f
new file mode 100644
index 000000000..da700d100
--- /dev/null
+++ b/costa/native/external/lapack/zlalsd.f
@@ -0,0 +1,599 @@
+      SUBROUTINE ZLALSD( UPLO, SMLSIZ, N, NRHS, D, E, B, LDB, RCOND,
+     $                   RANK, WORK, RWORK, IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, N, NRHS, RANK, SMLSIZ
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   D( * ), E( * ), RWORK( * )
+      COMPLEX*16         B( LDB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLALSD uses the singular value decomposition of A to solve the least
+*  squares problem of finding X to minimize the Euclidean norm of each
+*  column of A*X-B, where A is N-by-N upper bidiagonal, and X and B
+*  are N-by-NRHS. The solution X overwrites B.
+*
+*  The singular values of A smaller than RCOND times the largest
+*  singular value are treated as zero in solving the least squares
+*  problem; in this case a minimum norm solution is returned.
+*  The actual singular values are returned in D in ascending order.
+*
+*  This code makes very mild assumptions about floating point
+*  arithmetic. It will work on machines with a guard digit in
+*  add/subtract, or on those binary machines without guard digits
+*  which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2.
+*  It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.
+*
+*  Arguments
+*  =========
+*
+*  UPLO   (input) CHARACTER*1
+*         = 'U': D and E define an upper bidiagonal matrix.
+*         = 'L': D and E define a  lower bidiagonal matrix.
+*
+*  SMLSIZ (input) INTEGER
+*         The maximum size of the subproblems at the bottom of the
+*         computation tree.
+*
+*  N      (input) INTEGER
+*         The dimension of the  bidiagonal matrix.  N >= 0.
+*
+*  NRHS   (input) INTEGER
+*         The number of columns of B. NRHS must be at least 1.
+*
+*  D      (input/output) DOUBLE PRECISION array, dimension (N)
+*         On entry D contains the main diagonal of the bidiagonal
+*         matrix. On exit, if INFO = 0, D contains its singular values.
+*
+*  E      (input) DOUBLE PRECISION array, dimension (N-1)
+*         Contains the super-diagonal entries of the bidiagonal matrix.
+*         On exit, E has been destroyed.
+*
+*  B      (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*         On input, B contains the right hand sides of the least
+*         squares problem. On output, B contains the solution X.
+*
+*  LDB    (input) INTEGER
+*         The leading dimension of B in the calling subprogram.
+*         LDB must be at least max(1,N).
+*
+*  RCOND  (input) DOUBLE PRECISION
+*         The singular values of A less than or equal to RCOND times
+*         the largest singular value are treated as zero in solving
+*         the least squares problem. If RCOND is negative,
+*         machine precision is used instead.
+*         For example, if diag(S)*X=B were the least squares problem,
+*         where diag(S) is a diagonal matrix of singular values, the
+*         solution would be X(i) = B(i) / S(i) if S(i) is greater than
+*         RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to
+*         RCOND*max(S).
+*
+*  RANK   (output) INTEGER
+*         The number of singular values of A greater than RCOND times
+*         the largest singular value.
+*
+*  WORK   (workspace) COMPLEX*16 array, dimension at least
+*         (N * NRHS).
+*
+*  RWORK  (workspace) DOUBLE PRECISION array, dimension at least
+*         (9*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + (SMLSIZ+1)**2),
+*         where
+*         NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 )
+*
+*  IWORK  (workspace) INTEGER array, dimension at least
+*         (3*N*NLVL + 11*N).
+*
+*  INFO   (output) INTEGER
+*         = 0:  successful exit.
+*         < 0:  if INFO = -i, the i-th argument had an illegal value.
+*         > 0:  The algorithm failed to compute an singular value while
+*               working on the submatrix lying in rows and columns
+*               INFO/(N+1) through MOD(INFO,N+1).
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Ming Gu and Ren-Cang Li, Computer Science Division, University of
+*       California at Berkeley, USA
+*     Osni Marques, LBNL/NERSC, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 )
+      COMPLEX*16         CZERO
+      PARAMETER          ( CZERO = ( 0.0D0, 0.0D0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            BX, BXST, C, DIFL, DIFR, GIVCOL, GIVNUM,
+     $                   GIVPTR, I, ICMPQ1, ICMPQ2, IRWB, IRWIB, IRWRB,
+     $                   IRWU, IRWVT, IRWWRK, IWK, J, JCOL, JIMAG,
+     $                   JREAL, JROW, K, NLVL, NM1, NRWORK, NSIZE, NSUB,
+     $                   PERM, POLES, S, SIZEI, SMLSZP, SQRE, ST, ST1,
+     $                   U, VT, Z
+      DOUBLE PRECISION   CS, EPS, ORGNRM, R, SN, TOL
+*     ..
+*     .. External Functions ..
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DLAMCH, DLANST
+      EXTERNAL           IDAMAX, DLAMCH, DLANST
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMM, DLARTG, DLASCL, DLASDA, DLASDQ, DLASET,
+     $                   DLASRT, XERBLA, ZCOPY, ZDROT, ZLACPY, ZLALSA,
+     $                   ZLASCL, ZLASET
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCMPLX, DIMAG, INT, LOG, SIGN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.1 ) THEN
+         INFO = -4
+      ELSE IF( ( LDB.LT.1 ) .OR. ( LDB.LT.N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZLALSD', -INFO )
+         RETURN
+      END IF
+*
+      EPS = DLAMCH( 'Epsilon' )
+*
+*     Set up the tolerance.
+*
+      IF( ( RCOND.LE.ZERO ) .OR. ( RCOND.GE.ONE ) ) THEN
+         RCOND = EPS
+      END IF
+*
+      RANK = 0
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 ) THEN
+         RETURN
+      ELSE IF( N.EQ.1 ) THEN
+         IF( D( 1 ).EQ.ZERO ) THEN
+            CALL ZLASET( 'A', 1, NRHS, CZERO, CZERO, B, LDB )
+         ELSE
+            RANK = 1
+            CALL ZLASCL( 'G', 0, 0, D( 1 ), ONE, 1, NRHS, B, LDB, INFO )
+            D( 1 ) = ABS( D( 1 ) )
+         END IF
+         RETURN
+      END IF
+*
+*     Rotate the matrix if it is lower bidiagonal.
+*
+      IF( UPLO.EQ.'L' ) THEN
+         DO 10 I = 1, N - 1
+            CALL DLARTG( D( I ), E( I ), CS, SN, R )
+            D( I ) = R
+            E( I ) = SN*D( I+1 )
+            D( I+1 ) = CS*D( I+1 )
+            IF( NRHS.EQ.1 ) THEN
+               CALL ZDROT( 1, B( I, 1 ), 1, B( I+1, 1 ), 1, CS, SN )
+            ELSE
+               RWORK( I*2-1 ) = CS
+               RWORK( I*2 ) = SN
+            END IF
+   10    CONTINUE
+         IF( NRHS.GT.1 ) THEN
+            DO 30 I = 1, NRHS
+               DO 20 J = 1, N - 1
+                  CS = RWORK( J*2-1 )
+                  SN = RWORK( J*2 )
+                  CALL ZDROT( 1, B( J, I ), 1, B( J+1, I ), 1, CS, SN )
+   20          CONTINUE
+   30       CONTINUE
+         END IF
+      END IF
+*
+*     Scale.
+*
+      NM1 = N - 1
+      ORGNRM = DLANST( 'M', N, D, E )
+      IF( ORGNRM.EQ.ZERO ) THEN
+         CALL ZLASET( 'A', N, NRHS, CZERO, CZERO, B, LDB )
+         RETURN
+      END IF
+*
+      CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, N, 1, D, N, INFO )
+      CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, NM1, 1, E, NM1, INFO )
+*
+*     If N is smaller than the minimum divide size SMLSIZ, then solve
+*     the problem with another solver.
+*
+      IF( N.LE.SMLSIZ ) THEN
+         IRWU = 1
+         IRWVT = IRWU + N*N
+         IRWWRK = IRWVT + N*N
+         IRWRB = IRWWRK
+         IRWIB = IRWRB + N*NRHS
+         IRWB = IRWIB + N*NRHS
+         CALL DLASET( 'A', N, N, ZERO, ONE, RWORK( IRWU ), N )
+         CALL DLASET( 'A', N, N, ZERO, ONE, RWORK( IRWVT ), N )
+         CALL DLASDQ( 'U', 0, N, N, N, 0, D, E, RWORK( IRWVT ), N,
+     $                RWORK( IRWU ), N, RWORK( IRWWRK ), 1,
+     $                RWORK( IRWWRK ), INFO )
+         IF( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+*
+*        In the real version, B is passed to DLASDQ and multiplied
+*        internally by Q'. Here B is complex and that product is
+*        computed below in two steps (real and imaginary parts).
+*
+         J = IRWB - 1
+         DO 50 JCOL = 1, NRHS
+            DO 40 JROW = 1, N
+               J = J + 1
+               RWORK( J ) = DBLE( B( JROW, JCOL ) )
+   40       CONTINUE
+   50    CONTINUE
+         CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWU ), N,
+     $               RWORK( IRWB ), N, ZERO, RWORK( IRWRB ), N )
+         J = IRWB - 1
+         DO 70 JCOL = 1, NRHS
+            DO 60 JROW = 1, N
+               J = J + 1
+               RWORK( J ) = DIMAG( B( JROW, JCOL ) )
+   60       CONTINUE
+   70    CONTINUE
+         CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWU ), N,
+     $               RWORK( IRWB ), N, ZERO, RWORK( IRWIB ), N )
+         JREAL = IRWRB - 1
+         JIMAG = IRWIB - 1
+         DO 90 JCOL = 1, NRHS
+            DO 80 JROW = 1, N
+               JREAL = JREAL + 1
+               JIMAG = JIMAG + 1
+               B( JROW, JCOL ) = DCMPLX( RWORK( JREAL ),
+     $                           RWORK( JIMAG ) )
+   80       CONTINUE
+   90    CONTINUE
+*
+         TOL = RCOND*ABS( D( IDAMAX( N, D, 1 ) ) )
+         DO 100 I = 1, N
+            IF( D( I ).LE.TOL ) THEN
+               CALL ZLASET( 'A', 1, NRHS, CZERO, CZERO, B( I, 1 ), LDB )
+            ELSE
+               CALL ZLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS, B( I, 1 ),
+     $                      LDB, INFO )
+               RANK = RANK + 1
+            END IF
+  100    CONTINUE
+*
+*        Since B is complex, the following call to DGEMM is performed
+*        in two steps (real and imaginary parts). That is for V * B
+*        (in the real version of the code V' is stored in WORK).
+*
+*        CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, WORK, N, B, LDB, ZERO,
+*    $               WORK( NWORK ), N )
+*
+         J = IRWB - 1
+         DO 120 JCOL = 1, NRHS
+            DO 110 JROW = 1, N
+               J = J + 1
+               RWORK( J ) = DBLE( B( JROW, JCOL ) )
+  110       CONTINUE
+  120    CONTINUE
+         CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWVT ), N,
+     $               RWORK( IRWB ), N, ZERO, RWORK( IRWRB ), N )
+         J = IRWB - 1
+         DO 140 JCOL = 1, NRHS
+            DO 130 JROW = 1, N
+               J = J + 1
+               RWORK( J ) = DIMAG( B( JROW, JCOL ) )
+  130       CONTINUE
+  140    CONTINUE
+         CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, RWORK( IRWVT ), N,
+     $               RWORK( IRWB ), N, ZERO, RWORK( IRWIB ), N )
+         JREAL = IRWRB - 1
+         JIMAG = IRWIB - 1
+         DO 160 JCOL = 1, NRHS
+            DO 150 JROW = 1, N
+               JREAL = JREAL + 1
+               JIMAG = JIMAG + 1
+               B( JROW, JCOL ) = DCMPLX( RWORK( JREAL ),
+     $                           RWORK( JIMAG ) )
+  150       CONTINUE
+  160    CONTINUE
+*
+*        Unscale.
+*
+         CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO )
+         CALL DLASRT( 'D', N, D, INFO )
+         CALL ZLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO )
+*
+         RETURN
+      END IF
+*
+*     Book-keeping and setting up some constants.
+*
+      NLVL = INT( LOG( DBLE( N ) / DBLE( SMLSIZ+1 ) ) / LOG( TWO ) ) + 1
+*
+      SMLSZP = SMLSIZ + 1
+*
+      U = 1
+      VT = 1 + SMLSIZ*N
+      DIFL = VT + SMLSZP*N
+      DIFR = DIFL + NLVL*N
+      Z = DIFR + NLVL*N*2
+      C = Z + NLVL*N
+      S = C + N
+      POLES = S + N
+      GIVNUM = POLES + 2*NLVL*N
+      NRWORK = GIVNUM + 2*NLVL*N
+      BX = 1
+*
+      IRWRB = NRWORK
+      IRWIB = IRWRB + SMLSIZ*NRHS
+      IRWB = IRWIB + SMLSIZ*NRHS
+*
+      SIZEI = 1 + N
+      K = SIZEI + N
+      GIVPTR = K + N
+      PERM = GIVPTR + N
+      GIVCOL = PERM + NLVL*N
+      IWK = GIVCOL + NLVL*N*2
+*
+      ST = 1
+      SQRE = 0
+      ICMPQ1 = 1
+      ICMPQ2 = 0
+      NSUB = 0
+*
+      DO 170 I = 1, N
+         IF( ABS( D( I ) ).LT.EPS ) THEN
+            D( I ) = SIGN( EPS, D( I ) )
+         END IF
+  170 CONTINUE
+*
+      DO 240 I = 1, NM1
+         IF( ( ABS( E( I ) ).LT.EPS ) .OR. ( I.EQ.NM1 ) ) THEN
+            NSUB = NSUB + 1
+            IWORK( NSUB ) = ST
+*
+*           Subproblem found. First determine its size and then
+*           apply divide and conquer on it.
+*
+            IF( I.LT.NM1 ) THEN
+*
+*              A subproblem with E(I) small for I < NM1.
+*
+               NSIZE = I - ST + 1
+               IWORK( SIZEI+NSUB-1 ) = NSIZE
+            ELSE IF( ABS( E( I ) ).GE.EPS ) THEN
+*
+*              A subproblem with E(NM1) not too small but I = NM1.
+*
+               NSIZE = N - ST + 1
+               IWORK( SIZEI+NSUB-1 ) = NSIZE
+            ELSE
+*
+*              A subproblem with E(NM1) small. This implies an
+*              1-by-1 subproblem at D(N), which is not solved
+*              explicitly.
+*
+               NSIZE = I - ST + 1
+               IWORK( SIZEI+NSUB-1 ) = NSIZE
+               NSUB = NSUB + 1
+               IWORK( NSUB ) = N
+               IWORK( SIZEI+NSUB-1 ) = 1
+               CALL ZCOPY( NRHS, B( N, 1 ), LDB, WORK( BX+NM1 ), N )
+            END IF
+            ST1 = ST - 1
+            IF( NSIZE.EQ.1 ) THEN
+*
+*              This is a 1-by-1 subproblem and is not solved
+*              explicitly.
+*
+               CALL ZCOPY( NRHS, B( ST, 1 ), LDB, WORK( BX+ST1 ), N )
+            ELSE IF( NSIZE.LE.SMLSIZ ) THEN
+*
+*              This is a small subproblem and is solved by DLASDQ.
+*
+               CALL DLASET( 'A', NSIZE, NSIZE, ZERO, ONE,
+     $                      RWORK( VT+ST1 ), N )
+               CALL DLASET( 'A', NSIZE, NSIZE, ZERO, ONE,
+     $                      RWORK( U+ST1 ), N )
+               CALL DLASDQ( 'U', 0, NSIZE, NSIZE, NSIZE, 0, D( ST ),
+     $                      E( ST ), RWORK( VT+ST1 ), N, RWORK( U+ST1 ),
+     $                      N, RWORK( NRWORK ), 1, RWORK( NRWORK ),
+     $                      INFO )
+               IF( INFO.NE.0 ) THEN
+                  RETURN
+               END IF
+*
+*              In the real version, B is passed to DLASDQ and multiplied
+*              internally by Q'. Here B is complex and that product is
+*              computed below in two steps (real and imaginary parts).
+*
+               J = IRWB - 1
+               DO 190 JCOL = 1, NRHS
+                  DO 180 JROW = ST, ST + NSIZE - 1
+                     J = J + 1
+                     RWORK( J ) = DBLE( B( JROW, JCOL ) )
+  180             CONTINUE
+  190          CONTINUE
+               CALL DGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE,
+     $                     RWORK( U+ST1 ), N, RWORK( IRWB ), NSIZE,
+     $                     ZERO, RWORK( IRWRB ), NSIZE )
+               J = IRWB - 1
+               DO 210 JCOL = 1, NRHS
+                  DO 200 JROW = ST, ST + NSIZE - 1
+                     J = J + 1
+                     RWORK( J ) = DIMAG( B( JROW, JCOL ) )
+  200             CONTINUE
+  210          CONTINUE
+               CALL DGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE,
+     $                     RWORK( U+ST1 ), N, RWORK( IRWB ), NSIZE,
+     $                     ZERO, RWORK( IRWIB ), NSIZE )
+               JREAL = IRWRB - 1
+               JIMAG = IRWIB - 1
+               DO 230 JCOL = 1, NRHS
+                  DO 220 JROW = ST, ST + NSIZE - 1
+                     JREAL = JREAL + 1
+                     JIMAG = JIMAG + 1
+                     B( JROW, JCOL ) = DCMPLX( RWORK( JREAL ),
+     $                                 RWORK( JIMAG ) )
+  220             CONTINUE
+  230          CONTINUE
+*
+               CALL ZLACPY( 'A', NSIZE, NRHS, B( ST, 1 ), LDB,
+     $                      WORK( BX+ST1 ), N )
+            ELSE
+*
+*              A large problem. Solve it using divide and conquer.
+*
+               CALL DLASDA( ICMPQ1, SMLSIZ, NSIZE, SQRE, D( ST ),
+     $                      E( ST ), RWORK( U+ST1 ), N, RWORK( VT+ST1 ),
+     $                      IWORK( K+ST1 ), RWORK( DIFL+ST1 ),
+     $                      RWORK( DIFR+ST1 ), RWORK( Z+ST1 ),
+     $                      RWORK( POLES+ST1 ), IWORK( GIVPTR+ST1 ),
+     $                      IWORK( GIVCOL+ST1 ), N, IWORK( PERM+ST1 ),
+     $                      RWORK( GIVNUM+ST1 ), RWORK( C+ST1 ),
+     $                      RWORK( S+ST1 ), RWORK( NRWORK ),
+     $                      IWORK( IWK ), INFO )
+               IF( INFO.NE.0 ) THEN
+                  RETURN
+               END IF
+               BXST = BX + ST1
+               CALL ZLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, B( ST, 1 ),
+     $                      LDB, WORK( BXST ), N, RWORK( U+ST1 ), N,
+     $                      RWORK( VT+ST1 ), IWORK( K+ST1 ),
+     $                      RWORK( DIFL+ST1 ), RWORK( DIFR+ST1 ),
+     $                      RWORK( Z+ST1 ), RWORK( POLES+ST1 ),
+     $                      IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N,
+     $                      IWORK( PERM+ST1 ), RWORK( GIVNUM+ST1 ),
+     $                      RWORK( C+ST1 ), RWORK( S+ST1 ),
+     $                      RWORK( NRWORK ), IWORK( IWK ), INFO )
+               IF( INFO.NE.0 ) THEN
+                  RETURN
+               END IF
+            END IF
+            ST = I + 1
+         END IF
+  240 CONTINUE
+*
+*     Apply the singular values and treat the tiny ones as zero.
+*
+      TOL = RCOND*ABS( D( IDAMAX( N, D, 1 ) ) )
+*
+      DO 250 I = 1, N
+*
+*        Some of the elements in D can be negative because 1-by-1
+*        subproblems were not solved explicitly.
+*
+         IF( ABS( D( I ) ).LE.TOL ) THEN
+            CALL ZLASET( 'A', 1, NRHS, CZERO, CZERO, WORK( BX+I-1 ), N )
+         ELSE
+            RANK = RANK + 1
+            CALL ZLASCL( 'G', 0, 0, D( I ), ONE, 1, NRHS,
+     $                   WORK( BX+I-1 ), N, INFO )
+         END IF
+         D( I ) = ABS( D( I ) )
+  250 CONTINUE
+*
+*     Now apply back the right singular vectors.
+*
+      ICMPQ2 = 1
+      DO 320 I = 1, NSUB
+         ST = IWORK( I )
+         ST1 = ST - 1
+         NSIZE = IWORK( SIZEI+I-1 )
+         BXST = BX + ST1
+         IF( NSIZE.EQ.1 ) THEN
+            CALL ZCOPY( NRHS, WORK( BXST ), N, B( ST, 1 ), LDB )
+         ELSE IF( NSIZE.LE.SMLSIZ ) THEN
+*
+*           Since B and BX are complex, the following call to DGEMM
+*           is performed in two steps (real and imaginary parts).
+*
+*           CALL DGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE,
+*    $                  RWORK( VT+ST1 ), N, RWORK( BXST ), N, ZERO,
+*    $                  B( ST, 1 ), LDB )
+*
+            J = BXST - N - 1
+            JREAL = IRWB - 1
+            DO 270 JCOL = 1, NRHS
+               J = J + N
+               DO 260 JROW = 1, NSIZE
+                  JREAL = JREAL + 1
+                  RWORK( JREAL ) = DBLE( WORK( J+JROW ) )
+  260          CONTINUE
+  270       CONTINUE
+            CALL DGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE,
+     $                  RWORK( VT+ST1 ), N, RWORK( IRWB ), NSIZE, ZERO,
+     $                  RWORK( IRWRB ), NSIZE )
+            J = BXST - N - 1
+            JIMAG = IRWB - 1
+            DO 290 JCOL = 1, NRHS
+               J = J + N
+               DO 280 JROW = 1, NSIZE
+                  JIMAG = JIMAG + 1
+                  RWORK( JIMAG ) = DIMAG( WORK( J+JROW ) )
+  280          CONTINUE
+  290       CONTINUE
+            CALL DGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE,
+     $                  RWORK( VT+ST1 ), N, RWORK( IRWB ), NSIZE, ZERO,
+     $                  RWORK( IRWIB ), NSIZE )
+            JREAL = IRWRB - 1
+            JIMAG = IRWIB - 1
+            DO 310 JCOL = 1, NRHS
+               DO 300 JROW = ST, ST + NSIZE - 1
+                  JREAL = JREAL + 1
+                  JIMAG = JIMAG + 1
+                  B( JROW, JCOL ) = DCMPLX( RWORK( JREAL ),
+     $                              RWORK( JIMAG ) )
+  300          CONTINUE
+  310       CONTINUE
+         ELSE
+            CALL ZLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, WORK( BXST ), N,
+     $                   B( ST, 1 ), LDB, RWORK( U+ST1 ), N,
+     $                   RWORK( VT+ST1 ), IWORK( K+ST1 ),
+     $                   RWORK( DIFL+ST1 ), RWORK( DIFR+ST1 ),
+     $                   RWORK( Z+ST1 ), RWORK( POLES+ST1 ),
+     $                   IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N,
+     $                   IWORK( PERM+ST1 ), RWORK( GIVNUM+ST1 ),
+     $                   RWORK( C+ST1 ), RWORK( S+ST1 ),
+     $                   RWORK( NRWORK ), IWORK( IWK ), INFO )
+            IF( INFO.NE.0 ) THEN
+               RETURN
+            END IF
+         END IF
+  320 CONTINUE
+*
+*     Unscale and sort the singular values.
+*
+      CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, N, 1, D, N, INFO )
+      CALL DLASRT( 'D', N, D, INFO )
+      CALL ZLASCL( 'G', 0, 0, ORGNRM, ONE, N, NRHS, B, LDB, INFO )
+*
+      RETURN
+*
+*     End of ZLALSD
+*
+      END
diff --git a/costa/native/external/lapack/zlangb.f b/costa/native/external/lapack/zlangb.f
new file mode 100644
index 000000000..112c63bec
--- /dev/null
+++ b/costa/native/external/lapack/zlangb.f
@@ -0,0 +1,155 @@
+      DOUBLE PRECISION FUNCTION ZLANGB( NORM, N, KL, KU, AB, LDAB,
+     $                 WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            KL, KU, LDAB, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   WORK( * )
+      COMPLEX*16         AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLANGB  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the element of  largest absolute value  of an
+*  n by n band matrix  A,  with kl sub-diagonals and ku super-diagonals.
+*
+*  Description
+*  ===========
+*
+*  ZLANGB returns the value
+*
+*     ZLANGB = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in ZLANGB as described
+*          above.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, ZLANGB is
+*          set to zero.
+*
+*  KL      (input) INTEGER
+*          The number of sub-diagonals of the matrix A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of super-diagonals of the matrix A.  KU >= 0.
+*
+*  AB      (input) COMPLEX*16 array, dimension (LDAB,N)
+*          The band matrix A, stored in rows 1 to KL+KU+1.  The j-th
+*          column of A is stored in the j-th column of the array AB as
+*          follows:
+*          AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KL+KU+1.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I'; otherwise, WORK is not
+*          referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, K, L
+      DOUBLE PRECISION   SCALE, SUM, VALUE
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         VALUE = ZERO
+         DO 20 J = 1, N
+            DO 10 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 )
+               VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   10       CONTINUE
+   20    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
+*
+*        Find norm1(A).
+*
+         VALUE = ZERO
+         DO 40 J = 1, N
+            SUM = ZERO
+            DO 30 I = MAX( KU+2-J, 1 ), MIN( N+KU+1-J, KL+KU+1 )
+               SUM = SUM + ABS( AB( I, J ) )
+   30       CONTINUE
+            VALUE = MAX( VALUE, SUM )
+   40    CONTINUE
+      ELSE IF( LSAME( NORM, 'I' ) ) THEN
+*
+*        Find normI(A).
+*
+         DO 50 I = 1, N
+            WORK( I ) = ZERO
+   50    CONTINUE
+         DO 70 J = 1, N
+            K = KU + 1 - J
+            DO 60 I = MAX( 1, J-KU ), MIN( N, J+KL )
+               WORK( I ) = WORK( I ) + ABS( AB( K+I, J ) )
+   60       CONTINUE
+   70    CONTINUE
+         VALUE = ZERO
+         DO 80 I = 1, N
+            VALUE = MAX( VALUE, WORK( I ) )
+   80    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         DO 90 J = 1, N
+            L = MAX( 1, J-KU )
+            K = KU + 1 - J + L
+            CALL ZLASSQ( MIN( N, J+KL )-L+1, AB( K, J ), 1, SCALE, SUM )
+   90    CONTINUE
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      ZLANGB = VALUE
+      RETURN
+*
+*     End of ZLANGB
+*
+      END
diff --git a/costa/native/external/lapack/zlange.f b/costa/native/external/lapack/zlange.f
new file mode 100644
index 000000000..754f0f779
--- /dev/null
+++ b/costa/native/external/lapack/zlange.f
@@ -0,0 +1,146 @@
+      DOUBLE PRECISION FUNCTION ZLANGE( NORM, M, N, A, LDA, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   WORK( * )
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLANGE  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  complex matrix A.
+*
+*  Description
+*  ===========
+*
+*  ZLANGE returns the value
+*
+*     ZLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in ZLANGE as described
+*          above.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.  When M = 0,
+*          ZLANGE is set to zero.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.  When N = 0,
+*          ZLANGE is set to zero.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,N)
+*          The m by n matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(M,1).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK),
+*          where LWORK >= M when NORM = 'I'; otherwise, WORK is not
+*          referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      DOUBLE PRECISION   SCALE, SUM, VALUE
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( MIN( M, N ).EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         VALUE = ZERO
+         DO 20 J = 1, N
+            DO 10 I = 1, M
+               VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   10       CONTINUE
+   20    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
+*
+*        Find norm1(A).
+*
+         VALUE = ZERO
+         DO 40 J = 1, N
+            SUM = ZERO
+            DO 30 I = 1, M
+               SUM = SUM + ABS( A( I, J ) )
+   30       CONTINUE
+            VALUE = MAX( VALUE, SUM )
+   40    CONTINUE
+      ELSE IF( LSAME( NORM, 'I' ) ) THEN
+*
+*        Find normI(A).
+*
+         DO 50 I = 1, M
+            WORK( I ) = ZERO
+   50    CONTINUE
+         DO 70 J = 1, N
+            DO 60 I = 1, M
+               WORK( I ) = WORK( I ) + ABS( A( I, J ) )
+   60       CONTINUE
+   70    CONTINUE
+         VALUE = ZERO
+         DO 80 I = 1, M
+            VALUE = MAX( VALUE, WORK( I ) )
+   80    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         DO 90 J = 1, N
+            CALL ZLASSQ( M, A( 1, J ), 1, SCALE, SUM )
+   90    CONTINUE
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      ZLANGE = VALUE
+      RETURN
+*
+*     End of ZLANGE
+*
+      END
diff --git a/costa/native/external/lapack/zlangt.f b/costa/native/external/lapack/zlangt.f
new file mode 100644
index 000000000..e6c053831
--- /dev/null
+++ b/costa/native/external/lapack/zlangt.f
@@ -0,0 +1,142 @@
+      DOUBLE PRECISION FUNCTION ZLANGT( NORM, N, DL, D, DU )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         D( * ), DL( * ), DU( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLANGT  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  complex tridiagonal matrix A.
+*
+*  Description
+*  ===========
+*
+*  ZLANGT returns the value
+*
+*     ZLANGT = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in ZLANGT as described
+*          above.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, ZLANGT is
+*          set to zero.
+*
+*  DL      (input) COMPLEX*16 array, dimension (N-1)
+*          The (n-1) sub-diagonal elements of A.
+*
+*  D       (input) COMPLEX*16 array, dimension (N)
+*          The diagonal elements of A.
+*
+*  DU      (input) COMPLEX*16 array, dimension (N-1)
+*          The (n-1) super-diagonal elements of A.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      DOUBLE PRECISION   ANORM, SCALE, SUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.LE.0 ) THEN
+         ANORM = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         ANORM = ABS( D( N ) )
+         DO 10 I = 1, N - 1
+            ANORM = MAX( ANORM, ABS( DL( I ) ) )
+            ANORM = MAX( ANORM, ABS( D( I ) ) )
+            ANORM = MAX( ANORM, ABS( DU( I ) ) )
+   10    CONTINUE
+      ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' ) THEN
+*
+*        Find norm1(A).
+*
+         IF( N.EQ.1 ) THEN
+            ANORM = ABS( D( 1 ) )
+         ELSE
+            ANORM = MAX( ABS( D( 1 ) )+ABS( DL( 1 ) ),
+     $              ABS( D( N ) )+ABS( DU( N-1 ) ) )
+            DO 20 I = 2, N - 1
+               ANORM = MAX( ANORM, ABS( D( I ) )+ABS( DL( I ) )+
+     $                 ABS( DU( I-1 ) ) )
+   20       CONTINUE
+         END IF
+      ELSE IF( LSAME( NORM, 'I' ) ) THEN
+*
+*        Find normI(A).
+*
+         IF( N.EQ.1 ) THEN
+            ANORM = ABS( D( 1 ) )
+         ELSE
+            ANORM = MAX( ABS( D( 1 ) )+ABS( DU( 1 ) ),
+     $              ABS( D( N ) )+ABS( DL( N-1 ) ) )
+            DO 30 I = 2, N - 1
+               ANORM = MAX( ANORM, ABS( D( I ) )+ABS( DU( I ) )+
+     $                 ABS( DL( I-1 ) ) )
+   30       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         CALL ZLASSQ( N, D, 1, SCALE, SUM )
+         IF( N.GT.1 ) THEN
+            CALL ZLASSQ( N-1, DL, 1, SCALE, SUM )
+            CALL ZLASSQ( N-1, DU, 1, SCALE, SUM )
+         END IF
+         ANORM = SCALE*SQRT( SUM )
+      END IF
+*
+      ZLANGT = ANORM
+      RETURN
+*
+*     End of ZLANGT
+*
+      END
diff --git a/costa/native/external/lapack/zlanhb.f b/costa/native/external/lapack/zlanhb.f
new file mode 100644
index 000000000..a30f9ce51
--- /dev/null
+++ b/costa/native/external/lapack/zlanhb.f
@@ -0,0 +1,202 @@
+      DOUBLE PRECISION FUNCTION ZLANHB( NORM, UPLO, N, K, AB, LDAB,
+     $                 WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM, UPLO
+      INTEGER            K, LDAB, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   WORK( * )
+      COMPLEX*16         AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLANHB  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the element of  largest absolute value  of an
+*  n by n hermitian band matrix A,  with k super-diagonals.
+*
+*  Description
+*  ===========
+*
+*  ZLANHB returns the value
+*
+*     ZLANHB = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in ZLANHB as described
+*          above.
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          band matrix A is supplied.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, ZLANHB is
+*          set to zero.
+*
+*  K       (input) INTEGER
+*          The number of super-diagonals or sub-diagonals of the
+*          band matrix A.  K >= 0.
+*
+*  AB      (input) COMPLEX*16 array, dimension (LDAB,N)
+*          The upper or lower triangle of the hermitian band matrix A,
+*          stored in the first K+1 rows of AB.  The j-th column of A is
+*          stored in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)   = A(i,j) for j<=i<=min(n,j+k).
+*          Note that the imaginary parts of the diagonal elements need
+*          not be set and are assumed to be zero.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= K+1.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
+*          WORK is not referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, L
+      DOUBLE PRECISION   ABSA, SCALE, SUM, VALUE
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 20 J = 1, N
+               DO 10 I = MAX( K+2-J, 1 ), K
+                  VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   10          CONTINUE
+               VALUE = MAX( VALUE, ABS( DBLE( AB( K+1, J ) ) ) )
+   20       CONTINUE
+         ELSE
+            DO 40 J = 1, N
+               VALUE = MAX( VALUE, ABS( DBLE( AB( 1, J ) ) ) )
+               DO 30 I = 2, MIN( N+1-J, K+1 )
+                  VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR.
+     $         ( NORM.EQ.'1' ) ) THEN
+*
+*        Find normI(A) ( = norm1(A), since A is hermitian).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 60 J = 1, N
+               SUM = ZERO
+               L = K + 1 - J
+               DO 50 I = MAX( 1, J-K ), J - 1
+                  ABSA = ABS( AB( L+I, J ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+   50          CONTINUE
+               WORK( J ) = SUM + ABS( DBLE( AB( K+1, J ) ) )
+   60       CONTINUE
+            DO 70 I = 1, N
+               VALUE = MAX( VALUE, WORK( I ) )
+   70       CONTINUE
+         ELSE
+            DO 80 I = 1, N
+               WORK( I ) = ZERO
+   80       CONTINUE
+            DO 100 J = 1, N
+               SUM = WORK( J ) + ABS( DBLE( AB( 1, J ) ) )
+               L = 1 - J
+               DO 90 I = J + 1, MIN( N, J+K )
+                  ABSA = ABS( AB( L+I, J ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+   90          CONTINUE
+               VALUE = MAX( VALUE, SUM )
+  100       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         IF( K.GT.0 ) THEN
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               DO 110 J = 2, N
+                  CALL ZLASSQ( MIN( J-1, K ), AB( MAX( K+2-J, 1 ), J ),
+     $                         1, SCALE, SUM )
+  110          CONTINUE
+               L = K + 1
+            ELSE
+               DO 120 J = 1, N - 1
+                  CALL ZLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE,
+     $                         SUM )
+  120          CONTINUE
+               L = 1
+            END IF
+            SUM = 2*SUM
+         ELSE
+            L = 1
+         END IF
+         DO 130 J = 1, N
+            IF( DBLE( AB( L, J ) ).NE.ZERO ) THEN
+               ABSA = ABS( DBLE( AB( L, J ) ) )
+               IF( SCALE.LT.ABSA ) THEN
+                  SUM = ONE + SUM*( SCALE / ABSA )**2
+                  SCALE = ABSA
+               ELSE
+                  SUM = SUM + ( ABSA / SCALE )**2
+               END IF
+            END IF
+  130    CONTINUE
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      ZLANHB = VALUE
+      RETURN
+*
+*     End of ZLANHB
+*
+      END
diff --git a/costa/native/external/lapack/zlanhe.f b/costa/native/external/lapack/zlanhe.f
new file mode 100644
index 000000000..deda1052e
--- /dev/null
+++ b/costa/native/external/lapack/zlanhe.f
@@ -0,0 +1,188 @@
+      DOUBLE PRECISION FUNCTION ZLANHE( NORM, UPLO, N, A, LDA, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM, UPLO
+      INTEGER            LDA, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   WORK( * )
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLANHE  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  complex hermitian matrix A.
+*
+*  Description
+*  ===========
+*
+*  ZLANHE returns the value
+*
+*     ZLANHE = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in ZLANHE as described
+*          above.
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          hermitian matrix A is to be referenced.
+*          = 'U':  Upper triangular part of A is referenced
+*          = 'L':  Lower triangular part of A is referenced
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, ZLANHE is
+*          set to zero.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,N)
+*          The hermitian matrix A.  If UPLO = 'U', the leading n by n
+*          upper triangular part of A contains the upper triangular part
+*          of the matrix A, and the strictly lower triangular part of A
+*          is not referenced.  If UPLO = 'L', the leading n by n lower
+*          triangular part of A contains the lower triangular part of
+*          the matrix A, and the strictly upper triangular part of A is
+*          not referenced. Note that the imaginary parts of the diagonal
+*          elements need not be set and are assumed to be zero.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(N,1).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
+*          WORK is not referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      DOUBLE PRECISION   ABSA, SCALE, SUM, VALUE
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 20 J = 1, N
+               DO 10 I = 1, J - 1
+                  VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   10          CONTINUE
+               VALUE = MAX( VALUE, ABS( DBLE( A( J, J ) ) ) )
+   20       CONTINUE
+         ELSE
+            DO 40 J = 1, N
+               VALUE = MAX( VALUE, ABS( DBLE( A( J, J ) ) ) )
+               DO 30 I = J + 1, N
+                  VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR.
+     $         ( NORM.EQ.'1' ) ) THEN
+*
+*        Find normI(A) ( = norm1(A), since A is hermitian).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 60 J = 1, N
+               SUM = ZERO
+               DO 50 I = 1, J - 1
+                  ABSA = ABS( A( I, J ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+   50          CONTINUE
+               WORK( J ) = SUM + ABS( DBLE( A( J, J ) ) )
+   60       CONTINUE
+            DO 70 I = 1, N
+               VALUE = MAX( VALUE, WORK( I ) )
+   70       CONTINUE
+         ELSE
+            DO 80 I = 1, N
+               WORK( I ) = ZERO
+   80       CONTINUE
+            DO 100 J = 1, N
+               SUM = WORK( J ) + ABS( DBLE( A( J, J ) ) )
+               DO 90 I = J + 1, N
+                  ABSA = ABS( A( I, J ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+   90          CONTINUE
+               VALUE = MAX( VALUE, SUM )
+  100       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 110 J = 2, N
+               CALL ZLASSQ( J-1, A( 1, J ), 1, SCALE, SUM )
+  110       CONTINUE
+         ELSE
+            DO 120 J = 1, N - 1
+               CALL ZLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM )
+  120       CONTINUE
+         END IF
+         SUM = 2*SUM
+         DO 130 I = 1, N
+            IF( DBLE( A( I, I ) ).NE.ZERO ) THEN
+               ABSA = ABS( DBLE( A( I, I ) ) )
+               IF( SCALE.LT.ABSA ) THEN
+                  SUM = ONE + SUM*( SCALE / ABSA )**2
+                  SCALE = ABSA
+               ELSE
+                  SUM = SUM + ( ABSA / SCALE )**2
+               END IF
+            END IF
+  130    CONTINUE
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      ZLANHE = VALUE
+      RETURN
+*
+*     End of ZLANHE
+*
+      END
diff --git a/costa/native/external/lapack/zlanhp.f b/costa/native/external/lapack/zlanhp.f
new file mode 100644
index 000000000..3c5f4cde6
--- /dev/null
+++ b/costa/native/external/lapack/zlanhp.f
@@ -0,0 +1,202 @@
+      DOUBLE PRECISION FUNCTION ZLANHP( NORM, UPLO, N, AP, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM, UPLO
+      INTEGER            N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   WORK( * )
+      COMPLEX*16         AP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLANHP  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  complex hermitian matrix A,  supplied in packed form.
+*
+*  Description
+*  ===========
+*
+*  ZLANHP returns the value
+*
+*     ZLANHP = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in ZLANHP as described
+*          above.
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          hermitian matrix A is supplied.
+*          = 'U':  Upper triangular part of A is supplied
+*          = 'L':  Lower triangular part of A is supplied
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, ZLANHP is
+*          set to zero.
+*
+*  AP      (input) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          The upper or lower triangle of the hermitian matrix A, packed
+*          columnwise in a linear array.  The j-th column of A is stored
+*          in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*          Note that the  imaginary parts of the diagonal elements need
+*          not be set and are assumed to be zero.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
+*          WORK is not referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, K
+      DOUBLE PRECISION   ABSA, SCALE, SUM, VALUE
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            K = 0
+            DO 20 J = 1, N
+               DO 10 I = K + 1, K + J - 1
+                  VALUE = MAX( VALUE, ABS( AP( I ) ) )
+   10          CONTINUE
+               K = K + J
+               VALUE = MAX( VALUE, ABS( DBLE( AP( K ) ) ) )
+   20       CONTINUE
+         ELSE
+            K = 1
+            DO 40 J = 1, N
+               VALUE = MAX( VALUE, ABS( DBLE( AP( K ) ) ) )
+               DO 30 I = K + 1, K + N - J
+                  VALUE = MAX( VALUE, ABS( AP( I ) ) )
+   30          CONTINUE
+               K = K + N - J + 1
+   40       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR.
+     $         ( NORM.EQ.'1' ) ) THEN
+*
+*        Find normI(A) ( = norm1(A), since A is hermitian).
+*
+         VALUE = ZERO
+         K = 1
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 60 J = 1, N
+               SUM = ZERO
+               DO 50 I = 1, J - 1
+                  ABSA = ABS( AP( K ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+                  K = K + 1
+   50          CONTINUE
+               WORK( J ) = SUM + ABS( DBLE( AP( K ) ) )
+               K = K + 1
+   60       CONTINUE
+            DO 70 I = 1, N
+               VALUE = MAX( VALUE, WORK( I ) )
+   70       CONTINUE
+         ELSE
+            DO 80 I = 1, N
+               WORK( I ) = ZERO
+   80       CONTINUE
+            DO 100 J = 1, N
+               SUM = WORK( J ) + ABS( DBLE( AP( K ) ) )
+               K = K + 1
+               DO 90 I = J + 1, N
+                  ABSA = ABS( AP( K ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+                  K = K + 1
+   90          CONTINUE
+               VALUE = MAX( VALUE, SUM )
+  100       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         K = 2
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 110 J = 2, N
+               CALL ZLASSQ( J-1, AP( K ), 1, SCALE, SUM )
+               K = K + J
+  110       CONTINUE
+         ELSE
+            DO 120 J = 1, N - 1
+               CALL ZLASSQ( N-J, AP( K ), 1, SCALE, SUM )
+               K = K + N - J + 1
+  120       CONTINUE
+         END IF
+         SUM = 2*SUM
+         K = 1
+         DO 130 I = 1, N
+            IF( DBLE( AP( K ) ).NE.ZERO ) THEN
+               ABSA = ABS( DBLE( AP( K ) ) )
+               IF( SCALE.LT.ABSA ) THEN
+                  SUM = ONE + SUM*( SCALE / ABSA )**2
+                  SCALE = ABSA
+               ELSE
+                  SUM = SUM + ( ABSA / SCALE )**2
+               END IF
+            END IF
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               K = K + I + 1
+            ELSE
+               K = K + N - I + 1
+            END IF
+  130    CONTINUE
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      ZLANHP = VALUE
+      RETURN
+*
+*     End of ZLANHP
+*
+      END
diff --git a/costa/native/external/lapack/zlanhs.f b/costa/native/external/lapack/zlanhs.f
new file mode 100644
index 000000000..2499c5119
--- /dev/null
+++ b/costa/native/external/lapack/zlanhs.f
@@ -0,0 +1,143 @@
+      DOUBLE PRECISION FUNCTION ZLANHS( NORM, N, A, LDA, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            LDA, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   WORK( * )
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLANHS  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  Hessenberg matrix A.
+*
+*  Description
+*  ===========
+*
+*  ZLANHS returns the value
+*
+*     ZLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in ZLANHS as described
+*          above.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, ZLANHS is
+*          set to zero.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,N)
+*          The n by n upper Hessenberg matrix A; the part of A below the
+*          first sub-diagonal is not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(N,1).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I'; otherwise, WORK is not
+*          referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      DOUBLE PRECISION   SCALE, SUM, VALUE
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         VALUE = ZERO
+         DO 20 J = 1, N
+            DO 10 I = 1, MIN( N, J+1 )
+               VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   10       CONTINUE
+   20    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
+*
+*        Find norm1(A).
+*
+         VALUE = ZERO
+         DO 40 J = 1, N
+            SUM = ZERO
+            DO 30 I = 1, MIN( N, J+1 )
+               SUM = SUM + ABS( A( I, J ) )
+   30       CONTINUE
+            VALUE = MAX( VALUE, SUM )
+   40    CONTINUE
+      ELSE IF( LSAME( NORM, 'I' ) ) THEN
+*
+*        Find normI(A).
+*
+         DO 50 I = 1, N
+            WORK( I ) = ZERO
+   50    CONTINUE
+         DO 70 J = 1, N
+            DO 60 I = 1, MIN( N, J+1 )
+               WORK( I ) = WORK( I ) + ABS( A( I, J ) )
+   60       CONTINUE
+   70    CONTINUE
+         VALUE = ZERO
+         DO 80 I = 1, N
+            VALUE = MAX( VALUE, WORK( I ) )
+   80    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         DO 90 J = 1, N
+            CALL ZLASSQ( MIN( N, J+1 ), A( 1, J ), 1, SCALE, SUM )
+   90    CONTINUE
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      ZLANHS = VALUE
+      RETURN
+*
+*     End of ZLANHS
+*
+      END
diff --git a/costa/native/external/lapack/zlanht.f b/costa/native/external/lapack/zlanht.f
new file mode 100644
index 000000000..8fca28266
--- /dev/null
+++ b/costa/native/external/lapack/zlanht.f
@@ -0,0 +1,126 @@
+      DOUBLE PRECISION FUNCTION ZLANHT( NORM, N, D, E )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM
+      INTEGER            N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * )
+      COMPLEX*16         E( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLANHT  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  complex Hermitian tridiagonal matrix A.
+*
+*  Description
+*  ===========
+*
+*  ZLANHT returns the value
+*
+*     ZLANHT = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in ZLANHT as described
+*          above.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, ZLANHT is
+*          set to zero.
+*
+*  D       (input) DOUBLE PRECISION array, dimension (N)
+*          The diagonal elements of A.
+*
+*  E       (input) COMPLEX*16 array, dimension (N-1)
+*          The (n-1) sub-diagonal or super-diagonal elements of A.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      DOUBLE PRECISION   ANORM, SCALE, SUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASSQ, ZLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.LE.0 ) THEN
+         ANORM = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         ANORM = ABS( D( N ) )
+         DO 10 I = 1, N - 1
+            ANORM = MAX( ANORM, ABS( D( I ) ) )
+            ANORM = MAX( ANORM, ABS( E( I ) ) )
+   10    CONTINUE
+      ELSE IF( LSAME( NORM, 'O' ) .OR. NORM.EQ.'1' .OR.
+     $         LSAME( NORM, 'I' ) ) THEN
+*
+*        Find norm1(A).
+*
+         IF( N.EQ.1 ) THEN
+            ANORM = ABS( D( 1 ) )
+         ELSE
+            ANORM = MAX( ABS( D( 1 ) )+ABS( E( 1 ) ),
+     $              ABS( E( N-1 ) )+ABS( D( N ) ) )
+            DO 20 I = 2, N - 1
+               ANORM = MAX( ANORM, ABS( D( I ) )+ABS( E( I ) )+
+     $                 ABS( E( I-1 ) ) )
+   20       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         IF( N.GT.1 ) THEN
+            CALL ZLASSQ( N-1, E, 1, SCALE, SUM )
+            SUM = 2*SUM
+         END IF
+         CALL DLASSQ( N, D, 1, SCALE, SUM )
+         ANORM = SCALE*SQRT( SUM )
+      END IF
+*
+      ZLANHT = ANORM
+      RETURN
+*
+*     End of ZLANHT
+*
+      END
diff --git a/costa/native/external/lapack/zlansb.f b/costa/native/external/lapack/zlansb.f
new file mode 100644
index 000000000..ffb1b1367
--- /dev/null
+++ b/costa/native/external/lapack/zlansb.f
@@ -0,0 +1,188 @@
+      DOUBLE PRECISION FUNCTION ZLANSB( NORM, UPLO, N, K, AB, LDAB,
+     $                 WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM, UPLO
+      INTEGER            K, LDAB, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   WORK( * )
+      COMPLEX*16         AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLANSB  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the element of  largest absolute value  of an
+*  n by n symmetric band matrix A,  with k super-diagonals.
+*
+*  Description
+*  ===========
+*
+*  ZLANSB returns the value
+*
+*     ZLANSB = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in ZLANSB as described
+*          above.
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          band matrix A is supplied.
+*          = 'U':  Upper triangular part is supplied
+*          = 'L':  Lower triangular part is supplied
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, ZLANSB is
+*          set to zero.
+*
+*  K       (input) INTEGER
+*          The number of super-diagonals or sub-diagonals of the
+*          band matrix A.  K >= 0.
+*
+*  AB      (input) COMPLEX*16 array, dimension (LDAB,N)
+*          The upper or lower triangle of the symmetric band matrix A,
+*          stored in the first K+1 rows of AB.  The j-th column of A is
+*          stored in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)   = A(i,j) for j<=i<=min(n,j+k).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= K+1.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
+*          WORK is not referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, L
+      DOUBLE PRECISION   ABSA, SCALE, SUM, VALUE
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 20 J = 1, N
+               DO 10 I = MAX( K+2-J, 1 ), K + 1
+                  VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+            DO 40 J = 1, N
+               DO 30 I = 1, MIN( N+1-J, K+1 )
+                  VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR.
+     $         ( NORM.EQ.'1' ) ) THEN
+*
+*        Find normI(A) ( = norm1(A), since A is symmetric).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 60 J = 1, N
+               SUM = ZERO
+               L = K + 1 - J
+               DO 50 I = MAX( 1, J-K ), J - 1
+                  ABSA = ABS( AB( L+I, J ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+   50          CONTINUE
+               WORK( J ) = SUM + ABS( AB( K+1, J ) )
+   60       CONTINUE
+            DO 70 I = 1, N
+               VALUE = MAX( VALUE, WORK( I ) )
+   70       CONTINUE
+         ELSE
+            DO 80 I = 1, N
+               WORK( I ) = ZERO
+   80       CONTINUE
+            DO 100 J = 1, N
+               SUM = WORK( J ) + ABS( AB( 1, J ) )
+               L = 1 - J
+               DO 90 I = J + 1, MIN( N, J+K )
+                  ABSA = ABS( AB( L+I, J ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+   90          CONTINUE
+               VALUE = MAX( VALUE, SUM )
+  100       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         IF( K.GT.0 ) THEN
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               DO 110 J = 2, N
+                  CALL ZLASSQ( MIN( J-1, K ), AB( MAX( K+2-J, 1 ), J ),
+     $                         1, SCALE, SUM )
+  110          CONTINUE
+               L = K + 1
+            ELSE
+               DO 120 J = 1, N - 1
+                  CALL ZLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE,
+     $                         SUM )
+  120          CONTINUE
+               L = 1
+            END IF
+            SUM = 2*SUM
+         ELSE
+            L = 1
+         END IF
+         CALL ZLASSQ( N, AB( L, 1 ), LDAB, SCALE, SUM )
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      ZLANSB = VALUE
+      RETURN
+*
+*     End of ZLANSB
+*
+      END
diff --git a/costa/native/external/lapack/zlansp.f b/costa/native/external/lapack/zlansp.f
new file mode 100644
index 000000000..9f70a691b
--- /dev/null
+++ b/costa/native/external/lapack/zlansp.f
@@ -0,0 +1,207 @@
+      DOUBLE PRECISION FUNCTION ZLANSP( NORM, UPLO, N, AP, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM, UPLO
+      INTEGER            N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   WORK( * )
+      COMPLEX*16         AP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLANSP  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  complex symmetric matrix A,  supplied in packed form.
+*
+*  Description
+*  ===========
+*
+*  ZLANSP returns the value
+*
+*     ZLANSP = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in ZLANSP as described
+*          above.
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is supplied.
+*          = 'U':  Upper triangular part of A is supplied
+*          = 'L':  Lower triangular part of A is supplied
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, ZLANSP is
+*          set to zero.
+*
+*  AP      (input) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          The upper or lower triangle of the symmetric matrix A, packed
+*          columnwise in a linear array.  The j-th column of A is stored
+*          in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
+*          WORK is not referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, K
+      DOUBLE PRECISION   ABSA, SCALE, SUM, VALUE
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            K = 1
+            DO 20 J = 1, N
+               DO 10 I = K, K + J - 1
+                  VALUE = MAX( VALUE, ABS( AP( I ) ) )
+   10          CONTINUE
+               K = K + J
+   20       CONTINUE
+         ELSE
+            K = 1
+            DO 40 J = 1, N
+               DO 30 I = K, K + N - J
+                  VALUE = MAX( VALUE, ABS( AP( I ) ) )
+   30          CONTINUE
+               K = K + N - J + 1
+   40       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR.
+     $         ( NORM.EQ.'1' ) ) THEN
+*
+*        Find normI(A) ( = norm1(A), since A is symmetric).
+*
+         VALUE = ZERO
+         K = 1
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 60 J = 1, N
+               SUM = ZERO
+               DO 50 I = 1, J - 1
+                  ABSA = ABS( AP( K ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+                  K = K + 1
+   50          CONTINUE
+               WORK( J ) = SUM + ABS( AP( K ) )
+               K = K + 1
+   60       CONTINUE
+            DO 70 I = 1, N
+               VALUE = MAX( VALUE, WORK( I ) )
+   70       CONTINUE
+         ELSE
+            DO 80 I = 1, N
+               WORK( I ) = ZERO
+   80       CONTINUE
+            DO 100 J = 1, N
+               SUM = WORK( J ) + ABS( AP( K ) )
+               K = K + 1
+               DO 90 I = J + 1, N
+                  ABSA = ABS( AP( K ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+                  K = K + 1
+   90          CONTINUE
+               VALUE = MAX( VALUE, SUM )
+  100       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         K = 2
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 110 J = 2, N
+               CALL ZLASSQ( J-1, AP( K ), 1, SCALE, SUM )
+               K = K + J
+  110       CONTINUE
+         ELSE
+            DO 120 J = 1, N - 1
+               CALL ZLASSQ( N-J, AP( K ), 1, SCALE, SUM )
+               K = K + N - J + 1
+  120       CONTINUE
+         END IF
+         SUM = 2*SUM
+         K = 1
+         DO 130 I = 1, N
+            IF( DBLE( AP( K ) ).NE.ZERO ) THEN
+               ABSA = ABS( DBLE( AP( K ) ) )
+               IF( SCALE.LT.ABSA ) THEN
+                  SUM = ONE + SUM*( SCALE / ABSA )**2
+                  SCALE = ABSA
+               ELSE
+                  SUM = SUM + ( ABSA / SCALE )**2
+               END IF
+            END IF
+            IF( DIMAG( AP( K ) ).NE.ZERO ) THEN
+               ABSA = ABS( DIMAG( AP( K ) ) )
+               IF( SCALE.LT.ABSA ) THEN
+                  SUM = ONE + SUM*( SCALE / ABSA )**2
+                  SCALE = ABSA
+               ELSE
+                  SUM = SUM + ( ABSA / SCALE )**2
+               END IF
+            END IF
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               K = K + I + 1
+            ELSE
+               K = K + N - I + 1
+            END IF
+  130    CONTINUE
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      ZLANSP = VALUE
+      RETURN
+*
+*     End of ZLANSP
+*
+      END
diff --git a/costa/native/external/lapack/zlansy.f b/costa/native/external/lapack/zlansy.f
new file mode 100644
index 000000000..571783614
--- /dev/null
+++ b/costa/native/external/lapack/zlansy.f
@@ -0,0 +1,175 @@
+      DOUBLE PRECISION FUNCTION ZLANSY( NORM, UPLO, N, A, LDA, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          NORM, UPLO
+      INTEGER            LDA, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   WORK( * )
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLANSY  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  complex symmetric matrix A.
+*
+*  Description
+*  ===========
+*
+*  ZLANSY returns the value
+*
+*     ZLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in ZLANSY as described
+*          above.
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is to be referenced.
+*          = 'U':  Upper triangular part of A is referenced
+*          = 'L':  Lower triangular part of A is referenced
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, ZLANSY is
+*          set to zero.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,N)
+*          The symmetric matrix A.  If UPLO = 'U', the leading n by n
+*          upper triangular part of A contains the upper triangular part
+*          of the matrix A, and the strictly lower triangular part of A
+*          is not referenced.  If UPLO = 'L', the leading n by n lower
+*          triangular part of A contains the lower triangular part of
+*          the matrix A, and the strictly upper triangular part of A is
+*          not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(N,1).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
+*          WORK is not referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      DOUBLE PRECISION   ABSA, SCALE, SUM, VALUE
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 20 J = 1, N
+               DO 10 I = 1, J
+                  VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+            DO 40 J = 1, N
+               DO 30 I = J, N
+                  VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'I' ) ) .OR. ( LSAME( NORM, 'O' ) ) .OR.
+     $         ( NORM.EQ.'1' ) ) THEN
+*
+*        Find normI(A) ( = norm1(A), since A is symmetric).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 60 J = 1, N
+               SUM = ZERO
+               DO 50 I = 1, J - 1
+                  ABSA = ABS( A( I, J ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+   50          CONTINUE
+               WORK( J ) = SUM + ABS( A( J, J ) )
+   60       CONTINUE
+            DO 70 I = 1, N
+               VALUE = MAX( VALUE, WORK( I ) )
+   70       CONTINUE
+         ELSE
+            DO 80 I = 1, N
+               WORK( I ) = ZERO
+   80       CONTINUE
+            DO 100 J = 1, N
+               SUM = WORK( J ) + ABS( A( J, J ) )
+               DO 90 I = J + 1, N
+                  ABSA = ABS( A( I, J ) )
+                  SUM = SUM + ABSA
+                  WORK( I ) = WORK( I ) + ABSA
+   90          CONTINUE
+               VALUE = MAX( VALUE, SUM )
+  100       CONTINUE
+         END IF
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         SCALE = ZERO
+         SUM = ONE
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 110 J = 2, N
+               CALL ZLASSQ( J-1, A( 1, J ), 1, SCALE, SUM )
+  110       CONTINUE
+         ELSE
+            DO 120 J = 1, N - 1
+               CALL ZLASSQ( N-J, A( J+1, J ), 1, SCALE, SUM )
+  120       CONTINUE
+         END IF
+         SUM = 2*SUM
+         CALL ZLASSQ( N, A, LDA+1, SCALE, SUM )
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      ZLANSY = VALUE
+      RETURN
+*
+*     End of ZLANSY
+*
+      END
diff --git a/costa/native/external/lapack/zlantb.f b/costa/native/external/lapack/zlantb.f
new file mode 100644
index 000000000..b6ffac6c3
--- /dev/null
+++ b/costa/native/external/lapack/zlantb.f
@@ -0,0 +1,286 @@
+      DOUBLE PRECISION FUNCTION ZLANTB( NORM, UPLO, DIAG, N, K, AB,
+     $                 LDAB, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORM, UPLO
+      INTEGER            K, LDAB, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   WORK( * )
+      COMPLEX*16         AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLANTB  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the element of  largest absolute value  of an
+*  n by n triangular band matrix A,  with ( k + 1 ) diagonals.
+*
+*  Description
+*  ===========
+*
+*  ZLANTB returns the value
+*
+*     ZLANTB = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in ZLANTB as described
+*          above.
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the matrix A is upper or lower triangular.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  DIAG    (input) CHARACTER*1
+*          Specifies whether or not the matrix A is unit triangular.
+*          = 'N':  Non-unit triangular
+*          = 'U':  Unit triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, ZLANTB is
+*          set to zero.
+*
+*  K       (input) INTEGER
+*          The number of super-diagonals of the matrix A if UPLO = 'U',
+*          or the number of sub-diagonals of the matrix A if UPLO = 'L'.
+*          K >= 0.
+*
+*  AB      (input) COMPLEX*16 array, dimension (LDAB,N)
+*          The upper or lower triangular band matrix A, stored in the
+*          first k+1 rows of AB.  The j-th column of A is stored
+*          in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(k+1+i-j,j) = A(i,j) for max(1,j-k)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)   = A(i,j) for j<=i<=min(n,j+k).
+*          Note that when DIAG = 'U', the elements of the array AB
+*          corresponding to the diagonal elements of the matrix A are
+*          not referenced, but are assumed to be one.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= K+1.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I'; otherwise, WORK is not
+*          referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UDIAG
+      INTEGER            I, J, L
+      DOUBLE PRECISION   SCALE, SUM, VALUE
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         IF( LSAME( DIAG, 'U' ) ) THEN
+            VALUE = ONE
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               DO 20 J = 1, N
+                  DO 10 I = MAX( K+2-J, 1 ), K
+                     VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40 J = 1, N
+                  DO 30 I = 2, MIN( N+1-J, K+1 )
+                     VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   30             CONTINUE
+   40          CONTINUE
+            END IF
+         ELSE
+            VALUE = ZERO
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               DO 60 J = 1, N
+                  DO 50 I = MAX( K+2-J, 1 ), K + 1
+                     VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80 J = 1, N
+                  DO 70 I = 1, MIN( N+1-J, K+1 )
+                     VALUE = MAX( VALUE, ABS( AB( I, J ) ) )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
+*
+*        Find norm1(A).
+*
+         VALUE = ZERO
+         UDIAG = LSAME( DIAG, 'U' )
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 110 J = 1, N
+               IF( UDIAG ) THEN
+                  SUM = ONE
+                  DO 90 I = MAX( K+2-J, 1 ), K
+                     SUM = SUM + ABS( AB( I, J ) )
+   90             CONTINUE
+               ELSE
+                  SUM = ZERO
+                  DO 100 I = MAX( K+2-J, 1 ), K + 1
+                     SUM = SUM + ABS( AB( I, J ) )
+  100             CONTINUE
+               END IF
+               VALUE = MAX( VALUE, SUM )
+  110       CONTINUE
+         ELSE
+            DO 140 J = 1, N
+               IF( UDIAG ) THEN
+                  SUM = ONE
+                  DO 120 I = 2, MIN( N+1-J, K+1 )
+                     SUM = SUM + ABS( AB( I, J ) )
+  120             CONTINUE
+               ELSE
+                  SUM = ZERO
+                  DO 130 I = 1, MIN( N+1-J, K+1 )
+                     SUM = SUM + ABS( AB( I, J ) )
+  130             CONTINUE
+               END IF
+               VALUE = MAX( VALUE, SUM )
+  140       CONTINUE
+         END IF
+      ELSE IF( LSAME( NORM, 'I' ) ) THEN
+*
+*        Find normI(A).
+*
+         VALUE = ZERO
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               DO 150 I = 1, N
+                  WORK( I ) = ONE
+  150          CONTINUE
+               DO 170 J = 1, N
+                  L = K + 1 - J
+                  DO 160 I = MAX( 1, J-K ), J - 1
+                     WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) )
+  160             CONTINUE
+  170          CONTINUE
+            ELSE
+               DO 180 I = 1, N
+                  WORK( I ) = ZERO
+  180          CONTINUE
+               DO 200 J = 1, N
+                  L = K + 1 - J
+                  DO 190 I = MAX( 1, J-K ), J
+                     WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) )
+  190             CONTINUE
+  200          CONTINUE
+            END IF
+         ELSE
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               DO 210 I = 1, N
+                  WORK( I ) = ONE
+  210          CONTINUE
+               DO 230 J = 1, N
+                  L = 1 - J
+                  DO 220 I = J + 1, MIN( N, J+K )
+                     WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) )
+  220             CONTINUE
+  230          CONTINUE
+            ELSE
+               DO 240 I = 1, N
+                  WORK( I ) = ZERO
+  240          CONTINUE
+               DO 260 J = 1, N
+                  L = 1 - J
+                  DO 250 I = J, MIN( N, J+K )
+                     WORK( I ) = WORK( I ) + ABS( AB( L+I, J ) )
+  250             CONTINUE
+  260          CONTINUE
+            END IF
+         END IF
+         DO 270 I = 1, N
+            VALUE = MAX( VALUE, WORK( I ) )
+  270    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               SCALE = ONE
+               SUM = N
+               IF( K.GT.0 ) THEN
+                  DO 280 J = 2, N
+                     CALL ZLASSQ( MIN( J-1, K ),
+     $                            AB( MAX( K+2-J, 1 ), J ), 1, SCALE,
+     $                            SUM )
+  280             CONTINUE
+               END IF
+            ELSE
+               SCALE = ZERO
+               SUM = ONE
+               DO 290 J = 1, N
+                  CALL ZLASSQ( MIN( J, K+1 ), AB( MAX( K+2-J, 1 ), J ),
+     $                         1, SCALE, SUM )
+  290          CONTINUE
+            END IF
+         ELSE
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               SCALE = ONE
+               SUM = N
+               IF( K.GT.0 ) THEN
+                  DO 300 J = 1, N - 1
+                     CALL ZLASSQ( MIN( N-J, K ), AB( 2, J ), 1, SCALE,
+     $                            SUM )
+  300             CONTINUE
+               END IF
+            ELSE
+               SCALE = ZERO
+               SUM = ONE
+               DO 310 J = 1, N
+                  CALL ZLASSQ( MIN( N-J+1, K+1 ), AB( 1, J ), 1, SCALE,
+     $                         SUM )
+  310          CONTINUE
+            END IF
+         END IF
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      ZLANTB = VALUE
+      RETURN
+*
+*     End of ZLANTB
+*
+      END
diff --git a/costa/native/external/lapack/zlantp.f b/costa/native/external/lapack/zlantp.f
new file mode 100644
index 000000000..e2483d8d2
--- /dev/null
+++ b/costa/native/external/lapack/zlantp.f
@@ -0,0 +1,287 @@
+      DOUBLE PRECISION FUNCTION ZLANTP( NORM, UPLO, DIAG, N, AP, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORM, UPLO
+      INTEGER            N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   WORK( * )
+      COMPLEX*16         AP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLANTP  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  triangular matrix A, supplied in packed form.
+*
+*  Description
+*  ===========
+*
+*  ZLANTP returns the value
+*
+*     ZLANTP = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in ZLANTP as described
+*          above.
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the matrix A is upper or lower triangular.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  DIAG    (input) CHARACTER*1
+*          Specifies whether or not the matrix A is unit triangular.
+*          = 'N':  Non-unit triangular
+*          = 'U':  Unit triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.  When N = 0, ZLANTP is
+*          set to zero.
+*
+*  AP      (input) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          The upper or lower triangular matrix A, packed columnwise in
+*          a linear array.  The j-th column of A is stored in the array
+*          AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*          Note that when DIAG = 'U', the elements of the array AP
+*          corresponding to the diagonal elements of the matrix A are
+*          not referenced, but are assumed to be one.
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK),
+*          where LWORK >= N when NORM = 'I'; otherwise, WORK is not
+*          referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UDIAG
+      INTEGER            I, J, K
+      DOUBLE PRECISION   SCALE, SUM, VALUE
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         K = 1
+         IF( LSAME( DIAG, 'U' ) ) THEN
+            VALUE = ONE
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               DO 20 J = 1, N
+                  DO 10 I = K, K + J - 2
+                     VALUE = MAX( VALUE, ABS( AP( I ) ) )
+   10             CONTINUE
+                  K = K + J
+   20          CONTINUE
+            ELSE
+               DO 40 J = 1, N
+                  DO 30 I = K + 1, K + N - J
+                     VALUE = MAX( VALUE, ABS( AP( I ) ) )
+   30             CONTINUE
+                  K = K + N - J + 1
+   40          CONTINUE
+            END IF
+         ELSE
+            VALUE = ZERO
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               DO 60 J = 1, N
+                  DO 50 I = K, K + J - 1
+                     VALUE = MAX( VALUE, ABS( AP( I ) ) )
+   50             CONTINUE
+                  K = K + J
+   60          CONTINUE
+            ELSE
+               DO 80 J = 1, N
+                  DO 70 I = K, K + N - J
+                     VALUE = MAX( VALUE, ABS( AP( I ) ) )
+   70             CONTINUE
+                  K = K + N - J + 1
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
+*
+*        Find norm1(A).
+*
+         VALUE = ZERO
+         K = 1
+         UDIAG = LSAME( DIAG, 'U' )
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 110 J = 1, N
+               IF( UDIAG ) THEN
+                  SUM = ONE
+                  DO 90 I = K, K + J - 2
+                     SUM = SUM + ABS( AP( I ) )
+   90             CONTINUE
+               ELSE
+                  SUM = ZERO
+                  DO 100 I = K, K + J - 1
+                     SUM = SUM + ABS( AP( I ) )
+  100             CONTINUE
+               END IF
+               K = K + J
+               VALUE = MAX( VALUE, SUM )
+  110       CONTINUE
+         ELSE
+            DO 140 J = 1, N
+               IF( UDIAG ) THEN
+                  SUM = ONE
+                  DO 120 I = K + 1, K + N - J
+                     SUM = SUM + ABS( AP( I ) )
+  120             CONTINUE
+               ELSE
+                  SUM = ZERO
+                  DO 130 I = K, K + N - J
+                     SUM = SUM + ABS( AP( I ) )
+  130             CONTINUE
+               END IF
+               K = K + N - J + 1
+               VALUE = MAX( VALUE, SUM )
+  140       CONTINUE
+         END IF
+      ELSE IF( LSAME( NORM, 'I' ) ) THEN
+*
+*        Find normI(A).
+*
+         K = 1
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               DO 150 I = 1, N
+                  WORK( I ) = ONE
+  150          CONTINUE
+               DO 170 J = 1, N
+                  DO 160 I = 1, J - 1
+                     WORK( I ) = WORK( I ) + ABS( AP( K ) )
+                     K = K + 1
+  160             CONTINUE
+                  K = K + 1
+  170          CONTINUE
+            ELSE
+               DO 180 I = 1, N
+                  WORK( I ) = ZERO
+  180          CONTINUE
+               DO 200 J = 1, N
+                  DO 190 I = 1, J
+                     WORK( I ) = WORK( I ) + ABS( AP( K ) )
+                     K = K + 1
+  190             CONTINUE
+  200          CONTINUE
+            END IF
+         ELSE
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               DO 210 I = 1, N
+                  WORK( I ) = ONE
+  210          CONTINUE
+               DO 230 J = 1, N
+                  K = K + 1
+                  DO 220 I = J + 1, N
+                     WORK( I ) = WORK( I ) + ABS( AP( K ) )
+                     K = K + 1
+  220             CONTINUE
+  230          CONTINUE
+            ELSE
+               DO 240 I = 1, N
+                  WORK( I ) = ZERO
+  240          CONTINUE
+               DO 260 J = 1, N
+                  DO 250 I = J, N
+                     WORK( I ) = WORK( I ) + ABS( AP( K ) )
+                     K = K + 1
+  250             CONTINUE
+  260          CONTINUE
+            END IF
+         END IF
+         VALUE = ZERO
+         DO 270 I = 1, N
+            VALUE = MAX( VALUE, WORK( I ) )
+  270    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               SCALE = ONE
+               SUM = N
+               K = 2
+               DO 280 J = 2, N
+                  CALL ZLASSQ( J-1, AP( K ), 1, SCALE, SUM )
+                  K = K + J
+  280          CONTINUE
+            ELSE
+               SCALE = ZERO
+               SUM = ONE
+               K = 1
+               DO 290 J = 1, N
+                  CALL ZLASSQ( J, AP( K ), 1, SCALE, SUM )
+                  K = K + J
+  290          CONTINUE
+            END IF
+         ELSE
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               SCALE = ONE
+               SUM = N
+               K = 2
+               DO 300 J = 1, N - 1
+                  CALL ZLASSQ( N-J, AP( K ), 1, SCALE, SUM )
+                  K = K + N - J + 1
+  300          CONTINUE
+            ELSE
+               SCALE = ZERO
+               SUM = ONE
+               K = 1
+               DO 310 J = 1, N
+                  CALL ZLASSQ( N-J+1, AP( K ), 1, SCALE, SUM )
+                  K = K + N - J + 1
+  310          CONTINUE
+            END IF
+         END IF
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      ZLANTP = VALUE
+      RETURN
+*
+*     End of ZLANTP
+*
+      END
diff --git a/costa/native/external/lapack/zlantr.f b/costa/native/external/lapack/zlantr.f
new file mode 100644
index 000000000..1cb7efce2
--- /dev/null
+++ b/costa/native/external/lapack/zlantr.f
@@ -0,0 +1,278 @@
+      DOUBLE PRECISION FUNCTION ZLANTR( NORM, UPLO, DIAG, M, N, A, LDA,
+     $                 WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORM, UPLO
+      INTEGER            LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   WORK( * )
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLANTR  returns the value of the one norm,  or the Frobenius norm, or
+*  the  infinity norm,  or the  element of  largest absolute value  of a
+*  trapezoidal or triangular matrix A.
+*
+*  Description
+*  ===========
+*
+*  ZLANTR returns the value
+*
+*     ZLANTR = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+*              (
+*              ( norm1(A),         NORM = '1', 'O' or 'o'
+*              (
+*              ( normI(A),         NORM = 'I' or 'i'
+*              (
+*              ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+*
+*  where  norm1  denotes the  one norm of a matrix (maximum column sum),
+*  normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+*  normF  denotes the  Frobenius norm of a matrix (square root of sum of
+*  squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies the value to be returned in ZLANTR as described
+*          above.
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the matrix A is upper or lower trapezoidal.
+*          = 'U':  Upper trapezoidal
+*          = 'L':  Lower trapezoidal
+*          Note that A is triangular instead of trapezoidal if M = N.
+*
+*  DIAG    (input) CHARACTER*1
+*          Specifies whether or not the matrix A has unit diagonal.
+*          = 'N':  Non-unit diagonal
+*          = 'U':  Unit diagonal
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0, and if
+*          UPLO = 'U', M <= N.  When M = 0, ZLANTR is set to zero.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0, and if
+*          UPLO = 'L', N <= M.  When N = 0, ZLANTR is set to zero.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,N)
+*          The trapezoidal matrix A (A is triangular if M = N).
+*          If UPLO = 'U', the leading m by n upper trapezoidal part of
+*          the array A contains the upper trapezoidal matrix, and the
+*          strictly lower triangular part of A is not referenced.
+*          If UPLO = 'L', the leading m by n lower trapezoidal part of
+*          the array A contains the lower trapezoidal matrix, and the
+*          strictly upper triangular part of A is not referenced.  Note
+*          that when DIAG = 'U', the diagonal elements of A are not
+*          referenced and are assumed to be one.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(M,1).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK),
+*          where LWORK >= M when NORM = 'I'; otherwise, WORK is not
+*          referenced.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UDIAG
+      INTEGER            I, J
+      DOUBLE PRECISION   SCALE, SUM, VALUE
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZLASSQ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( MIN( M, N ).EQ.0 ) THEN
+         VALUE = ZERO
+      ELSE IF( LSAME( NORM, 'M' ) ) THEN
+*
+*        Find max(abs(A(i,j))).
+*
+         IF( LSAME( DIAG, 'U' ) ) THEN
+            VALUE = ONE
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               DO 20 J = 1, N
+                  DO 10 I = 1, MIN( M, J-1 )
+                     VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   10             CONTINUE
+   20          CONTINUE
+            ELSE
+               DO 40 J = 1, N
+                  DO 30 I = J + 1, M
+                     VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   30             CONTINUE
+   40          CONTINUE
+            END IF
+         ELSE
+            VALUE = ZERO
+            IF( LSAME( UPLO, 'U' ) ) THEN
+               DO 60 J = 1, N
+                  DO 50 I = 1, MIN( M, J )
+                     VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   50             CONTINUE
+   60          CONTINUE
+            ELSE
+               DO 80 J = 1, N
+                  DO 70 I = J, M
+                     VALUE = MAX( VALUE, ABS( A( I, J ) ) )
+   70             CONTINUE
+   80          CONTINUE
+            END IF
+         END IF
+      ELSE IF( ( LSAME( NORM, 'O' ) ) .OR. ( NORM.EQ.'1' ) ) THEN
+*
+*        Find norm1(A).
+*
+         VALUE = ZERO
+         UDIAG = LSAME( DIAG, 'U' )
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            DO 110 J = 1, N
+               IF( ( UDIAG ) .AND. ( J.LE.M ) ) THEN
+                  SUM = ONE
+                  DO 90 I = 1, J - 1
+                     SUM = SUM + ABS( A( I, J ) )
+   90             CONTINUE
+               ELSE
+                  SUM = ZERO
+                  DO 100 I = 1, MIN( M, J )
+                     SUM = SUM + ABS( A( I, J ) )
+  100             CONTINUE
+               END IF
+               VALUE = MAX( VALUE, SUM )
+  110       CONTINUE
+         ELSE
+            DO 140 J = 1, N
+               IF( UDIAG ) THEN
+                  SUM = ONE
+                  DO 120 I = J + 1, M
+                     SUM = SUM + ABS( A( I, J ) )
+  120             CONTINUE
+               ELSE
+                  SUM = ZERO
+                  DO 130 I = J, M
+                     SUM = SUM + ABS( A( I, J ) )
+  130             CONTINUE
+               END IF
+               VALUE = MAX( VALUE, SUM )
+  140       CONTINUE
+         END IF
+      ELSE IF( LSAME( NORM, 'I' ) ) THEN
+*
+*        Find normI(A).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               DO 150 I = 1, M
+                  WORK( I ) = ONE
+  150          CONTINUE
+               DO 170 J = 1, N
+                  DO 160 I = 1, MIN( M, J-1 )
+                     WORK( I ) = WORK( I ) + ABS( A( I, J ) )
+  160             CONTINUE
+  170          CONTINUE
+            ELSE
+               DO 180 I = 1, M
+                  WORK( I ) = ZERO
+  180          CONTINUE
+               DO 200 J = 1, N
+                  DO 190 I = 1, MIN( M, J )
+                     WORK( I ) = WORK( I ) + ABS( A( I, J ) )
+  190             CONTINUE
+  200          CONTINUE
+            END IF
+         ELSE
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               DO 210 I = 1, N
+                  WORK( I ) = ONE
+  210          CONTINUE
+               DO 220 I = N + 1, M
+                  WORK( I ) = ZERO
+  220          CONTINUE
+               DO 240 J = 1, N
+                  DO 230 I = J + 1, M
+                     WORK( I ) = WORK( I ) + ABS( A( I, J ) )
+  230             CONTINUE
+  240          CONTINUE
+            ELSE
+               DO 250 I = 1, M
+                  WORK( I ) = ZERO
+  250          CONTINUE
+               DO 270 J = 1, N
+                  DO 260 I = J, M
+                     WORK( I ) = WORK( I ) + ABS( A( I, J ) )
+  260             CONTINUE
+  270          CONTINUE
+            END IF
+         END IF
+         VALUE = ZERO
+         DO 280 I = 1, M
+            VALUE = MAX( VALUE, WORK( I ) )
+  280    CONTINUE
+      ELSE IF( ( LSAME( NORM, 'F' ) ) .OR. ( LSAME( NORM, 'E' ) ) ) THEN
+*
+*        Find normF(A).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               SCALE = ONE
+               SUM = MIN( M, N )
+               DO 290 J = 2, N
+                  CALL ZLASSQ( MIN( M, J-1 ), A( 1, J ), 1, SCALE, SUM )
+  290          CONTINUE
+            ELSE
+               SCALE = ZERO
+               SUM = ONE
+               DO 300 J = 1, N
+                  CALL ZLASSQ( MIN( M, J ), A( 1, J ), 1, SCALE, SUM )
+  300          CONTINUE
+            END IF
+         ELSE
+            IF( LSAME( DIAG, 'U' ) ) THEN
+               SCALE = ONE
+               SUM = MIN( M, N )
+               DO 310 J = 1, N
+                  CALL ZLASSQ( M-J, A( MIN( M, J+1 ), J ), 1, SCALE,
+     $                         SUM )
+  310          CONTINUE
+            ELSE
+               SCALE = ZERO
+               SUM = ONE
+               DO 320 J = 1, N
+                  CALL ZLASSQ( M-J+1, A( J, J ), 1, SCALE, SUM )
+  320          CONTINUE
+            END IF
+         END IF
+         VALUE = SCALE*SQRT( SUM )
+      END IF
+*
+      ZLANTR = VALUE
+      RETURN
+*
+*     End of ZLANTR
+*
+      END
diff --git a/costa/native/external/lapack/zlapll.f b/costa/native/external/lapack/zlapll.f
new file mode 100644
index 000000000..014085eaf
--- /dev/null
+++ b/costa/native/external/lapack/zlapll.f
@@ -0,0 +1,104 @@
+      SUBROUTINE ZLAPLL( N, X, INCX, Y, INCY, SSMIN )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, INCY, N
+      DOUBLE PRECISION   SSMIN
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  Given two column vectors X and Y, let
+*
+*                       A = ( X Y ).
+*
+*  The subroutine first computes the QR factorization of A = Q*R,
+*  and then computes the SVD of the 2-by-2 upper triangular matrix R.
+*  The smaller singular value of R is returned in SSMIN, which is used
+*  as the measurement of the linear dependency of the vectors X and Y.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The length of the vectors X and Y.
+*
+*  X       (input/output) COMPLEX*16 array, dimension (1+(N-1)*INCX)
+*          On entry, X contains the N-vector X.
+*          On exit, X is overwritten.
+*
+*  INCX    (input) INTEGER
+*          The increment between successive elements of X. INCX > 0.
+*
+*  Y       (input/output) COMPLEX*16 array, dimension (1+(N-1)*INCY)
+*          On entry, Y contains the N-vector Y.
+*          On exit, Y is overwritten.
+*
+*  INCY    (input) INTEGER
+*          The increment between successive elements of Y. INCY > 0.
+*
+*  SSMIN   (output) DOUBLE PRECISION
+*          The smallest singular value of the N-by-2 matrix A = ( X Y ).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+      COMPLEX*16         CONE
+      PARAMETER          ( CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      DOUBLE PRECISION   SSMAX
+      COMPLEX*16         A11, A12, A22, C, TAU
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DCONJG
+*     ..
+*     .. External Functions ..
+      COMPLEX*16         ZDOTC
+      EXTERNAL           ZDOTC
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLAS2, ZAXPY, ZLARFG
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.1 ) THEN
+         SSMIN = ZERO
+         RETURN
+      END IF
+*
+*     Compute the QR factorization of the N-by-2 matrix ( X Y )
+*
+      CALL ZLARFG( N, X( 1 ), X( 1+INCX ), INCX, TAU )
+      A11 = X( 1 )
+      X( 1 ) = CONE
+*
+      C = -DCONJG( TAU )*ZDOTC( N, X, INCX, Y, INCY )
+      CALL ZAXPY( N, C, X, INCX, Y, INCY )
+*
+      CALL ZLARFG( N-1, Y( 1+INCY ), Y( 1+2*INCY ), INCY, TAU )
+*
+      A12 = Y( 1 )
+      A22 = Y( 1+INCY )
+*
+*     Compute the SVD of 2-by-2 Upper triangular matrix.
+*
+      CALL DLAS2( ABS( A11 ), ABS( A12 ), ABS( A22 ), SSMIN, SSMAX )
+*
+      RETURN
+*
+*     End of ZLAPLL
+*
+      END
diff --git a/costa/native/external/lapack/zlapmt.f b/costa/native/external/lapack/zlapmt.f
new file mode 100644
index 000000000..c0efd5a8c
--- /dev/null
+++ b/costa/native/external/lapack/zlapmt.f
@@ -0,0 +1,135 @@
+      SUBROUTINE ZLAPMT( FORWRD, M, N, X, LDX, K )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      LOGICAL            FORWRD
+      INTEGER            LDX, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            K( * )
+      COMPLEX*16         X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLAPMT rearranges the columns of the M by N matrix X as specified
+*  by the permutation K(1),K(2),...,K(N) of the integers 1,...,N.
+*  If FORWRD = .TRUE.,  forward permutation:
+*
+*       X(*,K(J)) is moved X(*,J) for J = 1,2,...,N.
+*
+*  If FORWRD = .FALSE., backward permutation:
+*
+*       X(*,J) is moved to X(*,K(J)) for J = 1,2,...,N.
+*
+*  Arguments
+*  =========
+*
+*  FORWRD  (input) LOGICAL
+*          = .TRUE., forward permutation
+*          = .FALSE., backward permutation
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix X. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix X. N >= 0.
+*
+*  X       (input/output) COMPLEX*16 array, dimension (LDX,N)
+*          On entry, the M by N matrix X.
+*          On exit, X contains the permuted matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X, LDX >= MAX(1,M).
+*
+*  K       (input) INTEGER array, dimension (N)
+*          On entry, K contains the permutation vector.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, II, IN, J
+      COMPLEX*16         TEMP
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.LE.1 )
+     $   RETURN
+*
+      DO 10 I = 1, N
+         K( I ) = -K( I )
+   10 CONTINUE
+*
+      IF( FORWRD ) THEN
+*
+*        Forward permutation
+*
+         DO 50 I = 1, N
+*
+            IF( K( I ).GT.0 )
+     $         GO TO 40
+*
+            J = I
+            K( J ) = -K( J )
+            IN = K( J )
+*
+   20       CONTINUE
+            IF( K( IN ).GT.0 )
+     $         GO TO 40
+*
+            DO 30 II = 1, M
+               TEMP = X( II, J )
+               X( II, J ) = X( II, IN )
+               X( II, IN ) = TEMP
+   30       CONTINUE
+*
+            K( IN ) = -K( IN )
+            J = IN
+            IN = K( IN )
+            GO TO 20
+*
+   40       CONTINUE
+*
+   50    CONTINUE
+*
+      ELSE
+*
+*        Backward permutation
+*
+         DO 90 I = 1, N
+*
+            IF( K( I ).GT.0 )
+     $         GO TO 80
+*
+            K( I ) = -K( I )
+            J = K( I )
+   60       CONTINUE
+            IF( J.EQ.I )
+     $         GO TO 80
+*
+            DO 70 II = 1, M
+               TEMP = X( II, I )
+               X( II, I ) = X( II, J )
+               X( II, J ) = TEMP
+   70       CONTINUE
+*
+            K( J ) = -K( J )
+            J = K( J )
+            GO TO 60
+*
+   80       CONTINUE
+*
+   90    CONTINUE
+*
+      END IF
+*
+      RETURN
+*
+*     End of ZLAPMT
+*
+      END
diff --git a/costa/native/external/lapack/zlaqgb.f b/costa/native/external/lapack/zlaqgb.f
new file mode 100644
index 000000000..fdce88bd4
--- /dev/null
+++ b/costa/native/external/lapack/zlaqgb.f
@@ -0,0 +1,170 @@
+      SUBROUTINE ZLAQGB( M, N, KL, KU, AB, LDAB, R, C, ROWCND, COLCND,
+     $                   AMAX, EQUED )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED
+      INTEGER            KL, KU, LDAB, M, N
+      DOUBLE PRECISION   AMAX, COLCND, ROWCND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   C( * ), R( * )
+      COMPLEX*16         AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLAQGB equilibrates a general M by N band matrix A with KL
+*  subdiagonals and KU superdiagonals using the row and scaling factors
+*  in the vectors R and C.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  KL      (input) INTEGER
+*          The number of subdiagonals within the band of A.  KL >= 0.
+*
+*  KU      (input) INTEGER
+*          The number of superdiagonals within the band of A.  KU >= 0.
+*
+*  AB      (input/output) COMPLEX*16 array, dimension (LDAB,N)
+*          On entry, the matrix A in band storage, in rows 1 to KL+KU+1.
+*          The j-th column of A is stored in the j-th column of the
+*          array AB as follows:
+*          AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
+*
+*          On exit, the equilibrated matrix, in the same storage format
+*          as A.  See EQUED for the form of the equilibrated matrix.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDA >= KL+KU+1.
+*
+*  R       (output) DOUBLE PRECISION array, dimension (M)
+*          The row scale factors for A.
+*
+*  C       (output) DOUBLE PRECISION array, dimension (N)
+*          The column scale factors for A.
+*
+*  ROWCND  (output) DOUBLE PRECISION
+*          Ratio of the smallest R(i) to the largest R(i).
+*
+*  COLCND  (output) DOUBLE PRECISION
+*          Ratio of the smallest C(i) to the largest C(i).
+*
+*  AMAX    (input) DOUBLE PRECISION
+*          Absolute value of largest matrix entry.
+*
+*  EQUED   (output) CHARACTER*1
+*          Specifies the form of equilibration that was done.
+*          = 'N':  No equilibration
+*          = 'R':  Row equilibration, i.e., A has been premultiplied by
+*                  diag(R).
+*          = 'C':  Column equilibration, i.e., A has been postmultiplied
+*                  by diag(C).
+*          = 'B':  Both row and column equilibration, i.e., A has been
+*                  replaced by diag(R) * A * diag(C).
+*
+*  Internal Parameters
+*  ===================
+*
+*  THRESH is a threshold value used to decide if row or column scaling
+*  should be done based on the ratio of the row or column scaling
+*  factors.  If ROWCND < THRESH, row scaling is done, and if
+*  COLCND < THRESH, column scaling is done.
+*
+*  LARGE and SMALL are threshold values used to decide if row scaling
+*  should be done based on the absolute size of the largest matrix
+*  element.  If AMAX > LARGE or AMAX < SMALL, row scaling is done.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, THRESH
+      PARAMETER          ( ONE = 1.0D+0, THRESH = 0.1D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      DOUBLE PRECISION   CJ, LARGE, SMALL
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 .OR. N.LE.0 ) THEN
+         EQUED = 'N'
+         RETURN
+      END IF
+*
+*     Initialize LARGE and SMALL.
+*
+      SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' )
+      LARGE = ONE / SMALL
+*
+      IF( ROWCND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE )
+     $     THEN
+*
+*        No row scaling
+*
+         IF( COLCND.GE.THRESH ) THEN
+*
+*           No column scaling
+*
+            EQUED = 'N'
+         ELSE
+*
+*           Column scaling
+*
+            DO 20 J = 1, N
+               CJ = C( J )
+               DO 10 I = MAX( 1, J-KU ), MIN( M, J+KL )
+                  AB( KU+1+I-J, J ) = CJ*AB( KU+1+I-J, J )
+   10          CONTINUE
+   20       CONTINUE
+            EQUED = 'C'
+         END IF
+      ELSE IF( COLCND.GE.THRESH ) THEN
+*
+*        Row scaling, no column scaling
+*
+         DO 40 J = 1, N
+            DO 30 I = MAX( 1, J-KU ), MIN( M, J+KL )
+               AB( KU+1+I-J, J ) = R( I )*AB( KU+1+I-J, J )
+   30       CONTINUE
+   40    CONTINUE
+         EQUED = 'R'
+      ELSE
+*
+*        Row and column scaling
+*
+         DO 60 J = 1, N
+            CJ = C( J )
+            DO 50 I = MAX( 1, J-KU ), MIN( M, J+KL )
+               AB( KU+1+I-J, J ) = CJ*R( I )*AB( KU+1+I-J, J )
+   50       CONTINUE
+   60    CONTINUE
+         EQUED = 'B'
+      END IF
+*
+      RETURN
+*
+*     End of ZLAQGB
+*
+      END
diff --git a/costa/native/external/lapack/zlaqge.f b/costa/native/external/lapack/zlaqge.f
new file mode 100644
index 000000000..162b38b4b
--- /dev/null
+++ b/costa/native/external/lapack/zlaqge.f
@@ -0,0 +1,156 @@
+      SUBROUTINE ZLAQGE( M, N, A, LDA, R, C, ROWCND, COLCND, AMAX,
+     $                   EQUED )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED
+      INTEGER            LDA, M, N
+      DOUBLE PRECISION   AMAX, COLCND, ROWCND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   C( * ), R( * )
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLAQGE equilibrates a general M by N matrix A using the row and
+*  scaling factors in the vectors R and C.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the M by N matrix A.
+*          On exit, the equilibrated matrix.  See EQUED for the form of
+*          the equilibrated matrix.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(M,1).
+*
+*  R       (input) DOUBLE PRECISION array, dimension (M)
+*          The row scale factors for A.
+*
+*  C       (input) DOUBLE PRECISION array, dimension (N)
+*          The column scale factors for A.
+*
+*  ROWCND  (input) DOUBLE PRECISION
+*          Ratio of the smallest R(i) to the largest R(i).
+*
+*  COLCND  (input) DOUBLE PRECISION
+*          Ratio of the smallest C(i) to the largest C(i).
+*
+*  AMAX    (input) DOUBLE PRECISION
+*          Absolute value of largest matrix entry.
+*
+*  EQUED   (output) CHARACTER*1
+*          Specifies the form of equilibration that was done.
+*          = 'N':  No equilibration
+*          = 'R':  Row equilibration, i.e., A has been premultiplied by
+*                  diag(R).
+*          = 'C':  Column equilibration, i.e., A has been postmultiplied
+*                  by diag(C).
+*          = 'B':  Both row and column equilibration, i.e., A has been
+*                  replaced by diag(R) * A * diag(C).
+*
+*  Internal Parameters
+*  ===================
+*
+*  THRESH is a threshold value used to decide if row or column scaling
+*  should be done based on the ratio of the row or column scaling
+*  factors.  If ROWCND < THRESH, row scaling is done, and if
+*  COLCND < THRESH, column scaling is done.
+*
+*  LARGE and SMALL are threshold values used to decide if row scaling
+*  should be done based on the absolute size of the largest matrix
+*  element.  If AMAX > LARGE or AMAX < SMALL, row scaling is done.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, THRESH
+      PARAMETER          ( ONE = 1.0D+0, THRESH = 0.1D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      DOUBLE PRECISION   CJ, LARGE, SMALL
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 .OR. N.LE.0 ) THEN
+         EQUED = 'N'
+         RETURN
+      END IF
+*
+*     Initialize LARGE and SMALL.
+*
+      SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' )
+      LARGE = ONE / SMALL
+*
+      IF( ROWCND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE )
+     $     THEN
+*
+*        No row scaling
+*
+         IF( COLCND.GE.THRESH ) THEN
+*
+*           No column scaling
+*
+            EQUED = 'N'
+         ELSE
+*
+*           Column scaling
+*
+            DO 20 J = 1, N
+               CJ = C( J )
+               DO 10 I = 1, M
+                  A( I, J ) = CJ*A( I, J )
+   10          CONTINUE
+   20       CONTINUE
+            EQUED = 'C'
+         END IF
+      ELSE IF( COLCND.GE.THRESH ) THEN
+*
+*        Row scaling, no column scaling
+*
+         DO 40 J = 1, N
+            DO 30 I = 1, M
+               A( I, J ) = R( I )*A( I, J )
+   30       CONTINUE
+   40    CONTINUE
+         EQUED = 'R'
+      ELSE
+*
+*        Row and column scaling
+*
+         DO 60 J = 1, N
+            CJ = C( J )
+            DO 50 I = 1, M
+               A( I, J ) = CJ*R( I )*A( I, J )
+   50       CONTINUE
+   60    CONTINUE
+         EQUED = 'B'
+      END IF
+*
+      RETURN
+*
+*     End of ZLAQGE
+*
+      END
diff --git a/costa/native/external/lapack/zlaqhb.f b/costa/native/external/lapack/zlaqhb.f
new file mode 100644
index 000000000..c891c3a67
--- /dev/null
+++ b/costa/native/external/lapack/zlaqhb.f
@@ -0,0 +1,152 @@
+      SUBROUTINE ZLAQHB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, UPLO
+      INTEGER            KD, LDAB, N
+      DOUBLE PRECISION   AMAX, SCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   S( * )
+      COMPLEX*16         AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLAQHB equilibrates a symmetric band matrix A using the scaling
+*  factors in the vector S.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of super-diagonals of the matrix A if UPLO = 'U',
+*          or the number of sub-diagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) COMPLEX*16 array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*          On exit, if INFO = 0, the triangular factor U or L from the
+*          Cholesky factorization A = U'*U or A = L*L' of the band
+*          matrix A, in the same storage format as A.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  S       (output) DOUBLE PRECISION array, dimension (N)
+*          The scale factors for A.
+*
+*  SCOND   (input) DOUBLE PRECISION
+*          Ratio of the smallest S(i) to the largest S(i).
+*
+*  AMAX    (input) DOUBLE PRECISION
+*          Absolute value of largest matrix entry.
+*
+*  EQUED   (output) CHARACTER*1
+*          Specifies whether or not equilibration was done.
+*          = 'N':  No equilibration.
+*          = 'Y':  Equilibration was done, i.e., A has been replaced by
+*                  diag(S) * A * diag(S).
+*
+*  Internal Parameters
+*  ===================
+*
+*  THRESH is a threshold value used to decide if scaling should be done
+*  based on the ratio of the scaling factors.  If SCOND < THRESH,
+*  scaling is done.
+*
+*  LARGE and SMALL are threshold values used to decide if scaling should
+*  be done based on the absolute size of the largest matrix element.
+*  If AMAX > LARGE or AMAX < SMALL, scaling is done.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, THRESH
+      PARAMETER          ( ONE = 1.0D+0, THRESH = 0.1D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      DOUBLE PRECISION   CJ, LARGE, SMALL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 ) THEN
+         EQUED = 'N'
+         RETURN
+      END IF
+*
+*     Initialize LARGE and SMALL.
+*
+      SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' )
+      LARGE = ONE / SMALL
+*
+      IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN
+*
+*        No equilibration
+*
+         EQUED = 'N'
+      ELSE
+*
+*        Replace A by diag(S) * A * diag(S).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*           Upper triangle of A is stored in band format.
+*
+            DO 20 J = 1, N
+               CJ = S( J )
+               DO 10 I = MAX( 1, J-KD ), J - 1
+                  AB( KD+1+I-J, J ) = CJ*S( I )*AB( KD+1+I-J, J )
+   10          CONTINUE
+               AB( KD+1, J ) = CJ*CJ*DBLE( AB( KD+1, J ) )
+   20       CONTINUE
+         ELSE
+*
+*           Lower triangle of A is stored.
+*
+            DO 40 J = 1, N
+               CJ = S( J )
+               AB( 1, J ) = CJ*CJ*DBLE( AB( 1, J ) )
+               DO 30 I = J + 1, MIN( N, J+KD )
+                  AB( 1+I-J, J ) = CJ*S( I )*AB( 1+I-J, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         EQUED = 'Y'
+      END IF
+*
+      RETURN
+*
+*     End of ZLAQHB
+*
+      END
diff --git a/costa/native/external/lapack/zlaqhe.f b/costa/native/external/lapack/zlaqhe.f
new file mode 100644
index 000000000..3eb2a093b
--- /dev/null
+++ b/costa/native/external/lapack/zlaqhe.f
@@ -0,0 +1,148 @@
+      SUBROUTINE ZLAQHE( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, UPLO
+      INTEGER            LDA, N
+      DOUBLE PRECISION   AMAX, SCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   S( * )
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLAQHE equilibrates a Hermitian matrix A using the scaling factors
+*  in the vector S.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          Hermitian matrix A is stored.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+*          n by n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n by n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if EQUED = 'Y', the equilibrated matrix:
+*          diag(S) * A * diag(S).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(N,1).
+*
+*  S       (input) DOUBLE PRECISION array, dimension (N)
+*          The scale factors for A.
+*
+*  SCOND   (input) DOUBLE PRECISION
+*          Ratio of the smallest S(i) to the largest S(i).
+*
+*  AMAX    (input) DOUBLE PRECISION
+*          Absolute value of largest matrix entry.
+*
+*  EQUED   (output) CHARACTER*1
+*          Specifies whether or not equilibration was done.
+*          = 'N':  No equilibration.
+*          = 'Y':  Equilibration was done, i.e., A has been replaced by
+*                  diag(S) * A * diag(S).
+*
+*  Internal Parameters
+*  ===================
+*
+*  THRESH is a threshold value used to decide if scaling should be done
+*  based on the ratio of the scaling factors.  If SCOND < THRESH,
+*  scaling is done.
+*
+*  LARGE and SMALL are threshold values used to decide if scaling should
+*  be done based on the absolute size of the largest matrix element.
+*  If AMAX > LARGE or AMAX < SMALL, scaling is done.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, THRESH
+      PARAMETER          ( ONE = 1.0D+0, THRESH = 0.1D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      DOUBLE PRECISION   CJ, LARGE, SMALL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 ) THEN
+         EQUED = 'N'
+         RETURN
+      END IF
+*
+*     Initialize LARGE and SMALL.
+*
+      SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' )
+      LARGE = ONE / SMALL
+*
+      IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN
+*
+*        No equilibration
+*
+         EQUED = 'N'
+      ELSE
+*
+*        Replace A by diag(S) * A * diag(S).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*           Upper triangle of A is stored.
+*
+            DO 20 J = 1, N
+               CJ = S( J )
+               DO 10 I = 1, J - 1
+                  A( I, J ) = CJ*S( I )*A( I, J )
+   10          CONTINUE
+               A( J, J ) = CJ*CJ*DBLE( A( J, J ) )
+   20       CONTINUE
+         ELSE
+*
+*           Lower triangle of A is stored.
+*
+            DO 40 J = 1, N
+               CJ = S( J )
+               A( J, J ) = CJ*CJ*DBLE( A( J, J ) )
+               DO 30 I = J + 1, N
+                  A( I, J ) = CJ*S( I )*A( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         EQUED = 'Y'
+      END IF
+*
+      RETURN
+*
+*     End of ZLAQHE
+*
+      END
diff --git a/costa/native/external/lapack/zlaqhp.f b/costa/native/external/lapack/zlaqhp.f
new file mode 100644
index 000000000..07a2039f4
--- /dev/null
+++ b/costa/native/external/lapack/zlaqhp.f
@@ -0,0 +1,147 @@
+      SUBROUTINE ZLAQHP( UPLO, N, AP, S, SCOND, AMAX, EQUED )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, UPLO
+      INTEGER            N
+      DOUBLE PRECISION   AMAX, SCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   S( * )
+      COMPLEX*16         AP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLAQHP equilibrates a Hermitian matrix A using the scaling factors
+*  in the vector S.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          Hermitian matrix A is stored.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, the equilibrated matrix:  diag(S) * A * diag(S), in
+*          the same storage format as A.
+*
+*  S       (input) DOUBLE PRECISION array, dimension (N)
+*          The scale factors for A.
+*
+*  SCOND   (input) DOUBLE PRECISION
+*          Ratio of the smallest S(i) to the largest S(i).
+*
+*  AMAX    (input) DOUBLE PRECISION
+*          Absolute value of largest matrix entry.
+*
+*  EQUED   (output) CHARACTER*1
+*          Specifies whether or not equilibration was done.
+*          = 'N':  No equilibration.
+*          = 'Y':  Equilibration was done, i.e., A has been replaced by
+*                  diag(S) * A * diag(S).
+*
+*  Internal Parameters
+*  ===================
+*
+*  THRESH is a threshold value used to decide if scaling should be done
+*  based on the ratio of the scaling factors.  If SCOND < THRESH,
+*  scaling is done.
+*
+*  LARGE and SMALL are threshold values used to decide if scaling should
+*  be done based on the absolute size of the largest matrix element.
+*  If AMAX > LARGE or AMAX < SMALL, scaling is done.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, THRESH
+      PARAMETER          ( ONE = 1.0D+0, THRESH = 0.1D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, JC
+      DOUBLE PRECISION   CJ, LARGE, SMALL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 ) THEN
+         EQUED = 'N'
+         RETURN
+      END IF
+*
+*     Initialize LARGE and SMALL.
+*
+      SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' )
+      LARGE = ONE / SMALL
+*
+      IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN
+*
+*        No equilibration
+*
+         EQUED = 'N'
+      ELSE
+*
+*        Replace A by diag(S) * A * diag(S).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*           Upper triangle of A is stored.
+*
+            JC = 1
+            DO 20 J = 1, N
+               CJ = S( J )
+               DO 10 I = 1, J - 1
+                  AP( JC+I-1 ) = CJ*S( I )*AP( JC+I-1 )
+   10          CONTINUE
+               AP( JC+J-1 ) = CJ*CJ*DBLE( AP( JC+J-1 ) )
+               JC = JC + J
+   20       CONTINUE
+         ELSE
+*
+*           Lower triangle of A is stored.
+*
+            JC = 1
+            DO 40 J = 1, N
+               CJ = S( J )
+               AP( JC ) = CJ*CJ*DBLE( AP( JC ) )
+               DO 30 I = J + 1, N
+                  AP( JC+I-J ) = CJ*S( I )*AP( JC+I-J )
+   30          CONTINUE
+               JC = JC + N - J + 1
+   40       CONTINUE
+         END IF
+         EQUED = 'Y'
+      END IF
+*
+      RETURN
+*
+*     End of ZLAQHP
+*
+      END
diff --git a/costa/native/external/lapack/zlaqp2.f b/costa/native/external/lapack/zlaqp2.f
new file mode 100644
index 000000000..b7386e1ee
--- /dev/null
+++ b/costa/native/external/lapack/zlaqp2.f
@@ -0,0 +1,170 @@
+      SUBROUTINE ZLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2,
+     $                   WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            LDA, M, N, OFFSET
+*     ..
+*     .. Array Arguments ..
+      INTEGER            JPVT( * )
+      DOUBLE PRECISION   VN1( * ), VN2( * )
+      COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLAQP2 computes a QR factorization with column pivoting of
+*  the block A(OFFSET+1:M,1:N).
+*  The block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A. N >= 0.
+*
+*  OFFSET  (input) INTEGER
+*          The number of rows of the matrix A that must be pivoted
+*          but no factorized. OFFSET >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, the upper triangle of block A(OFFSET+1:M,1:N) is
+*          the triangular factor obtained; the elements in block
+*          A(OFFSET+1:M,1:N) below the diagonal, together with the
+*          array TAU, represent the orthogonal matrix Q as a product of
+*          elementary reflectors. Block A(1:OFFSET,1:N) has been
+*          accordingly pivoted, but no factorized.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  JPVT    (input/output) INTEGER array, dimension (N)
+*          On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted
+*          to the front of A*P (a leading column); if JPVT(i) = 0,
+*          the i-th column of A is a free column.
+*          On exit, if JPVT(i) = k, then the i-th column of A*P
+*          was the k-th column of A.
+*
+*  TAU     (output) COMPLEX*16 array, dimension (min(M,N))
+*          The scalar factors of the elementary reflectors.
+*
+*  VN1     (input/output) DOUBLE PRECISION array, dimension (N)
+*          The vector with the partial column norms.
+*
+*  VN2     (input/output) DOUBLE PRECISION array, dimension (N)
+*          The vector with the exact column norms.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (N)
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain
+*    X. Sun, Computer Science Dept., Duke University, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      COMPLEX*16         CONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0,
+     $                   CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, ITEMP, J, MN, OFFPI, PVT
+      DOUBLE PRECISION   TEMP, TEMP2
+      COMPLEX*16         AII
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZLARF, ZLARFG, ZSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DCONJG, MAX, MIN, SQRT
+*     ..
+*     .. External Functions ..
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DZNRM2
+      EXTERNAL           IDAMAX, DZNRM2
+*     ..
+*     .. Executable Statements ..
+*
+      MN = MIN( M-OFFSET, N )
+*
+*     Compute factorization.
+*
+      DO 20 I = 1, MN
+*
+         OFFPI = OFFSET + I
+*
+*        Determine ith pivot column and swap if necessary.
+*
+         PVT = ( I-1 ) + IDAMAX( N-I+1, VN1( I ), 1 )
+*
+         IF( PVT.NE.I ) THEN
+            CALL ZSWAP( M, A( 1, PVT ), 1, A( 1, I ), 1 )
+            ITEMP = JPVT( PVT )
+            JPVT( PVT ) = JPVT( I )
+            JPVT( I ) = ITEMP
+            VN1( PVT ) = VN1( I )
+            VN2( PVT ) = VN2( I )
+         END IF
+*
+*        Generate elementary reflector H(i).
+*
+         IF( OFFPI.LT.M ) THEN
+            CALL ZLARFG( M-OFFPI+1, A( OFFPI, I ), A( OFFPI+1, I ), 1,
+     $                   TAU( I ) )
+         ELSE
+            CALL ZLARFG( 1, A( M, I ), A( M, I ), 1, TAU( I ) )
+         END IF
+*
+         IF( I.LT.N ) THEN
+*
+*           Apply H(i)' to A(offset+i:m,i+1:n) from the left.
+*
+            AII = A( OFFPI, I )
+            A( OFFPI, I ) = CONE
+            CALL ZLARF( 'Left', M-OFFPI+1, N-I, A( OFFPI, I ), 1,
+     $                  DCONJG( TAU( I ) ), A( OFFPI, I+1 ), LDA,
+     $                  WORK( 1 ) )
+            A( OFFPI, I ) = AII
+         END IF
+*
+*        Update partial column norms.
+*
+         DO 10 J = I + 1, N
+            IF( VN1( J ).NE.ZERO ) THEN
+               TEMP = ONE - ( ABS( A( OFFPI, J ) ) / VN1( J ) )**2
+               TEMP = MAX( TEMP, ZERO )
+               TEMP2 = ONE + 0.05D0*TEMP*( VN1( J ) / VN2( J ) )**2
+               IF( TEMP2.EQ.ONE ) THEN
+                  IF( OFFPI.LT.M ) THEN
+                     VN1( J ) = DZNRM2( M-OFFPI, A( OFFPI+1, J ), 1 )
+                     VN2( J ) = VN1( J )
+                  ELSE
+                     VN1( J ) = ZERO
+                     VN2( J ) = ZERO
+                  END IF
+               ELSE
+                  VN1( J ) = VN1( J )*SQRT( TEMP )
+               END IF
+            END IF
+   10    CONTINUE
+*
+   20 CONTINUE
+*
+      RETURN
+*
+*     End of ZLAQP2
+*
+      END
diff --git a/costa/native/external/lapack/zlaqps.f b/costa/native/external/lapack/zlaqps.f
new file mode 100644
index 000000000..27b0d8baa
--- /dev/null
+++ b/costa/native/external/lapack/zlaqps.f
@@ -0,0 +1,260 @@
+      SUBROUTINE ZLAQPS( M, N, OFFSET, NB, KB, A, LDA, JPVT, TAU, VN1,
+     $                   VN2, AUXV, F, LDF )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            KB, LDA, LDF, M, N, NB, OFFSET
+*     ..
+*     .. Array Arguments ..
+      INTEGER            JPVT( * )
+      DOUBLE PRECISION   VN1( * ), VN2( * )
+      COMPLEX*16         A( LDA, * ), AUXV( * ), F( LDF, * ), TAU( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLAQPS computes a step of QR factorization with column pivoting
+*  of a complex M-by-N matrix A by using Blas-3.  It tries to factorize
+*  NB columns from A starting from the row OFFSET+1, and updates all
+*  of the matrix with Blas-3 xGEMM.
+*
+*  In some cases, due to catastrophic cancellations, it cannot
+*  factorize NB columns.  Hence, the actual number of factorized
+*  columns is returned in KB.
+*
+*  Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A. N >= 0
+*
+*  OFFSET  (input) INTEGER
+*          The number of rows of A that have been factorized in
+*          previous steps.
+*
+*  NB      (input) INTEGER
+*          The number of columns to factorize.
+*
+*  KB      (output) INTEGER
+*          The number of columns actually factorized.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, block A(OFFSET+1:M,1:KB) is the triangular
+*          factor obtained and block A(1:OFFSET,1:N) has been
+*          accordingly pivoted, but no factorized.
+*          The rest of the matrix, block A(OFFSET+1:M,KB+1:N) has
+*          been updated.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  JPVT    (input/output) INTEGER array, dimension (N)
+*          JPVT(I) = K <==> Column K of the full matrix A has been
+*          permuted into position I in AP.
+*
+*  TAU     (output) COMPLEX*16 array, dimension (KB)
+*          The scalar factors of the elementary reflectors.
+*
+*  VN1     (input/output) DOUBLE PRECISION array, dimension (N)
+*          The vector with the partial column norms.
+*
+*  VN2     (input/output) DOUBLE PRECISION array, dimension (N)
+*          The vector with the exact column norms.
+*
+*  AUXV    (input/output) COMPLEX*16 array, dimension (NB)
+*          Auxiliar vector.
+*
+*  F       (input/output) COMPLEX*16 array, dimension (LDF,NB)
+*          Matrix F' = L*Y'*A.
+*
+*  LDF     (input) INTEGER
+*          The leading dimension of the array F. LDF >= max(1,N).
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain
+*    X. Sun, Computer Science Dept., Duke University, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      COMPLEX*16         CZERO, CONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0,
+     $                   CZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            ITEMP, J, K, LASTRK, LSTICC, PVT, RK
+      DOUBLE PRECISION   TEMP, TEMP2
+      COMPLEX*16         AKK
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZGEMM, ZGEMV, ZLARFG, ZSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCONJG, MAX, MIN, NINT, SQRT
+*     ..
+*     .. External Functions ..
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DZNRM2
+      EXTERNAL           IDAMAX, DZNRM2
+*     ..
+*     .. Executable Statements ..
+*
+      LASTRK = MIN( M, N+OFFSET )
+      LSTICC = 0
+      K = 0
+*
+*     Beginning of while loop.
+*
+   10 CONTINUE
+      IF( ( K.LT.NB ) .AND. ( LSTICC.EQ.0 ) ) THEN
+         K = K + 1
+         RK = OFFSET + K
+*
+*        Determine ith pivot column and swap if necessary
+*
+         PVT = ( K-1 ) + IDAMAX( N-K+1, VN1( K ), 1 )
+         IF( PVT.NE.K ) THEN
+            CALL ZSWAP( M, A( 1, PVT ), 1, A( 1, K ), 1 )
+            CALL ZSWAP( K-1, F( PVT, 1 ), LDF, F( K, 1 ), LDF )
+            ITEMP = JPVT( PVT )
+            JPVT( PVT ) = JPVT( K )
+            JPVT( K ) = ITEMP
+            VN1( PVT ) = VN1( K )
+            VN2( PVT ) = VN2( K )
+         END IF
+*
+*        Apply previous Householder reflectors to column K:
+*        A(RK:M,K) := A(RK:M,K) - A(RK:M,1:K-1)*F(K,1:K-1)'.
+*
+         IF( K.GT.1 ) THEN
+*CC            CALL ZGEMM( 'No transpose', 'Conjugate transpose',
+*CC     $                  M-RK+1, 1, K-1, -CONE, A( RK, 1 ), LDA,
+*CC     $                  F( K, 1 ), LDF, CONE, A( RK, K ), LDA )
+            DO 20 J = 1, K - 1
+               F( K, J ) = DCONJG( F( K, J ) )
+   20       CONTINUE
+            CALL ZGEMV( 'No transpose', M-RK+1, K-1, -CONE, A( RK, 1 ),
+     $                  LDA, F( K, 1 ), LDF, CONE, A( RK, K ), 1 )
+            DO 30 J = 1, K - 1
+               F( K, J ) = DCONJG( F( K, J ) )
+   30       CONTINUE
+         END IF
+*
+*        Generate elementary reflector H(k).
+*
+         IF( RK.LT.M ) THEN
+            CALL ZLARFG( M-RK+1, A( RK, K ), A( RK+1, K ), 1, TAU( K ) )
+         ELSE
+            CALL ZLARFG( 1, A( RK, K ), A( RK, K ), 1, TAU( K ) )
+         END IF
+*
+         AKK = A( RK, K )
+         A( RK, K ) = CONE
+*
+*        Compute Kth column of F:
+*
+*        Compute  F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)'*A(RK:M,K).
+*
+         IF( K.LT.N ) THEN
+            CALL ZGEMV( 'Conjugate transpose', M-RK+1, N-K, TAU( K ),
+     $                  A( RK, K+1 ), LDA, A( RK, K ), 1, CZERO,
+     $                  F( K+1, K ), 1 )
+         END IF
+*
+*        Padding F(1:K,K) with zeros.
+*
+         DO 40 J = 1, K
+            F( J, K ) = CZERO
+   40    CONTINUE
+*
+*        Incremental updating of F:
+*        F(1:N,K) := F(1:N,K) - tau(K)*F(1:N,1:K-1)*A(RK:M,1:K-1)'
+*                    *A(RK:M,K).
+*
+         IF( K.GT.1 ) THEN
+            CALL ZGEMV( 'Conjugate transpose', M-RK+1, K-1, -TAU( K ),
+     $                  A( RK, 1 ), LDA, A( RK, K ), 1, CZERO,
+     $                  AUXV( 1 ), 1 )
+*
+            CALL ZGEMV( 'No transpose', N, K-1, CONE, F( 1, 1 ), LDF,
+     $                  AUXV( 1 ), 1, CONE, F( 1, K ), 1 )
+         END IF
+*
+*        Update the current row of A:
+*        A(RK,K+1:N) := A(RK,K+1:N) - A(RK,1:K)*F(K+1:N,1:K)'.
+*
+         IF( K.LT.N ) THEN
+            CALL ZGEMM( 'No transpose', 'Conjugate transpose', 1, N-K,
+     $                  K, -CONE, A( RK, 1 ), LDA, F( K+1, 1 ), LDF,
+     $                  CONE, A( RK, K+1 ), LDA )
+         END IF
+*
+*        Update partial column norms.
+*
+         IF( RK.LT.LASTRK ) THEN
+            DO 50 J = K + 1, N
+               IF( VN1( J ).NE.ZERO ) THEN
+                  TEMP = ABS( A( RK, J ) ) / VN1( J )
+                  TEMP = MAX( ZERO, ( ONE+TEMP )*( ONE-TEMP ) )
+                  TEMP2 = ONE + 0.05D0*TEMP*( VN1( J ) / VN2( J ) )**2
+                  IF( TEMP2.EQ.ONE ) THEN
+                     VN2( J ) = DBLE( LSTICC )
+                     LSTICC = J
+                  ELSE
+                     VN1( J ) = VN1( J )*SQRT( TEMP )
+                  END IF
+               END IF
+   50       CONTINUE
+         END IF
+*
+         A( RK, K ) = AKK
+*
+*        End of while loop.
+*
+         GO TO 10
+      END IF
+      KB = K
+      RK = OFFSET + KB
+*
+*     Apply the block reflector to the rest of the matrix:
+*     A(OFFSET+KB+1:M,KB+1:N) := A(OFFSET+KB+1:M,KB+1:N) -
+*                         A(OFFSET+KB+1:M,1:KB)*F(KB+1:N,1:KB)'.
+*
+      IF( KB.LT.MIN( N, M-OFFSET ) ) THEN
+         CALL ZGEMM( 'No transpose', 'Conjugate transpose', M-RK, N-KB,
+     $               KB, -CONE, A( RK+1, 1 ), LDA, F( KB+1, 1 ), LDF,
+     $               CONE, A( RK+1, KB+1 ), LDA )
+      END IF
+*
+*     Recomputation of difficult columns.
+*
+   60 CONTINUE
+      IF( LSTICC.GT.0 ) THEN
+         ITEMP = NINT( VN2( LSTICC ) )
+         VN1( LSTICC ) = DZNRM2( M-RK, A( RK+1, LSTICC ), 1 )
+         VN2( LSTICC ) = VN1( LSTICC )
+         LSTICC = ITEMP
+         GO TO 60
+      END IF
+*
+      RETURN
+*
+*     End of ZLAQPS
+*
+      END
diff --git a/costa/native/external/lapack/zlaqsb.f b/costa/native/external/lapack/zlaqsb.f
new file mode 100644
index 000000000..5fd31ab66
--- /dev/null
+++ b/costa/native/external/lapack/zlaqsb.f
@@ -0,0 +1,150 @@
+      SUBROUTINE ZLAQSB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, UPLO
+      INTEGER            KD, LDAB, N
+      DOUBLE PRECISION   AMAX, SCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   S( * )
+      COMPLEX*16         AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLAQSB equilibrates a symmetric band matrix A using the scaling
+*  factors in the vector S.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of super-diagonals of the matrix A if UPLO = 'U',
+*          or the number of sub-diagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) COMPLEX*16 array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the symmetric band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*          On exit, if INFO = 0, the triangular factor U or L from the
+*          Cholesky factorization A = U'*U or A = L*L' of the band
+*          matrix A, in the same storage format as A.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  S       (output) DOUBLE PRECISION array, dimension (N)
+*          The scale factors for A.
+*
+*  SCOND   (input) DOUBLE PRECISION
+*          Ratio of the smallest S(i) to the largest S(i).
+*
+*  AMAX    (input) DOUBLE PRECISION
+*          Absolute value of largest matrix entry.
+*
+*  EQUED   (output) CHARACTER*1
+*          Specifies whether or not equilibration was done.
+*          = 'N':  No equilibration.
+*          = 'Y':  Equilibration was done, i.e., A has been replaced by
+*                  diag(S) * A * diag(S).
+*
+*  Internal Parameters
+*  ===================
+*
+*  THRESH is a threshold value used to decide if scaling should be done
+*  based on the ratio of the scaling factors.  If SCOND < THRESH,
+*  scaling is done.
+*
+*  LARGE and SMALL are threshold values used to decide if scaling should
+*  be done based on the absolute size of the largest matrix element.
+*  If AMAX > LARGE or AMAX < SMALL, scaling is done.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, THRESH
+      PARAMETER          ( ONE = 1.0D+0, THRESH = 0.1D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      DOUBLE PRECISION   CJ, LARGE, SMALL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 ) THEN
+         EQUED = 'N'
+         RETURN
+      END IF
+*
+*     Initialize LARGE and SMALL.
+*
+      SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' )
+      LARGE = ONE / SMALL
+*
+      IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN
+*
+*        No equilibration
+*
+         EQUED = 'N'
+      ELSE
+*
+*        Replace A by diag(S) * A * diag(S).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*           Upper triangle of A is stored in band format.
+*
+            DO 20 J = 1, N
+               CJ = S( J )
+               DO 10 I = MAX( 1, J-KD ), J
+                  AB( KD+1+I-J, J ) = CJ*S( I )*AB( KD+1+I-J, J )
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+*
+*           Lower triangle of A is stored.
+*
+            DO 40 J = 1, N
+               CJ = S( J )
+               DO 30 I = J, MIN( N, J+KD )
+                  AB( 1+I-J, J ) = CJ*S( I )*AB( 1+I-J, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         EQUED = 'Y'
+      END IF
+*
+      RETURN
+*
+*     End of ZLAQSB
+*
+      END
diff --git a/costa/native/external/lapack/zlaqsp.f b/costa/native/external/lapack/zlaqsp.f
new file mode 100644
index 000000000..f554a51aa
--- /dev/null
+++ b/costa/native/external/lapack/zlaqsp.f
@@ -0,0 +1,142 @@
+      SUBROUTINE ZLAQSP( UPLO, N, AP, S, SCOND, AMAX, EQUED )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, UPLO
+      INTEGER            N
+      DOUBLE PRECISION   AMAX, SCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   S( * )
+      COMPLEX*16         AP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLAQSP equilibrates a symmetric matrix A using the scaling factors
+*  in the vector S.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, the equilibrated matrix:  diag(S) * A * diag(S), in
+*          the same storage format as A.
+*
+*  S       (input) DOUBLE PRECISION array, dimension (N)
+*          The scale factors for A.
+*
+*  SCOND   (input) DOUBLE PRECISION
+*          Ratio of the smallest S(i) to the largest S(i).
+*
+*  AMAX    (input) DOUBLE PRECISION
+*          Absolute value of largest matrix entry.
+*
+*  EQUED   (output) CHARACTER*1
+*          Specifies whether or not equilibration was done.
+*          = 'N':  No equilibration.
+*          = 'Y':  Equilibration was done, i.e., A has been replaced by
+*                  diag(S) * A * diag(S).
+*
+*  Internal Parameters
+*  ===================
+*
+*  THRESH is a threshold value used to decide if scaling should be done
+*  based on the ratio of the scaling factors.  If SCOND < THRESH,
+*  scaling is done.
+*
+*  LARGE and SMALL are threshold values used to decide if scaling should
+*  be done based on the absolute size of the largest matrix element.
+*  If AMAX > LARGE or AMAX < SMALL, scaling is done.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, THRESH
+      PARAMETER          ( ONE = 1.0D+0, THRESH = 0.1D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, JC
+      DOUBLE PRECISION   CJ, LARGE, SMALL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 ) THEN
+         EQUED = 'N'
+         RETURN
+      END IF
+*
+*     Initialize LARGE and SMALL.
+*
+      SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' )
+      LARGE = ONE / SMALL
+*
+      IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN
+*
+*        No equilibration
+*
+         EQUED = 'N'
+      ELSE
+*
+*        Replace A by diag(S) * A * diag(S).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*           Upper triangle of A is stored.
+*
+            JC = 1
+            DO 20 J = 1, N
+               CJ = S( J )
+               DO 10 I = 1, J
+                  AP( JC+I-1 ) = CJ*S( I )*AP( JC+I-1 )
+   10          CONTINUE
+               JC = JC + J
+   20       CONTINUE
+         ELSE
+*
+*           Lower triangle of A is stored.
+*
+            JC = 1
+            DO 40 J = 1, N
+               CJ = S( J )
+               DO 30 I = J, N
+                  AP( JC+I-J ) = CJ*S( I )*AP( JC+I-J )
+   30          CONTINUE
+               JC = JC + N - J + 1
+   40       CONTINUE
+         END IF
+         EQUED = 'Y'
+      END IF
+*
+      RETURN
+*
+*     End of ZLAQSP
+*
+      END
diff --git a/costa/native/external/lapack/zlaqsy.f b/costa/native/external/lapack/zlaqsy.f
new file mode 100644
index 000000000..b7b987e54
--- /dev/null
+++ b/costa/native/external/lapack/zlaqsy.f
@@ -0,0 +1,143 @@
+      SUBROUTINE ZLAQSY( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, UPLO
+      INTEGER            LDA, N
+      DOUBLE PRECISION   AMAX, SCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   S( * )
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLAQSY equilibrates a symmetric matrix A using the scaling factors
+*  in the vector S.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          n by n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n by n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if EQUED = 'Y', the equilibrated matrix:
+*          diag(S) * A * diag(S).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(N,1).
+*
+*  S       (input) DOUBLE PRECISION array, dimension (N)
+*          The scale factors for A.
+*
+*  SCOND   (input) DOUBLE PRECISION
+*          Ratio of the smallest S(i) to the largest S(i).
+*
+*  AMAX    (input) DOUBLE PRECISION
+*          Absolute value of largest matrix entry.
+*
+*  EQUED   (output) CHARACTER*1
+*          Specifies whether or not equilibration was done.
+*          = 'N':  No equilibration.
+*          = 'Y':  Equilibration was done, i.e., A has been replaced by
+*                  diag(S) * A * diag(S).
+*
+*  Internal Parameters
+*  ===================
+*
+*  THRESH is a threshold value used to decide if scaling should be done
+*  based on the ratio of the scaling factors.  If SCOND < THRESH,
+*  scaling is done.
+*
+*  LARGE and SMALL are threshold values used to decide if scaling should
+*  be done based on the absolute size of the largest matrix element.
+*  If AMAX > LARGE or AMAX < SMALL, scaling is done.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, THRESH
+      PARAMETER          ( ONE = 1.0D+0, THRESH = 0.1D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      DOUBLE PRECISION   CJ, LARGE, SMALL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 ) THEN
+         EQUED = 'N'
+         RETURN
+      END IF
+*
+*     Initialize LARGE and SMALL.
+*
+      SMALL = DLAMCH( 'Safe minimum' ) / DLAMCH( 'Precision' )
+      LARGE = ONE / SMALL
+*
+      IF( SCOND.GE.THRESH .AND. AMAX.GE.SMALL .AND. AMAX.LE.LARGE ) THEN
+*
+*        No equilibration
+*
+         EQUED = 'N'
+      ELSE
+*
+*        Replace A by diag(S) * A * diag(S).
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*           Upper triangle of A is stored.
+*
+            DO 20 J = 1, N
+               CJ = S( J )
+               DO 10 I = 1, J
+                  A( I, J ) = CJ*S( I )*A( I, J )
+   10          CONTINUE
+   20       CONTINUE
+         ELSE
+*
+*           Lower triangle of A is stored.
+*
+            DO 40 J = 1, N
+               CJ = S( J )
+               DO 30 I = J, N
+                  A( I, J ) = CJ*S( I )*A( I, J )
+   30          CONTINUE
+   40       CONTINUE
+         END IF
+         EQUED = 'Y'
+      END IF
+*
+      RETURN
+*
+*     End of ZLAQSY
+*
+      END
diff --git a/costa/native/external/lapack/zlar1v.f b/costa/native/external/lapack/zlar1v.f
new file mode 100644
index 000000000..5e5d29d01
--- /dev/null
+++ b/costa/native/external/lapack/zlar1v.f
@@ -0,0 +1,328 @@
+      SUBROUTINE ZLAR1V( N, B1, BN, SIGMA, D, L, LD, LLD, GERSCH, Z,
+     $                   ZTZ, MINGMA, R, ISUPPZ, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            B1, BN, N, R
+      DOUBLE PRECISION   MINGMA, SIGMA, ZTZ
+*     ..
+*     .. Array Arguments ..
+      INTEGER            ISUPPZ( * )
+      DOUBLE PRECISION   D( * ), GERSCH( * ), L( * ), LD( * ), LLD( * ),
+     $                   WORK( * )
+      COMPLEX*16         Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLAR1V computes the (scaled) r-th column of the inverse of
+*  the sumbmatrix in rows B1 through BN of the tridiagonal matrix
+*  L D L^T - sigma I. The following steps accomplish this computation :
+*  (a) Stationary qd transform,  L D L^T - sigma I = L(+) D(+) L(+)^T,
+*  (b) Progressive qd transform, L D L^T - sigma I = U(-) D(-) U(-)^T,
+*  (c) Computation of the diagonal elements of the inverse of
+*      L D L^T - sigma I by combining the above transforms, and choosing
+*      r as the index where the diagonal of the inverse is (one of the)
+*      largest in magnitude.
+*  (d) Computation of the (scaled) r-th column of the inverse using the
+*      twisted factorization obtained by combining the top part of the
+*      the stationary and the bottom part of the progressive transform.
+*
+*  Arguments
+*  =========
+*
+*  N        (input) INTEGER
+*           The order of the matrix L D L^T.
+*
+*  B1       (input) INTEGER
+*           First index of the submatrix of L D L^T.
+*
+*  BN       (input) INTEGER
+*           Last index of the submatrix of L D L^T.
+*
+*  SIGMA    (input) DOUBLE PRECISION
+*           The shift. Initially, when R = 0, SIGMA should be a good
+*           approximation to an eigenvalue of L D L^T.
+*
+*  L        (input) DOUBLE PRECISION array, dimension (N-1)
+*           The (n-1) subdiagonal elements of the unit bidiagonal matrix
+*           L, in elements 1 to N-1.
+*
+*  D        (input) DOUBLE PRECISION array, dimension (N)
+*           The n diagonal elements of the diagonal matrix D.
+*
+*  LD       (input) DOUBLE PRECISION array, dimension (N-1)
+*           The n-1 elements L(i)*D(i).
+*
+*  LLD      (input) DOUBLE PRECISION array, dimension (N-1)
+*           The n-1 elements L(i)*L(i)*D(i).
+*
+*  GERSCH   (input) DOUBLE PRECISION array, dimension (2*N)
+*           The n Gerschgorin intervals. These are used to restrict
+*           the initial search for R, when R is input as 0.
+*
+*  Z        (output) COMPLEX*16 array, dimension (N)
+*           The (scaled) r-th column of the inverse. Z(R) is returned
+*           to be 1.
+*
+*  ZTZ      (output) DOUBLE PRECISION
+*           The square of the norm of Z.
+*
+*  MINGMA   (output) DOUBLE PRECISION
+*           The reciprocal of the largest (in magnitude) diagonal
+*           element of the inverse of L D L^T - sigma I.
+*
+*  R        (input/output) INTEGER
+*           Initially, R should be input to be 0 and is then output as
+*           the index where the diagonal element of the inverse is
+*           largest in magnitude. In later iterations, this same value
+*           of R should be input.
+*
+*  ISUPPZ   (output) INTEGER array, dimension (2)
+*           The support of the vector in Z, i.e., the vector Z is
+*           nonzero only in elements ISUPPZ(1) through ISUPPZ( 2 ).
+*
+*  WORK     (workspace) DOUBLE PRECISION array, dimension (4*N)
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Inderjit Dhillon, IBM Almaden, USA
+*     Osni Marques, LBNL/NERSC, USA
+*     Ken Stanley, Computer Science Division, University of
+*       California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            BLKSIZ
+      PARAMETER          ( BLKSIZ = 32 )
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+      COMPLEX*16         CONE
+      PARAMETER          ( CONE = ( 1.0D0, 0.0D0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            SAWNAN
+      INTEGER            FROM, I, INDP, INDS, INDUMN, J, R1, R2, TO
+      DOUBLE PRECISION   DMINUS, DPLUS, EPS, S, TMP
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      EPS = DLAMCH( 'Precision' )
+      IF( R.EQ.0 ) THEN
+*
+*        Eliminate the top and bottom indices from the possible values
+*        of R where the desired eigenvector is largest in magnitude.
+*
+         R1 = B1
+         DO 10 I = B1, BN
+            IF( SIGMA.GE.GERSCH( 2*I-1 ) .OR. SIGMA.LE.GERSCH( 2*I ) )
+     $           THEN
+               R1 = I
+               GO TO 20
+            END IF
+   10    CONTINUE
+   20    CONTINUE
+         R2 = BN
+         DO 30 I = BN, B1, -1
+            IF( SIGMA.GE.GERSCH( 2*I-1 ) .OR. SIGMA.LE.GERSCH( 2*I ) )
+     $           THEN
+               R2 = I
+               GO TO 40
+            END IF
+   30    CONTINUE
+   40    CONTINUE
+      ELSE
+         R1 = R
+         R2 = R
+      END IF
+*
+      INDUMN = N
+      INDS = 2*N + 1
+      INDP = 3*N + 1
+      SAWNAN = .FALSE.
+*
+*     Compute the stationary transform (using the differential form)
+*     untill the index R2
+*
+      IF( B1.EQ.1 ) THEN
+         WORK( INDS ) = ZERO
+      ELSE
+         WORK( INDS ) = LLD( B1-1 )
+      END IF
+      S = WORK( INDS ) - SIGMA
+      DO 50 I = B1, R2 - 1
+         DPLUS = D( I ) + S
+         WORK( I ) = LD( I ) / DPLUS
+         WORK( INDS+I ) = S*WORK( I )*L( I )
+         S = WORK( INDS+I ) - SIGMA
+   50 CONTINUE
+*
+      IF( .NOT.( S.GT.ZERO .OR. S.LT.ONE ) ) THEN
+*
+*        Run a slower version of the above loop if a NaN is detected
+*
+         SAWNAN = .TRUE.
+         J = B1 + 1
+   60    CONTINUE
+         IF( WORK( INDS+J ).GT.ZERO .OR. WORK( INDS+J ).LT.ONE ) THEN
+            J = J + 1
+            GO TO 60
+         END IF
+         WORK( INDS+J ) = LLD( J )
+         S = WORK( INDS+J ) - SIGMA
+         DO 70 I = J + 1, R2 - 1
+            DPLUS = D( I ) + S
+            WORK( I ) = LD( I ) / DPLUS
+            IF( WORK( I ).EQ.ZERO ) THEN
+               WORK( INDS+I ) = LLD( I )
+            ELSE
+               WORK( INDS+I ) = S*WORK( I )*L( I )
+            END IF
+            S = WORK( INDS+I ) - SIGMA
+   70    CONTINUE
+      END IF
+      WORK( INDP+BN-1 ) = D( BN ) - SIGMA
+      DO 80 I = BN - 1, R1, -1
+         DMINUS = LLD( I ) + WORK( INDP+I )
+         TMP = D( I ) / DMINUS
+         WORK( INDUMN+I ) = L( I )*TMP
+         WORK( INDP+I-1 ) = WORK( INDP+I )*TMP - SIGMA
+   80 CONTINUE
+      TMP = WORK( INDP+R1-1 )
+      IF( .NOT.( TMP.GT.ZERO .OR. TMP.LT.ONE ) ) THEN
+*
+*        Run a slower version of the above loop if a NaN is detected
+*
+         SAWNAN = .TRUE.
+         J = BN - 3
+   90    CONTINUE
+         IF( WORK( INDP+J ).GT.ZERO .OR. WORK( INDP+J ).LT.ONE ) THEN
+            J = J - 1
+            GO TO 90
+         END IF
+         WORK( INDP+J ) = D( J+1 ) - SIGMA
+         DO 100 I = J, R1, -1
+            DMINUS = LLD( I ) + WORK( INDP+I )
+            TMP = D( I ) / DMINUS
+            WORK( INDUMN+I ) = L( I )*TMP
+            IF( TMP.EQ.ZERO ) THEN
+               WORK( INDP+I-1 ) = D( I ) - SIGMA
+            ELSE
+               WORK( INDP+I-1 ) = WORK( INDP+I )*TMP - SIGMA
+            END IF
+  100    CONTINUE
+      END IF
+*
+*     Find the index (from R1 to R2) of the largest (in magnitude)
+*     diagonal element of the inverse
+*
+      MINGMA = WORK( INDS+R1-1 ) + WORK( INDP+R1-1 )
+      IF( MINGMA.EQ.ZERO )
+     $   MINGMA = EPS*WORK( INDS+R1-1 )
+      R = R1
+      DO 110 I = R1, R2 - 1
+         TMP = WORK( INDS+I ) + WORK( INDP+I )
+         IF( TMP.EQ.ZERO )
+     $      TMP = EPS*WORK( INDS+I )
+         IF( ABS( TMP ).LT.ABS( MINGMA ) ) THEN
+            MINGMA = TMP
+            R = I + 1
+         END IF
+  110 CONTINUE
+*
+*     Compute the (scaled) r-th column of the inverse
+*
+      ISUPPZ( 1 ) = B1
+      ISUPPZ( 2 ) = BN
+      Z( R ) = CONE
+      ZTZ = ONE
+      IF( .NOT.SAWNAN ) THEN
+         FROM = R - 1
+         TO = MAX( R-BLKSIZ, B1 )
+  120    CONTINUE
+         IF( FROM.GE.B1 ) THEN
+            DO 130 I = FROM, TO, -1
+               Z( I ) = -( WORK( I )*Z( I+1 ) )
+               ZTZ = ZTZ + DBLE( Z( I )*Z( I ) )
+  130       CONTINUE
+            IF( ABS( Z( TO ) ).LE.EPS .AND. ABS( Z( TO+1 ) ).LE.EPS )
+     $           THEN
+               ISUPPZ( 1 ) = TO + 2
+            ELSE
+               FROM = TO - 1
+               TO = MAX( TO-BLKSIZ, B1 )
+               GO TO 120
+            END IF
+         END IF
+         FROM = R + 1
+         TO = MIN( R+BLKSIZ, BN )
+  140    CONTINUE
+         IF( FROM.LE.BN ) THEN
+            DO 150 I = FROM, TO
+               Z( I ) = -( WORK( INDUMN+I-1 )*Z( I-1 ) )
+               ZTZ = ZTZ + DBLE( Z( I )*Z( I ) )
+  150       CONTINUE
+            IF( ABS( Z( TO ) ).LE.EPS .AND. ABS( Z( TO-1 ) ).LE.EPS )
+     $           THEN
+               ISUPPZ( 2 ) = TO - 2
+            ELSE
+               FROM = TO + 1
+               TO = MIN( TO+BLKSIZ, BN )
+               GO TO 140
+            END IF
+         END IF
+      ELSE
+         DO 160 I = R - 1, B1, -1
+            IF( Z( I+1 ).EQ.ZERO ) THEN
+               Z( I ) = -( LD( I+1 ) / LD( I ) )*Z( I+2 )
+            ELSE IF( ABS( Z( I+1 ) ).LE.EPS .AND. ABS( Z( I+2 ) ).LE.
+     $               EPS ) THEN
+               ISUPPZ( 1 ) = I + 3
+               GO TO 170
+            ELSE
+               Z( I ) = -( WORK( I )*Z( I+1 ) )
+            END IF
+            ZTZ = ZTZ + DBLE( Z( I )*Z( I ) )
+  160    CONTINUE
+  170    CONTINUE
+         DO 180 I = R, BN - 1
+            IF( Z( I ).EQ.ZERO ) THEN
+               Z( I+1 ) = -( LD( I-1 ) / LD( I ) )*Z( I-1 )
+            ELSE IF( ABS( Z( I ) ).LE.EPS .AND. ABS( Z( I-1 ) ).LE.EPS )
+     $                THEN
+               ISUPPZ( 2 ) = I - 2
+               GO TO 190
+            ELSE
+               Z( I+1 ) = -( WORK( INDUMN+I )*Z( I ) )
+            END IF
+            ZTZ = ZTZ + DBLE( Z( I+1 )*Z( I+1 ) )
+  180    CONTINUE
+  190    CONTINUE
+      END IF
+      DO 200 I = B1, ISUPPZ( 1 ) - 3
+         Z( I ) = ZERO
+  200 CONTINUE
+      DO 210 I = ISUPPZ( 2 ) + 3, BN
+         Z( I ) = ZERO
+  210 CONTINUE
+*
+      RETURN
+*
+*     End of ZLAR1V
+*
+      END
diff --git a/costa/native/external/lapack/zlar2v.f b/costa/native/external/lapack/zlar2v.f
new file mode 100644
index 000000000..b11458f65
--- /dev/null
+++ b/costa/native/external/lapack/zlar2v.f
@@ -0,0 +1,98 @@
+      SUBROUTINE ZLAR2V( N, X, Y, Z, INCX, C, S, INCC )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCC, INCX, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   C( * )
+      COMPLEX*16         S( * ), X( * ), Y( * ), Z( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLAR2V applies a vector of complex plane rotations with real cosines
+*  from both sides to a sequence of 2-by-2 complex Hermitian matrices,
+*  defined by the elements of the vectors x, y and z. For i = 1,2,...,n
+*
+*     (       x(i)  z(i) ) :=
+*     ( conjg(z(i)) y(i) )
+*
+*       (  c(i) conjg(s(i)) ) (       x(i)  z(i) ) ( c(i) -conjg(s(i)) )
+*       ( -s(i)       c(i)  ) ( conjg(z(i)) y(i) ) ( s(i)        c(i)  )
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of plane rotations to be applied.
+*
+*  X       (input/output) COMPLEX*16 array, dimension (1+(N-1)*INCX)
+*          The vector x; the elements of x are assumed to be real.
+*
+*  Y       (input/output) COMPLEX*16 array, dimension (1+(N-1)*INCX)
+*          The vector y; the elements of y are assumed to be real.
+*
+*  Z       (input/output) COMPLEX*16 array, dimension (1+(N-1)*INCX)
+*          The vector z.
+*
+*  INCX    (input) INTEGER
+*          The increment between elements of X, Y and Z. INCX > 0.
+*
+*  C       (input) DOUBLE PRECISION array, dimension (1+(N-1)*INCC)
+*          The cosines of the plane rotations.
+*
+*  S       (input) COMPLEX*16 array, dimension (1+(N-1)*INCC)
+*          The sines of the plane rotations.
+*
+*  INCC    (input) INTEGER
+*          The increment between elements of C and S. INCC > 0.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, IC, IX
+      DOUBLE PRECISION   CI, SII, SIR, T1I, T1R, T5, T6, XI, YI, ZII,
+     $                   ZIR
+      COMPLEX*16         SI, T2, T3, T4, ZI
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, DCMPLX, DCONJG, DIMAG
+*     ..
+*     .. Executable Statements ..
+*
+      IX = 1
+      IC = 1
+      DO 10 I = 1, N
+         XI = DBLE( X( IX ) )
+         YI = DBLE( Y( IX ) )
+         ZI = Z( IX )
+         ZIR = DBLE( ZI )
+         ZII = DIMAG( ZI )
+         CI = C( IC )
+         SI = S( IC )
+         SIR = DBLE( SI )
+         SII = DIMAG( SI )
+         T1R = SIR*ZIR - SII*ZII
+         T1I = SIR*ZII + SII*ZIR
+         T2 = CI*ZI
+         T3 = T2 - DCONJG( SI )*XI
+         T4 = DCONJG( T2 ) + SI*YI
+         T5 = CI*XI + T1R
+         T6 = CI*YI - T1R
+         X( IX ) = CI*T5 + ( SIR*DBLE( T4 )+SII*DIMAG( T4 ) )
+         Y( IX ) = CI*T6 - ( SIR*DBLE( T3 )-SII*DIMAG( T3 ) )
+         Z( IX ) = CI*T3 + DCONJG( SI )*DCMPLX( T6, T1I )
+         IX = IX + INCX
+         IC = IC + INCC
+   10 CONTINUE
+      RETURN
+*
+*     End of ZLAR2V
+*
+      END
diff --git a/costa/native/external/lapack/zlarcm.f b/costa/native/external/lapack/zlarcm.f
new file mode 100644
index 000000000..ea86f80f6
--- /dev/null
+++ b/costa/native/external/lapack/zlarcm.f
@@ -0,0 +1,111 @@
+      SUBROUTINE ZLARCM( M, N, A, LDA, B, LDB, C, LDC, RWORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            LDA, LDB, LDC, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), RWORK( * )
+      COMPLEX*16         B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLARCM performs a very simple matrix-matrix multiplication:
+*           C := A * B,
+*  where A is M by M and real; B is M by N and complex;
+*  C is M by N and complex.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A and of the matrix C.
+*          M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns and rows of the matrix B and
+*          the number of columns of the matrix C.
+*          N >= 0.
+*
+*  A       (input) DOUBLE PRECISION array, dimension (LDA, M)
+*          A contains the M by M matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >=max(1,M).
+*
+*  B       (input) DOUBLE PRECISION array, dimension (LDB, N)
+*          B contains the M by N matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >=max(1,M).
+*
+*  C       (input) COMPLEX*16 array, dimension (LDC, N)
+*          C contains the M by N matrix C.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >=max(1,M).
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (2*M*N)
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D0, ZERO = 0.0D0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, L
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, DCMPLX, DIMAG
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMM
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible.
+*
+      IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) )
+     $   RETURN
+*
+      DO 20 J = 1, N
+         DO 10 I = 1, M
+            RWORK( ( J-1 )*M+I ) = DBLE( B( I, J ) )
+   10    CONTINUE
+   20 CONTINUE
+*
+      L = M*N + 1
+      CALL DGEMM( 'N', 'N', M, N, M, ONE, A, LDA, RWORK, M, ZERO,
+     $            RWORK( L ), M )
+      DO 40 J = 1, N
+         DO 30 I = 1, M
+            C( I, J ) = RWORK( L+( J-1 )*M+I-1 )
+   30    CONTINUE
+   40 CONTINUE
+*
+      DO 60 J = 1, N
+         DO 50 I = 1, M
+            RWORK( ( J-1 )*M+I ) = DIMAG( B( I, J ) )
+   50    CONTINUE
+   60 CONTINUE
+      CALL DGEMM( 'N', 'N', M, N, M, ONE, A, LDA, RWORK, M, ZERO,
+     $            RWORK( L ), M )
+      DO 80 J = 1, N
+         DO 70 I = 1, M
+            C( I, J ) = DCMPLX( DBLE( C( I, J ) ),
+     $                  RWORK( L+( J-1 )*M+I-1 ) )
+   70    CONTINUE
+   80 CONTINUE
+*
+      RETURN
+*
+*     End of ZLARCM
+*
+      END
diff --git a/costa/native/external/lapack/zlarf.f b/costa/native/external/lapack/zlarf.f
new file mode 100644
index 000000000..f63bd070e
--- /dev/null
+++ b/costa/native/external/lapack/zlarf.f
@@ -0,0 +1,121 @@
+      SUBROUTINE ZLARF( SIDE, M, N, V, INCV, TAU, C, LDC, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE
+      INTEGER            INCV, LDC, M, N
+      COMPLEX*16         TAU
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         C( LDC, * ), V( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLARF applies a complex elementary reflector H to a complex M-by-N
+*  matrix C, from either the left or the right. H is represented in the
+*  form
+*
+*        H = I - tau * v * v'
+*
+*  where tau is a complex scalar and v is a complex vector.
+*
+*  If tau = 0, then H is taken to be the unit matrix.
+*
+*  To apply H' (the conjugate transpose of H), supply conjg(tau) instead
+*  tau.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': form  H * C
+*          = 'R': form  C * H
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C.
+*
+*  V       (input) COMPLEX*16 array, dimension
+*                     (1 + (M-1)*abs(INCV)) if SIDE = 'L'
+*                  or (1 + (N-1)*abs(INCV)) if SIDE = 'R'
+*          The vector v in the representation of H. V is not used if
+*          TAU = 0.
+*
+*  INCV    (input) INTEGER
+*          The increment between elements of v. INCV <> 0.
+*
+*  TAU     (input) COMPLEX*16
+*          The value tau in the representation of H.
+*
+*  C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by the matrix H * C if SIDE = 'L',
+*          or C * H if SIDE = 'R'.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension
+*                         (N) if SIDE = 'L'
+*                      or (M) if SIDE = 'R'
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ),
+     $                   ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZGEMV, ZGERC
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+      IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*        Form  H * C
+*
+         IF( TAU.NE.ZERO ) THEN
+*
+*           w := C' * v
+*
+            CALL ZGEMV( 'Conjugate transpose', M, N, ONE, C, LDC, V,
+     $                  INCV, ZERO, WORK, 1 )
+*
+*           C := C - v * w'
+*
+            CALL ZGERC( M, N, -TAU, V, INCV, WORK, 1, C, LDC )
+         END IF
+      ELSE
+*
+*        Form  C * H
+*
+         IF( TAU.NE.ZERO ) THEN
+*
+*           w := C * v
+*
+            CALL ZGEMV( 'No transpose', M, N, ONE, C, LDC, V, INCV,
+     $                  ZERO, WORK, 1 )
+*
+*           C := C - w * v'
+*
+            CALL ZGERC( M, N, -TAU, WORK, 1, V, INCV, C, LDC )
+         END IF
+      END IF
+      RETURN
+*
+*     End of ZLARF
+*
+      END
diff --git a/costa/native/external/lapack/zlarfb.f b/costa/native/external/lapack/zlarfb.f
new file mode 100644
index 000000000..e21692a7d
--- /dev/null
+++ b/costa/native/external/lapack/zlarfb.f
@@ -0,0 +1,609 @@
+      SUBROUTINE ZLARFB( SIDE, TRANS, DIRECT, STOREV, M, N, K, V, LDV,
+     $                   T, LDT, C, LDC, WORK, LDWORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIRECT, SIDE, STOREV, TRANS
+      INTEGER            K, LDC, LDT, LDV, LDWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         C( LDC, * ), T( LDT, * ), V( LDV, * ),
+     $                   WORK( LDWORK, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLARFB applies a complex block reflector H or its transpose H' to a
+*  complex M-by-N matrix C, from either the left or the right.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply H or H' from the Left
+*          = 'R': apply H or H' from the Right
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': apply H (No transpose)
+*          = 'C': apply H' (Conjugate transpose)
+*
+*  DIRECT  (input) CHARACTER*1
+*          Indicates how H is formed from a product of elementary
+*          reflectors
+*          = 'F': H = H(1) H(2) . . . H(k) (Forward)
+*          = 'B': H = H(k) . . . H(2) H(1) (Backward)
+*
+*  STOREV  (input) CHARACTER*1
+*          Indicates how the vectors which define the elementary
+*          reflectors are stored:
+*          = 'C': Columnwise
+*          = 'R': Rowwise
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C.
+*
+*  K       (input) INTEGER
+*          The order of the matrix T (= the number of elementary
+*          reflectors whose product defines the block reflector).
+*
+*  V       (input) COMPLEX*16 array, dimension
+*                                (LDV,K) if STOREV = 'C'
+*                                (LDV,M) if STOREV = 'R' and SIDE = 'L'
+*                                (LDV,N) if STOREV = 'R' and SIDE = 'R'
+*          The matrix V. See further details.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V.
+*          If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M);
+*          if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N);
+*          if STOREV = 'R', LDV >= K.
+*
+*  T       (input) COMPLEX*16 array, dimension (LDT,K)
+*          The triangular K-by-K matrix T in the representation of the
+*          block reflector.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= K.
+*
+*  C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by H*C or H'*C or C*H or C*H'.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (LDWORK,K)
+*
+*  LDWORK  (input) INTEGER
+*          The leading dimension of the array WORK.
+*          If SIDE = 'L', LDWORK >= max(1,N);
+*          if SIDE = 'R', LDWORK >= max(1,M).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      CHARACTER          TRANST
+      INTEGER            I, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZCOPY, ZGEMM, ZLACGV, ZTRMM
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 .OR. N.LE.0 )
+     $   RETURN
+*
+      IF( LSAME( TRANS, 'N' ) ) THEN
+         TRANST = 'C'
+      ELSE
+         TRANST = 'N'
+      END IF
+*
+      IF( LSAME( STOREV, 'C' ) ) THEN
+*
+         IF( LSAME( DIRECT, 'F' ) ) THEN
+*
+*           Let  V =  ( V1 )    (first K rows)
+*                     ( V2 )
+*           where  V1  is unit lower triangular.
+*
+            IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*              Form  H * C  or  H' * C  where  C = ( C1 )
+*                                                  ( C2 )
+*
+*              W := C' * V  =  (C1'*V1 + C2'*V2)  (stored in WORK)
+*
+*              W := C1'
+*
+               DO 10 J = 1, K
+                  CALL ZCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 )
+                  CALL ZLACGV( N, WORK( 1, J ), 1 )
+   10          CONTINUE
+*
+*              W := W * V1
+*
+               CALL ZTRMM( 'Right', 'Lower', 'No transpose', 'Unit', N,
+     $                     K, ONE, V, LDV, WORK, LDWORK )
+               IF( M.GT.K ) THEN
+*
+*                 W := W + C2'*V2
+*
+                  CALL ZGEMM( 'Conjugate transpose', 'No transpose', N,
+     $                        K, M-K, ONE, C( K+1, 1 ), LDC,
+     $                        V( K+1, 1 ), LDV, ONE, WORK, LDWORK )
+               END IF
+*
+*              W := W * T'  or  W * T
+*
+               CALL ZTRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - V * W'
+*
+               IF( M.GT.K ) THEN
+*
+*                 C2 := C2 - V2 * W'
+*
+                  CALL ZGEMM( 'No transpose', 'Conjugate transpose',
+     $                        M-K, N, K, -ONE, V( K+1, 1 ), LDV, WORK,
+     $                        LDWORK, ONE, C( K+1, 1 ), LDC )
+               END IF
+*
+*              W := W * V1'
+*
+               CALL ZTRMM( 'Right', 'Lower', 'Conjugate transpose',
+     $                     'Unit', N, K, ONE, V, LDV, WORK, LDWORK )
+*
+*              C1 := C1 - W'
+*
+               DO 30 J = 1, K
+                  DO 20 I = 1, N
+                     C( J, I ) = C( J, I ) - DCONJG( WORK( I, J ) )
+   20             CONTINUE
+   30          CONTINUE
+*
+            ELSE IF( LSAME( SIDE, 'R' ) ) THEN
+*
+*              Form  C * H  or  C * H'  where  C = ( C1  C2 )
+*
+*              W := C * V  =  (C1*V1 + C2*V2)  (stored in WORK)
+*
+*              W := C1
+*
+               DO 40 J = 1, K
+                  CALL ZCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 )
+   40          CONTINUE
+*
+*              W := W * V1
+*
+               CALL ZTRMM( 'Right', 'Lower', 'No transpose', 'Unit', M,
+     $                     K, ONE, V, LDV, WORK, LDWORK )
+               IF( N.GT.K ) THEN
+*
+*                 W := W + C2 * V2
+*
+                  CALL ZGEMM( 'No transpose', 'No transpose', M, K, N-K,
+     $                        ONE, C( 1, K+1 ), LDC, V( K+1, 1 ), LDV,
+     $                        ONE, WORK, LDWORK )
+               END IF
+*
+*              W := W * T  or  W * T'
+*
+               CALL ZTRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - W * V'
+*
+               IF( N.GT.K ) THEN
+*
+*                 C2 := C2 - W * V2'
+*
+                  CALL ZGEMM( 'No transpose', 'Conjugate transpose', M,
+     $                        N-K, K, -ONE, WORK, LDWORK, V( K+1, 1 ),
+     $                        LDV, ONE, C( 1, K+1 ), LDC )
+               END IF
+*
+*              W := W * V1'
+*
+               CALL ZTRMM( 'Right', 'Lower', 'Conjugate transpose',
+     $                     'Unit', M, K, ONE, V, LDV, WORK, LDWORK )
+*
+*              C1 := C1 - W
+*
+               DO 60 J = 1, K
+                  DO 50 I = 1, M
+                     C( I, J ) = C( I, J ) - WORK( I, J )
+   50             CONTINUE
+   60          CONTINUE
+            END IF
+*
+         ELSE
+*
+*           Let  V =  ( V1 )
+*                     ( V2 )    (last K rows)
+*           where  V2  is unit upper triangular.
+*
+            IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*              Form  H * C  or  H' * C  where  C = ( C1 )
+*                                                  ( C2 )
+*
+*              W := C' * V  =  (C1'*V1 + C2'*V2)  (stored in WORK)
+*
+*              W := C2'
+*
+               DO 70 J = 1, K
+                  CALL ZCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 )
+                  CALL ZLACGV( N, WORK( 1, J ), 1 )
+   70          CONTINUE
+*
+*              W := W * V2
+*
+               CALL ZTRMM( 'Right', 'Upper', 'No transpose', 'Unit', N,
+     $                     K, ONE, V( M-K+1, 1 ), LDV, WORK, LDWORK )
+               IF( M.GT.K ) THEN
+*
+*                 W := W + C1'*V1
+*
+                  CALL ZGEMM( 'Conjugate transpose', 'No transpose', N,
+     $                        K, M-K, ONE, C, LDC, V, LDV, ONE, WORK,
+     $                        LDWORK )
+               END IF
+*
+*              W := W * T'  or  W * T
+*
+               CALL ZTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - V * W'
+*
+               IF( M.GT.K ) THEN
+*
+*                 C1 := C1 - V1 * W'
+*
+                  CALL ZGEMM( 'No transpose', 'Conjugate transpose',
+     $                        M-K, N, K, -ONE, V, LDV, WORK, LDWORK,
+     $                        ONE, C, LDC )
+               END IF
+*
+*              W := W * V2'
+*
+               CALL ZTRMM( 'Right', 'Upper', 'Conjugate transpose',
+     $                     'Unit', N, K, ONE, V( M-K+1, 1 ), LDV, WORK,
+     $                     LDWORK )
+*
+*              C2 := C2 - W'
+*
+               DO 90 J = 1, K
+                  DO 80 I = 1, N
+                     C( M-K+J, I ) = C( M-K+J, I ) -
+     $                               DCONJG( WORK( I, J ) )
+   80             CONTINUE
+   90          CONTINUE
+*
+            ELSE IF( LSAME( SIDE, 'R' ) ) THEN
+*
+*              Form  C * H  or  C * H'  where  C = ( C1  C2 )
+*
+*              W := C * V  =  (C1*V1 + C2*V2)  (stored in WORK)
+*
+*              W := C2
+*
+               DO 100 J = 1, K
+                  CALL ZCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 )
+  100          CONTINUE
+*
+*              W := W * V2
+*
+               CALL ZTRMM( 'Right', 'Upper', 'No transpose', 'Unit', M,
+     $                     K, ONE, V( N-K+1, 1 ), LDV, WORK, LDWORK )
+               IF( N.GT.K ) THEN
+*
+*                 W := W + C1 * V1
+*
+                  CALL ZGEMM( 'No transpose', 'No transpose', M, K, N-K,
+     $                        ONE, C, LDC, V, LDV, ONE, WORK, LDWORK )
+               END IF
+*
+*              W := W * T  or  W * T'
+*
+               CALL ZTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - W * V'
+*
+               IF( N.GT.K ) THEN
+*
+*                 C1 := C1 - W * V1'
+*
+                  CALL ZGEMM( 'No transpose', 'Conjugate transpose', M,
+     $                        N-K, K, -ONE, WORK, LDWORK, V, LDV, ONE,
+     $                        C, LDC )
+               END IF
+*
+*              W := W * V2'
+*
+               CALL ZTRMM( 'Right', 'Upper', 'Conjugate transpose',
+     $                     'Unit', M, K, ONE, V( N-K+1, 1 ), LDV, WORK,
+     $                     LDWORK )
+*
+*              C2 := C2 - W
+*
+               DO 120 J = 1, K
+                  DO 110 I = 1, M
+                     C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J )
+  110             CONTINUE
+  120          CONTINUE
+            END IF
+         END IF
+*
+      ELSE IF( LSAME( STOREV, 'R' ) ) THEN
+*
+         IF( LSAME( DIRECT, 'F' ) ) THEN
+*
+*           Let  V =  ( V1  V2 )    (V1: first K columns)
+*           where  V1  is unit upper triangular.
+*
+            IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*              Form  H * C  or  H' * C  where  C = ( C1 )
+*                                                  ( C2 )
+*
+*              W := C' * V'  =  (C1'*V1' + C2'*V2') (stored in WORK)
+*
+*              W := C1'
+*
+               DO 130 J = 1, K
+                  CALL ZCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 )
+                  CALL ZLACGV( N, WORK( 1, J ), 1 )
+  130          CONTINUE
+*
+*              W := W * V1'
+*
+               CALL ZTRMM( 'Right', 'Upper', 'Conjugate transpose',
+     $                     'Unit', N, K, ONE, V, LDV, WORK, LDWORK )
+               IF( M.GT.K ) THEN
+*
+*                 W := W + C2'*V2'
+*
+                  CALL ZGEMM( 'Conjugate transpose',
+     $                        'Conjugate transpose', N, K, M-K, ONE,
+     $                        C( K+1, 1 ), LDC, V( 1, K+1 ), LDV, ONE,
+     $                        WORK, LDWORK )
+               END IF
+*
+*              W := W * T'  or  W * T
+*
+               CALL ZTRMM( 'Right', 'Upper', TRANST, 'Non-unit', N, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - V' * W'
+*
+               IF( M.GT.K ) THEN
+*
+*                 C2 := C2 - V2' * W'
+*
+                  CALL ZGEMM( 'Conjugate transpose',
+     $                        'Conjugate transpose', M-K, N, K, -ONE,
+     $                        V( 1, K+1 ), LDV, WORK, LDWORK, ONE,
+     $                        C( K+1, 1 ), LDC )
+               END IF
+*
+*              W := W * V1
+*
+               CALL ZTRMM( 'Right', 'Upper', 'No transpose', 'Unit', N,
+     $                     K, ONE, V, LDV, WORK, LDWORK )
+*
+*              C1 := C1 - W'
+*
+               DO 150 J = 1, K
+                  DO 140 I = 1, N
+                     C( J, I ) = C( J, I ) - DCONJG( WORK( I, J ) )
+  140             CONTINUE
+  150          CONTINUE
+*
+            ELSE IF( LSAME( SIDE, 'R' ) ) THEN
+*
+*              Form  C * H  or  C * H'  where  C = ( C1  C2 )
+*
+*              W := C * V'  =  (C1*V1' + C2*V2')  (stored in WORK)
+*
+*              W := C1
+*
+               DO 160 J = 1, K
+                  CALL ZCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 )
+  160          CONTINUE
+*
+*              W := W * V1'
+*
+               CALL ZTRMM( 'Right', 'Upper', 'Conjugate transpose',
+     $                     'Unit', M, K, ONE, V, LDV, WORK, LDWORK )
+               IF( N.GT.K ) THEN
+*
+*                 W := W + C2 * V2'
+*
+                  CALL ZGEMM( 'No transpose', 'Conjugate transpose', M,
+     $                        K, N-K, ONE, C( 1, K+1 ), LDC,
+     $                        V( 1, K+1 ), LDV, ONE, WORK, LDWORK )
+               END IF
+*
+*              W := W * T  or  W * T'
+*
+               CALL ZTRMM( 'Right', 'Upper', TRANS, 'Non-unit', M, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - W * V
+*
+               IF( N.GT.K ) THEN
+*
+*                 C2 := C2 - W * V2
+*
+                  CALL ZGEMM( 'No transpose', 'No transpose', M, N-K, K,
+     $                        -ONE, WORK, LDWORK, V( 1, K+1 ), LDV, ONE,
+     $                        C( 1, K+1 ), LDC )
+               END IF
+*
+*              W := W * V1
+*
+               CALL ZTRMM( 'Right', 'Upper', 'No transpose', 'Unit', M,
+     $                     K, ONE, V, LDV, WORK, LDWORK )
+*
+*              C1 := C1 - W
+*
+               DO 180 J = 1, K
+                  DO 170 I = 1, M
+                     C( I, J ) = C( I, J ) - WORK( I, J )
+  170             CONTINUE
+  180          CONTINUE
+*
+            END IF
+*
+         ELSE
+*
+*           Let  V =  ( V1  V2 )    (V2: last K columns)
+*           where  V2  is unit lower triangular.
+*
+            IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*              Form  H * C  or  H' * C  where  C = ( C1 )
+*                                                  ( C2 )
+*
+*              W := C' * V'  =  (C1'*V1' + C2'*V2') (stored in WORK)
+*
+*              W := C2'
+*
+               DO 190 J = 1, K
+                  CALL ZCOPY( N, C( M-K+J, 1 ), LDC, WORK( 1, J ), 1 )
+                  CALL ZLACGV( N, WORK( 1, J ), 1 )
+  190          CONTINUE
+*
+*              W := W * V2'
+*
+               CALL ZTRMM( 'Right', 'Lower', 'Conjugate transpose',
+     $                     'Unit', N, K, ONE, V( 1, M-K+1 ), LDV, WORK,
+     $                     LDWORK )
+               IF( M.GT.K ) THEN
+*
+*                 W := W + C1'*V1'
+*
+                  CALL ZGEMM( 'Conjugate transpose',
+     $                        'Conjugate transpose', N, K, M-K, ONE, C,
+     $                        LDC, V, LDV, ONE, WORK, LDWORK )
+               END IF
+*
+*              W := W * T'  or  W * T
+*
+               CALL ZTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - V' * W'
+*
+               IF( M.GT.K ) THEN
+*
+*                 C1 := C1 - V1' * W'
+*
+                  CALL ZGEMM( 'Conjugate transpose',
+     $                        'Conjugate transpose', M-K, N, K, -ONE, V,
+     $                        LDV, WORK, LDWORK, ONE, C, LDC )
+               END IF
+*
+*              W := W * V2
+*
+               CALL ZTRMM( 'Right', 'Lower', 'No transpose', 'Unit', N,
+     $                     K, ONE, V( 1, M-K+1 ), LDV, WORK, LDWORK )
+*
+*              C2 := C2 - W'
+*
+               DO 210 J = 1, K
+                  DO 200 I = 1, N
+                     C( M-K+J, I ) = C( M-K+J, I ) -
+     $                               DCONJG( WORK( I, J ) )
+  200             CONTINUE
+  210          CONTINUE
+*
+            ELSE IF( LSAME( SIDE, 'R' ) ) THEN
+*
+*              Form  C * H  or  C * H'  where  C = ( C1  C2 )
+*
+*              W := C * V'  =  (C1*V1' + C2*V2')  (stored in WORK)
+*
+*              W := C2
+*
+               DO 220 J = 1, K
+                  CALL ZCOPY( M, C( 1, N-K+J ), 1, WORK( 1, J ), 1 )
+  220          CONTINUE
+*
+*              W := W * V2'
+*
+               CALL ZTRMM( 'Right', 'Lower', 'Conjugate transpose',
+     $                     'Unit', M, K, ONE, V( 1, N-K+1 ), LDV, WORK,
+     $                     LDWORK )
+               IF( N.GT.K ) THEN
+*
+*                 W := W + C1 * V1'
+*
+                  CALL ZGEMM( 'No transpose', 'Conjugate transpose', M,
+     $                        K, N-K, ONE, C, LDC, V, LDV, ONE, WORK,
+     $                        LDWORK )
+               END IF
+*
+*              W := W * T  or  W * T'
+*
+               CALL ZTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K,
+     $                     ONE, T, LDT, WORK, LDWORK )
+*
+*              C := C - W * V
+*
+               IF( N.GT.K ) THEN
+*
+*                 C1 := C1 - W * V1
+*
+                  CALL ZGEMM( 'No transpose', 'No transpose', M, N-K, K,
+     $                        -ONE, WORK, LDWORK, V, LDV, ONE, C, LDC )
+               END IF
+*
+*              W := W * V2
+*
+               CALL ZTRMM( 'Right', 'Lower', 'No transpose', 'Unit', M,
+     $                     K, ONE, V( 1, N-K+1 ), LDV, WORK, LDWORK )
+*
+*              C1 := C1 - W
+*
+               DO 240 J = 1, K
+                  DO 230 I = 1, M
+                     C( I, N-K+J ) = C( I, N-K+J ) - WORK( I, J )
+  230             CONTINUE
+  240          CONTINUE
+*
+            END IF
+*
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZLARFB
+*
+      END
diff --git a/costa/native/external/lapack/zlarfg.f b/costa/native/external/lapack/zlarfg.f
new file mode 100644
index 000000000..243549400
--- /dev/null
+++ b/costa/native/external/lapack/zlarfg.f
@@ -0,0 +1,146 @@
+      SUBROUTINE ZLARFG( N, ALPHA, X, INCX, TAU )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+      COMPLEX*16         ALPHA, TAU
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLARFG generates a complex elementary reflector H of order n, such
+*  that
+*
+*        H' * ( alpha ) = ( beta ),   H' * H = I.
+*             (   x   )   (   0  )
+*
+*  where alpha and beta are scalars, with beta real, and x is an
+*  (n-1)-element complex vector. H is represented in the form
+*
+*        H = I - tau * ( 1 ) * ( 1 v' ) ,
+*                      ( v )
+*
+*  where tau is a complex scalar and v is a complex (n-1)-element
+*  vector. Note that H is not hermitian.
+*
+*  If the elements of x are all zero and alpha is real, then tau = 0
+*  and H is taken to be the unit matrix.
+*
+*  Otherwise  1 <= real(tau) <= 2  and  abs(tau-1) <= 1 .
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the elementary reflector.
+*
+*  ALPHA   (input/output) COMPLEX*16
+*          On entry, the value alpha.
+*          On exit, it is overwritten with the value beta.
+*
+*  X       (input/output) COMPLEX*16 array, dimension
+*                         (1+(N-2)*abs(INCX))
+*          On entry, the vector x.
+*          On exit, it is overwritten with the vector v.
+*
+*  INCX    (input) INTEGER
+*          The increment between elements of X. INCX > 0.
+*
+*  TAU     (output) COMPLEX*16
+*          The value tau.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            J, KNT
+      DOUBLE PRECISION   ALPHI, ALPHR, BETA, RSAFMN, SAFMIN, XNORM
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH, DLAPY3, DZNRM2
+      COMPLEX*16         ZLADIV
+      EXTERNAL           DLAMCH, DLAPY3, DZNRM2, ZLADIV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCMPLX, DIMAG, SIGN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZDSCAL, ZSCAL
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.LE.0 ) THEN
+         TAU = ZERO
+         RETURN
+      END IF
+*
+      XNORM = DZNRM2( N-1, X, INCX )
+      ALPHR = DBLE( ALPHA )
+      ALPHI = DIMAG( ALPHA )
+*
+      IF( XNORM.EQ.ZERO .AND. ALPHI.EQ.ZERO ) THEN
+*
+*        H  =  I
+*
+         TAU = ZERO
+      ELSE
+*
+*        general case
+*
+         BETA = -SIGN( DLAPY3( ALPHR, ALPHI, XNORM ), ALPHR )
+         SAFMIN = DLAMCH( 'S' ) / DLAMCH( 'E' )
+         RSAFMN = ONE / SAFMIN
+*
+         IF( ABS( BETA ).LT.SAFMIN ) THEN
+*
+*           XNORM, BETA may be inaccurate; scale X and recompute them
+*
+            KNT = 0
+   10       CONTINUE
+            KNT = KNT + 1
+            CALL ZDSCAL( N-1, RSAFMN, X, INCX )
+            BETA = BETA*RSAFMN
+            ALPHI = ALPHI*RSAFMN
+            ALPHR = ALPHR*RSAFMN
+            IF( ABS( BETA ).LT.SAFMIN )
+     $         GO TO 10
+*
+*           New BETA is at most 1, at least SAFMIN
+*
+            XNORM = DZNRM2( N-1, X, INCX )
+            ALPHA = DCMPLX( ALPHR, ALPHI )
+            BETA = -SIGN( DLAPY3( ALPHR, ALPHI, XNORM ), ALPHR )
+            TAU = DCMPLX( ( BETA-ALPHR ) / BETA, -ALPHI / BETA )
+            ALPHA = ZLADIV( DCMPLX( ONE ), ALPHA-BETA )
+            CALL ZSCAL( N-1, ALPHA, X, INCX )
+*
+*           If ALPHA is subnormal, it may lose relative accuracy
+*
+            ALPHA = BETA
+            DO 20 J = 1, KNT
+               ALPHA = ALPHA*SAFMIN
+   20       CONTINUE
+         ELSE
+            TAU = DCMPLX( ( BETA-ALPHR ) / BETA, -ALPHI / BETA )
+            ALPHA = ZLADIV( DCMPLX( ONE ), ALPHA-BETA )
+            CALL ZSCAL( N-1, ALPHA, X, INCX )
+            ALPHA = BETA
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZLARFG
+*
+      END
diff --git a/costa/native/external/lapack/zlarft.f b/costa/native/external/lapack/zlarft.f
new file mode 100644
index 000000000..cd26a1e6e
--- /dev/null
+++ b/costa/native/external/lapack/zlarft.f
@@ -0,0 +1,225 @@
+      SUBROUTINE ZLARFT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIRECT, STOREV
+      INTEGER            K, LDT, LDV, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         T( LDT, * ), TAU( * ), V( LDV, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLARFT forms the triangular factor T of a complex block reflector H
+*  of order n, which is defined as a product of k elementary reflectors.
+*
+*  If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
+*
+*  If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
+*
+*  If STOREV = 'C', the vector which defines the elementary reflector
+*  H(i) is stored in the i-th column of the array V, and
+*
+*     H  =  I - V * T * V'
+*
+*  If STOREV = 'R', the vector which defines the elementary reflector
+*  H(i) is stored in the i-th row of the array V, and
+*
+*     H  =  I - V' * T * V
+*
+*  Arguments
+*  =========
+*
+*  DIRECT  (input) CHARACTER*1
+*          Specifies the order in which the elementary reflectors are
+*          multiplied to form the block reflector:
+*          = 'F': H = H(1) H(2) . . . H(k) (Forward)
+*          = 'B': H = H(k) . . . H(2) H(1) (Backward)
+*
+*  STOREV  (input) CHARACTER*1
+*          Specifies how the vectors which define the elementary
+*          reflectors are stored (see also Further Details):
+*          = 'C': columnwise
+*          = 'R': rowwise
+*
+*  N       (input) INTEGER
+*          The order of the block reflector H. N >= 0.
+*
+*  K       (input) INTEGER
+*          The order of the triangular factor T (= the number of
+*          elementary reflectors). K >= 1.
+*
+*  V       (input/output) COMPLEX*16 array, dimension
+*                               (LDV,K) if STOREV = 'C'
+*                               (LDV,N) if STOREV = 'R'
+*          The matrix V. See further details.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V.
+*          If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
+*
+*  TAU     (input) COMPLEX*16 array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i).
+*
+*  T       (output) COMPLEX*16 array, dimension (LDT,K)
+*          The k by k triangular factor T of the block reflector.
+*          If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
+*          lower triangular. The rest of the array is not used.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= K.
+*
+*  Further Details
+*  ===============
+*
+*  The shape of the matrix V and the storage of the vectors which define
+*  the H(i) is best illustrated by the following example with n = 5 and
+*  k = 3. The elements equal to 1 are not stored; the corresponding
+*  array elements are modified but restored on exit. The rest of the
+*  array is not used.
+*
+*  DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R':
+*
+*               V = (  1       )                 V = (  1 v1 v1 v1 v1 )
+*                   ( v1  1    )                     (     1 v2 v2 v2 )
+*                   ( v1 v2  1 )                     (        1 v3 v3 )
+*                   ( v1 v2 v3 )
+*                   ( v1 v2 v3 )
+*
+*  DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R':
+*
+*               V = ( v1 v2 v3 )                 V = ( v1 v1  1       )
+*                   ( v1 v2 v3 )                     ( v2 v2 v2  1    )
+*                   (  1 v2 v3 )                     ( v3 v3 v3 v3  1 )
+*                   (     1 v3 )
+*                   (        1 )
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ),
+     $                   ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+      COMPLEX*16         VII
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZGEMV, ZLACGV, ZTRMV
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( LSAME( DIRECT, 'F' ) ) THEN
+         DO 20 I = 1, K
+            IF( TAU( I ).EQ.ZERO ) THEN
+*
+*              H(i)  =  I
+*
+               DO 10 J = 1, I
+                  T( J, I ) = ZERO
+   10          CONTINUE
+            ELSE
+*
+*              general case
+*
+               VII = V( I, I )
+               V( I, I ) = ONE
+               IF( LSAME( STOREV, 'C' ) ) THEN
+*
+*                 T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i)
+*
+                  CALL ZGEMV( 'Conjugate transpose', N-I+1, I-1,
+     $                        -TAU( I ), V( I, 1 ), LDV, V( I, I ), 1,
+     $                        ZERO, T( 1, I ), 1 )
+               ELSE
+*
+*                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)'
+*
+                  IF( I.LT.N )
+     $               CALL ZLACGV( N-I, V( I, I+1 ), LDV )
+                  CALL ZGEMV( 'No transpose', I-1, N-I+1, -TAU( I ),
+     $                        V( 1, I ), LDV, V( I, I ), LDV, ZERO,
+     $                        T( 1, I ), 1 )
+                  IF( I.LT.N )
+     $               CALL ZLACGV( N-I, V( I, I+1 ), LDV )
+               END IF
+               V( I, I ) = VII
+*
+*              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i)
+*
+               CALL ZTRMV( 'Upper', 'No transpose', 'Non-unit', I-1, T,
+     $                     LDT, T( 1, I ), 1 )
+               T( I, I ) = TAU( I )
+            END IF
+   20    CONTINUE
+      ELSE
+         DO 40 I = K, 1, -1
+            IF( TAU( I ).EQ.ZERO ) THEN
+*
+*              H(i)  =  I
+*
+               DO 30 J = I, K
+                  T( J, I ) = ZERO
+   30          CONTINUE
+            ELSE
+*
+*              general case
+*
+               IF( I.LT.K ) THEN
+                  IF( LSAME( STOREV, 'C' ) ) THEN
+                     VII = V( N-K+I, I )
+                     V( N-K+I, I ) = ONE
+*
+*                    T(i+1:k,i) :=
+*                            - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i)
+*
+                     CALL ZGEMV( 'Conjugate transpose', N-K+I, K-I,
+     $                           -TAU( I ), V( 1, I+1 ), LDV, V( 1, I ),
+     $                           1, ZERO, T( I+1, I ), 1 )
+                     V( N-K+I, I ) = VII
+                  ELSE
+                     VII = V( I, N-K+I )
+                     V( I, N-K+I ) = ONE
+*
+*                    T(i+1:k,i) :=
+*                            - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)'
+*
+                     CALL ZLACGV( N-K+I-1, V( I, 1 ), LDV )
+                     CALL ZGEMV( 'No transpose', K-I, N-K+I, -TAU( I ),
+     $                           V( I+1, 1 ), LDV, V( I, 1 ), LDV, ZERO,
+     $                           T( I+1, I ), 1 )
+                     CALL ZLACGV( N-K+I-1, V( I, 1 ), LDV )
+                     V( I, N-K+I ) = VII
+                  END IF
+*
+*                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i)
+*
+                  CALL ZTRMV( 'Lower', 'No transpose', 'Non-unit', K-I,
+     $                        T( I+1, I+1 ), LDT, T( I+1, I ), 1 )
+               END IF
+               T( I, I ) = TAU( I )
+            END IF
+   40    CONTINUE
+      END IF
+      RETURN
+*
+*     End of ZLARFT
+*
+      END
diff --git a/costa/native/external/lapack/zlarfx.f b/costa/native/external/lapack/zlarfx.f
new file mode 100644
index 000000000..8627cb65e
--- /dev/null
+++ b/costa/native/external/lapack/zlarfx.f
@@ -0,0 +1,642 @@
+      SUBROUTINE ZLARFX( SIDE, M, N, V, TAU, C, LDC, WORK )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE
+      INTEGER            LDC, M, N
+      COMPLEX*16         TAU
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         C( LDC, * ), V( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLARFX applies a complex elementary reflector H to a complex m by n
+*  matrix C, from either the left or the right. H is represented in the
+*  form
+*
+*        H = I - tau * v * v'
+*
+*  where tau is a complex scalar and v is a complex vector.
+*
+*  If tau = 0, then H is taken to be the unit matrix
+*
+*  This version uses inline code if H has order < 11.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': form  H * C
+*          = 'R': form  C * H
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C.
+*
+*  V       (input) COMPLEX*16 array, dimension (M) if SIDE = 'L'
+*                                        or (N) if SIDE = 'R'
+*          The vector v in the representation of H.
+*
+*  TAU     (input) COMPLEX*16
+*          The value tau in the representation of H.
+*
+*  C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+*          On entry, the m by n matrix C.
+*          On exit, C is overwritten by the matrix H * C if SIDE = 'L',
+*          or C * H if SIDE = 'R'.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDA >= max(1,M).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (N) if SIDE = 'L'
+*                                            or (M) if SIDE = 'R'
+*          WORK is not referenced if H has order < 11.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO, ONE
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            J
+      COMPLEX*16         SUM, T1, T10, T2, T3, T4, T5, T6, T7, T8, T9,
+     $                   V1, V10, V2, V3, V4, V5, V6, V7, V8, V9
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZGEMV, ZGERC
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG
+*     ..
+*     .. Executable Statements ..
+*
+      IF( TAU.EQ.ZERO )
+     $   RETURN
+      IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*        Form  H * C, where H has order m.
+*
+         GO TO ( 10, 30, 50, 70, 90, 110, 130, 150,
+     $           170, 190 )M
+*
+*        Code for general M
+*
+*        w := C'*v
+*
+         CALL ZGEMV( 'Conjugate transpose', M, N, ONE, C, LDC, V, 1,
+     $               ZERO, WORK, 1 )
+*
+*        C := C - tau * v * w'
+*
+         CALL ZGERC( M, N, -TAU, V, 1, WORK, 1, C, LDC )
+         GO TO 410
+   10    CONTINUE
+*
+*        Special code for 1 x 1 Householder
+*
+         T1 = ONE - TAU*V( 1 )*DCONJG( V( 1 ) )
+         DO 20 J = 1, N
+            C( 1, J ) = T1*C( 1, J )
+   20    CONTINUE
+         GO TO 410
+   30    CONTINUE
+*
+*        Special code for 2 x 2 Householder
+*
+         V1 = DCONJG( V( 1 ) )
+         T1 = TAU*DCONJG( V1 )
+         V2 = DCONJG( V( 2 ) )
+         T2 = TAU*DCONJG( V2 )
+         DO 40 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+   40    CONTINUE
+         GO TO 410
+   50    CONTINUE
+*
+*        Special code for 3 x 3 Householder
+*
+         V1 = DCONJG( V( 1 ) )
+         T1 = TAU*DCONJG( V1 )
+         V2 = DCONJG( V( 2 ) )
+         T2 = TAU*DCONJG( V2 )
+         V3 = DCONJG( V( 3 ) )
+         T3 = TAU*DCONJG( V3 )
+         DO 60 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+   60    CONTINUE
+         GO TO 410
+   70    CONTINUE
+*
+*        Special code for 4 x 4 Householder
+*
+         V1 = DCONJG( V( 1 ) )
+         T1 = TAU*DCONJG( V1 )
+         V2 = DCONJG( V( 2 ) )
+         T2 = TAU*DCONJG( V2 )
+         V3 = DCONJG( V( 3 ) )
+         T3 = TAU*DCONJG( V3 )
+         V4 = DCONJG( V( 4 ) )
+         T4 = TAU*DCONJG( V4 )
+         DO 80 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) +
+     $            V4*C( 4, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+            C( 4, J ) = C( 4, J ) - SUM*T4
+   80    CONTINUE
+         GO TO 410
+   90    CONTINUE
+*
+*        Special code for 5 x 5 Householder
+*
+         V1 = DCONJG( V( 1 ) )
+         T1 = TAU*DCONJG( V1 )
+         V2 = DCONJG( V( 2 ) )
+         T2 = TAU*DCONJG( V2 )
+         V3 = DCONJG( V( 3 ) )
+         T3 = TAU*DCONJG( V3 )
+         V4 = DCONJG( V( 4 ) )
+         T4 = TAU*DCONJG( V4 )
+         V5 = DCONJG( V( 5 ) )
+         T5 = TAU*DCONJG( V5 )
+         DO 100 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) +
+     $            V4*C( 4, J ) + V5*C( 5, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+            C( 4, J ) = C( 4, J ) - SUM*T4
+            C( 5, J ) = C( 5, J ) - SUM*T5
+  100    CONTINUE
+         GO TO 410
+  110    CONTINUE
+*
+*        Special code for 6 x 6 Householder
+*
+         V1 = DCONJG( V( 1 ) )
+         T1 = TAU*DCONJG( V1 )
+         V2 = DCONJG( V( 2 ) )
+         T2 = TAU*DCONJG( V2 )
+         V3 = DCONJG( V( 3 ) )
+         T3 = TAU*DCONJG( V3 )
+         V4 = DCONJG( V( 4 ) )
+         T4 = TAU*DCONJG( V4 )
+         V5 = DCONJG( V( 5 ) )
+         T5 = TAU*DCONJG( V5 )
+         V6 = DCONJG( V( 6 ) )
+         T6 = TAU*DCONJG( V6 )
+         DO 120 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) +
+     $            V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+            C( 4, J ) = C( 4, J ) - SUM*T4
+            C( 5, J ) = C( 5, J ) - SUM*T5
+            C( 6, J ) = C( 6, J ) - SUM*T6
+  120    CONTINUE
+         GO TO 410
+  130    CONTINUE
+*
+*        Special code for 7 x 7 Householder
+*
+         V1 = DCONJG( V( 1 ) )
+         T1 = TAU*DCONJG( V1 )
+         V2 = DCONJG( V( 2 ) )
+         T2 = TAU*DCONJG( V2 )
+         V3 = DCONJG( V( 3 ) )
+         T3 = TAU*DCONJG( V3 )
+         V4 = DCONJG( V( 4 ) )
+         T4 = TAU*DCONJG( V4 )
+         V5 = DCONJG( V( 5 ) )
+         T5 = TAU*DCONJG( V5 )
+         V6 = DCONJG( V( 6 ) )
+         T6 = TAU*DCONJG( V6 )
+         V7 = DCONJG( V( 7 ) )
+         T7 = TAU*DCONJG( V7 )
+         DO 140 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) +
+     $            V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) +
+     $            V7*C( 7, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+            C( 4, J ) = C( 4, J ) - SUM*T4
+            C( 5, J ) = C( 5, J ) - SUM*T5
+            C( 6, J ) = C( 6, J ) - SUM*T6
+            C( 7, J ) = C( 7, J ) - SUM*T7
+  140    CONTINUE
+         GO TO 410
+  150    CONTINUE
+*
+*        Special code for 8 x 8 Householder
+*
+         V1 = DCONJG( V( 1 ) )
+         T1 = TAU*DCONJG( V1 )
+         V2 = DCONJG( V( 2 ) )
+         T2 = TAU*DCONJG( V2 )
+         V3 = DCONJG( V( 3 ) )
+         T3 = TAU*DCONJG( V3 )
+         V4 = DCONJG( V( 4 ) )
+         T4 = TAU*DCONJG( V4 )
+         V5 = DCONJG( V( 5 ) )
+         T5 = TAU*DCONJG( V5 )
+         V6 = DCONJG( V( 6 ) )
+         T6 = TAU*DCONJG( V6 )
+         V7 = DCONJG( V( 7 ) )
+         T7 = TAU*DCONJG( V7 )
+         V8 = DCONJG( V( 8 ) )
+         T8 = TAU*DCONJG( V8 )
+         DO 160 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) +
+     $            V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) +
+     $            V7*C( 7, J ) + V8*C( 8, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+            C( 4, J ) = C( 4, J ) - SUM*T4
+            C( 5, J ) = C( 5, J ) - SUM*T5
+            C( 6, J ) = C( 6, J ) - SUM*T6
+            C( 7, J ) = C( 7, J ) - SUM*T7
+            C( 8, J ) = C( 8, J ) - SUM*T8
+  160    CONTINUE
+         GO TO 410
+  170    CONTINUE
+*
+*        Special code for 9 x 9 Householder
+*
+         V1 = DCONJG( V( 1 ) )
+         T1 = TAU*DCONJG( V1 )
+         V2 = DCONJG( V( 2 ) )
+         T2 = TAU*DCONJG( V2 )
+         V3 = DCONJG( V( 3 ) )
+         T3 = TAU*DCONJG( V3 )
+         V4 = DCONJG( V( 4 ) )
+         T4 = TAU*DCONJG( V4 )
+         V5 = DCONJG( V( 5 ) )
+         T5 = TAU*DCONJG( V5 )
+         V6 = DCONJG( V( 6 ) )
+         T6 = TAU*DCONJG( V6 )
+         V7 = DCONJG( V( 7 ) )
+         T7 = TAU*DCONJG( V7 )
+         V8 = DCONJG( V( 8 ) )
+         T8 = TAU*DCONJG( V8 )
+         V9 = DCONJG( V( 9 ) )
+         T9 = TAU*DCONJG( V9 )
+         DO 180 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) +
+     $            V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) +
+     $            V7*C( 7, J ) + V8*C( 8, J ) + V9*C( 9, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+            C( 4, J ) = C( 4, J ) - SUM*T4
+            C( 5, J ) = C( 5, J ) - SUM*T5
+            C( 6, J ) = C( 6, J ) - SUM*T6
+            C( 7, J ) = C( 7, J ) - SUM*T7
+            C( 8, J ) = C( 8, J ) - SUM*T8
+            C( 9, J ) = C( 9, J ) - SUM*T9
+  180    CONTINUE
+         GO TO 410
+  190    CONTINUE
+*
+*        Special code for 10 x 10 Householder
+*
+         V1 = DCONJG( V( 1 ) )
+         T1 = TAU*DCONJG( V1 )
+         V2 = DCONJG( V( 2 ) )
+         T2 = TAU*DCONJG( V2 )
+         V3 = DCONJG( V( 3 ) )
+         T3 = TAU*DCONJG( V3 )
+         V4 = DCONJG( V( 4 ) )
+         T4 = TAU*DCONJG( V4 )
+         V5 = DCONJG( V( 5 ) )
+         T5 = TAU*DCONJG( V5 )
+         V6 = DCONJG( V( 6 ) )
+         T6 = TAU*DCONJG( V6 )
+         V7 = DCONJG( V( 7 ) )
+         T7 = TAU*DCONJG( V7 )
+         V8 = DCONJG( V( 8 ) )
+         T8 = TAU*DCONJG( V8 )
+         V9 = DCONJG( V( 9 ) )
+         T9 = TAU*DCONJG( V9 )
+         V10 = DCONJG( V( 10 ) )
+         T10 = TAU*DCONJG( V10 )
+         DO 200 J = 1, N
+            SUM = V1*C( 1, J ) + V2*C( 2, J ) + V3*C( 3, J ) +
+     $            V4*C( 4, J ) + V5*C( 5, J ) + V6*C( 6, J ) +
+     $            V7*C( 7, J ) + V8*C( 8, J ) + V9*C( 9, J ) +
+     $            V10*C( 10, J )
+            C( 1, J ) = C( 1, J ) - SUM*T1
+            C( 2, J ) = C( 2, J ) - SUM*T2
+            C( 3, J ) = C( 3, J ) - SUM*T3
+            C( 4, J ) = C( 4, J ) - SUM*T4
+            C( 5, J ) = C( 5, J ) - SUM*T5
+            C( 6, J ) = C( 6, J ) - SUM*T6
+            C( 7, J ) = C( 7, J ) - SUM*T7
+            C( 8, J ) = C( 8, J ) - SUM*T8
+            C( 9, J ) = C( 9, J ) - SUM*T9
+            C( 10, J ) = C( 10, J ) - SUM*T10
+  200    CONTINUE
+         GO TO 410
+      ELSE
+*
+*        Form  C * H, where H has order n.
+*
+         GO TO ( 210, 230, 250, 270, 290, 310, 330, 350,
+     $           370, 390 )N
+*
+*        Code for general N
+*
+*        w := C * v
+*
+         CALL ZGEMV( 'No transpose', M, N, ONE, C, LDC, V, 1, ZERO,
+     $               WORK, 1 )
+*
+*        C := C - tau * w * v'
+*
+         CALL ZGERC( M, N, -TAU, WORK, 1, V, 1, C, LDC )
+         GO TO 410
+  210    CONTINUE
+*
+*        Special code for 1 x 1 Householder
+*
+         T1 = ONE - TAU*V( 1 )*DCONJG( V( 1 ) )
+         DO 220 J = 1, M
+            C( J, 1 ) = T1*C( J, 1 )
+  220    CONTINUE
+         GO TO 410
+  230    CONTINUE
+*
+*        Special code for 2 x 2 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*DCONJG( V1 )
+         V2 = V( 2 )
+         T2 = TAU*DCONJG( V2 )
+         DO 240 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+  240    CONTINUE
+         GO TO 410
+  250    CONTINUE
+*
+*        Special code for 3 x 3 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*DCONJG( V1 )
+         V2 = V( 2 )
+         T2 = TAU*DCONJG( V2 )
+         V3 = V( 3 )
+         T3 = TAU*DCONJG( V3 )
+         DO 260 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+  260    CONTINUE
+         GO TO 410
+  270    CONTINUE
+*
+*        Special code for 4 x 4 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*DCONJG( V1 )
+         V2 = V( 2 )
+         T2 = TAU*DCONJG( V2 )
+         V3 = V( 3 )
+         T3 = TAU*DCONJG( V3 )
+         V4 = V( 4 )
+         T4 = TAU*DCONJG( V4 )
+         DO 280 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) +
+     $            V4*C( J, 4 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+            C( J, 4 ) = C( J, 4 ) - SUM*T4
+  280    CONTINUE
+         GO TO 410
+  290    CONTINUE
+*
+*        Special code for 5 x 5 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*DCONJG( V1 )
+         V2 = V( 2 )
+         T2 = TAU*DCONJG( V2 )
+         V3 = V( 3 )
+         T3 = TAU*DCONJG( V3 )
+         V4 = V( 4 )
+         T4 = TAU*DCONJG( V4 )
+         V5 = V( 5 )
+         T5 = TAU*DCONJG( V5 )
+         DO 300 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) +
+     $            V4*C( J, 4 ) + V5*C( J, 5 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+            C( J, 4 ) = C( J, 4 ) - SUM*T4
+            C( J, 5 ) = C( J, 5 ) - SUM*T5
+  300    CONTINUE
+         GO TO 410
+  310    CONTINUE
+*
+*        Special code for 6 x 6 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*DCONJG( V1 )
+         V2 = V( 2 )
+         T2 = TAU*DCONJG( V2 )
+         V3 = V( 3 )
+         T3 = TAU*DCONJG( V3 )
+         V4 = V( 4 )
+         T4 = TAU*DCONJG( V4 )
+         V5 = V( 5 )
+         T5 = TAU*DCONJG( V5 )
+         V6 = V( 6 )
+         T6 = TAU*DCONJG( V6 )
+         DO 320 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) +
+     $            V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+            C( J, 4 ) = C( J, 4 ) - SUM*T4
+            C( J, 5 ) = C( J, 5 ) - SUM*T5
+            C( J, 6 ) = C( J, 6 ) - SUM*T6
+  320    CONTINUE
+         GO TO 410
+  330    CONTINUE
+*
+*        Special code for 7 x 7 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*DCONJG( V1 )
+         V2 = V( 2 )
+         T2 = TAU*DCONJG( V2 )
+         V3 = V( 3 )
+         T3 = TAU*DCONJG( V3 )
+         V4 = V( 4 )
+         T4 = TAU*DCONJG( V4 )
+         V5 = V( 5 )
+         T5 = TAU*DCONJG( V5 )
+         V6 = V( 6 )
+         T6 = TAU*DCONJG( V6 )
+         V7 = V( 7 )
+         T7 = TAU*DCONJG( V7 )
+         DO 340 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) +
+     $            V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) +
+     $            V7*C( J, 7 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+            C( J, 4 ) = C( J, 4 ) - SUM*T4
+            C( J, 5 ) = C( J, 5 ) - SUM*T5
+            C( J, 6 ) = C( J, 6 ) - SUM*T6
+            C( J, 7 ) = C( J, 7 ) - SUM*T7
+  340    CONTINUE
+         GO TO 410
+  350    CONTINUE
+*
+*        Special code for 8 x 8 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*DCONJG( V1 )
+         V2 = V( 2 )
+         T2 = TAU*DCONJG( V2 )
+         V3 = V( 3 )
+         T3 = TAU*DCONJG( V3 )
+         V4 = V( 4 )
+         T4 = TAU*DCONJG( V4 )
+         V5 = V( 5 )
+         T5 = TAU*DCONJG( V5 )
+         V6 = V( 6 )
+         T6 = TAU*DCONJG( V6 )
+         V7 = V( 7 )
+         T7 = TAU*DCONJG( V7 )
+         V8 = V( 8 )
+         T8 = TAU*DCONJG( V8 )
+         DO 360 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) +
+     $            V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) +
+     $            V7*C( J, 7 ) + V8*C( J, 8 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+            C( J, 4 ) = C( J, 4 ) - SUM*T4
+            C( J, 5 ) = C( J, 5 ) - SUM*T5
+            C( J, 6 ) = C( J, 6 ) - SUM*T6
+            C( J, 7 ) = C( J, 7 ) - SUM*T7
+            C( J, 8 ) = C( J, 8 ) - SUM*T8
+  360    CONTINUE
+         GO TO 410
+  370    CONTINUE
+*
+*        Special code for 9 x 9 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*DCONJG( V1 )
+         V2 = V( 2 )
+         T2 = TAU*DCONJG( V2 )
+         V3 = V( 3 )
+         T3 = TAU*DCONJG( V3 )
+         V4 = V( 4 )
+         T4 = TAU*DCONJG( V4 )
+         V5 = V( 5 )
+         T5 = TAU*DCONJG( V5 )
+         V6 = V( 6 )
+         T6 = TAU*DCONJG( V6 )
+         V7 = V( 7 )
+         T7 = TAU*DCONJG( V7 )
+         V8 = V( 8 )
+         T8 = TAU*DCONJG( V8 )
+         V9 = V( 9 )
+         T9 = TAU*DCONJG( V9 )
+         DO 380 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) +
+     $            V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) +
+     $            V7*C( J, 7 ) + V8*C( J, 8 ) + V9*C( J, 9 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+            C( J, 4 ) = C( J, 4 ) - SUM*T4
+            C( J, 5 ) = C( J, 5 ) - SUM*T5
+            C( J, 6 ) = C( J, 6 ) - SUM*T6
+            C( J, 7 ) = C( J, 7 ) - SUM*T7
+            C( J, 8 ) = C( J, 8 ) - SUM*T8
+            C( J, 9 ) = C( J, 9 ) - SUM*T9
+  380    CONTINUE
+         GO TO 410
+  390    CONTINUE
+*
+*        Special code for 10 x 10 Householder
+*
+         V1 = V( 1 )
+         T1 = TAU*DCONJG( V1 )
+         V2 = V( 2 )
+         T2 = TAU*DCONJG( V2 )
+         V3 = V( 3 )
+         T3 = TAU*DCONJG( V3 )
+         V4 = V( 4 )
+         T4 = TAU*DCONJG( V4 )
+         V5 = V( 5 )
+         T5 = TAU*DCONJG( V5 )
+         V6 = V( 6 )
+         T6 = TAU*DCONJG( V6 )
+         V7 = V( 7 )
+         T7 = TAU*DCONJG( V7 )
+         V8 = V( 8 )
+         T8 = TAU*DCONJG( V8 )
+         V9 = V( 9 )
+         T9 = TAU*DCONJG( V9 )
+         V10 = V( 10 )
+         T10 = TAU*DCONJG( V10 )
+         DO 400 J = 1, M
+            SUM = V1*C( J, 1 ) + V2*C( J, 2 ) + V3*C( J, 3 ) +
+     $            V4*C( J, 4 ) + V5*C( J, 5 ) + V6*C( J, 6 ) +
+     $            V7*C( J, 7 ) + V8*C( J, 8 ) + V9*C( J, 9 ) +
+     $            V10*C( J, 10 )
+            C( J, 1 ) = C( J, 1 ) - SUM*T1
+            C( J, 2 ) = C( J, 2 ) - SUM*T2
+            C( J, 3 ) = C( J, 3 ) - SUM*T3
+            C( J, 4 ) = C( J, 4 ) - SUM*T4
+            C( J, 5 ) = C( J, 5 ) - SUM*T5
+            C( J, 6 ) = C( J, 6 ) - SUM*T6
+            C( J, 7 ) = C( J, 7 ) - SUM*T7
+            C( J, 8 ) = C( J, 8 ) - SUM*T8
+            C( J, 9 ) = C( J, 9 ) - SUM*T9
+            C( J, 10 ) = C( J, 10 ) - SUM*T10
+  400    CONTINUE
+         GO TO 410
+      END IF
+  410 CONTINUE
+      RETURN
+*
+*     End of ZLARFX
+*
+      END
diff --git a/costa/native/external/lapack/zlargv.f b/costa/native/external/lapack/zlargv.f
new file mode 100644
index 000000000..f51ee9b95
--- /dev/null
+++ b/costa/native/external/lapack/zlargv.f
@@ -0,0 +1,225 @@
+      SUBROUTINE ZLARGV( N, X, INCX, Y, INCY, C, INCC )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCC, INCX, INCY, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   C( * )
+      COMPLEX*16         X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLARGV generates a vector of complex plane rotations with real
+*  cosines, determined by elements of the complex vectors x and y.
+*  For i = 1,2,...,n
+*
+*     (        c(i)   s(i) ) ( x(i) ) = ( r(i) )
+*     ( -conjg(s(i))  c(i) ) ( y(i) ) = (   0  )
+*
+*     where c(i)**2 + ABS(s(i))**2 = 1
+*
+*  The following conventions are used (these are the same as in ZLARTG,
+*  but differ from the BLAS1 routine ZROTG):
+*     If y(i)=0, then c(i)=1 and s(i)=0.
+*     If x(i)=0, then c(i)=0 and s(i) is chosen so that r(i) is real.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of plane rotations to be generated.
+*
+*  X       (input/output) COMPLEX*16 array, dimension (1+(N-1)*INCX)
+*          On entry, the vector x.
+*          On exit, x(i) is overwritten by r(i), for i = 1,...,n.
+*
+*  INCX    (input) INTEGER
+*          The increment between elements of X. INCX > 0.
+*
+*  Y       (input/output) COMPLEX*16 array, dimension (1+(N-1)*INCY)
+*          On entry, the vector y.
+*          On exit, the sines of the plane rotations.
+*
+*  INCY    (input) INTEGER
+*          The increment between elements of Y. INCY > 0.
+*
+*  C       (output) DOUBLE PRECISION array, dimension (1+(N-1)*INCC)
+*          The cosines of the plane rotations.
+*
+*  INCC    (input) INTEGER
+*          The increment between elements of C. INCC > 0.
+*
+*  Further Details
+*  ======= =======
+*
+*  6-6-96 - Modified with a new algorithm by W. Kahan and J. Demmel
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   TWO, ONE, ZERO
+      PARAMETER          ( TWO = 2.0D+0, ONE = 1.0D+0, ZERO = 0.0D+0 )
+      COMPLEX*16         CZERO
+      PARAMETER          ( CZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            FIRST
+      INTEGER            COUNT, I, IC, IX, IY, J
+      DOUBLE PRECISION   CS, D, DI, DR, EPS, F2, F2S, G2, G2S, SAFMIN,
+     $                   SAFMN2, SAFMX2, SCALE
+      COMPLEX*16         F, FF, FS, G, GS, R, SN
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH, DLAPY2
+      EXTERNAL           DLAMCH, DLAPY2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCMPLX, DCONJG, DIMAG, INT, LOG,
+     $                   MAX, SQRT
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   ABS1, ABSSQ
+*     ..
+*     .. Save statement ..
+      SAVE               FIRST, SAFMX2, SAFMIN, SAFMN2
+*     ..
+*     .. Data statements ..
+      DATA               FIRST / .TRUE. /
+*     ..
+*     .. Statement Function definitions ..
+      ABS1( FF ) = MAX( ABS( DBLE( FF ) ), ABS( DIMAG( FF ) ) )
+      ABSSQ( FF ) = DBLE( FF )**2 + DIMAG( FF )**2
+*     ..
+*     .. Executable Statements ..
+*
+      IF( FIRST ) THEN
+         FIRST = .FALSE.
+         SAFMIN = DLAMCH( 'S' )
+         EPS = DLAMCH( 'E' )
+         SAFMN2 = DLAMCH( 'B' )**INT( LOG( SAFMIN / EPS ) /
+     $            LOG( DLAMCH( 'B' ) ) / TWO )
+         SAFMX2 = ONE / SAFMN2
+      END IF
+      IX = 1
+      IY = 1
+      IC = 1
+      DO 60 I = 1, N
+         F = X( IX )
+         G = Y( IY )
+*
+*        Use identical algorithm as in ZLARTG
+*
+         SCALE = MAX( ABS1( F ), ABS1( G ) )
+         FS = F
+         GS = G
+         COUNT = 0
+         IF( SCALE.GE.SAFMX2 ) THEN
+   10       CONTINUE
+            COUNT = COUNT + 1
+            FS = FS*SAFMN2
+            GS = GS*SAFMN2
+            SCALE = SCALE*SAFMN2
+            IF( SCALE.GE.SAFMX2 )
+     $         GO TO 10
+         ELSE IF( SCALE.LE.SAFMN2 ) THEN
+            IF( G.EQ.CZERO ) THEN
+               CS = ONE
+               SN = CZERO
+               R = F
+               GO TO 50
+            END IF
+   20       CONTINUE
+            COUNT = COUNT - 1
+            FS = FS*SAFMX2
+            GS = GS*SAFMX2
+            SCALE = SCALE*SAFMX2
+            IF( SCALE.LE.SAFMN2 )
+     $         GO TO 20
+         END IF
+         F2 = ABSSQ( FS )
+         G2 = ABSSQ( GS )
+         IF( F2.LE.MAX( G2, ONE )*SAFMIN ) THEN
+*
+*           This is a rare case: F is very small.
+*
+            IF( F.EQ.CZERO ) THEN
+               CS = ZERO
+               R = DLAPY2( DBLE( G ), DIMAG( G ) )
+*              Do complex/real division explicitly with two real
+*              divisions
+               D = DLAPY2( DBLE( GS ), DIMAG( GS ) )
+               SN = DCMPLX( DBLE( GS ) / D, -DIMAG( GS ) / D )
+               GO TO 50
+            END IF
+            F2S = DLAPY2( DBLE( FS ), DIMAG( FS ) )
+*           G2 and G2S are accurate
+*           G2 is at least SAFMIN, and G2S is at least SAFMN2
+            G2S = SQRT( G2 )
+*           Error in CS from underflow in F2S is at most
+*           UNFL / SAFMN2 .lt. sqrt(UNFL*EPS) .lt. EPS
+*           If MAX(G2,ONE)=G2, then F2 .lt. G2*SAFMIN,
+*           and so CS .lt. sqrt(SAFMIN)
+*           If MAX(G2,ONE)=ONE, then F2 .lt. SAFMIN
+*           and so CS .lt. sqrt(SAFMIN)/SAFMN2 = sqrt(EPS)
+*           Therefore, CS = F2S/G2S / sqrt( 1 + (F2S/G2S)**2 ) = F2S/G2S
+            CS = F2S / G2S
+*           Make sure abs(FF) = 1
+*           Do complex/real division explicitly with 2 real divisions
+            IF( ABS1( F ).GT.ONE ) THEN
+               D = DLAPY2( DBLE( F ), DIMAG( F ) )
+               FF = DCMPLX( DBLE( F ) / D, DIMAG( F ) / D )
+            ELSE
+               DR = SAFMX2*DBLE( F )
+               DI = SAFMX2*DIMAG( F )
+               D = DLAPY2( DR, DI )
+               FF = DCMPLX( DR / D, DI / D )
+            END IF
+            SN = FF*DCMPLX( DBLE( GS ) / G2S, -DIMAG( GS ) / G2S )
+            R = CS*F + SN*G
+         ELSE
+*
+*           This is the most common case.
+*           Neither F2 nor F2/G2 are less than SAFMIN
+*           F2S cannot overflow, and it is accurate
+*
+            F2S = SQRT( ONE+G2 / F2 )
+*           Do the F2S(real)*FS(complex) multiply with two real
+*           multiplies
+            R = DCMPLX( F2S*DBLE( FS ), F2S*DIMAG( FS ) )
+            CS = ONE / F2S
+            D = F2 + G2
+*           Do complex/real division explicitly with two real divisions
+            SN = DCMPLX( DBLE( R ) / D, DIMAG( R ) / D )
+            SN = SN*DCONJG( GS )
+            IF( COUNT.NE.0 ) THEN
+               IF( COUNT.GT.0 ) THEN
+                  DO 30 J = 1, COUNT
+                     R = R*SAFMX2
+   30             CONTINUE
+               ELSE
+                  DO 40 J = 1, -COUNT
+                     R = R*SAFMN2
+   40             CONTINUE
+               END IF
+            END IF
+         END IF
+   50    CONTINUE
+         C( IC ) = CS
+         Y( IY ) = SN
+         X( IX ) = R
+         IC = IC + INCC
+         IY = IY + INCY
+         IX = IX + INCX
+   60 CONTINUE
+      RETURN
+*
+*     End of ZLARGV
+*
+      END
diff --git a/costa/native/external/lapack/zlarnv.f b/costa/native/external/lapack/zlarnv.f
new file mode 100644
index 000000000..3cad52f03
--- /dev/null
+++ b/costa/native/external/lapack/zlarnv.f
@@ -0,0 +1,131 @@
+      SUBROUTINE ZLARNV( IDIST, ISEED, N, X )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            IDIST, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            ISEED( 4 )
+      COMPLEX*16         X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLARNV returns a vector of n random complex numbers from a uniform or
+*  normal distribution.
+*
+*  Arguments
+*  =========
+*
+*  IDIST   (input) INTEGER
+*          Specifies the distribution of the random numbers:
+*          = 1:  real and imaginary parts each uniform (0,1)
+*          = 2:  real and imaginary parts each uniform (-1,1)
+*          = 3:  real and imaginary parts each normal (0,1)
+*          = 4:  uniformly distributed on the disc abs(z) < 1
+*          = 5:  uniformly distributed on the circle abs(z) = 1
+*
+*  ISEED   (input/output) INTEGER array, dimension (4)
+*          On entry, the seed of the random number generator; the array
+*          elements must be between 0 and 4095, and ISEED(4) must be
+*          odd.
+*          On exit, the seed is updated.
+*
+*  N       (input) INTEGER
+*          The number of random numbers to be generated.
+*
+*  X       (output) COMPLEX*16 array, dimension (N)
+*          The generated random numbers.
+*
+*  Further Details
+*  ===============
+*
+*  This routine calls the auxiliary routine DLARUV to generate random
+*  real numbers from a uniform (0,1) distribution, in batches of up to
+*  128 using vectorisable code. The Box-Muller method is used to
+*  transform numbers from a uniform to a normal distribution.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0 )
+      INTEGER            LV
+      PARAMETER          ( LV = 128 )
+      DOUBLE PRECISION   TWOPI
+      PARAMETER          ( TWOPI = 6.2831853071795864769252867663D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IL, IV
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   U( LV )
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCMPLX, EXP, LOG, MIN, SQRT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARUV
+*     ..
+*     .. Executable Statements ..
+*
+      DO 60 IV = 1, N, LV / 2
+         IL = MIN( LV / 2, N-IV+1 )
+*
+*        Call DLARUV to generate 2*IL real numbers from a uniform (0,1)
+*        distribution (2*IL <= LV)
+*
+         CALL DLARUV( ISEED, 2*IL, U )
+*
+         IF( IDIST.EQ.1 ) THEN
+*
+*           Copy generated numbers
+*
+            DO 10 I = 1, IL
+               X( IV+I-1 ) = DCMPLX( U( 2*I-1 ), U( 2*I ) )
+   10       CONTINUE
+         ELSE IF( IDIST.EQ.2 ) THEN
+*
+*           Convert generated numbers to uniform (-1,1) distribution
+*
+            DO 20 I = 1, IL
+               X( IV+I-1 ) = DCMPLX( TWO*U( 2*I-1 )-ONE,
+     $                       TWO*U( 2*I )-ONE )
+   20       CONTINUE
+         ELSE IF( IDIST.EQ.3 ) THEN
+*
+*           Convert generated numbers to normal (0,1) distribution
+*
+            DO 30 I = 1, IL
+               X( IV+I-1 ) = SQRT( -TWO*LOG( U( 2*I-1 ) ) )*
+     $                       EXP( DCMPLX( ZERO, TWOPI*U( 2*I ) ) )
+   30       CONTINUE
+         ELSE IF( IDIST.EQ.4 ) THEN
+*
+*           Convert generated numbers to complex numbers uniformly
+*           distributed on the unit disk
+*
+            DO 40 I = 1, IL
+               X( IV+I-1 ) = SQRT( U( 2*I-1 ) )*
+     $                       EXP( DCMPLX( ZERO, TWOPI*U( 2*I ) ) )
+   40       CONTINUE
+         ELSE IF( IDIST.EQ.5 ) THEN
+*
+*           Convert generated numbers to complex numbers uniformly
+*           distributed on the unit circle
+*
+            DO 50 I = 1, IL
+               X( IV+I-1 ) = EXP( DCMPLX( ZERO, TWOPI*U( 2*I ) ) )
+   50       CONTINUE
+         END IF
+   60 CONTINUE
+      RETURN
+*
+*     End of ZLARNV
+*
+      END
diff --git a/costa/native/external/lapack/zlarrv.f b/costa/native/external/lapack/zlarrv.f
new file mode 100644
index 000000000..d44fd7293
--- /dev/null
+++ b/costa/native/external/lapack/zlarrv.f
@@ -0,0 +1,437 @@
+      SUBROUTINE ZLARRV( N, D, L, ISPLIT, M, W, IBLOCK, GERSCH, TOL, Z,
+     $                   LDZ, ISUPPZ, WORK, IWORK, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDZ, M, N
+      DOUBLE PRECISION   TOL
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IBLOCK( * ), ISPLIT( * ), ISUPPZ( * ),
+     $                   IWORK( * )
+      DOUBLE PRECISION   D( * ), GERSCH( * ), L( * ), W( * ), WORK( * )
+      COMPLEX*16         Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLARRV computes the eigenvectors of the tridiagonal matrix
+*  T = L D L^T given L, D and the eigenvalues of L D L^T.
+*  The input eigenvalues should have high relative accuracy with
+*  respect to the entries of L and D. The desired accuracy of the
+*  output can be specified by the input parameter TOL.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix.  N >= 0.
+*
+*  D       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the n diagonal elements of the diagonal matrix D.
+*          On exit, D may be overwritten.
+*
+*  L       (input/output) DOUBLE PRECISION array, dimension (N-1)
+*          On entry, the (n-1) subdiagonal elements of the unit
+*          bidiagonal matrix L in elements 1 to N-1 of L. L(N) need
+*          not be set. On exit, L is overwritten.
+*
+*  ISPLIT  (input) INTEGER array, dimension (N)
+*          The splitting points, at which T breaks up into submatrices.
+*          The first submatrix consists of rows/columns 1 to
+*          ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1
+*          through ISPLIT( 2 ), etc.
+*
+*  TOL     (input) DOUBLE PRECISION
+*          The absolute error tolerance for the
+*          eigenvalues/eigenvectors.
+*          Errors in the input eigenvalues must be bounded by TOL.
+*          The eigenvectors output have residual norms
+*          bounded by TOL, and the dot products between different
+*          eigenvectors are bounded by TOL. TOL must be at least
+*          N*EPS*|T|, where EPS is the machine precision and |T| is
+*          the 1-norm of the tridiagonal matrix.
+*
+*  M       (input) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (input) DOUBLE PRECISION array, dimension (N)
+*          The first M elements of W contain the eigenvalues for
+*          which eigenvectors are to be computed.  The eigenvalues
+*          should be grouped by split-off block and ordered from
+*          smallest to largest within the block ( The output array
+*          W from DLARRE is expected here ).
+*          Errors in W must be bounded by TOL (see above).
+*
+*  IBLOCK  (input) INTEGER array, dimension (N)
+*          The submatrix indices associated with the corresponding
+*          eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to
+*          the first submatrix from the top, =2 if W(i) belongs to
+*          the second submatrix, etc.
+*
+*  Z       (output) COMPLEX*16 array, dimension (LDZ, max(1,M) )
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix T
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  ISUPPZ  (output) INTEGER ARRAY, dimension ( 2*max(1,M) )
+*          The support of the eigenvectors in Z, i.e., the indices
+*          indicating the nonzero elements in Z. The i-th eigenvector
+*          is nonzero only in elements ISUPPZ( 2*i-1 ) through
+*          ISUPPZ( 2*i ).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (13*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (6*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = 1, internal error in DLARRB
+*                if INFO = 2, internal error in ZSTEIN
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Inderjit Dhillon, IBM Almaden, USA
+*     Osni Marques, LBNL/NERSC, USA
+*     Ken Stanley, Computer Science Division, University of
+*       California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            MGSSIZ
+      PARAMETER          ( MGSSIZ = 20 )
+      DOUBLE PRECISION   ZERO, ONE, FOUR
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, FOUR = 4.0D0 )
+      COMPLEX*16         CZERO
+      PARAMETER          ( CZERO = ( 0.0D0, 0.0D0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            MGSCLS
+      INTEGER            I, IBEGIN, IEND, IINDC1, IINDC2, IINDR, IINDWK,
+     $                   IINFO, IM, IN, INDERR, INDGAP, INDIN1, INDIN2,
+     $                   INDLD, INDLLD, INDWRK, ITER, ITMP1, ITMP2, J,
+     $                   JBLK, K, KTOT, LSBDPT, MAXITR, NCLUS, NDEPTH,
+     $                   NDONE, NEWCLS, NEWFRS, NEWFTT, NEWLST, NEWSIZ,
+     $                   NSPLIT, OLDCLS, OLDFST, OLDIEN, OLDLST, OLDNCL,
+     $                   P, Q
+      DOUBLE PRECISION   EPS, GAP, LAMBDA, MGSTOL, MINGMA, MINRGP,
+     $                   NRMINV, RELGAP, RELTOL, RESID, RQCORR, SIGMA,
+     $                   TMP1, ZTZ
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH, DZNRM2
+      COMPLEX*16         ZDOTU
+      EXTERNAL           DLAMCH, DZNRM2, ZDOTU
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLARRB, DLARRF, ZAXPY, ZDSCAL, ZLAR1V,
+     $                   ZLASET, ZSTEIN
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCMPLX, MAX, MIN, SQRT
+*     ..
+*     .. Local Arrays ..
+      INTEGER            TEMP( 1 )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INDERR = N + 1
+      INDLD = 2*N
+      INDLLD = 3*N
+      INDGAP = 4*N
+      INDIN1 = 5*N + 1
+      INDIN2 = 6*N + 1
+      INDWRK = 7*N + 1
+*
+      IINDR = N
+      IINDC1 = 2*N
+      IINDC2 = 3*N
+      IINDWK = 4*N + 1
+*
+      EPS = DLAMCH( 'Precision' )
+*
+      DO 10 I = 1, 2*N
+         IWORK( I ) = 0
+   10 CONTINUE
+      DO 20 I = 1, M
+         WORK( INDERR+I-1 ) = EPS*ABS( W( I ) )
+   20 CONTINUE
+      CALL ZLASET( 'Full', N, N, CZERO, CZERO, Z, LDZ )
+      MGSTOL = 5.0D0*EPS
+*
+      NSPLIT = IBLOCK( M )
+      IBEGIN = 1
+      DO 190 JBLK = 1, NSPLIT
+         IEND = ISPLIT( JBLK )
+*
+*        Find the eigenvectors of the submatrix indexed IBEGIN
+*        through IEND.
+*
+         IF( IBEGIN.EQ.IEND ) THEN
+            Z( IBEGIN, IBEGIN ) = ONE
+            ISUPPZ( 2*IBEGIN-1 ) = IBEGIN
+            ISUPPZ( 2*IBEGIN ) = IBEGIN
+            IBEGIN = IEND + 1
+            GO TO 190
+         END IF
+         OLDIEN = IBEGIN - 1
+         IN = IEND - OLDIEN
+         RELTOL = MIN( 1.0D-2, ONE / DBLE( IN ) )
+         IM = IN
+         CALL DCOPY( IM, W( IBEGIN ), 1, WORK, 1 )
+         DO 30 I = 1, IN - 1
+            WORK( INDGAP+I ) = WORK( I+1 ) - WORK( I )
+   30    CONTINUE
+         WORK( INDGAP+IN ) = MAX( ABS( WORK( IN ) ), EPS )
+         NDONE = 0
+*
+         NDEPTH = 0
+         LSBDPT = 1
+         NCLUS = 1
+         IWORK( IINDC1+1 ) = 1
+         IWORK( IINDC1+2 ) = IN
+*
+*        While( NDONE.LT.IM ) do
+*
+   40    CONTINUE
+         IF( NDONE.LT.IM ) THEN
+            OLDNCL = NCLUS
+            NCLUS = 0
+            LSBDPT = 1 - LSBDPT
+            DO 170 I = 1, OLDNCL
+               IF( LSBDPT.EQ.0 ) THEN
+                  OLDCLS = IINDC1
+                  NEWCLS = IINDC2
+               ELSE
+                  OLDCLS = IINDC2
+                  NEWCLS = IINDC1
+               END IF
+*
+*              If NDEPTH > 1, retrieve the relatively robust
+*              representation (RRR) and perform limited bisection
+*              (if necessary) to get approximate eigenvalues.
+*
+               J = OLDCLS + 2*I
+               OLDFST = IWORK( J-1 )
+               OLDLST = IWORK( J )
+               IF( NDEPTH.GT.0 ) THEN
+                  J = OLDIEN + OLDFST
+                  DO 50 K = 1, IN
+                     D( IBEGIN+K-1 ) = DBLE( Z( IBEGIN+K-1,
+     $                                 OLDIEN+OLDFST ) )
+                     L( IBEGIN+K-1 ) = DBLE( Z( IBEGIN+K-1,
+     $                                 OLDIEN+OLDFST+1 ) )
+   50             CONTINUE
+                  SIGMA = L( IEND )
+               END IF
+               K = IBEGIN
+               DO 60 J = 1, IN - 1
+                  WORK( INDLD+J ) = D( K )*L( K )
+                  WORK( INDLLD+J ) = WORK( INDLD+J )*L( K )
+                  K = K + 1
+   60          CONTINUE
+               IF( NDEPTH.GT.0 ) THEN
+                  CALL DLARRB( IN, D( IBEGIN ), L( IBEGIN ),
+     $                         WORK( INDLD+1 ), WORK( INDLLD+1 ),
+     $                         OLDFST, OLDLST, SIGMA, RELTOL, WORK,
+     $                         WORK( INDGAP+1 ), WORK( INDERR ),
+     $                         WORK( INDWRK ), IWORK( IINDWK ), IINFO )
+                  IF( IINFO.NE.0 ) THEN
+                     INFO = 1
+                     RETURN
+                  END IF
+               END IF
+*
+*              Classify eigenvalues of the current representation (RRR)
+*              as (i) isolated, (ii) loosely clustered or (iii) tightly
+*              clustered
+*
+               NEWFRS = OLDFST
+               DO 160 J = OLDFST, OLDLST
+                  IF( J.EQ.OLDLST .OR. WORK( INDGAP+J ).GE.RELTOL*
+     $                ABS( WORK( J ) ) ) THEN
+                     NEWLST = J
+                  ELSE
+*
+*                    continue (to the next loop)
+*
+                     RELGAP = WORK( INDGAP+J ) / ABS( WORK( J ) )
+                     IF( J.EQ.NEWFRS ) THEN
+                        MINRGP = RELGAP
+                     ELSE
+                        MINRGP = MIN( MINRGP, RELGAP )
+                     END IF
+                     GO TO 160
+                  END IF
+                  NEWSIZ = NEWLST - NEWFRS + 1
+                  MAXITR = 10
+                  NEWFTT = OLDIEN + NEWFRS
+                  IF( NEWSIZ.GT.1 ) THEN
+                     MGSCLS = NEWSIZ.LE.MGSSIZ .AND. MINRGP.GE.MGSTOL
+                     IF( .NOT.MGSCLS ) THEN
+                        DO 70 K = 1, IN
+                           WORK( INDIN1+K-1 ) = DBLE( Z( IBEGIN+K-1,
+     $                                          NEWFTT ) )
+                           WORK( INDIN2+K-1 ) = DBLE( Z( IBEGIN+K-1,
+     $                                          NEWFTT+1 ) )
+   70                   CONTINUE
+                        CALL DLARRF( IN, D( IBEGIN ), L( IBEGIN ),
+     $                               WORK( INDLD+1 ), WORK( INDLLD+1 ),
+     $                               NEWFRS, NEWLST, WORK,
+     $                               WORK( INDIN1 ), WORK( INDIN2 ),
+     $                               WORK( INDWRK ), IWORK( IINDWK ),
+     $                               INFO )
+                        IF( INFO.EQ.0 ) THEN
+                           NCLUS = NCLUS + 1
+                           K = NEWCLS + 2*NCLUS
+                           IWORK( K-1 ) = NEWFRS
+                           IWORK( K ) = NEWLST
+                        ELSE
+                           INFO = 0
+                           IF( MINRGP.GE.MGSTOL ) THEN
+                              MGSCLS = .TRUE.
+                           ELSE
+*
+*                             Call ZSTEIN to process this tight cluster.
+*                             This happens only if MINRGP <= MGSTOL
+*                             and DLARRF returns INFO = 1. The latter
+*                             means that a new RRR to "break" the
+*                             cluster could not be found.
+*
+                              WORK( INDWRK ) = D( IBEGIN )
+                              DO 80 K = 1, IN - 1
+                                 WORK( INDWRK+K ) = D( IBEGIN+K ) +
+     $                                              WORK( INDLLD+K )
+   80                         CONTINUE
+                              DO 90 K = 1, NEWSIZ
+                                 IWORK( IINDWK+K-1 ) = 1
+   90                         CONTINUE
+                              DO 100 K = NEWFRS, NEWLST
+                                 ISUPPZ( 2*( IBEGIN+K )-3 ) = 1
+                                 ISUPPZ( 2*( IBEGIN+K )-2 ) = IN
+  100                         CONTINUE
+                              TEMP( 1 ) = IN
+                              CALL ZSTEIN( IN, WORK( INDWRK ),
+     $                                     WORK( INDLD+1 ), NEWSIZ,
+     $                                     WORK( NEWFRS ),
+     $                                     IWORK( IINDWK ), TEMP( 1 ),
+     $                                     Z( IBEGIN, NEWFTT ), LDZ,
+     $                                     WORK( INDWRK+IN ),
+     $                                     IWORK( IINDWK+IN ),
+     $                                     IWORK( IINDWK+2*IN ), IINFO )
+                              IF( IINFO.NE.0 ) THEN
+                                 INFO = 2
+                                 RETURN
+                              END IF
+                              NDONE = NDONE + NEWSIZ
+                           END IF
+                        END IF
+                     END IF
+                  ELSE
+                     MGSCLS = .FALSE.
+                  END IF
+                  IF( NEWSIZ.EQ.1 .OR. MGSCLS ) THEN
+                     KTOT = NEWFTT
+                     DO 120 K = NEWFRS, NEWLST
+                        ITER = 0
+  110                   CONTINUE
+                        LAMBDA = WORK( K )
+                        CALL ZLAR1V( IN, 1, IN, LAMBDA, D( IBEGIN ),
+     $                               L( IBEGIN ), WORK( INDLD+1 ),
+     $                               WORK( INDLLD+1 ),
+     $                               GERSCH( 2*OLDIEN+1 ),
+     $                               Z( IBEGIN, KTOT ), ZTZ, MINGMA,
+     $                               IWORK( IINDR+KTOT ),
+     $                               ISUPPZ( 2*KTOT-1 ),
+     $                               WORK( INDWRK ) )
+                        TMP1 = ONE / ZTZ
+                        NRMINV = SQRT( TMP1 )
+                        RESID = ABS( MINGMA )*NRMINV
+                        RQCORR = MINGMA*TMP1
+                        IF( K.EQ.IN ) THEN
+                           GAP = WORK( INDGAP+K-1 )
+                        ELSE IF( K.EQ.1 ) THEN
+                           GAP = WORK( INDGAP+K )
+                        ELSE
+                           GAP = MIN( WORK( INDGAP+K-1 ),
+     $                           WORK( INDGAP+K ) )
+                        END IF
+                        ITER = ITER + 1
+                        IF( RESID.GT.TOL*GAP .AND. ABS( RQCORR ).GT.
+     $                      FOUR*EPS*ABS( LAMBDA ) ) THEN
+                           WORK( K ) = LAMBDA + RQCORR
+                           IF( ITER.LT.MAXITR ) THEN
+                              GO TO 110
+                           END IF
+                        END IF
+                        IWORK( KTOT ) = 1
+                        IF( NEWSIZ.EQ.1 )
+     $                     NDONE = NDONE + 1
+                        CALL ZDSCAL( IN, NRMINV, Z( IBEGIN, KTOT ), 1 )
+                        KTOT = KTOT + 1
+  120                CONTINUE
+                     IF( NEWSIZ.GT.1 ) THEN
+                        ITMP1 = ISUPPZ( 2*NEWFTT-1 )
+                        ITMP2 = ISUPPZ( 2*NEWFTT )
+                        KTOT = OLDIEN + NEWLST
+                        DO 140 P = NEWFTT + 1, KTOT
+                           DO 130 Q = NEWFTT, P - 1
+                              TMP1 = -ZDOTU( IN, Z( IBEGIN, P ), 1,
+     $                               Z( IBEGIN, Q ), 1 )
+                              CALL ZAXPY( IN, DCMPLX( TMP1, ZERO ),
+     $                                    Z( IBEGIN, Q ), 1,
+     $                                    Z( IBEGIN, P ), 1 )
+  130                      CONTINUE
+                           TMP1 = ONE / DZNRM2( IN, Z( IBEGIN, P ), 1 )
+                           CALL ZDSCAL( IN, TMP1, Z( IBEGIN, P ), 1 )
+                           ITMP1 = MIN( ITMP1, ISUPPZ( 2*P-1 ) )
+                           ITMP2 = MAX( ITMP2, ISUPPZ( 2*P ) )
+  140                   CONTINUE
+                        DO 150 P = NEWFTT, KTOT
+                           ISUPPZ( 2*P-1 ) = ITMP1
+                           ISUPPZ( 2*P ) = ITMP2
+  150                   CONTINUE
+                        NDONE = NDONE + NEWSIZ
+                     END IF
+                  END IF
+                  NEWFRS = J + 1
+  160          CONTINUE
+  170       CONTINUE
+            NDEPTH = NDEPTH + 1
+            GO TO 40
+         END IF
+         J = 2*IBEGIN
+         DO 180 I = IBEGIN, IEND
+            ISUPPZ( J-1 ) = ISUPPZ( J-1 ) + OLDIEN
+            ISUPPZ( J ) = ISUPPZ( J ) + OLDIEN
+            J = J + 2
+  180    CONTINUE
+         IBEGIN = IEND + 1
+  190 CONTINUE
+*
+      RETURN
+*
+*     End of ZLARRV
+*
+      END
diff --git a/costa/native/external/lapack/zlartg.f b/costa/native/external/lapack/zlartg.f
new file mode 100644
index 000000000..95f9dc0b6
--- /dev/null
+++ b/costa/native/external/lapack/zlartg.f
@@ -0,0 +1,193 @@
+      SUBROUTINE ZLARTG( F, G, CS, SN, R )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      DOUBLE PRECISION   CS
+      COMPLEX*16         F, G, R, SN
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLARTG generates a plane rotation so that
+*
+*     [  CS  SN  ]     [ F ]     [ R ]
+*     [  __      ]  .  [   ]  =  [   ]   where CS**2 + |SN|**2 = 1.
+*     [ -SN  CS  ]     [ G ]     [ 0 ]
+*
+*  This is a faster version of the BLAS1 routine ZROTG, except for
+*  the following differences:
+*     F and G are unchanged on return.
+*     If G=0, then CS=1 and SN=0.
+*     If F=0, then CS=0 and SN is chosen so that R is real.
+*
+*  Arguments
+*  =========
+*
+*  F       (input) COMPLEX*16
+*          The first component of vector to be rotated.
+*
+*  G       (input) COMPLEX*16
+*          The second component of vector to be rotated.
+*
+*  CS      (output) DOUBLE PRECISION
+*          The cosine of the rotation.
+*
+*  SN      (output) COMPLEX*16
+*          The sine of the rotation.
+*
+*  R       (output) COMPLEX*16
+*          The nonzero component of the rotated vector.
+*
+*  Further Details
+*  ======= =======
+*
+*  3-5-96 - Modified with a new algorithm by W. Kahan and J. Demmel
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   TWO, ONE, ZERO
+      PARAMETER          ( TWO = 2.0D+0, ONE = 1.0D+0, ZERO = 0.0D+0 )
+      COMPLEX*16         CZERO
+      PARAMETER          ( CZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            FIRST
+      INTEGER            COUNT, I
+      DOUBLE PRECISION   D, DI, DR, EPS, F2, F2S, G2, G2S, SAFMIN,
+     $                   SAFMN2, SAFMX2, SCALE
+      COMPLEX*16         FF, FS, GS
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH, DLAPY2
+      EXTERNAL           DLAMCH, DLAPY2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCMPLX, DCONJG, DIMAG, INT, LOG,
+     $                   MAX, SQRT
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   ABS1, ABSSQ
+*     ..
+*     .. Save statement ..
+      SAVE               FIRST, SAFMX2, SAFMIN, SAFMN2
+*     ..
+*     .. Data statements ..
+      DATA               FIRST / .TRUE. /
+*     ..
+*     .. Statement Function definitions ..
+      ABS1( FF ) = MAX( ABS( DBLE( FF ) ), ABS( DIMAG( FF ) ) )
+      ABSSQ( FF ) = DBLE( FF )**2 + DIMAG( FF )**2
+*     ..
+*     .. Executable Statements ..
+*
+      IF( FIRST ) THEN
+         FIRST = .FALSE.
+         SAFMIN = DLAMCH( 'S' )
+         EPS = DLAMCH( 'E' )
+         SAFMN2 = DLAMCH( 'B' )**INT( LOG( SAFMIN / EPS ) /
+     $            LOG( DLAMCH( 'B' ) ) / TWO )
+         SAFMX2 = ONE / SAFMN2
+      END IF
+      SCALE = MAX( ABS1( F ), ABS1( G ) )
+      FS = F
+      GS = G
+      COUNT = 0
+      IF( SCALE.GE.SAFMX2 ) THEN
+   10    CONTINUE
+         COUNT = COUNT + 1
+         FS = FS*SAFMN2
+         GS = GS*SAFMN2
+         SCALE = SCALE*SAFMN2
+         IF( SCALE.GE.SAFMX2 )
+     $      GO TO 10
+      ELSE IF( SCALE.LE.SAFMN2 ) THEN
+         IF( G.EQ.CZERO ) THEN
+            CS = ONE
+            SN = CZERO
+            R = F
+            RETURN
+         END IF
+   20    CONTINUE
+         COUNT = COUNT - 1
+         FS = FS*SAFMX2
+         GS = GS*SAFMX2
+         SCALE = SCALE*SAFMX2
+         IF( SCALE.LE.SAFMN2 )
+     $      GO TO 20
+      END IF
+      F2 = ABSSQ( FS )
+      G2 = ABSSQ( GS )
+      IF( F2.LE.MAX( G2, ONE )*SAFMIN ) THEN
+*
+*        This is a rare case: F is very small.
+*
+         IF( F.EQ.CZERO ) THEN
+            CS = ZERO
+            R = DLAPY2( DBLE( G ), DIMAG( G ) )
+*           Do complex/real division explicitly with two real divisions
+            D = DLAPY2( DBLE( GS ), DIMAG( GS ) )
+            SN = DCMPLX( DBLE( GS ) / D, -DIMAG( GS ) / D )
+            RETURN
+         END IF
+         F2S = DLAPY2( DBLE( FS ), DIMAG( FS ) )
+*        G2 and G2S are accurate
+*        G2 is at least SAFMIN, and G2S is at least SAFMN2
+         G2S = SQRT( G2 )
+*        Error in CS from underflow in F2S is at most
+*        UNFL / SAFMN2 .lt. sqrt(UNFL*EPS) .lt. EPS
+*        If MAX(G2,ONE)=G2, then F2 .lt. G2*SAFMIN,
+*        and so CS .lt. sqrt(SAFMIN)
+*        If MAX(G2,ONE)=ONE, then F2 .lt. SAFMIN
+*        and so CS .lt. sqrt(SAFMIN)/SAFMN2 = sqrt(EPS)
+*        Therefore, CS = F2S/G2S / sqrt( 1 + (F2S/G2S)**2 ) = F2S/G2S
+         CS = F2S / G2S
+*        Make sure abs(FF) = 1
+*        Do complex/real division explicitly with 2 real divisions
+         IF( ABS1( F ).GT.ONE ) THEN
+            D = DLAPY2( DBLE( F ), DIMAG( F ) )
+            FF = DCMPLX( DBLE( F ) / D, DIMAG( F ) / D )
+         ELSE
+            DR = SAFMX2*DBLE( F )
+            DI = SAFMX2*DIMAG( F )
+            D = DLAPY2( DR, DI )
+            FF = DCMPLX( DR / D, DI / D )
+         END IF
+         SN = FF*DCMPLX( DBLE( GS ) / G2S, -DIMAG( GS ) / G2S )
+         R = CS*F + SN*G
+      ELSE
+*
+*        This is the most common case.
+*        Neither F2 nor F2/G2 are less than SAFMIN
+*        F2S cannot overflow, and it is accurate
+*
+         F2S = SQRT( ONE+G2 / F2 )
+*        Do the F2S(real)*FS(complex) multiply with two real multiplies
+         R = DCMPLX( F2S*DBLE( FS ), F2S*DIMAG( FS ) )
+         CS = ONE / F2S
+         D = F2 + G2
+*        Do complex/real division explicitly with two real divisions
+         SN = DCMPLX( DBLE( R ) / D, DIMAG( R ) / D )
+         SN = SN*DCONJG( GS )
+         IF( COUNT.NE.0 ) THEN
+            IF( COUNT.GT.0 ) THEN
+               DO 30 I = 1, COUNT
+                  R = R*SAFMX2
+   30          CONTINUE
+            ELSE
+               DO 40 I = 1, -COUNT
+                  R = R*SAFMN2
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      RETURN
+*
+*     End of ZLARTG
+*
+      END
diff --git a/costa/native/external/lapack/zlartv.f b/costa/native/external/lapack/zlartv.f
new file mode 100644
index 000000000..736cc7a4b
--- /dev/null
+++ b/costa/native/external/lapack/zlartv.f
@@ -0,0 +1,79 @@
+      SUBROUTINE ZLARTV( N, X, INCX, Y, INCY, C, S, INCC )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCC, INCX, INCY, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   C( * )
+      COMPLEX*16         S( * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLARTV applies a vector of complex plane rotations with real cosines
+*  to elements of the complex vectors x and y. For i = 1,2,...,n
+*
+*     ( x(i) ) := (        c(i)   s(i) ) ( x(i) )
+*     ( y(i) )    ( -conjg(s(i))  c(i) ) ( y(i) )
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of plane rotations to be applied.
+*
+*  X       (input/output) COMPLEX*16 array, dimension (1+(N-1)*INCX)
+*          The vector x.
+*
+*  INCX    (input) INTEGER
+*          The increment between elements of X. INCX > 0.
+*
+*  Y       (input/output) COMPLEX*16 array, dimension (1+(N-1)*INCY)
+*          The vector y.
+*
+*  INCY    (input) INTEGER
+*          The increment between elements of Y. INCY > 0.
+*
+*  C       (input) DOUBLE PRECISION array, dimension (1+(N-1)*INCC)
+*          The cosines of the plane rotations.
+*
+*  S       (input) COMPLEX*16 array, dimension (1+(N-1)*INCC)
+*          The sines of the plane rotations.
+*
+*  INCC    (input) INTEGER
+*          The increment between elements of C and S. INCC > 0.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, IC, IX, IY
+      COMPLEX*16         XI, YI
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG
+*     ..
+*     .. Executable Statements ..
+*
+      IX = 1
+      IY = 1
+      IC = 1
+      DO 10 I = 1, N
+         XI = X( IX )
+         YI = Y( IY )
+         X( IX ) = C( IC )*XI + S( IC )*YI
+         Y( IY ) = C( IC )*YI - DCONJG( S( IC ) )*XI
+         IX = IX + INCX
+         IY = IY + INCY
+         IC = IC + INCC
+   10 CONTINUE
+      RETURN
+*
+*     End of ZLARTV
+*
+      END
diff --git a/costa/native/external/lapack/zlarz.f b/costa/native/external/lapack/zlarz.f
new file mode 100644
index 000000000..661358e8a
--- /dev/null
+++ b/costa/native/external/lapack/zlarz.f
@@ -0,0 +1,158 @@
+      SUBROUTINE ZLARZ( SIDE, M, N, L, V, INCV, TAU, C, LDC, WORK )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE
+      INTEGER            INCV, L, LDC, M, N
+      COMPLEX*16         TAU
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         C( LDC, * ), V( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLARZ applies a complex elementary reflector H to a complex
+*  M-by-N matrix C, from either the left or the right. H is represented
+*  in the form
+*
+*        H = I - tau * v * v'
+*
+*  where tau is a complex scalar and v is a complex vector.
+*
+*  If tau = 0, then H is taken to be the unit matrix.
+*
+*  To apply H' (the conjugate transpose of H), supply conjg(tau) instead
+*  tau.
+*
+*  H is a product of k elementary reflectors as returned by ZTZRZF.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': form  H * C
+*          = 'R': form  C * H
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C.
+*
+*  L       (input) INTEGER
+*          The number of entries of the vector V containing
+*          the meaningful part of the Householder vectors.
+*          If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0.
+*
+*  V       (input) COMPLEX*16 array, dimension (1+(L-1)*abs(INCV))
+*          The vector v in the representation of H as returned by
+*          ZTZRZF. V is not used if TAU = 0.
+*
+*  INCV    (input) INTEGER
+*          The increment between elements of v. INCV <> 0.
+*
+*  TAU     (input) COMPLEX*16
+*          The value tau in the representation of H.
+*
+*  C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by the matrix H * C if SIDE = 'L',
+*          or C * H if SIDE = 'R'.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension
+*                         (N) if SIDE = 'L'
+*                      or (M) if SIDE = 'R'
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ),
+     $                   ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZAXPY, ZCOPY, ZGEMV, ZGERC, ZGERU, ZLACGV
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+      IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*        Form  H * C
+*
+         IF( TAU.NE.ZERO ) THEN
+*
+*           w( 1:n ) = conjg( C( 1, 1:n ) )
+*
+            CALL ZCOPY( N, C, LDC, WORK, 1 )
+            CALL ZLACGV( N, WORK, 1 )
+*
+*           w( 1:n ) = conjg( w( 1:n ) + C( m-l+1:m, 1:n )' * v( 1:l ) )
+*
+            CALL ZGEMV( 'Conjugate transpose', L, N, ONE, C( M-L+1, 1 ),
+     $                  LDC, V, INCV, ONE, WORK, 1 )
+            CALL ZLACGV( N, WORK, 1 )
+*
+*           C( 1, 1:n ) = C( 1, 1:n ) - tau * w( 1:n )
+*
+            CALL ZAXPY( N, -TAU, WORK, 1, C, LDC )
+*
+*           C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ...
+*                               tau * v( 1:l ) * conjg( w( 1:n )' )
+*
+            CALL ZGERU( L, N, -TAU, V, INCV, WORK, 1, C( M-L+1, 1 ),
+     $                  LDC )
+         END IF
+*
+      ELSE
+*
+*        Form  C * H
+*
+         IF( TAU.NE.ZERO ) THEN
+*
+*           w( 1:m ) = C( 1:m, 1 )
+*
+            CALL ZCOPY( M, C, 1, WORK, 1 )
+*
+*           w( 1:m ) = w( 1:m ) + C( 1:m, n-l+1:n, 1:n ) * v( 1:l )
+*
+            CALL ZGEMV( 'No transpose', M, L, ONE, C( 1, N-L+1 ), LDC,
+     $                  V, INCV, ONE, WORK, 1 )
+*
+*           C( 1:m, 1 ) = C( 1:m, 1 ) - tau * w( 1:m )
+*
+            CALL ZAXPY( M, -TAU, WORK, 1, C, 1 )
+*
+*           C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ...
+*                               tau * w( 1:m ) * v( 1:l )'
+*
+            CALL ZGERC( M, L, -TAU, WORK, 1, V, INCV, C( 1, N-L+1 ),
+     $                  LDC )
+*
+         END IF
+*
+      END IF
+*
+      RETURN
+*
+*     End of ZLARZ
+*
+      END
diff --git a/costa/native/external/lapack/zlarzb.f b/costa/native/external/lapack/zlarzb.f
new file mode 100644
index 000000000..eba455112
--- /dev/null
+++ b/costa/native/external/lapack/zlarzb.f
@@ -0,0 +1,235 @@
+      SUBROUTINE ZLARZB( SIDE, TRANS, DIRECT, STOREV, M, N, K, L, V,
+     $                   LDV, T, LDT, C, LDC, WORK, LDWORK )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     December 1, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIRECT, SIDE, STOREV, TRANS
+      INTEGER            K, L, LDC, LDT, LDV, LDWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         C( LDC, * ), T( LDT, * ), V( LDV, * ),
+     $                   WORK( LDWORK, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLARZB applies a complex block reflector H or its transpose H**H
+*  to a complex distributed M-by-N  C from the left or the right.
+*
+*  Currently, only STOREV = 'R' and DIRECT = 'B' are supported.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply H or H' from the Left
+*          = 'R': apply H or H' from the Right
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': apply H (No transpose)
+*          = 'C': apply H' (Conjugate transpose)
+*
+*  DIRECT  (input) CHARACTER*1
+*          Indicates how H is formed from a product of elementary
+*          reflectors
+*          = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet)
+*          = 'B': H = H(k) . . . H(2) H(1) (Backward)
+*
+*  STOREV  (input) CHARACTER*1
+*          Indicates how the vectors which define the elementary
+*          reflectors are stored:
+*          = 'C': Columnwise                        (not supported yet)
+*          = 'R': Rowwise
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C.
+*
+*  K       (input) INTEGER
+*          The order of the matrix T (= the number of elementary
+*          reflectors whose product defines the block reflector).
+*
+*  L       (input) INTEGER
+*          The number of columns of the matrix V containing the
+*          meaningful part of the Householder reflectors.
+*          If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0.
+*
+*  V       (input) COMPLEX*16 array, dimension (LDV,NV).
+*          If STOREV = 'C', NV = K; if STOREV = 'R', NV = L.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V.
+*          If STOREV = 'C', LDV >= L; if STOREV = 'R', LDV >= K.
+*
+*  T       (input) COMPLEX*16 array, dimension (LDT,K)
+*          The triangular K-by-K matrix T in the representation of the
+*          block reflector.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= K.
+*
+*  C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by H*C or H'*C or C*H or C*H'.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (LDWORK,K)
+*
+*  LDWORK  (input) INTEGER
+*          The leading dimension of the array WORK.
+*          If SIDE = 'L', LDWORK >= max(1,N);
+*          if SIDE = 'R', LDWORK >= max(1,M).
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      CHARACTER          TRANST
+      INTEGER            I, INFO, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZCOPY, ZGEMM, ZLACGV, ZTRMM
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 .OR. N.LE.0 )
+     $   RETURN
+*
+*     Check for currently supported options
+*
+      INFO = 0
+      IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZLARZB', -INFO )
+         RETURN
+      END IF
+*
+      IF( LSAME( TRANS, 'N' ) ) THEN
+         TRANST = 'C'
+      ELSE
+         TRANST = 'N'
+      END IF
+*
+      IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*        Form  H * C  or  H' * C
+*
+*        W( 1:n, 1:k ) = conjg( C( 1:k, 1:n )' )
+*
+         DO 10 J = 1, K
+            CALL ZCOPY( N, C( J, 1 ), LDC, WORK( 1, J ), 1 )
+   10    CONTINUE
+*
+*        W( 1:n, 1:k ) = W( 1:n, 1:k ) + ...
+*                        conjg( C( m-l+1:m, 1:n )' ) * V( 1:k, 1:l )'
+*
+         IF( L.GT.0 )
+     $      CALL ZGEMM( 'Transpose', 'Conjugate transpose', N, K, L,
+     $                  ONE, C( M-L+1, 1 ), LDC, V, LDV, ONE, WORK,
+     $                  LDWORK )
+*
+*        W( 1:n, 1:k ) = W( 1:n, 1:k ) * T'  or  W( 1:m, 1:k ) * T
+*
+         CALL ZTRMM( 'Right', 'Lower', TRANST, 'Non-unit', N, K, ONE, T,
+     $               LDT, WORK, LDWORK )
+*
+*        C( 1:k, 1:n ) = C( 1:k, 1:n ) - conjg( W( 1:n, 1:k )' )
+*
+         DO 30 J = 1, N
+            DO 20 I = 1, K
+               C( I, J ) = C( I, J ) - WORK( J, I )
+   20       CONTINUE
+   30    CONTINUE
+*
+*        C( m-l+1:m, 1:n ) = C( m-l+1:m, 1:n ) - ...
+*                    conjg( V( 1:k, 1:l )' ) * conjg( W( 1:n, 1:k )' )
+*
+         IF( L.GT.0 )
+     $      CALL ZGEMM( 'Transpose', 'Transpose', L, N, K, -ONE, V, LDV,
+     $                  WORK, LDWORK, ONE, C( M-L+1, 1 ), LDC )
+*
+      ELSE IF( LSAME( SIDE, 'R' ) ) THEN
+*
+*        Form  C * H  or  C * H'
+*
+*        W( 1:m, 1:k ) = C( 1:m, 1:k )
+*
+         DO 40 J = 1, K
+            CALL ZCOPY( M, C( 1, J ), 1, WORK( 1, J ), 1 )
+   40    CONTINUE
+*
+*        W( 1:m, 1:k ) = W( 1:m, 1:k ) + ...
+*                        C( 1:m, n-l+1:n ) * conjg( V( 1:k, 1:l )' )
+*
+         IF( L.GT.0 )
+     $      CALL ZGEMM( 'No transpose', 'Transpose', M, K, L, ONE,
+     $                  C( 1, N-L+1 ), LDC, V, LDV, ONE, WORK, LDWORK )
+*
+*        W( 1:m, 1:k ) = W( 1:m, 1:k ) * conjg( T )  or
+*                        W( 1:m, 1:k ) * conjg( T' )
+*
+         DO 50 J = 1, K
+            CALL ZLACGV( K-J+1, T( J, J ), 1 )
+   50    CONTINUE
+         CALL ZTRMM( 'Right', 'Lower', TRANS, 'Non-unit', M, K, ONE, T,
+     $               LDT, WORK, LDWORK )
+         DO 60 J = 1, K
+            CALL ZLACGV( K-J+1, T( J, J ), 1 )
+   60    CONTINUE
+*
+*        C( 1:m, 1:k ) = C( 1:m, 1:k ) - W( 1:m, 1:k )
+*
+         DO 80 J = 1, K
+            DO 70 I = 1, M
+               C( I, J ) = C( I, J ) - WORK( I, J )
+   70       CONTINUE
+   80    CONTINUE
+*
+*        C( 1:m, n-l+1:n ) = C( 1:m, n-l+1:n ) - ...
+*                            W( 1:m, 1:k ) * conjg( V( 1:k, 1:l ) )
+*
+         DO 90 J = 1, L
+            CALL ZLACGV( K, V( 1, J ), 1 )
+   90    CONTINUE
+         IF( L.GT.0 )
+     $      CALL ZGEMM( 'No transpose', 'No transpose', M, L, K, -ONE,
+     $                  WORK, LDWORK, V, LDV, ONE, C( 1, N-L+1 ), LDC )
+         DO 100 J = 1, L
+            CALL ZLACGV( K, V( 1, J ), 1 )
+  100    CONTINUE
+*
+      END IF
+*
+      RETURN
+*
+*     End of ZLARZB
+*
+      END
diff --git a/costa/native/external/lapack/zlarzt.f b/costa/native/external/lapack/zlarzt.f
new file mode 100644
index 000000000..e6bfed887
--- /dev/null
+++ b/costa/native/external/lapack/zlarzt.f
@@ -0,0 +1,187 @@
+      SUBROUTINE ZLARZT( DIRECT, STOREV, N, K, V, LDV, TAU, T, LDT )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIRECT, STOREV
+      INTEGER            K, LDT, LDV, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         T( LDT, * ), TAU( * ), V( LDV, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLARZT forms the triangular factor T of a complex block reflector
+*  H of order > n, which is defined as a product of k elementary
+*  reflectors.
+*
+*  If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
+*
+*  If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
+*
+*  If STOREV = 'C', the vector which defines the elementary reflector
+*  H(i) is stored in the i-th column of the array V, and
+*
+*     H  =  I - V * T * V'
+*
+*  If STOREV = 'R', the vector which defines the elementary reflector
+*  H(i) is stored in the i-th row of the array V, and
+*
+*     H  =  I - V' * T * V
+*
+*  Currently, only STOREV = 'R' and DIRECT = 'B' are supported.
+*
+*  Arguments
+*  =========
+*
+*  DIRECT  (input) CHARACTER*1
+*          Specifies the order in which the elementary reflectors are
+*          multiplied to form the block reflector:
+*          = 'F': H = H(1) H(2) . . . H(k) (Forward, not supported yet)
+*          = 'B': H = H(k) . . . H(2) H(1) (Backward)
+*
+*  STOREV  (input) CHARACTER*1
+*          Specifies how the vectors which define the elementary
+*          reflectors are stored (see also Further Details):
+*          = 'C': columnwise                        (not supported yet)
+*          = 'R': rowwise
+*
+*  N       (input) INTEGER
+*          The order of the block reflector H. N >= 0.
+*
+*  K       (input) INTEGER
+*          The order of the triangular factor T (= the number of
+*          elementary reflectors). K >= 1.
+*
+*  V       (input/output) COMPLEX*16 array, dimension
+*                               (LDV,K) if STOREV = 'C'
+*                               (LDV,N) if STOREV = 'R'
+*          The matrix V. See further details.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V.
+*          If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
+*
+*  TAU     (input) COMPLEX*16 array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i).
+*
+*  T       (output) COMPLEX*16 array, dimension (LDT,K)
+*          The k by k triangular factor T of the block reflector.
+*          If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
+*          lower triangular. The rest of the array is not used.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= K.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+*  The shape of the matrix V and the storage of the vectors which define
+*  the H(i) is best illustrated by the following example with n = 5 and
+*  k = 3. The elements equal to 1 are not stored; the corresponding
+*  array elements are modified but restored on exit. The rest of the
+*  array is not used.
+*
+*  DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R':
+*
+*                                              ______V_____
+*         ( v1 v2 v3 )                        /            \
+*         ( v1 v2 v3 )                      ( v1 v1 v1 v1 v1 . . . . 1 )
+*     V = ( v1 v2 v3 )                      ( v2 v2 v2 v2 v2 . . . 1   )
+*         ( v1 v2 v3 )                      ( v3 v3 v3 v3 v3 . . 1     )
+*         ( v1 v2 v3 )
+*            .  .  .
+*            .  .  .
+*            1  .  .
+*               1  .
+*                  1
+*
+*  DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R':
+*
+*                                                        ______V_____
+*            1                                          /            \
+*            .  1                           ( 1 . . . . v1 v1 v1 v1 v1 )
+*            .  .  1                        ( . 1 . . . v2 v2 v2 v2 v2 )
+*            .  .  .                        ( . . 1 . . v3 v3 v3 v3 v3 )
+*            .  .  .
+*         ( v1 v2 v3 )
+*         ( v1 v2 v3 )
+*     V = ( v1 v2 v3 )
+*         ( v1 v2 v3 )
+*         ( v1 v2 v3 )
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, INFO, J
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGEMV, ZLACGV, ZTRMV
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Executable Statements ..
+*
+*     Check for currently supported options
+*
+      INFO = 0
+      IF( .NOT.LSAME( DIRECT, 'B' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( STOREV, 'R' ) ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZLARZT', -INFO )
+         RETURN
+      END IF
+*
+      DO 20 I = K, 1, -1
+         IF( TAU( I ).EQ.ZERO ) THEN
+*
+*           H(i)  =  I
+*
+            DO 10 J = I, K
+               T( J, I ) = ZERO
+   10       CONTINUE
+         ELSE
+*
+*           general case
+*
+            IF( I.LT.K ) THEN
+*
+*              T(i+1:k,i) = - tau(i) * V(i+1:k,1:n) * V(i,1:n)'
+*
+               CALL ZLACGV( N, V( I, 1 ), LDV )
+               CALL ZGEMV( 'No transpose', K-I, N, -TAU( I ),
+     $                     V( I+1, 1 ), LDV, V( I, 1 ), LDV, ZERO,
+     $                     T( I+1, I ), 1 )
+               CALL ZLACGV( N, V( I, 1 ), LDV )
+*
+*              T(i+1:k,i) = T(i+1:k,i+1:k) * T(i+1:k,i)
+*
+               CALL ZTRMV( 'Lower', 'No transpose', 'Non-unit', K-I,
+     $                     T( I+1, I+1 ), LDT, T( I+1, I ), 1 )
+            END IF
+            T( I, I ) = TAU( I )
+         END IF
+   20 CONTINUE
+      RETURN
+*
+*     End of ZLARZT
+*
+      END
diff --git a/costa/native/external/lapack/zlascl.f b/costa/native/external/lapack/zlascl.f
new file mode 100644
index 000000000..c67328602
--- /dev/null
+++ b/costa/native/external/lapack/zlascl.f
@@ -0,0 +1,268 @@
+      SUBROUTINE ZLASCL( TYPE, KL, KU, CFROM, CTO, M, N, A, LDA, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TYPE
+      INTEGER            INFO, KL, KU, LDA, M, N
+      DOUBLE PRECISION   CFROM, CTO
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLASCL multiplies the M by N complex matrix A by the real scalar
+*  CTO/CFROM.  This is done without over/underflow as long as the final
+*  result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that
+*  A may be full, upper triangular, lower triangular, upper Hessenberg,
+*  or banded.
+*
+*  Arguments
+*  =========
+*
+*  TYPE    (input) CHARACTER*1
+*          TYPE indices the storage type of the input matrix.
+*          = 'G':  A is a full matrix.
+*          = 'L':  A is a lower triangular matrix.
+*          = 'U':  A is an upper triangular matrix.
+*          = 'H':  A is an upper Hessenberg matrix.
+*          = 'B':  A is a symmetric band matrix with lower bandwidth KL
+*                  and upper bandwidth KU and with the only the lower
+*                  half stored.
+*          = 'Q':  A is a symmetric band matrix with lower bandwidth KL
+*                  and upper bandwidth KU and with the only the upper
+*                  half stored.
+*          = 'Z':  A is a band matrix with lower bandwidth KL and upper
+*                  bandwidth KU.
+*
+*  KL      (input) INTEGER
+*          The lower bandwidth of A.  Referenced only if TYPE = 'B',
+*          'Q' or 'Z'.
+*
+*  KU      (input) INTEGER
+*          The upper bandwidth of A.  Referenced only if TYPE = 'B',
+*          'Q' or 'Z'.
+*
+*  CFROM   (input) DOUBLE PRECISION
+*  CTO     (input) DOUBLE PRECISION
+*          The matrix A is multiplied by CTO/CFROM. A(I,J) is computed
+*          without over/underflow if the final result CTO*A(I,J)/CFROM
+*          can be represented without over/underflow.  CFROM must be
+*          nonzero.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,M)
+*          The matrix to be multiplied by CTO/CFROM.  See TYPE for the
+*          storage type.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  INFO    (output) INTEGER
+*          0  - successful exit
+*          <0 - if INFO = -i, the i-th argument had an illegal value.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            DONE
+      INTEGER            I, ITYPE, J, K1, K2, K3, K4
+      DOUBLE PRECISION   BIGNUM, CFROM1, CFROMC, CTO1, CTOC, MUL, SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, MIN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+*
+      IF( LSAME( TYPE, 'G' ) ) THEN
+         ITYPE = 0
+      ELSE IF( LSAME( TYPE, 'L' ) ) THEN
+         ITYPE = 1
+      ELSE IF( LSAME( TYPE, 'U' ) ) THEN
+         ITYPE = 2
+      ELSE IF( LSAME( TYPE, 'H' ) ) THEN
+         ITYPE = 3
+      ELSE IF( LSAME( TYPE, 'B' ) ) THEN
+         ITYPE = 4
+      ELSE IF( LSAME( TYPE, 'Q' ) ) THEN
+         ITYPE = 5
+      ELSE IF( LSAME( TYPE, 'Z' ) ) THEN
+         ITYPE = 6
+      ELSE
+         ITYPE = -1
+      END IF
+*
+      IF( ITYPE.EQ.-1 ) THEN
+         INFO = -1
+      ELSE IF( CFROM.EQ.ZERO ) THEN
+         INFO = -4
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( N.LT.0 .OR. ( ITYPE.EQ.4 .AND. N.NE.M ) .OR.
+     $         ( ITYPE.EQ.5 .AND. N.NE.M ) ) THEN
+         INFO = -7
+      ELSE IF( ITYPE.LE.3 .AND. LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -9
+      ELSE IF( ITYPE.GE.4 ) THEN
+         IF( KL.LT.0 .OR. KL.GT.MAX( M-1, 0 ) ) THEN
+            INFO = -2
+         ELSE IF( KU.LT.0 .OR. KU.GT.MAX( N-1, 0 ) .OR.
+     $            ( ( ITYPE.EQ.4 .OR. ITYPE.EQ.5 ) .AND. KL.NE.KU ) )
+     $             THEN
+            INFO = -3
+         ELSE IF( ( ITYPE.EQ.4 .AND. LDA.LT.KL+1 ) .OR.
+     $            ( ITYPE.EQ.5 .AND. LDA.LT.KU+1 ) .OR.
+     $            ( ITYPE.EQ.6 .AND. LDA.LT.2*KL+KU+1 ) ) THEN
+            INFO = -9
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZLASCL', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. M.EQ.0 )
+     $   RETURN
+*
+*     Get machine parameters
+*
+      SMLNUM = DLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+*
+      CFROMC = CFROM
+      CTOC = CTO
+*
+   10 CONTINUE
+      CFROM1 = CFROMC*SMLNUM
+      CTO1 = CTOC / BIGNUM
+      IF( ABS( CFROM1 ).GT.ABS( CTOC ) .AND. CTOC.NE.ZERO ) THEN
+         MUL = SMLNUM
+         DONE = .FALSE.
+         CFROMC = CFROM1
+      ELSE IF( ABS( CTO1 ).GT.ABS( CFROMC ) ) THEN
+         MUL = BIGNUM
+         DONE = .FALSE.
+         CTOC = CTO1
+      ELSE
+         MUL = CTOC / CFROMC
+         DONE = .TRUE.
+      END IF
+*
+      IF( ITYPE.EQ.0 ) THEN
+*
+*        Full matrix
+*
+         DO 30 J = 1, N
+            DO 20 I = 1, M
+               A( I, J ) = A( I, J )*MUL
+   20       CONTINUE
+   30    CONTINUE
+*
+      ELSE IF( ITYPE.EQ.1 ) THEN
+*
+*        Lower triangular matrix
+*
+         DO 50 J = 1, N
+            DO 40 I = J, M
+               A( I, J ) = A( I, J )*MUL
+   40       CONTINUE
+   50    CONTINUE
+*
+      ELSE IF( ITYPE.EQ.2 ) THEN
+*
+*        Upper triangular matrix
+*
+         DO 70 J = 1, N
+            DO 60 I = 1, MIN( J, M )
+               A( I, J ) = A( I, J )*MUL
+   60       CONTINUE
+   70    CONTINUE
+*
+      ELSE IF( ITYPE.EQ.3 ) THEN
+*
+*        Upper Hessenberg matrix
+*
+         DO 90 J = 1, N
+            DO 80 I = 1, MIN( J+1, M )
+               A( I, J ) = A( I, J )*MUL
+   80       CONTINUE
+   90    CONTINUE
+*
+      ELSE IF( ITYPE.EQ.4 ) THEN
+*
+*        Lower half of a symmetric band matrix
+*
+         K3 = KL + 1
+         K4 = N + 1
+         DO 110 J = 1, N
+            DO 100 I = 1, MIN( K3, K4-J )
+               A( I, J ) = A( I, J )*MUL
+  100       CONTINUE
+  110    CONTINUE
+*
+      ELSE IF( ITYPE.EQ.5 ) THEN
+*
+*        Upper half of a symmetric band matrix
+*
+         K1 = KU + 2
+         K3 = KU + 1
+         DO 130 J = 1, N
+            DO 120 I = MAX( K1-J, 1 ), K3
+               A( I, J ) = A( I, J )*MUL
+  120       CONTINUE
+  130    CONTINUE
+*
+      ELSE IF( ITYPE.EQ.6 ) THEN
+*
+*        Band matrix
+*
+         K1 = KL + KU + 2
+         K2 = KL + 1
+         K3 = 2*KL + KU + 1
+         K4 = KL + KU + 1 + M
+         DO 150 J = 1, N
+            DO 140 I = MAX( K1-J, K2 ), MIN( K3, K4-J )
+               A( I, J ) = A( I, J )*MUL
+  140       CONTINUE
+  150    CONTINUE
+*
+      END IF
+*
+      IF( .NOT.DONE )
+     $   GO TO 10
+*
+      RETURN
+*
+*     End of ZLASCL
+*
+      END
diff --git a/costa/native/external/lapack/zlaset.f b/costa/native/external/lapack/zlaset.f
new file mode 100644
index 000000000..1039ac802
--- /dev/null
+++ b/costa/native/external/lapack/zlaset.f
@@ -0,0 +1,115 @@
+      SUBROUTINE ZLASET( UPLO, M, N, ALPHA, BETA, A, LDA )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            LDA, M, N
+      COMPLEX*16         ALPHA, BETA
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLASET initializes a 2-D array A to BETA on the diagonal and
+*  ALPHA on the offdiagonals.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies the part of the matrix A to be set.
+*          = 'U':      Upper triangular part is set. The lower triangle
+*                      is unchanged.
+*          = 'L':      Lower triangular part is set. The upper triangle
+*                      is unchanged.
+*          Otherwise:  All of the matrix A is set.
+*
+*  M       (input) INTEGER
+*          On entry, M specifies the number of rows of A.
+*
+*  N       (input) INTEGER
+*          On entry, N specifies the number of columns of A.
+*
+*  ALPHA   (input) COMPLEX*16
+*          All the offdiagonal array elements are set to ALPHA.
+*
+*  BETA    (input) COMPLEX*16
+*          All the diagonal array elements are set to BETA.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the m by n matrix A.
+*          On exit, A(i,j) = ALPHA, 1 <= i <= m, 1 <= j <= n, i.ne.j;
+*                   A(i,i) = BETA , 1 <= i <= min(m,n)
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+      IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*        Set the diagonal to BETA and the strictly upper triangular
+*        part of the array to ALPHA.
+*
+         DO 20 J = 2, N
+            DO 10 I = 1, MIN( J-1, M )
+               A( I, J ) = ALPHA
+   10       CONTINUE
+   20    CONTINUE
+         DO 30 I = 1, MIN( N, M )
+            A( I, I ) = BETA
+   30    CONTINUE
+*
+      ELSE IF( LSAME( UPLO, 'L' ) ) THEN
+*
+*        Set the diagonal to BETA and the strictly lower triangular
+*        part of the array to ALPHA.
+*
+         DO 50 J = 1, MIN( M, N )
+            DO 40 I = J + 1, M
+               A( I, J ) = ALPHA
+   40       CONTINUE
+   50    CONTINUE
+         DO 60 I = 1, MIN( N, M )
+            A( I, I ) = BETA
+   60    CONTINUE
+*
+      ELSE
+*
+*        Set the array to BETA on the diagonal and ALPHA on the
+*        offdiagonal.
+*
+         DO 80 J = 1, N
+            DO 70 I = 1, M
+               A( I, J ) = ALPHA
+   70       CONTINUE
+   80    CONTINUE
+         DO 90 I = 1, MIN( M, N )
+            A( I, I ) = BETA
+   90    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZLASET
+*
+      END
diff --git a/costa/native/external/lapack/zlasr.f b/costa/native/external/lapack/zlasr.f
new file mode 100644
index 000000000..1fb08d1d6
--- /dev/null
+++ b/costa/native/external/lapack/zlasr.f
@@ -0,0 +1,325 @@
+      SUBROUTINE ZLASR( SIDE, PIVOT, DIRECT, M, N, C, S, A, LDA )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIRECT, PIVOT, SIDE
+      INTEGER            LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   C( * ), S( * )
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLASR   performs the transformation
+*
+*     A := P*A,   when SIDE = 'L' or 'l'  (  Left-hand side )
+*
+*     A := A*P',  when SIDE = 'R' or 'r'  ( Right-hand side )
+*
+*  where A is an m by n complex matrix and P is an orthogonal matrix,
+*  consisting of a sequence of plane rotations determined by the
+*  parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l'
+*  and z = n when SIDE = 'R' or 'r' ):
+*
+*  When  DIRECT = 'F' or 'f'  ( Forward sequence ) then
+*
+*     P = P( z - 1 )*...*P( 2 )*P( 1 ),
+*
+*  and when DIRECT = 'B' or 'b'  ( Backward sequence ) then
+*
+*     P = P( 1 )*P( 2 )*...*P( z - 1 ),
+*
+*  where  P( k ) is a plane rotation matrix for the following planes:
+*
+*     when  PIVOT = 'V' or 'v'  ( Variable pivot ),
+*        the plane ( k, k + 1 )
+*
+*     when  PIVOT = 'T' or 't'  ( Top pivot ),
+*        the plane ( 1, k + 1 )
+*
+*     when  PIVOT = 'B' or 'b'  ( Bottom pivot ),
+*        the plane ( k, z )
+*
+*  c( k ) and s( k )  must contain the  cosine and sine that define the
+*  matrix  P( k ).  The two by two plane rotation part of the matrix
+*  P( k ), R( k ), is assumed to be of the form
+*
+*     R( k ) = (  c( k )  s( k ) ).
+*              ( -s( k )  c( k ) )
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          Specifies whether the plane rotation matrix P is applied to
+*          A on the left or the right.
+*          = 'L':  Left, compute A := P*A
+*          = 'R':  Right, compute A:= A*P'
+*
+*  DIRECT  (input) CHARACTER*1
+*          Specifies whether P is a forward or backward sequence of
+*          plane rotations.
+*          = 'F':  Forward, P = P( z - 1 )*...*P( 2 )*P( 1 )
+*          = 'B':  Backward, P = P( 1 )*P( 2 )*...*P( z - 1 )
+*
+*  PIVOT   (input) CHARACTER*1
+*          Specifies the plane for which P(k) is a plane rotation
+*          matrix.
+*          = 'V':  Variable pivot, the plane (k,k+1)
+*          = 'T':  Top pivot, the plane (1,k+1)
+*          = 'B':  Bottom pivot, the plane (k,z)
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  If m <= 1, an immediate
+*          return is effected.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  If n <= 1, an
+*          immediate return is effected.
+*
+*  C, S    (input) DOUBLE PRECISION arrays, dimension
+*                  (M-1) if SIDE = 'L'
+*                  (N-1) if SIDE = 'R'
+*          c(k) and s(k) contain the cosine and sine that define the
+*          matrix P(k).  The two by two plane rotation part of the
+*          matrix P(k), R(k), is assumed to be of the form
+*          R( k ) = (  c( k )  s( k ) ).
+*                   ( -s( k )  c( k ) )
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          The m by n matrix A.  On exit, A is overwritten by P*A if
+*          SIDE = 'R' or by A*P' if SIDE = 'L'.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, INFO, J
+      DOUBLE PRECISION   CTEMP, STEMP
+      COMPLEX*16         TEMP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters
+*
+      INFO = 0
+      IF( .NOT.( LSAME( SIDE, 'L' ) .OR. LSAME( SIDE, 'R' ) ) ) THEN
+         INFO = 1
+      ELSE IF( .NOT.( LSAME( PIVOT, 'V' ) .OR. LSAME( PIVOT,
+     $         'T' ) .OR. LSAME( PIVOT, 'B' ) ) ) THEN
+         INFO = 2
+      ELSE IF( .NOT.( LSAME( DIRECT, 'F' ) .OR. LSAME( DIRECT, 'B' ) ) )
+     $          THEN
+         INFO = 3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = 4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = 5
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZLASR ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( ( M.EQ.0 ) .OR. ( N.EQ.0 ) )
+     $   RETURN
+      IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*        Form  P * A
+*
+         IF( LSAME( PIVOT, 'V' ) ) THEN
+            IF( LSAME( DIRECT, 'F' ) ) THEN
+               DO 20 J = 1, M - 1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 10 I = 1, N
+                        TEMP = A( J+1, I )
+                        A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I )
+                        A( J, I ) = STEMP*TEMP + CTEMP*A( J, I )
+   10                CONTINUE
+                  END IF
+   20          CONTINUE
+            ELSE IF( LSAME( DIRECT, 'B' ) ) THEN
+               DO 40 J = M - 1, 1, -1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 30 I = 1, N
+                        TEMP = A( J+1, I )
+                        A( J+1, I ) = CTEMP*TEMP - STEMP*A( J, I )
+                        A( J, I ) = STEMP*TEMP + CTEMP*A( J, I )
+   30                CONTINUE
+                  END IF
+   40          CONTINUE
+            END IF
+         ELSE IF( LSAME( PIVOT, 'T' ) ) THEN
+            IF( LSAME( DIRECT, 'F' ) ) THEN
+               DO 60 J = 2, M
+                  CTEMP = C( J-1 )
+                  STEMP = S( J-1 )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 50 I = 1, N
+                        TEMP = A( J, I )
+                        A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I )
+                        A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I )
+   50                CONTINUE
+                  END IF
+   60          CONTINUE
+            ELSE IF( LSAME( DIRECT, 'B' ) ) THEN
+               DO 80 J = M, 2, -1
+                  CTEMP = C( J-1 )
+                  STEMP = S( J-1 )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 70 I = 1, N
+                        TEMP = A( J, I )
+                        A( J, I ) = CTEMP*TEMP - STEMP*A( 1, I )
+                        A( 1, I ) = STEMP*TEMP + CTEMP*A( 1, I )
+   70                CONTINUE
+                  END IF
+   80          CONTINUE
+            END IF
+         ELSE IF( LSAME( PIVOT, 'B' ) ) THEN
+            IF( LSAME( DIRECT, 'F' ) ) THEN
+               DO 100 J = 1, M - 1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 90 I = 1, N
+                        TEMP = A( J, I )
+                        A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP
+                        A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP
+   90                CONTINUE
+                  END IF
+  100          CONTINUE
+            ELSE IF( LSAME( DIRECT, 'B' ) ) THEN
+               DO 120 J = M - 1, 1, -1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 110 I = 1, N
+                        TEMP = A( J, I )
+                        A( J, I ) = STEMP*A( M, I ) + CTEMP*TEMP
+                        A( M, I ) = CTEMP*A( M, I ) - STEMP*TEMP
+  110                CONTINUE
+                  END IF
+  120          CONTINUE
+            END IF
+         END IF
+      ELSE IF( LSAME( SIDE, 'R' ) ) THEN
+*
+*        Form A * P'
+*
+         IF( LSAME( PIVOT, 'V' ) ) THEN
+            IF( LSAME( DIRECT, 'F' ) ) THEN
+               DO 140 J = 1, N - 1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 130 I = 1, M
+                        TEMP = A( I, J+1 )
+                        A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J )
+                        A( I, J ) = STEMP*TEMP + CTEMP*A( I, J )
+  130                CONTINUE
+                  END IF
+  140          CONTINUE
+            ELSE IF( LSAME( DIRECT, 'B' ) ) THEN
+               DO 160 J = N - 1, 1, -1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 150 I = 1, M
+                        TEMP = A( I, J+1 )
+                        A( I, J+1 ) = CTEMP*TEMP - STEMP*A( I, J )
+                        A( I, J ) = STEMP*TEMP + CTEMP*A( I, J )
+  150                CONTINUE
+                  END IF
+  160          CONTINUE
+            END IF
+         ELSE IF( LSAME( PIVOT, 'T' ) ) THEN
+            IF( LSAME( DIRECT, 'F' ) ) THEN
+               DO 180 J = 2, N
+                  CTEMP = C( J-1 )
+                  STEMP = S( J-1 )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 170 I = 1, M
+                        TEMP = A( I, J )
+                        A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 )
+                        A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 )
+  170                CONTINUE
+                  END IF
+  180          CONTINUE
+            ELSE IF( LSAME( DIRECT, 'B' ) ) THEN
+               DO 200 J = N, 2, -1
+                  CTEMP = C( J-1 )
+                  STEMP = S( J-1 )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 190 I = 1, M
+                        TEMP = A( I, J )
+                        A( I, J ) = CTEMP*TEMP - STEMP*A( I, 1 )
+                        A( I, 1 ) = STEMP*TEMP + CTEMP*A( I, 1 )
+  190                CONTINUE
+                  END IF
+  200          CONTINUE
+            END IF
+         ELSE IF( LSAME( PIVOT, 'B' ) ) THEN
+            IF( LSAME( DIRECT, 'F' ) ) THEN
+               DO 220 J = 1, N - 1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 210 I = 1, M
+                        TEMP = A( I, J )
+                        A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP
+                        A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP
+  210                CONTINUE
+                  END IF
+  220          CONTINUE
+            ELSE IF( LSAME( DIRECT, 'B' ) ) THEN
+               DO 240 J = N - 1, 1, -1
+                  CTEMP = C( J )
+                  STEMP = S( J )
+                  IF( ( CTEMP.NE.ONE ) .OR. ( STEMP.NE.ZERO ) ) THEN
+                     DO 230 I = 1, M
+                        TEMP = A( I, J )
+                        A( I, J ) = STEMP*A( I, N ) + CTEMP*TEMP
+                        A( I, N ) = CTEMP*A( I, N ) - STEMP*TEMP
+  230                CONTINUE
+                  END IF
+  240          CONTINUE
+            END IF
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZLASR
+*
+      END
diff --git a/costa/native/external/lapack/zlassq.f b/costa/native/external/lapack/zlassq.f
new file mode 100644
index 000000000..541c207ca
--- /dev/null
+++ b/costa/native/external/lapack/zlassq.f
@@ -0,0 +1,102 @@
+      SUBROUTINE ZLASSQ( N, X, INCX, SCALE, SUMSQ )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, N
+      DOUBLE PRECISION   SCALE, SUMSQ
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLASSQ returns the values scl and ssq such that
+*
+*     ( scl**2 )*ssq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq,
+*
+*  where x( i ) = abs( X( 1 + ( i - 1 )*INCX ) ). The value of sumsq is
+*  assumed to be at least unity and the value of ssq will then satisfy
+*
+*     1.0 .le. ssq .le. ( sumsq + 2*n ).
+*
+*  scale is assumed to be non-negative and scl returns the value
+*
+*     scl = max( scale, abs( real( x( i ) ) ), abs( aimag( x( i ) ) ) ),
+*            i
+*
+*  scale and sumsq must be supplied in SCALE and SUMSQ respectively.
+*  SCALE and SUMSQ are overwritten by scl and ssq respectively.
+*
+*  The routine makes only one pass through the vector X.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of elements to be used from the vector X.
+*
+*  X       (input) COMPLEX*16 array, dimension (N)
+*          The vector x as described above.
+*             x( i )  = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n.
+*
+*  INCX    (input) INTEGER
+*          The increment between successive values of the vector X.
+*          INCX > 0.
+*
+*  SCALE   (input/output) DOUBLE PRECISION
+*          On entry, the value  scale  in the equation above.
+*          On exit, SCALE is overwritten with the value  scl .
+*
+*  SUMSQ   (input/output) DOUBLE PRECISION
+*          On entry, the value  sumsq  in the equation above.
+*          On exit, SUMSQ is overwritten with the value  ssq .
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            IX
+      DOUBLE PRECISION   TEMP1
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.GT.0 ) THEN
+         DO 10 IX = 1, 1 + ( N-1 )*INCX, INCX
+            IF( DBLE( X( IX ) ).NE.ZERO ) THEN
+               TEMP1 = ABS( DBLE( X( IX ) ) )
+               IF( SCALE.LT.TEMP1 ) THEN
+                  SUMSQ = 1 + SUMSQ*( SCALE / TEMP1 )**2
+                  SCALE = TEMP1
+               ELSE
+                  SUMSQ = SUMSQ + ( TEMP1 / SCALE )**2
+               END IF
+            END IF
+            IF( DIMAG( X( IX ) ).NE.ZERO ) THEN
+               TEMP1 = ABS( DIMAG( X( IX ) ) )
+               IF( SCALE.LT.TEMP1 ) THEN
+                  SUMSQ = 1 + SUMSQ*( SCALE / TEMP1 )**2
+                  SCALE = TEMP1
+               ELSE
+                  SUMSQ = SUMSQ + ( TEMP1 / SCALE )**2
+               END IF
+            END IF
+   10    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZLASSQ
+*
+      END
diff --git a/costa/native/external/lapack/zlaswp.f b/costa/native/external/lapack/zlaswp.f
new file mode 100644
index 000000000..0749250bc
--- /dev/null
+++ b/costa/native/external/lapack/zlaswp.f
@@ -0,0 +1,120 @@
+      SUBROUTINE ZLASWP( N, A, LDA, K1, K2, IPIV, INCX )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, K1, K2, LDA, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLASWP performs a series of row interchanges on the matrix A.
+*  One row interchange is initiated for each of rows K1 through K2 of A.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the matrix of column dimension N to which the row
+*          interchanges will be applied.
+*          On exit, the permuted matrix.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*
+*  K1      (input) INTEGER
+*          The first element of IPIV for which a row interchange will
+*          be done.
+*
+*  K2      (input) INTEGER
+*          The last element of IPIV for which a row interchange will
+*          be done.
+*
+*  IPIV    (input) INTEGER array, dimension (M*abs(INCX))
+*          The vector of pivot indices.  Only the elements in positions
+*          K1 through K2 of IPIV are accessed.
+*          IPIV(K) = L implies rows K and L are to be interchanged.
+*
+*  INCX    (input) INTEGER
+*          The increment between successive values of IPIV.  If IPIV
+*          is negative, the pivots are applied in reverse order.
+*
+*  Further Details
+*  ===============
+*
+*  Modified by
+*   R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+* =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, I1, I2, INC, IP, IX, IX0, J, K, N32
+      COMPLEX*16         TEMP
+*     ..
+*     .. Executable Statements ..
+*
+*     Interchange row I with row IPIV(I) for each of rows K1 through K2.
+*
+      IF( INCX.GT.0 ) THEN
+         IX0 = K1
+         I1 = K1
+         I2 = K2
+         INC = 1
+      ELSE IF( INCX.LT.0 ) THEN
+         IX0 = 1 + ( 1-K2 )*INCX
+         I1 = K2
+         I2 = K1
+         INC = -1
+      ELSE
+         RETURN
+      END IF
+*
+      N32 = ( N / 32 )*32
+      IF( N32.NE.0 ) THEN
+         DO 30 J = 1, N32, 32
+            IX = IX0
+            DO 20 I = I1, I2, INC
+               IP = IPIV( IX )
+               IF( IP.NE.I ) THEN
+                  DO 10 K = J, J + 31
+                     TEMP = A( I, K )
+                     A( I, K ) = A( IP, K )
+                     A( IP, K ) = TEMP
+   10             CONTINUE
+               END IF
+               IX = IX + INCX
+   20       CONTINUE
+   30    CONTINUE
+      END IF
+      IF( N32.NE.N ) THEN
+         N32 = N32 + 1
+         IX = IX0
+         DO 50 I = I1, I2, INC
+            IP = IPIV( IX )
+            IF( IP.NE.I ) THEN
+               DO 40 K = N32, N
+                  TEMP = A( I, K )
+                  A( I, K ) = A( IP, K )
+                  A( IP, K ) = TEMP
+   40          CONTINUE
+            END IF
+            IX = IX + INCX
+   50    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZLASWP
+*
+      END
diff --git a/costa/native/external/lapack/zlasyf.f b/costa/native/external/lapack/zlasyf.f
new file mode 100644
index 000000000..a7f89e74e
--- /dev/null
+++ b/costa/native/external/lapack/zlasyf.f
@@ -0,0 +1,598 @@
+      SUBROUTINE ZLASYF( UPLO, N, NB, KB, A, LDA, IPIV, W, LDW, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KB, LDA, LDW, N, NB
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         A( LDA, * ), W( LDW, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLASYF computes a partial factorization of a complex symmetric matrix
+*  A using the Bunch-Kaufman diagonal pivoting method. The partial
+*  factorization has the form:
+*
+*  A  =  ( I  U12 ) ( A11  0  ) (  I    0   )  if UPLO = 'U', or:
+*        ( 0  U22 ) (  0   D  ) ( U12' U22' )
+*
+*  A  =  ( L11  0 ) ( D    0  ) ( L11' L21' )  if UPLO = 'L'
+*        ( L21  I ) ( 0   A22 ) (  0    I   )
+*
+*  where the order of D is at most NB. The actual order is returned in
+*  the argument KB, and is either NB or NB-1, or N if N <= NB.
+*  Note that U' denotes the transpose of U.
+*
+*  ZLASYF is an auxiliary routine called by ZSYTRF. It uses blocked code
+*  (calling Level 3 BLAS) to update the submatrix A11 (if UPLO = 'U') or
+*  A22 (if UPLO = 'L').
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored:
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NB      (input) INTEGER
+*          The maximum number of columns of the matrix A that should be
+*          factored.  NB should be at least 2 to allow for 2-by-2 pivot
+*          blocks.
+*
+*  KB      (output) INTEGER
+*          The number of columns of A that were actually factored.
+*          KB is either NB-1 or NB, or N if N <= NB.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          n-by-n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n-by-n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*          On exit, A contains details of the partial factorization.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D.
+*          If UPLO = 'U', only the last KB elements of IPIV are set;
+*          if UPLO = 'L', only the first KB elements are set.
+*
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*  W       (workspace) COMPLEX*16 array, dimension (LDW,NB)
+*
+*  LDW     (input) INTEGER
+*          The leading dimension of the array W.  LDW >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          > 0: if INFO = k, D(k,k) is exactly zero.  The factorization
+*               has been completed, but the block diagonal matrix D is
+*               exactly singular.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+      DOUBLE PRECISION   EIGHT, SEVTEN
+      PARAMETER          ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 )
+      COMPLEX*16         CONE
+      PARAMETER          ( CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            IMAX, J, JB, JJ, JMAX, JP, K, KK, KKW, KP,
+     $                   KSTEP, KW
+      DOUBLE PRECISION   ABSAKK, ALPHA, COLMAX, ROWMAX
+      COMPLEX*16         D11, D21, D22, R1, T, Z
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IZAMAX
+      EXTERNAL           LSAME, IZAMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZCOPY, ZGEMM, ZGEMV, ZSCAL, ZSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX, MIN, SQRT
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( Z ) = ABS( DBLE( Z ) ) + ABS( DIMAG( Z ) )
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+*
+*     Initialize ALPHA for use in choosing pivot block size.
+*
+      ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT
+*
+      IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*        Factorize the trailing columns of A using the upper triangle
+*        of A and working backwards, and compute the matrix W = U12*D
+*        for use in updating A11
+*
+*        K is the main loop index, decreasing from N in steps of 1 or 2
+*
+*        KW is the column of W which corresponds to column K of A
+*
+         K = N
+   10    CONTINUE
+         KW = NB + K - N
+*
+*        Exit from loop
+*
+         IF( ( K.LE.N-NB+1 .AND. NB.LT.N ) .OR. K.LT.1 )
+     $      GO TO 30
+*
+*        Copy column K of A to column KW of W and update it
+*
+         CALL ZCOPY( K, A( 1, K ), 1, W( 1, KW ), 1 )
+         IF( K.LT.N )
+     $      CALL ZGEMV( 'No transpose', K, N-K, -CONE, A( 1, K+1 ), LDA,
+     $                  W( K, KW+1 ), LDW, CONE, W( 1, KW ), 1 )
+*
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = CABS1( W( K, KW ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.GT.1 ) THEN
+            IMAX = IZAMAX( K-1, W( 1, KW ), 1 )
+            COLMAX = CABS1( W( IMAX, KW ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              Copy column IMAX to column KW-1 of W and update it
+*
+               CALL ZCOPY( IMAX, A( 1, IMAX ), 1, W( 1, KW-1 ), 1 )
+               CALL ZCOPY( K-IMAX, A( IMAX, IMAX+1 ), LDA,
+     $                     W( IMAX+1, KW-1 ), 1 )
+               IF( K.LT.N )
+     $            CALL ZGEMV( 'No transpose', K, N-K, -CONE,
+     $                        A( 1, K+1 ), LDA, W( IMAX, KW+1 ), LDW,
+     $                        CONE, W( 1, KW-1 ), 1 )
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               JMAX = IMAX + IZAMAX( K-IMAX, W( IMAX+1, KW-1 ), 1 )
+               ROWMAX = CABS1( W( JMAX, KW-1 ) )
+               IF( IMAX.GT.1 ) THEN
+                  JMAX = IZAMAX( IMAX-1, W( 1, KW-1 ), 1 )
+                  ROWMAX = MAX( ROWMAX, CABS1( W( JMAX, KW-1 ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( CABS1( W( IMAX, KW-1 ) ).GE.ALPHA*ROWMAX ) THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+*
+*                 copy column KW-1 of W to column KW
+*
+                  CALL ZCOPY( K, W( 1, KW-1 ), 1, W( 1, KW ), 1 )
+               ELSE
+*
+*                 interchange rows and columns K-1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K - KSTEP + 1
+            KKW = NB + KK - N
+*
+*           Updated column KP is already stored in column KKW of W
+*
+            IF( KP.NE.KK ) THEN
+*
+*              Copy non-updated column KK to column KP
+*
+               A( KP, K ) = A( KK, K )
+               CALL ZCOPY( K-1-KP, A( KP+1, KK ), 1, A( KP, KP+1 ),
+     $                     LDA )
+               CALL ZCOPY( KP, A( 1, KK ), 1, A( 1, KP ), 1 )
+*
+*              Interchange rows KK and KP in last KK columns of A and W
+*
+               CALL ZSWAP( N-KK+1, A( KK, KK ), LDA, A( KP, KK ), LDA )
+               CALL ZSWAP( N-KK+1, W( KK, KKW ), LDW, W( KP, KKW ),
+     $                     LDW )
+            END IF
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column KW of W now holds
+*
+*              W(k) = U(k)*D(k)
+*
+*              where U(k) is the k-th column of U
+*
+*              Store U(k) in column k of A
+*
+               CALL ZCOPY( K, W( 1, KW ), 1, A( 1, K ), 1 )
+               R1 = CONE / A( K, K )
+               CALL ZSCAL( K-1, R1, A( 1, K ), 1 )
+            ELSE
+*
+*              2-by-2 pivot block D(k): columns KW and KW-1 of W now
+*              hold
+*
+*              ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k)
+*
+*              where U(k) and U(k-1) are the k-th and (k-1)-th columns
+*              of U
+*
+               IF( K.GT.2 ) THEN
+*
+*                 Store U(k) and U(k-1) in columns k and k-1 of A
+*
+                  D21 = W( K-1, KW )
+                  D11 = W( K, KW ) / D21
+                  D22 = W( K-1, KW-1 ) / D21
+                  T = CONE / ( D11*D22-CONE )
+                  D21 = T / D21
+                  DO 20 J = 1, K - 2
+                     A( J, K-1 ) = D21*( D11*W( J, KW-1 )-W( J, KW ) )
+                     A( J, K ) = D21*( D22*W( J, KW )-W( J, KW-1 ) )
+   20             CONTINUE
+               END IF
+*
+*              Copy D(k) to A
+*
+               A( K-1, K-1 ) = W( K-1, KW-1 )
+               A( K-1, K ) = W( K-1, KW )
+               A( K, K ) = W( K, KW )
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K-1 ) = -KP
+         END IF
+*
+*        Decrease K and return to the start of the main loop
+*
+         K = K - KSTEP
+         GO TO 10
+*
+   30    CONTINUE
+*
+*        Update the upper triangle of A11 (= A(1:k,1:k)) as
+*
+*        A11 := A11 - U12*D*U12' = A11 - U12*W'
+*
+*        computing blocks of NB columns at a time
+*
+         DO 50 J = ( ( K-1 ) / NB )*NB + 1, 1, -NB
+            JB = MIN( NB, K-J+1 )
+*
+*           Update the upper triangle of the diagonal block
+*
+            DO 40 JJ = J, J + JB - 1
+               CALL ZGEMV( 'No transpose', JJ-J+1, N-K, -CONE,
+     $                     A( J, K+1 ), LDA, W( JJ, KW+1 ), LDW, CONE,
+     $                     A( J, JJ ), 1 )
+   40       CONTINUE
+*
+*           Update the rectangular superdiagonal block
+*
+            CALL ZGEMM( 'No transpose', 'Transpose', J-1, JB, N-K,
+     $                  -CONE, A( 1, K+1 ), LDA, W( J, KW+1 ), LDW,
+     $                  CONE, A( 1, J ), LDA )
+   50    CONTINUE
+*
+*        Put U12 in standard form by partially undoing the interchanges
+*        in columns k+1:n
+*
+         J = K + 1
+   60    CONTINUE
+         JJ = J
+         JP = IPIV( J )
+         IF( JP.LT.0 ) THEN
+            JP = -JP
+            J = J + 1
+         END IF
+         J = J + 1
+         IF( JP.NE.JJ .AND. J.LE.N )
+     $      CALL ZSWAP( N-J+1, A( JP, J ), LDA, A( JJ, J ), LDA )
+         IF( J.LE.N )
+     $      GO TO 60
+*
+*        Set KB to the number of columns factorized
+*
+         KB = N - K
+*
+      ELSE
+*
+*        Factorize the leading columns of A using the lower triangle
+*        of A and working forwards, and compute the matrix W = L21*D
+*        for use in updating A22
+*
+*        K is the main loop index, increasing from 1 in steps of 1 or 2
+*
+         K = 1
+   70    CONTINUE
+*
+*        Exit from loop
+*
+         IF( ( K.GE.NB .AND. NB.LT.N ) .OR. K.GT.N )
+     $      GO TO 90
+*
+*        Copy column K of A to column K of W and update it
+*
+         CALL ZCOPY( N-K+1, A( K, K ), 1, W( K, K ), 1 )
+         CALL ZGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ), LDA,
+     $               W( K, 1 ), LDW, CONE, W( K, K ), 1 )
+*
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = CABS1( W( K, K ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.LT.N ) THEN
+            IMAX = K + IZAMAX( N-K, W( K+1, K ), 1 )
+            COLMAX = CABS1( W( IMAX, K ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              Copy column IMAX to column K+1 of W and update it
+*
+               CALL ZCOPY( IMAX-K, A( IMAX, K ), LDA, W( K, K+1 ), 1 )
+               CALL ZCOPY( N-IMAX+1, A( IMAX, IMAX ), 1, W( IMAX, K+1 ),
+     $                     1 )
+               CALL ZGEMV( 'No transpose', N-K+1, K-1, -CONE, A( K, 1 ),
+     $                     LDA, W( IMAX, 1 ), LDW, CONE, W( K, K+1 ),
+     $                     1 )
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               JMAX = K - 1 + IZAMAX( IMAX-K, W( K, K+1 ), 1 )
+               ROWMAX = CABS1( W( JMAX, K+1 ) )
+               IF( IMAX.LT.N ) THEN
+                  JMAX = IMAX + IZAMAX( N-IMAX, W( IMAX+1, K+1 ), 1 )
+                  ROWMAX = MAX( ROWMAX, CABS1( W( JMAX, K+1 ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( CABS1( W( IMAX, K+1 ) ).GE.ALPHA*ROWMAX ) THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+*
+*                 copy column K+1 of W to column K
+*
+                  CALL ZCOPY( N-K+1, W( K, K+1 ), 1, W( K, K ), 1 )
+               ELSE
+*
+*                 interchange rows and columns K+1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K + KSTEP - 1
+*
+*           Updated column KP is already stored in column KK of W
+*
+            IF( KP.NE.KK ) THEN
+*
+*              Copy non-updated column KK to column KP
+*
+               A( KP, K ) = A( KK, K )
+               CALL ZCOPY( KP-K-1, A( K+1, KK ), 1, A( KP, K+1 ), LDA )
+               CALL ZCOPY( N-KP+1, A( KP, KK ), 1, A( KP, KP ), 1 )
+*
+*              Interchange rows KK and KP in first KK columns of A and W
+*
+               CALL ZSWAP( KK, A( KK, 1 ), LDA, A( KP, 1 ), LDA )
+               CALL ZSWAP( KK, W( KK, 1 ), LDW, W( KP, 1 ), LDW )
+            END IF
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column k of W now holds
+*
+*              W(k) = L(k)*D(k)
+*
+*              where L(k) is the k-th column of L
+*
+*              Store L(k) in column k of A
+*
+               CALL ZCOPY( N-K+1, W( K, K ), 1, A( K, K ), 1 )
+               IF( K.LT.N ) THEN
+                  R1 = CONE / A( K, K )
+                  CALL ZSCAL( N-K, R1, A( K+1, K ), 1 )
+               END IF
+            ELSE
+*
+*              2-by-2 pivot block D(k): columns k and k+1 of W now hold
+*
+*              ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k)
+*
+*              where L(k) and L(k+1) are the k-th and (k+1)-th columns
+*              of L
+*
+               IF( K.LT.N-1 ) THEN
+*
+*                 Store L(k) and L(k+1) in columns k and k+1 of A
+*
+                  D21 = W( K+1, K )
+                  D11 = W( K+1, K+1 ) / D21
+                  D22 = W( K, K ) / D21
+                  T = CONE / ( D11*D22-CONE )
+                  D21 = T / D21
+                  DO 80 J = K + 2, N
+                     A( J, K ) = D21*( D11*W( J, K )-W( J, K+1 ) )
+                     A( J, K+1 ) = D21*( D22*W( J, K+1 )-W( J, K ) )
+   80             CONTINUE
+               END IF
+*
+*              Copy D(k) to A
+*
+               A( K, K ) = W( K, K )
+               A( K+1, K ) = W( K+1, K )
+               A( K+1, K+1 ) = W( K+1, K+1 )
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K+1 ) = -KP
+         END IF
+*
+*        Increase K and return to the start of the main loop
+*
+         K = K + KSTEP
+         GO TO 70
+*
+   90    CONTINUE
+*
+*        Update the lower triangle of A22 (= A(k:n,k:n)) as
+*
+*        A22 := A22 - L21*D*L21' = A22 - L21*W'
+*
+*        computing blocks of NB columns at a time
+*
+         DO 110 J = K, N, NB
+            JB = MIN( NB, N-J+1 )
+*
+*           Update the lower triangle of the diagonal block
+*
+            DO 100 JJ = J, J + JB - 1
+               CALL ZGEMV( 'No transpose', J+JB-JJ, K-1, -CONE,
+     $                     A( JJ, 1 ), LDA, W( JJ, 1 ), LDW, CONE,
+     $                     A( JJ, JJ ), 1 )
+  100       CONTINUE
+*
+*           Update the rectangular subdiagonal block
+*
+            IF( J+JB.LE.N )
+     $         CALL ZGEMM( 'No transpose', 'Transpose', N-J-JB+1, JB,
+     $                     K-1, -CONE, A( J+JB, 1 ), LDA, W( J, 1 ),
+     $                     LDW, CONE, A( J+JB, J ), LDA )
+  110    CONTINUE
+*
+*        Put L21 in standard form by partially undoing the interchanges
+*        in columns 1:k-1
+*
+         J = K - 1
+  120    CONTINUE
+         JJ = J
+         JP = IPIV( J )
+         IF( JP.LT.0 ) THEN
+            JP = -JP
+            J = J - 1
+         END IF
+         J = J - 1
+         IF( JP.NE.JJ .AND. J.GE.1 )
+     $      CALL ZSWAP( J, A( JP, 1 ), LDA, A( JJ, 1 ), LDA )
+         IF( J.GE.1 )
+     $      GO TO 120
+*
+*        Set KB to the number of columns factorized
+*
+         KB = K - 1
+*
+      END IF
+      RETURN
+*
+*     End of ZLASYF
+*
+      END
diff --git a/costa/native/external/lapack/zlatbs.f b/costa/native/external/lapack/zlatbs.f
new file mode 100644
index 000000000..f389f9f78
--- /dev/null
+++ b/costa/native/external/lapack/zlatbs.f
@@ -0,0 +1,909 @@
+      SUBROUTINE ZLATBS( UPLO, TRANS, DIAG, NORMIN, N, KD, AB, LDAB, X,
+     $                   SCALE, CNORM, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORMIN, TRANS, UPLO
+      INTEGER            INFO, KD, LDAB, N
+      DOUBLE PRECISION   SCALE
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   CNORM( * )
+      COMPLEX*16         AB( LDAB, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLATBS solves one of the triangular systems
+*
+*     A * x = s*b,  A**T * x = s*b,  or  A**H * x = s*b,
+*
+*  with scaling to prevent overflow, where A is an upper or lower
+*  triangular band matrix.  Here A' denotes the transpose of A, x and b
+*  are n-element vectors, and s is a scaling factor, usually less than
+*  or equal to 1, chosen so that the components of x will be less than
+*  the overflow threshold.  If the unscaled problem will not cause
+*  overflow, the Level 2 BLAS routine ZTBSV is called.  If the matrix A
+*  is singular (A(j,j) = 0 for some j), then s is set to 0 and a
+*  non-trivial solution to A*x = 0 is returned.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the matrix A is upper or lower triangular.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the operation applied to A.
+*          = 'N':  Solve A * x = s*b     (No transpose)
+*          = 'T':  Solve A**T * x = s*b  (Transpose)
+*          = 'C':  Solve A**H * x = s*b  (Conjugate transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          Specifies whether or not the matrix A is unit triangular.
+*          = 'N':  Non-unit triangular
+*          = 'U':  Unit triangular
+*
+*  NORMIN  (input) CHARACTER*1
+*          Specifies whether CNORM has been set or not.
+*          = 'Y':  CNORM contains the column norms on entry
+*          = 'N':  CNORM is not set on entry.  On exit, the norms will
+*                  be computed and stored in CNORM.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of subdiagonals or superdiagonals in the
+*          triangular matrix A.  KD >= 0.
+*
+*  AB      (input) COMPLEX*16 array, dimension (LDAB,N)
+*          The upper or lower triangular band matrix A, stored in the
+*          first KD+1 rows of the array. The j-th column of A is stored
+*          in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  X       (input/output) COMPLEX*16 array, dimension (N)
+*          On entry, the right hand side b of the triangular system.
+*          On exit, X is overwritten by the solution vector x.
+*
+*  SCALE   (output) DOUBLE PRECISION
+*          The scaling factor s for the triangular system
+*             A * x = s*b,  A**T * x = s*b,  or  A**H * x = s*b.
+*          If SCALE = 0, the matrix A is singular or badly scaled, and
+*          the vector x is an exact or approximate solution to A*x = 0.
+*
+*  CNORM   (input or output) DOUBLE PRECISION array, dimension (N)
+*
+*          If NORMIN = 'Y', CNORM is an input argument and CNORM(j)
+*          contains the norm of the off-diagonal part of the j-th column
+*          of A.  If TRANS = 'N', CNORM(j) must be greater than or equal
+*          to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)
+*          must be greater than or equal to the 1-norm.
+*
+*          If NORMIN = 'N', CNORM is an output argument and CNORM(j)
+*          returns the 1-norm of the offdiagonal part of the j-th column
+*          of A.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -k, the k-th argument had an illegal value
+*
+*  Further Details
+*  ======= =======
+*
+*  A rough bound on x is computed; if that is less than overflow, ZTBSV
+*  is called, otherwise, specific code is used which checks for possible
+*  overflow or divide-by-zero at every operation.
+*
+*  A columnwise scheme is used for solving A*x = b.  The basic algorithm
+*  if A is lower triangular is
+*
+*       x[1:n] := b[1:n]
+*       for j = 1, ..., n
+*            x(j) := x(j) / A(j,j)
+*            x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j]
+*       end
+*
+*  Define bounds on the components of x after j iterations of the loop:
+*     M(j) = bound on x[1:j]
+*     G(j) = bound on x[j+1:n]
+*  Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}.
+*
+*  Then for iteration j+1 we have
+*     M(j+1) <= G(j) / | A(j+1,j+1) |
+*     G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] |
+*            <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | )
+*
+*  where CNORM(j+1) is greater than or equal to the infinity-norm of
+*  column j+1 of A, not counting the diagonal.  Hence
+*
+*     G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | )
+*                  1<=i<=j
+*  and
+*
+*     |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| )
+*                                   1<=i< j
+*
+*  Since |x(j)| <= M(j), we use the Level 2 BLAS routine ZTBSV if the
+*  reciprocal of the largest M(j), j=1,..,n, is larger than
+*  max(underflow, 1/overflow).
+*
+*  The bound on x(j) is also used to determine when a step in the
+*  columnwise method can be performed without fear of overflow.  If
+*  the computed bound is greater than a large constant, x is scaled to
+*  prevent overflow, but if the bound overflows, x is set to 0, x(j) to
+*  1, and scale to 0, and a non-trivial solution to A*x = 0 is found.
+*
+*  Similarly, a row-wise scheme is used to solve A**T *x = b  or
+*  A**H *x = b.  The basic algorithm for A upper triangular is
+*
+*       for j = 1, ..., n
+*            x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j)
+*       end
+*
+*  We simultaneously compute two bounds
+*       G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j
+*       M(j) = bound on x(i), 1<=i<=j
+*
+*  The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we
+*  add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1.
+*  Then the bound on x(j) is
+*
+*       M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) |
+*
+*            <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| )
+*                      1<=i<=j
+*
+*  and we can safely call ZTBSV if 1/M(n) and 1/G(n) are both greater
+*  than max(underflow, 1/overflow).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, HALF, ONE, TWO
+      PARAMETER          ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0,
+     $                   TWO = 2.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN, NOUNIT, UPPER
+      INTEGER            I, IMAX, J, JFIRST, JINC, JLAST, JLEN, MAIND
+      DOUBLE PRECISION   BIGNUM, GROW, REC, SMLNUM, TJJ, TMAX, TSCAL,
+     $                   XBND, XJ, XMAX
+      COMPLEX*16         CSUMJ, TJJS, USCAL, ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IDAMAX, IZAMAX
+      DOUBLE PRECISION   DLAMCH, DZASUM
+      COMPLEX*16         ZDOTC, ZDOTU, ZLADIV
+      EXTERNAL           LSAME, IDAMAX, IZAMAX, DLAMCH, DZASUM, ZDOTC,
+     $                   ZDOTU, ZLADIV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSCAL, XERBLA, ZAXPY, ZDSCAL, ZTBSV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, MIN
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1, CABS2
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+      CABS2( ZDUM ) = ABS( DBLE( ZDUM ) / 2.D0 ) +
+     $                ABS( DIMAG( ZDUM ) / 2.D0 )
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Test the input parameters.
+*
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT.
+     $         LSAME( NORMIN, 'N' ) ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZLATBS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine machine dependent parameters to control overflow.
+*
+      SMLNUM = DLAMCH( 'Safe minimum' )
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SMLNUM / DLAMCH( 'Precision' )
+      BIGNUM = ONE / SMLNUM
+      SCALE = ONE
+*
+      IF( LSAME( NORMIN, 'N' ) ) THEN
+*
+*        Compute the 1-norm of each column, not including the diagonal.
+*
+         IF( UPPER ) THEN
+*
+*           A is upper triangular.
+*
+            DO 10 J = 1, N
+               JLEN = MIN( KD, J-1 )
+               CNORM( J ) = DZASUM( JLEN, AB( KD+1-JLEN, J ), 1 )
+   10       CONTINUE
+         ELSE
+*
+*           A is lower triangular.
+*
+            DO 20 J = 1, N
+               JLEN = MIN( KD, N-J )
+               IF( JLEN.GT.0 ) THEN
+                  CNORM( J ) = DZASUM( JLEN, AB( 2, J ), 1 )
+               ELSE
+                  CNORM( J ) = ZERO
+               END IF
+   20       CONTINUE
+         END IF
+      END IF
+*
+*     Scale the column norms by TSCAL if the maximum element in CNORM is
+*     greater than BIGNUM/2.
+*
+      IMAX = IDAMAX( N, CNORM, 1 )
+      TMAX = CNORM( IMAX )
+      IF( TMAX.LE.BIGNUM*HALF ) THEN
+         TSCAL = ONE
+      ELSE
+         TSCAL = HALF / ( SMLNUM*TMAX )
+         CALL DSCAL( N, TSCAL, CNORM, 1 )
+      END IF
+*
+*     Compute a bound on the computed solution vector to see if the
+*     Level 2 BLAS routine ZTBSV can be used.
+*
+      XMAX = ZERO
+      DO 30 J = 1, N
+         XMAX = MAX( XMAX, CABS2( X( J ) ) )
+   30 CONTINUE
+      XBND = XMAX
+      IF( NOTRAN ) THEN
+*
+*        Compute the growth in A * x = b.
+*
+         IF( UPPER ) THEN
+            JFIRST = N
+            JLAST = 1
+            JINC = -1
+            MAIND = KD + 1
+         ELSE
+            JFIRST = 1
+            JLAST = N
+            JINC = 1
+            MAIND = 1
+         END IF
+*
+         IF( TSCAL.NE.ONE ) THEN
+            GROW = ZERO
+            GO TO 60
+         END IF
+*
+         IF( NOUNIT ) THEN
+*
+*           A is non-unit triangular.
+*
+*           Compute GROW = 1/G(j) and XBND = 1/M(j).
+*           Initially, G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = HALF / MAX( XBND, SMLNUM )
+            XBND = GROW
+            DO 40 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 60
+*
+               TJJS = AB( MAIND, J )
+               TJJ = CABS1( TJJS )
+*
+               IF( TJJ.GE.SMLNUM ) THEN
+*
+*                 M(j) = G(j-1) / abs(A(j,j))
+*
+                  XBND = MIN( XBND, MIN( ONE, TJJ )*GROW )
+               ELSE
+*
+*                 M(j) could overflow, set XBND to 0.
+*
+                  XBND = ZERO
+               END IF
+*
+               IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN
+*
+*                 G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) )
+*
+                  GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) )
+               ELSE
+*
+*                 G(j) could overflow, set GROW to 0.
+*
+                  GROW = ZERO
+               END IF
+   40       CONTINUE
+            GROW = XBND
+         ELSE
+*
+*           A is unit triangular.
+*
+*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) )
+            DO 50 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 60
+*
+*              G(j) = G(j-1)*( 1 + CNORM(j) )
+*
+               GROW = GROW*( ONE / ( ONE+CNORM( J ) ) )
+   50       CONTINUE
+         END IF
+   60    CONTINUE
+*
+      ELSE
+*
+*        Compute the growth in A**T * x = b  or  A**H * x = b.
+*
+         IF( UPPER ) THEN
+            JFIRST = 1
+            JLAST = N
+            JINC = 1
+            MAIND = KD + 1
+         ELSE
+            JFIRST = N
+            JLAST = 1
+            JINC = -1
+            MAIND = 1
+         END IF
+*
+         IF( TSCAL.NE.ONE ) THEN
+            GROW = ZERO
+            GO TO 90
+         END IF
+*
+         IF( NOUNIT ) THEN
+*
+*           A is non-unit triangular.
+*
+*           Compute GROW = 1/G(j) and XBND = 1/M(j).
+*           Initially, M(0) = max{x(i), i=1,...,n}.
+*
+            GROW = HALF / MAX( XBND, SMLNUM )
+            XBND = GROW
+            DO 70 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 90
+*
+*              G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) )
+*
+               XJ = ONE + CNORM( J )
+               GROW = MIN( GROW, XBND / XJ )
+*
+               TJJS = AB( MAIND, J )
+               TJJ = CABS1( TJJS )
+*
+               IF( TJJ.GE.SMLNUM ) THEN
+*
+*                 M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j))
+*
+                  IF( XJ.GT.TJJ )
+     $               XBND = XBND*( TJJ / XJ )
+               ELSE
+*
+*                 M(j) could overflow, set XBND to 0.
+*
+                  XBND = ZERO
+               END IF
+   70       CONTINUE
+            GROW = MIN( GROW, XBND )
+         ELSE
+*
+*           A is unit triangular.
+*
+*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) )
+            DO 80 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 90
+*
+*              G(j) = ( 1 + CNORM(j) )*G(j-1)
+*
+               XJ = ONE + CNORM( J )
+               GROW = GROW / XJ
+   80       CONTINUE
+         END IF
+   90    CONTINUE
+      END IF
+*
+      IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN
+*
+*        Use the Level 2 BLAS solve if the reciprocal of the bound on
+*        elements of X is not too small.
+*
+         CALL ZTBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, X, 1 )
+      ELSE
+*
+*        Use a Level 1 BLAS solve, scaling intermediate results.
+*
+         IF( XMAX.GT.BIGNUM*HALF ) THEN
+*
+*           Scale X so that its components are less than or equal to
+*           BIGNUM in absolute value.
+*
+            SCALE = ( BIGNUM*HALF ) / XMAX
+            CALL ZDSCAL( N, SCALE, X, 1 )
+            XMAX = BIGNUM
+         ELSE
+            XMAX = XMAX*TWO
+         END IF
+*
+         IF( NOTRAN ) THEN
+*
+*           Solve A * x = b
+*
+            DO 120 J = JFIRST, JLAST, JINC
+*
+*              Compute x(j) = b(j) / A(j,j), scaling x if necessary.
+*
+               XJ = CABS1( X( J ) )
+               IF( NOUNIT ) THEN
+                  TJJS = AB( MAIND, J )*TSCAL
+               ELSE
+                  TJJS = TSCAL
+                  IF( TSCAL.EQ.ONE )
+     $               GO TO 110
+               END IF
+               TJJ = CABS1( TJJS )
+               IF( TJJ.GT.SMLNUM ) THEN
+*
+*                    abs(A(j,j)) > SMLNUM:
+*
+                  IF( TJJ.LT.ONE ) THEN
+                     IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                          Scale x by 1/b(j).
+*
+                        REC = ONE / XJ
+                        CALL ZDSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                  END IF
+                  X( J ) = ZLADIV( X( J ), TJJS )
+                  XJ = CABS1( X( J ) )
+               ELSE IF( TJJ.GT.ZERO ) THEN
+*
+*                    0 < abs(A(j,j)) <= SMLNUM:
+*
+                  IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                       Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM
+*                       to avoid overflow when dividing by A(j,j).
+*
+                     REC = ( TJJ*BIGNUM ) / XJ
+                     IF( CNORM( J ).GT.ONE ) THEN
+*
+*                          Scale by 1/CNORM(j) to avoid overflow when
+*                          multiplying x(j) times column j.
+*
+                        REC = REC / CNORM( J )
+                     END IF
+                     CALL ZDSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                     XMAX = XMAX*REC
+                  END IF
+                  X( J ) = ZLADIV( X( J ), TJJS )
+                  XJ = CABS1( X( J ) )
+               ELSE
+*
+*                    A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+*                    scale = 0, and compute a solution to A*x = 0.
+*
+                  DO 100 I = 1, N
+                     X( I ) = ZERO
+  100             CONTINUE
+                  X( J ) = ONE
+                  XJ = ONE
+                  SCALE = ZERO
+                  XMAX = ZERO
+               END IF
+  110          CONTINUE
+*
+*              Scale x if necessary to avoid overflow when adding a
+*              multiple of column j of A.
+*
+               IF( XJ.GT.ONE ) THEN
+                  REC = ONE / XJ
+                  IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN
+*
+*                    Scale x by 1/(2*abs(x(j))).
+*
+                     REC = REC*HALF
+                     CALL ZDSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                  END IF
+               ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN
+*
+*                 Scale x by 1/2.
+*
+                  CALL ZDSCAL( N, HALF, X, 1 )
+                  SCALE = SCALE*HALF
+               END IF
+*
+               IF( UPPER ) THEN
+                  IF( J.GT.1 ) THEN
+*
+*                    Compute the update
+*                       x(max(1,j-kd):j-1) := x(max(1,j-kd):j-1) -
+*                                             x(j)* A(max(1,j-kd):j-1,j)
+*
+                     JLEN = MIN( KD, J-1 )
+                     CALL ZAXPY( JLEN, -X( J )*TSCAL,
+     $                           AB( KD+1-JLEN, J ), 1, X( J-JLEN ), 1 )
+                     I = IZAMAX( J-1, X, 1 )
+                     XMAX = CABS1( X( I ) )
+                  END IF
+               ELSE IF( J.LT.N ) THEN
+*
+*                 Compute the update
+*                    x(j+1:min(j+kd,n)) := x(j+1:min(j+kd,n)) -
+*                                          x(j) * A(j+1:min(j+kd,n),j)
+*
+                  JLEN = MIN( KD, N-J )
+                  IF( JLEN.GT.0 )
+     $               CALL ZAXPY( JLEN, -X( J )*TSCAL, AB( 2, J ), 1,
+     $                           X( J+1 ), 1 )
+                  I = J + IZAMAX( N-J, X( J+1 ), 1 )
+                  XMAX = CABS1( X( I ) )
+               END IF
+  120       CONTINUE
+*
+         ELSE IF( LSAME( TRANS, 'T' ) ) THEN
+*
+*           Solve A**T * x = b
+*
+            DO 170 J = JFIRST, JLAST, JINC
+*
+*              Compute x(j) = b(j) - sum A(k,j)*x(k).
+*                                    k<>j
+*
+               XJ = CABS1( X( J ) )
+               USCAL = TSCAL
+               REC = ONE / MAX( XMAX, ONE )
+               IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN
+*
+*                 If x(j) could overflow, scale x by 1/(2*XMAX).
+*
+                  REC = REC*HALF
+                  IF( NOUNIT ) THEN
+                     TJJS = AB( MAIND, J )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                  END IF
+                  TJJ = CABS1( TJJS )
+                  IF( TJJ.GT.ONE ) THEN
+*
+*                       Divide by A(j,j) when scaling x if A(j,j) > 1.
+*
+                     REC = MIN( ONE, REC*TJJ )
+                     USCAL = ZLADIV( USCAL, TJJS )
+                  END IF
+                  IF( REC.LT.ONE ) THEN
+                     CALL ZDSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                     XMAX = XMAX*REC
+                  END IF
+               END IF
+*
+               CSUMJ = ZERO
+               IF( USCAL.EQ.DCMPLX( ONE ) ) THEN
+*
+*                 If the scaling needed for A in the dot product is 1,
+*                 call ZDOTU to perform the dot product.
+*
+                  IF( UPPER ) THEN
+                     JLEN = MIN( KD, J-1 )
+                     CSUMJ = ZDOTU( JLEN, AB( KD+1-JLEN, J ), 1,
+     $                       X( J-JLEN ), 1 )
+                  ELSE
+                     JLEN = MIN( KD, N-J )
+                     IF( JLEN.GT.1 )
+     $                  CSUMJ = ZDOTU( JLEN, AB( 2, J ), 1, X( J+1 ),
+     $                          1 )
+                  END IF
+               ELSE
+*
+*                 Otherwise, use in-line code for the dot product.
+*
+                  IF( UPPER ) THEN
+                     JLEN = MIN( KD, J-1 )
+                     DO 130 I = 1, JLEN
+                        CSUMJ = CSUMJ + ( AB( KD+I-JLEN, J )*USCAL )*
+     $                          X( J-JLEN-1+I )
+  130                CONTINUE
+                  ELSE
+                     JLEN = MIN( KD, N-J )
+                     DO 140 I = 1, JLEN
+                        CSUMJ = CSUMJ + ( AB( I+1, J )*USCAL )*X( J+I )
+  140                CONTINUE
+                  END IF
+               END IF
+*
+               IF( USCAL.EQ.DCMPLX( TSCAL ) ) THEN
+*
+*                 Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j)
+*                 was not used to scale the dotproduct.
+*
+                  X( J ) = X( J ) - CSUMJ
+                  XJ = CABS1( X( J ) )
+                  IF( NOUNIT ) THEN
+*
+*                    Compute x(j) = x(j) / A(j,j), scaling if necessary.
+*
+                     TJJS = AB( MAIND, J )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                     IF( TSCAL.EQ.ONE )
+     $                  GO TO 160
+                  END IF
+                  TJJ = CABS1( TJJS )
+                  IF( TJJ.GT.SMLNUM ) THEN
+*
+*                       abs(A(j,j)) > SMLNUM:
+*
+                     IF( TJJ.LT.ONE ) THEN
+                        IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                             Scale X by 1/abs(x(j)).
+*
+                           REC = ONE / XJ
+                           CALL ZDSCAL( N, REC, X, 1 )
+                           SCALE = SCALE*REC
+                           XMAX = XMAX*REC
+                        END IF
+                     END IF
+                     X( J ) = ZLADIV( X( J ), TJJS )
+                  ELSE IF( TJJ.GT.ZERO ) THEN
+*
+*                       0 < abs(A(j,j)) <= SMLNUM:
+*
+                     IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM.
+*
+                        REC = ( TJJ*BIGNUM ) / XJ
+                        CALL ZDSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                     X( J ) = ZLADIV( X( J ), TJJS )
+                  ELSE
+*
+*                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+*                       scale = 0 and compute a solution to A**T *x = 0.
+*
+                     DO 150 I = 1, N
+                        X( I ) = ZERO
+  150                CONTINUE
+                     X( J ) = ONE
+                     SCALE = ZERO
+                     XMAX = ZERO
+                  END IF
+  160             CONTINUE
+               ELSE
+*
+*                 Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot
+*                 product has already been divided by 1/A(j,j).
+*
+                  X( J ) = ZLADIV( X( J ), TJJS ) - CSUMJ
+               END IF
+               XMAX = MAX( XMAX, CABS1( X( J ) ) )
+  170       CONTINUE
+*
+         ELSE
+*
+*           Solve A**H * x = b
+*
+            DO 220 J = JFIRST, JLAST, JINC
+*
+*              Compute x(j) = b(j) - sum A(k,j)*x(k).
+*                                    k<>j
+*
+               XJ = CABS1( X( J ) )
+               USCAL = TSCAL
+               REC = ONE / MAX( XMAX, ONE )
+               IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN
+*
+*                 If x(j) could overflow, scale x by 1/(2*XMAX).
+*
+                  REC = REC*HALF
+                  IF( NOUNIT ) THEN
+                     TJJS = DCONJG( AB( MAIND, J ) )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                  END IF
+                  TJJ = CABS1( TJJS )
+                  IF( TJJ.GT.ONE ) THEN
+*
+*                       Divide by A(j,j) when scaling x if A(j,j) > 1.
+*
+                     REC = MIN( ONE, REC*TJJ )
+                     USCAL = ZLADIV( USCAL, TJJS )
+                  END IF
+                  IF( REC.LT.ONE ) THEN
+                     CALL ZDSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                     XMAX = XMAX*REC
+                  END IF
+               END IF
+*
+               CSUMJ = ZERO
+               IF( USCAL.EQ.DCMPLX( ONE ) ) THEN
+*
+*                 If the scaling needed for A in the dot product is 1,
+*                 call ZDOTC to perform the dot product.
+*
+                  IF( UPPER ) THEN
+                     JLEN = MIN( KD, J-1 )
+                     CSUMJ = ZDOTC( JLEN, AB( KD+1-JLEN, J ), 1,
+     $                       X( J-JLEN ), 1 )
+                  ELSE
+                     JLEN = MIN( KD, N-J )
+                     IF( JLEN.GT.1 )
+     $                  CSUMJ = ZDOTC( JLEN, AB( 2, J ), 1, X( J+1 ),
+     $                          1 )
+                  END IF
+               ELSE
+*
+*                 Otherwise, use in-line code for the dot product.
+*
+                  IF( UPPER ) THEN
+                     JLEN = MIN( KD, J-1 )
+                     DO 180 I = 1, JLEN
+                        CSUMJ = CSUMJ + ( DCONJG( AB( KD+I-JLEN, J ) )*
+     $                          USCAL )*X( J-JLEN-1+I )
+  180                CONTINUE
+                  ELSE
+                     JLEN = MIN( KD, N-J )
+                     DO 190 I = 1, JLEN
+                        CSUMJ = CSUMJ + ( DCONJG( AB( I+1, J ) )*USCAL )
+     $                          *X( J+I )
+  190                CONTINUE
+                  END IF
+               END IF
+*
+               IF( USCAL.EQ.DCMPLX( TSCAL ) ) THEN
+*
+*                 Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j)
+*                 was not used to scale the dotproduct.
+*
+                  X( J ) = X( J ) - CSUMJ
+                  XJ = CABS1( X( J ) )
+                  IF( NOUNIT ) THEN
+*
+*                    Compute x(j) = x(j) / A(j,j), scaling if necessary.
+*
+                     TJJS = DCONJG( AB( MAIND, J ) )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                     IF( TSCAL.EQ.ONE )
+     $                  GO TO 210
+                  END IF
+                  TJJ = CABS1( TJJS )
+                  IF( TJJ.GT.SMLNUM ) THEN
+*
+*                       abs(A(j,j)) > SMLNUM:
+*
+                     IF( TJJ.LT.ONE ) THEN
+                        IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                             Scale X by 1/abs(x(j)).
+*
+                           REC = ONE / XJ
+                           CALL ZDSCAL( N, REC, X, 1 )
+                           SCALE = SCALE*REC
+                           XMAX = XMAX*REC
+                        END IF
+                     END IF
+                     X( J ) = ZLADIV( X( J ), TJJS )
+                  ELSE IF( TJJ.GT.ZERO ) THEN
+*
+*                       0 < abs(A(j,j)) <= SMLNUM:
+*
+                     IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM.
+*
+                        REC = ( TJJ*BIGNUM ) / XJ
+                        CALL ZDSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                     X( J ) = ZLADIV( X( J ), TJJS )
+                  ELSE
+*
+*                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+*                       scale = 0 and compute a solution to A**H *x = 0.
+*
+                     DO 200 I = 1, N
+                        X( I ) = ZERO
+  200                CONTINUE
+                     X( J ) = ONE
+                     SCALE = ZERO
+                     XMAX = ZERO
+                  END IF
+  210             CONTINUE
+               ELSE
+*
+*                 Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot
+*                 product has already been divided by 1/A(j,j).
+*
+                  X( J ) = ZLADIV( X( J ), TJJS ) - CSUMJ
+               END IF
+               XMAX = MAX( XMAX, CABS1( X( J ) ) )
+  220       CONTINUE
+         END IF
+         SCALE = SCALE / TSCAL
+      END IF
+*
+*     Scale the column norms by 1/TSCAL for return.
+*
+      IF( TSCAL.NE.ONE ) THEN
+         CALL DSCAL( N, ONE / TSCAL, CNORM, 1 )
+      END IF
+*
+      RETURN
+*
+*     End of ZLATBS
+*
+      END
diff --git a/costa/native/external/lapack/zlatdf.f b/costa/native/external/lapack/zlatdf.f
new file mode 100644
index 000000000..3316ca4fb
--- /dev/null
+++ b/costa/native/external/lapack/zlatdf.f
@@ -0,0 +1,242 @@
+      SUBROUTINE ZLATDF( IJOB, N, Z, LDZ, RHS, RDSUM, RDSCAL, IPIV,
+     $                   JPIV )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            IJOB, LDZ, N
+      DOUBLE PRECISION   RDSCAL, RDSUM
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * ), JPIV( * )
+      COMPLEX*16         RHS( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLATDF computes the contribution to the reciprocal Dif-estimate
+*  by solving for x in Z * x = b, where b is chosen such that the norm
+*  of x is as large as possible. It is assumed that LU decomposition
+*  of Z has been computed by ZGETC2. On entry RHS = f holds the
+*  contribution from earlier solved sub-systems, and on return RHS = x.
+*
+*  The factorization of Z returned by ZGETC2 has the form
+*  Z = P * L * U * Q, where P and Q are permutation matrices. L is lower
+*  triangular with unit diagonal elements and U is upper triangular.
+*
+*  Arguments
+*  =========
+*
+*  IJOB    (input) INTEGER
+*          IJOB = 2: First compute an approximative null-vector e
+*              of Z using ZGECON, e is normalized and solve for
+*              Zx = +-e - f with the sign giving the greater value of
+*              2-norm(x).  About 5 times as expensive as Default.
+*          IJOB .ne. 2: Local look ahead strategy where
+*              all entries of the r.h.s. b is choosen as either +1 or
+*              -1.  Default.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Z.
+*
+*  Z       (input) DOUBLE PRECISION array, dimension (LDZ, N)
+*          On entry, the LU part of the factorization of the n-by-n
+*          matrix Z computed by ZGETC2:  Z = P * L * U * Q
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDA >= max(1, N).
+*
+*  RHS     (input/output) DOUBLE PRECISION array, dimension (N).
+*          On entry, RHS contains contributions from other subsystems.
+*          On exit, RHS contains the solution of the subsystem with
+*          entries according to the value of IJOB (see above).
+*
+*  RDSUM   (input/output) DOUBLE PRECISION
+*          On entry, the sum of squares of computed contributions to
+*          the Dif-estimate under computation by ZTGSYL, where the
+*          scaling factor RDSCAL (see below) has been factored out.
+*          On exit, the corresponding sum of squares updated with the
+*          contributions from the current sub-system.
+*          If TRANS = 'T' RDSUM is not touched.
+*          NOTE: RDSUM only makes sense when ZTGSY2 is called by CTGSYL.
+*
+*  RDSCAL  (input/output) DOUBLE PRECISION
+*          On entry, scaling factor used to prevent overflow in RDSUM.
+*          On exit, RDSCAL is updated w.r.t. the current contributions
+*          in RDSUM.
+*          If TRANS = 'T', RDSCAL is not touched.
+*          NOTE: RDSCAL only makes sense when ZTGSY2 is called by
+*          ZTGSYL.
+*
+*  IPIV    (input) INTEGER array, dimension (N).
+*          The pivot indices; for 1 <= i <= N, row i of the
+*          matrix has been interchanged with row IPIV(i).
+*
+*  JPIV    (input) INTEGER array, dimension (N).
+*          The pivot indices; for 1 <= j <= N, column j of the
+*          matrix has been interchanged with column JPIV(j).
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  This routine is a further developed implementation of algorithm
+*  BSOLVE in [1] using complete pivoting in the LU factorization.
+*
+*   [1]   Bo Kagstrom and Lars Westin,
+*         Generalized Schur Methods with Condition Estimators for
+*         Solving the Generalized Sylvester Equation, IEEE Transactions
+*         on Automatic Control, Vol. 34, No. 7, July 1989, pp 745-751.
+*
+*   [2]   Peter Poromaa,
+*         On Efficient and Robust Estimators for the Separation
+*         between two Regular Matrix Pairs with Applications in
+*         Condition Estimation. Report UMINF-95.05, Department of
+*         Computing Science, Umea University, S-901 87 Umea, Sweden,
+*         1995.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            MAXDIM
+      PARAMETER          ( MAXDIM = 2 )
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+      COMPLEX*16         CONE
+      PARAMETER          ( CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, INFO, J, K
+      DOUBLE PRECISION   RTEMP, SCALE, SMINU, SPLUS
+      COMPLEX*16         BM, BP, PMONE, TEMP
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   RWORK( MAXDIM )
+      COMPLEX*16         WORK( 4*MAXDIM ), XM( MAXDIM ), XP( MAXDIM )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZAXPY, ZCOPY, ZGECON, ZGESC2, ZLASSQ, ZLASWP,
+     $                   ZSCAL
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DZASUM
+      COMPLEX*16         ZDOTC
+      EXTERNAL           DZASUM, ZDOTC
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      IF( IJOB.NE.2 ) THEN
+*
+*        Apply permutations IPIV to RHS
+*
+         CALL ZLASWP( 1, RHS, LDZ, 1, N-1, IPIV, 1 )
+*
+*        Solve for L-part choosing RHS either to +1 or -1.
+*
+         PMONE = -CONE
+         DO 10 J = 1, N - 1
+            BP = RHS( J ) + CONE
+            BM = RHS( J ) - CONE
+            SPLUS = ONE
+*
+*           Lockahead for L- part RHS(1:N-1) = +-1
+*           SPLUS and SMIN computed more efficiently than in BSOLVE[1].
+*
+            SPLUS = SPLUS + DBLE( ZDOTC( N-J, Z( J+1, J ), 1, Z( J+1,
+     $              J ), 1 ) )
+            SMINU = DBLE( ZDOTC( N-J, Z( J+1, J ), 1, RHS( J+1 ), 1 ) )
+            SPLUS = SPLUS*DBLE( RHS( J ) )
+            IF( SPLUS.GT.SMINU ) THEN
+               RHS( J ) = BP
+            ELSE IF( SMINU.GT.SPLUS ) THEN
+               RHS( J ) = BM
+            ELSE
+*
+*              In this case the updating sums are equal and we can
+*              choose RHS(J) +1 or -1. The first time this happens we
+*              choose -1, thereafter +1. This is a simple way to get
+*              good estimates of matrices like Byers well-known example
+*              (see [1]). (Not done in BSOLVE.)
+*
+               RHS( J ) = RHS( J ) + PMONE
+               PMONE = CONE
+            END IF
+*
+*           Compute the remaining r.h.s.
+*
+            TEMP = -RHS( J )
+            CALL ZAXPY( N-J, TEMP, Z( J+1, J ), 1, RHS( J+1 ), 1 )
+   10    CONTINUE
+*
+*        Solve for U- part, lockahead for RHS(N) = +-1. This is not done
+*        In BSOLVE and will hopefully give us a better estimate because
+*        any ill-conditioning of the original matrix is transfered to U
+*        and not to L. U(N, N) is an approximation to sigma_min(LU).
+*
+         CALL ZCOPY( N-1, RHS, 1, WORK, 1 )
+         WORK( N ) = RHS( N ) + CONE
+         RHS( N ) = RHS( N ) - CONE
+         SPLUS = ZERO
+         SMINU = ZERO
+         DO 30 I = N, 1, -1
+            TEMP = CONE / Z( I, I )
+            WORK( I ) = WORK( I )*TEMP
+            RHS( I ) = RHS( I )*TEMP
+            DO 20 K = I + 1, N
+               WORK( I ) = WORK( I ) - WORK( K )*( Z( I, K )*TEMP )
+               RHS( I ) = RHS( I ) - RHS( K )*( Z( I, K )*TEMP )
+   20       CONTINUE
+            SPLUS = SPLUS + ABS( WORK( I ) )
+            SMINU = SMINU + ABS( RHS( I ) )
+   30    CONTINUE
+         IF( SPLUS.GT.SMINU )
+     $      CALL ZCOPY( N, WORK, 1, RHS, 1 )
+*
+*        Apply the permutations JPIV to the computed solution (RHS)
+*
+         CALL ZLASWP( 1, RHS, LDZ, 1, N-1, JPIV, -1 )
+*
+*        Compute the sum of squares
+*
+         CALL ZLASSQ( N, RHS, 1, RDSCAL, RDSUM )
+         RETURN
+      END IF
+*
+*     ENTRY IJOB = 2
+*
+*     Compute approximate nullvector XM of Z
+*
+      CALL ZGECON( 'I', N, Z, LDZ, ONE, RTEMP, WORK, RWORK, INFO )
+      CALL ZCOPY( N, WORK( N+1 ), 1, XM, 1 )
+*
+*     Compute RHS
+*
+      CALL ZLASWP( 1, XM, LDZ, 1, N-1, IPIV, -1 )
+      TEMP = CONE / SQRT( ZDOTC( N, XM, 1, XM, 1 ) )
+      CALL ZSCAL( N, TEMP, XM, 1 )
+      CALL ZCOPY( N, XM, 1, XP, 1 )
+      CALL ZAXPY( N, CONE, RHS, 1, XP, 1 )
+      CALL ZAXPY( N, -CONE, XM, 1, RHS, 1 )
+      CALL ZGESC2( N, Z, LDZ, RHS, IPIV, JPIV, SCALE )
+      CALL ZGESC2( N, Z, LDZ, XP, IPIV, JPIV, SCALE )
+      IF( DZASUM( N, XP, 1 ).GT.DZASUM( N, RHS, 1 ) )
+     $   CALL ZCOPY( N, XP, 1, RHS, 1 )
+*
+*     Compute the sum of squares
+*
+      CALL ZLASSQ( N, RHS, 1, RDSCAL, RDSUM )
+      RETURN
+*
+*     End of ZLATDF
+*
+      END
diff --git a/costa/native/external/lapack/zlatps.f b/costa/native/external/lapack/zlatps.f
new file mode 100644
index 000000000..0b26e585a
--- /dev/null
+++ b/costa/native/external/lapack/zlatps.f
@@ -0,0 +1,895 @@
+      SUBROUTINE ZLATPS( UPLO, TRANS, DIAG, NORMIN, N, AP, X, SCALE,
+     $                   CNORM, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORMIN, TRANS, UPLO
+      INTEGER            INFO, N
+      DOUBLE PRECISION   SCALE
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   CNORM( * )
+      COMPLEX*16         AP( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLATPS solves one of the triangular systems
+*
+*     A * x = s*b,  A**T * x = s*b,  or  A**H * x = s*b,
+*
+*  with scaling to prevent overflow, where A is an upper or lower
+*  triangular matrix stored in packed form.  Here A**T denotes the
+*  transpose of A, A**H denotes the conjugate transpose of A, x and b
+*  are n-element vectors, and s is a scaling factor, usually less than
+*  or equal to 1, chosen so that the components of x will be less than
+*  the overflow threshold.  If the unscaled problem will not cause
+*  overflow, the Level 2 BLAS routine ZTPSV is called. If the matrix A
+*  is singular (A(j,j) = 0 for some j), then s is set to 0 and a
+*  non-trivial solution to A*x = 0 is returned.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the matrix A is upper or lower triangular.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the operation applied to A.
+*          = 'N':  Solve A * x = s*b     (No transpose)
+*          = 'T':  Solve A**T * x = s*b  (Transpose)
+*          = 'C':  Solve A**H * x = s*b  (Conjugate transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          Specifies whether or not the matrix A is unit triangular.
+*          = 'N':  Non-unit triangular
+*          = 'U':  Unit triangular
+*
+*  NORMIN  (input) CHARACTER*1
+*          Specifies whether CNORM has been set or not.
+*          = 'Y':  CNORM contains the column norms on entry
+*          = 'N':  CNORM is not set on entry.  On exit, the norms will
+*                  be computed and stored in CNORM.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          The upper or lower triangular matrix A, packed columnwise in
+*          a linear array.  The j-th column of A is stored in the array
+*          AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*  X       (input/output) COMPLEX*16 array, dimension (N)
+*          On entry, the right hand side b of the triangular system.
+*          On exit, X is overwritten by the solution vector x.
+*
+*  SCALE   (output) DOUBLE PRECISION
+*          The scaling factor s for the triangular system
+*             A * x = s*b,  A**T * x = s*b,  or  A**H * x = s*b.
+*          If SCALE = 0, the matrix A is singular or badly scaled, and
+*          the vector x is an exact or approximate solution to A*x = 0.
+*
+*  CNORM   (input or output) DOUBLE PRECISION array, dimension (N)
+*
+*          If NORMIN = 'Y', CNORM is an input argument and CNORM(j)
+*          contains the norm of the off-diagonal part of the j-th column
+*          of A.  If TRANS = 'N', CNORM(j) must be greater than or equal
+*          to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)
+*          must be greater than or equal to the 1-norm.
+*
+*          If NORMIN = 'N', CNORM is an output argument and CNORM(j)
+*          returns the 1-norm of the offdiagonal part of the j-th column
+*          of A.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -k, the k-th argument had an illegal value
+*
+*  Further Details
+*  ======= =======
+*
+*  A rough bound on x is computed; if that is less than overflow, ZTPSV
+*  is called, otherwise, specific code is used which checks for possible
+*  overflow or divide-by-zero at every operation.
+*
+*  A columnwise scheme is used for solving A*x = b.  The basic algorithm
+*  if A is lower triangular is
+*
+*       x[1:n] := b[1:n]
+*       for j = 1, ..., n
+*            x(j) := x(j) / A(j,j)
+*            x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j]
+*       end
+*
+*  Define bounds on the components of x after j iterations of the loop:
+*     M(j) = bound on x[1:j]
+*     G(j) = bound on x[j+1:n]
+*  Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}.
+*
+*  Then for iteration j+1 we have
+*     M(j+1) <= G(j) / | A(j+1,j+1) |
+*     G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] |
+*            <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | )
+*
+*  where CNORM(j+1) is greater than or equal to the infinity-norm of
+*  column j+1 of A, not counting the diagonal.  Hence
+*
+*     G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | )
+*                  1<=i<=j
+*  and
+*
+*     |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| )
+*                                   1<=i< j
+*
+*  Since |x(j)| <= M(j), we use the Level 2 BLAS routine ZTPSV if the
+*  reciprocal of the largest M(j), j=1,..,n, is larger than
+*  max(underflow, 1/overflow).
+*
+*  The bound on x(j) is also used to determine when a step in the
+*  columnwise method can be performed without fear of overflow.  If
+*  the computed bound is greater than a large constant, x is scaled to
+*  prevent overflow, but if the bound overflows, x is set to 0, x(j) to
+*  1, and scale to 0, and a non-trivial solution to A*x = 0 is found.
+*
+*  Similarly, a row-wise scheme is used to solve A**T *x = b  or
+*  A**H *x = b.  The basic algorithm for A upper triangular is
+*
+*       for j = 1, ..., n
+*            x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j)
+*       end
+*
+*  We simultaneously compute two bounds
+*       G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j
+*       M(j) = bound on x(i), 1<=i<=j
+*
+*  The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we
+*  add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1.
+*  Then the bound on x(j) is
+*
+*       M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) |
+*
+*            <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| )
+*                      1<=i<=j
+*
+*  and we can safely call ZTPSV if 1/M(n) and 1/G(n) are both greater
+*  than max(underflow, 1/overflow).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, HALF, ONE, TWO
+      PARAMETER          ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0,
+     $                   TWO = 2.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN, NOUNIT, UPPER
+      INTEGER            I, IMAX, IP, J, JFIRST, JINC, JLAST, JLEN
+      DOUBLE PRECISION   BIGNUM, GROW, REC, SMLNUM, TJJ, TMAX, TSCAL,
+     $                   XBND, XJ, XMAX
+      COMPLEX*16         CSUMJ, TJJS, USCAL, ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IDAMAX, IZAMAX
+      DOUBLE PRECISION   DLAMCH, DZASUM
+      COMPLEX*16         ZDOTC, ZDOTU, ZLADIV
+      EXTERNAL           LSAME, IDAMAX, IZAMAX, DLAMCH, DZASUM, ZDOTC,
+     $                   ZDOTU, ZLADIV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSCAL, XERBLA, ZAXPY, ZDSCAL, ZTPSV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, MIN
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1, CABS2
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+      CABS2( ZDUM ) = ABS( DBLE( ZDUM ) / 2.D0 ) +
+     $                ABS( DIMAG( ZDUM ) / 2.D0 )
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Test the input parameters.
+*
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT.
+     $         LSAME( NORMIN, 'N' ) ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZLATPS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine machine dependent parameters to control overflow.
+*
+      SMLNUM = DLAMCH( 'Safe minimum' )
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SMLNUM / DLAMCH( 'Precision' )
+      BIGNUM = ONE / SMLNUM
+      SCALE = ONE
+*
+      IF( LSAME( NORMIN, 'N' ) ) THEN
+*
+*        Compute the 1-norm of each column, not including the diagonal.
+*
+         IF( UPPER ) THEN
+*
+*           A is upper triangular.
+*
+            IP = 1
+            DO 10 J = 1, N
+               CNORM( J ) = DZASUM( J-1, AP( IP ), 1 )
+               IP = IP + J
+   10       CONTINUE
+         ELSE
+*
+*           A is lower triangular.
+*
+            IP = 1
+            DO 20 J = 1, N - 1
+               CNORM( J ) = DZASUM( N-J, AP( IP+1 ), 1 )
+               IP = IP + N - J + 1
+   20       CONTINUE
+            CNORM( N ) = ZERO
+         END IF
+      END IF
+*
+*     Scale the column norms by TSCAL if the maximum element in CNORM is
+*     greater than BIGNUM/2.
+*
+      IMAX = IDAMAX( N, CNORM, 1 )
+      TMAX = CNORM( IMAX )
+      IF( TMAX.LE.BIGNUM*HALF ) THEN
+         TSCAL = ONE
+      ELSE
+         TSCAL = HALF / ( SMLNUM*TMAX )
+         CALL DSCAL( N, TSCAL, CNORM, 1 )
+      END IF
+*
+*     Compute a bound on the computed solution vector to see if the
+*     Level 2 BLAS routine ZTPSV can be used.
+*
+      XMAX = ZERO
+      DO 30 J = 1, N
+         XMAX = MAX( XMAX, CABS2( X( J ) ) )
+   30 CONTINUE
+      XBND = XMAX
+      IF( NOTRAN ) THEN
+*
+*        Compute the growth in A * x = b.
+*
+         IF( UPPER ) THEN
+            JFIRST = N
+            JLAST = 1
+            JINC = -1
+         ELSE
+            JFIRST = 1
+            JLAST = N
+            JINC = 1
+         END IF
+*
+         IF( TSCAL.NE.ONE ) THEN
+            GROW = ZERO
+            GO TO 60
+         END IF
+*
+         IF( NOUNIT ) THEN
+*
+*           A is non-unit triangular.
+*
+*           Compute GROW = 1/G(j) and XBND = 1/M(j).
+*           Initially, G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = HALF / MAX( XBND, SMLNUM )
+            XBND = GROW
+            IP = JFIRST*( JFIRST+1 ) / 2
+            JLEN = N
+            DO 40 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 60
+*
+               TJJS = AP( IP )
+               TJJ = CABS1( TJJS )
+*
+               IF( TJJ.GE.SMLNUM ) THEN
+*
+*                 M(j) = G(j-1) / abs(A(j,j))
+*
+                  XBND = MIN( XBND, MIN( ONE, TJJ )*GROW )
+               ELSE
+*
+*                 M(j) could overflow, set XBND to 0.
+*
+                  XBND = ZERO
+               END IF
+*
+               IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN
+*
+*                 G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) )
+*
+                  GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) )
+               ELSE
+*
+*                 G(j) could overflow, set GROW to 0.
+*
+                  GROW = ZERO
+               END IF
+               IP = IP + JINC*JLEN
+               JLEN = JLEN - 1
+   40       CONTINUE
+            GROW = XBND
+         ELSE
+*
+*           A is unit triangular.
+*
+*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) )
+            DO 50 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 60
+*
+*              G(j) = G(j-1)*( 1 + CNORM(j) )
+*
+               GROW = GROW*( ONE / ( ONE+CNORM( J ) ) )
+   50       CONTINUE
+         END IF
+   60    CONTINUE
+*
+      ELSE
+*
+*        Compute the growth in A**T * x = b  or  A**H * x = b.
+*
+         IF( UPPER ) THEN
+            JFIRST = 1
+            JLAST = N
+            JINC = 1
+         ELSE
+            JFIRST = N
+            JLAST = 1
+            JINC = -1
+         END IF
+*
+         IF( TSCAL.NE.ONE ) THEN
+            GROW = ZERO
+            GO TO 90
+         END IF
+*
+         IF( NOUNIT ) THEN
+*
+*           A is non-unit triangular.
+*
+*           Compute GROW = 1/G(j) and XBND = 1/M(j).
+*           Initially, M(0) = max{x(i), i=1,...,n}.
+*
+            GROW = HALF / MAX( XBND, SMLNUM )
+            XBND = GROW
+            IP = JFIRST*( JFIRST+1 ) / 2
+            JLEN = 1
+            DO 70 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 90
+*
+*              G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) )
+*
+               XJ = ONE + CNORM( J )
+               GROW = MIN( GROW, XBND / XJ )
+*
+               TJJS = AP( IP )
+               TJJ = CABS1( TJJS )
+*
+               IF( TJJ.GE.SMLNUM ) THEN
+*
+*                 M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j))
+*
+                  IF( XJ.GT.TJJ )
+     $               XBND = XBND*( TJJ / XJ )
+               ELSE
+*
+*                 M(j) could overflow, set XBND to 0.
+*
+                  XBND = ZERO
+               END IF
+               JLEN = JLEN + 1
+               IP = IP + JINC*JLEN
+   70       CONTINUE
+            GROW = MIN( GROW, XBND )
+         ELSE
+*
+*           A is unit triangular.
+*
+*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) )
+            DO 80 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 90
+*
+*              G(j) = ( 1 + CNORM(j) )*G(j-1)
+*
+               XJ = ONE + CNORM( J )
+               GROW = GROW / XJ
+   80       CONTINUE
+         END IF
+   90    CONTINUE
+      END IF
+*
+      IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN
+*
+*        Use the Level 2 BLAS solve if the reciprocal of the bound on
+*        elements of X is not too small.
+*
+         CALL ZTPSV( UPLO, TRANS, DIAG, N, AP, X, 1 )
+      ELSE
+*
+*        Use a Level 1 BLAS solve, scaling intermediate results.
+*
+         IF( XMAX.GT.BIGNUM*HALF ) THEN
+*
+*           Scale X so that its components are less than or equal to
+*           BIGNUM in absolute value.
+*
+            SCALE = ( BIGNUM*HALF ) / XMAX
+            CALL ZDSCAL( N, SCALE, X, 1 )
+            XMAX = BIGNUM
+         ELSE
+            XMAX = XMAX*TWO
+         END IF
+*
+         IF( NOTRAN ) THEN
+*
+*           Solve A * x = b
+*
+            IP = JFIRST*( JFIRST+1 ) / 2
+            DO 120 J = JFIRST, JLAST, JINC
+*
+*              Compute x(j) = b(j) / A(j,j), scaling x if necessary.
+*
+               XJ = CABS1( X( J ) )
+               IF( NOUNIT ) THEN
+                  TJJS = AP( IP )*TSCAL
+               ELSE
+                  TJJS = TSCAL
+                  IF( TSCAL.EQ.ONE )
+     $               GO TO 110
+               END IF
+               TJJ = CABS1( TJJS )
+               IF( TJJ.GT.SMLNUM ) THEN
+*
+*                    abs(A(j,j)) > SMLNUM:
+*
+                  IF( TJJ.LT.ONE ) THEN
+                     IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                          Scale x by 1/b(j).
+*
+                        REC = ONE / XJ
+                        CALL ZDSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                  END IF
+                  X( J ) = ZLADIV( X( J ), TJJS )
+                  XJ = CABS1( X( J ) )
+               ELSE IF( TJJ.GT.ZERO ) THEN
+*
+*                    0 < abs(A(j,j)) <= SMLNUM:
+*
+                  IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                       Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM
+*                       to avoid overflow when dividing by A(j,j).
+*
+                     REC = ( TJJ*BIGNUM ) / XJ
+                     IF( CNORM( J ).GT.ONE ) THEN
+*
+*                          Scale by 1/CNORM(j) to avoid overflow when
+*                          multiplying x(j) times column j.
+*
+                        REC = REC / CNORM( J )
+                     END IF
+                     CALL ZDSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                     XMAX = XMAX*REC
+                  END IF
+                  X( J ) = ZLADIV( X( J ), TJJS )
+                  XJ = CABS1( X( J ) )
+               ELSE
+*
+*                    A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+*                    scale = 0, and compute a solution to A*x = 0.
+*
+                  DO 100 I = 1, N
+                     X( I ) = ZERO
+  100             CONTINUE
+                  X( J ) = ONE
+                  XJ = ONE
+                  SCALE = ZERO
+                  XMAX = ZERO
+               END IF
+  110          CONTINUE
+*
+*              Scale x if necessary to avoid overflow when adding a
+*              multiple of column j of A.
+*
+               IF( XJ.GT.ONE ) THEN
+                  REC = ONE / XJ
+                  IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN
+*
+*                    Scale x by 1/(2*abs(x(j))).
+*
+                     REC = REC*HALF
+                     CALL ZDSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                  END IF
+               ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN
+*
+*                 Scale x by 1/2.
+*
+                  CALL ZDSCAL( N, HALF, X, 1 )
+                  SCALE = SCALE*HALF
+               END IF
+*
+               IF( UPPER ) THEN
+                  IF( J.GT.1 ) THEN
+*
+*                    Compute the update
+*                       x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j)
+*
+                     CALL ZAXPY( J-1, -X( J )*TSCAL, AP( IP-J+1 ), 1, X,
+     $                           1 )
+                     I = IZAMAX( J-1, X, 1 )
+                     XMAX = CABS1( X( I ) )
+                  END IF
+                  IP = IP - J
+               ELSE
+                  IF( J.LT.N ) THEN
+*
+*                    Compute the update
+*                       x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j)
+*
+                     CALL ZAXPY( N-J, -X( J )*TSCAL, AP( IP+1 ), 1,
+     $                           X( J+1 ), 1 )
+                     I = J + IZAMAX( N-J, X( J+1 ), 1 )
+                     XMAX = CABS1( X( I ) )
+                  END IF
+                  IP = IP + N - J + 1
+               END IF
+  120       CONTINUE
+*
+         ELSE IF( LSAME( TRANS, 'T' ) ) THEN
+*
+*           Solve A**T * x = b
+*
+            IP = JFIRST*( JFIRST+1 ) / 2
+            JLEN = 1
+            DO 170 J = JFIRST, JLAST, JINC
+*
+*              Compute x(j) = b(j) - sum A(k,j)*x(k).
+*                                    k<>j
+*
+               XJ = CABS1( X( J ) )
+               USCAL = TSCAL
+               REC = ONE / MAX( XMAX, ONE )
+               IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN
+*
+*                 If x(j) could overflow, scale x by 1/(2*XMAX).
+*
+                  REC = REC*HALF
+                  IF( NOUNIT ) THEN
+                     TJJS = AP( IP )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                  END IF
+                  TJJ = CABS1( TJJS )
+                  IF( TJJ.GT.ONE ) THEN
+*
+*                       Divide by A(j,j) when scaling x if A(j,j) > 1.
+*
+                     REC = MIN( ONE, REC*TJJ )
+                     USCAL = ZLADIV( USCAL, TJJS )
+                  END IF
+                  IF( REC.LT.ONE ) THEN
+                     CALL ZDSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                     XMAX = XMAX*REC
+                  END IF
+               END IF
+*
+               CSUMJ = ZERO
+               IF( USCAL.EQ.DCMPLX( ONE ) ) THEN
+*
+*                 If the scaling needed for A in the dot product is 1,
+*                 call ZDOTU to perform the dot product.
+*
+                  IF( UPPER ) THEN
+                     CSUMJ = ZDOTU( J-1, AP( IP-J+1 ), 1, X, 1 )
+                  ELSE IF( J.LT.N ) THEN
+                     CSUMJ = ZDOTU( N-J, AP( IP+1 ), 1, X( J+1 ), 1 )
+                  END IF
+               ELSE
+*
+*                 Otherwise, use in-line code for the dot product.
+*
+                  IF( UPPER ) THEN
+                     DO 130 I = 1, J - 1
+                        CSUMJ = CSUMJ + ( AP( IP-J+I )*USCAL )*X( I )
+  130                CONTINUE
+                  ELSE IF( J.LT.N ) THEN
+                     DO 140 I = 1, N - J
+                        CSUMJ = CSUMJ + ( AP( IP+I )*USCAL )*X( J+I )
+  140                CONTINUE
+                  END IF
+               END IF
+*
+               IF( USCAL.EQ.DCMPLX( TSCAL ) ) THEN
+*
+*                 Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j)
+*                 was not used to scale the dotproduct.
+*
+                  X( J ) = X( J ) - CSUMJ
+                  XJ = CABS1( X( J ) )
+                  IF( NOUNIT ) THEN
+*
+*                    Compute x(j) = x(j) / A(j,j), scaling if necessary.
+*
+                     TJJS = AP( IP )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                     IF( TSCAL.EQ.ONE )
+     $                  GO TO 160
+                  END IF
+                  TJJ = CABS1( TJJS )
+                  IF( TJJ.GT.SMLNUM ) THEN
+*
+*                       abs(A(j,j)) > SMLNUM:
+*
+                     IF( TJJ.LT.ONE ) THEN
+                        IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                             Scale X by 1/abs(x(j)).
+*
+                           REC = ONE / XJ
+                           CALL ZDSCAL( N, REC, X, 1 )
+                           SCALE = SCALE*REC
+                           XMAX = XMAX*REC
+                        END IF
+                     END IF
+                     X( J ) = ZLADIV( X( J ), TJJS )
+                  ELSE IF( TJJ.GT.ZERO ) THEN
+*
+*                       0 < abs(A(j,j)) <= SMLNUM:
+*
+                     IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM.
+*
+                        REC = ( TJJ*BIGNUM ) / XJ
+                        CALL ZDSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                     X( J ) = ZLADIV( X( J ), TJJS )
+                  ELSE
+*
+*                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+*                       scale = 0 and compute a solution to A**T *x = 0.
+*
+                     DO 150 I = 1, N
+                        X( I ) = ZERO
+  150                CONTINUE
+                     X( J ) = ONE
+                     SCALE = ZERO
+                     XMAX = ZERO
+                  END IF
+  160             CONTINUE
+               ELSE
+*
+*                 Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot
+*                 product has already been divided by 1/A(j,j).
+*
+                  X( J ) = ZLADIV( X( J ), TJJS ) - CSUMJ
+               END IF
+               XMAX = MAX( XMAX, CABS1( X( J ) ) )
+               JLEN = JLEN + 1
+               IP = IP + JINC*JLEN
+  170       CONTINUE
+*
+         ELSE
+*
+*           Solve A**H * x = b
+*
+            IP = JFIRST*( JFIRST+1 ) / 2
+            JLEN = 1
+            DO 220 J = JFIRST, JLAST, JINC
+*
+*              Compute x(j) = b(j) - sum A(k,j)*x(k).
+*                                    k<>j
+*
+               XJ = CABS1( X( J ) )
+               USCAL = TSCAL
+               REC = ONE / MAX( XMAX, ONE )
+               IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN
+*
+*                 If x(j) could overflow, scale x by 1/(2*XMAX).
+*
+                  REC = REC*HALF
+                  IF( NOUNIT ) THEN
+                     TJJS = DCONJG( AP( IP ) )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                  END IF
+                  TJJ = CABS1( TJJS )
+                  IF( TJJ.GT.ONE ) THEN
+*
+*                       Divide by A(j,j) when scaling x if A(j,j) > 1.
+*
+                     REC = MIN( ONE, REC*TJJ )
+                     USCAL = ZLADIV( USCAL, TJJS )
+                  END IF
+                  IF( REC.LT.ONE ) THEN
+                     CALL ZDSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                     XMAX = XMAX*REC
+                  END IF
+               END IF
+*
+               CSUMJ = ZERO
+               IF( USCAL.EQ.DCMPLX( ONE ) ) THEN
+*
+*                 If the scaling needed for A in the dot product is 1,
+*                 call ZDOTC to perform the dot product.
+*
+                  IF( UPPER ) THEN
+                     CSUMJ = ZDOTC( J-1, AP( IP-J+1 ), 1, X, 1 )
+                  ELSE IF( J.LT.N ) THEN
+                     CSUMJ = ZDOTC( N-J, AP( IP+1 ), 1, X( J+1 ), 1 )
+                  END IF
+               ELSE
+*
+*                 Otherwise, use in-line code for the dot product.
+*
+                  IF( UPPER ) THEN
+                     DO 180 I = 1, J - 1
+                        CSUMJ = CSUMJ + ( DCONJG( AP( IP-J+I ) )*USCAL )
+     $                          *X( I )
+  180                CONTINUE
+                  ELSE IF( J.LT.N ) THEN
+                     DO 190 I = 1, N - J
+                        CSUMJ = CSUMJ + ( DCONJG( AP( IP+I ) )*USCAL )*
+     $                          X( J+I )
+  190                CONTINUE
+                  END IF
+               END IF
+*
+               IF( USCAL.EQ.DCMPLX( TSCAL ) ) THEN
+*
+*                 Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j)
+*                 was not used to scale the dotproduct.
+*
+                  X( J ) = X( J ) - CSUMJ
+                  XJ = CABS1( X( J ) )
+                  IF( NOUNIT ) THEN
+*
+*                    Compute x(j) = x(j) / A(j,j), scaling if necessary.
+*
+                     TJJS = DCONJG( AP( IP ) )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                     IF( TSCAL.EQ.ONE )
+     $                  GO TO 210
+                  END IF
+                  TJJ = CABS1( TJJS )
+                  IF( TJJ.GT.SMLNUM ) THEN
+*
+*                       abs(A(j,j)) > SMLNUM:
+*
+                     IF( TJJ.LT.ONE ) THEN
+                        IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                             Scale X by 1/abs(x(j)).
+*
+                           REC = ONE / XJ
+                           CALL ZDSCAL( N, REC, X, 1 )
+                           SCALE = SCALE*REC
+                           XMAX = XMAX*REC
+                        END IF
+                     END IF
+                     X( J ) = ZLADIV( X( J ), TJJS )
+                  ELSE IF( TJJ.GT.ZERO ) THEN
+*
+*                       0 < abs(A(j,j)) <= SMLNUM:
+*
+                     IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM.
+*
+                        REC = ( TJJ*BIGNUM ) / XJ
+                        CALL ZDSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                     X( J ) = ZLADIV( X( J ), TJJS )
+                  ELSE
+*
+*                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+*                       scale = 0 and compute a solution to A**H *x = 0.
+*
+                     DO 200 I = 1, N
+                        X( I ) = ZERO
+  200                CONTINUE
+                     X( J ) = ONE
+                     SCALE = ZERO
+                     XMAX = ZERO
+                  END IF
+  210             CONTINUE
+               ELSE
+*
+*                 Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot
+*                 product has already been divided by 1/A(j,j).
+*
+                  X( J ) = ZLADIV( X( J ), TJJS ) - CSUMJ
+               END IF
+               XMAX = MAX( XMAX, CABS1( X( J ) ) )
+               JLEN = JLEN + 1
+               IP = IP + JINC*JLEN
+  220       CONTINUE
+         END IF
+         SCALE = SCALE / TSCAL
+      END IF
+*
+*     Scale the column norms by 1/TSCAL for return.
+*
+      IF( TSCAL.NE.ONE ) THEN
+         CALL DSCAL( N, ONE / TSCAL, CNORM, 1 )
+      END IF
+*
+      RETURN
+*
+*     End of ZLATPS
+*
+      END
diff --git a/costa/native/external/lapack/zlatrd.f b/costa/native/external/lapack/zlatrd.f
new file mode 100644
index 000000000..8b0369fcc
--- /dev/null
+++ b/costa/native/external/lapack/zlatrd.f
@@ -0,0 +1,280 @@
+      SUBROUTINE ZLATRD( UPLO, N, NB, A, LDA, E, TAU, W, LDW )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            LDA, LDW, N, NB
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   E( * )
+      COMPLEX*16         A( LDA, * ), TAU( * ), W( LDW, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLATRD reduces NB rows and columns of a complex Hermitian matrix A to
+*  Hermitian tridiagonal form by a unitary similarity
+*  transformation Q' * A * Q, and returns the matrices V and W which are
+*  needed to apply the transformation to the unreduced part of A.
+*
+*  If UPLO = 'U', ZLATRD reduces the last NB rows and columns of a
+*  matrix, of which the upper triangle is supplied;
+*  if UPLO = 'L', ZLATRD reduces the first NB rows and columns of a
+*  matrix, of which the lower triangle is supplied.
+*
+*  This is an auxiliary routine called by ZHETRD.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER
+*          Specifies whether the upper or lower triangular part of the
+*          Hermitian matrix A is stored:
+*          = 'U': Upper triangular
+*          = 'L': Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.
+*
+*  NB      (input) INTEGER
+*          The number of rows and columns to be reduced.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+*          n-by-n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n-by-n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*          On exit:
+*          if UPLO = 'U', the last NB columns have been reduced to
+*            tridiagonal form, with the diagonal elements overwriting
+*            the diagonal elements of A; the elements above the diagonal
+*            with the array TAU, represent the unitary matrix Q as a
+*            product of elementary reflectors;
+*          if UPLO = 'L', the first NB columns have been reduced to
+*            tridiagonal form, with the diagonal elements overwriting
+*            the diagonal elements of A; the elements below the diagonal
+*            with the array TAU, represent the  unitary matrix Q as a
+*            product of elementary reflectors.
+*          See Further Details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  E       (output) DOUBLE PRECISION array, dimension (N-1)
+*          If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal
+*          elements of the last NB columns of the reduced matrix;
+*          if UPLO = 'L', E(1:nb) contains the subdiagonal elements of
+*          the first NB columns of the reduced matrix.
+*
+*  TAU     (output) COMPLEX*16 array, dimension (N-1)
+*          The scalar factors of the elementary reflectors, stored in
+*          TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'.
+*          See Further Details.
+*
+*  W       (output) COMPLEX*16 array, dimension (LDW,NB)
+*          The n-by-nb matrix W required to update the unreduced part
+*          of A.
+*
+*  LDW     (input) INTEGER
+*          The leading dimension of the array W. LDW >= max(1,N).
+*
+*  Further Details
+*  ===============
+*
+*  If UPLO = 'U', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(n) H(n-1) . . . H(n-nb+1).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i),
+*  and tau in TAU(i-1).
+*
+*  If UPLO = 'L', the matrix Q is represented as a product of elementary
+*  reflectors
+*
+*     Q = H(1) H(2) . . . H(nb).
+*
+*  Each H(i) has the form
+*
+*     H(i) = I - tau * v * v'
+*
+*  where tau is a complex scalar, and v is a complex vector with
+*  v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i),
+*  and tau in TAU(i).
+*
+*  The elements of the vectors v together form the n-by-nb matrix V
+*  which is needed, with W, to apply the transformation to the unreduced
+*  part of the matrix, using a Hermitian rank-2k update of the form:
+*  A := A - V*W' - W*V'.
+*
+*  The contents of A on exit are illustrated by the following examples
+*  with n = 5 and nb = 2:
+*
+*  if UPLO = 'U':                       if UPLO = 'L':
+*
+*    (  a   a   a   v4  v5 )              (  d                  )
+*    (      a   a   v4  v5 )              (  1   d              )
+*    (          a   1   v5 )              (  v1  1   a          )
+*    (              d   1  )              (  v1  v2  a   a      )
+*    (                  d  )              (  v1  v2  a   a   a  )
+*
+*  where d denotes a diagonal element of the reduced matrix, a denotes
+*  an element of the original matrix that is unchanged, and vi denotes
+*  an element of the vector defining H(i).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO, ONE, HALF
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   ONE = ( 1.0D+0, 0.0D+0 ),
+     $                   HALF = ( 0.5D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IW
+      COMPLEX*16         ALPHA
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZAXPY, ZGEMV, ZHEMV, ZLACGV, ZLARFG, ZSCAL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      COMPLEX*16         ZDOTC
+      EXTERNAL           LSAME, ZDOTC
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 )
+     $   RETURN
+*
+      IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*        Reduce last NB columns of upper triangle
+*
+         DO 10 I = N, N - NB + 1, -1
+            IW = I - N + NB
+            IF( I.LT.N ) THEN
+*
+*              Update A(1:i,i)
+*
+               A( I, I ) = DBLE( A( I, I ) )
+               CALL ZLACGV( N-I, W( I, IW+1 ), LDW )
+               CALL ZGEMV( 'No transpose', I, N-I, -ONE, A( 1, I+1 ),
+     $                     LDA, W( I, IW+1 ), LDW, ONE, A( 1, I ), 1 )
+               CALL ZLACGV( N-I, W( I, IW+1 ), LDW )
+               CALL ZLACGV( N-I, A( I, I+1 ), LDA )
+               CALL ZGEMV( 'No transpose', I, N-I, -ONE, W( 1, IW+1 ),
+     $                     LDW, A( I, I+1 ), LDA, ONE, A( 1, I ), 1 )
+               CALL ZLACGV( N-I, A( I, I+1 ), LDA )
+               A( I, I ) = DBLE( A( I, I ) )
+            END IF
+            IF( I.GT.1 ) THEN
+*
+*              Generate elementary reflector H(i) to annihilate
+*              A(1:i-2,i)
+*
+               ALPHA = A( I-1, I )
+               CALL ZLARFG( I-1, ALPHA, A( 1, I ), 1, TAU( I-1 ) )
+               E( I-1 ) = ALPHA
+               A( I-1, I ) = ONE
+*
+*              Compute W(1:i-1,i)
+*
+               CALL ZHEMV( 'Upper', I-1, ONE, A, LDA, A( 1, I ), 1,
+     $                     ZERO, W( 1, IW ), 1 )
+               IF( I.LT.N ) THEN
+                  CALL ZGEMV( 'Conjugate transpose', I-1, N-I, ONE,
+     $                        W( 1, IW+1 ), LDW, A( 1, I ), 1, ZERO,
+     $                        W( I+1, IW ), 1 )
+                  CALL ZGEMV( 'No transpose', I-1, N-I, -ONE,
+     $                        A( 1, I+1 ), LDA, W( I+1, IW ), 1, ONE,
+     $                        W( 1, IW ), 1 )
+                  CALL ZGEMV( 'Conjugate transpose', I-1, N-I, ONE,
+     $                        A( 1, I+1 ), LDA, A( 1, I ), 1, ZERO,
+     $                        W( I+1, IW ), 1 )
+                  CALL ZGEMV( 'No transpose', I-1, N-I, -ONE,
+     $                        W( 1, IW+1 ), LDW, W( I+1, IW ), 1, ONE,
+     $                        W( 1, IW ), 1 )
+               END IF
+               CALL ZSCAL( I-1, TAU( I-1 ), W( 1, IW ), 1 )
+               ALPHA = -HALF*TAU( I-1 )*ZDOTC( I-1, W( 1, IW ), 1,
+     $                 A( 1, I ), 1 )
+               CALL ZAXPY( I-1, ALPHA, A( 1, I ), 1, W( 1, IW ), 1 )
+            END IF
+*
+   10    CONTINUE
+      ELSE
+*
+*        Reduce first NB columns of lower triangle
+*
+         DO 20 I = 1, NB
+*
+*           Update A(i:n,i)
+*
+            A( I, I ) = DBLE( A( I, I ) )
+            CALL ZLACGV( I-1, W( I, 1 ), LDW )
+            CALL ZGEMV( 'No transpose', N-I+1, I-1, -ONE, A( I, 1 ),
+     $                  LDA, W( I, 1 ), LDW, ONE, A( I, I ), 1 )
+            CALL ZLACGV( I-1, W( I, 1 ), LDW )
+            CALL ZLACGV( I-1, A( I, 1 ), LDA )
+            CALL ZGEMV( 'No transpose', N-I+1, I-1, -ONE, W( I, 1 ),
+     $                  LDW, A( I, 1 ), LDA, ONE, A( I, I ), 1 )
+            CALL ZLACGV( I-1, A( I, 1 ), LDA )
+            A( I, I ) = DBLE( A( I, I ) )
+            IF( I.LT.N ) THEN
+*
+*              Generate elementary reflector H(i) to annihilate
+*              A(i+2:n,i)
+*
+               ALPHA = A( I+1, I )
+               CALL ZLARFG( N-I, ALPHA, A( MIN( I+2, N ), I ), 1,
+     $                      TAU( I ) )
+               E( I ) = ALPHA
+               A( I+1, I ) = ONE
+*
+*              Compute W(i+1:n,i)
+*
+               CALL ZHEMV( 'Lower', N-I, ONE, A( I+1, I+1 ), LDA,
+     $                     A( I+1, I ), 1, ZERO, W( I+1, I ), 1 )
+               CALL ZGEMV( 'Conjugate transpose', N-I, I-1, ONE,
+     $                     W( I+1, 1 ), LDW, A( I+1, I ), 1, ZERO,
+     $                     W( 1, I ), 1 )
+               CALL ZGEMV( 'No transpose', N-I, I-1, -ONE, A( I+1, 1 ),
+     $                     LDA, W( 1, I ), 1, ONE, W( I+1, I ), 1 )
+               CALL ZGEMV( 'Conjugate transpose', N-I, I-1, ONE,
+     $                     A( I+1, 1 ), LDA, A( I+1, I ), 1, ZERO,
+     $                     W( 1, I ), 1 )
+               CALL ZGEMV( 'No transpose', N-I, I-1, -ONE, W( I+1, 1 ),
+     $                     LDW, W( 1, I ), 1, ONE, W( I+1, I ), 1 )
+               CALL ZSCAL( N-I, TAU( I ), W( I+1, I ), 1 )
+               ALPHA = -HALF*TAU( I )*ZDOTC( N-I, W( I+1, I ), 1,
+     $                 A( I+1, I ), 1 )
+               CALL ZAXPY( N-I, ALPHA, A( I+1, I ), 1, W( I+1, I ), 1 )
+            END IF
+*
+   20    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZLATRD
+*
+      END
diff --git a/costa/native/external/lapack/zlatrs.f b/costa/native/external/lapack/zlatrs.f
new file mode 100644
index 000000000..4ae39729a
--- /dev/null
+++ b/costa/native/external/lapack/zlatrs.f
@@ -0,0 +1,880 @@
+      SUBROUTINE ZLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X, SCALE,
+     $                   CNORM, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORMIN, TRANS, UPLO
+      INTEGER            INFO, LDA, N
+      DOUBLE PRECISION   SCALE
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   CNORM( * )
+      COMPLEX*16         A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLATRS solves one of the triangular systems
+*
+*     A * x = s*b,  A**T * x = s*b,  or  A**H * x = s*b,
+*
+*  with scaling to prevent overflow.  Here A is an upper or lower
+*  triangular matrix, A**T denotes the transpose of A, A**H denotes the
+*  conjugate transpose of A, x and b are n-element vectors, and s is a
+*  scaling factor, usually less than or equal to 1, chosen so that the
+*  components of x will be less than the overflow threshold.  If the
+*  unscaled problem will not cause overflow, the Level 2 BLAS routine
+*  ZTRSV is called. If the matrix A is singular (A(j,j) = 0 for some j),
+*  then s is set to 0 and a non-trivial solution to A*x = 0 is returned.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the matrix A is upper or lower triangular.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the operation applied to A.
+*          = 'N':  Solve A * x = s*b     (No transpose)
+*          = 'T':  Solve A**T * x = s*b  (Transpose)
+*          = 'C':  Solve A**H * x = s*b  (Conjugate transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          Specifies whether or not the matrix A is unit triangular.
+*          = 'N':  Non-unit triangular
+*          = 'U':  Unit triangular
+*
+*  NORMIN  (input) CHARACTER*1
+*          Specifies whether CNORM has been set or not.
+*          = 'Y':  CNORM contains the column norms on entry
+*          = 'N':  CNORM is not set on entry.  On exit, the norms will
+*                  be computed and stored in CNORM.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,N)
+*          The triangular matrix A.  If UPLO = 'U', the leading n by n
+*          upper triangular part of the array A contains the upper
+*          triangular matrix, and the strictly lower triangular part of
+*          A is not referenced.  If UPLO = 'L', the leading n by n lower
+*          triangular part of the array A contains the lower triangular
+*          matrix, and the strictly upper triangular part of A is not
+*          referenced.  If DIAG = 'U', the diagonal elements of A are
+*          also not referenced and are assumed to be 1.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max (1,N).
+*
+*  X       (input/output) COMPLEX*16 array, dimension (N)
+*          On entry, the right hand side b of the triangular system.
+*          On exit, X is overwritten by the solution vector x.
+*
+*  SCALE   (output) DOUBLE PRECISION
+*          The scaling factor s for the triangular system
+*             A * x = s*b,  A**T * x = s*b,  or  A**H * x = s*b.
+*          If SCALE = 0, the matrix A is singular or badly scaled, and
+*          the vector x is an exact or approximate solution to A*x = 0.
+*
+*  CNORM   (input or output) DOUBLE PRECISION array, dimension (N)
+*
+*          If NORMIN = 'Y', CNORM is an input argument and CNORM(j)
+*          contains the norm of the off-diagonal part of the j-th column
+*          of A.  If TRANS = 'N', CNORM(j) must be greater than or equal
+*          to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)
+*          must be greater than or equal to the 1-norm.
+*
+*          If NORMIN = 'N', CNORM is an output argument and CNORM(j)
+*          returns the 1-norm of the offdiagonal part of the j-th column
+*          of A.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -k, the k-th argument had an illegal value
+*
+*  Further Details
+*  ======= =======
+*
+*  A rough bound on x is computed; if that is less than overflow, ZTRSV
+*  is called, otherwise, specific code is used which checks for possible
+*  overflow or divide-by-zero at every operation.
+*
+*  A columnwise scheme is used for solving A*x = b.  The basic algorithm
+*  if A is lower triangular is
+*
+*       x[1:n] := b[1:n]
+*       for j = 1, ..., n
+*            x(j) := x(j) / A(j,j)
+*            x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j]
+*       end
+*
+*  Define bounds on the components of x after j iterations of the loop:
+*     M(j) = bound on x[1:j]
+*     G(j) = bound on x[j+1:n]
+*  Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}.
+*
+*  Then for iteration j+1 we have
+*     M(j+1) <= G(j) / | A(j+1,j+1) |
+*     G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] |
+*            <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | )
+*
+*  where CNORM(j+1) is greater than or equal to the infinity-norm of
+*  column j+1 of A, not counting the diagonal.  Hence
+*
+*     G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | )
+*                  1<=i<=j
+*  and
+*
+*     |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| )
+*                                   1<=i< j
+*
+*  Since |x(j)| <= M(j), we use the Level 2 BLAS routine ZTRSV if the
+*  reciprocal of the largest M(j), j=1,..,n, is larger than
+*  max(underflow, 1/overflow).
+*
+*  The bound on x(j) is also used to determine when a step in the
+*  columnwise method can be performed without fear of overflow.  If
+*  the computed bound is greater than a large constant, x is scaled to
+*  prevent overflow, but if the bound overflows, x is set to 0, x(j) to
+*  1, and scale to 0, and a non-trivial solution to A*x = 0 is found.
+*
+*  Similarly, a row-wise scheme is used to solve A**T *x = b  or
+*  A**H *x = b.  The basic algorithm for A upper triangular is
+*
+*       for j = 1, ..., n
+*            x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j)
+*       end
+*
+*  We simultaneously compute two bounds
+*       G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j
+*       M(j) = bound on x(i), 1<=i<=j
+*
+*  The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we
+*  add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1.
+*  Then the bound on x(j) is
+*
+*       M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) |
+*
+*            <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| )
+*                      1<=i<=j
+*
+*  and we can safely call ZTRSV if 1/M(n) and 1/G(n) are both greater
+*  than max(underflow, 1/overflow).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, HALF, ONE, TWO
+      PARAMETER          ( ZERO = 0.0D+0, HALF = 0.5D+0, ONE = 1.0D+0,
+     $                   TWO = 2.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN, NOUNIT, UPPER
+      INTEGER            I, IMAX, J, JFIRST, JINC, JLAST
+      DOUBLE PRECISION   BIGNUM, GROW, REC, SMLNUM, TJJ, TMAX, TSCAL,
+     $                   XBND, XJ, XMAX
+      COMPLEX*16         CSUMJ, TJJS, USCAL, ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IDAMAX, IZAMAX
+      DOUBLE PRECISION   DLAMCH, DZASUM
+      COMPLEX*16         ZDOTC, ZDOTU, ZLADIV
+      EXTERNAL           LSAME, IDAMAX, IZAMAX, DLAMCH, DZASUM, ZDOTC,
+     $                   ZDOTU, ZLADIV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DSCAL, XERBLA, ZAXPY, ZDSCAL, ZTRSV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, MIN
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1, CABS2
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+      CABS2( ZDUM ) = ABS( DBLE( ZDUM ) / 2.D0 ) +
+     $                ABS( DIMAG( ZDUM ) / 2.D0 )
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+*     Test the input parameters.
+*
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT.
+     $         LSAME( NORMIN, 'N' ) ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZLATRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine machine dependent parameters to control overflow.
+*
+      SMLNUM = DLAMCH( 'Safe minimum' )
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SMLNUM / DLAMCH( 'Precision' )
+      BIGNUM = ONE / SMLNUM
+      SCALE = ONE
+*
+      IF( LSAME( NORMIN, 'N' ) ) THEN
+*
+*        Compute the 1-norm of each column, not including the diagonal.
+*
+         IF( UPPER ) THEN
+*
+*           A is upper triangular.
+*
+            DO 10 J = 1, N
+               CNORM( J ) = DZASUM( J-1, A( 1, J ), 1 )
+   10       CONTINUE
+         ELSE
+*
+*           A is lower triangular.
+*
+            DO 20 J = 1, N - 1
+               CNORM( J ) = DZASUM( N-J, A( J+1, J ), 1 )
+   20       CONTINUE
+            CNORM( N ) = ZERO
+         END IF
+      END IF
+*
+*     Scale the column norms by TSCAL if the maximum element in CNORM is
+*     greater than BIGNUM/2.
+*
+      IMAX = IDAMAX( N, CNORM, 1 )
+      TMAX = CNORM( IMAX )
+      IF( TMAX.LE.BIGNUM*HALF ) THEN
+         TSCAL = ONE
+      ELSE
+         TSCAL = HALF / ( SMLNUM*TMAX )
+         CALL DSCAL( N, TSCAL, CNORM, 1 )
+      END IF
+*
+*     Compute a bound on the computed solution vector to see if the
+*     Level 2 BLAS routine ZTRSV can be used.
+*
+      XMAX = ZERO
+      DO 30 J = 1, N
+         XMAX = MAX( XMAX, CABS2( X( J ) ) )
+   30 CONTINUE
+      XBND = XMAX
+*
+      IF( NOTRAN ) THEN
+*
+*        Compute the growth in A * x = b.
+*
+         IF( UPPER ) THEN
+            JFIRST = N
+            JLAST = 1
+            JINC = -1
+         ELSE
+            JFIRST = 1
+            JLAST = N
+            JINC = 1
+         END IF
+*
+         IF( TSCAL.NE.ONE ) THEN
+            GROW = ZERO
+            GO TO 60
+         END IF
+*
+         IF( NOUNIT ) THEN
+*
+*           A is non-unit triangular.
+*
+*           Compute GROW = 1/G(j) and XBND = 1/M(j).
+*           Initially, G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = HALF / MAX( XBND, SMLNUM )
+            XBND = GROW
+            DO 40 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 60
+*
+               TJJS = A( J, J )
+               TJJ = CABS1( TJJS )
+*
+               IF( TJJ.GE.SMLNUM ) THEN
+*
+*                 M(j) = G(j-1) / abs(A(j,j))
+*
+                  XBND = MIN( XBND, MIN( ONE, TJJ )*GROW )
+               ELSE
+*
+*                 M(j) could overflow, set XBND to 0.
+*
+                  XBND = ZERO
+               END IF
+*
+               IF( TJJ+CNORM( J ).GE.SMLNUM ) THEN
+*
+*                 G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) )
+*
+                  GROW = GROW*( TJJ / ( TJJ+CNORM( J ) ) )
+               ELSE
+*
+*                 G(j) could overflow, set GROW to 0.
+*
+                  GROW = ZERO
+               END IF
+   40       CONTINUE
+            GROW = XBND
+         ELSE
+*
+*           A is unit triangular.
+*
+*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) )
+            DO 50 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 60
+*
+*              G(j) = G(j-1)*( 1 + CNORM(j) )
+*
+               GROW = GROW*( ONE / ( ONE+CNORM( J ) ) )
+   50       CONTINUE
+         END IF
+   60    CONTINUE
+*
+      ELSE
+*
+*        Compute the growth in A**T * x = b  or  A**H * x = b.
+*
+         IF( UPPER ) THEN
+            JFIRST = 1
+            JLAST = N
+            JINC = 1
+         ELSE
+            JFIRST = N
+            JLAST = 1
+            JINC = -1
+         END IF
+*
+         IF( TSCAL.NE.ONE ) THEN
+            GROW = ZERO
+            GO TO 90
+         END IF
+*
+         IF( NOUNIT ) THEN
+*
+*           A is non-unit triangular.
+*
+*           Compute GROW = 1/G(j) and XBND = 1/M(j).
+*           Initially, M(0) = max{x(i), i=1,...,n}.
+*
+            GROW = HALF / MAX( XBND, SMLNUM )
+            XBND = GROW
+            DO 70 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 90
+*
+*              G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) )
+*
+               XJ = ONE + CNORM( J )
+               GROW = MIN( GROW, XBND / XJ )
+*
+               TJJS = A( J, J )
+               TJJ = CABS1( TJJS )
+*
+               IF( TJJ.GE.SMLNUM ) THEN
+*
+*                 M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j))
+*
+                  IF( XJ.GT.TJJ )
+     $               XBND = XBND*( TJJ / XJ )
+               ELSE
+*
+*                 M(j) could overflow, set XBND to 0.
+*
+                  XBND = ZERO
+               END IF
+   70       CONTINUE
+            GROW = MIN( GROW, XBND )
+         ELSE
+*
+*           A is unit triangular.
+*
+*           Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
+*
+            GROW = MIN( ONE, HALF / MAX( XBND, SMLNUM ) )
+            DO 80 J = JFIRST, JLAST, JINC
+*
+*              Exit the loop if the growth factor is too small.
+*
+               IF( GROW.LE.SMLNUM )
+     $            GO TO 90
+*
+*              G(j) = ( 1 + CNORM(j) )*G(j-1)
+*
+               XJ = ONE + CNORM( J )
+               GROW = GROW / XJ
+   80       CONTINUE
+         END IF
+   90    CONTINUE
+      END IF
+*
+      IF( ( GROW*TSCAL ).GT.SMLNUM ) THEN
+*
+*        Use the Level 2 BLAS solve if the reciprocal of the bound on
+*        elements of X is not too small.
+*
+         CALL ZTRSV( UPLO, TRANS, DIAG, N, A, LDA, X, 1 )
+      ELSE
+*
+*        Use a Level 1 BLAS solve, scaling intermediate results.
+*
+         IF( XMAX.GT.BIGNUM*HALF ) THEN
+*
+*           Scale X so that its components are less than or equal to
+*           BIGNUM in absolute value.
+*
+            SCALE = ( BIGNUM*HALF ) / XMAX
+            CALL ZDSCAL( N, SCALE, X, 1 )
+            XMAX = BIGNUM
+         ELSE
+            XMAX = XMAX*TWO
+         END IF
+*
+         IF( NOTRAN ) THEN
+*
+*           Solve A * x = b
+*
+            DO 120 J = JFIRST, JLAST, JINC
+*
+*              Compute x(j) = b(j) / A(j,j), scaling x if necessary.
+*
+               XJ = CABS1( X( J ) )
+               IF( NOUNIT ) THEN
+                  TJJS = A( J, J )*TSCAL
+               ELSE
+                  TJJS = TSCAL
+                  IF( TSCAL.EQ.ONE )
+     $               GO TO 110
+               END IF
+               TJJ = CABS1( TJJS )
+               IF( TJJ.GT.SMLNUM ) THEN
+*
+*                    abs(A(j,j)) > SMLNUM:
+*
+                  IF( TJJ.LT.ONE ) THEN
+                     IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                          Scale x by 1/b(j).
+*
+                        REC = ONE / XJ
+                        CALL ZDSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                  END IF
+                  X( J ) = ZLADIV( X( J ), TJJS )
+                  XJ = CABS1( X( J ) )
+               ELSE IF( TJJ.GT.ZERO ) THEN
+*
+*                    0 < abs(A(j,j)) <= SMLNUM:
+*
+                  IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                       Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM
+*                       to avoid overflow when dividing by A(j,j).
+*
+                     REC = ( TJJ*BIGNUM ) / XJ
+                     IF( CNORM( J ).GT.ONE ) THEN
+*
+*                          Scale by 1/CNORM(j) to avoid overflow when
+*                          multiplying x(j) times column j.
+*
+                        REC = REC / CNORM( J )
+                     END IF
+                     CALL ZDSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                     XMAX = XMAX*REC
+                  END IF
+                  X( J ) = ZLADIV( X( J ), TJJS )
+                  XJ = CABS1( X( J ) )
+               ELSE
+*
+*                    A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+*                    scale = 0, and compute a solution to A*x = 0.
+*
+                  DO 100 I = 1, N
+                     X( I ) = ZERO
+  100             CONTINUE
+                  X( J ) = ONE
+                  XJ = ONE
+                  SCALE = ZERO
+                  XMAX = ZERO
+               END IF
+  110          CONTINUE
+*
+*              Scale x if necessary to avoid overflow when adding a
+*              multiple of column j of A.
+*
+               IF( XJ.GT.ONE ) THEN
+                  REC = ONE / XJ
+                  IF( CNORM( J ).GT.( BIGNUM-XMAX )*REC ) THEN
+*
+*                    Scale x by 1/(2*abs(x(j))).
+*
+                     REC = REC*HALF
+                     CALL ZDSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                  END IF
+               ELSE IF( XJ*CNORM( J ).GT.( BIGNUM-XMAX ) ) THEN
+*
+*                 Scale x by 1/2.
+*
+                  CALL ZDSCAL( N, HALF, X, 1 )
+                  SCALE = SCALE*HALF
+               END IF
+*
+               IF( UPPER ) THEN
+                  IF( J.GT.1 ) THEN
+*
+*                    Compute the update
+*                       x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j)
+*
+                     CALL ZAXPY( J-1, -X( J )*TSCAL, A( 1, J ), 1, X,
+     $                           1 )
+                     I = IZAMAX( J-1, X, 1 )
+                     XMAX = CABS1( X( I ) )
+                  END IF
+               ELSE
+                  IF( J.LT.N ) THEN
+*
+*                    Compute the update
+*                       x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j)
+*
+                     CALL ZAXPY( N-J, -X( J )*TSCAL, A( J+1, J ), 1,
+     $                           X( J+1 ), 1 )
+                     I = J + IZAMAX( N-J, X( J+1 ), 1 )
+                     XMAX = CABS1( X( I ) )
+                  END IF
+               END IF
+  120       CONTINUE
+*
+         ELSE IF( LSAME( TRANS, 'T' ) ) THEN
+*
+*           Solve A**T * x = b
+*
+            DO 170 J = JFIRST, JLAST, JINC
+*
+*              Compute x(j) = b(j) - sum A(k,j)*x(k).
+*                                    k<>j
+*
+               XJ = CABS1( X( J ) )
+               USCAL = TSCAL
+               REC = ONE / MAX( XMAX, ONE )
+               IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN
+*
+*                 If x(j) could overflow, scale x by 1/(2*XMAX).
+*
+                  REC = REC*HALF
+                  IF( NOUNIT ) THEN
+                     TJJS = A( J, J )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                  END IF
+                  TJJ = CABS1( TJJS )
+                  IF( TJJ.GT.ONE ) THEN
+*
+*                       Divide by A(j,j) when scaling x if A(j,j) > 1.
+*
+                     REC = MIN( ONE, REC*TJJ )
+                     USCAL = ZLADIV( USCAL, TJJS )
+                  END IF
+                  IF( REC.LT.ONE ) THEN
+                     CALL ZDSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                     XMAX = XMAX*REC
+                  END IF
+               END IF
+*
+               CSUMJ = ZERO
+               IF( USCAL.EQ.DCMPLX( ONE ) ) THEN
+*
+*                 If the scaling needed for A in the dot product is 1,
+*                 call ZDOTU to perform the dot product.
+*
+                  IF( UPPER ) THEN
+                     CSUMJ = ZDOTU( J-1, A( 1, J ), 1, X, 1 )
+                  ELSE IF( J.LT.N ) THEN
+                     CSUMJ = ZDOTU( N-J, A( J+1, J ), 1, X( J+1 ), 1 )
+                  END IF
+               ELSE
+*
+*                 Otherwise, use in-line code for the dot product.
+*
+                  IF( UPPER ) THEN
+                     DO 130 I = 1, J - 1
+                        CSUMJ = CSUMJ + ( A( I, J )*USCAL )*X( I )
+  130                CONTINUE
+                  ELSE IF( J.LT.N ) THEN
+                     DO 140 I = J + 1, N
+                        CSUMJ = CSUMJ + ( A( I, J )*USCAL )*X( I )
+  140                CONTINUE
+                  END IF
+               END IF
+*
+               IF( USCAL.EQ.DCMPLX( TSCAL ) ) THEN
+*
+*                 Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j)
+*                 was not used to scale the dotproduct.
+*
+                  X( J ) = X( J ) - CSUMJ
+                  XJ = CABS1( X( J ) )
+                  IF( NOUNIT ) THEN
+                     TJJS = A( J, J )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                     IF( TSCAL.EQ.ONE )
+     $                  GO TO 160
+                  END IF
+*
+*                    Compute x(j) = x(j) / A(j,j), scaling if necessary.
+*
+                  TJJ = CABS1( TJJS )
+                  IF( TJJ.GT.SMLNUM ) THEN
+*
+*                       abs(A(j,j)) > SMLNUM:
+*
+                     IF( TJJ.LT.ONE ) THEN
+                        IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                             Scale X by 1/abs(x(j)).
+*
+                           REC = ONE / XJ
+                           CALL ZDSCAL( N, REC, X, 1 )
+                           SCALE = SCALE*REC
+                           XMAX = XMAX*REC
+                        END IF
+                     END IF
+                     X( J ) = ZLADIV( X( J ), TJJS )
+                  ELSE IF( TJJ.GT.ZERO ) THEN
+*
+*                       0 < abs(A(j,j)) <= SMLNUM:
+*
+                     IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM.
+*
+                        REC = ( TJJ*BIGNUM ) / XJ
+                        CALL ZDSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                     X( J ) = ZLADIV( X( J ), TJJS )
+                  ELSE
+*
+*                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+*                       scale = 0 and compute a solution to A**T *x = 0.
+*
+                     DO 150 I = 1, N
+                        X( I ) = ZERO
+  150                CONTINUE
+                     X( J ) = ONE
+                     SCALE = ZERO
+                     XMAX = ZERO
+                  END IF
+  160             CONTINUE
+               ELSE
+*
+*                 Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot
+*                 product has already been divided by 1/A(j,j).
+*
+                  X( J ) = ZLADIV( X( J ), TJJS ) - CSUMJ
+               END IF
+               XMAX = MAX( XMAX, CABS1( X( J ) ) )
+  170       CONTINUE
+*
+         ELSE
+*
+*           Solve A**H * x = b
+*
+            DO 220 J = JFIRST, JLAST, JINC
+*
+*              Compute x(j) = b(j) - sum A(k,j)*x(k).
+*                                    k<>j
+*
+               XJ = CABS1( X( J ) )
+               USCAL = TSCAL
+               REC = ONE / MAX( XMAX, ONE )
+               IF( CNORM( J ).GT.( BIGNUM-XJ )*REC ) THEN
+*
+*                 If x(j) could overflow, scale x by 1/(2*XMAX).
+*
+                  REC = REC*HALF
+                  IF( NOUNIT ) THEN
+                     TJJS = DCONJG( A( J, J ) )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                  END IF
+                  TJJ = CABS1( TJJS )
+                  IF( TJJ.GT.ONE ) THEN
+*
+*                       Divide by A(j,j) when scaling x if A(j,j) > 1.
+*
+                     REC = MIN( ONE, REC*TJJ )
+                     USCAL = ZLADIV( USCAL, TJJS )
+                  END IF
+                  IF( REC.LT.ONE ) THEN
+                     CALL ZDSCAL( N, REC, X, 1 )
+                     SCALE = SCALE*REC
+                     XMAX = XMAX*REC
+                  END IF
+               END IF
+*
+               CSUMJ = ZERO
+               IF( USCAL.EQ.DCMPLX( ONE ) ) THEN
+*
+*                 If the scaling needed for A in the dot product is 1,
+*                 call ZDOTC to perform the dot product.
+*
+                  IF( UPPER ) THEN
+                     CSUMJ = ZDOTC( J-1, A( 1, J ), 1, X, 1 )
+                  ELSE IF( J.LT.N ) THEN
+                     CSUMJ = ZDOTC( N-J, A( J+1, J ), 1, X( J+1 ), 1 )
+                  END IF
+               ELSE
+*
+*                 Otherwise, use in-line code for the dot product.
+*
+                  IF( UPPER ) THEN
+                     DO 180 I = 1, J - 1
+                        CSUMJ = CSUMJ + ( DCONJG( A( I, J ) )*USCAL )*
+     $                          X( I )
+  180                CONTINUE
+                  ELSE IF( J.LT.N ) THEN
+                     DO 190 I = J + 1, N
+                        CSUMJ = CSUMJ + ( DCONJG( A( I, J ) )*USCAL )*
+     $                          X( I )
+  190                CONTINUE
+                  END IF
+               END IF
+*
+               IF( USCAL.EQ.DCMPLX( TSCAL ) ) THEN
+*
+*                 Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j)
+*                 was not used to scale the dotproduct.
+*
+                  X( J ) = X( J ) - CSUMJ
+                  XJ = CABS1( X( J ) )
+                  IF( NOUNIT ) THEN
+                     TJJS = DCONJG( A( J, J ) )*TSCAL
+                  ELSE
+                     TJJS = TSCAL
+                     IF( TSCAL.EQ.ONE )
+     $                  GO TO 210
+                  END IF
+*
+*                    Compute x(j) = x(j) / A(j,j), scaling if necessary.
+*
+                  TJJ = CABS1( TJJS )
+                  IF( TJJ.GT.SMLNUM ) THEN
+*
+*                       abs(A(j,j)) > SMLNUM:
+*
+                     IF( TJJ.LT.ONE ) THEN
+                        IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                             Scale X by 1/abs(x(j)).
+*
+                           REC = ONE / XJ
+                           CALL ZDSCAL( N, REC, X, 1 )
+                           SCALE = SCALE*REC
+                           XMAX = XMAX*REC
+                        END IF
+                     END IF
+                     X( J ) = ZLADIV( X( J ), TJJS )
+                  ELSE IF( TJJ.GT.ZERO ) THEN
+*
+*                       0 < abs(A(j,j)) <= SMLNUM:
+*
+                     IF( XJ.GT.TJJ*BIGNUM ) THEN
+*
+*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM.
+*
+                        REC = ( TJJ*BIGNUM ) / XJ
+                        CALL ZDSCAL( N, REC, X, 1 )
+                        SCALE = SCALE*REC
+                        XMAX = XMAX*REC
+                     END IF
+                     X( J ) = ZLADIV( X( J ), TJJS )
+                  ELSE
+*
+*                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+*                       scale = 0 and compute a solution to A**H *x = 0.
+*
+                     DO 200 I = 1, N
+                        X( I ) = ZERO
+  200                CONTINUE
+                     X( J ) = ONE
+                     SCALE = ZERO
+                     XMAX = ZERO
+                  END IF
+  210             CONTINUE
+               ELSE
+*
+*                 Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot
+*                 product has already been divided by 1/A(j,j).
+*
+                  X( J ) = ZLADIV( X( J ), TJJS ) - CSUMJ
+               END IF
+               XMAX = MAX( XMAX, CABS1( X( J ) ) )
+  220       CONTINUE
+         END IF
+         SCALE = SCALE / TSCAL
+      END IF
+*
+*     Scale the column norms by 1/TSCAL for return.
+*
+      IF( TSCAL.NE.ONE ) THEN
+         CALL DSCAL( N, ONE / TSCAL, CNORM, 1 )
+      END IF
+*
+      RETURN
+*
+*     End of ZLATRS
+*
+      END
diff --git a/costa/native/external/lapack/zlatrz.f b/costa/native/external/lapack/zlatrz.f
new file mode 100644
index 000000000..5e82f3d20
--- /dev/null
+++ b/costa/native/external/lapack/zlatrz.f
@@ -0,0 +1,134 @@
+      SUBROUTINE ZLATRZ( M, N, L, A, LDA, TAU, WORK )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            L, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLATRZ factors the M-by-(M+L) complex upper trapezoidal matrix
+*  [ A1 A2 ] = [ A(1:M,1:M) A(1:M,N-L+1:N) ] as ( R  0 ) * Z by means
+*  of unitary transformations, where  Z is an (M+L)-by-(M+L) unitary
+*  matrix and, R and A1 are M-by-M upper triangular matrices.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  L       (input) INTEGER
+*          The number of columns of the matrix A containing the
+*          meaningful part of the Householder vectors. N-M >= L >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the leading M-by-N upper trapezoidal part of the
+*          array A must contain the matrix to be factorized.
+*          On exit, the leading M-by-M upper triangular part of A
+*          contains the upper triangular matrix R, and elements N-L+1 to
+*          N of the first M rows of A, with the array TAU, represent the
+*          unitary matrix Z as a product of M elementary reflectors.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) COMPLEX*16 array, dimension (M)
+*          The scalar factors of the elementary reflectors.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (M)
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+*  The factorization is obtained by Householder's method.  The kth
+*  transformation matrix, Z( k ), which is used to introduce zeros into
+*  the ( m - k + 1 )th row of A, is given in the form
+*
+*     Z( k ) = ( I     0   ),
+*              ( 0  T( k ) )
+*
+*  where
+*
+*     T( k ) = I - tau*u( k )*u( k )',   u( k ) = (   1    ),
+*                                                 (   0    )
+*                                                 ( z( k ) )
+*
+*  tau is a scalar and z( k ) is an l element vector. tau and z( k )
+*  are chosen to annihilate the elements of the kth row of A2.
+*
+*  The scalar tau is returned in the kth element of TAU and the vector
+*  u( k ) in the kth row of A2, such that the elements of z( k ) are
+*  in  a( k, l + 1 ), ..., a( k, n ). The elements of R are returned in
+*  the upper triangular part of A1.
+*
+*  Z is given by
+*
+*     Z =  Z( 1 ) * Z( 2 ) * ... * Z( m ).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      COMPLEX*16         ALPHA
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZLACGV, ZLARFG, ZLARZ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 ) THEN
+         RETURN
+      ELSE IF( M.EQ.N ) THEN
+         DO 10 I = 1, N
+            TAU( I ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      DO 20 I = M, 1, -1
+*
+*        Generate elementary reflector H(i) to annihilate
+*        [ A(i,i) A(i,n-l+1:n) ]
+*
+         CALL ZLACGV( L, A( I, N-L+1 ), LDA )
+         ALPHA = DCONJG( A( I, I ) )
+         CALL ZLARFG( L+1, ALPHA, A( I, N-L+1 ), LDA, TAU( I ) )
+         TAU( I ) = DCONJG( TAU( I ) )
+*
+*        Apply H(i) to A(1:i-1,i:n) from the right
+*
+         CALL ZLARZ( 'Right', I-1, N-I+1, L, A( I, N-L+1 ), LDA,
+     $               DCONJG( TAU( I ) ), A( 1, I ), LDA, WORK )
+         A( I, I ) = DCONJG( ALPHA )
+*
+   20 CONTINUE
+*
+      RETURN
+*
+*     End of ZLATRZ
+*
+      END
diff --git a/costa/native/external/lapack/zlatzm.f b/costa/native/external/lapack/zlatzm.f
new file mode 100644
index 000000000..e81223f45
--- /dev/null
+++ b/costa/native/external/lapack/zlatzm.f
@@ -0,0 +1,147 @@
+      SUBROUTINE ZLATZM( SIDE, M, N, V, INCV, TAU, C1, C2, LDC, WORK )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE
+      INTEGER            INCV, LDC, M, N
+      COMPLEX*16         TAU
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         C1( LDC, * ), C2( LDC, * ), V( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This routine is deprecated and has been replaced by routine ZUNMRZ.
+*
+*  ZLATZM applies a Householder matrix generated by ZTZRQF to a matrix.
+*
+*  Let P = I - tau*u*u',   u = ( 1 ),
+*                              ( v )
+*  where v is an (m-1) vector if SIDE = 'L', or a (n-1) vector if
+*  SIDE = 'R'.
+*
+*  If SIDE equals 'L', let
+*         C = [ C1 ] 1
+*             [ C2 ] m-1
+*               n
+*  Then C is overwritten by P*C.
+*
+*  If SIDE equals 'R', let
+*         C = [ C1, C2 ] m
+*                1  n-1
+*  Then C is overwritten by C*P.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': form P * C
+*          = 'R': form C * P
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C.
+*
+*  V       (input) COMPLEX*16 array, dimension
+*                  (1 + (M-1)*abs(INCV)) if SIDE = 'L'
+*                  (1 + (N-1)*abs(INCV)) if SIDE = 'R'
+*          The vector v in the representation of P. V is not used
+*          if TAU = 0.
+*
+*  INCV    (input) INTEGER
+*          The increment between elements of v. INCV <> 0
+*
+*  TAU     (input) COMPLEX*16
+*          The value tau in the representation of P.
+*
+*  C1      (input/output) COMPLEX*16 array, dimension
+*                         (LDC,N) if SIDE = 'L'
+*                         (M,1)   if SIDE = 'R'
+*          On entry, the n-vector C1 if SIDE = 'L', or the m-vector C1
+*          if SIDE = 'R'.
+*
+*          On exit, the first row of P*C if SIDE = 'L', or the first
+*          column of C*P if SIDE = 'R'.
+*
+*  C2      (input/output) COMPLEX*16 array, dimension
+*                         (LDC, N)   if SIDE = 'L'
+*                         (LDC, N-1) if SIDE = 'R'
+*          On entry, the (m - 1) x n matrix C2 if SIDE = 'L', or the
+*          m x (n - 1) matrix C2 if SIDE = 'R'.
+*
+*          On exit, rows 2:m of P*C if SIDE = 'L', or columns 2:m of C*P
+*          if SIDE = 'R'.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the arrays C1 and C2.
+*          LDC >= max(1,M).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension
+*                      (N) if SIDE = 'L'
+*                      (M) if SIDE = 'R'
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ),
+     $                   ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZAXPY, ZCOPY, ZGEMV, ZGERC, ZGERU, ZLACGV
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+      IF( ( MIN( M, N ).EQ.0 ) .OR. ( TAU.EQ.ZERO ) )
+     $   RETURN
+*
+      IF( LSAME( SIDE, 'L' ) ) THEN
+*
+*        w :=  conjg( C1 + v' * C2 )
+*
+         CALL ZCOPY( N, C1, LDC, WORK, 1 )
+         CALL ZLACGV( N, WORK, 1 )
+         CALL ZGEMV( 'Conjugate transpose', M-1, N, ONE, C2, LDC, V,
+     $               INCV, ONE, WORK, 1 )
+*
+*        [ C1 ] := [ C1 ] - tau* [ 1 ] * w'
+*        [ C2 ]    [ C2 ]        [ v ]
+*
+         CALL ZLACGV( N, WORK, 1 )
+         CALL ZAXPY( N, -TAU, WORK, 1, C1, LDC )
+         CALL ZGERU( M-1, N, -TAU, V, INCV, WORK, 1, C2, LDC )
+*
+      ELSE IF( LSAME( SIDE, 'R' ) ) THEN
+*
+*        w := C1 + C2 * v
+*
+         CALL ZCOPY( M, C1, 1, WORK, 1 )
+         CALL ZGEMV( 'No transpose', M, N-1, ONE, C2, LDC, V, INCV, ONE,
+     $               WORK, 1 )
+*
+*        [ C1, C2 ] := [ C1, C2 ] - tau* w * [ 1 , v']
+*
+         CALL ZAXPY( M, -TAU, WORK, 1, C1, 1 )
+         CALL ZGERC( M, N-1, -TAU, WORK, 1, V, INCV, C2, LDC )
+      END IF
+*
+      RETURN
+*
+*     End of ZLATZM
+*
+      END
diff --git a/costa/native/external/lapack/zlauu2.f b/costa/native/external/lapack/zlauu2.f
new file mode 100644
index 000000000..99502aac6
--- /dev/null
+++ b/costa/native/external/lapack/zlauu2.f
@@ -0,0 +1,144 @@
+      SUBROUTINE ZLAUU2( UPLO, N, A, LDA, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLAUU2 computes the product U * U' or L' * L, where the triangular
+*  factor U or L is stored in the upper or lower triangular part of
+*  the array A.
+*
+*  If UPLO = 'U' or 'u' then the upper triangle of the result is stored,
+*  overwriting the factor U in A.
+*  If UPLO = 'L' or 'l' then the lower triangle of the result is stored,
+*  overwriting the factor L in A.
+*
+*  This is the unblocked form of the algorithm, calling Level 2 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the triangular factor stored in the array A
+*          is upper or lower triangular:
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the triangular factor U or L.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the triangular factor U or L.
+*          On exit, if UPLO = 'U', the upper triangle of A is
+*          overwritten with the upper triangle of the product U * U';
+*          if UPLO = 'L', the lower triangle of A is overwritten with
+*          the lower triangle of the product L' * L.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I
+      DOUBLE PRECISION   AII
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      COMPLEX*16         ZDOTC
+      EXTERNAL           LSAME, ZDOTC
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZDSCAL, ZGEMV, ZLACGV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, DCMPLX, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZLAUU2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Compute the product U * U'.
+*
+         DO 10 I = 1, N
+            AII = A( I, I )
+            IF( I.LT.N ) THEN
+               A( I, I ) = AII*AII + DBLE( ZDOTC( N-I, A( I, I+1 ), LDA,
+     $                     A( I, I+1 ), LDA ) )
+               CALL ZLACGV( N-I, A( I, I+1 ), LDA )
+               CALL ZGEMV( 'No transpose', I-1, N-I, ONE, A( 1, I+1 ),
+     $                     LDA, A( I, I+1 ), LDA, DCMPLX( AII ),
+     $                     A( 1, I ), 1 )
+               CALL ZLACGV( N-I, A( I, I+1 ), LDA )
+            ELSE
+               CALL ZDSCAL( I, AII, A( 1, I ), 1 )
+            END IF
+   10    CONTINUE
+*
+      ELSE
+*
+*        Compute the product L' * L.
+*
+         DO 20 I = 1, N
+            AII = A( I, I )
+            IF( I.LT.N ) THEN
+               A( I, I ) = AII*AII + DBLE( ZDOTC( N-I, A( I+1, I ), 1,
+     $                     A( I+1, I ), 1 ) )
+               CALL ZLACGV( I-1, A( I, 1 ), LDA )
+               CALL ZGEMV( 'Conjugate transpose', N-I, I-1, ONE,
+     $                     A( I+1, 1 ), LDA, A( I+1, I ), 1,
+     $                     DCMPLX( AII ), A( I, 1 ), LDA )
+               CALL ZLACGV( I-1, A( I, 1 ), LDA )
+            ELSE
+               CALL ZDSCAL( I, AII, A( I, 1 ), LDA )
+            END IF
+   20    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZLAUU2
+*
+      END
diff --git a/costa/native/external/lapack/zlauum.f b/costa/native/external/lapack/zlauum.f
new file mode 100644
index 000000000..07fbf5e87
--- /dev/null
+++ b/costa/native/external/lapack/zlauum.f
@@ -0,0 +1,161 @@
+      SUBROUTINE ZLAUUM( UPLO, N, A, LDA, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZLAUUM computes the product U * U' or L' * L, where the triangular
+*  factor U or L is stored in the upper or lower triangular part of
+*  the array A.
+*
+*  If UPLO = 'U' or 'u' then the upper triangle of the result is stored,
+*  overwriting the factor U in A.
+*  If UPLO = 'L' or 'l' then the lower triangle of the result is stored,
+*  overwriting the factor L in A.
+*
+*  This is the blocked form of the algorithm, calling Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the triangular factor stored in the array A
+*          is upper or lower triangular:
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the triangular factor U or L.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the triangular factor U or L.
+*          On exit, if UPLO = 'U', the upper triangle of A is
+*          overwritten with the upper triangle of the product U * U';
+*          if UPLO = 'L', the lower triangle of A is overwritten with
+*          the lower triangle of the product L' * L.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+      COMPLEX*16         CONE
+      PARAMETER          ( CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, IB, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGEMM, ZHERK, ZLAUU2, ZTRMM
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZLAUUM', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'ZLAUUM', UPLO, N, -1, -1, -1 )
+*
+      IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+*        Use unblocked code
+*
+         CALL ZLAUU2( UPLO, N, A, LDA, INFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( UPPER ) THEN
+*
+*           Compute the product U * U'.
+*
+            DO 10 I = 1, N, NB
+               IB = MIN( NB, N-I+1 )
+               CALL ZTRMM( 'Right', 'Upper', 'Conjugate transpose',
+     $                     'Non-unit', I-1, IB, CONE, A( I, I ), LDA,
+     $                     A( 1, I ), LDA )
+               CALL ZLAUU2( 'Upper', IB, A( I, I ), LDA, INFO )
+               IF( I+IB.LE.N ) THEN
+                  CALL ZGEMM( 'No transpose', 'Conjugate transpose',
+     $                        I-1, IB, N-I-IB+1, CONE, A( 1, I+IB ),
+     $                        LDA, A( I, I+IB ), LDA, CONE, A( 1, I ),
+     $                        LDA )
+                  CALL ZHERK( 'Upper', 'No transpose', IB, N-I-IB+1,
+     $                        ONE, A( I, I+IB ), LDA, ONE, A( I, I ),
+     $                        LDA )
+               END IF
+   10       CONTINUE
+         ELSE
+*
+*           Compute the product L' * L.
+*
+            DO 20 I = 1, N, NB
+               IB = MIN( NB, N-I+1 )
+               CALL ZTRMM( 'Left', 'Lower', 'Conjugate transpose',
+     $                     'Non-unit', IB, I-1, CONE, A( I, I ), LDA,
+     $                     A( I, 1 ), LDA )
+               CALL ZLAUU2( 'Lower', IB, A( I, I ), LDA, INFO )
+               IF( I+IB.LE.N ) THEN
+                  CALL ZGEMM( 'Conjugate transpose', 'No transpose', IB,
+     $                        I-1, N-I-IB+1, CONE, A( I+IB, I ), LDA,
+     $                        A( I+IB, 1 ), LDA, CONE, A( I, 1 ), LDA )
+                  CALL ZHERK( 'Lower', 'Conjugate transpose', IB,
+     $                        N-I-IB+1, ONE, A( I+IB, I ), LDA, ONE,
+     $                        A( I, I ), LDA )
+               END IF
+   20       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZLAUUM
+*
+      END
diff --git a/costa/native/external/lapack/zpbcon.f b/costa/native/external/lapack/zpbcon.f
new file mode 100644
index 000000000..4c5f15cf8
--- /dev/null
+++ b/costa/native/external/lapack/zpbcon.f
@@ -0,0 +1,194 @@
+      SUBROUTINE ZPBCON( UPLO, N, KD, AB, LDAB, ANORM, RCOND, WORK,
+     $                   RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, N
+      DOUBLE PRECISION   ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   RWORK( * )
+      COMPLEX*16         AB( LDAB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPBCON estimates the reciprocal of the condition number (in the
+*  1-norm) of a complex Hermitian positive definite band matrix using
+*  the Cholesky factorization A = U**H*U or A = L*L**H computed by
+*  ZPBTRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangular factor stored in AB;
+*          = 'L':  Lower triangular factor stored in AB.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of sub-diagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input) COMPLEX*16 array, dimension (LDAB,N)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**H*U or A = L*L**H of the band matrix A, stored in the
+*          first KD+1 rows of the array.  The j-th column of U or L is
+*          stored in the j-th column of the array AB as follows:
+*          if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO ='L', AB(1+i-j,j)    = L(i,j) for j<=i<=min(n,j+kd).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  ANORM   (input) DOUBLE PRECISION
+*          The 1-norm (or infinity-norm) of the Hermitian band matrix A.
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an
+*          estimate of the 1-norm of inv(A) computed in this routine.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      CHARACTER          NORMIN
+      INTEGER            IX, KASE
+      DOUBLE PRECISION   AINVNM, SCALE, SCALEL, SCALEU, SMLNUM
+      COMPLEX*16         ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IZAMAX
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, IZAMAX, DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZDRSCL, ZLACON, ZLATBS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -5
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPBCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.EQ.ZERO ) THEN
+         RETURN
+      END IF
+*
+      SMLNUM = DLAMCH( 'Safe minimum' )
+*
+*     Estimate the 1-norm of the inverse.
+*
+      KASE = 0
+      NORMIN = 'N'
+   10 CONTINUE
+      CALL ZLACON( N, WORK( N+1 ), WORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+         IF( UPPER ) THEN
+*
+*           Multiply by inv(U').
+*
+            CALL ZLATBS( 'Upper', 'Conjugate transpose', 'Non-unit',
+     $                   NORMIN, N, KD, AB, LDAB, WORK, SCALEL, RWORK,
+     $                   INFO )
+            NORMIN = 'Y'
+*
+*           Multiply by inv(U).
+*
+            CALL ZLATBS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   KD, AB, LDAB, WORK, SCALEU, RWORK, INFO )
+         ELSE
+*
+*           Multiply by inv(L).
+*
+            CALL ZLATBS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   KD, AB, LDAB, WORK, SCALEL, RWORK, INFO )
+            NORMIN = 'Y'
+*
+*           Multiply by inv(L').
+*
+            CALL ZLATBS( 'Lower', 'Conjugate transpose', 'Non-unit',
+     $                   NORMIN, N, KD, AB, LDAB, WORK, SCALEU, RWORK,
+     $                   INFO )
+         END IF
+*
+*        Multiply by 1/SCALE if doing so will not cause overflow.
+*
+         SCALE = SCALEL*SCALEU
+         IF( SCALE.NE.ONE ) THEN
+            IX = IZAMAX( N, WORK, 1 )
+            IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO )
+     $         GO TO 20
+            CALL ZDRSCL( N, SCALE, WORK, 1 )
+         END IF
+         GO TO 10
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+   20 CONTINUE
+*
+      RETURN
+*
+*     End of ZPBCON
+*
+      END
diff --git a/costa/native/external/lapack/zpbequ.f b/costa/native/external/lapack/zpbequ.f
new file mode 100644
index 000000000..9e1bcb1c6
--- /dev/null
+++ b/costa/native/external/lapack/zpbequ.f
@@ -0,0 +1,168 @@
+      SUBROUTINE ZPBEQU( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, N
+      DOUBLE PRECISION   AMAX, SCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   S( * )
+      COMPLEX*16         AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPBEQU computes row and column scalings intended to equilibrate a
+*  Hermitian positive definite band matrix A and reduce its condition
+*  number (with respect to the two-norm).  S contains the scale factors,
+*  S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with
+*  elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal.  This
+*  choice of S puts the condition number of B within a factor N of the
+*  smallest possible condition number over all possible diagonal
+*  scalings.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangular of A is stored;
+*          = 'L':  Lower triangular of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input) COMPLEX*16 array, dimension (LDAB,N)
+*          The upper or lower triangle of the Hermitian band matrix A,
+*          stored in the first KD+1 rows of the array.  The j-th column
+*          of A is stored in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*  LDAB     (input) INTEGER
+*          The leading dimension of the array A.  LDAB >= KD+1.
+*
+*  S       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, S contains the scale factors for A.
+*
+*  SCOND   (output) DOUBLE PRECISION
+*          If INFO = 0, S contains the ratio of the smallest S(i) to
+*          the largest S(i).  If SCOND >= 0.1 and AMAX is neither too
+*          large nor too small, it is not worth scaling by S.
+*
+*  AMAX    (output) DOUBLE PRECISION
+*          Absolute value of largest matrix element.  If AMAX is very
+*          close to overflow or very close to underflow, the matrix
+*          should be scaled.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = i, the i-th diagonal element is nonpositive.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, J
+      DOUBLE PRECISION   SMIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPBEQU', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         SCOND = ONE
+         AMAX = ZERO
+         RETURN
+      END IF
+*
+      IF( UPPER ) THEN
+         J = KD + 1
+      ELSE
+         J = 1
+      END IF
+*
+*     Initialize SMIN and AMAX.
+*
+      S( 1 ) = DBLE( AB( J, 1 ) )
+      SMIN = S( 1 )
+      AMAX = S( 1 )
+*
+*     Find the minimum and maximum diagonal elements.
+*
+      DO 10 I = 2, N
+         S( I ) = DBLE( AB( J, I ) )
+         SMIN = MIN( SMIN, S( I ) )
+         AMAX = MAX( AMAX, S( I ) )
+   10 CONTINUE
+*
+      IF( SMIN.LE.ZERO ) THEN
+*
+*        Find the first non-positive diagonal element and return.
+*
+         DO 20 I = 1, N
+            IF( S( I ).LE.ZERO ) THEN
+               INFO = I
+               RETURN
+            END IF
+   20    CONTINUE
+      ELSE
+*
+*        Set the scale factors to the reciprocals
+*        of the diagonal elements.
+*
+         DO 30 I = 1, N
+            S( I ) = ONE / SQRT( S( I ) )
+   30    CONTINUE
+*
+*        Compute SCOND = min(S(I)) / max(S(I))
+*
+         SCOND = SQRT( SMIN ) / SQRT( AMAX )
+      END IF
+      RETURN
+*
+*     End of ZPBEQU
+*
+      END
diff --git a/costa/native/external/lapack/zpbrfs.f b/costa/native/external/lapack/zpbrfs.f
new file mode 100644
index 000000000..43a37d26b
--- /dev/null
+++ b/costa/native/external/lapack/zpbrfs.f
@@ -0,0 +1,342 @@
+      SUBROUTINE ZPBRFS( UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, B,
+     $                   LDB, X, LDX, FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX*16         AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPBRFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is Hermitian positive definite
+*  and banded, and provides error bounds and backward error estimates
+*  for the solution.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AB      (input) DOUBLE PRECISION array, dimension (LDAB,N)
+*          The upper or lower triangle of the Hermitian band matrix A,
+*          stored in the first KD+1 rows of the array.  The j-th column
+*          of A is stored in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  AFB     (input) COMPLEX*16 array, dimension (LDAFB,N)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**H*U or A = L*L**H of the band matrix A as computed by
+*          ZPBTRF, in the same storage format as A (see AB).
+*
+*  LDAFB   (input) INTEGER
+*          The leading dimension of the array AFB.  LDAFB >= KD+1.
+*
+*  B       (input) COMPLEX*16 array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) COMPLEX*16 array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by ZPBTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D+0 )
+      DOUBLE PRECISION   THREE
+      PARAMETER          ( THREE = 3.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            COUNT, I, J, K, KASE, L, NZ
+      DOUBLE PRECISION   EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+      COMPLEX*16         ZDUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZAXPY, ZCOPY, ZHBMV, ZLACON, ZPBTRS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX, MIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -6
+      ELSE IF( LDAFB.LT.KD+1 ) THEN
+         INFO = -8
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPBRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = MIN( N+1, 2*KD+2 )
+      EPS = DLAMCH( 'Epsilon' )
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - A * X
+*
+         CALL ZCOPY( N, B( 1, J ), 1, WORK, 1 )
+         CALL ZHBMV( UPLO, N, KD, -ONE, AB, LDAB, X( 1, J ), 1, ONE,
+     $               WORK, 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            RWORK( I ) = CABS1( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(A)*abs(X) + abs(B).
+*
+         IF( UPPER ) THEN
+            DO 50 K = 1, N
+               S = ZERO
+               XK = CABS1( X( K, J ) )
+               L = KD + 1 - K
+               DO 40 I = MAX( 1, K-KD ), K - 1
+                  RWORK( I ) = RWORK( I ) + CABS1( AB( L+I, K ) )*XK
+                  S = S + CABS1( AB( L+I, K ) )*CABS1( X( I, J ) )
+   40          CONTINUE
+               RWORK( K ) = RWORK( K ) + ABS( DBLE( AB( KD+1, K ) ) )*
+     $                      XK + S
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               XK = CABS1( X( K, J ) )
+               RWORK( K ) = RWORK( K ) + ABS( DBLE( AB( 1, K ) ) )*XK
+               L = 1 - K
+               DO 60 I = K + 1, MIN( N, K+KD )
+                  RWORK( I ) = RWORK( I ) + CABS1( AB( L+I, K ) )*XK
+                  S = S + CABS1( AB( L+I, K ) )*CABS1( X( I, J ) )
+   60          CONTINUE
+               RWORK( K ) = RWORK( K ) + S
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) )
+            ELSE
+               S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) /
+     $             ( RWORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL ZPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK, N, INFO )
+            CALL ZAXPY( N, ONE, WORK, 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(A))*
+*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(A) is the inverse of A
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(A)*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use ZLACON to estimate the infinity-norm of the matrix
+*           inv(A) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I )
+            ELSE
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) +
+     $                      SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL ZLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(A').
+*
+               CALL ZPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK, N, INFO )
+               DO 110 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  110          CONTINUE
+            ELSE IF( KASE.EQ.2 ) THEN
+*
+*              Multiply by inv(A)*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  120          CONTINUE
+               CALL ZPBTRS( UPLO, N, KD, 1, AFB, LDAFB, WORK, N, INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of ZPBRFS
+*
+      END
diff --git a/costa/native/external/lapack/zpbstf.f b/costa/native/external/lapack/zpbstf.f
new file mode 100644
index 000000000..11d004e16
--- /dev/null
+++ b/costa/native/external/lapack/zpbstf.f
@@ -0,0 +1,264 @@
+      SUBROUTINE ZPBSTF( UPLO, N, KD, AB, LDAB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPBSTF computes a split Cholesky factorization of a complex
+*  Hermitian positive definite band matrix A.
+*
+*  This routine is designed to be used in conjunction with ZHBGST.
+*
+*  The factorization has the form  A = S**H*S  where S is a band matrix
+*  of the same bandwidth as A and the following structure:
+*
+*    S = ( U    )
+*        ( M  L )
+*
+*  where U is upper triangular of order m = (n+kd)/2, and L is lower
+*  triangular of order n-m.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) COMPLEX*16 array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix A, stored in the first kd+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*          On exit, if INFO = 0, the factor S from the split Cholesky
+*          factorization A = S**H*S. See Further Details.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, the factorization could not be completed,
+*               because the updated element a(i,i) was negative; the
+*               matrix A is not positive definite.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  N = 7, KD = 2:
+*
+*  S = ( s11  s12  s13                     )
+*      (      s22  s23  s24                )
+*      (           s33  s34                )
+*      (                s44                )
+*      (           s53  s54  s55           )
+*      (                s64  s65  s66      )
+*      (                     s75  s76  s77 )
+*
+*  If UPLO = 'U', the array AB holds:
+*
+*  on entry:                          on exit:
+*
+*   *    *   a13  a24  a35  a46  a57   *    *   s13  s24  s53' s64' s75'
+*   *   a12  a23  a34  a45  a56  a67   *   s12  s23  s34  s54' s65' s76'
+*  a11  a22  a33  a44  a55  a66  a77  s11  s22  s33  s44  s55  s66  s77
+*
+*  If UPLO = 'L', the array AB holds:
+*
+*  on entry:                          on exit:
+*
+*  a11  a22  a33  a44  a55  a66  a77  s11  s22  s33  s44  s55  s66  s77
+*  a21  a32  a43  a54  a65  a76   *   s12' s23' s34' s54  s65  s76   *
+*  a31  a42  a53  a64  a64   *    *   s13' s24' s53  s64  s75   *    *
+*
+*  Array elements marked * are not used by the routine; s12' denotes
+*  conjg(s12); the diagonal elements of S are real.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, KLD, KM, M
+      DOUBLE PRECISION   AJJ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZDSCAL, ZHER, ZLACGV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPBSTF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      KLD = MAX( 1, LDAB-1 )
+*
+*     Set the splitting point m.
+*
+      M = ( N+KD ) / 2
+*
+      IF( UPPER ) THEN
+*
+*        Factorize A(m+1:n,m+1:n) as L**H*L, and update A(1:m,1:m).
+*
+         DO 10 J = N, M + 1, -1
+*
+*           Compute s(j,j) and test for non-positive-definiteness.
+*
+            AJJ = DBLE( AB( KD+1, J ) )
+            IF( AJJ.LE.ZERO ) THEN
+               AB( KD+1, J ) = AJJ
+               GO TO 50
+            END IF
+            AJJ = SQRT( AJJ )
+            AB( KD+1, J ) = AJJ
+            KM = MIN( J-1, KD )
+*
+*           Compute elements j-km:j-1 of the j-th column and update the
+*           the leading submatrix within the band.
+*
+            CALL ZDSCAL( KM, ONE / AJJ, AB( KD+1-KM, J ), 1 )
+            CALL ZHER( 'Upper', KM, -ONE, AB( KD+1-KM, J ), 1,
+     $                 AB( KD+1, J-KM ), KLD )
+   10    CONTINUE
+*
+*        Factorize the updated submatrix A(1:m,1:m) as U**H*U.
+*
+         DO 20 J = 1, M
+*
+*           Compute s(j,j) and test for non-positive-definiteness.
+*
+            AJJ = DBLE( AB( KD+1, J ) )
+            IF( AJJ.LE.ZERO ) THEN
+               AB( KD+1, J ) = AJJ
+               GO TO 50
+            END IF
+            AJJ = SQRT( AJJ )
+            AB( KD+1, J ) = AJJ
+            KM = MIN( KD, M-J )
+*
+*           Compute elements j+1:j+km of the j-th row and update the
+*           trailing submatrix within the band.
+*
+            IF( KM.GT.0 ) THEN
+               CALL ZDSCAL( KM, ONE / AJJ, AB( KD, J+1 ), KLD )
+               CALL ZLACGV( KM, AB( KD, J+1 ), KLD )
+               CALL ZHER( 'Upper', KM, -ONE, AB( KD, J+1 ), KLD,
+     $                    AB( KD+1, J+1 ), KLD )
+               CALL ZLACGV( KM, AB( KD, J+1 ), KLD )
+            END IF
+   20    CONTINUE
+      ELSE
+*
+*        Factorize A(m+1:n,m+1:n) as L**H*L, and update A(1:m,1:m).
+*
+         DO 30 J = N, M + 1, -1
+*
+*           Compute s(j,j) and test for non-positive-definiteness.
+*
+            AJJ = DBLE( AB( 1, J ) )
+            IF( AJJ.LE.ZERO ) THEN
+               AB( 1, J ) = AJJ
+               GO TO 50
+            END IF
+            AJJ = SQRT( AJJ )
+            AB( 1, J ) = AJJ
+            KM = MIN( J-1, KD )
+*
+*           Compute elements j-km:j-1 of the j-th row and update the
+*           trailing submatrix within the band.
+*
+            CALL ZDSCAL( KM, ONE / AJJ, AB( KM+1, J-KM ), KLD )
+            CALL ZLACGV( KM, AB( KM+1, J-KM ), KLD )
+            CALL ZHER( 'Lower', KM, -ONE, AB( KM+1, J-KM ), KLD,
+     $                 AB( 1, J-KM ), KLD )
+            CALL ZLACGV( KM, AB( KM+1, J-KM ), KLD )
+   30    CONTINUE
+*
+*        Factorize the updated submatrix A(1:m,1:m) as U**H*U.
+*
+         DO 40 J = 1, M
+*
+*           Compute s(j,j) and test for non-positive-definiteness.
+*
+            AJJ = DBLE( AB( 1, J ) )
+            IF( AJJ.LE.ZERO ) THEN
+               AB( 1, J ) = AJJ
+               GO TO 50
+            END IF
+            AJJ = SQRT( AJJ )
+            AB( 1, J ) = AJJ
+            KM = MIN( KD, M-J )
+*
+*           Compute elements j+1:j+km of the j-th column and update the
+*           trailing submatrix within the band.
+*
+            IF( KM.GT.0 ) THEN
+               CALL ZDSCAL( KM, ONE / AJJ, AB( 2, J ), 1 )
+               CALL ZHER( 'Lower', KM, -ONE, AB( 2, J ), 1,
+     $                    AB( 1, J+1 ), KLD )
+            END IF
+   40    CONTINUE
+      END IF
+      RETURN
+*
+   50 CONTINUE
+      INFO = J
+      RETURN
+*
+*     End of ZPBSTF
+*
+      END
diff --git a/costa/native/external/lapack/zpbsv.f b/costa/native/external/lapack/zpbsv.f
new file mode 100644
index 000000000..b93d171a7
--- /dev/null
+++ b/costa/native/external/lapack/zpbsv.f
@@ -0,0 +1,152 @@
+      SUBROUTINE ZPBSV( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         AB( LDAB, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPBSV computes the solution to a complex system of linear equations
+*     A * X = B,
+*  where A is an N-by-N Hermitian positive definite band matrix and X
+*  and B are N-by-NRHS matrices.
+*
+*  The Cholesky decomposition is used to factor A as
+*     A = U**H * U,  if UPLO = 'U', or
+*     A = L * L**H,  if UPLO = 'L',
+*  where U is an upper triangular band matrix, and L is a lower
+*  triangular band matrix, with the same number of superdiagonals or
+*  subdiagonals as A.  The factored form of A is then used to solve the
+*  system of equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AB      (input/output) COMPLEX*16 array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(KD+1+i-j,j) = A(i,j) for max(1,j-KD)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(N,j+KD).
+*          See below for further details.
+*
+*          On exit, if INFO = 0, the triangular factor U or L from the
+*          Cholesky factorization A = U**H*U or A = L*L**H of the band
+*          matrix A, in the same storage format as A.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i of A is not
+*                positive definite, so the factorization could not be
+*                completed, and the solution has not been computed.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  N = 6, KD = 2, and UPLO = 'U':
+*
+*  On entry:                       On exit:
+*
+*      *    *   a13  a24  a35  a46      *    *   u13  u24  u35  u46
+*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
+*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
+*
+*  Similarly, if UPLO = 'L' the format of A is as follows:
+*
+*  On entry:                       On exit:
+*
+*     a11  a22  a33  a44  a55  a66     l11  l22  l33  l44  l55  l66
+*     a21  a32  a43  a54  a65   *      l21  l32  l43  l54  l65   *
+*     a31  a42  a53  a64   *    *      l31  l42  l53  l64   *    *
+*
+*  Array elements marked * are not used by the routine.
+*
+*  =====================================================================
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZPBTRF, ZPBTRS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPBSV ', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the Cholesky factorization A = U'*U or A = L*L'.
+*
+      CALL ZPBTRF( UPLO, N, KD, AB, LDAB, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL ZPBTRS( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO )
+*
+      END IF
+      RETURN
+*
+*     End of ZPBSV
+*
+      END
diff --git a/costa/native/external/lapack/zpbsvx.f b/costa/native/external/lapack/zpbsvx.f
new file mode 100644
index 000000000..4d5be185a
--- /dev/null
+++ b/costa/native/external/lapack/zpbsvx.f
@@ -0,0 +1,423 @@
+      SUBROUTINE ZPBSVX( FACT, UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB,
+     $                   EQUED, S, B, LDB, X, LDX, RCOND, FERR, BERR,
+     $                   WORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, FACT, UPLO
+      INTEGER            INFO, KD, LDAB, LDAFB, LDB, LDX, N, NRHS
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   BERR( * ), FERR( * ), RWORK( * ), S( * )
+      COMPLEX*16         AB( LDAB, * ), AFB( LDAFB, * ), B( LDB, * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPBSVX uses the Cholesky factorization A = U**H*U or A = L*L**H to
+*  compute the solution to a complex system of linear equations
+*     A * X = B,
+*  where A is an N-by-N Hermitian positive definite band matrix and X
+*  and B are N-by-NRHS matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'E', real scaling factors are computed to equilibrate
+*     the system:
+*        diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B
+*     Whether or not the system will be equilibrated depends on the
+*     scaling of the matrix A, but if equilibration is used, A is
+*     overwritten by diag(S)*A*diag(S) and B by diag(S)*B.
+*
+*  2. If FACT = 'N' or 'E', the Cholesky decomposition is used to
+*     factor the matrix A (after equilibration if FACT = 'E') as
+*        A = U**H * U,  if UPLO = 'U', or
+*        A = L * L**H,  if UPLO = 'L',
+*     where U is an upper triangular band matrix, and L is a lower
+*     triangular band matrix.
+*
+*  3. If the leading i-by-i principal minor is not positive definite,
+*     then the routine returns with INFO = i. Otherwise, the factored
+*     form of A is used to estimate the condition number of the matrix
+*     A.  If the reciprocal of the condition number is less than machine
+*     precision, INFO = N+1 is returned as a warning, but the routine
+*     still goes on to solve for X and compute error bounds as
+*     described below.
+*
+*  4. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  5. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  6. If equilibration was used, the matrix X is premultiplied by
+*     diag(S) so that it solves the original system before
+*     equilibration.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of the matrix A is
+*          supplied on entry, and if not, whether the matrix A should be
+*          equilibrated before it is factored.
+*          = 'F':  On entry, AFB contains the factored form of A.
+*                  If EQUED = 'Y', the matrix A has been equilibrated
+*                  with scaling factors given by S.  AB and AFB will not
+*                  be modified.
+*          = 'N':  The matrix A will be copied to AFB and factored.
+*          = 'E':  The matrix A will be equilibrated if necessary, then
+*                  copied to AFB and factored.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right-hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AB      (input/output) COMPLEX*16 array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix A, stored in the first KD+1 rows of the array, except
+*          if FACT = 'F' and EQUED = 'Y', then A must contain the
+*          equilibrated matrix diag(S)*A*diag(S).  The j-th column of A
+*          is stored in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(KD+1+i-j,j) = A(i,j) for max(1,j-KD)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(N,j+KD).
+*          See below for further details.
+*
+*          On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by
+*          diag(S)*A*diag(S).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array A.  LDAB >= KD+1.
+*
+*  AFB     (input or output) COMPLEX*16 array, dimension (LDAFB,N)
+*          If FACT = 'F', then AFB is an input argument and on entry
+*          contains the triangular factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H of the band matrix
+*          A, in the same storage format as A (see AB).  If EQUED = 'Y',
+*          then AFB is the factored form of the equilibrated matrix A.
+*
+*          If FACT = 'N', then AFB is an output argument and on exit
+*          returns the triangular factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H.
+*
+*          If FACT = 'E', then AFB is an output argument and on exit
+*          returns the triangular factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H of the equilibrated
+*          matrix A (see the description of A for the form of the
+*          equilibrated matrix).
+*
+*  LDAFB   (input) INTEGER
+*          The leading dimension of the array AFB.  LDAFB >= KD+1.
+*
+*  EQUED   (input or output) CHARACTER*1
+*          Specifies the form of equilibration that was done.
+*          = 'N':  No equilibration (always true if FACT = 'N').
+*          = 'Y':  Equilibration was done, i.e., A has been replaced by
+*                  diag(S) * A * diag(S).
+*          EQUED is an input argument if FACT = 'F'; otherwise, it is an
+*          output argument.
+*
+*  S       (input or output) DOUBLE PRECISION array, dimension (N)
+*          The scale factors for A; not accessed if EQUED = 'N'.  S is
+*          an input argument if FACT = 'F'; otherwise, S is an output
+*          argument.  If FACT = 'F' and EQUED = 'Y', each element of S
+*          must be positive.
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y',
+*          B is overwritten by diag(S) * B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) COMPLEX*16 array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to
+*          the original system of equations.  Note that if EQUED = 'Y',
+*          A and B are modified on exit, and the solution to the
+*          equilibrated system is inv(diag(S))*X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The estimate of the reciprocal condition number of the matrix
+*          A after equilibration (if done).  If RCOND is less than the
+*          machine precision (in particular, if RCOND = 0), the matrix
+*          is singular to working precision.  This condition is
+*          indicated by a return code of INFO > 0.
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, and i is
+*                <= N:  the leading minor of order i of A is
+*                       not positive definite, so the factorization
+*                       could not be completed, and the solution has not
+*                       been computed. RCOND = 0 is returned.
+*                = N+1: U is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  N = 6, KD = 2, and UPLO = 'U':
+*
+*  Two-dimensional storage of the Hermitian matrix A:
+*
+*     a11  a12  a13
+*          a22  a23  a24
+*               a33  a34  a35
+*                    a44  a45  a46
+*                         a55  a56
+*     (aij=conjg(aji))         a66
+*
+*  Band storage of the upper triangle of A:
+*
+*      *    *   a13  a24  a35  a46
+*      *   a12  a23  a34  a45  a56
+*     a11  a22  a33  a44  a55  a66
+*
+*  Similarly, if UPLO = 'L' the format of A is as follows:
+*
+*     a11  a22  a33  a44  a55  a66
+*     a21  a32  a43  a54  a65   *
+*     a31  a42  a53  a64   *    *
+*
+*  Array elements marked * are not used by the routine.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            EQUIL, NOFACT, RCEQU, UPPER
+      INTEGER            I, INFEQU, J, J1, J2
+      DOUBLE PRECISION   AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, ZLANHB
+      EXTERNAL           LSAME, DLAMCH, ZLANHB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZCOPY, ZLACPY, ZLAQHB, ZPBCON, ZPBEQU,
+     $                   ZPBRFS, ZPBTRF, ZPBTRS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      EQUIL = LSAME( FACT, 'E' )
+      UPPER = LSAME( UPLO, 'U' )
+      IF( NOFACT .OR. EQUIL ) THEN
+         EQUED = 'N'
+         RCEQU = .FALSE.
+      ELSE
+         RCEQU = LSAME( EQUED, 'Y' )
+         SMLNUM = DLAMCH( 'Safe minimum' )
+         BIGNUM = ONE / SMLNUM
+      END IF
+*
+*     Test the input parameters.
+*
+      IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) )
+     $     THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -7
+      ELSE IF( LDAFB.LT.KD+1 ) THEN
+         INFO = -9
+      ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT.
+     $         ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN
+         INFO = -10
+      ELSE
+         IF( RCEQU ) THEN
+            SMIN = BIGNUM
+            SMAX = ZERO
+            DO 10 J = 1, N
+               SMIN = MIN( SMIN, S( J ) )
+               SMAX = MAX( SMAX, S( J ) )
+   10       CONTINUE
+            IF( SMIN.LE.ZERO ) THEN
+               INFO = -11
+            ELSE IF( N.GT.0 ) THEN
+               SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM )
+            ELSE
+               SCOND = ONE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            IF( LDB.LT.MAX( 1, N ) ) THEN
+               INFO = -13
+            ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+               INFO = -15
+            END IF
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPBSVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( EQUIL ) THEN
+*
+*        Compute row and column scalings to equilibrate the matrix A.
+*
+         CALL ZPBEQU( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, INFEQU )
+         IF( INFEQU.EQ.0 ) THEN
+*
+*           Equilibrate the matrix.
+*
+            CALL ZLAQHB( UPLO, N, KD, AB, LDAB, S, SCOND, AMAX, EQUED )
+            RCEQU = LSAME( EQUED, 'Y' )
+         END IF
+      END IF
+*
+*     Scale the right-hand side.
+*
+      IF( RCEQU ) THEN
+         DO 30 J = 1, NRHS
+            DO 20 I = 1, N
+               B( I, J ) = S( I )*B( I, J )
+   20       CONTINUE
+   30    CONTINUE
+      END IF
+*
+      IF( NOFACT .OR. EQUIL ) THEN
+*
+*        Compute the Cholesky factorization A = U'*U or A = L*L'.
+*
+         IF( UPPER ) THEN
+            DO 40 J = 1, N
+               J1 = MAX( J-KD, 1 )
+               CALL ZCOPY( J-J1+1, AB( KD+1-J+J1, J ), 1,
+     $                     AFB( KD+1-J+J1, J ), 1 )
+   40       CONTINUE
+         ELSE
+            DO 50 J = 1, N
+               J2 = MIN( J+KD, N )
+               CALL ZCOPY( J2-J+1, AB( 1, J ), 1, AFB( 1, J ), 1 )
+   50       CONTINUE
+         END IF
+*
+         CALL ZPBTRF( UPLO, N, KD, AFB, LDAFB, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      ANORM = ZLANHB( '1', UPLO, N, KD, AB, LDAB, RWORK )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL ZPBCON( UPLO, N, KD, AFB, LDAFB, ANORM, RCOND, WORK, RWORK,
+     $             INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.DLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution matrix X.
+*
+      CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL ZPBTRS( UPLO, N, KD, NRHS, AFB, LDAFB, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solution and
+*     compute error bounds and backward error estimates for it.
+*
+      CALL ZPBRFS( UPLO, N, KD, NRHS, AB, LDAB, AFB, LDAFB, B, LDB, X,
+     $             LDX, FERR, BERR, WORK, RWORK, INFO )
+*
+*     Transform the solution matrix X to a solution of the original
+*     system.
+*
+      IF( RCEQU ) THEN
+         DO 70 J = 1, NRHS
+            DO 60 I = 1, N
+               X( I, J ) = S( I )*X( I, J )
+   60       CONTINUE
+   70    CONTINUE
+         DO 80 J = 1, NRHS
+            FERR( J ) = FERR( J ) / SCOND
+   80    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZPBSVX
+*
+      END
diff --git a/costa/native/external/lapack/zpbtf2.f b/costa/native/external/lapack/zpbtf2.f
new file mode 100644
index 000000000..2b0d5a7a4
--- /dev/null
+++ b/costa/native/external/lapack/zpbtf2.f
@@ -0,0 +1,201 @@
+      SUBROUTINE ZPBTF2( UPLO, N, KD, AB, LDAB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 29, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPBTF2 computes the Cholesky factorization of a complex Hermitian
+*  positive definite band matrix A.
+*
+*  The factorization has the form
+*     A = U' * U ,  if UPLO = 'U', or
+*     A = L  * L',  if UPLO = 'L',
+*  where U is an upper triangular matrix, U' is the conjugate transpose
+*  of U, and L is lower triangular.
+*
+*  This is the unblocked version of the algorithm, calling Level 2 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          Hermitian matrix A is stored:
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of super-diagonals of the matrix A if UPLO = 'U',
+*          or the number of sub-diagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) COMPLEX*16 array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*          On exit, if INFO = 0, the triangular factor U or L from the
+*          Cholesky factorization A = U'*U or A = L*L' of the band
+*          matrix A, in the same storage format as A.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*          > 0: if INFO = k, the leading minor of order k is not
+*               positive definite, and the factorization could not be
+*               completed.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  N = 6, KD = 2, and UPLO = 'U':
+*
+*  On entry:                       On exit:
+*
+*      *    *   a13  a24  a35  a46      *    *   u13  u24  u35  u46
+*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
+*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
+*
+*  Similarly, if UPLO = 'L' the format of A is as follows:
+*
+*  On entry:                       On exit:
+*
+*     a11  a22  a33  a44  a55  a66     l11  l22  l33  l44  l55  l66
+*     a21  a32  a43  a54  a65   *      l21  l32  l43  l54  l65   *
+*     a31  a42  a53  a64   *    *      l31  l42  l53  l64   *    *
+*
+*  Array elements marked * are not used by the routine.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, KLD, KN
+      DOUBLE PRECISION   AJJ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZDSCAL, ZHER, ZLACGV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPBTF2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      KLD = MAX( 1, LDAB-1 )
+*
+      IF( UPPER ) THEN
+*
+*        Compute the Cholesky factorization A = U'*U.
+*
+         DO 10 J = 1, N
+*
+*           Compute U(J,J) and test for non-positive-definiteness.
+*
+            AJJ = DBLE( AB( KD+1, J ) )
+            IF( AJJ.LE.ZERO ) THEN
+               AB( KD+1, J ) = AJJ
+               GO TO 30
+            END IF
+            AJJ = SQRT( AJJ )
+            AB( KD+1, J ) = AJJ
+*
+*           Compute elements J+1:J+KN of row J and update the
+*           trailing submatrix within the band.
+*
+            KN = MIN( KD, N-J )
+            IF( KN.GT.0 ) THEN
+               CALL ZDSCAL( KN, ONE / AJJ, AB( KD, J+1 ), KLD )
+               CALL ZLACGV( KN, AB( KD, J+1 ), KLD )
+               CALL ZHER( 'Upper', KN, -ONE, AB( KD, J+1 ), KLD,
+     $                    AB( KD+1, J+1 ), KLD )
+               CALL ZLACGV( KN, AB( KD, J+1 ), KLD )
+            END IF
+   10    CONTINUE
+      ELSE
+*
+*        Compute the Cholesky factorization A = L*L'.
+*
+         DO 20 J = 1, N
+*
+*           Compute L(J,J) and test for non-positive-definiteness.
+*
+            AJJ = DBLE( AB( 1, J ) )
+            IF( AJJ.LE.ZERO ) THEN
+               AB( 1, J ) = AJJ
+               GO TO 30
+            END IF
+            AJJ = SQRT( AJJ )
+            AB( 1, J ) = AJJ
+*
+*           Compute elements J+1:J+KN of column J and update the
+*           trailing submatrix within the band.
+*
+            KN = MIN( KD, N-J )
+            IF( KN.GT.0 ) THEN
+               CALL ZDSCAL( KN, ONE / AJJ, AB( 2, J ), 1 )
+               CALL ZHER( 'Lower', KN, -ONE, AB( 2, J ), 1,
+     $                    AB( 1, J+1 ), KLD )
+            END IF
+   20    CONTINUE
+      END IF
+      RETURN
+*
+   30 CONTINUE
+      INFO = J
+      RETURN
+*
+*     End of ZPBTF2
+*
+      END
diff --git a/costa/native/external/lapack/zpbtrf.f b/costa/native/external/lapack/zpbtrf.f
new file mode 100644
index 000000000..1f7f66e97
--- /dev/null
+++ b/costa/native/external/lapack/zpbtrf.f
@@ -0,0 +1,372 @@
+      SUBROUTINE ZPBTRF( UPLO, N, KD, AB, LDAB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         AB( LDAB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPBTRF computes the Cholesky factorization of a complex Hermitian
+*  positive definite band matrix A.
+*
+*  The factorization has the form
+*     A = U**H * U,  if UPLO = 'U', or
+*     A = L  * L**H,  if UPLO = 'L',
+*  where U is an upper triangular matrix and L is lower triangular.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  AB      (input/output) COMPLEX*16 array, dimension (LDAB,N)
+*          On entry, the upper or lower triangle of the Hermitian band
+*          matrix A, stored in the first KD+1 rows of the array.  The
+*          j-th column of A is stored in the j-th column of the array AB
+*          as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*
+*          On exit, if INFO = 0, the triangular factor U or L from the
+*          Cholesky factorization A = U**H*U or A = L*L**H of the band
+*          matrix A, in the same storage format as A.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i is not
+*                positive definite, and the factorization could not be
+*                completed.
+*
+*  Further Details
+*  ===============
+*
+*  The band storage scheme is illustrated by the following example, when
+*  N = 6, KD = 2, and UPLO = 'U':
+*
+*  On entry:                       On exit:
+*
+*      *    *   a13  a24  a35  a46      *    *   u13  u24  u35  u46
+*      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
+*     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
+*
+*  Similarly, if UPLO = 'L' the format of A is as follows:
+*
+*  On entry:                       On exit:
+*
+*     a11  a22  a33  a44  a55  a66     l11  l22  l33  l44  l55  l66
+*     a21  a32  a43  a54  a65   *      l21  l32  l43  l54  l65   *
+*     a31  a42  a53  a64   *    *      l31  l42  l53  l64   *    *
+*
+*  Array elements marked * are not used by the routine.
+*
+*  Contributed by
+*  Peter Mayes and Giuseppe Radicati, IBM ECSEC, Rome, March 23, 1989
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+      COMPLEX*16         CONE
+      PARAMETER          ( CONE = ( 1.0D+0, 0.0D+0 ) )
+      INTEGER            NBMAX, LDWORK
+      PARAMETER          ( NBMAX = 32, LDWORK = NBMAX+1 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, I2, I3, IB, II, J, JJ, NB
+*     ..
+*     .. Local Arrays ..
+      COMPLEX*16         WORK( LDWORK, NBMAX )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGEMM, ZHERK, ZPBTF2, ZPOTF2, ZTRSM
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( ( .NOT.LSAME( UPLO, 'U' ) ) .AND.
+     $    ( .NOT.LSAME( UPLO, 'L' ) ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPBTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment
+*
+      NB = ILAENV( 1, 'ZPBTRF', UPLO, N, KD, -1, -1 )
+*
+*     The block size must not exceed the semi-bandwidth KD, and must not
+*     exceed the limit set by the size of the local array WORK.
+*
+      NB = MIN( NB, NBMAX )
+*
+      IF( NB.LE.1 .OR. NB.GT.KD ) THEN
+*
+*        Use unblocked code
+*
+         CALL ZPBTF2( UPLO, N, KD, AB, LDAB, INFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*           Compute the Cholesky factorization of a Hermitian band
+*           matrix, given the upper triangle of the matrix in band
+*           storage.
+*
+*           Zero the upper triangle of the work array.
+*
+            DO 20 J = 1, NB
+               DO 10 I = 1, J - 1
+                  WORK( I, J ) = ZERO
+   10          CONTINUE
+   20       CONTINUE
+*
+*           Process the band matrix one diagonal block at a time.
+*
+            DO 70 I = 1, N, NB
+               IB = MIN( NB, N-I+1 )
+*
+*              Factorize the diagonal block
+*
+               CALL ZPOTF2( UPLO, IB, AB( KD+1, I ), LDAB-1, II )
+               IF( II.NE.0 ) THEN
+                  INFO = I + II - 1
+                  GO TO 150
+               END IF
+               IF( I+IB.LE.N ) THEN
+*
+*                 Update the relevant part of the trailing submatrix.
+*                 If A11 denotes the diagonal block which has just been
+*                 factorized, then we need to update the remaining
+*                 blocks in the diagram:
+*
+*                    A11   A12   A13
+*                          A22   A23
+*                                A33
+*
+*                 The numbers of rows and columns in the partitioning
+*                 are IB, I2, I3 respectively. The blocks A12, A22 and
+*                 A23 are empty if IB = KD. The upper triangle of A13
+*                 lies outside the band.
+*
+                  I2 = MIN( KD-IB, N-I-IB+1 )
+                  I3 = MIN( IB, N-I-KD+1 )
+*
+                  IF( I2.GT.0 ) THEN
+*
+*                    Update A12
+*
+                     CALL ZTRSM( 'Left', 'Upper', 'Conjugate transpose',
+     $                           'Non-unit', IB, I2, CONE,
+     $                           AB( KD+1, I ), LDAB-1,
+     $                           AB( KD+1-IB, I+IB ), LDAB-1 )
+*
+*                    Update A22
+*
+                     CALL ZHERK( 'Upper', 'Conjugate transpose', I2, IB,
+     $                           -ONE, AB( KD+1-IB, I+IB ), LDAB-1, ONE,
+     $                           AB( KD+1, I+IB ), LDAB-1 )
+                  END IF
+*
+                  IF( I3.GT.0 ) THEN
+*
+*                    Copy the lower triangle of A13 into the work array.
+*
+                     DO 40 JJ = 1, I3
+                        DO 30 II = JJ, IB
+                           WORK( II, JJ ) = AB( II-JJ+1, JJ+I+KD-1 )
+   30                   CONTINUE
+   40                CONTINUE
+*
+*                    Update A13 (in the work array).
+*
+                     CALL ZTRSM( 'Left', 'Upper', 'Conjugate transpose',
+     $                           'Non-unit', IB, I3, CONE,
+     $                           AB( KD+1, I ), LDAB-1, WORK, LDWORK )
+*
+*                    Update A23
+*
+                     IF( I2.GT.0 )
+     $                  CALL ZGEMM( 'Conjugate transpose',
+     $                              'No transpose', I2, I3, IB, -CONE,
+     $                              AB( KD+1-IB, I+IB ), LDAB-1, WORK,
+     $                              LDWORK, CONE, AB( 1+IB, I+KD ),
+     $                              LDAB-1 )
+*
+*                    Update A33
+*
+                     CALL ZHERK( 'Upper', 'Conjugate transpose', I3, IB,
+     $                           -ONE, WORK, LDWORK, ONE,
+     $                           AB( KD+1, I+KD ), LDAB-1 )
+*
+*                    Copy the lower triangle of A13 back into place.
+*
+                     DO 60 JJ = 1, I3
+                        DO 50 II = JJ, IB
+                           AB( II-JJ+1, JJ+I+KD-1 ) = WORK( II, JJ )
+   50                   CONTINUE
+   60                CONTINUE
+                  END IF
+               END IF
+   70       CONTINUE
+         ELSE
+*
+*           Compute the Cholesky factorization of a Hermitian band
+*           matrix, given the lower triangle of the matrix in band
+*           storage.
+*
+*           Zero the lower triangle of the work array.
+*
+            DO 90 J = 1, NB
+               DO 80 I = J + 1, NB
+                  WORK( I, J ) = ZERO
+   80          CONTINUE
+   90       CONTINUE
+*
+*           Process the band matrix one diagonal block at a time.
+*
+            DO 140 I = 1, N, NB
+               IB = MIN( NB, N-I+1 )
+*
+*              Factorize the diagonal block
+*
+               CALL ZPOTF2( UPLO, IB, AB( 1, I ), LDAB-1, II )
+               IF( II.NE.0 ) THEN
+                  INFO = I + II - 1
+                  GO TO 150
+               END IF
+               IF( I+IB.LE.N ) THEN
+*
+*                 Update the relevant part of the trailing submatrix.
+*                 If A11 denotes the diagonal block which has just been
+*                 factorized, then we need to update the remaining
+*                 blocks in the diagram:
+*
+*                    A11
+*                    A21   A22
+*                    A31   A32   A33
+*
+*                 The numbers of rows and columns in the partitioning
+*                 are IB, I2, I3 respectively. The blocks A21, A22 and
+*                 A32 are empty if IB = KD. The lower triangle of A31
+*                 lies outside the band.
+*
+                  I2 = MIN( KD-IB, N-I-IB+1 )
+                  I3 = MIN( IB, N-I-KD+1 )
+*
+                  IF( I2.GT.0 ) THEN
+*
+*                    Update A21
+*
+                     CALL ZTRSM( 'Right', 'Lower',
+     $                           'Conjugate transpose', 'Non-unit', I2,
+     $                           IB, CONE, AB( 1, I ), LDAB-1,
+     $                           AB( 1+IB, I ), LDAB-1 )
+*
+*                    Update A22
+*
+                     CALL ZHERK( 'Lower', 'No transpose', I2, IB, -ONE,
+     $                           AB( 1+IB, I ), LDAB-1, ONE,
+     $                           AB( 1, I+IB ), LDAB-1 )
+                  END IF
+*
+                  IF( I3.GT.0 ) THEN
+*
+*                    Copy the upper triangle of A31 into the work array.
+*
+                     DO 110 JJ = 1, IB
+                        DO 100 II = 1, MIN( JJ, I3 )
+                           WORK( II, JJ ) = AB( KD+1-JJ+II, JJ+I-1 )
+  100                   CONTINUE
+  110                CONTINUE
+*
+*                    Update A31 (in the work array).
+*
+                     CALL ZTRSM( 'Right', 'Lower',
+     $                           'Conjugate transpose', 'Non-unit', I3,
+     $                           IB, CONE, AB( 1, I ), LDAB-1, WORK,
+     $                           LDWORK )
+*
+*                    Update A32
+*
+                     IF( I2.GT.0 )
+     $                  CALL ZGEMM( 'No transpose',
+     $                              'Conjugate transpose', I3, I2, IB,
+     $                              -CONE, WORK, LDWORK, AB( 1+IB, I ),
+     $                              LDAB-1, CONE, AB( 1+KD-IB, I+IB ),
+     $                              LDAB-1 )
+*
+*                    Update A33
+*
+                     CALL ZHERK( 'Lower', 'No transpose', I3, IB, -ONE,
+     $                           WORK, LDWORK, ONE, AB( 1, I+KD ),
+     $                           LDAB-1 )
+*
+*                    Copy the upper triangle of A31 back into place.
+*
+                     DO 130 JJ = 1, IB
+                        DO 120 II = 1, MIN( JJ, I3 )
+                           AB( KD+1-JJ+II, JJ+I-1 ) = WORK( II, JJ )
+  120                   CONTINUE
+  130                CONTINUE
+                  END IF
+               END IF
+  140       CONTINUE
+         END IF
+      END IF
+      RETURN
+*
+  150 CONTINUE
+      RETURN
+*
+*     End of ZPBTRF
+*
+      END
diff --git a/costa/native/external/lapack/zpbtrs.f b/costa/native/external/lapack/zpbtrs.f
new file mode 100644
index 000000000..ff8124db3
--- /dev/null
+++ b/costa/native/external/lapack/zpbtrs.f
@@ -0,0 +1,146 @@
+      SUBROUTINE ZPBTRS( UPLO, N, KD, NRHS, AB, LDAB, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, KD, LDAB, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         AB( LDAB, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPBTRS solves a system of linear equations A*X = B with a Hermitian
+*  positive definite band matrix A using the Cholesky factorization
+*  A = U**H*U or A = L*L**H computed by ZPBTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangular factor stored in AB;
+*          = 'L':  Lower triangular factor stored in AB.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals of the matrix A if UPLO = 'U',
+*          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AB      (input) COMPLEX*16 array, dimension (LDAB,N)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**H*U or A = L*L**H of the band matrix A, stored in the
+*          first KD+1 rows of the array.  The j-th column of U or L is
+*          stored in the j-th column of the array AB as follows:
+*          if UPLO ='U', AB(kd+1+i-j,j) = U(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO ='L', AB(1+i-j,j)    = L(i,j) for j<=i<=min(n,j+kd).
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZTBSV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPBTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Solve A*X = B where A = U'*U.
+*
+         DO 10 J = 1, NRHS
+*
+*           Solve U'*X = B, overwriting B with X.
+*
+            CALL ZTBSV( 'Upper', 'Conjugate transpose', 'Non-unit', N,
+     $                  KD, AB, LDAB, B( 1, J ), 1 )
+*
+*           Solve U*X = B, overwriting B with X.
+*
+            CALL ZTBSV( 'Upper', 'No transpose', 'Non-unit', N, KD, AB,
+     $                  LDAB, B( 1, J ), 1 )
+   10    CONTINUE
+      ELSE
+*
+*        Solve A*X = B where A = L*L'.
+*
+         DO 20 J = 1, NRHS
+*
+*           Solve L*X = B, overwriting B with X.
+*
+            CALL ZTBSV( 'Lower', 'No transpose', 'Non-unit', N, KD, AB,
+     $                  LDAB, B( 1, J ), 1 )
+*
+*           Solve L'*X = B, overwriting B with X.
+*
+            CALL ZTBSV( 'Lower', 'Conjugate transpose', 'Non-unit', N,
+     $                  KD, AB, LDAB, B( 1, J ), 1 )
+   20    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZPBTRS
+*
+      END
diff --git a/costa/native/external/lapack/zpocon.f b/costa/native/external/lapack/zpocon.f
new file mode 100644
index 000000000..3ab984a83
--- /dev/null
+++ b/costa/native/external/lapack/zpocon.f
@@ -0,0 +1,180 @@
+      SUBROUTINE ZPOCON( UPLO, N, A, LDA, ANORM, RCOND, WORK, RWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+      DOUBLE PRECISION   ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   RWORK( * )
+      COMPLEX*16         A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPOCON estimates the reciprocal of the condition number (in the
+*  1-norm) of a complex Hermitian positive definite matrix using the
+*  Cholesky factorization A = U**H*U or A = L*L**H computed by ZPOTRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,N)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**H*U or A = L*L**H, as computed by ZPOTRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  ANORM   (input) DOUBLE PRECISION
+*          The 1-norm (or infinity-norm) of the Hermitian matrix A.
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an
+*          estimate of the 1-norm of inv(A) computed in this routine.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      CHARACTER          NORMIN
+      INTEGER            IX, KASE
+      DOUBLE PRECISION   AINVNM, SCALE, SCALEL, SCALEU, SMLNUM
+      COMPLEX*16         ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IZAMAX
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, IZAMAX, DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZDRSCL, ZLACON, ZLATRS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPOCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.EQ.ZERO ) THEN
+         RETURN
+      END IF
+*
+      SMLNUM = DLAMCH( 'Safe minimum' )
+*
+*     Estimate the 1-norm of inv(A).
+*
+      KASE = 0
+      NORMIN = 'N'
+   10 CONTINUE
+      CALL ZLACON( N, WORK( N+1 ), WORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+         IF( UPPER ) THEN
+*
+*           Multiply by inv(U').
+*
+            CALL ZLATRS( 'Upper', 'Conjugate transpose', 'Non-unit',
+     $                   NORMIN, N, A, LDA, WORK, SCALEL, RWORK, INFO )
+            NORMIN = 'Y'
+*
+*           Multiply by inv(U).
+*
+            CALL ZLATRS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   A, LDA, WORK, SCALEU, RWORK, INFO )
+         ELSE
+*
+*           Multiply by inv(L).
+*
+            CALL ZLATRS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   A, LDA, WORK, SCALEL, RWORK, INFO )
+            NORMIN = 'Y'
+*
+*           Multiply by inv(L').
+*
+            CALL ZLATRS( 'Lower', 'Conjugate transpose', 'Non-unit',
+     $                   NORMIN, N, A, LDA, WORK, SCALEU, RWORK, INFO )
+         END IF
+*
+*        Multiply by 1/SCALE if doing so will not cause overflow.
+*
+         SCALE = SCALEL*SCALEU
+         IF( SCALE.NE.ONE ) THEN
+            IX = IZAMAX( N, WORK, 1 )
+            IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO )
+     $         GO TO 20
+            CALL ZDRSCL( N, SCALE, WORK, 1 )
+         END IF
+         GO TO 10
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+   20 CONTINUE
+      RETURN
+*
+*     End of ZPOCON
+*
+      END
diff --git a/costa/native/external/lapack/zpoequ.f b/costa/native/external/lapack/zpoequ.f
new file mode 100644
index 000000000..65b1ca714
--- /dev/null
+++ b/costa/native/external/lapack/zpoequ.f
@@ -0,0 +1,138 @@
+      SUBROUTINE ZPOEQU( N, A, LDA, S, SCOND, AMAX, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, N
+      DOUBLE PRECISION   AMAX, SCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   S( * )
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPOEQU computes row and column scalings intended to equilibrate a
+*  Hermitian positive definite matrix A and reduce its condition number
+*  (with respect to the two-norm).  S contains the scale factors,
+*  S(i) = 1/sqrt(A(i,i)), chosen so that the scaled matrix B with
+*  elements B(i,j) = S(i)*A(i,j)*S(j) has ones on the diagonal.  This
+*  choice of S puts the condition number of B within a factor N of the
+*  smallest possible condition number over all possible diagonal
+*  scalings.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,N)
+*          The N-by-N Hermitian positive definite matrix whose scaling
+*          factors are to be computed.  Only the diagonal elements of A
+*          are referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  S       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, S contains the scale factors for A.
+*
+*  SCOND   (output) DOUBLE PRECISION
+*          If INFO = 0, S contains the ratio of the smallest S(i) to
+*          the largest S(i).  If SCOND >= 0.1 and AMAX is neither too
+*          large nor too small, it is not worth scaling by S.
+*
+*  AMAX    (output) DOUBLE PRECISION
+*          Absolute value of largest matrix element.  If AMAX is very
+*          close to overflow or very close to underflow, the matrix
+*          should be scaled.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the i-th diagonal element is nonpositive.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I
+      DOUBLE PRECISION   SMIN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -3
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPOEQU', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         SCOND = ONE
+         AMAX = ZERO
+         RETURN
+      END IF
+*
+*     Find the minimum and maximum diagonal elements.
+*
+      S( 1 ) = DBLE( A( 1, 1 ) )
+      SMIN = S( 1 )
+      AMAX = S( 1 )
+      DO 10 I = 2, N
+         S( I ) = DBLE( A( I, I ) )
+         SMIN = MIN( SMIN, S( I ) )
+         AMAX = MAX( AMAX, S( I ) )
+   10 CONTINUE
+*
+      IF( SMIN.LE.ZERO ) THEN
+*
+*        Find the first non-positive diagonal element and return.
+*
+         DO 20 I = 1, N
+            IF( S( I ).LE.ZERO ) THEN
+               INFO = I
+               RETURN
+            END IF
+   20    CONTINUE
+      ELSE
+*
+*        Set the scale factors to the reciprocals
+*        of the diagonal elements.
+*
+         DO 30 I = 1, N
+            S( I ) = ONE / SQRT( S( I ) )
+   30    CONTINUE
+*
+*        Compute SCOND = min(S(I)) / max(S(I))
+*
+         SCOND = SQRT( SMIN ) / SQRT( AMAX )
+      END IF
+      RETURN
+*
+*     End of ZPOEQU
+*
+      END
diff --git a/costa/native/external/lapack/zporfs.f b/costa/native/external/lapack/zporfs.f
new file mode 100644
index 000000000..7c394da8b
--- /dev/null
+++ b/costa/native/external/lapack/zporfs.f
@@ -0,0 +1,333 @@
+      SUBROUTINE ZPORFS( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X,
+     $                   LDX, FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX*16         A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPORFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is Hermitian positive definite,
+*  and provides error bounds and backward error estimates for the
+*  solution.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,N)
+*          The Hermitian matrix A.  If UPLO = 'U', the leading N-by-N
+*          upper triangular part of A contains the upper triangular part
+*          of the matrix A, and the strictly lower triangular part of A
+*          is not referenced.  If UPLO = 'L', the leading N-by-N lower
+*          triangular part of A contains the lower triangular part of
+*          the matrix A, and the strictly upper triangular part of A is
+*          not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  AF      (input) COMPLEX*16 array, dimension (LDAF,N)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**H*U or A = L*L**H, as computed by ZPOTRF.
+*
+*  LDAF    (input) INTEGER
+*          The leading dimension of the array AF.  LDAF >= max(1,N).
+*
+*  B       (input) COMPLEX*16 array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) COMPLEX*16 array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by ZPOTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  ====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D+0 )
+      DOUBLE PRECISION   THREE
+      PARAMETER          ( THREE = 3.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            COUNT, I, J, K, KASE, NZ
+      DOUBLE PRECISION   EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+      COMPLEX*16         ZDUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZAXPY, ZCOPY, ZHEMV, ZLACON, ZPOTRS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPORFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = DLAMCH( 'Epsilon' )
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - A * X
+*
+         CALL ZCOPY( N, B( 1, J ), 1, WORK, 1 )
+         CALL ZHEMV( UPLO, N, -ONE, A, LDA, X( 1, J ), 1, ONE, WORK, 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            RWORK( I ) = CABS1( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(A)*abs(X) + abs(B).
+*
+         IF( UPPER ) THEN
+            DO 50 K = 1, N
+               S = ZERO
+               XK = CABS1( X( K, J ) )
+               DO 40 I = 1, K - 1
+                  RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK
+                  S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) )
+   40          CONTINUE
+               RWORK( K ) = RWORK( K ) + ABS( DBLE( A( K, K ) ) )*XK + S
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               XK = CABS1( X( K, J ) )
+               RWORK( K ) = RWORK( K ) + ABS( DBLE( A( K, K ) ) )*XK
+               DO 60 I = K + 1, N
+                  RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK
+                  S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) )
+   60          CONTINUE
+               RWORK( K ) = RWORK( K ) + S
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) )
+            ELSE
+               S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) /
+     $             ( RWORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL ZPOTRS( UPLO, N, 1, AF, LDAF, WORK, N, INFO )
+            CALL ZAXPY( N, ONE, WORK, 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(A))*
+*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(A) is the inverse of A
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(A)*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use ZLACON to estimate the infinity-norm of the matrix
+*           inv(A) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I )
+            ELSE
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) +
+     $                      SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL ZLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(A').
+*
+               CALL ZPOTRS( UPLO, N, 1, AF, LDAF, WORK, N, INFO )
+               DO 110 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  110          CONTINUE
+            ELSE IF( KASE.EQ.2 ) THEN
+*
+*              Multiply by inv(A)*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  120          CONTINUE
+               CALL ZPOTRS( UPLO, N, 1, AF, LDAF, WORK, N, INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of ZPORFS
+*
+      END
diff --git a/costa/native/external/lapack/zposv.f b/costa/native/external/lapack/zposv.f
new file mode 100644
index 000000000..2d69554fd
--- /dev/null
+++ b/costa/native/external/lapack/zposv.f
@@ -0,0 +1,122 @@
+      SUBROUTINE ZPOSV( UPLO, N, NRHS, A, LDA, B, LDB, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPOSV computes the solution to a complex system of linear equations
+*     A * X = B,
+*  where A is an N-by-N Hermitian positive definite matrix and X and B
+*  are N-by-NRHS matrices.
+*
+*  The Cholesky decomposition is used to factor A as
+*     A = U**H* U,  if UPLO = 'U', or
+*     A = L * L**H,  if UPLO = 'L',
+*  where U is an upper triangular matrix and  L is a lower triangular
+*  matrix.  The factored form of A is then used to solve the system of
+*  equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i of A is not
+*                positive definite, so the factorization could not be
+*                completed, and the solution has not been computed.
+*
+*  =====================================================================
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZPOTRF, ZPOTRS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPOSV ', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the Cholesky factorization A = U'*U or A = L*L'.
+*
+      CALL ZPOTRF( UPLO, N, A, LDA, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL ZPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO )
+*
+      END IF
+      RETURN
+*
+*     End of ZPOSV
+*
+      END
diff --git a/costa/native/external/lapack/zposvx.f b/costa/native/external/lapack/zposvx.f
new file mode 100644
index 000000000..098f52a3a
--- /dev/null
+++ b/costa/native/external/lapack/zposvx.f
@@ -0,0 +1,378 @@
+      SUBROUTINE ZPOSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, EQUED,
+     $                   S, B, LDB, X, LDX, RCOND, FERR, BERR, WORK,
+     $                   RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, FACT, UPLO
+      INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   BERR( * ), FERR( * ), RWORK( * ), S( * )
+      COMPLEX*16         A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPOSVX uses the Cholesky factorization A = U**H*U or A = L*L**H to
+*  compute the solution to a complex system of linear equations
+*     A * X = B,
+*  where A is an N-by-N Hermitian positive definite matrix and X and B
+*  are N-by-NRHS matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'E', real scaling factors are computed to equilibrate
+*     the system:
+*        diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B
+*     Whether or not the system will be equilibrated depends on the
+*     scaling of the matrix A, but if equilibration is used, A is
+*     overwritten by diag(S)*A*diag(S) and B by diag(S)*B.
+*
+*  2. If FACT = 'N' or 'E', the Cholesky decomposition is used to
+*     factor the matrix A (after equilibration if FACT = 'E') as
+*        A = U**H* U,  if UPLO = 'U', or
+*        A = L * L**H,  if UPLO = 'L',
+*     where U is an upper triangular matrix and L is a lower triangular
+*     matrix.
+*
+*  3. If the leading i-by-i principal minor is not positive definite,
+*     then the routine returns with INFO = i. Otherwise, the factored
+*     form of A is used to estimate the condition number of the matrix
+*     A.  If the reciprocal of the condition number is less than machine
+*     precision, INFO = N+1 is returned as a warning, but the routine
+*     still goes on to solve for X and compute error bounds as
+*     described below.
+*
+*  4. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  5. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  6. If equilibration was used, the matrix X is premultiplied by
+*     diag(S) so that it solves the original system before
+*     equilibration.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of the matrix A is
+*          supplied on entry, and if not, whether the matrix A should be
+*          equilibrated before it is factored.
+*          = 'F':  On entry, AF contains the factored form of A.
+*                  If EQUED = 'Y', the matrix A has been equilibrated
+*                  with scaling factors given by S.  A and AF will not
+*                  be modified.
+*          = 'N':  The matrix A will be copied to AF and factored.
+*          = 'E':  The matrix A will be equilibrated if necessary, then
+*                  copied to AF and factored.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the Hermitian matrix A, except if FACT = 'F' and
+*          EQUED = 'Y', then A must contain the equilibrated matrix
+*          diag(S)*A*diag(S).  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.  A is not modified if
+*          FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit.
+*
+*          On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by
+*          diag(S)*A*diag(S).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  AF      (input or output) COMPLEX*16 array, dimension (LDAF,N)
+*          If FACT = 'F', then AF is an input argument and on entry
+*          contains the triangular factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H, in the same storage
+*          format as A.  If EQUED .ne. 'N', then AF is the factored form
+*          of the equilibrated matrix diag(S)*A*diag(S).
+*
+*          If FACT = 'N', then AF is an output argument and on exit
+*          returns the triangular factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H of the original
+*          matrix A.
+*
+*          If FACT = 'E', then AF is an output argument and on exit
+*          returns the triangular factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H of the equilibrated
+*          matrix A (see the description of A for the form of the
+*          equilibrated matrix).
+*
+*  LDAF    (input) INTEGER
+*          The leading dimension of the array AF.  LDAF >= max(1,N).
+*
+*  EQUED   (input or output) CHARACTER*1
+*          Specifies the form of equilibration that was done.
+*          = 'N':  No equilibration (always true if FACT = 'N').
+*          = 'Y':  Equilibration was done, i.e., A has been replaced by
+*                  diag(S) * A * diag(S).
+*          EQUED is an input argument if FACT = 'F'; otherwise, it is an
+*          output argument.
+*
+*  S       (input or output) DOUBLE PRECISION array, dimension (N)
+*          The scale factors for A; not accessed if EQUED = 'N'.  S is
+*          an input argument if FACT = 'F'; otherwise, S is an output
+*          argument.  If FACT = 'F' and EQUED = 'Y', each element of S
+*          must be positive.
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS righthand side matrix B.
+*          On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y',
+*          B is overwritten by diag(S) * B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) COMPLEX*16 array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to
+*          the original system of equations.  Note that if EQUED = 'Y',
+*          A and B are modified on exit, and the solution to the
+*          equilibrated system is inv(diag(S))*X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The estimate of the reciprocal condition number of the matrix
+*          A after equilibration (if done).  If RCOND is less than the
+*          machine precision (in particular, if RCOND = 0), the matrix
+*          is singular to working precision.  This condition is
+*          indicated by a return code of INFO > 0.
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, and i is
+*                <= N:  the leading minor of order i of A is
+*                       not positive definite, so the factorization
+*                       could not be completed, and the solution has not
+*                       been computed. RCOND = 0 is returned.
+*                = N+1: U is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            EQUIL, NOFACT, RCEQU
+      INTEGER            I, INFEQU, J
+      DOUBLE PRECISION   AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, ZLANHE
+      EXTERNAL           LSAME, DLAMCH, ZLANHE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLACPY, ZLAQHE, ZPOCON, ZPOEQU, ZPORFS,
+     $                   ZPOTRF, ZPOTRS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      EQUIL = LSAME( FACT, 'E' )
+      IF( NOFACT .OR. EQUIL ) THEN
+         EQUED = 'N'
+         RCEQU = .FALSE.
+      ELSE
+         RCEQU = LSAME( EQUED, 'Y' )
+         SMLNUM = DLAMCH( 'Safe minimum' )
+         BIGNUM = ONE / SMLNUM
+      END IF
+*
+*     Test the input parameters.
+*
+      IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) )
+     $     THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT.
+     $         ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN
+         INFO = -9
+      ELSE
+         IF( RCEQU ) THEN
+            SMIN = BIGNUM
+            SMAX = ZERO
+            DO 10 J = 1, N
+               SMIN = MIN( SMIN, S( J ) )
+               SMAX = MAX( SMAX, S( J ) )
+   10       CONTINUE
+            IF( SMIN.LE.ZERO ) THEN
+               INFO = -10
+            ELSE IF( N.GT.0 ) THEN
+               SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM )
+            ELSE
+               SCOND = ONE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            IF( LDB.LT.MAX( 1, N ) ) THEN
+               INFO = -12
+            ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+               INFO = -14
+            END IF
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPOSVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( EQUIL ) THEN
+*
+*        Compute row and column scalings to equilibrate the matrix A.
+*
+         CALL ZPOEQU( N, A, LDA, S, SCOND, AMAX, INFEQU )
+         IF( INFEQU.EQ.0 ) THEN
+*
+*           Equilibrate the matrix.
+*
+            CALL ZLAQHE( UPLO, N, A, LDA, S, SCOND, AMAX, EQUED )
+            RCEQU = LSAME( EQUED, 'Y' )
+         END IF
+      END IF
+*
+*     Scale the right hand side.
+*
+      IF( RCEQU ) THEN
+         DO 30 J = 1, NRHS
+            DO 20 I = 1, N
+               B( I, J ) = S( I )*B( I, J )
+   20       CONTINUE
+   30    CONTINUE
+      END IF
+*
+      IF( NOFACT .OR. EQUIL ) THEN
+*
+*        Compute the Cholesky factorization A = U'*U or A = L*L'.
+*
+         CALL ZLACPY( UPLO, N, N, A, LDA, AF, LDAF )
+         CALL ZPOTRF( UPLO, N, AF, LDAF, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      ANORM = ZLANHE( '1', UPLO, N, A, LDA, RWORK )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL ZPOCON( UPLO, N, AF, LDAF, ANORM, RCOND, WORK, RWORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.DLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution matrix X.
+*
+      CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL ZPOTRS( UPLO, N, NRHS, AF, LDAF, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solution and
+*     compute error bounds and backward error estimates for it.
+*
+      CALL ZPORFS( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X, LDX,
+     $             FERR, BERR, WORK, RWORK, INFO )
+*
+*     Transform the solution matrix X to a solution of the original
+*     system.
+*
+      IF( RCEQU ) THEN
+         DO 50 J = 1, NRHS
+            DO 40 I = 1, N
+               X( I, J ) = S( I )*X( I, J )
+   40       CONTINUE
+   50    CONTINUE
+         DO 60 J = 1, NRHS
+            FERR( J ) = FERR( J ) / SCOND
+   60    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZPOSVX
+*
+      END
diff --git a/costa/native/external/lapack/zpotf2.f b/costa/native/external/lapack/zpotf2.f
new file mode 100644
index 000000000..771db483e
--- /dev/null
+++ b/costa/native/external/lapack/zpotf2.f
@@ -0,0 +1,175 @@
+      SUBROUTINE ZPOTF2( UPLO, N, A, LDA, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPOTF2 computes the Cholesky factorization of a complex Hermitian
+*  positive definite matrix A.
+*
+*  The factorization has the form
+*     A = U' * U ,  if UPLO = 'U', or
+*     A = L  * L',  if UPLO = 'L',
+*  where U is an upper triangular matrix and L is lower triangular.
+*
+*  This is the unblocked version of the algorithm, calling Level 2 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          Hermitian matrix A is stored.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+*          n by n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n by n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the factor U or L from the Cholesky
+*          factorization A = U'*U  or A = L*L'.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*          > 0: if INFO = k, the leading minor of order k is not
+*               positive definite, and the factorization could not be
+*               completed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+      COMPLEX*16         CONE
+      PARAMETER          ( CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J
+      DOUBLE PRECISION   AJJ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      COMPLEX*16         ZDOTC
+      EXTERNAL           LSAME, ZDOTC
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZDSCAL, ZGEMV, ZLACGV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPOTF2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Compute the Cholesky factorization A = U'*U.
+*
+         DO 10 J = 1, N
+*
+*           Compute U(J,J) and test for non-positive-definiteness.
+*
+            AJJ = DBLE( A( J, J ) ) - ZDOTC( J-1, A( 1, J ), 1,
+     $            A( 1, J ), 1 )
+            IF( AJJ.LE.ZERO ) THEN
+               A( J, J ) = AJJ
+               GO TO 30
+            END IF
+            AJJ = SQRT( AJJ )
+            A( J, J ) = AJJ
+*
+*           Compute elements J+1:N of row J.
+*
+            IF( J.LT.N ) THEN
+               CALL ZLACGV( J-1, A( 1, J ), 1 )
+               CALL ZGEMV( 'Transpose', J-1, N-J, -CONE, A( 1, J+1 ),
+     $                     LDA, A( 1, J ), 1, CONE, A( J, J+1 ), LDA )
+               CALL ZLACGV( J-1, A( 1, J ), 1 )
+               CALL ZDSCAL( N-J, ONE / AJJ, A( J, J+1 ), LDA )
+            END IF
+   10    CONTINUE
+      ELSE
+*
+*        Compute the Cholesky factorization A = L*L'.
+*
+         DO 20 J = 1, N
+*
+*           Compute L(J,J) and test for non-positive-definiteness.
+*
+            AJJ = DBLE( A( J, J ) ) - ZDOTC( J-1, A( J, 1 ), LDA,
+     $            A( J, 1 ), LDA )
+            IF( AJJ.LE.ZERO ) THEN
+               A( J, J ) = AJJ
+               GO TO 30
+            END IF
+            AJJ = SQRT( AJJ )
+            A( J, J ) = AJJ
+*
+*           Compute elements J+1:N of column J.
+*
+            IF( J.LT.N ) THEN
+               CALL ZLACGV( J-1, A( J, 1 ), LDA )
+               CALL ZGEMV( 'No transpose', N-J, J-1, -CONE, A( J+1, 1 ),
+     $                     LDA, A( J, 1 ), LDA, CONE, A( J+1, J ), 1 )
+               CALL ZLACGV( J-1, A( J, 1 ), LDA )
+               CALL ZDSCAL( N-J, ONE / AJJ, A( J+1, J ), 1 )
+            END IF
+   20    CONTINUE
+      END IF
+      GO TO 40
+*
+   30 CONTINUE
+      INFO = J
+*
+   40 CONTINUE
+      RETURN
+*
+*     End of ZPOTF2
+*
+      END
diff --git a/costa/native/external/lapack/zpotrf.f b/costa/native/external/lapack/zpotrf.f
new file mode 100644
index 000000000..e46514d63
--- /dev/null
+++ b/costa/native/external/lapack/zpotrf.f
@@ -0,0 +1,187 @@
+      SUBROUTINE ZPOTRF( UPLO, N, A, LDA, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPOTRF computes the Cholesky factorization of a complex Hermitian
+*  positive definite matrix A.
+*
+*  The factorization has the form
+*     A = U**H * U,  if UPLO = 'U', or
+*     A = L  * L**H,  if UPLO = 'L',
+*  where U is an upper triangular matrix and L is lower triangular.
+*
+*  This is the block version of the algorithm, calling Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i is not
+*                positive definite, and the factorization could not be
+*                completed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      COMPLEX*16         CONE
+      PARAMETER          ( ONE = 1.0D+0, CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, JB, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGEMM, ZHERK, ZPOTF2, ZTRSM
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPOTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'ZPOTRF', UPLO, N, -1, -1, -1 )
+      IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+*        Use unblocked code.
+*
+         CALL ZPOTF2( UPLO, N, A, LDA, INFO )
+      ELSE
+*
+*        Use blocked code.
+*
+         IF( UPPER ) THEN
+*
+*           Compute the Cholesky factorization A = U'*U.
+*
+            DO 10 J = 1, N, NB
+*
+*              Update and factorize the current diagonal block and test
+*              for non-positive-definiteness.
+*
+               JB = MIN( NB, N-J+1 )
+               CALL ZHERK( 'Upper', 'Conjugate transpose', JB, J-1,
+     $                     -ONE, A( 1, J ), LDA, ONE, A( J, J ), LDA )
+               CALL ZPOTF2( 'Upper', JB, A( J, J ), LDA, INFO )
+               IF( INFO.NE.0 )
+     $            GO TO 30
+               IF( J+JB.LE.N ) THEN
+*
+*                 Compute the current block row.
+*
+                  CALL ZGEMM( 'Conjugate transpose', 'No transpose', JB,
+     $                        N-J-JB+1, J-1, -CONE, A( 1, J ), LDA,
+     $                        A( 1, J+JB ), LDA, CONE, A( J, J+JB ),
+     $                        LDA )
+                  CALL ZTRSM( 'Left', 'Upper', 'Conjugate transpose',
+     $                        'Non-unit', JB, N-J-JB+1, CONE, A( J, J ),
+     $                        LDA, A( J, J+JB ), LDA )
+               END IF
+   10       CONTINUE
+*
+         ELSE
+*
+*           Compute the Cholesky factorization A = L*L'.
+*
+            DO 20 J = 1, N, NB
+*
+*              Update and factorize the current diagonal block and test
+*              for non-positive-definiteness.
+*
+               JB = MIN( NB, N-J+1 )
+               CALL ZHERK( 'Lower', 'No transpose', JB, J-1, -ONE,
+     $                     A( J, 1 ), LDA, ONE, A( J, J ), LDA )
+               CALL ZPOTF2( 'Lower', JB, A( J, J ), LDA, INFO )
+               IF( INFO.NE.0 )
+     $            GO TO 30
+               IF( J+JB.LE.N ) THEN
+*
+*                 Compute the current block column.
+*
+                  CALL ZGEMM( 'No transpose', 'Conjugate transpose',
+     $                        N-J-JB+1, JB, J-1, -CONE, A( J+JB, 1 ),
+     $                        LDA, A( J, 1 ), LDA, CONE, A( J+JB, J ),
+     $                        LDA )
+                  CALL ZTRSM( 'Right', 'Lower', 'Conjugate transpose',
+     $                        'Non-unit', N-J-JB+1, JB, CONE, A( J, J ),
+     $                        LDA, A( J+JB, J ), LDA )
+               END IF
+   20       CONTINUE
+         END IF
+      END IF
+      GO TO 40
+*
+   30 CONTINUE
+      INFO = INFO + J - 1
+*
+   40 CONTINUE
+      RETURN
+*
+*     End of ZPOTRF
+*
+      END
diff --git a/costa/native/external/lapack/zpotri.f b/costa/native/external/lapack/zpotri.f
new file mode 100644
index 000000000..06fe7a483
--- /dev/null
+++ b/costa/native/external/lapack/zpotri.f
@@ -0,0 +1,97 @@
+      SUBROUTINE ZPOTRI( UPLO, N, A, LDA, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPOTRI computes the inverse of a complex Hermitian positive definite
+*  matrix A using the Cholesky factorization A = U**H*U or A = L*L**H
+*  computed by ZPOTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the triangular factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H, as computed by
+*          ZPOTRF.
+*          On exit, the upper or lower triangle of the (Hermitian)
+*          inverse of A, overwriting the input factor U or L.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the (i,i) element of the factor U or L is
+*                zero, and the inverse could not be computed.
+*
+*  =====================================================================
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLAUUM, ZTRTRI
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPOTRI', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Invert the triangular Cholesky factor U or L.
+*
+      CALL ZTRTRI( UPLO, 'Non-unit', N, A, LDA, INFO )
+      IF( INFO.GT.0 )
+     $   RETURN
+*
+*     Form inv(U)*inv(U)' or inv(L)'*inv(L).
+*
+      CALL ZLAUUM( UPLO, N, A, LDA, INFO )
+*
+      RETURN
+*
+*     End of ZPOTRI
+*
+      END
diff --git a/costa/native/external/lapack/zpotrs.f b/costa/native/external/lapack/zpotrs.f
new file mode 100644
index 000000000..cf6e825be
--- /dev/null
+++ b/costa/native/external/lapack/zpotrs.f
@@ -0,0 +1,133 @@
+      SUBROUTINE ZPOTRS( UPLO, N, NRHS, A, LDA, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPOTRS solves a system of linear equations A*X = B with a Hermitian
+*  positive definite matrix A using the Cholesky factorization
+*  A = U**H*U or A = L*L**H computed by ZPOTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,N)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**H*U or A = L*L**H, as computed by ZPOTRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZTRSM
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPOTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Solve A*X = B where A = U'*U.
+*
+*        Solve U'*X = B, overwriting B with X.
+*
+         CALL ZTRSM( 'Left', 'Upper', 'Conjugate transpose', 'Non-unit',
+     $               N, NRHS, ONE, A, LDA, B, LDB )
+*
+*        Solve U*X = B, overwriting B with X.
+*
+         CALL ZTRSM( 'Left', 'Upper', 'No transpose', 'Non-unit', N,
+     $               NRHS, ONE, A, LDA, B, LDB )
+      ELSE
+*
+*        Solve A*X = B where A = L*L'.
+*
+*        Solve L*X = B, overwriting B with X.
+*
+         CALL ZTRSM( 'Left', 'Lower', 'No transpose', 'Non-unit', N,
+     $               NRHS, ONE, A, LDA, B, LDB )
+*
+*        Solve L'*X = B, overwriting B with X.
+*
+         CALL ZTRSM( 'Left', 'Lower', 'Conjugate transpose', 'Non-unit',
+     $               N, NRHS, ONE, A, LDA, B, LDB )
+      END IF
+*
+      RETURN
+*
+*     End of ZPOTRS
+*
+      END
diff --git a/costa/native/external/lapack/zppcon.f b/costa/native/external/lapack/zppcon.f
new file mode 100644
index 000000000..9c955ae9f
--- /dev/null
+++ b/costa/native/external/lapack/zppcon.f
@@ -0,0 +1,179 @@
+      SUBROUTINE ZPPCON( UPLO, N, AP, ANORM, RCOND, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+      DOUBLE PRECISION   ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   RWORK( * )
+      COMPLEX*16         AP( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPPCON estimates the reciprocal of the condition number (in the
+*  1-norm) of a complex Hermitian positive definite packed matrix using
+*  the Cholesky factorization A = U**H*U or A = L*L**H computed by
+*  ZPPTRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**H*U or A = L*L**H, packed columnwise in a linear
+*          array.  The j-th column of U or L is stored in the array AP
+*          as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n.
+*
+*  ANORM   (input) DOUBLE PRECISION
+*          The 1-norm (or infinity-norm) of the Hermitian matrix A.
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an
+*          estimate of the 1-norm of inv(A) computed in this routine.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      CHARACTER          NORMIN
+      INTEGER            IX, KASE
+      DOUBLE PRECISION   AINVNM, SCALE, SCALEL, SCALEU, SMLNUM
+      COMPLEX*16         ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IZAMAX
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, IZAMAX, DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZDRSCL, ZLACON, ZLATPS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPPCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.EQ.ZERO ) THEN
+         RETURN
+      END IF
+*
+      SMLNUM = DLAMCH( 'Safe minimum' )
+*
+*     Estimate the 1-norm of the inverse.
+*
+      KASE = 0
+      NORMIN = 'N'
+   10 CONTINUE
+      CALL ZLACON( N, WORK( N+1 ), WORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+         IF( UPPER ) THEN
+*
+*           Multiply by inv(U').
+*
+            CALL ZLATPS( 'Upper', 'Conjugate transpose', 'Non-unit',
+     $                   NORMIN, N, AP, WORK, SCALEL, RWORK, INFO )
+            NORMIN = 'Y'
+*
+*           Multiply by inv(U).
+*
+            CALL ZLATPS( 'Upper', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   AP, WORK, SCALEU, RWORK, INFO )
+         ELSE
+*
+*           Multiply by inv(L).
+*
+            CALL ZLATPS( 'Lower', 'No transpose', 'Non-unit', NORMIN, N,
+     $                   AP, WORK, SCALEL, RWORK, INFO )
+            NORMIN = 'Y'
+*
+*           Multiply by inv(L').
+*
+            CALL ZLATPS( 'Lower', 'Conjugate transpose', 'Non-unit',
+     $                   NORMIN, N, AP, WORK, SCALEU, RWORK, INFO )
+         END IF
+*
+*        Multiply by 1/SCALE if doing so will not cause overflow.
+*
+         SCALE = SCALEL*SCALEU
+         IF( SCALE.NE.ONE ) THEN
+            IX = IZAMAX( N, WORK, 1 )
+            IF( SCALE.LT.CABS1( WORK( IX ) )*SMLNUM .OR. SCALE.EQ.ZERO )
+     $         GO TO 20
+            CALL ZDRSCL( N, SCALE, WORK, 1 )
+         END IF
+         GO TO 10
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+   20 CONTINUE
+      RETURN
+*
+*     End of ZPPCON
+*
+      END
diff --git a/costa/native/external/lapack/zppequ.f b/costa/native/external/lapack/zppequ.f
new file mode 100644
index 000000000..da40eb4c3
--- /dev/null
+++ b/costa/native/external/lapack/zppequ.f
@@ -0,0 +1,170 @@
+      SUBROUTINE ZPPEQU( UPLO, N, AP, S, SCOND, AMAX, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+      DOUBLE PRECISION   AMAX, SCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   S( * )
+      COMPLEX*16         AP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPPEQU computes row and column scalings intended to equilibrate a
+*  Hermitian positive definite matrix A in packed storage and reduce
+*  its condition number (with respect to the two-norm).  S contains the
+*  scale factors, S(i)=1/sqrt(A(i,i)), chosen so that the scaled matrix
+*  B with elements B(i,j)=S(i)*A(i,j)*S(j) has ones on the diagonal.
+*  This choice of S puts the condition number of B within a factor N of
+*  the smallest possible condition number over all possible diagonal
+*  scalings.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          The upper or lower triangle of the Hermitian matrix A, packed
+*          columnwise in a linear array.  The j-th column of A is stored
+*          in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*  S       (output) DOUBLE PRECISION array, dimension (N)
+*          If INFO = 0, S contains the scale factors for A.
+*
+*  SCOND   (output) DOUBLE PRECISION
+*          If INFO = 0, S contains the ratio of the smallest S(i) to
+*          the largest S(i).  If SCOND >= 0.1 and AMAX is neither too
+*          large nor too small, it is not worth scaling by S.
+*
+*  AMAX    (output) DOUBLE PRECISION
+*          Absolute value of largest matrix element.  If AMAX is very
+*          close to overflow or very close to underflow, the matrix
+*          should be scaled.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the i-th diagonal element is nonpositive.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, JJ
+      DOUBLE PRECISION   SMIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPPEQU', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         SCOND = ONE
+         AMAX = ZERO
+         RETURN
+      END IF
+*
+*     Initialize SMIN and AMAX.
+*
+      S( 1 ) = DBLE( AP( 1 ) )
+      SMIN = S( 1 )
+      AMAX = S( 1 )
+*
+      IF( UPPER ) THEN
+*
+*        UPLO = 'U':  Upper triangle of A is stored.
+*        Find the minimum and maximum diagonal elements.
+*
+         JJ = 1
+         DO 10 I = 2, N
+            JJ = JJ + I
+            S( I ) = DBLE( AP( JJ ) )
+            SMIN = MIN( SMIN, S( I ) )
+            AMAX = MAX( AMAX, S( I ) )
+   10    CONTINUE
+*
+      ELSE
+*
+*        UPLO = 'L':  Lower triangle of A is stored.
+*        Find the minimum and maximum diagonal elements.
+*
+         JJ = 1
+         DO 20 I = 2, N
+            JJ = JJ + N - I + 2
+            S( I ) = DBLE( AP( JJ ) )
+            SMIN = MIN( SMIN, S( I ) )
+            AMAX = MAX( AMAX, S( I ) )
+   20    CONTINUE
+      END IF
+*
+      IF( SMIN.LE.ZERO ) THEN
+*
+*        Find the first non-positive diagonal element and return.
+*
+         DO 30 I = 1, N
+            IF( S( I ).LE.ZERO ) THEN
+               INFO = I
+               RETURN
+            END IF
+   30    CONTINUE
+      ELSE
+*
+*        Set the scale factors to the reciprocals
+*        of the diagonal elements.
+*
+         DO 40 I = 1, N
+            S( I ) = ONE / SQRT( S( I ) )
+   40    CONTINUE
+*
+*        Compute SCOND = min(S(I)) / max(S(I))
+*
+         SCOND = SQRT( SMIN ) / SQRT( AMAX )
+      END IF
+      RETURN
+*
+*     End of ZPPEQU
+*
+      END
diff --git a/costa/native/external/lapack/zpprfs.f b/costa/native/external/lapack/zpprfs.f
new file mode 100644
index 000000000..e455e8920
--- /dev/null
+++ b/costa/native/external/lapack/zpprfs.f
@@ -0,0 +1,331 @@
+      SUBROUTINE ZPPRFS( UPLO, N, NRHS, AP, AFP, B, LDB, X, LDX, FERR,
+     $                   BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX*16         AFP( * ), AP( * ), B( LDB, * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPPRFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is Hermitian positive definite
+*  and packed, and provides error bounds and backward error estimates
+*  for the solution.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AP      (input) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          The upper or lower triangle of the Hermitian matrix A, packed
+*          columnwise in a linear array.  The j-th column of A is stored
+*          in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*  AFP     (input) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**H*U or A = L*L**H, as computed by DPPTRF/ZPPTRF,
+*          packed columnwise in a linear array in the same format as A
+*          (see AP).
+*
+*  B       (input) COMPLEX*16 array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) COMPLEX*16 array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by ZPPTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  ====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+      COMPLEX*16         CONE
+      PARAMETER          ( CONE = ( 1.0D+0, 0.0D+0 ) )
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D+0 )
+      DOUBLE PRECISION   THREE
+      PARAMETER          ( THREE = 3.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            COUNT, I, IK, J, K, KASE, KK, NZ
+      DOUBLE PRECISION   EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+      COMPLEX*16         ZDUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZAXPY, ZCOPY, ZHPMV, ZLACON, ZPPTRS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPPRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = DLAMCH( 'Epsilon' )
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - A * X
+*
+         CALL ZCOPY( N, B( 1, J ), 1, WORK, 1 )
+         CALL ZHPMV( UPLO, N, -CONE, AP, X( 1, J ), 1, CONE, WORK, 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            RWORK( I ) = CABS1( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(A)*abs(X) + abs(B).
+*
+         KK = 1
+         IF( UPPER ) THEN
+            DO 50 K = 1, N
+               S = ZERO
+               XK = CABS1( X( K, J ) )
+               IK = KK
+               DO 40 I = 1, K - 1
+                  RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK
+                  S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) )
+                  IK = IK + 1
+   40          CONTINUE
+               RWORK( K ) = RWORK( K ) + ABS( DBLE( AP( KK+K-1 ) ) )*
+     $                      XK + S
+               KK = KK + K
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               XK = CABS1( X( K, J ) )
+               RWORK( K ) = RWORK( K ) + ABS( DBLE( AP( KK ) ) )*XK
+               IK = KK + 1
+               DO 60 I = K + 1, N
+                  RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK
+                  S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) )
+                  IK = IK + 1
+   60          CONTINUE
+               RWORK( K ) = RWORK( K ) + S
+               KK = KK + ( N-K+1 )
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) )
+            ELSE
+               S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) /
+     $             ( RWORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL ZPPTRS( UPLO, N, 1, AFP, WORK, N, INFO )
+            CALL ZAXPY( N, CONE, WORK, 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(A))*
+*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(A) is the inverse of A
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(A)*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use ZLACON to estimate the infinity-norm of the matrix
+*           inv(A) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I )
+            ELSE
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) +
+     $                      SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL ZLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(A').
+*
+               CALL ZPPTRS( UPLO, N, 1, AFP, WORK, N, INFO )
+               DO 110 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  110          CONTINUE
+            ELSE IF( KASE.EQ.2 ) THEN
+*
+*              Multiply by inv(A)*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  120          CONTINUE
+               CALL ZPPTRS( UPLO, N, 1, AFP, WORK, N, INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of ZPPRFS
+*
+      END
diff --git a/costa/native/external/lapack/zppsv.f b/costa/native/external/lapack/zppsv.f
new file mode 100644
index 000000000..707f93257
--- /dev/null
+++ b/costa/native/external/lapack/zppsv.f
@@ -0,0 +1,134 @@
+      SUBROUTINE ZPPSV( UPLO, N, NRHS, AP, B, LDB, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         AP( * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPPSV computes the solution to a complex system of linear equations
+*     A * X = B,
+*  where A is an N-by-N Hermitian positive definite matrix stored in
+*  packed format and X and B are N-by-NRHS matrices.
+*
+*  The Cholesky decomposition is used to factor A as
+*     A = U**H* U,  if UPLO = 'U', or
+*     A = L * L**H,  if UPLO = 'L',
+*  where U is an upper triangular matrix and L is a lower triangular
+*  matrix.  The factored form of A is then used to solve the system of
+*  equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AP      (input/output) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*          See below for further details.
+*
+*          On exit, if INFO = 0, the factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H, in the same storage
+*          format as A.
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i of A is not
+*                positive definite, so the factorization could not be
+*                completed, and the solution has not been computed.
+*
+*  Further Details
+*  ===============
+*
+*  The packed storage scheme is illustrated by the following example
+*  when N = 4, UPLO = 'U':
+*
+*  Two-dimensional storage of the Hermitian matrix A:
+*
+*     a11 a12 a13 a14
+*         a22 a23 a24
+*             a33 a34     (aij = conjg(aji))
+*                 a44
+*
+*  Packed storage of the upper triangle of A:
+*
+*  AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ]
+*
+*  =====================================================================
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZPPTRF, ZPPTRS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPPSV ', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the Cholesky factorization A = U'*U or A = L*L'.
+*
+      CALL ZPPTRF( UPLO, N, AP, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL ZPPTRS( UPLO, N, NRHS, AP, B, LDB, INFO )
+*
+      END IF
+      RETURN
+*
+*     End of ZPPSV
+*
+      END
diff --git a/costa/native/external/lapack/zppsvx.f b/costa/native/external/lapack/zppsvx.f
new file mode 100644
index 000000000..b2307fbea
--- /dev/null
+++ b/costa/native/external/lapack/zppsvx.f
@@ -0,0 +1,383 @@
+      SUBROUTINE ZPPSVX( FACT, UPLO, N, NRHS, AP, AFP, EQUED, S, B, LDB,
+     $                   X, LDX, RCOND, FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          EQUED, FACT, UPLO
+      INTEGER            INFO, LDB, LDX, N, NRHS
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   BERR( * ), FERR( * ), RWORK( * ), S( * )
+      COMPLEX*16         AFP( * ), AP( * ), B( LDB, * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPPSVX uses the Cholesky factorization A = U**H*U or A = L*L**H to
+*  compute the solution to a complex system of linear equations
+*     A * X = B,
+*  where A is an N-by-N Hermitian positive definite matrix stored in
+*  packed format and X and B are N-by-NRHS matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'E', real scaling factors are computed to equilibrate
+*     the system:
+*        diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B
+*     Whether or not the system will be equilibrated depends on the
+*     scaling of the matrix A, but if equilibration is used, A is
+*     overwritten by diag(S)*A*diag(S) and B by diag(S)*B.
+*
+*  2. If FACT = 'N' or 'E', the Cholesky decomposition is used to
+*     factor the matrix A (after equilibration if FACT = 'E') as
+*        A = U'* U ,  if UPLO = 'U', or
+*        A = L * L',  if UPLO = 'L',
+*     where U is an upper triangular matrix, L is a lower triangular
+*     matrix, and ' indicates conjugate transpose.
+*
+*  3. If the leading i-by-i principal minor is not positive definite,
+*     then the routine returns with INFO = i. Otherwise, the factored
+*     form of A is used to estimate the condition number of the matrix
+*     A.  If the reciprocal of the condition number is less than machine
+*     precision, INFO = N+1 is returned as a warning, but the routine
+*     still goes on to solve for X and compute error bounds as
+*     described below.
+*
+*  4. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  5. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  6. If equilibration was used, the matrix X is premultiplied by
+*     diag(S) so that it solves the original system before
+*     equilibration.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of the matrix A is
+*          supplied on entry, and if not, whether the matrix A should be
+*          equilibrated before it is factored.
+*          = 'F':  On entry, AFP contains the factored form of A.
+*                  If EQUED = 'Y', the matrix A has been equilibrated
+*                  with scaling factors given by S.  AP and AFP will not
+*                  be modified.
+*          = 'N':  The matrix A will be copied to AFP and factored.
+*          = 'E':  The matrix A will be equilibrated if necessary, then
+*                  copied to AFP and factored.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AP      (input/output) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          A, packed columnwise in a linear array, except if FACT = 'F'
+*          and EQUED = 'Y', then A must contain the equilibrated matrix
+*          diag(S)*A*diag(S).  The j-th column of A is stored in the
+*          array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*          See below for further details.  A is not modified if
+*          FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit.
+*
+*          On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by
+*          diag(S)*A*diag(S).
+*
+*  AFP     (input or output) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          If FACT = 'F', then AFP is an input argument and on entry
+*          contains the triangular factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H, in the same storage
+*          format as A.  If EQUED .ne. 'N', then AFP is the factored
+*          form of the equilibrated matrix A.
+*
+*          If FACT = 'N', then AFP is an output argument and on exit
+*          returns the triangular factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H of the original
+*          matrix A.
+*
+*          If FACT = 'E', then AFP is an output argument and on exit
+*          returns the triangular factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H of the equilibrated
+*          matrix A (see the description of AP for the form of the
+*          equilibrated matrix).
+*
+*  EQUED   (input or output) CHARACTER*1
+*          Specifies the form of equilibration that was done.
+*          = 'N':  No equilibration (always true if FACT = 'N').
+*          = 'Y':  Equilibration was done, i.e., A has been replaced by
+*                  diag(S) * A * diag(S).
+*          EQUED is an input argument if FACT = 'F'; otherwise, it is an
+*          output argument.
+*
+*  S       (input or output) DOUBLE PRECISION array, dimension (N)
+*          The scale factors for A; not accessed if EQUED = 'N'.  S is
+*          an input argument if FACT = 'F'; otherwise, S is an output
+*          argument.  If FACT = 'F' and EQUED = 'Y', each element of S
+*          must be positive.
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y',
+*          B is overwritten by diag(S) * B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) COMPLEX*16 array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X to
+*          the original system of equations.  Note that if EQUED = 'Y',
+*          A and B are modified on exit, and the solution to the
+*          equilibrated system is inv(diag(S))*X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The estimate of the reciprocal condition number of the matrix
+*          A after equilibration (if done).  If RCOND is less than the
+*          machine precision (in particular, if RCOND = 0), the matrix
+*          is singular to working precision.  This condition is
+*          indicated by a return code of INFO > 0.
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= N:  the leading minor of order i of A is
+*                       not positive definite, so the factorization
+*                       could not be completed, and the solution has not
+*                       been computed. RCOND = 0 is returned.
+*                = N+1: U is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  Further Details
+*  ===============
+*
+*  The packed storage scheme is illustrated by the following example
+*  when N = 4, UPLO = 'U':
+*
+*  Two-dimensional storage of the Hermitian matrix A:
+*
+*     a11 a12 a13 a14
+*         a22 a23 a24
+*             a33 a34     (aij = conjg(aji))
+*                 a44
+*
+*  Packed storage of the upper triangle of A:
+*
+*  AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ]
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            EQUIL, NOFACT, RCEQU
+      INTEGER            I, INFEQU, J
+      DOUBLE PRECISION   AMAX, ANORM, BIGNUM, SCOND, SMAX, SMIN, SMLNUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, ZLANHP
+      EXTERNAL           LSAME, DLAMCH, ZLANHP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZCOPY, ZLACPY, ZLAQHP, ZPPCON, ZPPEQU,
+     $                   ZPPRFS, ZPPTRF, ZPPTRS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      EQUIL = LSAME( FACT, 'E' )
+      IF( NOFACT .OR. EQUIL ) THEN
+         EQUED = 'N'
+         RCEQU = .FALSE.
+      ELSE
+         RCEQU = LSAME( EQUED, 'Y' )
+         SMLNUM = DLAMCH( 'Safe minimum' )
+         BIGNUM = ONE / SMLNUM
+      END IF
+*
+*     Test the input parameters.
+*
+      IF( .NOT.NOFACT .AND. .NOT.EQUIL .AND. .NOT.LSAME( FACT, 'F' ) )
+     $     THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LSAME( FACT, 'F' ) .AND. .NOT.
+     $         ( RCEQU .OR. LSAME( EQUED, 'N' ) ) ) THEN
+         INFO = -7
+      ELSE
+         IF( RCEQU ) THEN
+            SMIN = BIGNUM
+            SMAX = ZERO
+            DO 10 J = 1, N
+               SMIN = MIN( SMIN, S( J ) )
+               SMAX = MAX( SMAX, S( J ) )
+   10       CONTINUE
+            IF( SMIN.LE.ZERO ) THEN
+               INFO = -8
+            ELSE IF( N.GT.0 ) THEN
+               SCOND = MAX( SMIN, SMLNUM ) / MIN( SMAX, BIGNUM )
+            ELSE
+               SCOND = ONE
+            END IF
+         END IF
+         IF( INFO.EQ.0 ) THEN
+            IF( LDB.LT.MAX( 1, N ) ) THEN
+               INFO = -10
+            ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+               INFO = -12
+            END IF
+         END IF
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPPSVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( EQUIL ) THEN
+*
+*        Compute row and column scalings to equilibrate the matrix A.
+*
+         CALL ZPPEQU( UPLO, N, AP, S, SCOND, AMAX, INFEQU )
+         IF( INFEQU.EQ.0 ) THEN
+*
+*           Equilibrate the matrix.
+*
+            CALL ZLAQHP( UPLO, N, AP, S, SCOND, AMAX, EQUED )
+            RCEQU = LSAME( EQUED, 'Y' )
+         END IF
+      END IF
+*
+*     Scale the right-hand side.
+*
+      IF( RCEQU ) THEN
+         DO 30 J = 1, NRHS
+            DO 20 I = 1, N
+               B( I, J ) = S( I )*B( I, J )
+   20       CONTINUE
+   30    CONTINUE
+      END IF
+*
+      IF( NOFACT .OR. EQUIL ) THEN
+*
+*        Compute the Cholesky factorization A = U'*U or A = L*L'.
+*
+         CALL ZCOPY( N*( N+1 ) / 2, AP, 1, AFP, 1 )
+         CALL ZPPTRF( UPLO, N, AFP, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      ANORM = ZLANHP( 'I', UPLO, N, AP, RWORK )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL ZPPCON( UPLO, N, AFP, ANORM, RCOND, WORK, RWORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.DLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution matrix X.
+*
+      CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL ZPPTRS( UPLO, N, NRHS, AFP, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solution and
+*     compute error bounds and backward error estimates for it.
+*
+      CALL ZPPRFS( UPLO, N, NRHS, AP, AFP, B, LDB, X, LDX, FERR, BERR,
+     $             WORK, RWORK, INFO )
+*
+*     Transform the solution matrix X to a solution of the original
+*     system.
+*
+      IF( RCEQU ) THEN
+         DO 50 J = 1, NRHS
+            DO 40 I = 1, N
+               X( I, J ) = S( I )*X( I, J )
+   40       CONTINUE
+   50    CONTINUE
+         DO 60 J = 1, NRHS
+            FERR( J ) = FERR( J ) / SCOND
+   60    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZPPSVX
+*
+      END
diff --git a/costa/native/external/lapack/zpptrf.f b/costa/native/external/lapack/zpptrf.f
new file mode 100644
index 000000000..8f7bb0c42
--- /dev/null
+++ b/costa/native/external/lapack/zpptrf.f
@@ -0,0 +1,179 @@
+      SUBROUTINE ZPPTRF( UPLO, N, AP, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         AP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPPTRF computes the Cholesky factorization of a complex Hermitian
+*  positive definite matrix A stored in packed format.
+*
+*  The factorization has the form
+*     A = U**H * U,  if UPLO = 'U', or
+*     A = L  * L**H,  if UPLO = 'L',
+*  where U is an upper triangular matrix and L is lower triangular.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the Hermitian matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*          See below for further details.
+*
+*          On exit, if INFO = 0, the triangular factor U or L from the
+*          Cholesky factorization A = U**H*U or A = L*L**H, in the same
+*          storage format as A.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i is not
+*                positive definite, and the factorization could not be
+*                completed.
+*
+*  Further Details
+*  ===============
+*
+*  The packed storage scheme is illustrated by the following example
+*  when N = 4, UPLO = 'U':
+*
+*  Two-dimensional storage of the Hermitian matrix A:
+*
+*     a11 a12 a13 a14
+*         a22 a23 a24
+*             a33 a34     (aij = conjg(aji))
+*                 a44
+*
+*  Packed storage of the upper triangle of A:
+*
+*  AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ]
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, JC, JJ
+      DOUBLE PRECISION   AJJ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      COMPLEX*16         ZDOTC
+      EXTERNAL           LSAME, ZDOTC
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZDSCAL, ZHPR, ZTPSV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPPTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Compute the Cholesky factorization A = U'*U.
+*
+         JJ = 0
+         DO 10 J = 1, N
+            JC = JJ + 1
+            JJ = JJ + J
+*
+*           Compute elements 1:J-1 of column J.
+*
+            IF( J.GT.1 )
+     $         CALL ZTPSV( 'Upper', 'Conjugate transpose', 'Non-unit',
+     $                     J-1, AP, AP( JC ), 1 )
+*
+*           Compute U(J,J) and test for non-positive-definiteness.
+*
+            AJJ = DBLE( AP( JJ ) ) - ZDOTC( J-1, AP( JC ), 1, AP( JC ),
+     $            1 )
+            IF( AJJ.LE.ZERO ) THEN
+               AP( JJ ) = AJJ
+               GO TO 30
+            END IF
+            AP( JJ ) = SQRT( AJJ )
+   10    CONTINUE
+      ELSE
+*
+*        Compute the Cholesky factorization A = L*L'.
+*
+         JJ = 1
+         DO 20 J = 1, N
+*
+*           Compute L(J,J) and test for non-positive-definiteness.
+*
+            AJJ = DBLE( AP( JJ ) )
+            IF( AJJ.LE.ZERO ) THEN
+               AP( JJ ) = AJJ
+               GO TO 30
+            END IF
+            AJJ = SQRT( AJJ )
+            AP( JJ ) = AJJ
+*
+*           Compute elements J+1:N of column J and update the trailing
+*           submatrix.
+*
+            IF( J.LT.N ) THEN
+               CALL ZDSCAL( N-J, ONE / AJJ, AP( JJ+1 ), 1 )
+               CALL ZHPR( 'Lower', N-J, -ONE, AP( JJ+1 ), 1,
+     $                    AP( JJ+N-J+1 ) )
+               JJ = JJ + N - J + 1
+            END IF
+   20    CONTINUE
+      END IF
+      GO TO 40
+*
+   30 CONTINUE
+      INFO = J
+*
+   40 CONTINUE
+      RETURN
+*
+*     End of ZPPTRF
+*
+      END
diff --git a/costa/native/external/lapack/zpptri.f b/costa/native/external/lapack/zpptri.f
new file mode 100644
index 000000000..4e82f2b9e
--- /dev/null
+++ b/costa/native/external/lapack/zpptri.f
@@ -0,0 +1,131 @@
+      SUBROUTINE ZPPTRI( UPLO, N, AP, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         AP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPPTRI computes the inverse of a complex Hermitian positive definite
+*  matrix A using the Cholesky factorization A = U**H*U or A = L*L**H
+*  computed by ZPPTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangular factor is stored in AP;
+*          = 'L':  Lower triangular factor is stored in AP.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          On entry, the triangular factor U or L from the Cholesky
+*          factorization A = U**H*U or A = L*L**H, packed columnwise as
+*          a linear array.  The j-th column of U or L is stored in the
+*          array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n.
+*
+*          On exit, the upper or lower triangle of the (Hermitian)
+*          inverse of A, overwriting the input factor U or L.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the (i,i) element of the factor U or L is
+*                zero, and the inverse could not be computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, JC, JJ, JJN
+      DOUBLE PRECISION   AJJ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      COMPLEX*16         ZDOTC
+      EXTERNAL           LSAME, ZDOTC
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZDSCAL, ZHPR, ZTPMV, ZTPTRI
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPPTRI', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Invert the triangular Cholesky factor U or L.
+*
+      CALL ZTPTRI( UPLO, 'Non-unit', N, AP, INFO )
+      IF( INFO.GT.0 )
+     $   RETURN
+      IF( UPPER ) THEN
+*
+*        Compute the product inv(U) * inv(U)'.
+*
+         JJ = 0
+         DO 10 J = 1, N
+            JC = JJ + 1
+            JJ = JJ + J
+            IF( J.GT.1 )
+     $         CALL ZHPR( 'Upper', J-1, ONE, AP( JC ), 1, AP )
+            AJJ = AP( JJ )
+            CALL ZDSCAL( J, AJJ, AP( JC ), 1 )
+   10    CONTINUE
+*
+      ELSE
+*
+*        Compute the product inv(L)' * inv(L).
+*
+         JJ = 1
+         DO 20 J = 1, N
+            JJN = JJ + N - J + 1
+            AP( JJ ) = DBLE( ZDOTC( N-J+1, AP( JJ ), 1, AP( JJ ), 1 ) )
+            IF( J.LT.N )
+     $         CALL ZTPMV( 'Lower', 'Conjugate transpose', 'Non-unit',
+     $                     N-J, AP( JJN ), AP( JJ+1 ), 1 )
+            JJ = JJN
+   20    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZPPTRI
+*
+      END
diff --git a/costa/native/external/lapack/zpptrs.f b/costa/native/external/lapack/zpptrs.f
new file mode 100644
index 000000000..6aa056708
--- /dev/null
+++ b/costa/native/external/lapack/zpptrs.f
@@ -0,0 +1,135 @@
+      SUBROUTINE ZPPTRS( UPLO, N, NRHS, AP, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         AP( * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPPTRS solves a system of linear equations A*X = B with a Hermitian
+*  positive definite matrix A in packed storage using the Cholesky
+*  factorization A = U**H*U or A = L*L**H computed by ZPPTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AP      (input) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          The triangular factor U or L from the Cholesky factorization
+*          A = U**H*U or A = L*L**H, packed columnwise in a linear
+*          array.  The j-th column of U or L is stored in the array AP
+*          as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = U(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = L(i,j) for j<=i<=n.
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZTPSV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPPTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Solve A*X = B where A = U'*U.
+*
+         DO 10 I = 1, NRHS
+*
+*           Solve U'*X = B, overwriting B with X.
+*
+            CALL ZTPSV( 'Upper', 'Conjugate transpose', 'Non-unit', N,
+     $                  AP, B( 1, I ), 1 )
+*
+*           Solve U*X = B, overwriting B with X.
+*
+            CALL ZTPSV( 'Upper', 'No transpose', 'Non-unit', N, AP,
+     $                  B( 1, I ), 1 )
+   10    CONTINUE
+      ELSE
+*
+*        Solve A*X = B where A = L*L'.
+*
+         DO 20 I = 1, NRHS
+*
+*           Solve L*Y = B, overwriting B with X.
+*
+            CALL ZTPSV( 'Lower', 'No transpose', 'Non-unit', N, AP,
+     $                  B( 1, I ), 1 )
+*
+*           Solve L'*X = Y, overwriting B with X.
+*
+            CALL ZTPSV( 'Lower', 'Conjugate transpose', 'Non-unit', N,
+     $                  AP, B( 1, I ), 1 )
+   20    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZPPTRS
+*
+      END
diff --git a/costa/native/external/lapack/zptcon.f b/costa/native/external/lapack/zptcon.f
new file mode 100644
index 000000000..8905e07f8
--- /dev/null
+++ b/costa/native/external/lapack/zptcon.f
@@ -0,0 +1,151 @@
+      SUBROUTINE ZPTCON( N, D, E, ANORM, RCOND, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, N
+      DOUBLE PRECISION   ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * ), RWORK( * )
+      COMPLEX*16         E( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPTCON computes the reciprocal of the condition number (in the
+*  1-norm) of a complex Hermitian positive definite tridiagonal matrix
+*  using the factorization A = L*D*L**H or A = U**H*D*U computed by
+*  ZPTTRF.
+*
+*  Norm(inv(A)) is computed by a direct method, and the reciprocal of
+*  the condition number is computed as
+*                   RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  D       (input) DOUBLE PRECISION array, dimension (N)
+*          The n diagonal elements of the diagonal matrix D from the
+*          factorization of A, as computed by ZPTTRF.
+*
+*  E       (input) COMPLEX*16 array, dimension (N-1)
+*          The (n-1) off-diagonal elements of the unit bidiagonal factor
+*          U or L from the factorization of A, as computed by ZPTTRF.
+*
+*  ANORM   (input) DOUBLE PRECISION
+*          The 1-norm of the original matrix A.
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is the
+*          1-norm of inv(A) computed in this routine.
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The method used is described in Nicholas J. Higham, "Efficient
+*  Algorithms for Computing the Condition Number of a Tridiagonal
+*  Matrix", SIAM J. Sci. Stat. Comput., Vol. 7, No. 1, January 1986.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, IX
+      DOUBLE PRECISION   AINVNM
+*     ..
+*     .. External Functions ..
+      INTEGER            IDAMAX
+      EXTERNAL           IDAMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments.
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPTCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.EQ.ZERO ) THEN
+         RETURN
+      END IF
+*
+*     Check that D(1:N) is positive.
+*
+      DO 10 I = 1, N
+         IF( D( I ).LE.ZERO )
+     $      RETURN
+   10 CONTINUE
+*
+*     Solve M(A) * x = e, where M(A) = (m(i,j)) is given by
+*
+*        m(i,j) =  abs(A(i,j)), i = j,
+*        m(i,j) = -abs(A(i,j)), i .ne. j,
+*
+*     and e = [ 1, 1, ..., 1 ]'.  Note M(A) = M(L)*D*M(L)'.
+*
+*     Solve M(L) * x = e.
+*
+      RWORK( 1 ) = ONE
+      DO 20 I = 2, N
+         RWORK( I ) = ONE + RWORK( I-1 )*ABS( E( I-1 ) )
+   20 CONTINUE
+*
+*     Solve D * M(L)' * x = b.
+*
+      RWORK( N ) = RWORK( N ) / D( N )
+      DO 30 I = N - 1, 1, -1
+         RWORK( I ) = RWORK( I ) / D( I ) + RWORK( I+1 )*ABS( E( I ) )
+   30 CONTINUE
+*
+*     Compute AINVNM = max(x(i)), 1<=i<=n.
+*
+      IX = IDAMAX( N, RWORK, 1 )
+      AINVNM = ABS( RWORK( IX ) )
+*
+*     Compute the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+      RETURN
+*
+*     End of ZPTCON
+*
+      END
diff --git a/costa/native/external/lapack/zpteqr.f b/costa/native/external/lapack/zpteqr.f
new file mode 100644
index 000000000..19a305943
--- /dev/null
+++ b/costa/native/external/lapack/zpteqr.f
@@ -0,0 +1,191 @@
+      SUBROUTINE ZPTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPZ
+      INTEGER            INFO, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * ), E( * ), WORK( * )
+      COMPLEX*16         Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPTEQR computes all eigenvalues and, optionally, eigenvectors of a
+*  symmetric positive definite tridiagonal matrix by first factoring the
+*  matrix using DPTTRF and then calling ZBDSQR to compute the singular
+*  values of the bidiagonal factor.
+*
+*  This routine computes the eigenvalues of the positive definite
+*  tridiagonal matrix to high relative accuracy.  This means that if the
+*  eigenvalues range over many orders of magnitude in size, then the
+*  small eigenvalues and corresponding eigenvectors will be computed
+*  more accurately than, for example, with the standard QR method.
+*
+*  The eigenvectors of a full or band positive definite Hermitian matrix
+*  can also be found if ZHETRD, ZHPTRD, or ZHBTRD has been used to
+*  reduce this matrix to tridiagonal form.  (The reduction to
+*  tridiagonal form, however, may preclude the possibility of obtaining
+*  high relative accuracy in the small eigenvalues of the original
+*  matrix, if these eigenvalues range over many orders of magnitude.)
+*
+*  Arguments
+*  =========
+*
+*  COMPZ   (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only.
+*          = 'V':  Compute eigenvectors of original Hermitian
+*                  matrix also.  Array Z contains the unitary matrix
+*                  used to reduce the original matrix to tridiagonal
+*                  form.
+*          = 'I':  Compute eigenvectors of tridiagonal matrix also.
+*
+*  N       (input) INTEGER
+*          The order of the matrix.  N >= 0.
+*
+*  D       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the n diagonal elements of the tridiagonal matrix.
+*          On normal exit, D contains the eigenvalues, in descending
+*          order.
+*
+*  E       (input/output) DOUBLE PRECISION array, dimension (N-1)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix.
+*          On exit, E has been destroyed.
+*
+*  Z       (input/output) COMPLEX*16 array, dimension (LDZ, N)
+*          On entry, if COMPZ = 'V', the unitary matrix used in the
+*          reduction to tridiagonal form.
+*          On exit, if COMPZ = 'V', the orthonormal eigenvectors of the
+*          original Hermitian matrix;
+*          if COMPZ = 'I', the orthonormal eigenvectors of the
+*          tridiagonal matrix.
+*          If INFO > 0 on exit, Z contains the eigenvectors associated
+*          with only the stored eigenvalues.
+*          If  COMPZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          COMPZ = 'V' or 'I', LDZ >= max(1,N).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (4*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  if INFO = i, and i is:
+*                <= N  the Cholesky factorization of the matrix could
+*                      not be performed because the i-th principal minor
+*                      was not positive definite.
+*                > N   the SVD algorithm failed to converge;
+*                      if INFO = N+i, i off-diagonal elements of the
+*                      bidiagonal factor did not converge to zero.
+*
+*  ====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DPTTRF, XERBLA, ZBDSQR, ZLASET
+*     ..
+*     .. Local Arrays ..
+      COMPLEX*16         C( 1, 1 ), VT( 1, 1 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, ICOMPZ, NRU
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( LSAME( COMPZ, 'N' ) ) THEN
+         ICOMPZ = 0
+      ELSE IF( LSAME( COMPZ, 'V' ) ) THEN
+         ICOMPZ = 1
+      ELSE IF( LSAME( COMPZ, 'I' ) ) THEN
+         ICOMPZ = 2
+      ELSE
+         ICOMPZ = -1
+      END IF
+      IF( ICOMPZ.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1,
+     $         N ) ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPTEQR', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( ICOMPZ.GT.0 )
+     $      Z( 1, 1 ) = CONE
+         RETURN
+      END IF
+      IF( ICOMPZ.EQ.2 )
+     $   CALL ZLASET( 'Full', N, N, CZERO, CONE, Z, LDZ )
+*
+*     Call DPTTRF to factor the matrix.
+*
+      CALL DPTTRF( N, D, E, INFO )
+      IF( INFO.NE.0 )
+     $   RETURN
+      DO 10 I = 1, N
+         D( I ) = SQRT( D( I ) )
+   10 CONTINUE
+      DO 20 I = 1, N - 1
+         E( I ) = E( I )*D( I )
+   20 CONTINUE
+*
+*     Call ZBDSQR to compute the singular values/vectors of the
+*     bidiagonal factor.
+*
+      IF( ICOMPZ.GT.0 ) THEN
+         NRU = N
+      ELSE
+         NRU = 0
+      END IF
+      CALL ZBDSQR( 'Lower', N, 0, NRU, 0, D, E, VT, 1, Z, LDZ, C, 1,
+     $             WORK, INFO )
+*
+*     Square the singular values.
+*
+      IF( INFO.EQ.0 ) THEN
+         DO 30 I = 1, N
+            D( I ) = D( I )*D( I )
+   30    CONTINUE
+      ELSE
+         INFO = N + INFO
+      END IF
+*
+      RETURN
+*
+*     End of ZPTEQR
+*
+      END
diff --git a/costa/native/external/lapack/zptrfs.f b/costa/native/external/lapack/zptrfs.f
new file mode 100644
index 000000000..98381b64e
--- /dev/null
+++ b/costa/native/external/lapack/zptrfs.f
@@ -0,0 +1,367 @@
+      SUBROUTINE ZPTRFS( UPLO, N, NRHS, D, E, DF, EF, B, LDB, X, LDX,
+     $                   FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   BERR( * ), D( * ), DF( * ), FERR( * ),
+     $                   RWORK( * )
+      COMPLEX*16         B( LDB, * ), E( * ), EF( * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPTRFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is Hermitian positive definite
+*  and tridiagonal, and provides error bounds and backward error
+*  estimates for the solution.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the superdiagonal or the subdiagonal of the
+*          tridiagonal matrix A is stored and the form of the
+*          factorization:
+*          = 'U':  E is the superdiagonal of A, and A = U**H*D*U;
+*          = 'L':  E is the subdiagonal of A, and A = L*D*L**H.
+*          (The two forms are equivalent if A is real.)
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  D       (input) DOUBLE PRECISION array, dimension (N)
+*          The n real diagonal elements of the tridiagonal matrix A.
+*
+*  E       (input) COMPLEX*16 array, dimension (N-1)
+*          The (n-1) off-diagonal elements of the tridiagonal matrix A
+*          (see UPLO).
+*
+*  DF      (input) DOUBLE PRECISION array, dimension (N)
+*          The n diagonal elements of the diagonal matrix D from
+*          the factorization computed by ZPTTRF.
+*
+*  EF      (input) COMPLEX*16 array, dimension (N-1)
+*          The (n-1) off-diagonal elements of the unit bidiagonal
+*          factor U or L from the factorization computed by ZPTTRF
+*          (see UPLO).
+*
+*  B       (input) COMPLEX*16 array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) COMPLEX*16 array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by ZPTTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D+0 )
+      DOUBLE PRECISION   THREE
+      PARAMETER          ( THREE = 3.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            COUNT, I, IX, J, NZ
+      DOUBLE PRECISION   EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN
+      COMPLEX*16         BI, CX, DX, EX, ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, IDAMAX, DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZAXPY, ZPTTRS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPTRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = 4
+      EPS = DLAMCH( 'Epsilon' )
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 100 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - A * X.  Also compute
+*        abs(A)*abs(x) + abs(b) for use in the backward error bound.
+*
+         IF( UPPER ) THEN
+            IF( N.EQ.1 ) THEN
+               BI = B( 1, J )
+               DX = D( 1 )*X( 1, J )
+               WORK( 1 ) = BI - DX
+               RWORK( 1 ) = CABS1( BI ) + CABS1( DX )
+            ELSE
+               BI = B( 1, J )
+               DX = D( 1 )*X( 1, J )
+               EX = E( 1 )*X( 2, J )
+               WORK( 1 ) = BI - DX - EX
+               RWORK( 1 ) = CABS1( BI ) + CABS1( DX ) +
+     $                      CABS1( E( 1 ) )*CABS1( X( 2, J ) )
+               DO 30 I = 2, N - 1
+                  BI = B( I, J )
+                  CX = DCONJG( E( I-1 ) )*X( I-1, J )
+                  DX = D( I )*X( I, J )
+                  EX = E( I )*X( I+1, J )
+                  WORK( I ) = BI - CX - DX - EX
+                  RWORK( I ) = CABS1( BI ) +
+     $                         CABS1( E( I-1 ) )*CABS1( X( I-1, J ) ) +
+     $                         CABS1( DX ) + CABS1( E( I ) )*
+     $                         CABS1( X( I+1, J ) )
+   30          CONTINUE
+               BI = B( N, J )
+               CX = DCONJG( E( N-1 ) )*X( N-1, J )
+               DX = D( N )*X( N, J )
+               WORK( N ) = BI - CX - DX
+               RWORK( N ) = CABS1( BI ) + CABS1( E( N-1 ) )*
+     $                      CABS1( X( N-1, J ) ) + CABS1( DX )
+            END IF
+         ELSE
+            IF( N.EQ.1 ) THEN
+               BI = B( 1, J )
+               DX = D( 1 )*X( 1, J )
+               WORK( 1 ) = BI - DX
+               RWORK( 1 ) = CABS1( BI ) + CABS1( DX )
+            ELSE
+               BI = B( 1, J )
+               DX = D( 1 )*X( 1, J )
+               EX = DCONJG( E( 1 ) )*X( 2, J )
+               WORK( 1 ) = BI - DX - EX
+               RWORK( 1 ) = CABS1( BI ) + CABS1( DX ) +
+     $                      CABS1( E( 1 ) )*CABS1( X( 2, J ) )
+               DO 40 I = 2, N - 1
+                  BI = B( I, J )
+                  CX = E( I-1 )*X( I-1, J )
+                  DX = D( I )*X( I, J )
+                  EX = DCONJG( E( I ) )*X( I+1, J )
+                  WORK( I ) = BI - CX - DX - EX
+                  RWORK( I ) = CABS1( BI ) +
+     $                         CABS1( E( I-1 ) )*CABS1( X( I-1, J ) ) +
+     $                         CABS1( DX ) + CABS1( E( I ) )*
+     $                         CABS1( X( I+1, J ) )
+   40          CONTINUE
+               BI = B( N, J )
+               CX = E( N-1 )*X( N-1, J )
+               DX = D( N )*X( N, J )
+               WORK( N ) = BI - CX - DX
+               RWORK( N ) = CABS1( BI ) + CABS1( E( N-1 ) )*
+     $                      CABS1( X( N-1, J ) ) + CABS1( DX )
+            END IF
+         END IF
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         S = ZERO
+         DO 50 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) )
+            ELSE
+               S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) /
+     $             ( RWORK( I )+SAFE1 ) )
+            END IF
+   50    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL ZPTTRS( UPLO, N, 1, DF, EF, WORK, N, INFO )
+            CALL ZAXPY( N, DCMPLX( ONE ), WORK, 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(A))*
+*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(A) is the inverse of A
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(A)*abs(X) + abs(B) is less than SAFE2.
+*
+         DO 60 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I )
+            ELSE
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) +
+     $                      SAFE1
+            END IF
+   60    CONTINUE
+         IX = IDAMAX( N, RWORK, 1 )
+         FERR( J ) = RWORK( IX )
+*
+*        Estimate the norm of inv(A).
+*
+*        Solve M(A) * x = e, where M(A) = (m(i,j)) is given by
+*
+*           m(i,j) =  abs(A(i,j)), i = j,
+*           m(i,j) = -abs(A(i,j)), i .ne. j,
+*
+*        and e = [ 1, 1, ..., 1 ]'.  Note M(A) = M(L)*D*M(L)'.
+*
+*        Solve M(L) * x = e.
+*
+         RWORK( 1 ) = ONE
+         DO 70 I = 2, N
+            RWORK( I ) = ONE + RWORK( I-1 )*ABS( EF( I-1 ) )
+   70    CONTINUE
+*
+*        Solve D * M(L)' * x = b.
+*
+         RWORK( N ) = RWORK( N ) / DF( N )
+         DO 80 I = N - 1, 1, -1
+            RWORK( I ) = RWORK( I ) / DF( I ) +
+     $                   RWORK( I+1 )*ABS( EF( I ) )
+   80    CONTINUE
+*
+*        Compute norm(inv(A)) = max(x(i)), 1<=i<=n.
+*
+         IX = IDAMAX( N, RWORK, 1 )
+         FERR( J ) = FERR( J )*ABS( RWORK( IX ) )
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 90 I = 1, N
+            LSTRES = MAX( LSTRES, ABS( X( I, J ) ) )
+   90    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  100 CONTINUE
+*
+      RETURN
+*
+*     End of ZPTRFS
+*
+      END
diff --git a/costa/native/external/lapack/zptsv.f b/costa/native/external/lapack/zptsv.f
new file mode 100644
index 000000000..2949d70e4
--- /dev/null
+++ b/costa/native/external/lapack/zptsv.f
@@ -0,0 +1,101 @@
+      SUBROUTINE ZPTSV( N, NRHS, D, E, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     February 25, 1997
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * )
+      COMPLEX*16         B( LDB, * ), E( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPTSV computes the solution to a complex system of linear equations
+*  A*X = B, where A is an N-by-N Hermitian positive definite tridiagonal
+*  matrix, and X and B are N-by-NRHS matrices.
+*
+*  A is factored as A = L*D*L**H, and the factored form of A is then
+*  used to solve the system of equations.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  D       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the n diagonal elements of the tridiagonal matrix
+*          A.  On exit, the n diagonal elements of the diagonal matrix
+*          D from the factorization A = L*D*L**H.
+*
+*  E       (input/output) COMPLEX*16 array, dimension (N-1)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix A.  On exit, the (n-1) subdiagonal elements of the
+*          unit bidiagonal factor L from the L*D*L**H factorization of
+*          A.  E can also be regarded as the superdiagonal of the unit
+*          bidiagonal factor U from the U**H*D*U factorization of A.
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,N)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the leading minor of order i is not
+*                positive definite, and the solution has not been
+*                computed.  The factorization has not been completed
+*                unless i = N.
+*
+*  =====================================================================
+*
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZPTTRF, ZPTTRS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPTSV ', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the L*D*L' (or U'*D*U) factorization of A.
+*
+      CALL ZPTTRF( N, D, E, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL ZPTTRS( 'Lower', N, NRHS, D, E, B, LDB, INFO )
+      END IF
+      RETURN
+*
+*     End of ZPTSV
+*
+      END
diff --git a/costa/native/external/lapack/zptsvx.f b/costa/native/external/lapack/zptsvx.f
new file mode 100644
index 000000000..eeb5bcb1f
--- /dev/null
+++ b/costa/native/external/lapack/zptsvx.f
@@ -0,0 +1,238 @@
+      SUBROUTINE ZPTSVX( FACT, N, NRHS, D, E, DF, EF, B, LDB, X, LDX,
+     $                   RCOND, FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          FACT
+      INTEGER            INFO, LDB, LDX, N, NRHS
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   BERR( * ), D( * ), DF( * ), FERR( * ),
+     $                   RWORK( * )
+      COMPLEX*16         B( LDB, * ), E( * ), EF( * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPTSVX uses the factorization A = L*D*L**H to compute the solution
+*  to a complex system of linear equations A*X = B, where A is an
+*  N-by-N Hermitian positive definite tridiagonal matrix and X and B
+*  are N-by-NRHS matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'N', the matrix A is factored as A = L*D*L**H, where L
+*     is a unit lower bidiagonal matrix and D is diagonal.  The
+*     factorization can also be regarded as having the form
+*     A = U**H*D*U.
+*
+*  2. If the leading i-by-i principal minor is not positive definite,
+*     then the routine returns with INFO = i. Otherwise, the factored
+*     form of A is used to estimate the condition number of the matrix
+*     A.  If the reciprocal of the condition number is less than machine
+*     precision, INFO = N+1 is returned as a warning, but the routine
+*     still goes on to solve for X and compute error bounds as
+*     described below.
+*
+*  3. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  4. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of the matrix
+*          A is supplied on entry.
+*          = 'F':  On entry, DF and EF contain the factored form of A.
+*                  D, E, DF, and EF will not be modified.
+*          = 'N':  The matrix A will be copied to DF and EF and
+*                  factored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  D       (input) DOUBLE PRECISION array, dimension (N)
+*          The n diagonal elements of the tridiagonal matrix A.
+*
+*  E       (input) COMPLEX*16 array, dimension (N-1)
+*          The (n-1) subdiagonal elements of the tridiagonal matrix A.
+*
+*  DF      (input or output) DOUBLE PRECISION array, dimension (N)
+*          If FACT = 'F', then DF is an input argument and on entry
+*          contains the n diagonal elements of the diagonal matrix D
+*          from the L*D*L**H factorization of A.
+*          If FACT = 'N', then DF is an output argument and on exit
+*          contains the n diagonal elements of the diagonal matrix D
+*          from the L*D*L**H factorization of A.
+*
+*  EF      (input or output) COMPLEX*16 array, dimension (N-1)
+*          If FACT = 'F', then EF is an input argument and on entry
+*          contains the (n-1) subdiagonal elements of the unit
+*          bidiagonal factor L from the L*D*L**H factorization of A.
+*          If FACT = 'N', then EF is an output argument and on exit
+*          contains the (n-1) subdiagonal elements of the unit
+*          bidiagonal factor L from the L*D*L**H factorization of A.
+*
+*  B       (input) COMPLEX*16 array, dimension (LDB,NRHS)
+*          The N-by-NRHS right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) COMPLEX*16 array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The reciprocal condition number of the matrix A.  If RCOND
+*          is less than the machine precision (in particular, if
+*          RCOND = 0), the matrix is singular to working precision.
+*          This condition is indicated by a return code of INFO > 0.
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in any
+*          element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= N:  the leading minor of order i of A is
+*                       not positive definite, so the factorization
+*                       could not be completed, and the solution has not
+*                       been computed. RCOND = 0 is returned.
+*                = N+1: U is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOFACT
+      DOUBLE PRECISION   ANORM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, ZLANHT
+      EXTERNAL           LSAME, DLAMCH, ZLANHT
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, XERBLA, ZCOPY, ZLACPY, ZPTCON, ZPTRFS,
+     $                   ZPTTRF, ZPTTRS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPTSVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( NOFACT ) THEN
+*
+*        Compute the L*D*L' (or U'*D*U) factorization of A.
+*
+         CALL DCOPY( N, D, 1, DF, 1 )
+         IF( N.GT.1 )
+     $      CALL ZCOPY( N-1, E, 1, EF, 1 )
+         CALL ZPTTRF( N, DF, EF, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      ANORM = ZLANHT( '1', N, D, E )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL ZPTCON( N, DF, EF, ANORM, RCOND, RWORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.DLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution vectors X.
+*
+      CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL ZPTTRS( 'Lower', N, NRHS, DF, EF, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solutions and
+*     compute error bounds and backward error estimates for them.
+*
+      CALL ZPTRFS( 'Lower', N, NRHS, D, E, DF, EF, B, LDB, X, LDX, FERR,
+     $             BERR, WORK, RWORK, INFO )
+*
+      RETURN
+*
+*     End of ZPTSVX
+*
+      END
diff --git a/costa/native/external/lapack/zpttrf.f b/costa/native/external/lapack/zpttrf.f
new file mode 100644
index 000000000..92ddc9871
--- /dev/null
+++ b/costa/native/external/lapack/zpttrf.f
@@ -0,0 +1,169 @@
+      SUBROUTINE ZPTTRF( N, D, E, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * )
+      COMPLEX*16         E( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPTTRF computes the L*D*L' factorization of a complex Hermitian
+*  positive definite tridiagonal matrix A.  The factorization may also
+*  be regarded as having the form A = U'*D*U.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  D       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the n diagonal elements of the tridiagonal matrix
+*          A.  On exit, the n diagonal elements of the diagonal matrix
+*          D from the L*D*L' factorization of A.
+*
+*  E       (input/output) COMPLEX*16 array, dimension (N-1)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix A.  On exit, the (n-1) subdiagonal elements of the
+*          unit bidiagonal factor L from the L*D*L' factorization of A.
+*          E can also be regarded as the superdiagonal of the unit
+*          bidiagonal factor U from the U'*D*U factorization of A.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*          > 0: if INFO = k, the leading minor of order k is not
+*               positive definite; if k < N, the factorization could not
+*               be completed, while if k = N, the factorization was
+*               completed, but D(N) = 0.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, I4
+      DOUBLE PRECISION   EII, EIR, F, G
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, DCMPLX, DIMAG, MOD
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -1
+         CALL XERBLA( 'ZPTTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Compute the L*D*L' (or U'*D*U) factorization of A.
+*
+      I4 = MOD( N-1, 4 )
+      DO 10 I = 1, I4
+         IF( D( I ).LE.ZERO ) THEN
+            INFO = I
+            GO TO 30
+         END IF
+         EIR = DBLE( E( I ) )
+         EII = DIMAG( E( I ) )
+         F = EIR / D( I )
+         G = EII / D( I )
+         E( I ) = DCMPLX( F, G )
+         D( I+1 ) = D( I+1 ) - F*EIR - G*EII
+   10 CONTINUE
+*
+      DO 20 I = I4 + 1, N - 4, 4
+*
+*        Drop out of the loop if d(i) <= 0: the matrix is not positive
+*        definite.
+*
+         IF( D( I ).LE.ZERO ) THEN
+            INFO = I
+            GO TO 30
+         END IF
+*
+*        Solve for e(i) and d(i+1).
+*
+         EIR = DBLE( E( I ) )
+         EII = DIMAG( E( I ) )
+         F = EIR / D( I )
+         G = EII / D( I )
+         E( I ) = DCMPLX( F, G )
+         D( I+1 ) = D( I+1 ) - F*EIR - G*EII
+*
+         IF( D( I+1 ).LE.ZERO ) THEN
+            INFO = I + 1
+            GO TO 30
+         END IF
+*
+*        Solve for e(i+1) and d(i+2).
+*
+         EIR = DBLE( E( I+1 ) )
+         EII = DIMAG( E( I+1 ) )
+         F = EIR / D( I+1 )
+         G = EII / D( I+1 )
+         E( I+1 ) = DCMPLX( F, G )
+         D( I+2 ) = D( I+2 ) - F*EIR - G*EII
+*
+         IF( D( I+2 ).LE.ZERO ) THEN
+            INFO = I + 2
+            GO TO 30
+         END IF
+*
+*        Solve for e(i+2) and d(i+3).
+*
+         EIR = DBLE( E( I+2 ) )
+         EII = DIMAG( E( I+2 ) )
+         F = EIR / D( I+2 )
+         G = EII / D( I+2 )
+         E( I+2 ) = DCMPLX( F, G )
+         D( I+3 ) = D( I+3 ) - F*EIR - G*EII
+*
+         IF( D( I+3 ).LE.ZERO ) THEN
+            INFO = I + 3
+            GO TO 30
+         END IF
+*
+*        Solve for e(i+3) and d(i+4).
+*
+         EIR = DBLE( E( I+3 ) )
+         EII = DIMAG( E( I+3 ) )
+         F = EIR / D( I+3 )
+         G = EII / D( I+3 )
+         E( I+3 ) = DCMPLX( F, G )
+         D( I+4 ) = D( I+4 ) - F*EIR - G*EII
+   20 CONTINUE
+*
+*     Check d(n) for positive definiteness.
+*
+      IF( D( N ).LE.ZERO )
+     $   INFO = N
+*
+   30 CONTINUE
+      RETURN
+*
+*     End of ZPTTRF
+*
+      END
diff --git a/costa/native/external/lapack/zpttrs.f b/costa/native/external/lapack/zpttrs.f
new file mode 100644
index 000000000..2dbbe2e7e
--- /dev/null
+++ b/costa/native/external/lapack/zpttrs.f
@@ -0,0 +1,136 @@
+      SUBROUTINE ZPTTRS( UPLO, N, NRHS, D, E, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * )
+      COMPLEX*16         B( LDB, * ), E( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPTTRS solves a tridiagonal system of the form
+*     A * X = B
+*  using the factorization A = U'*D*U or A = L*D*L' computed by ZPTTRF.
+*  D is a diagonal matrix specified in the vector D, U (or L) is a unit
+*  bidiagonal matrix whose superdiagonal (subdiagonal) is specified in
+*  the vector E, and X and B are N by NRHS matrices.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies the form of the factorization and whether the
+*          vector E is the superdiagonal of the upper bidiagonal factor
+*          U or the subdiagonal of the lower bidiagonal factor L.
+*          = 'U':  A = U'*D*U, E is the superdiagonal of U
+*          = 'L':  A = L*D*L', E is the subdiagonal of L
+*
+*  N       (input) INTEGER
+*          The order of the tridiagonal matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  D       (input) DOUBLE PRECISION array, dimension (N)
+*          The n diagonal elements of the diagonal matrix D from the
+*          factorization A = U'*D*U or A = L*D*L'.
+*
+*  E       (input) COMPLEX*16 array, dimension (N-1)
+*          If UPLO = 'U', the (n-1) superdiagonal elements of the unit
+*          bidiagonal factor U from the factorization A = U'*D*U.
+*          If UPLO = 'L', the (n-1) subdiagonal elements of the unit
+*          bidiagonal factor L from the factorization A = L*D*L'.
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the right hand side vectors B for the system of
+*          linear equations.
+*          On exit, the solution vectors, X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            IUPLO, J, JB, NB
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZPTTS2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments.
+*
+      INFO = 0
+      UPPER = ( UPLO.EQ.'U' .OR. UPLO.EQ.'u' )
+      IF( .NOT.UPPER .AND. .NOT.( UPLO.EQ.'L' .OR. UPLO.EQ.'l' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZPTTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+*     Determine the number of right-hand sides to solve at a time.
+*
+      IF( NRHS.EQ.1 ) THEN
+         NB = 1
+      ELSE
+         NB = MAX( 1, ILAENV( 1, 'ZPTTRS', UPLO, N, NRHS, -1, -1 ) )
+      END IF
+*
+*     Decode UPLO
+*
+      IF( UPPER ) THEN
+         IUPLO = 1
+      ELSE
+         IUPLO = 0
+      END IF
+*
+      IF( NB.GE.NRHS ) THEN
+         CALL ZPTTS2( IUPLO, N, NRHS, D, E, B, LDB )
+      ELSE
+         DO 10 J = 1, NRHS, NB
+            JB = MIN( NRHS-J+1, NB )
+            CALL ZPTTS2( IUPLO, N, JB, D, E, B( 1, J ), LDB )
+   10    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZPTTRS
+*
+      END
diff --git a/costa/native/external/lapack/zptts2.f b/costa/native/external/lapack/zptts2.f
new file mode 100644
index 000000000..4d0d38df3
--- /dev/null
+++ b/costa/native/external/lapack/zptts2.f
@@ -0,0 +1,177 @@
+      SUBROUTINE ZPTTS2( IUPLO, N, NRHS, D, E, B, LDB )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            IUPLO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * )
+      COMPLEX*16         B( LDB, * ), E( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZPTTS2 solves a tridiagonal system of the form
+*     A * X = B
+*  using the factorization A = U'*D*U or A = L*D*L' computed by ZPTTRF.
+*  D is a diagonal matrix specified in the vector D, U (or L) is a unit
+*  bidiagonal matrix whose superdiagonal (subdiagonal) is specified in
+*  the vector E, and X and B are N by NRHS matrices.
+*
+*  Arguments
+*  =========
+*
+*  IUPLO   (input) INTEGER
+*          Specifies the form of the factorization and whether the
+*          vector E is the superdiagonal of the upper bidiagonal factor
+*          U or the subdiagonal of the lower bidiagonal factor L.
+*          = 1:  A = U'*D*U, E is the superdiagonal of U
+*          = 0:  A = L*D*L', E is the subdiagonal of L
+*
+*  N       (input) INTEGER
+*          The order of the tridiagonal matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  D       (input) DOUBLE PRECISION array, dimension (N)
+*          The n diagonal elements of the diagonal matrix D from the
+*          factorization A = U'*D*U or A = L*D*L'.
+*
+*  E       (input) COMPLEX*16 array, dimension (N-1)
+*          If IUPLO = 1, the (n-1) superdiagonal elements of the unit
+*          bidiagonal factor U from the factorization A = U'*D*U.
+*          If IUPLO = 0, the (n-1) subdiagonal elements of the unit
+*          bidiagonal factor L from the factorization A = L*D*L'.
+*
+*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+*          On entry, the right hand side vectors B for the system of
+*          linear equations.
+*          On exit, the solution vectors, X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, J
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZDSCAL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG
+*     ..
+*     .. Executable Statements ..
+*
+*     Quick return if possible
+*
+      IF( N.LE.1 ) THEN
+         IF( N.EQ.1 )
+     $      CALL ZDSCAL( NRHS, 1.D0 / D( 1 ), B, LDB )
+         RETURN
+      END IF
+*
+      IF( IUPLO.EQ.1 ) THEN
+*
+*        Solve A * X = B using the factorization A = U'*D*U,
+*        overwriting each right hand side vector with its solution.
+*
+         IF( NRHS.LE.2 ) THEN
+            J = 1
+   10       CONTINUE
+*
+*           Solve U' * x = b.
+*
+            DO 20 I = 2, N
+               B( I, J ) = B( I, J ) - B( I-1, J )*DCONJG( E( I-1 ) )
+   20       CONTINUE
+*
+*           Solve D * U * x = b.
+*
+            DO 30 I = 1, N
+               B( I, J ) = B( I, J ) / D( I )
+   30       CONTINUE
+            DO 40 I = N - 1, 1, -1
+               B( I, J ) = B( I, J ) - B( I+1, J )*E( I )
+   40       CONTINUE
+            IF( J.LT.NRHS ) THEN
+               J = J + 1
+               GO TO 10
+            END IF
+         ELSE
+            DO 70 J = 1, NRHS
+*
+*              Solve U' * x = b.
+*
+               DO 50 I = 2, N
+                  B( I, J ) = B( I, J ) - B( I-1, J )*DCONJG( E( I-1 ) )
+   50          CONTINUE
+*
+*              Solve D * U * x = b.
+*
+               B( N, J ) = B( N, J ) / D( N )
+               DO 60 I = N - 1, 1, -1
+                  B( I, J ) = B( I, J ) / D( I ) - B( I+1, J )*E( I )
+   60          CONTINUE
+   70       CONTINUE
+         END IF
+      ELSE
+*
+*        Solve A * X = B using the factorization A = L*D*L',
+*        overwriting each right hand side vector with its solution.
+*
+         IF( NRHS.LE.2 ) THEN
+            J = 1
+   80       CONTINUE
+*
+*           Solve L * x = b.
+*
+            DO 90 I = 2, N
+               B( I, J ) = B( I, J ) - B( I-1, J )*E( I-1 )
+   90       CONTINUE
+*
+*           Solve D * L' * x = b.
+*
+            DO 100 I = 1, N
+               B( I, J ) = B( I, J ) / D( I )
+  100       CONTINUE
+            DO 110 I = N - 1, 1, -1
+               B( I, J ) = B( I, J ) - B( I+1, J )*DCONJG( E( I ) )
+  110       CONTINUE
+            IF( J.LT.NRHS ) THEN
+               J = J + 1
+               GO TO 80
+            END IF
+         ELSE
+            DO 140 J = 1, NRHS
+*
+*              Solve L * x = b.
+*
+               DO 120 I = 2, N
+                  B( I, J ) = B( I, J ) - B( I-1, J )*E( I-1 )
+  120          CONTINUE
+*
+*              Solve D * L' * x = b.
+*
+               B( N, J ) = B( N, J ) / D( N )
+               DO 130 I = N - 1, 1, -1
+                  B( I, J ) = B( I, J ) / D( I ) -
+     $                        B( I+1, J )*DCONJG( E( I ) )
+  130          CONTINUE
+  140       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZPTTS2
+*
+      END
diff --git a/costa/native/external/lapack/zrot.f b/costa/native/external/lapack/zrot.f
new file mode 100644
index 000000000..993fc0dd9
--- /dev/null
+++ b/costa/native/external/lapack/zrot.f
@@ -0,0 +1,92 @@
+      SUBROUTINE ZROT( N, CX, INCX, CY, INCY, C, S )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      INTEGER            INCX, INCY, N
+      DOUBLE PRECISION   C
+      COMPLEX*16         S
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         CX( * ), CY( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZROT   applies a plane rotation, where the cos (C) is real and the
+*  sin (S) is complex, and the vectors CX and CY are complex.
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The number of elements in the vectors CX and CY.
+*
+*  CX      (input/output) COMPLEX*16 array, dimension (N)
+*          On input, the vector X.
+*          On output, CX is overwritten with C*X + S*Y.
+*
+*  INCX    (input) INTEGER
+*          The increment between successive values of CY.  INCX <> 0.
+*
+*  CY      (input/output) COMPLEX*16 array, dimension (N)
+*          On input, the vector Y.
+*          On output, CY is overwritten with -CONJG(S)*X + C*Y.
+*
+*  INCY    (input) INTEGER
+*          The increment between successive values of CY.  INCX <> 0.
+*
+*  C       (input) DOUBLE PRECISION
+*  S       (input) COMPLEX*16
+*          C and S define a rotation
+*             [  C          S  ]
+*             [ -conjg(S)   C  ]
+*          where C*C + S*CONJG(S) = 1.0.
+*
+* =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            I, IX, IY
+      COMPLEX*16         STEMP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG
+*     ..
+*     .. Executable Statements ..
+*
+      IF( N.LE.0 )
+     $   RETURN
+      IF( INCX.EQ.1 .AND. INCY.EQ.1 )
+     $   GO TO 20
+*
+*     Code for unequal increments or equal increments not equal to 1
+*
+      IX = 1
+      IY = 1
+      IF( INCX.LT.0 )
+     $   IX = ( -N+1 )*INCX + 1
+      IF( INCY.LT.0 )
+     $   IY = ( -N+1 )*INCY + 1
+      DO 10 I = 1, N
+         STEMP = C*CX( IX ) + S*CY( IY )
+         CY( IY ) = C*CY( IY ) - DCONJG( S )*CX( IX )
+         CX( IX ) = STEMP
+         IX = IX + INCX
+         IY = IY + INCY
+   10 CONTINUE
+      RETURN
+*
+*     Code for both increments equal to 1
+*
+   20 CONTINUE
+      DO 30 I = 1, N
+         STEMP = C*CX( I ) + S*CY( I )
+         CY( I ) = C*CY( I ) - DCONJG( S )*CX( I )
+         CX( I ) = STEMP
+   30 CONTINUE
+      RETURN
+      END
diff --git a/costa/native/external/lapack/zspcon.f b/costa/native/external/lapack/zspcon.f
new file mode 100644
index 000000000..0ff3dce48
--- /dev/null
+++ b/costa/native/external/lapack/zspcon.f
@@ -0,0 +1,155 @@
+      SUBROUTINE ZSPCON( UPLO, N, AP, IPIV, ANORM, RCOND, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+      DOUBLE PRECISION   ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         AP( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZSPCON estimates the reciprocal of the condition number (in the
+*  1-norm) of a complex symmetric packed matrix A using the
+*  factorization A = U*D*U**T or A = L*D*L**T computed by ZSPTRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**T;
+*          = 'L':  Lower triangular, form is A = L*D*L**T.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          The block diagonal matrix D and the multipliers used to
+*          obtain the factor U or L as computed by ZSPTRF, stored as a
+*          packed triangular matrix.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by ZSPTRF.
+*
+*  ANORM   (input) DOUBLE PRECISION
+*          The 1-norm of the original matrix A.
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an
+*          estimate of the 1-norm of inv(A) computed in this routine.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, IP, KASE
+      DOUBLE PRECISION   AINVNM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLACON, ZSPTRS
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZSPCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.LE.ZERO ) THEN
+         RETURN
+      END IF
+*
+*     Check that the diagonal matrix D is nonsingular.
+*
+      IF( UPPER ) THEN
+*
+*        Upper triangular storage: examine D from bottom to top
+*
+         IP = N*( N+1 ) / 2
+         DO 10 I = N, 1, -1
+            IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO )
+     $         RETURN
+            IP = IP - I
+   10    CONTINUE
+      ELSE
+*
+*        Lower triangular storage: examine D from top to bottom.
+*
+         IP = 1
+         DO 20 I = 1, N
+            IF( IPIV( I ).GT.0 .AND. AP( IP ).EQ.ZERO )
+     $         RETURN
+            IP = IP + N - I + 1
+   20    CONTINUE
+      END IF
+*
+*     Estimate the 1-norm of the inverse.
+*
+      KASE = 0
+   30 CONTINUE
+      CALL ZLACON( N, WORK( N+1 ), WORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+*
+*        Multiply by inv(L*D*L') or inv(U*D*U').
+*
+         CALL ZSPTRS( UPLO, N, 1, AP, IPIV, WORK, N, INFO )
+         GO TO 30
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+      RETURN
+*
+*     End of ZSPCON
+*
+      END
diff --git a/costa/native/external/lapack/zspmv.f b/costa/native/external/lapack/zspmv.f
new file mode 100644
index 000000000..8ad09f486
--- /dev/null
+++ b/costa/native/external/lapack/zspmv.f
@@ -0,0 +1,265 @@
+      SUBROUTINE ZSPMV( UPLO, N, ALPHA, AP, X, INCX, BETA, Y, INCY )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INCX, INCY, N
+      COMPLEX*16         ALPHA, BETA
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         AP( * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZSPMV  performs the matrix-vector operation
+*
+*     y := alpha*A*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are n element vectors and
+*  A is an n by n symmetric matrix, supplied in packed form.
+*
+*  Arguments
+*  ==========
+*
+*  UPLO   - CHARACTER*1
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the matrix A is supplied in the packed
+*           array AP as follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  supplied in AP.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  supplied in AP.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  AP     - COMPLEX*16 array, dimension at least
+*           ( ( N*( N + 1 ) )/2 ).
+*           Before entry, with UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
+*           and a( 2, 2 ) respectively, and so on.
+*           Before entry, with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
+*           and a( 3, 1 ) respectively, and so on.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16 array, dimension at least
+*           ( 1 + ( N - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the N-
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX*16
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX*16 array, dimension at least
+*           ( 1 + ( N - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y. On exit, Y is overwritten by the updated
+*           vector y.
+*
+*  INCY   - INTEGER
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+      COMPLEX*16         ZERO
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, INFO, IX, IY, J, JX, JY, K, KK, KX, KY
+      COMPLEX*16         TEMP1, TEMP2
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = 1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 ) THEN
+         INFO = 6
+      ELSE IF( INCY.EQ.0 ) THEN
+         INFO = 9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZSPMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ) .OR. ( ( ALPHA.EQ.ZERO ) .AND. ( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set up the start points in  X  and  Y.
+*
+      IF( INCX.GT.0 ) THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( N-1 )*INCX
+      END IF
+      IF( INCY.GT.0 ) THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( N-1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of the array AP
+*     are accessed sequentially with one pass through AP.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE ) THEN
+         IF( INCY.EQ.1 ) THEN
+            IF( BETA.EQ.ZERO ) THEN
+               DO 10 I = 1, N
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20 I = 1, N
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO ) THEN
+               DO 30 I = 1, N
+                  Y( IY ) = ZERO
+                  IY = IY + INCY
+   30          CONTINUE
+            ELSE
+               DO 40 I = 1, N
+                  Y( IY ) = BETA*Y( IY )
+                  IY = IY + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      KK = 1
+      IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*        Form  y  when AP contains the upper triangle.
+*
+         IF( ( INCX.EQ.1 ) .AND. ( INCY.EQ.1 ) ) THEN
+            DO 60 J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               K = KK
+               DO 50 I = 1, J - 1
+                  Y( I ) = Y( I ) + TEMP1*AP( K )
+                  TEMP2 = TEMP2 + AP( K )*X( I )
+                  K = K + 1
+   50          CONTINUE
+               Y( J ) = Y( J ) + TEMP1*AP( KK+J-1 ) + ALPHA*TEMP2
+               KK = KK + J
+   60       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 80 J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               IX = KX
+               IY = KY
+               DO 70 K = KK, KK + J - 2
+                  Y( IY ) = Y( IY ) + TEMP1*AP( K )
+                  TEMP2 = TEMP2 + AP( K )*X( IX )
+                  IX = IX + INCX
+                  IY = IY + INCY
+   70          CONTINUE
+               Y( JY ) = Y( JY ) + TEMP1*AP( KK+J-1 ) + ALPHA*TEMP2
+               JX = JX + INCX
+               JY = JY + INCY
+               KK = KK + J
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y  when AP contains the lower triangle.
+*
+         IF( ( INCX.EQ.1 ) .AND. ( INCY.EQ.1 ) ) THEN
+            DO 100 J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               Y( J ) = Y( J ) + TEMP1*AP( KK )
+               K = KK + 1
+               DO 90 I = J + 1, N
+                  Y( I ) = Y( I ) + TEMP1*AP( K )
+                  TEMP2 = TEMP2 + AP( K )*X( I )
+                  K = K + 1
+   90          CONTINUE
+               Y( J ) = Y( J ) + ALPHA*TEMP2
+               KK = KK + ( N-J+1 )
+  100       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 120 J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               Y( JY ) = Y( JY ) + TEMP1*AP( KK )
+               IX = JX
+               IY = JY
+               DO 110 K = KK + 1, KK + N - J
+                  IX = IX + INCX
+                  IY = IY + INCY
+                  Y( IY ) = Y( IY ) + TEMP1*AP( K )
+                  TEMP2 = TEMP2 + AP( K )*X( IX )
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP2
+               JX = JX + INCX
+               JY = JY + INCY
+               KK = KK + ( N-J+1 )
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZSPMV
+*
+      END
diff --git a/costa/native/external/lapack/zspr.f b/costa/native/external/lapack/zspr.f
new file mode 100644
index 000000000..be29e6ad8
--- /dev/null
+++ b/costa/native/external/lapack/zspr.f
@@ -0,0 +1,214 @@
+      SUBROUTINE ZSPR( UPLO, N, ALPHA, X, INCX, AP )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INCX, N
+      COMPLEX*16         ALPHA
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         AP( * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZSPR    performs the symmetric rank 1 operation
+*
+*     A := alpha*x*conjg( x' ) + A,
+*
+*  where alpha is a complex scalar, x is an n element vector and A is an
+*  n by n symmetric matrix, supplied in packed form.
+*
+*  Arguments
+*  ==========
+*
+*  UPLO   - CHARACTER*1
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the matrix A is supplied in the packed
+*           array AP as follows:
+*
+*              UPLO = 'U' or 'u'   The upper triangular part of A is
+*                                  supplied in AP.
+*
+*              UPLO = 'L' or 'l'   The lower triangular part of A is
+*                                  supplied in AP.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16 array, dimension at least
+*           ( 1 + ( N - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the N-
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  AP     - COMPLEX*16 array, dimension at least
+*           ( ( N*( N + 1 ) )/2 ).
+*           Before entry, with  UPLO = 'U' or 'u', the array AP must
+*           contain the upper triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
+*           and a( 2, 2 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the upper triangular part of the
+*           updated matrix.
+*           Before entry, with UPLO = 'L' or 'l', the array AP must
+*           contain the lower triangular part of the symmetric matrix
+*           packed sequentially, column by column, so that AP( 1 )
+*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
+*           and a( 3, 1 ) respectively, and so on. On exit, the array
+*           AP is overwritten by the lower triangular part of the
+*           updated matrix.
+*           Note that the imaginary parts of the diagonal elements need
+*           not be set, they are assumed to be zero, and on exit they
+*           are set to zero.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, INFO, IX, J, JX, K, KK, KX
+      COMPLEX*16         TEMP
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = 1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 ) THEN
+         INFO = 5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZSPR  ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ) .OR. ( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Set the start point in X if the increment is not unity.
+*
+      IF( INCX.LE.0 ) THEN
+         KX = 1 - ( N-1 )*INCX
+      ELSE IF( INCX.NE.1 ) THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of the array AP
+*     are accessed sequentially with one pass through AP.
+*
+      KK = 1
+      IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*        Form  A  when upper triangle is stored in AP.
+*
+         IF( INCX.EQ.1 ) THEN
+            DO 20 J = 1, N
+               IF( X( J ).NE.ZERO ) THEN
+                  TEMP = ALPHA*X( J )
+                  K = KK
+                  DO 10 I = 1, J - 1
+                     AP( K ) = AP( K ) + X( I )*TEMP
+                     K = K + 1
+   10             CONTINUE
+                  AP( KK+J-1 ) = AP( KK+J-1 ) + X( J )*TEMP
+               ELSE
+                  AP( KK+J-1 ) = AP( KK+J-1 )
+               END IF
+               KK = KK + J
+   20       CONTINUE
+         ELSE
+            JX = KX
+            DO 40 J = 1, N
+               IF( X( JX ).NE.ZERO ) THEN
+                  TEMP = ALPHA*X( JX )
+                  IX = KX
+                  DO 30 K = KK, KK + J - 2
+                     AP( K ) = AP( K ) + X( IX )*TEMP
+                     IX = IX + INCX
+   30             CONTINUE
+                  AP( KK+J-1 ) = AP( KK+J-1 ) + X( JX )*TEMP
+               ELSE
+                  AP( KK+J-1 ) = AP( KK+J-1 )
+               END IF
+               JX = JX + INCX
+               KK = KK + J
+   40       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  A  when lower triangle is stored in AP.
+*
+         IF( INCX.EQ.1 ) THEN
+            DO 60 J = 1, N
+               IF( X( J ).NE.ZERO ) THEN
+                  TEMP = ALPHA*X( J )
+                  AP( KK ) = AP( KK ) + TEMP*X( J )
+                  K = KK + 1
+                  DO 50 I = J + 1, N
+                     AP( K ) = AP( K ) + X( I )*TEMP
+                     K = K + 1
+   50             CONTINUE
+               ELSE
+                  AP( KK ) = AP( KK )
+               END IF
+               KK = KK + N - J + 1
+   60       CONTINUE
+         ELSE
+            JX = KX
+            DO 80 J = 1, N
+               IF( X( JX ).NE.ZERO ) THEN
+                  TEMP = ALPHA*X( JX )
+                  AP( KK ) = AP( KK ) + TEMP*X( JX )
+                  IX = JX
+                  DO 70 K = KK + 1, KK + N - J
+                     IX = IX + INCX
+                     AP( K ) = AP( K ) + X( IX )*TEMP
+   70             CONTINUE
+               ELSE
+                  AP( KK ) = AP( KK )
+               END IF
+               JX = JX + INCX
+               KK = KK + N - J + 1
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZSPR
+*
+      END
diff --git a/costa/native/external/lapack/zsprfs.f b/costa/native/external/lapack/zsprfs.f
new file mode 100644
index 000000000..edb9cd811
--- /dev/null
+++ b/costa/native/external/lapack/zsprfs.f
@@ -0,0 +1,336 @@
+      SUBROUTINE ZSPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX,
+     $                   FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX*16         AFP( * ), AP( * ), B( LDB, * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZSPRFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is symmetric indefinite
+*  and packed, and provides error bounds and backward error estimates
+*  for the solution.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AP      (input) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          The upper or lower triangle of the symmetric matrix A, packed
+*          columnwise in a linear array.  The j-th column of A is stored
+*          in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*  AFP     (input) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          The factored form of the matrix A.  AFP contains the block
+*          diagonal matrix D and the multipliers used to obtain the
+*          factor U or L from the factorization A = U*D*U**T or
+*          A = L*D*L**T as computed by ZSPTRF, stored as a packed
+*          triangular matrix.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by ZSPTRF.
+*
+*  B       (input) COMPLEX*16 array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) COMPLEX*16 array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by ZSPTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D+0 )
+      DOUBLE PRECISION   THREE
+      PARAMETER          ( THREE = 3.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            COUNT, I, IK, J, K, KASE, KK, NZ
+      DOUBLE PRECISION   EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+      COMPLEX*16         ZDUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZAXPY, ZCOPY, ZLACON, ZSPMV, ZSPTRS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZSPRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = DLAMCH( 'Epsilon' )
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - A * X
+*
+         CALL ZCOPY( N, B( 1, J ), 1, WORK, 1 )
+         CALL ZSPMV( UPLO, N, -ONE, AP, X( 1, J ), 1, ONE, WORK, 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            RWORK( I ) = CABS1( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(A)*abs(X) + abs(B).
+*
+         KK = 1
+         IF( UPPER ) THEN
+            DO 50 K = 1, N
+               S = ZERO
+               XK = CABS1( X( K, J ) )
+               IK = KK
+               DO 40 I = 1, K - 1
+                  RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK
+                  S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) )
+                  IK = IK + 1
+   40          CONTINUE
+               RWORK( K ) = RWORK( K ) + CABS1( AP( KK+K-1 ) )*XK + S
+               KK = KK + K
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               XK = CABS1( X( K, J ) )
+               RWORK( K ) = RWORK( K ) + CABS1( AP( KK ) )*XK
+               IK = KK + 1
+               DO 60 I = K + 1, N
+                  RWORK( I ) = RWORK( I ) + CABS1( AP( IK ) )*XK
+                  S = S + CABS1( AP( IK ) )*CABS1( X( I, J ) )
+                  IK = IK + 1
+   60          CONTINUE
+               RWORK( K ) = RWORK( K ) + S
+               KK = KK + ( N-K+1 )
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) )
+            ELSE
+               S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) /
+     $             ( RWORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL ZSPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO )
+            CALL ZAXPY( N, ONE, WORK, 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(A))*
+*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(A) is the inverse of A
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(A)*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use ZLACON to estimate the infinity-norm of the matrix
+*           inv(A) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I )
+            ELSE
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) +
+     $                      SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL ZLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(A').
+*
+               CALL ZSPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO )
+               DO 110 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  110          CONTINUE
+            ELSE IF( KASE.EQ.2 ) THEN
+*
+*              Multiply by inv(A)*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  120          CONTINUE
+               CALL ZSPTRS( UPLO, N, 1, AFP, IPIV, WORK, N, INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of ZSPRFS
+*
+      END
diff --git a/costa/native/external/lapack/zspsv.f b/costa/native/external/lapack/zspsv.f
new file mode 100644
index 000000000..8f37f0784
--- /dev/null
+++ b/costa/native/external/lapack/zspsv.f
@@ -0,0 +1,149 @@
+      SUBROUTINE ZSPSV( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         AP( * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZSPSV computes the solution to a complex system of linear equations
+*     A * X = B,
+*  where A is an N-by-N symmetric matrix stored in packed format and X
+*  and B are N-by-NRHS matrices.
+*
+*  The diagonal pivoting method is used to factor A as
+*     A = U * D * U**T,  if UPLO = 'U', or
+*     A = L * D * L**T,  if UPLO = 'L',
+*  where U (or L) is a product of permutation and unit upper (lower)
+*  triangular matrices, D is symmetric and block diagonal with 1-by-1
+*  and 2-by-2 diagonal blocks.  The factored form of A is then used to
+*  solve the system of equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AP      (input/output) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*          See below for further details.
+*
+*          On exit, the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L from the factorization
+*          A = U*D*U**T or A = L*D*L**T as computed by ZSPTRF, stored as
+*          a packed triangular matrix in the same storage format as A.
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D, as
+*          determined by ZSPTRF.  If IPIV(k) > 0, then rows and columns
+*          k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1
+*          diagonal block.  If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0,
+*          then rows and columns k-1 and -IPIV(k) were interchanged and
+*          D(k-1:k,k-1:k) is a 2-by-2 diagonal block.  If UPLO = 'L' and
+*          IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and
+*          -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2
+*          diagonal block.
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, D(i,i) is exactly zero.  The factorization
+*                has been completed, but the block diagonal matrix D is
+*                exactly singular, so the solution could not be
+*                computed.
+*
+*  Further Details
+*  ===============
+*
+*  The packed storage scheme is illustrated by the following example
+*  when N = 4, UPLO = 'U':
+*
+*  Two-dimensional storage of the symmetric matrix A:
+*
+*     a11 a12 a13 a14
+*         a22 a23 a24
+*             a33 a34     (aij = aji)
+*                 a44
+*
+*  Packed storage of the upper triangle of A:
+*
+*  AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ]
+*
+*  =====================================================================
+*
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZSPTRF, ZSPTRS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZSPSV ', -INFO )
+         RETURN
+      END IF
+*
+*     Compute the factorization A = U*D*U' or A = L*D*L'.
+*
+      CALL ZSPTRF( UPLO, N, AP, IPIV, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL ZSPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO )
+*
+      END IF
+      RETURN
+*
+*     End of ZSPSV
+*
+      END
diff --git a/costa/native/external/lapack/zspsvx.f b/costa/native/external/lapack/zspsvx.f
new file mode 100644
index 000000000..d2eea7e5a
--- /dev/null
+++ b/costa/native/external/lapack/zspsvx.f
@@ -0,0 +1,279 @@
+      SUBROUTINE ZSPSVX( FACT, UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X,
+     $                   LDX, RCOND, FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          FACT, UPLO
+      INTEGER            INFO, LDB, LDX, N, NRHS
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX*16         AFP( * ), AP( * ), B( LDB, * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZSPSVX uses the diagonal pivoting factorization A = U*D*U**T or
+*  A = L*D*L**T to compute the solution to a complex system of linear
+*  equations A * X = B, where A is an N-by-N symmetric matrix stored
+*  in packed format and X and B are N-by-NRHS matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'N', the diagonal pivoting method is used to factor A as
+*        A = U * D * U**T,  if UPLO = 'U', or
+*        A = L * D * L**T,  if UPLO = 'L',
+*     where U (or L) is a product of permutation and unit upper (lower)
+*     triangular matrices and D is symmetric and block diagonal with
+*     1-by-1 and 2-by-2 diagonal blocks.
+*
+*  2. If some D(i,i)=0, so that D is exactly singular, then the routine
+*     returns with INFO = i. Otherwise, the factored form of A is used
+*     to estimate the condition number of the matrix A.  If the
+*     reciprocal of the condition number is less than machine precision,
+*     INFO = N+1 is returned as a warning, but the routine still goes on
+*     to solve for X and compute error bounds as described below.
+*
+*  3. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  4. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of A has been
+*          supplied on entry.
+*          = 'F':  On entry, AFP and IPIV contain the factored form
+*                  of A.  AP, AFP and IPIV will not be modified.
+*          = 'N':  The matrix A will be copied to AFP and factored.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AP      (input) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          The upper or lower triangle of the symmetric matrix A, packed
+*          columnwise in a linear array.  The j-th column of A is stored
+*          in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*          See below for further details.
+*
+*  AFP     (input or output) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          If FACT = 'F', then AFP is an input argument and on entry
+*          contains the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L from the factorization
+*          A = U*D*U**T or A = L*D*L**T as computed by ZSPTRF, stored as
+*          a packed triangular matrix in the same storage format as A.
+*
+*          If FACT = 'N', then AFP is an output argument and on exit
+*          contains the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L from the factorization
+*          A = U*D*U**T or A = L*D*L**T as computed by ZSPTRF, stored as
+*          a packed triangular matrix in the same storage format as A.
+*
+*  IPIV    (input or output) INTEGER array, dimension (N)
+*          If FACT = 'F', then IPIV is an input argument and on entry
+*          contains details of the interchanges and the block structure
+*          of D, as determined by ZSPTRF.
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*          If FACT = 'N', then IPIV is an output argument and on exit
+*          contains details of the interchanges and the block structure
+*          of D, as determined by ZSPTRF.
+*
+*  B       (input) COMPLEX*16 array, dimension (LDB,NRHS)
+*          The N-by-NRHS right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) COMPLEX*16 array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The estimate of the reciprocal condition number of the matrix
+*          A.  If RCOND is less than the machine precision (in
+*          particular, if RCOND = 0), the matrix is singular to working
+*          precision.  This condition is indicated by a return code of
+*          INFO > 0.
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, and i is
+*                <= N:  D(i,i) is exactly zero.  The factorization
+*                       has been completed but the factor D is exactly
+*                       singular, so the solution and error bounds could
+*                       not be computed. RCOND = 0 is returned.
+*                = N+1: D is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  Further Details
+*  ===============
+*
+*  The packed storage scheme is illustrated by the following example
+*  when N = 4, UPLO = 'U':
+*
+*  Two-dimensional storage of the symmetric matrix A:
+*
+*     a11 a12 a13 a14
+*         a22 a23 a24
+*             a33 a34     (aij = aji)
+*                 a44
+*
+*  Packed storage of the upper triangle of A:
+*
+*  AP = [ a11, a12, a22, a13, a23, a33, a14, a24, a34, a44 ]
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOFACT
+      DOUBLE PRECISION   ANORM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, ZLANSP
+      EXTERNAL           LSAME, DLAMCH, ZLANSP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZCOPY, ZLACPY, ZSPCON, ZSPRFS, ZSPTRF,
+     $                   ZSPTRS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZSPSVX', -INFO )
+         RETURN
+      END IF
+*
+      IF( NOFACT ) THEN
+*
+*        Compute the factorization A = U*D*U' or A = L*D*L'.
+*
+         CALL ZCOPY( N*( N+1 ) / 2, AP, 1, AFP, 1 )
+         CALL ZSPTRF( UPLO, N, AFP, IPIV, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      ANORM = ZLANSP( 'I', UPLO, N, AP, RWORK )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL ZSPCON( UPLO, N, AFP, IPIV, ANORM, RCOND, WORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.DLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution vectors X.
+*
+      CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL ZSPTRS( UPLO, N, NRHS, AFP, IPIV, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solutions and
+*     compute error bounds and backward error estimates for them.
+*
+      CALL ZSPRFS( UPLO, N, NRHS, AP, AFP, IPIV, B, LDB, X, LDX, FERR,
+     $             BERR, WORK, RWORK, INFO )
+*
+      RETURN
+*
+*     End of ZSPSVX
+*
+      END
diff --git a/costa/native/external/lapack/zsptrf.f b/costa/native/external/lapack/zsptrf.f
new file mode 100644
index 000000000..69a81901e
--- /dev/null
+++ b/costa/native/external/lapack/zsptrf.f
@@ -0,0 +1,556 @@
+      SUBROUTINE ZSPTRF( UPLO, N, AP, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         AP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZSPTRF computes the factorization of a complex symmetric matrix A
+*  stored in packed format using the Bunch-Kaufman diagonal pivoting
+*  method:
+*
+*     A = U*D*U**T  or  A = L*D*L**T
+*
+*  where U (or L) is a product of permutation and unit upper (lower)
+*  triangular matrices, and D is symmetric and block diagonal with
+*  1-by-1 and 2-by-2 diagonal blocks.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangle of the symmetric matrix
+*          A, packed columnwise in a linear array.  The j-th column of A
+*          is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*
+*          On exit, the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L, stored as a packed triangular
+*          matrix overwriting A (see below for further details).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D.
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, D(i,i) is exactly zero.  The factorization
+*               has been completed, but the block diagonal matrix D is
+*               exactly singular, and division by zero will occur if it
+*               is used to solve a system of equations.
+*
+*  Further Details
+*  ===============
+*
+*  5-96 - Based on modifications by J. Lewis, Boeing Computer Services
+*         Company
+*
+*  If UPLO = 'U', then A = U*D*U', where
+*     U = P(n)*U(n)* ... *P(k)U(k)* ...,
+*  i.e., U is a product of terms P(k)*U(k), where k decreases from n to
+*  1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and U(k) is a unit upper triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    v    0   )   k-s
+*     U(k) =  (   0    I    0   )   s
+*             (   0    0    I   )   n-k
+*                k-s   s   n-k
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k).
+*  If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k),
+*  and A(k,k), and v overwrites A(1:k-2,k-1:k).
+*
+*  If UPLO = 'L', then A = L*D*L', where
+*     L = P(1)*L(1)* ... *P(k)*L(k)* ...,
+*  i.e., L is a product of terms P(k)*L(k), where k increases from 1 to
+*  n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and L(k) is a unit lower triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    0     0   )  k-1
+*     L(k) =  (   0    I     0   )  s
+*             (   0    v     I   )  n-k-s+1
+*                k-1   s  n-k-s+1
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k).
+*  If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k),
+*  and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+      DOUBLE PRECISION   EIGHT, SEVTEN
+      PARAMETER          ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 )
+      COMPLEX*16         CONE
+      PARAMETER          ( CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, IMAX, J, JMAX, K, KC, KK, KNC, KP, KPC,
+     $                   KSTEP, KX, NPP
+      DOUBLE PRECISION   ABSAKK, ALPHA, COLMAX, ROWMAX
+      COMPLEX*16         D11, D12, D21, D22, R1, T, WK, WKM1, WKP1, ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IZAMAX
+      EXTERNAL           LSAME, IZAMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZSCAL, ZSPR, ZSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX, SQRT
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZSPTRF', -INFO )
+         RETURN
+      END IF
+*
+*     Initialize ALPHA for use in choosing pivot block size.
+*
+      ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT
+*
+      IF( UPPER ) THEN
+*
+*        Factorize A as U*D*U' using the upper triangle of A
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2
+*
+         K = N
+         KC = ( N-1 )*N / 2 + 1
+   10    CONTINUE
+         KNC = KC
+*
+*        If K < 1, exit from loop
+*
+         IF( K.LT.1 )
+     $      GO TO 110
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = CABS1( AP( KC+K-1 ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.GT.1 ) THEN
+            IMAX = IZAMAX( K-1, AP( KC ), 1 )
+            COLMAX = CABS1( AP( KC+IMAX-1 ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               ROWMAX = ZERO
+               JMAX = IMAX
+               KX = IMAX*( IMAX+1 ) / 2 + IMAX
+               DO 20 J = IMAX + 1, K
+                  IF( CABS1( AP( KX ) ).GT.ROWMAX ) THEN
+                     ROWMAX = CABS1( AP( KX ) )
+                     JMAX = J
+                  END IF
+                  KX = KX + J
+   20          CONTINUE
+               KPC = ( IMAX-1 )*IMAX / 2 + 1
+               IF( IMAX.GT.1 ) THEN
+                  JMAX = IZAMAX( IMAX-1, AP( KPC ), 1 )
+                  ROWMAX = MAX( ROWMAX, CABS1( AP( KPC+JMAX-1 ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( CABS1( AP( KPC+IMAX-1 ) ).GE.ALPHA*ROWMAX ) THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+               ELSE
+*
+*                 interchange rows and columns K-1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K - KSTEP + 1
+            IF( KSTEP.EQ.2 )
+     $         KNC = KNC - K + 1
+            IF( KP.NE.KK ) THEN
+*
+*              Interchange rows and columns KK and KP in the leading
+*              submatrix A(1:k,1:k)
+*
+               CALL ZSWAP( KP-1, AP( KNC ), 1, AP( KPC ), 1 )
+               KX = KPC + KP - 1
+               DO 30 J = KP + 1, KK - 1
+                  KX = KX + J - 1
+                  T = AP( KNC+J-1 )
+                  AP( KNC+J-1 ) = AP( KX )
+                  AP( KX ) = T
+   30          CONTINUE
+               T = AP( KNC+KK-1 )
+               AP( KNC+KK-1 ) = AP( KPC+KP-1 )
+               AP( KPC+KP-1 ) = T
+               IF( KSTEP.EQ.2 ) THEN
+                  T = AP( KC+K-2 )
+                  AP( KC+K-2 ) = AP( KC+KP-1 )
+                  AP( KC+KP-1 ) = T
+               END IF
+            END IF
+*
+*           Update the leading submatrix
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column k now holds
+*
+*              W(k) = U(k)*D(k)
+*
+*              where U(k) is the k-th column of U
+*
+*              Perform a rank-1 update of A(1:k-1,1:k-1) as
+*
+*              A := A - U(k)*D(k)*U(k)' = A - W(k)*1/D(k)*W(k)'
+*
+               R1 = CONE / AP( KC+K-1 )
+               CALL ZSPR( UPLO, K-1, -R1, AP( KC ), 1, AP )
+*
+*              Store U(k) in column k
+*
+               CALL ZSCAL( K-1, R1, AP( KC ), 1 )
+            ELSE
+*
+*              2-by-2 pivot block D(k): columns k and k-1 now hold
+*
+*              ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k)
+*
+*              where U(k) and U(k-1) are the k-th and (k-1)-th columns
+*              of U
+*
+*              Perform a rank-2 update of A(1:k-2,1:k-2) as
+*
+*              A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )'
+*                 = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )'
+*
+               IF( K.GT.2 ) THEN
+*
+                  D12 = AP( K-1+( K-1 )*K / 2 )
+                  D22 = AP( K-1+( K-2 )*( K-1 ) / 2 ) / D12
+                  D11 = AP( K+( K-1 )*K / 2 ) / D12
+                  T = CONE / ( D11*D22-CONE )
+                  D12 = T / D12
+*
+                  DO 50 J = K - 2, 1, -1
+                     WKM1 = D12*( D11*AP( J+( K-2 )*( K-1 ) / 2 )-
+     $                      AP( J+( K-1 )*K / 2 ) )
+                     WK = D12*( D22*AP( J+( K-1 )*K / 2 )-
+     $                    AP( J+( K-2 )*( K-1 ) / 2 ) )
+                     DO 40 I = J, 1, -1
+                        AP( I+( J-1 )*J / 2 ) = AP( I+( J-1 )*J / 2 ) -
+     $                     AP( I+( K-1 )*K / 2 )*WK -
+     $                     AP( I+( K-2 )*( K-1 ) / 2 )*WKM1
+   40                CONTINUE
+                     AP( J+( K-1 )*K / 2 ) = WK
+                     AP( J+( K-2 )*( K-1 ) / 2 ) = WKM1
+   50             CONTINUE
+*
+               END IF
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K-1 ) = -KP
+         END IF
+*
+*        Decrease K and return to the start of the main loop
+*
+         K = K - KSTEP
+         KC = KNC - K
+         GO TO 10
+*
+      ELSE
+*
+*        Factorize A as L*D*L' using the lower triangle of A
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2
+*
+         K = 1
+         KC = 1
+         NPP = N*( N+1 ) / 2
+   60    CONTINUE
+         KNC = KC
+*
+*        If K > N, exit from loop
+*
+         IF( K.GT.N )
+     $      GO TO 110
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = CABS1( AP( KC ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.LT.N ) THEN
+            IMAX = K + IZAMAX( N-K, AP( KC+1 ), 1 )
+            COLMAX = CABS1( AP( KC+IMAX-K ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               ROWMAX = ZERO
+               KX = KC + IMAX - K
+               DO 70 J = K, IMAX - 1
+                  IF( CABS1( AP( KX ) ).GT.ROWMAX ) THEN
+                     ROWMAX = CABS1( AP( KX ) )
+                     JMAX = J
+                  END IF
+                  KX = KX + N - J
+   70          CONTINUE
+               KPC = NPP - ( N-IMAX+1 )*( N-IMAX+2 ) / 2 + 1
+               IF( IMAX.LT.N ) THEN
+                  JMAX = IMAX + IZAMAX( N-IMAX, AP( KPC+1 ), 1 )
+                  ROWMAX = MAX( ROWMAX, CABS1( AP( KPC+JMAX-IMAX ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( CABS1( AP( KPC ) ).GE.ALPHA*ROWMAX ) THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+               ELSE
+*
+*                 interchange rows and columns K+1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K + KSTEP - 1
+            IF( KSTEP.EQ.2 )
+     $         KNC = KNC + N - K + 1
+            IF( KP.NE.KK ) THEN
+*
+*              Interchange rows and columns KK and KP in the trailing
+*              submatrix A(k:n,k:n)
+*
+               IF( KP.LT.N )
+     $            CALL ZSWAP( N-KP, AP( KNC+KP-KK+1 ), 1, AP( KPC+1 ),
+     $                        1 )
+               KX = KNC + KP - KK
+               DO 80 J = KK + 1, KP - 1
+                  KX = KX + N - J + 1
+                  T = AP( KNC+J-KK )
+                  AP( KNC+J-KK ) = AP( KX )
+                  AP( KX ) = T
+   80          CONTINUE
+               T = AP( KNC )
+               AP( KNC ) = AP( KPC )
+               AP( KPC ) = T
+               IF( KSTEP.EQ.2 ) THEN
+                  T = AP( KC+1 )
+                  AP( KC+1 ) = AP( KC+KP-K )
+                  AP( KC+KP-K ) = T
+               END IF
+            END IF
+*
+*           Update the trailing submatrix
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column k now holds
+*
+*              W(k) = L(k)*D(k)
+*
+*              where L(k) is the k-th column of L
+*
+               IF( K.LT.N ) THEN
+*
+*                 Perform a rank-1 update of A(k+1:n,k+1:n) as
+*
+*                 A := A - L(k)*D(k)*L(k)' = A - W(k)*(1/D(k))*W(k)'
+*
+                  R1 = CONE / AP( KC )
+                  CALL ZSPR( UPLO, N-K, -R1, AP( KC+1 ), 1,
+     $                       AP( KC+N-K+1 ) )
+*
+*                 Store L(k) in column K
+*
+                  CALL ZSCAL( N-K, R1, AP( KC+1 ), 1 )
+               END IF
+            ELSE
+*
+*              2-by-2 pivot block D(k): columns K and K+1 now hold
+*
+*              ( W(k) W(k+1) ) = ( L(k) L(k+1) )*D(k)
+*
+*              where L(k) and L(k+1) are the k-th and (k+1)-th columns
+*              of L
+*
+               IF( K.LT.N-1 ) THEN
+*
+*                 Perform a rank-2 update of A(k+2:n,k+2:n) as
+*
+*                 A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )'
+*                    = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )'
+*
+*                 where L(k) and L(k+1) are the k-th and (k+1)-th
+*                 columns of L
+*
+                  D21 = AP( K+1+( K-1 )*( 2*N-K ) / 2 )
+                  D11 = AP( K+1+K*( 2*N-K-1 ) / 2 ) / D21
+                  D22 = AP( K+( K-1 )*( 2*N-K ) / 2 ) / D21
+                  T = CONE / ( D11*D22-CONE )
+                  D21 = T / D21
+*
+                  DO 100 J = K + 2, N
+                     WK = D21*( D11*AP( J+( K-1 )*( 2*N-K ) / 2 )-
+     $                    AP( J+K*( 2*N-K-1 ) / 2 ) )
+                     WKP1 = D21*( D22*AP( J+K*( 2*N-K-1 ) / 2 )-
+     $                      AP( J+( K-1 )*( 2*N-K ) / 2 ) )
+                     DO 90 I = J, N
+                        AP( I+( J-1 )*( 2*N-J ) / 2 ) = AP( I+( J-1 )*
+     $                     ( 2*N-J ) / 2 ) - AP( I+( K-1 )*( 2*N-K ) /
+     $                     2 )*WK - AP( I+K*( 2*N-K-1 ) / 2 )*WKP1
+   90                CONTINUE
+                     AP( J+( K-1 )*( 2*N-K ) / 2 ) = WK
+                     AP( J+K*( 2*N-K-1 ) / 2 ) = WKP1
+  100             CONTINUE
+               END IF
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K+1 ) = -KP
+         END IF
+*
+*        Increase K and return to the start of the main loop
+*
+         K = K + KSTEP
+         KC = KNC + N - K + 2
+         GO TO 60
+*
+      END IF
+*
+  110 CONTINUE
+      RETURN
+*
+*     End of ZSPTRF
+*
+      END
diff --git a/costa/native/external/lapack/zsptri.f b/costa/native/external/lapack/zsptri.f
new file mode 100644
index 000000000..eb5dec827
--- /dev/null
+++ b/costa/native/external/lapack/zsptri.f
@@ -0,0 +1,338 @@
+      SUBROUTINE ZSPTRI( UPLO, N, AP, IPIV, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         AP( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZSPTRI computes the inverse of a complex symmetric indefinite matrix
+*  A in packed storage using the factorization A = U*D*U**T or
+*  A = L*D*L**T computed by ZSPTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**T;
+*          = 'L':  Lower triangular, form is A = L*D*L**T.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          On entry, the block diagonal matrix D and the multipliers
+*          used to obtain the factor U or L as computed by ZSPTRF,
+*          stored as a packed triangular matrix.
+*
+*          On exit, if INFO = 0, the (symmetric) inverse of the original
+*          matrix, stored as a packed triangular matrix. The j-th column
+*          of inv(A) is stored in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = inv(A)(i,j) for 1<=i<=j;
+*          if UPLO = 'L',
+*             AP(i + (j-1)*(2n-j)/2) = inv(A)(i,j) for j<=i<=n.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by ZSPTRF.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its
+*               inverse could not be computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ),
+     $                   ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, K, KC, KCNEXT, KP, KPC, KSTEP, KX, NPP
+      COMPLEX*16         AK, AKKP1, AKP1, D, T, TEMP
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      COMPLEX*16         ZDOTU
+      EXTERNAL           LSAME, ZDOTU
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZCOPY, ZSPMV, ZSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZSPTRI', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Check that the diagonal matrix D is nonsingular.
+*
+      IF( UPPER ) THEN
+*
+*        Upper triangular storage: examine D from bottom to top
+*
+         KP = N*( N+1 ) / 2
+         DO 10 INFO = N, 1, -1
+            IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO )
+     $         RETURN
+            KP = KP - INFO
+   10    CONTINUE
+      ELSE
+*
+*        Lower triangular storage: examine D from top to bottom.
+*
+         KP = 1
+         DO 20 INFO = 1, N
+            IF( IPIV( INFO ).GT.0 .AND. AP( KP ).EQ.ZERO )
+     $         RETURN
+            KP = KP + N - INFO + 1
+   20    CONTINUE
+      END IF
+      INFO = 0
+*
+      IF( UPPER ) THEN
+*
+*        Compute inv(A) from the factorization A = U*D*U'.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+         KC = 1
+   30    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 50
+*
+         KCNEXT = KC + K
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Invert the diagonal block.
+*
+            AP( KC+K-1 ) = ONE / AP( KC+K-1 )
+*
+*           Compute column K of the inverse.
+*
+            IF( K.GT.1 ) THEN
+               CALL ZCOPY( K-1, AP( KC ), 1, WORK, 1 )
+               CALL ZSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, AP( KC ),
+     $                     1 )
+               AP( KC+K-1 ) = AP( KC+K-1 ) -
+     $                        ZDOTU( K-1, WORK, 1, AP( KC ), 1 )
+            END IF
+            KSTEP = 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Invert the diagonal block.
+*
+            T = AP( KCNEXT+K-1 )
+            AK = AP( KC+K-1 ) / T
+            AKP1 = AP( KCNEXT+K ) / T
+            AKKP1 = AP( KCNEXT+K-1 ) / T
+            D = T*( AK*AKP1-ONE )
+            AP( KC+K-1 ) = AKP1 / D
+            AP( KCNEXT+K ) = AK / D
+            AP( KCNEXT+K-1 ) = -AKKP1 / D
+*
+*           Compute columns K and K+1 of the inverse.
+*
+            IF( K.GT.1 ) THEN
+               CALL ZCOPY( K-1, AP( KC ), 1, WORK, 1 )
+               CALL ZSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO, AP( KC ),
+     $                     1 )
+               AP( KC+K-1 ) = AP( KC+K-1 ) -
+     $                        ZDOTU( K-1, WORK, 1, AP( KC ), 1 )
+               AP( KCNEXT+K-1 ) = AP( KCNEXT+K-1 ) -
+     $                            ZDOTU( K-1, AP( KC ), 1, AP( KCNEXT ),
+     $                            1 )
+               CALL ZCOPY( K-1, AP( KCNEXT ), 1, WORK, 1 )
+               CALL ZSPMV( UPLO, K-1, -ONE, AP, WORK, 1, ZERO,
+     $                     AP( KCNEXT ), 1 )
+               AP( KCNEXT+K ) = AP( KCNEXT+K ) -
+     $                          ZDOTU( K-1, WORK, 1, AP( KCNEXT ), 1 )
+            END IF
+            KSTEP = 2
+            KCNEXT = KCNEXT + K + 1
+         END IF
+*
+         KP = ABS( IPIV( K ) )
+         IF( KP.NE.K ) THEN
+*
+*           Interchange rows and columns K and KP in the leading
+*           submatrix A(1:k+1,1:k+1)
+*
+            KPC = ( KP-1 )*KP / 2 + 1
+            CALL ZSWAP( KP-1, AP( KC ), 1, AP( KPC ), 1 )
+            KX = KPC + KP - 1
+            DO 40 J = KP + 1, K - 1
+               KX = KX + J - 1
+               TEMP = AP( KC+J-1 )
+               AP( KC+J-1 ) = AP( KX )
+               AP( KX ) = TEMP
+   40       CONTINUE
+            TEMP = AP( KC+K-1 )
+            AP( KC+K-1 ) = AP( KPC+KP-1 )
+            AP( KPC+KP-1 ) = TEMP
+            IF( KSTEP.EQ.2 ) THEN
+               TEMP = AP( KC+K+K-1 )
+               AP( KC+K+K-1 ) = AP( KC+K+KP-1 )
+               AP( KC+K+KP-1 ) = TEMP
+            END IF
+         END IF
+*
+         K = K + KSTEP
+         KC = KCNEXT
+         GO TO 30
+   50    CONTINUE
+*
+      ELSE
+*
+*        Compute inv(A) from the factorization A = L*D*L'.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         NPP = N*( N+1 ) / 2
+         K = N
+         KC = NPP
+   60    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 80
+*
+         KCNEXT = KC - ( N-K+2 )
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Invert the diagonal block.
+*
+            AP( KC ) = ONE / AP( KC )
+*
+*           Compute column K of the inverse.
+*
+            IF( K.LT.N ) THEN
+               CALL ZCOPY( N-K, AP( KC+1 ), 1, WORK, 1 )
+               CALL ZSPMV( UPLO, N-K, -ONE, AP( KC+N-K+1 ), WORK, 1,
+     $                     ZERO, AP( KC+1 ), 1 )
+               AP( KC ) = AP( KC ) - ZDOTU( N-K, WORK, 1, AP( KC+1 ),
+     $                    1 )
+            END IF
+            KSTEP = 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Invert the diagonal block.
+*
+            T = AP( KCNEXT+1 )
+            AK = AP( KCNEXT ) / T
+            AKP1 = AP( KC ) / T
+            AKKP1 = AP( KCNEXT+1 ) / T
+            D = T*( AK*AKP1-ONE )
+            AP( KCNEXT ) = AKP1 / D
+            AP( KC ) = AK / D
+            AP( KCNEXT+1 ) = -AKKP1 / D
+*
+*           Compute columns K-1 and K of the inverse.
+*
+            IF( K.LT.N ) THEN
+               CALL ZCOPY( N-K, AP( KC+1 ), 1, WORK, 1 )
+               CALL ZSPMV( UPLO, N-K, -ONE, AP( KC+( N-K+1 ) ), WORK, 1,
+     $                     ZERO, AP( KC+1 ), 1 )
+               AP( KC ) = AP( KC ) - ZDOTU( N-K, WORK, 1, AP( KC+1 ),
+     $                    1 )
+               AP( KCNEXT+1 ) = AP( KCNEXT+1 ) -
+     $                          ZDOTU( N-K, AP( KC+1 ), 1,
+     $                          AP( KCNEXT+2 ), 1 )
+               CALL ZCOPY( N-K, AP( KCNEXT+2 ), 1, WORK, 1 )
+               CALL ZSPMV( UPLO, N-K, -ONE, AP( KC+( N-K+1 ) ), WORK, 1,
+     $                     ZERO, AP( KCNEXT+2 ), 1 )
+               AP( KCNEXT ) = AP( KCNEXT ) -
+     $                        ZDOTU( N-K, WORK, 1, AP( KCNEXT+2 ), 1 )
+            END IF
+            KSTEP = 2
+            KCNEXT = KCNEXT - ( N-K+3 )
+         END IF
+*
+         KP = ABS( IPIV( K ) )
+         IF( KP.NE.K ) THEN
+*
+*           Interchange rows and columns K and KP in the trailing
+*           submatrix A(k-1:n,k-1:n)
+*
+            KPC = NPP - ( N-KP+1 )*( N-KP+2 ) / 2 + 1
+            IF( KP.LT.N )
+     $         CALL ZSWAP( N-KP, AP( KC+KP-K+1 ), 1, AP( KPC+1 ), 1 )
+            KX = KC + KP - K
+            DO 70 J = K + 1, KP - 1
+               KX = KX + N - J + 1
+               TEMP = AP( KC+J-K )
+               AP( KC+J-K ) = AP( KX )
+               AP( KX ) = TEMP
+   70       CONTINUE
+            TEMP = AP( KC )
+            AP( KC ) = AP( KPC )
+            AP( KPC ) = TEMP
+            IF( KSTEP.EQ.2 ) THEN
+               TEMP = AP( KC-N+K-1 )
+               AP( KC-N+K-1 ) = AP( KC-N+KP-1 )
+               AP( KC-N+KP-1 ) = TEMP
+            END IF
+         END IF
+*
+         K = K - KSTEP
+         KC = KCNEXT
+         GO TO 60
+   80    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZSPTRI
+*
+      END
diff --git a/costa/native/external/lapack/zsptrs.f b/costa/native/external/lapack/zsptrs.f
new file mode 100644
index 000000000..3a621a63a
--- /dev/null
+++ b/costa/native/external/lapack/zsptrs.f
@@ -0,0 +1,378 @@
+      SUBROUTINE ZSPTRS( UPLO, N, NRHS, AP, IPIV, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         AP( * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZSPTRS solves a system of linear equations A*X = B with a complex
+*  symmetric matrix A stored in packed format using the factorization
+*  A = U*D*U**T or A = L*D*L**T computed by ZSPTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**T;
+*          = 'L':  Lower triangular, form is A = L*D*L**T.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AP      (input) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          The block diagonal matrix D and the multipliers used to
+*          obtain the factor U or L as computed by ZSPTRF, stored as a
+*          packed triangular matrix.
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by ZSPTRF.
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, K, KC, KP
+      COMPLEX*16         AK, AKM1, AKM1K, BK, BKM1, DENOM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGEMV, ZGERU, ZSCAL, ZSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZSPTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Solve A*X = B, where A = U*D*U'.
+*
+*        First solve U*D*X = B, overwriting B with X.
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = N
+         KC = N*( N+1 ) / 2 + 1
+   10    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 30
+*
+         KC = KC - K
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(U(K)), where U(K) is the transformation
+*           stored in column K of A.
+*
+            CALL ZGERU( K-1, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB,
+     $                  B( 1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            CALL ZSCAL( NRHS, ONE / AP( KC+K-1 ), B( K, 1 ), LDB )
+            K = K - 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Interchange rows K-1 and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K-1 )
+     $         CALL ZSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(U(K)), where U(K) is the transformation
+*           stored in columns K-1 and K of A.
+*
+            CALL ZGERU( K-2, NRHS, -ONE, AP( KC ), 1, B( K, 1 ), LDB,
+     $                  B( 1, 1 ), LDB )
+            CALL ZGERU( K-2, NRHS, -ONE, AP( KC-( K-1 ) ), 1,
+     $                  B( K-1, 1 ), LDB, B( 1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            AKM1K = AP( KC+K-2 )
+            AKM1 = AP( KC-1 ) / AKM1K
+            AK = AP( KC+K-1 ) / AKM1K
+            DENOM = AKM1*AK - ONE
+            DO 20 J = 1, NRHS
+               BKM1 = B( K-1, J ) / AKM1K
+               BK = B( K, J ) / AKM1K
+               B( K-1, J ) = ( AK*BKM1-BK ) / DENOM
+               B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM
+   20       CONTINUE
+            KC = KC - K + 1
+            K = K - 2
+         END IF
+*
+         GO TO 10
+   30    CONTINUE
+*
+*        Next solve U'*X = B, overwriting B with X.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+         KC = 1
+   40    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 50
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Multiply by inv(U'(K)), where U(K) is the transformation
+*           stored in column K of A.
+*
+            CALL ZGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, AP( KC ),
+     $                  1, ONE, B( K, 1 ), LDB )
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            KC = KC + K
+            K = K + 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Multiply by inv(U'(K+1)), where U(K+1) is the transformation
+*           stored in columns K and K+1 of A.
+*
+            CALL ZGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, AP( KC ),
+     $                  1, ONE, B( K, 1 ), LDB )
+            CALL ZGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB,
+     $                  AP( KC+K ), 1, ONE, B( K+1, 1 ), LDB )
+*
+*           Interchange rows K and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K )
+     $         CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            KC = KC + 2*K + 1
+            K = K + 2
+         END IF
+*
+         GO TO 40
+   50    CONTINUE
+*
+      ELSE
+*
+*        Solve A*X = B, where A = L*D*L'.
+*
+*        First solve L*D*X = B, overwriting B with X.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+         KC = 1
+   60    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 80
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(L(K)), where L(K) is the transformation
+*           stored in column K of A.
+*
+            IF( K.LT.N )
+     $         CALL ZGERU( N-K, NRHS, -ONE, AP( KC+1 ), 1, B( K, 1 ),
+     $                     LDB, B( K+1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            CALL ZSCAL( NRHS, ONE / AP( KC ), B( K, 1 ), LDB )
+            KC = KC + N - K + 1
+            K = K + 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Interchange rows K+1 and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K+1 )
+     $         CALL ZSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(L(K)), where L(K) is the transformation
+*           stored in columns K and K+1 of A.
+*
+            IF( K.LT.N-1 ) THEN
+               CALL ZGERU( N-K-1, NRHS, -ONE, AP( KC+2 ), 1, B( K, 1 ),
+     $                     LDB, B( K+2, 1 ), LDB )
+               CALL ZGERU( N-K-1, NRHS, -ONE, AP( KC+N-K+2 ), 1,
+     $                     B( K+1, 1 ), LDB, B( K+2, 1 ), LDB )
+            END IF
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            AKM1K = AP( KC+1 )
+            AKM1 = AP( KC ) / AKM1K
+            AK = AP( KC+N-K+1 ) / AKM1K
+            DENOM = AKM1*AK - ONE
+            DO 70 J = 1, NRHS
+               BKM1 = B( K, J ) / AKM1K
+               BK = B( K+1, J ) / AKM1K
+               B( K, J ) = ( AK*BKM1-BK ) / DENOM
+               B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM
+   70       CONTINUE
+            KC = KC + 2*( N-K ) + 1
+            K = K + 2
+         END IF
+*
+         GO TO 60
+   80    CONTINUE
+*
+*        Next solve L'*X = B, overwriting B with X.
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = N
+         KC = N*( N+1 ) / 2 + 1
+   90    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 100
+*
+         KC = KC - ( N-K+1 )
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Multiply by inv(L'(K)), where L(K) is the transformation
+*           stored in column K of A.
+*
+            IF( K.LT.N )
+     $         CALL ZGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ),
+     $                     LDB, AP( KC+1 ), 1, ONE, B( K, 1 ), LDB )
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            K = K - 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Multiply by inv(L'(K-1)), where L(K-1) is the transformation
+*           stored in columns K-1 and K of A.
+*
+            IF( K.LT.N ) THEN
+               CALL ZGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ),
+     $                     LDB, AP( KC+1 ), 1, ONE, B( K, 1 ), LDB )
+               CALL ZGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ),
+     $                     LDB, AP( KC-( N-K ) ), 1, ONE, B( K-1, 1 ),
+     $                     LDB )
+            END IF
+*
+*           Interchange rows K and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K )
+     $         CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            KC = KC - ( N-K+2 )
+            K = K - 2
+         END IF
+*
+         GO TO 90
+  100    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZSPTRS
+*
+      END
diff --git a/costa/native/external/lapack/zstedc.f b/costa/native/external/lapack/zstedc.f
new file mode 100644
index 000000000..c992efd73
--- /dev/null
+++ b/costa/native/external/lapack/zstedc.f
@@ -0,0 +1,390 @@
+      SUBROUTINE ZSTEDC( COMPZ, N, D, E, Z, LDZ, WORK, LWORK, RWORK,
+     $                   LRWORK, IWORK, LIWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPZ
+      INTEGER            INFO, LDZ, LIWORK, LRWORK, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   D( * ), E( * ), RWORK( * )
+      COMPLEX*16         WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZSTEDC computes all eigenvalues and, optionally, eigenvectors of a
+*  symmetric tridiagonal matrix using the divide and conquer method.
+*  The eigenvectors of a full or band complex Hermitian matrix can also
+*  be found if ZHETRD or ZHPTRD or ZHBTRD has been used to reduce this
+*  matrix to tridiagonal form.
+*
+*  This code makes very mild assumptions about floating point
+*  arithmetic. It will work on machines with a guard digit in
+*  add/subtract, or on those binary machines without guard digits
+*  which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
+*  It could conceivably fail on hexadecimal or decimal machines
+*  without guard digits, but we know of none.  See DLAED3 for details.
+*
+*  Arguments
+*  =========
+*
+*  COMPZ   (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only.
+*          = 'I':  Compute eigenvectors of tridiagonal matrix also.
+*          = 'V':  Compute eigenvectors of original Hermitian matrix
+*                  also.  On entry, Z contains the unitary matrix used
+*                  to reduce the original matrix to tridiagonal form.
+*
+*  N       (input) INTEGER
+*          The dimension of the symmetric tridiagonal matrix.  N >= 0.
+*
+*  D       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the diagonal elements of the tridiagonal matrix.
+*          On exit, if INFO = 0, the eigenvalues in ascending order.
+*
+*  E       (input/output) DOUBLE PRECISION array, dimension (N-1)
+*          On entry, the subdiagonal elements of the tridiagonal matrix.
+*          On exit, E has been destroyed.
+*
+*  Z       (input/output) COMPLEX*16 array, dimension (LDZ,N)
+*          On entry, if COMPZ = 'V', then Z contains the unitary
+*          matrix used in the reduction to tridiagonal form.
+*          On exit, if INFO = 0, then if COMPZ = 'V', Z contains the
+*          orthonormal eigenvectors of the original Hermitian matrix,
+*          and if COMPZ = 'I', Z contains the orthonormal eigenvectors
+*          of the symmetric tridiagonal matrix.
+*          If  COMPZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1.
+*          If eigenvectors are desired, then LDZ >= max(1,N).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If COMPZ = 'N' or 'I', or N <= 1, LWORK must be at least 1.
+*          If COMPZ = 'V' and N > 1, LWORK must be at least N*N.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace/output) DOUBLE PRECISION array,
+*                                         dimension (LRWORK)
+*          On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
+*
+*  LRWORK  (input) INTEGER
+*          The dimension of the array RWORK.
+*          If COMPZ = 'N' or N <= 1, LRWORK must be at least 1.
+*          If COMPZ = 'V' and N > 1, LRWORK must be at least
+*                         1 + 3*N + 2*N*lg N + 3*N**2 ,
+*                         where lg( N ) = smallest integer k such
+*                         that 2**k >= N.
+*          If COMPZ = 'I' and N > 1, LRWORK must be at least
+*                         1 + 4*N + 2*N**2 .
+*
+*          If LRWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the RWORK array,
+*          returns this value as the first entry of the RWORK array, and
+*          no error message related to LRWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.
+*          If COMPZ = 'N' or N <= 1, LIWORK must be at least 1.
+*          If COMPZ = 'V' or N > 1,  LIWORK must be at least
+*                                    6 + 6*N + 5*N*lg N.
+*          If COMPZ = 'I' or N > 1,  LIWORK must be at least
+*                                    3 + 5*N .
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          > 0:  The algorithm failed to compute an eigenvalue while
+*                working on the submatrix lying in rows and columns
+*                INFO/(N+1) through mod(INFO,N+1).
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Jeff Rutter, Computer Science Division, University of California
+*     at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            END, I, ICOMPZ, II, J, K, LGN, LIWMIN, LL,
+     $                   LRWMIN, LWMIN, M, SMLSIZ, START
+      DOUBLE PRECISION   EPS, ORGNRM, P, TINY
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, DLANST
+      EXTERNAL           LSAME, ILAENV, DLAMCH, DLANST
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLASCL, DLASET, DSTEDC, DSTEQR, DSTERF, XERBLA,
+     $                   ZLACPY, ZLACRM, ZLAED0, ZSTEQR, ZSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, INT, LOG, MAX, MOD, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 .OR. LRWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      IF( LSAME( COMPZ, 'N' ) ) THEN
+         ICOMPZ = 0
+      ELSE IF( LSAME( COMPZ, 'V' ) ) THEN
+         ICOMPZ = 1
+      ELSE IF( LSAME( COMPZ, 'I' ) ) THEN
+         ICOMPZ = 2
+      ELSE
+         ICOMPZ = -1
+      END IF
+      IF( N.LE.1 .OR. ICOMPZ.LE.0 ) THEN
+         LWMIN = 1
+         LIWMIN = 1
+         LRWMIN = 1
+      ELSE
+         LGN = INT( LOG( DBLE( N ) ) / LOG( TWO ) )
+         IF( 2**LGN.LT.N )
+     $      LGN = LGN + 1
+         IF( 2**LGN.LT.N )
+     $      LGN = LGN + 1
+         IF( ICOMPZ.EQ.1 ) THEN
+            LWMIN = N*N
+            LRWMIN = 1 + 3*N + 2*N*LGN + 3*N**2
+            LIWMIN = 6 + 6*N + 5*N*LGN
+         ELSE IF( ICOMPZ.EQ.2 ) THEN
+            LWMIN = 1
+            LRWMIN = 1 + 4*N + 2*N**2
+            LIWMIN = 3 + 5*N
+         END IF
+      END IF
+      IF( ICOMPZ.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1,
+     $         N ) ) ) THEN
+         INFO = -6
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      ELSE IF( LRWORK.LT.LRWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -10
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+         RWORK( 1 ) = LRWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZSTEDC', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+      IF( N.EQ.1 ) THEN
+         IF( ICOMPZ.NE.0 )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+      SMLSIZ = ILAENV( 9, 'ZSTEDC', ' ', 0, 0, 0, 0 )
+*
+*     If the following conditional clause is removed, then the routine
+*     will use the Divide and Conquer routine to compute only the
+*     eigenvalues, which requires (3N + 3N**2) real workspace and
+*     (2 + 5N + 2N lg(N)) integer workspace.
+*     Since on many architectures DSTERF is much faster than any other
+*     algorithm for finding eigenvalues only, it is used here
+*     as the default.
+*
+*     If COMPZ = 'N', use DSTERF to compute the eigenvalues.
+*
+      IF( ICOMPZ.EQ.0 ) THEN
+         CALL DSTERF( N, D, E, INFO )
+         RETURN
+      END IF
+*
+*     If N is smaller than the minimum divide size (SMLSIZ+1), then
+*     solve the problem with another solver.
+*
+      IF( N.LE.SMLSIZ ) THEN
+         IF( ICOMPZ.EQ.0 ) THEN
+            CALL DSTERF( N, D, E, INFO )
+            RETURN
+         ELSE IF( ICOMPZ.EQ.2 ) THEN
+            CALL ZSTEQR( 'I', N, D, E, Z, LDZ, RWORK, INFO )
+            RETURN
+         ELSE
+            CALL ZSTEQR( 'V', N, D, E, Z, LDZ, RWORK, INFO )
+            RETURN
+         END IF
+      END IF
+*
+*     If COMPZ = 'I', we simply call DSTEDC instead.
+*
+      IF( ICOMPZ.EQ.2 ) THEN
+         CALL DLASET( 'Full', N, N, ZERO, ONE, RWORK, N )
+         LL = N*N + 1
+         CALL DSTEDC( 'I', N, D, E, RWORK, N, RWORK( LL ), LRWORK-LL+1,
+     $                IWORK, LIWORK, INFO )
+         DO 20 J = 1, N
+            DO 10 I = 1, N
+               Z( I, J ) = RWORK( ( J-1 )*N+I )
+   10       CONTINUE
+   20    CONTINUE
+         RETURN
+      END IF
+*
+*     From now on, only option left to be handled is COMPZ = 'V',
+*     i.e. ICOMPZ = 1.
+*
+*     Scale.
+*
+      ORGNRM = DLANST( 'M', N, D, E )
+      IF( ORGNRM.EQ.ZERO )
+     $   RETURN
+*
+      EPS = DLAMCH( 'Epsilon' )
+*
+      START = 1
+*
+*     while ( START <= N )
+*
+   30 CONTINUE
+      IF( START.LE.N ) THEN
+*
+*     Let END be the position of the next subdiagonal entry such that
+*     E( END ) <= TINY or END = N if no such subdiagonal exists.  The
+*     matrix identified by the elements between START and END
+*     constitutes an independent sub-problem.
+*
+         END = START
+   40    CONTINUE
+         IF( END.LT.N ) THEN
+            TINY = EPS*SQRT( ABS( D( END ) ) )*SQRT( ABS( D( END+1 ) ) )
+            IF( ABS( E( END ) ).GT.TINY ) THEN
+               END = END + 1
+               GO TO 40
+            END IF
+         END IF
+*
+*        (Sub) Problem determined.  Compute its size and solve it.
+*
+         M = END - START + 1
+         IF( M.GT.SMLSIZ ) THEN
+            INFO = SMLSIZ
+*
+*           Scale.
+*
+            ORGNRM = DLANST( 'M', M, D( START ), E( START ) )
+            CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, M, 1, D( START ), M,
+     $                   INFO )
+            CALL DLASCL( 'G', 0, 0, ORGNRM, ONE, M-1, 1, E( START ),
+     $                   M-1, INFO )
+*
+            CALL ZLAED0( N, M, D( START ), E( START ), Z( 1, START ),
+     $                   LDZ, WORK, N, RWORK, IWORK, INFO )
+            IF( INFO.GT.0 ) THEN
+               INFO = ( INFO / ( M+1 )+START-1 )*( N+1 ) +
+     $                MOD( INFO, ( M+1 ) ) + START - 1
+               RETURN
+            END IF
+*
+*           Scale back.
+*
+            CALL DLASCL( 'G', 0, 0, ONE, ORGNRM, M, 1, D( START ), M,
+     $                   INFO )
+*
+         ELSE
+            CALL DSTEQR( 'I', M, D( START ), E( START ), RWORK, M,
+     $                   RWORK( M*M+1 ), INFO )
+            CALL ZLACRM( N, M, Z( 1, START ), LDZ, RWORK, M, WORK, N,
+     $                   RWORK( M*M+1 ) )
+            CALL ZLACPY( 'A', N, M, WORK, N, Z( 1, START ), LDZ )
+            IF( INFO.GT.0 ) THEN
+               INFO = START*( N+1 ) + END
+               RETURN
+            END IF
+         END IF
+*
+         START = END + 1
+         GO TO 30
+      END IF
+*
+*     endwhile
+*
+*     If the problem split any number of times, then the eigenvalues
+*     will not be properly ordered.  Here we permute the eigenvalues
+*     (and the associated eigenvectors) into ascending order.
+*
+      IF( M.NE.N ) THEN
+*
+*        Use Selection Sort to minimize swaps of eigenvectors
+*
+         DO 60 II = 2, N
+            I = II - 1
+            K = I
+            P = D( I )
+            DO 50 J = II, N
+               IF( D( J ).LT.P ) THEN
+                  K = J
+                  P = D( J )
+               END IF
+   50       CONTINUE
+            IF( K.NE.I ) THEN
+               D( K ) = D( I )
+               D( I ) = P
+               CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 )
+            END IF
+   60    CONTINUE
+      END IF
+*
+      WORK( 1 ) = LWMIN
+      RWORK( 1 ) = LRWMIN
+      IWORK( 1 ) = LIWMIN
+*
+      RETURN
+*
+*     End of ZSTEDC
+*
+      END
diff --git a/costa/native/external/lapack/zstegr.f b/costa/native/external/lapack/zstegr.f
new file mode 100644
index 000000000..5188f76a0
--- /dev/null
+++ b/costa/native/external/lapack/zstegr.f
@@ -0,0 +1,405 @@
+      SUBROUTINE ZSTEGR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, ABSTOL,
+     $                   M, W, Z, LDZ, ISUPPZ, WORK, LWORK, IWORK,
+     $                   LIWORK, INFO )
+*
+*  -- LAPACK computational routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBZ, RANGE
+      INTEGER            IL, INFO, IU, LDZ, LIWORK, LWORK, M, N
+      DOUBLE PRECISION   ABSTOL, VL, VU
+*     ..
+*     .. Array Arguments ..
+      INTEGER            ISUPPZ( * ), IWORK( * )
+      DOUBLE PRECISION   D( * ), E( * ), W( * ), WORK( * )
+      COMPLEX*16         Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+* ZSTEGR computes selected eigenvalues and, optionally, eigenvectors
+* of a real symmetric tridiagonal matrix T.  Eigenvalues and
+* eigenvectors can be selected by specifying either a range of values
+* or a range of indices for the desired eigenvalues. The eigenvalues
+* are computed by the dqds algorithm, while orthogonal eigenvectors are
+* computed from various ``good'' L D L^T representations (also known as
+* Relatively Robust Representations). Gram-Schmidt orthogonalization is
+* avoided as far as possible. More specifically, the various steps of
+* the algorithm are as follows. For the i-th unreduced block of T,
+*     (a) Compute T - sigma_i = L_i D_i L_i^T, such that L_i D_i L_i^T
+*         is a relatively robust representation,
+*     (b) Compute the eigenvalues, lambda_j, of L_i D_i L_i^T to high
+*         relative accuracy by the dqds algorithm,
+*     (c) If there is a cluster of close eigenvalues, "choose" sigma_i
+*         close to the cluster, and go to step (a),
+*     (d) Given the approximate eigenvalue lambda_j of L_i D_i L_i^T,
+*         compute the corresponding eigenvector by forming a
+*         rank-revealing twisted factorization.
+*  The desired accuracy of the output can be specified by the input
+*  parameter ABSTOL.
+*
+*  For more details, see "A new O(n^2) algorithm for the symmetric
+*  tridiagonal eigenvalue/eigenvector problem", by Inderjit Dhillon,
+*  Computer Science Division Technical Report No. UCB/CSD-97-971,
+*  UC Berkeley, May 1997.
+*
+*  Note 1 : Currently ZSTEGR is only set up to find ALL the n
+*  eigenvalues and eigenvectors of T in O(n^2) time
+*  Note 2 : Currently the routine ZSTEIN is called when an appropriate
+*  sigma_i cannot be chosen in step (c) above. ZSTEIN invokes modified
+*  Gram-Schmidt when eigenvalues are close.
+*  Note 3 : ZSTEGR works only on machines which follow ieee-754
+*  floating-point standard in their handling of infinities and NaNs.
+*  Normal execution of ZSTEGR may create NaNs and infinities and hence
+*  may abort due to a floating point exception in environments which
+*  do not conform to the ieee standard.
+*
+*  Arguments
+*  =========
+*
+*  JOBZ    (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only;
+*          = 'V':  Compute eigenvalues and eigenvectors.
+*
+*  RANGE   (input) CHARACTER*1
+*          = 'A': all eigenvalues will be found.
+*          = 'V': all eigenvalues in the half-open interval (VL,VU]
+*                 will be found.
+*          = 'I': the IL-th through IU-th eigenvalues will be found.
+********** Only RANGE = 'A' is currently supported *********************
+*
+*  N       (input) INTEGER
+*          The order of the matrix.  N >= 0.
+*
+*  D       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the n diagonal elements of the tridiagonal matrix
+*          T. On exit, D is overwritten.
+*
+*  E       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix T in elements 1 to N-1 of E; E(N) need not be set.
+*          On exit, E is overwritten.
+*
+*  VL      (input) DOUBLE PRECISION
+*  VU      (input) DOUBLE PRECISION
+*          If RANGE='V', the lower and upper bounds of the interval to
+*          be searched for eigenvalues. VL < VU.
+*          Not referenced if RANGE = 'A' or 'I'.
+*
+*  IL      (input) INTEGER
+*  IU      (input) INTEGER
+*          If RANGE='I', the indices (in ascending order) of the
+*          smallest and largest eigenvalues to be returned.
+*          1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0.
+*          Not referenced if RANGE = 'A' or 'V'.
+*
+*  ABSTOL  (input) DOUBLE PRECISION
+*          The absolute error tolerance for the
+*          eigenvalues/eigenvectors. IF JOBZ = 'V', the eigenvalues and
+*          eigenvectors output have residual norms bounded by ABSTOL,
+*          and the dot products between different eigenvectors are
+*          bounded by ABSTOL. If ABSTOL is less than N*EPS*|T|, then
+*          N*EPS*|T| will be used in its place, where EPS is the
+*          machine precision and |T| is the 1-norm of the tridiagonal
+*          matrix. The eigenvalues are computed to an accuracy of
+*          EPS*|T| irrespective of ABSTOL. If high relative accuracy
+*          is important, set ABSTOL to DLAMCH( 'Safe minimum' ).
+*          See Barlow and Demmel "Computing Accurate Eigensystems of
+*          Scaled Diagonally Dominant Matrices", LAPACK Working Note #7
+*          for a discussion of which matrices define their eigenvalues
+*          to high relative accuracy.
+*
+*  M       (output) INTEGER
+*          The total number of eigenvalues found.  0 <= M <= N.
+*          If RANGE = 'A', M = N, and if RANGE = 'I', M = IU-IL+1.
+*
+*  W       (output) DOUBLE PRECISION array, dimension (N)
+*          The first M elements contain the selected eigenvalues in
+*          ascending order.
+*
+*  Z       (output) COMPLEX*16 array, dimension (LDZ, max(1,M) )
+*          If JOBZ = 'V', then if INFO = 0, the first M columns of Z
+*          contain the orthonormal eigenvectors of the matrix T
+*          corresponding to the selected eigenvalues, with the i-th
+*          column of Z holding the eigenvector associated with W(i).
+*          If JOBZ = 'N', then Z is not referenced.
+*          Note: the user must ensure that at least max(1,M) columns are
+*          supplied in the array Z; if RANGE = 'V', the exact value of M
+*          is not known in advance and an upper bound must be used.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          JOBZ = 'V', LDZ >= max(1,N).
+*
+*  ISUPPZ  (output) INTEGER ARRAY, dimension ( 2*max(1,M) )
+*          The support of the eigenvectors in Z, i.e., the indices
+*          indicating the nonzero elements in Z. The i-th eigenvector
+*          is nonzero only in elements ISUPPZ( 2*i-1 ) through
+*          ISUPPZ( 2*i ).
+*
+*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal
+*          (and minimal) LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,18*N)
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+*          On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK.  LIWORK >= max(1,10*N)
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = 1, internal error in DLARRE,
+*                if INFO = 2, internal error in ZLARRV.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Inderjit Dhillon, IBM Almaden, USA
+*     Osni Marques, LBNL/NERSC, USA
+*     Ken Stanley, Computer Science Division, University of
+*       California at Berkeley, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
+      COMPLEX*16         CZERO
+      PARAMETER          ( CZERO = ( 0.0D0, 0.0D0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ
+      INTEGER            I, IBEGIN, IEND, IINDBL, IINDWK, IINFO, IINSPL,
+     $                   INDGRS, INDWOF, INDWRK, ITMP, J, JJ, LIWMIN,
+     $                   LWMIN, NSPLIT
+      DOUBLE PRECISION   BIGNUM, EPS, RMAX, RMIN, SAFMIN, SCALE, SMLNUM,
+     $                   THRESH, TMP, TNRM, TOL
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, DLANST
+      EXTERNAL           LSAME, DLAMCH, DLANST
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARRE, DSCAL, XERBLA, ZLARRV, ZLASET, ZSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, MAX, MIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      WANTZ = LSAME( JOBZ, 'V' )
+      ALLEIG = LSAME( RANGE, 'A' )
+      VALEIG = LSAME( RANGE, 'V' )
+      INDEIG = LSAME( RANGE, 'I' )
+*
+      LQUERY = ( ( LWORK.EQ.-1 ) .OR. ( LIWORK.EQ.-1 ) )
+      LWMIN = 18*N
+      LIWMIN = 10*N
+*
+      INFO = 0
+      IF( .NOT.( WANTZ .OR. LSAME( JOBZ, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( ALLEIG .OR. VALEIG .OR. INDEIG ) ) THEN
+         INFO = -2
+*
+*     The following two lines need to be removed once the
+*     RANGE = 'V' and RANGE = 'I' options are provided.
+*
+      ELSE IF( VALEIG .OR. INDEIG ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( VALEIG .AND. N.GT.0 .AND. VU.LE.VL ) THEN
+         INFO = -7
+      ELSE IF( INDEIG .AND. IL.LT.1 ) THEN
+         INFO = -8
+*     The following change should be made in DSTEVX also, otherwise
+*     IL can be specified as N+1 and IU as N.
+*     ELSE IF( INDEIG .AND. ( IU.LT.MIN( N, IL ) .OR. IU.GT.N ) ) THEN
+      ELSE IF( INDEIG .AND. ( IU.LT.IL .OR. IU.GT.N ) ) THEN
+         INFO = -9
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -14
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -17
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -19
+      END IF
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+         IWORK( 1 ) = LIWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZSTEGR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = 0
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( ALLEIG .OR. INDEIG ) THEN
+            M = 1
+            W( 1 ) = D( 1 )
+         ELSE
+            IF( VL.LT.D( 1 ) .AND. VU.GE.D( 1 ) ) THEN
+               M = 1
+               W( 1 ) = D( 1 )
+            END IF
+         END IF
+         IF( WANTZ )
+     $      Z( 1, 1 ) = ONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      EPS = DLAMCH( 'Precision' )
+      SMLNUM = SAFMIN / EPS
+      BIGNUM = ONE / SMLNUM
+      RMIN = SQRT( SMLNUM )
+      RMAX = MIN( SQRT( BIGNUM ), ONE / SQRT( SQRT( SAFMIN ) ) )
+*
+*     Scale matrix to allowable range, if necessary.
+*
+      SCALE = ONE
+      TNRM = DLANST( 'M', N, D, E )
+      IF( TNRM.GT.ZERO .AND. TNRM.LT.RMIN ) THEN
+         SCALE = RMIN / TNRM
+      ELSE IF( TNRM.GT.RMAX ) THEN
+         SCALE = RMAX / TNRM
+      END IF
+      IF( SCALE.NE.ONE ) THEN
+         CALL DSCAL( N, SCALE, D, 1 )
+         CALL DSCAL( N-1, SCALE, E, 1 )
+         TNRM = TNRM*SCALE
+      END IF
+      INDGRS = 1
+      INDWOF = 2*N + 1
+      INDWRK = 3*N + 1
+*
+      IINSPL = 1
+      IINDBL = N + 1
+      IINDWK = 2*N + 1
+*
+      CALL ZLASET( 'Full', N, N, CZERO, CZERO, Z, LDZ )
+*
+*     Compute the desired eigenvalues of the tridiagonal after splitting
+*     into smaller subblocks if the corresponding of-diagonal elements
+*     are small
+*
+      THRESH = EPS*TNRM
+      CALL DLARRE( N, D, E, THRESH, NSPLIT, IWORK( IINSPL ), M, W,
+     $             WORK( INDWOF ), WORK( INDGRS ), WORK( INDWRK ),
+     $             IINFO )
+      IF( IINFO.NE.0 ) THEN
+         INFO = 1
+         RETURN
+      END IF
+*
+      IF( WANTZ ) THEN
+*
+*        Compute the desired eigenvectors corresponding to the computed
+*        eigenvalues
+*
+         TOL = MAX( ABSTOL, DBLE( N )*THRESH )
+         IBEGIN = 1
+         DO 20 I = 1, NSPLIT
+            IEND = IWORK( IINSPL+I-1 )
+            DO 10 J = IBEGIN, IEND
+               IWORK( IINDBL+J-1 ) = I
+   10       CONTINUE
+            IBEGIN = IEND + 1
+   20    CONTINUE
+*
+         CALL ZLARRV( N, D, E, IWORK( IINSPL ), M, W, IWORK( IINDBL ),
+     $                WORK( INDGRS ), TOL, Z, LDZ, ISUPPZ,
+     $                WORK( INDWRK ), IWORK( IINDWK ), IINFO )
+         IF( IINFO.NE.0 ) THEN
+            INFO = 2
+            RETURN
+         END IF
+*
+      END IF
+*
+      IBEGIN = 1
+      DO 40 I = 1, NSPLIT
+         IEND = IWORK( IINSPL+I-1 )
+         DO 30 J = IBEGIN, IEND
+            W( J ) = W( J ) + WORK( INDWOF+I-1 )
+   30    CONTINUE
+         IBEGIN = IEND + 1
+   40 CONTINUE
+*
+*     If matrix was scaled, then rescale eigenvalues appropriately.
+*
+      IF( SCALE.NE.ONE ) THEN
+         CALL DSCAL( M, ONE / SCALE, W, 1 )
+      END IF
+*
+*     If eigenvalues are not in order, then sort them, along with
+*     eigenvectors.
+*
+      IF( NSPLIT.GT.1 ) THEN
+         DO 60 J = 1, M - 1
+            I = 0
+            TMP = W( J )
+            DO 50 JJ = J + 1, M
+               IF( W( JJ ).LT.TMP ) THEN
+                  I = JJ
+                  TMP = W( JJ )
+               END IF
+   50       CONTINUE
+            IF( I.NE.0 ) THEN
+               W( I ) = W( J )
+               W( J ) = TMP
+               IF( WANTZ ) THEN
+                  CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, J ), 1 )
+                  ITMP = ISUPPZ( 2*I-1 )
+                  ISUPPZ( 2*I-1 ) = ISUPPZ( 2*J-1 )
+                  ISUPPZ( 2*J-1 ) = ITMP
+                  ITMP = ISUPPZ( 2*I )
+                  ISUPPZ( 2*I ) = ISUPPZ( 2*J )
+                  ISUPPZ( 2*J ) = ITMP
+               END IF
+            END IF
+   60    CONTINUE
+      END IF
+*
+      WORK( 1 ) = LWMIN
+      IWORK( 1 ) = LIWMIN
+      RETURN
+*
+*     End of ZSTEGR
+*
+      END
diff --git a/costa/native/external/lapack/zstein.f b/costa/native/external/lapack/zstein.f
new file mode 100644
index 000000000..e921df06e
--- /dev/null
+++ b/costa/native/external/lapack/zstein.f
@@ -0,0 +1,377 @@
+      SUBROUTINE ZSTEIN( N, D, E, M, W, IBLOCK, ISPLIT, Z, LDZ, WORK,
+     $                   IWORK, IFAIL, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDZ, M, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IBLOCK( * ), IFAIL( * ), ISPLIT( * ),
+     $                   IWORK( * )
+      DOUBLE PRECISION   D( * ), E( * ), W( * ), WORK( * )
+      COMPLEX*16         Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZSTEIN computes the eigenvectors of a real symmetric tridiagonal
+*  matrix T corresponding to specified eigenvalues, using inverse
+*  iteration.
+*
+*  The maximum number of iterations allowed for each eigenvector is
+*  specified by an internal parameter MAXITS (currently set to 5).
+*
+*  Although the eigenvectors are real, they are stored in a complex
+*  array, which may be passed to ZUNMTR or ZUPMTR for back
+*  transformation to the eigenvectors of a complex Hermitian matrix
+*  which was reduced to tridiagonal form.
+*
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix.  N >= 0.
+*
+*  D       (input) DOUBLE PRECISION array, dimension (N)
+*          The n diagonal elements of the tridiagonal matrix T.
+*
+*  E       (input) DOUBLE PRECISION array, dimension (N)
+*          The (n-1) subdiagonal elements of the tridiagonal matrix
+*          T, stored in elements 1 to N-1; E(N) need not be set.
+*
+*  M       (input) INTEGER
+*          The number of eigenvectors to be found.  0 <= M <= N.
+*
+*  W       (input) DOUBLE PRECISION array, dimension (N)
+*          The first M elements of W contain the eigenvalues for
+*          which eigenvectors are to be computed.  The eigenvalues
+*          should be grouped by split-off block and ordered from
+*          smallest to largest within the block.  ( The output array
+*          W from DSTEBZ with ORDER = 'B' is expected here. )
+*
+*  IBLOCK  (input) INTEGER array, dimension (N)
+*          The submatrix indices associated with the corresponding
+*          eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to
+*          the first submatrix from the top, =2 if W(i) belongs to
+*          the second submatrix, etc.  ( The output array IBLOCK
+*          from DSTEBZ is expected here. )
+*
+*  ISPLIT  (input) INTEGER array, dimension (N)
+*          The splitting points, at which T breaks up into submatrices.
+*          The first submatrix consists of rows/columns 1 to
+*          ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1
+*          through ISPLIT( 2 ), etc.
+*          ( The output array ISPLIT from DSTEBZ is expected here. )
+*
+*  Z       (output) COMPLEX*16 array, dimension (LDZ, M)
+*          The computed eigenvectors.  The eigenvector associated
+*          with the eigenvalue W(i) is stored in the i-th column of
+*          Z.  Any vector which fails to converge is set to its current
+*          iterate after MAXITS iterations.
+*          The imaginary parts of the eigenvectors are set to zero.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= max(1,N).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (5*N)
+*
+*  IWORK   (workspace) INTEGER array, dimension (N)
+*
+*  IFAIL   (output) INTEGER array, dimension (M)
+*          On normal exit, all elements of IFAIL are zero.
+*          If one or more eigenvectors fail to converge after
+*          MAXITS iterations, then their indices are stored in
+*          array IFAIL.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, then i eigenvectors failed to converge
+*               in MAXITS iterations.  Their indices are stored in
+*               array IFAIL.
+*
+*  Internal Parameters
+*  ===================
+*
+*  MAXITS  INTEGER, default = 5
+*          The maximum number of iterations performed.
+*
+*  EXTRA   INTEGER, default = 2
+*          The number of iterations performed after norm growth
+*          criterion is satisfied, should be at least 1.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   CONE = ( 1.0D+0, 0.0D+0 ) )
+      DOUBLE PRECISION   ZERO, ONE, TEN, ODM3, ODM1
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, TEN = 1.0D+1,
+     $                   ODM3 = 1.0D-3, ODM1 = 1.0D-1 )
+      INTEGER            MAXITS, EXTRA
+      PARAMETER          ( MAXITS = 5, EXTRA = 2 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            B1, BLKSIZ, BN, GPIND, I, IINFO, INDRV1,
+     $                   INDRV2, INDRV3, INDRV4, INDRV5, ITS, J, J1,
+     $                   JBLK, JMAX, JR, NBLK, NRMCHK
+      DOUBLE PRECISION   DTPCRT, EPS, EPS1, NRM, ONENRM, ORTOL, PERTOL,
+     $                   SCL, SEP, TOL, XJ, XJM, ZTR
+*     ..
+*     .. Local Arrays ..
+      INTEGER            ISEED( 4 )
+*     ..
+*     .. External Functions ..
+      INTEGER            IDAMAX
+      DOUBLE PRECISION   DASUM, DLAMCH, DNRM2
+      EXTERNAL           IDAMAX, DASUM, DLAMCH, DNRM2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DCOPY, DLAGTF, DLAGTS, DLARNV, DSCAL, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCMPLX, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      DO 10 I = 1, M
+         IFAIL( I ) = 0
+   10 CONTINUE
+*
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( M.LT.0 .OR. M.GT.N ) THEN
+         INFO = -4
+      ELSE IF( LDZ.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE
+         DO 20 J = 2, M
+            IF( IBLOCK( J ).LT.IBLOCK( J-1 ) ) THEN
+               INFO = -6
+               GO TO 30
+            END IF
+            IF( IBLOCK( J ).EQ.IBLOCK( J-1 ) .AND. W( J ).LT.W( J-1 ) )
+     $           THEN
+               INFO = -5
+               GO TO 30
+            END IF
+   20    CONTINUE
+   30    CONTINUE
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZSTEIN', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. M.EQ.0 ) THEN
+         RETURN
+      ELSE IF( N.EQ.1 ) THEN
+         Z( 1, 1 ) = CONE
+         RETURN
+      END IF
+*
+*     Get machine constants.
+*
+      EPS = DLAMCH( 'Precision' )
+*
+*     Initialize seed for random number generator DLARNV.
+*
+      DO 40 I = 1, 4
+         ISEED( I ) = 1
+   40 CONTINUE
+*
+*     Initialize pointers.
+*
+      INDRV1 = 0
+      INDRV2 = INDRV1 + N
+      INDRV3 = INDRV2 + N
+      INDRV4 = INDRV3 + N
+      INDRV5 = INDRV4 + N
+*
+*     Compute eigenvectors of matrix blocks.
+*
+      J1 = 1
+      DO 180 NBLK = 1, IBLOCK( M )
+*
+*        Find starting and ending indices of block nblk.
+*
+         IF( NBLK.EQ.1 ) THEN
+            B1 = 1
+         ELSE
+            B1 = ISPLIT( NBLK-1 ) + 1
+         END IF
+         BN = ISPLIT( NBLK )
+         BLKSIZ = BN - B1 + 1
+         IF( BLKSIZ.EQ.1 )
+     $      GO TO 60
+         GPIND = B1
+*
+*        Compute reorthogonalization criterion and stopping criterion.
+*
+         ONENRM = ABS( D( B1 ) ) + ABS( E( B1 ) )
+         ONENRM = MAX( ONENRM, ABS( D( BN ) )+ABS( E( BN-1 ) ) )
+         DO 50 I = B1 + 1, BN - 1
+            ONENRM = MAX( ONENRM, ABS( D( I ) )+ABS( E( I-1 ) )+
+     $               ABS( E( I ) ) )
+   50    CONTINUE
+         ORTOL = ODM3*ONENRM
+*
+         DTPCRT = SQRT( ODM1 / BLKSIZ )
+*
+*        Loop through eigenvalues of block nblk.
+*
+   60    CONTINUE
+         JBLK = 0
+         DO 170 J = J1, M
+            IF( IBLOCK( J ).NE.NBLK ) THEN
+               J1 = J
+               GO TO 180
+            END IF
+            JBLK = JBLK + 1
+            XJ = W( J )
+*
+*           Skip all the work if the block size is one.
+*
+            IF( BLKSIZ.EQ.1 ) THEN
+               WORK( INDRV1+1 ) = ONE
+               GO TO 140
+            END IF
+*
+*           If eigenvalues j and j-1 are too close, add a relatively
+*           small perturbation.
+*
+            IF( JBLK.GT.1 ) THEN
+               EPS1 = ABS( EPS*XJ )
+               PERTOL = TEN*EPS1
+               SEP = XJ - XJM
+               IF( SEP.LT.PERTOL )
+     $            XJ = XJM + PERTOL
+            END IF
+*
+            ITS = 0
+            NRMCHK = 0
+*
+*           Get random starting vector.
+*
+            CALL DLARNV( 2, ISEED, BLKSIZ, WORK( INDRV1+1 ) )
+*
+*           Copy the matrix T so it won't be destroyed in factorization.
+*
+            CALL DCOPY( BLKSIZ, D( B1 ), 1, WORK( INDRV4+1 ), 1 )
+            CALL DCOPY( BLKSIZ-1, E( B1 ), 1, WORK( INDRV2+2 ), 1 )
+            CALL DCOPY( BLKSIZ-1, E( B1 ), 1, WORK( INDRV3+1 ), 1 )
+*
+*           Compute LU factors with partial pivoting  ( PT = LU )
+*
+            TOL = ZERO
+            CALL DLAGTF( BLKSIZ, WORK( INDRV4+1 ), XJ, WORK( INDRV2+2 ),
+     $                   WORK( INDRV3+1 ), TOL, WORK( INDRV5+1 ), IWORK,
+     $                   IINFO )
+*
+*           Update iteration count.
+*
+   70       CONTINUE
+            ITS = ITS + 1
+            IF( ITS.GT.MAXITS )
+     $         GO TO 120
+*
+*           Normalize and scale the righthand side vector Pb.
+*
+            SCL = BLKSIZ*ONENRM*MAX( EPS,
+     $            ABS( WORK( INDRV4+BLKSIZ ) ) ) /
+     $            DASUM( BLKSIZ, WORK( INDRV1+1 ), 1 )
+            CALL DSCAL( BLKSIZ, SCL, WORK( INDRV1+1 ), 1 )
+*
+*           Solve the system LU = Pb.
+*
+            CALL DLAGTS( -1, BLKSIZ, WORK( INDRV4+1 ), WORK( INDRV2+2 ),
+     $                   WORK( INDRV3+1 ), WORK( INDRV5+1 ), IWORK,
+     $                   WORK( INDRV1+1 ), TOL, IINFO )
+*
+*           Reorthogonalize by modified Gram-Schmidt if eigenvalues are
+*           close enough.
+*
+            IF( JBLK.EQ.1 )
+     $         GO TO 110
+            IF( ABS( XJ-XJM ).GT.ORTOL )
+     $         GPIND = J
+            IF( GPIND.NE.J ) THEN
+               DO 100 I = GPIND, J - 1
+                  ZTR = ZERO
+                  DO 80 JR = 1, BLKSIZ
+                     ZTR = ZTR + WORK( INDRV1+JR )*
+     $                     DBLE( Z( B1-1+JR, I ) )
+   80             CONTINUE
+                  DO 90 JR = 1, BLKSIZ
+                     WORK( INDRV1+JR ) = WORK( INDRV1+JR ) -
+     $                                   ZTR*DBLE( Z( B1-1+JR, I ) )
+   90             CONTINUE
+  100          CONTINUE
+            END IF
+*
+*           Check the infinity norm of the iterate.
+*
+  110       CONTINUE
+            JMAX = IDAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 )
+            NRM = ABS( WORK( INDRV1+JMAX ) )
+*
+*           Continue for additional iterations after norm reaches
+*           stopping criterion.
+*
+            IF( NRM.LT.DTPCRT )
+     $         GO TO 70
+            NRMCHK = NRMCHK + 1
+            IF( NRMCHK.LT.EXTRA+1 )
+     $         GO TO 70
+*
+            GO TO 130
+*
+*           If stopping criterion was not satisfied, update info and
+*           store eigenvector number in array ifail.
+*
+  120       CONTINUE
+            INFO = INFO + 1
+            IFAIL( INFO ) = J
+*
+*           Accept iterate as jth eigenvector.
+*
+  130       CONTINUE
+            SCL = ONE / DNRM2( BLKSIZ, WORK( INDRV1+1 ), 1 )
+            JMAX = IDAMAX( BLKSIZ, WORK( INDRV1+1 ), 1 )
+            IF( WORK( INDRV1+JMAX ).LT.ZERO )
+     $         SCL = -SCL
+            CALL DSCAL( BLKSIZ, SCL, WORK( INDRV1+1 ), 1 )
+  140       CONTINUE
+            DO 150 I = 1, N
+               Z( I, J ) = CZERO
+  150       CONTINUE
+            DO 160 I = 1, BLKSIZ
+               Z( B1+I-1, J ) = DCMPLX( WORK( INDRV1+I ), ZERO )
+  160       CONTINUE
+*
+*           Save the shift to check eigenvalue spacing at next
+*           iteration.
+*
+            XJM = XJ
+*
+  170    CONTINUE
+  180 CONTINUE
+*
+      RETURN
+*
+*     End of ZSTEIN
+*
+      END
diff --git a/costa/native/external/lapack/zsteqr.f b/costa/native/external/lapack/zsteqr.f
new file mode 100644
index 000000000..8dc609e4e
--- /dev/null
+++ b/costa/native/external/lapack/zsteqr.f
@@ -0,0 +1,504 @@
+      SUBROUTINE ZSTEQR( COMPZ, N, D, E, Z, LDZ, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPZ
+      INTEGER            INFO, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   D( * ), E( * ), WORK( * )
+      COMPLEX*16         Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZSTEQR computes all eigenvalues and, optionally, eigenvectors of a
+*  symmetric tridiagonal matrix using the implicit QL or QR method.
+*  The eigenvectors of a full or band complex Hermitian matrix can also
+*  be found if ZHETRD or ZHPTRD or ZHBTRD has been used to reduce this
+*  matrix to tridiagonal form.
+*
+*  Arguments
+*  =========
+*
+*  COMPZ   (input) CHARACTER*1
+*          = 'N':  Compute eigenvalues only.
+*          = 'V':  Compute eigenvalues and eigenvectors of the original
+*                  Hermitian matrix.  On entry, Z must contain the
+*                  unitary matrix used to reduce the original matrix
+*                  to tridiagonal form.
+*          = 'I':  Compute eigenvalues and eigenvectors of the
+*                  tridiagonal matrix.  Z is initialized to the identity
+*                  matrix.
+*
+*  N       (input) INTEGER
+*          The order of the matrix.  N >= 0.
+*
+*  D       (input/output) DOUBLE PRECISION array, dimension (N)
+*          On entry, the diagonal elements of the tridiagonal matrix.
+*          On exit, if INFO = 0, the eigenvalues in ascending order.
+*
+*  E       (input/output) DOUBLE PRECISION array, dimension (N-1)
+*          On entry, the (n-1) subdiagonal elements of the tridiagonal
+*          matrix.
+*          On exit, E has been destroyed.
+*
+*  Z       (input/output) COMPLEX*16 array, dimension (LDZ, N)
+*          On entry, if  COMPZ = 'V', then Z contains the unitary
+*          matrix used in the reduction to tridiagonal form.
+*          On exit, if INFO = 0, then if COMPZ = 'V', Z contains the
+*          orthonormal eigenvectors of the original Hermitian matrix,
+*          and if COMPZ = 'I', Z contains the orthonormal eigenvectors
+*          of the symmetric tridiagonal matrix.
+*          If COMPZ = 'N', then Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z.  LDZ >= 1, and if
+*          eigenvectors are desired, then  LDZ >= max(1,N).
+*
+*  WORK    (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2))
+*          If COMPZ = 'N', then WORK is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  the algorithm has failed to find all the eigenvalues in
+*                a total of 30*N iterations; if INFO = i, then i
+*                elements of E have not converged to zero; on exit, D
+*                and E contain the elements of a symmetric tridiagonal
+*                matrix which is unitarily similar to the original
+*                matrix.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE, TWO, THREE
+      PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0,
+     $                   THREE = 3.0D0 )
+      COMPLEX*16         CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0D0, 0.0D0 ),
+     $                   CONE = ( 1.0D0, 0.0D0 ) )
+      INTEGER            MAXIT
+      PARAMETER          ( MAXIT = 30 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, ICOMPZ, II, ISCALE, J, JTOT, K, L, L1, LEND,
+     $                   LENDM1, LENDP1, LENDSV, LM1, LSV, M, MM, MM1,
+     $                   NM1, NMAXIT
+      DOUBLE PRECISION   ANORM, B, C, EPS, EPS2, F, G, P, R, RT1, RT2,
+     $                   S, SAFMAX, SAFMIN, SSFMAX, SSFMIN, TST
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, DLANST, DLAPY2
+      EXTERNAL           LSAME, DLAMCH, DLANST, DLAPY2
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLAE2, DLAEV2, DLARTG, DLASCL, DLASRT, XERBLA,
+     $                   ZLASET, ZLASR, ZSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX, SIGN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+*
+      IF( LSAME( COMPZ, 'N' ) ) THEN
+         ICOMPZ = 0
+      ELSE IF( LSAME( COMPZ, 'V' ) ) THEN
+         ICOMPZ = 1
+      ELSE IF( LSAME( COMPZ, 'I' ) ) THEN
+         ICOMPZ = 2
+      ELSE
+         ICOMPZ = -1
+      END IF
+      IF( ICOMPZ.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( ( LDZ.LT.1 ) .OR. ( ICOMPZ.GT.0 .AND. LDZ.LT.MAX( 1,
+     $         N ) ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZSTEQR', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( ICOMPZ.EQ.2 )
+     $      Z( 1, 1 ) = CONE
+         RETURN
+      END IF
+*
+*     Determine the unit roundoff and over/underflow thresholds.
+*
+      EPS = DLAMCH( 'E' )
+      EPS2 = EPS**2
+      SAFMIN = DLAMCH( 'S' )
+      SAFMAX = ONE / SAFMIN
+      SSFMAX = SQRT( SAFMAX ) / THREE
+      SSFMIN = SQRT( SAFMIN ) / EPS2
+*
+*     Compute the eigenvalues and eigenvectors of the tridiagonal
+*     matrix.
+*
+      IF( ICOMPZ.EQ.2 )
+     $   CALL ZLASET( 'Full', N, N, CZERO, CONE, Z, LDZ )
+*
+      NMAXIT = N*MAXIT
+      JTOT = 0
+*
+*     Determine where the matrix splits and choose QL or QR iteration
+*     for each block, according to whether top or bottom diagonal
+*     element is smaller.
+*
+      L1 = 1
+      NM1 = N - 1
+*
+   10 CONTINUE
+      IF( L1.GT.N )
+     $   GO TO 160
+      IF( L1.GT.1 )
+     $   E( L1-1 ) = ZERO
+      IF( L1.LE.NM1 ) THEN
+         DO 20 M = L1, NM1
+            TST = ABS( E( M ) )
+            IF( TST.EQ.ZERO )
+     $         GO TO 30
+            IF( TST.LE.( SQRT( ABS( D( M ) ) )*SQRT( ABS( D( M+
+     $          1 ) ) ) )*EPS ) THEN
+               E( M ) = ZERO
+               GO TO 30
+            END IF
+   20    CONTINUE
+      END IF
+      M = N
+*
+   30 CONTINUE
+      L = L1
+      LSV = L
+      LEND = M
+      LENDSV = LEND
+      L1 = M + 1
+      IF( LEND.EQ.L )
+     $   GO TO 10
+*
+*     Scale submatrix in rows and columns L to LEND
+*
+      ANORM = DLANST( 'I', LEND-L+1, D( L ), E( L ) )
+      ISCALE = 0
+      IF( ANORM.EQ.ZERO )
+     $   GO TO 10
+      IF( ANORM.GT.SSFMAX ) THEN
+         ISCALE = 1
+         CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L+1, 1, D( L ), N,
+     $                INFO )
+         CALL DLASCL( 'G', 0, 0, ANORM, SSFMAX, LEND-L, 1, E( L ), N,
+     $                INFO )
+      ELSE IF( ANORM.LT.SSFMIN ) THEN
+         ISCALE = 2
+         CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L+1, 1, D( L ), N,
+     $                INFO )
+         CALL DLASCL( 'G', 0, 0, ANORM, SSFMIN, LEND-L, 1, E( L ), N,
+     $                INFO )
+      END IF
+*
+*     Choose between QL and QR iteration
+*
+      IF( ABS( D( LEND ) ).LT.ABS( D( L ) ) ) THEN
+         LEND = LSV
+         L = LENDSV
+      END IF
+*
+      IF( LEND.GT.L ) THEN
+*
+*        QL Iteration
+*
+*        Look for small subdiagonal element.
+*
+   40    CONTINUE
+         IF( L.NE.LEND ) THEN
+            LENDM1 = LEND - 1
+            DO 50 M = L, LENDM1
+               TST = ABS( E( M ) )**2
+               IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M+1 ) )+
+     $             SAFMIN )GO TO 60
+   50       CONTINUE
+         END IF
+*
+         M = LEND
+*
+   60    CONTINUE
+         IF( M.LT.LEND )
+     $      E( M ) = ZERO
+         P = D( L )
+         IF( M.EQ.L )
+     $      GO TO 80
+*
+*        If remaining matrix is 2-by-2, use DLAE2 or SLAEV2
+*        to compute its eigensystem.
+*
+         IF( M.EQ.L+1 ) THEN
+            IF( ICOMPZ.GT.0 ) THEN
+               CALL DLAEV2( D( L ), E( L ), D( L+1 ), RT1, RT2, C, S )
+               WORK( L ) = C
+               WORK( N-1+L ) = S
+               CALL ZLASR( 'R', 'V', 'B', N, 2, WORK( L ),
+     $                     WORK( N-1+L ), Z( 1, L ), LDZ )
+            ELSE
+               CALL DLAE2( D( L ), E( L ), D( L+1 ), RT1, RT2 )
+            END IF
+            D( L ) = RT1
+            D( L+1 ) = RT2
+            E( L ) = ZERO
+            L = L + 2
+            IF( L.LE.LEND )
+     $         GO TO 40
+            GO TO 140
+         END IF
+*
+         IF( JTOT.EQ.NMAXIT )
+     $      GO TO 140
+         JTOT = JTOT + 1
+*
+*        Form shift.
+*
+         G = ( D( L+1 )-P ) / ( TWO*E( L ) )
+         R = DLAPY2( G, ONE )
+         G = D( M ) - P + ( E( L ) / ( G+SIGN( R, G ) ) )
+*
+         S = ONE
+         C = ONE
+         P = ZERO
+*
+*        Inner loop
+*
+         MM1 = M - 1
+         DO 70 I = MM1, L, -1
+            F = S*E( I )
+            B = C*E( I )
+            CALL DLARTG( G, F, C, S, R )
+            IF( I.NE.M-1 )
+     $         E( I+1 ) = R
+            G = D( I+1 ) - P
+            R = ( D( I )-G )*S + TWO*C*B
+            P = S*R
+            D( I+1 ) = G + P
+            G = C*R - B
+*
+*           If eigenvectors are desired, then save rotations.
+*
+            IF( ICOMPZ.GT.0 ) THEN
+               WORK( I ) = C
+               WORK( N-1+I ) = -S
+            END IF
+*
+   70    CONTINUE
+*
+*        If eigenvectors are desired, then apply saved rotations.
+*
+         IF( ICOMPZ.GT.0 ) THEN
+            MM = M - L + 1
+            CALL ZLASR( 'R', 'V', 'B', N, MM, WORK( L ), WORK( N-1+L ),
+     $                  Z( 1, L ), LDZ )
+         END IF
+*
+         D( L ) = D( L ) - P
+         E( L ) = G
+         GO TO 40
+*
+*        Eigenvalue found.
+*
+   80    CONTINUE
+         D( L ) = P
+*
+         L = L + 1
+         IF( L.LE.LEND )
+     $      GO TO 40
+         GO TO 140
+*
+      ELSE
+*
+*        QR Iteration
+*
+*        Look for small superdiagonal element.
+*
+   90    CONTINUE
+         IF( L.NE.LEND ) THEN
+            LENDP1 = LEND + 1
+            DO 100 M = L, LENDP1, -1
+               TST = ABS( E( M-1 ) )**2
+               IF( TST.LE.( EPS2*ABS( D( M ) ) )*ABS( D( M-1 ) )+
+     $             SAFMIN )GO TO 110
+  100       CONTINUE
+         END IF
+*
+         M = LEND
+*
+  110    CONTINUE
+         IF( M.GT.LEND )
+     $      E( M-1 ) = ZERO
+         P = D( L )
+         IF( M.EQ.L )
+     $      GO TO 130
+*
+*        If remaining matrix is 2-by-2, use DLAE2 or SLAEV2
+*        to compute its eigensystem.
+*
+         IF( M.EQ.L-1 ) THEN
+            IF( ICOMPZ.GT.0 ) THEN
+               CALL DLAEV2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2, C, S )
+               WORK( M ) = C
+               WORK( N-1+M ) = S
+               CALL ZLASR( 'R', 'V', 'F', N, 2, WORK( M ),
+     $                     WORK( N-1+M ), Z( 1, L-1 ), LDZ )
+            ELSE
+               CALL DLAE2( D( L-1 ), E( L-1 ), D( L ), RT1, RT2 )
+            END IF
+            D( L-1 ) = RT1
+            D( L ) = RT2
+            E( L-1 ) = ZERO
+            L = L - 2
+            IF( L.GE.LEND )
+     $         GO TO 90
+            GO TO 140
+         END IF
+*
+         IF( JTOT.EQ.NMAXIT )
+     $      GO TO 140
+         JTOT = JTOT + 1
+*
+*        Form shift.
+*
+         G = ( D( L-1 )-P ) / ( TWO*E( L-1 ) )
+         R = DLAPY2( G, ONE )
+         G = D( M ) - P + ( E( L-1 ) / ( G+SIGN( R, G ) ) )
+*
+         S = ONE
+         C = ONE
+         P = ZERO
+*
+*        Inner loop
+*
+         LM1 = L - 1
+         DO 120 I = M, LM1
+            F = S*E( I )
+            B = C*E( I )
+            CALL DLARTG( G, F, C, S, R )
+            IF( I.NE.M )
+     $         E( I-1 ) = R
+            G = D( I ) - P
+            R = ( D( I+1 )-G )*S + TWO*C*B
+            P = S*R
+            D( I ) = G + P
+            G = C*R - B
+*
+*           If eigenvectors are desired, then save rotations.
+*
+            IF( ICOMPZ.GT.0 ) THEN
+               WORK( I ) = C
+               WORK( N-1+I ) = S
+            END IF
+*
+  120    CONTINUE
+*
+*        If eigenvectors are desired, then apply saved rotations.
+*
+         IF( ICOMPZ.GT.0 ) THEN
+            MM = L - M + 1
+            CALL ZLASR( 'R', 'V', 'F', N, MM, WORK( M ), WORK( N-1+M ),
+     $                  Z( 1, M ), LDZ )
+         END IF
+*
+         D( L ) = D( L ) - P
+         E( LM1 ) = G
+         GO TO 90
+*
+*        Eigenvalue found.
+*
+  130    CONTINUE
+         D( L ) = P
+*
+         L = L - 1
+         IF( L.GE.LEND )
+     $      GO TO 90
+         GO TO 140
+*
+      END IF
+*
+*     Undo scaling if necessary
+*
+  140 CONTINUE
+      IF( ISCALE.EQ.1 ) THEN
+         CALL DLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV+1, 1,
+     $                D( LSV ), N, INFO )
+         CALL DLASCL( 'G', 0, 0, SSFMAX, ANORM, LENDSV-LSV, 1, E( LSV ),
+     $                N, INFO )
+      ELSE IF( ISCALE.EQ.2 ) THEN
+         CALL DLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV+1, 1,
+     $                D( LSV ), N, INFO )
+         CALL DLASCL( 'G', 0, 0, SSFMIN, ANORM, LENDSV-LSV, 1, E( LSV ),
+     $                N, INFO )
+      END IF
+*
+*     Check for no convergence to an eigenvalue after a total
+*     of N*MAXIT iterations.
+*
+      IF( JTOT.EQ.NMAXIT ) THEN
+         DO 150 I = 1, N - 1
+            IF( E( I ).NE.ZERO )
+     $         INFO = INFO + 1
+  150    CONTINUE
+         RETURN
+      END IF
+      GO TO 10
+*
+*     Order eigenvalues and eigenvectors.
+*
+  160 CONTINUE
+      IF( ICOMPZ.EQ.0 ) THEN
+*
+*        Use Quick Sort
+*
+         CALL DLASRT( 'I', N, D, INFO )
+*
+      ELSE
+*
+*        Use Selection Sort to minimize swaps of eigenvectors
+*
+         DO 180 II = 2, N
+            I = II - 1
+            K = I
+            P = D( I )
+            DO 170 J = II, N
+               IF( D( J ).LT.P ) THEN
+                  K = J
+                  P = D( J )
+               END IF
+  170       CONTINUE
+            IF( K.NE.I ) THEN
+               D( K ) = D( I )
+               D( I ) = P
+               CALL ZSWAP( N, Z( 1, I ), 1, Z( 1, K ), 1 )
+            END IF
+  180    CONTINUE
+      END IF
+      RETURN
+*
+*     End of ZSTEQR
+*
+      END
diff --git a/costa/native/external/lapack/zsycon.f b/costa/native/external/lapack/zsycon.f
new file mode 100644
index 000000000..bc8a95e6a
--- /dev/null
+++ b/costa/native/external/lapack/zsycon.f
@@ -0,0 +1,159 @@
+      SUBROUTINE ZSYCON( UPLO, N, A, LDA, IPIV, ANORM, RCOND, WORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+      DOUBLE PRECISION   ANORM, RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZSYCON estimates the reciprocal of the condition number (in the
+*  1-norm) of a complex symmetric matrix A using the factorization
+*  A = U*D*U**T or A = L*D*L**T computed by ZSYTRF.
+*
+*  An estimate is obtained for norm(inv(A)), and the reciprocal of the
+*  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**T;
+*          = 'L':  Lower triangular, form is A = L*D*L**T.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,N)
+*          The block diagonal matrix D and the multipliers used to
+*          obtain the factor U or L as computed by ZSYTRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by ZSYTRF.
+*
+*  ANORM   (input) DOUBLE PRECISION
+*          The 1-norm of the original matrix A.
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(ANORM * AINVNM), where AINVNM is an
+*          estimate of the 1-norm of inv(A) computed in this routine.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, KASE
+      DOUBLE PRECISION   AINVNM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLACON, ZSYTRS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( ANORM.LT.ZERO ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZSYCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      RCOND = ZERO
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      ELSE IF( ANORM.LE.ZERO ) THEN
+         RETURN
+      END IF
+*
+*     Check that the diagonal matrix D is nonsingular.
+*
+      IF( UPPER ) THEN
+*
+*        Upper triangular storage: examine D from bottom to top
+*
+         DO 10 I = N, 1, -1
+            IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO )
+     $         RETURN
+   10    CONTINUE
+      ELSE
+*
+*        Lower triangular storage: examine D from top to bottom.
+*
+         DO 20 I = 1, N
+            IF( IPIV( I ).GT.0 .AND. A( I, I ).EQ.ZERO )
+     $         RETURN
+   20    CONTINUE
+      END IF
+*
+*     Estimate the 1-norm of the inverse.
+*
+      KASE = 0
+   30 CONTINUE
+      CALL ZLACON( N, WORK( N+1 ), WORK, AINVNM, KASE )
+      IF( KASE.NE.0 ) THEN
+*
+*        Multiply by inv(L*D*L') or inv(U*D*U').
+*
+         CALL ZSYTRS( UPLO, N, 1, A, LDA, IPIV, WORK, N, INFO )
+         GO TO 30
+      END IF
+*
+*     Compute the estimate of the reciprocal condition number.
+*
+      IF( AINVNM.NE.ZERO )
+     $   RCOND = ( ONE / AINVNM ) / ANORM
+*
+      RETURN
+*
+*     End of ZSYCON
+*
+      END
diff --git a/costa/native/external/lapack/zsymv.f b/costa/native/external/lapack/zsymv.f
new file mode 100644
index 000000000..10451fc0a
--- /dev/null
+++ b/costa/native/external/lapack/zsymv.f
@@ -0,0 +1,265 @@
+      SUBROUTINE ZSYMV( UPLO, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INCX, INCY, LDA, N
+      COMPLEX*16         ALPHA, BETA
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), X( * ), Y( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZSYMV  performs the matrix-vector  operation
+*
+*     y := alpha*A*x + beta*y,
+*
+*  where alpha and beta are scalars, x and y are n element vectors and
+*  A is an n by n symmetric matrix.
+*
+*  Arguments
+*  ==========
+*
+*  UPLO   - CHARACTER*1
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the array A is to be referenced as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of A
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of A
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16 array, dimension ( LDA, N )
+*           Before entry, with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular part of the symmetric matrix and the strictly
+*           lower triangular part of A is not referenced.
+*           Before entry, with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular part of the symmetric matrix and the strictly
+*           upper triangular part of A is not referenced.
+*           Unchanged on exit.
+*
+*  LDA    - INTEGER
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, N ).
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16 array, dimension at least
+*           ( 1 + ( N - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the N-
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  BETA   - COMPLEX*16
+*           On entry, BETA specifies the scalar beta. When BETA is
+*           supplied as zero then Y need not be set on input.
+*           Unchanged on exit.
+*
+*  Y      - COMPLEX*16 array, dimension at least
+*           ( 1 + ( N - 1 )*abs( INCY ) ).
+*           Before entry, the incremented array Y must contain the n
+*           element vector y. On exit, Y is overwritten by the updated
+*           vector y.
+*
+*  INCY   - INTEGER
+*           On entry, INCY specifies the increment for the elements of
+*           Y. INCY must not be zero.
+*           Unchanged on exit.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+      COMPLEX*16         ZERO
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, INFO, IX, IY, J, JX, JY, KX, KY
+      COMPLEX*16         TEMP1, TEMP2
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = 1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = 2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = 5
+      ELSE IF( INCX.EQ.0 ) THEN
+         INFO = 7
+      ELSE IF( INCY.EQ.0 ) THEN
+         INFO = 10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZSYMV ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ) .OR. ( ( ALPHA.EQ.ZERO ) .AND. ( BETA.EQ.ONE ) ) )
+     $   RETURN
+*
+*     Set up the start points in  X  and  Y.
+*
+      IF( INCX.GT.0 ) THEN
+         KX = 1
+      ELSE
+         KX = 1 - ( N-1 )*INCX
+      END IF
+      IF( INCY.GT.0 ) THEN
+         KY = 1
+      ELSE
+         KY = 1 - ( N-1 )*INCY
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the triangular part
+*     of A.
+*
+*     First form  y := beta*y.
+*
+      IF( BETA.NE.ONE ) THEN
+         IF( INCY.EQ.1 ) THEN
+            IF( BETA.EQ.ZERO ) THEN
+               DO 10 I = 1, N
+                  Y( I ) = ZERO
+   10          CONTINUE
+            ELSE
+               DO 20 I = 1, N
+                  Y( I ) = BETA*Y( I )
+   20          CONTINUE
+            END IF
+         ELSE
+            IY = KY
+            IF( BETA.EQ.ZERO ) THEN
+               DO 30 I = 1, N
+                  Y( IY ) = ZERO
+                  IY = IY + INCY
+   30          CONTINUE
+            ELSE
+               DO 40 I = 1, N
+                  Y( IY ) = BETA*Y( IY )
+                  IY = IY + INCY
+   40          CONTINUE
+            END IF
+         END IF
+      END IF
+      IF( ALPHA.EQ.ZERO )
+     $   RETURN
+      IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*        Form  y  when A is stored in upper triangle.
+*
+         IF( ( INCX.EQ.1 ) .AND. ( INCY.EQ.1 ) ) THEN
+            DO 60 J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               DO 50 I = 1, J - 1
+                  Y( I ) = Y( I ) + TEMP1*A( I, J )
+                  TEMP2 = TEMP2 + A( I, J )*X( I )
+   50          CONTINUE
+               Y( J ) = Y( J ) + TEMP1*A( J, J ) + ALPHA*TEMP2
+   60       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 80 J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               IX = KX
+               IY = KY
+               DO 70 I = 1, J - 1
+                  Y( IY ) = Y( IY ) + TEMP1*A( I, J )
+                  TEMP2 = TEMP2 + A( I, J )*X( IX )
+                  IX = IX + INCX
+                  IY = IY + INCY
+   70          CONTINUE
+               Y( JY ) = Y( JY ) + TEMP1*A( J, J ) + ALPHA*TEMP2
+               JX = JX + INCX
+               JY = JY + INCY
+   80       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  y  when A is stored in lower triangle.
+*
+         IF( ( INCX.EQ.1 ) .AND. ( INCY.EQ.1 ) ) THEN
+            DO 100 J = 1, N
+               TEMP1 = ALPHA*X( J )
+               TEMP2 = ZERO
+               Y( J ) = Y( J ) + TEMP1*A( J, J )
+               DO 90 I = J + 1, N
+                  Y( I ) = Y( I ) + TEMP1*A( I, J )
+                  TEMP2 = TEMP2 + A( I, J )*X( I )
+   90          CONTINUE
+               Y( J ) = Y( J ) + ALPHA*TEMP2
+  100       CONTINUE
+         ELSE
+            JX = KX
+            JY = KY
+            DO 120 J = 1, N
+               TEMP1 = ALPHA*X( JX )
+               TEMP2 = ZERO
+               Y( JY ) = Y( JY ) + TEMP1*A( J, J )
+               IX = JX
+               IY = JY
+               DO 110 I = J + 1, N
+                  IX = IX + INCX
+                  IY = IY + INCY
+                  Y( IY ) = Y( IY ) + TEMP1*A( I, J )
+                  TEMP2 = TEMP2 + A( I, J )*X( IX )
+  110          CONTINUE
+               Y( JY ) = Y( JY ) + ALPHA*TEMP2
+               JX = JX + INCX
+               JY = JY + INCY
+  120       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZSYMV
+*
+      END
diff --git a/costa/native/external/lapack/zsyr.f b/costa/native/external/lapack/zsyr.f
new file mode 100644
index 000000000..69f522642
--- /dev/null
+++ b/costa/native/external/lapack/zsyr.f
@@ -0,0 +1,199 @@
+      SUBROUTINE ZSYR( UPLO, N, ALPHA, X, INCX, A, LDA )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     October 31, 1992
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INCX, LDA, N
+      COMPLEX*16         ALPHA
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), X( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZSYR   performs the symmetric rank 1 operation
+*
+*     A := alpha*x*( x' ) + A,
+*
+*  where alpha is a complex scalar, x is an n element vector and A is an
+*  n by n symmetric matrix.
+*
+*  Arguments
+*  ==========
+*
+*  UPLO   - CHARACTER*1
+*           On entry, UPLO specifies whether the upper or lower
+*           triangular part of the array A is to be referenced as
+*           follows:
+*
+*              UPLO = 'U' or 'u'   Only the upper triangular part of A
+*                                  is to be referenced.
+*
+*              UPLO = 'L' or 'l'   Only the lower triangular part of A
+*                                  is to be referenced.
+*
+*           Unchanged on exit.
+*
+*  N      - INTEGER
+*           On entry, N specifies the order of the matrix A.
+*           N must be at least zero.
+*           Unchanged on exit.
+*
+*  ALPHA  - COMPLEX*16
+*           On entry, ALPHA specifies the scalar alpha.
+*           Unchanged on exit.
+*
+*  X      - COMPLEX*16 array, dimension at least
+*           ( 1 + ( N - 1 )*abs( INCX ) ).
+*           Before entry, the incremented array X must contain the N-
+*           element vector x.
+*           Unchanged on exit.
+*
+*  INCX   - INTEGER
+*           On entry, INCX specifies the increment for the elements of
+*           X. INCX must not be zero.
+*           Unchanged on exit.
+*
+*  A      - COMPLEX*16 array, dimension ( LDA, N )
+*           Before entry, with  UPLO = 'U' or 'u', the leading n by n
+*           upper triangular part of the array A must contain the upper
+*           triangular part of the symmetric matrix and the strictly
+*           lower triangular part of A is not referenced. On exit, the
+*           upper triangular part of the array A is overwritten by the
+*           upper triangular part of the updated matrix.
+*           Before entry, with UPLO = 'L' or 'l', the leading n by n
+*           lower triangular part of the array A must contain the lower
+*           triangular part of the symmetric matrix and the strictly
+*           upper triangular part of A is not referenced. On exit, the
+*           lower triangular part of the array A is overwritten by the
+*           lower triangular part of the updated matrix.
+*
+*  LDA    - INTEGER
+*           On entry, LDA specifies the first dimension of A as declared
+*           in the calling (sub) program. LDA must be at least
+*           max( 1, N ).
+*           Unchanged on exit.
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, INFO, IX, J, JX, KX
+      COMPLEX*16         TEMP
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = 1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = 2
+      ELSE IF( INCX.EQ.0 ) THEN
+         INFO = 5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = 7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZSYR  ', INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( ( N.EQ.0 ) .OR. ( ALPHA.EQ.ZERO ) )
+     $   RETURN
+*
+*     Set the start point in X if the increment is not unity.
+*
+      IF( INCX.LE.0 ) THEN
+         KX = 1 - ( N-1 )*INCX
+      ELSE IF( INCX.NE.1 ) THEN
+         KX = 1
+      END IF
+*
+*     Start the operations. In this version the elements of A are
+*     accessed sequentially with one pass through the triangular part
+*     of A.
+*
+      IF( LSAME( UPLO, 'U' ) ) THEN
+*
+*        Form  A  when A is stored in upper triangle.
+*
+         IF( INCX.EQ.1 ) THEN
+            DO 20 J = 1, N
+               IF( X( J ).NE.ZERO ) THEN
+                  TEMP = ALPHA*X( J )
+                  DO 10 I = 1, J
+                     A( I, J ) = A( I, J ) + X( I )*TEMP
+   10             CONTINUE
+               END IF
+   20       CONTINUE
+         ELSE
+            JX = KX
+            DO 40 J = 1, N
+               IF( X( JX ).NE.ZERO ) THEN
+                  TEMP = ALPHA*X( JX )
+                  IX = KX
+                  DO 30 I = 1, J
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP
+                     IX = IX + INCX
+   30             CONTINUE
+               END IF
+               JX = JX + INCX
+   40       CONTINUE
+         END IF
+      ELSE
+*
+*        Form  A  when A is stored in lower triangle.
+*
+         IF( INCX.EQ.1 ) THEN
+            DO 60 J = 1, N
+               IF( X( J ).NE.ZERO ) THEN
+                  TEMP = ALPHA*X( J )
+                  DO 50 I = J, N
+                     A( I, J ) = A( I, J ) + X( I )*TEMP
+   50             CONTINUE
+               END IF
+   60       CONTINUE
+         ELSE
+            JX = KX
+            DO 80 J = 1, N
+               IF( X( JX ).NE.ZERO ) THEN
+                  TEMP = ALPHA*X( JX )
+                  IX = JX
+                  DO 70 I = J, N
+                     A( I, J ) = A( I, J ) + X( IX )*TEMP
+                     IX = IX + INCX
+   70             CONTINUE
+               END IF
+               JX = JX + INCX
+   80       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZSYR
+*
+      END
diff --git a/costa/native/external/lapack/zsyrfs.f b/costa/native/external/lapack/zsyrfs.f
new file mode 100644
index 000000000..62fc4ea76
--- /dev/null
+++ b/costa/native/external/lapack/zsyrfs.f
@@ -0,0 +1,339 @@
+      SUBROUTINE ZSYRFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB,
+     $                   X, LDX, FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LDAF, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX*16         A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZSYRFS improves the computed solution to a system of linear
+*  equations when the coefficient matrix is symmetric indefinite, and
+*  provides error bounds and backward error estimates for the solution.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,N)
+*          The symmetric matrix A.  If UPLO = 'U', the leading N-by-N
+*          upper triangular part of A contains the upper triangular part
+*          of the matrix A, and the strictly lower triangular part of A
+*          is not referenced.  If UPLO = 'L', the leading N-by-N lower
+*          triangular part of A contains the lower triangular part of
+*          the matrix A, and the strictly upper triangular part of A is
+*          not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  AF      (input) COMPLEX*16 array, dimension (LDAF,N)
+*          The factored form of the matrix A.  AF contains the block
+*          diagonal matrix D and the multipliers used to obtain the
+*          factor U or L from the factorization A = U*D*U**T or
+*          A = L*D*L**T as computed by ZSYTRF.
+*
+*  LDAF    (input) INTEGER
+*          The leading dimension of the array AF.  LDAF >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by ZSYTRF.
+*
+*  B       (input) COMPLEX*16 array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input/output) COMPLEX*16 array, dimension (LDX,NRHS)
+*          On entry, the solution matrix X, as computed by ZSYTRS.
+*          On exit, the improved solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Internal Parameters
+*  ===================
+*
+*  ITMAX is the maximum number of steps of iterative refinement.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            ITMAX
+      PARAMETER          ( ITMAX = 5 )
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+      DOUBLE PRECISION   TWO
+      PARAMETER          ( TWO = 2.0D+0 )
+      DOUBLE PRECISION   THREE
+      PARAMETER          ( THREE = 3.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            COUNT, I, J, K, KASE, NZ
+      DOUBLE PRECISION   EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+      COMPLEX*16         ZDUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZAXPY, ZCOPY, ZLACON, ZSYMV, ZSYTRS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZSYRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = DLAMCH( 'Epsilon' )
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 140 J = 1, NRHS
+*
+         COUNT = 1
+         LSTRES = THREE
+   20    CONTINUE
+*
+*        Loop until stopping criterion is satisfied.
+*
+*        Compute residual R = B - A * X
+*
+         CALL ZCOPY( N, B( 1, J ), 1, WORK, 1 )
+         CALL ZSYMV( UPLO, N, -ONE, A, LDA, X( 1, J ), 1, ONE, WORK, 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 30 I = 1, N
+            RWORK( I ) = CABS1( B( I, J ) )
+   30    CONTINUE
+*
+*        Compute abs(A)*abs(X) + abs(B).
+*
+         IF( UPPER ) THEN
+            DO 50 K = 1, N
+               S = ZERO
+               XK = CABS1( X( K, J ) )
+               DO 40 I = 1, K - 1
+                  RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK
+                  S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) )
+   40          CONTINUE
+               RWORK( K ) = RWORK( K ) + CABS1( A( K, K ) )*XK + S
+   50       CONTINUE
+         ELSE
+            DO 70 K = 1, N
+               S = ZERO
+               XK = CABS1( X( K, J ) )
+               RWORK( K ) = RWORK( K ) + CABS1( A( K, K ) )*XK
+               DO 60 I = K + 1, N
+                  RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK
+                  S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) )
+   60          CONTINUE
+               RWORK( K ) = RWORK( K ) + S
+   70       CONTINUE
+         END IF
+         S = ZERO
+         DO 80 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) )
+            ELSE
+               S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) /
+     $             ( RWORK( I )+SAFE1 ) )
+            END IF
+   80    CONTINUE
+         BERR( J ) = S
+*
+*        Test stopping criterion. Continue iterating if
+*           1) The residual BERR(J) is larger than machine epsilon, and
+*           2) BERR(J) decreased by at least a factor of 2 during the
+*              last iteration, and
+*           3) At most ITMAX iterations tried.
+*
+         IF( BERR( J ).GT.EPS .AND. TWO*BERR( J ).LE.LSTRES .AND.
+     $       COUNT.LE.ITMAX ) THEN
+*
+*           Update solution and try again.
+*
+            CALL ZSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO )
+            CALL ZAXPY( N, ONE, WORK, 1, X( 1, J ), 1 )
+            LSTRES = BERR( J )
+            COUNT = COUNT + 1
+            GO TO 20
+         END IF
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(A))*
+*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(A) is the inverse of A
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(A)*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use ZLACON to estimate the infinity-norm of the matrix
+*           inv(A) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) )))
+*
+         DO 90 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I )
+            ELSE
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) +
+     $                      SAFE1
+            END IF
+   90    CONTINUE
+*
+         KASE = 0
+  100    CONTINUE
+         CALL ZLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(A').
+*
+               CALL ZSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO )
+               DO 110 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  110          CONTINUE
+            ELSE IF( KASE.EQ.2 ) THEN
+*
+*              Multiply by inv(A)*diag(W).
+*
+               DO 120 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  120          CONTINUE
+               CALL ZSYTRS( UPLO, N, 1, AF, LDAF, IPIV, WORK, N, INFO )
+            END IF
+            GO TO 100
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 130 I = 1, N
+            LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) )
+  130    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  140 CONTINUE
+*
+      RETURN
+*
+*     End of ZSYRFS
+*
+      END
diff --git a/costa/native/external/lapack/zsysv.f b/costa/native/external/lapack/zsysv.f
new file mode 100644
index 000000000..38a59a8a1
--- /dev/null
+++ b/costa/native/external/lapack/zsysv.f
@@ -0,0 +1,171 @@
+      SUBROUTINE ZSYSV( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, WORK,
+     $                  LWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LDB, LWORK, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZSYSV computes the solution to a complex system of linear equations
+*     A * X = B,
+*  where A is an N-by-N symmetric matrix and X and B are N-by-NRHS
+*  matrices.
+*
+*  The diagonal pivoting method is used to factor A as
+*     A = U * D * U**T,  if UPLO = 'U', or
+*     A = L * D * L**T,  if UPLO = 'L',
+*  where U (or L) is a product of permutation and unit upper (lower)
+*  triangular matrices, and D is symmetric and block diagonal with
+*  1-by-1 and 2-by-2 diagonal blocks.  The factored form of A is then
+*  used to solve the system of equations A * X = B.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, if INFO = 0, the block diagonal matrix D and the
+*          multipliers used to obtain the factor U or L from the
+*          factorization A = U*D*U**T or A = L*D*L**T as computed by
+*          ZSYTRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D, as
+*          determined by ZSYTRF.  If IPIV(k) > 0, then rows and columns
+*          k and IPIV(k) were interchanged, and D(k,k) is a 1-by-1
+*          diagonal block.  If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0,
+*          then rows and columns k-1 and -IPIV(k) were interchanged and
+*          D(k-1:k,k-1:k) is a 2-by-2 diagonal block.  If UPLO = 'L' and
+*          IPIV(k) = IPIV(k+1) < 0, then rows and columns k+1 and
+*          -IPIV(k) were interchanged and D(k:k+1,k:k+1) is a 2-by-2
+*          diagonal block.
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the N-by-NRHS right hand side matrix B.
+*          On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of WORK.  LWORK >= 1, and for best performance
+*          LWORK >= N*NB, where NB is the optimal blocksize for
+*          ZSYTRF.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, D(i,i) is exactly zero.  The factorization
+*               has been completed, but the block diagonal matrix D is
+*               exactly singular, so the solution could not be computed.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            LWKOPT, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZSYTRF, ZSYTRS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN
+         INFO = -10
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'ZSYTRF', UPLO, N, -1, -1, -1 )
+         LWKOPT = N*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZSYSV ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Compute the factorization A = U*D*U' or A = L*D*L'.
+*
+      CALL ZSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
+      IF( INFO.EQ.0 ) THEN
+*
+*        Solve the system A*X = B, overwriting B with X.
+*
+         CALL ZSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
+*
+      END IF
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of ZSYSV
+*
+      END
diff --git a/costa/native/external/lapack/zsysvx.f b/costa/native/external/lapack/zsysvx.f
new file mode 100644
index 000000000..a6a8ff948
--- /dev/null
+++ b/costa/native/external/lapack/zsysvx.f
@@ -0,0 +1,299 @@
+      SUBROUTINE ZSYSVX( FACT, UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B,
+     $                   LDB, X, LDX, RCOND, FERR, BERR, WORK, LWORK,
+     $                   RWORK, INFO )
+*
+*  -- LAPACK driver routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          FACT, UPLO
+      INTEGER            INFO, LDA, LDAF, LDB, LDX, LWORK, N, NRHS
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      DOUBLE PRECISION   BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX*16         A( LDA, * ), AF( LDAF, * ), B( LDB, * ),
+     $                   WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZSYSVX uses the diagonal pivoting factorization to compute the
+*  solution to a complex system of linear equations A * X = B,
+*  where A is an N-by-N symmetric matrix and X and B are N-by-NRHS
+*  matrices.
+*
+*  Error bounds on the solution and a condition estimate are also
+*  provided.
+*
+*  Description
+*  ===========
+*
+*  The following steps are performed:
+*
+*  1. If FACT = 'N', the diagonal pivoting method is used to factor A.
+*     The form of the factorization is
+*        A = U * D * U**T,  if UPLO = 'U', or
+*        A = L * D * L**T,  if UPLO = 'L',
+*     where U (or L) is a product of permutation and unit upper (lower)
+*     triangular matrices, and D is symmetric and block diagonal with
+*     1-by-1 and 2-by-2 diagonal blocks.
+*
+*  2. If some D(i,i)=0, so that D is exactly singular, then the routine
+*     returns with INFO = i. Otherwise, the factored form of A is used
+*     to estimate the condition number of the matrix A.  If the
+*     reciprocal of the condition number is less than machine precision,
+*     INFO = N+1 is returned as a warning, but the routine still goes on
+*     to solve for X and compute error bounds as described below.
+*
+*  3. The system of equations is solved for X using the factored form
+*     of A.
+*
+*  4. Iterative refinement is applied to improve the computed solution
+*     matrix and calculate error bounds and backward error estimates
+*     for it.
+*
+*  Arguments
+*  =========
+*
+*  FACT    (input) CHARACTER*1
+*          Specifies whether or not the factored form of A has been
+*          supplied on entry.
+*          = 'F':  On entry, AF and IPIV contain the factored form
+*                  of A.  A, AF and IPIV will not be modified.
+*          = 'N':  The matrix A will be copied to AF and factored.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The number of linear equations, i.e., the order of the
+*          matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,N)
+*          The symmetric matrix A.  If UPLO = 'U', the leading N-by-N
+*          upper triangular part of A contains the upper triangular part
+*          of the matrix A, and the strictly lower triangular part of A
+*          is not referenced.  If UPLO = 'L', the leading N-by-N lower
+*          triangular part of A contains the lower triangular part of
+*          the matrix A, and the strictly upper triangular part of A is
+*          not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  AF      (input or output) COMPLEX*16 array, dimension (LDAF,N)
+*          If FACT = 'F', then AF is an input argument and on entry
+*          contains the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L from the factorization
+*          A = U*D*U**T or A = L*D*L**T as computed by ZSYTRF.
+*
+*          If FACT = 'N', then AF is an output argument and on exit
+*          returns the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L from the factorization
+*          A = U*D*U**T or A = L*D*L**T.
+*
+*  LDAF    (input) INTEGER
+*          The leading dimension of the array AF.  LDAF >= max(1,N).
+*
+*  IPIV    (input or output) INTEGER array, dimension (N)
+*          If FACT = 'F', then IPIV is an input argument and on entry
+*          contains details of the interchanges and the block structure
+*          of D, as determined by ZSYTRF.
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*          If FACT = 'N', then IPIV is an output argument and on exit
+*          contains details of the interchanges and the block structure
+*          of D, as determined by ZSYTRF.
+*
+*  B       (input) COMPLEX*16 array, dimension (LDB,NRHS)
+*          The N-by-NRHS right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (output) COMPLEX*16 array, dimension (LDX,NRHS)
+*          If INFO = 0 or INFO = N+1, the N-by-NRHS solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The estimate of the reciprocal condition number of the matrix
+*          A.  If RCOND is less than the machine precision (in
+*          particular, if RCOND = 0), the matrix is singular to working
+*          precision.  This condition is indicated by a return code of
+*          INFO > 0.
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of WORK.  LWORK >= 2*N, and for best performance
+*          LWORK >= N*NB, where NB is the optimal blocksize for
+*          ZSYTRF.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, and i is
+*                <= N:  D(i,i) is exactly zero.  The factorization
+*                       has been completed but the factor D is exactly
+*                       singular, so the solution and error bounds could
+*                       not be computed. RCOND = 0 is returned.
+*                = N+1: D is nonsingular, but RCOND is less than machine
+*                       precision, meaning that the matrix is singular
+*                       to working precision.  Nevertheless, the
+*                       solution and error bounds are computed because
+*                       there are a number of situations where the
+*                       computed solution can be more accurate than the
+*                       value of RCOND would suggest.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, NOFACT
+      INTEGER            LWKOPT, NB
+      DOUBLE PRECISION   ANORM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      DOUBLE PRECISION   DLAMCH, ZLANSY
+      EXTERNAL           LSAME, ILAENV, DLAMCH, ZLANSY
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLACPY, ZSYCON, ZSYRFS, ZSYTRF, ZSYTRS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOFACT = LSAME( FACT, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.NOFACT .AND. .NOT.LSAME( FACT, 'F' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDAF.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -13
+      ELSE IF( LWORK.LT.MAX( 1, 2*N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -18
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'ZSYTRF', UPLO, N, -1, -1, -1 )
+         LWKOPT = N*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZSYSVX', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+      IF( NOFACT ) THEN
+*
+*        Compute the factorization A = U*D*U' or A = L*D*L'.
+*
+         CALL ZLACPY( UPLO, N, N, A, LDA, AF, LDAF )
+         CALL ZSYTRF( UPLO, N, AF, LDAF, IPIV, WORK, LWORK, INFO )
+*
+*        Return if INFO is non-zero.
+*
+         IF( INFO.NE.0 ) THEN
+            IF( INFO.GT.0 )
+     $         RCOND = ZERO
+            RETURN
+         END IF
+      END IF
+*
+*     Compute the norm of the matrix A.
+*
+      ANORM = ZLANSY( 'I', UPLO, N, A, LDA, RWORK )
+*
+*     Compute the reciprocal of the condition number of A.
+*
+      CALL ZSYCON( UPLO, N, AF, LDAF, IPIV, ANORM, RCOND, WORK, INFO )
+*
+*     Set INFO = N+1 if the matrix is singular to working precision.
+*
+      IF( RCOND.LT.DLAMCH( 'Epsilon' ) )
+     $   INFO = N + 1
+*
+*     Compute the solution vectors X.
+*
+      CALL ZLACPY( 'Full', N, NRHS, B, LDB, X, LDX )
+      CALL ZSYTRS( UPLO, N, NRHS, AF, LDAF, IPIV, X, LDX, INFO )
+*
+*     Use iterative refinement to improve the computed solutions and
+*     compute error bounds and backward error estimates for them.
+*
+      CALL ZSYRFS( UPLO, N, NRHS, A, LDA, AF, LDAF, IPIV, B, LDB, X,
+     $             LDX, FERR, BERR, WORK, RWORK, INFO )
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of ZSYSVX
+*
+      END
diff --git a/costa/native/external/lapack/zsytf2.f b/costa/native/external/lapack/zsytf2.f
new file mode 100644
index 000000000..0ef31eae9
--- /dev/null
+++ b/costa/native/external/lapack/zsytf2.f
@@ -0,0 +1,515 @@
+      SUBROUTINE ZSYTF2( UPLO, N, A, LDA, IPIV, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZSYTF2 computes the factorization of a complex symmetric matrix A
+*  using the Bunch-Kaufman diagonal pivoting method:
+*
+*     A = U*D*U'  or  A = L*D*L'
+*
+*  where U (or L) is a product of permutation and unit upper (lower)
+*  triangular matrices, U' is the transpose of U, and D is symmetric and
+*  block diagonal with 1-by-1 and 2-by-2 diagonal blocks.
+*
+*  This is the unblocked version of the algorithm, calling Level 2 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the upper or lower triangular part of the
+*          symmetric matrix A is stored:
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          n-by-n upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n-by-n lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L (see below for further details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D.
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*          > 0: if INFO = k, D(k,k) is exactly zero.  The factorization
+*               has been completed, but the block diagonal matrix D is
+*               exactly singular, and division by zero will occur if it
+*               is used to solve a system of equations.
+*
+*  Further Details
+*  ===============
+*
+*  1-96 - Based on modifications by J. Lewis, Boeing Computer Services
+*         Company
+*
+*  If UPLO = 'U', then A = U*D*U', where
+*     U = P(n)*U(n)* ... *P(k)U(k)* ...,
+*  i.e., U is a product of terms P(k)*U(k), where k decreases from n to
+*  1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and U(k) is a unit upper triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    v    0   )   k-s
+*     U(k) =  (   0    I    0   )   s
+*             (   0    0    I   )   n-k
+*                k-s   s   n-k
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k).
+*  If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k),
+*  and A(k,k), and v overwrites A(1:k-2,k-1:k).
+*
+*  If UPLO = 'L', then A = L*D*L', where
+*     L = P(1)*L(1)* ... *P(k)*L(k)* ...,
+*  i.e., L is a product of terms P(k)*L(k), where k increases from 1 to
+*  n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and L(k) is a unit lower triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    0     0   )  k-1
+*     L(k) =  (   0    I     0   )  s
+*             (   0    v     I   )  n-k-s+1
+*                k-1   s  n-k-s+1
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k).
+*  If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k),
+*  and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+      DOUBLE PRECISION   EIGHT, SEVTEN
+      PARAMETER          ( EIGHT = 8.0D+0, SEVTEN = 17.0D+0 )
+      COMPLEX*16         CONE
+      PARAMETER          ( CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, IMAX, J, JMAX, K, KK, KP, KSTEP
+      DOUBLE PRECISION   ABSAKK, ALPHA, COLMAX, ROWMAX
+      COMPLEX*16         D11, D12, D21, D22, R1, T, WK, WKM1, WKP1, Z
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IZAMAX
+      EXTERNAL           LSAME, IZAMAX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZSCAL, ZSWAP, ZSYR
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX, SQRT
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( Z ) = ABS( DBLE( Z ) ) + ABS( DIMAG( Z ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZSYTF2', -INFO )
+         RETURN
+      END IF
+*
+*     Initialize ALPHA for use in choosing pivot block size.
+*
+      ALPHA = ( ONE+SQRT( SEVTEN ) ) / EIGHT
+*
+      IF( UPPER ) THEN
+*
+*        Factorize A as U*D*U' using the upper triangle of A
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2
+*
+         K = N
+   10    CONTINUE
+*
+*        If K < 1, exit from loop
+*
+         IF( K.LT.1 )
+     $      GO TO 70
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = CABS1( A( K, K ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.GT.1 ) THEN
+            IMAX = IZAMAX( K-1, A( 1, K ), 1 )
+            COLMAX = CABS1( A( IMAX, K ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               JMAX = IMAX + IZAMAX( K-IMAX, A( IMAX, IMAX+1 ), LDA )
+               ROWMAX = CABS1( A( IMAX, JMAX ) )
+               IF( IMAX.GT.1 ) THEN
+                  JMAX = IZAMAX( IMAX-1, A( 1, IMAX ), 1 )
+                  ROWMAX = MAX( ROWMAX, CABS1( A( JMAX, IMAX ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( CABS1( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX ) THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+               ELSE
+*
+*                 interchange rows and columns K-1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K - KSTEP + 1
+            IF( KP.NE.KK ) THEN
+*
+*              Interchange rows and columns KK and KP in the leading
+*              submatrix A(1:k,1:k)
+*
+               CALL ZSWAP( KP-1, A( 1, KK ), 1, A( 1, KP ), 1 )
+               CALL ZSWAP( KK-KP-1, A( KP+1, KK ), 1, A( KP, KP+1 ),
+     $                     LDA )
+               T = A( KK, KK )
+               A( KK, KK ) = A( KP, KP )
+               A( KP, KP ) = T
+               IF( KSTEP.EQ.2 ) THEN
+                  T = A( K-1, K )
+                  A( K-1, K ) = A( KP, K )
+                  A( KP, K ) = T
+               END IF
+            END IF
+*
+*           Update the leading submatrix
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column k now holds
+*
+*              W(k) = U(k)*D(k)
+*
+*              where U(k) is the k-th column of U
+*
+*              Perform a rank-1 update of A(1:k-1,1:k-1) as
+*
+*              A := A - U(k)*D(k)*U(k)' = A - W(k)*1/D(k)*W(k)'
+*
+               R1 = CONE / A( K, K )
+               CALL ZSYR( UPLO, K-1, -R1, A( 1, K ), 1, A, LDA )
+*
+*              Store U(k) in column k
+*
+               CALL ZSCAL( K-1, R1, A( 1, K ), 1 )
+            ELSE
+*
+*              2-by-2 pivot block D(k): columns k and k-1 now hold
+*
+*              ( W(k-1) W(k) ) = ( U(k-1) U(k) )*D(k)
+*
+*              where U(k) and U(k-1) are the k-th and (k-1)-th columns
+*              of U
+*
+*              Perform a rank-2 update of A(1:k-2,1:k-2) as
+*
+*              A := A - ( U(k-1) U(k) )*D(k)*( U(k-1) U(k) )'
+*                 = A - ( W(k-1) W(k) )*inv(D(k))*( W(k-1) W(k) )'
+*
+               IF( K.GT.2 ) THEN
+*
+                  D12 = A( K-1, K )
+                  D22 = A( K-1, K-1 ) / D12
+                  D11 = A( K, K ) / D12
+                  T = CONE / ( D11*D22-CONE )
+                  D12 = T / D12
+*
+                  DO 30 J = K - 2, 1, -1
+                     WKM1 = D12*( D11*A( J, K-1 )-A( J, K ) )
+                     WK = D12*( D22*A( J, K )-A( J, K-1 ) )
+                     DO 20 I = J, 1, -1
+                        A( I, J ) = A( I, J ) - A( I, K )*WK -
+     $                              A( I, K-1 )*WKM1
+   20                CONTINUE
+                     A( J, K ) = WK
+                     A( J, K-1 ) = WKM1
+   30             CONTINUE
+*
+               END IF
+*
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K-1 ) = -KP
+         END IF
+*
+*        Decrease K and return to the start of the main loop
+*
+         K = K - KSTEP
+         GO TO 10
+*
+      ELSE
+*
+*        Factorize A as L*D*L' using the lower triangle of A
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2
+*
+         K = 1
+   40    CONTINUE
+*
+*        If K > N, exit from loop
+*
+         IF( K.GT.N )
+     $      GO TO 70
+         KSTEP = 1
+*
+*        Determine rows and columns to be interchanged and whether
+*        a 1-by-1 or 2-by-2 pivot block will be used
+*
+         ABSAKK = CABS1( A( K, K ) )
+*
+*        IMAX is the row-index of the largest off-diagonal element in
+*        column K, and COLMAX is its absolute value
+*
+         IF( K.LT.N ) THEN
+            IMAX = K + IZAMAX( N-K, A( K+1, K ), 1 )
+            COLMAX = CABS1( A( IMAX, K ) )
+         ELSE
+            COLMAX = ZERO
+         END IF
+*
+         IF( MAX( ABSAKK, COLMAX ).EQ.ZERO ) THEN
+*
+*           Column K is zero: set INFO and continue
+*
+            IF( INFO.EQ.0 )
+     $         INFO = K
+            KP = K
+         ELSE
+            IF( ABSAKK.GE.ALPHA*COLMAX ) THEN
+*
+*              no interchange, use 1-by-1 pivot block
+*
+               KP = K
+            ELSE
+*
+*              JMAX is the column-index of the largest off-diagonal
+*              element in row IMAX, and ROWMAX is its absolute value
+*
+               JMAX = K - 1 + IZAMAX( IMAX-K, A( IMAX, K ), LDA )
+               ROWMAX = CABS1( A( IMAX, JMAX ) )
+               IF( IMAX.LT.N ) THEN
+                  JMAX = IMAX + IZAMAX( N-IMAX, A( IMAX+1, IMAX ), 1 )
+                  ROWMAX = MAX( ROWMAX, CABS1( A( JMAX, IMAX ) ) )
+               END IF
+*
+               IF( ABSAKK.GE.ALPHA*COLMAX*( COLMAX / ROWMAX ) ) THEN
+*
+*                 no interchange, use 1-by-1 pivot block
+*
+                  KP = K
+               ELSE IF( CABS1( A( IMAX, IMAX ) ).GE.ALPHA*ROWMAX ) THEN
+*
+*                 interchange rows and columns K and IMAX, use 1-by-1
+*                 pivot block
+*
+                  KP = IMAX
+               ELSE
+*
+*                 interchange rows and columns K+1 and IMAX, use 2-by-2
+*                 pivot block
+*
+                  KP = IMAX
+                  KSTEP = 2
+               END IF
+            END IF
+*
+            KK = K + KSTEP - 1
+            IF( KP.NE.KK ) THEN
+*
+*              Interchange rows and columns KK and KP in the trailing
+*              submatrix A(k:n,k:n)
+*
+               IF( KP.LT.N )
+     $            CALL ZSWAP( N-KP, A( KP+1, KK ), 1, A( KP+1, KP ), 1 )
+               CALL ZSWAP( KP-KK-1, A( KK+1, KK ), 1, A( KP, KK+1 ),
+     $                     LDA )
+               T = A( KK, KK )
+               A( KK, KK ) = A( KP, KP )
+               A( KP, KP ) = T
+               IF( KSTEP.EQ.2 ) THEN
+                  T = A( K+1, K )
+                  A( K+1, K ) = A( KP, K )
+                  A( KP, K ) = T
+               END IF
+            END IF
+*
+*           Update the trailing submatrix
+*
+            IF( KSTEP.EQ.1 ) THEN
+*
+*              1-by-1 pivot block D(k): column k now holds
+*
+*              W(k) = L(k)*D(k)
+*
+*              where L(k) is the k-th column of L
+*
+               IF( K.LT.N ) THEN
+*
+*                 Perform a rank-1 update of A(k+1:n,k+1:n) as
+*
+*                 A := A - L(k)*D(k)*L(k)' = A - W(k)*(1/D(k))*W(k)'
+*
+                  R1 = CONE / A( K, K )
+                  CALL ZSYR( UPLO, N-K, -R1, A( K+1, K ), 1,
+     $                       A( K+1, K+1 ), LDA )
+*
+*                 Store L(k) in column K
+*
+                  CALL ZSCAL( N-K, R1, A( K+1, K ), 1 )
+               END IF
+            ELSE
+*
+*              2-by-2 pivot block D(k)
+*
+               IF( K.LT.N-1 ) THEN
+*
+*                 Perform a rank-2 update of A(k+2:n,k+2:n) as
+*
+*                 A := A - ( L(k) L(k+1) )*D(k)*( L(k) L(k+1) )'
+*                    = A - ( W(k) W(k+1) )*inv(D(k))*( W(k) W(k+1) )'
+*
+*                 where L(k) and L(k+1) are the k-th and (k+1)-th
+*                 columns of L
+*
+                  D21 = A( K+1, K )
+                  D11 = A( K+1, K+1 ) / D21
+                  D22 = A( K, K ) / D21
+                  T = CONE / ( D11*D22-CONE )
+                  D21 = T / D21
+*
+                  DO 60 J = K + 2, N
+                     WK = D21*( D11*A( J, K )-A( J, K+1 ) )
+                     WKP1 = D21*( D22*A( J, K+1 )-A( J, K ) )
+                     DO 50 I = J, N
+                        A( I, J ) = A( I, J ) - A( I, K )*WK -
+     $                              A( I, K+1 )*WKP1
+   50                CONTINUE
+                     A( J, K ) = WK
+                     A( J, K+1 ) = WKP1
+   60             CONTINUE
+               END IF
+            END IF
+         END IF
+*
+*        Store details of the interchanges in IPIV
+*
+         IF( KSTEP.EQ.1 ) THEN
+            IPIV( K ) = KP
+         ELSE
+            IPIV( K ) = -KP
+            IPIV( K+1 ) = -KP
+         END IF
+*
+*        Increase K and return to the start of the main loop
+*
+         K = K + KSTEP
+         GO TO 40
+*
+      END IF
+*
+   70 CONTINUE
+      RETURN
+*
+*     End of ZSYTF2
+*
+      END
diff --git a/costa/native/external/lapack/zsytrf.f b/costa/native/external/lapack/zsytrf.f
new file mode 100644
index 000000000..bb2fc730c
--- /dev/null
+++ b/costa/native/external/lapack/zsytrf.f
@@ -0,0 +1,287 @@
+      SUBROUTINE ZSYTRF( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZSYTRF computes the factorization of a complex symmetric matrix A
+*  using the Bunch-Kaufman diagonal pivoting method.  The form of the
+*  factorization is
+*
+*     A = U*D*U**T  or  A = L*D*L**T
+*
+*  where U (or L) is a product of permutation and unit upper (lower)
+*  triangular matrices, and D is symmetric and block diagonal with
+*  with 1-by-1 and 2-by-2 diagonal blocks.
+*
+*  This is the blocked version of the algorithm, calling Level 3 BLAS.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  Upper triangle of A is stored;
+*          = 'L':  Lower triangle of A is stored.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+*          N-by-N upper triangular part of A contains the upper
+*          triangular part of the matrix A, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of A contains the lower
+*          triangular part of the matrix A, and the strictly upper
+*          triangular part of A is not referenced.
+*
+*          On exit, the block diagonal matrix D and the multipliers used
+*          to obtain the factor U or L (see below for further details).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (output) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D.
+*          If IPIV(k) > 0, then rows and columns k and IPIV(k) were
+*          interchanged and D(k,k) is a 1-by-1 diagonal block.
+*          If UPLO = 'U' and IPIV(k) = IPIV(k-1) < 0, then rows and
+*          columns k-1 and -IPIV(k) were interchanged and D(k-1:k,k-1:k)
+*          is a 2-by-2 diagonal block.  If UPLO = 'L' and IPIV(k) =
+*          IPIV(k+1) < 0, then rows and columns k+1 and -IPIV(k) were
+*          interchanged and D(k:k+1,k:k+1) is a 2-by-2 diagonal block.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The length of WORK.  LWORK >=1.  For best performance
+*          LWORK >= N*NB, where NB is the block size returned by ILAENV.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, D(i,i) is exactly zero.  The factorization
+*                has been completed, but the block diagonal matrix D is
+*                exactly singular, and division by zero will occur if it
+*                is used to solve a system of equations.
+*
+*  Further Details
+*  ===============
+*
+*  If UPLO = 'U', then A = U*D*U', where
+*     U = P(n)*U(n)* ... *P(k)U(k)* ...,
+*  i.e., U is a product of terms P(k)*U(k), where k decreases from n to
+*  1 in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and U(k) is a unit upper triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    v    0   )   k-s
+*     U(k) =  (   0    I    0   )   s
+*             (   0    0    I   )   n-k
+*                k-s   s   n-k
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(1:k-1,k).
+*  If s = 2, the upper triangle of D(k) overwrites A(k-1,k-1), A(k-1,k),
+*  and A(k,k), and v overwrites A(1:k-2,k-1:k).
+*
+*  If UPLO = 'L', then A = L*D*L', where
+*     L = P(1)*L(1)* ... *P(k)*L(k)* ...,
+*  i.e., L is a product of terms P(k)*L(k), where k increases from 1 to
+*  n in steps of 1 or 2, and D is a block diagonal matrix with 1-by-1
+*  and 2-by-2 diagonal blocks D(k).  P(k) is a permutation matrix as
+*  defined by IPIV(k), and L(k) is a unit lower triangular matrix, such
+*  that if the diagonal block D(k) is of order s (s = 1 or 2), then
+*
+*             (   I    0     0   )  k-1
+*     L(k) =  (   0    I     0   )  s
+*             (   0    v     I   )  n-k-s+1
+*                k-1   s  n-k-s+1
+*
+*  If s = 1, D(k) overwrites A(k,k), and v overwrites A(k+1:n,k).
+*  If s = 2, the lower triangle of D(k) overwrites A(k,k), A(k+1,k),
+*  and A(k+1,k+1), and v overwrites A(k+2:n,k:k+1).
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, UPPER
+      INTEGER            IINFO, IWS, J, K, KB, LDWORK, LWKOPT, NB, NBMIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLASYF, ZSYTF2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size
+*
+         NB = ILAENV( 1, 'ZSYTRF', UPLO, N, -1, -1, -1 )
+         LWKOPT = N*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZSYTRF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      LDWORK = N
+      IF( NB.GT.1 .AND. NB.LT.N ) THEN
+         IWS = LDWORK*NB
+         IF( LWORK.LT.IWS ) THEN
+            NB = MAX( LWORK / LDWORK, 1 )
+            NBMIN = MAX( 2, ILAENV( 2, 'ZSYTRF', UPLO, N, -1, -1, -1 ) )
+         END IF
+      ELSE
+         IWS = 1
+      END IF
+      IF( NB.LT.NBMIN )
+     $   NB = N
+*
+      IF( UPPER ) THEN
+*
+*        Factorize A as U*D*U' using the upper triangle of A
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        KB, where KB is the number of columns factorized by ZLASYF;
+*        KB is either NB or NB-1, or K for the last block
+*
+         K = N
+   10    CONTINUE
+*
+*        If K < 1, exit from loop
+*
+         IF( K.LT.1 )
+     $      GO TO 40
+*
+         IF( K.GT.NB ) THEN
+*
+*           Factorize columns k-kb+1:k of A and use blocked code to
+*           update columns 1:k-kb
+*
+            CALL ZLASYF( UPLO, K, NB, KB, A, LDA, IPIV, WORK, N, IINFO )
+         ELSE
+*
+*           Use unblocked code to factorize columns 1:k of A
+*
+            CALL ZSYTF2( UPLO, K, A, LDA, IPIV, IINFO )
+            KB = K
+         END IF
+*
+*        Set INFO on the first occurrence of a zero pivot
+*
+         IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+     $      INFO = IINFO
+*
+*        Decrease K and return to the start of the main loop
+*
+         K = K - KB
+         GO TO 10
+*
+      ELSE
+*
+*        Factorize A as L*D*L' using the lower triangle of A
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        KB, where KB is the number of columns factorized by ZLASYF;
+*        KB is either NB or NB-1, or N-K+1 for the last block
+*
+         K = 1
+   20    CONTINUE
+*
+*        If K > N, exit from loop
+*
+         IF( K.GT.N )
+     $      GO TO 40
+*
+         IF( K.LE.N-NB ) THEN
+*
+*           Factorize columns k:k+kb-1 of A and use blocked code to
+*           update columns k+kb:n
+*
+            CALL ZLASYF( UPLO, N-K+1, NB, KB, A( K, K ), LDA, IPIV( K ),
+     $                   WORK, N, IINFO )
+         ELSE
+*
+*           Use unblocked code to factorize columns k:n of A
+*
+            CALL ZSYTF2( UPLO, N-K+1, A( K, K ), LDA, IPIV( K ), IINFO )
+            KB = N - K + 1
+         END IF
+*
+*        Set INFO on the first occurrence of a zero pivot
+*
+         IF( INFO.EQ.0 .AND. IINFO.GT.0 )
+     $      INFO = IINFO + K - 1
+*
+*        Adjust IPIV
+*
+         DO 30 J = K, K + KB - 1
+            IF( IPIV( J ).GT.0 ) THEN
+               IPIV( J ) = IPIV( J ) + K - 1
+            ELSE
+               IPIV( J ) = IPIV( J ) - K + 1
+            END IF
+   30    CONTINUE
+*
+*        Increase K and return to the start of the main loop
+*
+         K = K + KB
+         GO TO 20
+*
+      END IF
+*
+   40 CONTINUE
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of ZSYTRF
+*
+      END
diff --git a/costa/native/external/lapack/zsytri.f b/costa/native/external/lapack/zsytri.f
new file mode 100644
index 000000000..4fca2a71f
--- /dev/null
+++ b/costa/native/external/lapack/zsytri.f
@@ -0,0 +1,314 @@
+      SUBROUTINE ZSYTRI( UPLO, N, A, LDA, IPIV, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZSYTRI computes the inverse of a complex symmetric indefinite matrix
+*  A using the factorization A = U*D*U**T or A = L*D*L**T computed by
+*  ZSYTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**T;
+*          = 'L':  Lower triangular, form is A = L*D*L**T.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the block diagonal matrix D and the multipliers
+*          used to obtain the factor U or L as computed by ZSYTRF.
+*
+*          On exit, if INFO = 0, the (symmetric) inverse of the original
+*          matrix.  If UPLO = 'U', the upper triangular part of the
+*          inverse is formed and the part of A below the diagonal is not
+*          referenced; if UPLO = 'L' the lower triangular part of the
+*          inverse is formed and the part of A above the diagonal is
+*          not referenced.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by ZSYTRF.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its
+*               inverse could not be computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ),
+     $                   ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            K, KP, KSTEP
+      COMPLEX*16         AK, AKKP1, AKP1, D, T, TEMP
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      COMPLEX*16         ZDOTU
+      EXTERNAL           LSAME, ZDOTU
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZCOPY, ZSWAP, ZSYMV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZSYTRI', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Check that the diagonal matrix D is nonsingular.
+*
+      IF( UPPER ) THEN
+*
+*        Upper triangular storage: examine D from bottom to top
+*
+         DO 10 INFO = N, 1, -1
+            IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO )
+     $         RETURN
+   10    CONTINUE
+      ELSE
+*
+*        Lower triangular storage: examine D from top to bottom.
+*
+         DO 20 INFO = 1, N
+            IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO )
+     $         RETURN
+   20    CONTINUE
+      END IF
+      INFO = 0
+*
+      IF( UPPER ) THEN
+*
+*        Compute inv(A) from the factorization A = U*D*U'.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+   30    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 40
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Invert the diagonal block.
+*
+            A( K, K ) = ONE / A( K, K )
+*
+*           Compute column K of the inverse.
+*
+            IF( K.GT.1 ) THEN
+               CALL ZCOPY( K-1, A( 1, K ), 1, WORK, 1 )
+               CALL ZSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO,
+     $                     A( 1, K ), 1 )
+               A( K, K ) = A( K, K ) - ZDOTU( K-1, WORK, 1, A( 1, K ),
+     $                     1 )
+            END IF
+            KSTEP = 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Invert the diagonal block.
+*
+            T = A( K, K+1 )
+            AK = A( K, K ) / T
+            AKP1 = A( K+1, K+1 ) / T
+            AKKP1 = A( K, K+1 ) / T
+            D = T*( AK*AKP1-ONE )
+            A( K, K ) = AKP1 / D
+            A( K+1, K+1 ) = AK / D
+            A( K, K+1 ) = -AKKP1 / D
+*
+*           Compute columns K and K+1 of the inverse.
+*
+            IF( K.GT.1 ) THEN
+               CALL ZCOPY( K-1, A( 1, K ), 1, WORK, 1 )
+               CALL ZSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO,
+     $                     A( 1, K ), 1 )
+               A( K, K ) = A( K, K ) - ZDOTU( K-1, WORK, 1, A( 1, K ),
+     $                     1 )
+               A( K, K+1 ) = A( K, K+1 ) -
+     $                       ZDOTU( K-1, A( 1, K ), 1, A( 1, K+1 ), 1 )
+               CALL ZCOPY( K-1, A( 1, K+1 ), 1, WORK, 1 )
+               CALL ZSYMV( UPLO, K-1, -ONE, A, LDA, WORK, 1, ZERO,
+     $                     A( 1, K+1 ), 1 )
+               A( K+1, K+1 ) = A( K+1, K+1 ) -
+     $                         ZDOTU( K-1, WORK, 1, A( 1, K+1 ), 1 )
+            END IF
+            KSTEP = 2
+         END IF
+*
+         KP = ABS( IPIV( K ) )
+         IF( KP.NE.K ) THEN
+*
+*           Interchange rows and columns K and KP in the leading
+*           submatrix A(1:k+1,1:k+1)
+*
+            CALL ZSWAP( KP-1, A( 1, K ), 1, A( 1, KP ), 1 )
+            CALL ZSWAP( K-KP-1, A( KP+1, K ), 1, A( KP, KP+1 ), LDA )
+            TEMP = A( K, K )
+            A( K, K ) = A( KP, KP )
+            A( KP, KP ) = TEMP
+            IF( KSTEP.EQ.2 ) THEN
+               TEMP = A( K, K+1 )
+               A( K, K+1 ) = A( KP, K+1 )
+               A( KP, K+1 ) = TEMP
+            END IF
+         END IF
+*
+         K = K + KSTEP
+         GO TO 30
+   40    CONTINUE
+*
+      ELSE
+*
+*        Compute inv(A) from the factorization A = L*D*L'.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = N
+   50    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 60
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Invert the diagonal block.
+*
+            A( K, K ) = ONE / A( K, K )
+*
+*           Compute column K of the inverse.
+*
+            IF( K.LT.N ) THEN
+               CALL ZCOPY( N-K, A( K+1, K ), 1, WORK, 1 )
+               CALL ZSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1,
+     $                     ZERO, A( K+1, K ), 1 )
+               A( K, K ) = A( K, K ) - ZDOTU( N-K, WORK, 1, A( K+1, K ),
+     $                     1 )
+            END IF
+            KSTEP = 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Invert the diagonal block.
+*
+            T = A( K, K-1 )
+            AK = A( K-1, K-1 ) / T
+            AKP1 = A( K, K ) / T
+            AKKP1 = A( K, K-1 ) / T
+            D = T*( AK*AKP1-ONE )
+            A( K-1, K-1 ) = AKP1 / D
+            A( K, K ) = AK / D
+            A( K, K-1 ) = -AKKP1 / D
+*
+*           Compute columns K-1 and K of the inverse.
+*
+            IF( K.LT.N ) THEN
+               CALL ZCOPY( N-K, A( K+1, K ), 1, WORK, 1 )
+               CALL ZSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1,
+     $                     ZERO, A( K+1, K ), 1 )
+               A( K, K ) = A( K, K ) - ZDOTU( N-K, WORK, 1, A( K+1, K ),
+     $                     1 )
+               A( K, K-1 ) = A( K, K-1 ) -
+     $                       ZDOTU( N-K, A( K+1, K ), 1, A( K+1, K-1 ),
+     $                       1 )
+               CALL ZCOPY( N-K, A( K+1, K-1 ), 1, WORK, 1 )
+               CALL ZSYMV( UPLO, N-K, -ONE, A( K+1, K+1 ), LDA, WORK, 1,
+     $                     ZERO, A( K+1, K-1 ), 1 )
+               A( K-1, K-1 ) = A( K-1, K-1 ) -
+     $                         ZDOTU( N-K, WORK, 1, A( K+1, K-1 ), 1 )
+            END IF
+            KSTEP = 2
+         END IF
+*
+         KP = ABS( IPIV( K ) )
+         IF( KP.NE.K ) THEN
+*
+*           Interchange rows and columns K and KP in the trailing
+*           submatrix A(k-1:n,k-1:n)
+*
+            IF( KP.LT.N )
+     $         CALL ZSWAP( N-KP, A( KP+1, K ), 1, A( KP+1, KP ), 1 )
+            CALL ZSWAP( KP-K-1, A( K+1, K ), 1, A( KP, K+1 ), LDA )
+            TEMP = A( K, K )
+            A( K, K ) = A( KP, KP )
+            A( KP, KP ) = TEMP
+            IF( KSTEP.EQ.2 ) THEN
+               TEMP = A( K, K-1 )
+               A( K, K-1 ) = A( KP, K-1 )
+               A( KP, K-1 ) = TEMP
+            END IF
+         END IF
+*
+         K = K - KSTEP
+         GO TO 50
+   60    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZSYTRI
+*
+      END
diff --git a/costa/native/external/lapack/zsytrs.f b/costa/native/external/lapack/zsytrs.f
new file mode 100644
index 000000000..b0cc85938
--- /dev/null
+++ b/costa/native/external/lapack/zsytrs.f
@@ -0,0 +1,370 @@
+      SUBROUTINE ZSYTRS( UPLO, N, NRHS, A, LDA, IPIV, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IPIV( * )
+      COMPLEX*16         A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZSYTRS solves a system of linear equations A*X = B with a complex
+*  symmetric matrix A using the factorization A = U*D*U**T or
+*  A = L*D*L**T computed by ZSYTRF.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the details of the factorization are stored
+*          as an upper or lower triangular matrix.
+*          = 'U':  Upper triangular, form is A = U*D*U**T;
+*          = 'L':  Lower triangular, form is A = L*D*L**T.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,N)
+*          The block diagonal matrix D and the multipliers used to
+*          obtain the factor U or L as computed by ZSYTRF.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  IPIV    (input) INTEGER array, dimension (N)
+*          Details of the interchanges and the block structure of D
+*          as determined by ZSYTRF.
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            J, K, KP
+      COMPLEX*16         AK, AKM1, AKM1K, BK, BKM1, DENOM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGEMV, ZGERU, ZSCAL, ZSWAP
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZSYTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Solve A*X = B, where A = U*D*U'.
+*
+*        First solve U*D*X = B, overwriting B with X.
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = N
+   10    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 30
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(U(K)), where U(K) is the transformation
+*           stored in column K of A.
+*
+            CALL ZGERU( K-1, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB,
+     $                  B( 1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            CALL ZSCAL( NRHS, ONE / A( K, K ), B( K, 1 ), LDB )
+            K = K - 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Interchange rows K-1 and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K-1 )
+     $         CALL ZSWAP( NRHS, B( K-1, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(U(K)), where U(K) is the transformation
+*           stored in columns K-1 and K of A.
+*
+            CALL ZGERU( K-2, NRHS, -ONE, A( 1, K ), 1, B( K, 1 ), LDB,
+     $                  B( 1, 1 ), LDB )
+            CALL ZGERU( K-2, NRHS, -ONE, A( 1, K-1 ), 1, B( K-1, 1 ),
+     $                  LDB, B( 1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            AKM1K = A( K-1, K )
+            AKM1 = A( K-1, K-1 ) / AKM1K
+            AK = A( K, K ) / AKM1K
+            DENOM = AKM1*AK - ONE
+            DO 20 J = 1, NRHS
+               BKM1 = B( K-1, J ) / AKM1K
+               BK = B( K, J ) / AKM1K
+               B( K-1, J ) = ( AK*BKM1-BK ) / DENOM
+               B( K, J ) = ( AKM1*BK-BKM1 ) / DENOM
+   20       CONTINUE
+            K = K - 2
+         END IF
+*
+         GO TO 10
+   30    CONTINUE
+*
+*        Next solve U'*X = B, overwriting B with X.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+   40    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 50
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Multiply by inv(U'(K)), where U(K) is the transformation
+*           stored in column K of A.
+*
+            CALL ZGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, A( 1, K ),
+     $                  1, ONE, B( K, 1 ), LDB )
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            K = K + 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Multiply by inv(U'(K+1)), where U(K+1) is the transformation
+*           stored in columns K and K+1 of A.
+*
+            CALL ZGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB, A( 1, K ),
+     $                  1, ONE, B( K, 1 ), LDB )
+            CALL ZGEMV( 'Transpose', K-1, NRHS, -ONE, B, LDB,
+     $                  A( 1, K+1 ), 1, ONE, B( K+1, 1 ), LDB )
+*
+*           Interchange rows K and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K )
+     $         CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            K = K + 2
+         END IF
+*
+         GO TO 40
+   50    CONTINUE
+*
+      ELSE
+*
+*        Solve A*X = B, where A = L*D*L'.
+*
+*        First solve L*D*X = B, overwriting B with X.
+*
+*        K is the main loop index, increasing from 1 to N in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = 1
+   60    CONTINUE
+*
+*        If K > N, exit from loop.
+*
+         IF( K.GT.N )
+     $      GO TO 80
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(L(K)), where L(K) is the transformation
+*           stored in column K of A.
+*
+            IF( K.LT.N )
+     $         CALL ZGERU( N-K, NRHS, -ONE, A( K+1, K ), 1, B( K, 1 ),
+     $                     LDB, B( K+1, 1 ), LDB )
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            CALL ZSCAL( NRHS, ONE / A( K, K ), B( K, 1 ), LDB )
+            K = K + 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Interchange rows K+1 and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K+1 )
+     $         CALL ZSWAP( NRHS, B( K+1, 1 ), LDB, B( KP, 1 ), LDB )
+*
+*           Multiply by inv(L(K)), where L(K) is the transformation
+*           stored in columns K and K+1 of A.
+*
+            IF( K.LT.N-1 ) THEN
+               CALL ZGERU( N-K-1, NRHS, -ONE, A( K+2, K ), 1, B( K, 1 ),
+     $                     LDB, B( K+2, 1 ), LDB )
+               CALL ZGERU( N-K-1, NRHS, -ONE, A( K+2, K+1 ), 1,
+     $                     B( K+1, 1 ), LDB, B( K+2, 1 ), LDB )
+            END IF
+*
+*           Multiply by the inverse of the diagonal block.
+*
+            AKM1K = A( K+1, K )
+            AKM1 = A( K, K ) / AKM1K
+            AK = A( K+1, K+1 ) / AKM1K
+            DENOM = AKM1*AK - ONE
+            DO 70 J = 1, NRHS
+               BKM1 = B( K, J ) / AKM1K
+               BK = B( K+1, J ) / AKM1K
+               B( K, J ) = ( AK*BKM1-BK ) / DENOM
+               B( K+1, J ) = ( AKM1*BK-BKM1 ) / DENOM
+   70       CONTINUE
+            K = K + 2
+         END IF
+*
+         GO TO 60
+   80    CONTINUE
+*
+*        Next solve L'*X = B, overwriting B with X.
+*
+*        K is the main loop index, decreasing from N to 1 in steps of
+*        1 or 2, depending on the size of the diagonal blocks.
+*
+         K = N
+   90    CONTINUE
+*
+*        If K < 1, exit from loop.
+*
+         IF( K.LT.1 )
+     $      GO TO 100
+*
+         IF( IPIV( K ).GT.0 ) THEN
+*
+*           1 x 1 diagonal block
+*
+*           Multiply by inv(L'(K)), where L(K) is the transformation
+*           stored in column K of A.
+*
+            IF( K.LT.N )
+     $         CALL ZGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ),
+     $                     LDB, A( K+1, K ), 1, ONE, B( K, 1 ), LDB )
+*
+*           Interchange rows K and IPIV(K).
+*
+            KP = IPIV( K )
+            IF( KP.NE.K )
+     $         CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            K = K - 1
+         ELSE
+*
+*           2 x 2 diagonal block
+*
+*           Multiply by inv(L'(K-1)), where L(K-1) is the transformation
+*           stored in columns K-1 and K of A.
+*
+            IF( K.LT.N ) THEN
+               CALL ZGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ),
+     $                     LDB, A( K+1, K ), 1, ONE, B( K, 1 ), LDB )
+               CALL ZGEMV( 'Transpose', N-K, NRHS, -ONE, B( K+1, 1 ),
+     $                     LDB, A( K+1, K-1 ), 1, ONE, B( K-1, 1 ),
+     $                     LDB )
+            END IF
+*
+*           Interchange rows K and -IPIV(K).
+*
+            KP = -IPIV( K )
+            IF( KP.NE.K )
+     $         CALL ZSWAP( NRHS, B( K, 1 ), LDB, B( KP, 1 ), LDB )
+            K = K - 2
+         END IF
+*
+         GO TO 90
+  100    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZSYTRS
+*
+      END
diff --git a/costa/native/external/lapack/ztbcon.f b/costa/native/external/lapack/ztbcon.f
new file mode 100644
index 000000000..38dcc896f
--- /dev/null
+++ b/costa/native/external/lapack/ztbcon.f
@@ -0,0 +1,205 @@
+      SUBROUTINE ZTBCON( NORM, UPLO, DIAG, N, KD, AB, LDAB, RCOND, WORK,
+     $                   RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORM, UPLO
+      INTEGER            INFO, KD, LDAB, N
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   RWORK( * )
+      COMPLEX*16         AB( LDAB, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTBCON estimates the reciprocal of the condition number of a
+*  triangular band matrix A, in either the 1-norm or the infinity-norm.
+*
+*  The norm of A is computed and an estimate is obtained for
+*  norm(inv(A)), then the reciprocal of the condition number is
+*  computed as
+*     RCOND = 1 / ( norm(A) * norm(inv(A)) ).
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies whether the 1-norm condition number or the
+*          infinity-norm condition number is required:
+*          = '1' or 'O':  1-norm;
+*          = 'I':         Infinity-norm.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals or subdiagonals of the
+*          triangular band matrix A.  KD >= 0.
+*
+*  AB      (input) COMPLEX*16 array, dimension (LDAB,N)
+*          The upper or lower triangular band matrix A, stored in the
+*          first kd+1 rows of the array. The j-th column of A is stored
+*          in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*          If DIAG = 'U', the diagonal elements of A are not referenced
+*          and are assumed to be 1.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(norm(A) * norm(inv(A))).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, ONENRM, UPPER
+      CHARACTER          NORMIN
+      INTEGER            IX, KASE, KASE1
+      DOUBLE PRECISION   AINVNM, ANORM, SCALE, SMLNUM, XNORM
+      COMPLEX*16         ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IZAMAX
+      DOUBLE PRECISION   DLAMCH, ZLANTB
+      EXTERNAL           LSAME, IZAMAX, DLAMCH, ZLANTB
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZDRSCL, ZLACON, ZLATBS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+      IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -7
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZTBCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      END IF
+*
+      RCOND = ZERO
+      SMLNUM = DLAMCH( 'Safe minimum' )*DBLE( MAX( N, 1 ) )
+*
+*     Compute the 1-norm of the triangular matrix A or A'.
+*
+      ANORM = ZLANTB( NORM, UPLO, DIAG, N, KD, AB, LDAB, RWORK )
+*
+*     Continue only if ANORM > 0.
+*
+      IF( ANORM.GT.ZERO ) THEN
+*
+*        Estimate the 1-norm of the inverse of A.
+*
+         AINVNM = ZERO
+         NORMIN = 'N'
+         IF( ONENRM ) THEN
+            KASE1 = 1
+         ELSE
+            KASE1 = 2
+         END IF
+         KASE = 0
+   10    CONTINUE
+         CALL ZLACON( N, WORK( N+1 ), WORK, AINVNM, KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.KASE1 ) THEN
+*
+*              Multiply by inv(A).
+*
+               CALL ZLATBS( UPLO, 'No transpose', DIAG, NORMIN, N, KD,
+     $                      AB, LDAB, WORK, SCALE, RWORK, INFO )
+            ELSE
+*
+*              Multiply by inv(A').
+*
+               CALL ZLATBS( UPLO, 'Conjugate transpose', DIAG, NORMIN,
+     $                      N, KD, AB, LDAB, WORK, SCALE, RWORK, INFO )
+            END IF
+            NORMIN = 'Y'
+*
+*           Multiply by 1/SCALE if doing so will not cause overflow.
+*
+            IF( SCALE.NE.ONE ) THEN
+               IX = IZAMAX( N, WORK, 1 )
+               XNORM = CABS1( WORK( IX ) )
+               IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO )
+     $            GO TO 20
+               CALL ZDRSCL( N, SCALE, WORK, 1 )
+            END IF
+            GO TO 10
+         END IF
+*
+*        Compute the estimate of the reciprocal condition number.
+*
+         IF( AINVNM.NE.ZERO )
+     $      RCOND = ( ONE / ANORM ) / AINVNM
+      END IF
+*
+   20 CONTINUE
+      RETURN
+*
+*     End of ZTBCON
+*
+      END
diff --git a/costa/native/external/lapack/ztbrfs.f b/costa/native/external/lapack/ztbrfs.f
new file mode 100644
index 000000000..c1197909b
--- /dev/null
+++ b/costa/native/external/lapack/ztbrfs.f
@@ -0,0 +1,393 @@
+      SUBROUTINE ZTBRFS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B,
+     $                   LDB, X, LDX, FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, TRANS, UPLO
+      INTEGER            INFO, KD, LDAB, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX*16         AB( LDAB, * ), B( LDB, * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTBRFS provides error bounds and backward error estimates for the
+*  solution to a system of linear equations with a triangular band
+*  coefficient matrix.
+*
+*  The solution matrix X must be computed by ZTBTRS or some other
+*  means before entering this routine.  ZTBRFS does not do iterative
+*  refinement because doing so cannot improve the backward error.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals or subdiagonals of the
+*          triangular band matrix A.  KD >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AB      (input) COMPLEX*16 array, dimension (LDAB,N)
+*          The upper or lower triangular band matrix A, stored in the
+*          first kd+1 rows of the array. The j-th column of A is stored
+*          in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*          If DIAG = 'U', the diagonal elements of A are not referenced
+*          and are assumed to be 1.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  B       (input) COMPLEX*16 array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input) COMPLEX*16 array, dimension (LDX,NRHS)
+*          The solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN, NOUNIT, UPPER
+      CHARACTER          TRANSN, TRANST
+      INTEGER            I, J, K, KASE, NZ
+      DOUBLE PRECISION   EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+      COMPLEX*16         ZDUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZAXPY, ZCOPY, ZLACON, ZTBMV, ZTBSV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX, MIN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -8
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -12
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZTBRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      IF( NOTRAN ) THEN
+         TRANSN = 'N'
+         TRANST = 'C'
+      ELSE
+         TRANSN = 'C'
+         TRANST = 'N'
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = KD + 2
+      EPS = DLAMCH( 'Epsilon' )
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 250 J = 1, NRHS
+*
+*        Compute residual R = B - op(A) * X,
+*        where op(A) = A, A**T, or A**H, depending on TRANS.
+*
+         CALL ZCOPY( N, X( 1, J ), 1, WORK, 1 )
+         CALL ZTBMV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, WORK, 1 )
+         CALL ZAXPY( N, -ONE, B( 1, J ), 1, WORK, 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 20 I = 1, N
+            RWORK( I ) = CABS1( B( I, J ) )
+   20    CONTINUE
+*
+         IF( NOTRAN ) THEN
+*
+*           Compute abs(A)*abs(X) + abs(B).
+*
+            IF( UPPER ) THEN
+               IF( NOUNIT ) THEN
+                  DO 40 K = 1, N
+                     XK = CABS1( X( K, J ) )
+                     DO 30 I = MAX( 1, K-KD ), K
+                        RWORK( I ) = RWORK( I ) +
+     $                               CABS1( AB( KD+1+I-K, K ) )*XK
+   30                CONTINUE
+   40             CONTINUE
+               ELSE
+                  DO 60 K = 1, N
+                     XK = CABS1( X( K, J ) )
+                     DO 50 I = MAX( 1, K-KD ), K - 1
+                        RWORK( I ) = RWORK( I ) +
+     $                               CABS1( AB( KD+1+I-K, K ) )*XK
+   50                CONTINUE
+                     RWORK( K ) = RWORK( K ) + XK
+   60             CONTINUE
+               END IF
+            ELSE
+               IF( NOUNIT ) THEN
+                  DO 80 K = 1, N
+                     XK = CABS1( X( K, J ) )
+                     DO 70 I = K, MIN( N, K+KD )
+                        RWORK( I ) = RWORK( I ) +
+     $                               CABS1( AB( 1+I-K, K ) )*XK
+   70                CONTINUE
+   80             CONTINUE
+               ELSE
+                  DO 100 K = 1, N
+                     XK = CABS1( X( K, J ) )
+                     DO 90 I = K + 1, MIN( N, K+KD )
+                        RWORK( I ) = RWORK( I ) +
+     $                               CABS1( AB( 1+I-K, K ) )*XK
+   90                CONTINUE
+                     RWORK( K ) = RWORK( K ) + XK
+  100             CONTINUE
+               END IF
+            END IF
+         ELSE
+*
+*           Compute abs(A**H)*abs(X) + abs(B).
+*
+            IF( UPPER ) THEN
+               IF( NOUNIT ) THEN
+                  DO 120 K = 1, N
+                     S = ZERO
+                     DO 110 I = MAX( 1, K-KD ), K
+                        S = S + CABS1( AB( KD+1+I-K, K ) )*
+     $                      CABS1( X( I, J ) )
+  110                CONTINUE
+                     RWORK( K ) = RWORK( K ) + S
+  120             CONTINUE
+               ELSE
+                  DO 140 K = 1, N
+                     S = CABS1( X( K, J ) )
+                     DO 130 I = MAX( 1, K-KD ), K - 1
+                        S = S + CABS1( AB( KD+1+I-K, K ) )*
+     $                      CABS1( X( I, J ) )
+  130                CONTINUE
+                     RWORK( K ) = RWORK( K ) + S
+  140             CONTINUE
+               END IF
+            ELSE
+               IF( NOUNIT ) THEN
+                  DO 160 K = 1, N
+                     S = ZERO
+                     DO 150 I = K, MIN( N, K+KD )
+                        S = S + CABS1( AB( 1+I-K, K ) )*
+     $                      CABS1( X( I, J ) )
+  150                CONTINUE
+                     RWORK( K ) = RWORK( K ) + S
+  160             CONTINUE
+               ELSE
+                  DO 180 K = 1, N
+                     S = CABS1( X( K, J ) )
+                     DO 170 I = K + 1, MIN( N, K+KD )
+                        S = S + CABS1( AB( 1+I-K, K ) )*
+     $                      CABS1( X( I, J ) )
+  170                CONTINUE
+                     RWORK( K ) = RWORK( K ) + S
+  180             CONTINUE
+               END IF
+            END IF
+         END IF
+         S = ZERO
+         DO 190 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) )
+            ELSE
+               S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) /
+     $             ( RWORK( I )+SAFE1 ) )
+            END IF
+  190    CONTINUE
+         BERR( J ) = S
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(op(A)))*
+*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(op(A)) is the inverse of op(A)
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(op(A))*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use ZLACON to estimate the infinity-norm of the matrix
+*           inv(op(A)) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) )))
+*
+         DO 200 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I )
+            ELSE
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) +
+     $                      SAFE1
+            END IF
+  200    CONTINUE
+*
+         KASE = 0
+  210    CONTINUE
+         CALL ZLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(op(A)**H).
+*
+               CALL ZTBSV( UPLO, TRANST, DIAG, N, KD, AB, LDAB, WORK,
+     $                     1 )
+               DO 220 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  220          CONTINUE
+            ELSE
+*
+*              Multiply by inv(op(A))*diag(W).
+*
+               DO 230 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  230          CONTINUE
+               CALL ZTBSV( UPLO, TRANSN, DIAG, N, KD, AB, LDAB, WORK,
+     $                     1 )
+            END IF
+            GO TO 210
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 240 I = 1, N
+            LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) )
+  240    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  250 CONTINUE
+*
+      RETURN
+*
+*     End of ZTBRFS
+*
+      END
diff --git a/costa/native/external/lapack/ztbtrs.f b/costa/native/external/lapack/ztbtrs.f
new file mode 100644
index 000000000..c432d54df
--- /dev/null
+++ b/costa/native/external/lapack/ztbtrs.f
@@ -0,0 +1,163 @@
+      SUBROUTINE ZTBTRS( UPLO, TRANS, DIAG, N, KD, NRHS, AB, LDAB, B,
+     $                   LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, TRANS, UPLO
+      INTEGER            INFO, KD, LDAB, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         AB( LDAB, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTBTRS solves a triangular system of the form
+*
+*     A * X = B,  A**T * X = B,  or  A**H * X = B,
+*
+*  where A is a triangular band matrix of order N, and B is an
+*  N-by-NRHS matrix.  A check is made to verify that A is nonsingular.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  KD      (input) INTEGER
+*          The number of superdiagonals or subdiagonals of the
+*          triangular band matrix A.  KD >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AB      (input) COMPLEX*16 array, dimension (LDAB,N)
+*          The upper or lower triangular band matrix A, stored in the
+*          first kd+1 rows of AB.  The j-th column of A is stored
+*          in the j-th column of the array AB as follows:
+*          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
+*          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
+*          If DIAG = 'U', the diagonal elements of A are not referenced
+*          and are assumed to be 1.
+*
+*  LDAB    (input) INTEGER
+*          The leading dimension of the array AB.  LDAB >= KD+1.
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, if INFO = 0, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the i-th diagonal element of A is zero,
+*                indicating that the matrix is singular and the
+*                solutions X have not been computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, UPPER
+      INTEGER            J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZTBSV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOUNIT = LSAME( DIAG, 'N' )
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.
+     $         LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( KD.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDAB.LT.KD+1 ) THEN
+         INFO = -8
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZTBTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Check for singularity.
+*
+      IF( NOUNIT ) THEN
+         IF( UPPER ) THEN
+            DO 10 INFO = 1, N
+               IF( AB( KD+1, INFO ).EQ.ZERO )
+     $            RETURN
+   10       CONTINUE
+         ELSE
+            DO 20 INFO = 1, N
+               IF( AB( 1, INFO ).EQ.ZERO )
+     $            RETURN
+   20       CONTINUE
+         END IF
+      END IF
+      INFO = 0
+*
+*     Solve A * X = B,  A**T * X = B,  or  A**H * X = B.
+*
+      DO 30 J = 1, NRHS
+         CALL ZTBSV( UPLO, TRANS, DIAG, N, KD, AB, LDAB, B( 1, J ), 1 )
+   30 CONTINUE
+*
+      RETURN
+*
+*     End of ZTBTRS
+*
+      END
diff --git a/costa/native/external/lapack/ztgevc.f b/costa/native/external/lapack/ztgevc.f
new file mode 100644
index 000000000..65febe3ec
--- /dev/null
+++ b/costa/native/external/lapack/ztgevc.f
@@ -0,0 +1,632 @@
+      SUBROUTINE ZTGEVC( SIDE, HOWMNY, SELECT, N, A, LDA, B, LDB, VL,
+     $                   LDVL, VR, LDVR, MM, M, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          HOWMNY, SIDE
+      INTEGER            INFO, LDA, LDB, LDVL, LDVR, M, MM, N
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            SELECT( * )
+      DOUBLE PRECISION   RWORK( * )
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), VL( LDVL, * ),
+     $                   VR( LDVR, * ), WORK( * )
+*     ..
+*
+*
+*  Purpose
+*  =======
+*
+*  ZTGEVC computes some or all of the right and/or left generalized
+*  eigenvectors of a pair of complex upper triangular matrices (A,B).
+*
+*  The right generalized eigenvector x and the left generalized
+*  eigenvector y of (A,B) corresponding to a generalized eigenvalue
+*  w are defined by:
+*
+*          (A - wB) * x = 0  and  y**H * (A - wB) = 0
+*
+*  where y**H denotes the conjugate tranpose of y.
+*
+*  If an eigenvalue w is determined by zero diagonal elements of both A
+*  and B, a unit vector is returned as the corresponding eigenvector.
+*
+*  If all eigenvectors are requested, the routine may either return
+*  the matrices X and/or Y of right or left eigenvectors of (A,B), or
+*  the products Z*X and/or Q*Y, where Z and Q are input unitary
+*  matrices.  If (A,B) was obtained from the generalized Schur
+*  factorization of an original pair of matrices
+*     (A0,B0) = (Q*A*Z**H,Q*B*Z**H),
+*  then Z*X and Q*Y are the matrices of right or left eigenvectors of
+*  A.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'R': compute right eigenvectors only;
+*          = 'L': compute left eigenvectors only;
+*          = 'B': compute both right and left eigenvectors.
+*
+*  HOWMNY  (input) CHARACTER*1
+*          = 'A': compute all right and/or left eigenvectors;
+*          = 'B': compute all right and/or left eigenvectors, and
+*                 backtransform them using the input matrices supplied
+*                 in VR and/or VL;
+*          = 'S': compute selected right and/or left eigenvectors,
+*                 specified by the logical array SELECT.
+*
+*  SELECT  (input) LOGICAL array, dimension (N)
+*          If HOWMNY='S', SELECT specifies the eigenvectors to be
+*          computed.
+*          If HOWMNY='A' or 'B', SELECT is not referenced.
+*          To select the eigenvector corresponding to the j-th
+*          eigenvalue, SELECT(j) must be set to .TRUE..
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B.  N >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,N)
+*          The upper triangular matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of array A.  LDA >= max(1,N).
+*
+*  B       (input) COMPLEX*16 array, dimension (LDB,N)
+*          The upper triangular matrix B.  B must have real diagonal
+*          elements.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of array B.  LDB >= max(1,N).
+*
+*  VL      (input/output) COMPLEX*16 array, dimension (LDVL,MM)
+*          On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must
+*          contain an N-by-N matrix Q (usually the unitary matrix Q
+*          of left Schur vectors returned by ZHGEQZ).
+*          On exit, if SIDE = 'L' or 'B', VL contains:
+*          if HOWMNY = 'A', the matrix Y of left eigenvectors of (A,B);
+*          if HOWMNY = 'B', the matrix Q*Y;
+*          if HOWMNY = 'S', the left eigenvectors of (A,B) specified by
+*                      SELECT, stored consecutively in the columns of
+*                      VL, in the same order as their eigenvalues.
+*          If SIDE = 'R', VL is not referenced.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of array VL.
+*          LDVL >= max(1,N) if SIDE = 'L' or 'B'; LDVL >= 1 otherwise.
+*
+*  VR      (input/output) COMPLEX*16 array, dimension (LDVR,MM)
+*          On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must
+*          contain an N-by-N matrix Q (usually the unitary matrix Z
+*          of right Schur vectors returned by ZHGEQZ).
+*          On exit, if SIDE = 'R' or 'B', VR contains:
+*          if HOWMNY = 'A', the matrix X of right eigenvectors of (A,B);
+*          if HOWMNY = 'B', the matrix Z*X;
+*          if HOWMNY = 'S', the right eigenvectors of (A,B) specified by
+*                      SELECT, stored consecutively in the columns of
+*                      VR, in the same order as their eigenvalues.
+*          If SIDE = 'L', VR is not referenced.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the array VR.
+*          LDVR >= max(1,N) if SIDE = 'R' or 'B'; LDVR >= 1 otherwise.
+*
+*  MM      (input) INTEGER
+*          The number of columns in the arrays VL and/or VR. MM >= M.
+*
+*  M       (output) INTEGER
+*          The number of columns in the arrays VL and/or VR actually
+*          used to store the eigenvectors.  If HOWMNY = 'A' or 'B', M
+*          is set to N.  Each selected eigenvector occupies one column.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (2*N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit.
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+      COMPLEX*16         CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            COMPL, COMPR, ILALL, ILBACK, ILBBAD, ILCOMP,
+     $                   LSA, LSB
+      INTEGER            I, IBEG, IEIG, IEND, IHWMNY, IM, ISIDE, ISRC,
+     $                   J, JE, JR
+      DOUBLE PRECISION   ACOEFA, ACOEFF, ANORM, ASCALE, BCOEFA, BIG,
+     $                   BIGNUM, BNORM, BSCALE, DMIN, SAFMIN, SBETA,
+     $                   SCALE, SMALL, TEMP, ULP, XMAX
+      COMPLEX*16         BCOEFF, CA, CB, D, SALPHA, SUM, SUMA, SUMB, X
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      COMPLEX*16         ZLADIV
+      EXTERNAL           LSAME, DLAMCH, ZLADIV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLABAD, XERBLA, ZGEMV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, MIN
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   ABS1
+*     ..
+*     .. Statement Function definitions ..
+      ABS1( X ) = ABS( DBLE( X ) ) + ABS( DIMAG( X ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and Test the input parameters
+*
+      IF( LSAME( HOWMNY, 'A' ) ) THEN
+         IHWMNY = 1
+         ILALL = .TRUE.
+         ILBACK = .FALSE.
+      ELSE IF( LSAME( HOWMNY, 'S' ) ) THEN
+         IHWMNY = 2
+         ILALL = .FALSE.
+         ILBACK = .FALSE.
+      ELSE IF( LSAME( HOWMNY, 'B' ) .OR. LSAME( HOWMNY, 'T' ) ) THEN
+         IHWMNY = 3
+         ILALL = .TRUE.
+         ILBACK = .TRUE.
+      ELSE
+         IHWMNY = -1
+      END IF
+*
+      IF( LSAME( SIDE, 'R' ) ) THEN
+         ISIDE = 1
+         COMPL = .FALSE.
+         COMPR = .TRUE.
+      ELSE IF( LSAME( SIDE, 'L' ) ) THEN
+         ISIDE = 2
+         COMPL = .TRUE.
+         COMPR = .FALSE.
+      ELSE IF( LSAME( SIDE, 'B' ) ) THEN
+         ISIDE = 3
+         COMPL = .TRUE.
+         COMPR = .TRUE.
+      ELSE
+         ISIDE = -1
+      END IF
+*
+      INFO = 0
+      IF( ISIDE.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( IHWMNY.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZTGEVC', -INFO )
+         RETURN
+      END IF
+*
+*     Count the number of eigenvectors
+*
+      IF( .NOT.ILALL ) THEN
+         IM = 0
+         DO 10 J = 1, N
+            IF( SELECT( J ) )
+     $         IM = IM + 1
+   10    CONTINUE
+      ELSE
+         IM = N
+      END IF
+*
+*     Check diagonal of B
+*
+      ILBBAD = .FALSE.
+      DO 20 J = 1, N
+         IF( DIMAG( B( J, J ) ).NE.ZERO )
+     $      ILBBAD = .TRUE.
+   20 CONTINUE
+*
+      IF( ILBBAD ) THEN
+         INFO = -7
+      ELSE IF( COMPL .AND. LDVL.LT.N .OR. LDVL.LT.1 ) THEN
+         INFO = -10
+      ELSE IF( COMPR .AND. LDVR.LT.N .OR. LDVR.LT.1 ) THEN
+         INFO = -12
+      ELSE IF( MM.LT.IM ) THEN
+         INFO = -13
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZTGEVC', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      M = IM
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Machine Constants
+*
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      BIG = ONE / SAFMIN
+      CALL DLABAD( SAFMIN, BIG )
+      ULP = DLAMCH( 'Epsilon' )*DLAMCH( 'Base' )
+      SMALL = SAFMIN*N / ULP
+      BIG = ONE / SMALL
+      BIGNUM = ONE / ( SAFMIN*N )
+*
+*     Compute the 1-norm of each column of the strictly upper triangular
+*     part of A and B to check for possible overflow in the triangular
+*     solver.
+*
+      ANORM = ABS1( A( 1, 1 ) )
+      BNORM = ABS1( B( 1, 1 ) )
+      RWORK( 1 ) = ZERO
+      RWORK( N+1 ) = ZERO
+      DO 40 J = 2, N
+         RWORK( J ) = ZERO
+         RWORK( N+J ) = ZERO
+         DO 30 I = 1, J - 1
+            RWORK( J ) = RWORK( J ) + ABS1( A( I, J ) )
+            RWORK( N+J ) = RWORK( N+J ) + ABS1( B( I, J ) )
+   30    CONTINUE
+         ANORM = MAX( ANORM, RWORK( J )+ABS1( A( J, J ) ) )
+         BNORM = MAX( BNORM, RWORK( N+J )+ABS1( B( J, J ) ) )
+   40 CONTINUE
+*
+      ASCALE = ONE / MAX( ANORM, SAFMIN )
+      BSCALE = ONE / MAX( BNORM, SAFMIN )
+*
+*     Left eigenvectors
+*
+      IF( COMPL ) THEN
+         IEIG = 0
+*
+*        Main loop over eigenvalues
+*
+         DO 140 JE = 1, N
+            IF( ILALL ) THEN
+               ILCOMP = .TRUE.
+            ELSE
+               ILCOMP = SELECT( JE )
+            END IF
+            IF( ILCOMP ) THEN
+               IEIG = IEIG + 1
+*
+               IF( ABS1( A( JE, JE ) ).LE.SAFMIN .AND.
+     $             ABS( DBLE( B( JE, JE ) ) ).LE.SAFMIN ) THEN
+*
+*                 Singular matrix pencil -- return unit eigenvector
+*
+                  DO 50 JR = 1, N
+                     VL( JR, IEIG ) = CZERO
+   50             CONTINUE
+                  VL( IEIG, IEIG ) = CONE
+                  GO TO 140
+               END IF
+*
+*              Non-singular eigenvalue:
+*              Compute coefficients  a  and  b  in
+*                   H
+*                 y  ( a A - b B ) = 0
+*
+               TEMP = ONE / MAX( ABS1( A( JE, JE ) )*ASCALE,
+     $                ABS( DBLE( B( JE, JE ) ) )*BSCALE, SAFMIN )
+               SALPHA = ( TEMP*A( JE, JE ) )*ASCALE
+               SBETA = ( TEMP*DBLE( B( JE, JE ) ) )*BSCALE
+               ACOEFF = SBETA*ASCALE
+               BCOEFF = SALPHA*BSCALE
+*
+*              Scale to avoid underflow
+*
+               LSA = ABS( SBETA ).GE.SAFMIN .AND. ABS( ACOEFF ).LT.SMALL
+               LSB = ABS1( SALPHA ).GE.SAFMIN .AND. ABS1( BCOEFF ).LT.
+     $               SMALL
+*
+               SCALE = ONE
+               IF( LSA )
+     $            SCALE = ( SMALL / ABS( SBETA ) )*MIN( ANORM, BIG )
+               IF( LSB )
+     $            SCALE = MAX( SCALE, ( SMALL / ABS1( SALPHA ) )*
+     $                    MIN( BNORM, BIG ) )
+               IF( LSA .OR. LSB ) THEN
+                  SCALE = MIN( SCALE, ONE /
+     $                    ( SAFMIN*MAX( ONE, ABS( ACOEFF ),
+     $                    ABS1( BCOEFF ) ) ) )
+                  IF( LSA ) THEN
+                     ACOEFF = ASCALE*( SCALE*SBETA )
+                  ELSE
+                     ACOEFF = SCALE*ACOEFF
+                  END IF
+                  IF( LSB ) THEN
+                     BCOEFF = BSCALE*( SCALE*SALPHA )
+                  ELSE
+                     BCOEFF = SCALE*BCOEFF
+                  END IF
+               END IF
+*
+               ACOEFA = ABS( ACOEFF )
+               BCOEFA = ABS1( BCOEFF )
+               XMAX = ONE
+               DO 60 JR = 1, N
+                  WORK( JR ) = CZERO
+   60          CONTINUE
+               WORK( JE ) = CONE
+               DMIN = MAX( ULP*ACOEFA*ANORM, ULP*BCOEFA*BNORM, SAFMIN )
+*
+*                                              H
+*              Triangular solve of  (a A - b B)  y = 0
+*
+*                                      H
+*              (rowwise in  (a A - b B) , or columnwise in a A - b B)
+*
+               DO 100 J = JE + 1, N
+*
+*                 Compute
+*                       j-1
+*                 SUM = sum  conjg( a*A(k,j) - b*B(k,j) )*x(k)
+*                       k=je
+*                 (Scale if necessary)
+*
+                  TEMP = ONE / XMAX
+                  IF( ACOEFA*RWORK( J )+BCOEFA*RWORK( N+J ).GT.BIGNUM*
+     $                TEMP ) THEN
+                     DO 70 JR = JE, J - 1
+                        WORK( JR ) = TEMP*WORK( JR )
+   70                CONTINUE
+                     XMAX = ONE
+                  END IF
+                  SUMA = CZERO
+                  SUMB = CZERO
+*
+                  DO 80 JR = JE, J - 1
+                     SUMA = SUMA + DCONJG( A( JR, J ) )*WORK( JR )
+                     SUMB = SUMB + DCONJG( B( JR, J ) )*WORK( JR )
+   80             CONTINUE
+                  SUM = ACOEFF*SUMA - DCONJG( BCOEFF )*SUMB
+*
+*                 Form x(j) = - SUM / conjg( a*A(j,j) - b*B(j,j) )
+*
+*                 with scaling and perturbation of the denominator
+*
+                  D = DCONJG( ACOEFF*A( J, J )-BCOEFF*B( J, J ) )
+                  IF( ABS1( D ).LE.DMIN )
+     $               D = DCMPLX( DMIN )
+*
+                  IF( ABS1( D ).LT.ONE ) THEN
+                     IF( ABS1( SUM ).GE.BIGNUM*ABS1( D ) ) THEN
+                        TEMP = ONE / ABS1( SUM )
+                        DO 90 JR = JE, J - 1
+                           WORK( JR ) = TEMP*WORK( JR )
+   90                   CONTINUE
+                        XMAX = TEMP*XMAX
+                        SUM = TEMP*SUM
+                     END IF
+                  END IF
+                  WORK( J ) = ZLADIV( -SUM, D )
+                  XMAX = MAX( XMAX, ABS1( WORK( J ) ) )
+  100          CONTINUE
+*
+*              Back transform eigenvector if HOWMNY='B'.
+*
+               IF( ILBACK ) THEN
+                  CALL ZGEMV( 'N', N, N+1-JE, CONE, VL( 1, JE ), LDVL,
+     $                        WORK( JE ), 1, CZERO, WORK( N+1 ), 1 )
+                  ISRC = 2
+                  IBEG = 1
+               ELSE
+                  ISRC = 1
+                  IBEG = JE
+               END IF
+*
+*              Copy and scale eigenvector into column of VL
+*
+               XMAX = ZERO
+               DO 110 JR = IBEG, N
+                  XMAX = MAX( XMAX, ABS1( WORK( ( ISRC-1 )*N+JR ) ) )
+  110          CONTINUE
+*
+               IF( XMAX.GT.SAFMIN ) THEN
+                  TEMP = ONE / XMAX
+                  DO 120 JR = IBEG, N
+                     VL( JR, IEIG ) = TEMP*WORK( ( ISRC-1 )*N+JR )
+  120             CONTINUE
+               ELSE
+                  IBEG = N + 1
+               END IF
+*
+               DO 130 JR = 1, IBEG - 1
+                  VL( JR, IEIG ) = CZERO
+  130          CONTINUE
+*
+            END IF
+  140    CONTINUE
+      END IF
+*
+*     Right eigenvectors
+*
+      IF( COMPR ) THEN
+         IEIG = IM + 1
+*
+*        Main loop over eigenvalues
+*
+         DO 250 JE = N, 1, -1
+            IF( ILALL ) THEN
+               ILCOMP = .TRUE.
+            ELSE
+               ILCOMP = SELECT( JE )
+            END IF
+            IF( ILCOMP ) THEN
+               IEIG = IEIG - 1
+*
+               IF( ABS1( A( JE, JE ) ).LE.SAFMIN .AND.
+     $             ABS( DBLE( B( JE, JE ) ) ).LE.SAFMIN ) THEN
+*
+*                 Singular matrix pencil -- return unit eigenvector
+*
+                  DO 150 JR = 1, N
+                     VR( JR, IEIG ) = CZERO
+  150             CONTINUE
+                  VR( IEIG, IEIG ) = CONE
+                  GO TO 250
+               END IF
+*
+*              Non-singular eigenvalue:
+*              Compute coefficients  a  and  b  in
+*
+*              ( a A - b B ) x  = 0
+*
+               TEMP = ONE / MAX( ABS1( A( JE, JE ) )*ASCALE,
+     $                ABS( DBLE( B( JE, JE ) ) )*BSCALE, SAFMIN )
+               SALPHA = ( TEMP*A( JE, JE ) )*ASCALE
+               SBETA = ( TEMP*DBLE( B( JE, JE ) ) )*BSCALE
+               ACOEFF = SBETA*ASCALE
+               BCOEFF = SALPHA*BSCALE
+*
+*              Scale to avoid underflow
+*
+               LSA = ABS( SBETA ).GE.SAFMIN .AND. ABS( ACOEFF ).LT.SMALL
+               LSB = ABS1( SALPHA ).GE.SAFMIN .AND. ABS1( BCOEFF ).LT.
+     $               SMALL
+*
+               SCALE = ONE
+               IF( LSA )
+     $            SCALE = ( SMALL / ABS( SBETA ) )*MIN( ANORM, BIG )
+               IF( LSB )
+     $            SCALE = MAX( SCALE, ( SMALL / ABS1( SALPHA ) )*
+     $                    MIN( BNORM, BIG ) )
+               IF( LSA .OR. LSB ) THEN
+                  SCALE = MIN( SCALE, ONE /
+     $                    ( SAFMIN*MAX( ONE, ABS( ACOEFF ),
+     $                    ABS1( BCOEFF ) ) ) )
+                  IF( LSA ) THEN
+                     ACOEFF = ASCALE*( SCALE*SBETA )
+                  ELSE
+                     ACOEFF = SCALE*ACOEFF
+                  END IF
+                  IF( LSB ) THEN
+                     BCOEFF = BSCALE*( SCALE*SALPHA )
+                  ELSE
+                     BCOEFF = SCALE*BCOEFF
+                  END IF
+               END IF
+*
+               ACOEFA = ABS( ACOEFF )
+               BCOEFA = ABS1( BCOEFF )
+               XMAX = ONE
+               DO 160 JR = 1, N
+                  WORK( JR ) = CZERO
+  160          CONTINUE
+               WORK( JE ) = CONE
+               DMIN = MAX( ULP*ACOEFA*ANORM, ULP*BCOEFA*BNORM, SAFMIN )
+*
+*              Triangular solve of  (a A - b B) x = 0  (columnwise)
+*
+*              WORK(1:j-1) contains sums w,
+*              WORK(j+1:JE) contains x
+*
+               DO 170 JR = 1, JE - 1
+                  WORK( JR ) = ACOEFF*A( JR, JE ) - BCOEFF*B( JR, JE )
+  170          CONTINUE
+               WORK( JE ) = CONE
+*
+               DO 210 J = JE - 1, 1, -1
+*
+*                 Form x(j) := - w(j) / d
+*                 with scaling and perturbation of the denominator
+*
+                  D = ACOEFF*A( J, J ) - BCOEFF*B( J, J )
+                  IF( ABS1( D ).LE.DMIN )
+     $               D = DCMPLX( DMIN )
+*
+                  IF( ABS1( D ).LT.ONE ) THEN
+                     IF( ABS1( WORK( J ) ).GE.BIGNUM*ABS1( D ) ) THEN
+                        TEMP = ONE / ABS1( WORK( J ) )
+                        DO 180 JR = 1, JE
+                           WORK( JR ) = TEMP*WORK( JR )
+  180                   CONTINUE
+                     END IF
+                  END IF
+*
+                  WORK( J ) = ZLADIV( -WORK( J ), D )
+*
+                  IF( J.GT.1 ) THEN
+*
+*                    w = w + x(j)*(a A(*,j) - b B(*,j) ) with scaling
+*
+                     IF( ABS1( WORK( J ) ).GT.ONE ) THEN
+                        TEMP = ONE / ABS1( WORK( J ) )
+                        IF( ACOEFA*RWORK( J )+BCOEFA*RWORK( N+J ).GE.
+     $                      BIGNUM*TEMP ) THEN
+                           DO 190 JR = 1, JE
+                              WORK( JR ) = TEMP*WORK( JR )
+  190                      CONTINUE
+                        END IF
+                     END IF
+*
+                     CA = ACOEFF*WORK( J )
+                     CB = BCOEFF*WORK( J )
+                     DO 200 JR = 1, J - 1
+                        WORK( JR ) = WORK( JR ) + CA*A( JR, J ) -
+     $                               CB*B( JR, J )
+  200                CONTINUE
+                  END IF
+  210          CONTINUE
+*
+*              Back transform eigenvector if HOWMNY='B'.
+*
+               IF( ILBACK ) THEN
+                  CALL ZGEMV( 'N', N, JE, CONE, VR, LDVR, WORK, 1,
+     $                        CZERO, WORK( N+1 ), 1 )
+                  ISRC = 2
+                  IEND = N
+               ELSE
+                  ISRC = 1
+                  IEND = JE
+               END IF
+*
+*              Copy and scale eigenvector into column of VR
+*
+               XMAX = ZERO
+               DO 220 JR = 1, IEND
+                  XMAX = MAX( XMAX, ABS1( WORK( ( ISRC-1 )*N+JR ) ) )
+  220          CONTINUE
+*
+               IF( XMAX.GT.SAFMIN ) THEN
+                  TEMP = ONE / XMAX
+                  DO 230 JR = 1, IEND
+                     VR( JR, IEIG ) = TEMP*WORK( ( ISRC-1 )*N+JR )
+  230             CONTINUE
+               ELSE
+                  IEND = 0
+               END IF
+*
+               DO 240 JR = IEND + 1, N
+                  VR( JR, IEIG ) = CZERO
+  240          CONTINUE
+*
+            END IF
+  250    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZTGEVC
+*
+      END
diff --git a/costa/native/external/lapack/ztgex2.f b/costa/native/external/lapack/ztgex2.f
new file mode 100644
index 000000000..50c18c1dc
--- /dev/null
+++ b/costa/native/external/lapack/ztgex2.f
@@ -0,0 +1,268 @@
+      SUBROUTINE ZTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
+     $                   LDZ, J1, INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      LOGICAL            WANTQ, WANTZ
+      INTEGER            INFO, J1, LDA, LDB, LDQ, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTGEX2 swaps adjacent diagonal 1 by 1 blocks (A11,B11) and (A22,B22)
+*  in an upper triangular matrix pair (A, B) by an unitary equivalence
+*  transformation.
+*
+*  (A, B) must be in generalized Schur canonical form, that is, A and
+*  B are both upper triangular.
+*
+*  Optionally, the matrices Q and Z of generalized Schur vectors are
+*  updated.
+*
+*         Q(in) * A(in) * Z(in)' = Q(out) * A(out) * Z(out)'
+*         Q(in) * B(in) * Z(in)' = Q(out) * B(out) * Z(out)'
+*
+*
+*  Arguments
+*  =========
+*
+*  WANTQ   (input) LOGICAL
+*          .TRUE. : update the left transformation matrix Q;
+*          .FALSE.: do not update Q.
+*
+*  WANTZ   (input) LOGICAL
+*          .TRUE. : update the right transformation matrix Z;
+*          .FALSE.: do not update Z.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B. N >= 0.
+*
+*  A       (input/output) COMPLEX*16 arrays, dimensions (LDA,N)
+*          On entry, the matrix A in the pair (A, B).
+*          On exit, the updated matrix A.
+*
+*  LDA     (input)  INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX*16 arrays, dimensions (LDB,N)
+*          On entry, the matrix B in the pair (A, B).
+*          On exit, the updated matrix B.
+*
+*  LDB     (input)  INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  Q       (input/output) COMPLEX*16 array, dimension (LDZ,N)
+*          If WANTQ = .TRUE, on entry, the unitary matrix Q. On exit,
+*          the updated matrix Q.
+*          Not referenced if WANTQ = .FALSE..
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q. LDQ >= 1;
+*          If WANTQ = .TRUE., LDQ >= N.
+*
+*  Z       (input/output) COMPLEX*16 array, dimension (LDZ,N)
+*          If WANTZ = .TRUE, on entry, the unitary matrix Z. On exit,
+*          the updated matrix Z.
+*          Not referenced if WANTZ = .FALSE..
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z. LDZ >= 1;
+*          If WANTZ = .TRUE., LDZ >= N.
+*
+*  J1      (input) INTEGER
+*          The index to the first block (A11, B11).
+*
+*  INFO    (output) INTEGER
+*           =0:  Successful exit.
+*           =1:  The transformed matrix pair (A, B) would be too far
+*                from generalized Schur form; the problem is ill-
+*                conditioned. (A, B) may have been partially reordered,
+*                and ILST points to the first row of the current
+*                position of the block being moved.
+*
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  In the current code both weak and strong stability tests are
+*  performed. The user can omit the strong stability test by changing
+*  the internal logical parameter WANDS to .FALSE.. See ref. [2] for
+*  details.
+*
+*  [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the
+*      Generalized Real Schur Form of a Regular Matrix Pair (A, B), in
+*      M.S. Moonen et al (eds), Linear Algebra for Large Scale and
+*      Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218.
+*
+*  [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified
+*      Eigenvalues of a Regular Matrix Pair (A, B) and Condition
+*      Estimation: Theory, Algorithms and Software, Report UMINF-94.04,
+*      Department of Computing Science, Umea University, S-901 87 Umea,
+*      Sweden, 1994. Also as LAPACK Working Note 87. To appear in
+*      Numerical Algorithms, 1996.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   CONE = ( 1.0D+0, 0.0D+0 ) )
+      DOUBLE PRECISION   TEN
+      PARAMETER          ( TEN = 10.0D+0 )
+      INTEGER            LDST
+      PARAMETER          ( LDST = 2 )
+      LOGICAL            WANDS
+      PARAMETER          ( WANDS = .TRUE. )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            DTRONG, WEAK
+      INTEGER            I, M
+      DOUBLE PRECISION   CQ, CZ, EPS, SA, SB, SCALE, SMLNUM, SS, SUM,
+     $                   THRESH, WS
+      COMPLEX*16         CDUM, F, G, SQ, SZ
+*     ..
+*     .. Local Arrays ..
+      COMPLEX*16         S( LDST, LDST ), T( LDST, LDST ), WORK( 8 )
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZLACPY, ZLARTG, ZLASSQ, ZROT
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCONJG, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+      INFO = 0
+*
+*     Quick return if possible
+*
+      IF( N.LE.1 )
+     $   RETURN
+*
+      M = LDST
+      WEAK = .FALSE.
+      DTRONG = .FALSE.
+*
+*     Make a local copy of selected block in (A, B)
+*
+      CALL ZLACPY( 'Full', M, M, A( J1, J1 ), LDA, S, LDST )
+      CALL ZLACPY( 'Full', M, M, B( J1, J1 ), LDB, T, LDST )
+*
+*     Compute the threshold for testing the acceptance of swapping.
+*
+      EPS = DLAMCH( 'P' )
+      SMLNUM = DLAMCH( 'S' ) / EPS
+      SCALE = DBLE( CZERO )
+      SUM = DBLE( CONE )
+      CALL ZLACPY( 'Full', M, M, S, LDST, WORK, M )
+      CALL ZLACPY( 'Full', M, M, T, LDST, WORK( M*M+1 ), M )
+      CALL ZLASSQ( 2*M*M, WORK, 1, SCALE, SUM )
+      SA = SCALE*SQRT( SUM )
+      THRESH = MAX( TEN*EPS*SA, SMLNUM )
+*
+*     Compute unitary QL and RQ that swap 1-by-1 and 1-by-1 blocks
+*     using Givens rotations and perform the swap tentatively.
+*
+      F = S( 2, 2 )*T( 1, 1 ) - T( 2, 2 )*S( 1, 1 )
+      G = S( 2, 2 )*T( 1, 2 ) - T( 2, 2 )*S( 1, 2 )
+      SA = ABS( S( 2, 2 ) )
+      SB = ABS( T( 2, 2 ) )
+      CALL ZLARTG( G, F, CZ, SZ, CDUM )
+      SZ = -SZ
+      CALL ZROT( 2, S( 1, 1 ), 1, S( 1, 2 ), 1, CZ, DCONJG( SZ ) )
+      CALL ZROT( 2, T( 1, 1 ), 1, T( 1, 2 ), 1, CZ, DCONJG( SZ ) )
+      IF( SA.GE.SB ) THEN
+         CALL ZLARTG( S( 1, 1 ), S( 2, 1 ), CQ, SQ, CDUM )
+      ELSE
+         CALL ZLARTG( T( 1, 1 ), T( 2, 1 ), CQ, SQ, CDUM )
+      END IF
+      CALL ZROT( 2, S( 1, 1 ), LDST, S( 2, 1 ), LDST, CQ, SQ )
+      CALL ZROT( 2, T( 1, 1 ), LDST, T( 2, 1 ), LDST, CQ, SQ )
+*
+*     Weak stability test: |S21| + |T21| <= O(EPS F-norm((S, T)))
+*
+      WS = ABS( S( 2, 1 ) ) + ABS( T( 2, 1 ) )
+      WEAK = WS.LE.THRESH
+      IF( .NOT.WEAK )
+     $   GO TO 20
+*
+      IF( WANDS ) THEN
+*
+*        Strong stability test:
+*           F-norm((A-QL'*S*QR, B-QL'*T*QR)) <= O(EPS*F-norm((A, B)))
+*
+         CALL ZLACPY( 'Full', M, M, S, LDST, WORK, M )
+         CALL ZLACPY( 'Full', M, M, T, LDST, WORK( M*M+1 ), M )
+         CALL ZROT( 2, WORK, 1, WORK( 3 ), 1, CZ, -DCONJG( SZ ) )
+         CALL ZROT( 2, WORK( 5 ), 1, WORK( 7 ), 1, CZ, -DCONJG( SZ ) )
+         CALL ZROT( 2, WORK, 2, WORK( 2 ), 2, CQ, -SQ )
+         CALL ZROT( 2, WORK( 5 ), 2, WORK( 6 ), 2, CQ, -SQ )
+         DO 10 I = 1, 2
+            WORK( I ) = WORK( I ) - A( J1+I-1, J1 )
+            WORK( I+2 ) = WORK( I+2 ) - A( J1+I-1, J1+1 )
+            WORK( I+4 ) = WORK( I+4 ) - B( J1+I-1, J1 )
+            WORK( I+6 ) = WORK( I+6 ) - B( J1+I-1, J1+1 )
+   10    CONTINUE
+         SCALE = DBLE( CZERO )
+         SUM = DBLE( CONE )
+         CALL ZLASSQ( 2*M*M, WORK, 1, SCALE, SUM )
+         SS = SCALE*SQRT( SUM )
+         DTRONG = SS.LE.THRESH
+         IF( .NOT.DTRONG )
+     $      GO TO 20
+      END IF
+*
+*     If the swap is accepted ("weakly" and "strongly"), apply the
+*     equivalence transformations to the original matrix pair (A,B)
+*
+      CALL ZROT( J1+1, A( 1, J1 ), 1, A( 1, J1+1 ), 1, CZ,
+     $           DCONJG( SZ ) )
+      CALL ZROT( J1+1, B( 1, J1 ), 1, B( 1, J1+1 ), 1, CZ,
+     $           DCONJG( SZ ) )
+      CALL ZROT( N-J1+1, A( J1, J1 ), LDA, A( J1+1, J1 ), LDA, CQ, SQ )
+      CALL ZROT( N-J1+1, B( J1, J1 ), LDB, B( J1+1, J1 ), LDB, CQ, SQ )
+*
+*     Set  N1 by N2 (2,1) blocks to 0
+*
+      A( J1+1, J1 ) = CZERO
+      B( J1+1, J1 ) = CZERO
+*
+*     Accumulate transformations into Q and Z if requested.
+*
+      IF( WANTZ )
+     $   CALL ZROT( N, Z( 1, J1 ), 1, Z( 1, J1+1 ), 1, CZ,
+     $              DCONJG( SZ ) )
+      IF( WANTQ )
+     $   CALL ZROT( N, Q( 1, J1 ), 1, Q( 1, J1+1 ), 1, CQ,
+     $              DCONJG( SQ ) )
+*
+*     Exit with INFO = 0 if swap was successfully performed.
+*
+      RETURN
+*
+*     Exit with INFO = 1 if swap was rejected.
+*
+   20 CONTINUE
+      INFO = 1
+      RETURN
+*
+*     End of ZTGEX2
+*
+      END
diff --git a/costa/native/external/lapack/ztgexc.f b/costa/native/external/lapack/ztgexc.f
new file mode 100644
index 000000000..fe28d7358
--- /dev/null
+++ b/costa/native/external/lapack/ztgexc.f
@@ -0,0 +1,207 @@
+      SUBROUTINE ZTGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
+     $                   LDZ, IFST, ILST, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      LOGICAL            WANTQ, WANTZ
+      INTEGER            IFST, ILST, INFO, LDA, LDB, LDQ, LDZ, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
+     $                   Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTGEXC reorders the generalized Schur decomposition of a complex
+*  matrix pair (A,B), using an unitary equivalence transformation
+*  (A, B) := Q * (A, B) * Z', so that the diagonal block of (A, B) with
+*  row index IFST is moved to row ILST.
+*
+*  (A, B) must be in generalized Schur canonical form, that is, A and
+*  B are both upper triangular.
+*
+*  Optionally, the matrices Q and Z of generalized Schur vectors are
+*  updated.
+*
+*         Q(in) * A(in) * Z(in)' = Q(out) * A(out) * Z(out)'
+*         Q(in) * B(in) * Z(in)' = Q(out) * B(out) * Z(out)'
+*
+*  Arguments
+*  =========
+*
+*  WANTQ   (input) LOGICAL
+*          .TRUE. : update the left transformation matrix Q;
+*          .FALSE.: do not update Q.
+*
+*  WANTZ   (input) LOGICAL
+*          .TRUE. : update the right transformation matrix Z;
+*          .FALSE.: do not update Z.
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B. N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the upper triangular matrix A in the pair (A, B).
+*          On exit, the updated matrix A.
+*
+*  LDA     (input)  INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,N)
+*          On entry, the upper triangular matrix B in the pair (A, B).
+*          On exit, the updated matrix B.
+*
+*  LDB     (input)  INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  Q       (input/output) COMPLEX*16 array, dimension (LDZ,N)
+*          On entry, if WANTQ = .TRUE., the unitary matrix Q.
+*          On exit, the updated matrix Q.
+*          If WANTQ = .FALSE., Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q. LDQ >= 1;
+*          If WANTQ = .TRUE., LDQ >= N.
+*
+*  Z       (input/output) COMPLEX*16 array, dimension (LDZ,N)
+*          On entry, if WANTZ = .TRUE., the unitary matrix Z.
+*          On exit, the updated matrix Z.
+*          If WANTZ = .FALSE., Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z. LDZ >= 1;
+*          If WANTZ = .TRUE., LDZ >= N.
+*
+*  IFST    (input/output) INTEGER
+*  ILST    (input/output) INTEGER
+*          Specify the reordering of the diagonal blocks of (A, B).
+*          The block with row index IFST is moved to row ILST, by a
+*          sequence of swapping between adjacent blocks.
+*
+*  INFO    (output) INTEGER
+*           =0:  Successful exit.
+*           <0:  if INFO = -i, the i-th argument had an illegal value.
+*           =1:  The transformed matrix pair (A, B) would be too far
+*                from generalized Schur form; the problem is ill-
+*                conditioned. (A, B) may have been partially reordered,
+*                and ILST points to the first row of the current
+*                position of the block being moved.
+*
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the
+*      Generalized Real Schur Form of a Regular Matrix Pair (A, B), in
+*      M.S. Moonen et al (eds), Linear Algebra for Large Scale and
+*      Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218.
+*
+*  [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified
+*      Eigenvalues of a Regular Matrix Pair (A, B) and Condition
+*      Estimation: Theory, Algorithms and Software, Report
+*      UMINF - 94.04, Department of Computing Science, Umea University,
+*      S-901 87 Umea, Sweden, 1994. Also as LAPACK Working Note 87.
+*      To appear in Numerical Algorithms, 1996.
+*
+*  [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software
+*      for Solving the Generalized Sylvester Equation and Estimating the
+*      Separation between Regular Matrix Pairs, Report UMINF - 93.23,
+*      Department of Computing Science, Umea University, S-901 87 Umea,
+*      Sweden, December 1993, Revised April 1994, Also as LAPACK working
+*      Note 75. To appear in ACM Trans. on Math. Software, Vol 22, No 1,
+*      1996.
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      INTEGER            HERE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZTGEX2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test input arguments.
+      INFO = 0
+      IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDQ.LT.1 .OR. WANTQ .AND. ( LDQ.LT.MAX( 1, N ) ) ) THEN
+         INFO = -9
+      ELSE IF( LDZ.LT.1 .OR. WANTZ .AND. ( LDZ.LT.MAX( 1, N ) ) ) THEN
+         INFO = -11
+      ELSE IF( IFST.LT.1 .OR. IFST.GT.N ) THEN
+         INFO = -12
+      ELSE IF( ILST.LT.1 .OR. ILST.GT.N ) THEN
+         INFO = -13
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZTGEXC', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.1 )
+     $   RETURN
+      IF( IFST.EQ.ILST )
+     $   RETURN
+*
+      IF( IFST.LT.ILST ) THEN
+*
+         HERE = IFST
+*
+   10    CONTINUE
+*
+*        Swap with next one below
+*
+         CALL ZTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, LDZ,
+     $                HERE, INFO )
+         IF( INFO.NE.0 ) THEN
+            ILST = HERE
+            RETURN
+         END IF
+         HERE = HERE + 1
+         IF( HERE.LT.ILST )
+     $      GO TO 10
+         HERE = HERE - 1
+      ELSE
+         HERE = IFST - 1
+*
+   20    CONTINUE
+*
+*        Swap with next one above
+*
+         CALL ZTGEX2( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z, LDZ,
+     $                HERE, INFO )
+         IF( INFO.NE.0 ) THEN
+            ILST = HERE
+            RETURN
+         END IF
+         HERE = HERE - 1
+         IF( HERE.GE.ILST )
+     $      GO TO 20
+         HERE = HERE + 1
+      END IF
+      ILST = HERE
+      RETURN
+*
+*     End of ZTGEXC
+*
+      END
diff --git a/costa/native/external/lapack/ztgsen.f b/costa/native/external/lapack/ztgsen.f
new file mode 100644
index 000000000..2d7bffc0a
--- /dev/null
+++ b/costa/native/external/lapack/ztgsen.f
@@ -0,0 +1,645 @@
+      SUBROUTINE ZTGSEN( IJOB, WANTQ, WANTZ, SELECT, N, A, LDA, B, LDB,
+     $                   ALPHA, BETA, Q, LDQ, Z, LDZ, M, PL, PR, DIF,
+     $                   WORK, LWORK, IWORK, LIWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      LOGICAL            WANTQ, WANTZ
+      INTEGER            IJOB, INFO, LDA, LDB, LDQ, LDZ, LIWORK, LWORK,
+     $                   M, N
+      DOUBLE PRECISION   PL, PR
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            SELECT( * )
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   DIF( * )
+      COMPLEX*16         A( LDA, * ), ALPHA( * ), B( LDB, * ),
+     $                   BETA( * ), Q( LDQ, * ), WORK( * ), Z( LDZ, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTGSEN reorders the generalized Schur decomposition of a complex
+*  matrix pair (A, B) (in terms of an unitary equivalence trans-
+*  formation Q' * (A, B) * Z), so that a selected cluster of eigenvalues
+*  appears in the leading diagonal blocks of the pair (A,B). The leading
+*  columns of Q and Z form unitary bases of the corresponding left and
+*  right eigenspaces (deflating subspaces). (A, B) must be in
+*  generalized Schur canonical form, that is, A and B are both upper
+*  triangular.
+*
+*  ZTGSEN also computes the generalized eigenvalues
+*
+*           w(j)= ALPHA(j) / BETA(j)
+*
+*  of the reordered matrix pair (A, B).
+*
+*  Optionally, the routine computes estimates of reciprocal condition
+*  numbers for eigenvalues and eigenspaces. These are Difu[(A11,B11),
+*  (A22,B22)] and Difl[(A11,B11), (A22,B22)], i.e. the separation(s)
+*  between the matrix pairs (A11, B11) and (A22,B22) that correspond to
+*  the selected cluster and the eigenvalues outside the cluster, resp.,
+*  and norms of "projections" onto left and right eigenspaces w.r.t.
+*  the selected cluster in the (1,1)-block.
+*
+*
+*  Arguments
+*  =========
+*
+*  IJOB    (input) integer
+*          Specifies whether condition numbers are required for the
+*          cluster of eigenvalues (PL and PR) or the deflating subspaces
+*          (Difu and Difl):
+*           =0: Only reorder w.r.t. SELECT. No extras.
+*           =1: Reciprocal of norms of "projections" onto left and right
+*               eigenspaces w.r.t. the selected cluster (PL and PR).
+*           =2: Upper bounds on Difu and Difl. F-norm-based estimate
+*               (DIF(1:2)).
+*           =3: Estimate of Difu and Difl. 1-norm-based estimate
+*               (DIF(1:2)).
+*               About 5 times as expensive as IJOB = 2.
+*           =4: Compute PL, PR and DIF (i.e. 0, 1 and 2 above): Economic
+*               version to get it all.
+*           =5: Compute PL, PR and DIF (i.e. 0, 1 and 3 above)
+*
+*  WANTQ   (input) LOGICAL
+*          .TRUE. : update the left transformation matrix Q;
+*          .FALSE.: do not update Q.
+*
+*  WANTZ   (input) LOGICAL
+*          .TRUE. : update the right transformation matrix Z;
+*          .FALSE.: do not update Z.
+*
+*  SELECT  (input) LOGICAL array, dimension (N)
+*          SELECT specifies the eigenvalues in the selected cluster. To
+*          select an eigenvalue w(j), SELECT(j) must be set to
+*          .TRUE..
+*
+*  N       (input) INTEGER
+*          The order of the matrices A and B. N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension(LDA,N)
+*          On entry, the upper triangular matrix A, in generalized
+*          Schur canonical form.
+*          On exit, A is overwritten by the reordered matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX*16 array, dimension(LDB,N)
+*          On entry, the upper triangular matrix B, in generalized
+*          Schur canonical form.
+*          On exit, B is overwritten by the reordered matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  ALPHA   (output) COMPLEX*16 array, dimension (N)
+*  BETA    (output) COMPLEX*16 array, dimension (N)
+*          The diagonal elements of A and B, respectively,
+*          when the pair (A,B) has been reduced to generalized Schur
+*          form.  ALPHA(i)/BETA(i) i=1,...,N are the generalized
+*          eigenvalues.
+*
+*  Q       (input/output) COMPLEX*16 array, dimension (LDQ,N)
+*          On entry, if WANTQ = .TRUE., Q is an N-by-N matrix.
+*          On exit, Q has been postmultiplied by the left unitary
+*          transformation matrix which reorder (A, B); The leading M
+*          columns of Q form orthonormal bases for the specified pair of
+*          left eigenspaces (deflating subspaces).
+*          If WANTQ = .FALSE., Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q. LDQ >= 1.
+*          If WANTQ = .TRUE., LDQ >= N.
+*
+*  Z       (input/output) COMPLEX*16 array, dimension (LDZ,N)
+*          On entry, if WANTZ = .TRUE., Z is an N-by-N matrix.
+*          On exit, Z has been postmultiplied by the left unitary
+*          transformation matrix which reorder (A, B); The leading M
+*          columns of Z form orthonormal bases for the specified pair of
+*          left eigenspaces (deflating subspaces).
+*          If WANTZ = .FALSE., Z is not referenced.
+*
+*  LDZ     (input) INTEGER
+*          The leading dimension of the array Z. LDZ >= 1.
+*          If WANTZ = .TRUE., LDZ >= N.
+*
+*  M       (output) INTEGER
+*          The dimension of the specified pair of left and right
+*          eigenspaces, (deflating subspaces) 0 <= M <= N.
+*
+*  PL, PR  (output) DOUBLE PRECISION
+*          If IJOB = 1, 4 or 5, PL, PR are lower bounds on the
+*          reciprocal  of the norm of "projections" onto left and right
+*          eigenspace with respect to the selected cluster.
+*          0 < PL, PR <= 1.
+*          If M = 0 or M = N, PL = PR  = 1.
+*          If IJOB = 0, 2 or 3 PL, PR are not referenced.
+*
+*  DIF     (output) DOUBLE PRECISION array, dimension (2).
+*          If IJOB >= 2, DIF(1:2) store the estimates of Difu and Difl.
+*          If IJOB = 2 or 4, DIF(1:2) are F-norm-based upper bounds on
+*          Difu and Difl. If IJOB = 3 or 5, DIF(1:2) are 1-norm-based
+*          estimates of Difu and Difl, computed using reversed
+*          communication with ZLACON.
+*          If M = 0 or N, DIF(1:2) = F-norm([A, B]).
+*          If IJOB = 0 or 1, DIF is not referenced.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          IF IJOB = 0, WORK is not referenced.  Otherwise,
+*          on exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >=  1
+*          If IJOB = 1, 2 or 4, LWORK >=  2*M*(N-M)
+*          If IJOB = 3 or 5, LWORK >=  4*M*(N-M)
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace/output) INTEGER, dimension (LIWORK)
+*          IF IJOB = 0, IWORK is not referenced.  Otherwise,
+*          on exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+*
+*  LIWORK  (input) INTEGER
+*          The dimension of the array IWORK. LIWORK >= 1.
+*          If IJOB = 1, 2 or 4, LIWORK >=  N+2;
+*          If IJOB = 3 or 5, LIWORK >= MAX(N+2, 2*M*(N-M));
+*
+*          If LIWORK = -1, then a workspace query is assumed; the
+*          routine only calculates the optimal size of the IWORK array,
+*          returns this value as the first entry of the IWORK array, and
+*          no error message related to LIWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*            =0: Successful exit.
+*            <0: If INFO = -i, the i-th argument had an illegal value.
+*            =1: Reordering of (A, B) failed because the transformed
+*                matrix pair (A, B) would be too far from generalized
+*                Schur form; the problem is very ill-conditioned.
+*                (A, B) may have been partially reordered.
+*                If requested, 0 is returned in DIF(*), PL and PR.
+*
+*
+*  Further Details
+*  ===============
+*
+*  ZTGSEN first collects the selected eigenvalues by computing unitary
+*  U and W that move them to the top left corner of (A, B). In other
+*  words, the selected eigenvalues are the eigenvalues of (A11, B11) in
+*
+*                U'*(A, B)*W = (A11 A12) (B11 B12) n1
+*                              ( 0  A22),( 0  B22) n2
+*                                n1  n2    n1  n2
+*
+*  where N = n1+n2 and U' means the conjugate transpose of U. The first
+*  n1 columns of U and W span the specified pair of left and right
+*  eigenspaces (deflating subspaces) of (A, B).
+*
+*  If (A, B) has been obtained from the generalized real Schur
+*  decomposition of a matrix pair (C, D) = Q*(A, B)*Z', then the
+*  reordered generalized Schur form of (C, D) is given by
+*
+*           (C, D) = (Q*U)*(U'*(A, B)*W)*(Z*W)',
+*
+*  and the first n1 columns of Q*U and Z*W span the corresponding
+*  deflating subspaces of (C, D) (Q and Z store Q*U and Z*W, resp.).
+*
+*  Note that if the selected eigenvalue is sufficiently ill-conditioned,
+*  then its value may differ significantly from its value before
+*  reordering.
+*
+*  The reciprocal condition numbers of the left and right eigenspaces
+*  spanned by the first n1 columns of U and W (or Q*U and Z*W) may
+*  be returned in DIF(1:2), corresponding to Difu and Difl, resp.
+*
+*  The Difu and Difl are defined as:
+*
+*       Difu[(A11, B11), (A22, B22)] = sigma-min( Zu )
+*  and
+*       Difl[(A11, B11), (A22, B22)] = Difu[(A22, B22), (A11, B11)],
+*
+*  where sigma-min(Zu) is the smallest singular value of the
+*  (2*n1*n2)-by-(2*n1*n2) matrix
+*
+*       Zu = [ kron(In2, A11)  -kron(A22', In1) ]
+*            [ kron(In2, B11)  -kron(B22', In1) ].
+*
+*  Here, Inx is the identity matrix of size nx and A22' is the
+*  transpose of A22. kron(X, Y) is the Kronecker product between
+*  the matrices X and Y.
+*
+*  When DIF(2) is small, small changes in (A, B) can cause large changes
+*  in the deflating subspace. An approximate (asymptotic) bound on the
+*  maximum angular error in the computed deflating subspaces is
+*
+*       EPS * norm((A, B)) / DIF(2),
+*
+*  where EPS is the machine precision.
+*
+*  The reciprocal norm of the projectors on the left and right
+*  eigenspaces associated with (A11, B11) may be returned in PL and PR.
+*  They are computed as follows. First we compute L and R so that
+*  P*(A, B)*Q is block diagonal, where
+*
+*       P = ( I -L ) n1           Q = ( I R ) n1
+*           ( 0  I ) n2    and        ( 0 I ) n2
+*             n1 n2                    n1 n2
+*
+*  and (L, R) is the solution to the generalized Sylvester equation
+*
+*       A11*R - L*A22 = -A12
+*       B11*R - L*B22 = -B12
+*
+*  Then PL = (F-norm(L)**2+1)**(-1/2) and PR = (F-norm(R)**2+1)**(-1/2).
+*  An approximate (asymptotic) bound on the average absolute error of
+*  the selected eigenvalues is
+*
+*       EPS * norm((A, B)) / PL.
+*
+*  There are also global error bounds which valid for perturbations up
+*  to a certain restriction:  A lower bound (x) on the smallest
+*  F-norm(E,F) for which an eigenvalue of (A11, B11) may move and
+*  coalesce with an eigenvalue of (A22, B22) under perturbation (E,F),
+*  (i.e. (A + E, B + F), is
+*
+*   x = min(Difu,Difl)/((1/(PL*PL)+1/(PR*PR))**(1/2)+2*max(1/PL,1/PR)).
+*
+*  An approximate bound on x can be computed from DIF(1:2), PL and PR.
+*
+*  If y = ( F-norm(E,F) / x) <= 1, the angles between the perturbed
+*  (L', R') and unperturbed (L, R) left and right deflating subspaces
+*  associated with the selected cluster in the (1,1)-blocks can be
+*  bounded as
+*
+*   max-angle(L, L') <= arctan( y * PL / (1 - y * (1 - PL * PL)**(1/2))
+*   max-angle(R, R') <= arctan( y * PR / (1 - y * (1 - PR * PR)**(1/2))
+*
+*  See LAPACK User's Guide section 4.11 or the following references
+*  for more information.
+*
+*  Note that if the default method for computing the Frobenius-norm-
+*  based estimate DIF is not wanted (see ZLATDF), then the parameter
+*  IDIFJB (see below) should be changed from 3 to 4 (routine ZLATDF
+*  (IJOB = 2 will be used)). See ZTGSYL for more details.
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  References
+*  ==========
+*
+*  [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the
+*      Generalized Real Schur Form of a Regular Matrix Pair (A, B), in
+*      M.S. Moonen et al (eds), Linear Algebra for Large Scale and
+*      Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218.
+*
+*  [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified
+*      Eigenvalues of a Regular Matrix Pair (A, B) and Condition
+*      Estimation: Theory, Algorithms and Software, Report
+*      UMINF - 94.04, Department of Computing Science, Umea University,
+*      S-901 87 Umea, Sweden, 1994. Also as LAPACK Working Note 87.
+*      To appear in Numerical Algorithms, 1996.
+*
+*  [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software
+*      for Solving the Generalized Sylvester Equation and Estimating the
+*      Separation between Regular Matrix Pairs, Report UMINF - 93.23,
+*      Department of Computing Science, Umea University, S-901 87 Umea,
+*      Sweden, December 1993, Revised April 1994, Also as LAPACK working
+*      Note 75. To appear in ACM Trans. on Math. Software, Vol 22, No 1,
+*      1996.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            IDIFJB
+      PARAMETER          ( IDIFJB = 3 )
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, SWAP, WANTD, WANTD1, WANTD2, WANTP
+      INTEGER            I, IERR, IJB, K, KASE, KS, LIWMIN, LWMIN, MN2,
+     $                   N1, N2
+      DOUBLE PRECISION   DSCALE, DSUM, RDSCAL, SAFMIN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLACON, ZLACPY, ZLASSQ, ZSCAL, ZTGEXC,
+     $                   ZTGSYL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DCMPLX, DCONJG, MAX, SQRT
+*     ..
+*     .. External Functions ..
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           DLAMCH
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 .OR. LIWORK.EQ.-1 )
+*
+      IF( IJOB.LT.0 .OR. IJOB.GT.5 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN
+         INFO = -13
+      ELSE IF( LDZ.LT.1 .OR. ( WANTZ .AND. LDZ.LT.N ) ) THEN
+         INFO = -15
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZTGSEN', -INFO )
+         RETURN
+      END IF
+*
+      IERR = 0
+*
+      WANTP = IJOB.EQ.1 .OR. IJOB.GE.4
+      WANTD1 = IJOB.EQ.2 .OR. IJOB.EQ.4
+      WANTD2 = IJOB.EQ.3 .OR. IJOB.EQ.5
+      WANTD = WANTD1 .OR. WANTD2
+*
+*     Set M to the dimension of the specified pair of deflating
+*     subspaces.
+*
+      M = 0
+      DO 10 K = 1, N
+         ALPHA( K ) = A( K, K )
+         BETA( K ) = B( K, K )
+         IF( K.LT.N ) THEN
+            IF( SELECT( K ) )
+     $         M = M + 1
+         ELSE
+            IF( SELECT( N ) )
+     $         M = M + 1
+         END IF
+   10 CONTINUE
+*
+      IF( IJOB.EQ.1 .OR. IJOB.EQ.2 .OR. IJOB.EQ.4 ) THEN
+         LWMIN = MAX( 1, 2*M*( N-M ) )
+         LIWMIN = MAX( 1, N+2 )
+      ELSE IF( IJOB.EQ.3 .OR. IJOB.EQ.5 ) THEN
+         LWMIN = MAX( 1, 4*M*( N-M ) )
+         LIWMIN = MAX( 1, 2*M*( N-M ), N+2 )
+      ELSE
+         LWMIN = 1
+         LIWMIN = 1
+      END IF
+*
+      WORK( 1 ) = LWMIN
+      IWORK( 1 ) = LIWMIN
+*
+      IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -21
+      ELSE IF( LIWORK.LT.LIWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -23
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZTGSEN', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( M.EQ.N .OR. M.EQ.0 ) THEN
+         IF( WANTP ) THEN
+            PL = ONE
+            PR = ONE
+         END IF
+         IF( WANTD ) THEN
+            DSCALE = ZERO
+            DSUM = ONE
+            DO 20 I = 1, N
+               CALL ZLASSQ( N, A( 1, I ), 1, DSCALE, DSUM )
+               CALL ZLASSQ( N, B( 1, I ), 1, DSCALE, DSUM )
+   20       CONTINUE
+            DIF( 1 ) = DSCALE*SQRT( DSUM )
+            DIF( 2 ) = DIF( 1 )
+         END IF
+         GO TO 70
+      END IF
+*
+*     Get machine constant
+*
+      SAFMIN = DLAMCH( 'S' )
+*
+*     Collect the selected blocks at the top-left corner of (A, B).
+*
+      KS = 0
+      DO 30 K = 1, N
+         SWAP = SELECT( K )
+         IF( SWAP ) THEN
+            KS = KS + 1
+*
+*           Swap the K-th block to position KS. Compute unitary Q
+*           and Z that will swap adjacent diagonal blocks in (A, B).
+*
+            IF( K.NE.KS )
+     $         CALL ZTGEXC( WANTQ, WANTZ, N, A, LDA, B, LDB, Q, LDQ, Z,
+     $                      LDZ, K, KS, IERR )
+*
+            IF( IERR.GT.0 ) THEN
+*
+*              Swap is rejected: exit.
+*
+               INFO = 1
+               IF( WANTP ) THEN
+                  PL = ZERO
+                  PR = ZERO
+               END IF
+               IF( WANTD ) THEN
+                  DIF( 1 ) = ZERO
+                  DIF( 2 ) = ZERO
+               END IF
+               GO TO 70
+            END IF
+         END IF
+   30 CONTINUE
+      IF( WANTP ) THEN
+*
+*        Solve generalized Sylvester equation for R and L:
+*                   A11 * R - L * A22 = A12
+*                   B11 * R - L * B22 = B12
+*
+         N1 = M
+         N2 = N - M
+         I = N1 + 1
+         CALL ZLACPY( 'Full', N1, N2, A( 1, I ), LDA, WORK, N1 )
+         CALL ZLACPY( 'Full', N1, N2, B( 1, I ), LDB, WORK( N1*N2+1 ),
+     $                N1 )
+         IJB = 0
+         CALL ZTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK,
+     $                N1, B, LDB, B( I, I ), LDB, WORK( N1*N2+1 ), N1,
+     $                DSCALE, DIF( 1 ), WORK( N1*N2*2+1 ),
+     $                LWORK-2*N1*N2, IWORK, IERR )
+*
+*        Estimate the reciprocal of norms of "projections" onto
+*        left and right eigenspaces
+*
+         RDSCAL = ZERO
+         DSUM = ONE
+         CALL ZLASSQ( N1*N2, WORK, 1, RDSCAL, DSUM )
+         PL = RDSCAL*SQRT( DSUM )
+         IF( PL.EQ.ZERO ) THEN
+            PL = ONE
+         ELSE
+            PL = DSCALE / ( SQRT( DSCALE*DSCALE / PL+PL )*SQRT( PL ) )
+         END IF
+         RDSCAL = ZERO
+         DSUM = ONE
+         CALL ZLASSQ( N1*N2, WORK( N1*N2+1 ), 1, RDSCAL, DSUM )
+         PR = RDSCAL*SQRT( DSUM )
+         IF( PR.EQ.ZERO ) THEN
+            PR = ONE
+         ELSE
+            PR = DSCALE / ( SQRT( DSCALE*DSCALE / PR+PR )*SQRT( PR ) )
+         END IF
+      END IF
+      IF( WANTD ) THEN
+*
+*        Compute estimates Difu and Difl.
+*
+         IF( WANTD1 ) THEN
+            N1 = M
+            N2 = N - M
+            I = N1 + 1
+            IJB = IDIFJB
+*
+*           Frobenius norm-based Difu estimate.
+*
+            CALL ZTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA, WORK,
+     $                   N1, B, LDB, B( I, I ), LDB, WORK( N1*N2+1 ),
+     $                   N1, DSCALE, DIF( 1 ), WORK( N1*N2*2+1 ),
+     $                   LWORK-2*N1*N2, IWORK, IERR )
+*
+*           Frobenius norm-based Difl estimate.
+*
+            CALL ZTGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA, WORK,
+     $                   N2, B( I, I ), LDB, B, LDB, WORK( N1*N2+1 ),
+     $                   N2, DSCALE, DIF( 2 ), WORK( N1*N2*2+1 ),
+     $                   LWORK-2*N1*N2, IWORK, IERR )
+         ELSE
+*
+*           Compute 1-norm-based estimates of Difu and Difl using
+*           reversed communication with ZLACON. In each step a
+*           generalized Sylvester equation or a transposed variant
+*           is solved.
+*
+            KASE = 0
+            N1 = M
+            N2 = N - M
+            I = N1 + 1
+            IJB = 0
+            MN2 = 2*N1*N2
+*
+*           1-norm-based estimate of Difu.
+*
+   40       CONTINUE
+            CALL ZLACON( MN2, WORK( MN2+1 ), WORK, DIF( 1 ), KASE )
+            IF( KASE.NE.0 ) THEN
+               IF( KASE.EQ.1 ) THEN
+*
+*                 Solve generalized Sylvester equation
+*
+                  CALL ZTGSYL( 'N', IJB, N1, N2, A, LDA, A( I, I ), LDA,
+     $                         WORK, N1, B, LDB, B( I, I ), LDB,
+     $                         WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ),
+     $                         WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK,
+     $                         IERR )
+               ELSE
+*
+*                 Solve the transposed variant.
+*
+                  CALL ZTGSYL( 'C', IJB, N1, N2, A, LDA, A( I, I ), LDA,
+     $                         WORK, N1, B, LDB, B( I, I ), LDB,
+     $                         WORK( N1*N2+1 ), N1, DSCALE, DIF( 1 ),
+     $                         WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK,
+     $                         IERR )
+               END IF
+               GO TO 40
+            END IF
+            DIF( 1 ) = DSCALE / DIF( 1 )
+*
+*           1-norm-based estimate of Difl.
+*
+   50       CONTINUE
+            CALL ZLACON( MN2, WORK( MN2+1 ), WORK, DIF( 2 ), KASE )
+            IF( KASE.NE.0 ) THEN
+               IF( KASE.EQ.1 ) THEN
+*
+*                 Solve generalized Sylvester equation
+*
+                  CALL ZTGSYL( 'N', IJB, N2, N1, A( I, I ), LDA, A, LDA,
+     $                         WORK, N2, B( I, I ), LDB, B, LDB,
+     $                         WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ),
+     $                         WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK,
+     $                         IERR )
+               ELSE
+*
+*                 Solve the transposed variant.
+*
+                  CALL ZTGSYL( 'C', IJB, N2, N1, A( I, I ), LDA, A, LDA,
+     $                         WORK, N2, B, LDB, B( I, I ), LDB,
+     $                         WORK( N1*N2+1 ), N2, DSCALE, DIF( 2 ),
+     $                         WORK( N1*N2*2+1 ), LWORK-2*N1*N2, IWORK,
+     $                         IERR )
+               END IF
+               GO TO 50
+            END IF
+            DIF( 2 ) = DSCALE / DIF( 2 )
+         END IF
+      END IF
+*
+*     If B(K,K) is complex, make it real and positive (normalization
+*     of the generalized Schur form) and Store the generalized
+*     eigenvalues of reordered pair (A, B)
+*
+      DO 60 K = 1, N
+         DSCALE = ABS( B( K, K ) )
+         IF( DSCALE.GT.SAFMIN ) THEN
+            WORK( 1 ) = DCONJG( B( K, K ) / DSCALE )
+            WORK( 2 ) = B( K, K ) / DSCALE
+            B( K, K ) = DSCALE
+            CALL ZSCAL( N-K, WORK( 1 ), B( K, K+1 ), LDB )
+            CALL ZSCAL( N-K+1, WORK( 1 ), A( K, K ), LDA )
+            IF( WANTQ )
+     $         CALL ZSCAL( N, WORK( 2 ), Q( 1, K ), 1 )
+         ELSE
+            B( K, K ) = DCMPLX( ZERO, ZERO )
+         END IF
+*
+         ALPHA( K ) = A( K, K )
+         BETA( K ) = B( K, K )
+*
+   60 CONTINUE
+*
+   70 CONTINUE
+*
+      WORK( 1 ) = LWMIN
+      IWORK( 1 ) = LIWMIN
+*
+      RETURN
+*
+*     End of ZTGSEN
+*
+      END
diff --git a/costa/native/external/lapack/ztgsja.f b/costa/native/external/lapack/ztgsja.f
new file mode 100644
index 000000000..d3415fe5c
--- /dev/null
+++ b/costa/native/external/lapack/ztgsja.f
@@ -0,0 +1,526 @@
+      SUBROUTINE ZTGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B,
+     $                   LDB, TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV,
+     $                   Q, LDQ, WORK, NCYCLE, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          JOBQ, JOBU, JOBV
+      INTEGER            INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N,
+     $                   NCYCLE, P
+      DOUBLE PRECISION   TOLA, TOLB
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   ALPHA( * ), BETA( * )
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), Q( LDQ, * ),
+     $                   U( LDU, * ), V( LDV, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTGSJA computes the generalized singular value decomposition (GSVD)
+*  of two complex upper triangular (or trapezoidal) matrices A and B.
+*
+*  On entry, it is assumed that matrices A and B have the following
+*  forms, which may be obtained by the preprocessing subroutine ZGGSVP
+*  from a general M-by-N matrix A and P-by-N matrix B:
+*
+*               N-K-L  K    L
+*     A =    K ( 0    A12  A13 ) if M-K-L >= 0;
+*            L ( 0     0   A23 )
+*        M-K-L ( 0     0    0  )
+*
+*             N-K-L  K    L
+*     A =  K ( 0    A12  A13 ) if M-K-L < 0;
+*        M-K ( 0     0   A23 )
+*
+*             N-K-L  K    L
+*     B =  L ( 0     0   B13 )
+*        P-L ( 0     0    0  )
+*
+*  where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular
+*  upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0,
+*  otherwise A23 is (M-K)-by-L upper trapezoidal.
+*
+*  On exit,
+*
+*         U'*A*Q = D1*( 0 R ),    V'*B*Q = D2*( 0 R ),
+*
+*  where U, V and Q are unitary matrices, Z' denotes the conjugate
+*  transpose of Z, R is a nonsingular upper triangular matrix, and D1
+*  and D2 are ``diagonal'' matrices, which are of the following
+*  structures:
+*
+*  If M-K-L >= 0,
+*
+*                      K  L
+*         D1 =     K ( I  0 )
+*                  L ( 0  C )
+*              M-K-L ( 0  0 )
+*
+*                     K  L
+*         D2 = L   ( 0  S )
+*              P-L ( 0  0 )
+*
+*                 N-K-L  K    L
+*    ( 0 R ) = K (  0   R11  R12 ) K
+*              L (  0    0   R22 ) L
+*
+*  where
+*
+*    C = diag( ALPHA(K+1), ... , ALPHA(K+L) ),
+*    S = diag( BETA(K+1),  ... , BETA(K+L) ),
+*    C**2 + S**2 = I.
+*
+*    R is stored in A(1:K+L,N-K-L+1:N) on exit.
+*
+*  If M-K-L < 0,
+*
+*                 K M-K K+L-M
+*      D1 =   K ( I  0    0   )
+*           M-K ( 0  C    0   )
+*
+*                   K M-K K+L-M
+*      D2 =   M-K ( 0  S    0   )
+*           K+L-M ( 0  0    I   )
+*             P-L ( 0  0    0   )
+*
+*                 N-K-L  K   M-K  K+L-M
+* ( 0 R ) =    K ( 0    R11  R12  R13  )
+*            M-K ( 0     0   R22  R23  )
+*          K+L-M ( 0     0    0   R33  )
+*
+*  where
+*  C = diag( ALPHA(K+1), ... , ALPHA(M) ),
+*  S = diag( BETA(K+1),  ... , BETA(M) ),
+*  C**2 + S**2 = I.
+*
+*  R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored
+*      (  0  R22 R23 )
+*  in B(M-K+1:L,N+M-K-L+1:N) on exit.
+*
+*  The computation of the unitary transformation matrices U, V or Q
+*  is optional.  These matrices may either be formed explicitly, or they
+*  may be postmultiplied into input matrices U1, V1, or Q1.
+*
+*  Arguments
+*  =========
+*
+*  JOBU    (input) CHARACTER*1
+*          = 'U':  U must contain a unitary matrix U1 on entry, and
+*                  the product U1*U is returned;
+*          = 'I':  U is initialized to the unit matrix, and the
+*                  unitary matrix U is returned;
+*          = 'N':  U is not computed.
+*
+*  JOBV    (input) CHARACTER*1
+*          = 'V':  V must contain a unitary matrix V1 on entry, and
+*                  the product V1*V is returned;
+*          = 'I':  V is initialized to the unit matrix, and the
+*                  unitary matrix V is returned;
+*          = 'N':  V is not computed.
+*
+*  JOBQ    (input) CHARACTER*1
+*          = 'Q':  Q must contain a unitary matrix Q1 on entry, and
+*                  the product Q1*Q is returned;
+*          = 'I':  Q is initialized to the unit matrix, and the
+*                  unitary matrix Q is returned;
+*          = 'N':  Q is not computed.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  P       (input) INTEGER
+*          The number of rows of the matrix B.  P >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrices A and B.  N >= 0.
+*
+*  K       (input) INTEGER
+*  L       (input) INTEGER
+*          K and L specify the subblocks in the input matrices A and B:
+*          A23 = A(K+1:MIN(K+L,M),N-L+1:N) and B13 = B(1:L,,N-L+1:N)
+*          of A and B, whose GSVD is going to be computed by ZTGSJA.
+*          See Further details.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the M-by-N matrix A.
+*          On exit, A(N-K+1:N,1:MIN(K+L,M) ) contains the triangular
+*          matrix R or part of R.  See Purpose for details.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,N)
+*          On entry, the P-by-N matrix B.
+*          On exit, if necessary, B(M-K+1:L,N+M-K-L+1:N) contains
+*          a part of R.  See Purpose for details.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,P).
+*
+*  TOLA    (input) DOUBLE PRECISION
+*  TOLB    (input) DOUBLE PRECISION
+*          TOLA and TOLB are the convergence criteria for the Jacobi-
+*          Kogbetliantz iteration procedure. Generally, they are the
+*          same as used in the preprocessing step, say
+*              TOLA = MAX(M,N)*norm(A)*MAZHEPS,
+*              TOLB = MAX(P,N)*norm(B)*MAZHEPS.
+*
+*  ALPHA   (output) DOUBLE PRECISION array, dimension (N)
+*  BETA    (output) DOUBLE PRECISION array, dimension (N)
+*          On exit, ALPHA and BETA contain the generalized singular
+*          value pairs of A and B;
+*            ALPHA(1:K) = 1,
+*            BETA(1:K)  = 0,
+*          and if M-K-L >= 0,
+*            ALPHA(K+1:K+L) = diag(C),
+*            BETA(K+1:K+L)  = diag(S),
+*          or if M-K-L < 0,
+*            ALPHA(K+1:M)= C, ALPHA(M+1:K+L)= 0
+*            BETA(K+1:M) = S, BETA(M+1:K+L) = 1.
+*          Furthermore, if K+L < N,
+*            ALPHA(K+L+1:N) = 0
+*            BETA(K+L+1:N)  = 0.
+*
+*  U       (input/output) COMPLEX*16 array, dimension (LDU,M)
+*          On entry, if JOBU = 'U', U must contain a matrix U1 (usually
+*          the unitary matrix returned by ZGGSVP).
+*          On exit,
+*          if JOBU = 'I', U contains the unitary matrix U;
+*          if JOBU = 'U', U contains the product U1*U.
+*          If JOBU = 'N', U is not referenced.
+*
+*  LDU     (input) INTEGER
+*          The leading dimension of the array U. LDU >= max(1,M) if
+*          JOBU = 'U'; LDU >= 1 otherwise.
+*
+*  V       (input/output) COMPLEX*16 array, dimension (LDV,P)
+*          On entry, if JOBV = 'V', V must contain a matrix V1 (usually
+*          the unitary matrix returned by ZGGSVP).
+*          On exit,
+*          if JOBV = 'I', V contains the unitary matrix V;
+*          if JOBV = 'V', V contains the product V1*V.
+*          If JOBV = 'N', V is not referenced.
+*
+*  LDV     (input) INTEGER
+*          The leading dimension of the array V. LDV >= max(1,P) if
+*          JOBV = 'V'; LDV >= 1 otherwise.
+*
+*  Q       (input/output) COMPLEX*16 array, dimension (LDQ,N)
+*          On entry, if JOBQ = 'Q', Q must contain a matrix Q1 (usually
+*          the unitary matrix returned by ZGGSVP).
+*          On exit,
+*          if JOBQ = 'I', Q contains the unitary matrix Q;
+*          if JOBQ = 'Q', Q contains the product Q1*Q.
+*          If JOBQ = 'N', Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q. LDQ >= max(1,N) if
+*          JOBQ = 'Q'; LDQ >= 1 otherwise.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  NCYCLE  (output) INTEGER
+*          The number of cycles required for convergence.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*          = 1:  the procedure does not converge after MAXIT cycles.
+*
+*  Internal Parameters
+*  ===================
+*
+*  MAXIT   INTEGER
+*          MAXIT specifies the total loops that the iterative procedure
+*          may take. If after MAXIT cycles, the routine fails to
+*          converge, we return INFO = 1.
+*
+*  Further Details
+*  ===============
+*
+*  ZTGSJA essentially uses a variant of Kogbetliantz algorithm to reduce
+*  min(L,M-K)-by-L triangular (or trapezoidal) matrix A23 and L-by-L
+*  matrix B13 to the form:
+*
+*           U1'*A13*Q1 = C1*R1; V1'*B13*Q1 = S1*R1,
+*
+*  where U1, V1 and Q1 are unitary matrix, and Z' is the conjugate
+*  transpose of Z.  C1 and S1 are diagonal matrices satisfying
+*
+*                C1**2 + S1**2 = I,
+*
+*  and R1 is an L-by-L nonsingular upper triangular matrix.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            MAXIT
+      PARAMETER          ( MAXIT = 40 )
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+      COMPLEX*16         CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+*
+      LOGICAL            INITQ, INITU, INITV, UPPER, WANTQ, WANTU, WANTV
+      INTEGER            I, J, KCYCLE
+      DOUBLE PRECISION   A1, A3, B1, B3, CSQ, CSU, CSV, ERROR, GAMMA,
+     $                   RWK, SSMIN
+      COMPLEX*16         A2, B2, SNQ, SNU, SNV
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DLARTG, XERBLA, ZCOPY, ZDSCAL, ZLAGS2, ZLAPLL,
+     $                   ZLASET, ZROT
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCONJG, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      INITU = LSAME( JOBU, 'I' )
+      WANTU = INITU .OR. LSAME( JOBU, 'U' )
+*
+      INITV = LSAME( JOBV, 'I' )
+      WANTV = INITV .OR. LSAME( JOBV, 'V' )
+*
+      INITQ = LSAME( JOBQ, 'I' )
+      WANTQ = INITQ .OR. LSAME( JOBQ, 'Q' )
+*
+      INFO = 0
+      IF( .NOT.( INITU .OR. WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( INITV .OR. WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( INITQ .OR. WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( P.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LDB.LT.MAX( 1, P ) ) THEN
+         INFO = -12
+      ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN
+         INFO = -18
+      ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN
+         INFO = -20
+      ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN
+         INFO = -22
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZTGSJA', -INFO )
+         RETURN
+      END IF
+*
+*     Initialize U, V and Q, if necessary
+*
+      IF( INITU )
+     $   CALL ZLASET( 'Full', M, M, CZERO, CONE, U, LDU )
+      IF( INITV )
+     $   CALL ZLASET( 'Full', P, P, CZERO, CONE, V, LDV )
+      IF( INITQ )
+     $   CALL ZLASET( 'Full', N, N, CZERO, CONE, Q, LDQ )
+*
+*     Loop until convergence
+*
+      UPPER = .FALSE.
+      DO 40 KCYCLE = 1, MAXIT
+*
+         UPPER = .NOT.UPPER
+*
+         DO 20 I = 1, L - 1
+            DO 10 J = I + 1, L
+*
+               A1 = ZERO
+               A2 = CZERO
+               A3 = ZERO
+               IF( K+I.LE.M )
+     $            A1 = DBLE( A( K+I, N-L+I ) )
+               IF( K+J.LE.M )
+     $            A3 = DBLE( A( K+J, N-L+J ) )
+*
+               B1 = DBLE( B( I, N-L+I ) )
+               B3 = DBLE( B( J, N-L+J ) )
+*
+               IF( UPPER ) THEN
+                  IF( K+I.LE.M )
+     $               A2 = A( K+I, N-L+J )
+                  B2 = B( I, N-L+J )
+               ELSE
+                  IF( K+J.LE.M )
+     $               A2 = A( K+J, N-L+I )
+                  B2 = B( J, N-L+I )
+               END IF
+*
+               CALL ZLAGS2( UPPER, A1, A2, A3, B1, B2, B3, CSU, SNU,
+     $                      CSV, SNV, CSQ, SNQ )
+*
+*              Update (K+I)-th and (K+J)-th rows of matrix A: U'*A
+*
+               IF( K+J.LE.M )
+     $            CALL ZROT( L, A( K+J, N-L+1 ), LDA, A( K+I, N-L+1 ),
+     $                       LDA, CSU, DCONJG( SNU ) )
+*
+*              Update I-th and J-th rows of matrix B: V'*B
+*
+               CALL ZROT( L, B( J, N-L+1 ), LDB, B( I, N-L+1 ), LDB,
+     $                    CSV, DCONJG( SNV ) )
+*
+*              Update (N-L+I)-th and (N-L+J)-th columns of matrices
+*              A and B: A*Q and B*Q
+*
+               CALL ZROT( MIN( K+L, M ), A( 1, N-L+J ), 1,
+     $                    A( 1, N-L+I ), 1, CSQ, SNQ )
+*
+               CALL ZROT( L, B( 1, N-L+J ), 1, B( 1, N-L+I ), 1, CSQ,
+     $                    SNQ )
+*
+               IF( UPPER ) THEN
+                  IF( K+I.LE.M )
+     $               A( K+I, N-L+J ) = CZERO
+                  B( I, N-L+J ) = CZERO
+               ELSE
+                  IF( K+J.LE.M )
+     $               A( K+J, N-L+I ) = CZERO
+                  B( J, N-L+I ) = CZERO
+               END IF
+*
+*              Ensure that the diagonal elements of A and B are real.
+*
+               IF( K+I.LE.M )
+     $            A( K+I, N-L+I ) = DBLE( A( K+I, N-L+I ) )
+               IF( K+J.LE.M )
+     $            A( K+J, N-L+J ) = DBLE( A( K+J, N-L+J ) )
+               B( I, N-L+I ) = DBLE( B( I, N-L+I ) )
+               B( J, N-L+J ) = DBLE( B( J, N-L+J ) )
+*
+*              Update unitary matrices U, V, Q, if desired.
+*
+               IF( WANTU .AND. K+J.LE.M )
+     $            CALL ZROT( M, U( 1, K+J ), 1, U( 1, K+I ), 1, CSU,
+     $                       SNU )
+*
+               IF( WANTV )
+     $            CALL ZROT( P, V( 1, J ), 1, V( 1, I ), 1, CSV, SNV )
+*
+               IF( WANTQ )
+     $            CALL ZROT( N, Q( 1, N-L+J ), 1, Q( 1, N-L+I ), 1, CSQ,
+     $                       SNQ )
+*
+   10       CONTINUE
+   20    CONTINUE
+*
+         IF( .NOT.UPPER ) THEN
+*
+*           The matrices A13 and B13 were lower triangular at the start
+*           of the cycle, and are now upper triangular.
+*
+*           Convergence test: test the parallelism of the corresponding
+*           rows of A and B.
+*
+            ERROR = ZERO
+            DO 30 I = 1, MIN( L, M-K )
+               CALL ZCOPY( L-I+1, A( K+I, N-L+I ), LDA, WORK, 1 )
+               CALL ZCOPY( L-I+1, B( I, N-L+I ), LDB, WORK( L+1 ), 1 )
+               CALL ZLAPLL( L-I+1, WORK, 1, WORK( L+1 ), 1, SSMIN )
+               ERROR = MAX( ERROR, SSMIN )
+   30       CONTINUE
+*
+            IF( ABS( ERROR ).LE.MIN( TOLA, TOLB ) )
+     $         GO TO 50
+         END IF
+*
+*        End of cycle loop
+*
+   40 CONTINUE
+*
+*     The algorithm has not converged after MAXIT cycles.
+*
+      INFO = 1
+      GO TO 100
+*
+   50 CONTINUE
+*
+*     If ERROR <= MIN(TOLA,TOLB), then the algorithm has converged.
+*     Compute the generalized singular value pairs (ALPHA, BETA), and
+*     set the triangular matrix R to array A.
+*
+      DO 60 I = 1, K
+         ALPHA( I ) = ONE
+         BETA( I ) = ZERO
+   60 CONTINUE
+*
+      DO 70 I = 1, MIN( L, M-K )
+*
+         A1 = DBLE( A( K+I, N-L+I ) )
+         B1 = DBLE( B( I, N-L+I ) )
+*
+         IF( A1.NE.ZERO ) THEN
+            GAMMA = B1 / A1
+*
+            IF( GAMMA.LT.ZERO ) THEN
+               CALL ZDSCAL( L-I+1, -ONE, B( I, N-L+I ), LDB )
+               IF( WANTV )
+     $            CALL ZDSCAL( P, -ONE, V( 1, I ), 1 )
+            END IF
+*
+            CALL DLARTG( ABS( GAMMA ), ONE, BETA( K+I ), ALPHA( K+I ),
+     $                   RWK )
+*
+            IF( ALPHA( K+I ).GE.BETA( K+I ) ) THEN
+               CALL ZDSCAL( L-I+1, ONE / ALPHA( K+I ), A( K+I, N-L+I ),
+     $                      LDA )
+            ELSE
+               CALL ZDSCAL( L-I+1, ONE / BETA( K+I ), B( I, N-L+I ),
+     $                      LDB )
+               CALL ZCOPY( L-I+1, B( I, N-L+I ), LDB, A( K+I, N-L+I ),
+     $                     LDA )
+            END IF
+*
+         ELSE
+            ALPHA( K+I ) = ZERO
+            BETA( K+I ) = ONE
+            CALL ZCOPY( L-I+1, B( I, N-L+I ), LDB, A( K+I, N-L+I ),
+     $                  LDA )
+         END IF
+   70 CONTINUE
+*
+*     Post-assignment
+*
+      DO 80 I = M + 1, K + L
+         ALPHA( I ) = ZERO
+         BETA( I ) = ONE
+   80 CONTINUE
+*
+      IF( K+L.LT.N ) THEN
+         DO 90 I = K + L + 1, N
+            ALPHA( I ) = ZERO
+            BETA( I ) = ZERO
+   90    CONTINUE
+      END IF
+*
+  100 CONTINUE
+      NCYCLE = KCYCLE
+*
+      RETURN
+*
+*     End of ZTGSJA
+*
+      END
diff --git a/costa/native/external/lapack/ztgsna.f b/costa/native/external/lapack/ztgsna.f
new file mode 100644
index 000000000..4f02aab17
--- /dev/null
+++ b/costa/native/external/lapack/ztgsna.f
@@ -0,0 +1,402 @@
+      SUBROUTINE ZTGSNA( JOB, HOWMNY, SELECT, N, A, LDA, B, LDB, VL,
+     $                   LDVL, VR, LDVR, S, DIF, MM, M, WORK, LWORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          HOWMNY, JOB
+      INTEGER            INFO, LDA, LDB, LDVL, LDVR, LWORK, M, MM, N
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            SELECT( * )
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   DIF( * ), S( * )
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), VL( LDVL, * ),
+     $                   VR( LDVR, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTGSNA estimates reciprocal condition numbers for specified
+*  eigenvalues and/or eigenvectors of a matrix pair (A, B).
+*
+*  (A, B) must be in generalized Schur canonical form, that is, A and
+*  B are both upper triangular.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          Specifies whether condition numbers are required for
+*          eigenvalues (S) or eigenvectors (DIF):
+*          = 'E': for eigenvalues only (S);
+*          = 'V': for eigenvectors only (DIF);
+*          = 'B': for both eigenvalues and eigenvectors (S and DIF).
+*
+*  HOWMNY  (input) CHARACTER*1
+*          = 'A': compute condition numbers for all eigenpairs;
+*          = 'S': compute condition numbers for selected eigenpairs
+*                 specified by the array SELECT.
+*
+*  SELECT  (input) LOGICAL array, dimension (N)
+*          If HOWMNY = 'S', SELECT specifies the eigenpairs for which
+*          condition numbers are required. To select condition numbers
+*          for the corresponding j-th eigenvalue and/or eigenvector,
+*          SELECT(j) must be set to .TRUE..
+*          If HOWMNY = 'A', SELECT is not referenced.
+*
+*  N       (input) INTEGER
+*          The order of the square matrix pair (A, B). N >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,N)
+*          The upper triangular matrix A in the pair (A,B).
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  B       (input) COMPLEX*16 array, dimension (LDB,N)
+*          The upper triangular matrix B in the pair (A, B).
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  VL      (input) COMPLEX*16 array, dimension (LDVL,M)
+*          IF JOB = 'E' or 'B', VL must contain left eigenvectors of
+*          (A, B), corresponding to the eigenpairs specified by HOWMNY
+*          and SELECT.  The eigenvectors must be stored in consecutive
+*          columns of VL, as returned by ZTGEVC.
+*          If JOB = 'V', VL is not referenced.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the array VL. LDVL >= 1; and
+*          If JOB = 'E' or 'B', LDVL >= N.
+*
+*  VR      (input) COMPLEX*16 array, dimension (LDVR,M)
+*          IF JOB = 'E' or 'B', VR must contain right eigenvectors of
+*          (A, B), corresponding to the eigenpairs specified by HOWMNY
+*          and SELECT.  The eigenvectors must be stored in consecutive
+*          columns of VR, as returned by ZTGEVC.
+*          If JOB = 'V', VR is not referenced.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the array VR. LDVR >= 1;
+*          If JOB = 'E' or 'B', LDVR >= N.
+*
+*  S       (output) DOUBLE PRECISION array, dimension (MM)
+*          If JOB = 'E' or 'B', the reciprocal condition numbers of the
+*          selected eigenvalues, stored in consecutive elements of the
+*          array.
+*          If JOB = 'V', S is not referenced.
+*
+*  DIF     (output) DOUBLE PRECISION array, dimension (MM)
+*          If JOB = 'V' or 'B', the estimated reciprocal condition
+*          numbers of the selected eigenvectors, stored in consecutive
+*          elements of the array.
+*          If the eigenvalues cannot be reordered to compute DIF(j),
+*          DIF(j) is set to 0; this can only occur when the true value
+*          would be very small anyway.
+*          For each eigenvalue/vector specified by SELECT, DIF stores
+*          a Frobenius norm-based estimate of Difl.
+*          If JOB = 'E', DIF is not referenced.
+*
+*  MM      (input) INTEGER
+*          The number of elements in the arrays S and DIF. MM >= M.
+*
+*  M       (output) INTEGER
+*          The number of elements of the arrays S and DIF used to store
+*          the specified condition numbers; for each selected eigenvalue
+*          one element is used. If HOWMNY = 'A', M is set to N.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          If JOB = 'E', WORK is not referenced.  Otherwise,
+*          on exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK  (input) INTEGER
+*          The dimension of the array WORK. LWORK >= 1.
+*          If JOB = 'V' or 'B', LWORK >= 2*N*N.
+*
+*  IWORK   (workspace) INTEGER array, dimension (N+2)
+*          If JOB = 'E', IWORK is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0: Successful exit
+*          < 0: If INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The reciprocal of the condition number of the i-th generalized
+*  eigenvalue w = (a, b) is defined as
+*
+*          S(I) = (|v'Au|**2 + |v'Bu|**2)**(1/2) / (norm(u)*norm(v))
+*
+*  where u and v are the right and left eigenvectors of (A, B)
+*  corresponding to w; |z| denotes the absolute value of the complex
+*  number, and norm(u) denotes the 2-norm of the vector u. The pair
+*  (a, b) corresponds to an eigenvalue w = a/b (= v'Au/v'Bu) of the
+*  matrix pair (A, B). If both a and b equal zero, then (A,B) is
+*  singular and S(I) = -1 is returned.
+*
+*  An approximate error bound on the chordal distance between the i-th
+*  computed generalized eigenvalue w and the corresponding exact
+*  eigenvalue lambda is
+*
+*          chord(w, lambda) <=   EPS * norm(A, B) / S(I),
+*
+*  where EPS is the machine precision.
+*
+*  The reciprocal of the condition number of the right eigenvector u
+*  and left eigenvector v corresponding to the generalized eigenvalue w
+*  is defined as follows. Suppose
+*
+*                   (A, B) = ( a   *  ) ( b  *  )  1
+*                            ( 0  A22 ),( 0 B22 )  n-1
+*                              1  n-1     1 n-1
+*
+*  Then the reciprocal condition number DIF(I) is
+*
+*          Difl[(a, b), (A22, B22)]  = sigma-min( Zl )
+*
+*  where sigma-min(Zl) denotes the smallest singular value of
+*
+*         Zl = [ kron(a, In-1) -kron(1, A22) ]
+*              [ kron(b, In-1) -kron(1, B22) ].
+*
+*  Here In-1 is the identity matrix of size n-1 and X' is the conjugate
+*  transpose of X. kron(X, Y) is the Kronecker product between the
+*  matrices X and Y.
+*
+*  We approximate the smallest singular value of Zl with an upper
+*  bound. This is done by ZLATDF.
+*
+*  An approximate error bound for a computed eigenvector VL(i) or
+*  VR(i) is given by
+*
+*                      EPS * norm(A, B) / DIF(i).
+*
+*  See ref. [2-3] for more details and further references.
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  References
+*  ==========
+*
+*  [1] B. Kagstrom; A Direct Method for Reordering Eigenvalues in the
+*      Generalized Real Schur Form of a Regular Matrix Pair (A, B), in
+*      M.S. Moonen et al (eds), Linear Algebra for Large Scale and
+*      Real-Time Applications, Kluwer Academic Publ. 1993, pp 195-218.
+*
+*  [2] B. Kagstrom and P. Poromaa; Computing Eigenspaces with Specified
+*      Eigenvalues of a Regular Matrix Pair (A, B) and Condition
+*      Estimation: Theory, Algorithms and Software, Report
+*      UMINF - 94.04, Department of Computing Science, Umea University,
+*      S-901 87 Umea, Sweden, 1994. Also as LAPACK Working Note 87.
+*      To appear in Numerical Algorithms, 1996.
+*
+*  [3] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software
+*      for Solving the Generalized Sylvester Equation and Estimating the
+*      Separation between Regular Matrix Pairs, Report UMINF - 93.23,
+*      Department of Computing Science, Umea University, S-901 87 Umea,
+*      Sweden, December 1993, Revised April 1994, Also as LAPACK Working
+*      Note 75.
+*      To appear in ACM Trans. on Math. Software, Vol 22, No 1, 1996.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      INTEGER            IDIFJB
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, IDIFJB = 3 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, SOMCON, WANTBH, WANTDF, WANTS
+      INTEGER            I, IERR, IFST, ILST, K, KS, LLWRK, LWMIN, N1,
+     $                   N2
+      DOUBLE PRECISION   BIGNUM, COND, EPS, LNRM, RNRM, SCALE, SMLNUM
+      COMPLEX*16         YHAX, YHBX
+*     ..
+*     .. Local Arrays ..
+      COMPLEX*16         DUMMY( 1 ), DUMMY1( 1 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, DLAPY2, DZNRM2
+      COMPLEX*16         ZDOTC
+      EXTERNAL           LSAME, DLAMCH, DLAPY2, DZNRM2, ZDOTC
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZGEMV, ZLACPY, ZTGEXC, ZTGSYL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DCMPLX, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      WANTBH = LSAME( JOB, 'B' )
+      WANTS = LSAME( JOB, 'E' ) .OR. WANTBH
+      WANTDF = LSAME( JOB, 'V' ) .OR. WANTBH
+*
+      SOMCON = LSAME( HOWMNY, 'S' )
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      IF( LSAME( JOB, 'V' ) .OR. LSAME( JOB, 'B' ) ) THEN
+         LWMIN = MAX( 1, 2*N*N )
+      ELSE
+         LWMIN = 1
+      END IF
+*
+      IF( .NOT.WANTS .AND. .NOT.WANTDF ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( HOWMNY, 'A' ) .AND. .NOT.SOMCON ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( WANTS .AND. LDVL.LT.N ) THEN
+         INFO = -10
+      ELSE IF( WANTS .AND. LDVR.LT.N ) THEN
+         INFO = -12
+      ELSE
+*
+*        Set M to the number of eigenpairs for which condition numbers
+*        are required, and test MM.
+*
+         IF( SOMCON ) THEN
+            M = 0
+            DO 10 K = 1, N
+               IF( SELECT( K ) )
+     $            M = M + 1
+   10       CONTINUE
+         ELSE
+            M = N
+         END IF
+*
+         IF( MM.LT.M ) THEN
+            INFO = -15
+         ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+            INFO = -18
+         END IF
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZTGSNA', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'P' )
+      SMLNUM = DLAMCH( 'S' ) / EPS
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+      LLWRK = LWORK - 2*N*N
+      KS = 0
+      DO 20 K = 1, N
+*
+*        Determine whether condition numbers are required for the k-th
+*        eigenpair.
+*
+         IF( SOMCON ) THEN
+            IF( .NOT.SELECT( K ) )
+     $         GO TO 20
+         END IF
+*
+         KS = KS + 1
+*
+         IF( WANTS ) THEN
+*
+*           Compute the reciprocal condition number of the k-th
+*           eigenvalue.
+*
+            RNRM = DZNRM2( N, VR( 1, KS ), 1 )
+            LNRM = DZNRM2( N, VL( 1, KS ), 1 )
+            CALL ZGEMV( 'N', N, N, DCMPLX( ONE, ZERO ), A, LDA,
+     $                  VR( 1, KS ), 1, DCMPLX( ZERO, ZERO ), WORK, 1 )
+            YHAX = ZDOTC( N, WORK, 1, VL( 1, KS ), 1 )
+            CALL ZGEMV( 'N', N, N, DCMPLX( ONE, ZERO ), B, LDB,
+     $                  VR( 1, KS ), 1, DCMPLX( ZERO, ZERO ), WORK, 1 )
+            YHBX = ZDOTC( N, WORK, 1, VL( 1, KS ), 1 )
+            COND = DLAPY2( ABS( YHAX ), ABS( YHBX ) )
+            IF( COND.EQ.ZERO ) THEN
+               S( KS ) = -ONE
+            ELSE
+               S( KS ) = COND / ( RNRM*LNRM )
+            END IF
+         END IF
+*
+         IF( WANTDF ) THEN
+            IF( N.EQ.1 ) THEN
+               DIF( KS ) = DLAPY2( ABS( A( 1, 1 ) ), ABS( B( 1, 1 ) ) )
+               GO TO 20
+            END IF
+*
+*           Estimate the reciprocal condition number of the k-th
+*           eigenvectors.
+*
+*           Copy the matrix (A, B) to the array WORK and move the
+*           (k,k)th pair to the (1,1) position.
+*
+            CALL ZLACPY( 'Full', N, N, A, LDA, WORK, N )
+            CALL ZLACPY( 'Full', N, N, B, LDB, WORK( N*N+1 ), N )
+            IFST = K
+            ILST = 1
+*
+            CALL ZTGEXC( .FALSE., .FALSE., N, WORK, N, WORK( N*N+1 ), N,
+     $                   DUMMY, 1, DUMMY1, 1, IFST, ILST, IERR )
+*
+            IF( IERR.GT.0 ) THEN
+*
+*              Ill-conditioned problem - swap rejected.
+*
+               DIF( KS ) = ZERO
+            ELSE
+*
+*              Reordering successful, solve generalized Sylvester
+*              equation for R and L,
+*                         A22 * R - L * A11 = A12
+*                         B22 * R - L * B11 = B12,
+*              and compute estimate of Difl[(A11,B11), (A22, B22)].
+*
+               N1 = 1
+               N2 = N - N1
+               I = N*N + 1
+               CALL ZTGSYL( 'N', IDIFJB, N2, N1, WORK( N*N1+N1+1 ), N,
+     $                      WORK, N, WORK( N1+1 ), N, WORK( N*N1+N1+I ),
+     $                      N, WORK( I ), N, WORK( N1+I ), N, SCALE,
+     $                      DIF( KS ), WORK( N*N*2+1 ), LLWRK, IWORK,
+     $                      IERR )
+            END IF
+         END IF
+*
+   20 CONTINUE
+      WORK( 1 ) = LWMIN
+      RETURN
+*
+*     End of ZTGSNA
+*
+      END
diff --git a/costa/native/external/lapack/ztgsy2.f b/costa/native/external/lapack/ztgsy2.f
new file mode 100644
index 000000000..5ee762bc0
--- /dev/null
+++ b/costa/native/external/lapack/ztgsy2.f
@@ -0,0 +1,357 @@
+      SUBROUTINE ZTGSY2( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D,
+     $                   LDD, E, LDE, F, LDF, SCALE, RDSUM, RDSCAL,
+     $                   INFO )
+*
+*  -- LAPACK auxiliary routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF, M, N
+      DOUBLE PRECISION   RDSCAL, RDSUM, SCALE
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), C( LDC, * ),
+     $                   D( LDD, * ), E( LDE, * ), F( LDF, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTGSY2 solves the generalized Sylvester equation
+*
+*              A * R - L * B = scale *   C               (1)
+*              D * R - L * E = scale * F
+*
+*  using Level 1 and 2 BLAS, where R and L are unknown M-by-N matrices,
+*  (A, D), (B, E) and (C, F) are given matrix pairs of size M-by-M,
+*  N-by-N and M-by-N, respectively. A, B, D and E are upper triangular
+*  (i.e., (A,D) and (B,E) in generalized Schur form).
+*
+*  The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 is an output
+*  scaling factor chosen to avoid overflow.
+*
+*  In matrix notation solving equation (1) corresponds to solve
+*  Zx = scale * b, where Z is defined as
+*
+*         Z = [ kron(In, A)  -kron(B', Im) ]             (2)
+*             [ kron(In, D)  -kron(E', Im) ],
+*
+*  Ik is the identity matrix of size k and X' is the transpose of X.
+*  kron(X, Y) is the Kronecker product between the matrices X and Y.
+*
+*  If TRANS = 'C', y in the conjugate transposed system Z'y = scale*b
+*  is solved for, which is equivalent to solve for R and L in
+*
+*              A' * R  + D' * L   = scale *  C           (3)
+*              R  * B' + L  * E'  = scale * -F
+*
+*  This case is used to compute an estimate of Dif[(A, D), (B, E)] =
+*  = sigma_min(Z) using reverse communicaton with ZLACON.
+*
+*  ZTGSY2 also (IJOB >= 1) contributes to the computation in ZTGSYL
+*  of an upper bound on the separation between to matrix pairs. Then
+*  the input (A, D), (B, E) are sub-pencils of two matrix pairs in
+*  ZTGSYL.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER
+*          = 'N', solve the generalized Sylvester equation (1).
+*          = 'T': solve the 'transposed' system (3).
+*
+*  IJOB    (input) INTEGER
+*          Specifies what kind of functionality to be performed.
+*          =0: solve (1) only.
+*          =1: A contribution from this subsystem to a Frobenius
+*              norm-based estimate of the separation between two matrix
+*              pairs is computed. (look ahead strategy is used).
+*          =2: A contribution from this subsystem to a Frobenius
+*              norm-based estimate of the separation between two matrix
+*              pairs is computed. (DGECON on sub-systems is used.)
+*          Not referenced if TRANS = 'T'.
+*
+*  M       (input) INTEGER
+*          On entry, M specifies the order of A and D, and the row
+*          dimension of C, F, R and L.
+*
+*  N       (input) INTEGER
+*          On entry, N specifies the order of B and E, and the column
+*          dimension of C, F, R and L.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA, M)
+*          On entry, A contains an upper triangular matrix.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the matrix A. LDA >= max(1, M).
+*
+*  B       (input) COMPLEX*16 array, dimension (LDB, N)
+*          On entry, B contains an upper triangular matrix.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the matrix B. LDB >= max(1, N).
+*
+*  C       (input/ output) COMPLEX*16 array, dimension (LDC, N)
+*          On entry, C contains the right-hand-side of the first matrix
+*          equation in (1).
+*          On exit, if IJOB = 0, C has been overwritten by the solution
+*          R.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the matrix C. LDC >= max(1, M).
+*
+*  D       (input) COMPLEX*16 array, dimension (LDD, M)
+*          On entry, D contains an upper triangular matrix.
+*
+*  LDD     (input) INTEGER
+*          The leading dimension of the matrix D. LDD >= max(1, M).
+*
+*  E       (input) COMPLEX*16 array, dimension (LDE, N)
+*          On entry, E contains an upper triangular matrix.
+*
+*  LDE     (input) INTEGER
+*          The leading dimension of the matrix E. LDE >= max(1, N).
+*
+*  F       (input/ output) COMPLEX*16 array, dimension (LDF, N)
+*          On entry, F contains the right-hand-side of the second matrix
+*          equation in (1).
+*          On exit, if IJOB = 0, F has been overwritten by the solution
+*          L.
+*
+*  LDF     (input) INTEGER
+*          The leading dimension of the matrix F. LDF >= max(1, M).
+*
+*  SCALE   (output) DOUBLE PRECISION
+*          On exit, 0 <= SCALE <= 1. If 0 < SCALE < 1, the solutions
+*          R and L (C and F on entry) will hold the solutions to a
+*          slightly perturbed system but the input matrices A, B, D and
+*          E have not been changed. If SCALE = 0, R and L will hold the
+*          solutions to the homogeneous system with C = F = 0.
+*          Normally, SCALE = 1.
+*
+*  RDSUM   (input/output) DOUBLE PRECISION
+*          On entry, the sum of squares of computed contributions to
+*          the Dif-estimate under computation by ZTGSYL, where the
+*          scaling factor RDSCAL (see below) has been factored out.
+*          On exit, the corresponding sum of squares updated with the
+*          contributions from the current sub-system.
+*          If TRANS = 'T' RDSUM is not touched.
+*          NOTE: RDSUM only makes sense when ZTGSY2 is called by
+*          ZTGSYL.
+*
+*  RDSCAL  (input/output) DOUBLE PRECISION
+*          On entry, scaling factor used to prevent overflow in RDSUM.
+*          On exit, RDSCAL is updated w.r.t. the current contributions
+*          in RDSUM.
+*          If TRANS = 'T', RDSCAL is not touched.
+*          NOTE: RDSCAL only makes sense when ZTGSY2 is called by
+*          ZTGSYL.
+*
+*  INFO    (output) INTEGER
+*          On exit, if INFO is set to
+*            =0: Successful exit
+*            <0: If INFO = -i, input argument number i is illegal.
+*            >0: The matrix pairs (A, D) and (B, E) have common or very
+*                close eigenvalues.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      INTEGER            LDZ
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, LDZ = 2 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN
+      INTEGER            I, IERR, J, K
+      DOUBLE PRECISION   SCALOC
+      COMPLEX*16         ALPHA
+*     ..
+*     .. Local Arrays ..
+      INTEGER            IPIV( LDZ ), JPIV( LDZ )
+      COMPLEX*16         RHS( LDZ ), Z( LDZ, LDZ )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZAXPY, ZGESC2, ZGETC2, ZLATDF, ZSCAL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCMPLX, DCONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test input parameters
+*
+      INFO = 0
+      IERR = 0
+      NOTRAN = LSAME( TRANS, 'N' )
+      IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -1
+      ELSE IF( ( IJOB.LT.0 ) .OR. ( IJOB.GT.2 ) ) THEN
+         INFO = -2
+      ELSE IF( M.LE.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LE.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LDD.LT.MAX( 1, M ) ) THEN
+         INFO = -12
+      ELSE IF( LDE.LT.MAX( 1, N ) ) THEN
+         INFO = -14
+      ELSE IF( LDF.LT.MAX( 1, M ) ) THEN
+         INFO = -16
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZTGSY2', -INFO )
+         RETURN
+      END IF
+*
+      IF( NOTRAN ) THEN
+*
+*        Solve (I, J) - system
+*           A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J)
+*           D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J)
+*        for I = M, M - 1, ..., 1; J = 1, 2, ..., N
+*
+         SCALE = ONE
+         SCALOC = ONE
+         DO 30 J = 1, N
+            DO 20 I = M, 1, -1
+*
+*              Build 2 by 2 system
+*
+               Z( 1, 1 ) = A( I, I )
+               Z( 2, 1 ) = D( I, I )
+               Z( 1, 2 ) = -B( J, J )
+               Z( 2, 2 ) = -E( J, J )
+*
+*              Set up right hand side(s)
+*
+               RHS( 1 ) = C( I, J )
+               RHS( 2 ) = F( I, J )
+*
+*              Solve Z * x = RHS
+*
+               CALL ZGETC2( LDZ, Z, LDZ, IPIV, JPIV, IERR )
+               IF( IERR.GT.0 )
+     $            INFO = IERR
+               IF( IJOB.EQ.0 ) THEN
+                  CALL ZGESC2( LDZ, Z, LDZ, RHS, IPIV, JPIV, SCALOC )
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 10 K = 1, N
+                        CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ),
+     $                              C( 1, K ), 1 )
+                        CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ),
+     $                              F( 1, K ), 1 )
+   10                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+               ELSE
+                  CALL ZLATDF( IJOB, LDZ, Z, LDZ, RHS, RDSUM, RDSCAL,
+     $                         IPIV, JPIV )
+               END IF
+*
+*              Unpack solution vector(s)
+*
+               C( I, J ) = RHS( 1 )
+               F( I, J ) = RHS( 2 )
+*
+*              Substitute R(I, J) and L(I, J) into remaining equation.
+*
+               IF( I.GT.1 ) THEN
+                  ALPHA = -RHS( 1 )
+                  CALL ZAXPY( I-1, ALPHA, A( 1, I ), 1, C( 1, J ), 1 )
+                  CALL ZAXPY( I-1, ALPHA, D( 1, I ), 1, F( 1, J ), 1 )
+               END IF
+               IF( J.LT.N ) THEN
+                  CALL ZAXPY( N-J, RHS( 2 ), B( J, J+1 ), LDB,
+     $                        C( I, J+1 ), LDC )
+                  CALL ZAXPY( N-J, RHS( 2 ), E( J, J+1 ), LDE,
+     $                        F( I, J+1 ), LDF )
+               END IF
+*
+   20       CONTINUE
+   30    CONTINUE
+      ELSE
+*
+*        Solve transposed (I, J) - system:
+*           A(I, I)' * R(I, J) + D(I, I)' * L(J, J) = C(I, J)
+*           R(I, I) * B(J, J) + L(I, J) * E(J, J)   = -F(I, J)
+*        for I = 1, 2, ..., M, J = N, N - 1, ..., 1
+*
+         SCALE = ONE
+         SCALOC = ONE
+         DO 80 I = 1, M
+            DO 70 J = N, 1, -1
+*
+*              Build 2 by 2 system Z'
+*
+               Z( 1, 1 ) = DCONJG( A( I, I ) )
+               Z( 2, 1 ) = -DCONJG( B( J, J ) )
+               Z( 1, 2 ) = DCONJG( D( I, I ) )
+               Z( 2, 2 ) = -DCONJG( E( J, J ) )
+*
+*
+*              Set up right hand side(s)
+*
+               RHS( 1 ) = C( I, J )
+               RHS( 2 ) = F( I, J )
+*
+*              Solve Z' * x = RHS
+*
+               CALL ZGETC2( LDZ, Z, LDZ, IPIV, JPIV, IERR )
+               IF( IERR.GT.0 )
+     $            INFO = IERR
+               CALL ZGESC2( LDZ, Z, LDZ, RHS, IPIV, JPIV, SCALOC )
+               IF( SCALOC.NE.ONE ) THEN
+                  DO 40 K = 1, N
+                     CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ), C( 1, K ),
+     $                           1 )
+                     CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ), F( 1, K ),
+     $                           1 )
+   40             CONTINUE
+                  SCALE = SCALE*SCALOC
+               END IF
+*
+*              Unpack solution vector(s)
+*
+               C( I, J ) = RHS( 1 )
+               F( I, J ) = RHS( 2 )
+*
+*              Substitute R(I, J) and L(I, J) into remaining equation.
+*
+               DO 50 K = 1, J - 1
+                  F( I, K ) = F( I, K ) + RHS( 1 )*DCONJG( B( K, J ) ) +
+     $                        RHS( 2 )*DCONJG( E( K, J ) )
+   50          CONTINUE
+               DO 60 K = I + 1, M
+                  C( K, J ) = C( K, J ) - DCONJG( A( I, K ) )*RHS( 1 ) -
+     $                        DCONJG( D( I, K ) )*RHS( 2 )
+   60          CONTINUE
+*
+   70       CONTINUE
+   80    CONTINUE
+      END IF
+      RETURN
+*
+*     End of ZTGSY2
+*
+      END
diff --git a/costa/native/external/lapack/ztgsyl.f b/costa/native/external/lapack/ztgsyl.f
new file mode 100644
index 000000000..dfb3de503
--- /dev/null
+++ b/costa/native/external/lapack/ztgsyl.f
@@ -0,0 +1,550 @@
+      SUBROUTINE ZTGSYL( TRANS, IJOB, M, N, A, LDA, B, LDB, C, LDC, D,
+     $                   LDD, E, LDE, F, LDF, SCALE, DIF, WORK, LWORK,
+     $                   IWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANS
+      INTEGER            IJOB, INFO, LDA, LDB, LDC, LDD, LDE, LDF,
+     $                   LWORK, M, N
+      DOUBLE PRECISION   DIF, SCALE
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), C( LDC, * ),
+     $                   D( LDD, * ), E( LDE, * ), F( LDF, * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTGSYL solves the generalized Sylvester equation:
+*
+*              A * R - L * B = scale * C            (1)
+*              D * R - L * E = scale * F
+*
+*  where R and L are unknown m-by-n matrices, (A, D), (B, E) and
+*  (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n,
+*  respectively, with complex entries. A, B, D and E are upper
+*  triangular (i.e., (A,D) and (B,E) in generalized Schur form).
+*
+*  The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1
+*  is an output scaling factor chosen to avoid overflow.
+*
+*  In matrix notation (1) is equivalent to solve Zx = scale*b, where Z
+*  is defined as
+*
+*         Z = [ kron(In, A)  -kron(B', Im) ]        (2)
+*             [ kron(In, D)  -kron(E', Im) ],
+*
+*  Here Ix is the identity matrix of size x and X' is the conjugate
+*  transpose of X. Kron(X, Y) is the Kronecker product between the
+*  matrices X and Y.
+*
+*  If TRANS = 'C', y in the conjugate transposed system Z'*y = scale*b
+*  is solved for, which is equivalent to solve for R and L in
+*
+*              A' * R + D' * L = scale * C           (3)
+*              R * B' + L * E' = scale * -F
+*
+*  This case (TRANS = 'C') is used to compute an one-norm-based estimate
+*  of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D)
+*  and (B,E), using ZLACON.
+*
+*  If IJOB >= 1, ZTGSYL computes a Frobenius norm-based estimate of
+*  Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the
+*  reciprocal of the smallest singular value of Z.
+*
+*  This is a level-3 BLAS algorithm.
+*
+*  Arguments
+*  =========
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': solve the generalized sylvester equation (1).
+*          = 'C': solve the "conjugate transposed" system (3).
+*
+*  IJOB    (input) INTEGER
+*          Specifies what kind of functionality to be performed.
+*          =0: solve (1) only.
+*          =1: The functionality of 0 and 3.
+*          =2: The functionality of 0 and 4.
+*          =3: Only an estimate of Dif[(A,D), (B,E)] is computed.
+*              (look ahead strategy is used).
+*          =4: Only an estimate of Dif[(A,D), (B,E)] is computed.
+*              (ZGECON on sub-systems is used).
+*          Not referenced if TRANS = 'C'.
+*
+*  M       (input) INTEGER
+*          The order of the matrices A and D, and the row dimension of
+*          the matrices C, F, R and L.
+*
+*  N       (input) INTEGER
+*          The order of the matrices B and E, and the column dimension
+*          of the matrices C, F, R and L.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA, M)
+*          The upper triangular matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1, M).
+*
+*  B       (input) COMPLEX*16 array, dimension (LDB, N)
+*          The upper triangular matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1, N).
+*
+*  C       (input/output) COMPLEX*16 array, dimension (LDC, N)
+*          On entry, C contains the right-hand-side of the first matrix
+*          equation in (1) or (3).
+*          On exit, if IJOB = 0, 1 or 2, C has been overwritten by
+*          the solution R. If IJOB = 3 or 4 and TRANS = 'N', C holds R,
+*          the solution achieved during the computation of the
+*          Dif-estimate.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1, M).
+*
+*  D       (input) COMPLEX*16 array, dimension (LDD, M)
+*          The upper triangular matrix D.
+*
+*  LDD     (input) INTEGER
+*          The leading dimension of the array D. LDD >= max(1, M).
+*
+*  E       (input) COMPLEX*16 array, dimension (LDE, N)
+*          The upper triangular matrix E.
+*
+*  LDE     (input) INTEGER
+*          The leading dimension of the array E. LDE >= max(1, N).
+*
+*  F       (input/output) COMPLEX*16 array, dimension (LDF, N)
+*          On entry, F contains the right-hand-side of the second matrix
+*          equation in (1) or (3).
+*          On exit, if IJOB = 0, 1 or 2, F has been overwritten by
+*          the solution L. If IJOB = 3 or 4 and TRANS = 'N', F holds L,
+*          the solution achieved during the computation of the
+*          Dif-estimate.
+*
+*  LDF     (input) INTEGER
+*          The leading dimension of the array F. LDF >= max(1, M).
+*
+*  DIF     (output) DOUBLE PRECISION
+*          On exit DIF is the reciprocal of a lower bound of the
+*          reciprocal of the Dif-function, i.e. DIF is an upper bound of
+*          Dif[(A,D), (B,E)] = sigma-min(Z), where Z as in (2).
+*          IF IJOB = 0 or TRANS = 'C', DIF is not referenced.
+*
+*  SCALE   (output) DOUBLE PRECISION
+*          On exit SCALE is the scaling factor in (1) or (3).
+*          If 0 < SCALE < 1, C and F hold the solutions R and L, resp.,
+*          to a slightly perturbed system but the input matrices A, B,
+*          D and E have not been changed. If SCALE = 0, R and L will
+*          hold the solutions to the homogenious system with C = F = 0.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          IF IJOB = 0, WORK is not referenced.  Otherwise,
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK > = 1.
+*          If IJOB = 1 or 2 and TRANS = 'N', LWORK >= 2*M*N.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  IWORK   (workspace) INTEGER array, dimension (M+N+2)
+*          If IJOB = 0, IWORK is not referenced.
+*
+*  INFO    (output) INTEGER
+*            =0: successful exit
+*            <0: If INFO = -i, the i-th argument had an illegal value.
+*            >0: (A, D) and (B, E) have common or very close
+*                eigenvalues.
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*     Bo Kagstrom and Peter Poromaa, Department of Computing Science,
+*     Umea University, S-901 87 Umea, Sweden.
+*
+*  [1] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software
+*      for Solving the Generalized Sylvester Equation and Estimating the
+*      Separation between Regular Matrix Pairs, Report UMINF - 93.23,
+*      Department of Computing Science, Umea University, S-901 87 Umea,
+*      Sweden, December 1993, Revised April 1994, Also as LAPACK Working
+*      Note 75.  To appear in ACM Trans. on Math. Software, Vol 22,
+*      No 1, 1996.
+*
+*  [2] B. Kagstrom, A Perturbation Analysis of the Generalized Sylvester
+*      Equation (AR - LB, DR - LE ) = (C, F), SIAM J. Matrix Anal.
+*      Appl., 15(4):1045-1060, 1994.
+*
+*  [3] B. Kagstrom and L. Westin, Generalized Schur Methods with
+*      Condition Estimators for Solving the Generalized Sylvester
+*      Equation, IEEE Transactions on Automatic Control, Vol. 34, No. 7,
+*      July 1989, pp 745-751.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, NOTRAN
+      INTEGER            I, IE, IFUNC, IROUND, IS, ISOLVE, J, JE, JS, K,
+     $                   LINFO, LWMIN, MB, NB, P, PQ, Q
+      DOUBLE PRECISION   DSCALE, DSUM, SCALE2, SCALOC
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZCOPY, ZGEMM, ZLACPY, ZSCAL, ZTGSY2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DBLE, DCMPLX, MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test input parameters
+*
+      INFO = 0
+      NOTRAN = LSAME( TRANS, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      IF( ( IJOB.EQ.1 .OR. IJOB.EQ.2 ) .AND. NOTRAN ) THEN
+         LWMIN = MAX( 1, 2*M*N )
+      ELSE
+         LWMIN = 1
+      END IF
+*
+      IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -1
+      ELSE IF( ( IJOB.LT.0 ) .OR. ( IJOB.GT.4 ) ) THEN
+         INFO = -2
+      ELSE IF( M.LE.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LE.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -6
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LDD.LT.MAX( 1, M ) ) THEN
+         INFO = -12
+      ELSE IF( LDE.LT.MAX( 1, N ) ) THEN
+         INFO = -14
+      ELSE IF( LDF.LT.MAX( 1, M ) ) THEN
+         INFO = -16
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -20
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZTGSYL', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Determine  optimal block sizes MB and NB
+*
+      MB = ILAENV( 2, 'ZTGSYL', TRANS, M, N, -1, -1 )
+      NB = ILAENV( 5, 'ZTGSYL', TRANS, M, N, -1, -1 )
+*
+      ISOLVE = 1
+      IFUNC = 0
+      IF( IJOB.GE.3 .AND. NOTRAN ) THEN
+         IFUNC = IJOB - 2
+         DO 10 J = 1, N
+            CALL ZCOPY( M, DCMPLX( ZERO, ZERO ), 0, C( 1, J ), 1 )
+            CALL ZCOPY( M, DCMPLX( ZERO, ZERO ), 0, F( 1, J ), 1 )
+   10    CONTINUE
+      ELSE IF( IJOB.GE.1 .AND. NOTRAN ) THEN
+         ISOLVE = 2
+      END IF
+*
+      IF( ( MB.LE.1 .AND. NB.LE.1 ) .OR. ( MB.GE.M .AND. NB.GE.N ) )
+     $     THEN
+*
+*        Use unblocked Level 2 solver
+*
+         DO 30 IROUND = 1, ISOLVE
+*
+            SCALE = ONE
+            DSCALE = ZERO
+            DSUM = ONE
+            PQ = M*N
+            CALL ZTGSY2( TRANS, IFUNC, M, N, A, LDA, B, LDB, C, LDC, D,
+     $                   LDD, E, LDE, F, LDF, SCALE, DSUM, DSCALE,
+     $                   INFO )
+            IF( DSCALE.NE.ZERO ) THEN
+               IF( IJOB.EQ.1 .OR. IJOB.EQ.3 ) THEN
+                  DIF = SQRT( DBLE( 2*M*N ) ) / ( DSCALE*SQRT( DSUM ) )
+               ELSE
+                  DIF = SQRT( DBLE( PQ ) ) / ( DSCALE*SQRT( DSUM ) )
+               END IF
+            END IF
+            IF( ISOLVE.EQ.2 .AND. IROUND.EQ.1 ) THEN
+               IFUNC = IJOB
+               SCALE2 = SCALE
+               CALL ZLACPY( 'F', M, N, C, LDC, WORK, M )
+               CALL ZLACPY( 'F', M, N, F, LDF, WORK( M*N+1 ), M )
+               DO 20 J = 1, N
+                  CALL ZCOPY( M, DCMPLX( ZERO, ZERO ), 0, C( 1, J ), 1 )
+                  CALL ZCOPY( M, DCMPLX( ZERO, ZERO ), 0, F( 1, J ), 1 )
+   20          CONTINUE
+            ELSE IF( ISOLVE.EQ.2 .AND. IROUND.EQ.2 ) THEN
+               CALL ZLACPY( 'F', M, N, WORK, M, C, LDC )
+               CALL ZLACPY( 'F', M, N, WORK( M*N+1 ), M, F, LDF )
+               SCALE = SCALE2
+            END IF
+   30    CONTINUE
+*
+         RETURN
+*
+      END IF
+*
+*     Determine block structure of A
+*
+      P = 0
+      I = 1
+   40 CONTINUE
+      IF( I.GT.M )
+     $   GO TO 50
+      P = P + 1
+      IWORK( P ) = I
+      I = I + MB
+      IF( I.GE.M )
+     $   GO TO 50
+      GO TO 40
+   50 CONTINUE
+      IWORK( P+1 ) = M + 1
+      IF( IWORK( P ).EQ.IWORK( P+1 ) )
+     $   P = P - 1
+*
+*     Determine block structure of B
+*
+      Q = P + 1
+      J = 1
+   60 CONTINUE
+      IF( J.GT.N )
+     $   GO TO 70
+*
+      Q = Q + 1
+      IWORK( Q ) = J
+      J = J + NB
+      IF( J.GE.N )
+     $   GO TO 70
+      GO TO 60
+*
+   70 CONTINUE
+      IWORK( Q+1 ) = N + 1
+      IF( IWORK( Q ).EQ.IWORK( Q+1 ) )
+     $   Q = Q - 1
+*
+      IF( NOTRAN ) THEN
+         DO 150 IROUND = 1, ISOLVE
+*
+*           Solve (I, J) - subsystem
+*               A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J)
+*               D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J)
+*           for I = P, P - 1, ..., 1; J = 1, 2, ..., Q
+*
+            PQ = 0
+            SCALE = ONE
+            DSCALE = ZERO
+            DSUM = ONE
+            DO 130 J = P + 2, Q
+               JS = IWORK( J )
+               JE = IWORK( J+1 ) - 1
+               NB = JE - JS + 1
+               DO 120 I = P, 1, -1
+                  IS = IWORK( I )
+                  IE = IWORK( I+1 ) - 1
+                  MB = IE - IS + 1
+                  CALL ZTGSY2( TRANS, IFUNC, MB, NB, A( IS, IS ), LDA,
+     $                         B( JS, JS ), LDB, C( IS, JS ), LDC,
+     $                         D( IS, IS ), LDD, E( JS, JS ), LDE,
+     $                         F( IS, JS ), LDF, SCALOC, DSUM, DSCALE,
+     $                         LINFO )
+                  IF( LINFO.GT.0 )
+     $               INFO = LINFO
+                  PQ = PQ + MB*NB
+                  IF( SCALOC.NE.ONE ) THEN
+                     DO 80 K = 1, JS - 1
+                        CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ),
+     $                              C( 1, K ), 1 )
+                        CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ),
+     $                              F( 1, K ), 1 )
+   80                CONTINUE
+                     DO 90 K = JS, JE
+                        CALL ZSCAL( IS-1, DCMPLX( SCALOC, ZERO ),
+     $                              C( 1, K ), 1 )
+                        CALL ZSCAL( IS-1, DCMPLX( SCALOC, ZERO ),
+     $                              F( 1, K ), 1 )
+   90                CONTINUE
+                     DO 100 K = JS, JE
+                        CALL ZSCAL( M-IE, DCMPLX( SCALOC, ZERO ),
+     $                              C( IE+1, K ), 1 )
+                        CALL ZSCAL( M-IE, DCMPLX( SCALOC, ZERO ),
+     $                              F( IE+1, K ), 1 )
+  100                CONTINUE
+                     DO 110 K = JE + 1, N
+                        CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ),
+     $                              C( 1, K ), 1 )
+                        CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ),
+     $                              F( 1, K ), 1 )
+  110                CONTINUE
+                     SCALE = SCALE*SCALOC
+                  END IF
+*
+*                 Substitute R(I,J) and L(I,J) into remaining equation.
+*
+                  IF( I.GT.1 ) THEN
+                     CALL ZGEMM( 'N', 'N', IS-1, NB, MB,
+     $                           DCMPLX( -ONE, ZERO ), A( 1, IS ), LDA,
+     $                           C( IS, JS ), LDC, DCMPLX( ONE, ZERO ),
+     $                           C( 1, JS ), LDC )
+                     CALL ZGEMM( 'N', 'N', IS-1, NB, MB,
+     $                           DCMPLX( -ONE, ZERO ), D( 1, IS ), LDD,
+     $                           C( IS, JS ), LDC, DCMPLX( ONE, ZERO ),
+     $                           F( 1, JS ), LDF )
+                  END IF
+                  IF( J.LT.Q ) THEN
+                     CALL ZGEMM( 'N', 'N', MB, N-JE, NB,
+     $                           DCMPLX( ONE, ZERO ), F( IS, JS ), LDF,
+     $                           B( JS, JE+1 ), LDB,
+     $                           DCMPLX( ONE, ZERO ), C( IS, JE+1 ),
+     $                           LDC )
+                     CALL ZGEMM( 'N', 'N', MB, N-JE, NB,
+     $                           DCMPLX( ONE, ZERO ), F( IS, JS ), LDF,
+     $                           E( JS, JE+1 ), LDE,
+     $                           DCMPLX( ONE, ZERO ), F( IS, JE+1 ),
+     $                           LDF )
+                  END IF
+  120          CONTINUE
+  130       CONTINUE
+            IF( DSCALE.NE.ZERO ) THEN
+               IF( IJOB.EQ.1 .OR. IJOB.EQ.3 ) THEN
+                  DIF = SQRT( DBLE( 2*M*N ) ) / ( DSCALE*SQRT( DSUM ) )
+               ELSE
+                  DIF = SQRT( DBLE( PQ ) ) / ( DSCALE*SQRT( DSUM ) )
+               END IF
+            END IF
+            IF( ISOLVE.EQ.2 .AND. IROUND.EQ.1 ) THEN
+               IFUNC = IJOB
+               SCALE2 = SCALE
+               CALL ZLACPY( 'F', M, N, C, LDC, WORK, M )
+               CALL ZLACPY( 'F', M, N, F, LDF, WORK( M*N+1 ), M )
+               DO 140 J = 1, N
+                  CALL ZCOPY( M, DCMPLX( ZERO, ZERO ), 0, C( 1, J ), 1 )
+                  CALL ZCOPY( M, DCMPLX( ZERO, ZERO ), 0, F( 1, J ), 1 )
+  140          CONTINUE
+            ELSE IF( ISOLVE.EQ.2 .AND. IROUND.EQ.2 ) THEN
+               CALL ZLACPY( 'F', M, N, WORK, M, C, LDC )
+               CALL ZLACPY( 'F', M, N, WORK( M*N+1 ), M, F, LDF )
+               SCALE = SCALE2
+            END IF
+  150    CONTINUE
+      ELSE
+*
+*        Solve transposed (I, J)-subsystem
+*            A(I, I)' * R(I, J) + D(I, I)' * L(I, J) = C(I, J)
+*            R(I, J) * B(J, J)  + L(I, J) * E(J, J) = -F(I, J)
+*        for I = 1,2,..., P; J = Q, Q-1,..., 1
+*
+         SCALE = ONE
+         DO 210 I = 1, P
+            IS = IWORK( I )
+            IE = IWORK( I+1 ) - 1
+            MB = IE - IS + 1
+            DO 200 J = Q, P + 2, -1
+               JS = IWORK( J )
+               JE = IWORK( J+1 ) - 1
+               NB = JE - JS + 1
+               CALL ZTGSY2( TRANS, IFUNC, MB, NB, A( IS, IS ), LDA,
+     $                      B( JS, JS ), LDB, C( IS, JS ), LDC,
+     $                      D( IS, IS ), LDD, E( JS, JS ), LDE,
+     $                      F( IS, JS ), LDF, SCALOC, DSUM, DSCALE,
+     $                      LINFO )
+               IF( LINFO.GT.0 )
+     $            INFO = LINFO
+               IF( SCALOC.NE.ONE ) THEN
+                  DO 160 K = 1, JS - 1
+                     CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ), C( 1, K ),
+     $                           1 )
+                     CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ), F( 1, K ),
+     $                           1 )
+  160             CONTINUE
+                  DO 170 K = JS, JE
+                     CALL ZSCAL( IS-1, DCMPLX( SCALOC, ZERO ),
+     $                           C( 1, K ), 1 )
+                     CALL ZSCAL( IS-1, DCMPLX( SCALOC, ZERO ),
+     $                           F( 1, K ), 1 )
+  170             CONTINUE
+                  DO 180 K = JS, JE
+                     CALL ZSCAL( M-IE, DCMPLX( SCALOC, ZERO ),
+     $                           C( IE+1, K ), 1 )
+                     CALL ZSCAL( M-IE, DCMPLX( SCALOC, ZERO ),
+     $                           F( IE+1, K ), 1 )
+  180             CONTINUE
+                  DO 190 K = JE + 1, N
+                     CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ), C( 1, K ),
+     $                           1 )
+                     CALL ZSCAL( M, DCMPLX( SCALOC, ZERO ), F( 1, K ),
+     $                           1 )
+  190             CONTINUE
+                  SCALE = SCALE*SCALOC
+               END IF
+*
+*              Substitute R(I,J) and L(I,J) into remaining equation.
+*
+               IF( J.GT.P+2 ) THEN
+                  CALL ZGEMM( 'N', 'C', MB, JS-1, NB,
+     $                        DCMPLX( ONE, ZERO ), C( IS, JS ), LDC,
+     $                        B( 1, JS ), LDB, DCMPLX( ONE, ZERO ),
+     $                        F( IS, 1 ), LDF )
+                  CALL ZGEMM( 'N', 'C', MB, JS-1, NB,
+     $                        DCMPLX( ONE, ZERO ), F( IS, JS ), LDF,
+     $                        E( 1, JS ), LDE, DCMPLX( ONE, ZERO ),
+     $                        F( IS, 1 ), LDF )
+               END IF
+               IF( I.LT.P ) THEN
+                  CALL ZGEMM( 'C', 'N', M-IE, NB, MB,
+     $                        DCMPLX( -ONE, ZERO ), A( IS, IE+1 ), LDA,
+     $                        C( IS, JS ), LDC, DCMPLX( ONE, ZERO ),
+     $                        C( IE+1, JS ), LDC )
+                  CALL ZGEMM( 'C', 'N', M-IE, NB, MB,
+     $                        DCMPLX( -ONE, ZERO ), D( IS, IE+1 ), LDD,
+     $                        F( IS, JS ), LDF, DCMPLX( ONE, ZERO ),
+     $                        C( IE+1, JS ), LDC )
+               END IF
+  200       CONTINUE
+  210    CONTINUE
+      END IF
+*
+      WORK( 1 ) = LWMIN
+*
+      RETURN
+*
+*     End of ZTGSYL
+*
+      END
diff --git a/costa/native/external/lapack/ztpcon.f b/costa/native/external/lapack/ztpcon.f
new file mode 100644
index 000000000..a55d87011
--- /dev/null
+++ b/costa/native/external/lapack/ztpcon.f
@@ -0,0 +1,194 @@
+      SUBROUTINE ZTPCON( NORM, UPLO, DIAG, N, AP, RCOND, WORK, RWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORM, UPLO
+      INTEGER            INFO, N
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   RWORK( * )
+      COMPLEX*16         AP( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTPCON estimates the reciprocal of the condition number of a packed
+*  triangular matrix A, in either the 1-norm or the infinity-norm.
+*
+*  The norm of A is computed and an estimate is obtained for
+*  norm(inv(A)), then the reciprocal of the condition number is
+*  computed as
+*     RCOND = 1 / ( norm(A) * norm(inv(A)) ).
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies whether the 1-norm condition number or the
+*          infinity-norm condition number is required:
+*          = '1' or 'O':  1-norm;
+*          = 'I':         Infinity-norm.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          The upper or lower triangular matrix A, packed columnwise in
+*          a linear array.  The j-th column of A is stored in the array
+*          AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*          If DIAG = 'U', the diagonal elements of A are not referenced
+*          and are assumed to be 1.
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(norm(A) * norm(inv(A))).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, ONENRM, UPPER
+      CHARACTER          NORMIN
+      INTEGER            IX, KASE, KASE1
+      DOUBLE PRECISION   AINVNM, ANORM, SCALE, SMLNUM, XNORM
+      COMPLEX*16         ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IZAMAX
+      DOUBLE PRECISION   DLAMCH, ZLANTP
+      EXTERNAL           LSAME, IZAMAX, DLAMCH, ZLANTP
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZDRSCL, ZLACON, ZLATPS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+      IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZTPCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      END IF
+*
+      RCOND = ZERO
+      SMLNUM = DLAMCH( 'Safe minimum' )*DBLE( MAX( 1, N ) )
+*
+*     Compute the norm of the triangular matrix A.
+*
+      ANORM = ZLANTP( NORM, UPLO, DIAG, N, AP, RWORK )
+*
+*     Continue only if ANORM > 0.
+*
+      IF( ANORM.GT.ZERO ) THEN
+*
+*        Estimate the norm of the inverse of A.
+*
+         AINVNM = ZERO
+         NORMIN = 'N'
+         IF( ONENRM ) THEN
+            KASE1 = 1
+         ELSE
+            KASE1 = 2
+         END IF
+         KASE = 0
+   10    CONTINUE
+         CALL ZLACON( N, WORK( N+1 ), WORK, AINVNM, KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.KASE1 ) THEN
+*
+*              Multiply by inv(A).
+*
+               CALL ZLATPS( UPLO, 'No transpose', DIAG, NORMIN, N, AP,
+     $                      WORK, SCALE, RWORK, INFO )
+            ELSE
+*
+*              Multiply by inv(A').
+*
+               CALL ZLATPS( UPLO, 'Conjugate transpose', DIAG, NORMIN,
+     $                      N, AP, WORK, SCALE, RWORK, INFO )
+            END IF
+            NORMIN = 'Y'
+*
+*           Multiply by 1/SCALE if doing so will not cause overflow.
+*
+            IF( SCALE.NE.ONE ) THEN
+               IX = IZAMAX( N, WORK, 1 )
+               XNORM = CABS1( WORK( IX ) )
+               IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO )
+     $            GO TO 20
+               CALL ZDRSCL( N, SCALE, WORK, 1 )
+            END IF
+            GO TO 10
+         END IF
+*
+*        Compute the estimate of the reciprocal condition number.
+*
+         IF( AINVNM.NE.ZERO )
+     $      RCOND = ( ONE / ANORM ) / AINVNM
+      END IF
+*
+   20 CONTINUE
+      RETURN
+*
+*     End of ZTPCON
+*
+      END
diff --git a/costa/native/external/lapack/ztprfs.f b/costa/native/external/lapack/ztprfs.f
new file mode 100644
index 000000000..b2f62a5a0
--- /dev/null
+++ b/costa/native/external/lapack/ztprfs.f
@@ -0,0 +1,387 @@
+      SUBROUTINE ZTPRFS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, X, LDX,
+     $                   FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, TRANS, UPLO
+      INTEGER            INFO, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX*16         AP( * ), B( LDB, * ), WORK( * ), X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTPRFS provides error bounds and backward error estimates for the
+*  solution to a system of linear equations with a triangular packed
+*  coefficient matrix.
+*
+*  The solution matrix X must be computed by ZTPTRS or some other
+*  means before entering this routine.  ZTPRFS does not do iterative
+*  refinement because doing so cannot improve the backward error.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  AP      (input) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          The upper or lower triangular matrix A, packed columnwise in
+*          a linear array.  The j-th column of A is stored in the array
+*          AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2n-j)/2) = A(i,j) for j<=i<=n.
+*          If DIAG = 'U', the diagonal elements of A are not referenced
+*          and are assumed to be 1.
+*
+*  B       (input) COMPLEX*16 array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input) COMPLEX*16 array, dimension (LDX,NRHS)
+*          The solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN, NOUNIT, UPPER
+      CHARACTER          TRANSN, TRANST
+      INTEGER            I, J, K, KASE, KC, NZ
+      DOUBLE PRECISION   EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+      COMPLEX*16         ZDUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZAXPY, ZCOPY, ZLACON, ZTPMV, ZTPSV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZTPRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      IF( NOTRAN ) THEN
+         TRANSN = 'N'
+         TRANST = 'C'
+      ELSE
+         TRANSN = 'C'
+         TRANST = 'N'
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = DLAMCH( 'Epsilon' )
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 250 J = 1, NRHS
+*
+*        Compute residual R = B - op(A) * X,
+*        where op(A) = A, A**T, or A**H, depending on TRANS.
+*
+         CALL ZCOPY( N, X( 1, J ), 1, WORK, 1 )
+         CALL ZTPMV( UPLO, TRANS, DIAG, N, AP, WORK, 1 )
+         CALL ZAXPY( N, -ONE, B( 1, J ), 1, WORK, 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 20 I = 1, N
+            RWORK( I ) = CABS1( B( I, J ) )
+   20    CONTINUE
+*
+         IF( NOTRAN ) THEN
+*
+*           Compute abs(A)*abs(X) + abs(B).
+*
+            IF( UPPER ) THEN
+               KC = 1
+               IF( NOUNIT ) THEN
+                  DO 40 K = 1, N
+                     XK = CABS1( X( K, J ) )
+                     DO 30 I = 1, K
+                        RWORK( I ) = RWORK( I ) +
+     $                               CABS1( AP( KC+I-1 ) )*XK
+   30                CONTINUE
+                     KC = KC + K
+   40             CONTINUE
+               ELSE
+                  DO 60 K = 1, N
+                     XK = CABS1( X( K, J ) )
+                     DO 50 I = 1, K - 1
+                        RWORK( I ) = RWORK( I ) +
+     $                               CABS1( AP( KC+I-1 ) )*XK
+   50                CONTINUE
+                     RWORK( K ) = RWORK( K ) + XK
+                     KC = KC + K
+   60             CONTINUE
+               END IF
+            ELSE
+               KC = 1
+               IF( NOUNIT ) THEN
+                  DO 80 K = 1, N
+                     XK = CABS1( X( K, J ) )
+                     DO 70 I = K, N
+                        RWORK( I ) = RWORK( I ) +
+     $                               CABS1( AP( KC+I-K ) )*XK
+   70                CONTINUE
+                     KC = KC + N - K + 1
+   80             CONTINUE
+               ELSE
+                  DO 100 K = 1, N
+                     XK = CABS1( X( K, J ) )
+                     DO 90 I = K + 1, N
+                        RWORK( I ) = RWORK( I ) +
+     $                               CABS1( AP( KC+I-K ) )*XK
+   90                CONTINUE
+                     RWORK( K ) = RWORK( K ) + XK
+                     KC = KC + N - K + 1
+  100             CONTINUE
+               END IF
+            END IF
+         ELSE
+*
+*           Compute abs(A**H)*abs(X) + abs(B).
+*
+            IF( UPPER ) THEN
+               KC = 1
+               IF( NOUNIT ) THEN
+                  DO 120 K = 1, N
+                     S = ZERO
+                     DO 110 I = 1, K
+                        S = S + CABS1( AP( KC+I-1 ) )*CABS1( X( I, J ) )
+  110                CONTINUE
+                     RWORK( K ) = RWORK( K ) + S
+                     KC = KC + K
+  120             CONTINUE
+               ELSE
+                  DO 140 K = 1, N
+                     S = CABS1( X( K, J ) )
+                     DO 130 I = 1, K - 1
+                        S = S + CABS1( AP( KC+I-1 ) )*CABS1( X( I, J ) )
+  130                CONTINUE
+                     RWORK( K ) = RWORK( K ) + S
+                     KC = KC + K
+  140             CONTINUE
+               END IF
+            ELSE
+               KC = 1
+               IF( NOUNIT ) THEN
+                  DO 160 K = 1, N
+                     S = ZERO
+                     DO 150 I = K, N
+                        S = S + CABS1( AP( KC+I-K ) )*CABS1( X( I, J ) )
+  150                CONTINUE
+                     RWORK( K ) = RWORK( K ) + S
+                     KC = KC + N - K + 1
+  160             CONTINUE
+               ELSE
+                  DO 180 K = 1, N
+                     S = CABS1( X( K, J ) )
+                     DO 170 I = K + 1, N
+                        S = S + CABS1( AP( KC+I-K ) )*CABS1( X( I, J ) )
+  170                CONTINUE
+                     RWORK( K ) = RWORK( K ) + S
+                     KC = KC + N - K + 1
+  180             CONTINUE
+               END IF
+            END IF
+         END IF
+         S = ZERO
+         DO 190 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) )
+            ELSE
+               S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) /
+     $             ( RWORK( I )+SAFE1 ) )
+            END IF
+  190    CONTINUE
+         BERR( J ) = S
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(op(A)))*
+*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(op(A)) is the inverse of op(A)
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(op(A))*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use ZLACON to estimate the infinity-norm of the matrix
+*           inv(op(A)) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) )))
+*
+         DO 200 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I )
+            ELSE
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) +
+     $                      SAFE1
+            END IF
+  200    CONTINUE
+*
+         KASE = 0
+  210    CONTINUE
+         CALL ZLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(op(A)**H).
+*
+               CALL ZTPSV( UPLO, TRANST, DIAG, N, AP, WORK, 1 )
+               DO 220 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  220          CONTINUE
+            ELSE
+*
+*              Multiply by inv(op(A))*diag(W).
+*
+               DO 230 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  230          CONTINUE
+               CALL ZTPSV( UPLO, TRANSN, DIAG, N, AP, WORK, 1 )
+            END IF
+            GO TO 210
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 240 I = 1, N
+            LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) )
+  240    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  250 CONTINUE
+*
+      RETURN
+*
+*     End of ZTPRFS
+*
+      END
diff --git a/costa/native/external/lapack/ztptri.f b/costa/native/external/lapack/ztptri.f
new file mode 100644
index 000000000..bc90876d5
--- /dev/null
+++ b/costa/native/external/lapack/ztptri.f
@@ -0,0 +1,177 @@
+      SUBROUTINE ZTPTRI( UPLO, DIAG, N, AP, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, UPLO
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         AP( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTPTRI computes the inverse of a complex upper or lower triangular
+*  matrix A stored in packed format.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  AP      (input/output) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          On entry, the upper or lower triangular matrix A, stored
+*          columnwise in a linear array.  The j-th column of A is stored
+*          in the array AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*((2*n-j)/2) = A(i,j) for j<=i<=n.
+*          See below for further details.
+*          On exit, the (triangular) inverse of the original matrix, in
+*          the same packed storage format.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, A(i,i) is exactly zero.  The triangular
+*                matrix is singular and its inverse can not be computed.
+*
+*  Further Details
+*  ===============
+*
+*  A triangular matrix A can be transferred to packed storage using one
+*  of the following program segments:
+*
+*  UPLO = 'U':                      UPLO = 'L':
+*
+*        JC = 1                           JC = 1
+*        DO 2 J = 1, N                    DO 2 J = 1, N
+*           DO 1 I = 1, J                    DO 1 I = J, N
+*              AP(JC+I-1) = A(I,J)              AP(JC+I-J) = A(I,J)
+*      1    CONTINUE                    1    CONTINUE
+*           JC = JC + J                      JC = JC + N - J + 1
+*      2 CONTINUE                       2 CONTINUE
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ),
+     $                   ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, UPPER
+      INTEGER            J, JC, JCLAST, JJ
+      COMPLEX*16         AJJ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZSCAL, ZTPMV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOUNIT = LSAME( DIAG, 'N' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZTPTRI', -INFO )
+         RETURN
+      END IF
+*
+*     Check for singularity if non-unit.
+*
+      IF( NOUNIT ) THEN
+         IF( UPPER ) THEN
+            JJ = 0
+            DO 10 INFO = 1, N
+               JJ = JJ + INFO
+               IF( AP( JJ ).EQ.ZERO )
+     $            RETURN
+   10       CONTINUE
+         ELSE
+            JJ = 1
+            DO 20 INFO = 1, N
+               IF( AP( JJ ).EQ.ZERO )
+     $            RETURN
+               JJ = JJ + N - INFO + 1
+   20       CONTINUE
+         END IF
+         INFO = 0
+      END IF
+*
+      IF( UPPER ) THEN
+*
+*        Compute inverse of upper triangular matrix.
+*
+         JC = 1
+         DO 30 J = 1, N
+            IF( NOUNIT ) THEN
+               AP( JC+J-1 ) = ONE / AP( JC+J-1 )
+               AJJ = -AP( JC+J-1 )
+            ELSE
+               AJJ = -ONE
+            END IF
+*
+*           Compute elements 1:j-1 of j-th column.
+*
+            CALL ZTPMV( 'Upper', 'No transpose', DIAG, J-1, AP,
+     $                  AP( JC ), 1 )
+            CALL ZSCAL( J-1, AJJ, AP( JC ), 1 )
+            JC = JC + J
+   30    CONTINUE
+*
+      ELSE
+*
+*        Compute inverse of lower triangular matrix.
+*
+         JC = N*( N+1 ) / 2
+         DO 40 J = N, 1, -1
+            IF( NOUNIT ) THEN
+               AP( JC ) = ONE / AP( JC )
+               AJJ = -AP( JC )
+            ELSE
+               AJJ = -ONE
+            END IF
+            IF( J.LT.N ) THEN
+*
+*              Compute elements j+1:n of j-th column.
+*
+               CALL ZTPMV( 'Lower', 'No transpose', DIAG, N-J,
+     $                     AP( JCLAST ), AP( JC+1 ), 1 )
+               CALL ZSCAL( N-J, AJJ, AP( JC+1 ), 1 )
+            END IF
+            JCLAST = JC
+            JC = JC - N + J - 2
+   40    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZTPTRI
+*
+      END
diff --git a/costa/native/external/lapack/ztptrs.f b/costa/native/external/lapack/ztptrs.f
new file mode 100644
index 000000000..0fe620a7d
--- /dev/null
+++ b/costa/native/external/lapack/ztptrs.f
@@ -0,0 +1,154 @@
+      SUBROUTINE ZTPTRS( UPLO, TRANS, DIAG, N, NRHS, AP, B, LDB, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, TRANS, UPLO
+      INTEGER            INFO, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         AP( * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTPTRS solves a triangular system of the form
+*
+*     A * X = B,  A**T * X = B,  or  A**H * X = B,
+*
+*  where A is a triangular matrix of order N stored in packed format,
+*  and B is an N-by-NRHS matrix.  A check is made to verify that A is
+*  nonsingular.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  AP      (input) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          The upper or lower triangular matrix A, packed columnwise in
+*          a linear array.  The j-th column of A is stored in the array
+*          AP as follows:
+*          if UPLO = 'U', AP(i + (j-1)*j/2) = A(i,j) for 1<=i<=j;
+*          if UPLO = 'L', AP(i + (j-1)*(2*n-j)/2) = A(i,j) for j<=i<=n.
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, if INFO = 0, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*          > 0:  if INFO = i, the i-th diagonal element of A is zero,
+*                indicating that the matrix is singular and the
+*                solutions X have not been computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, UPPER
+      INTEGER            J, JC
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZTPSV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOUNIT = LSAME( DIAG, 'N' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.
+     $         LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZTPTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Check for singularity.
+*
+      IF( NOUNIT ) THEN
+         IF( UPPER ) THEN
+            JC = 1
+            DO 10 INFO = 1, N
+               IF( AP( JC+INFO-1 ).EQ.ZERO )
+     $            RETURN
+               JC = JC + INFO
+   10       CONTINUE
+         ELSE
+            JC = 1
+            DO 20 INFO = 1, N
+               IF( AP( JC ).EQ.ZERO )
+     $            RETURN
+               JC = JC + N - INFO + 1
+   20       CONTINUE
+         END IF
+      END IF
+      INFO = 0
+*
+*     Solve  A * x = b,  A**T * x = b,  or  A**H * x = b.
+*
+      DO 30 J = 1, NRHS
+         CALL ZTPSV( UPLO, TRANS, DIAG, N, AP, B( 1, J ), 1 )
+   30 CONTINUE
+*
+      RETURN
+*
+*     End of ZTPTRS
+*
+      END
diff --git a/costa/native/external/lapack/ztrcon.f b/costa/native/external/lapack/ztrcon.f
new file mode 100644
index 000000000..aa3d3b11a
--- /dev/null
+++ b/costa/native/external/lapack/ztrcon.f
@@ -0,0 +1,200 @@
+      SUBROUTINE ZTRCON( NORM, UPLO, DIAG, N, A, LDA, RCOND, WORK,
+     $                   RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, NORM, UPLO
+      INTEGER            INFO, LDA, N
+      DOUBLE PRECISION   RCOND
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   RWORK( * )
+      COMPLEX*16         A( LDA, * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTRCON estimates the reciprocal of the condition number of a
+*  triangular matrix A, in either the 1-norm or the infinity-norm.
+*
+*  The norm of A is computed and an estimate is obtained for
+*  norm(inv(A)), then the reciprocal of the condition number is
+*  computed as
+*     RCOND = 1 / ( norm(A) * norm(inv(A)) ).
+*
+*  Arguments
+*  =========
+*
+*  NORM    (input) CHARACTER*1
+*          Specifies whether the 1-norm condition number or the
+*          infinity-norm condition number is required:
+*          = '1' or 'O':  1-norm;
+*          = 'I':         Infinity-norm.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,N)
+*          The triangular matrix A.  If UPLO = 'U', the leading N-by-N
+*          upper triangular part of the array A contains the upper
+*          triangular matrix, and the strictly lower triangular part of
+*          A is not referenced.  If UPLO = 'L', the leading N-by-N lower
+*          triangular part of the array A contains the lower triangular
+*          matrix, and the strictly upper triangular part of A is not
+*          referenced.  If DIAG = 'U', the diagonal elements of A are
+*          also not referenced and are assumed to be 1.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  RCOND   (output) DOUBLE PRECISION
+*          The reciprocal of the condition number of the matrix A,
+*          computed as RCOND = 1/(norm(A) * norm(inv(A))).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE, ZERO
+      PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, ONENRM, UPPER
+      CHARACTER          NORMIN
+      INTEGER            IX, KASE, KASE1
+      DOUBLE PRECISION   AINVNM, ANORM, SCALE, SMLNUM, XNORM
+      COMPLEX*16         ZDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IZAMAX
+      DOUBLE PRECISION   DLAMCH, ZLANTR
+      EXTERNAL           LSAME, IZAMAX, DLAMCH, ZLANTR
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZDRSCL, ZLACON, ZLATRS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      ONENRM = NORM.EQ.'1' .OR. LSAME( NORM, 'O' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+      IF( .NOT.ONENRM .AND. .NOT.LSAME( NORM, 'I' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZTRCON', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         RCOND = ONE
+         RETURN
+      END IF
+*
+      RCOND = ZERO
+      SMLNUM = DLAMCH( 'Safe minimum' )*DBLE( MAX( 1, N ) )
+*
+*     Compute the norm of the triangular matrix A.
+*
+      ANORM = ZLANTR( NORM, UPLO, DIAG, N, N, A, LDA, RWORK )
+*
+*     Continue only if ANORM > 0.
+*
+      IF( ANORM.GT.ZERO ) THEN
+*
+*        Estimate the norm of the inverse of A.
+*
+         AINVNM = ZERO
+         NORMIN = 'N'
+         IF( ONENRM ) THEN
+            KASE1 = 1
+         ELSE
+            KASE1 = 2
+         END IF
+         KASE = 0
+   10    CONTINUE
+         CALL ZLACON( N, WORK( N+1 ), WORK, AINVNM, KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.KASE1 ) THEN
+*
+*              Multiply by inv(A).
+*
+               CALL ZLATRS( UPLO, 'No transpose', DIAG, NORMIN, N, A,
+     $                      LDA, WORK, SCALE, RWORK, INFO )
+            ELSE
+*
+*              Multiply by inv(A').
+*
+               CALL ZLATRS( UPLO, 'Conjugate transpose', DIAG, NORMIN,
+     $                      N, A, LDA, WORK, SCALE, RWORK, INFO )
+            END IF
+            NORMIN = 'Y'
+*
+*           Multiply by 1/SCALE if doing so will not cause overflow.
+*
+            IF( SCALE.NE.ONE ) THEN
+               IX = IZAMAX( N, WORK, 1 )
+               XNORM = CABS1( WORK( IX ) )
+               IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO )
+     $            GO TO 20
+               CALL ZDRSCL( N, SCALE, WORK, 1 )
+            END IF
+            GO TO 10
+         END IF
+*
+*        Compute the estimate of the reciprocal condition number.
+*
+         IF( AINVNM.NE.ZERO )
+     $      RCOND = ( ONE / ANORM ) / AINVNM
+      END IF
+*
+   20 CONTINUE
+      RETURN
+*
+*     End of ZTRCON
+*
+      END
diff --git a/costa/native/external/lapack/ztrevc.f b/costa/native/external/lapack/ztrevc.f
new file mode 100644
index 000000000..d1b4b7206
--- /dev/null
+++ b/costa/native/external/lapack/ztrevc.f
@@ -0,0 +1,390 @@
+      SUBROUTINE ZTREVC( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR,
+     $                   LDVR, MM, M, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          HOWMNY, SIDE
+      INTEGER            INFO, LDT, LDVL, LDVR, M, MM, N
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            SELECT( * )
+      DOUBLE PRECISION   RWORK( * )
+      COMPLEX*16         T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ),
+     $                   WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTREVC computes some or all of the right and/or left eigenvectors of
+*  a complex upper triangular matrix T.
+*
+*  The right eigenvector x and the left eigenvector y of T corresponding
+*  to an eigenvalue w are defined by:
+*
+*               T*x = w*x,     y'*T = w*y'
+*
+*  where y' denotes the conjugate transpose of the vector y.
+*
+*  If all eigenvectors are requested, the routine may either return the
+*  matrices X and/or Y of right or left eigenvectors of T, or the
+*  products Q*X and/or Q*Y, where Q is an input unitary
+*  matrix. If T was obtained from the Schur factorization of an
+*  original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of
+*  right or left eigenvectors of A.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'R':  compute right eigenvectors only;
+*          = 'L':  compute left eigenvectors only;
+*          = 'B':  compute both right and left eigenvectors.
+*
+*  HOWMNY  (input) CHARACTER*1
+*          = 'A':  compute all right and/or left eigenvectors;
+*          = 'B':  compute all right and/or left eigenvectors,
+*                  and backtransform them using the input matrices
+*                  supplied in VR and/or VL;
+*          = 'S':  compute selected right and/or left eigenvectors,
+*                  specified by the logical array SELECT.
+*
+*  SELECT  (input) LOGICAL array, dimension (N)
+*          If HOWMNY = 'S', SELECT specifies the eigenvectors to be
+*          computed.
+*          If HOWMNY = 'A' or 'B', SELECT is not referenced.
+*          To select the eigenvector corresponding to the j-th
+*          eigenvalue, SELECT(j) must be set to .TRUE..
+*
+*  N       (input) INTEGER
+*          The order of the matrix T. N >= 0.
+*
+*  T       (input/output) COMPLEX*16 array, dimension (LDT,N)
+*          The upper triangular matrix T.  T is modified, but restored
+*          on exit.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= max(1,N).
+*
+*  VL      (input/output) COMPLEX*16 array, dimension (LDVL,MM)
+*          On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must
+*          contain an N-by-N matrix Q (usually the unitary matrix Q of
+*          Schur vectors returned by ZHSEQR).
+*          On exit, if SIDE = 'L' or 'B', VL contains:
+*          if HOWMNY = 'A', the matrix Y of left eigenvectors of T;
+*                           VL is lower triangular. The i-th column
+*                           VL(i) of VL is the eigenvector corresponding
+*                           to T(i,i).
+*          if HOWMNY = 'B', the matrix Q*Y;
+*          if HOWMNY = 'S', the left eigenvectors of T specified by
+*                           SELECT, stored consecutively in the columns
+*                           of VL, in the same order as their
+*                           eigenvalues.
+*          If SIDE = 'R', VL is not referenced.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the array VL.  LDVL >= max(1,N) if
+*          SIDE = 'L' or 'B'; LDVL >= 1 otherwise.
+*
+*  VR      (input/output) COMPLEX*16 array, dimension (LDVR,MM)
+*          On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must
+*          contain an N-by-N matrix Q (usually the unitary matrix Q of
+*          Schur vectors returned by ZHSEQR).
+*          On exit, if SIDE = 'R' or 'B', VR contains:
+*          if HOWMNY = 'A', the matrix X of right eigenvectors of T;
+*                           VR is upper triangular. The i-th column
+*                           VR(i) of VR is the eigenvector corresponding
+*                           to T(i,i).
+*          if HOWMNY = 'B', the matrix Q*X;
+*          if HOWMNY = 'S', the right eigenvectors of T specified by
+*                           SELECT, stored consecutively in the columns
+*                           of VR, in the same order as their
+*                           eigenvalues.
+*          If SIDE = 'L', VR is not referenced.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the array VR.  LDVR >= max(1,N) if
+*           SIDE = 'R' or 'B'; LDVR >= 1 otherwise.
+*
+*  MM      (input) INTEGER
+*          The number of columns in the arrays VL and/or VR. MM >= M.
+*
+*  M       (output) INTEGER
+*          The number of columns in the arrays VL and/or VR actually
+*          used to store the eigenvectors.  If HOWMNY = 'A' or 'B', M
+*          is set to N.  Each selected eigenvector occupies one
+*          column.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The algorithm used in this program is basically backward (forward)
+*  substitution, with scaling to make the the code robust against
+*  possible overflow.
+*
+*  Each eigenvector is normalized so that the element of largest
+*  magnitude has magnitude 1; here the magnitude of a complex number
+*  (x,y) is taken to be |x| + |y|.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+      COMPLEX*16         CMZERO, CMONE
+      PARAMETER          ( CMZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   CMONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            ALLV, BOTHV, LEFTV, OVER, RIGHTV, SOMEV
+      INTEGER            I, II, IS, J, K, KI
+      DOUBLE PRECISION   OVFL, REMAX, SCALE, SMIN, SMLNUM, ULP, UNFL
+      COMPLEX*16         CDUM
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IZAMAX
+      DOUBLE PRECISION   DLAMCH, DZASUM
+      EXTERNAL           LSAME, IZAMAX, DLAMCH, DZASUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZCOPY, ZDSCAL, ZGEMV, ZLATRS
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      BOTHV = LSAME( SIDE, 'B' )
+      RIGHTV = LSAME( SIDE, 'R' ) .OR. BOTHV
+      LEFTV = LSAME( SIDE, 'L' ) .OR. BOTHV
+*
+      ALLV = LSAME( HOWMNY, 'A' )
+      OVER = LSAME( HOWMNY, 'B' )
+      SOMEV = LSAME( HOWMNY, 'S' )
+*
+*     Set M to the number of columns required to store the selected
+*     eigenvectors.
+*
+      IF( SOMEV ) THEN
+         M = 0
+         DO 10 J = 1, N
+            IF( SELECT( J ) )
+     $         M = M + 1
+   10    CONTINUE
+      ELSE
+         M = N
+      END IF
+*
+      INFO = 0
+      IF( .NOT.RIGHTV .AND. .NOT.LEFTV ) THEN
+         INFO = -1
+      ELSE IF( .NOT.ALLV .AND. .NOT.OVER .AND. .NOT.SOMEV ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDT.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDVL.LT.1 .OR. ( LEFTV .AND. LDVL.LT.N ) ) THEN
+         INFO = -8
+      ELSE IF( LDVR.LT.1 .OR. ( RIGHTV .AND. LDVR.LT.N ) ) THEN
+         INFO = -10
+      ELSE IF( MM.LT.M ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZTREVC', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible.
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Set the constants to control overflow.
+*
+      UNFL = DLAMCH( 'Safe minimum' )
+      OVFL = ONE / UNFL
+      CALL DLABAD( UNFL, OVFL )
+      ULP = DLAMCH( 'Precision' )
+      SMLNUM = UNFL*( N / ULP )
+*
+*     Store the diagonal elements of T in working array WORK.
+*
+      DO 20 I = 1, N
+         WORK( I+N ) = T( I, I )
+   20 CONTINUE
+*
+*     Compute 1-norm of each column of strictly upper triangular
+*     part of T to control overflow in triangular solver.
+*
+      RWORK( 1 ) = ZERO
+      DO 30 J = 2, N
+         RWORK( J ) = DZASUM( J-1, T( 1, J ), 1 )
+   30 CONTINUE
+*
+      IF( RIGHTV ) THEN
+*
+*        Compute right eigenvectors.
+*
+         IS = M
+         DO 80 KI = N, 1, -1
+*
+            IF( SOMEV ) THEN
+               IF( .NOT.SELECT( KI ) )
+     $            GO TO 80
+            END IF
+            SMIN = MAX( ULP*( CABS1( T( KI, KI ) ) ), SMLNUM )
+*
+            WORK( 1 ) = CMONE
+*
+*           Form right-hand side.
+*
+            DO 40 K = 1, KI - 1
+               WORK( K ) = -T( K, KI )
+   40       CONTINUE
+*
+*           Solve the triangular system:
+*              (T(1:KI-1,1:KI-1) - T(KI,KI))*X = SCALE*WORK.
+*
+            DO 50 K = 1, KI - 1
+               T( K, K ) = T( K, K ) - T( KI, KI )
+               IF( CABS1( T( K, K ) ).LT.SMIN )
+     $            T( K, K ) = SMIN
+   50       CONTINUE
+*
+            IF( KI.GT.1 ) THEN
+               CALL ZLATRS( 'Upper', 'No transpose', 'Non-unit', 'Y',
+     $                      KI-1, T, LDT, WORK( 1 ), SCALE, RWORK,
+     $                      INFO )
+               WORK( KI ) = SCALE
+            END IF
+*
+*           Copy the vector x or Q*x to VR and normalize.
+*
+            IF( .NOT.OVER ) THEN
+               CALL ZCOPY( KI, WORK( 1 ), 1, VR( 1, IS ), 1 )
+*
+               II = IZAMAX( KI, VR( 1, IS ), 1 )
+               REMAX = ONE / CABS1( VR( II, IS ) )
+               CALL ZDSCAL( KI, REMAX, VR( 1, IS ), 1 )
+*
+               DO 60 K = KI + 1, N
+                  VR( K, IS ) = CMZERO
+   60          CONTINUE
+            ELSE
+               IF( KI.GT.1 )
+     $            CALL ZGEMV( 'N', N, KI-1, CMONE, VR, LDVR, WORK( 1 ),
+     $                        1, DCMPLX( SCALE ), VR( 1, KI ), 1 )
+*
+               II = IZAMAX( N, VR( 1, KI ), 1 )
+               REMAX = ONE / CABS1( VR( II, KI ) )
+               CALL ZDSCAL( N, REMAX, VR( 1, KI ), 1 )
+            END IF
+*
+*           Set back the original diagonal elements of T.
+*
+            DO 70 K = 1, KI - 1
+               T( K, K ) = WORK( K+N )
+   70       CONTINUE
+*
+            IS = IS - 1
+   80    CONTINUE
+      END IF
+*
+      IF( LEFTV ) THEN
+*
+*        Compute left eigenvectors.
+*
+         IS = 1
+         DO 130 KI = 1, N
+*
+            IF( SOMEV ) THEN
+               IF( .NOT.SELECT( KI ) )
+     $            GO TO 130
+            END IF
+            SMIN = MAX( ULP*( CABS1( T( KI, KI ) ) ), SMLNUM )
+*
+            WORK( N ) = CMONE
+*
+*           Form right-hand side.
+*
+            DO 90 K = KI + 1, N
+               WORK( K ) = -DCONJG( T( KI, K ) )
+   90       CONTINUE
+*
+*           Solve the triangular system:
+*              (T(KI+1:N,KI+1:N) - T(KI,KI))'*X = SCALE*WORK.
+*
+            DO 100 K = KI + 1, N
+               T( K, K ) = T( K, K ) - T( KI, KI )
+               IF( CABS1( T( K, K ) ).LT.SMIN )
+     $            T( K, K ) = SMIN
+  100       CONTINUE
+*
+            IF( KI.LT.N ) THEN
+               CALL ZLATRS( 'Upper', 'Conjugate transpose', 'Non-unit',
+     $                      'Y', N-KI, T( KI+1, KI+1 ), LDT,
+     $                      WORK( KI+1 ), SCALE, RWORK, INFO )
+               WORK( KI ) = SCALE
+            END IF
+*
+*           Copy the vector x or Q*x to VL and normalize.
+*
+            IF( .NOT.OVER ) THEN
+               CALL ZCOPY( N-KI+1, WORK( KI ), 1, VL( KI, IS ), 1 )
+*
+               II = IZAMAX( N-KI+1, VL( KI, IS ), 1 ) + KI - 1
+               REMAX = ONE / CABS1( VL( II, IS ) )
+               CALL ZDSCAL( N-KI+1, REMAX, VL( KI, IS ), 1 )
+*
+               DO 110 K = 1, KI - 1
+                  VL( K, IS ) = CMZERO
+  110          CONTINUE
+            ELSE
+               IF( KI.LT.N )
+     $            CALL ZGEMV( 'N', N, N-KI, CMONE, VL( 1, KI+1 ), LDVL,
+     $                        WORK( KI+1 ), 1, DCMPLX( SCALE ),
+     $                        VL( 1, KI ), 1 )
+*
+               II = IZAMAX( N, VL( 1, KI ), 1 )
+               REMAX = ONE / CABS1( VL( II, KI ) )
+               CALL ZDSCAL( N, REMAX, VL( 1, KI ), 1 )
+            END IF
+*
+*           Set back the original diagonal elements of T.
+*
+            DO 120 K = KI + 1, N
+               T( K, K ) = WORK( K+N )
+  120       CONTINUE
+*
+            IS = IS + 1
+  130    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZTREVC
+*
+      END
diff --git a/costa/native/external/lapack/ztrexc.f b/costa/native/external/lapack/ztrexc.f
new file mode 100644
index 000000000..4ba7b4a95
--- /dev/null
+++ b/costa/native/external/lapack/ztrexc.f
@@ -0,0 +1,163 @@
+      SUBROUTINE ZTREXC( COMPQ, N, T, LDT, Q, LDQ, IFST, ILST, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     March 31, 1993
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPQ
+      INTEGER            IFST, ILST, INFO, LDQ, LDT, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         Q( LDQ, * ), T( LDT, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTREXC reorders the Schur factorization of a complex matrix
+*  A = Q*T*Q**H, so that the diagonal element of T with row index IFST
+*  is moved to row ILST.
+*
+*  The Schur form T is reordered by a unitary similarity transformation
+*  Z**H*T*Z, and optionally the matrix Q of Schur vectors is updated by
+*  postmultplying it with Z.
+*
+*  Arguments
+*  =========
+*
+*  COMPQ   (input) CHARACTER*1
+*          = 'V':  update the matrix Q of Schur vectors;
+*          = 'N':  do not update Q.
+*
+*  N       (input) INTEGER
+*          The order of the matrix T. N >= 0.
+*
+*  T       (input/output) COMPLEX*16 array, dimension (LDT,N)
+*          On entry, the upper triangular matrix T.
+*          On exit, the reordered upper triangular matrix.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= max(1,N).
+*
+*  Q       (input/output) COMPLEX*16 array, dimension (LDQ,N)
+*          On entry, if COMPQ = 'V', the matrix Q of Schur vectors.
+*          On exit, if COMPQ = 'V', Q has been postmultiplied by the
+*          unitary transformation matrix Z which reorders T.
+*          If COMPQ = 'N', Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.  LDQ >= max(1,N).
+*
+*  IFST    (input) INTEGER
+*  ILST    (input) INTEGER
+*          Specify the reordering of the diagonal elements of T:
+*          The element with row index IFST is moved to row ILST by a
+*          sequence of transpositions between adjacent elements.
+*          1 <= IFST <= N; 1 <= ILST <= N.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            WANTQ
+      INTEGER            K, M1, M2, M3
+      DOUBLE PRECISION   CS
+      COMPLEX*16         SN, T11, T22, TEMP
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLARTG, ZROT
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters.
+*
+      INFO = 0
+      WANTQ = LSAME( COMPQ, 'V' )
+      IF( .NOT.LSAME( COMPQ, 'N' ) .AND. .NOT.WANTQ ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDT.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.MAX( 1, N ) ) ) THEN
+         INFO = -6
+      ELSE IF( IFST.LT.1 .OR. IFST.GT.N ) THEN
+         INFO = -7
+      ELSE IF( ILST.LT.1 .OR. ILST.GT.N ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZTREXC', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.1 .OR. IFST.EQ.ILST )
+     $   RETURN
+*
+      IF( IFST.LT.ILST ) THEN
+*
+*        Move the IFST-th diagonal element forward down the diagonal.
+*
+         M1 = 0
+         M2 = -1
+         M3 = 1
+      ELSE
+*
+*        Move the IFST-th diagonal element backward up the diagonal.
+*
+         M1 = -1
+         M2 = 0
+         M3 = -1
+      END IF
+*
+      DO 10 K = IFST + M1, ILST + M2, M3
+*
+*        Interchange the k-th and (k+1)-th diagonal elements.
+*
+         T11 = T( K, K )
+         T22 = T( K+1, K+1 )
+*
+*        Determine the transformation to perform the interchange.
+*
+         CALL ZLARTG( T( K, K+1 ), T22-T11, CS, SN, TEMP )
+*
+*        Apply transformation to the matrix T.
+*
+         IF( K+2.LE.N )
+     $      CALL ZROT( N-K-1, T( K, K+2 ), LDT, T( K+1, K+2 ), LDT, CS,
+     $                 SN )
+         CALL ZROT( K-1, T( 1, K ), 1, T( 1, K+1 ), 1, CS,
+     $              DCONJG( SN ) )
+*
+         T( K, K ) = T22
+         T( K+1, K+1 ) = T11
+*
+         IF( WANTQ ) THEN
+*
+*           Accumulate transformation in the matrix Q.
+*
+            CALL ZROT( N, Q( 1, K ), 1, Q( 1, K+1 ), 1, CS,
+     $                 DCONJG( SN ) )
+         END IF
+*
+   10 CONTINUE
+*
+      RETURN
+*
+*     End of ZTREXC
+*
+      END
diff --git a/costa/native/external/lapack/ztrrfs.f b/costa/native/external/lapack/ztrrfs.f
new file mode 100644
index 000000000..08a0383b1
--- /dev/null
+++ b/costa/native/external/lapack/ztrrfs.f
@@ -0,0 +1,378 @@
+      SUBROUTINE ZTRRFS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB, X,
+     $                   LDX, FERR, BERR, WORK, RWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, TRANS, UPLO
+      INTEGER            INFO, LDA, LDB, LDX, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   BERR( * ), FERR( * ), RWORK( * )
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), WORK( * ),
+     $                   X( LDX, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTRRFS provides error bounds and backward error estimates for the
+*  solution to a system of linear equations with a triangular
+*  coefficient matrix.
+*
+*  The solution matrix X must be computed by ZTRTRS or some other
+*  means before entering this routine.  ZTRRFS does not do iterative
+*  refinement because doing so cannot improve the backward error.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrices B and X.  NRHS >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,N)
+*          The triangular matrix A.  If UPLO = 'U', the leading N-by-N
+*          upper triangular part of the array A contains the upper
+*          triangular matrix, and the strictly lower triangular part of
+*          A is not referenced.  If UPLO = 'L', the leading N-by-N lower
+*          triangular part of the array A contains the lower triangular
+*          matrix, and the strictly upper triangular part of A is not
+*          referenced.  If DIAG = 'U', the diagonal elements of A are
+*          also not referenced and are assumed to be 1.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input) COMPLEX*16 array, dimension (LDB,NRHS)
+*          The right hand side matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  X       (input) COMPLEX*16 array, dimension (LDX,NRHS)
+*          The solution matrix X.
+*
+*  LDX     (input) INTEGER
+*          The leading dimension of the array X.  LDX >= max(1,N).
+*
+*  FERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The estimated forward error bound for each solution vector
+*          X(j) (the j-th column of the solution matrix X).
+*          If XTRUE is the true solution corresponding to X(j), FERR(j)
+*          is an estimated upper bound for the magnitude of the largest
+*          element in (X(j) - XTRUE) divided by the magnitude of the
+*          largest element in X(j).  The estimate is as reliable as
+*          the estimate for RCOND, and is almost always a slight
+*          overestimate of the true error.
+*
+*  BERR    (output) DOUBLE PRECISION array, dimension (NRHS)
+*          The componentwise relative backward error of each solution
+*          vector X(j) (i.e., the smallest relative change in
+*          any element of A or B that makes X(j) an exact solution).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO
+      PARAMETER          ( ZERO = 0.0D+0 )
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRAN, NOUNIT, UPPER
+      CHARACTER          TRANSN, TRANST
+      INTEGER            I, J, K, KASE, NZ
+      DOUBLE PRECISION   EPS, LSTRES, S, SAFE1, SAFE2, SAFMIN, XK
+      COMPLEX*16         ZDUM
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZAXPY, ZCOPY, ZLACON, ZTRMV, ZTRSV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH
+      EXTERNAL           LSAME, DLAMCH
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( ZDUM ) = ABS( DBLE( ZDUM ) ) + ABS( DIMAG( ZDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      NOUNIT = LSAME( DIAG, 'N' )
+*
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
+     $         LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZTRRFS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 .OR. NRHS.EQ.0 ) THEN
+         DO 10 J = 1, NRHS
+            FERR( J ) = ZERO
+            BERR( J ) = ZERO
+   10    CONTINUE
+         RETURN
+      END IF
+*
+      IF( NOTRAN ) THEN
+         TRANSN = 'N'
+         TRANST = 'C'
+      ELSE
+         TRANSN = 'C'
+         TRANST = 'N'
+      END IF
+*
+*     NZ = maximum number of nonzero elements in each row of A, plus 1
+*
+      NZ = N + 1
+      EPS = DLAMCH( 'Epsilon' )
+      SAFMIN = DLAMCH( 'Safe minimum' )
+      SAFE1 = NZ*SAFMIN
+      SAFE2 = SAFE1 / EPS
+*
+*     Do for each right hand side
+*
+      DO 250 J = 1, NRHS
+*
+*        Compute residual R = B - op(A) * X,
+*        where op(A) = A, A**T, or A**H, depending on TRANS.
+*
+         CALL ZCOPY( N, X( 1, J ), 1, WORK, 1 )
+         CALL ZTRMV( UPLO, TRANS, DIAG, N, A, LDA, WORK, 1 )
+         CALL ZAXPY( N, -ONE, B( 1, J ), 1, WORK, 1 )
+*
+*        Compute componentwise relative backward error from formula
+*
+*        max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )
+*
+*        where abs(Z) is the componentwise absolute value of the matrix
+*        or vector Z.  If the i-th component of the denominator is less
+*        than SAFE2, then SAFE1 is added to the i-th components of the
+*        numerator and denominator before dividing.
+*
+         DO 20 I = 1, N
+            RWORK( I ) = CABS1( B( I, J ) )
+   20    CONTINUE
+*
+         IF( NOTRAN ) THEN
+*
+*           Compute abs(A)*abs(X) + abs(B).
+*
+            IF( UPPER ) THEN
+               IF( NOUNIT ) THEN
+                  DO 40 K = 1, N
+                     XK = CABS1( X( K, J ) )
+                     DO 30 I = 1, K
+                        RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK
+   30                CONTINUE
+   40             CONTINUE
+               ELSE
+                  DO 60 K = 1, N
+                     XK = CABS1( X( K, J ) )
+                     DO 50 I = 1, K - 1
+                        RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK
+   50                CONTINUE
+                     RWORK( K ) = RWORK( K ) + XK
+   60             CONTINUE
+               END IF
+            ELSE
+               IF( NOUNIT ) THEN
+                  DO 80 K = 1, N
+                     XK = CABS1( X( K, J ) )
+                     DO 70 I = K, N
+                        RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK
+   70                CONTINUE
+   80             CONTINUE
+               ELSE
+                  DO 100 K = 1, N
+                     XK = CABS1( X( K, J ) )
+                     DO 90 I = K + 1, N
+                        RWORK( I ) = RWORK( I ) + CABS1( A( I, K ) )*XK
+   90                CONTINUE
+                     RWORK( K ) = RWORK( K ) + XK
+  100             CONTINUE
+               END IF
+            END IF
+         ELSE
+*
+*           Compute abs(A**H)*abs(X) + abs(B).
+*
+            IF( UPPER ) THEN
+               IF( NOUNIT ) THEN
+                  DO 120 K = 1, N
+                     S = ZERO
+                     DO 110 I = 1, K
+                        S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) )
+  110                CONTINUE
+                     RWORK( K ) = RWORK( K ) + S
+  120             CONTINUE
+               ELSE
+                  DO 140 K = 1, N
+                     S = CABS1( X( K, J ) )
+                     DO 130 I = 1, K - 1
+                        S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) )
+  130                CONTINUE
+                     RWORK( K ) = RWORK( K ) + S
+  140             CONTINUE
+               END IF
+            ELSE
+               IF( NOUNIT ) THEN
+                  DO 160 K = 1, N
+                     S = ZERO
+                     DO 150 I = K, N
+                        S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) )
+  150                CONTINUE
+                     RWORK( K ) = RWORK( K ) + S
+  160             CONTINUE
+               ELSE
+                  DO 180 K = 1, N
+                     S = CABS1( X( K, J ) )
+                     DO 170 I = K + 1, N
+                        S = S + CABS1( A( I, K ) )*CABS1( X( I, J ) )
+  170                CONTINUE
+                     RWORK( K ) = RWORK( K ) + S
+  180             CONTINUE
+               END IF
+            END IF
+         END IF
+         S = ZERO
+         DO 190 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               S = MAX( S, CABS1( WORK( I ) ) / RWORK( I ) )
+            ELSE
+               S = MAX( S, ( CABS1( WORK( I ) )+SAFE1 ) /
+     $             ( RWORK( I )+SAFE1 ) )
+            END IF
+  190    CONTINUE
+         BERR( J ) = S
+*
+*        Bound error from formula
+*
+*        norm(X - XTRUE) / norm(X) .le. FERR =
+*        norm( abs(inv(op(A)))*
+*           ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)
+*
+*        where
+*          norm(Z) is the magnitude of the largest component of Z
+*          inv(op(A)) is the inverse of op(A)
+*          abs(Z) is the componentwise absolute value of the matrix or
+*             vector Z
+*          NZ is the maximum number of nonzeros in any row of A, plus 1
+*          EPS is machine epsilon
+*
+*        The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))
+*        is incremented by SAFE1 if the i-th component of
+*        abs(op(A))*abs(X) + abs(B) is less than SAFE2.
+*
+*        Use ZLACON to estimate the infinity-norm of the matrix
+*           inv(op(A)) * diag(W),
+*        where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) )))
+*
+         DO 200 I = 1, N
+            IF( RWORK( I ).GT.SAFE2 ) THEN
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I )
+            ELSE
+               RWORK( I ) = CABS1( WORK( I ) ) + NZ*EPS*RWORK( I ) +
+     $                      SAFE1
+            END IF
+  200    CONTINUE
+*
+         KASE = 0
+  210    CONTINUE
+         CALL ZLACON( N, WORK( N+1 ), WORK, FERR( J ), KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Multiply by diag(W)*inv(op(A)**H).
+*
+               CALL ZTRSV( UPLO, TRANST, DIAG, N, A, LDA, WORK, 1 )
+               DO 220 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  220          CONTINUE
+            ELSE
+*
+*              Multiply by inv(op(A))*diag(W).
+*
+               DO 230 I = 1, N
+                  WORK( I ) = RWORK( I )*WORK( I )
+  230          CONTINUE
+               CALL ZTRSV( UPLO, TRANSN, DIAG, N, A, LDA, WORK, 1 )
+            END IF
+            GO TO 210
+         END IF
+*
+*        Normalize error.
+*
+         LSTRES = ZERO
+         DO 240 I = 1, N
+            LSTRES = MAX( LSTRES, CABS1( X( I, J ) ) )
+  240    CONTINUE
+         IF( LSTRES.NE.ZERO )
+     $      FERR( J ) = FERR( J ) / LSTRES
+*
+  250 CONTINUE
+*
+      RETURN
+*
+*     End of ZTRRFS
+*
+      END
diff --git a/costa/native/external/lapack/ztrsen.f b/costa/native/external/lapack/ztrsen.f
new file mode 100644
index 000000000..6eda60719
--- /dev/null
+++ b/costa/native/external/lapack/ztrsen.f
@@ -0,0 +1,358 @@
+      SUBROUTINE ZTRSEN( JOB, COMPQ, SELECT, N, T, LDT, Q, LDQ, W, M, S,
+     $                   SEP, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          COMPQ, JOB
+      INTEGER            INFO, LDQ, LDT, LWORK, M, N
+      DOUBLE PRECISION   S, SEP
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            SELECT( * )
+      COMPLEX*16         Q( LDQ, * ), T( LDT, * ), W( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTRSEN reorders the Schur factorization of a complex matrix
+*  A = Q*T*Q**H, so that a selected cluster of eigenvalues appears in
+*  the leading positions on the diagonal of the upper triangular matrix
+*  T, and the leading columns of Q form an orthonormal basis of the
+*  corresponding right invariant subspace.
+*
+*  Optionally the routine computes the reciprocal condition numbers of
+*  the cluster of eigenvalues and/or the invariant subspace.
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          Specifies whether condition numbers are required for the
+*          cluster of eigenvalues (S) or the invariant subspace (SEP):
+*          = 'N': none;
+*          = 'E': for eigenvalues only (S);
+*          = 'V': for invariant subspace only (SEP);
+*          = 'B': for both eigenvalues and invariant subspace (S and
+*                 SEP).
+*
+*  COMPQ   (input) CHARACTER*1
+*          = 'V': update the matrix Q of Schur vectors;
+*          = 'N': do not update Q.
+*
+*  SELECT  (input) LOGICAL array, dimension (N)
+*          SELECT specifies the eigenvalues in the selected cluster. To
+*          select the j-th eigenvalue, SELECT(j) must be set to .TRUE..
+*
+*  N       (input) INTEGER
+*          The order of the matrix T. N >= 0.
+*
+*  T       (input/output) COMPLEX*16 array, dimension (LDT,N)
+*          On entry, the upper triangular matrix T.
+*          On exit, T is overwritten by the reordered matrix T, with the
+*          selected eigenvalues as the leading diagonal elements.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= max(1,N).
+*
+*  Q       (input/output) COMPLEX*16 array, dimension (LDQ,N)
+*          On entry, if COMPQ = 'V', the matrix Q of Schur vectors.
+*          On exit, if COMPQ = 'V', Q has been postmultiplied by the
+*          unitary transformation matrix which reorders T; the leading M
+*          columns of Q form an orthonormal basis for the specified
+*          invariant subspace.
+*          If COMPQ = 'N', Q is not referenced.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q.
+*          LDQ >= 1; and if COMPQ = 'V', LDQ >= N.
+*
+*  W       (output) COMPLEX*16 array, dimension (N)
+*          The reordered eigenvalues of T, in the same order as they
+*          appear on the diagonal of T.
+*
+*  M       (output) INTEGER
+*          The dimension of the specified invariant subspace.
+*          0 <= M <= N.
+*
+*  S       (output) DOUBLE PRECISION
+*          If JOB = 'E' or 'B', S is a lower bound on the reciprocal
+*          condition number for the selected cluster of eigenvalues.
+*          S cannot underestimate the true reciprocal condition number
+*          by more than a factor of sqrt(N). If M = 0 or N, S = 1.
+*          If JOB = 'N' or 'V', S is not referenced.
+*
+*  SEP     (output) DOUBLE PRECISION
+*          If JOB = 'V' or 'B', SEP is the estimated reciprocal
+*          condition number of the specified invariant subspace. If
+*          M = 0 or N, SEP = norm(T).
+*          If JOB = 'N' or 'E', SEP is not referenced.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          If JOB = 'N', WORK is not referenced.  Otherwise,
+*          on exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If JOB = 'N', LWORK >= 1;
+*          if JOB = 'E', LWORK = M*(N-M);
+*          if JOB = 'V' or 'B', LWORK >= 2*M*(N-M).
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  ZTRSEN first collects the selected eigenvalues by computing a unitary
+*  transformation Z to move them to the top left corner of T. In other
+*  words, the selected eigenvalues are the eigenvalues of T11 in:
+*
+*                Z'*T*Z = ( T11 T12 ) n1
+*                         (  0  T22 ) n2
+*                            n1  n2
+*
+*  where N = n1+n2 and Z' means the conjugate transpose of Z. The first
+*  n1 columns of Z span the specified invariant subspace of T.
+*
+*  If T has been obtained from the Schur factorization of a matrix
+*  A = Q*T*Q', then the reordered Schur factorization of A is given by
+*  A = (Q*Z)*(Z'*T*Z)*(Q*Z)', and the first n1 columns of Q*Z span the
+*  corresponding invariant subspace of A.
+*
+*  The reciprocal condition number of the average of the eigenvalues of
+*  T11 may be returned in S. S lies between 0 (very badly conditioned)
+*  and 1 (very well conditioned). It is computed as follows. First we
+*  compute R so that
+*
+*                         P = ( I  R ) n1
+*                             ( 0  0 ) n2
+*                               n1 n2
+*
+*  is the projector on the invariant subspace associated with T11.
+*  R is the solution of the Sylvester equation:
+*
+*                        T11*R - R*T22 = T12.
+*
+*  Let F-norm(M) denote the Frobenius-norm of M and 2-norm(M) denote
+*  the two-norm of M. Then S is computed as the lower bound
+*
+*                      (1 + F-norm(R)**2)**(-1/2)
+*
+*  on the reciprocal of 2-norm(P), the true reciprocal condition number.
+*  S cannot underestimate 1 / 2-norm(P) by more than a factor of
+*  sqrt(N).
+*
+*  An approximate error bound for the computed average of the
+*  eigenvalues of T11 is
+*
+*                         EPS * norm(T) / S
+*
+*  where EPS is the machine precision.
+*
+*  The reciprocal condition number of the right invariant subspace
+*  spanned by the first n1 columns of Z (or of Q*Z) is returned in SEP.
+*  SEP is defined as the separation of T11 and T22:
+*
+*                     sep( T11, T22 ) = sigma-min( C )
+*
+*  where sigma-min(C) is the smallest singular value of the
+*  n1*n2-by-n1*n2 matrix
+*
+*     C  = kprod( I(n2), T11 ) - kprod( transpose(T22), I(n1) )
+*
+*  I(m) is an m by m identity matrix, and kprod denotes the Kronecker
+*  product. We estimate sigma-min(C) by the reciprocal of an estimate of
+*  the 1-norm of inverse(C). The true reciprocal 1-norm of inverse(C)
+*  cannot differ from sigma-min(C) by more than a factor of sqrt(n1*n2).
+*
+*  When SEP is small, small changes in T can cause large changes in
+*  the invariant subspace. An approximate bound on the maximum angular
+*  error in the computed right invariant subspace is
+*
+*                      EPS * norm(T) / SEP
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, WANTBH, WANTQ, WANTS, WANTSP
+      INTEGER            IERR, K, KASE, KS, LWMIN, N1, N2, NN
+      DOUBLE PRECISION   EST, RNORM, SCALE
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   RWORK( 1 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   ZLANGE
+      EXTERNAL           LSAME, ZLANGE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLACON, ZLACPY, ZTREXC, ZTRSYL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters.
+*
+      WANTBH = LSAME( JOB, 'B' )
+      WANTS = LSAME( JOB, 'E' ) .OR. WANTBH
+      WANTSP = LSAME( JOB, 'V' ) .OR. WANTBH
+      WANTQ = LSAME( COMPQ, 'V' )
+*
+*     Set M to the number of selected eigenvalues.
+*
+      M = 0
+      DO 10 K = 1, N
+         IF( SELECT( K ) )
+     $      M = M + 1
+   10 CONTINUE
+*
+      N1 = M
+      N2 = N - M
+      NN = N1*N2
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+*
+      IF( WANTSP ) THEN
+         LWMIN = MAX( 1, 2*NN )
+      ELSE IF( LSAME( JOB, 'N' ) ) THEN
+         LWMIN = 1
+      ELSE IF( LSAME( JOB, 'E' ) ) THEN
+         LWMIN = MAX( 1, NN )
+      END IF
+*
+      IF( .NOT.LSAME( JOB, 'N' ) .AND. .NOT.WANTS .AND. .NOT.WANTSP )
+     $     THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( COMPQ, 'N' ) .AND. .NOT.WANTQ ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDT.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN
+         INFO = -8
+      ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
+         INFO = -14
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         WORK( 1 ) = LWMIN
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZTRSEN', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.N .OR. M.EQ.0 ) THEN
+         IF( WANTS )
+     $      S = ONE
+         IF( WANTSP )
+     $      SEP = ZLANGE( '1', N, N, T, LDT, RWORK )
+         GO TO 40
+      END IF
+*
+*     Collect the selected eigenvalues at the top left corner of T.
+*
+      KS = 0
+      DO 20 K = 1, N
+         IF( SELECT( K ) ) THEN
+            KS = KS + 1
+*
+*           Swap the K-th eigenvalue to position KS.
+*
+            IF( K.NE.KS )
+     $         CALL ZTREXC( COMPQ, N, T, LDT, Q, LDQ, K, KS, IERR )
+         END IF
+   20 CONTINUE
+*
+      IF( WANTS ) THEN
+*
+*        Solve the Sylvester equation for R:
+*
+*           T11*R - R*T22 = scale*T12
+*
+         CALL ZLACPY( 'F', N1, N2, T( 1, N1+1 ), LDT, WORK, N1 )
+         CALL ZTRSYL( 'N', 'N', -1, N1, N2, T, LDT, T( N1+1, N1+1 ),
+     $                LDT, WORK, N1, SCALE, IERR )
+*
+*        Estimate the reciprocal of the condition number of the cluster
+*        of eigenvalues.
+*
+         RNORM = ZLANGE( 'F', N1, N2, WORK, N1, RWORK )
+         IF( RNORM.EQ.ZERO ) THEN
+            S = ONE
+         ELSE
+            S = SCALE / ( SQRT( SCALE*SCALE / RNORM+RNORM )*
+     $          SQRT( RNORM ) )
+         END IF
+      END IF
+*
+      IF( WANTSP ) THEN
+*
+*        Estimate sep(T11,T22).
+*
+         EST = ZERO
+         KASE = 0
+   30    CONTINUE
+         CALL ZLACON( NN, WORK( NN+1 ), WORK, EST, KASE )
+         IF( KASE.NE.0 ) THEN
+            IF( KASE.EQ.1 ) THEN
+*
+*              Solve T11*R - R*T22 = scale*X.
+*
+               CALL ZTRSYL( 'N', 'N', -1, N1, N2, T, LDT,
+     $                      T( N1+1, N1+1 ), LDT, WORK, N1, SCALE,
+     $                      IERR )
+            ELSE
+*
+*              Solve T11'*R - R*T22' = scale*X.
+*
+               CALL ZTRSYL( 'C', 'C', -1, N1, N2, T, LDT,
+     $                      T( N1+1, N1+1 ), LDT, WORK, N1, SCALE,
+     $                      IERR )
+            END IF
+            GO TO 30
+         END IF
+*
+         SEP = SCALE / EST
+      END IF
+*
+   40 CONTINUE
+*
+*     Copy reordered eigenvalues to W.
+*
+      DO 50 K = 1, N
+         W( K ) = T( K, K )
+   50 CONTINUE
+*
+      WORK( 1 ) = LWMIN
+*
+      RETURN
+*
+*     End of ZTRSEN
+*
+      END
diff --git a/costa/native/external/lapack/ztrsna.f b/costa/native/external/lapack/ztrsna.f
new file mode 100644
index 000000000..3398e8734
--- /dev/null
+++ b/costa/native/external/lapack/ztrsna.f
@@ -0,0 +1,353 @@
+      SUBROUTINE ZTRSNA( JOB, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR,
+     $                   LDVR, S, SEP, MM, M, WORK, LDWORK, RWORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          HOWMNY, JOB
+      INTEGER            INFO, LDT, LDVL, LDVR, LDWORK, M, MM, N
+*     ..
+*     .. Array Arguments ..
+      LOGICAL            SELECT( * )
+      DOUBLE PRECISION   RWORK( * ), S( * ), SEP( * )
+      COMPLEX*16         T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ),
+     $                   WORK( LDWORK, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTRSNA estimates reciprocal condition numbers for specified
+*  eigenvalues and/or right eigenvectors of a complex upper triangular
+*  matrix T (or of any matrix Q*T*Q**H with Q unitary).
+*
+*  Arguments
+*  =========
+*
+*  JOB     (input) CHARACTER*1
+*          Specifies whether condition numbers are required for
+*          eigenvalues (S) or eigenvectors (SEP):
+*          = 'E': for eigenvalues only (S);
+*          = 'V': for eigenvectors only (SEP);
+*          = 'B': for both eigenvalues and eigenvectors (S and SEP).
+*
+*  HOWMNY  (input) CHARACTER*1
+*          = 'A': compute condition numbers for all eigenpairs;
+*          = 'S': compute condition numbers for selected eigenpairs
+*                 specified by the array SELECT.
+*
+*  SELECT  (input) LOGICAL array, dimension (N)
+*          If HOWMNY = 'S', SELECT specifies the eigenpairs for which
+*          condition numbers are required. To select condition numbers
+*          for the j-th eigenpair, SELECT(j) must be set to .TRUE..
+*          If HOWMNY = 'A', SELECT is not referenced.
+*
+*  N       (input) INTEGER
+*          The order of the matrix T. N >= 0.
+*
+*  T       (input) COMPLEX*16 array, dimension (LDT,N)
+*          The upper triangular matrix T.
+*
+*  LDT     (input) INTEGER
+*          The leading dimension of the array T. LDT >= max(1,N).
+*
+*  VL      (input) COMPLEX*16 array, dimension (LDVL,M)
+*          If JOB = 'E' or 'B', VL must contain left eigenvectors of T
+*          (or of any Q*T*Q**H with Q unitary), corresponding to the
+*          eigenpairs specified by HOWMNY and SELECT. The eigenvectors
+*          must be stored in consecutive columns of VL, as returned by
+*          ZHSEIN or ZTREVC.
+*          If JOB = 'V', VL is not referenced.
+*
+*  LDVL    (input) INTEGER
+*          The leading dimension of the array VL.
+*          LDVL >= 1; and if JOB = 'E' or 'B', LDVL >= N.
+*
+*  VR      (input) COMPLEX*16 array, dimension (LDVR,M)
+*          If JOB = 'E' or 'B', VR must contain right eigenvectors of T
+*          (or of any Q*T*Q**H with Q unitary), corresponding to the
+*          eigenpairs specified by HOWMNY and SELECT. The eigenvectors
+*          must be stored in consecutive columns of VR, as returned by
+*          ZHSEIN or ZTREVC.
+*          If JOB = 'V', VR is not referenced.
+*
+*  LDVR    (input) INTEGER
+*          The leading dimension of the array VR.
+*          LDVR >= 1; and if JOB = 'E' or 'B', LDVR >= N.
+*
+*  S       (output) DOUBLE PRECISION array, dimension (MM)
+*          If JOB = 'E' or 'B', the reciprocal condition numbers of the
+*          selected eigenvalues, stored in consecutive elements of the
+*          array. Thus S(j), SEP(j), and the j-th columns of VL and VR
+*          all correspond to the same eigenpair (but not in general the
+*          j-th eigenpair, unless all eigenpairs are selected).
+*          If JOB = 'V', S is not referenced.
+*
+*  SEP     (output) DOUBLE PRECISION array, dimension (MM)
+*          If JOB = 'V' or 'B', the estimated reciprocal condition
+*          numbers of the selected eigenvectors, stored in consecutive
+*          elements of the array.
+*          If JOB = 'E', SEP is not referenced.
+*
+*  MM      (input) INTEGER
+*          The number of elements in the arrays S (if JOB = 'E' or 'B')
+*           and/or SEP (if JOB = 'V' or 'B'). MM >= M.
+*
+*  M       (output) INTEGER
+*          The number of elements of the arrays S and/or SEP actually
+*          used to store the estimated condition numbers.
+*          If HOWMNY = 'A', M is set to N.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (LDWORK,N+1)
+*          If JOB = 'E', WORK is not referenced.
+*
+*  LDWORK  (input) INTEGER
+*          The leading dimension of the array WORK.
+*          LDWORK >= 1; and if JOB = 'V' or 'B', LDWORK >= N.
+*
+*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+*          If JOB = 'E', RWORK is not referenced.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The reciprocal of the condition number of an eigenvalue lambda is
+*  defined as
+*
+*          S(lambda) = |v'*u| / (norm(u)*norm(v))
+*
+*  where u and v are the right and left eigenvectors of T corresponding
+*  to lambda; v' denotes the conjugate transpose of v, and norm(u)
+*  denotes the Euclidean norm. These reciprocal condition numbers always
+*  lie between zero (very badly conditioned) and one (very well
+*  conditioned). If n = 1, S(lambda) is defined to be 1.
+*
+*  An approximate error bound for a computed eigenvalue W(i) is given by
+*
+*                      EPS * norm(T) / S(i)
+*
+*  where EPS is the machine precision.
+*
+*  The reciprocal of the condition number of the right eigenvector u
+*  corresponding to lambda is defined as follows. Suppose
+*
+*              T = ( lambda  c  )
+*                  (   0    T22 )
+*
+*  Then the reciprocal condition number is
+*
+*          SEP( lambda, T22 ) = sigma-min( T22 - lambda*I )
+*
+*  where sigma-min denotes the smallest singular value. We approximate
+*  the smallest singular value by the reciprocal of an estimate of the
+*  one-norm of the inverse of T22 - lambda*I. If n = 1, SEP(1) is
+*  defined to be abs(T(1,1)).
+*
+*  An approximate error bound for a computed right eigenvector VR(i)
+*  is given by
+*
+*                      EPS * norm(T) / SEP(i)
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ZERO, ONE
+      PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D0+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            SOMCON, WANTBH, WANTS, WANTSP
+      CHARACTER          NORMIN
+      INTEGER            I, IERR, IX, J, K, KASE, KS
+      DOUBLE PRECISION   BIGNUM, EPS, EST, LNRM, RNRM, SCALE, SMLNUM,
+     $                   XNORM
+      COMPLEX*16         CDUM, PROD
+*     ..
+*     .. Local Arrays ..
+      COMPLEX*16         DUMMY( 1 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            IZAMAX
+      DOUBLE PRECISION   DLAMCH, DZNRM2
+      COMPLEX*16         ZDOTC
+      EXTERNAL           LSAME, IZAMAX, DLAMCH, DZNRM2, ZDOTC
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZDRSCL, ZLACON, ZLACPY, ZLATRS, ZTREXC
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DIMAG, MAX
+*     ..
+*     .. Statement Functions ..
+      DOUBLE PRECISION   CABS1
+*     ..
+*     .. Statement Function definitions ..
+      CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      WANTBH = LSAME( JOB, 'B' )
+      WANTS = LSAME( JOB, 'E' ) .OR. WANTBH
+      WANTSP = LSAME( JOB, 'V' ) .OR. WANTBH
+*
+      SOMCON = LSAME( HOWMNY, 'S' )
+*
+*     Set M to the number of eigenpairs for which condition numbers are
+*     to be computed.
+*
+      IF( SOMCON ) THEN
+         M = 0
+         DO 10 J = 1, N
+            IF( SELECT( J ) )
+     $         M = M + 1
+   10    CONTINUE
+      ELSE
+         M = N
+      END IF
+*
+      INFO = 0
+      IF( .NOT.WANTS .AND. .NOT.WANTSP ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( HOWMNY, 'A' ) .AND. .NOT.SOMCON ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDT.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      ELSE IF( LDVL.LT.1 .OR. ( WANTS .AND. LDVL.LT.N ) ) THEN
+         INFO = -8
+      ELSE IF( LDVR.LT.1 .OR. ( WANTS .AND. LDVR.LT.N ) ) THEN
+         INFO = -10
+      ELSE IF( MM.LT.M ) THEN
+         INFO = -13
+      ELSE IF( LDWORK.LT.1 .OR. ( WANTSP .AND. LDWORK.LT.N ) ) THEN
+         INFO = -16
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZTRSNA', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( N.EQ.1 ) THEN
+         IF( SOMCON ) THEN
+            IF( .NOT.SELECT( 1 ) )
+     $         RETURN
+         END IF
+         IF( WANTS )
+     $      S( 1 ) = ONE
+         IF( WANTSP )
+     $      SEP( 1 ) = ABS( T( 1, 1 ) )
+         RETURN
+      END IF
+*
+*     Get machine constants
+*
+      EPS = DLAMCH( 'P' )
+      SMLNUM = DLAMCH( 'S' ) / EPS
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+*
+      KS = 1
+      DO 50 K = 1, N
+*
+         IF( SOMCON ) THEN
+            IF( .NOT.SELECT( K ) )
+     $         GO TO 50
+         END IF
+*
+         IF( WANTS ) THEN
+*
+*           Compute the reciprocal condition number of the k-th
+*           eigenvalue.
+*
+            PROD = ZDOTC( N, VR( 1, KS ), 1, VL( 1, KS ), 1 )
+            RNRM = DZNRM2( N, VR( 1, KS ), 1 )
+            LNRM = DZNRM2( N, VL( 1, KS ), 1 )
+            S( KS ) = ABS( PROD ) / ( RNRM*LNRM )
+*
+         END IF
+*
+         IF( WANTSP ) THEN
+*
+*           Estimate the reciprocal condition number of the k-th
+*           eigenvector.
+*
+*           Copy the matrix T to the array WORK and swap the k-th
+*           diagonal element to the (1,1) position.
+*
+            CALL ZLACPY( 'Full', N, N, T, LDT, WORK, LDWORK )
+            CALL ZTREXC( 'No Q', N, WORK, LDWORK, DUMMY, 1, K, 1, IERR )
+*
+*           Form  C = T22 - lambda*I in WORK(2:N,2:N).
+*
+            DO 20 I = 2, N
+               WORK( I, I ) = WORK( I, I ) - WORK( 1, 1 )
+   20       CONTINUE
+*
+*           Estimate a lower bound for the 1-norm of inv(C'). The 1st
+*           and (N+1)th columns of WORK are used to store work vectors.
+*
+            SEP( KS ) = ZERO
+            EST = ZERO
+            KASE = 0
+            NORMIN = 'N'
+   30       CONTINUE
+            CALL ZLACON( N-1, WORK( 1, N+1 ), WORK, EST, KASE )
+*
+            IF( KASE.NE.0 ) THEN
+               IF( KASE.EQ.1 ) THEN
+*
+*                 Solve C'*x = scale*b
+*
+                  CALL ZLATRS( 'Upper', 'Conjugate transpose',
+     $                         'Nonunit', NORMIN, N-1, WORK( 2, 2 ),
+     $                         LDWORK, WORK, SCALE, RWORK, IERR )
+               ELSE
+*
+*                 Solve C*x = scale*b
+*
+                  CALL ZLATRS( 'Upper', 'No transpose', 'Nonunit',
+     $                         NORMIN, N-1, WORK( 2, 2 ), LDWORK, WORK,
+     $                         SCALE, RWORK, IERR )
+               END IF
+               NORMIN = 'Y'
+               IF( SCALE.NE.ONE ) THEN
+*
+*                 Multiply by 1/SCALE if doing so will not cause
+*                 overflow.
+*
+                  IX = IZAMAX( N-1, WORK, 1 )
+                  XNORM = CABS1( WORK( IX, 1 ) )
+                  IF( SCALE.LT.XNORM*SMLNUM .OR. SCALE.EQ.ZERO )
+     $               GO TO 40
+                  CALL ZDRSCL( N, SCALE, WORK, 1 )
+               END IF
+               GO TO 30
+            END IF
+*
+            SEP( KS ) = ONE / MAX( EST, SMLNUM )
+         END IF
+*
+   40    CONTINUE
+         KS = KS + 1
+   50 CONTINUE
+      RETURN
+*
+*     End of ZTRSNA
+*
+      END
diff --git a/costa/native/external/lapack/ztrsyl.f b/costa/native/external/lapack/ztrsyl.f
new file mode 100644
index 000000000..8fcbb3aaf
--- /dev/null
+++ b/costa/native/external/lapack/ztrsyl.f
@@ -0,0 +1,368 @@
+      SUBROUTINE ZTRSYL( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C,
+     $                   LDC, SCALE, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          TRANA, TRANB
+      INTEGER            INFO, ISGN, LDA, LDB, LDC, M, N
+      DOUBLE PRECISION   SCALE
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), B( LDB, * ), C( LDC, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTRSYL solves the complex Sylvester matrix equation:
+*
+*     op(A)*X + X*op(B) = scale*C or
+*     op(A)*X - X*op(B) = scale*C,
+*
+*  where op(A) = A or A**H, and A and B are both upper triangular. A is
+*  M-by-M and B is N-by-N; the right hand side C and the solution X are
+*  M-by-N; and scale is an output scale factor, set <= 1 to avoid
+*  overflow in X.
+*
+*  Arguments
+*  =========
+*
+*  TRANA   (input) CHARACTER*1
+*          Specifies the option op(A):
+*          = 'N': op(A) = A    (No transpose)
+*          = 'C': op(A) = A**H (Conjugate transpose)
+*
+*  TRANB   (input) CHARACTER*1
+*          Specifies the option op(B):
+*          = 'N': op(B) = B    (No transpose)
+*          = 'C': op(B) = B**H (Conjugate transpose)
+*
+*  ISGN    (input) INTEGER
+*          Specifies the sign in the equation:
+*          = +1: solve op(A)*X + X*op(B) = scale*C
+*          = -1: solve op(A)*X - X*op(B) = scale*C
+*
+*  M       (input) INTEGER
+*          The order of the matrix A, and the number of rows in the
+*          matrices X and C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The order of the matrix B, and the number of columns in the
+*          matrices X and C. N >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,M)
+*          The upper triangular matrix A.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,M).
+*
+*  B       (input) COMPLEX*16 array, dimension (LDB,N)
+*          The upper triangular matrix B.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B. LDB >= max(1,N).
+*
+*  C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+*          On entry, the M-by-N right hand side matrix C.
+*          On exit, C is overwritten by the solution matrix X.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M)
+*
+*  SCALE   (output) DOUBLE PRECISION
+*          The scale factor, scale, set <= 1 to avoid overflow in X.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          = 1: A and B have common or very close eigenvalues; perturbed
+*               values were used to solve the equation (but the matrices
+*               A and B are unchanged).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      DOUBLE PRECISION   ONE
+      PARAMETER          ( ONE = 1.0D+0 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOTRNA, NOTRNB
+      INTEGER            J, K, L
+      DOUBLE PRECISION   BIGNUM, DA11, DB, EPS, SCALOC, SGN, SMIN,
+     $                   SMLNUM
+      COMPLEX*16         A11, SUML, SUMR, VEC, X11
+*     ..
+*     .. Local Arrays ..
+      DOUBLE PRECISION   DUM( 1 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, ZLANGE
+      COMPLEX*16         ZDOTC, ZDOTU, ZLADIV
+      EXTERNAL           LSAME, DLAMCH, ZLANGE, ZDOTC, ZDOTU, ZLADIV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZDSCAL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, DBLE, DCMPLX, DCONJG, DIMAG, MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and Test input parameters
+*
+      NOTRNA = LSAME( TRANA, 'N' )
+      NOTRNB = LSAME( TRANB, 'N' )
+*
+      INFO = 0
+      IF( .NOT.NOTRNA .AND. .NOT.LSAME( TRANA, 'T' ) .AND. .NOT.
+     $    LSAME( TRANA, 'C' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRNB .AND. .NOT.LSAME( TRANB, 'T' ) .AND. .NOT.
+     $         LSAME( TRANB, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( ISGN.NE.1 .AND. ISGN.NE.-1 ) THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZTRSYL', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+*     Set constants to control overflow
+*
+      EPS = DLAMCH( 'P' )
+      SMLNUM = DLAMCH( 'S' )
+      BIGNUM = ONE / SMLNUM
+      CALL DLABAD( SMLNUM, BIGNUM )
+      SMLNUM = SMLNUM*DBLE( M*N ) / EPS
+      BIGNUM = ONE / SMLNUM
+      SMIN = MAX( SMLNUM, EPS*ZLANGE( 'M', M, M, A, LDA, DUM ),
+     $       EPS*ZLANGE( 'M', N, N, B, LDB, DUM ) )
+      SCALE = ONE
+      SGN = ISGN
+*
+      IF( NOTRNA .AND. NOTRNB ) THEN
+*
+*        Solve    A*X + ISGN*X*B = scale*C.
+*
+*        The (K,L)th block of X is determined starting from
+*        bottom-left corner column by column by
+*
+*            A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L)
+*
+*        Where
+*                    M                        L-1
+*          R(K,L) = SUM [A(K,I)*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)].
+*                  I=K+1                      J=1
+*
+         DO 30 L = 1, N
+            DO 20 K = M, 1, -1
+*
+               SUML = ZDOTU( M-K, A( K, MIN( K+1, M ) ), LDA,
+     $                C( MIN( K+1, M ), L ), 1 )
+               SUMR = ZDOTU( L-1, C( K, 1 ), LDC, B( 1, L ), 1 )
+               VEC = C( K, L ) - ( SUML+SGN*SUMR )
+*
+               SCALOC = ONE
+               A11 = A( K, K ) + SGN*B( L, L )
+               DA11 = ABS( DBLE( A11 ) ) + ABS( DIMAG( A11 ) )
+               IF( DA11.LE.SMIN ) THEN
+                  A11 = SMIN
+                  DA11 = SMIN
+                  INFO = 1
+               END IF
+               DB = ABS( DBLE( VEC ) ) + ABS( DIMAG( VEC ) )
+               IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN
+                  IF( DB.GT.BIGNUM*DA11 )
+     $               SCALOC = ONE / DB
+               END IF
+               X11 = ZLADIV( VEC*DCMPLX( SCALOC ), A11 )
+*
+               IF( SCALOC.NE.ONE ) THEN
+                  DO 10 J = 1, N
+                     CALL ZDSCAL( M, SCALOC, C( 1, J ), 1 )
+   10             CONTINUE
+                  SCALE = SCALE*SCALOC
+               END IF
+               C( K, L ) = X11
+*
+   20       CONTINUE
+   30    CONTINUE
+*
+      ELSE IF( .NOT.NOTRNA .AND. NOTRNB ) THEN
+*
+*        Solve    A' *X + ISGN*X*B = scale*C.
+*
+*        The (K,L)th block of X is determined starting from
+*        upper-left corner column by column by
+*
+*            A'(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L)
+*
+*        Where
+*                   K-1                         L-1
+*          R(K,L) = SUM [A'(I,K)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)]
+*                   I=1                         J=1
+*
+         DO 60 L = 1, N
+            DO 50 K = 1, M
+*
+               SUML = ZDOTC( K-1, A( 1, K ), 1, C( 1, L ), 1 )
+               SUMR = ZDOTU( L-1, C( K, 1 ), LDC, B( 1, L ), 1 )
+               VEC = C( K, L ) - ( SUML+SGN*SUMR )
+*
+               SCALOC = ONE
+               A11 = DCONJG( A( K, K ) ) + SGN*B( L, L )
+               DA11 = ABS( DBLE( A11 ) ) + ABS( DIMAG( A11 ) )
+               IF( DA11.LE.SMIN ) THEN
+                  A11 = SMIN
+                  DA11 = SMIN
+                  INFO = 1
+               END IF
+               DB = ABS( DBLE( VEC ) ) + ABS( DIMAG( VEC ) )
+               IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN
+                  IF( DB.GT.BIGNUM*DA11 )
+     $               SCALOC = ONE / DB
+               END IF
+*
+               X11 = ZLADIV( VEC*DCMPLX( SCALOC ), A11 )
+*
+               IF( SCALOC.NE.ONE ) THEN
+                  DO 40 J = 1, N
+                     CALL ZDSCAL( M, SCALOC, C( 1, J ), 1 )
+   40             CONTINUE
+                  SCALE = SCALE*SCALOC
+               END IF
+               C( K, L ) = X11
+*
+   50       CONTINUE
+   60    CONTINUE
+*
+      ELSE IF( .NOT.NOTRNA .AND. .NOT.NOTRNB ) THEN
+*
+*        Solve    A'*X + ISGN*X*B' = C.
+*
+*        The (K,L)th block of X is determined starting from
+*        upper-right corner column by column by
+*
+*            A'(K,K)*X(K,L) + ISGN*X(K,L)*B'(L,L) = C(K,L) - R(K,L)
+*
+*        Where
+*                    K-1
+*           R(K,L) = SUM [A'(I,K)*X(I,L)] +
+*                    I=1
+*                           N
+*                     ISGN*SUM [X(K,J)*B'(L,J)].
+*                          J=L+1
+*
+         DO 90 L = N, 1, -1
+            DO 80 K = 1, M
+*
+               SUML = ZDOTC( K-1, A( 1, K ), 1, C( 1, L ), 1 )
+               SUMR = ZDOTC( N-L, C( K, MIN( L+1, N ) ), LDC,
+     $                B( L, MIN( L+1, N ) ), LDB )
+               VEC = C( K, L ) - ( SUML+SGN*DCONJG( SUMR ) )
+*
+               SCALOC = ONE
+               A11 = DCONJG( A( K, K )+SGN*B( L, L ) )
+               DA11 = ABS( DBLE( A11 ) ) + ABS( DIMAG( A11 ) )
+               IF( DA11.LE.SMIN ) THEN
+                  A11 = SMIN
+                  DA11 = SMIN
+                  INFO = 1
+               END IF
+               DB = ABS( DBLE( VEC ) ) + ABS( DIMAG( VEC ) )
+               IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN
+                  IF( DB.GT.BIGNUM*DA11 )
+     $               SCALOC = ONE / DB
+               END IF
+*
+               X11 = ZLADIV( VEC*DCMPLX( SCALOC ), A11 )
+*
+               IF( SCALOC.NE.ONE ) THEN
+                  DO 70 J = 1, N
+                     CALL ZDSCAL( M, SCALOC, C( 1, J ), 1 )
+   70             CONTINUE
+                  SCALE = SCALE*SCALOC
+               END IF
+               C( K, L ) = X11
+*
+   80       CONTINUE
+   90    CONTINUE
+*
+      ELSE IF( NOTRNA .AND. .NOT.NOTRNB ) THEN
+*
+*        Solve    A*X + ISGN*X*B' = C.
+*
+*        The (K,L)th block of X is determined starting from
+*        bottom-left corner column by column by
+*
+*           A(K,K)*X(K,L) + ISGN*X(K,L)*B'(L,L) = C(K,L) - R(K,L)
+*
+*        Where
+*                    M                          N
+*          R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B'(L,J)]
+*                  I=K+1                      J=L+1
+*
+         DO 120 L = N, 1, -1
+            DO 110 K = M, 1, -1
+*
+               SUML = ZDOTU( M-K, A( K, MIN( K+1, M ) ), LDA,
+     $                C( MIN( K+1, M ), L ), 1 )
+               SUMR = ZDOTC( N-L, C( K, MIN( L+1, N ) ), LDC,
+     $                B( L, MIN( L+1, N ) ), LDB )
+               VEC = C( K, L ) - ( SUML+SGN*DCONJG( SUMR ) )
+*
+               SCALOC = ONE
+               A11 = A( K, K ) + SGN*DCONJG( B( L, L ) )
+               DA11 = ABS( DBLE( A11 ) ) + ABS( DIMAG( A11 ) )
+               IF( DA11.LE.SMIN ) THEN
+                  A11 = SMIN
+                  DA11 = SMIN
+                  INFO = 1
+               END IF
+               DB = ABS( DBLE( VEC ) ) + ABS( DIMAG( VEC ) )
+               IF( DA11.LT.ONE .AND. DB.GT.ONE ) THEN
+                  IF( DB.GT.BIGNUM*DA11 )
+     $               SCALOC = ONE / DB
+               END IF
+*
+               X11 = ZLADIV( VEC*DCMPLX( SCALOC ), A11 )
+*
+               IF( SCALOC.NE.ONE ) THEN
+                  DO 100 J = 1, N
+                     CALL ZDSCAL( M, SCALOC, C( 1, J ), 1 )
+  100             CONTINUE
+                  SCALE = SCALE*SCALOC
+               END IF
+               C( K, L ) = X11
+*
+  110       CONTINUE
+  120    CONTINUE
+*
+      END IF
+*
+      RETURN
+*
+*     End of ZTRSYL
+*
+      END
diff --git a/costa/native/external/lapack/ztrti2.f b/costa/native/external/lapack/ztrti2.f
new file mode 100644
index 000000000..f8bab3b18
--- /dev/null
+++ b/costa/native/external/lapack/ztrti2.f
@@ -0,0 +1,147 @@
+      SUBROUTINE ZTRTI2( UPLO, DIAG, N, A, LDA, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTRTI2 computes the inverse of a complex upper or lower triangular
+*  matrix.
+*
+*  This is the Level 2 BLAS version of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          Specifies whether the matrix A is upper or lower triangular.
+*          = 'U':  Upper triangular
+*          = 'L':  Lower triangular
+*
+*  DIAG    (input) CHARACTER*1
+*          Specifies whether or not the matrix A is unit triangular.
+*          = 'N':  Non-unit triangular
+*          = 'U':  Unit triangular
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the triangular matrix A.  If UPLO = 'U', the
+*          leading n by n upper triangular part of the array A contains
+*          the upper triangular matrix, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading n by n lower triangular part of the array A contains
+*          the lower triangular matrix, and the strictly upper
+*          triangular part of A is not referenced.  If DIAG = 'U', the
+*          diagonal elements of A are also not referenced and are
+*          assumed to be 1.
+*
+*          On exit, the (triangular) inverse of the original matrix, in
+*          the same storage format.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -k, the k-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, UPPER
+      INTEGER            J
+      COMPLEX*16         AJJ
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZSCAL, ZTRMV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOUNIT = LSAME( DIAG, 'N' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZTRTI2', -INFO )
+         RETURN
+      END IF
+*
+      IF( UPPER ) THEN
+*
+*        Compute inverse of upper triangular matrix.
+*
+         DO 10 J = 1, N
+            IF( NOUNIT ) THEN
+               A( J, J ) = ONE / A( J, J )
+               AJJ = -A( J, J )
+            ELSE
+               AJJ = -ONE
+            END IF
+*
+*           Compute elements 1:j-1 of j-th column.
+*
+            CALL ZTRMV( 'Upper', 'No transpose', DIAG, J-1, A, LDA,
+     $                  A( 1, J ), 1 )
+            CALL ZSCAL( J-1, AJJ, A( 1, J ), 1 )
+   10    CONTINUE
+      ELSE
+*
+*        Compute inverse of lower triangular matrix.
+*
+         DO 20 J = N, 1, -1
+            IF( NOUNIT ) THEN
+               A( J, J ) = ONE / A( J, J )
+               AJJ = -A( J, J )
+            ELSE
+               AJJ = -ONE
+            END IF
+            IF( J.LT.N ) THEN
+*
+*              Compute elements j+1:n of j-th column.
+*
+               CALL ZTRMV( 'Lower', 'No transpose', DIAG, N-J,
+     $                     A( J+1, J+1 ), LDA, A( J+1, J ), 1 )
+               CALL ZSCAL( N-J, AJJ, A( J+1, J ), 1 )
+            END IF
+   20    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZTRTI2
+*
+      END
diff --git a/costa/native/external/lapack/ztrtri.f b/costa/native/external/lapack/ztrtri.f
new file mode 100644
index 000000000..53c2356d4
--- /dev/null
+++ b/costa/native/external/lapack/ztrtri.f
@@ -0,0 +1,178 @@
+      SUBROUTINE ZTRTRI( UPLO, DIAG, N, A, LDA, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, UPLO
+      INTEGER            INFO, LDA, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTRTRI computes the inverse of a complex upper or lower triangular
+*  matrix A.
+*
+*  This is the Level 3 BLAS version of the algorithm.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the triangular matrix A.  If UPLO = 'U', the
+*          leading N-by-N upper triangular part of the array A contains
+*          the upper triangular matrix, and the strictly lower
+*          triangular part of A is not referenced.  If UPLO = 'L', the
+*          leading N-by-N lower triangular part of the array A contains
+*          the lower triangular matrix, and the strictly upper
+*          triangular part of A is not referenced.  If DIAG = 'U', the
+*          diagonal elements of A are also not referenced and are
+*          assumed to be 1.
+*          On exit, the (triangular) inverse of the original matrix, in
+*          the same storage format.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, A(i,i) is exactly zero.  The triangular
+*               matrix is singular and its inverse can not be computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ),
+     $                   ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT, UPPER
+      INTEGER            J, JB, NB, NN
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZTRMM, ZTRSM, ZTRTI2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      NOUNIT = LSAME( DIAG, 'N' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZTRTRI', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Check for singularity if non-unit.
+*
+      IF( NOUNIT ) THEN
+         DO 10 INFO = 1, N
+            IF( A( INFO, INFO ).EQ.ZERO )
+     $         RETURN
+   10    CONTINUE
+         INFO = 0
+      END IF
+*
+*     Determine the block size for this environment.
+*
+      NB = ILAENV( 1, 'ZTRTRI', UPLO // DIAG, N, -1, -1, -1 )
+      IF( NB.LE.1 .OR. NB.GE.N ) THEN
+*
+*        Use unblocked code
+*
+         CALL ZTRTI2( UPLO, DIAG, N, A, LDA, INFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( UPPER ) THEN
+*
+*           Compute inverse of upper triangular matrix
+*
+            DO 20 J = 1, N, NB
+               JB = MIN( NB, N-J+1 )
+*
+*              Compute rows 1:j-1 of current block column
+*
+               CALL ZTRMM( 'Left', 'Upper', 'No transpose', DIAG, J-1,
+     $                     JB, ONE, A, LDA, A( 1, J ), LDA )
+               CALL ZTRSM( 'Right', 'Upper', 'No transpose', DIAG, J-1,
+     $                     JB, -ONE, A( J, J ), LDA, A( 1, J ), LDA )
+*
+*              Compute inverse of current diagonal block
+*
+               CALL ZTRTI2( 'Upper', DIAG, JB, A( J, J ), LDA, INFO )
+   20       CONTINUE
+         ELSE
+*
+*           Compute inverse of lower triangular matrix
+*
+            NN = ( ( N-1 ) / NB )*NB + 1
+            DO 30 J = NN, 1, -NB
+               JB = MIN( NB, N-J+1 )
+               IF( J+JB.LE.N ) THEN
+*
+*                 Compute rows j+jb:n of current block column
+*
+                  CALL ZTRMM( 'Left', 'Lower', 'No transpose', DIAG,
+     $                        N-J-JB+1, JB, ONE, A( J+JB, J+JB ), LDA,
+     $                        A( J+JB, J ), LDA )
+                  CALL ZTRSM( 'Right', 'Lower', 'No transpose', DIAG,
+     $                        N-J-JB+1, JB, -ONE, A( J, J ), LDA,
+     $                        A( J+JB, J ), LDA )
+               END IF
+*
+*              Compute inverse of current diagonal block
+*
+               CALL ZTRTI2( 'Lower', DIAG, JB, A( J, J ), LDA, INFO )
+   30       CONTINUE
+         END IF
+      END IF
+*
+      RETURN
+*
+*     End of ZTRTRI
+*
+      END
diff --git a/costa/native/external/lapack/ztrtrs.f b/costa/native/external/lapack/ztrtrs.f
new file mode 100644
index 000000000..d3d8f2be6
--- /dev/null
+++ b/costa/native/external/lapack/ztrtrs.f
@@ -0,0 +1,149 @@
+      SUBROUTINE ZTRTRS( UPLO, TRANS, DIAG, N, NRHS, A, LDA, B, LDB,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          DIAG, TRANS, UPLO
+      INTEGER            INFO, LDA, LDB, N, NRHS
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), B( LDB, * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTRTRS solves a triangular system of the form
+*
+*     A * X = B,  A**T * X = B,  or  A**H * X = B,
+*
+*  where A is a triangular matrix of order N, and B is an N-by-NRHS
+*  matrix.  A check is made to verify that A is nonsingular.
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U':  A is upper triangular;
+*          = 'L':  A is lower triangular.
+*
+*  TRANS   (input) CHARACTER*1
+*          Specifies the form of the system of equations:
+*          = 'N':  A * X = B     (No transpose)
+*          = 'T':  A**T * X = B  (Transpose)
+*          = 'C':  A**H * X = B  (Conjugate transpose)
+*
+*  DIAG    (input) CHARACTER*1
+*          = 'N':  A is non-unit triangular;
+*          = 'U':  A is unit triangular.
+*
+*  N       (input) INTEGER
+*          The order of the matrix A.  N >= 0.
+*
+*  NRHS    (input) INTEGER
+*          The number of right hand sides, i.e., the number of columns
+*          of the matrix B.  NRHS >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,N)
+*          The triangular matrix A.  If UPLO = 'U', the leading N-by-N
+*          upper triangular part of the array A contains the upper
+*          triangular matrix, and the strictly lower triangular part of
+*          A is not referenced.  If UPLO = 'L', the leading N-by-N lower
+*          triangular part of the array A contains the lower triangular
+*          matrix, and the strictly upper triangular part of A is not
+*          referenced.  If DIAG = 'U', the diagonal elements of A are
+*          also not referenced and are assumed to be 1.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,N).
+*
+*  B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+*          On entry, the right hand side matrix B.
+*          On exit, if INFO = 0, the solution matrix X.
+*
+*  LDB     (input) INTEGER
+*          The leading dimension of the array B.  LDB >= max(1,N).
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*          > 0: if INFO = i, the i-th diagonal element of A is zero,
+*               indicating that the matrix is singular and the solutions
+*               X have not been computed.
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO, ONE
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            NOUNIT
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZTRSM
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      NOUNIT = LSAME( DIAG, 'N' )
+      IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.
+     $         LSAME( TRANS, 'T' ) .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( NRHS.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -7
+      ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZTRTRS', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+*     Check for singularity.
+*
+      IF( NOUNIT ) THEN
+         DO 10 INFO = 1, N
+            IF( A( INFO, INFO ).EQ.ZERO )
+     $         RETURN
+   10    CONTINUE
+      END IF
+      INFO = 0
+*
+*     Solve A * x = b,  A**T * x = b,  or  A**H * x = b.
+*
+      CALL ZTRSM( 'Left', UPLO, TRANS, DIAG, N, NRHS, ONE, A, LDA, B,
+     $            LDB )
+*
+      RETURN
+*
+*     End of ZTRTRS
+*
+      END
diff --git a/costa/native/external/lapack/ztzrqf.f b/costa/native/external/lapack/ztzrqf.f
new file mode 100644
index 000000000..8f4278df8
--- /dev/null
+++ b/costa/native/external/lapack/ztzrqf.f
@@ -0,0 +1,174 @@
+      SUBROUTINE ZTZRQF( M, N, A, LDA, TAU, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), TAU( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  This routine is deprecated and has been replaced by routine ZTZRZF.
+*
+*  ZTZRQF reduces the M-by-N ( M<=N ) complex upper trapezoidal matrix A
+*  to upper triangular form by means of unitary transformations.
+*
+*  The upper trapezoidal matrix A is factored as
+*
+*     A = ( R  0 ) * Z,
+*
+*  where Z is an N-by-N unitary matrix and R is an M-by-M upper
+*  triangular matrix.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= M.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the leading M-by-N upper trapezoidal part of the
+*          array A must contain the matrix to be factorized.
+*          On exit, the leading M-by-M upper triangular part of A
+*          contains the upper triangular matrix R, and elements M+1 to
+*          N of the first M rows of A, with the array TAU, represent the
+*          unitary matrix Z as a product of M elementary reflectors.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) COMPLEX*16 array, dimension (M)
+*          The scalar factors of the elementary reflectors.
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  The  factorization is obtained by Householder's method.  The kth
+*  transformation matrix, Z( k ), whose conjugate transpose is used to
+*  introduce zeros into the (m - k + 1)th row of A, is given in the form
+*
+*     Z( k ) = ( I     0   ),
+*              ( 0  T( k ) )
+*
+*  where
+*
+*     T( k ) = I - tau*u( k )*u( k )',   u( k ) = (   1    ),
+*                                                 (   0    )
+*                                                 ( z( k ) )
+*
+*  tau is a scalar and z( k ) is an ( n - m ) element vector.
+*  tau and z( k ) are chosen to annihilate the elements of the kth row
+*  of X.
+*
+*  The scalar tau is returned in the kth element of TAU and the vector
+*  u( k ) in the kth row of A, such that the elements of z( k ) are
+*  in  a( k, m + 1 ), ..., a( k, n ). The elements of R are returned in
+*  the upper triangular part of A.
+*
+*  Z is given by
+*
+*     Z =  Z( 1 ) * Z( 2 ) * ... * Z( m ).
+*
+* =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         CONE, CZERO
+      PARAMETER          ( CONE = ( 1.0D+0, 0.0D+0 ),
+     $                   CZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, K, M1
+      COMPLEX*16         ALPHA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX, MIN
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZAXPY, ZCOPY, ZGEMV, ZGERC, ZLACGV,
+     $                   ZLARFG
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.M ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZTZRQF', -INFO )
+         RETURN
+      END IF
+*
+*     Perform the factorization.
+*
+      IF( M.EQ.0 )
+     $   RETURN
+      IF( M.EQ.N ) THEN
+         DO 10 I = 1, N
+            TAU( I ) = CZERO
+   10    CONTINUE
+      ELSE
+         M1 = MIN( M+1, N )
+         DO 20 K = M, 1, -1
+*
+*           Use a Householder reflection to zero the kth row of A.
+*           First set up the reflection.
+*
+            A( K, K ) = DCONJG( A( K, K ) )
+            CALL ZLACGV( N-M, A( K, M1 ), LDA )
+            ALPHA = A( K, K )
+            CALL ZLARFG( N-M+1, ALPHA, A( K, M1 ), LDA, TAU( K ) )
+            A( K, K ) = ALPHA
+            TAU( K ) = DCONJG( TAU( K ) )
+*
+            IF( TAU( K ).NE.CZERO .AND. K.GT.1 ) THEN
+*
+*              We now perform the operation  A := A*P( k )'.
+*
+*              Use the first ( k - 1 ) elements of TAU to store  a( k ),
+*              where  a( k ) consists of the first ( k - 1 ) elements of
+*              the  kth column  of  A.  Also  let  B  denote  the  first
+*              ( k - 1 ) rows of the last ( n - m ) columns of A.
+*
+               CALL ZCOPY( K-1, A( 1, K ), 1, TAU, 1 )
+*
+*              Form   w = a( k ) + B*z( k )  in TAU.
+*
+               CALL ZGEMV( 'No transpose', K-1, N-M, CONE, A( 1, M1 ),
+     $                     LDA, A( K, M1 ), LDA, CONE, TAU, 1 )
+*
+*              Now form  a( k ) := a( k ) - conjg(tau)*w
+*              and       B      := B      - conjg(tau)*w*z( k )'.
+*
+               CALL ZAXPY( K-1, -DCONJG( TAU( K ) ), TAU, 1, A( 1, K ),
+     $                     1 )
+               CALL ZGERC( K-1, N-M, -DCONJG( TAU( K ) ), TAU, 1,
+     $                     A( K, M1 ), LDA, A( 1, M1 ), LDA )
+            END IF
+   20    CONTINUE
+      END IF
+*
+      RETURN
+*
+*     End of ZTZRQF
+*
+      END
diff --git a/costa/native/external/lapack/ztzrzf.f b/costa/native/external/lapack/ztzrzf.f
new file mode 100644
index 000000000..87f22ba46
--- /dev/null
+++ b/costa/native/external/lapack/ztzrzf.f
@@ -0,0 +1,241 @@
+      SUBROUTINE ZTZRZF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZTZRZF reduces the M-by-N ( M<=N ) complex upper trapezoidal matrix A
+*  to upper triangular form by means of unitary transformations.
+*
+*  The upper trapezoidal matrix A is factored as
+*
+*     A = ( R  0 ) * Z,
+*
+*  where Z is an N-by-N unitary matrix and R is an M-by-M upper
+*  triangular matrix.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix A.  M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix A.  N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the leading M-by-N upper trapezoidal part of the
+*          array A must contain the matrix to be factorized.
+*          On exit, the leading M-by-M upper triangular part of A
+*          contains the upper triangular matrix R, and elements M+1 to
+*          N of the first M rows of A, with the array TAU, represent the
+*          unitary matrix Z as a product of M elementary reflectors.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.  LDA >= max(1,M).
+*
+*  TAU     (output) COMPLEX*16 array, dimension (M)
+*          The scalar factors of the elementary reflectors.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.  LWORK >= max(1,M).
+*          For optimum performance LWORK >= M*NB, where NB is
+*          the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+*  The factorization is obtained by Householder's method.  The kth
+*  transformation matrix, Z( k ), which is used to introduce zeros into
+*  the ( m - k + 1 )th row of A, is given in the form
+*
+*     Z( k ) = ( I     0   ),
+*              ( 0  T( k ) )
+*
+*  where
+*
+*     T( k ) = I - tau*u( k )*u( k )',   u( k ) = (   1    ),
+*                                                 (   0    )
+*                                                 ( z( k ) )
+*
+*  tau is a scalar and z( k ) is an ( n - m ) element vector.
+*  tau and z( k ) are chosen to annihilate the elements of the kth row
+*  of X.
+*
+*  The scalar tau is returned in the kth element of TAU and the vector
+*  u( k ) in the kth row of A, such that the elements of z( k ) are
+*  in  a( k, m + 1 ), ..., a( k, n ). The elements of R are returned in
+*  the upper triangular part of A.
+*
+*  Z is given by
+*
+*     Z =  Z( 1 ) * Z( 2 ) * ... * Z( m ).
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IWS, KI, KK, LDWORK, LWKOPT, M1, MU, NB,
+     $                   NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLARZB, ZLARZT, ZLATRZ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.M ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size.
+*
+         NB = ILAENV( 1, 'ZGERQF', ' ', M, N, -1, -1 )
+         LWKOPT = M*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZTZRZF', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      ELSE IF( M.EQ.N ) THEN
+         DO 10 I = 1, N
+            TAU( I ) = ZERO
+   10    CONTINUE
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 1
+      IWS = M
+      IF( NB.GT.1 .AND. NB.LT.M ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'ZGERQF', ' ', M, N, -1, -1 ) )
+         IF( NX.LT.M ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = M
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'ZGERQF', ' ', M, N, -1,
+     $                 -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.M .AND. NX.LT.M ) THEN
+*
+*        Use blocked code initially.
+*        The last kk rows are handled by the block method.
+*
+         M1 = MIN( M+1, N )
+         KI = ( ( M-NX-1 ) / NB )*NB
+         KK = MIN( M, KI+NB )
+*
+         DO 20 I = M - KK + KI + 1, M - KK + 1, -NB
+            IB = MIN( M-I+1, NB )
+*
+*           Compute the TZ factorization of the current block
+*           A(i:i+ib-1,i:n)
+*
+            CALL ZLATRZ( IB, N-I+1, N-M, A( I, I ), LDA, TAU( I ),
+     $                   WORK )
+            IF( I.GT.1 ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i+ib-1) . . . H(i+1) H(i)
+*
+               CALL ZLARZT( 'Backward', 'Rowwise', N-M, IB, A( I, M1 ),
+     $                      LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H to A(1:i-1,i:n) from the right
+*
+               CALL ZLARZB( 'Right', 'No transpose', 'Backward',
+     $                      'Rowwise', I-1, N-I+1, IB, N-M, A( I, M1 ),
+     $                      LDA, WORK, LDWORK, A( 1, I ), LDA,
+     $                      WORK( IB+1 ), LDWORK )
+            END IF
+   20    CONTINUE
+         MU = I + NB - 1
+      ELSE
+         MU = M
+      END IF
+*
+*     Use unblocked code to factor the last or only block
+*
+      IF( MU.GT.0 )
+     $   CALL ZLATRZ( MU, N, N-M, A, LDA, TAU, WORK )
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of ZTZRZF
+*
+      END
diff --git a/costa/native/external/lapack/zung2l.f b/costa/native/external/lapack/zung2l.f
new file mode 100644
index 000000000..c3d3a1766
--- /dev/null
+++ b/costa/native/external/lapack/zung2l.f
@@ -0,0 +1,129 @@
+      SUBROUTINE ZUNG2L( M, N, K, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZUNG2L generates an m by n complex matrix Q with orthonormal columns,
+*  which is defined as the last n columns of a product of k elementary
+*  reflectors of order m
+*
+*        Q  =  H(k) . . . H(2) H(1)
+*
+*  as returned by ZGEQLF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. M >= N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. N >= K >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the (n-k+i)-th column must contain the vector which
+*          defines the elementary reflector H(i), for i = 1,2,...,k, as
+*          returned by ZGEQLF in the last k columns of its array
+*          argument A.
+*          On exit, the m-by-n matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) COMPLEX*16 array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by ZGEQLF.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ),
+     $                   ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, II, J, L
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLARF, ZSCAL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 .OR. N.GT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.N ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZUNG2L', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 )
+     $   RETURN
+*
+*     Initialise columns 1:n-k to columns of the unit matrix
+*
+      DO 20 J = 1, N - K
+         DO 10 L = 1, M
+            A( L, J ) = ZERO
+   10    CONTINUE
+         A( M-N+J, J ) = ONE
+   20 CONTINUE
+*
+      DO 40 I = 1, K
+         II = N - K + I
+*
+*        Apply H(i) to A(1:m-k+i,1:n-k+i) from the left
+*
+         A( M-N+II, II ) = ONE
+         CALL ZLARF( 'Left', M-N+II, II-1, A( 1, II ), 1, TAU( I ), A,
+     $               LDA, WORK )
+         CALL ZSCAL( M-N+II-1, -TAU( I ), A( 1, II ), 1 )
+         A( M-N+II, II ) = ONE - TAU( I )
+*
+*        Set A(m-k+i+1:m,n-k+i) to zero
+*
+         DO 30 L = M - N + II + 1, M
+            A( L, II ) = ZERO
+   30    CONTINUE
+   40 CONTINUE
+      RETURN
+*
+*     End of ZUNG2L
+*
+      END
diff --git a/costa/native/external/lapack/zung2r.f b/costa/native/external/lapack/zung2r.f
new file mode 100644
index 000000000..53208c673
--- /dev/null
+++ b/costa/native/external/lapack/zung2r.f
@@ -0,0 +1,131 @@
+      SUBROUTINE ZUNG2R( M, N, K, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZUNG2R generates an m by n complex matrix Q with orthonormal columns,
+*  which is defined as the first n columns of a product of k elementary
+*  reflectors of order m
+*
+*        Q  =  H(1) H(2) . . . H(k)
+*
+*  as returned by ZGEQRF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. M >= N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. N >= K >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the i-th column must contain the vector which
+*          defines the elementary reflector H(i), for i = 1,2,...,k, as
+*          returned by ZGEQRF in the first k columns of its array
+*          argument A.
+*          On exit, the m by n matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) COMPLEX*16 array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by ZGEQRF.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (N)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ),
+     $                   ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, L
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLARF, ZSCAL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 .OR. N.GT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.N ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZUNG2R', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 )
+     $   RETURN
+*
+*     Initialise columns k+1:n to columns of the unit matrix
+*
+      DO 20 J = K + 1, N
+         DO 10 L = 1, M
+            A( L, J ) = ZERO
+   10    CONTINUE
+         A( J, J ) = ONE
+   20 CONTINUE
+*
+      DO 40 I = K, 1, -1
+*
+*        Apply H(i) to A(i:m,i:n) from the left
+*
+         IF( I.LT.N ) THEN
+            A( I, I ) = ONE
+            CALL ZLARF( 'Left', M-I+1, N-I, A( I, I ), 1, TAU( I ),
+     $                  A( I, I+1 ), LDA, WORK )
+         END IF
+         IF( I.LT.M )
+     $      CALL ZSCAL( M-I, -TAU( I ), A( I+1, I ), 1 )
+         A( I, I ) = ONE - TAU( I )
+*
+*        Set A(1:i-1,i) to zero
+*
+         DO 30 L = 1, I - 1
+            A( L, I ) = ZERO
+   30    CONTINUE
+   40 CONTINUE
+      RETURN
+*
+*     End of ZUNG2R
+*
+      END
diff --git a/costa/native/external/lapack/zungbr.f b/costa/native/external/lapack/zungbr.f
new file mode 100644
index 000000000..4c699781d
--- /dev/null
+++ b/costa/native/external/lapack/zungbr.f
@@ -0,0 +1,246 @@
+      SUBROUTINE ZUNGBR( VECT, M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          VECT
+      INTEGER            INFO, K, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZUNGBR generates one of the complex unitary matrices Q or P**H
+*  determined by ZGEBRD when reducing a complex matrix A to bidiagonal
+*  form: A = Q * B * P**H.  Q and P**H are defined as products of
+*  elementary reflectors H(i) or G(i) respectively.
+*
+*  If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q
+*  is of order M:
+*  if m >= k, Q = H(1) H(2) . . . H(k) and ZUNGBR returns the first n
+*  columns of Q, where m >= n >= k;
+*  if m < k, Q = H(1) H(2) . . . H(m-1) and ZUNGBR returns Q as an
+*  M-by-M matrix.
+*
+*  If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H
+*  is of order N:
+*  if k < n, P**H = G(k) . . . G(2) G(1) and ZUNGBR returns the first m
+*  rows of P**H, where n >= m >= k;
+*  if k >= n, P**H = G(n-1) . . . G(2) G(1) and ZUNGBR returns P**H as
+*  an N-by-N matrix.
+*
+*  Arguments
+*  =========
+*
+*  VECT    (input) CHARACTER*1
+*          Specifies whether the matrix Q or the matrix P**H is
+*          required, as defined in the transformation applied by ZGEBRD:
+*          = 'Q':  generate Q;
+*          = 'P':  generate P**H.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q or P**H to be returned.
+*          M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q or P**H to be returned.
+*          N >= 0.
+*          If VECT = 'Q', M >= N >= min(M,K);
+*          if VECT = 'P', N >= M >= min(N,K).
+*
+*  K       (input) INTEGER
+*          If VECT = 'Q', the number of columns in the original M-by-K
+*          matrix reduced by ZGEBRD.
+*          If VECT = 'P', the number of rows in the original K-by-N
+*          matrix reduced by ZGEBRD.
+*          K >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the vectors which define the elementary reflectors,
+*          as returned by ZGEBRD.
+*          On exit, the M-by-N matrix Q or P**H.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= M.
+*
+*  TAU     (input) COMPLEX*16 array, dimension
+*                                (min(M,K)) if VECT = 'Q'
+*                                (min(N,K)) if VECT = 'P'
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i) or G(i), which determines Q or P**H, as
+*          returned by ZGEBRD in its array argument TAUQ or TAUP.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,min(M,N)).
+*          For optimum performance LWORK >= min(M,N)*NB, where NB
+*          is the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO, ONE
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, WANTQ
+      INTEGER            I, IINFO, J, LWKOPT, MN, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZUNGLQ, ZUNGQR
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      WANTQ = LSAME( VECT, 'Q' )
+      MN = MIN( M, N )
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( .NOT.WANTQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN
+         INFO = -1
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 .OR. ( WANTQ .AND. ( N.GT.M .OR. N.LT.MIN( M,
+     $         K ) ) ) .OR. ( .NOT.WANTQ .AND. ( M.GT.N .OR. M.LT.
+     $         MIN( N, K ) ) ) ) THEN
+         INFO = -3
+      ELSE IF( K.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -6
+      ELSE IF( LWORK.LT.MAX( 1, MN ) .AND. .NOT.LQUERY ) THEN
+         INFO = -9
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         IF( WANTQ ) THEN
+            NB = ILAENV( 1, 'ZUNGQR', ' ', M, N, K, -1 )
+         ELSE
+            NB = ILAENV( 1, 'ZUNGLQ', ' ', M, N, K, -1 )
+         END IF
+         LWKOPT = MAX( 1, MN )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZUNGBR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( WANTQ ) THEN
+*
+*        Form Q, determined by a call to ZGEBRD to reduce an m-by-k
+*        matrix
+*
+         IF( M.GE.K ) THEN
+*
+*           If m >= k, assume m >= n >= k
+*
+            CALL ZUNGQR( M, N, K, A, LDA, TAU, WORK, LWORK, IINFO )
+*
+         ELSE
+*
+*           If m < k, assume m = n
+*
+*           Shift the vectors which define the elementary reflectors one
+*           column to the right, and set the first row and column of Q
+*           to those of the unit matrix
+*
+            DO 20 J = M, 2, -1
+               A( 1, J ) = ZERO
+               DO 10 I = J + 1, M
+                  A( I, J ) = A( I, J-1 )
+   10          CONTINUE
+   20       CONTINUE
+            A( 1, 1 ) = ONE
+            DO 30 I = 2, M
+               A( I, 1 ) = ZERO
+   30       CONTINUE
+            IF( M.GT.1 ) THEN
+*
+*              Form Q(2:m,2:m)
+*
+               CALL ZUNGQR( M-1, M-1, M-1, A( 2, 2 ), LDA, TAU, WORK,
+     $                      LWORK, IINFO )
+            END IF
+         END IF
+      ELSE
+*
+*        Form P', determined by a call to ZGEBRD to reduce a k-by-n
+*        matrix
+*
+         IF( K.LT.N ) THEN
+*
+*           If k < n, assume k <= m <= n
+*
+            CALL ZUNGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, IINFO )
+*
+         ELSE
+*
+*           If k >= n, assume m = n
+*
+*           Shift the vectors which define the elementary reflectors one
+*           row downward, and set the first row and column of P' to
+*           those of the unit matrix
+*
+            A( 1, 1 ) = ONE
+            DO 40 I = 2, N
+               A( I, 1 ) = ZERO
+   40       CONTINUE
+            DO 60 J = 2, N
+               DO 50 I = J - 1, 2, -1
+                  A( I, J ) = A( I-1, J )
+   50          CONTINUE
+               A( 1, J ) = ZERO
+   60       CONTINUE
+            IF( N.GT.1 ) THEN
+*
+*              Form P'(2:n,2:n)
+*
+               CALL ZUNGLQ( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK,
+     $                      LWORK, IINFO )
+            END IF
+         END IF
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of ZUNGBR
+*
+      END
diff --git a/costa/native/external/lapack/zunghr.f b/costa/native/external/lapack/zunghr.f
new file mode 100644
index 000000000..e8cd55a8a
--- /dev/null
+++ b/costa/native/external/lapack/zunghr.f
@@ -0,0 +1,166 @@
+      SUBROUTINE ZUNGHR( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            IHI, ILO, INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZUNGHR generates a complex unitary matrix Q which is defined as the
+*  product of IHI-ILO elementary reflectors of order N, as returned by
+*  ZGEHRD:
+*
+*  Q = H(ilo) H(ilo+1) . . . H(ihi-1).
+*
+*  Arguments
+*  =========
+*
+*  N       (input) INTEGER
+*          The order of the matrix Q. N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          ILO and IHI must have the same values as in the previous call
+*          of ZGEHRD. Q is equal to the unit matrix except in the
+*          submatrix Q(ilo+1:ihi,ilo+1:ihi).
+*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the vectors which define the elementary reflectors,
+*          as returned by ZGEHRD.
+*          On exit, the N-by-N unitary matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,N).
+*
+*  TAU     (input) COMPLEX*16 array, dimension (N-1)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by ZGEHRD.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= IHI-ILO.
+*          For optimum performance LWORK >= (IHI-ILO)*NB, where NB is
+*          the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO, ONE
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IINFO, J, LWKOPT, NB, NH
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZUNGQR
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NH = IHI - ILO
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( N.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, N ) ) THEN
+         INFO = -2
+      ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.MAX( 1, NH ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         NB = ILAENV( 1, 'ZUNGQR', ' ', NH, NH, NH, -1 )
+         LWKOPT = MAX( 1, NH )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZUNGHR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+*     Shift the vectors which define the elementary reflectors one
+*     column to the right, and set the first ilo and the last n-ihi
+*     rows and columns to those of the unit matrix
+*
+      DO 40 J = IHI, ILO + 1, -1
+         DO 10 I = 1, J - 1
+            A( I, J ) = ZERO
+   10    CONTINUE
+         DO 20 I = J + 1, IHI
+            A( I, J ) = A( I, J-1 )
+   20    CONTINUE
+         DO 30 I = IHI + 1, N
+            A( I, J ) = ZERO
+   30    CONTINUE
+   40 CONTINUE
+      DO 60 J = 1, ILO
+         DO 50 I = 1, N
+            A( I, J ) = ZERO
+   50    CONTINUE
+         A( J, J ) = ONE
+   60 CONTINUE
+      DO 80 J = IHI + 1, N
+         DO 70 I = 1, N
+            A( I, J ) = ZERO
+   70    CONTINUE
+         A( J, J ) = ONE
+   80 CONTINUE
+*
+      IF( NH.GT.0 ) THEN
+*
+*        Generate Q(ilo+1:ihi,ilo+1:ihi)
+*
+         CALL ZUNGQR( NH, NH, NH, A( ILO+1, ILO+1 ), LDA, TAU( ILO ),
+     $                WORK, LWORK, IINFO )
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of ZUNGHR
+*
+      END
diff --git a/costa/native/external/lapack/zungl2.f b/costa/native/external/lapack/zungl2.f
new file mode 100644
index 000000000..b72ad06cb
--- /dev/null
+++ b/costa/native/external/lapack/zungl2.f
@@ -0,0 +1,137 @@
+      SUBROUTINE ZUNGL2( M, N, K, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZUNGL2 generates an m-by-n complex matrix Q with orthonormal rows,
+*  which is defined as the first m rows of a product of k elementary
+*  reflectors of order n
+*
+*        Q  =  H(k)' . . . H(2)' H(1)'
+*
+*  as returned by ZGELQF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. N >= M.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. M >= K >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the i-th row must contain the vector which defines
+*          the elementary reflector H(i), for i = 1,2,...,k, as returned
+*          by ZGELQF in the first k rows of its array argument A.
+*          On exit, the m by n matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) COMPLEX*16 array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by ZGELQF.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (M)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ),
+     $                   ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J, L
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLACGV, ZLARF, ZSCAL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.M ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZUNGL2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 )
+     $   RETURN
+*
+      IF( K.LT.M ) THEN
+*
+*        Initialise rows k+1:m to rows of the unit matrix
+*
+         DO 20 J = 1, N
+            DO 10 L = K + 1, M
+               A( L, J ) = ZERO
+   10       CONTINUE
+            IF( J.GT.K .AND. J.LE.M )
+     $         A( J, J ) = ONE
+   20    CONTINUE
+      END IF
+*
+      DO 40 I = K, 1, -1
+*
+*        Apply H(i)' to A(i:m,i:n) from the right
+*
+         IF( I.LT.N ) THEN
+            CALL ZLACGV( N-I, A( I, I+1 ), LDA )
+            IF( I.LT.M ) THEN
+               A( I, I ) = ONE
+               CALL ZLARF( 'Right', M-I, N-I+1, A( I, I ), LDA,
+     $                     DCONJG( TAU( I ) ), A( I+1, I ), LDA, WORK )
+            END IF
+            CALL ZSCAL( N-I, -TAU( I ), A( I, I+1 ), LDA )
+            CALL ZLACGV( N-I, A( I, I+1 ), LDA )
+         END IF
+         A( I, I ) = ONE - DCONJG( TAU( I ) )
+*
+*        Set A(i,1:i-1) to zero
+*
+         DO 30 L = 1, I - 1
+            A( I, L ) = ZERO
+   30    CONTINUE
+   40 CONTINUE
+      RETURN
+*
+*     End of ZUNGL2
+*
+      END
diff --git a/costa/native/external/lapack/zunglq.f b/costa/native/external/lapack/zunglq.f
new file mode 100644
index 000000000..5b4da67cf
--- /dev/null
+++ b/costa/native/external/lapack/zunglq.f
@@ -0,0 +1,216 @@
+      SUBROUTINE ZUNGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZUNGLQ generates an M-by-N complex matrix Q with orthonormal rows,
+*  which is defined as the first M rows of a product of K elementary
+*  reflectors of order N
+*
+*        Q  =  H(k)' . . . H(2)' H(1)'
+*
+*  as returned by ZGELQF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. N >= M.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. M >= K >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the i-th row must contain the vector which defines
+*          the elementary reflector H(i), for i = 1,2,...,k, as returned
+*          by ZGELQF in the first k rows of its array argument A.
+*          On exit, the M-by-N matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) COMPLEX*16 array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by ZGELQF.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,M).
+*          For optimum performance LWORK >= M*NB, where NB is
+*          the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit;
+*          < 0:  if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, J, KI, KK, L, LDWORK,
+     $                   LWKOPT, NB, NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLARFB, ZLARFT, ZUNGL2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'ZUNGLQ', ' ', M, N, K, -1 )
+      LWKOPT = MAX( 1, M )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.M ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZUNGLQ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 0
+      IWS = M
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'ZUNGLQ', ' ', M, N, K, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = M
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'ZUNGLQ', ' ', M, N, K, -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code after the last block.
+*        The first kk rows are handled by the block method.
+*
+         KI = ( ( K-NX-1 ) / NB )*NB
+         KK = MIN( K, KI+NB )
+*
+*        Set A(kk+1:m,1:kk) to zero.
+*
+         DO 20 J = 1, KK
+            DO 10 I = KK + 1, M
+               A( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+      ELSE
+         KK = 0
+      END IF
+*
+*     Use unblocked code for the last or only block.
+*
+      IF( KK.LT.M )
+     $   CALL ZUNGL2( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA,
+     $                TAU( KK+1 ), WORK, IINFO )
+*
+      IF( KK.GT.0 ) THEN
+*
+*        Use blocked code
+*
+         DO 50 I = KI + 1, 1, -NB
+            IB = MIN( NB, K-I+1 )
+            IF( I+IB.LE.M ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i) H(i+1) . . . H(i+ib-1)
+*
+               CALL ZLARFT( 'Forward', 'Rowwise', N-I+1, IB, A( I, I ),
+     $                      LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H' to A(i+ib:m,i:n) from the right
+*
+               CALL ZLARFB( 'Right', 'Conjugate transpose', 'Forward',
+     $                      'Rowwise', M-I-IB+1, N-I+1, IB, A( I, I ),
+     $                      LDA, WORK, LDWORK, A( I+IB, I ), LDA,
+     $                      WORK( IB+1 ), LDWORK )
+            END IF
+*
+*           Apply H' to columns i:n of current block
+*
+            CALL ZUNGL2( IB, N-I+1, IB, A( I, I ), LDA, TAU( I ), WORK,
+     $                   IINFO )
+*
+*           Set columns 1:i-1 of current block to zero
+*
+            DO 40 J = 1, I - 1
+               DO 30 L = I, I + IB - 1
+                  A( L, J ) = ZERO
+   30          CONTINUE
+   40       CONTINUE
+   50    CONTINUE
+      END IF
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of ZUNGLQ
+*
+      END
diff --git a/costa/native/external/lapack/zungql.f b/costa/native/external/lapack/zungql.f
new file mode 100644
index 000000000..96768817c
--- /dev/null
+++ b/costa/native/external/lapack/zungql.f
@@ -0,0 +1,214 @@
+      SUBROUTINE ZUNGQL( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZUNGQL generates an M-by-N complex matrix Q with orthonormal columns,
+*  which is defined as the last N columns of a product of K elementary
+*  reflectors of order M
+*
+*        Q  =  H(k) . . . H(2) H(1)
+*
+*  as returned by ZGEQLF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. M >= N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. N >= K >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the (n-k+i)-th column must contain the vector which
+*          defines the elementary reflector H(i), for i = 1,2,...,k, as
+*          returned by ZGEQLF in the last k columns of its array
+*          argument A.
+*          On exit, the M-by-N matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) COMPLEX*16 array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by ZGEQLF.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,N).
+*          For optimum performance LWORK >= N*NB, where NB is the
+*          optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, J, KK, L, LDWORK, LWKOPT,
+     $                   NB, NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLARFB, ZLARFT, ZUNG2L
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'ZUNGQL', ' ', M, N, K, -1 )
+      LWKOPT = MAX( 1, N )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 .OR. N.GT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.N ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZUNGQL', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 0
+      IWS = N
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'ZUNGQL', ' ', M, N, K, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = N
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'ZUNGQL', ' ', M, N, K, -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code after the first block.
+*        The last kk columns are handled by the block method.
+*
+         KK = MIN( K, ( ( K-NX+NB-1 ) / NB )*NB )
+*
+*        Set A(m-kk+1:m,1:n-kk) to zero.
+*
+         DO 20 J = 1, N - KK
+            DO 10 I = M - KK + 1, M
+               A( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+      ELSE
+         KK = 0
+      END IF
+*
+*     Use unblocked code for the first or only block.
+*
+      CALL ZUNG2L( M-KK, N-KK, K-KK, A, LDA, TAU, WORK, IINFO )
+*
+      IF( KK.GT.0 ) THEN
+*
+*        Use blocked code
+*
+         DO 50 I = K - KK + 1, K, NB
+            IB = MIN( NB, K-I+1 )
+            IF( N-K+I.GT.1 ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i+ib-1) . . . H(i+1) H(i)
+*
+               CALL ZLARFT( 'Backward', 'Columnwise', M-K+I+IB-1, IB,
+     $                      A( 1, N-K+I ), LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left
+*
+               CALL ZLARFB( 'Left', 'No transpose', 'Backward',
+     $                      'Columnwise', M-K+I+IB-1, N-K+I-1, IB,
+     $                      A( 1, N-K+I ), LDA, WORK, LDWORK, A, LDA,
+     $                      WORK( IB+1 ), LDWORK )
+            END IF
+*
+*           Apply H to rows 1:m-k+i+ib-1 of current block
+*
+            CALL ZUNG2L( M-K+I+IB-1, IB, IB, A( 1, N-K+I ), LDA,
+     $                   TAU( I ), WORK, IINFO )
+*
+*           Set rows m-k+i+ib:m of current block to zero
+*
+            DO 40 J = N - K + I, N - K + I + IB - 1
+               DO 30 L = M - K + I + IB, M
+                  A( L, J ) = ZERO
+   30          CONTINUE
+   40       CONTINUE
+   50    CONTINUE
+      END IF
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of ZUNGQL
+*
+      END
diff --git a/costa/native/external/lapack/zungqr.f b/costa/native/external/lapack/zungqr.f
new file mode 100644
index 000000000..dad895e10
--- /dev/null
+++ b/costa/native/external/lapack/zungqr.f
@@ -0,0 +1,217 @@
+      SUBROUTINE ZUNGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZUNGQR generates an M-by-N complex matrix Q with orthonormal columns,
+*  which is defined as the first N columns of a product of K elementary
+*  reflectors of order M
+*
+*        Q  =  H(1) H(2) . . . H(k)
+*
+*  as returned by ZGEQRF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. M >= N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. N >= K >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the i-th column must contain the vector which
+*          defines the elementary reflector H(i), for i = 1,2,...,k, as
+*          returned by ZGEQRF in the first k columns of its array
+*          argument A.
+*          On exit, the M-by-N matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) COMPLEX*16 array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by ZGEQRF.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,N).
+*          For optimum performance LWORK >= N*NB, where NB is the
+*          optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, IINFO, IWS, J, KI, KK, L, LDWORK,
+     $                   LWKOPT, NB, NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLARFB, ZLARFT, ZUNG2R
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'ZUNGQR', ' ', M, N, K, -1 )
+      LWKOPT = MAX( 1, N )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 .OR. N.GT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.N ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZUNGQR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 0
+      IWS = N
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'ZUNGQR', ' ', M, N, K, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = N
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'ZUNGQR', ' ', M, N, K, -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code after the last block.
+*        The first kk columns are handled by the block method.
+*
+         KI = ( ( K-NX-1 ) / NB )*NB
+         KK = MIN( K, KI+NB )
+*
+*        Set A(1:kk,kk+1:n) to zero.
+*
+         DO 20 J = KK + 1, N
+            DO 10 I = 1, KK
+               A( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+      ELSE
+         KK = 0
+      END IF
+*
+*     Use unblocked code for the last or only block.
+*
+      IF( KK.LT.N )
+     $   CALL ZUNG2R( M-KK, N-KK, K-KK, A( KK+1, KK+1 ), LDA,
+     $                TAU( KK+1 ), WORK, IINFO )
+*
+      IF( KK.GT.0 ) THEN
+*
+*        Use blocked code
+*
+         DO 50 I = KI + 1, 1, -NB
+            IB = MIN( NB, K-I+1 )
+            IF( I+IB.LE.N ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i) H(i+1) . . . H(i+ib-1)
+*
+               CALL ZLARFT( 'Forward', 'Columnwise', M-I+1, IB,
+     $                      A( I, I ), LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H to A(i:m,i+ib:n) from the left
+*
+               CALL ZLARFB( 'Left', 'No transpose', 'Forward',
+     $                      'Columnwise', M-I+1, N-I-IB+1, IB,
+     $                      A( I, I ), LDA, WORK, LDWORK, A( I, I+IB ),
+     $                      LDA, WORK( IB+1 ), LDWORK )
+            END IF
+*
+*           Apply H to rows i:m of current block
+*
+            CALL ZUNG2R( M-I+1, IB, IB, A( I, I ), LDA, TAU( I ), WORK,
+     $                   IINFO )
+*
+*           Set rows 1:i-1 of current block to zero
+*
+            DO 40 J = I, I + IB - 1
+               DO 30 L = 1, I - 1
+                  A( L, J ) = ZERO
+   30          CONTINUE
+   40       CONTINUE
+   50    CONTINUE
+      END IF
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of ZUNGQR
+*
+      END
diff --git a/costa/native/external/lapack/zungr2.f b/costa/native/external/lapack/zungr2.f
new file mode 100644
index 000000000..7254c5d66
--- /dev/null
+++ b/costa/native/external/lapack/zungr2.f
@@ -0,0 +1,135 @@
+      SUBROUTINE ZUNGR2( M, N, K, A, LDA, TAU, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZUNGR2 generates an m by n complex matrix Q with orthonormal rows,
+*  which is defined as the last m rows of a product of k elementary
+*  reflectors of order n
+*
+*        Q  =  H(1)' H(2)' . . . H(k)'
+*
+*  as returned by ZGERQF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. N >= M.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. M >= K >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the (m-k+i)-th row must contain the vector which
+*          defines the elementary reflector H(i), for i = 1,2,...,k, as
+*          returned by ZGERQF in the last k rows of its array argument
+*          A.
+*          On exit, the m-by-n matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) COMPLEX*16 array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by ZGERQF.
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (M)
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE, ZERO
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ),
+     $                   ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, II, J, L
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLACGV, ZLARF, ZSCAL
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.M ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZUNGR2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 )
+     $   RETURN
+*
+      IF( K.LT.M ) THEN
+*
+*        Initialise rows 1:m-k to rows of the unit matrix
+*
+         DO 20 J = 1, N
+            DO 10 L = 1, M - K
+               A( L, J ) = ZERO
+   10       CONTINUE
+            IF( J.GT.N-M .AND. J.LE.N-K )
+     $         A( M-N+J, J ) = ONE
+   20    CONTINUE
+      END IF
+*
+      DO 40 I = 1, K
+         II = M - K + I
+*
+*        Apply H(i)' to A(1:m-k+i,1:n-k+i) from the right
+*
+         CALL ZLACGV( N-M+II-1, A( II, 1 ), LDA )
+         A( II, N-M+II ) = ONE
+         CALL ZLARF( 'Right', II-1, N-M+II, A( II, 1 ), LDA,
+     $               DCONJG( TAU( I ) ), A, LDA, WORK )
+         CALL ZSCAL( N-M+II-1, -TAU( I ), A( II, 1 ), LDA )
+         CALL ZLACGV( N-M+II-1, A( II, 1 ), LDA )
+         A( II, N-M+II ) = ONE - DCONJG( TAU( I ) )
+*
+*        Set A(m-k+i,n-k+i+1:n) to zero
+*
+         DO 30 L = N - M + II + 1, N
+            A( II, L ) = ZERO
+   30    CONTINUE
+   40 CONTINUE
+      RETURN
+*
+*     End of ZUNGR2
+*
+      END
diff --git a/costa/native/external/lapack/zungrq.f b/costa/native/external/lapack/zungrq.f
new file mode 100644
index 000000000..b1ac7f014
--- /dev/null
+++ b/costa/native/external/lapack/zungrq.f
@@ -0,0 +1,215 @@
+      SUBROUTINE ZUNGRQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      INTEGER            INFO, K, LDA, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZUNGRQ generates an M-by-N complex matrix Q with orthonormal rows,
+*  which is defined as the last M rows of a product of K elementary
+*  reflectors of order N
+*
+*        Q  =  H(1)' H(2)' . . . H(k)'
+*
+*  as returned by ZGERQF.
+*
+*  Arguments
+*  =========
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix Q. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix Q. N >= M.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines the
+*          matrix Q. M >= K >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the (m-k+i)-th row must contain the vector which
+*          defines the elementary reflector H(i), for i = 1,2,...,k, as
+*          returned by ZGERQF in the last k rows of its array argument
+*          A.
+*          On exit, the M-by-N matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The first dimension of the array A. LDA >= max(1,M).
+*
+*  TAU     (input) COMPLEX*16 array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by ZGERQF.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= max(1,M).
+*          For optimum performance LWORK >= M*NB, where NB is the
+*          optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument has an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY
+      INTEGER            I, IB, II, IINFO, IWS, J, KK, L, LDWORK,
+     $                   LWKOPT, NB, NBMIN, NX
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLARFB, ZLARFT, ZUNGR2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. External Functions ..
+      INTEGER            ILAENV
+      EXTERNAL           ILAENV
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NB = ILAENV( 1, 'ZUNGRQ', ' ', M, N, K, -1 )
+      LWKOPT = MAX( 1, M )*NB
+      WORK( 1 ) = LWKOPT
+      LQUERY = ( LWORK.EQ.-1 )
+      IF( M.LT.0 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.M ) THEN
+         INFO = -2
+      ELSE IF( K.LT.0 .OR. K.GT.M ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      ELSE IF( LWORK.LT.MAX( 1, M ) .AND. .NOT.LQUERY ) THEN
+         INFO = -8
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZUNGRQ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.LE.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      NX = 0
+      IWS = M
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+*
+*        Determine when to cross over from blocked to unblocked code.
+*
+         NX = MAX( 0, ILAENV( 3, 'ZUNGRQ', ' ', M, N, K, -1 ) )
+         IF( NX.LT.K ) THEN
+*
+*           Determine if workspace is large enough for blocked code.
+*
+            LDWORK = M
+            IWS = LDWORK*NB
+            IF( LWORK.LT.IWS ) THEN
+*
+*              Not enough workspace to use optimal NB:  reduce NB and
+*              determine the minimum value of NB.
+*
+               NB = LWORK / LDWORK
+               NBMIN = MAX( 2, ILAENV( 2, 'ZUNGRQ', ' ', M, N, K, -1 ) )
+            END IF
+         END IF
+      END IF
+*
+      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
+*
+*        Use blocked code after the first block.
+*        The last kk rows are handled by the block method.
+*
+         KK = MIN( K, ( ( K-NX+NB-1 ) / NB )*NB )
+*
+*        Set A(1:m-kk,n-kk+1:n) to zero.
+*
+         DO 20 J = N - KK + 1, N
+            DO 10 I = 1, M - KK
+               A( I, J ) = ZERO
+   10       CONTINUE
+   20    CONTINUE
+      ELSE
+         KK = 0
+      END IF
+*
+*     Use unblocked code for the first or only block.
+*
+      CALL ZUNGR2( M-KK, N-KK, K-KK, A, LDA, TAU, WORK, IINFO )
+*
+      IF( KK.GT.0 ) THEN
+*
+*        Use blocked code
+*
+         DO 50 I = K - KK + 1, K, NB
+            IB = MIN( NB, K-I+1 )
+            II = M - K + I
+            IF( II.GT.1 ) THEN
+*
+*              Form the triangular factor of the block reflector
+*              H = H(i+ib-1) . . . H(i+1) H(i)
+*
+               CALL ZLARFT( 'Backward', 'Rowwise', N-K+I+IB-1, IB,
+     $                      A( II, 1 ), LDA, TAU( I ), WORK, LDWORK )
+*
+*              Apply H' to A(1:m-k+i-1,1:n-k+i+ib-1) from the right
+*
+               CALL ZLARFB( 'Right', 'Conjugate transpose', 'Backward',
+     $                      'Rowwise', II-1, N-K+I+IB-1, IB, A( II, 1 ),
+     $                      LDA, WORK, LDWORK, A, LDA, WORK( IB+1 ),
+     $                      LDWORK )
+            END IF
+*
+*           Apply H' to columns 1:n-k+i+ib-1 of current block
+*
+            CALL ZUNGR2( IB, N-K+I+IB-1, IB, A( II, 1 ), LDA, TAU( I ),
+     $                   WORK, IINFO )
+*
+*           Set columns n-k+i+ib:n of current block to zero
+*
+            DO 40 L = N - K + I + IB, N
+               DO 30 J = II, II + IB - 1
+                  A( J, L ) = ZERO
+   30          CONTINUE
+   40       CONTINUE
+   50    CONTINUE
+      END IF
+*
+      WORK( 1 ) = IWS
+      RETURN
+*
+*     End of ZUNGRQ
+*
+      END
diff --git a/costa/native/external/lapack/zungtr.f b/costa/native/external/lapack/zungtr.f
new file mode 100644
index 000000000..d731be20a
--- /dev/null
+++ b/costa/native/external/lapack/zungtr.f
@@ -0,0 +1,185 @@
+      SUBROUTINE ZUNGTR( UPLO, N, A, LDA, TAU, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDA, LWORK, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZUNGTR generates a complex unitary matrix Q which is defined as the
+*  product of n-1 elementary reflectors of order N, as returned by
+*  ZHETRD:
+*
+*  if UPLO = 'U', Q = H(n-1) . . . H(2) H(1),
+*
+*  if UPLO = 'L', Q = H(1) H(2) . . . H(n-1).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U': Upper triangle of A contains elementary reflectors
+*                 from ZHETRD;
+*          = 'L': Lower triangle of A contains elementary reflectors
+*                 from ZHETRD.
+*
+*  N       (input) INTEGER
+*          The order of the matrix Q. N >= 0.
+*
+*  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+*          On entry, the vectors which define the elementary reflectors,
+*          as returned by ZHETRD.
+*          On exit, the N-by-N unitary matrix Q.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= N.
+*
+*  TAU     (input) COMPLEX*16 array, dimension (N-1)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by ZHETRD.
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK. LWORK >= N-1.
+*          For optimum performance LWORK >= (N-1)*NB, where NB is
+*          the optimal blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ZERO, ONE
+      PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LQUERY, UPPER
+      INTEGER            I, IINFO, J, LWKOPT, NB
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZUNGQL, ZUNGQR
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LQUERY = ( LWORK.EQ.-1 )
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
+         INFO = -4
+      ELSE IF( LWORK.LT.MAX( 1, N-1 ) .AND. .NOT.LQUERY ) THEN
+         INFO = -7
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         IF( UPPER ) THEN
+            NB = ILAENV( 1, 'ZUNGQL', ' ', N-1, N-1, N-1, -1 )
+         ELSE
+            NB = ILAENV( 1, 'ZUNGQR', ' ', N-1, N-1, N-1, -1 )
+         END IF
+         LWKOPT = MAX( 1, N-1 )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZUNGTR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( UPPER ) THEN
+*
+*        Q was determined by a call to ZHETRD with UPLO = 'U'
+*
+*        Shift the vectors which define the elementary reflectors one
+*        column to the left, and set the last row and column of Q to
+*        those of the unit matrix
+*
+         DO 20 J = 1, N - 1
+            DO 10 I = 1, J - 1
+               A( I, J ) = A( I, J+1 )
+   10       CONTINUE
+            A( N, J ) = ZERO
+   20    CONTINUE
+         DO 30 I = 1, N - 1
+            A( I, N ) = ZERO
+   30    CONTINUE
+         A( N, N ) = ONE
+*
+*        Generate Q(1:n-1,1:n-1)
+*
+         CALL ZUNGQL( N-1, N-1, N-1, A, LDA, TAU, WORK, LWORK, IINFO )
+*
+      ELSE
+*
+*        Q was determined by a call to ZHETRD with UPLO = 'L'.
+*
+*        Shift the vectors which define the elementary reflectors one
+*        column to the right, and set the first row and column of Q to
+*        those of the unit matrix
+*
+         DO 50 J = N, 2, -1
+            A( 1, J ) = ZERO
+            DO 40 I = J + 1, N
+               A( I, J ) = A( I, J-1 )
+   40       CONTINUE
+   50    CONTINUE
+         A( 1, 1 ) = ONE
+         DO 60 I = 2, N
+            A( I, 1 ) = ZERO
+   60    CONTINUE
+         IF( N.GT.1 ) THEN
+*
+*           Generate Q(2:n,2:n)
+*
+            CALL ZUNGQR( N-1, N-1, N-1, A( 2, 2 ), LDA, TAU, WORK,
+     $                   LWORK, IINFO )
+         END IF
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of ZUNGTR
+*
+      END
diff --git a/costa/native/external/lapack/zunm2l.f b/costa/native/external/lapack/zunm2l.f
new file mode 100644
index 000000000..453711701
--- /dev/null
+++ b/costa/native/external/lapack/zunm2l.f
@@ -0,0 +1,197 @@
+      SUBROUTINE ZUNM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZUNM2L overwrites the general complex m-by-n matrix C with
+*
+*        Q * C  if SIDE = 'L' and TRANS = 'N', or
+*
+*        Q'* C  if SIDE = 'L' and TRANS = 'C', or
+*
+*        C * Q  if SIDE = 'R' and TRANS = 'N', or
+*
+*        C * Q' if SIDE = 'R' and TRANS = 'C',
+*
+*  where Q is a complex unitary matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(k) . . . H(2) H(1)
+*
+*  as returned by ZGEQLF. Q is of order m if SIDE = 'L' and of order n
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q' from the Left
+*          = 'R': apply Q or Q' from the Right
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': apply Q  (No transpose)
+*          = 'C': apply Q' (Conjugate transpose)
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,K)
+*          The i-th column must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          ZGEQLF in the last k columns of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*          If SIDE = 'L', LDA >= max(1,M);
+*          if SIDE = 'R', LDA >= max(1,N).
+*
+*  TAU     (input) COMPLEX*16 array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by ZGEQLF.
+*
+*  C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+*          On entry, the m-by-n matrix C.
+*          On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension
+*                                   (N) if SIDE = 'L',
+*                                   (M) if SIDE = 'R'
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, NOTRAN
+      INTEGER            I, I1, I2, I3, MI, NI, NQ
+      COMPLEX*16         AII, TAUI
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLARF
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+*
+*     NQ is the order of Q
+*
+      IF( LEFT ) THEN
+         NQ = M
+      ELSE
+         NQ = N
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZUNM2L', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 )
+     $   RETURN
+*
+      IF( ( LEFT .AND. NOTRAN .OR. .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN
+         I1 = 1
+         I2 = K
+         I3 = 1
+      ELSE
+         I1 = K
+         I2 = 1
+         I3 = -1
+      END IF
+*
+      IF( LEFT ) THEN
+         NI = N
+      ELSE
+         MI = M
+      END IF
+*
+      DO 10 I = I1, I2, I3
+         IF( LEFT ) THEN
+*
+*           H(i) or H(i)' is applied to C(1:m-k+i,1:n)
+*
+            MI = M - K + I
+         ELSE
+*
+*           H(i) or H(i)' is applied to C(1:m,1:n-k+i)
+*
+            NI = N - K + I
+         END IF
+*
+*        Apply H(i) or H(i)'
+*
+         IF( NOTRAN ) THEN
+            TAUI = TAU( I )
+         ELSE
+            TAUI = DCONJG( TAU( I ) )
+         END IF
+         AII = A( NQ-K+I, I )
+         A( NQ-K+I, I ) = ONE
+         CALL ZLARF( SIDE, MI, NI, A( 1, I ), 1, TAUI, C, LDC, WORK )
+         A( NQ-K+I, I ) = AII
+   10 CONTINUE
+      RETURN
+*
+*     End of ZUNM2L
+*
+      END
diff --git a/costa/native/external/lapack/zunm2r.f b/costa/native/external/lapack/zunm2r.f
new file mode 100644
index 000000000..a17d12f93
--- /dev/null
+++ b/costa/native/external/lapack/zunm2r.f
@@ -0,0 +1,202 @@
+      SUBROUTINE ZUNM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZUNM2R overwrites the general complex m-by-n matrix C with
+*
+*        Q * C  if SIDE = 'L' and TRANS = 'N', or
+*
+*        Q'* C  if SIDE = 'L' and TRANS = 'C', or
+*
+*        C * Q  if SIDE = 'R' and TRANS = 'N', or
+*
+*        C * Q' if SIDE = 'R' and TRANS = 'C',
+*
+*  where Q is a complex unitary matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(1) H(2) . . . H(k)
+*
+*  as returned by ZGEQRF. Q is of order m if SIDE = 'L' and of order n
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q' from the Left
+*          = 'R': apply Q or Q' from the Right
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': apply Q  (No transpose)
+*          = 'C': apply Q' (Conjugate transpose)
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,K)
+*          The i-th column must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          ZGEQRF in the first k columns of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*          If SIDE = 'L', LDA >= max(1,M);
+*          if SIDE = 'R', LDA >= max(1,N).
+*
+*  TAU     (input) COMPLEX*16 array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by ZGEQRF.
+*
+*  C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+*          On entry, the m-by-n matrix C.
+*          On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension
+*                                   (N) if SIDE = 'L',
+*                                   (M) if SIDE = 'R'
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, NOTRAN
+      INTEGER            I, I1, I2, I3, IC, JC, MI, NI, NQ
+      COMPLEX*16         AII, TAUI
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLARF
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+*
+*     NQ is the order of Q
+*
+      IF( LEFT ) THEN
+         NQ = M
+      ELSE
+         NQ = N
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZUNM2R', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 )
+     $   RETURN
+*
+      IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN
+         I1 = 1
+         I2 = K
+         I3 = 1
+      ELSE
+         I1 = K
+         I2 = 1
+         I3 = -1
+      END IF
+*
+      IF( LEFT ) THEN
+         NI = N
+         JC = 1
+      ELSE
+         MI = M
+         IC = 1
+      END IF
+*
+      DO 10 I = I1, I2, I3
+         IF( LEFT ) THEN
+*
+*           H(i) or H(i)' is applied to C(i:m,1:n)
+*
+            MI = M - I + 1
+            IC = I
+         ELSE
+*
+*           H(i) or H(i)' is applied to C(1:m,i:n)
+*
+            NI = N - I + 1
+            JC = I
+         END IF
+*
+*        Apply H(i) or H(i)'
+*
+         IF( NOTRAN ) THEN
+            TAUI = TAU( I )
+         ELSE
+            TAUI = DCONJG( TAU( I ) )
+         END IF
+         AII = A( I, I )
+         A( I, I ) = ONE
+         CALL ZLARF( SIDE, MI, NI, A( I, I ), 1, TAUI, C( IC, JC ), LDC,
+     $               WORK )
+         A( I, I ) = AII
+   10 CONTINUE
+      RETURN
+*
+*     End of ZUNM2R
+*
+      END
diff --git a/costa/native/external/lapack/zunmbr.f b/costa/native/external/lapack/zunmbr.f
new file mode 100644
index 000000000..ae0a86f70
--- /dev/null
+++ b/costa/native/external/lapack/zunmbr.f
@@ -0,0 +1,281 @@
+      SUBROUTINE ZUNMBR( VECT, SIDE, TRANS, M, N, K, A, LDA, TAU, C,
+     $                   LDC, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS, VECT
+      INTEGER            INFO, K, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  If VECT = 'Q', ZUNMBR overwrites the general complex M-by-N matrix C
+*  with
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'C':      Q**H * C       C * Q**H
+*
+*  If VECT = 'P', ZUNMBR overwrites the general complex M-by-N matrix C
+*  with
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      P * C          C * P
+*  TRANS = 'C':      P**H * C       C * P**H
+*
+*  Here Q and P**H are the unitary matrices determined by ZGEBRD when
+*  reducing a complex matrix A to bidiagonal form: A = Q * B * P**H. Q
+*  and P**H are defined as products of elementary reflectors H(i) and
+*  G(i) respectively.
+*
+*  Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the
+*  order of the unitary matrix Q or P**H that is applied.
+*
+*  If VECT = 'Q', A is assumed to have been an NQ-by-K matrix:
+*  if nq >= k, Q = H(1) H(2) . . . H(k);
+*  if nq < k, Q = H(1) H(2) . . . H(nq-1).
+*
+*  If VECT = 'P', A is assumed to have been a K-by-NQ matrix:
+*  if k < nq, P = G(1) G(2) . . . G(k);
+*  if k >= nq, P = G(1) G(2) . . . G(nq-1).
+*
+*  Arguments
+*  =========
+*
+*  VECT    (input) CHARACTER*1
+*          = 'Q': apply Q or Q**H;
+*          = 'P': apply P or P**H.
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q, Q**H, P or P**H from the Left;
+*          = 'R': apply Q, Q**H, P or P**H from the Right.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q or P;
+*          = 'C':  Conjugate transpose, apply Q**H or P**H.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          If VECT = 'Q', the number of columns in the original
+*          matrix reduced by ZGEBRD.
+*          If VECT = 'P', the number of rows in the original
+*          matrix reduced by ZGEBRD.
+*          K >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension
+*                                (LDA,min(nq,K)) if VECT = 'Q'
+*                                (LDA,nq)        if VECT = 'P'
+*          The vectors which define the elementary reflectors H(i) and
+*          G(i), whose products determine the matrices Q and P, as
+*          returned by ZGEBRD.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*          If VECT = 'Q', LDA >= max(1,nq);
+*          if VECT = 'P', LDA >= max(1,min(nq,K)).
+*
+*  TAU     (input) COMPLEX*16 array, dimension (min(nq,K))
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i) or G(i) which determines Q or P, as returned
+*          by ZGEBRD in the array argument TAUQ or TAUP.
+*
+*  C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q
+*          or P*C or P**H*C or C*P or C*P**H.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE = 'L', and
+*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            APPLYQ, LEFT, LQUERY, NOTRAN
+      CHARACTER          TRANST
+      INTEGER            I1, I2, IINFO, LWKOPT, MI, NB, NI, NQ, NW
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZUNMLQ, ZUNMQR
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      APPLYQ = LSAME( VECT, 'Q' )
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q or P and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.APPLYQ .AND. .NOT.LSAME( VECT, 'P' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( K.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( ( APPLYQ .AND. LDA.LT.MAX( 1, NQ ) ) .OR.
+     $         ( .NOT.APPLYQ .AND. LDA.LT.MAX( 1, MIN( NQ, K ) ) ) )
+     $          THEN
+         INFO = -8
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -11
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         IF( APPLYQ ) THEN
+            IF( LEFT ) THEN
+               NB = ILAENV( 1, 'ZUNMQR', SIDE // TRANS, M-1, N, M-1,
+     $              -1 )
+            ELSE
+               NB = ILAENV( 1, 'ZUNMQR', SIDE // TRANS, M, N-1, N-1,
+     $              -1 )
+            END IF
+         ELSE
+            IF( LEFT ) THEN
+               NB = ILAENV( 1, 'ZUNMLQ', SIDE // TRANS, M-1, N, M-1,
+     $              -1 )
+            ELSE
+               NB = ILAENV( 1, 'ZUNMLQ', SIDE // TRANS, M, N-1, N-1,
+     $              -1 )
+            END IF
+         END IF
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZUNMBR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+      END IF
+*
+*     Quick return if possible
+*
+      WORK( 1 ) = 1
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+      IF( APPLYQ ) THEN
+*
+*        Apply Q
+*
+         IF( NQ.GE.K ) THEN
+*
+*           Q was determined by a call to ZGEBRD with nq >= k
+*
+            CALL ZUNMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, IINFO )
+         ELSE IF( NQ.GT.1 ) THEN
+*
+*           Q was determined by a call to ZGEBRD with nq < k
+*
+            IF( LEFT ) THEN
+               MI = M - 1
+               NI = N
+               I1 = 2
+               I2 = 1
+            ELSE
+               MI = M
+               NI = N - 1
+               I1 = 1
+               I2 = 2
+            END IF
+            CALL ZUNMQR( SIDE, TRANS, MI, NI, NQ-1, A( 2, 1 ), LDA, TAU,
+     $                   C( I1, I2 ), LDC, WORK, LWORK, IINFO )
+         END IF
+      ELSE
+*
+*        Apply P
+*
+         IF( NOTRAN ) THEN
+            TRANST = 'C'
+         ELSE
+            TRANST = 'N'
+         END IF
+         IF( NQ.GT.K ) THEN
+*
+*           P was determined by a call to ZGEBRD with nq > k
+*
+            CALL ZUNMLQ( SIDE, TRANST, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, IINFO )
+         ELSE IF( NQ.GT.1 ) THEN
+*
+*           P was determined by a call to ZGEBRD with nq <= k
+*
+            IF( LEFT ) THEN
+               MI = M - 1
+               NI = N
+               I1 = 2
+               I2 = 1
+            ELSE
+               MI = M
+               NI = N - 1
+               I1 = 1
+               I2 = 2
+            END IF
+            CALL ZUNMLQ( SIDE, TRANST, MI, NI, NQ-1, A( 1, 2 ), LDA,
+     $                   TAU, C( I1, I2 ), LDC, WORK, LWORK, IINFO )
+         END IF
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of ZUNMBR
+*
+      END
diff --git a/costa/native/external/lapack/zunmhr.f b/costa/native/external/lapack/zunmhr.f
new file mode 100644
index 000000000..f3b3fc71c
--- /dev/null
+++ b/costa/native/external/lapack/zunmhr.f
@@ -0,0 +1,202 @@
+      SUBROUTINE ZUNMHR( SIDE, TRANS, M, N, ILO, IHI, A, LDA, TAU, C,
+     $                   LDC, WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            IHI, ILO, INFO, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZUNMHR overwrites the general complex M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'C':      Q**H * C       C * Q**H
+*
+*  where Q is a complex unitary matrix of order nq, with nq = m if
+*  SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of
+*  IHI-ILO elementary reflectors, as returned by ZGEHRD:
+*
+*  Q = H(ilo) H(ilo+1) . . . H(ihi-1).
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**H from the Left;
+*          = 'R': apply Q or Q**H from the Right.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': apply Q  (No transpose)
+*          = 'C': apply Q**H (Conjugate transpose)
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  ILO     (input) INTEGER
+*  IHI     (input) INTEGER
+*          ILO and IHI must have the same values as in the previous call
+*          of ZGEHRD. Q is equal to the unit matrix except in the
+*          submatrix Q(ilo+1:ihi,ilo+1:ihi).
+*          If SIDE = 'L', then 1 <= ILO <= IHI <= M, if M > 0, and
+*          ILO = 1 and IHI = 0, if M = 0;
+*          if SIDE = 'R', then 1 <= ILO <= IHI <= N, if N > 0, and
+*          ILO = 1 and IHI = 0, if N = 0.
+*
+*  A       (input) COMPLEX*16 array, dimension
+*                               (LDA,M) if SIDE = 'L'
+*                               (LDA,N) if SIDE = 'R'
+*          The vectors which define the elementary reflectors, as
+*          returned by ZGEHRD.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*          LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'.
+*
+*  TAU     (input) COMPLEX*16 array, dimension
+*                               (M-1) if SIDE = 'L'
+*                               (N-1) if SIDE = 'R'
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by ZGEHRD.
+*
+*  C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE = 'L', and
+*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LEFT, LQUERY
+      INTEGER            I1, I2, IINFO, LWKOPT, MI, NB, NH, NI, NQ, NW
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZUNMQR
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      NH = IHI - ILO
+      LEFT = LSAME( SIDE, 'L' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.LSAME( TRANS, 'C' ) )
+     $          THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX( 1, NQ ) ) THEN
+         INFO = -5
+      ELSE IF( IHI.LT.MIN( ILO, NQ ) .OR. IHI.GT.NQ ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN
+         INFO = -8
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -11
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         IF( LEFT ) THEN
+            NB = ILAENV( 1, 'ZUNMQR', SIDE // TRANS, NH, N, NH, -1 )
+         ELSE
+            NB = ILAENV( 1, 'ZUNMQR', SIDE // TRANS, M, NH, NH, -1 )
+         END IF
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZUNMHR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. NH.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( LEFT ) THEN
+         MI = NH
+         NI = N
+         I1 = ILO + 1
+         I2 = 1
+      ELSE
+         MI = M
+         NI = NH
+         I1 = 1
+         I2 = ILO + 1
+      END IF
+*
+      CALL ZUNMQR( SIDE, TRANS, MI, NI, NH, A( ILO+1, ILO ), LDA,
+     $             TAU( ILO ), C( I1, I2 ), LDC, WORK, LWORK, IINFO )
+*
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of ZUNMHR
+*
+      END
diff --git a/costa/native/external/lapack/zunml2.f b/costa/native/external/lapack/zunml2.f
new file mode 100644
index 000000000..650710ec9
--- /dev/null
+++ b/costa/native/external/lapack/zunml2.f
@@ -0,0 +1,206 @@
+      SUBROUTINE ZUNML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZUNML2 overwrites the general complex m-by-n matrix C with
+*
+*        Q * C  if SIDE = 'L' and TRANS = 'N', or
+*
+*        Q'* C  if SIDE = 'L' and TRANS = 'C', or
+*
+*        C * Q  if SIDE = 'R' and TRANS = 'N', or
+*
+*        C * Q' if SIDE = 'R' and TRANS = 'C',
+*
+*  where Q is a complex unitary matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(k)' . . . H(2)' H(1)'
+*
+*  as returned by ZGELQF. Q is of order m if SIDE = 'L' and of order n
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q' from the Left
+*          = 'R': apply Q or Q' from the Right
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': apply Q  (No transpose)
+*          = 'C': apply Q' (Conjugate transpose)
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension
+*                               (LDA,M) if SIDE = 'L',
+*                               (LDA,N) if SIDE = 'R'
+*          The i-th row must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          ZGELQF in the first k rows of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,K).
+*
+*  TAU     (input) COMPLEX*16 array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by ZGELQF.
+*
+*  C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+*          On entry, the m-by-n matrix C.
+*          On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension
+*                                   (N) if SIDE = 'L',
+*                                   (M) if SIDE = 'R'
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, NOTRAN
+      INTEGER            I, I1, I2, I3, IC, JC, MI, NI, NQ
+      COMPLEX*16         AII, TAUI
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLACGV, ZLARF
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+*
+*     NQ is the order of Q
+*
+      IF( LEFT ) THEN
+         NQ = M
+      ELSE
+         NQ = N
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, K ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZUNML2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 )
+     $   RETURN
+*
+      IF( ( LEFT .AND. NOTRAN .OR. .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN
+         I1 = 1
+         I2 = K
+         I3 = 1
+      ELSE
+         I1 = K
+         I2 = 1
+         I3 = -1
+      END IF
+*
+      IF( LEFT ) THEN
+         NI = N
+         JC = 1
+      ELSE
+         MI = M
+         IC = 1
+      END IF
+*
+      DO 10 I = I1, I2, I3
+         IF( LEFT ) THEN
+*
+*           H(i) or H(i)' is applied to C(i:m,1:n)
+*
+            MI = M - I + 1
+            IC = I
+         ELSE
+*
+*           H(i) or H(i)' is applied to C(1:m,i:n)
+*
+            NI = N - I + 1
+            JC = I
+         END IF
+*
+*        Apply H(i) or H(i)'
+*
+         IF( NOTRAN ) THEN
+            TAUI = DCONJG( TAU( I ) )
+         ELSE
+            TAUI = TAU( I )
+         END IF
+         IF( I.LT.NQ )
+     $      CALL ZLACGV( NQ-I, A( I, I+1 ), LDA )
+         AII = A( I, I )
+         A( I, I ) = ONE
+         CALL ZLARF( SIDE, MI, NI, A( I, I ), LDA, TAUI, C( IC, JC ),
+     $               LDC, WORK )
+         A( I, I ) = AII
+         IF( I.LT.NQ )
+     $      CALL ZLACGV( NQ-I, A( I, I+1 ), LDA )
+   10 CONTINUE
+      RETURN
+*
+*     End of ZUNML2
+*
+      END
diff --git a/costa/native/external/lapack/zunmlq.f b/costa/native/external/lapack/zunmlq.f
new file mode 100644
index 000000000..3d69f2e4e
--- /dev/null
+++ b/costa/native/external/lapack/zunmlq.f
@@ -0,0 +1,268 @@
+      SUBROUTINE ZUNMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZUNMLQ overwrites the general complex M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'C':      Q**H * C       C * Q**H
+*
+*  where Q is a complex unitary matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(k)' . . . H(2)' H(1)'
+*
+*  as returned by ZGELQF. Q is of order M if SIDE = 'L' and of order N
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**H from the Left;
+*          = 'R': apply Q or Q**H from the Right.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q;
+*          = 'C':  Conjugate transpose, apply Q**H.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension
+*                               (LDA,M) if SIDE = 'L',
+*                               (LDA,N) if SIDE = 'R'
+*          The i-th row must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          ZGELQF in the first k rows of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,K).
+*
+*  TAU     (input) COMPLEX*16 array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by ZGELQF.
+*
+*  C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE 'L', and
+*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            NBMAX, LDT
+      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, LQUERY, NOTRAN
+      CHARACTER          TRANST
+      INTEGER            I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK,
+     $                   LWKOPT, MI, NB, NBMIN, NI, NQ, NW
+*     ..
+*     .. Local Arrays ..
+      COMPLEX*16         T( LDT, NBMAX )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLARFB, ZLARFT, ZUNML2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, K ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size.  NB may be at most NBMAX, where NBMAX
+*        is used to define the local array T.
+*
+         NB = MIN( NBMAX, ILAENV( 1, 'ZUNMLQ', SIDE // TRANS, M, N, K,
+     $        -1 ) )
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZUNMLQ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      LDWORK = NW
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+         IWS = NW*NB
+         IF( LWORK.LT.IWS ) THEN
+            NB = LWORK / LDWORK
+            NBMIN = MAX( 2, ILAENV( 2, 'ZUNMLQ', SIDE // TRANS, M, N, K,
+     $              -1 ) )
+         END IF
+      ELSE
+         IWS = NW
+      END IF
+*
+      IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
+*
+*        Use unblocked code
+*
+         CALL ZUNML2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK,
+     $                IINFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( ( LEFT .AND. NOTRAN ) .OR.
+     $       ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN
+            I1 = 1
+            I2 = K
+            I3 = NB
+         ELSE
+            I1 = ( ( K-1 ) / NB )*NB + 1
+            I2 = 1
+            I3 = -NB
+         END IF
+*
+         IF( LEFT ) THEN
+            NI = N
+            JC = 1
+         ELSE
+            MI = M
+            IC = 1
+         END IF
+*
+         IF( NOTRAN ) THEN
+            TRANST = 'C'
+         ELSE
+            TRANST = 'N'
+         END IF
+*
+         DO 10 I = I1, I2, I3
+            IB = MIN( NB, K-I+1 )
+*
+*           Form the triangular factor of the block reflector
+*           H = H(i) H(i+1) . . . H(i+ib-1)
+*
+            CALL ZLARFT( 'Forward', 'Rowwise', NQ-I+1, IB, A( I, I ),
+     $                   LDA, TAU( I ), T, LDT )
+            IF( LEFT ) THEN
+*
+*              H or H' is applied to C(i:m,1:n)
+*
+               MI = M - I + 1
+               IC = I
+            ELSE
+*
+*              H or H' is applied to C(1:m,i:n)
+*
+               NI = N - I + 1
+               JC = I
+            END IF
+*
+*           Apply H or H'
+*
+            CALL ZLARFB( SIDE, TRANST, 'Forward', 'Rowwise', MI, NI, IB,
+     $                   A( I, I ), LDA, T, LDT, C( IC, JC ), LDC, WORK,
+     $                   LDWORK )
+   10    CONTINUE
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of ZUNMLQ
+*
+      END
diff --git a/costa/native/external/lapack/zunmql.f b/costa/native/external/lapack/zunmql.f
new file mode 100644
index 000000000..0ba198a05
--- /dev/null
+++ b/costa/native/external/lapack/zunmql.f
@@ -0,0 +1,257 @@
+      SUBROUTINE ZUNMQL( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZUNMQL overwrites the general complex M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'C':      Q**H * C       C * Q**H
+*
+*  where Q is a complex unitary matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(k) . . . H(2) H(1)
+*
+*  as returned by ZGEQLF. Q is of order M if SIDE = 'L' and of order N
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**H from the Left;
+*          = 'R': apply Q or Q**H from the Right.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q;
+*          = 'C':  Transpose, apply Q**H.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,K)
+*          The i-th column must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          ZGEQLF in the last k columns of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*          If SIDE = 'L', LDA >= max(1,M);
+*          if SIDE = 'R', LDA >= max(1,N).
+*
+*  TAU     (input) COMPLEX*16 array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by ZGEQLF.
+*
+*  C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE = 'L', and
+*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            NBMAX, LDT
+      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, LQUERY, NOTRAN
+      INTEGER            I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT,
+     $                   MI, NB, NBMIN, NI, NQ, NW
+*     ..
+*     .. Local Arrays ..
+      COMPLEX*16         T( LDT, NBMAX )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLARFB, ZLARFT, ZUNM2L
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size.  NB may be at most NBMAX, where NBMAX
+*        is used to define the local array T.
+*
+         NB = MIN( NBMAX, ILAENV( 1, 'ZUNMQL', SIDE // TRANS, M, N, K,
+     $        -1 ) )
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZUNMQL', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      LDWORK = NW
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+         IWS = NW*NB
+         IF( LWORK.LT.IWS ) THEN
+            NB = LWORK / LDWORK
+            NBMIN = MAX( 2, ILAENV( 2, 'ZUNMQL', SIDE // TRANS, M, N, K,
+     $              -1 ) )
+         END IF
+      ELSE
+         IWS = NW
+      END IF
+*
+      IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
+*
+*        Use unblocked code
+*
+         CALL ZUNM2L( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK,
+     $                IINFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( ( LEFT .AND. NOTRAN ) .OR.
+     $       ( .NOT.LEFT .AND. .NOT.NOTRAN ) ) THEN
+            I1 = 1
+            I2 = K
+            I3 = NB
+         ELSE
+            I1 = ( ( K-1 ) / NB )*NB + 1
+            I2 = 1
+            I3 = -NB
+         END IF
+*
+         IF( LEFT ) THEN
+            NI = N
+         ELSE
+            MI = M
+         END IF
+*
+         DO 10 I = I1, I2, I3
+            IB = MIN( NB, K-I+1 )
+*
+*           Form the triangular factor of the block reflector
+*           H = H(i+ib-1) . . . H(i+1) H(i)
+*
+            CALL ZLARFT( 'Backward', 'Columnwise', NQ-K+I+IB-1, IB,
+     $                   A( 1, I ), LDA, TAU( I ), T, LDT )
+            IF( LEFT ) THEN
+*
+*              H or H' is applied to C(1:m-k+i+ib-1,1:n)
+*
+               MI = M - K + I + IB - 1
+            ELSE
+*
+*              H or H' is applied to C(1:m,1:n-k+i+ib-1)
+*
+               NI = N - K + I + IB - 1
+            END IF
+*
+*           Apply H or H'
+*
+            CALL ZLARFB( SIDE, TRANS, 'Backward', 'Columnwise', MI, NI,
+     $                   IB, A( 1, I ), LDA, T, LDT, C, LDC, WORK,
+     $                   LDWORK )
+   10    CONTINUE
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of ZUNMQL
+*
+      END
diff --git a/costa/native/external/lapack/zunmqr.f b/costa/native/external/lapack/zunmqr.f
new file mode 100644
index 000000000..54168a04c
--- /dev/null
+++ b/costa/native/external/lapack/zunmqr.f
@@ -0,0 +1,261 @@
+      SUBROUTINE ZUNMQR( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZUNMQR overwrites the general complex M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'C':      Q**H * C       C * Q**H
+*
+*  where Q is a complex unitary matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(1) H(2) . . . H(k)
+*
+*  as returned by ZGEQRF. Q is of order M if SIDE = 'L' and of order N
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**H from the Left;
+*          = 'R': apply Q or Q**H from the Right.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q;
+*          = 'C':  Conjugate transpose, apply Q**H.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension (LDA,K)
+*          The i-th column must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          ZGEQRF in the first k columns of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*          If SIDE = 'L', LDA >= max(1,M);
+*          if SIDE = 'R', LDA >= max(1,N).
+*
+*  TAU     (input) COMPLEX*16 array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by ZGEQRF.
+*
+*  C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE = 'L', and
+*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            NBMAX, LDT
+      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, LQUERY, NOTRAN
+      INTEGER            I, I1, I2, I3, IB, IC, IINFO, IWS, JC, LDWORK,
+     $                   LWKOPT, MI, NB, NBMIN, NI, NQ, NW
+*     ..
+*     .. Local Arrays ..
+      COMPLEX*16         T( LDT, NBMAX )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLARFB, ZLARFT, ZUNM2R
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size.  NB may be at most NBMAX, where NBMAX
+*        is used to define the local array T.
+*
+         NB = MIN( NBMAX, ILAENV( 1, 'ZUNMQR', SIDE // TRANS, M, N, K,
+     $        -1 ) )
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZUNMQR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      LDWORK = NW
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+         IWS = NW*NB
+         IF( LWORK.LT.IWS ) THEN
+            NB = LWORK / LDWORK
+            NBMIN = MAX( 2, ILAENV( 2, 'ZUNMQR', SIDE // TRANS, M, N, K,
+     $              -1 ) )
+         END IF
+      ELSE
+         IWS = NW
+      END IF
+*
+      IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
+*
+*        Use unblocked code
+*
+         CALL ZUNM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK,
+     $                IINFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( ( LEFT .AND. .NOT.NOTRAN ) .OR.
+     $       ( .NOT.LEFT .AND. NOTRAN ) ) THEN
+            I1 = 1
+            I2 = K
+            I3 = NB
+         ELSE
+            I1 = ( ( K-1 ) / NB )*NB + 1
+            I2 = 1
+            I3 = -NB
+         END IF
+*
+         IF( LEFT ) THEN
+            NI = N
+            JC = 1
+         ELSE
+            MI = M
+            IC = 1
+         END IF
+*
+         DO 10 I = I1, I2, I3
+            IB = MIN( NB, K-I+1 )
+*
+*           Form the triangular factor of the block reflector
+*           H = H(i) H(i+1) . . . H(i+ib-1)
+*
+            CALL ZLARFT( 'Forward', 'Columnwise', NQ-I+1, IB, A( I, I ),
+     $                   LDA, TAU( I ), T, LDT )
+            IF( LEFT ) THEN
+*
+*              H or H' is applied to C(i:m,1:n)
+*
+               MI = M - I + 1
+               IC = I
+            ELSE
+*
+*              H or H' is applied to C(1:m,i:n)
+*
+               NI = N - I + 1
+               JC = I
+            END IF
+*
+*           Apply H or H'
+*
+            CALL ZLARFB( SIDE, TRANS, 'Forward', 'Columnwise', MI, NI,
+     $                   IB, A( I, I ), LDA, T, LDT, C( IC, JC ), LDC,
+     $                   WORK, LDWORK )
+   10    CONTINUE
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of ZUNMQR
+*
+      END
diff --git a/costa/native/external/lapack/zunmr2.f b/costa/native/external/lapack/zunmr2.f
new file mode 100644
index 000000000..15f403efc
--- /dev/null
+++ b/costa/native/external/lapack/zunmr2.f
@@ -0,0 +1,199 @@
+      SUBROUTINE ZUNMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZUNMR2 overwrites the general complex m-by-n matrix C with
+*
+*        Q * C  if SIDE = 'L' and TRANS = 'N', or
+*
+*        Q'* C  if SIDE = 'L' and TRANS = 'C', or
+*
+*        C * Q  if SIDE = 'R' and TRANS = 'N', or
+*
+*        C * Q' if SIDE = 'R' and TRANS = 'C',
+*
+*  where Q is a complex unitary matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(1)' H(2)' . . . H(k)'
+*
+*  as returned by ZGERQF. Q is of order m if SIDE = 'L' and of order n
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q' from the Left
+*          = 'R': apply Q or Q' from the Right
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': apply Q  (No transpose)
+*          = 'C': apply Q' (Conjugate transpose)
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension
+*                               (LDA,M) if SIDE = 'L',
+*                               (LDA,N) if SIDE = 'R'
+*          The i-th row must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          ZGERQF in the last k rows of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,K).
+*
+*  TAU     (input) COMPLEX*16 array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by ZGERQF.
+*
+*  C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+*          On entry, the m-by-n matrix C.
+*          On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension
+*                                   (N) if SIDE = 'L',
+*                                   (M) if SIDE = 'R'
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, NOTRAN
+      INTEGER            I, I1, I2, I3, MI, NI, NQ
+      COMPLEX*16         AII, TAUI
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLACGV, ZLARF
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+*
+*     NQ is the order of Q
+*
+      IF( LEFT ) THEN
+         NQ = M
+      ELSE
+         NQ = N
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, K ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZUNMR2', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 )
+     $   RETURN
+*
+      IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN
+         I1 = 1
+         I2 = K
+         I3 = 1
+      ELSE
+         I1 = K
+         I2 = 1
+         I3 = -1
+      END IF
+*
+      IF( LEFT ) THEN
+         NI = N
+      ELSE
+         MI = M
+      END IF
+*
+      DO 10 I = I1, I2, I3
+         IF( LEFT ) THEN
+*
+*           H(i) or H(i)' is applied to C(1:m-k+i,1:n)
+*
+            MI = M - K + I
+         ELSE
+*
+*           H(i) or H(i)' is applied to C(1:m,1:n-k+i)
+*
+            NI = N - K + I
+         END IF
+*
+*        Apply H(i) or H(i)'
+*
+         IF( NOTRAN ) THEN
+            TAUI = DCONJG( TAU( I ) )
+         ELSE
+            TAUI = TAU( I )
+         END IF
+         CALL ZLACGV( NQ-K+I-1, A( I, 1 ), LDA )
+         AII = A( I, NQ-K+I )
+         A( I, NQ-K+I ) = ONE
+         CALL ZLARF( SIDE, MI, NI, A( I, 1 ), LDA, TAUI, C, LDC, WORK )
+         A( I, NQ-K+I ) = AII
+         CALL ZLACGV( NQ-K+I-1, A( I, 1 ), LDA )
+   10 CONTINUE
+      RETURN
+*
+*     End of ZUNMR2
+*
+      END
diff --git a/costa/native/external/lapack/zunmr3.f b/costa/native/external/lapack/zunmr3.f
new file mode 100644
index 000000000..204b7bfd1
--- /dev/null
+++ b/costa/native/external/lapack/zunmr3.f
@@ -0,0 +1,213 @@
+      SUBROUTINE ZUNMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC,
+     $                   WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, L, LDA, LDC, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZUNMR3 overwrites the general complex m by n matrix C with
+*
+*        Q * C  if SIDE = 'L' and TRANS = 'N', or
+*
+*        Q'* C  if SIDE = 'L' and TRANS = 'C', or
+*
+*        C * Q  if SIDE = 'R' and TRANS = 'N', or
+*
+*        C * Q' if SIDE = 'R' and TRANS = 'C',
+*
+*  where Q is a complex unitary matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(1) H(2) . . . H(k)
+*
+*  as returned by ZTZRZF. Q is of order m if SIDE = 'L' and of order n
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q' from the Left
+*          = 'R': apply Q or Q' from the Right
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N': apply Q  (No transpose)
+*          = 'C': apply Q' (Conjugate transpose)
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  L       (input) INTEGER
+*          The number of columns of the matrix A containing
+*          the meaningful part of the Householder reflectors.
+*          If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension
+*                               (LDA,M) if SIDE = 'L',
+*                               (LDA,N) if SIDE = 'R'
+*          The i-th row must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          ZTZRZF in the last k rows of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,K).
+*
+*  TAU     (input) COMPLEX*16 array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by ZTZRZF.
+*
+*  C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+*          On entry, the m-by-n matrix C.
+*          On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension
+*                                   (N) if SIDE = 'L',
+*                                   (M) if SIDE = 'R'
+*
+*  INFO    (output) INTEGER
+*          = 0: successful exit
+*          < 0: if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LEFT, NOTRAN
+      INTEGER            I, I1, I2, I3, IC, JA, JC, MI, NI, NQ
+      COMPLEX*16         TAUI
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLARZ
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+*
+*     NQ is the order of Q
+*
+      IF( LEFT ) THEN
+         NQ = M
+      ELSE
+         NQ = N
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( L.LT.0 .OR. ( LEFT .AND. ( L.GT.M ) ) .OR.
+     $         ( .NOT.LEFT .AND. ( L.GT.N ) ) ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, K ) ) THEN
+         INFO = -8
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -11
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZUNMR3', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 )
+     $   RETURN
+*
+      IF( ( LEFT .AND. .NOT.NOTRAN .OR. .NOT.LEFT .AND. NOTRAN ) ) THEN
+         I1 = 1
+         I2 = K
+         I3 = 1
+      ELSE
+         I1 = K
+         I2 = 1
+         I3 = -1
+      END IF
+*
+      IF( LEFT ) THEN
+         NI = N
+         JA = M - L + 1
+         JC = 1
+      ELSE
+         MI = M
+         JA = N - L + 1
+         IC = 1
+      END IF
+*
+      DO 10 I = I1, I2, I3
+         IF( LEFT ) THEN
+*
+*           H(i) or H(i)' is applied to C(i:m,1:n)
+*
+            MI = M - I + 1
+            IC = I
+         ELSE
+*
+*           H(i) or H(i)' is applied to C(1:m,i:n)
+*
+            NI = N - I + 1
+            JC = I
+         END IF
+*
+*        Apply H(i) or H(i)'
+*
+         IF( NOTRAN ) THEN
+            TAUI = TAU( I )
+         ELSE
+            TAUI = DCONJG( TAU( I ) )
+         END IF
+         CALL ZLARZ( SIDE, MI, NI, L, A( I, JA ), LDA, TAUI,
+     $               C( IC, JC ), LDC, WORK )
+*
+   10 CONTINUE
+*
+      RETURN
+*
+*     End of ZUNMR3
+*
+      END
diff --git a/costa/native/external/lapack/zunmrq.f b/costa/native/external/lapack/zunmrq.f
new file mode 100644
index 000000000..edee06e96
--- /dev/null
+++ b/costa/native/external/lapack/zunmrq.f
@@ -0,0 +1,264 @@
+      SUBROUTINE ZUNMRQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZUNMRQ overwrites the general complex M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'C':      Q**H * C       C * Q**H
+*
+*  where Q is a complex unitary matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(1)' H(2)' . . . H(k)'
+*
+*  as returned by ZGERQF. Q is of order M if SIDE = 'L' and of order N
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**H from the Left;
+*          = 'R': apply Q or Q**H from the Right.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q;
+*          = 'C':  Transpose, apply Q**H.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension
+*                               (LDA,M) if SIDE = 'L',
+*                               (LDA,N) if SIDE = 'R'
+*          The i-th row must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          ZGERQF in the last k rows of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,K).
+*
+*  TAU     (input) COMPLEX*16 array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by ZGERQF.
+*
+*  C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE = 'L', and
+*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            NBMAX, LDT
+      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, LQUERY, NOTRAN
+      CHARACTER          TRANST
+      INTEGER            I, I1, I2, I3, IB, IINFO, IWS, LDWORK, LWKOPT,
+     $                   MI, NB, NBMIN, NI, NQ, NW
+*     ..
+*     .. Local Arrays ..
+      COMPLEX*16         T( LDT, NBMAX )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLARFB, ZLARFT, ZUNMR2
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, K ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size.  NB may be at most NBMAX, where NBMAX
+*        is used to define the local array T.
+*
+         NB = MIN( NBMAX, ILAENV( 1, 'ZUNMRQ', SIDE // TRANS, M, N, K,
+     $        -1 ) )
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZUNMRQ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      NBMIN = 2
+      LDWORK = NW
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+         IWS = NW*NB
+         IF( LWORK.LT.IWS ) THEN
+            NB = LWORK / LDWORK
+            NBMIN = MAX( 2, ILAENV( 2, 'ZUNMRQ', SIDE // TRANS, M, N, K,
+     $              -1 ) )
+         END IF
+      ELSE
+         IWS = NW
+      END IF
+*
+      IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
+*
+*        Use unblocked code
+*
+         CALL ZUNMR2( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC, WORK,
+     $                IINFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( ( LEFT .AND. .NOT.NOTRAN ) .OR.
+     $       ( .NOT.LEFT .AND. NOTRAN ) ) THEN
+            I1 = 1
+            I2 = K
+            I3 = NB
+         ELSE
+            I1 = ( ( K-1 ) / NB )*NB + 1
+            I2 = 1
+            I3 = -NB
+         END IF
+*
+         IF( LEFT ) THEN
+            NI = N
+         ELSE
+            MI = M
+         END IF
+*
+         IF( NOTRAN ) THEN
+            TRANST = 'C'
+         ELSE
+            TRANST = 'N'
+         END IF
+*
+         DO 10 I = I1, I2, I3
+            IB = MIN( NB, K-I+1 )
+*
+*           Form the triangular factor of the block reflector
+*           H = H(i+ib-1) . . . H(i+1) H(i)
+*
+            CALL ZLARFT( 'Backward', 'Rowwise', NQ-K+I+IB-1, IB,
+     $                   A( I, 1 ), LDA, TAU( I ), T, LDT )
+            IF( LEFT ) THEN
+*
+*              H or H' is applied to C(1:m-k+i+ib-1,1:n)
+*
+               MI = M - K + I + IB - 1
+            ELSE
+*
+*              H or H' is applied to C(1:m,1:n-k+i+ib-1)
+*
+               NI = N - K + I + IB - 1
+            END IF
+*
+*           Apply H or H'
+*
+            CALL ZLARFB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI,
+     $                   IB, A( I, 1 ), LDA, T, LDT, C, LDC, WORK,
+     $                   LDWORK )
+   10    CONTINUE
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of ZUNMRQ
+*
+      END
diff --git a/costa/native/external/lapack/zunmrz.f b/costa/native/external/lapack/zunmrz.f
new file mode 100644
index 000000000..39eb863e5
--- /dev/null
+++ b/costa/native/external/lapack/zunmrz.f
@@ -0,0 +1,293 @@
+      SUBROUTINE ZUNMRZ( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS
+      INTEGER            INFO, K, L, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZUNMRZ overwrites the general complex M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'C':      Q**H * C       C * Q**H
+*
+*  where Q is a complex unitary matrix defined as the product of k
+*  elementary reflectors
+*
+*        Q = H(1) H(2) . . . H(k)
+*
+*  as returned by ZTZRZF. Q is of order M if SIDE = 'L' and of order N
+*  if SIDE = 'R'.
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**H from the Left;
+*          = 'R': apply Q or Q**H from the Right.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q;
+*          = 'C':  Conjugate transpose, apply Q**H.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  K       (input) INTEGER
+*          The number of elementary reflectors whose product defines
+*          the matrix Q.
+*          If SIDE = 'L', M >= K >= 0;
+*          if SIDE = 'R', N >= K >= 0.
+*
+*  L       (input) INTEGER
+*          The number of columns of the matrix A containing
+*          the meaningful part of the Householder reflectors.
+*          If SIDE = 'L', M >= L >= 0, if SIDE = 'R', N >= L >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension
+*                               (LDA,M) if SIDE = 'L',
+*                               (LDA,N) if SIDE = 'R'
+*          The i-th row must contain the vector which defines the
+*          elementary reflector H(i), for i = 1,2,...,k, as returned by
+*          ZTZRZF in the last k rows of its array argument A.
+*          A is modified by the routine but restored on exit.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A. LDA >= max(1,K).
+*
+*  TAU     (input) COMPLEX*16 array, dimension (K)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by ZTZRZF.
+*
+*  C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE = 'L', and
+*          LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  Further Details
+*  ===============
+*
+*  Based on contributions by
+*    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            NBMAX, LDT
+      PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            LEFT, LQUERY, NOTRAN
+      CHARACTER          TRANST
+      INTEGER            I, I1, I2, I3, IB, IC, IINFO, IWS, JA, JC,
+     $                   LDWORK, LWKOPT, MI, NB, NBMIN, NI, NQ, NW
+*     ..
+*     .. Local Arrays ..
+      COMPLEX*16         T( LDT, NBMAX )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLARZB, ZLARZT, ZUNMR3
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX, MIN
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -2
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( K.LT.0 .OR. K.GT.NQ ) THEN
+         INFO = -5
+      ELSE IF( L.LT.0 .OR. ( LEFT .AND. ( L.GT.M ) ) .OR.
+     $         ( .NOT.LEFT .AND. ( L.GT.N ) ) ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, K ) ) THEN
+         INFO = -8
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -11
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -13
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+*
+*        Determine the block size.  NB may be at most NBMAX, where NBMAX
+*        is used to define the local array T.
+*
+         NB = MIN( NBMAX, ILAENV( 1, 'ZUNMRQ', SIDE // TRANS, M, N, K,
+     $        -1 ) )
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZUNMRZ', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. K.EQ.0 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+*     Determine the block size.  NB may be at most NBMAX, where NBMAX
+*     is used to define the local array T.
+*
+      NB = MIN( NBMAX, ILAENV( 1, 'ZUNMRQ', SIDE // TRANS, M, N, K,
+     $     -1 ) )
+      NBMIN = 2
+      LDWORK = NW
+      IF( NB.GT.1 .AND. NB.LT.K ) THEN
+         IWS = NW*NB
+         IF( LWORK.LT.IWS ) THEN
+            NB = LWORK / LDWORK
+            NBMIN = MAX( 2, ILAENV( 2, 'ZUNMRQ', SIDE // TRANS, M, N, K,
+     $              -1 ) )
+         END IF
+      ELSE
+         IWS = NW
+      END IF
+*
+      IF( NB.LT.NBMIN .OR. NB.GE.K ) THEN
+*
+*        Use unblocked code
+*
+         CALL ZUNMR3( SIDE, TRANS, M, N, K, L, A, LDA, TAU, C, LDC,
+     $                WORK, IINFO )
+      ELSE
+*
+*        Use blocked code
+*
+         IF( ( LEFT .AND. .NOT.NOTRAN ) .OR.
+     $       ( .NOT.LEFT .AND. NOTRAN ) ) THEN
+            I1 = 1
+            I2 = K
+            I3 = NB
+         ELSE
+            I1 = ( ( K-1 ) / NB )*NB + 1
+            I2 = 1
+            I3 = -NB
+         END IF
+*
+         IF( LEFT ) THEN
+            NI = N
+            JC = 1
+            JA = M - L + 1
+         ELSE
+            MI = M
+            IC = 1
+            JA = N - L + 1
+         END IF
+*
+         IF( NOTRAN ) THEN
+            TRANST = 'C'
+         ELSE
+            TRANST = 'N'
+         END IF
+*
+         DO 10 I = I1, I2, I3
+            IB = MIN( NB, K-I+1 )
+*
+*           Form the triangular factor of the block reflector
+*           H = H(i+ib-1) . . . H(i+1) H(i)
+*
+            CALL ZLARZT( 'Backward', 'Rowwise', L, IB, A( I, JA ), LDA,
+     $                   TAU( I ), T, LDT )
+*
+            IF( LEFT ) THEN
+*
+*              H or H' is applied to C(i:m,1:n)
+*
+               MI = M - I + 1
+               IC = I
+            ELSE
+*
+*              H or H' is applied to C(1:m,i:n)
+*
+               NI = N - I + 1
+               JC = I
+            END IF
+*
+*           Apply H or H'
+*
+            CALL ZLARZB( SIDE, TRANST, 'Backward', 'Rowwise', MI, NI,
+     $                   IB, L, A( I, JA ), LDA, T, LDT, C( IC, JC ),
+     $                   LDC, WORK, LDWORK )
+   10    CONTINUE
+*
+      END IF
+*
+      WORK( 1 ) = LWKOPT
+*
+      RETURN
+*
+*     End of ZUNMRZ
+*
+      END
diff --git a/costa/native/external/lapack/zunmtr.f b/costa/native/external/lapack/zunmtr.f
new file mode 100644
index 000000000..8fc2f9fda
--- /dev/null
+++ b/costa/native/external/lapack/zunmtr.f
@@ -0,0 +1,223 @@
+      SUBROUTINE ZUNMTR( SIDE, UPLO, TRANS, M, N, A, LDA, TAU, C, LDC,
+     $                   WORK, LWORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     June 30, 1999
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS, UPLO
+      INTEGER            INFO, LDA, LDC, LWORK, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZUNMTR overwrites the general complex M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'C':      Q**H * C       C * Q**H
+*
+*  where Q is a complex unitary matrix of order nq, with nq = m if
+*  SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of
+*  nq-1 elementary reflectors, as returned by ZHETRD:
+*
+*  if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1);
+*
+*  if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1).
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**H from the Left;
+*          = 'R': apply Q or Q**H from the Right.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U': Upper triangle of A contains elementary reflectors
+*                 from ZHETRD;
+*          = 'L': Lower triangle of A contains elementary reflectors
+*                 from ZHETRD.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q;
+*          = 'C':  Conjugate transpose, apply Q**H.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  A       (input) COMPLEX*16 array, dimension
+*                               (LDA,M) if SIDE = 'L'
+*                               (LDA,N) if SIDE = 'R'
+*          The vectors which define the elementary reflectors, as
+*          returned by ZHETRD.
+*
+*  LDA     (input) INTEGER
+*          The leading dimension of the array A.
+*          LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'.
+*
+*  TAU     (input) COMPLEX*16 array, dimension
+*                               (M-1) if SIDE = 'L'
+*                               (N-1) if SIDE = 'R'
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by ZHETRD.
+*
+*  C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*
+*  LWORK   (input) INTEGER
+*          The dimension of the array WORK.
+*          If SIDE = 'L', LWORK >= max(1,N);
+*          if SIDE = 'R', LWORK >= max(1,M).
+*          For optimum performance LWORK >= N*NB if SIDE = 'L', and
+*          LWORK >=M*NB if SIDE = 'R', where NB is the optimal
+*          blocksize.
+*
+*          If LWORK = -1, then a workspace query is assumed; the routine
+*          only calculates the optimal size of the WORK array, returns
+*          this value as the first entry of the WORK array, and no error
+*          message related to LWORK is issued by XERBLA.
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            LEFT, LQUERY, UPPER
+      INTEGER            I1, I2, IINFO, LWKOPT, MI, NB, NI, NQ, NW
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      INTEGER            ILAENV
+      EXTERNAL           LSAME, ILAENV
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZUNMQL, ZUNMQR
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      UPPER = LSAME( UPLO, 'U' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     NQ is the order of Q and NW is the minimum dimension of WORK
+*
+      IF( LEFT ) THEN
+         NQ = M
+         NW = N
+      ELSE
+         NQ = N
+         NW = M
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.LSAME( TRANS, 'N' ) .AND. .NOT.LSAME( TRANS, 'C' ) )
+     $          THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDA.LT.MAX( 1, NQ ) ) THEN
+         INFO = -7
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LWORK.LT.MAX( 1, NW ) .AND. .NOT.LQUERY ) THEN
+         INFO = -12
+      END IF
+*
+      IF( INFO.EQ.0 ) THEN
+         IF( UPPER ) THEN
+            IF( LEFT ) THEN
+               NB = ILAENV( 1, 'ZUNMQL', SIDE // TRANS, M-1, N, M-1,
+     $              -1 )
+            ELSE
+               NB = ILAENV( 1, 'ZUNMQL', SIDE // TRANS, M, N-1, N-1,
+     $              -1 )
+            END IF
+         ELSE
+            IF( LEFT ) THEN
+               NB = ILAENV( 1, 'ZUNMQR', SIDE // TRANS, M-1, N, M-1,
+     $              -1 )
+            ELSE
+               NB = ILAENV( 1, 'ZUNMQR', SIDE // TRANS, M, N-1, N-1,
+     $              -1 )
+            END IF
+         END IF
+         LWKOPT = MAX( 1, NW )*NB
+         WORK( 1 ) = LWKOPT
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZUNMTR', -INFO )
+         RETURN
+      ELSE IF( LQUERY ) THEN
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 .OR. NQ.EQ.1 ) THEN
+         WORK( 1 ) = 1
+         RETURN
+      END IF
+*
+      IF( LEFT ) THEN
+         MI = M - 1
+         NI = N
+      ELSE
+         MI = M
+         NI = N - 1
+      END IF
+*
+      IF( UPPER ) THEN
+*
+*        Q was determined by a call to ZHETRD with UPLO = 'U'
+*
+         CALL ZUNMQL( SIDE, TRANS, MI, NI, NQ-1, A( 1, 2 ), LDA, TAU, C,
+     $                LDC, WORK, LWORK, IINFO )
+      ELSE
+*
+*        Q was determined by a call to ZHETRD with UPLO = 'L'
+*
+         IF( LEFT ) THEN
+            I1 = 2
+            I2 = 1
+         ELSE
+            I1 = 1
+            I2 = 2
+         END IF
+         CALL ZUNMQR( SIDE, TRANS, MI, NI, NQ-1, A( 2, 1 ), LDA, TAU,
+     $                C( I1, I2 ), LDC, WORK, LWORK, IINFO )
+      END IF
+      WORK( 1 ) = LWKOPT
+      RETURN
+*
+*     End of ZUNMTR
+*
+      END
diff --git a/costa/native/external/lapack/zupgtr.f b/costa/native/external/lapack/zupgtr.f
new file mode 100644
index 000000000..d44124bfd
--- /dev/null
+++ b/costa/native/external/lapack/zupgtr.f
@@ -0,0 +1,162 @@
+      SUBROUTINE ZUPGTR( UPLO, N, AP, TAU, Q, LDQ, WORK, INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          UPLO
+      INTEGER            INFO, LDQ, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         AP( * ), Q( LDQ, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZUPGTR generates a complex unitary matrix Q which is defined as the
+*  product of n-1 elementary reflectors H(i) of order n, as returned by
+*  ZHPTRD using packed storage:
+*
+*  if UPLO = 'U', Q = H(n-1) . . . H(2) H(1),
+*
+*  if UPLO = 'L', Q = H(1) H(2) . . . H(n-1).
+*
+*  Arguments
+*  =========
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U': Upper triangular packed storage used in previous
+*                 call to ZHPTRD;
+*          = 'L': Lower triangular packed storage used in previous
+*                 call to ZHPTRD.
+*
+*  N       (input) INTEGER
+*          The order of the matrix Q. N >= 0.
+*
+*  AP      (input) COMPLEX*16 array, dimension (N*(N+1)/2)
+*          The vectors which define the elementary reflectors, as
+*          returned by ZHPTRD.
+*
+*  TAU     (input) COMPLEX*16 array, dimension (N-1)
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by ZHPTRD.
+*
+*  Q       (output) COMPLEX*16 array, dimension (LDQ,N)
+*          The N-by-N unitary matrix Q.
+*
+*  LDQ     (input) INTEGER
+*          The leading dimension of the array Q. LDQ >= max(1,N).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension (N-1)
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         CZERO, CONE
+      PARAMETER          ( CZERO = ( 0.0D+0, 0.0D+0 ),
+     $                   CONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            UPPER
+      INTEGER            I, IINFO, IJ, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZUNG2L, ZUNG2R
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      UPPER = LSAME( UPLO, 'U' )
+      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
+         INFO = -6
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZUPGTR', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Q was determined by a call to ZHPTRD with UPLO = 'U'
+*
+*        Unpack the vectors which define the elementary reflectors and
+*        set the last row and column of Q equal to those of the unit
+*        matrix
+*
+         IJ = 2
+         DO 20 J = 1, N - 1
+            DO 10 I = 1, J - 1
+               Q( I, J ) = AP( IJ )
+               IJ = IJ + 1
+   10       CONTINUE
+            IJ = IJ + 2
+            Q( N, J ) = CZERO
+   20    CONTINUE
+         DO 30 I = 1, N - 1
+            Q( I, N ) = CZERO
+   30    CONTINUE
+         Q( N, N ) = CONE
+*
+*        Generate Q(1:n-1,1:n-1)
+*
+         CALL ZUNG2L( N-1, N-1, N-1, Q, LDQ, TAU, WORK, IINFO )
+*
+      ELSE
+*
+*        Q was determined by a call to ZHPTRD with UPLO = 'L'.
+*
+*        Unpack the vectors which define the elementary reflectors and
+*        set the first row and column of Q equal to those of the unit
+*        matrix
+*
+         Q( 1, 1 ) = CONE
+         DO 40 I = 2, N
+            Q( I, 1 ) = CZERO
+   40    CONTINUE
+         IJ = 3
+         DO 60 J = 2, N
+            Q( 1, J ) = CZERO
+            DO 50 I = J + 1, N
+               Q( I, J ) = AP( IJ )
+               IJ = IJ + 1
+   50       CONTINUE
+            IJ = IJ + 2
+   60    CONTINUE
+         IF( N.GT.1 ) THEN
+*
+*           Generate Q(2:n,2:n)
+*
+            CALL ZUNG2R( N-1, N-1, N-1, Q( 2, 2 ), LDQ, TAU, WORK,
+     $                   IINFO )
+         END IF
+      END IF
+      RETURN
+*
+*     End of ZUPGTR
+*
+      END
diff --git a/costa/native/external/lapack/zupmtr.f b/costa/native/external/lapack/zupmtr.f
new file mode 100644
index 000000000..9156336e8
--- /dev/null
+++ b/costa/native/external/lapack/zupmtr.f
@@ -0,0 +1,268 @@
+      SUBROUTINE ZUPMTR( SIDE, UPLO, TRANS, M, N, AP, TAU, C, LDC, WORK,
+     $                   INFO )
+*
+*  -- LAPACK routine (version 3.0) --
+*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+*     Courant Institute, Argonne National Lab, and Rice University
+*     September 30, 1994
+*
+*     .. Scalar Arguments ..
+      CHARACTER          SIDE, TRANS, UPLO
+      INTEGER            INFO, LDC, M, N
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         AP( * ), C( LDC, * ), TAU( * ), WORK( * )
+*     ..
+*
+*  Purpose
+*  =======
+*
+*  ZUPMTR overwrites the general complex M-by-N matrix C with
+*
+*                  SIDE = 'L'     SIDE = 'R'
+*  TRANS = 'N':      Q * C          C * Q
+*  TRANS = 'C':      Q**H * C       C * Q**H
+*
+*  where Q is a complex unitary matrix of order nq, with nq = m if
+*  SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of
+*  nq-1 elementary reflectors, as returned by ZHPTRD using packed
+*  storage:
+*
+*  if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1);
+*
+*  if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1).
+*
+*  Arguments
+*  =========
+*
+*  SIDE    (input) CHARACTER*1
+*          = 'L': apply Q or Q**H from the Left;
+*          = 'R': apply Q or Q**H from the Right.
+*
+*  UPLO    (input) CHARACTER*1
+*          = 'U': Upper triangular packed storage used in previous
+*                 call to ZHPTRD;
+*          = 'L': Lower triangular packed storage used in previous
+*                 call to ZHPTRD.
+*
+*  TRANS   (input) CHARACTER*1
+*          = 'N':  No transpose, apply Q;
+*          = 'C':  Conjugate transpose, apply Q**H.
+*
+*  M       (input) INTEGER
+*          The number of rows of the matrix C. M >= 0.
+*
+*  N       (input) INTEGER
+*          The number of columns of the matrix C. N >= 0.
+*
+*  AP      (input) COMPLEX*16 array, dimension
+*                               (M*(M+1)/2) if SIDE = 'L'
+*                               (N*(N+1)/2) if SIDE = 'R'
+*          The vectors which define the elementary reflectors, as
+*          returned by ZHPTRD.  AP is modified by the routine but
+*          restored on exit.
+*
+*  TAU     (input) COMPLEX*16 array, dimension (M-1) if SIDE = 'L'
+*                                     or (N-1) if SIDE = 'R'
+*          TAU(i) must contain the scalar factor of the elementary
+*          reflector H(i), as returned by ZHPTRD.
+*
+*  C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+*          On entry, the M-by-N matrix C.
+*          On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.
+*
+*  LDC     (input) INTEGER
+*          The leading dimension of the array C. LDC >= max(1,M).
+*
+*  WORK    (workspace) COMPLEX*16 array, dimension
+*                                   (N) if SIDE = 'L'
+*                                   (M) if SIDE = 'R'
+*
+*  INFO    (output) INTEGER
+*          = 0:  successful exit
+*          < 0:  if INFO = -i, the i-th argument had an illegal value
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         ONE
+      PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
+*     ..
+*     .. Local Scalars ..
+      LOGICAL            FORWRD, LEFT, NOTRAN, UPPER
+      INTEGER            I, I1, I2, I3, IC, II, JC, MI, NI, NQ
+      COMPLEX*16         AII, TAUI
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLARF
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          DCONJG, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input arguments
+*
+      INFO = 0
+      LEFT = LSAME( SIDE, 'L' )
+      NOTRAN = LSAME( TRANS, 'N' )
+      UPPER = LSAME( UPLO, 'U' )
+*
+*     NQ is the order of Q
+*
+      IF( LEFT ) THEN
+         NQ = M
+      ELSE
+         NQ = N
+      END IF
+      IF( .NOT.LEFT .AND. .NOT.LSAME( SIDE, 'R' ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'C' ) ) THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
+         INFO = -9
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZUPMTR', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.EQ.0 .OR. N.EQ.0 )
+     $   RETURN
+*
+      IF( UPPER ) THEN
+*
+*        Q was determined by a call to ZHPTRD with UPLO = 'U'
+*
+         FORWRD = ( LEFT .AND. NOTRAN ) .OR.
+     $            ( .NOT.LEFT .AND. .NOT.NOTRAN )
+*
+         IF( FORWRD ) THEN
+            I1 = 1
+            I2 = NQ - 1
+            I3 = 1
+            II = 2
+         ELSE
+            I1 = NQ - 1
+            I2 = 1
+            I3 = -1
+            II = NQ*( NQ+1 ) / 2 - 1
+         END IF
+*
+         IF( LEFT ) THEN
+            NI = N
+         ELSE
+            MI = M
+         END IF
+*
+         DO 10 I = I1, I2, I3
+            IF( LEFT ) THEN
+*
+*              H(i) or H(i)' is applied to C(1:i,1:n)
+*
+               MI = I
+            ELSE
+*
+*              H(i) or H(i)' is applied to C(1:m,1:i)
+*
+               NI = I
+            END IF
+*
+*           Apply H(i) or H(i)'
+*
+            IF( NOTRAN ) THEN
+               TAUI = TAU( I )
+            ELSE
+               TAUI = DCONJG( TAU( I ) )
+            END IF
+            AII = AP( II )
+            AP( II ) = ONE
+            CALL ZLARF( SIDE, MI, NI, AP( II-I+1 ), 1, TAUI, C, LDC,
+     $                  WORK )
+            AP( II ) = AII
+*
+            IF( FORWRD ) THEN
+               II = II + I + 2
+            ELSE
+               II = II - I - 1
+            END IF
+   10    CONTINUE
+      ELSE
+*
+*        Q was determined by a call to ZHPTRD with UPLO = 'L'.
+*
+         FORWRD = ( LEFT .AND. .NOT.NOTRAN ) .OR.
+     $            ( .NOT.LEFT .AND. NOTRAN )
+*
+         IF( FORWRD ) THEN
+            I1 = 1
+            I2 = NQ - 1
+            I3 = 1
+            II = 2
+         ELSE
+            I1 = NQ - 1
+            I2 = 1
+            I3 = -1
+            II = NQ*( NQ+1 ) / 2 - 1
+         END IF
+*
+         IF( LEFT ) THEN
+            NI = N
+            JC = 1
+         ELSE
+            MI = M
+            IC = 1
+         END IF
+*
+         DO 20 I = I1, I2, I3
+            AII = AP( II )
+            AP( II ) = ONE
+            IF( LEFT ) THEN
+*
+*              H(i) or H(i)' is applied to C(i+1:m,1:n)
+*
+               MI = M - I
+               IC = I + 1
+            ELSE
+*
+*              H(i) or H(i)' is applied to C(1:m,i+1:n)
+*
+               NI = N - I
+               JC = I + 1
+            END IF
+*
+*           Apply H(i) or H(i)'
+*
+            IF( NOTRAN ) THEN
+               TAUI = TAU( I )
+            ELSE
+               TAUI = DCONJG( TAU( I ) )
+            END IF
+            CALL ZLARF( SIDE, MI, NI, AP( II ), 1, TAUI, C( IC, JC ),
+     $                  LDC, WORK )
+            AP( II ) = AII
+*
+            IF( FORWRD ) THEN
+               II = II + NQ - I + 1
+            ELSE
+               II = II - NQ + I - 2
+            END IF
+   20    CONTINUE
+      END IF
+      RETURN
+*
+*     End of ZUPMTR
+*
+      END

From a4713b1eac8349c8a1f0d7cecaa5d20f4a6ad04a Mon Sep 17 00:00:00 2001
From: Werner Kramer <werner.kramer@vortech.nl>
Date: Mon, 2 Jan 2023 13:20:06 +0100
Subject: [PATCH 02/15] Use system jni and libxml2

---
 costa/native/CMakeLists.txt                   |  11 +
 costa/native/README.md                        |  13 +
 costa/native/cta/src/CMakeLists.txt           |   5 +-
 costa/native/cta_f90/generated/CMakeLists.txt |  19 +
 costa/native/external/blas/CMakeLists.txt     |  21 +
 costa/native/external/lapack/CMakeLists.txt   | 106 +++
 .../jni_cta_CtaObsdescr_NativeToJava.c        | 258 ++++++
 .../jni_cta_CtaObsdescr_NativeToJava.h        |  33 +
 .../openda/bridge/include/jni_cta_utils.h     |  45 +
 .../openda/bridge/include/jni_datatypes.h     |  31 +
 .../include/org_openda_costa_CtaArray.h       | 213 +++++
 .../include/org_openda_costa_CtaInitialize.h  |  29 +
 .../include/org_openda_costa_CtaModelState.h  |  21 +
 .../include/org_openda_costa_CtaObject.h      |  21 +
 ..._openda_costa_CtaObservationDescriptions.h |  77 ++
 .../include/org_openda_costa_CtaOpenDaModel.h | 189 +++++
 .../include/org_openda_costa_CtaParallel.h    |  29 +
 .../org_openda_costa_CtaRelationTable.h       |  61 ++
 .../org_openda_costa_CtaStochObserver.h       | 101 +++
 .../bridge/include/org_openda_costa_CtaTime.h | 157 ++++
 .../include/org_openda_costa_CtaTreeVector.h  | 137 +++
 .../include/org_openda_costa_CtaUtils.h       |  21 +
 .../include/org_openda_costa_CtaVector.h      | 155 ++++
 ..._openda_resultwriters_NativeResultWriter.h |  45 +
 ...a_resultwriters_NetcdfResultWriterNative.h |  29 +
 costa/native/openda/bridge/src/CMakeLists.txt |  26 +
 .../openda/bridge/src/jni_cta_utils.cpp       | 220 +++++
 .../bridge/src/org_openda_costa_CtaArray.cpp  | 600 +++++++++++++
 .../src/org_openda_costa_CtaInitialize.cpp    |  85 ++
 .../src/org_openda_costa_CtaModelState.cpp    |  63 ++
 .../bridge/src/org_openda_costa_CtaObject.cpp |  39 +
 ...penda_costa_CtaObservationDescriptions.cpp | 307 +++++++
 .../src/org_openda_costa_CtaOpenDaModel.cpp   | 798 ++++++++++++++++++
 .../src/org_openda_costa_CtaParallel.cpp      |  83 ++
 .../src/org_openda_costa_CtaRelationTable.cpp | 171 ++++
 .../src/org_openda_costa_CtaStochObserver.cpp | 320 +++++++
 .../bridge/src/org_openda_costa_CtaTime.cpp   | 286 +++++++
 .../src/org_openda_costa_CtaTreeVector.cpp    | 513 +++++++++++
 .../bridge/src/org_openda_costa_CtaUtils.cpp  |  39 +
 .../bridge/src/org_openda_costa_CtaVector.cpp | 547 ++++++++++++
 ...penda_resultwriters_NativeResultWriter.cpp | 122 +++
 ...resultwriters_NetcdfResultWriterNative.cpp | 111 +++
 42 files changed, 6156 insertions(+), 1 deletion(-)
 create mode 100644 costa/native/CMakeLists.txt
 create mode 100644 costa/native/README.md
 create mode 100644 costa/native/cta_f90/generated/CMakeLists.txt
 create mode 100644 costa/native/external/blas/CMakeLists.txt
 create mode 100644 costa/native/external/lapack/CMakeLists.txt
 create mode 100644 costa/native/openda/bridge/include/jni_cta_CtaObsdescr_NativeToJava.c
 create mode 100644 costa/native/openda/bridge/include/jni_cta_CtaObsdescr_NativeToJava.h
 create mode 100644 costa/native/openda/bridge/include/jni_cta_utils.h
 create mode 100644 costa/native/openda/bridge/include/jni_datatypes.h
 create mode 100644 costa/native/openda/bridge/include/org_openda_costa_CtaArray.h
 create mode 100644 costa/native/openda/bridge/include/org_openda_costa_CtaInitialize.h
 create mode 100644 costa/native/openda/bridge/include/org_openda_costa_CtaModelState.h
 create mode 100644 costa/native/openda/bridge/include/org_openda_costa_CtaObject.h
 create mode 100644 costa/native/openda/bridge/include/org_openda_costa_CtaObservationDescriptions.h
 create mode 100644 costa/native/openda/bridge/include/org_openda_costa_CtaOpenDaModel.h
 create mode 100644 costa/native/openda/bridge/include/org_openda_costa_CtaParallel.h
 create mode 100644 costa/native/openda/bridge/include/org_openda_costa_CtaRelationTable.h
 create mode 100644 costa/native/openda/bridge/include/org_openda_costa_CtaStochObserver.h
 create mode 100644 costa/native/openda/bridge/include/org_openda_costa_CtaTime.h
 create mode 100644 costa/native/openda/bridge/include/org_openda_costa_CtaTreeVector.h
 create mode 100644 costa/native/openda/bridge/include/org_openda_costa_CtaUtils.h
 create mode 100644 costa/native/openda/bridge/include/org_openda_costa_CtaVector.h
 create mode 100644 costa/native/openda/bridge/include/org_openda_resultwriters_NativeResultWriter.h
 create mode 100644 costa/native/openda/bridge/include/org_openda_resultwriters_NetcdfResultWriterNative.h
 create mode 100644 costa/native/openda/bridge/src/CMakeLists.txt
 create mode 100644 costa/native/openda/bridge/src/jni_cta_utils.cpp
 create mode 100644 costa/native/openda/bridge/src/org_openda_costa_CtaArray.cpp
 create mode 100644 costa/native/openda/bridge/src/org_openda_costa_CtaInitialize.cpp
 create mode 100644 costa/native/openda/bridge/src/org_openda_costa_CtaModelState.cpp
 create mode 100644 costa/native/openda/bridge/src/org_openda_costa_CtaObject.cpp
 create mode 100644 costa/native/openda/bridge/src/org_openda_costa_CtaObservationDescriptions.cpp
 create mode 100644 costa/native/openda/bridge/src/org_openda_costa_CtaOpenDaModel.cpp
 create mode 100644 costa/native/openda/bridge/src/org_openda_costa_CtaParallel.cpp
 create mode 100644 costa/native/openda/bridge/src/org_openda_costa_CtaRelationTable.cpp
 create mode 100644 costa/native/openda/bridge/src/org_openda_costa_CtaStochObserver.cpp
 create mode 100644 costa/native/openda/bridge/src/org_openda_costa_CtaTime.cpp
 create mode 100644 costa/native/openda/bridge/src/org_openda_costa_CtaTreeVector.cpp
 create mode 100644 costa/native/openda/bridge/src/org_openda_costa_CtaUtils.cpp
 create mode 100644 costa/native/openda/bridge/src/org_openda_costa_CtaVector.cpp
 create mode 100644 costa/native/openda/bridge/src/org_openda_resultwriters_NativeResultWriter.cpp
 create mode 100644 costa/native/openda/bridge/src/org_openda_resultwriters_NetcdfResultWriterNative.cpp

diff --git a/costa/native/CMakeLists.txt b/costa/native/CMakeLists.txt
new file mode 100644
index 000000000..92c16cb1e
--- /dev/null
+++ b/costa/native/CMakeLists.txt
@@ -0,0 +1,11 @@
+cmake_minimum_required(VERSION 3.9.1)
+
+project(openda)
+#add_subdirectory(libxml)
+add_subdirectory(external/blas)
+add_subdirectory(external/lapack)
+
+add_subdirectory(cta/src)
+add_subdirectory(cta_f90/generated)
+add_subdirectory(openda/bridge/src)
+
diff --git a/costa/native/README.md b/costa/native/README.md
new file mode 100644
index 000000000..f20432f61
--- /dev/null
+++ b/costa/native/README.md
@@ -0,0 +1,13 @@
+ 
+
+
+
+```shell
+export JAVA_HOME=/opt/apps/java/amazon-corretto-17.0.5.8.1-linux-x64/amazon-corretto-17.0.5.8.1-linux-x64
+```
+
+
+```shell
+cd build
+cmake .. -DCMAKE_Fortran_COMPILER=ifort -DCMAKE_C_COMPILER=gcc
+```
diff --git a/costa/native/cta/src/CMakeLists.txt b/costa/native/cta/src/CMakeLists.txt
index afd3dcd6a..d225b4c14 100644
--- a/costa/native/cta/src/CMakeLists.txt
+++ b/costa/native/cta/src/CMakeLists.txt
@@ -1,4 +1,5 @@
 cmake_minimum_required(VERSION 3.9.1)
+find_package(LibXml2 REQUIRED)
 
 set(LIBRARY_OUTPUT_PATH  ${CMAKE_BINARY_DIR}/lib)
 set(SOURCES
@@ -16,8 +17,10 @@ set(SOURCES
 )
 
 add_library(cta SHARED ${SOURCES})
+
+
 target_include_directories(cta PUBLIC ../include)
 target_include_directories(cta PUBLIC ${CMAKE_SOURCE_DIR}/external)
-
+target_include_directories(cta PUBLIC ${LIBXML2_INCLUDE_DIR}) 
 #target_link_libraries(cta INTERFACE libxml)
 
diff --git a/costa/native/cta_f90/generated/CMakeLists.txt b/costa/native/cta_f90/generated/CMakeLists.txt
new file mode 100644
index 000000000..e94c5652e
--- /dev/null
+++ b/costa/native/cta_f90/generated/CMakeLists.txt
@@ -0,0 +1,19 @@
+cmake_minimum_required(VERSION 3.9.1)
+enable_language(Fortran)
+
+set(LIBRARY_OUTPUT_PATH  ${CMAKE_BINARY_DIR}/lib)
+set(SOURCES
+cta_f90.f90             cta_f90_interface.f90      cta_f90_model_utilities.f90  cta_f90_time.f90
+cta_f90_array.f90       cta_f90_matrix.f90         cta_f90_obsdescr.f90         cta_f90_tree.f90
+cta_f90_datatypes.f90   cta_f90_mem.f90            cta_f90_pack.f90             cta_f90_treevector.f90
+cta_f90_datetime.f90    cta_f90_message.f90        cta_f90_par.f90              cta_f90_util_methods.f90
+cta_f90_file.f90        cta_f90_metainfo.f90       cta_f90_parameters.f90       cta_f90_util_sort.f90
+cta_f90_flush_mod.f90   cta_f90_method.f90         cta_f90_reltable.f90         cta_f90_util_statistics.f90
+cta_f90_functions.f90   cta_f90_modbuild_par.f90   cta_f90_resultwriter.f90     cta_f90_vector.f90
+cta_f90_handles.f90     cta_f90_model.f90          cta_f90_sobs.f90             cta_f90_xml.f90
+cta_f90_initialise.f90  cta_f90_model_factory.f90  cta_f90_string.f90
+)
+
+add_library(cta_f90 SHARED ${SOURCES})
+target_include_directories(cta_f90 PUBLIC ../include)
+
diff --git a/costa/native/external/blas/CMakeLists.txt b/costa/native/external/blas/CMakeLists.txt
new file mode 100644
index 000000000..7e8e5636c
--- /dev/null
+++ b/costa/native/external/blas/CMakeLists.txt
@@ -0,0 +1,21 @@
+cmake_minimum_required(VERSION 3.9.1)
+enable_language(Fortran)
+
+set(LIBRARY_OUTPUT_PATH  ${CMAKE_BINARY_DIR}/lib)
+set(SOURCES
+caxpy.f  cher.f    csyr2k.f  dcabs1.f  dspmv.f   dtpmv.f   lsame.f   srot.f   ssyr2k.f  zcopy.f   zher.f    zsyrk.f
+ccopy.f  cher2.f   csyrk.f   dcopy.f   dspr.f    dtpsv.f   sasum.f   srotg.f  ssyrk.f   zdotc.f   zher2.f   ztbmv.f
+cdotc.f  cher2k.f  ctbmv.f   ddot.f    dspr2.f   dtrmm.f   saxpy.f   ssbmv.f  stbmv.f   zdotu.f   zher2k.f  ztbsv.f
+cdotu.f  cherk.f   ctbsv.f   dgbmv.f   dswap.f   dtrmv.f   scasum.f  sscal.f  stbsv.f   zdscal.f  zherk.f   ztpmv.f
+cgbmv.f  chpmv.f   ctpmv.f   dgemm.f   dsymm.f   dtrsm.f   scnrm2.f  sspmv.f  stpmv.f   zgbmv.f   zhpmv.f   ztpsv.f
+cgemm.f  chpr.f    ctpsv.f   dgemv.f   dsymv.f   dtrsv.f   scopy.f   sspr.f   stpsv.f   zgemm.f   zhpr.f    ztrmm.f
+cgemv.f  chpr2.f   ctrmm.f   dger.f    dsyr.f    dzasum.f  sdot.f    sspr2.f  strmm.f   zgemv.f   zhpr2.f   ztrmv.f
+cgerc.f  crotg.f   ctrmv.f   dnrm2.f   dsyr2.f   dznrm2.f  sgbmv.f   sswap.f  strmv.f   zgerc.f   zrotg.f   ztrsm.f
+cgeru.f  cscal.f   ctrsm.f   drot.f    dsyr2k.f  icamax.f  sgemm.f   ssymm.f  strsm.f   zgeru.f   zscal.f   ztrsv.f
+chbmv.f  csscal.f  ctrsv.f   drotg.f   dsyrk.f   idamax.f  sgemv.f   ssymv.f  strsv.f   zhbmv.f   zswap.f
+chemm.f  cswap.f   dasum.f   dsbmv.f   dtbmv.f   isamax.f  sger.f    ssyr.f   xerbla.f  zhemm.f   zsymm.f
+chemv.f  csymm.f   daxpy.f   dscal.f   dtbsv.f   izamax.f  snrm2.f   ssyr2.f  zaxpy.f   zhemv.f   zsyr2k.f
+)
+
+add_library(blas SHARED ${SOURCES})
+
diff --git a/costa/native/external/lapack/CMakeLists.txt b/costa/native/external/lapack/CMakeLists.txt
new file mode 100644
index 000000000..df3abfc2c
--- /dev/null
+++ b/costa/native/external/lapack/CMakeLists.txt
@@ -0,0 +1,106 @@
+cmake_minimum_required(VERSION 3.9.1)
+enable_language(Fortran)
+# add_compile_options(-Wall -Weffc++ -pedantic -std=c++0x)
+
+set(LIBRARY_OUTPUT_PATH  ${CMAKE_BINARY_DIR}/lib)
+set(SOURCES
+cbdsqr.f  chetrd.f  clatbs.f  ctrtrs.f  dggsvp.f  dlasd1.f  dpttrs.f  sdisna.f  slaev2.f  slatrs.f  ssteqr.f  zgerq2.f  zlahef.f  zptsv.f
+cgbbrd.f  chetrf.f  clatdf.f  ctzrqf.f  dgtcon.f  dlasd2.f  dptts2.f  second.f  slaexc.f  slatrz.f  ssterf.f  zgerqf.f  zlahqr.f  zptsvx.f
+cgbcon.f  chetri.f  clatps.f  ctzrzf.f  dgtrfs.f  dlasd3.f  drscl.f   sgbbrd.f  slag2.f   slatzm.f  sstev.f   zgesc2.f  zlahrd.f  zpttrf.f
+cgbequ.f  chetrs.f  clatrd.f  cung2l.f  dgtsv.f   dlasd4.f  dsbev.f   sgbcon.f  slags2.f  slauu2.f  sstevd.f  zgesdd.f  zlaic1.f  zpttrs.f
+cgbrfs.f  chgeqz.f  clatrs.f  cung2r.f  dgtsvx.f  dlasd5.f  dsbevd.f  sgbequ.f  slagtf.f  slauum.f  sstevr.f  zgesv.f   zlals0.f  zptts2.f
+cgbsv.f   chpcon.f  clatrz.f  cungbr.f  dgttrf.f  dlasd6.f  dsbevx.f  sgbrfs.f  slagtm.f  sopgtr.f  sstevx.f  zgesvd.f  zlalsa.f  zrot.f
+cgbsvx.f  chpev.f   clatzm.f  cunghr.f  dgttrs.f  dlasd7.f  dsbgst.f  sgbsv.f   slagts.f  sopmtr.f  ssycon.f  zgesvx.f  zlalsd.f  zspcon.f
+cgbtf2.f  chpevd.f  clauu2.f  cungl2.f  dgtts2.f  dlasd8.f  dsbgv.f   sgbsvx.f  slagv2.f  sorg2l.f  ssyev.f   zgetc2.f  zlangb.f  zspmv.f
+cgbtrf.f  chpevx.f  clauum.f  cunglq.f  dhgeqz.f  dlasd9.f  dsbgvd.f  sgbtf2.f  slahqr.f  sorg2r.f  ssyevd.f  zgetf2.f  zlange.f  zspr.f
+cgbtrs.f  chpgst.f  cpbcon.f  cungql.f  dhsein.f  dlasda.f  dsbgvx.f  sgbtrf.f  slahrd.f  sorgbr.f  ssyevr.f  zgetrf.f  zlangt.f  zsprfs.f
+cgebak.f  chpgv.f   cpbequ.f  cungqr.f  dhseqr.f  dlasdq.f  dsbtrd.f  sgbtrs.f  slaic1.f  sorghr.f  ssyevx.f  zgetri.f  zlanhb.f  zspsv.f
+cgebal.f  chpgvd.f  cpbrfs.f  cungr2.f  dlabad.f  dlasdt.f  dsecnd.f  sgebak.f  slaln2.f  sorgl2.f  ssygs2.f  zgetrs.f  zlanhe.f  zspsvx.f
+cgebd2.f  chpgvx.f  cpbstf.f  cungrq.f  dlabrd.f  dlaset.f  dspcon.f  sgebal.f  slals0.f  sorglq.f  ssygst.f  zggbak.f  zlanhp.f  zsptrf.f
+cgebrd.f  chprfs.f  cpbsv.f   cungtr.f  dlacon.f  dlasq1.f  dspev.f   sgebd2.f  slalsa.f  sorgql.f  ssygv.f   zggbal.f  zlanhs.f  zsptri.f
+cgecon.f  chpsv.f   cpbsvx.f  cunm2l.f  dlacpy.f  dlasq2.f  dspevd.f  sgebrd.f  slalsd.f  sorgqr.f  ssygvd.f  zgges.f   zlanht.f  zsptrs.f
+cgeequ.f  chpsvx.f  cpbtf2.f  cunm2r.f  dladiv.f  dlasq3.f  dspevx.f  sgecon.f  slamch.f  sorgr2.f  ssygvx.f  zggesx.f  zlansb.f  zstedc.f
+cgees.f   chptrd.f  cpbtrf.f  cunmbr.f  dlae2.f   dlasq4.f  dspgst.f  sgeequ.f  slamrg.f  sorgrq.f  ssyrfs.f  zggev.f   zlansp.f  zstegr.f
+cgeesx.f  chptrf.f  cpbtrs.f  cunmhr.f  dlaebz.f  dlasq5.f  dspgv.f   sgees.f   slangb.f  sorgtr.f  ssysv.f   zggevx.f  zlansy.f  zstein.f
+cgeev.f   chptri.f  cpocon.f  cunml2.f  dlaed0.f  dlasq6.f  dspgvd.f  sgeesx.f  slange.f  sorm2l.f  ssysvx.f  zggglm.f  zlantb.f  zsteqr.f
+cgeevx.f  chptrs.f  cpoequ.f  cunmlq.f  dlaed1.f  dlasr.f   dspgvx.f  sgeev.f   slangt.f  sorm2r.f  ssytd2.f  zgghrd.f  zlantp.f  zsycon.f
+cgegs.f   chsein.f  cporfs.f  cunmql.f  dlaed2.f  dlasrt.f  dsprfs.f  sgeevx.f  slanhs.f  sormbr.f  ssytf2.f  zgglse.f  zlantr.f  zsymv.f
+cgegv.f   chseqr.f  cposv.f   cunmqr.f  dlaed3.f  dlassq.f  dspsv.f   sgegs.f   slansb.f  sormhr.f  ssytrd.f  zggqrf.f  zlapll.f  zsyr.f
+cgehd2.f  clabrd.f  cposvx.f  cunmr2.f  dlaed4.f  dlasv2.f  dspsvx.f  sgegv.f   slansp.f  sorml2.f  ssytrf.f  zggrqf.f  zlapmt.f  zsyrfs.f
+cgehrd.f  clacgv.f  cpotf2.f  cunmr3.f  dlaed5.f  dlaswp.f  dsptrd.f  sgehd2.f  slanst.f  sormlq.f  ssytri.f  zggsvd.f  zlaqgb.f  zsysv.f
+cgelq2.f  clacon.f  cpotrf.f  cunmrq.f  dlaed6.f  dlasy2.f  dsptrf.f  sgehrd.f  slansy.f  sormql.f  ssytrs.f  zggsvp.f  zlaqge.f  zsysvx.f
+cgelqf.f  clacp2.f  cpotri.f  cunmrz.f  dlaed7.f  dlasyf.f  dsptri.f  sgelq2.f  slantb.f  sormqr.f  stbcon.f  zgtcon.f  zlaqhb.f  zsytf2.f
+cgels.f   clacpy.f  cpotrs.f  cunmtr.f  dlaed8.f  dlatbs.f  dsptrs.f  sgelqf.f  slantp.f  sormr2.f  stbrfs.f  zgtrfs.f  zlaqhe.f  zsytrf.f
+cgelsd.f  clacrm.f  cppcon.f  cupgtr.f  dlaed9.f  dlatdf.f  dstebz.f  sgels.f   slantr.f  sormr3.f  stbtrs.f  zgtsv.f   zlaqhp.f  zsytri.f
+cgelss.f  clacrt.f  cppequ.f  cupmtr.f  dlaeda.f  dlatps.f  dstedc.f  sgelsd.f  slanv2.f  sormrq.f  stgevc.f  zgtsvx.f  zlaqp2.f  zsytrs.f
+cgelsx.f  cladiv.f  cpprfs.f  dbdsdc.f  dlaein.f  dlatrd.f  dstegr.f  sgelss.f  slapll.f  sormrz.f  stgex2.f  zgttrf.f  zlaqps.f  ztbcon.f
+cgelsy.f  claed0.f  cppsv.f   dbdsqr.f  dlaev2.f  dlatrs.f  dstein.f  sgelsx.f  slapmt.f  sormtr.f  stgexc.f  zgttrs.f  zlaqsb.f  ztbrfs.f
+cgeql2.f  claed7.f  cppsvx.f  ddisna.f  dlaexc.f  dlatrz.f  dsteqr.f  sgelsy.f  slapy2.f  spbcon.f  stgsen.f  zgtts2.f  zlaqsp.f  ztbtrs.f
+cgeqlf.f  claed8.f  cpptrf.f  dgbbrd.f  dlag2.f   dlatzm.f  dsterf.f  sgeql2.f  slapy3.f  spbequ.f  stgsja.f  zhbev.f   zlaqsy.f  ztgevc.f
+cgeqp3.f  claein.f  cpptri.f  dgbcon.f  dlags2.f  dlauu2.f  dstev.f   sgeqlf.f  slaqgb.f  spbrfs.f  stgsna.f  zhbevd.f  zlar1v.f  ztgex2.f
+cgeqpf.f  claesy.f  cpptrs.f  dgbequ.f  dlagtf.f  dlauum.f  dstevd.f  sgeqp3.f  slaqge.f  spbstf.f  stgsy2.f  zhbevx.f  zlar2v.f  ztgexc.f
+cgeqr2.f  claev2.f  cptcon.f  dgbrfs.f  dlagtm.f  dopgtr.f  dstevr.f  sgeqpf.f  slaqp2.f  spbsv.f   stgsyl.f  zhbgst.f  zlarcm.f  ztgsen.f
+cgeqrf.f  clags2.f  cpteqr.f  dgbsv.f   dlagts.f  dopmtr.f  dstevx.f  sgeqr2.f  slaqps.f  spbsvx.f  stpcon.f  zhbgv.f   zlarf.f   ztgsja.f
+cgerfs.f  clagtm.f  cptrfs.f  dgbsvx.f  dlagv2.f  dorg2l.f  dsycon.f  sgeqrf.f  slaqsb.f  spbtf2.f  stprfs.f  zhbgvd.f  zlarfb.f  ztgsna.f
+cgerq2.f  clahef.f  cptsv.f   dgbtf2.f  dlahqr.f  dorg2r.f  dsyev.f   sgerfs.f  slaqsp.f  spbtrf.f  stptri.f  zhbgvx.f  zlarfg.f  ztgsy2.f
+cgerqf.f  clahqr.f  cptsvx.f  dgbtrf.f  dlahrd.f  dorgbr.f  dsyevd.f  sgerq2.f  slaqsy.f  spbtrs.f  stptrs.f  zhbtrd.f  zlarft.f  ztgsyl.f
+cgesc2.f  clahrd.f  cpttrf.f  dgbtrs.f  dlaic1.f  dorghr.f  dsyevr.f  sgerqf.f  slaqtr.f  spocon.f  strcon.f  zhecon.f  zlarfx.f  ztpcon.f
+cgesdd.f  claic1.f  cpttrs.f  dgebak.f  dlaln2.f  dorgl2.f  dsyevx.f  sgesc2.f  slar1v.f  spoequ.f  strevc.f  zheev.f   zlargv.f  ztprfs.f
+cgesv.f   clals0.f  cptts2.f  dgebal.f  dlals0.f  dorglq.f  dsygs2.f  sgesdd.f  slar2v.f  sporfs.f  strexc.f  zheevd.f  zlarnv.f  ztptri.f
+cgesvd.f  clalsa.f  crot.f    dgebd2.f  dlalsa.f  dorgql.f  dsygst.f  sgesv.f   slarf.f   sposv.f   strrfs.f  zheevr.f  zlarrv.f  ztptrs.f
+cgesvx.f  clalsd.f  cspcon.f  dgebrd.f  dlalsd.f  dorgqr.f  dsygv.f   sgesvd.f  slarfb.f  sposvx.f  strsen.f  zheevx.f  zlartg.f  ztrcon.f
+cgetc2.f  clangb.f  cspmv.f   dgecon.f  dlamch.f  dorgr2.f  dsygvd.f  sgesvx.f  slarfg.f  spotf2.f  strsna.f  zhegs2.f  zlartv.f  ztrevc.f
+cgetf2.f  clange.f  cspr.f    dgeequ.f  dlamrg.f  dorgrq.f  dsygvx.f  sgetc2.f  slarft.f  spotrf.f  strsyl.f  zhegst.f  zlarz.f   ztrexc.f
+cgetrf.f  clangt.f  csprfs.f  dgees.f   dlangb.f  dorgtr.f  dsyrfs.f  sgetf2.f  slarfx.f  spotri.f  strti2.f  zhegv.f   zlarzb.f  ztrrfs.f
+cgetri.f  clanhb.f  cspsv.f   dgeesx.f  dlange.f  dorm2l.f  dsysv.f   sgetrf.f  slargv.f  spotrs.f  strtri.f  zhegvd.f  zlarzt.f  ztrsen.f
+cgetrs.f  clanhe.f  cspsvx.f  dgeev.f   dlangt.f  dorm2r.f  dsysvx.f  sgetri.f  slarnv.f  sppcon.f  strtrs.f  zhegvx.f  zlascl.f  ztrsna.f
+cggbak.f  clanhp.f  csptrf.f  dgeevx.f  dlanhs.f  dormbr.f  dsytd2.f  sgetrs.f  slarrb.f  sppequ.f  stzrqf.f  zherfs.f  zlaset.f  ztrsyl.f
+cggbal.f  clanhs.f  csptri.f  dgegs.f   dlansb.f  dormhr.f  dsytf2.f  sggbak.f  slarre.f  spprfs.f  stzrzf.f  zhesv.f   zlasr.f   ztrti2.f
+cgges.f   clanht.f  csptrs.f  dgegv.f   dlansp.f  dorml2.f  dsytrd.f  sggbal.f  slarrf.f  sppsv.f   xerbla.f  zhesvx.f  zlassq.f  ztrtri.f
+cggesx.f  clansb.f  csrot.f   dgehd2.f  dlanst.f  dormlq.f  dsytrf.f  sgges.f   slarrv.f  sppsvx.f  zbdsqr.f  zhetd2.f  zlaswp.f  ztrtrs.f
+cggev.f   clansp.f  csrscl.f  dgehrd.f  dlansy.f  dormql.f  dsytri.f  sggesx.f  slartg.f  spptrf.f  zdrot.f   zhetf2.f  zlasyf.f  ztzrqf.f
+cggevx.f  clansy.f  cstedc.f  dgelq2.f  dlantb.f  dormqr.f  dsytrs.f  sggev.f   slartv.f  spptri.f  zdrscl.f  zhetrd.f  zlatbs.f  ztzrzf.f
+cggglm.f  clantb.f  cstegr.f  dgelqf.f  dlantp.f  dormr2.f  dtbcon.f  sggevx.f  slaruv.f  spptrs.f  zgbbrd.f  zhetrf.f  zlatdf.f  zung2l.f
+cgghrd.f  clantp.f  cstein.f  dgels.f   dlantr.f  dormr3.f  dtbrfs.f  sggglm.f  slarz.f   sptcon.f  zgbcon.f  zhetri.f  zlatps.f  zung2r.f
+cgglse.f  clantr.f  csteqr.f  dgelsd.f  dlanv2.f  dormrq.f  dtbtrs.f  sgghrd.f  slarzb.f  spteqr.f  zgbequ.f  zhetrs.f  zlatrd.f  zungbr.f
+cggqrf.f  clapll.f  csycon.f  dgelss.f  dlapll.f  dormrz.f  dtgevc.f  sgglse.f  slarzt.f  sptrfs.f  zgbrfs.f  zhgeqz.f  zlatrs.f  zunghr.f
+cggrqf.f  clapmt.f  csymv.f   dgelsx.f  dlapmt.f  dormtr.f  dtgex2.f  sggqrf.f  slas2.f   sptsv.f   zgbsv.f   zhpcon.f  zlatrz.f  zungl2.f
+cggsvd.f  claqgb.f  csyr.f    dgelsy.f  dlapy2.f  dpbcon.f  dtgexc.f  sggrqf.f  slascl.f  sptsvx.f  zgbsvx.f  zhpev.f   zlatzm.f  zunglq.f
+cggsvp.f  claqge.f  csyrfs.f  dgeql2.f  dlapy3.f  dpbequ.f  dtgsen.f  sggsvd.f  slasd0.f  spttrf.f  zgbtf2.f  zhpevd.f  zlauu2.f  zungql.f
+cgtcon.f  claqhb.f  csysv.f   dgeqlf.f  dlaqgb.f  dpbrfs.f  dtgsja.f  sggsvp.f  slasd1.f  spttrs.f  zgbtrf.f  zhpevx.f  zlauum.f  zungqr.f
+cgtrfs.f  claqhe.f  csysvx.f  dgeqp3.f  dlaqge.f  dpbstf.f  dtgsna.f  sgtcon.f  slasd2.f  sptts2.f  zgbtrs.f  zhpgst.f  zpbcon.f  zungr2.f
+cgtsv.f   claqhp.f  csytf2.f  dgeqpf.f  dlaqp2.f  dpbsv.f   dtgsy2.f  sgtrfs.f  slasd3.f  srscl.f   zgebak.f  zhpgv.f   zpbequ.f  zungrq.f
+cgtsvx.f  claqp2.f  csytrf.f  dgeqr2.f  dlaqps.f  dpbsvx.f  dtgsyl.f  sgtsv.f   slasd4.f  ssbev.f   zgebal.f  zhpgvd.f  zpbrfs.f  zungtr.f
+cgttrf.f  claqps.f  csytri.f  dgeqrf.f  dlaqsb.f  dpbtf2.f  dtpcon.f  sgtsvx.f  slasd5.f  ssbevd.f  zgebd2.f  zhpgvx.f  zpbstf.f  zunm2l.f
+cgttrs.f  claqsb.f  csytrs.f  dgerfs.f  dlaqsp.f  dpbtrf.f  dtprfs.f  sgttrf.f  slasd6.f  ssbevx.f  zgebrd.f  zhprfs.f  zpbsv.f   zunm2r.f
+cgtts2.f  claqsp.f  ctbcon.f  dgerq2.f  dlaqsy.f  dpbtrs.f  dtptri.f  sgttrs.f  slasd7.f  ssbgst.f  zgecon.f  zhpsv.f   zpbsvx.f  zunmbr.f
+chbev.f   claqsy.f  ctbrfs.f  dgerqf.f  dlaqtr.f  dpocon.f  dtptrs.f  sgtts2.f  slasd8.f  ssbgv.f   zgeequ.f  zhpsvx.f  zpbtf2.f  zunmhr.f
+chbevd.f  clar1v.f  ctbtrs.f  dgesc2.f  dlar1v.f  dpoequ.f  dtrcon.f  shgeqz.f  slasd9.f  ssbgvd.f  zgees.f   zhptrd.f  zpbtrf.f  zunml2.f
+chbevx.f  clar2v.f  ctgevc.f  dgesdd.f  dlar2v.f  dporfs.f  dtrevc.f  shsein.f  slasda.f  ssbgvx.f  zgeesx.f  zhptrf.f  zpbtrs.f  zunmlq.f
+chbgst.f  clarcm.f  ctgex2.f  dgesv.f   dlarf.f   dposv.f   dtrexc.f  shseqr.f  slasdq.f  ssbtrd.f  zgeev.f   zhptri.f  zpocon.f  zunmql.f
+chbgv.f   clarf.f   ctgexc.f  dgesvd.f  dlarfb.f  dposvx.f  dtrrfs.f  slabad.f  slasdt.f  sspcon.f  zgeevx.f  zhptrs.f  zpoequ.f  zunmqr.f
+chbgvd.f  clarfb.f  ctgsen.f  dgesvx.f  dlarfg.f  dpotf2.f  dtrsen.f  slabrd.f  slaset.f  sspev.f   zgegs.f   zhsein.f  zporfs.f  zunmr2.f
+chbgvx.f  clarfg.f  ctgsja.f  dgetc2.f  dlarft.f  dpotrf.f  dtrsna.f  slacon.f  slasq1.f  sspevd.f  zgegv.f   zhseqr.f  zposv.f   zunmr3.f
+chbtrd.f  clarft.f  ctgsna.f  dgetf2.f  dlarfx.f  dpotri.f  dtrsyl.f  slacpy.f  slasq2.f  sspevx.f  zgehd2.f  zlabrd.f  zposvx.f  zunmrq.f
+checon.f  clarfx.f  ctgsy2.f  dgetrf.f  dlargv.f  dpotrs.f  dtrti2.f  sladiv.f  slasq3.f  sspgst.f  zgehrd.f  zlacgv.f  zpotf2.f  zunmrz.f
+cheev.f   clargv.f  ctgsyl.f  dgetri.f  dlarnv.f  dppcon.f  dtrtri.f  slae2.f   slasq4.f  sspgv.f   zgelq2.f  zlacon.f  zpotrf.f  zunmtr.f
+cheevd.f  clarnv.f  ctpcon.f  dgetrs.f  dlarrb.f  dppequ.f  dtrtrs.f  slaebz.f  slasq5.f  sspgvd.f  zgelqf.f  zlacp2.f  zpotri.f  zupgtr.f
+cheevr.f  clarrv.f  ctprfs.f  dggbak.f  dlarre.f  dpprfs.f  dtzrqf.f  slaed0.f  slasq6.f  sspgvx.f  zgels.f   zlacpy.f  zpotrs.f  zupmtr.f
+cheevx.f  clartg.f  ctptri.f  dggbal.f  dlarrf.f  dppsv.f   dtzrzf.f  slaed1.f  slasr.f   ssprfs.f  zgelsd.f  zlacrm.f  zppcon.f
+chegs2.f  clartv.f  ctptrs.f  dgges.f   dlarrv.f  dppsvx.f  dzsum1.f  slaed2.f  slasrt.f  sspsv.f   zgelss.f  zlacrt.f  zppequ.f
+chegst.f  clarz.f   ctrcon.f  dggesx.f  dlartg.f  dpptrf.f  icmax1.f  slaed3.f  slassq.f  sspsvx.f  zgelsx.f  zladiv.f  zpprfs.f
+chegv.f   clarzb.f  ctrevc.f  dggev.f   dlartv.f  dpptri.f  ieeeck.f  slaed4.f  slasv2.f  ssptrd.f  zgelsy.f  zlaed0.f  zppsv.f
+chegvd.f  clarzt.f  ctrexc.f  dggevx.f  dlaruv.f  dpptrs.f  ilaenv.f  slaed5.f  slaswp.f  ssptrf.f  zgeql2.f  zlaed7.f  zppsvx.f
+chegvx.f  clascl.f  ctrrfs.f  dggglm.f  dlarz.f   dptcon.f  izmax1.f  slaed6.f  slasy2.f  ssptri.f  zgeqlf.f  zlaed8.f  zpptrf.f
+cherfs.f  claset.f  ctrsen.f  dgghrd.f  dlarzb.f  dpteqr.f  lsame.f   slaed7.f  slasyf.f  ssptrs.f  zgeqp3.f  zlaein.f  zpptri.f
+chesv.f   clasr.f   ctrsna.f  dgglse.f  dlarzt.f  dptrfs.f  lsamen.f  slaed8.f  slatbs.f  sstebz.f  zgeqpf.f  zlaesy.f  zpptrs.f
+chesvx.f  classq.f  ctrsyl.f  dggqrf.f  dlas2.f   dptsv.f   sbdsdc.f  slaed9.f  slatdf.f  sstedc.f  zgeqr2.f  zlaev2.f  zptcon.f
+chetd2.f  claswp.f  ctrti2.f  dggrqf.f  dlascl.f  dptsvx.f  sbdsqr.f  slaeda.f  slatps.f  sstegr.f  zgeqrf.f  zlags2.f  zpteqr.f
+chetf2.f  clasyf.f  ctrtri.f  dggsvd.f  dlasd0.f  dpttrf.f  scsum1.f  slaein.f  slatrd.f  sstein.f  zgerfs.f  zlagtm.f  zptrfs.f
+)
+
+set_source_files_properties(slamch.f PROPERTIES COMPILE_FLAGS -O0)
+set_source_files_properties(dlamch.f PROPERTIES COMPILE_FLAGS -O0)
+
+add_library(lapack SHARED ${SOURCES})
+
diff --git a/costa/native/openda/bridge/include/jni_cta_CtaObsdescr_NativeToJava.c b/costa/native/openda/bridge/include/jni_cta_CtaObsdescr_NativeToJava.c
new file mode 100644
index 000000000..a331b2210
--- /dev/null
+++ b/costa/native/openda/bridge/include/jni_cta_CtaObsdescr_NativeToJava.c
@@ -0,0 +1,258 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/trunk/costa/src/cta/cta_obsdescr_combine.c $
+$Revision: 671 $, $Date: 2008-10-07 14:49:42 +0200 (Tue, 07 Oct 2008) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2010  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_handles.h
+\brief Implemenation of a generic OpenDA observation description wrapper for use in native COSTA code 
+*/
+
+
+#include <string.h>
+#include <stdio.h>
+#include "cta.h"
+
+#include "jni_datatypes.h"
+#include "jni.h"
+#include "jni_cta_utils.h"
+
+
+#define CLASSNAME  "CtaObsDescr_nativeToJava"
+
+#define IDEBUG (0)
+
+typedef struct {
+CTA_Handle myhandle;
+sJni_Class classInstance;
+} CTAI_ObsDescr_NativeToJava;
+
+
+#define METHOD "CTAI_ObsDescr_nativeToJava_Create_Size"
+void CTAI_ObsDescr_nativeToJava_Create_Size(int *memsize, int *retval){
+   *memsize=(int) sizeof(CTAI_ObsDescr_NativeToJava);
+   *retval=CTA_OK;
+};
+
+#undef METHOD
+#define METHOD "CTAI_ObsDescr_nativeToJava_Create_Init"
+void CTAI_ObsDescr_nativeToJava_Create_Init(CTA_ObsDescr *myhandle, CTAI_ObsDescr_NativeToJava *descr,
+                                            CTA_Handle *usrdat, int *retval)
+{
+   JNIEnv *env;
+   sJni_Class *classIn;
+
+   //Set the runtime and class from userdata
+   
+   CTA_Handle_GetData(*usrdat, (void *) &classIn);
+   env     = classIn->env;
+ 
+   descr->classInstance.env = env;
+   descr->classInstance.cls = (*env)->NewGlobalRef(env, classIn->cls);
+   descr->classInstance.obj = (*env)->NewGlobalRef(env, classIn->obj);
+   descr->myhandle          = *myhandle;
+   
+};
+
+#undef METHOD
+#define METHOD "CTAI_ObsDescr_nativeToJava_CreateSel"
+void CTAI_ObsDescr_nativeToJava_CreateSel(CTAI_ObsDescr_NativeToJava *descr,
+          CTA_String *selection, CTA_RelTable *reltab, 
+          CTA_ObsDescr *myhandle_out, 
+          CTAI_ObsDescr_NativeToJava *descrout, int *retval){
+
+CTA_WRITE_ERROR("Method not implemented");
+*retval=CTA_NOT_IMPLEMENTED;
+
+}
+         
+
+
+#undef METHOD
+#define METHOD "CTAI_ObsDescr_nativeToJava_Get_Keys"
+void CTAI_ObsDescr_nativeToJava_Get_Keys(CTAI_ObsDescr_NativeToJava *descr,
+                                         CTA_Vector *keys, int *retval)
+{
+   jobject jKeys;
+
+   // Find method
+   JNIEnv *env= descr->classInstance.env;
+   jclass cls= descr->classInstance.cls;
+   jobject obj = descr->classInstance.obj;
+   jmethodID func;
+   
+   /* Gewoon aanroepen zoals in alle voorbeelden staat */
+   func = (*env)->GetMethodID(env, cls, "getPropertyKeys", "()[Ljava/lang/String;");
+   if (func) {
+      // call method and get keys
+      jKeys =(*env)->CallObjectMethod(env,obj,func);
+
+      // convert java array of strings to native vector of strings
+      *retval = cta_jni_JavaStringVecToNativeVec(env,  (jobjectArray) jKeys, *keys);
+   }
+   else {
+      CTA_WRITE_ERROR("Cannot find java method name=getPropertyKeys signature=()[Ljava/lang/String;");
+      *retval = CTA_INTERNAL_ERROR;
+   } 
+};
+
+#undef METHOD
+#define METHOD "CTAI_ObsDescr_nativeToJava_Property_Count"
+void CTAI_ObsDescr_nativeToJava_Property_Count(
+         CTAI_ObsDescr_NativeToJava *descr, 
+         int *nkeys, 
+         int *retval)
+{
+   // Find method
+   JNIEnv *env = descr->classInstance.env;
+   jobject obj = descr->classInstance.obj;
+   jclass  cls = descr->classInstance.cls;
+
+   jmethodID func = (*env)->GetMethodID(env, cls, "getPropertyCount", "()I");
+   if (func) {
+      // Call method
+      *nkeys=(*env)->CallIntMethod(env,obj,func);
+      *retval=CTA_OK;
+   }
+   else {
+      CTA_WRITE_ERROR("Cannot find java method name=getPropertyCount signature=()I");
+      *retval=CTA_INTERNAL_ERROR;
+   }   
+}
+;
+
+#undef METHOD
+#define METHOD "CTAI_ObsDescr_nativeToJava_Observation_Count"
+   void CTAI_ObsDescr_nativeToJava_Observation_Count(
+      CTAI_ObsDescr_NativeToJava *descr, 
+         int *nobs, 
+         int *retval)
+{
+
+   JNIEnv *env= descr->classInstance.env;
+   jobject obj = descr->classInstance.obj;
+   jclass cls= descr->classInstance.cls;
+
+   jmethodID func = (*env)->GetMethodID(env, cls, "getObservationCount", "()I");
+   if (func) {
+
+      // Call method
+      *nobs=(*env)->CallIntMethod(env,obj,func);
+      *retval=CTA_OK;
+   }
+   else {
+      CTA_WRITE_ERROR("Cannot find java method name=getObservationCount signature=()I");
+      *retval=CTA_INTERNAL_ERROR;
+   }   
+};
+
+
+#undef METHOD
+#define METHOD "CTAI_ObsDescr_nativeToJava_Get_Properties"
+void CTAI_ObsDescr_nativeToJava_Get_Properties(
+         CTAI_ObsDescr_NativeToJava *descr, 
+         const char *Key,
+         CTA_Vector *Properties,
+         CTA_Datatype *datatype,
+         int *retval)
+{
+   jobject jProperties;
+   jstring jKey;
+
+   // Find method
+   JNIEnv *env= descr->classInstance.env;
+   jclass cls= descr->classInstance.cls;
+   jobject obj = descr->classInstance.obj;
+   jmethodID func;
+   
+   func = (*env)->GetMethodID(env, cls, "getStringProperties", "(Ljava/lang/String;)[Ljava/lang/String;");
+   if (func) {
+      // call method and get properties
+      jKey= (*env)->NewStringUTF(env, Key);
+
+      jProperties =(*env)->CallObjectMethod(env,obj,func,jKey);
+      
+      // convert java array of strings to native vector
+      if (jProperties){
+         *retval = cta_jni_JavaStringVecToNativeVec(env,  (jobjectArray) jProperties, *Properties);
+      } 
+      else {
+         CTA_WRITE_ERROR("getStringProperties returned no properties");
+         *retval = CTA_JNI_INTERFACING_ERROR;
+      }
+   }
+   else {
+      CTA_WRITE_ERROR("Cannot find java method name=getStringProperties signature=(Ljava/lang/String;)[Ljava/lang/String;");
+      *retval = CTA_INTERNAL_ERROR;
+   }   
+}
+
+
+#undef METHOD
+#define METHOD "CTAI_ObsDescr_nativeToJava_Free"
+void CTAI_ObsDescr_nativeToJava_Free(
+         CTAI_ObsDescr_NativeToJava *descr, 
+         int *retval)
+{
+   // Release the global references
+   JNIEnv *env = descr->classInstance.env;
+   (*env)->DeleteGlobalRef(env, descr->classInstance.cls);
+   (*env)->DeleteGlobalRef(env, descr->classInstance.obj);
+
+   // Since we did not allocate any memory, there is nothing to do   
+   *retval=CTA_OK;
+}
+
+
+
+#undef METHOD
+#define METHOD "CTA_ObsDescr_nativeToJava_initialise"
+void CTA_ObsDescr_nativeToJava_initialise(CTA_ObsDescrClass *hobsdescrcl)
+{
+   CTA_Intf hintf=0;
+   CTA_Func h_func[I_CTA_OBSDESCR_NUMFUNC];
+
+   int ierr;
+
+   // The vector h_func is filled with COSTA-function handles of the 
+   // implementations in this file.
+   ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_Create_Size,    hintf,
+                            &h_func[I_CTA_OBSDESCR_CREATE_SIZE]);
+   ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_Create_Init,    hintf,
+                            &h_func[I_CTA_OBSDESCR_CREATE_INIT]);
+   ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_Property_Count,    hintf,
+                            &h_func[I_CTA_OBSDESCR_COUNT_PROPERTIES]);
+   ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_Get_Properties,    hintf,
+                            &h_func[I_CTA_OBSDESCR_GET_PROPERTIES]);
+   ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_Observation_Count, hintf,
+                            &h_func[I_CTA_OBSDESCR_COUNT_OBSERVATIONS]);
+   ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_Get_Keys,    hintf,
+                            &h_func[I_CTA_OBSDESCR_GET_KEYS]);
+   ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_Free,    hintf,
+                            &h_func[I_CTA_OBSDESCR_FREE]);
+   ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_CreateSel,    hintf,
+                            &h_func[I_CTA_OBSDESCR_SELECTION]);
+   ierr=CTA_ObsDescr_DefineClass("cta_obsdescr_nativeToJava",h_func,hobsdescrcl);
+
+}
+
+
+
+
diff --git a/costa/native/openda/bridge/include/jni_cta_CtaObsdescr_NativeToJava.h b/costa/native/openda/bridge/include/jni_cta_CtaObsdescr_NativeToJava.h
new file mode 100644
index 000000000..b642da618
--- /dev/null
+++ b/costa/native/openda/bridge/include/jni_cta_CtaObsdescr_NativeToJava.h
@@ -0,0 +1,33 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/trunk/costa/src/cta/cta_obsdescr_combine.c $
+$Revision: 671 $, $Date: 2008-10-07 14:49:42 +0200 (Tue, 07 Oct 2008) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2010  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void CTA_ObsDescr_nativeToJava_initialise(CTA_ObsDescrClass *hobsdescrcl);
+
+#ifdef __cplusplus
+}
+#endif
+
+
diff --git a/costa/native/openda/bridge/include/jni_cta_utils.h b/costa/native/openda/bridge/include/jni_cta_utils.h
new file mode 100644
index 000000000..c22e4c6d6
--- /dev/null
+++ b/costa/native/openda/bridge/include/jni_cta_utils.h
@@ -0,0 +1,45 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/openda/bridge/jni_cta_utils.h $
+$Revision: 1553 $, $Date: 2010-05-07 14:55:23 +0200 (Fri, 07 May 2010) $
+
+OpenDA interface for COSTA.
+Copyright (C) 2007  Stef Hummel / Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+
+#include "jni.h"
+#include "cta.h"
+void cta_jni_setJavaEnv(JNIEnv *env);
+#ifdef __cplusplus
+void cta_jni_exception(JNIEnv *env, const char *name, const char *msg);
+#endif
+void cta_jni_exception(JNIEnv *env, const char *name, const char *msg, int retVal);
+void cta_jni_free(JNIEnv * env, jobject obj_this);
+CTA_Handle cta_jni_getCtaHandle(JNIEnv * env, jobject obj_this);
+
+void cta_jni_setCtaHandle(JNIEnv * env, jobject obj_this, CTA_Handle ctaHandle);
+void cta_jni_ExternalMessageWriter(char *className, char *method, char *message, char type);
+void cta_jni_ExternalMessageWriterSetID(JNIEnv *env, jclass classID, jmethodID methodID);
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+int cta_jni_JavaStringVecToNativeVec(JNIEnv *env,  jobjectArray jArray, CTA_Vector cVec);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/costa/native/openda/bridge/include/jni_datatypes.h b/costa/native/openda/bridge/include/jni_datatypes.h
new file mode 100644
index 000000000..4ec184a68
--- /dev/null
+++ b/costa/native/openda/bridge/include/jni_datatypes.h
@@ -0,0 +1,31 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/core/native/src/openda/bridge/jni_cta_utils.h $
+$Revision: 1257 $, $Date: 2009-12-21 13:42:01 +0100 (ma, 21 dec 2009) $
+
+OpenDA interface for COSTA.
+Copyright (C) 2010  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+
+#include <jni.h>
+
+typedef struct {
+JNIEnv *env;           //Poiner to Java runtime environment;
+jclass cls;            //JNI handle to the java class instance;
+jobject obj;           //JNI object handle;
+} sJni_Class;
+
diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaArray.h b/costa/native/openda/bridge/include/org_openda_costa_CtaArray.h
new file mode 100644
index 000000000..b3b315950
--- /dev/null
+++ b/costa/native/openda/bridge/include/org_openda_costa_CtaArray.h
@@ -0,0 +1,213 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include <jni.h>
+/* Header for class org_openda_costa_CtaArray */
+
+#ifndef _Included_org_openda_costa_CtaArray
+#define _Included_org_openda_costa_CtaArray
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    create
+ * Signature: ([D[I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_create
+  (JNIEnv *, jobject, jdoubleArray, jintArray);
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    getNumberOfDimensions
+ * Signature: ()I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaArray_getNumberOfDimensions
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    getDimensions
+ * Signature: ()[I
+ */
+JNIEXPORT jintArray JNICALL Java_org_openda_costa_CtaArray_getDimensions
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    length
+ * Signature: ()I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaArray_length
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    getValuesAsDoubles
+ * Signature: ()[D
+ */
+JNIEXPORT jdoubleArray JNICALL Java_org_openda_costa_CtaArray_getValuesAsDoubles__
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    getValuesAsDoubles
+ * Signature: (Z)[D
+ */
+JNIEXPORT jdoubleArray JNICALL Java_org_openda_costa_CtaArray_getValuesAsDoubles__Z
+  (JNIEnv *, jobject, jboolean);
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    getValuesAsDoubles
+ * Signature: (II)[D
+ */
+JNIEXPORT jdoubleArray JNICALL Java_org_openda_costa_CtaArray_getValuesAsDoubles__II
+  (JNIEnv *, jobject, jint, jint);
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    getValueAsDouble
+ * Signature: (I)D
+ */
+JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaArray_getValueAsDouble__I
+  (JNIEnv *, jobject, jint);
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    getValueAsDouble
+ * Signature: ([I)D
+ */
+JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaArray_getValueAsDouble___3I
+  (JNIEnv *, jobject, jintArray);
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    setConstant
+ * Signature: (D)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setConstant
+  (JNIEnv *, jobject, jdouble);
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    setValuesAsDoubles
+ * Signature: ([D)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setValuesAsDoubles___3D
+  (JNIEnv *, jobject, jdoubleArray);
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    setValuesAsDoubles
+ * Signature: (II[D)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setValuesAsDoubles__II_3D
+  (JNIEnv *, jobject, jint, jint, jdoubleArray);
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    setValueAsDouble
+ * Signature: (ID)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setValueAsDouble__ID
+  (JNIEnv *, jobject, jint, jdouble);
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    setValueAsDouble
+ * Signature: ([ID)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setValueAsDouble___3ID
+  (JNIEnv *, jobject, jintArray, jdouble);
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    axpyOnValues
+ * Signature: (D[D)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_axpyOnValues
+  (JNIEnv *, jobject, jdouble, jdoubleArray);
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    multiplyValues
+ * Signature: ([D)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_multiplyValues
+  (JNIEnv *, jobject, jdoubleArray);
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    reshape
+ * Signature: ([I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_reshape
+  (JNIEnv *, jobject, jintArray);
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    getSlice
+ * Signature: (II)Lorg/openda/interfaces/IArray;
+ */
+JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaArray_getSlice__II
+  (JNIEnv *, jobject, jint, jint);
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    getSlice
+ * Signature: (III)Lorg/openda/interfaces/IArray;
+ */
+JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaArray_getSlice__III
+  (JNIEnv *, jobject, jint, jint, jint);
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    getSliceAsDoubles
+ * Signature: (III)[D
+ */
+JNIEXPORT jdoubleArray JNICALL Java_org_openda_costa_CtaArray_getSliceAsDoubles
+  (JNIEnv *, jobject, jint, jint, jint);
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    setSlice
+ * Signature: ([DII)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setSlice___3DII
+  (JNIEnv *, jobject, jdoubleArray, jint, jint);
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    setSliceBySlice
+ * Signature: (Lorg/openda/interfaces/IArray;II)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setSliceBySlice__Lorg_openda_interfaces_IArray_2II
+  (JNIEnv *, jobject, jobject, jint, jint);
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    setSliceBySlice
+ * Signature: (Lorg/openda/interfaces/IArray;III)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setSliceBySlice__Lorg_openda_interfaces_IArray_2III
+  (JNIEnv *, jobject, jobject, jint, jint, jint);
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    setSlice
+ * Signature: ([DIII)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setSlice___3DIII
+  (JNIEnv *, jobject, jdoubleArray, jint, jint, jint);
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    valueIndex
+ * Signature: ([I)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaArray_valueIndex
+  (JNIEnv *, jobject, jintArray);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaInitialize.h b/costa/native/openda/bridge/include/org_openda_costa_CtaInitialize.h
new file mode 100644
index 000000000..35acb09f4
--- /dev/null
+++ b/costa/native/openda/bridge/include/org_openda_costa_CtaInitialize.h
@@ -0,0 +1,29 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include <jni.h>
+/* Header for class org_openda_costa_CtaInitialize */
+
+#ifndef _Included_org_openda_costa_CtaInitialize
+#define _Included_org_openda_costa_CtaInitialize
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * Class:     org_openda_costa_CtaInitialize
+ * Method:    ctaInit
+ * Signature: ()V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaInitialize_ctaInit
+  (JNIEnv *, jclass);
+
+/*
+ * Class:     org_openda_costa_CtaInitialize
+ * Method:    setRandomSeed
+ * Signature: (I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaInitialize_setRandomSeed
+  (JNIEnv *, jclass, jint);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaModelState.h b/costa/native/openda/bridge/include/org_openda_costa_CtaModelState.h
new file mode 100644
index 000000000..04f443ec4
--- /dev/null
+++ b/costa/native/openda/bridge/include/org_openda_costa_CtaModelState.h
@@ -0,0 +1,21 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include <jni.h>
+/* Header for class org_openda_costa_CtaModelState */
+
+#ifndef _Included_org_openda_costa_CtaModelState
+#define _Included_org_openda_costa_CtaModelState
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * Class:     org_openda_costa_CtaModelState
+ * Method:    nativeSavePersistentState
+ * Signature: (Ljava/lang/String;Ljava/lang/String;I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaModelState_nativeSavePersistentState
+  (JNIEnv *, jobject, jstring, jstring, jint);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaObject.h b/costa/native/openda/bridge/include/org_openda_costa_CtaObject.h
new file mode 100644
index 000000000..730327211
--- /dev/null
+++ b/costa/native/openda/bridge/include/org_openda_costa_CtaObject.h
@@ -0,0 +1,21 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include <jni.h>
+/* Header for class org_openda_costa_CtaObject */
+
+#ifndef _Included_org_openda_costa_CtaObject
+#define _Included_org_openda_costa_CtaObject
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * Class:     org_openda_costa_CtaObject
+ * Method:    ctaFree
+ * Signature: ()V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaObject_ctaFree
+  (JNIEnv *, jobject);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaObservationDescriptions.h b/costa/native/openda/bridge/include/org_openda_costa_CtaObservationDescriptions.h
new file mode 100644
index 000000000..e4f58d10a
--- /dev/null
+++ b/costa/native/openda/bridge/include/org_openda_costa_CtaObservationDescriptions.h
@@ -0,0 +1,77 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include <jni.h>
+/* Header for class org_openda_costa_CtaObservationDescriptions */
+
+#ifndef _Included_org_openda_costa_CtaObservationDescriptions
+#define _Included_org_openda_costa_CtaObservationDescriptions
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * Class:     org_openda_costa_CtaObservationDescriptions
+ * Method:    getPropertyKeys
+ * Signature: ()[Ljava/lang/String;
+ */
+JNIEXPORT jobjectArray JNICALL Java_org_openda_costa_CtaObservationDescriptions_getPropertyKeys
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_openda_costa_CtaObservationDescriptions
+ * Method:    ctaCreateSelection
+ * Signature: (Ljava/lang/String;)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaObservationDescriptions_ctaCreateSelection
+  (JNIEnv *, jobject, jstring);
+
+/*
+ * Class:     org_openda_costa_CtaObservationDescriptions
+ * Method:    ctaCreateTimeSelection
+ * Signature: (I)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaObservationDescriptions_ctaCreateTimeSelection
+  (JNIEnv *, jobject, jint);
+
+/*
+ * Class:     org_openda_costa_CtaObservationDescriptions
+ * Method:    getObservationCount
+ * Signature: ()I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaObservationDescriptions_getObservationCount
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_openda_costa_CtaObservationDescriptions
+ * Method:    getPropertyCount
+ * Signature: ()I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaObservationDescriptions_getPropertyCount
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_openda_costa_CtaObservationDescriptions
+ * Method:    getStringProperties
+ * Signature: (Ljava/lang/String;)[Ljava/lang/String;
+ */
+JNIEXPORT jobjectArray JNICALL Java_org_openda_costa_CtaObservationDescriptions_getStringProperties
+  (JNIEnv *, jobject, jstring);
+
+/*
+ * Class:     org_openda_costa_CtaObservationDescriptions
+ * Method:    ctaCreateNativeToJavaObserver
+ * Signature: (Lorg/openda/interfaces/IObservationDescriptions;)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaObservationDescriptions_ctaCreateNativeToJavaObserver
+  (JNIEnv *, jobject, jobject);
+
+/*
+ * Class:     org_openda_costa_CtaObservationDescriptions
+ * Method:    ctaGetValueProperties
+ * Signature: (Ljava/lang/String;I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaObservationDescriptions_ctaGetValueProperties
+  (JNIEnv *, jobject, jstring, jint);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaOpenDaModel.h b/costa/native/openda/bridge/include/org_openda_costa_CtaOpenDaModel.h
new file mode 100644
index 000000000..b468a42b1
--- /dev/null
+++ b/costa/native/openda/bridge/include/org_openda_costa_CtaOpenDaModel.h
@@ -0,0 +1,189 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include <jni.h>
+/* Header for class org_openda_costa_CtaOpenDaModel */
+
+#ifndef _Included_org_openda_costa_CtaOpenDaModel
+#define _Included_org_openda_costa_CtaOpenDaModel
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    getCurrentTime
+ * Signature: ()Lorg/openda/interfaces/ITime;
+ */
+JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_getCurrentTime
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    getTimeHorizon
+ * Signature: ()Lorg/openda/interfaces/ITime;
+ */
+JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_getTimeHorizon
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    setAutomaticNoiseGeneration
+ * Signature: (Z)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_setAutomaticNoiseGeneration
+  (JNIEnv *, jobject, jboolean);
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    ctaCreate
+ * Signature: (Ljava/lang/String;Ljava/lang/String;)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaCreate
+  (JNIEnv *, jobject, jstring, jstring);
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    ctaAnnounceObservedValues
+ * Signature: (Lorg/openda/interfaces/IObservationDescriptions;)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaAnnounceObservedValues
+  (JNIEnv *, jobject, jobject);
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    ctaAxpyOnParameters
+ * Signature: (DLorg/openda/interfaces/IVector;)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaAxpyOnParameters
+  (JNIEnv *, jobject, jdouble, jobject);
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    ctaAxpyOnStateDomain
+ * Signature: (DLorg/openda/interfaces/IVector;I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaAxpyOnStateDomain
+  (JNIEnv *, jobject, jdouble, jobject, jint);
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    ctaCompute
+ * Signature: (Lorg/openda/interfaces/ITime;)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaCompute
+  (JNIEnv *, jobject, jobject);
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    ctaGetObservedValues
+ * Signature: (Lorg/openda/interfaces/IObservationDescriptions;Lorg/openda/costa/CtaVector;)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetObservedValues
+  (JNIEnv *, jobject, jobject, jobject);
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    ctaGetState
+ * Signature: ()Lorg/openda/interfaces/IVector;
+ */
+JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetState
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    ctaGetStateDomain
+ * Signature: (I)Lorg/openda/interfaces/IVector;
+ */
+JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetStateDomain
+  (JNIEnv *, jobject, jint);
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    ctaGetParameters
+ * Signature: ()Lorg/openda/interfaces/IVector;
+ */
+JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetParameters
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    ctaGetStateScaling
+ * Signature: ()Lorg/openda/interfaces/IVector;
+ */
+JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetStateScaling
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    ctaReleaseInternalState
+ * Signature: (Ljava/lang/String;)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaReleaseInternalState
+  (JNIEnv *, jobject, jstring);
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    ctaLoadPersistentState
+ * Signature: (Ljava/lang/String;)Lorg/openda/interfaces/IModelState;
+ */
+JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaLoadPersistentState
+  (JNIEnv *, jobject, jstring);
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    ctaSaveInternalState
+ * Signature: ()Lorg/openda/interfaces/IModelState;
+ */
+JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaSaveInternalState
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    ctaRestoreInternalState
+ * Signature: (Ljava/lang/String;)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaRestoreInternalState
+  (JNIEnv *, jobject, jstring);
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    ctaGetObservedLocalization
+ * Signature: (Lorg/openda/interfaces/IObservationDescriptions;D)[I
+ */
+JNIEXPORT jintArray JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetObservedLocalization
+  (JNIEnv *, jobject, jobject, jdouble);
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    ctaGetObservedLocalizationDomain
+ * Signature: (Lorg/openda/interfaces/IObservationDescriptions;DI)[I
+ */
+JNIEXPORT jintArray JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetObservedLocalizationDomain
+  (JNIEnv *, jobject, jobject, jdouble, jint);
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    ctaSetParameters
+ * Signature: (Lorg/openda/interfaces/IVector;)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaSetParameters
+  (JNIEnv *, jobject, jobject);
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    ctaGetNumDomains
+ * Signature: (D)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetNumDomains
+  (JNIEnv *, jobject, jdouble);
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    ctaGetObservationSelector
+ * Signature: (Lorg/openda/interfaces/IObservationDescriptions;DI)[I
+ */
+JNIEXPORT jintArray JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetObservationSelector
+  (JNIEnv *, jobject, jobject, jdouble, jint);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaParallel.h b/costa/native/openda/bridge/include/org_openda_costa_CtaParallel.h
new file mode 100644
index 000000000..c030c8860
--- /dev/null
+++ b/costa/native/openda/bridge/include/org_openda_costa_CtaParallel.h
@@ -0,0 +1,29 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include <jni.h>
+/* Header for class org_openda_costa_CtaParallel */
+
+#ifndef _Included_org_openda_costa_CtaParallel
+#define _Included_org_openda_costa_CtaParallel
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * Class:     org_openda_costa_CtaParallel
+ * Method:    nativeInit
+ * Signature: (Ljava/lang/String;)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaParallel_nativeInit
+  (JNIEnv *, jclass, jstring);
+
+/*
+ * Class:     org_openda_costa_CtaParallel
+ * Method:    finalizeParallelEnvironment
+ * Signature: ()V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaParallel_finalizeParallelEnvironment
+  (JNIEnv *, jclass);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaRelationTable.h b/costa/native/openda/bridge/include/org_openda_costa_CtaRelationTable.h
new file mode 100644
index 000000000..334c45895
--- /dev/null
+++ b/costa/native/openda/bridge/include/org_openda_costa_CtaRelationTable.h
@@ -0,0 +1,61 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include <jni.h>
+/* Header for class org_costa_CtaRelationTable */
+
+#ifndef _Included_org_costa_CtaRelationTable
+#define _Included_org_costa_CtaRelationTable
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * Class:     org_costa_CtaRelationTable
+ * Method:    ctaCreate
+ * Signature: ()I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaRelationTable_ctaCreate
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_costa_CtaRelationTable
+ * Method:    SetTableCombine
+ * Signature: (Lorg/openda/interfaces/IRelationTable;ZLorg/openda/interfaces/IRelationTable;Z)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaRelationTable_SetTableCombine
+  (JNIEnv *, jobject, jobject, jboolean, jobject, jboolean);
+
+/*
+ * Class:     org_costa_CtaRelationTable
+ * Method:    SetSelect
+ * Signature: (Lorg/openda/interfaces/IVector;)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaRelationTable_SetSelect
+  (JNIEnv *, jobject, jobject);
+
+/*
+ * Class:     org_costa_CtaRelationTable
+ * Method:    apply
+ * Signature: (Lorg/openda/interfaces/IVector;Lorg/openda/interfaces/IVector;)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaRelationTable_apply
+  (JNIEnv *, jobject, jobject, jobject);
+
+/*
+ * Class:     org_costa_CtaRelationTable
+ * Method:    applyInv
+ * Signature: (Lorg/openda/interfaces/IVector;Lorg/openda/interfaces/IVector;)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaRelationTable_applyInv
+  (JNIEnv *, jobject, jobject, jobject);
+
+/*
+ * Class:     org_costa_CtaRelationTable
+ * Method:    free
+ * Signature: ()V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaRelationTable_free
+  (JNIEnv *, jobject);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaStochObserver.h b/costa/native/openda/bridge/include/org_openda_costa_CtaStochObserver.h
new file mode 100644
index 000000000..86ddef6ad
--- /dev/null
+++ b/costa/native/openda/bridge/include/org_openda_costa_CtaStochObserver.h
@@ -0,0 +1,101 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include <jni.h>
+/* Header for class org_openda_costa_CtaStochObserver */
+
+#ifndef _Included_org_openda_costa_CtaStochObserver
+#define _Included_org_openda_costa_CtaStochObserver
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * Class:     org_openda_costa_CtaStochObserver
+ * Method:    ctaCreateNative
+ * Signature: (Ljava/lang/String;Ljava/lang/String;)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaStochObserver_ctaCreateNative
+  (JNIEnv *, jobject, jstring, jstring);
+
+/*
+ * Class:     org_openda_costa_CtaStochObserver
+ * Method:    ctaGetStandardDeviation
+ * Signature: (I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaStochObserver_ctaGetStandardDeviation
+  (JNIEnv *, jobject, jint);
+
+/*
+ * Class:     org_openda_costa_CtaStochObserver
+ * Method:    getCount
+ * Signature: ()I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaStochObserver_getCount
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_openda_costa_CtaStochObserver
+ * Method:    ctaGetExpectations
+ * Signature: (I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaStochObserver_ctaGetExpectations
+  (JNIEnv *, jobject, jint);
+
+/*
+ * Class:     org_openda_costa_CtaStochObserver
+ * Method:    ctaGetObservationDescriptions
+ * Signature: ()I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaStochObserver_ctaGetObservationDescriptions
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_openda_costa_CtaStochObserver
+ * Method:    ctaGetRealizations
+ * Signature: (I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaStochObserver_ctaGetRealizations
+  (JNIEnv *, jobject, jint);
+
+/*
+ * Class:     org_openda_costa_CtaStochObserver
+ * Method:    ctaGetValues
+ * Signature: (I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaStochObserver_ctaGetValues
+  (JNIEnv *, jobject, jint);
+
+/*
+ * Class:     org_openda_costa_CtaStochObserver
+ * Method:    ctaGetVariances
+ * Signature: (I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaStochObserver_ctaGetVariances
+  (JNIEnv *, jobject, jint);
+
+/*
+ * Class:     org_openda_costa_CtaStochObserver
+ * Method:    ctaCreateSelection
+ * Signature: (Ljava/lang/String;)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaStochObserver_ctaCreateSelection
+  (JNIEnv *, jobject, jstring);
+
+/*
+ * Class:     org_openda_costa_CtaStochObserver
+ * Method:    ctaCreateTimeSelection
+ * Signature: (I)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaStochObserver_ctaCreateTimeSelection
+  (JNIEnv *, jobject, jint);
+
+/*
+ * Class:     org_openda_costa_CtaStochObserver
+ * Method:    ctaEvaluatePDF
+ * Signature: (II)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaStochObserver_ctaEvaluatePDF
+  (JNIEnv *, jobject, jint, jint);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaTime.h b/costa/native/openda/bridge/include/org_openda_costa_CtaTime.h
new file mode 100644
index 000000000..c1da6bd47
--- /dev/null
+++ b/costa/native/openda/bridge/include/org_openda_costa_CtaTime.h
@@ -0,0 +1,157 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include <jni.h>
+/* Header for class org_costa_CtaTime */
+
+#ifndef _Included_org_costa_CtaTime
+#define _Included_org_costa_CtaTime
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    create
+ * Signature: ()I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTime_create
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    isStamp
+ * Signature: ()Z
+ */
+JNIEXPORT jboolean JNICALL Java_org_openda_costa_CtaTime_isStamp
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    getMJD
+ * Signature: ()D
+ */
+JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getMJD
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    getUserTime
+ * Signature: ()D
+ */
+JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getUserTime
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    setMJD
+ * Signature: (D)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaTime_setMJD
+  (JNIEnv *, jobject, jdouble);
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    setUserTime
+ * Signature: (D)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaTime_setUserTime
+  (JNIEnv *, jobject, jdouble);
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    isSpan
+ * Signature: ()Z
+ */
+JNIEXPORT jboolean JNICALL Java_org_openda_costa_CtaTime_isSpan
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    getStep
+ * Signature: ()J
+ */
+JNIEXPORT jlong JNICALL Java_org_openda_costa_CtaTime_getStep
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    getBeginMJD
+ * Signature: ()D
+ */
+JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getBeginMJD
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    getEndMJD
+ * Signature: ()D
+ */
+JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getEndMJD
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    getStepMJD
+ * Signature: ()D
+ */
+JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getStepMJD
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    getBeginUserTime
+ * Signature: ()D
+ */
+JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getBeginUserTime
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    getEndUserTime
+ * Signature: ()D
+ */
+JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getEndUserTime
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    getUserStep
+ * Signature: ()D
+ */
+JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getUserStep
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    setSpanMJD
+ * Signature: (DDD)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaTime_setSpanMJD
+  (JNIEnv *, jobject, jdouble, jdouble, jdouble);
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    setUserSpan
+ * Signature: (DDD)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaTime_setUserSpan
+  (JNIEnv *, jobject, jdouble, jdouble, jdouble);
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    getStepCount
+ * Signature: ()J
+ */
+JNIEXPORT jlong JNICALL Java_org_openda_costa_CtaTime_getStepCount
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    clone
+ * Signature: ()Lorg/openda/interfaces/ITime;
+ */
+JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaTime_clone
+  (JNIEnv *, jobject);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaTreeVector.h b/costa/native/openda/bridge/include/org_openda_costa_CtaTreeVector.h
new file mode 100644
index 000000000..8bea84740
--- /dev/null
+++ b/costa/native/openda/bridge/include/org_openda_costa_CtaTreeVector.h
@@ -0,0 +1,137 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include <jni.h>
+/* Header for class org_costa_CtaTreeVector */
+
+#ifndef _Included_org_costa_CtaTreeVector
+#define _Included_org_costa_CtaTreeVector
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * Class:     org_costa_CtaTreeVector
+ * Method:    getId
+ * Signature: ()Ljava/lang/String;
+ */
+JNIEXPORT jstring JNICALL Java_org_openda_costa_CtaTreeVector_getId
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_costa_CtaTreeVector
+ * Method:    ctaCreate_from_vector
+ * Signature: ()I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaCreateFromVector
+  (JNIEnv *, jobject, jstring, jstring, jint);
+
+/*
+ * Class:     org_costa_CtaTreeVector
+ * Method:    ctaCreateFromSubtreevectors
+ * Signature: ()I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaCreateFromSubtreevectors
+  (JNIEnv *, jobject, jstring, jstring, jint, jintArray);
+
+/*
+ * Class:     org_costa_ctatreevector
+ * Method:    ctaSetRegGrid
+ * Signature: ()V
+*/
+ JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaSetRegGrid
+  (JNIEnv * , jobject, jint, jint, jint, jdouble, jdouble, jdouble,
+                 jdouble, jdouble, jdouble);
+
+/*
+ * Class:     org_costa_CtaTreeVector
+ * Method:    ctaGetSubTreeVector
+ * Signature: (Ljava/lang/String;)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaGetSubTreeVector
+  (JNIEnv *, jobject, jstring);
+
+/*
+ * Class:     org_costa_CtaTreeVector
+ * Method:    ctaGetSubTreeVectorId
+ * Signature: ()Ljava/lang/String;
+ */
+JNIEXPORT jstring JNICALL Java_org_openda_costa_CtaTreeVector_ctaGetSubTreeVectorId
+(JNIEnv *env, jobject obj_this, jint index);
+
+
+/*
+ * Class:     org_costa_CtaTreeVector
+ * Method:    ctaGetNumSubTreeVectors
+ * Signature: (Ljava/lang/String;)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaGetNumSubTreeVectors
+(JNIEnv *env, jobject obj_this);
+
+/*
+ * Class:     org_costa_CtaTreeVector
+ * Method:    ctaImport
+ * Signature: (Ljava/lang/String;)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaImport
+(JNIEnv *, jobject , jint );
+
+/*
+ * Class:     org_costa_CtaTreeVector
+ * Method:    ctaVImport
+ * Signature: (Ljava/lang/String;)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaVImport
+(JNIEnv *, jobject , jint );
+
+
+
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaNetcdfClose
+  (JNIEnv *, jobject, jint );
+
+/*
+ * Class:     org_resultwriters_NetcdfResultWriter
+ * Method:    ctaNetcdfClose
+ * Signature: (Ljava/lang/String;)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_resultwriters_NetcdfResultWriter_ctaNetcdfClose
+  (JNIEnv *, jobject, jint);
+
+
+
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaNetcdfInit
+  (JNIEnv *, jobject, jstring, jstring);
+/*
+ * Class:     org_resultwriters_NetcdfResultWriter
+ * Method:    ctaNetcdfInit
+ * Signature: (Ljava/lang/String;)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_resultwriters_NetcdfResultWriter_ctaNetcdfInit
+  (JNIEnv *, jobject, jstring, jstring);
+
+/*
+ * Class:     org_costa_CtaTreeVector
+ * Method:    ctaExport
+ * Signature: (Ljava/lang/String;)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaExport
+  (JNIEnv *, jobject, jint);
+
+
+/*
+ * Class:     org_costa_CtaTreeVector
+ * Method:    getCaption
+ * Signature: ()Ljava/lang/String;
+ */
+JNIEXPORT jstring JNICALL Java_org_openda_costa_CtaTreeVector_getCaption
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_costa_CtaTreeVector
+ * Method:    clone
+ * Signature: ()Lorg/openda/interfaces/ITreeVector;
+ */
+JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaTreeVector_clone
+  (JNIEnv *, jobject);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaUtils.h b/costa/native/openda/bridge/include/org_openda_costa_CtaUtils.h
new file mode 100644
index 000000000..2511876ef
--- /dev/null
+++ b/costa/native/openda/bridge/include/org_openda_costa_CtaUtils.h
@@ -0,0 +1,21 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include <jni.h>
+/* Header for class org_openda_costa_CtaUtils */
+
+#ifndef _Included_org_openda_costa_CtaUtils
+#define _Included_org_openda_costa_CtaUtils
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * Class:     org_openda_costa_CtaUtils
+ * Method:    print_memory
+ * Signature: (Ljava/lang/String;I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaUtils_print_1memory
+  (JNIEnv *, jclass, jstring, jint);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaVector.h b/costa/native/openda/bridge/include/org_openda_costa_CtaVector.h
new file mode 100644
index 000000000..3cc509377
--- /dev/null
+++ b/costa/native/openda/bridge/include/org_openda_costa_CtaVector.h
@@ -0,0 +1,155 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include <jni.h>
+/* Header for class org_costa_CtaVector */
+
+#ifndef _Included_org_costa_CtaVector
+#define _Included_org_costa_CtaVector
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    ctaPrint handles
+ * Signature: ()Lorg/openda/interfaces/IVector;
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_ctaPrintHandles
+(JNIEnv *, jobject, jstring);
+
+
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    clone
+ * Signature: ()Lorg/openda/interfaces/IVector;
+ */
+JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaVector_clone
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    setConstant
+ * Signature: (D)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_setConstant
+  (JNIEnv *, jobject, jdouble);
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    scale
+ * Signature: (D)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_scale
+  (JNIEnv *, jobject, jdouble);
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    setValues
+ * Signature: ([D)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_setValues
+  (JNIEnv *, jobject, jdoubleArray);
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    getValues
+ * Signature: ()[D
+ */
+JNIEXPORT jdoubleArray JNICALL Java_org_openda_costa_CtaVector_getValues
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    setValue
+ * Signature: (ID)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_setValue
+  (JNIEnv *, jobject, jint, jdouble);
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    getValue
+ * Signature: (I)D
+ */
+JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaVector_getValue
+  (JNIEnv *, jobject, jint);
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    getSize
+ * Signature: ()I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaVector_getSize
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    norm2
+ * Signature: ()D
+ */
+JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaVector_norm2
+  (JNIEnv *, jobject);
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    ctaCreate
+ * Signature: (I)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaVector_ctaCreate
+  (JNIEnv *, jobject, jint);
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    ctaDotProduct
+ * Signature: (I)D
+ */
+JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaVector_ctaDotProduct
+  (JNIEnv *, jobject, jint);
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    ctaPointwiseDivide
+ * Signature: (I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_ctaPointwiseDivide
+  (JNIEnv *, jobject, jint);
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    ctaPointwiseMultiply
+ * Signature: (I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_ctaPointwiseMultiply
+  (JNIEnv *, jobject, jint);
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    ctaSetValues
+ * Signature: (I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_ctaSetValues
+  (JNIEnv *, jobject, jint);
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    ctaAxpy
+ * Signature: (DI)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_ctaAxpy
+  (JNIEnv *, jobject, jdouble, jint);
+
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    ctaSqrt
+ * Signature: ()V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_ctaSqrt
+  (JNIEnv *, jobject);
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/costa/native/openda/bridge/include/org_openda_resultwriters_NativeResultWriter.h b/costa/native/openda/bridge/include/org_openda_resultwriters_NativeResultWriter.h
new file mode 100644
index 000000000..784d9c037
--- /dev/null
+++ b/costa/native/openda/bridge/include/org_openda_resultwriters_NativeResultWriter.h
@@ -0,0 +1,45 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include <jni.h>
+/* Header for class org_openda_resultwriters_NativeResultWriter */
+
+#ifndef _Included_org_openda_resultwriters_NativeResultWriter
+#define _Included_org_openda_resultwriters_NativeResultWriter
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * Class:     org_openda_resultwriters_NativeResultWriter
+ * Method:    putMessage
+ * Signature: (ILjava/lang/String;Ljava/lang/String;Ljava/lang/String;)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_resultwriters_NativeResultWriter_putMessage
+  (JNIEnv *, jobject, jint, jstring, jstring, jstring);
+
+/*
+ * Class:     org_openda_resultwriters_NativeResultWriter
+ * Method:    putValue
+ * Signature: (ILjava/lang/String;Ljava/lang/String;Ljava/lang/String;IILjava/lang/String;I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_resultwriters_NativeResultWriter_putValue
+  (JNIEnv *, jobject, jint, jstring, jstring, jstring, jint, jint, jstring, jint);
+
+/*
+ * Class:     org_openda_resultwriters_NativeResultWriter
+ * Method:    putIterationReport
+ * Signature: (ILjava/lang/String;Ljava/lang/String;IDI)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_resultwriters_NativeResultWriter_putIterationReport
+  (JNIEnv *, jobject, jint, jstring, jstring, jint, jdouble, jint);
+
+/*
+ * Class:     org_openda_resultwriters_NativeResultWriter
+ * Method:    free
+ * Signature: (I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_resultwriters_NativeResultWriter_free
+  (JNIEnv *, jobject, jint);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/costa/native/openda/bridge/include/org_openda_resultwriters_NetcdfResultWriterNative.h b/costa/native/openda/bridge/include/org_openda_resultwriters_NetcdfResultWriterNative.h
new file mode 100644
index 000000000..9774ecd35
--- /dev/null
+++ b/costa/native/openda/bridge/include/org_openda_resultwriters_NetcdfResultWriterNative.h
@@ -0,0 +1,29 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include <jni.h>
+/* Header for class org_openda_resultwriters_NetcdfResultWriterNative */
+
+#ifndef _Included_org_openda_resultwriters_NetcdfResultWriterNative
+#define _Included_org_openda_resultwriters_NetcdfResultWriterNative
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+ * Class:     org_openda_resultwriters_NetcdfResultWriterNative
+ * Method:    ctaNetcdfInit
+ * Signature: (Ljava/lang/String;Ljava/lang/String;)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_resultwriters_NetcdfResultWriterNative_ctaNetcdfInit
+  (JNIEnv *, jobject, jstring, jstring);
+
+/*
+ * Class:     org_openda_resultwriters_NetcdfResultWriterNative
+ * Method:    ctaNetcdfClose
+ * Signature: (I)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_resultwriters_NetcdfResultWriterNative_ctaNetcdfClose
+  (JNIEnv *, jobject, jint);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/costa/native/openda/bridge/src/CMakeLists.txt b/costa/native/openda/bridge/src/CMakeLists.txt
new file mode 100644
index 000000000..7e4c9e1b7
--- /dev/null
+++ b/costa/native/openda/bridge/src/CMakeLists.txt
@@ -0,0 +1,26 @@
+cmake_minimum_required(VERSION 3.9.1)
+find_package(LibXml2 REQUIRED)
+find_package(JNI REQUIRED)
+
+set(LIBRARY_OUTPUT_PATH  ${CMAKE_BINARY_DIR}/lib)
+set(SOURCES
+  jni_cta_utils.cpp                                org_openda_costa_CtaRelationTable.cpp
+  org_openda_costa_CtaArray.cpp                    org_openda_costa_CtaStochObserver.cpp
+  org_openda_costa_CtaInitialize.cpp               org_openda_costa_CtaTime.cpp
+  org_openda_costa_CtaModelState.cpp               org_openda_costa_CtaTreeVector.cpp
+  org_openda_costa_CtaObject.cpp                   org_openda_costa_CtaUtils.cpp
+  org_openda_costa_CtaObservationDescriptions.cpp  org_openda_costa_CtaVector.cpp
+  org_openda_costa_CtaOpenDaModel.cpp              org_openda_resultwriters_NativeResultWriter.cpp
+  org_openda_costa_CtaParallel.cpp                 org_openda_resultwriters_NetcdfResultWriterNative.cpp
+)
+
+add_library(opendabridge SHARED ${SOURCES})
+
+
+target_include_directories(opendabridge PUBLIC ../include)
+target_include_directories(opendabridge PUBLIC ${CMAKE_SOURCE_DIR}/cta/include)
+target_include_directories(opendabridge PUBLIC ${CMAKE_SOURCE_DIR}/external)
+target_include_directories(opendabridge PUBLIC ${LIBXML2_INCLUDE_DIR}) 
+target_include_directories(opendabridge PUBLIC ${JNI_INCLUDE_DIRS})
+#target_link_libraries(cta INTERFACE libxml)
+
diff --git a/costa/native/openda/bridge/src/jni_cta_utils.cpp b/costa/native/openda/bridge/src/jni_cta_utils.cpp
new file mode 100644
index 000000000..3df977437
--- /dev/null
+++ b/costa/native/openda/bridge/src/jni_cta_utils.cpp
@@ -0,0 +1,220 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/openda/bridge/jni_cta_utils.cpp $
+$Revision: 2694 $, $Date: 2011-08-24 08:32:07 +0200 (Wed, 24 Aug 2011) $
+
+OpenDA interface for COSTA.
+Copyright (C) 2007  Stef Hummel / Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+#define CLASSNAME  "CtaObsDescr_nativeToJava"
+#define IDEBUG 0
+
+#define MAX_EXCEPTION_LEN 256
+
+#include <stdio.h>
+#include <math.h>
+#include "jni_cta_utils.h"
+#include "cta.h"
+
+
+static jmethodID javaIDWriter;
+static jclass    javaClass;
+static JNIEnv*   javaEnv;
+
+
+#define METHOD "cta_jni_setJavaEnv"
+void cta_jni_setJavaEnv(JNIEnv *env){
+   javaEnv=env;
+}
+
+#undef METHOD
+#define METHOD "cta_jni_exception"
+void cta_jni_exception(JNIEnv *env, const char *ctaClassName, const char *message, int retVal)
+{
+	char messageString[MAX_EXCEPTION_LEN+1];
+	sprintf(messageString, "%s, Costa Error Code=%d", message, retVal);
+	cta_jni_exception(env, ctaClassName, messageString);
+	return;
+}
+
+#undef METHOD
+#define METHOD "cta_jni_exception"
+void cta_jni_exception(JNIEnv *env, const char *ctaClassName, const char *message)
+{
+	char messageString[MAX_EXCEPTION_LEN+1];
+	sprintf(messageString, "%s: %s", ctaClassName, message);
+	jclass cls = env->FindClass("java/lang/RuntimeException");
+	/* if cls is NULL, an exception has already been thrown */
+	if (cls != NULL) {
+		env->ThrowNew(cls, messageString);
+	} else {
+		env->FatalError("Could not get RuntimeException class");	
+	}
+     /* free the local ref */
+    env->DeleteLocalRef(cls);
+}
+
+#undef METHOD
+#define METHOD "cta_jni_free"
+void cta_jni_free(JNIEnv * env, jobject obj_this) {
+
+	CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this);
+
+	if ( ctaHandle != CTA_NULL )
+	{
+		CTA_Handle_Free_All(&ctaHandle);
+		jclass class_CtaObject = env->GetObjectClass(obj_this);
+		jfieldID ctaHandleField = env->GetFieldID(class_CtaObject, "ctaHandle", "I");
+		env->SetIntField(obj_this, ctaHandleField, ctaHandle);
+	}
+
+}
+
+
+
+#undef METHOD
+#define METHOD "cta_jni_getCtaHandle"
+CTA_Handle cta_jni_getCtaHandle(JNIEnv * env, jobject obj_this) {
+
+	jclass class_CtaObject = env->GetObjectClass(obj_this);
+	jfieldID ctaHandleField = env->GetFieldID(class_CtaObject, "ctaHandle", "I");
+	return env->GetIntField(obj_this, ctaHandleField);
+
+}
+
+#undef METHOD
+#define METHOD "cta_jni_setCtaHandle"
+void cta_jni_setCtaHandle(JNIEnv * env, jobject obj_this, CTA_Handle ctaHandle) {
+
+	jclass class_CtaObject = env->GetObjectClass(obj_this);
+	jfieldID ctaHandleField = env->GetFieldID(class_CtaObject, "ctaHandle", "I");
+
+   env->SetIntField(obj_this, ctaHandleField, ctaHandle);
+}
+
+#undef METHOD
+#define METHOD "cta_jni_ExternalMessageWriter"
+void cta_jni_ExternalMessageWriter(char *className, char *method, char *message, char type){
+   
+   jstring jClassName, jMethod ,jMessage,jType;
+   char sType[2];
+   sType[0]=type;
+   sType[1]=0;
+
+   // Create 4 Java strings
+   jClassName =(javaEnv)->NewStringUTF(className);
+   jMethod    =(javaEnv)->NewStringUTF(method);
+   jMessage   =(javaEnv)->NewStringUTF(message);
+   jType      =(javaEnv)->NewStringUTF(sType);
+   
+   
+   // Get the Java class of the message writer
+   jclass cls = javaEnv->FindClass("org/openda/costa/CtaMessageWriter");
+   jmethodID my_method=NULL;
+   if (cls){
+      my_method = javaEnv->GetStaticMethodID(cls, "Write", "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V");
+   } 
+
+   if (my_method){
+      (javaEnv)->CallStaticVoidMethod(cls, my_method, jClassName, jMethod, jMessage, jType);
+   } 
+   else {
+      printf("Null JNI pointers found. I cannot call the message writer; the message to write is\n");
+      printf("class:%s method:%s message:%s type:%s\n", className, method, message, sType);
+   }
+}
+
+#undef METHOD
+#define METHOD "cta_jni_ExternalMessageWriterSetID"
+void cta_jni_ExternalMessageWriterSetID(JNIEnv *env, jclass classID, jmethodID methodID){
+   javaIDWriter =methodID;
+   javaClass    =classID;
+   javaEnv      =env;
+};
+
+#undef METHOD
+#define METHOD "cta_jni_JavaStringVecToNativeStringVec"
+int cta_jni_JavaStringVecToNativeVec(JNIEnv *env,  jobjectArray jArray, CTA_Vector cVec){
+   CTA_String aString;
+   jint lenArray;
+   int nVec, ival;
+   jstring j_str;
+   jboolean isCopy;
+   CTA_Datatype datatype;
+   double val;
+
+      // Check dimensions
+      CTA_Vector_GetSize(cVec, &nVec);
+      lenArray = env->GetArrayLength(jArray);
+      if (nVec == lenArray) {
+
+         // Create work string
+         CTA_String_Create(&aString);
+
+         // Get the datatype of the vector
+         CTA_Vector_GetDatatype(cVec, &datatype);
+
+         // Unpack the java vector
+         int retval=CTA_OK;
+         for (int i=0; i<lenArray && retval==CTA_OK; i++){
+            const char *c_str;
+            // Put Java string into COSTA string
+            j_str = (jstring) env->GetObjectArrayElement(jArray, i);
+            c_str = env->GetStringUTFChars(j_str,&isCopy);
+            
+         
+            // Store java string in COSTA string or convert value to double
+            if (datatype == CTA_STRING) {
+               if (IDEBUG) printf("cta_jni_JavaStringVecToNativeStringVec :string %d is %s\n",i+1,c_str);
+               CTA_String_Set(aString,c_str);
+            }
+            else {
+               val=atof(c_str);
+               if (IDEBUG) printf("cta_jni_JavaStringVecToNativeStringVec :value %d is %f\n",i+1,val);
+               
+            }
+            env->ReleaseStringUTFChars(j_str, c_str);
+
+            // Store value in COSTA vector
+            if (datatype==CTA_STRING) {
+               // Put COSTA string into array of strings
+               retval=CTA_Vector_SetVal(cVec, i+1, &aString, CTA_STRING);
+            }
+            else {
+               if (datatype == CTA_INTEGER){
+                  ival = (int) val;
+                  retval=CTA_Vector_SetVal(cVec, i+1, &ival, CTA_INTEGER);
+               }
+               else {
+                  retval=CTA_Vector_SetVal(cVec, i+1, &val, CTA_DOUBLE);
+               }
+            }
+         }
+         CTA_String_Free(&aString);
+         return retval;
+      }
+      else {
+         char message[256];
+         sprintf(message,"Dimension error: the size of the native array is %d and the java array has size %d",nVec,lenArray);
+         CTA_WRITE_ERROR(message);
+         return CTA_DIMENSION_ERROR;
+      }
+
+
+
+
+
+}
diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaArray.cpp b/costa/native/openda/bridge/src/org_openda_costa_CtaArray.cpp
new file mode 100644
index 000000000..5a6024719
--- /dev/null
+++ b/costa/native/openda/bridge/src/org_openda_costa_CtaArray.cpp
@@ -0,0 +1,600 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/core/native/src/openda/bridge/org_openda_costa_CtaVector.cpp $
+$Revision: 2738 $, $Date: 2011-09-05 10:48:32 +0200 (Mon, 05 Sep 2011) $
+
+OpenDA interface for COSTA.
+Copyright (C) 2012  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include "org_openda_costa_CtaArray.h"
+#include "cta_array.h"
+#include "jni_cta_utils.h"
+#include "cta_errors.h"
+#include "cta_defaults.h"
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    create
+ * Signature: ([D[I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_create
+  (JNIEnv *env, jobject obj_this, jdoubleArray jValues, jintArray jDimensions){
+
+   cta_jni_setJavaEnv(env);
+
+   // Get dimension and pointers to the data of the java arrays
+   //int nValues = env->GetArrayLength(jValues);
+   jboolean isCopy = JNI_FALSE; 
+   jdouble * cValues = env->GetDoubleArrayElements(jValues, &isCopy);
+
+   int nDimensions = env->GetArrayLength(jDimensions);
+   jint * cDimensions = env->GetIntArrayElements(jDimensions, &isCopy);
+
+   // Call native constructor
+   CTA_Array h;  
+   int ierr=CTA_Array_CreateAsDoubles(cValues, nDimensions, (int*) cDimensions, &h);
+   if ( ierr != CTA_OK ) {
+      cta_jni_exception(env, "CtaArray", "Could create new Array", ierr);
+   }
+
+   // Set the native handle in the java class
+   cta_jni_setCtaHandle(env, obj_this, (CTA_Handle) h);
+
+   // Release the data of the arrays
+   env->ReleaseDoubleArrayElements(jValues, cValues,0);
+   env->ReleaseIntArrayElements(jDimensions, cDimensions,0);
+}
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    getNumberOfDimensions
+ * Signature: ()I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaArray_getNumberOfDimensions
+  (JNIEnv *env, jobject obj_this){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this);
+   return (jint) CTA_Array_getNumberOfDimensions(h);
+}
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    getDimensions
+ * Signature: ()[I
+ */
+JNIEXPORT jintArray JNICALL Java_org_openda_costa_CtaArray_getDimensions
+  (JNIEnv *env, jobject obj_this){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this);
+
+   int nDimensions=CTA_Array_getNumberOfDimensions(h);
+   jintArray jDimensions = env->NewIntArray(nDimensions);
+   jint * cDimensions = new jint[nDimensions];
+
+   int ierr = CTA_Array_getDimensions(h, (int*) cDimensions);
+   if (ierr == CTA_OK){
+      env->SetIntArrayRegion(jDimensions, 0, nDimensions, cDimensions);
+   }
+   else {
+      cta_jni_exception(env, "CtaArray", "could not call getDimensions", ierr);
+   }
+   delete [] cDimensions;
+   return jDimensions;
+}
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    length
+ * Signature: ()I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaArray_length
+  (JNIEnv *env, jobject obj_this){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this);
+
+   return (jint) CTA_Array_length(h);
+
+}
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    getValuesAsDoubles
+ * Signature: ()[D
+ */
+JNIEXPORT jdoubleArray JNICALL Java_org_openda_costa_CtaArray_getValuesAsDoubles__
+  (JNIEnv *env, jobject obj_this){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this);
+   int n = CTA_Array_length(h);
+   jdoubleArray jValues = env->NewDoubleArray(n);
+   double *cValues = new double[n];
+
+   int ierr = CTA_Array_getValuesAsDoubles(h, cValues);
+   if (ierr == CTA_OK){
+      env->SetDoubleArrayRegion(jValues, 0, n, cValues);
+   }
+   else {
+      cta_jni_exception(env, "CtaArray", "could not call getValuesAsDoubles", ierr);
+   }
+   delete [] cValues;
+   return jValues;
+}
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    getValuesAsDoubles
+ * Signature: (Z)[D
+ */
+JNIEXPORT jdoubleArray JNICALL Java_org_openda_costa_CtaArray_getValuesAsDoubles__Z
+  (JNIEnv *env, jobject obj_this, jboolean){
+
+   //Note since this is a JNI binding we ignore the boolean
+
+   cta_jni_setJavaEnv(env);
+   CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this);
+   int n = CTA_Array_length(h);
+   jdoubleArray jValues = env->NewDoubleArray(n);
+   double *cValues = new double[n];
+
+   int ierr = CTA_Array_getValuesAsDoubles(h, cValues);
+   if (ierr == CTA_OK){
+      env->SetDoubleArrayRegion(jValues, 0, n, cValues);
+   }
+   else {
+      cta_jni_exception(env, "CtaArray", "could not call getValuesAsDoubles", ierr);
+   }
+   delete [] cValues;
+   return jValues;
+}
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    getValuesAsDoubles
+ * Signature: (II)[D
+ */
+JNIEXPORT jdoubleArray JNICALL Java_org_openda_costa_CtaArray_getValuesAsDoubles__II
+  (JNIEnv *env, jobject obj_this, jint firstIndex, jint lastIndex){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this);
+   int n = lastIndex-firstIndex+1;
+   jdoubleArray jValues = env->NewDoubleArray(n);
+   double *cValues = new double[n];
+
+   int ierr = CTA_Array_getValuesAsDoubles_indexrange(h, firstIndex, lastIndex, cValues);
+   if (ierr == CTA_OK){
+      env->SetDoubleArrayRegion(jValues, 0, n, cValues);
+   }
+   else {
+      cta_jni_exception(env, "CtaArray", "could not call getValuesAsDoubles_indexrange", ierr);
+   }
+   delete [] cValues;
+   return jValues;
+
+
+}
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    getValueAsDouble
+ * Signature: (I)D
+ */
+JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaArray_getValueAsDouble__I
+  (JNIEnv *env, jobject obj_this, jint index){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this);
+   jdouble cValue=-99.0;
+
+   int ierr = CTA_Array_getValueAsDoubles_index(h, index, &cValue);
+   if (ierr!=CTA_OK){
+      cta_jni_exception(env, "CtaArray", "could not call getValuesAsDoubles_indexrange", ierr);
+   }
+   return cValue;
+}
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    getValueAsDouble
+ * Signature: ([I)D
+ */
+JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaArray_getValueAsDouble___3I
+  (JNIEnv *env, jobject obj_this, jintArray jIndices){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this);
+
+   jboolean isCopy = JNI_FALSE; 
+   jint * cIndices = env->GetIntArrayElements(jIndices, &isCopy);
+   int nIndices = env->GetArrayLength(jIndices);
+
+   jdouble cValue;
+   int ierr = CTA_Array_getValueAsDouble_indices(h, nIndices, (int *) cIndices, &cValue);
+   if (ierr != CTA_OK){
+      cta_jni_exception(env, "CtaArray", "could not call CTA_Array_getValueAsDouble_indices", ierr);
+   }
+   env->ReleaseIntArrayElements(jIndices, cIndices,0);
+
+   return cValue;
+}
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    setConstant
+ * Signature: (D)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setConstant
+  (JNIEnv *env, jobject obj_this, jdouble jValue){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this);
+   int ierr = CTA_Array_setConstant(h, jValue);
+   if (ierr != CTA_OK){
+      cta_jni_exception(env, "CtaArray", "could not call CTA_Array_setConstant", ierr);
+   }
+}
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    setValuesAsDoubles
+ * Signature: ([D)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setValuesAsDoubles___3D
+  (JNIEnv *env, jobject obj_this, jdoubleArray jValues){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this);
+
+   jboolean isCopy = JNI_FALSE; 
+   jdouble * cValues = env->GetDoubleArrayElements(jValues, &isCopy);
+   int length = env->GetArrayLength(jValues);
+   
+   int ierr = CTA_Array_setValuesAsDoubles(h, cValues, length);
+   if ( ierr != CTA_OK ) {
+      cta_jni_exception(env, "CtaArray", "Could call CTA_Array_setValuesAsDoubles", ierr);
+   }
+   env->ReleaseDoubleArrayElements(jValues, cValues,0);
+}
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    setValuesAsDoubles
+ * Signature: (II[D)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setValuesAsDoubles__II_3D
+  (JNIEnv *env, jobject obj_this, jint firstIndex, jint lastIndex, jdoubleArray jValues){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this);
+
+   jboolean isCopy = JNI_FALSE; 
+   jdouble * cValues = env->GetDoubleArrayElements(jValues, &isCopy);
+
+   int ierr = CTA_Array_setValuesAsDoubles_indexrange(h, firstIndex, lastIndex, cValues);
+   if ( ierr != CTA_OK ) {
+      cta_jni_exception(env, "CtaArray", "Could call CTA_Array_setValuesAsDoubles_indexrange", ierr);
+   }
+   env->ReleaseDoubleArrayElements(jValues, cValues,0);
+}
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    setValueAsDouble
+ * Signature: (ID)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setValueAsDouble__ID
+  (JNIEnv *env, jobject obj_this, jint jIndex, jdouble jValue){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this);
+
+   int ierr = CTA_Array_setValueAsDouble_index(h, jIndex, jValue);
+   if ( ierr != CTA_OK ) {
+      cta_jni_exception(env, "CtaArray", "Could call CTA_Array_setValueAsDouble", ierr);
+   }
+}
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    setValueAsDouble
+ * Signature: ([ID)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setValueAsDouble___3ID
+  (JNIEnv *env, jobject obj_this, jintArray jIndices, jdouble jValue){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this);
+
+   jboolean isCopy = JNI_FALSE; 
+   jint * cIndices = env->GetIntArrayElements(jIndices, &isCopy);
+   int nIndices = env->GetArrayLength(jIndices);
+ 
+
+   int ierr = CTA_Array_setValueAsDouble_indices(h, nIndices, (int *) cIndices, jValue);
+   if ( ierr != CTA_OK ) {
+      cta_jni_exception(env, "CtaArray", "Could call setValueAsDouble_indices", ierr);
+   }
+   env->ReleaseIntArrayElements(jIndices, cIndices,0);
+
+}
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    axpyOnValues
+ * Signature: (D[D)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_axpyOnValues
+  (JNIEnv *env, jobject obj_this, jdouble jAlpha, jdoubleArray jValues){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this);
+
+   jboolean isCopy = JNI_FALSE; 
+   jdouble * cValues = env->GetDoubleArrayElements(jValues, &isCopy);
+
+   int ierr = CTA_Array_axpyOnValues(h, jAlpha, cValues);
+   if ( ierr != CTA_OK ) {
+      cta_jni_exception(env, "CtaArray", "Could call CTA_Array_axpyOnValues", ierr);
+   }
+   env->ReleaseDoubleArrayElements(jValues, cValues,0);
+}
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    multiplyValues
+ * Signature: ([D)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_multiplyValues
+  (JNIEnv *env, jobject obj_this, jdoubleArray jValues){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this);
+
+   jboolean isCopy = JNI_FALSE; 
+   jdouble * cValues = env->GetDoubleArrayElements(jValues, &isCopy);
+
+   int ierr = CTA_Array_multiplyValues(h, cValues);
+   if ( ierr != CTA_OK ) {
+      cta_jni_exception(env, "CtaArray", "Could call CTA_Array_axpyOnValues", ierr);
+   }
+   env->ReleaseDoubleArrayElements(jValues, cValues,0);
+}
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    reshape
+ * Signature: ([I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_reshape
+  (JNIEnv *env, jobject obj_this, jintArray jDimensions){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this);
+
+   jboolean isCopy = JNI_FALSE; 
+   int nDimensions = env->GetArrayLength(jDimensions);
+   jint * cDimensions = env->GetIntArrayElements(jDimensions, &isCopy);
+
+   int ierr = CTA_Array_reshape(h, nDimensions, (int *) cDimensions);
+   if ( ierr != CTA_OK ) {
+      cta_jni_exception(env, "CtaArray", "Could call CTA_Array_reshape", ierr);
+   }
+   env->ReleaseIntArrayElements(jDimensions, cDimensions,0);
+}
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    getSlice
+ * Signature: (II)Lorg/openda/interfaces/IArray;
+ */
+JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaArray_getSlice__II
+  (JNIEnv *env, jobject obj_this, jint jDimension, jint jIndex){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this);
+    
+   CTA_Array h_out;
+   jobject jArray=NULL;
+
+   int ierr = CTA_Array_getSlice(h, jDimension, jIndex, &h_out);
+   if ( ierr == CTA_OK ) {
+      /* Create a Java CtaVector */
+      jclass clsArray = env->FindClass("org/openda/costa/CtaArray");
+      jmethodID constructorID = env->GetMethodID (clsArray, "<init>", "()V");
+      jArray = env->NewObject(clsArray, constructorID);
+
+      cta_jni_setCtaHandle(env, jArray, h_out);
+   }
+   else {
+      cta_jni_exception(env, "CtaArray", "Could call CTA_Array_getSlice", ierr);
+   }
+   return jArray;
+}
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    getSlice
+ * Signature: (III)Lorg/openda/interfaces/IArray;
+ */
+JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaArray_getSlice__III
+  (JNIEnv *env, jobject obj_this, jint jDimension, jint jMinIndex, jint jMaxIndex){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this);
+    
+   CTA_Array h_out;
+   jobject jArray=NULL;
+
+   int ierr = CTA_Array_getSlice_range(h, jDimension, jMinIndex, jMaxIndex, &h_out);
+   if ( ierr == CTA_OK ) {
+      /* Create a Java CtaVector */
+      jclass clsArray = env->FindClass("org/openda/costa/CtaArray");
+      jmethodID constructorID = env->GetMethodID (clsArray, "<init>", "()V");
+      jArray = env->NewObject(clsArray, constructorID);
+
+      cta_jni_setCtaHandle(env, jArray, h_out);
+   }
+   else {
+      cta_jni_exception(env, "CtaArray", "Could call CTA_Array_getSlice_range", ierr);
+   }
+   return jArray;
+}
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    getSliceAsDoubles
+ * Signature: (III)[D
+ */
+JNIEXPORT jdoubleArray JNICALL Java_org_openda_costa_CtaArray_getSliceAsDoubles
+  (JNIEnv *env, jobject obj_this, jint jDimension, jint jMinIndex, jint jMaxIndex){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this);
+
+   int nDimensions;
+   CTA_Array_getnDimensions(h, &nDimensions);
+   int *dimensions = new int[nDimensions];
+   CTA_Array_getDimensions(h, dimensions);
+
+   int n=jMaxIndex-jMinIndex+1;
+   int i;
+   for (i=0;i<jDimension;i++){ n*=dimensions[i];}
+   for (i=jDimension+1;i<nDimensions;i++){ n*=dimensions[i];}
+   delete [] dimensions;
+
+   jdoubleArray jValues = env->NewDoubleArray(n);
+   double *cValues = new double[n];
+
+   int ierr = CTA_Array_getSliceAsDoubles_range(h, jDimension, jMinIndex, jMaxIndex, cValues);
+   if (ierr == CTA_OK){
+      env->SetDoubleArrayRegion(jValues, 0, n, cValues);
+   }
+   else {
+      cta_jni_exception(env, "CtaArray", "could not call CTA_Array_getSliceAsDoubles_range", ierr);
+   }
+   return jValues;
+
+}
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    setSlice
+ * Signature: ([DII)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setSlice___3DII
+  (JNIEnv *env, jobject obj_this, jdoubleArray jValues, jint jDimension, jint jIndex){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this);
+
+   jboolean isCopy = JNI_FALSE; 
+   jdouble * cValues = env->GetDoubleArrayElements(jValues, &isCopy);
+
+   int ierr = CTA_Array_setSliceAsDoubles(h, cValues, jDimension, jIndex);
+   if (ierr != CTA_OK){
+      cta_jni_exception(env, "CtaArray", "could not call CTA_Array_setSliceAsDoubles", ierr);
+   }
+   env->ReleaseDoubleArrayElements(jValues, cValues,0);
+}
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    setSliceBySlice
+ * Signature: (Lorg/openda/interfaces/IArray;II)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setSliceBySlice__Lorg_openda_interfaces_IArray_2II
+  (JNIEnv *env, jobject obj_this, jobject jSlice, jint jDimension, jint jIndex){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Array h       = (CTA_Array) cta_jni_getCtaHandle(env, obj_this);
+   CTA_Array h_slice = (CTA_Array) cta_jni_getCtaHandle(env, jSlice);
+
+   int ierr = CTA_Array_setSliceAsArray(h, h_slice, jDimension, jIndex);
+   if (ierr != CTA_OK){
+      cta_jni_exception(env, "CtaArray", "could not call CTA_Array_setSliceAsArray", ierr);
+   }
+}
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    setSliceBySlice
+ * Signature: (Lorg/openda/interfaces/IArray;III)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setSliceBySlice__Lorg_openda_interfaces_IArray_2III
+  (JNIEnv *env, jobject obj_this, jobject jSlice, jint jDimension, jint jMinIndex, jint jMaxIndex){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Array h       = (CTA_Array) cta_jni_getCtaHandle(env, obj_this);
+   CTA_Array h_slice = (CTA_Array) cta_jni_getCtaHandle(env, jSlice);
+
+   int ierr = CTA_Array_setSliceAsArray_range(h, h_slice, jDimension, jMinIndex, jMaxIndex);
+   if (ierr != CTA_OK){
+      cta_jni_exception(env, "CtaArray", "could not call CTA_Array_setSliceAsArray_range", ierr);
+   }
+}
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    setSlice
+ * Signature: ([DIII)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaArray_setSlice___3DIII
+  (JNIEnv *env, jobject obj_this, jdoubleArray jValues, jint jDimension, jint jMinIndex, jint jMaxIndex){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this);
+
+   jboolean isCopy = JNI_FALSE; 
+   jdouble * cValues = env->GetDoubleArrayElements(jValues, &isCopy);
+
+   int ierr = CTA_Array_setSliceAsDoubles_range(h, cValues, jDimension, jMinIndex, jMaxIndex);
+   if (ierr != CTA_OK){
+      cta_jni_exception(env, "CtaArray", "could not call CTA_Array_setSliceAsDoubles_range", ierr);
+   }
+   env->ReleaseDoubleArrayElements(jValues, cValues,0);
+
+
+}
+
+/*
+ * Class:     org_openda_costa_CtaArray
+ * Method:    valueIndex
+ * Signature: ([I)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaArray_valueIndex
+  (JNIEnv *env, jobject obj_this, jintArray jIndices){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Array h = (CTA_Array) cta_jni_getCtaHandle(env, obj_this);
+
+   jboolean isCopy = JNI_FALSE; 
+   jint * cIndices = env->GetIntArrayElements(jIndices, &isCopy);
+   int nIndices = env->GetArrayLength(jIndices);
+   jint jIndex;
+   int ierr = CTA_Array_valueIndex(h, nIndices, (int *) cIndices, (int*) &jIndex);
+   if (ierr != CTA_OK){
+      cta_jni_exception(env, "CtaArray", "could not call CTA_Array_valueIndex", ierr);
+   }
+
+   env->ReleaseIntArrayElements(jIndices, cIndices,0);
+   return jIndex;
+}
+
diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaInitialize.cpp b/costa/native/openda/bridge/src/org_openda_costa_CtaInitialize.cpp
new file mode 100644
index 000000000..d30f6810e
--- /dev/null
+++ b/costa/native/openda/bridge/src/org_openda_costa_CtaInitialize.cpp
@@ -0,0 +1,85 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/core/native/src/openda/bridge/org_openda_costa_CtaObject.cpp $
+$Revision: 2211 $, $Date: 2011-04-01 13:48:25 +0200 (Fri, 01 Apr 2011) $
+
+OpenDA interface for COSTA.
+Copyright (C) 2013  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+#include "org_openda_costa_CtaInitialize.h"
+#include "jni_cta_utils.h"
+#include "cta.h"
+#include "jni_cta_CtaObsdescr_NativeToJava.h"
+
+
+/*
+ * Class:     org_openda_costa_CtaInitialize
+ * Method:    ctaInit
+ * Signature: ()V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaInitialize_ctaInit
+  (JNIEnv *env, jclass){
+
+   CTA_Func hMessageWriter;
+
+	// Initialize the costa core environment
+   CTA_Core_Initialise();
+   
+   // Create a COSTA function handle for the JNI-message writer
+   CTA_Func_Create("CTA_cta_jni_ExternalMessageWriter", (CTA_Function*) &cta_jni_ExternalMessageWriter, 
+                   CTA_NULL, &hMessageWriter);
+
+   // Get the Java class of the message writer
+   jclass clsMessageWriter = env->FindClass("org/openda/costa/CtaMessageWriter");
+   
+
+   if (!clsMessageWriter){
+      CTA_Message_Write("org_costa_CtaObject", "ctaInit", "Cannot find Java class org/openda/costa/CtaMessageWriter", 'F');
+   }
+   // Get the Method ID of the Java write-function
+   jmethodID javaIDWriter = env->GetStaticMethodID(clsMessageWriter, "Write", "(Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;)V");
+   if (!javaIDWriter){
+      CTA_Message_Write("org_costa_CtaObject", "ctaInit", "Member method Write in Java class org/openda/costa/CtaMessageWriter", 'F');
+   }
+
+   // Save ID's of the message writer
+   cta_jni_ExternalMessageWriterSetID(env, clsMessageWriter, javaIDWriter);
+
+   // Set the JNI-writer as message writer in COSTA 
+   CTA_Message_SetExternalWriter(hMessageWriter);
+
+   // Use the JNI-writer for informing that initialization is succesfull
+   CTA_Message_Write("org_costa_CtaObject", "ctaInit", "Java message writer is initialized", 'I');
+
+   // Administrate the CtaObsdescr_NativeToJava.c class
+   CTA_ObsDescrClass hobsdescrcl;
+   CTA_ObsDescr_nativeToJava_initialise(&hobsdescrcl);
+
+}
+
+/*
+ * Class:     org_openda_costa_CtaInitialize
+ * Method:    setRandomSeed
+ * Signature: (I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaInitialize_setRandomSeed
+  (JNIEnv *, jclass, jint seed){
+  long lseed=seed;
+
+ CTA_rand_seed(lseed); 
+
+}
+
diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaModelState.cpp b/costa/native/openda/bridge/src/org_openda_costa_CtaModelState.cpp
new file mode 100644
index 000000000..f2b2ae41f
--- /dev/null
+++ b/costa/native/openda/bridge/src/org_openda_costa_CtaModelState.cpp
@@ -0,0 +1,63 @@
+/*
+$URL:  $
+$Revision: $, $Date: $
+
+Copyright (c) 2012 OpenDA Association
+All rights reserved.
+
+This file is part of OpenDA.
+
+OpenDA is free software: you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as
+published by the Free Software Foundation, either version 3 of
+the License, or (at your option) any later version.
+
+OpenDA is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with OpenDA.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "org_openda_costa_CtaModelState.h"
+#include "cta_model.h"
+#include "jni_cta_utils.h"
+#include "cta_errors.h"
+#include "cta_defaults.h"
+
+#define IDEBUG (0)
+/*
+ * Class:     org_openda_costa_CtaModelState
+ * Method:    nativeSavePersistentState
+ * Signature: (Ljava/lang/String;Ljava/lang/String;I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaModelState_nativeSavePersistentState
+  (JNIEnv *env, jobject jPersisState, jstring jFileName, jstring jID, jint ctaModel){
+
+   cta_jni_setJavaEnv(env);
+   if (IDEBUG>0) {
+     printf("Java_org_openda_costa_CtaModelState_nativeSavePersistentState: Start of function:\n");
+   }
+
+   const char *fileName = env->GetStringUTFChars(jFileName, 0);
+   const char *ID       = env->GetStringUTFChars(jID, 0);
+   CTA_String cFileName;
+   CTA_String cID;
+   CTA_String_Create(&cFileName);
+   CTA_String_Create(&cID);
+   CTA_String_Set(cFileName, fileName);
+   CTA_String_Set(cID,ID);
+   env->ReleaseStringUTFChars(jFileName, fileName);
+   env->ReleaseStringUTFChars(jID, ID);
+
+   int retVal = CTA_Model_SavePersistentState(ctaModel, cFileName, cID);
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaModelState", "Error calling SavePersistentState ", retVal);
+      return;
+   }
+   if (IDEBUG>0) {
+     printf("Java_org_openda_costa_CtaModelState_nativeSavePersistentState: End of function:\n");
+   }
+}
diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaObject.cpp b/costa/native/openda/bridge/src/org_openda_costa_CtaObject.cpp
new file mode 100644
index 000000000..f18734a57
--- /dev/null
+++ b/costa/native/openda/bridge/src/org_openda_costa_CtaObject.cpp
@@ -0,0 +1,39 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/openda/bridge/org_openda_costa_CtaObject.cpp $
+$Revision: 4061 $, $Date: 2013-07-15 11:45:09 +0200 (Mon, 15 Jul 2013) $
+
+OpenDA interface for COSTA.
+Copyright (C) 2007  Stef Hummel / Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+
+#include "org_openda_costa_CtaObject.h"
+#include "jni_cta_utils.h"
+#include "cta.h"
+#include "jni_cta_CtaObsdescr_NativeToJava.h"
+
+/*
+ * Class:     org_costa_CtaObject
+ * Method:    ctaFree
+ * Signature: ()V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaObject_ctaFree
+  (JNIEnv * env, jobject obj_this)
+{
+	cta_jni_free(env, obj_this);
+}
+
diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaObservationDescriptions.cpp b/costa/native/openda/bridge/src/org_openda_costa_CtaObservationDescriptions.cpp
new file mode 100644
index 000000000..dd85acde6
--- /dev/null
+++ b/costa/native/openda/bridge/src/org_openda_costa_CtaObservationDescriptions.cpp
@@ -0,0 +1,307 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/openda/bridge/org_openda_costa_CtaObservationDescriptions.cpp $
+$Revision: 2557 $, $Date: 2011-07-05 10:39:56 +0200 (Tue, 05 Jul 2011) $
+
+OpenDA interface for COSTA.
+Copyright (C) 2010  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+
+#include "org_openda_costa_CtaObservationDescriptions.h"
+#include "cta.h"
+#include "jni_cta_utils.h"
+#include "jni_datatypes.h"
+#define IDEBUG (0)
+
+/*
+ * Class:     org_openda_costa_CtaObservationDescriptions
+ * Method:    getPropertyKeys
+ * Signature: ()[Ljava/lang/String;
+ */
+JNIEXPORT jobjectArray JNICALL Java_org_openda_costa_CtaObservationDescriptions_getPropertyKeys
+(JNIEnv *env, jobject jObsDescr){
+
+   cta_jni_setJavaEnv(env);
+   CTA_ObsDescr ctaObsDescr   = cta_jni_getCtaHandle(env, jObsDescr);
+
+   int nKeys;
+   CTA_ObsDescr_Property_Count (ctaObsDescr, &nKeys);
+
+   CTA_Vector vKeys;
+   CTA_Vector_Create(CTA_DEFAULT_VECTOR, nKeys, CTA_STRING, CTA_NULL, &vKeys);
+   int retVal=CTA_ObsDescr_Get_PropertyKeys(ctaObsDescr, vKeys);
+   if (retVal!=CTA_OK){
+      cta_jni_exception(env, "CtaObservationDescriptions", "Could not get Keys", retVal);
+	  return NULL;
+   }
+
+
+   /* Create and fill a Java array of strings */
+   jclass clsString = env->FindClass("java/lang/String");
+   jobjectArray results = env->NewObjectArray((long) nKeys, clsString, NULL);
+
+   CTA_String sKey;
+   CTA_String_Create(&sKey);
+   for (int i=0; i<nKeys; i++){
+      CTA_Vector_GetVal(vKeys,i+1,&sKey,CTA_STRING);
+      jstring str = env->NewStringUTF(CTAI_String_GetPtr(sKey));
+		env->SetObjectArrayElement(results, i, str);
+   }
+
+   CTA_String_Free(&sKey);
+   CTA_Vector_Free(&vKeys);
+   return results;
+}
+
+
+/*
+ * Class:     org_openda_costa_CtaObservationDescriptions
+ * Method:    ctaCreateSelection
+ * Signature: (Ljava/lang/String;)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaObservationDescriptions_ctaCreateSelection
+(JNIEnv *env, jobject jObsDescr, jstring jSelect){
+
+   CTA_String select;
+
+   printf("Debug we maken een selectie \n");
+   cta_jni_setJavaEnv(env);
+   /* create COSTA string with selection */
+   CTA_String_Create(&select);
+   const char *sel  = env->GetStringUTFChars(jSelect, 0);
+   printf("Selectie is %s\n",sel);
+   CTA_String_Set(select,sel);
+   env->ReleaseStringUTFChars(jSelect, sel);
+
+   CTA_ObsDescr descrOut;
+   CTA_ObsDescr ctaObsDescr = cta_jni_getCtaHandle(env, jObsDescr);
+
+   printf("De obs descr waar we het mee doen is %d\n",ctaObsDescr);
+   int retVal = CTA_SObs_CreateSel (ctaObsDescr, select, &descrOut);
+   if (retVal!=CTA_OK){
+      cta_jni_exception(env, "CtaObservationDescriptions", "Could not create selection", retVal);
+	  return 0;
+   }
+   CTA_String_Free(&select);
+
+   return descrOut;
+}
+
+
+/*
+ * Class:     org_openda_costa_CtaObservationDescriptions
+ * Method:    ctaCreateTimeSelection
+ * Signature: (I)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaObservationDescriptions_ctaCreateTimeSelection
+(JNIEnv *env, jobject jObsDescr, jint timesel){
+
+   CTA_ObsDescr descrOut;
+   CTA_ObsDescr ctaObsDescr;
+
+   cta_jni_setJavaEnv(env);
+
+   ctaObsDescr = cta_jni_getCtaHandle(env, jObsDescr);
+   int retVal = CTA_SObs_CreateTimSel (ctaObsDescr, timesel, &descrOut);
+   if (retVal!=CTA_OK){
+      cta_jni_exception(env, "CtaStochObserver", "Could not create selection", retVal);
+	  return 0;
+   }
+
+   return descrOut;
+}
+
+
+/*
+ * Class:     org_costa_CtaObservationDescriptions
+ * Method:    getObservationCount
+ * Signature: ()I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaObservationDescriptions_getObservationCount
+(JNIEnv *env, jobject jObsDescr){
+
+   CTA_ObsDescr ctaObsDescr;
+   int nobs;
+   int retVal;
+
+   cta_jni_setJavaEnv(env);
+   ctaObsDescr = cta_jni_getCtaHandle(env, jObsDescr);
+   retVal = CTA_ObsDescr_Observation_Count(ctaObsDescr, &nobs);
+   if (retVal!=CTA_OK){
+      cta_jni_exception(env, "CtaStochObserver", "Could not get observation count", retVal);
+	  return 0;
+   }
+   return nobs;
+}
+
+
+
+/*
+ * Class:     org_costa_CtaObservationDescriptions
+ * Method:    getPropertyCount
+ * Signature: ()I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaObservationDescriptions_getPropertyCount
+(JNIEnv *env, jobject jObsDescr){
+   CTA_ObsDescr ctaObsDescr;
+   int nprop;
+   int retVal;
+
+   cta_jni_setJavaEnv(env);
+   ctaObsDescr = cta_jni_getCtaHandle(env, jObsDescr);
+   
+   retVal = CTA_ObsDescr_Property_Count(ctaObsDescr, &nprop);
+   if (retVal!=CTA_OK){
+      cta_jni_exception(env, "CtaStochObserver", "Could not get property count", retVal);
+	  return 0;
+   }
+   return nprop;
+}
+
+
+/*
+ * Class:     org_costa_CtaObservationDescriptions
+ * Method:    getStringProperties
+ * Signature: (Ljava/lang/String;)[Ljava/lang/String;
+ */
+JNIEXPORT jobjectArray JNICALL Java_org_openda_costa_CtaObservationDescriptions_getStringProperties
+(JNIEnv *env, jobject jObsDescr, jstring jKey){
+
+   CTA_ObsDescr ctaObsDescr;
+   int nMeasr;
+
+   cta_jni_setJavaEnv(env);
+   
+   ctaObsDescr   = cta_jni_getCtaHandle(env, jObsDescr);
+   CTA_ObsDescr_Observation_Count (ctaObsDescr, &nMeasr);
+
+   CTA_Vector vProp;
+   CTA_Vector_Create(CTA_DEFAULT_VECTOR, nMeasr, CTA_STRING, CTA_NULL, &vProp);
+
+
+   const char *key=env->GetStringUTFChars(jKey, 0);
+   int retVal = CTA_ObsDescr_Get_ValueProperties(ctaObsDescr, key, vProp, CTA_STRING);
+   env->ReleaseStringUTFChars(jKey, key);
+   if (retVal!=CTA_OK){
+      printf("hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh\n");
+      cta_jni_exception(env, "CtaObservationDescriptions", "Could not get String Properties", retVal);
+	  return NULL;
+   }
+
+
+   /* Create and fill a Java array of strings */
+   jclass clsString = env->FindClass("java/lang/String");
+   jobjectArray results = env->NewObjectArray((long) nMeasr, clsString, NULL);
+
+   CTA_String sProp;
+   CTA_String_Create(&sProp);
+   for (int i=0; i<nMeasr; i++){
+      CTA_Vector_GetVal(vProp,i+1,&sProp,CTA_STRING);
+      jstring str = env->NewStringUTF(CTAI_String_GetPtr(sProp));
+		env->SetObjectArrayElement(results, i, str);
+   }
+
+   CTA_String_Free(&sProp);
+   CTA_Vector_Free(&vProp);
+   return results;
+}
+
+
+
+/*
+ * Class:     org_openda_costa_CtaObservationDescriptions
+ * Method:    ctaCreateNativeToJavaObserver
+ * Signature: (Lorg/openda/interfaces/IObservationDescriptions;)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaObservationDescriptions_ctaCreateNativeToJavaObserver
+(JNIEnv *env, jobject jObsDescr, jobject jJavaObsDescr){
+
+   cta_jni_setJavaEnv(env);
+
+   if (IDEBUG>0) {printf("Debug: Creating observation descriptions \n");}
+   // Get the class of the Java implementation
+   jclass cls = env->GetObjectClass(jJavaObsDescr);
+
+   // Find the COSTA class
+   CTA_ObsDescrClass hobsdescrcl;
+   CTA_String hClass;
+   CTA_String_Create(&hClass);
+   CTA_String_Set(hClass,"cta_obsdescr_nativeToJava");
+   CTA_Handle_Find(hClass, CTA_OBSDESCRCLASS, &hobsdescrcl);
+
+   // Create this new class
+   CTA_ObsDescr hobsdscr;
+
+   // create the user input argument
+   sJni_Class jniClass;
+   jniClass.env = env;
+   jniClass.cls = cls;
+   jniClass.obj = jJavaObsDescr;
+
+   // Create the actual class
+   CTA_Handle hJniClass;
+   CTA_Handle_Create("Java class", CTA_DATABLOCK,&jniClass, &hJniClass);
+
+   CTA_ObsDescr_Create( hobsdescrcl, hJniClass, &hobsdscr);
+   if (IDEBUG>0) {printf("Debug: Handle of created observation descriptions is %d\n",hobsdscr);}
+
+
+
+   //Free work objects / mem
+   CTA_Handle_Free(&hJniClass);
+   CTA_String_Free(&hClass);
+
+   return hobsdscr;
+
+}
+
+
+
+/*
+ * Class:     org_costa_CtaObservationDescriptions
+ * Method:    ctaGetValueProperties
+ * Signature: (Ljava/lang/String;I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaObservationDescriptions_ctaGetValueProperties
+(JNIEnv *env, jobject jObsDescr, jstring jKey, jint vProp){
+
+   cta_jni_setJavaEnv(env);
+
+   CTA_ObsDescr ctaObsDescr   = cta_jni_getCtaHandle(env, jObsDescr);
+   printf("De handle van de obsdescr is %d\n",ctaObsDescr);
+
+   const char *key=env->GetStringUTFChars(jKey, 0);
+   printf("De key is %s\n",key);
+   int retVal = CTA_ObsDescr_Get_ValueProperties(ctaObsDescr, key, vProp, CTA_DOUBLE);
+   env->ReleaseStringUTFChars(jKey, key);
+   if (retVal!=CTA_OK){
+      printf("HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n");
+      cta_jni_exception(env, "CtaObservationDescriptions", "Could not get String Properties", retVal);
+	  return;
+   }
+}
+
+
+
+
+
+
+
+
+
+
+
diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaOpenDaModel.cpp b/costa/native/openda/bridge/src/org_openda_costa_CtaOpenDaModel.cpp
new file mode 100644
index 000000000..966353091
--- /dev/null
+++ b/costa/native/openda/bridge/src/org_openda_costa_CtaOpenDaModel.cpp
@@ -0,0 +1,798 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/openda/bridge/org_openda_costa_CtaOpenDaModel.cpp $
+$Revision: 3319 $, $Date: 2012-05-30 10:12:43 +0200 (Wed, 30 May 2012) $
+
+OpenDA interface for COSTA.
+Copyright (C) 2007  Stef Hummel / Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include "org_openda_costa_CtaOpenDaModel.h"
+#include "org_openda_costa_CtaTreeVector.h"
+#include "cta_model.h"
+#include "cta_model_factory.h"
+#include "jni_cta_utils.h"
+#include "cta_errors.h"
+#include "cta_defaults.h"
+#include "cta_mem.h"
+#include "string.h"
+
+#define IDEBUG (0)
+
+/* Store a list of all classes that are created */
+static int            CTAI_nClasses          = 0;
+static CTA_ModelClass *CTAI_Created_Classes  = NULL;
+static char           **CTAI_ClassFile       = NULL;
+static char           **CTAI_ClassConfigFile = NULL;
+
+
+/*Private functions */
+
+jintArray Java_org_openda_costa_CtaOpenDaModel_ctaGetObservedLocalization
+  (JNIEnv *env, jobject jModel, jobject jObsDescr, jdouble distance, jint iDomain){
+
+   int nObs;
+   CTA_Vector locVecs;
+
+   cta_jni_setJavaEnv(env);
+   CTA_Model ctaModel       = cta_jni_getCtaHandle(env, jModel);
+   CTA_ObsDescr ctaObsDescr = cta_jni_getCtaHandle(env, jObsDescr);
+
+   /* Get number of observations */
+   CTA_ObsDescr_Observation_Count(ctaObsDescr, &nObs);
+
+   /* Create array */
+   jintArray jValues = env->NewIntArray(nObs);
+
+   /* Create COSTA vector */
+   CTA_Vector_Create(CTA_DEFAULT_VECTOR, nObs, CTA_HANDLE, CTA_NULL, &locVecs);
+
+   /* Set all indices NULL */
+   CTA_Handle nullHandle=CTA_NULL;
+   CTA_Vector_SetConstant(locVecs, &nullHandle, CTA_HANDLE);
+
+   if (iDomain >=0){
+      /* Get localization from model */
+      CTA_Model_GetObsLocalizationDomain(ctaModel, ctaObsDescr, distance, iDomain, locVecs);
+   }
+   else {
+      CTA_Model_GetObsLocalization(ctaModel, ctaObsDescr, distance, locVecs);
+   }
+
+   /* Copy costa handles into Java integer array */
+   int *cValues = new int[nObs];
+   CTA_Vector_GetVals(locVecs, (void *) cValues, nObs, CTA_HANDLE);
+   env->SetIntArrayRegion(jValues, 0, nObs, (jint *) cValues);
+
+   /* Set all handles to CTA_NULL in order to avoid deallocation */
+   CTA_Vector_SetConstant(locVecs, &nullHandle, CTA_HANDLE);
+
+   /* Free work variables */
+   delete [] cValues;
+   CTA_Vector_Free(&locVecs);
+
+   return jValues;
+}
+
+
+
+
+
+
+/*
+ * Class:     org_costa_CtaOpenDaModel
+ * Method:    CtaOpenDaModelCreate
+ * Signature: (Ljava/lang/String;)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaCreate
+(JNIEnv *env, jobject, jstring jModelCls, jstring jConfigFile){
+
+   cta_jni_setJavaEnv(env);
+
+   /* Translate jModelCls and jConfigFile to C-strings */
+   const char *modelClassConfigFile = env->GetStringUTFChars(jModelCls, 0);
+   const char *ConfigFile = env->GetStringUTFChars(jConfigFile, 0);
+
+   if (IDEBUG>0) {
+     printf("Java_org_openda_costa_CtaOpenDaModel_ctaCreate: Start of function:\n");
+     printf("modelClassConfigFile   =%s\n",modelClassConfigFile);
+     printf("ConfigFile   =%s\n",ConfigFile);
+   }
+
+   /* Get the COSTA modelClass object */
+   /* Figure out whether this class already exists*/
+   CTA_ModelClass modelClass;
+   bool newClass=true;
+   for (int iClass=0; iClass<CTAI_nClasses; iClass++){
+      if (strcmp(CTAI_ClassFile[iClass],modelClassConfigFile)==0 &&
+          strcmp(CTAI_ClassConfigFile[iClass],ConfigFile)==0){
+          newClass=false;
+          modelClass=CTAI_Created_Classes[iClass];
+      }
+   }
+
+   /* If the class does not exist yet, create it and administrate it in the list of all classes */
+   if (newClass){
+      /* extend the list */
+      CTAI_nClasses++;
+      CTAI_Created_Classes = (CTA_ModelClass *) CTA_Realloc(CTAI_Created_Classes, CTAI_nClasses*sizeof(CTA_ModelClass));
+      CTAI_ClassFile       = (char**) CTA_Realloc(CTAI_ClassFile,       CTAI_nClasses*sizeof(char*));
+      CTAI_ClassConfigFile = (char**) CTA_Realloc(CTAI_ClassConfigFile, CTAI_nClasses*sizeof(char*));
+
+      /* store the name of the ClassFile and ClassConfigFile */
+      CTAI_ClassFile[CTAI_nClasses-1]       = (char*) CTA_Malloc((strlen(modelClassConfigFile)+1)*sizeof(char));
+      CTAI_ClassConfigFile[CTAI_nClasses-1] = (char*) CTA_Malloc((strlen(ConfigFile)+1)*sizeof(char));
+      strcpy(CTAI_ClassFile[CTAI_nClasses-1],modelClassConfigFile);
+      strcpy(CTAI_ClassConfigFile[CTAI_nClasses-1],ConfigFile);
+
+      /* Create a new modelclass*/
+      int retVal = CTA_ModelFactory_New(modelClassConfigFile, &modelClass);
+      CTAI_Created_Classes[CTAI_nClasses-1] = modelClass;
+
+      /* Release the jModelCls C-string */
+      env->ReleaseStringUTFChars(jModelCls, modelClassConfigFile );
+      if ( retVal != CTA_OK ) {
+         cta_jni_exception(env, "CtaOpenDaModel", "Could create model class", retVal);
+         return 0;
+      }
+
+      if (IDEBUG>0) {
+         printf("Java_org_openda_costa_CtaOpenDaModel_ctaCreate: handle of modelClass=%d\n",modelClass);
+      }
+   }
+
+   /* Create a model instance */
+
+   /* -Create costa string with name of the model configuration */
+   CTA_String ctaConfigFile;
+   CTA_String_Create(&ctaConfigFile);
+   CTA_String_Set(ctaConfigFile, ConfigFile);
+   env->ReleaseStringUTFChars(jConfigFile, ConfigFile);
+
+   /* -Create the model */
+   CTA_Model ctaModel;
+   if (IDEBUG>0) printf("Java_org_openda_costa_CtaOpenDaModel_ctaCreate: before model_create ");
+   int retVal = CTA_Model_Create(modelClass,ctaConfigFile,&ctaModel);
+   if (IDEBUG>0) printf("Java_org_openda_costa_CtaOpenDaModel_ctaCreate: after model_create ");
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaOpenDaModel", "Could NOT perform CTA_Model_Create", retVal);
+      return 0;
+   }
+
+   if (IDEBUG>0) {
+      printf("Java_org_openda_costa_CtaOpenDaModel_ctaCreate: Handle of model is %d\n",ctaModel);
+      printf("Java_org_openda_costa_CtaOpenDaModel_ctaCreate: End of function:\n");
+   }
+   return ctaModel;
+}
+
+/*
+ * Class:     org_costa_CtaOpenDaModel
+ * Method:    CtaModelAnnounceObservedValues
+ * Signature: (Lorg/openda/interfaces/IObservationDescriptions;)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaAnnounceObservedValues
+(JNIEnv *env, jobject jModel, jobject jObsDescr){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Model ctaModel       = cta_jni_getCtaHandle(env, jModel);
+   CTA_ObsDescr ctaObsDescr = cta_jni_getCtaHandle(env, jObsDescr);
+
+   int retVal=CTA_Model_AnnounceObsValues(ctaModel, ctaObsDescr);
+   if ( retVal != CTA_OK ) {
+     cta_jni_exception(env, "CtaOpenDaModel", "Could NOT perform CTA_Model_AnnounceObsValues", retVal);
+     return;
+   }
+}
+
+/*
+ * Class:     org_costa_CtaOpenDaModel
+ * Method:    CtaModelAxpyOnParameters
+ * Signature: (DLorg/openda/interfaces/IVector;)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaAxpyOnParameters
+(JNIEnv *env, jobject jModel, jdouble alpha, jobject jX){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Model ctaModel  = cta_jni_getCtaHandle(env, jModel);
+   CTA_TreeVector ctaX = cta_jni_getCtaHandle(env, jX);
+
+   int retVal=CTA_Model_AxpyParam(ctaModel, alpha, ctaX);
+   if ( retVal != CTA_OK ) {
+     cta_jni_exception(env, "CtaOpenDaModel", "Could perform CTA_Model_AxpyParam", retVal);
+     return;
+   }
+}
+
+
+/*
+ * Class:     org_costa_CtaOpenDaModel
+ * Method:    CtaModelCompute
+ * Signature: (Lorg/openda/interfaces/ITime;)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaCompute
+(JNIEnv *env, jobject jModel, jobject jTime){
+
+   double tstart, tstop;
+
+   cta_jni_setJavaEnv(env);
+
+   if (IDEBUG>0) {
+     printf("Java_org_openda_costa_CtaOpenDaModel_ctaCompute: Start of function:\n");
+   }
+
+   CTA_Model ctaModel = cta_jni_getCtaHandle(env, jModel);
+   CTA_Time  ctaTime  = cta_jni_getCtaHandle(env, jTime);
+
+
+   if (IDEBUG>0) {
+     printf("Java_org_openda_costa_CtaOpenDaModel_ctaCompute: model handle =%d:\n",ctaModel);
+     printf("Java_org_openda_costa_CtaOpenDaModel_ctaCompute: time  handle =%d:\n",ctaTime);
+     tstart=-9999.0;
+     tstop=-9999.0;
+     CTA_Time_GetSpan(ctaTime,&tstart,&tstop);
+     printf("Java_org_openda_costa_CtaOpenDaModel_ctaCompute: from =%g %g :\n",tstart,tstop);
+   }
+
+
+   int retVal=CTA_Model_Compute(ctaModel, ctaTime);
+   if ( retVal != CTA_OK ) {
+     cta_jni_exception(env, "CtaOpenDaModel", "Could perform CTA_Model_Compute", retVal);
+     return;
+   }
+
+   if (IDEBUG>0) {
+     printf("Java_org_openda_costa_CtaOpenDaModel_ctaCompute: End of function:\n");
+   }
+
+}
+
+
+
+
+/*
+ * Class:     org_costa_CtaOpenDaModel
+ * Method:    CtaModelgetObservedValues
+ * Signature: (Lorg/openda/interfaces/IObservationDescriptions;Lorg/costa/CtaVector;)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetObservedValues
+(JNIEnv *env, jobject jModel, jobject jObsDescr, jobject jValues){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Model ctaModel       = cta_jni_getCtaHandle(env, jModel);
+   CTA_ObsDescr ctaObsDescr = cta_jni_getCtaHandle(env, jObsDescr);
+   CTA_Vector ctaValues     = cta_jni_getCtaHandle(env, jValues);
+   CTA_Time time;
+
+   // Create an empty time interval
+   CTA_Time_Create(&time);
+   CTA_Time_SetSpan(time, 0.0, -1.0);
+
+   int retVal=CTA_Model_GetObsValues (ctaModel, time, ctaObsDescr, ctaValues);
+
+   CTA_Time_Free(&time);
+
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaOpenDaModel", "Could not perform CTA_Model_GetObsValues", retVal);
+      return;
+   }
+}
+
+/*
+ * Class:     org_costa_CtaOpenDaModel
+ * Method:    ctaGetParameters
+ * Signature: ()Lorg/openda/interfaces/IVector;
+ */
+JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetParameters
+(JNIEnv *env, jobject jModel){
+
+   cta_jni_setJavaEnv(env);
+
+   CTA_Model ctaModel       = cta_jni_getCtaHandle(env, jModel);
+   CTA_TreeVector ctaParams =CTA_NULL;
+
+   int retVal=CTA_Model_GetParam (ctaModel, &ctaParams);
+   if ( retVal != CTA_OK ) {
+     cta_jni_exception(env, "CtaOpenDaModel", "Could perform CTA_Model_GetParam", retVal);
+     return NULL;
+   }
+
+   /* Create a Java CtaTreeVector */
+   jclass clsTreeVector = env->FindClass("org/openda/costa/CtaTreeVector");
+   jmethodID constructorID = env->GetMethodID (clsTreeVector, "<init>", "()V");
+   jobject jState = env->NewObject(clsTreeVector, constructorID);
+
+   cta_jni_setCtaHandle(env, jState, ctaParams);
+
+   return jState;
+}
+
+/*
+ * Class:     org_costa_CtaOpenDaModel
+ * Method:    ctaGetState
+ * Signature: ()Lorg/openda/interfaces/IVector;
+ */
+JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetState
+(JNIEnv *env, jobject jModel){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Model ctaModel       = cta_jni_getCtaHandle(env, jModel);
+   CTA_TreeVector ctaState =CTA_NULL;
+   int retVal=CTA_Model_GetState (ctaModel, &ctaState);
+   if ( retVal != CTA_OK ) {
+     cta_jni_exception(env, "CtaOpenDaModel", "Could perform CTA_Model_GetState", retVal);
+     return NULL;
+   }
+
+   /* Create a Java CtaTreeVector */
+   jclass clsTreeVector = env->FindClass("org/openda/costa/CtaTreeVector");
+   jmethodID constructorID = env->GetMethodID (clsTreeVector, "<init>", "()V");
+   jobject jState = env->NewObject(clsTreeVector, constructorID);
+
+   cta_jni_setCtaHandle(env, jState, ctaState);
+
+   return jState;
+}
+
+/*
+ * Class:     org_costa_CtaOpenDaModel
+ * Method:    ctaGetStateScaling
+ * Signature: ()Lorg/openda/interfaces/IVector;
+ */
+JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetStateScaling
+(JNIEnv *env, jobject jModel){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Model ctaModel       = cta_jni_getCtaHandle(env, jModel);
+   CTA_TreeVector ctaScal =CTA_NULL;
+
+   int retVal=CTA_Model_GetStateScaling (ctaModel, &ctaScal);
+   if ( retVal != CTA_OK ) {
+     cta_jni_exception(env, "CtaOpenDaModel", "Could perform CTA_Model_GetStateScaling", retVal);
+     return NULL;
+   }
+
+   /* Create a Java CtaTreeVector */
+   jclass clsTreeVector = env->FindClass("org/openda/costa/CtaTreeVector");
+   jmethodID constructorID = env->GetMethodID (clsTreeVector, "<init>", "()V");
+   jobject jState = env->NewObject(clsTreeVector, constructorID);
+
+   cta_jni_setCtaHandle(env, jState, ctaScal);
+
+   return jState;
+}
+
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    ctaGetObservedLocalization
+ * Signature: (Lorg/openda/interfaces/IObservationDescriptions;D)[I
+ */
+JNIEXPORT jintArray JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetObservedLocalization
+  (JNIEnv *env, jobject jModel, jobject jObsDescr, jdouble distance){
+
+   return Java_org_openda_costa_CtaOpenDaModel_ctaGetObservedLocalization
+      (env, jModel, jObsDescr, distance, -1);
+}
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    ctaReleaseInternalState
+ * Signature: (Ljava/lang/String;)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaReleaseInternalState
+  (JNIEnv *env, jobject jModel, jstring jID){
+  // Release resources used to save a state at some earlier time.
+
+   cta_jni_setJavaEnv(env);
+   CTA_Model ctaModel       = cta_jni_getCtaHandle(env, jModel);
+   if (IDEBUG>0) {
+     printf("Java_org_openda_costa_CtaOpenDaModel_ctaReleaseInternalState: Start of function:\n");
+   }
+
+   // create costa string with ID of state to be released
+   const char *ID = env->GetStringUTFChars(jID, 0);
+   CTA_String cID;
+   CTA_String_Create(&cID);
+   CTA_String_Set(cID,ID);
+   env->ReleaseStringUTFChars(jID, ID);
+
+   int retVal = CTA_Model_ReleaseInternalState(ctaModel,cID);
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaOpenDaModel", "Could perform CTA_Model_ReleaseInternalState", retVal);
+      return;
+   }
+   if (IDEBUG>0) {
+     printf("Java_org_openda_costa_CtaOpenDaModel_ctaReleaseInternalState: End of function:\n");
+   }
+}
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    ctaSaveInternalState
+ * Signature: ()Lorg/openda/interfaces/IModelState;
+ */
+JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaSaveInternalState
+  (JNIEnv *env, jobject jModel){
+
+   cta_jni_setJavaEnv(env);
+   if (IDEBUG>0) {
+     printf("Java_org_openda_costa_CtaOpenDaModel_ctaSaveInternalState: Start of function:\n");
+   }
+
+   CTA_Model ctaModel = cta_jni_getCtaHandle(env, jModel);
+   /* create costa string */
+   CTA_String cID;
+   CTA_String_Create(&cID);
+   /* save the state and get the ID of it back */
+   int retVal = CTA_Model_SaveInternalState(ctaModel,&cID);
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaOpenDaModel", "Could perform CTA_Model_SaveInternalState", retVal);
+      return NULL;
+   }
+
+   /* put the returned costa string into a java string */
+   char *ID = CTAI_String_GetPtr(cID);
+   jstring jID =(env)->NewStringUTF(ID);
+   if (IDEBUG>0) {
+      printf("# returned instanceID bridge = '%s'\n",ID);
+   }
+
+   /* create the ctaModelState object */
+   jclass clsModelState = env->FindClass("org/openda/costa/CtaModelState");
+   jmethodID constructorID = env->GetMethodID (clsModelState, "<init>", "(Ljava/lang/String;I)V");
+   jobject jModelState = env->NewObject(clsModelState, constructorID, jID, (jint) ctaModel );
+   if (IDEBUG>0) {
+     printf("Java_org_openda_costa_CtaOpenDaModel_ctaSaveInternalState: End of function:\n");
+   }
+   CTA_String_Free(&cID);
+   return jModelState;
+}
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    ctaLoadPersistentState
+ * Signature: (Ljava/lang/String;)Lorg/openda/interfaces/IModelState;
+ */
+JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaLoadPersistentState
+  (JNIEnv *env, jobject jModel, jstring jFileName){
+
+   cta_jni_setJavaEnv(env);
+   if (IDEBUG>0) {
+     printf("Java_org_openda_costa_CtaOpenDaModel_ctaLoadPersistentState: Start of function:\n");
+   }
+
+   CTA_Model ctaModel = cta_jni_getCtaHandle(env, jModel);
+   /* create costa strings*/
+   CTA_String cFileName;
+   CTA_String cID;
+   const char *fileName = env->GetStringUTFChars(jFileName, 0);
+
+   CTA_String_Create(&cFileName);
+   CTA_String_Create(&cID);
+   CTA_String_Set(cFileName, fileName);
+   env->ReleaseStringUTFChars(jFileName, fileName);
+
+   /* load the state from file, get ID of loaded state back*/
+   CTA_Model_LoadPersistentState(ctaModel, cFileName, &cID);
+
+   /* put the returned costa string into a java string */
+   char *ID = CTAI_String_GetPtr(cID);
+   jstring jID =(env)->NewStringUTF(ID);
+
+   /* create the ctaModelState object */
+   jclass clsModelState = env->FindClass("org/openda/costa/CtaModelState");
+   jmethodID constructorID = env->GetMethodID (clsModelState, "<init>", "(Ljava/lang/String;I)V");
+   jobject jModelState = env->NewObject(clsModelState, constructorID, jID, (jint) ctaModel );
+
+   CTA_String_Free(&cID);
+   return jModelState;
+}
+
+/*
+ * Class:     org_costa_CtaOpenDaModel
+ * Method:    setAutomaticNoiseGeneration
+ * Signature: (Z)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_setAutomaticNoiseGeneration
+(JNIEnv *env, jobject jModel, jboolean add_noise){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Model ctaModel = cta_jni_getCtaHandle(env, jModel);
+   CTA_Time timespan;
+   double t1,t2;
+   CTA_Time_Create(&timespan);
+   if (add_noise){
+      t1=-999e99;
+      t2= 999e99;
+   }
+   else {
+      t1=999e99;
+      t2=-999e99;
+   }
+
+   CTA_Time_SetSpan(timespan,t1,t2);
+
+
+   int retVal=CTA_Model_AddNoise (ctaModel, timespan);
+   CTA_Time_Free(&timespan);
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaOpenDaModel", "Could perform CTA_Model_AddNoise", retVal);
+      return;
+   }
+}
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    ctaRestoreInternalState
+ * Signature: (Ljava/lang/String;)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaRestoreInternalState
+   (JNIEnv *env, jobject jModel, jstring jID){
+   //Restore a previously saved state of the model
+   int retVal;
+
+   cta_jni_setJavaEnv(env);
+   if (IDEBUG>0) {
+     printf("Java_org_openda_costa_CtaOpenDaModel_ctaRestoreInternalState: Start of function:\n");
+   }
+   CTA_Model ctaModel = cta_jni_getCtaHandle(env, jModel);
+   const char*ID = env->GetStringUTFChars(jID, 0);
+   CTA_String cID;
+   CTA_String_Create(&cID);
+   CTA_String_Set(cID,ID);
+
+   retVal = CTA_Model_RestoreInternalState(ctaModel,cID);
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaOpenDaModel", "Could perform CTA_Model_RestoreInternalState", retVal);
+      return;
+   }
+}
+
+/*
+ * Class:     org_costa_CtaOpenDaModel
+ * Method:    CtaModelSetParameters
+ * Signature: (Lorg/openda/interfaces/IVector;)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaSetParameters
+(JNIEnv *env, jobject jModel, jobject jParams){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Model ctaModel = cta_jni_getCtaHandle(env, jModel);
+   CTA_TreeVector ctaParams = cta_jni_getCtaHandle(env, jParams);
+   int retVal=CTA_Model_SetParam (ctaModel, ctaParams);
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaOpenDaModel", "Could perform CTA_Model_AddNoise", retVal);
+      return;
+   }
+}
+
+/*
+ * Class:     org_costa_CtaOpenDaModel
+ * Method:    getTimeHorizon
+ * Signature: ()Lorg/openda/interfaces/ITime;
+ */
+JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_getTimeHorizon
+   (JNIEnv *env, jobject jModel){
+
+   CTA_Time timeHorizon;
+
+   cta_jni_setJavaEnv(env);
+
+   /* Create a new time instance */
+   CTA_Time_Create(&timeHorizon);
+
+   /* Get the handle of the model */
+   CTA_Model ctaModel       = cta_jni_getCtaHandle(env, jModel);
+
+   /* Call the method */
+   int retVal=CTA_Model_GetTimeHorizon (ctaModel, timeHorizon);
+   if ( retVal != CTA_OK ) {
+         	cta_jni_exception(env, "CtaOpenDaModel", "Could perform CTA_Model_GetTimeHorizon", retVal);
+         	return NULL;
+    }
+
+   /* Create Java Time */
+   jclass clsTime = env->FindClass("org/openda/costa/CtaTime");
+   jmethodID constructorID = env->GetMethodID (clsTime, "<init>", "()V");
+   jobject jTime = env->NewObject(clsTime, constructorID);
+   /* Set the handle */
+   cta_jni_setCtaHandle(env, jTime, timeHorizon);
+
+   return jTime;
+}
+
+/*
+ * Class:     org_costa_CtaOpenDaModel
+ * Method:    getCurrentTime
+ * Signature: ()Lorg/openda/interfaces/ITime;
+ */
+JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_getCurrentTime
+   (JNIEnv *env, jobject jModel){
+
+   CTA_Time currentTime;
+
+   cta_jni_setJavaEnv(env);
+
+   /* Create a new time instance */
+   CTA_Time_Create(&currentTime);
+
+   /* Get the handle of the model */
+   CTA_Model ctaModel       = cta_jni_getCtaHandle(env, jModel);
+
+   /* Call the method */
+   int retVal=CTA_Model_GetCurrentTime (ctaModel, currentTime);
+   if ( retVal != CTA_OK ) {
+     cta_jni_exception(env, "CtaOpenDaModel", "Could perform CTA_Model_GetCurrentTime", retVal);
+     return NULL;
+   }
+
+   /* Create Java Time */
+   jclass clsTime = env->FindClass("org/openda/costa/CtaTime");
+   jmethodID constructorID = env->GetMethodID (clsTime, "<init>", "()V");
+   jobject jTime = env->NewObject(clsTime, constructorID);
+   /* Set the handle */
+   cta_jni_setCtaHandle(env, jTime, currentTime);
+
+   return jTime;
+}
+
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    ctaAxpyOnStateDomain
+ * Signature: (DLorg/openda/interfaces/IVector;I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaAxpyOnStateDomain
+  (JNIEnv *env, jobject jModel, jdouble alpha, jobject jX, jint iDomain){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Model ctaModel  = cta_jni_getCtaHandle(env, jModel);
+   CTA_TreeVector ctaX = cta_jni_getCtaHandle(env, jX);
+
+   int retVal=CTA_OK;
+   if (iDomain<0){
+      retVal=CTA_Model_AxpyState(ctaModel, alpha, ctaX);
+   }
+   else {
+      retVal=CTA_Model_AxpyStateDomain(ctaModel, alpha, iDomain, ctaX);
+   }
+   if ( retVal != CTA_OK ) {
+     cta_jni_exception(env, "CtaOpenDaModel", "Could perform CTA_Model_AxpyState", retVal);
+     return;
+   }
+}
+
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    ctaGetStateDomain
+ * Signature: (I)Lorg/openda/interfaces/IVector;
+ */
+JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetStateDomain
+  (JNIEnv *env, jobject jModel, jint iDomain){
+   cta_jni_setJavaEnv(env);
+   CTA_Model ctaModel       = cta_jni_getCtaHandle(env, jModel);
+   CTA_TreeVector ctaState =CTA_NULL;
+   int retVal=CTA_Model_GetStateDomain(ctaModel, iDomain, &ctaState);
+   if ( retVal != CTA_OK ) {
+     cta_jni_exception(env, "CtaOpenDaModel", "Could perform CTA_Model_GetState", retVal);
+     return NULL;
+   }
+
+   /* Create a Java CtaTreeVector */
+   jclass clsTreeVector = env->FindClass("org/openda/costa/CtaTreeVector");
+   jmethodID constructorID = env->GetMethodID (clsTreeVector, "<init>", "()V");
+   jobject jState = env->NewObject(clsTreeVector, constructorID);
+
+   cta_jni_setCtaHandle(env, jState, ctaState);
+
+   return jState;
+}
+
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    ctaGetObservedLocalizationDomain
+ * Signature: (Lorg/openda/interfaces/IObservationDescriptions;DI)[I
+ */
+JNIEXPORT jintArray JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetObservedLocalizationDomain
+  (JNIEnv *env, jobject jModel, jobject jObsDescr, jdouble distance, jint iDomain){
+
+   return Java_org_openda_costa_CtaOpenDaModel_ctaGetObservedLocalization
+      (env, jModel, jObsDescr, distance, iDomain);
+}
+
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    ctaGetNumDomains
+ * Signature: (D)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetNumDomains
+  (JNIEnv *env, jobject jModel, jdouble distance){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Model ctaModel  = cta_jni_getCtaHandle(env, jModel);
+
+   int nDomains;
+   int retVal=CTA_Model_GetNumDomains(ctaModel, distance, &nDomains);
+   if ( retVal != CTA_OK ) {
+     cta_jni_exception(env, "CtaOpenDaModel", "Could perform CTA_Model_AxpyState", retVal);
+     return 0;
+   }
+   return nDomains;
+}
+
+
+/*
+ * Class:     org_openda_costa_CtaOpenDaModel
+ * Method:    ctaGetObservationSelector
+ * Signature: (Lorg/openda/interfaces/IObservationDescriptions;DI)[I
+ */
+JNIEXPORT jintArray JNICALL Java_org_openda_costa_CtaOpenDaModel_ctaGetObservationSelector
+  (JNIEnv *env, jobject jModel, jobject jObsDescr, jdouble distance, jint iDomain){
+
+   cta_jni_setJavaEnv(env);
+   CTA_Model ctaModel       = cta_jni_getCtaHandle(env, jModel);
+   CTA_ObsDescr ctaObsDescr = cta_jni_getCtaHandle(env, jObsDescr);
+   CTA_Time time;
+
+
+   CTA_Vector ctaSelVec = CTA_NULL;
+   int retVal=CTA_Model_GetObsSelector (ctaModel, ctaObsDescr, distance, iDomain, &ctaSelVec);
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaModel", "ObservationSelector returned an error ", retVal);
+      return NULL;           
+   }
+   int nSel;
+   retVal=CTA_Vector_GetSize(ctaSelVec, &nSel);
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaModel", "ObservationSelector cannot get length of returned selction. Error ", retVal);
+      return NULL;           
+   }
+   int *iVals = new int[nSel];
+   retVal = CTA_Vector_GetVals(ctaSelVec,iVals,nSel,CTA_INTEGER);
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaModel", "ObservationSelector cannot get values from selection vector. Error ", retVal);
+      delete [] iVals;
+      return NULL;           
+   }
+   retVal= CTA_Vector_Free(&ctaSelVec);
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaModel", "ObservationSelector cannot get values from selection vector. Error ", retVal);
+      delete [] iVals;
+      return NULL;           
+   }
+
+   /* Copy values to java integer array */
+   jintArray jValues = env->NewIntArray(nSel);
+   jint *jIVals= new jint[nSel];
+   for (int i=0;i<nSel;i++){jIVals[i]=iVals[i];}
+
+   env->SetIntArrayRegion(jValues, 0, nSel, jIVals);
+   delete [] jIVals;
+   delete [] iVals;
+   return jValues;
+}
+
+
+
+
+
+
+
+
diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaParallel.cpp b/costa/native/openda/bridge/src/org_openda_costa_CtaParallel.cpp
new file mode 100644
index 000000000..7337933b0
--- /dev/null
+++ b/costa/native/openda/bridge/src/org_openda_costa_CtaParallel.cpp
@@ -0,0 +1,83 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/core/native/src/openda/bridge/org_costa_CtaOpenDaModel.cpp $
+$Revision: 1429 $, $Date: 2010-04-01 17:14:23 +0200 (Thu, 01 Apr 2010) $
+
+OpenDA interface for COSTA.
+Copyright (C) 2007  Stef Hummel / Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include "org_openda_costa_CtaParallel.h"
+#include "cta.h"
+#include "cta_modbuild_par.h"
+#include "jni_cta_utils.h"
+
+#define IDEBUG (0)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+   int CTA_Par_CreateGroups(CTA_Tree,int);
+#ifdef __cplusplus
+}
+#endif
+
+
+/*
+ * Class:     org_openda_costa_CtaParallel
+ * Method:    nativeInit
+ * Signature: (Ljava/lang/String;)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaParallel_nativeInit
+  (JNIEnv *env, jclass myClass, jstring jConfigName){
+
+   CTA_String hfile;
+   CTA_Tree htree;
+   int retVal;
+
+   /* Create C string of jave string of config file name */
+   const char *configName = env->GetStringUTFChars(jConfigName, 0);
+
+   /* Parse the input file */
+   CTA_String_Create(&hfile);
+   CTA_String_Set(hfile, configName);
+   env->ReleaseStringUTFChars(jConfigName, configName);
+   retVal=CTA_XML_Read(hfile,&htree);
+   if (retVal!=CTA_OK) {
+      cta_jni_exception(env, "CtaParallel",
+                        "Could parse configuration file", retVal);
+   }
+   CTA_String_Free(&hfile);
+
+   /* initialize the parallel environment */
+   retVal=CTA_Par_CreateGroups(htree,CTA_TRUE);
+   if (retVal!=CTA_OK) {
+      cta_jni_exception(env, "CtaParallel",
+                        "Could not initialize parallel environment", retVal);
+   }
+}
+
+/*
+ * Class:     org_openda_costa_CtaParallel
+ * Method:    finalizeParallelEnvironment
+ * Signature: ()V
+ */
+
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaParallel_finalizeParallelEnvironment
+  (JNIEnv *, jclass){
+   CTA_Modbuild_par_Finalize();
+}
+
diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaRelationTable.cpp b/costa/native/openda/bridge/src/org_openda_costa_CtaRelationTable.cpp
new file mode 100644
index 000000000..f3ff77036
--- /dev/null
+++ b/costa/native/openda/bridge/src/org_openda_costa_CtaRelationTable.cpp
@@ -0,0 +1,171 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/openda/bridge/org_openda_costa_CtaRelationTable.cpp $
+$Revision: 2211 $, $Date: 2011-04-01 13:48:25 +0200 (Fri, 01 Apr 2011) $
+
+OpenDA interface for COSTA.
+Copyright (C) 2007  Stef Hummel / Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include "jni_cta_utils.h"
+#include "cta_defaults.h"
+#include "cta_vector.h"
+#include "cta_errors.h"
+#include "cta_reltable.h"
+#include "org_openda_costa_CtaRelationTable.h"
+
+/*
+ * Class:     org_costa_CtaRelationTable
+ * Method:    ctaCreate
+ * Signature: ()I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaRelationTable_ctaCreate
+(JNIEnv *env, jobject obj_this){
+
+   cta_jni_setJavaEnv(env);
+
+	CTA_RelTable ctaRelTab = CTA_NULL;
+	int retVal = CTA_RelTable_Create( &ctaRelTab);
+	if ( retVal != CTA_OK ) {
+		cta_jni_exception(env, "CtaRelationTable", "Could not create relation table", retVal);
+		return 0;
+	}
+	return ctaRelTab;
+}
+
+/*
+ * Class:     org_costa_CtaRelationTable
+ * Method:    SetTableCombine
+ * Signature: (Lorg/openda/interfaces/IRelationTable;ZLorg/openda/interfaces/IRelationTable;Z)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaRelationTable_SetTableCombine
+(JNIEnv *env, jobject newtable, jobject reltab1, jboolean inv1, jobject reltab2, jboolean inv2){
+
+   cta_jni_setJavaEnv(env);
+
+   /* Get the COSTA handles */
+   CTA_RelTable ctaRelTab = cta_jni_getCtaHandle(env, newtable);
+   CTA_RelTable ctaRel1   = cta_jni_getCtaHandle(env, reltab1);
+   CTA_RelTable ctaRel2   = cta_jni_getCtaHandle(env, reltab2);
+
+   int retVal=CTA_RelTable_SetTableCombine (ctaRelTab, ctaRel1, inv1, ctaRel2, inv2);
+
+	if ( retVal != CTA_OK ) {
+		cta_jni_exception(env, "CtaRelationTable", "Could not perform CTA_RelTable_SetTableCombine", retVal);
+		return;
+   }
+}
+
+/*
+ * Class:     org_costa_CtaRelationTable
+ * Method:    SetSelect
+ * Signature: (Lorg/openda/interfaces/IVector;)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaRelationTable_SetSelect
+(JNIEnv *env, jobject jRelTab, jobject jSelect){
+
+   cta_jni_setJavaEnv(env);
+
+   /* Get the COSTA handles */
+   CTA_RelTable ctaRelTab = cta_jni_getCtaHandle(env, jRelTab);
+   CTA_Vector ctaVecSelect = cta_jni_getCtaHandle(env, jSelect);
+
+   /* Create Integervector */
+   int n;
+   CTA_Vector_GetSize(ctaVecSelect,&n);
+   CTA_Vector ctaIVecSelect;
+   CTA_Vector_Create(CTA_DEFAULT_VECTOR,n,CTA_INTEGER,CTA_NULL,&ctaIVecSelect);
+
+   double *dval=(double *) malloc(n*sizeof(double));
+   CTA_Vector_GetVals(ctaVecSelect,dval,n,CTA_DOUBLE);
+   int *ival= (int *) malloc(n*sizeof(int));
+   for (int i=0;i<n;i++){ival[i]=(int) dval[i];}
+
+   CTA_Vector_SetVals(ctaIVecSelect,ival,n,CTA_INTEGER);
+
+   int retVal=CTA_RelTable_SetSelect (ctaRelTab, ctaIVecSelect);
+
+   CTA_Vector_Free(&ctaIVecSelect);
+   free(ival);
+   free(dval);
+   if ( retVal != CTA_OK ) {
+		cta_jni_exception(env, "CtaRelationTable", "Could not perform CTA_RelTable_SetSelect", retVal);
+		return;
+   }
+
+}
+
+/*
+ * Class:     org_costa_CtaRelationTable
+ * Method:    apply
+ * Signature: (Lorg/openda/interfaces/IVector;Lorg/openda/interfaces/IVector;)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaRelationTable_apply
+(JNIEnv *env, jobject jRelTab, jobject jfrom, jobject jto){
+
+   cta_jni_setJavaEnv(env);
+   
+   CTA_RelTable ctaRelTab = cta_jni_getCtaHandle(env, jRelTab);
+   CTA_Vector ctaVecFrom  = cta_jni_getCtaHandle(env, jfrom);
+   CTA_Vector ctaVecTo    = cta_jni_getCtaHandle(env, jto);
+
+   int retVal =CTA_RelTable_Apply (ctaRelTab, ctaVecFrom, ctaVecTo);
+	if ( retVal != CTA_OK ) {
+		cta_jni_exception(env, "CtaRelationTable", "Could not perform CTA_RelTable_Apply", retVal);
+		return;
+   }
+
+}
+
+/*
+ * Class:     org_costa_CtaRelationTable
+ * Method:    applyInv
+ * Signature: (Lorg/openda/interfaces/IVector;Lorg/openda/interfaces/IVector;)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaRelationTable_applyInv
+(JNIEnv *env, jobject jRelTab, jobject jfrom, jobject jto){
+
+   cta_jni_setJavaEnv(env);
+
+   CTA_RelTable ctaRelTab = cta_jni_getCtaHandle(env, jRelTab);
+   CTA_Vector ctaVecFrom  = cta_jni_getCtaHandle(env, jfrom);
+   CTA_Vector ctaVecTo    = cta_jni_getCtaHandle(env, jto);
+
+   int retVal =CTA_RelTable_ApplyInv (ctaRelTab, ctaVecFrom, ctaVecTo);
+   if ( retVal != CTA_OK ) {
+		cta_jni_exception(env, "CtaRelationTable", "Could not perform CTA_RelTable_ApplyInv", retVal);
+		return;
+   }
+
+}
+
+/*
+ * Class:     org_costa_CtaRelationTable
+ * Method:    free
+ * Signature: ()V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaRelationTable_free
+(JNIEnv *env, jobject jRelTab){
+
+   cta_jni_setJavaEnv(env);
+   
+   CTA_RelTable ctaRelTab = cta_jni_getCtaHandle(env, jRelTab);
+   int retVal = CTA_RelTable_Free(&ctaRelTab);
+   if ( retVal != CTA_OK ) {
+		cta_jni_exception(env, "CtaRelationTable", "Could not perform CTA_RelTable_Free", retVal);
+		return;
+   }
+}
diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaStochObserver.cpp b/costa/native/openda/bridge/src/org_openda_costa_CtaStochObserver.cpp
new file mode 100644
index 000000000..35dc63f26
--- /dev/null
+++ b/costa/native/openda/bridge/src/org_openda_costa_CtaStochObserver.cpp
@@ -0,0 +1,320 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/openda/bridge/org_openda_costa_CtaStochObserver.cpp $
+$Revision: 3443 $, $Date: 2012-08-28 15:33:36 +0200 (Tue, 28 Aug 2012) $
+
+OpenDA interface for COSTA.
+Copyright (C) 2007  Stef Hummel / Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <string.h>
+#include "org_openda_costa_CtaStochObserver.h"
+#include "cta_sobs.h"
+#include "jni_cta_utils.h"
+#include "cta_errors.h"
+#include "cta_defaults.h"
+
+
+/*
+ * Class:     org_costa_CtaStochObserver
+ * Method:    CtaStochObserver_createNative
+ * Signature: (Ljava/lang/String;Ljava/lang/String)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaStochObserver_ctaCreateNative
+(JNIEnv *env, jobject, jstring jTypeObserver, jstring jFilename){
+
+   cta_jni_setJavaEnv(env);
+   printf("Debug: start: Java_org_openda_costa_CtaStochObserver_ctaCreateNative\n");
+   CTA_StochObs hstochobs;
+   CTA_String Filename,sTypeObserver;
+   CTA_SObsClass hsobsclass;
+   CTA_Tree htree;
+   int retVal=0;
+   
+   CTA_String_Create(&Filename);
+   const char *str  = env->GetStringUTFChars(jFilename, 0);
+   CTA_String_Set(Filename, str);
+   printf("Debug Observation filename=%s\n",str);
+   env->ReleaseStringUTFChars(jFilename, str);
+
+   const char *typeObserver = env->GetStringUTFChars(jTypeObserver, 0);
+   if (strcmp(typeObserver,"SQLITE")==0){
+      retVal = CTA_SObs_Create (CTA_DEFAULT_SOBS, Filename, &hstochobs);
+   }
+   else if (strcmp(typeObserver,"MAORI")==0) {
+      printf("DEBUG vanuit bridge. Create de MAORI filename=%d\n",Filename);
+      retVal = CTA_SObs_Create (CTA_MAORI_SOBS, Filename, &hstochobs);
+   }
+   else if (strcmp(typeObserver,"USER")==0) {
+      printf("DEBUG vanuit bridge. Create de USER filename=%d\n",Filename);
+      retVal = CTA_SObs_Create (CTA_USER_SOBS, Filename, &hstochobs);
+   }
+   else {
+      char message[1000];
+
+      CTA_String_Create(&sTypeObserver);
+      CTA_String_Set(sTypeObserver,typeObserver);
+      retVal=CTA_XML_Read(sTypeObserver,&htree);
+      CTA_String_Free(&sTypeObserver);
+      if (retVal != CTA_OK) {
+         sprintf(message, "Error reading xml-file %s with Stochastic Observer class. Error code %d", typeObserver, retVal);
+         cta_jni_exception(env, "CtaStochObserver", message, retVal);
+         hstochobs = CTA_NULL;
+      } else {
+         retVal=CTA_Tree_GetHandleStr(htree, (char *) "COSTA/sobsclass", &hsobsclass);
+         if (retVal != CTA_OK) {
+            sprintf(message, "Error get class from tree (created from xml-file %s. Error code %d\n" ,typeObserver, retVal);
+            cta_jni_exception(env, "CtaStochObserver", message, retVal);
+            hstochobs = CTA_NULL;
+        } else {
+           retVal = CTA_SObs_Create (hsobsclass, Filename, &hstochobs);            
+           if (retVal != CTA_OK) {
+              sprintf(message, "Cannot create stochastic Observer Error code %d\n" ,retVal);
+              cta_jni_exception(env, "CtaStochObserver", message, retVal);
+              hstochobs = CTA_NULL;
+           }
+        }
+      }
+      CTA_Tree_Free(&htree);
+   }
+/*   else {
+      char message[256];
+      sprintf(message, "Unknown Observer type: %s", typeObserver);
+      retVal    = CTA_NOT_YET_SUPPORTED;
+      hstochobs = CTA_NULL;
+      cta_jni_exception(env, "CtaStochObserver", message, retVal);
+   } */
+
+   printf("Debug Observation handle is =%d\n", hstochobs);
+   env->ReleaseStringUTFChars(jTypeObserver, typeObserver);
+   if (retVal!=CTA_OK){
+      cta_jni_exception(env, "CtaStochObserver", "Could create new stoch observer", retVal);
+		return 0;
+   }
+
+   CTA_String_Free(&Filename);
+   printf("Debug: end: Java_org_openda_costa_CtaStochObserver_ctaCreateNative\n");
+   return hstochobs;
+
+}
+
+
+/*
+ * Class:     org_costa_CtaStochObserver
+ * Method:    getCount
+ * Signature: ()I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaStochObserver_getCount
+(JNIEnv *env, jobject jStochObs){
+
+   cta_jni_setJavaEnv(env);
+   
+   CTA_StochObs ctaStochObs = cta_jni_getCtaHandle(env, jStochObs);
+   int nmeasr;
+   int retVal = CTA_SObs_Count(ctaStochObs, &nmeasr);
+   if (retVal!=CTA_OK){
+      cta_jni_exception(env, "CtaStochObserver", "Could not get Count", retVal);
+		return 0;
+   }
+   return nmeasr;
+}
+
+/*
+ * Class:     org_costa_CtaStochObserver
+ * Method:    ctaStochObserverGetExpectations
+ * Signature: (I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaStochObserver_ctaGetExpectations
+(JNIEnv *env, jobject jStochObs, jint ctaVector){
+
+   cta_jni_setJavaEnv(env);
+
+   CTA_StochObs ctaStochObs = cta_jni_getCtaHandle(env, jStochObs);
+   int retVal = CTA_SObs_GetExpectation(ctaStochObs, ctaVector);
+   if (retVal!=CTA_OK){
+      cta_jni_exception(env, "CtaStochObserver", "Could not get Expectation", retVal);
+		return;
+   }
+ }
+
+/*
+ * Class:     org_costa_CtaStochObserver
+ * Method:    CtagetObservationDescriptions
+ * Signature: ()I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaStochObserver_ctaGetObservationDescriptions
+(JNIEnv *env, jobject jStochObs){
+
+   cta_jni_setJavaEnv(env);
+   
+   CTA_StochObs ctaStochObs = cta_jni_getCtaHandle(env, jStochObs);
+   CTA_ObsDescr hobsdescr;
+   int retVal = CTA_SObs_GetDescription(ctaStochObs, &hobsdescr);
+   if (retVal!=CTA_OK){
+      cta_jni_exception(env, "CtaStochObserver", "Could not get Description", retVal);
+		return 0;
+   }         
+   return hobsdescr;
+}
+
+/*
+ * Class:     org_costa_CtaStochObserver
+ * Method:    ctaStochObserverGetRealizations
+ * Signature: (I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaStochObserver_ctaGetRealizations
+(JNIEnv *env, jobject jStochObs, jint ctaVector){
+
+   cta_jni_setJavaEnv(env);
+   
+   CTA_StochObs ctaStochObs = cta_jni_getCtaHandle(env, jStochObs);
+   int retVal = CTA_SObs_GetRealisation(ctaStochObs, ctaVector);
+   if (retVal!=CTA_OK){
+      cta_jni_exception(env, "CtaStochObserver", "Could not get Realizations", retVal);
+		return;
+   }
+ }
+
+/*
+ * Class:     org_costa_CtaStochObserver
+ * Method:    ctaStochObserverGetValues
+ * Signature: (I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaStochObserver_ctaGetValues
+(JNIEnv *env, jobject jStochObs, jint ctaVector){
+
+   cta_jni_setJavaEnv(env);
+
+   CTA_StochObs ctaStochObs = cta_jni_getCtaHandle(env, jStochObs);
+   int retVal = CTA_SObs_GetVal(ctaStochObs, ctaVector);
+   if (retVal!=CTA_OK){
+      cta_jni_exception(env, "CtaStochObserver", "Could not get Values", retVal);
+		return;
+   }
+ }
+
+/*
+ * Class:     org_costa_CtaStochObserver
+ * Method:    ctaStochObserverGetVariances
+ * Signature: (I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaStochObserver_ctaGetVariances
+(JNIEnv *env, jobject jStochObs, jint ctaVector){
+
+   cta_jni_setJavaEnv(env);
+   
+   CTA_StochObs ctaStochObs = cta_jni_getCtaHandle(env, jStochObs);
+   int retVal = CTA_SObs_GetVar(ctaStochObs, ctaVector);
+   if (retVal!=CTA_OK){
+      cta_jni_exception(env, "CtaStochObserver", "Could not get Variance", retVal);
+		return;
+   }
+ }
+
+/*
+ * Class:     org_costa_CtaStochObserver
+ * Method:    ctaGetStandardDeviation
+ * Signature: (I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaStochObserver_ctaGetStandardDeviation
+(JNIEnv *env, jobject jStochObs, jint ctaVector){
+   
+   cta_jni_setJavaEnv(env);
+
+   CTA_StochObs ctaStochObs = cta_jni_getCtaHandle(env, jStochObs);
+   int retVal = CTA_SObs_GetStd(ctaStochObs, ctaVector);
+   if (retVal!=CTA_OK){
+       cta_jni_exception(env, "CtaStochObserver", "Could not get Standard deviation", retVal);
+     return;
+   }
+   return;
+}
+
+
+
+
+
+
+
+/*
+ * Class:     org_costa_CtaStochObserver
+ * Method:    ctaStochObserverCreateSelection
+ * Signature: (Ljava/lang/String;)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaStochObserver_ctaCreateSelection
+(JNIEnv *env, jobject jStochObs, jstring jSelect){
+
+   cta_jni_setJavaEnv(env);
+   
+   CTA_String select;
+   /* create COSTA string with selection */
+   CTA_String_Create(&select);
+   const char *sel  = env->GetStringUTFChars(jSelect, 0);
+   CTA_String_Set(select,sel);
+   env->ReleaseStringUTFChars(jSelect, sel);
+
+   CTA_StochObs hsobsout;
+   CTA_StochObs ctaStochObs = cta_jni_getCtaHandle(env, jStochObs);
+   int retVal = CTA_SObs_CreateSel (ctaStochObs, select, &hsobsout);
+   if (retVal!=CTA_OK){
+      cta_jni_exception(env, "CtaStochObserver", "Could not create selection", retVal);
+		return 0;
+   }
+   CTA_String_Free(&select); 
+
+   return hsobsout;
+}
+
+/*
+ * Class:     org_costa_CtaStochObserver
+ * Method:    ctaStochObserverCreateTimeSelection
+ * Signature: (I)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaStochObserver_ctaCreateTimeSelection
+(JNIEnv *env, jobject jStochObs, jint timesel){
+
+   cta_jni_setJavaEnv(env);
+
+   CTA_StochObs hsobsout;
+   CTA_StochObs ctaStochObs = cta_jni_getCtaHandle(env, jStochObs);
+   int retVal = CTA_SObs_CreateTimSel (ctaStochObs, timesel, &hsobsout);
+   if (retVal!=CTA_OK){
+      cta_jni_exception(env, "CtaStochObserver", "Could not create selection", retVal);
+		return 0;
+   }
+   return hsobsout;
+
+}
+
+/*
+ * Class:     org_costa_CtaStochObserver
+ * Method:    ctaStochObserverEvaluatePDF
+ * Signature: (II)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaStochObserver_ctaEvaluatePDF
+(JNIEnv *env, jobject jStochObs, jint ctaVectorX, jint ctaVectorY){
+
+   cta_jni_setJavaEnv(env);
+
+   CTA_StochObs ctaStochObs = cta_jni_getCtaHandle(env, jStochObs);
+   int retVal = CTA_SObs_EvalPDF(ctaStochObs, ctaVectorX, ctaVectorY);
+   if (retVal!=CTA_OK){
+      cta_jni_exception(env, "CtaStochObserver", "Could not get evaluate PDF", retVal);
+		return;
+   }
+ }
+
+
diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaTime.cpp b/costa/native/openda/bridge/src/org_openda_costa_CtaTime.cpp
new file mode 100644
index 000000000..4b6ea32eb
--- /dev/null
+++ b/costa/native/openda/bridge/src/org_openda_costa_CtaTime.cpp
@@ -0,0 +1,286 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/openda/bridge/org_openda_costa_CtaTime.cpp $
+$Revision: 2872 $, $Date: 2011-11-07 10:48:40 +0100 (Mon, 07 Nov 2011) $
+
+OpenDA interface for COSTA.
+Copyright (C) 2007  Stef Hummel / Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+
+#include "org_openda_costa_CtaTime.h"
+#include "cta_defaults.h"
+#include "jni_cta_utils.h"
+#include "cta_time.h"
+#include "cta_errors.h"
+
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    create
+ * Signature: ()I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTime_create
+  (JNIEnv * env, jobject obj_this)
+{
+
+   cta_jni_setJavaEnv(env);
+   
+   int handle = CTA_NULL;
+
+   int retVal = CTA_Time_Create((CTA_Time *) &handle);
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaTime_Create", "", retVal);
+      return 0;
+   }
+   jint handle2 = (jint) handle;
+   return handle2;
+}
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    getMJD
+ * Signature: ()D
+ */
+JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getMJD
+  (JNIEnv * env, jobject obj_this)
+{
+   cta_jni_setJavaEnv(env);
+
+   CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this);
+   double t1,t2;
+   int retVal=CTA_Time_GetSpan(ctaHandle, &t1, &t2);
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaTime", "Could not getMJD ", retVal);
+      return 0.0;
+   }
+   if (t1!=t2)  {
+      cta_jni_exception(env, "CtaTime", "Could not getMJD this is a time span ",0);
+      return 0.0;
+   }
+   return (jdouble) t2;
+}
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    getUserTime
+ * Signature: ()D
+ */
+JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getUserTime
+  (JNIEnv * env, jobject obj_this)
+{
+   cta_jni_setJavaEnv(env);
+	return 0;
+}
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    setMJD
+ * Signature: (D)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaTime_setMJD
+  (JNIEnv *env, jobject obj_this, jdouble t)
+{
+   cta_jni_setJavaEnv(env);
+
+   CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this);
+   int retVal=CTA_Time_SetSpan(ctaHandle, t, t);
+   if ( retVal != CTA_OK ) {
+		cta_jni_exception(env, "CtaTime", "Could not setMJD ", retVal);
+		return;
+	}
+  //CTA_Time_Export(ctaHandle,CTA_FILE_STDOUT);
+}
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    setUserTime
+ * Signature: (D)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaTime_setUserTime
+  (JNIEnv *env, jobject, jdouble)
+{
+   cta_jni_setJavaEnv(env);
+}
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    getStep
+ * Signature: ()J
+ */
+JNIEXPORT jlong JNICALL Java_org_openda_costa_CtaTime_getStep
+  (JNIEnv * env, jobject obj_this)
+{
+   cta_jni_setJavaEnv(env);
+	return 0;
+}
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    getBeginMJD
+ * Signature: ()D
+ */
+JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getBeginMJD
+  (JNIEnv * env, jobject obj_this)
+{
+   cta_jni_setJavaEnv(env);
+
+   double t1,t2;
+   CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this);
+   int retVal=CTA_Time_GetSpan(ctaHandle, &t1, &t2);
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaTime", "Could not getBeginMJD ", retVal);
+      return 0.0;
+   }
+   return (jdouble) t1;
+}
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    getEndMJD
+ * Signature: ()D
+ */
+JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getEndMJD
+  (JNIEnv * env, jobject obj_this)
+{
+   cta_jni_setJavaEnv(env);
+
+   double t1,t2;
+   CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this);
+   int retVal=CTA_Time_GetSpan(ctaHandle, &t1, &t2);
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaTime", "Could not getEndMJD ", retVal);
+      return 0.0;
+   }
+   return (jdouble) t2;
+}
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    getStepMJD
+ * Signature: ()D
+ */
+JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getStepMJD
+  (JNIEnv * env, jobject obj_this)
+{
+   cta_jni_setJavaEnv(env);
+
+   double t1;
+   CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this);
+   int retVal=CTA_Time_GetStep(ctaHandle, &t1);
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaTime", "Could not getStepMJD ", retVal);
+      return 0.0;
+   }
+   return (jdouble) t1;
+}
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    getBeginUserTime
+ * Signature: ()D
+ */
+JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getBeginUserTime
+  (JNIEnv * env, jobject obj_this)
+{
+   cta_jni_setJavaEnv(env);
+	return 0;
+}
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    getEndUserTime
+ * Signature: ()D
+ */
+JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getEndUserTime
+  (JNIEnv * env, jobject obj_this)
+{
+   cta_jni_setJavaEnv(env);
+	return 0;
+}
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    getUserStep
+ * Signature: ()D
+ */
+JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaTime_getUserStep
+  (JNIEnv * env, jobject obj_this)
+{
+   cta_jni_setJavaEnv(env);
+	return 0;
+}
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    setSpanMJD
+ * Signature: (DDD)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaTime_setSpanMJD
+  (JNIEnv *env, jobject obj_this, jdouble tstart, jdouble tend, jdouble tstep)
+{
+   cta_jni_setJavaEnv(env);
+
+   CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this);
+   int retVal=CTA_Time_SetSpan(ctaHandle, tstart, tend);
+   if ( retVal != CTA_OK ) {
+		cta_jni_exception(env, "CtaTime", "Could not setSpanMJD ", retVal);
+		return;
+	}
+
+   retVal=CTA_Time_SetStep(ctaHandle, tstep);
+   if ( retVal != CTA_OK ) {
+		cta_jni_exception(env, "CtaTime", "Could not setSpanMJD ", retVal);
+		return;
+	}
+}
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    setUserSpan
+ * Signature: (DDD)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaTime_setUserSpan
+  (JNIEnv *env, jobject, jdouble, jdouble, jdouble)
+{
+   cta_jni_setJavaEnv(env);
+
+}
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    getStepCount
+ * Signature: ()J
+ */
+JNIEXPORT jlong JNICALL Java_org_openda_costa_CtaTime_getStepCount
+  (JNIEnv * env, jobject obj_this)
+{
+   cta_jni_setJavaEnv(env);
+	return 0;
+}
+
+/*
+ * Class:     org_costa_CtaTime
+ * Method:    clone
+ * Signature: ()Lorg/openda/interfaces/ITime;
+ */
+JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaTime_clone
+  (JNIEnv * env, jobject obj_this)
+{
+   cta_jni_setJavaEnv(env);
+	return 0;
+}
+
diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaTreeVector.cpp b/costa/native/openda/bridge/src/org_openda_costa_CtaTreeVector.cpp
new file mode 100644
index 000000000..dafc0970a
--- /dev/null
+++ b/costa/native/openda/bridge/src/org_openda_costa_CtaTreeVector.cpp
@@ -0,0 +1,513 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/openda/bridge/org_openda_costa_CtaTreeVector.cpp $
+$Revision: 2763 $, $Date: 2011-09-13 13:52:01 +0200 (Tue, 13 Sep 2011) $
+
+OpenDA interface for COSTA.
+Copyright (C) 2007  Stef Hummel / Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+
+#include "org_openda_costa_CtaTreeVector.h"
+#include "cta_treevector.h"
+#include "jni_cta_utils.h"
+#include "cta_errors.h"
+#include "cta_defaults.h"
+#include "cta_metainfo.h"
+
+
+/*
+ * Class:     org_costa_CtaTreeVector
+ * Method:    getId
+ * Signature: ()Ljava/lang/String;
+ */
+JNIEXPORT jstring JNICALL Java_org_openda_costa_CtaTreeVector_getId
+(JNIEnv *env, jobject obj_this){
+
+   cta_jni_setJavaEnv(env);
+
+   char id[CTA_STRLEN_TAG];
+   CTA_TreeVector ctaHandle = cta_jni_getCtaHandle(env, obj_this);
+
+   int retVal = CTA_TreeVector_GetTag (ctaHandle, id);
+
+	if ( retVal != CTA_OK ) {
+		cta_jni_exception(env, "CtaVector", "Could not get ID", retVal);
+		return NULL;
+	}
+
+   return env->NewStringUTF(id);
+
+}
+
+/*
+ * Class:     org_costa_CtaTreeVector
+ * Method:    ctaGetNumSubTreeVectors
+ * Signature: (Ljava/lang/String;)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaGetNumSubTreeVectors
+(JNIEnv *env, jobject obj_this){
+   cta_jni_setJavaEnv(env);
+   CTA_TreeVector ctaHandle = cta_jni_getCtaHandle(env, obj_this);
+   int numSubTrees;
+   CTA_TreeVector_GetNumSubTree(ctaHandle, &numSubTrees);
+   return numSubTrees;
+}
+
+/*
+ * Class:     org_costa_CtaTreeVector
+ * Method:    ctaGetSubTreeVector
+ * Signature: (Ljava/lang/String;)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaGetSubTreeVector
+(JNIEnv *env, jobject obj_this, jstring path){
+
+   cta_jni_setJavaEnv(env);
+
+   CTA_TreeVector ctaHandle = cta_jni_getCtaHandle(env, obj_this);
+   CTA_TreeVector subtreevec;
+   const char *spath  = env->GetStringUTFChars(path, 0);
+
+   int retVal=CTA_TreeVector_GetSubTreeVec (ctaHandle, spath, &subtreevec);
+   env->ReleaseStringUTFChars(path, spath);
+   
+   
+	if ( retVal != CTA_OK ) {
+		cta_jni_exception(env, "CtaVector", "Could not get SubTreeVector", retVal);
+		return 0;
+	}
+   
+   /* Increase reference count to subtree-vector */
+   CTA_TreeVector_IncRefCount(subtreevec);   
+   
+   return subtreevec;
+}
+
+
+/*
+ * Class:     org_costa_ctatreevector
+ * Method:    ctaSetRegGrid
+ * Signature: ()V
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaSetRegGrid
+  (JNIEnv * env, jobject obj_this, jint nx, jint ny, jint nz, jdouble x0, jdouble y0, jdouble z0,
+                 jdouble dx, jdouble dy, jdouble dz)
+{
+  int retval;
+  CTA_Metainfo minfo;
+  CTA_TreeVector ctaTreeVec = cta_jni_getCtaHandle(env, obj_this);
+  char cGrid[]="grid";
+  
+  CTA_Metainfo_Create(&minfo); 
+
+  retval = CTA_Metainfo_setRegGrid(minfo,cGrid,nx,ny,nz,x0,y0,z0,dx,dy,dz);
+  if (retval) return retval;
+  retval = CTA_TreeVector_SetMetainfo(ctaTreeVec, minfo);
+
+  return retval;
+}
+
+
+/*
+ * Class:     org_costa_treevector
+ * Method:    ctaNetcdfClose
+ * Signature: ()V
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaNetcdfClose
+  (JNIEnv * env, jobject obj_this, jint ctafilehandle)
+{
+  cta_jni_setJavaEnv(env);
+
+  int ierr;
+  int foutput;
+
+
+  foutput = ctafilehandle;
+  ierr = CTA_File_Free(&foutput);
+  if ( ierr != CTA_OK ) {
+     cta_jni_exception(env, "CtaTreeVector_export", "Could not close (netcdf) cta_file", ierr);
+     return -1;
+
+   }
+   return ierr;
+   
+ 
+}
+
+/*
+ * Class:     org_resultwriters_NetcdfResultWriter
+ * Method:    ctaNetcdfClose
+ * Signature: ()V
+ */
+JNIEXPORT jint JNICALL Java_org_openda_resultwriters_NetcdfResultWriter_ctaNetcdfClose
+  (JNIEnv * env, jobject obj_this, jint ctafilehandle)
+{
+  cta_jni_setJavaEnv(env);
+
+  int ierr;
+  int foutput;
+
+
+  foutput = ctafilehandle;
+  ierr = CTA_File_Free(&foutput);
+  if ( ierr != CTA_OK ) {
+     cta_jni_exception(env, "CtaTreeVector_export", "Could not close (netcdf) cta_file", ierr);
+     return -1;
+
+   }
+   return ierr;
+}
+
+
+/*
+ * Class:     org_costa_treevector
+ * Method:    ctaNetcdfInit
+ * Signature: (Ljava/lang/String;)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaNetcdfInit
+  (JNIEnv * env, jobject obj_this, jstring path, jstring action)
+{
+ cta_jni_setJavaEnv(env);
+
+  int ierr;
+  int foutput;
+  int soutfile;
+  int haction;
+
+   const char *spath  = env->GetStringUTFChars(path, 0);
+   const char *saction  = env->GetStringUTFChars(action, 0);
+
+   // Maybe we should first check if filename in 'path' does exist and is opened?
+   // In that case, we need to retrieve the file handle somehow.
+
+
+   ierr = CTA_String_Create(&soutfile);
+   if ( ierr != CTA_OK ) {
+     cta_jni_exception(env, "CtaTreeVector_netcdfinit", "Could not create string", ierr);
+     return -1;
+   }
+   ierr = CTA_String_Set(soutfile, spath);
+   if ( ierr != CTA_OK ) {
+     cta_jni_exception(env, "CtaTreeVector_netcdfinit", "Could not set filename string", ierr);
+     return -1;
+   }
+   // Note: if file already exists, this is not OK.
+   ierr = CTA_File_Create(&foutput);
+   if ( ierr != CTA_OK ) {
+     cta_jni_exception(env, "CtaTreeVector_netcdfinit", "Could not create file handle", ierr);
+     return -1;
+   }
+   // Note: if file is already open, export should occur as 'append'
+   // in netcdf case. 
+   // Note further, that cta_file_open will delete the contents of an existing netcdf file!
+
+   env->ReleaseStringUTFChars(path, spath);
+   env->ReleaseStringUTFChars(action, saction);
+
+   CTA_String_Create(&haction);
+   CTA_String_Set(haction, saction);
+   ierr = CTA_File_Open(foutput,soutfile,haction);
+   if ( ierr != CTA_OK ) {
+     cta_jni_exception(env, "CtaTreeVector_netcdfinit", "Could not open file", ierr);
+     return -1;
+   }
+
+   CTA_String_Free(&haction);
+   CTA_String_Free(&soutfile);
+   
+   return foutput;  //this is the cta filehandle!
+
+}
+
+/*
+ * Class:     org_resultwriters_NetcdfResultWriter
+ * Method:    ctaNetcdfInit
+ * Signature: (Ljava/lang/String;)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_resultwriters_NetcdfResultWriter_ctaNetcdfInit
+  (JNIEnv * env, jobject obj_this, jstring path, jstring action)
+{
+  cta_jni_setJavaEnv(env);
+
+  int ierr;
+  int foutput;
+  int soutfile;
+  int haction;
+
+   const char *spath  = env->GetStringUTFChars(path, 0);
+   const char *saction  = env->GetStringUTFChars(action, 0);
+
+   // Maybe we should first check if filename in 'path' does exist and is opened?
+   // In that case, we need to retrieve the file handle somehow.
+
+ 
+   ierr = CTA_String_Create(&soutfile);
+   if ( ierr != CTA_OK ) {
+     cta_jni_exception(env, "CtaTreeVector_netcdfinit", "Could not create string", ierr);
+     return -1;
+   }
+   ierr = CTA_String_Set(soutfile, spath);
+   if ( ierr != CTA_OK ) {
+     cta_jni_exception(env, "CtaTreeVector_netcdfinit", "Could not set filename string", ierr);
+     return -1;
+   }
+   // Note: if file already exists, this is not OK.
+   ierr = CTA_File_Create(&foutput);
+   if ( ierr != CTA_OK ) {
+     cta_jni_exception(env, "CtaTreeVector_netcdfinit", "Could not create file handle", ierr);
+     return -1;
+   }
+
+   // Note: if file is already open, export should occur as 'append'
+   // in netcdf case
+   CTA_String_Create(&haction);
+   CTA_String_Set(haction, saction);
+   ierr = CTA_File_Open(foutput,soutfile,haction);
+   if ( ierr != CTA_OK ) {
+     cta_jni_exception(env, "CtaTreeVector_netcdfinit", "Could not open file", ierr);
+     return -1;
+   }
+   env->ReleaseStringUTFChars(path, spath);
+   env->ReleaseStringUTFChars(action, saction);
+   CTA_String_Free(&haction);
+   ierr = CTA_String_Free(&soutfile);
+   if ( ierr != CTA_OK ) {
+     cta_jni_exception(env, "CtaTreeVector_netcdfinit", "Could not free cta_string", ierr);
+     return -1;
+   }
+
+   return foutput;  //this is the cta filehandle!
+
+}
+
+
+
+
+
+/*
+ * Class:     org_costa_CtaTreeVector
+ * Method:    ctaTreeExport
+ * Signature: (Ljava/lang/String;)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaExport
+(JNIEnv *env, jobject obj_this, jint ctafilehandle){
+
+   cta_jni_setJavaEnv(env);
+
+  // int soutfile;
+   int foutput;
+
+   CTA_TreeVector ctaHandletv = cta_jni_getCtaHandle(env, obj_this);
+
+   foutput = ctafilehandle;
+ 
+   int retVal=CTA_TreeVector_Export (ctaHandletv, foutput);
+
+   if ( retVal != CTA_OK ) {
+     cta_jni_exception(env, "CtaTreeVector_export", "Could not export Treevector", retVal);
+     return 0;
+   }
+
+   return 0;
+}
+
+/*
+ * Class:     org_costa_CtaTreeVector
+ * Method:    ctaImport
+ * Signature: (Ljava/lang/String;)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaImport
+(JNIEnv *env, jobject obj_this, jint ctafilehandle){
+
+   cta_jni_setJavaEnv(env);
+
+   int finput;
+
+   CTA_TreeVector ctaHandletv = cta_jni_getCtaHandle(env, obj_this);
+
+   finput = ctafilehandle;
+   int retVal=CTA_TreeVector_Import (ctaHandletv, finput);
+
+   if ( retVal != CTA_OK ) {
+     cta_jni_exception(env, "CtaTreeVector_import", "Could not import Treevector", retVal);
+     return 0;
+   }
+
+   return 0;
+}
+
+/*
+ * Class:     org_costa_CtaTreeVector
+ * Method:    ctaVImport
+ * Signature: (Ljava/lang/String;)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaVImport
+(JNIEnv *env, jobject obj_this, jint ctafilehandle){
+
+   cta_jni_setJavaEnv(env);
+
+   int finput;
+
+   CTA_TreeVector ctaHandletv = cta_jni_getCtaHandle(env, obj_this);
+
+   finput = ctafilehandle;
+   int retVal=CTA_TreeVector_VImport (ctaHandletv, finput);
+
+   if ( retVal != CTA_OK ) {
+     cta_jni_exception(env, "CtaTreeVector_Vimport", "Could not import Treevector", retVal);
+     return 0;
+   }
+
+   return 0;
+}
+
+/*
+ * Class:     org_costa_CtaTreeVector
+ * Method:    ctaGetSubTreeVectorId
+ * Signature: ()Ljava/lang/String;
+ */
+JNIEXPORT jstring JNICALL Java_org_openda_costa_CtaTreeVector_ctaGetSubTreeVectorId
+(JNIEnv *env, jobject obj_this, jint index){
+   char tag[CTA_STRLEN_TAG];
+   cta_jni_setJavaEnv(env);
+   CTA_TreeVector ctaTreeVec = cta_jni_getCtaHandle(env, obj_this);
+   CTA_TreeVector_GetSubTreeVecId (ctaTreeVec, index, tag);
+   return env->NewStringUTF(tag);
+}
+
+
+/*
+ * Class:     org_costa_CtaTreeVector
+ * Method:    getCaption
+ * Signature: ()Ljava/lang/String;
+ */
+JNIEXPORT jstring JNICALL Java_org_openda_costa_CtaTreeVector_getCaption
+(JNIEnv *env, jobject obj_this){
+   cta_jni_exception(env, "CtaTreeVector", "Get Caption is not implemented", 911);
+
+   cta_jni_setJavaEnv(env);
+
+   return NULL;
+
+}
+
+/*
+ * Class:     org_costa_CtaTreeVector
+ * Method:    ctaCreateFromVector
+ * Signature: ()I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaCreateFromVector
+(JNIEnv *env , jobject obj_this, jstring str_id, jstring str_tag, jint ctavectorhandle){
+
+   cta_jni_setJavaEnv(env);
+
+   int handle = CTA_NULL;
+   const char *sid  = env->GetStringUTFChars(str_id, 0);
+   const char *stag  = env->GetStringUTFChars(str_tag, 0);
+   int retVal = CTA_TreeVector_Create(sid,stag, &handle);
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaTreeVector I", "Could not create treevector", retVal);
+      return 0;
+   }
+   env->ReleaseStringUTFChars(str_id, sid);
+   env->ReleaseStringUTFChars(str_tag, stag);
+
+   // now immediately fill the treevector!
+    retVal = CTA_TreeVector_SetVec(handle,ctavectorhandle);
+	if ( retVal != CTA_OK ) {
+		cta_jni_exception(env, "CtaTreeVector I", "Could not fill the created treevector", retVal);
+		return 0;
+	}
+
+   return handle;
+};
+
+/*
+ * Class:     org_costa_CtaTreeVector
+ * Method:    ctaCreateFromSubtreevectors
+ * Signature: ()I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaTreeVector_ctaCreateFromSubtreevectors
+(JNIEnv *env , jobject obj_this, jstring str_id, jstring str_tag, jint nsubtrees, jintArray jsubtreevectors){
+
+   cta_jni_setJavaEnv(env);
+   
+   int handle = CTA_NULL;
+   const char *sid  = env->GetStringUTFChars(str_id, 0);
+   const char *stag  = env->GetStringUTFChars(str_tag, 0);
+   int retVal = CTA_TreeVector_Create(sid,stag, &handle);
+   env->ReleaseStringUTFChars(str_id, sid);
+   env->ReleaseStringUTFChars(str_tag, stag);
+   if ( retVal != CTA_OK ) {
+	cta_jni_exception(env, "CtaTreeVector II", "Could not create treevector", retVal);
+	return 0;
+   }
+   // now concatenate the  subtreevectors!
+	int cCount = env->GetArrayLength(jsubtreevectors);
+
+	jint * csubtreevectors = env->GetIntArrayElements(jsubtreevectors, 0);
+	int *csub2 = new int[cCount] ;
+
+	// a bit awkward, but it is not possible to retrieve csubtreevectors as int instead of jint?
+	for (int i=0;i<cCount;i++){
+      csub2[i] = csubtreevectors[i];
+      CTA_TreeVector_IncRefCount(csub2[i]);
+	}
+
+    retVal = CTA_TreeVector_Conc(handle, csub2, nsubtrees);
+	if ( retVal != CTA_OK ) {
+		cta_jni_exception(env, "CtaTreeVector II", "Could not concatenate the created treevector", retVal);
+		return 0;
+	}
+        delete [] csub2;
+
+   return handle;
+};
+
+
+/*
+ * Class:     org_costa_CtaTreeVector
+ * Method:    clone
+ * Signature: ()Lorg/openda/interfaces/ITreeVector;
+ */
+JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaTreeVector_clone
+  (JNIEnv *env, jobject obj_this){
+
+   cta_jni_setJavaEnv(env);
+
+   CTA_TreeVector ctaHandle = cta_jni_getCtaHandle(env, obj_this);
+   CTA_TreeVector ctaDuplicate;
+   int retVal=CTA_TreeVector_Duplicate(ctaHandle, &ctaDuplicate);
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaTreeVector", "Could not duplicate", retVal);
+      return NULL;
+   }
+
+   /* Create a Java CtaTreeVector */
+   jclass clsTreeVector = env->FindClass("org/openda/costa/CtaTreeVector");
+   jmethodID constructorID = env->GetMethodID (clsTreeVector, "<init>", "()V");
+   jobject jTreeVec = env->NewObject(clsTreeVector, constructorID);
+
+   cta_jni_setCtaHandle(env, jTreeVec, ctaDuplicate);
+   return jTreeVec;
+
+
+  }
+
+
+
+
+
diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaUtils.cpp b/costa/native/openda/bridge/src/org_openda_costa_CtaUtils.cpp
new file mode 100644
index 000000000..018dd61b0
--- /dev/null
+++ b/costa/native/openda/bridge/src/org_openda_costa_CtaUtils.cpp
@@ -0,0 +1,39 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/core/native/src/openda/bridge/org_openda_costa_CtaVector.cpp $
+$Revision: 2738 $, $Date: 2011-09-05 10:48:32 +0200 (Mon, 05 Sep 2011) $
+
+OpenDA interface for COSTA.
+Copyright (C) 2013  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include "org_openda_costa_CtaUtils.h"
+#include "jni_cta_utils.h"
+#include "cta_handles.h"
+
+/*
+ * Class:     org_openda_costa_CtaUtils
+ * Method:    print_memory
+ * Signature: (Ljava/lang/String;I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaUtils_print_1memory
+  (JNIEnv *env, jclass obj_this, jstring where, jint level){
+
+   cta_jni_setJavaEnv(env);
+   const char *swhere  = env->GetStringUTFChars(where, 0);
+   CTA_Handle_PrintInfo(swhere);
+   env->ReleaseStringUTFChars(where, swhere);
+}
diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaVector.cpp b/costa/native/openda/bridge/src/org_openda_costa_CtaVector.cpp
new file mode 100644
index 000000000..30972474a
--- /dev/null
+++ b/costa/native/openda/bridge/src/org_openda_costa_CtaVector.cpp
@@ -0,0 +1,547 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/openda/bridge/org_openda_costa_CtaVector.cpp $
+$Revision: 4445 $, $Date: 2014-06-03 08:56:12 +0200 (Tue, 03 Jun 2014) $
+
+OpenDA interface for COSTA.
+Copyright (C) 2007  Stef Hummel / Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+
+#include "org_openda_costa_CtaVector.h"
+#include "cta_vector.h"
+#include "jni_cta_utils.h"
+#include "cta_errors.h"
+#include "cta_defaults.h"
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    setConstant
+ * Signature: (D)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_setConstant
+  (JNIEnv * env, jobject obj_this, jdouble val){
+
+   cta_jni_setJavaEnv(env);
+
+   CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this);
+
+   CTA_Datatype datatype;
+   CTA_Handle_GetDatatype (ctaHandle, &datatype);
+
+   int retVal;
+   if (datatype==CTA_VECTOR){
+      retVal = CTA_Vector_SetConstant(ctaHandle, &val, CTA_DOUBLE);
+   }
+   else {
+      retVal = CTA_TreeVector_SetConstant(ctaHandle, &val, CTA_DOUBLE);
+   }
+
+	if ( retVal != CTA_OK ) {
+		cta_jni_exception(env, "CtaVector", "Could not set constant ", retVal);
+		return;
+	}
+}
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    scale
+ * Signature: (D)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_scale
+  (JNIEnv * env, jobject obj_this, jdouble val){
+
+   cta_jni_setJavaEnv(env);
+
+   CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this);
+
+   CTA_Datatype datatype;
+   CTA_Handle_GetDatatype (ctaHandle, &datatype);
+
+   int retVal;
+   if (datatype==CTA_VECTOR){
+      retVal = CTA_Vector_Scal(ctaHandle, val);
+   }
+   else {
+      retVal = CTA_TreeVector_Scal(ctaHandle, val);
+   }
+
+	if ( retVal != CTA_OK ) {
+		cta_jni_exception(env, "CtaVector", "Could not scale ", retVal);
+		return;
+	}
+  }
+
+
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    ctaPrintHandles
+ * Signature: (Z)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_ctaPrintHandles
+(JNIEnv *env, jobject jModel, jstring jlocation){
+
+   cta_jni_setJavaEnv(env);
+
+   const char *location = env->GetStringUTFChars(jlocation, 0);
+
+   CTA_Handle_PrintInfo(location);
+
+}
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    setValues
+ * Signature: ([D)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_setValues
+  (JNIEnv * env, jobject obj_this, jdoubleArray jValues) {
+   
+    cta_jni_setJavaEnv(env);
+
+	CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this);
+
+	int cCount = env->GetArrayLength(jValues);
+	jboolean isCopy = JNI_FALSE; 
+	jdouble * cValues = env->GetDoubleArrayElements(jValues, &isCopy);
+
+    CTA_Datatype datatype;
+    CTA_Handle_GetDatatype (ctaHandle, &datatype);
+
+    int retVal;
+    if (datatype==CTA_VECTOR){
+	   retVal = CTA_Vector_SetVals(ctaHandle, (void *) cValues, cCount, CTA_DOUBLE);
+    }
+    else {
+	   retVal = CTA_TreeVector_SetVals(ctaHandle, (void *) cValues, cCount, CTA_DOUBLE);
+    }
+
+     env->ReleaseDoubleArrayElements(jValues, cValues,0); 
+
+   if ( retVal != CTA_OK ) {
+		cta_jni_exception(env, "CtaVector", "Could not set values", retVal);
+		return;
+	}
+}
+
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    getValues
+ * Signature: ()[D
+ */
+JNIEXPORT jdoubleArray JNICALL Java_org_openda_costa_CtaVector_getValues
+  (JNIEnv * env, jobject obj_this) {
+
+	cta_jni_setJavaEnv(env);
+   
+   CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this);
+
+	int cCount = 0;
+
+   CTA_Datatype datatype;
+   CTA_Handle_GetDatatype (ctaHandle, &datatype);
+
+   int retVal;
+   if (datatype==CTA_VECTOR){
+      retVal = CTA_Vector_GetSize(ctaHandle, &cCount);
+   } else {
+      retVal = CTA_TreeVector_GetSize(ctaHandle, &cCount);
+   }
+
+   if ( retVal != CTA_OK ) {
+		cta_jni_exception(env, "CtaVector", "Could not get size", retVal);
+		return NULL;
+	}
+
+   jdoubleArray jValues = env->NewDoubleArray(cCount);
+   double * cValues = new double[cCount];
+
+   if (datatype==CTA_VECTOR){
+      retVal = CTA_Vector_GetVals(ctaHandle, (void *) cValues, cCount, CTA_DOUBLE);
+   }
+   else {
+      retVal = CTA_TreeVector_GetVals(ctaHandle, (void *) cValues, cCount, CTA_DOUBLE);
+   }
+
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaVector", "Could not get values", retVal);
+      return NULL;
+   }
+   else {
+      env->SetDoubleArrayRegion(jValues, 0, cCount, cValues);
+   }
+   delete [] cValues;
+   return jValues;
+}
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    setValue
+ * Signature: (ID)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_setValue
+  (JNIEnv * env, jobject obj_this, jint indx, jdouble val){
+
+   cta_jni_setJavaEnv(env);
+     
+   CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this);
+
+   CTA_Datatype datatype;
+   CTA_Handle_GetDatatype (ctaHandle, &datatype);
+
+   int retVal;
+   if (datatype==CTA_VECTOR){
+      retVal = CTA_Vector_SetVal(ctaHandle, indx+1, &val, CTA_DOUBLE);
+   }
+   else {
+      retVal = CTA_TreeVector_SetVal(ctaHandle, indx+1, &val, CTA_DOUBLE);
+   }
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaVector", "Could not set value ", retVal);
+		return;
+   }
+}
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    getValue
+ * Signature: (I)D
+ */
+JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaVector_getValue
+  (JNIEnv * env, jobject obj_this, jint indx){
+
+   cta_jni_setJavaEnv(env);
+     
+   CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this);
+
+   double val;
+   CTA_Datatype datatype;
+   CTA_Handle_GetDatatype (ctaHandle, &datatype);
+
+   int retVal;
+   if (datatype==CTA_VECTOR){
+      retVal = CTA_Vector_GetVal(ctaHandle, indx+1, &val, CTA_DOUBLE);
+   }
+   else {
+      retVal = CTA_TreeVector_GetVal(ctaHandle, indx+1, &val, CTA_DOUBLE);
+   }
+
+
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaVector", "Could not get value ", retVal);
+		return 0.0;
+   }
+   return val;
+}
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    getSize
+ * Signature: ()I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaVector_getSize
+  (JNIEnv * env, jobject obj_this){
+
+   cta_jni_setJavaEnv(env);
+
+   CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this);
+   int n;
+
+   CTA_Datatype datatype;
+   CTA_Handle_GetDatatype (ctaHandle, &datatype);
+
+   int retVal;
+   if (datatype==CTA_VECTOR){
+      retVal = CTA_Vector_GetSize(ctaHandle, &n);
+   }
+   else {
+      retVal = CTA_TreeVector_GetSize(ctaHandle, &n);
+   }
+
+
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaVector", "Could not get value ", retVal);
+		return 0;
+   }
+   return n;
+}
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    norm2
+ * Signature: ()D
+ */
+JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaVector_norm2
+  (JNIEnv * env, jobject obj_this){
+
+   cta_jni_setJavaEnv(env);
+     
+   CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this);
+   double nrm;
+
+   CTA_Datatype datatype;
+   CTA_Handle_GetDatatype (ctaHandle, &datatype);
+
+   int retVal;
+   if (datatype==CTA_VECTOR){
+      retVal = CTA_Vector_Nrm2(ctaHandle, &nrm);
+   }
+   else {
+      retVal = CTA_TreeVector_Nrm2(ctaHandle, &nrm);
+   }
+
+
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaVector", "Could not compute 2-norm ", retVal);
+		return 0.0;
+   }
+   return nrm;
+}
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    create
+ * Signature: (I)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_costa_CtaVector_ctaCreate
+  (JNIEnv * env, jobject obj_this, jint length) {
+
+   cta_jni_setJavaEnv(env);
+   
+   int handle = CTA_NULL;
+	int retVal = CTA_Vector_Create(CTA_DEFAULT_VECTOR, length, CTA_DOUBLE, CTA_NULL, &handle);
+	if ( retVal != CTA_OK ) {
+		cta_jni_exception(env, "CtaVector", "Could not create vector", retVal);
+
+		return 0;
+	} else {
+		double initialValue = 0;
+		int retVal = CTA_Vector_SetConstant(handle, &initialValue, CTA_DOUBLE);
+		if ( retVal != CTA_OK ) {
+			cta_jni_exception(env, "CtaVector", "Could not set initial values", retVal);
+			return 0;
+		}
+	}
+	return handle;
+}
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    ctaDotProduct
+ * Signature: (I)D
+ */
+JNIEXPORT jdouble JNICALL Java_org_openda_costa_CtaVector_ctaDotProduct
+  (JNIEnv * env, jobject obj_this, jint handleToOtherVector) {
+
+   cta_jni_setJavaEnv(env);
+
+   CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this);
+   double dot;
+
+   CTA_Datatype datatype;
+   CTA_Handle_GetDatatype (ctaHandle, &datatype);
+
+   int retVal;
+   if (datatype==CTA_VECTOR){
+      retVal = CTA_Vector_Dot(ctaHandle, handleToOtherVector, &dot);
+   }
+   else {
+      retVal = CTA_TreeVector_Dot(ctaHandle, handleToOtherVector, &dot);
+   }
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaVector", "Could not compute dot product", retVal);
+		return 0.0;
+   }
+   return dot;
+}
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    ctaPointwiseDivide
+ * Signature: (I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_ctaPointwiseDivide
+  (JNIEnv *env, jobject obj_this, jint handleToOtherVector){
+
+   cta_jni_setJavaEnv(env);
+
+   CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this);
+
+   CTA_Datatype datatype;
+   CTA_Handle_GetDatatype (ctaHandle, &datatype);
+
+   int retVal;
+   if (datatype==CTA_VECTOR){
+      retVal = CTA_Vector_ElmDiv(ctaHandle, handleToOtherVector);
+   }
+   else {
+      retVal = CTA_TreeVector_ElmDiv(ctaHandle, handleToOtherVector);
+   }
+
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaVector", "Could not compute pointwise division", retVal);
+		return;
+   }
+}
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    ctaPointwiseMultiply
+ * Signature: (I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_ctaPointwiseMultiply
+  (JNIEnv *env, jobject obj_this, jint handleToOtherVector){
+
+   cta_jni_setJavaEnv(env);
+
+   CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this);
+
+   CTA_Datatype datatype;
+   CTA_Handle_GetDatatype (ctaHandle, &datatype);
+
+   int retVal;
+   if (datatype==CTA_VECTOR){
+      retVal = CTA_Vector_ElmProd(ctaHandle, handleToOtherVector);
+   }
+   else {
+      retVal = CTA_TreeVector_ElmProd(ctaHandle, handleToOtherVector);
+   }
+
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaVector", "Could not compute pointwise multiply", retVal);
+		return;
+   }
+}
+
+/*
+ * Class:     org_costa_CtaVectorJava_org_openda_costa_CtaObject_ctaInit
+ * Method:    ctaSetValues
+ * Signature: (I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_ctaSetValues
+  (JNIEnv *env, jobject obj_this, jint handleToOtherVector){
+
+   cta_jni_setJavaEnv(env);
+
+   CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this);
+
+   CTA_Datatype datatype;
+   CTA_Handle_GetDatatype (ctaHandle, &datatype);
+
+   int retVal;
+   if (datatype==CTA_VECTOR){
+      retVal=CTA_Vector_Copy( handleToOtherVector, ctaHandle);
+   }
+   else {
+      retVal=CTA_TreeVector_Copy( handleToOtherVector, ctaHandle);
+   }
+
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaVector", "Could not copy values of vectors", retVal);
+		return;
+   }
+
+}
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    ctaAxpy
+ * Signature: (DI)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_ctaAxpy
+  (JNIEnv *env, jobject obj_this, jdouble alpha, jint handleToOtherVector){
+
+   cta_jni_setJavaEnv(env);
+
+   CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this);
+
+   CTA_Datatype datatype;
+   CTA_Handle_GetDatatype (ctaHandle, &datatype);
+
+   int retVal;
+   if (datatype==CTA_VECTOR){
+      retVal=CTA_Vector_Axpy(ctaHandle, alpha, handleToOtherVector);
+   }
+   else {
+      retVal=CTA_TreeVector_Axpy(ctaHandle, alpha, handleToOtherVector);
+   }
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaVector", "Could not compute axpy", retVal);
+		return;
+   }
+}
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    clone
+ * Signature: ()Lorg/openda/interfaces/IVector;
+ */
+JNIEXPORT jobject JNICALL Java_org_openda_costa_CtaVector_clone
+  (JNIEnv *env, jobject obj_this){
+
+   cta_jni_setJavaEnv(env);
+
+   CTA_Vector ctaHandle = cta_jni_getCtaHandle(env, obj_this);
+   CTA_Vector ctaDuplicate;
+   int retVal=CTA_Vector_Duplicate(ctaHandle, &ctaDuplicate);
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaVector", "Could not duplicate", retVal);
+      return NULL;
+   }
+
+   /* Create a Java CtaVector */
+   jclass clsVector = env->FindClass("org/openda/costa/CtaVector");
+   jmethodID constructorID = env->GetMethodID (clsVector, "<init>", "()V");
+   jobject jVec = env->NewObject(clsVector, constructorID);
+
+   cta_jni_setCtaHandle(env, jVec, ctaDuplicate);
+
+   return jVec;
+  }
+
+
+/*
+ * Class:     org_costa_CtaVector
+ * Method:    ctaSqrt
+ * Signature: ()V
+ */
+JNIEXPORT void JNICALL Java_org_openda_costa_CtaVector_ctaSqrt
+  (JNIEnv *env, jobject obj_this){
+
+   cta_jni_setJavaEnv(env);
+
+   CTA_Handle ctaHandle = cta_jni_getCtaHandle(env, obj_this);
+
+   CTA_Datatype datatype;
+   CTA_Handle_GetDatatype (ctaHandle, &datatype);
+
+   int retVal;
+   if (datatype==CTA_VECTOR){
+      retVal=CTA_Vector_ElmSqrt(ctaHandle);
+   }
+   else {
+      retVal=CTA_TreeVector_ElmSqrt(ctaHandle);
+   }
+   if ( retVal != CTA_OK ) {
+      cta_jni_exception(env, "CtaVector", "Could compute sqrt", retVal);
+		return;
+   }
+}
+
+
+
diff --git a/costa/native/openda/bridge/src/org_openda_resultwriters_NativeResultWriter.cpp b/costa/native/openda/bridge/src/org_openda_resultwriters_NativeResultWriter.cpp
new file mode 100644
index 000000000..268cad73e
--- /dev/null
+++ b/costa/native/openda/bridge/src/org_openda_resultwriters_NativeResultWriter.cpp
@@ -0,0 +1,122 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/core/native/src/openda/bridge/org_openda_costa_CtaVector.cpp $
+$Revision: 2738 $, $Date: 2011-09-05 10:48:32 +0200 (Mon, 05 Sep 2011) $
+
+OpenDA interface for COSTA.
+Copyright (C) 2013  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <string.h>
+#include "org_openda_resultwriters_NativeResultWriter.h"
+#include "jni_cta_utils.h"
+#include "cta_resultwriter.h"
+
+char *copyJavaStringToNormalString(JNIEnv *env, jstring javaString){
+
+   if (javaString==NULL) return NULL;
+   const char *sJava=env->GetStringUTFChars(javaString,     0);
+   size_t n=strlen(sJava)+1;
+   char *sString = (char*) malloc(n*sizeof(char));
+   for (size_t i=0;i<n;i++){
+       sString[i]=sJava[i];
+   }
+   env->ReleaseStringUTFChars(javaString, sJava);
+   return sString;
+
+}
+
+
+
+/*
+ * Class:     org_openda_resultwriters_NativeResultWriter
+ * Method:    putMessage
+ * Signature: (ILjava/lang/String;Ljava/lang/String;Ljava/lang/String;)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_resultwriters_NativeResultWriter_putMessage
+  (JNIEnv *env, jobject, jint iDWriter, jstring config, jstring workingDir, jstring message){
+   cta_jni_setJavaEnv(env);
+
+   char *sConfig      = copyJavaStringToNormalString(env, config);
+   char *sWorkingDir  = copyJavaStringToNormalString(env, workingDir);
+   char *sMessage     = copyJavaStringToNormalString(env, message);
+
+   // To do add method
+   CTA_Resultwriter_putmessage(iDWriter, sConfig, sWorkingDir, sMessage); 
+
+   if (sConfig)     free(sConfig);
+   if (sWorkingDir) free(sWorkingDir);
+   if (sMessage)    free(sMessage);
+}
+
+/*
+ * Class:     org_openda_resultwriters_NativeResultWriter
+ * Method:    putValue
+ * Signature: (ILjava/lang/String;Ljava/lang/String;Ljava/lang/String;IILjava/lang/String;I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_resultwriters_NativeResultWriter_putValue
+  (JNIEnv *env, jobject, jint iDWriter, jstring config, jstring workingDir, jstring id, jint handle, jint outputLevel, jstring context, jint iteration){
+   cta_jni_setJavaEnv(env);
+
+   char *sConfig      = copyJavaStringToNormalString(env, config);
+   char *sWorkingDir  = copyJavaStringToNormalString(env, workingDir);
+   char *sId          = copyJavaStringToNormalString(env, id);
+   char *sContext     = copyJavaStringToNormalString(env, context);
+
+   // To do add method
+    CTA_Resultwriter_putvalue(iDWriter, sConfig, sWorkingDir, sId, handle, outputLevel, sContext, iteration);
+   if (sConfig)     free(sConfig);
+   if (sWorkingDir) free(sWorkingDir);
+   if (sId)         free(sId);
+   if (sContext)    free(sContext);
+}
+/*
+ * Class:     org_openda_resultwriters_NativeResultWriter
+ * Method:    putIterationReport
+ * Signature: (ILjava/lang/String;Ljava/lang/String;IDI)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_resultwriters_NativeResultWriter_putIterationReport
+  (JNIEnv *env, jobject, jint iDWriter, jstring config, jstring workingDir, jint iteration, jdouble cost, jint handle){
+   cta_jni_setJavaEnv(env);
+
+   char *sConfig      = copyJavaStringToNormalString(env, config);
+   char *sWorkingDir  = copyJavaStringToNormalString(env, workingDir);
+
+   // To do add method
+    CTA_Resultwriter_putiterationreport(iDWriter, sConfig, sWorkingDir, iteration, cost, handle);
+
+   if (sConfig)     free(sConfig);
+   if (sWorkingDir) free(sWorkingDir);
+}
+
+/*
+ * Class:     org_openda_resultwriters_NativeResultWriter
+ * Method:    free
+ * Signature: (I)V
+ */
+JNIEXPORT void JNICALL Java_org_openda_resultwriters_NativeResultWriter_free
+  (JNIEnv *, jobject, jint iDWriter){
+
+   // To do add method
+    CTA_Resultwriter_free(iDWriter);
+}
+
+
+
+
+
+
+
diff --git a/costa/native/openda/bridge/src/org_openda_resultwriters_NetcdfResultWriterNative.cpp b/costa/native/openda/bridge/src/org_openda_resultwriters_NetcdfResultWriterNative.cpp
new file mode 100644
index 000000000..ababb0bbd
--- /dev/null
+++ b/costa/native/openda/bridge/src/org_openda_resultwriters_NetcdfResultWriterNative.cpp
@@ -0,0 +1,111 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/core/native/src/openda/bridge/org_openda_costa_CtaVector.cpp $
+$Revision: 2738 $, $Date: 2011-09-05 10:48:32 +0200 (Mon, 05 Sep 2011) $
+
+OpenDA interface for COSTA.
+Copyright (C) 2013  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <string.h>
+#include "org_openda_resultwriters_NetcdfResultWriterNative.h"
+#include "jni_cta_utils.h"
+#include "cta_resultwriter.h"
+
+
+CTA_String copyJavaStringToCostaString(JNIEnv *env, jstring javaString){
+
+   int ierr;
+   CTA_String hString=CTA_NULL;
+
+   if (javaString==NULL) return hString;
+   const char *sJava=env->GetStringUTFChars(javaString,     0);
+   size_t n=strlen(sJava)+1;
+   char *sString = (char*) malloc(n*sizeof(char));
+   for (size_t i=0;i<n;i++){
+       sString[i]=sJava[i];
+   }
+   env->ReleaseStringUTFChars(javaString, sJava);
+   
+   ierr=CTA_String_Create(&hString);
+   if (ierr!=CTA_OK){
+      cta_jni_exception(env, "copyJavaStringToCostaString", "Cannot create COSTA string.", ierr);
+   }
+   ierr=CTA_String_Set(hString, sString);
+   if (ierr!=CTA_OK){
+      cta_jni_exception(env, "copyJavaStringToCostaString", "Cannot set value of COSTA string.", ierr);
+   }
+   
+   free(sString);
+
+   return hString;
+}
+
+
+/*
+ * Class:     org_openda_resultwriters_NetcdfResultWriterNative
+ * Method:    ctaNetcdfInit
+ * Signature: (Ljava/lang/String;Ljava/lang/String;)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_resultwriters_NetcdfResultWriterNative_ctaNetcdfInit
+  (JNIEnv *env , jobject , jstring netcdfname, jstring action){
+
+   cta_jni_setJavaEnv(env);
+
+   int ierr;
+   CTA_String hFilename    = copyJavaStringToCostaString(env, netcdfname);
+   CTA_String hAction      = copyJavaStringToCostaString(env, action);
+   CTA_File hFile;
+
+   ierr=CTA_File_Create(&hFile);
+   if (ierr!=CTA_OK){
+      cta_jni_exception(env, "Java_org_openda_resultwriters_NetcdfResultWriterNative_ctaNetcdfInit",
+                             "Cannot create COSTA file.", ierr);
+   }
+
+   ierr=CTA_File_Open(hFile,hFilename,hAction);
+   if (ierr!=CTA_OK){
+      cta_jni_exception(env, "Java_org_openda_resultwriters_NetcdfResultWriterNative_ctaNetcdfInit",
+                             "Cannot open Netcdf file.", ierr);
+   }
+
+   CTA_String_Free(&hFilename);
+   CTA_String_Free(&hAction);
+   return hFile;
+
+}
+
+/*
+ * Class:     org_openda_resultwriters_NetcdfResultWriterNative
+ * Method:    ctaNetcdfClose
+ * Signature: (I)I
+ */
+JNIEXPORT jint JNICALL Java_org_openda_resultwriters_NetcdfResultWriterNative_ctaNetcdfClose
+  (JNIEnv *env, jobject, jint hFile){
+
+   cta_jni_setJavaEnv(env);
+
+   int ierr;
+   int iHFile = hFile;
+   ierr=CTA_File_Free(&iHFile);
+   if (ierr!=CTA_OK){
+      cta_jni_exception(env, "Java_org_openda_resultwriters_NetcdfResultWriterNative_ctaNetcdfInit",
+                             "Cannot close Netcdf file.", ierr);
+   }
+   return ierr;
+}
+
+

From dabf0c4e5de77cc19a3d0b3433d912c5d9b62271 Mon Sep 17 00:00:00 2001
From: Werner Kramer <werner.kramer@vortech.nl>
Date: Wed, 13 Sep 2023 12:32:30 +0200
Subject: [PATCH 03/15] Move openda/bridge to bridge

---
 costa/native/CMakeLists.txt                                     | 2 +-
 .../bridge/include/jni_cta_CtaObsdescr_NativeToJava.c           | 0
 .../bridge/include/jni_cta_CtaObsdescr_NativeToJava.h           | 0
 costa/native/{openda => }/bridge/include/jni_cta_utils.h        | 0
 costa/native/{openda => }/bridge/include/jni_datatypes.h        | 0
 .../{openda => }/bridge/include/org_openda_costa_CtaArray.h     | 0
 .../bridge/include/org_openda_costa_CtaInitialize.h             | 0
 .../bridge/include/org_openda_costa_CtaModelState.h             | 0
 .../{openda => }/bridge/include/org_openda_costa_CtaObject.h    | 0
 .../include/org_openda_costa_CtaObservationDescriptions.h       | 0
 .../bridge/include/org_openda_costa_CtaOpenDaModel.h            | 0
 .../{openda => }/bridge/include/org_openda_costa_CtaParallel.h  | 0
 .../bridge/include/org_openda_costa_CtaRelationTable.h          | 0
 .../bridge/include/org_openda_costa_CtaStochObserver.h          | 0
 .../{openda => }/bridge/include/org_openda_costa_CtaTime.h      | 0
 .../bridge/include/org_openda_costa_CtaTreeVector.h             | 0
 .../{openda => }/bridge/include/org_openda_costa_CtaUtils.h     | 0
 .../{openda => }/bridge/include/org_openda_costa_CtaVector.h    | 0
 .../include/org_openda_resultwriters_NativeResultWriter.h       | 0
 .../include/org_openda_resultwriters_NetcdfResultWriterNative.h | 0
 costa/native/{openda => }/bridge/src/CMakeLists.txt             | 0
 costa/native/{openda => }/bridge/src/jni_cta_utils.cpp          | 0
 .../{openda => }/bridge/src/org_openda_costa_CtaArray.cpp       | 0
 .../{openda => }/bridge/src/org_openda_costa_CtaInitialize.cpp  | 0
 .../{openda => }/bridge/src/org_openda_costa_CtaModelState.cpp  | 0
 .../{openda => }/bridge/src/org_openda_costa_CtaObject.cpp      | 0
 .../bridge/src/org_openda_costa_CtaObservationDescriptions.cpp  | 0
 .../{openda => }/bridge/src/org_openda_costa_CtaOpenDaModel.cpp | 0
 .../{openda => }/bridge/src/org_openda_costa_CtaParallel.cpp    | 0
 .../bridge/src/org_openda_costa_CtaRelationTable.cpp            | 0
 .../bridge/src/org_openda_costa_CtaStochObserver.cpp            | 0
 .../native/{openda => }/bridge/src/org_openda_costa_CtaTime.cpp | 0
 .../{openda => }/bridge/src/org_openda_costa_CtaTreeVector.cpp  | 0
 .../{openda => }/bridge/src/org_openda_costa_CtaUtils.cpp       | 0
 .../{openda => }/bridge/src/org_openda_costa_CtaVector.cpp      | 0
 .../bridge/src/org_openda_resultwriters_NativeResultWriter.cpp  | 0
 .../src/org_openda_resultwriters_NetcdfResultWriterNative.cpp   | 0
 37 files changed, 1 insertion(+), 1 deletion(-)
 rename costa/native/{openda => }/bridge/include/jni_cta_CtaObsdescr_NativeToJava.c (100%)
 rename costa/native/{openda => }/bridge/include/jni_cta_CtaObsdescr_NativeToJava.h (100%)
 rename costa/native/{openda => }/bridge/include/jni_cta_utils.h (100%)
 rename costa/native/{openda => }/bridge/include/jni_datatypes.h (100%)
 rename costa/native/{openda => }/bridge/include/org_openda_costa_CtaArray.h (100%)
 rename costa/native/{openda => }/bridge/include/org_openda_costa_CtaInitialize.h (100%)
 rename costa/native/{openda => }/bridge/include/org_openda_costa_CtaModelState.h (100%)
 rename costa/native/{openda => }/bridge/include/org_openda_costa_CtaObject.h (100%)
 rename costa/native/{openda => }/bridge/include/org_openda_costa_CtaObservationDescriptions.h (100%)
 rename costa/native/{openda => }/bridge/include/org_openda_costa_CtaOpenDaModel.h (100%)
 rename costa/native/{openda => }/bridge/include/org_openda_costa_CtaParallel.h (100%)
 rename costa/native/{openda => }/bridge/include/org_openda_costa_CtaRelationTable.h (100%)
 rename costa/native/{openda => }/bridge/include/org_openda_costa_CtaStochObserver.h (100%)
 rename costa/native/{openda => }/bridge/include/org_openda_costa_CtaTime.h (100%)
 rename costa/native/{openda => }/bridge/include/org_openda_costa_CtaTreeVector.h (100%)
 rename costa/native/{openda => }/bridge/include/org_openda_costa_CtaUtils.h (100%)
 rename costa/native/{openda => }/bridge/include/org_openda_costa_CtaVector.h (100%)
 rename costa/native/{openda => }/bridge/include/org_openda_resultwriters_NativeResultWriter.h (100%)
 rename costa/native/{openda => }/bridge/include/org_openda_resultwriters_NetcdfResultWriterNative.h (100%)
 rename costa/native/{openda => }/bridge/src/CMakeLists.txt (100%)
 rename costa/native/{openda => }/bridge/src/jni_cta_utils.cpp (100%)
 rename costa/native/{openda => }/bridge/src/org_openda_costa_CtaArray.cpp (100%)
 rename costa/native/{openda => }/bridge/src/org_openda_costa_CtaInitialize.cpp (100%)
 rename costa/native/{openda => }/bridge/src/org_openda_costa_CtaModelState.cpp (100%)
 rename costa/native/{openda => }/bridge/src/org_openda_costa_CtaObject.cpp (100%)
 rename costa/native/{openda => }/bridge/src/org_openda_costa_CtaObservationDescriptions.cpp (100%)
 rename costa/native/{openda => }/bridge/src/org_openda_costa_CtaOpenDaModel.cpp (100%)
 rename costa/native/{openda => }/bridge/src/org_openda_costa_CtaParallel.cpp (100%)
 rename costa/native/{openda => }/bridge/src/org_openda_costa_CtaRelationTable.cpp (100%)
 rename costa/native/{openda => }/bridge/src/org_openda_costa_CtaStochObserver.cpp (100%)
 rename costa/native/{openda => }/bridge/src/org_openda_costa_CtaTime.cpp (100%)
 rename costa/native/{openda => }/bridge/src/org_openda_costa_CtaTreeVector.cpp (100%)
 rename costa/native/{openda => }/bridge/src/org_openda_costa_CtaUtils.cpp (100%)
 rename costa/native/{openda => }/bridge/src/org_openda_costa_CtaVector.cpp (100%)
 rename costa/native/{openda => }/bridge/src/org_openda_resultwriters_NativeResultWriter.cpp (100%)
 rename costa/native/{openda => }/bridge/src/org_openda_resultwriters_NetcdfResultWriterNative.cpp (100%)

diff --git a/costa/native/CMakeLists.txt b/costa/native/CMakeLists.txt
index 92c16cb1e..a84fb4394 100644
--- a/costa/native/CMakeLists.txt
+++ b/costa/native/CMakeLists.txt
@@ -7,5 +7,5 @@ add_subdirectory(external/lapack)
 
 add_subdirectory(cta/src)
 add_subdirectory(cta_f90/generated)
-add_subdirectory(openda/bridge/src)
+add_subdirectory(bridge/src)
 
diff --git a/costa/native/openda/bridge/include/jni_cta_CtaObsdescr_NativeToJava.c b/costa/native/bridge/include/jni_cta_CtaObsdescr_NativeToJava.c
similarity index 100%
rename from costa/native/openda/bridge/include/jni_cta_CtaObsdescr_NativeToJava.c
rename to costa/native/bridge/include/jni_cta_CtaObsdescr_NativeToJava.c
diff --git a/costa/native/openda/bridge/include/jni_cta_CtaObsdescr_NativeToJava.h b/costa/native/bridge/include/jni_cta_CtaObsdescr_NativeToJava.h
similarity index 100%
rename from costa/native/openda/bridge/include/jni_cta_CtaObsdescr_NativeToJava.h
rename to costa/native/bridge/include/jni_cta_CtaObsdescr_NativeToJava.h
diff --git a/costa/native/openda/bridge/include/jni_cta_utils.h b/costa/native/bridge/include/jni_cta_utils.h
similarity index 100%
rename from costa/native/openda/bridge/include/jni_cta_utils.h
rename to costa/native/bridge/include/jni_cta_utils.h
diff --git a/costa/native/openda/bridge/include/jni_datatypes.h b/costa/native/bridge/include/jni_datatypes.h
similarity index 100%
rename from costa/native/openda/bridge/include/jni_datatypes.h
rename to costa/native/bridge/include/jni_datatypes.h
diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaArray.h b/costa/native/bridge/include/org_openda_costa_CtaArray.h
similarity index 100%
rename from costa/native/openda/bridge/include/org_openda_costa_CtaArray.h
rename to costa/native/bridge/include/org_openda_costa_CtaArray.h
diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaInitialize.h b/costa/native/bridge/include/org_openda_costa_CtaInitialize.h
similarity index 100%
rename from costa/native/openda/bridge/include/org_openda_costa_CtaInitialize.h
rename to costa/native/bridge/include/org_openda_costa_CtaInitialize.h
diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaModelState.h b/costa/native/bridge/include/org_openda_costa_CtaModelState.h
similarity index 100%
rename from costa/native/openda/bridge/include/org_openda_costa_CtaModelState.h
rename to costa/native/bridge/include/org_openda_costa_CtaModelState.h
diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaObject.h b/costa/native/bridge/include/org_openda_costa_CtaObject.h
similarity index 100%
rename from costa/native/openda/bridge/include/org_openda_costa_CtaObject.h
rename to costa/native/bridge/include/org_openda_costa_CtaObject.h
diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaObservationDescriptions.h b/costa/native/bridge/include/org_openda_costa_CtaObservationDescriptions.h
similarity index 100%
rename from costa/native/openda/bridge/include/org_openda_costa_CtaObservationDescriptions.h
rename to costa/native/bridge/include/org_openda_costa_CtaObservationDescriptions.h
diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaOpenDaModel.h b/costa/native/bridge/include/org_openda_costa_CtaOpenDaModel.h
similarity index 100%
rename from costa/native/openda/bridge/include/org_openda_costa_CtaOpenDaModel.h
rename to costa/native/bridge/include/org_openda_costa_CtaOpenDaModel.h
diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaParallel.h b/costa/native/bridge/include/org_openda_costa_CtaParallel.h
similarity index 100%
rename from costa/native/openda/bridge/include/org_openda_costa_CtaParallel.h
rename to costa/native/bridge/include/org_openda_costa_CtaParallel.h
diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaRelationTable.h b/costa/native/bridge/include/org_openda_costa_CtaRelationTable.h
similarity index 100%
rename from costa/native/openda/bridge/include/org_openda_costa_CtaRelationTable.h
rename to costa/native/bridge/include/org_openda_costa_CtaRelationTable.h
diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaStochObserver.h b/costa/native/bridge/include/org_openda_costa_CtaStochObserver.h
similarity index 100%
rename from costa/native/openda/bridge/include/org_openda_costa_CtaStochObserver.h
rename to costa/native/bridge/include/org_openda_costa_CtaStochObserver.h
diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaTime.h b/costa/native/bridge/include/org_openda_costa_CtaTime.h
similarity index 100%
rename from costa/native/openda/bridge/include/org_openda_costa_CtaTime.h
rename to costa/native/bridge/include/org_openda_costa_CtaTime.h
diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaTreeVector.h b/costa/native/bridge/include/org_openda_costa_CtaTreeVector.h
similarity index 100%
rename from costa/native/openda/bridge/include/org_openda_costa_CtaTreeVector.h
rename to costa/native/bridge/include/org_openda_costa_CtaTreeVector.h
diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaUtils.h b/costa/native/bridge/include/org_openda_costa_CtaUtils.h
similarity index 100%
rename from costa/native/openda/bridge/include/org_openda_costa_CtaUtils.h
rename to costa/native/bridge/include/org_openda_costa_CtaUtils.h
diff --git a/costa/native/openda/bridge/include/org_openda_costa_CtaVector.h b/costa/native/bridge/include/org_openda_costa_CtaVector.h
similarity index 100%
rename from costa/native/openda/bridge/include/org_openda_costa_CtaVector.h
rename to costa/native/bridge/include/org_openda_costa_CtaVector.h
diff --git a/costa/native/openda/bridge/include/org_openda_resultwriters_NativeResultWriter.h b/costa/native/bridge/include/org_openda_resultwriters_NativeResultWriter.h
similarity index 100%
rename from costa/native/openda/bridge/include/org_openda_resultwriters_NativeResultWriter.h
rename to costa/native/bridge/include/org_openda_resultwriters_NativeResultWriter.h
diff --git a/costa/native/openda/bridge/include/org_openda_resultwriters_NetcdfResultWriterNative.h b/costa/native/bridge/include/org_openda_resultwriters_NetcdfResultWriterNative.h
similarity index 100%
rename from costa/native/openda/bridge/include/org_openda_resultwriters_NetcdfResultWriterNative.h
rename to costa/native/bridge/include/org_openda_resultwriters_NetcdfResultWriterNative.h
diff --git a/costa/native/openda/bridge/src/CMakeLists.txt b/costa/native/bridge/src/CMakeLists.txt
similarity index 100%
rename from costa/native/openda/bridge/src/CMakeLists.txt
rename to costa/native/bridge/src/CMakeLists.txt
diff --git a/costa/native/openda/bridge/src/jni_cta_utils.cpp b/costa/native/bridge/src/jni_cta_utils.cpp
similarity index 100%
rename from costa/native/openda/bridge/src/jni_cta_utils.cpp
rename to costa/native/bridge/src/jni_cta_utils.cpp
diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaArray.cpp b/costa/native/bridge/src/org_openda_costa_CtaArray.cpp
similarity index 100%
rename from costa/native/openda/bridge/src/org_openda_costa_CtaArray.cpp
rename to costa/native/bridge/src/org_openda_costa_CtaArray.cpp
diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaInitialize.cpp b/costa/native/bridge/src/org_openda_costa_CtaInitialize.cpp
similarity index 100%
rename from costa/native/openda/bridge/src/org_openda_costa_CtaInitialize.cpp
rename to costa/native/bridge/src/org_openda_costa_CtaInitialize.cpp
diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaModelState.cpp b/costa/native/bridge/src/org_openda_costa_CtaModelState.cpp
similarity index 100%
rename from costa/native/openda/bridge/src/org_openda_costa_CtaModelState.cpp
rename to costa/native/bridge/src/org_openda_costa_CtaModelState.cpp
diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaObject.cpp b/costa/native/bridge/src/org_openda_costa_CtaObject.cpp
similarity index 100%
rename from costa/native/openda/bridge/src/org_openda_costa_CtaObject.cpp
rename to costa/native/bridge/src/org_openda_costa_CtaObject.cpp
diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaObservationDescriptions.cpp b/costa/native/bridge/src/org_openda_costa_CtaObservationDescriptions.cpp
similarity index 100%
rename from costa/native/openda/bridge/src/org_openda_costa_CtaObservationDescriptions.cpp
rename to costa/native/bridge/src/org_openda_costa_CtaObservationDescriptions.cpp
diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaOpenDaModel.cpp b/costa/native/bridge/src/org_openda_costa_CtaOpenDaModel.cpp
similarity index 100%
rename from costa/native/openda/bridge/src/org_openda_costa_CtaOpenDaModel.cpp
rename to costa/native/bridge/src/org_openda_costa_CtaOpenDaModel.cpp
diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaParallel.cpp b/costa/native/bridge/src/org_openda_costa_CtaParallel.cpp
similarity index 100%
rename from costa/native/openda/bridge/src/org_openda_costa_CtaParallel.cpp
rename to costa/native/bridge/src/org_openda_costa_CtaParallel.cpp
diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaRelationTable.cpp b/costa/native/bridge/src/org_openda_costa_CtaRelationTable.cpp
similarity index 100%
rename from costa/native/openda/bridge/src/org_openda_costa_CtaRelationTable.cpp
rename to costa/native/bridge/src/org_openda_costa_CtaRelationTable.cpp
diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaStochObserver.cpp b/costa/native/bridge/src/org_openda_costa_CtaStochObserver.cpp
similarity index 100%
rename from costa/native/openda/bridge/src/org_openda_costa_CtaStochObserver.cpp
rename to costa/native/bridge/src/org_openda_costa_CtaStochObserver.cpp
diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaTime.cpp b/costa/native/bridge/src/org_openda_costa_CtaTime.cpp
similarity index 100%
rename from costa/native/openda/bridge/src/org_openda_costa_CtaTime.cpp
rename to costa/native/bridge/src/org_openda_costa_CtaTime.cpp
diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaTreeVector.cpp b/costa/native/bridge/src/org_openda_costa_CtaTreeVector.cpp
similarity index 100%
rename from costa/native/openda/bridge/src/org_openda_costa_CtaTreeVector.cpp
rename to costa/native/bridge/src/org_openda_costa_CtaTreeVector.cpp
diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaUtils.cpp b/costa/native/bridge/src/org_openda_costa_CtaUtils.cpp
similarity index 100%
rename from costa/native/openda/bridge/src/org_openda_costa_CtaUtils.cpp
rename to costa/native/bridge/src/org_openda_costa_CtaUtils.cpp
diff --git a/costa/native/openda/bridge/src/org_openda_costa_CtaVector.cpp b/costa/native/bridge/src/org_openda_costa_CtaVector.cpp
similarity index 100%
rename from costa/native/openda/bridge/src/org_openda_costa_CtaVector.cpp
rename to costa/native/bridge/src/org_openda_costa_CtaVector.cpp
diff --git a/costa/native/openda/bridge/src/org_openda_resultwriters_NativeResultWriter.cpp b/costa/native/bridge/src/org_openda_resultwriters_NativeResultWriter.cpp
similarity index 100%
rename from costa/native/openda/bridge/src/org_openda_resultwriters_NativeResultWriter.cpp
rename to costa/native/bridge/src/org_openda_resultwriters_NativeResultWriter.cpp
diff --git a/costa/native/openda/bridge/src/org_openda_resultwriters_NetcdfResultWriterNative.cpp b/costa/native/bridge/src/org_openda_resultwriters_NetcdfResultWriterNative.cpp
similarity index 100%
rename from costa/native/openda/bridge/src/org_openda_resultwriters_NetcdfResultWriterNative.cpp
rename to costa/native/bridge/src/org_openda_resultwriters_NetcdfResultWriterNative.cpp

From 888b7cf5876010fa8911b430f48b94dfeead2f2e Mon Sep 17 00:00:00 2001
From: Werner Kramer <werner.kramer@vortech.nl>
Date: Wed, 13 Sep 2023 12:37:30 +0200
Subject: [PATCH 04/15] Link libraries, add install

---
 costa/native/.gitignore                       |   5 +
 costa/native/CMakeLists.txt                   |  13 +-
 costa/native/bridge/src/CMakeLists.txt        |  14 +-
 costa/native/cta/src/CMakeLists.txt           |  15 +-
 .../native/cta_f90/cta_f90-create-interfaces  | 948 ++++++++++++++++++
 costa/native/cta_f90/generated/CMakeLists.txt |   3 +-
 6 files changed, 985 insertions(+), 13 deletions(-)
 create mode 100644 costa/native/.gitignore
 create mode 100644 costa/native/cta_f90/cta_f90-create-interfaces

diff --git a/costa/native/.gitignore b/costa/native/.gitignore
new file mode 100644
index 000000000..b8d3856e6
--- /dev/null
+++ b/costa/native/.gitignore
@@ -0,0 +1,5 @@
+build/
+install/
+sangoma/
+user_templates/
+
diff --git a/costa/native/CMakeLists.txt b/costa/native/CMakeLists.txt
index a84fb4394..2eb93442e 100644
--- a/costa/native/CMakeLists.txt
+++ b/costa/native/CMakeLists.txt
@@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 3.9.1)
 
 project(openda)
-#add_subdirectory(libxml)
+
 add_subdirectory(external/blas)
 add_subdirectory(external/lapack)
 
@@ -9,3 +9,14 @@ add_subdirectory(cta/src)
 add_subdirectory(cta_f90/generated)
 add_subdirectory(bridge/src)
 
+file(GLOB CTA_PUBLIC_HEADERS
+    "cta/include/*.h"
+)
+
+install(TARGETS cta cta_f90 opendabridge 
+    LIBRARY DESTINATION lib
+)
+
+install(FILES ${CTA_PUBLIC_HEADERS}
+    DESTINATION include
+)
diff --git a/costa/native/bridge/src/CMakeLists.txt b/costa/native/bridge/src/CMakeLists.txt
index 7e4c9e1b7..d96444e7b 100644
--- a/costa/native/bridge/src/CMakeLists.txt
+++ b/costa/native/bridge/src/CMakeLists.txt
@@ -16,11 +16,11 @@ set(SOURCES
 
 add_library(opendabridge SHARED ${SOURCES})
 
-
-target_include_directories(opendabridge PUBLIC ../include)
-target_include_directories(opendabridge PUBLIC ${CMAKE_SOURCE_DIR}/cta/include)
-target_include_directories(opendabridge PUBLIC ${CMAKE_SOURCE_DIR}/external)
-target_include_directories(opendabridge PUBLIC ${LIBXML2_INCLUDE_DIR}) 
-target_include_directories(opendabridge PUBLIC ${JNI_INCLUDE_DIRS})
-#target_link_libraries(cta INTERFACE libxml)
+target_include_directories(opendabridge PRIVATE ../include)
+target_include_directories(opendabridge PRIVATE ${CMAKE_SOURCE_DIR}/cta/include)
+target_include_directories(opendabridge PRIVATE ${CMAKE_SOURCE_DIR}/external)
+target_include_directories(opendabridge PRIVATE ${LIBXML2_INCLUDE_DIR}) 
+target_include_directories(opendabridge PRIVATE ${JNI_INCLUDE_DIRS})
+target_link_libraries(opendabridge PRIVATE ${LIBXML2_LIBRARIES})
+target_link_libraries(opendabridge PRIVATE cta)
 
diff --git a/costa/native/cta/src/CMakeLists.txt b/costa/native/cta/src/CMakeLists.txt
index d225b4c14..72c8a1ef7 100644
--- a/costa/native/cta/src/CMakeLists.txt
+++ b/costa/native/cta/src/CMakeLists.txt
@@ -17,10 +17,17 @@ set(SOURCES
 )
 
 add_library(cta SHARED ${SOURCES})
+file(GLOB HEADER_FILES
+  "../include/*.h"
+)
+
+set(CTA_PUBLIC_HEADERS ${HEADER_FILES})
 
+target_include_directories(cta PRIVATE ../include)
+target_include_directories(cta PRIVATE ${CMAKE_SOURCE_DIR}/external)
+target_include_directories(cta PRIVATE ${LIBXML2_INCLUDE_DIR}) 
+target_link_libraries(cta PRIVATE ${LIBXML2_LIBRARIES})
+target_link_libraries(cta PRIVATE blas)
+target_link_libraries(cta PRIVATE lapack)
 
-target_include_directories(cta PUBLIC ../include)
-target_include_directories(cta PUBLIC ${CMAKE_SOURCE_DIR}/external)
-target_include_directories(cta PUBLIC ${LIBXML2_INCLUDE_DIR}) 
-#target_link_libraries(cta INTERFACE libxml)
 
diff --git a/costa/native/cta_f90/cta_f90-create-interfaces b/costa/native/cta_f90/cta_f90-create-interfaces
new file mode 100644
index 000000000..aadb5b844
--- /dev/null
+++ b/costa/native/cta_f90/cta_f90-create-interfaces
@@ -0,0 +1,948 @@
+#! /usr/bin/env python
+
+# $URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/core/native/src/cta/cta_vector.c $
+# $Revision: 2751 $, $Date: 2011-09-09 08:58:46 +0200 (Fri, 09 Sep 2011) $
+# 
+# COSTA: Problem solving environment for data assimilation
+# Copyright (C) 2012  Arjo Segers
+# 
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+# 
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+# 
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+
+"""
+Generate Fortran90 interfaces from function headers.
+Tool for OpenDA native core (COSTA).
+Uses: explicit and implicit tags:
+ implicit: header files containing usr, methods containing CTAI
+ explicit: CTAEXPORT to export or CTANOEXPORT or CTANOF90 to ingnore.
+"""
+
+# modules:
+import os
+import shutil
+
+
+########################################################################
+###
+### routines
+###
+########################################################################
+
+
+def cta_f90_interface( procname, procargs, comments ) :
+
+    """
+    Return list with Fortran90 source lines with the interface to C function.
+    
+    Arguments:
+      procname
+        Procedure name.
+      procargs
+        List with function argument descriptions. 
+        Each item of the list is a dictionairy that of the form:
+          { 'name'   : 'values',    # variable name
+            'type'   : 'int',       # data type
+            'intent' : 'in',        # intent
+            'dims'   : '(*)' }      # array shape (if relevant)
+      comments
+        List with the original DoxyGen comment in the C header file.
+    """
+    
+    # target name:
+    newprocname = procname.replace('CTA','CTA_F90')
+    
+    # create argument list:
+    argsline = ''
+    for iarg in range(len(procargs)) :
+        farg = procargs[iarg]
+        if iarg > 0 : argsline = argsline+', '
+        argsline = argsline+farg['name']
+    #endfor
+    
+    # init list with required parameters:
+    params = []
+    # scan arguments:
+    for arg in procargs :
+        # search for known parameters:
+        for param in ['CTA_HANDLE_IKIND','CTA_TIME_RKIND'] :
+            # needed for this type ?
+            if param in arg['type'] :
+                # add if not present yet:
+                if param not in params : params.append(param)
+            #endif
+        #endfor  # known parameters
+    #endfor  # arguments
+    
+    # void arguments ?
+    with_void_args = False
+    for arg in procargs :
+        if 'void' in arg['type'] :
+            with_void_args = True
+            break
+        #endif
+    #endfor
+            
+    
+    # init ifcult:
+    ifc = []
+
+    # indent etc:
+    indent = '  '
+    # add original comment:
+    for comment in comments :
+        ifc.append( indent+'! %s' % comment )
+    #endfor
+    # extra seperation comment:
+    ifc.append( indent+'!' )
+    # comment ?
+    if with_void_args : indent = '  !'
+    # start interface:
+    ifc.append( indent+'interface %s' % newprocname )
+    # routine header:
+    ifc.append( indent+'  subroutine %s( %s )' % (procname,argsline) )
+    # use parameters if necessary:
+    for param in params :
+        ifc.append( indent+'    use CTA_F90_Parameters, only : %s' % param )
+    #endfor
+    # loop over arguments:
+    for arg in procargs :
+        # add argument line:
+        ifc.append( indent+'    %-30s, intent(%-5s)     ::  %s%s' % (arg['type'],arg['intent'],arg['name'],arg['dims']) )
+    #endfor
+    # end of routine interface:
+    ifc.append( indent+'  end subroutine %s' % procname )
+    # end of interface:
+    ifc.append( indent+'end interface' )
+    ## add external declaration if void arguments were present ..
+    #if with_void_args : ifc.append( '  external   ::  %s' % procname )
+    ifc.append( '' )
+    
+    # ok
+    return ifc,[]
+    
+#enddef  # cta_f90_interface
+
+
+# ***
+
+
+def cta_f90_interface_generic( procname, procargs, comments, specials ) :
+
+    """
+    Return list with Fortran90 source lines with the interface to C function.
+
+    Implement a number of specific routines for different argument data types,
+    for example 'integer', 'real(4)', 'real(8)', etc.
+    Add a generic interface to collect the specific routines.
+    
+    Arguments:
+      procname
+        Procedure name.
+      procargs
+        List with function argument descriptions. 
+        Each item of the list is a dictionairy that of the form:
+          { 'name'   : 'values',    # variable name
+            'type'   : 'int',       # data type
+            'intent' : 'in',        # intent
+            'dims'   : '(*)' }      # array shape (if relevant)
+      comments
+        List with the original DoxyGen comment in the C header file.
+    
+    """
+    
+    # target name:
+    newprocname = procname.replace('CTA','CTA_F90')
+    
+    # current specials:
+    if procname in specials.keys() :
+        special = specials[procname]
+    else :
+        special = {}
+    #endif
+
+    ## info ...
+    #print 'xxx3 special = ', special
+    
+    # dummy:
+    argname_nval = 'no-nval'
+    argname_datatype = 'no-datatype'
+    multishape = False
+    # create argument lists:
+    argsline = ''
+    argsline_call = ''
+    # loop over initial arguments:
+    for iarg in range(len(procargs)) :
+        # current:
+        farg = procargs[iarg]
+        # extract:
+        argname = farg['name']
+        ## info ...
+        #print 'xxx4 testing argument "%s" ...' % argname
+        # add to line with arguments to called routine:
+        if len(argsline_call) > 0 : argsline_call = argsline_call+', '
+        argsline_call = argsline_call+argname
+        # generic argument ?
+        if ('generic' in special.keys()) and (argname == special['generic']) :
+            ## info ...
+            #print '  x4 generic variable ...'
+            # multi shape dimension ?
+            if (argname+'_dims' in special.keys()) and (special[argname+'_dims'] == '(*)') :
+                #print '  x4 multi shape ...'
+                multishape = True
+            #endif
+        #endif
+        # 'nval' is not needed, implied by specific routine:
+        if argname == 'nval' :
+            # store for later usage:
+            argname_nval = argname
+            # next:
+            continue
+        #endif
+        # 'datatype' is not needed, implied by specific routine:
+        if argname == 'datatype' :
+            # store for later usage:
+            argname_datatype = argname
+            # next:
+            continue
+        #endif
+        # add to line:
+        if len(argsline) > 0 : argsline = argsline+', '
+        argsline = argsline+argname
+    #endfor
+    
+    # init list with required parameters:
+    params = []
+    # scan arguments:
+    for arg in procargs :
+        # search for known parameters:
+        for param in ['CTA_HANDLE_IKIND','CTA_TIME_RKIND'] :
+            # needed for this type ?
+            if param in arg['type'] :
+                # add if not present yet:
+                if param not in params : params.append(param)
+            #endif
+        #endfor  # known parameters
+    #endfor  # arguments
+    
+    # init ifcult:
+    ifc = []
+    rtn = []
+    
+    # generic types:
+    gtypes = ['integer','real4','real8']
+    if procname == 'CTA_Vector_SetVal' : gtypes.append('char')
+
+    # add original comment:
+    for comment in comments :
+        ifc.append( '  ! %s' % comment )
+    #endfor
+    # extra seperation comment:
+    ifc.append( '  !' )
+    # start interface:
+    ifc.append( '  interface %s' % newprocname )
+    # loop over types:
+    for gtype in gtypes :
+
+        ## info ...
+        #print '      xxx generic type : ', gtype, '; multishape ', multishape
+        
+        # set:
+        if gtype == 'integer' :
+            spectype = 'integer'
+            cta_kind = 'CTA_INTEGER'
+        elif gtype == 'real4' : 
+            spectype = 'real(4)'
+            cta_kind = 'CTA_REAL'
+        elif gtype == 'real8' : 
+            spectype = 'real(8)'
+            cta_kind = 'CTA_DOUBLE'
+        elif gtype == 'char' : 
+            spectype = 'character(len=*)'
+            cta_kind = 'CTA_STRING'
+        else :
+            print 'ERROR - generic type "%s" not supported' % gtype
+            raise Exception
+        #endif
+        
+        # multiple shapes ?
+        if multishape and (gtype != 'char') :
+        
+            # loop over suported ranks:
+            for rank in range(1,8) :
+        
+                # specific routine:
+                rname = '%s_%s_%id' % (procname,spectype.replace('(','').replace(')',''),rank)
+
+                # add line to interface:
+                ifc.append( '    module procedure %s' % rname )
+
+                # routine header:
+                rtn.append( '    subroutine %s( %s )' % (rname,argsline) )
+                # use parameters if necessary:
+                for param in params :
+                    rtn.append( '      use CTA_F90_Parameters, only : %s' % param )
+                #endfor
+                # use kind parameter:
+                rtn.append( '      use CTA_F90_Parameters, only : %s' % cta_kind )
+                # loop over arguments:
+                for arg in procargs :
+                    # extract:
+                    argname = arg['name']
+                    argtype = arg['type']
+                    argdims = arg['dims']
+                    # skip some ...
+                    if argname == argname_nval     : continue
+                    if argname == argname_datatype : continue
+                    # change to specific type:
+                    if argtype == 'void' : argtype = spectype
+                    # adhoc ...
+                    if argname+'_dims' in special.keys() : argdims = special[argname+'_dims']
+                    # replace:
+                    if argdims == '(*)' :
+                        for k in range(rank) :
+                            if k == 0 :
+                                argdims = '(:'
+                            else :
+                                argdims = argdims+',:'
+                            #endif
+                        #endfor
+                        argdims = argdims+')'
+                    #endif
+                    ## info ...
+                    #print '        xxx argument : ', argname, argdims
+                    # add argument line:
+                    rtn.append( '      %-30s, intent(%-5s)     ::  %s%s' % (argtype,arg['intent'],arg['name'],argdims) )
+                #endfor
+                # add call to C-routine:
+                rtn.append( '      call %s( %s )' % (procname,argsline_call.replace(argname_datatype,cta_kind).replace(argname_nval,'size(val)')) )
+                # end of routine interface:
+                rtn.append( '    end subroutine %s' % rname )
+                rtn.append( '' )
+                
+            #endfor  # ranks
+            
+        else :
+
+            # specific routine:
+            rname = '%s_%s' % (procname,gtype)
+
+            # add line to interface:
+            ifc.append( '    module procedure %s' % rname )
+
+            # routine header:
+            rtn.append( '    subroutine %s( %s )' % (rname,argsline) )
+            # use parameters if necessary:
+            for param in params :
+                rtn.append( '      use CTA_F90_Parameters, only : %s' % param )
+            #endfor
+            # use kind parameter:
+            rtn.append( '      use CTA_F90_Parameters, only : %s' % cta_kind )
+            # other, not always necessary actually:
+            rtn.append( '      use CTA_F90_Parameters, only : CTA_OK' )
+            # loop over arguments:
+            for arg in procargs :
+                # extract:
+                argname = arg['name']
+                argtype = arg['type']
+                argdims = arg['dims']
+                # skip some ...
+                if argname == argname_nval     : continue
+                if argname == argname_datatype : continue
+                # change to specific type:
+                if argtype == 'void' : argtype = spectype
+                # adhoc ...
+                if argname+'_dims' in special.keys() : argdims = special[argname+'_dims']
+                # add argument line:
+                rtn.append( '      %-30s, intent(%-5s)     ::  %s%s' % (argtype,arg['intent'],arg['name'],argdims) )
+            #endfor
+            
+            # switch:
+            if (procname == 'CTA_Vector_SetVal') and (gtype == 'char') :
+            
+                # add calls to C-routines:
+                rtn.append( '      integer(CTA_HANDLE_IKIND)  ::  sval' )
+                rtn.append( '      call CTA_String_Create( sval, status )' )
+                rtn.append( '      if (status/=CTA_OK) return' )
+                rtn.append( '      call CTA_String_Set( sval, val, status )' )
+                rtn.append( '      if (status/=CTA_OK) return' )
+                rtn.append( '      call %s( %s )' % (procname,argsline_call.replace('val','sval').replace(argname_datatype,cta_kind).replace(argname_nval,'size(val)')) )
+                rtn.append( '      if (status/=CTA_OK) return' )
+                rtn.append( '      call CTA_String_Free( sval, status )' )
+                rtn.append( '      if (status/=CTA_OK) return' )
+                
+            else :
+
+                # add call to C-routine:
+                rtn.append( '      call %s( %s )' % (procname,argsline_call.replace(argname_datatype,cta_kind).replace(argname_nval,'size(val)')) )
+                
+            #endif
+
+            # end of routine interface:
+            rtn.append( '    end subroutine %s' % rname )
+            rtn.append( '' )
+            
+        #endif  # multiple shapes
+
+    #endfor  # generic types
+    # end of interface:
+    ifc.append( '  end interface' )
+    ifc.append( '' )
+
+    # add routine for 'handle' type ?
+    if procname in ['CTA_Vector_GetVal','CTA_Vector_SetVal'] :
+
+        # specific routines:
+        #   CTA_Vector_GetVal   ->   CTA_F90_Vector_GetHandle
+        #   CTA_Vector_SetVal   ->   CTA_F90_Vector_SetHandle
+        rname = procname.replace('CTA_','CTA_F90_').replace('Val','Handle')
+        # which kind ?
+        spectype = 'integer'
+        cta_kind = 'CTA_HANDLE'
+
+        # routine header:
+        rtn.append( '    subroutine %s( %s )' % (rname,argsline) )
+        # use parameters if necessary:
+        for param in params :
+            rtn.append( '      use CTA_F90_Parameters, only : %s' % param )
+        #endfor
+        # use kind parameter:
+        rtn.append( '      use CTA_F90_Parameters, only : %s' % cta_kind )
+        # other
+        rtn.append( '      use CTA_F90_Parameters, only : CTA_OK' )
+        # loop over arguments:
+        for arg in procargs :
+            # extract:
+            argname = arg['name']
+            argtype = arg['type']
+            argdims = arg['dims']
+            # skip some ...
+            if argname == argname_nval     : continue
+            if argname == argname_datatype : continue
+            # change to specific type:
+            if argtype == 'void' : argtype = spectype
+            # adhoc ...
+            if argname+'_dims' in special.keys() : argdims = special[argname+'_dims']
+            # add argument line:
+            rtn.append( '      %-30s, intent(%-5s)     ::  %s%s' % (argtype,arg['intent'],arg['name'],argdims) )
+        #endfor
+
+        # add call to C-routine:
+        rtn.append( '      call %s( %s )' % (procname,argsline_call.replace(argname_datatype,cta_kind).replace(argname_nval,'size(val)')) )
+
+        # end of routine interface:
+        rtn.append( '    end subroutine %s' % rname )
+        rtn.append( '' )
+
+    #endif # handle type
+
+    # ok
+    return ifc,rtn
+    
+#enddef  # cta_f90_interface_generic
+
+
+# ***
+
+        
+def cta_module_from_hfile( hfile, modname, specials ) :
+
+    """
+    Create F90 module from C header file.
+    """
+
+    # basename:
+    bname = os.path.basename(hfile).rstrip('.h')
+
+    # read file:
+    f = open( hfile, 'r' )
+    lines = f.readlines()
+    f.close()
+
+    # start of function header:
+    fheader_comment_start = '/** \\brief'
+
+    # init results:
+    interface_lines = []
+    routine_lines = []
+
+    # loop over lines:
+    while len(lines) > 0 :
+
+        # next line:
+        line = lines.pop(0).strip()
+
+        # headers start with DoxyGen desxription:
+        if not line.startswith(fheader_comment_start) : continue
+
+        # collect comment:
+        comments = [ line.replace('/**','',1) ]
+        while True :
+            # next line:
+            line = lines.pop(0).strip()
+            # end of comment ? then leave:
+            if line.startswith('*/') : break
+            # add:
+            comments.append(line.replace('*','',1))
+        #endwhile
+
+        # collect function definition;
+        # first get next line:
+        line = lines.pop(0).strip()
+        # remove trailing comment:  xxx /* ... */
+        if '/*' in line : line,cmnt = line.split('/*')
+        #  init function line:
+        fline = line
+        
+        ## testing ...
+        ##testing = 'CTA_TreeVector_GetVals'
+        #testing = 'CTA_TreeVector_SetVals'
+        #if testing not in fline : continue
+        #print 'TESTING "%s" ...' % testing
+
+        # info ...
+        print '  found procedure line: %s ...' % fline
+        
+        # skip pre-processor functions:
+        if fline.startswith('#define') :
+            print '    --> pre-processor function; skip ...'
+            continue
+        #endif
+        # add rest:
+        while not fline.endswith(';') :
+            # next line:
+            line = lines.pop(0).strip()
+            # remove trailing comment:  xxx /* ... */
+            if '/*' in line : line,cmnt = line.split('/*')
+            # add:
+            fline = fline+line
+        #endwhile
+        # split in function result/name and arguments:
+        ret_and_name,argline = fline.rstrip(');').split('(')
+        # split in result and name:
+        procret,procname = ret_and_name.rsplit(None,1)
+        # info:
+        print '    name : %s' % procname
+        print '    args : %s' % argline
+        print '    ret  : %s' % procret
+        
+        # no internal routines:
+        if procname.startswith('CTAI_') :
+            print '    --> internal procedure, skip ...'
+            continue
+        #endif
+
+        # no usr modules:
+        if hfile.find('usr_')>=0 :
+            print '    --> example procedure, skip ...'
+            continue
+        #endif
+        
+        # explicitly skip this ?
+        if (procname in specials.keys()) and specials[procname].has_key('skip') :
+            print '    --> special, skip this procedure ...'
+            continue
+        #endif
+        
+        # by default no return status assumed:
+        with_return_status = False
+        # only certain return values supported ...
+        if procret == 'CTAEXPORT int' :
+            # set flag:
+            with_return_status = True
+        elif procret == 'CTAEXPORT void' :
+            # noting special ..
+            pass
+        elif procret.startswith('CTANOF90') :
+            # skip because of this label
+            print '    --> CTANOEXPORT found, skip'
+            continue
+        elif procret.startswith('CTANOEXPORT') :
+            # skip because of this label
+            print '    --> CTANOEXPORT found, skip'
+            continue
+        else :
+            # unknown ..
+            print '    --> ERROR: unsupported return value type, skip ...'
+            raise Exception
+        #endif
+
+        # add status argument ?
+        if with_return_status :
+            # extend argument line:
+            if len(argline) > 0 : argline = argline+', '
+            argline = argline+'int status'
+            # loop over comment lines to search for '\return' tag;
+            # note that if no such tag is found, an error will be raised
+            # later on since no '\param status' description is present
+            for icomment in range(len(comments)) :
+                # contains \return key ?
+                if '\\return' in comments[icomment] :
+                    # change return status from function result to argument:
+                    comments[icomment] = comments[icomment].replace('\\return','\\param status O')
+                    # leave loop over comments:
+                    break
+                #endif
+            #endfor  # comment lines
+        #endif  # return status argument
+
+        # extract argument info from comment:  
+        #    argis['data']{'intent'      : 'in',
+        #                  'description' : 'input data'}
+        argis = {}
+        # loop over 
+        for comment in comments :
+            # cleanup:
+            comment = comment.strip()
+            # only the parameter descriptions:
+            if not comment.startswith('\param') : continue
+            # split:
+            try :
+                tag,name,ichar,description = comment.split(None,3)
+            except :
+                print 'ERROR - could not split comment into tag, name intent, and description :'
+                print 'ERROR - comment line : %s' % comment
+                print 'ERROR - file         : %s' % hfile
+                raise Exception
+            # convert intent:
+            ichar = ichar.lower()
+            if ichar in ['i','(i)','(input)'] :
+                intent = 'in'
+            elif ichar in ['o','(o)','(output)'] :
+                intent = 'out'
+            elif ichar in ['io','(io)','(input/output)'] :
+                intent = 'inout'
+            else :
+                print 'ERROR - could not translate intent "%s"' % ichar
+                print 'ERROR - comment line : %s' % comment
+                print 'ERROR - file         : %s' % hfile
+                raise Exception
+            #endif
+            # store:
+            argis[name] = { 'intent' : intent, 'description' : description }
+        #endfor
+
+        # init list with fortan argument descriptions:
+        fargs = []
+        # any arguments at all ?
+        if len(argline) > 0 :
+            # split into c-arguments:
+            cargs = argline.split(',')
+            # translate into fortran arguments:
+            for carg in cargs :
+                # cleanup:
+                carg = carg.strip()
+                ## remove 'const' part:
+                #if carg.startswith('const') : carg = carg[6:]
+                # split into type and name:
+                try :
+                    argtype,argname = carg.rsplit(None,1)
+                except :
+                    print 'ERROR - could not split carg "%s"' % carg
+                    raise Exception
+                #endtry
+                # dereferenced point ? remove starts:
+                if argname.startswith('**') : argname = argname[2:]
+                # pointer values ?
+                ispointer = argname.startswith('*')
+                if ispointer : argname = argname[1:]
+                # translate type:
+                ctype = argtype
+                if ctype in ['char','const char','char*','const char*'] :
+                    ftype = 'character(len=*)'
+                elif ctype in ['int','const int','int*'] :
+                    ftype = 'integer'
+                elif ctype == 'double' :
+                    if 'time' in procname.lower() :
+                        ftype = 'real(CTA_TIME_RKIND)'
+                    else :
+                        ftype = 'real(8)'
+                    #endif
+                elif ctype in ['CTA_Datatype','BOOL','FILE'] :
+                    ftype = 'integer'
+                elif ctype.startswith('CTA_') or ctype.startswith('const CTA_') :
+                    ftype = 'integer(CTA_HANDLE_IKIND)'
+                elif ctype in ['void','const void'] :
+                    ftype = 'void'
+                else :
+                    print 'ERROR - could not translate C-type : %s' % ctype
+                    raise Exception
+                #endif
+                # cleanup name:
+                argname = argname.replace('*','')
+                # array ?
+                if '[' in argname :
+                    argname,dimlist = argname.rstrip(']').split('[')
+                    ndim = dimlist.count(',')+1
+                    argdims = '('
+                    for idim in range(ndim) :
+                        if idim > 0 : argdims = argdims+','
+                        argdims = argdims+'*'
+                    #endfor
+                    argdims = argdims+')'
+                else :
+                    argdims = ''
+                #endif
+                # check ..
+                if argname not in argis.keys() :
+                    print 'ERROR - no argument description found for "%s"' % argname
+                    print 'ERROR - procedure : %s' % procname
+                    print 'ERROR - filename  : %s' % hfile
+                    raise Exception
+                #endif
+                # extract intent:
+                intent = argis[argname]['intent']
+                # input pointer ? probably an array:
+                if ispointer and (intent == 'in') and \
+                     (ctype not in ['const char','FILE']) :
+                    argdims = '(*)'
+                #endif
+                # add:
+                fargs.append( { 'type' : ftype, 'intent' : intent, 'name' : argname, 'dims' : argdims } )
+            #endfor  # arguments
+        #endif  # any arguments at all
+        
+        # get f90 interface lines:
+        if (procname in specials.keys()) and ('generic' in specials[procname]) :
+            ## info ...
+            #print 'TESTING: create generic interfaces'
+            # convert:
+            interface,routines = cta_f90_interface_generic( procname, fargs, comments, specials )
+        else :
+            ## info ...
+            #print 'TESTING: create interface'
+            # convert:
+            interface,routines = cta_f90_interface( procname, fargs, comments )
+        #endif
+        
+        ## testing ...
+        #if procname == 'CTA_File_Set' : 
+        #    for iline in ilines : print iline
+        #    print ''
+        #    raise Exception
+        ##endif
+
+        # add:
+        if len(interface) > 0 : interface_lines.extend( interface )
+
+        # add:
+        if len(routines) > 0 : routine_lines.extend( routines )
+
+    #endwhile
+    
+    # init empty module:
+    mlines = []
+    # interfaces defined ?
+    if (len(interface_lines) > 0) or (len(routine_lines) > 0) :
+        # add module header:
+        mlines.append( 'module %s' % modname )
+        mlines.append( '' )
+        mlines.append( '  implicit none' )
+        mlines.append( '' )
+        mlines.append( '  public' )
+        mlines.append( '' )
+        if len(interface_lines) :
+            mlines.extend(interface_lines)
+        #endif
+        mlines.append( '' )
+        if len(routine_lines) > 0 :
+            mlines.append( 'contains' )
+            mlines.append( '' )
+            mlines.extend( routine_lines )
+        mlines.append( 'end module %s' % modname )
+        mlines.append( '' )
+    #endif
+    
+    # ok
+    return mlines
+
+#enddef  # cta_module_from_hfile
+
+
+####################################################################
+###
+### main program
+###
+####################################################################
+
+if __name__ == "__main__" :
+
+    # info ...
+    print ''
+    print '** genenerate cta_f90 interface **'
+    print ''
+
+    # directory with include files:
+    #hdir = 'openda_1.0/public/core/native/include'
+    #hdir = 'openda_1.1_beta/core/native/include/'
+    #hdir = 'openda_2.0/core/native/include/'
+    #hdir = 'openda_2.x/core/native/include/'
+    hdir = '../../include'
+    
+    # extra source files, mainly templates:
+    srcdir = '.'
+    
+    # Adhoc fixes ..
+    # Fill a dictionairy with CTA routine names for which something should be fixed.
+    # Assign a dictionairy to this with one or more of the following key/value pairs:
+    #
+    #   "generic" : <variable>       # <variable> is generic, thus integer, real, etc.
+    #
+    #   "<variable>_dims" : <shape>  # dimensions of <variable> are not explicitly
+    #                                # mentioned in header file, therefore specify them here
+    specials = {}
+    specials['CTA_Vector_GetVal'         ] = { 'generic' : 'vals', 'val_dims'  : ''    } # should have been "val" ...
+    specials['CTA_Vector_GetVals'        ] = { 'generic' : 'vals', 'vals_dims' : '(*)' }
+    specials['CTA_Vector_SetVal'         ] = { 'generic' : 'val' , 'val_dims'  : ''    }
+    specials['CTA_Vector_SetVals'        ] = { 'generic' : 'vals', 'vals_dims' : '(*)' }
+#    specials['CTA_Vector_AppendVal'      ] = { 'generic' : 'val' , 'val_dims'  : ''    }
+    specials['CTA_Vector_AppendVal'      ] = { 'skip' : True }
+    specials['CTA_Vector_SetConstant'    ] = { 'generic' : 'val' , 'val_dims'  : ''    }
+    specials['CTA_TreeVector_GetVal'     ] = { 'generic' : 'val' , 'val_dims'  : ''    }
+    specials['CTA_TreeVector_GetVals'    ] = { 'generic' : 'val' , 'val_dims'  : '(*)' }
+    specials['CTA_TreeVector_SetVal'     ] = { 'generic' : 'val' , 'val_dims'  : ''    }
+    specials['CTA_TreeVector_SetVals'    ] = { 'generic' : 'val' , 'val_dims'  : '(*)' } # should have been "vals" ...
+    specials['CTA_TreeVector_AppendVal'  ] = { 'generic' : 'val' , 'val_dims'  : ''    }
+    specials['CTA_TreeVector_SetConstant'] = { 'generic' : 'val' , 'val_dims'  : ''    }
+    specials['CTA_Pack_Add'              ] = { 'generic' : 'data', 'data_dims' : '(*)' }
+    specials['CTA_Pack_Get'              ] = { 'generic' : 'data', 'data_dims' : '(*)' }
+    
+    # destination:
+    mdir = 'generated'
+    
+    # create destination directory if necessary:
+    if not os.path.isdir(mdir) : os.makedirs(mdir)
+    
+    # list of created modules:
+    modnames = []
+    
+    # info ...
+    print 'scan "%s" for header files ...' % hdir
+
+    # loop over all files:
+    for fname in os.listdir(hdir) :
+
+        # only header files ...
+        if not fname.startswith('cta_') : continue
+        if not fname.endswith('.h') : continue
+        
+        ## testing ...
+        #if fname != 'cta_treevector.h' : continue
+        #print 'TESTING: only file "%s" ...' % fname
+    
+        ## info ...
+        #print '  header file "%s" ...' % fname
+        
+        # full path:
+        hfile = os.path.join(hdir,fname)
+
+        # split:
+        bname,ext = os.path.splitext(fname)
+        # target module name:
+        modname = bname.replace('cta_','cta_f90_')
+        
+        # avoid problems with module names that are the same as one of their routines:
+        if modname in ['cta_f90_flush'] :
+            modname = modname+'_mod'
+        #endif
+
+        # info ...
+        print ''
+        print 'processing %s ...' % hfile
+
+        # get module lines:
+        mlines = cta_module_from_hfile( hfile, modname, specials )
+
+        # write ?
+        if len(mlines) > 0 :
+            # name of module file:
+            mfile = os.path.join( mdir, modname+'.f90' )
+            # write:
+            f = open( mfile, 'w' )
+            for mline in mlines :
+                f.write( '%s\n' % mline )
+            #endfor
+            f.close()
+            # add to collection:
+            modnames.append( modname )
+        else :
+            print '  (no procedures found)'
+        #endif
+
+    #endfor  # filenames
+    
+    # collection module:
+    mname = 'cta_f90'
+    # info ...
+    print ''
+    print 'create collection module %s ...' % mname
+    #
+    # copy parameter module:
+    pfile = 'cta_f90_parameters.f90'
+    print '  copy %s ...' % pfile
+    shutil.copy( os.path.join(srcdir,pfile), os.path.join(mdir,pfile) )
+    #
+    # copy makefile
+    #mfile = 'cta_f90.make'
+    #print '  copy %s ...' % mfile
+    #shutil.copy( os.path.join(srcdir,mfile), os.path.join(mdir,'Makefile') )
+    #
+    # read contained routines:
+    incfile = 'cta_f90_contains.inc'
+    print '  read %s ...' % incfile
+    f = open( os.path.join(srcdir,incfile) )
+    lines = f.readlines()
+    f.close()
+    # cleanup:
+    inclines = []
+    for line in lines :
+        inclines.append( line.rstrip() )
+    #endfor
+    #
+    # fill lines:
+    print '  fill main module ...'
+    mlines = []
+    mlines.append( 'module %s' % mname )
+    mlines.append( '' )
+    mlines.append( '  use cta_f90_parameters' )
+    for modname in modnames :
+        mlines.append( '  use %s' % modname )
+    #endfor
+    mlines.append( '' )
+    mlines.append( '  implicit none' )
+    mlines.append( '' )
+    mlines.append( '  public' )
+    mlines.append( '' )
+    mlines.append( 'contains' )
+    mlines.append( '' )
+    mlines.extend( inclines )
+    mlines.append( '' )
+    mlines.append( 'end module %s' % mname )
+    # target file:
+    mfile = os.path.join( mdir, mname+'.f90' )
+    # write:
+    print '  write %s ...' % mfile
+    f = open( mfile, 'w' )
+    for mline in mlines :
+        f.write( '%s\n' % mline )
+    #endfor
+    f.close()
+    
+    # ok
+    print ''
+    print 'End.'
+    print ''
+
+#endif   # __main__
+
diff --git a/costa/native/cta_f90/generated/CMakeLists.txt b/costa/native/cta_f90/generated/CMakeLists.txt
index e94c5652e..7b24ac3e7 100644
--- a/costa/native/cta_f90/generated/CMakeLists.txt
+++ b/costa/native/cta_f90/generated/CMakeLists.txt
@@ -15,5 +15,6 @@ cta_f90_initialise.f90  cta_f90_model_factory.f90  cta_f90_string.f90
 )
 
 add_library(cta_f90 SHARED ${SOURCES})
-target_include_directories(cta_f90 PUBLIC ../include)
+target_include_directories(cta_f90 PRIVATE ../include)
+target_link_libraries(cta_f90 PRIVATE cta)
 

From 35fb2f8793f61ae84b93b7a7c137b4aee2c375eb Mon Sep 17 00:00:00 2001
From: Werner Kramer <werner.kramer@vortech.nl>
Date: Wed, 13 Sep 2023 14:08:18 +0200
Subject: [PATCH 05/15] Add blas and lapack to install targets

---
 costa/native/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/costa/native/CMakeLists.txt b/costa/native/CMakeLists.txt
index 2eb93442e..a4e393e6e 100644
--- a/costa/native/CMakeLists.txt
+++ b/costa/native/CMakeLists.txt
@@ -13,7 +13,7 @@ file(GLOB CTA_PUBLIC_HEADERS
     "cta/include/*.h"
 )
 
-install(TARGETS cta cta_f90 opendabridge 
+install(TARGETS blas lapack cta cta_f90 opendabridge
     LIBRARY DESTINATION lib
 )
 

From 7aeb8658d10125f0cc49baafd9dd0b52637fd860 Mon Sep 17 00:00:00 2001
From: Werner Kramer <werner.kramer@vortech.nl>
Date: Wed, 13 Sep 2023 15:02:17 +0200
Subject: [PATCH 06/15] Add missing openda bridge .c and .h files

---
 costa/native/bridge/src/CMakeLists.txt        |   2 +
 .../src/jni_cta_CtaObsdescr_NativeToJava.c    | 258 ++++++++++++++++++
 .../src/jni_cta_CtaObsdescr_NativeToJava.h    |  33 +++
 costa/native/bridge/src/jni_cta_utils.h       |  45 +++
 costa/native/bridge/src/jni_datatypes.h       |  31 +++
 5 files changed, 369 insertions(+)
 create mode 100644 costa/native/bridge/src/jni_cta_CtaObsdescr_NativeToJava.c
 create mode 100644 costa/native/bridge/src/jni_cta_CtaObsdescr_NativeToJava.h
 create mode 100644 costa/native/bridge/src/jni_cta_utils.h
 create mode 100644 costa/native/bridge/src/jni_datatypes.h

diff --git a/costa/native/bridge/src/CMakeLists.txt b/costa/native/bridge/src/CMakeLists.txt
index d96444e7b..843749bbc 100644
--- a/costa/native/bridge/src/CMakeLists.txt
+++ b/costa/native/bridge/src/CMakeLists.txt
@@ -4,6 +4,8 @@ find_package(JNI REQUIRED)
 
 set(LIBRARY_OUTPUT_PATH  ${CMAKE_BINARY_DIR}/lib)
 set(SOURCES
+  jni_cta_CtaObsdescr_NativeToJava.c               jni_cta_CtaObsdescr_NativeToJava.h
+  jni_cta_utils.h                                  jni_datatypes.h
   jni_cta_utils.cpp                                org_openda_costa_CtaRelationTable.cpp
   org_openda_costa_CtaArray.cpp                    org_openda_costa_CtaStochObserver.cpp
   org_openda_costa_CtaInitialize.cpp               org_openda_costa_CtaTime.cpp
diff --git a/costa/native/bridge/src/jni_cta_CtaObsdescr_NativeToJava.c b/costa/native/bridge/src/jni_cta_CtaObsdescr_NativeToJava.c
new file mode 100644
index 000000000..a331b2210
--- /dev/null
+++ b/costa/native/bridge/src/jni_cta_CtaObsdescr_NativeToJava.c
@@ -0,0 +1,258 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/trunk/costa/src/cta/cta_obsdescr_combine.c $
+$Revision: 671 $, $Date: 2008-10-07 14:49:42 +0200 (Tue, 07 Oct 2008) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2010  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/**
+\file  cta_handles.h
+\brief Implemenation of a generic OpenDA observation description wrapper for use in native COSTA code 
+*/
+
+
+#include <string.h>
+#include <stdio.h>
+#include "cta.h"
+
+#include "jni_datatypes.h"
+#include "jni.h"
+#include "jni_cta_utils.h"
+
+
+#define CLASSNAME  "CtaObsDescr_nativeToJava"
+
+#define IDEBUG (0)
+
+typedef struct {
+CTA_Handle myhandle;
+sJni_Class classInstance;
+} CTAI_ObsDescr_NativeToJava;
+
+
+#define METHOD "CTAI_ObsDescr_nativeToJava_Create_Size"
+void CTAI_ObsDescr_nativeToJava_Create_Size(int *memsize, int *retval){
+   *memsize=(int) sizeof(CTAI_ObsDescr_NativeToJava);
+   *retval=CTA_OK;
+};
+
+#undef METHOD
+#define METHOD "CTAI_ObsDescr_nativeToJava_Create_Init"
+void CTAI_ObsDescr_nativeToJava_Create_Init(CTA_ObsDescr *myhandle, CTAI_ObsDescr_NativeToJava *descr,
+                                            CTA_Handle *usrdat, int *retval)
+{
+   JNIEnv *env;
+   sJni_Class *classIn;
+
+   //Set the runtime and class from userdata
+   
+   CTA_Handle_GetData(*usrdat, (void *) &classIn);
+   env     = classIn->env;
+ 
+   descr->classInstance.env = env;
+   descr->classInstance.cls = (*env)->NewGlobalRef(env, classIn->cls);
+   descr->classInstance.obj = (*env)->NewGlobalRef(env, classIn->obj);
+   descr->myhandle          = *myhandle;
+   
+};
+
+#undef METHOD
+#define METHOD "CTAI_ObsDescr_nativeToJava_CreateSel"
+void CTAI_ObsDescr_nativeToJava_CreateSel(CTAI_ObsDescr_NativeToJava *descr,
+          CTA_String *selection, CTA_RelTable *reltab, 
+          CTA_ObsDescr *myhandle_out, 
+          CTAI_ObsDescr_NativeToJava *descrout, int *retval){
+
+CTA_WRITE_ERROR("Method not implemented");
+*retval=CTA_NOT_IMPLEMENTED;
+
+}
+         
+
+
+#undef METHOD
+#define METHOD "CTAI_ObsDescr_nativeToJava_Get_Keys"
+void CTAI_ObsDescr_nativeToJava_Get_Keys(CTAI_ObsDescr_NativeToJava *descr,
+                                         CTA_Vector *keys, int *retval)
+{
+   jobject jKeys;
+
+   // Find method
+   JNIEnv *env= descr->classInstance.env;
+   jclass cls= descr->classInstance.cls;
+   jobject obj = descr->classInstance.obj;
+   jmethodID func;
+   
+   /* Gewoon aanroepen zoals in alle voorbeelden staat */
+   func = (*env)->GetMethodID(env, cls, "getPropertyKeys", "()[Ljava/lang/String;");
+   if (func) {
+      // call method and get keys
+      jKeys =(*env)->CallObjectMethod(env,obj,func);
+
+      // convert java array of strings to native vector of strings
+      *retval = cta_jni_JavaStringVecToNativeVec(env,  (jobjectArray) jKeys, *keys);
+   }
+   else {
+      CTA_WRITE_ERROR("Cannot find java method name=getPropertyKeys signature=()[Ljava/lang/String;");
+      *retval = CTA_INTERNAL_ERROR;
+   } 
+};
+
+#undef METHOD
+#define METHOD "CTAI_ObsDescr_nativeToJava_Property_Count"
+void CTAI_ObsDescr_nativeToJava_Property_Count(
+         CTAI_ObsDescr_NativeToJava *descr, 
+         int *nkeys, 
+         int *retval)
+{
+   // Find method
+   JNIEnv *env = descr->classInstance.env;
+   jobject obj = descr->classInstance.obj;
+   jclass  cls = descr->classInstance.cls;
+
+   jmethodID func = (*env)->GetMethodID(env, cls, "getPropertyCount", "()I");
+   if (func) {
+      // Call method
+      *nkeys=(*env)->CallIntMethod(env,obj,func);
+      *retval=CTA_OK;
+   }
+   else {
+      CTA_WRITE_ERROR("Cannot find java method name=getPropertyCount signature=()I");
+      *retval=CTA_INTERNAL_ERROR;
+   }   
+}
+;
+
+#undef METHOD
+#define METHOD "CTAI_ObsDescr_nativeToJava_Observation_Count"
+   void CTAI_ObsDescr_nativeToJava_Observation_Count(
+      CTAI_ObsDescr_NativeToJava *descr, 
+         int *nobs, 
+         int *retval)
+{
+
+   JNIEnv *env= descr->classInstance.env;
+   jobject obj = descr->classInstance.obj;
+   jclass cls= descr->classInstance.cls;
+
+   jmethodID func = (*env)->GetMethodID(env, cls, "getObservationCount", "()I");
+   if (func) {
+
+      // Call method
+      *nobs=(*env)->CallIntMethod(env,obj,func);
+      *retval=CTA_OK;
+   }
+   else {
+      CTA_WRITE_ERROR("Cannot find java method name=getObservationCount signature=()I");
+      *retval=CTA_INTERNAL_ERROR;
+   }   
+};
+
+
+#undef METHOD
+#define METHOD "CTAI_ObsDescr_nativeToJava_Get_Properties"
+void CTAI_ObsDescr_nativeToJava_Get_Properties(
+         CTAI_ObsDescr_NativeToJava *descr, 
+         const char *Key,
+         CTA_Vector *Properties,
+         CTA_Datatype *datatype,
+         int *retval)
+{
+   jobject jProperties;
+   jstring jKey;
+
+   // Find method
+   JNIEnv *env= descr->classInstance.env;
+   jclass cls= descr->classInstance.cls;
+   jobject obj = descr->classInstance.obj;
+   jmethodID func;
+   
+   func = (*env)->GetMethodID(env, cls, "getStringProperties", "(Ljava/lang/String;)[Ljava/lang/String;");
+   if (func) {
+      // call method and get properties
+      jKey= (*env)->NewStringUTF(env, Key);
+
+      jProperties =(*env)->CallObjectMethod(env,obj,func,jKey);
+      
+      // convert java array of strings to native vector
+      if (jProperties){
+         *retval = cta_jni_JavaStringVecToNativeVec(env,  (jobjectArray) jProperties, *Properties);
+      } 
+      else {
+         CTA_WRITE_ERROR("getStringProperties returned no properties");
+         *retval = CTA_JNI_INTERFACING_ERROR;
+      }
+   }
+   else {
+      CTA_WRITE_ERROR("Cannot find java method name=getStringProperties signature=(Ljava/lang/String;)[Ljava/lang/String;");
+      *retval = CTA_INTERNAL_ERROR;
+   }   
+}
+
+
+#undef METHOD
+#define METHOD "CTAI_ObsDescr_nativeToJava_Free"
+void CTAI_ObsDescr_nativeToJava_Free(
+         CTAI_ObsDescr_NativeToJava *descr, 
+         int *retval)
+{
+   // Release the global references
+   JNIEnv *env = descr->classInstance.env;
+   (*env)->DeleteGlobalRef(env, descr->classInstance.cls);
+   (*env)->DeleteGlobalRef(env, descr->classInstance.obj);
+
+   // Since we did not allocate any memory, there is nothing to do   
+   *retval=CTA_OK;
+}
+
+
+
+#undef METHOD
+#define METHOD "CTA_ObsDescr_nativeToJava_initialise"
+void CTA_ObsDescr_nativeToJava_initialise(CTA_ObsDescrClass *hobsdescrcl)
+{
+   CTA_Intf hintf=0;
+   CTA_Func h_func[I_CTA_OBSDESCR_NUMFUNC];
+
+   int ierr;
+
+   // The vector h_func is filled with COSTA-function handles of the 
+   // implementations in this file.
+   ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_Create_Size,    hintf,
+                            &h_func[I_CTA_OBSDESCR_CREATE_SIZE]);
+   ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_Create_Init,    hintf,
+                            &h_func[I_CTA_OBSDESCR_CREATE_INIT]);
+   ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_Property_Count,    hintf,
+                            &h_func[I_CTA_OBSDESCR_COUNT_PROPERTIES]);
+   ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_Get_Properties,    hintf,
+                            &h_func[I_CTA_OBSDESCR_GET_PROPERTIES]);
+   ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_Observation_Count, hintf,
+                            &h_func[I_CTA_OBSDESCR_COUNT_OBSERVATIONS]);
+   ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_Get_Keys,    hintf,
+                            &h_func[I_CTA_OBSDESCR_GET_KEYS]);
+   ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_Free,    hintf,
+                            &h_func[I_CTA_OBSDESCR_FREE]);
+   ierr=CTA_Func_Create(" ",&CTAI_ObsDescr_nativeToJava_CreateSel,    hintf,
+                            &h_func[I_CTA_OBSDESCR_SELECTION]);
+   ierr=CTA_ObsDescr_DefineClass("cta_obsdescr_nativeToJava",h_func,hobsdescrcl);
+
+}
+
+
+
+
diff --git a/costa/native/bridge/src/jni_cta_CtaObsdescr_NativeToJava.h b/costa/native/bridge/src/jni_cta_CtaObsdescr_NativeToJava.h
new file mode 100644
index 000000000..b642da618
--- /dev/null
+++ b/costa/native/bridge/src/jni_cta_CtaObsdescr_NativeToJava.h
@@ -0,0 +1,33 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/trunk/costa/src/cta/cta_obsdescr_combine.c $
+$Revision: 671 $, $Date: 2008-10-07 14:49:42 +0200 (Tue, 07 Oct 2008) $
+
+COSTA: Problem solving environment for data assimilation
+Copyright (C) 2010  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void CTA_ObsDescr_nativeToJava_initialise(CTA_ObsDescrClass *hobsdescrcl);
+
+#ifdef __cplusplus
+}
+#endif
+
+
diff --git a/costa/native/bridge/src/jni_cta_utils.h b/costa/native/bridge/src/jni_cta_utils.h
new file mode 100644
index 000000000..c22e4c6d6
--- /dev/null
+++ b/costa/native/bridge/src/jni_cta_utils.h
@@ -0,0 +1,45 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/public/trunk/core/native/src/openda/bridge/jni_cta_utils.h $
+$Revision: 1553 $, $Date: 2010-05-07 14:55:23 +0200 (Fri, 07 May 2010) $
+
+OpenDA interface for COSTA.
+Copyright (C) 2007  Stef Hummel / Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+
+#include "jni.h"
+#include "cta.h"
+void cta_jni_setJavaEnv(JNIEnv *env);
+#ifdef __cplusplus
+void cta_jni_exception(JNIEnv *env, const char *name, const char *msg);
+#endif
+void cta_jni_exception(JNIEnv *env, const char *name, const char *msg, int retVal);
+void cta_jni_free(JNIEnv * env, jobject obj_this);
+CTA_Handle cta_jni_getCtaHandle(JNIEnv * env, jobject obj_this);
+
+void cta_jni_setCtaHandle(JNIEnv * env, jobject obj_this, CTA_Handle ctaHandle);
+void cta_jni_ExternalMessageWriter(char *className, char *method, char *message, char type);
+void cta_jni_ExternalMessageWriterSetID(JNIEnv *env, jclass classID, jmethodID methodID);
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+int cta_jni_JavaStringVecToNativeVec(JNIEnv *env,  jobjectArray jArray, CTA_Vector cVec);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/costa/native/bridge/src/jni_datatypes.h b/costa/native/bridge/src/jni_datatypes.h
new file mode 100644
index 000000000..4ec184a68
--- /dev/null
+++ b/costa/native/bridge/src/jni_datatypes.h
@@ -0,0 +1,31 @@
+/*
+$URL: https://repos.deltares.nl/repos/openda/openda_1/public/trunk/core/native/src/openda/bridge/jni_cta_utils.h $
+$Revision: 1257 $, $Date: 2009-12-21 13:42:01 +0100 (ma, 21 dec 2009) $
+
+OpenDA interface for COSTA.
+Copyright (C) 2010  Nils van Velzen
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+
+#include <jni.h>
+
+typedef struct {
+JNIEnv *env;           //Poiner to Java runtime environment;
+jclass cls;            //JNI handle to the java class instance;
+jobject obj;           //JNI object handle;
+} sJni_Class;
+

From 20cab716c79546e0b1be8970c1ea8a6223c72a4b Mon Sep 17 00:00:00 2001
From: Werner Kramer <werner.kramer@vortech.nl>
Date: Wed, 13 Sep 2023 17:25:36 +0200
Subject: [PATCH 07/15] Link to sqlite3

---
 costa/native/cta/src/CMakeLists.txt | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/costa/native/cta/src/CMakeLists.txt b/costa/native/cta/src/CMakeLists.txt
index 72c8a1ef7..c4b45d21e 100644
--- a/costa/native/cta/src/CMakeLists.txt
+++ b/costa/native/cta/src/CMakeLists.txt
@@ -1,5 +1,6 @@
 cmake_minimum_required(VERSION 3.9.1)
 find_package(LibXml2 REQUIRED)
+find_package(SQLite3 REQUIRED)
 
 set(LIBRARY_OUTPUT_PATH  ${CMAKE_BINARY_DIR}/lib)
 set(SOURCES
@@ -25,8 +26,12 @@ set(CTA_PUBLIC_HEADERS ${HEADER_FILES})
 
 target_include_directories(cta PRIVATE ../include)
 target_include_directories(cta PRIVATE ${CMAKE_SOURCE_DIR}/external)
-target_include_directories(cta PRIVATE ${LIBXML2_INCLUDE_DIR}) 
+target_include_directories(cta PRIVATE ${LIBXML2_INCLUDE_DIR})
+target_include_directories(cta PRIVATE ${SQLite3_INCLUDE_DIRS}) 
+
 target_link_libraries(cta PRIVATE ${LIBXML2_LIBRARIES})
+target_link_libraries(cta PRIVATE ${SQLite3_LIBRARIES})
+
 target_link_libraries(cta PRIVATE blas)
 target_link_libraries(cta PRIVATE lapack)
 

From 6f8e5a8aa3988c9ba6bb116adea7ab6e5de1db46 Mon Sep 17 00:00:00 2001
From: Werner Kramer <werner.kramer@vortech.nl>
Date: Wed, 13 Sep 2023 17:49:31 +0200
Subject: [PATCH 08/15] Link to netcdf

---
 costa/native/cta/src/CMakeLists.txt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/costa/native/cta/src/CMakeLists.txt b/costa/native/cta/src/CMakeLists.txt
index c4b45d21e..4e4d779cb 100644
--- a/costa/native/cta/src/CMakeLists.txt
+++ b/costa/native/cta/src/CMakeLists.txt
@@ -2,6 +2,9 @@ cmake_minimum_required(VERSION 3.9.1)
 find_package(LibXml2 REQUIRED)
 find_package(SQLite3 REQUIRED)
 
+string(REPLACE ":" ";" LIBRARY_DIRS $ENV{LD_LIBRARY_PATH})
+find_library (NETCDF_LIBRARIES libnetcdf.so PATHS ${LIBRARY_DIRS})
+
 set(LIBRARY_OUTPUT_PATH  ${CMAKE_BINARY_DIR}/lib)
 set(SOURCES
   cta_array.c       cta_matrix_blas.c      cta_obsdescr_combine.c  cta_sobs_combine.c  cta_util_sort.c
@@ -31,6 +34,7 @@ target_include_directories(cta PRIVATE ${SQLite3_INCLUDE_DIRS})
 
 target_link_libraries(cta PRIVATE ${LIBXML2_LIBRARIES})
 target_link_libraries(cta PRIVATE ${SQLite3_LIBRARIES})
+target_link_libraries(cta PRIVATE ${NETCDF_LIBRARIES})
 
 target_link_libraries(cta PRIVATE blas)
 target_link_libraries(cta PRIVATE lapack)

From dd7337237beed89c93a91f1b61dbef48a5001b66 Mon Sep 17 00:00:00 2001
From: Werner Kramer <werner.kramer@vortech.nl>
Date: Fri, 22 Sep 2023 13:32:12 +0200
Subject: [PATCH 09/15] Add check for HAVE_DLFCN_H

---
 costa/native/cta/src/CMakeLists.txt | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/costa/native/cta/src/CMakeLists.txt b/costa/native/cta/src/CMakeLists.txt
index 4e4d779cb..f8e5ffdbd 100644
--- a/costa/native/cta/src/CMakeLists.txt
+++ b/costa/native/cta/src/CMakeLists.txt
@@ -1,4 +1,13 @@
 cmake_minimum_required(VERSION 3.9.1)
+# Check for the presence of dlfcn.h
+include(CheckIncludeFile)
+
+# Perform the check
+CHECK_INCLUDE_FILE("dlfcn.h" HAVE_DLFCN_H)
+if(HAVE_DLFCN_H)
+    add_definitions(-DHAVE_DLFCN_H)
+endif()
+
 find_package(LibXml2 REQUIRED)
 find_package(SQLite3 REQUIRED)
 

From 7a004ff3085b77f21f808bb43081cd75e1b63a1f Mon Sep 17 00:00:00 2001
From: Werner Kramer <werner.kramer@vortech.nl>
Date: Tue, 26 Sep 2023 16:24:15 +0200
Subject: [PATCH 10/15] Add FindMPI

---
 costa/native/cta/src/CMakeLists.txt | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/costa/native/cta/src/CMakeLists.txt b/costa/native/cta/src/CMakeLists.txt
index f8e5ffdbd..e12fa6af7 100644
--- a/costa/native/cta/src/CMakeLists.txt
+++ b/costa/native/cta/src/CMakeLists.txt
@@ -1,6 +1,12 @@
 cmake_minimum_required(VERSION 3.9.1)
 # Check for the presence of dlfcn.h
 include(CheckIncludeFile)
+include(FindMPI)
+
+if(MPI_FOUND)
+  message("Found MPI: ${MPI_FOUND}\n")
+  add_definitions(-DUSE_MPI)
+endif()
 
 # Perform the check
 CHECK_INCLUDE_FILE("dlfcn.h" HAVE_DLFCN_H)

From f6e8eaff215523ddde84dbc45e4af01eb608f6c1 Mon Sep 17 00:00:00 2001
From: Werner Kramer <werner.kramer@vortech.nl>
Date: Tue, 26 Sep 2023 16:25:36 +0200
Subject: [PATCH 11/15] Remove unused libraries for bridge

---
 costa/native/bridge/src/CMakeLists.txt | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/costa/native/bridge/src/CMakeLists.txt b/costa/native/bridge/src/CMakeLists.txt
index 843749bbc..a00d1fd57 100644
--- a/costa/native/bridge/src/CMakeLists.txt
+++ b/costa/native/bridge/src/CMakeLists.txt
@@ -18,11 +18,10 @@ set(SOURCES
 
 add_library(opendabridge SHARED ${SOURCES})
 
+target_include_directories(opendabridge PRIVATE ${LIBXML2_INCLUDE_DIR})
+target_include_directories(opendabridge PRIVATE ${JNI_INCLUDE_DIRS})
+
 target_include_directories(opendabridge PRIVATE ../include)
 target_include_directories(opendabridge PRIVATE ${CMAKE_SOURCE_DIR}/cta/include)
-target_include_directories(opendabridge PRIVATE ${CMAKE_SOURCE_DIR}/external)
-target_include_directories(opendabridge PRIVATE ${LIBXML2_INCLUDE_DIR}) 
-target_include_directories(opendabridge PRIVATE ${JNI_INCLUDE_DIRS})
-target_link_libraries(opendabridge PRIVATE ${LIBXML2_LIBRARIES})
 target_link_libraries(opendabridge PRIVATE cta)
 

From a354666437bc60665afc08404198b7cb3b06fb63 Mon Sep 17 00:00:00 2001
From: Werner Kramer <werner.kramer@vortech.nl>
Date: Wed, 27 Sep 2023 09:39:37 +0200
Subject: [PATCH 12/15] Enable DEBUG in cta_par.c

---
 costa/native/cta/src/cta_par.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/costa/native/cta/src/cta_par.c b/costa/native/cta/src/cta_par.c
index f0b86abdf..134ad27e6 100644
--- a/costa/native/cta/src/cta_par.c
+++ b/costa/native/cta/src/cta_par.c
@@ -36,7 +36,7 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 #define CTA_PAR_CREATEGROUPS_F77  F77_CALL(cta_par_creategroups,CTA_PAR_CREATEGROUPS)
 #define CTA_PAR_GETGROUPINFO_F77  F77_CALL(cta_par_getgroupinfo,CTA_PAR_GETGROUPINFO)
 
-#define IDEBUG (0)
+#define IDEBUG (1)
 enum CTAI_ParType  {WorkerWorker,MasterWorker};
 enum CTA_ParProcType CTA_MY_PROC_TYPE=CTA_ParMaster;
 int  CTA_IS_PARALLEL=CTA_FALSE;

From a2cc35fc6ce3fcf6cbe24a3e62a5a7210dc1435e Mon Sep 17 00:00:00 2001
From: Werner Kramer <werner.kramer@vortech.nl>
Date: Wed, 27 Sep 2023 11:31:02 +0200
Subject: [PATCH 13/15] Add HAVE_LIBNETCDF to definitions

---
 costa/native/cta/src/CMakeLists.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/costa/native/cta/src/CMakeLists.txt b/costa/native/cta/src/CMakeLists.txt
index e12fa6af7..94f097c5b 100644
--- a/costa/native/cta/src/CMakeLists.txt
+++ b/costa/native/cta/src/CMakeLists.txt
@@ -20,6 +20,8 @@ find_package(SQLite3 REQUIRED)
 string(REPLACE ":" ";" LIBRARY_DIRS $ENV{LD_LIBRARY_PATH})
 find_library (NETCDF_LIBRARIES libnetcdf.so PATHS ${LIBRARY_DIRS})
 
+add_definitions(-DHAVE_LIBNETCDF)
+
 set(LIBRARY_OUTPUT_PATH  ${CMAKE_BINARY_DIR}/lib)
 set(SOURCES
   cta_array.c       cta_matrix_blas.c      cta_obsdescr_combine.c  cta_sobs_combine.c  cta_util_sort.c

From e9c6c0f5c35615340cf206e768e5cd507c53afb7 Mon Sep 17 00:00:00 2001
From: Werner Kramer <werner.kramer@vortech.nl>
Date: Wed, 27 Sep 2023 14:57:06 +0200
Subject: [PATCH 14/15] Add netcdf include dir

---
 costa/native/cta/src/CMakeLists.txt | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/costa/native/cta/src/CMakeLists.txt b/costa/native/cta/src/CMakeLists.txt
index 94f097c5b..59a327bbd 100644
--- a/costa/native/cta/src/CMakeLists.txt
+++ b/costa/native/cta/src/CMakeLists.txt
@@ -19,8 +19,12 @@ find_package(SQLite3 REQUIRED)
 
 string(REPLACE ":" ";" LIBRARY_DIRS $ENV{LD_LIBRARY_PATH})
 find_library (NETCDF_LIBRARIES libnetcdf.so PATHS ${LIBRARY_DIRS})
+set(NETCDF_INCLUDE_DIRS ${NETCDF_LIBRARIES})
+cmake_path(REMOVE_FILENAME NETCDF_INCLUDE_DIRS)
 
+string(REPLACE "/lib" "/include" NETCDF_INCLUDE_DIRS ${NETCDF_INCLUDE_DIRS})
 add_definitions(-DHAVE_LIBNETCDF)
+message("NetCDF include : ${NETCDF_INCLUDE_DIRS}\n")
 
 set(LIBRARY_OUTPUT_PATH  ${CMAKE_BINARY_DIR}/lib)
 set(SOURCES
@@ -48,6 +52,7 @@ target_include_directories(cta PRIVATE ../include)
 target_include_directories(cta PRIVATE ${CMAKE_SOURCE_DIR}/external)
 target_include_directories(cta PRIVATE ${LIBXML2_INCLUDE_DIR})
 target_include_directories(cta PRIVATE ${SQLite3_INCLUDE_DIRS}) 
+target_include_directories(cta PRIVATE ${NETCDF_INCLUDE_DIRS})
 
 target_link_libraries(cta PRIVATE ${LIBXML2_LIBRARIES})
 target_link_libraries(cta PRIVATE ${SQLite3_LIBRARIES})

From 9b986739572c95043e82f721af3029d3635b707c Mon Sep 17 00:00:00 2001
From: Werner Kramer <werner.kramer@vortech.nl>
Date: Mon, 13 Nov 2023 13:43:18 +0100
Subject: [PATCH 15/15] Remove .so extension from find_library

---
 costa/native/cta/src/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/costa/native/cta/src/CMakeLists.txt b/costa/native/cta/src/CMakeLists.txt
index 59a327bbd..567cb54b2 100644
--- a/costa/native/cta/src/CMakeLists.txt
+++ b/costa/native/cta/src/CMakeLists.txt
@@ -18,7 +18,7 @@ find_package(LibXml2 REQUIRED)
 find_package(SQLite3 REQUIRED)
 
 string(REPLACE ":" ";" LIBRARY_DIRS $ENV{LD_LIBRARY_PATH})
-find_library (NETCDF_LIBRARIES libnetcdf.so PATHS ${LIBRARY_DIRS})
+find_library (NETCDF_LIBRARIES libnetcdf PATHS ${LIBRARY_DIRS})
 set(NETCDF_INCLUDE_DIRS ${NETCDF_LIBRARIES})
 cmake_path(REMOVE_FILENAME NETCDF_INCLUDE_DIRS)